From cb1d6cddda11fcc669a5d04def93e07ac293a463 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 3 Dec 2025 13:16:15 +0100 Subject: [PATCH] Import starpu_1.4.10+dfsg.orig.tar.gz [dgit import orig starpu_1.4.10+dfsg.orig.tar.gz] --- .gitlab-ci.yml | 225 + AUTHORS | 62 + COPYING.LGPL | 510 + ChangeLog | 1622 + INSTALL | 214 + Makefile.am | 244 + Makefile.in | 1554 + README.dev | 228 + README.md | 189 + STARPU-REVISION | 1 + STARPU-VERSION | 70 + TODO | 24 + aclocal.m4 | 1220 + autogen.sh | 30 + bubble/Makefile.am | 20 + bubble/Makefile.in | 889 + bubble/tests/Makefile.am | 81 + bubble/tests/Makefile.in | 1938 + bubble/tests/basic/b.c | 70 + bubble/tests/basic/b2t.c | 158 + bubble/tests/basic/basic.h | 129 + bubble/tests/basic/bb.c | 77 + bubble/tests/basic/bbt.c | 82 + bubble/tests/basic/bbtt.c | 88 + bubble/tests/basic/brbtt.c | 151 + bubble/tests/basic/brec.c | 177 + bubble/tests/basic/brec_level.c | 195 + bubble/tests/basic/brt.c | 140 + bubble/tests/basic/btb.c | 82 + bubble/tests/basic/btb_func.c | 130 + bubble/tests/basic/btt.c | 82 + bubble/tests/basic/gemm_dag.c | 319 + bubble/tests/basic/read.c | 117 + bubble/tests/basic/sync.c | 126 + bubble/tests/basic/tbbt.c | 87 + bubble/tests/basic/tbrbtt.c | 180 + bubble/tests/basic/tbtbt.c | 92 + bubble/tests/loader.c | 505 + bubble/tests/vector/vector.c | 228 + build-aux/ar-lib | 271 + build-aux/compile | 348 + build-aux/config.guess | 1754 + build-aux/config.sub | 1890 + build-aux/depcomp | 791 + build-aux/install-sh | 541 + build-aux/ltmain.sh | 11448 ++++ build-aux/missing | 215 + build-aux/test-driver | 153 + configure | 51344 ++++++++++++++++ configure.ac | 4573 ++ contrib/ci.inria.fr/Jenkinsfile-basic | 136 + contrib/ci.inria.fr/Jenkinsfile-bsd | 131 + contrib/ci.inria.fr/Jenkinsfile-windows | 140 + contrib/ci.inria.fr/job-0-tarball.sh | 41 + contrib/ci.inria.fr/job-1-build-windows.sh | 88 + contrib/ci.inria.fr/job-1-check-windows.bat | 27 + contrib/ci.inria.fr/job-1-check.sh | 150 + 
contrib/gitlab/build.sh | 38 + contrib/gitlab/chameleon.sh | 50 + contrib/gitlab/coverity.sh | 29 + contrib/gitlab/deploy.sh | 18 + contrib/gitlab/simgrid.sh | 19 + doc/Makefile.am | 69 + doc/Makefile.in | 1042 + doc/doxy.mk | 152 + doc/doxygen.cfg | 1918 + doc/doxygen/Makefile.am | 233 + doc/doxygen/Makefile.in | 1078 + doc/doxygen/chapters/api/bubble_support.doxy | 25 + doc/doxygen/chapters/api/fft_support.doxy | 79 + doc/doxygen/chapters/api/fortran_support.doxy | 21 + doc/doxygen/chapters/api/threads.doxy | 377 + doc/doxygen/chapters/fdl_1_3.doxy | 526 + doc/doxygen/chapters/files.doxy | 88 + doc/doxygen/chapters/foreword.doxy | 22 + doc/doxygen/chapters/images/arbiter.png | Bin 0 -> 784 bytes doc/doxygen/chapters/images/data_trace.png | Bin 0 -> 7799 bytes doc/doxygen/chapters/images/distrib_data.png | Bin 0 -> 6247 bytes .../chapters/images/distrib_data_histo.png | Bin 0 -> 9743 bytes .../chapters/images/eclipse_hello_build.png | Bin 0 -> 226826 bytes .../chapters/images/eclipse_hello_fxt.png | Bin 0 -> 236816 bytes .../chapters/images/eclipse_hello_graph.png | Bin 0 -> 16899 bytes .../chapters/images/eclipse_hello_hgraph.png | Bin 0 -> 20117 bytes .../images/eclipse_hello_paje_trace.png | Bin 0 -> 236343 bytes .../chapters/images/eclipse_hello_plugin.png | Bin 0 -> 236396 bytes .../chapters/images/eclipse_hello_run.png | Bin 0 -> 232946 bytes .../images/eclipse_hello_svg_graph.png | Bin 0 -> 232450 bytes .../chapters/images/eclipse_hello_vite.png | Bin 0 -> 88807 bytes .../chapters/images/eclipse_install_cdt.png | Bin 0 -> 109713 bytes .../chapters/images/eclipse_install_pde.png | Bin 0 -> 129811 bytes .../chapters/images/eclipse_installer.png | Bin 0 -> 85596 bytes .../chapters/images/paje_draw_histogram.png | Bin 0 -> 36775 bytes .../chapters/images/parallel_worker2.png | Bin 0 -> 3412 bytes doc/doxygen/chapters/images/runtime-par.png | Bin 0 -> 73678 bytes .../images/starpu_chol_model_11_type.png | Bin 0 -> 5871 bytes 
..._linear_memset_regression_based_energy.png | Bin 0 -> 6958 bytes .../chapters/images/starpu_log_arr.png | Bin 0 -> 15960 bytes .../chapters/images/starpu_log_list.png | Bin 0 -> 14438 bytes ...rpu_non_linear_memset_regression_based.png | Bin 0 -> 8553 bytes ...u_non_linear_memset_regression_based_2.png | Bin 0 -> 8061 bytes ..._linear_memset_regression_based_energy.png | Bin 0 -> 9366 bytes ...wer_non_linear_memset_regression_based.png | Bin 0 -> 6923 bytes .../images/starpu_starpu_slu_lu_model_11.png | Bin 0 -> 7629 bytes .../images/starpupy_handle_func_perf.png | Bin 0 -> 170242 bytes .../starpupy_handle_func_perf_pickle.png | Bin 0 -> 116438 bytes .../chapters/images/starpupy_handle_perf.png | Bin 0 -> 162085 bytes .../images/starpupy_handle_perf_pickle.png | Bin 0 -> 117441 bytes doc/doxygen/chapters/images/starvz_visu.png | Bin 0 -> 118157 bytes doc/doxygen/chapters/images/starvz_visu_r.png | Bin 0 -> 57891 bytes .../chapters/images/tasks_size_overhead.png | Bin 0 -> 11755 bytes .../tasks_size_overhead_py_fut_pickle.png | Bin 0 -> 101489 bytes .../images/tasks_size_overhead_py_futur.png | Bin 0 -> 16345 bytes .../images/tasks_size_overhead_py_handle.png | Bin 0 -> 15509 bytes .../tasks_size_overhead_py_handle_pickle.png | Bin 0 -> 96256 bytes .../images/tasks_size_overhead_py_none.png | Bin 0 -> 14956 bytes .../tasks_size_overhead_py_noret_pickle.png | Bin 0 -> 94794 bytes doc/doxygen/chapters/images/temanejo.png | Bin 0 -> 101216 bytes .../chapters/images/trace_bw_heatmap.png | Bin 0 -> 5919 bytes .../chapters/images/trace_recv_use.png | Bin 0 -> 48867 bytes .../chapters/images/trace_send_use.png | Bin 0 -> 48178 bytes .../chapters/images/trace_volume_heatmap.png | Bin 0 -> 5717 bytes .../applications_intro.doxy | 30 + .../starpu_applications/code/stencil5.c | 57 + .../code/stencil5_starpu.c | 111 + .../code/stencil5_starpu_mpi.c | 103 + .../starpu_applications/code/vector_scal_c.c | 50 + .../code/vector_scal_c_align.c | 82 + .../code/vector_scal_cpu.c | 
41 + .../code/vector_scal_starpu.c | 93 + .../chapters/starpu_applications/stencil.doxy | 101 + .../starpu_applications/vector_scaling.doxy | 185 + .../starpu_basics/basic_examples.doxy | 494 + .../chapters/starpu_basics/basics_intro.doxy | 60 + .../starpu_basics/code/basics_vector_scal_c.c | 126 + .../code/basics_vector_scal_cpu.c | 78 + .../code/basics_vector_scal_cuda.c | 43 + .../code/basics_vector_scal_opencl.c | 74 + .../code/basics_vector_scal_opencl_codelet.cl | 26 + .../starpu_basics/data_management.doxy | 753 + .../starpu_basics/examples_sources.doxy | 83 + .../starpu_basics/scaling_vector_example.doxy | 42 + .../chapters/starpu_basics/scheduling.doxy | 204 + .../starpu_basics/starpu_applications.doxy | 456 + doc/doxygen/chapters/starpu_basics/tasks.doxy | 412 + .../advanced_data_management.doxy | 947 + .../advanced_scheduling.doxy | 314 + .../starpu_extensions/advanced_tasks.doxy | 475 + .../chapters/starpu_extensions/bubble.doxy | 196 + .../chapters/starpu_extensions/code/complex.c | 21 + .../starpu_extensions/code/disk_compute.c | 177 + .../starpu_extensions/code/disk_copy.c | 121 + .../starpu_extensions/code/forkmode.c | 41 + .../starpu_extensions/code/multiformat.c | 63 + .../chapters/starpu_extensions/code/simgrid.c | 31 + .../starpu_extensions/cuda_support.doxy | 25 + .../starpu_extensions/debugging_tools.doxy | 128 + .../starpu_extensions/extensions_intro.doxy | 114 + .../starpu_extensions/fault_tolerance.doxy | 49 + .../starpu_extensions/fft_support.doxy | 82 + .../chapters/starpu_extensions/helpers.doxy | 49 + .../starpu_extensions/interoperability.doxy | 120 + .../starpu_extensions/max_fpga_support.doxy | 332 + .../starpu_extensions/mpi_support.doxy | 1342 + .../starpu_extensions/opencl_support.doxy | 28 + .../starpu_extensions/out_of_core.doxy | 240 + .../starpu_extensions/parallel_worker.doxy | 265 + .../scheduling_context_hypervisor.doxy | 235 + .../scheduling_contexts.doxy | 250 + .../scheduling_policy_definition.doxy | 704 + 
.../chapters/starpu_extensions/simgrid.doxy | 278 + .../socl_opencl_extensions.doxy | 90 + .../starpu_extensions/tcpip_support.doxy | 53 + .../starpu_extensions/transactions.doxy | 81 + .../starpu_faq/check_list_performance.doxy | 578 + doc/doxygen/chapters/starpu_faq/faq.doxy | 518 + .../chapters/starpu_faq/faq_intro.doxy | 31 + .../starpu_installation/building.doxy | 267 + .../configuration_and_initialization.doxy | 51 + .../configure_options.doxy | 865 + .../environment_variables.doxy | 2072 + .../installation_intro.doxy | 32 + .../starpu_introduction/doc_organization.doxy | 97 + .../starpu_introduction/glossary.doxy | 65 + .../introduction_intro.doxy | 168 + .../starpu_languages/code/java_spark.java | 61 + .../starpu_languages/code/java_starpu.java | 89 + .../starpu_languages/code/nf_initexit.f90 | 35 + .../chapters/starpu_languages/java.doxy | 33 + .../starpu_languages/languages_intro.doxy | 31 + .../native_fortran_support.doxy | 255 + .../openmp_runtime_support.doxy | 554 + .../chapters/starpu_languages/python.doxy | 1428 + .../benchmarking_starpu.doxy | 83 + .../offline_performance_tools.doxy | 1332 + .../online_performance_tools.doxy | 877 + .../performances_intro.doxy | 29 + doc/doxygen/chapters/version.html | 2 + doc/doxygen/chapters/version.sty | 2 + doc/doxygen/doxygen-config-include.cfg.in | 85 + doc/doxygen/doxygen-config.cfg.in | 93 + doc/doxygen/doxygen_filter.sh.in | 23 + doc/doxygen/refman.tex | 449 + doc/doxygen_dev/Makefile.am | 127 + doc/doxygen_dev/Makefile.in | 971 + .../chapters/000_introduction.doxy | 28 + doc/doxygen_dev/chapters/010_core.doxy | 393 + doc/doxygen_dev/chapters/version.html | 2 + doc/doxygen_dev/chapters/version.sty | 2 + doc/doxygen_dev/doxygen-config-include.cfg.in | 17 + doc/doxygen_dev/doxygen-config.cfg.in | 142 + doc/doxygen_dev/doxygen_filter.sh.in | 26 + doc/doxygen_dev/refman.tex | 151 + doc/doxygen_web_applications/Makefile.am | 56 + doc/doxygen_web_applications/Makefile.in | 893 + .../chapters/version.html | 2 + 
.../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 44 + doc/doxygen_web_applications/refman.tex | 47 + doc/doxygen_web_basics/Makefile.am | 105 + doc/doxygen_web_basics/Makefile.in | 943 + doc/doxygen_web_basics/chapters/version.html | 2 + doc/doxygen_web_basics/chapters/version.sty | 2 + doc/doxygen_web_basics/doxygen-config.cfg.in | 49 + doc/doxygen_web_basics/refman.tex | 72 + doc/doxygen_web_extensions/Makefile.am | 121 + doc/doxygen_web_extensions/Makefile.in | 959 + .../chapters/version.html | 2 + .../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 65 + doc/doxygen_web_extensions/refman.tex | 147 + doc/doxygen_web_faq/Makefile.am | 95 + doc/doxygen_web_faq/Makefile.in | 933 + doc/doxygen_web_faq/chapters/version.html | 2 + doc/doxygen_web_faq/chapters/version.sty | 2 + doc/doxygen_web_faq/doxygen-config.cfg.in | 43 + doc/doxygen_web_faq/refman.tex | 47 + doc/doxygen_web_installation/Makefile.am | 97 + doc/doxygen_web_installation/Makefile.in | 935 + .../chapters/version.html | 2 + .../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 45 + doc/doxygen_web_installation/refman.tex | 57 + doc/doxygen_web_introduction/Makefile.am | 94 + doc/doxygen_web_introduction/Makefile.in | 932 + .../chapters/version.html | 2 + .../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 42 + doc/doxygen_web_introduction/refman.tex | 42 + doc/doxygen_web_languages/Makefile.am | 100 + doc/doxygen_web_languages/Makefile.in | 938 + .../chapters/version.html | 2 + .../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 46 + doc/doxygen_web_languages/refman.tex | 57 + doc/doxygen_web_performances/Makefile.am | 96 + doc/doxygen_web_performances/Makefile.in | 934 + .../chapters/version.html | 2 + .../chapters/version.sty | 2 + .../doxygen-config.cfg.in | 44 + doc/doxygen_web_performances/refman.tex | 52 + doc/extractHeadline.sh | 41 + doc/fixLinks.sh | 58 + doc/sectionNumbering.py | 63 + doc/title.tex | 54 + doc/tutorial/Makefile | 46 + doc/tutorial/README | 35 
+ doc/tutorial/hello_world.c | 73 + doc/tutorial/hello_world_msvc.c | 75 + doc/tutorial/vector_scal.c | 117 + doc/tutorial/vector_scal_cpu.c | 49 + doc/tutorial/vector_scal_cuda.cu | 44 + doc/tutorial/vector_scal_opencl.c | 64 + doc/tutorial/vector_scal_opencl_kernel.cl | 24 + eclipse-plugin/.classpath | 11 + eclipse-plugin/.project | 28 + .../.settings/org.eclipse.jdt.core.prefs | 9 + eclipse-plugin/META-INF/MANIFEST.MF | 12 + eclipse-plugin/Makefile.am | 35 + eclipse-plugin/Makefile.in | 948 + eclipse-plugin/build.properties | 8 + eclipse-plugin/build.xml | 343 + eclipse-plugin/examples/Makefile.am | 35 + eclipse-plugin/examples/Makefile.in | 826 + eclipse-plugin/examples/hello/.cproject.in | 242 + eclipse-plugin/examples/hello/.project | 26 + .../hello/.settings/language.settings.xml | 48 + eclipse-plugin/examples/hello/hello.c | 133 + eclipse-plugin/icons/fxt.png | Bin 0 -> 1364 bytes eclipse-plugin/icons/svg.png | Bin 0 -> 1751 bytes eclipse-plugin/icons/taskGraph.png | Bin 0 -> 1449 bytes eclipse-plugin/icons/vite.png | Bin 0 -> 1432 bytes eclipse-plugin/plugin.xml | 159 + eclipse-plugin/src/Makefile.am | 54 + eclipse-plugin/src/Makefile.in | 792 + eclipse-plugin/src/deploy/build.xml | 7 + eclipse-plugin/src/deploy/javaCompilerArgs | 79 + .../src/starpu/handlers/SvgHandler.java | 82 + .../src/starpu/handlers/TaskGraphHandler.java | 68 + .../src/starpu/handlers/TraceGenHandler.java | 70 + .../src/starpu/handlers/TraceUtils.java | 95 + .../src/starpu/handlers/TraceVizHandler.java | 49 + eclipse-plugin/tools/cproject.sh | 36 + eclipse-plugin/tools/install_workspace.sh | 30 + examples/Makefile.am | 1579 + examples/Makefile.in | 8263 +++ examples/README.txt | 121 + examples/api/bcsr_data_interface.c | 36 + examples/api/block_data_interface.c | 35 + examples/api/coo_data_interface.c | 27 + examples/api/csr_data_interface.c | 34 + examples/api/matrix_data_interface.c | 35 + examples/api/multiformat_data_interface.c | 27 + examples/api/tensor_data_interface.c | 37 + 
examples/api/variable_data_interface.c | 30 + examples/api/vector_data_interface.c | 33 + examples/api/void_data_interface.c | 27 + examples/axpy/axpy.c | 245 + examples/axpy/axpy.h | 24 + examples/axpy/axpy_opencl.c | 75 + examples/axpy/axpy_opencl_kernel.cl | 33 + examples/basic_examples/block.c | 146 + examples/basic_examples/block_cpu.c | 39 + examples/basic_examples/block_cuda.cu | 46 + examples/basic_examples/block_hip.hip | 47 + examples/basic_examples/block_opencl.c | 70 + .../basic_examples/block_opencl_kernel.cl | 22 + examples/basic_examples/dynamic_handles.c | 203 + examples/basic_examples/hello_world.c | 132 + examples/basic_examples/hooks.c | 62 + examples/basic_examples/mult.c | 467 + examples/basic_examples/mult_cuda.cu | 131 + examples/basic_examples/mult_hip.hip | 133 + examples/basic_examples/multiformat.c | 341 + .../multiformat_conversion_codelets.c | 88 + .../multiformat_conversion_codelets_cuda.cu | 49 + .../multiformat_conversion_codelets_opencl.c | 94 + ...ormat_conversion_codelets_opencl_kernel.cl | 29 + examples/basic_examples/multiformat_cuda.cu | 46 + examples/basic_examples/multiformat_opencl.c | 93 + .../multiformat_opencl_kernel.cl | 24 + examples/basic_examples/multiformat_types.h | 32 + examples/basic_examples/ndim.c | 117 + examples/basic_examples/task_insert_color.c | 88 + examples/basic_examples/topology.c | 34 + examples/basic_examples/variable.c | 110 + examples/basic_examples/variable_kernels.cu | 33 + .../basic_examples/variable_kernels_cpu.c | 26 + .../basic_examples/variable_kernels_opencl.c | 51 + .../variable_kernels_opencl_kernel.cl | 23 + examples/basic_examples/vector_scal.c | 203 + examples/basic_examples/vector_scal_c.c | 111 + examples/basic_examples/vector_scal_cpu.c | 25 + .../basic_examples/vector_scal_cpu_icc.icc | 26 + .../basic_examples/vector_scal_cpu_template.h | 93 + examples/basic_examples/vector_scal_cuda.cu | 46 + examples/basic_examples/vector_scal_fortran.F | 34 + 
examples/basic_examples/vector_scal_hip.hip | 45 + examples/basic_examples/vector_scal_opencl.c | 66 + .../vector_scal_opencl_kernel.cl | 24 + examples/binary/binary.c | 154 + examples/callback/callback.c | 93 + examples/callback/prologue.c | 107 + examples/cg/cg.c | 354 + examples/cg/cg.h | 54 + examples/cg/cg_kernels.c | 876 + examples/cholesky/cholesky.h | 307 + examples/cholesky/cholesky.sh | 89 + examples/cholesky/cholesky_compil.c | 434 + examples/cholesky/cholesky_compiled.c | 36 + examples/cholesky/cholesky_grain_tag.c | 500 + examples/cholesky/cholesky_implicit.c | 415 + examples/cholesky/cholesky_julia.sh | 19 + examples/cholesky/cholesky_kernels.c | 582 + examples/cholesky/cholesky_models.c | 203 + examples/cholesky/cholesky_tag.c | 480 + examples/cholesky/cholesky_tile_tag.c | 348 + examples/cholesky/libmy_dmda.c | 381 + examples/cholesky/libmy_dmda.h | 24 + examples/common/blas.c | 520 + examples/common/blas.h | 173 + examples/common/blas_model.c | 46 + examples/common/blas_model.h | 64 + examples/cpp/Makefile_add_vectors.mk | 34 + examples/cpp/Makefile_add_vectors_cpp11.mk | 34 + examples/cpp/add_vectors.cpp | 159 + examples/cpp/add_vectors_cpp11.cpp | 165 + examples/cpp/add_vectors_interface.cpp | 676 + examples/cpp/incrementer_cpp.cpp | 117 + examples/dependency/sequential_consistency.c | 180 + examples/dependency/task_end_dep.c | 125 + examples/dependency/task_end_dep_add.c | 107 + examples/filters/alloc.c | 102 + examples/filters/custom_mf/conversion.cu | 50 + .../filters/custom_mf/conversion_opencl.c | 97 + .../filters/custom_mf/conversion_opencl.cl | 32 + examples/filters/custom_mf/cuda.cu | 44 + .../custom_mf/custom_conversion_codelets.c | 95 + examples/filters/custom_mf/custom_interface.c | 495 + examples/filters/custom_mf/custom_interface.h | 48 + examples/filters/custom_mf/custom_mf_filter.c | 326 + examples/filters/custom_mf/custom_opencl.c | 95 + examples/filters/custom_mf/custom_opencl.cl | 26 + examples/filters/custom_mf/custom_types.h | 
32 + examples/filters/f3d_cpu.c | 41 + examples/filters/f3d_cuda.cu | 50 + examples/filters/f3d_hip.hip | 50 + examples/filters/f4d_cpu.c | 47 + examples/filters/f4d_cuda.cu | 55 + examples/filters/f4d_hip.hip | 55 + examples/filters/f5d_print.c | 78 + examples/filters/fblock.c | 159 + examples/filters/fblock_cpu.c | 41 + examples/filters/fblock_cuda.cu | 48 + examples/filters/fblock_hip.hip | 48 + examples/filters/fblock_opencl.c | 69 + examples/filters/fblock_opencl_kernel.cl | 34 + examples/filters/fblock_pick_matrix.c | 133 + examples/filters/fblock_pick_variable.c | 136 + examples/filters/fblock_print.c | 78 + examples/filters/fmatrix.c | 119 + examples/filters/fmatrix_cpu.c | 39 + examples/filters/fmatrix_cuda.cu | 42 + examples/filters/fmatrix_hip.hip | 42 + examples/filters/fmatrix_pick_variable.c | 185 + examples/filters/fmatrix_pick_vector.c | 136 + examples/filters/fmatrix_print.c | 70 + examples/filters/fmultiple_cuda.cu | 78 + examples/filters/fmultiple_hip.hip | 78 + examples/filters/fmultiple_manual.c | 259 + examples/filters/fmultiple_submit.c | 226 + examples/filters/fmultiple_submit_implicit.c | 384 + examples/filters/fmultiple_submit_readonly.c | 406 + .../fmultiple_submit_readonly_downgrade.c | 417 + examples/filters/fndim.c | 141 + examples/filters/fndim_1d_pick_variable.c | 130 + examples/filters/fndim_2d_pick_vector.c | 140 + examples/filters/fndim_3d_pick_matrix.c | 137 + examples/filters/fndim_4d_pick_block.c | 138 + examples/filters/fndim_5d_pick_tensor.c | 139 + examples/filters/fndim_pick_ndim.c | 135 + examples/filters/fndim_pick_variable.c | 134 + examples/filters/fndim_to_block.c | 136 + examples/filters/fndim_to_matrix.c | 132 + examples/filters/fndim_to_tensor.c | 137 + examples/filters/fndim_to_variable.c | 119 + examples/filters/fndim_to_vector.c | 140 + examples/filters/fread.c | 146 + examples/filters/frecursive.c | 174 + examples/filters/ftensor.c | 138 + examples/filters/ftensor_cpu.c | 46 + examples/filters/ftensor_cuda.cu | 
53 + examples/filters/ftensor_hip.hip | 53 + examples/filters/ftensor_pick_block.c | 134 + examples/filters/ftensor_pick_variable.c | 137 + examples/filters/ftensor_print.c | 87 + examples/filters/fvariable_cuda.cu | 34 + examples/filters/fvector.c | 121 + examples/filters/fvector_cpu.c | 34 + examples/filters/fvector_cuda.cu | 37 + examples/filters/fvector_hip.hip | 37 + examples/filters/fvector_pick_variable.c | 129 + examples/filters/shadow.c | 187 + examples/filters/shadow2d.c | 295 + examples/filters/shadow3d.c | 337 + examples/filters/shadow4d.c | 496 + examples/filters/shadownd.c | 874 + examples/fortran/Makefile | 27 + examples/fortran/hello.F | 35 + examples/fortran/hello_c.c | 36 + examples/fortran/starpu_fortran.h | 54 + examples/fortran90/Makefile.mk | 57 + examples/fortran90/f90_example.f90 | 149 + examples/fortran90/marshalling.c | 166 + examples/fortran90/mod_compute.f90 | 127 + examples/fortran90/mod_interface.f90 | 63 + examples/fortran90/mod_types.f90 | 37 + examples/fortran90/starpu_mod.f90 | 145 + examples/gl_interop/gl_interop.c | 138 + examples/gl_interop/gl_interop_idle.c | 161 + examples/heat/dw_factolu.c | 869 + examples/heat/dw_factolu.h | 221 + examples/heat/dw_factolu_grain.c | 382 + examples/heat/dw_factolu_kernels.c | 434 + examples/heat/dw_factolu_tag.c | 335 + examples/heat/dw_sparse_cg.c | 483 + examples/heat/dw_sparse_cg.h | 136 + examples/heat/dw_sparse_cg_kernels.c | 446 + examples/heat/heat.c | 819 + examples/heat/heat.h | 72 + examples/heat/heat.sh | 42 + examples/heat/heat_display.c | 241 + examples/heat/lu_kernels_model.c | 254 + examples/heat/lu_kernels_model.h | 43 + examples/incrementer/incrementer.c | 144 + examples/incrementer/incrementer_kernels.cu | 37 + .../incrementer/incrementer_kernels_opencl.c | 54 + .../incrementer_kernels_opencl_kernel.cl | 25 + examples/interface/complex.c | 292 + examples/interface/complex_codelet.h | 108 + .../complex_dev_handle/complex_dev_handle.c | 288 + .../complex_dev_handle_codelet.h | 
109 + .../complex_dev_handle_filters.c | 94 + .../complex_dev_handle_interface.c | 305 + .../complex_dev_handle_interface.h | 61 + .../complex_dev_handle_kernels.cl | 41 + .../complex_dev_handle_kernels.cu | 49 + .../complex_dev_handle_kernels_opencl.c | 83 + examples/interface/complex_filters.c | 73 + examples/interface/complex_interface.c | 277 + examples/interface/complex_interface.h | 48 + examples/interface/complex_kernels.cl | 32 + examples/interface/complex_kernels.cu | 49 + examples/interface/complex_kernels_opencl.c | 75 + examples/loader.c | 505 + examples/lu/blas_complex.c | 212 + examples/lu/blas_complex.h | 155 + examples/lu/clu.c | 20 + examples/lu/clu_implicit.c | 20 + examples/lu/clu_implicit_pivot.c | 20 + examples/lu/clu_kernels.c | 20 + examples/lu/clu_pivot.c | 20 + examples/lu/complex_double.h | 52 + examples/lu/complex_float.h | 52 + examples/lu/dlu.c | 20 + examples/lu/dlu_implicit.c | 20 + examples/lu/dlu_implicit_pivot.c | 20 + examples/lu/dlu_kernels.c | 20 + examples/lu/dlu_pivot.c | 20 + examples/lu/lu-double.h | 50 + examples/lu/lu-float.h | 49 + examples/lu/lu.sh | 45 + examples/lu/lu_example.c | 480 + examples/lu/lu_example_complex_double.c | 20 + examples/lu/lu_example_complex_float.c | 20 + examples/lu/lu_example_double.c | 20 + examples/lu/lu_example_float.c | 20 + examples/lu/slu.c | 20 + examples/lu/slu_implicit.c | 20 + examples/lu/slu_implicit_pivot.c | 20 + examples/lu/slu_kernels.c | 20 + examples/lu/slu_pivot.c | 20 + examples/lu/xlu.c | 290 + examples/lu/xlu.h | 132 + examples/lu/xlu_implicit.c | 225 + examples/lu/xlu_implicit_pivot.c | 370 + examples/lu/xlu_kernels.c | 785 + examples/lu/xlu_kernels.h | 45 + examples/lu/xlu_pivot.c | 468 + examples/lu/zlu.c | 20 + examples/lu/zlu_implicit.c | 20 + examples/lu/zlu_implicit_pivot.c | 20 + examples/lu/zlu_kernels.c | 20 + examples/lu/zlu_pivot.c | 20 + examples/mandelbrot/mandelbrot.c | 639 + examples/matvecmult/matvecmult.c | 232 + examples/matvecmult/matvecmult_kernel.cl | 
30 + examples/mlr/mlr.c | 227 + examples/mult/dgemm.c | 18 + examples/mult/dgemm_layout.c | 18 + examples/mult/double.h | 30 + examples/mult/sgemm.c | 18 + examples/mult/sgemm.sh | 76 + examples/mult/sgemm_layout.c | 18 + examples/mult/simple.h | 29 + examples/mult/xgemm.c | 533 + examples/mult/xgemm.h | 230 + examples/mult/xgemm_layout.c | 1199 + examples/native_fortran/Makefile_nf_dynbuf.mk | 49 + .../native_fortran/Makefile_nf_example.mk | 50 + examples/native_fortran/Makefile_nf_matrix.mk | 52 + .../native_fortran/Makefile_nf_partition.mk | 52 + .../native_fortran/Makefile_nf_sched_ctx.mk | 48 + examples/native_fortran/Makefile_nf_varbuf.mk | 48 + examples/native_fortran/Makefile_nf_vector.mk | 52 + examples/native_fortran/fstarpu_mod.f90 | 2697 + examples/native_fortran/nf_codelets.f90 | 115 + examples/native_fortran/nf_compute.f90 | 132 + examples/native_fortran/nf_dynbuf.f90 | 77 + examples/native_fortran/nf_dynbuf_cl.f90 | 38 + examples/native_fortran/nf_example.f90 | 188 + examples/native_fortran/nf_matrix.f90 | 120 + examples/native_fortran/nf_partition.f90 | 121 + examples/native_fortran/nf_partition_cl.f90 | 44 + examples/native_fortran/nf_sched_ctx.f90 | 175 + examples/native_fortran/nf_sched_ctx_cl.f90 | 41 + examples/native_fortran/nf_types.f90 | 37 + examples/native_fortran/nf_varbuf.f90 | 83 + examples/native_fortran/nf_varbuf_cl.f90 | 38 + examples/native_fortran/nf_vector.f90 | 152 + examples/openmp/vector_scal_omp.c | 142 + examples/parallel_workers/parallel_workers.c | 151 + .../parallel_workers/parallel_workers_func.c | 105 + .../parallel_workers_oldapi.c | 54 + examples/perf_monitoring/perf_counters_01.c | 131 + examples/perf_monitoring/perf_counters_02.c | 252 + examples/perf_steering/perf_knobs_01.c | 131 + examples/perf_steering/perf_knobs_02.c | 149 + examples/perf_steering/perf_knobs_03.c | 180 + examples/pi/pi.c | 217 + examples/pi/pi.h | 29 + examples/pi/pi_kernel.cu | 156 + examples/pi/pi_redux.c | 420 + examples/pi/pi_redux_kernel.cu 
| 133 + examples/pipeline/pipeline.c | 268 + examples/ppm_downscaler/ppm_downscaler.c | 186 + examples/ppm_downscaler/ppm_downscaler.h | 31 + examples/ppm_downscaler/yuv_downscaler.c | 320 + examples/ppm_downscaler/yuv_downscaler.h | 41 + examples/profiling/profiling.c | 157 + examples/profiling_tool/libprofiling_tool.c | 76 + examples/profiling_tool/prof.sh | 25 + examples/reductions/dot_product.c | 461 + examples/reductions/dot_product.h | 22 + examples/reductions/dot_product_kernels.cu | 38 + .../reductions/dot_product_opencl_kernels.cl | 42 + examples/reductions/minmax_reduction.c | 225 + examples/sched_ctx/axpy_partition_gpu.cu | 78 + examples/sched_ctx/axpy_partition_gpu.h | 134 + examples/sched_ctx/dummy_sched_with_ctx.c | 177 + examples/sched_ctx/gpu_partition.c | 255 + examples/sched_ctx/nested_sched_ctxs.c | 248 + examples/sched_ctx/parallel_code.c | 105 + .../sched_ctx/parallel_tasks_reuse_handle.c | 245 + examples/sched_ctx/prio.c | 60 + examples/sched_ctx/sched_ctx.c | 172 + examples/sched_ctx/sched_ctx_delete.c | 51 + examples/sched_ctx/sched_ctx_empty.c | 65 + examples/sched_ctx/sched_ctx_remove.c | 174 + .../sched_ctx_without_sched_policy.c | 177 + .../sched_ctx_without_sched_policy_awake.c | 166 + examples/sched_ctx/two_cpu_contexts.c | 124 + examples/sched_ctx_utils/sched_ctx_utils.c | 329 + examples/sched_ctx_utils/sched_ctx_utils.h | 27 + examples/scheduler/dummy_modular_sched.c | 247 + examples/scheduler/dummy_sched.c | 172 + examples/scheduler/heteroprio_test.c | 238 + examples/scheduler/libdummy_sched.c | 139 + examples/scheduler/libdummy_sched.sh | 24 + examples/scheduler/schedulers.sh | 81 + examples/scheduler/schedulers_context.sh | 74 + examples/spmd/vector_scal_spmd.c | 170 + examples/spmv/dw_block_spmv.c | 342 + examples/spmv/dw_block_spmv.h | 36 + examples/spmv/dw_block_spmv_kernels.c | 79 + .../spmv/matrix_market/examples/fidapm05.mtx | 522 + examples/spmv/matrix_market/mm_to_bcsr.c | 378 + examples/spmv/matrix_market/mm_to_bcsr.h | 
53 + examples/spmv/matrix_market/mmio.c | 488 + examples/spmv/matrix_market/mmio.h | 142 + examples/spmv/spmv.c | 273 + examples/spmv/spmv.h | 41 + examples/spmv/spmv_cuda.cu | 102 + examples/spmv/spmv_kernels.c | 142 + examples/spmv/spmv_opencl.cl | 44 + examples/stencil/0.5.out | 79 + examples/stencil/0.out | 94 + examples/stencil/1.out | 75 + examples/stencil/2.out | 74 + examples/stencil/3.out | 75 + examples/stencil/4.out | 78 + examples/stencil/6.out | 83 + examples/stencil/Makefile.am | 152 + examples/stencil/Makefile.in | 1673 + examples/stencil/README | 46 + examples/stencil/implicit-stencil-blocks.c | 446 + examples/stencil/implicit-stencil-kernels.c | 763 + examples/stencil/implicit-stencil-tasks.c | 198 + examples/stencil/implicit-stencil.c | 398 + examples/stencil/implicit-stencil.h | 153 + examples/stencil/life.c | 48 + examples/stencil/life_cuda.cu | 78 + examples/stencil/life_opencl.c | 116 + examples/stencil/loader.c | 505 + examples/stencil/mpi.out | 94 + examples/stencil/results | 398 + examples/stencil/run | 28 + examples/stencil/shadow.cu | 58 + examples/stencil/shadow.h | 49 + examples/stencil/shadow_opencl.c | 112 + examples/stencil/stencil-blocks.c | 403 + examples/stencil/stencil-kernels.c | 631 + examples/stencil/stencil-tasks.c | 358 + examples/stencil/stencil.c | 392 + examples/stencil/stencil.h | 149 + examples/subgraphs/codelets.c | 85 + examples/subgraphs/main.h | 66 + examples/subgraphs/manual.c | 143 + examples/subgraphs/partition.c | 83 + examples/subgraphs/plan.c | 80 + examples/tag_example/tag_example.c | 246 + examples/tag_example/tag_example2.c | 143 + examples/tag_example/tag_example3.c | 147 + examples/tag_example/tag_example4.c | 157 + examples/tag_example/tag_restartable.c | 168 + examples/transactions/trs_inc.c | 155 + examples/transactions/trs_sgemm.c | 469 + .../worker_collections/worker_list_example.c | 95 + .../worker_collections/worker_tree_example.c | 109 + include/fstarpu_mod.f90 | 2697 + include/omp.h | 110 + 
include/pthread_win32/pthread.h | 520 + include/pthread_win32/semaphore.h | 72 + include/schedulers/starpu_heteroprio.h | 132 + include/schedulers/starpu_scheduler_toolbox.h | 168 + include/starpu.h | 859 + include/starpu_bitmap.h | 299 + include/starpu_bound.h | 97 + include/starpu_config.h.in | 393 + include/starpu_cublas.h | 62 + include/starpu_cublasLt.h | 65 + include/starpu_cublas_v2.h | 48 + include/starpu_cuda.h | 162 + include/starpu_cusolver.h | 99 + include/starpu_cusparse.h | 61 + include/starpu_data.h | 727 + include/starpu_data_filters.h | 1152 + include/starpu_data_interfaces.h | 2723 + include/starpu_deprecated_api.h | 122 + include/starpu_disk.h | 230 + include/starpu_driver.h | 116 + include/starpu_expert.h | 54 + include/starpu_fxt.h | 189 + include/starpu_hash.h | 74 + include/starpu_helper.h | 285 + include/starpu_hip.h | 148 + include/starpu_hipblas.h | 67 + include/starpu_max_fpga.h | 60 + include/starpu_mod.f90 | 145 + include/starpu_opencl.h | 359 + include/starpu_openmp.h | 1315 + include/starpu_parallel_worker.h | 191 + include/starpu_perf_monitoring.h | 268 + include/starpu_perf_steering.h | 270 + include/starpu_perfmodel.h | 527 + include/starpu_profiling.h | 393 + include/starpu_profiling_tool.h | 154 + include/starpu_rand.h | 92 + include/starpu_sched_component.h | 887 + include/starpu_sched_ctx.h | 454 + include/starpu_sched_ctx_hypervisor.h | 108 + include/starpu_scheduler.h | 563 + include/starpu_simgrid_wrap.h | 32 + include/starpu_sink.h | 37 + include/starpu_stdlib.h | 326 + include/starpu_task.h | 2085 + include/starpu_task_bundle.h | 96 + include/starpu_task_dep.h | 257 + include/starpu_task_list.h | 151 + include/starpu_task_util.h | 652 + include/starpu_thread.h | 510 + include/starpu_thread_util.h | 529 + include/starpu_tree.h | 60 + include/starpu_util.h | 887 + include/starpu_worker.h | 739 + julia/Makefile.am | 25 + julia/Makefile.in | 891 + julia/README | 68 + julia/examples/Makefile.am | 111 + 
julia/examples/Makefile.in | 1802 + julia/examples/axpy/axpy.jl | 99 + julia/examples/axpy/axpy.sh | 19 + julia/examples/black_scholes/black_scholes.jl | 208 + julia/examples/callback/callback.c | 93 + julia/examples/callback/callback.jl | 77 + julia/examples/callback/callback.sh | 19 + julia/examples/check_deps/check_deps.jl | 32 + julia/examples/check_deps/check_deps.sh | 20 + julia/examples/cholesky/cholesky.sh | 20 + julia/examples/cholesky/cholesky_codelets.jl | 52 + julia/examples/cholesky/cholesky_common.jl | 166 + julia/examples/cholesky/cholesky_implicit.jl | 71 + julia/examples/cholesky/cholesky_native.jl | 94 + julia/examples/cholesky/cholesky_tag.jl | 93 + julia/examples/dependency/end_dep.jl | 104 + julia/examples/dependency/end_dep.sh | 18 + julia/examples/dependency/tag_dep.jl | 122 + julia/examples/dependency/tag_dep.sh | 18 + julia/examples/dependency/task_dep.jl | 88 + julia/examples/dependency/task_dep.sh | 18 + julia/examples/execute.sh.in | 53 + julia/examples/gemm/gemm.jl | 144 + julia/examples/gemm/gemm.sh | 22 + julia/examples/gemm/gemm_native.jl | 56 + julia/examples/loader.c | 505 + julia/examples/mandelbrot/cpu_mandelbrot.c | 79 + julia/examples/mandelbrot/cpu_mandelbrot.h | 24 + julia/examples/mandelbrot/mandelbrot.c | 188 + julia/examples/mandelbrot/mandelbrot.jl | 123 + julia/examples/mandelbrot/mandelbrot.sh | 21 + .../examples/mandelbrot/mandelbrot_native.jl | 113 + julia/examples/mult/cpu_mult.c | 101 + julia/examples/mult/mult.c | 229 + julia/examples/mult/mult.jl | 150 + julia/examples/mult/mult_native.jl | 57 + julia/examples/mult/mult_starpu.sh | 22 + julia/examples/mult/perf.sh | 38 + .../task_insert_color/task_insert_color.c | 88 + .../task_insert_color/task_insert_color.jl | 70 + .../task_insert_color/task_insert_color.sh | 19 + julia/examples/variable/variable.jl | 53 + julia/examples/variable/variable.sh | 20 + julia/examples/variable/variable_native.jl | 41 + julia/examples/vector_scal/vector_scal.jl | 106 + 
julia/examples/vector_scal/vector_scal.sh | 20 + julia/src/Makefile.am | 59 + julia/src/Makefile.in | 1136 + julia/src/StarPU.jl | 120 + julia/src/blas.c | 194 + julia/src/blas.h | 148 + julia/src/blas.jl | 21 + julia/src/blas_wrapper.c | 50 + julia/src/callback_wrapper.c | 39 + julia/src/compiler/c.jl | 313 + julia/src/compiler/cuda.jl | 640 + julia/src/compiler/expression_manipulation.jl | 475 + julia/src/compiler/expressions.jl | 972 + julia/src/compiler/file_generation.jl | 170 + julia/src/compiler/include.jl | 28 + julia/src/compiler/parsing.jl | 67 + julia/src/compiler/utils.jl | 53 + julia/src/data.jl | 235 + julia/src/destructible.jl | 126 + julia/src/dynamic_compiler/Makefile.am | 49 + julia/src/dynamic_compiler/Makefile.in | 777 + julia/src/globals.jl | 50 + julia/src/init.jl | 73 + julia/src/linked_list.jl | 316 + julia/src/perfmodel.jl | 31 + julia/src/task.jl | 400 + julia/src/task_dep.jl | 48 + julia/src/translate_headers.jl | 113 + julia/src/utils.jl | 115 + m4/acinclude.m4 | 270 + m4/ax_cxx_compile_stdcxx.m4 | 562 + m4/ax_dlb_callback_arg.m4 | 37 + m4/libs.m4 | 262 + m4/libtool.m4 | 8427 +++ m4/ltoptions.m4 | 437 + m4/ltsugar.m4 | 124 + m4/ltversion.m4 | 24 + m4/lt~obsolete.m4 | 99 + m4/pkg.m4 | 157 + make/starpu-loader.mk | 95 + make/starpu-notests.mk | 32 + make/starpu-subdirtests.mk | 49 + make/starpu-tests.mk | 105 + make/starpu.mk | 74 + min-dgels/Makefile | 48 + min-dgels/Makefile.in | 48 + min-dgels/additional/blaswrap.h | 8 + min-dgels/additional/clapack.h | 7262 +++ min-dgels/additional/d_lg10.c | 21 + min-dgels/additional/d_sign.c | 18 + min-dgels/additional/dcopy.c | 107 + min-dgels/additional/dgelq2.c | 157 + min-dgels/additional/dgelqf.c | 251 + min-dgels/additional/dgels.c | 515 + min-dgels/additional/dgemm.c | 389 + min-dgels/additional/dgemv.c | 312 + min-dgels/additional/dgeqr2.c | 161 + min-dgels/additional/dgeqrf.c | 252 + min-dgels/additional/dger.c | 194 + min-dgels/additional/disnan.c | 52 + min-dgels/additional/dlabad.c | 72 + 
min-dgels/additional/dlaisnan.c | 58 + min-dgels/additional/dlamch.c | 1001 + min-dgels/additional/dlange.c | 199 + min-dgels/additional/dlapy2.c | 73 + min-dgels/additional/dlarf.c | 193 + min-dgels/additional/dlarfb.c | 774 + min-dgels/additional/dlarfg.c | 170 + min-dgels/additional/dlarfp.c | 192 + min-dgels/additional/dlarft.c | 325 + min-dgels/additional/dlascl.c | 354 + min-dgels/additional/dlaset.c | 152 + min-dgels/additional/dlassq.c | 116 + min-dgels/additional/dnrm2.c | 95 + min-dgels/additional/dorm2r.c | 235 + min-dgels/additional/dorml2.c | 231 + min-dgels/additional/dormlq.c | 334 + min-dgels/additional/dormqr.c | 327 + min-dgels/additional/dscal.c | 96 + min-dgels/additional/dtrmm.c | 453 + min-dgels/additional/dtrmv.c | 345 + min-dgels/additional/dtrsm.c | 490 + min-dgels/additional/dtrtrs.c | 183 + min-dgels/additional/f2c.h | 223 + min-dgels/additional/fio.h | 141 + min-dgels/additional/fmt.c | 530 + min-dgels/additional/fmt.h | 105 + min-dgels/additional/ieeeck.c | 166 + min-dgels/additional/iladlc.c | 88 + min-dgels/additional/iladlr.c | 90 + min-dgels/additional/ilaenv.c | 654 + min-dgels/additional/iparmq.c | 282 + min-dgels/additional/lsame.c | 117 + min-dgels/additional/mindgels.h | 8 + min-dgels/additional/pow_di.c | 41 + min-dgels/additional/s_cat.c | 86 + min-dgels/additional/sysdep1.h | 66 + min-dgels/additional/wsfe.c | 78 + min-dgels/additional/xerbla.c | 65 + min-dgels/base/BLAS/SRC/Makefile | 115 + min-dgels/base/BLAS/SRC/dasum.c | 101 + min-dgels/base/BLAS/SRC/daxpy.c | 107 + min-dgels/base/BLAS/SRC/dcabs1.c | 36 + min-dgels/base/BLAS/SRC/dcopy.c | 107 + min-dgels/base/BLAS/SRC/ddot.c | 110 + min-dgels/base/BLAS/SRC/dgbmv.c | 369 + min-dgels/base/BLAS/SRC/dgemm.c | 389 + min-dgels/base/BLAS/SRC/dgemv.c | 312 + min-dgels/base/BLAS/SRC/dger.c | 194 + min-dgels/base/BLAS/SRC/dnrm2.c | 95 + min-dgels/base/BLAS/SRC/drot.c | 86 + min-dgels/base/BLAS/SRC/drotg.c | 79 + min-dgels/base/BLAS/SRC/drotm.c | 215 + 
min-dgels/base/BLAS/SRC/drotmg.c | 293 + min-dgels/base/BLAS/SRC/dsbmv.c | 364 + min-dgels/base/BLAS/SRC/dscal.c | 96 + min-dgels/base/BLAS/SRC/dsdot.c | 135 + min-dgels/base/BLAS/SRC/dspmv.c | 312 + min-dgels/base/BLAS/SRC/dspr.c | 237 + min-dgels/base/BLAS/SRC/dspr2.c | 270 + min-dgels/base/BLAS/SRC/dswap.c | 114 + min-dgels/base/BLAS/SRC/dsymm.c | 362 + min-dgels/base/BLAS/SRC/dsymv.c | 313 + min-dgels/base/BLAS/SRC/dsyr.c | 238 + min-dgels/base/BLAS/SRC/dsyr2.c | 275 + min-dgels/base/BLAS/SRC/dsyr2k.c | 407 + min-dgels/base/BLAS/SRC/dsyrk.c | 372 + min-dgels/base/BLAS/SRC/dtbmv.c | 422 + min-dgels/base/BLAS/SRC/dtbsv.c | 426 + min-dgels/base/BLAS/SRC/dtpmv.c | 357 + min-dgels/base/BLAS/SRC/dtpsv.c | 360 + min-dgels/base/BLAS/SRC/dtrmm.c | 453 + min-dgels/base/BLAS/SRC/dtrmv.c | 345 + min-dgels/base/BLAS/SRC/dtrsm.c | 490 + min-dgels/base/BLAS/SRC/dtrsv.c | 348 + min-dgels/base/BLAS/SRC/dzasum.c | 80 + min-dgels/base/BLAS/SRC/dznrm2.c | 108 + min-dgels/base/BLAS/SRC/idamax.c | 93 + min-dgels/base/BLAS/SRC/izamax.c | 93 + min-dgels/base/BLAS/SRC/lsame.c | 117 + min-dgels/base/BLAS/SRC/xerbla.c | 77 + min-dgels/base/BLAS/SRC/xerbla_array.c | 102 + min-dgels/base/BLAS/WRAP/Makefile | 23 + min-dgels/base/BLAS/WRAP/README | 30 + min-dgels/base/BLAS/WRAP/cblas.h | 577 + min-dgels/base/BLAS/WRAP/cblaswr.c | 1744 + min-dgels/base/BLAS/WRAP/fblaswr.c | 1600 + min-dgels/base/BLAS/WRAP/fblaswr.h | 851 + min-dgels/base/BLAS/dblat2.in | 34 + min-dgels/base/BLAS/dblat3.in | 20 + min-dgels/base/COPYING | 36 + min-dgels/base/F2CLIBS/libf2c/Makefile | 220 + min-dgels/base/F2CLIBS/libf2c/Notice | 23 + min-dgels/base/F2CLIBS/libf2c/README | 374 + min-dgels/base/F2CLIBS/libf2c/abort_.c | 22 + min-dgels/base/F2CLIBS/libf2c/arithchk.c | 245 + min-dgels/base/F2CLIBS/libf2c/backspac.c | 77 + min-dgels/base/F2CLIBS/libf2c/c_abs.c | 20 + min-dgels/base/F2CLIBS/libf2c/c_cos.c | 23 + min-dgels/base/F2CLIBS/libf2c/c_div.c | 53 + min-dgels/base/F2CLIBS/libf2c/c_exp.c | 25 + 
min-dgels/base/F2CLIBS/libf2c/c_log.c | 23 + min-dgels/base/F2CLIBS/libf2c/c_sin.c | 23 + min-dgels/base/F2CLIBS/libf2c/c_sqrt.c | 41 + min-dgels/base/F2CLIBS/libf2c/cabs.c | 33 + min-dgels/base/F2CLIBS/libf2c/close.c | 101 + min-dgels/base/F2CLIBS/libf2c/comptry.bat | 5 + min-dgels/base/F2CLIBS/libf2c/ctype.c | 2 + min-dgels/base/F2CLIBS/libf2c/ctype.h | 47 + min-dgels/base/F2CLIBS/libf2c/d_abs.c | 18 + min-dgels/base/F2CLIBS/libf2c/d_acos.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_asin.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_atan.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_atn2.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_cnjg.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_cos.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_cosh.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_dim.c | 16 + min-dgels/base/F2CLIBS/libf2c/d_exp.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_imag.c | 16 + min-dgels/base/F2CLIBS/libf2c/d_int.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_lg10.c | 21 + min-dgels/base/F2CLIBS/libf2c/d_log.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_mod.c | 46 + min-dgels/base/F2CLIBS/libf2c/d_nint.c | 20 + min-dgels/base/F2CLIBS/libf2c/d_prod.c | 16 + min-dgels/base/F2CLIBS/libf2c/d_sign.c | 18 + min-dgels/base/F2CLIBS/libf2c/d_sin.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_sinh.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_sqrt.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_tan.c | 19 + min-dgels/base/F2CLIBS/libf2c/d_tanh.c | 19 + min-dgels/base/F2CLIBS/libf2c/derf_.c | 18 + min-dgels/base/F2CLIBS/libf2c/derfc_.c | 20 + min-dgels/base/F2CLIBS/libf2c/dfe.c | 151 + min-dgels/base/F2CLIBS/libf2c/dolio.c | 26 + min-dgels/base/F2CLIBS/libf2c/dtime_.c | 63 + min-dgels/base/F2CLIBS/libf2c/due.c | 77 + min-dgels/base/F2CLIBS/libf2c/ef1asc_.c | 25 + min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c | 20 + min-dgels/base/F2CLIBS/libf2c/endfile.c | 160 + min-dgels/base/F2CLIBS/libf2c/erf_.c | 22 + min-dgels/base/F2CLIBS/libf2c/erfc_.c | 22 + min-dgels/base/F2CLIBS/libf2c/err.c | 293 + min-dgels/base/F2CLIBS/libf2c/etime_.c | 57 
+ min-dgels/base/F2CLIBS/libf2c/exit_.c | 43 + min-dgels/base/F2CLIBS/libf2c/f2c.h | 223 + min-dgels/base/F2CLIBS/libf2c/f2c.h0 | 223 + min-dgels/base/F2CLIBS/libf2c/f2ch.add | 162 + min-dgels/base/F2CLIBS/libf2c/f77_aloc.c | 44 + min-dgels/base/F2CLIBS/libf2c/f77vers.c | 97 + min-dgels/base/F2CLIBS/libf2c/fio.h | 141 + min-dgels/base/F2CLIBS/libf2c/fmt.c | 530 + min-dgels/base/F2CLIBS/libf2c/fmt.h | 105 + min-dgels/base/F2CLIBS/libf2c/fmtlib.c | 51 + min-dgels/base/F2CLIBS/libf2c/fp.h | 28 + min-dgels/base/F2CLIBS/libf2c/ftell64_.c | 52 + min-dgels/base/F2CLIBS/libf2c/ftell_.c | 52 + min-dgels/base/F2CLIBS/libf2c/getarg_.c | 36 + min-dgels/base/F2CLIBS/libf2c/getenv_.c | 62 + min-dgels/base/F2CLIBS/libf2c/h_abs.c | 18 + min-dgels/base/F2CLIBS/libf2c/h_dim.c | 16 + min-dgels/base/F2CLIBS/libf2c/h_dnnt.c | 19 + min-dgels/base/F2CLIBS/libf2c/h_indx.c | 32 + min-dgels/base/F2CLIBS/libf2c/h_len.c | 16 + min-dgels/base/F2CLIBS/libf2c/h_mod.c | 16 + min-dgels/base/F2CLIBS/libf2c/h_nint.c | 19 + min-dgels/base/F2CLIBS/libf2c/h_sign.c | 18 + min-dgels/base/F2CLIBS/libf2c/hl_ge.c | 18 + min-dgels/base/F2CLIBS/libf2c/hl_gt.c | 18 + min-dgels/base/F2CLIBS/libf2c/hl_le.c | 18 + min-dgels/base/F2CLIBS/libf2c/hl_lt.c | 18 + min-dgels/base/F2CLIBS/libf2c/i77vers.c | 343 + min-dgels/base/F2CLIBS/libf2c/i_abs.c | 18 + min-dgels/base/F2CLIBS/libf2c/i_ceiling.c | 36 + min-dgels/base/F2CLIBS/libf2c/i_dim.c | 16 + min-dgels/base/F2CLIBS/libf2c/i_dnnt.c | 19 + min-dgels/base/F2CLIBS/libf2c/i_indx.c | 32 + min-dgels/base/F2CLIBS/libf2c/i_len.c | 16 + min-dgels/base/F2CLIBS/libf2c/i_len_trim.c | 22 + min-dgels/base/F2CLIBS/libf2c/i_mod.c | 16 + min-dgels/base/F2CLIBS/libf2c/i_nint.c | 19 + min-dgels/base/F2CLIBS/libf2c/i_sign.c | 18 + min-dgels/base/F2CLIBS/libf2c/iargc_.c | 17 + min-dgels/base/F2CLIBS/libf2c/iio.c | 159 + min-dgels/base/F2CLIBS/libf2c/ilnw.c | 83 + min-dgels/base/F2CLIBS/libf2c/inquire.c | 117 + min-dgels/base/F2CLIBS/libf2c/l_ge.c | 18 + 
min-dgels/base/F2CLIBS/libf2c/l_gt.c | 18 + min-dgels/base/F2CLIBS/libf2c/l_le.c | 18 + min-dgels/base/F2CLIBS/libf2c/l_lt.c | 18 + min-dgels/base/F2CLIBS/libf2c/lbitbits.c | 68 + min-dgels/base/F2CLIBS/libf2c/lbitshft.c | 17 + min-dgels/base/F2CLIBS/libf2c/libf2c.lbc | 153 + min-dgels/base/F2CLIBS/libf2c/libf2c.sy | 153 + min-dgels/base/F2CLIBS/libf2c/lio.h | 74 + min-dgels/base/F2CLIBS/libf2c/lread.c | 806 + min-dgels/base/F2CLIBS/libf2c/lwrite.c | 314 + min-dgels/base/F2CLIBS/libf2c/main.c | 148 + min-dgels/base/F2CLIBS/libf2c/math.hvc | 3 + min-dgels/base/F2CLIBS/libf2c/mkfile.plan9 | 162 + min-dgels/base/F2CLIBS/libf2c/open.c | 301 + min-dgels/base/F2CLIBS/libf2c/pow_ci.c | 26 + min-dgels/base/F2CLIBS/libf2c/pow_dd.c | 19 + min-dgels/base/F2CLIBS/libf2c/pow_di.c | 41 + min-dgels/base/F2CLIBS/libf2c/pow_hh.c | 39 + min-dgels/base/F2CLIBS/libf2c/pow_ii.c | 39 + min-dgels/base/F2CLIBS/libf2c/pow_qq.c | 39 + min-dgels/base/F2CLIBS/libf2c/pow_ri.c | 41 + min-dgels/base/F2CLIBS/libf2c/pow_zi.c | 60 + min-dgels/base/F2CLIBS/libf2c/pow_zz.c | 29 + min-dgels/base/F2CLIBS/libf2c/qbitbits.c | 72 + min-dgels/base/F2CLIBS/libf2c/qbitshft.c | 17 + min-dgels/base/F2CLIBS/libf2c/r_abs.c | 18 + min-dgels/base/F2CLIBS/libf2c/r_acos.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_asin.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_atan.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_atn2.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_cnjg.c | 18 + min-dgels/base/F2CLIBS/libf2c/r_cos.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_cosh.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_dim.c | 16 + min-dgels/base/F2CLIBS/libf2c/r_exp.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_imag.c | 16 + min-dgels/base/F2CLIBS/libf2c/r_int.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_lg10.c | 21 + min-dgels/base/F2CLIBS/libf2c/r_log.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_mod.c | 46 + min-dgels/base/F2CLIBS/libf2c/r_nint.c | 20 + min-dgels/base/F2CLIBS/libf2c/r_sign.c | 18 + min-dgels/base/F2CLIBS/libf2c/r_sin.c | 19 + 
min-dgels/base/F2CLIBS/libf2c/r_sinh.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_sqrt.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_tan.c | 19 + min-dgels/base/F2CLIBS/libf2c/r_tanh.c | 19 + min-dgels/base/F2CLIBS/libf2c/rawio.h | 41 + min-dgels/base/F2CLIBS/libf2c/rdfmt.c | 553 + min-dgels/base/F2CLIBS/libf2c/rewind.c | 30 + min-dgels/base/F2CLIBS/libf2c/rsfe.c | 91 + min-dgels/base/F2CLIBS/libf2c/rsli.c | 109 + min-dgels/base/F2CLIBS/libf2c/rsne.c | 618 + min-dgels/base/F2CLIBS/libf2c/s_cat.c | 86 + min-dgels/base/F2CLIBS/libf2c/s_cmp.c | 50 + min-dgels/base/F2CLIBS/libf2c/s_copy.c | 57 + min-dgels/base/F2CLIBS/libf2c/s_paus.c | 96 + min-dgels/base/F2CLIBS/libf2c/s_rnge.c | 32 + min-dgels/base/F2CLIBS/libf2c/s_stop.c | 48 + min-dgels/base/F2CLIBS/libf2c/scomptry.bat | 5 + min-dgels/base/F2CLIBS/libf2c/sfe.c | 47 + min-dgels/base/F2CLIBS/libf2c/sig_die.c | 51 + min-dgels/base/F2CLIBS/libf2c/signal1.h0 | 35 + min-dgels/base/F2CLIBS/libf2c/signal_.c | 21 + min-dgels/base/F2CLIBS/libf2c/signbit.c | 24 + min-dgels/base/F2CLIBS/libf2c/sue.c | 90 + min-dgels/base/F2CLIBS/libf2c/sysdep1.h0 | 66 + min-dgels/base/F2CLIBS/libf2c/system_.c | 42 + min-dgels/base/F2CLIBS/libf2c/typesize.c | 18 + min-dgels/base/F2CLIBS/libf2c/uio.c | 75 + min-dgels/base/F2CLIBS/libf2c/uninit.c | 377 + min-dgels/base/F2CLIBS/libf2c/util.c | 57 + min-dgels/base/F2CLIBS/libf2c/wref.c | 294 + min-dgels/base/F2CLIBS/libf2c/wrtfmt.c | 377 + min-dgels/base/F2CLIBS/libf2c/wsfe.c | 78 + min-dgels/base/F2CLIBS/libf2c/wsle.c | 42 + min-dgels/base/F2CLIBS/libf2c/wsne.c | 32 + min-dgels/base/F2CLIBS/libf2c/xwsne.c | 77 + min-dgels/base/F2CLIBS/libf2c/z_abs.c | 18 + min-dgels/base/F2CLIBS/libf2c/z_cos.c | 21 + min-dgels/base/F2CLIBS/libf2c/z_div.c | 50 + min-dgels/base/F2CLIBS/libf2c/z_exp.c | 23 + min-dgels/base/F2CLIBS/libf2c/z_log.c | 121 + min-dgels/base/F2CLIBS/libf2c/z_sin.c | 21 + min-dgels/base/F2CLIBS/libf2c/z_sqrt.c | 35 + min-dgels/base/INCLUDE/blaswrap.h | 8 + min-dgels/base/INCLUDE/clapack.h | 7262 +++ 
min-dgels/base/INCLUDE/f2c.h | 223 + min-dgels/base/Makefile | 25 + min-dgels/base/README.install | 218 + min-dgels/base/SRC/Makefile | 177 + min-dgels/base/SRC/VARIANTS/Makefile | 68 + min-dgels/base/SRC/VARIANTS/README | 84 + .../base/SRC/VARIANTS/cholesky/RL/dpotrf.c | 233 + .../base/SRC/VARIANTS/cholesky/TOP/dpotrf.c | 225 + min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c | 222 + min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c | 257 + min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c | 268 + min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c | 403 + min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c | 44 + min-dgels/base/SRC/chla_transtype.c | 62 + min-dgels/base/SRC/dbdsdc.c | 514 + min-dgels/base/SRC/dbdsqr.c | 918 + min-dgels/base/SRC/ddisna.c | 227 + min-dgels/base/SRC/dgbbrd.c | 566 + min-dgels/base/SRC/dgbcon.c | 284 + min-dgels/base/SRC/dgbequ.c | 320 + min-dgels/base/SRC/dgbequb.c | 347 + min-dgels/base/SRC/dgbrfs.c | 455 + min-dgels/base/SRC/dgbrfsx.c | 687 + min-dgels/base/SRC/dgbsv.c | 176 + min-dgels/base/SRC/dgbsvx.c | 650 + min-dgels/base/SRC/dgbsvxx.c | 745 + min-dgels/base/SRC/dgbtf2.c | 262 + min-dgels/base/SRC/dgbtrf.c | 588 + min-dgels/base/SRC/dgbtrs.c | 244 + min-dgels/base/SRC/dgebak.c | 237 + min-dgels/base/SRC/dgebal.c | 402 + min-dgels/base/SRC/dgebd2.c | 304 + min-dgels/base/SRC/dgebrd.c | 336 + min-dgels/base/SRC/dgecon.c | 226 + min-dgels/base/SRC/dgeequ.c | 296 + min-dgels/base/SRC/dgeequb.c | 324 + min-dgels/base/SRC/dgees.c | 549 + min-dgels/base/SRC/dgeesx.c | 649 + min-dgels/base/SRC/dgeev.c | 566 + min-dgels/base/SRC/dgeevx.c | 703 + min-dgels/base/SRC/dgegs.c | 548 + min-dgels/base/SRC/dgegv.c | 842 + min-dgels/base/SRC/dgehd2.c | 191 + min-dgels/base/SRC/dgehrd.c | 342 + min-dgels/base/SRC/dgejsv.c | 2218 + min-dgels/base/SRC/dgelq2.c | 157 + min-dgels/base/SRC/dgelqf.c | 251 + min-dgels/base/SRC/dgels.c | 515 + min-dgels/base/SRC/dgelsd.c | 693 + min-dgels/base/SRC/dgelss.c | 828 + min-dgels/base/SRC/dgelsx.c | 438 + min-dgels/base/SRC/dgelsy.c | 495 + 
min-dgels/base/SRC/dgeql2.c | 159 + min-dgels/base/SRC/dgeqlf.c | 270 + min-dgels/base/SRC/dgeqp3.c | 358 + min-dgels/base/SRC/dgeqpf.c | 304 + min-dgels/base/SRC/dgeqr2.c | 161 + min-dgels/base/SRC/dgeqrf.c | 252 + min-dgels/base/SRC/dgerfs.c | 424 + min-dgels/base/SRC/dgerfsx.c | 666 + min-dgels/base/SRC/dgerq2.c | 155 + min-dgels/base/SRC/dgerqf.c | 269 + min-dgels/base/SRC/dgesc2.c | 176 + min-dgels/base/SRC/dgesdd.c | 1609 + min-dgels/base/SRC/dgesv.c | 138 + min-dgels/base/SRC/dgesvd.c | 4050 ++ min-dgels/base/SRC/dgesvj.c | 1796 + min-dgels/base/SRC/dgesvx.c | 587 + min-dgels/base/SRC/dgesvxx.c | 713 + min-dgels/base/SRC/dgetc2.c | 199 + min-dgels/base/SRC/dgetf2.c | 193 + min-dgels/base/SRC/dgetrf.c | 219 + min-dgels/base/SRC/dgetri.c | 264 + min-dgels/base/SRC/dgetrs.c | 186 + min-dgels/base/SRC/dggbak.c | 276 + min-dgels/base/SRC/dggbal.c | 627 + min-dgels/base/SRC/dgges.c | 692 + min-dgels/base/SRC/dggesx.c | 818 + min-dgels/base/SRC/dggev.c | 641 + min-dgels/base/SRC/dggevx.c | 885 + min-dgels/base/SRC/dggglm.c | 331 + min-dgels/base/SRC/dgghrd.c | 329 + min-dgels/base/SRC/dgglse.c | 340 + min-dgels/base/SRC/dggqrf.c | 267 + min-dgels/base/SRC/dggrqf.c | 268 + min-dgels/base/SRC/dggsvd.c | 405 + min-dgels/base/SRC/dggsvp.c | 512 + min-dgels/base/SRC/dgsvj0.c | 1159 + min-dgels/base/SRC/dgsvj1.c | 798 + min-dgels/base/SRC/dgtcon.c | 209 + min-dgels/base/SRC/dgtrfs.c | 451 + min-dgels/base/SRC/dgtsv.c | 315 + min-dgels/base/SRC/dgtsvx.c | 349 + min-dgels/base/SRC/dgttrf.c | 203 + min-dgels/base/SRC/dgttrs.c | 189 + min-dgels/base/SRC/dgtts2.c | 261 + min-dgels/base/SRC/dhgeqz.c | 1498 + min-dgels/base/SRC/dhsein.c | 491 + min-dgels/base/SRC/dhseqr.c | 487 + min-dgels/base/SRC/disnan.c | 52 + min-dgels/base/SRC/dla_gbamv.c | 316 + min-dgels/base/SRC/dla_gbrcond.c | 345 + min-dgels/base/SRC/dla_gbrfsx_extended.c | 630 + min-dgels/base/SRC/dla_gbrpvgrw.c | 136 + min-dgels/base/SRC/dla_geamv.c | 293 + min-dgels/base/SRC/dla_gercond.c | 299 + 
min-dgels/base/SRC/dla_gerfsx_extended.c | 622 + min-dgels/base/SRC/dla_lin_berr.c | 124 + min-dgels/base/SRC/dla_porcond.c | 309 + min-dgels/base/SRC/dla_porfsx_extended.c | 602 + min-dgels/base/SRC/dla_porpvgrw.c | 197 + min-dgels/base/SRC/dla_rpvgrw.c | 117 + min-dgels/base/SRC/dla_syamv.c | 299 + min-dgels/base/SRC/dla_syrcond.c | 322 + min-dgels/base/SRC/dla_syrfsx_extended.c | 608 + min-dgels/base/SRC/dla_syrpvgrw.c | 330 + min-dgels/base/SRC/dla_wwaddw.c | 80 + min-dgels/base/SRC/dlabad.c | 72 + min-dgels/base/SRC/dlabrd.c | 434 + min-dgels/base/SRC/dlacn2.c | 267 + min-dgels/base/SRC/dlacon.c | 258 + min-dgels/base/SRC/dlacpy.c | 125 + min-dgels/base/SRC/dladiv.c | 78 + min-dgels/base/SRC/dlae2.c | 142 + min-dgels/base/SRC/dlaebz.c | 640 + min-dgels/base/SRC/dlaed0.c | 440 + min-dgels/base/SRC/dlaed1.c | 249 + min-dgels/base/SRC/dlaed2.c | 532 + min-dgels/base/SRC/dlaed3.c | 338 + min-dgels/base/SRC/dlaed4.c | 954 + min-dgels/base/SRC/dlaed5.c | 148 + min-dgels/base/SRC/dlaed6.c | 374 + min-dgels/base/SRC/dlaed7.c | 354 + min-dgels/base/SRC/dlaed8.c | 475 + min-dgels/base/SRC/dlaed9.c | 274 + min-dgels/base/SRC/dlaeda.c | 287 + min-dgels/base/SRC/dlaein.c | 677 + min-dgels/base/SRC/dlaev2.c | 188 + min-dgels/base/SRC/dlaexc.c | 459 + min-dgels/base/SRC/dlag2.c | 356 + min-dgels/base/SRC/dlag2s.c | 115 + min-dgels/base/SRC/dlags2.c | 292 + min-dgels/base/SRC/dlagtf.c | 224 + min-dgels/base/SRC/dlagtm.c | 254 + min-dgels/base/SRC/dlagts.c | 351 + min-dgels/base/SRC/dlagv2.c | 351 + min-dgels/base/SRC/dlahqr.c | 631 + min-dgels/base/SRC/dlahr2.c | 315 + min-dgels/base/SRC/dlahrd.c | 285 + min-dgels/base/SRC/dlaic1.c | 326 + min-dgels/base/SRC/dlaisnan.c | 58 + min-dgels/base/SRC/dlaln2.c | 575 + min-dgels/base/SRC/dlals0.c | 473 + min-dgels/base/SRC/dlalsa.c | 456 + min-dgels/base/SRC/dlalsd.c | 529 + min-dgels/base/SRC/dlamrg.c | 131 + min-dgels/base/SRC/dlaneg.c | 218 + min-dgels/base/SRC/dlangb.c | 226 + min-dgels/base/SRC/dlange.c | 199 + 
min-dgels/base/SRC/dlangt.c | 195 + min-dgels/base/SRC/dlanhs.c | 205 + min-dgels/base/SRC/dlansb.c | 263 + min-dgels/base/SRC/dlansf.c | 1012 + min-dgels/base/SRC/dlansp.c | 263 + min-dgels/base/SRC/dlanst.c | 166 + min-dgels/base/SRC/dlansy.c | 239 + min-dgels/base/SRC/dlantb.c | 434 + min-dgels/base/SRC/dlantp.c | 391 + min-dgels/base/SRC/dlantr.c | 398 + min-dgels/base/SRC/dlanv2.c | 235 + min-dgels/base/SRC/dlapll.c | 127 + min-dgels/base/SRC/dlapmt.c | 178 + min-dgels/base/SRC/dlapy2.c | 73 + min-dgels/base/SRC/dlapy3.c | 83 + min-dgels/base/SRC/dlaqgb.c | 216 + min-dgels/base/SRC/dlaqge.c | 188 + min-dgels/base/SRC/dlaqp2.c | 237 + min-dgels/base/SRC/dlaqps.c | 345 + min-dgels/base/SRC/dlaqr0.c | 758 + min-dgels/base/SRC/dlaqr1.c | 127 + min-dgels/base/SRC/dlaqr2.c | 698 + min-dgels/base/SRC/dlaqr3.c | 715 + min-dgels/base/SRC/dlaqr4.c | 754 + min-dgels/base/SRC/dlaqr5.c | 1025 + min-dgels/base/SRC/dlaqsb.c | 185 + min-dgels/base/SRC/dlaqsp.c | 169 + min-dgels/base/SRC/dlaqsy.c | 172 + min-dgels/base/SRC/dlaqtr.c | 832 + min-dgels/base/SRC/dlar1v.c | 441 + min-dgels/base/SRC/dlar2v.c | 121 + min-dgels/base/SRC/dlarf.c | 193 + min-dgels/base/SRC/dlarfb.c | 774 + min-dgels/base/SRC/dlarfg.c | 170 + min-dgels/base/SRC/dlarfp.c | 192 + min-dgels/base/SRC/dlarft.c | 325 + min-dgels/base/SRC/dlarfx.c | 730 + min-dgels/base/SRC/dlargv.c | 130 + min-dgels/base/SRC/dlarnv.c | 146 + min-dgels/base/SRC/dlarra.c | 156 + min-dgels/base/SRC/dlarrb.c | 350 + min-dgels/base/SRC/dlarrc.c | 183 + min-dgels/base/SRC/dlarrd.c | 793 + min-dgels/base/SRC/dlarre.c | 861 + min-dgels/base/SRC/dlarrf.c | 423 + min-dgels/base/SRC/dlarrj.c | 338 + min-dgels/base/SRC/dlarrk.c | 193 + min-dgels/base/SRC/dlarrr.c | 176 + min-dgels/base/SRC/dlarrv.c | 988 + min-dgels/base/SRC/dlarscl2.c | 90 + min-dgels/base/SRC/dlartg.c | 190 + min-dgels/base/SRC/dlartv.c | 106 + min-dgels/base/SRC/dlaruv.c | 192 + min-dgels/base/SRC/dlarz.c | 194 + min-dgels/base/SRC/dlarzb.c | 288 + 
min-dgels/base/SRC/dlarzt.c | 229 + min-dgels/base/SRC/dlas2.c | 144 + min-dgels/base/SRC/dlascl.c | 354 + min-dgels/base/SRC/dlascl2.c | 90 + min-dgels/base/SRC/dlasd0.c | 291 + min-dgels/base/SRC/dlasd1.c | 288 + min-dgels/base/SRC/dlasd2.c | 609 + min-dgels/base/SRC/dlasd3.c | 452 + min-dgels/base/SRC/dlasd4.c | 1010 + min-dgels/base/SRC/dlasd5.c | 189 + min-dgels/base/SRC/dlasd6.c | 367 + min-dgels/base/SRC/dlasd7.c | 518 + min-dgels/base/SRC/dlasd8.c | 326 + min-dgels/base/SRC/dlasda.c | 488 + min-dgels/base/SRC/dlasdq.c | 380 + min-dgels/base/SRC/dlasdt.c | 136 + min-dgels/base/SRC/dlaset.c | 152 + min-dgels/base/SRC/dlasq1.c | 219 + min-dgels/base/SRC/dlasq2.c | 602 + min-dgels/base/SRC/dlasq3.c | 350 + min-dgels/base/SRC/dlasq4.c | 403 + min-dgels/base/SRC/dlasq5.c | 240 + min-dgels/base/SRC/dlasq6.c | 212 + min-dgels/base/SRC/dlasr.c | 453 + min-dgels/base/SRC/dlasrt.c | 286 + min-dgels/base/SRC/dlassq.c | 116 + min-dgels/base/SRC/dlasv2.c | 274 + min-dgels/base/SRC/dlaswp.c | 158 + min-dgels/base/SRC/dlasy2.c | 478 + min-dgels/base/SRC/dlasyf.c | 721 + min-dgels/base/SRC/dlat2s.c | 137 + min-dgels/base/SRC/dlatbs.c | 850 + min-dgels/base/SRC/dlatdf.c | 303 + min-dgels/base/SRC/dlatps.c | 824 + min-dgels/base/SRC/dlatrd.c | 355 + min-dgels/base/SRC/dlatrs.c | 815 + min-dgels/base/SRC/dlatrz.c | 163 + min-dgels/base/SRC/dlatzm.c | 193 + min-dgels/base/SRC/dlauu2.c | 183 + min-dgels/base/SRC/dlauum.c | 217 + min-dgels/base/SRC/dopgtr.c | 210 + min-dgels/base/SRC/dopmtr.c | 296 + min-dgels/base/SRC/dorg2l.c | 173 + min-dgels/base/SRC/dorg2r.c | 175 + min-dgels/base/SRC/dorgbr.c | 299 + min-dgels/base/SRC/dorghr.c | 216 + min-dgels/base/SRC/dorgl2.c | 175 + min-dgels/base/SRC/dorglq.c | 280 + min-dgels/base/SRC/dorgql.c | 289 + min-dgels/base/SRC/dorgqr.c | 281 + min-dgels/base/SRC/dorgr2.c | 174 + min-dgels/base/SRC/dorgrq.c | 289 + min-dgels/base/SRC/dorgtr.c | 250 + min-dgels/base/SRC/dorm2l.c | 231 + min-dgels/base/SRC/dorm2r.c | 235 + 
min-dgels/base/SRC/dormbr.c | 360 + min-dgels/base/SRC/dormhr.c | 257 + min-dgels/base/SRC/dorml2.c | 231 + min-dgels/base/SRC/dormlq.c | 334 + min-dgels/base/SRC/dormql.c | 327 + min-dgels/base/SRC/dormqr.c | 327 + min-dgels/base/SRC/dormr2.c | 227 + min-dgels/base/SRC/dormr3.c | 241 + min-dgels/base/SRC/dormrq.c | 335 + min-dgels/base/SRC/dormrz.c | 362 + min-dgels/base/SRC/dormtr.c | 295 + min-dgels/base/SRC/dpbcon.c | 233 + min-dgels/base/SRC/dpbequ.c | 203 + min-dgels/base/SRC/dpbrfs.c | 438 + min-dgels/base/SRC/dpbstf.c | 312 + min-dgels/base/SRC/dpbsv.c | 182 + min-dgels/base/SRC/dpbsvx.c | 515 + min-dgels/base/SRC/dpbtf2.c | 244 + min-dgels/base/SRC/dpbtrf.c | 471 + min-dgels/base/SRC/dpbtrs.c | 184 + min-dgels/base/SRC/dpftrf.c | 452 + min-dgels/base/SRC/dpftri.c | 403 + min-dgels/base/SRC/dpftrs.c | 240 + min-dgels/base/SRC/dpocon.c | 220 + min-dgels/base/SRC/dpoequ.c | 174 + min-dgels/base/SRC/dpoequb.c | 188 + min-dgels/base/SRC/dporfs.c | 422 + min-dgels/base/SRC/dporfsx.c | 622 + min-dgels/base/SRC/dposv.c | 151 + min-dgels/base/SRC/dposvx.c | 450 + min-dgels/base/SRC/dposvxx.c | 611 + min-dgels/base/SRC/dpotf2.c | 224 + min-dgels/base/SRC/dpotrf.c | 245 + min-dgels/base/SRC/dpotri.c | 125 + min-dgels/base/SRC/dpotrs.c | 166 + min-dgels/base/SRC/dppcon.c | 215 + min-dgels/base/SRC/dppequ.c | 208 + min-dgels/base/SRC/dpprfs.c | 413 + min-dgels/base/SRC/dppsv.c | 161 + min-dgels/base/SRC/dppsvx.c | 455 + min-dgels/base/SRC/dpptrf.c | 223 + min-dgels/base/SRC/dpptri.c | 173 + min-dgels/base/SRC/dpptrs.c | 170 + min-dgels/base/SRC/dpstf2.c | 395 + min-dgels/base/SRC/dpstrf.c | 471 + min-dgels/base/SRC/dptcon.c | 184 + min-dgels/base/SRC/dpteqr.c | 244 + min-dgels/base/SRC/dptrfs.c | 365 + min-dgels/base/SRC/dptsv.c | 130 + min-dgels/base/SRC/dptsvx.c | 283 + min-dgels/base/SRC/dpttrf.c | 181 + min-dgels/base/SRC/dpttrs.c | 156 + min-dgels/base/SRC/dptts2.c | 131 + min-dgels/base/SRC/drscl.c | 134 + min-dgels/base/SRC/dsbev.c | 268 + 
min-dgels/base/SRC/dsbevd.c | 338 + min-dgels/base/SRC/dsbevx.c | 520 + min-dgels/base/SRC/dsbgst.c | 1755 + min-dgels/base/SRC/dsbgv.c | 234 + min-dgels/base/SRC/dsbgvd.c | 327 + min-dgels/base/SRC/dsbgvx.c | 466 + min-dgels/base/SRC/dsbtrd.c | 713 + min-dgels/base/SRC/dsfrk.c | 517 + min-dgels/base/SRC/dsgesv.c | 416 + min-dgels/base/SRC/dspcon.c | 198 + min-dgels/base/SRC/dspev.c | 246 + min-dgels/base/SRC/dspevd.c | 314 + min-dgels/base/SRC/dspevx.c | 467 + min-dgels/base/SRC/dspgst.c | 284 + min-dgels/base/SRC/dspgv.c | 243 + min-dgels/base/SRC/dspgvd.c | 334 + min-dgels/base/SRC/dspgvx.c | 341 + min-dgels/base/SRC/dsposv.c | 418 + min-dgels/base/SRC/dsprfs.c | 421 + min-dgels/base/SRC/dspsv.c | 176 + min-dgels/base/SRC/dspsvx.c | 329 + min-dgels/base/SRC/dsptrd.c | 277 + min-dgels/base/SRC/dsptrf.c | 628 + min-dgels/base/SRC/dsptri.c | 411 + min-dgels/base/SRC/dsptrs.c | 456 + min-dgels/base/SRC/dstebz.c | 774 + min-dgels/base/SRC/dstedc.c | 488 + min-dgels/base/SRC/dstegr.c | 211 + min-dgels/base/SRC/dstein.c | 452 + min-dgels/base/SRC/dstemr.c | 728 + min-dgels/base/SRC/dsteqr.c | 621 + min-dgels/base/SRC/dsterf.c | 461 + min-dgels/base/SRC/dstev.c | 212 + min-dgels/base/SRC/dstevd.c | 273 + min-dgels/base/SRC/dstevr.c | 550 + min-dgels/base/SRC/dstevx.c | 432 + min-dgels/base/SRC/dsycon.c | 204 + min-dgels/base/SRC/dsyequb.c | 333 + min-dgels/base/SRC/dsyev.c | 283 + min-dgels/base/SRC/dsyevd.c | 353 + min-dgels/base/SRC/dsyevr.c | 652 + min-dgels/base/SRC/dsyevx.c | 536 + min-dgels/base/SRC/dsygs2.c | 299 + min-dgels/base/SRC/dsygst.c | 347 + min-dgels/base/SRC/dsygv.c | 285 + min-dgels/base/SRC/dsygvd.c | 338 + min-dgels/base/SRC/dsygvx.c | 396 + min-dgels/base/SRC/dsyrfs.c | 429 + min-dgels/base/SRC/dsyrfsx.c | 629 + min-dgels/base/SRC/dsysv.c | 215 + min-dgels/base/SRC/dsysvx.c | 370 + min-dgels/base/SRC/dsysvxx.c | 631 + min-dgels/base/SRC/dsytd2.c | 306 + min-dgels/base/SRC/dsytf2.c | 608 + min-dgels/base/SRC/dsytrd.c | 360 + 
min-dgels/base/SRC/dsytrf.c | 341 + min-dgels/base/SRC/dsytri.c | 396 + min-dgels/base/SRC/dsytrs.c | 453 + min-dgels/base/SRC/dtbcon.c | 247 + min-dgels/base/SRC/dtbrfs.c | 519 + min-dgels/base/SRC/dtbtrs.c | 204 + min-dgels/base/SRC/dtfsm.c | 976 + min-dgels/base/SRC/dtftri.c | 474 + min-dgels/base/SRC/dtfttp.c | 514 + min-dgels/base/SRC/dtfttr.c | 491 + min-dgels/base/SRC/dtgevc.c | 1418 + min-dgels/base/SRC/dtgex2.c | 711 + min-dgels/base/SRC/dtgexc.c | 514 + min-dgels/base/SRC/dtgsen.c | 836 + min-dgels/base/SRC/dtgsja.c | 625 + min-dgels/base/SRC/dtgsna.c | 695 + min-dgels/base/SRC/dtgsy2.c | 1113 + min-dgels/base/SRC/dtgsyl.c | 692 + min-dgels/base/SRC/dtpcon.c | 233 + min-dgels/base/SRC/dtprfs.c | 496 + min-dgels/base/SRC/dtptri.c | 219 + min-dgels/base/SRC/dtptrs.c | 193 + min-dgels/base/SRC/dtpttf.c | 499 + min-dgels/base/SRC/dtpttr.c | 144 + min-dgels/base/SRC/dtrcon.c | 241 + min-dgels/base/SRC/dtrevc.c | 1228 + min-dgels/base/SRC/dtrexc.c | 403 + min-dgels/base/SRC/dtrrfs.c | 493 + min-dgels/base/SRC/dtrsen.c | 530 + min-dgels/base/SRC/dtrsna.c | 606 + min-dgels/base/SRC/dtrsyl.c | 1319 + min-dgels/base/SRC/dtrti2.c | 183 + min-dgels/base/SRC/dtrtri.c | 242 + min-dgels/base/SRC/dtrtrs.c | 183 + min-dgels/base/SRC/dtrttf.c | 489 + min-dgels/base/SRC/dtrttp.c | 144 + min-dgels/base/SRC/dtzrqf.c | 221 + min-dgels/base/SRC/dtzrzf.c | 308 + min-dgels/base/SRC/dzsum1.c | 114 + min-dgels/base/SRC/icmax1.c | 127 + min-dgels/base/SRC/ieeeck.c | 166 + min-dgels/base/SRC/ilaclc.c | 94 + min-dgels/base/SRC/ilaclr.c | 96 + min-dgels/base/SRC/iladiag.c | 65 + min-dgels/base/SRC/iladlc.c | 88 + min-dgels/base/SRC/iladlr.c | 90 + min-dgels/base/SRC/ilaenv.c | 654 + min-dgels/base/SRC/ilaprec.c | 72 + min-dgels/base/SRC/ilaslc.c | 88 + min-dgels/base/SRC/ilaslr.c | 90 + min-dgels/base/SRC/ilatrans.c | 69 + min-dgels/base/SRC/ilauplo.c | 65 + min-dgels/base/SRC/ilaver.c | 47 + min-dgels/base/SRC/ilazlc.c | 94 + min-dgels/base/SRC/ilazlr.c | 96 + 
min-dgels/base/SRC/iparmq.c | 282 + min-dgels/base/SRC/izmax1.c | 127 + min-dgels/base/SRC/lsamen.c | 98 + min-dgels/base/SRC/maxloc.c | 71 + min-dgels/base/SRC/xerbla.c | 65 + min-dgels/base/SRC/xerbla_array.c | 102 + min-dgels/base/make.inc | 60 + mpi/GNUmakefile.in | 48 + mpi/Makefile.am | 39 + mpi/Makefile.in | 986 + mpi/dev/starpu_mpi_comm_check.sh | 108 + mpi/examples/Makefile.am | 630 + mpi/examples/Makefile.in | 3407 + mpi/examples/benchs/abstract_sendrecv_bench.c | 221 + mpi/examples/benchs/abstract_sendrecv_bench.h | 19 + mpi/examples/benchs/bcast_bench.c | 351 + mpi/examples/benchs/bench_helper.c | 62 + mpi/examples/benchs/bench_helper.h | 39 + mpi/examples/benchs/burst.c | 71 + mpi/examples/benchs/burst_gemm.c | 211 + mpi/examples/benchs/burst_helper.c | 250 + mpi/examples/benchs/burst_helper.h | 29 + mpi/examples/benchs/gemm_helper.c | 336 + mpi/examples/benchs/gemm_helper.h | 35 + .../benchs/recv_wait_finalize_bench.c | 294 + mpi/examples/benchs/sendrecv_bench.c | 126 + mpi/examples/benchs/sendrecv_gemm_bench.c | 203 + .../benchs/sendrecv_parallel_tasks_bench.c | 238 + mpi/examples/cache/cache.c | 112 + mpi/examples/cache/cache_disable.c | 99 + mpi/examples/cg/cg.c | 424 + mpi/examples/comm/comm.c | 158 + mpi/examples/comm/group.c | 132 + mpi/examples/comm/mix_comm.c | 197 + mpi/examples/complex/mpi_complex.c | 142 + mpi/examples/filters/filter.c | 171 + mpi/examples/helper.h | 39 + mpi/examples/loader.c | 505 + .../matrix_decomposition/mpi_cholesky.c | 87 + .../matrix_decomposition/mpi_cholesky.h | 30 + .../mpi_cholesky_codelets.c | 758 + .../mpi_cholesky_codelets.h | 29 + .../mpi_cholesky_distributed.c | 70 + .../mpi_cholesky_kernels.c | 338 + .../mpi_cholesky_kernels.h | 34 + .../mpi_cholesky_models.c | 45 + .../mpi_cholesky_models.h | 25 + .../mpi_decomposition_matrix.c | 130 + .../mpi_decomposition_matrix.h | 29 + .../mpi_decomposition_params.c | 165 + .../mpi_decomposition_params.h | 45 + mpi/examples/matrix_mult/mm.c | 385 + 
mpi/examples/matrix_mult/mm_2dbc.c | 401 + mpi/examples/mpi_lu/mpi_lu-double.h | 44 + mpi/examples/mpi_lu/mpi_lu-float.h | 44 + mpi/examples/mpi_lu/pdlu.c | 18 + mpi/examples/mpi_lu/pdlu_implicit.c | 19 + mpi/examples/mpi_lu/pdlu_kernels.c | 18 + mpi/examples/mpi_lu/plu_example.c | 697 + mpi/examples/mpi_lu/plu_example_double.c | 18 + mpi/examples/mpi_lu/plu_example_float.c | 18 + mpi/examples/mpi_lu/plu_implicit_example.c | 400 + .../mpi_lu/plu_implicit_example_double.c | 19 + .../mpi_lu/plu_implicit_example_float.c | 19 + mpi/examples/mpi_lu/plu_outofcore_example.c | 515 + .../mpi_lu/plu_outofcore_example_double.c | 20 + .../mpi_lu/plu_outofcore_example_float.c | 20 + mpi/examples/mpi_lu/plu_solve.c | 396 + mpi/examples/mpi_lu/plu_solve_double.c | 18 + mpi/examples/mpi_lu/plu_solve_float.c | 18 + mpi/examples/mpi_lu/pslu.c | 18 + mpi/examples/mpi_lu/pslu_implicit.c | 19 + mpi/examples/mpi_lu/pslu_kernels.c | 18 + mpi/examples/mpi_lu/pxlu.c | 919 + mpi/examples/mpi_lu/pxlu.h | 67 + mpi/examples/mpi_lu/pxlu_implicit.c | 189 + mpi/examples/mpi_lu/pxlu_kernels.c | 491 + mpi/examples/mpi_lu/pxlu_kernels.h | 31 + mpi/examples/mpi_redux/mpi_redux.c | 210 + mpi/examples/mpi_redux/mpi_redux_autowrapup.c | 233 + mpi/examples/mpi_redux/mpi_redux_tree.c | 188 + mpi/examples/native_fortran/fstarpu_mod.f90 | 2697 + .../native_fortran/fstarpu_mpi_mod.f90 | 776 + mpi/examples/native_fortran/nf_basic_ring.f90 | 108 + mpi/examples/native_fortran/nf_mm.f90 | 238 + mpi/examples/native_fortran/nf_mm_2dbc.f90 | 310 + mpi/examples/native_fortran/nf_mm_cl.f90 | 90 + mpi/examples/native_fortran/nf_mm_cl_blas.f90 | 91 + .../native_fortran/nf_mm_task_build.f90 | 247 + mpi/examples/native_fortran/nf_mpi_redux.f90 | 240 + .../native_fortran/nf_mpi_redux_tree.f90 | 228 + mpi/examples/native_fortran/nf_redux_test.f90 | 226 + mpi/examples/perf.sh | 104 + mpi/examples/stencil/stencil5.c | 264 + mpi/examples/stencil/stencil5_lb.c | 286 + mpi/examples/user_datatype/my_interface.c | 393 + 
mpi/examples/user_datatype/my_interface.h | 82 + mpi/examples/user_datatype/user_datatype.c | 145 + mpi/examples/user_datatype/user_datatype2.c | 107 + .../user_datatype/user_datatype_early.c | 100 + .../user_datatype/user_datatype_interface.c | 105 + mpi/include/fstarpu_mpi_mod.f90 | 776 + mpi/include/starpu_mpi.h | 1026 + mpi/include/starpu_mpi_ft.h | 134 + mpi/include/starpu_mpi_lb.h | 46 + mpi/packages/libstarpumpi.pc.in | 28 + mpi/packages/starpumpi-1.0.pc.in | 28 + mpi/packages/starpumpi-1.1.pc.in | 28 + mpi/packages/starpumpi-1.2.pc.in | 28 + mpi/packages/starpumpi-1.3.pc.in | 28 + mpi/packages/starpumpi-1.4.pc.in | 28 + mpi/src/Makefile.am | 148 + mpi/src/Makefile.in | 1511 + mpi/src/load_balancer/load_balancer.c | 160 + .../policy/data_movements_interface.c | 308 + .../policy/data_movements_interface.h | 49 + .../policy/load_balancer_policy.h | 54 + .../policy/load_data_interface.c | 276 + .../policy/load_data_interface.h | 71 + .../policy/load_heat_propagation.c | 666 + mpi/src/mpi/starpu_mpi_comm.c | 234 + mpi/src/mpi/starpu_mpi_comm.h | 47 + mpi/src/mpi/starpu_mpi_driver.h | 39 + mpi/src/mpi/starpu_mpi_early_data.c | 191 + mpi/src/mpi/starpu_mpi_early_data.h | 74 + mpi/src/mpi/starpu_mpi_early_request.c | 173 + mpi/src/mpi/starpu_mpi_early_request.h | 58 + mpi/src/mpi/starpu_mpi_mpi.c | 1787 + mpi/src/mpi/starpu_mpi_mpi.h | 57 + mpi/src/mpi/starpu_mpi_mpi_backend.c | 137 + mpi/src/mpi/starpu_mpi_mpi_backend.h | 86 + mpi/src/mpi/starpu_mpi_sync_data.c | 153 + mpi/src/mpi/starpu_mpi_sync_data.h | 48 + mpi/src/mpi/starpu_mpi_tag.c | 128 + mpi/src/mpi/starpu_mpi_tag.h | 45 + .../starpu_mpi_checkpoint.c | 276 + .../starpu_mpi_checkpoint.h | 70 + .../starpu_mpi_checkpoint_package.c | 177 + .../starpu_mpi_checkpoint_package.h | 51 + .../starpu_mpi_checkpoint_template.c | 554 + .../starpu_mpi_checkpoint_template.h | 227 + .../starpu_mpi_checkpoint_tracker.c | 250 + .../starpu_mpi_checkpoint_tracker.h | 50 + mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c | 57 + 
mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h | 32 + .../starpu_mpi_ft_service_comms.c | 371 + .../starpu_mpi_ft_service_comms.h | 37 + .../starpu_mpi_ft_stats.c | 41 + .../starpu_mpi_ft_stats.h | 288 + mpi/src/nmad/starpu_mpi_nmad.c | 857 + mpi/src/nmad/starpu_mpi_nmad.h | 58 + mpi/src/nmad/starpu_mpi_nmad_backend.c | 122 + mpi/src/nmad/starpu_mpi_nmad_backend.h | 60 + mpi/src/nmad/starpu_mpi_nmad_coop.c | 177 + mpi/src/nmad/starpu_mpi_nmad_coop.h | 41 + .../nmad/starpu_mpi_nmad_unknown_datatype.c | 156 + .../nmad/starpu_mpi_nmad_unknown_datatype.h | 46 + mpi/src/starpu_mpi.c | 742 + mpi/src/starpu_mpi_cache.c | 432 + mpi/src/starpu_mpi_cache.h | 41 + mpi/src/starpu_mpi_cache_stats.c | 61 + mpi/src/starpu_mpi_cache_stats.h | 43 + mpi/src/starpu_mpi_collective.c | 169 + mpi/src/starpu_mpi_coop_sends.c | 369 + mpi/src/starpu_mpi_datatype.c | 482 + mpi/src/starpu_mpi_datatype.h | 42 + mpi/src/starpu_mpi_fortran.c | 326 + mpi/src/starpu_mpi_fxt.c | 137 + mpi/src/starpu_mpi_fxt.h | 208 + mpi/src/starpu_mpi_helper.c | 123 + mpi/src/starpu_mpi_init.c | 456 + mpi/src/starpu_mpi_init.h | 36 + mpi/src/starpu_mpi_private.c | 103 + mpi/src/starpu_mpi_private.h | 403 + mpi/src/starpu_mpi_req.c | 115 + mpi/src/starpu_mpi_select_node.c | 126 + mpi/src/starpu_mpi_select_node.h | 38 + mpi/src/starpu_mpi_stats.c | 210 + mpi/src/starpu_mpi_stats.h | 43 + mpi/src/starpu_mpi_tags.c | 141 + mpi/src/starpu_mpi_task_insert.c | 1224 + mpi/src/starpu_mpi_task_insert.h | 37 + mpi/src/starpu_mpi_task_insert_fortran.c | 638 + mpi/tests/Makefile.am | 301 + mpi/tests/Makefile.in | 3325 + mpi/tests/attr.c | 38 + mpi/tests/block_interface.c | 161 + mpi/tests/block_interface_pinned.c | 162 + mpi/tests/broadcast.c | 113 + mpi/tests/callback.c | 132 + mpi/tests/checkpoints.c | 204 + mpi/tests/coop.c | 105 + mpi/tests/coop_acknowledgement.c | 138 + mpi/tests/coop_cache.c | 158 + mpi/tests/coop_chained_sends.c | 140 + mpi/tests/coop_datatype.c | 311 + mpi/tests/coop_insert_task.c | 133 + 
mpi/tests/coop_large.c | 120 + mpi/tests/coop_many.c | 130 + mpi/tests/coop_recv_not_yet_posted.c | 112 + mpi/tests/coop_recv_wait_finalize.c | 125 + mpi/tests/coop_user_defined_datatype.c | 184 + mpi/tests/coop_without_task.c | 88 + mpi/tests/coop_wrong_order.c | 131 + mpi/tests/data_cpy.c | 91 + mpi/tests/datatypes.c | 626 + mpi/tests/display_bindings.c | 44 + mpi/tests/driver.c | 160 + mpi/tests/early_request.c | 282 + mpi/tests/early_stuff.c | 136 + mpi/tests/gather.c | 84 + mpi/tests/gather2.c | 110 + mpi/tests/helper.h | 51 + mpi/tests/insert_task.c | 159 + mpi/tests/insert_task_block.c | 177 + mpi/tests/insert_task_can_execute.c | 98 + mpi/tests/insert_task_compute.c | 256 + mpi/tests/insert_task_count.c | 134 + mpi/tests/insert_task_dyn_handles.c | 170 + mpi/tests/insert_task_node_choice.c | 123 + mpi/tests/insert_task_owner.c | 199 + mpi/tests/insert_task_owner2.c | 150 + mpi/tests/insert_task_owner_data.c | 129 + mpi/tests/insert_task_recv_cache.c | 187 + mpi/tests/insert_task_sent_cache.c | 174 + mpi/tests/insert_task_seq.c | 156 + mpi/tests/insert_task_tags.c | 96 + mpi/tests/load_balancer.c | 74 + mpi/tests/loader.c | 505 + mpi/tests/matrix.c | 156 + mpi/tests/matrix2.c | 149 + mpi/tests/mpi_barrier.c | 37 + mpi/tests/mpi_data_cpy.c | 98 + mpi/tests/mpi_detached_tag.c | 91 + mpi/tests/mpi_earlyrecv.c | 140 + mpi/tests/mpi_earlyrecv2.c | 287 + mpi/tests/mpi_earlyrecv2_sync.c | 257 + mpi/tests/mpi_irecv.c | 89 + mpi/tests/mpi_irecv_detached.c | 108 + mpi/tests/mpi_isend.c | 90 + mpi/tests/mpi_isend_detached.c | 113 + mpi/tests/mpi_reduction.c | 220 + mpi/tests/mpi_reduction_kernels.c | 76 + mpi/tests/mpi_redux.c | 116 + mpi/tests/mpi_scatter_gather.c | 216 + mpi/tests/mpi_task_submit.c | 118 + mpi/tests/mpi_test.c | 99 + mpi/tests/multiple_send.c | 116 + mpi/tests/ndim_interface.c | 180 + mpi/tests/nothing.c | 70 + mpi/tests/pingpong.c | 198 + mpi/tests/policy_register.c | 135 + mpi/tests/policy_register_many.c | 63 + mpi/tests/policy_register_toomany.c 
| 66 + mpi/tests/policy_selection.c | 183 + mpi/tests/policy_selection2.c | 132 + mpi/tests/policy_unregister.c | 51 + mpi/tests/ring.c | 155 + mpi/tests/ring_async.c | 162 + mpi/tests/ring_async_implicit.c | 154 + mpi/tests/ring_kernel.cu | 33 + mpi/tests/ring_kernel_hip.hip | 33 + mpi/tests/ring_sync.c | 159 + mpi/tests/ring_sync_detached.c | 176 + mpi/tests/star.c | 109 + mpi/tests/starpu_redefine.c | 47 + mpi/tests/stats.c | 112 + mpi/tests/sync.c | 113 + mpi/tests/tags_allocate.c | 74 + mpi/tests/tags_checking.c | 166 + mpi/tests/temporary.c | 156 + mpi/tests/user_defined_datatype.c | 202 + mpi/tests/user_defined_datatype_value.h | 176 + mpi/tests/wait_for_all.c | 97 + mpi/tools/Makefile.am | 39 + mpi/tools/Makefile.in | 1091 + mpi/tools/starpu_replay.c | 1198 + mpi/tools/starpu_replay_sched.c | 439 + packages/libstarpu.pc.in | 29 + packages/starpu-1.0.pc.in | 29 + packages/starpu-1.1.pc.in | 29 + packages/starpu-1.2.pc.in | 29 + packages/starpu-1.3.in | 55 + packages/starpu-1.3.pc.in | 29 + packages/starpu-1.4.in | 55 + packages/starpu-1.4.pc.in | 29 + sc_hypervisor/Makefile.am | 27 + sc_hypervisor/Makefile.in | 952 + sc_hypervisor/examples/Makefile.am | 55 + sc_hypervisor/examples/Makefile.in | 1168 + .../app_driven_test/app_driven_test.c | 174 + sc_hypervisor/examples/cholesky/cholesky.h | 173 + .../examples/cholesky/cholesky_implicit.c | 379 + .../examples/cholesky/cholesky_kernels.c | 438 + .../examples/cholesky/cholesky_models.c | 288 + .../resize_hierarchical_ctxs.c | 162 + .../examples/lp_test/lp_resize_test.c | 137 + sc_hypervisor/examples/lp_test/lp_test.c | 137 + .../sched_ctx_utils/sched_ctx_utils.c | 531 + .../sched_ctx_utils/sched_ctx_utils.h | 33 + sc_hypervisor/include/sc_hypervisor.h | 295 + sc_hypervisor/include/sc_hypervisor_config.h | 230 + sc_hypervisor/include/sc_hypervisor_lp.h | 126 + .../include/sc_hypervisor_monitoring.h | 279 + sc_hypervisor/include/sc_hypervisor_policy.h | 227 + sc_hypervisor/src/Makefile.am | 46 + 
sc_hypervisor/src/Makefile.in | 1139 + .../hypervisor_policies/app_driven_policy.c | 36 + .../src/hypervisor_policies/feft_lp_policy.c | 381 + .../hypervisor_policies/gflops_rate_policy.c | 312 + .../hypervisor_policies/hard_coded_policy.c | 122 + .../src/hypervisor_policies/idle_policy.c | 55 + .../hypervisor_policies/ispeed_lp_policy.c | 258 + .../src/hypervisor_policies/ispeed_policy.c | 195 + .../hypervisor_policies/perf_count_policy.c | 371 + .../src/hypervisor_policies/teft_lp_policy.c | 346 + .../throughput_lp_policy.c | 357 + sc_hypervisor/src/policies_utils/dichotomy.c | 124 + .../src/policies_utils/lp_programs.c | 724 + sc_hypervisor/src/policies_utils/lp_tools.c | 936 + .../src/policies_utils/policy_tools.c | 620 + sc_hypervisor/src/policies_utils/speed.c | 321 + sc_hypervisor/src/policies_utils/task_pool.c | 99 + sc_hypervisor/src/sc_config.c | 257 + sc_hypervisor/src/sc_hypervisor.c | 1712 + sc_hypervisor/src/sc_hypervisor_intern.h | 127 + sc_hypervisor/src/uthash.h | 1024 + socl/Makefile.am | 24 + socl/Makefile.in | 949 + socl/README | 5 + socl/examples/Makefile.am | 61 + socl/examples/Makefile.in | 1726 + socl/examples/basic/basic.c | 253 + socl/examples/basicsplit/basicsplit.c | 271 + socl/examples/clinfo/clinfo.c | 319 + socl/examples/loader.c | 505 + socl/examples/mandelbrot/mandelbrot.c | 543 + socl/examples/mansched/mansched.c | 222 + socl/examples/matmul/matmul.c | 563 + socl/examples/testmap/testmap.c | 262 + socl/src/CL/cl.h | 1239 + socl/src/CL/cl_d3d10.h | 126 + socl/src/CL/cl_d3d11.h | 126 + socl/src/CL/cl_dx9_media_sharing.h | 127 + socl/src/CL/cl_ext.h | 213 + socl/src/CL/cl_gl.h | 165 + socl/src/CL/cl_gl_ext.h | 69 + socl/src/CL/cl_platform.h | 1204 + socl/src/CL/opencl.h | 54 + socl/src/Makefile.am | 136 + socl/src/Makefile.in | 1543 + socl/src/cl_buildprogram.c | 123 + socl/src/cl_createbuffer.c | 150 + socl/src/cl_createcommandqueue.c | 85 + socl/src/cl_createcontext.c | 159 + socl/src/cl_createcontextfromtype.c | 41 + 
socl/src/cl_createimage2d.c | 33 + socl/src/cl_createimage3d.c | 35 + socl/src/cl_createkernel.c | 211 + socl/src/cl_createkernelsinprogram.c | 28 + socl/src/cl_createprogramwithbinary.c | 34 + socl/src/cl_createprogramwithsource.c | 167 + socl/src/cl_createsampler.c | 30 + socl/src/cl_enqueuebarrier.c | 36 + socl/src/cl_enqueuebarrierwithwaitlist.c | 35 + socl/src/cl_enqueuecopybuffer.c | 121 + socl/src/cl_enqueuecopybuffertoimage.c | 32 + socl/src/cl_enqueuecopyimage.c | 32 + socl/src/cl_enqueuecopyimagetobuffer.c | 32 + socl/src/cl_enqueuemapbuffer.c | 71 + socl/src/cl_enqueuemapimage.c | 38 + socl/src/cl_enqueuemarker.c | 44 + socl/src/cl_enqueuemarkerwithwaitlist.c | 38 + socl/src/cl_enqueuenativekernel.c | 33 + socl/src/cl_enqueuendrangekernel.c | 248 + socl/src/cl_enqueuereadbuffer.c | 124 + socl/src/cl_enqueuereadimage.c | 34 + socl/src/cl_enqueuetask.c | 36 + socl/src/cl_enqueueunmapmemobject.c | 51 + socl/src/cl_enqueuewaitforevents.c | 30 + socl/src/cl_enqueuewritebuffer.c | 149 + socl/src/cl_enqueuewriteimage.c | 34 + socl/src/cl_finish.c | 32 + socl/src/cl_flush.c | 24 + socl/src/cl_getcommandqueueinfo.c | 42 + socl/src/cl_getcontextinfo.c | 41 + socl/src/cl_getdeviceids.c | 96 + socl/src/cl_getdeviceinfo.c | 56 + socl/src/cl_geteventinfo.c | 46 + socl/src/cl_geteventprofilinginfo.c | 39 + socl/src/cl_getextensionfunctionaddress.c | 48 + socl/src/cl_getimageinfo.c | 29 + socl/src/cl_getkernelinfo.c | 43 + socl/src/cl_getkernelworkgroupinfo.c | 34 + socl/src/cl_getmemobjectinfo.c | 44 + socl/src/cl_getplatformids.c | 50 + socl/src/cl_getplatforminfo.c | 50 + socl/src/cl_getprogrambuildinfo.c | 44 + socl/src/cl_getprograminfo.c | 46 + socl/src/cl_getsamplerinfo.c | 29 + socl/src/cl_getsupportedimageformats.c | 29 + socl/src/cl_icdgetplatformidskhr.c | 40 + socl/src/cl_releasecommandqueue.c | 26 + socl/src/cl_releasecontext.c | 29 + socl/src/cl_releaseevent.c | 29 + socl/src/cl_releasekernel.c | 29 + socl/src/cl_releasememobject.c | 26 + 
socl/src/cl_releaseprogram.c | 29 + socl/src/cl_releasesampler.c | 24 + socl/src/cl_retaincommandqueue.c | 29 + socl/src/cl_retaincontext.c | 29 + socl/src/cl_retainevent.c | 29 + socl/src/cl_retainkernel.c | 29 + socl/src/cl_retainmemobject.c | 29 + socl/src/cl_retainprogram.c | 29 + socl/src/cl_retainsampler.c | 24 + socl/src/cl_setcommandqueueproperty.c | 58 + socl/src/cl_setkernelarg.c | 102 + socl/src/cl_unloadcompiler.c | 24 + socl/src/cl_waitforevents.c | 40 + socl/src/command.c | 347 + socl/src/command.h | 211 + socl/src/command_list.c | 56 + socl/src/command_list.h | 29 + socl/src/command_queue.c | 116 + socl/src/command_queue.h | 29 + socl/src/debug.c | 74 + socl/src/debug.h | 53 + socl/src/event.c | 72 + socl/src/event.h | 36 + socl/src/gc.c | 229 + socl/src/gc.h | 55 + socl/src/getinfo.h | 60 + socl/src/init.c | 144 + socl/src/init.h | 33 + socl/src/mem_objects.c | 101 + socl/src/mem_objects.h | 25 + socl/src/ocl_icd.h | 905 + socl/src/socl.c | 163 + socl/src/socl.h | 789 + socl/src/task.c | 180 + socl/src/task.h | 51 + socl/src/util.c | 69 + socl/src/util.h | 49 + socl/vendors/install/socl.icd | 1 + socl/vendors/install/socl.icd.in | 1 + socl/vendors/nvidia.icd | 1 + socl/vendors/socl.icd.in | 1 + src/Makefile.am | 464 + src/Makefile.in | 3206 + src/common/barrier.c | 92 + src/common/barrier.h | 45 + src/common/barrier_counter.c | 192 + src/common/barrier_counter.h | 63 + src/common/config-src-build.h.in | 2 + src/common/config.h.in | 1166 + src/common/fxt.c | 502 + src/common/fxt.h | 1575 + src/common/graph.c | 493 + src/common/graph.h | 133 + src/common/hash.c | 81 + src/common/inlines.c | 22 + src/common/knobs.c | 918 + src/common/knobs.h | 390 + src/common/list.h | 470 + src/common/prio_list.h | 591 + src/common/rbtree.c | 500 + src/common/rbtree.h | 334 + src/common/rbtree_i.h | 190 + src/common/rwlock.c | 162 + src/common/rwlock.h | 57 + src/common/starpu_spinlock.c | 43 + src/common/starpu_spinlock.h | 135 + src/common/thread.c | 1196 + 
src/common/thread.h | 172 + src/common/timing.c | 266 + src/common/timing.h | 43 + src/common/uthash.h | 1030 + src/common/utils.c | 769 + src/common/utils.h | 215 + src/core/combined_workers.c | 178 + src/core/combined_workers.h | 29 + src/core/debug.c | 129 + src/core/debug.h | 317 + src/core/dependencies/cg.c | 419 + src/core/dependencies/cg.h | 141 + .../dependencies/data_arbiter_concurrency.c | 855 + src/core/dependencies/data_concurrency.c | 690 + src/core/dependencies/data_concurrency.h | 48 + src/core/dependencies/dependencies.c | 103 + src/core/dependencies/implicit_data_deps.c | 759 + src/core/dependencies/implicit_data_deps.h | 48 + src/core/dependencies/tags.c | 543 + src/core/dependencies/tags.h | 81 + src/core/dependencies/task_deps.c | 233 + src/core/detect_combined_workers.c | 337 + src/core/detect_combined_workers.h | 29 + src/core/devices.c | 104 + src/core/devices.h | 44 + src/core/disk.c | 546 + src/core/disk.h | 82 + src/core/disk_ops/disk_hdf5.c | 977 + src/core/disk_ops/disk_leveldb.cpp | 368 + src/core/disk_ops/disk_stdio.c | 461 + src/core/disk_ops/disk_unistd.c | 81 + src/core/disk_ops/disk_unistd_o_direct.c | 151 + src/core/disk_ops/unistd/disk_unistd_global.c | 1122 + src/core/disk_ops/unistd/disk_unistd_global.h | 78 + src/core/drivers.c | 71 + src/core/drivers.h | 39 + src/core/errorcheck.c | 93 + src/core/errorcheck.h | 84 + src/core/idle_hook.c | 116 + src/core/idle_hook.h | 30 + src/core/jobs.c | 1138 + src/core/jobs.h | 296 + src/core/parallel_task.c | 65 + src/core/perfmodel/energy_model.c | 299 + src/core/perfmodel/multiple_regression.c | 375 + src/core/perfmodel/multiple_regression.h | 34 + src/core/perfmodel/perfmodel.c | 972 + src/core/perfmodel/perfmodel.h | 136 + src/core/perfmodel/perfmodel_bus.c | 3364 + src/core/perfmodel/perfmodel_history.c | 2249 + src/core/perfmodel/perfmodel_nan.c | 101 + src/core/perfmodel/perfmodel_print.c | 324 + src/core/perfmodel/regression.c | 305 + src/core/perfmodel/regression.h | 34 + 
src/core/perfmodel/starpu-perfmodel.dtd | 64 + src/core/progress_hook.c | 116 + src/core/progress_hook.h | 30 + src/core/sched_ctx.c | 2934 + src/core/sched_ctx.h | 325 + src/core/sched_ctx_list.c | 433 + src/core/sched_ctx_list.h | 86 + src/core/sched_policy.c | 1280 + src/core/sched_policy.h | 134 + src/core/simgrid.c | 1597 + src/core/simgrid.h | 186 + src/core/simgrid_cpp.cpp | 169 + src/core/task.c | 2187 + src/core/task.h | 208 + src/core/task_bundle.c | 233 + src/core/task_bundle.h | 140 + src/core/topology.c | 2363 + src/core/topology.h | 174 + src/core/tree.c | 164 + src/core/workers.c | 3168 + src/core/workers.h | 1339 + src/datawizard/coherency.c | 1586 + src/datawizard/coherency.h | 421 + src/datawizard/copy_driver.c | 776 + src/datawizard/copy_driver.h | 106 + src/datawizard/data_request.c | 1035 + src/datawizard/data_request.h | 195 + src/datawizard/datastats.c | 117 + src/datawizard/datastats.h | 70 + src/datawizard/datawizard.c | 174 + src/datawizard/datawizard.h | 57 + src/datawizard/filters.c | 1139 + src/datawizard/filters.h | 36 + src/datawizard/footprint.c | 122 + src/datawizard/footprint.h | 41 + src/datawizard/interfaces/bcsr_filters.c | 105 + src/datawizard/interfaces/bcsr_interface.c | 499 + src/datawizard/interfaces/block_filters.c | 299 + src/datawizard/interfaces/block_interface.c | 555 + src/datawizard/interfaces/coo_interface.c | 258 + src/datawizard/interfaces/csr_filters.c | 58 + src/datawizard/interfaces/csr_interface.c | 436 + src/datawizard/interfaces/data_interface.c | 1302 + src/datawizard/interfaces/data_interface.h | 74 + src/datawizard/interfaces/matrix_filters.c | 230 + src/datawizard/interfaces/matrix_interface.c | 605 + .../interfaces/multiformat_interface.c | 652 + src/datawizard/interfaces/ndim_filters.c | 563 + src/datawizard/interfaces/ndim_interface.c | 798 + src/datawizard/interfaces/tensor_filters.c | 363 + src/datawizard/interfaces/tensor_interface.c | 633 + .../interfaces/variable_interface.c | 321 + 
src/datawizard/interfaces/vector_filters.c | 215 + src/datawizard/interfaces/vector_interface.c | 454 + src/datawizard/interfaces/void_interface.c | 184 + src/datawizard/malloc.c | 1131 + src/datawizard/malloc.h | 101 + src/datawizard/memalloc.c | 1985 + src/datawizard/memalloc.h | 103 + src/datawizard/memory_manager.c | 237 + src/datawizard/memory_manager.h | 56 + src/datawizard/memory_nodes.c | 233 + src/datawizard/memory_nodes.h | 191 + src/datawizard/memstats.c | 106 + src/datawizard/memstats.h | 58 + src/datawizard/node_ops.c | 33 + src/datawizard/node_ops.h | 174 + src/datawizard/reduction.c | 479 + src/datawizard/sort_data_handles.c | 140 + src/datawizard/sort_data_handles.h | 46 + src/datawizard/user_interactions.c | 861 + src/datawizard/write_back.c | 93 + src/datawizard/write_back.h | 35 + src/debug/latency.c | 51 + src/debug/starpu_debug_helpers.h | 45 + src/debug/structures_size.c | 38 + src/debug/traces/anim.c | 535 + src/debug/traces/starpu_fxt.c | 5421 ++ src/debug/traces/starpu_fxt.h | 112 + src/debug/traces/starpu_fxt_dag.c | 165 + src/debug/traces/starpu_fxt_mpi.c | 494 + src/debug/traces/starpu_paje.c | 609 + src/dolib.c | 64 + src/drivers/cpu/driver_cpu.c | 779 + src/drivers/cpu/driver_cpu.h | 38 + src/drivers/cuda/driver_cuda.c | 2615 + src/drivers/cuda/driver_cuda.h | 87 + src/drivers/cuda/driver_cuda0.c | 861 + src/drivers/cuda/driver_cuda1.c | 1418 + src/drivers/cuda/driver_cuda_init.c | 51 + src/drivers/cuda/starpu_cublas.c | 121 + src/drivers/cuda/starpu_cublasLt.c | 76 + src/drivers/cuda/starpu_cublas_v2.c | 67 + src/drivers/cuda/starpu_cusolver.c | 120 + src/drivers/cuda/starpu_cusparse.c | 80 + src/drivers/disk/driver_disk.c | 296 + src/drivers/disk/driver_disk.h | 29 + src/drivers/driver_common/driver_common.c | 948 + src/drivers/driver_common/driver_common.h | 54 + src/drivers/hip/driver_hip.c | 1711 + src/drivers/hip/driver_hip.h | 74 + src/drivers/hip/driver_hip_init.c | 51 + src/drivers/hip/starpu_hipblas.c | 145 + 
src/drivers/max/driver_max_fpga.c | 734 + src/drivers/max/driver_max_fpga.h | 54 + src/drivers/max/driver_max_fpga_init.c | 49 + src/drivers/mp_common/mp_common.c | 547 + src/drivers/mp_common/mp_common.h | 310 + src/drivers/mp_common/sink_common.c | 924 + src/drivers/mp_common/sink_common.h | 64 + src/drivers/mp_common/source_common.c | 1293 + src/drivers/mp_common/source_common.h | 94 + src/drivers/mpi/driver_mpi_common.c | 595 + src/drivers/mpi/driver_mpi_common.h | 71 + src/drivers/mpi/driver_mpi_init.c | 47 + src/drivers/mpi/driver_mpi_sink.c | 38 + src/drivers/mpi/driver_mpi_sink.h | 35 + src/drivers/mpi/driver_mpi_source.c | 399 + src/drivers/mpi/driver_mpi_source.h | 53 + src/drivers/opencl/driver_opencl.c | 1729 + src/drivers/opencl/driver_opencl.h | 76 + src/drivers/opencl/driver_opencl_init.c | 54 + src/drivers/opencl/driver_opencl_utils.c | 804 + src/drivers/opencl/driver_opencl_utils.h | 30 + src/drivers/tcpip/driver_tcpip_common.c | 1500 + src/drivers/tcpip/driver_tcpip_common.h | 87 + src/drivers/tcpip/driver_tcpip_common_func.h | 347 + src/drivers/tcpip/driver_tcpip_init.c | 47 + src/drivers/tcpip/driver_tcpip_sink.c | 44 + src/drivers/tcpip/driver_tcpip_sink.h | 35 + src/drivers/tcpip/driver_tcpip_source.c | 395 + src/drivers/tcpip/driver_tcpip_source.h | 51 + .../starpu_parallel_worker_create.c | 836 + .../starpu_parallel_worker_create.h | 127 + src/profiling/bound.c | 1209 + src/profiling/bound.h | 48 + src/profiling/callbacks.c | 260 + src/profiling/callbacks.h | 78 + src/profiling/profiling.c | 701 + src/profiling/profiling.h | 89 + src/profiling/profiling_helpers.c | 219 + .../component_best_implementation.c | 126 + src/sched_policies/component_composed.c | 237 + src/sched_policies/component_eager.c | 168 + .../component_eager_calibration.c | 81 + src/sched_policies/component_eager_prio.c | 165 + src/sched_policies/component_fifo.c | 303 + src/sched_policies/component_heft.c | 244 + src/sched_policies/component_heteroprio.c | 575 + 
src/sched_policies/component_mct.c | 132 + .../component_perfmodel_select.c | 105 + src/sched_policies/component_prio.c | 326 + src/sched_policies/component_random.c | 118 + src/sched_policies/component_sched.c | 788 + src/sched_policies/component_stage.c | 59 + src/sched_policies/component_userchoice.c | 56 + src/sched_policies/component_work_stealing.c | 405 + src/sched_policies/component_worker.c | 882 + .../deque_modeling_policy_data_aware.c | 1159 + src/sched_policies/eager_central_policy.c | 210 + .../eager_central_priority_policy.c | 258 + src/sched_policies/fifo_queues.c | 520 + src/sched_policies/fifo_queues.h | 53 + src/sched_policies/graph_test_policy.c | 365 + src/sched_policies/helper_mct.c | 222 + src/sched_policies/helper_mct.h | 77 + src/sched_policies/heteroprio.c | 3876 ++ src/sched_policies/heteroprio.h | 44 + src/sched_policies/hierarchical_heft.c | 90 + src/sched_policies/modular_eager.c | 42 + .../modular_eager_prefetching.c | 45 + src/sched_policies/modular_eager_prio.c | 47 + src/sched_policies/modular_ez.c | 489 + src/sched_policies/modular_gemm.c | 196 + src/sched_policies/modular_heft.c | 190 + src/sched_policies/modular_heft2.c | 78 + src/sched_policies/modular_heft_prio.c | 79 + src/sched_policies/modular_heteroprio.c | 51 + src/sched_policies/modular_heteroprio_heft.c | 62 + src/sched_policies/modular_parallel_heft.c | 82 + src/sched_policies/modular_parallel_random.c | 76 + src/sched_policies/modular_prio.c | 43 + src/sched_policies/modular_prio_prefetching.c | 87 + src/sched_policies/modular_random.c | 71 + .../modular_random_prefetching.c | 80 + src/sched_policies/modular_ws.c | 44 + src/sched_policies/parallel_eager.c | 369 + src/sched_policies/parallel_heft.c | 615 + src/sched_policies/prio_deque.c | 229 + src/sched_policies/prio_deque.h | 40 + src/sched_policies/random_policy.c | 114 + src/sched_policies/sched_component.h | 39 + src/sched_policies/scheduler_maker.c | 291 + src/sched_policies/work_stealing_policy.c | 923 + 
src/util/execute_on_all.c | 188 + src/util/file.c | 47 + src/util/fstarpu.c | 754 + src/util/misc.c | 88 + src/util/openmp_runtime_support.c | 2794 + src/util/openmp_runtime_support.h | 423 + src/util/openmp_runtime_support_environment.c | 795 + src/util/openmp_runtime_support_omp_api.c | 303 + src/util/starpu_create_sync_task.c | 55 + src/util/starpu_data_cpy.c | 213 + src/util/starpu_data_cpy.h | 33 + src/util/starpu_task_insert.c | 226 + src/util/starpu_task_insert_utils.c | 995 + src/util/starpu_task_insert_utils.h | 36 + src/worker_collection/worker_list.c | 309 + src/worker_collection/worker_tree.c | 372 + starpu_openmp_llvm/Makefile.am | 19 + starpu_openmp_llvm/Makefile.in | 889 + starpu_openmp_llvm/examples/Makefile.am | 42 + starpu_openmp_llvm/examples/Makefile.in | 1440 + starpu_openmp_llvm/examples/README | 46 + starpu_openmp_llvm/examples/hello-task.c | 60 + starpu_openmp_llvm/src/Makefile.am | 38 + starpu_openmp_llvm/src/Makefile.in | 1111 + .../src/openmp_runtime_support_llvm.c | 1062 + starpufft/Makefile.am | 31 + starpufft/Makefile.in | 977 + starpufft/include/starpufft.h | 71 + starpufft/packages/libstarpufft.pc.in | 26 + starpufft/packages/starpufft-1.0.pc.in | 26 + starpufft/packages/starpufft-1.1.pc.in | 26 + starpufft/packages/starpufft-1.2.pc.in | 26 + starpufft/packages/starpufft-1.3.pc.in | 26 + starpufft/packages/starpufft-1.4.pc.in | 26 + starpufft/src/Makefile.am | 54 + starpufft/src/Makefile.in | 941 + starpufft/src/cuda_kernels.cu | 18 + starpufft/src/cudaf_kernels.cu | 18 + starpufft/src/cudax_kernels.cu | 159 + starpufft/src/cudax_kernels.h | 22 + starpufft/src/starpufft-double.h | 54 + starpufft/src/starpufft-float.h | 54 + starpufft/src/starpufft.c | 18 + starpufft/src/starpufft_common.c | 20 + starpufft/src/starpufftf.c | 18 + starpufft/src/starpufftx.c | 542 + starpufft/src/starpufftx1d.c | 877 + starpufft/src/starpufftx2d.c | 880 + starpufft/src/starpufftx3d.c | 188 + starpufft/tests/Makefile.am | 61 + starpufft/tests/Makefile.in 
| 1563 + starpufft/tests/loader.c | 505 + starpufft/tests/test.c | 18 + starpufft/tests/test_threads.c | 18 + starpufft/tests/testf.c | 18 + starpufft/tests/testf_threads.c | 18 + starpufft/tests/testx.c | 316 + starpufft/tests/testx_threads.c | 112 + starpupy/Makefile.am | 22 + starpupy/Makefile.in | 891 + starpupy/benchmark/Makefile.am | 41 + starpupy/benchmark/Makefile.in | 1409 + starpupy/benchmark/handle_perf_plot.py | 83 + starpupy/benchmark/handle_perf_plot_pickle.py | 89 + starpupy/benchmark/tasks_size_overhead.gp | 44 + starpupy/benchmark/tasks_size_overhead.py | 182 + starpupy/benchmark/tasks_size_overhead.sh | 24 + starpupy/benchmark/test_handle_bench.py | 29 + starpupy/benchmark/test_handle_perf.py | 205 + starpupy/benchmark/test_handle_perf.sh | 25 + starpupy/benchmark/test_handle_perf_pickle.py | 214 + starpupy/benchmark/test_handle_perf_pickle.sh | 19 + starpupy/examples/Makefile.am | 100 + starpupy/examples/Makefile.in | 1739 + starpupy/examples/loader.c | 505 + starpupy/examples/starpu_py.concurrent.sh | 19 + starpupy/examples/starpu_py.py | 161 + starpupy/examples/starpu_py.sh | 19 + .../examples/starpu_py_handle.concurrent.sh | 19 + starpupy/examples/starpu_py_handle.py | 577 + starpupy/examples/starpu_py_handle.sh | 19 + starpupy/examples/starpu_py_np.concurrent.sh | 19 + starpupy/examples/starpu_py_np.py | 97 + starpupy/examples/starpu_py_np.sh | 19 + .../examples/starpu_py_numpy.concurrent.sh | 19 + starpupy/examples/starpu_py_numpy.py | 48 + starpupy/examples/starpu_py_numpy.sh | 19 + starpupy/examples/starpu_py_parallel.py | 403 + starpupy/examples/starpu_py_parallel.sh | 19 + .../starpu_py_partition.concurrent.sh | 19 + starpupy/examples/starpu_py_partition.py | 91 + starpupy/examples/starpu_py_partition.sh | 19 + .../starpu_py_perfmodel.concurrent.sh | 19 + starpupy/examples/starpu_py_perfmodel.py | 48 + starpupy/examples/starpu_py_perfmodel.sh | 19 + starpupy/execute.sh.in | 133 + starpupy/src/Makefile.am | 95 + starpupy/src/Makefile.in | 
1014 + starpupy/src/__init__.py | 116 + starpupy/src/delay.py | 39 + starpupy/src/handle_access.py | 49 + starpupy/src/intermedia.py | 113 + starpupy/src/joblib.py | 379 + starpupy/src/setup.cfg.in | 24 + starpupy/src/setup.py.in | 56 + starpupy/src/starpu_task_wrapper.c | 2075 + starpupy/src/starpupy_buffer_interface.c | 883 + starpupy/src/starpupy_buffer_interface.h | 92 + starpupy/src/starpupy_cloudpickle.h | 42 + starpupy/src/starpupy_handle.c | 809 + starpupy/src/starpupy_handle.h | 38 + starpupy/src/starpupy_interface.c | 334 + starpupy/src/starpupy_interface.h | 42 + starpupy/src/starpupy_numpy_filters.c | 297 + starpupy/src/starpupy_numpy_filters.h | 23 + starpupy/src/starpupy_private.h | 36 + starpurm/Makefile.am | 34 + starpurm/Makefile.in | 1004 + starpurm/examples/Makefile.am | 42 + starpurm/examples/Makefile.in | 1597 + starpurm/examples/async_spawn.c | 317 + starpurm/examples/block_test/block_test.c | 269 + starpurm/examples/chameleon/dgemm.c | 309 + .../examples/cuda_vector_scale/vector_scale.c | 289 + .../cuda_vector_scale/vs_cuda_kernel.cu | 64 + starpurm/examples/spawn.c | 270 + starpurm/examples/vector_scale.c | 248 + starpurm/include/starpurm.h | 487 + starpurm/include/starpurm_config.h.in | 29 + starpurm/packages/starpurm-1.3.pc.in | 28 + starpurm/packages/starpurm-1.4.pc.in | 28 + starpurm/src/Makefile.am | 41 + starpurm/src/Makefile.in | 1118 + starpurm/src/starpurm.c | 1700 + starpurm/src/starpurm_dlb.c | 397 + starpurm/src/starpurm_private.h | 136 + starpurm/tests/01_init_exit.c | 29 + starpurm/tests/02_list_units.c | 54 + starpurm/tests/03_cpusets.c | 65 + starpurm/tests/04_drs_enable.c | 40 + starpurm/tests/Makefile.am | 39 + starpurm/tests/Makefile.in | 1540 + tests/Makefile.am | 1261 + tests/Makefile.in | 10313 ++++ tests/coverage/coverage.sh | 141 + tests/datawizard/acquire_cb.c | 107 + tests/datawizard/acquire_cb_insert.c | 169 + tests/datawizard/acquire_release.c | 104 + tests/datawizard/acquire_release2.c | 103 + 
tests/datawizard/acquire_release_to.c | 168 + tests/datawizard/acquire_try.c | 98 + tests/datawizard/allocate.c | 253 + tests/datawizard/allocate_many_numa_nodes.c | 129 + tests/datawizard/bcsr.c | 177 + tests/datawizard/cache.c | 98 + tests/datawizard/commute.c | 193 + tests/datawizard/commute2.c | 128 + tests/datawizard/copy.c | 115 + .../critical_section_with_void_interface.c | 96 + tests/datawizard/data_deinitialize.c | 238 + tests/datawizard/data_implicit_deps.c | 274 + tests/datawizard/data_invalidation.c | 238 + tests/datawizard/data_register.c | 108 + .../deinitialize_pending_requests.c | 60 + tests/datawizard/deps.c | 121 + tests/datawizard/dining_philosophers.c | 121 + tests/datawizard/double_parameter.c | 186 + tests/datawizard/dsm_stress.c | 270 + tests/datawizard/gpu_ptr_register.c | 298 + tests/datawizard/gpu_register.c | 322 + tests/datawizard/handle_to_pointer.c | 187 + tests/datawizard/in_place_partition.c | 120 + tests/datawizard/increment_init.c | 99 + tests/datawizard/increment_redux.c | 106 + tests/datawizard/increment_redux_lazy.c | 116 + tests/datawizard/increment_redux_partition.c | 123 + tests/datawizard/increment_redux_v2.c | 115 + tests/datawizard/increment_redux_with_args.c | 88 + tests/datawizard/interfaces/bcsr/bcsr_cuda.cu | 75 + .../interfaces/bcsr/bcsr_interface.c | 202 + .../datawizard/interfaces/bcsr/bcsr_opencl.c | 134 + .../interfaces/bcsr/bcsr_opencl_kernel.cl | 29 + .../datawizard/interfaces/block/block_cuda.cu | 81 + .../interfaces/block/block_interface.c | 158 + .../interfaces/block/block_opencl.c | 120 + .../interfaces/block/block_opencl_kernel.cl | 38 + tests/datawizard/interfaces/coo/coo_cuda.cu | 72 + .../datawizard/interfaces/coo/coo_interface.c | 174 + tests/datawizard/interfaces/coo/coo_opencl.c | 137 + .../interfaces/coo/coo_opencl_kernel.cl | 29 + tests/datawizard/interfaces/copy_interfaces.c | 144 + tests/datawizard/interfaces/csr/csr_cuda.cu | 71 + .../datawizard/interfaces/csr/csr_interface.c | 166 + 
tests/datawizard/interfaces/csr/csr_opencl.c | 132 + .../interfaces/csr/csr_opencl_kernel.cl | 29 + .../interfaces/matrix/matrix_cuda.cu | 74 + .../interfaces/matrix/matrix_interface.c | 140 + .../interfaces/matrix/matrix_opencl.c | 131 + .../interfaces/matrix/matrix_opencl_kernel.cl | 31 + .../interfaces/multiformat/advanced/generic.c | 208 + .../interfaces/multiformat/advanced/generic.h | 56 + .../advanced/multiformat_cuda_opencl.c | 172 + .../advanced/multiformat_data_release.c | 160 + .../advanced/multiformat_handle_conversion.c | 260 + .../multiformat/advanced/multiformat_worker.c | 156 + .../multiformat/advanced/same_handle.c | 142 + .../multiformat_conversion_codelets.c | 88 + .../multiformat_conversion_codelets_cuda.cu | 50 + .../multiformat_conversion_codelets_kernel.cl | 27 + .../multiformat_conversion_codelets_opencl.c | 111 + .../multiformat/multiformat_cuda.cu | 80 + .../multiformat/multiformat_interface.c | 168 + .../multiformat/multiformat_opencl.c | 137 + .../multiformat/multiformat_opencl_kernel.cl | 36 + .../multiformat/multiformat_types.h | 33 + tests/datawizard/interfaces/ndim/ndim_cuda.cu | 88 + .../interfaces/ndim/ndim_interface.c | 151 + .../datawizard/interfaces/ndim/ndim_opencl.c | 126 + .../interfaces/ndim/ndim_opencl_kernel.cl | 41 + .../interfaces/tensor/tensor_cuda.cu | 86 + .../interfaces/tensor/tensor_interface.c | 169 + .../interfaces/tensor/tensor_opencl.c | 124 + .../interfaces/tensor/tensor_opencl_kernel.cl | 41 + tests/datawizard/interfaces/test_interfaces.c | 557 + tests/datawizard/interfaces/test_interfaces.h | 99 + .../datawizard/interfaces/test_interfaces.sh | 28 + .../interfaces/variable/variable_cuda.cu | 69 + .../interfaces/variable/variable_interface.c | 110 + .../interfaces/variable/variable_opencl.c | 115 + .../variable/variable_opencl_kernel.cl | 29 + .../interfaces/vector/vector_cuda.cu | 68 + .../interfaces/vector/vector_interface.c | 124 + .../interfaces/vector/vector_opencl.c | 130 + 
.../interfaces/vector/vector_opencl_kernel.cl | 29 + .../interfaces/void/void_interface.c | 79 + .../datawizard/invalidate_pending_requests.c | 60 + tests/datawizard/lazy_allocation.c | 238 + tests/datawizard/locality.c | 174 + tests/datawizard/locality.sh | 41 + tests/datawizard/manual_reduction.c | 321 + tests/datawizard/mpi_like.c | 205 + tests/datawizard/mpi_like_async.c | 363 + tests/datawizard/no_unregister.c | 96 + tests/datawizard/noreclaim.c | 139 + tests/datawizard/nowhere.c | 145 + tests/datawizard/numa_overflow.c | 142 + tests/datawizard/partition_dep.c | 109 + tests/datawizard/partition_init.c | 110 + tests/datawizard/partition_lazy.c | 105 + tests/datawizard/partition_wontuse.c | 46 + tests/datawizard/partitioned_acquire.c | 121 + tests/datawizard/partitioned_initialization.c | 113 + tests/datawizard/readers_and_writers.c | 105 + tests/datawizard/readonly.c | 77 + tests/datawizard/reclaim.c | 197 + tests/datawizard/redux_acquire.c | 99 + tests/datawizard/scal.c | 102 + tests/datawizard/scal.h | 31 + tests/datawizard/scal_cuda.cu | 37 + tests/datawizard/scal_opencl.cl | 25 + tests/datawizard/scratch.c | 158 + tests/datawizard/scratch_cuda.cu | 51 + tests/datawizard/scratch_opencl.c | 84 + tests/datawizard/scratch_opencl_kernel.cl | 25 + tests/datawizard/scratch_reuse.c | 82 + tests/datawizard/simgrid-locality.c | 27 + tests/datawizard/specific_node.c | 266 + tests/datawizard/specific_node_same.c | 113 + tests/datawizard/sync_and_notify_data.c | 208 + .../sync_and_notify_data_implicit.c | 201 + .../sync_and_notify_data_kernels.cu | 53 + .../datawizard/sync_and_notify_data_opencl.c | 80 + .../sync_and_notify_data_opencl_codelet.cl | 30 + tests/datawizard/sync_with_data_with_mem.c | 142 + .../sync_with_data_with_mem_non_blocking.c | 175 + ...with_data_with_mem_non_blocking_implicit.c | 176 + .../task_with_multiple_time_the_same_handle.c | 149 + tests/datawizard/temporary_partition.c | 116 + .../datawizard/temporary_partition_implicit.c | 116 + 
tests/datawizard/temporary_partition_read.c | 104 + tests/datawizard/test_arbiter.cpp | 216 + tests/datawizard/unpartition.c | 127 + tests/datawizard/user_interaction_implicit.c | 101 + tests/datawizard/variable_parameters.c | 244 + tests/datawizard/variable_size.c | 372 + tests/datawizard/write_only_tmp_buffer.c | 157 + tests/datawizard/wt_broadcast.c | 103 + tests/datawizard/wt_host.c | 92 + tests/disk/disk_compute.c | 457 + tests/disk/disk_copy.c | 200 + tests/disk/disk_copy_to_disk.c | 423 + tests/disk/disk_copy_unpack.c | 179 + tests/disk/disk_pack.c | 288 + tests/disk/mem_reclaim.c | 295 + tests/energy/dynamic.sh | 70 + tests/energy/energy_efficiency.c | 550 + tests/energy/perfs.gp | 76 + tests/energy/static.sh | 72 + tests/errorcheck/invalid_blocking_calls.c | 131 + tests/errorcheck/invalid_tasks.c | 89 + tests/errorcheck/starpu_init_noworker.c | 56 + tests/errorcheck/workers_cpuid.c | 208 + tests/fault-tolerance/retry.c | 133 + tests/fortran90/init_01.f90 | 30 + tests/fortran90/starpu_mod.f90 | 145 + tests/helper.h | 136 + tests/helper/cublasLt_init.c | 72 + tests/helper/cublas_init.c | 71 + tests/helper/cusparse_init.c | 71 + tests/helper/execute_on_all.c | 55 + tests/helper/hipblas_init.c | 72 + tests/helper/pinned_memory.c | 49 + tests/helper/starpu_create_sync_task.c | 86 + tests/helper/starpu_data_cpy.c | 62 + tests/helper/starpu_data_dup_ro.c | 136 + tests/loader.c | 505 + tests/main/bind.c | 84 + tests/main/callback.c | 91 + tests/main/codelet_null_callback.c | 106 + tests/main/const_codelet.c | 132 + tests/main/deadlock.c | 71 + tests/main/declare_deps_after_submission.c | 95 + ...eclare_deps_after_submission_synchronous.c | 93 + tests/main/declare_deps_in_callback.c | 93 + tests/main/deploop.c | 94 + tests/main/deprecated_func.c | 162 + tests/main/display_binding.c | 42 + tests/main/driver_api/init_run_deinit.c | 274 + tests/main/driver_api/run_driver.c | 273 + tests/main/empty_task.c | 106 + tests/main/empty_task_chain.c | 75 + 
tests/main/empty_task_sync_point.c | 90 + tests/main/empty_task_sync_point_tasks.c | 68 + tests/main/execute_on_a_specific_worker.c | 180 + tests/main/execute_schedule.c | 160 + tests/main/get_children_tasks.c | 99 + tests/main/get_current_task.c | 122 + tests/main/hwloc_cpuset.c | 88 + tests/main/insert_task.c | 171 + tests/main/insert_task_array.c | 99 + tests/main/insert_task_dyn_handles.c | 358 + tests/main/insert_task_many.c | 272 + tests/main/insert_task_nullcodelet.c | 49 + tests/main/insert_task_pack.c | 64 + tests/main/insert_task_value.c | 339 + tests/main/insert_task_where.c | 89 + tests/main/job.c | 98 + tests/main/mkdtemp.c | 63 + tests/main/multithreaded.c | 124 + tests/main/multithreaded_init.c | 93 + tests/main/pack.c | 258 + tests/main/pause_resume.c | 103 + tests/main/regenerate.c | 132 + tests/main/regenerate_pipeline.c | 163 + tests/main/restart.c | 71 + tests/main/starpu_init.c | 146 + tests/main/starpu_task_bundle.c | 148 + tests/main/starpu_task_wait.c | 115 + tests/main/starpu_task_wait_for_all.c | 116 + tests/main/starpu_worker_exists.c | 89 + tests/main/static_restartable.c | 102 + tests/main/static_restartable_tag.c | 111 + .../static_restartable_using_initializer.c | 104 + tests/main/subgraph_repeat.c | 162 + tests/main/subgraph_repeat_regenerate.c | 184 + tests/main/subgraph_repeat_regenerate_tag.c | 231 + .../subgraph_repeat_regenerate_tag_cycle.c | 229 + tests/main/subgraph_repeat_tag.c | 194 + tests/main/submit.c | 118 + tests/main/tag_get_task.c | 69 + tests/main/tag_task_data_deps.c | 268 + tests/main/tag_wait_api.c | 137 + tests/main/task_end_dep.c | 131 + tests/main/task_wait_api.c | 131 + tests/main/wait_all_regenerable_tasks.c | 129 + tests/maxfpga/LMemLoopbackCpuCode.c | 62 + tests/maxfpga/MyTasksManager.maxj | 152 + tests/maxfpga/MyTasksMuxManager.maxj | 96 + tests/maxfpga/README.txt | 94 + tests/maxfpga/StreamFMACpuCode.cpp | 60 + tests/maxfpga/Task1.maxj | 24 + tests/maxfpga/Task2.maxj | 24 + tests/maxfpga/Task3.maxj | 24 + 
tests/maxfpga/max_fpga_advanced_static.c | 284 + tests/maxfpga/max_fpga_basic_static.c | 227 + tests/maxfpga/max_fpga_dynamic.c | 253 + tests/maxfpga/max_fpga_mux.c | 312 + tests/memory/memstress.gp | 26 + tests/memory/memstress.sh | 66 + tests/memory/memstress2.gp | 26 + tests/memory/memstress2.sh | 65 + .../microbenchs/async_tasks_data_overhead.sh | 22 + tests/microbenchs/async_tasks_overhead.c | 222 + tests/microbenchs/bandwidth.c | 360 + tests/microbenchs/bandwidth_scheds.sh | 108 + tests/microbenchs/display_structures_size.c | 32 + tests/microbenchs/local_pingpong.c | 118 + tests/microbenchs/matrix_as_vector.c | 289 + tests/microbenchs/microbench.sh | 118 + ...arallel_dependent_homogeneous_tasks_data.c | 161 + ...rallel_dependent_homogeneous_tasks_data.sh | 24 + ...parallel_independent_heterogeneous_tasks.c | 147 + ...arallel_independent_heterogeneous_tasks.sh | 24 + ...lel_independent_heterogeneous_tasks_data.c | 186 + ...el_independent_heterogeneous_tasks_data.sh | 24 + .../parallel_independent_homogeneous_tasks.c | 116 + .../parallel_independent_homogeneous_tasks.sh | 24 + ...allel_independent_homogeneous_tasks_data.c | 153 + ...llel_independent_homogeneous_tasks_data.sh | 24 + .../parallel_redux_heterogeneous_tasks_data.c | 219 + ...parallel_redux_heterogeneous_tasks_data.sh | 24 + .../parallel_redux_homogeneous_tasks_data.c | 186 + .../parallel_redux_homogeneous_tasks_data.sh | 24 + tests/microbenchs/prefetch_data_on_node.c | 192 + tests/microbenchs/redundant_buffer.c | 82 + tests/microbenchs/starpu_check.sh | 113 + tests/microbenchs/sync_tasks_data_overhead.sh | 22 + tests/microbenchs/sync_tasks_overhead.c | 197 + tests/microbenchs/tasks_data_overhead.sh | 22 + tests/microbenchs/tasks_overhead.c | 258 + tests/microbenchs/tasks_size_overhead.c | 345 + tests/microbenchs/tasks_size_overhead.gp | 44 + tests/microbenchs/tasks_size_overhead.sh | 23 + .../microbenchs/tasks_size_overhead_sched.sh | 41 + .../microbenchs/tasks_size_overhead_scheds.sh | 33 + 
tests/model-checking/Makefile.am | 77 + tests/model-checking/Makefile.in | 1439 + tests/model-checking/barrier.sh | 18 + tests/model-checking/platform.xml | 20 + tests/model-checking/prio_list.c | 182 + tests/model-checking/prio_list.sh | 18 + tests/model-checking/prio_list2.c | 21 + tests/model-checking/prio_list3.c | 21 + tests/model-checking/starpu-mc.sh.in | 36 + tests/model-checking/starpu_barrier.c | 149 + tests/openmp/api_01.c | 142 + tests/openmp/array_slice_01.c | 255 + tests/openmp/cuda_task_01.c | 205 + tests/openmp/environment.c | 54 + tests/openmp/init_exit_01.c | 39 + tests/openmp/init_exit_02.c | 49 + tests/openmp/parallel_01.c | 71 + tests/openmp/parallel_02.c | 92 + tests/openmp/parallel_03.c | 72 + tests/openmp/parallel_barrier_01.c | 80 + tests/openmp/parallel_critical_01.c | 92 + tests/openmp/parallel_critical_inline_01.c | 95 + tests/openmp/parallel_critical_named_01.c | 102 + .../parallel_critical_named_inline_01.c | 95 + tests/openmp/parallel_for_01.c | 198 + tests/openmp/parallel_for_02.c | 99 + tests/openmp/parallel_for_ordered_01.c | 216 + tests/openmp/parallel_master_01.c | 92 + tests/openmp/parallel_master_inline_01.c | 86 + tests/openmp/parallel_nested_lock_01.c | 128 + tests/openmp/parallel_sections_01.c | 115 + tests/openmp/parallel_sections_combined_01.c | 109 + tests/openmp/parallel_simple_lock_01.c | 118 + tests/openmp/parallel_single_copyprivate_01.c | 100 + .../parallel_single_copyprivate_inline_01.c | 99 + tests/openmp/parallel_single_inline_01.c | 103 + tests/openmp/parallel_single_nowait_01.c | 92 + tests/openmp/parallel_single_wait_01.c | 92 + tests/openmp/task_01.c | 100 + tests/openmp/task_02.c | 219 + tests/openmp/task_03.c | 77 + tests/openmp/taskgroup_01.c | 136 + tests/openmp/taskgroup_02.c | 140 + tests/openmp/taskloop.c | 87 + tests/openmp/taskwait_01.c | 118 + tests/overlap/gpu_concurrency.c | 125 + tests/overlap/long_kernel.cu | 34 + tests/overlap/overlap.c | 162 + tests/overlap/overlap.sh | 76 + 
.../combined_worker_assign_workerid.c | 157 + tests/parallel_tasks/cuda_only.c | 114 + .../parallel_tasks/explicit_combined_worker.c | 122 + tests/parallel_tasks/parallel_kernels.c | 129 + tests/parallel_tasks/parallel_kernels_spmd.c | 130 + .../parallel_tasks/parallel_kernels_trivial.c | 128 + tests/parallel_tasks/spmd_peager.c | 118 + tests/parallel_tasks/swap.c | 91 + tests/perfmodels/feed.c | 96 + tests/perfmodels/memory.c | 70 + .../perfmodels/non_linear_regression_based.c | 145 + tests/perfmodels/opencl_memset.c | 81 + tests/perfmodels/opencl_memset_kernel.cl | 29 + tests/perfmodels/path.c | 181 + tests/perfmodels/regression_based_check.c | 271 + tests/perfmodels/regression_based_energy.c | 301 + tests/perfmodels/regression_based_gpu.c | 399 + tests/perfmodels/regression_based_memset.c | 380 + tests/perfmodels/regression_based_multiimpl.c | 302 + tests/perfmodels/user_base.c | 136 + tests/perfmodels/valid_model.c | 181 + tests/perfmodels/value_nan.c | 106 + tests/regression/profiles.build.only.in | 31 + tests/regression/profiles.in | 62 + tests/regression/regression.sh.in | 140 + tests/sched_ctx/sched_ctx_hierarchy.c | 175 + tests/sched_ctx/sched_ctx_list.c | 195 + tests/sched_ctx/sched_ctx_policy_data.c | 76 + tests/sched_policies/data_locality.c | 220 + tests/sched_policies/execute_all_tasks.c | 104 + tests/sched_policies/prio.c | 145 + tests/sched_policies/simple_cpu_gpu_sched.c | 290 + tests/sched_policies/simple_deps.c | 119 + tests/sched_policies/workerids.c | 136 + tests/variable/increment.c | 156 + tests/variable/increment.h | 26 + tests/variable/increment_cuda.cu | 67 + tests/variable/increment_hip.hip | 70 + tests/variable/increment_opencl.c | 140 + tests/variable/increment_opencl_kernel.cl | 20 + tests/variable/neutral_opencl_kernel.cl | 20 + tests/variable/redux_opencl_kernel.cl | 20 + tools/Makefile.am | 563 + tools/Makefile.in | 2642 + tools/ayudame.cfg | 37 + tools/dev/checker/rename.sed | 182 + tools/dev/checker/rename.sh | 16 + 
tools/dev/cppcheck/suppressions.txt | 146 + tools/dev/lsan/suppressions | 43 + tools/dev/tsan/starpu.suppr | 114 + tools/dev/valgrind/bash.suppr | 26 + tools/dev/valgrind/blas.suppr | 23 + tools/dev/valgrind/fxt.suppr | 129 + tools/dev/valgrind/glpk.suppr | 23 + tools/dev/valgrind/hdf5.suppr | 61 + tools/dev/valgrind/helgrind.sh | 40 + tools/dev/valgrind/hwloc.suppr | 145 + tools/dev/valgrind/libc.suppr | 385 + tools/dev/valgrind/libgomp.suppr | 80 + tools/dev/valgrind/libnuma.suppr | 40 + tools/dev/valgrind/madmpi.suppr | 62 + tools/dev/valgrind/nvidia.suppr | 84 + tools/dev/valgrind/opencl.suppr | 760 + tools/dev/valgrind/openmp.suppr | 215 + tools/dev/valgrind/openmpi.suppr | 673 + tools/dev/valgrind/p11-kit.suppr | 22 + tools/dev/valgrind/padico.suppr | 579 + tools/dev/valgrind/papi.suppr | 51 + tools/dev/valgrind/pthread.suppr | 44 + tools/dev/valgrind/starpu.suppr | 220 + tools/dev/valgrind/starpu_pw.suppr | 62 + tools/dev/valgrind/starpupy.suppr | 75 + tools/dev/valgrind/valgrind.sh | 40 + tools/dev/valgrind/valgrind.suppr | 35 + tools/dev/valgrind/valgrind_xml.sh | 40 + tools/distrib/distrib.r | 63 + tools/distrib/distrib.sh | 28 + tools/gdbinit | 1238 + tools/loader.c | 505 + tools/msvc/starpu.sln | 20 + tools/msvc/starpu/starpu.vcxproj | 83 + tools/msvc/starpu_clean.bat | 24 + tools/msvc/starpu_exec.bat | 46 + tools/msvc/starpu_open.bat | 50 + tools/msvc/starpu_var.bat | 39 + tools/msvc/starpu_var.bat.in | 39 + tools/patch-ayudame | 48 + tools/perfmodels/README | 54 + tools/perfmodels/cluster.xml | 11 + tools/perfmodels/hostfile | 4 + tools/perfmodels/sampling/bus/attila.affinity | 7 + .../perfmodels/sampling/bus/attila.bandwidth | 33 + tools/perfmodels/sampling/bus/attila.config | 5 + tools/perfmodels/sampling/bus/attila.latency | 33 + .../sampling/bus/attila.platform.v4.xml | 277 + .../sampling/bus/attila.platform.xml | 275 + .../sampling/bus/hannibal-pitch.affinity | 7 + .../sampling/bus/hannibal-pitch.bandwidth | 17 + 
.../sampling/bus/hannibal-pitch.config | 4 + .../sampling/bus/hannibal-pitch.latency | 17 + .../bus/hannibal-pitch.platform.v4.xml | 114 + .../sampling/bus/hannibal-pitch.platform.xml | 114 + .../perfmodels/sampling/bus/hannibal.affinity | 7 + .../sampling/bus/hannibal.bandwidth | 17 + tools/perfmodels/sampling/bus/hannibal.config | 4 + .../perfmodels/sampling/bus/hannibal.latency | 17 + .../sampling/bus/hannibal.platform.v4.xml | 114 + .../sampling/bus/hannibal.platform.xml | 114 + tools/perfmodels/sampling/bus/idgraf.affinity | 9 + .../perfmodels/sampling/bus/idgraf.bandwidth | 17 + tools/perfmodels/sampling/bus/idgraf.config | 5 + tools/perfmodels/sampling/bus/idgraf.latency | 17 + .../sampling/bus/idgraf.platform.v4.xml | 1534 + .../sampling/bus/idgraf.platform.xml | 1532 + tools/perfmodels/sampling/bus/mirage.affinity | 7 + .../perfmodels/sampling/bus/mirage.bandwidth | 33 + tools/perfmodels/sampling/bus/mirage.config | 5 + tools/perfmodels/sampling/bus/mirage.latency | 33 + .../sampling/bus/mirage.platform.v4.xml | 277 + .../sampling/bus/mirage.platform.xml | 275 + .../perfmodels/sampling/bus/sirocco.affinity | 9 + .../perfmodels/sampling/bus/sirocco.bandwidth | 17 + tools/perfmodels/sampling/bus/sirocco.config | 5 + tools/perfmodels/sampling/bus/sirocco.latency | 17 + .../sampling/bus/sirocco.platform.v4.xml | 241 + .../sampling/bus/sirocco.platform.xml | 239 + .../sampling/codelets/45/add_scal.mirage | 40 + .../codelets/45/chol_model_gemm.attila | 144 + .../codelets/45/chol_model_gemm.hannibal | 104 + .../45/chol_model_gemm.hannibal-pitch | 104 + .../codelets/45/chol_model_gemm.idgraf | 314 + .../codelets/45/chol_model_gemm.mirage | 148 + .../codelets/45/chol_model_gemm.sirocco | 183 + .../codelets/45/chol_model_potrf.attila | 144 + .../codelets/45/chol_model_potrf.hannibal | 104 + .../45/chol_model_potrf.hannibal-pitch | 104 + .../codelets/45/chol_model_potrf.idgraf | 314 + .../codelets/45/chol_model_potrf.mirage | 148 + 
.../codelets/45/chol_model_potrf.sirocco | 183 + .../codelets/45/chol_model_syrk.attila | 144 + .../codelets/45/chol_model_syrk.hannibal | 104 + .../45/chol_model_syrk.hannibal-pitch | 104 + .../codelets/45/chol_model_syrk.idgraf | 314 + .../codelets/45/chol_model_syrk.mirage | 148 + .../codelets/45/chol_model_syrk.sirocco | 183 + .../codelets/45/chol_model_trsm.attila | 144 + .../codelets/45/chol_model_trsm.hannibal | 104 + .../45/chol_model_trsm.hannibal-pitch | 104 + .../codelets/45/chol_model_trsm.idgraf | 314 + .../codelets/45/chol_model_trsm.mirage | 148 + .../codelets/45/chol_model_trsm.sirocco | 183 + .../sampling/codelets/45/cl_update.attila | 144 + .../sampling/codelets/45/cl_update.idgraf | 314 + .../sampling/codelets/45/cl_update.mirage | 144 + .../sampling/codelets/45/cl_update.sirocco | 178 + .../sampling/codelets/45/func.mirage | 41 + .../sampling/codelets/45/log_arr.mirage | 62 + .../sampling/codelets/45/log_list.mirage | 62 + .../sampling/codelets/45/multi.mirage | 40 + .../sampling/codelets/45/multi_2arr.mirage | 41 + .../sampling/codelets/45/multi_list.mirage | 40 + .../sampling/codelets/45/null.idgraf | 8 + .../sampling/codelets/45/null.sirocco | 8 + .../codelets/45/overlap_sleep_1024_24.attila | 136 + .../codelets/45/overlap_sleep_1024_24.mirage | 136 + .../codelets/45/overlap_sleep_1024_24.sirocco | 168 + .../codelets/45/save_cl_bottom.attila | 144 + .../codelets/45/save_cl_bottom.idgraf | 314 + .../codelets/45/save_cl_bottom.mirage | 144 + .../codelets/45/save_cl_bottom.sirocco | 178 + .../sampling/codelets/45/save_cl_top.attila | 144 + .../sampling/codelets/45/save_cl_top.idgraf | 314 + .../sampling/codelets/45/save_cl_top.mirage | 144 + .../sampling/codelets/45/save_cl_top.sirocco | 178 + .../sampling/codelets/45/scal.mirage | 40 + .../sampling/codelets/45/scal_arr.mirage | 40 + .../sampling/codelets/45/sqrt.mirage | 40 + .../codelets/45/starpu_dgemm_gemm.attila | 145 + .../codelets/45/starpu_dgemm_gemm.idgraf | 314 + 
.../codelets/45/starpu_dgemm_gemm.mirage | 145 + .../codelets/45/starpu_dgemm_gemm.sirocco | 183 + .../45/starpu_dlu_lu_model_gemm.attila | 145 + .../45/starpu_dlu_lu_model_gemm.idgraf | 314 + .../45/starpu_dlu_lu_model_gemm.mirage | 144 + .../45/starpu_dlu_lu_model_gemm.sirocco | 183 + .../45/starpu_dlu_lu_model_gemm_atlas.attila | 145 + .../45/starpu_dlu_lu_model_gemm_atlas.idgraf | 314 + .../45/starpu_dlu_lu_model_gemm_atlas.mirage | 144 + .../45/starpu_dlu_lu_model_gemm_atlas.sirocco | 183 + .../45/starpu_dlu_lu_model_gemm_goto.attila | 145 + .../45/starpu_dlu_lu_model_gemm_goto.idgraf | 314 + .../45/starpu_dlu_lu_model_gemm_goto.mirage | 144 + .../45/starpu_dlu_lu_model_gemm_goto.sirocco | 183 + .../starpu_dlu_lu_model_gemm_openblas.attila | 145 + .../starpu_dlu_lu_model_gemm_openblas.idgraf | 314 + .../starpu_dlu_lu_model_gemm_openblas.mirage | 144 + .../starpu_dlu_lu_model_gemm_openblas.sirocco | 183 + .../45/starpu_dlu_lu_model_getrf.attila | 144 + .../45/starpu_dlu_lu_model_getrf.idgraf | 314 + .../45/starpu_dlu_lu_model_getrf.mirage | 144 + .../45/starpu_dlu_lu_model_getrf.sirocco | 183 + .../45/starpu_dlu_lu_model_getrf_atlas.attila | 144 + .../45/starpu_dlu_lu_model_getrf_atlas.idgraf | 314 + .../45/starpu_dlu_lu_model_getrf_atlas.mirage | 144 + .../starpu_dlu_lu_model_getrf_atlas.sirocco | 183 + .../45/starpu_dlu_lu_model_getrf_goto.attila | 144 + .../45/starpu_dlu_lu_model_getrf_goto.idgraf | 314 + .../45/starpu_dlu_lu_model_getrf_goto.mirage | 144 + .../45/starpu_dlu_lu_model_getrf_goto.sirocco | 183 + .../starpu_dlu_lu_model_getrf_openblas.attila | 144 + .../starpu_dlu_lu_model_getrf_openblas.idgraf | 314 + .../starpu_dlu_lu_model_getrf_openblas.mirage | 144 + ...starpu_dlu_lu_model_getrf_openblas.sirocco | 183 + .../45/starpu_dlu_lu_model_trsm_ll.attila | 145 + .../45/starpu_dlu_lu_model_trsm_ll.idgraf | 314 + .../45/starpu_dlu_lu_model_trsm_ll.mirage | 144 + .../45/starpu_dlu_lu_model_trsm_ll.sirocco | 183 + 
.../starpu_dlu_lu_model_trsm_ll_atlas.attila | 145 + .../starpu_dlu_lu_model_trsm_ll_atlas.idgraf | 314 + .../starpu_dlu_lu_model_trsm_ll_atlas.mirage | 144 + .../starpu_dlu_lu_model_trsm_ll_atlas.sirocco | 183 + .../starpu_dlu_lu_model_trsm_ll_goto.attila | 145 + .../starpu_dlu_lu_model_trsm_ll_goto.idgraf | 314 + .../starpu_dlu_lu_model_trsm_ll_goto.mirage | 144 + .../starpu_dlu_lu_model_trsm_ll_goto.sirocco | 183 + ...tarpu_dlu_lu_model_trsm_ll_openblas.attila | 145 + ...tarpu_dlu_lu_model_trsm_ll_openblas.idgraf | 314 + ...tarpu_dlu_lu_model_trsm_ll_openblas.mirage | 144 + ...arpu_dlu_lu_model_trsm_ll_openblas.sirocco | 183 + .../45/starpu_dlu_lu_model_trsm_ru.attila | 145 + .../45/starpu_dlu_lu_model_trsm_ru.idgraf | 314 + .../45/starpu_dlu_lu_model_trsm_ru.mirage | 144 + .../45/starpu_dlu_lu_model_trsm_ru.sirocco | 183 + .../starpu_dlu_lu_model_trsm_ru_atlas.attila | 145 + .../starpu_dlu_lu_model_trsm_ru_atlas.idgraf | 314 + .../starpu_dlu_lu_model_trsm_ru_atlas.mirage | 144 + .../starpu_dlu_lu_model_trsm_ru_atlas.sirocco | 183 + .../starpu_dlu_lu_model_trsm_ru_goto.attila | 145 + .../starpu_dlu_lu_model_trsm_ru_goto.idgraf | 314 + .../starpu_dlu_lu_model_trsm_ru_goto.mirage | 144 + .../starpu_dlu_lu_model_trsm_ru_goto.sirocco | 183 + ...tarpu_dlu_lu_model_trsm_ru_openblas.attila | 145 + ...tarpu_dlu_lu_model_trsm_ru_openblas.idgraf | 314 + ...tarpu_dlu_lu_model_trsm_ru_openblas.mirage | 144 + ...arpu_dlu_lu_model_trsm_ru_openblas.sirocco | 183 + .../codelets/45/starpu_sgemm_gemm.attila | 165 + .../codelets/45/starpu_sgemm_gemm.idgraf | 359 + .../codelets/45/starpu_sgemm_gemm.mirage | 165 + .../codelets/45/starpu_sgemm_gemm.sirocco | 208 + .../45/starpu_slu_lu_model_gemm.attila | 145 + .../45/starpu_slu_lu_model_gemm.hannibal | 104 + .../starpu_slu_lu_model_gemm.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_gemm.idgraf | 314 + .../45/starpu_slu_lu_model_gemm.mirage | 144 + .../45/starpu_slu_lu_model_gemm.sirocco | 183 + 
.../45/starpu_slu_lu_model_gemm_atlas.attila | 145 + .../starpu_slu_lu_model_gemm_atlas.hannibal | 104 + ...rpu_slu_lu_model_gemm_atlas.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_gemm_atlas.idgraf | 314 + .../45/starpu_slu_lu_model_gemm_atlas.mirage | 144 + .../45/starpu_slu_lu_model_gemm_atlas.sirocco | 183 + .../45/starpu_slu_lu_model_gemm_goto.attila | 145 + .../45/starpu_slu_lu_model_gemm_goto.hannibal | 104 + ...arpu_slu_lu_model_gemm_goto.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_gemm_goto.idgraf | 314 + .../45/starpu_slu_lu_model_gemm_goto.mirage | 144 + .../45/starpu_slu_lu_model_gemm_goto.sirocco | 183 + .../starpu_slu_lu_model_gemm_openblas.attila | 145 + ...starpu_slu_lu_model_gemm_openblas.hannibal | 104 + ..._slu_lu_model_gemm_openblas.hannibal-pitch | 104 + .../starpu_slu_lu_model_gemm_openblas.idgraf | 314 + .../starpu_slu_lu_model_gemm_openblas.mirage | 144 + .../starpu_slu_lu_model_gemm_openblas.sirocco | 183 + .../45/starpu_slu_lu_model_getrf.attila | 144 + .../45/starpu_slu_lu_model_getrf.hannibal | 104 + .../starpu_slu_lu_model_getrf.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_getrf.idgraf | 314 + .../45/starpu_slu_lu_model_getrf.mirage | 144 + .../45/starpu_slu_lu_model_getrf.sirocco | 183 + .../45/starpu_slu_lu_model_getrf_atlas.attila | 144 + .../starpu_slu_lu_model_getrf_atlas.hannibal | 104 + ...pu_slu_lu_model_getrf_atlas.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_getrf_atlas.idgraf | 314 + .../45/starpu_slu_lu_model_getrf_atlas.mirage | 144 + .../starpu_slu_lu_model_getrf_atlas.sirocco | 183 + .../45/starpu_slu_lu_model_getrf_goto.attila | 144 + .../starpu_slu_lu_model_getrf_goto.hannibal | 104 + ...rpu_slu_lu_model_getrf_goto.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_getrf_goto.idgraf | 314 + .../45/starpu_slu_lu_model_getrf_goto.mirage | 144 + .../45/starpu_slu_lu_model_getrf_goto.sirocco | 183 + .../starpu_slu_lu_model_getrf_openblas.attila | 144 + ...tarpu_slu_lu_model_getrf_openblas.hannibal | 104 
+ ...slu_lu_model_getrf_openblas.hannibal-pitch | 104 + .../starpu_slu_lu_model_getrf_openblas.idgraf | 314 + .../starpu_slu_lu_model_getrf_openblas.mirage | 144 + ...starpu_slu_lu_model_getrf_openblas.sirocco | 183 + .../45/starpu_slu_lu_model_trsm_ll.attila | 145 + .../45/starpu_slu_lu_model_trsm_ll.hannibal | 104 + ...starpu_slu_lu_model_trsm_ll.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_trsm_ll.idgraf | 314 + .../45/starpu_slu_lu_model_trsm_ll.mirage | 144 + .../45/starpu_slu_lu_model_trsm_ll.sirocco | 183 + .../starpu_slu_lu_model_trsm_ll_atlas.attila | 145 + ...starpu_slu_lu_model_trsm_ll_atlas.hannibal | 104 + ..._slu_lu_model_trsm_ll_atlas.hannibal-pitch | 104 + .../starpu_slu_lu_model_trsm_ll_atlas.idgraf | 314 + .../starpu_slu_lu_model_trsm_ll_atlas.mirage | 144 + .../starpu_slu_lu_model_trsm_ll_atlas.sirocco | 183 + .../starpu_slu_lu_model_trsm_ll_goto.attila | 145 + .../starpu_slu_lu_model_trsm_ll_goto.hannibal | 104 + ...u_slu_lu_model_trsm_ll_goto.hannibal-pitch | 104 + .../starpu_slu_lu_model_trsm_ll_goto.idgraf | 314 + .../starpu_slu_lu_model_trsm_ll_goto.mirage | 144 + .../starpu_slu_lu_model_trsm_ll_goto.sirocco | 183 + ...tarpu_slu_lu_model_trsm_ll_openblas.attila | 145 + ...rpu_slu_lu_model_trsm_ll_openblas.hannibal | 104 + ...u_lu_model_trsm_ll_openblas.hannibal-pitch | 104 + ...tarpu_slu_lu_model_trsm_ll_openblas.idgraf | 314 + ...tarpu_slu_lu_model_trsm_ll_openblas.mirage | 144 + ...arpu_slu_lu_model_trsm_ll_openblas.sirocco | 183 + .../45/starpu_slu_lu_model_trsm_ru.attila | 145 + .../45/starpu_slu_lu_model_trsm_ru.hannibal | 104 + ...starpu_slu_lu_model_trsm_ru.hannibal-pitch | 104 + .../45/starpu_slu_lu_model_trsm_ru.idgraf | 314 + .../45/starpu_slu_lu_model_trsm_ru.mirage | 144 + .../45/starpu_slu_lu_model_trsm_ru.sirocco | 183 + .../starpu_slu_lu_model_trsm_ru_atlas.attila | 145 + ...starpu_slu_lu_model_trsm_ru_atlas.hannibal | 104 + ..._slu_lu_model_trsm_ru_atlas.hannibal-pitch | 104 + .../starpu_slu_lu_model_trsm_ru_atlas.idgraf 
| 314 + .../starpu_slu_lu_model_trsm_ru_atlas.mirage | 144 + .../starpu_slu_lu_model_trsm_ru_atlas.sirocco | 183 + .../starpu_slu_lu_model_trsm_ru_goto.attila | 145 + .../starpu_slu_lu_model_trsm_ru_goto.hannibal | 104 + ...u_slu_lu_model_trsm_ru_goto.hannibal-pitch | 104 + .../starpu_slu_lu_model_trsm_ru_goto.idgraf | 314 + .../starpu_slu_lu_model_trsm_ru_goto.mirage | 144 + .../starpu_slu_lu_model_trsm_ru_goto.sirocco | 183 + ...tarpu_slu_lu_model_trsm_ru_openblas.attila | 145 + ...rpu_slu_lu_model_trsm_ru_openblas.hannibal | 104 + ...u_lu_model_trsm_ru_openblas.hannibal-pitch | 104 + ...tarpu_slu_lu_model_trsm_ru_openblas.idgraf | 314 + ...tarpu_slu_lu_model_trsm_ru_openblas.mirage | 144 + ...arpu_slu_lu_model_trsm_ru_openblas.sirocco | 183 + .../sampling/codelets/tmp/mlr_init.out | 1765 + tools/perfs/bench_sgemm.sh | 93 + tools/perfs/error_model.gp | 46 + tools/perfs/error_model.sh | 109 + tools/release/Makefile | 113 + tools/release/README.md | 50 + tools/starpu_calibrate_bus.1 | 18 + tools/starpu_calibrate_bus.c | 88 + tools/starpu_codelet_histo_profile | 100 + tools/starpu_codelet_histo_profile.1 | 18 + tools/starpu_codelet_histo_profile.in | 100 + tools/starpu_codelet_profile | 81 + tools/starpu_codelet_profile.1 | 18 + tools/starpu_codelet_profile.in | 81 + tools/starpu_config | 86 + tools/starpu_config.1 | 28 + tools/starpu_config.cfg | 203 + tools/starpu_config.in | 86 + tools/starpu_env | 93 + tools/starpu_env.1 | 32 + tools/starpu_env.in | 93 + tools/starpu_fxt_data_trace.1 | 26 + tools/starpu_fxt_data_trace.c | 199 + tools/starpu_fxt_number_events_to_names.1 | 18 + tools/starpu_fxt_number_events_to_names.py | 255 + tools/starpu_fxt_number_events_to_names.py.in | 91 + tools/starpu_fxt_stats.1 | 27 + tools/starpu_fxt_stats.c | 220 + tools/starpu_fxt_tool.1 | 65 + tools/starpu_fxt_tool.c | 145 + tools/starpu_lp2paje.1 | 11 + tools/starpu_lp2paje.c | 162 + tools/starpu_machine_display.1 | 33 + tools/starpu_machine_display.c | 259 + 
tools/starpu_mlr_analysis | 87 + tools/starpu_mlr_analysis.Rmd | 256 + tools/starpu_mlr_analysis.in | 87 + tools/starpu_mpi_comm_matrix.1 | 21 + tools/starpu_mpi_comm_matrix.py | 118 + tools/starpu_mpi_comm_matrix.py.in | 118 + tools/starpu_msexec | 28 + tools/starpu_paje_draw_histogram | 146 + tools/starpu_paje_draw_histogram.1 | 27 + tools/starpu_paje_draw_histogram.R | 125 + tools/starpu_paje_draw_histogram.in | 146 + tools/starpu_paje_sort.in | 108 + tools/starpu_paje_state_stats | 146 + tools/starpu_paje_state_stats.1 | 27 + tools/starpu_paje_state_stats.R | 125 + tools/starpu_paje_state_stats.in | 146 + tools/starpu_paje_summary | 111 + tools/starpu_paje_summary.Rmd | 299 + tools/starpu_paje_summary.in | 111 + tools/starpu_perfmodel_display.1 | 41 + tools/starpu_perfmodel_display.c | 197 + tools/starpu_perfmodel_plot.1 | 50 + tools/starpu_perfmodel_plot.c | 699 + tools/starpu_perfmodel_recdump.c | 486 + tools/starpu_replay.c | 1198 + tools/starpu_replay_sched.c | 439 + tools/starpu_sched_display.c | 32 + tools/starpu_send_recv_data_use.py | 142 + tools/starpu_smpi.xslt | 59 + tools/starpu_smpirun | 178 + tools/starpu_smpirun.in | 178 + tools/starpu_tasks_rec_complete.1 | 13 + tools/starpu_tasks_rec_complete.c | 202 + tools/starpu_tcpipexec | 104 + tools/starpu_tcpipexec.in | 104 + tools/starpu_temanejo2.sh | 27 + tools/starpu_trace_state_stats.py | 397 + tools/starpu_workers_activity | 189 + tools/starpu_workers_activity.1 | 26 + tools/starpu_workers_activity.in | 189 + 3187 files changed, 856894 insertions(+) create mode 100644 .gitlab-ci.yml create mode 100644 AUTHORS create mode 100644 COPYING.LGPL create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile.am create mode 100644 Makefile.in create mode 100644 README.dev create mode 100644 README.md create mode 100644 STARPU-REVISION create mode 100644 STARPU-VERSION create mode 100644 TODO create mode 100644 aclocal.m4 create mode 100755 autogen.sh create mode 100644 
bubble/Makefile.am create mode 100644 bubble/Makefile.in create mode 100644 bubble/tests/Makefile.am create mode 100644 bubble/tests/Makefile.in create mode 100644 bubble/tests/basic/b.c create mode 100644 bubble/tests/basic/b2t.c create mode 100644 bubble/tests/basic/basic.h create mode 100644 bubble/tests/basic/bb.c create mode 100644 bubble/tests/basic/bbt.c create mode 100644 bubble/tests/basic/bbtt.c create mode 100644 bubble/tests/basic/brbtt.c create mode 100644 bubble/tests/basic/brec.c create mode 100644 bubble/tests/basic/brec_level.c create mode 100644 bubble/tests/basic/brt.c create mode 100644 bubble/tests/basic/btb.c create mode 100644 bubble/tests/basic/btb_func.c create mode 100644 bubble/tests/basic/btt.c create mode 100644 bubble/tests/basic/gemm_dag.c create mode 100644 bubble/tests/basic/read.c create mode 100644 bubble/tests/basic/sync.c create mode 100644 bubble/tests/basic/tbbt.c create mode 100644 bubble/tests/basic/tbrbtt.c create mode 100644 bubble/tests/basic/tbtbt.c create mode 100644 bubble/tests/loader.c create mode 100644 bubble/tests/vector/vector.c create mode 100755 build-aux/ar-lib create mode 100755 build-aux/compile create mode 100755 build-aux/config.guess create mode 100755 build-aux/config.sub create mode 100755 build-aux/depcomp create mode 100755 build-aux/install-sh create mode 100755 build-aux/ltmain.sh create mode 100755 build-aux/missing create mode 100755 build-aux/test-driver create mode 100755 configure create mode 100644 configure.ac create mode 100644 contrib/ci.inria.fr/Jenkinsfile-basic create mode 100644 contrib/ci.inria.fr/Jenkinsfile-bsd create mode 100644 contrib/ci.inria.fr/Jenkinsfile-windows create mode 100755 contrib/ci.inria.fr/job-0-tarball.sh create mode 100755 contrib/ci.inria.fr/job-1-build-windows.sh create mode 100644 contrib/ci.inria.fr/job-1-check-windows.bat create mode 100755 contrib/ci.inria.fr/job-1-check.sh create mode 100755 contrib/gitlab/build.sh create mode 100755 
contrib/gitlab/chameleon.sh create mode 100755 contrib/gitlab/coverity.sh create mode 100755 contrib/gitlab/deploy.sh create mode 100755 contrib/gitlab/simgrid.sh create mode 100644 doc/Makefile.am create mode 100644 doc/Makefile.in create mode 100644 doc/doxy.mk create mode 100644 doc/doxygen.cfg create mode 100644 doc/doxygen/Makefile.am create mode 100644 doc/doxygen/Makefile.in create mode 100644 doc/doxygen/chapters/api/bubble_support.doxy create mode 100644 doc/doxygen/chapters/api/fft_support.doxy create mode 100644 doc/doxygen/chapters/api/fortran_support.doxy create mode 100644 doc/doxygen/chapters/api/threads.doxy create mode 100644 doc/doxygen/chapters/fdl_1_3.doxy create mode 100644 doc/doxygen/chapters/files.doxy create mode 100644 doc/doxygen/chapters/foreword.doxy create mode 100644 doc/doxygen/chapters/images/arbiter.png create mode 100644 doc/doxygen/chapters/images/data_trace.png create mode 100644 doc/doxygen/chapters/images/distrib_data.png create mode 100644 doc/doxygen/chapters/images/distrib_data_histo.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_build.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_fxt.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_graph.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_hgraph.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_paje_trace.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_plugin.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_run.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_svg_graph.png create mode 100644 doc/doxygen/chapters/images/eclipse_hello_vite.png create mode 100644 doc/doxygen/chapters/images/eclipse_install_cdt.png create mode 100644 doc/doxygen/chapters/images/eclipse_install_pde.png create mode 100644 doc/doxygen/chapters/images/eclipse_installer.png create mode 100644 doc/doxygen/chapters/images/paje_draw_histogram.png create mode 100644 
doc/doxygen/chapters/images/parallel_worker2.png create mode 100644 doc/doxygen/chapters/images/runtime-par.png create mode 100644 doc/doxygen/chapters/images/starpu_chol_model_11_type.png create mode 100644 doc/doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png create mode 100644 doc/doxygen/chapters/images/starpu_log_arr.png create mode 100644 doc/doxygen/chapters/images/starpu_log_list.png create mode 100644 doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based.png create mode 100644 doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png create mode 100644 doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png create mode 100644 doc/doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png create mode 100644 doc/doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png create mode 100644 doc/doxygen/chapters/images/starpupy_handle_func_perf.png create mode 100644 doc/doxygen/chapters/images/starpupy_handle_func_perf_pickle.png create mode 100644 doc/doxygen/chapters/images/starpupy_handle_perf.png create mode 100644 doc/doxygen/chapters/images/starpupy_handle_perf_pickle.png create mode 100644 doc/doxygen/chapters/images/starvz_visu.png create mode 100644 doc/doxygen/chapters/images/starvz_visu_r.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_futur.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_handle.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_none.png create mode 100644 doc/doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png create mode 100644 doc/doxygen/chapters/images/temanejo.png create mode 100644 
doc/doxygen/chapters/images/trace_bw_heatmap.png create mode 100644 doc/doxygen/chapters/images/trace_recv_use.png create mode 100644 doc/doxygen/chapters/images/trace_send_use.png create mode 100644 doc/doxygen/chapters/images/trace_volume_heatmap.png create mode 100644 doc/doxygen/chapters/starpu_applications/applications_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_applications/code/stencil5.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/stencil5_starpu.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/stencil5_starpu_mpi.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/vector_scal_c.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/vector_scal_c_align.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/vector_scal_cpu.c create mode 100644 doc/doxygen/chapters/starpu_applications/code/vector_scal_starpu.c create mode 100644 doc/doxygen/chapters/starpu_applications/stencil.doxy create mode 100644 doc/doxygen/chapters/starpu_applications/vector_scaling.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/basic_examples.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/basics_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c create mode 100644 doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c create mode 100644 doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c create mode 100644 doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c create mode 100644 doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl create mode 100644 doc/doxygen/chapters/starpu_basics/data_management.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/examples_sources.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/scheduling.doxy create mode 100644 
doc/doxygen/chapters/starpu_basics/starpu_applications.doxy create mode 100644 doc/doxygen/chapters/starpu_basics/tasks.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/bubble.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/code/complex.c create mode 100644 doc/doxygen/chapters/starpu_extensions/code/disk_compute.c create mode 100644 doc/doxygen/chapters/starpu_extensions/code/disk_copy.c create mode 100644 doc/doxygen/chapters/starpu_extensions/code/forkmode.c create mode 100644 doc/doxygen/chapters/starpu_extensions/code/multiformat.c create mode 100644 doc/doxygen/chapters/starpu_extensions/code/simgrid.c create mode 100644 doc/doxygen/chapters/starpu_extensions/cuda_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/fft_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/helpers.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/interoperability.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/mpi_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/opencl_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/out_of_core.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy 
create mode 100644 doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/simgrid.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy create mode 100644 doc/doxygen/chapters/starpu_extensions/transactions.doxy create mode 100644 doc/doxygen/chapters/starpu_faq/check_list_performance.doxy create mode 100644 doc/doxygen/chapters/starpu_faq/faq.doxy create mode 100644 doc/doxygen/chapters/starpu_faq/faq_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_installation/building.doxy create mode 100644 doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy create mode 100644 doc/doxygen/chapters/starpu_installation/configure_options.doxy create mode 100644 doc/doxygen/chapters/starpu_installation/environment_variables.doxy create mode 100644 doc/doxygen/chapters/starpu_installation/installation_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_introduction/doc_organization.doxy create mode 100644 doc/doxygen/chapters/starpu_introduction/glossary.doxy create mode 100644 doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_languages/code/java_spark.java create mode 100644 doc/doxygen/chapters/starpu_languages/code/java_starpu.java create mode 100644 doc/doxygen/chapters/starpu_languages/code/nf_initexit.f90 create mode 100644 doc/doxygen/chapters/starpu_languages/java.doxy create mode 100644 doc/doxygen/chapters/starpu_languages/languages_intro.doxy create mode 100644 doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy create mode 100644 doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy create mode 100644 doc/doxygen/chapters/starpu_languages/python.doxy create mode 100644 doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy create mode 100644 
doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy create mode 100644 doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy create mode 100644 doc/doxygen/chapters/starpu_performances/performances_intro.doxy create mode 100644 doc/doxygen/chapters/version.html create mode 100644 doc/doxygen/chapters/version.sty create mode 100644 doc/doxygen/doxygen-config-include.cfg.in create mode 100644 doc/doxygen/doxygen-config.cfg.in create mode 100755 doc/doxygen/doxygen_filter.sh.in create mode 100644 doc/doxygen/refman.tex create mode 100644 doc/doxygen_dev/Makefile.am create mode 100644 doc/doxygen_dev/Makefile.in create mode 100644 doc/doxygen_dev/chapters/000_introduction.doxy create mode 100644 doc/doxygen_dev/chapters/010_core.doxy create mode 100644 doc/doxygen_dev/chapters/version.html create mode 100644 doc/doxygen_dev/chapters/version.sty create mode 100644 doc/doxygen_dev/doxygen-config-include.cfg.in create mode 100644 doc/doxygen_dev/doxygen-config.cfg.in create mode 100755 doc/doxygen_dev/doxygen_filter.sh.in create mode 100644 doc/doxygen_dev/refman.tex create mode 100644 doc/doxygen_web_applications/Makefile.am create mode 100644 doc/doxygen_web_applications/Makefile.in create mode 100644 doc/doxygen_web_applications/chapters/version.html create mode 100644 doc/doxygen_web_applications/chapters/version.sty create mode 100644 doc/doxygen_web_applications/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_applications/refman.tex create mode 100644 doc/doxygen_web_basics/Makefile.am create mode 100644 doc/doxygen_web_basics/Makefile.in create mode 100644 doc/doxygen_web_basics/chapters/version.html create mode 100644 doc/doxygen_web_basics/chapters/version.sty create mode 100644 doc/doxygen_web_basics/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_basics/refman.tex create mode 100644 doc/doxygen_web_extensions/Makefile.am create mode 100644 doc/doxygen_web_extensions/Makefile.in create mode 100644 
doc/doxygen_web_extensions/chapters/version.html create mode 100644 doc/doxygen_web_extensions/chapters/version.sty create mode 100644 doc/doxygen_web_extensions/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_extensions/refman.tex create mode 100644 doc/doxygen_web_faq/Makefile.am create mode 100644 doc/doxygen_web_faq/Makefile.in create mode 100644 doc/doxygen_web_faq/chapters/version.html create mode 100644 doc/doxygen_web_faq/chapters/version.sty create mode 100644 doc/doxygen_web_faq/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_faq/refman.tex create mode 100644 doc/doxygen_web_installation/Makefile.am create mode 100644 doc/doxygen_web_installation/Makefile.in create mode 100644 doc/doxygen_web_installation/chapters/version.html create mode 100644 doc/doxygen_web_installation/chapters/version.sty create mode 100644 doc/doxygen_web_installation/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_installation/refman.tex create mode 100644 doc/doxygen_web_introduction/Makefile.am create mode 100644 doc/doxygen_web_introduction/Makefile.in create mode 100644 doc/doxygen_web_introduction/chapters/version.html create mode 100644 doc/doxygen_web_introduction/chapters/version.sty create mode 100644 doc/doxygen_web_introduction/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_introduction/refman.tex create mode 100644 doc/doxygen_web_languages/Makefile.am create mode 100644 doc/doxygen_web_languages/Makefile.in create mode 100644 doc/doxygen_web_languages/chapters/version.html create mode 100644 doc/doxygen_web_languages/chapters/version.sty create mode 100644 doc/doxygen_web_languages/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_languages/refman.tex create mode 100644 doc/doxygen_web_performances/Makefile.am create mode 100644 doc/doxygen_web_performances/Makefile.in create mode 100644 doc/doxygen_web_performances/chapters/version.html create mode 100644 doc/doxygen_web_performances/chapters/version.sty create mode 100644 
doc/doxygen_web_performances/doxygen-config.cfg.in create mode 100644 doc/doxygen_web_performances/refman.tex create mode 100755 doc/extractHeadline.sh create mode 100755 doc/fixLinks.sh create mode 100755 doc/sectionNumbering.py create mode 100644 doc/title.tex create mode 100644 doc/tutorial/Makefile create mode 100644 doc/tutorial/README create mode 100644 doc/tutorial/hello_world.c create mode 100644 doc/tutorial/hello_world_msvc.c create mode 100644 doc/tutorial/vector_scal.c create mode 100644 doc/tutorial/vector_scal_cpu.c create mode 100644 doc/tutorial/vector_scal_cuda.cu create mode 100644 doc/tutorial/vector_scal_opencl.c create mode 100644 doc/tutorial/vector_scal_opencl_kernel.cl create mode 100644 eclipse-plugin/.classpath create mode 100644 eclipse-plugin/.project create mode 100644 eclipse-plugin/.settings/org.eclipse.jdt.core.prefs create mode 100644 eclipse-plugin/META-INF/MANIFEST.MF create mode 100644 eclipse-plugin/Makefile.am create mode 100644 eclipse-plugin/Makefile.in create mode 100644 eclipse-plugin/build.properties create mode 100644 eclipse-plugin/build.xml create mode 100644 eclipse-plugin/examples/Makefile.am create mode 100644 eclipse-plugin/examples/Makefile.in create mode 100644 eclipse-plugin/examples/hello/.cproject.in create mode 100644 eclipse-plugin/examples/hello/.project create mode 100644 eclipse-plugin/examples/hello/.settings/language.settings.xml create mode 100644 eclipse-plugin/examples/hello/hello.c create mode 100644 eclipse-plugin/icons/fxt.png create mode 100644 eclipse-plugin/icons/svg.png create mode 100644 eclipse-plugin/icons/taskGraph.png create mode 100644 eclipse-plugin/icons/vite.png create mode 100644 eclipse-plugin/plugin.xml create mode 100644 eclipse-plugin/src/Makefile.am create mode 100644 eclipse-plugin/src/Makefile.in create mode 100644 eclipse-plugin/src/deploy/build.xml create mode 100644 eclipse-plugin/src/deploy/javaCompilerArgs create mode 100644 
eclipse-plugin/src/starpu/handlers/SvgHandler.java create mode 100644 eclipse-plugin/src/starpu/handlers/TaskGraphHandler.java create mode 100644 eclipse-plugin/src/starpu/handlers/TraceGenHandler.java create mode 100644 eclipse-plugin/src/starpu/handlers/TraceUtils.java create mode 100644 eclipse-plugin/src/starpu/handlers/TraceVizHandler.java create mode 100755 eclipse-plugin/tools/cproject.sh create mode 100755 eclipse-plugin/tools/install_workspace.sh create mode 100644 examples/Makefile.am create mode 100644 examples/Makefile.in create mode 100644 examples/README.txt create mode 100644 examples/api/bcsr_data_interface.c create mode 100644 examples/api/block_data_interface.c create mode 100644 examples/api/coo_data_interface.c create mode 100644 examples/api/csr_data_interface.c create mode 100644 examples/api/matrix_data_interface.c create mode 100644 examples/api/multiformat_data_interface.c create mode 100644 examples/api/tensor_data_interface.c create mode 100644 examples/api/variable_data_interface.c create mode 100644 examples/api/vector_data_interface.c create mode 100644 examples/api/void_data_interface.c create mode 100644 examples/axpy/axpy.c create mode 100644 examples/axpy/axpy.h create mode 100644 examples/axpy/axpy_opencl.c create mode 100644 examples/axpy/axpy_opencl_kernel.cl create mode 100644 examples/basic_examples/block.c create mode 100644 examples/basic_examples/block_cpu.c create mode 100644 examples/basic_examples/block_cuda.cu create mode 100644 examples/basic_examples/block_hip.hip create mode 100644 examples/basic_examples/block_opencl.c create mode 100644 examples/basic_examples/block_opencl_kernel.cl create mode 100644 examples/basic_examples/dynamic_handles.c create mode 100644 examples/basic_examples/hello_world.c create mode 100644 examples/basic_examples/hooks.c create mode 100644 examples/basic_examples/mult.c create mode 100644 examples/basic_examples/mult_cuda.cu create mode 100644 examples/basic_examples/mult_hip.hip create 
mode 100644 examples/basic_examples/multiformat.c create mode 100644 examples/basic_examples/multiformat_conversion_codelets.c create mode 100644 examples/basic_examples/multiformat_conversion_codelets_cuda.cu create mode 100644 examples/basic_examples/multiformat_conversion_codelets_opencl.c create mode 100644 examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl create mode 100644 examples/basic_examples/multiformat_cuda.cu create mode 100644 examples/basic_examples/multiformat_opencl.c create mode 100644 examples/basic_examples/multiformat_opencl_kernel.cl create mode 100644 examples/basic_examples/multiformat_types.h create mode 100644 examples/basic_examples/ndim.c create mode 100644 examples/basic_examples/task_insert_color.c create mode 100644 examples/basic_examples/topology.c create mode 100644 examples/basic_examples/variable.c create mode 100644 examples/basic_examples/variable_kernels.cu create mode 100644 examples/basic_examples/variable_kernels_cpu.c create mode 100644 examples/basic_examples/variable_kernels_opencl.c create mode 100644 examples/basic_examples/variable_kernels_opencl_kernel.cl create mode 100644 examples/basic_examples/vector_scal.c create mode 100644 examples/basic_examples/vector_scal_c.c create mode 100644 examples/basic_examples/vector_scal_cpu.c create mode 100644 examples/basic_examples/vector_scal_cpu_icc.icc create mode 100644 examples/basic_examples/vector_scal_cpu_template.h create mode 100644 examples/basic_examples/vector_scal_cuda.cu create mode 100644 examples/basic_examples/vector_scal_fortran.F create mode 100644 examples/basic_examples/vector_scal_hip.hip create mode 100644 examples/basic_examples/vector_scal_opencl.c create mode 100644 examples/basic_examples/vector_scal_opencl_kernel.cl create mode 100644 examples/binary/binary.c create mode 100644 examples/callback/callback.c create mode 100644 examples/callback/prologue.c create mode 100644 examples/cg/cg.c create mode 100644 examples/cg/cg.h 
create mode 100644 examples/cg/cg_kernels.c create mode 100644 examples/cholesky/cholesky.h create mode 100755 examples/cholesky/cholesky.sh create mode 100644 examples/cholesky/cholesky_compil.c create mode 100644 examples/cholesky/cholesky_compiled.c create mode 100644 examples/cholesky/cholesky_grain_tag.c create mode 100644 examples/cholesky/cholesky_implicit.c create mode 100755 examples/cholesky/cholesky_julia.sh create mode 100644 examples/cholesky/cholesky_kernels.c create mode 100644 examples/cholesky/cholesky_models.c create mode 100644 examples/cholesky/cholesky_tag.c create mode 100644 examples/cholesky/cholesky_tile_tag.c create mode 100644 examples/cholesky/libmy_dmda.c create mode 100644 examples/cholesky/libmy_dmda.h create mode 100644 examples/common/blas.c create mode 100644 examples/common/blas.h create mode 100644 examples/common/blas_model.c create mode 100644 examples/common/blas_model.h create mode 100644 examples/cpp/Makefile_add_vectors.mk create mode 100644 examples/cpp/Makefile_add_vectors_cpp11.mk create mode 100644 examples/cpp/add_vectors.cpp create mode 100644 examples/cpp/add_vectors_cpp11.cpp create mode 100644 examples/cpp/add_vectors_interface.cpp create mode 100644 examples/cpp/incrementer_cpp.cpp create mode 100644 examples/dependency/sequential_consistency.c create mode 100644 examples/dependency/task_end_dep.c create mode 100644 examples/dependency/task_end_dep_add.c create mode 100644 examples/filters/alloc.c create mode 100644 examples/filters/custom_mf/conversion.cu create mode 100644 examples/filters/custom_mf/conversion_opencl.c create mode 100644 examples/filters/custom_mf/conversion_opencl.cl create mode 100644 examples/filters/custom_mf/cuda.cu create mode 100644 examples/filters/custom_mf/custom_conversion_codelets.c create mode 100644 examples/filters/custom_mf/custom_interface.c create mode 100644 examples/filters/custom_mf/custom_interface.h create mode 100644 examples/filters/custom_mf/custom_mf_filter.c create 
mode 100644 examples/filters/custom_mf/custom_opencl.c create mode 100644 examples/filters/custom_mf/custom_opencl.cl create mode 100644 examples/filters/custom_mf/custom_types.h create mode 100644 examples/filters/f3d_cpu.c create mode 100644 examples/filters/f3d_cuda.cu create mode 100644 examples/filters/f3d_hip.hip create mode 100644 examples/filters/f4d_cpu.c create mode 100644 examples/filters/f4d_cuda.cu create mode 100644 examples/filters/f4d_hip.hip create mode 100644 examples/filters/f5d_print.c create mode 100644 examples/filters/fblock.c create mode 100644 examples/filters/fblock_cpu.c create mode 100644 examples/filters/fblock_cuda.cu create mode 100644 examples/filters/fblock_hip.hip create mode 100644 examples/filters/fblock_opencl.c create mode 100644 examples/filters/fblock_opencl_kernel.cl create mode 100644 examples/filters/fblock_pick_matrix.c create mode 100644 examples/filters/fblock_pick_variable.c create mode 100644 examples/filters/fblock_print.c create mode 100644 examples/filters/fmatrix.c create mode 100644 examples/filters/fmatrix_cpu.c create mode 100644 examples/filters/fmatrix_cuda.cu create mode 100644 examples/filters/fmatrix_hip.hip create mode 100644 examples/filters/fmatrix_pick_variable.c create mode 100644 examples/filters/fmatrix_pick_vector.c create mode 100644 examples/filters/fmatrix_print.c create mode 100644 examples/filters/fmultiple_cuda.cu create mode 100644 examples/filters/fmultiple_hip.hip create mode 100644 examples/filters/fmultiple_manual.c create mode 100644 examples/filters/fmultiple_submit.c create mode 100644 examples/filters/fmultiple_submit_implicit.c create mode 100644 examples/filters/fmultiple_submit_readonly.c create mode 100644 examples/filters/fmultiple_submit_readonly_downgrade.c create mode 100644 examples/filters/fndim.c create mode 100644 examples/filters/fndim_1d_pick_variable.c create mode 100644 examples/filters/fndim_2d_pick_vector.c create mode 100644 examples/filters/fndim_3d_pick_matrix.c 
create mode 100644 examples/filters/fndim_4d_pick_block.c create mode 100644 examples/filters/fndim_5d_pick_tensor.c create mode 100644 examples/filters/fndim_pick_ndim.c create mode 100644 examples/filters/fndim_pick_variable.c create mode 100644 examples/filters/fndim_to_block.c create mode 100644 examples/filters/fndim_to_matrix.c create mode 100644 examples/filters/fndim_to_tensor.c create mode 100644 examples/filters/fndim_to_variable.c create mode 100644 examples/filters/fndim_to_vector.c create mode 100644 examples/filters/fread.c create mode 100644 examples/filters/frecursive.c create mode 100644 examples/filters/ftensor.c create mode 100644 examples/filters/ftensor_cpu.c create mode 100644 examples/filters/ftensor_cuda.cu create mode 100644 examples/filters/ftensor_hip.hip create mode 100644 examples/filters/ftensor_pick_block.c create mode 100644 examples/filters/ftensor_pick_variable.c create mode 100644 examples/filters/ftensor_print.c create mode 100644 examples/filters/fvariable_cuda.cu create mode 100644 examples/filters/fvector.c create mode 100644 examples/filters/fvector_cpu.c create mode 100644 examples/filters/fvector_cuda.cu create mode 100644 examples/filters/fvector_hip.hip create mode 100644 examples/filters/fvector_pick_variable.c create mode 100644 examples/filters/shadow.c create mode 100644 examples/filters/shadow2d.c create mode 100644 examples/filters/shadow3d.c create mode 100644 examples/filters/shadow4d.c create mode 100644 examples/filters/shadownd.c create mode 100644 examples/fortran/Makefile create mode 100644 examples/fortran/hello.F create mode 100644 examples/fortran/hello_c.c create mode 100644 examples/fortran/starpu_fortran.h create mode 100644 examples/fortran90/Makefile.mk create mode 100644 examples/fortran90/f90_example.f90 create mode 100644 examples/fortran90/marshalling.c create mode 100644 examples/fortran90/mod_compute.f90 create mode 100644 examples/fortran90/mod_interface.f90 create mode 100644 
examples/fortran90/mod_types.f90 create mode 100644 examples/fortran90/starpu_mod.f90 create mode 100644 examples/gl_interop/gl_interop.c create mode 100644 examples/gl_interop/gl_interop_idle.c create mode 100644 examples/heat/dw_factolu.c create mode 100644 examples/heat/dw_factolu.h create mode 100644 examples/heat/dw_factolu_grain.c create mode 100644 examples/heat/dw_factolu_kernels.c create mode 100644 examples/heat/dw_factolu_tag.c create mode 100644 examples/heat/dw_sparse_cg.c create mode 100644 examples/heat/dw_sparse_cg.h create mode 100644 examples/heat/dw_sparse_cg_kernels.c create mode 100644 examples/heat/heat.c create mode 100644 examples/heat/heat.h create mode 100755 examples/heat/heat.sh create mode 100644 examples/heat/heat_display.c create mode 100644 examples/heat/lu_kernels_model.c create mode 100644 examples/heat/lu_kernels_model.h create mode 100644 examples/incrementer/incrementer.c create mode 100644 examples/incrementer/incrementer_kernels.cu create mode 100644 examples/incrementer/incrementer_kernels_opencl.c create mode 100644 examples/incrementer/incrementer_kernels_opencl_kernel.cl create mode 100644 examples/interface/complex.c create mode 100644 examples/interface/complex_codelet.h create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle.c create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_codelet.h create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_filters.c create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_interface.c create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_interface.h create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_kernels.cu create mode 100644 examples/interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c create mode 100644 examples/interface/complex_filters.c create mode 
100644 examples/interface/complex_interface.c create mode 100644 examples/interface/complex_interface.h create mode 100644 examples/interface/complex_kernels.cl create mode 100644 examples/interface/complex_kernels.cu create mode 100644 examples/interface/complex_kernels_opencl.c create mode 100644 examples/loader.c create mode 100644 examples/lu/blas_complex.c create mode 100644 examples/lu/blas_complex.h create mode 100644 examples/lu/clu.c create mode 100644 examples/lu/clu_implicit.c create mode 100644 examples/lu/clu_implicit_pivot.c create mode 100644 examples/lu/clu_kernels.c create mode 100644 examples/lu/clu_pivot.c create mode 100644 examples/lu/complex_double.h create mode 100644 examples/lu/complex_float.h create mode 100644 examples/lu/dlu.c create mode 100644 examples/lu/dlu_implicit.c create mode 100644 examples/lu/dlu_implicit_pivot.c create mode 100644 examples/lu/dlu_kernels.c create mode 100644 examples/lu/dlu_pivot.c create mode 100644 examples/lu/lu-double.h create mode 100644 examples/lu/lu-float.h create mode 100755 examples/lu/lu.sh create mode 100644 examples/lu/lu_example.c create mode 100644 examples/lu/lu_example_complex_double.c create mode 100644 examples/lu/lu_example_complex_float.c create mode 100644 examples/lu/lu_example_double.c create mode 100644 examples/lu/lu_example_float.c create mode 100644 examples/lu/slu.c create mode 100644 examples/lu/slu_implicit.c create mode 100644 examples/lu/slu_implicit_pivot.c create mode 100644 examples/lu/slu_kernels.c create mode 100644 examples/lu/slu_pivot.c create mode 100644 examples/lu/xlu.c create mode 100644 examples/lu/xlu.h create mode 100644 examples/lu/xlu_implicit.c create mode 100644 examples/lu/xlu_implicit_pivot.c create mode 100644 examples/lu/xlu_kernels.c create mode 100644 examples/lu/xlu_kernels.h create mode 100644 examples/lu/xlu_pivot.c create mode 100644 examples/lu/zlu.c create mode 100644 examples/lu/zlu_implicit.c create mode 100644 examples/lu/zlu_implicit_pivot.c 
create mode 100644 examples/lu/zlu_kernels.c create mode 100644 examples/lu/zlu_pivot.c create mode 100644 examples/mandelbrot/mandelbrot.c create mode 100644 examples/matvecmult/matvecmult.c create mode 100644 examples/matvecmult/matvecmult_kernel.cl create mode 100644 examples/mlr/mlr.c create mode 100644 examples/mult/dgemm.c create mode 100644 examples/mult/dgemm_layout.c create mode 100644 examples/mult/double.h create mode 100644 examples/mult/sgemm.c create mode 100755 examples/mult/sgemm.sh create mode 100644 examples/mult/sgemm_layout.c create mode 100644 examples/mult/simple.h create mode 100644 examples/mult/xgemm.c create mode 100644 examples/mult/xgemm.h create mode 100644 examples/mult/xgemm_layout.c create mode 100644 examples/native_fortran/Makefile_nf_dynbuf.mk create mode 100644 examples/native_fortran/Makefile_nf_example.mk create mode 100644 examples/native_fortran/Makefile_nf_matrix.mk create mode 100644 examples/native_fortran/Makefile_nf_partition.mk create mode 100644 examples/native_fortran/Makefile_nf_sched_ctx.mk create mode 100644 examples/native_fortran/Makefile_nf_varbuf.mk create mode 100644 examples/native_fortran/Makefile_nf_vector.mk create mode 100644 examples/native_fortran/fstarpu_mod.f90 create mode 100644 examples/native_fortran/nf_codelets.f90 create mode 100644 examples/native_fortran/nf_compute.f90 create mode 100644 examples/native_fortran/nf_dynbuf.f90 create mode 100644 examples/native_fortran/nf_dynbuf_cl.f90 create mode 100644 examples/native_fortran/nf_example.f90 create mode 100644 examples/native_fortran/nf_matrix.f90 create mode 100644 examples/native_fortran/nf_partition.f90 create mode 100644 examples/native_fortran/nf_partition_cl.f90 create mode 100644 examples/native_fortran/nf_sched_ctx.f90 create mode 100644 examples/native_fortran/nf_sched_ctx_cl.f90 create mode 100644 examples/native_fortran/nf_types.f90 create mode 100644 examples/native_fortran/nf_varbuf.f90 create mode 100644 
examples/native_fortran/nf_varbuf_cl.f90 create mode 100644 examples/native_fortran/nf_vector.f90 create mode 100644 examples/openmp/vector_scal_omp.c create mode 100644 examples/parallel_workers/parallel_workers.c create mode 100644 examples/parallel_workers/parallel_workers_func.c create mode 100644 examples/parallel_workers/parallel_workers_oldapi.c create mode 100644 examples/perf_monitoring/perf_counters_01.c create mode 100644 examples/perf_monitoring/perf_counters_02.c create mode 100644 examples/perf_steering/perf_knobs_01.c create mode 100644 examples/perf_steering/perf_knobs_02.c create mode 100644 examples/perf_steering/perf_knobs_03.c create mode 100644 examples/pi/pi.c create mode 100644 examples/pi/pi.h create mode 100644 examples/pi/pi_kernel.cu create mode 100644 examples/pi/pi_redux.c create mode 100644 examples/pi/pi_redux_kernel.cu create mode 100644 examples/pipeline/pipeline.c create mode 100644 examples/ppm_downscaler/ppm_downscaler.c create mode 100644 examples/ppm_downscaler/ppm_downscaler.h create mode 100644 examples/ppm_downscaler/yuv_downscaler.c create mode 100644 examples/ppm_downscaler/yuv_downscaler.h create mode 100644 examples/profiling/profiling.c create mode 100644 examples/profiling_tool/libprofiling_tool.c create mode 100755 examples/profiling_tool/prof.sh create mode 100644 examples/reductions/dot_product.c create mode 100644 examples/reductions/dot_product.h create mode 100644 examples/reductions/dot_product_kernels.cu create mode 100644 examples/reductions/dot_product_opencl_kernels.cl create mode 100644 examples/reductions/minmax_reduction.c create mode 100644 examples/sched_ctx/axpy_partition_gpu.cu create mode 100644 examples/sched_ctx/axpy_partition_gpu.h create mode 100644 examples/sched_ctx/dummy_sched_with_ctx.c create mode 100644 examples/sched_ctx/gpu_partition.c create mode 100644 examples/sched_ctx/nested_sched_ctxs.c create mode 100644 examples/sched_ctx/parallel_code.c create mode 100644 
examples/sched_ctx/parallel_tasks_reuse_handle.c create mode 100644 examples/sched_ctx/prio.c create mode 100644 examples/sched_ctx/sched_ctx.c create mode 100644 examples/sched_ctx/sched_ctx_delete.c create mode 100644 examples/sched_ctx/sched_ctx_empty.c create mode 100644 examples/sched_ctx/sched_ctx_remove.c create mode 100644 examples/sched_ctx/sched_ctx_without_sched_policy.c create mode 100644 examples/sched_ctx/sched_ctx_without_sched_policy_awake.c create mode 100644 examples/sched_ctx/two_cpu_contexts.c create mode 100644 examples/sched_ctx_utils/sched_ctx_utils.c create mode 100644 examples/sched_ctx_utils/sched_ctx_utils.h create mode 100644 examples/scheduler/dummy_modular_sched.c create mode 100644 examples/scheduler/dummy_sched.c create mode 100644 examples/scheduler/heteroprio_test.c create mode 100644 examples/scheduler/libdummy_sched.c create mode 100755 examples/scheduler/libdummy_sched.sh create mode 100755 examples/scheduler/schedulers.sh create mode 100755 examples/scheduler/schedulers_context.sh create mode 100644 examples/spmd/vector_scal_spmd.c create mode 100644 examples/spmv/dw_block_spmv.c create mode 100644 examples/spmv/dw_block_spmv.h create mode 100644 examples/spmv/dw_block_spmv_kernels.c create mode 100644 examples/spmv/matrix_market/examples/fidapm05.mtx create mode 100644 examples/spmv/matrix_market/mm_to_bcsr.c create mode 100644 examples/spmv/matrix_market/mm_to_bcsr.h create mode 100644 examples/spmv/matrix_market/mmio.c create mode 100644 examples/spmv/matrix_market/mmio.h create mode 100644 examples/spmv/spmv.c create mode 100644 examples/spmv/spmv.h create mode 100644 examples/spmv/spmv_cuda.cu create mode 100644 examples/spmv/spmv_kernels.c create mode 100644 examples/spmv/spmv_opencl.cl create mode 100644 examples/stencil/0.5.out create mode 100644 examples/stencil/0.out create mode 100644 examples/stencil/1.out create mode 100644 examples/stencil/2.out create mode 100644 examples/stencil/3.out create mode 100644 
examples/stencil/4.out create mode 100644 examples/stencil/6.out create mode 100644 examples/stencil/Makefile.am create mode 100644 examples/stencil/Makefile.in create mode 100644 examples/stencil/README create mode 100644 examples/stencil/implicit-stencil-blocks.c create mode 100644 examples/stencil/implicit-stencil-kernels.c create mode 100644 examples/stencil/implicit-stencil-tasks.c create mode 100644 examples/stencil/implicit-stencil.c create mode 100644 examples/stencil/implicit-stencil.h create mode 100644 examples/stencil/life.c create mode 100644 examples/stencil/life_cuda.cu create mode 100644 examples/stencil/life_opencl.c create mode 100644 examples/stencil/loader.c create mode 100644 examples/stencil/mpi.out create mode 100644 examples/stencil/results create mode 100755 examples/stencil/run create mode 100644 examples/stencil/shadow.cu create mode 100644 examples/stencil/shadow.h create mode 100644 examples/stencil/shadow_opencl.c create mode 100644 examples/stencil/stencil-blocks.c create mode 100644 examples/stencil/stencil-kernels.c create mode 100644 examples/stencil/stencil-tasks.c create mode 100644 examples/stencil/stencil.c create mode 100644 examples/stencil/stencil.h create mode 100644 examples/subgraphs/codelets.c create mode 100644 examples/subgraphs/main.h create mode 100644 examples/subgraphs/manual.c create mode 100644 examples/subgraphs/partition.c create mode 100644 examples/subgraphs/plan.c create mode 100644 examples/tag_example/tag_example.c create mode 100644 examples/tag_example/tag_example2.c create mode 100644 examples/tag_example/tag_example3.c create mode 100644 examples/tag_example/tag_example4.c create mode 100644 examples/tag_example/tag_restartable.c create mode 100644 examples/transactions/trs_inc.c create mode 100644 examples/transactions/trs_sgemm.c create mode 100644 examples/worker_collections/worker_list_example.c create mode 100644 examples/worker_collections/worker_tree_example.c create mode 100644 
include/fstarpu_mod.f90 create mode 100644 include/omp.h create mode 100644 include/pthread_win32/pthread.h create mode 100644 include/pthread_win32/semaphore.h create mode 100644 include/schedulers/starpu_heteroprio.h create mode 100644 include/schedulers/starpu_scheduler_toolbox.h create mode 100644 include/starpu.h create mode 100644 include/starpu_bitmap.h create mode 100644 include/starpu_bound.h create mode 100644 include/starpu_config.h.in create mode 100644 include/starpu_cublas.h create mode 100644 include/starpu_cublasLt.h create mode 100644 include/starpu_cublas_v2.h create mode 100644 include/starpu_cuda.h create mode 100644 include/starpu_cusolver.h create mode 100644 include/starpu_cusparse.h create mode 100644 include/starpu_data.h create mode 100644 include/starpu_data_filters.h create mode 100644 include/starpu_data_interfaces.h create mode 100644 include/starpu_deprecated_api.h create mode 100644 include/starpu_disk.h create mode 100644 include/starpu_driver.h create mode 100644 include/starpu_expert.h create mode 100644 include/starpu_fxt.h create mode 100644 include/starpu_hash.h create mode 100644 include/starpu_helper.h create mode 100644 include/starpu_hip.h create mode 100644 include/starpu_hipblas.h create mode 100644 include/starpu_max_fpga.h create mode 100644 include/starpu_mod.f90 create mode 100644 include/starpu_opencl.h create mode 100644 include/starpu_openmp.h create mode 100644 include/starpu_parallel_worker.h create mode 100644 include/starpu_perf_monitoring.h create mode 100644 include/starpu_perf_steering.h create mode 100644 include/starpu_perfmodel.h create mode 100644 include/starpu_profiling.h create mode 100644 include/starpu_profiling_tool.h create mode 100644 include/starpu_rand.h create mode 100644 include/starpu_sched_component.h create mode 100644 include/starpu_sched_ctx.h create mode 100644 include/starpu_sched_ctx_hypervisor.h create mode 100644 include/starpu_scheduler.h create mode 100644 
include/starpu_simgrid_wrap.h create mode 100644 include/starpu_sink.h create mode 100644 include/starpu_stdlib.h create mode 100644 include/starpu_task.h create mode 100644 include/starpu_task_bundle.h create mode 100644 include/starpu_task_dep.h create mode 100644 include/starpu_task_list.h create mode 100644 include/starpu_task_util.h create mode 100644 include/starpu_thread.h create mode 100644 include/starpu_thread_util.h create mode 100644 include/starpu_tree.h create mode 100644 include/starpu_util.h create mode 100644 include/starpu_worker.h create mode 100644 julia/Makefile.am create mode 100644 julia/Makefile.in create mode 100644 julia/README create mode 100644 julia/examples/Makefile.am create mode 100644 julia/examples/Makefile.in create mode 100644 julia/examples/axpy/axpy.jl create mode 100755 julia/examples/axpy/axpy.sh create mode 100644 julia/examples/black_scholes/black_scholes.jl create mode 100644 julia/examples/callback/callback.c create mode 100644 julia/examples/callback/callback.jl create mode 100755 julia/examples/callback/callback.sh create mode 100644 julia/examples/check_deps/check_deps.jl create mode 100755 julia/examples/check_deps/check_deps.sh create mode 100755 julia/examples/cholesky/cholesky.sh create mode 100644 julia/examples/cholesky/cholesky_codelets.jl create mode 100644 julia/examples/cholesky/cholesky_common.jl create mode 100644 julia/examples/cholesky/cholesky_implicit.jl create mode 100644 julia/examples/cholesky/cholesky_native.jl create mode 100644 julia/examples/cholesky/cholesky_tag.jl create mode 100644 julia/examples/dependency/end_dep.jl create mode 100755 julia/examples/dependency/end_dep.sh create mode 100644 julia/examples/dependency/tag_dep.jl create mode 100755 julia/examples/dependency/tag_dep.sh create mode 100644 julia/examples/dependency/task_dep.jl create mode 100755 julia/examples/dependency/task_dep.sh create mode 100755 julia/examples/execute.sh.in create mode 100644 julia/examples/gemm/gemm.jl 
create mode 100755 julia/examples/gemm/gemm.sh create mode 100644 julia/examples/gemm/gemm_native.jl create mode 100644 julia/examples/loader.c create mode 100644 julia/examples/mandelbrot/cpu_mandelbrot.c create mode 100644 julia/examples/mandelbrot/cpu_mandelbrot.h create mode 100644 julia/examples/mandelbrot/mandelbrot.c create mode 100644 julia/examples/mandelbrot/mandelbrot.jl create mode 100755 julia/examples/mandelbrot/mandelbrot.sh create mode 100644 julia/examples/mandelbrot/mandelbrot_native.jl create mode 100644 julia/examples/mult/cpu_mult.c create mode 100644 julia/examples/mult/mult.c create mode 100644 julia/examples/mult/mult.jl create mode 100644 julia/examples/mult/mult_native.jl create mode 100755 julia/examples/mult/mult_starpu.sh create mode 100755 julia/examples/mult/perf.sh create mode 100644 julia/examples/task_insert_color/task_insert_color.c create mode 100644 julia/examples/task_insert_color/task_insert_color.jl create mode 100755 julia/examples/task_insert_color/task_insert_color.sh create mode 100644 julia/examples/variable/variable.jl create mode 100755 julia/examples/variable/variable.sh create mode 100644 julia/examples/variable/variable_native.jl create mode 100644 julia/examples/vector_scal/vector_scal.jl create mode 100755 julia/examples/vector_scal/vector_scal.sh create mode 100644 julia/src/Makefile.am create mode 100644 julia/src/Makefile.in create mode 100644 julia/src/StarPU.jl create mode 100644 julia/src/blas.c create mode 100644 julia/src/blas.h create mode 100644 julia/src/blas.jl create mode 100644 julia/src/blas_wrapper.c create mode 100644 julia/src/callback_wrapper.c create mode 100644 julia/src/compiler/c.jl create mode 100644 julia/src/compiler/cuda.jl create mode 100644 julia/src/compiler/expression_manipulation.jl create mode 100644 julia/src/compiler/expressions.jl create mode 100644 julia/src/compiler/file_generation.jl create mode 100644 julia/src/compiler/include.jl create mode 100644 
julia/src/compiler/parsing.jl create mode 100644 julia/src/compiler/utils.jl create mode 100644 julia/src/data.jl create mode 100644 julia/src/destructible.jl create mode 100644 julia/src/dynamic_compiler/Makefile.am create mode 100644 julia/src/dynamic_compiler/Makefile.in create mode 100644 julia/src/globals.jl create mode 100644 julia/src/init.jl create mode 100644 julia/src/linked_list.jl create mode 100644 julia/src/perfmodel.jl create mode 100644 julia/src/task.jl create mode 100644 julia/src/task_dep.jl create mode 100644 julia/src/translate_headers.jl create mode 100644 julia/src/utils.jl create mode 100644 m4/acinclude.m4 create mode 100644 m4/ax_cxx_compile_stdcxx.m4 create mode 100644 m4/ax_dlb_callback_arg.m4 create mode 100644 m4/libs.m4 create mode 100644 m4/libtool.m4 create mode 100644 m4/ltoptions.m4 create mode 100644 m4/ltsugar.m4 create mode 100644 m4/ltversion.m4 create mode 100644 m4/lt~obsolete.m4 create mode 100644 m4/pkg.m4 create mode 100644 make/starpu-loader.mk create mode 100644 make/starpu-notests.mk create mode 100644 make/starpu-subdirtests.mk create mode 100644 make/starpu-tests.mk create mode 100644 make/starpu.mk create mode 100644 min-dgels/Makefile create mode 100644 min-dgels/Makefile.in create mode 100644 min-dgels/additional/blaswrap.h create mode 100644 min-dgels/additional/clapack.h create mode 100644 min-dgels/additional/d_lg10.c create mode 100644 min-dgels/additional/d_sign.c create mode 100644 min-dgels/additional/dcopy.c create mode 100644 min-dgels/additional/dgelq2.c create mode 100644 min-dgels/additional/dgelqf.c create mode 100644 min-dgels/additional/dgels.c create mode 100644 min-dgels/additional/dgemm.c create mode 100644 min-dgels/additional/dgemv.c create mode 100644 min-dgels/additional/dgeqr2.c create mode 100644 min-dgels/additional/dgeqrf.c create mode 100644 min-dgels/additional/dger.c create mode 100644 min-dgels/additional/disnan.c create mode 100644 min-dgels/additional/dlabad.c create mode 100644 
min-dgels/additional/dlaisnan.c create mode 100644 min-dgels/additional/dlamch.c create mode 100644 min-dgels/additional/dlange.c create mode 100644 min-dgels/additional/dlapy2.c create mode 100644 min-dgels/additional/dlarf.c create mode 100644 min-dgels/additional/dlarfb.c create mode 100644 min-dgels/additional/dlarfg.c create mode 100644 min-dgels/additional/dlarfp.c create mode 100644 min-dgels/additional/dlarft.c create mode 100644 min-dgels/additional/dlascl.c create mode 100644 min-dgels/additional/dlaset.c create mode 100644 min-dgels/additional/dlassq.c create mode 100644 min-dgels/additional/dnrm2.c create mode 100644 min-dgels/additional/dorm2r.c create mode 100644 min-dgels/additional/dorml2.c create mode 100644 min-dgels/additional/dormlq.c create mode 100644 min-dgels/additional/dormqr.c create mode 100644 min-dgels/additional/dscal.c create mode 100644 min-dgels/additional/dtrmm.c create mode 100644 min-dgels/additional/dtrmv.c create mode 100644 min-dgels/additional/dtrsm.c create mode 100644 min-dgels/additional/dtrtrs.c create mode 100644 min-dgels/additional/f2c.h create mode 100644 min-dgels/additional/fio.h create mode 100644 min-dgels/additional/fmt.c create mode 100644 min-dgels/additional/fmt.h create mode 100644 min-dgels/additional/ieeeck.c create mode 100644 min-dgels/additional/iladlc.c create mode 100644 min-dgels/additional/iladlr.c create mode 100644 min-dgels/additional/ilaenv.c create mode 100644 min-dgels/additional/iparmq.c create mode 100644 min-dgels/additional/lsame.c create mode 100644 min-dgels/additional/mindgels.h create mode 100644 min-dgels/additional/pow_di.c create mode 100644 min-dgels/additional/s_cat.c create mode 100644 min-dgels/additional/sysdep1.h create mode 100644 min-dgels/additional/wsfe.c create mode 100644 min-dgels/additional/xerbla.c create mode 100644 min-dgels/base/BLAS/SRC/Makefile create mode 100644 min-dgels/base/BLAS/SRC/dasum.c create mode 100644 min-dgels/base/BLAS/SRC/daxpy.c create mode 100644 
min-dgels/base/BLAS/SRC/dcabs1.c create mode 100644 min-dgels/base/BLAS/SRC/dcopy.c create mode 100644 min-dgels/base/BLAS/SRC/ddot.c create mode 100644 min-dgels/base/BLAS/SRC/dgbmv.c create mode 100644 min-dgels/base/BLAS/SRC/dgemm.c create mode 100644 min-dgels/base/BLAS/SRC/dgemv.c create mode 100644 min-dgels/base/BLAS/SRC/dger.c create mode 100644 min-dgels/base/BLAS/SRC/dnrm2.c create mode 100644 min-dgels/base/BLAS/SRC/drot.c create mode 100644 min-dgels/base/BLAS/SRC/drotg.c create mode 100644 min-dgels/base/BLAS/SRC/drotm.c create mode 100644 min-dgels/base/BLAS/SRC/drotmg.c create mode 100644 min-dgels/base/BLAS/SRC/dsbmv.c create mode 100644 min-dgels/base/BLAS/SRC/dscal.c create mode 100644 min-dgels/base/BLAS/SRC/dsdot.c create mode 100644 min-dgels/base/BLAS/SRC/dspmv.c create mode 100644 min-dgels/base/BLAS/SRC/dspr.c create mode 100644 min-dgels/base/BLAS/SRC/dspr2.c create mode 100644 min-dgels/base/BLAS/SRC/dswap.c create mode 100644 min-dgels/base/BLAS/SRC/dsymm.c create mode 100644 min-dgels/base/BLAS/SRC/dsymv.c create mode 100644 min-dgels/base/BLAS/SRC/dsyr.c create mode 100644 min-dgels/base/BLAS/SRC/dsyr2.c create mode 100644 min-dgels/base/BLAS/SRC/dsyr2k.c create mode 100644 min-dgels/base/BLAS/SRC/dsyrk.c create mode 100644 min-dgels/base/BLAS/SRC/dtbmv.c create mode 100644 min-dgels/base/BLAS/SRC/dtbsv.c create mode 100644 min-dgels/base/BLAS/SRC/dtpmv.c create mode 100644 min-dgels/base/BLAS/SRC/dtpsv.c create mode 100644 min-dgels/base/BLAS/SRC/dtrmm.c create mode 100644 min-dgels/base/BLAS/SRC/dtrmv.c create mode 100644 min-dgels/base/BLAS/SRC/dtrsm.c create mode 100644 min-dgels/base/BLAS/SRC/dtrsv.c create mode 100644 min-dgels/base/BLAS/SRC/dzasum.c create mode 100644 min-dgels/base/BLAS/SRC/dznrm2.c create mode 100644 min-dgels/base/BLAS/SRC/idamax.c create mode 100644 min-dgels/base/BLAS/SRC/izamax.c create mode 100644 min-dgels/base/BLAS/SRC/lsame.c create mode 100644 min-dgels/base/BLAS/SRC/xerbla.c create mode 100644 
min-dgels/base/BLAS/SRC/xerbla_array.c create mode 100644 min-dgels/base/BLAS/WRAP/Makefile create mode 100644 min-dgels/base/BLAS/WRAP/README create mode 100644 min-dgels/base/BLAS/WRAP/cblas.h create mode 100644 min-dgels/base/BLAS/WRAP/cblaswr.c create mode 100644 min-dgels/base/BLAS/WRAP/fblaswr.c create mode 100644 min-dgels/base/BLAS/WRAP/fblaswr.h create mode 100644 min-dgels/base/BLAS/dblat2.in create mode 100644 min-dgels/base/BLAS/dblat3.in create mode 100644 min-dgels/base/COPYING create mode 100644 min-dgels/base/F2CLIBS/libf2c/Makefile create mode 100644 min-dgels/base/F2CLIBS/libf2c/Notice create mode 100644 min-dgels/base/F2CLIBS/libf2c/README create mode 100644 min-dgels/base/F2CLIBS/libf2c/abort_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/arithchk.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/backspac.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_cos.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_div.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_exp.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_log.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_sin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/c_sqrt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/cabs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/close.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/comptry.bat create mode 100644 min-dgels/base/F2CLIBS/libf2c/ctype.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/ctype.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_acos.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_asin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_atan.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_atn2.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_cnjg.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_cos.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_cosh.c 
create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_dim.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_exp.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_imag.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_int.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_lg10.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_log.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_mod.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_nint.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_prod.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_sign.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_sin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_sinh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_sqrt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_tan.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/d_tanh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/derf_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/derfc_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/dfe.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/dolio.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/dtime_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/due.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/ef1asc_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/endfile.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/erf_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/erfc_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/err.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/etime_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/exit_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/f2c.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/f2c.h0 create mode 100644 min-dgels/base/F2CLIBS/libf2c/f2ch.add create mode 100644 min-dgels/base/F2CLIBS/libf2c/f77_aloc.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/f77vers.c create mode 100644 
min-dgels/base/F2CLIBS/libf2c/fio.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/fmt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/fmt.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/fmtlib.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/fp.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/ftell64_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/ftell_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/getarg_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/getenv_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_dim.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_dnnt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_indx.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_len.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_mod.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_nint.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/h_sign.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/hl_ge.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/hl_gt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/hl_le.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/hl_lt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i77vers.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_ceiling.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_dim.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_dnnt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_indx.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_len.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_len_trim.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_mod.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_nint.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/i_sign.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/iargc_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/iio.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/ilnw.c create 
mode 100644 min-dgels/base/F2CLIBS/libf2c/inquire.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/l_ge.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/l_gt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/l_le.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/l_lt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/lbitbits.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/lbitshft.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/libf2c.lbc create mode 100644 min-dgels/base/F2CLIBS/libf2c/libf2c.sy create mode 100644 min-dgels/base/F2CLIBS/libf2c/lio.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/lread.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/lwrite.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/main.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/math.hvc create mode 100644 min-dgels/base/F2CLIBS/libf2c/mkfile.plan9 create mode 100644 min-dgels/base/F2CLIBS/libf2c/open.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_ci.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_dd.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_di.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_hh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_ii.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_qq.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_ri.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_zi.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/pow_zz.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/qbitbits.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/qbitshft.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_acos.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_asin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_atan.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_atn2.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_cnjg.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_cos.c create mode 100644 
min-dgels/base/F2CLIBS/libf2c/r_cosh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_dim.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_exp.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_imag.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_int.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_lg10.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_log.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_mod.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_nint.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_sign.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_sin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_sinh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_sqrt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_tan.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/r_tanh.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/rawio.h create mode 100644 min-dgels/base/F2CLIBS/libf2c/rdfmt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/rewind.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/rsfe.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/rsli.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/rsne.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_cat.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_cmp.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_copy.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_paus.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_rnge.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/s_stop.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/scomptry.bat create mode 100644 min-dgels/base/F2CLIBS/libf2c/sfe.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/sig_die.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/signal1.h0 create mode 100644 min-dgels/base/F2CLIBS/libf2c/signal_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/signbit.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/sue.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/sysdep1.h0 
create mode 100644 min-dgels/base/F2CLIBS/libf2c/system_.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/typesize.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/uio.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/uninit.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/util.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/wref.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/wrtfmt.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/wsfe.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/wsle.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/wsne.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/xwsne.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_abs.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_cos.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_div.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_exp.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_log.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_sin.c create mode 100644 min-dgels/base/F2CLIBS/libf2c/z_sqrt.c create mode 100644 min-dgels/base/INCLUDE/blaswrap.h create mode 100644 min-dgels/base/INCLUDE/clapack.h create mode 100644 min-dgels/base/INCLUDE/f2c.h create mode 100644 min-dgels/base/Makefile create mode 100644 min-dgels/base/README.install create mode 100644 min-dgels/base/SRC/Makefile create mode 100644 min-dgels/base/SRC/VARIANTS/Makefile create mode 100644 min-dgels/base/SRC/VARIANTS/README create mode 100644 min-dgels/base/SRC/VARIANTS/cholesky/RL/dpotrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/cholesky/TOP/dpotrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c create mode 100644 min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c create mode 100644 min-dgels/base/SRC/chla_transtype.c create mode 100644 min-dgels/base/SRC/dbdsdc.c create mode 100644 
min-dgels/base/SRC/dbdsqr.c create mode 100644 min-dgels/base/SRC/ddisna.c create mode 100644 min-dgels/base/SRC/dgbbrd.c create mode 100644 min-dgels/base/SRC/dgbcon.c create mode 100644 min-dgels/base/SRC/dgbequ.c create mode 100644 min-dgels/base/SRC/dgbequb.c create mode 100644 min-dgels/base/SRC/dgbrfs.c create mode 100644 min-dgels/base/SRC/dgbrfsx.c create mode 100644 min-dgels/base/SRC/dgbsv.c create mode 100644 min-dgels/base/SRC/dgbsvx.c create mode 100644 min-dgels/base/SRC/dgbsvxx.c create mode 100644 min-dgels/base/SRC/dgbtf2.c create mode 100644 min-dgels/base/SRC/dgbtrf.c create mode 100644 min-dgels/base/SRC/dgbtrs.c create mode 100644 min-dgels/base/SRC/dgebak.c create mode 100644 min-dgels/base/SRC/dgebal.c create mode 100644 min-dgels/base/SRC/dgebd2.c create mode 100644 min-dgels/base/SRC/dgebrd.c create mode 100644 min-dgels/base/SRC/dgecon.c create mode 100644 min-dgels/base/SRC/dgeequ.c create mode 100644 min-dgels/base/SRC/dgeequb.c create mode 100644 min-dgels/base/SRC/dgees.c create mode 100644 min-dgels/base/SRC/dgeesx.c create mode 100644 min-dgels/base/SRC/dgeev.c create mode 100644 min-dgels/base/SRC/dgeevx.c create mode 100644 min-dgels/base/SRC/dgegs.c create mode 100644 min-dgels/base/SRC/dgegv.c create mode 100644 min-dgels/base/SRC/dgehd2.c create mode 100644 min-dgels/base/SRC/dgehrd.c create mode 100644 min-dgels/base/SRC/dgejsv.c create mode 100644 min-dgels/base/SRC/dgelq2.c create mode 100644 min-dgels/base/SRC/dgelqf.c create mode 100644 min-dgels/base/SRC/dgels.c create mode 100644 min-dgels/base/SRC/dgelsd.c create mode 100644 min-dgels/base/SRC/dgelss.c create mode 100644 min-dgels/base/SRC/dgelsx.c create mode 100644 min-dgels/base/SRC/dgelsy.c create mode 100644 min-dgels/base/SRC/dgeql2.c create mode 100644 min-dgels/base/SRC/dgeqlf.c create mode 100644 min-dgels/base/SRC/dgeqp3.c create mode 100644 min-dgels/base/SRC/dgeqpf.c create mode 100644 min-dgels/base/SRC/dgeqr2.c create mode 100644 min-dgels/base/SRC/dgeqrf.c 
create mode 100644 min-dgels/base/SRC/dgerfs.c create mode 100644 min-dgels/base/SRC/dgerfsx.c create mode 100644 min-dgels/base/SRC/dgerq2.c create mode 100644 min-dgels/base/SRC/dgerqf.c create mode 100644 min-dgels/base/SRC/dgesc2.c create mode 100644 min-dgels/base/SRC/dgesdd.c create mode 100644 min-dgels/base/SRC/dgesv.c create mode 100644 min-dgels/base/SRC/dgesvd.c create mode 100644 min-dgels/base/SRC/dgesvj.c create mode 100644 min-dgels/base/SRC/dgesvx.c create mode 100644 min-dgels/base/SRC/dgesvxx.c create mode 100644 min-dgels/base/SRC/dgetc2.c create mode 100644 min-dgels/base/SRC/dgetf2.c create mode 100644 min-dgels/base/SRC/dgetrf.c create mode 100644 min-dgels/base/SRC/dgetri.c create mode 100644 min-dgels/base/SRC/dgetrs.c create mode 100644 min-dgels/base/SRC/dggbak.c create mode 100644 min-dgels/base/SRC/dggbal.c create mode 100644 min-dgels/base/SRC/dgges.c create mode 100644 min-dgels/base/SRC/dggesx.c create mode 100644 min-dgels/base/SRC/dggev.c create mode 100644 min-dgels/base/SRC/dggevx.c create mode 100644 min-dgels/base/SRC/dggglm.c create mode 100644 min-dgels/base/SRC/dgghrd.c create mode 100644 min-dgels/base/SRC/dgglse.c create mode 100644 min-dgels/base/SRC/dggqrf.c create mode 100644 min-dgels/base/SRC/dggrqf.c create mode 100644 min-dgels/base/SRC/dggsvd.c create mode 100644 min-dgels/base/SRC/dggsvp.c create mode 100644 min-dgels/base/SRC/dgsvj0.c create mode 100644 min-dgels/base/SRC/dgsvj1.c create mode 100644 min-dgels/base/SRC/dgtcon.c create mode 100644 min-dgels/base/SRC/dgtrfs.c create mode 100644 min-dgels/base/SRC/dgtsv.c create mode 100644 min-dgels/base/SRC/dgtsvx.c create mode 100644 min-dgels/base/SRC/dgttrf.c create mode 100644 min-dgels/base/SRC/dgttrs.c create mode 100644 min-dgels/base/SRC/dgtts2.c create mode 100644 min-dgels/base/SRC/dhgeqz.c create mode 100644 min-dgels/base/SRC/dhsein.c create mode 100644 min-dgels/base/SRC/dhseqr.c create mode 100644 min-dgels/base/SRC/disnan.c create mode 100644 
min-dgels/base/SRC/dla_gbamv.c create mode 100644 min-dgels/base/SRC/dla_gbrcond.c create mode 100644 min-dgels/base/SRC/dla_gbrfsx_extended.c create mode 100644 min-dgels/base/SRC/dla_gbrpvgrw.c create mode 100644 min-dgels/base/SRC/dla_geamv.c create mode 100644 min-dgels/base/SRC/dla_gercond.c create mode 100644 min-dgels/base/SRC/dla_gerfsx_extended.c create mode 100644 min-dgels/base/SRC/dla_lin_berr.c create mode 100644 min-dgels/base/SRC/dla_porcond.c create mode 100644 min-dgels/base/SRC/dla_porfsx_extended.c create mode 100644 min-dgels/base/SRC/dla_porpvgrw.c create mode 100644 min-dgels/base/SRC/dla_rpvgrw.c create mode 100644 min-dgels/base/SRC/dla_syamv.c create mode 100644 min-dgels/base/SRC/dla_syrcond.c create mode 100644 min-dgels/base/SRC/dla_syrfsx_extended.c create mode 100644 min-dgels/base/SRC/dla_syrpvgrw.c create mode 100644 min-dgels/base/SRC/dla_wwaddw.c create mode 100644 min-dgels/base/SRC/dlabad.c create mode 100644 min-dgels/base/SRC/dlabrd.c create mode 100644 min-dgels/base/SRC/dlacn2.c create mode 100644 min-dgels/base/SRC/dlacon.c create mode 100644 min-dgels/base/SRC/dlacpy.c create mode 100644 min-dgels/base/SRC/dladiv.c create mode 100644 min-dgels/base/SRC/dlae2.c create mode 100644 min-dgels/base/SRC/dlaebz.c create mode 100644 min-dgels/base/SRC/dlaed0.c create mode 100644 min-dgels/base/SRC/dlaed1.c create mode 100644 min-dgels/base/SRC/dlaed2.c create mode 100644 min-dgels/base/SRC/dlaed3.c create mode 100644 min-dgels/base/SRC/dlaed4.c create mode 100644 min-dgels/base/SRC/dlaed5.c create mode 100644 min-dgels/base/SRC/dlaed6.c create mode 100644 min-dgels/base/SRC/dlaed7.c create mode 100644 min-dgels/base/SRC/dlaed8.c create mode 100644 min-dgels/base/SRC/dlaed9.c create mode 100644 min-dgels/base/SRC/dlaeda.c create mode 100644 min-dgels/base/SRC/dlaein.c create mode 100644 min-dgels/base/SRC/dlaev2.c create mode 100644 min-dgels/base/SRC/dlaexc.c create mode 100644 min-dgels/base/SRC/dlag2.c create mode 100644 
min-dgels/base/SRC/dlag2s.c create mode 100644 min-dgels/base/SRC/dlags2.c create mode 100644 min-dgels/base/SRC/dlagtf.c create mode 100644 min-dgels/base/SRC/dlagtm.c create mode 100644 min-dgels/base/SRC/dlagts.c create mode 100644 min-dgels/base/SRC/dlagv2.c create mode 100644 min-dgels/base/SRC/dlahqr.c create mode 100644 min-dgels/base/SRC/dlahr2.c create mode 100644 min-dgels/base/SRC/dlahrd.c create mode 100644 min-dgels/base/SRC/dlaic1.c create mode 100644 min-dgels/base/SRC/dlaisnan.c create mode 100644 min-dgels/base/SRC/dlaln2.c create mode 100644 min-dgels/base/SRC/dlals0.c create mode 100644 min-dgels/base/SRC/dlalsa.c create mode 100644 min-dgels/base/SRC/dlalsd.c create mode 100644 min-dgels/base/SRC/dlamrg.c create mode 100644 min-dgels/base/SRC/dlaneg.c create mode 100644 min-dgels/base/SRC/dlangb.c create mode 100644 min-dgels/base/SRC/dlange.c create mode 100644 min-dgels/base/SRC/dlangt.c create mode 100644 min-dgels/base/SRC/dlanhs.c create mode 100644 min-dgels/base/SRC/dlansb.c create mode 100644 min-dgels/base/SRC/dlansf.c create mode 100644 min-dgels/base/SRC/dlansp.c create mode 100644 min-dgels/base/SRC/dlanst.c create mode 100644 min-dgels/base/SRC/dlansy.c create mode 100644 min-dgels/base/SRC/dlantb.c create mode 100644 min-dgels/base/SRC/dlantp.c create mode 100644 min-dgels/base/SRC/dlantr.c create mode 100644 min-dgels/base/SRC/dlanv2.c create mode 100644 min-dgels/base/SRC/dlapll.c create mode 100644 min-dgels/base/SRC/dlapmt.c create mode 100644 min-dgels/base/SRC/dlapy2.c create mode 100644 min-dgels/base/SRC/dlapy3.c create mode 100644 min-dgels/base/SRC/dlaqgb.c create mode 100644 min-dgels/base/SRC/dlaqge.c create mode 100644 min-dgels/base/SRC/dlaqp2.c create mode 100644 min-dgels/base/SRC/dlaqps.c create mode 100644 min-dgels/base/SRC/dlaqr0.c create mode 100644 min-dgels/base/SRC/dlaqr1.c create mode 100644 min-dgels/base/SRC/dlaqr2.c create mode 100644 min-dgels/base/SRC/dlaqr3.c create mode 100644 
min-dgels/base/SRC/dlaqr4.c create mode 100644 min-dgels/base/SRC/dlaqr5.c create mode 100644 min-dgels/base/SRC/dlaqsb.c create mode 100644 min-dgels/base/SRC/dlaqsp.c create mode 100644 min-dgels/base/SRC/dlaqsy.c create mode 100644 min-dgels/base/SRC/dlaqtr.c create mode 100644 min-dgels/base/SRC/dlar1v.c create mode 100644 min-dgels/base/SRC/dlar2v.c create mode 100644 min-dgels/base/SRC/dlarf.c create mode 100644 min-dgels/base/SRC/dlarfb.c create mode 100644 min-dgels/base/SRC/dlarfg.c create mode 100644 min-dgels/base/SRC/dlarfp.c create mode 100644 min-dgels/base/SRC/dlarft.c create mode 100644 min-dgels/base/SRC/dlarfx.c create mode 100644 min-dgels/base/SRC/dlargv.c create mode 100644 min-dgels/base/SRC/dlarnv.c create mode 100644 min-dgels/base/SRC/dlarra.c create mode 100644 min-dgels/base/SRC/dlarrb.c create mode 100644 min-dgels/base/SRC/dlarrc.c create mode 100644 min-dgels/base/SRC/dlarrd.c create mode 100644 min-dgels/base/SRC/dlarre.c create mode 100644 min-dgels/base/SRC/dlarrf.c create mode 100644 min-dgels/base/SRC/dlarrj.c create mode 100644 min-dgels/base/SRC/dlarrk.c create mode 100644 min-dgels/base/SRC/dlarrr.c create mode 100644 min-dgels/base/SRC/dlarrv.c create mode 100644 min-dgels/base/SRC/dlarscl2.c create mode 100644 min-dgels/base/SRC/dlartg.c create mode 100644 min-dgels/base/SRC/dlartv.c create mode 100644 min-dgels/base/SRC/dlaruv.c create mode 100644 min-dgels/base/SRC/dlarz.c create mode 100644 min-dgels/base/SRC/dlarzb.c create mode 100644 min-dgels/base/SRC/dlarzt.c create mode 100644 min-dgels/base/SRC/dlas2.c create mode 100644 min-dgels/base/SRC/dlascl.c create mode 100644 min-dgels/base/SRC/dlascl2.c create mode 100644 min-dgels/base/SRC/dlasd0.c create mode 100644 min-dgels/base/SRC/dlasd1.c create mode 100644 min-dgels/base/SRC/dlasd2.c create mode 100644 min-dgels/base/SRC/dlasd3.c create mode 100644 min-dgels/base/SRC/dlasd4.c create mode 100644 min-dgels/base/SRC/dlasd5.c create mode 100644 
min-dgels/base/SRC/dlasd6.c create mode 100644 min-dgels/base/SRC/dlasd7.c create mode 100644 min-dgels/base/SRC/dlasd8.c create mode 100644 min-dgels/base/SRC/dlasda.c create mode 100644 min-dgels/base/SRC/dlasdq.c create mode 100644 min-dgels/base/SRC/dlasdt.c create mode 100644 min-dgels/base/SRC/dlaset.c create mode 100644 min-dgels/base/SRC/dlasq1.c create mode 100644 min-dgels/base/SRC/dlasq2.c create mode 100644 min-dgels/base/SRC/dlasq3.c create mode 100644 min-dgels/base/SRC/dlasq4.c create mode 100644 min-dgels/base/SRC/dlasq5.c create mode 100644 min-dgels/base/SRC/dlasq6.c create mode 100644 min-dgels/base/SRC/dlasr.c create mode 100644 min-dgels/base/SRC/dlasrt.c create mode 100644 min-dgels/base/SRC/dlassq.c create mode 100644 min-dgels/base/SRC/dlasv2.c create mode 100644 min-dgels/base/SRC/dlaswp.c create mode 100644 min-dgels/base/SRC/dlasy2.c create mode 100644 min-dgels/base/SRC/dlasyf.c create mode 100644 min-dgels/base/SRC/dlat2s.c create mode 100644 min-dgels/base/SRC/dlatbs.c create mode 100644 min-dgels/base/SRC/dlatdf.c create mode 100644 min-dgels/base/SRC/dlatps.c create mode 100644 min-dgels/base/SRC/dlatrd.c create mode 100644 min-dgels/base/SRC/dlatrs.c create mode 100644 min-dgels/base/SRC/dlatrz.c create mode 100644 min-dgels/base/SRC/dlatzm.c create mode 100644 min-dgels/base/SRC/dlauu2.c create mode 100644 min-dgels/base/SRC/dlauum.c create mode 100644 min-dgels/base/SRC/dopgtr.c create mode 100644 min-dgels/base/SRC/dopmtr.c create mode 100644 min-dgels/base/SRC/dorg2l.c create mode 100644 min-dgels/base/SRC/dorg2r.c create mode 100644 min-dgels/base/SRC/dorgbr.c create mode 100644 min-dgels/base/SRC/dorghr.c create mode 100644 min-dgels/base/SRC/dorgl2.c create mode 100644 min-dgels/base/SRC/dorglq.c create mode 100644 min-dgels/base/SRC/dorgql.c create mode 100644 min-dgels/base/SRC/dorgqr.c create mode 100644 min-dgels/base/SRC/dorgr2.c create mode 100644 min-dgels/base/SRC/dorgrq.c create mode 100644 
min-dgels/base/SRC/dorgtr.c create mode 100644 min-dgels/base/SRC/dorm2l.c create mode 100644 min-dgels/base/SRC/dorm2r.c create mode 100644 min-dgels/base/SRC/dormbr.c create mode 100644 min-dgels/base/SRC/dormhr.c create mode 100644 min-dgels/base/SRC/dorml2.c create mode 100644 min-dgels/base/SRC/dormlq.c create mode 100644 min-dgels/base/SRC/dormql.c create mode 100644 min-dgels/base/SRC/dormqr.c create mode 100644 min-dgels/base/SRC/dormr2.c create mode 100644 min-dgels/base/SRC/dormr3.c create mode 100644 min-dgels/base/SRC/dormrq.c create mode 100644 min-dgels/base/SRC/dormrz.c create mode 100644 min-dgels/base/SRC/dormtr.c create mode 100644 min-dgels/base/SRC/dpbcon.c create mode 100644 min-dgels/base/SRC/dpbequ.c create mode 100644 min-dgels/base/SRC/dpbrfs.c create mode 100644 min-dgels/base/SRC/dpbstf.c create mode 100644 min-dgels/base/SRC/dpbsv.c create mode 100644 min-dgels/base/SRC/dpbsvx.c create mode 100644 min-dgels/base/SRC/dpbtf2.c create mode 100644 min-dgels/base/SRC/dpbtrf.c create mode 100644 min-dgels/base/SRC/dpbtrs.c create mode 100644 min-dgels/base/SRC/dpftrf.c create mode 100644 min-dgels/base/SRC/dpftri.c create mode 100644 min-dgels/base/SRC/dpftrs.c create mode 100644 min-dgels/base/SRC/dpocon.c create mode 100644 min-dgels/base/SRC/dpoequ.c create mode 100644 min-dgels/base/SRC/dpoequb.c create mode 100644 min-dgels/base/SRC/dporfs.c create mode 100644 min-dgels/base/SRC/dporfsx.c create mode 100644 min-dgels/base/SRC/dposv.c create mode 100644 min-dgels/base/SRC/dposvx.c create mode 100644 min-dgels/base/SRC/dposvxx.c create mode 100644 min-dgels/base/SRC/dpotf2.c create mode 100644 min-dgels/base/SRC/dpotrf.c create mode 100644 min-dgels/base/SRC/dpotri.c create mode 100644 min-dgels/base/SRC/dpotrs.c create mode 100644 min-dgels/base/SRC/dppcon.c create mode 100644 min-dgels/base/SRC/dppequ.c create mode 100644 min-dgels/base/SRC/dpprfs.c create mode 100644 min-dgels/base/SRC/dppsv.c create mode 100644 
min-dgels/base/SRC/dppsvx.c create mode 100644 min-dgels/base/SRC/dpptrf.c create mode 100644 min-dgels/base/SRC/dpptri.c create mode 100644 min-dgels/base/SRC/dpptrs.c create mode 100644 min-dgels/base/SRC/dpstf2.c create mode 100644 min-dgels/base/SRC/dpstrf.c create mode 100644 min-dgels/base/SRC/dptcon.c create mode 100644 min-dgels/base/SRC/dpteqr.c create mode 100644 min-dgels/base/SRC/dptrfs.c create mode 100644 min-dgels/base/SRC/dptsv.c create mode 100644 min-dgels/base/SRC/dptsvx.c create mode 100644 min-dgels/base/SRC/dpttrf.c create mode 100644 min-dgels/base/SRC/dpttrs.c create mode 100644 min-dgels/base/SRC/dptts2.c create mode 100644 min-dgels/base/SRC/drscl.c create mode 100644 min-dgels/base/SRC/dsbev.c create mode 100644 min-dgels/base/SRC/dsbevd.c create mode 100644 min-dgels/base/SRC/dsbevx.c create mode 100644 min-dgels/base/SRC/dsbgst.c create mode 100644 min-dgels/base/SRC/dsbgv.c create mode 100644 min-dgels/base/SRC/dsbgvd.c create mode 100644 min-dgels/base/SRC/dsbgvx.c create mode 100644 min-dgels/base/SRC/dsbtrd.c create mode 100644 min-dgels/base/SRC/dsfrk.c create mode 100644 min-dgels/base/SRC/dsgesv.c create mode 100644 min-dgels/base/SRC/dspcon.c create mode 100644 min-dgels/base/SRC/dspev.c create mode 100644 min-dgels/base/SRC/dspevd.c create mode 100644 min-dgels/base/SRC/dspevx.c create mode 100644 min-dgels/base/SRC/dspgst.c create mode 100644 min-dgels/base/SRC/dspgv.c create mode 100644 min-dgels/base/SRC/dspgvd.c create mode 100644 min-dgels/base/SRC/dspgvx.c create mode 100644 min-dgels/base/SRC/dsposv.c create mode 100644 min-dgels/base/SRC/dsprfs.c create mode 100644 min-dgels/base/SRC/dspsv.c create mode 100644 min-dgels/base/SRC/dspsvx.c create mode 100644 min-dgels/base/SRC/dsptrd.c create mode 100644 min-dgels/base/SRC/dsptrf.c create mode 100644 min-dgels/base/SRC/dsptri.c create mode 100644 min-dgels/base/SRC/dsptrs.c create mode 100644 min-dgels/base/SRC/dstebz.c create mode 100644 min-dgels/base/SRC/dstedc.c 
create mode 100644 min-dgels/base/SRC/dstegr.c create mode 100644 min-dgels/base/SRC/dstein.c create mode 100644 min-dgels/base/SRC/dstemr.c create mode 100644 min-dgels/base/SRC/dsteqr.c create mode 100644 min-dgels/base/SRC/dsterf.c create mode 100644 min-dgels/base/SRC/dstev.c create mode 100644 min-dgels/base/SRC/dstevd.c create mode 100644 min-dgels/base/SRC/dstevr.c create mode 100644 min-dgels/base/SRC/dstevx.c create mode 100644 min-dgels/base/SRC/dsycon.c create mode 100644 min-dgels/base/SRC/dsyequb.c create mode 100644 min-dgels/base/SRC/dsyev.c create mode 100644 min-dgels/base/SRC/dsyevd.c create mode 100644 min-dgels/base/SRC/dsyevr.c create mode 100644 min-dgels/base/SRC/dsyevx.c create mode 100644 min-dgels/base/SRC/dsygs2.c create mode 100644 min-dgels/base/SRC/dsygst.c create mode 100644 min-dgels/base/SRC/dsygv.c create mode 100644 min-dgels/base/SRC/dsygvd.c create mode 100644 min-dgels/base/SRC/dsygvx.c create mode 100644 min-dgels/base/SRC/dsyrfs.c create mode 100644 min-dgels/base/SRC/dsyrfsx.c create mode 100644 min-dgels/base/SRC/dsysv.c create mode 100644 min-dgels/base/SRC/dsysvx.c create mode 100644 min-dgels/base/SRC/dsysvxx.c create mode 100644 min-dgels/base/SRC/dsytd2.c create mode 100644 min-dgels/base/SRC/dsytf2.c create mode 100644 min-dgels/base/SRC/dsytrd.c create mode 100644 min-dgels/base/SRC/dsytrf.c create mode 100644 min-dgels/base/SRC/dsytri.c create mode 100644 min-dgels/base/SRC/dsytrs.c create mode 100644 min-dgels/base/SRC/dtbcon.c create mode 100644 min-dgels/base/SRC/dtbrfs.c create mode 100644 min-dgels/base/SRC/dtbtrs.c create mode 100644 min-dgels/base/SRC/dtfsm.c create mode 100644 min-dgels/base/SRC/dtftri.c create mode 100644 min-dgels/base/SRC/dtfttp.c create mode 100644 min-dgels/base/SRC/dtfttr.c create mode 100644 min-dgels/base/SRC/dtgevc.c create mode 100644 min-dgels/base/SRC/dtgex2.c create mode 100644 min-dgels/base/SRC/dtgexc.c create mode 100644 min-dgels/base/SRC/dtgsen.c create mode 100644 
min-dgels/base/SRC/dtgsja.c create mode 100644 min-dgels/base/SRC/dtgsna.c create mode 100644 min-dgels/base/SRC/dtgsy2.c create mode 100644 min-dgels/base/SRC/dtgsyl.c create mode 100644 min-dgels/base/SRC/dtpcon.c create mode 100644 min-dgels/base/SRC/dtprfs.c create mode 100644 min-dgels/base/SRC/dtptri.c create mode 100644 min-dgels/base/SRC/dtptrs.c create mode 100644 min-dgels/base/SRC/dtpttf.c create mode 100644 min-dgels/base/SRC/dtpttr.c create mode 100644 min-dgels/base/SRC/dtrcon.c create mode 100644 min-dgels/base/SRC/dtrevc.c create mode 100644 min-dgels/base/SRC/dtrexc.c create mode 100644 min-dgels/base/SRC/dtrrfs.c create mode 100644 min-dgels/base/SRC/dtrsen.c create mode 100644 min-dgels/base/SRC/dtrsna.c create mode 100644 min-dgels/base/SRC/dtrsyl.c create mode 100644 min-dgels/base/SRC/dtrti2.c create mode 100644 min-dgels/base/SRC/dtrtri.c create mode 100644 min-dgels/base/SRC/dtrtrs.c create mode 100644 min-dgels/base/SRC/dtrttf.c create mode 100644 min-dgels/base/SRC/dtrttp.c create mode 100644 min-dgels/base/SRC/dtzrqf.c create mode 100644 min-dgels/base/SRC/dtzrzf.c create mode 100644 min-dgels/base/SRC/dzsum1.c create mode 100644 min-dgels/base/SRC/icmax1.c create mode 100644 min-dgels/base/SRC/ieeeck.c create mode 100644 min-dgels/base/SRC/ilaclc.c create mode 100644 min-dgels/base/SRC/ilaclr.c create mode 100644 min-dgels/base/SRC/iladiag.c create mode 100644 min-dgels/base/SRC/iladlc.c create mode 100644 min-dgels/base/SRC/iladlr.c create mode 100644 min-dgels/base/SRC/ilaenv.c create mode 100644 min-dgels/base/SRC/ilaprec.c create mode 100644 min-dgels/base/SRC/ilaslc.c create mode 100644 min-dgels/base/SRC/ilaslr.c create mode 100644 min-dgels/base/SRC/ilatrans.c create mode 100644 min-dgels/base/SRC/ilauplo.c create mode 100644 min-dgels/base/SRC/ilaver.c create mode 100644 min-dgels/base/SRC/ilazlc.c create mode 100644 min-dgels/base/SRC/ilazlr.c create mode 100644 min-dgels/base/SRC/iparmq.c create mode 100644 
min-dgels/base/SRC/izmax1.c create mode 100644 min-dgels/base/SRC/lsamen.c create mode 100644 min-dgels/base/SRC/maxloc.c create mode 100644 min-dgels/base/SRC/xerbla.c create mode 100644 min-dgels/base/SRC/xerbla_array.c create mode 100644 min-dgels/base/make.inc create mode 100644 mpi/GNUmakefile.in create mode 100644 mpi/Makefile.am create mode 100644 mpi/Makefile.in create mode 100755 mpi/dev/starpu_mpi_comm_check.sh create mode 100644 mpi/examples/Makefile.am create mode 100644 mpi/examples/Makefile.in create mode 100644 mpi/examples/benchs/abstract_sendrecv_bench.c create mode 100644 mpi/examples/benchs/abstract_sendrecv_bench.h create mode 100644 mpi/examples/benchs/bcast_bench.c create mode 100644 mpi/examples/benchs/bench_helper.c create mode 100644 mpi/examples/benchs/bench_helper.h create mode 100644 mpi/examples/benchs/burst.c create mode 100644 mpi/examples/benchs/burst_gemm.c create mode 100644 mpi/examples/benchs/burst_helper.c create mode 100644 mpi/examples/benchs/burst_helper.h create mode 100644 mpi/examples/benchs/gemm_helper.c create mode 100644 mpi/examples/benchs/gemm_helper.h create mode 100644 mpi/examples/benchs/recv_wait_finalize_bench.c create mode 100644 mpi/examples/benchs/sendrecv_bench.c create mode 100644 mpi/examples/benchs/sendrecv_gemm_bench.c create mode 100644 mpi/examples/benchs/sendrecv_parallel_tasks_bench.c create mode 100644 mpi/examples/cache/cache.c create mode 100644 mpi/examples/cache/cache_disable.c create mode 100644 mpi/examples/cg/cg.c create mode 100644 mpi/examples/comm/comm.c create mode 100644 mpi/examples/comm/group.c create mode 100644 mpi/examples/comm/mix_comm.c create mode 100644 mpi/examples/complex/mpi_complex.c create mode 100644 mpi/examples/filters/filter.c create mode 100644 mpi/examples/helper.h create mode 100644 mpi/examples/loader.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky.h create mode 100644 
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_codelets.h create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_kernels.h create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_models.c create mode 100644 mpi/examples/matrix_decomposition/mpi_cholesky_models.h create mode 100644 mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c create mode 100644 mpi/examples/matrix_decomposition/mpi_decomposition_matrix.h create mode 100644 mpi/examples/matrix_decomposition/mpi_decomposition_params.c create mode 100644 mpi/examples/matrix_decomposition/mpi_decomposition_params.h create mode 100644 mpi/examples/matrix_mult/mm.c create mode 100644 mpi/examples/matrix_mult/mm_2dbc.c create mode 100644 mpi/examples/mpi_lu/mpi_lu-double.h create mode 100644 mpi/examples/mpi_lu/mpi_lu-float.h create mode 100644 mpi/examples/mpi_lu/pdlu.c create mode 100644 mpi/examples/mpi_lu/pdlu_implicit.c create mode 100644 mpi/examples/mpi_lu/pdlu_kernels.c create mode 100644 mpi/examples/mpi_lu/plu_example.c create mode 100644 mpi/examples/mpi_lu/plu_example_double.c create mode 100644 mpi/examples/mpi_lu/plu_example_float.c create mode 100644 mpi/examples/mpi_lu/plu_implicit_example.c create mode 100644 mpi/examples/mpi_lu/plu_implicit_example_double.c create mode 100644 mpi/examples/mpi_lu/plu_implicit_example_float.c create mode 100644 mpi/examples/mpi_lu/plu_outofcore_example.c create mode 100644 mpi/examples/mpi_lu/plu_outofcore_example_double.c create mode 100644 mpi/examples/mpi_lu/plu_outofcore_example_float.c create mode 100644 mpi/examples/mpi_lu/plu_solve.c create mode 100644 mpi/examples/mpi_lu/plu_solve_double.c create mode 100644 mpi/examples/mpi_lu/plu_solve_float.c create mode 100644 mpi/examples/mpi_lu/pslu.c create 
mode 100644 mpi/examples/mpi_lu/pslu_implicit.c create mode 100644 mpi/examples/mpi_lu/pslu_kernels.c create mode 100644 mpi/examples/mpi_lu/pxlu.c create mode 100644 mpi/examples/mpi_lu/pxlu.h create mode 100644 mpi/examples/mpi_lu/pxlu_implicit.c create mode 100644 mpi/examples/mpi_lu/pxlu_kernels.c create mode 100644 mpi/examples/mpi_lu/pxlu_kernels.h create mode 100644 mpi/examples/mpi_redux/mpi_redux.c create mode 100644 mpi/examples/mpi_redux/mpi_redux_autowrapup.c create mode 100644 mpi/examples/mpi_redux/mpi_redux_tree.c create mode 100644 mpi/examples/native_fortran/fstarpu_mod.f90 create mode 100644 mpi/examples/native_fortran/fstarpu_mpi_mod.f90 create mode 100644 mpi/examples/native_fortran/nf_basic_ring.f90 create mode 100644 mpi/examples/native_fortran/nf_mm.f90 create mode 100644 mpi/examples/native_fortran/nf_mm_2dbc.f90 create mode 100644 mpi/examples/native_fortran/nf_mm_cl.f90 create mode 100644 mpi/examples/native_fortran/nf_mm_cl_blas.f90 create mode 100644 mpi/examples/native_fortran/nf_mm_task_build.f90 create mode 100644 mpi/examples/native_fortran/nf_mpi_redux.f90 create mode 100644 mpi/examples/native_fortran/nf_mpi_redux_tree.f90 create mode 100644 mpi/examples/native_fortran/nf_redux_test.f90 create mode 100755 mpi/examples/perf.sh create mode 100644 mpi/examples/stencil/stencil5.c create mode 100644 mpi/examples/stencil/stencil5_lb.c create mode 100644 mpi/examples/user_datatype/my_interface.c create mode 100644 mpi/examples/user_datatype/my_interface.h create mode 100644 mpi/examples/user_datatype/user_datatype.c create mode 100644 mpi/examples/user_datatype/user_datatype2.c create mode 100644 mpi/examples/user_datatype/user_datatype_early.c create mode 100644 mpi/examples/user_datatype/user_datatype_interface.c create mode 100644 mpi/include/fstarpu_mpi_mod.f90 create mode 100644 mpi/include/starpu_mpi.h create mode 100644 mpi/include/starpu_mpi_ft.h create mode 100644 mpi/include/starpu_mpi_lb.h create mode 100644 
mpi/packages/libstarpumpi.pc.in create mode 100644 mpi/packages/starpumpi-1.0.pc.in create mode 100644 mpi/packages/starpumpi-1.1.pc.in create mode 100644 mpi/packages/starpumpi-1.2.pc.in create mode 100644 mpi/packages/starpumpi-1.3.pc.in create mode 100644 mpi/packages/starpumpi-1.4.pc.in create mode 100644 mpi/src/Makefile.am create mode 100644 mpi/src/Makefile.in create mode 100644 mpi/src/load_balancer/load_balancer.c create mode 100644 mpi/src/load_balancer/policy/data_movements_interface.c create mode 100644 mpi/src/load_balancer/policy/data_movements_interface.h create mode 100644 mpi/src/load_balancer/policy/load_balancer_policy.h create mode 100644 mpi/src/load_balancer/policy/load_data_interface.c create mode 100644 mpi/src/load_balancer/policy/load_data_interface.h create mode 100644 mpi/src/load_balancer/policy/load_heat_propagation.c create mode 100644 mpi/src/mpi/starpu_mpi_comm.c create mode 100644 mpi/src/mpi/starpu_mpi_comm.h create mode 100644 mpi/src/mpi/starpu_mpi_driver.h create mode 100644 mpi/src/mpi/starpu_mpi_early_data.c create mode 100644 mpi/src/mpi/starpu_mpi_early_data.h create mode 100644 mpi/src/mpi/starpu_mpi_early_request.c create mode 100644 mpi/src/mpi/starpu_mpi_early_request.h create mode 100644 mpi/src/mpi/starpu_mpi_mpi.c create mode 100644 mpi/src/mpi/starpu_mpi_mpi.h create mode 100644 mpi/src/mpi/starpu_mpi_mpi_backend.c create mode 100644 mpi/src/mpi/starpu_mpi_mpi_backend.h create mode 100644 mpi/src/mpi/starpu_mpi_sync_data.c create mode 100644 mpi/src/mpi/starpu_mpi_sync_data.h create mode 100644 mpi/src/mpi/starpu_mpi_tag.c create mode 100644 mpi/src/mpi/starpu_mpi_tag.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.h create mode 100644 
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.h create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c create mode 100644 mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h create mode 100644 mpi/src/nmad/starpu_mpi_nmad.c create mode 100644 mpi/src/nmad/starpu_mpi_nmad.h create mode 100644 mpi/src/nmad/starpu_mpi_nmad_backend.c create mode 100644 mpi/src/nmad/starpu_mpi_nmad_backend.h create mode 100644 mpi/src/nmad/starpu_mpi_nmad_coop.c create mode 100644 mpi/src/nmad/starpu_mpi_nmad_coop.h create mode 100644 mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.c create mode 100644 mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h create mode 100644 mpi/src/starpu_mpi.c create mode 100644 mpi/src/starpu_mpi_cache.c create mode 100644 mpi/src/starpu_mpi_cache.h create mode 100644 mpi/src/starpu_mpi_cache_stats.c create mode 100644 mpi/src/starpu_mpi_cache_stats.h create mode 100644 mpi/src/starpu_mpi_collective.c create mode 100644 mpi/src/starpu_mpi_coop_sends.c create mode 100644 mpi/src/starpu_mpi_datatype.c create mode 100644 mpi/src/starpu_mpi_datatype.h create mode 100644 mpi/src/starpu_mpi_fortran.c create mode 100644 mpi/src/starpu_mpi_fxt.c create mode 100644 mpi/src/starpu_mpi_fxt.h create mode 100644 mpi/src/starpu_mpi_helper.c create mode 100644 mpi/src/starpu_mpi_init.c create mode 100644 mpi/src/starpu_mpi_init.h create mode 100644 mpi/src/starpu_mpi_private.c create mode 100644 
mpi/src/starpu_mpi_private.h create mode 100644 mpi/src/starpu_mpi_req.c create mode 100644 mpi/src/starpu_mpi_select_node.c create mode 100644 mpi/src/starpu_mpi_select_node.h create mode 100644 mpi/src/starpu_mpi_stats.c create mode 100644 mpi/src/starpu_mpi_stats.h create mode 100644 mpi/src/starpu_mpi_tags.c create mode 100644 mpi/src/starpu_mpi_task_insert.c create mode 100644 mpi/src/starpu_mpi_task_insert.h create mode 100644 mpi/src/starpu_mpi_task_insert_fortran.c create mode 100644 mpi/tests/Makefile.am create mode 100644 mpi/tests/Makefile.in create mode 100644 mpi/tests/attr.c create mode 100644 mpi/tests/block_interface.c create mode 100644 mpi/tests/block_interface_pinned.c create mode 100644 mpi/tests/broadcast.c create mode 100644 mpi/tests/callback.c create mode 100644 mpi/tests/checkpoints.c create mode 100644 mpi/tests/coop.c create mode 100644 mpi/tests/coop_acknowledgement.c create mode 100644 mpi/tests/coop_cache.c create mode 100644 mpi/tests/coop_chained_sends.c create mode 100644 mpi/tests/coop_datatype.c create mode 100644 mpi/tests/coop_insert_task.c create mode 100644 mpi/tests/coop_large.c create mode 100644 mpi/tests/coop_many.c create mode 100644 mpi/tests/coop_recv_not_yet_posted.c create mode 100644 mpi/tests/coop_recv_wait_finalize.c create mode 100644 mpi/tests/coop_user_defined_datatype.c create mode 100644 mpi/tests/coop_without_task.c create mode 100644 mpi/tests/coop_wrong_order.c create mode 100644 mpi/tests/data_cpy.c create mode 100644 mpi/tests/datatypes.c create mode 100644 mpi/tests/display_bindings.c create mode 100644 mpi/tests/driver.c create mode 100644 mpi/tests/early_request.c create mode 100644 mpi/tests/early_stuff.c create mode 100644 mpi/tests/gather.c create mode 100644 mpi/tests/gather2.c create mode 100644 mpi/tests/helper.h create mode 100644 mpi/tests/insert_task.c create mode 100644 mpi/tests/insert_task_block.c create mode 100644 mpi/tests/insert_task_can_execute.c create mode 100644 
mpi/tests/insert_task_compute.c create mode 100644 mpi/tests/insert_task_count.c create mode 100644 mpi/tests/insert_task_dyn_handles.c create mode 100644 mpi/tests/insert_task_node_choice.c create mode 100644 mpi/tests/insert_task_owner.c create mode 100644 mpi/tests/insert_task_owner2.c create mode 100644 mpi/tests/insert_task_owner_data.c create mode 100644 mpi/tests/insert_task_recv_cache.c create mode 100644 mpi/tests/insert_task_sent_cache.c create mode 100644 mpi/tests/insert_task_seq.c create mode 100644 mpi/tests/insert_task_tags.c create mode 100644 mpi/tests/load_balancer.c create mode 100644 mpi/tests/loader.c create mode 100644 mpi/tests/matrix.c create mode 100644 mpi/tests/matrix2.c create mode 100644 mpi/tests/mpi_barrier.c create mode 100644 mpi/tests/mpi_data_cpy.c create mode 100644 mpi/tests/mpi_detached_tag.c create mode 100644 mpi/tests/mpi_earlyrecv.c create mode 100644 mpi/tests/mpi_earlyrecv2.c create mode 100644 mpi/tests/mpi_earlyrecv2_sync.c create mode 100644 mpi/tests/mpi_irecv.c create mode 100644 mpi/tests/mpi_irecv_detached.c create mode 100644 mpi/tests/mpi_isend.c create mode 100644 mpi/tests/mpi_isend_detached.c create mode 100644 mpi/tests/mpi_reduction.c create mode 100644 mpi/tests/mpi_reduction_kernels.c create mode 100644 mpi/tests/mpi_redux.c create mode 100644 mpi/tests/mpi_scatter_gather.c create mode 100644 mpi/tests/mpi_task_submit.c create mode 100644 mpi/tests/mpi_test.c create mode 100644 mpi/tests/multiple_send.c create mode 100644 mpi/tests/ndim_interface.c create mode 100644 mpi/tests/nothing.c create mode 100644 mpi/tests/pingpong.c create mode 100644 mpi/tests/policy_register.c create mode 100644 mpi/tests/policy_register_many.c create mode 100644 mpi/tests/policy_register_toomany.c create mode 100644 mpi/tests/policy_selection.c create mode 100644 mpi/tests/policy_selection2.c create mode 100644 mpi/tests/policy_unregister.c create mode 100644 mpi/tests/ring.c create mode 100644 mpi/tests/ring_async.c create 
mode 100644 mpi/tests/ring_async_implicit.c create mode 100644 mpi/tests/ring_kernel.cu create mode 100644 mpi/tests/ring_kernel_hip.hip create mode 100644 mpi/tests/ring_sync.c create mode 100644 mpi/tests/ring_sync_detached.c create mode 100644 mpi/tests/star.c create mode 100644 mpi/tests/starpu_redefine.c create mode 100644 mpi/tests/stats.c create mode 100644 mpi/tests/sync.c create mode 100644 mpi/tests/tags_allocate.c create mode 100644 mpi/tests/tags_checking.c create mode 100644 mpi/tests/temporary.c create mode 100644 mpi/tests/user_defined_datatype.c create mode 100644 mpi/tests/user_defined_datatype_value.h create mode 100644 mpi/tests/wait_for_all.c create mode 100644 mpi/tools/Makefile.am create mode 100644 mpi/tools/Makefile.in create mode 100644 mpi/tools/starpu_replay.c create mode 100644 mpi/tools/starpu_replay_sched.c create mode 100644 packages/libstarpu.pc.in create mode 100644 packages/starpu-1.0.pc.in create mode 100644 packages/starpu-1.1.pc.in create mode 100644 packages/starpu-1.2.pc.in create mode 100644 packages/starpu-1.3.in create mode 100644 packages/starpu-1.3.pc.in create mode 100644 packages/starpu-1.4.in create mode 100644 packages/starpu-1.4.pc.in create mode 100644 sc_hypervisor/Makefile.am create mode 100644 sc_hypervisor/Makefile.in create mode 100644 sc_hypervisor/examples/Makefile.am create mode 100644 sc_hypervisor/examples/Makefile.in create mode 100644 sc_hypervisor/examples/app_driven_test/app_driven_test.c create mode 100644 sc_hypervisor/examples/cholesky/cholesky.h create mode 100644 sc_hypervisor/examples/cholesky/cholesky_implicit.c create mode 100644 sc_hypervisor/examples/cholesky/cholesky_kernels.c create mode 100644 sc_hypervisor/examples/cholesky/cholesky_models.c create mode 100644 sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c create mode 100644 sc_hypervisor/examples/lp_test/lp_resize_test.c create mode 100644 sc_hypervisor/examples/lp_test/lp_test.c create mode 100644 
sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c create mode 100644 sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.h create mode 100644 sc_hypervisor/include/sc_hypervisor.h create mode 100644 sc_hypervisor/include/sc_hypervisor_config.h create mode 100644 sc_hypervisor/include/sc_hypervisor_lp.h create mode 100644 sc_hypervisor/include/sc_hypervisor_monitoring.h create mode 100644 sc_hypervisor/include/sc_hypervisor_policy.h create mode 100644 sc_hypervisor/src/Makefile.am create mode 100644 sc_hypervisor/src/Makefile.in create mode 100644 sc_hypervisor/src/hypervisor_policies/app_driven_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/hard_coded_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/idle_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/ispeed_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/perf_count_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c create mode 100644 sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c create mode 100644 sc_hypervisor/src/policies_utils/dichotomy.c create mode 100644 sc_hypervisor/src/policies_utils/lp_programs.c create mode 100644 sc_hypervisor/src/policies_utils/lp_tools.c create mode 100644 sc_hypervisor/src/policies_utils/policy_tools.c create mode 100644 sc_hypervisor/src/policies_utils/speed.c create mode 100644 sc_hypervisor/src/policies_utils/task_pool.c create mode 100644 sc_hypervisor/src/sc_config.c create mode 100644 sc_hypervisor/src/sc_hypervisor.c create mode 100644 sc_hypervisor/src/sc_hypervisor_intern.h create mode 100644 sc_hypervisor/src/uthash.h create mode 100644 socl/Makefile.am create mode 100644 socl/Makefile.in create mode 100644 socl/README 
create mode 100644 socl/examples/Makefile.am create mode 100644 socl/examples/Makefile.in create mode 100644 socl/examples/basic/basic.c create mode 100644 socl/examples/basicsplit/basicsplit.c create mode 100644 socl/examples/clinfo/clinfo.c create mode 100644 socl/examples/loader.c create mode 100644 socl/examples/mandelbrot/mandelbrot.c create mode 100644 socl/examples/mansched/mansched.c create mode 100755 socl/examples/matmul/matmul.c create mode 100644 socl/examples/testmap/testmap.c create mode 100644 socl/src/CL/cl.h create mode 100644 socl/src/CL/cl_d3d10.h create mode 100644 socl/src/CL/cl_d3d11.h create mode 100644 socl/src/CL/cl_dx9_media_sharing.h create mode 100644 socl/src/CL/cl_ext.h create mode 100644 socl/src/CL/cl_gl.h create mode 100644 socl/src/CL/cl_gl_ext.h create mode 100644 socl/src/CL/cl_platform.h create mode 100644 socl/src/CL/opencl.h create mode 100644 socl/src/Makefile.am create mode 100644 socl/src/Makefile.in create mode 100644 socl/src/cl_buildprogram.c create mode 100644 socl/src/cl_createbuffer.c create mode 100644 socl/src/cl_createcommandqueue.c create mode 100644 socl/src/cl_createcontext.c create mode 100644 socl/src/cl_createcontextfromtype.c create mode 100644 socl/src/cl_createimage2d.c create mode 100644 socl/src/cl_createimage3d.c create mode 100644 socl/src/cl_createkernel.c create mode 100644 socl/src/cl_createkernelsinprogram.c create mode 100644 socl/src/cl_createprogramwithbinary.c create mode 100644 socl/src/cl_createprogramwithsource.c create mode 100644 socl/src/cl_createsampler.c create mode 100644 socl/src/cl_enqueuebarrier.c create mode 100644 socl/src/cl_enqueuebarrierwithwaitlist.c create mode 100644 socl/src/cl_enqueuecopybuffer.c create mode 100644 socl/src/cl_enqueuecopybuffertoimage.c create mode 100644 socl/src/cl_enqueuecopyimage.c create mode 100644 socl/src/cl_enqueuecopyimagetobuffer.c create mode 100644 socl/src/cl_enqueuemapbuffer.c create mode 100644 socl/src/cl_enqueuemapimage.c create mode 
100644 socl/src/cl_enqueuemarker.c create mode 100644 socl/src/cl_enqueuemarkerwithwaitlist.c create mode 100644 socl/src/cl_enqueuenativekernel.c create mode 100644 socl/src/cl_enqueuendrangekernel.c create mode 100644 socl/src/cl_enqueuereadbuffer.c create mode 100644 socl/src/cl_enqueuereadimage.c create mode 100644 socl/src/cl_enqueuetask.c create mode 100644 socl/src/cl_enqueueunmapmemobject.c create mode 100644 socl/src/cl_enqueuewaitforevents.c create mode 100644 socl/src/cl_enqueuewritebuffer.c create mode 100644 socl/src/cl_enqueuewriteimage.c create mode 100644 socl/src/cl_finish.c create mode 100644 socl/src/cl_flush.c create mode 100644 socl/src/cl_getcommandqueueinfo.c create mode 100644 socl/src/cl_getcontextinfo.c create mode 100644 socl/src/cl_getdeviceids.c create mode 100644 socl/src/cl_getdeviceinfo.c create mode 100644 socl/src/cl_geteventinfo.c create mode 100644 socl/src/cl_geteventprofilinginfo.c create mode 100644 socl/src/cl_getextensionfunctionaddress.c create mode 100644 socl/src/cl_getimageinfo.c create mode 100644 socl/src/cl_getkernelinfo.c create mode 100644 socl/src/cl_getkernelworkgroupinfo.c create mode 100644 socl/src/cl_getmemobjectinfo.c create mode 100644 socl/src/cl_getplatformids.c create mode 100644 socl/src/cl_getplatforminfo.c create mode 100644 socl/src/cl_getprogrambuildinfo.c create mode 100644 socl/src/cl_getprograminfo.c create mode 100644 socl/src/cl_getsamplerinfo.c create mode 100644 socl/src/cl_getsupportedimageformats.c create mode 100644 socl/src/cl_icdgetplatformidskhr.c create mode 100644 socl/src/cl_releasecommandqueue.c create mode 100644 socl/src/cl_releasecontext.c create mode 100644 socl/src/cl_releaseevent.c create mode 100644 socl/src/cl_releasekernel.c create mode 100644 socl/src/cl_releasememobject.c create mode 100644 socl/src/cl_releaseprogram.c create mode 100644 socl/src/cl_releasesampler.c create mode 100644 socl/src/cl_retaincommandqueue.c create mode 100644 socl/src/cl_retaincontext.c create 
mode 100644 socl/src/cl_retainevent.c create mode 100644 socl/src/cl_retainkernel.c create mode 100644 socl/src/cl_retainmemobject.c create mode 100644 socl/src/cl_retainprogram.c create mode 100644 socl/src/cl_retainsampler.c create mode 100644 socl/src/cl_setcommandqueueproperty.c create mode 100644 socl/src/cl_setkernelarg.c create mode 100644 socl/src/cl_unloadcompiler.c create mode 100644 socl/src/cl_waitforevents.c create mode 100644 socl/src/command.c create mode 100644 socl/src/command.h create mode 100644 socl/src/command_list.c create mode 100644 socl/src/command_list.h create mode 100644 socl/src/command_queue.c create mode 100644 socl/src/command_queue.h create mode 100644 socl/src/debug.c create mode 100644 socl/src/debug.h create mode 100644 socl/src/event.c create mode 100644 socl/src/event.h create mode 100644 socl/src/gc.c create mode 100644 socl/src/gc.h create mode 100644 socl/src/getinfo.h create mode 100644 socl/src/init.c create mode 100644 socl/src/init.h create mode 100644 socl/src/mem_objects.c create mode 100644 socl/src/mem_objects.h create mode 100644 socl/src/ocl_icd.h create mode 100644 socl/src/socl.c create mode 100644 socl/src/socl.h create mode 100644 socl/src/task.c create mode 100644 socl/src/task.h create mode 100644 socl/src/util.c create mode 100644 socl/src/util.h create mode 100644 socl/vendors/install/socl.icd create mode 100644 socl/vendors/install/socl.icd.in create mode 100644 socl/vendors/nvidia.icd create mode 100644 socl/vendors/socl.icd.in create mode 100644 src/Makefile.am create mode 100644 src/Makefile.in create mode 100644 src/common/barrier.c create mode 100644 src/common/barrier.h create mode 100644 src/common/barrier_counter.c create mode 100644 src/common/barrier_counter.h create mode 100644 src/common/config-src-build.h.in create mode 100644 src/common/config.h.in create mode 100644 src/common/fxt.c create mode 100644 src/common/fxt.h create mode 100644 src/common/graph.c create mode 100644 
src/common/graph.h create mode 100644 src/common/hash.c create mode 100644 src/common/inlines.c create mode 100644 src/common/knobs.c create mode 100644 src/common/knobs.h create mode 100644 src/common/list.h create mode 100644 src/common/prio_list.h create mode 100644 src/common/rbtree.c create mode 100644 src/common/rbtree.h create mode 100644 src/common/rbtree_i.h create mode 100644 src/common/rwlock.c create mode 100644 src/common/rwlock.h create mode 100644 src/common/starpu_spinlock.c create mode 100644 src/common/starpu_spinlock.h create mode 100644 src/common/thread.c create mode 100644 src/common/thread.h create mode 100644 src/common/timing.c create mode 100644 src/common/timing.h create mode 100644 src/common/uthash.h create mode 100644 src/common/utils.c create mode 100644 src/common/utils.h create mode 100644 src/core/combined_workers.c create mode 100644 src/core/combined_workers.h create mode 100644 src/core/debug.c create mode 100644 src/core/debug.h create mode 100644 src/core/dependencies/cg.c create mode 100644 src/core/dependencies/cg.h create mode 100644 src/core/dependencies/data_arbiter_concurrency.c create mode 100644 src/core/dependencies/data_concurrency.c create mode 100644 src/core/dependencies/data_concurrency.h create mode 100644 src/core/dependencies/dependencies.c create mode 100644 src/core/dependencies/implicit_data_deps.c create mode 100644 src/core/dependencies/implicit_data_deps.h create mode 100644 src/core/dependencies/tags.c create mode 100644 src/core/dependencies/tags.h create mode 100644 src/core/dependencies/task_deps.c create mode 100644 src/core/detect_combined_workers.c create mode 100644 src/core/detect_combined_workers.h create mode 100644 src/core/devices.c create mode 100644 src/core/devices.h create mode 100644 src/core/disk.c create mode 100644 src/core/disk.h create mode 100644 src/core/disk_ops/disk_hdf5.c create mode 100644 src/core/disk_ops/disk_leveldb.cpp create mode 100644 src/core/disk_ops/disk_stdio.c 
create mode 100644 src/core/disk_ops/disk_unistd.c create mode 100644 src/core/disk_ops/disk_unistd_o_direct.c create mode 100644 src/core/disk_ops/unistd/disk_unistd_global.c create mode 100644 src/core/disk_ops/unistd/disk_unistd_global.h create mode 100644 src/core/drivers.c create mode 100644 src/core/drivers.h create mode 100644 src/core/errorcheck.c create mode 100644 src/core/errorcheck.h create mode 100644 src/core/idle_hook.c create mode 100644 src/core/idle_hook.h create mode 100644 src/core/jobs.c create mode 100644 src/core/jobs.h create mode 100644 src/core/parallel_task.c create mode 100644 src/core/perfmodel/energy_model.c create mode 100644 src/core/perfmodel/multiple_regression.c create mode 100644 src/core/perfmodel/multiple_regression.h create mode 100644 src/core/perfmodel/perfmodel.c create mode 100644 src/core/perfmodel/perfmodel.h create mode 100644 src/core/perfmodel/perfmodel_bus.c create mode 100644 src/core/perfmodel/perfmodel_history.c create mode 100644 src/core/perfmodel/perfmodel_nan.c create mode 100644 src/core/perfmodel/perfmodel_print.c create mode 100644 src/core/perfmodel/regression.c create mode 100644 src/core/perfmodel/regression.h create mode 100644 src/core/perfmodel/starpu-perfmodel.dtd create mode 100644 src/core/progress_hook.c create mode 100644 src/core/progress_hook.h create mode 100644 src/core/sched_ctx.c create mode 100644 src/core/sched_ctx.h create mode 100644 src/core/sched_ctx_list.c create mode 100644 src/core/sched_ctx_list.h create mode 100644 src/core/sched_policy.c create mode 100644 src/core/sched_policy.h create mode 100644 src/core/simgrid.c create mode 100644 src/core/simgrid.h create mode 100644 src/core/simgrid_cpp.cpp create mode 100644 src/core/task.c create mode 100644 src/core/task.h create mode 100644 src/core/task_bundle.c create mode 100644 src/core/task_bundle.h create mode 100644 src/core/topology.c create mode 100644 src/core/topology.h create mode 100644 src/core/tree.c create mode 100644 
src/core/workers.c create mode 100644 src/core/workers.h create mode 100644 src/datawizard/coherency.c create mode 100644 src/datawizard/coherency.h create mode 100644 src/datawizard/copy_driver.c create mode 100644 src/datawizard/copy_driver.h create mode 100644 src/datawizard/data_request.c create mode 100644 src/datawizard/data_request.h create mode 100644 src/datawizard/datastats.c create mode 100644 src/datawizard/datastats.h create mode 100644 src/datawizard/datawizard.c create mode 100644 src/datawizard/datawizard.h create mode 100644 src/datawizard/filters.c create mode 100644 src/datawizard/filters.h create mode 100644 src/datawizard/footprint.c create mode 100644 src/datawizard/footprint.h create mode 100644 src/datawizard/interfaces/bcsr_filters.c create mode 100644 src/datawizard/interfaces/bcsr_interface.c create mode 100644 src/datawizard/interfaces/block_filters.c create mode 100644 src/datawizard/interfaces/block_interface.c create mode 100644 src/datawizard/interfaces/coo_interface.c create mode 100644 src/datawizard/interfaces/csr_filters.c create mode 100644 src/datawizard/interfaces/csr_interface.c create mode 100644 src/datawizard/interfaces/data_interface.c create mode 100644 src/datawizard/interfaces/data_interface.h create mode 100644 src/datawizard/interfaces/matrix_filters.c create mode 100644 src/datawizard/interfaces/matrix_interface.c create mode 100644 src/datawizard/interfaces/multiformat_interface.c create mode 100644 src/datawizard/interfaces/ndim_filters.c create mode 100644 src/datawizard/interfaces/ndim_interface.c create mode 100644 src/datawizard/interfaces/tensor_filters.c create mode 100644 src/datawizard/interfaces/tensor_interface.c create mode 100644 src/datawizard/interfaces/variable_interface.c create mode 100644 src/datawizard/interfaces/vector_filters.c create mode 100644 src/datawizard/interfaces/vector_interface.c create mode 100644 src/datawizard/interfaces/void_interface.c create mode 100644 src/datawizard/malloc.c 
create mode 100644 src/datawizard/malloc.h create mode 100644 src/datawizard/memalloc.c create mode 100644 src/datawizard/memalloc.h create mode 100644 src/datawizard/memory_manager.c create mode 100644 src/datawizard/memory_manager.h create mode 100644 src/datawizard/memory_nodes.c create mode 100644 src/datawizard/memory_nodes.h create mode 100644 src/datawizard/memstats.c create mode 100644 src/datawizard/memstats.h create mode 100644 src/datawizard/node_ops.c create mode 100644 src/datawizard/node_ops.h create mode 100644 src/datawizard/reduction.c create mode 100644 src/datawizard/sort_data_handles.c create mode 100644 src/datawizard/sort_data_handles.h create mode 100644 src/datawizard/user_interactions.c create mode 100644 src/datawizard/write_back.c create mode 100644 src/datawizard/write_back.h create mode 100644 src/debug/latency.c create mode 100644 src/debug/starpu_debug_helpers.h create mode 100644 src/debug/structures_size.c create mode 100644 src/debug/traces/anim.c create mode 100644 src/debug/traces/starpu_fxt.c create mode 100644 src/debug/traces/starpu_fxt.h create mode 100644 src/debug/traces/starpu_fxt_dag.c create mode 100644 src/debug/traces/starpu_fxt_mpi.c create mode 100644 src/debug/traces/starpu_paje.c create mode 100644 src/dolib.c create mode 100644 src/drivers/cpu/driver_cpu.c create mode 100644 src/drivers/cpu/driver_cpu.h create mode 100644 src/drivers/cuda/driver_cuda.c create mode 100644 src/drivers/cuda/driver_cuda.h create mode 100644 src/drivers/cuda/driver_cuda0.c create mode 100644 src/drivers/cuda/driver_cuda1.c create mode 100644 src/drivers/cuda/driver_cuda_init.c create mode 100644 src/drivers/cuda/starpu_cublas.c create mode 100644 src/drivers/cuda/starpu_cublasLt.c create mode 100644 src/drivers/cuda/starpu_cublas_v2.c create mode 100644 src/drivers/cuda/starpu_cusolver.c create mode 100644 src/drivers/cuda/starpu_cusparse.c create mode 100644 src/drivers/disk/driver_disk.c create mode 100644 
src/drivers/disk/driver_disk.h create mode 100644 src/drivers/driver_common/driver_common.c create mode 100644 src/drivers/driver_common/driver_common.h create mode 100644 src/drivers/hip/driver_hip.c create mode 100644 src/drivers/hip/driver_hip.h create mode 100644 src/drivers/hip/driver_hip_init.c create mode 100644 src/drivers/hip/starpu_hipblas.c create mode 100644 src/drivers/max/driver_max_fpga.c create mode 100644 src/drivers/max/driver_max_fpga.h create mode 100644 src/drivers/max/driver_max_fpga_init.c create mode 100644 src/drivers/mp_common/mp_common.c create mode 100644 src/drivers/mp_common/mp_common.h create mode 100644 src/drivers/mp_common/sink_common.c create mode 100644 src/drivers/mp_common/sink_common.h create mode 100644 src/drivers/mp_common/source_common.c create mode 100644 src/drivers/mp_common/source_common.h create mode 100644 src/drivers/mpi/driver_mpi_common.c create mode 100644 src/drivers/mpi/driver_mpi_common.h create mode 100644 src/drivers/mpi/driver_mpi_init.c create mode 100644 src/drivers/mpi/driver_mpi_sink.c create mode 100644 src/drivers/mpi/driver_mpi_sink.h create mode 100644 src/drivers/mpi/driver_mpi_source.c create mode 100644 src/drivers/mpi/driver_mpi_source.h create mode 100644 src/drivers/opencl/driver_opencl.c create mode 100644 src/drivers/opencl/driver_opencl.h create mode 100644 src/drivers/opencl/driver_opencl_init.c create mode 100644 src/drivers/opencl/driver_opencl_utils.c create mode 100644 src/drivers/opencl/driver_opencl_utils.h create mode 100644 src/drivers/tcpip/driver_tcpip_common.c create mode 100644 src/drivers/tcpip/driver_tcpip_common.h create mode 100644 src/drivers/tcpip/driver_tcpip_common_func.h create mode 100644 src/drivers/tcpip/driver_tcpip_init.c create mode 100644 src/drivers/tcpip/driver_tcpip_sink.c create mode 100644 src/drivers/tcpip/driver_tcpip_sink.h create mode 100644 src/drivers/tcpip/driver_tcpip_source.c create mode 100644 src/drivers/tcpip/driver_tcpip_source.h create mode 
100644 src/parallel_worker/starpu_parallel_worker_create.c create mode 100644 src/parallel_worker/starpu_parallel_worker_create.h create mode 100644 src/profiling/bound.c create mode 100644 src/profiling/bound.h create mode 100644 src/profiling/callbacks.c create mode 100644 src/profiling/callbacks.h create mode 100644 src/profiling/profiling.c create mode 100644 src/profiling/profiling.h create mode 100644 src/profiling/profiling_helpers.c create mode 100644 src/sched_policies/component_best_implementation.c create mode 100644 src/sched_policies/component_composed.c create mode 100644 src/sched_policies/component_eager.c create mode 100644 src/sched_policies/component_eager_calibration.c create mode 100644 src/sched_policies/component_eager_prio.c create mode 100644 src/sched_policies/component_fifo.c create mode 100644 src/sched_policies/component_heft.c create mode 100644 src/sched_policies/component_heteroprio.c create mode 100644 src/sched_policies/component_mct.c create mode 100644 src/sched_policies/component_perfmodel_select.c create mode 100644 src/sched_policies/component_prio.c create mode 100644 src/sched_policies/component_random.c create mode 100644 src/sched_policies/component_sched.c create mode 100644 src/sched_policies/component_stage.c create mode 100644 src/sched_policies/component_userchoice.c create mode 100644 src/sched_policies/component_work_stealing.c create mode 100644 src/sched_policies/component_worker.c create mode 100644 src/sched_policies/deque_modeling_policy_data_aware.c create mode 100644 src/sched_policies/eager_central_policy.c create mode 100644 src/sched_policies/eager_central_priority_policy.c create mode 100644 src/sched_policies/fifo_queues.c create mode 100644 src/sched_policies/fifo_queues.h create mode 100644 src/sched_policies/graph_test_policy.c create mode 100644 src/sched_policies/helper_mct.c create mode 100644 src/sched_policies/helper_mct.h create mode 100644 src/sched_policies/heteroprio.c create mode 100644 
src/sched_policies/heteroprio.h create mode 100644 src/sched_policies/hierarchical_heft.c create mode 100644 src/sched_policies/modular_eager.c create mode 100644 src/sched_policies/modular_eager_prefetching.c create mode 100644 src/sched_policies/modular_eager_prio.c create mode 100644 src/sched_policies/modular_ez.c create mode 100644 src/sched_policies/modular_gemm.c create mode 100644 src/sched_policies/modular_heft.c create mode 100644 src/sched_policies/modular_heft2.c create mode 100644 src/sched_policies/modular_heft_prio.c create mode 100644 src/sched_policies/modular_heteroprio.c create mode 100644 src/sched_policies/modular_heteroprio_heft.c create mode 100644 src/sched_policies/modular_parallel_heft.c create mode 100644 src/sched_policies/modular_parallel_random.c create mode 100644 src/sched_policies/modular_prio.c create mode 100644 src/sched_policies/modular_prio_prefetching.c create mode 100644 src/sched_policies/modular_random.c create mode 100644 src/sched_policies/modular_random_prefetching.c create mode 100644 src/sched_policies/modular_ws.c create mode 100644 src/sched_policies/parallel_eager.c create mode 100644 src/sched_policies/parallel_heft.c create mode 100644 src/sched_policies/prio_deque.c create mode 100644 src/sched_policies/prio_deque.h create mode 100644 src/sched_policies/random_policy.c create mode 100644 src/sched_policies/sched_component.h create mode 100644 src/sched_policies/scheduler_maker.c create mode 100644 src/sched_policies/work_stealing_policy.c create mode 100644 src/util/execute_on_all.c create mode 100644 src/util/file.c create mode 100644 src/util/fstarpu.c create mode 100644 src/util/misc.c create mode 100644 src/util/openmp_runtime_support.c create mode 100644 src/util/openmp_runtime_support.h create mode 100644 src/util/openmp_runtime_support_environment.c create mode 100644 src/util/openmp_runtime_support_omp_api.c create mode 100644 src/util/starpu_create_sync_task.c create mode 100644 
src/util/starpu_data_cpy.c create mode 100644 src/util/starpu_data_cpy.h create mode 100644 src/util/starpu_task_insert.c create mode 100644 src/util/starpu_task_insert_utils.c create mode 100644 src/util/starpu_task_insert_utils.h create mode 100644 src/worker_collection/worker_list.c create mode 100644 src/worker_collection/worker_tree.c create mode 100644 starpu_openmp_llvm/Makefile.am create mode 100644 starpu_openmp_llvm/Makefile.in create mode 100644 starpu_openmp_llvm/examples/Makefile.am create mode 100644 starpu_openmp_llvm/examples/Makefile.in create mode 100644 starpu_openmp_llvm/examples/README create mode 100644 starpu_openmp_llvm/examples/hello-task.c create mode 100644 starpu_openmp_llvm/src/Makefile.am create mode 100644 starpu_openmp_llvm/src/Makefile.in create mode 100644 starpu_openmp_llvm/src/openmp_runtime_support_llvm.c create mode 100644 starpufft/Makefile.am create mode 100644 starpufft/Makefile.in create mode 100644 starpufft/include/starpufft.h create mode 100644 starpufft/packages/libstarpufft.pc.in create mode 100644 starpufft/packages/starpufft-1.0.pc.in create mode 100644 starpufft/packages/starpufft-1.1.pc.in create mode 100644 starpufft/packages/starpufft-1.2.pc.in create mode 100644 starpufft/packages/starpufft-1.3.pc.in create mode 100644 starpufft/packages/starpufft-1.4.pc.in create mode 100644 starpufft/src/Makefile.am create mode 100644 starpufft/src/Makefile.in create mode 100644 starpufft/src/cuda_kernels.cu create mode 100644 starpufft/src/cudaf_kernels.cu create mode 100644 starpufft/src/cudax_kernels.cu create mode 100644 starpufft/src/cudax_kernels.h create mode 100644 starpufft/src/starpufft-double.h create mode 100644 starpufft/src/starpufft-float.h create mode 100644 starpufft/src/starpufft.c create mode 100644 starpufft/src/starpufft_common.c create mode 100644 starpufft/src/starpufftf.c create mode 100644 starpufft/src/starpufftx.c create mode 100644 starpufft/src/starpufftx1d.c create mode 100644 
starpufft/src/starpufftx2d.c create mode 100644 starpufft/src/starpufftx3d.c create mode 100644 starpufft/tests/Makefile.am create mode 100644 starpufft/tests/Makefile.in create mode 100644 starpufft/tests/loader.c create mode 100644 starpufft/tests/test.c create mode 100644 starpufft/tests/test_threads.c create mode 100644 starpufft/tests/testf.c create mode 100644 starpufft/tests/testf_threads.c create mode 100644 starpufft/tests/testx.c create mode 100644 starpufft/tests/testx_threads.c create mode 100644 starpupy/Makefile.am create mode 100644 starpupy/Makefile.in create mode 100644 starpupy/benchmark/Makefile.am create mode 100644 starpupy/benchmark/Makefile.in create mode 100644 starpupy/benchmark/handle_perf_plot.py create mode 100644 starpupy/benchmark/handle_perf_plot_pickle.py create mode 100755 starpupy/benchmark/tasks_size_overhead.gp create mode 100644 starpupy/benchmark/tasks_size_overhead.py create mode 100755 starpupy/benchmark/tasks_size_overhead.sh create mode 100644 starpupy/benchmark/test_handle_bench.py create mode 100644 starpupy/benchmark/test_handle_perf.py create mode 100755 starpupy/benchmark/test_handle_perf.sh create mode 100644 starpupy/benchmark/test_handle_perf_pickle.py create mode 100755 starpupy/benchmark/test_handle_perf_pickle.sh create mode 100644 starpupy/examples/Makefile.am create mode 100644 starpupy/examples/Makefile.in create mode 100644 starpupy/examples/loader.c create mode 100755 starpupy/examples/starpu_py.concurrent.sh create mode 100644 starpupy/examples/starpu_py.py create mode 100755 starpupy/examples/starpu_py.sh create mode 100755 starpupy/examples/starpu_py_handle.concurrent.sh create mode 100644 starpupy/examples/starpu_py_handle.py create mode 100755 starpupy/examples/starpu_py_handle.sh create mode 100755 starpupy/examples/starpu_py_np.concurrent.sh create mode 100644 starpupy/examples/starpu_py_np.py create mode 100755 starpupy/examples/starpu_py_np.sh create mode 100755 
starpupy/examples/starpu_py_numpy.concurrent.sh create mode 100644 starpupy/examples/starpu_py_numpy.py create mode 100755 starpupy/examples/starpu_py_numpy.sh create mode 100644 starpupy/examples/starpu_py_parallel.py create mode 100755 starpupy/examples/starpu_py_parallel.sh create mode 100755 starpupy/examples/starpu_py_partition.concurrent.sh create mode 100644 starpupy/examples/starpu_py_partition.py create mode 100755 starpupy/examples/starpu_py_partition.sh create mode 100755 starpupy/examples/starpu_py_perfmodel.concurrent.sh create mode 100644 starpupy/examples/starpu_py_perfmodel.py create mode 100755 starpupy/examples/starpu_py_perfmodel.sh create mode 100755 starpupy/execute.sh.in create mode 100644 starpupy/src/Makefile.am create mode 100644 starpupy/src/Makefile.in create mode 100644 starpupy/src/__init__.py create mode 100644 starpupy/src/delay.py create mode 100644 starpupy/src/handle_access.py create mode 100644 starpupy/src/intermedia.py create mode 100644 starpupy/src/joblib.py create mode 100644 starpupy/src/setup.cfg.in create mode 100644 starpupy/src/setup.py.in create mode 100644 starpupy/src/starpu_task_wrapper.c create mode 100644 starpupy/src/starpupy_buffer_interface.c create mode 100644 starpupy/src/starpupy_buffer_interface.h create mode 100644 starpupy/src/starpupy_cloudpickle.h create mode 100644 starpupy/src/starpupy_handle.c create mode 100644 starpupy/src/starpupy_handle.h create mode 100644 starpupy/src/starpupy_interface.c create mode 100644 starpupy/src/starpupy_interface.h create mode 100644 starpupy/src/starpupy_numpy_filters.c create mode 100644 starpupy/src/starpupy_numpy_filters.h create mode 100644 starpupy/src/starpupy_private.h create mode 100644 starpurm/Makefile.am create mode 100644 starpurm/Makefile.in create mode 100644 starpurm/examples/Makefile.am create mode 100644 starpurm/examples/Makefile.in create mode 100644 starpurm/examples/async_spawn.c create mode 100644 starpurm/examples/block_test/block_test.c create 
mode 100644 starpurm/examples/chameleon/dgemm.c create mode 100644 starpurm/examples/cuda_vector_scale/vector_scale.c create mode 100644 starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu create mode 100644 starpurm/examples/spawn.c create mode 100644 starpurm/examples/vector_scale.c create mode 100644 starpurm/include/starpurm.h create mode 100644 starpurm/include/starpurm_config.h.in create mode 100644 starpurm/packages/starpurm-1.3.pc.in create mode 100644 starpurm/packages/starpurm-1.4.pc.in create mode 100644 starpurm/src/Makefile.am create mode 100644 starpurm/src/Makefile.in create mode 100644 starpurm/src/starpurm.c create mode 100644 starpurm/src/starpurm_dlb.c create mode 100644 starpurm/src/starpurm_private.h create mode 100644 starpurm/tests/01_init_exit.c create mode 100644 starpurm/tests/02_list_units.c create mode 100644 starpurm/tests/03_cpusets.c create mode 100644 starpurm/tests/04_drs_enable.c create mode 100644 starpurm/tests/Makefile.am create mode 100644 starpurm/tests/Makefile.in create mode 100644 tests/Makefile.am create mode 100644 tests/Makefile.in create mode 100755 tests/coverage/coverage.sh create mode 100644 tests/datawizard/acquire_cb.c create mode 100644 tests/datawizard/acquire_cb_insert.c create mode 100644 tests/datawizard/acquire_release.c create mode 100644 tests/datawizard/acquire_release2.c create mode 100644 tests/datawizard/acquire_release_to.c create mode 100644 tests/datawizard/acquire_try.c create mode 100644 tests/datawizard/allocate.c create mode 100644 tests/datawizard/allocate_many_numa_nodes.c create mode 100644 tests/datawizard/bcsr.c create mode 100644 tests/datawizard/cache.c create mode 100644 tests/datawizard/commute.c create mode 100644 tests/datawizard/commute2.c create mode 100644 tests/datawizard/copy.c create mode 100644 tests/datawizard/critical_section_with_void_interface.c create mode 100644 tests/datawizard/data_deinitialize.c create mode 100644 tests/datawizard/data_implicit_deps.c create mode 
100644 tests/datawizard/data_invalidation.c create mode 100644 tests/datawizard/data_register.c create mode 100644 tests/datawizard/deinitialize_pending_requests.c create mode 100644 tests/datawizard/deps.c create mode 100644 tests/datawizard/dining_philosophers.c create mode 100644 tests/datawizard/double_parameter.c create mode 100644 tests/datawizard/dsm_stress.c create mode 100644 tests/datawizard/gpu_ptr_register.c create mode 100644 tests/datawizard/gpu_register.c create mode 100644 tests/datawizard/handle_to_pointer.c create mode 100644 tests/datawizard/in_place_partition.c create mode 100644 tests/datawizard/increment_init.c create mode 100644 tests/datawizard/increment_redux.c create mode 100644 tests/datawizard/increment_redux_lazy.c create mode 100644 tests/datawizard/increment_redux_partition.c create mode 100644 tests/datawizard/increment_redux_v2.c create mode 100644 tests/datawizard/increment_redux_with_args.c create mode 100644 tests/datawizard/interfaces/bcsr/bcsr_cuda.cu create mode 100644 tests/datawizard/interfaces/bcsr/bcsr_interface.c create mode 100644 tests/datawizard/interfaces/bcsr/bcsr_opencl.c create mode 100644 tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/block/block_cuda.cu create mode 100644 tests/datawizard/interfaces/block/block_interface.c create mode 100644 tests/datawizard/interfaces/block/block_opencl.c create mode 100644 tests/datawizard/interfaces/block/block_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/coo/coo_cuda.cu create mode 100644 tests/datawizard/interfaces/coo/coo_interface.c create mode 100644 tests/datawizard/interfaces/coo/coo_opencl.c create mode 100644 tests/datawizard/interfaces/coo/coo_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/copy_interfaces.c create mode 100644 tests/datawizard/interfaces/csr/csr_cuda.cu create mode 100644 tests/datawizard/interfaces/csr/csr_interface.c create mode 100644 
tests/datawizard/interfaces/csr/csr_opencl.c create mode 100644 tests/datawizard/interfaces/csr/csr_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/matrix/matrix_cuda.cu create mode 100644 tests/datawizard/interfaces/matrix/matrix_interface.c create mode 100644 tests/datawizard/interfaces/matrix/matrix_opencl.c create mode 100644 tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/multiformat/advanced/generic.c create mode 100644 tests/datawizard/interfaces/multiformat/advanced/generic.h create mode 100644 tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c create mode 100644 tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c create mode 100644 tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c create mode 100644 tests/datawizard/interfaces/multiformat/advanced/multiformat_worker.c create mode 100644 tests/datawizard/interfaces/multiformat/advanced/same_handle.c create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_cuda.cu create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_interface.c create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_opencl.c create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/multiformat/multiformat_types.h create mode 100644 tests/datawizard/interfaces/ndim/ndim_cuda.cu create mode 100644 tests/datawizard/interfaces/ndim/ndim_interface.c create mode 100644 
tests/datawizard/interfaces/ndim/ndim_opencl.c create mode 100644 tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/tensor/tensor_cuda.cu create mode 100644 tests/datawizard/interfaces/tensor/tensor_interface.c create mode 100644 tests/datawizard/interfaces/tensor/tensor_opencl.c create mode 100644 tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/test_interfaces.c create mode 100644 tests/datawizard/interfaces/test_interfaces.h create mode 100755 tests/datawizard/interfaces/test_interfaces.sh create mode 100644 tests/datawizard/interfaces/variable/variable_cuda.cu create mode 100644 tests/datawizard/interfaces/variable/variable_interface.c create mode 100644 tests/datawizard/interfaces/variable/variable_opencl.c create mode 100644 tests/datawizard/interfaces/variable/variable_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/vector/vector_cuda.cu create mode 100644 tests/datawizard/interfaces/vector/vector_interface.c create mode 100644 tests/datawizard/interfaces/vector/vector_opencl.c create mode 100644 tests/datawizard/interfaces/vector/vector_opencl_kernel.cl create mode 100644 tests/datawizard/interfaces/void/void_interface.c create mode 100644 tests/datawizard/invalidate_pending_requests.c create mode 100644 tests/datawizard/lazy_allocation.c create mode 100644 tests/datawizard/locality.c create mode 100755 tests/datawizard/locality.sh create mode 100644 tests/datawizard/manual_reduction.c create mode 100644 tests/datawizard/mpi_like.c create mode 100644 tests/datawizard/mpi_like_async.c create mode 100644 tests/datawizard/no_unregister.c create mode 100644 tests/datawizard/noreclaim.c create mode 100644 tests/datawizard/nowhere.c create mode 100644 tests/datawizard/numa_overflow.c create mode 100644 tests/datawizard/partition_dep.c create mode 100644 tests/datawizard/partition_init.c create mode 100644 tests/datawizard/partition_lazy.c 
create mode 100644 tests/datawizard/partition_wontuse.c create mode 100644 tests/datawizard/partitioned_acquire.c create mode 100644 tests/datawizard/partitioned_initialization.c create mode 100644 tests/datawizard/readers_and_writers.c create mode 100644 tests/datawizard/readonly.c create mode 100644 tests/datawizard/reclaim.c create mode 100644 tests/datawizard/redux_acquire.c create mode 100644 tests/datawizard/scal.c create mode 100644 tests/datawizard/scal.h create mode 100644 tests/datawizard/scal_cuda.cu create mode 100644 tests/datawizard/scal_opencl.cl create mode 100644 tests/datawizard/scratch.c create mode 100644 tests/datawizard/scratch_cuda.cu create mode 100644 tests/datawizard/scratch_opencl.c create mode 100644 tests/datawizard/scratch_opencl_kernel.cl create mode 100644 tests/datawizard/scratch_reuse.c create mode 100644 tests/datawizard/simgrid-locality.c create mode 100644 tests/datawizard/specific_node.c create mode 100644 tests/datawizard/specific_node_same.c create mode 100644 tests/datawizard/sync_and_notify_data.c create mode 100644 tests/datawizard/sync_and_notify_data_implicit.c create mode 100644 tests/datawizard/sync_and_notify_data_kernels.cu create mode 100644 tests/datawizard/sync_and_notify_data_opencl.c create mode 100644 tests/datawizard/sync_and_notify_data_opencl_codelet.cl create mode 100644 tests/datawizard/sync_with_data_with_mem.c create mode 100644 tests/datawizard/sync_with_data_with_mem_non_blocking.c create mode 100644 tests/datawizard/sync_with_data_with_mem_non_blocking_implicit.c create mode 100644 tests/datawizard/task_with_multiple_time_the_same_handle.c create mode 100644 tests/datawizard/temporary_partition.c create mode 100644 tests/datawizard/temporary_partition_implicit.c create mode 100644 tests/datawizard/temporary_partition_read.c create mode 100644 tests/datawizard/test_arbiter.cpp create mode 100644 tests/datawizard/unpartition.c create mode 100644 tests/datawizard/user_interaction_implicit.c create mode 
100644 tests/datawizard/variable_parameters.c create mode 100644 tests/datawizard/variable_size.c create mode 100644 tests/datawizard/write_only_tmp_buffer.c create mode 100644 tests/datawizard/wt_broadcast.c create mode 100644 tests/datawizard/wt_host.c create mode 100644 tests/disk/disk_compute.c create mode 100644 tests/disk/disk_copy.c create mode 100644 tests/disk/disk_copy_to_disk.c create mode 100644 tests/disk/disk_copy_unpack.c create mode 100644 tests/disk/disk_pack.c create mode 100644 tests/disk/mem_reclaim.c create mode 100755 tests/energy/dynamic.sh create mode 100644 tests/energy/energy_efficiency.c create mode 100644 tests/energy/perfs.gp create mode 100755 tests/energy/static.sh create mode 100644 tests/errorcheck/invalid_blocking_calls.c create mode 100644 tests/errorcheck/invalid_tasks.c create mode 100644 tests/errorcheck/starpu_init_noworker.c create mode 100644 tests/errorcheck/workers_cpuid.c create mode 100644 tests/fault-tolerance/retry.c create mode 100644 tests/fortran90/init_01.f90 create mode 100644 tests/fortran90/starpu_mod.f90 create mode 100644 tests/helper.h create mode 100644 tests/helper/cublasLt_init.c create mode 100644 tests/helper/cublas_init.c create mode 100644 tests/helper/cusparse_init.c create mode 100644 tests/helper/execute_on_all.c create mode 100644 tests/helper/hipblas_init.c create mode 100644 tests/helper/pinned_memory.c create mode 100644 tests/helper/starpu_create_sync_task.c create mode 100644 tests/helper/starpu_data_cpy.c create mode 100644 tests/helper/starpu_data_dup_ro.c create mode 100644 tests/loader.c create mode 100644 tests/main/bind.c create mode 100644 tests/main/callback.c create mode 100644 tests/main/codelet_null_callback.c create mode 100644 tests/main/const_codelet.c create mode 100644 tests/main/deadlock.c create mode 100644 tests/main/declare_deps_after_submission.c create mode 100644 tests/main/declare_deps_after_submission_synchronous.c create mode 100644 
tests/main/declare_deps_in_callback.c create mode 100644 tests/main/deploop.c create mode 100644 tests/main/deprecated_func.c create mode 100644 tests/main/display_binding.c create mode 100644 tests/main/driver_api/init_run_deinit.c create mode 100644 tests/main/driver_api/run_driver.c create mode 100644 tests/main/empty_task.c create mode 100644 tests/main/empty_task_chain.c create mode 100644 tests/main/empty_task_sync_point.c create mode 100644 tests/main/empty_task_sync_point_tasks.c create mode 100644 tests/main/execute_on_a_specific_worker.c create mode 100644 tests/main/execute_schedule.c create mode 100644 tests/main/get_children_tasks.c create mode 100644 tests/main/get_current_task.c create mode 100644 tests/main/hwloc_cpuset.c create mode 100644 tests/main/insert_task.c create mode 100644 tests/main/insert_task_array.c create mode 100644 tests/main/insert_task_dyn_handles.c create mode 100644 tests/main/insert_task_many.c create mode 100644 tests/main/insert_task_nullcodelet.c create mode 100644 tests/main/insert_task_pack.c create mode 100644 tests/main/insert_task_value.c create mode 100644 tests/main/insert_task_where.c create mode 100644 tests/main/job.c create mode 100644 tests/main/mkdtemp.c create mode 100644 tests/main/multithreaded.c create mode 100644 tests/main/multithreaded_init.c create mode 100644 tests/main/pack.c create mode 100644 tests/main/pause_resume.c create mode 100644 tests/main/regenerate.c create mode 100644 tests/main/regenerate_pipeline.c create mode 100644 tests/main/restart.c create mode 100644 tests/main/starpu_init.c create mode 100644 tests/main/starpu_task_bundle.c create mode 100644 tests/main/starpu_task_wait.c create mode 100644 tests/main/starpu_task_wait_for_all.c create mode 100644 tests/main/starpu_worker_exists.c create mode 100644 tests/main/static_restartable.c create mode 100644 tests/main/static_restartable_tag.c create mode 100644 tests/main/static_restartable_using_initializer.c create mode 100644 
tests/main/subgraph_repeat.c create mode 100644 tests/main/subgraph_repeat_regenerate.c create mode 100644 tests/main/subgraph_repeat_regenerate_tag.c create mode 100644 tests/main/subgraph_repeat_regenerate_tag_cycle.c create mode 100644 tests/main/subgraph_repeat_tag.c create mode 100644 tests/main/submit.c create mode 100644 tests/main/tag_get_task.c create mode 100644 tests/main/tag_task_data_deps.c create mode 100644 tests/main/tag_wait_api.c create mode 100644 tests/main/task_end_dep.c create mode 100644 tests/main/task_wait_api.c create mode 100644 tests/main/wait_all_regenerable_tasks.c create mode 100644 tests/maxfpga/LMemLoopbackCpuCode.c create mode 100644 tests/maxfpga/MyTasksManager.maxj create mode 100644 tests/maxfpga/MyTasksMuxManager.maxj create mode 100644 tests/maxfpga/README.txt create mode 100644 tests/maxfpga/StreamFMACpuCode.cpp create mode 100644 tests/maxfpga/Task1.maxj create mode 100644 tests/maxfpga/Task2.maxj create mode 100644 tests/maxfpga/Task3.maxj create mode 100644 tests/maxfpga/max_fpga_advanced_static.c create mode 100644 tests/maxfpga/max_fpga_basic_static.c create mode 100644 tests/maxfpga/max_fpga_dynamic.c create mode 100644 tests/maxfpga/max_fpga_mux.c create mode 100644 tests/memory/memstress.gp create mode 100755 tests/memory/memstress.sh create mode 100644 tests/memory/memstress2.gp create mode 100755 tests/memory/memstress2.sh create mode 100755 tests/microbenchs/async_tasks_data_overhead.sh create mode 100644 tests/microbenchs/async_tasks_overhead.c create mode 100644 tests/microbenchs/bandwidth.c create mode 100755 tests/microbenchs/bandwidth_scheds.sh create mode 100644 tests/microbenchs/display_structures_size.c create mode 100644 tests/microbenchs/local_pingpong.c create mode 100644 tests/microbenchs/matrix_as_vector.c create mode 100755 tests/microbenchs/microbench.sh create mode 100644 tests/microbenchs/parallel_dependent_homogeneous_tasks_data.c create mode 100755 
tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh create mode 100644 tests/microbenchs/parallel_independent_heterogeneous_tasks.c create mode 100755 tests/microbenchs/parallel_independent_heterogeneous_tasks.sh create mode 100644 tests/microbenchs/parallel_independent_heterogeneous_tasks_data.c create mode 100755 tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh create mode 100644 tests/microbenchs/parallel_independent_homogeneous_tasks.c create mode 100755 tests/microbenchs/parallel_independent_homogeneous_tasks.sh create mode 100644 tests/microbenchs/parallel_independent_homogeneous_tasks_data.c create mode 100755 tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh create mode 100644 tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c create mode 100755 tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh create mode 100644 tests/microbenchs/parallel_redux_homogeneous_tasks_data.c create mode 100755 tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh create mode 100644 tests/microbenchs/prefetch_data_on_node.c create mode 100644 tests/microbenchs/redundant_buffer.c create mode 100755 tests/microbenchs/starpu_check.sh create mode 100755 tests/microbenchs/sync_tasks_data_overhead.sh create mode 100644 tests/microbenchs/sync_tasks_overhead.c create mode 100755 tests/microbenchs/tasks_data_overhead.sh create mode 100644 tests/microbenchs/tasks_overhead.c create mode 100644 tests/microbenchs/tasks_size_overhead.c create mode 100755 tests/microbenchs/tasks_size_overhead.gp create mode 100755 tests/microbenchs/tasks_size_overhead.sh create mode 100755 tests/microbenchs/tasks_size_overhead_sched.sh create mode 100755 tests/microbenchs/tasks_size_overhead_scheds.sh create mode 100644 tests/model-checking/Makefile.am create mode 100644 tests/model-checking/Makefile.in create mode 100755 tests/model-checking/barrier.sh create mode 100644 tests/model-checking/platform.xml create mode 100644 
tests/model-checking/prio_list.c create mode 100755 tests/model-checking/prio_list.sh create mode 100644 tests/model-checking/prio_list2.c create mode 100644 tests/model-checking/prio_list3.c create mode 100755 tests/model-checking/starpu-mc.sh.in create mode 100644 tests/model-checking/starpu_barrier.c create mode 100644 tests/openmp/api_01.c create mode 100644 tests/openmp/array_slice_01.c create mode 100644 tests/openmp/cuda_task_01.c create mode 100644 tests/openmp/environment.c create mode 100644 tests/openmp/init_exit_01.c create mode 100644 tests/openmp/init_exit_02.c create mode 100644 tests/openmp/parallel_01.c create mode 100644 tests/openmp/parallel_02.c create mode 100644 tests/openmp/parallel_03.c create mode 100644 tests/openmp/parallel_barrier_01.c create mode 100644 tests/openmp/parallel_critical_01.c create mode 100644 tests/openmp/parallel_critical_inline_01.c create mode 100644 tests/openmp/parallel_critical_named_01.c create mode 100644 tests/openmp/parallel_critical_named_inline_01.c create mode 100644 tests/openmp/parallel_for_01.c create mode 100644 tests/openmp/parallel_for_02.c create mode 100644 tests/openmp/parallel_for_ordered_01.c create mode 100644 tests/openmp/parallel_master_01.c create mode 100644 tests/openmp/parallel_master_inline_01.c create mode 100644 tests/openmp/parallel_nested_lock_01.c create mode 100644 tests/openmp/parallel_sections_01.c create mode 100644 tests/openmp/parallel_sections_combined_01.c create mode 100644 tests/openmp/parallel_simple_lock_01.c create mode 100644 tests/openmp/parallel_single_copyprivate_01.c create mode 100644 tests/openmp/parallel_single_copyprivate_inline_01.c create mode 100644 tests/openmp/parallel_single_inline_01.c create mode 100644 tests/openmp/parallel_single_nowait_01.c create mode 100644 tests/openmp/parallel_single_wait_01.c create mode 100644 tests/openmp/task_01.c create mode 100644 tests/openmp/task_02.c create mode 100644 tests/openmp/task_03.c create mode 100644 
tests/openmp/taskgroup_01.c create mode 100644 tests/openmp/taskgroup_02.c create mode 100644 tests/openmp/taskloop.c create mode 100644 tests/openmp/taskwait_01.c create mode 100644 tests/overlap/gpu_concurrency.c create mode 100644 tests/overlap/long_kernel.cu create mode 100644 tests/overlap/overlap.c create mode 100755 tests/overlap/overlap.sh create mode 100644 tests/parallel_tasks/combined_worker_assign_workerid.c create mode 100644 tests/parallel_tasks/cuda_only.c create mode 100644 tests/parallel_tasks/explicit_combined_worker.c create mode 100644 tests/parallel_tasks/parallel_kernels.c create mode 100644 tests/parallel_tasks/parallel_kernels_spmd.c create mode 100644 tests/parallel_tasks/parallel_kernels_trivial.c create mode 100644 tests/parallel_tasks/spmd_peager.c create mode 100644 tests/parallel_tasks/swap.c create mode 100644 tests/perfmodels/feed.c create mode 100644 tests/perfmodels/memory.c create mode 100644 tests/perfmodels/non_linear_regression_based.c create mode 100644 tests/perfmodels/opencl_memset.c create mode 100644 tests/perfmodels/opencl_memset_kernel.cl create mode 100644 tests/perfmodels/path.c create mode 100644 tests/perfmodels/regression_based_check.c create mode 100644 tests/perfmodels/regression_based_energy.c create mode 100644 tests/perfmodels/regression_based_gpu.c create mode 100644 tests/perfmodels/regression_based_memset.c create mode 100644 tests/perfmodels/regression_based_multiimpl.c create mode 100644 tests/perfmodels/user_base.c create mode 100644 tests/perfmodels/valid_model.c create mode 100644 tests/perfmodels/value_nan.c create mode 100644 tests/regression/profiles.build.only.in create mode 100644 tests/regression/profiles.in create mode 100755 tests/regression/regression.sh.in create mode 100644 tests/sched_ctx/sched_ctx_hierarchy.c create mode 100644 tests/sched_ctx/sched_ctx_list.c create mode 100644 tests/sched_ctx/sched_ctx_policy_data.c create mode 100644 tests/sched_policies/data_locality.c create mode 
100644 tests/sched_policies/execute_all_tasks.c create mode 100644 tests/sched_policies/prio.c create mode 100644 tests/sched_policies/simple_cpu_gpu_sched.c create mode 100644 tests/sched_policies/simple_deps.c create mode 100644 tests/sched_policies/workerids.c create mode 100644 tests/variable/increment.c create mode 100644 tests/variable/increment.h create mode 100644 tests/variable/increment_cuda.cu create mode 100644 tests/variable/increment_hip.hip create mode 100644 tests/variable/increment_opencl.c create mode 100644 tests/variable/increment_opencl_kernel.cl create mode 100644 tests/variable/neutral_opencl_kernel.cl create mode 100644 tests/variable/redux_opencl_kernel.cl create mode 100644 tools/Makefile.am create mode 100644 tools/Makefile.in create mode 100644 tools/ayudame.cfg create mode 100644 tools/dev/checker/rename.sed create mode 100755 tools/dev/checker/rename.sh create mode 100644 tools/dev/cppcheck/suppressions.txt create mode 100644 tools/dev/lsan/suppressions create mode 100644 tools/dev/tsan/starpu.suppr create mode 100644 tools/dev/valgrind/bash.suppr create mode 100644 tools/dev/valgrind/blas.suppr create mode 100644 tools/dev/valgrind/fxt.suppr create mode 100644 tools/dev/valgrind/glpk.suppr create mode 100644 tools/dev/valgrind/hdf5.suppr create mode 100755 tools/dev/valgrind/helgrind.sh create mode 100644 tools/dev/valgrind/hwloc.suppr create mode 100644 tools/dev/valgrind/libc.suppr create mode 100644 tools/dev/valgrind/libgomp.suppr create mode 100644 tools/dev/valgrind/libnuma.suppr create mode 100644 tools/dev/valgrind/madmpi.suppr create mode 100644 tools/dev/valgrind/nvidia.suppr create mode 100644 tools/dev/valgrind/opencl.suppr create mode 100644 tools/dev/valgrind/openmp.suppr create mode 100644 tools/dev/valgrind/openmpi.suppr create mode 100644 tools/dev/valgrind/p11-kit.suppr create mode 100644 tools/dev/valgrind/padico.suppr create mode 100644 tools/dev/valgrind/papi.suppr create mode 100644 
tools/dev/valgrind/pthread.suppr create mode 100644 tools/dev/valgrind/starpu.suppr create mode 100644 tools/dev/valgrind/starpu_pw.suppr create mode 100644 tools/dev/valgrind/starpupy.suppr create mode 100755 tools/dev/valgrind/valgrind.sh create mode 100644 tools/dev/valgrind/valgrind.suppr create mode 100755 tools/dev/valgrind/valgrind_xml.sh create mode 100755 tools/distrib/distrib.r create mode 100755 tools/distrib/distrib.sh create mode 100644 tools/gdbinit create mode 100644 tools/loader.c create mode 100755 tools/msvc/starpu.sln create mode 100755 tools/msvc/starpu/starpu.vcxproj create mode 100644 tools/msvc/starpu_clean.bat create mode 100644 tools/msvc/starpu_exec.bat create mode 100644 tools/msvc/starpu_open.bat create mode 100644 tools/msvc/starpu_var.bat create mode 100644 tools/msvc/starpu_var.bat.in create mode 100644 tools/patch-ayudame create mode 100644 tools/perfmodels/README create mode 100644 tools/perfmodels/cluster.xml create mode 100644 tools/perfmodels/hostfile create mode 100644 tools/perfmodels/sampling/bus/attila.affinity create mode 100644 tools/perfmodels/sampling/bus/attila.bandwidth create mode 100644 tools/perfmodels/sampling/bus/attila.config create mode 100644 tools/perfmodels/sampling/bus/attila.latency create mode 100644 tools/perfmodels/sampling/bus/attila.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/attila.platform.xml create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.affinity create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.config create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.latency create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml create mode 100644 tools/perfmodels/sampling/bus/hannibal.affinity create mode 100644 tools/perfmodels/sampling/bus/hannibal.bandwidth create mode 
100644 tools/perfmodels/sampling/bus/hannibal.config create mode 100644 tools/perfmodels/sampling/bus/hannibal.latency create mode 100644 tools/perfmodels/sampling/bus/hannibal.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/hannibal.platform.xml create mode 100644 tools/perfmodels/sampling/bus/idgraf.affinity create mode 100644 tools/perfmodels/sampling/bus/idgraf.bandwidth create mode 100644 tools/perfmodels/sampling/bus/idgraf.config create mode 100644 tools/perfmodels/sampling/bus/idgraf.latency create mode 100644 tools/perfmodels/sampling/bus/idgraf.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/idgraf.platform.xml create mode 100644 tools/perfmodels/sampling/bus/mirage.affinity create mode 100644 tools/perfmodels/sampling/bus/mirage.bandwidth create mode 100644 tools/perfmodels/sampling/bus/mirage.config create mode 100644 tools/perfmodels/sampling/bus/mirage.latency create mode 100644 tools/perfmodels/sampling/bus/mirage.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/mirage.platform.xml create mode 100644 tools/perfmodels/sampling/bus/sirocco.affinity create mode 100644 tools/perfmodels/sampling/bus/sirocco.bandwidth create mode 100644 tools/perfmodels/sampling/bus/sirocco.config create mode 100644 tools/perfmodels/sampling/bus/sirocco.latency create mode 100644 tools/perfmodels/sampling/bus/sirocco.platform.v4.xml create mode 100644 tools/perfmodels/sampling/bus/sirocco.platform.xml create mode 100644 tools/perfmodels/sampling/codelets/45/add_scal.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_gemm.attila create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_gemm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_gemm.mirage create mode 100644 
tools/perfmodels/sampling/codelets/45/chol_model_gemm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.attila create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_potrf.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.attila create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_syrk.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.attila create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/chol_model_trsm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/cl_update.attila create mode 100644 tools/perfmodels/sampling/codelets/45/cl_update.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/cl_update.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/cl_update.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/func.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/log_arr.mirage create mode 100644 
tools/perfmodels/sampling/codelets/45/log_list.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/multi.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/multi_2arr.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/multi_list.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/null.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/null.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila create mode 100644 tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_bottom.attila create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_bottom.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_bottom.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_bottom.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_top.attila create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_top.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_top.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/save_cl_top.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/scal.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/scal_arr.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/sqrt.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf create mode 100644 tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage create mode 100644 
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco create mode 100644 tools/perfmodels/sampling/codelets/tmp/mlr_init.out create mode 100755 tools/perfs/bench_sgemm.sh create mode 100755 tools/perfs/error_model.gp create mode 100755 tools/perfs/error_model.sh create mode 100644 tools/release/Makefile create mode 100644 tools/release/README.md create mode 100644 tools/starpu_calibrate_bus.1 create mode 100644 tools/starpu_calibrate_bus.c create mode 100755 tools/starpu_codelet_histo_profile create mode 100644 tools/starpu_codelet_histo_profile.1 create mode 100755 tools/starpu_codelet_histo_profile.in create mode 100755 tools/starpu_codelet_profile create mode 100644 tools/starpu_codelet_profile.1 create mode 100755 tools/starpu_codelet_profile.in create mode 100755 tools/starpu_config create mode 100644 tools/starpu_config.1 create mode 100644 tools/starpu_config.cfg create mode 100644 tools/starpu_config.in create mode 100755 tools/starpu_env create mode 100644 tools/starpu_env.1 create mode 100755 tools/starpu_env.in create mode 100644 tools/starpu_fxt_data_trace.1 create mode 100644 tools/starpu_fxt_data_trace.c create mode 100644 tools/starpu_fxt_number_events_to_names.1 create mode 100755 tools/starpu_fxt_number_events_to_names.py create mode 100644 tools/starpu_fxt_number_events_to_names.py.in create mode 100644 tools/starpu_fxt_stats.1 create mode 100644 tools/starpu_fxt_stats.c create mode 100644 tools/starpu_fxt_tool.1 create mode 100644 tools/starpu_fxt_tool.c create mode 100644 tools/starpu_lp2paje.1 create mode 100644 tools/starpu_lp2paje.c create mode 100644 tools/starpu_machine_display.1 create mode 100644 tools/starpu_machine_display.c create mode 100755 tools/starpu_mlr_analysis create mode 100644 tools/starpu_mlr_analysis.Rmd create mode 100644 tools/starpu_mlr_analysis.in create mode 100644 tools/starpu_mpi_comm_matrix.1 create mode 100755 tools/starpu_mpi_comm_matrix.py create mode 100755 
tools/starpu_mpi_comm_matrix.py.in create mode 100755 tools/starpu_msexec create mode 100755 tools/starpu_paje_draw_histogram create mode 100644 tools/starpu_paje_draw_histogram.1 create mode 100755 tools/starpu_paje_draw_histogram.R create mode 100755 tools/starpu_paje_draw_histogram.in create mode 100755 tools/starpu_paje_sort.in create mode 100755 tools/starpu_paje_state_stats create mode 100644 tools/starpu_paje_state_stats.1 create mode 100755 tools/starpu_paje_state_stats.R create mode 100755 tools/starpu_paje_state_stats.in create mode 100755 tools/starpu_paje_summary create mode 100644 tools/starpu_paje_summary.Rmd create mode 100755 tools/starpu_paje_summary.in create mode 100644 tools/starpu_perfmodel_display.1 create mode 100644 tools/starpu_perfmodel_display.c create mode 100644 tools/starpu_perfmodel_plot.1 create mode 100644 tools/starpu_perfmodel_plot.c create mode 100644 tools/starpu_perfmodel_recdump.c create mode 100644 tools/starpu_replay.c create mode 100644 tools/starpu_replay_sched.c create mode 100644 tools/starpu_sched_display.c create mode 100755 tools/starpu_send_recv_data_use.py create mode 100644 tools/starpu_smpi.xslt create mode 100755 tools/starpu_smpirun create mode 100644 tools/starpu_smpirun.in create mode 100644 tools/starpu_tasks_rec_complete.1 create mode 100644 tools/starpu_tasks_rec_complete.c create mode 100755 tools/starpu_tcpipexec create mode 100755 tools/starpu_tcpipexec.in create mode 100755 tools/starpu_temanejo2.sh create mode 100755 tools/starpu_trace_state_stats.py create mode 100755 tools/starpu_workers_activity create mode 100644 tools/starpu_workers_activity.1 create mode 100755 tools/starpu_workers_activity.in diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..f17cd95 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,225 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +--- +stages: + - build + - coverage + - check + - analyze + - deploy + - new-release + - set-release + - release + +default: + interruptible: true + +.main_template: + rules: + - if: ($CI_PIPELINE_SOURCE != "push") && ($CI_PIPELINE_SOURCE != "trigger") + +# to download all the artifacts https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/download?job=build +# https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/raw/starpu-1.3.99.tar.gz?job=build +# https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/raw/starpu.pdf?job=build +# https://docs.gitlab.com/ee/ci/pipelines/job_artifacts.html#downloading-artifacts +build: + extends: .main_template + stage: build + image: registry.gitlab.inria.fr/starpu/starpu-docker/ci-debian12 + tags: ['ci.inria.fr', 'linux', 'large'] + script: + - ./contrib/gitlab/build.sh + artifacts: + paths: + - starpu*.tar.gz + - starpu*.pdf + +.check_template: + extends: .main_template + stage: check + needs: [build] + dependencies: + - build + script: + - ./contrib/gitlab/deploy.sh + +check_ci: + extends: .check_template + parallel: + matrix: + - NODE: [centos72amd64, macosx] + tags: + - ${NODE} + script: + - ./contrib/gitlab/deploy.sh + +check: + extends: .check_template + parallel: + matrix: + - NODE: [debian12, debian10, ubuntu1604, ubuntu1804, ubuntu2004] + image: 
registry.gitlab.inria.fr/starpu/starpu-docker/ci-${NODE} + tags: ['ci.inria.fr', 'linux', 'large'] + +check_simgrid: + extends: .check_template + image: registry.gitlab.inria.fr/starpu/starpu-docker/ci-debian10 + tags: ['ci.inria.fr', 'linux', 'large'] + script: + - ./contrib/gitlab/simgrid.sh + +.analyze_template: + extends: .main_template + stage: analyze + needs: [build] + dependencies: + - build + +analyze_coverity: + extends: .analyze_template + tags: + - starpu + - linux + - coverity + script: + - ./contrib/gitlab/coverity.sh + +.deploy_template: + extends: .main_template + stage: deploy + needs: [check] + dependencies: + - check + +deploy_chameleon: + extends: .deploy_template + image: registry.gitlab.inria.fr/solverstack/chameleon + tags: ['ci.inria.fr', 'linux', 'large'] + script: + - ./contrib/gitlab/chameleon.sh + +new-release: + stage: new-release + only: + - tags + tags: + - starpu-release + script: + - ./contrib/releases/new-release.sh + timeout: 3h + +set-release: + stage: set-release + dependencies: + - new-release + only: + - tags + tags: + - starpu-release + script: + - ./contrib/releases/set-release.sh + artifacts: + paths: + - release-cli.txt + +release: + stage: release + image: registry.gitlab.com/gitlab-org/release-cli:latest + dependencies: + - set-release + only: + - tags + tags: + - ci.inria.fr + script: + - cat release-cli.txt + - eval "$(cat release-cli.txt)" + +.coverage: + extends: .main_template + stage: coverage + +.coverage_artifacts: + before_script: + - rm -rf ./artifacts + artifacts: + when: always + paths: + - artifacts + +coverage: + extends: [.coverage, .coverage_artifacts] + timeout: 2h + variables: + RUNNER_SCRIPT_TIMEOUT: 118m + RUNNER_AFTER_SCRIPT_TIMEOUT: 2m + tags: ['starpu', 'node_gpu'] + script: + - ./contrib/gitlab/run_profile.sh coverage + after_script: + - ./contrib/gitlab/clean_profile.sh coverage + +coverage_update: + extends: .coverage + timeout: 2h + variables: + RUNNER_SCRIPT_TIMEOUT: 118m + 
RUNNER_AFTER_SCRIPT_TIMEOUT: 2m + dependencies: + - coverage + needs: + - coverage + tags: ['starpu', 'node_gpu'] + script: + - ./contrib/gitlab/upload.sh ./artifacts + +check_windows: + extends: .main_template + stage: check + needs: [build] + dependencies: + - build + timeout: 4h + variables: + RUNNER_SCRIPT_TIMEOUT: 238m + RUNNER_AFTER_SCRIPT_TIMEOUT: 2m + tags: + - starpu + - windows + script: + - sh -c "sed -i 's/\r//' ./contrib/gitlab/windows/build-windows.sh" + - sh -c ./contrib/gitlab/windows/build-windows.sh + artifacts: + when: always + paths: + - artifacts/* + +run_windows: + extends: .main_template + stage: check + needs: [check_windows] + dependencies: + - check_windows + timeout: 4h + variables: + RUNNER_SCRIPT_TIMEOUT: 238m + RUNNER_AFTER_SCRIPT_TIMEOUT: 2m + tags: + - starpu + - windows + script: + - sh -c "sed -i 's/\r//' ./contrib/gitlab/windows/run-windows.sh" + - sh -c ./contrib/gitlab/windows/run-windows.sh diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..bd0fd60 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,62 @@ +Authors +Augonnet Cédric, Université de Bordeaux, +Aumage Olivier, Inria, +Furmento Nathalie, CNRS, +Thibault Samuel, Université de Bordeaux, + +Contributors +Archipoff Simon, Université de Bordeaux, +Beauchamp Guillaume, Inria, +Bramas Berenger, Inria, +Buttari Alfredo, Enseeiht, +Cassagne Adrien, Inria, +Clet-Ortega Jérôme, Inria, +Cojean Terry, Université de Bordeaux, +Collin Nicolas, Inria, +Coti Camille, UQAM +Danjean Vincent, University Grenoble Alpes, +Denis Alexandre, Inria, +Eyraud-Dubois Lionel, Inria, +Flint Clément, Inria, +Fuentes Mathis, ATOS, +Guermouche Amina, Télécom SudParis, +Guilbaud Adrien, Inria, +He Kun, Inria, +Henry Sylvain, Université de Bordeaux, +Hugo Andra, Université de Bordeaux/Inria, +Jego Antoine, Enseeiht, +Juhoor Mehdi, Université de Bordeaux, +Juven Alexis, Inria, +Keryell-Even Maël, Inria, +Khorsi Yanis, Inria, +Kuhn Matthieu, ATOS, +Lambert Thibaut, Inria, +Leria Erwan, Université de 
Bordeaux, +Lizé Benoît, Airbus, +Lucas Gwenolé, University of Bordeaux, +Makni Mariem, Inria, +Nakov Stojce, Inria, +Namyst Raymond, Université de Bordeaux, +Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), +Pablo Joris, Inria, +Paillat Ludovic, Inria, +Pasqualinotto Damien, Université de Bordeaux, +Pinto Vinicius Garcia, +Pitoiset Samuel, Inria, +Point Gérald, CNRS, +Quôc-Dinh Nguyen, IT Sud-Paris, +Roelandt Cyril, Inria, +Sakka Chiheb, Inria, +Salingue Corentin, Inria, +Schnorr Lucas Mello, Federal University of Rio Grande do Sul (UFRGS), +Sergent Marc, CEA/Inria, +Simonet Anthony, Université de Bordeaux, +Stanisic Luka, Inria, +Subervie Bérangère, Inria, +Swartvagher Philippe, Inria, +Tessier François, Université de Bordeaux, +Videau Brice, University Grenoble Alpes, +Villeveygoux Leo, Université de Bordeaux, +Virouleau Philippe, Inria, +Wacrenier Pierre-André, Université de Bordeaux, + diff --git a/COPYING.LGPL b/COPYING.LGPL new file mode 100644 index 0000000..2d2d780 --- /dev/null +++ b/COPYING.LGPL @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..fd28b76 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,1622 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +StarPU 1.4.10 +============================================== +Small changes: + * Fix STARPU_NONE value so it can be used in a list of argument and + not misread as the end of arguments marker. + * Fix handle fetching which was done incorrectly when using parallel + tasks + * Add -d option to starpu_env to allow the use of relocatable + libraries + +StarPU 1.4.9 +============================================== +Small changes: + * Support STARPU_WORKERS_GETBIND in the case where only one thread per core is provided by the OS + * When using STARPU_WORKERS_GETBIND, fix counting nusedpus + * Fix each public API file by including starpu.h before the inclusion guard + * Fix make install on systems with a dist-packages python directory + +StarPU 1.4.8 +============================================== +Small features: + * Add png curve generation to starpu_perfmodel_plot + * Add STARPU_MPI_THREAD_MULTIPLE_SEND environment variable to enable parallel + sending with MPI. + * Add starpu_tag_clear + * Add starpu_cublasLt_init/shutdown/get_local_handle helpers. 
+ +StarPU 1.4.7 +============================================== + +Small changes: + * Fix simgrid version of examples/mult + +StarPU 1.4.6 +============================================== + +Small features: + * Add FXT option -use-task-color to propagate the specified task + color to the contexts + +StarPU 1.4.5 +============================================== + * Do not link libstarpu against libnvidia-ml + +StarPU 1.4.4 +============================================== +Small changes: + * Fix build system for StarPU Python interface + +StarPU 1.4.3 +============================================== +Small features: + * Add starpu_data_partition_readonly_downgrade_submit(). + +Small changes: + * StarPUPY no longer requires python modules joblib and cloudpickle + to be mandatory + +StarPU 1.4.2 +============================================== +Small features: + * New functions starpu_mpi_data_cpy() and starpu_mpi_recv_prio() + * New functions starpu_bind_thread_on_worker(), + starpu_bind_thread_on_main(), starpu_bind_thread_on_cpu(), + and starpu_cpu_os_index() + * New macro STARPU_CUSOLVER_REPORT_ERROR + +StarPU 1.4.1 +============================================== + +Small features: + * Add starpu_mpi_tags_{allocate,free} functions to manage mpi data + tags in distributed memory. + +Changes: + * Fix StarPUPY when not using asyncio: we can use concurrent.futures + instead. + * Add STARPU_CODELET_PROFILING environment variable to disable codelet task + counting for applications to be able to have const codelets. + * In performance bounds, take into account the standard deviation to get the + "expected" upper bound, in terms of expected optimistic deviation from the + average, rather than the average. + +Small changes: + * Fix function starpu_mpi_wait_for_all() + * Fix building atomic functions with llvm on 32bit systems. 
+ * SOCL: Fix missing CL_CALLBACK for various callback functions + * Update prologue function names for parallel workers + +StarPU 1.4.0 +============================================== + +New features: + * Add a starpu_mpi_task_submit-oriented way of submitting MPI tasks + with functions starpu_mpi_task_exchange_data_before_execution() and + starpu_mpi_task_exchange_data_after_execution() + * Possibility to specify different directories to store performance + model files with new variable STARPU_PERF_MODEL_PATH + * Checkpoint mechanism for MPI applications + * Transaction support + * OpenMP LLVM support + * Driver for HIP-based GPUs. + * Fault tolerance support with starpu_task_ft_failed(). + * Julia programming interface. + * Add get_max_size method to data interfaces for applications using data with + variable size to express their maximal potential size. + * New offline tool to draw graph showing elapsed time between sent + or received data and their use by tasks + * Add 4D tensor data interface. + * New sched_tasks.rec trace file which monitors task scheduling push/pop actions + * New STARPU_MPI_MEM_THROTTLE environment variable to throttle mpi + submission according to memory use. + * New number_events.data trace file which monitors number of events in trace + files. This file can be parsed by the new script + starpu_fxt_number_events_to_names.py to convert event keys to event names. + * New STARPU_PER_WORKER perfmodel. + * Add energy accounting in the simgrid mode: starpu_energy_use() and + starpu_energy_used(). + * New function starpu_mpi_get_thread_cpuid() to know where is bound the MPI + thread. + * New function starpu_get_pu_os_index() to convert logical index of a PU to + its OS index. + * New function starpu_get_hwloc_topology() to get the hwloc topology used by + StarPU. + * Add a task prefetch level, to improve retaining data in accelerators so we + can make prefetch more aggressive. + * Add starpu_data_dup_ro(). 
+ * Add starpu_data_release_to() and starpu_data_release_to_on_node(). + * Add profiling based on papi performance counters. + * Add an experimental python interface (not actually parallel yet) + * Add task submission file+line in traces. + * Add papi- and nvml-based energy measurement. + * Add starpu_mpi_datatype_node_register and + starpu_mpi_interface_datatype_node_register which will be needed for + MPI/NUMA/GPUDirect. + * Add peek_data interface method. + * Add support of dynamic broadcasts when StarPU-MPI is used with + NewMadeleine. + * New STARPU_MPI_RECV_WAIT_FINALIZE environment variable to wait until the + communication library completely releases the handle to unlock tasks + (instead of just releasing the write lock). Only for NewMadeleine. + * Add STARPU_MPI_REDUX + * New StarPU Java Bindings + * Add starpu_data_query_status2 function. + * Add starpu_data_evict_from_node function. + * Add a StarPU Eclipse Plugin + * Add support for Maxeler FPGA accelerators. + * Add 4D tensors filters. + * Add n-dimension data interface and filters. + * New STARPU_FXT_EVENTS environment variable to select at runtime which + event categories have to be recorded. + * Add support of mpi_sync_clocks for more precise distributed traces. + * Add more worker states in STARPU_PROFILING: callback, waiting, scheduling. + * Support for hierarchical tasks + * Support mapping memory between CPU RAM and GPU RAM, instead of copying + data. + * New function starpu_get_memory_location_bitmap() and register in traces on + which NUMA node are buffers used for MPI or tasks. + * TCP/IP-based master-slave support. + * Set STARPU_WORKERS_GETBIND to 1 by default, to inherit CPU binding from + the job scheduler. + * Add starpu_{vector,matrix,block,tensor,ndim}_filter_pick_variable. + * New operator for data interfaces pack_meta(), unpack_meta() and + free_meta() which are used in master slave mode for data + interfaces with a dynamic content. + * Add CUSOLVER support.
+ * Add STARPU_NOFOOTPRINT data access flag. + +Small features: + * New function starpu_mpi_comm_register() to store the size and the + rank of the given communicator (update functions + starpu_mpi_comm_rank() and starpu_mpi_comm_size() to no longer + call directly the mpi functions) + * New configure option --with-check-cflags to define flags for C, + CXX and Fortran compilers + * FxT is now automatically enabled at build-time, but not enabled at + run-time by default any more, STARPU_FXT_TRACE needs to be explicitly set to + 1 to enable FxT trace recording. + * Deprecate starpu_free() and add new function starpu_free_noflag() + to specify allocated size. + * Reuse matrix tiles that have different shapes but same allocation size. + * Add starpu_task_create_sync + * Add ram_colind/rowptr to csr and bcsr data interfaces. This allows to make + starpu_bcsr_filter_vertical_block work on several memory nodes. + * Add cuda0 and cuda1 example drivers. + * New STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK environment variable to tune + transfer estimation times. + * Add tool starpu_config to display the configuration StarPU was + compiled with + * Possibility to enable data locality enforcement when choosing a + worker to run a task implementation + * New function starpu_data_partition_clean_node() to specify node on + which to gather data + * Move to the public API some scheduler utility functions + * New variable STARPU_SCHED_LIB to dynamically load a new scheduling + policy + * Enable GPUDirect when MPI supports it. + * Install a module file in lib/modules + * New function starpu_worker_wait_for_initialisation() which waits + for all workers to be initialised + * Add in the public API the codelet starpu_codelet_nop which has an + empty function defined for all drivers + * Add starpu_task_expected_length_average and + starpu_task_expected_energy_average. + * Add STARPU_SIMGRID_TASK_PUSH_COST environment variable. 
+ * Add starpu_memory_nodes_get_count_by_kind and + starpu_memory_node_get_ids_by_type. + * Add STARPU_MPI_REDUX_ARITY_THRESHOLD to tune the type of tree used in + distributed-memory reduction patterns that are automatically detected. + * New function starpu_data_set_reduction_methods_with_args() to + specify arguments to pass to the reduction and init tasks + +Changes: + * The redux codelet should expose the STARPU_COMMUTE flag, since StarPU + actually uses commutability. + * Rename STARPU_COMM_STATS environment variable to STARPU_MPI_STATS + * Function starpu_data_lookup has been removed, it is now up to the + calling code to manage a ptr-to-handle reverse lookup table when + needed. + * Cluster is renamed to parallel worker but the old API is kept as + deprecated + * Removed pop_every_task scheduler method, unused since long. + +Small changes: + * starpu_mpi_task_insert() returns -ENODEV if no worker is available + on the node which is to execute the codelet (the other nodes do + not return -ENODEV) + * Add a synthetic energy efficiency testcase. + * Make reduction methods want the commute flag. + * Delete old MIC driver code + * Rename + - starpu_conf::sched_policy_init to starpu_conf::sched_policy_callback + and + - starpu_sched_ctx_get_sched_policy_init() to starpu_sched_ctx_get_sched_policy_callback() + as the callback function may not only be used for init purposes + * Change the default value for configure option --enable-maxcpus to + auto. It allows StarPU to automatically use the number of CPUs + on the build machine. + * New option --worker for tool starpu_machine_display to only + display workers of a specific type + * Remove the unused and untested mpi_ms_funcs field. + * The home_node parameter of the register_data_handle method is turned from + unsigned to int, to make explicit that it may be -1. + * Value 0 for STARPU_MPI_NDETACHED_SEND and STARPU_MPI_NREADY_PROCESS will + now disable their behaviour.
+ * Distributed-memory reduction patterns are automatically wrapped-up if the user + does not call starpu_mpi_redux_data() + * Remove starpu_data_pointer_is_inside(). + +StarPU 1.3.12 +==================================================================== + +Small changes: + * Add starpu_data_deinitialize and starpu_data_deinitialize_submit + +StarPU 1.3.11 +==================================================================== + +Small changes: + * Fix building with cuda 12 + +StarPU 1.3.10 +==================================================================== + +Small features: + * Add starpu_worker_get_current_task_exp_end. + +Small changes: + * Change the default value for configure option --enable-maxcpus to + auto. It allows StarPU to automatically use the number of CPUs + on the build machine. + +StarPU 1.3.9 +==================================================================== + +Small changes: + * Add missing interface macros for BCSR data interface + +StarPU 1.3.8 +==================================================================== + +Small features: + * A codelet can now define a callback function pointer which will be + automatically called when the task does not define itself a + callback function, in that case, it can still be called from the + task callback function. + * New STARPU_WORKERS_COREID, STARPU_MAIN_THREAD_COREID and + STARPU_MPI_THREAD_COREID environment variables to bind threads to cores + instead of hyperthreads. + * New STARPU_TASK_PROGRESS environment variable to show task progression. + * Add STARPU_SIMGRID environment variable guard against native builds. + * Add starpu_cuda_get_nvmldev function. + * New configure option --with-check-cflags to define flags for C, + CXX and Fortran compilers + * Add starpu_sched_tree_deinitialize function. + * Add STARPU_SCHED_SORTED_ABOVE and STARPU_SCHED_SORTED_BELOW environment + variables. + * Add STARPU_SCHED_SIMPLE_PRE_DECISION. + * Add starpu_bcsr_filter_canonical_block_get_nchildren.
+ * Add unregister_data_handle handle ops. + +StarPU 1.3.7 +==================================================================== + +Small changes: + * Simgrid: bug fix for setting network/weight-S to 0.0 + +StarPU 1.3.6 (git revision fb9fbed81410d9f0ebbff5bdad1352df4705efe8) +==================================================================== + +Small features: + * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to set the + exponential backoff limits of the number of cycles to pause while drivers + are spinning. + * Add STARPU_DISPLAY_BINDINGS environment variable and + starpu_display_bindings() function to display all bindings on the machine by + calling hwloc-ps + * New function starpu_get_pu_os_index() to convert logical index of a PU to + its OS index. + * New function starpu_get_hwloc_topology() to get the hwloc topology used by + StarPU. + +StarPU 1.3.5 (git revision 5f7458799f548026fab357b18541bb462dde2b53) +==================================================================== + +Small features: + * New environment variable STARPU_FXT_SUFFIX to set the filename in + which to save the fxt trace + * New option -d for starpu_fxt_tool to specify in which directory to + generate files + +Small changes: + * Move MPI cache functions into the public API + * Add STARPU_MPI_NOBIND environment variable. + +StarPU 1.3.4 (git revision c37a5d024cd997596da41f765557c58099baf896) +==================================================================== + +Small features: + * New environment variables STARPU_BUS_STATS_FILE and + STARPU_WORKER_STATS_FILE to specify files in which to display + statistics about data transfers and workers. + * Add starpu_bcsr_filter_vertical_block filtering function. + * Add starpu_interface_copy2d, 3d, and 4d to easily request data copies from + data interfaces. + * Move optimized cuda 2d copy from interfaces to new + starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use + them from starpu_interface_copy2d and 3d.
+ * New function starpu_task_watchdog_set_hook to specify a function + to be called when the watchdog is raised + * Add STARPU_LIMIT_CPU_NUMA_MEM environment variable. + * Add STARPU_WORKERS_GETBIND environment variable. + * Add STARPU_SCHED_SIMPLE_DECIDE_ALWAYS modular scheduler flag. + * Add STARPU_LIMIT_BANDWIDTH environment variable. + * Add field starpu_conf::precedence_over_environment_variables to ignore + environment variables when parameters are set directly in starpu_conf + * Add starpu_data_get_coordinates_array + * MPI: new functions starpu_mpi_interface_datatype_register() and + starpu_mpi_interface_datatype_unregister() which take an enum + starpu_data_interface_id instead of a starpu_data_handle_t + * New script starpu_env to set up StarPU environment variables + +Small changes: + * New configure option --disable-build-doc-pdf + +StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad) +==================================================================== + +New features: + * New semantic for starpu_task_insert() and alike parameters + STARPU_CALLBACK_ARG, STARPU_PROLOGUE_CALLBACK_ARG, and + STARPU_PROLOGUE_CALLBACK_POP_ARG which set respectively + starpu_task::callback_arg_free, + starpu_task::prologue_callback_arg_free and + starpu_task::prologue_callback_pop_arg_free to 1 when used. + New parameters STARPU_CALLBACK_ARG_NFREE, + STARPU_CALLBACK_WITH_ARG_NFREE, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, and + STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE which set the corresponding + fields of starpu_task to 0. + * starpufft: Support 3D. + * New modular-eager-prio scheduler. + * Add 'ready' heuristic to modular schedulers. + * New modular-heteroprio scheduler. + * Add STARPU_TASK_SCHED_DATA + * Add support for staging schedulers. + * New modular-heteroprio-heft scheduler.
+ * New dmdap "data-aware performance model (priority)" scheduler + +Changes: + * Modification in the Native Fortran interface of the functions + fstarpu_mpi_task_insert, fstarpu_mpi_task_build and + fstarpu_mpi_task_post_build to only take 1 parameter being the MPI + communicator, the codelet and the various parameters for the task. + +Small features: + * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS + allowing to set the fields starpu_task::workerids_len and + starpu_task::workerids + * New starpu_task_insert() and alike parameters + STARPU_SEQUENTIAL_CONSISTENCY, STARPU_TASK_NO_SUBMITORDER and + STARPU_TASK_PROFILING_INFO + * New function starpu_create_callback_task() which creates and + submits an empty task with the specified callback + * Use the S4U interface of Simgrid instead of xbt and MSG. + +Small changes: + * Default modular worker queues to 2 tasks unless it's an heft + scheduler + * Separate out STATUS_SLEEPING_SCHEDULING state from + STATUS_SLEEPING state + When running the scheduler while being idle, workers do not go in + the STATUS_SCHEDULING state, so that that time is considered as + idle time instead of overhead. + +StarPU 1.3.2 (git revision af22a20fc00a37addf3cc6506305f89feed940b0) +==================================================================== + +Small changes: + * Improve OpenMP support to detect the environment is valid before + launching OpenMP + * Delete old code (drivers gordon, scc, starpu-top, and plugin gcc) + and update authors file accordingly + * Add Heteroprio documentation (including a simple example) + * Add a progression hook, to be called when workers are idle, which + is used in the NewMadeleine implementation of StarPU-MPI to ensure + communications progress. + +StarPU 1.3.1 (git revision 01949488b4f8e6fe26d2c200293b8aae5876b038) +==================================================================== + +Small features: + * Add starpu_filter_nparts_compute_chunk_size_and_offset helper. 
+ * Add starpu_bcsr_filter_canonical_block_child_ops. + +Small changes: + * Improve detection of NVML availability. Do not only check the + library is available, also check the compiled code can be run. + +StarPU 1.3.0 (git revision 24ca83c6dbb102e1cfc41db3bb21c49662067062) +==================================================================== + +New features: + * New scheduler 'heteroprio' with heterogeneous priorities + * Support priorities for data transfers. + * Add support for multiple linear regression performance models + - Bump performance model file format version to 45. + * Add MPI Master-Slave support to use the cores of remote nodes. Use the + --enable-mpi-master-slave option to activate it. + * Add STARPU_CUDA_THREAD_PER_DEV environment variable to support driving all + GPUs from only one thread when almost all kernels are asynchronous. + * Add starpu_replay tool to replay tasks.rec files with Simgrid. + * Add experimental support of NUMA nodes. Use STARPU_USE_NUMA to activate it. + * Add a new set of functions to make Out-of-Core based on HDF5 Library. + * Add a new implementation of StarPU-MPI on top of NewMadeleine + * Add optional callbacks to notify an external resource manager + about workers going to sleep and waking up + * Add implicit support for asynchronous partition planning. This means one + does not need to call starpu_data_partition_submit() etc. explicitly any + more, StarPU will make the appropriate calls as needed. + * Add starpu_task_notify_ready_soon_register() to be notified when it is + determined when a task will be ready an estimated amount of time from now. + * New StarPU-MPI initialization function (starpu_mpi_init_conf()) + which allows StarPU-MPI to manage reserving a core for the MPI thread, or + merging it with CPU driver 0. 
+ * Add possibility to delay the termination of a task with the + functions starpu_task_end_dep_add() which specifies the number of + calls to the function starpu_task_end_dep_release() needed to + trigger the task termination, or with starpu_task_declare_end_deps_array() + and starpu_task_declare_end_deps() to just declare termination dependencies + between tasks. + * Add possibility to define the sequential consistency at the task level + for each handle used by the task. + * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and + STARPU_SPECIFIC_NODE_SLOW as generic values for codelet specific memory + nodes which can be used instead of exact node numbers. + * Add starpu_get_next_bindid() and starpu_bind_thread_on() to allow + binding an application-started thread on a free core. Use it in + StarPU-MPI to automatically bind the MPI thread on an available core. + * Add STARPU_RESERVE_NCPU environment variable and + starpu_config::reserve_ncpus field to make StarPU use a few cores + less. + * Add STARPU_MAIN_THREAD_BIND environment variable to make StarPU reserve a + core for the main thread. + * New StarPU-RM resource management module to share processor cores and + accelerator devices with other parallel runtime systems. Use + --enable-starpurm option to activate it. + * New schedulers modular-gemm, modular-pheft, modular-prandom and + modular-prandom-prio + * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix + tile or vector size without reallocating the buffer. + * Application can change the allocation used by StarPU with + starpu_malloc_set_hooks() + * XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml() + function + +Small features: + * Scheduling contexts may now be associated a user data pointer at creation + time, that can later be recalled through starpu_sched_ctx_get_user_data(). 
+ * New environment variables STARPU_SIMGRID_TASK_SUBMIT_COST and + STARPU_SIMGRID_FETCHING_INPUT_COST to simulate the cost of task + submission and data fetching in simgrid mode. + This provides more accurate simgrid predictions, especially for the + beginning of the execution and regarding data transfers. + * New environment variable STARPU_SIMGRID_SCHED_COST to take into + account the time to perform scheduling when running in SimGrid mode. + * New configure option --enable-mpi-pedantic-isend (disabled by + default) to acquire data in STARPU_RW (instead of STARPU_R) before + performing MPI_Isend() call + * New function starpu_worker_display_names() to display the names of + all the workers of a specified type. + * Arbiters now support concurrent read access. + * Add a field starpu_task::where similar to starpu_codelet::where + which allows to restrict where to execute a task. Also add + STARPU_TASK_WHERE to be used when calling starpu_task_insert(). + * Add SubmitOrder trace field. + * Add workerids and workerids_len task fields. + * Add priority management to StarPU-MPI. Can be disabled with + the STARPU_MPI_PRIORITIES environment variable. + * Add STARPU_MAIN_THREAD_CPUID and STARPU_MPI_THREAD_CPUID environment + variables. + * Add disk to disk copy functions and support asynchronous full read/write + in disk backends. + * New starpu_task_insert() parameter STARPU_CL_ARGS_NFREE which allows + to set codelet parameters but without freeing them. + * New starpu_task_insert() parameter STARPU_TASK_DEPS_ARRAY which + allows to declare task dependencies similarly to + starpu_task_declare_deps_array() + * Add dependency backward information in debugging mode for gdb's + starpu-print-task + * Add sched_data field in starpu_task structure. 
+ * New starpu_fxt_tool option -label-deps to label dependencies on + the output graph + * New environment variable STARPU_GENERATE_TRACE_OPTIONS to specify + fxt options (to be used with STARPU_GENERATE_TRACE) + * New function starpu_task_set() similar as starpu_task_build() but + with a task object given as the first parameter + * New functions + starpu_data_partition_submit_sequential_consistency() and + starpu_data_unpartition_submit_sequential_consistency() + * Add a new value STARPU_TASK_SYNCHRONOUS to be used in + starpu_task_insert() to define if the task is (or not) synchronous + * Add memory states events in the traces. + * Add starpu_sched_component_estimated_end_min_add() to fix termination + estimations in modular schedulers. + * New function starpu_data_partition_not_automatic() to disable the + automatic partitioning of a data handle for which a asynchronous + plan has previously been submitted + * Add starpu_task_declare_deps() + * New function starpu_data_unpartition_submit_sequential_consistency_cb() + to specify a callback for the task submitting the unpartitioning + * New tool starpu_mpi_comm_trace.py to draw heatmap of MPI + communications + * Support for ARM performance libraries + * Add functionality to disable signal catching either through field + starpu_conf::catch_signals or through the environment variable + STARPU_CATCH_SIGNALS + * Support for OpenMP Taskloop directive + * Optional data interface init function (used by the vector and + matrix interfaces) + +Changes: + * Vastly improve simgrid simulation time. + * Switch default scheduler to lws. + * Add "to" parameter to pull_task and can_push methods of + components. + * Deprecate starpu_data_interface_ops::handle_to_pointer interface + operation in favor of new starpu_data_interface_ops::to_pointer + operation. + * Sort data access requests by priority. 
+ * Cluster support is disabled by default, unless the configure + option --enable-cluster is specified + * For unpack operations, move the memory deallocation from + starpu_data_unpack() to the interface function + starpu_data_interface_ops::unpack_data(). Pack and unpack + functions of predefined interfaces + use public API starpu_malloc_on_node_flags() and + starpu_free_on_node_flags() to allocate and de-allocate memory + +Small changes: + * Use asynchronous transfers for task data fetches which were not prefetched. + * Allow to call starpu_sched_ctx_set_policy_data on the main + scheduler context + * Function starpu_is_initialized() is moved to the public API. + * Fix code to allow to submit tasks to empty contexts + * STARPU_COMM_STATS also displays the bandwidth + * Update data interfaces implementations to only use public API + +StarPU 1.2.11 (git revision xxx) +==================================================================== + +Small features: + * Add starpu_tag_notify_restart_from_apps(). + +StarPU 1.2.10 (git revision beb6ac9cc07dc9ae1c838a38d11ed2dae3775996) +==================================================================== + +Small features: + * New script starpu_env to set up StarPU environment variables + * New configure option --disable-build-doc-pdf + +StarPU 1.2.9 (git revision 3aca8da3138a99e93d7f93905d2543bd6f1ea1df) +==================================================================== + +Small changes: + * Add STARPU_SIMGRID_TRANSFER_COST environment variable to easily disable + data transfer costs. + * New dmdap "data-aware performance model (priority)" scheduler + * Modification in the Native Fortran interface of the functions + fstarpu_mpi_task_insert, fstarpu_mpi_task_build and + fstarpu_mpi_task_post_build to only take 1 parameter being the MPI + communicator, the codelet and the various parameters for the task.
+ +StarPU 1.2.8 (git revision f66374c9ad39aefb7cf5dfc31f9ab3d756bcdc3c) +==================================================================== + +Small features: + * Minor fixes + +StarPU 1.2.7 (git revision 07cb7533c22958a76351bec002955f0e2818c530) +==================================================================== + +Small features: + * Add STARPU_HWLOC_INPUT environment variable to save initialization time. + * Add starpu_data_set/get_ooc_flag. + * Use starpu_mpi_tag_t (int64_t) for MPI communication tag + +StarPU 1.2.6 (git revision 23049adea01837479f309a75c002dacd16eb34ad) +==================================================================== + +Small changes: + * Fix crash for lws scheduler + * Avoid making hwloc load PCI topology when CUDA is not enabled + +StarPU 1.2.5 (git revision 22f32916916d158e3420033aa160854d1dd341bd) +==================================================================== + +Small features: + * Add a new value STARPU_TASK_COLOR to be used in + starpu_task_insert() to pick up the color of a task in dag.dot + * Add starpu_data_pointer_is_inside(). + +Changes: + * Do not export -lcuda -lcudart -lOpenCL in *starpu*.pc. + +StarPU 1.2.4 (git revision 255cf98175ef462749780f30bfed21452b74b594) +==================================================================== + +Small features: + * Catch of signals SIGINT and SIGSEGV to dump fxt trace files. + * New configure option --disable-icc to disable the compilation of + specific ICC examples + * Add starpu_codelet_pack_arg_init, starpu_codelet_pack_arg, + starpu_codelet_pack_arg_fini for more fine-grain packing capabilities. + * Add starpu_task_insert_data_make_room, + starpu_task_insert_data_process_arg, + starpu_task_insert_data_process_array_arg, + starpu_task_insert_data_process_mode_array_arg + * Do not show internal tasks in fxt dag by default. Allow to hide + acquisitions too. + * Add a way to choose the dag.dot colors. 
+ + +StarPU 1.2.3 (git revision 586ba6452a8eef99f275c891ce08933ae542c6c2) +==================================================================== + +New features: + * Add per-node MPI data. + +Small features: + * When debug is enabled, starpu data accessors first check the + validity of the data interface type + * Print disk bus performances when STARPU_BUS_STATS is set + * Add starpu_vector_filter_list_long filter. + * Data interfaces now define a name through the struct starpu_data_interface_ops + * StarPU-MPI : + - allow predefined data interface not to define a mpi datatype and + to be exchanged through pack/unpack operations + - New function starpu_mpi_comm_get_attr() which allows to return + the value of the attribute STARPU_MPI_TAG_UB, i.e the upper + bound for tag value. + - New configure option enable-mpi-verbose to manage the display of + extra MPI debug messages. + * Add STARPU_WATCHDOG_DELAY environment variable. + * Add a 'waiting' worker status + * Allow new value 'extra' for configure option --enable-verbose + +Small changes: + * Add data_unregister event in traces + * StarPU-MPI + - push detached requests at the back of the testing list, so they + are tested last since they will most probably finish latest + * Automatically initialize handles on data acquisition when + reduction methods are provided, and make sure a handle is + initialized before trying to read it. + +StarPU 1.2.2 (git revision a0b01437b7b91f33fb3ca36bdea35271cad34464) +=================================================================== + +New features: + * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try. + * Add NVCC_CC environment variable. + * Add -no-flops and -no-events options to starpu_fxt_tool to make + traces lighter + * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA + overlapping with cusparse. 
+ * Allow precise debugging by setting STARPU_TASK_BREAK_ON_PUSH, + STARPU_TASK_BREAK_ON_SCHED, STARPU_TASK_BREAK_ON_POP, and + STARPU_TASK_BREAK_ON_EXEC environment variables, with the job_id + of a task. StarPU will raise SIGTRAP when the task is being + scheduled, pushed, or popped by the scheduler. + +Small features: + * New function starpu_worker_get_job_id(struct starpu_task *task) + which returns the job identifier for a given task + * Show package/numa topology in starpu_machine_display + * MPI: Add mpi communications in dag.dot + * Add STARPU_PERF_MODEL_HOMOGENEOUS_CPU environment variable to + allow having one perfmodel per CPU core + * Add starpu_perfmodel_arch_comb_fetch function. + * Add starpu_mpi_get_data_on_all_nodes_detached function. + +Small changes: + * Output generated through STARPU_MPI_COMM has been modified to + allow easier automated checking + * MPI: Fix reactivity of the beginning of the application, when a + lot of ready requests have to be processed at the same time, we + want to poll the pending requests from time to time. + * MPI: Fix gantt chart for starpu_mpi_irecv: it should use the + termination time of the request, not the submission time. + * MPI: Modify output generated through STARPU_MPI_COMM to allow + easier automated checking + * MPI: enable more tests in simgrid mode + * Use assumed-size instead of assumed-shape arrays for native + fortran API, for better backward compatibility. + * Fix odd ordering of CPU workers on CPUs due to GPUs stealing some + cores + +StarPU 1.2.1 (git revision 473acaec8a1fb4f4c73d8b868e4f044b736b41ea) +==================================================================== + +New features: + * Add starpu_fxt_trace_user_event_string. + * Add starpu_tasks_rec_complete tool to add estimation times in tasks.rec + files. + * Add STARPU_FXT_TRACE environment variable. + * Add starpu_data_set_user_data and starpu_data_get_user_data. 
+ * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating + execution of just one MPI node. + * Add STARPU_PERF_MODEL_HOMOGENEOUS_CUDA/OPENCL/MIC/SCC to share performance + models between devices, making calibration much faster. + * Add modular-heft-prio scheduler. + * Add starpu_cublas_get_local_handle helper. + * Add starpu_data_set_name, starpu_data_set_coordinates_array, and + starpu_data_set_coordinates to describe data, and starpu_iteration_push and + starpu_iteration_pop to describe tasks, for better offline traces analysis. + * New function starpu_bus_print_filenames() to display filenames + storing bandwidth/affinity/latency information, available through + tools/starpu_machine_display -i + * Add support for Ayudame version 2.x debugging library. + * Add starpu_sched_ctx_get_workers_list_raw, much less costly than + starpu_sched_ctx_get_workers_list + * Add starpu_task_get_name and use it to warn about dmda etc. using + a dumb policy when calibration is not finished + * MPI: Add functions to test for cached values + +Changes: + * Fix performance regression of lws for small tasks. + * Improve native Fortran support for StarPU + +Small changes: + * Fix type of data home node to allow users to pass -1 to define + temporary data + * Fix compatibility with simgrid 3.14 + +StarPU 1.2.0 (git revision 5a86e9b61cd01b7797e18956283cc6ea22adfe11) +==================================================================== + +New features: + * MIC Xeon Phi support + * SCC support + * New function starpu_sched_ctx_exec_parallel_code to execute a + parallel code on the workers of the given scheduler context + * MPI: + - New internal communication system : a unique tag called + is now used for all communications, and a system + of hashmaps on each node which stores pending receives has been + implemented. 
Every message is now coupled with an envelope, sent + before the corresponding data, which allows the receiver to + allocate data correctly, and to submit the matching receive of + the envelope. + - New function + starpu_mpi_irecv_detached_sequential_consistency which + allows to enable or disable the sequential consistency for + the given data handle (sequential consistency will be + enabled or disabled based on the value of the function + parameter and the value of the sequential consistency + defined for the given data) + - New functions starpu_mpi_task_build() and + starpu_mpi_task_post_build() + - New flag STARPU_NODE_SELECTION_POLICY to specify a policy for + selecting a node to execute the codelet when several nodes + own data in W mode. + - New selection node policies can be un/registered with the + functions starpu_mpi_node_selection_register_policy() and + starpu_mpi_node_selection_unregister_policy() + - New environment variable STARPU_MPI_COMM which enables + basic tracing of communications. + - New function starpu_mpi_init_comm() which allows to specify + a MPI communicator. + * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to + let starpu commute write accesses. + * Out-of-core support, through registration of disk areas as additional memory + nodes. It can be enabled programmatically or through the STARPU_DISK_SWAP* + environment variables. + * Reclaiming is now periodically done before memory becomes full. This can + be controlled through the STARPU_*_AVAILABLE_MEM environment variables. + * New hierarchical schedulers which allow the user to easily build + its own scheduler, by coding itself each "box" it wants, or by + combining existing boxes in StarPU to build it. Hierarchical + schedulers have very interesting scalability properties. + * Add STARPU_CUDA_ASYNC and STARPU_OPENCL_ASYNC flags to allow asynchronous + CUDA and OpenCL kernel execution. 
+ * Add STARPU_CUDA_PIPELINE and STARPU_OPENCL_PIPELINE to specify how + many asynchronous tasks are submitted in advance on CUDA and + OpenCL devices. Setting the value to 0 forces a synchronous + execution of all tasks. + * Add CUDA concurrent kernel execution support through + the STARPU_NWORKER_PER_CUDA environment variable. + * Add CUDA and OpenCL kernel submission pipelining, to overlap costs and allow + concurrent kernel execution on Fermi cards. + * New locality work stealing scheduler (lws). + * Add STARPU_VARIABLE_NBUFFERS to be set in cl.nbuffers, and nbuffers and + modes field to the task structure, which permit to define codelets taking a + variable number of data. + * Add support for implementing OpenMP runtimes on top of StarPU + * New performance model format to better represent parallel tasks. + Used to provide estimations for the execution times of the + parallel tasks on scheduling contexts or combined workers. + * starpu_data_idle_prefetch_on_node and + starpu_idle_prefetch_task_input_on_node allow to queue prefetches to be done + only when the bus is idle. + * Make starpu_data_prefetch_on_node not forcibly flush data out, introduce + starpu_data_fetch_on_node for that. + * Add data access arbiters, to improve parallelism of concurrent data + accesses, notably with STARPU_COMMUTE. + * Anticipative writeback, to flush dirty data asynchronously before the + GPU device is full. Disabled by default. Use STARPU_MINIMUM_CLEAN_BUFFERS + and STARPU_TARGET_CLEAN_BUFFERS to enable it. + * Add starpu_data_wont_use to advise that a piece of data will not be used + in the near future. + * Enable anticipative writeback by default. + * New scheduler 'dmdasd' that considers priority when deciding on + which worker to schedule + * Add the capability to define specific MPI datatypes for + StarPU user-defined interfaces. + * Add tasks.rec trace output to make scheduling analysis easier.
+ * Add Fortran 90 module and example using it + * New StarPU-MPI gdb debug functions + * Generate animated html trace of modular schedulers. + * Add asynchronous partition planning. It only supports coherency through + the home node of data for now. + * Add STARPU_MALLOC_SIMULATION_FOLDED flag to save memory when simulating. + * Include application threads in the trace. + * Add starpu_task_get_task_scheduled_succs to get successors of a task. + * Add graph inspection facility for schedulers. + * New STARPU_LOCALITY flag to mark data which should be taken into account + by schedulers for improving locality. + * Experimental support for data locality in ws and lws. + * Add a preliminary framework for native Fortran support for StarPU + +Small features: + * Tasks can now have a name (via the field const char *name of + struct starpu_task) + * New functions starpu_data_acquire_cb_sequential_consistency() and + starpu_data_acquire_on_node_cb_sequential_consistency() which allows + to enable or disable sequential consistency + * New configure option --enable-fxt-lock which enables additional + trace events focused on locks behaviour during the execution + * Functions starpu_insert_task and starpu_mpi_insert_task are + renamed in starpu_task_insert and starpu_mpi_task_insert. Old + names are kept to avoid breaking old codes. + * New configure option --enable-calibration-heuristic which allows + the user to set the maximum authorized deviation of the + history-based calibrator. + * Allow application to provide the task footprint itself. + * New function starpu_sched_ctx_display_workers() to display worker + information belonging to a given scheduler context + * The option --enable-verbose can be called with + --enable-verbose=extra to increase the verbosity + * Add codelet size, footprint and tag id in the paje trace. + * Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU + manage the tag. + * On Linux x86, spinlocks now block after a hundred tries. 
This avoids + typical 10ms pauses when the application thread tries to submit tasks. + * New function char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type) + * Improve static scheduling by adding support for specifying the task + execution order. + * Add starpu_worker_can_execute_task_impl and + starpu_worker_can_execute_task_first_impl to optimize getting the + working implementations + * Add STARPU_MALLOC_NORECLAIM flag to allocate without running a reclaim if + the node is out of memory. + * New flag STARPU_DATA_MODE_ARRAY for the function family + starpu_task_insert to allow to define an array of data handles + along with their access modes. + * New configure option --enable-new-check to enable new testcases + which are known to fail + * Add starpu_memory_allocate and _deallocate to let the application declare + its own allocation to the reclaiming engine. + * Add STARPU_SIMGRID_CUDA_MALLOC_COST and STARPU_SIMGRID_CUDA_QUEUE_COST to + disable CUDA costs simulation in simgrid mode. + * Add starpu_task_get_task_succs to get the list of children of a given + task. + * Add starpu_malloc_on_node_flags, starpu_free_on_node_flags, and + starpu_malloc_on_node_set_default_flags to control the allocation flags + used for allocations done by starpu. + * Ranges can be provided in STARPU_WORKERS_CPUID + * Add starpu_fxt_autostart_profiling to be able to avoid autostart. + * Add arch_cost_function perfmodel function field. + * Add STARPU_TASK_BREAK_ON_SCHED, STARPU_TASK_BREAK_ON_PUSH, and + STARPU_TASK_BREAK_ON_POP environment variables to debug schedulers. + * Add starpu_sched_display tool. + * Add starpu_memory_pin and starpu_memory_unpin to pin memory allocated + another way than starpu_malloc. + * Add STARPU_NOWHERE to create synchronization tasks with data. + * Document how to switch between different views of the same data. + * Add STARPU_NAME to specify a task name from a starpu_task_insert call.
+ * Add configure option to disable fortran --disable-fortran + * Add configure option to give path for smpirun executable --with-smpirun + * Add configure option to disable the build of tests --disable-build-tests + * Add starpu-all-tasks debugging support + * New function + void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source) + which allocates the pointers located_file_name, located_dir_name + and opencl_program_source. + * Add submit_hook and do_schedule scheduler methods. + * Add starpu_sleep. + * Add starpu_task_list_ismember. + * Add _starpu_fifo_pop_this_task. + * Add STARPU_MAX_MEMORY_USE environment variable. + * Add starpu_worker_get_id_check(). + * New function starpu_mpi_wait_for_all(MPI_Comm comm) that allows to + wait until all StarPU tasks and communications for the given + communicator are completed. + * New function starpu_codelet_unpack_args_and_copyleft() which + allows to copy in a new buffer values which have not been unpacked by + the current call + * Add STARPU_CODELET_SIMGRID_EXECUTE flag. + * Add STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT flag. + * Add STARPU_CL_ARGS flag to starpu_task_insert() and + starpu_mpi_task_insert() functions call + +Changes: + * Data interfaces (variable, vector, matrix and block) now define + pack and unpack functions + * StarPU-MPI: Fix for being able to receive data which have not yet + been registered by the application (i.e it did not call + starpu_data_set_tag(), data are received as a raw memory) + * StarPU-MPI: Fix for being able to receive data with the same tag + from several nodes (see mpi/tests/gather.c) + * Remove the long-deprecated cost_model fields and task->buffers field. + * Fix complexity of implicit task/data dependency, from quadratic to linear.
+ +Small changes: + * Rename function starpu_trace_user_event() as + starpu_fxt_trace_user_event() + * "power" is renamed into "energy" wherever it applies, notably energy + consumption performance models + * Update starpu_task_build() to set starpu_task::cl_arg_free to 1 if + some arguments of type ::STARPU_VALUE are given. + * Simplify performance model loading API + * Better semantic for environment variables STARPU_NMIC and + STARPU_NMICDEVS, the number of devices and the number of cores. + STARPU_NMIC will be the number of devices, and STARPU_NMICCORES + will be the number of cores per device. + +StarPU 1.1.8 (git revision f7b7abe9f86361cbc96f2b51c6ad7336b7d1d628) +==================================================================== +The scheduling context release + +Small changes: + * Fix compatibility with simgrid 3.14 + * Fix lock ordering for memory reclaiming + +StarPU 1.1.7 (git revision 341044b67809892cf4a388e482766beb50256907) +==================================================================== +The scheduling context release + +Small changes: + * Fix type of data home node to allow users to pass -1 to define + temporary data + +StarPU 1.1.6 (git revision cdffbd5f5447e4d076d659232b3deb14f3c20da6) +==================================================================== +The scheduling context release + +Small features: + * Add starpu_task_get_task_succs to get the list of children of a given + task. + * Ranges can be provided in STARPU_WORKERS_CPUID + +Small changes: + * Various fixes for MacOS and windows systems + +StarPU 1.1.5 (git revision 20469c6f3e7ecd6c0568c8e4e4b5b652598308d8xxx) +======================================================================= +The scheduling context release + +New features: + * Add starpu_memory_pin and starpu_memory_unpin to pin memory allocated + another way than starpu_malloc. 
+ * Add starpu_task_wait_for_n_submitted() and + STARPU_LIMIT_MAX_NSUBMITTED_TASKS/STARPU_LIMIT_MIN_NSUBMITTED_TASKS to + easily control the number of submitted tasks by making task submission + block. + * Add STARPU_NOWHERE to create synchronization tasks with data. + * Document how to switch between different views of the same data. + * Add Fortran 90 module and example using it + +StarPU 1.1.4 (git revision 2a3d30b28d6d099d271134a786335acdbb3931a3) +==================================================================== +The scheduling context release + +New features: + * Fix and actually enable the cache allocation. + * Enable allocation cache in main RAM when STARPU_LIMIT_CPU_MEM is set by + the user. + * New MPI functions starpu_mpi_issend and starpu_mpi_issend_detached + to send data using a synchronous and non-blocking mode (internally + uses MPI_Issend) + * New data access mode flag STARPU_SSEND to be set when calling + starpu_mpi_insert_task to specify the data has to be sent using a + synchronous and non-blocking mode + * New environment variable STARPU_PERF_MODEL_DIR which can be set to + specify a directory where to store performance model files in. + When unset, the files are stored in $STARPU_HOME/.starpu/sampling + * MPI: + - New function starpu_mpi_data_register_comm to register a data + with another communicator than MPI_COMM_WORLD + - New functions starpu_mpi_data_set_rank() and starpu_mpi_data_set_tag() + which call starpu_mpi_data_register_comm() + +Small features: + * Add starpu_memory_wait_available() to wait for a given size to become + available on a given node. + * New environment variable STARPU_RAND_SEED to set the seed used for random + numbers. + * New function starpu_mpi_cache_set() to enable or disable the + communication cache at runtime + * Add starpu_paje_sort which sorts Pajé traces. + +Changes: + * Fix complexity of implicit task/data dependency, from quadratic to linear. 
+ +StarPU 1.1.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad) +==================================================================== +The scheduling context release + +New features: + * One can register an existing on-GPU buffer to be used by a handle. + * Add the starpu_paje_summary statistics tool. + * Enable gpu-gpu transfers for matrices. + * Let interfaces declare which transfers they allow with the can_copy + method. + +Small changes: + * Lock performance model files while writing and reading them to avoid + issues on parallel launches, MPI runs notably. + * Lots of build fixes for icc on Windows. + +StarPU 1.1.2 (git revision d14c550798630bbc4f3da2b07d793c47e3018f02) +==================================================================== +The scheduling context release + +New features: + * The reduction init codelet is automatically used to initialize temporary + buffers. + * Traces now include a "scheduling" state, to show the overhead of the + scheduler. + * Add STARPU_CALIBRATE_MINIMUM environment variable to specify the minimum + number of calibration measurements. + * Add STARPU_TRACE_BUFFER_SIZE environment variable to specify the size of + the trace buffer. + +StarPU 1.1.1 (git revision dab2e51117fac5bef767f3a6b7677abb2147d2f2) +==================================================================== +The scheduling context release + +New features: + * MPI: + - New variable STARPU_MPI_CACHE_STATS to print statistics on + cache holding received data. + - New function starpu_mpi_data_register() which sets the rank + and tag of a data, and also allows to automatically clear + the MPI communication cache when unregistering the data. It + should be called instead of both calling + starpu_data_set_tag() and starpu_data_set_rank() + * Use streams for all CUDA transfers, even initiated by CPUs. + * Add paje traces statistics tools. + * Use streams for GPUA->GPUB and GPUB->GPUA transfers. 
+ +Small features: + * New STARPU_EXECUTE_ON_WORKER flag to specify the worker on which + to execute the task. + * New STARPU_DISABLE_PINNING environment variable to disable host memory + pinning. + * New STARPU_DISABLE_KERNELS environment variable to disable actual kernel + execution. + * New starpu_memory_get_total function to get the size of a memory node. + * New starpu_parallel_task_barrier_init_n function to let a scheduler decide + a set of workers without going through combined workers. + +Changes: + * Fix simgrid execution. + * Rename starpu_get_nready_tasks_of_sched_ctx to starpu_sched_ctx_get_nready_tasks + * Rename starpu_get_nready_flops_of_sched_ctx to starpu_sched_ctx_get_nready_flops + * New functions starpu_pause() and starpu_resume() + * New codelet specific_nodes field to specify explicit target nodes for data. + * StarPU-MPI: Fix overzealous allocation of memory. + * Interfaces: Allow interface implementation to change pointers at will, in + unpack notably. + +Small changes: + * Use big fat abortions when one tries to make a task or callback + sleep, instead of just returning EDEADLCK which few people will test + * By default, StarPU FFT examples are not compiled and checked, the + configure option --enable-starpufft-examples needs to be specified + to change this behaviour. + +StarPU 1.1.0 (git revision 3c4bc72ccef30e767680cad3d749c4e9010d4476) +==================================================================== +The scheduling context release + +New features: + * OpenGL interoperability support. + * Capability to store compiled OpenCL kernels on the file system + * Capability to load compiled OpenCL kernels + * Performance models measurements can now be provided explicitly by + applications. 
+ * Capability to emit communication statistics when running MPI code + * Add starpu_data_unregister_submit, starpu_data_acquire_on_node and + starpu_data_invalidate_submit + * New functionality to wrapper starpu_insert_task to pass an array of + data_handles via the parameter STARPU_DATA_ARRAY + * Enable GPU-GPU direct transfers. + * GCC plug-in + - Add `registered' attribute + - A new pass was added that warns about the use of possibly + unregistered memory buffers. + * SOCL + - Manual mapping of commands on specific devices is now + possible + - SOCL does not require StarPU CPU tasks anymore. CPU workers + are automatically disabled to enhance performance of OpenCL + CPU devices + * New interface: COO matrix. + * Data interfaces: The pack operation of user-defined data interface + defines a new parameter count which should be set to the size of + the buffer created by the packing of the data. + * MPI: + - Communication statistics for MPI can only be enabled at + execution time by defining the environment variable + STARPU_COMM_STATS + - Communication cache mechanism is enabled by default, and can + only be disabled at execution time by setting the + environment variable STARPU_MPI_CACHE to 0. + - Initialisation functions starpu_mpi_initialize_extended() + and starpu_mpi_initialize() have been made deprecated. One + should now use starpu_mpi_init(int *, char ***, int). The + last parameter indicates if MPI should be initialised. + - Collective detached operations have new parameters, a + callback function and an argument. This is to be consistent + with the detached point-to-point communications. + - When exchanging user-defined data interfaces, the size of + the data is the size returned by the pack operation, i.e + data with dynamic size can now be exchanged with StarPU-MPI. + * Add experimental simgrid support, to simulate execution with various + number of CPUs, GPUs, amount of memory, etc.
+ * Add support for OpenCL simulators (which provide simulated execution time) + * Add support for Temanejo, a task graph debugger + * Theoretical bound lp output now includes data transfer time. + * Update OpenCL driver to only enable CPU devices (the environment + variable STARPU_OPENCL_ONLY_ON_CPUS must be set to a positive + value when executing an application) + * Add Scheduling contexts to separate computation resources + - Scheduling policies take into account the set of resources corresponding + to the context it belongs to + - Add support to dynamically change scheduling contexts + (Create and Delete a context, Add Workers to a context, Remove workers from a context) + - Add support to indicate to which contexts the tasks are submitted + * Add the Hypervisor to manage the Scheduling Contexts automatically + - The Contexts can be registered to the Hypervisor + - Only the registered contexts are managed by the Hypervisor + - The Hypervisor can detect the initial distribution of resources of + a context and constructs it consequently (the cost of execution is required) + - Several policies can adapt dynamically the distribution of resources + in contexts if the initial one was not appropriate + - Add a platform to implement new policies of redistribution + of resources + * Implement a memory manager which checks the global amount of + memory available on devices, and checks there is enough memory + before doing an allocation on the device. 
+ * Discard environment variable STARPU_LIMIT_GPU_MEM and define + instead STARPU_LIMIT_CUDA_MEM and STARPU_LIMIT_OPENCL_MEM + * Introduce new variables STARPU_LIMIT_CUDA_devid_MEM and + STARPU_LIMIT_OPENCL_devid_MEM to limit memory per specific device + * Introduce new variable STARPU_LIMIT_CPU_MEM to limit memory for + the CPU devices + * New function starpu_malloc_flags to define a memory allocation with + constraints based on the following values: + - STARPU_MALLOC_PINNED specifies memory should be pinned + - STARPU_MALLOC_COUNT specifies the memory allocation should be in + the limits defined by the environment variables STARPU_LIMIT_xxx + (see above). When no memory is left, starpu_malloc_flags tries + to reclaim memory from StarPU and returns -ENOMEM on failure. + * starpu_malloc calls starpu_malloc_flags with a value of flag set + to STARPU_MALLOC_PINNED + * Define new function starpu_free_flags similarly to starpu_malloc_flags + * Define new public API starpu_pthread which is similar to the + pthread API. It is provided with 2 implementations: a pthread one + and a Simgrid one. Applications using StarPU and wishing to use + the Simgrid StarPU features should use it. + * Allow to have a dynamically allocated number of buffers per task, + and so overwrite the value defined --enable-maxbuffers=XXX + * Performance models files are now stored in a directory whose name + includes the version of the performance model format. The version + number is also written in the file itself. + When updating the format, the internal variable + _STARPU_PERFMODEL_VERSION should be updated. It is then possible + to switch easily between different versions of StarPU having + different performance model formats. + * Tasks can now define an optional prologue callback which is executed + on the host when the task becomes ready for execution, before getting + scheduled. + * Small CUDA allocations (<= 4MiB) are now batched to avoid the huge + cudaMalloc overhead.
+ * Prefetching is now done for all schedulers when it can be done whatever + the scheduling decision. + * Add a watchdog which permits to easily trigger a crash when StarPU gets + stuck. + * Document how to migrate data over MPI. + * New function starpu_wakeup_worker() to be used by schedulers to + wake up a single worker (instead of all workers) when submitting a + single task. + * The functions starpu_sched_set/get_min/max_priority set/get the + priorities of the current scheduling context, i.e the one which + was set by a call to starpu_sched_ctx_set_context() or the initial + context if the function has not been called yet. + * Fix for properly dealing with NAN on windows systems + +Small features: + * Add starpu_worker_get_by_type and starpu_worker_get_by_devid + * Add starpu_fxt_stop_profiling/starpu_fxt_start_profiling which permits to + pause trace recording. + * Add trace_buffer_size configuration field to permit to specify the tracing + buffer size. + * Add starpu_codelet_profile and starpu_codelet_histo_profile, tools which draw + the profile of a codelet. + * File STARPU-REVISION --- containing the SVN revision number from which + StarPU was compiled --- is installed in the share/doc/starpu directory + * starpu_perfmodel_plot can now directly draw GFlops curves. + * New configure option --enable-mpi-progression-hook to enable the + activity polling method for StarPU-MPI. + * Permit to disable sequential consistency for a given task. + * New macro STARPU_RELEASE_VERSION + * New function starpu_get_version() to return as 3 integers the + release version of StarPU. + * Enable by default data allocation cache + * New function starpu_perfmodel_directory() to print directory + storing performance models. 
Available through the new option -d of + the tool starpu_perfmodel_display + * New batch files to execute StarPU applications under Microsoft + Visual Studio (They are installed in path_to_starpu/bin/msvc)/ + * Add cl_arg_free, callback_arg_free, prologue_callback_arg_free fields to + enable automatic free(cl_arg); free(callback_arg); + free(prologue_callback_arg) on task destroy. + * New function starpu_task_build + * New configure options --with-simgrid-dir + --with-simgrid-include-dir and --with-simgrid-lib-dir to specify + the location of the SimGrid library + +Changes: + * Rename all filter functions to follow the pattern + starpu_DATATYPE_filter_FILTERTYPE. The script + tools/dev/rename_filter.sh is provided to update your existing + applications to use new filters function names. + * Renaming of diverse functions and datatypes. The script + tools/dev/rename.sh is provided to update your existing + applications to use the new names. It is also possible to compile + with the pkg-config package starpu-1.0 to keep using the old + names. It is however recommended to update your code and to use + the package starpu-1.1. + + * Fix the block filter functions. + * Fix StarPU-MPI on Darwin. + * The FxT code can now be used on systems other than Linux. + * Keep only one hashtable implementation common/uthash.h + * The cache of starpu_mpi_insert_task is fixed and thus now enabled by + default. + * Improve starpu_machine_display output. + * Standardize objects name in the performance model API + * SOCL + - Virtual SOCL device has been removed + - Automatic scheduling still available with command queues not + assigned to any device + - Remove modified OpenCL headers. ICD is now the only supported + way to use SOCL. + - SOCL test suite is only run when environment variable + SOCL_OCL_LIB_OPENCL is defined. It should contain the location + of the libOpenCL.so file of the OCL ICD implementation. + * Fix main memory leak on multiple unregister/re-register. 
+ * Improve hwloc detection by configure + * Cell: + - It is no longer possible to enable the cell support via the + gordon driver + - Data interfaces no longer define functions to copy to and from + SPU devices + - Codelet no longer define pointer for Gordon implementations + - Gordon workers are no longer enabled + - Gordon performance models are no longer enabled + * Fix data transfer arrows in paje traces + * The "heft" scheduler no longer exists. Users should now pick "dmda" + instead. + * StarPU can now use poti to generate paje traces. + * Rename scheduling policy "parallel greedy" to "parallel eager" + * starpu_scheduler.h is no longer automatically included by + starpu.h, it has to be manually included when needed + * New batch files to run StarPU applications with Microsoft Visual C + * Add examples/release/Makefile to test StarPU examples against an + installed version of StarPU. That can also be used to test + examples using a previous API. + * Tutorial is installed in ${docdir}/tutorial + * Schedulers eager_central_policy, dm and dmda no longer erroneously respect + priorities. dmdas has to be used to respect priorities. + * StarPU-MPI: Fix potential bug for user-defined datatypes. As MPI + can reorder messages, we need to make sure the sending of the size + of the data has been completed. + * Documentation is now generated through doxygen. + * Modification of perfmodels output format for future improvements. + * Fix for properly dealing with NAN on windows systems + * Function starpu_sched_ctx_create() now takes a variable argument + list to define the scheduler to be used, and the minimum and + maximum priority values + * The functions starpu_sched_set/get_min/max_priority set/get the + priorities of the current scheduling context, i.e the one which + was set by a call to starpu_sched_ctx_set_context() or the initial + context if the function was not called yet. 
+ * MPI: Fix of the livelock issue discovered while executing applications + on a CPU+GPU cluster of machines by adding a maximum trylock + threshold before a blocking lock. + +Small changes: + * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is + still available for compatibility reasons. + * include/starpu.h includes all include/starpu_*.h files, applications + therefore only need to have #include + * Active task wait is now included in blocked time. + * Fix GCC plugin linking issues starting with GCC 4.7. + * Fix forcing calibration of never-calibrated archs. + * CUDA applications are no longer compiled with the "-arch sm_13" + option. It is specifically added to applications which need it. + * Explicitly name the non-sleeping-non-running time "Overhead", and use + another color in vite traces. + * Use C99 variadic macro support, not GNU. + * Fix performance regression: dmda queues were inadvertently made + LIFOs in r9611. + +StarPU 1.0.3 (git revision 25f8b3a7b13050e99bf1725ca6f52cfd62e7a861) +==================================================================== + +Changes: + * Several bug fixes in the build system + * Bug fixes in source code for non-Linux systems + * Fix generating FXT traces bigger than 64MiB. + * Improve ENODEV error detections in StarPU FFT + +StarPU 1.0.2 (git revision 6f95de279d6d796a39debe8d6c5493b3bdbe0c37) +==================================================================== + +Changes: + * Add starpu_block_shadow_filter_func_vector and an example. + * Add tag dependency in trace-generated DAG. + * Fix CPU binding for optimized CPU-GPU transfers. + * Fix parallel tasks CPU binding and combined worker generation. + * Fix generating FXT traces bigger than 64MiB. + +StarPU 1.0.1 (git revision 97ea6e15a273e23e4ddabf491b0f9481373ca01a) +==================================================================== + +Changes: + * hwloc support. 
Warn users when hwloc is not found on the system and + produce error when not explicitly disabled. + * Several bug fixes + * GCC plug-in + - Add `#pragma starpu release' + - Fix bug when using `acquire' pragma with function parameters + - Slightly improve test suite coverage + - Relax the GCC version check + * Update SOCL to use new API + * Documentation improvement. + +StarPU 1.0.0 (git revision d3ad9ca318ec9acfeaf8eb7d8a018b09e4722292) +==================================================================== +The extensions-again release + +New features: + * Add SOCL, an OpenCL interface on top of StarPU. + * Add a gcc plugin to extend the C interface with pragmas which allow to + easily define codelets and issue tasks. + * Add reduction mode to starpu_mpi_insert_task. + * A new multi-format interface permits to use different binary formats + on CPUs & GPUs, the conversion functions being provided by the + application and called by StarPU as needed (and as little as + possible). + * Deprecate cost_model, and introduce cost_function, which is provided + with the whole task structure, the target arch and implementation + number. + * Permit the application to provide its own size base for performance + models. + * Applications can provide several implementations of a codelet for the + same architecture. + * Add a StarPU-Top feedback and steering interface. + * Permit to specify MPI tags for more efficient starpu_mpi_insert_task + +Changes: + * Fix several memory leaks and race conditions + * Make environment variables take precedence over the configuration + passed to starpu_init() + * Libtool interface versioning has been included in libraries names + (libstarpu-1.0.so, libstarpumpi-1.0.so, + libstarpufft-1.0.so, libsocl-1.0.so) + * Install headers under $includedir/starpu/1.0. + * Make where field for struct starpu_codelet optional. When unset, its + value will be automatically set based on the availability of the + different XXX_funcs fields of the codelet.
+ * Define access modes for data handles into starpu_codelet and no longer + in starpu_task. Hence mark (struct starpu_task).buffers as + deprecated, and add (struct starpu_task).handles and (struct + starpu_codelet).modes + * Fields xxx_func of struct starpu_codelet are made deprecated. One + should use fields xxx_funcs instead. + * Some types were renamed for consistency. When using pkg-config libstarpu, + starpu_deprecated_api.h is automatically included (after starpu.h) to + keep compatibility with existing software. Other changes are mentioned + below, compatibility is also preserved for them. + To port code to use new names (this is not mandatory), the + tools/dev/rename.sh script can be used, and pkg-config starpu-1.0 should + be used. + * The communication cost in the heft and dmda scheduling strategies now + takes into account the contention brought by the number of GPUs. This + changes the meaning of the beta factor, whose default 1.0 value should + now be good enough in most cases. + +Small features: + * Allow users to disable asynchronous data transfers between CPUs and + GPUs. + * Update OpenCL driver to enable CPU devices (the environment variable + STARPU_OPENCL_ON_CPUS must be set to a positive value when + executing an application) + * struct starpu_data_interface_ops --- operations on a data + interface --- define a new function pointer allocate_new_data + which creates a new data interface of the given type based on + an existing handle + * Add a field named magic to struct starpu_task which is set when + initialising the task. starpu_task_submit will fail if the + field does not have the right value. This will hence avoid + submitting tasks which have not been properly initialised. + * Add a hook function pre_exec_hook in struct starpu_sched_policy. + The function is meant to be called in drivers. Schedulers + can use it to be notified when a task is about to be computed. + * Add codelet execution time statistics plot.
+ * Add bus speed in starpu_machine_display. + * Add a STARPU_DATA_ACQUIRE_CB which permits to inline the code to be + done. + * Add gdb functions. + * Add complex support to LU example. + * Permit to use the same data several times in write mode in the + parameters of the same task. + +Small changes: + * Increase default value for STARPU_MAXCPUS -- Maximum number of + CPUs supported -- to 64. + * Add man pages for some of the tools + * Add C++ application example in examples/cpp/ + * Add an OpenMP fork-join example. + * Documentation improvement. + + +StarPU 0.9 (git revision 12bba8528fc0d85367d885cddc383ba54efca464) +================================================================== +The extensions release + + * Provide the STARPU_REDUX data access mode + * Externalize the scheduler API. + * Add theoretical bound computation + * Add the void interface + * Add power consumption optimization + * Add parallel task support + * Add starpu_mpi_insert_task + * Add profiling information interface. + * Add STARPU_LIMIT_GPU_MEM environment variable. + * OpenCL fixes + * MPI fixes + * Improve optimization documentation + * Upgrade to hwloc 1.1 interface + * Add fortran example + * Add mandelbrot OpenCL example + * Add cg example + * Add stencil MPI example + * Initial support for CUDA4 + +StarPU 0.4 (git revision ad8d8be3619f211f228c141282d7d504646fc2a6) +================================================================== +The API strengthening release + + * Major API improvements + - Provide the STARPU_SCRATCH data access mode + - Rework data filter interface + - Rework data interface structure + - A script that automatically renames old functions to accommodate with the new + API is available from https://scm.gforge.inria.fr/svn/starpu/scripts/renaming + (login: anonsvn, password: anonsvn) + * Implement dependencies between task directly (eg. 
without tags) + * Implicit data-driven task dependencies simplifies the design of + data-parallel algorithms + * Add dynamic profiling capabilities + - Provide per-task feedback + - Provide per-worker feedback + - Provide feedback about memory transfers + * Provide a library to help accelerating MPI applications + * Improve data transfers overhead prediction + - Transparently benchmark buses to generate performance models + - Bind accelerator-controlling threads with respect to NUMA locality + * Improve StarPU's portability + - Add OpenCL support + - Add support for Windows + +StarPU 0.2.901 aka 0.3-rc1 (git revision 991f2abb772c17c3d45bbcf27f46197652e6a3ef) +================================================================================== +The asynchronous heterogeneous multi-accelerator release + + * Many API changes and code cleanups + - Implement starpu_worker_get_id + - Implement starpu_worker_get_name + - Implement starpu_worker_get_type + - Implement starpu_worker_get_count + - Implement starpu_display_codelet_stats + - Implement starpu_data_prefetch_on_node + - Expose the starpu_data_set_wt_mask function + * Support nvidia (heterogeneous) multi-GPU + * Add the data request mechanism + - All data transfers use data requests now + - Implement asynchronous data transfers + - Implement prefetch mechanism + - Chain data requests to support GPU->RAM->GPU transfers + * Make it possible to bypass the scheduler and to assign a task to a specific + worker + * Support restartable tasks to reinstanciate dependencies task graphs + * Improve performance prediction + - Model data transfer overhead + - One model is created for each accelerator + * Support for CUDA's driver API is deprecated + * The STARPU_WORKERS_CUDAID and STARPU_WORKERS_CPUID env. 
variables make it possible to + specify where to bind the workers + * Use the hwloc library to detect the actual number of cores + +StarPU 0.2.0 (git revision 73e989f0783e10815aff394f80242760c4ed098c) +==================================================================== +The Stabilizing-the-Basics release + + * Various API cleanups + * Mac OS X is supported now + * Add dynamic code loading facilities onto Cell's SPUs + * Improve performance analysis/feedback tools + * Application can interact with StarPU tasks + - The application may access/modify data managed by the DSM + - The application may wait for the termination of a (set of) task(s) + * An initial documentation is added + * More examples are supplied + + +StarPU 0.1.0 (git revision 911869a96b40c74eb92b30a43d3e08bf445d8078) +==================================================================== +First release. + +Status: + * Only supports Linux platforms yet + * Supported architectures + - multicore CPUs + - NVIDIA GPUs (with CUDA 2.x) + - experimental Cell/BE support + +Changes: + * Scheduling facilities + - run-time selection of the scheduling policy + - basic auto-tuning facilities + * Software-based DSM + - transparent data coherency management + - High-level expressive interface + + +# Local Variables: +# mode: text +# coding: utf-8 +# ispell-local-dictionary: "american" +# End: diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..fcda47a --- /dev/null +++ b/INSTALL @@ -0,0 +1,214 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +Contents +========= + +* Installing StarPU on a Unix machine +* Installing StarPU on Windows + + + +Installing StarPU on a Unix machine +------------------------------------ +$ ./autogen.sh # If running the SVN version +$ ./configure --prefix= +$ make +$ make install + + + +Installing StarPU on Windows +---------------------------- + +If you are building from a tarball downloaded from the website, you can skip the +cygwin part. + +1. Install cygwin + + http://cygwin.com/install.html + + Make sure the following packages are available: + - (Devel)/subversion + - (Devel)/libtool + - (Devel)/gcc + - (Devel)/make + - your favorite editor (vi, emacs, ...) + - (Devel)/gdb + - (Archive)/zip + - (Devel)/pkg-config + +2. Install mingw + + http://www.mingw.org/ + +3. Install hwloc (not mandatory, but strongly recommended) + + http://www.open-mpi.org/projects/hwloc + + Be careful which version you are installing. Even if your machine + runs windows 64 bits, if you are running a 32 bits mingw (check the + output of the command uname -a), you will need to install the 32 + bits version of hwloc. + +4. Install Microsoft Visual C++ Studio Express + + http://www.microsoft.com/express/Downloads + + Add in your path the following directories. 
+ (adjusting where necessary for the Installation location according to VC + version and on 64 and 32bit Windows versions) + + On cygwin, with Visual C++ 2010 e.g.; + + export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH + export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH + + On MingW, with Visual C++ 2010, e.g.; + + export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH + export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH + + Try to call , and without any option to make + sure these dump their help output with a series of options, otherwise no + .def or .lib file will be produced. + +5. Install GPU Drivers (not mandatory) + + 5.1 Install Cuda + + http://developer.nvidia.com/object/cuda_3_2_downloads.html + + You need to install at least the CUDA toolkit. + + libtool is not able to find the libraries automatically, you + need to make some copies: + + copy c:\cuda\lib\cuda.lib c:\cuda\lib\libcuda.lib + copy c:\cuda\lib\cudart.lib c:\cuda\lib\libcudart.lib + copy c:\cuda\lib\cublas.lib c:\cuda\lib\libcublas.lib + copy c:\cuda\lib\cufft.lib c:\cuda\lib\libcufft.lib + copy c:\cuda\lib\OpenCL.lib c:\cuda\lib\libOpenCL.lib + + (and if the version of your CUDA driver is >= 3.2) + + copy c:\cuda\lib\curand.lib c:\cuda\lib\libcurand.lib + + Add the CUDA bin directory in your path + + export PATH=/cygdrive/c/CUDA/bin:$PATH + + Since we build code using CUDA headers with gcc instead of Visual studio, + a fix is needed: c:\cuda\include\host_defines.h has a bogus CUDARTAPI + definition which makes linking fail completely. Replace the first + occurrence of + + #define CUDARTAPI + + with + + #ifdef _WIN32 + #define CUDARTAPI __stdcall + #else + #define CUDARTAPI + #endif + + While at it, you can also comment the __cdecl definition to avoid spurious + warnings. 
+ + + 5.2 Install OpenCL + + http://developer.nvidia.com/object/opencl-download.html + + You need to download the NVIDIA Drivers for your version of + Windows. Executing the file will extract all files in a given + directory. Then the driver installation will start; it will fail + if no compatible drivers can be found on your system. + + Anyway, you should copy the *.dl_ files from the directory + (extraction path) in the bin directory of the CUDA installation + directory (the directory should be v3.2/bin/) + + 5.3 Install MsCompress + + http://gnuwin32.sourceforge.net/packages/mscompress.htm + + Go in the CUDA bin directory, uncompress .dl_ files and rename + them in .dll files + + cp /cygdrive/c/NVIDIA/DisplayDriver/190.89/International/*.dl_ . + for i in *.dl_ ; do /cygdrive/c/Program\ Files/GnuWin32/bin/msexpand.exe $i ; mv ${i%_} ${i%_}l ; done + +If you are building from a tarball downloaded from the website, you can skip the +autogen.sh part. + +6. Start autogen.sh from cygwin + + cd starpu-trunk + ./autogen.sh + +7. Start a MinGW shell + + /cygdrive/c/MinGW/msys/1.0/bin/sh.exe --login -i + +8. Configure, make, install from MinGW + + If you have a non-English version of Windows, use + + export LANG=C + + else libtool has trouble parsing the translated output of the toolchain. + + cd starpu-trunk + mkdir build + cd build + ../configure --prefix=$PWD/target \ + --with-hwloc= \ + --with-cuda-dir= \ + --with-cuda-lib-dir=/lib/Win32 \ + --with-opencl-dir= + --disable-build-doc + --disable-build-examples --enable-quick-check + make + make check # not necessary but well advised + make install + + The option --disable-build-doc is necessary if you do not have a + working TeX binary installed as it is needed by texi2dvi to build + the documentation. + + To speed up the compilation process, the option + --disable-build-examples may also be used to disable the + compilation of the applications in the examples directory.
Only the + applications in the test directory will be built. + + Also convert a couple of files to CRLF: + + sed -e 's/$/'$'\015'/ < README > $prefix/README.txt + sed -e 's/$/'$'\015'/ < AUTHORS > $prefix/AUTHORS.txt + sed -e 's/$/'$'\015'/ < COPYING.LGPL > $prefix/COPYING.LGPL.txt + +9. If you want your StarPU installation to be standalone, you need to + copy the DLL files from hwloc, Cuda, and OpenCL into the StarPU + installation bin directory, as well as MinGW/bin/libpthread*dll + + cp /bin/*dll target/bin + cp /bin/*dll target/bin + cp /cygdrive/c/MinGW/bin/libpthread*dll target/bin + + and set the StarPU bin directory in your path. + + export PATH=/bin:$PATH diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..bef305d --- /dev/null +++ b/Makefile.am @@ -0,0 +1,244 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2017-2017 Guillaume Beauchamp +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +ACLOCAL_AMFLAGS=-I m4 +CLEANFILES = *.gcno *.gcda *.linkinfo + +SUBDIRS = + +if STARPU_USE_MIN_DGELS +SUBDIRS += min-dgels +endif + +SUBDIRS += src + +SUBDIRS += tools + +if STARPU_BUILD_TESTS +SUBDIRS += tests +endif + +SUBDIRS += doc + +if STARPU_USE_MPI +SUBDIRS += mpi +endif + +if STARPU_BUBBLE +SUBDIRS += bubble +endif + +if STARPU_BUILD_EXAMPLES +SUBDIRS += examples +endif + +if STARPU_BUILD_SOCL +SUBDIRS += socl +endif + +if STARPU_BUILD_STARPUFFT +SUBDIRS += starpufft +endif + +if STARPU_BUILD_STARPURM +SUBDIRS += starpurm +endif + +if STARPU_OPENMP_LLVM +SUBDIRS += starpu_openmp_llvm +endif + +if STARPU_BUILD_STARPUPY +if STARPU_USE_CPU +SUBDIRS += starpupy +else +if STARPU_USE_MPI_MASTER_SLAVE +SUBDIRS += starpupy +else +if STARPU_USE_TCPIP_MASTER_SLAVE +SUBDIRS += starpupy +endif +endif +endif +endif + +if STARPU_BUILD_SC_HYPERVISOR +SUBDIRS += sc_hypervisor +endif + +if STARPU_USE_JULIA +SUBDIRS += julia +endif + +if STARPU_BUILD_ECLIPSE_PLUGIN +SUBDIRS += eclipse-plugin +endif + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc + +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpu.h \ + include/starpu_helper.h \ + include/starpu_bitmap.h \ + include/starpu_data_filters.h \ + include/starpu_data_interfaces.h \ + include/starpu_worker.h \ + include/starpu_task.h \ + include/starpu_task_dep.h \ + include/starpu_task_bundle.h \ + include/starpu_task_list.h \ + include/starpu_task_util.h \ + include/starpu_data.h \ + include/starpu_perfmodel.h \ + include/starpu_util.h \ + include/starpu_fxt.h \ + include/starpu_cuda.h \ + include/starpu_hip.h \ + include/starpu_opencl.h \ + include/starpu_max_fpga.h \ + include/starpu_openmp.h \ + include/starpu_sink.h \ + include/starpu_expert.h \ + include/starpu_profiling.h \ + include/starpu_profiling_tool.h \ + 
include/starpu_bound.h \ + include/starpu_scheduler.h \ + include/schedulers/starpu_heteroprio.h \ + include/starpu_sched_component.h \ + include/starpu_sched_ctx.h \ + include/starpu_sched_ctx_hypervisor.h \ + include/starpu_deprecated_api.h \ + include/starpu_hash.h \ + include/starpu_rand.h \ + include/starpu_disk.h \ + include/starpu_cublas.h \ + include/starpu_cublas_v2.h \ + include/starpu_cublasLt.h \ + include/starpu_cusolver.h \ + include/starpu_cusparse.h \ + include/starpu_hipblas.h \ + include/starpu_driver.h \ + include/starpu_stdlib.h \ + include/starpu_thread.h \ + include/starpu_thread_util.h \ + include/starpu_tree.h \ + include/starpu_simgrid_wrap.h \ + include/starpu_mod.f90 \ + include/fstarpu_mod.f90 \ + include/starpu_parallel_worker.h \ + include/starpu_perf_monitoring.h \ + include/starpu_perf_steering.h \ + include/schedulers/starpu_scheduler_toolbox.h + +if STARPU_OPENMP_LLVM +versinclude_HEADERS += \ + include/omp.h +endif + +nodist_versinclude_HEADERS = \ + include/starpu_config.h + +noinst_HEADERS = \ + include/pthread_win32/pthread.h \ + include/pthread_win32/semaphore.h + +if STARPU_DEVEL +all-local: + @if $(GREP) -r sys/time.h $$( find $(srcdir)/examples $(srcdir)/tests $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name timer.h -a \! -name loader.c ) ; \ + then \ + echo "Please do not include sys/time, it is not available on Windows, include starpu_util.h and use starpu_timing_now() instead" ; \ + false ; \ + fi + @if $(GREP) -re '\' $$( find $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name utils.c -a \! 
-name simgrid.h) ; \ + then \ + echo "Please do not use getenv, use starpu_getenv instead, which catches unsafe uses"; \ + false ; \ + fi +# we count the number of files which include unistd.h +# we count the number of files which properly include unistd.h i.e by first detecting if it's available +# and then we check both numbers are the same ...a + @UNISTD_ALL_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | wc -l | tr -d ' ') ;\ + UNISTD_CORRECT_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | $(GREP) '#ifdef .*HAVE_UNISTD_H.*:#include ' | wc -l | tr -d ' ') ;\ + if test $$UNISTD_ALL_LINES -ne $$UNISTD_CORRECT_LINES ; \ + then \ + echo "Please do not unconditionally include unistd.h, it is not available on Windows, include config.h and test for HAVE_UNISTD_H" ; \ + false ; \ + fi +endif + +if STARPU_HAVE_WINDOWS +txtdir = ${prefix} +else +txtdir = ${docdir} +endif +txt_DATA = AUTHORS COPYING.LGPL README.md README.dev STARPU-REVISION +EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README.md README.dev STARPU-VERSION STARPU-REVISION + +EXTRA_DIST += .gitlab-ci.yml +EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-basic +EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-bsd +EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-windows +EXTRA_DIST += contrib/ci.inria.fr/job-0-tarball.sh +EXTRA_DIST += contrib/ci.inria.fr/job-1-build-windows.sh +EXTRA_DIST += contrib/ci.inria.fr/job-1-check.sh +EXTRA_DIST += contrib/ci.inria.fr/job-1-check-windows.bat +EXTRA_DIST += contrib/gitlab/build.sh +EXTRA_DIST += contrib/gitlab/deploy.sh +EXTRA_DIST += contrib/gitlab/coverity.sh +EXTRA_DIST += contrib/gitlab/simgrid.sh +EXTRA_DIST += contrib/gitlab/chameleon.sh + +moduledir = 
${libdir}/modules +module_DATA = packages/starpu-1.4 + +DISTCLEANFILES = STARPU-REVISION + +include ./make/starpu-subdirtests.mk + +ctags-local: + cd $(top_srcdir) ; $(CTAGS) -R -I LIST_TYPE + $(SED) -i $(top_srcdir)/tags -e '/^[^ ]* [^ ]* /d' -e '/^[^ ]*$$/d' + + +# Cyclomatic complexity reports. + +# The pmccabe tool, see . +PMCCABE = pmccabe + +VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%" + +# Generate a cyclomatic complexity report. Note that examples and tests are +# excluded because they're not particularly relevant, and more importantly +# they all have a function called `main', which clobbers the report. +cyclomatic-complexity.html: + $(PMCCABE) \ + `find \( -name examples -o -name tests -o -path ./tools/dev/experimental \) -prune -o -name \*.c` \ + | sort -nr \ + | $(AWK) -f ${top_srcdir}/build-aux/pmccabe2html \ + -v lang=html -v name="$(PACKAGE_NAME)" \ + -v vcurl=$(VC_URL) \ + -v url="$(PACKAGE_URL)" \ + -v css=${top_srcdir}/build-aux/pmccabe.css \ + -v cut_dir=${top_srcdir}/ \ + > $@-tmp + mv $@-tmp $@ diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..fb1c0ed --- /dev/null +++ b/Makefile.in @@ -0,0 +1,1554 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : 
+PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_USE_MIN_DGELS_TRUE@am__append_1 = min-dgels +@STARPU_BUILD_TESTS_TRUE@am__append_2 = tests +@STARPU_USE_MPI_TRUE@am__append_3 = mpi +@STARPU_BUBBLE_TRUE@am__append_4 = bubble +@STARPU_BUILD_EXAMPLES_TRUE@am__append_5 = examples +@STARPU_BUILD_SOCL_TRUE@am__append_6 = socl +@STARPU_BUILD_STARPUFFT_TRUE@am__append_7 = starpufft +@STARPU_BUILD_STARPURM_TRUE@am__append_8 = starpurm +@STARPU_OPENMP_LLVM_TRUE@am__append_9 = starpu_openmp_llvm +@STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_TRUE@am__append_10 = starpupy +@STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_11 = starpupy +@STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_12 = starpupy +@STARPU_BUILD_SC_HYPERVISOR_TRUE@am__append_13 = sc_hypervisor +@STARPU_USE_JULIA_TRUE@am__append_14 = julia +@STARPU_BUILD_ECLIPSE_PLUGIN_TRUE@am__append_15 = eclipse-plugin +@STARPU_OPENMP_LLVM_TRUE@am__append_16 = \ +@STARPU_OPENMP_LLVM_TRUE@ include/omp.h + +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(noinst_HEADERS) \ + $(am__versinclude_HEADERS_DIST) $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = tests/regression/regression.sh \ + tests/regression/profiles tests/regression/profiles.build.only \ + socl/vendors/socl.icd socl/vendors/install/socl.icd \ + packages/libstarpu.pc packages/starpu-1.0.pc \ + packages/starpu-1.1.pc packages/starpu-1.2.pc \ + packages/starpu-1.3.pc packages/starpu-1.4.pc \ + packages/starpu-1.3 packages/starpu-1.4 \ + mpi/packages/libstarpumpi.pc mpi/packages/starpumpi-1.0.pc \ + mpi/packages/starpumpi-1.1.pc mpi/packages/starpumpi-1.2.pc \ + mpi/packages/starpumpi-1.3.pc mpi/packages/starpumpi-1.4.pc \ + starpufft/packages/libstarpufft.pc \ + starpufft/packages/starpufft-1.0.pc \ + starpufft/packages/starpufft-1.1.pc \ + starpufft/packages/starpufft-1.2.pc \ + starpufft/packages/starpufft-1.3.pc \ + starpufft/packages/starpufft-1.4.pc \ + starpurm/packages/starpurm-1.3.pc \ + starpurm/packages/starpurm-1.4.pc tools/msvc/starpu_var.bat \ + min-dgels/Makefile eclipse-plugin/examples/hello/.cproject 
+CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! 
-d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(txtdir)" "$(DESTDIR)$(versincludedir)" \ + "$(DESTDIR)$(versincludedir)" +DATA = $(module_DATA) $(pkgconfig_DATA) $(txt_DATA) +am__versinclude_HEADERS_DIST = include/starpu.h \ + include/starpu_helper.h include/starpu_bitmap.h \ + include/starpu_data_filters.h include/starpu_data_interfaces.h \ + include/starpu_worker.h include/starpu_task.h \ + include/starpu_task_dep.h include/starpu_task_bundle.h \ + include/starpu_task_list.h include/starpu_task_util.h \ + include/starpu_data.h include/starpu_perfmodel.h \ + include/starpu_util.h include/starpu_fxt.h \ + include/starpu_cuda.h include/starpu_hip.h \ + include/starpu_opencl.h include/starpu_max_fpga.h \ + include/starpu_openmp.h include/starpu_sink.h \ + include/starpu_expert.h include/starpu_profiling.h \ + include/starpu_profiling_tool.h include/starpu_bound.h \ + include/starpu_scheduler.h \ + include/schedulers/starpu_heteroprio.h \ + include/starpu_sched_component.h include/starpu_sched_ctx.h \ + include/starpu_sched_ctx_hypervisor.h \ + include/starpu_deprecated_api.h include/starpu_hash.h \ + include/starpu_rand.h include/starpu_disk.h \ + include/starpu_cublas.h include/starpu_cublas_v2.h \ + include/starpu_cublasLt.h include/starpu_cusolver.h \ + include/starpu_cusparse.h include/starpu_hipblas.h \ + include/starpu_driver.h include/starpu_stdlib.h \ + include/starpu_thread.h include/starpu_thread_util.h \ + include/starpu_tree.h include/starpu_simgrid_wrap.h \ + include/starpu_mod.f90 include/fstarpu_mod.f90 \ + include/starpu_parallel_worker.h \ + include/starpu_perf_monitoring.h \ + include/starpu_perf_steering.h \ + include/schedulers/starpu_scheduler_toolbox.h include/omp.h +HEADERS = $(nodist_versinclude_HEADERS) $(noinst_HEADERS) \ + $(versinclude_HEADERS) 
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir distdir-am dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = min-dgels src tools tests doc mpi bubble examples socl \ + starpufft starpurm starpu_openmp_llvm starpupy sc_hypervisor \ + julia eclipse-plugin +am__DIST_COMMON = $(srcdir)/./make/starpu-subdirtests.mk \ + $(srcdir)/Makefile.in $(top_srcdir)/build-aux/ar-lib \ + $(top_srcdir)/build-aux/compile \ + $(top_srcdir)/build-aux/config.guess \ + $(top_srcdir)/build-aux/config.sub \ + $(top_srcdir)/build-aux/install-sh \ + $(top_srcdir)/build-aux/ltmain.sh \ + $(top_srcdir)/build-aux/missing \ + $(top_srcdir)/eclipse-plugin/examples/hello/.cproject.in \ + $(top_srcdir)/include/starpu_config.h.in \ + $(top_srcdir)/min-dgels/Makefile.in \ + $(top_srcdir)/mpi/packages/libstarpumpi.pc.in \ + $(top_srcdir)/mpi/packages/starpumpi-1.0.pc.in \ + $(top_srcdir)/mpi/packages/starpumpi-1.1.pc.in \ + $(top_srcdir)/mpi/packages/starpumpi-1.2.pc.in \ + 
$(top_srcdir)/mpi/packages/starpumpi-1.3.pc.in \ + $(top_srcdir)/mpi/packages/starpumpi-1.4.pc.in \ + $(top_srcdir)/packages/libstarpu.pc.in \ + $(top_srcdir)/packages/starpu-1.0.pc.in \ + $(top_srcdir)/packages/starpu-1.1.pc.in \ + $(top_srcdir)/packages/starpu-1.2.pc.in \ + $(top_srcdir)/packages/starpu-1.3.in \ + $(top_srcdir)/packages/starpu-1.3.pc.in \ + $(top_srcdir)/packages/starpu-1.4.in \ + $(top_srcdir)/packages/starpu-1.4.pc.in \ + $(top_srcdir)/socl/vendors/install/socl.icd.in \ + $(top_srcdir)/socl/vendors/socl.icd.in \ + $(top_srcdir)/src/common/config-src-build.h.in \ + $(top_srcdir)/src/common/config.h.in \ + $(top_srcdir)/starpufft/packages/libstarpufft.pc.in \ + $(top_srcdir)/starpufft/packages/starpufft-1.0.pc.in \ + $(top_srcdir)/starpufft/packages/starpufft-1.1.pc.in \ + $(top_srcdir)/starpufft/packages/starpufft-1.2.pc.in \ + $(top_srcdir)/starpufft/packages/starpufft-1.3.pc.in \ + $(top_srcdir)/starpufft/packages/starpufft-1.4.pc.in \ + $(top_srcdir)/starpurm/include/starpurm_config.h.in \ + $(top_srcdir)/starpurm/packages/starpurm-1.3.pc.in \ + $(top_srcdir)/starpurm/packages/starpurm-1.4.pc.in \ + $(top_srcdir)/tests/regression/profiles.build.only.in \ + $(top_srcdir)/tests/regression/profiles.in \ + $(top_srcdir)/tests/regression/regression.sh.in \ + $(top_srcdir)/tools/msvc/starpu_var.bat.in AUTHORS ChangeLog \ + INSTALL README.md TODO build-aux/ar-lib build-aux/compile \ + build-aux/config.guess build-aux/config.sub \ + build-aux/install-sh build-aux/ltmain.sh build-aux/missing +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +# Exists only to be overridden by the user if desired. +AM_DISTCHECK_DVI_TARGET = dvi +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = 
@ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ 
+MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = 
@STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = 
@STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ 
+target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2017-2017 Guillaume Beauchamp +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +ACLOCAL_AMFLAGS = -I m4 +CLEANFILES = *.gcno *.gcda *.linkinfo +SUBDIRS = $(am__append_1) src tools $(am__append_2) doc \ + $(am__append_3) $(am__append_4) $(am__append_5) \ + $(am__append_6) $(am__append_7) $(am__append_8) \ + $(am__append_9) $(am__append_10) $(am__append_11) \ + $(am__append_12) $(am__append_13) $(am__append_14) \ + $(am__append_15) +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = include/starpu.h include/starpu_helper.h \ + include/starpu_bitmap.h include/starpu_data_filters.h \ + include/starpu_data_interfaces.h include/starpu_worker.h \ + include/starpu_task.h include/starpu_task_dep.h \ + include/starpu_task_bundle.h include/starpu_task_list.h \ + include/starpu_task_util.h include/starpu_data.h \ + include/starpu_perfmodel.h include/starpu_util.h \ + include/starpu_fxt.h include/starpu_cuda.h \ + include/starpu_hip.h include/starpu_opencl.h \ + 
include/starpu_max_fpga.h include/starpu_openmp.h \ + include/starpu_sink.h include/starpu_expert.h \ + include/starpu_profiling.h include/starpu_profiling_tool.h \ + include/starpu_bound.h include/starpu_scheduler.h \ + include/schedulers/starpu_heteroprio.h \ + include/starpu_sched_component.h include/starpu_sched_ctx.h \ + include/starpu_sched_ctx_hypervisor.h \ + include/starpu_deprecated_api.h include/starpu_hash.h \ + include/starpu_rand.h include/starpu_disk.h \ + include/starpu_cublas.h include/starpu_cublas_v2.h \ + include/starpu_cublasLt.h include/starpu_cusolver.h \ + include/starpu_cusparse.h include/starpu_hipblas.h \ + include/starpu_driver.h include/starpu_stdlib.h \ + include/starpu_thread.h include/starpu_thread_util.h \ + include/starpu_tree.h include/starpu_simgrid_wrap.h \ + include/starpu_mod.f90 include/fstarpu_mod.f90 \ + include/starpu_parallel_worker.h \ + include/starpu_perf_monitoring.h \ + include/starpu_perf_steering.h \ + include/schedulers/starpu_scheduler_toolbox.h $(am__append_16) +nodist_versinclude_HEADERS = \ + include/starpu_config.h + +noinst_HEADERS = \ + include/pthread_win32/pthread.h \ + include/pthread_win32/semaphore.h + +@STARPU_HAVE_WINDOWS_FALSE@txtdir = ${docdir} +@STARPU_HAVE_WINDOWS_TRUE@txtdir = ${prefix} +txt_DATA = AUTHORS COPYING.LGPL README.md README.dev STARPU-REVISION +EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README.md README.dev \ + STARPU-VERSION STARPU-REVISION .gitlab-ci.yml \ + contrib/ci.inria.fr/Jenkinsfile-basic \ + contrib/ci.inria.fr/Jenkinsfile-bsd \ + contrib/ci.inria.fr/Jenkinsfile-windows \ + contrib/ci.inria.fr/job-0-tarball.sh \ + contrib/ci.inria.fr/job-1-build-windows.sh \ + contrib/ci.inria.fr/job-1-check.sh \ + contrib/ci.inria.fr/job-1-check-windows.bat \ + contrib/gitlab/build.sh contrib/gitlab/deploy.sh \ + contrib/gitlab/coverity.sh contrib/gitlab/simgrid.sh \ + contrib/gitlab/chameleon.sh +moduledir = ${libdir}/modules +module_DATA = packages/starpu-1.4 +DISTCLEANFILES = 
STARPU-REVISION + +# Cyclomatic complexity reports. + +# The pmccabe tool, see . +PMCCABE = pmccabe +VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%" +all: all-recursive + +.SUFFIXES: +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/./make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; +$(srcdir)/./make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +src/common/config.h: src/common/stamp-h1 + @test -f $@ || rm -f src/common/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/common/stamp-h1 + +src/common/stamp-h1: $(top_srcdir)/src/common/config.h.in $(top_builddir)/config.status + @rm -f src/common/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/common/config.h +$(top_srcdir)/src/common/config.h.in: $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f src/common/stamp-h1 + touch $@ + +src/common/config-src-build.h: src/common/stamp-h2 + @test -f $@ || rm -f 
src/common/stamp-h2 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/common/stamp-h2 + +src/common/stamp-h2: $(top_srcdir)/src/common/config-src-build.h.in $(top_builddir)/config.status + @rm -f src/common/stamp-h2 + cd $(top_builddir) && $(SHELL) ./config.status src/common/config-src-build.h + +include/starpu_config.h: include/stamp-h3 + @test -f $@ || rm -f include/stamp-h3 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h3 + +include/stamp-h3: $(top_srcdir)/include/starpu_config.h.in $(top_builddir)/config.status + @rm -f include/stamp-h3 + cd $(top_builddir) && $(SHELL) ./config.status include/starpu_config.h + +starpurm/include/starpurm_config.h: starpurm/include/stamp-h4 + @test -f $@ || rm -f starpurm/include/stamp-h4 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) starpurm/include/stamp-h4 + +starpurm/include/stamp-h4: $(top_srcdir)/starpurm/include/starpurm_config.h.in $(top_builddir)/config.status + @rm -f starpurm/include/stamp-h4 + cd $(top_builddir) && $(SHELL) ./config.status starpurm/include/starpurm_config.h + +distclean-hdr: + -rm -f src/common/config.h src/common/stamp-h1 src/common/config-src-build.h src/common/stamp-h2 include/starpu_config.h include/stamp-h3 starpurm/include/starpurm_config.h starpurm/include/stamp-h4 +tests/regression/regression.sh: $(top_builddir)/config.status $(top_srcdir)/tests/regression/regression.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +tests/regression/profiles: $(top_builddir)/config.status $(top_srcdir)/tests/regression/profiles.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +tests/regression/profiles.build.only: $(top_builddir)/config.status $(top_srcdir)/tests/regression/profiles.build.only.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +socl/vendors/socl.icd: $(top_builddir)/config.status $(top_srcdir)/socl/vendors/socl.icd.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +socl/vendors/install/socl.icd: $(top_builddir)/config.status $(top_srcdir)/socl/vendors/install/socl.icd.in 
+ cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/libstarpu.pc: $(top_builddir)/config.status $(top_srcdir)/packages/libstarpu.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.0.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.1.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.2.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.3.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.4.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.3: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.3.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +packages/starpu-1.4: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.4.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +mpi/packages/libstarpumpi.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/libstarpumpi.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +mpi/packages/starpumpi-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.0.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +mpi/packages/starpumpi-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.1.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +mpi/packages/starpumpi-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.2.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +mpi/packages/starpumpi-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.3.pc.in + cd $(top_builddir) && $(SHELL) 
./config.status $@ +mpi/packages/starpumpi-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.4.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/libstarpufft.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/libstarpufft.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/starpufft-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.0.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/starpufft-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.1.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/starpufft-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.2.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/starpufft-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.3.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpufft/packages/starpufft-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.4.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpurm/packages/starpurm-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/starpurm/packages/starpurm-1.3.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +starpurm/packages/starpurm-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/starpurm/packages/starpurm-1.4.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +tools/msvc/starpu_var.bat: $(top_builddir)/config.status $(top_srcdir)/tools/msvc/starpu_var.bat.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +min-dgels/Makefile: $(top_builddir)/config.status $(top_srcdir)/min-dgels/Makefile.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +eclipse-plugin/examples/hello/.cproject: $(top_builddir)/config.status $(top_srcdir)/eclipse-plugin/examples/hello/.cproject.in + cd $(top_builddir) && 
$(SHELL) ./config.status $@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool config.lt +install-moduleDATA: $(module_DATA) + @$(NORMAL_INSTALL) + @list='$(module_DATA)'; test -n "$(moduledir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(moduledir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(moduledir)" || exit $$?; \ + done + +uninstall-moduleDATA: + @$(NORMAL_UNINSTALL) + @list='$(module_DATA)'; test -n "$(moduledir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(moduledir)'; $(am__uninstall_files_from_dir) +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; 
\ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +install-nodist_versincludeHEADERS: $(nodist_versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-nodist_versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) +install-versincludeHEADERS: $(versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit 
$$?; \ + done + +uninstall-versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case 
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-zstd: distdir + tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + *.tar.zst*) \ + zstd -dc $(distdir).tar.zst | $(am__untar) ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) $(AM_DISTCHECK_DVI_TARGET) \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +@STARPU_DEVEL_FALSE@all-local: +all-am: Makefile $(DATA) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(txtdir)" "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr \ + distclean-libtool distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-moduleDATA install-nodist_versincludeHEADERS \ + install-pkgconfigDATA install-txtDATA \ + install-versincludeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-moduleDATA uninstall-nodist_versincludeHEADERS \ + uninstall-pkgconfigDATA uninstall-txtDATA \ + uninstall-versincludeHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--refresh check check-am clean clean-cscope clean-generic \ + clean-libtool cscope cscopelist-am ctags ctags-am dist \ + dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ + dist-xz dist-zip dist-zstd distcheck distclean \ + distclean-generic distclean-hdr distclean-libtool \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec 
install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-moduleDATA \ + install-nodist_versincludeHEADERS install-pdf install-pdf-am \ + install-pkgconfigDATA install-ps install-ps-am install-strip \ + install-txtDATA install-versincludeHEADERS installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-moduleDATA \ + uninstall-nodist_versincludeHEADERS uninstall-pkgconfigDATA \ + uninstall-txtDATA uninstall-versincludeHEADERS + +.PRECIOUS: Makefile + + +@STARPU_DEVEL_TRUE@all-local: +@STARPU_DEVEL_TRUE@ @if $(GREP) -r sys/time.h $$( find $(srcdir)/examples $(srcdir)/tests $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name timer.h -a \! -name loader.c ) ; \ +@STARPU_DEVEL_TRUE@ then \ +@STARPU_DEVEL_TRUE@ echo "Please do not include sys/time, it is not available on Windows, include starpu_util.h and use starpu_timing_now() instead" ; \ +@STARPU_DEVEL_TRUE@ false ; \ +@STARPU_DEVEL_TRUE@ fi +@STARPU_DEVEL_TRUE@ @if $(GREP) -re '\<getenv\>' $$( find $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name utils.c -a \! 
-name simgrid.h) ; \ +@STARPU_DEVEL_TRUE@ then \ +@STARPU_DEVEL_TRUE@ echo "Please do not use getenv, use starpu_getenv instead, which catches unsafe uses"; \ +@STARPU_DEVEL_TRUE@ false ; \ +@STARPU_DEVEL_TRUE@ fi +# we count the number of files which include unistd.h +# we count the number of files which properly include unistd.h, i.e. by first detecting if it's available +# and then we check that both numbers are the same +@STARPU_DEVEL_TRUE@ @UNISTD_ALL_LINES=$(shell $(GREP) -B1 -rs "^#include <unistd.h>" $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | wc -l | tr -d ' ') ;\ +@STARPU_DEVEL_TRUE@ UNISTD_CORRECT_LINES=$(shell $(GREP) -B1 -rs "^#include <unistd.h>" $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | $(GREP) '#ifdef .*HAVE_UNISTD_H.*:#include <unistd.h>' | wc -l | tr -d ' ') ;\ +@STARPU_DEVEL_TRUE@ if test $$UNISTD_ALL_LINES -ne $$UNISTD_CORRECT_LINES ; \ +@STARPU_DEVEL_TRUE@ then \ +@STARPU_DEVEL_TRUE@ echo "Please do not unconditionally include unistd.h, it is not available on Windows, include config.h and test for HAVE_UNISTD_H" ; \ +@STARPU_DEVEL_TRUE@ false ; \ +@STARPU_DEVEL_TRUE@ fi + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +ctags-local: + cd $(top_srcdir) ; $(CTAGS) -R -I LIST_TYPE + $(SED) -i $(top_srcdir)/tags -e '/^[^ ]* [^ ]* /d' -e '/^[^ ]*$$/d' + +# Generate a cyclomatic complexity report. Note that examples and tests are +# excluded because they're not particularly relevant, and more importantly +# they all have a function called `main', which clobbers the report. +cyclomatic-complexity.html: + $(PMCCABE) \ + `find \( -name examples -o -name tests -o -path ./tools/dev/experimental \) -prune -o -name \*.c` \ + | sort -nr \ + | $(AWK) -f ${top_srcdir}/build-aux/pmccabe2html \ + -v lang=html -v name="$(PACKAGE_NAME)" \ + -v vcurl=$(VC_URL) \ + -v url="$(PACKAGE_URL)" \ + -v css=${top_srcdir}/build-aux/pmccabe.css \ + -v cut_dir=${top_srcdir}/ \ + > $@-tmp + mv $@-tmp $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/README.dev b/README.dev new file mode 100644 index 0000000..80f04df --- /dev/null +++ b/README.dev @@ -0,0 +1,228 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +Contents +======== + +- Directory structure +- Developer Warnings +- Naming Conventions +- Coding Style +- Error handling +- Makefile.am +- Writing a new driver + +Directory structure +------------------- + +The directory structure is as follows: +- src : internal source for StarPU +- include : public API +- tests : unitary tests +- examples : examples using StarPU +- doc : documentation for StarPU +- tools : tools for StarPU + +StarPU extensions have their own directory (src/include/tests/examples) structure: + +- mpi : The MPI support +- socl : the StarPU OpenCL-compatible interface +- sc_hypervisor : The Scheduling Context Hypervisor +- starpufft : The FFT support +- eclipse-plugin : The Eclipse Plugin +- starpupy : The StarPU Python Interface +- starpurm : The StarPU Resource Manager + +Some directories contain only build system details: +- build-aux +- m4 +- autom4te.cache + + + +Developer Warnings +------------------ + +They are enabled only if the STARPU_DEVEL environment variable is +defined to a non-empty value, when calling configure. 
+ + + +Tests +----- + +Please do try make check, at least with ./configure --enable-quick-check + +If a test fails, you can run it specifically again with + +make check TESTS=the_test + +You can also re-run only the failing tests with + +make recheck + + + +Naming Conventions +------------------ + +* Prefix names of public objects (types, functions, etc.) with "starpu" + +* Prefix names of internal objects (types, functions, etc.) with "_starpu" + +* Names for qualified types (struct, union, enum) do not end with _t, _s or similar. + Use _t only for typedef types, such as opaque public types, e.g + typedef struct _starpu_data_state* starpu_data_handle_t; + or + typedef uint64_t starpu_tag_t; + +* When a variable can only take a finite set of values, use an enum + type instead of defining macros for each of the values. + + + +Coding Style +------------ + +* Curly braces always go on a new line + + + +Error handling +-------------- +* Use STARPU_ABORT() for catastrophic errors, from which StarPU will never + recover. + + switch (node_kind) + { + case STARPU_CPU_RAM: + do_stg(); + break; + ... + default: + /* We cannot be here */ + STARPU_ABORT(); + } + +* Use STARPU_ASSERT() to run checks that are very likely to succeed, but still + are useful for debugging purposes. It should be OK to disable them with + --enable-fast. + + STARPU_ASSERT(j->terminated != 0) + +* Use STARPU_ASSERT_MSG() to run checks that might not succeed, and notably due + to application programming error. The additional message parameter should + guide the programmer into fixing their error. + + + +Documentation +------------- + +When adding a feature, we want four kinds of documentation: + +* Announcing the feature in ChangeLog. + +* At least one working example in examples/, or at least a working test in + tests/. Ideally enough examples and tests to cover all the various features. 
+ +* A section in the Doxygen documentation, that explains in which case the + feature is useful and how to use it, and points to the abovementioned + example/test. + + It should cover all aspects of the feature, so programmers don't have to look + into the .h file or reference documentation to discover features. It however + does not need to dive into all details, that can be provided in the next + documentation. + +* Doxygen comments along the declarations in the .h file. These should document + each macro, enum, function, function parameter, flag, etc. And refer to the + abovementioned section so that somebody who finds some function/macro/etc. can + easily know what that is all about. + + + +Makefile.am +----------- + +Dependency libraries are appended to LIBS. +Only real LDFLAGS such as -no-undefined go to LDFLAGS. + +If a program foo needs more libraries, it can put them in foo_LDADD. + +(No, AM_LDADD does not exist) + +All install rules must use $(DESTDIR) so that + +./configure --prefix=/usr && make && make install DESTDIR=/tmp/foobar + +can properly work, as it is used by distributions. That can easily be checked by +*not* running it as root. + + + +Writing a new driver +-------------------- + +Writing a new driver is essentially: + +- Creating an src/drivers/yourdriver/ and adding it to src/Makefile.am + + You can pick up src/drivers/cuda/driver_cuda0.c as an example of a very basic driver which + should be relatively easy to get working. Once you have it working you can + try to get inspiration from src/drivers/cuda/driver_cuda1.c to implement + asynchronous data and kernel execution. + +- Adding fields in struct starpu_conf and struct starpu_codelet. + +- Adding cases in src/core/task.c, look for _CUDA for an example. + +- Adding initialization calls in src/core/topology.c, look for _CUDA for an example. + +- Adding cases in src/core/worker.c, look for _CUDA for an example. + +- Adding the case in src/datawizard/reduction.c, look for _CUDA for an example. 
+ +- There are a few "Driver porters" notes in the code. + +- TODO: task & bus performance model + + For now the simplest is not to implement performance models. We'll rework the + support to make it very generic. + +- Other places can be extended to add features: asynchronous data transfers, + energy measurement, multiformat, memory mapping + + + +Adding a new FXT state +---------------------- + +This consists in: + +- Adding a code number in src/common/fxt.h + +- Adding the callable runtime macro in src/common/fxt.h + +- Calling these macros in the wanted place in the runtime + +- Adding a paje state in states_list src/debug/traces/starpu_fxt.c and in + src/debug/traces/starpu_paje.c + +- Adding the management of the code in _starpu_fxt_parse_new_file, usually + calling a function that does the actual paje state addition (a push/pop pair + or two state sets) + +A simple example can be found in 28740e7a91a2 ("Add a Parallel sync state"). diff --git a/README.md b/README.md new file mode 100644 index 0000000..84ed135 --- /dev/null +++ b/README.md @@ -0,0 +1,189 @@ + + +# StarPU: A Unified Runtime System for Heterogeneous Multicore Architectures + +## What is StarPU? + +StarPU is a runtime system that offers support for heterogeneous multicore +machines. While many efforts are devoted to designing efficient computation kernels +for those architectures (e.g. to implement BLAS kernels on GPUs), +StarPU not only takes care of offloading such kernels (and +implementing data coherency across the machine), but it also makes +sure the kernels are executed as efficiently as possible. + +## What StarPU is not + +StarPU is not a new language, and it does not extend existing languages either. +StarPU does not help to write computation kernels. + +## (How) Could StarPU help me? + +While StarPU will not make it easier to write computation kernels, it does +simplify their actual offloading as StarPU handles most low-level aspects +transparently. 
+ +Obviously, it is crucial to have efficient kernels, but it must be noted that +the way those kernels are mapped and scheduled onto the computational resources +also affects the overall performance to a great extent. + +StarPU is especially helpful when considering multiple heterogeneous processing +resources: statically mapping and synchronizing tasks in such a heterogeneous +environment is already very difficult; doing it in a portable way is virtually +impossible. On the other hand, the scheduling capabilities of StarPU make it +possible to easily exploit all processors at the same time while taking +advantage of their specificities in a portable fashion. + +## Requirements + +* `make` +* `gcc` (version >= 4.1) +* if `CUDA` support is enabled + * `CUDA` (version >= 2.2) + * `CUBLAS` (version >= 2.2) +* if `OpenCL` support is enabled + * `AMD` SDK >= 2.3 if `AMD` driver is used + * `CUDA` >= 3.2 if `NVIDIA` driver is used +* extra requirements for the `git` version (we usually use the Debian testing versions) + * `autoconf` (version >= 2.60) + * `automake` + * `makeinfo` + * `libtool` (version >= 2) + +Remark: It is strongly recommended that you also install the hwloc library + before installing StarPU. This permits StarPU to actually map the processing + units according to the machine topology. For more details on hwloc, see + http://www.open-mpi.org/projects/hwloc/ . + +## Getting StarPU + +StarPU is available on https://gitlab.inria.fr/starpu/starpu + +The Git repository can be checked out with the following command. + +```shell +$ git clone https://gitlab.inria.fr/starpu/starpu.git +``` + +## Building and Installing + +### For git version only + +Please skip this step if you are building from a tarball. + +```shell +$ ./autogen.sh +``` + +### For all versions + +```shell +$ mkdir build && cd build +$ ../configure +$ make +$ make install +``` + +### Windows build + +StarPU can be built using MinGW or Cygwin. 
To avoid the cygwin dependency, +we provide MinGW-built binaries. The build process produces `libstarpu.dll`, +`libstarpu.def`, and `libstarpu.lib`, which should be enough to use it from e.g. +Microsoft Visual Studio. + +Update the video drivers to the latest stable release available for your +hardware. Old ATI drivers (< 2.3) contain bugs that cause OpenCL support in +StarPU to hang or exhibit incorrect behaviour. + +For details on the Windows build process, see the [INSTALL](https://gitlab.inria.fr/starpu/starpu/-/blob/master/INSTALL) file. + +## Running StarPU Applications on Microsoft Visual C + +Batch files are provided to run StarPU applications under Microsoft +Visual C. They are installed in `path_to_starpu/bin/msvc`. + +To execute a StarPU application, you first need to set the environment +variable `STARPU_PATH`. + +```shell +c:\....> cd c:\cygwin\home\ci\starpu\ +c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ +c:\....> cd bin\msvc +c:\....> starpu_open.bat starpu_simple.c +``` + +The batch script will run Microsoft Visual C with a basic project file +to run the given application. + +The batch script `starpu_clean.bat` can be used to delete all +compilation generated files. + +The batch script `starpu_exec.bat` can be used to compile and execute a +StarPU application from the command prompt. + +```shell +c:\....> cd c:\cygwin\home\ci\starpu\ +c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ +c:\....> cd bin\msvc +c:\....> starpu_exec.bat ..\..\..\..\examples\basic_examples\hello_world.c + +MSVC StarPU Execution +... +/out:hello_world.exe +... +Hello world (params = {1, 2.00000}) +Callback function got argument 0000042 +c:\....> +``` + +## Documentation + +Doxygen documentation is available in `doc/doxygen`. 
If the doxygen +tools are available on the machine, pdf and html documentation can be +generated by running + +```shell +$ make -C doc +``` + +The [documentation for the latest StarPU release](https://files.inria.fr/starpu/doc/html/) is available, as well as +the [documentation for the StarPU master branch](https://files.inria.fr/starpu/testing/master/doc/html/). + +## Trying + +Some examples ready to run are installed into `$prefix/lib/starpu/{examples,mpi}` + +## Upgrade + +To upgrade your source code from an older version (there were quite a few +renamings), use the `tools/dev/rename.sh` script. + +## Contribute + +Contributions are welcome! Both on the +[main StarPU repository](https://gitlab.inria.fr/starpu/starpu) +and on the +[github StarPU mirror](https://github.com/starpu-runtime/starpu) + +Please see [our contribution page](https://starpu.gitlabpages.inria.fr/involved.html) for details. + +## Contact + +For any questions regarding StarPU, please contact the starpu-devel +mailing-list at starpu-devel@inria.fr or browse +[the StarPU website](https://starpu.gitlabpages.inria.fr/). + diff --git a/STARPU-REVISION b/STARPU-REVISION new file mode 100644 index 0000000..c3343b4 --- /dev/null +++ b/STARPU-REVISION @@ -0,0 +1 @@ +d352844511dfd6c6512e617ce76f6722f0c39d5e (HEAD, tag: starpu-1.4.10, origin/starpu-1.4) diff --git a/STARPU-VERSION b/STARPU-VERSION new file mode 100644 index 0000000..376de0e --- /dev/null +++ b/STARPU-VERSION @@ -0,0 +1,70 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. + +# Versioning (SONAMEs) for StarPU libraries. + +# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info +# Here are a set of rules to help you update your library version information: +# Start with version information of ‘0:0:0’ for each libtool library. +# Update the version information only immediately before a public +# release of your software. More frequent updates are unnecessary, and +# only guarantee that the current interface number gets larger faster. +# - If the library source code has changed at all since the last +# update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’). +# - If any interfaces have been added, removed, or changed since the +# last update, increment current, and set revision to 0. +# - If any interfaces have been added since the last public release, +# then increment age. +# - If any interfaces have been removed or changed since the last +# public release, then set age to 0. + +# This is the tarball version, major.minor +STARPU_EFFECTIVE_VERSION=1.4 + +# Note for StarPU 1.1: we have changed ABI +# Note for StarPU 1.2: reset everything to 0:0:0 + +# Libtool interface versioning (info "(libtool) Versioning").
+LIBSTARPU_INTERFACE_CURRENT=9 # increment upon ABI change +LIBSTARPU_INTERFACE_REVISION=0 # increment upon implementation change +LIBSTARPU_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSTARPUFFT_INTERFACE_CURRENT=1 # increment upon ABI change +LIBSTARPUFFT_INTERFACE_REVISION=0 # increment upon implementation change +LIBSTARPUFFT_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSTARPUMPI_INTERFACE_CURRENT=3 # increment upon ABI change +LIBSTARPUMPI_INTERFACE_REVISION=3 # increment upon implementation change +LIBSTARPUMPI_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSOCL_INTERFACE_CURRENT=1 # increment upon ABI change +LIBSOCL_INTERFACE_REVISION=2 # increment upon implementation change +LIBSOCL_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSTARPURM_INTERFACE_CURRENT=1 # increment upon ABI change +LIBSTARPURM_INTERFACE_REVISION=0 # increment upon implementation change +LIBSTARPURM_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT=1 # increment upon ABI change +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION=0 # increment upon implementation change +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface + +LIBSTARPUJULIA_INTERFACE_CURRENT=1 # increment upon ABI change +LIBSTARPUJULIA_INTERFACE_REVISION=0 # increment upon implementation change +LIBSTARPUJULIA_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface diff --git a/TODO b/TODO new file mode 100644 index 0000000..56949ff --- /dev/null +++ b/TODO @@ -0,0 +1,24 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +Moving access modes for data handles from struct starpu_task to struct starpu_codelet +===================================================================================== + +TODO list + +- Make struct starpu_data_descr private (or not, as it can still be used in tests and examples) + +- When cost_model is provided, but not cost_function, need to rebuild a struct starpu_data_descr diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..6825fb9 --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,1220 @@ +# generated automatically by aclocal 1.16.5 -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],, +[m4_warning([this file was generated for autoconf 2.71. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# Copyright (C) 2002-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.5])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AR([ACT-IF-FAIL]) +# ------------------------- +# Try to determine the archiver interface, and trigger the ar-lib wrapper +# if it is needed. If the detection of archiver interface fails, run +# ACT-IF-FAIL (default is to abort configure with a proper error message). 
+AC_DEFUN([AM_PROG_AR], +[AC_BEFORE([$0], [LT_INIT])dnl +AC_BEFORE([$0], [AC_PROG_LIBTOOL])dnl +AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([ar-lib])dnl +AC_CHECK_TOOLS([AR], [ar lib "link -lib"], [false]) +: ${AR=ar} + +AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface], + [AC_LANG_PUSH([C]) + am_cv_ar_interface=ar + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])], + [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + ]) + AC_LANG_POP([C])]) + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. + # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + m4_default([$1], + [AC_MSG_ERROR([could not determine $AR interface])]) + ;; +esac +AC_SUBST([AR])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. 
The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". +# We try a few techniques and use that to set a single cache variable. 
+# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. 
These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. 
+AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. 
+ # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE="gmake" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. 
But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +m4_ifdef([_$0_ALREADY_INIT], + [m4_fatal([$0 expanded multiple times +]m4_defn([_$0_ALREADY_INIT]))], + [m4_define([_$0_ALREADY_INIT], m4_expansion_stack)])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. 
+m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifset([AC_PACKAGE_NAME], [ok]):m4_ifset([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi +AC_SUBST([CTAGS]) +if test -z "$ETAGS"; then + ETAGS=etags +fi +AC_SUBST([ETAGS]) +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi +AC_SUBST([CSCOPE]) + +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. 
+if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. 
+ +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. +AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. 
+# If it is, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar <conftest.tar]) + AM_RUN_LOG([cat conftest.dir/file]) + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([m4/acinclude.m4]) +m4_include([m4/ax_cxx_compile_stdcxx.m4]) +m4_include([m4/ax_dlb_callback_arg.m4]) +m4_include([m4/libs.m4]) +m4_include([m4/libtool.m4]) +m4_include([m4/ltoptions.m4]) +m4_include([m4/ltsugar.m4]) +m4_include([m4/ltversion.m4]) +m4_include([m4/lt~obsolete.m4]) +m4_include([m4/pkg.m4]) diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..e31111e --- /dev/null +++ b/autogen.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +if ! libtool --version > /dev/null +then + # Perhaps we are on a Mac + if ! glibtool --version > /dev/null + then + echo "GNU Libtool is missing, please install it and fix the PATH to it." + exit 1 + else + export LIBTOOL=glibtool + export LIBTOOLIZE=glibtoolize + fi +fi +autoreconf -ivf -I m4 + diff --git a/bubble/Makefile.am b/bubble/Makefile.am new file mode 100644 index 0000000..a4fd8d6 --- /dev/null +++ b/bubble/Makefile.am @@ -0,0 +1,20 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = +SUBDIRS += tests diff --git a/bubble/Makefile.in b/bubble/Makefile.in new file mode 100644 index 0000000..971c593 --- /dev/null +++ b/bubble/Makefile.in @@ -0,0 +1,889 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = bubble +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + 
$(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a 
list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = 
@BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ 
+LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = 
@OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = 
@STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = 
@ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = tests +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) 
am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bubble/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign bubble/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/bubble/tests/Makefile.am b/bubble/tests/Makefile.am new file mode 100644 index 0000000..878f70c --- /dev/null +++ b/bubble/tests/Makefile.am @@ -0,0 +1,81 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2019-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +AM_CFLAGS += $(APP_CFLAGS) +AM_CXXFLAGS += $(APP_CXXFLAGS) +AM_FFLAGS += $(APP_FFLAGS) +AM_FCFLAGS += $(APP_FCFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) +AM_CPPFLAGS += -I$(top_srcdir)/bubble/include +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(HWLOC_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) +#LIBS += ../src/libstarpububble-@STARPU_EFFECTIVE_VERSION@.la + +BUILT_SOURCES = + +if STARPU_USE_OPENCL +nobase_STARPU_OPENCL_DATA_DATA = +endif + +EXTRA_DIST = \ + basic/basic.h + +CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log + +##################################### +# What to install and what to check # +##################################### + +if STARPU_HAVE_WINDOWS +check_PROGRAMS = $(myPROGRAMS) +else +check_PROGRAMS = $(LOADER) $(myPROGRAMS) +endif + +TESTS = $(myPROGRAMS) + +myPROGRAMS = + +if !STARPU_SIMGRID +if STARPU_BUBBLE +myPROGRAMS += \ + basic/b \ + basic/bb \ + basic/btb \ + basic/btb_func \ + basic/bbt \ + basic/btt \ + basic/bbtt \ + basic/tbbt \ + basic/tbtbt \ + basic/brt \ + basic/brbtt \ + basic/sync \ + basic/gemm_dag \ + basic/b2t \ + basic/brec \ + basic/brec_level \ + 
basic/read \ + basic/tbrbtt \ + vector/vector +endif +endif + +noinst_PROGRAMS += $(myPROGRAMS) diff --git a/bubble/tests/Makefile.in b/bubble/tests/Makefile.in new file mode 100644 index 0000000..1d23fcc --- /dev/null +++ b/bubble/tests/Makefile.in @@ -0,0 +1,1938 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_3) $(am__EXEEXT_2) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_HAVE_WINDOWS_FALSE@check_PROGRAMS = $(am__EXEEXT_2) +@STARPU_HAVE_WINDOWS_TRUE@check_PROGRAMS = $(am__EXEEXT_2) +TESTS = $(am__EXEEXT_2) +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@am__append_8 = \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bb \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb_func \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbtt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbbt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbtbt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brbtt \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/sync \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/gemm_dag \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b2t \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec_level \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/read \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbrbtt \ 
+@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ vector/vector + +subdir = bubble/tests +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_1 = \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bb$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb_func$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbtt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbbt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbtbt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brbtt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/sync$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/gemm_dag$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b2t$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec$(EXEEXT) \ 
+@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec_level$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/read$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbrbtt$(EXEEXT) \ +@STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ vector/vector$(EXEEXT) +am__EXEEXT_2 = $(am__EXEEXT_1) +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_3 = loader$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) +basic_b_SOURCES = basic/b.c +am__dirstamp = $(am__leading_dot)dirstamp +basic_b_OBJECTS = basic/b.$(OBJEXT) +basic_b_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +basic_b2t_SOURCES = basic/b2t.c +basic_b2t_OBJECTS = basic/b2t.$(OBJEXT) +basic_b2t_LDADD = $(LDADD) +basic_bb_SOURCES = basic/bb.c +basic_bb_OBJECTS = basic/bb.$(OBJEXT) +basic_bb_LDADD = $(LDADD) +basic_bbt_SOURCES = basic/bbt.c +basic_bbt_OBJECTS = basic/bbt.$(OBJEXT) +basic_bbt_LDADD = $(LDADD) +basic_bbtt_SOURCES = basic/bbtt.c +basic_bbtt_OBJECTS = basic/bbtt.$(OBJEXT) +basic_bbtt_LDADD = $(LDADD) +basic_brbtt_SOURCES = basic/brbtt.c +basic_brbtt_OBJECTS = basic/brbtt.$(OBJEXT) +basic_brbtt_LDADD = $(LDADD) +basic_brec_SOURCES = basic/brec.c +basic_brec_OBJECTS = basic/brec.$(OBJEXT) +basic_brec_LDADD = $(LDADD) +basic_brec_level_SOURCES = basic/brec_level.c +basic_brec_level_OBJECTS = basic/brec_level.$(OBJEXT) +basic_brec_level_LDADD = $(LDADD) +basic_brt_SOURCES = basic/brt.c +basic_brt_OBJECTS = basic/brt.$(OBJEXT) +basic_brt_LDADD = $(LDADD) +basic_btb_SOURCES = basic/btb.c +basic_btb_OBJECTS = basic/btb.$(OBJEXT) +basic_btb_LDADD = $(LDADD) +basic_btb_func_SOURCES = basic/btb_func.c +basic_btb_func_OBJECTS = basic/btb_func.$(OBJEXT) +basic_btb_func_LDADD = $(LDADD) +basic_btt_SOURCES = basic/btt.c +basic_btt_OBJECTS = basic/btt.$(OBJEXT) +basic_btt_LDADD = $(LDADD) +basic_gemm_dag_SOURCES = basic/gemm_dag.c +basic_gemm_dag_OBJECTS = basic/gemm_dag.$(OBJEXT) +basic_gemm_dag_LDADD = $(LDADD) +basic_read_SOURCES = basic/read.c 
+basic_read_OBJECTS = basic/read.$(OBJEXT) +basic_read_LDADD = $(LDADD) +basic_sync_SOURCES = basic/sync.c +basic_sync_OBJECTS = basic/sync.$(OBJEXT) +basic_sync_LDADD = $(LDADD) +basic_tbbt_SOURCES = basic/tbbt.c +basic_tbbt_OBJECTS = basic/tbbt.$(OBJEXT) +basic_tbbt_LDADD = $(LDADD) +basic_tbrbtt_SOURCES = basic/tbrbtt.c +basic_tbrbtt_OBJECTS = basic/tbrbtt.$(OBJEXT) +basic_tbrbtt_LDADD = $(LDADD) +basic_tbtbt_SOURCES = basic/tbtbt.c +basic_tbtbt_OBJECTS = basic/tbtbt.$(OBJEXT) +basic_tbtbt_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +vector_vector_SOURCES = vector/vector.c +vector_vector_OBJECTS = vector/vector.$(OBJEXT) +vector_vector_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + basic/$(DEPDIR)/b.Po basic/$(DEPDIR)/b2t.Po \ + basic/$(DEPDIR)/bb.Po basic/$(DEPDIR)/bbt.Po \ + basic/$(DEPDIR)/bbtt.Po basic/$(DEPDIR)/brbtt.Po \ + basic/$(DEPDIR)/brec.Po basic/$(DEPDIR)/brec_level.Po \ + basic/$(DEPDIR)/brt.Po basic/$(DEPDIR)/btb.Po \ + basic/$(DEPDIR)/btb_func.Po basic/$(DEPDIR)/btt.Po \ + basic/$(DEPDIR)/gemm_dag.Po basic/$(DEPDIR)/read.Po \ + basic/$(DEPDIR)/sync.Po basic/$(DEPDIR)/tbbt.Po \ + basic/$(DEPDIR)/tbrbtt.Po basic/$(DEPDIR)/tbtbt.Po \ + vector/$(DEPDIR)/vector.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = basic/b.c basic/b2t.c basic/bb.c basic/bbt.c basic/bbtt.c \ + basic/brbtt.c basic/brec.c basic/brec_level.c basic/brt.c \ + basic/btb.c basic/btb_func.c basic/btt.c basic/gemm_dag.c \ + basic/read.c basic/sync.c basic/tbbt.c basic/tbrbtt.c \ + basic/tbtbt.c loader.c vector/vector.c +DIST_SOURCES = basic/b.c basic/b2t.c basic/bb.c basic/bbt.c \ + basic/bbtt.c basic/brbtt.c basic/brec.c basic/brec_level.c \ + basic/brt.c basic/btb.c basic/btb_func.c basic/btt.c \ + basic/gemm_dag.c basic/read.c basic/sync.c basic/tbbt.c \ + basic/tbrbtt.c basic/tbtbt.c loader.c vector/vector.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if 
(++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" +DATA = $(nobase_STARPU_OPENCL_DATA_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS 
= @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) \ + $(STARPU_CUDA_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = 
@LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ 
+PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = 
@STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = 
@build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) 
+@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src \ + -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) \ + -I$(top_srcdir)/bubble/include +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +#LIBS += ../src/libstarpububble-@STARPU_EFFECTIVE_VERSION@.la +BUILT_SOURCES = +@STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = +EXTRA_DIST = \ + basic/basic.h + +CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log +myPROGRAMS = $(am__append_8) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs 
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bubble/tests/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign bubble/tests/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f 
$$list +basic/$(am__dirstamp): + @$(MKDIR_P) basic + @: > basic/$(am__dirstamp) +basic/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) basic/$(DEPDIR) + @: > basic/$(DEPDIR)/$(am__dirstamp) +basic/b.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/b$(EXEEXT): $(basic_b_OBJECTS) $(basic_b_DEPENDENCIES) $(EXTRA_basic_b_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/b$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_b_OBJECTS) $(basic_b_LDADD) $(LIBS) +basic/b2t.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/b2t$(EXEEXT): $(basic_b2t_OBJECTS) $(basic_b2t_DEPENDENCIES) $(EXTRA_basic_b2t_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/b2t$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_b2t_OBJECTS) $(basic_b2t_LDADD) $(LIBS) +basic/bb.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/bb$(EXEEXT): $(basic_bb_OBJECTS) $(basic_bb_DEPENDENCIES) $(EXTRA_basic_bb_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/bb$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_bb_OBJECTS) $(basic_bb_LDADD) $(LIBS) +basic/bbt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/bbt$(EXEEXT): $(basic_bbt_OBJECTS) $(basic_bbt_DEPENDENCIES) $(EXTRA_basic_bbt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/bbt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_bbt_OBJECTS) $(basic_bbt_LDADD) $(LIBS) +basic/bbtt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/bbtt$(EXEEXT): $(basic_bbtt_OBJECTS) $(basic_bbtt_DEPENDENCIES) $(EXTRA_basic_bbtt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/bbtt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_bbtt_OBJECTS) $(basic_bbtt_LDADD) $(LIBS) +basic/brbtt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/brbtt$(EXEEXT): $(basic_brbtt_OBJECTS) $(basic_brbtt_DEPENDENCIES) $(EXTRA_basic_brbtt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/brbtt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_brbtt_OBJECTS) $(basic_brbtt_LDADD) $(LIBS) 
+basic/brec.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/brec$(EXEEXT): $(basic_brec_OBJECTS) $(basic_brec_DEPENDENCIES) $(EXTRA_basic_brec_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/brec$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_brec_OBJECTS) $(basic_brec_LDADD) $(LIBS) +basic/brec_level.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/brec_level$(EXEEXT): $(basic_brec_level_OBJECTS) $(basic_brec_level_DEPENDENCIES) $(EXTRA_basic_brec_level_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/brec_level$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_brec_level_OBJECTS) $(basic_brec_level_LDADD) $(LIBS) +basic/brt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/brt$(EXEEXT): $(basic_brt_OBJECTS) $(basic_brt_DEPENDENCIES) $(EXTRA_basic_brt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/brt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_brt_OBJECTS) $(basic_brt_LDADD) $(LIBS) +basic/btb.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/btb$(EXEEXT): $(basic_btb_OBJECTS) $(basic_btb_DEPENDENCIES) $(EXTRA_basic_btb_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/btb$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_btb_OBJECTS) $(basic_btb_LDADD) $(LIBS) +basic/btb_func.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/btb_func$(EXEEXT): $(basic_btb_func_OBJECTS) $(basic_btb_func_DEPENDENCIES) $(EXTRA_basic_btb_func_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/btb_func$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_btb_func_OBJECTS) $(basic_btb_func_LDADD) $(LIBS) +basic/btt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/btt$(EXEEXT): $(basic_btt_OBJECTS) $(basic_btt_DEPENDENCIES) $(EXTRA_basic_btt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/btt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_btt_OBJECTS) $(basic_btt_LDADD) $(LIBS) +basic/gemm_dag.$(OBJEXT): basic/$(am__dirstamp) \ + 
basic/$(DEPDIR)/$(am__dirstamp) + +basic/gemm_dag$(EXEEXT): $(basic_gemm_dag_OBJECTS) $(basic_gemm_dag_DEPENDENCIES) $(EXTRA_basic_gemm_dag_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/gemm_dag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_gemm_dag_OBJECTS) $(basic_gemm_dag_LDADD) $(LIBS) +basic/read.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/read$(EXEEXT): $(basic_read_OBJECTS) $(basic_read_DEPENDENCIES) $(EXTRA_basic_read_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/read$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_read_OBJECTS) $(basic_read_LDADD) $(LIBS) +basic/sync.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/sync$(EXEEXT): $(basic_sync_OBJECTS) $(basic_sync_DEPENDENCIES) $(EXTRA_basic_sync_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/sync$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_sync_OBJECTS) $(basic_sync_LDADD) $(LIBS) +basic/tbbt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/tbbt$(EXEEXT): $(basic_tbbt_OBJECTS) $(basic_tbbt_DEPENDENCIES) $(EXTRA_basic_tbbt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/tbbt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_tbbt_OBJECTS) $(basic_tbbt_LDADD) $(LIBS) +basic/tbrbtt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/tbrbtt$(EXEEXT): $(basic_tbrbtt_OBJECTS) $(basic_tbrbtt_DEPENDENCIES) $(EXTRA_basic_tbrbtt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/tbrbtt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_tbrbtt_OBJECTS) $(basic_tbrbtt_LDADD) $(LIBS) +basic/tbtbt.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/tbtbt$(EXEEXT): $(basic_tbtbt_OBJECTS) $(basic_tbtbt_DEPENDENCIES) $(EXTRA_basic_tbtbt_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/tbtbt$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_tbtbt_OBJECTS) $(basic_tbtbt_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(loader_OBJECTS) $(loader_LDADD) $(LIBS) +vector/$(am__dirstamp): + @$(MKDIR_P) vector + @: > vector/$(am__dirstamp) +vector/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) vector/$(DEPDIR) + @: > vector/$(DEPDIR)/$(am__dirstamp) +vector/vector.$(OBJEXT): vector/$(am__dirstamp) \ + vector/$(DEPDIR)/$(am__dirstamp) + +vector/vector$(EXEEXT): $(vector_vector_OBJECTS) $(vector_vector_DEPENDENCIES) $(EXTRA_vector_vector_DEPENDENCIES) vector/$(am__dirstamp) + @rm -f vector/vector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(vector_vector_OBJECTS) $(vector_vector_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f basic/*.$(OBJEXT) + -rm -f vector/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/b.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/b2t.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bb.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bbt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bbtt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brbtt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brec.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brec_level.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btb.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btb_func.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@basic/$(DEPDIR)/gemm_dag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/read.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/sync.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbbt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbrbtt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbtbt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@vector/$(DEPDIR)/vector.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 
'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf basic/.libs basic/_libs + -rm -rf vector/.libs vector/_libs +install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) + @$(NORMAL_INSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ + echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_STARPU_OPENCL_DATADATA: + @$(NORMAL_UNINSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: 
$(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +basic/b.log: basic/b$(EXEEXT) + @p='basic/b$(EXEEXT)'; \ + b='basic/b'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/bb.log: basic/bb$(EXEEXT) + @p='basic/bb$(EXEEXT)'; \ + b='basic/bb'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/btb.log: basic/btb$(EXEEXT) + @p='basic/btb$(EXEEXT)'; \ + b='basic/btb'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/btb_func.log: basic/btb_func$(EXEEXT) + @p='basic/btb_func$(EXEEXT)'; \ + b='basic/btb_func'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/bbt.log: basic/bbt$(EXEEXT) 
+ @p='basic/bbt$(EXEEXT)'; \ + b='basic/bbt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/btt.log: basic/btt$(EXEEXT) + @p='basic/btt$(EXEEXT)'; \ + b='basic/btt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/bbtt.log: basic/bbtt$(EXEEXT) + @p='basic/bbtt$(EXEEXT)'; \ + b='basic/bbtt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/tbbt.log: basic/tbbt$(EXEEXT) + @p='basic/tbbt$(EXEEXT)'; \ + b='basic/tbbt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/tbtbt.log: basic/tbtbt$(EXEEXT) + @p='basic/tbtbt$(EXEEXT)'; \ + b='basic/tbtbt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/brt.log: basic/brt$(EXEEXT) + @p='basic/brt$(EXEEXT)'; \ + b='basic/brt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/brbtt.log: basic/brbtt$(EXEEXT) + @p='basic/brbtt$(EXEEXT)'; \ + b='basic/brbtt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/sync.log: basic/sync$(EXEEXT) + @p='basic/sync$(EXEEXT)'; \ + b='basic/sync'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/gemm_dag.log: basic/gemm_dag$(EXEEXT) + @p='basic/gemm_dag$(EXEEXT)'; \ + b='basic/gemm_dag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/b2t.log: basic/b2t$(EXEEXT) + @p='basic/b2t$(EXEEXT)'; \ + b='basic/b2t'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/brec.log: basic/brec$(EXEEXT) + @p='basic/brec$(EXEEXT)'; \ + b='basic/brec'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/brec_level.log: basic/brec_level$(EXEEXT) + @p='basic/brec_level$(EXEEXT)'; \ + b='basic/brec_level'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic/read.log: basic/read$(EXEEXT) + @p='basic/read$(EXEEXT)'; \ + b='basic/read'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +basic/tbrbtt.log: basic/tbrbtt$(EXEEXT) + @p='basic/tbrbtt$(EXEEXT)'; \ + b='basic/tbrbtt'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +vector/vector.log: vector/vector$(EXEEXT) + @p='vector/vector$(EXEEXT)'; \ + b='vector/vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else 
d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) $(DATA) +installdirs: + for dir in "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test 
. = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f basic/$(DEPDIR)/$(am__dirstamp) + -rm -f basic/$(am__dirstamp) + -rm -f vector/$(DEPDIR)/$(am__dirstamp) + -rm -f vector/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f basic/$(DEPDIR)/b.Po + -rm -f basic/$(DEPDIR)/b2t.Po + -rm -f basic/$(DEPDIR)/bb.Po + -rm -f basic/$(DEPDIR)/bbt.Po + -rm -f basic/$(DEPDIR)/bbtt.Po + -rm -f basic/$(DEPDIR)/brbtt.Po + -rm -f basic/$(DEPDIR)/brec.Po + -rm -f basic/$(DEPDIR)/brec_level.Po + -rm -f basic/$(DEPDIR)/brt.Po + -rm -f basic/$(DEPDIR)/btb.Po + -rm -f basic/$(DEPDIR)/btb_func.Po + -rm -f basic/$(DEPDIR)/btt.Po + -rm -f basic/$(DEPDIR)/gemm_dag.Po + -rm -f basic/$(DEPDIR)/read.Po + -rm -f basic/$(DEPDIR)/sync.Po + -rm -f basic/$(DEPDIR)/tbbt.Po + -rm -f basic/$(DEPDIR)/tbrbtt.Po + -rm -f basic/$(DEPDIR)/tbtbt.Po + -rm -f vector/$(DEPDIR)/vector.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-nobase_STARPU_OPENCL_DATADATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f basic/$(DEPDIR)/b.Po + -rm -f basic/$(DEPDIR)/b2t.Po + -rm -f basic/$(DEPDIR)/bb.Po + -rm 
-f basic/$(DEPDIR)/bbt.Po + -rm -f basic/$(DEPDIR)/bbtt.Po + -rm -f basic/$(DEPDIR)/brbtt.Po + -rm -f basic/$(DEPDIR)/brec.Po + -rm -f basic/$(DEPDIR)/brec_level.Po + -rm -f basic/$(DEPDIR)/brt.Po + -rm -f basic/$(DEPDIR)/btb.Po + -rm -f basic/$(DEPDIR)/btb_func.Po + -rm -f basic/$(DEPDIR)/btt.Po + -rm -f basic/$(DEPDIR)/gemm_dag.Po + -rm -f basic/$(DEPDIR)/read.Po + -rm -f basic/$(DEPDIR)/sync.Po + -rm -f basic/$(DEPDIR)/tbbt.Po + -rm -f basic/$(DEPDIR)/tbrbtt.Po + -rm -f basic/$(DEPDIR)/tbtbt.Po + -rm -f vector/$(DEPDIR)/vector.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-nobase_STARPU_OPENCL_DATADATA + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-nobase_STARPU_OPENCL_DATADATA install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + recheck tags tags-am uninstall uninstall-am \ + uninstall-nobase_STARPU_OPENCL_DATADATA + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. 
valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/bubble/tests/basic/b.c b/bubble/tests/basic/b.c new file mode 100644 index 0000000..854afce --- /dev/null +++ b/bubble/tests/basic/b.c @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2019-2019 Gwenole Lucas + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +#define check_binary_task(x,y) x+=y + +struct starpu_codelet sub_data_chain_codelet = +{ + .cpu_funcs = {sub_data_func}, + .nbuffers = 2, + .name = "sub_data_chain_cl" +}; + +void bubble_chain_gen_dag(struct starpu_task *t, void *arg) +{ + FPRINTF(stderr, "Hello i am a bubble\n"); + int i; + starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; + + for(i=0 ; i 0; + return 1; +} + +void bubble_gen_dag(struct starpu_task *t, void *arg) +{ + FPRINTF(stderr, "Hello i am a bubble\n"); + int i; + starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; + + for(i=0 ; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + 
starpu_shutdown(); + return 77; + } + + for (i=0; i +#define PARTS 2 +#define SIZE 8 +#include "basic.h" + +void rec2_bubble_gen_dag(struct starpu_task *t, void *arg) +{ + int i; + starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; + + FPRINTF(stderr, "Hello i am a bubble\n"); + + for(i=0 ; i +#define PARTS 1 +#define SIZE 25 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +struct starpu_data_filter f = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = PARTS +}; + +void sub_data_func(void *buffers[], void *arg) +{ + int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + + for(i=0 ; i +#define PARTS 2 +#define SIZE 24 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +struct starpu_data_filter f = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = PARTS +}; + +void sub_data_read_func(void *buffers[], void *arg) +{ +} + +void sub_data_func(void *buffers[], void *arg) +{ + int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + + for(i=0 ; i +#define PARTS 2 +#define SIZE 8 +#include "basic.h" + +void rec2_bubble_gen_dag(struct starpu_task *t, void *arg) +{ + int i; + starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; + + FPRINTF(stderr, "Hello i am a bubble\n"); + + for(i=0 ; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +struct starpu_codelet my_codelet; + +void my_task_func(void *buffers[], void *arg) +{ + int *v = 
(int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + + print_vector(v, nx, "task"); + for(i=0 ; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define PARTS 4 +#define SIZE 16 + +#define SYNC 0 + +struct bubble_arg +{ + starpu_data_handle_t *A; + starpu_data_handle_t *B; + starpu_data_handle_t *C; + starpu_data_handle_t *subA; + starpu_data_handle_t *subB; + starpu_data_handle_t *subC; +}; + +struct starpu_data_filter f = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = PARTS +}; + +void scam_func(void *buffers[], void *arg) +{ + assert(0); +} + +void real_func(void *buffers[], void *arg) +{ + int *A = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + for (i=0; iA))) && +// (starpu_data_get_nb_children_async(*(b->B))) && +// (starpu_data_get_nb_children_async(*(b->C)))) +// return 1; +// else +// return 0; +} + +void insert_dag(starpu_data_handle_t *A, starpu_data_handle_t *B, starpu_data_handle_t *C, starpu_data_handle_t *subA, starpu_data_handle_t *subB, starpu_data_handle_t *subC, struct starpu_task *t); + +void bubble_gen_dag_func(struct starpu_task *t, void *arg) +{ + struct bubble_arg *b_a = (struct bubble_arg*)arg; + starpu_data_handle_t *subhandlesA = b_a->subA; + starpu_data_handle_t *subhandlesB = b_a->subB; + starpu_data_handle_t *subhandlesC = b_a->subC; + free(b_a); + + insert_dag(subhandlesA, subhandlesB, subhandlesC, NULL, NULL, NULL, t); +} + +void insert_dag(starpu_data_handle_t *A, 
starpu_data_handle_t *B, starpu_data_handle_t *C, starpu_data_handle_t *subA, starpu_data_handle_t *subB, starpu_data_handle_t *subC, struct starpu_task *t) +{ + int ret, i; + + for (i=0; iA = A; + b_a->B = B; + b_a->C = C; + b_a->subA = subA; + b_a->subB = subB; + b_a->subC = subC; + name = "bubble"; + } + + /* insert bubble on handle */ + /* printf("[INSERT] first - %s - %d\n", name, i); */ + ret = starpu_task_insert(&gemm_codelet, + STARPU_R, handleA1, + STARPU_R, handleB1, + STARPU_RW, handleC, + STARPU_BUBBLE_FUNC, is_bubble, + STARPU_BUBBLE_FUNC_ARG, b_a, + STARPU_BUBBLE_GEN_DAG_FUNC, bubble_gen_dag_func, + STARPU_BUBBLE_GEN_DAG_FUNC_ARG, b_a, + STARPU_BUBBLE_PARENT, t, + STARPU_TASK_SYNCHRONOUS, SYNC, + STARPU_NAME, name, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + name = "task_lvl0"; + if (t) + { + name = "task_lvl1"; + } + + /* printf("[INSERT] second - %s - %d\n", name, i); */ + ret = starpu_task_insert(&gemm_codelet, + STARPU_R, handleA2, + STARPU_R, handleB2, + STARPU_RW, handleC, + STARPU_BUBBLE_FUNC, is_bubble, + STARPU_BUBBLE_FUNC_ARG, NULL, + STARPU_BUBBLE_GEN_DAG_FUNC, bubble_gen_dag_func, + STARPU_BUBBLE_GEN_DAG_FUNC_ARG, b_a, + STARPU_BUBBLE_PARENT, t, + STARPU_TASK_SYNCHRONOUS, SYNC, + STARPU_NAME, name, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } +} + +void init_handles(int *data, starpu_data_handle_t *handles, starpu_data_handle_t *subhandles) +{ + int i,j; + for (i=0; i +#include "basic.h" + +struct starpu_codelet sub_data_chain_codelet = +{ + .cpu_funcs = {sub_data_func}, + .nbuffers = 1, + .name = "sub_data_chain_cl" +}; + +void bubble_chain_gen_dag(struct starpu_task *t, void *arg) +{ + FPRINTF(stderr, "Hello i am a bubble\n"); + starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; + int i; + + for(i=0 ; i +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define PARTS 2 +#define SIZE 16 + +struct starpu_data_filter f = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = PARTS +}; + +void scam_func(void *buffers[], void *arg) +{ + assert(0); +} + +void real_func(void *buffers[], void *arg) +{ + int *A = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + for (i=0; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +#define LENGTH 16 +#define NPARTS 2 + +struct handle_partition +{ + starpu_data_handle_t handle; + starpu_data_handle_t *sub; + starpu_data_handle_t *sub0; + starpu_data_handle_t *sub1; +}; + +struct starpu_data_filter filter = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS +}; + +void task_2arg_func(void *buffers[], void *arg) +{ + int *v1 = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int *v2 = (int*)STARPU_VECTOR_GET_PTR(buffers[1]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + + print_vector(v1, nx, "task"); + for(i=0 ; isub0[1], + STARPU_RW, handles->sub1[0], + STARPU_NAME, "Task", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&task_2arg_codelet, + STARPU_R, handles->sub0[1], + STARPU_RW, handles->sub1[1], + STARPU_NAME, "Task", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +struct starpu_codelet bubble_2arg_codelet = +{ + .cpu_funcs = {bubble_func}, + .bubble_func = is_bubble, + .bubble_gen_dag_func = bubble_2arg_gen_dag, + .nbuffers = 2 +}; + +void bubble_1arg_gen_dag(struct starpu_task *t, void *arg) +{ + FPRINTF(stderr, "Bubble level 
1\n"); + struct handle_partition *handles = (struct handle_partition*)arg; + + int ret = starpu_task_insert(&bubble_2arg_codelet, + STARPU_R, handles->sub[0], + STARPU_RW, handles->sub[1], + STARPU_NAME, "BubbleLvl2", + STARPU_BUBBLE_GEN_DAG_FUNC_ARG, handles, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +struct starpu_codelet bubble_1arg_codelet = +{ + .cpu_funcs = {bubble_func}, + .bubble_func = is_bubble, + .bubble_gen_dag_func = bubble_1arg_gen_dag, + .nbuffers = 1 +}; + +int main(int argv, char **argc) +{ + int ret, i; + int v[LENGTH]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include "basic.h" + +int main(int argv, char **argc) +{ + int ret, i; + int v[SIZE]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return 77; + } + + for (i=0; i +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && 
!defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", "starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. 
*/ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/bubble/tests/vector/vector.c 
b/bubble/tests/vector/vector.c new file mode 100644 index 0000000..fccdff7 --- /dev/null +++ b/bubble/tests/vector/vector.c @@ -0,0 +1,228 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2019-2019 Gwenole Lucas + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define X 6 +#define SLICES 2 + +#define NITER 20 +#define TYPE int +#define PTYPE "%3d" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void func_cpu(void *descr[], void *_args) +{ + (void) _args; + int x; + int nx = STARPU_VECTOR_GET_NX(descr[0]); + TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + + for(x=0 ; x. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + + +# func_error message +func_error () +{ + echo "$me: $1" 1>&2 + exit 1 +} + +file_conv= + +# func_file_conv build_file +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv in + mingw) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin | msys) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_at_file at_file operation archive +# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE +# for each of them. +# When interpreting the content of the @FILE, do NOT use func_file_conv, +# since the user would need to supply preconverted file names to +# binutils ar, at least for MinGW. +func_at_file () +{ + operation=$2 + archive=$3 + at_file_contents=`cat "$1"` + eval set x "$at_file_contents" + shift + + for member + do + $AR -NOLOGO $operation:"$member" "$archive" || exit $? + done +} + +case $1 in + '') + func_error "no command. Try '$0 --help' for more information." + ;; + -h | --h*) + cat <. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. 
+func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/* | msys/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. 
+ eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... 
+ ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/build-aux/config.guess b/build-aux/config.guess new file mode 100755 index 0000000..7f76b62 --- /dev/null +++ b/build-aux/config.guess @@ -0,0 +1,1754 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2022 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2022-01-09' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. 
+#
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+# The "shellcheck disable" line above the timestamp inhibits complaints
+# about features and limitations of the classic Bourne shell that were
+# superseded or lifted in POSIX. However, this script identifies a wide
+# variety of pre-POSIX systems that do not have POSIX shells at all, and
+# even some reasonably current systems (Solaris 10 as case-in-point) still
+# have a pre-POSIX /bin/sh.
+
+
+# Basename of this script (everything up to the last '/' removed); used
+# as the prefix of diagnostics and inside the help texts below.
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+# Text printed by --help.
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+# Text printed by --version.
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2022 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+# Hint appended to error messages.  It starts with a newline on purpose so
+# that it lands on its own line after the diagnostic it is appended to.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+# Every branch either exits or breaks, so at most the first argument is
+# examined; anything left over is rejected by the check after the loop.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )      # Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+# The script takes no operands: any remaining argument is an error.
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+# Just in case it came from the environment.
+GUESS=
+
+# CC_FOR_BUILD -- compiler used by this script.
Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+# Path of the scratch directory.  Empty until set_cc_for_build has run,
+# which is also how that function detects that it was already called.
+tmp=
+# Remove the scratch directory, if one was created, when the script exits
+# (condition 0) or receives signal 1, 2, 13 or 15.
+# shellcheck disable=SC2172
+trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
+
+# set_cc_for_build
+#
+# Creates a private scratch directory and selects the compiler used for
+# the detection probes.  Takes no arguments.
+#
+# Sets:
+#   tmp           the scratch directory (removed by the trap above)
+#   dummy         "$tmp/dummy", base name for probe source/object files
+#   CC_FOR_BUILD  compiler command, or the literal "no_compiler_found"
+#                 when none of the candidate drivers could compile
+# Reads: TMPDIR (defaulted to /tmp when unset), CC_FOR_BUILD, HOST_CC, CC.
+# Exits the script with status 1 if no scratch directory can be created.
+set_cc_for_build() {
+    # prevent multiple calls if $tmp is already set
+    test "$tmp" && return 0
+    # Assign the default only when TMPDIR is unset.
+    : "${TMPDIR=/tmp}"
+    # Try progressively weaker ways of creating the directory; the first
+    # brace group that succeeds ends the || chain:
+    #   1. mktemp -d (result must be non-empty and an existing directory)
+    #   2. mkdir of a name built from the PID and $RANDOM, if the shell
+    #      provides $RANDOM
+    #   3. mkdir of a name built from the PID only, with a warning
+    #   4. give up: print an error and exit 1
+    # umask 077 is applied in a subshell so it does not leak into the
+    # rest of the script.
+    # shellcheck disable=SC2039,SC3028
+    { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+    { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
+    { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+    { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
+    dummy=$tmp/dummy
+    # The three variables are joined with commas so one pattern match can
+    # tell which of them are empty.  If CC_FOR_BUILD is already non-empty
+    # none of the patterns match and it is left untouched.
+    case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
+        ,,)
+            # Nothing configured: probe a list of common driver names and
+            # keep the first one that compiles a trivial translation unit.
+            echo "int x;" > "$dummy.c"
+            for driver in cc gcc c89 c99 ; do
+                if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+                    CC_FOR_BUILD=$driver
+                    break
+                fi
+            done
+            if test x"$CC_FOR_BUILD" = x ; then
+                CC_FOR_BUILD=no_compiler_found
+            fi
+            ;;
+        # Only CC is set: use it.
+        ,,*) CC_FOR_BUILD=$CC ;;
+        # CC_FOR_BUILD is empty and HOST_CC is set: HOST_CC wins over CC.
+        ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+    esac
+}
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if test -f /.attbin/uname ; then
+    PATH=$PATH:/.attbin ; export PATH
+fi
+
+# Raw uname fields.  Each falls back to the literal "unknown" when uname
+# fails; uname runs in a subshell so that a missing command is silenced
+# by the 2>/dev/null redirection as well.
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+# On Linux and GNU systems, work out which C library is in use and store
+# the answer in LIBC (uclibc, dietlibc, gnu or musl).
+case $UNAME_SYSTEM in
+Linux|GNU|GNU/*)
+	LIBC=unknown
+
+	set_cc_for_build
+	# Preprocess a small file whose surviving line is a shell assignment
+	# "LIBC=...".  <features.h> is what defines the __UCLIBC__ and
+	# __GLIBC__ macros tested below, and <stdarg.h> is what would define
+	# __DEFINED_va_list, so both header names are required for the
+	# probe to be able to detect anything.
+	# The here-document body must stay indented with tabs: "<<-" strips
+	# leading tabs only.
+	cat <<-EOF > "$dummy.c"
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#elif defined(__GLIBC__)
+	LIBC=gnu
+	#else
+	#include <stdarg.h>
+	/* First heuristic to detect musl libc. */
+	#ifdef __DEFINED_va_list
+	LIBC=musl
+	#endif
+	#endif
+	EOF
+	# Keep only the "LIBC=..." line of the preprocessor output, drop
+	# any blanks in it, and evaluate it as an assignment.  If the
+	# compiler is missing or fails the result is empty and LIBC stays
+	# "unknown".
+	cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
+	eval "$cc_set_libc"
+
+	# Second heuristic to detect musl libc.
+	if [ "$LIBC" = unknown ] &&
+	   command -v ldd >/dev/null &&
+	   ldd --version 2>&1 | grep -q ^musl; then
+		LIBC=musl
+	fi
+
+	# If the system lacks a compiler, then just pick glibc.
+	# We could probably try harder.
+	if [ "$LIBC" = unknown ]; then
+		LIBC=gnu
+	fi
+	;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format. This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name. We always set it to "unknown".
+ UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)` + case $UNAME_MACHINE_ARCH in + aarch64eb) machine=aarch64_be-unknown ;; + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; + *) machine=$UNAME_MACHINE_ARCH-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. + case $UNAME_MACHINE_ARCH in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case $UNAME_MACHINE_ARCH in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case $UNAME_VERSION in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ GUESS=$machine-${os}${release}${abi-} + ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE + ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE + ;; + *:SecBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE + ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE + ;; + *:MidnightBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE + ;; + *:ekkoBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE + ;; + *:SolidBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE + ;; + *:OS108:*:*) + GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE + ;; + macppc:MirBSD:*:*) + GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE + ;; + *:MirBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE + ;; + *:Sortix:*:*) + GUESS=$UNAME_MACHINE-unknown-sortix + ;; + *:Twizzler:*:*) + GUESS=$UNAME_MACHINE-unknown-twizzler + ;; + *:Redox:*:*) + GUESS=$UNAME_MACHINE-unknown-redox + ;; + mips:OSF1:*.*) + GUESS=mips-dec-osf1 + ;; + alpha:OSF1:*:*) + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + trap '' 0 + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case $ALPHA_CPU_TYPE in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + GUESS=$UNAME_MACHINE-dec-osf$OSF_REL + ;; + Amiga*:UNIX_System_V:4.0:*) + GUESS=m68k-unknown-sysv4 + ;; + *:[Aa]miga[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-amigaos + ;; + *:[Mm]orph[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-morphos + ;; + *:OS/390:*:*) + GUESS=i370-ibm-openedition + ;; + *:z/VM:*:*) + GUESS=s390-ibm-zvmoe + ;; + *:OS400:*:*) + GUESS=powerpc-ibm-os400 + ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + GUESS=arm-acorn-riscix$UNAME_RELEASE + ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + GUESS=arm-unknown-riscos + ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + GUESS=hppa1.1-hitachi-hiuxmpp + ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ case `(/bin/universe) 2>/dev/null` in + att) GUESS=pyramid-pyramid-sysv3 ;; + *) GUESS=pyramid-pyramid-bsd ;; + esac + ;; + NILE*:*:*:dcosx) + GUESS=pyramid-pyramid-svr4 + ;; + DRS?6000:unix:4.0:6*) + GUESS=sparc-icl-nx6 + ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) GUESS=sparc-icl-nx7 ;; + esac + ;; + s390x:SunOS:*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL + ;; + sun4H:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-hal-solaris2$SUN_REL + ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris2$SUN_REL + ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + GUESS=i386-pc-auroraux$UNAME_RELEASE + ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + set_cc_for_build + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$SUN_ARCH-pc-solaris2$SUN_REL + ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris3$SUN_REL + ;; + sun4*:SunOS:*:*) + case `/usr/bin/arch -k` in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. 
+ SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` + GUESS=sparc-sun-sunos$SUN_REL + ;; + sun3*:SunOS:*:*) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case `/bin/arch` in + sun3) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun4) + GUESS=sparc-sun-sunos$UNAME_RELEASE + ;; + esac + ;; + aushp:SunOS:*:*) + GUESS=sparc-auspex-sunos$UNAME_RELEASE + ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + GUESS=m68k-milan-mint$UNAME_RELEASE + ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + GUESS=m68k-hades-mint$UNAME_RELEASE + ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + GUESS=m68k-unknown-mint$UNAME_RELEASE + ;; + m68k:machten:*:*) + GUESS=m68k-apple-machten$UNAME_RELEASE + ;; + powerpc:machten:*:*) + GUESS=powerpc-apple-machten$UNAME_RELEASE + ;; + RISC*:Mach:*:*) + GUESS=mips-dec-mach_bsd4.3 + ;; + RISC*:ULTRIX:*:*) + GUESS=mips-dec-ultrix$UNAME_RELEASE + ;; + VAX*:ULTRIX*:*:*) + GUESS=vax-dec-ultrix$UNAME_RELEASE + ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + GUESS=clipper-intergraph-clix$UNAME_RELEASE + ;; + mips:*:*:UMIPS | 
mips:*:*:RISCos) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=mips-mips-riscos$UNAME_RELEASE + ;; + Motorola:PowerMAX_OS:*:*) + GUESS=powerpc-motorola-powermax + ;; + Motorola:*:4.3:PL8-*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:Power_UNIX:*:*) + GUESS=powerpc-harris-powerunix + ;; + m88k:CX/UX:7*:*) + GUESS=m88k-harris-cxux7 + ;; + m88k:*:4*:R4*) + GUESS=m88k-motorola-sysv4 + ;; + m88k:*:3*:R3*) + GUESS=m88k-motorola-sysv3 + ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 + then + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x + then + GUESS=m88k-dg-dgux$UNAME_RELEASE + else + GUESS=m88k-dg-dguxbcs$UNAME_RELEASE + fi + else + GUESS=i586-dg-dgux$UNAME_RELEASE + fi + ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + GUESS=m88k-dolphin-sysv3 + ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + GUESS=m88k-motorola-sysv3 + ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + GUESS=m88k-tektronix-sysv3 + ;; + Tek43[0-9][0-9]:UTek:*:*) # 
Tektronix 4300 system running UTek (BSD) + GUESS=m68k-tektronix-bsd + ;; + *:IRIX*:*:*) + IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` + GUESS=mips-sgi-irix$IRIX_REL + ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id + ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + GUESS=i386-ibm-aix + ;; + ia64:AIX:*:*) + if test -x /usr/bin/oslevel ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV + ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + GUESS=$SYSTEM_NAME + else + GUESS=rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + GUESS=rs6000-ibm-aix3.2.4 + else + GUESS=rs6000-ibm-aix3.2 + fi + ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if test -x /usr/bin/lslpp ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$IBM_ARCH-ibm-aix$IBM_REV + ;; + *:AIX:*:*) + GUESS=rs6000-ibm-aix + ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + GUESS=romp-ibm-bsd4.4 + ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to + ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + GUESS=rs6000-bull-bosx + ;; + DPX/2?00:B.O.S.:*:*) + GUESS=m68k-bull-sysv3 + ;; + 9000/[34]??:4.3bsd:1.*:*) + GUESS=m68k-hp-bsd + ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + 
GUESS=m68k-hp-bsd4.4 + ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + case $UNAME_MACHINE in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if test -x /usr/bin/getconf; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case $sc_cpu_version in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case $sc_kernel_bits in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if test "$HP_ARCH" = hppa2.0w + then + set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + GUESS=$HP_ARCH-hp-hpux$HPUX_REV + ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + GUESS=ia64-hp-hpux$HPUX_REV + ;; + 3050*:HI-UX:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=unknown-hitachi-hiuxwe2 + ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + GUESS=hppa1.1-hp-bsd + ;; + 9000/8??:4.3bsd:*:*) + GUESS=hppa1.0-hp-bsd + ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + GUESS=hppa1.0-hp-mpeix + ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + GUESS=hppa1.1-hp-osf + ;; + hp8??:OSF1:*:*) + GUESS=hppa1.0-hp-osf + ;; + i*86:OSF1:*:*) + if test -x /usr/sbin/sysversion ; then + GUESS=$UNAME_MACHINE-unknown-osf1mk + else + GUESS=$UNAME_MACHINE-unknown-osf1 + fi + ;; + parisc*:Lites*:*:*) + GUESS=hppa1.1-hp-lites + ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + GUESS=c1-convex-bsd + ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc 
+ then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + GUESS=c34-convex-bsd + ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + GUESS=c38-convex-bsd + ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + GUESS=c4-convex-bsd + ;; + CRAY*Y-MP:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=ymp-cray-unicos$CRAY_REL + ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=t90-cray-unicos$CRAY_REL + ;; + CRAY*T3E:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=alphaev5-cray-unicosmk$CRAY_REL + ;; + CRAY*SV1:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=sv1-cray-unicos$CRAY_REL + ;; + *:UNICOS/mp:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=craynv-cray-unicosmp$CRAY_REL + ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE + ;; + sparc*:BSD/OS:*:*) + GUESS=sparc-unknown-bsdi$UNAME_RELEASE + ;; + *:BSD/OS:*:*) + 
GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE + ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=`uname -p` + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi + else + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf + fi + ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case $UNAME_PROCESSOR in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL + ;; + i*:CYGWIN*:*) + GUESS=$UNAME_MACHINE-pc-cygwin + ;; + *:MINGW64*:*) + GUESS=$UNAME_MACHINE-pc-mingw64 + ;; + *:MINGW*:*) + GUESS=$UNAME_MACHINE-pc-mingw32 + ;; + *:MSYS*:*) + GUESS=$UNAME_MACHINE-pc-msys + ;; + i*:PW*:*) + GUESS=$UNAME_MACHINE-pc-pw32 + ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; + *:Interix*:*) + case $UNAME_MACHINE in + x86) + GUESS=i586-pc-interix$UNAME_RELEASE + ;; + authenticamd | genuineintel | EM64T) + GUESS=x86_64-unknown-interix$UNAME_RELEASE + ;; + IA64) + GUESS=ia64-unknown-interix$UNAME_RELEASE + ;; + esac ;; + i*:UWIN*:*) + GUESS=$UNAME_MACHINE-pc-uwin + ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + GUESS=x86_64-pc-cygwin + ;; + prep*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=powerpcle-unknown-solaris2$SUN_REL + ;; + *:GNU:*:*) + # the GNU system + GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` + GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL + ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + 
GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC + ;; + *:Minix:*:*) + GUESS=$UNAME_MACHINE-unknown-minix + ;; + aarch64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arm*:Linux:*:*) + set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi + else + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf + fi + fi + ;; + avr32*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + cris:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + crisv32:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + e2k:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + frv:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + hexagon:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:Linux:*:*) + GUESS=$UNAME_MACHINE-pc-linux-$LIBC + ;; + ia64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + k1om:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; 
+ m32r*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + m68*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + mips:Linux:*:* | mips64:Linux:*:*) + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + MIPS_ENDIAN=el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + MIPS_ENDIAN= + #else + MIPS_ENDIAN= + #endif + #endif +EOF + cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` + eval "$cc_set_vars" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } + ;; + mips64el:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + openrisc*:Linux:*:*) + GUESS=or1k-unknown-linux-$LIBC + ;; + or32:Linux:*:* | or1k*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + padre:Linux:*:*) + GUESS=sparc-unknown-linux-$LIBC + ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + GUESS=hppa64-unknown-linux-$LIBC + ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; + PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; + *) GUESS=hppa-unknown-linux-$LIBC ;; + esac + ;; + ppc64:Linux:*:*) + GUESS=powerpc64-unknown-linux-$LIBC + ;; + 
ppc:Linux:*:*) + GUESS=powerpc-unknown-linux-$LIBC + ;; + ppc64le:Linux:*:*) + GUESS=powerpc64le-unknown-linux-$LIBC + ;; + ppcle:Linux:*:*) + GUESS=powerpcle-unknown-linux-$LIBC + ;; + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + s390:Linux:*:* | s390x:Linux:*:*) + GUESS=$UNAME_MACHINE-ibm-linux-$LIBC + ;; + sh64*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sh*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + tile*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + vax:Linux:*:*) + GUESS=$UNAME_MACHINE-dec-linux-$LIBC + ;; + x86_64:Linux:*:*) + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI=${LIBC}x32 + fi + fi + GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI + ;; + xtensa*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + GUESS=i386-sequent-sysv4 + ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION + ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ GUESS=$UNAME_MACHINE-pc-os2-emx + ;; + i*86:XTS-300:*:STOP) + GUESS=$UNAME_MACHINE-unknown-stop + ;; + i*86:atheos:*:*) + GUESS=$UNAME_MACHINE-unknown-atheos + ;; + i*86:syllable:*:*) + GUESS=$UNAME_MACHINE-pc-syllable + ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + GUESS=i386-unknown-lynxos$UNAME_RELEASE + ;; + i*86:*DOS:*:*) + GUESS=$UNAME_MACHINE-pc-msdosdjgpp + ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL + fi + ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv32 + fi + ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. 
+ GUESS=i586-pc-msdosdjgpp + ;; + Intel:Mach:3*:*) + GUESS=i386-pc-mach3 + ;; + paragon:*:*:*) + GUESS=i860-intel-osf1 + ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 + fi + ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + GUESS=m68010-convergent-sysv + ;; + mc68k:UNIX:SYSTEM5:3.51m) + GUESS=m68k-convergent-sysv + ;; + M680?0:D-NIX:5.3:*) + GUESS=m68k-diab-dnix + ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + GUESS=m68k-unknown-lynxos$UNAME_RELEASE + ;; + mc68030:UNIX_System_V:4.*:*) + GUESS=m68k-atari-sysv4 + ;; + TSUNAMI:LynxOS:2.*:*) + 
GUESS=sparc-unknown-lynxos$UNAME_RELEASE + ;; + rs6000:LynxOS:2.*:*) + GUESS=rs6000-unknown-lynxos$UNAME_RELEASE + ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + GUESS=powerpc-unknown-lynxos$UNAME_RELEASE + ;; + SM[BE]S:UNIX_SV:*:*) + GUESS=mips-dde-sysv$UNAME_RELEASE + ;; + RM*:ReliantUNIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + RM*:SINIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + GUESS=$UNAME_MACHINE-sni-sysv4 + else + GUESS=ns32k-sni-sysv + fi + ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + GUESS=i586-unisys-sysv4 + ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + GUESS=hppa1.1-stratus-sysv4 + ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + GUESS=i860-stratus-sysv4 + ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=$UNAME_MACHINE-stratus-vos + ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=hppa1.1-stratus-vos + ;; + mc68*:A/UX:*:*) + GUESS=m68k-apple-aux$UNAME_RELEASE + ;; + news*:NEWS-OS:6*:*) + GUESS=mips-sony-newsos6 + ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if test -d /usr/nec; then + GUESS=mips-nec-sysv$UNAME_RELEASE + else + GUESS=mips-unknown-sysv$UNAME_RELEASE + fi + ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + GUESS=powerpc-be-beos + ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + GUESS=powerpc-apple-beos + ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + GUESS=i586-pc-beos + ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ GUESS=i586-pc-haiku + ;; + x86_64:Haiku:*:*) + GUESS=x86_64-unknown-haiku + ;; + SX-4:SUPER-UX:*:*) + GUESS=sx4-nec-superux$UNAME_RELEASE + ;; + SX-5:SUPER-UX:*:*) + GUESS=sx5-nec-superux$UNAME_RELEASE + ;; + SX-6:SUPER-UX:*:*) + GUESS=sx6-nec-superux$UNAME_RELEASE + ;; + SX-7:SUPER-UX:*:*) + GUESS=sx7-nec-superux$UNAME_RELEASE + ;; + SX-8:SUPER-UX:*:*) + GUESS=sx8-nec-superux$UNAME_RELEASE + ;; + SX-8R:SUPER-UX:*:*) + GUESS=sx8r-nec-superux$UNAME_RELEASE + ;; + SX-ACE:SUPER-UX:*:*) + GUESS=sxace-nec-superux$UNAME_RELEASE + ;; + Power*:Rhapsody:*:*) + GUESS=powerpc-apple-rhapsody$UNAME_RELEASE + ;; + *:Rhapsody:*:*) + GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE + ;; + arm64:Darwin:*:*) + GUESS=aarch64-apple-darwin$UNAME_RELEASE + ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build + fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE + fi + GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE + ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE + ;; + *:QNX:*:4*) + GUESS=i386-pc-qnx + ;; + NEO-*:NONSTOP_KERNEL:*:*) + GUESS=neo-tandem-nsk$UNAME_RELEASE + ;; + NSE-*:NONSTOP_KERNEL:*:*) + GUESS=nse-tandem-nsk$UNAME_RELEASE + ;; + NSR-*:NONSTOP_KERNEL:*:*) + GUESS=nsr-tandem-nsk$UNAME_RELEASE + ;; + NSV-*:NONSTOP_KERNEL:*:*) + GUESS=nsv-tandem-nsk$UNAME_RELEASE + ;; + NSX-*:NONSTOP_KERNEL:*:*) + GUESS=nsx-tandem-nsk$UNAME_RELEASE + ;; + *:NonStop-UX:*:*) + GUESS=mips-compaq-nonstopux + ;; + BS2000:POSIX*:*:*) + GUESS=bs2000-siemens-sysv + ;; + DS/*:UNIX_System_V:*:*) + GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE + ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "${cputype-}" = 386; then + UNAME_MACHINE=i386 + elif test "x${cputype-}" != x; then + UNAME_MACHINE=$cputype + fi + GUESS=$UNAME_MACHINE-unknown-plan9 + ;; + *:TOPS-10:*:*) + GUESS=pdp10-unknown-tops10 + ;; + *:TENEX:*:*) + GUESS=pdp10-unknown-tenex + ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + GUESS=pdp10-dec-tops20 + ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + GUESS=pdp10-xkl-tops20 + ;; + *:TOPS-20:*:*) + GUESS=pdp10-unknown-tops20 + ;; + *:ITS:*:*) + GUESS=pdp10-unknown-its + ;; + SEI:*:*:SEIUX) + GUESS=mips-sei-seiux$UNAME_RELEASE + ;; + *:DragonFly:*:*) + DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL + ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case $UNAME_MACHINE in + A*) GUESS=alpha-dec-vms ;; + I*) GUESS=ia64-dec-vms ;; + V*) GUESS=vax-dec-vms ;; + esac ;; + *:XENIX:*:SysV) + GUESS=i386-pc-xenix + ;; + i*86:skyos:*:*) + SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` + GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL + ;; + i*86:rdos:*:*) + GUESS=$UNAME_MACHINE-pc-rdos + ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; + *:AROS:*:*) + GUESS=$UNAME_MACHINE-unknown-aros + ;; + x86_64:VMkernel:*:*) + GUESS=$UNAME_MACHINE-unknown-esx + ;; + amd64:Isilon\ OneFS:*:*) + GUESS=x86_64-unknown-onefs + ;; + *:Unleashed:*:*) + GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE + ;; +esac + +# Do we have a guess based on uname results? +if test "x$GUESS" != x; then + echo "$GUESS" + exit +fi + +# No uname command or uname output not recognized. 
+set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case $UNAME_MACHINE:$UNAME_SYSTEM in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF +fi + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/build-aux/config.sub b/build-aux/config.sub new file mode 100755 index 0000000..dba16e8 --- /dev/null +++ b/build-aux/config.sub @@ -0,0 +1,1890 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2022 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2022-01-03' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to . +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. 
+ +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2022 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. 
+ echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Split fields of configuration type +# shellcheck disable=SC2162 +saved_IFS=$IFS +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 + ;; + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 + ;; + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac + ;; + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + zephyr*) + basic_machine=$field1-unknown + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac + ;; + 
*) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + 
basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; 
+ nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy + ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + 
basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac + ;; +esac + +# Decode 1-component or ad-hoc basic machines +case $basic_machine in + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond + ;; + op50n) + cpu=hppa1.1 + vendor=oki + ;; + op60c) + cpu=hppa1.1 + vendor=oki + ;; + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + basic_os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + basic_os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + basic_os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv32 + ;; + i*86v4*) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv4 + ;; + i*86v) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv + ;; + i*86sol2) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $basic_os in + irix*) + ;; + *) + basic_os=irix4 + ;; 
+ esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) + ;; + ns2*) + basic_os=nextstep2 + ;; + *) + basic_os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + basic_os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf + ;; + none) + cpu=none + vendor=none + ;; + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine + ;; + leon-*|leon[3-9]-*) + cpu=sparc + vendor=`echo "$basic_machine" | sed 's/-.*//'` + ;; + + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read cpu vendor <&2 + exit 1 + ;; + esac + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $vendor in + digital*) + vendor=dec + ;; + commodore*) + vendor=cbm + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if test x$basic_os != x +then + +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just +# set os. 
+case $basic_os in + gnu/linux*) + kernel=linux + os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` + ;; + os2-emx) + kernel=os2 + os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` + ;; + nto-qnx*) + kernel=nto + os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` + ;; + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read kernel os <&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ + | linux-musl* | linux-relibc* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + vxworks-simlinux | vxworks-simwindows | vxworks-spe) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+case $vendor in + unknown) + case $cpu-$os in + *-riscix*) + vendor=acorn + ;; + *-sunos*) + vendor=sun + ;; + *-cnk* | *-aix*) + vendor=ibm + ;; + *-beos*) + vendor=be + ;; + *-hpux*) + vendor=hp + ;; + *-mpeix*) + vendor=hp + ;; + *-hiux*) + vendor=hitachi + ;; + *-unos*) + vendor=crds + ;; + *-dgux*) + vendor=dg + ;; + *-luna*) + vendor=omron + ;; + *-genix*) + vendor=ns + ;; + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) + vendor=ibm + ;; + *-os400*) + vendor=ibm + ;; + s390-* | s390x-*) + vendor=ibm + ;; + *-ptx*) + vendor=sequent + ;; + *-tpf*) + vendor=ibm + ;; + *-vxsim* | *-vxworks* | *-windiss*) + vendor=wrs + ;; + *-aux*) + vendor=apple + ;; + *-hms*) + vendor=hitachi + ;; + *-mpw* | *-macos*) + vendor=apple + ;; + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) + vendor=atari + ;; + *-vos*) + vendor=stratus + ;; + esac + ;; +esac + +echo "$cpu-$vendor-${kernel:+$kernel-}$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/build-aux/depcomp b/build-aux/depcomp new file mode 100755 index 0000000..715e343 --- /dev/null +++ b/build-aux/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. 
+# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. 
+ dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. +## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. 
+## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? 
+ if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. 
+ # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. 
We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. 
Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/build-aux/install-sh b/build-aux/install-sh new file mode 100755 index 0000000..ec298b5 --- /dev/null +++ b/build-aux/install-sh @@ -0,0 +1,541 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2020-11-14.01; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +# Create dirs (including intermediate dirs) using mode 755. +# This is like GNU 'install' as of coreutils 8.32 (2020). +mkdir_umask=22 + +backupsuffix= +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -p pass -p to $cpprog. + -s $stripprog installed files. + -S SUFFIX attempt to back up existing files, with suffix SUFFIX. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG + +By default, rm is invoked with -f; when overridden with RMPROG, +it's up to you to specify -f if you want it. + +If -S is not specified, no backups are attempted. + +Email bug reports to bug-automake@gnu.org. 
+Automake home page: https://www.gnu.org/software/automake/ +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -p) cpprog="$cpprog -p";; + + -s) stripcmd=$stripprog;; + + -S) backupsuffix="$2" + shift;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. 
+ # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + # Don't chown directories that already exist. + if test $dstdir_status = 0; then + chowncmd="" + fi + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. 
+ if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + # The $RANDOM variable is not portable (e.g., dash). Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap ' + ret=$? + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null + exit $ret + ' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p'. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && + { test -z "$stripcmd" || { + # Create $dsttmp read-write so that cp doesn't create it read-only, + # which would cause strip to fail. + if test -z "$doit"; then + : >"$dsttmp" # No need to fork-exec 'touch'. + else + $doit touch "$dsttmp" + fi + } + } && + $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. 
+ if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # If $backupsuffix is set, and the file being installed + # already exists, attempt a backup. Don't worry if it fails, + # e.g., if mv doesn't support -f. + if test -n "$backupsuffix" && test -f "$dst"; then + $doit $mvcmd -f "$dst" "$dst$backupsuffix" 2>/dev/null + fi + + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/build-aux/ltmain.sh b/build-aux/ltmain.sh new file mode 100755 index 0000000..9b12fbb --- /dev/null +++ b/build-aux/ltmain.sh @@ -0,0 +1,11448 @@ +#! 
/usr/bin/env sh +## DO NOT EDIT - This file generated from ./build-aux/ltmain.in +## by inline-source v2019-02-19.15 + +# libtool (GNU libtool) 2.4.7 +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +PROGRAM=libtool +PACKAGE=libtool +VERSION="2.4.7 Debian-2.4.7-5" +package_revision=2.4.7 + + +## ------ ## +## Usage. ## +## ------ ## + +# Run './libtool --help' for help with using this script from the +# command line. + + +## ------------------------------- ## +## User overridable command paths. ## +## ------------------------------- ## + +# After configure completes, it has a better idea of some of the +# shell tools we need than the defaults used by the functions shared +# with bootstrap, so set those here where they can still be over- +# ridden by the user, but otherwise take precedence. 
+
+: ${AUTOCONF="autoconf"}
+: ${AUTOMAKE="automake"}
+
+
+## -------------------------- ##
+## Source external libraries. ##
+## -------------------------- ##
+
+# Much of our low-level functionality needs to be sourced from external
+# libraries, which are installed to $pkgauxdir.
+
+# Set a version string for this script.
+scriptversion=2019-02-19.15; # UTC
+
+# General shell script boiler plate, and helper functions.
+# Written by Gary V. Vaughan, 2004
+
+# This is free software.  There is NO warranty; not even for
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Copyright (C) 2004-2019, 2021 Bootstrap Authors
+#
+# This file is dual licensed under the terms of the MIT license
+# <https://opensource.org/licenses/MIT>, and GPL version 2 or later
+# <https://www.gnu.org/licenses/gpl-2.0.html>.  You must apply one of
+# these licenses when using or redistributing this software or any of
+# the files within it.  See the URLs above, or the file `LICENSE`
+# included in the Bootstrap distribution for the full license texts.
+
+# Please report bugs or propose patches to:
+# <https://github.com/gnulib-modules/bootstrap/issues>
+
+
+## ------ ##
+## Usage. ##
+## ------ ##
+
+# Evaluate this file near the top of your script to gain access to
+# the functions and variables defined here:
+#
+#   . `echo "$0" | ${SED-sed} 's|[^/]*$||'`/build-aux/funclib.sh
+#
+# If you need to override any of the default environment variable
+# settings, do that before evaluating this file.
+
+
+## -------------------- ##
+## Shell normalisation. ##
+## -------------------- ##
+
+# Some shells need a little help to be as Bourne compatible as possible.
+# Before doing anything else, make sure all that help has been provided!
+
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  # Not zsh: switch on POSIX mode when 'set -o' lists such an option.
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac
+fi
+
+# NLS nuisances: We save the old values in case they are required later.
+# For each locale variable that is set, the eval below stores the old
+# value in save_<VAR>, forces <VAR>=C, and prepends a restore command to
+# $_G_user_locale and a "<VAR>=C" command to $_G_safe_locale.
+_G_user_locale=
+_G_safe_locale=
+for _G_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+do
+  eval "if test set = \"\${$_G_var+set}\"; then
+          save_$_G_var=\$$_G_var
+          $_G_var=C
+          export $_G_var
+          _G_user_locale=\"$_G_var=\\\$save_\$_G_var; \$_G_user_locale\"
+          _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\"
+        fi"
+done
+# These NLS vars are set unconditionally (bootstrap issue #24).  Unset those
+# in case the environment reset is needed later and the $save_* variant is not
+# defined (see the code above).
+LC_ALL=C
+LANGUAGE=C
+export LANGUAGE LC_ALL
+
+# Make sure IFS has a sensible default
+# NOTE(review): a default IFS is space, TAB, newline; check that the
+# separator between $sp and $nl below is a literal TAB and was not
+# collapsed to a space.
+sp=' '
+nl='
+'
+IFS="$sp $nl"
+
+# Some systems use ';' rather than ':' as the PATH separator.  Probe by
+# running 'sh' with each form of a two-element PATH: ';' is selected only
+# when the ';' form works and the ':' form does not.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# func_unset VAR
+# --------------
+# Portably unset VAR.
+# In some shells, an 'unset VAR' statement leaves a non-zero return
+# status if VAR is already unset, which might be problematic if the
+# statement is used at the end of a function (thus poisoning its return
+# value) or when 'set -e' is active (causing even a spurious abort of
+# the script in this case).
+func_unset ()
+{
+    # Empty VAR first, try the unset in a subshell, and only repeat it in
+    # this shell when the trial succeeded; the trailing ':' forces status 0.
+    { eval $1=; (eval unset $1) >/dev/null 2>&1 && eval unset $1 || : ; }
+}
+
+
+# Make sure CDPATH doesn't cause `cd` commands to output the target dir.
+func_unset CDPATH
+
+# Make sure ${,E,F}GREP behave sanely.
+func_unset GREP_OPTIONS
+
+
+## ------------------------- ##
+## Locate command utilities. ##
+## ------------------------- ##
+
+
+# func_executable_p FILE
+# ----------------------
+# Check that FILE is an executable regular file.
+func_executable_p ()
+{
+    # True only for a regular file that the current user may execute.
+    test -f "$1" && test -x "$1"
+}
+
+
+# func_path_progs PROGS_LIST CHECK_FUNC [PATH]
+# --------------------------------------------
+# Search for either a program that responds to --version with output
+# containing "GNU", or else returned by CHECK_FUNC otherwise, by
+# trying all the directories in PATH with each of the elements of
+# PROGS_LIST.
+#
+# CHECK_FUNC should accept the path to a candidate program, and
+# set $func_check_prog_result if it truncates its output less than
+# $_G_path_prog_max characters.
+#
+# The chosen program is left in $func_path_progs_result.  If no
+# candidate is acceptable, a diagnostic naming PROGS_LIST is printed to
+# standard error and the script exits with status 1.
+func_path_progs ()
+{
+    _G_progs_list=$1
+    _G_check_func=$2
+    _G_PATH=${3-"$PATH"}
+
+    # Start every search from a clean slate, so that the result of an
+    # earlier search (say, for sed) cannot be mistaken for a hit here.
+    func_path_progs_result=
+    func_check_prog_result=
+
+    _G_path_prog_max=0
+    _G_path_prog_found=false
+    _G_save_IFS=$IFS; IFS=${PATH_SEPARATOR-:}
+    for _G_dir in $_G_PATH; do
+      IFS=$_G_save_IFS
+      test -z "$_G_dir" && _G_dir=.
+      for _G_prog_name in $_G_progs_list; do
+        for _exeext in '' .EXE; do
+          _G_path_prog=$_G_dir/$_G_prog_name$_exeext
+          func_executable_p "$_G_path_prog" || continue
+          case `"$_G_path_prog" --version 2>&1` in
+            *GNU*) func_path_progs_result=$_G_path_prog _G_path_prog_found=: ;;
+            *)     # Quote the candidate: directories may contain spaces.
+                   $_G_check_func "$_G_path_prog"
+                   func_path_progs_result=$func_check_prog_result
+                   ;;
+          esac
+          # A GNU implementation is always good enough: stop searching.
+          $_G_path_prog_found && break 3
+        done
+      done
+    done
+    IFS=$_G_save_IFS
+    if test -z "$func_path_progs_result"; then
+      echo "no acceptable program among '$_G_progs_list' could be found in \$PATH" >&2
+      exit 1
+    fi
+}
+
+
+# We want to be able to use the functions in this file before configure
+# has figured out where the best binaries are kept, which means we have
+# to search for them ourselves - except when the results are already set
+# where we skip the searches.
+
+# Unless the user overrides by setting SED, search the path for either GNU
+# sed, or the sed that truncates its output the least.
+test -z "$SED" && {
+  # Build a sed script of 99 identical substitution commands; the
+  # pattern never matches the digit-only probe input generated below.
+  _G_sed_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+  for _G_i in 1 2 3 4 5 6 7; do
+    _G_sed_script=$_G_sed_script$nl$_G_sed_script
+  done
+  echo "$_G_sed_script" 2>/dev/null | sed 99q >conftest.sed
+  _G_sed_script=
+
+  # func_check_prog_sed CANDIDATE
+  # -----------------------------
+  # Feed CANDIDATE ever longer single-line inputs (doubling each round)
+  # and count how many rounds it copies through unchanged.  When that
+  # count beats $_G_path_prog_max, record CANDIDATE in
+  # $func_check_prog_result and raise $_G_path_prog_max.
+  func_check_prog_sed ()
+  {
+    _G_path_prog=$1
+
+    _G_count=0
+    printf 0123456789 >conftest.in
+    while :
+    do
+      cat conftest.in conftest.in >conftest.tmp
+      mv conftest.tmp conftest.in
+      cp conftest.in conftest.nl
+      echo '' >> conftest.nl
+      # The probe text must arrive on stdin and the result must land in
+      # conftest.out, otherwise the comparison below has nothing to check.
+      "$_G_path_prog" -f conftest.sed <conftest.nl >conftest.out 2>/dev/null || break
+      diff conftest.out conftest.nl >/dev/null 2>&1 || break
+      _G_count=`expr $_G_count + 1`
+      if test "$_G_count" -gt "$_G_path_prog_max"; then
+        # Best one so far, save it but keep looking for a better one
+        func_check_prog_result=$_G_path_prog
+        _G_path_prog_max=$_G_count
+      fi
+      # 10*(2^10) chars as input seems more than enough
+      test 10 -lt "$_G_count" && break
+    done
+    rm -f conftest.in conftest.tmp conftest.nl conftest.out
+  }
+
+  func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin"
+  rm -f conftest.sed
+  SED=$func_path_progs_result
+}
+
+
+# Unless the user overrides by setting GREP, search the path for either GNU
+# grep, or the grep that truncates its output the least.
+test -z "$GREP" && {
+  # func_check_prog_grep CANDIDATE
+  # ------------------------------
+  # Feed CANDIDATE ever longer inputs (doubling each round) ending in a
+  # 'GREP' line and count how many rounds it reproduces them unchanged.
+  # When that count beats $_G_path_prog_max, record CANDIDATE in
+  # $func_check_prog_result and raise $_G_path_prog_max.
+  # $_G_path_prog_max is initialised once per search by func_path_progs;
+  # it is deliberately not reset here so that the best candidate wins.
+  func_check_prog_grep ()
+  {
+    _G_path_prog=$1
+
+    _G_count=0
+    printf 0123456789 >conftest.in
+    while :
+    do
+      cat conftest.in conftest.in >conftest.tmp
+      mv conftest.tmp conftest.in
+      cp conftest.in conftest.nl
+      echo 'GREP' >> conftest.nl
+      # The probe text must arrive on stdin and the result must land in
+      # conftest.out, otherwise the comparison below has nothing to check.
+      "$_G_path_prog" -e 'GREP$' -e '-(cannot match)-' <conftest.nl >conftest.out 2>/dev/null || break
+      diff conftest.out conftest.nl >/dev/null 2>&1 || break
+      _G_count=`expr $_G_count + 1`
+      if test "$_G_count" -gt "$_G_path_prog_max"; then
+        # Best one so far, save it but keep looking for a better one
+        func_check_prog_result=$_G_path_prog
+        _G_path_prog_max=$_G_count
+      fi
+      # 10*(2^10) chars as input seems more than enough
+      test 10 -lt "$_G_count" && break
+    done
+    rm -f conftest.in conftest.tmp conftest.nl conftest.out
+  }
+
+  func_path_progs "grep ggrep" func_check_prog_grep "$PATH:/usr/xpg4/bin"
+  GREP=$func_path_progs_result
+}
+
+
+## ------------------------------- ##
+## User overridable command paths. ##
+## ------------------------------- ##
+
+# All uppercase variable names are used for environment variables.  These
+# variables can be overridden by the user before calling a script that
+# uses them if a suitable command of that name is not already available
+# in the command search PATH.
+
+: ${CP="cp -f"}
+: ${ECHO="printf %s\n"}
+: ${EGREP="$GREP -E"}
+: ${FGREP="$GREP -F"}
+: ${LN_S="ln -s"}
+: ${MAKE="make"}
+: ${MKDIR="mkdir"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
+
+
+## -------------------- ##
+## Useful sed snippets. ##
+## -------------------- ##
+
+sed_dirname='s|/[^/]*$||'
+sed_basename='s|^.*/||'
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='s|\([`"$\\]\)|\\\1|g'
+
+# Same as above, but do not quote variable references.
+sed_double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution that turns a string into a regex matching for the
+# string literally.
+sed_make_literal_regex='s|[].[^$\\*\/]|\\&|g'
+
+# Sed substitution that converts a w32 file name or path
+# that contains forward slashes, into one that contains
+# (escaped) backslashes.  A very naive implementation.
+sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
+
+# Re-'\' parameter expansions in output of sed_double_quote_subst that
+# were '\'-ed in input to the same.  If an odd number of '\' preceded a
+# '$' in input to sed_double_quote_subst, that '$' was protected from
+# expansion.  Since each input '\' is now two '\'s, look for any number
+# of runs of four '\'s followed by two '\'s and then a '$'.  '\' that '$'.
+_G_bs='\\'
+_G_bs2='\\\\'
+_G_bs4='\\\\\\\\'
+_G_dollar='\$'
+sed_double_backslash="\
+  s/$_G_bs4/&\\
+/g
+  s/^$_G_bs2$_G_dollar/$_G_bs&/
+  s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g
+  s/\n//g"
+
+# require_check_ifs_backslash
+# ---------------------------
+# Check if we can use backslash as IFS='\' separator, and set
+# $check_ifs_backshlash_broken to ':' or 'false'.
+# After the first call, $require_check_ifs_backslash is replaced by ':'
+# so that evaluating it again is a no-op.
+require_check_ifs_backslash=func_require_check_ifs_backslash
+func_require_check_ifs_backslash ()
+{
+  _G_save_IFS=$IFS
+  IFS='\'
+  # Split 'a\\b' on backslashes and inspect the fields in order.
+  _G_check_ifs_backshlash='a\\b'
+  for _G_i in $_G_check_ifs_backshlash
+  do
+  case $_G_i in
+  a)
+    # First field as expected: assume splitting works until shown otherwise.
+    check_ifs_backshlash_broken=false
+    ;;
+  '')
+    # An empty field between the two backslashes: splitting works.
+    break
+    ;;
+  *)
+    # Any other field means the shell did not split as required.
+    check_ifs_backshlash_broken=:
+    break
+    ;;
+  esac
+  done
+  IFS=$_G_save_IFS
+  require_check_ifs_backslash=:
+}
+
+
+## ----------------- ##
+## Global variables. ##
+## ----------------- ##
+
+# Except for the global variables explicitly listed below, the following
+# functions in the '^func_' namespace, and the '^require_' namespace
+# variables initialised in the 'Resource management' section, sourcing
+# this file will not pollute your global namespace with anything
+# else.
There's no portable way to scope variables in Bourne shell
+# though, so actually running these functions will sometimes place
+# results into a variable named after the function, and often use
+# temporary variables in the '^_G_' namespace.  If you are careful to
+# avoid using those namespaces casually in your sourcing script, things
+# should continue to work as you expect.  And, of course, you can freely
+# overwrite any of the functions or variables defined here before
+# calling anything to customize them.
+
+# Exit statuses used by the functions in this file.
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_MISMATCH=63  # $? = 63 is used to indicate version mismatch to missing.
+EXIT_SKIP=77      # $? = 77 is used to indicate a skipped test to automake.
+
+# Allow overriding, e.g., assuming that you follow the convention of
+# putting '$debug_cmd' at the start of all your functions, you can get
+# bash to show function call trace with:
+#
+#    debug_cmd='echo "${FUNCNAME[0]} $*" >&2' bash your-script-name
+debug_cmd=${debug_cmd-":"}
+exit_cmd=:
+
+# By convention, finish your script with:
+#
+#    exit $exit_status
+#
+# so that you can set exit_status to non-zero if you want to indicate
+# something went wrong during execution without actually bailing out at
+# the point of failure.
+exit_status=$EXIT_SUCCESS
+
+# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
+# is ksh but when the shell is invoked as "sh" and the current value of
+# the _XPG environment variable is not equal to 1 (one), the special
+# positional parameter $0, within a function call, is the name of the
+# function.
+progpath=$0
+
+# The name of this program.
+progname=`$ECHO "$progpath" |$SED "$sed_basename"`
+
+# Make sure we have an absolute progpath for reexecution:
+case $progpath in
+  # Already absolute (leading slash or backslash, or a drive letter).
+  [\\/]*|[A-Za-z]:\\*) ;;
+  # Relative, but with a directory part: resolve that directory.
+  *[\\/]*)
+     progdir=`$ECHO "$progpath" |$SED "$sed_dirname"`
+     progdir=`cd "$progdir" && pwd`
+     progpath=$progdir/$progname
+     ;;
+  # Bare command name: look for it along $PATH.
+  *)
+     _G_IFS=$IFS
+     IFS=${PATH_SEPARATOR-:}
+     for progdir in $PATH; do
+       IFS=$_G_IFS
+       test -x "$progdir/$progname" && break
+     done
+     IFS=$_G_IFS
+     # NOTE(review): when no PATH entry matches, $progdir keeps the last
+     # entry visited, so this fallback only fires when that entry is
+     # empty -- confirm that this is intended.
+     test -n "$progdir" || progdir=`pwd`
+     progpath=$progdir/$progname
+     ;;
+esac
+
+
+## ----------------- ##
+## Standard options. ##
+## ----------------- ##
+
+# The following options affect the operation of the functions defined
+# below, and should be set appropriately depending on run-time para-
+# meters passed on the command line.
+
+opt_dry_run=false
+opt_quiet=false
+opt_verbose=false
+
+# Categories 'all' and 'none' are always available.  Append any others
+# you will pass as the first argument to func_warning from your own
+# code.
+warning_categories=
+
+# By default, display warnings according to 'opt_warning_types'.  Set
+# 'warning_func'  to ':' to elide all warnings, or func_fatal_error to
+# treat the next displayed warning as a fatal error.
+warning_func=func_warn_and_continue
+
+# Set to 'all' to display all warnings, 'none' to suppress all
+# warnings, or a space delimited list of some subset of
+# 'warning_categories' to display only the listed warnings.
+opt_warning_types=all
+
+
+## -------------------- ##
+## Resource management. ##
+## -------------------- ##
+
+# This section contains definitions for functions that each ensure a
+# particular resource (a file, or a non-empty configuration variable for
+# example) is available, and if appropriate to extract default values
+# from pertinent package files. Call them using their associated
+# 'require_*' variable to ensure that they are executed, at most, once.
+#
+# It's entirely deliberate that calling these functions can set
+# variables that don't obey the namespace limitations obeyed by the rest
+# of this file, in order that they be as useful as possible to
+# callers.
+
+
+# require_term_colors
+# -------------------
+# Allow display of bold text on terminals that support it.
+#
+# When standard output is a terminal, set $tc_reset, $tc_bold,
+# $tc_standout, $tc_red, $tc_green, $tc_blue and $tc_cyan to the
+# matching control sequences; otherwise leave them untouched.  After the
+# first call, $require_term_colors is replaced by ':' so that evaluating
+# it again is a no-op.
+require_term_colors=func_require_term_colors
+func_require_term_colors ()
+{
+    $debug_cmd
+
+    test -t 1 && {
+      # COLORTERM and USE_ANSI_COLORS environment variables take
+      # precedence, because most terminfo databases neglect to describe
+      # whether color sequences are supported.
+      test -n "${COLORTERM+set}" && : ${USE_ANSI_COLORS="1"}
+
+      if test 1 = "$USE_ANSI_COLORS"; then
+        # Standard ANSI escape sequences.  They are assembled from a
+        # generated ESC byte so that no raw control character has to
+        # survive inside this file.
+        _G_esc=`printf '\033'`
+        tc_reset="${_G_esc}[0m"
+        tc_bold="${_G_esc}[1m"; tc_standout="${_G_esc}[7m"
+        tc_red="${_G_esc}[31m"; tc_green="${_G_esc}[32m"
+        tc_blue="${_G_esc}[34m"; tc_cyan="${_G_esc}[36m"
+      else
+        # Otherwise trust the terminfo database after all.
+        test -n "`tput sgr0 2>/dev/null`" && {
+          tc_reset=`tput sgr0`
+          test -n "`tput bold 2>/dev/null`" && tc_bold=`tput bold`
+          tc_standout=$tc_bold
+          test -n "`tput smso 2>/dev/null`" && tc_standout=`tput smso`
+          test -n "`tput setaf 1 2>/dev/null`" && tc_red=`tput setaf 1`
+          test -n "`tput setaf 2 2>/dev/null`" && tc_green=`tput setaf 2`
+          test -n "`tput setaf 4 2>/dev/null`" && tc_blue=`tput setaf 4`
+          # ANSI color 6 is cyan (5 would be magenta).
+          test -n "`tput setaf 6 2>/dev/null`" && tc_cyan=`tput setaf 6`
+        }
+      fi
+    }
+
+    require_term_colors=:
+}
+
+
+## ----------------- ##
+## Function library. ##
+## ----------------- ##
+
+# This section contains a variety of useful functions to call in your
+# scripts.  Take note of the portable wrappers for features provided by
+# some modern shells, which will fall back to slower equivalents on
+# less featureful shells.
+
+
+# func_append VAR VALUE
+# ---------------------
+# Append VALUE onto the existing contents of VAR.
+
+  # We should try to minimise forks, especially on Windows where they are
+  # unreasonably slow, so skip the feature probes when bash or zsh are
+  # being used:
+  if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then
+    : ${_G_HAVE_ARITH_OP="yes"}
+    : ${_G_HAVE_XSI_OPS="yes"}
+    # The += operator was introduced in bash 3.1
+    case $BASH_VERSION in
+      [12].* | 3.0 | 3.0*) ;;
+      *)
+        : ${_G_HAVE_PLUSEQ_OP="yes"}
+        ;;
+    esac
+  fi
+
+  # _G_HAVE_PLUSEQ_OP
+  # Can be empty, in which case the shell is probed, "yes" if += is
+  # useable or anything else if it does not work.
+  test -z "$_G_HAVE_PLUSEQ_OP" \
+    && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \
+    && _G_HAVE_PLUSEQ_OP=yes
+
+if test yes = "$_G_HAVE_PLUSEQ_OP"
+then
+  # This shell supports the += operator, allowing a faster implementation.
+  # The definition is wrapped in eval so that shells without += never
+  # have to parse it.
+  eval 'func_append ()
+  {
+    $debug_cmd
+
+    eval "$1+=\$2"
+  }'
+else
+  # ...otherwise fall back to re-assigning the old value followed by the
+  # new one.
+  func_append ()
+  {
+    $debug_cmd
+
+    eval "$1=\$$1\$2"
+  }
+fi
+
+
+# func_append_quoted VAR VALUE
+# ----------------------------
+# Quote VALUE and append to the end of shell variable VAR, separated
+# by a space.
+# The two definitions below differ only in how the append is spelled
+# (+= or plain re-assignment); both quote VALUE with
+# 'func_quote_arg pretty' first.
+if test yes = "$_G_HAVE_PLUSEQ_OP"; then
+  eval 'func_append_quoted ()
+  {
+    $debug_cmd
+
+    func_quote_arg pretty "$2"
+    eval "$1+=\\ \$func_quote_arg_result"
+  }'
+else
+  func_append_quoted ()
+  {
+    $debug_cmd
+
+    func_quote_arg pretty "$2"
+    eval "$1=\$$1\\ \$func_quote_arg_result"
+  }
+fi
+
+
+# func_append_uniq VAR VALUE
+# --------------------------
+# Append unique VALUE onto the existing contents of VAR, assuming
+# entries are delimited by the first character of VALUE.  For example:
+#
+#   func_append_uniq options " --another-option option-argument"
+#
+# will only append to $options if " --another-option option-argument "
+# is not already present somewhere in $options already (note spaces at
+# each end implied by leading space in second argument).
+func_append_uniq ()
+{
+    $debug_cmd
+
+    # Fetch the current contents of the variable named by $1.
+    eval _G_current_value='`$ECHO $'$1'`'
+    # The delimiter is the first character of VALUE.
+    _G_delim=`expr "$2" : '\(.\)'`
+
+    # Wrap the current contents in delimiters so that the first and last
+    # entries can match too; append only when VALUE is not yet present.
+    case $_G_delim$_G_current_value$_G_delim in
+      *"$2$_G_delim"*) ;;
+      *) func_append "$@" ;;
+    esac
+}
+
+
+# func_arith TERM...
+# ------------------
+# Set func_arith_result to the result of evaluating TERMs.
+  # Probe for $(( )) support unless _G_HAVE_ARITH_OP is already set.
+  test -z "$_G_HAVE_ARITH_OP" \
+    && (eval 'test 2 = $(( 1 + 1 ))') 2>/dev/null \
+    && _G_HAVE_ARITH_OP=yes
+
+if test yes = "$_G_HAVE_ARITH_OP"; then
+  # Wrapped in eval so that shells without $(( )) never have to parse it.
+  eval 'func_arith ()
+  {
+    $debug_cmd
+
+    func_arith_result=$(( $* ))
+  }'
+else
+  # Fall back to the external 'expr' utility.
+  func_arith ()
+  {
+    $debug_cmd
+
+    func_arith_result=`expr "$@"`
+  }
+fi
+
+
+# func_basename FILE
+# ------------------
+# Set func_basename_result to FILE with everything up to and including
+# the last / stripped.
+#
+# The bodies of func_basename and func_dirname are prepared as strings
+# ($_b and $_d) and spliced into the function definitions further down.
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  # If this shell supports suffix pattern removal, then use it to avoid
+  # forking.  Hide the definitions in single quotes in case the shell
+  # chokes on unsupported syntax...
+  _b='func_basename_result=${1##*/}'
+  _d='case $1 in
+        */*) func_dirname_result=${1%/*}$2 ;;
+        * ) func_dirname_result=$3 ;;
+      esac'
+
+else
+  # ...otherwise fall back to using sed.
+  _b='func_basename_result=`$ECHO "$1" |$SED "$sed_basename"`'
+  _d='func_dirname_result=`$ECHO "$1" |$SED "$sed_dirname"`
+      if test "X$func_dirname_result" = "X$1"; then
+        func_dirname_result=$3
+      else
+        func_append func_dirname_result "$2"
+      fi'
+fi
+
+eval 'func_basename ()
+{
+    $debug_cmd
+
+    '"$_b"'
+}'
+
+
+# func_dirname FILE APPEND NONDIR_REPLACEMENT
+# -------------------------------------------
+# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
+# otherwise set result to NONDIR_REPLACEMENT.
+eval 'func_dirname ()
+{
+    $debug_cmd
+
+    '"$_d"'
+}'
+
+
+# func_dirname_and_basename FILE APPEND NONDIR_REPLACEMENT
+# --------------------------------------------------------
+# Perform func_basename and func_dirname in a single function
+# call:
+#   dirname:  Compute the dirname of FILE.
If nonempty,
+#             add APPEND to the result, otherwise set result
+#             to NONDIR_REPLACEMENT.
+#             value returned in "$func_dirname_result"
+#   basename: Compute filename of FILE.
+#             value returned in "$func_basename_result"
+# For efficiency, we do not delegate to the functions above but instead
+# duplicate the functionality here.
+eval 'func_dirname_and_basename ()
+{
+    $debug_cmd
+
+    '"$_b"'
+    '"$_d"'
+}'
+
+
+# func_echo ARG...
+# ----------------
+# Echo program name prefixed message.
+func_echo ()
+{
+    $debug_cmd
+
+    _G_message=$*
+
+    # Split the message on newlines only, and restore IFS inside the loop
+    # so that $ECHO itself runs with the caller's IFS.
+    func_echo_IFS=$IFS
+    IFS=$nl
+    for _G_line in $_G_message; do
+      IFS=$func_echo_IFS
+      $ECHO "$progname: $_G_line"
+    done
+    IFS=$func_echo_IFS
+}
+
+
+# func_echo_all ARG...
+# --------------------
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    $ECHO "$*"
+}
+
+
+# func_echo_infix_1 INFIX ARG...
+# ------------------------------
+# Echo program name, followed by INFIX on the first line, with any
+# additional lines not showing INFIX.
+# Output goes to standard error; continuation lines are indented by a
+# run of spaces as long as INFIX with its color sequences removed.
+func_echo_infix_1 ()
+{
+    $debug_cmd
+
+    $require_term_colors
+
+    _G_infix=$1; shift
+    _G_indent=$_G_infix
+    _G_prefix="$progname: $_G_infix: "
+    _G_message=$*
+
+    # Strip color escape sequences before counting printable length
+    for _G_tc in "$tc_reset" "$tc_bold" "$tc_standout" "$tc_red" "$tc_green" "$tc_blue" "$tc_cyan"
+    do
+      test -n "$_G_tc" && {
+        _G_esc_tc=`$ECHO "$_G_tc" | $SED "$sed_make_literal_regex"`
+        _G_indent=`$ECHO "$_G_indent" | $SED "s|$_G_esc_tc||g"`
+      }
+    done
+    # Turn every remaining character of the infix into a space.
+    _G_indent="$progname: "`echo "$_G_indent" | $SED 's|.| |g'`" " ## exclude from sc_prohibit_nested_quotes
+
+    func_echo_infix_1_IFS=$IFS
+    IFS=$nl
+    for _G_line in $_G_message; do
+      IFS=$func_echo_infix_1_IFS
+      $ECHO "$_G_prefix$tc_bold$_G_line$tc_reset" >&2
+      # Only the first line carries the infix.
+      _G_prefix=$_G_indent
+    done
+    IFS=$func_echo_infix_1_IFS
+}
+
+
+# func_error ARG...
+# -----------------
+# Echo program name prefixed message to standard error.
+func_error ()
+{
+    $debug_cmd
+
+    $require_term_colors
+
+    func_echo_infix_1 " $tc_standout${tc_red}error$tc_reset" "$*" >&2
+}
+
+
+# func_fatal_error ARG...
+# -----------------------
+# Echo program name prefixed message to standard error, and exit.
+func_fatal_error ()
+{
+    $debug_cmd
+
+    func_error "$*"
+    exit $EXIT_FAILURE
+}
+
+
+# func_grep EXPRESSION FILENAME
+# -----------------------------
+# Check whether EXPRESSION matches any line of FILENAME, without output.
+func_grep ()
+{
+    $debug_cmd
+
+    $GREP "$1" "$2" >/dev/null 2>&1
+}
+
+
+# func_len STRING
+# ---------------
+# Set func_len_result to the length of STRING.  STRING may not
+# start with a hyphen.
+  # Probe for ${#x}, ${x%...} and ${x#...} support unless
+  # _G_HAVE_XSI_OPS is already set.
+  test -z "$_G_HAVE_XSI_OPS" \
+    && (eval 'x=a/b/c;
+      test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \
+    && _G_HAVE_XSI_OPS=yes
+
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  eval 'func_len ()
+  {
+    $debug_cmd
+
+    func_len_result=${#1}
+  }'
+else
+  func_len ()
+  {
+    $debug_cmd
+
+    # If expr fails, report $max_cmd_len instead.
+    func_len_result=`expr "$1" : ".*" 2>/dev/null || echo $max_cmd_len`
+  }
+fi
+
+
+# func_mkdir_p DIRECTORY-PATH
+# ---------------------------
+# Make sure the entire path to DIRECTORY-PATH is available.
+# Nothing is done for an empty argument or in dry-run mode.  If
+# DIRECTORY-PATH still does not exist after all missing components have
+# been tried, abort through func_fatal_error.
+func_mkdir_p ()
+{
+    $debug_cmd
+
+    _G_directory_path=$1
+    _G_dir_list=
+
+    if test -n "$_G_directory_path" && test : != "$opt_dry_run"; then
+
+      # Protect directory names starting with '-'
+      case $_G_directory_path in
+        -*) _G_directory_path=./$_G_directory_path ;;
+      esac
+
+      # Remember the directory actually requested: the loop below walks
+      # $_G_directory_path up to the nearest existing ancestor, so it can
+      # no longer be used for the final check.
+      _G_mkdir_p_target=$_G_directory_path
+
+      # While some portion of DIR does not yet exist...
+      while test ! -d "$_G_directory_path"; do
+        # ...make a list in topmost first order.  Use a colon delimited
+        # list in case some portion of path contains whitespace.
+        _G_dir_list=$_G_directory_path:$_G_dir_list
+
+        # If the last portion added has no slash in it, the list is done
+        case $_G_directory_path in */*) ;; *) break ;; esac
+
+        # ...otherwise throw away the child directory and loop
+        _G_directory_path=`$ECHO "$_G_directory_path" | $SED -e "$sed_dirname"`
+      done
+      _G_dir_list=`$ECHO "$_G_dir_list" | $SED 's|:*$||'`
+
+      func_mkdir_p_IFS=$IFS; IFS=:
+      for _G_dir in $_G_dir_list; do
+        IFS=$func_mkdir_p_IFS
+        # mkdir can fail with a 'File exist' error if two processes
+        # try to create one of the directories concurrently.  Don't
+        # stop in that case!
+        $MKDIR "$_G_dir" 2>/dev/null || :
+      done
+      IFS=$func_mkdir_p_IFS
+
+      # Bail out if we (or some other process) failed to create a directory.
+      test -d "$_G_mkdir_p_target" || \
+        func_fatal_error "Failed to create '$1'"
+    fi
+}
+
+
+# func_mktempdir [BASENAME]
+# -------------------------
+# Make a temporary directory that won't clash with other running
+# libtool processes, and avoids race conditions if possible.  If
+# given, BASENAME is the basename for that directory.
+# The directory name is written to standard output.
+func_mktempdir ()
+{
+    $debug_cmd
+
+    _G_template=${TMPDIR-/tmp}/${1-$progname}
+
+    if test : = "$opt_dry_run"; then
+      # Return a directory name, but don't create it in dry-run mode
+      _G_tmpdir=$_G_template-$$
+    else
+      _G_mktempdir_ok=:
+
+      # If mktemp works, use that first and foremost
+      _G_tmpdir=`mktemp -d "$_G_template-XXXXXXXX" 2>/dev/null`
+
+      if test ! -d "$_G_tmpdir"; then
+        # Failing that, at least try and use $RANDOM to avoid a race
+        _G_tmpdir=$_G_template-${RANDOM-0}$$
+
+        func_mktempdir_umask=`umask`
+        umask 0077
+        # A failing mkdir means the directory is not ours (it may have
+        # been created by somebody else in the meantime): never use it.
+        $MKDIR "$_G_tmpdir" || _G_mktempdir_ok=false
+        umask $func_mktempdir_umask
+      fi
+
+      # If we're not in dry-run mode, bomb out on failure
+      $_G_mktempdir_ok && test -d "$_G_tmpdir" || \
+        func_fatal_error "cannot create temporary directory '$_G_tmpdir'"
+    fi
+
+    $ECHO "$_G_tmpdir"
+}
+
+
+# func_normal_abspath PATH
+# ------------------------
+# Remove doubled-up and trailing slashes, "." path components,
+# and cancel out any ".."
path components in PATH after making +# it an absolute path. +func_normal_abspath () +{ + $debug_cmd + + # These SED scripts presuppose an absolute path with a trailing slash. + _G_pathcar='s|^/\([^/]*\).*$|\1|' + _G_pathcdr='s|^/[^/]*||' + _G_removedotparts=':dotsl + s|/\./|/|g + t dotsl + s|/\.$|/|' + _G_collapseslashes='s|/\{1,\}|/|g' + _G_finalslash='s|/*$|/|' + + # Start from root dir and reassemble the path. + func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. + func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd. + func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_removedotparts" -e "$_G_collapseslashes" -e "$_G_finalslash"` + while :; do + # Processed it all yet? + if test / = "$func_normal_abspath_tpath"; then + # If we ascended to the root using ".." the result may be empty now. 
+ if test -z "$func_normal_abspath_result"; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcdr"` + # Figure out what to do with it + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. + func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. + func_append func_normal_abspath_result "/$func_normal_abspath_tcomponent" + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + + +# func_notquiet ARG... +# -------------------- +# Echo program name prefixed message only when not in quiet mode. +func_notquiet () +{ + $debug_cmd + + $opt_quiet || func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + + +# func_relative_path SRCDIR DSTDIR +# -------------------------------- +# Set func_relative_path_result to the relative path from SRCDIR to DSTDIR. 
+func_relative_path ()
+{
+    $debug_cmd
+
+    func_relative_path_result=
+    func_normal_abspath "$1"
+    func_relative_path_tlibdir=$func_normal_abspath_result
+    func_normal_abspath "$2"
+    func_relative_path_tbindir=$func_normal_abspath_result
+
+    # Ascend the tree starting from libdir
+    while :; do
+      # check if we have found a prefix of bindir (quoted: match literally)
+      case $func_relative_path_tbindir in
+        "$func_relative_path_tlibdir")
+          # found an exact match
+          func_relative_path_tcancelled=
+          break
+          ;;
+        "$func_relative_path_tlibdir"*)
+          # found a matching prefix
+          func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
+          func_relative_path_tcancelled=$func_stripname_result
+          if test -z "$func_relative_path_result"; then
+            func_relative_path_result=.
+          fi
+          break
+          ;;
+        *)
+          func_dirname "$func_relative_path_tlibdir"
+          func_relative_path_tlibdir=$func_dirname_result
+          if test -z "$func_relative_path_tlibdir"; then
+            # Have to descend all the way to the root!
+            func_relative_path_result=../$func_relative_path_result
+            func_relative_path_tcancelled=$func_relative_path_tbindir
+            break
+          fi
+          func_relative_path_result=../$func_relative_path_result
+          ;;
+      esac
+    done
+
+    # Now calculate path; take care to avoid doubling-up slashes.
+    func_stripname '' '/' "$func_relative_path_result"
+    func_relative_path_result=$func_stripname_result
+    func_stripname '/' '/' "$func_relative_path_tcancelled"
+    if test -n "$func_stripname_result"; then
+      func_append func_relative_path_result "/$func_stripname_result"
+    fi
+
+    # Normalisation. If bindir is libdir, return '.' else relative path.
+    if test -n "$func_relative_path_result"; then
+      func_stripname './' '' "$func_relative_path_result"
+      func_relative_path_result=$func_stripname_result
+    fi
+
+    test -n "$func_relative_path_result" || func_relative_path_result=.
+
+    :
+}
+
+
+# func_quote_portable EVAL ARG
+# ----------------------------
+# Internal function to portably implement func_quote_arg.  Note that we still
+# pay attention to performance here, so as much as possible we try to avoid
+# calling sed binary (so far O(N) complexity as long as func_append is O(1)).
+func_quote_portable ()
+{
+    $debug_cmd
+
+    $require_check_ifs_backslash
+
+    func_quote_portable_result=$2
+
+    # one-time-loop (easy break)
+    while true
+    do
+      if $1; then
+        func_quote_portable_result=`$ECHO "$2" | $SED \
+          -e "$sed_double_quote_subst" -e "$sed_double_backslash"`
+        break
+      fi
+
+      # Quote for eval.
+      case $func_quote_portable_result in
+        *[\\\`\"\$]*)
+          # Fallback to sed when $check_ifs_backshlash_broken is ':', or when
+          # the string contains the shell wildcard characters.
+          case $check_ifs_backshlash_broken$func_quote_portable_result in
+            :*|*[\[\*\?]*)
+              func_quote_portable_result=`$ECHO "$func_quote_portable_result" \
+                  | $SED "$sed_quote_subst"`
+              break
+              ;;
+          esac
+
+          func_quote_portable_old_IFS=$IFS
+          for _G_char in '\' '`' '"' '$'
+          do
+            # STATE($1) PREV($2) SEPARATOR($3)
+            set start "" ""
+            func_quote_portable_result=dummy"$_G_char$func_quote_portable_result$_G_char"dummy
+            IFS=$_G_char
+            for _G_part in $func_quote_portable_result
+            do
+              case $1 in
+              quote)
+                func_append func_quote_portable_result "$3$2"
+                set quote "$_G_part" "\\$_G_char"
+                ;;
+              start)
+                set first "" ""
+                func_quote_portable_result=
+                ;;
+              first)
+                set quote "$_G_part" ""
+                ;;
+              esac
+            done
+          done
+          IFS=$func_quote_portable_old_IFS
+          ;;
+        *) ;;
+      esac
+      break
+    done
+
+    func_quote_portable_unquoted_result=$func_quote_portable_result
+    case $func_quote_portable_result in
+      # double-quote args containing shell metacharacters to delay
+      # word splitting, command substitution and variable expansion
+      # for a subsequent eval.
+      # many bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+        func_quote_portable_result=\"$func_quote_portable_result\"
+        ;;
+    esac
+}
+
+
+# func_quotefast_eval ARG
+# -----------------------
+# Quote one ARG (internal).  This is equivalent to 'func_quote_arg eval ARG',
+# but optimized for speed.  Result is stored in $func_quotefast_eval_result.
+if test xyes = `(x=; printf -v x %q yes; echo x"$x") 2>/dev/null`; then
+  printf -v _GL_test_printf_tilde %q '~'
+  if test '\~' = "$_GL_test_printf_tilde"; then
+    func_quotefast_eval ()
+    {
+      printf -v func_quotefast_eval_result %q "$1"
+    }
+  else
+    # Broken older Bash implementations.  Make those faster too if possible.
+    func_quotefast_eval ()
+    {
+      case $1 in
+        '~'*)
+          func_quote_portable false "$1"
+          func_quotefast_eval_result=$func_quote_portable_result
+          ;;
+        *)
+          printf -v func_quotefast_eval_result %q "$1"
+          ;;
+      esac
+    }
+  fi
+else
+  func_quotefast_eval ()
+  {
+    func_quote_portable false "$1"
+    func_quotefast_eval_result=$func_quote_portable_result
+  }
+fi
+
+
+# func_quote_arg MODEs ARG
+# ------------------------
+# Quote one ARG to be evaled later.  MODEs argument may contain zero or more
+# specifiers listed below separated by ',' character.  This function returns two
+# values:
+#   i) func_quote_arg_result
+#      double-quoted (when needed), suitable for a subsequent eval
+#  ii) func_quote_arg_unquoted_result
+#      has all characters that are still active within double
+#      quotes backslashified.  Available only if 'unquoted' is specified.
+#
+# Available modes:
+# ----------------
+# 'eval' (default)
+#       - escape shell special characters
+# 'expand'
+#       - the same as 'eval';  but do not quote variable references
+# 'pretty'
+#       - request aesthetic output, i.e. '"a b"' instead of 'a\ b'.  This might
+#         be used later in func_quote to get output like: 'echo "a b"' instead
+#         of 'echo a\ b'.  This is slower than default on some shells.
+# 'unquoted'
+#       - produce also $func_quote_arg_unquoted_result which does not contain
+#         wrapping double-quotes.
+#
+# Examples for 'func_quote_arg pretty,unquoted string':
+#
+#   string      | *_result              | *_unquoted_result
+#   ------------+-----------------------+-------------------
+#   "           | \"                    | \"
+#   a b         | "a b"                 | a b
+#   "a b"       | "\"a b\""             | \"a b\"
+#   *           | "*"                   | *
+#   z="${x-$y}" | "z=\"\${x-\$y}\""     | z=\"\${x-\$y}\"
+#
+# Examples for 'func_quote_arg pretty,unquoted,expand string':
+#
+#   string        |   *_result          | *_unquoted_result
+#   --------------+---------------------+--------------------
+#   z="${x-$y}"   | "z=\"${x-$y}\""     | z=\"${x-$y}\"
+func_quote_arg ()
+{
+    _G_quote_expand=false
+    case ,$1, in
+      *,expand,*)
+        _G_quote_expand=:
+        ;;
+    esac
+
+    case ,$1, in
+      *,pretty,*|*,expand,*|*,unquoted,*)
+        func_quote_portable $_G_quote_expand "$2"
+        func_quote_arg_result=$func_quote_portable_result
+        func_quote_arg_unquoted_result=$func_quote_portable_unquoted_result
+        ;;
+      *)
+        # Faster quote-for-eval for some shells (no *_unquoted_result here).
+        func_quotefast_eval "$2"
+        func_quote_arg_result=$func_quotefast_eval_result
+        ;;
+    esac
+}
+
+
+# func_quote MODEs ARGs...
+# ------------------------
+# Quote all ARGs to be evaled later and join them, space-separated, into a
+# single command in func_quote_result.  See func_quote_arg for the MODEs.
+func_quote ()
+{
+    $debug_cmd
+    _G_func_quote_mode=$1 ; shift
+    func_quote_result=
+    while test 0 -lt $#; do
+      func_quote_arg "$_G_func_quote_mode" "$1"
+      if test -n "$func_quote_result"; then
+        func_append func_quote_result " $func_quote_arg_result"
+      else
+        func_append func_quote_result "$func_quote_arg_result"
+      fi
+      shift
+    done
+}
+
+
+# func_stripname PREFIX SUFFIX NAME
+# ---------------------------------
+# strip PREFIX and SUFFIX from NAME, and store in func_stripname_result.
+# PREFIX and SUFFIX must not contain globbing or regex special
+# characters, hashes, percent signs, but SUFFIX may contain a leading
+# dot (in which case that matches only a dot).
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  eval 'func_stripname ()
+  {
+    $debug_cmd
+
+    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
+    # positional parameters, so assign one to ordinary variable first.
+    func_stripname_result=$3
+    func_stripname_result=${func_stripname_result#"$1"}
+    func_stripname_result=${func_stripname_result%"$2"}
+  }'
+else
+  func_stripname ()
+  {
+    $debug_cmd
+
+    case $2 in
+      .*) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%\\\\$2\$%%"`;;
+      *)  func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%$2\$%%"`;;
+    esac
+  }
+fi
+
+
+# func_show_eval CMD [FAIL_EXP]
+# -----------------------------
+# Unless opt_quiet is true, output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.
+func_show_eval ()
+{
+    $debug_cmd
+
+    _G_cmd=$1
+    _G_fail_exp=${2-':'}
+
+    func_quote_arg pretty,expand "$_G_cmd"
+    eval "func_notquiet $func_quote_arg_result"
+
+    $opt_dry_run || {
+      eval "$_G_cmd"
+      _G_status=$?
+      if test 0 -ne "$_G_status"; then
+        eval "(exit $_G_status); $_G_fail_exp"
+      fi
+    }
+}
+
+
+# func_show_eval_locale CMD [FAIL_EXP]
+# ------------------------------------
+# Unless opt_quiet is true, output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.  Use the saved locale for evaluation.
+func_show_eval_locale ()
+{
+    $debug_cmd
+
+    _G_cmd=$1
+    _G_fail_exp=${2-':'}
+
+    $opt_quiet || {
+      func_quote_arg expand,pretty "$_G_cmd"
+      eval "func_echo $func_quote_arg_result"
+    }
+
+    $opt_dry_run || {
+      eval "$_G_user_locale
+            $_G_cmd"
+      _G_status=$?
+      eval "$_G_safe_locale"
+      if test 0 -ne "$_G_status"; then
+        eval "(exit $_G_status); $_G_fail_exp"
+      fi
+    }
+}
+
+
+# func_tr_sh
+# ----------
+# Turn $1 into a string suitable for a shell variable name.
+# Result is stored in $func_tr_sh_result.  All characters
+# not in the set a-zA-Z0-9_ are replaced with '_'.  Further,
+# if $1 begins with a digit, a '_' is prepended as well.
+func_tr_sh ()
+{
+    $debug_cmd
+
+    case $1 in
+    [0-9]* | *[!a-zA-Z0-9_]*)
+      func_tr_sh_result=`$ECHO "$1" | $SED -e 's/^\([0-9]\)/_\1/' -e 's/[^a-zA-Z0-9_]/_/g'`
+      ;;
+    * )
+      func_tr_sh_result=$1
+      ;;
+    esac
+}
+
+
+# func_verbose ARG...
+# -------------------
+# Echo program name prefixed message in verbose mode only.
+func_verbose ()
+{
+    $debug_cmd
+
+    $opt_verbose && func_echo "$*"
+
+    :
+}
+
+
+# func_warn_and_continue ARG...
+# -----------------------------
+# Echo program name prefixed warning message to standard error.
+func_warn_and_continue ()
+{
+    $debug_cmd
+
+    $require_term_colors
+
+    func_echo_infix_1 "${tc_red}warning$tc_reset" "$*" >&2
+}
+
+
+# func_warning CATEGORY ARG...
+# ----------------------------
+# Echo program name prefixed warning message to standard error.  Warning
+# messages can be filtered according to CATEGORY, where this function
+# elides messages where CATEGORY is not listed in the global variable
+# 'opt_warning_types'.
+func_warning ()
+{
+    $debug_cmd
+
+    # CATEGORY must be in the warning_categories list!
+    case " $warning_categories " in
+      *" $1 "*) ;;
+      *) func_internal_error "invalid warning category '$1'" ;;
+    esac
+
+    _G_category=$1
+    shift
+
+    case " $opt_warning_types " in
+      *" $_G_category "*) $warning_func ${1+"$@"} ;;
+    esac
+}
+
+
+# func_sort_ver VER1 VER2
+# -----------------------
+# 'sort -V' is not generally available.
+# Note this deviates from the version comparison in automake
+# in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a
+# but this should suffice as we won't be specifying old
+# version formats or redundant trailing .0 in bootstrap.conf.
+# If we did want full compatibility then we should probably
+# use m4_version_compare from autoconf.
+func_sort_ver ()
+{
+    $debug_cmd
+
+    printf '%s\n%s\n' "$1" "$2" \
+      | sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n -k 5,5n -k 6,6n -k 7,7n -k 8,8n -k 9,9n
+}
+
+# func_lt_ver PREV CURR
+# ---------------------
+# Return true if PREV and CURR are in the correct order according to
+# func_sort_ver, otherwise false.  Use it like this:
+#
+#   func_lt_ver "$prev_ver" "$proposed_ver" || func_fatal_error "..."
+func_lt_ver ()
+{
+    $debug_cmd
+
+    test "x$1" = x`func_sort_ver "$1" "$2" | $SED 1q`
+}
+
+
+# Local variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC"
+# time-stamp-time-zone: "UTC"
+# End:
+#! /bin/sh
+
+# A portable, pluggable option parser for Bourne shell.
+# Written by Gary V. Vaughan, 2010
+
+# This is free software.  There is NO warranty; not even for
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Copyright (C) 2010-2019, 2021 Bootstrap Authors
+#
+# This file is dual licensed under the terms of the MIT license
+# <https://opensource.org/licenses/MIT>, and GPL version 2 or later
+# <https://www.gnu.org/licenses/gpl-2.0.html>.  You must apply one of
+# these licenses when using or redistributing this software or any of
+# the files within it.  See the URLs above, or the file `LICENSE`
+# included in the Bootstrap distribution for the full license texts.
+
+# Please report bugs or propose patches to:
+# <https://github.com/gnulib-modules/bootstrap/issues>
+
+# Set a version string for this script.
+scriptversion=2019-02-19.15; # UTC
+
+
+## ------ ##
+## Usage. ##
+## ------ ##
+
+# This file is a library for parsing options in your shell scripts along
+# with assorted other useful supporting features that you can make use
+# of too.
+#
+# For the simplest scripts you might need only:
+#
+#   #!/bin/sh
+#   . relative/path/to/funclib.sh
+#   . relative/path/to/options-parser
+#   scriptversion=1.0
+#   func_options ${1+"$@"}
+#   eval set dummy "$func_options_result"; shift
+#   ...rest of your script...
+#
+# In order for the '--version' option to work, you will need to have a
+# suitably formatted comment like the one at the top of this file
+# starting with '# Written by ' and ending with '# Copyright'.
+#
+# For '-h' and '--help' to work, you will also need a one-line
+# description of your script's purpose in a comment directly above the
+# '# Written by ' line, like the one at the top of this file.
+#
+# The default options also support '--debug', which will turn on shell
+# execution tracing (see the comment above debug_cmd below for another
+# use), and '--verbose' and the func_verbose function to allow your script
+# to display verbose messages only when your user has specified
+# '--verbose'.
+#
+# After sourcing this file, you can plug in processing for additional
+# options by amending the variables from the 'Configuration' section
+# below, and following the instructions in the 'Option parsing'
+# section further down.
+
+## -------------- ##
+## Configuration. ##
+## -------------- ##
+
+# You should override these variables in your script after sourcing this
+# file so that they reflect the customisations you have added to the
+# option parser.
+
+# The usage line for option parsing errors and the start of '-h' and
+# '--help' output messages.  You can embed shell variables for delayed
+# expansion at the time the message is displayed, but you will need to
+# quote other shell meta-characters carefully to prevent them being
+# expanded when the contents are eval'ed.
+usage='$progpath [OPTION]...'
+
+# Short help message in response to '-h' and '--help'.  Add to this or
+# override it after sourcing this library to reflect the full set of
+# options your script accepts.
+usage_message="\ + --debug enable verbose shell tracing + -W, --warnings=CATEGORY + report the warnings falling in CATEGORY [all] + -v, --verbose verbosely report processing + --version print version information and exit + -h, --help print short or long help message and exit +" + +# Additional text appended to 'usage_message' in response to '--help'. +long_help_message=" +Warning categories include: + 'all' show all warnings + 'none' turn off all the warnings + 'error' warnings are treated as fatal errors" + +# Help message printed before fatal option parsing errors. +fatal_help="Try '\$progname --help' for more information." + + + +## ------------------------- ## +## Hook function management. ## +## ------------------------- ## + +# This section contains functions for adding, removing, and running hooks +# in the main code. A hook is just a list of function names that can be +# run in order later on. + +# func_hookable FUNC_NAME +# ----------------------- +# Declare that FUNC_NAME will run hooks added with +# 'func_add_hook FUNC_NAME ...'. +func_hookable () +{ + $debug_cmd + + func_append hookable_fns " $1" +} + + +# func_add_hook FUNC_NAME HOOK_FUNC +# --------------------------------- +# Request that FUNC_NAME call HOOK_FUNC before it returns. FUNC_NAME must +# first have been declared "hookable" by a call to 'func_hookable'. +func_add_hook () +{ + $debug_cmd + + case " $hookable_fns " in + *" $1 "*) ;; + *) func_fatal_error "'$1' does not accept hook functions." ;; + esac + + eval func_append ${1}_hooks '" $2"' +} + + +# func_remove_hook FUNC_NAME HOOK_FUNC +# ------------------------------------ +# Remove HOOK_FUNC from the list of hook functions to be called by +# FUNC_NAME. 
+func_remove_hook ()
+{
+    $debug_cmd
+
+    eval ${1}_hooks='`$ECHO "\$'$1'_hooks " |$SED -e "s| '$2' | |" -e "s| \$||"`'
+}
+
+
+# func_propagate_result FUNC_NAME_A FUNC_NAME_B
+# ---------------------------------------------
+# If the *_result variable of FUNC_NAME_A _is set_, assign its value to
+# *_result variable of FUNC_NAME_B.  Sets func_propagate_result_result.
+func_propagate_result ()
+{
+    $debug_cmd
+
+    func_propagate_result_result=:
+    if eval "test \"\${${1}_result+set}\" = set"
+    then
+      eval "${2}_result=\$${1}_result"
+    else
+      func_propagate_result_result=false
+    fi
+}
+
+
+# func_run_hooks FUNC_NAME [ARG]...
+# ---------------------------------
+# Run all hook functions registered to FUNC_NAME.
+# It's assumed that the list of hook functions contains nothing more
+# than a whitespace-delimited list of legal shell function names, and
+# no effort is wasted trying to catch shell meta-characters or preserve
+# whitespace.
+func_run_hooks ()
+{
+    $debug_cmd
+
+    _G_rc_run_hooks=false
+
+    case " $hookable_fns " in
+      *" $1 "*) ;;
+      *) func_fatal_error "'$1' does not support hook functions." ;;
+    esac
+
+    eval _G_hook_fns=\$$1_hooks; shift
+
+    for _G_hook in $_G_hook_fns; do
+      func_unset "${_G_hook}_result"
+      eval $_G_hook '${1+"$@"}'
+      func_propagate_result $_G_hook func_run_hooks
+      if $func_propagate_result_result; then
+        eval set dummy "$func_run_hooks_result"; shift
+      fi
+    done
+}
+
+
+
+## --------------- ##
+## Option parsing. ##
+## --------------- ##
+
+# In order to add your own option parsing hooks, you must accept the
+# full positional parameter list from your hook function.  You may remove
+# or edit any options that you action, and then pass back the remaining
+# unprocessed options in '<hooked_function_name>_result', escaped
+# suitably for 'eval'.
+#
+# The '<hooked_function_name>_result' variable is automatically unset
+# before your hook gets called; for best performance, only set the
+# *_result variable when necessary (i.e. don't call the 'func_quote'
+# function unnecessarily because it can be an expensive operation on some
+# machines).
+#
+# Like this:
+#
+#    my_options_prep ()
+#    {
+#        $debug_cmd
+#
+#        # Extend the existing usage message.
+#        usage_message=$usage_message'
+#   -s, --silent       don'\''t print informational messages
+#    '
+#        # No change in '$@' (ignored completely by this hook).  Leave
+#        # my_options_prep_result variable intact.
+#    }
+#    func_add_hook func_options_prep my_options_prep
+#
+#
+#    my_silent_option ()
+#    {
+#        $debug_cmd
+#
+#        args_changed=false
+#
+#        # Note that, for efficiency, we parse as many options as we can
+#        # recognise in a loop before passing the remainder back to the
+#        # caller on the first unrecognised argument we encounter.
+#        while test $# -gt 0; do
+#          opt=$1; shift
+#          case $opt in
+#            --silent|-s) opt_silent=:
+#                         args_changed=:
+#                         ;;
+#            # Separate non-argument short options:
+#            -s*)         func_split_short_opt "$opt"
+#                         set dummy "$func_split_short_opt_name" \
+#                             "-$func_split_short_opt_arg" ${1+"$@"}
+#                         shift
+#                         args_changed=:
+#                         ;;
+#            *)           # Make sure the first unrecognised option "$opt"
+#                         # is added back to "$@" in case we need it later,
+#                         # if $args_changed was set to 'true'.
+#                         set dummy "$opt" ${1+"$@"}; shift; break ;;
+#          esac
+#        done
+#
+#        # Only call 'func_quote' here if we processed at least one argument.
+#        if $args_changed; then
+#          func_quote eval ${1+"$@"}
+#          my_silent_option_result=$func_quote_result
+#        fi
+#    }
+#    func_add_hook func_parse_options  my_silent_option
+#
+#
+#    my_option_validation ()
+#    {
+#        $debug_cmd
+#
+#        $opt_silent && $opt_verbose && func_fatal_help "\
+#    '--silent' and '--verbose' options are mutually exclusive."
+#    }
+#    func_add_hook func_validate_options my_option_validation
+#
+# You'll also need to manually amend $usage_message to reflect the extra
+# options you parse.  It's preferable to append if you can, so that
+# multiple option parsing hooks can be added safely.
+
+
+# func_options_finish [ARG]...
+# ----------------------------
+# Finishing the option parse loop (call 'func_options' hooks ATM).
+func_options_finish ()
+{
+    $debug_cmd
+
+    func_run_hooks func_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_finish
+}
+
+
+# func_options [ARG]...
+# ---------------------
+# All the functions called inside func_options are hookable.  See the
+# individual implementations for details.  Sets func_options_result.
+func_hookable func_options
+func_options ()
+{
+    $debug_cmd
+
+    _G_options_quoted=false
+
+    for my_func in options_prep parse_options validate_options options_finish
+    do
+      func_unset func_${my_func}_result
+      func_unset func_run_hooks_result
+      eval func_$my_func '${1+"$@"}'
+      func_propagate_result func_$my_func func_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_options_result"; shift
+        _G_options_quoted=:
+      fi
+    done
+
+    $_G_options_quoted || {
+      # As we (func_options) are top-level options-parser function and
+      # nobody quoted "$@" for us yet, we need to do it explicitly for
+      # caller.
+      func_quote eval ${1+"$@"}
+      func_options_result=$func_quote_result
+    }
+}
+
+
+# func_options_prep [ARG]...
+# --------------------------
+# All initialisations required before starting the option parse loop.
+# Note that when calling hook functions, we pass through the list of
+# positional parameters.  If a hook function modifies that list, it must
+# put the complete modified list in its own '<hook_function_name>_result'
+# variable, which func_run_hooks copies to 'func_run_hooks_result'.
+func_hookable func_options_prep
+func_options_prep ()
+{
+    $debug_cmd
+
+    # Option defaults:
+    opt_verbose=false
+    opt_warning_types=
+
+    func_run_hooks func_options_prep ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_prep
+}
+
+
+# func_parse_options [ARG]...
+# ---------------------------
+# The main option parsing loop.
+func_hookable func_parse_options
+func_parse_options ()
+{
+    $debug_cmd
+
+    _G_parse_options_requote=false
+    # this just eases exit handling
+    while test $# -gt 0; do
+      # Defer to hook functions for initial option parsing, so they
+      # get priority in the event of reusing an option name.
+      func_run_hooks func_parse_options ${1+"$@"}
+      func_propagate_result func_run_hooks func_parse_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_parse_options_result"; shift
+        # Even though we may have changed "$@", we passed the "$@" array
+        # down into the hook and it quoted it for us (because we are in
+        # this if-branch).  No need to quote it again.
+        _G_parse_options_requote=false
+      fi
+
+      # Break out of the loop if we already parsed every option.
+      test $# -gt 0 || break
+
+      # We expect that one of the options parsed in this function matches
+      # and thus we remove _G_opt from "$@" and need to re-quote.
+      _G_match_parse_options=:
+      _G_opt=$1
+      shift
+      case $_G_opt in
+        --debug|-x)   debug_cmd='set -x'
+                      func_echo "enabling shell trace mode" >&2
+                      $debug_cmd
+                      ;;
+
+        --no-warnings|--no-warning|--no-warn)
+                      set dummy --warnings none ${1+"$@"}
+                      shift
+                      ;;
+
+        --warnings|--warning|-W)
+                      if test $# = 0 && func_missing_arg $_G_opt; then
+                        _G_parse_options_requote=:
+                        break
+                      fi
+                      case " $warning_categories $1" in
+                        *" $1 "*)
+                          # trailing space prevents matching last $1 above
+                          func_append_uniq opt_warning_types " $1"
+                          ;;
+                        *all)
+                          opt_warning_types=$warning_categories
+                          ;;
+                        *none)
+                          opt_warning_types=none
+                          warning_func=:
+                          ;;
+                        *error)
+                          opt_warning_types=$warning_categories
+                          warning_func=func_fatal_error
+                          ;;
+                        *)
+                          func_fatal_error \
+                             "unsupported warning category: '$1'"
+                          ;;
+                      esac
+                      shift
+                      ;;
+
+        --verbose|-v) opt_verbose=: ;;
+        --version)    func_version ;;
+        -\?|-h)       func_usage ;;
+        --help)       func_help ;;
+
+        # Separate optargs to long options (plugins may need this):
+        --*=*)        func_split_equals "$_G_opt"
+                      set dummy "$func_split_equals_lhs" \
+                          "$func_split_equals_rhs" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate optargs to short options:
+        -W*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate non-argument short options:
+        -\?*|-h*|-v*|-x*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "-$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        --)           _G_parse_options_requote=: ; break ;;
+        -*)           func_fatal_help "unrecognised option: '$_G_opt'" ;;
+        *)            set dummy "$_G_opt" ${1+"$@"}; shift
+                      _G_match_parse_options=false
+                      break
+                      ;;
+      esac
+
+      if $_G_match_parse_options; then
+        _G_parse_options_requote=:
+      fi
+    done
+
+    if $_G_parse_options_requote; then
+      # save modified positional parameters for caller
+      func_quote eval ${1+"$@"}
+      func_parse_options_result=$func_quote_result
+    fi
+}
+
+
+# func_validate_options [ARG]...
+# ------------------------------
+# Perform any sanity checks on option settings and/or unconsumed
+# arguments.
+func_hookable func_validate_options
+func_validate_options ()
+{
+    $debug_cmd
+
+    # Display all warnings if -W was not given.
+    test -n "$opt_warning_types" || opt_warning_types=" $warning_categories"
+
+    func_run_hooks func_validate_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_validate_options
+
+    # Bail if option parsing failed (func_missing_arg sets exit_cmd=exit).
+    $exit_cmd $EXIT_FAILURE
+}
+
+
+
+## ----------------- ##
+## Helper functions. ##
+## ----------------- ##
+
+# This section contains the helper functions used by the rest of the
+# hookable option parser framework in ascii-betical order.
+
+
+# func_fatal_help ARG...
+# ----------------------
+# Echo the usage line and a help hint, then the program name prefixed
+# message to standard error, and exit.
+func_fatal_help ()
+{
+    $debug_cmd
+
+    eval \$ECHO \""Usage: $usage"\"
+    eval \$ECHO \""$fatal_help"\"
+    func_error ${1+"$@"}
+    exit $EXIT_FAILURE
+}
+
+
+# func_help
+# ---------
+# Echo long help message to standard output and exit.
+func_help ()
+{
+    $debug_cmd
+
+    func_usage_message
+    $ECHO "$long_help_message"
+    exit 0
+}
+
+
+# func_missing_arg ARGNAME
+# ------------------------
+# Echo program name prefixed message to standard error and set global
+# exit_cmd to 'exit'.
+func_missing_arg ()
+{
+    $debug_cmd
+
+    func_error "Missing argument for '$1'."
+    exit_cmd=exit
+}
+
+
+# func_split_equals STRING
+# ------------------------
+# Set func_split_equals_lhs and func_split_equals_rhs shell variables
+# after splitting STRING at the first '=' sign.
+test -z "$_G_HAVE_XSI_OPS" \
+    && (eval 'x=a/b/c;
+      test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \
+    && _G_HAVE_XSI_OPS=yes
+
+if test yes = "$_G_HAVE_XSI_OPS"
+then
+  # This is an XSI compatible shell, allowing a faster implementation...
+  eval 'func_split_equals ()
+  {
+      $debug_cmd
+
+      func_split_equals_lhs=${1%%=*}
+      func_split_equals_rhs=${1#*=}
+      if test "x$func_split_equals_lhs" = "x$1"; then
+        func_split_equals_rhs=
+      fi
+  }'
+else
+  # ...otherwise fall back to using expr, which is often a shell builtin.
+  func_split_equals ()
+  {
+      $debug_cmd
+
+      func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'`
+      func_split_equals_rhs=
+      test "x$func_split_equals_lhs=" = "x$1" \
+        || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'`
+  }
+fi #func_split_equals
+
+
+# func_split_short_opt SHORTOPT
+# -----------------------------
+# Set func_split_short_opt_name and func_split_short_opt_arg shell
+# variables after splitting SHORTOPT after the 2nd character.
+if test yes = "$_G_HAVE_XSI_OPS"
+then
+  # This is an XSI compatible shell, allowing a faster implementation...
+  eval 'func_split_short_opt ()
+  {
+      $debug_cmd
+
+      func_split_short_opt_arg=${1#??}
+      func_split_short_opt_name=${1%"$func_split_short_opt_arg"}
+  }'
+else
+  # ...otherwise fall back to using expr, which is often a shell builtin.
+  func_split_short_opt ()
+  {
+      $debug_cmd
+
+      func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'`
+      func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'`
+  }
+fi #func_split_short_opt
+
+
+# func_usage
+# ----------
+# Echo short help message to standard output and exit.
+func_usage ()
+{
+    $debug_cmd
+
+    func_usage_message
+    $ECHO "Run '$progname --help |${PAGER-more}' for full usage"
+    exit 0
+}
+
+
+# func_usage_message
+# ------------------
+# Echo short help message to standard output.
+func_usage_message ()
+{
+    $debug_cmd
+
+    eval \$ECHO \""Usage: $usage"\"
+    echo
+    $SED -n 's|^# ||
+        /^Written by/{
+          x;p;x
+        }
+        h
+        /^Written by/q' < "$progpath"
+    echo
+    eval \$ECHO \""$usage_message"\"
+}
+
+
+# func_version
+# ------------
+# Echo version message to standard output and exit.
+# The version message is extracted from the calling file's header
+# comments, with leading '# ' stripped:
+#   1. First display the progname and version
+#   2. Followed by the header comment line matching  /^# Written by /
+#   3. Then a blank line followed by the first following line matching
+#      /^# Copyright /
+#   4. Immediately followed by any lines between the previous matches,
+#      except lines preceding the intervening completely blank line.
+# For example, see the header comments of this file.
+func_version ()
+{
+    $debug_cmd
+
+    printf '%s\n' "$progname $scriptversion"
+    $SED -n '
+        /^# Written by /!b
+        s|^# ||; p; n
+
+        :fwd2blnk
+        /./ {
+          n
+          b fwd2blnk
+        }
+        p; n
+
+        :holdwrnt
+        s|^# ||
+        s|^# *$||
+        /^Copyright /!{
+          /./H
+          n
+          b holdwrnt
+        }
+
+        s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2|
+        G
+        s|\(\n\)\n*|\1|g
+        p; q' < "$progpath"
+
+    exit $?
+}
+
+
+# Local variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC"
+# time-stamp-time-zone: "UTC"
+# End:
+
+# Set a version string.
+scriptversion='(GNU libtool) 2.4.7'
+
+
+# func_echo ARG...
+# ----------------
+# Libtool also displays the current mode in messages, so override
+# funclib.sh func_echo with this custom definition.
+func_echo ()
+{
+    $debug_cmd
+
+    _G_message=$*
+
+    func_echo_IFS=$IFS
+    IFS=$nl
+    for _G_line in $_G_message; do
+      IFS=$func_echo_IFS
+      $ECHO "$progname${opt_mode+: $opt_mode}: $_G_line"
+    done
+    IFS=$func_echo_IFS
+}
+
+
+# func_warning ARG...
+# -------------------
+# Libtool warnings are not categorized, so override funclib.sh
+# func_warning with this simpler definition.
+func_warning ()
+{
+    $debug_cmd
+
+    $warning_func ${1+"$@"}
+}
+
+
+## ---------------- ##
+## Options parsing. ##
+## ---------------- ##
+
+# Hook in the functions to make sure our own options are parsed during
+# the option parsing loop.
+
+usage='$progpath [OPTION]... [MODE-ARG]...'
+
+# Short help message in response to '-h'.
+usage_message="Options:
+ --config show all configuration variables
+ --debug enable verbose shell tracing
+ -n, --dry-run display commands without modifying any files
+ --features display basic configuration information and exit
+ --mode=MODE use operation mode MODE
+ --no-warnings equivalent to '-Wnone'
+ --preserve-dup-deps don't remove duplicate dependency libraries
+ --quiet, --silent don't print informational messages
+ --tag=TAG use configuration variables from tag TAG
+ -v, --verbose print more informational messages than default
+ --version print version information
+ -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all]
+ -h, --help, --help-all print short, long, or detailed help message
+"
+
+# Additional text appended to 'usage_message' in response to '--help'.
+func_help () +{ + $debug_cmd + + func_usage_message + $ECHO "$long_help_message + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. When passed as first option, +'--mode=MODE' may be abbreviated as 'MODE' or a unique abbreviation of that. +Try '$progname --help --mode=MODE' for a more detailed description of MODE. + +When reporting a bug, please describe a test case to reproduce it and +include the following information: + + host-triplet: $host + shell: $SHELL + compiler: $LTCC + compiler flags: $LTCFLAGS + linker: $LD (gnu? $with_gnu_ld) + version: $progname $scriptversion Debian-2.4.7-5 + automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` + autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` + +Report bugs to . +GNU libtool home page: . +General help using GNU software: ." + exit 0 +} + + +# func_lo2o OBJECT-NAME +# --------------------- +# Transform OBJECT-NAME from a '.lo' suffix to the platform specific +# object suffix. + +lo2o=s/\\.lo\$/.$objext/ +o2lo=s/\\.$objext\$/.lo/ + +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_lo2o () + { + case $1 in + *.lo) func_lo2o_result=${1%.lo}.$objext ;; + * ) func_lo2o_result=$1 ;; + esac + }' + + # func_xform LIBOBJ-OR-SOURCE + # --------------------------- + # Transform LIBOBJ-OR-SOURCE from a '.o' or '.c' (or otherwise) + # suffix to a '.lo' libtool-object suffix. + eval 'func_xform () + { + func_xform_result=${1%.*}.lo + }' +else + # ...otherwise fall back to using sed. 
+ func_lo2o () + { + func_lo2o_result=`$ECHO "$1" | $SED "$lo2o"` + } + + func_xform () + { + func_xform_result=`$ECHO "$1" | $SED 's|\.[^.]*$|.lo|'` + } +fi + + +# func_fatal_configuration ARG... +# ------------------------------- +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_fatal_error ${1+"$@"} \ + "See the $PACKAGE documentation for more information." \ + "Fatal configuration error." +} + + +# func_config +# ----------- +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + + +# func_features +# ------------- +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test yes = "$build_libtool_libs"; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test yes = "$build_old_libs"; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + + +# func_enable_tag TAGNAME +# ----------------------- +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. +func_enable_tag () +{ + # Global variable: + tagname=$1 + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf=/$re_begincf/,/$re_endcf/p + + # Validate tagname. 
+ case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + + +# func_check_version_match +# ------------------------ +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. 
+_LT_EOF + fi + + exit $EXIT_MISMATCH + fi +} + + +# libtool_options_prep [ARG]... +# ----------------------------- +# Preparation for options parsed by libtool: reset the option defaults and rewrite a leading MODE shorthand to '--mode MODE'. +libtool_options_prep () +{ + $debug_cmd + + # Option defaults: + opt_config=false + opt_dlopen= + opt_dry_run=false + opt_help=false + opt_mode= + opt_preserve_dup_deps=false + opt_quiet=false + + nonopt= + preserve_args= + + _G_rc_lt_options_prep=: + + _G_rc_lt_options_prep=: + + # Shorthand for --mode=foo, only valid as the first argument + case $1 in + clean|clea|cle|cl) + shift; set dummy --mode clean ${1+"$@"}; shift + ;; + compile|compil|compi|comp|com|co|c) + shift; set dummy --mode compile ${1+"$@"}; shift + ;; + execute|execut|execu|exec|exe|ex|e) + shift; set dummy --mode execute ${1+"$@"}; shift + ;; + finish|finis|fini|fin|fi|f) + shift; set dummy --mode finish ${1+"$@"}; shift + ;; + install|instal|insta|inst|ins|in|i) + shift; set dummy --mode install ${1+"$@"}; shift + ;; + link|lin|li|l) + shift; set dummy --mode link ${1+"$@"}; shift + ;; + uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) + shift; set dummy --mode uninstall ${1+"$@"}; shift + ;; + *) + _G_rc_lt_options_prep=false + ;; + esac + + if $_G_rc_lt_options_prep; then + # Pass back the list of options. + func_quote eval ${1+"$@"} + libtool_options_prep_result=$func_quote_result + fi +} +func_add_hook func_options_prep libtool_options_prep + + +# libtool_parse_options [ARG]... +# --------------------------------- +# Provide handling for libtool specific options. +libtool_parse_options () +{ + $debug_cmd + + _G_rc_lt_parse_options=false + + # Perform our own loop to consume as many options as possible in + # each iteration.
+ while test $# -gt 0; do + _G_match_lt_parse_options=: + _G_opt=$1 + shift + case $_G_opt in + --dry-run|--dryrun|-n) + opt_dry_run=: + ;; + + --config) func_config ;; + + --dlopen|-dlopen) + opt_dlopen="${opt_dlopen+$opt_dlopen +}$1" + shift + ;; + + --preserve-dup-deps) + opt_preserve_dup_deps=: ;; + + --features) func_features ;; + + --finish) set dummy --mode finish ${1+"$@"}; shift ;; + + --help) opt_help=: ;; + + --help-all) opt_help=': help-all' ;; + + --mode) test $# = 0 && func_missing_arg $_G_opt && break + opt_mode=$1 + case $1 in + # Valid mode arguments: + clean|compile|execute|finish|install|link|relink|uninstall) ;; + + # Catch anything else as an error + *) func_error "invalid argument for $_G_opt" + exit_cmd=exit + break + ;; + esac + shift + ;; + + --no-silent|--no-quiet) + opt_quiet=false + func_append preserve_args " $_G_opt" + ;; + + --no-warnings|--no-warning|--no-warn) + opt_warning=false + func_append preserve_args " $_G_opt" + ;; + + --no-verbose) + opt_verbose=false + func_append preserve_args " $_G_opt" + ;; + + --silent|--quiet) + opt_quiet=: + opt_verbose=false + func_append preserve_args " $_G_opt" + ;; + + --tag) test $# = 0 && func_missing_arg $_G_opt && break + opt_tag=$1 + func_append preserve_args " $_G_opt $1" + func_enable_tag "$1" + shift + ;; + + --verbose|-v) opt_quiet=false + opt_verbose=: + func_append preserve_args " $_G_opt" + ;; + + # An option not handled by this hook function: + *) set dummy "$_G_opt" ${1+"$@"} ; shift + _G_match_lt_parse_options=false + break + ;; + esac + $_G_match_lt_parse_options && _G_rc_lt_parse_options=: + done + + if $_G_rc_lt_parse_options; then + # save modified positional parameters for caller + func_quote eval ${1+"$@"} + libtool_parse_options_result=$func_quote_result + fi +} +func_add_hook func_parse_options libtool_parse_options + + + +# libtool_validate_options [ARG]... +# --------------------------------- +# Perform any sanity checks on option settings and/or unconsumed +# arguments. 
+libtool_validate_options () +{ + # save first non-option argument + if test 0 -lt $#; then + nonopt=$1 + shift + fi + + # preserve --debug + test : = "$debug_cmd" || func_append preserve_args " --debug" + + case $host in + # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452 + # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788 + *cygwin* | *mingw* | *pw32* | *cegcc* | *solaris2* | *os2*) + # don't eliminate duplications in $postdeps and $predeps + opt_duplicate_compiler_generated_deps=: + ;; + *) + opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps + ;; + esac + + $opt_help || { + # Sanity checks first: + func_check_version_match + + test yes != "$build_libtool_libs" \ + && test yes != "$build_old_libs" \ + && func_fatal_configuration "not configured to build any kind of library" + + # Darwin sucks + eval std_shrext=\"$shrext_cmds\" + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$opt_dlopen" && test execute != "$opt_mode"; then + func_error "unrecognized option '-dlopen'" + $ECHO "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help=$help + help="Try '$progname --help --mode=$opt_mode' for more information." + } + + # Pass back the unparsed argument list + func_quote eval ${1+"$@"} + libtool_validate_options_result=$func_quote_result +} +func_add_hook func_validate_options libtool_validate_options + + +# Process options as early as possible so that --help and --version +# can return quickly. +func_options ${1+"$@"} +eval set dummy "$func_options_result"; shift + + + +## ----------- ## +## Main. ## +## ----------- ## + +magic='%%%MAGIC variable%%%' +magic_exe='%%%MAGIC EXE variable%%%' + +# Global variables. +extracted_archives= +extracted_serial=0 + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. 
+exec_cmd= + + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + +# func_generated_by_libtool_p +# True iff stdin has been generated by Libtool. This function is only +# a basic sanity check; it will hardly flush out determined imposters. +func_generated_by_libtool_p () +{ + $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 +} + +# func_lalib_p file +# True iff FILE is a libtool '.la' library or '.lo' object file. +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_lalib_p () +{ + test -f "$1" && + $SED -e 4q "$1" 2>/dev/null | func_generated_by_libtool_p +} + +# func_lalib_unsafe_p file +# True iff FILE is a libtool '.la' library or '.lo' object file. +# This function implements the same check as func_lalib_p without +# resorting to external programs. To this end, it saves stdin on fd 5, +# redirects stdin from FILE, then restores stdin and closes fd 5. +# As a safety measure, use it only where a negative result would be +# fatal anyway. Works if 'file' does not exist. +func_lalib_unsafe_p () +{ + lalib_p=no + if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then + for lalib_p_l in 1 2 3 4 + do + read lalib_p_line + case $lalib_p_line in + \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; + esac + done + exec 0<&5 5<&- + fi + test yes = "$lalib_p" +} + +# func_ltwrapper_script_p file +# True iff FILE is a libtool wrapper script +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_script_p () +{ + test -f "$1" && + $lt_truncate_bin < "$1" 2>/dev/null | func_generated_by_libtool_p +} + +# func_ltwrapper_executable_p file +# True iff FILE is a libtool wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters.
+func_ltwrapper_executable_p () +{ + func_ltwrapper_exec_suffix= + case $1 in + *.exe) ;; + *) func_ltwrapper_exec_suffix=.exe ;; + esac + $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 +} + +# func_ltwrapper_scriptname file +# Assumes file is an ltwrapper_executable +# uses $file to determine the appropriate filename for a +# temporary ltwrapper_script. +func_ltwrapper_scriptname () +{ + func_dirname_and_basename "$1" "" "." + func_stripname '' '.exe' "$func_basename_result" + func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper +} + +# func_ltwrapper_p file +# True iff FILE is a libtool wrapper script or wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_p () +{ + func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" +} + + +# func_execute_cmds commands fail_cmd +# Execute tilde-delimited COMMANDS. +# If FAIL_CMD is given, eval that upon failure. +# FAIL_CMD may read-access the current command in variable CMD! +func_execute_cmds () +{ + $debug_cmd + + save_ifs=$IFS; IFS='~' + for cmd in $1; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + func_show_eval "$cmd" "${2-:}" + done + IFS=$save_ifs +} + + +# func_source file +# Source FILE, adding directory component if necessary. +# Note that it is not necessary on cygwin/mingw to append a dot to +# FILE even if both FILE and FILE.exe exist: automatic-append-.exe +# behavior happens only for exec(3), not for open(2)! Also, sourcing +# 'FILE.' does not work on cygwin managed mounts. +func_source () +{ + $debug_cmd + + case $1 in + */* | *\\*) . "$1" ;; + *) . "./$1" ;; + esac +} + + +# func_resolve_sysroot PATH +# Replace a leading = in PATH with a sysroot. 
Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case $lt_sysroot:$1 in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result='='$func_stripname_result + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + $debug_cmd + + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. 
+ eval "`$SED -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with '--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=$1 + if test yes = "$build_libtool_libs"; then + write_lobj=\'$2\' + else + write_lobj=none + fi + + if test yes = "$build_old_libs"; then + write_oldobj=\'$3\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" 
-eq 0 && test -n "$func_convert_core_file_wine_to_w32_tmp"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. +func_convert_core_path_wine_to_w32 () +{ + $debug_cmd + + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result= + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result"; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin.
In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $debug_cmd + + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" -ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: '$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $debug_cmd + + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $debug_cmd + + if test -z "$2" && test -n "$1"; then + func_error "Could not determine host file name corresponding to" + func_error " '$1'" + func_error "Continuing, but uninstalled executables may not work." 
+ # Fallback: + func_to_host_file_result=$1 + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_path_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $debug_cmd + + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " '$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. + if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result=$3 + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. +func_convert_path_front_back_pathsep () +{ + $debug_cmd + + case $4 in + $1 ) func_to_host_path_result=$3$func_to_host_path_result + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via '$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result.
+func_to_host_file () +{ + $debug_cmd + + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $debug_cmd + + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. +func_convert_file_noop () +{ + func_to_host_file_result=$1 +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result=$func_convert_core_msys_to_w32_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. 
+func_convert_file_nix_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_file_wine_to_w32 "$1" + func_to_host_file_result=$func_convert_core_file_wine_to_w32_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_w32 + + +# func_convert_file_msys_to_cygwin ARG +# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. +func_convert_file_msys_to_cygwin () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_cygpath -u "$func_convert_core_msys_to_w32_result" + func_to_host_file_result=$func_cygpath_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_cygwin + + +# func_convert_file_nix_to_cygwin ARG +# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed +# in a wine environment, working winepath, and LT_CYGPATH set. Returns result +# in func_to_host_file_result. +func_convert_file_nix_to_cygwin () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. + func_convert_core_file_wine_to_w32 "$1" + func_cygpath -u "$func_convert_core_file_wine_to_w32_result" + func_to_host_file_result=$func_cygpath_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_cygwin + + +############################################# +# $build to $host PATH CONVERSION FUNCTIONS # +############################################# +# invoked via '$to_host_path_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# The result will be available in $func_to_host_path_result. +# +# Path separators are also converted from $build format to $host format. 
If +# ARG begins or ends with a path separator character, it is preserved (but +# converted to $host format) on output. +# +# All path conversion functions are named using the following convention: +# file name conversion function : func_convert_file_X_to_Y () +# path conversion function : func_convert_path_X_to_Y () +# where, for any given $build/$host combination the 'X_to_Y' value is the +# same. If conversion functions are added for new $build/$host combinations, +# the two new functions must follow this pattern, or func_init_to_host_path_cmd +# will break. + + +# func_init_to_host_path_cmd +# Ensures that function "pointer" variable $to_host_path_cmd is set to the +# appropriate value, based on the value of $to_host_file_cmd. +to_host_path_cmd= +func_init_to_host_path_cmd () +{ + $debug_cmd + + if test -z "$to_host_path_cmd"; then + func_stripname 'func_convert_file_' '' "$to_host_file_cmd" + to_host_path_cmd=func_convert_path_$func_stripname_result + fi +} + + +# func_to_host_path ARG +# Converts the path ARG from $build format to $host format. Return result +# in func_to_host_path_result. +func_to_host_path () +{ + $debug_cmd + + func_init_to_host_path_cmd + $to_host_path_cmd "$1" +} +# end func_to_host_path + + +# func_convert_path_noop ARG +# Copy ARG to func_to_host_path_result. +func_convert_path_noop () +{ + func_to_host_path_result=$1 +} +# end func_convert_path_noop + + +# func_convert_path_msys_to_w32 ARG +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_path_result. +func_convert_path_msys_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # Remove leading and trailing path separator characters from ARG. MSYS + # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; + # and winepath ignores them completely. 
+ func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result=$func_convert_core_msys_to_w32_result + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_msys_to_w32 + + +# func_convert_path_cygwin_to_w32 ARG +# Convert path ARG from Cygwin to w32 format. Returns result in +# func_to_host_path_result. +func_convert_path_cygwin_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_cygwin_to_w32 + + +# func_convert_path_nix_to_w32 ARG +# Convert path ARG from *nix to w32 format. Requires a wine environment and +# a working winepath. Returns result in func_to_host_path_result. +func_convert_path_nix_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result=$func_convert_core_path_wine_to_w32_result + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_nix_to_w32 + + +# func_convert_path_msys_to_cygwin ARG +# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_path_result.
+func_convert_path_msys_to_cygwin () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_msys_to_w32_result" + func_to_host_path_result=$func_cygpath_result + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_msys_to_cygwin + + +# func_convert_path_nix_to_cygwin ARG +# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a +# wine environment, working winepath, and LT_CYGPATH set. Returns result in +# func_to_host_path_result. +func_convert_path_nix_to_cygwin () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # Remove leading and trailing path separator characters from + # ARG. msys behavior is inconsistent here, cygpath turns them + # into '.;' and ';.', and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" + func_to_host_path_result=$func_cygpath_result + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_nix_to_cygwin + + +# func_dll_def_p FILE +# True iff FILE is a Windows DLL '.def' file. +# Only the first line that is neither empty nor a ';' comment (after leading +# whitespace is stripped) is examined: it must be an EXPORTS or LIBRARY +# statement. +# Keep in sync with _LT_DLL_DEF_P in libtool.m4 +func_dll_def_p () +{ + $debug_cmd + + func_dll_def_p_tmp=`$SED -n \ + -e 's/^[ ]*//' \ + -e '/^\(;.*\)*$/d' \ + -e 's/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p' \ + -e q \ + "$1"` + test DEF = "$func_dll_def_p_tmp" +} + + +# func_mode_compile arg...
+func_mode_compile () +{ + $debug_cmd + + # Get the compilation command and the source file. + base_compile= + srcfile=$nonopt # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". Instead, add this to base_compile + lastarg=$arg + arg_mode=normal + ;; + + target ) + libobj=$arg + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify '-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs=$IFS; IFS=, + for arg in $args; do + IFS=$save_ifs + func_append_quoted lastarg "$arg" + done + IFS=$save_ifs + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg=$srcfile + srcfile=$arg + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + # '-Xcompiler' was the last argument, so its operand is missing. + func_fatal_error "you must specify an argument for -Xcompiler" + ;; + target) + func_fatal_error "you must specify a target with '-o'" + ;; + *) + # Get the name of the library object.
+ test -z "$libobj" && { + func_basename "$srcfile" + libobj=$func_basename_result + } + ;; + esac + + # Recognize several different file suffixes. + # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from '$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test yes = "$build_libtool_libs" \ + || func_fatal_configuration "cannot build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_arg pretty "$libobj" + test "X$libobj" != "X$func_quote_arg_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name '$libobj' may not contain shell special characters." + func_dirname_and_basename "$obj" "/" "" + objname=$func_basename_result + xdir=$func_dirname_result + lobj=$xdir$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. 
+ if test yes = "$build_old_libs"; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test no = "$pic_mode" && test pass_all != "$deplibs_check_method"; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test no = "$compiler_c_o"; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.$objext + lockfile=$output_obj.lock + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test yes = "$need_locks"; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test warn = "$need_locks"; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_arg pretty "$srcfile" + qsrcfile=$func_quote_arg_result + + # Only build a PIC object if we are building libtool libraries. + if test yes = "$build_libtool_libs"; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test no != "$pic_mode"; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. 
+ if test yes = "$suppress_opt"; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test yes = "$build_old_libs"; then + if test yes != "$pic_mode"; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test yes = "$compiler_c_o"; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test no != "$need_locks"; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test compile = "$opt_mode" && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. 
+ func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a '.o' file suitable for static linking + -static only build a '.o' file suitable for static linking + -Wc,FLAG + -Xcompiler FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a 'standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix '.c' with the +library object suffix, '.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to '-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." 
+ ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the '--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the 'install' or 'cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE '-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE use a list of object files found in FILE to specify objects + -os2dllname NAME force a short DLL name on OS/2 (no effect on other OSes) + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass 
linker-specific FLAG directly to the compiler + -Wa,FLAG + -Xassembler FLAG pass linker-specific FLAG directly to the assembler + -Wl,FLAG + -Xlinker FLAG pass linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with '-') are ignored. + +Every other argument is treated as a filename. Files ending in '.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in '.la', then a libtool library is created, +only library objects ('.lo' files) may be specified, and '-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in '.a' or '.lib', then a standard library is created +using 'ar' and 'ranlib', or on Windows using 'lib'. + +If OUTPUT-FILE ends in '.lo' or '.$objext', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode '$opt_mode'" + ;; + esac + + echo + $ECHO "Try '$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test : = "$opt_help"; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | $SED -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + $SED '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $debug_cmd + + # The first argument is the command name. + cmd=$nonopt + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "'$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + # Report $file, the name being validated; $lib is not set here. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "'$file' was not linked with '-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir=$func_dirname_result + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find '$dlname' in '$dir' or '$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "."
+ dir=$func_dirname_result + ;; + + *) + func_warning "'-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir=$absdir + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic=$magic + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file=$progdir/$program + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file=$progdir/$program + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if $opt_dry_run; then + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + else + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. + exec_cmd=\$cmd$args + fi +} + +test execute = "$opt_mode" && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... 
+func_mode_finish () +{ + $debug_cmd + + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "'$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument '$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and '=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + $SED -e "$sysroot_cmd s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ $opt_quiet && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the '-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the '$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the '$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the '$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to '/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + # Leading '*' because $host is normally a cpu-vendor-os triplet, not just + # the OS name; an anchored 'solaris2.*' pattern would never match it. + case $host in + *solaris2.[6789]|*solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test finish = "$opt_mode" && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... +func_mode_install () +{ + $debug_cmd + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT).
+ if test "$SHELL" = "$nonopt" || test /bin/sh = "$nonopt" || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac + then + # Aesthetically quote it. + func_quote_arg pretty "$nonopt" + install_prog="$func_quote_arg_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_arg pretty "$arg" + func_append install_prog "$func_quote_arg_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=false + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=: ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test X-m = "X$prev" && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_arg pretty "$arg" + func_append install_prog " $func_quote_arg_result" + if test -n "$arg2"; then + func_quote_arg pretty "$arg2" + fi + func_append install_shared_prog " $func_quote_arg_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the '$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_arg pretty "$install_override_mode" + func_append install_shared_prog " -m $func_quote_arg_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=: + if $isdir; then + destdir=$dest + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir=$func_dirname_result + destname=$func_basename_result + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "'$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "'$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir=$func_dirname_result + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install '$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking '$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink '\''$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname=$1 + shift + + srcname=$realname + test -n "$relink_command" && srcname=${realname}T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme=$stripme + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme= + ;; + esac + ;; + os2*) + case $realname in + *_dll.a) + tstripme= + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try 'ln -sf' first, because the 'ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib=$destdir/$realname + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name=$func_basename_result + instname=$dir/${name}i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest=$destfile + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to '$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test yes = "$build_old_libs"; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext= + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=.exe + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script '$wrapper'" + + finalize=: + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile=$libdir/`$ECHO "$lib" | $SED 's%^.*/%%g'` + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "'$lib' has not been installed in '$libdir'" + finalize=false + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test no = "$fast_install" && test -n "$relink_command"; then + $opt_dry_run || { + if $finalize; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file=$func_basename_result + outputname=$tmpdir/$file + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_quiet || { + func_quote_arg expand,pretty "$relink_command" + eval "func_echo $func_quote_arg_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink '$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file=$outputname + else + func_warning "cannot relink '$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name=$func_basename_result + + # Set up the ranlib parameters. + oldlib=$destdir/$name + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' 
+ + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $tool_oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run '$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL "$progpath" $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test install = "$opt_mode" && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $debug_cmd + + my_outputname=$1 + my_originator=$2 + my_pic_p=${3-false} + my_prefix=`$ECHO "$my_originator" | $SED 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms=${my_outputname}S.c + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist=$output_objdir/$my_outputname.nm + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for '$my_outputname' dlsym emulation. 
*/ +/* Generated by $PROGRAM (GNU $PACKAGE) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined __GNUC__ && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* External symbol declarations for the compiler. */\ +" + + if test yes = "$dlself"; then + func_verbose "generating symbol list for '$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from '$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols=$output_objdir/$outputname.exp + $opt_dry_run || { + $RM $export_symbols + eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "$SED -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from '$dlprefile'" + func_basename "$dlprefile" + name=$func_basename_result + case $host in + *cygwin* | *mingw* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval "curr_lafile=\$libfile_$func_tr_sh_result" + dlprefile_dlbasename= + if test 
-n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname"; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename=$func_basename_result + else + # no lafile. user explicitly requested -dlpreopen . + $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename"; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if $GREP -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + func_show_eval '$RM "${nlist}I"' + if test -n "$global_symbol_to_import"; then + eval "$global_symbol_to_import"' < "$nlist"S > "$nlist"I' + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[];\ +" + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ +static void lt_syminit(void) +{ + LT_DLSYM_CONST lt_dlsymlist *symbol = lt_${my_prefix}_LTX_preloaded_symbols; + for (; symbol->name; ++symbol) + {" + $SED 's/.*/ if (STREQ (symbol->name, \"&\")) symbol->address = (void *) \&&;/' < "$nlist"I >> "$output_objdir/$my_dlsyms" + echo >> "$output_objdir/$my_dlsyms" "\ + } +}" + fi + echo >> "$output_objdir/$my_dlsyms" "\ +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{ {\"$my_originator\", (void *) 0}," + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ + {\"@INIT@\", (void *) <_syminit}," + fi + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case 
"$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + $my_pic_p && pic_flag_for_symtable=" $pic_flag" + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' + + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T" "${nlist}I"' + + # Transform the symbol file into the correct name. 
+ symfileobj=$output_objdir/${my_outputname}S.$objext + case $host in + *cygwin* | *mingw* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for '$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $debug_cmd + + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. 
+func_cygming_ms_implib_p () +{ + $debug_cmd + + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` + test -n "$func_cygming_ms_implib_tmp" +} + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +# Despite the name, also deal with 64 bit binaries. +func_win32_libid () +{ + $debug_cmd + + win32_libid_type=unknown + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | + $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then + case $nm_interface in + "MS dumpbin") + if func_cygming_ms_implib_p "$1" || + func_cygming_gnu_implib_p "$1" + then + win32_nmres=import + else + win32_nmres= + fi + ;; + *) + func_to_tool_file "$1" func_convert_file_msys_to_w32 + win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | + $SED -n -e ' + 1,100{ + / I /{ + s|.*|import| + p + q + } + }'` + ;; + esac + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $ECHO "$win32_libid_type" +} + +# func_cygming_dll_for_implib ARG +# +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $debug_cmd + + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# The is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. +# +# Echos the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $debug_cmd + + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive that possess that section. Heuristic: eliminate + # all those that have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.) 
This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. +# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $debug_cmd + + if func_cygming_gnu_implib_p "$1"; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1"; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result= + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $debug_cmd + + f_ex_an_ar_dir=$1; shift + f_ex_an_ar_oldlib=$1 + if test yes = "$lock_old_archive_extraction"; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test yes = "$lock_old_archive_extraction"; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... 
+func_extract_archives () +{ + $debug_cmd + + my_gentop=$1; shift + my_oldlibs=${1+"$@"} + my_oldobjs= + my_xlib= + my_xabs= + my_xdir= + + for my_xlib in $my_oldlibs; do + # Extract the objects. + case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs=$my_xlib ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib=$func_basename_result + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir=$my_gentop/$my_xlib_u + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? + darwin_archive=$my_xabs + darwin_curdir=`pwd` + func_basename "$darwin_archive" + darwin_base_archive=$func_basename_result + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches; do + func_mkdir_p "unfat-$$/$darwin_base_archive-$darwin_arch" + $LIPO -thin $darwin_arch -output "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" "$darwin_archive" + cd "unfat-$$/$darwin_base_archive-$darwin_arch" + func_extract_an_archive "`pwd`" "$darwin_base_archive" + cd "$darwin_curdir" + $RM "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$sed_basename" | sort -u` + darwin_file= + darwin_files= + for 
darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ + cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result=$my_oldobjs +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory where it is stored is +# the $objdir directory. This is a cygwin/mingw-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + func_quote_arg pretty "$ECHO" + qECHO=$func_quote_arg_result + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=$qECHO + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ that is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options that match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. 
+ lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"$outputname:$output:\$LINENO: libtool wrapper (GNU $PACKAGE) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"$outputname:$output:\$LINENO: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. 
+ thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test yes = "$fast_install"; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | $SED 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + \$ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test yes = "$shlibpath_overrides_runpath" && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: '\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#ifdef _MSC_VER +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* declarations of non-ANSI functions */ +#if defined __MINGW32__ +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined __CYGWIN__ +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined other_platform || defined ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined _MSC_VER +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +#elif defined __MINGW32__ +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined __CYGWIN__ +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined other platforms ... 
*/ +#endif + +#if defined PATH_MAX +# define LT_PATHMAX PATH_MAX +#elif defined MAXPATHLEN +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined _WIN32 || defined __MSDOS__ || defined __DJGPP__ || \ + defined __OS2__ +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free (stale); stale = 0; } \ +} while (0) + +#if defined LT_DEBUGWRAPPER +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char *file, int line, const 
char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + size_t tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined HAVE_DOS_BASED_FILE_SYSTEM + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined HAVE_DOS_BASED_FILE_SYSTEM + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = (size_t) (q - p); + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if 
(check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (STREQ (str, pat)) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + size_t len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + size_t orig_value_len = strlen (orig_value); + size_t add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, 
const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + size_t len = strlen (new_value); + while ((len > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[--len] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. 
+ - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +char ** +prepare_spawn (char **argv) +{ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; + + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; + + length = 0; + backslashes = 0; + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; + } + 
new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -n -e ' +s/^\(.\{79\}\)\(..*\)/\1\ +\2/ +h +s/\([\\"]\)/\\\1/g +s/$/\\n/ +s/\([^\n]*\).*/ fputs ("\1", f);/p +g +D' + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $debug_cmd + + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_suncc_cstd_abi +# !!ONLY CALL THIS FOR SUN CC AFTER $compile_command IS FULLY EXPANDED!! +# Several compiler flags select an ABI that is incompatible with the +# Cstd library. Avoid specifying it if any are in CXXFLAGS. +func_suncc_cstd_abi () +{ + $debug_cmd + + case " $compile_command " in + *" -compat=g "*|*\ -std=c++[0-9][0-9]\ *|*" -library=stdcxx4 "*|*" -library=stlport4 "*) + suncc_use_cstd_abi=no + ;; + *) + suncc_use_cstd_abi=yes + ;; + esac +} + +# func_mode_link arg... +func_mode_link () +{ + $debug_cmd + + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # what system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll that has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. 
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + os2dllname= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=false + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module=$wl-single_module + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test yes != "$build_libtool_libs" \ + && func_fatal_configuration "cannot build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test yes = "$build_libtool_libs" && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg=$1 + shift + func_quote_arg pretty,unquoted "$arg" + qarg=$func_quote_arg_unquoted_result + func_append libtool_args " $func_quote_arg_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir=$arg + prev= + continue + ;; + dlfiles|dlprefiles) + $preload || { + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=: + } + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test no = "$dlself"; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test dlprefiles = "$prev"; then + dlself=yes + elif test dlfiles = "$prev" && test yes != "$dlopen_self"; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test dlfiles = "$prev"; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols=$arg + test -f "$arg" \ + || func_fatal_error "symbol file '$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex=$arg + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir=$arg + prev= + continue + ;; + mllvm) + # Clang does not use LLVM to link, so we can simply discard any + # '-mllvm $arg' options when doing the link step. 
+ prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + if test none != "$pic_object"; then + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + fi + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file '$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + os2dllname) + os2dllname=$arg + prev= + continue + ;; + precious_regex) + precious_files_regex=$arg + prev= + continue + ;; + release) + release=-$arg + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test rpath = "$prev"; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds=$arg + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xassembler) + func_append compiler_flags " -Xassembler $qarg" + prev= + func_append compile_command " -Xassembler $qarg" + func_append finalize_command " -Xassembler $qarg" + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg=$arg + + case $arg in + -all-static) + if 
test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. + func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "'-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test X-export-symbols = "X$arg"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between '-L' and '$1'" + else + func_fatal_error "need path for '-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of '$dir'" + dir=$absdir + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test X-lc = "X$arg" || test X-lm = "X$arg"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test X-lc = "X$arg" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. 
+ test X-lc = "X$arg" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test X-lc = "X$arg" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test X-lc = "X$arg" && continue + ;; + esac + elif test X-lc_r = "X$arg"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -mllvm) + prev=mllvm + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + # Solaris ld rejects as of 11.4. Refer to Oracle bug 22985199. 
+ -pthread) + case $host in + *solaris2*) ;; + *) + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + ;; + esac + continue + ;; + -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module=$wl-multi_module + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. + func_warning "'-no-install' is ignored for $host" + func_warning "assuming '-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -os2dllname) + prev=os2dllname + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. 
+ continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $func_quote_arg_result" + func_append compiler_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $wl$func_quote_arg_result" + func_append compiler_flags " $wl$func_quote_arg_result" + func_append linker_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xassembler) + prev=xassembler + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM 
compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # -fstack-protector* stack protector flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -g*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization + # -specs=* GCC specs files + # -stdlib=* select c++ std lib with clang + # -fsanitize=* Clang/GCC memory and address sanitizer + # -fuse-ld=* Linker select flags for GCC + # -static-* direct GCC to link specific libraries statically + # -fcilkplus Cilk Plus language extension features for C/C++ + # -Wa,* Pass flags directly to the assembler + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ + -specs=*|-fsanitize=*|-fuse-ld=*|-static-*|-fcilkplus|-Wa,*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + -Z*) + if test os2 = "`expr $host : '.*\(os2\)'`"; then + # OS/2 uses -Zxxx to specify OS/2-specific options + compiler_flags="$compiler_flags $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case $arg in + -Zlinker | -Zstack) + prev=xcompiler + ;; + esac + continue + else + # Otherwise treat like 'Some other compiler flag' below + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + fi + ;; + + # Some other compiler flag. + -* | +*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. 
+ if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + test none = "$pic_object" || { + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + } + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test dlfiles = "$prev"; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test dlprefiles = "$prev"; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the '$prevarg' option requires an argument" + + if test yes = "$export_dynamic" && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname=$func_basename_result + libobjs_save=$libobjs + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\$$shlibpath_var\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + # Definition is injected by LT_CONFIG during libtool generation. + func_munge_path_list sys_lib_dlsearch_path "$LT_SYS_LIBRARY_PATH" + + func_dirname "$output" "/" "" + output_objdir=$func_dirname_result$objdir + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test lib = "$linkmode"; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can '-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=false + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test lib,link = "$linkmode,$pass"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs=$tmp_deplibs + fi + + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass"; then + libs=$deplibs + deplibs= + fi + if test prog = "$linkmode"; then + case $pass in + dlopen) libs=$dlfiles ;; + dlpreopen) libs=$dlprefiles ;; + link) + libs="$deplibs %DEPLIBS%" + test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" + ;; + esac + fi + if test lib,dlpreopen = "$linkmode,$pass"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs=$dlprefiles + fi + if test dlopen = "$pass"; then + # Collect dlpreopened libraries + save_deplibs=$deplibs + deplibs= + fi + + for deplib in $libs; do + lib= + found=false + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test lib != "$linkmode" && test prog != "$linkmode"; then + func_warning "'-l' is 
ignored for archives/objects" + continue + fi + func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test lib = "$linkmode"; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib=$searchdir/lib$name$search_ext + if test -f "$lib"; then + if test .la = "$search_ext"; then + found=: + else + found=false + fi + break 2 + fi + done + done + if $found; then + # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll=$l + done + if test "X$ll" = "X$old_library"; then # only static version available + found=false + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + lib=$ladir/$old_library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + else + # deplib doesn't seem to be a libtool library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + *.ltframework) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test conv = "$pass" && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + if test scan = "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "'-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test link = "$pass"; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + 
dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=false + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=: + fi + ;; + pass_all) + valid_a_lib=: + ;; + esac + if $valid_a_lib; then + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + else + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." 
+ fi + ;; + esac + continue + ;; + prog) + if test link != "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + elif test prog = "$linkmode"; then + if test dlpreopen = "$pass" || test yes != "$dlopen_support" || test no = "$build_libtool_libs"; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=: + continue + ;; + esac # case $deplib + + $found || test -f "$lib" \ + || func_fatal_error "cannot find the library '$lib' or unhandled argument '$deplib'" + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "'$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass" || + { test prog != "$linkmode" && test lib != "$linkmode"; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test conv = "$pass"; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + elif test prog != "$linkmode" && test lib != "$linkmode"; then + func_fatal_error "'$lib' is not a convenience library" + fi + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test yes = "$prefer_static_libs" || + test built,no = "$prefer_static_libs,$installed"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib=$l + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + + # This library was specified with -dlopen. + if test dlopen = "$pass"; then + test -z "$libdir" \ + && func_fatal_error "cannot -dlopen a convenience library: '$lib'" + if test -z "$dlname" || + test yes != "$dlopen_support" || + test no = "$build_libtool_libs" + then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir=$ladir ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of '$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir=$ladir + fi + ;; + esac + func_basename "$lib" + laname=$func_basename_result + + # Find the relevant object directory and library name. + if test yes = "$installed"; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library '$lib' was moved." + dir=$ladir + absdir=$abs_ladir + libdir=$abs_ladir + else + dir=$lt_sysroot$libdir + absdir=$lt_sysroot$libdir + fi + test yes = "$hardcode_automatic" && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir=$ladir + absdir=$abs_ladir + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir=$ladir/$objdir + absdir=$abs_ladir/$objdir + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test dlpreopen = "$pass"; then + if test -z "$libdir" && test prog = "$linkmode"; then + func_fatal_error "only libraries may -dlpreopen a convenience library: '$lib'" + fi + case $host in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test lib = "$linkmode"; then + deplibs="$dir/$old_library $deplibs" + elif test prog,link = "$linkmode,$pass"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test prog = "$linkmode" && test link != "$pass"; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=false + if test no != "$link_all_deplibs" || test -z "$library_names" || + test no = "$build_libtool_libs"; then + linkalldeplibs=: + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if $linkalldeplibs; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test prog,link = "$linkmode,$pass"; then + if test -n "$library_names" && + { { test no = "$prefer_static_libs" || + test built,yes = "$prefer_static_libs,$installed"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath"; then + # Make sure the rpath contains only unique directories. 
+ case $temp_rpath: in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if $alldeplibs && + { test pass_all = "$deplibs_check_method" || + { test yes = "$build_libtool_libs" && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test built = "$use_static_libs" && test yes = "$installed"; then + use_static_libs=no + fi + if test -n "$library_names" && + { test no = "$use_static_libs" || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc* | *os2*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test no = "$installed"; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule= + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule=$dlpremoduletest + break + fi + done + if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then + echo + if test prog = "$linkmode"; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test lib = "$linkmode" && + test yes = "$hardcode_into_libs"; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname=$1 + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname=$dlname + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc* | *os2*) + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + esac + eval soname=\"$soname_spec\" + else + soname=$realname + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot=$soname + func_basename "$soroot" + soname=$func_basename_result + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from '$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for '$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test prog = "$linkmode" || test relink != "$opt_mode"; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test no = "$hardcode_direct"; then + add=$dir/$linklib + case $host in + *-*-sco3.2v5.0.[024]*) add_dir=-L$dir ;; + *-*-sysv4*uw2*) add_dir=-L$dir ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir=-L$dir ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we cannot + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library"; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo "*** The link 
will probably fail, sorry" + else + add=$dir/$old_library + fi + elif test -n "$old_library"; then + add=$dir/$old_library + fi + fi + esac + elif test no = "$hardcode_minus_L"; then + case $host in + *-*-sunos*) add_shlibpath=$dir ;; + esac + add_dir=-L$dir + add=-l$name + elif test no = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + relink) + if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$dir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$absdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test yes != "$lib_linked"; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test prog = "$linkmode"; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test yes != "$hardcode_direct" && + test yes != "$hardcode_minus_L" && + test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test prog = "$linkmode" || test relink = "$opt_mode"; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$libdir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$libdir + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add=-l$name + elif test yes = "$hardcode_automatic"; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib"; then + add=$inst_prefix_dir$libdir/$linklib + else + add=$libdir/$linklib + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir=-L$libdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + fi + + if test prog = "$linkmode"; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test prog = "$linkmode"; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test unsupported != "$hardcode_direct"; then + test -n "$old_library" && linklib=$old_library + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test yes = "$build_libtool_libs"; then + # Not a shared library + if test pass_all != "$deplibs_check_method"; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system cannot link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test yes = "$module"; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." + fi + if test no = "$build_old_libs"; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test lib = "$linkmode"; then + if test -n "$dependency_libs" && + { test yes != "$hardcode_into_libs" || + test yes = "$build_old_libs" || + test yes = "$link_static"; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs=$temp_deplibs + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test no = "$link_static" && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test no != "$link_all_deplibs"; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path=$deplib ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir=$dir ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of '$dir'" + absdir=$dir + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`$SED -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names"; then + for tmp in $deplibrary_names; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl"; then + depdepl=$absdir/$objdir/$depdepl + darwin_install_name=`$OTOOL -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`$OTOOL64 -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " $wl-dylib_file $wl$darwin_install_name:$depdepl" + func_append linker_flags " -dylib_file $darwin_install_name:$depdepl" + path= + fi + fi + ;; + *) + path=-L$absdir/$objdir + ;; + esac + else + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "'$deplib' seems to be moved" + + path=-L$absdir + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test link = "$pass"; then + if test prog = "$linkmode"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs=$newdependency_libs + if test dlpreopen = "$pass"; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test 
dlopen != "$pass"; then + test conv = "$pass" || { + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + } + + if test prog,link = "$linkmode,$pass"; then + vars="compile_deplibs finalize_deplibs" + else + vars=deplibs + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + + # Add Sun CC postdeps if required: + test CXX = "$tagname" && { + case $host_os in + linux*) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C++ 5.9 + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + + solaris*) + func_cc_basename "$CC" + case $func_cc_basename_result in + CC* | sunCC*) + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + esac + } + + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i= + ;; + esac + if test -n "$i"; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test prog = "$linkmode"; then + dlfiles=$newdlfiles + fi + if test prog = "$linkmode" || test lib = "$linkmode"; then + dlprefiles=$newdlprefiles + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for 
archives" + + test -n "$release" && \ + func_warning "'-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "'-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs=$output + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form 'libNAME.la'. + case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test no = "$module" \ + && func_fatal_help "libtool library '$output' must begin with 'lib'" + + if test no != "$need_lib_prefix"; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test pass_all != "$deplibs_check_method"; then + func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test no = "$dlself" \ + || func_warning "'-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test 1 -lt "$#" \ + && func_warning "ignoring multiple '-rpath's for a libtool library" + + install_libdir=$1 + + oldlibs= + if test -z "$rpath"; then + if test yes = "$build_libtool_libs"; then + # Building a libtool convenience library. + # Some compilers have problems with a '.al' extension so + # convenience libraries should have the same extension an + # archive normally would. 
+ oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "'-release' is ignored for convenience libraries" + else + + # Parse the version information argument. + save_ifs=$IFS; IFS=: + set dummy $vinfo 0 0 0 + shift + IFS=$save_ifs + + test -n "$7" && \ + func_fatal_help "too many parameters to '-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major=$1 + number_minor=$2 + number_revision=$3 + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # that has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_revision + ;; + freebsd-aout|qnx|sunos) + current=$number_major + revision=$number_minor + age=0 + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_minor + lt_irix_increment=no + ;; + *) + func_fatal_configuration "$modename: unknown library version type '$version_type'" + ;; + esac + ;; + no) + current=$1 + revision=$2 + age=$3 + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT '$current' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION '$revision' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE '$age' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE '$age' is greater than the current interface number '$current'" + func_fatal_error "'$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + # On Darwin other compilers + case $CC in + nagfor*) + verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + ;; + *) + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + esac + ;; + + freebsd-aout) + major=.$current + versuffix=.$current.$revision + ;; + + freebsd-elf | midnightbsd-elf) + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + irix | nonstopux) + if test no = "$lt_irix_increment"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring=$verstring_prefix$major.$revision + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test 0 -ne "$loop"; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring_prefix$major.$iface:$verstring + done + + # Before this point, $major must not contain '.'. + major=.$major + versuffix=$major.$revision + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=.$current.$age.$revision + verstring=$current.$age.$revision + + # Add in all the interfaces that we are compatible with. 
+ loop=$age + while test 0 -ne "$loop"; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring:$iface.0 + done + + # Make executables depend on our current version. + func_append verstring ":$current.0" + ;; + + qnx) + major=.$current + versuffix=.$current + ;; + + sco) + major=.$current + versuffix=.$current + ;; + + sunos) + major=.$current + versuffix=.$current.$revision + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 file systems. + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + + *) + func_fatal_configuration "unknown library version type '$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring=0.0 + ;; + esac + if test no = "$need_version"; then + versuffix= + else + versuffix=.0.0 + fi + fi + + # Remove version info from name if versioning should be avoided + if test yes,no = "$avoid_version,$need_version"; then + major= + versuffix= + verstring= + fi + + # Check to see if the archive will have undefined symbols. + if test yes = "$allow_undefined"; then + if test unsupported = "$allow_undefined_flag"; then + if test yes = "$build_old_libs"; then + func_warning "undefined symbols not allowed in $host shared libraries; building static only" + build_libtool_libs=no + else + func_fatal_error "can't build $host shared library unless -no-undefined is specified" + fi + fi + else + # Don't allow undefined symbols. 
+ allow_undefined_flag=$no_undefined_flag + fi + + fi + + func_generate_dlsyms "$libname" "$libname" : + func_append libobjs " $symfileobj" + test " " = "$libobjs" && libobjs= + + if test relink != "$opt_mode"; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/$libname$release.*) + if test -n "$precious_files_regex"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test yes = "$build_old_libs" && test convenience != "$build_libtool_libs"; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test yes != "$hardcode_into_libs" || test yes = "$build_old_libs"; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles=$dlfiles + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles=$dlprefiles + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test yes = "$build_libtool_libs"; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test yes = "$build_libtool_need_lc"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release= + versuffix= + major= + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c </dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? 
+ potlib=$potent_lib + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | $SED 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib=$potliblink;; + *) potlib=`$ECHO "$potlib" | $SED 's|[^/]*$||'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib= + ;; + esac + fi + if test -n "$a_deplib"; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib=$potent_lib # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs= + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + for i in $predeps $postdeps; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s|$i||"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test none = "$deplibs_check_method"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test yes = "$droppeddeps"; then + if test yes = "$module"; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." 
+ fi + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test no = "$allow_undefined"; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs=$new_libs + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test yes = "$build_libtool_libs"; then + # Remove $wl instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test yes = "$hardcode_into_libs"; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath=$finalize_rpath + test relink = "$opt_mode" || rpath=$compile_rpath$rpath + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath=$finalize_shlibpath + test relink = "$opt_mode" || shlibpath=$compile_shlibpath$shlibpath + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname=$1 + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname=$realname + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib=$output_objdir/$realname + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols=$output_objdir/$libname.uexp + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + func_dll_def_p "$export_symbols" || { + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols=$export_symbols + export_symbols= + always_export_symbols=yes + } + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test yes = "$always_export_symbols" || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs=$IFS; IFS='~' + for cmd1 in $cmds; do + IFS=$save_ifs + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test yes = "$try_normal_branch" \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=$output_objdir/$output_la.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS=$save_ifs + if test -n "$export_symbols_regex" && test : != "$skipped_export"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test : != "$skipped_export" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs=$tmp_deplibs + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test yes = "$compiler_needs_object" && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test yes = "$thread_safe" && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test : != "$skipped_export" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test : != "$skipped_export" && test yes = "$with_gnu_ld"; then + output=$output_objdir/$output_la.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test : != "$skipped_export" && test -n "$file_list_spec"; then + output=$output_objdir/$output_la.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test yes = "$compiler_needs_object"; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-$k.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test -z "$objlist" || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test 1 -eq "$k"; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-$k.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-$k.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + ${skipped_export-false} && { + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + } + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs=$IFS; IFS='~' + for cmd in $concat_cmds; do + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + ${skipped_export-false} && { + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + } + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs=$IFS; IFS='~' + for cmd in $cmds; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test yes = "$module" || test yes = "$export_dynamic"; then + # On all known operating systems, these are identical. + dlname=$soname + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "'-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object '$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj=$output + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # if reload_cmds runs $LD directly, get rid of -Wl from + # whole_archive_flag_spec and hope we can get by with turning comma + # into space. 
+ case $reload_cmds in + *\$LD[\ \$]*) wl= ;; + esac + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + test -n "$wl" || tmp_whole_archive_flags=`$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + reload_conv_objs=$reload_objs\ $tmp_whole_archive_flags + else + gentop=$output_objdir/${obj}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test yes = "$build_libtool_libs" || libobjs=$non_pic_objects + + # Create the old-style object. + reload_objs=$objs$old_deplibs' '`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; /\.lib$/d; $lo2o" | $NL2SP`' '$reload_conv_objs + + output=$obj + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + test yes = "$build_libtool_libs" || { + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + } + + if test -n "$pic_flag" || test default != "$pic_mode"; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output=$libobj + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "'-release' is ignored for programs" + + $preload \ + && test unknown,unknown,unknown = "$dlopen_support,$dlopen_self,$dlopen_self_static" \ + && func_warning "'LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test CXX = "$tagname"; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " $wl-bind_at_load" + func_append finalize_command " $wl-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs=$new_libs + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$libdir" | $SED -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath=$rpath + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath=$rpath + + if test -n "$libobjs" && test yes = "$build_old_libs"; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" false + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=: + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=false + ;; + *cygwin* | *mingw* ) + test yes = "$build_libtool_libs" || wrappers_required=false + ;; + *) + if test no = "$need_relink" || test yes != "$build_libtool_libs"; then + wrappers_required=false + fi + ;; + esac + $wrappers_required || { + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command=$compile_command$compile_rpath + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.$objext"; then + func_show_eval '$RM "$output_objdir/${outputname}S.$objext"' + fi + + exit $exit_status + } + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test yes = "$no_install"; then + # We don't need to create a wrapper script. + link_command=$compile_var$compile_command$compile_rpath + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + case $hardcode_action,$fast_install in + relink,*) + # Fast installation is not supported + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "'$output' will be relinked during installation" + ;; + *,yes) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + ;; + *,no) + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + ;; + *,needless) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command= + ;; + esac + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty "$var_value" + relink_command="$var=$func_quote_arg_result; export $var; $relink_command" + fi + done + func_quote eval cd "`pwd`" + func_quote_arg pretty,unquoted "($func_quote_result; $relink_command)" + relink_command=$func_quote_arg_unquoted_result + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource=$output_path/$objdir/lt-$output_name.c + cwrapper=$output_path/$output_name.exe + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host"; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + case $build_libtool_libs in + convenience) + oldobjs="$libobjs_save $symfileobj" + addlibs=$convenience + build_libtool_libs=no + ;; + module) + oldobjs=$libobjs_save + addlibs=$old_convenience + build_libtool_libs=no + ;; + *) + oldobjs="$old_deplibs $non_pic_objects" + $preload && test -f "$symfileobj" \ + && func_append oldobjs " $symfileobj" + addlibs=$old_convenience + ;; + esac + + if test -n "$addlibs"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test yes = "$build_libtool_libs"; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase=$func_basename_result + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." 
+ for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj"; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test -z "$oldobjs"; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. 
+ case $output in + *.la) + old_library= + test yes = "$build_old_libs" && old_library=$libname.$libext + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty,unquoted "$var_value" + relink_command="$var=$func_quote_arg_unquoted_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + func_quote eval cd "`pwd`" + relink_command="($func_quote_result; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + func_quote_arg pretty,unquoted "$relink_command" + relink_command=$func_quote_arg_unquoted_result + if test yes = "$hardcode_automatic"; then + relink_command= + fi + + # Only create the output if not a dry run. 
+ $opt_dry_run || { + for installed in no yes; do + if test yes = "$installed"; then + if test -z "$install_libdir"; then + break + fi + output=$output_objdir/${outputname}i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name=$func_basename_result + func_resolve_sysroot "$deplib" + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " $deplib" ;; + esac + done + dependency_libs=$newdependency_libs + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. 
that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles=$newdlprefiles + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles=$newdlprefiles + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test -n "$bindir"; then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result/$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# Please DO NOT delete this file! 
+# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test no,yes = "$installed,$need_relink"; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' + ;; + esac + exit $EXIT_SUCCESS +} + +if test link = "$opt_mode" || test relink = "$opt_mode"; then + func_mode_link ${1+"$@"} +fi + + +# func_mode_uninstall arg... +func_mode_uninstall () +{ + $debug_cmd + + RM=$nonopt + files= + rmforce=false + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=: ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." 
+ dir=$func_dirname_result + if test . = "$dir"; then + odir=$objdir + else + odir=$dir/$objdir + fi + func_basename "$file" + name=$func_basename_result + test uninstall = "$opt_mode" && odir=$dir + + # Remember odir for removal later, being careful to avoid duplicates + if test clean = "$opt_mode"; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif $rmforce; then + continue + fi + + rmfiles=$file + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case $opt_mode in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" '$rmforce || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" '$rmforce || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. 
+ if test -n "$pic_object" && test none != "$pic_object"; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && test none != "$non_pic_object"; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test clean = "$opt_mode"; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. + if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.$objext" + if test yes = "$fast_install" && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name"; then + func_append rmfiles " $odir/lt-$noexename.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the $objdir's in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +if test uninstall = "$opt_mode" || test clean = "$opt_mode"; then + func_mode_uninstall ${1+"$@"} +fi + +test -z "$opt_mode" && { + help=$generic_help + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode '$opt_mode'" + +if test -n "$exec_cmd"; then + 
eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# where we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/build-aux/missing b/build-aux/missing new file mode 100755 index 0000000..1fe1611 --- /dev/null +++ b/build-aux/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if we it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. 
This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." 
+ program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). 
+exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/build-aux/test-driver b/build-aux/test-driver new file mode 100755 index 0000000..be73b80 --- /dev/null +++ b/build-aux/test-driver @@ -0,0 +1,153 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <"$log_file" +"$@" >>"$log_file" 2>&1 +estatus=$? 
+ +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>"$log_file" + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. +echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/configure b/configure new file mode 100755 index 0000000..5430f2f --- /dev/null +++ b/configure @@ -0,0 +1,51344 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.71 for StarPU 1.4.10. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else $as_nop + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. 
+if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 + fi + # We don't want this to propagate to other subprocesses. 
+ { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else \$as_nop + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : + +else \$as_nop + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1" + if (eval "$as_required") 2>/dev/null +then : + as_have_required=yes +else $as_nop + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : + +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$as_shell as_have_required=yes + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : + break 2 +fi +fi + done;; + esac + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi + + + if test "x$CONFIG_SHELL" != x +then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." + else + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and +$0: starpu-devel@inria.fr about your system, including any +$0: error possibly output before this message. Then install +$0: a modern shell, or manually run the script under such a +$0: shell if you do have one." 
+ fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? 
"cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else $as_nop + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else $as_nop + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. 
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 </dev/null +exec 6>&1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='StarPU' +PACKAGE_TARNAME='starpu' +PACKAGE_VERSION='1.4.10' +PACKAGE_STRING='StarPU 1.4.10' +PACKAGE_BUGREPORT='starpu-devel@inria.fr' +PACKAGE_URL='http://gitlab.inria.fr/starpu/starpu' + +ac_unique_file="include/starpu.h" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include <stddef.h> +#ifdef HAVE_STDIO_H +# include <stdio.h> +#endif +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +# include <string.h> +#endif +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> +#endif +#ifdef HAVE_STDINT_H +# include <stdint.h> +#endif +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif" + +ac_header_c_list= +ac_func_c_list= +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +STARPU_SANITIZE_FALSE +STARPU_SANITIZE_TRUE +SOCL_VENDORS +LIBSTARPU_LINK +STARPUPY_EXTRA_LINK_ARGS +STARPU_EXPORTED_LIBS +LIBSTARPU_LDFLAGS +STARPU_NVCC_H_CPPFLAGS +STARPU_H_CPPFLAGS +STARPU_OPTION_LIBS +STARPU_MODULE_LIBS +STARPU_LIB_PATH +STARPU_INCLUDE_PATH +ECLIPSE +STARPU_BUILD_ECLIPSE_PLUGIN_FALSE +STARPU_BUILD_ECLIPSE_PLUGIN_TRUE +eclipsepath +JULIA +STARPU_USE_JULIA_FALSE +STARPU_USE_JULIA_TRUE +juliapath +DOC_GENERATE_LATEX +STARPU_AVAILABLE_DOC_PDF_FALSE +STARPU_AVAILABLE_DOC_PDF_TRUE +STARPU_BUILD_DOC_PDF_FALSE +STARPU_BUILD_DOC_PDF_TRUE +STARPU_AVAILABLE_DOC_FALSE +STARPU_AVAILABLE_DOC_TRUE +STARPU_BUILD_DOC_FALSE +STARPU_BUILD_DOC_TRUE +epstopdfcommand +pdflatexcommand +doxygencommand +STARPU_STARPUPY_NUMPY_FALSE +STARPU_STARPUPY_NUMPY_TRUE +STARPU_BUILD_STARPUPY_FALSE +STARPU_BUILD_STARPUPY_TRUE +PYTHON_SETUP_OPTIONS +PYTHON_NUMPY_DIR +PYTHON_VERSION +PYTHON +STARPU_BUILD_STARPURM_EXAMPLES_FALSE +STARPU_BUILD_STARPURM_EXAMPLES_TRUE +STARPU_BUILD_STARPURM_FALSE +STARPU_BUILD_STARPURM_TRUE +STARPURM_HAVE_DLB_FALSE +STARPURM_HAVE_DLB_TRUE +DLB_LIBS +DLB_CFLAGS +STARPU_HAVE_AM111_FALSE +STARPU_HAVE_AM111_TRUE +STARPU_HAVE_HELP2MAN_FALSE +STARPU_HAVE_HELP2MAN_TRUE +HELP2MAN +STARPU_HAVE_ICC_FALSE +STARPU_HAVE_ICC_TRUE +ICC +ICC_ARGS +STARPU_HAVE_F77_H_FALSE +STARPU_HAVE_F77_H_TRUE +STARPU_HAVE_F77_H +HWLOC_REQUIRES +STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE +STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE +STARPU_HAVE_HWLOC +STARPU_HAVE_HWLOC_FALSE 
+STARPU_HAVE_HWLOC_TRUE +HWLOC_LIBS +HWLOC_CFLAGS +STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE +STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE +STARPU_BUILD_STARPUFFT_FALSE +STARPU_BUILD_STARPUFFT_TRUE +STARPU_HAVE_FFTWL_FALSE +STARPU_HAVE_FFTWL_TRUE +HAVE_FFTWFL +FFTWL_LIBS +FFTWL_CFLAGS +STARPU_HAVE_FFTWF_FALSE +STARPU_HAVE_FFTWF_TRUE +STARPU_HAVE_FFTWF +FFTWF_LIBS +FFTWF_CFLAGS +STARPU_HAVE_FFTW_FALSE +STARPU_HAVE_FFTW_TRUE +STARPU_HAVE_FFTW +FFTW_LIBS +FFTW_CFLAGS +STARPU_USE_MIN_DGELS_FALSE +STARPU_USE_MIN_DGELS_TRUE +DGELS_LIBS +STARPU_LAPACK_LDFLAGS +BLAS_LIB +STARPU_NO_BLAS_LIB_FALSE +STARPU_NO_BLAS_LIB_TRUE +STARPU_SYSTEM_BLAS_LIB_FALSE +STARPU_SYSTEM_BLAS_LIB_TRUE +STARPU_MKL_BLAS_LIB_FALSE +STARPU_MKL_BLAS_LIB_TRUE +STARPU_GOTO_BLAS_LIB_FALSE +STARPU_GOTO_BLAS_LIB_TRUE +STARPU_ATLAS_BLAS_LIB_FALSE +STARPU_ATLAS_BLAS_LIB_TRUE +STARPU_HAVE_CBLAS_SGEMV_FALSE +STARPU_HAVE_CBLAS_SGEMV_TRUE +STARPU_HAVE_LIBLAPACK_FALSE +STARPU_HAVE_LIBLAPACK_TRUE +STARPU_LIBLAPACK_LDFLAGS +STARPU_HAVE_CBLAS_H_FALSE +STARPU_HAVE_CBLAS_H_TRUE +BLAS_LIBS +BLAS_OPENBLAS_LIBS +BLAS_OPENBLAS_CFLAGS +STARPU_OPENBLAS +STARPU_OPENBLAS_LDFLAGS +OPENBLAS_LIBS +OPENBLAS_CFLAGS +ATLASDIR +STARPU_BLAS_LDFLAGS +GOTODIR +STARPU_HAVE_X11_FALSE +STARPU_HAVE_X11_TRUE +X_EXTRA_LIBS +X_LIBS +X_PRE_LIBS +X_CFLAGS +XMKMF +STARPU_HAVE_OPENGL_FALSE +STARPU_HAVE_OPENGL_TRUE +STARPU_OPENGL_RENDER +STARPU_OPENGL_RENDER_LDFLAGS +STARPU_BUILD_EXAMPLES_FALSE +STARPU_BUILD_EXAMPLES_TRUE +STARPU_BUILD_TESTS_FALSE +STARPU_BUILD_TESTS_TRUE +GDB +SOCL_OCL_LIB_OPENCL_DIR +STARPU_USE_SOCL_FALSE +STARPU_USE_SOCL_TRUE +STARPU_BUILD_SOCL_FALSE +STARPU_BUILD_SOCL_TRUE +STARPU_HAVE_OPENMP_FALSE +STARPU_HAVE_OPENMP_TRUE +STARPU_OPENMP_FALSE +STARPU_OPENMP_TRUE +STARPU_OPENMP_LLVM_FALSE +STARPU_OPENMP_LLVM_TRUE +STARPU_PARALLEL_WORKER_FALSE +STARPU_PARALLEL_WORKER_TRUE +OPENMP_CFLAGS +pkglibdir +GLOBAL_AM_FCFLAGS +GLOBAL_AM_FFLAGS +GLOBAL_AM_CXXFLAGS +GLOBAL_AM_CFLAGS +STARPU_DEVEL_FALSE +STARPU_DEVEL_TRUE +STARPU_EXPORT_DYNAMIC 
+STARPU_USE_MP_FALSE +STARPU_USE_MP_TRUE +STARPU_HAVE_LEVELDB_FALSE +STARPU_HAVE_LEVELDB_TRUE +STARPU_LEVELDB_LDFLAGS +STARPU_FXT_EVENT_DEFINES +STARPU_USE_AYUDAME2_FALSE +STARPU_USE_AYUDAME2_TRUE +STARPU_USE_AYUDAME1_FALSE +STARPU_USE_AYUDAME1_TRUE +STARPU_GLPK_LDFLAGS +STARPU_PERF_DEBUG +PAPI_LIBS +PAPI_CFLAGS +STARPU_USE_FXT_FALSE +STARPU_USE_FXT_TRUE +STARPU_USE_FXT +POTI_LIBS +POTI_CFLAGS +FXT_LDFLAGS +FXT_LIBS +FXT_CFLAGS +FXTDIR +STARPU_COVERITY_FALSE +STARPU_COVERITY_TRUE +STARPU_COVERAGE_ENABLED_FALSE +STARPU_COVERAGE_ENABLED_TRUE +COVERAGE +STARPU_HAVE_MPIFORT_FALSE +STARPU_HAVE_MPIFORT_TRUE +STARPU_HAVE_F77_FALSE +STARPU_HAVE_F77_TRUE +STARPU_HAVE_FC_FALSE +STARPU_HAVE_FC_TRUE +MPIFORT +mpifort_path +STARPU_USE_MAX_FPGA_FALSE +STARPU_USE_MAX_FPGA_TRUE +STARPU_USE_MAX_FPGA +SLIC_CONFIG +STARPU_OPENCL_LDFLAGS +STARPU_OPENCL_CPPFLAGS +STARPU_OPENCL_DATAdir +STARPU_USE_OPENCL_FALSE +STARPU_USE_OPENCL_TRUE +STARPU_USE_OPENCL +HIPCCFLAGS +STARPU_USE_HIP_FALSE +STARPU_USE_HIP_TRUE +STARPU_HIP_CPPFLAGS +STARPU_HIP_LDFLAGS +HIPCC +STARPU_USE_HIPBLAS_FALSE +STARPU_USE_HIPBLAS_TRUE +STARPU_USE_HIPBLAS +HIPCONFIG +NVCCFLAGS +NVCC_CC +STARPU_USE_CUDA1_FALSE +STARPU_USE_CUDA1_TRUE +STARPU_USE_CUDA0_FALSE +STARPU_USE_CUDA0_TRUE +STARPU_CUDA_CPPFLAGS +STARPU_CUFFT_LDFLAGS +STARPU_CUDA_LDFLAGS +STARPU_CURAND_LDFLAGS +STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE +STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE +STARPU_HAVE_MAGMA_FALSE +STARPU_HAVE_MAGMA_TRUE +STARPU_HAVE_MAGMA +MAGMA_LIBS +MAGMA_CFLAGS +CC_OR_NVCC +STARPU_CUDA_FORTRAN_LDFLAGS +STARPU_USE_CUDA_FALSE +STARPU_USE_CUDA_TRUE +STARPU_USE_CUDA +NVCC +STARPU_USE_CPU_FALSE +STARPU_USE_CPU_TRUE +STARPU_USE_CPU +STARPU_SC_HYPERVISOR_DEBUG_FALSE +STARPU_SC_HYPERVISOR_DEBUG_TRUE +STARPU_SC_HYPERVISOR_DEBUG +STARPU_USE_SC_HYPERVISOR_FALSE +STARPU_USE_SC_HYPERVISOR_TRUE +STARPU_BUILD_SC_HYPERVISOR_FALSE +STARPU_BUILD_SC_HYPERVISOR_TRUE +STARPU_SC_HYPERVISOR +STARPU_LIBNUMA_LDFLAGS +STARPU_HAVE_HDF5_FALSE +STARPU_HAVE_HDF5_TRUE 
+STARPU_HDF5_LDFLAGS +STARPU_NEW_CHECK_FALSE +STARPU_NEW_CHECK_TRUE +STARPU_LONG_CHECK_FALSE +STARPU_LONG_CHECK_TRUE +STARPU_QUICK_CHECK_FALSE +STARPU_QUICK_CHECK_TRUE +STARPU_SRC_DIR +STARPU_BUILD_DIR +STARPU_MS_LIB_ARCH +STARPU_OPENBSD_SYS_FALSE +STARPU_OPENBSD_SYS_TRUE +STARPU_HAVE_DARWIN_FALSE +STARPU_HAVE_DARWIN_TRUE +STARPU_LINUX_SYS_FALSE +STARPU_LINUX_SYS_TRUE +STARPU_HAVE_WINDOWS_FALSE +STARPU_HAVE_WINDOWS_TRUE +STARPU_HAVE_MS_LIB_FALSE +STARPU_HAVE_MS_LIB_TRUE +STARPU_MS_LIB +hwloccalccommand +MPICC_LDFLAGS +MPIEXEC_ARGS +STARPU_USE_MPI_FT_STATS_FALSE +STARPU_USE_MPI_FT_STATS_TRUE +STARPU_USE_MPI_FT_FALSE +STARPU_USE_MPI_FT_TRUE +STARPU_USE_MPI_FALSE +STARPU_USE_MPI_TRUE +STARPU_USE_MPI_NMAD_FALSE +STARPU_USE_MPI_NMAD_TRUE +STARPU_USE_MPI_MPI_FALSE +STARPU_USE_MPI_MPI_TRUE +STARPU_MPI_SYNC_CLOCKS_FALSE +STARPU_MPI_SYNC_CLOCKS_TRUE +MPI_SYNC_CLOCKS_LIBS +MPI_SYNC_CLOCKS_CFLAGS +STARPU_MPI_CHECK_FALSE +STARPU_MPI_CHECK_TRUE +STARPU_USE_TCPIP_MASTER_SLAVE_FALSE +STARPU_USE_TCPIP_MASTER_SLAVE_TRUE +STARPU_USE_MPI_MASTER_SLAVE_FALSE +STARPU_USE_MPI_MASTER_SLAVE_TRUE +NMAD_LIBS +NMAD_CFLAGS +CC_OR_MPICC +STARPU_MPI_MINIMAL_TESTS_FALSE +STARPU_MPI_MINIMAL_TESTS_TRUE +MPIEXEC +mpiexec_path +MPICXX +mpicxx_path +MPICC +mpicc_path +STARPU_CROSS_COMPILING_FALSE +STARPU_CROSS_COMPILING_TRUE +gitcommand +REALBASH +CXXCPP +LT_SYS_LIBRARY_PATH +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +FILECMD +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +LIBTOOL +OBJDUMP +DLLTOOL +AS +STARPU_HAVE_CXX11_FALSE +STARPU_HAVE_CXX11_TRUE +STARPU_HAVE_CXX11 +HAVE_CXX11 +SIMGRID_LDFLAGS +STARPU_SIMGRID_FALSE +STARPU_SIMGRID_TRUE +STARPU_SIMGRID_MC_FALSE +STARPU_SIMGRID_MC_TRUE +SIMGRID_MC +SIMGRID_LIBS +SIMGRID_CFLAGS +STARPU_BUBBLE_FALSE +STARPU_BUBBLE_TRUE +APP_FCFLAGS +APP_FFLAGS +APP_CXXFLAGS +APP_CFLAGS +PKG_CONFIG +HAVE_PARALLEL_FALSE +HAVE_PARALLEL_TRUE +PARALLEL +PROG_CLANG +PROG_FIND +PROG_DATE +PROG_STAT +EGREP +GREP +ac_ct_FC +FCFLAGS +FC +ac_ct_F77 +FFLAGS +F77 +LN_S 
+SED +CPP +am__fastdepCXX_FALSE +am__fastdepCXX_TRUE +CXXDEPMODE +ac_ct_CXX +CXXFLAGS +CXX +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +ac_ct_AR +AR +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +CSCOPE +ETAGS +CTAGS +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_os +target_vendor +target_cpu +target +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +LIBSTARPUJULIA_INTERFACE_AGE +LIBSTARPUJULIA_INTERFACE_REVISION +LIBSTARPUJULIA_INTERFACE_CURRENT +LIBSOCL_INTERFACE_AGE +LIBSOCL_INTERFACE_REVISION +LIBSOCL_INTERFACE_CURRENT +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT +LIBSTARPURM_INTERFACE_AGE +LIBSTARPURM_INTERFACE_REVISION +LIBSTARPURM_INTERFACE_CURRENT +LIBSTARPUFFT_INTERFACE_AGE +LIBSTARPUFFT_INTERFACE_REVISION +LIBSTARPUFFT_INTERFACE_CURRENT +LIBSTARPUMPI_INTERFACE_AGE +LIBSTARPUMPI_INTERFACE_REVISION +LIBSTARPUMPI_INTERFACE_CURRENT +LIBSTARPU_INTERFACE_AGE +LIBSTARPU_INTERFACE_REVISION +LIBSTARPU_INTERFACE_CURRENT +STARPU_EFFECTIVE_VERSION +STARPU_RELEASE_VERSION +STARPU_MINOR_VERSION +STARPU_MAJOR_VERSION +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' 
+enable_option_checking +enable_silent_rules +enable_dependency_tracking +enable_simgrid +enable_starpupy +enable_prof_tool +enable_bubble +enable_bubble_verbose +enable_opencl_simulator +with_simgrid_dir +with_simgrid_include_dir +with_simgrid_lib_dir +enable_simgrid_mc +enable_blocking_drivers +enable_worker_callbacks +enable_shared +enable_static +with_pic +enable_fast_install +with_aix_soname +with_gnu_ld +with_sysroot +enable_libtool_lock +with_mpicc +with_mpicxx +with_smpirun +with_mpiexec +enable_mpi +enable_mpi_minimal_tests +enable_nmad +enable_mpi_master_slave +enable_maxmpidev +enable_tcpip_master_slave +enable_maxtcpipdev +enable_mpi_pedantic_isend +enable_mpi_check +enable_mpi_ft +enable_mpi_ft_stats +with_mpiexec_args +enable_mpi_verbose +enable_maxnumanodes +enable_native_winthreads +enable_default_drand48 +enable_quick_check +enable_long_check +enable_new_check +enable_valgrind +enable_hdf5 +with_hdf5_include_dir +with_hdf5_lib_dir +enable_max_sched_ctxs +enable_sc_hypervisor +enable_sc_hypervisor_debug +enable_maxcpus +enable_cpu +enable_maxcudadev +enable_cuda +with_cuda_dir +with_cuda_include_dir +with_cuda_lib_dir +enable_cuda_memcpy_peer +enable_cuda_map +enable_cuda0 +enable_cuda1 +enable_maxhipdev +enable_hip +with_hipblas +enable_hip_memcpy_peer +enable_maxopencldev +enable_opencl +with_opencl_dir +with_opencl_include_dir +with_opencl_lib_dir +enable_maxmaxfpgadev +enable_max_fpga +enable_asynchronous_copy +enable_asynchronous_cuda_copy +enable_asynchronous_opencl_copy +enable_asynchronous_mpi_master_slave_copy +enable_asynchronous_tcpip_master_slave_copy +enable_asynchronous_max_fpga_copy +enable_fortran +with_mpifort +enable_debug +enable_spinlock_check +enable_fstack_protector_all +enable_gdb +enable_full_gdb_information +enable_fast +enable_verbose +enable_coverage +enable_coverity +enable_fxt +with_fxt +enable_poti +enable_fxt_lock +enable_papi +enable_perf_debug +enable_model_debug +enable_memory_stats +enable_glpk 
+with_ayudame1_include_dir +with_ayudame2_include_dir +enable_ayudame1 +enable_ayudame2 +enable_data_locality_enforce +enable_maxbuffers +enable_fxt_max_files +enable_maxnodes +enable_allocation_cache +with_perf_model_dir +enable_maximplementations +enable_leveldb +enable_calibration_heuristic +enable_export_dynamic +with_check_flags +enable_parallel_worker +enable_openmp +enable_openmp_llvm +enable_socl +enable_build_tests +enable_build_examples +enable_opengl_render +with_x +enable_blas_lib +with_goto_dir +with_atlas_dir +with_mkl_cflags +with_mkl_ldflags +with_armpl_cflags +with_armpl_ldflags +enable_mlr +enable_mlr_system_blas +enable_starpufft +enable_starpufft_examples +with_hwloc +with_icc +with_icc_args +enable_icc +enable_starpurm +enable_starpurm_verbose +enable_dlb +with_dlb_include_dir +with_dlb_lib_dir +enable_starpurm_dlb_verbose +enable_starpurm_examples +enable_build_doc +enable_build_doc_pdf +enable_julia +enable_eclipse_plugin +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CXX +CXXFLAGS +CCC +CPP +F77 +FFLAGS +FC +FCFLAGS +PKG_CONFIG +SIMGRID_CFLAGS +SIMGRID_LIBS +LT_SYS_LIBRARY_PATH +CXXCPP +NMAD_CFLAGS +NMAD_LIBS +MPI_SYNC_CLOCKS_CFLAGS +MPI_SYNC_CLOCKS_LIBS +STARPU_MS_LIB +MAGMA_CFLAGS +MAGMA_LIBS +NVCC +NVCC_CC +NVCCFLAGS +HIPCCFLAGS +FXT_CFLAGS +FXT_LIBS +FXT_LDFLAGS +POTI_CFLAGS +POTI_LIBS +PAPI_CFLAGS +PAPI_LIBS +XMKMF +OPENBLAS_CFLAGS +OPENBLAS_LIBS +BLAS_OPENBLAS_CFLAGS +BLAS_OPENBLAS_LIBS +BLAS_LIBS +DGELS_LIBS +FFTW_CFLAGS +FFTW_LIBS +FFTWF_CFLAGS +FFTWF_LIBS +FFTWL_CFLAGS +FFTWL_LIBS +HWLOC_CFLAGS +HWLOC_LIBS +PYTHON' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. 
+cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. 
+ if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid package name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? 
"missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. 
+if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures StarPU 1.4.10 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/starpu] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +X features: + --x-includes=DIR X include files are in DIR + --x-libraries=DIR X library files are in DIR + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] + --target=TARGET configure for building compilers for TARGET [HOST] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of StarPU 1.4.10:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized 
--enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-simgrid Enable simulating execution in simgrid + --enable-starpupy enable StarPU python interface + --enable-prof-tool enable profiling tool + --enable-bubble build the hierarchical dags (a.k.a bubble) support + --enable-bubble-verbose display verbose bubble messages + --enable-opencl-simulator + Enable the use of an OpenCL simulator + --enable-simgrid-mc Enable using Model Checker of simgrid + --enable-blocking-drivers + enable blocking drivers + --enable-worker-callbacks + enable worker callbacks + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --disable-mpi Disable StarPU MPI library generation + --enable-mpi-minimal-tests + Only enable a subset of MPI tests + --enable-nmad Enable StarPU MPI library generation using the new + madeleine backend + --enable-mpi-master-slave + Enable StarPU to run with the master-slave mode + --enable-maxmpidev= + maximum number of MPI master-slave devices + --enable-tcpip-master-slave + Enable StarPU to run with the master-slave mode + --enable-maxtcpipdev= + maximum number of TCP/IP master-slave devices + --enable-mpi-pedantic-isend + Prevent StarPU MPI from reading buffers while being + sent over MPI + --enable-mpi-check Enable execution of MPI testcases + --enable-mpi-ft Enable failure tolerance mechanisms provided by + StarPU + --enable-mpi-ft-stats Enable stats for failure tolerance 
mechanisms + --enable-mpi-verbose display MPI verbose debug messages + (--enable-mpi-verbose=extra increase the verbosity) + --enable-maxnumanodes= + maximum number of NUMA nodes + --enable-native-winthreads + Use native windows threads instead of pthread + --disable-default-drand48 + Do not use the default version of drand48 + --enable-quick-check Lower default values for the testcases run by make + check to allow a faster execution + --enable-long-check Enable some exhaustive checks which take a really + long time + --enable-new-check Enable new and known-to-fail testcases + --disable-valgrind Do not check the availability of valgrind.h and + helgrind.h + --enable-hdf5 enable HDF5 support + --enable-max-sched-ctxs= + maximum number of sched_ctxs + --enable-sc-hypervisor enable resizing contexts (experimental) + --enable-sc-hypervisor-debug + enable debug for resizing contexts (experimental) + --enable-maxcpus= + maximum number of CPUs + --disable-cpu do not use the CPU(s) + --enable-maxcudadev= + maximum number of CUDA devices + --disable-cuda do not use CUDA device(s) + --disable-cuda-memcpy-peer + do not allow peer transfers when using CUDA 4.0 + --disable-cuda-map do not allow CUDA memory mapping when available + --enable-cuda0 Enable the minimal-support CUDA driver (only for + testing) + --enable-cuda0 Enable the small-support CUDA driver (only for + testing) + --enable-maxhipdev= + maximum number of HIP devices + --enable-hip Enable the minimal-support HIP driver (only for + testing) + --disable-hip-memcpy-peer + if you want to disable peer transfers when using hip + --enable-maxopencldev= + maximum number of OPENCL devices + --disable-opencl do not use OpenCL device(s) + --enable-maxmaxfpgadev= + maximum number of Maxeler FPGA devices + --disable-max-fpga disable support for Maxeler FPGA + --disable-asynchronous-copy + disable asynchronous copy between CPU and GPU + --disable-asynchronous-cuda-copy + disable asynchronous copy between CPU and CUDA + devices 
+ --disable-asynchronous-opencl-copy + disable asynchronous copy between CPU and OPENCL + devices + --disable-asynchronous-mpi-master-slave-copy + disable asynchronous copy between MPI Master and MPI + Slave devices + --disable-asynchronous-tcpip-master-slave-copy + disable asynchronous copy between TCP/IP Master and + TCP/IP Slave devices + --disable-asynchronous-max-fpga-copy + disable asynchronous copy between CPU and Maxeler + FPGA devices + --disable-fortran disable build of fortran examples + --enable-debug enable debug mode + --enable-spinlock-check enable spinlock check + --disable-fstack-protector-all + disable GCC option -fstack-protector-all + --disable-gdb disable gdb information + --disable-full-gdb-information + disable full gdb information + --enable-fast do not enforce assertions + --enable-verbose display verbose debug messages + (--enable-verbose=extra increase the verbosity) + --enable-coverage enable coverage checking + --enable-coverity enable coverity mode + --disable-fxt disable FxT trace mechanisms + --enable-poti Enable the use of the POTI library to generate Paje + traces + --enable-fxt-lock enable additional locking systems FxT traces + --disable-papi disable using papi + --enable-perf-debug enable performance debugging through gprof + --enable-model-debug enable performance model debugging + --enable-memory-stats enable memory stats + --disable-glpk disable using glpk for bound computation + --disable-ayudame1 Do not use Ayudame lib version 1 + --disable-ayudame2 Do not use Ayudame lib version 2 + --enable-data-locality-enforce + disable data locality enforcement + --enable-maxbuffers= + maximum number of buffers per task + --enable-fxt-max-files= + maximum number of mpi nodes for traces + --enable-maxnodes= + maximum number of memory nodes per MPI rank + --disable-allocation-cache + disable data allocation cache + --enable-maximplementations= + maximum number of implementations + --enable-leveldb Enable linking with LevelDB if available 
+ --enable-calibration-heuristic= + Define the maximum authorized deviation of StarPU + history-based calibrator. + --disable-export-dynamic + Prevent the linker from adding all symbols to the + dynamic symbol table + --enable-parallel-worker + build the parallel worker support + --disable-openmp do not use OpenMP + --enable-openmp-llvm build the OpenMP LLVM runtime support + --enable-openmp build the OpenMP runtime support + --enable-socl build the OpenCL interface (experimental) + --disable-build-tests disable building of tests + --disable-build-examples + disable building of examples + --enable-opengl-render enable OpenGL rendering of some examples + --enable-blas-lib=blaslibname: + none default: no BLAS lib is used + atlas: use ATLAS library + goto: use GotoBLAS library + mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags) + --enable-mlr Enable multiple linear regression models + --enable-mlr-system-blas + Make the multiple linear regression models use the + system BLAS instead of min-dgels + --disable-starpufft Disable build of StarPU-FFT + --enable-starpufft-examples + enable build of StarPU FFT examples + --enable-icc Enable the compilation of specific ICC examples + --enable-starpurm enable resource management support + --enable-starpurm-verbose + display resource management verbose debug messages + --enable-dlb enable DLB support + --enable-starpurm-dlb-verbose + display resource management verbose debug messages + --enable-starpurm-examples + enable build of StarPU Resource Manager examples + --disable-build-doc disable building of documentation + --enable-build-doc-pdf enable building of PDF documentation + --enable-julia enable the Julia extension + --enable-eclipse-plugin Build the Eclipse plugin + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-simgrid-dir= + specify SimGrid installation 
directory + --with-simgrid-include-dir= + specify where SimGrid headers are installed + --with-simgrid-lib-dir= + specify where SimGrid libraries are installed + --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use + both] + --with-aix-soname=aix|svr4|both + shared library versioning (aka "SONAME") variant to + provide on AIX, [default=aix]. + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot[=DIR] Search for dependent libraries within DIR (or the + compiler's sysroot if not specified). + --with-mpicc= + Name or path of the mpicc compiler + --with-mpicxx= + Name or path of the mpicxx/mpic++ compiler + --with-smpirun= + Name or path of the smpirun helper + --with-mpiexec= + Name or path of mpiexec + --with-mpiexec-args= + Arguments for mpiexec + --with-hdf5-include-dir= + specify where HDF5 headers are installed + --with-hdf5-lib-dir= + specify where HDF5 libraries are installed + --with-cuda-dir= specify CUDA installation directory + --with-cuda-include-dir= + specify where CUDA headers are installed + --with-cuda-lib-dir= + specify where CUDA libraries are installed + --with-hipblas= specify where hipblas is installed + --with-opencl-dir= + specify OpenCL installation directory + --with-opencl-include-dir= + specify where OpenCL headers are installed + --with-opencl-lib-dir= + specify where OpenCL libraries are installed + --with-mpifort= + Name or path of the mpifort compiler + --with-fxt= specify FxT installation directory + --with-ayudame1-include-dir= + specify where Ayudame version 1 headers are + installed + --with-ayudame2-include-dir= + specify where Ayudame version 2 headers are + installed + --with-perf-model-dir= + specify where performance models should be stored + --with-check-flags Specify flags for C and Fortran compilers + --with-x use the X Window System + --with-goto-dir= specify GotoBLAS lib location + --with-atlas-dir= specify ATLAS lib location + --with-mkl-cflags specify MKL compilation flags + 
--with-mkl-ldflags specify MKL linking flags + --with-armpl-cflags specify ARMPL compilation flags + --with-armpl-ldflags specify ARMPL linking flags + --without-hwloc Disable hwloc (enabled by default) + --with-icc= + Name or path of the icc compiler + --with-icc-args= + Arguments for icc + --with-dlb-include-dir= + specify where DLB headers are installed + --with-dlb-lib-dir= + specify where DLB libraries are installed + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CXX C++ compiler command + CXXFLAGS C++ compiler flags + CPP C preprocessor + F77 Fortran 77 compiler command + FFLAGS Fortran 77 compiler flags + FC Fortran compiler command + FCFLAGS Fortran compiler flags + PKG_CONFIG path to pkg-config utility + SIMGRID_CFLAGS + C compiler flags for SIMGRID, overriding pkg-config + SIMGRID_LIBS + linker flags for SIMGRID, overriding pkg-config + LT_SYS_LIBRARY_PATH + User-defined run-time library search path. 
+ CXXCPP C++ preprocessor + NMAD_CFLAGS C compiler flags for NMAD, overriding pkg-config + NMAD_LIBS linker flags for NMAD, overriding pkg-config + MPI_SYNC_CLOCKS_CFLAGS + C compiler flags for MPI_SYNC_CLOCKS, overriding pkg-config + MPI_SYNC_CLOCKS_LIBS + linker flags for MPI_SYNC_CLOCKS, overriding pkg-config + STARPU_MS_LIB + Path to Microsoft's Visual Studio `lib' tool + MAGMA_CFLAGS + C compiler flags for MAGMA, overriding pkg-config + MAGMA_LIBS linker flags for MAGMA, overriding pkg-config + NVCC CUDA compiler + NVCC_CC C compiler for CUDA compiler + NVCCFLAGS CUDA compiler flags + HIPCCFLAGS HIP compiler flags + FXT_CFLAGS C compiler flags for FXT, overriding pkg-config + FXT_LIBS linker flags for FXT, overriding pkg-config + FXT_LDFLAGS + POTI_CFLAGS C compiler flags for POTI, overriding pkg-config + POTI_LIBS linker flags for POTI, overriding pkg-config + PAPI_CFLAGS C compiler flags for PAPI, overriding pkg-config + PAPI_LIBS linker flags for PAPI, overriding pkg-config + XMKMF Path to xmkmf, Makefile generator for X Window System + OPENBLAS_CFLAGS + C compiler flags for OPENBLAS, overriding pkg-config + OPENBLAS_LIBS + linker flags for OPENBLAS, overriding pkg-config + BLAS_OPENBLAS_CFLAGS + C compiler flags for BLAS_OPENBLAS, overriding pkg-config + BLAS_OPENBLAS_LIBS + linker flags for BLAS_OPENBLAS, overriding pkg-config + BLAS_LIBS linker flags for blas + DGELS_LIBS linker flags for lapack dgels + FFTW_CFLAGS C compiler flags for FFTW, overriding pkg-config + FFTW_LIBS linker flags for FFTW, overriding pkg-config + FFTWF_CFLAGS + C compiler flags for FFTWF, overriding pkg-config + FFTWF_LIBS linker flags for FFTWF, overriding pkg-config + FFTWL_CFLAGS + C compiler flags for FFTWL, overriding pkg-config + FFTWL_LIBS linker flags for FFTWL, overriding pkg-config + HWLOC_CFLAGS + C compiler flags for HWLOC, overriding pkg-config + HWLOC_LIBS linker flags for HWLOC, overriding pkg-config + PYTHON Python3 interpreter + +Use these variables to override 
the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +StarPU home page: . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +StarPU configure 1.4.10 +generated by GNU Autoconf 2.71 + +Copyright (C) 2021 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_f77_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_f77_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_f77_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_f77_try_compile + +# ac_fn_fc_try_compile LINENO +# --------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_fc_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_fc_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_fc_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_cxx_try_link LINENO +# ------------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_cxx_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_link + +# ac_fn_f77_try_link LINENO +# ------------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_f77_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_f77_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_f77_try_link + +# ac_fn_fc_try_link LINENO +# ------------------------ +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_fc_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_fc_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_fc_try_link + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... 
" >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. */ + +#include +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main (void) +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that +# executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: program exited with status $ac_status" >&5 + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR +# ------------------------------------------------------------------ +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. 
Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. +ac_fn_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +printf %s "checking whether $as_decl_name is declared... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + eval ac_save_FLAGS=\$$6 + as_fn_append $6 " $5" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + eval $6=\$ac_save_FLAGS + +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_check_decl + +# ac_fn_cxx_try_cpp LINENO +# ------------------------ +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_cpp + +# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES +# -------------------------------------------- +# Tries to find the compile-time value of EXPR in a program that includes +# INCLUDES, setting VAR accordingly. Returns whether the value could be +# computed +ac_fn_c_compute_int () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +static int test_array [1 - 2 * !(($2) >= 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_lo=0 ac_mid=0 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_hi=$ac_mid; break +else $as_nop + as_fn_arith $ac_mid + 1 && ac_lo=$as_val + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main (void) +{ +static int test_array [1 - 2 * !(($2) < 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_hi=-1 ac_mid=-1 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +static int test_array [1 - 2 * !(($2) >= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_lo=$ac_mid; break +else $as_nop + as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done +else $as_nop + ac_lo= ac_hi= +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_hi=$ac_mid +else $as_nop + as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +done +case $ac_lo in #(( +?*) eval "$3=\$ac_lo"; ac_retval=0 ;; +'') ac_retval=1 ;; +esac + else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +static long int longval (void) { return $2; } +static unsigned long int ulongval (void) { return $2; } +#include +#include +int +main (void) +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! 
f) + return 1; + if (($2) < 0) + { + long int i = longval (); + if (i != ($2)) + return 1; + fprintf (f, "%ld", i); + } + else + { + unsigned long int i = ulongval (); + if (i != ($2)) + return 1; + fprintf (f, "%lu", i); + } + /* Do not output a trailing newline, as this causes \r\n confusion + on some platforms. */ + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + echo >>conftest.val; read $3 &5 +printf %s "checking for $2.$3... " >&6; } +if eval test \${$4+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main (void) +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$4=yes" +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main (void) +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$4=yes" +else $as_nop + eval "$4=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$4 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_member + +# ac_fn_cxx_check_header_compile LINENO HEADER VAR INCLUDES +# --------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_cxx_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... 
" >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_cxx_check_header_compile +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by StarPU $as_me 1.4.10, which was +generated by GNU Autoconf 2.71. Invocation command line was + + $ $0$ac_configure_args_raw + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" + # Save into config.log some information that might help in debugging. + { + echo + + printf "%s\n" "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + printf "%s\n" "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + printf "%s\n" "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + printf "%s\n" "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +printf "%s\n" "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + ac_site_files="$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" +else + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" +fi + +for ac_site_file in $ac_site_files +do + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . 
"$ac_site_file" \ + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". 
The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +// Does the compiler advertise C99 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. 
+ struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +// Does the compiler advertise C11 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! +char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. 
+struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). +ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + +# Test code for whether the C++ compiler supports C++98 (global declarations) +ac_cxx_conftest_cxx98_globals=' +// Does the compiler advertise C++98 conformance? +#if !defined __cplusplus || __cplusplus < 199711L +# error "Compiler does not advertise C++98 conformance" +#endif + +// These inclusions are to reject old compilers that +// lack the unsuffixed header files. +#include +#include + +// and are *not* freestanding headers in C++98. +extern void assert (int); +namespace std { + extern int strcmp (const char *, const char *); +} + +// Namespaces, exceptions, and templates were all added after "C++ 2.0". 
+using std::exception; +using std::strcmp; + +namespace { + +void test_exception_syntax() +{ + try { + throw "test"; + } catch (const char *s) { + // Extra parentheses suppress a warning when building autoconf itself, + // due to lint rules shared with more typical C programs. + assert (!(strcmp) (s, "test")); + } +} + +template struct test_template +{ + T const val; + explicit test_template(T t) : val(t) {} + template T add(U u) { return static_cast(u) + val; } +}; + +} // anonymous namespace +' + +# Test code for whether the C++ compiler supports C++98 (body of main) +ac_cxx_conftest_cxx98_main=' + assert (argc); + assert (! argv[0]); +{ + test_exception_syntax (); + test_template tt (2.0); + assert (tt.add (4) == 6.0); + assert (true && !false); +} +' + +# Test code for whether the C++ compiler supports C++11 (global declarations) +ac_cxx_conftest_cxx11_globals=' +// Does the compiler advertise C++ 2011 conformance? +#if !defined __cplusplus || __cplusplus < 201103L +# error "Compiler does not advertise C++11 conformance" +#endif + +namespace cxx11test +{ + constexpr int get_val() { return 20; } + + struct testinit + { + int i; + double d; + }; + + class delegate + { + public: + delegate(int n) : n(n) {} + delegate(): delegate(2354) {} + + virtual int getval() { return this->n; }; + protected: + int n; + }; + + class overridden : public delegate + { + public: + overridden(int n): delegate(n) {} + virtual int getval() override final { return this->n * 2; } + }; + + class nocopy + { + public: + nocopy(int i): i(i) {} + nocopy() = default; + nocopy(const nocopy&) = delete; + nocopy & operator=(const nocopy&) = delete; + private: + int i; + }; + + // for testing lambda expressions + template Ret eval(Fn f, Ret v) + { + return f(v); + } + + // for testing variadic templates and trailing return types + template auto sum(V first) -> V + { + return first; + } + template auto sum(V first, Args... 
rest) -> V + { + return first + sum(rest...); + } +} +' + +# Test code for whether the C++ compiler supports C++11 (body of main) +ac_cxx_conftest_cxx11_main=' +{ + // Test auto and decltype + auto a1 = 6538; + auto a2 = 48573953.4; + auto a3 = "String literal"; + + int total = 0; + for (auto i = a3; *i; ++i) { total += *i; } + + decltype(a2) a4 = 34895.034; +} +{ + // Test constexpr + short sa[cxx11test::get_val()] = { 0 }; +} +{ + // Test initializer lists + cxx11test::testinit il = { 4323, 435234.23544 }; +} +{ + // Test range-based for + int array[] = {9, 7, 13, 15, 4, 18, 12, 10, 5, 3, + 14, 19, 17, 8, 6, 20, 16, 2, 11, 1}; + for (auto &x : array) { x += 23; } +} +{ + // Test lambda expressions + using cxx11test::eval; + assert (eval ([](int x) { return x*2; }, 21) == 42); + double d = 2.0; + assert (eval ([&](double x) { return d += x; }, 3.0) == 5.0); + assert (d == 5.0); + assert (eval ([=](double x) mutable { return d += x; }, 4.0) == 9.0); + assert (d == 5.0); +} +{ + // Test use of variadic templates + using cxx11test::sum; + auto a = sum(1); + auto b = sum(1, 2); + auto c = sum(1.0, 2.0, 3.0); +} +{ + // Test constructor delegation + cxx11test::delegate d1; + cxx11test::delegate d2(); + cxx11test::delegate d3(45); +} +{ + // Test override and final + cxx11test::overridden o1(55464); +} +{ + // Test nullptr + char *c = nullptr; +} +{ + // Test template brackets + test_template<::test_template> v(test_template(12)); +} +{ + // Unicode literals + char const *utf8 = u8"UTF-8 string \u2500"; + char16_t const *utf16 = u"UTF-8 string \u2500"; + char32_t const *utf32 = U"UTF-32 string \u2500"; +} +' + +# Test code for whether the C compiler supports C++11 (complete). 
+# The complete C++11 program is the C++98 global declarations followed by the
+# C++11 ones, then a main() whose body is the C++98 test body followed by the
+# C++11 test body.  main() returns `ok`, which is initialised to 0.
+ac_cxx_conftest_cxx11_program="${ac_cxx_conftest_cxx98_globals}
+${ac_cxx_conftest_cxx11_globals}
+
+int
+main (int argc, char **argv)
+{
+  int ok = 0;
+  ${ac_cxx_conftest_cxx98_main}
+  ${ac_cxx_conftest_cxx11_main}
+  return ok;
+}
+"
+
+# Test code for whether the C++ compiler supports C++98 (complete).
+# Same layout as the C++11 program, but built from the C++98 fragments only.
+ac_cxx_conftest_cxx98_program="${ac_cxx_conftest_cxx98_globals}
+int
+main (int argc, char **argv)
+{
+  int ok = 0;
+  ${ac_cxx_conftest_cxx98_main}
+  return ok;
+}
+"
+
+# Each entry added to ac_header_c_list is three space-separated words: the
+# header file name, the same name with '.' and '/' replaced by '_', and a
+# HAVE_* macro name.  Entries added to ac_func_c_list are two words: the
+# function name and a HAVE_* macro name.  How the lists are consumed is not
+# visible in this part of the script.
+as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H"
+as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H"
+as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H"
+as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H"
+as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H"
+as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H"
+as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H"
+as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H"
+as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H"
+as_fn_append ac_header_c_list " sys/param.h sys_param_h HAVE_SYS_PARAM_H"
+as_fn_append ac_func_c_list " getpagesize HAVE_GETPAGESIZE"
+
+# Auxiliary files required by this configure script.
+ac_aux_files="ltmain.sh compile ar-lib missing install-sh config.guess config.sub"
+
+# Locations in which to look for auxiliary files.
+# Only one candidate is listed: the build-aux directory under $srcdir.
+ac_aux_dir_candidates="${srcdir}/build-aux"
+
+# Search for a directory containing all of the required auxiliary files,
+# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates.
+# If we don't find one directory that contains all the files we need,
+# we report the set of missing files from the *first* directory in
+# $ac_aux_dir_candidates and give up.
+# Walk $ac_aux_dir_candidates and stop at the first directory that holds every
+# file named in $ac_aux_files; that directory (with a trailing '/') becomes
+# $ac_aux_dir.  Missing files are only collected for the first candidate.
+ac_missing_aux_files=""
+ac_first_candidate=:
+printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in $ac_aux_dir_candidates
+do
+  IFS=$as_save_IFS
+  # Normalise the candidate so that it always ends in a slash.
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+  as_found=:
+
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5
+  ac_aux_dir_found=yes
+  ac_install_sh=
+  for ac_aux in $ac_aux_files
+  do
+    # As a special case, if "install-sh" is required, that requirement
+    # can be satisfied by any of "install-sh", "install.sh", or "shtool",
+    # and $ac_install_sh is set appropriately for whichever one is found.
+    if test x"$ac_aux" = x"install-sh"
+    then
+      if test -f "${as_dir}install-sh"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5
+        ac_install_sh="${as_dir}install-sh -c"
+      elif test -f "${as_dir}install.sh"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5
+        ac_install_sh="${as_dir}install.sh -c"
+      elif test -f "${as_dir}shtool"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5
+        ac_install_sh="${as_dir}shtool install -c"
+      else
+        ac_aux_dir_found=no
+        if $ac_first_candidate; then
+          ac_missing_aux_files="${ac_missing_aux_files} install-sh"
+        else
+          break
+        fi
+      fi
+    else
+      if test -f "${as_dir}${ac_aux}"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5
+      else
+        ac_aux_dir_found=no
+        if $ac_first_candidate; then
+          ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}"
+        else
+          break
+        fi
+      fi
+    fi
+  done
+  if test "$ac_aux_dir_found" = yes; then
+    ac_aux_dir="$as_dir"
+    break
+  fi
+  ac_first_candidate=false
+
+  # Reached only when this candidate was incomplete; as_found therefore stays
+  # true only if the loop was left through the 'break' above.
+  as_found=false
+done
+IFS=$as_save_IFS
+if $as_found
+then :
+
+else $as_nop
+  as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5
+fi
+
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+if test -f "${ac_aux_dir}config.guess"; then
+  ac_config_guess="$SHELL ${ac_aux_dir}config.guess"
+fi
+if test -f "${ac_aux_dir}config.sub"; then
+  ac_config_sub="$SHELL ${ac_aux_dir}config.sub"
+fi
+# NOTE(review): the test below uses "$ac_aux_dir/configure" while the
+# assignment uses "${ac_aux_dir}configure"; ac_aux_dir comes from as_dir,
+# which the loop above normalises to end in '/', so the tested path contains
+# a doubled slash.
+if test -f "$ac_aux_dir/configure"; then
+  ac_configure="$SHELL ${ac_aux_dir}configure"
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+# For each variable, the cached set/value pair (ac_cv_env_*) is compared with
+# the pair recorded for this run (ac_env_*).
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+        # differences in whitespace do not lead to failure.
+        # The unquoted expansions let the shell split and rejoin the words,
+        # so the *_w copies differ only when the words themselves differ.
+        ac_old_val_w=`echo x $ac_old_val`
+        ac_new_val_w=`echo x $ac_new_val`
+        if test "$ac_old_val_w" != "$ac_new_val_w"; then
+          { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+          ac_cache_corrupted=:
+        else
+          { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+          eval $ac_var=\$ac_old_val
+        fi
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;}
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    # Values containing a single quote get each quote rewritten by sed so the
+    # argument can be wrapped in single quotes below.
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file'
+ and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+# Select the C language settings: source extension plus the preprocess,
+# compile and link command templates, which log to descriptor 5.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+# libtool doesn't actually properly manage a space in the workdir
+# NOTE(review): $am_lf is first assigned further down, in the build
+# environment sanity check; unless something outside this excerpt sets it
+# earlier it expands to nothing here, so this pattern does not reject a
+# newline.
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf\ \ ]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+
+
+# Split $PACKAGE_VERSION on '.' into major, minor and release fields.
+# NOTE(review): STARPU_RELEASE_VERSION is assigned twice in a row; the second
+# assignment, which also strips an "rc..." suffix, is the one that takes
+# effect.
+STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
+STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`"
+
+
+
+
+
+printf "%s\n" "#define STARPU_MAJOR_VERSION $STARPU_MAJOR_VERSION" >>confdefs.h
+
+
+printf "%s\n" "#define STARPU_MINOR_VERSION $STARPU_MINOR_VERSION" >>confdefs.h
+
+
+printf "%s\n" "#define STARPU_RELEASE_VERSION $STARPU_RELEASE_VERSION" >>confdefs.h
+
+
+# Source the STARPU-VERSION file shipped in the source tree.
+. "$srcdir/STARPU-VERSION"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  # Make sure we can run config.sub.
+$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 ||
+  as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5
+
+# Canonical build type: taken from $build_alias, or guessed with config.guess
+# when that is empty, then canonicalised with config.sub and cached in
+# ac_cv_build.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+printf %s "checking build system type... " >&6; }
+if test ${ac_cv_build+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_build_alias=$build_alias
+test "x$ac_build_alias" = x &&
+  ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"`
+test "x$ac_build_alias" = x &&
+  as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
+ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` ||
+  as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+printf "%s\n" "$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
+esac
+build=$ac_cv_build
+# Split the triplet on '-': first field is the cpu, second the vendor, and
+# whatever remains is the os.
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+# Canonical host type: defaults to the build type when $host_alias is empty.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+printf %s "checking host system type... " >&6; }
+if test ${ac_cv_host+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "x$host_alias" = x; then
+  ac_cv_host=$ac_cv_build
+else
+  ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` ||
+    as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+printf "%s\n" "$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+# Canonical target type: defaults to the host type when $target_alias is empty.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking target system type" >&5
+printf %s "checking target system type... " >&6; }
+if test ${ac_cv_target+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "x$target_alias" = x; then
+  ac_cv_target=$ac_cv_host
+else
+  ac_cv_target=`$SHELL "${ac_aux_dir}config.sub" $target_alias` ||
+    as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $target_alias failed" "$LINENO" 5
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5
+printf "%s\n" "$ac_cv_target" >&6; }
+case $ac_cv_target in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;;
+esac
+target=$ac_cv_target
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_target
+shift
+target_cpu=$1
+target_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+target_os=$*
+IFS=$ac_save_IFS
+case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac
+
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+# When a target alias was given and prefix, suffix and transform are all still
+# at their initial values, the alias followed by '-' becomes the program
+# prefix.
+test -n "$target_alias" &&
+  test "$program_prefix$program_suffix$program_transform_name" = \
+    NONENONEs,x,x, &&
+  program_prefix=${target_alias}-
+
+
+am__api_version='1.16'
+
+
+  # Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+printf %s "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if test ${ac_cv_path_install+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    # Account for fact that we put trailing slashes in our PATH walk.
+case $as_dir in #((
+  ./ | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+        if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then
+          if test $ac_prog = install &&
+            grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+            # AIX install. It has an incompatible calling convention.
+            :
+          elif test $ac_prog = install &&
+            grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+            # program-specific install script used by HP pwplus--don't use.
+            :
+          else
+            # Accept the candidate only if it copies two files into a
+            # directory in one call and leaves both sources in place.
+            rm -rf conftest.one conftest.two conftest.dir
+            echo one > conftest.one
+            echo two > conftest.two
+            mkdir conftest.dir
+            if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir/" &&
+              test -s conftest.one && test -s conftest.two &&
+              test -s conftest.dir/conftest.one &&
+              test -s conftest.dir/conftest.two
+            then
+              ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c"
+              break 3
+            fi
+          fi
+        fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test ${ac_cv_path_install+y}; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script. Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+printf "%s\n" "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+printf %s "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name. Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \ ]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+# The subshell succeeds when 'ls -t' lists the freshly written conftest.file
+# before $srcdir/configure; a second attempt is made after sleeping 1 second.
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+        # -L didn't work.
+        set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+        && test "$*" != "X conftest.file $srcdir/configure"; then
+
+        # If neither matched, then we have a broken ls. This can happen
+        # if, for instance, CONFIG_SHELL is bash and it inherits a
+        # broken ls alias from the environment. This has actually
+        # happened. Such a system could not be considered "sane".
+        as_fn_error $? "ls -t appears to fail. Make sure there is not a broken
+ alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+# The one-second sleep runs in the background; its pid is kept in
+# am_sleep_pid.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+
+rm -f conftest.file
+
+# Prepend sed commands that add the program prefix and suffix to the
+# transform expression.
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"`
+
+
+# Expand $ac_aux_dir to an absolute path.
+# am_aux_dir is the absolute form of $ac_aux_dir, obtained by changing into it
+# and printing the working directory.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+
+  if test x"${MISSING+set}" != xset; then
+  MISSING="\${SHELL} '$am_aux_dir/missing'"
+fi
+# Use eval to expand $SHELL
+# am_missing_run is "$MISSING " when the script accepts --is-lightweight, and
+# empty (with a warning) otherwise.
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
+printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+# Unless install_sh is already set, point it at install-sh in the aux
+# directory, quoting the path when the first pattern below matches.
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\ *)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# run "make install-strip". However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+# The search below only runs when $cross_compiling is not "no": first for
+# "${ac_tool_prefix}strip", then for plain "strip", falling back to ":".
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+printf "%s\n" "$STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+printf "%s\n" "$ac_ct_STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    # Warn once (tracked through ac_tool_warned) when cross compiling with an
+    # unprefixed tool.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+
+# Look along $PATH (plus /opt/sfw/bin) for a mkdir or gmkdir whose --version
+# output matches one of the patterns below; otherwise fall back to
+# "$ac_install_sh -d".
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir -p" >&5
+printf %s "checking for a race-free mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if test ${ac_cv_path_mkdir+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in mkdir gmkdir; do
+         for ac_exec_ext in '' $ac_executable_extensions; do
+           as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue
+           case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #(
+             'mkdir ('*'coreutils) '* | \
+             'BusyBox '* | \
+             'mkdir (fileutils) '4.1*)
+               ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext
+               break 3;;
+           esac
+         done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  # Remove a directory literally named "--version" if one exists in the
+  # current directory after the probing above.
+  test -d ./--version && rmdir ./--version
+  if test ${ac_cv_path_mkdir+y}; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script. Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+printf "%s\n" "$MKDIR_P" >&6; }
+
+# Pick the first of gawk, mawk, nawk, awk that is found on $PATH.
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_AWK+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+printf "%s\n" "$AWK" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+# Run a two-line makefile that echoes $(MAKE) between markers; SET_MAKE stays
+# empty when make printed a non-empty value and becomes "MAKE=..." otherwise.
+# The cache variable name embeds the make program name, with '+' turned into
+# 'p' and other non-identifier characters into '_'.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval test \${ac_cv_prog_make_${ac_make}_set+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+	@echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+  SET_MAKE=
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# am__leading_dot is '.' when a directory whose name starts with a dot can be
+# created here, and '_' otherwise.
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+
+# Check whether --enable-silent-rules was given.
+if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... " >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='starpu' + VERSION='1.4.10' + + +printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h + + +printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h + +# Some tools Automake needs. 
+ +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to create a pax tar archive" >&5 +printf %s "checking how to create a pax tar archive... " >&6; } + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_pax-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + { echo "$as_me:$LINENO: $_am_tar --version" >&5 + ($_am_tar --version) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && break + done + am__tar="$_am_tar --format=posix -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=posix -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x pax -w "$$tardir"' + am__tar_='pax -L -x pax -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H pax -L' + am__tar_='find "$tardir" -print | cpio -o -H pax -L' + am__untar='cpio -i -H pax -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_pax}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 + (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + rm -rf conftest.dir + if test -s conftest.tar; then + { echo "$as_me:$LINENO: $am__untar &5 + ($am__untar &5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + { echo "$as_me:$LINENO: cat conftest.dir/file" >&5 + (cat conftest.dir/file) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + if test ${am_cv_prog_tar_pax+y} +then : + printf %s "(cached) " >&6 +else $as_nop + am_cv_prog_tar_pax=$_am_tool +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_tar_pax" >&5 +printf "%s\n" "$am_cv_prog_tar_pax" >&6; } + + + + + +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi + +if test -z "$ETAGS"; then + ETAGS=etags +fi + +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. 
+ +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi + + +# Check whether --enable-silent-rules was given. +if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=0;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... " >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + + + + + + + + + + + + + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +printf %s "checking whether ${MAKE-make} supports the include directive... 
" >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +printf "%s\n" "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test ${enable_dependency_tracking+y} +then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... " >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else $as_nop + ac_file='' +fi +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else $as_nop + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +printf %s "checking whether $CC understands -c and -o together... " >&6; } +if test ${am_cv_prog_cc_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. 
+ # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +depcc="$CC" am_compiler_list= + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CC_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". 
+ rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. 
+ test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + + if test -n "$ac_tool_prefix"; then + for ac_prog in ar lib "link -lib" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar lib "link -lib" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 +printf %s "checking the archiver ($AR) interface... 
" >&6; } +if test ${am_cv_ar_interface+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + am_cv_ar_interface=ar + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int some_variable = 0; +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 +printf "%s\n" "$am_cv_ar_interface" >&6; } + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. + # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. 
+ # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + as_fn_error $? "could not determine $AR interface" "$LINENO" 5 + ;; +esac + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else $as_nop + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +printf %s "checking whether $CC understands -c and -o together... " >&6; } +if test ${am_cv_prog_cc_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. 
+ # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +depcc="$CC" am_compiler_list= + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CC_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". 
+ rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. 
+ test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + + + + + + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +printf "%s\n" "$CXX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +printf "%s\n" "$ac_ct_CXX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } +done + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C++" >&5 +printf %s "checking whether the compiler supports GNU C++... " >&6; } +if test ${ac_cv_cxx_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_cxx_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+y} +ac_save_CXXFLAGS=$CXXFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +printf %s "checking whether $CXX accepts -g... " >&6; } +if test ${ac_cv_prog_cxx_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_g=yes +else $as_nop + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + +else $as_nop + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +printf "%s\n" "$ac_cv_prog_cxx_g" >&6; } +if test $ac_test_CXXFLAGS; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_prog_cxx_stdcxx=no +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 +printf %s "checking for $CXX option to enable C++11 features... " >&6; } +if test ${ac_cv_prog_cxx_cxx11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_cxx11=no +ac_save_CXX=$CXX +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_cxx_conftest_cxx11_program +_ACEOF +for ac_arg in '' -std=gnu++11 -std=gnu++0x -std=c++11 -std=c++0x -qlanglvl=extended0x -AA +do + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx11" != "xno" && break +done +rm -f conftest.$ac_ext +CXX=$ac_save_CXX +fi + +if test "x$ac_cv_prog_cxx_cxx11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx11" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx11" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11 + ac_prog_cxx_stdcxx=cxx11 +fi +fi +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 +printf %s "checking for $CXX option to enable C++98 features... " >&6; } +if test ${ac_cv_prog_cxx_cxx98+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_cxx98=no +ac_save_CXX=$CXX +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_cxx_conftest_cxx98_program +_ACEOF +for ac_arg in '' -std=gnu++98 -std=c++98 -qlanglvl=extended -AA +do + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx98=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx98" != "xno" && break +done +rm -f conftest.$ac_ext +CXX=$ac_save_CXX +fi + +if test "x$ac_cv_prog_cxx_cxx98" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx98" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx98" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx98" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98 + ac_prog_cxx_stdcxx=cxx98 +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CXX" am_compiler_list= + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CXX_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. 
+ cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CXX_dependencies_compiler_type" >&6; } +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +printf %s "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test ${ac_cv_prog_CPP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # Double quotes because $CC needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO" +then : + +else $as_nop + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO" +then : + # Broken: success on invalid input. +continue +else $as_nop + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok +then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +printf "%s\n" "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO" +then : + +else $as_nop + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO" +then : + # Broken: success on invalid input. +continue +else $as_nop + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok +then : + +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +printf %s "checking for a sed that does not truncate output... " >&6; } +if test ${ac_cv_path_SED+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in sed gsed + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. 
+ # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +printf "%s\n" "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +printf %s "checking whether ln -s works... 
" >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +printf "%s\n" "no, using $LN_S" >&6; } +fi + +ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$F77"; then + ac_cv_prog_F77="$F77" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_F77="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +F77=$ac_cv_prog_F77 +if test -n "$F77"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $F77" >&5 +printf "%s\n" "$F77" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$F77" && break + done +fi +if test -z "$F77"; then + ac_ct_F77=$F77 + for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_F77"; then + ac_cv_prog_ac_ct_F77="$ac_ct_F77" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_F77="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_F77=$ac_cv_prog_ac_ct_F77 +if test -n "$ac_ct_F77"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_F77" >&5 +printf "%s\n" "$ac_ct_F77" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_F77" && break +done + + if test "x$ac_ct_F77" = x; then + F77="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + F77=$ac_ct_F77 + fi +fi + + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } +done +rm -f a.out + +# If we don't use `.F' as extension, the preprocessor is not run on the +# input file. (Note that this only needs to work for GNU compilers.) +ac_save_ext=$ac_ext +ac_ext=F +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU Fortran 77" >&5 +printf %s "checking whether the compiler supports GNU Fortran 77... " >&6; } +if test ${ac_cv_f77_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main +#ifndef __GNUC__ + choke me +#endif + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_f77_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_f77_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + +ac_ext=$ac_save_ext +ac_test_FFLAGS=${FFLAGS+y} +ac_save_FFLAGS=$FFLAGS +FFLAGS= +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $F77 accepts -g" >&5 +printf %s "checking whether $F77 accepts -g... 
" >&6; } +if test ${ac_cv_prog_f77_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + FFLAGS=-g +cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO" +then : + ac_cv_prog_f77_g=yes +else $as_nop + ac_cv_prog_f77_g=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_f77_g" >&5 +printf "%s\n" "$ac_cv_prog_f77_g" >&6; } +if test $ac_test_FFLAGS; then + FFLAGS=$ac_save_FFLAGS +elif test $ac_cv_prog_f77_g = yes; then + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-g -O2" + else + FFLAGS="-g" + fi +else + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-O2" + else + FFLAGS= + fi +fi + +if test $ac_compiler_gnu = yes; then + G77=yes +else + G77= +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$FC"; then + ac_cv_prog_FC="$FC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +FC=$ac_cv_prog_FC +if test -n "$FC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 +printf "%s\n" "$FC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$FC" && break + done +fi +if test -z "$FC"; then + ac_ct_FC=$FC + for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FC"; then + ac_cv_prog_ac_ct_FC="$ac_ct_FC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_FC=$ac_cv_prog_ac_ct_FC +if test -n "$ac_ct_FC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FC" >&5 +printf "%s\n" "$ac_ct_FC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_FC" && break +done + + if test "x$ac_ct_FC" = x; then + FC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + FC=$ac_ct_FC + fi +fi + + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } +done +rm -f a.out + +# If we don't use `.F' as extension, the preprocessor is not run on the +# input file. (Note that this only needs to work for GNU compilers.) +ac_save_ext=$ac_ext +ac_ext=F +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU Fortran" >&5 +printf %s "checking whether the compiler supports GNU Fortran... " >&6; } +if test ${ac_cv_fc_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main +#ifndef __GNUC__ + choke me +#endif + + end +_ACEOF +if ac_fn_fc_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_fc_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_fc_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + +ac_ext=$ac_save_ext +ac_test_FCFLAGS=${FCFLAGS+y} +ac_save_FCFLAGS=$FCFLAGS +FCFLAGS= +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $FC accepts -g" >&5 +printf %s "checking whether $FC accepts -g... 
" >&6; } +if test ${ac_cv_prog_fc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + FCFLAGS=-g +cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_fc_try_compile "$LINENO" +then : + ac_cv_prog_fc_g=yes +else $as_nop + ac_cv_prog_fc_g=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_g" >&5 +printf "%s\n" "$ac_cv_prog_fc_g" >&6; } +if test $ac_test_FCFLAGS; then + FCFLAGS=$ac_save_FCFLAGS +elif test $ac_cv_prog_fc_g = yes; then + if test "x$ac_cv_fc_compiler_gnu" = xyes; then + FCFLAGS="-g -O2" + else + FCFLAGS="-g" + fi +else + if test "x$ac_cv_fc_compiler_gnu" = xyes; then + FCFLAGS="-O2" + else + FCFLAGS= + fi +fi + +if test $ac_compiler_gnu = yes; then + GFC=yes +else + GFC= +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... 
" >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + + + +for ac_prog in gstat stat +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PROG_STAT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PROG_STAT"; then + ac_cv_prog_PROG_STAT="$PROG_STAT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PROG_STAT="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PROG_STAT=$ac_cv_prog_PROG_STAT +if test -n "$PROG_STAT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_STAT" >&5 +printf "%s\n" "$PROG_STAT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PROG_STAT" && break +done + +for ac_prog in gdate date +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PROG_DATE+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PROG_DATE"; then + ac_cv_prog_PROG_DATE="$PROG_DATE" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PROG_DATE="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PROG_DATE=$ac_cv_prog_PROG_DATE +if test -n "$PROG_DATE"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_DATE" >&5 +printf "%s\n" "$PROG_DATE" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PROG_DATE" && break +done + +for ac_prog in find +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PROG_FIND+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $PROG_FIND in + [\\/]* | ?:[\\/]*) + ac_cv_path_PROG_FIND="$PROG_FIND" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PROG_FIND="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PROG_FIND=$ac_cv_path_PROG_FIND +if test -n "$PROG_FIND"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_FIND" >&5 +printf "%s\n" "$PROG_FIND" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PROG_FIND" && break +done + +for ac_prog in clang +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PROG_CLANG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PROG_CLANG"; then + ac_cv_prog_PROG_CLANG="$PROG_CLANG" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PROG_CLANG="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PROG_CLANG=$ac_cv_prog_PROG_CLANG +if test -n "$PROG_CLANG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_CLANG" >&5 +printf "%s\n" "$PROG_CLANG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PROG_CLANG" && break +done + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for parallel that supports semaphores with exit code" >&5 +printf %s "checking for parallel that supports semaphores with exit code... " >&6; } +if test ${ac_cv_path_PARALLEL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$PARALLEL"; then + ac_path_PARALLEL_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in parallel + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_PARALLEL="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_PARALLEL" || continue +parallel --semaphore --id starpu --fg --fg-exit -j 2 exit 42 > /dev/null 2>&1 + [ $? 
= 42 ] && ac_cv_path_PARALLEL=$ac_path_PARALLEL ac_path_PARALLEL_found=: + $ac_path_PARALLEL_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_PARALLEL"; then + ac_cv_path_PARALLEL=no + fi +else + ac_cv_path_PARALLEL=$PARALLEL +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_PARALLEL" >&5 +printf "%s\n" "$ac_cv_path_PARALLEL" >&6; } +PARALLEL=$ac_cv_path_PARALLEL + + if test "x$PARALLEL" != "xno"; then + HAVE_PARALLEL_TRUE= + HAVE_PARALLEL_FALSE='#' +else + HAVE_PARALLEL_TRUE='#' + HAVE_PARALLEL_FALSE= +fi + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +printf "%s\n" "$PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +printf %s "checking pkg-config is at least version $_pkg_min_version... " >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + PKG_CONFIG="" + fi + +fi + +# Check whether --enable-simgrid was given. 
+if test ${enable_simgrid+y} +then : + enableval=$enable_simgrid; enable_simgrid=$enableval +else $as_nop + enable_simgrid=no +fi + + +if test x$enable_perf_debug = xyes; then + enable_shared=no +fi + +default_enable_mpi_check=no + +if test x$enable_simgrid = xyes ; then + default_enable_mpi=no +else + default_enable_mpi=maybe +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wno-unused" >&5 +printf %s "checking whether C compiler supports -Wno-unused... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wno-unused" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + APP_CFLAGS="$APP_CFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wno-unused" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wno-unused" >&5 +printf %s "checking whether MPI C compiler supports -Wno-unused... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + APP_CFLAGS="$APP_CFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wno-unused" >&5 +printf %s "checking whether CXX compiler supports -Wno-unused... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wno-unused" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + APP_CXXFLAGS="$APP_CXXFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wno-unused" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wno-unused... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wno-unused" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + APP_FFLAGS="$APP_FFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wno-unused" >&5 +printf %s "checking whether Fortran compiler supports -Wno-unused... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wno-unused" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + APP_FCFLAGS="$APP_FCFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wno-unused" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wno-unused... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + APP_FCFLAGS="$APP_FCFLAGS -Wno-unused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wno-unused-dummy-argument" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wno-unused-dummy-argument... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wno-unused-dummy-argument" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + APP_FFLAGS="$APP_FFLAGS -Wno-unused-dummy-argument" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wno-unused-dummy-argument" >&5 +printf %s "checking whether Fortran compiler supports -Wno-unused-dummy-argument... 
" >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wno-unused-dummy-argument" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wno-unused-dummy-argument" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wno-unused-dummy-argument... " >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item 
+ else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi + +ac_func= +for ac_item in $ac_func_c_list +do + if test $ac_func; then + ac_fn_c_check_func "$LINENO" $ac_func ac_cv_func_$ac_func + if eval test \"x\$ac_cv_func_$ac_func\" = xyes; then + echo "#define $ac_item 1" >> confdefs.h + fi + ac_func= + else + ac_func=$ac_item + fi +done + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working mmap" >&5 +printf %s "checking for working mmap... " >&6; } +if test ${ac_cv_func_mmap_fixed_mapped+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test "$cross_compiling" = yes +then : + case "$host_os" in # (( + # Guess yes on platforms where we know the result. + linux*) ac_cv_func_mmap_fixed_mapped=yes ;; + # If we don't know, assume the worst. + *) ac_cv_func_mmap_fixed_mapped=no ;; + esac +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +/* malloc might have been renamed as rpl_malloc. */ +#undef malloc + +/* Thanks to Mike Haertel and Jim Avera for this test. + Here is a matrix of mmap possibilities: + mmap private not fixed + mmap private fixed at somewhere currently unmapped + mmap private fixed at somewhere already mapped + mmap shared not fixed + mmap shared fixed at somewhere currently unmapped + mmap shared fixed at somewhere already mapped + For private mappings, we should verify that changes cannot be read() + back from the file, nor mmap's back from the file at a different + address. (There have been systems where private was not correctly + implemented like the infamous i386 svr4.0, and systems where the + VM page cache was not coherent with the file system buffer cache + like early versions of FreeBSD and possibly contemporary NetBSD.) 
+ For shared mappings, we should conversely verify that changes get + propagated back to all the places they're supposed to be. + + Grep wants private fixed already mapped. + The main things grep needs to know about mmap are: + * does it exist and is it safe to write into the mmap'd area + * how to use it (BSD variants) */ + +#include +#include + +/* This mess was copied from the GNU getpagesize.h. */ +#ifndef HAVE_GETPAGESIZE +# ifdef _SC_PAGESIZE +# define getpagesize() sysconf(_SC_PAGESIZE) +# else /* no _SC_PAGESIZE */ +# ifdef HAVE_SYS_PARAM_H +# include +# ifdef EXEC_PAGESIZE +# define getpagesize() EXEC_PAGESIZE +# else /* no EXEC_PAGESIZE */ +# ifdef NBPG +# define getpagesize() NBPG * CLSIZE +# ifndef CLSIZE +# define CLSIZE 1 +# endif /* no CLSIZE */ +# else /* no NBPG */ +# ifdef NBPC +# define getpagesize() NBPC +# else /* no NBPC */ +# ifdef PAGESIZE +# define getpagesize() PAGESIZE +# endif /* PAGESIZE */ +# endif /* no NBPC */ +# endif /* no NBPG */ +# endif /* no EXEC_PAGESIZE */ +# else /* no HAVE_SYS_PARAM_H */ +# define getpagesize() 8192 /* punt totally */ +# endif /* no HAVE_SYS_PARAM_H */ +# endif /* no _SC_PAGESIZE */ + +#endif /* no HAVE_GETPAGESIZE */ + +int +main (void) +{ + char *data, *data2, *data3; + const char *cdata2; + int i, pagesize; + int fd, fd2; + + pagesize = getpagesize (); + + /* First, make a file with some known garbage in it. */ + data = (char *) malloc (pagesize); + if (!data) + return 1; + for (i = 0; i < pagesize; ++i) + *(data + i) = rand (); + umask (0); + fd = creat ("conftest.mmap", 0600); + if (fd < 0) + return 2; + if (write (fd, data, pagesize) != pagesize) + return 3; + close (fd); + + /* Next, check that the tail of a page is zero-filled. File must have + non-zero length, otherwise we risk SIGBUS for entire page. 
*/ + fd2 = open ("conftest.txt", O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd2 < 0) + return 4; + cdata2 = ""; + if (write (fd2, cdata2, 1) != 1) + return 5; + data2 = (char *) mmap (0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0L); + if (data2 == MAP_FAILED) + return 6; + for (i = 0; i < pagesize; ++i) + if (*(data2 + i)) + return 7; + close (fd2); + if (munmap (data2, pagesize)) + return 8; + + /* Next, try to mmap the file at a fixed address which already has + something else allocated at it. If we can, also make sure that + we see the same garbage. */ + fd = open ("conftest.mmap", O_RDWR); + if (fd < 0) + return 9; + if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, fd, 0L)) + return 10; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data2 + i)) + return 11; + + /* Finally, make sure that changes to the mapped area do not + percolate back to the file as seen by read(). (This is a bug on + some variants of i386 svr4.0.) */ + for (i = 0; i < pagesize; ++i) + *(data2 + i) = *(data2 + i) + 1; + data3 = (char *) malloc (pagesize); + if (!data3) + return 12; + if (read (fd, data3, pagesize) != pagesize) + return 13; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data3 + i)) + return 14; + close (fd); + free (data); + free (data3); + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + ac_cv_func_mmap_fixed_mapped=yes +else $as_nop + ac_cv_func_mmap_fixed_mapped=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_mmap_fixed_mapped" >&5 +printf "%s\n" "$ac_cv_func_mmap_fixed_mapped" >&6; } +if test $ac_cv_func_mmap_fixed_mapped = yes; then + +printf "%s\n" "#define HAVE_MMAP 1" >>confdefs.h + +fi +rm -f conftest.mmap conftest.txt + + +############################################################################### +# # +# Forwarded options # +# # 
+# Move here options whose values are needed early # +# # +############################################################################### +# +# Check whether --enable-starpupy was given. +if test ${enable_starpupy+y} +then : + enableval=$enable_starpupy; enable_starpupy=$enableval +else $as_nop + enable_starpupy=maybe +fi + + +############################################################################### +# # +# Profiling tool support # +# # +############################################################################### + +# Check whether --enable-prof-tool was given. +if test ${enable_prof_tool+y} +then : + enableval=$enable_prof_tool; enable_prof_tool=$enableval +else $as_nop + enable_prof_tool=yes +fi + +if test x$enable_prof_tool = xyes; then + +printf "%s\n" "#define STARPU_PROF_TOOL 1" >>confdefs.h + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for profiling tool support" >&5 +printf %s "checking for profiling tool support... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_prof_tool" >&5 +printf "%s\n" "$enable_prof_tool" >&6; } + +############################################################################### +# # +# Hierarchical dags support # +# # +############################################################################### + +# Check whether --enable-bubble was given. +if test ${enable_bubble+y} +then : + enableval=$enable_bubble; enable_bubble=$enableval +else $as_nop + enable_bubble=no +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for hierarchical dags - a.k.a bubble - support" >&5 +printf %s "checking for hierarchical dags - a.k.a bubble - support... 
" >&6; } + +if test x$enable_bubble = xyes; then + +printf "%s\n" "#define STARPU_BUBBLE 1" >>confdefs.h + +fi + + if test "x$enable_bubble" = "xyes"; then + STARPU_BUBBLE_TRUE= + STARPU_BUBBLE_FALSE='#' +else + STARPU_BUBBLE_TRUE='#' + STARPU_BUBBLE_FALSE= +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_bubble" >&5 +printf "%s\n" "$enable_bubble" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether bubble debug messages should be displayed" >&5 +printf %s "checking whether bubble debug messages should be displayed... " >&6; } +# Check whether --enable-bubble-verbose was given. +if test ${enable_bubble_verbose+y} +then : + enableval=$enable_bubble_verbose; enable_bubble_verbose=$enableval +else $as_nop + enable_bubble_verbose=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_bubble_verbose" >&5 +printf "%s\n" "$enable_bubble_verbose" >&6; } +if test x$enable_bubble_verbose = xextra; then + +printf "%s\n" "#define STARPU_BUBBLE_VERBOSE 1" >>confdefs.h + +fi + +############################################################################### +# # +# Drivers # +# # +############################################################################### + +# Check whether --enable-opencl-simulator was given. +if test ${enable_opencl_simulator+y} +then : + enableval=$enable_opencl_simulator; enable_opencl_simulator=$enableval +else $as_nop + enable_opencl_simulator=no +fi + +if test x$enable_opencl_simulator = xyes; then + enable_simgrid=yes + +printf "%s\n" "#define STARPU_OPENCL_SIMULATOR 1" >>confdefs.h + +fi + + +# Check whether --with-simgrid-dir was given. +if test ${with_simgrid_dir+y} +then : + withval=$with_simgrid_dir; + simgrid_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + +else $as_nop + simgrid_dir=no +fi + + + +# Check whether --with-simgrid-include-dir was given. 
+if test ${with_simgrid_include_dir+y} +then : + withval=$with_simgrid_include_dir; + simgrid_include_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + +else $as_nop + simgrid_include_dir=no +fi + + + +# Check whether --with-simgrid-lib-dir was given. +if test ${with_simgrid_lib_dir+y} +then : + withval=$with_simgrid_lib_dir; + simgrid_lib_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + +else $as_nop + simgrid_lib_dir=no +fi + + +if test x$enable_simgrid = xyes ; then + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SIMGRID" >&5 +printf %s "checking for SIMGRID... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$SIMGRID_CFLAGS"; then + pkg_cv_SIMGRID_CFLAGS="$SIMGRID_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"simgrid\""; } >&5 + ($PKG_CONFIG --exists --print-errors "simgrid") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SIMGRID_CFLAGS=`$PKG_CONFIG --cflags "simgrid" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$SIMGRID_LIBS"; then + pkg_cv_SIMGRID_LIBS="$SIMGRID_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"simgrid\""; } >&5 + ($PKG_CONFIG --exists --print-errors "simgrid") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SIMGRID_LIBS=`$PKG_CONFIG --libs "simgrid" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + SIMGRID_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "simgrid"` + else + SIMGRID_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "simgrid"` + fi + # Put the nasty error message in config.log where it belongs + echo "$SIMGRID_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + : +elif test $pkg_failed = untried; then + : +else + SIMGRID_CFLAGS=$pkg_cv_SIMGRID_CFLAGS + SIMGRID_LIBS=$pkg_cv_SIMGRID_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + : +fi + + if test "$simgrid_include_dir" != "no" ; then + SIMGRID_CFLAGS="-I$simgrid_include_dir $SIMGRID_CFLAGS" + fi + if test "$simgrid_lib_dir" != "no" ; then + SIMGRID_LIBS="-L$simgrid_lib_dir $SIMGRID_LIBS" + fi + if test "$simgrid_dir" != "no" ; then + SIMGRID_CFLAGS="-I$simgrid_dir/include $SIMGRID_CFLAGS" + SIMGRID_LIBS="-L$simgrid_dir/lib $SIMGRID_LIBS" + else + simgrid_dir="$(pkg-config --variable=prefix simgrid)" + fi + if test -n "$SIMGRID_CFLAGS" ; then + CFLAGS="$SIMGRID_CFLAGS $CFLAGS" + CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS" + NVCCFLAGS="$SIMGRID_CFLAGS $NVCCFLAGS" + HIPCCFLAGS="$SIMGRID_CFLAGS $HIPCCFLAGS" + fi + SAVED_LIBS="${LIBS}" + LIBS="$SIMGRID_LIBS $LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lsimgrid" >&5 +printf %s "checking for main in -lsimgrid... 
" >&6; } +if test ${ac_cv_lib_simgrid_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsimgrid $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_simgrid_main=yes +else $as_nop + ac_cv_lib_simgrid_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_simgrid_main" >&5 +printf "%s\n" "$ac_cv_lib_simgrid_main" >&6; } +if test "x$ac_cv_lib_simgrid_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBSIMGRID 1" >>confdefs.h + + LIBS="-lsimgrid $LIBS" + +else $as_nop + + as_fn_error $? "Simgrid support needs simgrid installed" "$LINENO" 5 + + +fi +ac_cv_lib_simgrid=ac_cv_lib_simgrid_main + + for ac_header in simgrid/msg.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/msg.h" "ac_cv_header_simgrid_msg_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_msg_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_MSG_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_MSG_H 1" >>confdefs.h + +fi + +done + for ac_header in msg/msg.h +do : + ac_fn_c_check_header_compile "$LINENO" "msg/msg.h" "ac_cv_header_msg_msg_h" "$ac_includes_default" +if test "x$ac_cv_header_msg_msg_h" = xyes +then : + printf "%s\n" "#define HAVE_MSG_MSG_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_MSG_MSG_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/host.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/host.h" "ac_cv_header_simgrid_host_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_host_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_HOST_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_HOST_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/link.h +do : 
+ ac_fn_c_check_header_compile "$LINENO" "simgrid/link.h" "ac_cv_header_simgrid_link_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_link_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_LINK_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_LINK_H 1" >>confdefs.h + +fi + +done + for ac_header in xbt/base.h +do : + ac_fn_c_check_header_compile "$LINENO" "xbt/base.h" "ac_cv_header_xbt_base_h" "$ac_includes_default" +if test "x$ac_cv_header_xbt_base_h" = xyes +then : + printf "%s\n" "#define HAVE_XBT_BASE_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_XBT_BASE_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/version.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/version.h" "ac_cv_header_simgrid_version_h" " + #ifdef STARPU_HAVE_XBT_BASE_H + #include + #endif + +" +if test "x$ac_cv_header_simgrid_version_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_VERSION_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_VERSION_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/simdag.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/simdag.h" "ac_cv_header_simgrid_simdag_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_simdag_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_SIMDAG_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_SIMDAG_H 1" >>confdefs.h + +fi + +done + for ac_header in xbt/synchro.h +do : + ac_fn_c_check_header_compile "$LINENO" "xbt/synchro.h" "ac_cv_header_xbt_synchro_h" "$ac_includes_default" +if test "x$ac_cv_header_xbt_synchro_h" = xyes +then : + printf "%s\n" "#define HAVE_XBT_SYNCHRO_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_XBT_SYNCHRO_H 1" >>confdefs.h + +fi + +done + for ac_header in xbt/config.h +do : + ac_fn_c_check_header_compile "$LINENO" "xbt/config.h" "ac_cv_header_xbt_config_h" "$ac_includes_default" +if test "x$ac_cv_header_xbt_config_h" = xyes +then : + printf "%s\n" "#define HAVE_XBT_CONFIG_H 1" 
>>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_XBT_CONFIG_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/actor.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/actor.h" "ac_cv_header_simgrid_actor_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_actor_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_ACTOR_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_ACTOR_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/engine.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/engine.h" "ac_cv_header_simgrid_engine_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_engine_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/semaphore.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/semaphore.h" "ac_cv_header_simgrid_semaphore_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_semaphore_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_SEMAPHORE_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_SEMAPHORE_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/mutex.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/mutex.h" "ac_cv_header_simgrid_mutex_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_mutex_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_MUTEX_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_MUTEX_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/cond.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/cond.h" "ac_cv_header_simgrid_cond_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_cond_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_COND_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_COND_H 1" >>confdefs.h + +fi + +done + for ac_header in simgrid/barrier.h +do : + ac_fn_c_check_header_compile "$LINENO" 
"simgrid/barrier.h" "ac_cv_header_simgrid_barrier_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_barrier_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_BARRIER_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_BARRIER_H 1" >>confdefs.h + +fi + +done + ac_fn_c_check_header_compile "$LINENO" "simgrid/engine.h" "ac_cv_header_simgrid_engine_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_engine_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h + +fi + + for ac_header in simgrid/zone.h +do : + ac_fn_c_check_header_compile "$LINENO" "simgrid/zone.h" "ac_cv_header_simgrid_zone_h" "$ac_includes_default" +if test "x$ac_cv_header_simgrid_zone_h" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_ZONE_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_SIMGRID_ZONE_H 1" >>confdefs.h + +fi + +done + ac_fn_c_check_type "$LINENO" "smx_actor_t" "ac_cv_type_smx_actor_t" "#include +" +if test "x$ac_cv_type_smx_actor_t" = xyes +then : + +printf "%s\n" "#define HAVE_SMX_ACTOR_T 1" >>confdefs.h + + +printf "%s\n" "#define STARPU_HAVE_SMX_ACTOR_T 1" >>confdefs.h + +fi + + + # Latest functions + ac_fn_c_check_func "$LINENO" "MSG_process_attach" "ac_cv_func_MSG_process_attach" +if test "x$ac_cv_func_MSG_process_attach" = xyes +then : + printf "%s\n" "#define HAVE_MSG_PROCESS_ATTACH 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_attach" "ac_cv_func_sg_actor_attach" +if test "x$ac_cv_func_sg_actor_attach" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_ATTACH 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_attach_pthread" "ac_cv_func_sg_actor_attach_pthread" +if test "x$ac_cv_func_sg_actor_attach_pthread" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_ATTACH_PTHREAD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_init" "ac_cv_func_sg_actor_init" +if test "x$ac_cv_func_sg_actor_init" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_INIT 1" 
>>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_set_stacksize" "ac_cv_func_sg_actor_set_stacksize" +if test "x$ac_cv_func_sg_actor_set_stacksize" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_SET_STACKSIZE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_on_exit" "ac_cv_func_sg_actor_on_exit" +if test "x$ac_cv_func_sg_actor_on_exit" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_ON_EXIT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_zone_get_hosts" "ac_cv_func_MSG_zone_get_hosts" +if test "x$ac_cv_func_MSG_zone_get_hosts" = xyes +then : + printf "%s\n" "#define HAVE_MSG_ZONE_GET_HOSTS 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_zone_get_hosts" "ac_cv_func_sg_zone_get_hosts" +if test "x$ac_cv_func_sg_zone_get_hosts" = xyes +then : + printf "%s\n" "#define HAVE_SG_ZONE_GET_HOSTS 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_zone_get_all_hosts" "ac_cv_func_sg_zone_get_all_hosts" +if test "x$ac_cv_func_sg_zone_get_all_hosts" = xyes +then : + printf "%s\n" "#define HAVE_SG_ZONE_GET_ALL_HOSTS 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_process_self_name" "ac_cv_func_MSG_process_self_name" +if test "x$ac_cv_func_MSG_process_self_name" = xyes +then : + printf "%s\n" "#define HAVE_MSG_PROCESS_SELF_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_process_userdata_init" "ac_cv_func_MSG_process_userdata_init" +if test "x$ac_cv_func_MSG_process_userdata_init" = xyes +then : + printf "%s\n" "#define HAVE_MSG_PROCESS_USERDATA_INIT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_get_data" "ac_cv_func_sg_actor_get_data" +if test "x$ac_cv_func_sg_actor_get_data" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_GET_DATA 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_set_data" "ac_cv_func_sg_actor_set_data" +if test "x$ac_cv_func_sg_actor_set_data" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_SET_DATA 1" >>confdefs.h + +fi +ac_fn_c_check_func 
"$LINENO" "sg_actor_data" "ac_cv_func_sg_actor_data" +if test "x$ac_cv_func_sg_actor_data" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_DATA 1" >>confdefs.h + +fi + + ac_fn_c_check_func "$LINENO" "xbt_mutex_try_acquire" "ac_cv_func_xbt_mutex_try_acquire" +if test "x$ac_cv_func_xbt_mutex_try_acquire" = xyes +then : + printf "%s\n" "#define HAVE_XBT_MUTEX_TRY_ACQUIRE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "smpi_process_set_user_data" "ac_cv_func_smpi_process_set_user_data" +if test "x$ac_cv_func_smpi_process_set_user_data" = xyes +then : + printf "%s\n" "#define HAVE_SMPI_PROCESS_SET_USER_DATA 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "SMPI_thread_create" "ac_cv_func_SMPI_thread_create" +if test "x$ac_cv_func_SMPI_thread_create" = xyes +then : + printf "%s\n" "#define HAVE_SMPI_THREAD_CREATE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_zone_get_by_name" "ac_cv_func_sg_zone_get_by_name" +if test "x$ac_cv_func_sg_zone_get_by_name" = xyes +then : + printf "%s\n" "#define HAVE_SG_ZONE_GET_BY_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_link_get_name" "ac_cv_func_sg_link_get_name" +if test "x$ac_cv_func_sg_link_get_name" = xyes +then : + printf "%s\n" "#define HAVE_SG_LINK_GET_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_link_name" "ac_cv_func_sg_link_name" +if test "x$ac_cv_func_sg_link_name" = xyes +then : + printf "%s\n" "#define HAVE_SG_LINK_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_link_set_bandwidth" "ac_cv_func_sg_link_set_bandwidth" +if test "x$ac_cv_func_sg_link_set_bandwidth" = xyes +then : + printf "%s\n" "#define HAVE_SG_LINK_SET_BANDWIDTH 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_link_bandwidth_set" "ac_cv_func_sg_link_bandwidth_set" +if test "x$ac_cv_func_sg_link_bandwidth_set" = xyes +then : + printf "%s\n" "#define HAVE_SG_LINK_BANDWIDTH_SET 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_get_route" "ac_cv_func_sg_host_get_route" +if 
test "x$ac_cv_func_sg_host_get_route" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_GET_ROUTE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_get_route_links" "ac_cv_func_sg_host_get_route_links" +if test "x$ac_cv_func_sg_host_get_route_links" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_GET_ROUTE_LINKS 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_route" "ac_cv_func_sg_host_route" +if test "x$ac_cv_func_sg_host_route" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_ROUTE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_self" "ac_cv_func_sg_host_self" +if test "x$ac_cv_func_sg_host_self" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_SELF 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_list" "ac_cv_func_sg_host_list" +if test "x$ac_cv_func_sg_host_list" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_LIST 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_get_speed" "ac_cv_func_sg_host_get_speed" +if test "x$ac_cv_func_sg_host_get_speed" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_GET_SPEED 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_speed" "ac_cv_func_sg_host_speed" +if test "x$ac_cv_func_sg_host_speed" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_SPEED 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "simcall_process_create" "ac_cv_func_simcall_process_create" +if test "x$ac_cv_func_simcall_process_create" = xyes +then : + printf "%s\n" "#define HAVE_SIMCALL_PROCESS_CREATE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_config_continue_after_help" "ac_cv_func_sg_config_continue_after_help" +if test "x$ac_cv_func_sg_config_continue_after_help" = xyes +then : + printf "%s\n" "#define HAVE_SG_CONFIG_CONTINUE_AFTER_HELP 1" >>confdefs.h + +fi + + ac_fn_c_check_func "$LINENO" "simgrid_set_maestro" "ac_cv_func_simgrid_set_maestro" +if test "x$ac_cv_func_simgrid_set_maestro" = xyes +then : + printf "%s\n" "#define 
HAVE_SIMGRID_SET_MAESTRO 1" >>confdefs.h + +fi + + + for ac_func in simgrid_init +do : + ac_fn_c_check_func "$LINENO" "simgrid_init" "ac_cv_func_simgrid_init" +if test "x$ac_cv_func_simgrid_init" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_INIT 1" >>confdefs.h + +printf "%s\n" "#define STARPU_SIMGRID_HAVE_SIMGRID_INIT 1" >>confdefs.h + +fi + +done + + for ac_func in xbt_barrier_init +do : + ac_fn_c_check_func "$LINENO" "xbt_barrier_init" "ac_cv_func_xbt_barrier_init" +if test "x$ac_cv_func_xbt_barrier_init" = xyes +then : + printf "%s\n" "#define HAVE_XBT_BARRIER_INIT 1" >>confdefs.h + +printf "%s\n" "#define STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT 1" >>confdefs.h + +fi + +done + ac_fn_c_check_func "$LINENO" "sg_actor_sleep_for" "ac_cv_func_sg_actor_sleep_for" +if test "x$ac_cv_func_sg_actor_sleep_for" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_SLEEP_FOR 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_self" "ac_cv_func_sg_actor_self" +if test "x$ac_cv_func_sg_actor_self" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_SELF 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_ref" "ac_cv_func_sg_actor_ref" +if test "x$ac_cv_func_sg_actor_ref" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_REF 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_get_properties" "ac_cv_func_sg_host_get_properties" +if test "x$ac_cv_func_sg_host_get_properties" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_GET_PROPERTIES 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_get_property_names" "ac_cv_func_sg_host_get_property_names" +if test "x$ac_cv_func_sg_host_get_property_names" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_GET_PROPERTY_NAMES 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_host_send_to" "ac_cv_func_sg_host_send_to" +if test "x$ac_cv_func_sg_host_send_to" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_SEND_TO 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" 
"sg_host_sendto" "ac_cv_func_sg_host_sendto" +if test "x$ac_cv_func_sg_host_sendto" = xyes +then : + printf "%s\n" "#define HAVE_SG_HOST_SENDTO 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_cfg_set_int" "ac_cv_func_sg_cfg_set_int" +if test "x$ac_cv_func_sg_cfg_set_int" = xyes +then : + printf "%s\n" "#define HAVE_SG_CFG_SET_INT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_self_execute" "ac_cv_func_sg_actor_self_execute" +if test "x$ac_cv_func_sg_actor_self_execute" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_SELF_EXECUTE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "sg_actor_execute" "ac_cv_func_sg_actor_execute" +if test "x$ac_cv_func_sg_actor_execute" = xyes +then : + printf "%s\n" "#define HAVE_SG_ACTOR_EXECUTE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "simgrid_get_clock" "ac_cv_func_simgrid_get_clock" +if test "x$ac_cv_func_simgrid_get_clock" = xyes +then : + printf "%s\n" "#define HAVE_SIMGRID_GET_CLOCK 1" >>confdefs.h + +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC options needed to detect all undeclared functions" >&5 +printf %s "checking for $CC options needed to detect all undeclared functions... " >&6; } +if test ${ac_cv_c_undeclared_builtin_options+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_CFLAGS=$CFLAGS + ac_cv_c_undeclared_builtin_options='cannot detect' + for ac_arg in '' -fno-builtin; do + CFLAGS="$ac_save_CFLAGS $ac_arg" + # This test program should *not* compile successfully. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +(void) strchr; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + # This test program should compile successfully. + # No library function is consistently available on + # freestanding implementations, so test against a dummy + # declaration. Include always-available headers on the + # off chance that they somehow elicit warnings. 
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +extern void ac_decl (int, char *); + +int +main (void) +{ +(void) ac_decl (0, (char *) 0); + (void) ac_decl; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_arg" = x +then : + ac_cv_c_undeclared_builtin_options='none needed' +else $as_nop + ac_cv_c_undeclared_builtin_options=$ac_arg +fi + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done + CFLAGS=$ac_save_CFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 +printf "%s\n" "$ac_cv_c_undeclared_builtin_options" >&6; } + case $ac_cv_c_undeclared_builtin_options in #( + 'cannot detect') : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"cannot make $CC report undeclared builtins +See \`config.log' for more details" "$LINENO" 5; } ;; #( + 'none needed') : + ac_c_undeclared_builtin_options='' ;; #( + *) : + ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; +esac + +ac_fn_check_decl "$LINENO" "smpi_process_set_user_data" "ac_cv_have_decl_smpi_process_set_user_data" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_smpi_process_set_user_data" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_SMPI_PROCESS_SET_USER_DATA $ac_have_decl" >>confdefs.h + + + # Oldies for compatibility with older simgrid + ac_fn_c_check_func "$LINENO" "MSG_get_as_by_name" "ac_cv_func_MSG_get_as_by_name" +if test "x$ac_cv_func_MSG_get_as_by_name" = xyes +then : + printf "%s\n" "#define HAVE_MSG_GET_AS_BY_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_zone_get_by_name" "ac_cv_func_MSG_zone_get_by_name" +if test "x$ac_cv_func_MSG_zone_get_by_name" = xyes +then : + printf "%s\n" "#define HAVE_MSG_ZONE_GET_BY_NAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_environment_get_routing_root" "ac_cv_func_MSG_environment_get_routing_root" +if test "x$ac_cv_func_MSG_environment_get_routing_root" = xyes +then : + printf "%s\n" "#define HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "MSG_host_get_speed" "ac_cv_func_MSG_host_get_speed" +if test "x$ac_cv_func_MSG_host_get_speed" = xyes +then : + printf "%s\n" "#define HAVE_MSG_HOST_GET_SPEED 1" >>confdefs.h + +fi + + LIBS="${SAVED_LIBS}" + + +printf "%s\n" "#define STARPU_SIMGRID 1" >>confdefs.h + + # We won't bind or detect anything + with_hwloc=no + + # disable mpi checks by default, they require static linking, we don't + # want that by default + default_enable_mpi_check=no + + # disable MPI support by default + default_enable_mpi=no + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS 
$CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + if test x$enable_shared = xno ; then + # When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp + SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++" + LIBS="$LIBS -lstdc++" + fi + SIMGRID_LDFLAGS="$SIMGRID_LIBS -lsimgrid" + + # Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++... + case \ $CXXFLAGS\ in + *\ -std=*\ *) ;; + *) + # Make sure our C++ compiler can compile simgrid headers + SIMGRID_INCLUDES=" +#ifdef STARPU_HAVE_SIMGRID_MSG_H +#include +#include +#elif defined(STARPU_HAVE_MSG_MSG_H) +#include +#endif + +#ifdef STARPU_HAVE_XBT_BASE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_VERSION_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_ZONE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_HOST_H +#include +#endif + +#include +" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$SIMGRID_INCLUDES +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + +else $as_nop + CXXFLAGS="-std=c++11 $CXXFLAGS" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ;; + esac + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$SIMGRID_INCLUDES + #include +int +main (void) +{ +simgrid::s4u::Engine::on_time_advance_cb([](double delta) { }); + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + +printf "%s\n" "#define STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # Check whether --enable-simgrid-mc was given. 
+if test ${enable_simgrid_mc+y} +then : + enableval=$enable_simgrid_mc; enable_simgrid_mc=$enableval +else $as_nop + enable_simgrid_mc=no +fi + + if test x$enable_simgrid_mc = xyes ; then + +printf "%s\n" "#define STARPU_SIMGRID_MC 1" >>confdefs.h + + # Extract the first word of "simgrid-mc", so it can be a program name with args. +set dummy simgrid-mc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_SIMGRID_MC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $SIMGRID_MC in + [\\/]* | ?:[\\/]*) + ac_cv_path_SIMGRID_MC="$SIMGRID_MC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$simgrid_dir/bin:$PATH" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_SIMGRID_MC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_SIMGRID_MC" && ac_cv_path_SIMGRID_MC="no" + ;; +esac +fi +SIMGRID_MC=$ac_cv_path_SIMGRID_MC +if test -n "$SIMGRID_MC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SIMGRID_MC" >&5 +printf "%s\n" "$SIMGRID_MC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + LDFLAGS="$LDFLAGS -Wl,-znorelro -Wl,-znoseparate-code" + # libsimgrid needs to be linked from binaries themselves for MC to work + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $SIMGRID_LDFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifdef STARPU_HAVE_XBT_BASE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_VERSION_H +#include +#endif +#if SIMGRID_VERSION < 33100 +#error no mutex support with MC +#endif + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + as_fn_error $? "We need simgrid >= 3.31 for MC" "$LINENO" 5 +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi +fi + if test x$enable_simgrid_mc = xyes; then + STARPU_SIMGRID_MC_TRUE= + STARPU_SIMGRID_MC_FALSE='#' +else + STARPU_SIMGRID_MC_TRUE='#' + STARPU_SIMGRID_MC_FALSE= +fi + + if test x$enable_simgrid = xyes; then + STARPU_SIMGRID_TRUE= + STARPU_SIMGRID_FALSE='#' +else + STARPU_SIMGRID_TRUE='#' + STARPU_SIMGRID_FALSE= +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether SimGrid is enabled" >&5 +printf %s "checking whether SimGrid is enabled... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_simgrid" >&5 +printf "%s\n" "$enable_simgrid" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether blocking drivers should be enabled" >&5 +printf %s "checking whether blocking drivers should be enabled... " >&6; } +# Check whether --enable-blocking-drivers was given. +if test ${enable_blocking_drivers+y} +then : + enableval=$enable_blocking_drivers; enable_blocking=$enableval +else $as_nop + enable_blocking=$enable_simgrid +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_blocking" >&5 +printf "%s\n" "$enable_blocking" >&6; } + +if test x$enable_blocking = xno ; then + if test x$enable_simgrid = xyes ; then + as_fn_error $? 
"--disable-blocking-drivers cannot be used in simgrid mode" "$LINENO" 5 + fi + +printf "%s\n" "#define STARPU_NON_BLOCKING_DRIVERS 1" >>confdefs.h + +fi + +if test x$enable_blocking = xyes ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether worker callbacks should be enabled" >&5 +printf %s "checking whether worker callbacks should be enabled... " >&6; } + # Check whether --enable-worker-callbacks was given. +if test ${enable_worker_callbacks+y} +then : + enableval=$enable_worker_callbacks; enable_worker_cb=$enableval +else $as_nop + enable_worker_cb=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_worker_cb" >&5 +printf "%s\n" "$enable_worker_cb" >&6; } +else + # worker sleep/wake-up callbacks only make sense if blocking drivers are enabled + enable_worker_cb=no +fi + +if test x$enable_worker_cb = xyes ; then + +printf "%s\n" "#define STARPU_WORKER_CALLBACKS 1" >>confdefs.h + +fi + +############################################################################### +# # +# LIBTOOLS # +# # +############################################################################### + +#c++11 detection + ax_cxx_compile_cxx11_required=false + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + ac_success=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 +printf %s "checking whether $CXX supports C++11 features by default... " >&6; } +if test ${ax_cv_cxx_compile_cxx11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual void f() {} + }; + + struct Derived : public Base + { + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? 
sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + 
const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ax_cv_cxx_compile_cxx11=yes +else $as_nop + ax_cv_cxx_compile_cxx11=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5 +printf "%s\n" "$ax_cv_cxx_compile_cxx11" >&6; } + if test x$ax_cv_cxx_compile_cxx11 = xyes; then + ac_success=yes + fi + + + + if test x$ac_success = xno; then + for switch in -std=c++11 -std=c++0x +std=c++11 "-h std=c++11"; do + cachevar=`printf "%s\n" "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +printf %s "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval test \${$cachevar+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_CXX="$CXX" + CXX="$CXX $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual void f() {} + }; + + struct Derived : public Base + { + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? 
sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + 
const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + eval $cachevar=yes +else $as_nop + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CXX="$ac_save_CXX" +fi +eval ac_res=\$$cachevar + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test x$ax_cxx_compile_cxx11_required = xtrue; then + if test x$ac_success = xno; then + as_fn_error $? "*** A compiler with support for C++11 language features is required." 
"$LINENO" 5 + fi + fi + if test x$ac_success = xno; then + HAVE_CXX11=0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 +printf "%s\n" "$as_me: No compiler with C++11 support was found" >&6;} + else + HAVE_CXX11=1 + +printf "%s\n" "#define HAVE_CXX11 1" >>confdefs.h + + fi + + + +STARPU_HAVE_CXX11=$HAVE_CXX11 + + if test "$HAVE_CXX11" -eq 1; then + STARPU_HAVE_CXX11_TRUE= + STARPU_HAVE_CXX11_FALSE='#' +else + STARPU_HAVE_CXX11_TRUE='#' + STARPU_HAVE_CXX11_FALSE= +fi + +if test $HAVE_CXX11 -eq 1; then + +printf "%s\n" "#define STARPU_HAVE_CXX11 1" >>confdefs.h + +fi + + +case `pwd` in + *\ * | *\ *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +printf "%s\n" "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4.7' +macro_revision='2.4.7' + + + + + + + + + + + + + + +ltmain=$ac_aux_dir/ltmain.sh + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +printf %s "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. 
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case $ECHO in + printf*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +printf "%s\n" "printf" >&6; } ;; + print*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +printf "%s\n" "print -r" >&6; } ;; + *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +printf "%s\n" "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +printf %s "checking for a sed that does not truncate output... " >&6; } +if test ${ac_cv_path_SED+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in sed gsed + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. 
+ # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +printf "%s\n" "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +printf %s "checking for fgrep... 
" >&6; } +if test ${ac_cv_path_FGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in fgrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. + # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? 
"no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +printf "%s\n" "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test ${with_gnu_ld+y} +then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else $as_nop + with_gnu_ld=no +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +printf %s "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +printf %s "checking for GNU ld... " >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +printf %s "checking for non-GNU ld... " >&6; } +fi +if test ${lt_cv_path_LD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +printf "%s\n" "$LD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +printf %s "checking if the linker ($LD) is GNU ld... " >&6; } +if test ${lt_cv_prog_gnu_ld+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +printf %s "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if test ${lt_cv_path_NM+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +printf "%s\n" "$lt_cv_path_NM" >&6; } +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +printf "%s\n" "$DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +printf "%s\n" "$ac_ct_DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +printf %s "checking the name lister ($NM) interface... 
" >&6; } +if test ${lt_cv_nm_interface+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +printf "%s\n" "$lt_cv_nm_interface" >&6; } + +# find the maximum length of command line arguments +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +printf %s "checking the maximum length of command line arguments... " >&6; } +if test ${lt_cv_sys_max_cmd_len+y} +then : + printf %s "(cached) " >&6 +else $as_nop + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. 
+ # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n "$lt_cv_sys_max_cmd_len"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +printf "%s\n" "$lt_cv_sys_max_cmd_len" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none" >&5 +printf "%s\n" "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +printf %s "checking how to convert $build file names to $host format... 
" >&6; } +if test ${lt_cv_to_host_file_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +printf %s "checking how to convert $build file names to toolchain format... " >&6; } +if test ${lt_cv_to_tool_file_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + #assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +printf %s "checking for $LD option to reload object files... 
" >&6; } +if test ${lt_cv_ld_reload_flag+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_reload_flag='-r' +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +printf "%s\n" "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test yes != "$GCC"; then + reload_cmds=false + fi + ;; + darwin*) + if test yes = "$GCC"; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. +set dummy ${ac_tool_prefix}file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$FILECMD"; then + ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FILECMD="${ac_tool_prefix}file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +FILECMD=$ac_cv_prog_FILECMD +if test -n "$FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 +printf "%s\n" "$FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_FILECMD"; then + ac_ct_FILECMD=$FILECMD + # Extract the first word of "file", so it can be a program name with args. +set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FILECMD"; then + ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FILECMD="file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_FILECMD=$ac_cv_prog_ac_ct_FILECMD +if test -n "$ac_ct_FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FILECMD" >&5 +printf "%s\n" "$ac_ct_FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_FILECMD" = x; then + FILECMD=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + FILECMD=$ac_ct_FILECMD + fi +else + FILECMD="$ac_cv_prog_FILECMD" +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +printf %s "checking how to recognize dependent libraries... " >&6; } +if test ${lt_cv_deplibs_check_method+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) + lt_cv_deplibs_check_method=pass_all 
+ ;; +esac + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +printf "%s\n" "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +printf %s "checking how to associate runtime and link libraries... 
" >&6; } +if test ${lt_cv_sharedlib_from_linklib_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + + + + + + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS + + + + + + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +printf %s "checking for archiver @FILE support... 
" >&6; } +if test ${lt_cv_ar_at_file+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +printf "%s\n" "$lt_cv_ar_at_file" >&6; } + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +printf "%s\n" "$RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +printf "%s\n" "$ac_ct_RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. 
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +printf %s "checking command to parse $NM output from $compiler object... " >&6; } +if test ${lt_cv_sys_global_symbol_pipe+y} +then : + printf %s "(cached) " >&6 +else $as_nop + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. 
+ lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. + lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. 
+ # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&5 + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&5 && test -s "$nlist"; then + # Try sorting and uniquifying the output. 
+ if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +printf "%s\n" "failed" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +printf "%s\n" "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +printf %s "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test ${with_sysroot+y} +then : + withval=$with_sysroot; +else $as_nop + with_sysroot=no +fi + + +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_sysroot" >&5 +printf "%s\n" "$with_sysroot" >&6; } + as_fn_error $? "The sysroot must be an absolute path."
"$LINENO" 5 + ;; +esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +printf "%s\n" "${lt_sysroot:-no}" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a working dd" >&5 +printf %s "checking for a working dd... " >&6; } +if test ${ac_cv_path_lt_DD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +if test -z "$lt_DD"; then + ac_path_lt_DD_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in dd + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_lt_DD="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_lt_DD" || continue +if "$ac_path_lt_DD" bs=32 count=1 <conftest2.i >conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi + $ac_path_lt_DD_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_lt_DD"; then + : + fi +else + ac_cv_path_lt_DD=$lt_DD +fi + +rm -f conftest.i conftest2.i conftest.out +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 +printf "%s\n" "$ac_cv_path_lt_DD" >&6; } + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to truncate binary pipes" >&5 +printf %s "checking how to truncate binary pipes...
" >&6; } +if test ${lt_cv_truncate_bin+y} +then : + printf %s "(cached) " >&6 +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 <conftest2.i >conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 +printf "%s\n" "$lt_cv_truncate_bin" >&6; } + + + + + + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + +# Check whether --enable-libtool-lock was given. +if test ${enable_libtool_lock+y} +then : + enableval=$enable_libtool_lock; +fi + +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly.
+ echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +printf %s "checking whether the C compiler needs -belf... " >&6; } +if test ${lt_cv_cc_needs_belf+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_cc_needs_belf=yes +else $as_nop + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +printf "%s\n" "$MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +printf "%s\n" "$ac_ct_MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if test ${lt_cv_path_mainfest_tool+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +printf "%s\n" "$lt_cv_path_mainfest_tool" >&6; } +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. +set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +printf "%s\n" "$DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +printf "%s\n" "$ac_ct_DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +printf "%s\n" "$NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +printf "%s\n" "$ac_ct_NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_LIPO+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +printf "%s\n" "$LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_LIPO+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +printf "%s\n" "$ac_ct_LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +printf "%s\n" "$OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +printf "%s\n" "$ac_ct_OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +printf "%s\n" "$OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +printf "%s\n" "$ac_ct_OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +printf %s "checking for -single_module linker flag... " >&6; } +if test ${lt_cv_apple_cc_single_mod+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +printf %s "checking for -exported_symbols_list linker flag... " >&6; } +if test ${lt_cv_ld_exported_symbols_list+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_ld_exported_symbols_list=yes +else $as_nop + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +printf %s "checking for -force_load linker flag... " >&6; } +if test ${lt_cv_ld_force_load+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&5 + $AR $AR_FLAGS libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? 
+ if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +printf "%s\n" "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[012],*|,*powerpc*-darwin[5-8]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + 
case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + +ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes +then : + printf "%s\n" "#define HAVE_DLFCN_H 1" >>confdefs.h + +fi + + + +func_stripname_cnf () +{ + case $2 in + .*) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%\\\\$2\$%%"`;; + *) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%$2\$%%"`;; + esac +} # func_stripname_cnf + + + + + +# Set options +enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. +set dummy ${ac_tool_prefix}as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AS+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AS="${ac_tool_prefix}as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AS=$ac_cv_prog_AS +if test -n "$AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AS" >&5 +printf "%s\n" "$AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_AS"; then + ac_ct_AS=$AS + # Extract the first word of "as", so it can be a program name with args. +set dummy as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AS+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AS"; then + ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AS="as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AS=$ac_cv_prog_ac_ct_AS +if test -n "$ac_ct_AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5 +printf "%s\n" "$ac_ct_AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_AS" = x; then + AS="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AS=$ac_ct_AS + fi +else + AS="$ac_cv_prog_AS" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + + ;; +esac + +test -z "$AS" && AS=as + + + + + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + enable_dlopen=no + + + + # Check whether --enable-shared was given. +if test ${enable_shared+y} +then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_shared=yes +fi + + + + + + + + + + # Check whether --enable-static was given. 
+if test ${enable_static+y} +then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test ${with_pic+y} +then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + pic_mode=default +fi + + + + + + + + + # Check whether --enable-fast-install was given. +if test ${enable_fast_install+y} +then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_fast_install=yes +fi + + + + + + + + + shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[5-9]*,yes) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which variant of shared library versioning to provide" >&5 +printf %s "checking which variant of shared library versioning to provide... " >&6; } + +# Check whether --with-aix-soname was given. 
+if test ${with_aix_soname+y} +then : + withval=$with_aix_soname; case $withval in + aix|svr4|both) + ;; + *) + as_fn_error $? "Unknown argument to --with-aix-soname" "$LINENO" 5 + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname +else $as_nop + if test ${lt_cv_with_aix_soname+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_with_aix_soname=aix +fi + + with_aix_soname=$lt_cv_with_aix_soname +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 +printf "%s\n" "$with_aix_soname" >&6; } + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +printf %s "checking for objdir... " >&6; } +if test ${lt_cv_objdir+y} +then : + printf %s "(cached) " >&6 +else $as_nop + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +printf "%s\n" "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +printf "%s\n" "#define LT_OBJDIR \"$lt_cv_objdir/\"" >>confdefs.h + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +printf %s "checking for ${ac_tool_prefix}file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/${ac_tool_prefix}file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +printf %s "checking for file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC=$CC +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. 
+objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } +if test ${lt_cv_prog_compiler_rtti_exceptions+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test yes = "$lt_cv_prog_compiler_rtti_exceptions"; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + lt_prog_compiler_pic='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. 
+ # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + 
lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... 
" >&6; } +if test ${lt_cv_prog_compiler_pic+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works"; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_static_works=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works"; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. 
+ exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. 
Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='$wl--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> 
$output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space.
+ archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs 
$linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test no = "$ld_shlibs"; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct=no + hardcode_direct_absolute=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' $wl-bernotok' + allow_undefined_flag=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + archive_expsym_cmds='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds="$archive_expsym_cmds"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds="$archive_expsym_cmds"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + 
bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly* | midnightbsd*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +printf %s "checking if $CC understands -b... 
" >&6; } +if test ${lt_cv_prog_compiler__b+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler__b=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +printf "%s\n" "$lt_cv_prog_compiler__b" >&6; } + +if test yes = "$lt_cv_prog_compiler__b"; then + archive_cmds='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if test ${lt_cv_irix_exported_symbol+y} +then : + printf %s "(cached) " >&6 +else $as_nop + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_irix_exported_symbol=yes +else $as_nop + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + link_all_deplibs=no + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs=yes + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) 
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + else + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + 
prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" 
>> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. 
GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test yes = "$GCC"; then + whole_archive_flag_spec='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='$wl-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag='$wl-z,text' + allow_undefined_flag='$wl-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +printf "%s\n" "$ld_shlibs" >&6; } +test no = "$ld_shlibs" && can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... 
" >&6; } +if test ${lt_cv_archive_cmds_need_lc+y} +then : + printf %s "(cached) " >&6 +else $as_nop + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... 
" >&6; } + +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). 
+ case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([A-Za-z]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking are enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. 
+ # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test yes = "$hardcode_automatic"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, )" && + test no != "$hardcode_minus_L"; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ hardcode_action=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +printf "%s\n" "$hardcode_action" >&6; } + +if test relink = "$hardcode_action" || + test yes = "$inherit_rpath"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else $as_nop + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else $as_nop + + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes +then : + lt_cv_dlopen=shl_load +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +printf %s "checking for shl_load in -ldld... " >&6; } +if test ${ac_cv_lib_dld_shl_load+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char shl_load (); +int +main (void) +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_shl_load=yes +else $as_nop + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes +then : + lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld +else $as_nop + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes +then : + lt_cv_dlopen=dlopen +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else $as_nop + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +printf %s "checking for dlopen in -lsvld... 
" >&6; } +if test ${ac_cv_lib_svld_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_svld_dlopen=yes +else $as_nop + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +printf %s "checking for dld_link in -ldld... " >&6; } +if test ${ac_cv_lib_dld_dld_link+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dld_link (); +int +main (void) +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_dld_link=yes +else $as_nop + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes +then : + lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +printf %s "checking whether a program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +printf "%s\n" "$lt_cv_dlopen_self" >&6; } + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +printf %s "checking whether a statically linked program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self_static+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +printf "%s\n" "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +printf %s "checking whether stripping libraries is possible... 
" >&6; } +if test -z "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +else + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + fi + ;; + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; + esac + fi +fi + + + + + + + + + + + + + # Report what library types will actually be built + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +printf %s "checking if libtool supports shared libraries... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +printf "%s\n" "$can_build_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +printf %s "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +printf "%s\n" "$enable_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +printf %s "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +printf "%s\n" "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC=$lt_save_CC + + if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 +printf %s "checking how to run the C++ preprocessor... 
" >&6; } +if test -z "$CXXCPP"; then + if test ${ac_cv_prog_CXXCPP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # Double quotes because $CXX needs to be expanded + for CXXCPP in "$CXX -E" cpp /lib/cpp + do + ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO" +then : + +else $as_nop + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO" +then : + # Broken: success on invalid input. +continue +else $as_nop + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok +then : + break +fi + + done + ac_cv_prog_CXXCPP=$CXXCPP + +fi + CXXCPP=$ac_cv_prog_CXXCPP +else + ac_cv_prog_CXXCPP=$CXXCPP +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 +printf "%s\n" "$CXXCPP" >&6; } +ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO" +then : + +else $as_nop + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO" +then : + # Broken: success on invalid input. +continue +else $as_nop + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok +then : + +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +else + _lt_caught_CXX_error=yes +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +archive_cmds_need_lc_CXX=no +allow_undefined_flag_CXX= +always_export_symbols_CXX=no +archive_expsym_cmds_CXX= +compiler_needs_object_CXX=no +export_dynamic_flag_spec_CXX= +hardcode_direct_CXX=no +hardcode_direct_absolute_CXX=no +hardcode_libdir_flag_spec_CXX= +hardcode_libdir_separator_CXX= +hardcode_minus_L_CXX=no +hardcode_shlibpath_var_CXX=unsupported +hardcode_automatic_CXX=no +inherit_rpath_CXX=no +module_cmds_CXX= +module_expsym_cmds_CXX= +link_all_deplibs_CXX=unknown 
+old_archive_cmds_CXX=$old_archive_cmds +reload_flag_CXX=$reload_flag +reload_cmds_CXX=$reload_cmds +no_undefined_flag_CXX= +whole_archive_flag_spec_CXX= +enable_shared_with_static_runtimes_CXX=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +objext_CXX=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. 
+ lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + compiler_CXX=$CC + func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' + else + lt_prog_compiler_no_builtin_flag_CXX= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + + +# Check whether --with-gnu-ld was given. +if test ${with_gnu_ld+y} +then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else $as_nop + with_gnu_ld=no +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +printf %s "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. 
+ ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +printf %s "checking for GNU ld... " >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +printf %s "checking for non-GNU ld... " >&6; } +fi +if test ${lt_cv_path_LD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +printf "%s\n" "$LD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +printf %s "checking if the linker ($LD) is GNU ld... " >&6; } +if test ${lt_cv_prog_gnu_ld+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test yes = "$with_gnu_ld"; then + archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec_CXX= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + ld_shlibs_CXX=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. 
+ # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds_CXX='' + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + file_list_spec_CXX='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + hardcode_direct_CXX=no + hardcode_direct_absolute_CXX=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_CXX=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_CXX=yes + hardcode_libdir_flag_spec_CXX='-L$libdir' + hardcode_libdir_separator_CXX= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. 
+ shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec_CXX='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + always_export_symbols_CXX=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + no_undefined_flag_CXX='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" + + archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec_CXX='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag_CXX="-z nodefs" + archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_CXX=' $wl-bernotok' + allow_undefined_flag_CXX=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_CXX='$convenience' + fi + archive_cmds_need_lc_CXX=yes + archive_expsym_cmds_CXX='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. 
+ archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_CXX=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. 
FIXME + archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs_CXX=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl* | ,icl* | no,icl*) + # Native MSVC or ICC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec_CXX=' ' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=yes + file_list_spec_CXX='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true' + enable_shared_with_static_runtimes_CXX=yes + # Don't use ranlib + old_postinstall_cmds_CXX='chmod 644 $oldlib' + postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, + # as there is no search path for DLLs. + hardcode_libdir_flag_spec_CXX='-L$libdir' + export_dynamic_flag_spec_CXX='$wl--export-all-symbols' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=no + enable_shared_with_static_runtimes_CXX=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_CXX=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + + + archive_cmds_need_lc_CXX=no + hardcode_direct_CXX=no + hardcode_automatic_CXX=yes + hardcode_shlibpath_var_CXX=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec_CXX='' + fi + link_all_deplibs_CXX=yes + allow_undefined_flag_CXX=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds_CXX="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds_CXX="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC 
\$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + if test yes != "$lt_cv_apple_cc_single_mod"; then + archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + archive_expsym_cmds_CXX="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi + + else + ld_shlibs_CXX=no + fi + + ;; + + os2*) + hardcode_libdir_flag_spec_CXX='-L$libdir' + hardcode_minus_L_CXX=yes + allow_undefined_flag_CXX=unsupported + shrext_cmds=.dll + archive_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC 
-Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds_CXX='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_CXX=yes + file_list_spec_CXX='@' + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + ld_shlibs_CXX=no + ;; + + freebsd-elf*) + archive_cmds_need_lc_CXX=no + ;; + + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + ld_shlibs_CXX=yes + ;; + + haiku*) + archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs_CXX=yes + ;; + + hpux9*) + hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' + hardcode_libdir_separator_CXX=: + export_dynamic_flag_spec_CXX='$wl-E' + hardcode_direct_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' + hardcode_libdir_separator_CXX=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + export_dynamic_flag_spec_CXX='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_CXX=no + hardcode_shlibpath_var_CXX=no + ;; + *) + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + interix[3-9]*) + hardcode_direct_CXX=no + 
hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_CXX='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + link_all_deplibs_CXX=yes + ;; + esac + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + hardcode_libdir_separator_CXX=: + inherit_rpath_CXX=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + archive_cmds_need_lc_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [1-5].* | *pgcpp\ [1-5].*) + prelink_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + old_archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir 
$predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + hardcode_libdir_flag_spec_CXX='$wl--rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + whole_archive_flag_spec_CXX='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + hardcode_libdir_flag_spec_CXX='-R$libdir' + whole_archive_flag_spec_CXX='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_CXX=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. 
+ output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + ld_shlibs_CXX=yes + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + hardcode_direct_absolute_CXX=yes + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; then + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + export_dynamic_flag_spec_CXX='$wl-E' + whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + ld_shlibs_CXX=no + fi + ;; + + osf3* | osf4* 
| osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + hardcode_libdir_separator_CXX=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. + case $host in + osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + cxx*) + case $host in + osf3*) + allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' + archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + ;; + *) + allow_undefined_flag_CXX=' -expect_unresolved \*' + archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input 
$wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + ;; + esac + + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + archive_cmds_CXX='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + archive_cmds_need_lc_CXX=yes + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_shlibpath_var_CXX=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' + ;; + esac + link_all_deplibs_CXX=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + no_undefined_flag_CXX=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. + archive_cmds_CXX='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + fi + + hardcode_libdir_flag_spec_CXX='$wl-R $wl$libdir' + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + whole_archive_flag_spec_CXX='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_CXX='$wl-z,text' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag_CXX='$wl-z,text' + allow_undefined_flag_CXX='$wl-z,nodefs' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-R,$libdir' + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + export_dynamic_flag_spec_CXX='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~ + '"$old_archive_cmds_CXX" + reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~ + '"$reload_cmds_CXX" + ;; + *) + archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +printf "%s\n" "$ld_shlibs_CXX" >&6; } + test no = "$ld_shlibs_CXX" && can_build_shared=no + + GCC_CXX=$GXX + LD_CXX=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... 
+    # Dependencies to place before and after the object being linked:
+# This section compiles a tiny C++ class, runs $output_verbose_link_cmd,
+# and sorts the words it prints into the *_CXX variables below:
+#   - -L/-R/-l words seen before conftest.$objext: only -L/-R are kept,
+#     in compiler_lib_search_path_CXX;
+#   - -L/-R/-l words seen after conftest.$objext go to postdeps_CXX;
+#   - other *.$objext words go to predep_objects_CXX or
+#     postdep_objects_CXX, depending on which side of conftest.$objext
+#     they appear.
+# predeps_CXX is only reset here; nothing in this section fills it.
+predep_objects_CXX=
+postdep_objects_CXX=
+predeps_CXX=
+postdeps_CXX=
+compiler_lib_search_path_CXX=
+
+cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+
+
+# Append the matching -fno-* option when an LTO-related flag is present
+# in "$CC $CFLAGS"; the saved CFLAGS value is restored after the probe.
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  # Parse the compiler output and extract the necessary
+  # objects, libraries and library flags.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  # No "-L"/"-R" word is pending yet.  The case below reads $prev before
+  # anything in this section assigns it, so clear any stale value.
+  prev=
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case $prev$p in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space.
+       # Both operands must carry the "x" guard: comparing the literal
+       # x-L against a bare "$p" can never be true, which made this
+       # branch unreachable.
+       if test x-L = "x$p" ||
+          test x-R = "x$p"; then
+         prev=$p
+         continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test no = "$pre_test_object_deps_done"; then
+         case $prev in
+         -L | -R)
+           # Internal compiler library paths should come after those
+           # provided the user.  The postdeps already come after the
+           # user supplied libs so there is no need to process them.
+           if test -z "$compiler_lib_search_path_CXX"; then
+             compiler_lib_search_path_CXX=$prev$p
+           else
+             compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} $prev$p"
+           fi
+           ;;
+         # The "-l" case would never come before the object being
+         # linked, so don't bother handling this case.
+         esac
+       else
+         if test -z "$postdeps_CXX"; then
+           postdeps_CXX=$prev$p
+         else
+           postdeps_CXX="${postdeps_CXX} $prev$p"
+         fi
+       fi
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+         pre_test_object_deps_done=yes
+         continue
+       fi
+
+       if test no = "$pre_test_object_deps_done"; then
+         if test -z "$predep_objects_CXX"; then
+           predep_objects_CXX=$p
+         else
+           predep_objects_CXX="$predep_objects_CXX $p"
+         fi
+       else
+         if test -z "$postdep_objects_CXX"; then
+           postdep_objects_CXX=$p
+         else
+           postdep_objects_CXX="$postdep_objects_CXX $p"
+         fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling CXX test program"
+fi
+
+# Remove the probe object.  The file name was previously misspelled
+# "confest", so this command did not name the object that was compiled.
+$RM -f conftest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+case $host_os in
+interix[3-9]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  predep_objects_CXX=
+  postdep_objects_CXX=
+  postdeps_CXX=
+  ;;
+esac
+
+
+# If the detected postdeps already include -lc, record that -lc need not
+# be added explicitly.
+case " $postdeps_CXX " in
+*" -lc "*) archive_cmds_need_lc_CXX=no ;;
+esac
+ compiler_lib_search_dirs_CXX=
+# Derive a directory list from the search path collected above by
+# replacing each " -L" with a space and dropping the leading space.
+if test -n "${compiler_lib_search_path_CXX}"; then
+ compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | $SED -e 's! -L! !g' -e 's!^ !!'`
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    lt_prog_compiler_wl_CXX=
+lt_prog_compiler_pic_CXX=
+lt_prog_compiler_static_CXX=
+
+
+  # C++ specific cases for pic, static, wl, etc.
+ if test yes = "$GXX"; then + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + fi + lt_prog_compiler_pic_CXX='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_CXX='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_CXX='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_CXX='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + lt_prog_compiler_pic_CXX= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_CXX= + ;; + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_CXX=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + else + case $host_os in + aix[4-9]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + else + lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + ;; + dgux*) + case $cc_basename in + ec++*) + lt_prog_compiler_pic_CXX='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + lt_prog_compiler_pic_CXX='+Z' + fi + ;; + aCC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_CXX='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? 
+ ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + lt_prog_compiler_wl_CXX='--backend -Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + lt_prog_compiler_static_CXX='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fpic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + xlc* | xlC* | bgxl[cC]* | mpixl[cC]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-qpic' + lt_prog_compiler_static_CXX='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + lt_prog_compiler_pic_CXX='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd* | netbsdelf*-gnu) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + lt_prog_compiler_wl_CXX='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + lt_prog_compiler_pic_CXX='-pic' + ;; + cxx*) + # Digital/Compaq C++ + lt_prog_compiler_wl_CXX='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + lt_prog_compiler_pic_CXX='-pic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + lcc*) + # Lucid + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + lt_prog_compiler_pic_CXX='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + lt_prog_compiler_can_build_shared_CXX=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_CXX= + ;; + *) + lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... 
" >&6; } +if test ${lt_cv_prog_compiler_pic_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_CXX" >&6; } +lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic_CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_works_CXX=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. 
+ $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_CXX=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works_CXX" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works_CXX"; then + case $lt_prog_compiler_pic_CXX in + "" | " "*) ;; + *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;; + esac +else + lt_prog_compiler_pic_CXX= + lt_prog_compiler_can_build_shared_CXX=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_static_works_CXX=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_CXX=yes + fi + else + lt_cv_prog_compiler_static_works_CXX=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works_CXX" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works_CXX"; then + : +else + lt_prog_compiler_static_CXX= +fi + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o_CXX" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + case $host_os in + aix[4-9]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_CXX='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + export_symbols_cmds_CXX=$ltdll_cmds + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl* | icl*) + exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + ;; + esac + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs_CXX=no + ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +printf "%s\n" "$ld_shlibs_CXX" >&6; } +test no = "$ld_shlibs_CXX" && can_build_shared=no + +with_gnu_ld_CXX=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? 
+# +case "x$archive_cmds_need_lc_CXX" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_CXX=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds_CXX in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... " >&6; } +if test ${lt_cv_archive_cmds_need_lc_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_CXX + pic_flag=$lt_prog_compiler_pic_CXX + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_CXX + allow_undefined_flag_CXX= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_CXX=no + else + lt_cv_archive_cmds_need_lc_CXX=yes + fi + allow_undefined_flag_CXX=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc_CXX" >&6; } + archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... " >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. 
+ # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + 
shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 
"|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec_CXX='-L$libdir' + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. 
+ if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major 
$libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action_CXX= +if test -n "$hardcode_libdir_flag_spec_CXX" || + test -n "$runpath_var_CXX" || + test yes = "$hardcode_automatic_CXX"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct_CXX" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" && + test no != "$hardcode_minus_L_CXX"; then + # Linking always hardcodes the temporary library directory. + hardcode_action_CXX=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_CXX=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_CXX=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5 +printf "%s\n" "$hardcode_action_CXX" >&6; } + +if test relink = "$hardcode_action_CXX" || + test yes = "$inherit_rpath_CXX"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o 
conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + +if test -z "$F77" || test no = "$F77"; then + _lt_disable_F77=yes +fi + +archive_cmds_need_lc_F77=no +allow_undefined_flag_F77= +always_export_symbols_F77=no +archive_expsym_cmds_F77= +export_dynamic_flag_spec_F77= +hardcode_direct_F77=no +hardcode_direct_absolute_F77=no +hardcode_libdir_flag_spec_F77= +hardcode_libdir_separator_F77= +hardcode_minus_L_F77=no +hardcode_automatic_F77=no +inherit_rpath_F77=no +module_cmds_F77= +module_expsym_cmds_F77= +link_all_deplibs_F77=unknown +old_archive_cmds_F77=$old_archive_cmds +reload_flag_F77=$reload_flag +reload_cmds_F77=$reload_cmds +no_undefined_flag_F77= +whole_archive_flag_spec_F77= +enable_shared_with_static_runtimes_F77=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. +objext=o +objext_F77=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_F77"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. 
+compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + compiler_F77=$CC + func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + GCC=$G77 + if test -n "$compiler"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +printf %s "checking if libtool supports shared libraries... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +printf "%s\n" "$can_build_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +printf %s "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +printf "%s\n" "$enable_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +printf %s "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +printf "%s\n" "$enable_static" >&6; } + + GCC_F77=$G77 + LD_F77=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + lt_prog_compiler_wl_F77= +lt_prog_compiler_pic_F77= +lt_prog_compiler_static_F77= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_static_F77='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_F77='-Bstatic' + fi + lt_prog_compiler_pic_F77='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_F77='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ lt_prog_compiler_pic_F77='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_F77='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_F77='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_F77='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_F77= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_F77='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared_F77=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ lt_prog_compiler_pic_F77='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_F77=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic_F77='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl_F77='-Xlinker ' + if test -n "$lt_prog_compiler_pic_F77"; then + lt_prog_compiler_pic_F77="-Xcompiler $lt_prog_compiler_pic_F77" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + lt_prog_compiler_wl_F77='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_F77='-Bstatic' + else + lt_prog_compiler_static_F77='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_F77='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl_F77='-Wl,-Wl,,' + lt_prog_compiler_pic_F77='-PIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_F77='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_F77='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl_F77='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_F77='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static_F77='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl_F77='-Wl,' + # PIC (with -KPIC) is the default. 
+ lt_prog_compiler_static_F77='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='--shared' + lt_prog_compiler_static_F77='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl_F77='-Wl,-Wl,,' + lt_prog_compiler_pic_F77='-PIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fpic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl_F77='-Wl,' + # All Alpha code is PIC. 
+ lt_prog_compiler_static_F77='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-qpic' + lt_prog_compiler_static_F77='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fpic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_F77='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl_F77='-Wl,' + # All OSF/1 code is PIC. 
+ lt_prog_compiler_static_F77='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static_F77='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl_F77='-Qoption ld ';; + *) + lt_prog_compiler_wl_F77='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl_F77='-Qoption ld ' + lt_prog_compiler_pic_F77='-PIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_F77='-Kconform_pic' + lt_prog_compiler_static_F77='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_can_build_shared_F77=no + ;; + + uts4*) + lt_prog_compiler_pic_F77='-pic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared_F77=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_F77= + ;; + *) + lt_prog_compiler_pic_F77="$lt_prog_compiler_pic_F77" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... " >&6; } +if test ${lt_cv_prog_compiler_pic_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_F77" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_F77" >&6; } +lt_prog_compiler_pic_F77=$lt_cv_prog_compiler_pic_F77 + +# +# Check to make sure the PIC flag actually works. 
+# +if test -n "$lt_prog_compiler_pic_F77"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_works_F77=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_F77" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_F77=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_F77" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works_F77" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works_F77"; then + case $lt_prog_compiler_pic_F77 in + "" | " "*) ;; + *) lt_prog_compiler_pic_F77=" $lt_prog_compiler_pic_F77" ;; + esac +else + lt_prog_compiler_pic_F77= + lt_prog_compiler_can_build_shared_F77=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_static_works_F77=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_F77=yes + fi + else + lt_cv_prog_compiler_static_works_F77=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_F77" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works_F77" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works_F77"; then + : +else + lt_prog_compiler_static_F77= +fi + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_F77=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_F77=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_F77" >&6; } + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_F77=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_F77=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_F77" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o_F77" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag_F77= + always_export_symbols_F77=no + archive_cmds_F77= + archive_expsym_cmds_F77= + compiler_needs_object_F77=no + enable_shared_with_static_runtimes_F77=no + export_dynamic_flag_spec_F77= + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic_F77=no + hardcode_direct_F77=no + hardcode_direct_absolute_F77=no + hardcode_libdir_flag_spec_F77= + hardcode_libdir_separator_F77= + hardcode_minus_L_F77=no + hardcode_shlibpath_var_F77=unsupported + inherit_rpath_F77=no + link_all_deplibs_F77=unknown + module_cmds_F77= + module_expsym_cmds_F77= + old_archive_from_new_cmds_F77= + old_archive_from_expsyms_cmds_F77= + thread_safe_flag_spec_F77= + whole_archive_flag_spec_F77= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms_F77= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. 
Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + exclude_expsyms_F77='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs_F77=no + ;; + esac + + ld_shlibs_F77=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. 
+ case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_F77='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_F77=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec_F77= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='' + ;; + m68k) + archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_F77=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds_F77='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs_F77=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, F77) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec_F77='-L$libdir' + export_dynamic_flag_spec_F77='$wl--export-all-symbols' + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=no + enable_shared_with_static_runtimes_F77=yes + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_F77='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds_F77='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_F77=no + fi + ;; + + haiku*) + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs_F77=yes + ;; + + os2*) + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + allow_undefined_flag_F77=unsupported + shrext_cmds=.dll + archive_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> 
$output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds_F77='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_F77=yes + file_list_spec_F77='@' + ;; + + interix[3-9]*) + hardcode_direct_F77=no + hardcode_shlibpath_var_F77=no + hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' + export_dynamic_flag_spec_F77='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_F77='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec_F77= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_F77=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec_F77='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_F77=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds_F77='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_F77='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec_F77='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + archive_cmds_F77='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> 
$output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs_F77=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_F77='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. 
Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds_F77='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + esac + + if test no = "$ld_shlibs_F77"; then + runpath_var= + hardcode_libdir_flag_spec_F77= + export_dynamic_flag_spec_F77= + whole_archive_flag_spec_F77= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=yes + archive_expsym_cmds_F77='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories 
specified by -L. + hardcode_minus_L_F77=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct_F77=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_F77='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_F77='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds_F77='' + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + hardcode_libdir_separator_F77=':' + link_all_deplibs_F77=yes + file_list_spec_F77='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct_F77=no + hardcode_direct_absolute_F77=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_F77=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_F77=yes + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_libdir_separator_F77= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec_F77='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols_F77=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag_F77='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__F77 +fi + + hardcode_libdir_flag_spec_F77='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds_F77='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec_F77='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag_F77="-z nodefs" + archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__F77 +fi + + hardcode_libdir_flag_spec_F77='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_F77=' $wl-bernotok' + allow_undefined_flag_F77=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_F77='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_F77='$convenience' + fi + archive_cmds_need_lc_F77=yes + archive_expsym_cmds_F77='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='' + ;; + m68k) + archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_F77='-L$libdir' + 
hardcode_minus_L_F77=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec_F77=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + hardcode_libdir_flag_spec_F77=' ' + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=yes + file_list_spec_F77='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_F77='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds_F77='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, F77)='true' + enable_shared_with_static_runtimes_F77=yes + exclude_expsyms_F77='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds_F77='chmod 644 $oldlib' + postlink_cmds_F77='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + hardcode_libdir_flag_spec_F77=' ' + allow_undefined_flag_F77=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_F77='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds_F77='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds_F77='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes_F77=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc_F77=no + hardcode_direct_F77=no + hardcode_automatic_F77=yes + hardcode_shlibpath_var_F77=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec_F77='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + compiler_needs_object_F77=yes + else + whole_archive_flag_spec_F77='' + fi + link_all_deplibs_F77=yes + allow_undefined_flag_F77=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_F77="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds_F77="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds_F77="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds_F77="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs_F77=no + fi + + ;; + + dgux*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_shlibpath_var_F77=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes + hardcode_minus_L_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly* | midnightbsd*) + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds_F77='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds_F77='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' + hardcode_libdir_separator_F77=: + hardcode_direct_F77=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L_F77=yes + export_dynamic_flag_spec_F77='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' + hardcode_libdir_separator_F77=: + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + export_dynamic_flag_spec_F77='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L_F77=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds_F77='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds_F77='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_F77='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_F77='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' + hardcode_libdir_separator_F77=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_F77=no + hardcode_shlibpath_var_F77=no + ;; + *) + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + export_dynamic_flag_spec_F77='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the 
library. + hardcode_minus_L_F77=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if test ${lt_cv_irix_exported_symbol+y} +then : + printf %s "(cached) " >&6 +else $as_nop + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat > conftest.$ac_ext <<_ACEOF + + subroutine foo + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + lt_cv_irix_exported_symbol=yes +else $as_nop + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + link_all_deplibs_F77=no + else + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry 
$output_objdir/so_locations -o $lib' + archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + hardcode_libdir_separator_F77=: + inherit_rpath_F77=yes + link_all_deplibs_F77=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs_F77=yes + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds_F77='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + newsos6) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + hardcode_libdir_separator_F77=: + hardcode_shlibpath_var_F77=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + hardcode_direct_absolute_F77=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' + export_dynamic_flag_spec_F77='$wl-E' + else + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + 
hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' + fi + else + ld_shlibs_F77=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + allow_undefined_flag_F77=unsupported + shrext_cmds=.dll + archive_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds_F77='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_F77=yes + file_list_spec_F77='@' + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag_F77=' $wl-expect_unresolved $wl\*' + archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + 
allow_undefined_flag_F77=' -expect_unresolved \*' + archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + hardcode_libdir_separator_F77=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag_F77=' $wl-expect_unresolved $wl\*' + archive_cmds_F77='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' + else + allow_undefined_flag_F77=' -expect_unresolved \*' + archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds_F77='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec_F77='-rpath $libdir' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_separator_F77=: + ;; + + solaris*) + no_undefined_flag_F77=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds_F77='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat 
$export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds_F77='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds_F77='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_shlibpath_var_F77=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test yes = "$GCC"; then + whole_archive_flag_spec_F77='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs_F77=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ archive_cmds_F77='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_direct_F77=yes + hardcode_minus_L_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds_F77='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds_F77='$CC -r -o $output$reload_objs' + hardcode_direct_F77=no + ;; + motorola) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var_F77=no + ;; + + sysv4.3*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_F77=no + export_dynamic_flag_spec_F77='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_F77=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs_F77=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_F77='$wl-z,text' + archive_cmds_need_lc_F77=no + hardcode_shlibpath_var_F77=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds_F77='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -G $wl-Bexport:$export_symbols 
$wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag_F77='$wl-z,text' + allow_undefined_flag_F77='$wl-z,nodefs' + archive_cmds_need_lc_F77=no + hardcode_shlibpath_var_F77=no + hardcode_libdir_flag_spec_F77='$wl-R,$libdir' + hardcode_libdir_separator_F77=':' + link_all_deplibs_F77=yes + export_dynamic_flag_spec_F77='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds_F77='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_shlibpath_var_F77=no + ;; + + *) + ld_shlibs_F77=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec_F77='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_F77" >&5 +printf "%s\n" "$ld_shlibs_F77" >&6; } +test no = "$ld_shlibs_F77" && can_build_shared=no + +with_gnu_ld_F77=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? 
+# +case "x$archive_cmds_need_lc_F77" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_F77=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds_F77 in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... " >&6; } +if test ${lt_cv_archive_cmds_need_lc_F77+y} +then : + printf %s "(cached) " >&6 +else $as_nop + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_F77 + pic_flag=$lt_prog_compiler_pic_F77 + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_F77 + allow_undefined_flag_F77= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_F77=no + else + lt_cv_archive_cmds_need_lc_F77=yes + fi + allow_undefined_flag_F77=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_F77" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc_F77" >&6; } + archive_cmds_need_lc_F77=$lt_cv_archive_cmds_need_lc_F77 + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... " >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. 
+ # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + 
shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 
"|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec_F77='-L$libdir' + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_F77\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_F77\"" + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directores which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. 
+ if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major 
$libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action_F77= +if test -n "$hardcode_libdir_flag_spec_F77" || + test -n "$runpath_var_F77" || + test yes = "$hardcode_automatic_F77"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct_F77" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, F77)" && + test no != "$hardcode_minus_L_F77"; then + # Linking always hardcodes the temporary library directory. + hardcode_action_F77=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_F77=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_F77=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_F77" >&5 +printf "%s\n" "$hardcode_action_F77" >&6; } + +if test relink = "$hardcode_action_F77" || + test yes = "$inherit_rpath_F77"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_F77" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + +if test -z "$FC" || test no = "$FC"; then + 
_lt_disable_FC=yes +fi + +archive_cmds_need_lc_FC=no +allow_undefined_flag_FC= +always_export_symbols_FC=no +archive_expsym_cmds_FC= +export_dynamic_flag_spec_FC= +hardcode_direct_FC=no +hardcode_direct_absolute_FC=no +hardcode_libdir_flag_spec_FC= +hardcode_libdir_separator_FC= +hardcode_minus_L_FC=no +hardcode_automatic_FC=no +inherit_rpath_FC=no +module_cmds_FC= +module_expsym_cmds_FC= +link_all_deplibs_FC=unknown +old_archive_cmds_FC=$old_archive_cmds +reload_flag_FC=$reload_flag +reload_cmds_FC=$reload_cmds +no_undefined_flag_FC= +whole_archive_flag_spec_FC= +enable_shared_with_static_runtimes_FC=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +objext_FC=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_FC"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. 
+compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + compiler_FC=$CC + func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + + if test -n "$compiler"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +printf %s "checking if libtool supports shared libraries... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +printf "%s\n" "$can_build_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +printf %s "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +printf "%s\n" "$enable_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +printf %s "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +printf "%s\n" "$enable_static" >&6; } + + GCC_FC=$ac_cv_fc_compiler_gnu + LD_FC=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + # Dependencies to place before and after the object being linked: +predep_objects_FC= +postdep_objects_FC= +predeps_FC= +postdeps_FC= +compiler_lib_search_path_FC= + +cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF + + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. + pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case $prev$p in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. + if test x-L = "x$p" || + test x-R = "x$p"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test no = "$pre_test_object_deps_done"; then + case $prev in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$compiler_lib_search_path_FC"; then + compiler_lib_search_path_FC=$prev$p + else + compiler_lib_search_path_FC="${compiler_lib_search_path_FC} $prev$p" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$postdeps_FC"; then + postdeps_FC=$prev$p + else + postdeps_FC="${postdeps_FC} $prev$p" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. 
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test no = "$pre_test_object_deps_done"; then + if test -z "$predep_objects_FC"; then + predep_objects_FC=$p + else + predep_objects_FC="$predep_objects_FC $p" + fi + else + if test -z "$postdep_objects_FC"; then + postdep_objects_FC=$p + else + postdep_objects_FC="$postdep_objects_FC $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling FC test program" +fi + +$RM -f confest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken + + +case " $postdeps_FC " in +*" -lc "*) archive_cmds_need_lc_FC=no ;; +esac + compiler_lib_search_dirs_FC= +if test -n "${compiler_lib_search_path_FC}"; then + compiler_lib_search_dirs_FC=`echo " ${compiler_lib_search_path_FC}" | $SED -e 's! -L! !g' -e 's!^ !!'` +fi + + + + + + + + + + + + + + lt_prog_compiler_wl_FC= +lt_prog_compiler_pic_FC= +lt_prog_compiler_static_FC= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_static_FC='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_FC='-Bstatic' + fi + lt_prog_compiler_pic_FC='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_FC='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic_FC='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. 
+ ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_FC='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_FC='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_FC='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_FC= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_FC='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared_FC=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_FC='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_FC=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic_FC='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl_FC='-Xlinker ' + if test -n "$lt_prog_compiler_pic_FC"; then + lt_prog_compiler_pic_FC="-Xcompiler $lt_prog_compiler_pic_FC" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl_FC='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_FC='-Bstatic' + else + lt_prog_compiler_static_FC='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_FC='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl_FC='-Wl,-Wl,,' + lt_prog_compiler_pic_FC='-PIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_FC='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_FC='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl_FC='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_FC='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static_FC='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl_FC='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static_FC='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fPIC' + lt_prog_compiler_static_FC='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ icc* | ifort*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fPIC' + lt_prog_compiler_static_FC='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='--shared' + lt_prog_compiler_static_FC='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl_FC='-Wl,-Wl,,' + lt_prog_compiler_pic_FC='-PIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fPIC' + lt_prog_compiler_static_FC='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fpic' + lt_prog_compiler_static_FC='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl_FC='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static_FC='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-qpic' + lt_prog_compiler_static_FC='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + lt_prog_compiler_wl_FC='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + lt_prog_compiler_wl_FC='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + lt_prog_compiler_wl_FC='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fPIC' + lt_prog_compiler_static_FC='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-fpic' + lt_prog_compiler_static_FC='-Bstatic' + 
;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_FC='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl_FC='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static_FC='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static_FC='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl_FC='-Qoption ld ';; + *) + lt_prog_compiler_wl_FC='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl_FC='-Qoption ld ' + lt_prog_compiler_pic_FC='-PIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_FC='-Kconform_pic' + lt_prog_compiler_static_FC='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_pic_FC='-KPIC' + lt_prog_compiler_static_FC='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl_FC='-Wl,' + lt_prog_compiler_can_build_shared_FC=no + ;; + + uts4*) + lt_prog_compiler_pic_FC='-pic' + lt_prog_compiler_static_FC='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared_FC=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_FC= + ;; + *) + lt_prog_compiler_pic_FC="$lt_prog_compiler_pic_FC" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... 
" >&6; } +if test ${lt_cv_prog_compiler_pic_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_FC=$lt_prog_compiler_pic_FC +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_FC" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_FC" >&6; } +lt_prog_compiler_pic_FC=$lt_cv_prog_compiler_pic_FC + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic_FC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_FC works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_FC works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_works_FC=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_FC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_FC=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_FC" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works_FC" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works_FC"; then + case $lt_prog_compiler_pic_FC in + "" | " "*) ;; + *) lt_prog_compiler_pic_FC=" $lt_prog_compiler_pic_FC" ;; + esac +else + lt_prog_compiler_pic_FC= + lt_prog_compiler_can_build_shared_FC=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_FC eval lt_tmp_static_flag=\"$lt_prog_compiler_static_FC\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_static_works_FC=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_FC=yes + fi + else + lt_cv_prog_compiler_static_works_FC=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_FC" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works_FC" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works_FC"; then + : +else + lt_prog_compiler_static_FC= +fi + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_FC=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_FC=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_FC" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_FC" >&6; } + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o_FC=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_FC=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_FC" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o_FC" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o_FC" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag_FC= + always_export_symbols_FC=no + archive_cmds_FC= + archive_expsym_cmds_FC= + compiler_needs_object_FC=no + enable_shared_with_static_runtimes_FC=no + export_dynamic_flag_spec_FC= + export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic_FC=no + hardcode_direct_FC=no + hardcode_direct_absolute_FC=no + hardcode_libdir_flag_spec_FC= + hardcode_libdir_separator_FC= + hardcode_minus_L_FC=no + hardcode_shlibpath_var_FC=unsupported + inherit_rpath_FC=no + link_all_deplibs_FC=unknown + module_cmds_FC= + module_expsym_cmds_FC= + old_archive_from_new_cmds_FC= + old_archive_from_expsyms_cmds_FC= + thread_safe_flag_spec_FC= + whole_archive_flag_spec_FC= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms_FC= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. 
Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + exclude_expsyms_FC='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs_FC=no + ;; + esac + + ld_shlibs_FC=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. 
+ case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_FC='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_FC=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec_FC= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs_FC=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='' + ;; + m68k) + archive_cmds_FC='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_minus_L_FC=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_FC=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds_FC='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs_FC=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, FC) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec_FC='-L$libdir' + export_dynamic_flag_spec_FC='$wl--export-all-symbols' + allow_undefined_flag_FC=unsupported + always_export_symbols_FC=no + enable_shared_with_static_runtimes_FC=yes + export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_FC='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds_FC='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_FC=no + fi + ;; + + haiku*) + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs_FC=yes + ;; + + os2*) + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_minus_L_FC=yes + allow_undefined_flag_FC=unsupported + shrext_cmds=.dll + archive_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + 
emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds_FC='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_FC=yes + file_list_spec_FC='@' + ;; + + interix[3-9]*) + hardcode_direct_FC=no + hardcode_shlibpath_var_FC=no + hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' + export_dynamic_flag_spec_FC='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_FC='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec_FC= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_FC=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec_FC='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_FC=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds_FC='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_FC='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_FC='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec_FC='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + archive_cmds_FC='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_FC='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + 
$LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs_FC=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_FC='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs_FC=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_FC=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs_FC=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. 
Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_FC=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds_FC='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_FC=no + fi + ;; + esac + + if test no = "$ld_shlibs_FC"; then + runpath_var= + hardcode_libdir_flag_spec_FC= + export_dynamic_flag_spec_FC= + whole_archive_flag_spec_FC= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag_FC=unsupported + always_export_symbols_FC=yes + archive_expsym_cmds_FC='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L_FC=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct_FC=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_FC='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_FC='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds_FC='' + hardcode_direct_FC=yes + hardcode_direct_absolute_FC=yes + hardcode_libdir_separator_FC=':' + link_all_deplibs_FC=yes + file_list_spec_FC='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct_FC=no + hardcode_direct_absolute_FC=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_FC=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_FC=yes + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_libdir_separator_FC= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec_FC='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols_FC=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag_FC='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__FC=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__FC"; then + lt_cv_aix_libpath__FC=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__FC"; then + lt_cv_aix_libpath__FC=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__FC +fi + + hardcode_libdir_flag_spec_FC='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds_FC='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec_FC='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag_FC="-z nodefs" + archive_expsym_cmds_FC="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath__FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__FC=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__FC"; then + lt_cv_aix_libpath__FC=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__FC"; then + lt_cv_aix_libpath__FC=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__FC +fi + + hardcode_libdir_flag_spec_FC='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_FC=' $wl-bernotok' + allow_undefined_flag_FC=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_FC='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_FC='$convenience' + fi + archive_cmds_need_lc_FC=yes + archive_expsym_cmds_FC='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_FC='' + ;; + m68k) + archive_cmds_FC='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_FC='-L$libdir' + 
hardcode_minus_L_FC=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec_FC=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + hardcode_libdir_flag_spec_FC=' ' + allow_undefined_flag_FC=unsupported + always_export_symbols_FC=yes + file_list_spec_FC='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_FC='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds_FC='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, FC)='true' + enable_shared_with_static_runtimes_FC=yes + exclude_expsyms_FC='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds_FC='chmod 644 $oldlib' + postlink_cmds_FC='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + hardcode_libdir_flag_spec_FC=' ' + allow_undefined_flag_FC=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_FC='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds_FC='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds_FC='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes_FC=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc_FC=no + hardcode_direct_FC=no + hardcode_automatic_FC=yes + hardcode_shlibpath_var_FC=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec_FC='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + compiler_needs_object_FC=yes + else + whole_archive_flag_spec_FC='' + fi + link_all_deplibs_FC=yes + allow_undefined_flag_FC=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_FC="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds_FC="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds_FC="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds_FC="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs_FC=no + fi + + ;; + + dgux*) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_shlibpath_var_FC=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec_FC='-R$libdir' + hardcode_direct_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_FC=yes + hardcode_minus_L_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly* | midnightbsd*) + archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_FC='-R$libdir' + hardcode_direct_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds_FC='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds_FC='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' + hardcode_libdir_separator_FC=: + hardcode_direct_FC=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L_FC=yes + export_dynamic_flag_spec_FC='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_FC='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' + hardcode_libdir_separator_FC=: + hardcode_direct_FC=yes + hardcode_direct_absolute_FC=yes + export_dynamic_flag_spec_FC='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L_FC=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds_FC='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds_FC='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_FC='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_FC='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' + hardcode_libdir_separator_FC=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_FC=no + hardcode_shlibpath_var_FC=no + ;; + *) + hardcode_direct_FC=yes + hardcode_direct_absolute_FC=yes + export_dynamic_flag_spec_FC='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L_FC=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if test ${lt_cv_irix_exported_symbol+y} +then : + printf %s "(cached) " >&6 +else $as_nop + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat > conftest.$ac_ext <<_ACEOF + + subroutine foo + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + lt_cv_irix_exported_symbol=yes +else $as_nop + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + link_all_deplibs_FC=no + else + archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o 
$lib' + archive_expsym_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc_FC='no' + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + hardcode_libdir_separator_FC=: + inherit_rpath_FC=yes + link_all_deplibs_FC=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs_FC=yes + archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds_FC='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec_FC='-R$libdir' + hardcode_direct_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + newsos6) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_FC=yes + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + hardcode_libdir_separator_FC=: + hardcode_shlibpath_var_FC=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_FC=yes + hardcode_shlibpath_var_FC=no + hardcode_direct_absolute_FC=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' + export_dynamic_flag_spec_FC='$wl-E' + else + archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' + fi + else + 
ld_shlibs_FC=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_minus_L_FC=yes + allow_undefined_flag_FC=unsupported + shrext_cmds=.dll + archive_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds_FC='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_FC=yes + file_list_spec_FC='@' + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag_FC=' $wl-expect_unresolved $wl\*' + archive_cmds_FC='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + allow_undefined_flag_FC=' -expect_unresolved \*' + archive_cmds_FC='$CC 
-shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc_FC='no' + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + hardcode_libdir_separator_FC=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag_FC=' $wl-expect_unresolved $wl\*' + archive_cmds_FC='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' + else + allow_undefined_flag_FC=' -expect_unresolved \*' + archive_cmds_FC='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds_FC='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec_FC='-rpath $libdir' + fi + archive_cmds_need_lc_FC='no' + hardcode_libdir_separator_FC=: + ;; + + solaris*) + no_undefined_flag_FC=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds_FC='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC 
-shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds_FC='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds_FC='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec_FC='-R$libdir' + hardcode_shlibpath_var_FC=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test yes = "$GCC"; then + whole_archive_flag_spec_FC='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec_FC='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs_FC=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ archive_cmds_FC='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_FC='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_direct_FC=yes + hardcode_minus_L_FC=yes + hardcode_shlibpath_var_FC=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_FC=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds_FC='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds_FC='$CC -r -o $output$reload_objs' + hardcode_direct_FC=no + ;; + motorola) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_FC=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var_FC=no + ;; + + sysv4.3*) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_FC=no + export_dynamic_flag_spec_FC='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_FC=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs_FC=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_FC='$wl-z,text' + archive_cmds_need_lc_FC=no + hardcode_shlibpath_var_FC=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds_FC='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_FC='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib 
$libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag_FC='$wl-z,text' + allow_undefined_flag_FC='$wl-z,nodefs' + archive_cmds_need_lc_FC=no + hardcode_shlibpath_var_FC=no + hardcode_libdir_flag_spec_FC='$wl-R,$libdir' + hardcode_libdir_separator_FC=':' + link_all_deplibs_FC=yes + export_dynamic_flag_spec_FC='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds_FC='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_FC='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_FC='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_FC='-L$libdir' + hardcode_shlibpath_var_FC=no + ;; + + *) + ld_shlibs_FC=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec_FC='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_FC" >&5 +printf "%s\n" "$ld_shlibs_FC" >&6; } +test no = "$ld_shlibs_FC" && can_build_shared=no + +with_gnu_ld_FC=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? 
+# +case "x$archive_cmds_need_lc_FC" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_FC=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds_FC in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... " >&6; } +if test ${lt_cv_archive_cmds_need_lc_FC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_FC + pic_flag=$lt_prog_compiler_pic_FC + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_FC + allow_undefined_flag_FC= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_FC 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_FC 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_FC=no + else + lt_cv_archive_cmds_need_lc_FC=yes + fi + allow_undefined_flag_FC=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_FC" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc_FC" >&6; } + archive_cmds_need_lc_FC=$lt_cv_archive_cmds_need_lc_FC + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... " >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. 
+ # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + 
shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 
"|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec_FC='-L$libdir' + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_FC\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_FC\"" + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directores which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. 
+ if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major 
$libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action_FC= +if test -n "$hardcode_libdir_flag_spec_FC" || + test -n "$runpath_var_FC" || + test yes = "$hardcode_automatic_FC"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct_FC" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, FC)" && + test no != "$hardcode_minus_L_FC"; then + # Linking always hardcodes the temporary library directory. + hardcode_action_FC=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_FC=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_FC=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_FC" >&5 +printf "%s\n" "$hardcode_action_FC" >&6; } + +if test relink = "$hardcode_action_FC" || + test yes = "$inherit_rpath_FC"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_FC" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + + +# Autoupdate added the next two lines to ensure that your configure +# script's behavior did not change. They are probably safe to remove. 
+ +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... " >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5 +printf %s "checking for C/C++ restrict keyword... " >&6; } +if test ${ac_cv_c_restrict+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_c_restrict=no + # Put '__restrict__' first, to avoid problems with glibc and non-GCC; see: + # https://lists.gnu.org/archive/html/bug-autoconf/2016-02/msg00006.html + # Put 'restrict' last, because C++ lacks it. + for ac_kw in __restrict__ __restrict _Restrict restrict; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +typedef int *int_ptr; + int foo (int_ptr $ac_kw ip) { return ip[0]; } + int bar (int [$ac_kw]); /* Catch GCC bug 14050. */ + int bar (int ip[$ac_kw]) { return ip[0]; } + +int +main (void) +{ +int s[1]; + int *$ac_kw t = s; + t[0] = 0; + return foo (t) + bar (t); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_c_restrict=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + test "$ac_cv_c_restrict" != no && break + done + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_restrict" >&5 +printf "%s\n" "$ac_cv_c_restrict" >&6; } + + case $ac_cv_c_restrict in + restrict) ;; + no) printf "%s\n" "#define restrict /**/" >>confdefs.h + ;; + *) printf "%s\n" "#define restrict $ac_cv_c_restrict" >>confdefs.h + ;; + esac + + +# Check if bash is available +# Extract the first word of "bash", so it can be a program name with args. +set dummy bash; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_REALBASH+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $REALBASH in + [\\/]* | ?:[\\/]*) + ac_cv_path_REALBASH="$REALBASH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="/bin:$PATH" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_REALBASH="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +REALBASH=$ac_cv_path_REALBASH +if test -n "$REALBASH"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $REALBASH" >&5 +printf "%s\n" "$REALBASH" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + +# Record git version +# Extract the first word of "git", so it can be a program name with args. +set dummy git; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_gitcommand+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $gitcommand in + [\\/]* | ?:[\\/]*) + ac_cv_path_gitcommand="$gitcommand" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_gitcommand="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +gitcommand=$ac_cv_path_gitcommand +if test -n "$gitcommand"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gitcommand" >&5 +printf "%s\n" "$gitcommand" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +if test -f $srcdir/STARPU-REVISION ; then + cp $srcdir/STARPU-REVISION . +elif test "$gitcommand" = "" ; then + echo "unknown" > ./STARPU-REVISION +else + bdir=$PWD + cd $srcdir + git log -n 1 --pretty="%H%d" . > $bdir/STARPU-REVISION_tmp + cd $bdir + if test -s ./STARPU-REVISION_tmp ; then + mv ./STARPU-REVISION_tmp ./STARPU-REVISION + else + echo "unknown" > ./STARPU-REVISION + fi +fi + + if test "x$cross_compiling" = "xyes"; then + STARPU_CROSS_COMPILING_TRUE= + STARPU_CROSS_COMPILING_FALSE='#' +else + STARPU_CROSS_COMPILING_TRUE='#' + STARPU_CROSS_COMPILING_FALSE= +fi + + +############################################################################### +# # +# MPI compilers # +# # +############################################################################### + +#Check MPICC +if test x$enable_simgrid = xyes ; then + DEFAULT_MPICC=smpicc +else + DEFAULT_MPICC=mpicc +fi + +# Check whether --with-mpicc was given. +if test ${with_mpicc+y} +then : + withval=$with_mpicc; DEFAULT_MPICC=$withval +fi + +case $DEFAULT_MPICC in + /*) mpicc_path="$DEFAULT_MPICC" ;; + *) # Extract the first word of "$DEFAULT_MPICC", so it can be a program name with args. 
+set dummy $DEFAULT_MPICC; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_mpicc_path+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $mpicc_path in + [\\/]* | ?:[\\/]*) + ac_cv_path_mpicc_path="$mpicc_path" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$simgrid_dir/bin:$PATH" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_mpicc_path="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_mpicc_path" && ac_cv_path_mpicc_path="no" + ;; +esac +fi +mpicc_path=$ac_cv_path_mpicc_path +if test -n "$mpicc_path"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicc_path" >&5 +printf "%s\n" "$mpicc_path" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + ;; +esac +# We test if the MPICC compiler exists +if test ! -x $mpicc_path; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpicc compiler '$mpicc_path' does not have the execute permission" >&5 +printf "%s\n" "The mpicc compiler '$mpicc_path' does not have the execute permission" >&6; } + mpicc_path=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpicc is available" >&5 +printf %s "checking whether mpicc is available... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicc_path" >&5 +printf "%s\n" "$mpicc_path" >&6; } +MPICC=$mpicc_path + + +if test x$mpicc_path != xno ; then + MPIPATH=$(dirname $mpicc_path):$PATH +else + MPIPATH=$PATH +fi + +#Check MPICXX/MPIC++ +if test x$enable_simgrid = xyes ; then + DEFAULT_MPICXX=smpicxx +else + DEFAULT_MPICXX=mpicxx +fi + +# Check whether --with-mpicxx was given. +if test ${with_mpicxx+y} +then : + withval=$with_mpicxx; DEFAULT_MPICXX=$withval +fi + +case $DEFAULT_MPICXX in + /*) mpicxx_path="$DEFAULT_MPICXX" ;; + *) # Extract the first word of "$DEFAULT_MPICXX", so it can be a program name with args. +set dummy $DEFAULT_MPICXX; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_mpicxx_path+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $mpicxx_path in + [\\/]* | ?:[\\/]*) + ac_cv_path_mpicxx_path="$mpicxx_path" # Let the user override the test with a path. 
+  ;;
+  *)
+  # Tail of the mpicxx probe: walk $MPIPATH and keep the first executable
+  # match in the cache variable ac_cv_path_mpicxx_path.
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $MPIPATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_path_mpicxx_path="$as_dir$ac_word$ac_exec_ext"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_mpicxx_path" && ac_cv_path_mpicxx_path="no"
+  ;;
+esac
+fi
+mpicxx_path=$ac_cv_path_mpicxx_path
+if test -n "$mpicxx_path"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5
+printf "%s\n" "$mpicxx_path" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+ ;;
+esac
+
+# try with mpic++ if mpicxx was not found
+if test x$mpicxx_path = xno ; then
+    DEFAULT_MPICXX=mpic++
+    # The failed mpicxx probe above left ac_cv_path_mpicxx_path=no.  The probe
+    # below shares that cache variable, so without clearing it the mpic++
+    # lookup would always short-circuit to "(cached) no" and never search.
+    { ac_cv_path_mpicxx_path=; unset ac_cv_path_mpicxx_path;}
+    # Extract the first word of "$DEFAULT_MPICXX", so it can be a program name with args.
+set dummy $DEFAULT_MPICXX; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_path_mpicxx_path+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $mpicxx_path in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_mpicxx_path="$mpicxx_path" # Let the user override the test with a path.
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $MPIPATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_mpicxx_path="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_mpicxx_path" && ac_cv_path_mpicxx_path="no" + ;; +esac +fi +mpicxx_path=$ac_cv_path_mpicxx_path +if test -n "$mpicxx_path"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5 +printf "%s\n" "$mpicxx_path" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi + +# We test if the MPICXX/MPIC++ compiler exists +if test ! -x $mpicxx_path; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpicxx compiler '$mpicxx_path' does not have the execute permission" >&5 +printf "%s\n" "The mpicxx compiler '$mpicxx_path' does not have the execute permission" >&6; } + mpicxx_path=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpicxx is available" >&5 +printf %s "checking whether mpicxx is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5 +printf "%s\n" "$mpicxx_path" >&6; } +MPICXX=$mpicxx_path + + +# Check if mpiexec is available +if test x$enable_simgrid = xyes ; then + DEFAULT_MPIEXEC=smpirun + +# Check whether --with-smpirun was given. +if test ${with_smpirun+y} +then : + withval=$with_smpirun; DEFAULT_MPIEXEC=$withval +fi + +else + DEFAULT_MPIEXEC=mpiexec + +# Check whether --with-mpiexec was given. 
+if test ${with_mpiexec+y} +then : + withval=$with_mpiexec; DEFAULT_MPIEXEC=$withval +fi + +fi + +case $DEFAULT_MPIEXEC in + /*) mpiexec_path="$DEFAULT_MPIEXEC" ;; + *) # Extract the first word of "$DEFAULT_MPIEXEC", so it can be a program name with args. +set dummy $DEFAULT_MPIEXEC; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_mpiexec_path+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $mpiexec_path in + [\\/]* | ?:[\\/]*) + ac_cv_path_mpiexec_path="$mpiexec_path" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $MPIPATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_mpiexec_path="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_mpiexec_path" && ac_cv_path_mpiexec_path="no" + ;; +esac +fi +mpiexec_path=$ac_cv_path_mpiexec_path +if test -n "$mpiexec_path"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpiexec_path" >&5 +printf "%s\n" "$mpiexec_path" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpiexec is available" >&5 +printf %s "checking whether mpiexec is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpiexec_path" >&5 +printf "%s\n" "$mpiexec_path" >&6; } + +# We test if MPIEXEC exists +if test ! 
-x $mpiexec_path; then
+    # (The line above completes the "if test !" opened on the previous line.)
+    # mpiexec is unusable: report it, default the MPI checks to off and
+    # export an empty MPIEXEC.
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpiexec script '$mpiexec_path' is not valid" >&5
+printf "%s\n" "The mpiexec script '$mpiexec_path' is not valid" >&6; }
+    default_enable_mpi_check=no
+    mpiexec_path=""
+fi
+MPIEXEC=$mpiexec_path
+
+
+###############################################################################
+#                                                                             #
+#                                    MPI                                      #
+#                                                                             #
+###############################################################################
+
+# Check whether --enable-mpi was given.
+if test ${enable_mpi+y}
+then :
+  enableval=$enable_mpi; enable_mpi=$enableval
+else $as_nop
+  enable_mpi=$default_enable_mpi
+fi
+
+
+# "maybe" resolves to yes only when the detected mpicc is executable.
+if test x$enable_mpi = xmaybe ; then
+    if test -x "$mpicc_path"; then
+	enable_mpi=yes
+    else
+	enable_mpi=no
+    fi
+fi
+
+# in case MPI was explicitly required, but mpicc is not available, this is an error
+if test x$enable_mpi = xyes ; then
+    if test ! -x "$mpicc_path"; then
+       as_fn_error $? "Compiler MPI '$mpicc_path' not valid" "$LINENO" 5
+    fi
+
+    # Run the next two checks with mpicc as the C compiler; CC is restored
+    # at the end of this branch.
+    OLD_CC=$CC
+    CC=$mpicc_path
+    # Compile-only probe for the MPI extension header.  The header names had
+    # been stripped from the two #include directives, which made the test
+    # program fail unconditionally so STARPU_HAVE_MPI_EXT was never defined.
+    # NOTE(review): <mpi-ext.h> is inferred from the macro name -- confirm
+    # against configure.ac.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+	 #include <mpi.h>
+	 #include <mpi-ext.h>
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+printf "%s\n" "#define STARPU_HAVE_MPI_EXT 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+
+    ac_fn_c_check_func "$LINENO" "MPI_Comm_create_group" "ac_cv_func_MPI_Comm_create_group"
+if test "x$ac_cv_func_MPI_Comm_create_group" = xyes
+then :
+
+printf "%s\n" "#define STARPU_HAVE_MPI_COMM_CREATE_GROUP 1" >>confdefs.h
+
+fi
+
+    CC=$OLD_CC
+fi
+
+# May be turned off again further down (nmad / master-slave sections).
+build_mpi_lib=$enable_mpi
+
+# Check whether --enable-mpi-minimal-tests was given.
+if test ${enable_mpi_minimal_tests+y}
+then :
+  enableval=$enable_mpi_minimal_tests; enable_mpi_minimal_tests=$enableval
+else $as_nop
+  enable_mpi_minimal_tests=no
+fi
+
+
+# Automake conditional STARPU_MPI_MINIMAL_TESTS.
+ if test x$enable_mpi_minimal_tests = xyes; then
+  STARPU_MPI_MINIMAL_TESTS_TRUE=
+  STARPU_MPI_MINIMAL_TESTS_FALSE='#'
+else
+  STARPU_MPI_MINIMAL_TESTS_TRUE='#'
+  STARPU_MPI_MINIMAL_TESTS_FALSE=
+fi
+
+
+
+###############################################################################
+#                                                                             #
+#                                NEW MADELEINE                                #
+#                                                                             #
+###############################################################################
+
+# Check whether --enable-nmad was given.
+if test ${enable_nmad+y}
+then :
+  enableval=$enable_nmad; enable_nmad=$enableval
+else $as_nop
+  enable_nmad=no
+fi
+
+
+build_nmad_lib=no
+# NOTE(review): cc_or_mpicc is not assigned in any section visible before this
+# point; CC_OR_MPICC is assigned again after the MPI verbose checks, so this
+# early value looks like a placeholder -- confirm.
+CC_OR_MPICC=$cc_or_mpicc
+
+#We can only build StarPU MPI Library if User wants it and MPI is available
+if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then
+    # The nmad backend replaces the plain MPI backend.
+    build_nmad_lib=yes
+    build_mpi_lib=no
+
+pkg_failed=no
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for NMAD" >&5
+printf %s "checking for NMAD... " >&6; }
+
+# User-provided NMAD_CFLAGS / NMAD_LIBS take precedence over pkg-config.
+if test -n "$PKG_CONFIG"; then
+    if test -n "$NMAD_CFLAGS"; then
+        pkg_cv_NMAD_CFLAGS="$NMAD_CFLAGS"
+    else
+        if test -n "$PKG_CONFIG" && \
+    { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"nmad\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "nmad") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_NMAD_CFLAGS=`$PKG_CONFIG --cflags "nmad" 2>/dev/null`
+else
+  pkg_failed=yes
+fi
+    fi
+else
+	pkg_failed=untried
+fi
+if test -n "$PKG_CONFIG"; then
+    if test -n "$NMAD_LIBS"; then
+        pkg_cv_NMAD_LIBS="$NMAD_LIBS"
+    else
+        if test -n "$PKG_CONFIG" && \
+    { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"nmad\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "nmad") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_NMAD_LIBS=`$PKG_CONFIG --libs "nmad" 2>/dev/null`
+else
+  pkg_failed=yes
+fi
+    fi
+else
+	pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+	        NMAD_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "nmad"`
+        else
+	        NMAD_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "nmad"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$NMAD_PKG_ERRORS" >&5
+
+	as_fn_error $? "Package requirements (nmad) were not met:
+
+$NMAD_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables NMAD_CFLAGS
+and NMAD_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+" "$LINENO" 5
+elif test $pkg_failed = untried; then
+	{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables NMAD_CFLAGS
+and NMAD_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+	NMAD_CFLAGS=$pkg_cv_NMAD_CFLAGS
+	NMAD_LIBS=$pkg_cv_NMAD_LIBS
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+	:
+fi
+    # Probe optional nmad/PIOman entry points with the nmad flags in effect,
+    # then restore the caller's CFLAGS and LIBS.
+    save_LIBS="$LIBS"
+    save_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS $NMAD_CFLAGS"
+    LIBS="$LIBS $NMAD_LIBS"
+    ac_fn_c_check_func "$LINENO" "piom_ltask_set_bound_thread_os_indexes" "ac_cv_func_piom_ltask_set_bound_thread_os_indexes"
+if test "x$ac_cv_func_piom_ltask_set_bound_thread_os_indexes" = xyes
+then :
+  printf "%s\n" "#define HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES 1" >>confdefs.h
+
+fi
+
+    ac_fn_c_check_func "$LINENO" "nm_trace_add_synchro_point" "ac_cv_func_nm_trace_add_synchro_point"
+if test "x$ac_cv_func_nm_trace_add_synchro_point" = xyes
+then :
+  printf "%s\n" "#define HAVE_NM_TRACE_ADD_SYNCHRO_POINT 1" >>confdefs.h
+
+fi
+
+    CFLAGS="$save_CFLAGS"
+    LIBS="$save_LIBS"
+else
+    build_nmad_lib=no
+fi
+
+# If MadMPI is used, MadMPI can't be built with PIOman (we don't want communication progression to be done in both StarPU and MadMPI):
+# The wrapper is only queried once MPI is known to be enabled without nmad.
+# A single "test ... -a ! -z `cmd`" expands the command substitution before
+# evaluating anything, which would run "$mpicc_path --showme" even when
+# mpicc_path is "no".  stderr is discarded because not every MPI wrapper
+# understands --showme.
+if test x$enable_mpi = xyes && test x$build_nmad_lib = xno ; then
+    if $mpicc_path --showme 2>/dev/null | grep pioman >/dev/null 2>&1 ; then
+	{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression." >&5
+printf "%s\n" "$as_me: WARNING: Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression." >&2;}
+
+printf "%s\n" "#define HAVE_PIOMAN 1" >>confdefs.h
+
+    fi
+fi
+
+###############################################################################
+#                                                                             #
+#                              MPI Master Slave                               #
+#                                                                             #
+###############################################################################
+
+# Check whether --enable-mpi-master-slave was given.
+if test ${enable_mpi_master_slave+y}
+then :
+  enableval=$enable_mpi_master_slave; use_mpi_master_slave=$enableval
+else $as_nop
+  use_mpi_master_slave=no
+fi
+
+
+# Master-slave mode is rejected under simgrid when requested, and forced off
+# otherwise.
+if test x$enable_simgrid = xyes; then
+	if test x$use_mpi_master_slave = xyes; then
+		as_fn_error $? "MPI Master Slave not supported with simgrid" "$LINENO" 5
+	fi
+	use_mpi_master_slave=no
+fi
+
+# in case it is explicitly required, but mpicc is not available, this is an error
+if test x$use_mpi_master_slave = xyes -a ! -x "$mpicc_path"; then
+   as_fn_error $? "Compiler MPI '$mpicc_path' not valid" "$LINENO" 5
+fi
+
+#We can only build MPI Master Slave if User wants it and MPI compiler are available
+if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then
+    build_mpi_master_slave=yes
+else
+    build_mpi_master_slave=no
+fi
+
+#users cannot use both at the same time
+if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI..." >&5
+printf "%s\n" "$as_me: WARNING: StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI..." >&2;}
+    build_mpi_lib=no
+    build_nmad_lib=no
+    enable_mpi=no
+fi
+
+if test x$build_mpi_master_slave = xyes; then
+
+printf "%s\n" "#define STARPU_USE_MPI_MASTER_SLAVE 1" >>confdefs.h
+
+    # Compile and link everything through the MPI wrappers.
+    CC=$mpicc_path
+    CCLD=$mpicc_path
+    CXX=$mpicxx_path
+    # Was "CXXLD=mpicxx_path" (missing "$"), which set the C++ linker to the
+    # literal string "mpicxx_path".
+    CXXLD=$mpicxx_path
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the MPI master-slave mode should be enabled" >&5
+printf %s "checking whether the MPI master-slave mode should be enabled... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_mpi_master_slave" >&5
+printf "%s\n" "$build_mpi_master_slave" >&6; }
+ if test x$build_mpi_master_slave = xyes; then
+  STARPU_USE_MPI_MASTER_SLAVE_TRUE=
+  STARPU_USE_MPI_MASTER_SLAVE_FALSE='#'
+else
+  STARPU_USE_MPI_MASTER_SLAVE_TRUE='#'
+  STARPU_USE_MPI_MASTER_SLAVE_FALSE=
+fi
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of MPI master-slave devices" >&5
+printf %s "checking maximum number of MPI master-slave devices... " >&6; }
+# Check whether --enable-maxmpidev was given.
+if test ${enable_maxmpidev+y}
+then :
+  enableval=$enable_maxmpidev; nmaxmpidev=$enableval
+else $as_nop
+
+	     # Default: 4 devices when master-slave mode is built, none otherwise.
+	     if test x$build_mpi_master_slave = xyes; then
+		 nmaxmpidev=4
+	     else
+		 nmaxmpidev=0
+	     fi
+
+fi
+
+# A bare --enable-maxmpidev (value "yes") or an empty value is not a number.
+if test x$nmaxmpidev = x -o x$nmaxmpidev = xyes
+then
+	as_fn_error $? "The --enable-maxmpidev option needs to be given a number" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxmpidev" >&5
+printf "%s\n" "$nmaxmpidev" >&6; }
+
+printf "%s\n" "#define STARPU_MAXMPIDEVS $nmaxmpidev" >>confdefs.h
+
+
+###############################################################################
+#                                                                             #
+#                             TCP/IP Master Slave                             #
+#                                                                             #
+###############################################################################
+
+# Check whether --enable-tcpip-master-slave was given.
+if test ${enable_tcpip_master_slave+y}
+then :
+  enableval=$enable_tcpip_master_slave; build_tcpip_master_slave=$enableval
+else $as_nop
+  build_tcpip_master_slave=no
+fi
+
+
+if test x$build_tcpip_master_slave = xyes; then
+    # Link probe for dlsym in -ldl; -ldl is prepended to LIBS when found.
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
+printf %s "checking for dlsym in -ldl... " >&6; }
+if test ${ac_cv_lib_dl_dlsym+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply. */
+char dlsym ();
+int
+main (void)
+{
+return dlsym ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_dl_dlsym=yes
+else $as_nop
+  ac_cv_lib_dl_dlsym=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5
+printf "%s\n" "$ac_cv_lib_dl_dlsym" >&6; }
+if test "x$ac_cv_lib_dl_dlsym" = xyes
+then :
+  printf "%s\n" "#define HAVE_LIBDL 1" >>confdefs.h
+
+  LIBS="-ldl $LIBS"
+
+fi
+
+
+printf "%s\n" "#define STARPU_USE_TCPIP_MASTER_SLAVE 1" >>confdefs.h
+
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the TCP/IP master-slave mode should be enabled" >&5
+printf %s "checking whether the TCP/IP master-slave mode should be enabled... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_tcpip_master_slave" >&5
+printf "%s\n" "$build_tcpip_master_slave" >&6; }
+ if test x$build_tcpip_master_slave = xyes; then
+  STARPU_USE_TCPIP_MASTER_SLAVE_TRUE=
+  STARPU_USE_TCPIP_MASTER_SLAVE_FALSE='#'
+else
+  STARPU_USE_TCPIP_MASTER_SLAVE_TRUE='#'
+  STARPU_USE_TCPIP_MASTER_SLAVE_FALSE=
+fi
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of TCP/IP master-slave devices" >&5
+printf %s "checking maximum number of TCP/IP master-slave devices... " >&6; }
+# Check whether --enable-maxtcpipdev was given.
+if test ${enable_maxtcpipdev+y}
+then :
+  enableval=$enable_maxtcpipdev; nmaxtcpipdev=$enableval
+else $as_nop
+
+	     # Default: 4 devices when TCP/IP master-slave is built, none otherwise.
+	     if test x$build_tcpip_master_slave = xyes; then
+		 nmaxtcpipdev=4
+	     else
+		 nmaxtcpipdev=0
+	     fi
+
+fi
+
+if test x$nmaxtcpipdev = x -o x$nmaxtcpipdev = xyes
+then
+	as_fn_error $? "The --enable-maxtcpipdev option needs to be given a number" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxtcpipdev" >&5
+printf "%s\n" "$nmaxtcpipdev" >&6; }
+
+printf "%s\n" "#define STARPU_MAXTCPIPDEVS $nmaxtcpipdev" >>confdefs.h
+
+
+###############################################################################
+#                                                                             #
+#                        Miscellaneous things for MPI                         #
+#                                                                             #
+###############################################################################
+
+# Check whether --enable-mpi-pedantic-isend was given.
+if test ${enable_mpi_pedantic_isend+y}
+then :
+  enableval=$enable_mpi_pedantic_isend; enable_mpi_pedantic_isend=$enableval
+else $as_nop
+  enable_mpi_pedantic_isend=no
+fi
+
+if test x$enable_mpi_pedantic_isend = xyes; then
+
+printf "%s\n" "#define STARPU_MPI_PEDANTIC_ISEND 1" >>confdefs.h
+
+fi
+
+# If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check
+if test -d "$srcdir/.git" -o -f "$srcdir/.git" ; then
+    default_enable_mpi_check=$enable_mpi
+fi
+# Check whether --enable-mpi-check was given.
+if test ${enable_mpi_check+y}
+then :
+  enableval=$enable_mpi_check; enable_mpi_check=$enableval
+else $as_nop
+  enable_mpi_check=$default_enable_mpi_check
+fi
+
+# running_mpi_check is yes for "yes"/"maybe" and forced to no whenever MPI
+# itself is disabled; explicitly asking for checks without MPI is an error.
+running_mpi_check=no
+if test x$enable_mpi_check = xyes ; then
+    running_mpi_check=yes
+    if test x$enable_mpi = xno ; then
+	as_fn_error $? "MPI checks requested, but MPI is disabled" "$LINENO" 5
+    fi
+fi
+if test x$enable_mpi_check = xmaybe ; then
+    running_mpi_check=yes
+fi
+if test x$enable_mpi_check = xno ; then
+    running_mpi_check=no
+fi
+if test x$enable_mpi = xno ; then
+    running_mpi_check=no
+fi
+
+ if test x$running_mpi_check = xyes; then
+  STARPU_MPI_CHECK_TRUE=
+  STARPU_MPI_CHECK_FALSE='#'
+else
+  STARPU_MPI_CHECK_TRUE='#'
+  STARPU_MPI_CHECK_FALSE=
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI tests should be run" >&5
+printf %s "checking whether MPI tests should be run... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $running_mpi_check" >&5
+printf "%s\n" "$running_mpi_check" >&6; }
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the StarPU MPI library should be generated" >&5
+printf %s "checking whether the StarPU MPI library should be generated... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_mpi_lib" >&5
+printf "%s\n" "$build_mpi_lib" >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the StarPU MPI nmad library should be generated" >&5
+printf %s "checking whether the StarPU MPI nmad library should be generated... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_nmad_lib" >&5
+printf "%s\n" "$build_nmad_lib" >&6; }
+
+if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
+
+printf "%s\n" "#define STARPU_USE_MPI 1" >>confdefs.h
+
+    if test x$build_mpi_lib = xyes ; then
+
+printf "%s\n" "#define STARPU_USE_MPI_MPI 1" >>confdefs.h
+
+    else
+
+printf "%s\n" "#define STARPU_USE_MPI_NMAD 1" >>confdefs.h
+
+    fi
+fi
+
+# Initialised unconditionally so that a value inherited from the environment
+# or from an unrecognised --enable-mpi-sync-clocks cannot switch on the
+# STARPU_MPI_SYNC_CLOCKS conditional below while MPI is disabled.
+enable_mpi_sync_clocks=no
+
+if test x$enable_mpi = xyes ; then
+    if test x$enable_simgrid = xyes ; then
+	if test x$enable_shared = xyes ; then
+	    as_fn_error $? "MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi" "$LINENO" 5
+	else
+	    CFLAGS="$CFLAGS -fPIC"
+	    CXXFLAGS="$CXXFLAGS -fPIC"
+	    NVCCFLAGS="$NVCCFLAGS --compiler-options -fPIC"
+	    HIPCCFLAGS="$HIPCCFLAGS --compiler-options -fPIC"
+	    FFLAGS="$FFLAGS -fPIC"
+	    # Was FCLAGS="$FFLAGS -fPIC": a misspelt variable that nothing
+	    # reads, built from the Fortran 77 flags instead of FCFLAGS.
+	    FCFLAGS="$FCFLAGS -fPIC"
+	fi
+    fi
+
+
+pkg_failed=no
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MPI_SYNC_CLOCKS" >&5
+printf %s "checking for MPI_SYNC_CLOCKS... " >&6; }
+
+# User-provided MPI_SYNC_CLOCKS_CFLAGS / _LIBS take precedence over pkg-config.
+if test -n "$PKG_CONFIG"; then
+    if test -n "$MPI_SYNC_CLOCKS_CFLAGS"; then
+        pkg_cv_MPI_SYNC_CLOCKS_CFLAGS="$MPI_SYNC_CLOCKS_CFLAGS"
+    else
+        if test -n "$PKG_CONFIG" && \
+    { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"mpi_sync_clocks\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "mpi_sync_clocks") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_MPI_SYNC_CLOCKS_CFLAGS=`$PKG_CONFIG --cflags "mpi_sync_clocks" 2>/dev/null`
+else
+  pkg_failed=yes
+fi
+    fi
+else
+	pkg_failed=untried
+fi
+if test -n "$PKG_CONFIG"; then
+    if test -n "$MPI_SYNC_CLOCKS_LIBS"; then
+        pkg_cv_MPI_SYNC_CLOCKS_LIBS="$MPI_SYNC_CLOCKS_LIBS"
+    else
+        if test -n "$PKG_CONFIG" && \
+    { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"mpi_sync_clocks\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "mpi_sync_clocks") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_MPI_SYNC_CLOCKS_LIBS=`$PKG_CONFIG --libs "mpi_sync_clocks" 2>/dev/null`
+else
+  pkg_failed=yes
+fi
+    fi
+else
+	pkg_failed=untried
+fi
+
+
+
+# Unlike nmad, a missing mpi_sync_clocks is not fatal: it only leaves
+# enable_mpi_sync_clocks at "no".
+if test $pkg_failed = yes; then
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+	        MPI_SYNC_CLOCKS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "mpi_sync_clocks"`
+        else
+	        MPI_SYNC_CLOCKS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "mpi_sync_clocks"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$MPI_SYNC_CLOCKS_PKG_ERRORS" >&5
+
+	{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+                enable_mpi_sync_clocks=no
+elif test $pkg_failed = untried; then
+	enable_mpi_sync_clocks=no
+else
+	MPI_SYNC_CLOCKS_CFLAGS=$pkg_cv_MPI_SYNC_CLOCKS_CFLAGS
+	MPI_SYNC_CLOCKS_LIBS=$pkg_cv_MPI_SYNC_CLOCKS_LIBS
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+	enable_mpi_sync_clocks=yes
+fi
+    if test x$enable_mpi_sync_clocks = xyes ; then
+
+printf "%s\n" "#define STARPU_HAVE_MPI_SYNC_CLOCKS 1" >>confdefs.h
+
+    fi
+fi
+
+ if test x$enable_mpi_sync_clocks = xyes; then
+  STARPU_MPI_SYNC_CLOCKS_TRUE=
+  STARPU_MPI_SYNC_CLOCKS_FALSE='#'
+else
+  STARPU_MPI_SYNC_CLOCKS_TRUE='#'
+  STARPU_MPI_SYNC_CLOCKS_FALSE=
+fi
+
+ if test x$build_mpi_lib = xyes; then
+  STARPU_USE_MPI_MPI_TRUE=
+  STARPU_USE_MPI_MPI_FALSE='#'
+else
+  STARPU_USE_MPI_MPI_TRUE='#'
+  STARPU_USE_MPI_MPI_FALSE=
+fi
+
+ if test x$build_nmad_lib = xyes; then
+  STARPU_USE_MPI_NMAD_TRUE=
+  STARPU_USE_MPI_NMAD_FALSE='#'
+else
+  STARPU_USE_MPI_NMAD_TRUE='#'
+  STARPU_USE_MPI_NMAD_FALSE=
+fi
+
+ if test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes; then
+  STARPU_USE_MPI_TRUE=
+  STARPU_USE_MPI_FALSE='#'
+else
+  STARPU_USE_MPI_TRUE='#'
+  STARPU_USE_MPI_FALSE=
+fi
+
+
+###### Failure tolerance material #######
+default_enable_mpi_ft=no
+# use_mpi_ft was previously assigned only on the success path below, so the
+# STARPU_USE_MPI_FT conditional could pick up a stray environment value.
+use_mpi_ft=no
+# Check whether --enable-mpi-ft was given.
+if test ${enable_mpi_ft+y}
+then :
+  enableval=$enable_mpi_ft; enable_mpi_ft=$enableval
+else $as_nop
+  enable_mpi_ft=$default_enable_mpi_ft
+fi
+
+
+default_enable_mpi_ft_stats=no
+use_mpi_ft_stats=no
+# Check whether --enable-mpi-ft-stats was given.
+if test ${enable_mpi_ft_stats+y}
+then :
+  enableval=$enable_mpi_ft_stats; enable_mpi_ft_stats=$enableval
+else $as_nop
+  enable_mpi_ft_stats=$default_enable_mpi_ft_stats
+fi
+
+
+# TODO: Check MPI version to be ULFM
+if test x$enable_mpi_ft = xyes ; then
+	# Failure tolerance needs the plain MPI backend (build_mpi_lib=yes).
+	if test x$build_mpi_lib != xyes ; then
+	    as_fn_error $? "Failure tolerance mechanisms only work with a particular MPI implementation: ULFM (OpenMPI based)." "$LINENO" 5
+	else
+
+printf "%s\n" "#define STARPU_USE_MPI_FT 1" >>confdefs.h
+
+	    use_mpi_ft=yes;
+	    if test x$enable_mpi_ft_stats = xyes ; then
+
+printf "%s\n" "#define STARPU_USE_MPI_FT_STATS 1" >>confdefs.h
+
+		use_mpi_ft_stats=$enable_mpi_ft_stats;
+	    fi
+	fi
+fi
+
+ if test x$use_mpi_ft = xyes; then
+  STARPU_USE_MPI_FT_TRUE=
+  STARPU_USE_MPI_FT_FALSE='#'
+else
+  STARPU_USE_MPI_FT_TRUE='#'
+  STARPU_USE_MPI_FT_FALSE=
+fi
+
+ if test x$use_mpi_ft_stats = xyes; then
+  STARPU_USE_MPI_FT_STATS_TRUE=
+  STARPU_USE_MPI_FT_STATS_FALSE='#'
+else
+  STARPU_USE_MPI_FT_STATS_TRUE='#'
+  STARPU_USE_MPI_FT_STATS_FALSE=
+fi
+
+
+###### End of failure tolerance material ######
+
+
+
+
+# Check whether --with-mpiexec-args was given.
+if test ${with_mpiexec_args+y}
+then :
+  withval=$with_mpiexec_args;
+	mpiexec_args=$withval
+
+fi
+
+MPIEXEC_ARGS=$mpiexec_args
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI debug messages should be displayed" >&5
+printf %s "checking whether MPI debug messages should be displayed... " >&6; }
+# Check whether --enable-mpi-verbose was given.
+if test ${enable_mpi_verbose+y} +then : + enableval=$enable_mpi_verbose; enable_mpi_verbose=$enableval +else $as_nop + enable_mpi_verbose=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_mpi_verbose" >&5 +printf "%s\n" "$enable_mpi_verbose" >&6; } +if test x$enable_mpi_verbose = xyes; then + +printf "%s\n" "#define STARPU_MPI_VERBOSE 1" >>confdefs.h + +fi +if test x$enable_mpi_verbose = xextra; then + +printf "%s\n" "#define STARPU_MPI_VERBOSE 1" >>confdefs.h + + +printf "%s\n" "#define STARPU_MPI_EXTRA_VERBOSE 1" >>confdefs.h + +fi + +if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then + cc_or_mpicc=$mpicc_path + # For some reason, libtool uses gcc instead of mpicc when linking + # libstarpumpi. + # On Darwin (and maybe other systems ?) the linker will fail (undefined + # references to MPI_*). We manually add the required flags to fix this + # issue. + + # openmpi version + MPICC_LDFLAGS=`$mpicc_path --showme:link 2>/dev/null` + + if test -z "$MPICC_LDFLAGS" + then + # mpich version + MPICC_LDFLAGS=`$mpicc_path -link_info | awk '{$1=""; print}'` + fi + +else + cc_or_mpicc=$CC +fi +CC_OR_MPICC=$cc_or_mpicc + + +############################################################################### +# # +# NUMA memory nodes # +# # +############################################################################### + +default_nmaxnumanodes=2 +# Extract the first word of "hwloc-calc", so it can be a program name with args. +set dummy hwloc-calc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_hwloccalccommand+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $hwloccalccommand in + [\\/]* | ?:[\\/]*) + ac_cv_path_hwloccalccommand="$hwloccalccommand" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_hwloccalccommand="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +hwloccalccommand=$ac_cv_path_hwloccalccommand +if test -n "$hwloccalccommand"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hwloccalccommand" >&5 +printf "%s\n" "$hwloccalccommand" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of NUMA nodes" >&5 +printf %s "checking maximum number of NUMA nodes... " >&6; } +# Check whether --enable-maxnumanodes was given. 
+if test ${enable_maxnumanodes+y} +then : + enableval=$enable_maxnumanodes; nmaxnumanodes=$enableval +else $as_nop + nmaxnumanodes=auto +fi + +if test x$nmaxnumanodes = xauto +then + if test "$hwloccalccommand" = ""; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&5 +printf "%s\n" "$as_me: WARNING: hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&2;} + nmaxnumanodes=$default_nmaxnumanodes + else + nmaxnumanodes=$($hwloccalccommand all -N node 2>/dev/null) + if test x$nmaxnumanodes = x; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&5 +printf "%s\n" "$as_me: WARNING: hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&2;} + nmaxnumanodes=$default_nmaxnumanodes + fi + fi +fi +if test x$nmaxnumanodes = x -o x$nmaxnumanodes = xyes +then + as_fn_error $? "The --enable-maxnumanodes option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxnumanodes" >&5 +printf "%s\n" "$nmaxnumanodes" >&6; } + +printf "%s\n" "#define STARPU_MAXNUMANODES $nmaxnumanodes" >>confdefs.h + + + +############################################################################### + +for ac_prog in lib +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_STARPU_MS_LIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $STARPU_MS_LIB in + [\\/]* | ?:[\\/]*) + ac_cv_path_STARPU_MS_LIB="$STARPU_MS_LIB" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_STARPU_MS_LIB="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +STARPU_MS_LIB=$ac_cv_path_STARPU_MS_LIB +if test -n "$STARPU_MS_LIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STARPU_MS_LIB" >&5 +printf "%s\n" "$STARPU_MS_LIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$STARPU_MS_LIB" && break +done + + + if test "x$STARPU_MS_LIB" != "x"; then + STARPU_HAVE_MS_LIB_TRUE= + STARPU_HAVE_MS_LIB_FALSE='#' +else + STARPU_HAVE_MS_LIB_TRUE='#' + STARPU_HAVE_MS_LIB_FALSE= +fi + +case "$target" in +*-*-mingw*|*-*-cygwin*|*-*-msys*) + starpu_windows=yes + libext=a + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lwsock32" + +printf "%s\n" "#define STARPU_HAVE_WINDOWS 1" >>confdefs.h + + ;; +*-*-linux*) + starpu_linux=yes + +printf "%s\n" "#define STARPU_LINUX_SYS 1" >>confdefs.h + + ;; +*-*-openbsd*) + starpu_openbsd=yes + +printf "%s\n" "#define STARPU_OPENBSD_SYS 1" >>confdefs.h + + ;; +*-*darwin*) + starpu_darwin=yes + +printf "%s\n" "#define STARPU_HAVE_DARWIN 1" >>confdefs.h + + ;; +esac + if test "x$starpu_windows" = "xyes"; then + STARPU_HAVE_WINDOWS_TRUE= + STARPU_HAVE_WINDOWS_FALSE='#' +else + STARPU_HAVE_WINDOWS_TRUE='#' + STARPU_HAVE_WINDOWS_FALSE= +fi + + if test "x$starpu_linux" = "xyes"; then + STARPU_LINUX_SYS_TRUE= + STARPU_LINUX_SYS_FALSE='#' +else + STARPU_LINUX_SYS_TRUE='#' + STARPU_LINUX_SYS_FALSE= +fi + + if test "x$starpu_darwin" = "xyes"; then + STARPU_HAVE_DARWIN_TRUE= + STARPU_HAVE_DARWIN_FALSE='#' +else + 
STARPU_HAVE_DARWIN_TRUE='#' + STARPU_HAVE_DARWIN_FALSE= +fi + + if test "x$starpu_openbsd" = "xyes"; then + STARPU_OPENBSD_SYS_TRUE= + STARPU_OPENBSD_SYS_FALSE='#' +else + STARPU_OPENBSD_SYS_TRUE='#' + STARPU_OPENBSD_SYS_FALSE= +fi + + +# on Darwin, GCC targets i386 by default, so we don't have atomic ops +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5 +printf %s "checking size of void *... " >&6; } +if test ${ac_cv_sizeof_void_p+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default" +then : + +else $as_nop + if test "$ac_cv_type_void_p" = yes; then + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (void *) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_void_p=0 + fi +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5 +printf "%s\n" "$ac_cv_sizeof_void_p" >&6; } + + + +printf "%s\n" "#define SIZEOF_VOID_P $ac_cv_sizeof_void_p" >>confdefs.h + + +SIZEOF_VOID_P=$ac_cv_sizeof_void_p +case $SIZEOF_VOID_P in + 4) + case "$target" in + i386-*darwin*) CFLAGS="$CFLAGS -march=i686" ;; + esac + STARPU_MS_LIB_ARCH=X86 + ;; + 8) + STARPU_MS_LIB_ARCH=X64 + ;; +esac + + +# This will be useful for program which use CUDA (and .cubin files) which need +# some path to the CUDA code at runtime. 
+ +printf "%s\n" "#define STARPU_BUILD_DIR \"$PWD\"" >>confdefs.h + +STARPU_BUILD_DIR=$PWD + +case "${srcdir}" in +/*) +printf "%s\n" "#define STARPU_SRC_DIR \"$(eval echo ${srcdir})\"" >>confdefs.h + + STARPU_SRC_DIR="$(eval echo ${srcdir})" + ;; +*) +printf "%s\n" "#define STARPU_SRC_DIR \"$(eval echo $PWD/${srcdir})\"" >>confdefs.h + + STARPU_SRC_DIR="$(eval echo $PWD/${srcdir})" + ;; +esac + +case "$target" in +*-*-mingw*|*-*-cygwin*) + # Check whether --enable-native-winthreads was given. +if test ${enable_native_winthreads+y} +then : + enableval=$enable_native_winthreads; enable_native_winthreads=$enableval +else $as_nop + enable_native_winthreads=no +fi + + ;; +esac +# Unless native Windows threads were requested, the struct timespec probe below also includes <pthread.h>. +if test x"$enable_native_winthreads" != xyes ; then + INCLUDE_PTHREAD_H='#include <pthread.h>' +fi + + for ac_header in unistd.h +do : + ac_fn_c_check_header_compile "$LINENO" "unistd.h" "ac_cv_header_unistd_h" "$ac_includes_default" +if test "x$ac_cv_header_unistd_h" = xyes +then : + printf "%s\n" "#define HAVE_UNISTD_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_UNISTD_H 1" >>confdefs.h + +fi + +done + +ac_fn_c_check_type "$LINENO" "struct timespec" "ac_cv_type_struct_timespec" " +#include <sys/types.h> +#include <sys/stat.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <time.h> +$INCLUDE_PTHREAD_H + +" +if test "x$ac_cv_type_struct_timespec" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_STRUCT_TIMESPEC 1" >>confdefs.h + +fi + + +if test x"$enable_native_winthreads" = xyes ; then + CPPFLAGS="$CPPFLAGS -I$STARPU_SRC_DIR/include/pthread_win32" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #define STARPU_CONFIGURE + #include <pthread.h> + +int +main (void) +{ + pthread_t t; pthread_create(&t, NULL, NULL, NULL); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +printf "%s\n" "#define STARPU_NATIVE_WINTHREADS 1" >>confdefs.h + +else $as_nop + as_fn_error $?
"pthread_create unavailable" "$LINENO" 5 +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +printf %s "checking for pthread_create in -lpthread... " >&6; } +if test ${ac_cv_lib_pthread_pthread_create+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char pthread_create (); +int +main (void) +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_pthread_pthread_create=yes +else $as_nop + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes +then : + + LIBS="$LIBS -lpthread" + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lpthread" + +fi + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing sqrt" >&5 +printf %s "checking for library containing sqrt... " >&6; } +if test ${ac_cv_search_sqrt+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char sqrt (); +int +main (void) +{ +return sqrt (); + ; + return 0; +} +_ACEOF +for ac_lib in '' m +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_sqrt=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_sqrt+y} +then : + break +fi +done +if test ${ac_cv_search_sqrt+y} +then : + +else $as_nop + ac_cv_search_sqrt=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sqrt" >&5 +printf "%s\n" "$ac_cv_search_sqrt" >&6; } +ac_res=$ac_cv_search_sqrt +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else $as_nop + as_fn_error $? "math library unavailable" "$LINENO" 5 +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lws2_32" >&5 +printf %s "checking for main in -lws2_32... " >&6; } +if test ${ac_cv_lib_ws2_32_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lws2_32 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ws2_32_main=yes +else $as_nop + ac_cv_lib_ws2_32_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ws2_32_main" >&5 +printf "%s\n" "$ac_cv_lib_ws2_32_main" >&6; } +if test "x$ac_cv_lib_ws2_32_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBWS2_32 1" >>confdefs.h + + LIBS="-lws2_32 $LIBS" + +fi +ac_cv_lib_ws2_32=ac_cv_lib_ws2_32_main + +ac_fn_c_check_func "$LINENO" "sysconf" "ac_cv_func_sysconf" +if test "x$ac_cv_func_sysconf" = xyes +then : + printf "%s\n" "#define HAVE_SYSCONF 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "getrlimit" "ac_cv_func_getrlimit" +if test "x$ac_cv_func_getrlimit" = xyes +then : + printf "%s\n" "#define HAVE_GETRLIMIT 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "scandir" "ac_cv_func_scandir" +if test "x$ac_cv_func_scandir" = xyes +then : + printf "%s\n" "#define HAVE_SCANDIR 1" >>confdefs.h + +fi + + +ac_fn_c_check_func "$LINENO" "pthread_spin_lock" "ac_cv_func_pthread_spin_lock" +if test "x$ac_cv_func_pthread_spin_lock" = xyes +then : + have_pthread_spin_lock=yes +else $as_nop + have_pthread_spin_lock=no +fi + +if test x$have_pthread_spin_lock = xyes; then + +printf "%s\n" "#define HAVE_PTHREAD_SPIN_LOCK 1" >>confdefs.h + + +printf "%s\n" "#define STARPU_HAVE_PTHREAD_SPIN_LOCK 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "pthread_barrier_init" "ac_cv_func_pthread_barrier_init" +if test "x$ac_cv_func_pthread_barrier_init" = xyes +then : + have_pthread_barrier=yes +else $as_nop + have_pthread_barrier=no +fi + +if test x$have_pthread_barrier = xyes; then + +printf "%s\n" "#define STARPU_HAVE_PTHREAD_BARRIER 1" >>confdefs.h + +fi + +# yes, that's non portable, but it's still better than sched_setaffinity +ac_fn_c_check_func 
"$LINENO" "pthread_setaffinity_np" "ac_cv_func_pthread_setaffinity_np" +if test "x$ac_cv_func_pthread_setaffinity_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_SETAFFINITY_NP 1" >>confdefs.h + +fi + + +ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" +if test "x$ac_cv_func_pthread_setname_np" = xyes +then : + have_pthread_setname_np=yes +else $as_nop + have_pthread_setname_np=no +fi + +if test x$have_pthread_setname_np = xyes; then + +printf "%s\n" "#define STARPU_HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h + +fi + +if test "x$cross_compiling" = "xno"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_MUTEX_INITIALIZER just zeroes" >&5 +printf %s "checking whether PTHREAD_MUTEX_INITIALIZER just zeroes... " >&6; } +if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main (void) +{ +pthread_mutex_t var = PTHREAD_MUTEX_INITIALIZER; + char *p; + for (p = (char*) &var; p < (char*) (&var+1); p++) + if (*p != 0) + return 1; + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + +printf "%s\n" "#define STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_COND_INITIALIZER just zeroes" >&5 +printf %s "checking whether PTHREAD_COND_INITIALIZER just zeroes... " >&6; } +if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main (void) +{ +pthread_cond_t var = PTHREAD_COND_INITIALIZER; + char *p; + for (p = (char*) &var; p < (char*) (&var+1); p++) + if (*p != 0) + return 1; + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + +printf "%s\n" "#define STARPU_PTHREAD_COND_INITIALIZER_ZERO 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_RWLOCK_INITIALIZER just zeroes" >&5 +printf %s "checking whether PTHREAD_RWLOCK_INITIALIZER just zeroes... " >&6; } +if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <pthread.h> +int +main (void) +{ +pthread_rwlock_t var = PTHREAD_RWLOCK_INITIALIZER; + char *p; + for (p = (char*) &var; p < (char*) (&var+1); p++) + if (*p != 0) + return 1; + return 0; + + ; + return 0; +} +_ACEOF +# The probe above exits 0 only when every byte of the static rwlock initializer is zero. +if ac_fn_c_try_run "$LINENO" +then : + +printf "%s\n" "#define STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +fi + +# There is no posix_memalign on Mac OS X, only memalign + + for ac_func in posix_memalign +do : + ac_fn_c_check_func "$LINENO" "posix_memalign" "ac_cv_func_posix_memalign" +if test "x$ac_cv_func_posix_memalign" = xyes +then : + printf "%s\n" "#define HAVE_POSIX_MEMALIGN 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_POSIX_MEMALIGN 1" >>confdefs.h + +fi + +done + + for ac_func in memalign +do : + ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign" +if test "x$ac_cv_func_memalign" = xyes +then : + printf "%s\n" "#define HAVE_MEMALIGN 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_MEMALIGN 1" >>confdefs.h + +fi + +done + +# Some systems don't have drand48 +ac_fn_c_check_func "$LINENO" "drand48" "ac_cv_func_drand48" +if test "x$ac_cv_func_drand48" = xyes +then : + have_drand48=yes +else $as_nop + have_drand48=no +fi + +ac_fn_c_check_func "$LINENO" "erand48_r" "ac_cv_func_erand48_r" +if test "x$ac_cv_func_erand48_r" = xyes +then : + have_erand48_r=yes +else $as_nop + have_erand48_r=no +fi + +# Maybe the user still does not want to use the provided drand48 +# Check whether --enable-default-drand48 was given.
+if test ${enable_default_drand48+y} +then : + enableval=$enable_default_drand48; enable_default_drand48=$enableval +else $as_nop + enable_default_drand48=yes +fi + +if test x$have_drand48 = xyes -a x$enable_default_drand48 = xyes ; then + +printf "%s\n" "#define STARPU_USE_DRAND48 1" >>confdefs.h + +fi +if test x$have_erand48_r = xyes ; then + +printf "%s\n" "#define STARPU_USE_ERAND48_R 1" >>confdefs.h + +fi + +# Some systems do not define strerror_r +ac_fn_c_check_func "$LINENO" "strerror_r" "ac_cv_func_strerror_r" +if test "x$ac_cv_func_strerror_r" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_STRERROR_R 1" >>confdefs.h + +fi + + +# Some systems may not define setenv +ac_fn_c_check_func "$LINENO" "setenv" "ac_cv_func_setenv" +if test "x$ac_cv_func_setenv" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_SETENV 1" >>confdefs.h + +fi + + +# Some systems do not define unsetenv +ac_fn_c_check_func "$LINENO" "unsetenv" "ac_cv_func_unsetenv" +if test "x$ac_cv_func_unsetenv" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_UNSETENV 1" >>confdefs.h + +fi + + +# Some systems do not define nearbyintf... +ac_fn_c_check_func "$LINENO" "nearbyintf" "ac_cv_func_nearbyintf" +if test "x$ac_cv_func_nearbyintf" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_NEARBYINTF 1" >>confdefs.h + +fi + + +# ... but they may define rintf. +ac_fn_c_check_func "$LINENO" "rintf" "ac_cv_func_rintf" +if test "x$ac_cv_func_rintf" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_RINTF 1" >>confdefs.h + +fi + + +# Define quick check +# Check whether --enable-quick-check was given. 
+if test ${enable_quick_check+y} +then : + enableval=$enable_quick_check; enable_quick_check=$enableval +else $as_nop + enable_quick_check=no +fi + +if test x$enable_quick_check = xyes; then + +printf "%s\n" "#define STARPU_QUICK_CHECK 1" >>confdefs.h + +fi + if test "x$enable_quick_check" = "xyes"; then + STARPU_QUICK_CHECK_TRUE= + STARPU_QUICK_CHECK_FALSE='#' +else + STARPU_QUICK_CHECK_TRUE='#' + STARPU_QUICK_CHECK_FALSE= +fi + + +# Define long check +# Check whether --enable-long-check was given. +if test ${enable_long_check+y} +then : + enableval=$enable_long_check; enable_long_check=$enableval +else $as_nop + enable_long_check=no +fi + +if test x$enable_long_check = xyes; then + +printf "%s\n" "#define STARPU_LONG_CHECK 1" >>confdefs.h + +fi + if test "x$enable_long_check" = "xyes"; then + STARPU_LONG_CHECK_TRUE= + STARPU_LONG_CHECK_FALSE='#' +else + STARPU_LONG_CHECK_TRUE='#' + STARPU_LONG_CHECK_FALSE= +fi + + +# Define new check +# Check whether --enable-new-check was given. +if test ${enable_new_check+y} +then : + enableval=$enable_new_check; enable_new_check=$enableval +else $as_nop + enable_new_check=no +fi + +if test x$enable_new_check = xyes; then + +printf "%s\n" "#define STARPU_NEW_CHECK 1" >>confdefs.h + +fi + if test "x$enable_new_check" = "xyes"; then + STARPU_NEW_CHECK_TRUE= + STARPU_NEW_CHECK_FALSE='#' +else + STARPU_NEW_CHECK_TRUE='#' + STARPU_NEW_CHECK_FALSE= +fi + + + for ac_header in malloc.h +do : + ac_fn_c_check_header_compile "$LINENO" "malloc.h" "ac_cv_header_malloc_h" "$ac_includes_default" +if test "x$ac_cv_header_malloc_h" = xyes +then : + printf "%s\n" "#define HAVE_MALLOC_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_MALLOC_H 1" >>confdefs.h + +fi + +done + +# Check whether --enable-valgrind was given. 
+if test ${enable_valgrind+y} +then : + enableval=$enable_valgrind; enable_valgrind=$enableval +else $as_nop + enable_valgrind=yes +fi + +if test "$enable_valgrind" != "no" ; then + for ac_header in valgrind/valgrind.h +do : + ac_fn_c_check_header_compile "$LINENO" "valgrind/valgrind.h" "ac_cv_header_valgrind_valgrind_h" "$ac_includes_default" +if test "x$ac_cv_header_valgrind_valgrind_h" = xyes +then : + printf "%s\n" "#define HAVE_VALGRIND_VALGRIND_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_VALGRIND_H 1" >>confdefs.h + +fi + +done + for ac_header in valgrind/memcheck.h +do : + ac_fn_c_check_header_compile "$LINENO" "valgrind/memcheck.h" "ac_cv_header_valgrind_memcheck_h" "$ac_includes_default" +if test "x$ac_cv_header_valgrind_memcheck_h" = xyes +then : + printf "%s\n" "#define HAVE_VALGRIND_MEMCHECK_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_MEMCHECK_H 1" >>confdefs.h + +fi + +done + for ac_header in valgrind/helgrind.h +do : + ac_fn_c_check_header_compile "$LINENO" "valgrind/helgrind.h" "ac_cv_header_valgrind_helgrind_h" "$ac_includes_default" +if test "x$ac_cv_header_valgrind_helgrind_h" = xyes +then : + printf "%s\n" "#define HAVE_VALGRIND_HELGRIND_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_HELGRIND_H 1" >>confdefs.h + +fi + +done +fi +if test "$enable_valgrind" = "full" ; then + +printf "%s\n" "#define STARPU_VALGRIND_FULL 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "sched_yield" "ac_cv_func_sched_yield" +if test "x$ac_cv_func_sched_yield" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_SCHED_YIELD 1" >>confdefs.h + +fi + + +ac_fn_c_check_header_compile "$LINENO" "aio.h" "ac_cv_header_aio_h" "$ac_includes_default" +if test "x$ac_cv_header_aio_h" = xyes +then : + printf "%s\n" "#define HAVE_AIO_H 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for aio_read in -lrt" >&5 +printf %s "checking for aio_read in -lrt... 
" >&6; } +if test ${ac_cv_lib_rt_aio_read+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrt $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char aio_read (); +int +main (void) +{ +return aio_read (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_rt_aio_read=yes +else $as_nop + ac_cv_lib_rt_aio_read=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_aio_read" >&5 +printf "%s\n" "$ac_cv_lib_rt_aio_read" >&6; } +if test "x$ac_cv_lib_rt_aio_read" = xyes +then : + printf "%s\n" "#define HAVE_LIBRT 1" >>confdefs.h + + LIBS="-lrt $LIBS" + +fi + +#AC_CHECK_HEADERS([libaio.h]) +#AC_CHECK_LIB([aio], [io_setup]) +ac_fn_c_check_func "$LINENO" "copy_file_range" "ac_cv_func_copy_file_range" +if test "x$ac_cv_func_copy_file_range" = xyes +then : + printf "%s\n" "#define HAVE_COPY_FILE_RANGE 1" >>confdefs.h + +fi + + +ac_fn_c_check_func "$LINENO" "mkostemp" "ac_cv_func_mkostemp" +if test "x$ac_cv_func_mkostemp" = xyes +then : + printf "%s\n" "#define HAVE_MKOSTEMP 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "mkdtemp" "ac_cv_func_mkdtemp" +if test "x$ac_cv_func_mkdtemp" = xyes +then : + printf "%s\n" "#define HAVE_MKDTEMP 1" >>confdefs.h + +fi + + +ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread" +if test "x$ac_cv_func_pread" = xyes +then : + printf "%s\n" "#define HAVE_PREAD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite" +if test "x$ac_cv_func_pwrite" = xyes +then : + printf "%s\n" "#define HAVE_PWRITE 1" >>confdefs.h + +fi + + +# Depending on the user environment, the hdf5 library may link 
against some +# mpi implementation, and bring surprising runtime behavior. +# Check whether --enable-hdf5 was given. +if test ${enable_hdf5+y} +then : + enableval=$enable_hdf5; enable_hdf5=$enableval +else $as_nop + enable_hdf5=no +fi + + +if test "x$enable_hdf5" != xno ; then + +# Check whether --with-hdf5-include-dir was given. +if test ${with_hdf5_include_dir+y} +then : + withval=$with_hdf5_include_dir; + hdf5_include_dir="$withval" + +else $as_nop + hdf5_include_dir="" +fi + + + hdf5_inc_dir="/usr/include/hdf5 /usr/include/hdf5/serial ${hdf5_include_dir}" + + enable_include_hdf5=no + for f in $hdf5_inc_dir; do + if test -n "$f" ; then + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="$CPPFLAGS -I$f" + ac_fn_c_check_header_compile "$LINENO" "hdf5.h" "ac_cv_header_hdf5_h" "$ac_includes_default" +if test "x$ac_cv_header_hdf5_h" = xyes +then : + printf "%s\n" "#define HAVE_HDF5_H 1" >>confdefs.h + +fi + + if test "$ac_cv_header_hdf5_h" = "yes" ; then + # Put the hdf5 include path in front of the CPPFLAGS saved at the top of this iteration. + CPPFLAGS="-I${f} ${SAVED_CPPFLAGS}" + enable_include_hdf5=yes + break + else + CPPFLAGS=${SAVED_CPPFLAGS} + fi + unset ac_cv_header_hdf5_h + fi + done + + + +# Check whether --with-hdf5-lib-dir was given. +if test ${with_hdf5_lib_dir+y} +then : + withval=$with_hdf5_lib_dir; + hdf5_libraries_dir="$withval" + +else $as_nop + hdf5_libraries_dir="" +fi + + + hdf5_lib_dir="/usr/lib/x86_64-linux-gnu/hdf5 /usr/lib/x86_64-linux-gnu/hdf5/serial ${hdf5_libraries_dir}" + + enable_libraries_hdf5=no + for f in $hdf5_lib_dir; do + if test -n "$f" ; then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS=-L${f} + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lhdf5" >&5 +printf %s "checking for main in -lhdf5... " >&6; } +if test ${ac_cv_lib_hdf5_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhdf5 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_hdf5_main=yes +else $as_nop + ac_cv_lib_hdf5_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hdf5_main" >&5 +printf "%s\n" "$ac_cv_lib_hdf5_main" >&6; } +if test "x$ac_cv_lib_hdf5_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBHDF5 1" >>confdefs.h + + LIBS="-lhdf5 $LIBS" + +fi + + STARPU_HDF5_LDFLAGS="$STARPU_HDF5_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + + if test "$ac_cv_lib_hdf5_main" = "yes" ; then + LDFLAGS="-L${f} ${SAVED_LDFLAGS} ${STARPU_HDF5_LDFLAGS}" + enable_libraries_hdf5=yes + break + else + LDFLAGS=${SAVED_LDFLAGS} + fi + unset ac_cv_lib_hdf5_main + fi + done +fi + +if test "x$enable_libraries_hdf5" = "xyes" -a "x$enable_include_hdf5" = "xyes" -a "x$enable_hdf5" != "xno"; then + +printf "%s\n" "#define STARPU_HAVE_HDF5 1" >>confdefs.h + + enable_hdf5=yes +else + enable_hdf5=no +fi + if test "x$enable_hdf5" = "xyes"; then + STARPU_HAVE_HDF5_TRUE= + STARPU_HAVE_HDF5_FALSE='#' +else + STARPU_HAVE_HDF5_TRUE='#' + STARPU_HAVE_HDF5_FALSE= +fi + + + +# This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_val_compare_and_swap" >&5 +printf %s "checking whether the target supports __sync_val_compare_and_swap... " >&6; } +if test ${ac_cv_have_sync_val_compare_and_swap+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo, bar; +int +main (void) +{ +bar = __sync_val_compare_and_swap(&foo, 0, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_val_compare_and_swap=yes +else $as_nop + ac_cv_have_sync_val_compare_and_swap=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_val_compare_and_swap" >&5 +printf "%s\n" "$ac_cv_have_sync_val_compare_and_swap" >&6; } + if test $ac_cv_have_sync_val_compare_and_swap = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP 1" >>confdefs.h + + fi + +# 64-bit variant: the same builtin is link-tested on int64_t operands. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_val_compare_and_swap_8" >&5 +printf %s "checking whether the target supports __sync_val_compare_and_swap_8... " >&6; } +if test ${ac_cv_have_sync_val_compare_and_swap_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdint.h> + int64_t foo, bar; +int +main (void) +{ +bar = __sync_val_compare_and_swap(&foo, 0, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_val_compare_and_swap_8=yes +else $as_nop + ac_cv_have_sync_val_compare_and_swap_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_val_compare_and_swap_8" >&5 +printf "%s\n" "$ac_cv_have_sync_val_compare_and_swap_8" >&6; } + if test $ac_cv_have_sync_val_compare_and_swap_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 1" >>confdefs.h + + fi + +# This defines STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_bool_compare_and_swap" >&5 +printf %s "checking whether the target supports __sync_bool_compare_and_swap...
" >&6; } +if test ${ac_cv_have_sync_bool_compare_and_swap+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __sync_bool_compare_and_swap(&foo, 0, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_bool_compare_and_swap=yes +else $as_nop + ac_cv_have_sync_bool_compare_and_swap=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_bool_compare_and_swap" >&5 +printf "%s\n" "$ac_cv_have_sync_bool_compare_and_swap" >&6; } + if test $ac_cv_have_sync_bool_compare_and_swap = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_bool_compare_and_swap_8" >&5 +printf %s "checking whether the target supports __sync_bool_compare_and_swap_8... " >&6; } +if test ${ac_cv_have_sync_bool_compare_and_swap_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <stdint.h> + int64_t foo, bar; +int +main (void) +{ +bar = __sync_bool_compare_and_swap(&foo, 0, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_bool_compare_and_swap_8=yes +else $as_nop + ac_cv_have_sync_bool_compare_and_swap_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_bool_compare_and_swap_8" >&5 +printf "%s\n" "$ac_cv_have_sync_bool_compare_and_swap_8" >&6; } + if test $ac_cv_have_sync_bool_compare_and_swap_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 1" >>confdefs.h + + fi + +# This defines STARPU_HAVE_SYNC_FETCH_AND_ADD and STARPU_HAVE_SYNC_FETCH_AND_ADD_8 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_add" >&5 +printf %s "checking whether the target supports __sync_fetch_and_add... " >&6; } +if test ${ac_cv_have_sync_fetch_and_add+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __sync_fetch_and_add(&foo, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_fetch_and_add=yes +else $as_nop + ac_cv_have_sync_fetch_and_add=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_add" >&5 +printf "%s\n" "$ac_cv_have_sync_fetch_and_add" >&6; } + if test $ac_cv_have_sync_fetch_and_add = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_ADD 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_add_8" >&5 +printf %s "checking whether the target supports __sync_fetch_and_add_8...
" >&6; } +if test ${ac_cv_have_sync_fetch_and_add_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdint.h> + int64_t foo, bar; +int +main (void) +{ +bar = __sync_fetch_and_add(&foo, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_fetch_and_add_8=yes +else $as_nop + ac_cv_have_sync_fetch_and_add_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_add_8" >&5 +printf "%s\n" "$ac_cv_have_sync_fetch_and_add_8" >&6; } + if test $ac_cv_have_sync_fetch_and_add_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_ADD_8 1" >>confdefs.h + + fi + +# This defines STARPU_HAVE_SYNC_FETCH_AND_OR + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_or" >&5 +printf %s "checking whether the target supports __sync_fetch_and_or... " >&6; } +if test ${ac_cv_have_sync_fetch_and_or+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ +int foo, bar; +int +main (void) +{ +bar = __sync_fetch_and_or(&foo, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_fetch_and_or=yes +else $as_nop + ac_cv_have_sync_fetch_and_or=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_or" >&5 +printf "%s\n" "$ac_cv_have_sync_fetch_and_or" >&6; } + if test $ac_cv_have_sync_fetch_and_or = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_OR 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_or_8" >&5 +printf %s "checking whether the target supports __sync_fetch_and_or_8... " >&6; } +if test ${ac_cv_have_sync_fetch_and_or_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + int64_t foo, bar; +int +main (void) +{ +bar = __sync_fetch_and_or(&foo, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_fetch_and_or_8=yes +else $as_nop + ac_cv_have_sync_fetch_and_or_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_or_8" >&5 +printf "%s\n" "$ac_cv_have_sync_fetch_and_or_8" >&6; } + if test $ac_cv_have_sync_fetch_and_or_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_OR_8 1" >>confdefs.h + + fi + +# This defines HAVE_SYNC_LOCK_TEST_AND_SET + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_lock_test_and_set" >&5 +printf %s "checking whether the target supports __sync_lock_test_and_set... 
" >&6; } +if test ${ac_cv_have_sync_lock_test_and_set+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __sync_lock_test_and_set(&foo, 1); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_lock_test_and_set=yes +else $as_nop + ac_cv_have_sync_lock_test_and_set=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_lock_test_and_set" >&5 +printf "%s\n" "$ac_cv_have_sync_lock_test_and_set" >&6; } + if test $ac_cv_have_sync_lock_test_and_set = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_LOCK_TEST_AND_SET 1" >>confdefs.h + + fi + +# This defines HAVE_ATOMIC_COMPARE_EXCHANGE_N + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_compare_exchange_n" >&5 +printf %s "checking whether the target supports __atomic_compare_exchange_n... " >&6; } +if test ${ac_cv_have_atomic_compare_exchange_n+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo, bar, baz; +int +main (void) +{ +baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_compare_exchange_n=yes +else $as_nop + ac_cv_have_atomic_compare_exchange_n=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_compare_exchange_n" >&5 +printf "%s\n" "$ac_cv_have_atomic_compare_exchange_n" >&6; } + if test $ac_cv_have_atomic_compare_exchange_n = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_compare_exchange_n_8" >&5 +printf %s "checking whether the target supports __atomic_compare_exchange_n_8... " >&6; } +if test ${ac_cv_have_atomic_compare_exchange_n_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + int64_t foo, bar, baz; +int +main (void) +{ +baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_compare_exchange_n_8=yes +else $as_nop + ac_cv_have_atomic_compare_exchange_n_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_compare_exchange_n_8" >&5 +printf "%s\n" "$ac_cv_have_atomic_compare_exchange_n_8" >&6; } + if test $ac_cv_have_atomic_compare_exchange_n_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 1" >>confdefs.h + + fi + +# This defines HAVE_ATOMIC_EXCHANGE_N + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_exchange_n" >&5 +printf %s "checking whether the target supports __atomic_exchange_n... " >&6; } +if test ${ac_cv_have_atomic_exchange_n+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_exchange_n=yes +else $as_nop + ac_cv_have_atomic_exchange_n=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_exchange_n" >&5 +printf "%s\n" "$ac_cv_have_atomic_exchange_n" >&6; } + if test $ac_cv_have_atomic_exchange_n = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_EXCHANGE_N 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_exchange_n_8" >&5 +printf %s "checking whether the target supports __atomic_exchange_n_8... 
" >&6; } +if test ${ac_cv_have_atomic_exchange_n_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + int64_t foo, bar; +int +main (void) +{ +bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_exchange_n_8=yes +else $as_nop + ac_cv_have_atomic_exchange_n_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_exchange_n_8" >&5 +printf "%s\n" "$ac_cv_have_atomic_exchange_n_8" >&6; } + if test $ac_cv_have_atomic_exchange_n_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_EXCHANGE_N_8 1" >>confdefs.h + + fi + +# This defines HAVE_ATOMIC_FETCH_ADD + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_add" >&5 +printf %s "checking whether the target supports __atomic_fetch_add... " >&6; } +if test ${ac_cv_have_atomic_fetch_add+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo, bar; +int +main (void) +{ +bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_fetch_add=yes +else $as_nop + ac_cv_have_atomic_fetch_add=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_add" >&5 +printf "%s\n" "$ac_cv_have_atomic_fetch_add" >&6; } + if test $ac_cv_have_atomic_fetch_add = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_ADD 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_add_8" >&5 +printf %s "checking whether the target supports __atomic_fetch_add_8... " >&6; } +if test ${ac_cv_have_atomic_fetch_add_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + int64_t foo, bar; +int +main (void) +{ +bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_fetch_add_8=yes +else $as_nop + ac_cv_have_atomic_fetch_add_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_add_8" >&5 +printf "%s\n" "$ac_cv_have_atomic_fetch_add_8" >&6; } + if test $ac_cv_have_atomic_fetch_add_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_ADD_8 1" >>confdefs.h + + fi + +# This defines HAVE_ATOMIC_FETCH_OR + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_or" >&5 +printf %s "checking whether the target supports __atomic_fetch_or... 
" >&6; } +if test ${ac_cv_have_atomic_fetch_or+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_fetch_or=yes +else $as_nop + ac_cv_have_atomic_fetch_or=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_or" >&5 +printf "%s\n" "$ac_cv_have_atomic_fetch_or" >&6; } + if test $ac_cv_have_atomic_fetch_or = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_OR 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_or_8" >&5 +printf %s "checking whether the target supports __atomic_fetch_or_8... " >&6; } +if test ${ac_cv_have_atomic_fetch_or_8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + int64_t foo, bar; +int +main (void) +{ +bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_fetch_or_8=yes +else $as_nop + ac_cv_have_atomic_fetch_or_8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_or_8" >&5 +printf "%s\n" "$ac_cv_have_atomic_fetch_or_8" >&6; } + if test $ac_cv_have_atomic_fetch_or_8 = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_OR_8 1" >>confdefs.h + + fi + +# This defines HAVE_ATOMIC_TEST_AND_SET + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_test_and_set" >&5 +printf %s "checking whether the target supports __atomic_test_and_set... " >&6; } +if test ${ac_cv_have_atomic_test_and_set+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo, bar; +int +main (void) +{ +bar = __atomic_test_and_set(&foo, __ATOMIC_SEQ_CST); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_atomic_test_and_set=yes +else $as_nop + ac_cv_have_atomic_test_and_set=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_test_and_set" >&5 +printf "%s\n" "$ac_cv_have_atomic_test_and_set" >&6; } + if test $ac_cv_have_atomic_test_and_set = yes; then + +printf "%s\n" "#define STARPU_HAVE_ATOMIC_TEST_AND_SET 1" >>confdefs.h + + fi + +# This defines HAVE_SYNC_SYNCHRONIZE + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_synchronize" >&5 +printf %s "checking whether the target supports __sync_synchronize... 
" >&6; } +if test ${ac_cv_have_sync_synchronize+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +__sync_synchronize(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_have_sync_synchronize=yes +else $as_nop + ac_cv_have_sync_synchronize=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_synchronize" >&5 +printf "%s\n" "$ac_cv_have_sync_synchronize" >&6; } + if test $ac_cv_have_sync_synchronize = yes; then + +printf "%s\n" "#define STARPU_HAVE_SYNC_SYNCHRONIZE 1" >>confdefs.h + + fi + +CPPFLAGS="${CPPFLAGS} -D_GNU_SOURCE " + + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing set_mempolicy" >&5 +printf %s "checking for library containing set_mempolicy... " >&6; } +if test ${ac_cv_search_set_mempolicy+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char set_mempolicy (); +int +main (void) +{ +return set_mempolicy (); + ; + return 0; +} +_ACEOF +for ac_lib in '' numa +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_set_mempolicy=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_set_mempolicy+y} +then : + break +fi +done +if test ${ac_cv_search_set_mempolicy+y} +then : + +else $as_nop + ac_cv_search_set_mempolicy=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_set_mempolicy" >&5 +printf "%s\n" "$ac_cv_search_set_mempolicy" >&6; } +ac_res=$ac_cv_search_set_mempolicy +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + enable_libnuma=yes +else $as_nop + enable_libnuma=no +fi + + STARPU_LIBNUMA_LDFLAGS="$STARPU_LIBNUMA_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether libnuma is available" >&5 +printf %s "checking whether libnuma is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_libnuma" >&5 +printf "%s\n" "$enable_libnuma" >&6; } +if test x$enable_libnuma = xyes; then + +printf "%s\n" "#define STARPU_HAVE_LIBNUMA 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether statement expressions are available" >&5 +printf %s "checking whether statement expressions are available... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#define maxint(a,b) ({int _a = (a), _b = (b); _a > _b ? 
_a : _b; }) + +int +main (void) +{ + int x=maxint(12,42); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + statement_expressions="yes" +else $as_nop + statement_expressions="no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $statement_expressions" >&5 +printf "%s\n" "$statement_expressions" >&6; } +if test x$statement_expressions = xyes; then + +printf "%s\n" "#define STARPU_HAVE_STATEMENT_EXPRESSIONS 1" >>confdefs.h + +fi + +saved_LIBS="${LIBS}" +LIBS="${LIBS} -ldl" +STARPU_DLOPEN_LDFLAGS="" + + for ac_func in dlopen +do : + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes +then : + printf "%s\n" "#define HAVE_DLOPEN 1" >>confdefs.h + STARPU_DLOPEN_LDFLAGS="-ldl" +fi + +done +LIBS="$saved_LIBS" + +############################################################################### +# # +# SCHED_CTX settings # +# # +############################################################################### +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of sched_ctxs" >&5 +printf %s "checking maximum number of sched_ctxs... " >&6; } +# Check whether --enable-max_sched_ctxs was given. +if test ${enable_max_sched_ctxs+y} +then : + enableval=$enable_max_sched_ctxs; max_sched_ctxs=$enableval +else $as_nop + max_sched_ctxs=10 +fi + +if test x$max_sched_ctxs = x -o x$max_sched_ctxs = xyes +then + as_fn_error $? "The --enable-max_sched_ctxs option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $max_sched_ctxs" >&5 +printf "%s\n" "$max_sched_ctxs" >&6; } + +printf "%s\n" "#define STARPU_NMAX_SCHED_CTXS $max_sched_ctxs" >>confdefs.h + + +# Check whether --enable-sc_hypervisor was given. 
+if test ${enable_sc_hypervisor+y}  # true whenever enable_sc_hypervisor is set, whatever its value
+then :
+  enableval=$enable_sc_hypervisor; if test "x$enableval" = xno; then enable_sc_hypervisor="no"; else enable_sc_hypervisor="yes"; fi  # honour an explicit "no" (--disable-sc_hypervisor) instead of forcing "yes"; any other value still enables
+else $as_nop
+  enable_sc_hypervisor="no"
+fi
+
+
+#for pkgconfig
+
+if test "x$enable_sc_hypervisor" = "xyes"; then
+
+printf "%s\n" "#define STARPU_USE_SC_HYPERVISOR 1" >>confdefs.h
+
+#	PKG_CHECK_MODULES([SC_HYPERVISOR], [libsc_hypervisor], [], build_sc_hypervisor="yes")
+	STARPU_SC_HYPERVISOR="-lsc_hypervisor"
+	build_sc_hypervisor="yes"
+else
+	build_sc_hypervisor="no"
+fi
+
+ if test "x$build_sc_hypervisor" = "xyes"; then  # TRUE/FALSE pair: exactly one of the two is '#', the other is empty
+  STARPU_BUILD_SC_HYPERVISOR_TRUE=
+  STARPU_BUILD_SC_HYPERVISOR_FALSE='#'
+else
+  STARPU_BUILD_SC_HYPERVISOR_TRUE='#'
+  STARPU_BUILD_SC_HYPERVISOR_FALSE=
+fi
+
+ if test "x$build_sc_hypervisor" = "xyes"; then  # STARPU_USE_SC_HYPERVISOR is keyed on build_sc_hypervisor as well
+  STARPU_USE_SC_HYPERVISOR_TRUE=
+  STARPU_USE_SC_HYPERVISOR_FALSE='#'
+else
+  STARPU_USE_SC_HYPERVISOR_TRUE='#'
+  STARPU_USE_SC_HYPERVISOR_FALSE=
+fi
+
+
+# Check whether --enable-sc_hypervisor_debug was given.
+if test ${enable_sc_hypervisor_debug+y}
+then :
+  enableval=$enable_sc_hypervisor_debug; if test "x$enableval" = xno; then enable_sc_hypervisor_debug="no"; else enable_sc_hypervisor_debug="yes"; fi  # same rule as above: only an explicit "no" disables
+else $as_nop
+  enable_sc_hypervisor_debug="no"
+fi
+
+
+
+STARPU_SC_HYPERVISOR_DEBUG=$enable_sc_hypervisor_debug
+
+ if test "x$enable_sc_hypervisor_debug" = "xyes"; then
+  STARPU_SC_HYPERVISOR_DEBUG_TRUE=
+  STARPU_SC_HYPERVISOR_DEBUG_FALSE='#'
+else
+  STARPU_SC_HYPERVISOR_DEBUG_TRUE='#'
+  STARPU_SC_HYPERVISOR_DEBUG_FALSE=
+fi
+
+
+if test "x$enable_sc_hypervisor_debug" = "xyes"; then
+
+printf "%s\n" "#define STARPU_SC_HYPERVISOR_DEBUG 1" >>confdefs.h
+
+fi
+
+###############################################################################
+#                                                                             #
+#                                CPUs settings                                #
+#                                                                             #
+###############################################################################
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of CPUs" >&5
+printf %s "checking maximum number of CPUs... " >&6; }
+# Check whether --enable-maxcpus was given.
+if test ${enable_maxcpus+y}  # true whenever enable_maxcpus is set; its value becomes maxcpus
+then :
+  enableval=$enable_maxcpus; maxcpus=$enableval
+else $as_nop
+  maxcpus=auto
+fi
+
+if test x$maxcpus = xauto  # auto: derive the limit from the machine running configure
+then
+	confcpu=$(getconf _NPROCESSORS_ONLN 2>/dev/null)  # getconf's stderr is discarded; an empty result is treated as failure below
+	if test x$confcpu = x
+	then
+		as_fn_error $? "cannot get the number of CPUS, please specify a numerical value with --enable-maxcpus" "$LINENO" 5
+	fi
+	maxcpus=2  # start at 2 and double: result is the smallest power of two >= confcpu, never below 2
+	while test $maxcpus -lt $confcpu
+	do
+		maxcpus=`expr $maxcpus \* 2`
+	done
+fi
+if test x$maxcpus = x -o x$maxcpus = xyes  # only an empty value or "yes" is rejected; other values are used verbatim
+then
+	as_fn_error $? "The --enable-maxcpus option needs to be given a number" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maxcpus" >&5
+printf "%s\n" "$maxcpus" >&6; }
+
+printf "%s\n" "#define STARPU_MAXCPUS $maxcpus" >>confdefs.h
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CPUs should be used" >&5
+printf %s "checking whether CPUs should be used... " >&6; }
+# Check whether --enable-cpu was given.
+if test ${enable_cpu+y}
+then :
+  enableval=$enable_cpu; enable_cpu=$enableval
+else $as_nop
+  enable_cpu=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_cpu" >&5
+printf "%s\n" "$enable_cpu" >&6; }
+STARPU_USE_CPU=$enable_cpu
+
+ if test x$enable_cpu = xyes; then  # TRUE/FALSE pair: exactly one of the two is '#', the other is empty
+  STARPU_USE_CPU_TRUE=
+  STARPU_USE_CPU_FALSE='#'
+else
+  STARPU_USE_CPU_TRUE='#'
+  STARPU_USE_CPU_FALSE=
+fi
+
+
+if test x$enable_cpu = xyes; then
+
+printf "%s\n" "#define STARPU_USE_CPU 1" >>confdefs.h
+
+fi
+
+###############################################################################
+#                                                                             #
+#                                CUDA settings                                #
+#                                                                             #
+###############################################################################
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of CUDA devices" >&5
+printf %s "checking maximum number of CUDA devices... " >&6; }
+# Check whether --enable-maxcudadev was given.
+if test ${enable_maxcudadev+y}  # true whenever enable_maxcudadev is set; its value becomes nmaxcudadev
+then :
+  enableval=$enable_maxcudadev; nmaxcudadev=$enableval
+else $as_nop
+  nmaxcudadev=4
+fi
+
+if test x$nmaxcudadev = x -o x$nmaxcudadev = xyes  # only an empty value or "yes" is rejected; other values are used verbatim
+then
+	as_fn_error $? "The --enable-maxcudadev option needs to be given a number" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxcudadev" >&5
+printf "%s\n" "$nmaxcudadev" >&6; }
+
+printf "%s\n" "#define STARPU_MAXCUDADEVS $nmaxcudadev" >>confdefs.h
+
+
+# Check whether --enable-cuda was given.
+if test ${enable_cuda+y}
+then :
+  enableval=$enable_cuda;
+else $as_nop
+  enable_cuda=maybe
+fi
+
+
+# We don't want to be hit by conflicts between simgrid, boost, and CUDA
+if test x$enable_simgrid = xyes; then
+	if test x$enable_cuda = xyes; then  # only an explicit "yes" is an error; the "maybe" default is silently turned off
+		as_fn_error $? "Building against CUDA should not be enabled with simgrid" "$LINENO" 5
+	fi
+	enable_cuda=no
+fi
+
+#AC_MSG_CHECKING(whether CUDA is available)
+
+# Check whether --with-cuda-dir was given.
+if test ${with_cuda_dir+y}
+then :
+  withval=$with_cuda_dir;
+	cuda_dir="$withval"
+	# in case this was not explicit yet
+	enable_cuda=yes  # NOTE(review): unconditional, so it also overwrites enable_cuda=no from the simgrid guard above - confirm intended
+
+else $as_nop
+  cuda_dir=no
+fi
+
+
+
+# Check whether --with-cuda-include-dir was given.
+if test ${with_cuda_include_dir+y}
+then :
+  withval=$with_cuda_include_dir;
+	cuda_include_dir="$withval"
+	# in case this was not explicit yet
+	enable_cuda=yes  # NOTE(review): same unconditional override as for --with-cuda-dir - confirm intended
+
+else $as_nop
+  cuda_include_dir=no
+fi
+
+
+
+# Check whether --with-cuda-lib-dir was given.
+if test ${with_cuda_lib_dir+y} +then : + withval=$with_cuda_lib_dir; + cuda_lib_dir="$withval" + # in case this was not explicit yet + enable_cuda=yes + +else $as_nop + cuda_lib_dir=no +fi + + + + + +if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then + + __cuda_dir="$cuda_dir" + __cuda_include_dir="$cuda_include_dir" + __cuda_lib_dir="$cuda_lib_dir" + + if test -z "$__cuda_lib_dir" ; then + __cuda_lib_dir=no + fi + if test -z "$__cuda_include_dir" ; then + __cuda_include_dir=no + fi + if test -z "$__cuda_dir" ; then + __cuda_dir=no + fi + + if test "$__cuda_dir" != "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 +printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 +printf %s "checking whether CUDA is available... " >&6; } + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + + if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then + __cuda_include_dir="$__cuda_dir/include" + fi + + SAVED_CPPFLAGS="$CPPFLAGS" + have_valid_cuda=no + + if test "$__cuda_include_dir" != "no" ; then + CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" + fi + + ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_h" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi + + unset ac_cv_header_cuda_h + + if test "$have_valid_cuda" = "yes" ; then + if test "$__cuda_lib_dir" != "no" ; then + + __cuda_L="-L${__cuda_lib_dir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + else + if test "$__cuda_dir" != "no" ; then + for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do + + __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + if test "$have_valid_cuda" = yes ; then + break + fi + done + else + + __cuda_L="" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + fi + fi + fi + + if test "$have_valid_cuda" = "no" ; then + CPPFLAGS="${SAVED_CPPFLAGS}" + unset STARPU_CUDA_LDFLAGS + else + if test "$NVCC" = "" ; then + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" + ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + if test "x$NVCC" = "xnot-found"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 +printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} + have_valid_cuda=no + else + # This is for very old cuda, to enable the use of double etc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 +printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -arch sm_13" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + # This is for recent cuda, which complains if we don't actually set an arch!? 
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 +printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + rm -f cuda_test* + fi + + if test -n "$NVCC_CC"; then + NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" + fi + if test "$__cuda_include_dir" != "no"; then + STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" + NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" + fi + fi + + if test "$have_valid_cuda" = "no" ; then + + __cuda_dir="$CUDA_ROOT" + __cuda_include_dir="$CUDA_INC_PATH" + __cuda_lib_dir="$CUDA_LIB_PATH" + + if test -z "$__cuda_lib_dir" ; then + __cuda_lib_dir=no + fi + if test -z "$__cuda_include_dir" ; then + __cuda_include_dir=no + fi + if test -z "$__cuda_dir" ; then + __cuda_dir=no + fi + + if test "$__cuda_dir" != "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 +printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 +printf %s "checking whether CUDA is available... 
" >&6; } + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + + if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then + __cuda_include_dir="$__cuda_dir/include" + fi + + SAVED_CPPFLAGS="$CPPFLAGS" + have_valid_cuda=no + + if test "$__cuda_include_dir" != "no" ; then + CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" + fi + + ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_h" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi + + unset ac_cv_header_cuda_h + + if test "$have_valid_cuda" = "yes" ; then + if test "$__cuda_lib_dir" != "no" ; then + + __cuda_L="-L${__cuda_lib_dir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + else + if test "$__cuda_dir" != "no" ; then + for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do + + __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + if test "$have_valid_cuda" = yes ; then + break + fi + done + else + + __cuda_L="" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + fi + fi + fi + + if test "$have_valid_cuda" = "no" ; then + CPPFLAGS="${SAVED_CPPFLAGS}" + unset STARPU_CUDA_LDFLAGS + else + if test "$NVCC" = "" ; then + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" + ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + if test "x$NVCC" = "xnot-found"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 +printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} + have_valid_cuda=no + else + # This is for very old cuda, to enable the use of double etc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 +printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -arch sm_13" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + # This is for recent cuda, which complains if we don't actually set an arch!? 
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 +printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + rm -f cuda_test* + fi + + if test -n "$NVCC_CC"; then + NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" + fi + if test "$__cuda_include_dir" != "no"; then + STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" + NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" + fi + fi + + fi + if test "$have_valid_cuda" = "no" ; then + if test "$NVCC" = "" ; then + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$PATH:/usr/local/cuda/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" + ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + if test "$NVCC" != not-found ; then + CUDA_ROOT="$(dirname $NVCC)/.." + # Try to find all of cuda just from the availability of nvcc in PATH + + __cuda_dir="$CUDA_ROOT" + __cuda_include_dir="$CUDA_ROOT/include" + __cuda_lib_dir="$CUDA_ROOT/lib" + + if test -z "$__cuda_lib_dir" ; then + __cuda_lib_dir=no + fi + if test -z "$__cuda_include_dir" ; then + __cuda_include_dir=no + fi + if test -z "$__cuda_dir" ; then + __cuda_dir=no + fi + + if test "$__cuda_dir" != "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 +printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 +printf %s "checking whether CUDA is available... 
" >&6; } + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + + if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then + __cuda_include_dir="$__cuda_dir/include" + fi + + SAVED_CPPFLAGS="$CPPFLAGS" + have_valid_cuda=no + + if test "$__cuda_include_dir" != "no" ; then + CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" + fi + + ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_h" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi + + unset ac_cv_header_cuda_h + + if test "$have_valid_cuda" = "yes" ; then + if test "$__cuda_lib_dir" != "no" ; then + + __cuda_L="-L${__cuda_lib_dir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + else + if test "$__cuda_dir" != "no" ; then + for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do + + __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + if test "$have_valid_cuda" = yes ; then + break + fi + done + else + + __cuda_L="" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + fi + fi + fi + + if test "$have_valid_cuda" = "no" ; then + CPPFLAGS="${SAVED_CPPFLAGS}" + unset STARPU_CUDA_LDFLAGS + else + if test "$NVCC" = "" ; then + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" + ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + if test "x$NVCC" = "xnot-found"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 +printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} + have_valid_cuda=no + else + # This is for very old cuda, to enable the use of double etc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 +printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -arch sm_13" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + # This is for recent cuda, which complains if we don't actually set an arch!? 
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 +printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + rm -f cuda_test* + fi + + if test -n "$NVCC_CC"; then + NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" + fi + if test "$__cuda_include_dir" != "no"; then + STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" + NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" + fi + fi + + cuda_dir=$(dirname $NVCC)/.. + else + unset NVCC + fi + fi + + if test "$have_valid_cuda" = "no" ; then + for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INC/.." "$CUDA_BIN/.." "$CUDA_SDK/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do + if test -n "$f" ; then + + __cuda_dir="$f" + __cuda_include_dir="no" + __cuda_lib_dir="no" + + if test -z "$__cuda_lib_dir" ; then + __cuda_lib_dir=no + fi + if test -z "$__cuda_include_dir" ; then + __cuda_include_dir=no + fi + if test -z "$__cuda_dir" ; then + __cuda_dir=no + fi + + if test "$__cuda_dir" != "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 +printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 +printf %s "checking whether CUDA is available... 
" >&6; } + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + + if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then + __cuda_include_dir="$__cuda_dir/include" + fi + + SAVED_CPPFLAGS="$CPPFLAGS" + have_valid_cuda=no + + if test "$__cuda_include_dir" != "no" ; then + CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" + fi + + ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_h" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi + + unset ac_cv_header_cuda_h + + if test "$have_valid_cuda" = "yes" ; then + if test "$__cuda_lib_dir" != "no" ; then + + __cuda_L="-L${__cuda_lib_dir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + else + if test "$__cuda_dir" != "no" ; then + for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do + + __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + if test "$have_valid_cuda" = yes ; then + break + fi + done + else + + __cuda_L="" + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 +printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 +printf %s "checking for main in -lcudart... " >&6; } +if test ${ac_cv_lib_cudart_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cudart_main=yes +else $as_nop + ac_cv_lib_cudart_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 +printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } +if test "x$ac_cv_lib_cudart_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cudart=ac_cv_lib_cudart_main + + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 +printf %s "checking for main in -lcublas... " >&6; } +if test ${ac_cv_lib_cublas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublas_main=yes +else $as_nop + ac_cv_lib_cublas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 +printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } +if test "x$ac_cv_lib_cublas_main" = xyes +then : + have_valid_cuda=yes +else $as_nop + have_valid_cuda=no +fi +ac_cv_lib_cublas=ac_cv_lib_cublas_main + + unset ac_cv_lib_cublas_main + fi + LDFLAGS="${SAVED_LDFLAGS}" + + fi + fi + fi + + if test "$have_valid_cuda" = "no" ; then + CPPFLAGS="${SAVED_CPPFLAGS}" + unset STARPU_CUDA_LDFLAGS + else + if test "$NVCC" = "" ; then + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" + ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + if test "x$NVCC" = "xnot-found"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 +printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} + have_valid_cuda=no + else + # This is for very old cuda, to enable the use of double etc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 +printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -arch sm_13" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + # This is for recent cuda, which complains if we don't actually set an arch!? 
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 +printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + rm -f cuda_test* + fi + + if test -n "$NVCC_CC"; then + NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" + fi + if test "$__cuda_include_dir" != "no"; then + STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" + NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" + fi + fi + + if test "$have_valid_cuda" = "yes" ; then + break + fi + fi + done + fi + + # Check cuda is compatible with the C compiler + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is working" >&5 +printf %s "checking whether CUDA is working... " >&6; } + if test "$have_valid_cuda" = "yes" ; then + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS} -lcudart" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main (void) +{ + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + have_valid_cuda="yes" +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?" >&5 +printf "%s\n" "CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?" >&6; } + have_valid_cuda="no" + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +else $as_nop + + as_fn_error $? "CUDA found, but cuda.h could not be compiled" "$LINENO" 5 + have_valid_cuda="no" + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CPPFLAGS="${SAVED_CPPFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_valid_cuda" >&5 +printf "%s\n" "$have_valid_cuda" >&6; } + + # in case CUDA was explicitly required, but is not available, this is an error + if test x$enable_cuda = xyes -a x$have_valid_cuda = xno; then + as_fn_error $? "cannot find CUDA" "$LINENO" 5 + fi + # now we enable CUDA if and only if a proper setup is available + enable_cuda=$have_valid_cuda +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA should be used" >&5 +printf %s "checking whether CUDA should be used... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_cuda" >&5 +printf "%s\n" "$enable_cuda" >&6; } +STARPU_USE_CUDA=$enable_cuda + + if test x$enable_cuda = xyes; then + STARPU_USE_CUDA_TRUE= + STARPU_USE_CUDA_FALSE='#' +else + STARPU_USE_CUDA_TRUE='#' + STARPU_USE_CUDA_FALSE= +fi + +cc_or_nvcc=$CC +if test x$enable_cuda = xyes; then + cc_or_nvcc=$NVCC + +printf "%s\n" "#define STARPU_USE_CUDA 1" >>confdefs.h + + + # On Darwin, the libstdc++ dependency is not automatically added by nvcc +# case "$target" in +# *-*darwin*) AC_HAVE_LIBRARY([stdc++], []) ;; +# #*-*darwin*) AC_HAVE_LIBRARY([stdc++], [STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++"]) ;; +# esac + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart -lcublas" + STARPU_CUFFT_LDFLAGS="-lcufft" + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + case \ $NVCCFLAGS\ in + *\ -std=*\ *) ;; + *) + SAVED_CXX="$CXX" + CXX="$NVCC" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef STARPU_HAVE_SIMGRID_MSG_H + #include + #include + #else + #include + #endif + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + +else $as_nop + NVCCFLAGS="-std=c++11 $NVCCFLAGS" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CXX="$SAVED_CXX" + esac + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + if test "$F77" = "gfortran" -o "$FC" = "gfortran" ; then + STARPU_CUDA_FORTRAN_LDFLAGS="-lgfortran" + + fi + + #in case this is a 64bit setup, we tell nvcc to use a -m64 flag, if missing from existing flags + if test x$SIZEOF_VOID_P = x8; then + case \ $NVCCFLAGS\ in + *\ -m64\ *) ;; + *) NVCCFLAGS="${NVCCFLAGS} -m64" ;; + esac + fi + + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + SAVED_LIBS="${LIBS}" + ac_fn_c_check_header_compile "$LINENO" "cuda_gl_interop.h" "ac_cv_header_cuda_gl_interop_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_gl_interop_h" = xyes +then : + printf "%s\n" "#define HAVE_CUDA_GL_INTEROP_H 1" >>confdefs.h + +fi + + + for ac_header in cublasLt.h +do : + ac_fn_c_check_header_compile "$LINENO" "cublasLt.h" "ac_cv_header_cublasLt_h" "$ac_includes_default" +if test "x$ac_cv_header_cublasLt_h" = xyes +then : + printf "%s\n" "#define HAVE_CUBLASLT_H 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cublasLtCreate in -lcublasLt" >&5 +printf %s "checking for cublasLtCreate in -lcublasLt... " >&6; } +if test ${ac_cv_lib_cublasLt_cublasLtCreate+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcublasLt $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cublasLtCreate (); +int +main (void) +{ +return cublasLtCreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cublasLt_cublasLtCreate=yes +else $as_nop + ac_cv_lib_cublasLt_cublasLtCreate=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublasLt_cublasLtCreate" >&5 +printf "%s\n" "$ac_cv_lib_cublasLt_cublasLtCreate" >&6; } +if test "x$ac_cv_lib_cublasLt_cublasLtCreate" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_LIBCUBLASLT 1" >>confdefs.h + + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcublasLt" +fi + + +fi + +done + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cusparseCreate in -lcusparse" >&5 +printf %s "checking for cusparseCreate in -lcusparse... " >&6; } +if test ${ac_cv_lib_cusparse_cusparseCreate+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcusparse $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cusparseCreate (); +int +main (void) +{ +return cusparseCreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cusparse_cusparseCreate=yes +else $as_nop + ac_cv_lib_cusparse_cusparseCreate=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cusparse_cusparseCreate" >&5 +printf "%s\n" "$ac_cv_lib_cusparse_cusparseCreate" >&6; } +if test "x$ac_cv_lib_cusparse_cusparseCreate" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_LIBCUSPARSE 1" >>confdefs.h + + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusparse" +fi + + ac_fn_check_decl "$LINENO" "cusparseSetStream" "ac_cv_have_decl_cusparseSetStream" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_cusparseSetStream" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_CUSPARSESETSTREAM $ac_have_decl" >>confdefs.h + + + # we also check that CuSolver is available + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cusolverDnCreate in -lcusolver" >&5 +printf %s "checking for cusolverDnCreate in -lcusolver... " >&6; } +if test ${ac_cv_lib_cusolver_cusolverDnCreate+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcusolver $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cusolverDnCreate (); +int +main (void) +{ +return cusolverDnCreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cusolver_cusolverDnCreate=yes +else $as_nop + ac_cv_lib_cusolver_cusolverDnCreate=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cusolver_cusolverDnCreate" >&5 +printf "%s\n" "$ac_cv_lib_cusolver_cusolverDnCreate" >&6; } +if test "x$ac_cv_lib_cusolver_cusolverDnCreate" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_LIBCUSOLVER 1" >>confdefs.h + + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusolver" +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvidia-ml can be used" >&5 +printf %s "checking whether nvidia-ml can be used... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + __typeof__(nvmlInit) *mynvmlInit = nvmlInit; + mynvmlInit(); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + +printf "%s\n" "#define STARPU_HAVE_NVML_H 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + ac_fn_check_decl "$LINENO" "nvmlDeviceGetTotalEnergyConsumption" "ac_cv_have_decl_nvmlDeviceGetTotalEnergyConsumption" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_nvmlDeviceGetTotalEnergyConsumption" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION $ac_have_decl" >>confdefs.h + + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: nvml.h could not be compiled. This will prevent from correct understanding of the machine topology." 
>&5 +printf "%s\n" "$as_me: WARNING: nvml.h could not be compiled. This will prevent from correct understanding of the machine topology." >&2;} + NO_NVML="Warning: no nvml.h found" + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + + CPPFLAGS="${SAVED_CPPFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + LIBS="${SAVED_LIBS}" +fi + +CC_OR_NVCC=$cc_or_nvcc + + +have_magma=no +if test x$enable_cuda = xyes; then + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MAGMA" >&5 +printf %s "checking for MAGMA... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$MAGMA_CFLAGS"; then + pkg_cv_MAGMA_CFLAGS="$MAGMA_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"magma\""; } >&5 + ($PKG_CONFIG --exists --print-errors "magma") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_MAGMA_CFLAGS=`$PKG_CONFIG --cflags "magma" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$MAGMA_LIBS"; then + pkg_cv_MAGMA_LIBS="$MAGMA_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"magma\""; } >&5 + ($PKG_CONFIG --exists --print-errors "magma") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_MAGMA_LIBS=`$PKG_CONFIG --libs "magma" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + MAGMA_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "magma"` + else + MAGMA_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "magma"` + fi + # Put the nasty error message in config.log where it belongs + echo "$MAGMA_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + : +elif test $pkg_failed = untried; then + : +else + MAGMA_CFLAGS=$pkg_cv_MAGMA_CFLAGS + MAGMA_LIBS=$pkg_cv_MAGMA_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + + +printf "%s\n" "#define STARPU_HAVE_MAGMA 1" >>confdefs.h + + STARPU_HAVE_MAGMA=1 + + have_magma=yes + +fi +fi + if test x$have_magma = xyes; then + STARPU_HAVE_MAGMA_TRUE= + STARPU_HAVE_MAGMA_FALSE='#' +else + STARPU_HAVE_MAGMA_TRUE='#' + STARPU_HAVE_MAGMA_FALSE= +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MAGMA should be used" >&5 +printf %s "checking whether MAGMA should be used... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_magma" >&5 +printf "%s\n" "$have_magma" >&6; } + +# cufftDoubleComplex may not be available on an old CUDA setup +ac_fn_c_check_type "$LINENO" "cufftDoubleComplex" "ac_cv_type_cufftDoubleComplex" "#include +" +if test "x$ac_cv_type_cufftDoubleComplex" = xyes +then : + have_cufftdoublecomplex=yes +else $as_nop + have_cufftdoublecomplex=no +fi + + if test x$have_cufftdoublecomplex = xyes; then + STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE= + STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE='#' +else + STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE='#' + STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE= +fi + +if test x$have_cufftdoublecomplex = xyes; then + +printf "%s\n" "#define STARPU_HAVE_CUFFTDOUBLECOMPLEX 1" >>confdefs.h + +fi + +# The CURAND library is only available since CUDA 3.2 +have_curand=$enable_cuda +if test x$enable_cuda = xyes; then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcurand" >&5 +printf %s "checking for main in -lcurand... " >&6; } +if test ${ac_cv_lib_curand_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcurand $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_curand_main=yes +else $as_nop + ac_cv_lib_curand_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curand_main" >&5 +printf "%s\n" "$ac_cv_lib_curand_main" >&6; } +if test "x$ac_cv_lib_curand_main" = xyes +then : + have_curand=yes +else $as_nop + have_curand=no +fi +ac_cv_lib_curand=ac_cv_lib_curand_main + + LDFLAGS="${SAVED_LDFLAGS}" +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CURAND is available" >&5 +printf %s "checking whether CURAND is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_curand" >&5 +printf "%s\n" "$have_curand" >&6; } +if test x$have_curand = xyes; then + +printf "%s\n" "#define STARPU_HAVE_CURAND 1" >>confdefs.h + + STARPU_CURAND_LDFLAGS="$STARPU_CURAND_LDFLAGS -lcurand" + +fi + +# Peer transfers are only supported since CUDA 4.0 +# Disable them if user explicitly wants to disable them +# Check whether --enable-cuda_memcpy_peer was given. +if test ${enable_cuda_memcpy_peer+y} +then : + enableval=$enable_cuda_memcpy_peer; +else $as_nop + enable_cuda_memcpy_peer=$enable_cuda +fi + +if test x$enable_cuda_memcpy_peer = xyes; then + +printf "%s\n" "#define STARPU_HAVE_CUDA_MEMCPY_PEER 1" >>confdefs.h + +fi + +# Check whether --enable-cuda_map was given. 
+if test ${enable_cuda_map+y} +then : + enableval=$enable_cuda_map; +else $as_nop + enable_cuda_map=yes +fi + +if test x$enable_cuda_map = xyes -a x$enable_cuda = xyes ; then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "canMapHostMemory" "ac_cv_member_struct_cudaDeviceProp_canMapHostMemory" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_canMapHostMemory" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_CUDA_CANMAPHOST 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "unifiedAddressing" "ac_cv_member_struct_cudaDeviceProp_unifiedAddressing" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_unifiedAddressing" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_CUDA_UNIFIEDADDR 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "managedMemory" "ac_cv_member_struct_cudaDeviceProp_managedMemory" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_managedMemory" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_CUDA_MNGMEM 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pageableMemoryAccess" "ac_cv_member_struct_cudaDeviceProp_pageableMemoryAccess" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_pageableMemoryAccess" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_CUDA_PAGEABLEMEM 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "struct cudaPointerAttributes" "type" "ac_cv_member_struct_cudaPointerAttributes_type" "#include +" +if test "x$ac_cv_member_struct_cudaPointerAttributes_type" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_CUDA_POINTER_TYPE 1" >>confdefs.h + +fi + + LDFLAGS="${SAVED_LDFLAGS}" + +printf "%s\n" "#define STARPU_USE_CUDA_MAP 1" >>confdefs.h + +fi + +if test x$enable_cuda = xyes; then + # Check whether --enable-cuda0 was given. 
+if test ${enable_cuda0+y} +then : + enableval=$enable_cuda0; +else $as_nop + enable_cuda0=no +fi + + if test x$enable_cuda0 = xyes; then + +printf "%s\n" "#define STARPU_USE_CUDA0 1" >>confdefs.h + + fi + # Check whether --enable-cuda1 was given. +if test ${enable_cuda1+y} +then : + enableval=$enable_cuda1; +else $as_nop + enable_cuda1=no +fi + + if test x$enable_cuda1 = xyes; then + +printf "%s\n" "#define STARPU_USE_CUDA1 1" >>confdefs.h + + fi + + if test x$starpu_windows != xyes ; then + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++" + fi + + + +fi + if test x$enable_cuda0 = xyes; then + STARPU_USE_CUDA0_TRUE= + STARPU_USE_CUDA0_FALSE='#' +else + STARPU_USE_CUDA0_TRUE='#' + STARPU_USE_CUDA0_FALSE= +fi + + if test x$enable_cuda1 = xyes; then + STARPU_USE_CUDA1_TRUE= + STARPU_USE_CUDA1_FALSE='#' +else + STARPU_USE_CUDA1_TRUE='#' + STARPU_USE_CUDA1_FALSE= +fi + + + + + + +############################################################################### +# # +# HIP settings # +# # +############################################################################### +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of HIP devices" >&5 +printf %s "checking maximum number of HIP devices... " >&6; } +# Check whether --enable-maxhipdev was given. +if test ${enable_maxhipdev+y} +then : + enableval=$enable_maxhipdev; nmaxhipdev=$enableval +else $as_nop + nmaxhipdev=8 +fi + +if test x$nmaxhipdev = x -o x$nmaxhipdev = xyes +then + as_fn_error $? "The --enable-maxhipdev option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxhipdev" >&5 +printf "%s\n" "$nmaxhipdev" >&6; } + +printf "%s\n" "#define STARPU_MAXHIPDEVS $nmaxhipdev" >>confdefs.h + +# Check whether --enable-hip was given. 
+if test ${enable_hip+y} +then : + enableval=$enable_hip; +else $as_nop + enable_hip=maybe +fi + + +if test x$enable_cuda = xyes; then + # hip_runtime.h conflicts with cuda_runtime.h + # see https://github.com/ROCm-Developer-Tools/HIP/issues/2703 + if test x$enable_hip = xyes ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703" >&5 +printf "%s\n" "$as_me: WARNING: Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703" >&2;} + fi + enable_hip=no +fi + +if test x$enable_simgrid = xyes; then + if test x$enable_hip = xyes; then + as_fn_error $? "HIP not supported with simgrid" "$LINENO" 5 + fi + enable_hip=no +fi + +have_valid_hip=no +if test x$enable_hip != xno; then + + # Extract the first word of "hipconfig", so it can be a program name with args. +set dummy hipconfig; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_HIPCONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $HIPCONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_HIPCONFIG="$HIPCONFIG" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_HIPCONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_HIPCONFIG" && ac_cv_path_HIPCONFIG="not-found" + ;; +esac +fi +HIPCONFIG=$ac_cv_path_HIPCONFIG +if test -n "$HIPCONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCONFIG" >&5 +printf "%s\n" "$HIPCONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$HIPCONFIG" = "xnot-found"; then + if test x$enable_hip = xyes; then + as_fn_error $? "'hipconfig' not found for HIP support" "$LINENO" 5 + fi + have_valid_hip=no + else + HIP_PLATFORM="$(hipconfig --platform)" + HIP_DIR="$(hipconfig --path)" + HIP_LIB_DIR="$HIP_DIR/lib" + HIP_INCLUDE_DIR="$HIP_DIR/include" + STARPU_HIP_CPPFLAGS="$(hipconfig --cpp_config | tr -d '\n') -L$HIP_LIB_DIR" + if test "$HIP_PLATFORM" = "nvidia"; then + STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_NVIDIA" + fi + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_AMD" + fi + HIP_CLANG_PATH="$(hipconfig --hipclangpath)" + have_valid_hip=yes + + +# Check whether --with-hipblas was given. 
+if test ${with_hipblas+y} +then : + withval=$with_hipblas; custom_hipblas_dir="$withval" +fi + + + if test x$custom_hipblas_dir != x; then + HIPBLAS_INCLUDE_DIR="$custom_hipblas_dir/include" + HIPBLAS_LIB_DIR="$custom_hipblas_dir/lib" + STARPU_HIPBLAS_DIRS="-I$HIPBLAS_INCLUDE_DIR -L$HIPBLAS_LIB_DIR" + fi + + HIPCCFLAGS="$HIPCCFLAGS $STARPU_HIP_CPPFLAGS" + fi +fi + +if test "$HIP_PLATFORM" = "amd"; then + SAVED_LIBS=${LIBS} + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} -L$HIP_LIB_DIR" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing hipMemGetInfo" >&5 +printf %s "checking for library containing hipMemGetInfo... " >&6; } +if test ${ac_cv_search_hipMemGetInfo+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char hipMemGetInfo (); +int +main (void) +{ +return hipMemGetInfo (); + ; + return 0; +} +_ACEOF +for ac_lib in '' amdhip64 +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_hipMemGetInfo=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_hipMemGetInfo+y} +then : + break +fi +done +if test ${ac_cv_search_hipMemGetInfo+y} +then : + +else $as_nop + ac_cv_search_hipMemGetInfo=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_hipMemGetInfo" >&5 +printf "%s\n" "$ac_cv_search_hipMemGetInfo" >&6; } +ac_res=$ac_cv_search_hipMemGetInfo +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else $as_nop + have_valid_hip=no +fi + + LDFLAGS="${SAVED_LDFLAGS}" + LIBS=${SAVED_LIBS} +fi + +if test x$have_valid_hip = xyes; then + SAVED_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="${CPPFLAGS} $STARPU_HIPBLAS_DIRS $STARPU_HIP_CPPFLAGS " + for ac_header in hip/hip_runtime.h hip/hip_runtime_api.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + have_valid_hip=yes +else $as_nop + have_valid_hip=no +fi + +done + if test x$custom_hipblas_dir != x; then + as_ac_Header=`printf "%s\n" "ac_cv_header_$custom_hipblas_dir/include/hipblas.h" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$custom_hipblas_dir/include/hipblas.h" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + have_valid_hipblas=yes +else $as_nop + have_valid_hipblas=no +fi + + { printf "%s\n" 
"$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' custom" >&5 +printf "%s\n" "$as_me: WARNING: 'hipblas' custom" >&2;} + else + ac_fn_c_check_header_compile "$LINENO" "hipblas/hipblas.h" "ac_cv_header_hipblas_hipblas_h" "$ac_includes_default" +if test "x$ac_cv_header_hipblas_hipblas_h" = xyes +then : + have_valid_hipblas=yes +else $as_nop + have_valid_hipblas=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' default" >&5 +printf "%s\n" "$as_me: WARNING: 'hipblas' default" >&2;} + fi + if test x$have_valid_hipblas = xyes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lhipblas" >&5 +printf %s "checking for main in -lhipblas... " >&6; } +if test ${ac_cv_lib_hipblas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhipblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_hipblas_main=yes +else $as_nop + ac_cv_lib_hipblas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hipblas_main" >&5 +printf "%s\n" "$ac_cv_lib_hipblas_main" >&6; } +if test "x$ac_cv_lib_hipblas_main" = xyes +then : + have_valid_hipblas=yes +else $as_nop + have_valid_hipblas=no +fi +ac_cv_lib_hipblas=ac_cv_lib_hipblas_main + + fi + if test "$HIP_PLATFORM" = "amd"; then + if test x$have_valid_hipblas = xyes; then + for ac_header in rocblas/rocblas.h +do : + ac_fn_c_check_header_compile "$LINENO" "rocblas/rocblas.h" "ac_cv_header_rocblas_rocblas_h" "$ac_includes_default" +if test "x$ac_cv_header_rocblas_rocblas_h" = xyes +then : + printf "%s\n" "#define HAVE_ROCBLAS_ROCBLAS_H 1" >>confdefs.h + have_valid_hipblas=yes +else $as_nop + have_valid_hipblas=no +fi + +done + fi + if test x$have_valid_hipblas 
= xyes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lrocblas" >&5 +printf %s "checking for main in -lrocblas... " >&6; } +if test ${ac_cv_lib_rocblas_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrocblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_rocblas_main=yes +else $as_nop + ac_cv_lib_rocblas_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rocblas_main" >&5 +printf "%s\n" "$ac_cv_lib_rocblas_main" >&6; } +if test "x$ac_cv_lib_rocblas_main" = xyes +then : + have_valid_hipblas=yes +else $as_nop + have_valid_hipblas=no +fi +ac_cv_lib_rocblas=ac_cv_lib_rocblas_main + + fi + fi + if test x$have_valid_hipblas = xyes; then + +printf "%s\n" "#define STARPU_USE_HIPBLAS 1" >>confdefs.h + + if test x$custom_hipblas_dir != x; then + HIPCCFLAGS="$HIPCCFLAGS -I$HIPBLAS_INCLUDE_DIR" + STARPU_HIPBLAS_LDFLAGS="-L$HIPBLAS_LIB_DIR" + fi + STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lhipblas" + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lrocblas" + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' not found, disabling HIP examples" >&5 +printf "%s\n" "$as_me: WARNING: 'hipblas' not found, disabling HIP examples" >&2;} + fi + CPPFLAGS="${SAVED_CPPFLAGS}" +fi +STARPU_USE_HIPBLAS=$have_valid_hipblas + + if test x$have_valid_hipblas = xyes; then + STARPU_USE_HIPBLAS_TRUE= + STARPU_USE_HIPBLAS_FALSE='#' +else + STARPU_USE_HIPBLAS_TRUE='#' + STARPU_USE_HIPBLAS_FALSE= +fi + + +if test x$have_valid_hip = xyes; then + if test -z "$HIP_DIR"; then + have_valid_hip=no + fi + if test -z "$HIP_LIB_DIR"; then + have_valid_hip=no 
+ fi + if test -z "$HIP_INCLUDE_DIR"; then + have_valid_hip=no + fi + + if test "$HIPCC" = ""; then + # Extract the first word of "hipcc", so it can be a program name with args. +set dummy hipcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_HIPCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $HIPCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_HIPCC="$HIPCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$HIP_CLANG_PATH:$PATH:/usr/bin:/bin" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_HIPCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_HIPCC" && ac_cv_path_HIPCC="not-found" + ;; +esac +fi +HIPCC=$ac_cv_path_HIPCC +if test -n "$HIPCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCC" >&5 +printf "%s\n" "$HIPCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi + + #testing if hipcc is defined, if not => STARPU_USE_HIP undefined + if test "x$HIPCC" = "xnot-found"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipcc' not found, disabling HIP" >&5 +printf "%s\n" "$as_me: WARNING: 'hipcc' not found, disabling HIP" >&2;} + have_valid_hip=no + fi + + if test "$HIP_PLATFORM" = "nvidia"; then + HIPCCFLAGS="$HIPCCFLAGS --x cu" + fi + + if test "x$have_valid_hip" = xyes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $HIPCC is working" >&5 +printf %s "checking whether $HIPCC is working... 
" >&6; } + rm -f conftest.hip conftest.o + touch conftest.hip + if $HIPCC $HIPCCFLAGS conftest.hip -o conftest.o -c $STARPU_HIP_CPPFLAGS +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipcc' does not work, disabling HIP" >&5 +printf "%s\n" "$as_me: WARNING: 'hipcc' does not work, disabling HIP" >&2;} + have_valid_hip=no + +fi + fi +fi + +# in case HIP was explicitly required, but is not available, this is an error +if test x$enable_hip = xyes -a x$have_valid_hip = xno; then + as_fn_error $? "cannot find HIP" "$LINENO" 5 +fi +# now we enable HIP if and only if a proper setup is available +enable_hip=$have_valid_hip + +if test "x$enable_hip" = xyes; then + +printf "%s\n" "#define STARPU_USE_HIP 1" >>confdefs.h + + if test "$HIP_PLATFORM" = "nvidia"; then + STARPU_HIP_LDFLAGS="-lcuda -lcudart -lcublas $STARPU_HIPBLAS_LDFLAGS -lstdc++" + fi + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIP_LDFLAGS="-L$HIP_LIB_DIR -lamdhip64 $STARPU_HIPBLAS_LDFLAGS -lstdc++" + fi + + # Check whether --enable-hip_memcpy_peer was given. 
+if test ${enable_hip_memcpy_peer+y} +then : + enableval=$enable_hip_memcpy_peer; +else $as_nop + enable_hip_memcpy_peer=$enable_hip +fi + + if test x$enable_hip_memcpy_peer = xyes; then + +printf "%s\n" "#define STARPU_HAVE_HIP_MEMCPY_PEER 1" >>confdefs.h + + fi +else + STARPU_HIP_LDFLAGS= + STARPU_HIP_CPPFLAGS= + enable_hip_memcpy_peer=no +fi + + + + if test x$enable_hip = xyes; then + STARPU_USE_HIP_TRUE= + STARPU_USE_HIP_FALSE='#' +else + STARPU_USE_HIP_TRUE='#' + STARPU_USE_HIP_FALSE= +fi + + +#AC_ARG_VAR([HIPCC_CC], [C compiler for HIP compiler]) + + + + +############################################################################### +# # +# OpenCL settings # +# # +############################################################################### + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of OpenCL devices" >&5 +printf %s "checking maximum number of OpenCL devices... " >&6; } +# Check whether --enable-maxopencldev was given. +if test ${enable_maxopencldev+y} +then : + enableval=$enable_maxopencldev; nmaxopencldev=$enableval +else $as_nop + nmaxopencldev=8 +fi + +if test x$nmaxopencldev = x -o x$nmaxopencldev = xyes +then + as_fn_error $? "The --enable-maxopencldev option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxopencldev" >&5 +printf "%s\n" "$nmaxopencldev" >&6; } + +printf "%s\n" "#define STARPU_MAXOPENCLDEVS $nmaxopencldev" >>confdefs.h + +# Check whether --enable-opencl was given. +if test ${enable_opencl+y} +then : + enableval=$enable_opencl; +else $as_nop + enable_opencl=maybe +fi + + +have_valid_opencl=no + + +#AC_MSG_CHECKING(whether OpenCL is available) + +# Check whether --with-opencl-dir was given. +if test ${with_opencl_dir+y} +then : + withval=$with_opencl_dir; + opencl_dir="$withval" + # in case this was not explicit yet + enable_opencl=yes + +else $as_nop + opencl_dir=no +fi + + + +# Check whether --with-opencl-include-dir was given. 
+if test ${with_opencl_include_dir+y}
+then :
+  withval=$with_opencl_include_dir;
+        opencl_include_dir="$withval"
+        # in case this was not explicit yet
+        enable_opencl=yes
+
+else $as_nop
+  opencl_include_dir=no
+fi
+
+
+
+# Check whether --with-opencl-lib-dir was given.
+if test ${with_opencl_lib_dir+y}
+then :
+  withval=$with_opencl_lib_dir;
+        opencl_lib_dir="$withval"
+        # in case this was not explicit yet
+        enable_opencl=yes
+
+else $as_nop
+  opencl_lib_dir=no
+fi
+
+
+
+
+# NOTE(review): this script looks autoconf-generated; the two helpers below
+# replace what used to be repeated inline expansions of the same probe, and any
+# change made here presumably has to be mirrored in configure.ac -- confirm.
+
+# starpu_fn_have_libOpenCL
+# ------------------------
+# Try to link a trivial program against -lOpenCL using the current LDFLAGS and
+# LIBS.  Sets have_valid_opencl to yes or no.  The cache variable is unset on
+# return so that the next call (with different LDFLAGS) probes again.
+starpu_fn_have_libOpenCL ()
+{
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5
+printf %s "checking for main in -lOpenCL... " >&6; }
+if test ${ac_cv_lib_OpenCL_main+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lOpenCL  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main (void)
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_OpenCL_main=yes
+else $as_nop
+  ac_cv_lib_OpenCL_main=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5
+printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; }
+if test "x$ac_cv_lib_OpenCL_main" = xyes
+then :
+  have_valid_opencl=yes
+else $as_nop
+  have_valid_opencl=no
+fi
+ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main
+
+  unset ac_cv_lib_OpenCL_main
+}
+
+# starpu_fn_check_opencl DIR INCLUDE_DIR LIB_DIR
+# ----------------------------------------------
+# Probe for CL/cl.h and -lOpenCL.  Each argument is a directory or the literal
+# "no".  INCLUDE_DIR defaults to DIR/include; when LIB_DIR is "no" the default
+# linker path is tried first and then, if DIR is given, DIR/{lib64,lib,lib/x86,
+# lib/Win32}.  Sets have_valid_opencl and, on success, STARPU_OPENCL_CPPFLAGS
+# and STARPU_OPENCL_LDFLAGS.  CPPFLAGS and LDFLAGS are restored before return.
+starpu_fn_check_opencl ()
+{
+    __opencl_dir="$1"
+    __opencl_include_dir="$2"
+    __opencl_lib_dir="$3"
+
+    if test "$__opencl_dir" != "no" ; then
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir" >&5
+printf %s "checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir... " >&6; }
+    else
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available" >&5
+printf %s "checking whether OpenCL is available... " >&6; }
+    fi
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5
+printf "%s\n" "" >&6; }
+
+    if test "$__opencl_include_dir" = "no" -a "$__opencl_dir" != "no" ; then
+        __opencl_include_dir="$__opencl_dir/include"
+    fi
+
+    SAVED_CPPFLAGS="$CPPFLAGS"
+    SAVED_LDFLAGS="${LDFLAGS}"
+
+    if test "$__opencl_include_dir" != "no" ; then
+        CPPFLAGS="${CPPFLAGS} -I$__opencl_include_dir"
+    fi
+    ac_fn_c_check_header_compile "$LINENO" "CL/cl.h" "ac_cv_header_CL_cl_h" "$ac_includes_default"
+if test "x$ac_cv_header_CL_cl_h" = xyes
+then :
+  have_valid_opencl=yes
+else $as_nop
+  have_valid_opencl=no
+fi
+
+    unset ac_cv_header_CL_cl_h
+
+    if test "$have_valid_opencl" = "yes" ; then
+        if test "$__opencl_lib_dir" != "no"; then
+            LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir"
+            starpu_fn_have_libOpenCL
+        else
+            { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir" >&5
+printf %s "checking whether OpenCL is available in $__opencl_dir... " >&6; }
+            { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5
+printf "%s\n" "" >&6; }
+            starpu_fn_have_libOpenCL
+            if test "$have_valid_opencl" = "no" -a "$__opencl_dir" != "no" ; then
+                for __cuda_libdir in lib64 lib lib/x86 lib/Win32 ; do
+                    __opencl_lib_dir="$__opencl_dir/$__cuda_libdir"
+                    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir" >&5
+printf %s "checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir... " >&6; }
+                    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5
+printf "%s\n" "" >&6; }
+                    LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir"
+                    starpu_fn_have_libOpenCL
+                    if test "$have_valid_opencl" = yes ; then
+                        break
+                    fi
+                done
+            else
+                LDFLAGS="${SAVED_LDFLAGS}"
+                starpu_fn_have_libOpenCL
+            fi
+        fi
+    fi
+
+    if test "$have_valid_opencl" = "yes" -a "$__opencl_include_dir" != "no"; then
+        STARPU_OPENCL_CPPFLAGS="-I$__opencl_include_dir"
+        ac_fn_c_check_header_compile "$LINENO" "CL/cl_ext.h" "ac_cv_header_CL_cl_ext_h" "$ac_includes_default"
+if test "x$ac_cv_header_CL_cl_ext_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_CL_CL_EXT_H 1" >>confdefs.h
+
+fi
+
+    fi
+
+    CPPFLAGS="${SAVED_CPPFLAGS}"
+    LDFLAGS="${SAVED_LDFLAGS}"
+
+    if test "$have_valid_opencl" = "yes" ; then
+        if test "$__opencl_lib_dir" != "no"; then
+            STARPU_OPENCL_LDFLAGS="-L$__opencl_lib_dir"
+        fi
+        STARPU_OPENCL_LDFLAGS="${STARPU_OPENCL_LDFLAGS} -lOpenCL"
+    fi
+}
+
+if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then
+   case $target in
+        *-*-darwin*)
+          { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available" >&5
+printf %s "checking whether OpenCL is available... " >&6; }
+
+          SAVED_LIBS=$LIBS
+          LIBS="$LIBS -framework OpenCL"
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+                #ifdef __APPLE_CC__
+                #include <OpenCL/opencl.h>
+                #else
+                #include <CL/cl.h>
+                #endif
+
+int
+main (void)
+{
+return clSetKernelArg(0, 0, 0, 0);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+                enable_opencl=yes
+                have_valid_opencl=yes
+                STARPU_OPENCL_CPPFLAGS=
+                STARPU_OPENCL_LDFLAGS="-framework OpenCL"
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+                enable_opencl=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+          LIBS=$SAVED_LIBS
+          ;;
+        *)
+
+    if test "x$has_opencl_being_checked" != "xyes" ; then
+        # First honour the locations given on the command line (or "no").
+        starpu_fn_check_opencl "$opencl_dir" "$opencl_include_dir" "$opencl_lib_dir"
+
+        # Otherwise fall back to well-known CUDA install prefixes.
+        if test "$have_valid_opencl" = "no" ; then
+            for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do
+                if test -n "$f" ; then
+                    starpu_fn_check_opencl "$f" "no" "no"
+                    if test "$have_valid_opencl" = "yes" ; then
+                        break
+                    fi
+                fi
+            done
+        fi
+        has_opencl_being_checked=yes
+    fi
+
+          # in case OpenCL was explicitly required, but is not available, this is an error
+          if test x$enable_opencl = xyes -a x$have_valid_opencl = xno; then
+              as_fn_error $? "cannot find OpenCL" "$LINENO" 5
+          fi
+          # now we enable OpenCL if and only if a proper setup is available
+          enable_opencl=$have_valid_opencl
+          ;;
+   esac
+   save_LIBS="$LIBS"
+   LIBS="$LIBS $STARPU_OPENCL_LDFLAGS"
+   ac_fn_c_check_func "$LINENO" "clEnqueueMarkerWithWaitList" "ac_cv_func_clEnqueueMarkerWithWaitList"
+if test "x$ac_cv_func_clEnqueueMarkerWithWaitList" = xyes
+then :
+  printf "%s\n" "#define HAVE_CLENQUEUEMARKERWITHWAITLIST 1" >>confdefs.h
+
+fi
+
+   LIBS="$save_LIBS"
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL should be used" >&5
+printf %s "checking whether OpenCL should be used... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_opencl" >&5
+printf "%s\n" "$enable_opencl" >&6; }
+STARPU_USE_OPENCL=$enable_opencl
+
+ if test x$enable_opencl = xyes; then
+  STARPU_USE_OPENCL_TRUE=
+  STARPU_USE_OPENCL_FALSE='#'
+else
+  STARPU_USE_OPENCL_TRUE='#'
+  STARPU_USE_OPENCL_FALSE=
+fi
+
+if test x$enable_opencl = xyes ; then
+
+printf "%s\n" "#define STARPU_USE_OPENCL 1" >>confdefs.h
+
+        STARPU_OPENCL_CPPFLAGS="${STARPU_OPENCL_CPPFLAGS} -DSTARPU_OPENCL_DATADIR=\"\\\"${datarootdir}/starpu/opencl\\\"\" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS"
+        STARPU_OPENCL_DATAdir="$(eval echo ${datarootdir}/starpu/opencl/examples)"
+
+
+
+fi
+
+###############################################################################
+#                                                                             #
+#                         Maxeler FPGA Settings                               #
+#                                                                             #
+###############################################################################
+
+#NUMBER OF MAXELER FPGA DEVICES
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of Maxeler FPGA devices" >&5
+printf %s "checking maximum number of Maxeler FPGA devices... " >&6; }
+# Check whether --enable-maxmaxfpgadev was given.
+if test ${enable_maxmaxfpgadev+y}
+then :
+  enableval=$enable_maxmaxfpgadev; nmaxmaxfpgadev=$enableval
+else $as_nop
+  nmaxmaxfpgadev=12
+fi
+
+if test x$nmaxmaxfpgadev = x -o x$nmaxmaxfpgadev = xyes
+then
+        as_fn_error $? "The --enable-maxmaxfpgadev option needs to be given a number" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxmaxfpgadev" >&5
+printf "%s\n" "$nmaxmaxfpgadev" >&6; }
+
+printf "%s\n" "#define STARPU_MAXMAXFPGADEVS $nmaxmaxfpgadev" >>confdefs.h
+
+
+# Check whether --enable-max-fpga was given.
+if test ${enable_max_fpga+y}
+then :
+  enableval=$enable_max_fpga; enable_max_fpga=$enableval
+else $as_nop
+  enable_max_fpga=maybe
+
+fi
+
+
+if test x$enable_simgrid = xyes; then
+        if test x$enable_max_fpga = xyes; then
+                as_fn_error $? "Max fpga not supported with simgrid" "$LINENO" 5
+        fi
+        enable_max_fpga=no
+fi
+
+if test x$enable_max_fpga != xno; then
+        # Extract the first word of "slic-config", so it can be a program name with args.
+set dummy slic-config; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_path_SLIC_CONFIG+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $SLIC_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_SLIC_CONFIG="$SLIC_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_path_SLIC_CONFIG="$as_dir$ac_word$ac_exec_ext"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_SLIC_CONFIG" && ac_cv_path_SLIC_CONFIG="not-found"
+  ;;
+esac
+fi
+SLIC_CONFIG=$ac_cv_path_SLIC_CONFIG
+if test -n "$SLIC_CONFIG"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SLIC_CONFIG" >&5
+printf "%s\n" "$SLIC_CONFIG" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+
+        if test "x$SLIC_CONFIG" = "xnot-found"; then
+                # in case FPGA was explicitly required, but is not available, this is an error
+                if test x$enable_max_fpga = xyes; then
+                        as_fn_error $? "'slic-config' not found for Maxeler FPGA support" "$LINENO" 5
+                fi
+                enable_max_fpga=no
+        else
+                # Run the slic-config that was located above (it may be a
+                # user-supplied path outside PATH), strip the single quotes it
+                # emits and glue every "-I dir" / "-L dir" pair into one word.
+                STARPU_MAX_FPGA_CPPFLAGS=$("$SLIC_CONFIG" --cflags | sed -e "s/'//g" -e "s/-I /-I/g")
+                STARPU_MAX_FPGA_LDFLAGS=$("$SLIC_CONFIG" --libs | sed -e "s/'//g" -e "s/-L /-L/g")
+
+                enable_max_fpga=yes
+        fi
+fi
+
+STARPU_USE_MAX_FPGA=$enable_max_fpga
+
+ if test x$enable_max_fpga = xyes; then
+  STARPU_USE_MAX_FPGA_TRUE=
+  STARPU_USE_MAX_FPGA_FALSE='#'
+else
+  STARPU_USE_MAX_FPGA_TRUE='#'
+  STARPU_USE_MAX_FPGA_FALSE=
+fi
+
+if test x$enable_max_fpga = xyes; then
+
+printf "%s\n" "#define STARPU_USE_MAX_FPGA 1" >>confdefs.h
+
+fi
+
+
+###############################################################################
+#                                                                             #
+#                           General GPU settings                              #
+#                                                                             #
+###############################################################################
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous copy should be disabled" >&5
+printf %s "checking whether asynchronous copy should be disabled... " >&6; }
+# Check whether --enable-asynchronous-copy was given.
+if test ${enable_asynchronous_copy+y}
+then :
+  enableval=$enable_asynchronous_copy; enable_asynchronous_copy=$enableval
+else $as_nop
+  enable_asynchronous_copy=yes
+fi
+
+disable_asynchronous_copy=no
+if test x$enable_asynchronous_copy = xno ; then
+   disable_asynchronous_copy=yes
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_copy" >&5
+printf "%s\n" "$disable_asynchronous_copy" >&6; }
+if test x$disable_asynchronous_copy = xyes ; then
+
+printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_COPY 1" >>confdefs.h
+
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous CUDA copy should be disabled" >&5
+printf %s "checking whether asynchronous CUDA copy should be disabled... " >&6; }
+# Check whether --enable-asynchronous-cuda-copy was given.
+if test ${enable_asynchronous_cuda_copy+y} +then : + enableval=$enable_asynchronous_cuda_copy; enable_asynchronous_cuda_copy=$enableval +else $as_nop + enable_asynchronous_cuda_copy=yes +fi + +disable_asynchronous_cuda_copy=no +if test x$enable_asynchronous_cuda_copy = xno ; then + disable_asynchronous_cuda_copy=yes +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_cuda_copy" >&5 +printf "%s\n" "$disable_asynchronous_cuda_copy" >&6; } +if test x$disable_asynchronous_cuda_copy = xyes ; then + +printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous OpenCL copy should be disabled" >&5 +printf %s "checking whether asynchronous OpenCL copy should be disabled... " >&6; } +# Check whether --enable-asynchronous-opencl-copy was given. +if test ${enable_asynchronous_opencl_copy+y} +then : + enableval=$enable_asynchronous_opencl_copy; enable_asynchronous_opencl_copy=$enableval +else $as_nop + enable_asynchronous_opencl_copy=yes +fi + +disable_asynchronous_opencl_copy=no +if test x$enable_asynchronous_opencl_copy = xno ; then + disable_asynchronous_opencl_copy=yes +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_opencl_copy" >&5 +printf "%s\n" "$disable_asynchronous_opencl_copy" >&6; } +if test x$disable_asynchronous_opencl_copy = xyes ; then + +printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous MPI Master Slave copy should be disabled" >&5 +printf %s "checking whether asynchronous MPI Master Slave copy should be disabled... " >&6; } +# Check whether --enable-asynchronous-mpi-master-slave-copy was given. 
+if test ${enable_asynchronous_mpi_master_slave_copy+y} +then : + enableval=$enable_asynchronous_mpi_master_slave_copy; enable_asynchronous_mpi_master_slave_copy=$enableval +else $as_nop + enable_asynchronous_mpi_master_slave_copy=yes +fi + +disable_asynchronous_mpi_master_slave_copy=no +if test x$enable_asynchronous_mpi_master_slave_copy = xno ; then + disable_asynchronous_mpi_master_slave_copy=yes +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_mpi_master_slave_copy" >&5 +printf "%s\n" "$disable_asynchronous_mpi_master_slave_copy" >&6; } +if test x$disable_asynchronous_mpi_master_slave_copy = xyes ; then + +printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous TCP/IP Master Slave copy should be disabled" >&5 +printf %s "checking whether asynchronous TCP/IP Master Slave copy should be disabled... " >&6; } +# Check whether --enable-asynchronous-tcpip-master-slave-copy was given. +if test ${enable_asynchronous_tcpip_master_slave_copy+y} +then : + enableval=$enable_asynchronous_tcpip_master_slave_copy; enable_asynchronous_tcpip_master_slave_copy=$enableval +else $as_nop + enable_asynchronous_tcpip_master_slave_copy=yes +fi + +disable_asynchronous_tcpip_master_slave_copy=no +if test x$enable_asynchronous_tcpip_master_slave_copy = xno ; then + disable_asynchronous_tcpip_master_slave_copy=yes +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_tcpip_master_slave_copy" >&5 +printf "%s\n" "$disable_asynchronous_tcpip_master_slave_copy" >&6; } +if test x$disable_asynchronous_tcpip_master_slave_copy = xyes ; then + +printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous Maxeler FPGA copy should be disabled" >&5 +printf %s "checking whether asynchronous Maxeler FPGA copy should be disabled... 
" >&6; } +# Check whether --enable-asynchronous-max-fpga-copy was given. +if test ${enable_asynchronous_max_fpga_copy+y} +then : + enableval=$enable_asynchronous_max_fpga_copy; enable_asynchronous_max_fpga_copy=$enableval +else $as_nop + enable_asynchronous_max_fpga_copy=yes +fi + +disable_asynchronous_max_fpga_copy=no +if test x$enable_asynchronous_max_fpga_copy = xno ; then + disable_asynchronous_max_fpga_copy=yes +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_max_fpga_copy" >&5 +printf "%s\n" "$disable_asynchronous_max_fpga_copy" >&6; } +if test x$disable_asynchronous_max_fpga_copy = xyes ; then + +printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY 1" >>confdefs.h + +fi + +############################################################################### +# # +# Fortran # +# # +############################################################################### + +# Check whether --enable-fortran was given. +if test ${enable_fortran+y} +then : + enableval=$enable_fortran; enable_build_fortran_requested=$enableval +else $as_nop + enable_build_fortran_requested=yes +fi + +use_mpi_fort=no +enable_build_fortran=no +if test "x$enable_build_fortran_requested" = "xyes" ; then + if test "x$FC" != "x"; then + if $FC --version|grep -q 'GNU Fortran'; then + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + OLD_FCFLAGS="$FCFLAGS" + FCFLAGS="$FCFLAGS -cpp" + cat > conftest.$ac_ext <<_ACEOF + program main + +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) +#error GFortran too old, version >= 4.9.x needed, Fortran examples will not be built +#endif + + + end +_ACEOF +if ac_fn_fc_try_compile "$LINENO" +then : + enable_build_fortran="yes" +else $as_nop + enable_build_fortran="no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam 
conftest.$ac_ext + FCFLAGS="$OLD_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "$enable_build_fortran" = "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: GFortran too old, version >= 4.9.x needed, Fortran examples will not be built" >&5 +printf "%s\n" "$as_me: WARNING: GFortran too old, version >= 4.9.x needed, Fortran examples will not be built" >&2;} + fi + else + if $FC -V 2>&1|grep -q 'Intel(R) Fortran'; then + enable_build_fortran="yes" + ifort_fc_version=`$FC -V 2>&1 |head -1|sed 's/.*Version //;s/ Build.*//'` + ifort_maj_version=`echo $ifort_fc_version|cut -d. -f1` + + if test $ifort_maj_version -lt 16; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built" >&5 +printf "%s\n" "$as_me: WARNING: Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built" >&2;} + enable_build_fortran="no" + fi + else + if $FC -qversion 2>&1|grep -q 'IBM XL Fortran'; then + xlf_fc_version=`$FC -V 2>&1 |tail -1|sed 's/.*Version: //'` + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built" >&5 +printf "%s\n" "$as_me: WARNING: IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built" >&2;} + enable_build_fortran="no" + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Fortran compiler has not been tested for StarPU native Fortran support" >&5 +printf "%s\n" "$as_me: WARNING: Fortran compiler has not been tested for StarPU native Fortran support" >&2;} + enable_build_fortran="yes" + fi + fi + fi + 
if $FC -v 2>&1 | grep -q 'Arm C/C++/Fortran Compiler' ; then + armflang_version=`$FC -v 2>&1 | head -1 | sed 's/.*version //'` + armflang_maj_version=`echo $armflang_version|cut -d. -f1` + if test $armflang_maj_version -lt 23 ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built" >&5 +printf "%s\n" "$as_me: WARNING: ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built" >&2;} + enable_build_fortran="no" + fi + fi + + if test "x$enable_build_fortran" = "xyes" ; then + +printf "%s\n" "#define STARPU_HAVE_FC 1" >>confdefs.h + + if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes -o x$build_mpi_master_slave = xyes ; then + #Check MPIFORT + if test x$enable_simgrid = xyes ; then + DEFAULT_MPIFORT=smpifort + else + DEFAULT_MPIFORT=mpifort + fi + +# Check whether --with-mpifort was given. +if test ${with_mpifort+y} +then : + withval=$with_mpifort; DEFAULT_MPIFORT=$withval +fi + + case $DEFAULT_MPIFORT in + /*) mpifort_path="$DEFAULT_MPIFORT" ;; + *) # Extract the first word of "$DEFAULT_MPIFORT", so it can be a program name with args. +set dummy $DEFAULT_MPIFORT; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_mpifort_path+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $mpifort_path in + [\\/]* | ?:[\\/]*) + ac_cv_path_mpifort_path="$mpifort_path" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_dummy="$simgrid_dir/bin:$PATH" +for as_dir in $as_dummy +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_mpifort_path="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_mpifort_path" && ac_cv_path_mpifort_path="no" + ;; +esac +fi +mpifort_path=$ac_cv_path_mpifort_path +if test -n "$mpifort_path"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpifort_path" >&5 +printf "%s\n" "$mpifort_path" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + ;; + esac + # We test if the MPIFORT compiler exists + if test ! -x $mpifort_path; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpifort compiler '$mpifort_path' does not have the execute permission" >&5 +printf "%s\n" "The mpifort compiler '$mpifort_path' does not have the execute permission" >&6; } + mpifort_path=no + else + OLD_CC=$CC + CC=$mpicc_path + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +return MPI_Comm_f2c(0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + use_mpi_fort=yes +else $as_nop + use_mpi_fort=no + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC=$OLD_CC + if test "x$use_mpi_fort" = xyes; then + +printf "%s\n" "#define HAVE_MPI_COMM_F2C 1" >>confdefs.h + + fi + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpifort is available" >&5 +printf %s "checking whether mpifort is available... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpifort_path" >&5 +printf "%s\n" "$mpifort_path" >&6; } + MPIFORT=$mpifort_path + + + if test x$mpifort_path != xno ; then + MPIPATH=$(dirname $mpifort_path):$PATH + else + MPIPATH=$PATH + fi + fi + fi + fi +fi +if test "x$enable_build_fortran" = "xyes" ; then + if test "x$FC" = "x" ; then + enable_build_fortran="no" + fi +fi + +#We have MPI C/C++ compiler +if test x$build_mpi_master_slave = xyes; then + #Check if we can compile fortran cases + if test x$use_mpi_fort = xyes ; then + F77LD=$mpifort_path + FCLD=$mpifort_path + F77=$mpifort_path + FC=$mpifort_path + else + enable_build_fortran=no + fi +fi + + + if test "x$FC" != "x" -a "x$enable_build_fortran" = "xyes"; then + STARPU_HAVE_FC_TRUE= + STARPU_HAVE_FC_FALSE='#' +else + STARPU_HAVE_FC_TRUE='#' + STARPU_HAVE_FC_FALSE= +fi + + if test "x$F77" != "x" -a "x$enable_build_fortran" = "xyes"; then + STARPU_HAVE_F77_TRUE= + STARPU_HAVE_F77_FALSE='#' +else + STARPU_HAVE_F77_TRUE='#' + STARPU_HAVE_F77_FALSE= +fi + + if test "x$use_mpi_fort" = "xyes"; then + STARPU_HAVE_MPIFORT_TRUE= + STARPU_HAVE_MPIFORT_FALSE='#' +else + STARPU_HAVE_MPIFORT_TRUE='#' + STARPU_HAVE_MPIFORT_FALSE= +fi + + +############################################################################### +# # +# Debug and Performance analysis tools # +# # +############################################################################### + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether debug mode should be enabled" >&5 +printf %s "checking whether debug mode should be enabled... " >&6; } +# Check whether --enable-debug was given. +if test ${enable_debug+y} +then : + enableval=$enable_debug; enable_debug=$enableval +else $as_nop + enable_debug=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_debug" >&5 +printf "%s\n" "$enable_debug" >&6; } + +# Check whether --enable-spinlock_check was given. 
+if test ${enable_spinlock_check+y} +then : + enableval=$enable_spinlock_check; enable_spinlock_check=$enableval +else $as_nop + enable_spinlock_check=no +fi + +# Check whether --enable-fstack-protector-all was given. +if test ${enable_fstack_protector_all+y} +then : + enableval=$enable_fstack_protector_all; enable_fstack_protector_all=$enableval +else $as_nop + enable_fstack_protector_all=yes +fi + + +if test x$enable_debug = xyes; then + +printf "%s\n" "#define STARPU_DEBUG 1" >>confdefs.h + + CFLAGS="$CFLAGS -O0" + CXXFLAGS="$CXXFLAGS -O0" + FFLAGS="$FFLAGS -O0" + FCFLAGS="$FCFLAGS -O0" + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether C compiler supports -fno-optimize-sibling-calls... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-fno-optimize-sibling-calls" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-optimize-sibling-calls" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether MPI C compiler supports -fno-optimize-sibling-calls... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-optimize-sibling-calls" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether CXX compiler supports -fno-optimize-sibling-calls... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-fno-optimize-sibling-calls" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-optimize-sibling-calls" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether Fortran 77 compiler supports -fno-optimize-sibling-calls... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-fno-optimize-sibling-calls" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-optimize-sibling-calls" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether Fortran compiler supports -fno-optimize-sibling-calls... 
" >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-fno-optimize-sibling-calls" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-optimize-sibling-calls" >&5 +printf %s "checking whether MPI Fortran compiler supports -fno-optimize-sibling-calls... " >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-optimize-sibling-calls" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + enable_spinlock_check=yes + if test x$GCC = xyes; then + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Og" >&5 +printf %s "checking whether C compiler supports -Og... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Og" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Og" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Og" >&5 +printf %s "checking whether MPI C compiler supports -Og... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Og" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Og" >&5 +printf %s "checking whether CXX compiler supports -Og... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Og" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Og" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Og" >&5 +printf %s "checking whether Fortran 77 compiler supports -Og... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Og" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Og" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Og" >&5 +printf %s "checking whether Fortran compiler supports -Og... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Og" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Og" >&5 +printf %s "checking whether MPI Fortran compiler supports -Og... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Og" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + if test x$starpu_windows != xyes ; then + if test x$enable_fstack_protector_all = xyes ; then + CFLAGS="$CFLAGS -fstack-protector-all" + CXXFLAGS="$CXXFLAGS -fstack-protector-all" + FFLAGS="$FFLAGS -fstack-protector-all" + FCFLAGS="$FCFLAGS -fstack-protector-all" + fi + fi + fi +else + CFLAGS="-O3 $CFLAGS" + CXXFLAGS="-O3 $CXXFLAGS" + FFLAGS="-O3 $FFLAGS" + FCFLAGS="-O3 $FCFLAGS" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether gdb information should be enabled" >&5 +printf %s "checking whether gdb information should be enabled... " >&6; } +# Check whether --enable-gdb was given. +if test ${enable_gdb+y} +then : + enableval=$enable_gdb; enable_gdb=$enableval +else $as_nop + enable_gdb=yes +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_gdb" >&5 +printf "%s\n" "$enable_gdb" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether full gdb information should be enabled" >&5 +printf %s "checking whether full gdb information should be enabled... " >&6; } +# Check whether --enable-full-gdb-information was given. 
+if test ${enable_full_gdb_information+y}
+then :
+  enableval=$enable_full_gdb_information; enable_full_gdb_information=$enableval
+else $as_nop
+  enable_full_gdb_information=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_full_gdb_information" >&5
+printf "%s\n" "$enable_full_gdb_information" >&6; }
+
+# ----------------------------------------------------------------------
+# Helpers for the debug-flag probes.  The same four-language probe was
+# previously expanded inline once per flag (-gdwarf-2, -g3, -g); it is
+# now written once and called with the flag as argument.
+# ----------------------------------------------------------------------
+
+# starpu_fn_msg_checking TEXT
+# Write "checking TEXT" to descriptor 5 and open the matching
+# "checking TEXT... " line on descriptor 6.
+starpu_fn_msg_checking ()
+{
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking $1" >&5
+printf %s "checking $1... " >&6; }
+}
+
+# starpu_fn_msg_result VALUE
+# Write the result of the pending check to descriptors 5 and 6.
+starpu_fn_msg_result ()
+{
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $1" >&5
+printf "%s\n" "$1" >&6; }
+}
+
+# starpu_fn_write_c_probe
+# Write the C/C++ test program into conftest.$ac_ext.
+starpu_fn_write_c_probe ()
+{
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+const char *hello = "Hello World";
+
+  ;
+  return 0;
+}
+_ACEOF
+}
+
+# starpu_fn_write_fortran_probe
+# Write the (empty) Fortran test program into conftest.$ac_ext.
+starpu_fn_write_fortran_probe ()
+{
+  cat > conftest.$ac_ext <<_ACEOF
+      program main
+
+
+      end
+_ACEOF
+}
+
+# starpu_fn_clean_probe
+# Remove the files left behind by a link test.
+starpu_fn_clean_probe ()
+{
+  rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+}
+
+# starpu_fn_lang_c
+# Switch the current test language back to C.
+starpu_fn_lang_c ()
+{
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+}
+
+# starpu_fn_probe_debug_flag FLAG
+# Link-test FLAG with the C, C++, Fortran 77 and Fortran compilers, in
+# that order, and append it to GLOBAL_AM_CFLAGS / GLOBAL_AM_CXXFLAGS /
+# GLOBAL_AM_FFLAGS / GLOBAL_AM_FCFLAGS for each compiler that accepts it.
+# Each probe runs with the language's flags variable set to FLAG alone;
+# the previous value is restored afterwards.
+# Reads: build_mpi_lib, build_nmad_lib, MPICC, MPIFORT.
+# Leaves the test language set to C.
+starpu_fn_probe_debug_flag ()
+{
+  starpu_flag=$1
+
+  # --- C ---
+  starpu_fn_msg_checking "whether C compiler supports $starpu_flag"
+
+  SAVED_CFLAGS="$CFLAGS"
+  CFLAGS="$starpu_flag"
+
+  check_mpi="no"
+  starpu_fn_write_c_probe
+  if ac_fn_c_try_link "$LINENO"
+  then :
+    check_mpi="yes"
+    starpu_fn_msg_result yes
+  else $as_nop
+    starpu_fn_msg_result no
+  fi
+  starpu_fn_clean_probe
+
+  if test "$build_mpi_lib" = "no" && test "$build_nmad_lib" = "no"
+  then
+    # No MPI flavour is built: the plain C compiler result is final.
+    if test "$check_mpi" = "yes" ; then
+      GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $starpu_flag"
+    fi
+  elif test "$check_mpi" = "yes" ; then
+    # The flag is only kept if the MPI compiler accepts it as well.
+    SAVED_CC="$CC"
+    CC="$MPICC"
+    starpu_fn_msg_checking "whether MPI C compiler supports $starpu_flag"
+    starpu_fn_write_c_probe
+    if ac_fn_c_try_link "$LINENO"
+    then :
+      GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $starpu_flag"
+      starpu_fn_msg_result yes
+    else $as_nop
+      starpu_fn_msg_result no
+    fi
+    starpu_fn_clean_probe
+    CC="$SAVED_CC"
+  fi
+  CFLAGS="$SAVED_CFLAGS"
+
+  # --- C++ ---
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+  starpu_fn_msg_checking "whether CXX compiler supports $starpu_flag"
+
+  SAVED_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$starpu_flag"
+
+  starpu_fn_write_c_probe
+  if ac_fn_cxx_try_link "$LINENO"
+  then :
+    GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS $starpu_flag"
+    starpu_fn_msg_result yes
+  else $as_nop
+    starpu_fn_msg_result no
+  fi
+  starpu_fn_clean_probe
+  CXXFLAGS="$SAVED_CXXFLAGS"
+  starpu_fn_lang_c
+
+  # --- Fortran 77 ---
+  ac_ext=f
+ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5'
+ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_f77_compiler_gnu
+
+  starpu_fn_msg_checking "whether Fortran 77 compiler supports $starpu_flag"
+
+  SAVED_FFLAGS="$FFLAGS"
+  FFLAGS="$starpu_flag"
+
+  starpu_fn_write_fortran_probe
+  if ac_fn_f77_try_link "$LINENO"
+  then :
+    GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS $starpu_flag"
+    starpu_fn_msg_result yes
+  else $as_nop
+    starpu_fn_msg_result no
+  fi
+  starpu_fn_clean_probe
+  FFLAGS="$SAVED_FFLAGS"
+  starpu_fn_lang_c
+
+  # --- Fortran ---
+  ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+  starpu_fn_msg_checking "whether Fortran compiler supports $starpu_flag"
+
+  SAVED_FCFLAGS="$FCFLAGS"
+  FCFLAGS="$starpu_flag"
+
+  check_mpi="no"
+  starpu_fn_write_fortran_probe
+  if ac_fn_fc_try_link "$LINENO"
+  then :
+    check_mpi="yes"
+    starpu_fn_msg_result yes
+  else $as_nop
+    starpu_fn_msg_result no
+  fi
+  starpu_fn_clean_probe
+  # NOTE(review): unlike the C probe above, this one does not look at
+  # build_mpi_lib/build_nmad_lib and always retries with $MPIFORT, so
+  # GLOBAL_AM_FCFLAGS only gets the flag when that second link succeeds.
+  # Behaviour kept as is -- confirm this is intended.
+  if test "$check_mpi" = "yes" ; then
+    SAVED_FC="$FC"
+    FC="$MPIFORT"
+    starpu_fn_msg_checking "whether MPI Fortran compiler supports $starpu_flag"
+    starpu_fn_write_fortran_probe
+    if ac_fn_fc_try_link "$LINENO"
+    then :
+      GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS $starpu_flag"
+      starpu_fn_msg_result yes
+    else $as_nop
+      starpu_fn_msg_result no
+    fi
+    starpu_fn_clean_probe
+    FC="$SAVED_FC"
+  fi
+  FCFLAGS="$SAVED_FCFLAGS"
+  starpu_fn_lang_c
+}
+
+# Debug information: with --enable-gdb, probe the richest flags the
+# compilers accept; otherwise force -g0 everywhere.
+if test x$enable_gdb = xyes; then
+    if test x$enable_full_gdb_information = xyes && test x$GCC = xyes; then
+        starpu_fn_probe_debug_flag -gdwarf-2
+        starpu_fn_probe_debug_flag -g3
+    else
+        starpu_fn_probe_debug_flag -g
+    fi
+    # Both branches append plain -g for the CUDA and HIP compilers.
+    NVCCFLAGS="$NVCCFLAGS -g"
+    HIPCCFLAGS="$HIPCCFLAGS -g"
+else
+    CFLAGS="$CFLAGS -g0"
+    CXXFLAGS="$CXXFLAGS -g0"
+    FFLAGS="$FFLAGS -g0"
+    FCFLAGS="$FCFLAGS -g0"
+    LDFLAGS="$LDFLAGS -g0"
+fi
+
+if test x$enable_spinlock_check = xyes; then
+
+printf "%s\n" "#define STARPU_SPINLOCK_CHECK 1" >>confdefs.h
+
+fi
+
+starpu_fn_msg_checking "whether extra checks should be performed"
+# Check whether --enable-fast was given.
+if test ${enable_fast+y}
+then :
+  enableval=$enable_fast; enable_fast=$enableval
+else $as_nop
+  enable_fast=no
+fi
+
+starpu_fn_msg_result "$enable_fast"
+if test x$enable_fast = xyes; then
+
+printf "%s\n" "#define STARPU_NO_ASSERT 1" >>confdefs.h
+
+else
+        # fortify gets really enabled only with optimizations, avoid enabling it
+        # when optimizations are not enabled, because with some glibc it
+        # spews a lot of warnings.
+ if test x$enable_debug != xyes; then + if test x$GCC = xyes; then + CPPFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 $CPPFLAGS" + fi + fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether debug messages should be displayed" >&5 +printf %s "checking whether debug messages should be displayed... " >&6; } +# Check whether --enable-verbose was given. +if test ${enable_verbose+y} +then : + enableval=$enable_verbose; enable_verbose=$enableval +else $as_nop + enable_verbose=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_verbose" >&5 +printf "%s\n" "$enable_verbose" >&6; } +if test x$enable_verbose = xyes; then + +printf "%s\n" "#define STARPU_VERBOSE 1" >>confdefs.h + +fi +if test x$enable_verbose = xextra; then + +printf "%s\n" "#define STARPU_VERBOSE 1" >>confdefs.h + + +printf "%s\n" "#define STARPU_EXTRA_VERBOSE 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether coverage testing should be enabled" >&5 +printf %s "checking whether coverage testing should be enabled... " >&6; } +# Check whether --enable-coverage was given. +if test ${enable_coverage+y} +then : + enableval=$enable_coverage; enable_coverage=$enableval +else $as_nop + enable_coverage=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_coverage" >&5 +printf "%s\n" "$enable_coverage" >&6; } +COVERAGE=$enable_coverage + + if test "x$enable_coverage" = "xyes"; then + STARPU_COVERAGE_ENABLED_TRUE= + STARPU_COVERAGE_ENABLED_FALSE='#' +else + STARPU_COVERAGE_ENABLED_TRUE='#' + STARPU_COVERAGE_ENABLED_FALSE= +fi + +if test x$enable_coverage = xyes; then + CFLAGS="${CFLAGS} --coverage" + CXXFLAGS="${CXXFLAGS} --coverage" + FFLAGS="${FFLAGS} --coverage" + FCFLAGS="${FCFLAGS} --coverage" + LDFLAGS="${LDFLAGS} --coverage" + LIBS="${LIBS} -lgcov" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether coverity mode should be enabled" >&5 +printf %s "checking whether coverity mode should be enabled... 
" >&6; } +# Check whether --enable-coverity was given. +if test ${enable_coverity+y} +then : + enableval=$enable_coverity; enable_coverity=$enableval +else $as_nop + enable_coverity=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_coverity" >&5 +printf "%s\n" "$enable_coverity" >&6; } + if test x$enable_coverity = xyes; then + STARPU_COVERITY_TRUE= + STARPU_COVERITY_FALSE='#' +else + STARPU_COVERITY_TRUE='#' + STARPU_COVERITY_FALSE= +fi + +if test x$enable_coverity = xyes ; then + +printf "%s\n" "#define STARPU_COVERITY 1" >>confdefs.h + +fi + +# We would need a PIC-compiled libfxt.a for this to work ; that's usually not available. +if test x$enable_mpi = xyes -a x$enable_simgrid = xyes -o x$enable_shared = xno -a x$enable_starpupy = xyes ; then + default_enable_fxt=no +else + default_enable_fxt=maybe +fi +# shall we use FxT to generate trace of the execution ? +# Check whether --enable-fxt was given. +if test ${enable_fxt+y} +then : + enableval=$enable_fxt; +else $as_nop + enable_fxt=$default_enable_fxt +fi + + +# Check whether --with-fxt was given. +if test ${with_fxt+y} +then : + withval=$with_fxt; + if test x$withval = xno ; then + enable_fxt=no + else + fxt_dir="$withval" + use_fxt_from_system=no + # in case this was not explicit yet + enable_fxt=yes + FXTDIR=$fxt_dir + + fi + +else $as_nop + + use_fxt_from_system=yes + fxt_dir="" + +fi + + +if test x$enable_fxt != xno; then + if test x$use_fxt_from_system = xno; then + save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" + PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH" + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FXT" >&5 +printf %s "checking for FXT... 
" >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$FXT_CFLAGS"; then + pkg_cv_FXT_CFLAGS="$FXT_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FXT_CFLAGS=`$PKG_CONFIG --cflags "fxt" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$FXT_LIBS"; then + pkg_cv_FXT_LIBS="$FXT_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FXT_LIBS=`$PKG_CONFIG --libs "fxt" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + FXT_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fxt"` + else + FXT_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fxt"` + fi + # Put the nasty error message in config.log where it belongs + echo "$FXT_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + have_valid_fxt=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&5 +printf "%s\n" "$as_me: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&2;} + FXT_CFLAGS="-I$fxt_dir/include/ " + FXT_LDFLAGS="-L$fxt_dir/lib/" + + FXT_LIBS="-lfxt" + +elif test $pkg_failed = untried; 
then + + have_valid_fxt=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&5 +printf "%s\n" "$as_me: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&2;} + FXT_CFLAGS="-I$fxt_dir/include/ " + FXT_LDFLAGS="-L$fxt_dir/lib/" + + FXT_LIBS="-lfxt" + +else + FXT_CFLAGS=$pkg_cv_FXT_CFLAGS + FXT_LIBS=$pkg_cv_FXT_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_valid_fxt=yes +fi + PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH" + else + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FXT" >&5 +printf %s "checking for FXT... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$FXT_CFLAGS"; then + pkg_cv_FXT_CFLAGS="$FXT_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FXT_CFLAGS=`$PKG_CONFIG --cflags "fxt" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$FXT_LIBS"; then + pkg_cv_FXT_LIBS="$FXT_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FXT_LIBS=`$PKG_CONFIG --libs "fxt" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + FXT_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fxt"` + else + FXT_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fxt"` + fi + # Put the nasty error message in config.log where it belongs + echo "$FXT_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + have_valid_fxt=no +elif test $pkg_failed = untried; then + have_valid_fxt=no +else + FXT_CFLAGS=$pkg_cv_FXT_CFLAGS + FXT_LIBS=$pkg_cv_FXT_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_valid_fxt=yes +fi + fi + if test x$have_valid_fxt = xyes ; then + enable_fxt=yes + save_LIBS="$LIBS" + LIBS="$LIBS $FXT_LIBS" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $FXT_LDFLAGS" + ac_fn_c_check_func "$LINENO" "fxt_close" "ac_cv_func_fxt_close" +if test "x$ac_cv_func_fxt_close" = xyes +then : + printf "%s\n" "#define HAVE_FXT_CLOSE 1" >>confdefs.h + +fi + + ac_fn_c_check_func "$LINENO" "fxt_blockev_leave" "ac_cv_func_fxt_blockev_leave" +if test "x$ac_cv_func_fxt_blockev_leave" = xyes +then : + printf "%s\n" "#define HAVE_FXT_BLOCKEV_LEAVE 1" >>confdefs.h + +fi + + ac_fn_c_check_func "$LINENO" "enable_fut_flush" "ac_cv_func_enable_fut_flush" +if test "x$ac_cv_func_enable_fut_flush" = xyes +then : + printf "%s\n" "#define HAVE_ENABLE_FUT_FLUSH 1" >>confdefs.h + +fi + + ac_fn_c_check_func "$LINENO" "fut_set_filename" "ac_cv_func_fut_set_filename" +if test "x$ac_cv_func_fut_set_filename" = xyes +then : + printf "%s\n" "#define HAVE_FUT_SET_FILENAME 1" >>confdefs.h + +fi + + 
ac_fn_c_check_func "$LINENO" "fut_setup_flush_callback" "ac_cv_func_fut_setup_flush_callback" +if test "x$ac_cv_func_fut_setup_flush_callback" = xyes +then : + printf "%s\n" "#define HAVE_FUT_SETUP_FLUSH_CALLBACK 1" >>confdefs.h + +fi + + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $FXT_CFLAGS" + ac_fn_check_decl "$LINENO" "enable_fut_flush" "ac_cv_have_decl_enable_fut_flush" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_enable_fut_flush" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_ENABLE_FUT_FLUSH $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" "fut_set_filename" "ac_cv_have_decl_fut_set_filename" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_fut_set_filename" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_FUT_SET_FILENAME $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" "fut_setup_flush_callback" "ac_cv_have_decl_fut_setup_flush_callback" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_fut_setup_flush_callback" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_FUT_SETUP_FLUSH_CALLBACK $ac_have_decl" >>confdefs.h + + CFLAGS="$save_CFLAGS" + + if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then + # simgrid's SMPI needs fxt to be linked in statically for + # variable privatization to work + FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed" + fi + + ########################################## + # Poti is a library to generate paje trace files + ########################################## + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for POTI" >&5 +printf %s "checking for POTI... 
" >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$POTI_CFLAGS"; then + pkg_cv_POTI_CFLAGS="$POTI_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"poti\""; } >&5 + ($PKG_CONFIG --exists --print-errors "poti") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_POTI_CFLAGS=`$PKG_CONFIG --cflags "poti" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$POTI_LIBS"; then + pkg_cv_POTI_LIBS="$POTI_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"poti\""; } >&5 + ($PKG_CONFIG --exists --print-errors "poti") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_POTI_LIBS=`$PKG_CONFIG --libs "poti" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + POTI_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "poti"` + else + POTI_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "poti"` + fi + # Put the nasty error message in config.log where it belongs + echo "$POTI_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + have_valid_poti=no +elif test $pkg_failed = untried; then + have_valid_poti=no +else + POTI_CFLAGS=$pkg_cv_POTI_CFLAGS + POTI_LIBS=$pkg_cv_POTI_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_valid_poti=yes +fi + # Check whether --enable-poti was given. 
+if test ${enable_poti+y} +then : + enableval=$enable_poti; enable_poti=$enableval +else $as_nop + enable_poti=no +fi + + if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then + +printf "%s\n" "#define STARPU_HAVE_POTI 1" >>confdefs.h + + save_LIBS="$LIBS" + LIBS="$LIBS $POTI_LIBS" + ac_fn_c_check_func "$LINENO" "poti_init_custom" "ac_cv_func_poti_init_custom" +if test "x$ac_cv_func_poti_init_custom" = xyes +then : + printf "%s\n" "#define HAVE_POTI_INIT_CUSTOM 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "poti_user_NewEvent" "ac_cv_func_poti_user_NewEvent" +if test "x$ac_cv_func_poti_user_NewEvent" = xyes +then : + printf "%s\n" "#define HAVE_POTI_USER_NEWEVENT 1" >>confdefs.h + +fi + + LIBS="$save_LIBS" + FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS" + FXT_LIBS="$FXT_LIBS $POTI_LIBS" + fi + else + if test x$enable_fxt = xyes ; then + as_fn_error $? "FxT is required but not available" "$LINENO" 5 + fi + enable_fxt=no + fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether FxT traces should be generated" >&5 +printf %s "checking whether FxT traces should be generated... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_fxt" >&5 +printf "%s\n" "$enable_fxt" >&6; } + +if test x$enable_fxt = xyes; then + +printf "%s\n" "#define STARPU_USE_FXT 1" >>confdefs.h + + +printf "%s\n" "#define CONFIG_FUT 1" >>confdefs.h + +fi + +STARPU_USE_FXT=$enable_fxt + + if test x$enable_fxt = xyes; then + STARPU_USE_FXT_TRUE= + STARPU_USE_FXT_FALSE='#' +else + STARPU_USE_FXT_TRUE='#' + STARPU_USE_FXT_FALSE= +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional locking systems FxT traces should be enabled" >&5 +printf %s "checking whether additional locking systems FxT traces should be enabled... " >&6; } +# Check whether --enable-fxt-lock was given. 
+if test ${enable_fxt_lock+y} +then : + enableval=$enable_fxt_lock; enable_fxt_lock=$enableval +else $as_nop + enable_fxt_lock=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_fxt_lock" >&5 +printf "%s\n" "$enable_fxt_lock" >&6; } +if test x$enable_fxt_lock = xyes; then + +printf "%s\n" "#define STARPU_FXT_LOCK_TRACES 1" >>confdefs.h + +fi + +# Check whether --enable-papi was given. +if test ${enable_papi+y} +then : + enableval=$enable_papi; enable_papi=$enableval +else $as_nop + enable_papi=yes +fi + +if test x$enable_papi = xyes; then + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PAPI" >&5 +printf %s "checking for PAPI... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$PAPI_CFLAGS"; then + pkg_cv_PAPI_CFLAGS="$PAPI_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"papi\""; } >&5 + ($PKG_CONFIG --exists --print-errors "papi") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_PAPI_CFLAGS=`$PKG_CONFIG --cflags "papi" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$PAPI_LIBS"; then + pkg_cv_PAPI_LIBS="$PAPI_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"papi\""; } >&5 + ($PKG_CONFIG --exists --print-errors "papi") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_PAPI_LIBS=`$PKG_CONFIG --libs "papi" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + PAPI_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "papi"` + else + PAPI_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "papi"` + fi + # Put the nasty error message in config.log where it belongs + echo "$PAPI_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + have_valid_papi=no +elif test $pkg_failed = untried; then + have_valid_papi=no +else + PAPI_CFLAGS=$pkg_cv_PAPI_CFLAGS + PAPI_LIBS=$pkg_cv_PAPI_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_valid_papi=yes +fi + if test x$have_valid_papi = xyes ; then + +printf "%s\n" "#define STARPU_PAPI 1" >>confdefs.h + + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $PAPI_LIBS" + fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether performance debugging should be enabled" >&5 +printf %s "checking whether performance debugging should be enabled... " >&6; } +# Check whether --enable-perf-debug was given. 
+if test ${enable_perf_debug+y} +then : + enableval=$enable_perf_debug; enable_perf_debug=$enableval +else $as_nop + enable_perf_debug=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_perf_debug" >&5 +printf "%s\n" "$enable_perf_debug" >&6; } +STARPU_PERF_DEBUG=$enable_perf_debug + +if test x$enable_perf_debug = xyes; then + +printf "%s\n" "#define STARPU_PERF_DEBUG 1" >>confdefs.h + + CPPFLAGS="${CPPFLAGS} -pg " + LDFLAGS="${LDFLAGS} -pg " +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether performance model debugging should be enabled" >&5 +printf %s "checking whether performance model debugging should be enabled... " >&6; } +# Check whether --enable-model-debug was given. +if test ${enable_model_debug+y} +then : + enableval=$enable_model_debug; enable_model_debug=$enableval +else $as_nop + enable_model_debug=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_model_debug" >&5 +printf "%s\n" "$enable_model_debug" >&6; } +if test x$enable_model_debug = xyes; then + +printf "%s\n" "#define STARPU_MODEL_DEBUG 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether memory stats should be displayed" >&5 +printf %s "checking whether memory stats should be displayed... " >&6; } +# Check whether --enable-memory-stats was given. +if test ${enable_memory_stats+y} +then : + enableval=$enable_memory_stats; enable_memory_stats=$enableval +else $as_nop + enable_memory_stats=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_memory_stats" >&5 +printf "%s\n" "$enable_memory_stats" >&6; } +if test x$enable_memory_stats = xyes; then + +printf "%s\n" "#define STARPU_MEMORY_STATS 1" >>confdefs.h + +fi + +# Check whether --enable-glpk was given. 
+if test ${enable_glpk+y} +then : + enableval=$enable_glpk; enable_glpk=$enableval +else $as_nop + enable_glpk=yes +fi + +if test x$enable_glpk = xyes; then + for ac_header in glpk.h +do : + ac_fn_c_check_header_compile "$LINENO" "glpk.h" "ac_cv_header_glpk_h" "$ac_includes_default" +if test "x$ac_cv_header_glpk_h" = xyes +then : + printf "%s\n" "#define HAVE_GLPK_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_GLPK_H 1" >>confdefs.h + +fi + +done + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lglpk" >&5 +printf %s "checking for main in -lglpk... " >&6; } +if test ${ac_cv_lib_glpk_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lglpk $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_glpk_main=yes +else $as_nop + ac_cv_lib_glpk_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_glpk_main" >&5 +printf "%s\n" "$ac_cv_lib_glpk_main" >&6; } +if test "x$ac_cv_lib_glpk_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBGLPK 1" >>confdefs.h + + LIBS="-lglpk $LIBS" + +fi + + STARPU_GLPK_LDFLAGS="$STARPU_GLPK_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +fi + + +# Check whether --with-ayudame1-include-dir was given. +if test ${with_ayudame1_include_dir+y} +then : + withval=$with_ayudame1_include_dir; + ayudame1_include_dir="$withval" + if test -n "$ayudame1_include_dir"; then + CPPFLAGS="-I$ayudame1_include_dir $CPPFLAGS" + fi + +else $as_nop + ayudame1_include_dir=no +fi + + +# Check whether --with-ayudame2-include-dir was given. 
+if test ${with_ayudame2_include_dir+y} +then : + withval=$with_ayudame2_include_dir; + ayudame2_include_dir="$withval" + if test -n "$ayudame2_include_dir"; then + CPPFLAGS="-I$ayudame2_include_dir $CPPFLAGS" + fi + +else $as_nop + ayudame2_include_dir=no +fi + + +# Ayudame 1 header is capitalized +ac_fn_c_check_header_compile "$LINENO" "Ayudame.h" "ac_cv_header_Ayudame_h" "$ac_includes_default" +if test "x$ac_cv_header_Ayudame_h" = xyes +then : + printf "%s\n" "#define HAVE_AYUDAME_H 1" >>confdefs.h + +fi + +# Check whether --enable-ayudame1 was given. +if test ${enable_ayudame1+y} +then : + enableval=$enable_ayudame1; enable_ayudame1=$enableval +else $as_nop + enable_ayudame1=yes +fi + +# Ayudame 2 header is lowercase +ac_fn_c_check_header_compile "$LINENO" "ayudame.h" "ac_cv_header_ayudame_h" "$ac_includes_default" +if test "x$ac_cv_header_ayudame_h" = xyes +then : + printf "%s\n" "#define HAVE_AYUDAME_H 1" >>confdefs.h + +fi + +# Check whether --enable-ayudame2 was given. +if test ${enable_ayudame2+y} +then : + enableval=$enable_ayudame2; enable_ayudame2=$enableval +else $as_nop + enable_ayudame2=yes +fi + +if test x$enable_ayudame1 = xyes -a x$ac_cv_header_Ayudame_h = xyes; then + +printf "%s\n" "#define STARPU_USE_AYUDAME1 1" >>confdefs.h + + ayu_msg="yes, use version 1" +else + if test x$enable_ayudame2 = xyes -a x$ac_cv_header_ayudame_h = xyes; then + +printf "%s\n" "#define STARPU_USE_AYUDAME2 1" >>confdefs.h + + ayu_msg="yes, use version 2" + else + ayu_msg="no" + fi +fi + + if test "x$enable_ayudame1" = "xyes"; then + STARPU_USE_AYUDAME1_TRUE= + STARPU_USE_AYUDAME1_FALSE='#' +else + STARPU_USE_AYUDAME1_TRUE='#' + STARPU_USE_AYUDAME1_FALSE= +fi + + if test "x$enable_ayudame2" = "xyes"; then + STARPU_USE_AYUDAME2_TRUE= + STARPU_USE_AYUDAME2_FALSE='#' +else + STARPU_USE_AYUDAME2_TRUE='#' + STARPU_USE_AYUDAME2_FALSE= +fi + + +STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | 
grep 0x | grep -v 0x1 | cut -d : -f 2`" + + +# Heteroprio works better if it can store information based on the program's name +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports program_invocation_short_name" >&5 +printf %s "checking whether the target supports program_invocation_short_name... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include <errno.h> + #include <stdio.h> + int main() { + printf("%s\n", program_invocation_short_name); + return 0; + } + +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + +printf "%s\n" "#define STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME 1" >>confdefs.h + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +############################################################################### +# # +# Miscellaneous options for StarPU # +# # +############################################################################### + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether data locality should be enforced" >&5 +printf %s "checking whether data locality should be enforced... " >&6; } +# Check whether --enable-data-locality-enforce was given.
+if test ${enable_data_locality_enforce+y} +then : + enableval=$enable_data_locality_enforce; enable_data_locality_enforce=$enableval +else $as_nop + enable_data_locality_enforce=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_data_locality_enforce" >&5 +printf "%s\n" "$enable_data_locality_enforce" >&6; } +if test x$enable_data_locality_enforce = xyes ; then + +printf "%s\n" "#define STARPU_DATA_LOCALITY_ENFORCE 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how many buffers can be manipulated per task" >&5 +printf %s "checking how many buffers can be manipulated per task... " >&6; } +# Check whether --enable-maxbuffers was given. +if test ${enable_maxbuffers+y} +then : + enableval=$enable_maxbuffers; nmaxbuffers=$enableval +else $as_nop + nmaxbuffers=8 +fi + +if test x$nmaxbuffers = x -o x$nmaxbuffers = xyes +then + as_fn_error $? "The --enable-maxbuffers option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxbuffers" >&5 +printf "%s\n" "$nmaxbuffers" >&6; } + +printf "%s\n" "#define STARPU_NMAXBUFS $nmaxbuffers" >>confdefs.h + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how many MPI nodes fxt files can be manipulated when generating traces" >&5 +printf %s "checking how many MPI nodes fxt files can be manipulated when generating traces... " >&6; } +# Check whether --enable-fxt-max-files was given. +if test ${enable_fxt_max_files+y} +then : + enableval=$enable_fxt_max_files; nmaxfxtfiles=$enableval +else $as_nop + nmaxfxtfiles=64 +fi + +if test x$nmaxfxtfiles = x -o x$nmaxfxtfiles = xyes +then + as_fn_error $? 
"The --enable-fxt-max-files option needs to be given a number" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxfxtfiles" >&5 +printf "%s\n" "$nmaxfxtfiles" >&6; } + +printf "%s\n" "#define STARPU_FXT_MAX_FILES $nmaxfxtfiles" >>confdefs.h + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of memory nodes to use per MPI rank" >&5 +printf %s "checking maximum number of memory nodes to use per MPI rank... " >&6; } +# Check whether --enable-maxnodes was given. +if test ${enable_maxnodes+y} +then : + enableval=$enable_maxnodes; maxnodes=$enableval +else $as_nop + maxnodes=0 +fi + + +if test x$maxnodes = x0 ; then + if test x$enable_simgrid = xyes ; then + # We need the room for the virtual CUDA/OpenCL devices + nodes=`expr 4 + $nmaxcudadev + $nmaxopencldev + 1 + $nmaxmpidev` + else + # We have up to $nmaxnumanodes memory nodes for the CPU workers, one node per GPU + # we add nodes to use 2 memory disks + nodes=`expr $nmaxnumanodes + 2` + if test x$enable_cuda = xyes ; then + # we could have used nmaxcudadev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxcudadev` + fi + if test x$enable_hip = xyes ; then + # we could have used nmaxhipdev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxhipdev` + fi + if test x$enable_opencl = xyes ; then + # we could have used nmaxopencldev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxopencldev` + fi + if test x$enable_max_fpga = xyes ; then + # we could have used nmaxmaxfpgadev + 1, but this would certainly give an + # odd number.
+ nodes=`expr $nodes + $nmaxmaxfpgadev` + fi + + #nmaxmpidev = 0 if mpi master-slave is disabled + nodes=`expr $nodes + $nmaxmpidev` + + #nmaxtcpipdev = 0 if tcpip master-slave is disabled + nodes=`expr $nodes + $nmaxtcpipdev` + fi + + # set maxnodes to the smallest power of 2 greater than or equal to nodes + maxnodes=1 + while test "$maxnodes" -lt "$nodes" + do + maxnodes=`expr $maxnodes \* 2` + done +fi +if test x$maxnodes = x -o x$maxnodes = xyes +then + as_fn_error $? "The --enable-maxnodes option needs to be given a number" "$LINENO" 5 +fi +if test $maxnodes -gt 32 ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Note: the wt_mask feature only supports 32 memory nodes" >&5 +printf "%s\n" "$as_me: WARNING: Note: the wt_mask feature only supports 32 memory nodes" >&2;} +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of memory nodes" >&5 +printf %s "checking maximum number of memory nodes... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maxnodes" >&5 +printf "%s\n" "$maxnodes" >&6; } + +printf "%s\n" "#define STARPU_MAXNODES $maxnodes" >>confdefs.h + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether allocation cache should be used" >&5 +printf %s "checking whether allocation cache should be used... " >&6; } +# Check whether --enable-allocation-cache was given. +if test ${enable_allocation_cache+y} +then : + enableval=$enable_allocation_cache; enable_allocation_cache=$enableval +else $as_nop + enable_allocation_cache=yes +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_allocation_cache" >&5 +printf "%s\n" "$enable_allocation_cache" >&6; } +if test x$enable_allocation_cache = xyes; then + +printf "%s\n" "#define STARPU_USE_ALLOCATION_CACHE 1" >>confdefs.h + +fi + + +# Check whether --with-perf-model-dir was given. +if test ${with_perf_model_dir+y} +then : + withval=$with_perf_model_dir; + if test x$withval = xno; then + as_fn_error $? 
"--without-perf-model-dir is not a valid option" "$LINENO" 5 + fi + + perf_model_dir="$withval" + have_explicit_perf_model_dir=yes + +printf "%s\n" "#define STARPU_PERF_MODEL_DIR \"$perf_model_dir\"" >>confdefs.h + + +else $as_nop + + # by default, we put the performance models in + # $STARPU_HOME/.starpu/sampling/ + have_explicit_perf_model_dir=no + perf_model_dir="\$STARPU_HOME/.starpu/sampling/" + + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking using explicit performance model location" >&5 +printf %s "checking using explicit performance model location... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_explicit_perf_model_dir" >&5 +printf "%s\n" "$have_explicit_perf_model_dir" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking performance models location" >&5 +printf %s "checking performance models location... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $perf_model_dir" >&5 +printf "%s\n" "$perf_model_dir" >&6; } + +# On many multicore CPUs, clock cycles are not synchronized +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 +printf %s "checking for clock_gettime in -lrt... " >&6; } +if test ${ac_cv_lib_rt_clock_gettime+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrt $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply.
*/ +char clock_gettime (); +int +main (void) +{ +return clock_gettime (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_rt_clock_gettime=yes +else $as_nop + ac_cv_lib_rt_clock_gettime=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_clock_gettime" >&5 +printf "%s\n" "$ac_cv_lib_rt_clock_gettime" >&6; } +if test "x$ac_cv_lib_rt_clock_gettime" = xyes +then : + printf "%s\n" "#define HAVE_LIBRT 1" >>confdefs.h + + LIBS="-lrt $LIBS" + +fi + +ac_fn_c_check_func "$LINENO" "clock_gettime" "ac_cv_func_clock_gettime" +if test "x$ac_cv_func_clock_gettime" = xyes +then : + printf "%s\n" "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h + +fi + + +# Compute the maximum number of workers (rounded up to a multiple of 16 for +# alignment purposes). +if test x$enable_simgrid != xyes; then + if test x$enable_cpu != xyes; then + maxcpus=0 + fi + if test x$enable_cuda != xyes; then + nmaxcudadev=0 + fi + + if test x$enable_max_fpga != xyes; then + nmaxmaxfpgadev=0 + fi + if test x$enable_opencl != xyes; then + nmaxopencldev=0 + fi + #By default, if we cannot build mpi master-slave, nmaxmpidev is set to zero. + #But because it is multiplied with maxcpus below, we need to set it to one.
+ if test x$build_tcpip_master_slave != xyes; then + nmaxtcpipdev=1 + fi +fi +if test $maxcpus = 0 +then + nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* 64 \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +elif test $nmaxmpidev = 0 +then + nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +else + nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Maximum number of workers" >&5 +printf %s "checking Maximum number of workers... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxworkers" >&5 +printf "%s\n" "$nmaxworkers" >&6; } + +printf "%s\n" "#define STARPU_NMAXWORKERS $nmaxworkers" >>confdefs.h + +nmaxdevs=0 +if test $nmaxdevs -lt $nmaxcudadev; then + nmaxdevs=$nmaxcudadev +fi +if test $nmaxdevs -lt $nmaxhipdev; then + nmaxdevs=$nmaxhipdev +fi +if test $nmaxdevs -lt $nmaxopencldev; then + nmaxdevs=$nmaxopencldev +fi +if test $nmaxdevs -lt $nmaxmaxfpgadev; then + nmaxdevs=$nmaxmaxfpgadev +fi +if test $nmaxdevs -lt $nmaxmpidev; then + nmaxdevs=$nmaxmpidev +fi +if test $nmaxdevs -lt $nmaxtcpipdev; then + nmaxdevs=$nmaxtcpipdev +fi + +printf "%s\n" "#define STARPU_NMAXDEVS $nmaxdevs" >>confdefs.h + + +# Computes the maximum number of combined worker +nmaxcombinedworkers=$maxcpus +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Maximum number of workers combinations" >&5 +printf %s "checking Maximum number of workers combinations... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxcombinedworkers" >&5 +printf "%s\n" "$nmaxcombinedworkers" >&6; } + +printf "%s\n" "#define STARPU_NMAX_COMBINEDWORKERS $nmaxcombinedworkers" >>confdefs.h + + + + +# Computes the maximum number of implementations per arch +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of implementations" >&5 +printf %s "checking maximum number of implementations... " >&6; } +# Check whether --enable-maximplementations was given. +if test ${enable_maximplementations+y} +then : + enableval=$enable_maximplementations; maximplementations=$enableval +else $as_nop + maximplementations=4 +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maximplementations" >&5 +printf "%s\n" "$maximplementations" >&6; } + +printf "%s\n" "#define STARPU_MAXIMPLEMENTATIONS $maximplementations" >>confdefs.h + +if test x$maximplementations = x -o x$maximplementations = xyes +then + as_fn_error $? "The --enable-maximplementations option needs to be given a number" "$LINENO" 5 +fi + +# Enable LevelDB support if requested and the lib is found +# Check whether --enable-leveldb was given. 
+if test ${enable_leveldb+y} +then : + enableval=$enable_leveldb; enable_leveldb=$enableval +else $as_nop + enable_leveldb=no +fi + +if test x$enable_leveldb = xyes; then +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + for ac_header in leveldb/db.h +do : + ac_fn_cxx_check_header_compile "$LINENO" "leveldb/db.h" "ac_cv_header_leveldb_db_h" "$ac_includes_default" +if test "x$ac_cv_header_leveldb_db_h" = xyes +then : + printf "%s\n" "#define HAVE_LEVELDB_DB_H 1" >>confdefs.h + +printf "%s\n" "#define STARPU_HAVE_LEVELDB 1" >>confdefs.h + +fi + +done + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lleveldb" >&5 +printf %s "checking for main in -lleveldb... " >&6; } +if test ${ac_cv_lib_leveldb_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lleveldb $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +namespace conftest { + extern "C" int main (); +} +int +main (void) +{ +return conftest::main (); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + ac_cv_lib_leveldb_main=yes +else $as_nop + ac_cv_lib_leveldb_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_leveldb_main" >&5 +printf "%s\n" "$ac_cv_lib_leveldb_main" >&6; } +if test "x$ac_cv_lib_leveldb_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBLEVELDB 1" >>confdefs.h + + LIBS="-lleveldb $LIBS" + +fi + + STARPU_LEVELDB_LDFLAGS="$STARPU_LEVELDB_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi + if test "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes"; then + STARPU_HAVE_LEVELDB_TRUE= + STARPU_HAVE_LEVELDB_FALSE='#' +else + STARPU_HAVE_LEVELDB_TRUE='#' + STARPU_HAVE_LEVELDB_FALSE= +fi + + +# Defines the calibration heuristic for the history-based calibration of StarPU +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking calibration heuristic of history-based StarPU calibrator" >&5 +printf %s "checking calibration heuristic of history-based StarPU calibrator... " >&6; } +# Check whether --enable-calibration-heuristic was given. 
+if test ${enable_calibration_heuristic+y} +then : + enableval=$enable_calibration_heuristic; calibration_heuristic=$enableval +else $as_nop + calibration_heuristic=50 +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $calibration_heuristic" >&5 +printf "%s\n" "$calibration_heuristic" >&6; } + +printf "%s\n" "#define STARPU_HISTORYMAXERROR $calibration_heuristic" >>confdefs.h + + + +############################################################################### +# # +# MP Common settings # +# # +############################################################################### + +if test x$build_mpi_master_slave = xyes -o x$build_tcpip_master_slave = xyes; then + build_master_slave=yes +else + build_master_slave=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the master-slave mode should be enabled" >&5 +printf %s "checking whether the master-slave mode should be enabled... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_master_slave" >&5 +printf "%s\n" "$build_master_slave" >&6; } + if test "x$build_master_slave" = "xyes"; then + STARPU_USE_MP_TRUE= + STARPU_USE_MP_FALSE='#' +else + STARPU_USE_MP_TRUE='#' + STARPU_USE_MP_FALSE= +fi + + +# Check whether --enable-export-dynamic was given. +if test ${enable_export_dynamic+y} +then : + enableval=$enable_export_dynamic; +fi + + +if test x$build_master_slave = xyes; then + +printf "%s\n" "#define STARPU_USE_MP 1" >>confdefs.h + + + if test x$enable_export_dynamic != xno ; then + STARPU_EXPORT_DYNAMIC="-rdynamic" + fi +fi + + + +############################################################################### +# # +# Flags for C Compiler # +# # +############################################################################### + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wall" >&5 +printf %s "checking whether C compiler supports -Wall... 
" >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wall" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wall" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wall" >&5 +printf %s "checking whether MPI C compiler supports -Wall... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wall" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wall" >&5 +printf %s "checking whether CXX compiler supports -Wall... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wall" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wall" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wall" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wall... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wall" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wall" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wall" >&5 +printf %s "checking whether Fortran compiler supports -Wall... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wall" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wall" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wall... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wall" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=implicit" >&5 +printf %s "checking whether C compiler supports -Werror=implicit... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Werror=implicit" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=implicit" >&5 +printf %s "checking whether MPI C compiler supports -Werror=implicit... 
" >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=implicit-function-declaration" >&5 +printf %s "checking whether C compiler supports -Werror=implicit-function-declaration... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Werror=implicit-function-declaration" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit-function-declaration" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=implicit-function-declaration" >&5 +printf %s "checking whether MPI C compiler supports -Werror=implicit-function-declaration... 
" >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit-function-declaration" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + +if test x$enable_perf_debug = xyes; then + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -no-pie" >&5 +printf %s "checking whether C compiler supports -no-pie... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-no-pie" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-pie" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -no-pie" >&5 +printf %s "checking whether MPI C compiler supports -no-pie... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -no-pie" >&5 +printf %s "checking whether CXX compiler supports -no-pie... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-no-pie" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -no-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -no-pie" >&5 +printf %s "checking whether Fortran 77 compiler supports -no-pie... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-no-pie" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -no-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -no-pie" >&5 +printf %s "checking whether Fortran compiler supports -no-pie... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-no-pie" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -no-pie" >&5 +printf %s "checking whether MPI Fortran compiler supports -no-pie... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -no-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -no-PIE" >&5 +printf %s "checking whether C compiler supports -no-PIE... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-no-PIE" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-PIE" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -no-PIE" >&5 +printf %s "checking whether MPI C compiler supports -no-PIE... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-PIE" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -no-PIE" >&5 +printf %s "checking whether CXX compiler supports -no-PIE... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-no-PIE" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -no-PIE" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -no-PIE" >&5 +printf %s "checking whether Fortran 77 compiler supports -no-PIE... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-no-PIE" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -no-PIE" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -no-PIE" >&5 +printf %s "checking whether Fortran compiler supports -no-PIE... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-no-PIE" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -no-PIE" >&5 +printf %s "checking whether MPI Fortran compiler supports -no-PIE... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -no-PIE" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-pie" >&5 +printf %s "checking whether C compiler supports -fno-pie... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-fno-pie" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-pie" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-pie" >&5 +printf %s "checking whether MPI C compiler supports -fno-pie... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-pie" >&5 +printf %s "checking whether CXX compiler supports -fno-pie... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-fno-pie" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-pie" >&5 +printf %s "checking whether Fortran 77 compiler supports -fno-pie... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-fno-pie" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-pie" >&5 +printf %s "checking whether Fortran compiler supports -fno-pie... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-fno-pie" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-pie" >&5 +printf %s "checking whether MPI Fortran compiler supports -fno-pie... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-pie" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +fi + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wextra" >&5 +printf %s "checking whether C compiler supports -Wextra... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wextra" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wextra" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wextra" >&5 +printf %s "checking whether MPI C compiler supports -Wextra... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wextra" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wextra" >&5 +printf %s "checking whether CXX compiler supports -Wextra... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wextra" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wextra" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wextra" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wextra... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wextra" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wextra" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wextra" >&5 +printf %s "checking whether Fortran compiler supports -Wextra... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wextra" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wextra" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wextra... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wextra" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wunused" >&5 +printf %s "checking whether C compiler supports -Wunused... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wunused" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wunused" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wunused" >&5 +printf %s "checking whether MPI C compiler supports -Wunused... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wunused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wunused" >&5 +printf %s "checking whether CXX compiler supports -Wunused... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wunused" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wunused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wunused" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wunused... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wunused" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wunused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wunused" >&5 +printf %s "checking whether Fortran compiler supports -Wunused... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wunused" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wunused" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wunused... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wunused" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wundef" >&5 +printf %s "checking whether C compiler supports -Wundef... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wundef" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wundef" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wundef" >&5 +printf %s "checking whether MPI C compiler supports -Wundef... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wundef" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wundef" >&5 +printf %s "checking whether CXX compiler supports -Wundef... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wundef" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wundef" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wshadow" >&5 +printf %s "checking whether C compiler supports -Wshadow... 
" >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wshadow" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wshadow" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wshadow" >&5 +printf %s "checking whether MPI C compiler supports -Wshadow... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wshadow" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wshadow" >&5 +printf %s "checking whether CXX compiler supports -Wshadow... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wshadow" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wshadow" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wshadow" >&5 +printf %s "checking whether Fortran 77 compiler supports -Wshadow... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-Wshadow" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wshadow" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wshadow" >&5 +printf %s "checking whether Fortran compiler supports -Wshadow... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-Wshadow" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wshadow" >&5 +printf %s "checking whether MPI Fortran compiler supports -Wshadow... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wshadow" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wpointer-arith" >&5 +printf %s "checking whether C compiler supports -Wpointer-arith... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Wpointer-arith" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wpointer-arith" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wpointer-arith" >&5 +printf %s "checking whether MPI C compiler supports -Wpointer-arith... 
" >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wpointer-arith" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wpointer-arith" >&5 +printf %s "checking whether CXX compiler supports -Wpointer-arith... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Wpointer-arith" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wpointer-arith" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +if test "x$STARPU_DEVEL" != x; then + +printf "%s\n" "#define STARPU_DEVEL 1" >>confdefs.h + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=pointer-arith" >&5 +printf %s "checking whether C compiler supports -Werror=pointer-arith... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-Werror=pointer-arith" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=pointer-arith" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=pointer-arith" >&5 +printf %s "checking whether MPI C compiler supports -Werror=pointer-arith... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=pointer-arith" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Werror=pointer-arith" >&5 +printf %s "checking whether CXX compiler supports -Werror=pointer-arith... 
" >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-Werror=pointer-arith" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Werror=pointer-arith" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-common" >&5 +printf %s "checking whether C compiler supports -fno-common... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="-fno-common" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-common" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-common" >&5 +printf %s "checking whether MPI C compiler supports -fno-common... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-common" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-common" >&5 +printf %s "checking whether CXX compiler supports -fno-common... 
" >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="-fno-common" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-common" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-common" >&5 +printf %s "checking whether Fortran 77 compiler supports -fno-common... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="-fno-common" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-common" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-common" >&5 +printf %s "checking whether Fortran compiler supports -fno-common... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="-fno-common" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-common" >&5 +printf %s "checking whether MPI Fortran compiler supports -fno-common... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-common" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +fi + if test "x$STARPU_DEVEL" != x; then + STARPU_DEVEL_TRUE= + STARPU_DEVEL_FALSE='#' +else + STARPU_DEVEL_TRUE='#' + STARPU_DEVEL_FALSE= +fi + + + + + + + +# Same value as Automake's, for use in other places. +pkglibdir="\${libdir}/$PACKAGE" + + + +# Check whether --with-check-flags was given. +if test ${with_check_flags+y} +then : + withval=$with_check_flags; check_flags=$withval +else $as_nop + check_flags="" +fi + +if test "x$check_flags" != "x" ; then + for xflag in $check_flags + do + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports $xflag" >&5 +printf %s "checking whether C compiler supports $xflag... " >&6; } + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="$xflag" + + check_mpi="no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $xflag" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports $xflag" >&5 +printf %s "checking whether MPI C compiler supports $xflag... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $xflag" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" + + + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports $xflag" >&5 +printf %s "checking whether CXX compiler supports $xflag... " >&6; } + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$xflag" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +const char *hello = "Hello World"; + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + + GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS $xflag" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CXXFLAGS="$SAVED_CXXFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports $xflag" >&5 +printf %s "checking whether Fortran 77 compiler supports $xflag... 
" >&6; } + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="$xflag" + + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO" +then : + + GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS $xflag" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FFLAGS="$SAVED_FFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports $xflag" >&5 +printf %s "checking whether Fortran compiler supports $xflag... " >&6; } + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="$xflag" + + check_mpi="no" + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + check_mpi="yes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports $xflag" >&5 +printf %s "checking whether MPI Fortran compiler supports $xflag... 
" >&6; } + cat > conftest.$ac_ext <<_ACEOF + program main + + + end +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS $xflag" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + done +fi + +######################################################################## +# # +# Parallel worker support # +# # +######################################################################## + +default_enable_parallel_worker=yes +if test x$starpu_darwin = xyes ; then + default_enable_parallel_worker=no +fi +# Check whether --enable-parallel-worker was given. +if test ${enable_parallel_worker+y} +then : + enableval=$enable_parallel_worker; enable_parallel_worker=$enableval +else $as_nop + enable_parallel_worker=$default_enable_parallel_worker +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for parallel worker support" >&5 +printf %s "checking for parallel worker support... " >&6; } + +if test x$enable_parallel_worker = xyes; then + +printf "%s\n" "#define STARPU_PARALLEL_WORKER 1" >>confdefs.h + + if test -e penmp || test -e mp; then + as_fn_error $? "AC_OPENMP clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 +fi +# Check whether --enable-openmp was given. 
+if test ${enable_openmp+y} +then : + enableval=$enable_openmp; +fi + + OPENMP_CFLAGS= + if test "$enable_openmp" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 +printf %s "checking for $CC option to support OpenMP... " >&6; } +if test ${ac_cv_prog_c_openmp+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_c_openmp='not found' + for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP +#error "OpenMP not supported" +#endif +#include +int main (void) { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP +#error "OpenMP not supported" +#endif +#include +int main (void) { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_prog_c_openmp=$ac_option +else $as_nop + ac_cv_prog_c_openmp='unsupported' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + + if test "$ac_cv_prog_c_openmp" != 'not found'; then + break + fi + done + if test "$ac_cv_prog_c_openmp" = 'not found'; then + ac_cv_prog_c_openmp='unsupported' + elif test "$ac_cv_prog_c_openmp" = ''; then + ac_cv_prog_c_openmp='none needed' + fi + rm -f penmp mp +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 +printf "%s\n" "$ac_cv_prog_c_openmp" >&6; } + if test "$ac_cv_prog_c_openmp" != 'unsupported' && \ + test "$ac_cv_prog_c_openmp" != 'none needed'; then + OPENMP_CFLAGS="$ac_cv_prog_c_openmp" + fi + fi + + +fi + + if test "x$enable_parallel_worker" = "xyes"; then + STARPU_PARALLEL_WORKER_TRUE= + 
STARPU_PARALLEL_WORKER_FALSE='#' +else + STARPU_PARALLEL_WORKER_TRUE='#' + STARPU_PARALLEL_WORKER_FALSE= +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_parallel_worker" >&5 +printf "%s\n" "$enable_parallel_worker" >&6; } + +############################################################################### +# # +# OpenMP LLVM runtime support # +# # +############################################################################### + +# Check whether --enable-openmp-llvm was given. +if test ${enable_openmp_llvm+y} +then : + enableval=$enable_openmp_llvm; enable_openmp_llvm=$enableval +else $as_nop + enable_openmp_llvm=no +fi + + +openmp_llvm_msg="" +if test x$starpu_windows = xyes ; then + enable_openmp_llvm=no + openmp_llvm_msg="disabled on windows" +fi +if test x$enable_simgrid = xyes ; then + enable_openmp_llvm=no + openmp_llvm_msg="incompatibility with Simgrid support" +fi +if test x$PROG_CLANG = x ; then + enable_openmp_llvm=no + openmp_llvm_msg="missing clang" +fi + +if test x$enable_openmp_llvm = xyes; then + +printf "%s\n" "#define STARPU_OPENMP_LLVM 1" >>confdefs.h + + # Force activating the generic OpenMP runtime support + enable_openmp="yes" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LLVM OpenMP runtime support" >&5 +printf %s "checking for LLVM OpenMP runtime support... " >&6; } + if test "x$enable_openmp_llvm" = "xyes"; then + STARPU_OPENMP_LLVM_TRUE= + STARPU_OPENMP_LLVM_FALSE='#' +else + STARPU_OPENMP_LLVM_TRUE='#' + STARPU_OPENMP_LLVM_FALSE= +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_openmp_llvm $openmp_llvm_msg" >&5 +printf "%s\n" "$enable_openmp_llvm $openmp_llvm_msg" >&6; } + +############################################################################### +# # +# OpenMP runtime support # +# # +############################################################################### + +# Check whether --enable-openmp was given. 
+if test ${enable_openmp+y} +then : + enableval=$enable_openmp; enable_openmp=$enableval +else $as_nop + enable_openmp=yes +fi + + +ac_fn_c_check_header_compile "$LINENO" "ucontext.h" "ac_cv_header_ucontext_h" "$ac_includes_default" +if test "x$ac_cv_header_ucontext_h" = xyes +then : + have_valid_ucontext=yes +else $as_nop + have_valid_ucontext=no +fi + + +openmp_msg="" +if test x$starpu_windows = xyes ; then + enable_openmp=no + openmp_msg="disabled on windows" +fi +if test x$enable_simgrid = xyes ; then + enable_openmp=no + openmp_msg="incompatibility with Simgrid support" +fi +if test x$have_valid_ucontext = xno ; then + enable_openmp=no + openmp_msg="ucontext.h unavailable" +fi + +if test x$enable_openmp = xyes; then + +printf "%s\n" "#define STARPU_OPENMP 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for OpenMP runtime support" >&5 +printf %s "checking for OpenMP runtime support... " >&6; } + if test "x$enable_openmp" = "xyes"; then + STARPU_OPENMP_TRUE= + STARPU_OPENMP_FALSE='#' +else + STARPU_OPENMP_TRUE='#' + STARPU_OPENMP_FALSE= +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_openmp $openmp_msg" >&5 +printf "%s\n" "$enable_openmp $openmp_msg" >&6; } + + if test x$enable_simgrid = xno -a -n "$OPENMP_CFLAGS" -a x$starpu_windows != xyes; then + STARPU_HAVE_OPENMP_TRUE= + STARPU_HAVE_OPENMP_FALSE='#' +else + STARPU_HAVE_OPENMP_TRUE='#' + STARPU_HAVE_OPENMP_FALSE= +fi + + +############################################################################### +# # +# SOCL interface # +# # +############################################################################### + +# Check whether --enable-socl was given. +if test ${enable_socl+y} +then : + enableval=$enable_socl; enable_socl="$enableval" +else $as_nop + enable_socl="maybe" +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SOCL" >&5 +printf %s "checking for SOCL... 
" >&6; } + +# in case SOCL was explicitly required, but is not available, this is an error +if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then + as_fn_error $? "SOCL cannot be enabled without OpenCL" "$LINENO" 5 +fi + +# now we enable SOCL if and only if a proper setup is available +if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then + build_socl=$have_valid_opencl +else + build_socl=no +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_socl" >&5 +printf "%s\n" "$build_socl" >&6; } + if test "x$build_socl" = "xyes"; then + STARPU_BUILD_SOCL_TRUE= + STARPU_BUILD_SOCL_FALSE='#' +else + STARPU_BUILD_SOCL_TRUE='#' + STARPU_BUILD_SOCL_FALSE= +fi + + if test "x$build_socl" = "xyes"; then + STARPU_USE_SOCL_TRUE= + STARPU_USE_SOCL_FALSE='#' +else + STARPU_USE_SOCL_TRUE='#' + STARPU_USE_SOCL_FALSE= +fi + + +if test "$build_socl" = "yes" ; then + ac_fn_c_check_func "$LINENO" "clGetExtensionFunctionAddressForPlatform" "ac_cv_func_clGetExtensionFunctionAddressForPlatform" +if test "x$ac_cv_func_clGetExtensionFunctionAddressForPlatform" = xyes +then : + printf "%s\n" "#define HAVE_CLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM 1" >>confdefs.h + +fi + + if test -n "$SOCL_OCL_LIB_OPENCL" -a -f "$SOCL_OCL_LIB_OPENCL" ; then + run_socl_check=yes + SOCL_OCL_LIB_OPENCL_DIR=$(dirname $SOCL_OCL_LIB_OPENCL) + + else + run_socl_check=no + fi +else + run_socl_check=no +fi +############################################################################### +# # +# Debugging # +# # +############################################################################### + +# Extract the first word of "gdb", so it can be a program name with args. +set dummy gdb; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_GDB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $GDB in + [\\/]* | ?:[\\/]*) + ac_cv_path_GDB="$GDB" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_GDB="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_GDB" && ac_cv_path_GDB="not-found" + ;; +esac +fi +GDB=$ac_cv_path_GDB +if test -n "$GDB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GDB" >&5 +printf "%s\n" "$GDB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +if test "x$GDB" != "xnot-found"; then + +printf "%s\n" "#define STARPU_GDB_PATH \"$GDB\"" >>confdefs.h + +fi + +############################################################################### +# # +# Examples # +# # +############################################################################### + +# Check whether --enable-build-tests was given. +if test ${enable_build_tests+y} +then : + enableval=$enable_build_tests; enable_build_tests=$enableval +else $as_nop + enable_build_tests=yes +fi + +# check stuff for tests (todo) + if test x$enable_build_tests != xno; then + STARPU_BUILD_TESTS_TRUE= + STARPU_BUILD_TESTS_FALSE='#' +else + STARPU_BUILD_TESTS_TRUE='#' + STARPU_BUILD_TESTS_FALSE= +fi + +# Check whether --enable-build-examples was given. 
+# Build-examples switch (default: yes), optional OpenGL rendering libraries,
+# and detection of the X11 headers and libraries.
+# The X11 probe programs must include <X11/Xlib.h>; a bare "#include" is a
+# preprocessing error and would make each probe fail unconditionally.
+# NOTE(review): this reads like autoconf-generated output; if so, the durable
+# fix belongs in the macro source rather than in this file -- confirm.
+if test ${enable_build_examples+y}
+then :
+  enableval=$enable_build_examples; enable_build_examples=$enableval
+else $as_nop
+  enable_build_examples=yes
+fi
+
+# check stuff for examples (todo)
+# Anything other than an explicit "no" keeps the examples enabled.
+ if test "x$enable_build_examples" != xno; then
+  STARPU_BUILD_EXAMPLES_TRUE=
+  STARPU_BUILD_EXAMPLES_FALSE='#'
+else
+  STARPU_BUILD_EXAMPLES_TRUE='#'
+  STARPU_BUILD_EXAMPLES_FALSE=
+fi
+
+# Check whether --enable-opengl-render was given.
+if test ${enable_opengl_render+y}
+then :
+  enableval=$enable_opengl_render; enable_opengl_render=$enableval
+else $as_nop
+  enable_opengl_render=no
+fi
+
+
+# When rendering is requested, glut, GL and GLU are all mandatory. Each check
+# runs with LIBS temporarily emptied so that only the library it finds is
+# appended to STARPU_OPENGL_RENDER_LDFLAGS, then LIBS is restored.
+if test "x$enable_opengl_render" = xyes; then
+   _LIBS_SAV="$LIBS"
+   LIBS=""
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for glutInit in -lglut" >&5
+printf %s "checking for glutInit in -lglut... " >&6; }
+if test ${ac_cv_lib_glut_glutInit+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lglut $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply. */
+char glutInit ();
+int
+main (void)
+{
+return glutInit ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_glut_glutInit=yes
+else $as_nop
+  ac_cv_lib_glut_glutInit=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_glut_glutInit" >&5
+printf "%s\n" "$ac_cv_lib_glut_glutInit" >&6; }
+if test "x$ac_cv_lib_glut_glutInit" = xyes
+then :
+  printf "%s\n" "#define HAVE_LIBGLUT 1" >>confdefs.h
+
+  LIBS="-lglut $LIBS"
+
+else $as_nop
+  as_fn_error $? "cannot find glut" "$LINENO" 5
+fi
+
+   STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS"
+   LIBS=$_LIBS_SAV
+
+
+   _LIBS_SAV="$LIBS"
+   LIBS=""
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for glXCreateContext in -lGL" >&5
+printf %s "checking for glXCreateContext in -lGL... " >&6; }
+if test ${ac_cv_lib_GL_glXCreateContext+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lGL $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply. */
+char glXCreateContext ();
+int
+main (void)
+{
+return glXCreateContext ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_GL_glXCreateContext=yes
+else $as_nop
+  ac_cv_lib_GL_glXCreateContext=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_GL_glXCreateContext" >&5
+printf "%s\n" "$ac_cv_lib_GL_glXCreateContext" >&6; }
+if test "x$ac_cv_lib_GL_glXCreateContext" = xyes
+then :
+  printf "%s\n" "#define HAVE_LIBGL 1" >>confdefs.h
+
+  LIBS="-lGL $LIBS"
+
+else $as_nop
+  as_fn_error $? "cannot find GL" "$LINENO" 5
+fi
+
+   STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS"
+   LIBS=$_LIBS_SAV
+
+
+   _LIBS_SAV="$LIBS"
+   LIBS=""
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gluLookAt in -lGLU" >&5
+printf %s "checking for gluLookAt in -lGLU... " >&6; }
+if test ${ac_cv_lib_GLU_gluLookAt+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lGLU $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply. */
+char gluLookAt ();
+int
+main (void)
+{
+return gluLookAt ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_GLU_gluLookAt=yes
+else $as_nop
+  ac_cv_lib_GLU_gluLookAt=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_GLU_gluLookAt" >&5
+printf "%s\n" "$ac_cv_lib_GLU_gluLookAt" >&6; }
+if test "x$ac_cv_lib_GLU_gluLookAt" = xyes
+then :
+  printf "%s\n" "#define HAVE_LIBGLU 1" >>confdefs.h
+
+  LIBS="-lGLU $LIBS"
+
+else $as_nop
+  as_fn_error $? "cannot find GLU" "$LINENO" 5
+fi
+
+   STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS"
+   LIBS=$_LIBS_SAV
+
+
+
+
+printf "%s\n" "#define STARPU_OPENGL_RENDER 1" >>confdefs.h
+
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenGL rendering is enabled" >&5
+printf %s "checking whether OpenGL rendering is enabled... " >&6; }
+STARPU_OPENGL_RENDER=$enable_opengl_render
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_opengl_render" >&5
+printf "%s\n" "$enable_opengl_render" >&6; }
+ if test "x$enable_opengl_render" = xyes; then
+  STARPU_HAVE_OPENGL_TRUE=
+  STARPU_HAVE_OPENGL_FALSE='#'
+else
+  STARPU_HAVE_OPENGL_TRUE='#'
+  STARPU_HAVE_OPENGL_FALSE=
+fi
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for X" >&5
+printf %s "checking for X... " >&6; }
+
+
+# Check whether --with-x was given.
+if test ${with_x+y}
+then :
+  withval=$with_x;
+fi
+
+# $have_x is `yes', `no', `disabled', or empty when we do not yet know.
+if test "x$with_x" = xno; then
+  # The user explicitly disabled X.
+  have_x=disabled
+else
+  case $x_includes,$x_libraries in #(
+    *\'*) as_fn_error $? "cannot use X directory names containing '" "$LINENO" 5;; #(
+    *,NONE | NONE,*) if test ${ac_cv_have_x+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  # One or both of the vars are not set, and there is no cached value.
+ac_x_includes=no
+ac_x_libraries=no
+# Do we need to do anything special at all?
+ac_save_LIBS=$LIBS
+LIBS="-lX11 $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <X11/Xlib.h>
+int
+main (void)
+{
+XrmInitialize ()
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  # We can compile and link X programs with no special options.
+  ac_x_includes=
+  ac_x_libraries=
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS="$ac_save_LIBS"
+# If that didn't work, only try xmkmf and file system searches
+# for native compilation.
+if test x"$ac_x_includes" = xno && test "$cross_compiling" = no
+then :
+  rm -f -r conftest.dir
+if mkdir conftest.dir; then
+  cd conftest.dir
+  # The recipe lines of the Imakefile below must start with a TAB, otherwise
+  # make rejects the generated Makefile.
+  cat >Imakefile <<'_ACEOF'
+incroot:
+	@echo incroot='${INCROOT}'
+usrlibdir:
+	@echo usrlibdir='${USRLIBDIR}'
+libdir:
+	@echo libdir='${LIBDIR}'
+_ACEOF
+  if (export CC; ${XMKMF-xmkmf}) >/dev/null 2>/dev/null && test -f Makefile; then
+    # GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+    for ac_var in incroot usrlibdir libdir; do
+      eval "ac_im_$ac_var=\`\${MAKE-make} $ac_var 2>/dev/null | sed -n 's/^$ac_var=//p'\`"
+    done
+    # Open Windows xmkmf reportedly sets LIBDIR instead of USRLIBDIR.
+    for ac_extension in a so sl dylib la dll; do
+      if test ! -f "$ac_im_usrlibdir/libX11.$ac_extension" &&
+         test -f "$ac_im_libdir/libX11.$ac_extension"; then
+        ac_im_usrlibdir=$ac_im_libdir; break
+      fi
+    done
+    # Screen out bogus values from the imake configuration. They are
+    # bogus both because they are the default anyway, and because
+    # using them would break gcc on systems where it needs fixed includes.
+    case $ac_im_incroot in
+      /usr/include) ac_x_includes= ;;
+      *) test -f "$ac_im_incroot/X11/Xos.h" && ac_x_includes=$ac_im_incroot;;
+    esac
+    case $ac_im_usrlibdir in
+      /usr/lib | /usr/lib64 | /lib | /lib64) ;;
+      *) test -d "$ac_im_usrlibdir" && ac_x_libraries=$ac_im_usrlibdir ;;
+    esac
+  fi
+  cd ..
+  rm -f -r conftest.dir
+fi
+
+  # Standard set of common directories for X headers.
+# Check X11 before X11Rn because it is often a symlink to the current release.
+ac_x_header_dirs='
+/usr/X11/include
+/usr/X11R7/include
+/usr/X11R6/include
+/usr/X11R5/include
+/usr/X11R4/include
+
+/usr/include/X11
+/usr/include/X11R7
+/usr/include/X11R6
+/usr/include/X11R5
+/usr/include/X11R4
+
+/usr/local/X11/include
+/usr/local/X11R7/include
+/usr/local/X11R6/include
+/usr/local/X11R5/include
+/usr/local/X11R4/include
+
+/usr/local/include/X11
+/usr/local/include/X11R7
+/usr/local/include/X11R6
+/usr/local/include/X11R5
+/usr/local/include/X11R4
+
+/opt/X11/include
+
+/usr/X386/include
+/usr/x386/include
+/usr/XFree86/include/X11
+
+/usr/include
+/usr/local/include
+/usr/unsupported/include
+/usr/athena/include
+/usr/local/x11r5/include
+/usr/lpp/Xamples/include
+
+/usr/openwin/include
+/usr/openwin/share/include'
+
+if test "$ac_x_includes" = no; then
+  # Guess where to find include files, by looking for Xlib.h.
+  # First, try using that file with no special directory specified.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <X11/Xlib.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"
+then :
+  # We can compile using X headers with no special include directory.
+ac_x_includes=
+else $as_nop
+  for ac_dir in $ac_x_header_dirs; do
+  if test -r "$ac_dir/X11/Xlib.h"; then
+    ac_x_includes=$ac_dir
+    break
+  fi
+done
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+fi # $ac_x_includes = no
+
+if test "$ac_x_libraries" = no; then
+  # Check for the libraries.
+  # See if we find them without any special options.
+  # Don't add to $LIBS permanently.
+  ac_save_LIBS=$LIBS
+  LIBS="-lX11 $LIBS"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <X11/Xlib.h>
+int
+main (void)
+{
+XrmInitialize ()
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  LIBS=$ac_save_LIBS
+# We can link X programs with no special library path.
+ac_x_libraries=
+else $as_nop
+  LIBS=$ac_save_LIBS
+for ac_dir in `printf "%s\n" "$ac_x_includes $ac_x_header_dirs" | sed s/include/lib/g`
+do
+  # Don't even attempt the hair of trying to link an X program!
+  for ac_extension in a so sl dylib la dll; do
+    if test -r "$ac_dir/libX11.$ac_extension"; then
+      ac_x_libraries=$ac_dir
+      break 2
+    fi
+  done
+done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi # $ac_x_libraries = no
+
+fi
+# Record the results.
+case $ac_x_includes,$ac_x_libraries in #(
+  no,* | *,no | *\'*) :
+    # Didn't find X, or a directory has "'" in its name.
+    ac_cv_have_x="have_x=no" ;; #(
+  *) :
+    # Record where we found X for the cache.
+    ac_cv_have_x="have_x=yes\
+	ac_x_includes='$ac_x_includes'\
+	ac_x_libraries='$ac_x_libraries'" ;;
+esac
+fi
+;; #(
+    *) have_x=yes;;
+  esac
+  eval "$ac_cv_have_x"
+fi # $with_x != no
+
+if test "$have_x" != yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_x" >&5
+printf "%s\n" "$have_x" >&6; }
+  no_x=yes
+else
+  # If each of the values was on the command line, it overrides each guess.
+  test "x$x_includes" = xNONE && x_includes=$ac_x_includes
+  test "x$x_libraries" = xNONE && x_libraries=$ac_x_libraries
+  # Update the cache value to reflect the command line values.
+  ac_cv_have_x="have_x=yes\
+	ac_x_includes='$x_includes'\
+	ac_x_libraries='$x_libraries'"
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: libraries $x_libraries, headers $x_includes" >&5
+printf "%s\n" "libraries $x_libraries, headers $x_includes" >&6; }
+fi
+
+if test "$no_x" = yes; then
+  # Not all programs may use this symbol, but it does not hurt to define it.
+ +printf "%s\n" "#define X_DISPLAY_MISSING 1" >>confdefs.h + + X_CFLAGS= X_PRE_LIBS= X_LIBS= X_EXTRA_LIBS= +else + if test -n "$x_includes"; then + X_CFLAGS="$X_CFLAGS -I$x_includes" + fi + + # It would also be nice to do this for all -L options, not just this one. + if test -n "$x_libraries"; then + X_LIBS="$X_LIBS -L$x_libraries" + # For Solaris; some versions of Sun CC require a space after -R and + # others require no space. Words are not sufficient . . . . + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -R must be followed by a space" >&5 +printf %s "checking whether -R must be followed by a space... " >&6; } + ac_xsave_LIBS=$LIBS; LIBS="$LIBS -R$x_libraries" + ac_xsave_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + X_LIBS="$X_LIBS -R$x_libraries" +else $as_nop + LIBS="$ac_xsave_LIBS -R $x_libraries" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + X_LIBS="$X_LIBS -R $x_libraries" +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: neither works" >&5 +printf "%s\n" "neither works" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ac_c_werror_flag=$ac_xsave_c_werror_flag + LIBS=$ac_xsave_LIBS + fi + + # Check for system-dependent libraries X programs must link with. + # Do this before checking for the system-independent R6 libraries + # (-lICE), since we may need -lsocket or whatever for X linking. 
+ + if test "$ISC" = yes; then + X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl_s -linet" + else + # Martyn Johnson says this is needed for Ultrix, if the X + # libraries were built with DECnet support. And Karl Berry says + # the Alpha needs dnet_stub (dnet does not exist). + ac_xsave_LIBS="$LIBS"; LIBS="$LIBS $X_LIBS -lX11" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char XOpenDisplay (); +int +main (void) +{ +return XOpenDisplay (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet" >&5 +printf %s "checking for dnet_ntoa in -ldnet... " >&6; } +if test ${ac_cv_lib_dnet_dnet_ntoa+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldnet $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dnet_ntoa (); +int +main (void) +{ +return dnet_ntoa (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dnet_dnet_ntoa=yes +else $as_nop + ac_cv_lib_dnet_dnet_ntoa=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_dnet_ntoa" >&5 +printf "%s\n" "$ac_cv_lib_dnet_dnet_ntoa" >&6; } +if test "x$ac_cv_lib_dnet_dnet_ntoa" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet" +fi + + if test $ac_cv_lib_dnet_dnet_ntoa = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet_stub" >&5 +printf %s "checking for dnet_ntoa in -ldnet_stub... " >&6; } +if test ${ac_cv_lib_dnet_stub_dnet_ntoa+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldnet_stub $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dnet_ntoa (); +int +main (void) +{ +return dnet_ntoa (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dnet_stub_dnet_ntoa=yes +else $as_nop + ac_cv_lib_dnet_stub_dnet_ntoa=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_stub_dnet_ntoa" >&5 +printf "%s\n" "$ac_cv_lib_dnet_stub_dnet_ntoa" >&6; } +if test "x$ac_cv_lib_dnet_stub_dnet_ntoa" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet_stub" +fi + + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$ac_xsave_LIBS" + + # msh@cis.ufl.edu says -lnsl (and -lsocket) are needed for his 386/AT, + # to get the SysV transport functions. + # Chad R. Larson says the Pyramis MIS-ES running DC/OSx (SVR4) + # needs -lnsl. + # The nsl library prevents programs from opening the X display + # on Irix 5.2, according to T.E. Dickey. + # The functions gethostbyname, getservbyname, and inet_addr are + # in -lbsd on LynxOS 3.0.1/i386, according to Lars Hecking. + ac_fn_c_check_func "$LINENO" "gethostbyname" "ac_cv_func_gethostbyname" +if test "x$ac_cv_func_gethostbyname" = xyes +then : + +fi + + if test $ac_cv_func_gethostbyname = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lnsl" >&5 +printf %s "checking for gethostbyname in -lnsl... " >&6; } +if test ${ac_cv_lib_nsl_gethostbyname+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnsl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char gethostbyname (); +int +main (void) +{ +return gethostbyname (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_nsl_gethostbyname=yes +else $as_nop + ac_cv_lib_nsl_gethostbyname=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_nsl_gethostbyname" >&5 +printf "%s\n" "$ac_cv_lib_nsl_gethostbyname" >&6; } +if test "x$ac_cv_lib_nsl_gethostbyname" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl" +fi + + if test $ac_cv_lib_nsl_gethostbyname = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lbsd" >&5 +printf %s "checking for gethostbyname in -lbsd... " >&6; } +if test ${ac_cv_lib_bsd_gethostbyname+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lbsd $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char gethostbyname (); +int +main (void) +{ +return gethostbyname (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_bsd_gethostbyname=yes +else $as_nop + ac_cv_lib_bsd_gethostbyname=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bsd_gethostbyname" >&5 +printf "%s\n" "$ac_cv_lib_bsd_gethostbyname" >&6; } +if test "x$ac_cv_lib_bsd_gethostbyname" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -lbsd" +fi + + fi + fi + + # lieder@skyler.mavd.honeywell.com says without -lsocket, + # socket/setsockopt and other routines are undefined under SCO ODT + # 2.0. 
But -lsocket is broken on IRIX 5.2 (and is not necessary + # on later versions), says Simon Leinen: it contains gethostby* + # variants that don't use the name server (or something). -lsocket + # must be given before -lnsl if both are needed. We assume that + # if connect needs -lnsl, so does gethostbyname. + ac_fn_c_check_func "$LINENO" "connect" "ac_cv_func_connect" +if test "x$ac_cv_func_connect" = xyes +then : + +fi + + if test $ac_cv_func_connect = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for connect in -lsocket" >&5 +printf %s "checking for connect in -lsocket... " >&6; } +if test ${ac_cv_lib_socket_connect+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsocket $X_EXTRA_LIBS $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char connect (); +int +main (void) +{ +return connect (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_socket_connect=yes +else $as_nop + ac_cv_lib_socket_connect=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_socket_connect" >&5 +printf "%s\n" "$ac_cv_lib_socket_connect" >&6; } +if test "x$ac_cv_lib_socket_connect" = xyes +then : + X_EXTRA_LIBS="-lsocket $X_EXTRA_LIBS" +fi + + fi + + # Guillermo Gomez says -lposix is necessary on A/UX. + ac_fn_c_check_func "$LINENO" "remove" "ac_cv_func_remove" +if test "x$ac_cv_func_remove" = xyes +then : + +fi + + if test $ac_cv_func_remove = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for remove in -lposix" >&5 +printf %s "checking for remove in -lposix... 
" >&6; } +if test ${ac_cv_lib_posix_remove+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lposix $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char remove (); +int +main (void) +{ +return remove (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_posix_remove=yes +else $as_nop + ac_cv_lib_posix_remove=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_posix_remove" >&5 +printf "%s\n" "$ac_cv_lib_posix_remove" >&6; } +if test "x$ac_cv_lib_posix_remove" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -lposix" +fi + + fi + + # BSDI BSD/OS 2.1 needs -lipc for XOpenDisplay. + ac_fn_c_check_func "$LINENO" "shmat" "ac_cv_func_shmat" +if test "x$ac_cv_func_shmat" = xyes +then : + +fi + + if test $ac_cv_func_shmat = no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shmat in -lipc" >&5 +printf %s "checking for shmat in -lipc... " >&6; } +if test ${ac_cv_lib_ipc_shmat+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lipc $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char shmat (); +int +main (void) +{ +return shmat (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ipc_shmat=yes +else $as_nop + ac_cv_lib_ipc_shmat=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ipc_shmat" >&5 +printf "%s\n" "$ac_cv_lib_ipc_shmat" >&6; } +if test "x$ac_cv_lib_ipc_shmat" = xyes +then : + X_EXTRA_LIBS="$X_EXTRA_LIBS -lipc" +fi + + fi + fi + + # Check for libraries that X11R6 Xt/Xaw programs need. + ac_save_LDFLAGS=$LDFLAGS + test -n "$x_libraries" && LDFLAGS="$LDFLAGS -L$x_libraries" + # SM needs ICE to (dynamically) link under SunOS 4.x (so we have to + # check for ICE first), but we must link in the order -lSM -lICE or + # we get undefined symbols. So assume we have SM if we have ICE. + # These have to be linked with before -lX11, unlike the other + # libraries we check for below, so use a different variable. + # John Interrante, Karl Berry + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for IceConnectionNumber in -lICE" >&5 +printf %s "checking for IceConnectionNumber in -lICE... " >&6; } +if test ${ac_cv_lib_ICE_IceConnectionNumber+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lICE $X_EXTRA_LIBS $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char IceConnectionNumber (); +int +main (void) +{ +return IceConnectionNumber (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ICE_IceConnectionNumber=yes +else $as_nop + ac_cv_lib_ICE_IceConnectionNumber=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ICE_IceConnectionNumber" >&5 +printf "%s\n" "$ac_cv_lib_ICE_IceConnectionNumber" >&6; } +if test "x$ac_cv_lib_ICE_IceConnectionNumber" = xyes +then : + X_PRE_LIBS="$X_PRE_LIBS -lSM -lICE" +fi + + LDFLAGS=$ac_save_LDFLAGS + +fi + +if test "x$no_x" != "xyes"; then + +printf "%s\n" "#define STARPU_HAVE_X11 1" >>confdefs.h + +fi + if test "x$no_x" != "xyes"; then + STARPU_HAVE_X11_TRUE= + STARPU_HAVE_X11_FALSE='#' +else + STARPU_HAVE_X11_TRUE='#' + STARPU_HAVE_X11_FALSE= +fi + + +# In case there are BLAS kernels that are used by the example applications +# we may specify which library to use. Note that this is not used for StarPU +# itself. + +blas_lib=maybe +# Check whether --enable-blas-lib was given. +if test ${enable_blas_lib+y} +then : + enableval=$enable_blas_lib; + if test "x$enableval" = "xatlas" ; then + blas_lib=atlas + elif test "x$enableval" = "xgoto" ; then + blas_lib=goto + elif test "x$enableval" = "xopenblas" ; then + blas_lib=openblas + elif test "x$enableval" = "xnone" ; then + blas_lib=none + elif test "x$enableval" = "xmkl" ; then + blas_lib=mkl + elif test "x$enableval" = "xarmpl" ; then + blas_lib=armpl + elif test x$enableval = xno; then + blas_lib=none + else + echo + echo "Error!" + echo "Unknown BLAS library" + exit -1 + fi + +fi + + +if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then + +# Check whether --with-goto-dir was given. 
+if test ${with_goto_dir+y} +then : + withval=$with_goto_dir; + blas_lib=goto + gotodir=$withval + GOTODIR=$gotodir + + + CPPFLAGS="${CPPFLAGS} -I$gotodir/ " + LDFLAGS="${LDFLAGS} -L$gotodir/ " + + +fi + + + if test x$blas_lib = xgoto; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lgfortran" >&5 +printf %s "checking for main in -lgfortran... " >&6; } +if test ${ac_cv_lib_gfortran_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgfortran $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_gfortran_main=yes +else $as_nop + ac_cv_lib_gfortran_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gfortran_main" >&5 +printf "%s\n" "$ac_cv_lib_gfortran_main" >&6; } +if test "x$ac_cv_lib_gfortran_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBGFORTRAN 1" >>confdefs.h + + LIBS="-lgfortran $LIBS" + +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lifcore" >&5 +printf %s "checking for main in -lifcore... " >&6; } +if test ${ac_cv_lib_ifcore_main+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lifcore $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main (void) +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ifcore_main=yes +else $as_nop + ac_cv_lib_ifcore_main=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ifcore_main" >&5 +printf "%s\n" "$ac_cv_lib_ifcore_main" >&6; } +if test "x$ac_cv_lib_ifcore_main" = xyes +then : + printf "%s\n" "#define HAVE_LIBIFCORE 1" >>confdefs.h + + LIBS="-lifcore $LIBS" + +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + # Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a}) + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lgoto2" >&5 +printf %s "checking for sgemm_ in -lgoto2... " >&6; } +if test ${ac_cv_lib_goto2_sgemm_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgoto2 $STARPU_BLAS_LDFLAGS $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char sgemm_ (); +int +main (void) +{ +return sgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_goto2_sgemm_=yes +else $as_nop + ac_cv_lib_goto2_sgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_goto2_sgemm_" >&5 +printf "%s\n" "$ac_cv_lib_goto2_sgemm_" >&6; } +if test "x$ac_cv_lib_goto2_sgemm_" = xyes +then : + printf "%s\n" "#define HAVE_LIBGOTO2 1" >>confdefs.h + + LIBS="-lgoto2 $LIBS" + +else $as_nop + havegoto2=no +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + if test x$havegoto2 = xno; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lgoto" >&5 +printf %s "checking for sgemm_ in -lgoto... " >&6; } +if test ${ac_cv_lib_goto_sgemm_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgoto $STARPU_BLAS_LDFLAGS $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char sgemm_ (); +int +main (void) +{ +return sgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_goto_sgemm_=yes +else $as_nop + ac_cv_lib_goto_sgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_goto_sgemm_" >&5 +printf "%s\n" "$ac_cv_lib_goto_sgemm_" >&6; } +if test "x$ac_cv_lib_goto_sgemm_" = xyes +then : + printf "%s\n" "#define HAVE_LIBGOTO 1" >>confdefs.h + + LIBS="-lgoto $LIBS" + +else $as_nop + as_fn_error $? 
"cannot find goto lib" "$LINENO" 5 +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + fi + +printf "%s\n" "#define STARPU_GOTO 1" >>confdefs.h + + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then + +# Check whether --with-atlas-dir was given. +if test ${with_atlas_dir+y} +then : + withval=$with_atlas_dir; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking STARPU_ATLAS location" >&5 +printf %s "checking STARPU_ATLAS location... " >&6; } + blas_lib=atlas + atlasdir=$withval + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $atlasdir" >&5 +printf "%s\n" "$atlasdir" >&6; } + ATLASDIR=$atlasdir + + + CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ " + LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ " + + +fi + + + if test x$blas_lib = xatlas; then + # test whether STARPU_ATLAS is actually available + ac_fn_c_check_header_compile "$LINENO" "cblas.h" "ac_cv_header_cblas_h" "$ac_includes_default" +if test "x$ac_cv_header_cblas_h" = xyes +then : + +else $as_nop + as_fn_error $? "cannot find atlas headers" "$LINENO" 5 +fi + + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ATL_sgemm in -latlas" >&5 +printf %s "checking for ATL_sgemm in -latlas... " >&6; } +if test ${ac_cv_lib_atlas_ATL_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-latlas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char ATL_sgemm (); +int +main (void) +{ +return ATL_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_atlas_ATL_sgemm=yes +else $as_nop + ac_cv_lib_atlas_ATL_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_atlas_ATL_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_atlas_ATL_sgemm" >&6; } +if test "x$ac_cv_lib_atlas_ATL_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBATLAS 1" >>confdefs.h + + LIBS="-latlas $LIBS" + +else $as_nop + as_fn_error $? "cannot find atlas lib" "$LINENO" 5 +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lcblas" >&5 +printf %s "checking for cblas_sgemm in -lcblas... " >&6; } +if test ${ac_cv_lib_cblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcblas -latlas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cblas_sgemm (); +int +main (void) +{ +return cblas_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_cblas_cblas_sgemm=yes +else $as_nop + ac_cv_lib_cblas_cblas_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_cblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_cblas_cblas_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBCBLAS 1" >>confdefs.h + + LIBS="-lcblas $LIBS" + +else $as_nop + as_fn_error $? "cannot find atlas lib" "$LINENO" 5 +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lf77blas" >&5 +printf %s "checking for sgemm_ in -lf77blas... " >&6; } +if test ${ac_cv_lib_f77blas_sgemm_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lf77blas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char sgemm_ (); +int +main (void) +{ +return sgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_f77blas_sgemm_=yes +else $as_nop + ac_cv_lib_f77blas_sgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_f77blas_sgemm_" >&5 +printf "%s\n" "$ac_cv_lib_f77blas_sgemm_" >&6; } +if test "x$ac_cv_lib_f77blas_sgemm_" = xyes +then : + printf "%s\n" "#define HAVE_LIBF77BLAS 1" >>confdefs.h + + LIBS="-lf77blas $LIBS" + +else $as_nop + as_fn_error $? 
"cannot find f77blas lib" "$LINENO" 5 +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +printf "%s\n" "#define STARPU_ATLAS 1" >>confdefs.h + + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for OPENBLAS" >&5 +printf %s "checking for OPENBLAS... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$OPENBLAS_CFLAGS"; then + pkg_cv_OPENBLAS_CFLAGS="$OPENBLAS_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"openblas\""; } >&5 + ($PKG_CONFIG --exists --print-errors "openblas") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_OPENBLAS_CFLAGS=`$PKG_CONFIG --cflags "openblas" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$OPENBLAS_LIBS"; then + pkg_cv_OPENBLAS_LIBS="$OPENBLAS_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"openblas\""; } >&5 + ($PKG_CONFIG --exists --print-errors "openblas") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_OPENBLAS_LIBS=`$PKG_CONFIG --libs "openblas" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "openblas"` + else + OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "openblas"` + fi + # Put the nasty error message in config.log where it belongs + echo "$OPENBLAS_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + if test x$blas_lib = xopenblas; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 +printf %s "checking for cblas_sgemm in -lopenblas... " >&6; } +if test ${ac_cv_lib_openblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lopenblas -lopenblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cblas_sgemm (); +int +main (void) +{ +return cblas_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_openblas_cblas_sgemm=yes +else $as_nop + ac_cv_lib_openblas_cblas_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_openblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBOPENBLAS 1" >>confdefs.h + + LIBS="-lopenblas $LIBS" + +else $as_nop + as_fn_error $? "cannot find openblas lib" "$LINENO" 5 +fi + + STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h + + STARPU_OPENBLAS=1 + + fi + +elif test $pkg_failed = untried; then + if test x$blas_lib = xopenblas; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 +printf %s "checking for cblas_sgemm in -lopenblas... " >&6; } +if test ${ac_cv_lib_openblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lopenblas -lopenblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cblas_sgemm (); +int +main (void) +{ +return cblas_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_openblas_cblas_sgemm=yes +else $as_nop + ac_cv_lib_openblas_cblas_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_openblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBOPENBLAS 1" >>confdefs.h + + LIBS="-lopenblas $LIBS" + +else $as_nop + as_fn_error $? "cannot find openblas lib" "$LINENO" 5 +fi + + STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h + + STARPU_OPENBLAS=1 + + fi + +else + OPENBLAS_CFLAGS=$pkg_cv_OPENBLAS_CFLAGS + OPENBLAS_LIBS=$pkg_cv_OPENBLAS_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BLAS_OPENBLAS" >&5 +printf %s "checking for BLAS_OPENBLAS... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$BLAS_OPENBLAS_CFLAGS"; then + pkg_cv_BLAS_OPENBLAS_CFLAGS="$BLAS_OPENBLAS_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blas-openblas\""; } >&5 + ($PKG_CONFIG --exists --print-errors "blas-openblas") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_BLAS_OPENBLAS_CFLAGS=`$PKG_CONFIG --cflags "blas-openblas" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$BLAS_OPENBLAS_LIBS"; then + pkg_cv_BLAS_OPENBLAS_LIBS="$BLAS_OPENBLAS_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blas-openblas\""; } >&5 + ($PKG_CONFIG --exists --print-errors "blas-openblas") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_BLAS_OPENBLAS_LIBS=`$PKG_CONFIG --libs "blas-openblas" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + BLAS_OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "blas-openblas"` + else + BLAS_OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "blas-openblas"` + fi + # Put the nasty error message in config.log where it belongs + echo "$BLAS_OPENBLAS_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + if test x$blas_lib = xopenblas; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lblas-openblas" >&5 +printf %s "checking for cblas_sgemm in -lblas-openblas... " >&6; } +if test ${ac_cv_lib_blas_openblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lblas-openblas -lblas-openblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cblas_sgemm (); +int +main (void) +{ +return cblas_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_blas_openblas_cblas_sgemm=yes +else $as_nop + ac_cv_lib_blas_openblas_cblas_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_openblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_blas_openblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_blas_openblas_cblas_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBBLAS_OPENBLAS 1" >>confdefs.h + + LIBS="-lblas-openblas $LIBS" + +else $as_nop + as_fn_error $? "cannot find blas-openblas lib" "$LINENO" 5 +fi + + STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h + + STARPU_OPENBLAS=1 + + fi + +elif test $pkg_failed = untried; then + if test x$blas_lib = xopenblas; then + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lblas-openblas" >&5 +printf %s "checking for cblas_sgemm in -lblas-openblas... " >&6; } +if test ${ac_cv_lib_blas_openblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lblas-openblas -lblas-openblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char cblas_sgemm (); +int +main (void) +{ +return cblas_sgemm (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_blas_openblas_cblas_sgemm=yes +else $as_nop + ac_cv_lib_blas_openblas_cblas_sgemm=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_openblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_blas_openblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_blas_openblas_cblas_sgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBBLAS_OPENBLAS 1" >>confdefs.h + + LIBS="-lblas-openblas $LIBS" + +else $as_nop + as_fn_error $? "cannot find blas-openblas lib" "$LINENO" 5 +fi + + STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + +printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h + + STARPU_OPENBLAS=1 + + fi + +else + BLAS_OPENBLAS_CFLAGS=$pkg_cv_BLAS_OPENBLAS_CFLAGS + BLAS_OPENBLAS_LIBS=$pkg_cv_BLAS_OPENBLAS_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h + + STARPU_OPENBLAS=1 + + CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} " + LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} " + blas_lib=openblas + +fi + +fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then + # Should we use MKL ? + if test -n "$MKLROOT" ; then + CPPFLAGS="${CPPFLAGS} -I$MKLROOT/include" + case $host_vendor in + *1om) mkl_plat=mic ;; + *) mkl_plat=intel64 ;; + esac + SAVED_LIBS=$LIBS + STARPU_BLAS_LDFLAGS="-L$MKLROOT/lib/$mkl_plat -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm -lpthread -ldl" + LIBS="$LIBS $STARPU_BLAS_LDFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + blas_lib=mkl +else $as_nop + STARPU_BLAS_LDFLAGS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LIBS=$SAVED_LIBS + fi + +# Check whether --with-mkl-cflags was given. +if test ${with_mkl_cflags+y} +then : + withval=$with_mkl_cflags; + CPPFLAGS="${CPPFLAGS} $withval" + blas_lib=mkl + +fi + + + +# Check whether --with-mkl-ldflags was given. +if test ${with_mkl_ldflags+y} +then : + withval=$with_mkl_ldflags; + STARPU_BLAS_LDFLAGS="$withval" + blas_lib=mkl + +fi + + if test x$blas_lib = xmkl; then + +printf "%s\n" "#define STARPU_MKL 1" >>confdefs.h + + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xarmpl; then + # Should we use ARMPL ? + if test -n "$ARMPL_DIR" ; then + CPPFLAGS="${CPPFLAGS} -I$ARMPL_INCLUDES" + SAVED_LIBS=$LIBS + STARPU_BLAS_LDFLAGS="-L$ARMPL_LIBRARIES -larmpl_lp64 -lgfortran -lm" + LIBS="$LIBS $STARPU_BLAS_LDFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + blas_lib=armpl +else $as_nop + STARPU_BLAS_LDFLAGS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LIBS=$SAVED_LIBS + fi + +# Check whether --with-armpl-cflags was given. +if test ${with_armpl_cflags+y} +then : + withval=$with_armpl_cflags; + CPPFLAGS="${CPPFLAGS} $withval" + blas_lib=armpl + +fi + + + +# Check whether --with-armpl-ldflags was given. 
+if test ${with_armpl_ldflags+y} +then : + withval=$with_armpl_ldflags; + STARPU_BLAS_LDFLAGS="$withval" + blas_lib=armpl + +fi + + if test x$blas_lib = xarmpl; then + +printf "%s\n" "#define STARPU_ARMPL 1" >>confdefs.h + + fi +fi + +if test x$blas_lib = xmaybe; then + #perhaps it is possible to use some BLAS lib from the system + use_system_blas=no + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing sgemm_" >&5 +printf %s "checking for library containing sgemm_... " >&6; } +if test ${ac_cv_search_sgemm_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char sgemm_ (); +int +main (void) +{ +return sgemm_ (); + ; + return 0; +} +_ACEOF +for ac_lib in '' blas +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_sgemm_=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_sgemm_+y} +then : + break +fi +done +if test ${ac_cv_search_sgemm_+y} +then : + +else $as_nop + ac_cv_search_sgemm_=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sgemm_" >&5 +printf "%s\n" "$ac_cv_search_sgemm_" >&6; } +ac_res=$ac_cv_search_sgemm_ +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + use_system_blas=yes +fi + + STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + if test x$use_system_blas = xyes; then + +printf "%s\n" "#define STARPU_SYSTEM_BLAS 1" >>confdefs.h + + blas_lib=system + elif test 
x"$BLAS_LIBS" != x; then + +printf "%s\n" "#define STARPU_SYSTEM_BLAS 1" >>confdefs.h + + STARPU_BLAS_LDFLAGS="$BLAS_LIBS" + blas_lib=system + + else + blas_lib=none + fi +fi + +if test x$blas_lib = xsystem; then + ac_fn_c_check_header_compile "$LINENO" "cblas.h" "ac_cv_header_cblas_h" "$ac_includes_default" +if test "x$ac_cv_header_cblas_h" = xyes +then : + have_cblas_h=yes +else $as_nop + have_cblas_h=no +fi + +fi + if test x$have_cblas_h = xyes; then + STARPU_HAVE_CBLAS_H_TRUE= + STARPU_HAVE_CBLAS_H_FALSE='#' +else + STARPU_HAVE_CBLAS_H_TRUE='#' + STARPU_HAVE_CBLAS_H_FALSE= +fi + +if test x$have_cblas_h = xyes; then + +printf "%s\n" "#define STARPU_HAVE_CBLAS_H 1" >>confdefs.h + +fi +if test x$blas_lib != xnone; then + +printf "%s\n" "#define STARPU_HAVE_BLAS 1" >>confdefs.h + + SAVED_LIBS="$LIBS" + LIBS="$LIBS -lblas" + ac_fn_c_check_func "$LINENO" "cblas_sgemv" "ac_cv_func_cblas_sgemv" +if test "x$ac_cv_func_cblas_sgemv" = xyes +then : + printf "%s\n" "#define HAVE_CBLAS_SGEMV 1" >>confdefs.h + +fi + + LIBS="$SAVED_LIBS" + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dgels_" >&5 +printf %s "checking for library containing dgels_... " >&6; } +if test ${ac_cv_search_dgels_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dgels_ (); +int +main (void) +{ +return dgels_ (); + ; + return 0; +} +_ACEOF +for ac_lib in '' lapack +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_dgels_=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_dgels_+y} +then : + break +fi +done +if test ${ac_cv_search_dgels_+y} +then : + +else $as_nop + ac_cv_search_dgels_=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dgels_" >&5 +printf "%s\n" "$ac_cv_search_dgels_" >&6; } +ac_res=$ac_cv_search_dgels_ +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + enable_liblapack=yes +else $as_nop + enable_liblapack=no +fi + + STARPU_LIBLAPACK_LDFLAGS="$STARPU_LIBLAPACK_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + +fi + if test x$enable_liblapack = xyes; then + STARPU_HAVE_LIBLAPACK_TRUE= + STARPU_HAVE_LIBLAPACK_FALSE='#' +else + STARPU_HAVE_LIBLAPACK_TRUE='#' + STARPU_HAVE_LIBLAPACK_FALSE= +fi + + if test x$HAVE_CBLAS_SGEMV = x1; then + STARPU_HAVE_CBLAS_SGEMV_TRUE= + STARPU_HAVE_CBLAS_SGEMV_FALSE='#' +else + STARPU_HAVE_CBLAS_SGEMV_TRUE='#' + STARPU_HAVE_CBLAS_SGEMV_FALSE= +fi + + + if test x$blas_lib = xatlas; then + STARPU_ATLAS_BLAS_LIB_TRUE= + STARPU_ATLAS_BLAS_LIB_FALSE='#' +else + STARPU_ATLAS_BLAS_LIB_TRUE='#' + STARPU_ATLAS_BLAS_LIB_FALSE= +fi + + if test x$blas_lib = xgoto; then + STARPU_GOTO_BLAS_LIB_TRUE= + STARPU_GOTO_BLAS_LIB_FALSE='#' +else + STARPU_GOTO_BLAS_LIB_TRUE='#' + STARPU_GOTO_BLAS_LIB_FALSE= +fi + + if test x$blas_lib = xmkl; then + STARPU_MKL_BLAS_LIB_TRUE= + STARPU_MKL_BLAS_LIB_FALSE='#' +else + STARPU_MKL_BLAS_LIB_TRUE='#' + STARPU_MKL_BLAS_LIB_FALSE= +fi + + if test x$blas_lib = xsystem; then + STARPU_SYSTEM_BLAS_LIB_TRUE= + STARPU_SYSTEM_BLAS_LIB_FALSE='#' +else 
+ STARPU_SYSTEM_BLAS_LIB_TRUE='#' + STARPU_SYSTEM_BLAS_LIB_FALSE= +fi + + if test x$blas_lib = xnone -a x$enable_simgrid = xno; then + STARPU_NO_BLAS_LIB_TRUE= + STARPU_NO_BLAS_LIB_FALSE='#' +else + STARPU_NO_BLAS_LIB_TRUE='#' + STARPU_NO_BLAS_LIB_FALSE= +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which BLAS lib should be used" >&5 +printf %s "checking which BLAS lib should be used... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $blas_lib" >&5 +printf "%s\n" "$blas_lib" >&6; } +BLAS_LIB=$blas_lib + + +############################################################################### +# # +# Multiple linear regression # +# # +############################################################################### +# Check whether --enable-mlr was given. +if test ${enable_mlr+y} +then : + enableval=$enable_mlr; enable_mlr=$enableval +else $as_nop + enable_mlr=no +fi + +# Check whether --enable-mlr-system-blas was given. +if test ${enable_mlr_system_blas+y} +then : + enableval=$enable_mlr_system_blas; enable_mlr_blas=$enableval +else $as_nop + enable_mlr_blas=no +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether multiple linear regression models are disabled" >&5 +printf %s "checking whether multiple linear regression models are disabled... " >&6; } +if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + install_min_dgels=no + support_mlr=yes + _LIBS_SAV="$LIBS" + LIBS="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dgels_" >&5 +printf %s "checking for library containing dgels_... " >&6; } +if test ${ac_cv_search_dgels_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char dgels_ (); +int +main (void) +{ +return dgels_ (); + ; + return 0; +} +_ACEOF +for ac_lib in '' lapack +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_dgels_=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_dgels_+y} +then : + break +fi +done +if test ${ac_cv_search_dgels_+y} +then : + +else $as_nop + ac_cv_search_dgels_=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dgels_" >&5 +printf "%s\n" "$ac_cv_search_dgels_" >&6; } +ac_res=$ac_cv_search_dgels_ +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + use_system_lapack=yes +fi + + STARPU_LAPACK_LDFLAGS="$STARPU_LAPACK_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + + + if test x$blas_lib = xnone ; then + use_system_lapack=no + fi + if test x$enable_mlr_blas = xyes -a x$use_system_lapack = xyes; then + +printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h + + LDFLAGS="-llapack $LDFLAGS" + else + if test x$enable_mlr_blas = xyes -a x$blas_lib = xmkl; then + +printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h + + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether min-dgels is linked" >&5 +printf %s "checking whether min-dgels is linked... 
" >&6; } + if test x"$DGELS_LIBS" != x; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h + + + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking min-dgels source" >&5 +printf %s "checking min-dgels source... " >&6; } + if test "${cross_compiling}" != "no" ; then + # Cross-compiling is not supported by min-dgels + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + install_min_dgels=no + support_mlr=no + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/minlibblas.a $STARPU_BUILD_DIR/min-dgels/build/minlibdgels.a $STARPU_BUILD_DIR/min-dgels/build/minlibf2c.a -Wl,--end-group" + +printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h + + +printf "%s\n" "#define STARPU_BUILT_IN_MIN_DGELS 1" >>confdefs.h + + + install_min_dgels=yes + fi + fi + fi + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + install_min_dgels=no + support_mlr=no +fi + if test x$install_min_dgels = xyes; then + STARPU_USE_MIN_DGELS_TRUE= + STARPU_USE_MIN_DGELS_FALSE='#' +else + STARPU_USE_MIN_DGELS_TRUE='#' + STARPU_USE_MIN_DGELS_FALSE= +fi + + +########################################## +# FFT # +########################################## + +have_fftw=no +have_fftwf=no +have_fftwl=no +fft_support=no + +# Check whether --enable-starpufft was given. +if test ${enable_starpufft+y} +then : + enableval=$enable_starpufft; enable_starpufft=$enableval +else $as_nop + enable_starpufft=yes +fi + + + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTW" >&5 +printf %s "checking for FFTW... 
" >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$FFTW_CFLAGS"; then + pkg_cv_FFTW_CFLAGS="$FFTW_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTW_CFLAGS=`$PKG_CONFIG --cflags "fftw3" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$FFTW_LIBS"; then + pkg_cv_FFTW_LIBS="$FFTW_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTW_LIBS=`$PKG_CONFIG --libs "fftw3" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + FFTW_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3"` + else + FFTW_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3"` + fi + # Put the nasty error message in config.log where it belongs + echo "$FFTW_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + : +elif test $pkg_failed = untried; then + : +else + FFTW_CFLAGS=$pkg_cv_FFTW_CFLAGS + FFTW_LIBS=$pkg_cv_FFTW_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + + +printf "%s\n" "#define STARPU_HAVE_FFTW 1" >>confdefs.h + + STARPU_HAVE_FFTW=1 + + have_fftw=yes + +fi + if test 
x$have_fftw = xyes; then + STARPU_HAVE_FFTW_TRUE= + STARPU_HAVE_FFTW_FALSE='#' +else + STARPU_HAVE_FFTW_TRUE='#' + STARPU_HAVE_FFTW_FALSE= +fi + + + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTWF" >&5 +printf %s "checking for FFTWF... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$FFTWF_CFLAGS"; then + pkg_cv_FFTWF_CFLAGS="$FFTWF_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3f\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTWF_CFLAGS=`$PKG_CONFIG --cflags "fftw3f" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$FFTWF_LIBS"; then + pkg_cv_FFTWF_LIBS="$FFTWF_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3f\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTWF_LIBS=`$PKG_CONFIG --libs "fftw3f" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + FFTWF_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3f"` + else + FFTWF_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3f"` + fi + # Put the nasty error message in config.log where it belongs + echo "$FFTWF_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + : +elif test $pkg_failed = untried; then + : +else + FFTWF_CFLAGS=$pkg_cv_FFTWF_CFLAGS + FFTWF_LIBS=$pkg_cv_FFTWF_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + + +printf "%s\n" "#define STARPU_HAVE_FFTWF 1" >>confdefs.h + + STARPU_HAVE_FFTWF=1 + + have_fftwf=yes + +fi + if test x$have_fftwf = xyes; then + STARPU_HAVE_FFTWF_TRUE= + STARPU_HAVE_FFTWF_FALSE='#' +else + STARPU_HAVE_FFTWF_TRUE='#' + STARPU_HAVE_FFTWF_FALSE= +fi + + + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTWL" >&5 +printf %s "checking for FFTWL... " >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$FFTWL_CFLAGS"; then + pkg_cv_FFTWL_CFLAGS="$FFTWL_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3l\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3l") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTWL_CFLAGS=`$PKG_CONFIG --cflags "fftw3l" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$FFTWL_LIBS"; then + pkg_cv_FFTWL_LIBS="$FFTWL_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3l\""; } >&5 + ($PKG_CONFIG --exists --print-errors "fftw3l") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_FFTWL_LIBS=`$PKG_CONFIG --libs "fftw3l" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + FFTWL_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3l"` + else + FFTWL_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3l"` + fi + # Put the nasty error message in config.log where it belongs + echo "$FFTWL_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + : +elif test $pkg_failed = untried; then + : +else + FFTWL_CFLAGS=$pkg_cv_FFTWL_CFLAGS + FFTWL_LIBS=$pkg_cv_FFTWL_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + + +printf "%s\n" "#define STARPU_HAVE_FFTWL 1" >>confdefs.h + + HAVE_FFTWFL=1 + + have_fftwl=yes + +fi + if test x$have_fftwl = xyes; then + STARPU_HAVE_FFTWL_TRUE= + STARPU_HAVE_FFTWL_FALSE='#' +else + STARPU_HAVE_FFTWL_TRUE='#' + STARPU_HAVE_FFTWL_FALSE= +fi + + +if test x$enable_starpufft = xyes -a \( \( x$enable_cpu = xyes -a x$have_fftw = xyes -a x$have_fftwf = xyes \) -o x$have_cufftdoublecomplex = xyes \); then + fft_support=yes +fi + if test x$fft_support = xyes; 
then + STARPU_BUILD_STARPUFFT_TRUE= + STARPU_BUILD_STARPUFFT_FALSE='#' +else + STARPU_BUILD_STARPUFFT_TRUE='#' + STARPU_BUILD_STARPUFFT_FALSE= +fi + + +# Check whether --enable-starpufft-examples was given. +if test ${enable_starpufft_examples+y} +then : + enableval=$enable_starpufft_examples; enable_starpufft_examples=$enableval +else $as_nop + enable_starpufft_examples=no +fi + + if test x$enable_starpufft_examples = xyes; then + STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE= + STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE='#' +else + STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE='#' + STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE= +fi + + +########################################## +# hwloc # +########################################## + +have_valid_hwloc=no +SAVED_LIBS="${LIBS}" +SAVED_CPPFLAGS="${CPPFLAGS}" +SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" + +# Check whether --with-hwloc was given. +if test ${with_hwloc+y} +then : + withval=$with_hwloc; + if test x$withval != xno; then + if test "$withval" = "yes" ; then + use_hwloc=yes + else + # use specified path + if test ! -d "$withval" ; then + as_fn_error $? "\"Directory specified for hwloc <$withval> does not exist\"" "$LINENO" 5 + fi + if test -d "$withval/lib64/pkgconfig" ; then + export PKG_CONFIG_PATH=$withval/lib64/pkgconfig:$PKG_CONFIG_PATH + else + if test -d "$withval/lib/pkgconfig" ; then + export PKG_CONFIG_PATH=$withval/lib/pkgconfig:$PKG_CONFIG_PATH + else + as_fn_error $? "\"Hwloc directory <$withval> does not have a subdirectory lib/pkgconfig or lib64/pkgconfig\"" "$LINENO" 5 + fi + fi + use_hwloc=yes + fi + else + use_hwloc=no + fi + +else $as_nop + + use_hwloc=maybe + +fi + +if test "$use_hwloc" != "no" +then : + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HWLOC" >&5 +printf %s "checking for HWLOC... 
" >&6; } + +if test -n "$PKG_CONFIG"; then + if test -n "$HWLOC_CFLAGS"; then + pkg_cv_HWLOC_CFLAGS="$HWLOC_CFLAGS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"hwloc\""; } >&5 + ($PKG_CONFIG --exists --print-errors "hwloc") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_HWLOC_CFLAGS=`$PKG_CONFIG --cflags "hwloc" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi +if test -n "$PKG_CONFIG"; then + if test -n "$HWLOC_LIBS"; then + pkg_cv_HWLOC_LIBS="$HWLOC_LIBS" + else + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"hwloc\""; } >&5 + ($PKG_CONFIG --exists --print-errors "hwloc") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_HWLOC_LIBS=`$PKG_CONFIG --libs "hwloc" 2>/dev/null` +else + pkg_failed=yes +fi + fi +else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + HWLOC_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "hwloc"` + else + HWLOC_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "hwloc"` + fi + # Put the nasty error message in config.log where it belongs + echo "$HWLOC_PKG_ERRORS" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + have_valid_hwloc=no +elif test $pkg_failed = untried; then + have_valid_hwloc=no +else + HWLOC_CFLAGS=$pkg_cv_HWLOC_CFLAGS + HWLOC_LIBS=$pkg_cv_HWLOC_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_valid_hwloc=yes +fi + +fi + if test "x$have_valid_hwloc" = 
"xyes"; then + STARPU_HAVE_HWLOC_TRUE= + STARPU_HAVE_HWLOC_FALSE='#' +else + STARPU_HAVE_HWLOC_TRUE='#' + STARPU_HAVE_HWLOC_FALSE= +fi + +# in case hwloc was explicitly required, but is not available, this is an error +if test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no" +then : + as_fn_error $? "cannot find hwloc or pkg-config" "$LINENO" 5 + +fi +# in case hwloc is not available but was not explicitly disabled, this is an error +if test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no" +then : + as_fn_error $? "libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'." "$LINENO" 5 + +fi + +LIBS="${HWLOC_LIBS} ${SAVED_LIBS}" +CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}" + +if test "$have_valid_hwloc" = "yes" +then : + +printf "%s\n" "#define STARPU_HAVE_HWLOC 1" >>confdefs.h + + HWLOC_REQUIRES=hwloc + STARPU_HAVE_HWLOC=1 + + ac_fn_check_decl "$LINENO" "hwloc_cuda_get_device_osdev_by_index" "ac_cv_have_decl_hwloc_cuda_get_device_osdev_by_index" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_hwloc_cuda_get_device_osdev_by_index" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" "hwloc_hip_get_device_osdev_by_index" "ac_cv_have_decl_hwloc_hip_get_device_osdev_by_index" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_hwloc_hip_get_device_osdev_by_index" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" 
"hwloc_distances_obj_pair_values" "ac_cv_have_decl_hwloc_distances_obj_pair_values" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_hwloc_distances_obj_pair_values" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES $ac_have_decl" >>confdefs.h + + +fi + +ac_fn_c_check_func "$LINENO" "hwloc_topology_dup" "ac_cv_func_hwloc_topology_dup" +if test "x$ac_cv_func_hwloc_topology_dup" = xyes +then : + printf "%s\n" "#define HAVE_HWLOC_TOPOLOGY_DUP 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "hwloc_topology_set_components" "ac_cv_func_hwloc_topology_set_components" +if test "x$ac_cv_func_hwloc_topology_set_components" = xyes +then : + printf "%s\n" "#define HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "hwloc_cpukinds_get_nr" "ac_cv_func_hwloc_cpukinds_get_nr" +if test "x$ac_cv_func_hwloc_cpukinds_get_nr" = xyes +then : + printf "%s\n" "#define HAVE_HWLOC_CPUKINDS_GET_NR 1" >>confdefs.h + +fi + +ac_fn_c_check_func "$LINENO" "hwloc_get_area_memlocation" "ac_cv_func_hwloc_get_area_memlocation" +if test "x$ac_cv_func_hwloc_get_area_memlocation" = xyes +then : + printf "%s\n" "#define HAVE_HWLOC_GET_AREA_MEMLOCATION 1" >>confdefs.h + +fi + + if test $ac_cv_func_hwloc_topology_dup = yes; then + STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE= + STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE='#' +else + STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE='#' + STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE= +fi + + +LIBS="${SAVED_LIBS}" +CPPFLAGS="${SAVED_CPPFLAGS}" +export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether hwloc should be used" >&5 +printf %s "checking whether hwloc should be used... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_valid_hwloc" >&5 +printf "%s\n" "$have_valid_hwloc" >&6; } + + +# is the header file f77.h available ? 
+ac_fn_c_check_header_compile "$LINENO" "f77.h" "ac_cv_header_f77_h" "$ac_includes_default" +if test "x$ac_cv_header_f77_h" = xyes +then : + have_f77_h=yes +else $as_nop + have_f77_h=no +fi + +STARPU_HAVE_F77_H=$have_f77_h + + if test x$have_f77_h = xyes; then + STARPU_HAVE_F77_H_TRUE= + STARPU_HAVE_F77_H_FALSE='#' +else + STARPU_HAVE_F77_H_TRUE='#' + STARPU_HAVE_F77_H_FALSE= +fi + +if test x$have_f77_h = xyes; then + +printf "%s\n" "#define STARPU_HAVE_F77_H 1" >>confdefs.h + +fi + + +# Check whether --with-icc was given. +if test ${with_icc+y} +then : + withval=$with_icc; icc_path="$withval" +else $as_nop + icc_path="" +fi + + +# Check whether --with-icc-args was given. +if test ${with_icc_args+y} +then : + withval=$with_icc_args; icc_args=$withval +fi + +ICC_ARGS=$icc_args + +# Check whether --enable-icc was given. +if test ${enable_icc+y} +then : + enableval=$enable_icc; enable_icc=$enableval +else $as_nop + enable_icc=yes +fi + +ICC="" +if test "$enable_icc" = "yes" ; then + if test "$icc_path" != "" ; then + ICC="$icc_path" + else + # Check if icc is available + # Extract the first word of "icc", so it can be a program name with args. +set dummy icc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ICC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $ICC in + [\\/]* | ?:[\\/]*) + ac_cv_path_ICC="$ICC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ICC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ICC=$ac_cv_path_ICC +if test -n "$ICC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ICC" >&5 +printf "%s\n" "$ICC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test ! -x "$ICC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The ICC compiler '$ICC' does not have the execute permission" >&5 +printf "%s\n" "The ICC compiler '$ICC' does not have the execute permission" >&6; } + enable_icc=no + ICC="" +fi + +# If cuda and icc are both available, check they are compatible +if test "$enable_cuda" = "yes" -a "$ICC" != ""; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA and ICC are compatible" >&5 +printf %s "checking whether CUDA and ICC are compatible... " >&6; } + OLD_CC="$CC" + CC="$ICC" + OLD_CFLAGS="$CFLAGS" + CFLAGS="-I$PWD/include -I$srcdir/include" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include +int +main (void) +{ + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else $as_nop + ICC="" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CC="$OLD_CC" + CFLAGS="$OLD_CFLAGS" +fi + +# Disable ICC on windows +if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then + ICC="" +fi + +if test "x$ICC" != "x"; then + +printf "%s\n" "#define STARPU_HAVE_ICC 1" >>confdefs.h + +fi + if test "x$ICC" != "x"; then + STARPU_HAVE_ICC_TRUE= + STARPU_HAVE_ICC_FALSE='#' +else + STARPU_HAVE_ICC_TRUE='#' + STARPU_HAVE_ICC_FALSE= +fi + + +# Do not generate manpages for the tools if we do not have help2man +for ac_prog in help2man +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_HELP2MAN+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$HELP2MAN"; then + ac_cv_prog_HELP2MAN="$HELP2MAN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_HELP2MAN="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +HELP2MAN=$ac_cv_prog_HELP2MAN +if test -n "$HELP2MAN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HELP2MAN" >&5 +printf "%s\n" "$HELP2MAN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$HELP2MAN" && break +done + +# Disable on windows +if test "$starpu_windows" = "yes" ; then + HELP2MAN="" +fi + if test "x$HELP2MAN" != "x"; then + STARPU_HAVE_HELP2MAN_TRUE= + STARPU_HAVE_HELP2MAN_FALSE='#' +else + STARPU_HAVE_HELP2MAN_TRUE='#' + STARPU_HAVE_HELP2MAN_FALSE= +fi + + +ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pciDomainID" "ac_cv_member_struct_cudaDeviceProp_pciDomainID" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_pciDomainID" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_DOMAINID 1" >>confdefs.h + +fi + + +ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pciBusID" "ac_cv_member_struct_cudaDeviceProp_pciBusID" "#include +" +if test "x$ac_cv_member_struct_cudaDeviceProp_pciBusID" = xyes +then : + +printf "%s\n" "#define STARPU_HAVE_BUSID 1" >>confdefs.h + +fi + + + if true; then + STARPU_HAVE_AM111_TRUE= + STARPU_HAVE_AM111_FALSE='#' +else + STARPU_HAVE_AM111_TRUE='#' + STARPU_HAVE_AM111_FALSE= +fi + + +########################################## +# Resource Manager # +########################################## + +starpurm_support=no +starpurm_dlb_support=no + +# Check whether --enable-starpurm was given. 
+if test ${enable_starpurm+y} +then : + enableval=$enable_starpurm; enable_starpurm=$enableval +else $as_nop + enable_starpurm=no +fi + +if test "x$enable_starpurm" != xno +then + starpurm_support=yes + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether resource management debug messages should be displayed" >&5 +printf %s "checking whether resource management debug messages should be displayed... " >&6; } + # Check whether --enable-starpurm-verbose was given. +if test ${enable_starpurm_verbose+y} +then : + enableval=$enable_starpurm_verbose; enable_starpurm_verbose=$enableval +else $as_nop + enable_starpurm_verbose=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_starpurm_verbose" >&5 +printf "%s\n" "$enable_starpurm_verbose" >&6; } + if test x$enable_starpurm_verbose = xyes; then + +printf "%s\n" "#define STARPURM_VERBOSE 1" >>confdefs.h + + fi + + # DLB + DLB_CFLAGS="" + DLB_LIBS="" + # Check whether --enable-dlb was given. +if test ${enable_dlb+y} +then : + enableval=$enable_dlb; enable_dlb=$enableval +else $as_nop + enable_dlb=no +fi + + if test "x$enable_dlb" != xno + then + +# Check whether --with-dlb-include-dir was given. 
+if test ${with_dlb_include_dir+y} +then : + withval=$with_dlb_include_dir; dlb_inc_dirs="$withval" +else $as_nop + dlb_inc_dirs="" +fi + + + dlb_inc_dirs="${dlb_inc_dirs} /usr/include/dlb" + + dlb_incdir_found=no + for dlb_incdir in $dlb_inc_dirs + do + if test -n "$dlb_incdir" + then + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS=-I${dlb_incdir} + ac_fn_c_check_header_compile "$LINENO" "dlb.h" "ac_cv_header_dlb_h" "$ac_includes_default" +if test "x$ac_cv_header_dlb_h" = xyes +then : + printf "%s\n" "#define HAVE_DLB_H 1" >>confdefs.h + +fi + + if test "$ac_cv_header_dlb_h" = "yes" + then + CPPFLAGS="-I$dlb_incdir ${SAVED_CPPFLAGS}" + DLB_CFLAGS="-I${dlb_incdir}" + dlb_incdir_found=yes + break + else + CPPFLAGS=${SAVED_CPPFLAGS} + fi + unset ac_cv_header_dlb_h + fi + done + + +# Check whether --with-dlb-lib-dir was given. +if test ${with_dlb_lib_dir+y} +then : + withval=$with_dlb_lib_dir; dlb_lib_dirs="$withval" +else $as_nop + dlb_lib_dirs="" +fi + + + dlb_lib_dirs="${dlb_lib_dirs} /usr/lib/dlb" + + dlb_libdir_found=no + for dlb_libdir in $dlb_lib_dirs + do + if test -n "$dlb_libdir" + then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS=-L${dlb_libdir} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for DLB_Init in -ldlb" >&5 +printf %s "checking for DLB_Init in -ldlb... " >&6; } +if test ${ac_cv_lib_dlb_DLB_Init+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldlb $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char DLB_Init (); +int +main (void) +{ +return DLB_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dlb_DLB_Init=yes +else $as_nop + ac_cv_lib_dlb_DLB_Init=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dlb_DLB_Init" >&5 +printf "%s\n" "$ac_cv_lib_dlb_DLB_Init" >&6; } +if test "x$ac_cv_lib_dlb_DLB_Init" = xyes +then : + printf "%s\n" "#define HAVE_LIBDLB 1" >>confdefs.h + + LIBS="-ldlb $LIBS" + +fi + + if test "$ac_cv_lib_dlb_DLB_Init" = "yes" + then + LDFLAGS="-L${dlb_libdir} ${SAVED_LDFLAGS} ${STARPU_DLB_LDFLAGS}" + DLB_LIBS="-L${dlb_libdir} -ldlb" + dlb_libdir_found=yes + break + else + LDFLAGS=${SAVED_LDFLAGS} + fi + unset ac_cv_lib_dlb_DLB_Init + fi + done + + SAVED_CPPFLAGS="${CPPFLAGS}" + SAVED_CFLAGS="${CFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + CPPFLAGS="$HWLOC_CPPFLAGS -D_GNU_SOURCE $CPPFLAGS" + CFLAGS="$HWLOC_CFLAGS $CFLAGS" + LIBS="$HWLOC_LIBS $LIBS" + # check whether libhwloc has a dedicated glibc-sched.h include for conversion with glibc cpusets + ac_fn_c_check_header_compile "$LINENO" "hwloc/glibc-sched.h" "ac_cv_header_hwloc_glibc_sched_h" "$ac_includes_default" +if test "x$ac_cv_header_hwloc_glibc_sched_h" = xyes +then : + printf "%s\n" "#define HAVE_HWLOC_GLIBC_SCHED_H 1" >>confdefs.h + +fi + + CPPFLAGS="$SAVED_CPPFLAGS" + CFLAGS="$SAVED_CFLAGS" + LIBS="$SAVED_LIBS" + + SAVED_CPPFLAGS="${CPPFLAGS}" + SAVED_CFLAGS="${CFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + CPPFLAGS="$STARPU_CPPFLAGS $CPPFLAGS" + CFLAGS="$STARPU_CFLAGS $CFLAGS" + LIBS="$STARPU_LIBS $LIBS" + # check if StarPU implements starpu_worker_set_going_to_sleep_callback() + if test x$enable_worker_cb = xyes ; then + +printf "%s\n" "#define STARPURM_STARPU_HAVE_WORKER_CALLBACKS 1" >>confdefs.h + + fi + + 
#AC_CHECK_FUNC([starpu_worker_set_going_to_sleep_callback],AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.])) + CPPFLAGS="$SAVED_CPPFLAGS" + CFLAGS="$SAVED_CFLAGS" + LIBS="$SAVED_LIBS" + + if test "x$dlb_incdir_found" != "xyes" -o "x$dlb_libdir_found" != "xyes" + then + enable_dlb=no + fi + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB support should be enabled" >&5 +printf %s "checking whether DLB support should be enabled... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_dlb" >&5 +printf "%s\n" "$enable_dlb" >&6; } + if test "x$enable_dlb" != "xno" + then + +printf "%s\n" "#define STARPURM_HAVE_DLB 1" >>confdefs.h + + starpurm_dlb_support=yes + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB resource management debug messages should be displayed" >&5 +printf %s "checking whether DLB resource management debug messages should be displayed... " >&6; } + # Check whether --enable-starpurm-dlb-verbose was given. +if test ${enable_starpurm_dlb_verbose+y} +then : + enableval=$enable_starpurm_dlb_verbose; enable_starpurm_dlb_verbose=$enableval +else $as_nop + enable_starpurm_dlb_verbose=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_starpurm_dlb_verbose" >&5 +printf "%s\n" "$enable_starpurm_dlb_verbose" >&6; } + if test x$enable_starpurm_dlb_verbose = xyes; then + +printf "%s\n" "#define STARPURM_DLB_VERBOSE 1" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB callbacks expect an user argument" >&5 +printf %s "checking whether DLB callbacks expect an user argument... " >&6; } + if test ${ac_cv_dlb_callback_arg+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +dlb_handler_t dlb_handle; +void _dlb_callback_disable_cpu(int cpuid, void *arg) { + (void)cpuid; + (void)arg; +} +void f(void) { +(void)DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, 0); +} + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_dlb_callback_arg=yes +else $as_nop + ac_cv_dlb_callback_arg=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_dlb_callback_arg" >&5 +printf "%s\n" "$ac_cv_dlb_callback_arg" >&6; } + if test $ac_cv_dlb_callback_arg = yes; then + +printf "%s\n" "#define STARPURM_HAVE_DLB_CALLBACK_ARG 1" >>confdefs.h + + fi + + fi + + +fi + if test x$starpurm_dlb_support = "xyes"; then + STARPURM_HAVE_DLB_TRUE= + STARPURM_HAVE_DLB_FALSE='#' +else + STARPURM_HAVE_DLB_TRUE='#' + STARPURM_HAVE_DLB_FALSE= +fi + + if test x$starpurm_support = xyes; then + STARPU_BUILD_STARPURM_TRUE= + STARPU_BUILD_STARPURM_FALSE='#' +else + STARPU_BUILD_STARPURM_TRUE='#' + STARPU_BUILD_STARPURM_FALSE= +fi + + +# Check whether --enable-starpurm-examples was given. +if test ${enable_starpurm_examples+y} +then : + enableval=$enable_starpurm_examples; enable_starpurm_examples=$enableval +else $as_nop + enable_starpurm_examples=no +fi + + if test x$enable_starpurm_examples = xyes; then + STARPU_BUILD_STARPURM_EXAMPLES_TRUE= + STARPU_BUILD_STARPURM_EXAMPLES_FALSE='#' +else + STARPU_BUILD_STARPURM_EXAMPLES_TRUE='#' + STARPU_BUILD_STARPURM_EXAMPLES_FALSE= +fi + + +##################################### +# StarPUPy # +##################################### + +starpupy_support=no + +if test "x$enable_starpupy" != xno +then + for ac_prog in python3 +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PYTHON+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PYTHON"; then + ac_cv_prog_PYTHON="$PYTHON" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PYTHON="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PYTHON=$ac_cv_prog_PYTHON +if test -n "$PYTHON"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PYTHON" >&5 +printf "%s\n" "$PYTHON" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PYTHON" && break +done + + if test "$ac_cv_prog_PYTHON" = "" + then + if test "x$enable_starpupy" = xyes ; then + as_fn_error $? "python3 missing, cannot build StarPU python interface" "$LINENO" 5 + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 missing, cannot build StarPU python interface" >&5 +printf "%s\n" "$as_me: WARNING: python3 missing, cannot build StarPU python interface" >&2;} + enable_starpupy=no + fi + fi +fi +if test "x$enable_starpupy" != xno +then + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 version" >&5 +printf %s "checking for python3 version... 
" >&6; } + PYTHON_VERSION=$(echo "import sys ; print(str(sys.version_info.major)+\".\"+str(sys.version_info.minor))" | $PYTHON) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PYTHON_VERSION" >&5 +printf "%s\n" "$PYTHON_VERSION" >&6; } + + PYTHON_INCLUDE_DIRS="`$PYTHON -c "from sysconfig import get_paths as gp; print(gp()['include'])"`" + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="$CPPFLAGS -I$PYTHON_INCLUDE_DIRS" + for ac_header in Python.h +do : + ac_fn_c_check_header_compile "$LINENO" "Python.h" "ac_cv_header_Python_h" "$ac_includes_default" +if test "x$ac_cv_header_Python_h" = xyes +then : + printf "%s\n" "#define HAVE_PYTHON_H 1" >>confdefs.h + have_python_h=yes +else $as_nop + have_python_h=no +fi + +done + if test "$have_python_h" = "no" ; then + if test "x$enable_starpupy" = xyes ; then + as_fn_error $? "Python.h missing, cannot build StarPU python interface (consider installing python-dev)" "$LINENO" 5 + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Python.h missing, cannot build StarPU python interface (consider installing python-dev)" >&5 +printf "%s\n" "$as_me: WARNING: Python.h missing, cannot build StarPU python interface (consider installing python-dev)" >&2;} + enable_starpupy=no + fi + fi +fi +if test "x$enable_starpupy" != xno +then + as_ac_Lib=`printf "%s\n" "ac_cv_lib_python$PYTHON_VERSION""_PyErr_Print" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PyErr_Print in -lpython$PYTHON_VERSION" >&5 +printf %s "checking for PyErr_Print in -lpython$PYTHON_VERSION... " >&6; } +if eval test \${$as_ac_Lib+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpython$PYTHON_VERSION $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char PyErr_Print (); +int +main (void) +{ +return PyErr_Print (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$as_ac_Lib=yes" +else $as_nop + eval "$as_ac_Lib=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +eval ac_res=\$$as_ac_Lib + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_ac_Lib"\" = x"yes" +then : + have_python_lib=yes +else $as_nop + have_python_lib=no +fi + + if test "$have_python_lib" = "no" ; then + if test "x$enable_starpupy" = xyes ; then + as_fn_error $? "Python library missing, cannot build StarPU python interface (consider installing python-dev)" "$LINENO" 5 + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Python library missing, cannot build StarPU python interface (consider installing python-dev)" >&5 +printf "%s\n" "$as_me: WARNING: Python library missing, cannot build StarPU python interface (consider installing python-dev)" >&2;} + enable_starpupy=no + fi + fi +fi +if test "x$enable_starpupy" != xno +then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 setuptools" >&5 +printf %s "checking for python3 setuptools... " >&6; } + if $PYTHON -c "import setuptools" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + if test "x$enable_starpupy" = xyes ; then + as_fn_error $? 
"setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" "$LINENO" 5 + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" >&5 +printf "%s\n" "$as_me: WARNING: setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" >&2;} + enable_starpupy=no + fi + fi +fi + +if test "x$enable_starpupy" != xno +then + CPPFLAGS=${SAVED_CPPFLAGS} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module joblib" >&5 +printf %s "checking for python3 module joblib... " >&6; } + + echo "import joblib" | $PYTHON - 2>/dev/null + if test $? -ne 0 ; then + joblib_avail=no + else + joblib_avail=yes + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $joblib_avail" >&5 +printf "%s\n" "$joblib_avail" >&6; } + if test "$joblib_avail" = "yes" ; then + +printf "%s\n" "#define STARPU_PYTHON_HAVE_JOBLIB 1" >>confdefs.h + + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')" >&5 +printf "%s\n" "$as_me: WARNING: python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')" >&2;} + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module cloudpickle" >&5 +printf %s "checking for python3 module cloudpickle... " >&6; } + + echo "import cloudpickle" | $PYTHON - 2>/dev/null + if test $? 
-ne 0 ; then + cloudpickle_avail=no + else + cloudpickle_avail=yes + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cloudpickle_avail" >&5 +printf "%s\n" "$cloudpickle_avail" >&6; } + if test "$cloudpickle_avail" = "yes" ; then + +printf "%s\n" "#define STARPU_PYTHON_HAVE_CLOUDPICKLE 1" >>confdefs.h + + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')" >&5 +printf "%s\n" "$as_me: WARNING: python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')" >&2;} + fi + + starpupy_support=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module numpy" >&5 +printf %s "checking for python3 module numpy... " >&6; } + + echo "import numpy" | $PYTHON - 2>/dev/null + if test $? -ne 0 ; then + numpy_avail=no + else + numpy_avail=yes + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $numpy_avail" >&5 +printf "%s\n" "$numpy_avail" >&6; } + PYTHON_NUMPY_DIR="" + if test "$numpy_avail" = "yes" ; then + +printf "%s\n" "#define STARPU_PYTHON_HAVE_NUMPY 1" >>confdefs.h + + PYTHON_NUMPY_DIR="`$PYTHON -c "import numpy ; print(numpy.get_include())"`" + fi + + PYTHON_SETUP_OPTIONS="" + if test x$enable_debug = xyes ; then + PYTHON_SETUP_OPTIONS="--debug" + fi + +fi + if test x$starpupy_support = xyes; then + STARPU_BUILD_STARPUPY_TRUE= + STARPU_BUILD_STARPUPY_FALSE='#' +else + STARPU_BUILD_STARPUPY_TRUE='#' + STARPU_BUILD_STARPUPY_FALSE= +fi + + if test x$numpy_avail = xyes; then + STARPU_STARPUPY_NUMPY_TRUE= + STARPU_STARPUPY_NUMPY_FALSE='#' +else + STARPU_STARPUPY_NUMPY_TRUE='#' + STARPU_STARPUPY_NUMPY_FALSE= +fi + + + + +########################################## +# Documentation # +########################################## + +def_enable_build_doc="yes" +available_doc="no" +if test -d "$srcdir/doc/doxygen/html" ; then + 
def_enable_build_doc="no" + available_doc="yes" +fi +if test "$starpu_darwin" = "yes" ; then + def_enable_build_doc="no" +fi + +# Check whether --enable-build-doc was given. +if test ${enable_build_doc+y} +then : + enableval=$enable_build_doc; enable_build_doc=$enableval +else $as_nop + enable_build_doc=$def_enable_build_doc +fi + + +# Check whether --enable-build-doc-pdf was given. +if test ${enable_build_doc_pdf+y} +then : + enableval=$enable_build_doc_pdf; enable_build_doc_pdf=$enableval +else $as_nop + enable_build_doc_pdf=no +fi + + +available_doc_pdf="no" +if test -f "$srcdir/doc/doxygen/starpu.pdf" ; then + enable_build_doc_pdf="no" + available_doc_pdf="yes" +fi + +# Check whether doxygen needed tools are installed +# Extract the first word of "doxygen", so it can be a program name with args. +set dummy doxygen; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_doxygencommand+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $doxygencommand in + [\\/]* | ?:[\\/]*) + ac_cv_path_doxygencommand="$doxygencommand" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_doxygencommand="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +doxygencommand=$ac_cv_path_doxygencommand +if test -n "$doxygencommand"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $doxygencommand" >&5 +printf "%s\n" "$doxygencommand" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +if test "$doxygencommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + as_fn_error $? "doxygen missing, cannot build documentation PDF" "$LINENO" 5 + fi + enable_build_doc="no" + enable_build_doc_pdf="no" +fi +# Extract the first word of "pdflatex", so it can be a program name with args. +set dummy pdflatex; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_pdflatexcommand+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $pdflatexcommand in + [\\/]* | ?:[\\/]*) + ac_cv_path_pdflatexcommand="$pdflatexcommand" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_pdflatexcommand="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +pdflatexcommand=$ac_cv_path_pdflatexcommand +if test -n "$pdflatexcommand"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pdflatexcommand" >&5 +printf "%s\n" "$pdflatexcommand" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +if test "$pdflatexcommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + as_fn_error $? "pdflatex missing, cannot build documentation PDF" "$LINENO" 5 + fi + enable_build_doc_pdf="no" +fi +# Extract the first word of "epstopdf", so it can be a program name with args. +set dummy epstopdf; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_epstopdfcommand+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $epstopdfcommand in + [\\/]* | ?:[\\/]*) + ac_cv_path_epstopdfcommand="$epstopdfcommand" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_epstopdfcommand="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +epstopdfcommand=$ac_cv_path_epstopdfcommand +if test -n "$epstopdfcommand"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $epstopdfcommand" >&5 +printf "%s\n" "$epstopdfcommand" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +if test "$epstopdfcommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + as_fn_error $? "epstopdf missing, cannot build documentation PDF" "$LINENO" 5 + fi + enable_build_doc_pdf="no" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether HTML documentation should be compiled" >&5 +printf %s "checking whether HTML documentation should be compiled... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_build_doc" >&5 +printf "%s\n" "$enable_build_doc" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether HTML documentation is available" >&5 +printf %s "checking whether HTML documentation is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $available_doc" >&5 +printf "%s\n" "$available_doc" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PDF documentation should be compiled" >&5 +printf %s "checking whether PDF documentation should be compiled... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_build_doc_pdf" >&5 +printf "%s\n" "$enable_build_doc_pdf" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PDF documentation is available" >&5 +printf %s "checking whether PDF documentation is available... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $available_doc_pdf" >&5 +printf "%s\n" "$available_doc_pdf" >&6; } + + if test x$enable_build_doc != xno; then + STARPU_BUILD_DOC_TRUE= + STARPU_BUILD_DOC_FALSE='#' +else + STARPU_BUILD_DOC_TRUE='#' + STARPU_BUILD_DOC_FALSE= +fi + + if test x$available_doc != xno; then + STARPU_AVAILABLE_DOC_TRUE= + STARPU_AVAILABLE_DOC_FALSE='#' +else + STARPU_AVAILABLE_DOC_TRUE='#' + STARPU_AVAILABLE_DOC_FALSE= +fi + + + if test x$enable_build_doc_pdf != xno; then + STARPU_BUILD_DOC_PDF_TRUE= + STARPU_BUILD_DOC_PDF_FALSE='#' +else + STARPU_BUILD_DOC_PDF_TRUE='#' + STARPU_BUILD_DOC_PDF_FALSE= +fi + + if test x$available_doc_pdf != xno; then + STARPU_AVAILABLE_DOC_PDF_TRUE= + STARPU_AVAILABLE_DOC_PDF_FALSE='#' +else + STARPU_AVAILABLE_DOC_PDF_TRUE='#' + STARPU_AVAILABLE_DOC_PDF_FALSE= +fi + + +if test x$enable_build_doc_pdf != xno ; then + DOC_GENERATE_LATEX=YES +else + DOC_GENERATE_LATEX=NO +fi + + +############################################################################### +# # +# Julia # +# # +############################################################################### +# Check whether --enable-julia was given. +if test ${enable_julia+y} +then : + enableval=$enable_julia; enable_julia=$enableval +else $as_nop + enable_julia=no +fi + +if test "$enable_julia" = "yes" ; then + # Check whether the julia compiler is available + # Extract the first word of "julia", so it can be a program name with args. +set dummy julia; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_juliapath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $juliapath in + [\\/]* | ?:[\\/]*) + ac_cv_path_juliapath="$juliapath" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_juliapath="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +juliapath=$ac_cv_path_juliapath +if test -n "$juliapath"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $juliapath" >&5 +printf "%s\n" "$juliapath" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether julia is available" >&5 +printf %s "checking whether julia is available... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $juliapath" >&5 +printf "%s\n" "$juliapath" >&6; } + if test ! -x "$juliapath" ; then + as_fn_error $? "Julia compiler '$juliapath' is not valid" "$LINENO" 5 + enable_julia=no + fi +fi + if test "x$enable_julia" = "xyes"; then + STARPU_USE_JULIA_TRUE= + STARPU_USE_JULIA_FALSE='#' +else + STARPU_USE_JULIA_TRUE='#' + STARPU_USE_JULIA_FALSE= +fi + +JULIA=$juliapath + + +############################################################################### +# # +# Eclipse Plugin # +# # +############################################################################### +# Check whether --enable-eclipse-plugin was given. 
+if test ${enable_eclipse_plugin+y} +then : + enableval=$enable_eclipse_plugin; enable_eclipse_plugin=$enableval +else $as_nop + enable_eclipse_plugin=no +fi + +if test "$enable_eclipse_plugin" = "yes" ; then + # Extract the first word of "eclipse", so it can be a program name with args. +set dummy eclipse; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_eclipsepath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $eclipsepath in + [\\/]* | ?:[\\/]*) + ac_cv_path_eclipsepath="$eclipsepath" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_eclipsepath="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +eclipsepath=$ac_cv_path_eclipsepath +if test -n "$eclipsepath"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $eclipsepath" >&5 +printf "%s\n" "$eclipsepath" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether eclipse is available" >&5 +printf %s "checking whether eclipse is available... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $eclipsepath" >&5 +printf "%s\n" "$eclipsepath" >&6; } + if test ! -x "$eclipsepath" ; then + as_fn_error $? 
"Eclipse executable '$eclipsepath' is not valid" "$LINENO" 5 + enable_eclipse_plugin=no + fi + + libs=$(for x in starpu-$STARPU_EFFECTIVE_VERSION $(echo $STARPU_EXPORTED_LIBS | sed 's/-l//g') $HWLOC_REQUIRES ; do echo $x ; done) + option_libs=$($srcdir/eclipse-plugin/tools/cproject.sh option $libs) + module_libs=$($srcdir/eclipse-plugin/tools/cproject.sh module $libs) +fi + + if test "x$enable_eclipse_plugin" = "xyes"; then + STARPU_BUILD_ECLIPSE_PLUGIN_TRUE= + STARPU_BUILD_ECLIPSE_PLUGIN_FALSE='#' +else + STARPU_BUILD_ECLIPSE_PLUGIN_TRUE='#' + STARPU_BUILD_ECLIPSE_PLUGIN_FALSE= +fi + +ECLIPSE=$eclipsepath + +STARPU_INCLUDE_PATH=$(eval echo ${includedir}/starpu/$STARPU_EFFECTIVE_VERSION) + +STARPU_LIB_PATH=$(eval echo ${prefix}/lib) + +STARPU_MODULE_LIBS="$module_libs" + +STARPU_OPTION_LIBS="$option_libs" + + +############################################################################### +# # +# Final settings # +# # +############################################################################### + +if test x$enable_simgrid = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then + as_fn_error $? "Cuda0 not supported with simgrid" "$LINENO" 5 +fi + +if test x$enable_opencl = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then + as_fn_error $? "Cuda0 not supported with OpenCL" "$LINENO" 5 +fi + +if test x$enable_openmp = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then + as_fn_error $? 
"Cuda0 not supported with OpenMP" "$LINENO" 5 +fi + + +CPPFLAGS="$CPPFLAGS -DSTARPU_SAMPLING_DIR=\"\\\"${datarootdir}/starpu/perfmodels/sampling\\\"\"" +STARPU_BASIC_H_CPPFLAGS="$HWLOC_CFLAGS $STARPU_CUDA_CPPFLAGS $STARPU_HIP_CPPFLAGS $STARPU_OPENCL_CPPFLAGS $STARPU_MAX_FPGA_CPPFLAGS $SIMGRID_CFLAGS $PAPI_CFLAGS" + +# these are the flags needed to compile starpu.h +STARPU_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" + + +STARPU_NVCC_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" + + +# these are the flags needed for linking libstarpu (and thus also for static linking) +LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $STARPU_HIP_LDFLAGS $HWLOC_LIBS $FXT_LDFLAGS $FXT_LIBS $PAPI_LIBS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LDFLAGS $STARPU_BLAS_LDFLAGS $DGELS_LIBS $STARPU_MAX_FPGA_LDFLAGS $STARPU_DLOPEN_LDFLAGS" + + +# these are the flags needed for linking against libstarpu (because starpu.h makes its includer use pthread_*, simgrid, etc.) +if test "x$enable_shared" = xno; then + # No .so, so application will unexpectedly have to know which -l to + # use. Give them in .pc file. + +printf "%s\n" "#define STARPU_STATIC_ONLY 1" >>confdefs.h + + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $LDFLAGS $LIBS $LIBSTARPU_LDFLAGS" +fi + + +STARPUPY_EXTRA_LINK_ARGS="" +if test "x$enable_starpupy" != xno +then + if test "x$OPENMP_CFLAGS" != "x" + then + STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$OPENMP_CFLAGS', " + fi + for flag in $STARPU_EXPORTED_LIBS + do + STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$flag', " + done + if test x$enable_coverage = xyes; then + STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '-lgcov', " + fi +fi + + +LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la +LIBSTARPU_LINK="$LIBSTARPU_LINK $STARPU_EXPORTED_LIBS" + + +# File configuration +ac_config_commands="$ac_config_commands executable-scripts" + + +# Create links to ICD files in build/socl/vendors directory. 
SOCL will use this +# directory as the OCL_ICD_VENDORS directory +SOCL_VENDORS="vendors/install/socl.icd" +for icd in /etc/OpenCL/vendors/*.icd ; do + if test -f $icd ; then + if test "$(basename $icd)" != "socl.icd" ; then + new_icd=$(basename $icd) + ac_config_links="$ac_config_links socl/vendors/$new_icd:$icd" + + SOCL_VENDORS="$SOCL_VENDORS vendors/$new_icd" + fi + fi +done + + +ac_config_files="$ac_config_files tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only" + +ac_config_headers="$ac_config_headers src/common/config.h src/common/config-src-build.h include/starpu_config.h starpurm/include/starpurm_config.h" + + +SANITIZE=$(echo $CFLAGS | grep sanitize) + if test -n "$SANITIZE"; then + STARPU_SANITIZE_TRUE= + STARPU_SANITIZE_FALSE='#' +else + STARPU_SANITIZE_TRUE='#' + STARPU_SANITIZE_FALSE= +fi + + +ac_config_files="$ac_config_files Makefile src/Makefile tools/Makefile tools/starpu_env tools/starpu_codelet_profile tools/starpu_codelet_histo_profile tools/starpu_mpi_comm_matrix.py tools/starpu_fxt_number_events_to_names.py tools/starpu_workers_activity tools/starpu_paje_draw_histogram tools/starpu_paje_state_stats tools/starpu_paje_summary tools/starpu_config tools/starpu_mlr_analysis tools/starpu_paje_sort tools/starpu_smpirun tools/starpu_tcpipexec socl/Makefile socl/src/Makefile socl/examples/Makefile socl/vendors/socl.icd socl/vendors/install/socl.icd packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc packages/starpu-1.3 packages/starpu-1.4 mpi/packages/libstarpumpi.pc mpi/packages/starpumpi-1.0.pc mpi/packages/starpumpi-1.1.pc mpi/packages/starpumpi-1.2.pc mpi/packages/starpumpi-1.3.pc mpi/packages/starpumpi-1.4.pc starpufft/Makefile starpufft/src/Makefile starpufft/tests/Makefile starpufft/packages/libstarpufft.pc starpufft/packages/starpufft-1.0.pc starpufft/packages/starpufft-1.1.pc starpufft/packages/starpufft-1.2.pc 
starpufft/packages/starpufft-1.3.pc starpufft/packages/starpufft-1.4.pc starpurm/Makefile starpurm/src/Makefile starpurm/tests/Makefile starpurm/examples/Makefile starpurm/packages/starpurm-1.3.pc starpurm/packages/starpurm-1.4.pc starpu_openmp_llvm/Makefile starpu_openmp_llvm/src/Makefile starpu_openmp_llvm/examples/Makefile starpupy/src/setup.cfg starpupy/src/setup.py starpupy/Makefile starpupy/src/Makefile starpupy/examples/Makefile starpupy/execute.sh starpupy/benchmark/Makefile examples/Makefile examples/stencil/Makefile tests/Makefile tests/model-checking/Makefile tests/model-checking/starpu-mc.sh mpi/Makefile mpi/src/Makefile mpi/tests/Makefile mpi/examples/Makefile mpi/tools/Makefile mpi/GNUmakefile sc_hypervisor/Makefile sc_hypervisor/src/Makefile sc_hypervisor/examples/Makefile doc/Makefile doc/doxygen/Makefile doc/doxygen/doxygen-config.cfg doc/doxygen/doxygen-config-include.cfg doc/doxygen/doxygen_filter.sh doc/doxygen_dev/Makefile doc/doxygen_dev/doxygen-config.cfg doc/doxygen_dev/doxygen_filter.sh doc/doxygen_dev/doxygen-config-include.cfg doc/doxygen_web_introduction/Makefile doc/doxygen_web_introduction/doxygen-config.cfg doc/doxygen_web_installation/Makefile doc/doxygen_web_installation/doxygen-config.cfg doc/doxygen_web_basics/Makefile doc/doxygen_web_basics/doxygen-config.cfg doc/doxygen_web_applications/Makefile doc/doxygen_web_applications/doxygen-config.cfg doc/doxygen_web_performances/Makefile doc/doxygen_web_performances/doxygen-config.cfg doc/doxygen_web_faq/Makefile doc/doxygen_web_faq/doxygen-config.cfg doc/doxygen_web_languages/Makefile doc/doxygen_web_languages/doxygen-config.cfg doc/doxygen_web_extensions/Makefile doc/doxygen_web_extensions/doxygen-config.cfg tools/msvc/starpu_var.bat min-dgels/Makefile bubble/Makefile bubble/tests/Makefile julia/Makefile julia/src/Makefile julia/src/dynamic_compiler/Makefile julia/examples/Makefile julia/examples/execute.sh eclipse-plugin/Makefile eclipse-plugin/src/Makefile 
eclipse-plugin/examples/Makefile eclipse-plugin/examples/hello/.cproject" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +printf %s "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. 
+ wait $am_sleep_pid 2>/dev/null + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5 +printf "%s\n" "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_PARALLEL_TRUE}" && test -z "${HAVE_PARALLEL_FALSE}"; then + as_fn_error $? "conditional \"HAVE_PARALLEL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUBBLE_TRUE}" && test -z "${STARPU_BUBBLE_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUBBLE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_SIMGRID_MC_TRUE}" && test -z "${STARPU_SIMGRID_MC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_SIMGRID_MC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_SIMGRID_TRUE}" && test -z "${STARPU_SIMGRID_FALSE}"; then + as_fn_error $? "conditional \"STARPU_SIMGRID\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_CXX11_TRUE}" && test -z "${STARPU_HAVE_CXX11_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_CXX11\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_CROSS_COMPILING_TRUE}" && test -z "${STARPU_CROSS_COMPILING_FALSE}"; then + as_fn_error $? "conditional \"STARPU_CROSS_COMPILING\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_MPI_MINIMAL_TESTS_TRUE}" && test -z "${STARPU_MPI_MINIMAL_TESTS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_MPI_MINIMAL_TESTS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_MASTER_SLAVE_TRUE}" && test -z "${STARPU_USE_MPI_MASTER_SLAVE_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MPI_MASTER_SLAVE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_TCPIP_MASTER_SLAVE_TRUE}" && test -z "${STARPU_USE_TCPIP_MASTER_SLAVE_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_TCPIP_MASTER_SLAVE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_MPI_CHECK_TRUE}" && test -z "${STARPU_MPI_CHECK_FALSE}"; then + as_fn_error $? "conditional \"STARPU_MPI_CHECK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_MPI_SYNC_CLOCKS_TRUE}" && test -z "${STARPU_MPI_SYNC_CLOCKS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_MPI_SYNC_CLOCKS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_MPI_TRUE}" && test -z "${STARPU_USE_MPI_MPI_FALSE}"; then + as_fn_error $? 
"conditional \"STARPU_USE_MPI_MPI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_NMAD_TRUE}" && test -z "${STARPU_USE_MPI_NMAD_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MPI_NMAD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_TRUE}" && test -z "${STARPU_USE_MPI_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MPI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_FT_TRUE}" && test -z "${STARPU_USE_MPI_FT_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MPI_FT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MPI_FT_STATS_TRUE}" && test -z "${STARPU_USE_MPI_FT_STATS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MPI_FT_STATS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_MS_LIB_TRUE}" && test -z "${STARPU_HAVE_MS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_MS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_WINDOWS_TRUE}" && test -z "${STARPU_HAVE_WINDOWS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_WINDOWS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_LINUX_SYS_TRUE}" && test -z "${STARPU_LINUX_SYS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_LINUX_SYS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_DARWIN_TRUE}" && test -z "${STARPU_HAVE_DARWIN_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_DARWIN\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_OPENBSD_SYS_TRUE}" && test -z "${STARPU_OPENBSD_SYS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_OPENBSD_SYS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_QUICK_CHECK_TRUE}" && test -z "${STARPU_QUICK_CHECK_FALSE}"; then + as_fn_error $? "conditional \"STARPU_QUICK_CHECK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_LONG_CHECK_TRUE}" && test -z "${STARPU_LONG_CHECK_FALSE}"; then + as_fn_error $? "conditional \"STARPU_LONG_CHECK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_NEW_CHECK_TRUE}" && test -z "${STARPU_NEW_CHECK_FALSE}"; then + as_fn_error $? "conditional \"STARPU_NEW_CHECK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_HDF5_TRUE}" && test -z "${STARPU_HAVE_HDF5_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_HDF5\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_SC_HYPERVISOR_TRUE}" && test -z "${STARPU_BUILD_SC_HYPERVISOR_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_SC_HYPERVISOR\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_SC_HYPERVISOR_TRUE}" && test -z "${STARPU_USE_SC_HYPERVISOR_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_SC_HYPERVISOR\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_SC_HYPERVISOR_DEBUG_TRUE}" && test -z "${STARPU_SC_HYPERVISOR_DEBUG_FALSE}"; then + as_fn_error $? "conditional \"STARPU_SC_HYPERVISOR_DEBUG\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_CPU_TRUE}" && test -z "${STARPU_USE_CPU_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_CPU\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_CUDA_TRUE}" && test -z "${STARPU_USE_CUDA_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_CUDA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_MAGMA_TRUE}" && test -z "${STARPU_HAVE_MAGMA_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_MAGMA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE}" && test -z "${STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_CUFFTDOUBLECOMPLEX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_CUDA0_TRUE}" && test -z "${STARPU_USE_CUDA0_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_CUDA0\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_CUDA1_TRUE}" && test -z "${STARPU_USE_CUDA1_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_CUDA1\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_HIPBLAS_TRUE}" && test -z "${STARPU_USE_HIPBLAS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_HIPBLAS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_HIP_TRUE}" && test -z "${STARPU_USE_HIP_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_HIP\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_USE_OPENCL_TRUE}" && test -z "${STARPU_USE_OPENCL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_OPENCL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MAX_FPGA_TRUE}" && test -z "${STARPU_USE_MAX_FPGA_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MAX_FPGA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_FC_TRUE}" && test -z "${STARPU_HAVE_FC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_FC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_F77_TRUE}" && test -z "${STARPU_HAVE_F77_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_F77\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_MPIFORT_TRUE}" && test -z "${STARPU_HAVE_MPIFORT_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_MPIFORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_COVERAGE_ENABLED_TRUE}" && test -z "${STARPU_COVERAGE_ENABLED_FALSE}"; then + as_fn_error $? "conditional \"STARPU_COVERAGE_ENABLED\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_COVERITY_TRUE}" && test -z "${STARPU_COVERITY_FALSE}"; then + as_fn_error $? "conditional \"STARPU_COVERITY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_FXT_TRUE}" && test -z "${STARPU_USE_FXT_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_FXT\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_USE_AYUDAME1_TRUE}" && test -z "${STARPU_USE_AYUDAME1_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_AYUDAME1\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_AYUDAME2_TRUE}" && test -z "${STARPU_USE_AYUDAME2_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_AYUDAME2\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_LEVELDB_TRUE}" && test -z "${STARPU_HAVE_LEVELDB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_LEVELDB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MP_TRUE}" && test -z "${STARPU_USE_MP_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_DEVEL_TRUE}" && test -z "${STARPU_DEVEL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_DEVEL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_PARALLEL_WORKER_TRUE}" && test -z "${STARPU_PARALLEL_WORKER_FALSE}"; then + as_fn_error $? "conditional \"STARPU_PARALLEL_WORKER\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_OPENMP_LLVM_TRUE}" && test -z "${STARPU_OPENMP_LLVM_FALSE}"; then + as_fn_error $? "conditional \"STARPU_OPENMP_LLVM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_OPENMP_TRUE}" && test -z "${STARPU_OPENMP_FALSE}"; then + as_fn_error $? "conditional \"STARPU_OPENMP\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_HAVE_OPENMP_TRUE}" && test -z "${STARPU_HAVE_OPENMP_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_OPENMP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_SOCL_TRUE}" && test -z "${STARPU_BUILD_SOCL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_SOCL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_SOCL_TRUE}" && test -z "${STARPU_USE_SOCL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_SOCL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_TESTS_TRUE}" && test -z "${STARPU_BUILD_TESTS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_TESTS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_EXAMPLES_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_EXAMPLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_OPENGL_TRUE}" && test -z "${STARPU_HAVE_OPENGL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_OPENGL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_X11_TRUE}" && test -z "${STARPU_HAVE_X11_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_X11\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_CBLAS_H_TRUE}" && test -z "${STARPU_HAVE_CBLAS_H_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_CBLAS_H\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_HAVE_LIBLAPACK_TRUE}" && test -z "${STARPU_HAVE_LIBLAPACK_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_LIBLAPACK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_CBLAS_SGEMV_TRUE}" && test -z "${STARPU_HAVE_CBLAS_SGEMV_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_CBLAS_SGEMV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_ATLAS_BLAS_LIB_TRUE}" && test -z "${STARPU_ATLAS_BLAS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_ATLAS_BLAS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_GOTO_BLAS_LIB_TRUE}" && test -z "${STARPU_GOTO_BLAS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_GOTO_BLAS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_MKL_BLAS_LIB_TRUE}" && test -z "${STARPU_MKL_BLAS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_MKL_BLAS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_SYSTEM_BLAS_LIB_TRUE}" && test -z "${STARPU_SYSTEM_BLAS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_SYSTEM_BLAS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_NO_BLAS_LIB_TRUE}" && test -z "${STARPU_NO_BLAS_LIB_FALSE}"; then + as_fn_error $? "conditional \"STARPU_NO_BLAS_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_MIN_DGELS_TRUE}" && test -z "${STARPU_USE_MIN_DGELS_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_MIN_DGELS\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_HAVE_FFTW_TRUE}" && test -z "${STARPU_HAVE_FFTW_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_FFTW\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_FFTWF_TRUE}" && test -z "${STARPU_HAVE_FFTWF_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_FFTWF\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_FFTWL_TRUE}" && test -z "${STARPU_HAVE_FFTWL_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_FFTWL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_STARPUFFT_TRUE}" && test -z "${STARPU_BUILD_STARPUFFT_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_STARPUFFT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_STARPUFFT_EXAMPLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_HWLOC_TRUE}" && test -z "${STARPU_HAVE_HWLOC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_HWLOC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE}" && test -z "${STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HWLOC_HAVE_TOPOLOGY_DUP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_F77_H_TRUE}" && test -z "${STARPU_HAVE_F77_H_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_F77_H\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_HAVE_ICC_TRUE}" && test -z "${STARPU_HAVE_ICC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_ICC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_HELP2MAN_TRUE}" && test -z "${STARPU_HAVE_HELP2MAN_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_HELP2MAN\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_AM111_TRUE}" && test -z "${STARPU_HAVE_AM111_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_AM111\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_HAVE_AM111_TRUE}" && test -z "${STARPU_HAVE_AM111_FALSE}"; then + as_fn_error $? "conditional \"STARPU_HAVE_AM111\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPURM_HAVE_DLB_TRUE}" && test -z "${STARPURM_HAVE_DLB_FALSE}"; then + as_fn_error $? "conditional \"STARPURM_HAVE_DLB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_STARPURM_TRUE}" && test -z "${STARPU_BUILD_STARPURM_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_STARPURM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_STARPURM_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_STARPURM_EXAMPLES_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_STARPURM_EXAMPLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_STARPUPY_TRUE}" && test -z "${STARPU_BUILD_STARPUPY_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_STARPUPY\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${STARPU_STARPUPY_NUMPY_TRUE}" && test -z "${STARPU_STARPUPY_NUMPY_FALSE}"; then + as_fn_error $? "conditional \"STARPU_STARPUPY_NUMPY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_DOC_TRUE}" && test -z "${STARPU_BUILD_DOC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_DOC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_AVAILABLE_DOC_TRUE}" && test -z "${STARPU_AVAILABLE_DOC_FALSE}"; then + as_fn_error $? "conditional \"STARPU_AVAILABLE_DOC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_DOC_PDF_TRUE}" && test -z "${STARPU_BUILD_DOC_PDF_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_DOC_PDF\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_AVAILABLE_DOC_PDF_TRUE}" && test -z "${STARPU_AVAILABLE_DOC_PDF_FALSE}"; then + as_fn_error $? "conditional \"STARPU_AVAILABLE_DOC_PDF\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_USE_JULIA_TRUE}" && test -z "${STARPU_USE_JULIA_FALSE}"; then + as_fn_error $? "conditional \"STARPU_USE_JULIA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_BUILD_ECLIPSE_PLUGIN_TRUE}" && test -z "${STARPU_BUILD_ECLIPSE_PLUGIN_FALSE}"; then + as_fn_error $? "conditional \"STARPU_BUILD_ECLIPSE_PLUGIN\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${STARPU_SANITIZE_TRUE}" && test -z "${STARPU_SANITIZE_FALSE}"; then + as_fn_error $? "conditional \"STARPU_SANITIZE\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else $as_nop + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. 
+LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else $as_nop + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else $as_nop + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... 
but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. 
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by StarPU $as_me 1.4.10, which was +generated by GNU Autoconf 2.71. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_links="$ac_config_links" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... 
+ + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration links: +$config_links + +Configuration commands: +$config_commands + +Report bugs to . +StarPU home page: ." + +_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config='$ac_cs_config_escaped' +ac_cs_version="\\ +StarPU config.status 1.4.10 +configured by $0, generated by GNU Autoconf 2.71, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2021 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + printf "%s\n" "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + printf "%s\n" "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + printf "%s\n" "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." 
;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + printf "%s\n" "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED 
"$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' +max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +FILECMD='`$ECHO "$FILECMD" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED 
"$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +lt_ar_flags='`$ECHO "$lt_ar_flags" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_import='`$ECHO "$lt_cv_sys_global_symbol_to_import" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +lt_cv_nm_interface='`$ECHO "$lt_cv_nm_interface" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | 
$SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +lt_cv_truncate_bin='`$ECHO "$lt_cv_truncate_bin" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' +DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED 
"$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO 
"$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +configure_time_dlsearch_path='`$ECHO "$configure_time_dlsearch_path" | $SED "$delay_single_quote_subst"`' +configure_time_lt_sys_library_path='`$ECHO "$configure_time_lt_sys_library_path" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED 
"$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`' +predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`' +postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`' +predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`' +postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`' +LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`' +LD_F77='`$ECHO "$LD_F77" | $SED "$delay_single_quote_subst"`' +LD_FC='`$ECHO "$LD_FC" | $SED "$delay_single_quote_subst"`' +reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`' +reload_flag_F77='`$ECHO "$reload_flag_F77" | $SED "$delay_single_quote_subst"`' +reload_flag_FC='`$ECHO "$reload_flag_FC" | $SED "$delay_single_quote_subst"`' +reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`' +reload_cmds_F77='`$ECHO "$reload_cmds_F77" | $SED "$delay_single_quote_subst"`' +reload_cmds_FC='`$ECHO "$reload_cmds_FC" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_F77='`$ECHO "$old_archive_cmds_F77" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_FC='`$ECHO "$old_archive_cmds_FC" | $SED "$delay_single_quote_subst"`' +compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`' +compiler_F77='`$ECHO "$compiler_F77" | $SED "$delay_single_quote_subst"`' +compiler_FC='`$ECHO "$compiler_FC" | $SED 
"$delay_single_quote_subst"`' +GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`' +GCC_F77='`$ECHO "$GCC_F77" | $SED "$delay_single_quote_subst"`' +GCC_FC='`$ECHO "$GCC_FC" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_F77='`$ECHO "$lt_prog_compiler_no_builtin_flag_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_FC='`$ECHO "$lt_prog_compiler_no_builtin_flag_FC" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic_F77='`$ECHO "$lt_prog_compiler_pic_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic_FC='`$ECHO "$lt_prog_compiler_pic_FC" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_F77='`$ECHO "$lt_prog_compiler_wl_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_FC='`$ECHO "$lt_prog_compiler_wl_FC" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_F77='`$ECHO "$lt_prog_compiler_static_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_FC='`$ECHO "$lt_prog_compiler_static_FC" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_F77='`$ECHO "$lt_cv_prog_compiler_c_o_F77" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_FC='`$ECHO "$lt_cv_prog_compiler_c_o_FC" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc_F77='`$ECHO "$archive_cmds_need_lc_F77" | $SED "$delay_single_quote_subst"`' 
+archive_cmds_need_lc_FC='`$ECHO "$archive_cmds_need_lc_FC" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_F77='`$ECHO "$enable_shared_with_static_runtimes_F77" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_FC='`$ECHO "$enable_shared_with_static_runtimes_FC" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_F77='`$ECHO "$export_dynamic_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_FC='`$ECHO "$export_dynamic_flag_spec_FC" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec_F77='`$ECHO "$whole_archive_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec_FC='`$ECHO "$whole_archive_flag_spec_FC" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_F77='`$ECHO "$compiler_needs_object_F77" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_FC='`$ECHO "$compiler_needs_object_FC" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_F77='`$ECHO "$old_archive_from_new_cmds_F77" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_FC='`$ECHO "$old_archive_from_new_cmds_FC" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds_F77='`$ECHO "$old_archive_from_expsyms_cmds_F77" | $SED "$delay_single_quote_subst"`' 
+old_archive_from_expsyms_cmds_FC='`$ECHO "$old_archive_from_expsyms_cmds_FC" | $SED "$delay_single_quote_subst"`' +archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_F77='`$ECHO "$archive_cmds_F77" | $SED "$delay_single_quote_subst"`' +archive_cmds_FC='`$ECHO "$archive_cmds_FC" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_F77='`$ECHO "$archive_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_FC='`$ECHO "$archive_expsym_cmds_FC" | $SED "$delay_single_quote_subst"`' +module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_cmds_F77='`$ECHO "$module_cmds_F77" | $SED "$delay_single_quote_subst"`' +module_cmds_FC='`$ECHO "$module_cmds_FC" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_F77='`$ECHO "$module_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_FC='`$ECHO "$module_expsym_cmds_FC" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_F77='`$ECHO "$with_gnu_ld_F77" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_FC='`$ECHO "$with_gnu_ld_FC" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_F77='`$ECHO "$allow_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_FC='`$ECHO "$allow_undefined_flag_FC" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_F77='`$ECHO "$no_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_FC='`$ECHO "$no_undefined_flag_FC" | $SED "$delay_single_quote_subst"`' 
+hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_F77='`$ECHO "$hardcode_libdir_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_FC='`$ECHO "$hardcode_libdir_flag_spec_FC" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator_F77='`$ECHO "$hardcode_libdir_separator_F77" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator_FC='`$ECHO "$hardcode_libdir_separator_FC" | $SED "$delay_single_quote_subst"`' +hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_F77='`$ECHO "$hardcode_direct_F77" | $SED "$delay_single_quote_subst"`' +hardcode_direct_FC='`$ECHO "$hardcode_direct_FC" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_F77='`$ECHO "$hardcode_direct_absolute_F77" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_FC='`$ECHO "$hardcode_direct_absolute_FC" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_F77='`$ECHO "$hardcode_minus_L_F77" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_FC='`$ECHO "$hardcode_minus_L_FC" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_F77='`$ECHO "$hardcode_shlibpath_var_F77" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_FC='`$ECHO "$hardcode_shlibpath_var_FC" | $SED "$delay_single_quote_subst"`' +hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_automatic_F77='`$ECHO "$hardcode_automatic_F77" | $SED 
"$delay_single_quote_subst"`' +hardcode_automatic_FC='`$ECHO "$hardcode_automatic_FC" | $SED "$delay_single_quote_subst"`' +inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`' +inherit_rpath_F77='`$ECHO "$inherit_rpath_F77" | $SED "$delay_single_quote_subst"`' +inherit_rpath_FC='`$ECHO "$inherit_rpath_FC" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_F77='`$ECHO "$link_all_deplibs_F77" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_FC='`$ECHO "$link_all_deplibs_FC" | $SED "$delay_single_quote_subst"`' +always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`' +always_export_symbols_F77='`$ECHO "$always_export_symbols_F77" | $SED "$delay_single_quote_subst"`' +always_export_symbols_FC='`$ECHO "$always_export_symbols_FC" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_F77='`$ECHO "$export_symbols_cmds_F77" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_FC='`$ECHO "$export_symbols_cmds_FC" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_F77='`$ECHO "$exclude_expsyms_F77" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_FC='`$ECHO "$exclude_expsyms_FC" | $SED "$delay_single_quote_subst"`' +include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +include_expsyms_F77='`$ECHO "$include_expsyms_F77" | $SED "$delay_single_quote_subst"`' +include_expsyms_FC='`$ECHO "$include_expsyms_FC" | $SED "$delay_single_quote_subst"`' +prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +prelink_cmds_F77='`$ECHO "$prelink_cmds_F77" | $SED "$delay_single_quote_subst"`' +prelink_cmds_FC='`$ECHO "$prelink_cmds_FC" | $SED 
"$delay_single_quote_subst"`' +postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +postlink_cmds_F77='`$ECHO "$postlink_cmds_F77" | $SED "$delay_single_quote_subst"`' +postlink_cmds_FC='`$ECHO "$postlink_cmds_FC" | $SED "$delay_single_quote_subst"`' +file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`' +file_list_spec_F77='`$ECHO "$file_list_spec_F77" | $SED "$delay_single_quote_subst"`' +file_list_spec_FC='`$ECHO "$file_list_spec_FC" | $SED "$delay_single_quote_subst"`' +hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_action_F77='`$ECHO "$hardcode_action_F77" | $SED "$delay_single_quote_subst"`' +hardcode_action_FC='`$ECHO "$hardcode_action_FC" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs_F77='`$ECHO "$compiler_lib_search_dirs_F77" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs_FC='`$ECHO "$compiler_lib_search_dirs_FC" | $SED "$delay_single_quote_subst"`' +predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`' +predep_objects_F77='`$ECHO "$predep_objects_F77" | $SED "$delay_single_quote_subst"`' +predep_objects_FC='`$ECHO "$predep_objects_FC" | $SED "$delay_single_quote_subst"`' +postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`' +postdep_objects_F77='`$ECHO "$postdep_objects_F77" | $SED "$delay_single_quote_subst"`' +postdep_objects_FC='`$ECHO "$postdep_objects_FC" | $SED "$delay_single_quote_subst"`' +predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`' +predeps_F77='`$ECHO "$predeps_F77" | $SED "$delay_single_quote_subst"`' +predeps_FC='`$ECHO "$predeps_FC" | $SED "$delay_single_quote_subst"`' +postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`' +postdeps_F77='`$ECHO "$postdeps_F77" | $SED 
"$delay_single_quote_subst"`' +postdeps_FC='`$ECHO "$postdeps_FC" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path_F77='`$ECHO "$compiler_lib_search_path_F77" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path_FC='`$ECHO "$compiler_lib_search_path_FC" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in AS \ +DLLTOOL \ +OBJDUMP \ +SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +FILECMD \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +sharedlib_from_linklib_cmd \ +AR \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_import \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +lt_cv_nm_interface \ +nm_file_list_spec \ +lt_cv_truncate_bin \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib \ +compiler_lib_search_dirs \ +predep_objects \ +postdep_objects \ 
+predeps \ +postdeps \ +compiler_lib_search_path \ +LD_CXX \ +LD_F77 \ +LD_FC \ +reload_flag_CXX \ +reload_flag_F77 \ +reload_flag_FC \ +compiler_CXX \ +compiler_F77 \ +compiler_FC \ +lt_prog_compiler_no_builtin_flag_CXX \ +lt_prog_compiler_no_builtin_flag_F77 \ +lt_prog_compiler_no_builtin_flag_FC \ +lt_prog_compiler_pic_CXX \ +lt_prog_compiler_pic_F77 \ +lt_prog_compiler_pic_FC \ +lt_prog_compiler_wl_CXX \ +lt_prog_compiler_wl_F77 \ +lt_prog_compiler_wl_FC \ +lt_prog_compiler_static_CXX \ +lt_prog_compiler_static_F77 \ +lt_prog_compiler_static_FC \ +lt_cv_prog_compiler_c_o_CXX \ +lt_cv_prog_compiler_c_o_F77 \ +lt_cv_prog_compiler_c_o_FC \ +export_dynamic_flag_spec_CXX \ +export_dynamic_flag_spec_F77 \ +export_dynamic_flag_spec_FC \ +whole_archive_flag_spec_CXX \ +whole_archive_flag_spec_F77 \ +whole_archive_flag_spec_FC \ +compiler_needs_object_CXX \ +compiler_needs_object_F77 \ +compiler_needs_object_FC \ +with_gnu_ld_CXX \ +with_gnu_ld_F77 \ +with_gnu_ld_FC \ +allow_undefined_flag_CXX \ +allow_undefined_flag_F77 \ +allow_undefined_flag_FC \ +no_undefined_flag_CXX \ +no_undefined_flag_F77 \ +no_undefined_flag_FC \ +hardcode_libdir_flag_spec_CXX \ +hardcode_libdir_flag_spec_F77 \ +hardcode_libdir_flag_spec_FC \ +hardcode_libdir_separator_CXX \ +hardcode_libdir_separator_F77 \ +hardcode_libdir_separator_FC \ +exclude_expsyms_CXX \ +exclude_expsyms_F77 \ +exclude_expsyms_FC \ +include_expsyms_CXX \ +include_expsyms_F77 \ +include_expsyms_FC \ +file_list_spec_CXX \ +file_list_spec_F77 \ +file_list_spec_FC \ +compiler_lib_search_dirs_CXX \ +compiler_lib_search_dirs_F77 \ +compiler_lib_search_dirs_FC \ +predep_objects_CXX \ +predep_objects_F77 \ +predep_objects_FC \ +postdep_objects_CXX \ +postdep_objects_F77 \ +postdep_objects_FC \ +predeps_CXX \ +predeps_F77 \ +predeps_FC \ +postdeps_CXX \ +postdeps_F77 \ +postdeps_FC \ +compiler_lib_search_path_CXX \ +compiler_lib_search_path_F77 \ +compiler_lib_search_path_FC; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` 
in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +configure_time_dlsearch_path \ +configure_time_lt_sys_library_path \ +reload_cmds_CXX \ +reload_cmds_F77 \ +reload_cmds_FC \ +old_archive_cmds_CXX \ +old_archive_cmds_F77 \ +old_archive_cmds_FC \ +old_archive_from_new_cmds_CXX \ +old_archive_from_new_cmds_F77 \ +old_archive_from_new_cmds_FC \ +old_archive_from_expsyms_cmds_CXX \ +old_archive_from_expsyms_cmds_F77 \ +old_archive_from_expsyms_cmds_FC \ +archive_cmds_CXX \ +archive_cmds_F77 \ +archive_cmds_FC \ +archive_expsym_cmds_CXX \ +archive_expsym_cmds_F77 \ +archive_expsym_cmds_FC \ +module_cmds_CXX \ +module_cmds_F77 \ +module_cmds_FC \ +module_expsym_cmds_CXX \ +module_expsym_cmds_F77 \ +module_expsym_cmds_FC \ +export_symbols_cmds_CXX \ +export_symbols_cmds_F77 \ +export_symbols_cmds_FC \ +prelink_cmds_CXX \ +prelink_cmds_F77 \ +prelink_cmds_FC \ +postlink_cmds_CXX \ +postlink_cmds_F77 \ +postlink_cmds_FC; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' + +# See if we are running on zsh, and set the 
options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile' + + + + + + + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "executable-scripts") CONFIG_COMMANDS="$CONFIG_COMMANDS executable-scripts" ;; + "socl/vendors/$new_icd") CONFIG_LINKS="$CONFIG_LINKS socl/vendors/$new_icd:$icd" ;; + "tests/regression/regression.sh") CONFIG_FILES="$CONFIG_FILES tests/regression/regression.sh" ;; + "tests/regression/profiles") CONFIG_FILES="$CONFIG_FILES tests/regression/profiles" ;; + "tests/regression/profiles.build.only") CONFIG_FILES="$CONFIG_FILES tests/regression/profiles.build.only" ;; + "src/common/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/common/config.h" ;; + "src/common/config-src-build.h") CONFIG_HEADERS="$CONFIG_HEADERS src/common/config-src-build.h" ;; + "include/starpu_config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/starpu_config.h" ;; + "starpurm/include/starpurm_config.h") CONFIG_HEADERS="$CONFIG_HEADERS starpurm/include/starpurm_config.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;; + "tools/starpu_env") CONFIG_FILES="$CONFIG_FILES tools/starpu_env" ;; + "tools/starpu_codelet_profile") CONFIG_FILES="$CONFIG_FILES tools/starpu_codelet_profile" ;; + "tools/starpu_codelet_histo_profile") CONFIG_FILES="$CONFIG_FILES tools/starpu_codelet_histo_profile" ;; + "tools/starpu_mpi_comm_matrix.py") CONFIG_FILES="$CONFIG_FILES tools/starpu_mpi_comm_matrix.py" ;; + "tools/starpu_fxt_number_events_to_names.py") 
CONFIG_FILES="$CONFIG_FILES tools/starpu_fxt_number_events_to_names.py" ;; + "tools/starpu_workers_activity") CONFIG_FILES="$CONFIG_FILES tools/starpu_workers_activity" ;; + "tools/starpu_paje_draw_histogram") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_draw_histogram" ;; + "tools/starpu_paje_state_stats") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_state_stats" ;; + "tools/starpu_paje_summary") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_summary" ;; + "tools/starpu_config") CONFIG_FILES="$CONFIG_FILES tools/starpu_config" ;; + "tools/starpu_mlr_analysis") CONFIG_FILES="$CONFIG_FILES tools/starpu_mlr_analysis" ;; + "tools/starpu_paje_sort") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_sort" ;; + "tools/starpu_smpirun") CONFIG_FILES="$CONFIG_FILES tools/starpu_smpirun" ;; + "tools/starpu_tcpipexec") CONFIG_FILES="$CONFIG_FILES tools/starpu_tcpipexec" ;; + "socl/Makefile") CONFIG_FILES="$CONFIG_FILES socl/Makefile" ;; + "socl/src/Makefile") CONFIG_FILES="$CONFIG_FILES socl/src/Makefile" ;; + "socl/examples/Makefile") CONFIG_FILES="$CONFIG_FILES socl/examples/Makefile" ;; + "socl/vendors/socl.icd") CONFIG_FILES="$CONFIG_FILES socl/vendors/socl.icd" ;; + "socl/vendors/install/socl.icd") CONFIG_FILES="$CONFIG_FILES socl/vendors/install/socl.icd" ;; + "packages/libstarpu.pc") CONFIG_FILES="$CONFIG_FILES packages/libstarpu.pc" ;; + "packages/starpu-1.0.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.0.pc" ;; + "packages/starpu-1.1.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.1.pc" ;; + "packages/starpu-1.2.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.2.pc" ;; + "packages/starpu-1.3.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.3.pc" ;; + "packages/starpu-1.4.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.4.pc" ;; + "packages/starpu-1.3") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.3" ;; + "packages/starpu-1.4") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.4" ;; + "mpi/packages/libstarpumpi.pc") CONFIG_FILES="$CONFIG_FILES 
mpi/packages/libstarpumpi.pc" ;; + "mpi/packages/starpumpi-1.0.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.0.pc" ;; + "mpi/packages/starpumpi-1.1.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.1.pc" ;; + "mpi/packages/starpumpi-1.2.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.2.pc" ;; + "mpi/packages/starpumpi-1.3.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.3.pc" ;; + "mpi/packages/starpumpi-1.4.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.4.pc" ;; + "starpufft/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/Makefile" ;; + "starpufft/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/src/Makefile" ;; + "starpufft/tests/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/tests/Makefile" ;; + "starpufft/packages/libstarpufft.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/libstarpufft.pc" ;; + "starpufft/packages/starpufft-1.0.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.0.pc" ;; + "starpufft/packages/starpufft-1.1.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.1.pc" ;; + "starpufft/packages/starpufft-1.2.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.2.pc" ;; + "starpufft/packages/starpufft-1.3.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.3.pc" ;; + "starpufft/packages/starpufft-1.4.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.4.pc" ;; + "starpurm/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/Makefile" ;; + "starpurm/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/src/Makefile" ;; + "starpurm/tests/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/tests/Makefile" ;; + "starpurm/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/examples/Makefile" ;; + "starpurm/packages/starpurm-1.3.pc") CONFIG_FILES="$CONFIG_FILES starpurm/packages/starpurm-1.3.pc" ;; + "starpurm/packages/starpurm-1.4.pc") CONFIG_FILES="$CONFIG_FILES starpurm/packages/starpurm-1.4.pc" ;; + "starpu_openmp_llvm/Makefile") 
CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/Makefile" ;; + "starpu_openmp_llvm/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/src/Makefile" ;; + "starpu_openmp_llvm/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/examples/Makefile" ;; + "starpupy/src/setup.cfg") CONFIG_FILES="$CONFIG_FILES starpupy/src/setup.cfg" ;; + "starpupy/src/setup.py") CONFIG_FILES="$CONFIG_FILES starpupy/src/setup.py" ;; + "starpupy/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/Makefile" ;; + "starpupy/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/src/Makefile" ;; + "starpupy/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/examples/Makefile" ;; + "starpupy/execute.sh") CONFIG_FILES="$CONFIG_FILES starpupy/execute.sh" ;; + "starpupy/benchmark/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/benchmark/Makefile" ;; + "examples/Makefile") CONFIG_FILES="$CONFIG_FILES examples/Makefile" ;; + "examples/stencil/Makefile") CONFIG_FILES="$CONFIG_FILES examples/stencil/Makefile" ;; + "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;; + "tests/model-checking/Makefile") CONFIG_FILES="$CONFIG_FILES tests/model-checking/Makefile" ;; + "tests/model-checking/starpu-mc.sh") CONFIG_FILES="$CONFIG_FILES tests/model-checking/starpu-mc.sh" ;; + "mpi/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/Makefile" ;; + "mpi/src/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/src/Makefile" ;; + "mpi/tests/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/tests/Makefile" ;; + "mpi/examples/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/examples/Makefile" ;; + "mpi/tools/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/tools/Makefile" ;; + "mpi/GNUmakefile") CONFIG_FILES="$CONFIG_FILES mpi/GNUmakefile" ;; + "sc_hypervisor/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/Makefile" ;; + "sc_hypervisor/src/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/src/Makefile" ;; + "sc_hypervisor/examples/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/examples/Makefile" ;; + 
"doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; + "doc/doxygen/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen/Makefile" ;; + "doc/doxygen/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen-config.cfg" ;; + "doc/doxygen/doxygen-config-include.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen-config-include.cfg" ;; + "doc/doxygen/doxygen_filter.sh") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen_filter.sh" ;; + "doc/doxygen_dev/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/Makefile" ;; + "doc/doxygen_dev/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen-config.cfg" ;; + "doc/doxygen_dev/doxygen_filter.sh") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen_filter.sh" ;; + "doc/doxygen_dev/doxygen-config-include.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen-config-include.cfg" ;; + "doc/doxygen_web_introduction/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_introduction/Makefile" ;; + "doc/doxygen_web_introduction/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_introduction/doxygen-config.cfg" ;; + "doc/doxygen_web_installation/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_installation/Makefile" ;; + "doc/doxygen_web_installation/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_installation/doxygen-config.cfg" ;; + "doc/doxygen_web_basics/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_basics/Makefile" ;; + "doc/doxygen_web_basics/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_basics/doxygen-config.cfg" ;; + "doc/doxygen_web_applications/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_applications/Makefile" ;; + "doc/doxygen_web_applications/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_applications/doxygen-config.cfg" ;; + "doc/doxygen_web_performances/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_performances/Makefile" ;; + "doc/doxygen_web_performances/doxygen-config.cfg") 
CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_performances/doxygen-config.cfg" ;; + "doc/doxygen_web_faq/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_faq/Makefile" ;; + "doc/doxygen_web_faq/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_faq/doxygen-config.cfg" ;; + "doc/doxygen_web_languages/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_languages/Makefile" ;; + "doc/doxygen_web_languages/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_languages/doxygen-config.cfg" ;; + "doc/doxygen_web_extensions/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_extensions/Makefile" ;; + "doc/doxygen_web_extensions/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_extensions/doxygen-config.cfg" ;; + "tools/msvc/starpu_var.bat") CONFIG_FILES="$CONFIG_FILES tools/msvc/starpu_var.bat" ;; + "min-dgels/Makefile") CONFIG_FILES="$CONFIG_FILES min-dgels/Makefile" ;; + "bubble/Makefile") CONFIG_FILES="$CONFIG_FILES bubble/Makefile" ;; + "bubble/tests/Makefile") CONFIG_FILES="$CONFIG_FILES bubble/tests/Makefile" ;; + "julia/Makefile") CONFIG_FILES="$CONFIG_FILES julia/Makefile" ;; + "julia/src/Makefile") CONFIG_FILES="$CONFIG_FILES julia/src/Makefile" ;; + "julia/src/dynamic_compiler/Makefile") CONFIG_FILES="$CONFIG_FILES julia/src/dynamic_compiler/Makefile" ;; + "julia/examples/Makefile") CONFIG_FILES="$CONFIG_FILES julia/examples/Makefile" ;; + "julia/examples/execute.sh") CONFIG_FILES="$CONFIG_FILES julia/examples/execute.sh" ;; + "eclipse-plugin/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/Makefile" ;; + "eclipse-plugin/src/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/src/Makefile" ;; + "eclipse-plugin/examples/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/examples/Makefile" ;; + "eclipse-plugin/examples/hello/.cproject") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/examples/hello/.cproject" ;; + + *) as_fn_error $? 
"invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers + test ${CONFIG_LINKS+y} || CONFIG_LINKS=$config_links + test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. 
+if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + 
length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. 
Preserve backslash +# newline sequences. + +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? 
"could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. 
+ case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`printf "%s\n" "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. +_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + :L) + # + # CONFIG_LINK + # + + if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then + : + else + # Prefer the file from the source tree if names are identical. + if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then + ac_source=$srcdir/$ac_source + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 +printf "%s\n" "$as_me: linking $ac_source to $ac_file" >&6;} + + if test ! -r "$ac_source"; then + as_fn_error $? "$ac_source: file not found" "$LINENO" 5 + fi + rm -f "$ac_file" + + # Try a relative symlink, then a hard link, then a copy. 
+ case $ac_source in + [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; + *) ac_rel_source=$ac_top_build_prefix$ac_source ;; + esac + ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || + ln "$ac_source" "$ac_file" 2>/dev/null || + cp -p "$ac_source" "$ac_file" || + as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 + fi + ;; + :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +printf "%s\n" "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE=\"gmake\" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# The names of the tagged configurations supported by this script. +available_tags='CXX F77 FC ' + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Assembler program. +AS=$lt_AS + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Object dumper program. 
+OBJDUMP=$lt_OBJDUMP + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shared archive member basename,for filename based shared library versioning on AIX. +shared_archive_member_spec=$shared_archive_member_spec + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. +NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# A file(cmd) program that detects file types. +FILECMD=$lt_FILECMD + +# Method to check whether dependent libraries are shared objects. 
+deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive (by configure). +lt_ar_flags=$lt_ar_flags + +# Flags to create an archive. +AR_FLAGS=\${ARFLAGS-"\$lt_ar_flags"} + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm into a list of symbols to manually relocate. +global_symbol_to_import=$lt_lt_cv_sys_global_symbol_to_import + +# Transform the output of nm in a C name address pair. +global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# The name lister interface. 
+nm_interface=$lt_lt_cv_nm_interface + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and where our libraries should be installed. +lt_sysroot=$lt_sysroot + +# Command to truncate a binary pipe. +lt_truncate_bin=$lt_lt_cv_truncate_bin + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. +version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. 
+libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Detected run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_configure_time_dlsearch_path + +# Explicit LT_SYS_LIBRARY_PATH set during ./configure time. +configure_time_lt_sys_library_path=$lt_configure_time_lt_sys_library_path + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? 
+with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. 
+# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. 
+postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects +postdep_objects=$lt_postdep_objects +predeps=$lt_predeps +postdeps=$lt_postdeps + +# The library search path used internally by the compiler when linking +# a shared library. +compiler_lib_search_path=$lt_compiler_lib_search_path + +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + + +ltmain=$ac_aux_dir/ltmain.sh + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: CXX + +# The linker used to build libraries. +LD=$lt_LD_CXX + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_CXX +reload_cmds=$lt_reload_cmds_CXX + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_CXX + +# A language specific compiler. +CC=$lt_compiler_CXX + +# Is the compiler the GNU compiler? +with_gcc=$GCC_CXX + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_CXX + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_CXX + +# Compiler flag to prevent dynamic linking. 
+link_static_flag=$lt_lt_prog_compiler_static_CXX + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_CXX + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_CXX + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_CXX +archive_expsym_cmds=$lt_archive_expsym_cmds_CXX + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds_CXX +module_expsym_cmds=$lt_module_expsym_cmds_CXX + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_CXX + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_CXX + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_CXX + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX + +# Whether we need a single "-rpath" flag with a separated argument. 
+hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct_CXX + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute_CXX + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L_CXX + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_CXX + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath_CXX + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_CXX + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_CXX + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_CXX + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_CXX + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_CXX + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_CXX + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_CXX + +# Specify filename containing input files. 
+file_list_spec=$lt_file_list_spec_CXX + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action_CXX + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects_CXX +postdep_objects=$lt_postdep_objects_CXX +predeps=$lt_predeps_CXX +postdeps=$lt_postdeps_CXX + +# The library search path used internally by the compiler when linking +# a shared library. +compiler_lib_search_path=$lt_compiler_lib_search_path_CXX + +# ### END LIBTOOL TAG CONFIG: CXX +_LT_EOF + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: F77 + +# The linker used to build libraries. +LD=$lt_LD_F77 + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_F77 +reload_cmds=$lt_reload_cmds_F77 + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_F77 + +# A language specific compiler. +CC=$lt_compiler_F77 + +# Is the compiler the GNU compiler? +with_gcc=$GCC_F77 + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_F77 + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_F77 + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_F77 + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static_F77 + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o_F77 + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_F77 + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_F77 + +# Compiler flag to allow reflexive dlopens. 
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_F77 + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec_F77 + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_F77 + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_F77 + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_F77 + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_F77 +archive_expsym_cmds=$lt_archive_expsym_cmds_F77 + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds_F77 +module_expsym_cmds=$lt_module_expsym_cmds_F77 + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_F77 + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_F77 + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_F77 + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_F77 + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator_F77 + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct_F77 + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. 
+hardcode_direct_absolute=$hardcode_direct_absolute_F77 + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L_F77 + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var_F77 + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_F77 + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath_F77 + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_F77 + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_F77 + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_F77 + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_F77 + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_F77 + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_F77 + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_F77 + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec_F77 + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action_F77 + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_F77 + +# Dependencies to place before and after the objects being linked to +# create a shared library. 
+predep_objects=$lt_predep_objects_F77 +postdep_objects=$lt_postdep_objects_F77 +predeps=$lt_predeps_F77 +postdeps=$lt_postdeps_F77 + +# The library search path used internally by the compiler when linking +# a shared library. +compiler_lib_search_path=$lt_compiler_lib_search_path_F77 + +# ### END LIBTOOL TAG CONFIG: F77 +_LT_EOF + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: FC + +# The linker used to build libraries. +LD=$lt_LD_FC + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_FC +reload_cmds=$lt_reload_cmds_FC + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_FC + +# A language specific compiler. +CC=$lt_compiler_FC + +# Is the compiler the GNU compiler? +with_gcc=$GCC_FC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_FC + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_FC + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_FC + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static_FC + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o_FC + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_FC + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_FC + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_FC + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec_FC + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_FC + +# Create an old-style archive from a shared archive. 
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_FC + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_FC + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_FC +archive_expsym_cmds=$lt_archive_expsym_cmds_FC + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds_FC +module_expsym_cmds=$lt_module_expsym_cmds_FC + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_FC + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_FC + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_FC + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_FC + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator_FC + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct_FC + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute_FC + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L_FC + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. 
+hardcode_shlibpath_var=$hardcode_shlibpath_var_FC + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_FC + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath_FC + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_FC + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_FC + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_FC + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_FC + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_FC + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_FC + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_FC + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec_FC + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action_FC + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_FC + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects_FC +postdep_objects=$lt_postdep_objects_FC +predeps=$lt_predeps_FC +postdeps=$lt_postdeps_FC + +# The library search path used internally by the compiler when linking +# a shared library. 
+compiler_lib_search_path=$lt_compiler_lib_search_path_FC + +# ### END LIBTOOL TAG CONFIG: FC +_LT_EOF + + ;; + "executable-scripts":C) + chmod +x tests/regression/regression.sh + chmod +x tests/model-checking/starpu-mc.sh + chmod +x tools/starpu_env + chmod +x tools/starpu_codelet_profile + chmod +x tools/starpu_codelet_histo_profile + chmod +x tools/starpu_mpi_comm_matrix.py + chmod +x tools/starpu_fxt_number_events_to_names.py + chmod +x tools/starpu_workers_activity + chmod +x tools/starpu_paje_draw_histogram + chmod +x tools/starpu_paje_state_stats + chmod +x tools/starpu_paje_summary + chmod +x tools/starpu_config + chmod +x tools/starpu_mlr_analysis + chmod +x tools/starpu_paje_sort + chmod +x tools/starpu_smpirun + chmod +x tools/starpu_tcpipexec + chmod +x doc/doxygen/doxygen_filter.sh + chmod +x doc/doxygen_dev/doxygen_filter.sh + chmod +x starpupy/execute.sh + chmod +x julia/examples/execute.sh + for x in \ + tests/microbenchs/tasks_data_overhead.sh \ + tests/microbenchs/sync_tasks_data_overhead.sh \ + tests/microbenchs/async_tasks_data_overhead.sh \ + tests/microbenchs/tasks_size_overhead.sh \ + tests/microbenchs/tasks_size_overhead_sched.sh \ + tests/microbenchs/tasks_size_overhead_scheds.sh \ + tests/microbenchs/tasks_size_overhead.gp \ + tests/microbenchs/microbench.sh \ + tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ + tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ + tests/microbenchs/parallel_independent_heterogeneous_tasks.sh \ + tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh \ + tests/microbenchs/parallel_independent_homogeneous_tasks.sh \ + tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh \ + tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh \ + tests/microbenchs/bandwidth_scheds.sh \ + tests/energy/static.sh \ + tests/energy/dynamic.sh \ + tests/datawizard/locality.sh \ + tests/overlap/overlap.sh \ + tests/model-checking/prio_list.sh \ + 
tests/model-checking/prio_list2.sh \ + tests/model-checking/prio_list3.sh \ + tests/model-checking/barrier.sh \ + examples/heat/heat.sh \ + examples/lu/lu.sh \ + examples/cholesky/cholesky.sh \ + examples/cholesky/cholesky_julia.sh \ + examples/mult/sgemm.sh \ + examples/scheduler/schedulers.sh \ + examples/scheduler/schedulers_context.sh \ + examples/scheduler/libdummy_sched.sh \ + examples/profiling_tool/prof.sh \ + tools/starpu_paje_draw_histogram.R \ + tools/starpu_paje_state_stats.R \ + tools/starpu_mlr_analysis.Rmd \ + tools/starpu_paje_summary.Rmd \ + tools/starpu_trace_state_stats.py \ + julia/examples/check_deps/check_deps.sh \ + julia/examples/mult/mult_starpu.sh \ + julia/examples/mult/perf.sh \ + julia/examples/variable/variable.sh \ + julia/examples/task_insert_color/task_insert_color.sh \ + julia/examples/vector_scal/vector_scal.sh \ + julia/examples/mandelbrot/mandelbrot.sh \ + julia/examples/callback/callback.sh \ + julia/examples/dependency/task_dep.sh \ + julia/examples/dependency/tag_dep.sh \ + julia/examples/dependency/end_dep.sh \ + julia/examples/axpy/axpy.sh \ + julia/examples/gemm/gemm.sh \ + julia/examples/cholesky/cholesky.sh \ + starpupy/benchmark/tasks_size_overhead.sh \ + starpupy/benchmark/tasks_size_overhead.gp \ + starpupy/benchmark/test_handle_perf.sh \ + starpupy/benchmark/test_handle_perf_pickle.sh \ + starpupy/examples/starpu_py.sh \ + starpupy/examples/starpu_py.concurrent.sh \ + starpupy/examples/starpu_py_handle.sh \ + starpupy/examples/starpu_py_handle.concurrent.sh \ + starpupy/examples/starpu_py_np.sh \ + starpupy/examples/starpu_py_np.concurrent.sh \ + starpupy/examples/starpu_py_parallel.sh \ + starpupy/examples/starpu_py_partition.sh \ + starpupy/examples/starpu_py_partition.concurrent.sh \ + starpupy/examples/starpu_py_perfmodel.sh \ + starpupy/examples/starpu_py_perfmodel.concurrent.sh \ + starpupy/examples/starpu_py_numpy.sh \ + starpupy/examples/starpu_py_numpy.concurrent.sh \ + ; do + test -e $x || ( mkdir -p 
$(dirname $x) && ln -sf $ac_abs_top_srcdir/$x $(dirname $x) ) + done + for x in tools julia/examples starpufft/tests examples examples/stencil mpi/tests mpi/examples socl/examples bubble/tests starpupy/examples starpu_openmp_llvm/examples \ + ; do + test -e $x/loader.c || ln -sf $ac_abs_top_srcdir/tests/loader.c $x + done + + sed -i -e '/ STARPU_SRC_DIR /d' -e '/ STARPU_BUILD_DIR /d' src/common/config.h + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+ $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: + + CPUs enabled: $enable_cpu + CUDA enabled: $enable_cuda $NO_NVML + HIP enabled: $enable_hip + OpenCL enabled: $enable_opencl + Max FPGA enabled: $enable_max_fpga + + Compile-time limits + (change these with --enable-maxcpus, --enable-maxcudadev, + --enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers) + (Note these numbers do not represent the number of detected + devices, but the maximum number of devices StarPU can manage) + + Maximum number of CPUs: $maxcpus + Maximum number of CUDA devices: $nmaxcudadev + Maximum number of HIP devices: $nmaxhipdev + Maximum number of OpenCL devices: $nmaxopencldev + Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev + Maximum number of MPI master-slave devices: $nmaxmpidev + Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev + Maximum number of memory nodes: $maxnodes + Maximum number of task buffers: $nmaxbuffers + + CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer + CUDA Map: $enable_cuda_map + HIP GPU-GPU transfers: $enable_hip_memcpy_peer + Allocation cache: $enable_allocation_cache + + Magma enabled: $have_magma + BLAS library: $blas_lib + hwloc: $have_valid_hwloc + FxT trace enabled: $enable_fxt + + Documentation HTML: $enable_build_doc + Documentation PDF: $enable_build_doc_pdf + Examples: $enable_build_examples + + StarPU Extensions: + StarPU MPI enabled: $build_mpi_lib + StarPU MPI failure tolerance: $enable_mpi_ft + StarPU MPI failure tolerance stats: $use_mpi_ft_stats + StarPU MPI(nmad) enabled: $build_nmad_lib + MPI test suite: $running_mpi_check + Master-Slave MPI enabled: $build_mpi_master_slave + 
Master-Slave TCP/IP enabled: $build_tcpip_master_slave + FFT Support: $fft_support + Resource Management enabled: $starpurm_support + Python Interface enabled: $starpupy_support + OpenMP runtime support enabled: $enable_openmp + OpenMP LLVM runtime support enabled: $enable_openmp_llvm + Parallel Worker support enabled: $enable_parallel_worker + SOCL enabled: $build_socl + SOCL test suite: $run_socl_check + Scheduler Hypervisor: $build_sc_hypervisor + simgrid enabled: $enable_simgrid + ayudame enabled: $ayu_msg + HDF5 enabled: $enable_hdf5 + Native fortran support: $enable_build_fortran + Native MPI fortran support: $use_mpi_fort + Support for multiple linear regression models: $support_mlr + Hierarchical dags support: $enable_bubble + JULIA enabled: $enable_julia +" >&5 +printf "%s\n" "$as_me: + + CPUs enabled: $enable_cpu + CUDA enabled: $enable_cuda $NO_NVML + HIP enabled: $enable_hip + OpenCL enabled: $enable_opencl + Max FPGA enabled: $enable_max_fpga + + Compile-time limits + (change these with --enable-maxcpus, --enable-maxcudadev, + --enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers) + (Note these numbers do not represent the number of detected + devices, but the maximum number of devices StarPU can manage) + + Maximum number of CPUs: $maxcpus + Maximum number of CUDA devices: $nmaxcudadev + Maximum number of HIP devices: $nmaxhipdev + Maximum number of OpenCL devices: $nmaxopencldev + Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev + Maximum number of MPI master-slave devices: $nmaxmpidev + Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev + Maximum number of memory nodes: $maxnodes + Maximum number of task buffers: $nmaxbuffers + + CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer + CUDA Map: $enable_cuda_map + HIP GPU-GPU transfers: $enable_hip_memcpy_peer + Allocation cache: $enable_allocation_cache + + Magma enabled: $have_magma + BLAS library: $blas_lib + hwloc: $have_valid_hwloc + FxT trace enabled: 
$enable_fxt + + Documentation HTML: $enable_build_doc + Documentation PDF: $enable_build_doc_pdf + Examples: $enable_build_examples + + StarPU Extensions: + StarPU MPI enabled: $build_mpi_lib + StarPU MPI failure tolerance: $enable_mpi_ft + StarPU MPI failure tolerance stats: $use_mpi_ft_stats + StarPU MPI(nmad) enabled: $build_nmad_lib + MPI test suite: $running_mpi_check + Master-Slave MPI enabled: $build_mpi_master_slave + Master-Slave TCP/IP enabled: $build_tcpip_master_slave + FFT Support: $fft_support + Resource Management enabled: $starpurm_support + Python Interface enabled: $starpupy_support + OpenMP runtime support enabled: $enable_openmp + OpenMP LLVM runtime support enabled: $enable_openmp_llvm + Parallel Worker support enabled: $enable_parallel_worker + SOCL enabled: $build_socl + SOCL test suite: $run_socl_check + Scheduler Hypervisor: $build_sc_hypervisor + simgrid enabled: $enable_simgrid + ayudame enabled: $ayu_msg + HDF5 enabled: $enable_hdf5 + Native fortran support: $enable_build_fortran + Native MPI fortran support: $use_mpi_fort + Support for multiple linear regression models: $support_mlr + Hierarchical dags support: $enable_bubble + JULIA enabled: $enable_julia +" >&6;} + +if test "$build_socl" = "yes" -a "$run_socl_check" = "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: +WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined. +To run the tests, you need to install the OCL implementation of ICD +(https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1) +and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so." >&5 +printf "%s\n" "$as_me: +WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined. 
+To run the tests, you need to install the OCL implementation of ICD +(https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1) +and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so." >&6;} +fi + +if test x"$have_valid_hwloc" = xno -a "$enable_simgrid" = "no" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: +WARNING: hwloc was not enabled. If the target machine is hyperthreaded the +performance may be impacted a lot. It is strongly recommended to install +hwloc" >&5 +printf "%s\n" "$as_me: +WARNING: hwloc was not enabled. If the target machine is hyperthreaded the +performance may be impacted a lot. It is strongly recommended to install +hwloc" >&6;} +fi + +if test x"$starpu_windows" = xyes -a "x$STARPU_MS_LIB" = "x" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: +WARNING: lib was not found, you will not be able to build StarPU applications +with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g + c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE; + c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" >&5 +printf "%s\n" "$as_me: +WARNING: lib was not found, you will not be able to build StarPU applications +with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g + c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE; + c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" >&6;} +fi + diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..042cae4 --- /dev/null +++ b/configure.ac @@ -0,0 +1,4573 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2018-2018 Umeà University +# Copyright (C) 2018,2020 Federal University of Rio Grande do Sul (UFRGS) +# Copyright (C) 2017-2017 Guillaume Beauchamp +# Copyright (C) 2013-2013 Thibaut Lambert +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +AC_INIT([StarPU], [1.4.10], [starpu-devel@inria.fr], [starpu], [http://gitlab.inria.fr/starpu/starpu]) +AC_CONFIG_SRCDIR(include/starpu.h) +AC_CONFIG_AUX_DIR([build-aux]) + +# libtool doesn't actually properly manage a space in the workdir +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac + +dnl Versioning. + +STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`" +STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`" +STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`" +STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`" +dnl we do not want the rcXX in the release version. 
we would like to use sed -r 's/[a-z]+.*//' to remove any string but the -r option is not portable +AC_SUBST([STARPU_MAJOR_VERSION]) +AC_SUBST([STARPU_MINOR_VERSION]) +AC_SUBST([STARPU_RELEASE_VERSION]) +AC_SUBST([STARPU_EFFECTIVE_VERSION]) +AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION], [Major version number of StarPU.]) +AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION], [Minor version number of StarPU.]) +AC_DEFINE_UNQUOTED([STARPU_RELEASE_VERSION], [$STARPU_RELEASE_VERSION], [Release version number of StarPU.]) + +. "$srcdir/STARPU-VERSION" +AC_SUBST([LIBSTARPU_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPU_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPU_INTERFACE_AGE]) +AC_SUBST([LIBSTARPUMPI_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPUMPI_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPUMPI_INTERFACE_AGE]) +AC_SUBST([LIBSTARPUFFT_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPUFFT_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPUFFT_INTERFACE_AGE]) +AC_SUBST([LIBSTARPURM_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPURM_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPURM_INTERFACE_AGE]) +AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE]) +AC_SUBST([LIBSOCL_INTERFACE_CURRENT]) +AC_SUBST([LIBSOCL_INTERFACE_REVISION]) +AC_SUBST([LIBSOCL_INTERFACE_AGE]) +AC_SUBST([LIBSTARPUJULIA_INTERFACE_CURRENT]) +AC_SUBST([LIBSTARPUJULIA_INTERFACE_REVISION]) +AC_SUBST([LIBSTARPUJULIA_INTERFACE_AGE]) + +AC_CANONICAL_SYSTEM + +AM_INIT_AUTOMAKE([1.11 -Wall -Wno-portability foreign silent-rules color-tests parallel-tests subdir-objects tar-pax]) + +m4_ifdef([AM_SILENT_RULES], + [AM_SILENT_RULES(yes)]) + +AC_PREREQ(2.64) + +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) +AC_PROG_CC +AM_PROG_CC_C_O +AC_PROG_CXX +AC_PROG_CPP +AC_PROG_SED +AC_PROG_LN_S +AC_PROG_F77 +AC_PROG_FC +AC_PROG_GREP +AC_PROG_EGREP +AC_PROG_INSTALL +AC_PROG_MKDIR_P +AC_CHECK_PROGS(PROG_STAT,gstat stat) +AC_CHECK_PROGS(PROG_DATE,gdate 
date) +AC_PATH_PROGS(PROG_FIND,find) +AC_CHECK_PROGS(PROG_CLANG,clang) + +AC_CACHE_CHECK([for parallel that supports semaphores with exit code], [ac_cv_path_PARALLEL], + [AC_PATH_PROGS_FEATURE_CHECK([PARALLEL], [parallel], + [[parallel --semaphore --id starpu --fg --fg-exit -j 2 exit 42 > /dev/null 2>&1 + [ $? = 42 ] && ac_cv_path_PARALLEL=$ac_path_PARALLEL ac_path_PARALLEL_found=:]], + [ac_cv_path_PARALLEL=no])]) +AC_SUBST([PARALLEL], [$ac_cv_path_PARALLEL]) +AM_CONDITIONAL([HAVE_PARALLEL], [test "x$PARALLEL" != "xno"]) + + +dnl locate pkg-config +PKG_PROG_PKG_CONFIG + +AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid], + [Enable simulating execution in simgrid])], + enable_simgrid=$enableval, enable_simgrid=no) + +if test x$enable_perf_debug = xyes; then + enable_shared=no +fi + +default_enable_mpi_check=no + +if test x$enable_simgrid = xyes ; then + default_enable_mpi=no +else + default_enable_mpi=maybe +fi + +IS_SUPPORTED_FLAG_VAR(-Wno-unused,APP) +IS_SUPPORTED_FFLAG(-Wno-unused-dummy-argument,[APP_FFLAGS="$APP_FFLAGS -Wno-unused-dummy-argument"]) +IS_SUPPORTED_FCFLAG(-Wno-unused-dummy-argument,[APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument"]) +AC_SUBST(APP_CFLAGS) +AC_SUBST(APP_CXXFLAGS) +AC_SUBST(APP_FFLAGS) +AC_SUBST(APP_FCFLAGS) + +AC_FUNC_MMAP + +############################################################################### +# # +# Forwarded options # +# # +# Move here options whose values are needed early # +# # +############################################################################### +# +AC_ARG_ENABLE(starpupy, [AS_HELP_STRING([--enable-starpupy], [enable StarPU python interface])], + enable_starpupy=$enableval, enable_starpupy=maybe) + +############################################################################### +# # +# Profiling tool support # +# # +############################################################################### + +AC_ARG_ENABLE(prof-tool, [AS_HELP_STRING([--enable-prof-tool], + [enable profiling tool])], + 
enable_prof_tool=$enableval, enable_prof_tool=yes) +if test x$enable_prof_tool = xyes; then + AC_DEFINE(STARPU_PROF_TOOL, [1], [Define this to enable profiling tool support]) +fi +AC_MSG_CHECKING([for profiling tool support]) +AC_MSG_RESULT($enable_prof_tool) + +############################################################################### +# # +# Hierarchical dags support # +# # +############################################################################### + +AC_ARG_ENABLE(bubble, [AS_HELP_STRING([--enable-bubble], + [build the hierarchical dags (a.k.a bubble) support])], + enable_bubble=$enableval, enable_bubble=no) + +AC_MSG_CHECKING([for hierarchical dags - a.k.a bubble - support]) + +if test x$enable_bubble = xyes; then + AC_DEFINE(STARPU_BUBBLE, [1], [Define this to enable hierarchical dags support]) +fi + +AM_CONDITIONAL([STARPU_BUBBLE], [test "x$enable_bubble" = "xyes"]) +AC_MSG_RESULT($enable_bubble) + +AC_MSG_CHECKING(whether bubble debug messages should be displayed) +AC_ARG_ENABLE(bubble-verbose, [AS_HELP_STRING([--enable-bubble-verbose], + [display verbose bubble messages])], + enable_bubble_verbose=$enableval, enable_bubble_verbose=no) +AC_MSG_RESULT($enable_bubble_verbose) +# A plain --enable-bubble-verbose sets enableval to yes so it must turn the messages on. The value extra is still accepted for backward compatibility. +if test x$enable_bubble_verbose = xyes || test x$enable_bubble_verbose = xextra; then + AC_DEFINE(STARPU_BUBBLE_VERBOSE, [1], [display verbose bubble debug messages]) +fi + +############################################################################### +# # +# Drivers # +# # +############################################################################### + +AC_ARG_ENABLE(opencl-simulator, [AS_HELP_STRING([--enable-opencl-simulator], + [Enable the use of an OpenCL simulator])], + enable_opencl_simulator=$enableval, enable_opencl_simulator=no) +if test x$enable_opencl_simulator = xyes; then + enable_simgrid=yes + AC_DEFINE(STARPU_OPENCL_SIMULATOR, [1], [Define this to enable using an OpenCL simulator]) +fi + +AC_ARG_WITH(simgrid-dir, + [AS_HELP_STRING([--with-simgrid-dir=], + [specify SimGrid installation
directory])], + [ + simgrid_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + ], simgrid_dir=no) + +AC_ARG_WITH(simgrid-include-dir, + [AS_HELP_STRING([--with-simgrid-include-dir=], + [specify where SimGrid headers are installed])], + [ + simgrid_include_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + ], [simgrid_include_dir=no]) + +AC_ARG_WITH(simgrid-lib-dir, + [AS_HELP_STRING([--with-simgrid-lib-dir=], + [specify where SimGrid libraries are installed])], + [ + simgrid_lib_dir="$withval" + # in case this was not explicit yet + enable_simgrid=yes + ], [simgrid_lib_dir=no]) + +if test x$enable_simgrid = xyes ; then + PKG_CHECK_MODULES([SIMGRID], [simgrid], [], [:]) + + if test "$simgrid_include_dir" != "no" ; then + SIMGRID_CFLAGS="-I$simgrid_include_dir $SIMGRID_CFLAGS" + fi + if test "$simgrid_lib_dir" != "no" ; then + SIMGRID_LIBS="-L$simgrid_lib_dir $SIMGRID_LIBS" + fi + if test "$simgrid_dir" != "no" ; then + SIMGRID_CFLAGS="-I$simgrid_dir/include $SIMGRID_CFLAGS" + SIMGRID_LIBS="-L$simgrid_dir/lib $SIMGRID_LIBS" + else + # Ask the pkg-config program that was located earlier for the prefix so that cross-compilation and user-selected wrappers are honoured. The prefix stays empty when none was found. + if test -n "$PKG_CONFIG" ; then + simgrid_dir="$($PKG_CONFIG --variable=prefix simgrid)" + else + simgrid_dir="" + fi + fi + if test -n "$SIMGRID_CFLAGS" ; then + CFLAGS="$SIMGRID_CFLAGS $CFLAGS" + CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS" + NVCCFLAGS="$SIMGRID_CFLAGS $NVCCFLAGS" + HIPCCFLAGS="$SIMGRID_CFLAGS $HIPCCFLAGS" + fi + SAVED_LIBS="${LIBS}" + LIBS="$SIMGRID_LIBS $LIBS" + AC_HAVE_LIBRARY([simgrid], [], + [ + AC_MSG_ERROR(Simgrid support needs simgrid installed) + ] + ) + AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])]) + AC_CHECK_HEADERS([msg/msg.h], [AC_DEFINE([STARPU_HAVE_MSG_MSG_H], [1], [Define to 1 if you have msg.h in msg/.])]) + AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/link.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_LINK_H], [1], [Define to 1 if you have
link.h in simgrid/.])]) + AC_CHECK_HEADERS([xbt/base.h], [AC_DEFINE([STARPU_HAVE_XBT_BASE_H], [1], [Define to 1 if you have base.h in xbt/.])]) + AC_CHECK_HEADERS([simgrid/version.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_VERSION_H], [1], [Define to 1 if you have version.h in simgrid/.])], [], [[ + #ifdef STARPU_HAVE_XBT_BASE_H + #include + #endif + ]]) + AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])]) + AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])]) + AC_CHECK_HEADERS([xbt/config.h], [AC_DEFINE([STARPU_HAVE_XBT_CONFIG_H], [1], [Define to 1 if you have config.h in xbt/.])]) + AC_CHECK_HEADERS([simgrid/actor.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ACTOR_H], [1], [Define to 1 if you have actor.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/engine.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ENGINE_H], [1], [Define to 1 if you have engine.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/semaphore.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SEMAPHORE_H], [1], [Define to 1 if you have semaphore.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/mutex.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MUTEX_H], [1], [Define to 1 if you have mutex.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/cond.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_COND_H], [1], [Define to 1 if you have cond.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/barrier.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_BARRIER_H], [1], [Define to 1 if you have barrier.h in simgrid/.])]) + AC_CHECK_HEADERS([simgrid/engine.h]) + AC_CHECK_HEADERS([simgrid/zone.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ZONE_H], [1], [Define to 1 if you have zone.h in simgrid/.])]) + AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include ]]) + + # Latest functions + AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_attach_pthread 
sg_actor_init sg_actor_set_stacksize sg_actor_on_exit MSG_zone_get_hosts sg_zone_get_hosts sg_zone_get_all_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_get_data sg_actor_set_data sg_actor_data]) + AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_get_name sg_link_name sg_link_set_bandwidth sg_link_bandwidth_set sg_host_get_route sg_host_get_route_links sg_host_route sg_host_self sg_host_list sg_host_get_speed sg_host_speed simcall_process_create sg_config_continue_after_help]) + AC_CHECK_FUNCS([simgrid_set_maestro]) + AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])]) + AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])]) + AC_CHECK_FUNCS([sg_actor_sleep_for sg_actor_self sg_actor_ref sg_host_get_properties sg_host_get_property_names sg_host_send_to sg_host_sendto sg_cfg_set_int sg_actor_self_execute sg_actor_execute simgrid_get_clock]) + AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include ]]) + + # Oldies for compatibility with older simgrid + AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed]) + LIBS="${SAVED_LIBS}" + + AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution]) + # We won't bind or detect anything + with_hwloc=no + + # disable mpi checks by default, they require static linking, we don't + # want that by default + default_enable_mpi_check=no + + # disable MPI support by default + default_enable_mpi=no + + AC_LANG_PUSH([C++]) + if test x$enable_shared = xno ; then + # When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp + SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++" + LIBS="$LIBS -lstdc++" + fi + SIMGRID_LDFLAGS="$SIMGRID_LIBS -lsimgrid" + + # Simgrid 3.12 & 3.13 need 
-std=c++11 to be able to build anything in C++... + case \ $CXXFLAGS\ in + *\ -std=*\ *) ;; + *) + # Make sure our C++ compiler can compile simgrid headers + SIMGRID_INCLUDES=" +#ifdef STARPU_HAVE_SIMGRID_MSG_H +#include +#include +#elif defined(STARPU_HAVE_MSG_MSG_H) +#include +#endif + +#ifdef STARPU_HAVE_XBT_BASE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_VERSION_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_ZONE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_HOST_H +#include +#endif + +#include +" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[$SIMGRID_INCLUDES]])],, + CXXFLAGS="-std=c++11 $CXXFLAGS") + ;; + esac + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[$SIMGRID_INCLUDES + #include ]], + [[simgrid::s4u::Engine::on_time_advance_cb([](double delta) { });]] + )], + AC_DEFINE(STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB, [1], [Define this to 1 when s4u::Engine::on_time_advance_cb is available])) + AC_LANG_POP([C++]) + AC_ARG_ENABLE(simgrid-mc, [AS_HELP_STRING([--enable-simgrid-mc], + [Enable using Model Checker of simgrid])], + enable_simgrid_mc=$enableval, enable_simgrid_mc=no) + if test x$enable_simgrid_mc = xyes ; then + AC_DEFINE(STARPU_SIMGRID_MC, [1], [Define this to enable Model Checker in simgrid execution]) + AC_PATH_PROG([SIMGRID_MC], [simgrid-mc], [no], [$simgrid_dir/bin:$PATH]) + LDFLAGS="$LDFLAGS -Wl,-znorelro -Wl,-znoseparate-code" + # libsimgrid needs to be linked from binaries themselves for MC to work + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $SIMGRID_LDFLAGS" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#ifdef STARPU_HAVE_XBT_BASE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_VERSION_H +#include +#endif +#if SIMGRID_VERSION < 33100 +#error no mutex support with MC +#endif + ]])],, + AC_MSG_ERROR([We need simgrid >= 3.31 for MC])) + fi +fi +AM_CONDITIONAL(STARPU_SIMGRID_MC, test x$enable_simgrid_mc = xyes) +AM_CONDITIONAL(STARPU_SIMGRID, test x$enable_simgrid = xyes) +AC_SUBST(SIMGRID_CFLAGS) +AC_SUBST(SIMGRID_LDFLAGS) +AC_MSG_CHECKING(whether SimGrid is 
enabled) +AC_MSG_RESULT($enable_simgrid) + +AC_MSG_CHECKING(whether blocking drivers should be enabled) +AC_ARG_ENABLE(blocking-drivers, [AS_HELP_STRING([--enable-blocking-drivers], [enable blocking drivers])], + enable_blocking=$enableval, enable_blocking=$enable_simgrid) +AC_MSG_RESULT($enable_blocking) + +if test x$enable_blocking = xno ; then + if test x$enable_simgrid = xyes ; then + AC_MSG_ERROR([--disable-blocking-drivers cannot be used in simgrid mode]) + fi + AC_DEFINE(STARPU_NON_BLOCKING_DRIVERS, [1], [drivers must progress]) +fi + +if test x$enable_blocking = xyes ; then + AC_MSG_CHECKING(whether worker callbacks should be enabled) + AC_ARG_ENABLE(worker-callbacks, [AS_HELP_STRING([--enable-worker-callbacks], [enable worker callbacks])], + enable_worker_cb=$enableval, enable_worker_cb=no) + AC_MSG_RESULT($enable_worker_cb) +else + # worker sleep/wake-up callbacks only make sense if blocking drivers are enabled + enable_worker_cb=no +fi + +if test x$enable_worker_cb = xyes ; then + AC_DEFINE(STARPU_WORKER_CALLBACKS, [1], [workers must call callbacks on sleep/wake-up]) +fi + +############################################################################### +# # +# LIBTOOLS # +# # +############################################################################### + +#c++11 detection +AX_CXX_COMPILE_STDCXX(11,noext,optional) + +AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11) +AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1]) +if test $HAVE_CXX11 -eq 1; then + AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11]) +fi + +LT_PREREQ([2.2]) +LT_INIT([win32-dll]) + +AC_HEADER_STDC + +AC_C_RESTRICT + +# Check if bash is available +AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH]) + +# Record git version +AC_PATH_PROG(gitcommand, git) +if test -f $srcdir/STARPU-REVISION ; then + cp $srcdir/STARPU-REVISION . +elif test "$gitcommand" = "" ; then + echo "unknown" > ./STARPU-REVISION +else + bdir=$PWD + cd $srcdir + git log -n 1 --pretty="%H%d" . 
> $bdir/STARPU-REVISION_tmp + cd $bdir + if test -s ./STARPU-REVISION_tmp ; then + mv ./STARPU-REVISION_tmp ./STARPU-REVISION + else + echo "unknown" > ./STARPU-REVISION + fi +fi + +AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"]) + +############################################################################### +# # +# MPI compilers # +# # +############################################################################### + +#Check MPICC +if test x$enable_simgrid = xyes ; then + DEFAULT_MPICC=smpicc +else + DEFAULT_MPICC=mpicc +fi +AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc=], [Name or path of the mpicc compiler])], [DEFAULT_MPICC=$withval]) +case $DEFAULT_MPICC in + /*) mpicc_path="$DEFAULT_MPICC" ;; + *) AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH]) ;; +esac +# We test if the MPICC compiler exists +if test ! -x $mpicc_path; then + AC_MSG_RESULT(The mpicc compiler '$mpicc_path' does not have the execute permission) + mpicc_path=no +fi + +AC_MSG_CHECKING(whether mpicc is available) +AC_MSG_RESULT($mpicc_path) +AC_SUBST(MPICC, $mpicc_path) + +if test x$mpicc_path != xno ; then + MPIPATH=$(dirname $mpicc_path):$PATH +else + MPIPATH=$PATH +fi + +#Check MPICXX/MPIC++ +if test x$enable_simgrid = xyes ; then + DEFAULT_MPICXX=smpicxx +else + DEFAULT_MPICXX=mpicxx +fi +AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx=], [Name or path of the mpicxx/mpic++ compiler])], [DEFAULT_MPICXX=$withval]) +case $DEFAULT_MPICXX in + /*) mpicxx_path="$DEFAULT_MPICXX" ;; + *) AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$MPIPATH]) ;; +esac + +# try with mpic++ if mpicxx was not found +if test x$mpicxx_path = xno ; then + DEFAULT_MPICXX=mpic++ + AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$MPIPATH]) +fi + +# We test if the MPICXX/MPIC++ compiler exists +if test ! 
-x $mpicxx_path; then + AC_MSG_RESULT(The mpicxx compiler '$mpicxx_path' does not have the execute permission) + mpicxx_path=no +fi + +AC_MSG_CHECKING(whether mpicxx is available) +AC_MSG_RESULT($mpicxx_path) +AC_SUBST(MPICXX, $mpicxx_path) + +# Check if mpiexec is available +if test x$enable_simgrid = xyes ; then + DEFAULT_MPIEXEC=smpirun + AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=]], [Name or path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval]) +else + DEFAULT_MPIEXEC=mpiexec + AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=], [Name or path of mpiexec])], [DEFAULT_MPIEXEC=$withval]) +fi + +case $DEFAULT_MPIEXEC in + /*) mpiexec_path="$DEFAULT_MPIEXEC" ;; + *) AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$MPIPATH]) +esac +AC_MSG_CHECKING(whether mpiexec is available) +AC_MSG_RESULT($mpiexec_path) + +# We test if MPIEXEC exists +if test ! -x $mpiexec_path; then + AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid) + default_enable_mpi_check=no + mpiexec_path="" +fi +AC_SUBST(MPIEXEC,$mpiexec_path) + +############################################################################### +# # +# MPI # +# # +############################################################################### + +AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi], + [Disable StarPU MPI library generation])], + [enable_mpi=$enableval], + [enable_mpi=$default_enable_mpi]) + +if test x$enable_mpi = xmaybe ; then + if test -x "$mpicc_path"; then + enable_mpi=yes + else + enable_mpi=no + fi +fi + +# in case MPI was explicitly required, but mpicc is not available, this is an error +if test x$enable_mpi = xyes ; then + if test ! 
-x "$mpicc_path"; then + AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid]) + fi + + OLD_CC=$CC + CC=$mpicc_path + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[ + #include + #include + ]],,)], + [AC_DEFINE(STARPU_HAVE_MPI_EXT, [1], [ is available])]) + + AC_CHECK_FUNC([MPI_Comm_create_group], [AC_DEFINE([STARPU_HAVE_MPI_COMM_CREATE_GROUP], [1], [Define to 1 if the function MPI_Comm_create_group is available.])]) + CC=$OLD_CC +fi + +build_mpi_lib=$enable_mpi + +AC_ARG_ENABLE(mpi-minimal-tests, [AS_HELP_STRING([--enable-mpi-minimal-tests], + [Only enable a subset of MPI tests])], + [enable_mpi_minimal_tests=$enableval], + [enable_mpi_minimal_tests=no]) + +AM_CONDITIONAL([STARPU_MPI_MINIMAL_TESTS], [test x$enable_mpi_minimal_tests = xyes]) + + +############################################################################### +# # +# NEW MADELEINE # +# # +############################################################################### + +AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad], + [Enable StarPU MPI library generation using the new madeleine backend])], + [enable_nmad=$enableval], + [enable_nmad=no]) + +build_nmad_lib=no +AC_SUBST(CC_OR_MPICC, $cc_or_mpicc) +#We can only build StarPU MPI Library if User wants it and MPI is available +if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then + build_nmad_lib=yes + build_mpi_lib=no + PKG_CHECK_MODULES([NMAD],[nmad]) + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $NMAD_CFLAGS" + LIBS="$LIBS $NMAD_LIBS" + AC_CHECK_FUNCS([piom_ltask_set_bound_thread_os_indexes]) + AC_CHECK_FUNCS([nm_trace_add_synchro_point]) + CFLAGS="$save_CFLAGS" + LIBS="$save_LIBS" +else + build_nmad_lib=no +fi + +# If MadMPI is used, MadMPI can't be built with PIOman (we don't want communication progression to be done in both StarPU and MadMPI): +if test x$enable_mpi = xyes -a x$build_nmad_lib = xno -a ! 
-z "`$mpicc_path --showme|grep pioman`"; then + AC_MSG_WARN([Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression.]) + AC_DEFINE(HAVE_PIOMAN, [1], [PIOman (from PM2) is available]) +fi + +############################################################################### +# # +# MPI Master Slave # +# # +############################################################################### + +AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave], + [Enable StarPU to run with the master-slave mode])], + use_mpi_master_slave=$enableval, + use_mpi_master_slave=no) + +if test x$enable_simgrid = xyes; then + if test x$use_mpi_master_slave = xyes; then + AC_MSG_ERROR([MPI Master Slave not supported with simgrid]) + fi + use_mpi_master_slave=no +fi + +# in case it is explicitly required, but mpicc is not available, this is an error +if test x$use_mpi_master_slave = xyes -a ! -x "$mpicc_path"; then + AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid]) +fi + +#We can only build MPI Master Slave if User wants it and MPI compiler are available +if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then + build_mpi_master_slave=yes +else + build_mpi_master_slave=no +fi + +#users cannot use both at the same time +if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then + AC_MSG_WARN(StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI...) 
+ build_mpi_lib=no + build_nmad_lib=no + enable_mpi=no +fi + +if test x$build_mpi_master_slave = xyes; then + AC_DEFINE(STARPU_USE_MPI_MASTER_SLAVE, [1], [MPI Master Slave support is enabled]) + CC=$mpicc_path + CCLD=$mpicc_path + CXX=$mpicxx_path + CXXLD=$mpicxx_path +fi + +AC_MSG_CHECKING(whether the MPI master-slave mode should be enabled) +AC_MSG_RESULT($build_mpi_master_slave) +AM_CONDITIONAL([STARPU_USE_MPI_MASTER_SLAVE], [test x$build_mpi_master_slave = xyes]) + +AC_MSG_CHECKING(maximum number of MPI master-slave devices) +AC_ARG_ENABLE(maxmpidev, [AS_HELP_STRING([--enable-maxmpidev=], + [maximum number of MPI master-slave devices])], + nmaxmpidev=$enableval, + [ + if test x$build_mpi_master_slave = xyes; then + nmaxmpidev=4 + else + nmaxmpidev=0 + fi + ]) +if test x$nmaxmpidev = x -o x$nmaxmpidev = xyes +then + AC_MSG_ERROR([The --enable-maxmpidev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxmpidev) +AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices]) + +############################################################################### +# # +# TCP/IP Master Slave # +# # +############################################################################### + +AC_ARG_ENABLE(tcpip-master-slave, [AS_HELP_STRING([--enable-tcpip-master-slave], + [Enable StarPU to run with the master-slave mode])], + build_tcpip_master_slave=$enableval, + build_tcpip_master_slave=no) + +if test x$build_tcpip_master_slave = xyes; then + AC_CHECK_LIB([dl], [dlsym]) + AC_DEFINE(STARPU_USE_TCPIP_MASTER_SLAVE, [1], [TCPIP Master Slave support is enabled]) +fi + +AC_MSG_CHECKING(whether the TCP/IP master-slave mode should be enabled) +AC_MSG_RESULT($build_tcpip_master_slave) +AM_CONDITIONAL([STARPU_USE_TCPIP_MASTER_SLAVE], [test x$build_tcpip_master_slave = xyes]) + +AC_MSG_CHECKING(maximum number of TCP/IP master-slave devices) +AC_ARG_ENABLE(maxtcpipdev, [AS_HELP_STRING([--enable-maxtcpipdev=], + [maximum number of TCP/IP master-slave devices])],
+ nmaxtcpipdev=$enableval, + [ + if test x$build_tcpip_master_slave = xyes; then + nmaxtcpipdev=4 + else + nmaxtcpipdev=0 + fi + ]) +if test x$nmaxtcpipdev = x -o x$nmaxtcpipdev = xyes +then + AC_MSG_ERROR([The --enable-maxtcpipdev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxtcpipdev) +AC_DEFINE_UNQUOTED(STARPU_MAXTCPIPDEVS, [$nmaxtcpipdev], [maximum number of TCP/IP devices]) + +############################################################################### +# # +# Miscellaneous things for MPI # +# # +############################################################################### + +AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend], + [Prevent StarPU MPI from reading buffers while being sent over MPI])], + enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no) +if test x$enable_mpi_pedantic_isend = xyes; then + AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend]) +fi + +# If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check +if test -d "$srcdir/.git" -o -f "$srcdir/.git" ; then + default_enable_mpi_check=$enable_mpi +fi +AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]), + [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check]) +running_mpi_check=no +if test x$enable_mpi_check = xyes ; then + running_mpi_check=yes + if test x$enable_mpi = xno ; then + AC_MSG_ERROR([MPI checks requested, but MPI is disabled]) + fi +fi +if test x$enable_mpi_check = xmaybe ; then + running_mpi_check=yes +fi +if test x$enable_mpi_check = xno ; then + running_mpi_check=no +fi +if test x$enable_mpi = xno ; then + running_mpi_check=no +fi + +AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes) +AC_MSG_CHECKING(whether MPI tests should be run) +AC_MSG_RESULT($running_mpi_check) + +AC_MSG_CHECKING(whether the StarPU MPI library should be generated) +AC_MSG_RESULT($build_mpi_lib) 
+AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated) +AC_MSG_RESULT($build_nmad_lib) + +if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then + AC_DEFINE(STARPU_USE_MPI,[1],[whether the StarPU MPI library is available]) + if test x$build_mpi_lib = xyes ; then + AC_DEFINE(STARPU_USE_MPI_MPI,[1],[whether the StarPU MPI library (with a native MPI implementation) is available]) + else + AC_DEFINE(STARPU_USE_MPI_NMAD,[1],[whether the StarPU MPI library (with a NewMadeleine implementation) is available]) + fi +fi + +if test x$enable_mpi = xyes ; then + if test x$enable_simgrid = xyes ; then + if test x$enable_shared = xyes ; then + AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi]) + else + CFLAGS="$CFLAGS -fPIC" + CXXFLAGS="$CXXFLAGS -fPIC" + NVCCFLAGS="$NVCCFLAGS --compiler-options -fPIC" + HIPCCFLAGS="$HIPCCFLAGS --compiler-options -fPIC" + FFLAGS="$FFLAGS -fPIC" + FCFLAGS="$FCFLAGS -fPIC" + fi + fi + + enable_mpi_sync_clocks=no + PKG_CHECK_MODULES([MPI_SYNC_CLOCKS],[mpi_sync_clocks],[enable_mpi_sync_clocks=yes],[enable_mpi_sync_clocks=no]) + if test x$enable_mpi_sync_clocks = xyes ; then + AC_DEFINE(STARPU_HAVE_MPI_SYNC_CLOCKS, [1], [Define to 1 if you have mpi_sync_clocks and it is meant to be used]) + fi +fi + +AM_CONDITIONAL(STARPU_MPI_SYNC_CLOCKS, test x$enable_mpi_sync_clocks = xyes) +AM_CONDITIONAL(STARPU_USE_MPI_MPI, test x$build_mpi_lib = xyes) +AM_CONDITIONAL(STARPU_USE_MPI_NMAD, test x$build_nmad_lib = xyes) +AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes) + +###### Failure tolerance material ####### +default_enable_mpi_ft=no +AC_ARG_ENABLE(mpi-ft, AC_HELP_STRING([--enable-mpi-ft], [Enable failure tolerance mechanisms provided by StarPU]), + [enable_mpi_ft=$enableval], [enable_mpi_ft=$default_enable_mpi_ft]) + +default_enable_mpi_ft_stats=no +use_mpi_ft_stats=no
+AC_ARG_ENABLE(mpi-ft-stats, AC_HELP_STRING([--enable-mpi-ft-stats], [Enable stats for failure tolerance mechanisms]), + [enable_mpi_ft_stats=$enableval], [enable_mpi_ft_stats=$default_enable_mpi_ft_stats]) + +# TODO: Check MPI version to be ULFM +if test x$enable_mpi_ft = xyes ; then + if test x$build_mpi_lib != xyes ; then + AC_MSG_ERROR([Failure tolerance mechanisms only work with a particular MPI implementation: ULFM (OpenMPI based).]) + else + AC_DEFINE(STARPU_USE_MPI_FT, [1], [whether the StarPU MPI failure tolerance mechanisms are requested]) + use_mpi_ft=yes; + if test x$enable_mpi_ft_stats = xyes ; then + AC_DEFINE(STARPU_USE_MPI_FT_STATS, [1], [whether the StarPU MPI failure tolerance mechanisms stats are watched]) + use_mpi_ft_stats=$enable_mpi_ft_stats; + fi + fi +fi + +AM_CONDITIONAL(STARPU_USE_MPI_FT, [test x$use_mpi_ft = xyes]) +AM_CONDITIONAL(STARPU_USE_MPI_FT_STATS, [test x$use_mpi_ft_stats = xyes]) + +###### End of failure tolerance material ###### + + + +AC_ARG_WITH(mpiexec-args, [AS_HELP_STRING([--with-mpiexec-args[=]], + [Arguments for mpiexec])], + [ + mpiexec_args=$withval + ]) +AC_SUBST(MPIEXEC_ARGS,$mpiexec_args) + +AC_MSG_CHECKING(whether MPI debug messages should be displayed) +AC_ARG_ENABLE(mpi-verbose, [AS_HELP_STRING([--enable-mpi-verbose], + [display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])], + enable_mpi_verbose=$enableval, enable_mpi_verbose=no) +AC_MSG_RESULT($enable_mpi_verbose) +if test x$enable_mpi_verbose = xyes; then + AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages]) +fi +if test x$enable_mpi_verbose = xextra; then + AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages]) + AC_DEFINE(STARPU_MPI_EXTRA_VERBOSE, [1], [display MPI verbose debug messages]) +fi + +if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then + cc_or_mpicc=$mpicc_path + # For some reason, libtool uses gcc instead of mpicc when linking + # libstarpumpi. 
+ # On Darwin (and maybe other systems ?) the linker will fail (undefined + # references to MPI_*). We manually add the required flags to fix this + # issue. + + # openmpi version + MPICC_LDFLAGS=`$mpicc_path --showme:link 2>/dev/null` + + if test -z "$MPICC_LDFLAGS" + then + # mpich version + MPICC_LDFLAGS=`$mpicc_path -link_info | awk '{$1=""; print}'` + fi + AC_SUBST(MPICC_LDFLAGS) +else + cc_or_mpicc=$CC +fi +AC_SUBST(CC_OR_MPICC, $cc_or_mpicc) + +############################################################################### +# # +# NUMA memory nodes # +# # +############################################################################### + +default_nmaxnumanodes=2 +AC_PATH_PROG(hwloccalccommand, hwloc-calc) + +AC_MSG_CHECKING(maximum number of NUMA nodes) +AC_ARG_ENABLE(maxnumanodes, [AS_HELP_STRING([--enable-maxnumanodes=], + [maximum number of NUMA nodes])], + nmaxnumanodes=$enableval, nmaxnumanodes=auto) +if test x$nmaxnumanodes = xauto +then + if test "$hwloccalccommand" = ""; then + AC_MSG_WARN([hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes]) + nmaxnumanodes=$default_nmaxnumanodes + else + nmaxnumanodes=$($hwloccalccommand all -N node 2>/dev/null) + if test x$nmaxnumanodes = x; then + AC_MSG_WARN([hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes]) + nmaxnumanodes=$default_nmaxnumanodes + fi + fi +fi +if test x$nmaxnumanodes = x -o x$nmaxnumanodes = xyes +then + AC_MSG_ERROR([The --enable-maxnumanodes option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxnumanodes) +AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes], + [maximum number of NUMA nodes]) + + +############################################################################### + +AC_PATH_PROGS([STARPU_MS_LIB], [lib]) +AC_ARG_VAR([STARPU_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool]) +AM_CONDITIONAL([STARPU_HAVE_MS_LIB], [test "x$STARPU_MS_LIB" != "x"]) 
+case "$target" in +*-*-mingw*|*-*-cygwin*|*-*-msys*) + starpu_windows=yes + libext=a + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lwsock32" + AC_DEFINE(STARPU_HAVE_WINDOWS, [1], [Define this on windows.]) + ;; +*-*-linux*) + starpu_linux=yes + AC_DEFINE(STARPU_LINUX_SYS, [1], [Define to 1 on Linux]) + ;; +*-*-openbsd*) + starpu_openbsd=yes + AC_DEFINE(STARPU_OPENBSD_SYS, [1], [Define to 1 on OpenBSD systems]) + ;; +*-*darwin*) + starpu_darwin=yes + AC_DEFINE(STARPU_HAVE_DARWIN, [1], [Define this on darwin.]) + ;; +esac +AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"]) +AM_CONDITIONAL([STARPU_LINUX_SYS], [test "x$starpu_linux" = "xyes"]) +AM_CONDITIONAL([STARPU_HAVE_DARWIN], [test "x$starpu_darwin" = "xyes"]) +AM_CONDITIONAL([STARPU_OPENBSD_SYS], [test "x$starpu_openbsd" = "xyes"]) + +# on Darwin, GCC targets i386 by default, so we don't have atomic ops +AC_CHECK_SIZEOF([void *]) +SIZEOF_VOID_P=$ac_cv_sizeof_void_p +case $SIZEOF_VOID_P in + 4) + case "$target" in + i386-*darwin*) CFLAGS="$CFLAGS -march=i686" ;; + esac + STARPU_MS_LIB_ARCH=X86 + ;; + 8) + STARPU_MS_LIB_ARCH=X64 + ;; +esac +AC_SUBST(STARPU_MS_LIB_ARCH) + +# This will be useful for program which use CUDA (and .cubin files) which need +# some path to the CUDA code at runtime. 
+AC_DEFINE_UNQUOTED(STARPU_BUILD_DIR, "$PWD", [location of StarPU build directory]) +AC_SUBST(STARPU_BUILD_DIR, $PWD) +case "${srcdir}" in +/*) AC_DEFINE_UNQUOTED(STARPU_SRC_DIR, "$(eval echo ${srcdir})", [location of StarPU sources]) + AC_SUBST(STARPU_SRC_DIR, "$(eval echo ${srcdir})") ;; +*) AC_DEFINE_UNQUOTED(STARPU_SRC_DIR, "$(eval echo $PWD/${srcdir})", [location of StarPU sources]) + AC_SUBST(STARPU_SRC_DIR, "$(eval echo $PWD/${srcdir})") ;; +esac + +case "$target" in +*-*-mingw*|*-*-cygwin*) + AC_ARG_ENABLE(native-winthreads, [AS_HELP_STRING([--enable-native-winthreads], + [Use native windows threads instead of pthread])], + enable_native_winthreads=$enableval, enable_native_winthreads=no) + ;; +esac +if test x"$enable_native_winthreads" != xyes ; then + INCLUDE_PTHREAD_H='#include ' +fi + +AC_CHECK_HEADERS([unistd.h], [AC_DEFINE([STARPU_HAVE_UNISTD_H], [1], [Define to 1 if you have the header file.])]) + +AC_CHECK_TYPE([struct timespec], + AC_DEFINE(STARPU_HAVE_STRUCT_TIMESPEC,[1],[struct timespec is defined]), + [], [ +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +$INCLUDE_PTHREAD_H +]) + +if test x"$enable_native_winthreads" = xyes ; then + CPPFLAGS="$CPPFLAGS -I$STARPU_SRC_DIR/include/pthread_win32" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[ + #define STARPU_CONFIGURE + #include + ]], + [[ pthread_t t; pthread_create(&t, NULL, NULL, NULL); ]])], + AC_DEFINE(STARPU_NATIVE_WINTHREADS,[1],[Using native windows threads]), + AC_MSG_ERROR([pthread_create unavailable])) +else + AC_CHECK_LIB([pthread], [pthread_create], [ + LIBS="$LIBS -lpthread" + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lpthread" + ]) +fi + +AC_SEARCH_LIBS([sqrt],[m],,AC_MSG_ERROR([math library unavailable])) +AC_HAVE_LIBRARY([ws2_32]) +AC_CHECK_FUNCS([sysconf]) +AC_CHECK_FUNCS([getrlimit]) +AC_CHECK_FUNCS([scandir]) + +AC_CHECK_FUNC([pthread_spin_lock], have_pthread_spin_lock=yes, have_pthread_spin_lock=no) +if test x$have_pthread_spin_lock = xyes; then + 
AC_DEFINE(HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available]) + AC_DEFINE(STARPU_HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available]) +fi + +AC_CHECK_FUNC([pthread_barrier_init], have_pthread_barrier=yes, have_pthread_barrier=no) +if test x$have_pthread_barrier = xyes; then + AC_DEFINE(STARPU_HAVE_PTHREAD_BARRIER,[1],[pthread_barrier is available]) +fi + +# yes, that's non portable, but it's still better than sched_setaffinity +AC_CHECK_FUNCS(pthread_setaffinity_np) + +AC_CHECK_FUNC([pthread_setname_np], have_pthread_setname_np=yes, have_pthread_setname_np=no) +if test x$have_pthread_setname_np = xyes; then + AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available]) +fi + +if test "x$cross_compiling" = "xno"; then + STARPU_INIT_ZERO([[#include ]], pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER) + STARPU_INIT_ZERO([[#include ]], pthread_cond_t, PTHREAD_COND_INITIALIZER) + STARPU_INIT_ZERO([[#include ]], pthread_rwlock_t, PTHREAD_RWLOCK_INITIALIZER) +fi + +# There is no posix_memalign on Mac OS X, only memalign +AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])]) +AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])]) + +# Some systems don't have drand48 +AC_CHECK_FUNC([drand48], have_drand48=yes, have_drand48=no) +AC_CHECK_FUNC([erand48_r], have_erand48_r=yes, have_erand48_r=no) +# Maybe the user still does not want to use the provided drand48 +AC_ARG_ENABLE(default-drand48, [AS_HELP_STRING([--disable-default-drand48], + [Do not use the default version of drand48])], + enable_default_drand48=$enableval, enable_default_drand48=yes) +if test x$have_drand48 = xyes -a x$enable_default_drand48 = xyes ; then + AC_DEFINE([STARPU_USE_DRAND48], [1], [Define to 1 if drandr48 is available and should be used]) +fi +if test x$have_erand48_r = xyes ; then + AC_DEFINE([STARPU_USE_ERAND48_R], [1], 
[Define to 1 if erandr48_r is available]) +fi + +# Some systems do not define strerror_r +AC_CHECK_FUNC([strerror_r], [AC_DEFINE([STARPU_HAVE_STRERROR_R], [1], [Define to 1 if the function strerro_r is available.])]) + +# Some systems may not define setenv +AC_CHECK_FUNC([setenv], [AC_DEFINE([STARPU_HAVE_SETENV], [1], [Define to 1 if the function setenv is available.])]) + +# Some systems do not define unsetenv +AC_CHECK_FUNC([unsetenv], [AC_DEFINE([STARPU_HAVE_UNSETENV], [1], [Define to 1 if the function unsetenv is available.])]) + +# Some systems do not define nearbyintf... +AC_CHECK_FUNC([nearbyintf], [AC_DEFINE([STARPU_HAVE_NEARBYINTF], [1], [Define to 1 if the function nearbyintf is available.])]) + +# ... but they may define rintf. +AC_CHECK_FUNC([rintf], [AC_DEFINE([STARPU_HAVE_RINTF], [1], [Define to 1 if the function rintf is available.])]) + +# Define quick check +AC_ARG_ENABLE(quick-check, [AS_HELP_STRING([--enable-quick-check], + [Lower default values for the testcases run by make check to allow a faster execution])], + enable_quick_check=$enableval, enable_quick_check=no) +if test x$enable_quick_check = xyes; then + AC_DEFINE(STARPU_QUICK_CHECK, [1], [enable quick check]) +fi +AM_CONDITIONAL([STARPU_QUICK_CHECK], [test "x$enable_quick_check" = "xyes"]) + +# Define long check +AC_ARG_ENABLE(long-check, [AS_HELP_STRING([--enable-long-check], + [Enable some exhaustive checks which take a really long time])], + enable_long_check=$enableval, enable_long_check=no) +if test x$enable_long_check = xyes; then + AC_DEFINE(STARPU_LONG_CHECK, [1], [enable long check]) +fi +AM_CONDITIONAL([STARPU_LONG_CHECK], [test "x$enable_long_check" = "xyes"]) + +# Define new check +AC_ARG_ENABLE(new-check, [AS_HELP_STRING([--enable-new-check], + [Enable new and known-to-fail testcases])], + enable_new_check=$enableval, enable_new_check=no) +if test x$enable_new_check = xyes; then + AC_DEFINE(STARPU_NEW_CHECK, [1], [enable new check]) +fi +AM_CONDITIONAL([STARPU_NEW_CHECK], 
[test "x$enable_new_check" = "xyes"]) + +AC_CHECK_HEADERS([malloc.h], [AC_DEFINE([STARPU_HAVE_MALLOC_H], [1], [Define to 1 if you have the header file.])]) + +AC_ARG_ENABLE(valgrind, [AS_HELP_STRING([--disable-valgrind], + [Do not check the availability of valgrind.h and helgrind.h])], + enable_valgrind=$enableval, enable_valgrind=yes) +if test "$enable_valgrind" != "no" ; then + AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_DEFINE([STARPU_HAVE_VALGRIND_H], [1], [Define to 1 if you have the header file.])]) + AC_CHECK_HEADERS([valgrind/memcheck.h], [AC_DEFINE([STARPU_HAVE_MEMCHECK_H], [1], [Define to 1 if you have the header file.])]) + AC_CHECK_HEADERS([valgrind/helgrind.h], [AC_DEFINE([STARPU_HAVE_HELGRIND_H], [1], [Define to 1 if you have the header file.])]) +fi +if test "$enable_valgrind" = "full" ; then + AC_DEFINE([STARPU_VALGRIND_FULL], [1], [Define to 1 to disable STARPU_SKIP_IF_VALGRIND when running tests.]) +fi + +AC_CHECK_FUNC([sched_yield], [AC_DEFINE([STARPU_HAVE_SCHED_YIELD], [1], [Define to 1 if the function sched_yield is available.])]) + +AC_CHECK_HEADERS([aio.h]) +AC_CHECK_LIB([rt], [aio_read]) +#AC_CHECK_HEADERS([libaio.h]) +#AC_CHECK_LIB([aio], [io_setup]) +AC_CHECK_FUNCS([copy_file_range]) + +AC_CHECK_FUNCS([mkostemp]) +AC_CHECK_FUNCS([mkdtemp]) + +AC_CHECK_FUNCS([pread pwrite]) + +# Depending on the user environment, the hdf5 library may link against some +# mpi implementation, and bring surprising runtime behavior. 
+AC_ARG_ENABLE(hdf5, [AS_HELP_STRING([--enable-hdf5], [enable HDF5 support])], + enable_hdf5=$enableval, enable_hdf5=no) + +if test "x$enable_hdf5" != xno ; then + AC_ARG_WITH(hdf5-include-dir, + [AS_HELP_STRING([--with-hdf5-include-dir=], + [specify where HDF5 headers are installed])], + [ + hdf5_include_dir="$withval" + ], [hdf5_include_dir=""]) + + hdf5_inc_dir="/usr/include/hdf5 /usr/include/hdf5/serial ${hdf5_include_dir}" + + enable_include_hdf5=no + for f in $hdf5_inc_dir; do + if test -n "$f" ; then + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="$CPPFLAGS -I$f" + AC_CHECK_HEADERS([hdf5.h]) + if test "$ac_cv_header_hdf5_h" = "yes" ; then + CPPFLAGS="-I${f} ${SAVED_CPPFLAGS}" + enable_include_hdf5=yes + break + else + CPPFLAGS=${SAVED_CPPFLAGS} + fi + unset ac_cv_header_hdf5_h + fi + done + + + AC_ARG_WITH(hdf5-lib-dir, + [AS_HELP_STRING([--with-hdf5-lib-dir=], + [specify where HDF5 libraries are installed])], + [ + hdf5_libraries_dir="$withval" + ], [hdf5_libraries_dir=""]) + + hdf5_lib_dir="/usr/lib/x86_64-linux-gnu/hdf5 /usr/lib/x86_64-linux-gnu/hdf5/serial ${hdf5_libraries_dir}" + + enable_libraries_hdf5=no + for f in $hdf5_lib_dir; do + if test -n "$f" ; then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS=-L${f} + STARPU_HAVE_LIBRARY(HDF5, [hdf5]) + if test "$ac_cv_lib_hdf5_main" = "yes" ; then + LDFLAGS="-L${f} ${SAVED_LDFLAGS} ${STARPU_HDF5_LDFLAGS}" + enable_libraries_hdf5=yes + break + else + LDFLAGS=${SAVED_LDFLAGS} + fi + unset ac_cv_lib_hdf5_main + fi + done +fi + +if test "x$enable_libraries_hdf5" = "xyes" -a "x$enable_include_hdf5" = "xyes" -a "x$enable_hdf5" != "xno"; then + AC_DEFINE([STARPU_HAVE_HDF5], [1], [Define to 1 if you have the header file.]) + enable_hdf5=yes +else + enable_hdf5=no +fi +AM_CONDITIONAL(STARPU_HAVE_HDF5, test "x$enable_hdf5" = "xyes") + + +# This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP +STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP +STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP_8 + +# This defines HAVE_SYNC_BOOL_COMPARE_AND_SWAP
+STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP +STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP_8 + +# This defines HAVE_SYNC_FETCH_AND_ADD +STARPU_CHECK_SYNC_FETCH_AND_ADD +STARPU_CHECK_SYNC_FETCH_AND_ADD_8 + +# This defines HAVE_SYNC_FETCH_AND_OR +STARPU_CHECK_SYNC_FETCH_AND_OR +STARPU_CHECK_SYNC_FETCH_AND_OR_8 + +# This defines HAVE_SYNC_LOCK_TEST_AND_SET +STARPU_CHECK_SYNC_LOCK_TEST_AND_SET + +# This defines HAVE_ATOMIC_COMPARE_EXCHANGE_N +STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N +STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N_8 + +# This defines HAVE_ATOMIC_EXCHANGE_N +STARPU_CHECK_ATOMIC_EXCHANGE_N +STARPU_CHECK_ATOMIC_EXCHANGE_N_8 + +# This defines HAVE_ATOMIC_FETCH_ADD +STARPU_CHECK_ATOMIC_FETCH_ADD +STARPU_CHECK_ATOMIC_FETCH_ADD_8 + +# This defines HAVE_ATOMIC_FETCH_OR +STARPU_CHECK_ATOMIC_FETCH_OR +STARPU_CHECK_ATOMIC_FETCH_OR_8 + +# This defines HAVE_ATOMIC_TEST_AND_SET +STARPU_CHECK_ATOMIC_TEST_AND_SET + +# This defines HAVE_SYNC_SYNCHRONIZE +STARPU_CHECK_SYNC_SYNCHRONIZE + +CPPFLAGS="${CPPFLAGS} -D_GNU_SOURCE " + +STARPU_SEARCH_LIBS([LIBNUMA],[set_mempolicy],[numa],[enable_libnuma=yes],[enable_libnuma=no]) +AC_MSG_CHECKING(whether libnuma is available) +AC_MSG_RESULT($enable_libnuma) +if test x$enable_libnuma = xyes; then + AC_DEFINE(STARPU_HAVE_LIBNUMA,[1],[libnuma is available]) +fi + +AC_MSG_CHECKING(whether statement expressions are available) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#define maxint(a,b) ({int _a = (a), _b = (b); _a > _b ? 
_a : _b; }) +]], + [[ int x=maxint(12,42); ]])], + [statement_expressions="yes"], + [statement_expressions="no"]) +AC_MSG_RESULT($statement_expressions) +if test x$statement_expressions = xyes; then + AC_DEFINE(STARPU_HAVE_STATEMENT_EXPRESSIONS,[1],[statement expressions are available]) +fi + +saved_LIBS="${LIBS}" +LIBS="${LIBS} -ldl" +STARPU_DLOPEN_LDFLAGS="" +AC_CHECK_FUNCS([dlopen], [STARPU_DLOPEN_LDFLAGS="-ldl"]) +LIBS="$saved_LIBS" + +############################################################################### +# # +# SCHED_CTX settings # +# # +############################################################################### +AC_MSG_CHECKING(maximum number of sched_ctxs) +AC_ARG_ENABLE(max_sched_ctxs, [AS_HELP_STRING([--enable-max-sched-ctxs=], + [maximum number of sched_ctxs])], + max_sched_ctxs=$enableval, max_sched_ctxs=10) +if test x$max_sched_ctxs = x -o x$max_sched_ctxs = xyes +then + AC_MSG_ERROR([The --enable-max_sched_ctxs option needs to be given a number]) +fi +AC_MSG_RESULT($max_sched_ctxs) +AC_DEFINE_UNQUOTED(STARPU_NMAX_SCHED_CTXS, [$max_sched_ctxs], [Maximum number of sched_ctxs supported]) + +AC_ARG_ENABLE([sc_hypervisor], + [AS_HELP_STRING([--enable-sc-hypervisor], + [enable resizing contexts (experimental)])], + [enable_sc_hypervisor="$enableval"], + [enable_sc_hypervisor="no"]) + +#for pkgconfig +AC_SUBST(STARPU_SC_HYPERVISOR) +if test "x$enable_sc_hypervisor" = "xyes"; then + AC_DEFINE(STARPU_USE_SC_HYPERVISOR, [1], [enable sc_hypervisor lib]) +# PKG_CHECK_MODULES([SC_HYPERVISOR], [libsc_hypervisor], [], build_sc_hypervisor="yes") + STARPU_SC_HYPERVISOR="-lsc_hypervisor" + build_sc_hypervisor="yes" +else + build_sc_hypervisor="no" +fi + +AM_CONDITIONAL([STARPU_BUILD_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"]) +AM_CONDITIONAL([STARPU_USE_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"]) + +AC_ARG_ENABLE([sc_hypervisor_debug], + [AS_HELP_STRING([--enable-sc-hypervisor-debug], + [enable debug for resizing contexts
(experimental)])], + [enable_sc_hypervisor_debug="yes"], + [enable_sc_hypervisor_debug="no"]) + + +AC_SUBST(STARPU_SC_HYPERVISOR_DEBUG, $enable_sc_hypervisor_debug) +AM_CONDITIONAL([STARPU_SC_HYPERVISOR_DEBUG], [test "x$enable_sc_hypervisor_debug" = "xyes"]) + +if test "x$enable_sc_hypervisor_debug" = "xyes"; then + AC_DEFINE(STARPU_SC_HYPERVISOR_DEBUG, [1], [enable debug sc_hypervisor]) +fi + +############################################################################### +# # +# CPUs settings # +# # +############################################################################### + +AC_MSG_CHECKING(maximum number of CPUs) +AC_ARG_ENABLE(maxcpus, [AS_HELP_STRING([--enable-maxcpus=], + [maximum number of CPUs])], + maxcpus=$enableval, maxcpus=auto) +if test x$maxcpus = xauto +then + confcpu=$(getconf _NPROCESSORS_ONLN 2>/dev/null) + if test x$confcpu = x + then + AC_MSG_ERROR([cannot get the number of CPUS, please specify a numerical value with --enable-maxcpus]) + fi + maxcpus=2 + while test $maxcpus -lt $confcpu + do + maxcpus=`expr $maxcpus \* 2` + done +fi +if test x$maxcpus = x -o x$maxcpus = xyes +then + AC_MSG_ERROR([The --enable-maxcpus option needs to be given a number]) +fi +AC_MSG_RESULT($maxcpus) +AC_DEFINE_UNQUOTED(STARPU_MAXCPUS, [$maxcpus], [Maximum number of CPUs supported]) + +AC_MSG_CHECKING(whether CPUs should be used) +AC_ARG_ENABLE(cpu, [AS_HELP_STRING([--disable-cpu], + [do not use the CPU(s)])], + enable_cpu=$enableval, enable_cpu=yes) +AC_MSG_RESULT($enable_cpu) +AC_SUBST(STARPU_USE_CPU, $enable_cpu) +AM_CONDITIONAL(STARPU_USE_CPU, test x$enable_cpu = xyes) + +if test x$enable_cpu = xyes; then + AC_DEFINE(STARPU_USE_CPU, [1], [CPU driver is activated]) +fi + +############################################################################### +# # +# CUDA settings # +# # +############################################################################### + +AC_MSG_CHECKING(maximum number of CUDA devices) +AC_ARG_ENABLE(maxcudadev, 
[AS_HELP_STRING([--enable-maxcudadev=], + [maximum number of CUDA devices])], + nmaxcudadev=$enableval, nmaxcudadev=4) +if test x$nmaxcudadev = x -o x$nmaxcudadev = xyes +then + AC_MSG_ERROR([The --enable-maxcudadev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxcudadev) +AC_DEFINE_UNQUOTED(STARPU_MAXCUDADEVS, [$nmaxcudadev], + [maximum number of CUDA devices]) + +AC_ARG_ENABLE(cuda, [AS_HELP_STRING([--disable-cuda], + [do not use CUDA device(s)])],, [enable_cuda=maybe]) + +# We don't want to be hit by conflicts between simgrid, boost, and CUDA +if test x$enable_simgrid = xyes; then + if test x$enable_cuda = xyes; then + AC_MSG_ERROR([Building against CUDA should not be enabled with simgrid]) + fi + enable_cuda=no +fi + +#AC_MSG_CHECKING(whether CUDA is available) +AC_ARG_WITH(cuda-dir, + [AS_HELP_STRING([--with-cuda-dir=], + [specify CUDA installation directory])], + [ + cuda_dir="$withval" + # in case this was not explicit yet + enable_cuda=yes + ], cuda_dir=no) + +AC_ARG_WITH(cuda-include-dir, + [AS_HELP_STRING([--with-cuda-include-dir=], + [specify where CUDA headers are installed])], + [ + cuda_include_dir="$withval" + # in case this was not explicit yet + enable_cuda=yes + ], [cuda_include_dir=no]) + +AC_ARG_WITH(cuda-lib-dir, + [AS_HELP_STRING([--with-cuda-lib-dir=], + [specify where CUDA libraries are installed])], + [ + cuda_lib_dir="$withval" + # in case this was not explicit yet + enable_cuda=yes + ], [cuda_lib_dir=no]) + +AC_DEFUN([STARPU_CHECK_CUDA_L], +[ + __cuda_L=$1 + SAVED_LDFLAGS="${LDFLAGS}" + STARPU_CUDA_LDFLAGS="${__cuda_L}" + AC_MSG_CHECKING(whether CUDA library is available in $__cuda_L) + AC_MSG_RESULT() + LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" + AC_HAVE_LIBRARY([cudart],[have_valid_cuda=yes],[have_valid_cuda=no]) + unset ac_cv_lib_cudart_main + if test "$have_valid_cuda" = yes ; then + LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + # we also check that CUBLAS is available + 
AC_HAVE_LIBRARY([cublas],[have_valid_cuda=yes],[have_valid_cuda=no])
+	unset ac_cv_lib_cublas_main
+    fi
+    LDFLAGS="${SAVED_LDFLAGS}"
+])
+# STARPU_CHECK_CUDA(dir, include_dir, lib_dir)
+# Leaves have_valid_cuda set to yes only when cuda.h, the cudart and cublas
+# libraries and nvcc were all found. An empty argument is treated like "no",
+# meaning that location was not specified.
+AC_DEFUN([STARPU_CHECK_CUDA],
+[
+    __cuda_dir=$1
+    __cuda_include_dir=$2
+    __cuda_lib_dir=$3
+
+    if test -z "$__cuda_lib_dir" ; then
+	__cuda_lib_dir=no
+    fi
+    if test -z "$__cuda_include_dir" ; then
+	__cuda_include_dir=no
+    fi
+    if test -z "$__cuda_dir" ; then
+	__cuda_dir=no
+    fi
+
+    if test "$__cuda_dir" != "no" ; then
+	AC_MSG_CHECKING(whether CUDA is available in $__cuda_dir, $__cuda_include_dir and $__cuda_lib_dir)
+    else
+	AC_MSG_CHECKING(whether CUDA is available)
+    fi
+    AC_MSG_RESULT()
+
+    if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then
+	__cuda_include_dir="$__cuda_dir/include"
+    fi
+
+    SAVED_CPPFLAGS="$CPPFLAGS"
+    have_valid_cuda=no
+
+    if test "$__cuda_include_dir" != "no" ; then
+	CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir"
+    fi
+
+    AC_CHECK_HEADER([cuda.h],[have_valid_cuda=yes],[have_valid_cuda=no])
+    # Drop the cached answer so that the next candidate directory is really probed.
+    unset ac_cv_header_cuda_h
+
+    if test "$have_valid_cuda" = "yes" ; then
+	if test "$__cuda_lib_dir" != "no" ; then
+	    STARPU_CHECK_CUDA_L("-L${__cuda_lib_dir}")
+	else
+	    if test "$__cuda_dir" != "no" ; then
+		for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do
+		    STARPU_CHECK_CUDA_L("-L${__cuda_dir}/${__cuda_libdir}")
+		    if test "$have_valid_cuda" = yes ; then
+			break
+		    fi
+		done
+	    else
+		STARPU_CHECK_CUDA_L("")
+	    fi
+	fi
+    fi
+
+    if test "$have_valid_cuda" = "no" ; then
+	CPPFLAGS="${SAVED_CPPFLAGS}"
+	unset STARPU_CUDA_LDFLAGS
+    else
+	if test "$NVCC" = "" ; then
+	    # Search the bin directory of the toolkit being probed first, not the
+	    # one given through --with-cuda-dir which may be a different location.
+	    AC_PATH_PROG([NVCC], [nvcc], [not-found],
+		[$__cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin])
+	fi
+	if test "x$NVCC" = "xnot-found"; then
+	    AC_MSG_WARN(['nvcc' not found, disabling CUDA])
+	    have_valid_cuda=no
+	else
+	    # This is for very old cuda, to enable the use of double etc.
+ AC_MSG_CHECKING(whether nvcc supports sm_13 architecture) + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -arch sm_13" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + # This is for recent cuda, which complains if we don't actually set an arch!? + AC_MSG_CHECKING(whether nvcc supports -Wno-deprecated-gpu-targets) + OLD_NVCCFLAGS="$NVCCFLAGS" + NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" + echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu + $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 + if test $? -eq 0 + then + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + NVCCFLAGS="$OLD_NVCCFLAGS" + fi + + rm -f cuda_test* + fi + + if test -n "$NVCC_CC"; then + NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" + fi + if test "$__cuda_include_dir" != "no"; then + STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" + NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" + fi + fi +]) + +if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then + STARPU_CHECK_CUDA("$cuda_dir", "$cuda_include_dir", "$cuda_lib_dir") + if test "$have_valid_cuda" = "no" ; then + STARPU_CHECK_CUDA("$CUDA_ROOT", "$CUDA_INC_PATH", "$CUDA_LIB_PATH") + fi + if test "$have_valid_cuda" = "no" ; then + if test "$NVCC" = "" ; then + AC_PATH_PROG([NVCC], [nvcc], [not-found], [$PATH:/usr/local/cuda/bin]) + fi + if test "$NVCC" != not-found ; then + CUDA_ROOT="$(dirname $NVCC)/.." + # Try to find all of cuda just from the availability of nvcc in PATH + STARPU_CHECK_CUDA("$CUDA_ROOT", "$CUDA_ROOT/include", "$CUDA_ROOT/lib") + cuda_dir=$(dirname $NVCC)/.. + else + unset NVCC + fi + fi + + if test "$have_valid_cuda" = "no" ; then + for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INC/.." "$CUDA_BIN/.." "$CUDA_SDK/.." 
"$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do + if test -n "$f" ; then + STARPU_CHECK_CUDA("$f", "no", "no") + if test "$have_valid_cuda" = "yes" ; then + break + fi + fi + done + fi + + # Check cuda is compatible with the C compiler + AC_MSG_CHECKING(whether CUDA is working) + if test "$have_valid_cuda" = "yes" ; then + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS} -lcudart" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( + [[#include ]], + [[]] + )], + [ + AC_RUN_IFELSE([AC_LANG_PROGRAM( + [[#include ]], + [[]] + )], + [have_valid_cuda="yes"], + [ + AC_MSG_RESULT([CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?]) + have_valid_cuda="no" + ]) + ], + [ + AC_MSG_ERROR([CUDA found, but cuda.h could not be compiled]) + have_valid_cuda="no" + ] + ) + CPPFLAGS="${SAVED_CPPFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + fi + AC_MSG_RESULT($have_valid_cuda) + + # in case CUDA was explicitly required, but is not available, this is an error + if test x$enable_cuda = xyes -a x$have_valid_cuda = xno; then + AC_MSG_ERROR([cannot find CUDA]) + fi + # now we enable CUDA if and only if a proper setup is available + enable_cuda=$have_valid_cuda +fi + +AC_MSG_CHECKING(whether CUDA should be used) +AC_MSG_RESULT($enable_cuda) +AC_SUBST(STARPU_USE_CUDA, $enable_cuda) +AM_CONDITIONAL(STARPU_USE_CUDA, test x$enable_cuda = xyes) +cc_or_nvcc=$CC +if test x$enable_cuda = xyes; then + cc_or_nvcc=$NVCC + AC_DEFINE(STARPU_USE_CUDA, [1], [CUDA support is activated]) + + # On Darwin, the libstdc++ dependency is not automatically added by nvcc +# case "$target" in +# *-*darwin*) AC_HAVE_LIBRARY([stdc++], []) ;; +# #*-*darwin*) AC_HAVE_LIBRARY([stdc++], [STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++"]) ;; +# esac + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart -lcublas" + STARPU_CUFFT_LDFLAGS="-lcufft" + + AC_LANG_PUSH([C++]) + case \ 
$NVCCFLAGS\ in + *\ -std=*\ *) ;; + *) + SAVED_CXX="$CXX" + CXX="$NVCC" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #ifdef STARPU_HAVE_SIMGRID_MSG_H + #include + #include + #else + #include + #endif + ]])],, + NVCCFLAGS="-std=c++11 $NVCCFLAGS") + CXX="$SAVED_CXX" + esac + AC_LANG_POP([C++]) + + if test "$F77" = "gfortran" -o "$FC" = "gfortran" ; then + STARPU_CUDA_FORTRAN_LDFLAGS="-lgfortran" + AC_SUBST(STARPU_CUDA_FORTRAN_LDFLAGS) + fi + + #in case this is a 64bit setup, we tell nvcc to use a -m64 flag, if missing from existing flags + if test x$SIZEOF_VOID_P = x8; then + case \ $NVCCFLAGS\ in + *\ -m64\ *) ;; + *) NVCCFLAGS="${NVCCFLAGS} -m64" ;; + esac + fi + + SAVED_CPPFLAGS="${CPPFLAGS}" + CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + SAVED_LIBS="${LIBS}" + AC_CHECK_HEADERS([cuda_gl_interop.h]) + + AC_CHECK_HEADERS([cublasLt.h], [ + AC_CHECK_LIB([cublasLt], [cublasLtCreate], + [AC_DEFINE([STARPU_HAVE_LIBCUBLASLT], [1], [Define to 1 if you have the cublasLt library]) + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcublasLt"]) + ]) + + AC_CHECK_LIB([cusparse], [cusparseCreate], + [AC_DEFINE([STARPU_HAVE_LIBCUSPARSE], [1], [Define to 1 if you have the cusparse library]) + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusparse"]) + AC_CHECK_DECLS([cusparseSetStream], [], [], [[#include ]]) + + # we also check that CuSolver is available + AC_CHECK_LIB([cusolver],[cusolverDnCreate], + [AC_DEFINE([STARPU_HAVE_LIBCUSOLVER], [1], [Define to 1 if you have the cusolver library]) + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusolver"]) + + AC_MSG_CHECKING(whether nvidia-ml can be used) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( + [[#include ]], + [[ + __typeof__(nvmlInit) *mynvmlInit = nvmlInit; + mynvmlInit(); + ]] + )], + [ + AC_DEFINE([STARPU_HAVE_NVML_H], [1], [Define to 1 if you have the nvml.h header]) + AC_MSG_RESULT(yes) + AC_CHECK_DECLS([nvmlDeviceGetTotalEnergyConsumption], [], [], [[#include 
]]) + ], + [ + AC_MSG_RESULT(no) + AC_MSG_WARN([nvml.h could not be compiled. This will prevent from correct understanding of the machine topology.]) + NO_NVML="Warning: no nvml.h found" + ] + ) + + CPPFLAGS="${SAVED_CPPFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + LIBS="${SAVED_LIBS}" +fi + +AC_SUBST(CC_OR_NVCC, $cc_or_nvcc) + +have_magma=no +if test x$enable_cuda = xyes; then + PKG_CHECK_MODULES([MAGMA], [magma], [ + AC_DEFINE([STARPU_HAVE_MAGMA], [1], [Define to 1 if you have the MAGMA library.]) + AC_SUBST([STARPU_HAVE_MAGMA], [1]) + have_magma=yes +], [:]) +fi +AM_CONDITIONAL(STARPU_HAVE_MAGMA, [test x$have_magma = xyes]) +AC_MSG_CHECKING(whether MAGMA should be used) +AC_MSG_RESULT($have_magma) + +# cufftDoubleComplex may not be available on an old CUDA setup +AC_CHECK_TYPE(cufftDoubleComplex, + [have_cufftdoublecomplex=yes], + [have_cufftdoublecomplex=no], [#include ]) +AM_CONDITIONAL(STARPU_HAVE_CUFFTDOUBLECOMPLEX, test x$have_cufftdoublecomplex = xyes) +if test x$have_cufftdoublecomplex = xyes; then + AC_DEFINE(STARPU_HAVE_CUFFTDOUBLECOMPLEX, [1], [cufftDoubleComplex is available]) +fi + +# The CURAND library is only available since CUDA 3.2 +have_curand=$enable_cuda +if test x$enable_cuda = xyes; then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" + AC_HAVE_LIBRARY([curand],[have_curand=yes],[have_curand=no]) + LDFLAGS="${SAVED_LDFLAGS}" +fi +AC_MSG_CHECKING(whether CURAND is available) +AC_MSG_RESULT($have_curand) +if test x$have_curand = xyes; then + AC_DEFINE(STARPU_HAVE_CURAND,[1], [CURAND is available]) + STARPU_CURAND_LDFLAGS="$STARPU_CURAND_LDFLAGS -lcurand" + AC_SUBST(STARPU_CURAND_LDFLAGS) +fi + +# Peer transfers are only supported since CUDA 4.0 +# Disable them if user explicitly wants to disable them +AC_ARG_ENABLE(cuda_memcpy_peer, [AS_HELP_STRING([--disable-cuda-memcpy-peer], [do not allow peer transfers when using CUDA 4.0])],, [enable_cuda_memcpy_peer=$enable_cuda]) +if test x$enable_cuda_memcpy_peer = xyes; then + 
AC_DEFINE(STARPU_HAVE_CUDA_MEMCPY_PEER,[1],[Peer transfers are supported in CUDA])
+fi
+
+# Probe which mapping-related fields the installed CUDA headers expose.
+# Each probe only defines its own STARPU_HAVE_CUDA_* macro when the field exists.
+AC_ARG_ENABLE(cuda_map, [AS_HELP_STRING([--disable-cuda-map], [do not allow CUDA memory mapping when available])],, [enable_cuda_map=yes])
+if test x$enable_cuda_map = xyes -a x$enable_cuda = xyes ; then
+	SAVED_LDFLAGS="${LDFLAGS}"
+	LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}"
+	AC_CHECK_MEMBER([struct cudaDeviceProp.canMapHostMemory],
+		AC_DEFINE([STARPU_HAVE_CUDA_CANMAPHOST],[1],[Define to 1 if CUDA device properties include canMapHostMemory]),
+		, [[#include <cuda_runtime_api.h>]])
+	AC_CHECK_MEMBER([struct cudaDeviceProp.unifiedAddressing],
+		AC_DEFINE([STARPU_HAVE_CUDA_UNIFIEDADDR],[1],[Define to 1 if CUDA device properties include unifiedAddressing]),
+		, [[#include <cuda_runtime_api.h>]])
+	AC_CHECK_MEMBER([struct cudaDeviceProp.managedMemory],
+		AC_DEFINE([STARPU_HAVE_CUDA_MNGMEM],[1],[Define to 1 if CUDA device properties include managedMemory]),
+		, [[#include <cuda_runtime_api.h>]])
+	AC_CHECK_MEMBER([struct cudaDeviceProp.pageableMemoryAccess],
+		AC_DEFINE([STARPU_HAVE_CUDA_PAGEABLEMEM],[1],[Define to 1 if CUDA device properties include pageableMemoryAccess]),
+		, [[#include <cuda_runtime_api.h>]])
+	AC_CHECK_MEMBER([struct cudaPointerAttributes.type],
+		AC_DEFINE([STARPU_HAVE_CUDA_POINTER_TYPE],[1],[Define to 1 if CUDA pointer attributes include a type field instead of old memoryType field]),
+		, [[#include <cuda_runtime_api.h>]])
+	LDFLAGS="${SAVED_LDFLAGS}"
+	AC_DEFINE(STARPU_USE_CUDA_MAP,[1],[Define to 1 if CUDA Mapped host memory may be used])
+fi
+
+# Testing-only alternative CUDA drivers, both off unless explicitly requested.
+if test x$enable_cuda = xyes; then
+	AC_ARG_ENABLE(cuda0, [AS_HELP_STRING([--enable-cuda0],
+		[Enable the minimal-support CUDA driver (only for testing)])],, [enable_cuda0=no])
+	if test x$enable_cuda0 = xyes; then
+		AC_DEFINE(STARPU_USE_CUDA0,[1],[Define to 1 if the CUDA0 driver is to be tested])
+	fi
+	AC_ARG_ENABLE(cuda1, [AS_HELP_STRING([--enable-cuda1],
+		[Enable the small-support CUDA driver (only for testing)])],, [enable_cuda1=no])
+	if test x$enable_cuda1 = xyes; then
+		
AC_DEFINE(STARPU_USE_CUDA1,[1],[Define to 1 if the CUDA1 driver is to be tested]) + fi + + if test x$starpu_windows != xyes ; then + STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++" + fi + AC_SUBST(STARPU_CUDA_LDFLAGS) + AC_SUBST(STARPU_CUFFT_LDFLAGS) + AC_SUBST(STARPU_CUDA_CPPFLAGS) +fi +AM_CONDITIONAL(STARPU_USE_CUDA0, test x$enable_cuda0 = xyes) +AM_CONDITIONAL(STARPU_USE_CUDA1, test x$enable_cuda1 = xyes) + +AC_ARG_VAR([NVCC], [CUDA compiler]) +AC_ARG_VAR([NVCC_CC], [C compiler for CUDA compiler]) +AC_ARG_VAR([NVCCFLAGS], [CUDA compiler flags]) + +############################################################################### +# # +# HIP settings # +# # +############################################################################### +AC_MSG_CHECKING(maximum number of HIP devices) +AC_ARG_ENABLE(maxhipdev, [AS_HELP_STRING([--enable-maxhipdev=], + [maximum number of HIP devices])], + nmaxhipdev=$enableval, nmaxhipdev=8) +if test x$nmaxhipdev = x -o x$nmaxhipdev = xyes +then + AC_MSG_ERROR([The --enable-maxhipdev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxhipdev) +AC_DEFINE_UNQUOTED(STARPU_MAXHIPDEVS, [$nmaxhipdev], + [maximum number of HIP devices]) +AC_ARG_ENABLE(hip, [AS_HELP_STRING([--enable-hip], + [Enable the minimal-support HIP driver (only for testing)])],, [enable_hip=maybe]) + +if test x$enable_cuda = xyes; then + # hip_runtime.h conflicts with cuda_runtime.h + # see https://github.com/ROCm-Developer-Tools/HIP/issues/2703 + if test x$enable_hip = xyes ; then + AC_MSG_WARN([Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703]) + fi + enable_hip=no +fi + +if test x$enable_simgrid = xyes; then + if test x$enable_hip = xyes; then + AC_MSG_ERROR([HIP not supported with simgrid]) + fi + enable_hip=no +fi + +have_valid_hip=no +if test x$enable_hip != xno; then + + AC_PATH_PROG([HIPCONFIG], [hipconfig], [not-found]) + if test "x$HIPCONFIG" = "xnot-found"; then + if test x$enable_hip = xyes; then + 
AC_MSG_ERROR(['hipconfig' not found for HIP support]) + fi + have_valid_hip=no + else + HIP_PLATFORM="$(hipconfig --platform)" + HIP_DIR="$(hipconfig --path)" + HIP_LIB_DIR="$HIP_DIR/lib" + HIP_INCLUDE_DIR="$HIP_DIR/include" + STARPU_HIP_CPPFLAGS="$(hipconfig --cpp_config | tr -d '\n') -L$HIP_LIB_DIR" + if test "$HIP_PLATFORM" = "nvidia"; then + STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_NVIDIA" + fi + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_AMD" + fi + HIP_CLANG_PATH="$(hipconfig --hipclangpath)" + have_valid_hip=yes + + AC_ARG_WITH([hipblas], + [AS_HELP_STRING([--with-hipblas=], [specify where hipblas is installed])], + [custom_hipblas_dir="$withval"], + []) + + if test x$custom_hipblas_dir != x; then + HIPBLAS_INCLUDE_DIR="$custom_hipblas_dir/include" + HIPBLAS_LIB_DIR="$custom_hipblas_dir/lib" + STARPU_HIPBLAS_DIRS="-I$HIPBLAS_INCLUDE_DIR -L$HIPBLAS_LIB_DIR" + fi + + HIPCCFLAGS="$HIPCCFLAGS $STARPU_HIP_CPPFLAGS" + fi +fi + +if test "$HIP_PLATFORM" = "amd"; then + SAVED_LIBS=${LIBS} + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS="${LDFLAGS} -L$HIP_LIB_DIR" + AC_SEARCH_LIBS([hipMemGetInfo],[amdhip64],,have_valid_hip=no) + LDFLAGS="${SAVED_LDFLAGS}" + LIBS=${SAVED_LIBS} +fi + +if test x$have_valid_hip = xyes; then + SAVED_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="${CPPFLAGS} $STARPU_HIPBLAS_DIRS $STARPU_HIP_CPPFLAGS " + AC_CHECK_HEADERS([hip/hip_runtime.h hip/hip_runtime_api.h],[have_valid_hip=yes],[have_valid_hip=no]) + if test x$custom_hipblas_dir != x; then + AC_CHECK_HEADER([$custom_hipblas_dir/include/hipblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) + AC_MSG_WARN(['hipblas' custom]) + else + AC_CHECK_HEADER([hipblas/hipblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) + AC_MSG_WARN(['hipblas' default]) + fi + if test x$have_valid_hipblas = xyes; then + AC_HAVE_LIBRARY([hipblas],[have_valid_hipblas=yes],[have_valid_hipblas=no]) + fi + if test "$HIP_PLATFORM" = "amd"; then + 
if test x$have_valid_hipblas = xyes; then + AC_CHECK_HEADERS([rocblas/rocblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) + fi + if test x$have_valid_hipblas = xyes; then + AC_HAVE_LIBRARY([rocblas],[have_valid_hipblas=yes],[have_valid_hipblas=no]) + fi + fi + if test x$have_valid_hipblas = xyes; then + AC_DEFINE([STARPU_USE_HIPBLAS], [1], [HIPBLAS support is enabled]) + if test x$custom_hipblas_dir != x; then + HIPCCFLAGS="$HIPCCFLAGS -I$HIPBLAS_INCLUDE_DIR" + STARPU_HIPBLAS_LDFLAGS="-L$HIPBLAS_LIB_DIR" + fi + STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lhipblas" + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lrocblas" + fi + else + AC_MSG_WARN(['hipblas' not found, disabling HIP examples]) + fi + CPPFLAGS="${SAVED_CPPFLAGS}" +fi +AC_SUBST(STARPU_USE_HIPBLAS, $have_valid_hipblas) +AM_CONDITIONAL(STARPU_USE_HIPBLAS, test x$have_valid_hipblas = xyes) + +if test x$have_valid_hip = xyes; then + if test -z "$HIP_DIR"; then + have_valid_hip=no + fi + if test -z "$HIP_LIB_DIR"; then + have_valid_hip=no + fi + if test -z "$HIP_INCLUDE_DIR"; then + have_valid_hip=no + fi + + if test "$HIPCC" = ""; then + AC_PATH_PROG([HIPCC], [hipcc], [not-found], + [$HIP_CLANG_PATH:$PATH:/usr/bin:/bin]) + fi + + #testing if hipcc is defined, if not => STARPU_USE_HIP undefined + if test "x$HIPCC" = "xnot-found"; then + AC_MSG_WARN(['hipcc' not found, disabling HIP]) + have_valid_hip=no + fi + + if test "$HIP_PLATFORM" = "nvidia"; then + HIPCCFLAGS="$HIPCCFLAGS --x cu" + fi + + if test "x$have_valid_hip" = xyes; then + AC_MSG_CHECKING(whether $HIPCC is working) + rm -f conftest.hip conftest.o + touch conftest.hip + AS_IF([$HIPCC $HIPCCFLAGS conftest.hip -o conftest.o -c $STARPU_HIP_CPPFLAGS], + [AC_MSG_RESULT(yes)], + [ + AC_MSG_RESULT(no) + AC_MSG_WARN(['hipcc' does not work, disabling HIP]) + have_valid_hip=no + ]) + fi +fi + +# in case HIP was explicitly required, but is not available, this is an error +if test x$enable_hip = xyes 
-a x$have_valid_hip = xno; then + AC_MSG_ERROR([cannot find HIP]) +fi +# now we enable HIP if and only if a proper setup is available +enable_hip=$have_valid_hip + +if test "x$enable_hip" = xyes; then + AC_DEFINE(STARPU_USE_HIP,[1],[Define to 1 if the HIP driver is to be tested]) + if test "$HIP_PLATFORM" = "nvidia"; then + STARPU_HIP_LDFLAGS="-lcuda -lcudart -lcublas $STARPU_HIPBLAS_LDFLAGS -lstdc++" + fi + if test "$HIP_PLATFORM" = "amd"; then + STARPU_HIP_LDFLAGS="-L$HIP_LIB_DIR -lamdhip64 $STARPU_HIPBLAS_LDFLAGS -lstdc++" + fi + + AC_ARG_ENABLE(hip_memcpy_peer, [AS_HELP_STRING([--disable-hip-memcpy-peer], [if you want to disable peer transfers when using hip])],, [enable_hip_memcpy_peer=$enable_hip]) + if test x$enable_hip_memcpy_peer = xyes; then + AC_DEFINE(STARPU_HAVE_HIP_MEMCPY_PEER,[1],[Peer transfers are supported in HIP]) + fi +else + STARPU_HIP_LDFLAGS= + STARPU_HIP_CPPFLAGS= + enable_hip_memcpy_peer=no +fi + +AC_SUBST(STARPU_HIP_LDFLAGS) +AC_SUBST(STARPU_HIP_CPPFLAGS) +AM_CONDITIONAL(STARPU_USE_HIP, test x$enable_hip = xyes) + +#AC_ARG_VAR([HIPCC_CC], [C compiler for HIP compiler]) +AC_ARG_VAR([HIPCCFLAGS], [HIP compiler flags]) + + + +############################################################################### +# # +# OpenCL settings # +# # +############################################################################### + +AC_MSG_CHECKING(maximum number of OpenCL devices) +AC_ARG_ENABLE(maxopencldev, [AS_HELP_STRING([--enable-maxopencldev=], + [maximum number of OPENCL devices])], + nmaxopencldev=$enableval, nmaxopencldev=8) +if test x$nmaxopencldev = x -o x$nmaxopencldev = xyes +then + AC_MSG_ERROR([The --enable-maxopencldev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxopencldev) +AC_DEFINE_UNQUOTED(STARPU_MAXOPENCLDEVS, [$nmaxopencldev], + [maximum number of OPENCL devices]) +AC_ARG_ENABLE(opencl, [AS_HELP_STRING([--disable-opencl], + [do not use OpenCL device(s)])],, [enable_opencl=maybe]) + +have_valid_opencl=no 
+AC_DEFUN([STARPU_CHECK_OPENCL], +[ + __opencl_dir=$1 + __opencl_include_dir=$2 + __opencl_lib_dir=$3 + + if test "$__opencl_dir" != "no" ; then + AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir) + else + AC_MSG_CHECKING(whether OpenCL is available) + fi + AC_MSG_RESULT() + + if test "$__opencl_include_dir" = "no" -a "$__opencl_dir" != "no" ; then + __opencl_include_dir="$__opencl_dir/include" + fi + + SAVED_CPPFLAGS="$CPPFLAGS" + SAVED_LDFLAGS="${LDFLAGS}" + + if test "$__opencl_include_dir" != "no" ; then + CPPFLAGS="${CPPFLAGS} -I$__opencl_include_dir" + fi + AC_CHECK_HEADER([CL/cl.h],[have_valid_opencl=yes],[have_valid_opencl=no]) + unset ac_cv_header_CL_cl_h + + if test "$have_valid_opencl" = "yes" ; then + if test "$__opencl_lib_dir" != "no"; then + LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" + AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) + unset ac_cv_lib_OpenCL_main + else + AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir) + AC_MSG_RESULT() + AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) + unset ac_cv_lib_OpenCL_main + if test "$have_valid_opencl" = "no" -a "$__opencl_dir" != "no" ; then + for __cuda_libdir in lib64 lib lib/x86 lib/Win32 ; do + __opencl_lib_dir="$__opencl_dir/$__cuda_libdir" + AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir) + AC_MSG_RESULT() + LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" + AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) + unset ac_cv_lib_OpenCL_main + if test "$have_valid_opencl" = yes ; then + break + fi + done + else + LDFLAGS="${SAVED_LDFLAGS}" + AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) + unset ac_cv_lib_OpenCL_main + fi + fi + fi + + if test "$have_valid_opencl" = "yes" -a "$__opencl_include_dir" != "no"; then + STARPU_OPENCL_CPPFLAGS="-I$__opencl_include_dir" + AC_CHECK_HEADERS([CL/cl_ext.h]) + fi + 
+ CPPFLAGS="${SAVED_CPPFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + + if test "$have_valid_opencl" = "yes" ; then + if test "$__opencl_lib_dir" != "no"; then + STARPU_OPENCL_LDFLAGS="-L$__opencl_lib_dir" + fi + STARPU_OPENCL_LDFLAGS="${STARPU_OPENCL_LDFLAGS} -lOpenCL" + fi + +]) + +#AC_MSG_CHECKING(whether OpenCL is available) +AC_ARG_WITH(opencl-dir, + [AS_HELP_STRING([--with-opencl-dir=], + [specify OpenCL installation directory])], + [ + opencl_dir="$withval" + # in case this was not explicit yet + enable_opencl=yes + ], opencl_dir=no) + +AC_ARG_WITH(opencl-include-dir, + [AS_HELP_STRING([--with-opencl-include-dir=], + [specify where OpenCL headers are installed])], + [ + opencl_include_dir="$withval" + # in case this was not explicit yet + enable_opencl=yes + ], [opencl_include_dir=no]) + +AC_ARG_WITH(opencl-lib-dir, + [AS_HELP_STRING([--with-opencl-lib-dir=], + [specify where OpenCL libraries are installed])], + [ + opencl_lib_dir="$withval" + # in case this was not explicit yet + enable_opencl=yes + ], [opencl_lib_dir=no]) + +AC_DEFUN([STARPU_LOOK_FOR_OPENCL], +[ + if test "x$has_opencl_being_checked" != "xyes" ; then + STARPU_CHECK_OPENCL("$opencl_dir", "$opencl_include_dir", "$opencl_lib_dir") + if test "$have_valid_opencl" = "no" ; then + for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." 
"$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do + if test -n "$f" ; then + STARPU_CHECK_OPENCL("$f", "no", "no") + if test "$have_valid_opencl" = "yes" ; then + break + fi + fi + done + fi + has_opencl_being_checked=yes + fi +]) + +if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then + case $target in + *-*-darwin*) + AC_MSG_CHECKING(whether OpenCL is available) + + SAVED_LIBS=$LIBS + LIBS="$LIBS -framework OpenCL" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[ + #ifdef __APPLE_CC__ + #include + #else + #include + #endif + ]], + [[return clSetKernelArg(0, 0, 0, 0); ]])], + [AC_MSG_RESULT(yes) + enable_opencl=yes + have_valid_opencl=yes + STARPU_OPENCL_CPPFLAGS= + STARPU_OPENCL_LDFLAGS="-framework OpenCL"], + [AC_MSG_RESULT(no) + enable_opencl=no]) + LIBS=$SAVED_LIBS + ;; + *) + STARPU_LOOK_FOR_OPENCL() + # in case OpenCL was explicitly required, but is not available, this is an error + if test x$enable_opencl = xyes -a x$have_valid_opencl = xno; then + AC_MSG_ERROR([cannot find OpenCL]) + fi + # now we enable OpenCL if and only if a proper setup is available + enable_opencl=$have_valid_opencl + ;; + esac + save_LIBS="$LIBS" + LIBS="$LIBS $STARPU_OPENCL_LDFLAGS" + AC_CHECK_FUNCS([clEnqueueMarkerWithWaitList]) + LIBS="$save_LIBS" +fi + +AC_MSG_CHECKING(whether OpenCL should be used) +AC_MSG_RESULT($enable_opencl) +AC_SUBST(STARPU_USE_OPENCL, $enable_opencl) +AM_CONDITIONAL(STARPU_USE_OPENCL, test x$enable_opencl = xyes) +if test x$enable_opencl = xyes ; then + AC_DEFINE(STARPU_USE_OPENCL, [1], [OpenCL support is activated]) + STARPU_OPENCL_CPPFLAGS="${STARPU_OPENCL_CPPFLAGS} -DSTARPU_OPENCL_DATADIR=\"\\\"${datarootdir}/starpu/opencl\\\"\" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS" + AC_SUBST(STARPU_OPENCL_DATAdir, "$(eval echo ${datarootdir}/starpu/opencl/examples)") + AC_SUBST(STARPU_OPENCL_CPPFLAGS) + AC_SUBST(STARPU_OPENCL_LDFLAGS) +fi + +############################################################################### +# # +# Maxeler FPGA Settings # +# # 
+############################################################################### + +#NUMBER OF MAXELER FPGA DEVICES +AC_MSG_CHECKING(maximum number of Maxeler FPGA devices) +AC_ARG_ENABLE(maxmaxfpgadev, [AS_HELP_STRING([--enable-maxmaxfpgadev=], + [maximum number of Maxeler FPGA devices])], + nmaxmaxfpgadev=$enableval, nmaxmaxfpgadev=12) +if test x$nmaxmaxfpgadev = x -o x$nmaxmaxfpgadev = xyes +then + AC_MSG_ERROR([The --enable-maxmaxfpgadev option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxmaxfpgadev) +AC_DEFINE_UNQUOTED(STARPU_MAXMAXFPGADEVS, [$nmaxmaxfpgadev],[maximum number of Maxeler FPGA devices]) + +AC_ARG_ENABLE([max-fpga], + [AS_HELP_STRING([--disable-max-fpga],[disable support for Maxeler FPGA])], + [enable_max_fpga=$enableval], + [enable_max_fpga=maybe] +) + +if test x$enable_simgrid = xyes; then + if test x$enable_max_fpga = xyes; then + AC_MSG_ERROR([Max fpga not supported with simgrid]) + fi + enable_max_fpga=no +fi + +if test x$enable_max_fpga != xno; then + AC_PATH_PROG([SLIC_CONFIG], [slic-config], [not-found]) + + if test "x$SLIC_CONFIG" = "xnot-found"; then + # in case FPGA was explicitly required, but is not available, this is an error + if test x$enable_max_fpga = xyes; then + AC_MSG_ERROR(['slic-config' not found for Maxeler FPGA support]) + fi + enable_max_fpga=no + else + STARPU_MAX_FPGA_CPPFLAGS="`slic-config --cflags | sed s/\'//g | sed "s/-I /-I/"`" + STARPU_MAX_FPGA_LDFLAGS="`slic-config --libs | sed s/\'//g | sed "s/-L /-L/" | sed "s/-L /-L/"`" + + enable_max_fpga=yes + fi +fi + +AC_SUBST(STARPU_USE_MAX_FPGA,$enable_max_fpga) +AM_CONDITIONAL(STARPU_USE_MAX_FPGA,test x$enable_max_fpga = xyes) +if test x$enable_max_fpga = xyes; then + AC_DEFINE(STARPU_USE_MAX_FPGA,[1],[Maxeler FPGA support is activated]) +fi + + +############################################################################### +# # +# General GPU settings # +# # +############################################################################### 
+AC_MSG_CHECKING(whether asynchronous copy should be disabled) +AC_ARG_ENABLE(asynchronous-copy, [AS_HELP_STRING([--disable-asynchronous-copy], + [disable asynchronous copy between CPU and GPU])], + enable_asynchronous_copy=$enableval, enable_asynchronous_copy=yes) +disable_asynchronous_copy=no +if test x$enable_asynchronous_copy = xno ; then + disable_asynchronous_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_copy) +if test x$disable_asynchronous_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and GPU devices]) +fi + +AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled) +AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy], + [disable asynchronous copy between CPU and CUDA devices])], + enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes) +disable_asynchronous_cuda_copy=no +if test x$enable_asynchronous_cuda_copy = xno ; then + disable_asynchronous_cuda_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_cuda_copy) +if test x$disable_asynchronous_cuda_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and CUDA devices]) +fi + +AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled) +AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy], + [disable asynchronous copy between CPU and OPENCL devices])], + enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes) +disable_asynchronous_opencl_copy=no +if test x$enable_asynchronous_opencl_copy = xno ; then + disable_asynchronous_opencl_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_opencl_copy) +if test x$disable_asynchronous_opencl_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and OpenCL devices]) +fi + +AC_MSG_CHECKING(whether 
asynchronous MPI Master Slave copy should be disabled) +AC_ARG_ENABLE(asynchronous-mpi-master-slave-copy, [AS_HELP_STRING([--disable-asynchronous-mpi-master-slave-copy], + [disable asynchronous copy between MPI Master and MPI Slave devices])], + enable_asynchronous_mpi_master_slave_copy=$enableval, enable_asynchronous_mpi_master_slave_copy=yes) +disable_asynchronous_mpi_master_slave_copy=no +if test x$enable_asynchronous_mpi_master_slave_copy = xno ; then + disable_asynchronous_mpi_master_slave_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_mpi_master_slave_copy) +if test x$disable_asynchronous_mpi_master_slave_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY], [1], [Define to 1 to disable asynchronous copy between MPI Master and MPI Slave devices]) +fi + +# Same inversion pattern for the TCP/IP master-slave backend, which defines STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY. +AC_MSG_CHECKING(whether asynchronous TCP/IP Master Slave copy should be disabled) +AC_ARG_ENABLE(asynchronous-tcpip-master-slave-copy, [AS_HELP_STRING([--disable-asynchronous-tcpip-master-slave-copy], + [disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices])], + enable_asynchronous_tcpip_master_slave_copy=$enableval, enable_asynchronous_tcpip_master_slave_copy=yes) +disable_asynchronous_tcpip_master_slave_copy=no +if test x$enable_asynchronous_tcpip_master_slave_copy = xno ; then + disable_asynchronous_tcpip_master_slave_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_tcpip_master_slave_copy) +if test x$disable_asynchronous_tcpip_master_slave_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY], [1], [Define to 1 to disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices]) +fi + +# Same inversion pattern for Maxeler FPGA devices. +AC_MSG_CHECKING(whether asynchronous Maxeler FPGA copy should be disabled) +AC_ARG_ENABLE(asynchronous-max-fpga-copy, [AS_HELP_STRING([--disable-asynchronous-max-fpga-copy], + [disable asynchronous copy between CPU and Maxeler FPGA devices])], + enable_asynchronous_max_fpga_copy=$enableval, enable_asynchronous_max_fpga_copy=yes) 
+disable_asynchronous_max_fpga_copy=no +if test x$enable_asynchronous_max_fpga_copy = xno ; then + disable_asynchronous_max_fpga_copy=yes +fi +AC_MSG_RESULT($disable_asynchronous_max_fpga_copy) +if test x$disable_asynchronous_max_fpga_copy = xyes ; then + AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and Maxeler FPGA devices]) +fi + +############################################################################### +# # +# Fortran # +# # +############################################################################### + +# enable_build_fortran starts as no and is only switched to yes when Fortran was requested, FC is set, and the compiler passes the vendor-specific version checks below. +AC_ARG_ENABLE(fortran, [AS_HELP_STRING([--disable-fortran], + [disable build of fortran examples])], + enable_build_fortran_requested=$enableval, enable_build_fortran_requested=yes) +use_mpi_fort=no +enable_build_fortran=no +if test "x$enable_build_fortran_requested" = "xyes" ; then + if test "x$FC" != "x"; then + if $FC --version|grep -q 'GNU Fortran'; then + AC_LANG_PUSH([Fortran]) + OLD_FCFLAGS="$FCFLAGS" + FCFLAGS="$FCFLAGS -cpp" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) +#error GFortran too old, version >= 4.9.x needed, Fortran examples will not be built +#endif +]] + )], + [enable_build_fortran="yes"], + [enable_build_fortran="no"]) + FCFLAGS="$OLD_FCFLAGS" + AC_LANG_POP([Fortran]) + if test "$enable_build_fortran" = "no" ; then + AC_MSG_WARN([GFortran too old, version >= 4.9.x needed, Fortran examples will not be built]) + fi + else + if $FC -V 2>&1|grep -q 'Intel(R) Fortran'; then + enable_build_fortran="yes" + ifort_fc_version=`$FC -V 2>&1 |head -1|sed 's/.*Version //;s/ Build.*//'` + ifort_maj_version=`echo $ifort_fc_version|cut -d. 
-f1` + + if test $ifort_maj_version -lt 16; then + AC_MSG_WARN([Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built]) + enable_build_fortran="no" + fi + else + if $FC -qversion 2>&1|grep -q 'IBM XL Fortran'; then + xlf_fc_version=`$FC -V 2>&1 |tail -1|sed 's/.*Version: //'` + + AC_MSG_WARN([IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built]) + enable_build_fortran="no" + else + AC_MSG_WARN(Fortran compiler has not been tested for StarPU native Fortran support) + enable_build_fortran="yes" + fi + fi + fi + # Independently of the vendor chain above, Arm compilers with a major version below 23 are rejected. + if $FC -v 2>&1 | grep -q 'Arm C/C++/Fortran Compiler' ; then + armflang_version=`$FC -v 2>&1 | head -1 | sed 's/.*version //'` + armflang_maj_version=`echo $armflang_version|cut -d. -f1` + if test $armflang_maj_version -lt 23 ; then + AC_MSG_WARN([ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built]) + enable_build_fortran="no" + fi + fi + + if test "x$enable_build_fortran" = "xyes" ; then + AC_DEFINE(STARPU_HAVE_FC, [1], [Define this if a Fortran compiler is available]) + if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes -o x$build_mpi_master_slave = xyes ; then + #Check MPIFORT + # The default wrapper name is smpifort with Simgrid and mpifort otherwise; --with-mpifort overrides it. + if test x$enable_simgrid = xyes ; then + DEFAULT_MPIFORT=smpifort + else + DEFAULT_MPIFORT=mpifort + fi + AC_ARG_WITH(mpifort, [AS_HELP_STRING([--with-mpifort=], [Name or path of the mpifort compiler])], [DEFAULT_MPIFORT=$withval]) + case $DEFAULT_MPIFORT in + /*) mpifort_path="$DEFAULT_MPIFORT" ;; + *) AC_PATH_PROG(mpifort_path, $DEFAULT_MPIFORT, [no], [$simgrid_dir/bin:$PATH]) ;; + esac + # We test if the MPIFORT compiler exists + if test ! 
-x $mpifort_path; then + AC_MSG_RESULT(The mpifort compiler '$mpifort_path' does not have the execute permission) + mpifort_path=no + else + # Link a small program with the MPI C compiler to detect MPI_Comm_f2c, which is declared in mpi.h. + OLD_CC=$CC + CC=$mpicc_path + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [[#include <mpi.h>]], + [[AC_LANG_SOURCE([return MPI_Comm_f2c(0);])]] + ), + [use_mpi_fort=yes], + [use_mpi_fort=no] + ) + CC=$OLD_CC + if test "x$use_mpi_fort" = xyes; then + AC_DEFINE([HAVE_MPI_COMM_F2C], [1], [Function MPI_Comm_f2c is available]) + fi + fi + + AC_MSG_CHECKING(whether mpifort is available) + AC_MSG_RESULT($mpifort_path) + AC_SUBST(MPIFORT, $mpifort_path) + + if test x$mpifort_path != xno ; then + MPIPATH=$(dirname $mpifort_path):$PATH + else + MPIPATH=$PATH + fi + fi + fi + fi +fi +if test "x$enable_build_fortran" = "xyes" ; then + if test "x$FC" = "x" ; then + enable_build_fortran="no" + fi +fi + +#We have MPI C/C++ compiler +if test x$build_mpi_master_slave = xyes; then + #Check if we can compile fortran cases + if test x$use_mpi_fort = xyes ; then + F77LD=$mpifort_path + FCLD=$mpifort_path + F77=$mpifort_path + FC=$mpifort_path + else + enable_build_fortran=no + fi +fi + + +AM_CONDITIONAL([STARPU_HAVE_FC], [test "x$FC" != "x" -a "x$enable_build_fortran" = "xyes"]) +AM_CONDITIONAL([STARPU_HAVE_F77], [test "x$F77" != "x" -a "x$enable_build_fortran" = "xyes"]) +AM_CONDITIONAL([STARPU_HAVE_MPIFORT], [test "x$use_mpi_fort" = "xyes"]) + +############################################################################### +# # +# Debug and Performance analysis tools # +# # +############################################################################### + +AC_MSG_CHECKING(whether debug mode should be enabled) +AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug], [enable debug mode])], + enable_debug=$enableval, enable_debug=no) +AC_MSG_RESULT($enable_debug) + +AC_ARG_ENABLE(spinlock_check, [AS_HELP_STRING([--enable-spinlock-check], [enable spinlock check])], enable_spinlock_check=$enableval, enable_spinlock_check=no) +AC_ARG_ENABLE(fstack-protector-all, 
[AS_HELP_STRING([--disable-fstack-protector-all], [disable GCC option -fstack-protector-all])], enable_fstack_protector_all=$enableval, enable_fstack_protector_all=yes) + +# Debug builds append -O0 and force the spinlock check on; all other builds prepend -O3 so that user-supplied flags come later on the command line. +if test x$enable_debug = xyes; then + AC_DEFINE(STARPU_DEBUG, [1], [enable debugging statements]) + CFLAGS="$CFLAGS -O0" + CXXFLAGS="$CXXFLAGS -O0" + FFLAGS="$FFLAGS -O0" + FCFLAGS="$FCFLAGS -O0" + IS_SUPPORTED_FLAG(-fno-optimize-sibling-calls) + enable_spinlock_check=yes + if test x$GCC = xyes; then + IS_SUPPORTED_FLAG(-Og) + if test x$starpu_windows != xyes ; then + if test x$enable_fstack_protector_all = xyes ; then + CFLAGS="$CFLAGS -fstack-protector-all" + CXXFLAGS="$CXXFLAGS -fstack-protector-all" + FFLAGS="$FFLAGS -fstack-protector-all" + FCFLAGS="$FCFLAGS -fstack-protector-all" + fi + fi + fi +else + CFLAGS="-O3 $CFLAGS" + CXXFLAGS="-O3 $CXXFLAGS" + FFLAGS="-O3 $FFLAGS" + FCFLAGS="-O3 $FCFLAGS" +fi + +AC_MSG_CHECKING(whether gdb information should be enabled) +AC_ARG_ENABLE(gdb, [AS_HELP_STRING([--disable-gdb], [disable gdb information])], + enable_gdb=$enableval, enable_gdb=yes) +AC_MSG_RESULT($enable_gdb) + +AC_MSG_CHECKING(whether full gdb information should be enabled) +AC_ARG_ENABLE(full-gdb-information, [AS_HELP_STRING([--disable-full-gdb-information], [disable full gdb information])], + enable_full_gdb_information=$enableval, enable_full_gdb_information=yes) +AC_MSG_RESULT($enable_full_gdb_information) +# Full gdb information is only attempted with GCC; otherwise plain -g is probed. With --disable-gdb, -g0 is appended to every flag variable including LDFLAGS. +if test x$enable_gdb = xyes; then + if test x$enable_full_gdb_information = xyes -a x$GCC = xyes; then + IS_SUPPORTED_FLAG(-gdwarf-2) + IS_SUPPORTED_FLAG(-g3) + NVCCFLAGS="$NVCCFLAGS -g" + HIPCCFLAGS="$HIPCCFLAGS -g" + else + IS_SUPPORTED_FLAG(-g) + NVCCFLAGS="$NVCCFLAGS -g" + HIPCCFLAGS="$HIPCCFLAGS -g" + fi +else + CFLAGS="$CFLAGS -g0" + CXXFLAGS="$CXXFLAGS -g0" + FFLAGS="$FFLAGS -g0" + FCFLAGS="$FCFLAGS -g0" + LDFLAGS="$LDFLAGS -g0" +fi + +if test x$enable_spinlock_check = xyes; then + AC_DEFINE(STARPU_SPINLOCK_CHECK, [1], [check spinlock use]) +fi + +AC_MSG_CHECKING(whether 
extra checks should be performed) +AC_ARG_ENABLE(fast, [AS_HELP_STRING([--enable-fast], + [do not enforce assertions])], + enable_fast=$enableval, enable_fast=no) +# NOTE(review): the value printed for this check is enable_fast, so a yes answer means assertions are disabled. +AC_MSG_RESULT($enable_fast) +if test x$enable_fast = xyes; then + AC_DEFINE(STARPU_NO_ASSERT, [1], [disable assertions]) +else + # fortify gets really enabled only with optimizations, avoid enabling it + # when optimizations are not enabled, because with some glibc it + # spews a lot of warnings. + if test x$enable_debug != xyes; then + if test x$GCC = xyes; then + CPPFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 $CPPFLAGS" + fi + fi +fi + +AC_MSG_CHECKING(whether debug messages should be displayed) +AC_ARG_ENABLE(verbose, [AS_HELP_STRING([--enable-verbose], + [display verbose debug messages (--enable-verbose=extra increase the verbosity)])], + enable_verbose=$enableval, enable_verbose=no) +AC_MSG_RESULT($enable_verbose) +# The value extra defines STARPU_EXTRA_VERBOSE in addition to STARPU_VERBOSE. +if test x$enable_verbose = xyes; then + AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages]) +fi +if test x$enable_verbose = xextra; then + AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages]) + AC_DEFINE(STARPU_EXTRA_VERBOSE, [1], [display verbose debug messages]) +fi + +AC_MSG_CHECKING(whether coverage testing should be enabled) +AC_ARG_ENABLE(coverage, [AS_HELP_STRING([--enable-coverage], + [enable coverage checking])], + enable_coverage=$enableval, enable_coverage=no) +AC_MSG_RESULT($enable_coverage) +AC_SUBST(COVERAGE, $enable_coverage) +AM_CONDITIONAL(STARPU_COVERAGE_ENABLED, [test "x$enable_coverage" = "xyes"]) +if test x$enable_coverage = xyes; then + CFLAGS="${CFLAGS} --coverage" + CXXFLAGS="${CXXFLAGS} --coverage" + FFLAGS="${FFLAGS} --coverage" + FCFLAGS="${FCFLAGS} --coverage" + LDFLAGS="${LDFLAGS} --coverage" + LIBS="${LIBS} -lgcov" +fi + +AC_MSG_CHECKING(whether coverity mode should be enabled) +AC_ARG_ENABLE(coverity, [AS_HELP_STRING([--enable-coverity], [enable coverity mode])], + enable_coverity=$enableval, enable_coverity=no) 
+AC_MSG_RESULT($enable_coverity) +AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes) +if test x$enable_coverity = xyes ; then + AC_DEFINE(STARPU_COVERITY, [1], [Define to 1 if you are building with coverity]) +fi + +# We would need a PIC-compiled libfxt.a for this to work ; that's usually not available. +if test x$enable_mpi = xyes -a x$enable_simgrid = xyes -o x$enable_shared = xno -a x$enable_starpupy = xyes ; then + default_enable_fxt=no +else + default_enable_fxt=maybe +fi +# shall we use FxT to generate trace of the execution ? +AC_ARG_ENABLE(fxt, [AS_HELP_STRING([--disable-fxt], + [disable FxT trace mechanisms])],, [enable_fxt=$default_enable_fxt]) +AC_ARG_WITH(fxt, + [AS_HELP_STRING([--with-fxt=], [specify FxT installation directory])], + [ + if test x$withval = xno ; then + enable_fxt=no + else + fxt_dir="$withval" + use_fxt_from_system=no + # in case this was not explicit yet + enable_fxt=yes + AC_SUBST(FXTDIR, $fxt_dir) + fi + ], + [ + use_fxt_from_system=yes + fxt_dir="" + ]) + +# With an explicit FxT directory, fxt.pc is searched under that directory first; when it is missing the include and library flags are derived from the directory and FxT is still assumed usable. Without one, only pkg-config decides. +if test x$enable_fxt != xno; then + if test x$use_fxt_from_system = xno; then + save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" + PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH" + PKG_CHECK_MODULES([FXT], [fxt], [have_valid_fxt=yes], [ + have_valid_fxt=yes + AC_MSG_WARN([Old FxT without fxt.pc file, hoping link will succeed]) + FXT_CFLAGS="-I$fxt_dir/include/ " + FXT_LDFLAGS="-L$fxt_dir/lib/" + AC_ARG_VAR(FXT_LDFLAGS) + FXT_LIBS="-lfxt" + ]) + PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH" + else + PKG_CHECK_MODULES([FXT], [fxt], [have_valid_fxt=yes], [have_valid_fxt=no]) + fi + if test x$have_valid_fxt = xyes ; then + enable_fxt=yes + save_LIBS="$LIBS" + LIBS="$LIBS $FXT_LIBS" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $FXT_LDFLAGS" + AC_CHECK_FUNCS([fxt_close]) + AC_CHECK_FUNCS([fxt_blockev_leave]) + AC_CHECK_FUNCS([enable_fut_flush]) + AC_CHECK_FUNCS([fut_set_filename]) + AC_CHECK_FUNCS([fut_setup_flush_callback]) + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + 
save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $FXT_CFLAGS" + # The declarations are looked up in the FxT fut header, with the FxT include flags temporarily added to CFLAGS. + AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fxt/fut.h>]]) + AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fxt/fut.h>]]) + AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fxt/fut.h>]]) + CFLAGS="$save_CFLAGS" + + if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then + # simgrid's SMPI needs fxt to be linked in statically for + # variable privatization to work + FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed" + fi + + ########################################## + # Poti is a library to generate paje trace files + ########################################## + PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no]) + AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti], + [Enable the use of the POTI library to generate Paje traces])], + enable_poti=$enableval, enable_poti=no) + if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then + AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used]) + save_LIBS="$LIBS" + LIBS="$LIBS $POTI_LIBS" + AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent]) + LIBS="$save_LIBS" + FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS" + FXT_LIBS="$FXT_LIBS $POTI_LIBS" + fi + else + if test x$enable_fxt = xyes ; then + AC_MSG_ERROR([FxT is required but not available]) + fi + enable_fxt=no + fi +fi + +AC_MSG_CHECKING(whether FxT traces should be generated) +AC_MSG_RESULT($enable_fxt) + +if test x$enable_fxt = xyes; then + AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces]) + AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces]) +fi + +AC_SUBST(STARPU_USE_FXT, $enable_fxt) +AM_CONDITIONAL(STARPU_USE_FXT, test x$enable_fxt = xyes) + +AC_MSG_CHECKING(whether additional locking systems FxT traces should be enabled) +AC_ARG_ENABLE(fxt-lock, [AS_HELP_STRING([--enable-fxt-lock], + [enable additional locking systems FxT traces])], + enable_fxt_lock=$enableval, 
enable_fxt_lock=no) +AC_MSG_RESULT($enable_fxt_lock) +if test x$enable_fxt_lock = xyes; then + AC_DEFINE(STARPU_FXT_LOCK_TRACES, [1], [enable additional locking systems FxT traces]) +fi + +# PAPI is used whenever pkg-config finds it, unless --disable-papi is given; its libraries are added to STARPU_EXPORTED_LIBS. +AC_ARG_ENABLE(papi, [AS_HELP_STRING([--disable-papi], + [disable using papi])], + enable_papi=$enableval, enable_papi=yes) +if test x$enable_papi = xyes; then + PKG_CHECK_MODULES([PAPI], [papi], [have_valid_papi=yes], [have_valid_papi=no]) + if test x$have_valid_papi = xyes ; then + AC_DEFINE([STARPU_PAPI], [1], [Define to 1 if you have the libpapi library]) + STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $PAPI_LIBS" + fi +fi + +AC_MSG_CHECKING(whether performance debugging should be enabled) +AC_ARG_ENABLE(perf-debug, [AS_HELP_STRING([--enable-perf-debug], + [enable performance debugging through gprof])], + enable_perf_debug=$enableval, enable_perf_debug=no) +AC_MSG_RESULT($enable_perf_debug) +AC_SUBST(STARPU_PERF_DEBUG, $enable_perf_debug) +# NOTE(review): the profiling flag -pg is added to CPPFLAGS and LDFLAGS here, not to CFLAGS. +if test x$enable_perf_debug = xyes; then + AC_DEFINE(STARPU_PERF_DEBUG, [1], [enable performance debug]) + CPPFLAGS="${CPPFLAGS} -pg " + LDFLAGS="${LDFLAGS} -pg " +fi + +AC_MSG_CHECKING(whether performance model debugging should be enabled) +AC_ARG_ENABLE(model-debug, [AS_HELP_STRING([--enable-model-debug], + [enable performance model debugging])], + enable_model_debug=$enableval, enable_model_debug=no) +AC_MSG_RESULT($enable_model_debug) +if test x$enable_model_debug = xyes; then + AC_DEFINE(STARPU_MODEL_DEBUG, [1], [enable performance model debug]) +fi + +AC_MSG_CHECKING(whether memory stats should be displayed) +AC_ARG_ENABLE(memory-stats, [AS_HELP_STRING([--enable-memory-stats], + [enable memory stats])], + enable_memory_stats=$enableval, enable_memory_stats=no) +AC_MSG_RESULT($enable_memory_stats) +if test x$enable_memory_stats = xyes; then + AC_DEFINE(STARPU_MEMORY_STATS, [1], [enable memory stats]) +fi + +AC_ARG_ENABLE(glpk, [AS_HELP_STRING([--disable-glpk], + [disable using glpk for bound computation])], + enable_glpk=$enableval, 
enable_glpk=yes) +if test x$enable_glpk = xyes; then + AC_CHECK_HEADERS([glpk.h], [AC_DEFINE([STARPU_HAVE_GLPK_H], [1], [Define to 1 if you have the header file.])]) + STARPU_HAVE_LIBRARY(GLPK, [glpk]) +fi + +AC_ARG_WITH(ayudame1-include-dir, + [AS_HELP_STRING([--with-ayudame1-include-dir=], + [specify where Ayudame version 1 headers are installed])], + [ + ayudame1_include_dir="$withval" + if test -n "$ayudame1_include_dir"; then + CPPFLAGS="-I$ayudame1_include_dir $CPPFLAGS" + fi + ], [ayudame1_include_dir=no]) +AC_ARG_WITH(ayudame2-include-dir, + [AS_HELP_STRING([--with-ayudame2-include-dir=], + [specify where Ayudame version 2 headers are installed])], + [ + ayudame2_include_dir="$withval" + if test -n "$ayudame2_include_dir"; then + CPPFLAGS="-I$ayudame2_include_dir $CPPFLAGS" + fi + ], [ayudame2_include_dir=no]) + +# Ayudame 1 header is capitalized +AC_CHECK_HEADERS([Ayudame.h]) +AC_ARG_ENABLE(ayudame1, [AS_HELP_STRING([--disable-ayudame1], + [Do not use Ayudame lib version 1])], + enable_ayudame1=$enableval, enable_ayudame1=yes) +# Ayudame 2 header is lowercase +AC_CHECK_HEADERS([ayudame.h]) +AC_ARG_ENABLE(ayudame2, [AS_HELP_STRING([--disable-ayudame2], + [Do not use Ayudame lib version 2])], + enable_ayudame2=$enableval, enable_ayudame2=yes) +# Version 1 takes precedence: version 2 is only considered when version 1 is disabled or its header is missing. +if test x$enable_ayudame1 = xyes -a x$ac_cv_header_Ayudame_h = xyes; then + AC_DEFINE([STARPU_USE_AYUDAME1], [1], [Define to 1 if Ayudame 1 is available and should be used]) + ayu_msg="yes, use version 1" +else + if test x$enable_ayudame2 = xyes -a x$ac_cv_header_ayudame_h = xyes; then + AC_DEFINE([STARPU_USE_AYUDAME2], [1], [Define to 1 if Ayudame 2 is available and should be used]) + ayu_msg="yes, use version 2" + else + ayu_msg="no" + fi +fi + +# NOTE(review): these conditionals only look at the enable switches, not at whether a header was found or which version was selected above. +AM_CONDITIONAL([STARPU_USE_AYUDAME1], [test "x$enable_ayudame1" = "xyes"]) +AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"]) + +STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h 
${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`" +AC_SUBST([STARPU_FXT_EVENT_DEFINES]) + +# Heteroprio works better if it can store information based on the program's name +AC_MSG_CHECKING(whether the target supports program_invocation_short_name) +AC_LINK_IFELSE([AC_LANG_SOURCE( + [ + #include <errno.h> + #include <stdio.h> + int main() { + printf("%s\n", program_invocation_short_name); + return 0; + } + ])], + [AC_DEFINE([STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME], [1], [variable program_invocation_short_name is available]) AC_MSG_RESULT(yes)], + AC_MSG_RESULT(no) +) + +############################################################################### +# # +# Miscellaneous options for StarPU # +# # +############################################################################### + +AC_MSG_CHECKING(whether data locality should be enforced) +AC_ARG_ENABLE(data-locality-enforce, [AS_HELP_STRING([--enable-data-locality-enforce], + [enable data locality enforcement])], + enable_data_locality_enforce=$enableval, enable_data_locality_enforce=no) +AC_MSG_RESULT($enable_data_locality_enforce) +if test x$enable_data_locality_enforce = xyes ; then + AC_DEFINE([STARPU_DATA_LOCALITY_ENFORCE], [1], [Define to 1 to enforce data locality]) +fi + +# The value is validated before it is reported and written to the config header. +AC_MSG_CHECKING(how many buffers can be manipulated per task) +AC_ARG_ENABLE(maxbuffers, [AS_HELP_STRING([--enable-maxbuffers=], + [maximum number of buffers per task])], + nmaxbuffers=$enableval, nmaxbuffers=8) +if test x$nmaxbuffers = x -o x$nmaxbuffers = xyes +then + AC_MSG_ERROR([The --enable-maxbuffers option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxbuffers) +AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers], + [how many buffers can be manipulated per task]) + +AC_MSG_CHECKING(how many MPI nodes fxt files can be manipulated when generating traces) +AC_ARG_ENABLE(fxt-max-files, [AS_HELP_STRING([--enable-fxt-max-files=], + [maximum number of mpi nodes for traces])], + nmaxfxtfiles=$enableval, nmaxfxtfiles=64) +if 
test x$nmaxfxtfiles = x -o x$nmaxfxtfiles = xyes +then + AC_MSG_ERROR([The --enable-fxt-max-files option needs to be given a number]) +fi +AC_MSG_RESULT($nmaxfxtfiles) +AC_DEFINE_UNQUOTED(STARPU_FXT_MAX_FILES, [$nmaxfxtfiles], + [how many MPI nodes fxt files can be manipulated when generating traces]) + +# A value of 0, which is also the default, means that the number of memory nodes is computed from the enabled device counts below. +AC_MSG_CHECKING(maximum number of memory nodes to use per MPI rank) +AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=], + [maximum number of memory nodes per MPI rank])], + maxnodes=$enableval, maxnodes=0) + +if test x$maxnodes = x0 ; then + if test x$enable_simgrid = xyes ; then + # We need the room for the virtual CUDA/OpenCL devices + nodes=`expr 4 + $nmaxcudadev + $nmaxopencldev + 1 + $nmaxmpidev` + else + # We have one memory node shared by all CPU workers, one node per GPU + # we add nodes to use 2 memory disks + nodes=`expr $nmaxnumanodes + 2` + if test x$enable_cuda = xyes ; then + # we could have used nmaxcudadev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxcudadev` + fi + if test x$enable_hip = xyes ; then + # we could have used nmaxhipdev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxhipdev` + fi + if test x$enable_opencl = xyes ; then + # we could have used nmaxopencldev + 1, but this would certainly give an + # odd number. + nodes=`expr $nodes + $nmaxopencldev` + fi + if test x$enable_max_fpga = xyes ; then + # we could have used nmaxmaxfpgadev + 1, but this would certainly give an + # odd number. 
+ nodes=`expr $nodes + $nmaxmaxfpgadev` + fi + + #nmaxmpidev = 0 if mpi master-slave is disabled + nodes=`expr $nodes + $nmaxmpidev` + + #nmaxtcpipdev = 0 if tcpip master-slave is disabled + nodes=`expr $nodes + $nmaxtcpipdev` + fi + + # set maxnodes to the next power of 2 greater than nodes + maxnodes=1 + while test "$maxnodes" -lt "$nodes" + do + maxnodes=`expr $maxnodes \* 2` + done +fi +if test x$maxnodes = x -o x$maxnodes = xyes +then + AC_MSG_ERROR([The --enable-maxnodes option needs to be given a number]) +fi +if test $maxnodes -gt 32 ; then + AC_MSG_WARN([Note: the wt_mask feature only supports 32 memory nodes]) +fi + +AC_MSG_CHECKING(maximum number of memory nodes) +AC_MSG_RESULT($maxnodes) +AC_DEFINE_UNQUOTED(STARPU_MAXNODES, [$maxnodes], + [maximum number of memory nodes]) + + +AC_MSG_CHECKING(whether allocation cache should be used) +AC_ARG_ENABLE(allocation-cache, [AS_HELP_STRING([--disable-allocation-cache], + [disable data allocation cache])], + enable_allocation_cache=$enableval, enable_allocation_cache=yes) +AC_MSG_RESULT($enable_allocation_cache) +if test x$enable_allocation_cache = xyes; then + AC_DEFINE(STARPU_USE_ALLOCATION_CACHE, [1], [enable data allocation cache]) +fi + +# An explicit directory is written to STARPU_PERF_MODEL_DIR; without the option no define is emitted and only the reported default location is set. +AC_ARG_WITH(perf-model-dir, [AS_HELP_STRING([--with-perf-model-dir=], [specify where performance models should be stored])], + [ + if test x$withval = xno; then + AC_MSG_ERROR(--without-perf-model-dir is not a valid option) + fi + + perf_model_dir="$withval" + have_explicit_perf_model_dir=yes + AC_DEFINE_UNQUOTED(STARPU_PERF_MODEL_DIR, ["$perf_model_dir"], [performance models location]) + ], [ + # by default, we put the performance models in + # $HOME/.starpu/sampling/ + have_explicit_perf_model_dir=no + perf_model_dir="\$STARPU_HOME/.starpu/sampling/" + ] + ) +AC_MSG_CHECKING(using explicit performance model location) +AC_MSG_RESULT($have_explicit_perf_model_dir) + +AC_MSG_CHECKING(performance models location) +AC_MSG_RESULT($perf_model_dir) + +# On many multicore CPUs, 
clock cycles are not synchronized +AC_CHECK_LIB([rt], [clock_gettime]) +AC_CHECK_FUNCS([clock_gettime]) + +# Compute the maximum number of workers (we round it to 16 for alignment +# purposes). +if test x$enable_simgrid != xyes; then + if test x$enable_cpu != xyes; then + maxcpus=0 + fi + if test x$enable_cuda != xyes; then + nmaxcudadev=0 + fi + + if test x$enable_max_fpga != xyes; then + nmaxmaxfpgadev=0 + fi + if test x$enable_opencl != xyes; then + nmaxopencldev=0 + fi + #By default, if we cannot build mpi master-slave nmaxmpidev is set to zero. + #But with the multiplication with maxcpus, we need to put it to one. + if test x$build_mpi_master_slave != xyes; then + nmaxmpidev=1 + fi + #By default, if we cannot build tcp/ip master-slave nmaxtcpipdev is set to zero. + #But with the multiplication with maxcpus, we need to put it to one. + if test x$build_tcpip_master_slave != xyes; then + nmaxtcpipdev=1 + fi +fi +# The sum is rounded up to a multiple of 16. NOTE(review): nmaxtcpipdev does not appear in any of the three formulas, and nmaxhipdev is not reset above when HIP is disabled. +if test $maxcpus = 0 +then + nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* 64 \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +elif test $nmaxmpidev = 0 +then + nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +else + nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` +fi +AC_MSG_CHECKING(Maximum number of workers) +AC_MSG_RESULT($nmaxworkers) +AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers]) +# nmaxdevs ends up as the largest of the per-architecture device limits. +nmaxdevs=0 +if test $nmaxdevs -lt $nmaxcudadev; then + nmaxdevs=$nmaxcudadev +fi +if test $nmaxdevs -lt $nmaxhipdev; then + nmaxdevs=$nmaxhipdev +fi +if test $nmaxdevs -lt $nmaxopencldev; then + nmaxdevs=$nmaxopencldev +fi +if test $nmaxdevs -lt $nmaxmaxfpgadev; then + nmaxdevs=$nmaxmaxfpgadev +fi +if test $nmaxdevs -lt $nmaxmpidev; then + nmaxdevs=$nmaxmpidev +fi +if test $nmaxdevs -lt $nmaxtcpipdev; then + nmaxdevs=$nmaxtcpipdev 
+fi +AC_DEFINE_UNQUOTED(STARPU_NMAXDEVS, [$nmaxdevs], [Maximum number of device per device arch]) + +# Computes the maximum number of combined worker +nmaxcombinedworkers=$maxcpus +AC_MSG_CHECKING(Maximum number of workers combinations) +AC_MSG_RESULT($nmaxcombinedworkers) +AC_DEFINE_UNQUOTED(STARPU_NMAX_COMBINEDWORKERS, + [$nmaxcombinedworkers], [Maximum number of worker combinations]) + + + +# Computes the maximum number of implementations per arch +AC_MSG_CHECKING(maximum number of implementations) +AC_ARG_ENABLE(maximplementations, [AS_HELP_STRING([--enable-maximplementations=], + [maximum number of implementations])], + maximplementations=$enableval, maximplementations=4) +# Reject an empty or valueless option before the value is reported and written to the config header. +if test x$maximplementations = x -o x$maximplementations = xyes +then + AC_MSG_ERROR([The --enable-maximplementations option needs to be given a number]) +fi +AC_MSG_RESULT($maximplementations) +AC_DEFINE_UNQUOTED(STARPU_MAXIMPLEMENTATIONS, [$maximplementations], + [maximum number of implementations]) + +# Enable LevelDB support if requested and the lib is found +AC_ARG_ENABLE(leveldb, [AS_HELP_STRING([--enable-leveldb], + [Enable linking with LevelDB if available])], + enable_leveldb=$enableval, enable_leveldb=no) +if test x$enable_leveldb = xyes; then +AC_LANG_PUSH([C++]) +AC_CHECK_HEADERS([leveldb/db.h], [AC_DEFINE([STARPU_HAVE_LEVELDB], [1], [Define to 1 if you have the header file.])]) +STARPU_HAVE_LIBRARY(LEVELDB, [leveldb]) +AC_LANG_POP([C++]) +fi +AM_CONDITIONAL(STARPU_HAVE_LEVELDB, test "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes") + +# Defines the calibration heuristic for the history-based calibration of StarPU +AC_MSG_CHECKING(calibration heuristic of history-based StarPU calibrator) +AC_ARG_ENABLE(calibration-heuristic, [AS_HELP_STRING([--enable-calibration-heuristic=], + [Define the maximum authorized deviation of StarPU history-based calibrator.])], + calibration_heuristic=$enableval, calibration_heuristic=50) +AC_MSG_RESULT($calibration_heuristic) 
+AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value]) + + +############################################################################### +# # +# MP Common settings # +# # +############################################################################### + +# Master-slave mode is on as soon as either the MPI or the TCP/IP master-slave backend is built. +if test x$build_mpi_master_slave = xyes -o x$build_tcpip_master_slave = xyes; then + build_master_slave=yes +else + build_master_slave=no +fi + +AC_MSG_CHECKING(whether the master-slave mode should be enabled) +AC_MSG_RESULT($build_master_slave) +AM_CONDITIONAL([STARPU_USE_MP], test "x$build_master_slave" = "xyes") + +AC_ARG_ENABLE([export-dynamic], [AS_HELP_STRING([--disable-export-dynamic], + [Prevent the linker from adding all symbols to the dynamic symbol table])], [], []) + +# The -rdynamic flag is only requested in master-slave builds and can be turned off with --disable-export-dynamic. +if test x$build_master_slave = xyes; then + AC_DEFINE(STARPU_USE_MP, [1], [Message-passing SINKs support + is enabled]) + + if test x$enable_export_dynamic != xno ; then + STARPU_EXPORT_DYNAMIC="-rdynamic" + fi +fi + +AC_SUBST(STARPU_EXPORT_DYNAMIC) + +############################################################################### +# # +# Flags for C Compiler # +# # +############################################################################### + +IS_SUPPORTED_FLAG(-Wall) +IS_SUPPORTED_CFLAG(-Werror=implicit) +IS_SUPPORTED_CFLAG(-Werror=implicit-function-declaration) +if test x$enable_perf_debug = xyes; then + IS_SUPPORTED_FLAG(-no-pie) + IS_SUPPORTED_FLAG(-no-PIE) + IS_SUPPORTED_FLAG(-fno-pie) +fi + +IS_SUPPORTED_FLAG(-Wextra) +IS_SUPPORTED_FLAG(-Wunused) +IS_SUPPORTED_CFLAG(-Wundef) +IS_SUPPORTED_CXXFLAG(-Wundef) +IS_SUPPORTED_FLAG(-Wshadow) +IS_SUPPORTED_CFLAG(-Wpointer-arith) +IS_SUPPORTED_CXXFLAG(-Wpointer-arith) + +# Setting STARPU_DEVEL to any non-empty value in the environment turns on the stricter developer flags. +if test "x$STARPU_DEVEL" != x; then + AC_DEFINE(STARPU_DEVEL, [1], [enable developer warnings]) + IS_SUPPORTED_CFLAG(-Werror=pointer-arith) + IS_SUPPORTED_CXXFLAG(-Werror=pointer-arith) + IS_SUPPORTED_FLAG(-fno-common) +fi +AM_CONDITIONAL([STARPU_DEVEL],[test 
"x$STARPU_DEVEL" != x]) + +AC_SUBST(GLOBAL_AM_CFLAGS) +AC_SUBST(GLOBAL_AM_CXXFLAGS) +AC_SUBST(GLOBAL_AM_FFLAGS) +AC_SUBST(GLOBAL_AM_FCFLAGS) + +# Same value as Automake's, for use in other places. +pkglibdir="\${libdir}/$PACKAGE" +AC_SUBST([pkglibdir]) + +# Each whitespace-separated word given to --with-check-flags is probed individually through IS_SUPPORTED_FLAG. +AC_ARG_WITH(check-flags, [AS_HELP_STRING([--with-check-flags], + [Specify flags for C and Fortran compilers])], + check_flags=$withval, check_flags="") +if test "x$check_flags" != "x" ; then + for xflag in $check_flags + do + IS_SUPPORTED_FLAG($xflag) + done +fi + +######################################################################## +# # +# Parallel worker support # +# # +######################################################################## + +# Parallel worker support defaults to yes, except when starpu_darwin is yes. +default_enable_parallel_worker=yes +if test x$starpu_darwin = xyes ; then + default_enable_parallel_worker=no +fi +AC_ARG_ENABLE(parallel-worker, [AS_HELP_STRING([--enable-parallel-worker], [build the parallel worker support])], + enable_parallel_worker=$enableval, enable_parallel_worker=$default_enable_parallel_worker) + +AC_MSG_CHECKING(for parallel worker support) + +if test x$enable_parallel_worker = xyes; then + AC_DEFINE(STARPU_PARALLEL_WORKER, [1], [Define this to enable parallel worker support]) + AC_OPENMP +fi + +AM_CONDITIONAL([STARPU_PARALLEL_WORKER], [test "x$enable_parallel_worker" = "xyes"]) +AC_MSG_RESULT($enable_parallel_worker) + +############################################################################### +# # +# OpenMP LLVM runtime support # +# # +############################################################################### + +# LLVM OpenMP support is opt-in; the checks that follow force it back to no and record the reason in openmp_llvm_msg. +AC_ARG_ENABLE(openmp-llvm, [AS_HELP_STRING([--enable-openmp-llvm], + [build the OpenMP LLVM runtime support])], + enable_openmp_llvm=$enableval, enable_openmp_llvm=no) + +openmp_llvm_msg="" +if test x$starpu_windows = xyes ; then + enable_openmp_llvm=no + openmp_llvm_msg="disabled on windows" +fi +if test x$enable_simgrid = xyes ; then + enable_openmp_llvm=no + openmp_llvm_msg="incompatibility with Simgrid support" +fi 
+if test x$PROG_CLANG = x ; then
+    enable_openmp_llvm=no
+    openmp_llvm_msg="missing clang"
+fi
+
+if test x$enable_openmp_llvm = xyes; then
+    AC_DEFINE(STARPU_OPENMP_LLVM, [1], [Define this to enable LLVM OpenMP runtime support])
+fi
+
+AC_MSG_CHECKING(for LLVM OpenMP runtime support)
+AM_CONDITIONAL([STARPU_OPENMP_LLVM], [test "x$enable_openmp_llvm" = "xyes"])
+AC_MSG_RESULT($enable_openmp_llvm $openmp_llvm_msg)
+
+###############################################################################
+#                                                                             #
+#                           OpenMP runtime support                            #
+#                                                                             #
+###############################################################################
+
+# Generic OpenMP runtime support: enabled by default, then switched off again
+# below on platforms or configurations that cannot provide it.
+AC_ARG_ENABLE(openmp, [AS_HELP_STRING([--enable-openmp],
+                [build the OpenMP runtime support])],
+                enable_openmp=$enableval, enable_openmp=yes)
+
+# Force activating the generic OpenMP runtime support when the LLVM OpenMP
+# runtime support is enabled.  This has to happen after the option parsing
+# above, which assigns enable_openmp unconditionally and would otherwise
+# discard the forced value when --disable-openmp is given.
+if test x$enable_openmp_llvm = xyes; then
+    enable_openmp="yes"
+fi
+
+AC_CHECK_HEADER([ucontext.h],[have_valid_ucontext=yes],[have_valid_ucontext=no])
+
+# Each test below can veto OpenMP support; openmp_msg keeps the last reason
+# so that it can be shown next to the final result.
+openmp_msg=""
+if test x$starpu_windows = xyes ; then
+    enable_openmp=no
+    openmp_msg="disabled on windows"
+fi
+if test x$enable_simgrid = xyes ; then
+    enable_openmp=no
+    openmp_msg="incompatibility with Simgrid support"
+fi
+if test x$have_valid_ucontext = xno ; then
+    enable_openmp=no
+    openmp_msg="ucontext.h unavailable"
+fi
+
+if test x$enable_openmp = xyes; then
+    AC_DEFINE(STARPU_OPENMP, [1], [Define this to enable OpenMP runtime support])
+fi
+
+AC_MSG_CHECKING(for OpenMP runtime support)
+AM_CONDITIONAL([STARPU_OPENMP], [test "x$enable_openmp" = "xyes"])
+AC_MSG_RESULT($enable_openmp $openmp_msg)
+
+AM_CONDITIONAL([STARPU_HAVE_OPENMP],[test x$enable_simgrid = xno -a -n "$OPENMP_CFLAGS" -a x$starpu_windows != xyes])
+
+###############################################################################
+#                                                                             #
+#                               SOCL interface                                #
+#                                                                             #
+###############################################################################
+
+AC_ARG_ENABLE([socl],
+              [AS_HELP_STRING([--enable-socl],
+                              [build the OpenCL interface (experimental)])],
+              [enable_socl="$enableval"],
+ [enable_socl="maybe"]) + +AC_MSG_CHECKING(for SOCL) + +# in case SOCL was explicitly required, but is not available, this is an error +if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then + AC_MSG_ERROR([SOCL cannot be enabled without OpenCL]) +fi + +# now we enable SOCL if and only if a proper setup is available +if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then + build_socl=$have_valid_opencl +else + build_socl=no +fi + +AC_MSG_RESULT($build_socl) +AM_CONDITIONAL([STARPU_BUILD_SOCL], [test "x$build_socl" = "xyes"]) +AM_CONDITIONAL([STARPU_USE_SOCL], [test "x$build_socl" = "xyes"]) + +if test "$build_socl" = "yes" ; then + AC_CHECK_FUNCS([clGetExtensionFunctionAddressForPlatform]) + if test -n "$SOCL_OCL_LIB_OPENCL" -a -f "$SOCL_OCL_LIB_OPENCL" ; then + run_socl_check=yes + SOCL_OCL_LIB_OPENCL_DIR=$(dirname $SOCL_OCL_LIB_OPENCL) + AC_SUBST(SOCL_OCL_LIB_OPENCL_DIR) + else + run_socl_check=no + fi +else + run_socl_check=no +fi +############################################################################### +# # +# Debugging # +# # +############################################################################### + +AC_PATH_PROG([GDB], [gdb], [not-found]) +if test "x$GDB" != "xnot-found"; then + AC_DEFINE_UNQUOTED([STARPU_GDB_PATH], ["$GDB"], + [Path to the GNU debugger.]) +fi + +############################################################################### +# # +# Examples # +# # +############################################################################### + +AC_ARG_ENABLE(build-tests, [AS_HELP_STRING([--disable-build-tests], + [disable building of tests])], + enable_build_tests=$enableval, enable_build_tests=yes) +# check stuff for tests (todo) +AM_CONDITIONAL(STARPU_BUILD_TESTS, [test x$enable_build_tests != xno]) +AC_ARG_ENABLE(build-examples, [AS_HELP_STRING([--disable-build-examples], + [disable building of examples])], + enable_build_examples=$enableval, enable_build_examples=yes) +# check stuff for examples (todo) 
+AM_CONDITIONAL(STARPU_BUILD_EXAMPLES, [test x$enable_build_examples != xno]) +AC_ARG_ENABLE(opengl-render, [AS_HELP_STRING([--enable-opengl-render], + [enable OpenGL rendering of some examples])], + enable_opengl_render=$enableval, enable_opengl_render=no) + +if test x$enable_opengl_render = xyes; then + STARPU_CHECK_LIB(OPENGL_RENDER, glut, glutInit,,AC_MSG_ERROR([cannot find glut])) + STARPU_CHECK_LIB(OPENGL_RENDER, GL, glXCreateContext,,AC_MSG_ERROR([cannot find GL])) + STARPU_CHECK_LIB(OPENGL_RENDER, GLU, gluLookAt,,AC_MSG_ERROR([cannot find GLU])) + + AC_DEFINE(STARPU_OPENGL_RENDER, [1], [enable OpenGL rendering of some examples]) +fi + +AC_MSG_CHECKING(whether OpenGL rendering is enabled) +AC_SUBST(STARPU_OPENGL_RENDER, $enable_opengl_render) +AC_MSG_RESULT($enable_opengl_render) +AM_CONDITIONAL([STARPU_HAVE_OPENGL], [test "x$enable_opengl_render" = xyes]) + +AC_PATH_XTRA +if test "x$no_x" != "xyes"; then + AC_DEFINE(STARPU_HAVE_X11, [1], [enable X11]) +fi +AM_CONDITIONAL([STARPU_HAVE_X11], [test "x$no_x" != "xyes"]) + +# In case there are BLAS kernels that are used by the example applications +# we may specify which library to use. Note that this is not used for StarPU +# itself. + +blas_lib=maybe +AC_ARG_ENABLE(blas-lib, + [ --enable-blas-lib[=blaslibname]: + none [default]: no BLAS lib is used + atlas: use ATLAS library + goto: use GotoBLAS library + mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)], + [ + if test "x$enableval" = "xatlas" ; then + blas_lib=atlas + elif test "x$enableval" = "xgoto" ; then + blas_lib=goto + elif test "x$enableval" = "xopenblas" ; then + blas_lib=openblas + elif test "x$enableval" = "xnone" ; then + blas_lib=none + elif test "x$enableval" = "xmkl" ; then + blas_lib=mkl + elif test "x$enableval" = "xarmpl" ; then + blas_lib=armpl + elif test x$enableval = xno; then + blas_lib=none + else + echo + echo "Error!" 
+ echo "Unknown BLAS library" + exit -1 + fi + ]) + +if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then + AC_ARG_WITH(goto-dir, [AS_HELP_STRING([--with-goto-dir=], [specify GotoBLAS lib location])], + [ + blas_lib=goto + gotodir=$withval + AC_SUBST(GOTODIR, $gotodir) + + CPPFLAGS="${CPPFLAGS} -I$gotodir/ " + LDFLAGS="${LDFLAGS} -L$gotodir/ " + ] + ) + + if test x$blas_lib = xgoto; then + STARPU_CHECK_LIB(BLAS, gfortran, main,,) + STARPU_CHECK_LIB(BLAS, ifcore, main,,) + # Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a}) + STARPU_CHECK_LIB(BLAS, goto2, sgemm_,, [havegoto2=no], [$STARPU_BLAS_LDFLAGS]) + if test x$havegoto2 = xno; then + STARPU_CHECK_LIB(BLAS, goto, sgemm_,,AC_MSG_ERROR([cannot find goto lib]), [$STARPU_BLAS_LDFLAGS]) + fi + AC_DEFINE(STARPU_GOTO, [1], [use STARPU_GOTO library]) + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then + AC_ARG_WITH(atlas-dir, [AS_HELP_STRING([--with-atlas-dir=], [specify ATLAS lib location])], + [ + AC_MSG_CHECKING(STARPU_ATLAS location) + blas_lib=atlas + atlasdir=$withval + AC_MSG_RESULT($atlasdir) + AC_SUBST(ATLASDIR, $atlasdir) + + CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ " + LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ " + ] + ) + + if test x$blas_lib = xatlas; then + # test whether STARPU_ATLAS is actually available + AC_CHECK_HEADER([cblas.h],,AC_MSG_ERROR([cannot find atlas headers])) + STARPU_CHECK_LIB(BLAS, atlas, ATL_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),) + STARPU_CHECK_LIB(BLAS, cblas, cblas_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),[-latlas]) + STARPU_CHECK_LIB(BLAS, f77blas, sgemm_,,AC_MSG_ERROR([cannot find f77blas lib]),) + AC_DEFINE(STARPU_ATLAS, [1], [use STARPU_ATLAS library]) + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then + PKG_CHECK_MODULES([OPENBLAS], [openblas], + [PKG_CHECK_MODULES([BLAS_OPENBLAS], [blas-openblas], + [AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) + 
AC_SUBST([STARPU_OPENBLAS], [1]) + CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} " + LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} " + blas_lib=openblas + ], + [ if test x$blas_lib = xopenblas; then + STARPU_CHECK_LIB(OPENBLAS, blas-openblas, cblas_sgemm,,AC_MSG_ERROR([cannot find blas-openblas lib]),[-lblas-openblas]) + AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) + AC_SUBST([STARPU_OPENBLAS], [1]) + fi + ]) + ], + [ if test x$blas_lib = xopenblas; then + STARPU_CHECK_LIB(OPENBLAS, openblas, cblas_sgemm,,AC_MSG_ERROR([cannot find openblas lib]),[-lopenblas]) + AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) + AC_SUBST([STARPU_OPENBLAS], [1]) + fi + ] ) +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then + # Should we use MKL ? + if test -n "$MKLROOT" ; then + CPPFLAGS="${CPPFLAGS} -I$MKLROOT/include" + case $host_vendor in + *1om) mkl_plat=mic ;; + *) mkl_plat=intel64 ;; + esac + SAVED_LIBS=$LIBS + STARPU_BLAS_LDFLAGS="-L$MKLROOT/lib/$mkl_plat -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm -lpthread -ldl" + LIBS="$LIBS $STARPU_BLAS_LDFLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[ + #include + ]], [[ ]])], + [ blas_lib=mkl ], + [ STARPU_BLAS_LDFLAGS="" ], + ) + LIBS=$SAVED_LIBS + fi + AC_ARG_WITH(mkl-cflags, [AS_HELP_STRING([--with-mkl-cflags], [specify MKL compilation flags])], + [ + CPPFLAGS="${CPPFLAGS} $withval" + blas_lib=mkl + ]) + + AC_ARG_WITH(mkl-ldflags, [AS_HELP_STRING([--with-mkl-ldflags], [specify MKL linking flags])], + [ + STARPU_BLAS_LDFLAGS="$withval" + blas_lib=mkl + ]) + if test x$blas_lib = xmkl; then + AC_DEFINE(STARPU_MKL, [1], [use MKL library]) + fi +fi + +if test x$blas_lib = xmaybe -o x$blas_lib = xarmpl; then + # Should we use ARMPL ? 
+ if test -n "$ARMPL_DIR" ; then + CPPFLAGS="${CPPFLAGS} -I$ARMPL_INCLUDES" + SAVED_LIBS=$LIBS + STARPU_BLAS_LDFLAGS="-L$ARMPL_LIBRARIES -larmpl_lp64 -lgfortran -lm" + LIBS="$LIBS $STARPU_BLAS_LDFLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[ + #include + ]], [[ ]])], + [ blas_lib=armpl ], + [ STARPU_BLAS_LDFLAGS="" ], + ) + LIBS=$SAVED_LIBS + fi + AC_ARG_WITH(armpl-cflags, [AS_HELP_STRING([--with-armpl-cflags], [specify ARMPL compilation flags])], + [ + CPPFLAGS="${CPPFLAGS} $withval" + blas_lib=armpl + ]) + + AC_ARG_WITH(armpl-ldflags, [AS_HELP_STRING([--with-armpl-ldflags], [specify ARMPL linking flags])], + [ + STARPU_BLAS_LDFLAGS="$withval" + blas_lib=armpl + ]) + if test x$blas_lib = xarmpl; then + AC_DEFINE(STARPU_ARMPL, [1], [use ARMPL library]) + fi +fi + +if test x$blas_lib = xmaybe; then + #perhaps it is possible to use some BLAS lib from the system + use_system_blas=no + STARPU_SEARCH_LIBS(BLAS,[sgemm_],[blas],use_system_blas=yes,,) + if test x$use_system_blas = xyes; then + AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use refblas library]) + blas_lib=system + elif test x"$BLAS_LIBS" != x; then + AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use user defined library]) + STARPU_BLAS_LDFLAGS="$BLAS_LIBS" + blas_lib=system + AC_ARG_VAR([BLAS_LIBS], [linker flags for blas]) + else + blas_lib=none + fi +fi + +if test x$blas_lib = xsystem; then + AC_CHECK_HEADER([cblas.h], [have_cblas_h=yes], [have_cblas_h=no]) +fi +AM_CONDITIONAL(STARPU_HAVE_CBLAS_H, test x$have_cblas_h = xyes) +if test x$have_cblas_h = xyes; then + AC_DEFINE(STARPU_HAVE_CBLAS_H, [1], [The blas library has blas.h]) +fi +if test x$blas_lib != xnone; then + AC_DEFINE(STARPU_HAVE_BLAS, [1], [The blas library is available]) + SAVED_LIBS="$LIBS" + LIBS="$LIBS -lblas" + AC_CHECK_FUNCS([cblas_sgemv]) + LIBS="$SAVED_LIBS" + STARPU_SEARCH_LIBS([LIBLAPACK],[dgels_],[lapack],[enable_liblapack=yes],[enable_liblapack=no]) +fi +AM_CONDITIONAL(STARPU_HAVE_LIBLAPACK,test x$enable_liblapack = xyes) 
+# AC_CHECK_FUNCS records its result in the shell cache variable
+# ac_cv_func_<function>; HAVE_CBLAS_SGEMV only exists as a config.h macro, so
+# testing it from the shell would leave this conditional permanently false.
+# The variable is quoted because the check is skipped when no BLAS is used.
+AM_CONDITIONAL(STARPU_HAVE_CBLAS_SGEMV, test "x$ac_cv_func_cblas_sgemv" = xyes)
+
+# Expose the BLAS flavour selected above to the Makefiles.
+AM_CONDITIONAL(STARPU_ATLAS_BLAS_LIB, test x$blas_lib = xatlas)
+AM_CONDITIONAL(STARPU_GOTO_BLAS_LIB, test x$blas_lib = xgoto)
+AM_CONDITIONAL(STARPU_MKL_BLAS_LIB, test x$blas_lib = xmkl)
+AM_CONDITIONAL(STARPU_SYSTEM_BLAS_LIB, test x$blas_lib = xsystem)
+AM_CONDITIONAL(STARPU_NO_BLAS_LIB, test x$blas_lib = xnone -a x$enable_simgrid = xno)
+AC_SUBST(STARPU_BLAS_LDFLAGS)
+
+AC_MSG_CHECKING(which BLAS lib should be used)
+AC_MSG_RESULT($blas_lib)
+AC_SUBST(BLAS_LIB,$blas_lib)
+
+###############################################################################
+#                                                                             #
+#                         Multiple linear regression                          #
+#                                                                             #
+###############################################################################
+AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--enable-mlr],
+                [Enable multiple linear regression models])],
+                enable_mlr=$enableval, enable_mlr=no)
+AC_ARG_ENABLE(mlr-system-blas, [AS_HELP_STRING([--enable-mlr-system-blas],
+                [Make the multiple linear regression models use the system BLAS instead of min-dgels])],
+                enable_mlr_blas=$enableval, enable_mlr_blas=no)
+
+# Note the inverted wording of the question: "no" means the models are enabled.
+AC_MSG_CHECKING(whether multiple linear regression models are disabled)
+if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
+    AC_MSG_RESULT(no)
+    install_min_dgels=no
+    support_mlr=yes
+    STARPU_SEARCH_LIBS(LAPACK,[dgels_],[lapack],use_system_lapack=yes,,)
+    if test x$blas_lib = xnone ; then
+        use_system_lapack=no
+    fi
+    if test x$enable_mlr_blas = xyes -a x$use_system_lapack = xyes; then
+        AC_DEFINE(STARPU_MLR_MODEL, [1], [use reflapack library])
+        LDFLAGS="-llapack $LDFLAGS"
+    else
+        if test x$enable_mlr_blas = xyes -a x$blas_lib = xmkl; then
+            AC_DEFINE(STARPU_MLR_MODEL, [1], [use mkl library])
+        else
+            AC_MSG_CHECKING(whether min-dgels is linked)
+            if test x"$DGELS_LIBS" != x; then
+                AC_MSG_RESULT(yes)
+                AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library])
+                AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels])
+            else
+                AC_MSG_RESULT(no)
+
AC_MSG_CHECKING(min-dgels source) + if test "${cross_compiling}" != "no" ; then + # Cross-compiling is not supported by min-dgels + AC_MSG_RESULT(no) + install_min_dgels=no + support_mlr=no + else + AC_MSG_RESULT(yes) + DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/minlibblas.a $STARPU_BUILD_DIR/min-dgels/build/minlibdgels.a $STARPU_BUILD_DIR/min-dgels/build/minlibf2c.a -Wl,--end-group" + AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library]) + AC_DEFINE(STARPU_BUILT_IN_MIN_DGELS, [1], [use built-in min_dgels]) + AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels]) + install_min_dgels=yes + fi + fi + fi + fi +else + AC_MSG_RESULT(yes) + install_min_dgels=no + support_mlr=no +fi +AM_CONDITIONAL(STARPU_USE_MIN_DGELS, test x$install_min_dgels = xyes) + +########################################## +# FFT # +########################################## + +have_fftw=no +have_fftwf=no +have_fftwl=no +fft_support=no + +AC_ARG_ENABLE(starpufft, [AS_HELP_STRING([--disable-starpufft], + [Disable build of StarPU-FFT])], + enable_starpufft=$enableval,enable_starpufft=yes) + +PKG_CHECK_MODULES([FFTW], [fftw3], [ + AC_DEFINE([STARPU_HAVE_FFTW], [1], [Define to 1 if you have the libfftw3 library.]) + AC_SUBST([STARPU_HAVE_FFTW], [1]) + have_fftw=yes +], [:]) +AM_CONDITIONAL(STARPU_HAVE_FFTW, [test x$have_fftw = xyes]) + +PKG_CHECK_MODULES([FFTWF], [fftw3f], [ + AC_DEFINE([STARPU_HAVE_FFTWF], [1], [Define to 1 if you have the libfftw3f library.]) + AC_SUBST([STARPU_HAVE_FFTWF], [1]) + have_fftwf=yes +], [:]) +AM_CONDITIONAL(STARPU_HAVE_FFTWF, [test x$have_fftwf = xyes]) + +PKG_CHECK_MODULES([FFTWL], [fftw3l], [ + AC_DEFINE([STARPU_HAVE_FFTWL], [1], [Define to 1 if you have the libfftw3l library.]) + AC_SUBST([HAVE_FFTWFL], [1]) + have_fftwl=yes +], [:]) +AM_CONDITIONAL(STARPU_HAVE_FFTWL, [test x$have_fftwl = xyes]) + +if test x$enable_starpufft = xyes -a \( \( x$enable_cpu = xyes -a x$have_fftw = xyes -a x$have_fftwf = xyes \) -o x$have_cufftdoublecomplex 
= xyes \); then + fft_support=yes +fi +AM_CONDITIONAL(STARPU_BUILD_STARPUFFT, [test x$fft_support = xyes]) + +AC_ARG_ENABLE(starpufft-examples, [AS_HELP_STRING([--enable-starpufft-examples], + [enable build of StarPU FFT examples])], + enable_starpufft_examples=$enableval, enable_starpufft_examples=no) +AM_CONDITIONAL(STARPU_BUILD_STARPUFFT_EXAMPLES, [test x$enable_starpufft_examples = xyes]) + +########################################## +# hwloc # +########################################## + +have_valid_hwloc=no +SAVED_LIBS="${LIBS}" +SAVED_CPPFLAGS="${CPPFLAGS}" +SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" +AC_ARG_WITH([hwloc], + [AS_HELP_STRING([--without-hwloc], [Disable hwloc (enabled by default)])], + [ + if test x$withval != xno; then + if test "$withval" = "yes" ; then + use_hwloc=yes + else + # use specified path + if test ! -d "$withval" ; then + AC_MSG_ERROR("Directory specified for hwloc <$withval> does not exist") + fi + if test -d "$withval/lib64/pkgconfig" ; then + export PKG_CONFIG_PATH=$withval/lib64/pkgconfig:$PKG_CONFIG_PATH + else + if test -d "$withval/lib/pkgconfig" ; then + export PKG_CONFIG_PATH=$withval/lib/pkgconfig:$PKG_CONFIG_PATH + else + AC_MSG_ERROR("Hwloc directory <$withval> does not have a subdirectory lib/pkgconfig or lib64/pkgconfig") + fi + fi + use_hwloc=yes + fi + else + use_hwloc=no + fi + ], + [ + use_hwloc=maybe + ]) +AS_IF([test "$use_hwloc" != "no"], + [PKG_CHECK_MODULES([HWLOC],[hwloc], [have_valid_hwloc=yes], [have_valid_hwloc=no])] + ) +AM_CONDITIONAL(STARPU_HAVE_HWLOC, test "x$have_valid_hwloc" = "xyes") +# in case hwloc was explicitly required, but is not available, this is an error +AS_IF([test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no"], + [AC_MSG_ERROR([cannot find hwloc or pkg-config])] + ) +# in case hwloc is not available but was not explicitly disabled, this is an error +AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"], + [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. 
If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])] + ) + +LIBS="${HWLOC_LIBS} ${SAVED_LIBS}" +CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}" + +AS_IF([test "$have_valid_hwloc" = "yes"], + [AC_DEFINE([STARPU_HAVE_HWLOC], [1], [Define to 1 if you have the hwloc library.]) + HWLOC_REQUIRES=hwloc + AC_SUBST([STARPU_HAVE_HWLOC], [1]) + AC_CHECK_DECLS([hwloc_cuda_get_device_osdev_by_index], [], [], [[#include ]]) + AC_CHECK_DECLS([hwloc_hip_get_device_osdev_by_index], [], [], [[#include ]]) + AC_CHECK_DECLS([hwloc_distances_obj_pair_values], [], [], [[#include ]]) + ]) + +AC_CHECK_FUNCS([hwloc_topology_dup]) +AC_CHECK_FUNCS([hwloc_topology_set_components]) +AC_CHECK_FUNCS([hwloc_cpukinds_get_nr]) +AC_CHECK_FUNCS([hwloc_get_area_memlocation]) +AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes) + +LIBS="${SAVED_LIBS}" +CPPFLAGS="${SAVED_CPPFLAGS}" +export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH + +AC_MSG_CHECKING(whether hwloc should be used) +AC_MSG_RESULT($have_valid_hwloc) +AC_SUBST(HWLOC_REQUIRES) + +# is the header file f77.h available ? 
+AC_CHECK_HEADER([f77.h], [have_f77_h=yes], [have_f77_h=no])
+AC_SUBST(STARPU_HAVE_F77_H, $have_f77_h)
+AM_CONDITIONAL(STARPU_HAVE_F77_H, test x$have_f77_h = xyes)
+if test x$have_f77_h = xyes; then
+    AC_DEFINE([STARPU_HAVE_F77_H], [1], [Define to 1 if you have the <f77.h> header file.])
+fi
+
+# ICC is optional and only used to build some specific examples.  It is taken
+# from --with-icc when given, otherwise it is looked up in PATH.
+AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc=], [Name or path of the icc compiler])], icc_path="$withval",icc_path="")
+AC_ARG_WITH(icc-args, [AS_HELP_STRING([--with-icc-args[=]], [Arguments for icc])], [icc_args=$withval])
+AC_SUBST(ICC_ARGS,$icc_args)
+AC_ARG_ENABLE(icc, [AS_HELP_STRING([--enable-icc],
+                [Enable the compilation of specific ICC examples])],
+                enable_icc=$enableval, enable_icc=yes)
+ICC=""
+if test "$enable_icc" = "yes" ; then
+    if test "$icc_path" != "" ; then
+        case "$icc_path" in
+            */*)
+                # A path was given, use it as is
+                ICC="$icc_path"
+                ;;
+            *)
+                # A bare program name was given, resolve it through PATH so
+                # that the executable test below is meaningful
+                AC_PATH_PROG([ICC], [$icc_path])
+                ;;
+        esac
+    else
+        # Check if icc is available
+        AC_PATH_PROG([ICC], [icc])
+    fi
+fi
+# Only complain about permissions when a compiler was actually selected; an
+# empty ICC simply means that ICC is disabled or not installed.
+if test "x$ICC" != "x" && test ! -x "$ICC"; then
+    AC_MSG_WARN([The ICC compiler '$ICC' does not have the execute permission])
+    ICC=""
+fi
+if test "x$ICC" = "x"; then
+    enable_icc=no
+fi
+
+# If cuda and icc are both available, check they are compatible
+if test "$enable_cuda" = "yes" -a "$ICC" != ""; then
+    AC_MSG_CHECKING(whether CUDA and ICC are compatible)
+    # Temporarily compile with ICC against the StarPU headers of both the
+    # build tree and the source tree
+    OLD_CC="$CC"
+    CC="$ICC"
+    OLD_CFLAGS="$CFLAGS"
+    CFLAGS="-I$PWD/include -I$srcdir/include"
+    AC_COMPILE_IFELSE(
+        [AC_LANG_PROGRAM(
+            [[#include <cuda.h>
+            #include <starpu.h>]],
+            [[]]
+        )],
+        AC_MSG_RESULT(yes),
+        [ICC=""
+            AC_MSG_RESULT(no)]
+    )
+    CC="$OLD_CC"
+    CFLAGS="$OLD_CFLAGS"
+fi
+
+# Disable ICC on windows
+if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then
+    ICC=""
+fi
+
+if test "x$ICC" != "x"; then
+    AC_DEFINE(STARPU_HAVE_ICC, [1], [Define this if icc is available])
+fi
+AM_CONDITIONAL([STARPU_HAVE_ICC], [test "x$ICC" != "x"])
+
+# Do not generate manpages for the tools if we do not have help2man
+AC_CHECK_PROGS([HELP2MAN], [help2man])
+# Disable on windows
+if test "$starpu_windows" = "yes" ; then
+    HELP2MAN=""
+fi
+AM_CONDITIONAL([STARPU_HAVE_HELP2MAN], [test "x$HELP2MAN" != "x"])
+
+AC_CHECK_MEMBER([struct cudaDeviceProp.pciDomainID], + AC_DEFINE([STARPU_HAVE_DOMAINID],[1],[Define to 1 if CUDA device properties include DomainID]), + , [[#include ]]) + +AC_CHECK_MEMBER([struct cudaDeviceProp.pciBusID], + AC_DEFINE([STARPU_HAVE_BUSID],[1],[Define to 1 if CUDA device properties include BusID]), + , [[#include ]]) + +dnl Set this condition when Automake 1.11 or later is being used. +dnl Automake 1.11 introduced `silent-rules', hence the check. +m4_ifdef([AM_SILENT_RULES], + AM_CONDITIONAL([STARPU_HAVE_AM111], [true]), + AM_CONDITIONAL([STARPU_HAVE_AM111], [false])) + +########################################## +# Resource Manager # +########################################## + +starpurm_support=no +starpurm_dlb_support=no + +AC_ARG_ENABLE(starpurm, [AS_HELP_STRING([--enable-starpurm], [enable resource management support])], + enable_starpurm=$enableval, enable_starpurm=no) +if test "x$enable_starpurm" != xno +then + starpurm_support=yes + + AC_MSG_CHECKING(whether resource management debug messages should be displayed) + AC_ARG_ENABLE(starpurm-verbose, [AS_HELP_STRING([--enable-starpurm-verbose], + [display resource management verbose debug messages])], + enable_starpurm_verbose=$enableval, enable_starpurm_verbose=no) + AC_MSG_RESULT($enable_starpurm_verbose) + if test x$enable_starpurm_verbose = xyes; then + AC_DEFINE(STARPURM_VERBOSE, [1], [display resource management verbose debug messages]) + fi + + # DLB + DLB_CFLAGS="" + DLB_LIBS="" + AC_ARG_ENABLE(dlb, [AS_HELP_STRING([--enable-dlb], [enable DLB support])], + enable_dlb=$enableval, enable_dlb=no) + if test "x$enable_dlb" != xno + then + AC_ARG_WITH(dlb-include-dir, + [AS_HELP_STRING([--with-dlb-include-dir=], + [specify where DLB headers are installed])], + [dlb_inc_dirs="$withval"], [dlb_inc_dirs=""]) + + dlb_inc_dirs="${dlb_inc_dirs} /usr/include/dlb" + + dlb_incdir_found=no + for dlb_incdir in $dlb_inc_dirs + do + if test -n "$dlb_incdir" + then + SAVED_CPPFLAGS="${CPPFLAGS}" + 
CPPFLAGS=-I${dlb_incdir} + AC_CHECK_HEADERS([dlb.h]) + if test "$ac_cv_header_dlb_h" = "yes" + then + CPPFLAGS="-I$dlb_incdir ${SAVED_CPPFLAGS}" + DLB_CFLAGS="-I${dlb_incdir}" + dlb_incdir_found=yes + break + else + CPPFLAGS=${SAVED_CPPFLAGS} + fi + unset ac_cv_header_dlb_h + fi + done + + AC_ARG_WITH(dlb-lib-dir, + [AS_HELP_STRING([--with-dlb-lib-dir=], + [specify where DLB libraries are installed])], + [dlb_lib_dirs="$withval"], [dlb_lib_dirs=""]) + + dlb_lib_dirs="${dlb_lib_dirs} /usr/lib/dlb" + + dlb_libdir_found=no + for dlb_libdir in $dlb_lib_dirs + do + if test -n "$dlb_libdir" + then + SAVED_LDFLAGS="${LDFLAGS}" + LDFLAGS=-L${dlb_libdir} + AC_CHECK_LIB(dlb, [DLB_Init]) + if test "$ac_cv_lib_dlb_DLB_Init" = "yes" + then + LDFLAGS="-L${dlb_libdir} ${SAVED_LDFLAGS} ${STARPU_DLB_LDFLAGS}" + DLB_LIBS="-L${dlb_libdir} -ldlb" + dlb_libdir_found=yes + break + else + LDFLAGS=${SAVED_LDFLAGS} + fi + unset ac_cv_lib_dlb_DLB_Init + fi + done + + SAVED_CPPFLAGS="${CPPFLAGS}" + SAVED_CFLAGS="${CFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + CPPFLAGS="$HWLOC_CPPFLAGS -D_GNU_SOURCE $CPPFLAGS" + CFLAGS="$HWLOC_CFLAGS $CFLAGS" + LIBS="$HWLOC_LIBS $LIBS" + # check whether libhwloc has a dedicated glibc-sched.h include for conversion with glibc cpusets + AC_CHECK_HEADERS([hwloc/glibc-sched.h]) + CPPFLAGS="$SAVED_CPPFLAGS" + CFLAGS="$SAVED_CFLAGS" + LIBS="$SAVED_LIBS" + + SAVED_CPPFLAGS="${CPPFLAGS}" + SAVED_CFLAGS="${CFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + CPPFLAGS="$STARPU_CPPFLAGS $CPPFLAGS" + CFLAGS="$STARPU_CFLAGS $CFLAGS" + LIBS="$STARPU_LIBS $LIBS" + # check if StarPU implements starpu_worker_set_going_to_sleep_callback() + if test x$enable_worker_cb = xyes ; then + AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.]) + fi + + #AC_CHECK_FUNC([starpu_worker_set_going_to_sleep_callback],AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.])) + 
CPPFLAGS="$SAVED_CPPFLAGS" + CFLAGS="$SAVED_CFLAGS" + LIBS="$SAVED_LIBS" + + if test "x$dlb_incdir_found" != "xyes" -o "x$dlb_libdir_found" != "xyes" + then + enable_dlb=no + fi + fi + + AC_MSG_CHECKING(whether DLB support should be enabled) + AC_MSG_RESULT($enable_dlb) + if test "x$enable_dlb" != "xno" + then + AC_DEFINE([STARPURM_HAVE_DLB], [1], [Define to 1 if dlb support is enabled.]) + starpurm_dlb_support=yes + + AC_MSG_CHECKING(whether DLB resource management debug messages should be displayed) + AC_ARG_ENABLE(starpurm-dlb-verbose, [AS_HELP_STRING([--enable-starpurm-dlb-verbose], + [display resource management verbose debug messages])], + enable_starpurm_dlb_verbose=$enableval, enable_starpurm_dlb_verbose=no) + AC_MSG_RESULT($enable_starpurm_dlb_verbose) + if test x$enable_starpurm_dlb_verbose = xyes; then + AC_DEFINE(STARPURM_DLB_VERBOSE, [1], [display DLB resource management verbose debug messages]) + fi + + AX_DLB_CALLBACK_ARG() + fi + AC_SUBST(DLB_CFLAGS) + AC_SUBST(DLB_LIBS) +fi +AM_CONDITIONAL(STARPURM_HAVE_DLB, test x$starpurm_dlb_support = "xyes") +AM_CONDITIONAL(STARPU_BUILD_STARPURM, [test x$starpurm_support = xyes]) + +AC_ARG_ENABLE(starpurm-examples, [AS_HELP_STRING([--enable-starpurm-examples], + [enable build of StarPU Resource Manager examples])], + enable_starpurm_examples=$enableval, enable_starpurm_examples=no) +AM_CONDITIONAL(STARPU_BUILD_STARPURM_EXAMPLES, [test x$enable_starpurm_examples = xyes]) + +##################################### +# StarPUPy # +##################################### + +starpupy_support=no + +if test "x$enable_starpupy" != xno +then + AC_CHECK_PROGS([PYTHON], python3) + if test "$ac_cv_prog_PYTHON" = "" + then + if test "x$enable_starpupy" = xyes ; then + AC_MSG_ERROR([python3 missing, cannot build StarPU python interface]) + else + AC_MSG_WARN([python3 missing, cannot build StarPU python interface]) + enable_starpupy=no + fi + fi +fi +if test "x$enable_starpupy" != xno +then + AC_SUBST(PYTHON) + AC_MSG_CHECKING(for 
python3 version)
+    PYTHON_VERSION=$(echo "import sys ; print(str(sys.version_info.major)+\".\"+str(sys.version_info.minor))" | $PYTHON)
+    AC_MSG_RESULT($PYTHON_VERSION)
+    AC_SUBST(PYTHON_VERSION)
+    PYTHON_INCLUDE_DIRS="`$PYTHON -c "from sysconfig import get_paths as gp; print(gp()@<:@'include'@:>@)"`"
+    # The Python include directory is only needed while probing for Python.h.
+    # CPPFLAGS is restored right after the probe so that the extra -I does not
+    # leak into the rest of configure when a later check disables StarPUPy.
+    SAVED_CPPFLAGS="${CPPFLAGS}"
+    CPPFLAGS="$CPPFLAGS -I$PYTHON_INCLUDE_DIRS"
+    AC_CHECK_HEADERS([Python.h],[have_python_h=yes],[have_python_h=no])
+    CPPFLAGS="${SAVED_CPPFLAGS}"
+    if test "$have_python_h" = "no" ; then
+        # Fatal only when StarPUPy was explicitly requested
+        if test "x$enable_starpupy" = xyes ; then
+            AC_MSG_ERROR([Python.h missing, cannot build StarPU python interface (consider installing python-dev)])
+        else
+            AC_MSG_WARN([Python.h missing, cannot build StarPU python interface (consider installing python-dev)])
+            enable_starpupy=no
+        fi
+    fi
+fi
+if test "x$enable_starpupy" != xno
+then
+    AC_CHECK_LIB([python$PYTHON_VERSION], [PyErr_Print], [have_python_lib=yes], [have_python_lib=no])
+    if test "$have_python_lib" = "no" ; then
+        if test "x$enable_starpupy" = xyes ; then
+            AC_MSG_ERROR([Python library missing, cannot build StarPU python interface (consider installing python-dev)])
+        else
+            AC_MSG_WARN([Python library missing, cannot build StarPU python interface (consider installing python-dev)])
+            enable_starpupy=no
+        fi
+    fi
+fi
+if test "x$enable_starpupy" != xno
+then
+    AC_MSG_CHECKING(for python3 setuptools)
+    if $PYTHON -c "import setuptools" ; then
+        AC_MSG_RESULT(yes)
+    else
+        AC_MSG_RESULT(no)
+        if test "x$enable_starpupy" = xyes ; then
+            AC_MSG_ERROR([setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)])
+        else
+            AC_MSG_WARN([setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)])
+            enable_starpupy=no
+        fi
+    fi
+fi
+
+# Optional Python modules: a missing one only reduces the feature set
+if test "x$enable_starpupy" != xno
+then
+    AC_MSG_CHECKING(for python3 module joblib)
+    AC_PYTHON_MODULE(joblib,[joblib_avail=yes],[joblib_avail=no])
+    AC_MSG_RESULT($joblib_avail)
+    if test
"$joblib_avail" = "yes" ; then + AC_DEFINE(STARPU_PYTHON_HAVE_JOBLIB, [1], [Python joblib package available]) + else + AC_MSG_WARN([python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')]) + fi + + AC_MSG_CHECKING(for python3 module cloudpickle) + AC_PYTHON_MODULE(cloudpickle,[cloudpickle_avail=yes],[cloudpickle_avail=no]) + AC_MSG_RESULT($cloudpickle_avail) + if test "$cloudpickle_avail" = "yes" ; then + AC_DEFINE(STARPU_PYTHON_HAVE_CLOUDPICKLE, [1], [Python cloudpickle package available]) + else + AC_MSG_WARN([python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')]) + fi + + starpupy_support=yes + AC_MSG_CHECKING(for python3 module numpy) + AC_PYTHON_MODULE(numpy,[numpy_avail=yes],[numpy_avail=no]) + AC_MSG_RESULT($numpy_avail) + PYTHON_NUMPY_DIR="" + if test "$numpy_avail" = "yes" ; then + AC_DEFINE(STARPU_PYTHON_HAVE_NUMPY, [1], [Python3 numpy package available]) + PYTHON_NUMPY_DIR="`$PYTHON -c "import numpy ; print(numpy.get_include())"`" + fi + AC_SUBST(PYTHON_NUMPY_DIR) + PYTHON_SETUP_OPTIONS="" + if test x$enable_debug = xyes ; then + PYTHON_SETUP_OPTIONS="--debug" + fi + AC_SUBST(PYTHON_SETUP_OPTIONS) +fi +AM_CONDITIONAL(STARPU_BUILD_STARPUPY, [test x$starpupy_support = xyes]) +AM_CONDITIONAL(STARPU_STARPUPY_NUMPY, [test x$numpy_avail = xyes]) + +AC_ARG_VAR([PYTHON], [Python3 interpreter]) + +########################################## +# Documentation # +########################################## + +def_enable_build_doc="yes" +available_doc="no" +if test -d "$srcdir/doc/doxygen/html" ; then + def_enable_build_doc="no" + available_doc="yes" +fi +if test "$starpu_darwin" = "yes" ; then + def_enable_build_doc="no" +fi + +AC_ARG_ENABLE(build-doc, [AS_HELP_STRING([--disable-build-doc], + [disable building of documentation])], + enable_build_doc=$enableval, enable_build_doc=$def_enable_build_doc) + +AC_ARG_ENABLE(build-doc-pdf, 
[AS_HELP_STRING([--enable-build-doc-pdf], + [enable building of PDF documentation])], + enable_build_doc_pdf=$enableval, enable_build_doc_pdf=no) + +available_doc_pdf="no" +if test -f "$srcdir/doc/doxygen/starpu.pdf" ; then + enable_build_doc_pdf="no" + available_doc_pdf="yes" +fi + +# Check whether doxygen needed tools are installed +AC_PATH_PROG(doxygencommand, doxygen) +if test "$doxygencommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + AC_MSG_ERROR([doxygen missing, cannot build documentation PDF]) + fi + enable_build_doc="no" + enable_build_doc_pdf="no" +fi +AC_PATH_PROG(pdflatexcommand, pdflatex) +if test "$pdflatexcommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + AC_MSG_ERROR([pdflatex missing, cannot build documentation PDF]) + fi + enable_build_doc_pdf="no" +fi +AC_PATH_PROG(epstopdfcommand, epstopdf) +if test "$epstopdfcommand" = "" ; then + if test "$enable_build_doc_pdf" = "yes" ; then + AC_MSG_ERROR([epstopdf missing, cannot build documentation PDF]) + fi + enable_build_doc_pdf="no" +fi + +AC_MSG_CHECKING(whether HTML documentation should be compiled) +AC_MSG_RESULT($enable_build_doc) +AC_MSG_CHECKING(whether HTML documentation is available) +AC_MSG_RESULT($available_doc) +AC_MSG_CHECKING(whether PDF documentation should be compiled) +AC_MSG_RESULT($enable_build_doc_pdf) +AC_MSG_CHECKING(whether PDF documentation is available) +AC_MSG_RESULT($available_doc_pdf) + +AM_CONDITIONAL(STARPU_BUILD_DOC, [test x$enable_build_doc != xno]) +AM_CONDITIONAL(STARPU_AVAILABLE_DOC, [test x$available_doc != xno]) + +AM_CONDITIONAL(STARPU_BUILD_DOC_PDF, [test x$enable_build_doc_pdf != xno]) +AM_CONDITIONAL(STARPU_AVAILABLE_DOC_PDF, [test x$available_doc_pdf != xno]) + +if test x$enable_build_doc_pdf != xno ; then + DOC_GENERATE_LATEX=YES +else + DOC_GENERATE_LATEX=NO +fi +AC_SUBST(DOC_GENERATE_LATEX) + +############################################################################### +# # +# Julia # +# # 
+###############################################################################
+# --enable-julia: off by default. When requested, a usable julia executable
+# must be found in PATH, otherwise configure stops.
+AC_ARG_ENABLE(julia, [AS_HELP_STRING([--enable-julia],
+			[enable the Julia extension])],
+			enable_julia=$enableval, enable_julia=no)
+if test "$enable_julia" = "yes" ; then
+	# Check whether the julia compiler is available
+	AC_PATH_PROG(juliapath, julia)
+	AC_MSG_CHECKING(whether julia is available)
+	AC_MSG_RESULT($juliapath)
+	if test ! -x "$juliapath" ; then
+		# AC_MSG_ERROR aborts configure, nothing can run after it
+		AC_MSG_ERROR([Julia compiler '$juliapath' is not valid])
+	fi
+fi
+AM_CONDITIONAL([STARPU_USE_JULIA], [test "x$enable_julia" = "xyes"])
+AC_SUBST(JULIA, $juliapath)
+
+###############################################################################
+# #
+# Eclipse Plugin #
+# #
+###############################################################################
+# --enable-eclipse-plugin: off by default. When requested, an eclipse
+# executable must be found in PATH, otherwise configure stops.
+AC_ARG_ENABLE(eclipse-plugin, [AS_HELP_STRING([--enable-eclipse-plugin],
+			[Build the Eclipse plugin])],
+			enable_eclipse_plugin=$enableval, enable_eclipse_plugin=no)
+if test "$enable_eclipse_plugin" = "yes" ; then
+	AC_PATH_PROG(eclipsepath, eclipse)
+	AC_MSG_CHECKING(whether eclipse is available)
+	AC_MSG_RESULT($eclipsepath)
+	if test ! -x "$eclipsepath" ; then
+		# AC_MSG_ERROR aborts configure, nothing can run after it
+		AC_MSG_ERROR([Eclipse executable '$eclipsepath' is not valid])
+	fi
+
+	# One library name per line: libstarpu itself, the exported libraries
+	# with their -l prefix removed, and the hwloc requirements
+	libs=$(for x in starpu-$STARPU_EFFECTIVE_VERSION $(echo $STARPU_EXPORTED_LIBS | sed 's/-l//g') $HWLOC_REQUIRES ; do echo $x ; done)
+	option_libs=$($srcdir/eclipse-plugin/tools/cproject.sh option $libs)
+	module_libs=$($srcdir/eclipse-plugin/tools/cproject.sh module $libs)
+fi
+
+AM_CONDITIONAL([STARPU_BUILD_ECLIPSE_PLUGIN], [test "x$enable_eclipse_plugin" = "xyes"])
+AC_SUBST(ECLIPSE, $eclipsepath)
+# NOTE(review): these paths are expanded at configure time, and the library
+# path is built from prefix rather than libdir - confirm this is intended
+AC_SUBST(STARPU_INCLUDE_PATH, $(eval echo ${includedir}/starpu/$STARPU_EFFECTIVE_VERSION))
+AC_SUBST(STARPU_LIB_PATH, $(eval echo ${prefix}/lib))
+AC_SUBST(STARPU_MODULE_LIBS, "$module_libs")
+AC_SUBST(STARPU_OPTION_LIBS, "$option_libs")
+
+###############################################################################
+# #
+# Final settings #
+# #
+###############################################################################
+
+# Cuda0 and Cuda1 cannot be combined with simgrid, OpenCL or OpenMP.
+# Separate test invocations are used because the -a and -o operators and
+# parentheses of test are not portable.
+starpu_cuda01=no
+if test "x$enable_cuda0" = xyes || test "x$enable_cuda1" = xyes ; then
+	starpu_cuda01=yes
+fi
+
+if test "x$enable_simgrid" = xyes && test "x$starpu_cuda01" = xyes ; then
+	AC_MSG_ERROR([Cuda0/Cuda1 not supported with simgrid])
+fi
+
+if test "x$enable_opencl" = xyes && test "x$starpu_cuda01" = xyes ; then
+	AC_MSG_ERROR([Cuda0/Cuda1 not supported with OpenCL])
+fi
+
+if test "x$enable_openmp" = xyes && test "x$starpu_cuda01" = xyes ; then
+	AC_MSG_ERROR([Cuda0/Cuda1 not supported with OpenMP])
+fi
+
+
+CPPFLAGS="$CPPFLAGS -DSTARPU_SAMPLING_DIR=\"\\\"${datarootdir}/starpu/perfmodels/sampling\\\"\""
+STARPU_BASIC_H_CPPFLAGS="$HWLOC_CFLAGS $STARPU_CUDA_CPPFLAGS $STARPU_HIP_CPPFLAGS $STARPU_OPENCL_CPPFLAGS $STARPU_MAX_FPGA_CPPFLAGS $SIMGRID_CFLAGS $PAPI_CFLAGS"
+
+# these are the flags needed to compile starpu.h
+STARPU_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS"
+AC_SUBST([STARPU_H_CPPFLAGS])
+
+STARPU_NVCC_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS"
+AC_SUBST([STARPU_NVCC_H_CPPFLAGS])
+
+# these are the flags needed for linking libstarpu (and thus also for static linking)
+LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $STARPU_HIP_LDFLAGS $HWLOC_LIBS $FXT_LDFLAGS $FXT_LIBS $PAPI_LIBS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LDFLAGS $STARPU_BLAS_LDFLAGS $DGELS_LIBS $STARPU_MAX_FPGA_LDFLAGS $STARPU_DLOPEN_LDFLAGS"
+AC_SUBST([LIBSTARPU_LDFLAGS])
+
+# these are the flags needed for linking against libstarpu (because starpu.h makes its includer use pthread_*, simgrid, etc.)
+if test "x$enable_shared" = xno; then
+	# No .so, so application will unexpectedly have to know which -l to
+	# use. Give them in .pc file.
+	AC_DEFINE(STARPU_STATIC_ONLY, [1], [Only static compilation was made])
+	STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $LDFLAGS $LIBS $LIBSTARPU_LDFLAGS"
+fi
+AC_SUBST(STARPU_EXPORTED_LIBS)
+
+# Build a list of quoted link arguments, each one followed by a comma:
+# the OpenMP flags when set, every exported library flag, and -lgcov
+# when coverage is enabled
+STARPUPY_EXTRA_LINK_ARGS=""
+if test "x$enable_starpupy" != xno
+then
+	if test "x$OPENMP_CFLAGS" != "x"
+	then
+		STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$OPENMP_CFLAGS', "
+	fi
+	for flag in $STARPU_EXPORTED_LIBS
+	do
+		STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$flag', "
+	done
+	if test x$enable_coverage = xyes; then
+		STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '-lgcov', "
+	fi
+fi
+AC_SUBST(STARPUPY_EXTRA_LINK_ARGS)
+
+LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la
+LIBSTARPU_LINK="$LIBSTARPU_LINK $STARPU_EXPORTED_LIBS"
+AC_SUBST([LIBSTARPU_LINK])
+
+# File configuration
+# The commands below mark the generated scripts as executable, link
+# scripts and data files from the source tree into the build tree when
+# they are missing there, link tests/loader.c into each test directory,
+# and strip the STARPU_SRC_DIR and STARPU_BUILD_DIR lines from
+# src/common/config.h.
+AC_CONFIG_COMMANDS([executable-scripts], [
+  chmod +x tests/regression/regression.sh
+  chmod +x tests/model-checking/starpu-mc.sh
+  chmod +x tools/starpu_env
+  chmod +x tools/starpu_codelet_profile
+  chmod +x tools/starpu_codelet_histo_profile
+  chmod +x tools/starpu_mpi_comm_matrix.py
+  chmod +x tools/starpu_fxt_number_events_to_names.py
+  chmod +x tools/starpu_workers_activity
+  chmod +x tools/starpu_paje_draw_histogram
+  chmod +x tools/starpu_paje_state_stats
+  chmod +x tools/starpu_paje_summary
+  chmod +x tools/starpu_config
+  chmod +x tools/starpu_mlr_analysis
+  chmod +x tools/starpu_paje_sort
+  chmod +x tools/starpu_smpirun
+  chmod +x tools/starpu_tcpipexec
+  chmod +x doc/doxygen/doxygen_filter.sh
+  chmod +x doc/doxygen_dev/doxygen_filter.sh
+  chmod +x starpupy/execute.sh
+  chmod +x julia/examples/execute.sh
+  for x in \
+    tests/microbenchs/tasks_data_overhead.sh \
+    tests/microbenchs/sync_tasks_data_overhead.sh \
+    tests/microbenchs/async_tasks_data_overhead.sh \
+    tests/microbenchs/tasks_size_overhead.sh \
+    tests/microbenchs/tasks_size_overhead_sched.sh \
+    tests/microbenchs/tasks_size_overhead_scheds.sh \
+    tests/microbenchs/tasks_size_overhead.gp \
+    tests/microbenchs/microbench.sh \
+    tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh \
+    tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh \
+    tests/microbenchs/parallel_independent_heterogeneous_tasks.sh \
+    tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh \
+    tests/microbenchs/parallel_independent_homogeneous_tasks.sh \
+    tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh \
+    tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh \
+    tests/microbenchs/bandwidth_scheds.sh \
+    tests/energy/static.sh \
+    tests/energy/dynamic.sh \
+    tests/datawizard/locality.sh \
+    tests/overlap/overlap.sh \
+    tests/model-checking/prio_list.sh \
+    tests/model-checking/prio_list2.sh \
+    tests/model-checking/prio_list3.sh \
+    tests/model-checking/barrier.sh \
+    examples/heat/heat.sh \
+    examples/lu/lu.sh \
+    examples/cholesky/cholesky.sh \
+    examples/cholesky/cholesky_julia.sh \
+    examples/mult/sgemm.sh \
+    examples/scheduler/schedulers.sh \
+    examples/scheduler/schedulers_context.sh \
+    examples/scheduler/libdummy_sched.sh \
+    examples/profiling_tool/prof.sh \
+    tools/starpu_paje_draw_histogram.R \
+    tools/starpu_paje_state_stats.R \
+    tools/starpu_mlr_analysis.Rmd \
+    tools/starpu_paje_summary.Rmd \
+    tools/starpu_trace_state_stats.py \
+    julia/examples/check_deps/check_deps.sh \
+    julia/examples/mult/mult_starpu.sh \
+    julia/examples/mult/perf.sh \
+    julia/examples/variable/variable.sh \
+    julia/examples/task_insert_color/task_insert_color.sh \
+    julia/examples/vector_scal/vector_scal.sh \
+    julia/examples/mandelbrot/mandelbrot.sh \
+    julia/examples/callback/callback.sh \
+    julia/examples/dependency/task_dep.sh \
+    julia/examples/dependency/tag_dep.sh \
+    julia/examples/dependency/end_dep.sh \
+    julia/examples/axpy/axpy.sh \
+    julia/examples/gemm/gemm.sh \
+    julia/examples/cholesky/cholesky.sh \
+    starpupy/benchmark/tasks_size_overhead.sh \
+    starpupy/benchmark/tasks_size_overhead.gp \
+    starpupy/benchmark/test_handle_perf.sh \
+    starpupy/benchmark/test_handle_perf_pickle.sh \
+    starpupy/examples/starpu_py.sh \
+    starpupy/examples/starpu_py.concurrent.sh \
+    starpupy/examples/starpu_py_handle.sh \
+    starpupy/examples/starpu_py_handle.concurrent.sh \
+    starpupy/examples/starpu_py_np.sh \
+    starpupy/examples/starpu_py_np.concurrent.sh \
+    starpupy/examples/starpu_py_parallel.sh \
+    starpupy/examples/starpu_py_partition.sh \
+    starpupy/examples/starpu_py_partition.concurrent.sh \
+    starpupy/examples/starpu_py_perfmodel.sh \
+    starpupy/examples/starpu_py_perfmodel.concurrent.sh \
+    starpupy/examples/starpu_py_numpy.sh \
+    starpupy/examples/starpu_py_numpy.concurrent.sh \
+  ; do
+    test -e $x || ( mkdir -p $(dirname $x) && ln -sf $ac_abs_top_srcdir/$x $(dirname $x) )
+  done
+  for x in tools julia/examples starpufft/tests examples examples/stencil mpi/tests mpi/examples socl/examples bubble/tests starpupy/examples starpu_openmp_llvm/examples \
+  ; do
+    test -e $x/loader.c || ln -sf $ac_abs_top_srcdir/tests/loader.c $x
+  done
+
+  # Filter through a temporary file instead of using sed -i which does not
+  # behave the same way with every sed implementation
+  sed -e '/ STARPU_SRC_DIR /d' -e '/ STARPU_BUILD_DIR /d' src/common/config.h > src/common/config.h.tmp && mv -f src/common/config.h.tmp src/common/config.h
+])
+
+# Create links to ICD files in build/socl/vendors directory.
SOCL will use this
+# directory as the OCL_ICD_VENDORS directory
+# Every ICD file found on the configure host, except socl.icd itself, is
+# linked into socl/vendors and appended to SOCL_VENDORS.
+# NOTE(review): AC_CONFIG_LINKS is called here with a shell variable set
+# inside a shell loop - confirm this registers one link per file as intended
+SOCL_VENDORS="vendors/install/socl.icd"
+for icd in /etc/OpenCL/vendors/*.icd ; do
+	if test -f $icd ; then
+		if test "$(basename $icd)" != "socl.icd" ; then
+			new_icd=$(basename $icd)
+			AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd])
+			SOCL_VENDORS="$SOCL_VENDORS vendors/$new_icd"
+		fi
+	fi
+done
+AC_SUBST(SOCL_VENDORS)
+
+AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only)
+# AC_CONFIG_HEADERS replaces the obsolete AC_CONFIG_HEADER spelling
+AC_CONFIG_HEADERS(src/common/config.h src/common/config-src-build.h include/starpu_config.h starpurm/include/starpurm_config.h)
+
+# STARPU_SANITIZE is enabled when CFLAGS mentions a sanitize option
+SANITIZE=$(echo $CFLAGS | grep sanitize)
+AM_CONDITIONAL(STARPU_SANITIZE, test -n "$SANITIZE")
+
+AC_OUTPUT([
+	Makefile
+	src/Makefile
+	tools/Makefile
+	tools/starpu_env
+	tools/starpu_codelet_profile
+	tools/starpu_codelet_histo_profile
+	tools/starpu_mpi_comm_matrix.py
+	tools/starpu_fxt_number_events_to_names.py
+	tools/starpu_workers_activity
+	tools/starpu_paje_draw_histogram
+	tools/starpu_paje_state_stats
+	tools/starpu_paje_summary
+	tools/starpu_config
+	tools/starpu_mlr_analysis
+	tools/starpu_paje_sort
+	tools/starpu_smpirun
+	tools/starpu_tcpipexec
+	socl/Makefile
+	socl/src/Makefile
+	socl/examples/Makefile
+	socl/vendors/socl.icd
+	socl/vendors/install/socl.icd
+	packages/libstarpu.pc
+	packages/starpu-1.0.pc
+	packages/starpu-1.1.pc
+	packages/starpu-1.2.pc
+	packages/starpu-1.3.pc
+	packages/starpu-1.4.pc
+	packages/starpu-1.3
+	packages/starpu-1.4
+	mpi/packages/libstarpumpi.pc
+	mpi/packages/starpumpi-1.0.pc
+	mpi/packages/starpumpi-1.1.pc
+	mpi/packages/starpumpi-1.2.pc
+	mpi/packages/starpumpi-1.3.pc
+	mpi/packages/starpumpi-1.4.pc
+	starpufft/Makefile
+	starpufft/src/Makefile
+	starpufft/tests/Makefile
+	starpufft/packages/libstarpufft.pc
+	starpufft/packages/starpufft-1.0.pc
+	starpufft/packages/starpufft-1.1.pc
+	starpufft/packages/starpufft-1.2.pc
+	starpufft/packages/starpufft-1.3.pc
+	starpufft/packages/starpufft-1.4.pc
+	starpurm/Makefile
+	starpurm/src/Makefile
+	starpurm/tests/Makefile
+	starpurm/examples/Makefile
+	starpurm/packages/starpurm-1.3.pc
+	starpurm/packages/starpurm-1.4.pc
+	starpu_openmp_llvm/Makefile
+	starpu_openmp_llvm/src/Makefile
+	starpu_openmp_llvm/examples/Makefile
+	starpupy/src/setup.cfg
+	starpupy/src/setup.py
+	starpupy/Makefile
+	starpupy/src/Makefile
+	starpupy/examples/Makefile
+	starpupy/execute.sh
+	starpupy/benchmark/Makefile
+	examples/Makefile
+	examples/stencil/Makefile
+	tests/Makefile
+	tests/model-checking/Makefile
+	tests/model-checking/starpu-mc.sh
+	mpi/Makefile
+	mpi/src/Makefile
+	mpi/tests/Makefile
+	mpi/examples/Makefile
+	mpi/tools/Makefile
+	mpi/GNUmakefile
+	sc_hypervisor/Makefile
+	sc_hypervisor/src/Makefile
+	sc_hypervisor/examples/Makefile
+	doc/Makefile
+	doc/doxygen/Makefile
+	doc/doxygen/doxygen-config.cfg
+	doc/doxygen/doxygen-config-include.cfg
+	doc/doxygen/doxygen_filter.sh
+	doc/doxygen_dev/Makefile
+	doc/doxygen_dev/doxygen-config.cfg
+	doc/doxygen_dev/doxygen_filter.sh
+	doc/doxygen_dev/doxygen-config-include.cfg
+	doc/doxygen_web_introduction/Makefile
+	doc/doxygen_web_introduction/doxygen-config.cfg
+	doc/doxygen_web_installation/Makefile
+	doc/doxygen_web_installation/doxygen-config.cfg
+	doc/doxygen_web_basics/Makefile
+	doc/doxygen_web_basics/doxygen-config.cfg
+	doc/doxygen_web_applications/Makefile
+	doc/doxygen_web_applications/doxygen-config.cfg
+	doc/doxygen_web_performances/Makefile
+	doc/doxygen_web_performances/doxygen-config.cfg
+	doc/doxygen_web_faq/Makefile
+	doc/doxygen_web_faq/doxygen-config.cfg
+	doc/doxygen_web_languages/Makefile
+	doc/doxygen_web_languages/doxygen-config.cfg
+	doc/doxygen_web_extensions/Makefile
+	doc/doxygen_web_extensions/doxygen-config.cfg
+	tools/msvc/starpu_var.bat
+	min-dgels/Makefile
+	bubble/Makefile
+	bubble/tests/Makefile
+	julia/Makefile
+	julia/src/Makefile
+	julia/src/dynamic_compiler/Makefile
+	julia/examples/Makefile
+	julia/examples/execute.sh
+	eclipse-plugin/Makefile
+	eclipse-plugin/src/Makefile
+	eclipse-plugin/examples/Makefile
+	eclipse-plugin/examples/hello/.cproject
+])
+
+# Print a summary of the configuration
+AC_MSG_NOTICE([
+
+	CPUs enabled: $enable_cpu
+	CUDA enabled: $enable_cuda $NO_NVML
+	HIP enabled: $enable_hip
+	OpenCL enabled: $enable_opencl
+	Max FPGA enabled: $enable_max_fpga
+
+	Compile-time limits
+	(change these with --enable-maxcpus, --enable-maxcudadev,
+	--enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers)
+	(Note these numbers do not represent the number of detected
+	devices, but the maximum number of devices StarPU can manage)
+
+	Maximum number of CPUs: $maxcpus
+	Maximum number of CUDA devices: $nmaxcudadev
+	Maximum number of HIP devices: $nmaxhipdev
+	Maximum number of OpenCL devices: $nmaxopencldev
+	Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev
+	Maximum number of MPI master-slave devices: $nmaxmpidev
+	Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev
+	Maximum number of memory nodes: $maxnodes
+	Maximum number of task buffers: $nmaxbuffers
+
+	CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer
+	CUDA Map: $enable_cuda_map
+	HIP GPU-GPU transfers: $enable_hip_memcpy_peer
+	Allocation cache: $enable_allocation_cache
+
+	Magma enabled: $have_magma
+	BLAS library: $blas_lib
+	hwloc: $have_valid_hwloc
+	FxT trace enabled: $enable_fxt
+
+	Documentation HTML: $enable_build_doc
+	Documentation PDF: $enable_build_doc_pdf
+	Examples: $enable_build_examples
+
+	StarPU Extensions:
+	StarPU MPI enabled: $build_mpi_lib
+	StarPU MPI failure tolerance: $enable_mpi_ft
+	StarPU MPI failure tolerance stats: $use_mpi_ft_stats
+	StarPU MPI(nmad) enabled: $build_nmad_lib
+	MPI test suite: $running_mpi_check
+	Master-Slave MPI enabled: $build_mpi_master_slave
+	Master-Slave TCP/IP enabled: $build_tcpip_master_slave
+	FFT Support: $fft_support
+	Resource Management enabled: $starpurm_support
+	Python Interface enabled: $starpupy_support
+	OpenMP runtime support enabled: $enable_openmp
+	OpenMP LLVM runtime support enabled: $enable_openmp_llvm
+	Parallel Worker support enabled: $enable_parallel_worker
+	SOCL enabled: $build_socl
+	SOCL test suite: $run_socl_check
+	Scheduler Hypervisor: $build_sc_hypervisor
+	simgrid enabled: $enable_simgrid
+	ayudame enabled: $ayu_msg
+	HDF5 enabled: $enable_hdf5
+	Native fortran support: $enable_build_fortran
+	Native MPI fortran support: $use_mpi_fort
+	Support for multiple linear regression models: $support_mlr
+	Hierarchical dags support: $enable_bubble
+	JULIA enabled: $enable_julia
+])
+
+# The warnings below use separate test invocations because the -a operator
+# of test is not portable
+if test "$build_socl" = "yes" && test "$run_socl_check" = "no" ; then
+	AC_MSG_NOTICE([
+WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined.
+To run the tests, you need to install the OCL implementation of ICD
+(https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1)
+and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so.])
+fi
+
+if test x"$have_valid_hwloc" = xno && test "$enable_simgrid" = "no" ; then
+	AC_MSG_NOTICE([
+WARNING: hwloc was not enabled. If the target machine is hyperthreaded the
+performance may be impacted a lot. It is strongly recommended to install
+hwloc])
+fi
+
+if test x"$starpu_windows" = xyes && test "x$STARPU_MS_LIB" = "x" ; then
+	AC_MSG_NOTICE([
+WARNING: lib was not found, you will not be able to build StarPU applications
+with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g
+ c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;
+ c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin])
+fi
diff --git a/contrib/ci.inria.fr/Jenkinsfile-basic b/contrib/ci.inria.fr/Jenkinsfile-basic
new file mode 100644
index 0000000..2f987a7
--- /dev/null
+++ b/contrib/ci.inria.fr/Jenkinsfile-basic
@@ -0,0 +1,136 @@
+#!groovy
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+
+// Set by the 'changed' post hook, read by the 'success' hook to decide
+// whether an email must be sent
+def statusHasChanged = false
+
+pipeline
+{
+	agent none
+
+	// Trigger the build
+	triggers
+	{
+		// Poll SCM explicitly every hour
+		pollSCM('0 * * * *')
+	}
+
+	options
+	{
+		timeout(time: 1, unit: 'HOURS')
+	}
+
+	stages
+	{
+		// Build the distribution tarball and the PDF documentation on an
+		// 'autotools' node, then stash them for the Check stage
+		stage('Tarball')
+		{
+			steps
+			{
+				node('autotools')
+				{
+					checkout scm
+					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
+					script
+					{
+						env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim()
+					}
+					stash includes: "${env.tarballgz}", name: 'tarballgz'
+					stash includes: "starpu.pdf", name: 'doc'
+					stash includes: "starpu_dev.pdf", name: 'doc_dev'
+					// Stash those scripts because they are not in make dist
+					dir('contrib/ci.inria.fr')
+					{
+						stash includes: "job-1-check.sh", name: 'script-unix-check'
+					}
+					archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true
+					deleteDir()
+
+				}
+			}
+		}
+		// Run the check script in parallel on every node whose label
+		// string contains 'unix'
+		stage('Check')
+		{
+			steps
+			{
+				script
+				{
+					labelToSelect = 'unix'
+					// Name of each matching node, null for the others
+					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
+					{
+						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
+					}
+					listOfNodeNames.removeAll(Collections.singleton(null))
+
+					// Map each node name to the closure to run on that node
+					def p = listOfNodeNames.collectEntries
+					{
+						[ (it):
+							{
+								node(it)
+								{
+									dir('check-unix')
+									{
+										unstash 'tarballgz'
+										unstash 'script-unix-check'
+										sh 'chmod 755 job-1-check.sh && ./job-1-check.sh'
+										deleteDir()
+									}
+								}
+							}
+						]}
+					// NOTE(review): unlike Jenkinsfile-windows, an empty node list is not reported as a failure here - confirm this is intended
+					parallel p;
+				}
+			}
+		}
+	}
+
+	post
+	{
+		// hooks are called in order: always, changed, aborted, failure, success, unstable
+		changed
+		{
+			echo "Build status has changed."
+			script
+			{
+
+				statusHasChanged = true
+			}
+		}
+		success
+		{
+			echo "Build success."
+			// email when changed to success
+			script
+			{
+				if (statusHasChanged)
+				{
+					emailext(body: '${DEFAULT_CONTENT}',
+						subject: '${DEFAULT_SUBJECT}',
+						replyTo: '$DEFAULT_REPLYTO',
+						to: '$DEFAULT_RECIPIENTS')
+				}
+			}
+		}
+		failure
+		{
+			echo "Build failure."
+			// always email on failure
+			emailext(body: '${DEFAULT_CONTENT}',
+				subject: '${DEFAULT_SUBJECT}',
+				replyTo: '$DEFAULT_REPLYTO',
+				to: '$DEFAULT_RECIPIENTS')
+		}
+	}
+}
diff --git a/contrib/ci.inria.fr/Jenkinsfile-bsd b/contrib/ci.inria.fr/Jenkinsfile-bsd
new file mode 100644
index 0000000..b13f888
--- /dev/null
+++ b/contrib/ci.inria.fr/Jenkinsfile-bsd
@@ -0,0 +1,131 @@
+#!groovy
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+
+// Set by the 'changed' post hook, read by the 'success' hook to decide
+// whether an email must be sent
+def statusHasChanged = false
+
+pipeline
+{
+	agent none
+
+	// Trigger the build
+	triggers
+	{
+		// Poll SCM explicitly every past-half hour
+		pollSCM('30 * * * *')
+	}
+
+	stages
+	{
+		// Build the distribution tarball and the PDF documentation on an
+		// 'autotools' node, then stash them for the Check stage
+		stage('Tarball')
+		{
+			steps
+			{
+				node('autotools')
+				{
+					checkout scm
+					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
+					script
+					{
+						env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim()
+					}
+					stash includes: "${env.tarballgz}", name: 'tarballgz'
+					stash includes: "starpu.pdf", name: 'doc'
+					stash includes: "starpu_dev.pdf", name: 'doc_dev'
+					// Stash those scripts because they are not in make dist
+					dir('contrib/ci.inria.fr')
+					{
+						stash includes: "job-1-check.sh", name: 'script-unix-check'
+					}
+					archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true
+					deleteDir()
+
+				}
+			}
+		}
+		// Run the check script in parallel on every node whose label
+		// string contains 'bsd'
+		stage('Check')
+		{
+			steps
+			{
+				script
+				{
+					labelToSelect = 'bsd'
+					// Name of each matching node, null for the others
+					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
+					{
+						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
+					}
+					listOfNodeNames.removeAll(Collections.singleton(null))
+
+					// Map each node name to the closure to run on that node
+					def p = listOfNodeNames.collectEntries
+					{
+						[ (it):
+							{
+								node(it)
+								{
+									dir('check-unix')
+									{
+										unstash 'tarballgz'
+										unstash 'script-unix-check'
+										sh 'chmod 755 job-1-check.sh && ./job-1-check.sh'
+										deleteDir()
+									}
+								}
+							}
+						]}
+					// NOTE(review): unlike Jenkinsfile-windows, an empty node list is not reported as a failure here - confirm this is intended
+					parallel p;
+				}
+			}
+		}
+	}
+
+	post
+	{
+		// hooks are called in order: always, changed, aborted, failure, success, unstable
+		changed
+		{
+			echo "Build status has changed."
+			script
+			{
+
+				statusHasChanged = true
+			}
+		}
+		success
+		{
+			echo "Build success."
+			// email when changed to success
+			script
+			{
+				if (statusHasChanged)
+				{
+					emailext(body: '${DEFAULT_CONTENT}',
+						subject: '${DEFAULT_SUBJECT}',
+						replyTo: '$DEFAULT_REPLYTO',
+						to: '$DEFAULT_RECIPIENTS')
+				}
+			}
+		}
+		failure
+		{
+			echo "Build failure."
+			// always email on failure
+			emailext(body: '${DEFAULT_CONTENT}',
+				subject: '${DEFAULT_SUBJECT}',
+				replyTo: '$DEFAULT_REPLYTO',
+				to: '$DEFAULT_RECIPIENTS')
+		}
+	}
+}
diff --git a/contrib/ci.inria.fr/Jenkinsfile-windows b/contrib/ci.inria.fr/Jenkinsfile-windows
new file mode 100644
index 0000000..5607ade
--- /dev/null
+++ b/contrib/ci.inria.fr/Jenkinsfile-windows
@@ -0,0 +1,140 @@
+#!groovy
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+
+// Set by the 'changed' post hook, read by the 'success' hook to decide
+// whether an email must be sent
+def statusHasChanged = false
+
+pipeline
+{
+	agent none
+
+	// Trigger the build
+	triggers
+	{
+		// Poll scm once a day between 10pm and 11pm
+		pollSCM('H 22 * * *')
+	}
+
+	stages
+	{
+		// Build the distribution tarball and the PDF documentation on an
+		// 'autotools2' node, then stash them for the Check stage
+		stage('Tarball')
+		{
+			steps
+			{
+				node('autotools2')
+				{
+					checkout scm
+					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
+					script
+					{
+						env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim()
+					}
+					stash includes: "${env.tarballgz}", name: 'tarballgz'
+					stash includes: "starpu.pdf", name: 'doc'
+					stash includes: "starpu_dev.pdf", name: 'doc_dev'
+					// Stash those scripts because they are not in make dist
+					dir('contrib/ci.inria.fr')
+					{
+						stash includes: "job-1-check-windows.bat", name: 'script-windows-check'
+						stash includes: "job-1-build-windows.sh", name: 'script-windows-build'
+					}
+					archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true
+					deleteDir()
+
+				}
+			}
+		}
+		// Build and check in parallel on every node whose label string
+		// contains 'windows'
+		stage('Check')
+		{
+			steps
+			{
+				script
+				{
+					labelToSelect = 'windows'
+					// Name of each matching node, null for the others
+					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
+					{
+						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
+					}
+					listOfNodeNames.removeAll(Collections.singleton(null))
+
+					if (listOfNodeNames.size() == 0)
+					{
+						// No node is allocated at this point, so fail the build
+						// with the error step, which needs no workspace, rather
+						// than with a shell command
+						error("No Jenkins node with label '" + labelToSelect + "' is defined")
+					}
+
+					// Map each node name to the closure to run on that node
+					def p = listOfNodeNames.collectEntries
+					{
+						[ (it):
+							{
+								node(it)
+								{
+									dir('check-windows')
+									{
+										unstash 'tarballgz'
+										unstash 'script-windows-check'
+										unstash 'script-windows-build'
+										bat './job-1-check-windows.bat'
+										archiveArtifacts artifacts: "*.zip", fingerprint: true, onlyIfSuccessful: true
+										// The directory is kept when KEEP_WORKING_DIRECTORY is 'true'
+										if (env.KEEP_WORKING_DIRECTORY != 'true')
+											deleteDir()
+									}
+								}
+							}
+						]}
+					parallel p;
+				}
+			}
+		}
+	}
+
+	post
+	{
+		// hooks are called in order: always, changed, aborted, failure, success, unstable
+		changed
+		{
+			echo "Build status has changed."
+			script
+			{
+
+				statusHasChanged = true
+			}
+		}
+		success
+		{
+			echo "Build success."
+			// email when changed to success
+			script
+			{
+				if (statusHasChanged)
+				{
+					emailext(body: '${DEFAULT_CONTENT}',
+						subject: '${DEFAULT_SUBJECT}',
+						replyTo: '$DEFAULT_REPLYTO',
+						to: '$DEFAULT_RECIPIENTS')
+				}
+			}
+		}
+		failure
+		{
+			echo "Build failure."
+			// always email on failure
+			emailext(body: '${DEFAULT_CONTENT}',
+				subject: '${DEFAULT_SUBJECT}',
+				replyTo: '$DEFAULT_REPLYTO',
+				to: '$DEFAULT_RECIPIENTS')
+		}
+	}
+}
diff --git a/contrib/ci.inria.fr/job-0-tarball.sh b/contrib/ci.inria.fr/job-0-tarball.sh
new file mode 100755
index 0000000..0671f93
--- /dev/null
+++ b/contrib/ci.inria.fr/job-0-tarball.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+set -e
+
+export LC_ALL=C
+export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
+export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
+
+if test -f $HOME/starpu_specific_env.sh
+then
+    . $HOME/starpu_specific_env.sh
+fi
+
+BUILD=./build_$$
+
+./autogen.sh
+if test -d $BUILD ; then chmod -R 777 $BUILD && rm -rf $BUILD ; fi
+mkdir $BUILD && cd $BUILD
+../configure --enable-build-doc-pdf $STARPU_USER_CONFIGURE_OPTIONS
+make -j4
+make dist
+cp *gz ..
+cp doc/doxygen/starpu.pdf ..
+cp doc/doxygen_dev/starpu_dev.pdf ..
+make clean
+
+
diff --git a/contrib/ci.inria.fr/job-1-build-windows.sh b/contrib/ci.inria.fr/job-1-build-windows.sh
new file mode 100755
index 0000000..8fcc0ad
--- /dev/null
+++ b/contrib/ci.inria.fr/job-1-build-windows.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+set -e
+
+export LC_ALL=C
+oldPATH=$PATH
+export PATH=/c/Builds:/usr/bin:/bin:"/c/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin":"/c/Program Files/Microsoft Visual Studio 11.0/Common7/IDE":$oldPATH
+
+# Pick the most recently modified tarball
+tarball=$(ls -tr starpu*.tar.gz | tail -1)
+if test -z "$tarball" ; then
+    echo Tarball not available
+    exit 2
+fi
+
+# Extract the tarball from scratch and backdate its files by one hour
+basename=$(basename $tarball .tar.gz)
+test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
+tar xfz $tarball
+touch --date="last hour" $(find $basename)
+version=$(echo $basename | cut -d- -f2)
+winball=starpu-win32-build-${version}
+
+export STARPU_HOME=$PWD
+
+rm -rf ${basename}/build
+mkdir ${basename}/build
+cd ${basename}/build
+
+#export HWLOC=/c/StarPU/hwloc-win32-build-1.11.0
+
+# Install into a directory located next to the extracted sources
+prefix=${PWD}/../../${winball}
+rm -rf $prefix
+
+#--with-hwloc=${HWLOC}
+options="--without-hwloc --enable-quick-check --enable-debug --enable-verbose --enable-native-winthreads"
+# On days 1 to 5 of the week (date +%u) the examples are not built
+day=$(date +%u)
+if test $day -le 5
+then
+    ../configure --prefix=$prefix $options --disable-build-examples $STARPU_USER_CONFIGURE_OPTIONS
+else
+    ../configure --prefix=$prefix $options $STARPU_USER_CONFIGURE_OPTIONS
+fi
+
+make
+
+CHECK=${PWD}/check_$$
+touch ${CHECK}
+
+# The test suite only runs when the first argument is -exec; otherwise
+# ${CHECK} stays empty. POSIX test compares strings with =, not ==.
+if test "$1" = "-exec"
+then
+    (make -k check || true) > ${CHECK} 2>&1
+    cat ${CHECK}
+    make showcheck
+fi
+
+# Install and package only when no FAIL line other than XFAIL was logged
+fail=$(grep FAIL ${CHECK} | grep -v XFAIL || true)
+if test -z "$fail"
+then
+    make install
+    cd ../../
+    cp /c/MinGW/bin/pthread*dll ${winball}/bin
+    cp /c/MinGW/bin/libgcc*dll ${winball}/bin
+    # cp ${HWLOC}/bin/*dll ${winball}/bin
+    zip -r ${winball}.zip ${winball}
+
+    rm -rf starpu_install
+    mv ${winball} starpu_install
+fi
+
+PATH=$oldPATH
+
+echo $fail
+# Exit with the number of failures, capped at 255: exit statuses wrap
+# modulo 256, so 256 failures would otherwise be reported as a success
+nfail=$(grep FAIL ${CHECK} | grep -v XFAIL | wc -l)
+if test $nfail -gt 255 ; then nfail=255 ; fi
+exit $nfail
+
+
diff --git a/contrib/ci.inria.fr/job-1-check-windows.bat b/contrib/ci.inria.fr/job-1-check-windows.bat
new file mode 100644
index 0000000..ded5fce
--- /dev/null
+++ b/contrib/ci.inria.fr/job-1-check-windows.bat
@@ -0,0 +1,27 @@
+REM StarPU --- Runtime system for heterogeneous multicore architectures.
+REM
+REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+REM
+REM StarPU is free software; you can redistribute it and/or modify
+REM it under the terms of the GNU Lesser General Public License as published by
+REM the Free Software Foundation; either version 2.1 of the License, or (at
+REM your option) any later version.
+REM
+REM StarPU is distributed in the hope that it will be useful, but
+REM WITHOUT ANY WARRANTY; without even the implied warranty of
+REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+REM
+REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
+REM
+
+REM Run the build script with the MinGW, msys, MSVC and Python
+REM directories prepended to PATH, then restore the original PATH
+set oldPATH=%PATH%
+set PATH=C:\MinGW\msys\1.0\bin;c:\MinGW\bin;C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29333\bin\Hostx64\x64;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32;%PATH%
+sh -c "./job-1-build-windows.sh"
+set PATH=%oldPATH%
+set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
+
+REM Run the MSVC hello world tutorial from the starpu_install directory
+cd starpu_install
+set STARPU_PATH=%cd%
+cd bin\msvc
+starpu_exec ../../share/doc/starpu/tutorial/hello_world_msvc.c
+
+
diff --git a/contrib/ci.inria.fr/job-1-check.sh b/contrib/ci.inria.fr/job-1-check.sh
new file mode 100755
index 0000000..34db336
--- /dev/null
+++ b/contrib/ci.inria.fr/job-1-check.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+set -e
+set -x
+
+export LC_ALL=C
+
+ulimit -c unlimited
+
+export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
+export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
+
+# Pick the most recently modified tarball
+tarball=$(ls -tr starpu-*.tar.gz | tail -1)
+
+if test -z "$tarball"
+then
+    echo Error. No tar.gz file
+    ls
+    pwd
+    exit 1
+fi
+
+# Optional Coverity mode: "-coverity <branch>". The token is read from
+# $HOME/.starpu/coverity_token and the script stops if that file is missing.
+COVERITY=0
+if test "$1" == "-coverity"
+then
+    COVERITY=1
+    if test -f $HOME/.starpu/coverity_token
+    then
+	COVERITY_TOKEN=$(cat $HOME/.starpu/coverity_token)
+    else
+	echo "Error. Coverity is enabled, but there is no file $HOME/.starpu/coverity_token"
+	exit 1
+    fi
+    shift
+    BRANCH=$1
+fi
+
+basename=$(basename $tarball .tar.gz)
+export STARPU_HOME=$PWD/$basename/home
+mkdir -p $basename
+cd $basename
+# Save the current environment as a list of export statements in ./env
+(
+    echo "oldPWD=\${PWD}"
+    env|grep -v LS_COLORS | grep '^[A-Z]'|grep -v BASH_FUNC | grep '=' | sed 's/=/=\"/'| sed 's/$/\"/' | sed 's/^/export /'
+    echo "cd \$oldPWD"
+) > ${PWD}/env
+
+# Extract the tarball from scratch
+test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
+tar xfz ../$tarball >/dev/null 2>&1
+
+# Compute the previous hour of the current day, keeping 0 at midnight
+hour=$(date "+%H")
+today=$(date "+%Y-%m-%d")
+lasthour=$(echo $hour - 1 | bc )
+if test "$hour" = "0" -o "$hour" = "00"
+then
+    lasthour=0
+fi
+
+# Backdate the extracted files to that hour, ignoring any failure
+(find $basename -exec touch -d ${today}T${lasthour}:0:0 {} \; || true ) >/dev/null 2>&1
+cd $basename
+
+if test -f $HOME/starpu_specific_env.sh
+then
+    . $HOME/starpu_specific_env.sh
+fi
+
+BUILD=./build_$$
+mkdir $BUILD
+cd $BUILD
+
+# Per-system configure options
+STARPU_CONFIGURE_OPTIONS=""
+suname=$(uname)
+if test "$suname" = "Darwin"
+then
+    # the VM macos is very slow
+    export STARPU_MPI_NP=2
+fi
+if test "$suname" = "OpenBSD"
+then
+    STARPU_CONFIGURE_OPTIONS="--without-hwloc --disable-mlr --enable-maxcpus=2"
+fi
+if test "$suname" = "FreeBSD"
+then
+    STARPU_CONFIGURE_OPTIONS="--disable-fortran --enable-maxcpus=2"
+fi
+
+export CC=gcc
+
+# Probe whether mpiexec accepts -oversubscribe. The probe is allowed to
+# fail, hence the temporary set +e. When it succeeds the option is passed
+# on to configure.
+set +e
+mpiexec -oversubscribe pwd 2>/dev/null
+ret=$?
+set -e
+ARGS=""
+if test "$ret" = "0"
+then
+    ARGS="--with-mpiexec-args=-oversubscribe"
+fi
+
+export STARPU_MICROBENCHS_DISABLED=1
+export STARPU_TIMEOUT_ENV=3600
+export MPIEXEC_TIMEOUT=3600
+CONFIGURE_OPTIONS="--enable-debug --enable-verbose --disable-build-examples --enable-mpi-check=maybe --enable-mpi-minimal-tests --disable-build-doc $ARGS"
+CONFIGURE_CHECK=""
+# Quick check on days 1 to 5 of the week (date +%u)
+day=$(date +%u)
+if test $day -le 5
+then
+    CONFIGURE_CHECK="--enable-quick-check"
+#else
+    # we do a normal check, a long check takes too long on VM nodes
+fi
+../configure $CONFIGURE_OPTIONS $CONFIGURE_CHECK $STARPU_CONFIGURE_OPTIONS $STARPU_USER_CONFIGURE_OPTIONS
+
+# Coverity mode: build under cov-build, upload the result and stop here
+# NOTE(review): curl -k disables TLS certificate verification while the
+# Coverity token is being sent - confirm this is intended
+if test "$COVERITY" == "1"
+then
+    cov-build --dir cov-int make -j4
+    grep "are ready for analysis" cov-int/build-log.txt
+    tar caf starpu.tar.xz cov-int
+    curl -k -f --form token=$COVERITY_TOKEN --form email=starpu-builds@inria.fr --form file=@starpu.tar.xz --form version=$BRANCH --form description= 'https://scan.coverity.com/builds?project=StarPU+MR'
+    exit 0
+fi
+
+make -j4
+make dist
+# From here on failures no longer stop the script. pipefail makes RET
+# reflect the status of make rather than the status of tee.
+set +e
+set -o pipefail
+make -k check 2>&1 | tee ../check_$$
+RET=$?
+
+make showcheckfailed
+make clean
+
+grep "^FAIL:" ../check_$$ || true
+
+echo "Running on $(uname -a)"
+exit $RET
diff --git a/contrib/gitlab/build.sh b/contrib/gitlab/build.sh
new file mode 100755
index 0000000..9d629f8
--- /dev/null
+++ b/contrib/gitlab/build.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +set -e + +./contrib/ci.inria.fr/job-0-tarball.sh + +tarball=$(ls -tr starpu-*.tar.gz | tail -1) + +if test -z "$tarball" +then + echo Error. No tar.gz file + ls + pwd + exit 1 +fi + +if test ! -f starpu.pdf +then + echo Error. No documentation file + ls + pwd + exit 1 +fi diff --git a/contrib/gitlab/chameleon.sh b/contrib/gitlab/chameleon.sh new file mode 100755 index 0000000..8d97d32 --- /dev/null +++ b/contrib/gitlab/chameleon.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +set -x +set -e + +export starpudir=$PWD +export rootdir=$PWD/../starpu_chameleon +export builddir=$PWD/../starpu_chameleon/build +if test -d $rootdir ; then rm -rf $rootdir ; fi +mkdir -p $builddir + +./autogen.sh +cd $builddir +$starpudir/configure --prefix=$rootdir/starpu.inst --disable-static --disable-socl --disable-build-tests --disable-build-examples --disable-build-doc --disable-opencl +make -j 32 +make install +source $rootdir/starpu.inst/bin/starpu_env + +# compiling morse +cd $rootdir +git clone --quiet --recursive --branch master https://gitlab.inria.fr/solverstack/chameleon.git chameleon +cd chameleon +git show HEAD +mkdir build +cd build +CFLAGS=-g cmake ../ -DCHAMELEON_USE_MPI=ON +make -j 20 + +set +e +ctest -R test_mpi_s +if test $? 
-ne 0 +then + ctest --rerun-failed --output-on-failure +fi +#ctest -R test_mpi_sgeadd -V diff --git a/contrib/gitlab/coverity.sh b/contrib/gitlab/coverity.sh new file mode 100755 index 0000000..53c69cc --- /dev/null +++ b/contrib/gitlab/coverity.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +BRANCH="unknown" +if test -n "$CI_COMMIT_BRANCH" +then + BRANCH=$CI_COMMIT_BRANCH +fi +if test -n "$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME" +then + BRANCH=$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME +fi + +./contrib/ci.inria.fr/job-1-check.sh -coverity $BRANCH + diff --git a/contrib/gitlab/deploy.sh b/contrib/gitlab/deploy.sh new file mode 100755 index 0000000..311a926 --- /dev/null +++ b/contrib/gitlab/deploy.sh @@ -0,0 +1,18 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +./contrib/ci.inria.fr/job-1-check.sh diff --git a/contrib/gitlab/simgrid.sh b/contrib/gitlab/simgrid.sh new file mode 100755 index 0000000..a2024b1 --- /dev/null +++ b/contrib/gitlab/simgrid.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +export STARPU_MICROBENCHS_DISABLED=1 +STARPU_USER_CONFIGURE_OPTIONS="--enable-simgrid --disable-cuda --disable-mpi --disable-mpi-check" ./contrib/ci.inria.fr/job-1-check.sh diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..6d1ba2b --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,69 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-notests.mk + +SUBDIRS = doxygen +SUBDIRS += doxygen_dev +SUBDIRS += doxygen_web_introduction +SUBDIRS += doxygen_web_installation +SUBDIRS += doxygen_web_basics +SUBDIRS += doxygen_web_applications +SUBDIRS += doxygen_web_performances +SUBDIRS += doxygen_web_faq +SUBDIRS += doxygen_web_languages +SUBDIRS += doxygen_web_extensions +DIST_SUBDIRS = doxygen +DIST_SUBDIRS += doxygen_dev +DIST_SUBDIRS += doxygen_web_introduction +DIST_SUBDIRS += doxygen_web_installation +DIST_SUBDIRS += doxygen_web_basics +DIST_SUBDIRS += doxygen_web_applications +DIST_SUBDIRS += doxygen_web_performances +DIST_SUBDIRS += doxygen_web_faq +DIST_SUBDIRS += doxygen_web_languages +DIST_SUBDIRS += doxygen_web_extensions + +EXTRA_DIST = \ + tutorial/hello_world.c \ + tutorial/hello_world_msvc.c \ + tutorial/Makefile \ + tutorial/README \ + tutorial/vector_scal.c \ + tutorial/vector_scal_cpu.c \ + tutorial/vector_scal_cuda.cu \ + tutorial/vector_scal_opencl.c \ + tutorial/vector_scal_opencl_kernel.cl \ + title.tex \ + sectionNumbering.py \ + extractHeadline.sh \ + fixLinks.sh \ + doxygen.cfg + +txtdir = ${docdir}/tutorial +txt_DATA = $(EXTRA_DIST) + +readmedir = ${docdir}/manual +readme_DATA = README.org + +all-local: README.org +DISTCLEANFILES = README.org +README.org: + $(top_srcdir)/doc/extractHeadline.sh + $(top_srcdir)/doc/fixLinks.sh $(top_builddir)/doc + + + diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..2061026 --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,1042 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes 
+am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = doc +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 
= false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(readmedir)" "$(DESTDIR)$(txtdir)" +DATA = $(readme_DATA) $(txt_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ 
+DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ 
+LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = 
@PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ 
+STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = 
@datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +SUBDIRS = doxygen doxygen_dev doxygen_web_introduction \ + doxygen_web_installation doxygen_web_basics \ + doxygen_web_applications doxygen_web_performances \ + doxygen_web_faq doxygen_web_languages doxygen_web_extensions +DIST_SUBDIRS = doxygen doxygen_dev doxygen_web_introduction \ + doxygen_web_installation doxygen_web_basics \ + doxygen_web_applications doxygen_web_performances \ + doxygen_web_faq doxygen_web_languages doxygen_web_extensions +EXTRA_DIST = \ + tutorial/hello_world.c \ + tutorial/hello_world_msvc.c \ + tutorial/Makefile \ + tutorial/README \ + tutorial/vector_scal.c \ + tutorial/vector_scal_cpu.c \ + tutorial/vector_scal_cuda.cu \ + tutorial/vector_scal_opencl.c \ + tutorial/vector_scal_opencl_kernel.cl \ + title.tex \ + sectionNumbering.py \ + extractHeadline.sh \ + fixLinks.sh \ + doxygen.cfg + +txtdir = ${docdir}/tutorial +txt_DATA = $(EXTRA_DIST) +readmedir = ${docdir}/manual +readme_DATA = README.org +DISTCLEANFILES = README.org +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk 
$(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-readmeDATA: $(readme_DATA) + @$(NORMAL_INSTALL) + @list='$(readme_DATA)'; test -n "$(readmedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(readmedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(readmedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(readmedir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(readmedir)" || exit $$?; \ + done + +uninstall-readmeDATA: + @$(NORMAL_UNINSTALL) + @list='$(readme_DATA)'; test -n "$(readmedir)" || list=; \ + files=`for p in $$list; do echo $$p; 
done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(readmedir)'; $(am__uninstall_files_from_dir) +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(readmedir)" "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" 
INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-readmeDATA install-txtDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-readmeDATA uninstall-txtDATA + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + check check-am clean clean-generic clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-generic distclean-libtool \ + 
distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-readmeDATA install-strip install-txtDATA installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-readmeDATA uninstall-txtDATA + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +all-local: README.org +README.org: + $(top_srcdir)/doc/extractHeadline.sh + $(top_srcdir)/doc/fixLinks.sh $(top_builddir)/doc + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/doxy.mk b/doc/doxy.mk new file mode 100644 index 0000000..e1568b6 --- /dev/null +++ b/doc/doxy.mk @@ -0,0 +1,152 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex + +txtdir = $(docdir)/manual + +EXTRA_DIST = + +if STARPU_BUILD_DOC +if STARPU_BUILD_DOC_PDF +all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +EXTRA_DIST += $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +txt_DATA = $(DOX_DIR)/$(DOX_PDF) +else +all: $(DOX_HTML_DIR) +EXTRA_DIST += $(DOX_HTML_DIR) +endif # STARPU_BUILD_DOC_PDF +DOX_HTML_SRCDIR=$(DOX_HTML_DIR) +install-exec-hook: $(DOX_HTML_DIR) + @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +uninstall-hook: + @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +else +if STARPU_AVAILABLE_DOC +EXTRA_DIST += $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +DOX_HTML_SRCDIR=$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +install-exec-hook: + @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +uninstall-hook: + @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +endif # STARPU_AVAILABLE_DOC +if STARPU_AVAILABLE_DOC_PDF +EXTRA_DIST += $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +endif # STARPU_AVAILABLE_DOC_PDF +endif # STARPU_BUILD_DOC + +if STARPU_BUILD_DOC +EXTRA_DIST += \ + $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ + $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +chapters/version.sty: $(chapters) + $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters + @for f in $(chapters) ; do \ + if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ + done | sort -r | head -1 > timestamp_sty + @if test -s timestamp_sty ; then \ + LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ + LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ + fi + @if test -s timestamp_sty_updated ; then \ + echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ + else \ + echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ + fi + @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty + @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty + @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ + if test -f $$f ; then $(RM) $$f ; fi ;\ + done + +chapters/version.html: $(chapters) $(images) + @for f in $(chapters) ; do \ + if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ + done | sort -r | head -1 > timestamp_html + @if test -s timestamp_html ; then \ + LC_ALL=C $(PROG_DATE) --date=@`cat 
timestamp_html` +"%F" > timestamp_html_updated ;\ + LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ + fi + @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + @if test -s timestamp_html_updated ; then \ + echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ + else \ + echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ + fi + @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ + if test -f $$f ; then $(RM) $$f ; fi ;\ + done + +doxy: + @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) + @$(DOXYGEN) $(DOX_CONFIG) + +$(DOX_HTML_DIR): $(DOX_TAG) + @$(MKDIR_P) $(DOX_HTML_DIR) + +$(DOX_TAG): $(dox_inputs) + @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) + @$(DOXYGEN) $(DOX_CONFIG) + @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi + @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi + # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files + @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi + @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html + @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi + @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi + @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> 
$(DOX_LATEX_DIR)/refman.tex ; fi + $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) + $(MKDIR_P) $(DOX_LATEX_DIR) + @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) + @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) + @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi + @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex + @cd $(DOX_LATEX_DIR) ;\ + rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ + for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ + if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ + if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ + if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ + if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ + max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ + ! 
< refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ + $(MAKEINDEX) refman.idx ;\ + max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ + for i in $(shell seq 1 5); do \ + if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ + max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ + else \ + break ; \ + fi; \ + done + mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ + -r \ + $(DOX_HTML_DIR) \ + $(DOX_LATEX_DIR) \ + $(DOX_DIR)/$(DOX_PDF) + +endif + +EXTRA_DIST += refman.tex $(chapters) $(images) diff --git a/doc/doxygen.cfg b/doc/doxygen.cfg new file mode 100644 index 0000000..82d8e36 --- /dev/null +++ b/doc/doxygen.cfg @@ -0,0 +1,1918 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Doxyfile 1.8.3.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" "). 
+ +# We include a file here that is generated by StarPU's configure +# script. This file will contain some configure-set values, such as +# version, source dir, etc. + +@INCLUDE = doxygen-config.cfg +@INCLUDE = doxygen-config-include.cfg + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. + +#PROJECT_NAME = + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. 
+ +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. Note that you specify absolute paths here, but also +# relative paths, which will be relative from the directory where doxygen is +# started. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +#ALIASES += + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. 
+ +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, +# and language is one of the parsers supported by doxygen: IDL, Java, +# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, +# C++. For instance to make doxygen treat .inc files as Fortran files (default +# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note +# that for custom extensions you also need to set FILE_PATTERNS otherwise the +# files are not read by doxygen. + +EXTENSION_MAPPING = + +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibility issues. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented classes, +# or namespaces to their corresponding documentation. Such a link can be +# prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.)
but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES (the +# default) will make doxygen replace the get and set methods by a property in +# the documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. 
+ +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = YES + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. 
Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +#SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appear multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = YES + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. 
+ +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. 
+ +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. 
+ +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if section-label ... \endif +# and \cond section-label ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 0 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
+# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. Do not use +# file names with spaces, bibtex cannot handle them.
+ +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = NO + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = NO + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. 
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +#defined in doxygen-config.cfg +#INPUT = + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = *.h *.doxy + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used.
+ +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +#defined in doxygen-config.cfg +#EXAMPLE_PATH + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. 
+# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +# From @INCLUDE, above +#IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +#defined in doxygen-config.cfg.in +#INPUT_FILTER + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled.
+ +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page (index.html). +# This can be useful if you have a project on for instance GitHub and want to reuse +# the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C, C++ and Fortran comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation.
+ +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +#HTML_OUTPUT = + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If left blank doxygen will +# generate a default style sheet. Note that it is recommended to use +# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this +# tag will in the future become obsolete. 
+ +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional +# user-defined cascading style sheet that is included after the standard +# style sheets created by doxygen. Using this option one can overrule +# certain style aspects. This is preferred over using HTML_STYLESHEET +# since it does not replace the standard style sheet and is therefore more +# robust against future updates. Doxygen will copy the style sheet file to +# the output directory. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. 
The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. + +HTML_DYNAMIC_SECTIONS = YES + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of +# entries shown in the various tree structured indices initially; the user +# can expand and collapse entries dynamically later on. Doxygen will expand +# the tree to such a level that at most the specified number of entries are +# visible (unless a fully collapsed tree already exceeds this amount). +# So setting the number of entries 1 will produce a full collapsed tree by +# default. 0 is a special value representing an infinite number of entries +# and will result in a full expanded tree by default. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. 
A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely +# identify the documentation publisher. This should be a reverse domain-name +# style string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO).
+ +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# +# Qt Help Project / Custom Filters.
+ +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to YES if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help).
For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider setting DISABLE_INDEX to YES when enabling this option. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps.
Use this if you do not +# have LaTeX installed or if you want formulas to look prettier in the HTML +# output. When enabled you may also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and +# SVG. The default value is HTML-CSS, which is slower, but has the best +# compatibility. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. +# However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+ +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a web server instead of a web client using Javascript. +# There are two flavours of web server based search depending on the +# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for +# searching and an index file used by the script. When EXTERNAL_SEARCH is +# enabled the indexing and searching need to be provided by external tools. +# See the manual for details. + +SERVER_BASED_SEARCH = NO + +# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP +# script for searching. Instead the search results are written to an XML file +# which needs to be processed by an external indexer. Doxygen will invoke an +# external search engine pointed to by the SEARCHENGINE_URL option to obtain +# the search results. Doxygen ships with an example indexer (doxyindexer) and +# search engine (doxysearch.cgi) which are based on the open source search engine +# library Xapian. See the manual for configuration details. + +EXTERNAL_SEARCH = NO + +# The SEARCHENGINE_URL should point to a search engine hosted by a web server +# which will return the search results when EXTERNAL_SEARCH is enabled. +# Doxygen ships with an example search engine (doxysearch) which is based on +# the open source search engine library Xapian. See the manual for configuration +# details. + +SEARCHENGINE_URL = + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed +# search data is written to a file for indexing by an external tool. With the +# SEARCHDATA_FILE tag the name of this file can be specified. + +SEARCHDATA_FILE = searchdata.xml + +# When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the +# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is +# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple +# projects and redirect the results back to the right project.
+ +EXTERNAL_SEARCH_ID = + +# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen +# projects other than the one defined by this configuration file, but that are +# all added to the same external search index. Each project needs to have a +# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id +# to a relative location where the documentation can be found. +# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... + +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +#defined in doxygen-config.cfg +#GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer.
Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +#defined in doxygen-config.cfg +#LATEX_HEADER + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output.
+ +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. 
+ +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. 
+ +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +#XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +#XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. 
+ +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. 
+ +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = STARPU_USE_OPENCL=1 \ + STARPU_USE_CUDA=1 \ + STARPU_HAVE_NVML_H=1 \ + STARPU_USE_HIP=1 \ + STARPU_USE_MAX_FPGA=1 \ + STARPU_USE_MPI=1 \ + STARPU_USE_MPI_FT=1 \ + STARPU_USE_MPI_FT_STATS=1 \ + STARPU_USE_MPI_MPI=1 \ + STARPU_USE_MPI_NMAD=1 \ + STARPU_HAVE_HWLOC=1 \ + STARPU_USE_SC_HYPERVISOR=1 \ + STARPU_SIMGRID=1 \ + STARPU_OPENMP=1 \ + STARPU_PARALLEL_WORKER=1 \ + STARPU_MKL=1 \ + STARPU_WORKER_CALLBACKS=1 \ + STARPU_HAVE_GLPK_H=1 \ + STARPU_USE_MPI_MASTER_SLAVE=1 \ + STARPU_BUBBLE=1 \ + __GCC__ + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. 
+ +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = starpu.tag + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). 
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default). + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed.
+ +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. If there are many fields or methods and many nodes the +# graph may become too big to be useful. 
The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# manageable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. 
+ +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. 
Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. 
+ +DOT_CLEANUP = YES diff --git a/doc/doxygen/Makefile.am b/doc/doxygen/Makefile.am new file mode 100644 index 0000000..c0199f4 --- /dev/null +++ b/doc/doxygen/Makefile.am @@ -0,0 +1,233 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +DOX_DIR = $(top_builddir)/doc/doxygen +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen +DOX_HTML_DIR = html +DOX_LATEX_DIR = latex +DOX_PDF = starpu.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + chapters/foreword.doxy \ + chapters/starpu_introduction/introduction_intro.doxy \ + chapters/starpu_introduction/doc_organization.doxy \ + chapters/starpu_introduction/glossary.doxy \ + chapters/starpu_installation/installation_intro.doxy \ + chapters/starpu_installation/environment_variables.doxy \ + chapters/starpu_installation/building.doxy \ + chapters/starpu_installation/configure_options.doxy \ + chapters/starpu_installation/configuration_and_initialization.doxy \ + chapters/starpu_basics/basics_intro.doxy \ + chapters/starpu_basics/starpu_applications.doxy \ + chapters/starpu_basics/basic_examples.doxy \ + chapters/starpu_basics/scaling_vector_example.doxy \ + chapters/starpu_basics/tasks.doxy \ + chapters/starpu_basics/data_management.doxy \ + chapters/starpu_basics/scheduling.doxy \ + 
chapters/starpu_basics/examples_sources.doxy \ + chapters/starpu_basics/code/basics_vector_scal_c.c \ + chapters/starpu_basics/code/basics_vector_scal_cpu.c \ + chapters/starpu_basics/code/basics_vector_scal_cuda.c \ + chapters/starpu_basics/code/basics_vector_scal_opencl.c \ + chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ + chapters/starpu_applications/applications_intro.doxy \ + chapters/starpu_applications/vector_scaling.doxy \ + chapters/starpu_applications/code/vector_scal_c.c \ + chapters/starpu_applications/code/vector_scal_c_align.c \ + chapters/starpu_applications/code/vector_scal_cpu.c \ + chapters/starpu_applications/code/vector_scal_starpu.c \ + chapters/starpu_applications/stencil.doxy \ + chapters/starpu_applications/code/stencil5.c \ + chapters/starpu_applications/code/stencil5_starpu.c \ + chapters/starpu_applications/code/stencil5_starpu_mpi.c \ + chapters/starpu_performances/performances_intro.doxy \ + chapters/starpu_performances/benchmarking_starpu.doxy \ + chapters/starpu_performances/online_performance_tools.doxy \ + chapters/starpu_performances/offline_performance_tools.doxy \ + chapters/starpu_faq/faq_intro.doxy \ + chapters/starpu_faq/check_list_performance.doxy \ + chapters/starpu_faq/faq.doxy \ + chapters/starpu_languages/languages_intro.doxy \ + chapters/starpu_languages/native_fortran_support.doxy \ + chapters/starpu_languages/java.doxy \ + chapters/starpu_languages/python.doxy \ + chapters/starpu_languages/openmp_runtime_support.doxy \ + chapters/starpu_languages/code/nf_initexit.f90 \ + chapters/starpu_languages/code/java_starpu.java \ + chapters/starpu_languages/code/java_spark.java \ + chapters/starpu_extensions/extensions_intro.doxy \ + chapters/starpu_extensions/advanced_tasks.doxy \ + chapters/starpu_extensions/advanced_data_management.doxy \ + chapters/starpu_extensions/helpers.doxy \ + chapters/starpu_extensions/debugging_tools.doxy \ + chapters/starpu_extensions/advanced_scheduling.doxy \ + 
chapters/starpu_extensions/scheduling_contexts.doxy \ + chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + chapters/starpu_extensions/cuda_support.doxy \ + chapters/starpu_extensions/opencl_support.doxy \ + chapters/starpu_extensions/max_fpga_support.doxy \ + chapters/starpu_extensions/out_of_core.doxy \ + chapters/starpu_extensions/mpi_support.doxy \ + chapters/starpu_extensions/tcpip_support.doxy \ + chapters/starpu_extensions/transactions.doxy \ + chapters/starpu_extensions/fault_tolerance.doxy \ + chapters/starpu_extensions/fft_support.doxy \ + chapters/starpu_extensions/socl_opencl_extensions.doxy \ + chapters/starpu_extensions/bubble.doxy \ + chapters/starpu_extensions/parallel_worker.doxy \ + chapters/starpu_extensions/interoperability.doxy \ + chapters/starpu_extensions/scheduling_policy_definition.doxy \ + chapters/starpu_extensions/simgrid.doxy \ + chapters/starpu_extensions/code/complex.c \ + chapters/starpu_extensions/code/disk_compute.c \ + chapters/starpu_extensions/code/disk_copy.c \ + chapters/starpu_extensions/code/forkmode.c \ + chapters/starpu_extensions/code/multiformat.c \ + chapters/starpu_extensions/code/simgrid.c \ + chapters/files.doxy \ + chapters/fdl_1_3.doxy \ + chapters/api/fortran_support.doxy \ + chapters/api/bubble_support.doxy \ + chapters/api/fft_support.doxy \ + chapters/api/threads.doxy + +images = \ + chapters/images/arbiter.png \ + chapters/images/data_trace.png \ + chapters/images/distrib_data.png \ + chapters/images/distrib_data_histo.png \ + chapters/images/paje_draw_histogram.png \ + chapters/images/parallel_worker2.png \ + chapters/images/runtime-par.png \ + chapters/images/starpu_non_linear_memset_regression_based.png \ + chapters/images/starpu_non_linear_memset_regression_based_2.png \ + chapters/images/starpu_starpu_slu_lu_model_11.png \ + chapters/images/starpu_chol_model_11_type.png \ + chapters/images/tasks_size_overhead.png \ + chapters/images/temanejo.png \ + chapters/images/eclipse_installer.png \ + 
chapters/images/eclipse_install_cdt.png \ + chapters/images/eclipse_hello_build.png \ + chapters/images/eclipse_hello_run.png \ + chapters/images/eclipse_hello_fxt.png \ + chapters/images/eclipse_hello_graph.png \ + chapters/images/eclipse_hello_vite.png \ + chapters/images/eclipse_hello_svg_graph.png \ + chapters/images/eclipse_hello_plugin.png \ + chapters/images/eclipse_hello_paje_trace.png \ + chapters/images/eclipse_hello_hgraph.png \ + chapters/images/eclipse_install_pde.png \ + chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + chapters/images/starpu_log_arr.png \ + chapters/images/starpu_log_list.png \ + chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + chapters/images/starpu_power_non_linear_memset_regression_based.png \ + chapters/images/starvz_visu.png \ + chapters/images/starvz_visu_r.png \ + chapters/images/trace_bw_heatmap.png \ + chapters/images/trace_recv_use.png \ + chapters/images/trace_send_use.png \ + chapters/images/trace_volume_heatmap.png \ + chapters/images/starpupy_handle_func_perf_pickle.png \ + chapters/images/starpupy_handle_perf_pickle.png \ + chapters/images/starpupy_handle_func_perf.png \ + chapters/images/starpupy_handle_perf.png \ + chapters/images/tasks_size_overhead_py_fut_pickle.png \ + chapters/images/tasks_size_overhead_py_futur.png \ + chapters/images/tasks_size_overhead_py_handle_pickle.png \ + chapters/images/tasks_size_overhead_py_handle.png \ + chapters/images/tasks_size_overhead_py_none.png \ + chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html \ + $(top_srcdir)/include/starpu.h \ + $(top_srcdir)/include/starpu_bitmap.h \ + $(top_srcdir)/include/starpu_bound.h \ + $(top_srcdir)/include/starpu_cublas.h \ + 
$(top_srcdir)/include/starpu_cublas_v2.h \ + $(top_srcdir)/include/starpu_cublasLt.h \ + $(top_srcdir)/include/starpu_cusparse.h \ + $(top_srcdir)/include/starpu_cuda.h \ + $(top_srcdir)/include/starpu_cusolver.h \ + $(top_srcdir)/include/starpu_data_filters.h \ + $(top_srcdir)/include/starpu_data.h \ + $(top_srcdir)/include/starpu_data_interfaces.h \ + $(top_srcdir)/include/starpu_deprecated_api.h \ + $(top_srcdir)/include/starpu_disk.h \ + $(top_srcdir)/include/starpu_driver.h \ + $(top_srcdir)/include/starpu_expert.h \ + $(top_srcdir)/include/starpu_fxt.h \ + $(top_srcdir)/include/starpu_hash.h \ + $(top_srcdir)/include/starpu_helper.h \ + $(top_srcdir)/include/starpu_hip.h \ + $(top_srcdir)/include/starpu_max_fpga.h \ + $(top_srcdir)/include/starpu_mod.f90 \ + $(top_srcdir)/include/starpu_opencl.h \ + $(top_srcdir)/include/starpu_openmp.h \ + $(top_srcdir)/include/starpu_parallel_worker.h \ + $(top_srcdir)/include/starpu_perf_monitoring.h \ + $(top_srcdir)/include/starpu_perf_steering.h \ + $(top_srcdir)/include/starpu_perfmodel.h \ + $(top_srcdir)/include/starpu_profiling.h \ + $(top_srcdir)/include/starpu_profiling_tool.h \ + $(top_srcdir)/include/starpu_rand.h \ + $(top_srcdir)/include/starpu_sched_component.h \ + $(top_srcdir)/include/starpu_sched_ctx.h \ + $(top_srcdir)/include/starpu_sched_ctx_hypervisor.h \ + $(top_srcdir)/include/starpu_scheduler.h \ + $(top_srcdir)/include/starpu_simgrid_wrap.h \ + $(top_srcdir)/include/starpu_sink.h \ + $(top_srcdir)/include/starpu_stdlib.h \ + $(top_srcdir)/include/starpu_task_bundle.h \ + $(top_srcdir)/include/starpu_task_dep.h \ + $(top_srcdir)/include/starpu_task.h \ + $(top_srcdir)/include/starpu_task_list.h \ + $(top_srcdir)/include/starpu_task_util.h \ + $(top_srcdir)/include/starpu_thread.h \ + $(top_srcdir)/include/starpu_thread_util.h \ + $(top_srcdir)/include/starpu_tree.h \ + $(top_srcdir)/include/starpu_util.h \ + $(top_srcdir)/include/starpu_worker.h \ + $(top_srcdir)/include/fstarpu_mod.f90 \ + 
$(top_srcdir)/include/schedulers/starpu_heteroprio.h \ + $(top_srcdir)/starpufft/include/starpufft.h \ + $(top_srcdir)/mpi/include/starpu_mpi.h \ + $(top_srcdir)/mpi/include/starpu_mpi_ft.h \ + $(top_srcdir)/mpi/include/starpu_mpi_lb.h \ + $(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90 \ + $(top_srcdir)/sc_hypervisor/include/sc_hypervisor.h \ + $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_config.h \ + $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_lp.h \ + $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_monitoring.h \ + $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_policy.h \ + $(top_srcdir)/starpurm/include/starpurm.h \ + $(top_srcdir)/include/schedulers/starpu_scheduler_toolbox.h +endif + diff --git a/doc/doxygen/Makefile.in b/doc/doxygen/Makefile.in new file mode 100644 index 0000000..bb810d5 --- /dev/null +++ b/doc/doxygen/Makefile.in @@ -0,0 +1,1078 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = 
$(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg doxygen-config-include.cfg \ + doxygen_filter.sh +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) 
+am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config-include.cfg.in \ + $(srcdir)/doxygen-config.cfg.in $(srcdir)/doxygen_filter.sh.in \ + $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ 
+FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ 
+LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ 
+SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ 
+STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = 
@mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen +DOX_HTML_DIR = html +DOX_LATEX_DIR = latex +DOX_PDF = starpu.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + chapters/foreword.doxy \ + chapters/starpu_introduction/introduction_intro.doxy \ + chapters/starpu_introduction/doc_organization.doxy \ + chapters/starpu_introduction/glossary.doxy \ + chapters/starpu_installation/installation_intro.doxy \ + chapters/starpu_installation/environment_variables.doxy \ + chapters/starpu_installation/building.doxy \ + chapters/starpu_installation/configure_options.doxy \ + chapters/starpu_installation/configuration_and_initialization.doxy \ + chapters/starpu_basics/basics_intro.doxy \ + chapters/starpu_basics/starpu_applications.doxy \ + chapters/starpu_basics/basic_examples.doxy \ + chapters/starpu_basics/scaling_vector_example.doxy \ + chapters/starpu_basics/tasks.doxy \ + chapters/starpu_basics/data_management.doxy \ + chapters/starpu_basics/scheduling.doxy \ + chapters/starpu_basics/examples_sources.doxy \ + chapters/starpu_basics/code/basics_vector_scal_c.c \ + chapters/starpu_basics/code/basics_vector_scal_cpu.c \ + chapters/starpu_basics/code/basics_vector_scal_cuda.c \ + chapters/starpu_basics/code/basics_vector_scal_opencl.c \ + 
chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ + chapters/starpu_applications/applications_intro.doxy \ + chapters/starpu_applications/vector_scaling.doxy \ + chapters/starpu_applications/code/vector_scal_c.c \ + chapters/starpu_applications/code/vector_scal_c_align.c \ + chapters/starpu_applications/code/vector_scal_cpu.c \ + chapters/starpu_applications/code/vector_scal_starpu.c \ + chapters/starpu_applications/stencil.doxy \ + chapters/starpu_applications/code/stencil5.c \ + chapters/starpu_applications/code/stencil5_starpu.c \ + chapters/starpu_applications/code/stencil5_starpu_mpi.c \ + chapters/starpu_performances/performances_intro.doxy \ + chapters/starpu_performances/benchmarking_starpu.doxy \ + chapters/starpu_performances/online_performance_tools.doxy \ + chapters/starpu_performances/offline_performance_tools.doxy \ + chapters/starpu_faq/faq_intro.doxy \ + chapters/starpu_faq/check_list_performance.doxy \ + chapters/starpu_faq/faq.doxy \ + chapters/starpu_languages/languages_intro.doxy \ + chapters/starpu_languages/native_fortran_support.doxy \ + chapters/starpu_languages/java.doxy \ + chapters/starpu_languages/python.doxy \ + chapters/starpu_languages/openmp_runtime_support.doxy \ + chapters/starpu_languages/code/nf_initexit.f90 \ + chapters/starpu_languages/code/java_starpu.java \ + chapters/starpu_languages/code/java_spark.java \ + chapters/starpu_extensions/extensions_intro.doxy \ + chapters/starpu_extensions/advanced_tasks.doxy \ + chapters/starpu_extensions/advanced_data_management.doxy \ + chapters/starpu_extensions/helpers.doxy \ + chapters/starpu_extensions/debugging_tools.doxy \ + chapters/starpu_extensions/advanced_scheduling.doxy \ + chapters/starpu_extensions/scheduling_contexts.doxy \ + chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + chapters/starpu_extensions/cuda_support.doxy \ + chapters/starpu_extensions/opencl_support.doxy \ + chapters/starpu_extensions/max_fpga_support.doxy \ + 
chapters/starpu_extensions/out_of_core.doxy \ + chapters/starpu_extensions/mpi_support.doxy \ + chapters/starpu_extensions/tcpip_support.doxy \ + chapters/starpu_extensions/transactions.doxy \ + chapters/starpu_extensions/fault_tolerance.doxy \ + chapters/starpu_extensions/fft_support.doxy \ + chapters/starpu_extensions/socl_opencl_extensions.doxy \ + chapters/starpu_extensions/bubble.doxy \ + chapters/starpu_extensions/parallel_worker.doxy \ + chapters/starpu_extensions/interoperability.doxy \ + chapters/starpu_extensions/scheduling_policy_definition.doxy \ + chapters/starpu_extensions/simgrid.doxy \ + chapters/starpu_extensions/code/complex.c \ + chapters/starpu_extensions/code/disk_compute.c \ + chapters/starpu_extensions/code/disk_copy.c \ + chapters/starpu_extensions/code/forkmode.c \ + chapters/starpu_extensions/code/multiformat.c \ + chapters/starpu_extensions/code/simgrid.c \ + chapters/files.doxy \ + chapters/fdl_1_3.doxy \ + chapters/api/fortran_support.doxy \ + chapters/api/bubble_support.doxy \ + chapters/api/fft_support.doxy \ + chapters/api/threads.doxy + +images = \ + chapters/images/arbiter.png \ + chapters/images/data_trace.png \ + chapters/images/distrib_data.png \ + chapters/images/distrib_data_histo.png \ + chapters/images/paje_draw_histogram.png \ + chapters/images/parallel_worker2.png \ + chapters/images/runtime-par.png \ + chapters/images/starpu_non_linear_memset_regression_based.png \ + chapters/images/starpu_non_linear_memset_regression_based_2.png \ + chapters/images/starpu_starpu_slu_lu_model_11.png \ + chapters/images/starpu_chol_model_11_type.png \ + chapters/images/tasks_size_overhead.png \ + chapters/images/temanejo.png \ + chapters/images/eclipse_installer.png \ + chapters/images/eclipse_install_cdt.png \ + chapters/images/eclipse_hello_build.png \ + chapters/images/eclipse_hello_run.png \ + chapters/images/eclipse_hello_fxt.png \ + chapters/images/eclipse_hello_graph.png \ + chapters/images/eclipse_hello_vite.png \ + 
chapters/images/eclipse_hello_svg_graph.png \ + chapters/images/eclipse_hello_plugin.png \ + chapters/images/eclipse_hello_paje_trace.png \ + chapters/images/eclipse_hello_hgraph.png \ + chapters/images/eclipse_install_pde.png \ + chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + chapters/images/starpu_log_arr.png \ + chapters/images/starpu_log_list.png \ + chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + chapters/images/starpu_power_non_linear_memset_regression_based.png \ + chapters/images/starvz_visu.png \ + chapters/images/starvz_visu_r.png \ + chapters/images/trace_bw_heatmap.png \ + chapters/images/trace_recv_use.png \ + chapters/images/trace_send_use.png \ + chapters/images/trace_volume_heatmap.png \ + chapters/images/starpupy_handle_func_perf_pickle.png \ + chapters/images/starpupy_handle_perf_pickle.png \ + chapters/images/starpupy_handle_func_perf.png \ + chapters/images/starpupy_handle_perf.png \ + chapters/images/tasks_size_overhead_py_fut_pickle.png \ + chapters/images/tasks_size_overhead_py_futur.png \ + chapters/images/tasks_size_overhead_py_handle_pickle.png \ + chapters/images/tasks_size_overhead_py_handle.png \ + chapters/images/tasks_size_overhead_py_none.png \ + chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_bitmap.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_bound.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublas.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublas_v2.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublasLt.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cusparse.h \ 
+@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cuda.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cusolver.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data_filters.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data_interfaces.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_deprecated_api.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_disk.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_driver.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_expert.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_fxt.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_hash.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_helper.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_hip.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_max_fpga.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_mod.f90 \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_opencl.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_openmp.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_parallel_worker.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perf_monitoring.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perf_steering.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perfmodel.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_profiling.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_profiling_tool.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_rand.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_component.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_ctx.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_ctx_hypervisor.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_scheduler.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_simgrid_wrap.h \ +@STARPU_BUILD_DOC_TRUE@ 
$(top_srcdir)/include/starpu_sink.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_stdlib.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_bundle.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_dep.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_list.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_util.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_thread.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_thread_util.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_tree.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_util.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_worker.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/fstarpu_mod.f90 \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/schedulers/starpu_heteroprio.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/starpufft/include/starpufft.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi_ft.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi_lb.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90 \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_config.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_lp.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_monitoring.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_policy.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/starpurm/include/starpurm.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/schedulers/starpu_scheduler_toolbox.h + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case 
'$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +doxygen-config-include.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config-include.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +doxygen_filter.sh: $(top_builddir)/config.status $(srcdir)/doxygen_filter.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + 
echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + 
+@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." 
>> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ 
$(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! 
< refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/doxygen/chapters/api/bubble_support.doxy b/doc/doxygen/chapters/api/bubble_support.doxy new file mode 100644 index 0000000..fbae913 --- /dev/null +++ b/doc/doxygen/chapters/api/bubble_support.doxy @@ -0,0 +1,25 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2024-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* +* The file is empty but necessary to define the group API_Bubble +*/ + +/*! \defgroup API_Bubble Hierarchical DAGs + +\brief API for Hierarchical DAGs + +*/ diff --git a/doc/doxygen/chapters/api/fft_support.doxy b/doc/doxygen/chapters/api/fft_support.doxy new file mode 100644 index 0000000..b7d815c --- /dev/null +++ b/doc/doxygen/chapters/api/fft_support.doxy @@ -0,0 +1,79 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \defgroup API_FFT_Support FFT Support + +\def STARPUFFT_FORWARD +\ingroup API_FFT_Support +Value of the \p sign argument of starpufft_plan_dft_1d() and starpufft_plan_dft_2d() which requests a forward transform. + +\def STARPUFFT_INVERSE +\ingroup API_FFT_Support +Value of the \p sign argument of starpufft_plan_dft_1d() and starpufft_plan_dft_2d() which requests an inverse transform. + +\fn void * starpufft_malloc(size_t n) +\ingroup API_FFT_Support +Allocate memory for \p n bytes. This is preferred over \c malloc(), +since it allocates pinned memory, which allows overlapped transfers. + +\fn void * starpufft_free(void *p) +\ingroup API_FFT_Support +Release memory previously allocated with starpufft_malloc(). + +\fn struct starpufft_plan * starpufft_plan_dft_1d(int n, int sign, unsigned flags) +\ingroup API_FFT_Support +Initialize a plan for 1D FFT of size \p n. \p sign can be STARPUFFT_FORWARD +or STARPUFFT_INVERSE. \p flags must be 0. + +\fn struct starpufft_plan * starpufft_plan_dft_2d(int n, int m, int sign, unsigned flags) +\ingroup API_FFT_Support +Initialize a plan for 2D FFT of size (\p n, \p m).
\p sign can be +STARPUFFT_FORWARD or STARPUFFT_INVERSE. \p flags must be 0. + +\fn struct starpu_task * starpufft_start(starpufft_plan p, void *in, void *out) +\ingroup API_FFT_Support +Start an FFT previously planned as \p p, using \p in and \p out as +input and output. This only submits the task and does not wait for it. +The application should call starpufft_cleanup() to unregister the data. + +\fn struct starpu_task * starpufft_start_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out) +\ingroup API_FFT_Support +Start an FFT previously planned as \p p, using data handles \p in and +\p out as input and output (assumed to be vectors of elements of the +expected types). This only submits the task and does not wait for it. + +\fn void starpufft_execute(starpufft_plan p, void *in, void *out) +\ingroup API_FFT_Support +Execute an FFT previously planned as \p p, using \p in and \p out as +input and output. This submits and waits for the task. + +\fn void starpufft_execute_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out) +\ingroup API_FFT_Support +Execute an FFT previously planned as \p p, using data handles \p in +and \p out as input and output (assumed to be vectors of elements of +the expected types). This submits and waits for the task. + +\fn void starpufft_cleanup(starpufft_plan p) +\ingroup API_FFT_Support +Release data for plan \p p, in the starpufft_start() case. + +\fn void starpufft_destroy_plan(starpufft_plan p) +\ingroup API_FFT_Support +Destroy plan \p p, i.e. release all CPU (fftw) and GPU (cufft) +resources. + +*/ + diff --git a/doc/doxygen/chapters/api/fortran_support.doxy b/doc/doxygen/chapters/api/fortran_support.doxy new file mode 100644 index 0000000..eb41bc3 --- /dev/null +++ b/doc/doxygen/chapters/api/fortran_support.doxy @@ -0,0 +1,21 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \defgroup API_Fortran Fortran Support + +\brief Fortran API + +*/ diff --git a/doc/doxygen/chapters/api/threads.doxy b/doc/doxygen/chapters/api/threads.doxy new file mode 100644 index 0000000..9d9b2b9 --- /dev/null +++ b/doc/doxygen/chapters/api/threads.doxy @@ -0,0 +1,377 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \defgroup API_Threads Threads + +\brief API for threads. The thread functions are implemented on top of either the +pthread library or the SimGrid library when the simulated performance +mode is enabled (\ref SimGridSupport). + +\def STARPU_PTHREAD_CREATE_ON +\ingroup API_Threads +Call starpu_pthread_create_on() and abort on error. 
+ +\def STARPU_PTHREAD_CREATE +\ingroup API_Threads +Call starpu_pthread_create() and abort on error. + +\def STARPU_PTHREAD_MUTEX_INIT +\ingroup API_Threads +Call starpu_pthread_mutex_init() and abort on error. + +\def STARPU_PTHREAD_MUTEX_INIT0 +\ingroup API_Threads +Call starpu_pthread_mutex_init() only if the content of +PTHREAD_MUTEX_INITIALIZER is not zero. This should be called instead +of STARPU_PTHREAD_MUTEX_INIT when it is known that the content of the +pthread_mutex_t was already zeroed. + +\def STARPU_PTHREAD_MUTEX_DESTROY +\ingroup API_Threads +Call starpu_pthread_mutex_destroy() and abort on error. + +\def STARPU_PTHREAD_MUTEX_LOCK +\ingroup API_Threads +Call starpu_pthread_mutex_lock() and abort on error. + +\def STARPU_PTHREAD_MUTEX_UNLOCK +\ingroup API_Threads +Call starpu_pthread_mutex_unlock() and abort on error. + +\def STARPU_PTHREAD_KEY_CREATE +\ingroup API_Threads +Call starpu_pthread_key_create() and abort on error. + +\def STARPU_PTHREAD_KEY_DELETE +\ingroup API_Threads +Call starpu_pthread_key_delete() and abort on error. + +\def STARPU_PTHREAD_SETSPECIFIC +\ingroup API_Threads +Call starpu_pthread_setspecific() and abort on error. + +\def STARPU_PTHREAD_GETSPECIFIC +\ingroup API_Threads +Call starpu_pthread_getspecific() and abort on error. + +\def STARPU_PTHREAD_RWLOCK_INIT +\ingroup API_Threads +Call starpu_pthread_rwlock_init() and abort on error. + +\def STARPU_PTHREAD_RWLOCK_INIT0 +\ingroup API_Threads +Call starpu_pthread_rwlock_init() only if the content of +PTHREAD_RWLOCK_INITIALIZER is not zero. This should be called instead +of STARPU_PTHREAD_RWLOCK_INIT when it is known that the content of the +pthread_rwlock_t was already zeroed. + +\def STARPU_PTHREAD_RWLOCK_RDLOCK +\ingroup API_Threads +Call starpu_pthread_rwlock_rdlock() and abort on error. + +\def STARPU_PTHREAD_RWLOCK_WRLOCK +\ingroup API_Threads +Call starpu_pthread_rwlock_wrlock() and abort on error. 
+ +\def STARPU_PTHREAD_RWLOCK_UNLOCK +\ingroup API_Threads +Call starpu_pthread_rwlock_unlock() and abort on error. + +\def STARPU_PTHREAD_RWLOCK_DESTROY +\ingroup API_Threads +Call starpu_pthread_rwlock_destroy() and abort on error. + +\def STARPU_PTHREAD_COND_INIT +\ingroup API_Threads +Call starpu_pthread_cond_init() and abort on error. + +\def STARPU_PTHREAD_COND_INIT0 +\ingroup API_Threads +Call starpu_pthread_cond_init() only if the content of +PTHREAD_COND_INITIALIZER is not zero. This should be called instead +of STARPU_PTHREAD_COND_INIT when it is known that the content of the +pthread_cond_t was already zeroed. + +\def STARPU_PTHREAD_COND_DESTROY +\ingroup API_Threads +Call starpu_pthread_cond_destroy() and abort on error. + +\def STARPU_PTHREAD_COND_SIGNAL +\ingroup API_Threads +Call starpu_pthread_cond_signal() and abort on error. + +\def STARPU_PTHREAD_COND_BROADCAST +\ingroup API_Threads +Call starpu_pthread_cond_broadcast() and abort on error. + +\def STARPU_PTHREAD_COND_WAIT +\ingroup API_Threads +Call starpu_pthread_cond_wait() and abort on error. + +\def STARPU_PTHREAD_BARRIER_INIT +\ingroup API_Threads +Call starpu_pthread_barrier_init() and abort on error. + +\def STARPU_PTHREAD_BARRIER_DESTROY +\ingroup API_Threads +Call starpu_pthread_barrier_destroy() and abort on error. + +\def STARPU_PTHREAD_BARRIER_WAIT +\ingroup API_Threads +Call starpu_pthread_barrier_wait() and abort on error. + +\fn int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, int where) +\ingroup API_Threads + +\fn int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) +\ingroup API_Threads +Start a new thread in the calling process. The new +thread starts execution by invoking \p start_routine; \p arg is passed +as the sole argument of \p start_routine. 
+ +\fn int starpu_pthread_join(starpu_pthread_t thread, void **retval) +\ingroup API_Threads +Wait for the thread specified by \p thread to +terminate. If that thread has already terminated, then the function +returns immediately. The thread specified by \p thread must be +joinable. + +\fn int starpu_pthread_exit(void *retval) +\ingroup API_Threads +Terminate the calling thread and return a value via +\p retval that (if the thread is joinable) is available to another thread +in the same process that calls starpu_pthread_join(). + +\fn int starpu_pthread_attr_init(starpu_pthread_attr_t *attr) +\ingroup API_Threads +Initialize the thread attributes object pointed to by +\p attr with default attribute values. + +Do not do anything when the simulated performance mode is enabled +(\ref SimGridSupport). + +\fn int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr) +\ingroup API_Threads +Destroy a thread attributes object which is no longer +required. Destroying a thread attributes object has no effect on +threads that were created using that object. + +Do not do anything when the simulated performance mode is enabled +(\ref SimGridSupport). + +\fn int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate) +\ingroup API_Threads +Set the detach state attribute of the thread attributes +object referred to by \p attr to the value specified in \p +detachstate. The detach state attribute determines whether a thread +created using the thread attributes object \p attr will be created in +a joinable or a detached state. + +Do not do anything when the simulated performance mode is enabled +(\ref SimGridSupport). + +\fn int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr) +\ingroup API_Threads +Initialize the mutex object pointed to by \p mutex +according to the mutex attributes specified in \p mutexattr. If \p +mutexattr is NULL, default attributes are used instead. 
+ +\fn int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex) +\ingroup API_Threads +Destroy a mutex object, and free the resources it might +hold. The mutex must be unlocked on entrance. + +\fn int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) +\ingroup API_Threads +Lock the given \p mutex. If \p mutex is currently +unlocked, it becomes locked and owned by the calling thread, and the +function returns immediately. If \p mutex is already locked by +another thread, the function suspends the calling thread until +\p mutex is unlocked. + +This function also produces trace when the configure option +\ref enable-fxt-lock "--enable-fxt-lock" is enabled. + +\fn int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) +\ingroup API_Threads +Unlock the given \p mutex. The mutex is assumed to be +locked and owned by the calling thread on entrance to +starpu_pthread_mutex_unlock(). + +This function also produces trace when the configure option +\ref enable-fxt-lock "--enable-fxt-lock" is enabled. + +\fn int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) +\ingroup API_Threads +Behave identically to starpu_pthread_mutex_lock(), +except that it does not block the calling thread if the mutex is +already locked by another thread (or by the calling thread in the case +of a ``fast'' mutex). Instead, the function returns immediately with +the error code \c EBUSY. + +This function also produces trace when the configure option +\ref enable-fxt-lock "--enable-fxt-lock" is enabled. + +\typedef STARPU_PTHREAD_MUTEX_INITIALIZER +\ingroup API_Threads +Initialize the mutex given in parameter. 
+ +\fn int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type) +\ingroup API_Threads +todo + +\fn int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr, int type) +\ingroup API_Threads +todo + +\fn int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr) +\ingroup API_Threads +todo + +\fn int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr) +\ingroup API_Threads +todo + +\fn int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *)) +\ingroup API_Threads +Allocate a new TSD key. The key is stored in the +location pointed to by \p key. + +\fn int starpu_pthread_key_delete(starpu_pthread_key_t key) +\ingroup API_Threads +Deallocate a TSD key. Do not check whether +non-NULL values are associated with that key in the currently +executing threads, nor call the destructor function associated with +the key. + +\fn int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer) +\ingroup API_Threads +Change the value associated with \p key in the calling +thread, storing the given \p pointer instead. + +\fn void *starpu_pthread_getspecific(starpu_pthread_key_t key) +\ingroup API_Threads +Return the value associated with \p key on success, and +NULL on error. + +\typedef STARPU_PTHREAD_COND_INITIALIZER +\ingroup API_Threads +Initialize the condition variable given in parameter. + +\fn int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr) +\ingroup API_Threads +Initialize the condition variable \p cond, using the +condition attributes specified in \p cond_attr, or default attributes +if \p cond_attr is NULL. + +\fn int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond) +\ingroup API_Threads +Restart one of the threads that are waiting on the +condition variable \p cond. If no threads are waiting on \p cond, +nothing happens. 
If several threads are waiting on \p cond, exactly +one is restarted, but it is not specified which. + +\fn int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond) +\ingroup API_Threads +Restart all the threads that are waiting on the +condition variable \p cond. Nothing happens if no threads are waiting on \p cond. + +\fn int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) +\ingroup API_Threads +Atomically unlock \p mutex (as per +starpu_pthread_mutex_unlock()) and wait for the condition variable \p cond +to be signaled. The thread execution is suspended and does not consume +any CPU time until the condition variable is signaled. The mutex must +be locked by the calling thread on entrance to +starpu_pthread_cond_wait(). Before returning to the calling thread, the +function re-acquires \p mutex (as per starpu_pthread_mutex_lock()). + +This function also produces trace when the configure option +\ref enable-fxt-lock "--enable-fxt-lock" is enabled. + +\fn int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime) +\ingroup API_Threads +Atomically unlock \p mutex and wait on \p cond, as +starpu_pthread_cond_wait() does, but also bound the duration of +the wait with \p abstime. + +\fn int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond) +\ingroup API_Threads +Destroy a condition variable, freeing the resources it +might hold. No threads must be waiting on the condition variable on +entrance to the function. + +\fn int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr) +\ingroup API_Threads +Similar to starpu_pthread_mutex_init(). + +\fn int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +Similar to starpu_pthread_mutex_destroy(). + +\fn int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +Similar to starpu_pthread_mutex_lock(). 
+ +\fn int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +todo + +\fn int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +Similar to starpu_pthread_mutex_lock(). + +\fn int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +todo + +\fn int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) +\ingroup API_Threads +Similar to starpu_pthread_mutex_unlock(). + +\fn int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count) +\ingroup API_Threads +todo + +\fn int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) +\ingroup API_Threads +todo + +\fn int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) +\ingroup API_Threads +todo + +\fn int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared) +\ingroup API_Threads +todo + +\fn int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock) +\ingroup API_Threads +todo + +\fn int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) +\ingroup API_Threads +todo + +\fn int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) +\ingroup API_Threads +todo + +\fn int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) +\ingroup API_Threads +todo + +*/ diff --git a/doc/doxygen/chapters/fdl_1_3.doxy b/doc/doxygen/chapters/fdl_1_3.doxy new file mode 100644 index 0000000..d66e844 --- /dev/null +++ b/doc/doxygen/chapters/fdl_1_3.doxy @@ -0,0 +1,526 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page GNUFreeDocumentationLicense The GNU Free Documentation License + +
+Version 1.3, 3 November 2008 +
+ +
+\copyright 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +http://fsf.org/ + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +
+ +
    +
  1. +PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +functional and useful document free in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. + +This License is a kind of ``copyleft'', which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. + +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +
  2. +
  3. +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. + +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. 
+ +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +ASCII without markup, Texinfo input format, LaTeX input +format, SGML or XML using a publicly available +DTD, and standard-conforming simple HTML, +PostScript or PDF designed for human modification. Examples +of transparent image formats include PNG, XCF and +JPG. Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, SGML or +XML for which the DTD and/or processing tools are +not generally available, and the machine-generated HTML, +PostScript or PDF produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. 
For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. + +
  4. +
  5. +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +
  6. +
  7. +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. + +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. 
+ +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. +
  8. + +
  9. +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +
      +
    1. +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. +
    2. +
    3. +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. +
    4. +
    5. +State on the Title page the name of the publisher of the +Modified Version, as the publisher. +
    6. +
    7. +Preserve all the copyright notices of the Document. +
    8. +
    9. +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. +
    10. +
    11. +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. +
    12. +
    13. +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. +
    14. +
    15. +Include an unaltered copy of this License. +
    16. +
    17. +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. +
    18. +
    19. +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. +
    20. +
    21. +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. +
    22. +
    23. +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. +
    24. +
    25. +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. +
    26. +
    27. +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. +
    28. +
    29. +Preserve any Warranty Disclaimers. +
    30. +
    + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. +
  10. + +
  11. +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. You must delete all +sections Entitled ``Endorsements.'' +
  12. + +
  13. +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. +
  14. + +
  15. +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. +
  16. + +
  17. +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. +Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. +
  18. + +
  19. +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. +
  20. + +
  21. +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +http://www.gnu.org/copyleft/. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. +
  22. + +
  23. +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. + +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. +
  24. +
+ +\section ADDENDUM ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +
+ Copyright (C) year your name. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. +
+ +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with...Texts.'' line with this: + +
+ with the Invariant Sections being list their titles, with + the Front-Cover Texts being list, and with the Back-Cover Texts + being list. +
+ +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +*/ diff --git a/doc/doxygen/chapters/files.doxy b/doc/doxygen/chapters/files.doxy new file mode 100644 index 0000000..3fc8e5a --- /dev/null +++ b/doc/doxygen/chapters/files.doxy @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! 
\page Files Files + +\file starpu_config.h + +\file starpu.h +\file starpu_bitmap.h +\file starpu_bound.h +\file starpu_cublas.h +\file starpu_cublas_v2.h +\file starpu_cuda.h +\file starpu_cublasLt.h +\file starpu_cusolver.h +\file starpu_cusparse.h +\file starpu_data_filters.h +\file starpu_data.h +\file starpu_data_interfaces.h +\file starpu_deprecated_api.h +\file starpu_disk.h +\file starpu_driver.h +\file starpu_expert.h +\file starpu_fxt.h +\file starpu_hash.h +\file starpu_helper.h +\file starpu_hip.h +\file starpu_max_fpga.h +\file starpu_mod.f90 +\file starpu_opencl.h +\file starpu_openmp.h +\file starpu_parallel_worker.h +\file starpu_perfmodel.h +\file starpu_perf_monitoring.h +\file starpu_perf_steering.h +\file starpu_profiling.h +\file starpu_profiling_tool.h +\file starpu_rand.h +\file starpu_sched_component.h +\file starpu_sched_ctx.h +\file starpu_sched_ctx_hypervisor.h +\file starpu_scheduler.h +\file starpu_simgrid_wrap.h +\file starpu_sink.h +\file starpu_stdlib.h +\file starpu_task_bundle.h +\file starpu_task_dep.h +\file starpu_task.h +\file starpu_task_list.h +\file starpu_task_util.h +\file starpu_thread.h +\file starpu_thread_util.h +\file starpu_tree.h +\file starpu_util.h +\file starpu_worker.h +\file fstarpu_mod.f90 + +\file starpu_heteroprio.h +\file starpu_scheduler_toolbox.h + +\file starpu_mpi_ft.h +\file starpu_mpi.h +\file starpu_mpi_lb.h + +\file sc_hypervisor_config.h +\file sc_hypervisor.h +\file sc_hypervisor_lp.h +\file sc_hypervisor_monitoring.h +\file sc_hypervisor_policy.h + +\file starpufft.h + +\file starpurm.h + +*/ diff --git a/doc/doxygen/chapters/foreword.doxy b/doc/doxygen/chapters/foreword.doxy new file mode 100644 index 0000000..b08a546 --- /dev/null +++ b/doc/doxygen/chapters/foreword.doxy @@ -0,0 +1,22 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page Foreword Foreword + +\htmlinclude version.html +\htmlinclude foreword.html + +*/ diff --git a/doc/doxygen/chapters/images/arbiter.png b/doc/doxygen/chapters/images/arbiter.png new file mode 100644 index 0000000000000000000000000000000000000000..2f293c1f8f8e66b761781b20ee7d1c74cb0132fa GIT binary patch literal 784 zcmV+r1MmEaP)w#0K~#90?c7gtn=lv!aNI1sC;9g<|o7-3q9;fU?impUM%>QPgC;9)ist6sQ0~{|3ywt zPEJlfh9B>CqOE62#PeLnvsd|ko6N|Cogd^Y5l{7;TbJ(p=AOB5-@k}>Ap5}H^H@ht zPJU;6)&TBbnhE&f)z#jUmoola#(zX2{&QCTO^f(<8ULz8JeG0267fjLGYM~Gyw*<= zE-2*WcgSPhYxOKV!7Flda&kXjgsx7NtK-3DFL=Cgb?|`qmp;4+i?*)Sb6Z#Xvk$Lz z6}2pKll;=GeRvf$+OfKAW^F1TzB5MIdShHO8JZc-SB|@8GBh)iZ@@F~l^@@tzqysi z-A?QqJ5Rm@4=q=wRAYH&Tc`s&NzP3b*gZ&0xfsiH$CruYp>BE5xA_iU*!%5q{S^1z z_7)sni`&iz===hme;D6+%R_Jlu19hldE<`!0!O}Y<2~m+#CcaZ?|mEZx@dFzh?ux+@Q4{ell?})+vm28e?7LHP)GB>i!-+p}F33 z-pOOEj`6(-LN8uo^#aRjxqfYP4pqm%qDfsY{#LF4Akjp0I zLjHH422Zcc!QObA5b_HAG@mK1%TbqL#gLwy*QaK{hrczMoP(Y6SAP(B4}Lz{2)XB2 z#{kX%{;>l1#c#~~610$y?K}fG1NeacXaQWgngN`goP1)=0L}m|=M^{uI0LwV`v&my z;X-?H25<)O8w=oLIKKgB0A~RA;0)m88NeC9<-7uC0RO)O_-&7MXWXxllarH^ zlarI*9B&@C7kr;59cy>~P)f*MdC5l>a&mHVa&mI=8{_l-la{xrfBX*_Onp2w-lVVq O0000z%gd3+$)lPXPb`wu-WXE&u?60sue+2Fjxc zgv&YqXwYh_>nkFW$VX3MVc}mL8~}hK;Q%Dy^b{@&`1~30+-VaDrv*S|;ZP*66*LY2 zkc9$}P-HlAwv?UyajGn0`f(mPIk|v>i&v)ayVJ>Ykm^U}8g-}{#Dd05n^gUo^t#B<_ 
zTh_pZ2TBNl5)wjX9~%^E1&3N8p?~3*MHW6bD`cT35)MZ`=7WS!!1B+L=Y_2keaOBE zcs&FuTZp9X>FF5|l<-`zrRh8~@J-T6g(Np4)It(qA0m^6*X#0}BM6>IY%PB%c)V$Mqhzny3I0={XC9 zV+5Ki)P_s@p&(B@CtzuRm;rFtt&tXtp{ZxwnBA~7^i4CC2{0)@f>>?XO>d$?MD_{U zEztLLenXWw$g?jPEFu8?8v?}DEZQXBC!GA^X=6g_%UHO8rzQo31O}dDdHFtzO-ZUjf)m_=Ikg8Qj&N?e(fTqPtq}974ZiA_AOS$#d}`bUQPUjidFYi>x9+m zXA%#iRApQ&Q-wnd`yYutv!YkNkDc1Js|~Ci(80B4WTCZ!spwOP(Sra(OwoACp%3V@ z??W;=XG3p6VZX6VAPE|@a7{L@mu#|s%#P`!Ts`{`l1HFwb~SgOu_6D0z9qf0`p2-? zz+wW>siNSYK)$Zdp~M*SFs|i9$4SI}=6)HmCj+*3`QeCLxN?&T95VtC!G=f6iwXjl z(Ndrt7w6aUy0s_ALtRPh(}K1=S}#|YRjJ7CEFeWgvL@=Ej9GIes^~EIuAWZdWlRT=ulRo+O0Q9l|a@ob|MZ}w^G2o49^3T_9`pt{Tl8e(h7;QOBxcdLUp}jr+hTXTYTki!0(t_DD8iowd*HeJF**A z#0_WWvMyl~)7X1eNCid-N52$3%=;LToA+v-sftH6R$=G;l+720w6K@(I#-D?CMUw5 z)rT*aHAkKtn60KqFWM+F474gAKt)(kh^6j>*^!@B0bd0px54JC1cc*Pv$2kcag|-rnKt3 zD?LHaKBwBS{C1=W38H1B8#7?5H3v+_*#aX*u6nQ}CX>f*G-Ph)Ya>m@ zFQ!m29ed)&YKeu@Jc8krmQwY~!0guGKiM>mp_%rZseqdjhOn2Jm8;oNdJXSC2=-s) z8D;AexD42*u}iVlc@3GA<1pefRTV;ZnhIBsx9{Fw6y}ey&%G%LqATiWNfRzokgg+k zWXh{Q|C*E|voH=iEENWEsvu8&WN6Jds$t(pYk6uL%=Aqa=>}%+o~`6wRxevT9pPGa z7jGr$V3==Qt@L)#%+Ifg4wtr2M-zQW_HXv-8%k}gNdQ-5G)4qc!|E9)bc)Dr7RtF& zl1s%a^l0kB)e1Ca1q)|-_sPHDZ*WYh<>6gSYj)4C(ZN^AzQ`1}9WHY_@RVXja@0EZ zT>GH}qq%K8bur@U+JhRgEmP_d2qE4;{1~sDZoIVEQEt@UOcg_^xcc&(7l5{Mp%zn%+7+{2*YgLEjUjE+M2diIsnT%R z;fwJ#a~}&@_>;&R(}9aAhJo?iJf<+kPo0+Xd_9WLuL%HE6dApK^*g7_WHheUGitR) z@nH!t>F-g;;NBiHz9oJdiDE)`XiiwR)2sQ;-}i@uSIp~laM>Qiy1$53l;6Dd zUJwrvv@OXYoY|2p6Y!#M9SWf7R{&kCZ?Df>q~TVFU!ENbl@gW` zZ(;K+;5)qX*|z0jQ;F}X6%WI=1L$2?;5%{UuG%m49i>ggw2y?EQ@7VXpyl&IzdZS3 zF6oh#>uf?)FZ7E^qTQg`_@_2!<1lQr=quOGb(+f6{Il_R&oiQ+-Cz&VIc@kzcH*Ch z^Ka_zQeUn(K*!03{O3F9sse89C`s15AkcBX^8>eRhfxu856e$GHScD!4-G&NBss(I zt%wh~ z1=gs>77C_n_30W0p;#DPki@|3@yw`c$E{!`ndy3vpu@tBq~2QvbjDWH8AJ|GKH);= zuiv&1pov9vb|5#k(k{=h^M{T4jIQ&MAly{H;BI4^?WPeDs}Snc>uPeIbY)i%hZ{D# z(QM-U6Fh`9x%ntZip^Qc18W_^cHZ@6-Jy>qq16aUiCm*&yD5hNR+snK*}Zdw%eRVB zfTjn!;hT)5u((?-;J+lZiYDB7FEM%Uw>wu@HrJ|IkmEgOF 
zv~=bY`&|ZYVJCBf<$F?!PF<**^T7xCSW`{x(o!7f8Xb}`4Z186Hn#{gZc!EYXEt|) z84B&xXHVNHiP((Rzn?jfj}@Iu%OhTPN+W~KM7eKDW#4B^;ohB~~2=gr!-`v$a@~Q`Zp-tH4q~++qu(wOwvP+KsD!@p& zDIYY`%gf^@SB$%m|FC17WlP5HwDO0eTbF;Q`6KF%Z)O^SxZBprW=O>TuJ+BA+`BpN z;5Z&dqooTwIhPk!4zJ~RIZo>v9XJxZx^zmNyxgUHdrfz91zr!c#QbODuG>Rm;p%Pr8j;JDY)qS;&t z&(n5NhYR|~a(~ljh`44EAdorHtYgRv_Y-7Ude%%MjBotzL#Nd3g&)v@kM!nrtQ{Z2 z+r~X1+niG&ST=I4>P7;zN_4;2ImYD6Kc^aPE^SoAw}FqyjWY7*Sk$$&JWY3B#lCII z*yk@$>Dnz0m|K^zBt|^XP=tMdTO|?X|LNwU>9WG2H-3l(wJkR*Ku@D{p)M0r@_4^Y z13lQ<##c?~tbJGveaT#%S4y_+eg;MJv;v+|WtuO~%clDEjhlp}e{)t26k8C|Nbx&; z2(~fC+2#HkiA64~J# z`}3B*ZEEsOEps!MiA4f=zq>*;87{qmlbrAC)kl?emM+t1NPNu9=Nk%S;I>JM|l`E`BWN;OPdww zy+04!tA}*l%=3htKCY~|*Oa2qgAJzMS|q$IXJ#PsIV+3#h=Dcyj%g@?#x|!b(g1kM z>eHRESG4qIv@xE_Xx1=3--*IVc;7f6Ot@rwp!dnl%VzV5T zE^&|KMZ>vfo;Vd5DHv3q<2(&Cu9@s`xw7i^g>slgC|fB@Wc3Ii8|PRjuo<66OPp{< zTS*M3MjFPHAi6X?(UL`LYO~BT6~*rT=jYNe?P$9#v2<>Lrq=Ym&&W2L)c0>qn_EUs zi6f`)j}K6j=F!ZG#ZCCnq7;H&zZtNr`KVlk`-F38J(}%U#hFbKF|mhE6?7(2{=A|t zSnLRdwPMKyUo&bTJxoS(^`a?XbEtRtM&~#+KF{@e`c05uR7e@8@tFe)zvuv_C)o~k zMvf5s$qwO898ua-?rrhv9}bDmGz#PuoDkL^92z)n>dHWDGKo%+>1B4_qd7QCEo2KL z3NZ{N@PZ1y3w=*wDxxZVuhoF0h@4>vS9vPV=gWURC<>C)A|uHa`XySZUQCraTO1h{ z-I>^p89;`72{F#`v;jcaZIJT!)>defVX|}5D`A*-nKNq$Klk1=4}E^F%hHW zO$ghvG*5ohp)PvhU7K3p2-rBategjhl@Z$<-*MByhEie7VXAzQ`h^|@Qh`5IiiqsC zty0$UY7a4MrcR5XGRP_*k2S^oq|+|juBEE4pmRxWpDA)}DaW#KwJK*_CZsCwuQz4u z-BrYA++OpU0YiOx_Hu8KF*xQQ?4b)gS#7b+E3!|C-E(?+b z+RH0)uXMKQG#|E!9=O-3k!x!4Bq_>$WI7BWcg4KewR&KE;~J=+K`(jdy*lX!2rzbHur2QWrfPt*x>TaDKz}W7 z0Y_s#`+=cctUa!Rul*wbC=MpgGCC}0e1lPa>ga8n<+oS4B;DD3Ev;~4&?=?@abqrf zn1mqE=PJ2vD)pWsE&GbFvsvUfT{Bj;#bv`ocCwYu_?q-wC7HXlV{qT{_*~bpFsbO(ULdFF zP3D!hmq`WAXKZJJ0IZ>IZnEd(E#7l6oN2vWU7oAL-4h;q^u>#Nc^xFu@(rl_av}^^ z5n!)3tu$WSWv|drf5+B2szD)^pKDrHvF)+N)~09;Vg`o^gLd06-+kPtl!-rqd)A@D zE>hR7oMuI^TXc?rKC4QiHeRY5*#%n!pxiX3kBxY5MRdK}_!U@kg~f!;3AwCzLxZYC zLM&-K-eWHDOZ#e;-P#aahZb(3WZ$oF--Z5!b|QESv5wlS`TYm!*C|ClEm}@b%_4kE zyn_}Q9ISp?skBp24d(rJXS$uq(9}5eXW16|36;>X?rrv7C$et&NM-XYSz>h1A=$=C 
z%U=RAE`@REn;RgM-Mwgf>-;Q9F-W_--)V@}It$wYO>rbAZ3$Ct`Lr_7S~yG6x33}7 z3gw*y98!{#^mJ!-rhm@6L#}ipvsl8m=^zp|W>&Wo#{S!;oRP_voAIRNsUFD)Z5Qm3@xIw#Sr$EkNRo(^0>0W5&Nt;rYqnDKbUN zFnQ(e7o0|exF4W$=0aESr1|NzX5g^}FBi8cUA1?D4`sktxdbLj7`j-MVtiY}J*%eF zL3??QyL4>gidexlt6pO+CCN`v4#<})MlQGkx_m!jn1(}UsN1GYn3N4}K+k31$=B`V zA(JRjed>~F8zn}XrFe)Y5}HBTN*8M_;`a0nyl#Y}@ z*e;Xm47pRN!JBLGHgMXyRmv5!UYw9*{`a@MfjEy&=k~#(+mIRo`jwEmt=)>|#;At5 z5qTQO*o*51$rGZTUo(1B3nBJf#GSO3Z5-RVg2uL9ICDENwHCp0dYJ>iF`KT?btVTL zuWvv{l;F&euE7b(9_+=^P{=B8mA{dlC+DrQtsyfrt1k!KpoGPNInL1o>5y5&n81x6Lr{6p7jhp(W|mFAGJghxl>5q|m0c>l8Yy(Ww@1<%a+ z#;a)(^`M=xFY9y!Ry2a4@w9dDw`e?pgTCLYo&MwxTfPGq(D5v)Wtp_+GZhPi?Jmz$ z1J1rJU&8c*;>%)OlDxw}g`c-v{@0NUX6F*%wvme2tCBp-0K-e>o)*@TOhw`J<7NlT zDtpD~{YB=miF2mO$6KuVF+G;g>*y*<`cNAz60VXGJIH7vUrwI9b5G4g*m_dVOgSXRPsj(DJmD4 zvB>QQ!ap)dSp8mO?JKRKB);v?z70<5^esfMU8$iJ-QPv8EHGM@rn40SG9!eKw^M7MLfz zVld?bMJ^t1?|mgs7d}iXM_*^iNHIJ$NB6hLkXARRwgPxkhu5w`qEA1o6bH-ONk3
mFRym?D z@%oSQ`cN@~(f?K4m8O%uj{dbn00RHMvOtT$z~fM}G$m!0_?I(t zFya3=!s@_s>sx=}|@MZ^R zTMLX;jyvE+zptXX0UFwlc=zjpclj+BnIRNGWp?;lf#%OAUabAN=O|x$TF_%Qy5CJr z!~=qD;Mw@QQ8P+3^=g>mi|0sgDYZ;B_VHfH78n=qlmU2L7ea+j9c@@p}^+ zz!(VJ^m) zq$$MTtcEng?`y2Ntdz->87iT`kt}n+V`2J$fM_V$gk4`kPdi4tY@7-FtzZlE zu`t2<2)tC0g|Wl?wKd!e?=9NjGQRo*5UX#N5>EP965xLgFxAr0Oh`h{Ad`b4y8naB z|9`6!|J=)DY?Sza&-i~@UHI2@|8XRJ;}~$f@p!=hCjKx1qR{k(QQz>=|Ie_Q73 zRygOU!c^7#Pvw4oL&o(uIP2$gpKjJfei`BKLg=@zjg8|_y2h)^v4BJ1CBHh@|QJ=qtA?gWbDyVU(?z_xXQ<|C!rA`qt{C#A^2%*Fj@U z<09syD^Rt26sV7X1U2yb6&|heZn=+`Xm1qKchpK1Zl)^sq`-&&B&yv4QCh*cn7hL4 zmxXyqI21%oiFy7*Z-rL2UYTE4hAr!ZXP`93y}IrjK;P41y@qmvRPw-^t{-}#q5{*dYTNhoU{M{FlcM383F(l1ONz; z8b)4$GMyhFPsH`F-cTozNaPhf9#82A005E*00~%GAz*-(7C_nd2Z?|NKnwvSiDAJI z0Kk9%2_)f2ed$6%WKs;Vn@q>a$tfWrp{uKFWo3oK;o{=rN=ixy1VVdz`^?M?`G_@f z@rad(yzjGXMr%Y8X=N;cR9088vO*dJ$fp4o9^&1WD?^F6jC*=E>UZtLk#Qia@&Gzen*Pnv@Q#1gt~@y<_;X2z`u^ zohXO`KoknZkRK4l5hli)Ow${wdY+_Ft8Z8hZ^ zo{7szH1Rn?%+F^QV|6Va$u8sQ^4P6mvVf*6$=9Ko_eT1n0{pyI42udi?=$BAY|8rH z%$v2Spv1F;`IRkry&*B`AN;7-wT-N+0pTG>i{szeqRvzUA(dl&>bGxc1qXG+Fo)}d z{DPbi+9<(8M8ycQZ_j*rX8gH|cZ^0v=xBQ`+x~|wy`K{{4nGDja(cx_Cj1QTf5=sM zI1~A3PckYoa{QZTnXPs3j*LObWw*ZB*9 zq+Z!_XJwGLL{$)@lrg5{r65Yc5Ahb!720?!=nhVuIuzw$I)62!G4983o#LSjYDO5*Duh;R+o79^xc)(1dG(mFNVf{Aj<;Ct2*s_gzEd#_!7ZN$=mD>ytKoCa*(E z=t8n9*6+?GEKR3|b>;rCq_6cp*gs?+`>-0d?q6PZZzf+O5tvmtQ>o6XFgqQ()Aj*)~JQ;*$bN! 
zmEpX#nHmL}8R6a0ZAxwJpf22z4Xw{GUpy*Owm@>d$bH-NwAEX`f(ur2Z^vKmSXFtt ze9TVEmUwH;hPXNxobFWrBlT5L?p4yog~rs+vdFj|#Of40w!@4aF3T8)3s$M_ zrg5$y6Z%?D4BSb)hbmN*IdQRMTxK({SbVCP-JY#C&tZq9l0N~Lr^*tpWudD}z&Zt}gA^If6+!;|AX)#=$2U!^*l+3h;(7L&pYO<+iS z7?h^l=QRJv#z)5JradL#+>N#EnU}j$(z+f>^msQy-*+?x*U<(Ge>TT2^jI zZ9bXfd*wjb8Od2mT3m{__nqd_t^E1V;pv>rL$^n$3m2ab+~X;#u8Gk#ScJ=(Rz;^L zKB@CHn5hJRCN5QGG@_%{*9uJ63c`DQ7Qq0vW)hg0_Axguv2kWcNQ;e zMqKxHeX?ij9wd67jmWBBbj>a1#1*2!KDF}ZhdthDlrl|~hZ19#c7?*rAJ&kusp5s{ z3t}yO!Sc#3T#ZbV9MJ@&ZIZd`sl>wf+sX=0k9ne$f~@tSJl)|g^vqm;^8{E@lv4ia zd~!z#incr4_D_BL972}7;6Dv-iS`MJ-v3?i_=5i|CSy9``i8`3C6Zm)<_=72@QrDg8~P$@rT78U;vgR^>94lC1m z?ye#-DKEpNWzOx4i_>055m~j)2q-n0t{wF}hY9}2JXih11bASmW}f}C{Bw0Q^1(a} z5`zAk>rXwA0&9YP%w2f6BA#c89W9IGVe*A1c7;r8Vass6KQuB9g_&xL4O=zx4 zm}-;AU-G_TN=$-Zg_;WjS32o(~x`)pk2qiPL6iGb{H-v3}XIhAEKmO!99~+foSsCq1+4JRhhdDC!6)_i#!B?!^el zMkx)C>a>=hq*CZU6z@SNE7K_ahk7WWIQ5sy{GUW>UBYdLeOn^>Q1a(`9(mu9Jo@63@tvFlO-K|b z@%DlGwIIS^|9}u;oBlpC{yaZo`^J4{Y-#c=dkd3m06VCQ6(_8@wtN8+TC$Le(XWaigZ!UM&Rg_-t z)2IHsJN)PNsOFAxD2ePXvIpx8UUSBKU2g%GFJY6K8>2N_fskmz`|U^tGKaJXtpF8B z6rO*Cj&T|I*=pt>y6rZ#aSKUNkLE^|l?e}7t=5uyjL8%)dx1KgU zwKg)mVln^16>5E=VuU8hp{d7_js!+uG#n^>qT@B)D>Z~@M&9>xp zAbj~Q2k)hgA4=2}()2X6|39QI!OU(xzkNBHZj$OH?INK~Wnj56x8TjSMmAyZSVTXS;EZFBwr_IL{dcan%}_; zuHZYoMX%+7=Ul;-nLX6v?DR!!o}vYq6tNxjk)dT~C|M;@3jaWVKgeI!4%3X^W`W&q zGvmNeobn=Ay8j09nLsDy`^*93D*v~*eVfWJmdWR#Vbu8s$+)z9_g9ACOV?ipqaTc2 zEDf@Go|An|3wa(kK|j@A;&bGL+{f%clx>p&9Lkq9^D`@7L-}E*P|=$if*5fO z8M*}O?lE>5|It5Gk#}aL45C=k){W+oi2!%KUJJ|N6P3>{ z+>{)=PJc>j-LR`i{i~(3l2$x4THV-Msx|`>P#(XN5ChcKi|xPTgr6$i4ot3x^(-M^ z5$}N7*UE|gpx9XIS>{lH=~Ht}s#X@^3?Q@{bLB zenCP1IkBrzT-1B%{YB>$T&|y2Zf;w+?LomWx&O1HeE;d|C@uY?aQ4sWSX7H(3MCLI z@Mrh5el2l{F_yo%-n0bC@8K%D=7Z8)nZ^jm`yx}4a(3obWG^%nUkLmWl8{8V-$ z8BxRlCx(`nKmCGi5SYtUBOq7M793HLN=jyCRNM21{-S@)5~Ef^|LLe8@>e+xRfuBR zK8Q!o!k+j;?_R%^;y7i)C8rHpXfpne96cQpo}(y+s0f6PD${zH#p^*tQi1ZvQba03 zxU87Z++(!~_`7DswR@Wd`4uV)zVidN#-{-TxAK z)-kxzH{-J#=ELE=K#df`sIG1hok) 
zL;PnwW*@5N05b%G=Mh1}BWZv)xoQ!zhix4h%ZW@x(R2Mt6!r){YfX#&(h#$PZbvv3 zrKa$ldXs}o9iwKXU~5eyr@G3>+(stgsE({!H{{67g&2?n{flegw7jH`N()MLd6Z{0 zh-P1zq=SyZRly-0eVi@yG+`i|{dcotAPYOBDN{+&z67EHuJ?l|JOdi~s)!+rjc8Y< z%aI9Gl%EJa&*}XpB)zJGKd2nNUAd} zWDtWsE?+Salx4K};zsuTG$2%V6@klMW+X?DyU3cGsyH_%%VBK|@Zm5EBCFY!*rxJp zs{4LfNo}X)E9i~hFhLAicj5VT-)Qwie;I&s&gAsi99q?{#e>K?Qr>8bcbP!#QBafl zLO$_dkxVwvV3OtEDssjzwobQ>f-O}lP!K@LN?`=Y6$Xlp!uhI5a)JaM@;Fo8f(hZt zK_PU3j|Vsb6bo_rL#vL`u?w7b?I`Ouvj`GS9-XR*2Pm6l$9TQzb$Fa7)Bi!c`HiYC zh2<5Yc4E?BMq=thY=~1VgayCBCkHXaSqTz03z1b2+BF4x*f@ajOs<+6wA~6I>#gen zGr9<(2I8x+2fEU??8xw8130OkwO<&xw%qZ8)Z%5xhOY^m68hY!Z3>nw_mnKnjst-J z=}_(MT`t1LaY%83H;f(nEDsX5IvGz5_tX|ZVXwe8V08;R!O6-N-aW*i&9iHIE)5E8Y`@6+6X?c~MO)Uzjrl z>&XAJtV|1CxWu|POQygo1Ql5gP13{^J#aYJfR_0rGo(h%Z`T}YM>q>RO}INPdQazS zxpT8g9;-G)k(C*eZ5yY=szf@hB>B}CPtbUkIvpA-8G@-~+sDWHiLdIV6yIM$*ws3Q zR3Ozrhm&g_5}grV_U&=s#%qmXfe1Bl*tAgDT!Z1>G4%9sKZ`#v=3rP%G`)7SL{Sif zO-5s`Gh{h59sJ(+kwCD=u*2u`k&;<&?;-^FhMnmS?Ya(?qUjGJ3FP@QtqKY)jfPfz zqZ^pZiAS`g&PJ%AF$cYpM;bG|%WGsJi3qy75A9<*E&fADY@IGb3N493BiK>25m5Sn zR=5qKm|2(#*-)lE%yt+GC->r?8vkaQQUQyZpN}rR5+w($z8@E2u~Qh3T#Tg5bZ)Dh z*uVqY4YJHwfsxSMoASLOPwe_$1Jxg8Z1_J$9>u8PIG%LCwjq_}qNwL6%g7_vGmtyp zpFC2#lhau&_%db5Djt+<_{EkUsj8vpq6hLDtgsPZN~3xI7__=?ez+;W`5uLK(aO`< zucW=!?w&!J=@_nYeG`;%r;#XmJxKBG}!eg=;Tj zG49XPoAKf+d*!wB^}Dzw&hy78;keF=ueFWcRh;+Y4t@G|uFsg9{Kvg;tyCFq8K^LD`Uk@p-08_0srK-q4?}a`Wvi z-GeR<8*nEJqlSbmaXw^0)OEHfLi>?S5>}!OOz9GOTDX0=-?NaQqZvzGkN%8tR@k)G z?3>WL9W?FsPFT+jf3#mEY}oI(Yt$?_+clV+wRniMH{8l(>twiF_&Armv-O=rim0U6 z8{GPi|C`91W!KNwTs7Bd{>!cD(M{^4y3N+oxN1;X$6&$v%kHLy zcZu3zp3>eH$NXDTG=&L}o6`4vtwt9;xZ1${ zGF?hm_^iqtFs0`jvl#>%ciSsuL~};+z7FkC_lR!=w5e>--72lM8$wMTD}6{4UCdk} z(Y7dORZZ-|f8NO!dbT9NLuD^;JuBTLIQy`@lD}S-=xOD3PxBJ_el5XoW}Lj`Zx_>- bx|EcMyrxeW2kMaTz5#7@J+*ulY{35jZ;OaV literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/distrib_data_histo.png b/doc/doxygen/chapters/images/distrib_data_histo.png new file mode 100644 index 
0000000000000000000000000000000000000000..914981c127e1264610b15a3355ee15872191f78f GIT binary patch literal 9743 zcmb_?2UL^Gw=Z@9ML`e+=|$-vMLN=Z4GIW|C{>z-5_&{J2^>H`dPk}>1yn-yL=fps zK_Gyk7fnJ34dG7Wx%aO7-v6%k-hD3%);F_fX7Abcx4!_=#7O%*4GRqw71eoN9St)o zs$=`)-?JwGC29A9A$Xj0Ry9MaBFir^!?iobxs@FxNafIs$wCuRh?w|KR73i2uRC ze|hBpFSmaw;{ToazgrbpTXqF00Tu#%&5R&a8uXIfPBEF&&A3;vzB}TaF}3M^8VM67 z%}(@VE5C4`C4ZFVw{J{l&iC*BtX@u>-6rCv^9hIVcg6?z{1OjHWt*;DehpzgHv3iB zq>=UCTW`4+m!h7nzUmnM1}nXjY=pdB5?}0%dI|jiw+>Yiw^DM>{yaaa;>s8x4K8u) zp}w{TRm>?i9WbGOqGRO)p5(C(l~%W#G8Gkfv#y4!dBEh)+&K8uSYVX*bt(oM*Epnx zTxofS=p|><939V`#(qsGgFx|WtJy?KRNAEtB9@XOT*$EUzVp$^@3v;!?!nO4Mv^WUx^m_CrW*W+~i<_ zu)I_%QM&UHCdtFCnqAUmCKjj zMTkz+Sx6AYeGDV0Z$- zCAJ<;m@;`83w-^97-0+;(3_H#ZeecFE{`-{JnP1IH$9xvw(nUMAQmR5pMBZ-oP!4^)XoAytrNBI*#!(E6Ev;h37a3HM|;<0ohs&5&MZ7~&v|3;EX22@grE-l#@%u~J3W&VHO=Sb_?h+ddX}zB!2@6`5Rfb=nU9M{f!WYK^t6^`Bi5Y6EHz_wG znYPRVym?oRp5F@d4jyL_XcwiMeq5Yz&X3RR@>*0Tl~*N`wnX@){>s_4leruX zDdqYB=dHWoAN@(bc1Luli0`@Om(Yclv*Tq9pJi5Wq`3OK6&qxW$30H!F|puwEcCom zfl-q-%Mu=bn2{Gx2Ta zpr|+X)7wd}@3F?7?Qi$y>Wj^ob{;dCC0nHTTId~>2>S82s? z#(Vy3XIIisbY=czs}pE6y7pw6i|9B1be#@c%lf_%cahYz!7$E-w!1~g0(oM1@!6m! 
zUjE{cn9cqK(PcdRX_!5FaEaDttsf!Jy|7wPpT2Of{JRD$5Jlfiq{`w=9<(4ARF4IQZ+gF@ZN*) zhwWW9u(MvYo*e-POr2pSD9P*?g|+I8ht01p-%=xVZq)2uY`}JfYyaF7)!*+Vj@7{E z8&pS8oV(;{NR)nS65@-H9E(jdfqX4iy|lGO`@|ff8(N9}B`6OnOb2^T1t1 z)UEb;Y7`Lf&pBefe85c|R|LP`4(J`?BVzTNXD+>F%0j4M)E5Utx6i^GqkD24UFh>x z5(dEzF2%2GXzQrc*i78TnVVlCmnmE?lLpuYVKPO+L|b!Ql}77bvC@k(q7A9;@Wyqe zW7dWG`-_$T@U;{5u6R$YgceKR|LgrZ5%eXB_=|aKF zRY#~r3LxIodZadwzX&=O`m!1d^Wv7p?1+XN4&6ab1&8(=^sd&xsNJ8H_Ds(dUb$kw zdm8cnxKe6!t9mr8%9KJl;ch_*R6DxL89lg5cm3iFjjuT__p@j?$jbA%RAQ9AlW9XvhMPQpE|4zWy?SDU1>BL}Eg9mf zD?TM5Fs^>#e`*Vikn$AGZwsY3c4f2NZ{`UJcXDYwH!21PK>!=^C=Nt^sU;rRQ5jLdFr3M zbm-od5ro|>?9W;V1zuo{Y<%6^pB)rhimrpgPN1_pWC!H>?OY_8qNI_T0fE1v9;N8l z{1PvukGoN5wttrXmj{S)b%{u6q^-;Exf<|SJS-Y41<(8ZfPV7K~7KAR?1Owic9ME zrJF2pEC&izy8f{oornipOilS44}S&O3-(1fwj2cX$YFNgG%5#Z?0j}UEyeos9!h8v z=4uGt*l^gKX-?@rHb9&q@Pwsa_$51E@^(dOXk!uOB2&qI{ zGWGsLm(e`xIr>(N*V}|-!dD9Qn!U?t+f@mA3&m6U$XLQ#qHW&6mW4!v>n!5LFUj@m zxe6`|_{~1}K6QRLK`EKfv|&wP-=K+h_Va9KX8YdiBhqhHWB*X8noIVKkyF*c*b4E) z-28*!Xl4LktiPL;uY9-iy-f1tfjc`ZqiW~TA77MPeD+gbT{naCmUfj$Q}@ni5XAY` zQoYB>tY5UdAB4FI_HD`vDjFbvIet;qxPwhM?p%LVqrk*os-ZN`7fMpfI~3m9D+3C} zb%CR-Dg(cN|K(*FCt4!i-RWG8*@<7?C0@1bp9S+};JI*$X7^G6rqsaEQn2}-Z6PIO zKc!Q0=hCU18Nxg#bUL6?p#_HqA>b+~EC`ExZ(8^BeDZ*%ryV>u_-#_VKEz&>ILBD3 zaVTL_PViQN=L%J6rk6t0G4+_Ov&S~GojJ8x%*bA^;Zjs+ z=+3LW_Fl=O-bWb>Kch%(?pD2IwSL8zlH$Y7=H}2>n%3j#1qweTNsj zj!&~K1?JJ+&3k83j`_L;Q*KyYRWrxY&C1U1V5tzC=TH3l>&|3|wQns3#G_1u0NFK< zTt=0>eg1xxu_@Q1x^=pso**W@xd#l-%&wlj8=H<$y?A~qOnS*~VB>CH!QC3nwAesM zUONN};-wui7=U>Ed?Z?A#MB4%iU8_)pI;f7WghiJci!7FAJUE3PpMne@mIv!N4y`Q z?=^g$<#;lUB!F1;YH`03)g6`E{1k4y-lekIDB&q`Zwar$5W!*>@~qG>xQq3Cv~@yA ziEl;i5`M&de-wmhg=)-J@J@DyAbNZgCfBe!I=Fuhflu0*MX`2qkGhf6I1HDt@rmy)cmhn)R(IV(xz0yhK`I&gX z-3RtQiF{adkq+qIE!yUDeG5ntsKtPc30plZTtwUz>b! 
z7q?=8;|hVFobZoeHDj#S?IGIIc?4}xkFufnV)A#08NSkRPyq8|wcR;G;M<28z9R5+ zXHqMV@(JwkXG?hNnF+baW4x416f1;|OPcS8tiNTf(p@`fmfcD(f-c*9SAZ!E*AuiU znWk@wiV!e`ZwnJYYLRWuOwKTsul$Auohrl;i=2PnF*VxzH*_#;)@>?&8ua$=LPg_}iXo z=vVVAppF-qqbiqx=OCawhwClYq_y9w)*UeUAi)yZW2*%BvP|h?FLZE|nihh^6I(qE zZ)SsH?=EGT!ss04`e*h;DvJh~UIT`~TurEx?)KPddPqU9sF(fb`}bW16{Dq*2P-$3 z04gbyj~|!HBwsy@ba9MVrl8Bb@!F*CE>o3m)$O>7Cm`h9!Nv!I?lSTQkswsMLvy8= z!Vq1>@mdk&Wr5awW(qcC7&>ET8AoHuwXU&(08lCFC4kFie4lv$og2gGu(KiB&zZ9I zN+KC#!r*{$254HId_mMM;5l^MlEui#NOny<>e}RXZ#0-4QySx4*aAjwJ>|wAGz3(^ zuOkG8$)NYgGd_@_gL9>5|AN1PFz>z>F3J%VulZcWnexDGt=P{%>=jIEG5w1}rHb(A ziq~xA7Py6>QX2q(*3TX<)`0x%10ZXT>(hP%wdkIujD>Gk|Hs(i^l}ISbmn;kQC(buj*Ci|6Q;96aauXX_(Qv^LL8LmF7%t%of{y)JgvQZ$oWGGCQ$ zUwGceHfA8!E%f`%Fk(uA(wJeS&={{a=)z*eyXP^mzJPxt1AATTcb9K_!ijtx-z+T`SfE#K-Sky}!R3%V};t~Vx zm(tyJ4g9K$0|DDs{}B7Mv!%O7N3u?)D$ou&8rBI@a{Cwvpk4}5Xy_3jM$5E<*DEF$ zpl{kgDVh;a=a%d!*We03ttCFWdKg&g&th|@fI(#cpV%g^=7@^kS1iDw;P2LLPkP+2 zz_F)NcoYRoG_2C5*y6ce)KO4Sf&dV{BGVYrtVj@74g8>Z80L%l*Dgm7 z!o7RvJRm(zZ6-UJD3(|vKz|fVhq!gAq{kNcT0Of>%pekk+r=3JO0e0U^9T|&{D;CQ z2-_j%UZ7Ae;18L+&S3kCgBNR(+UpAqg~-Jj6fopzx8%9L+C_3tvoSz^k1ZTFZ=?Y1 zqnr(`qCxWj(?N1;(@`>G(2z-^B3r$TC-&LRNzRz~X3~g-@ft>|Ob8hw(;88dOopZC zIEbqr?-nd*X;fuGJwS5!k)VMxIU|fCrK#n%CM={mTQXfFm`eBj4Be8AJ1u;{d_|ne zX>aSiTk^*;{DwbJ22{u`#TrjTXyYD(AWUoYP-0b!bibaTYtIirRU&wWVDKZqap{>4 zl6jY7GLd!C$i7(+Tf&Jcr5cT2vXQT(b(7jbbyEaQjRxRJfu7cff;My>;Em-?xmAk! 
z=H3XogaLQ>nAY)EECD05!#bRc1?Yi^K?H*yb}%Fpv^n6_lb>dccZX=3a{z#RPhUBb z;4~82Sg%DI7$n|=YcCGC%hMjcPHN{+0Gsj-#9HeAi2*&g%5T*H;Fwizbj)#Kfo9LF2ur{SehhHV05vhKck^n<&~Q7$O3vV3j#2 z1lWED2-YihxEFK?Wb9rwc3d<%N1<;G1}G~XnFz~*?$!{3wF~KQFFFDC>tmVidWU;) zQk#2G6@(vHu|Nf2iYe-LS*3LO8IJ|L9|Q|L;-!pnKT%-!jv>su>4wpF9*NY0;Ql}8 zKz~|RD%k^322ZS9-bf|3J|;UrK0BYRu=UwN5}ks_U0h!5I1`1@L`e8xfaHWt0BLW} ziCl5@$z;gA9}yHym&?Fc4vViU2+kz}!y)0#L68aa{rcJ|L0In{h&_1o8XyLFb6n{x zMZ*II%HIByBzh2R+#pM5=mW;7ySQDi*i&Z!Ai5fE9<+YqJAOqI0=lPJhtnr6T%hx0 zOIfuMW=3&OTzvdQa zh!vW@T&Mr`9D)|!xHoYM9=d-Alz8iuz9nFrY&#^wY-zJpZVVp3>woUtgCOpKm$}qO zP7(mfTIml%G`VDZNB=F4Hbs`D$CuoJWPnc%&q z6#yJ)c|G-{#Kh>~&Gs6LKOfn3eW1EaItg8EpjF9MDQ^YX)7}nYv)NJ$7I%*5TtR(QF@DGO`|^oa7F>3ZS|&Q1wQq)5O!Gt#Y=2! zuNEOqjnW{%K3qn7!H|tK&vTc$e4Np@`G<3#(NQGseFhj&yMYxHY;AN1Yg`hD3!?WNKFc3xt}5;MsE?m8eD3QW6yba z2babz0@{{6*3QsOGBd};fr$p_`=d`3e&+0aKNWt`Z7CX``MYP+9~z3MFqSHII z3A4h0E?SCDP?TQ9S7yFDzE}SA=}{tGrOY!a-mKrIYtmuEq9Jmg_HUuuwOR&U?})aS z_Z&&0hR9g`j64hu$u_eU_^yxVMzP$a9w{jic5g0EWR!S2K;^|H+2#`@~^ z$9DLhlKk(D?38Y{o3PF2ZA}}e^NskOIdV2KZ>fZB(}4-T?W0A8f#{5a{phSg`vFt# zK#MNxlF9)c<*1IQ03bF@IK_nLZO6Qr7$W+$L2qxgBbf$tVth|E9czLJH_Ybb$1#;2k=|Hh`Kgm? zZ|}w;0;I5AkiF-F^N8HF`!+V~C(^efK3ON$9Xb4p7Fbs4*ls@bAypo(Zd2!CRdyXe z9?r%^8JF%v`O25{=Ecmyg;+@8M^3+5UACr`?G6=&2}C5OA>$xoWj=WLkmKOj=n-SP z-H*6Tyv9c_&79e|i%0rZulGmWELEk5MpBH0O7uboNBh^`pf9~)g$}m6{pfvP#6`V| z{DD$5cQC3FKfI6FuXg#oi$z+7vkyi(VRKcpG)$8deD`P#x;Qk4Wliw^<&T;lY7X!! 
z*Ru{drrFl;8-qucN*cztv0+G?9pyFN$wyyi9yv2dv%`MXg%g~j0##xVuIV3W6BSU;aM z^mgT4sdjtts@ZQNJDCVxz3HB7v~m}EI$tTp(&f5e>74excZoZYZSfvVzNe^hKIPLo zOr8z4zLC5E8KjQhk-N@v=l;ry1dE*KlC0|W+%besSflPO?D+PT=gg$r>qcF^L(hzq z?#sN?STh|sWwjWYz^)sez$%|GUeP7Zm5jR?$~D6NvBO6pZDRa~RH#sVQ?5$-c~&L9 z+Ce@~^6pDv^ylh~(tk;F$qziK64Q7$s+(3^IF_5RDJ$*M_$)1ERnw}H(4hXSZeE+1 zbuoAN_Iz;4PhSJkor-~6e@K2yuKZ5G#E>y(TYV>nvr2SqX#Q!gW_Q87qpx9%Q8J_n z6JE0_QOw{MEL1(=8o9MxSD3FxFXv}tJRr9{Y2LDe;W=n{k^4H%_S->xJHLk3`-mpb zvhJ&R|KRtv6RW0sTgxL?J5R9i$oX_!YrQ0L8HP{Fiw#MDU3s$wagKSB;5?ee{!$jH zz;6f2fM2^?W5}~PBboA<(bKkGPsVr4l{8rWC&wAVzgzsvkNyUaiY|-(yPpMh=x?^8 zcjkw+mBGqBKDX;`>uptDnzQVWN?dJ-i`DcTIS9@sNHn<~&cRmNzwfHob` z!gxUoy&xlAIa`CHI4y4mN7D@Lq6mAnba~yOkt-dBEm{LQ2)o^H#I z8(2gZ?_5wi5?Elnu37t{06wCPjl)q@f~OSZY2 z(<|$(d_T4T9h&(niEysJe_0FZVsjzk)XmRYM5WI>NvyS++>v%j#4Wc=Gsuw)sYR8E zx=PUyDVFE@4lGTcT#B~l!u0(u&sA=kUuqjE7bJe*d#>JETA8NJT8XZ|K)XTZsU+LP zYY>U^!>%VWqHl52+oqz&^SKs%ek@C0oEd!BmSRfbf{l;e`YTLOOD=d{wnFCBSz`{d zmq%M)YrgqtizLcGu}Zr(EI+6EwMG7Zqsfli-FM_A%B>W$BN^MSB3E9;6mJ^vdT7mD z|8YHW;T0{u!$JEn;Q6`vIS0*HQiv)Db+WZi-7tibzO^ zh|8KwNXtt}$V*BIiA%_fi{}r0Oa3nhJiHuSodW;+1GxAHqTm20S;7+sCwU!LGe;ja zFF#KQS5Id(uK+4(aj5|CDPbw1)EfEP(Db8!!@&RC- ON>|fJqekt)vws7wpU*b{ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_build.png b/doc/doxygen/chapters/images/eclipse_hello_build.png new file mode 100644 index 0000000000000000000000000000000000000000..2609dbfe0c2f9465cc9f15e9244f7206cb2b8228 GIT binary patch literal 226826 zcma%jbzD^4+BS-SNU0zl3IfvIr63?J-Cfe%ElP)UgNT4i*U$_h9YZ$^J@fzr3`2g~ z=Q+=N&UwFo-u?T*0JCT9z4p4}y080MM7~#+#d}2l2n`JlPww4YH8eD=U^KJ`WDhZc zzr3U)83kS*xJk-sJOmE^hZYfNXwT8)-oDZB&fZ`0@_(jzdv`pOqdzX?D;YleT}rZ! 
z#Qng;i_K2|=3wm?)Wc(@j*hr=w%f4z<2MtpgRif~zkiAww|jSgXA_hGrFoHdy6FBJ zi8x;AXytK(`Hy6{@*sI&F$Xkv4gJ-uID_{GtI?uQkip@Tc^GN8*8MZ1p4nj$w=@%mT?#N`i?a;GgJ)gOA5n)Fikq%y;!_Wbtzgk7A!=!42$KfN#^Fso4;ueT*Ip%0OM`yU5> zkY{tqlA{xxb6!dMux{l@U>rh5uBfj&1R9ZwiEaD3o{o{ zgyBupGPjm5Zq5q6|A2CzsHJ*$G+)VDApfo2cD(-b%P9GMz5lqz+YRJz$NF{G#=q`f z=pHU#m;?UHU>4!s`5)qwZa%Q3Py}-YVS9BuXS)k0k3rjb)DUOkRn;R;(~H4Aj*}V) zm0T6a12FXWrt9NpRLb@SM8Hj3l4<&)rujB%p0?Be=K<#MY^DnDnp-xlEeVC(ie0q% z-fuX0Jf@k{K6R%liqml7i(kH=N=LTG)(tw)j~Oc5nDsrHTDza>befQ2_)gW7NgSM4 z$w|Uhm2kQaS_~~N^ILKQNOSA-%}d|p1Zp+xzQ1)me+k!W`Z4q1x^TG zv1XppaXJ(o@#xXHZmMO7rP-!KXrVe2Lg<3u?Nvovb82bn{cCXvEOij^Gbelgh?s}# z7fInR$D52v=|t*nZ_(w9>4X1ksRC)VO8cHOGdHz%eb7$9v@B1?Ezd?1Pk-+CO31+H z6YQ5*@Srh`Vjb)@Dxf2jXf7$es)+_3ko?Bf^tj_0dI;7-8l7ScBI2}@x!&m6tc>2@ zStEsZ$nSbdty4*D^7o_v)8MYf1u4N$>h_c*@Fu*3F?e+a;cqum!M39x;G2TNzz`y) zpPL0=F;WkX;IPOXEz6c1d;4!HJUFTKptm0^rU;7-2~}@|ui8aK#{NoODMRW0ui4-S z;SjSjaB<}Wvk{mgY~P%)WpFP6my4H`-<5FewZpqX=MJUc6jOCzC&wgq$G+DOi0d`K zJF%2~O71X^_wnj6C~rB(m6Re7@6&BG`Y->3ax^b3FTbeGY;*(LIcE?m3$_hh zT|RAggqr8k+raR8|$G^o_FFPZnzep+T-u4trN^)}Y^usdgS$lrR)-#YWOyY7o`Rkss zhDK+T#~zuW^JBZ|QU<;{Mmf1iGz&}18jn4GOkC2w-Ki2%-Ve_c5}xq+l8VF(b zRYOC=QqR@(DJ+}kUd4LXyzTma`J4gp5{8zk8i)`FO#O!;&+%I8p&zio~htqV(Je;?(EpSm6545>y|%3 zAjr6Ab0Q*++O4O{4a1ilt&djTxF5`8piS_iR>M)V2c9x9@5~Pu;gRw2=G&wBAt#jh zjHwt3sjP+HzkiyX%mq&V+|uGt%x>^DFi@O}i|cf6CXHIGNn6+2_tJT58XlB|(?)VlI9+8dTbcHtkWriLH;@~9p1{b-Ta5LAN!m=;~XF zcs7;dwS1eKN9>5v4m(>v%ZU9!JBx>I9YIC$_zBs7Ipn%dHIL*kQxr1?m@FG|Fo|6nDyUa!fcRw-Ljb@e;R072sRq=@9$EB}dp+ zmebnF;9bPb-ewrZZCuytN(9|ymG2pOPrO9*>1i#qrTE=7Z;_H;WRRW=ly|`p zPo6#NzrAVv^y$;lqMHi0?c_r=Bkxs-mW#E)^sSD)4tj+^+Lyokc_1zAgB;+n!Oc!y zCETtabb`g+e#X4neNb#f#PgTGLo88|QI7z#LaW@3fvwO*4y4q>?p9qJb*jwdz6R{s zMqgk1vp1}a>n)7&-mq!Sya~k|0|(K>i5$LDoPYpkvV_>o4Z~}|vOiogN3JaUgMeEW3v>COLjnm6@ zyLN(wan&L&n!EDxhlwDrq(NMJ&g$+x*1vo|Jc-L(=3b@y&Ldp9Pcbo?f;q}sr8?5^ z<<|beLF@h5s^8u8C+qRh<9R!$#4nE_`*U!01A`$SSWfTcPZhTTrsC0jDWdnctP7Kq zMvFkK1s?dC^C!#!zqLrlbp8W+WpN)$h!|2d?4EHT>e=VdpB1wO6M}9LdCAE{HD=P7 
ztE*J6n3yoy;YdWWgki@{abC~w$e9}NV>9uK6(08HeMuw#W1%?VT|w5twHk={dQq0+ zTCBjb)&4A9f1geV4##!c(UG%Jz?mi{0rk*YcXVW=oJD_P-UU3!INu4{=6|soPDDiH zJTAxR0u3N=1@22NfT$!f@#)FQ2|7F}@u`a-r3dI>SO7}t9dOdmc>J3%zS{R}UU(gN z1m!2FftkG3)v*xM6kxH_)BA)MTsl6|kZaED{xZh7@)y4jP-vtMo@13bI} zX>z{-bh(%LH{il&bgah$LuXc8Dfq8D3)76=sHt)5>iVI}twha`7&t%Sv1*YsFhJ1Q zEEuFXX*qrf^ts=#da1oV`XcjishP{XC(9rZmGZkcEq=Mq$r%67>9)4aE-qCb;*uyRA6uXBppqhkU5VYTo5PxC=a3_(<6@-Cw49fq%e6^RranhJkySk()P2J z30&sm=zFbqw^!IWZmXVYYo2M0E@IRKKx)MWl8nWCEeV$rO?Nj2F6Asu+F&;VA_38N zY6Yz6XLi7O1}>Fwa1?bxn5lC=ecH-ds_X90b)6P(F1wlXZca-7%N^@Q_le<+%}p#{ z2KZuQ(w8rs0K7soS!{5LA?J%qOcWg-$6P!9V>Uhk)z#Ic5`m&cJH6}2K=d9uT+(&l zNqoCWU1A(`!{!AIFkZRorGXUrA#L?2GrZ3(HGJ2uh^6}ZV^=}m9PySB9LI&QgMlH)>gkJ-}H1*9f zR3acAw=G=9#!Y^-W$$%O_O=VP79*M>8_c@$X{`kR9NwQ+94mf@83y!1d&nztDmPtS zrZCgqRcFj1AxdT3E@j|%BPWfVJP)0OoT?NH9)E8RVdOSmu;pC)quVz_XtS*Lpc6&V zlIf)D8}`35^#VT-$gCi@Uu-6fzT3^PE2eXXknva@b(0w1dxmX7ke|OP<`W$qO&|*E zlXY?7LNhfrB@Lr`6|`*U?#_!s>FT6@hQKh|SDi--WC?&I&_&_Y(jx@R#(oW?|BH*)$(NO4}^Kr3=tMNdz-zkkyUJ*!C0#)c^{Nbc*_FW30{#CTYz{~sKm5Oq%-=R>*o>&(r0IJ%7@`wa< zb9uIUxpx;ep?llNZs==9s+Hr#5KkkKdk_86G;@U*#{Oda3;s3#PTlx@LHXne)nC_3|n)DEU4=YU1Lu zwzf{n$mo8iZJ-p6Pt`Xv0_2CpB=v^t6BdHn&#@O54SEvrBQMHm^QEkp;UtwTER^xZ z*OH*p-^RHQ@e^5y_RmkgyFiTO-UKQCozm;ghSF7sa&KNBU=Y|xoDu83FC9R%thDHV zN=rva#%=Miun+~9Rp#jEY}Dfw9Pa5_s>J6SfPPe2f)xD zcP6awJMUNZRy#pEOp^u&2E6ZX=2GNqxjgqjbar;83pi1$h@*&FeLbvEC%BFcs})$g zgaZKDjfjkV!Nrw3oF#zROfz;nY_wiKYbyB-tuQtQ0B!JukMldb`sVK=o;kuE%wuB- zAqNsd5 z*pkN#n4)!L0DEO1uT1o&y!n;IhV6$$!E&3KU-5C{1sIbx(53ac7O_ibr4wKDpE{bYSAxwK}J2ZRpwl{=y?#S zVr3@L%WWyq(@}E(>b*n*K;=mvUHnrjDrrwo(>gx=soU_4jh8^4BP(y(eMh4%oy!CE$v4j9)DKOk@Q}R4EMQMWxQB` zI-(8uF~w7hJq@S(KD|*6g?Gb|m{aA5t>KcB+v8rE0&`jYoa0H>it`s=ffVS2*qWS+ z>+WuhEw?kQy1rPC2aNbjV&c%nv9;TJtU$t-FRxX{+L#BYw1VoJe}uUHU7H8$llv8v z3~rhYrVhVGeNY~5RRy&{D8k0J&$L;cNHGyGjvpSET)mW2+k!y+0Nmk~oLs+!we@ii zMFj?R$tm#S0=Q#Ba`Mn2A zR5VxZ+qX3rK_K2+5MR%;8w)%eRkg%;2&}`Z<_TfaeVvUhD=odC zshu!WMrLN}u1a--?06`=uypA6fhUl`UhC+PI(lLI_{_yn2T-!7MH>gsy>>hl2cpA^ 
zqNT$`bVzcYs@^Ug0D;20;uZ|$-@Owo+F_KCV0m2AYp$lZ%OE5~*1V(~cx%Ku*tvt- zaR9n+*=)exVk96feU-JC%f-bsYtL69=;nr;q)_R$W|{x#+I+Co+}Z)ET*#>dGFy!o zh5k&GFA?$93c{yka^~na4#|U_@e1W`GIT!#_{Pq_qrd{KGeLBkZ zj)oD@EHt}5ceghHf{X?Xx5})W`|@m0%G=vJ)?@Y@4t1D2;a|Nx#+UE%J6h2eyV%Y0 z2*JpXd)O(p?jqdjLduS&I2aBwmj5Xd&+R2nyh&2Aw-AnF8aOLyScFB_!FwpOb_I_&-6 z$lWEMA=X@|I&@tqFaK*K+VCvh8u70RX(J5=<>&Z?-MpuYrXQ}|KC=*!kR+w0sVFL9 z0VjUU)$X`w(tX+~vLP&~Ejn1o$e?a8A1;j}u!_j?4(KK#N;4(J$IYU1?r_jEQbe@t zOz~3~HZh#kXF*g?wEyyi@0qEJ*IGXoqlpH>ncA)n7b4^0N;Jx)9pna05`S!BkM%(8 z*kYM#TpU#IKsNDBQGXZAj>4Xbz>>vOR*wF;zij7Dmom`d_V%l91qI|{6(1e$ZkIOG zs#7DYyqMx*Nc!E*)`q*~{4dWKP7qOhh9MKC*4q)w23|M1lw)AK3ELNc&zlXXT(4ip zuZ#nOF8yTt+gI`N{LU?^?dM#5Pq&ws^RL+{716X6`xZV0?2&l8f)Z6zM~WpjYd)-6 z4J?W)f&ox(I$KHM>5DuV+Odwe8d61n_ilgG$m8+f$m#Fz2~x>6nb)TilG)0PV`F0n zR3oMpGWP{J3tr;Txp7?t5QP<0+f?N28wgOx$KzAcZZGqEj>}miKdb!PNBe=(u7;n@HG1?18Gi|31=WJvyBcPOMd3vXCI_oL8zWj@8s4`7f5 zkQ2c*!dt9GXx#zG2UO~6Xx=AtuwN}s4t3pi4p*LuNlDJalzgesMmrL9>bwCtMoMVW zvEzL8XL!&>!ZWfG0MalpGe;dA9ci)Ev}!Cf%n1oK7%0<{t7_*#dQUUpCnJ6}7n?K2 zWEr{3QDdN-7|jyk*I4(1ag3~C;`BC?F|*E2NqzgE4}0d7b(HeGLv$*Ng;L@8FX=NhF*E=2Ky`kjC1y1s@-`N`ExF@PZ~|Yo zNt^p0li!4Uu_>)!d1o=WOQ&H>bqq~-M_FLqpwq_C3?=G}dx4FD~;AfkP zfVN*PG~aWG)v)bjz}0#E{GTVvagC&BF!1JbQVeR=-Lvn%`6@ON&8BH3<#5&B6<_T> zSIwNqrIZx3p>0g2;c5svP((+WyBt<=&TL(_c8kLmA zcfPmtXI$Ng-Uc9B6$SSc6O-`Ixl2l}W3%|Vxy5q03S_i04$pk&>VqU)s;Yn(t~Xd# z)^_GF5jNif;&+J46>CC_msA{wyD&~Qx0&qfrR`q1e+F*UOxXR0N5EN%w((V6kN9M| zc~RR*s7+T7H4iY6{p^6c5xeq%-XWNA!0<+Rk#mnt&=1$;Uu?@w=c`TUpR|QcM2*L$|G$7_Ry*%N5f{cN_%NcbJ=(^T^UR%OoB&> z6wYshBqb$-n~)WkW~K1v21z$C)`FfnBt$$fem|S?=^CN4aOf>U;qQ98t!tNcb3cS+@kR2oA4+uW~jW(N%>iE zltnf-m=P?!ciWT2v(+9H@c_zKk&uX0r5kVDgWlO{p`^Q|(~IN)20mp+HD%dpt|_*e z5efk)G;dLAOnl1r)_N#0>Bo~(Y#5j2$@g}G<2`}vPNn+mBn}DG<6wpImQ1?dn5`10 zEt5=!BzRkra*puH3SE4a)q${?nb|95_N~UB@nbA}b;I@jUqjH*0C!^$V*0s!!YFad z2@OP2@n>}ixNW%}OliBGlNBR2Y9Pm84lpf0Lm03Q+0Khqi^mZC`&~(g!vnwS+TcN_ za%Bs(`TP}SO5p?)^ZOuDuQlu;lp}SG#C3W!hhW9DEuZEV&26--aVB#}{tOzr+-)?Sbb33Vb1Fy%DIUz2C 
zj32BV7w4@OsEDse$1gLAie__wZ^MwwcVE(syg_pZPfB}o(K#AVYs^8+GcyC6+J zHqE%ZX`ov;Fjaz@pby&ENK&>>n0>mo4kVWzw)&-!20|5+zLhsI)IuNA@u|=_m7<#u zvlK6nR7Uc7zB{YzR}xM?!7$2QtQtz-T8X!(lfG1-$hkX$ix*1PhcI~)L6TUfjl zP`*|$5wp(qBp0|o;{YiBsHkXQpy6xyV1*!}R=tWfx*6{Gb^3A|q!j9a7;v!7BxdWV zB)dMFw`;gtl@+>*nXo$BR;+FO{}D zZ0nrvRai{a$LRv($bZNi$hs^)QI!5BCsh?81aFYyP8=e7?sc zWp4IBZ^1>s)nP%;0ruNuc(g)i|N!Q_YA4kXXa;$T~+mLL2wm`Y--a znG$w073=dQ6{SzrH*ZSXNGUGa9k;Wy`Rz%)mIN|dT0{X?)@iX|l8cOFh3NFFn)g_^)EHX*pKrdV4<#YCI?BMjmcf zG3IRn;R_?sV7bZ-=o&a|L9}nN(SRP|R8Uil5ENpkF@D^Fy&f4Ii(#U&Ma`{Mwsu#Z zM@nkO&A<@0z5RoKfx$^JuY+s-w%w&TyQh>ueC=m1^$Tv=Z~dFd!;}FI3W1YuZDYig zfLla#wB=c59oqhUYxBtEG{Y=^#bQyIsa=BSJ;BSqDgZ=fkipw#wwro-dN@5#U)>_R z(_W*T8;l_tgBitndV5VE`=x>MCBKH#wP>`5*x0L`h!MP|migLlaSbRJp(3hdl4;hQ zq4HX29UXgsFis?lO~2kmm(9%_t=&2=sN+wbh-L>T{(&P|PS^Th1)R>jy$$Uf+po^tZP?$?WyYX_8%+1ZW=L#4lIP{g3N5rF|q6D2f=U8={YDKp!2T3z>r)eGT z@!b`@VrBl7_|<7w=qS4eg1hgv4iP1ssMg6%7&CMEH}usYb|+YQvSSnLi+Pkgq-f6{ z+OJOilRID~_?GC|GtHf`8Oj2gm>+!QKstCUE3Lg_m^2;%V*?QtPjm=b=XC>LpKbK~ zFZzo`Pkw8}QSxN2B|Zg6t%xY$^bbHx=-E3<%*&%Wb;BSeB)nh2gS2`=Os=EY14T9Gu6dol1!rTFrXcfD{KgF7#|C&_zbY{Dz&Wz1NvF39w5_ z$syFp(srCktlvcC`ZIQY4}Xl{k59vYI=H3D5`G-p}*8Cdlf$qoIS$E?$|ps&9sUZk3QSy zQKq96{m7mpLij|r&dYn#J9H_Wg|~K?+z+?z(wYKL3dtfAR zMwy#oxS2x&EiC%n-Oe3-<9b5fL%a#6y=b zu(q_GxgyJA+r}SLG9%?5p$Z~a5x=4o@jU!)3>62c95o#cq7MbFx^sd#_`;MIPlR5+ zd{DnYs3U7+G)yg%a#hSg&!DEKC1VsA7)$dFSWLkj_HVR**2-es(93yjQ=9X)lqXZ; zcEqqb9l7q`c6(aT&e@xxt4l=v?s`(&_(!7>A>kT@z1xrZA~uJM(HFR6q7gIDOe0HQ zDGv-m-+j^|(Mv&vG>-Ke2zVt%{VsS|%dfUyPHuyQ=)Hyk<+ekE8}d*lMZ5K8{<5Jw zaOhNGEy8yYTIFUP>hW+lxQxpd&ZA z`}>ra=R0`ql`o_&R@POVo0q;5&@6A937i^I4&92vGDCVg_aUCeep}P_v=`r_XC799sB-WxZ22-oeu`iu;-BRewBcTO5j!0x5%d0 z=mB-s)VEFV)?D%~zRR1yCa*miq@$tFUQPyDa!R8Qtaa|JoJE{Hm2Cl3r(^Hq<1>`a zm(}_0TYD{}?cyoWSAvQMY){uw{1o#$+NE%!`Vk!uJmnFWUP?mJHr_gGg1~eM8npU_ zVG;Q7ulk{KLt!50*Fm`ftKF|vKvAAHB@Sx3#c@aa`vM?vM98A|dg0^nr#q(=RSvkw{niAnyZE1qjyR^(=T|REj=R3l(+9{IIx;umOT1P-eduFTEVB0QUp#_$7T*xByKzyN9Nh$s|HUA3HwIodI9`)JZT=!+w$?bQV`6r2#jz{v4MylzoMI%XuQd*KY@rG 
zZUh2f@R>pSIJ#~-38HX-m2v)yT+LmANv2SwVp%wEgqkapqn=1XSB+Ziy*^!7f$ zSr1JpTMo~Ac%<3XxIhiPT;b>huJfM|5%JM<931i1#p22u6_fmP3{y8d;|h%{b0&5sn})W8ggVDRwI18)!BUoaoN%@L(~|rq zEkVSoI%qwT&|Yxj0zzdp2XP0|;TR&W|9dWsTX)+^nDD)mJCD z(V}1P!LXi_6!S==LMpr6sZbjq;Eg?ucULX)lGRe~yi`d^b8T*np_r3**U(GSu(DTVpyq`#bOC!cD_Q#Gb-%>CJXI<70_no@4gSYe~B@Ol$zlNXNvdgxCa53)L(QOO8NoO-N2I zDe3S3HC)ruucjJ66hA#4YE{hi0;v`_vqV(3>IEPo+klaba?v*qxS8Y!)XS+4K=ZN` zfIec5h(~!%v17nlnNonma_+~UcIvG+hpbyS7jzt=Hx@O6cEbU${wkic^on~9r4Gu< z11`o{Dps59%GVmq-sEnwM2RNBrpe;B7 zm1d`7|j zKc)3sJ%tjydu+4^71|K$Qh+NN)(1VMq3KPE7h5b{HP{?Y%}VRNG!9%%z&cbv(cXCM zvXRaO2rxiir2oSUo``&-{%_eMZFpoDP1CFYIpeP%z}Wv6x=--j!V2Vt``;fOzi&_9 zEv@B;^)K%BpLd6gUwr*{#{DxXe8T_BMPB_&1M<%}uIbLTUTEXoz_rJ@=nGrB{#6~? za5YW;6=f2n?< zDR?|_t=+D!KDjq-;@w>Md(8%7GTbZXEfQfW7xPBPFg;r1I zf(^q@KqATJgnt`k!U`q+3{n~cMMU!K=~Hqor}A&A_Wb*vSTa9y);jM;JvyP-$KFTl z>t6&UNmqdrc+BDDJ%`5nAbZpPMEVUFtlrh)F&SC^57mP0sgm1fdZ3O%?bkpm=j=~+ zCa}T_&V(WQsoiY>S3tWKDFg^qcA(&()5`apb+jZH5!3e3ShUcvFl-%9j z%LW0=T=ij$7SKrmnnt@i?_Vq|EOhMbae$y?l3f3@6w1GN_AS2B{GER4JfeQ@{jz7c ztNp)B;c0f)>Sds@GVute@GriUVXLkk*D4N1mWBt;s;W<SkO5fO16+`H7{b@bly2pjQg3LI576AYK<2l|$63kM7W0%@jQVRbeq zBmiB(ZPJdG%Bm{~u-Tpk=Pv{Fv*DTYRdR0oOns|6G)Cs#<#@C9f!9T`Z`cX9{*WRGqYWA5=ac7jqeRslb>bW zyNUl$_~BpiQcuS2?tF!frJYO<`S2TC{`GMYo)_))O5@LLw;SkQ_T33m|6apZ1GT30 ztbKYBxZ_L&&G(#obmil4mGwF$aiBY)`ADvOulTxNU1=Dg&8iOU!n_U)dM{U#olR+= z%y@%!%Gi?7HAf7*$}8e)BR0gH;0D&sSG$DW?D0K z*Rt63pgG840TdetXmfPAYb4FgXbqd(O&3589t^=tjsk!Rz=1psP-_C^{KtE}&1M9} zZf^G^$;z=4cMWTXv$!Lf<#J3c|6EW{7&JYtR%OU@!(`NUT12d?t^ev3%hqt7#3rH+ zF$)SJD6&Z;dNyc-9=G~y4EBX|4g~w$bX{pfh77nNVl0w&oBLwHs*e(HVi8G-ghTu9 zsNC?1DP1%~WOb*m*#^^34tV1z$vcG5X{#z%5&G9n0T_fvx_wE6j>X1m@#px2A8yRu z;9EYd*V_znqE6(IOEUGgx_*H-VcmJl17jpDivDS}T5KikY*V>7FgB~XL<86JCb@U` zFB~jb&Ev}ac;mPFbeaG{*HCIYm$vCVV*TUb#P($4L1L)ZEyJS3vW(3K5)jev(K>zF zxQLf#@q*xCvtZCo?j;IVh!#VdKs|Xf9Lpxc7_P1?gz@UmZ>t&Jw*F~7-fmi0Kg%X_ zgYumoI%Uvel?QvX@9;e6;ZNlv8=z;)pHdk6ez-u<_mFGgiT2v#RkRBn{%L@kOtP3$?Wswk7Yw_iBxfSEU8=!q1 
zW?H{6S8GEkOR0*?Iog}4u%7&(3dp{uOO2F#eb)nD8P*4ig<|5F0s2UHprtrqc*&m? zd$3Rs2!e+=z}Ewn`BIq%#aqlwOlFf+Gm}C=n=RHlbrzWD6YQOb&iuYN9f!u2mX@DD zX7v}BpTfh{wY0SMTB0lh&Q{|VK6b4NI@iBbdUmDmcdcVdDpTg7c^y1GFJ&=PxYpa; zG+%0#QPgyHW*h<0k70{=P-#i3=-)AD?S+Bt@^CXp2XvG#L`7S4_kJiSIH)_-5+5w5*MtfX2Mkr`*v)1%r*UeNTGRJ@(W=f}XU`cyPTchY20YI$ zx03n+@)<#lm@3Y|m4_?Hr=8ds2VCM*HgqSo;{?tQEmYhp-o44$S*qaC=eI(PQsp7w zSju)uFDp~P*l?Bl^?kAIQT&{UlSWQSziOwwaV?JKCeUTz9bm#2{+5E29e0MaiB)q4 zp4Ycw@b3;P&w}!_>H~U5$S0aS4#u~aJK6eSh}GlEbRjne5@Yu#z`mEk&2L}726rn2 zVwx<4-Laov;$z>axu z?IE66nQ2F-G{C?D+H%4%`5<-bC}OzIU?@~fHbN|$apamYr+^SZu7BW00)L&)ATf{{ zV;2ta6&348MOO0AH-56quRr16go+{YgoL9nToODk6p+X*ONPkr4ag2CzKw>-K z9oOr*gYDJc*KIqC{>Vb0$fV9Y)rwBrM0>@?^n35@=~BKGy!SF&Y$xb!M1s$QL(Y$y zuF%}8v#ax+;$aT~9QvCJCO#D>w{r3z#U}IOCJ+eRWn!!a1qH`fH|a%@OOC~*<8d^s z6*Cm1m|mL$DRt)$Mu1i`FK_ZQGM0rh#!5)-I_}RatfBAk1^gb>^j-K6OUW7pwK(O8t&v}V{QLn);VjaB`;?~mXM$-n9~7I z8lIyW91s;8Ry*{*0AZ#DT%60!X|6d;US<*M%gJHGgRYqZd<(gKHzMR6Y|#gTP8~+# z!E7FsBvyyr>xgyNS=~Bw!otSJVLfBJ48TS1F1AhC1%J|^t6e@lh+3a%d4~4;ZPPnA z?3-*aXD;ekQ`CqfEmpW-$t=MCkmYoD?RovERC({PM9+03skZEvJ?fLg72@9Z{8&$b zu{eY!Z4v6|qUE_ztiH26WoF!#r8d}mM(0F3c}tF{=dcP=MGXt~V>58vp0W#2`{|0v zF0|YP?YIe;>1v1*^)J@u=H)eGYxnB#J7mk5^&SE-Bk9XzY@U^IZvyP5xL?jBZwFS0 z0Y_XDIPp|lW5w-~$XUGeQWJAYX3tfYgsTFZDw4j;ex`voXBKdH7Y--2v$=@tf?kp| z^JIIl5HK{Gt*^g2?nU(0S6I89JE{?ixS)RE2(@Y>d|lcQ^AKE`xunCNo7f?%baX&x znFM;1+-a1B;cVoKc%g44OY zjV7M?3cAQDr$sHyCRppyfq>rg4C&POA3q)n@`kDNE7izUK!M)ZWTU;GZl&{+geRL_ zBTloGApuv&q}ZtGz`!oq*o{;Mn(KyW8`fiSYF}u`pnt z3$RJ$B5+xBx=bH}MHBctucEO?$@jj%*=1mlb~Gx`90kYr>}u!4#cdAhs+{1kz;h9y zVW2OhST!|$`O?dB42y7q4cq?G|Ct1#+2wWJmh1^dhW?LO^yk(Pd{@Ld&&yVqXR#6S z^!7(9da%!F2hVM~p^e*cXtK)DhVu{#f_}7|#OQV?F(W{?UsO{X8u=3_Y2UsbhOKB0 zmFR$&B9o$Cx@l<pO9~51@O;fKF z>DngM8dzR4WHsi_dScFD3;0h{5QYx1rK13lod?d{1PvQ(<*s#$A{P#NYFTlcsVIm8 z!I8sOE00`Eon+#52VsCnoTi`U+yvDEkh_FS`RJakwCieuuI}U>qP`$aRDco4{xZeT zue#mpF-6`xTU!;{uB@2?->EgY&)Qs?&|Wpz_4a3n6ZqRlQDApaj7`J#5=+X>T*m%# zmK4OObtgId-PU2w{@`G*<198_^jARt{3-0yXu7EL`JsdV?LmBu$QiD$=K<*{EVD); 
zGf-v4LX@?9^?S_@lGpZTIFb$MjV`yU>DF1X0a?4MKsIF9lM`m4o1Xf4>NibrRsbcs z3M5D6&BpVQz%?=kYJp5`@w|?lv}-U!MMWJ^@-xH?l6qz12{!4ha~fIp98#}snLCTd z)1O*rq`l|Z=2$TYDJ+X5Uv_M@OLH`qwI%6WHkSWhbA4t1VXgm@h#AVetArJr9s0Su zD4Xiwsj-9Bz7Ax?DBl9kLC@3~Zd&7}R&N*K%5c;j#xYyzM}tW$VXx=;?qzUR&7EJ% zmr*d!i~M}=CF&@|qwPdWsdqEZfrb*HHRS;$UCt(m4bJmYrVAyByf$}i*kW~&T-7oX z#oGqGM%SiXicRMv!A>xYWajD5K~k<)_>nYsMRqxIpYMk=F^#Nd)mNBw4m%|I{~XOe zH#ph&@QqOTOAXS=e%bv9Gcot4+$m{#Cs=!_Gl%i|-mAMS_i<~LPBLC$Y=hMLtXLYw z9`!^O(+(#4Yx*Q5@FCt1bbWE4XBHKML(x~*75~y4sm0J1Bml{FBe+TO6=*W5<3g5o zfbAch8+FZU%*Qg`l8jKLHC6gMQjQ(FM=vkaZt}_~_>)W)yPnjW7mrvR3_I4$Do*@{ zAvArAwG-=G!_~t=#e$Is3lNAx8aJ(24j{;WO|Sw)5e=WET(RJ*^Mk>U7&vGb9_uo| zUIrZyeD)1EP<9m9R-{#KFvsAclPV2VbpsV(&wZ28uWyF31PFp17V3F`O}z{iA_4;O zDJ+!XQ1>nYXN zOXeYIi-r<^--zgpJ!@{KtNSN|qTjs;3S`Zgg08$DIT>HS_tSp!r;hrr1OVJuU#?p( zzTu!x039izyB0(`pC3N}D`CxW@cN?Lh|QUIrfn2U_gwA@hR`*lDfw=&S=hpE>s$q95}GZDpnbM~0@7*{@PKV@}mUnF6#Ll_u3`?d30Rb0VUi{47> ziDy&Yz1^HahiTvA#LHB%SE|=&je?HJqNLOsuAp}-FqE73YHWNuujM29#tfjH2y@?^ zPD=0NbVWo9nlARc!?Pv3oh+Y8$z=sfX!tPzbq#4)NOSEMq*a?#-kTlZeah4Kvj8qp z%EL;l@qcv`BzBD)L!~@NB)~DckK05p^BqU3@?yE3w2&vcFPHA4i!EF$K~(1mBm|p(PiuOW^SB3 zuuqpB=w({h8&5|jG^%VYWUne!?|OCkvDLA=mcx>Ys!O2XXB}y`IHkN96-5~PC$ETa zr42+AxpJ^%EyBCff3yHPYHC=kbtF7a=Vk6nRDJDZ+EsQ3tRP1{C09bYr~$~F3eJ9) zI2gaLtY*PYhE3r+BfJ|qT3{dp!FHqyPgq4lbZHw7p=UImZL7aSY@;d-#wa`rY&rP zCtTOaVuC&hTj#3i#j6M^^i*Mmm zPxjs+qitPPz3%xcY0HXD<0??1qfK~cUOi!R^MUO94~IyNfM1M33i44Dx9@R!w2r;} zHjyb*%>9#((6LUf$JXds^;*&aTAP^7Xb8}}8x~?YZ_XUdp_4wuEwd*D#7JX}Zq`o; z2mPi1-Y?BKz;K-h*aaZo>NnBqSO0Zz4%nv)c3jkfKVGI1@%+8|XSXPNkOaT=Hf(WG zl$)EIG&dFK&Y+f#*wad;g@Fy$V-?2G*O2w|FTj3EJn*&J-ntW@K0^bjk9!00gTT(q z*93>q#2-dC*f{ae$1NWpa!0t&MLN$#KIDpUo{w~&Lz$c@wk;g7(=}ZDYuFoDvOC#=05UbX68R-{Si#7^fMo6bpO*ABYu7Bz#vv^><5WUtk|xQtFuS@ zBg#IWUY?$?t0BY<_lK2WTN)ZsZ%z@f!?mN;%f`n`P{h?ZEk!SWj_A@veAJtJ(weF% z&t*XA?MS#XeW4{Ym9Nrj`!tGjwmCtjtl~K>Xjs~tc&hm}7t4FIqdCJ9?twNGhypJ! 
zM0M`#8Uq0G-afn2f|Ps2UDrCY5<(frx0|YwuCC^K#tH zl~kec$bM6J+!?7y?6SF0`P3!!%9*+Xj5^1FW$=4&PZTMZdkke3<*47IBqsJ8@elch z#(szJ7R?s*Nd#naF8i}wnF3Bmg)d|@HSsA0Ujt~ZRr`K3A9^N!9_!-A!Oe~x*LC+M zm&V0s^}`IG8R2A+`iKBZgCJx@)aG!=3oHV{)P*py~nGypU9?6usnQNv-(mu}7RO9GINmGmNc8Nh$36v6`FW30I`MvO|1xX*#euS#8&u zkeC|SU`Hhs=uHzIN8S6OYP?ZFn^w|RyV36bYF4~8Vojgs_*~YQ`%vB~Eg${L-Fp4d ze0I=&f4PxR%9&J-Yjt13KV~xSK&aB1kJ_qY$e{`CbrS_UtvSh`7jZqeR(Uy6o+AQdWkPoACIj6og&hP{Bam{0sfcrrlTetiXy!D*g0gR|Dn3vKtNdX3>47_6K)`Q)Sn3s;bCXz1pd~*? z&=nhKjZpM1Yv;$WtiopG@of|ODfs`9_Lfmme(f75ARz*xQj&^vhjdCygD`Xn(kkY!@@8NXm5-*1~;pI^i$HznYCcSSz ze3{84bFMRF49pvw^JYA}U-`YIV={-~*y>_FZMQ3O#*Syr)yH_M?|St>!zZ!w#MYgn zziS^=?x#&-aAZAuhH~v(P)-%D48-~SjPnd7Az^&>Q<|MWNDsS<6^wH`Llv9a(Y?i* zk1^Zsr_BeDmKt843Eu|V5FdgJ$~=N+-v_c9n^GA?BU^TShv8mII+YYKI4chw!bYi^XaA*$qXT*GAW0)9 z>@oX!8Xle5P{}lX%`QQ$C2Rb4bU}?Y_p$QBQIP_gP z_I&M-966|^@Kyp9Q)*xJ{Y?~LsStq{Nu;b<`~#Dm(cd0R9Gy1RAOfMZ#1*8}+M6w0 z;Y3fRPFCYs8$z!LYX>dX{uF3bS&s+T)d>LTkxu=IB_xeW z7&dmQkXyJ`i5_S%xBZT#VWDcd+AS(F9NKaH1ttg?Pg;Et`(~idCF(dzRRzpVz~5sl z(W^EETwDNt_grhZYHv9l`krCj3b=WNAYKJOEb&)bD=ubrKbV(6`uo{d-$hg)0sAJX z>eMyc_O&DfJ$*Jz2qc$za?uCXDrm;Vev8`aezuG?d@}faSy)NxAC(!~=yCu4vruZY z(AqOnh@pv?w#ai)tp07?CAG$#lbYVU|Toobz;-{2tdwWokD{9lF6?Ia*`AyYJtiS`d7q^S)k2AfB9+EUF zT=`N^-J^SmZ}@dMfcvS-M8_W9`O#U3>Z7Fjdp`aSCB`H21PmytwWEDgvVQq`sKsdq z%oo0JNO~>1;j0WoYy+{u4WaQ6kIU1OqET4foLOs5+gXQDkpES^%atH3Gbd+Th-y%{ ztn6l?h502}RHBmX#}aDkPoICCElW5wT+*Ms^f>;bSb6k`p3C}Xg7w0kE2n3vGv8>i zoUEy=7W?I-2}=`Jbn8lizFTP**qEf_P>calYB=L4$TqlD>C?o8N?=Y8Tkf=-O* zH>?f#P2Y3hlFbNS?kzP0M_t1cxz=>uZyE!U?7G85QdlnABkI{NJ|@AtVbVM{Lmw#h zNd=vS%u2TnBdiET*o9dDp(8V&w!z&F$8)JRQFf4YGGFk^7}}!Gtsrr+XlMDIpz&)>D}%J<$~F z5z&0dciR0)h$Tm^r|gZ-7z=A{_&K&{ZKsM&hnw$h3)NAW+s)LZS)_%;JR<*`Q~XUf zij08bT`ns}%9L@E44jYBdiEFFV?Ut^%V8?5va2KsZNuTP67h=5{HQN%?5jv(@S^fJe_UoKYcT+y^43LvoNB-g#1`8A005J(E5I*2=S$8|&VZA0zYg0$5p>YQK6+ z4X!yk(1&NQxrB;rcjWwnJZ~I6o1m_V zWFFO`oL2HyV5H8K{`w+0dxy&Cb_b&`l~1$lRm!LHPV}ymeTL!HFE(v?GF(d5flKf1 
z8q*(&JX%*KMyHc(ktNO)&G2_3R34WPjiPUPm8Ty5ctl-I1?jK;TYPWMv91Z}7dbhl zM8n9627%O@{`g|?>v$0Hd3s+6`<5LHFD4=$SBg7WRfl~W9cMl$dhm4kiriyeVbQ=R z)YC;k_(b!rOkN~M&y7K+8%gvxdOku*0m?TMBja;0-TLvvB}9*v@$`Izgp5p&xaY0v~DJQtAr;#8Ai7#H0<7pX9`;n?Kw#`=g%ek*th|&Uz`PvY`J^ErvwbD9DkVwfFZm%+1Yu$^LF-U8>$D24PQDw6b-nBGf ziCi(ORzR5oS*M7KQXUkg$whKF++nOAsxswA7E@X_ZXs&XK!kqbcEMNDNM@C&yKX(rKBp7aU-{X zob-+H8q5qK*!j_JMN!pkgf%cMScGdIRLj33XrOi9sw6N*TNYcemA1#=-joYj7&@!u zC$NC15q_T1YVI0qVqtXuX2Mgr9a!1fSxIwvY~4iia+7pwzDM~`MA{${F@2Z8>vHL^ zWg5QM^)w2TN7?dDE+iEH`oURz8R`hZs`Eh7_;apnC{o-Drqg%gV&!{;9z~GD#K`t<uM6>FBL${ z{_ge)H<|CtyQxJQ9jW5Z&hFSWxAyC~?vUu9mWs38nYfGq-Oq)EJT|VIKbToqB0x%a zb$)zDU!>^Kx1|_*nQ~vh$pX*W*_p@s4R3EsYO2BU1)x(E(Uy>NZ)QHn0n_Cwn~4Xt zb5XtT3f)ASjXvpLqAeyTD$&n7@PiccB%K9a*LU{&+&*eoO8)uA+1Z&OhBxwfgBR`a zbKN5RPlK_vnsKi&N&-wt|1;XtJ|yAc(j*=%3DFa9GEfv?-g1jx_QSa2gUs;t1&;3D zU?v%A+)`sdS8g$=_npmH-5huMJrI|{I4bbSZ8$>QEbeG=k`Dze*=W9|x_ESX+&f)` zp1%?E^}|VbH%C&;Bp`IXL8)t2Bm>p1rTuDeON)}x7X;er2YWNp>6*hTh`|?CQpvc zRNr!c3vhxtvKpZ%#$Z6ytrlzUt#wD|a30bI_`?-X6DN7FaXR+h)%6TI+oa@PA52Ae zN0E;+a>f2MK6u_z_bqf=fPVg*{Hu-VUBQ<&3MD;nvL(lh;2^o^an@!wfNEJuxISoq zoY8F--IHTYCPBRdw6I2Kg}z<7+_jkn_(#kj{UbW3L&KdtSoDMmH|Cv32s!ha3$ru- zoS09zrEOo(dY>IAI&ZRT%7DZtR|ZbVjsNV&z=7nH2%rS}Yl zs#e>Q0@^7jzLq~?X~=!cnV&C9V-Z0LIdi`*DP#uFIGq9d`D&%+3G0AAyaV)ywgg1Z zD;tCKG#5Henuy`>T5oYdzDWC!%I$AsR5IEEZdUx4U3|%%!Rh`fe&XGK=xPcMn_ufn zpsK1g#!XG_nXS~^8cY;0Wlfs+&!5sLuf8&&e3twakdTpe{)pm0x?rn{(0d)1XnNYxKFqgq8a zdt)08XZ@-E$X$z7p-xC4$qYAx+i zoRuZezekwbXV&|XH?qO)>E^PDYeGspO_4ygGr!cc+cZ^}xc>OI^oe5XyG^eEPFZw0 zQ;)#$wHLn$rI)n)MtJZQJvj1yOska7Ao{b(cF(HrO%F#Rvo%_^IfnANRn3;8Q7kvA z%j8p*HGCZg@0Gq>8*WH*|8h9+9C?A7TJvOt8smZhC7@R*j zXZIlwYWwukN0gb0HA)yrr#XKU08T zWV+6ofts2+k&XD5s>Nu&^L`KI89!e{mn6uvE>?Qzp5kJzuC4X<_O2YQbe!(bVMTPk z$vgPgpO*HhNb!Zz)C#C=frQ6+k#<@54uI3-RaI4GU#~TML8bVen3Mg&da0R`l$3n? 
zPXR$i1#iYc!@_|}bC7LzA@jqK94)h}&Gq$iH<;Gg_;_geNt0p;kUN;IwH*dvpx26u z3_pksX}7kvv?D8F5wDs_FoQm1XJ_m6K+XG!$HvF*V;9`LSb<}|8@O|V>|Pd#?AWR` z%9}twOxRAH9C~Gp)tmW5KG4ye-dE4x(s2G8$L>>;85|llO;=gJ^Z|Ua96(hYX+;H! zdFpC9zsG(m&eXe*$mYZXxx)!Xk-PLI;e}KPDz4(zO>>_}_?*O{it@}aAG+94W$cJh zoYw=@v1Tmmo)W}Mgz2PW0|!-=TYs=qsaS?@?XjIJvJ7||2-x&7>Dg&s4Ta?ZwB{He z??T>Z%7f&u^<-8*mz`H1*nDt^%7e`w3d(k;Pgp%E_AcB5xo>L2ZE@EBVL=Cef(MLBk}hmdL)^ZJs@& zgC;-LjK(uu(b6lnWK5g{2NShJP0aJXRv&o602M;q*Ya4{V} z+QW0Be&A8GWetmMoe4fW+LQ7y(QJy3NzTo>bgK5Y7u+`|B>WzT_89|1;9s!15tQhD zj6&3)P1F7430I26?-%r3`3=FjPpy|q*|4IYnZhUcrDdcaQ7$FYhgIZvKW{+nDr(yK zmaL7a*ID-&+RZj+=yEg%oTz9DzWOKZ=5k}MsP?yoFDxV~lP z(V*6^;M`!q82+(DAmgk^`$VzZ9n;vE=y4lnDm%ZqIiG@kTb-zbf}(xrC*yo`X+T*c z36YaH77%pXCB%^F`V_45yLZsClXnUzn=YN4eZ!Qa&y`KO~_6cF+xVlzRm4)HC90bBihUa@y z=5bLMJV|Y+UY_VDyoj-0nGF*WEX7B(Tod31t1#^|GO`8D=iD@_YTVdQ6e`*wlWx?y z_H9oL8_QiO!(z#zUJUXmE3M}Z_FKo#qd~0Ov$e%c#y>1H{&Es?@xQK9ox}cv=p5k_ z)%}HJFl}$d+qU`n?4ehUZ~EPN+1|ix)QaoJy3UncpUT;11RSyh_w;k&UHg@uyK~wz zDQ<~ty1G`QWJ7A?7&+(5^)o}el|sb%hT``~I5sN`OMh67tB6!Do7YgzL{FHunetTL z6e$t5KBkP=__7m-fXv~wRISflY+@#3$vU_O}nby|6FFw`+aU4r_dwX{0 z#yWjMQ2$fKo9FB>#7C+z{mT=}c=V&GvZn6s5UPgOCyQKmS{FLJ50~9-m(S?~?LS}c z8C1kf-pY>h*mEvM^bqqnpj&T(CCC5l zMe)va3Uigc8g8`#&t$`WkCR#hDL>*u4Z%y;aa~11{0OUHMa-&pIh>P%2N@m=O{~lE#-aX z6d*A(czH75*>LBsZ>XDTiWwReMqQ$>&CO>=%gbB+0=v=kc-3Z$pDe1XWW{u$vf6qf z*}VTdgmk$67eFSWF0LPfBgtmZ9HqLv^L z*mKby-#|MvsshizYdfhq*W~{C^?koB>1-HeW6|2(aH9Hd>m&lJX?lYgq760n?J;M8ooj6|WIi9hE&pF?gQ1|h}&`~tt(@EpsnoL&?jQyL8T%^_<1g!nvquI`SGMy zK3C#kPxK?=cZG@Wzv?2jocHhJ;cNt_$T^!JA1?-2^46;CP{z=fKh7+F9^KeTkwMaI z!mT}}R{aF`+$>-3u|qXogXxJdD(<4f=LR8R)JB&r6(B;UrXH1&lqCP=%@a_|zhGl4 z>3iS2`-dqWfPphKuyGf;a?TGHB66+4p*36UsOse8bPs~!0}cEV71nCS`N*_d&D7-) z)q(x#m}{Db7e;5-CjtT@S#|j&RT#np$(-kv3d9j&{#4P?t}pt~*Lj@opJYn&o^w1R!6pNkpTAf)dF>w4J~5+t zZ9PV8tHa|ZLzftZd2qA8;A%C6Mj@lr0&~!-w*Fws`EsKc;e6!zkeO_S3%mHr8%s7W z?2Dm~lbZt06PlnpzggGhTD!J1#JpV2X0>ALgn7UX+|e6;69Q9JCg>`k+6k7JrhyWWZnHfNLM0^V?bz67=~sk!id?xoXSE8d6cz 
za1%nhbS-+!Jsy$1?NZmXP9Fc<13@@L9QVb-pCO9rg*rQyntfs|WyNLZI<=~lPruyy zH7=vSOlW+_ygs*}v?JPmXg`5~S$i}H7-9`fl?}c<6|L2lzn@Xk`jBOFU?sW5@l;T( zcJAK68wH#P)4%rQgp9euWAZtjh}C&7aTp)`y!0wY9<=;0LQrhR*aU~wp;&G{NyQR- z{#Gr3&_z*Fs)5t=M4EWOKltN731gcwQ?F=vzcoYd21-VTq8r(6rJAl=ud%*0s=0Yr zr!>>H&7%eAwR^Vq)U+n#C*bh18drQZYVrmw6lV8kuWRxbL%?<@T(E0qZ1!6LuCqOul= zDYy47E;9{oEDO*)0__Ne_2h31ANcKlKWAg3ESgXNr0>>r4f7A49LfBuSzhpOHqTfB zqWa=m7RJY6^K}|aVN37XYgd)e)3-S2#K`jI$px!NouEeP0aHBoT>Rofz!*^>FU zYCTjecgD;q#NL1U_z?}L6S}7O00j7Zza=SrVpktuqWN#tyYXR1 zHNe0bSz_ts1H>1ID4nbn!K<8r{60$N|{=IK{&j8J`kgu#9(7927t9gzMRtl;>HEt$$N69PqQx%-9zT97!e>Uzo`Uk z(ED_4A93YfRH8{hZ?=3c8Z`QP7`0=2*782R%OmDTOLau5H^-!`lbj=NtXZ-1Zb%x6 z{yw12@~Kqx>Tycq>zs>+JhpKPDn``m71+`}bSHDyRO=&IG>o!1Vb0CGRQEo~bvd7H z!@!Ezl&{rJ;}`m%pQlp9t?Uv1mZg&5$>R<;ex>Ks?LH{c(Fc7&`@^(2&RN6c?Wwp+lVe z9EU0|VpYSd?%s&LOtb|h>5`iOP7Oz!iAbO0)`DtYH)U9jY^-I!rN*XKsslfRLTcpR zNK4zfet^;N$%AE}dt-UJUmLjKzvzW?|3vr_XYs-ESB;*TIy<_B;D>ayot}&9j`IGf zw#~-U2^9gu|kp0z!)?)0}2?Qrl)jJc02 zBD_cUXbFQv;ZBH>%9VF72a15#BAC z{AwVQEKcPg^ezRjyRtyKZ$*VkDQvjL*_pmP&rv9!(X1FKgQpSvtKpx0(B9Eee?B`p zA;#x5Wati5Ha9=d${fM&6J;k!A-2#=a_=iJ6SqwseU$iV z%&fy6^ zH^MeZLwf=Hc7wsl;!?hQ{cKy*_jwleu5Pa}vjfRG@h^v7y@oH>IHQ9%MNSd>j;(sebo1nSSH`{OAW z7Y%Op#%Vu(NQ1utt_aZ@CvrA%&;lUXw#T9TTwKk@-@KWep%X6`#9SOO%GE#ARkGW; z*Fy?Wi2vDEWGgExN@?7m@wfas!w48IhJ_D}suWBB7GWXzF%4Pau6=ZL)PtUF_sp%5 zE{f#vezO<9U9VPE^S)$H22|{xm0WYx45CPOv~wm8G~S)OaCguR8+F>8@7!mie<8%# zr7jzIbRVg5U_cd5iDUUI6etm2_AcFudFm-F#$yV|-+fw8_WhHJswmISFvz@boKaY)MsyKwJ&Q51(2# z-;WC4I=FsTaCQUOrTO_+MfcV4WXF6EQgUr}h4m8ql7~NAdW2DOBsSod!PiZ`%DYeI zW`=yf7y9R3ogK&|{P`jlAc)0W!_HJv@#!;UzV=f{h5s;LL+!DnNMpp#9d#{tqu;Z; z1}nany#+}tr$<_GpI`Wp+OKJN?l$zEI5Yw0UzBt-dA#5LoKS(*g%ePm z=zw9@@1NU2gP8PD0!!J^`dj=+M1QiQG)H!VzXB@8yw)=fQpH9hP1$bUsrBU2po25*IpB7eg3w{ z{|lCfEMJFrYKUR?|G4fI&qw z$$|6f_Gpt&T8)1Fle&2@kSOY`@Vn-Fm1Uc7WxwrW%e>?Z{hJDBHLrqM{lq zHH7*a2mCd&87+hq**pUP8BkD%M?_!)MTy!(Fk}K~QyvFj2MjJxL;(;bJl~s<@@_!l zyHyaL9Soa)k2nTL8xyh3rJ=sTa( 
z-Rf(a*B@)mE_&KZg{VU)&jVbnXg5a+vD-t8{mE^WmWzg&$rnrUJLb+%TT8q+3lr2# ze-BSpmWD0N?s`rbx;E~Xde3Nqnx^N2S7KZ@=aomoG6ubWqlcUP?$wWn8RH;_>Fa0L zZ+;QjW=H|_CRz2W?jNO>xx&1$4_-?0#FQVsCT=`F5*)V`lFUySL;U<_DI`2Cn&<=8 zFNxwvm#ecw4`;sBtxLZ=73&{As){STbs&%zG#pV>(O2h=7~41HRtz&U4_{;#N49W9 zol|~A`{m!IrI(!{G0&q z6DvTnZlc$9Gu#5ycM2ibQ2*PjEZYg~ErY%u;V|V;#kf7>2fDJYvZb`>;J<(uN7D~uwPP4$ z?v@3-6H?7sr1^1iB(fj8ECCh-9*W6$LICPyTF=GCgR{*StI1 zod5#RQ?~r0K;)Q0#4`psa+p=qxG)R7j#Ao#q4QOHqB5-fMDF16D z`u9h*i(u*!madc&H1#QNcf>FM=uPT;Vkr?aRDf&&WZh=Q5BFF4?YSpYs8#oaXtw*``3TAo@8i$R4VMj zKSLgNx@u85CMITDeg<;rU~uk4Nxm8c4_&`Rx~GePObcrIO8%eP|3BX($Td36CGEFS z{(UX~y#+m(Ht-)vT=eu9ch@W?Lxj7xOuRbB?b$$AuSGsuS&J6Rc9{-t^|5hFv z`zSjrwND7ZB>&a=_}@$I+>m_I`k#mW`>i@B#h{n}`#=A=G6Q;ARM!7_-~Zep0sH^` zQ~m$_v}RIR&p*`(Jn#tUD~L==l#H`--?zl0e28*+iP3m@Zr8Wx?tQjB^t-6cW;^D8 zF6$!WtxH2u8kHS)HscvZWSw18)gIR7!4lr9kaEtkHS*)-Ocq$<;ziA{^AemN-kE6O zW~BW*v6j+e>u3k1%J`Ic=#L^zDUR3rXpiSgeGB+pC;Enmw?IcI2MDz{`&~KhZ>!@` zj+N<^JZVLU3k4+&i#k`BJzo9w6D9;Fv6qnh0KbWOGc+(Ty<(^aI|?c?LF9d-xcGVy zC-$~~3vte{hy7AT1m^4yCGkDro`>cPvg_>hs)=}s-`&V2KwSzz+f5m zIT$=Vq!fxtOxzycEUJT*LK8*2xPb~wDZF8~@WiDU=)$}OG31ZeExyPRidlE$TY#H( zGfK!05E}7)zn9TNXw;%jOZu;2H|kT7clY)A?DK2UkCu!* z3$)<3Kou$gGIugj4h<-Dzgj#D82(yYRtkA?v3Py3HK|WOR%xl)VtUac_skr~N#Ig& zX3k#!_xmClA0NJ}*Mf^)pK7!e+E2)q&$O;2O+k{r-~Fj|GdcvrZrw@?w&dK{@84xx z{QY2!Ao3PMYwlgYEo$C|wvQV7m%`l#9Q5O_$pc(e)26({;^SIHHR8%;$-^{@HhwW{MV)8wHdE^FxNlezmWYhn=$x?na2Jbp(r_pzbtpO{!~7JDs1RCv}=ny$c?*o zM{lo=UqyUjU3=GUdzySfY+);aB87B>3Bt`CZo_X|MCAHHwbclNo(*@3S5%DtHQD>| z^XKs{OWDyj4gSPj4XRPVZ97RJ@Ge?t>9e2suCF~Z1oz?z=^Sa4u_^i5+MmE?HXW~h zLD@4bj+Cr*SQzqY5d)q6CZv9e@7?81I6=q;ix#K*US0TFLP^yRqqQgh{zj#5FO2OO z#y;G;gyeat@RQgxl{BN(xrCO??trdr8k0sq2T%%W-yY53dWw7FvZ?e&*nK?+U;H^M zPnDK49tBV0^&vF6!mO+;(cL{Ic2Em=(-lr!?XJGJ;r~_t_UtA(1j`rU;`r6^WZIt> z=vyazzkMZp?(9xTGEU7}JOnEmIxoQ=uNW7vJG@LG6%~mOH33NN@F$U_jI1+ergK>Y zRT0B9ykb|`ohXYA86JFO>dwM3hOb)6j|nVF4PReRqsvxp_t7jk;LbO&(0u@Cj_s1+ zPRII-3jyFuqzFC@&;4p`ud4zEnA>&2973{$;BR=#S#>}CzSg;a6Z$5hJ-hzdW77|J 
z)jB<&un8{ujqlJ}=j&aPxXmAEb6A90{Kaz__9 z;k9?*iw>kX-W6#%3sO83p&G{6@;Zx8C`$9^n$2ggb-0BDWZxPB~&ay?mL6kj0{)qKuGOsg80m&FQ#!>w#Uso99*tNjVNqD;Q)~ zAQ`FU@Q<~MXeXJ9W~ls;2uVtsZ@!S(#KlpPfOypo$1$i{oUHdYULQb-#)s0ecFTmf zcRZBLF2jjfZ>_PTkt^x|w6kjt#mf?FCb3CFDHXvyn(u{MiND7c_r2f>qM!f0c-V5{ z)uC$jNVvsT9D!0hy{ggh+Y3?rc)uYrlGmk&zK) z833358WjR*K>`X2CBVRW7I4F5KX+yHJ{`=My}gD0J_oQ{-8VG@_G6$O`qHBmfddBl z@kON#;g|5OoI++06&rz9o`Ij+*_!(SpyGfA)%%lmd7vis_x$o~-hhz{zNZC7OC4*+ zoQ{hkfCT8@wxzki4J3vN-LFC80Io>^)!#Uc(pG>R!>K@0F;xRuk zd4rdT%PbqjKV$nw73rwPMWz`UehAU-u>d<`1#S25F4oKww_mgfLw9bV;n(^eQ}adb zS7)R*D-}H;r3ZW{u`4U8Ak2s8jXirz8hO7ghmAf41%-u&n;y&~tbj~T$?E)H*O6Q$ zI$>dQAh8ZAgQS<&`{8k+mF5-}?-2`#a`y!xfMQ}|WnG0jk^okI58OfOi}Al>XeY7; zIpO|-ayl8pORz0y$aOU77;;g?o-){-Du~u=C?)6s4y4hkFVyhu#|Ht+s37a;0vST9889yzp_cN5mGBSGEM~Sz)q$m1 zwq~X-DXS_(|3DLXff3hX(NPP}2i>7SfpL=#p%mh5{zP+6b_ChBYjkH-v=Z;QE zTsD!dcIe83x*yl#!97{+C0lFG4xoox@@V{3aSWBZbyi%-a(`EYbNk>~!)^b*Pg@y# zrW8%~Ov{n2B-;ZlVt-{%1A>gHV}SQC(G6~!fLhIa5Dfs?VER7tu#lTaGfg>#zW2+`0ZU)vSx8Id( zy5}l3kVJdRWs2^ANaARbkD~`?JpOk!{opnelkP}T;7%3>KqaXNFE-=mRLgX)+|}k8 zyG@|@mj~`UkS?0i8^w!({2_Pd$;QFut z*L4dAG=k`r5?jG_|5a21jZ9FsIs=(QcpKI;&Emrp$L6=7TTDw!n`uo3%w?LET?!qN z;JW1|5;7Zy%WXkS0O{dbe6yPmJZMurE|D_1ine-CbTeCkW-}LG$dkmRrK+^5LOxdNQKra|CgP@5~!8pav_j3}imh6}zXN)Z+(r zL8qs##B2tkZ6?e0TN#$KRl+*WcGgj_37)9lxI9j)kyl2ZcV2+P0;1PetlV8R@@8vk zYoH7!q>-!g)B3%}7YeaYGj{Y03|py(p_P6h=OBwV&Ik}|>Gu3jxp@rJFkag?3Xd#o zFUru`zf@Ll?6~C(!$Vjb&eb~hIMj~U9@ekjp2Ma{qI)`W2UF@#!dZY$G$i=JAGfA# zHvdg=WH0FAd0k|M)U5;`Z!M(H?U#-j?K?v+{a4xpYt;=)_hp41xup+@0qYS|-*~E} z`vOMlmvhvtk(6Cs^Cvq#zD^xKTJ1X5EJA=|0Pl=%>anGRLO}R+dP`ErxDNrZB|lx4 zN^18bJue$ZpHhhpS|>Sk7D2&0K=SyZn5Q)p?eFz(EC4M-ZSD*Xeg)E%WMt6jzLMKP zzby&;G_ZMZNZidL8?E4YR0L}3{OU<-Jc~D|G9a`c{-&@*@hxYNGyNG`$Hq4CyW`cJ zn>HmNp0BV~`9Ol>d;c-mLg+_l92{8Bk?-i$v8z?@n&CsjH%A@#gv7)^JMuBm(vktE z1;Er78@HqzLfzHhya7!VG=N&Q07u7PFSNg6AOUwb;Ri9p2Ze7z#GVKK&VTb-r3|qq zQ}3;zv|;F9D!$WlD=L-$#ak2*5bHtK`8VM~0qfkPE1akiIN4E%`%@TBk@7oG0GmZ1 
znJKtcBJCnTK}6K~D*hl&E}Ci(qz9znP)JHjVl{5QNp(m$Y=jSmZ+Y}kdK~@HP_3|= z`Q=g=h}fIy0DBS`K3aFVr~97B%SJwJ0|^#OcVZKWXY#LKqbvhs-fC}^_yj?p;@LiC zV^qkc;-lqc+tLDV_VOcc(m&zIehLTI+AkJ=MsUf1Eo+ZetNlnQe$gD!J2t3cNPo`J zim*$C&hYJu`$f?)V_|HaQS@VqtvP`Xx3}C$*!YO<@f|+o=cje>f#+o2tgM+9s8d{+&y?xS|`yAP$Xzdgc?fVPPy?RL3&C7rx;^@ z-L6KdXIj2;sfQN^Gp(rifHrFVmZ(33YDwllor~4?kL>u$ZM)0iVvCyOE2k2@JACnp+b%siQDUMXhd)j2LEAB{Qa7yGmwk|0cJLbCBzhA6VFR}li*WRt zg;$}uk8OBNJZ!fyUaG%1#4>pkLb!RrOe;OkI00L{x7pBTw!^hYbKrAl;Xr9Y3-Z?t zZTsA4%s~fIm45x;DKGMP-UoPFY7SJ7zn`o~xV_36S^1%!??__(N6QNYBd!nRAA79| zzlm?REiOQJ*-(82Pgqb>k7wT*(0TdZb^zA%Oju^(y9O1gQ$6?Jcv*LQK8KBup9d+@ z0trYCOo)HclLm+)yaRO?ko{V973%GWKa_R?OB^g`c7eMC&@@#6i#JKc>wQ;M@?=U{ zn(|-6A9VXSZ{Bq6gXjtX4B(U~`Bee{JL+mjf7Kj3H790|R+heflc)Lg75$0#ePECp z`uxSTn)MZSBU*MQ)-mIQ@bI3GnVF%hk41Hu6v~K`KQq5R%nFH-W&WYceRfUr?suY6 z{|^+g+9?BWXz?%DJ8_?qc=MO7BP{^|P%{)&U}6il!zVC+9|xHLH@u_06zfmC0v#SX zAFW{hUJ5Y)@>~G(+OrRfpjQfPCVv0^U2azvj7Q#JkT(Vk?c2!GG`w#4iW=7}ej!Gjx86raP!CLF9D(9D~NE0O_oCELF z-jgBtic+^O0-ha`^todh5}2XzTaP8GZVmm^*c$pfEoi(}nOR}`3}+@pnankOo+)K4 ziJ#iekWC;chjg-O0`sH<>&M=AH(q}Yg+T~Cg2V5H2Q_P#8DWIq%f7t4bk|=1DFX1` zBR&SJ&0+_3m_g z6qlSY5^yz7fPuSCH$G$Rw{Pz*Yr&x@C^*B+X_yVH*Z~GAHa^~*$ryz5^z?MCxq?%Z z)1k@U8PvUfH~0B*7>^!e5t z4&*naJ(D7G+mwfEklJ1S)rUTN>R%|u+h)eOl>GMNx!IOwCdNqPpZ0lewVJf=J{=Vt zMVtN}j7J8Nv`gn(q5kR#qO7bea6S5lDFg&@N2jN(h!C$1OgtdSilz*Y`69;2bf3kTPy=HUtZPIl0v!!_up7HoF?Y3^=6nsW9R#uGQ zw91mgg0=qXgB0Dz#r>g?w<@bf7KLfQa@P2go4fdX?G%BzOdULOSIf`A)p;Lx*IPoN zzGIbaK|x=`bK<~vY4YxqYk_JD$-#1MJ+$uV7Q&yO>aTEq6m zi`EEL|A3${57n*D_`#1S8mTCEq2e{i1=F|=hi_6c0=9>PklGaJ2fE}cJr;nS;PC-R>f2GH#HrhEgkle;wX*o6;`AqH$72UxqMsCyX_n*Aw z`A_r83JLNC%6<6`RNnQB(XGP>?F{NZ(d4~hF+%zIBxm%sx3_b^CNTa3N98=XIPm8L z;~97#OIhX=3>El`{o1vdnVU;-gH5h%ZS~wDmQ;<5(mPy%?zX(9CSe@C@(Vz}ou1a& z97r;YcLYTqs8(h3#!G!AB~`yKog8UopGnMnJWK|L;6G;soExW{Kx#_&!8RUHgp z)W3r?MV5`j=Ie)Xz0!xaL6Zr@q@?m-6vkE(=ImBm(RcBGspfpo1< z`#C-k6OPZ$%)$M%7NjCzB7_fKebCE#WnUN0QIB5m;QkqFc0qLPZ*`EiNl`*y z5&qH1!~dZ*GqbxLcM>Gs5$_2@Ra 
z8^&)j6z>4UAH9J8)uW#wXPjF<-tV1KA0*%4$r3zOB0{z;WXrgKfmpX~S(AfX+DwyA zv<9p;G88DjOb(<9;xIPX_zC#%;fvqIY|Nf_e5Lb6EO>ms0R?-a&>3F8=nJqz_x9|d zm$;iO-G60LsAjKN9;76F08 z@jO*+J;MbrS+^oF{x@p#7x^C?XQE3=v(h#+zL|UYd$ld~kl+02t?-J3v7;$v03$- z{~yNQIxgz=TN@ojML|JDx>N*_Zlo0ylAF=!T(( z7>0Atv-j`W?|aVv)K+9S3IuSdKmgVo&qW8O$eY*$5DDhR#k)|NG&MH8a*ONC@(~Zg z?+be9Kmc_D%(vK}3}B|^uoE{qkSR)HrrLmu-O3^P@ur>lcLakX@B-nqLT>;d0XyiI zkA=l?8+QS8<2os6wMRQhGTXshI*wQaumsd;lXeqeBl>5V^x29&sRhur_4f7UxA=3b z+G)BAEE@;7iL_e&=Z34>qoqtBvwfb~50*T)X1oaHRqkIB9Nu#7{ZDQ1x(p|c_3sammI%$EwsP)eS; zeV*p0nf<(Q94vxfRG}kcSmdg!r|;4DUc>MFSH78dGCR9TOqs0qyHyU>W(ddHrU$`T z-u>=JrJe-FyO19;>1K05Xz!!(>w-=L(N}NMx}5A;)_bkBMoO1JYnwIlEq+2#V5B^o zPO(Y2PGPq3WOeriiYnGJ-~wnv_V#H6tZop$54wW#M{1gP^w+-9lnDxc)%VA2;e+XB z^YjU3V>Cgikf1K$`hVPa#@_VHzE6#kTJ-U+r)_ePaN#k%iL$Se(h;2&^W)e12-^F= z!nw}?k*EP5Yd3aj2HQt>gONWPq(5)?aRk|xA5kWI&A7loujuqF@?}MG+G{XnvYL2C#l8ry_IGQ z1gp*#l(w7@6am_gC0TbuDrQQnVPL+MxYU^gBW?Hx9z;@Kx@^eWRGntkl4)|mLUY=a zTI-NH_sa7UVpf&-*Ax(wGnli7kABvQZT?E`&l)FLMyy$;;gwg)$}9l*1%^54@G}8G zYgSgQAf#8*HjGYtD8jJ29YuyU5unL$sN4r+hI>oMcOO3J*hcMx;W>N&Jl}4@m;gw@ zW+?A=b8|E4sN&0)z&ge@A2dkBsONqw$?%84jsaG}X)NgsCc_gDUe$97vZ^?DSx!fI zX^7N1h>X@es2J}qFpYG_$kr@m>rla?6&i=_B!<}=huhH>KsdeZk0Fno8k{M{4sdTS z4vma^`(F4L5eh^qeV6mHVmFM%#!*V9W(Mq+bM3TZ?j2ue`bltoS*gtt!qysnU#xNlRg}{oACa{okZ^sV5bc`G=47#f#{ij z0TK)sQEArdp=U5s}i!SQPG|5K;*^V%)j!ong3Rc9!v=!tYZq+cm?}Go+?T z1*a~<+gV$5?<^wK|LWZiGVUbljzFUA(mj~-i?zp{6m*2%JGtWQ4V*xv2OwJqC_tFj zYbBmEGAn*ZYRIFTue}Yuua5BHSr_3s3+kvB#kdQdP;bN)WjLm; zDz{Yh*$B^%(7h8Q=d58S7z3Y)rFR6VTUgAX|EoX*{QV^a z3m6h*WFQ6jENS;v>OIR237ep$CKg$_b7&h@mMBLQ{#@Eu;_{fB>Eojv68|H2rdyif4I zFh*Xyg#3RQBTl#P{4b0V!*~Dd%cfubQ=jn4Y)eeYm~xjaI?9tGFl-1ZWOV1g)6M#f z2NF$t`>>oxQNMHL<6io~LI2Sl+Ir?t|jOr5G~LgyE7kjen1Cp7PGB`s$t-RqwB8 z@g|+Ed(cp3_LmC_ip5XIao`7YNiGo8Pvap-;`ByHl6Ch+dR1`#7Tu~_!Y<9HBn1h=r%}G)A8r29XJ`1%d=g9c zj-6_ixXZS)f5g)0BnxZuC*Y;NN#E=pRy|j|yUXrG;eTRBJboftNnAL!Wk7LoRZ@OF zf-p^^AdgNhM9#C#E{ijf^y0&B&9-Dmzmb=?n)>c34s0!)a7{G6@_Ojv&k~HbVsmH> 
zPp)DGY2|fiZLX6~bjTqV^riOEINLUpQWr5df40<$l{g1!{3HI@m8lk(OB_wi8~@C@ zXzdP27izU%lHhH?X&i`lK$dGgUTz5n``35FD#n!@WQfHvsTPAr!JAX?Z)5N7c3X8! zOD6K|cdDjF!C2b)xuhfs0vB8#)g{Ee_-Z1rpp!7TH0`pgJ7XdhRa@!JontbOple>7 zeb20eq;GV2r;C73t_d2KS&Xs=jFkm($zRMSN;j{J^E@g$J1~J%*2`FxUhvqCuf;B5 zlE(YK85b60amH}TCw6QeUM#H;CLS-%QsxwYZy#cs$xeG|n1Ax50`75D(l@hNW4}pV zYiW0_=)&=YH2GOyAUEc3zWy1=i9H7N`S(leV}6JKIDg)*1apySLsNIvEs_Ywo#dJ_ zP0ekdSUR6+AD*zR{Nw+egi_dFDl4w6+@yP@tk|KcJ;(wY?XHD+yOb?FJ%$~>W#b6O zcH6zzeMVf~f~Q)W1$OdOI-^o5tozEro}EG;0I@KfslL6G@F4_@DtPhi*=;IvjhD5s z_cQ9nTRHx#t?I;wFQ#S+5-lgn6d&oIk4Cb^u#=iRY2A|^<^M5B`0nxuR8J?f^VdLf zp||6o&{#?ppj?z`Iy@F*t+*ci(@cONxW8o>%L$VZiXCZK;xk`X*w)52p6~3{1_W>l4^f^TJz$7O}is;dJq$q{#uRAAUXCGNly9hPU(&CfU#`B135y`E25WA zM&;hN+NETk6YjVSoeQtndcMz#VV4+Uw4hqNHM?NSN>;2A+HcX?*Ndxa4~yT#(t|7U zHlV62Yis+ocL-&20*JuOyKC2PY~jB2#IB8B5%u=-o?Du;6u4=)5+bn4z+{HNZ97R@ zcR82hST?SlUd_;mz2MHt#7^fUGWSI1w|BRCcXfHvxW$6cTZ$V-iP*;F^^bkdV*MQI z{a_95{;n9DyMM~osi;`s4{=H%=r7! z9Q+LyGJXhbyXGT186WL$I^D6};+a=nT=>HtFCYs=2jF_q0Sf zvbRT!x8%Y`VtAD4UZ39iZ7$iw3N-fqK=}Z#xz+Ll`tLoqET#Af5hr8(W%|4@) z2`#s&^6ReoniK3=BL_&)n zyThIoAQ~q7;`hW0;0|{5OeJS`vo${DP6XdW+?)7NQ{H%Mo(=8?>~uq%JvDQGaAtSy zqd-S$CU(mebE@`ca1Nzc=Eo3VBDF}dyzf#&L_Opfs5#l!zdN^mn9aW}eY8$=)W_pD z38s|s7%sW9`dddZL60KEI}rZ_YsugCJhe0&b1y_(r$L(@-|*L~b7PqekZ74c29~Q; z0}Rx@ly&e3_lZqGvN7`9KWCU9bHLVYld}H$Y`zZ6BLPLn3~?NoOq=YnHqgEd1$G^S z?#|Xq5OI8&p1yfcIQ_n-s5@V$73A1sK$P3`)m({JQ&A4jO!bY-@n6PUzWsu=d>4~g zF8_(?XIi1Xq{xNR-AqrthXSK8CDf#^yJ4nqrNdb3K+vNi6+-+Oob@riIVln94tV1m zFDNEx+tH?uSy-BTKa;l7V~O7F*iD`qU2{V6gTx&b>9~>|^5}2*GZKFHQa#Buo~@RX zHel;JqsLzhd))Ox5w((YiG<@z&XUe^#vvc&=JNq$ZD93Xh7cJyPdXhJ6;&xdii3-{I0bf{=WDTkkwc z$hBeynW)q7G0$}#~tn@M&LluQowi#`-RLvff_gXt!M} zJV9ZbKgOo_(2T+bPhMOBONopF#H+*4#11Ps7rIW=Q)&;6-6F~4524UAQOy9qLlN`k zP-WEL((~jSTYO)#IO2SK=HNxu>A=-^=6o|FN}S#cUqfR{lY5IA8sg z56d8jB4&FU^j`QQrB3sDEWu_zFkdHg$?w@S0$>03kRVqPDPT#%#R|V6xW(denAjGf zRH|@j4ucGvf=Q5+?d_ajS1cIfLjY{BZ(G40UH#MSo|M##)u^eht!-Aes5Ipw$`Xty 
zNK7=GI|NN)qqz8($S!24u4%IVa^~LN0|BI0;ppL6z#Ojk9C`V}hv(A2$f4maqsWlo zhpq7vtEkxRq~zU+FLAY{gVXrj2&&UPzjWo|e$~+ThKNdszd_MOf?mlJZPif?f4%Kq zb}6w}7)AbJGZ3~Rc*<~buTZB8!7$kclAgUOU7iZbW!01>=c2(AabPj81u5VP(l<(r zP9hr1SC!>p60st&D!?XgPxaS_C~jcv{KZWeQ-C?x+@1&<;#|Q*GFP2nBDi*N^WmS; zYD>i3)yhR1cTrksP{YLGyuyq&ujCN$-N{yCb3Y;K1&$gU;N82%s zVX9{w#Ouh}NOiD2zC2K4`=;gY)1xPNYktt$BcUp@KA27`g_U7Yr4 ztjn(v;7oAtmgjq2=saA!sP4zUmszN)lbgXZw!=HVR1>|+rv0Z4k~sFZ z5>3BV5bML&KM>5OYZ%sxLNUiBVH=?h7$(0%4t4EOQ|ooK(h0ll3ru9gf4L%H9)Bkj zpE^6+*$WOGHc7pc^G==+EH(}KsMgWu2?oKX_JKOJv$wZ8!9+{zUB6y+*;{bpV1*+n z8u|ADzd{wjKu9I15aTQx-f~1fv!8?x+vv=G0vqH*<%+8#_=tu>?xigy?+nZ@ zxI7U3MAa$AYr53N|4nPjQ#?#9g|*d=Ti zTA1%X^C!(u*52DVSIgy?FrTqZZISh3ib&N}2TVK!oVEo0A%sj`5+6=g191D<#kT+9 z0wl4B>+H%UD;?H~|-6h7JC;HZjigk8&L zaJcR^Y4>(b3E4e>tMItGQd*TjJF{`Sa9fhYyqS6MLzBvMpjgo)hD3TBjKUx3=azhk zIc#p09##;#YTl0ed*1NPF<*5jPP5*~ z)UUdFPy2MHX(pX3EsI7Oxf;r5BUygsKKk-4MlSF-s^Cgq=HZrW4)$mrU9uTu)crkw ziOwuu2l*(lC}@)(IUz1hU*-v`ZMZPfM>OK2U_|xtTN&_^{0NWP$#E=)0bKVel53kj zRjsPS)bdYvlfr`U+)}Um{|}3jD`v+rWNERfvB$6}o^xyDN5@9i2O;8ERg(Sy=oV}O zWoBV{55~#tuH~lkziz#Zr>^k6z62T{Ii`pJPQ4$2ghZsgfN~bCzyeZdz!NgarUGk4 z_xqf~#xt(yc{Si<&$eHikZi3U-ul?a5LYjS(Nh)cs4e6yXY&-GLB=La8?ma=5 zRMd~}0n3<|X*1OOv!mwkP?fMOFsgp%=JXX5%lPLa)>jP=*PH-FE)kT*oSE$W% z=c?CuRq*d~=VtZi=-bq76r(UC&>FP)^`qF_toL8Uu|fd{A2)w5GsE?ib=mGNRl|Dv zm-i;i9ATmX+*Pp8!I0}TpO$hQ(QjSO4GwL{b&jsEl-j>Ft2ptUk&Ei*pVmi{rYgL; zqhL8cTh_48uzPfn6RQegM#I7~sZ@71zDj$iAtfPs+IEFC9wZ(H62_Y{gCjUs-7j#* z?(aUHJE7cFFS&Yv!!0|PBgm}%?)!uXm!fe3pwMlRv`eJl9&ebD8%pF$W7QYc}klza1LVVjkQ*>WBy|qa7Qf@oOZKV!m^bK&1 z1)O!UGt!<3RohqI$H#3)detL;HkP3ZD$JWwJHq z^S#c|2w*+sWn(8eb>XDL1!9$uwYwjBbaOs8nrjr0wVSnqg44pN}r>e zs$yco2s}`g!k`(io{3bkjYNwH3=g3Q3YRtmB7)_C&7}YSBrz`bW6!}A+2(WA)5z|4 z-jAR(SQ!#our);C+P>iV=AeCL&;Q0~!)d9Z;^K+Md{ zZT-Tto8uMq;$C4jGrlAc9v+?r6&Gul`la#P8ZW}ll9}rU;YS%*RSsb4I`w| zb_Bu`^G(2dp6$*9B)=-nT(k zZ-_-0i@mB1{ppk~^GQJ;`}}U|nQq$&{zzN} z*ZJv~+P9`|dg;({7{T9#CMAnW5S}YOJ|FW^oM})we{sgTRlouVMx{O3r+#jKcID3P 
ziKJ9g2XLnD+k01fV`$;S(^!W8+pc84z;Bt@sj3)>X*Q>uCfu( zdm(@)vI|6t6GqJ$yAZ=&Jgjnid@Is*^=~@F(%_OfG49e`njtebh*@{y6|Y!qb$2wn z*|&U(MG=Zf>$my4<^gC1z>qxb{6z+LjAg_x|wSeS^e6XM93tXU%}u{RsMN+pE<{k*Z4?Z)i^@WMD!w zg0^~bl;J_ht%+j*qtr^4i$4qKg@&$uM)We8t@REA1b|gHQE_mVHEi`fZfK_$U&qlcmfrmF z&j&7}?VuC6feLLw4xACvS-xiXX`Ejz!X2JddirG{?#<5OQ57xWuzvLsS&KaiA*&N0 zeDdOijY7fe_vZZ~C)pg*N?h3*`U8Stu{Ux z4;5<16*Y2B2-_)x&w-3aM8Izy+6{9J`u-gUriPl{YuLySh}@s6zW|A0)MK`RfogU8 z?KL(3>DQu~7X?!R5$n=(I9J#y@`CytwJTVR`UpaMhK33J<-8LvVg#xsCBKp@(2*;i zR43(~HzYCaeD`9SH#I2ZlH7ss&_Elc7V5t`RYVwEFa3v3F-LQf=ZldeBbi5Y2xWRt z(nLT+Z3W)NE#w7&^xofd1?!drI~ssuaPxVvUd9XkE24Mrj-Q?B;$g@6ky`?17vo8} zIut}7hS^z67}*n%;{%npnn0t1S26f3$6J5={CPf@Oe)Q~FJ*~46$~o}3j(F2MneQ` zhw{Gm9B|N*{h&xyD=f6gQojGrC3Jo^g<-k3ziE?Ot6tp3ItT&5kQuiACaZ+dBht)% z%LvKlNPMU%-4Y4V;Lh6VXhrE&n`y=S;8#8XasrT~UJKk`=>v@CwWU#_4R zbcv7UF`fyO80tF~pv5vh^z}s6gQ`bn(tD^pG zLiI3D)<#FfK~KzgbaFFfinHlg2HkwN3p*Pz$A*G8*b#oKU21AsOTgt2&|VOGw#OIt zFqiU1ereeY9@_>kFKfkOc4fE#Z4vaj=3$Ka%s$hLOp^l3vwbLMGc zH)}L83z{>v)41j^14_3MLal8a0BS6c03EPuuP@xipEp4X{{hfjhTFn!m}4*N3!UHR z9qQ)=37Mx2n$2BL0ybkx3nJm*NM*45HWFrs6P~eXSt7F5P?@;FIv3UtIk$_8a*QRQ z3BnBZwZ?LO87QNWPA&S)Lp65w{8G>JB5uYr-E57ou|Dr52e0Zpi3DT^{7!Pv=T=lu zAciE1LziQ8P0Bn77|6+GL7?gE>3Na)IN*0BNSaf8*B=8hG6b_ga5PVjN9?C;-b64~ zPG9k`1p%(=azOHR;EQOr%>Jyb(twp$N9p^7l6Oyz<{nU@2rfiHG ze3f1=-v>F9DwvM?lrLcas4^rOgby_4c6OxW5(CA>sPo_Kt|1pqh#C zXM=-QzqW>B=L;srSMrZPuPPVcL~D#KGdA{aJ!KksYe4szsgascW@j34*HZ@!+B9Ty za3_0fZz{_)LZI!nJBh0?5sQ1gdITS?SH+{f0FS-FXh$Jeaboa8S9|@U-TV><1+j#7 z2~nWAxjBA$E0BO6pPk{?WUr*coqTS)bwxi8qZ5e$tqymTKc>v8hqf7aY7h7rzd)VB z#>u%@*1@=Nyt}ZJgI1C-L&kCe>Kq@?bbNNC2k-V;1+Y182mda4z^wVK)Niss?}g+L zBl}Q_!Vsf^7NY|AqS0Tn)`K;fi3-~;&f~RkRI*)j-1RSo;LfdCUt~Twa6IAAHTeCl z4A$uasFMH^uIf<%CGfxC>eM zE>(uhG4Jl~0!cirh*MZ@Zf=_=sA)cihl8~^%N@8;%qjhHwC@QL%-d7DEEBYr2d7O7nG z#1a-t{EryzpBQPCUbJ=zd{tWqkb_VllZr}6=mIPbAf96sOD^f!Yxt4?X?Z&qWwB_I ztyapsreGJd^w0D7_XA8RWnIDZVr#;fq*Fk)1Uc}PWP+Yw+Y^_LX|tmNv`YsrrBY9<}lR)h0Dm0D+Z}krCv8UsF>}El(u9&iv_{ 
zJU-|)Uwr(LW+ufb`5wZAU`PJ1KmI%$PezdhI|jBUwDlQNolQRUTP(g+Y5I80v9;(B zMXzd!<^S~=qlJti;L(fl z0I~}x+wqvsY|GnK4>14$^=}*s84Lbp=VF~hWA#Kk8dCgriFF*t~@!Jvt z`&X~Ggz_~10h}{1j6yYg=|0~jV68<4ge@dg+<0OycWG|Zw?CgOHT##W31-*mUN*cI z_5OD%jY*Jhf;LH)YiFV9p}rvxOabRtfbFdjBGm;4Sf z>%A}3Z-H0WDJGa5F4QHgrN2CL(c|vn=O&!e{1&p9-Qn)Brj_w1x~cPtIoLA*)F2FO z+sz)JaSI*~EfW(Hz|vk{wcZVr1wu zbk<(T%U4)*-a%r(wdV6&*8*VXT7M=HKrYPzx72H&tq%-v?oyV$cHhpRiYV}B)jb!T z)>jXRRGu!8e<WkjoJOI1h}&`3B^C?rQ@q0Qfrw zROaFVLFyZT<5q~K6Lv5|&on(`VcBgZ;o$|)aH7vxIVf}820@yUpuU-@|nRUZ<%iO048Stp@$B3jk(-;3%a?&_KfF%zc7i{p%(1 zx1#o|k58WZ@i*Zs>K@mK`kbXjNG|f3k$LUwgHt8i+}(*Xm@&h*Y+%_ zf`9isx?RrJ z=={!SUKES>Ci?pNu_V5FdfzQmH1f0lE1MJV=HTKAZ)thj*x1N92*$@Ia~J*-hx7DL z-W%L0&AK(7W~)r~AK_AgarfqhwR?(zHa)dYVYIhTo&&eF;$V(4{v6#`dY9s@Q|<%# z37OP!V2Y$)Yy-^AjUS(49UtrE2-yw&c`d~f-jG)NHC83J%|@_ZFv8gqfAh7|f0xL# zg^*bTUqu=>NT+1RMheM&^za9Kka7ak5ZRoWa<#;hBb{x{M23L)if!EdSi>?=T-QD* zEZhP2J5wVcK*-<~@!X1rg*{dl7JZzfa3a_dRejk_i1wyI@d&@Pq~!D-8eCR>0RepX z;)V$vC^+J}IPe_`Ad3X%2!=T^^YVTH1B`T$kyDwjxz`KHWfg(}w z*AH%{$v-E$1j81jMN`yRp`DMtecVku#ElHlp zRh0?bHfP*t?#x_tg6ZyqBit*XX0^U}PoA}6#8T-c>Et|qyQ=Fnvh!EB7ETO$*Occb zwE9zP2XZp|>G zeGHkJQ6tB`_K=rfI_gKy7ABsvEc)4&u8etgL^<$y| zboDB6uKL8bfIM%!GF#wN6y$*n-2wnm9sR7ey-u7jSC^Q+_hlgBPPzw@F6DJ0$UPyB zsH{R;52p!Qz@lm1?;ud)&>kw)4zc?A+HxRLS$T0aTLAL>onVHto_k^0K$7;%Ax3HM zg_Z&Zmkho1DtlzpLPz{5L!`8#*Rz!2%H^MP??y^Om?KK0KgH+&%6`<(EfMq-HJ_5$ zoBJ#3Tg7TU)ep{$&yD#Ts|2t zH<+0|ixkJm%TJ99MZNt*>b>5(s6Oy1BanABaUW@^u{xusra{T!^X zGza5az)BDS`w`9$qzqsX7V*<~)L#%UjGBBYigiNeG_ijCe0=@I_1cTjdJx86+AXp# zJM2)kxNt#|7@-l#_9M4EQBCiJNIFL4^|8C!a~qkhqw?XUr3pdZ4@q(5AH+NS-v}rJ zc?j?b@8}^-o$lkIjhFS`)(=jvym&8hG3^H{v!r>YJ8-=4 z%fzbqBB$W9;cKyA#wNggpNniyWtLp|JZ!K9ZPlPRuwV5ySb7pp@B(3oYcUik(U?Wldrtkd}vd&fz%;zO&P+88-o z=VENwi^JbqBrKlBwbMvSr0(N0Y8DM74_)%S?VFgE8DINvtIi zd4W77gzGIM5Jw#N4-xO9uM*Su^#yn``&>x9lTk`!uE(f9$C1OEz4`GhOKZ1>+k%l7 z(9cUE*K6Sy1eoneJfdjinrV8057g>)fH6MA&caymZML-BTf!`R0Eld@uNXicJ$!hJ z$rNjt4Fbi%_B36xiuH*<<()e+N=jjtC*Yg6X=pk_m;$anefI3_@c!}VV?RbnC?(f? 
zVQI4)hcUQN+`}`M#$sl90%vSkCAnjhr7u)U-(oa*vM5)}AKQX{5OM7>YVFf22h)84 zN%@|vS1W!FIii3X4~+d&O^pZ`kP0^Q!~w5ASfsxT7BR^D1gmUl#JxDdT=3YE06F_?3Hq+$CwNav176i1K zcW7x9K(0d#L=jB>2VWH@0pZi>a-Sq7Dcck5rh5aVCLlb%TU{-%z3nyI`ij!=;&zNi zC-8yCUK<8-2e7~o&wi>DBV5jW_dRJiAKYhCU(&Gd!Uoutk-W4j{+GdQt zG&{_+ytDsvt8u4gACD>IZvt%q9Pa3vl%&m|b}d03+X`qE4{;eOw6D9HscyTGxf|l77M< zEnxz)TIYba+3IiFGkg&Z_z8@Dn|u*qi1l23Ok4+0%Kt$k&8KQzDO5m}RiIdPHP9FU zqpO~*)x{{8)R0rP(&osXS!V+cc0XaX7d3Xe+A&e{MA&KSyD9uUCMk)Y+q78;$g2nl zMwt$hrTl3F@uE1e8WcZms1$uD{iXrTl?0U?7$WfQ{rkCvg*I_?qbBS59sJxFF2nkf zrVKp(t{_bh`lxxsc7>DzFDu1OPJY-+9 zoc}M~jaY7f3gRn{w8ASp?V2h?fzyw{bt70GC3ds*)2?1=PJUK#`WoE3#_ruKW0C=V zCe6RHUNHUe9e5P$WO*S54U2@E{2BOEq0WBSV$o_BH&OagS@G%HAdnL^9Uh5xMfESjo_p9mmWyt{aNx|}eI5q7E6o*)Z+%Adf)a2YGW z_mFQSh8ZRN(7eQ-iG4u4rWv&6^`G=4noW*LUDzmoy(eqVUg?kPjo%mzRDqV`T9_*B zj-#h5!CeRKu`ki~Tn76jRd?4sFWZ!$O0^VXkY^(^hoiUUCoj;r0BrOT=2V*qe*BtyS}?k6aC!QwTbLtI!}vlY4k zJ$9W9fZzlJ9uX)3uY+mk!9!81*mmG;GhQ`aE%IG6 z_ez%fH3~1X`dJ~6G`R|Qzc2LlK~u`|&yfWsu?m01=fXCvr5#XRwu?Kiwy%C!8|6SZ z<`=jmG3Ine_+HqPwxk4u9XAFUv)B zF7IABibTbxhhIN9Tq&)PUz^V1fFPJqvZ>YTY^6;v`R&g|1aB1dg)l!N+?h!@;T)^( z9dNt3nJQg)E?nqLY ze&%(Z39T|~clHtfk#0U>;&H|aESfg%waHi7{f#Cpd-O?Bbve}4S**!%)Ai`o6I7Mn zM~To*4$dD9Q{%|EC-L4XpUA>4-D~NHBn@Zw@$95$Tpda*>6sW1ZhYTE8C(6|;gaaA zBeLmKJFa2EYax>SY|M52UaG_{yL*6%UjEYAQKS&(R5N2#=dV9AWCGz=VcG*jZx|I2-B9jg?}Jwk<@U_?c(xEZFUOf#8}jR z1eb}4iE!#L$^Bo_%2iN;t%5TXqd?(DZQJ|_f%SP34$#+7X1mc61~B`01u!CjfbfUy zU=I5Rw>AiS37r-)rK9G&)Bw!=u4KuC)s6+vy47?6DFQFb@o}NXKpTUx4_1C3bMW7d z+$ecW5yqtqIIpX3Zd{}`osmWsC@?+2CC49_yt;P%{^dI|JP(v_U4DK`UFUgA&~XK4 zUg1Y$Xz_-@k6HGTypQ)TWeDEB^<3>K!5YiKtxJ~&{NjD#aG(^o;%5_)`4lk1a5d)7rlTS9kx$cr}nI~`U=eZn9`R%Lwhnj8gOM$WKJ!?bd+o5u! 
zUfbq0-}tzs;@w}QY^BHO8UUH-Jre}o?@sme5z5jJe+%W-$O-dF4NPL&vjOrUL5+_UtfMB zi3zZ%gHFOtsuUarE^cu!ihhuocnb$Xt9`*uAr+NSjrUd zGp-WH#Zp26B?Xf@DSOlM8ly3+lE&7@;~(6Pa1@(NPZ2{-MdKATa5`C=3slI(^JmZA zf0mb5v9MW@iuEi-Nrh)m!e>`x#p5yiH4mS+@2wxpe zm6m!x+>-Hr4sZlewScALKnx2I2Ew+G1`#3~f4k){x}o$q%K$O!kiR#MNScS=|PTqGJdm2bIp}0{eT(@uKd9Q6R4%=Fb!FyUN(#d_`+#e!MYp2$W+o=w%o}c$~WbkNUaT*tD&0@LOxD-h0I_z778! z6>2Kts2ql!V&Zo9cP()_F_%wI#j6-++YK2lMo!xs>n$ih^}9|D?b9AvGgOG-2rDjn z(ghwtYMuXZc`mCv*x=~5P-9x`lWlJKaH>F;T>qO}Lm@mOaQW=&gsl$DAg5qZbq_g# zd!Saf#_mi4Pw~+w((9#BD*mH=#{tZ2g$1*wp)h;JlwOj49A_E8#+ zI&b;Bn^s2r{^)eh?mctdUM#iLa!0O6JvLsOt|pm`KxaDJ=~ntO@B~Ok!EsyY1C_N)E%r#M zxpN^BZ`Vyi#uHq=t0>V8k7eA<7vM<+M*k@`@co4qp%VybIDV8SjZwf4{5m-~l6Ts9 zH^Ntf`Fyayo2~6ZLTbT~37zcq90|GdE=?n!t%Qx4|H%zrBFWZZnc`}Wl1Azsu6$1B zMxCd>`X)nuCi_eF1;fH&tmMs%^p-5# z88pS_cgA4eREeZ$iMgTlwmmEE=1K?=QC{Yjo1`jl35y#d5-0V1Jyb4;hK=g5`g)7y-9Tchneb|*g(a-w#b&3#P`tsDLZnqBqWY%ky1Fz&MU$C%0| z;;7{3l3}KCzn6L8q5i2t-SKD=T8PgH$t9_T=L@D$wlIqxEf1q_i!XX#+)$=SM z-8xZ4XQLD?65mEu*A`=Y9hiSHOu|YbV~oxQ1>$f1O48i4Rn@&l_%-B3~kMr|YLVSd6_}ZgJ*&CvyQ9g8M0MMDIj5zxHq2 zh*9Yy_r4!Xf1C=6t9eZaG5T{L_o6_0nJQ1!cgC0lS@I_<~Dt&l^_c1Z>LghxK!(v?0 zx#)YM!auVmCK@h+eDctH>k5TT`)aKb8%*oY(h*%mYMyf`mi+NR78k9nD<&xun2VOK z-m_K(4p%ZhgbUtB|3~ciNTy#Me;qTVEM5Kb@oCxatXGSKMq6xKgAIeZ6g=e^mp`|9 zR+hdsnY_QiZ9Vy~0rq`m#FI8?yUloadPGoaCMFRGgmg?y43Lq%Cm?jd_&O4opaX<_ zvR_C%IYV7OBhoO??#*Ki=9Fw^q;VzccaeM|42Etq%Mf z4|+nvj{8uybcrv4S=$ViH1);M#{EhoZK~ZT+@C7M7`Y2%mQXJq;Owdro-veKAio+{ z+JCjE>II3@n4341=7(_TW7M!O7YaRnsby7!r?(L1k0FqdT6cnMGx5iEGC;~wqqR`ih(YZBlV(kO2Q)V(xBYT3%4+L0$S+)N;l3ha%-vXJz9Sx9Og(| zoKR)f8x0qak(Xy=@)Y6m-@lK9VqeCnD0aN7_WV#=-JyPVg?eaLLFzbwz?SQq4X&Y1 zHMM@t1Xr!83zgQ%b!CG?fl|=DCC3-78D&9`-SEsU&SxVu{jz zqaoran)zB1!s|{chEIagUgu$Pa{%yXO)qv%h_dQ1|y>GaQh-` z$kog49f5S;_tucWV$;*~lIa2y{cc)LG>VMHe^vPbPaEDjdDNS_4Bw#STPVo|DL}TY z4BhXerU)v^zQy1RKT=SaAgh5X+t5&Kcq9+I9O16;)|&HJ1t40Se1pXg-yD5l0diYr z_=Zq)>HvIU;n0+vfnMd6CCy{k;#&A0y|_IaG>+tl7n-MN`Yd#~ygjm6+2iH^L)Kdd 
zM74H(!=Ts_iijX#6G|v0t*D@MBMl<}H6Q}gl0y&S&@l866WykwSM)YVeiQPac}$+%JbaKS6`;A0Q(i-&j^6)X7Q@)}rZ#dO9njRXyqMjm zeTuRBO{PO7>iw%Kw6qY5zqD|%f0b_lOOYqSeP-{kOaS)QN!^?zBry-~@4wiVL<^83 z?+$w6WRIA>w4j9d5`RnfvY~u4s&Hl0m0C?(Th`rO2%sN;Ez64wODkK=cAXxSC@UyG zqBISxe{q-!rJJH*aOYWJih1WGGYN-8xQu9Bu1Ix1lC!A?1;*l%tsoN^m*Jn-GBl3wI=EKF?6{9s%l zfK20ns&VLt!2y3l{tTzPhrg-!F>@`3pD#r=evL*Ku#sE@xQ>f9?(XdFRN@Z=Wx~Fo zuBj%kX!S+Qq>Bkka9Ci+P8{7HRBcUePw9SorHV6h@OPHaNR0Trcv|g#g1~48!rEN) zsurgB_3I*Ug3hA*pQtlEmr~>U>8d-$G~PBOu}gH!(pJmMd|AAE%Lz}IhzMPPw+%Z+ zebmNjIAR64eLIKLw(K&FfZ1)u)tBcKWuG*pt`79&>!-CmRCcDU9hZv!+-K{G+i&j1 zrEv`7FnxW0w;ksSzZ$nV%tmat$mje57twHfxt>QhBb_!wEoQ;i==9)`UC1X~YyME{ znR$}zzVO2zP=Wh$a#bw&nnb*x-=jAXxj0Hc#m8U9CZK0}+cAM%`4Ajiv~U7mWa+#- zLi=<~LvjkA_3(V~ z!_3~Uq1eS-^wo>Ftlhmb=Ky@E-y<2#8w(5SsLn2tLW-G*xqSZu*Xy@u=JxAZ0|SV~ zZ){ey|K8abR8;Tim`0ukIod>p&P*0WCQlw?E_pRG*?r}H)9^{+fE_(!ce}+Z<0y&f zyh#}Rw4LOc(~TYK%%DF__q3K6gC7w~s@CX?k^7e|p^@i95qu;oyXShbT+RHIBet*g8F!2ei0M7>X_>vYy=zh$G4t8zK2_^%#VI#enD$X9V*9omUk@mmc~_R>AlU^h!xL>q5h<&Fx~_n}SG zwR+JUiP%|s6m4fLr{>Mo(U8V8GAlN9*-RpUWsT8o&T_DtLeD)g)pf?)w8+VZy_lWC z7a7dkaDiYsG=m#45Mt|DI_ZzN_*G$%&1phzPMF1`@KprrPLuNMYW{Yw`uV-Gie;hI z0&&Lc6rvmx2{8%E{ss~D4n&ThS5OF=7dzMY=QqLvM>9NlL=>I}dK&gDz?O6|o_U^} zNAsh%2e(p8LxHniZVt3#HD^2kOv;P#uS}$cqQvN%PLx@a`E<;UpHn($XB~Z7@%!IU z?X)Yshhu||*joPR=(>9+R`k;+&Y2^)sKyy11C(-L>;R&}jipR7s@O2^-MBlwT!1L@tH0VB0?J-C!pL6H ze6065)xR7-^0$Nv7r*xCuU$-LI(96?+~x8EN+tS~Q(ki)sfAMm(W7cj4k_ zSUWyr=-}X|EPk%g4tJ@*cUMYQ<}~TbF z6EnBv^Zvtm1ZO9lMj?@JuHL$x*B$x4Sc<$hQo9>sZJqwNyJsrnG08o-I(%dl$Dsmq zHZo(e0Nl{{A?u|r2`0uAbva$wmTS!TL;UCZW6#(Tcc^5iSkFDm9bon9!Fpu&^k!WY zzOUg}IJ!OtKOH2R2%gA6s@+zi$0M6o)VU2@ZIENA85$xuAU?Pbs7T#5d=Ycms70_hy`O12nDx0d z0{lr|*8g^!=7GjkI@c*JX2!+5;bzUF@a~66>>rL@5|@6sA=UNa_8;UNL-VOV8fPg# zNzqp-*ieh*zM0L3nTU?R#8G6*Ar~Z*~18d#%+&5uu z`Cd``ImSm`cL?60$GTV1YifBux1(ttBA@g(bXfPckIYXWyLeyY6X$WDTBqrkIfMt~ zv2xI?<8|1bO8H}qBbFw23&%N-#aZ2vK;-#ny&b*SUd_AtR6SSIs?P?xv93BH<`PV^ z!q`_f706dO3h5rBUen=Zq96i4%}yb)T6geEX68oSL0UR?_AW!uu|sv>cO4QIMrMTw 
z+77${3whl`A>tlOzktto7w|N&ekarP+?EF4HM9A?`S|e+WLqeLhp}#rPl>!+c;7tyFOR)8Skn)Hu2)P*KWqq&X{xOwv{P; z(qz1zF{drRD|ktVa8~j@I!8{d1GxyGoM7@D?vtm#Z`3@lA?Gy}R`xpb^<+|r%IVW| zeOb+C{=LXA=oojc1cCR~XA>nlm|MGJFs_i+q>y$uO%29`J`_9Pg5Mq|5ZvCb2+DBP zGcQ4hMga<0_OB3IOqQVlI%7Fx+4JR5_3li9m$x_goB#N9Hz-!zlNX#P546{~0Q03K zf>r!=NJ#zDTc-Ty9v3}11f)C;(3}9B@ip8QveNC&9g4$2l<5P-A45}bFZEaXY}t@Q z>%dEuuX^={K)ZZ0`b2J9{2c0lam(n>FQNCVo`00uSn&kvn|D`*PqReMA+ z?$*oL*mD#Lkr=?aQs(BYD9B!g(6~zzHb1m^7x=%4H~Jxh_7gZl%T|2+5{&fIpY$Ir z=c1*|7s%m{Ul`}CD#6m!bvb%ATdDtM(n6lFMQLezJI@`-H>%f9lgWQIW6FHy5K-vX zsLlRCOLwaM9G<(=d-spnX7zD>)G{7jF$ke#4-frKA@qP@V88i=O;*o!_h|S#>@A@T z*U#N{x^JICT3F(d$VeR~TGcd^6;^r>aiup76-{p6pCo?kNGbyHt<=l2p z*4mbbAU?WTx9$cXvl@jbk4j4YKY6+?~CB*D*O2HIk6jjTm1AZ1+sKfA;*E z;@NN@g&af!9D|P^Kd$aA7P>;pg)30rDROsTQ34gA6TB;d5_X02mxR(|;F&*oFwlD| zN7FrcUcz@f5A!{h6KHY0)!wdkEOq%gm4ova_`22v-@y^8iFbbWOgQ^yoC#^tFxI9~ zKB|Xhn{~YLQ?pc~)q$Ywz;5FSx6gsF1|%eIpK%P|ZM&2Iyx^FW@^Xrn$%V@*U1K_j z4o>8&IqM)zgxRt)GB(yx!69H-+$}_jEEMTC|I~k8|KIg7)w8~_{sZVO(37JNhbP7ohhkuY%+lowNF-^JBw|2zH@151mgGBm?cAVIr?K2z`%_ zw<^6jlofYN8hKapEoi>f6&^jhc=c-I`aW!igWt?gK=4h)n~MJgg1w*%?-)@CQd8rl z@)ar}3UwXJ)6QPCuA6gWVqzL~>s7jc|2`dr3_(Z>Y~0106aMQh9Dl7!+(s4aV6^`3}i39u(JENQ@5=ft;*w~xp|yF$L>~{bmic-{DXS@ zN@VCRcJxQ+1nroV5p)q*7pQkJvfRZ5Dm`rUsdsHv6WNAE`;3*ckntN=W!+@s&KMmv zMUxI&z8$Ej>i_nkndU>{@OZkL9|12J$D=^80}2awA9j29sOAlFvHDNDvL?5>)ngvr zWhRS$0Rw=}fzqVg@0Ha@Jha=C*~At1c_l|XQzRRVN#ohe+R~!!f5lT!O@ep|c*JXK z&JyyL%E~n9>FJ=ELor<&js}|l+OTcmO}jyUARvS+oiF`99l+3$^z zGOzc9Ffnd6n+yG6AfGT?vHQ2>0#HGA;B8&OcOcI}Rsrua`G5z8Ut9A!)QUVj9W!E} z1kCmeg zp?^s-Elul7jGx2v6#5^afYNm3*O%K+-CbIlE$8QZ?$^(9J2%Z>WYg&PssHs+k6ygr z`+0bGv8xr0Dytpcg|9LBoJ0+Hn5$>5IY%VxHPMn9h54PmUEf>!)T#LeHmAwI^FQCW zysy@%vVGmVeJgSi679nNXYw>yA~fw{S%2yJ^|1AImt1pjNy^g9k(FE-Z*TZ1*L0eRWoEsWNz`)C zci||7y_tTGloV6RUQacB3ARs7KDSkkvYvSNijeKyuZRm2z?rdc-tNxVVh&{Vd62?; zTLn7YBzg@4vCjAQ(sUDx&F+SU>u#qge(`WUt&i6F^YKsb)gsoi?V19$2-gbyP#Xtt z_yc6##ap-ETuk!O=W$$G?0vTvK_bTURpc0Y0>+@zm8{f&DUi18_axc^H6X3u_EES( 
zDLy(E#EZ%j2CW-IdO*`H*cL^cv2b^=e`9@BCuE#x2uAVtmh* zE_(L}RI7|<5q0wiW|7GF_*bP^>3v-Aze}R~jF;icNw~E;(~FQ=481_rmF8jRsSB=gp$Xs6%Zi zw>+RX@9uu4UdZ>qG%M(N@|<~@rHeo2NY-y{Q^B~S*_lL^{Ice_bq1}EQ`qIOl*qyk zK`=yA+jsgowtaA=Ka%5ij-kIW=y8Oh;a_R$2sYFX00e8@= ziJ&8ejo9`vKgS=S*la%wCZ|2Q*{|V{nit^Ygx#F23tC(hAboF5?8bH34&5#q(&~n62yfkzK8HG48Y1ppAzo()tmj?Dgag=uj#G}LMb^_5pL^CI~K$$ zRh`YR_}qLabAh_OAsoHmz2PUcWv^heGFPFYn6$wh#_k_XVJscIg_~GHxN3COM_)8W z6CA*9aa@r}xO3j`#?bb`6ng{tuf`OB4l9-~_*^q^mz>zy^L5%rT94gG29#gpbN(gn z+le@Az`*=A^-4eK-O>k%s#QtoQvz#=aHI8W5~tuPYbUg*5m{N;+xdq4idU|mymmQ1 zRe!a$9Pzl-5{no6AnjS`=)3D#8nEPC<}WHnD)Fzth&wEck1mM!c5fXj@JIHQ0su#b zQh?C-Cx)Rn)1J{ISus;XMn4&89;(soS>``dg%ooo{ddpbh*zW-w9>q7{3k9L4NHtS zAG}$<1rU6o(IE%9LXx3Pu~`Q`;36QUFp}>r^}cRoOymNHayG|fjCJMwX}bF{KQ)v0 z*6n58&et*Ii~qj(IXoSwkvn(0Hei!gn0>sFLpq=#k&9IDLAL8p#G6HN7HM{4z+P!P z0;0645EeiNx-oK&f&RKD;b)i``xi5r^DYX<*S5Ax<1zu+q-Um6w7SC&cbGaaD(scV zO55*Fe{a`DJH^oK&b`92{B-+``^kh60>x9tnW<@WamUhXG2$r-*_yQreCR#8B)x&< zdheF@xb|4uR!YRe-HNU2o=FTaF0F z=cDL?F|K`5J$l76zvJGBQBDL|Mvxc6GXQ9Y_H+>@=Ydg|PT?f;^oIQt{ci!J!f1Tn zcufe=TJlCcquiFIV4SK=iTH;n--c)wH%8V7ewCBwiQ@CRdd)x6fHUj1J(V~cvCw68 z%X!J~tSg%rLy3H&Kf7et-a&mP_m>;EFRuoWshs3q*P;3e9c*3?WILHw2y@z=p zgOtwE6c!A+dyO(8NxG4dwk9H`DNJs*wlgLHIbD0P?D0Zb?Z%=aakQp(8G|0$D{4;K z#ewQ{@_1V?SX-iHUdhoLe;cM}=Fn9s z34_FD=Q}fm67tn-p609Fxl}ghaefh1I13?X;U3lEhezw~H8@`W#rNz_td|iJj<(II zv(t;PZ4C&-Q6P=XPKp-ZoNt(gEyuNb`Vi~sVe(W9wixq{H)?B{DXPFi@zt8_np%N#>?&F(@}ZXWj&Db{Z%5ACj88>ZgSIc zPt8#5b(I9T_3Z$lWc{K7_1`wQ-wL>cbv&n+cs>n zUjWH6;t-5}$!1$*@kp7>ERgo zBK+&J322}rR0+^JYgMfPy(>**?&i60_fp$a-Ff4JKG!@=9>)Yw?H)PJe#-3Ffj(kE zp3+#IsG>a+ke~o%#M^f7f7H?e)Yp-X)tiI4q{*zAVg%F0!vjpUzl^SEx!@P#CF4xn ztvUv3p{bnO8}YA>yi|Q`G$r4?&|hJGd_$!0OLgUYZbKnh=a~vmxIYKh-#6q85?^kP z+OwV;jsJhyZ1d@;!L1{2b@cALD7N{az}vS3dt{uMZ<9Kfw)El_uzQZ*v6^T!G4Oh8 z>pJJNuvhF!a{jWtOMT+x=dV8mW>NGgPczp{e^Fne%1p8;I{UJO_?}cQ=AY#uguZDL z#k}>h3z$!7Epz|B#i3?kK=vQtLHe%m)?}KUP-+2{bSreP-qYbCuS1F?AB1Ouhj%!1 zuVZVklwI5kY|Fwy;(VHc;Q@HYkuw<~-+gJU3c_tFzd~Y|kvE0`WZ_mrAhE`pLybt5 
zKeM$JUQXs#f!^4ADue@K66e!feOP^J&VI-y&hifDxO|HscpP)RV&lq9fd?uE#cfuq z0Nn0;me93o)$-}m$Hha&sr4^pZ>q%`2ZD}iMxjsuW31w`Z`)=NkJBkcxq@8=xP*Zp znR9i}LCelWkmDYgmR|haD780tIbr1d$;02ie}~C-L+&Xb#bYpZGc@a>G)T(`{vUrRn7X#|FGHCWmL+cN&-5Y zp_r`y@@1}T$*^*4x9w))X=(-o{Mf6M*}iQO-OC7GsQ}t-Y@0e|j*}0hrBs95anH`a zlrmW}2L9`X1y_E&+zUTW!%_C7ZbnRJq05juoAmWW;P>~fw-o5|vr^w^ce@Mc#|WS- z{g=Dch)G%oww!zLr*nbjv766_h)E?3i`QOWDdn^5BE=-=INys5jE;%Bv0vqcSl>J$ z-pPWdUmiK-i(~&=yml84#UHqD*rl*iz<=%}j2>S>Jh<4Rog}g_E9-)YNX+ZkM`i9U z5_y*8?e2n|2~6HV;=b^cW>ia8_fbGVZO6Hw%xQLTlr)EoTrebT$M~~0%GgCs!}Qz^ zqG3SWS>aa=Iew7Nb&RHz4fAWDqM*<|QQR})2%Fb4DgV8SQb1r5Wjn)#i+m1x+$tBq zlaZ}7Ma(^vEF*+*Qd^?Hh+^oE=RCPtecA_A^Ly7__h{am(dU(h9jCcmR{VDA_@bz{ z7PqU@XN!VyAP5e+JS}TppM?_%>bg>v@k7tF3#ZHIk64Z6y=;zf78IvXs>es0!i&Je zV7?k_qVONe8P~NByVAixwAfA(Tj=w#9K4bASS>s>g8HC}InP=s^4|Jh`#kLfV0!K^1;($?m`za~!EsM=cZi>+LZ#yFUu?+58SCW%M8>a**gjFHDhCAc;7a&` ze_ZTu&NFBBzPPPf5&vKE9B&84hm^<2#mSdZopS*Ak|6zlduHsp2-fT5vy%M))2$Vy zpRXRMsTwJkX%I?wQ{nf^{yw- z(*vHFYHEfLa9;!+s;8J^T2+nrDK>t$Wv+j>?$0|U+xWA25m|cDbRlGW+K!OO)Bf39 zb4=t*5|6Q)6;+XGw{}DQ^Xa`RC^s`Ye1nph=79uRDdA^}{m45yjy8~cS8oc}z*PZj z`zTDM_6j_W+Y81}Dh*~;c{@y=tR2}BFk+@=)z?FwoDNy+HQIOm3BAqfw5izDaeB^B z83QlD>HYApd7xY$ZcC{`l?m)RS`o@eY9UuXEWofsh}Bp3)eDlO%4o`{DVAK)bcq}1W?xw3BCC-B#^(nb7G}Q?dspQ~wa$(Zi z)>bjYyoe09l}1W?tzK}QX}<``z+b=epLs9|n636mCx-!zw*X>)1bxFg9PGBhD>9K>>%M>gqy+`1 z(?bx(0TrDbxdDHE+#hEMOAzu0;T&Ttq7t@Ev6Zto>(9|@PZa$CYW3KZfV~hfL?RpO zfXv*wtRS(!uhq4YK?mTXc%O}g6xa9dX(JU+ChXk@N|q(w8V>}>IM?g`xYUuPLW9+n zQ?HWp+TQ`vsx5MlmPgNXGG`LeZ1sC%438-C-Xfh7n|V|>I)dtnKJ${gZTwovh^h~D z@@pfV#`Ugbx9?3a^*xAt-=}_2i;S9Vk-u^9{+p!R`f4sQ;yb$8oRtTHtwG=MB_o3y z{H4n94uZ^40Kl5V7k~;{K9WuHqxZ&)?f`hva3=`cf5Vd_D7k=$2(Pw+8_*eW2Trf7 z9R3!tHq|I^V)6-tLubas$LF-i3$?B9lYaxo`2ennMuEHJ>K8KOQB(V)K7nQQ`@QUf z9Jl#q`Zuy^>W?31fomPt;h=g9K{AR~=cA4mcjo%HsHxDN!^_6%Y8g*4DH?CWH(5;t z<329^Fi8Je{Y9;ZuTVG!`6V?^t;bVVSL@~bD15aE-&Ns~JiceXQ4nj*T|9zCF@=fM&YOw$%w$3w& zW!`U^YKe(3fTflV`uo=NTh#JZTm~suqcE+zQ+^4H63zi*6}9IRs+MzW$xQzo0~~fg 
zO2d1Irnfv1$00`^1Bp+y6aEs;rn)TF-ls>iKTtCWDAh33%7#nOFyorq1hEoPM68l( zQ&&Q&q9O4G#u8u6B~QX#6*Xwz@$ zD%?jf>6o1eeGCAM#2ZA{Ki-J+ez;@(oxB3gY4ueO@b=P^Ya^L@bl9-5VZw+U4H- zoU}AuWuL*sj&QD(`!DJ)h8BX&gDlTQ;ieKpAsf%L+}Lfi@lgTzH+BCtL5@4|_~=?p zhIZ~TT9R4EJl))^%BK|MH1#6P&HJ)kALnRs#|Qg5J!wRPXocf<*(3({0;DC+@e8%) zi%UMfiCo5tkDmthY*VQeLkPkLpyBH88^{64Rq~l5nt{9 z(8>fm%?#|vv!`wfqlC7%W>5NV6{^`XYvP3Be{3;Lu|K2>R=4<<3-B7bEP|5!->Vgy zzCDG&5yPU@B1m1%qF9xP2&}_?Y4gy2&)a&uUzw@i+67fe;A(-{0?o} z>(^fwl19FnxA~N*$jd(gR~N`o4I=Y4z+Gv*>Ad6bLUg0`gT!zi;0i+vAnG~v-+36Hmy z(l55EUoJz_+yT7!Ud|^YD3Y#yI2I+}Kk==2(Ss*J$NyJU5XXD(r`M%w1B@hN>E@vn3ak%K6fq?s9YDNSv)Gj^X}g=pc^o`; zwhlWG2!!mMoNss|-#1&}t9ktRaZWC-BE`dqsh`J!&Htj|RtrGVPIt6H9bT@2Ni&47 zc`Zh8sOB27XDA#sO{n5yBQX5>pM51V+xQ8~tU_A|b;Y?WV#a#P$e-nhGl!tR0k6!g zoSYwMG~Ux~`C^hilZXQgJfmq}zPx~XHYvfeHV(X!^#{by>q8HAyDf$<6D-3&U2u(q z#1*h>`)1dhuP@Tm)AP98>Ed^bjwv!Q{Wt??Dcs!Lzq+~#Jy(o;y^9&KPEq34qTF&a zGS9VY4!&U#eZRj_6$yS0@4#J!9}akh-?yMx252!fER3hV6ONkc>1i_of6EbBCntWx zD&eJ}*O%cgiHeF|xpGCY{6@Zd!PDt=Ptk;;;D#zxPr-<4xoULX#8IeXi~BJK%ki1*p=Q^|(j>+VNs01tK}+CMc=yhL=NZseZkp6lB#d~>{SJgB zN1>{v)d*%swz@IkTLk_M8eNXgE!;wOxWjw((qSg__VNQTd!s@LEYP9Y49$INLVyP zEX+AB4QrMW22^dUAI?j>9!p?!-TUdFUPV)scOp#wLaOd1afBYlvlc6p*{(3Fq=7Dr z!s)MBR6j0F4}CYkNpZL)x(3PXUU}okMj{PG)oHwt=2^9mwa2lqRcTg+<=jT@uY3C6 z9pR@xoRqChx3gJfP`i(3%urrY=yKp$Q1XNkh1>FQR9~NFhH_FX*!Df~J~4|k@|nMS zme=4RSP?%2j-w}bTze6@w{TrKN$deoA}c0eoi8VBk3inDM-Dkm;a|`XykL#v^wMX> zSEhINk~42>XtuveDRa%piY>%?hx%4U-pjWc{KX3jA8<8(K{k~mPc&dvfO+BKm=v(1 zVAl2)OwOT~tJvrij}vvx(0bcCG$;eMb1<9BfZot;p)UkjU$-pUW5q_sd=~S8&9zd! 
zp`1JEy9l;%JL_$HqAq`q!dXd<=Yu9y?*J-deU5W{^0;S@i)IAb)EMYSpZaa`w-cZs z1`=JGBf1s{wYt%{{Wg$x)72Vv{n5jR5P;L^pgP=N+D!*cI(o4DE2&b_;8X!A3H;mq z$BScI4j+8Ck2d2-PeqbE#tJc;A%;J%eK~bHz`2zjvPItBRDn(MylW=9Gf{-YM9`|j z<|o=d13u(Aw_eR{$NlJCtnw4$o2n2cE3BsO{b|)@{FY_9FLn=2NAdWpNnmfi@iW&1 z3zHi|_wKtW3pi{aQ+w*#CQ*V}?-*F}hNbO#bx43*6%RBOX%^#U9y@=>Glx zma8_M#1WnRTp40*%*^^rI(I`xG%dYS)+P_tH)XZz6ZB4jf6^r(a3j{WxN z4NgwTnM#I96VyE52B@W~3T{=DAt50+%?*RQ3sM)trYY*AwyX~K<@ zVUs22nn&Nzozj*{QZsrF+aO;6`jqY@a_MXcTb2*zRuUXRT@h&DC+n6eJBHCuwYd z6BSAcAWvdPx!1Hdh;HQlhu^_Ww8-S}FZa0UuU^?xq9xvZKFkLL+DDT%ZWsCW`S*iR z3K`A5bK`5=1!>iHI1GUf@G#WjZseZ`fXaY;4${Xveot@Yr*^nXV$Lu+m2$?cJhGk( zKU~T>A_pybbcL!4V`3(V_&Y;NNk;oMmxnmIvxJI6?1!u{Rig{JXzAxIDb**R0Z)QZ zmE!JA^3RL_Ibp={quaEXvUN{9CoD^U%w8r48M^>({`4krCV{mDIFLG z>Ev2O)gCS=Z5?ce>0}0{Lre;JU@oKFd;natwNw=K4MKQ+{`^TtPjBWr1~ZbKrSDzm z#N4vl-~svd>mfL;LZEVuS8%SxJ8gfA7mPV~0o5^3?ux3gGqgT0^oo|f6R-eL|Gi@K z*E3-1ng~3=f~u7%*Er}zt61?boA|oGI0iW6A_g7WK~yhg#cT4L+7edhq_M6E(x)>I z)D`4#x70tidRd6Lkk)J%rZ*i{T75dGHs0{Mqn&Cz6aC$UR$&hC9!LZ%f{KcY?w_I8FuOPbVbv@eFHZrq%w2XVqhOd)q^UT88{A}g7 z(2vIrZYEfKemC0Vd+yZT&%DdNqFr6{&o>kac=spwEjW0_Dnw&L$Lc?Z)8X( z({9wK^lmQJGE#sd;aQ_T>Xv0I5*Ewa=nve^A1t5ldN~MBOIguax35m!e6}E{#jL4$ zifEtWyY9QSFSm-1Z2>@geUPyi>|^q_!ng`J)4ja}j<?_xHnC%+J1&JrxBg5VOOws?(-N(Q47AyT~)qP2?%%KBNJo^$n%r6Ywqm~&d*?s2z z!g*^Fm9Fhg>b}Y&8GoSS$o;+q4|o3erNAR>glF_mO2gi&h^wP8 z^~Qz4oRguc@Lr*h)Lx%FZm(L!o5R_N9$^FyLc*fTpTwC6%Cy9lUA+;EgmfX_{tI|Y zB?R`Q>I`%;Pju?rf6&qUHTj*x8uf#ifC_Ih+GR;Hw(LZh86rCS3I&>RKX-vw0OT^5 zIm6_8akDd=I!wm$x4728-pR#v($bixn`VD)-5T}_PS)r1y*n2!)G%6KdhPRoOTPEI zRFUd|$5_me>fJLn2G~P~PTjwwM#u2+66a^!S$WQjS1x|R31h0IFCDxY7ajNLITojT z-PT-rXuqsYG_F1QyC+{0j`djGH)G^<5IfT3PC~(1!{Do!^p81hYq&fKp{R;Aru}^i z2KNuTZ-dIJ%yHVTx(P@jLW4E|O=m`E9>pthaq*=q<~McqR$KSxh@*`N&57%}u7vmv zV*QzWn18~fJ^;>b3VzG|4v`R`Fx42=zmn^~JU`MeC~LcZMh0IpWKMDWYllBoX+;;? 
zBXYQ_oSPUcJhu2Q5}x+zRR-I>$5n?zxYB&b+d^_@<2Eodmk!G8I8BA^czMP76H7n% z>V6}f4{ej*(MQ$e{U;js{6m zy5C=)Ko!2%|JWssBr+;TC~w-2g)qL7boj7GxnH7U;~|z=X!vw*Ro`|8lncyp<+eXw<3+dQ zRgIhf)ZRGB0KKqVsIT z`cNVI+mnTnv(1_P^QsOCI+=6an6&=+hYuB=G=AT3Qq7M2_LNs#8Eg4;mJ+50zG%Mb zz|Mo`(4JRf9ItHjy|4Hw7&?$b8ppN0K6=!men*$0ec$2Obr1VL6~4`5t@paurn}Kz zgzj0Z^ZkjN)piC+NlHx)4!hKijt>7Up(w>ej2xEH(i>;H_)_HFs!+6%W|fGgPRwEt z>hGpMAq3{bMf{nI@MX?A_tDk)mZ7JJ|4y&`N~oRe65#{q4W5}e74!tOqC5I!cNx|q zl4$CF?RKrr%*|S(e&acwEmry$JpDrPo0ETZ&Kc)1*U@M0sQy^b&SS`~QFB+Zq%fPX zT}^DyHrCiZUxggJMC9!lzLhb{KWbpWh05{{*p(1w-%kpzq`9SA8$al}at_wXj*r)3 zo8=Pm_}jN0QG6)~S`Il5ivqvM!l{5Kt)JxDTe}9)N1okdR4KBXEyLTc=L%AX+|TcJ z62^RExLRya9N4DS-$a-xZn4{9kr1FAX8kBHz*LHchDOnNjdyX|!F2su@Fu3PjcBI4 zYvc2GBVu_4E@2lw&U~gGa<)mSttn+0QA!y|db;|ro8j2jz^Ie3i2 z{_IGp#{RkCT1KUN+yfXF50{*7!IkY_=wTvd=_2V>?1J>bC<6A{UV0r%#zfX6@~`gS zs3r}6C&g;i#v36s#-(Gyf>+LMsX5xLit@Kr$Jd8Chv(xk)K^MDD{H<}41fWi?>~%@R{(F!tFH= z-2MFiPXks?9xhLuszL)pv9vmbAI%4ou%_d$(%-Cj7Y2-URFVXaoX=-$c(H&An+DUt(&tZ{$_sdzy*v$=YU5 znK2?DS?Vd{(8^xhuddI!5AP+UTejizm)T9Kh>owbiDn%HT!wTRLfN!-Xja>^mS;aw z6a)4_whM%yiZ@Y6L_z`Hw#|ljtXx*!Lo9%8{A|aQe}@RE9xZ>(lbAQQx0{BBn4-cD zJKb>JIL7I#nyvYXQ%DRo)LqnbWcX!zBbC=~yn6wfoxOU7fpJdyMF$u6Cuh}0;sth6 zs5M)22YQ#CrLmHIZ&EXvNn5`VYT6%p+s{)!amt4ig5GIOj4X~X$vANA5$=8rR8jISi0Rf^0~&yE#ZKW3!Ae@dE_z{(*fH!N(YHXj?q<`?QI9MF0XtI~jK z2+MnJAnVTlq}QD@p9{$|B@m6QX+v)PF(MycS;iOO6O4|P3rXsgitOX(apAKiCF}w| zYBwIFUnSrABVuT?4}%UlZQF4HXs;Q2{6{2DqU^I6zC%|V6_;>#N}6fxM#7=+(Xbv_ zdZ)75R7RTIZ$PT68_AAjoqe+?Iht4PIPCL;PhVvzzj!$zj-Bogy~#K7ZS4E+ZA`>2 zPIm0n;so4>5?<^aPW`0mYkyl@^rqPk)%c#nF}VE_l5=Y0uMu$doM|VaM9t*(v{Z6X zD(8#+BR63uwP=P^?0dNAW$$X02dr@7T`nwtD_2ST+5M@&_coI}l3oI`gwT*oK*Qs; zOklil>B5U5_KpH8(le8$O_mI_4vNq`yv~%1aD`K&L31~)A%X7C`8!$0s_FBo|LS`C zq;oFs2k3}-U^P(A&{KY`>n2oLv-TGClwLnJYjxH?=D~bC}jiVINFJ*Io8V#nJH<;C59zCzclymB6uSKyeJkF1_L09Ei&9PPKAW0Fa! zPM1aKMHqFes4twmo|3&Slu1|~Ln`$bmh9@g1_U@?^uGG?KGlmsJA#EmtSh2}k1O>{ z^+!7NVULiz@JXT*LL#elk_@IZ*2^;dm&Zs-P#CQsRfsrti2+}#q=eGX_YJk}_ffF? 
z8_|qrTaKOkM>O6<8Rz!ivgo$6IGJ*%YmP=p-k3Zr*2+hJRU_?Z1+#t0EB-kp$eR?I z`6#SaA=}{5?PkMeaa~q$vU)A9f)BZT>rgQa$VRMwe+q02LL#&rI(hFOdNp9R%E&!{ zeq%#tv)Q1}CqkRb{9*uxL0=Jm+luJk{-jrdrrP;tEK{gY<21zQTU)b>lS-OFznSyh z7eA?=nsG#DHouAL(%g^lC2p^vGZs+8_JaK$Br!IebG>W6Kv}dDwu_ zCqDxC5rBVk>Gn`&SpIx{Dr#X8B4wV!zkm|dVzCzgBzK+6ka$7W7GqtME~y{=)&=vW zp5Q%{jsLc?toP>OV8ThVa_cV`S)S%@%VvfOB$2S&iPEofzn?hK+?Bn?IQsYHg&=aM zT4JIB%2d-9@}%;NYT`4KeL^9=2PK~jBP_Rtr&Sz{VPRovx&4Q6vokXQU!J8XFsP7$ z*(@;j_Ut*76DdqaO6?_fCW0hj^0w!i0%ia!NK!zlBiiA+mr{|Ix0u^p9gr!P*uS2# zac2@sYMLN^doT0;G)@1L`#;8?QvXMb=vmAa#CFsFZC26j3abw4B6-22vh{LRa*we5 zf2_TARFrG`KZ>GYpr|M!X;T6Mk^*A^3Ifs%D$?D}sHg}Cs7S{sNSAbXx3uKYNaqkU z%*?sQ9q;>#?^)}tb^dWH;tYG9=eh6e{-jDj(lk9e`8}X_2M-=x!(cLUDgjj^IsuUcB=oR;&g>RF zAjD|_G^sp|P(&|mGfoDrTQ)P~ap?OiU7%2eS0%NQx`z zAZDo4+V~wG1ny8$Bj*7VNQ4GNE)FFJ4E3l7oo4fOOS9}E6YtQrOrK?Qj$|boM;o`D4EO1>>XgaO*KR7$-cR#}AyVmi0mRtH_%yWa} z^OY`QXcSs)`Rk8sq;hLWM4%R1BjeJ_8-IJ=>VzH?q6I+7`59*B4(O-?d~pt9FYeTT zDF?*?6_e-}Qdh7-><6{9d|{-4D=9BSGuNmq$558kccq~bPViND`Sa%!$Qg)7dQLZ; zhThkJSs{?~L6=%Hv^jtr?FHN;MhSgT2CC$6CBghNsL z&n$g}R)Iw`e8U#bZF{LXQowu{x+!O(Ol zCsv56Elouu?a)$!H`2sZo!vHEC8Bn2E^PQY*4p!Z-o*x|r0eeA;yfQX*QlDMsPrWX zK^RWxc_i$bX8k=$XS|jAdd{;!3pM1{RrQV)0lKInewaqhRySq;(E|K=qW=BmMx3&P zhnoXeyo7t$LnI;*vn=zyl7;jqg`mug^^0q8SCJwv!g%ya#nE8e_Xgsf9MVsKNWQ|x zM!J(B-9gxXmI|8Y$e=4G0&YN?l@SprJZpf~FO6cm1j)UqmizHt5U_k54DwA0vSHyM z;3LstMMWBw`ifs<=1p(COBy>&dMCi2XFxW2T|`6^fyqwE1EVmwHfP~(`2PL-P0OKM z_{fUHmeUYX4oM~$(*UO1p`ZY?nW%XyV=E4^DUjOnt!JJNT04O3oWo)kzb7OBo)IqL z>6YN)AQ~e9W4XOdL>g~i-{Bjo$6 zJ6hmzWG+RFyzEFQaUp16KCck_Iqrx36y*c7c=*w5&I`DGrA%E>GoyCD+i(9Yb;-OS zI`Q!E2tk2#e@J|l#&@oXqbc9Dgg96dI^{cO5xXMiJI`Jhb6abkRMh0unsuFYe6Fo# zvJfuvCe5=#C7};HTW90j^*34~CDVs-xlD2C444dj|1v&eIca-XY;^Uc#oXMKt)N0- zLGLXucYUa8DKR6y{(RX>-1DWQJ92g>SNdn~xHfk0nj~X|!#e;N1~1hd+b*dbEG5P$ zl-JbfT@7v=ogW~^jHeZ$ghL#vw_g3VO}y-LM?QD(DLsN&KX|dX(Z7pnDU4IY^Hdv_JE+`905JM8-A<3k!$C zL%a@q4%ss=|ztZ-RK8VIn6hqf;yRD_TsA7Deg@S)JYGW$d1hGIT$L>zDO 
zD~^9INR>+1T)^y%{Bm61=zX%LcA4vo4d+U>kc?!~tj3}{42?6pMKbyqi`ZE==?CZ4 z=X;&0$U3-J@>chRyt9382mf_5WCN)Pwh0b=x_)IWXsiagI%S18#@LuDJl?gvW|Zlh z`Thd^gDs$ZE{NZ-dBkid=CRDhD~RC*;S+;)9zrB+&U)Lrk7yybpEkOY+l$3(M-)sl zLBqi(8}Uzt71}jp#~ZoijjI!9zL@ulb$hanYS*a@8K)B#OR@c4=X)|gM~4`7iOlvQ zi=_gz%=d?zF%j9jiyPQVkmfCaYif%tyjR|KI;GFeD-gpzZz$zV9B!}Tg=Mu!-rnt3LF-p$SNg8fG1bL>f{6RqS8`j$P-md z6VQZsV^u>N8~E4hTKUIf;$^8)Ots@)1u zeR}aC88tOEz5tf@1?0}=X3xOrRFFEA&X`1#)p1OUm}m^?m8Wv@O5BgkDJxW2SI{R}&RmIdE+$NCP;% z!T?VK{Vl(H^!K88XY)`gSl)UKVyR{(#aKlj$~?}iErrNi{_eu4HJooVS*3q)IHuSC2||LN$2juxt4i}?`Qg^ zyQK4UY{96o%lcW_0;GXc1=6S;-~Oe9fHOF$yqv{$EaI?;OQ|}#P_Ev1fnb!194g(P z9jVMHuBaT_;Rk?vd`SbNZ>v30`RB}Irky1<@qOmJ8vP;pR-W(b@iuAQOis3ZQwfc<+GhR zhn}P&$WBNl#mRJUxY*gdRx3T_G-Wk|LsRTX^_QV)d$KcEq&_@_Zs&bSAR*o2up=#? z!+d;vlr=OST3K;>d3oKbr?2Ja=I%>mfqNgaJ0LO*ZeH?p3QWWZ=&ORfmz$+Jp~(s} z#kN!L2#>ZSO-6D!ODpsrw9HIJ-!2GIbEbZWXqlX6dRbETCa3$J&L{`g>zN+K$w2uc zR~7tzQ_t;}@a0v0+&}2mkth+zg|pBQe9LO&mz67kZ#=A-vBn)? z$9<`eOw{`FOioQ0Vjo-tAVo8c_DQCqvWiwsGBrn{X~YQe(mUs+G{lOE(!o|Qd|+J8 z89-$;_v_MJN>t0TTW>VY#GF9ciQwruQ~)A=4m8fX-Da|2XJmN!f+q=cA1RLiKKtS; zn3N?G4J1AW{_0(GjA>c#iW3k^Qy4g(omBS&Z`>T>I1M%Nuh9*c)23xo&3TeBw$rLjjdaswWtYq7Ok%@85 z!LhNkwzNBp)3eZi2FD|SwSfAHbmYGCBg+wews4kU)Dp9{)2M>GU%&Me zwL0{&Fp3g3cz`dWZOeV4&Y@BW&r*^77(9cSmnZsrpXs3M>_rxNkL(r+^Hq{t{mm)I zne5owh>}@l{H>p+dvDWkGk3BYpi*rkLF#m0@hrWd#rv!*9x!6eR1)z#esMqJB#%Rr zZ3CQ{G;&@X?da&3o>jYX8n8Pk3A}dY3OPu}Vf@l8v|@t!H=7N{`4L7ompkzwOg_R=A=N57d2af>9G7C;vpga0Bk>jlT}`WtCPO9a`TFUY;~#3W?p|%&@wh)n z2>PZNX8RmXk=uHCg^G_q}ycDjbP7B3dEH~ z1L|;9*=p#I=L1r)rOwfVh7X%%41kkc&@VLq^%(%C~ODBxz49i7p+2Fv(?Z$U_ChV=X?+u)0s_1bz=zMn(pVZds`>Ggv#i(AC$cKE`K)Mzj*jh3iUpV-k9JS2K7mcYVB(WVM{XbEPQa|-T zf6}S=)TS&ud&XYd^FkVnJ(f5g9!nKC-~3vxm9yS4SY(FZ2|toIwRIp*G^U=sDsiY& zV?W;!&#;JW$fGiUgGv&@@kD=J)s5j?nD3Rq7S6w}1zSq~UKUiP&^`-r!B{Jhe~gUD zWU&xbMLsRv7LtDMcI;8a!2Ix#7r)GYgaB!-X+^UbJkBS6^H0(!*wiKn?5Re+dl1)e zA{=F>S&3HhNW}W25KLD$qRf!Vdl=821cv4>DSfw&n`#)M@^`FB_{RH^@vnB{DS-w@ 
zwk@iBhShRbT*S)h-{ulldDic2J!Mo8(0;W6iD)Ej0!HNVwWRNYQfo7n^L>AmMh?F}Rv#G0D#gOzfjOXM*qXtM2Ni1<%W1qr&qzjL(r0@_ zAGvX3B4Bu83c2W?3^BTQD{<#Of0~?hf)ej|^#zN)1;-8+ zbfA>7{AK~Zsp6AK&W*{AOXQ?bc?;OYgX`)ZFnew(md^55tm0it-E)=(tW9apH2*kR z&d$Nf>lt_WUES7Ch528iITr@(IT8n&xj4pu)+_JNtr9QtBPb8Qwc>idYqt|bn~rkZ(% zZyapfOa=QpuFM$-$w5G`dVFc7i=ur*%T`#)Xge(j60dxZSV~=EH?!|ZgOjf+2qhr;+-p@6}CR6}*da79S-W;pm3Eogj6)G|wjUO-JB9peIq`8c1VkJ(?qzYZoM?H{| znw*%Zh89FCzRpewTzZS3vT4k1=gjVPSo*W_GPAPn^33}DAh;|WoGJhZd_^R|2~4%0 zq9O(MHy;EORRHCU79k&&Ku-3(> zi7vkJfeQak0}iBi-Q4U_%`d$j67v9q#2Quz_2b3xKpEk8?lg(JJw}Kv9J_;c-}|T= zZtR3@sJms+yiml#euFC!Fn99v3SAY7w<==C4iOaJy+x#tYfNgo6)wC_O1o_qKp#-B zK;|a5(5dros8bDNkW8GRRrN3!+$ngajLd4Wo^8)SMcyp*csE(8E7vcduZoL!?!Q=B zLNE@jUUpkk9Z@(N76L;T`)f~flhgf8Mtecajx2|-N$%T^K0hYoPxq_fHox@as;+Mf zPkTvsY=ENqEf~$NXi49C2KxqWjQI(7G`h`jwGz?a)YO@>-K=^OAz^^8)gb0M?{S$D zBlXa3eaMv;1GvIJI9a(S|1b0R@$m);Ndjp-U+G8G5x^(xz`!MT|gN>r1B2z3Xc%tiM z2kW1eEIqN$OvG)A4Nelt`C`@*uT*FtNrK8_vorh;gX{tfvis;nWUAGj1{Fd z)B(#~&%YuH*;tS~`u^PlDnz+^mtl380gz~)BY43k*7mnLS;5ko4Dr= zE1`YegRjrqCkE5TlFT;?wo23dn}3o52Q~a>S>fe_#nL|V*0&02HOIP>iP^^N2 z{o}>3>9O*4Z*(T4H}}9pW2%iNMGUAGkQRfM0%MeCK}!MlH}tw^EuN|)-Y+1U1;VaP zbcW%z%Pc<+TH`YB^U8iLWSQR^gwIpMA+|D!nkg#JXs_eMZ-;Y>S7<{*Y&I4S(-;rE zOWDr|P)K5f?y!i-93?WD*4ABd!p-)3oE3-Xm!=>1G-T|q|K|1_8dg|NlI3q58hwJl zovf1QYtbmYgfxA{Wdedv*yn2^X6{mJL4jh5tk}n zMOTE9-BG*C?ZXozcpLu|MMh8gmc*x%fkZ_l=F3;KwH3Y>suEN22V(Ulf}Dl4woe#n z=j(e`5-1B7tEr`(Y%fk+^&WC8G$ou@qQUfp37tIF*wfd(R1NjrNJpzo+ZcVFrVkS| zY}`Bv!0hokx`r4udftjcT59SVmP)0Oz6h#3wqw9;VsR1d+uUe5$5G-xTsM{6+il~2 zNos`y9285P*Y$HWVZ%teHa{>;P|{vT-?(~A$oM=!R;oi*|G9#oFo)@04aaZLKovj7 ztyA<7@Yw;g*n3BwvH#J#yy+ti(?Dt1Iyq%7K507xRsqnxnKn3UYq)`?HGMt+WX~uZ0 zyL}xl3>({)$>3OA+487Y;=x95gUHTUBBZ_X-Ny_K7w&Lsd#>1i=f1? 
zyb6+lmj$|m_5`+!%T{v6CjNgvqCcX8m6xf5dL?7wAw#K1Z{sr^md27Yr(i99_9`Cd z+#{N3kYK5PrlacymfOGE*3rJuP(wgTO-qSq@`nnnK_ZfSZ*~Q%>QlBUD@dd#2|9oe zZgO$rZI+L7e&a|}drfVhT;+9p%N8E)t6ASazJGa&Q6-f=;o0gozZxQ8!H#bu$M?vQ zhU)5#W{o^it`w0AZ?F`TI_-$Sf@BuubW`GVIs-|)-~q?3IC>3oNRwBIrdd3R(0LB^JI>vU6r&4W*U;;+NGz3UvPuetF{%Y2#O<0lgi}k-fvD zLt9TflLC$R#wD*4W?u@pK$D_bPVv2bxKjgPBvN`x)jO%j%Fe;&mgtXX{s7cPZY#8u zb^K=D>36ZASRi85QJXseVf6Hs@Qn7(VDuRl-1inrh$5oGXHK7bZc|x~OUmA|v_)n+ zM{3Yb#@wa;&>F&L+{Cazaz*jQ& zqvRxLP5;ohb^6gWnd-M;1A%P;2GSzE`dpK41# z+e_4s5yyr7{hseDD~w}Z-vN;|u)F^k3D51FwR-jcjf6-3e<$JJ9CCkSQt*Ex;r;$x z#UC=z0SFWD48(O9KxHvtg3$CqJ;YU68`c3jYh>`}n2xc{#t0ndIp3zeMXUk<#tAcq zY6%d*JnuRPtBYkU@{ub$h&%gMde}u<oC^|w4mv~dA&M>-PJya62a)JsNJjhwDHOj-+0!10}|AiYQ*@C z+nwmI{9isd80fVyFROu=eIze0xQms~u^8l2%hGFCRuHcTPTTxiCsZPcIxf7(-2zwY z|F6=3iO%d6nPZP+gjM`cAJzWE{*`ps)HgRDm6n#qo5L^B(MvmXh7O(XBDNSY`wE?h zFYX)xwCywtHBW9wd1y!j02>>a57cY6JBP;qBicR%8q@zt*>31&wKpvG%kUy>+LTYb z6w+*L?Ti+F@?t%VKqgpvIqJ0Y3LC6rY_Kz-yu9Hc8q6Enf^6v~CYah>g*ibgqp$mN z`Xy^ItQd2q)m5YKeCB-z;b04!937^bitPFh^8Ex8?%?_wN7M7m#A?=3-v>GP)lL-9B?Ocs(lXT=O-yJ*GelIQ@*Q8Q5BDUoU z6N^rfSIDoB%{@V<)1dPuTT&2ihCb-($oW;kLN#NE0V6l`{&znBnvmaD`OjnA%OWbN zZW!g=BG?&g`bRx+l$ttgdovM;Y_iwh-sWGtjEt^lW@ggz7^~^%JT@{i%C}AFGgDGh zIzmm|SYMwYWjne8epxE|Tkk=spyiR7f=$c(!yW=hZ81{RHMvjKH~0CPY-#`(Oq)M{ zwxFDKD!pQ0k_=@Qcl#I~-V`4sA@le#74%zBeuaFETh?PbNDuI(K6?B(Pgu?VSE(38 zju?UXK&Q+lTPb4cTb?kBw6t-wE+?{~8##SkHL!0!@9d26h=rK5_K%`_xnBb}52 z2#4fan1}X&5oKdz`wHRxurdYf_iwoGKU*m+rKTRVIe_>_3*gK`QrnY!7`Yt*W~XE} zf4*B1&R3|kY|S3sG97({5Z;|o;4J3Mxv*$(KHCa=uj9J@EXd%u8qFO=zYG(yyFk@K z{B80Ve0br?^_-ayV$MdfGaT}7aZ2^6to2JU-`4okwijxveA z*BYW9i(0EiSPvva-Vi%o(`4;Da6)v5xf);C=4f zOI?)`;0}fG1gTAu7noB$^Tm930^ks&|9lRqi zbw(e_7n1LfGMcGb-!EDakn!8tYDAVgh#qCN#sqAAIqtDn{Xp~zauIfVfY z<}W4Kk4R{l0E0Dfyr4Y%IiHRUwr&DR<>vm6#x=XmubL09S_ink0$nZ`o!$rHf7w|G zkhjdomCP;C~;G3V?JXY9djlLNkdx?C=p-_ROg$2<3rlJu=q)qnRPdDSRjpxd9<|Dc7`m^gim(sP zgMc;&71TJ7vJezP8Z4V1pRF@~Xr*|-hdcJUhR@iG*}0FOP;!N|yo{+vsY{VtEHA+l 
z@B<%opoh;<1Z*y{b2L9c4`~cGvuzhHU%vd(kM)j_UK3XtK0&}PHAGdwDlBYOVfgpr353RY@Gf?CjeZ2E;||O7-LSy3N1l!t9nmyHbz6 z$4XO>CcoZrtqe$Q5`b#jXV8AG=AdtTl<5Pl!0YgLB1x zR1UYc)$5-|x_jQfE&46fsC+mwbEE{^4Q+TM-Y~1HpVW0;YQ;;%_eI83W^d;owx8s` zDvn?P%YGvQSwCV;XeWAY9z`>a+%DKoNls-A4 z%kcRxpv<7K-3Kh_;Qh4O+c0!m9c%x>u1uo8A$yNRp2I5hIU>T8Z}j+iZW|!hAhPSY zirCvdsGe8{F$2A4@&pL;>gwy)p@``>D)7XyV?h7iLAHT+Rt|2%C)t7-@<~&LXskkeQLASq?YPn|JT*mWKqq*PT{`LKT%I07@!NVmYDhFuR}a zcfdG9O0FhFmn7vNE7oj&FK(!*Win`NcU`?nI1pl z_d1R(WKu{-;y#9DdJ2M=%7m=mH9Mx6cO%Ja$X}b%pOFMAeo4al0dXrUV`B#v&e`|A zcj_Z+bJfcg24*eioz>3=XAl1O-2w_8L#_B0RM#7!)0{7D#O3-GVA?9J2$a1ug;zk_u4{dwA|Ltsn2?Il57+*>TR@!H{ z6ug6JzZCNEd+UoyS=Yz_M`)ceOYF>Gx8I=re*LoWb^HcQr>?gZCT865e)P8aUI zs41}DKBw2*p3aa_U00COvy~iC?+ylVCA{91!A#%&@ZO>Eg{77*&Jd!e0O@+KuFl_Y zyq=+1Ved^~h?g|H7G37{O@_XoZX}Zu!8lm|>P;{}vZIN6`VZy4>%>!V^owniye~h7 zLh%t50J)2G{M)+qVbKdH(>mAKF>2GGtLj*tI-6D8fpGwtq1 z`9w|Yy_9YyTJ4K9HpNX5@wdCn-p-DchcpB0k^y0VuMDIGungIvJ;6a^A|>e~bg4<` zrF{F4T4W&YQ>^Edh{rrt{P}u5Y^?_U;dkqI=G?b%EGvTHy6Zl4xhAwco|tcPN#_Mw zkuL#_Y5<{%v_!bCydqtXy++8!36q|HqeQQWPn#%%o{1Q&rvi^>j3P_h)5a9Mg;<_9 z>9^adJ6tVjM)wY;jIFH{5fAaQ3*f~}h8=1-ti3G4!1v-Awg!8 zw0BasQw%Dr*!Jlk*m-mck(u-wM*k|TUdnmzHrrHzJ_qLu=TMUQ#?Sv4Cq7!lOM{(@ zz2JAFTAo%0Y@aF9b934rnOsvh!=%y_rMRNMEr~d{jq{p{M96sAKFi0!>FUe(4~u@F zj8}h8v5A~SWU=3tUsm7nJN6}Sq)n?6&B&S+t9~7iQHEE37I*L(`=(n!r%;k- z#xqL~2nYnQMFtSbgvYv8wM}NML{N6jm`SC6R#$m4wVyGs{|~sQzR~W?u!Zo8)7T%B zH+__CH@iDwhG0!Sp%tUHx3bhJ19LF@$!vUx1WS-|#cf<}T%YDn#3KiXyqssX6!=95 zJv;oQ*DN1G{XU0>C&Nkl;riyLGA!ZoyEq(44&TLs2GuPohLS>`1>vRR8yKht!+!|H z33@g_!_;ABwc^W^4dZ5qK0Or(;tPQkD^55DwZMhk;zi(=a%EIhzI1~zpuVwDda%eg zo-hK3CpP#NnRuyyFUiSlGbrMI62vH)^L0XOl#*{iKt^7>idN!6r-n#Jtauvi9BiOk z4%rZMS_(hN26^deXW~<^`}TN45K^-Df{_&x_Bo;ZdJ zEr&^e^;^F)f?xW=&8IKB@__RN2Kp=vjMpw*svhU1MCvA_<97I^dYX&86!&%z?>HDj z+f$9u*9@TmNIqJVmTb|Bo4}cVjT{Z!;9p zTmN6MS_C2iS_COom-i8|C{!nAae$Pk*x0zCSp`SlccrDJ&5qv08&E$5`fz#yvo~qx z^=>KfEKZI(qYXi-0#=o7@LRx#%o(Gurza0n1k`MX^nb`Ws{LJ6SA z_>Q`8<&lGXm@$;-!P?;q`I#N5!tILTVXuO7IjxuzvuI!z^y+Xd^LTCJl!5CEdOI_z 
zz*wD<=}p3G&3a-t8*Ln&o=xnEI8Tp1Eu7^A8X|e|<_G%G%`wr||NhV><(0L)%liNZ zNa?Ctl@ri)&%f(U0kkrHjvrJxd9cV0d4lva&%!inrq1G1=r_9?AZP%!LfholjT8lS zb#-sdi>4pXYDqPEpup=<=-LOhJE<(Td}j!ftW7lpCTAF4 zV80@SNj!4z&%mztLl+gmWL3xXFqZ5egh#(nVAUn7n(X}Et<5*4ojLIm>2l=?&F)S$Dbo!VDb3VbnhP} zzBuED^NRKHdC^fkS2j0NywluPJQsZo0`RIK3OAtsxoJK&@LbHnr?_?UFKX&fbF`Xx zc+B##|KN>(e&+YBzJGvp6T!5;7HY(P6ld#E*$MEor3niI}dLSY5dJS#GfOS9pGK zQUh8ptCu@N4fQvp@YhFZEAmO4bfCSZ4e^U)Qt_Xvs_u{)TT?PKt@1^Q*+v(fd}0`I zI+9vZDtdZ*5<*+We&2Ml1zItGU%kK3u)3Lhf6m3XqG0QWP7p}#<^>260xdxfed+S$ zxW0vG!KLf>yzjXXah<>EP<1EDeV*SMDA4l!-(KfI=u5Z(CxYQ4je(yflt*74N*+mQ z7aLo0-@@?|#|}JwuyY_U;KTg{rEkZntp10OWzAPU$zZb1S8VI&eNwM1xanY5=~=)y zy;rb1N%4=N5cqrjAW&R{wY9Ywu^?cafKCbB6qYRO-`CY&|0BNeP*PNURXafjKpi=wn9D~mJXUu)`9GKaf%&il8%lH4 zax&AopLelb&O-DH`tl3`;(5=~(hA~s;Xv{+ENlSY^LY{ z(LcpD<`FEDzh0flQ1g=QwY~JbOeNg>#5D8X;MD$M)*XX%4va97sH9KOYGNkAzaBZp zoy@Qe`bfp!1q>&MMd9APn);Lf_0;6W&8aV4OnTB;B|ld|sa3%1gVB5|CP46`O{q1? zRfMoQc<#DP=ua90u0swPUjykIva+(Ea)2YUo1>1L+zHqa0~%&hA4j!I>FLF`u6C4J zDl6}`8}U1pj>hxpWNSU{rN*JUEd!$yL&E~!y?gPBxcredEdxtLtBUel z6beBYdZ)c2!szrtHQ~_W<)|B_{mGOU*{;^aYecrJ&KI8wJ9&Y$j7X+Lo&GJ?G2osg_2ag5r?GY*@3SY;sD)nzmJG1KL1wk0 zvU2Sl3fb}g^cAr2v>Pt;ralB1d)&mYPSmMYDE>smYwH%O>ff;bVpbe0Jey@F(UNZrZXIc*Bxf!|-UsvB- z1;;7LAqQfU&MQG;HBFzcXqLIydh^-k@Ue4=NmWi13TZVqs1l{OSMq%X@BF=14^+ zUidz~a|ua5cg>Gd9h)f`COmxbScxY4R!n6|#n~{yy-+<^#yaoazhCYJtjs^i{5_r6 z{#72aol1%yedV#il&4GXz@N8V$y{KJv=}W30bML%Bdqc!wBiCdf3ymMR_D=`2oj(T zW(zUm{^nzpL=4EdO1K6lY;GF1oR#`ON2;p68P)#b0{=UZ@8vqocmHwC0Q3c}k9%-_ zg=%QXt{$J8n=5!MsYt4Mb{?@MO%}CJZ`!mA4D@oMhXspUh+tUD1pq46W%4NCR3k5< zWc=kn+Pi4SlUL2uvo;T5n0lBALPK|$*p9v9m!+U)*d%*fOgV4gXZan&=%t4qYHl8D zs6*_>Jk`DG7uVZPuez`kZ+|@{Zf!J)=QKTY=ZD6y`0bu2=IIYI)PyvBvUr{5-I=^I zQNCKsKJ_oA{gL8Q+p+l$^Y$^Vg3(PaFDAvU&zwEe@d{NrfjgJAJ}lzjwQH_hn9a^T zl*x7s?X`c|m&uKH>mA~0q2`|3JZi9Lfk-E?Zu{h{*Y2c1H}1pps(k0C`Mo(C`auJn zUdzUtzOqYgMHe;|Q~H*6*RS+@NuYe4DXY#Tb7{3uPT@c<-oaJTwMGZ zux$W`6}1+;6ncQS$aP-TBT1jh0C`YA;BTQkw=bOZiEo~Qu=55$wOyC<2aX&`))o|o 
z))zPe6dD>C8rqIm9U$yBA`U|1ayY+nV6KJr@U%Hgbl>U%yWGhmX8ZZSENSH#L6@0$ zOohW?BGdRG`BstL2m43QLu6+7tPSf;Y}VBWhx+kPZ?D$VGTI6wMP`Aaxp3x9Z*e_rq ztgf!^*aAPa2y(kXu44NYYt3bvBBOnsZ6mL0EV1H|&U0f5-5vKizDBvDM>or?yesI4 zFYg5HBw<9PB%if6@;p)eENhp<`IBPd$FOC%dD>{f-Sb{60sw4R>y#o{kJxrzH>zUA5(E8o9gU2?h& zCfw_nYjCe>A%XY#O}d|pn2sq0h!4JV=Vj}w9{3(;V29oC#6~e^E`SZN(UA5Vz^lnz zU0vI)f?4A`VN z)48u`oS!KOkhAUE{`iwEJyfgh`u)-F{@&&D>DxW0=osZ6nw&KXJ`mOSx&bl%*dqbc z;>32c;W27XY)tot)uy`*Jd~1uCa-_jP*_bHtysD~b(Pa-R91fM4L>qhiNPJeFu)Un z4zd%M?L80p8x^DQM`>o)L;m##dzTkWZKtGy0u>Y#lAtmQ8VIP#{OfD{c`BM9BVG4o zWMV=BDNUQ%eTg?+jg!o+oVi?YwV;heNkNh|*mVvhr=&cCW%kc!22KXhFn?{W5O&=) zcCykKGl~3`bLc(rMDQ{A)Yt`&2)Qoi?-y~N5%Ll)In6U(J9 z+;cf@=nSg&ZM!+`};c^r~3u~fMA@udnlKM4-?r1B1ZvBKulvt8iy;?>K z8y{mC!f^Rbezi2CF0{1pODmAByGnB|tR|3Uks^o5iQ%@Fmb_2qeUQsOfWJ5$Q@0|A zicaZii{;{62#NU!Zt3Losi{686yK$q_QO(tr%*YMNnPGmk zpMtMs=Vz$ZJ@Hndtm~q;UVjp&)cb;s>Q=#9XAnNZEZ==Qzg4mkmm~0x7QlBgEOG(i z%&kVO*j(nLa9kNR!w(G26c)%{BgkT~rOp%fWVi17lp=`pn4 zK?ptM4uH|**=UI)B(WFWP|i@xwVAj_GJ%2k;%3BSQu`3-^##)8VyBV&>y5?Gis6Fk zQ@C>RSVZ^ zTMjRcZLu$lVhpyvAcTgj)%EyevIv{3b~*ftXWw#rX2!GeELz;Bb6lKC$6H7c-3YrEfL43zkG};TLlH{C5Pju96vPvp*JUjhn~H zX`|ZjB{8cj>wU4;8&ue+Z5h}bXbnDk)W1PPS4!u(sE{18!dF9QZfDEGU)IO0F%xghTsp?bUo5>73u;W9?~ndBzkMbRi?-xW3okHSpz|+@g>5*l2c} zXH|%uhRHMuqC>z+r>w2r3V%|7^-!8OTrQuv-nKs*(j-18%;HxO1}0$@-|Ma`sp;N* zu2Lr5h>YChA`)z`)s}4ES7clgvwcq~rC?|hW{)oxaToF*$QnoOh%MUc#JWB0+WZ}$ zn&oH>$5WiIGwI5%o2Q{Sf)1j<_n1wI1Z4n*(Yp1U`{9{-u?>{^9-;F@rG{0=%|vRO@!=Ql=r6wtn3?9xiN*FQqX>BDk6iNNlF+raN*u3SpSl@kR?a=8 zv3++asSo*NweHi65==?W+%C7Hs;X+Vgl8lq$e*H}#Y-fB{qHgxThpSreQQe#8z(0; z)g@FvIHGPC*=`ibf{+LL9-4OoLYFP0JeJAT&-!>Tv^+oS6Z7-EORraZl=AN%!^z#} zza95aDYRbRdn}+DZkBfRL{P256gri+pN`{4r1=qlGyfLvgaCwx{dI4t_KU^Jh*Zqm z-!)Tf)JM|88iV1sQyo4Lat-#>0Patga4<_14&c5)i?p0VXKA$w^s0o>Fw#rD6fEiz ztC&6?mgw#7)I>$Xb}}6o`vX9m0cK26YbC>XByPEoWDf(C!SljlS3G855Q@RPDGNK! 
zuL0EYv^j`auaIG6zimZ}|M5EIzSWiq*{_u!n`0#ku6x@)G5q06CvWG|+LwErtbaAr zX2%+kJ!JVsb@6Byg^2O0pp@cSnN*G=$NNwJMq1t3$Lie=7qqxRwu&T*WO-t&r5q}| zE%(R5#il93Y*p#fY}k%SCpP2JL%h7sI zwNCK<#JYZ=J^vTe$r^fBq)Rt)rN|uBXB1zfA!(uUePC`Nc)OSrt%?($$lO0>aPBTa z>mWHj@s29IK!KL;5!3zJJf8KvA;iiGOKb&E*A_!C@!g2hM&5fsMH`jhJT;Qw-ZF|= zUc~jBQ&`DbSwN^c_?gx=xRr0*m&KrNXLu6N{+Uy17@!(X;)L6ER#u_KA}D2oq?&SW z<+Q$T$Z-rKofq|SRp&yqH>x*DHJmiJCo8hwb@A$PrX%woKvuJA_n%5R9-rD!Rn<`*etXv0 zq?6&L$CkJHd}!~d0L|O%$!vEO8OxkI^&bVA?lziK29EBHClQrY%}tz8(W|B9QLfI* zti9pp#WO96ZDmgP%C|0iUI^mBGh9zQoGFRG73bbbmI+#E?fM3k?N_KxgKDsK=F#98zKB}p#nOYQ0PJP2I))t@T$lY8vzDRxK z=dL{(6D;nRm8aF|&p9^bvd*rO3|Lkhf8p5N4YV$?cq-GA^lwhuKC-x7LLFZm8vgU) zSmgQ_>A8U(0y@n7%}yn;tZQR5A5D|l(#gp=2`FD2EN5D25trsYT^yFE_B+K!A@YxS z&!^~W0n2bupA~C{+--u}@Qv5`uOu1=&fupO2ot7znMH2MT`j^FA?6uhTmJcw&w8sE z=Sm!1+GqoLe3F|3K3`CMpfGDyL4AkP28nc3C$b>4swR3^yBN}THye@CQ%TP>1~^^T z-l5%8h;M&J?kM~<`EehHp_PhhXxB*;oOI7uMvBi1U$$j`IPY=o(zCJlGn6+~LUNu@ z)CMGC7;X5=eROs^+b5Aee}S3>si_={Cn+JJiBZ_*C7iM$%Sr^$jkLbKAc58&x0gnK z&2e-Oec#w<*(v_ksfr1;v69jF5va#LFSb5mKJv?$4lnAEal~k^G?=*8aAAB6rOjFrFS~<~kvtCz7D@mjOM$zJ^ag(t1ou)9X_&m~Pv~p#EQdr65GqKI>LICQW zPgq%-4@2eb%BRBqkY$^%(azELZG~IcuU3SqS13GJVcbj-KUj4fRoOa;(O;|jox%){ z_sWp5v#_vIcT6!;dIgoDE-h-IhWXH5-D?S zm9?v6n&~>EoDEtOlo&EXztJ9cF>7yc4z<3s(`Z##s1?)mZnQ3)!Akl(-Q&jHtFv-m zKZ~PtS@j6ZIt}_FpEIO))_+ch6>Y{O!K1&vySvgH12Fn*X}PvWx%-vsp*wam$LOaS zy6`e?SDg=DX!Qg1NITi%B43b|Qly;CXC9)Yl0;{O0+wVN)| zV5`B*OQIx*!Hqg0FZK23R6C0AhHIPI_&cpzP*B=R_uwm%4Bn>Xy5ltTA6e_SI1hN~ zLwboNj5Z&ma03~_UyoB6sWd$03tCEX-iO=I1W&%U(_`bAb;&{`cuZ+sK*(!tpJ21phyDeFaq1>)J2M7A!zSLMasmrAs%K*%Gr_+jnNkI zN7KyZ2P5?oZbL^*jQAoR+B}ZAb24R{*YDB$^qAE4{`Nu|sy_`i-o*CDVcf{>&|bK! zs$TA}H=h!Js8%%2?4$N(_<>W_2WcKufJNvBOSiMr6MS83Y6m5fnzot8FGFaV28+|^ za?7N`-V)fgat=oBcU##9UGJGJ5n$ON4lD-sA`2IASE5|Hi8~ zE_!5Uh3^8BwOI1PiQCTpCEl7bwDRKfmTk!~Cp`mRijWdwubNcl^?A}9d*f85rz2VD zAtRPn_cesJ+A_mm-*`1c7i$-`zv5E%@$RGKbP39+QPm+=BWjgWhl8gF2YWsA?*$?) 
z7L&{Td4xEB*hI~xIb0fvD80Uqy}SAqPd}A?ROx?`oPd-FYXZmUSa%%yHzJnV<5Aeg zTxlh_b;oY>kaYa0bhK_|YP7EB!R|E+E>=zK=Bt!Oy`ynAD4=!zIX_^t$mi6~18Z;}g_mWFp$$Gg7AJl_ z=YJ*?^Qx4`z6^;~0#-e_A8~~~A?yvZ<%=V)2seDW;hw)hZ_&(3q(~zm%{Z)7?=$VX zXLy(8MmD>gZ6=y6M_!|EHqUJ<{zj{j+IBWg0MG3yWeY1^A>-x0ep(d}R;1%w1*=>! za@uKw+@825I6i#Lh$Wd6gU$q8y;aDS@zR>;G87|6C2~6Da6!?@nIlCf@_Js6JTq;H ze{^>K(}ZFGMaVOJTB%T&24-ON_j=}|TH1@}WyiMlv^Q%`GX$4<@C423Y{yHS%~!%w%Y=m$Eq-4y4%9MTS>VGnQ^VoW10FE!9Os$?C}k9TbkHpQq?ng+` zjrq@n?Zyt;D4ChOUsr!_tH4IcF_+OGJ^SF8EAaOo* zZ6Qp{ixjy0dqesw|NPOG# zJ`4n9Pr4Zn>y_`;x&@-Pu2g$FL5~n6vMR=}D(2RW;_8q>>~-R^2glBvjEMxLMpQI> zJ>kF?ZLu&^$sxQ`mZBg3>zi{*`I_0tUXZ`h`Zztjx!LWI)SZxCPyB(Zr>wlGQaH`~ zOA&AN>{g*S!RWEXcVlxpc;|$>qIUMm=oD|kwG(0`FK?Z4L_XCUtBgk>cSw;tMZMX~ z5*YO1`qjbOYtHI(JO#5y0EPc+EXyTVVAP~6KlVyXZ%G6mbADzzOHyjGnYAKY{Rg7X;AR^}Z?xvb5FdJ3N~#Pdg6+!UBUMbS&i#IT6-LVQc-d#r7F znUB5l{?43a)0Z0;78FjMZSNhj%}u6jDS#{T?j*T&uj*GgTVDI0KVNOM+UJbtjLO+6 z@=4cWDWIWRo9H0xRMb2J*{0WIYrkj;+vZ;IPJ06l4t(b?vqMbs-XJQ1SZmq#)ct*a zs7hNUtJ?OD@JH{4{DQwA{8qYkT7u%o&QKO5HNYu^Ya1U07 zP_~)5@pwjl0-4qa3+Du6qtS|=4e|YpgmSXxv%{0`*&3wWJ?zxhS9NWT`k~OjGB{f_ zro7y@^vXN@8~VD>3Ey+u0aZ15$B&2~*vu{!=Q+Nrnm_JM{|JUv+@yrNW0K_(bH6wT z=My=)M~I{>#{cDga7ng&y_4ioN?Ne}AD?C=oWp-U*|iMb&uX%vc~ZXTctt9Ny}R5H zOT7R3$nT!)Xnd~sCoAyeTE>f=-?|I&!wZIf|J)Lj=WqQDM5FFtBtX?)H0qjBzsDmR zIJvH(t^YH6rp+xD)s>EHkj^clW;q|h{s-;+=eG$bS)UMNqiL^OOfe?TsoB+*Du#Rp zvGo4w;P+?5P~Ow#EBz1790;TS@la(cb0ssTz0Cv)yQQCRG&#`M7mLsF>9aQ%kl2SD zF4^H;haT{0h5;ImO5;H-|CjVq-^H_*oF=C&5oz%kvH10N@msHr=^dOsYlCTSj+qJC zL!c!)2M7J5oV2tn!osvMEt*)Tyn$x$V&scbl&Wu6XC#7wC&q_R%mG|CWB^mmz&(-Y5?wJh363T0fBF}ADV*Xx;3 z2z(h6X5qp2GA6l!pWf2D#wJMz;ZrlZT7Hn2TbN*+o>py z-BY>Ew|KMR4~#yX4lh<(8ghE)hfDJM7P6ixE73gcICalY*4e!|Wl3-I!OA`77iJZG z$`|IK<%fu}c#jglv`oHk1uR{HV$AO+(h(UM4w8=Qqhgs3v(NAfc<4jT#{}nUH$ql(f2`XE6E=bQfYdpPMtkIuC9HB`VBj5 zWMx^>SFYAgRrbeL#LTz9fNdICY4Ss--_uGOUfI{W!V{k#AY?u7^LY#*XCCA1ph5lE zUwKEHk?P~fO2y(se3^fnVcw7Bc_|`63+c(~q=j<{({`mf%g8=Fbsc*GXzo2Kow%UR zmfc}m?6JeTmj&L5bL$c>P~f<~jb6UHW$Vy}7T@7^CnthVy2wPLXK{;XtUP+Y{Upr5 
z=G8>2#i92i9OnM)3JYPKz7M_S;MV(d=l{AspT|{ygYy>OceUf9`wrf*6>2Q~aY=LV zXe949qr9dBNjD*daLy~xYE)O>1$49-qDfEv1dKIO)PYsE$mTl0b&ZYkKyHvD#BU0i zk`sO$9(?`!IKi$JWF$Z!+(`G-aw1SM;1^ZZ)qNVC8v;3_RqPm;IJW{A0UA%}@d$K~ zusG1}0%+gR+R6$Zwtxf@7$>f-u1UGYjVS!lj>GEs9fJKcxLH#YsAUkvME~hSXxKyZ z3BG+YbVY}lQ1-s`4`j}LS{j+4FRzs({TzCNcGkPJY$1QINf}YmGJ6aAq5BI@-za~C zvF(VA{2Y#9pp40Ha=cVGVR|S8F}~lR+?Aq-B$7WJygKtSHFE7&JrNzd!tS$~&T@^&9I*t@ydEaI^)@mW?KT+H7fW3}1QkrA=8Pul z=|~4N_P>=+uCwCjwU{)@SHMvGoYYDwwlY$Sws3QFX9vv0eWyX88-{^HnZMYO6N9=; z*Kb^7@R~fV$;8d)d{!ksy-<;Tbbp{0ey&OVQ~cejoa?+dK4EMQSSa~|P;9g8C^e6p z+{%C3mL+rP7On#K>imTZKHyE+z?QJdUXjemA$Grh_jw_+G_>sD?|&LJrgJ<9^hdfa z#f%;L9b_jo_l)fKxp@qvXb?{E>$#HBOv9}os-Hp?caHMB{8MK^D$>BjN6$Iw^IqCt z#?leVDb1au`O1Q8{*1Bq&Srzc4yuMw<2-~Dd7TNK>2tWLcCHtQMus+QRGd}4#6SFqGdL*Q1PDfCn(^@7E`w)sQY zjOt>2Cn@4(Wwx*;;p>HAp)l0w5iL<6^H>{O)Yy;R$~hROMD7ffaeAy$U%k6XVuv0> zhn*O6|JkX{Bdi(LcZ6lHBy?2pY?4Dxb%>;iClfaZW$V@#^W|V!+sCX}9*k_Eb&}W9 zP&yxBLWbztO>l|HL*_XjoyNJe<&kx6&obFn?&t($?4!uIU*cb5Hn0JvtfLBdT=+r` z=X%bl;ybECyLH@AhIo7Xn@^cH@MUjd9BsqsEU8ku8bIUs2m39~&l*EGub{|Ym?S<| zV!AhfTpNx4K0y2v6etFe*FHTBSDW%x;*HkpAE@s?19}U1o&xJx@%C*Z;N7<#8x_Zv zhXPeHsl)7BW+uVCom2aV4?Ng4wGW%^3z0~$vL@JA0(u7sab4iW8gPTx1`9u6wSeko zrFe>bwk5`|%3wU&GCNHR_rEeOgf4|-9NMCfgdk)dv|sTCpe=h;n3g~Pm#AOSEis~} z`tBby^~U&?Cs0Xb-LDc2MDm`me&?fj9O?F6N^3+`r){Fuh=E*ObYLiQ*AGpV8E{YK zQ>k{6{E??)>otuFl?R*G@77F3t?k-(dtR~#3IkFZD`xdp9k)l0QTvvFO2*Om$UyhC zociCRdEtQPI1Id@(vDIrpL_KBZnfy@hOj$&@|LH#Dc>bj7UmB)Lw$b&R`Io58J;(z ztE9kbLGT<2&}$trC$UPA`&UyS%4J;>RL&M}lLIiTiNmb9*5&{-WcM!aqOg$^dqFg# zwCAtEwmE^d72!){^%|bat-i(XfBQH^5VSE6vI$Ie6EFtXPc6z{t|$LKU5}qk)yyLR zCZ12Kx~~Yk3KGQva;Rdg?cEo^-ZUTGeas7Rl@sGDD4g%a$i9Hnylw2>3X@C5#+)0i zRQ)(iTZODLHwvICE#fQR7_ml8NhoJ>6vJ8~*{gRHKlrI4;MvIj z+D6CG^rs6Vre*OOsv1g~xOXH1+N4e^k9MY&4iD8lo9#>w@*^K_rL#DtU=k@aTTwJR?~Oxub!*H#9x@(PC2p)8{Q6Cle=TCf)PJh}30&A-E))0AiDa{?2~A?EyX<>6j-D@x%h^Gb1j_LL;j$qG+R@Qel~Ydj%T7`izy z-@6ShGa-l>#C0$F^pMB?;lIEFlmY$=tT9M%j`!R*2WrN zv2eS;a1*F2yf$_2M2CVncye|G_a2M?I^BM!+9%qC=habFPw`%tUCAnaw>W4aP1`QI 
zi*3hY>ro!BX2h}6jHMLDm{JsQ06z=j)2H@STh`w3uH_rDZZgi9(qZxk|&KKd^7mQU;K&NqGKgE2jKQ(rT00to-5@>hlS z-0P;TL?hLdNhe1#?&D*u=MEnr^{^sm{tL2rEr3vZcr3)7mpP!tzeBz0SKlnri@DXc z!xSv9X{ASr9Dy>+7j60ORM2$RMP*Ub$8%xSYt8kxD$uZJ^)$cVl#&p8NrE$#`YG{! z-UZ%IZ^?7)>m2FJx2M`X`joeeJ+*mUMza%Hw=RuD>+u5Gm++>5WsxYf{_YEV((t#OmDLL#!S7 zdOZHD(V0LILg;pF0PRK{z3q!O$sGbGLEmMD`pwykcfpkz9N$2xdP_t}<$_JwvJennVTVy1T6)j^7&pw=bD&WcOCV)0LLXBPH&K2;^KB#brno9BUanUcjg9t?3 zzI^9OyzR`aTA+V*bO_>e;8+C@=9`r=BwK(QdHAUo(igZvOMB8 z@18yQ4~43|1S=%NG6@A>qy;;lW<(O%>HAZtm1CnDq?H*Q{*TkT;jAlgovCAUs%Y+6xFlduP3ZgS0nKGy49n| z;LHd}>$QS$qSe%>F)>m>@)>4irM{!dssQWKu%8d0Wd8I~)I&OrWMR+em{al7q&4@To2|kL#v2H)>zp8f&e+%gxXTT|d1EB6)T34^~r9-}AChim-Mdib)oN=qCyJut-dSNjYcZDTGus~2Y+hl<*Yh;jgD*`RDT*E3iOyc79S%R5WoDK zgkVW^#m8)LsiVThP@A>J?b>JI*qBr9FZbrqic8CCdyxHM0wo@BqSqnnK>*IN(`U|r z+6s zAz8aUo?|`Yr0LtbEI?&M{G=?d}^HEO6RzCRO=y*F)Xm^dI zQ$I}v#8JXM>S8GJQz5k$e77W+T+fCX6#bMlzk{2;l3v^tABU zB8hR_g%d_j7lH8eH+{^{NA_7kx1rF*#qh!u8?-?|!keGtFM!a_8*{cllSa^#iCZXM zdw<_*fV{RUa$zI3zAT{D+LK4?E>#5g81^h&yI&^+t@|G}mrlHdK^0JH3`eI)7vctM zlZ=LH^S?Ym$ZO*4-Z(AG*%@DiHTJE?VzN3>hf_AGgvL9K1pejqOuCP_nk=XX4Yut? 
z*R|#JL>x$r%`S~NzEx(Gr)>?3bNv{Giajl(Q6cf=PSx3m*qX%=dU~!YWKI2tI8-dh zAacXv-DBnbvCv&ld}{*T-Y~WVHQ^xf;z%79&$>v(bvXOjjy4*X9by>zZRFct#)(EE zFL?RE#tW+*M&P1P-^<9JA@6<~!ACeJjtk-(A1(e~@GrgC4QFB;8@as$zO*6$-X8$7 zinAXixipJ7T#ar?Gmq4ROaubh?>y8HCGOjRyi-?n~4NzphD!Z;*Bw-GIaU z`g)nJW!syN!m#GPLZ+3NBCxP3Bul9FW@_DwkY_TGw%4A}KTvvj$NL9?Mxu>; z>U?3M>I+k{n`$KwHiyq6hcL+|>R)=G_BC^DC!qnMI7h6nPx8L;LX( z)x=?uC+f=7e)9T(O15c8sXe&4*gH79Dh-0Z*eRhG^oDH#WbRsy!85=21W<*D4g3i(=cS(;tR3ba7e zh>}LzJRFOTzC^?M^QDMY*-x z+@vj>daF_C!D97n4_35E^MY>|8-?J-fU?8*cw4)L(2E4!3pcn#t21vA7!fE!aWU76#$h!)Hkni#|UUF(W_%RLd>JJw)^^eJYy|K*%Q9h_~)JD9b}E| zCrCoiT~yGCxsbEC{S`0y4vQ6tzbYjW4FKe1(xWlnA9I%vZ(R!M)J;5Eaqg6E8PF9- z(lZUWIXn%cb)A5B0DE_ET#s9Q-|m{%=;4Ihn#QJ9xy};58hzrMc;nuiST~(psy%{%hxr-jNcuLxKY~n%OoPx58y~p8lQ|(ft z=&yEe4sxGyI+5SMt|lS^on^gI+HWJN*o1HoLN4Fjq@ZsKrdyoN&h zIf%1<)z^oZO zYill${yy(WynlPdYS{M-Ex*mM2uN>U!_*J*T$YB4+O)U9q~#YVaNdQ4u<`MIfMMbO z4z>+cyO)A$^>FpfsMxZV(f}=lM;K(qZ2(Y}egX}+Gh0nsR(a1M>FDLmt**UAY1CCVaVi)ccg~5y z5U*9pN2N%rb+B!ABS(uBrV@$0-{xk;5wi5(9CYOz43|7-=)%z+JYOi+yzbW9Xg%XA zLo5UBmpL3iB_$Y5WO0`IsrZD-9H(If5R(H6ycY9-*;IZyUu|BD;p6LQGs&*0($;zOJ(`^}I?ygTaw9-!*88;)+=xVwLB{JN53iU-c{9Mz9e7^(jY+x9SFO&fi3|Le-I)Hu%5bBm<>Y|5oMw~@Ym zqnfLj@Kf<(sSO_JA=%oFD^gLwD;s!UP$PQK*EOYk*Rl|Mx4kd+!0;N=D={-O5JZdMHrH-}sT6ksE#huh0`8uM1~sTet-edF zZ!Zr?`d4W%R=93m1;I2#0a0GK&}|Sk4+7#3FxG}R04Sucg5Vi_v~1hD`RNl(zgEQ4 z)6+#f_60#}c7=0kxF#AM*rA6byb_Sjlvyd`DbdOJw_`%ly+Q?Spa+Zm4{B+&>q3$A zre}pwid6CUJEC?E7YQH>G68-5yyQPY78~m3e9y(2btz_dGljln}LHr5F(j^0SGp3#2UizVYjh;vAQT zM{mnf&P2U;ya(^PPZ@4UND5f)%QE$=6LmQlu7VMDxcBjUCS0zsA}Y-ey=B$9*{MfD z#d58>gwHBAjYAH`-Zcj@$&X#->};%puYoZGB&D3-jXtyaxIy2Xm)WF?;bMp~o=_ZP z8yAL}nW+Y+CP#E5qc^YO!%P4$P@g}b1ANH1gM#CQ2tzB~c6=uO3R1o8j(drbq;ANP z@$q71`6|UuE(48>&V_ zVi6*(H*XrCRwzT7kVubj>Q3GnVYi&-X71TB#r#0J^pI%j0CfkOFt*89SL7y?yRmyG z8*}T|l)Yrh-$j#8@u%Iry^cx`Hz$PxC-;JR zHHkOc)Vx54VCD~LmPrdlz>)o$n%YO->{L5+IQlrP$LBulIi|QLwgx@A7Sd#t9q-J* z!|~)nTPZ-lj+N)tNnF5awIUsa#}O< zj^-4TIC=jlKvZjRsgRN_th%W)-jNmX@d)Rl%!js+(JY&^hcir@-hIO 
zt5;r@Q*|sCIC@7x`<*S-W+6Puw}ew4e}ZCUR~HCxKkhFkY{V=`}6{ ztZ+)`)jhr)kCIY|x)buH_pMIW>`+CFIdjbKL-P=kjju)4ZEshIBXQfIM9t+U$APN2 z`V9x&_SAy0Ex&oy)4&v+{X%Rblty+b%5=Q`CCug3!BeEC$WEI(y67MlQU5=BMyacU zMaaC}b;1-W``xlR)kVzKTBXJEcFAPJ9+HiZJsu`$U`dkyHz{K=`M^@Jd9THB)&s{) zqDHn8Sl%K^Jf?S)J(#MR=KOr=EWj-!2<+jhpaf>gL6@Bu-WeDc36W3cH8zu#I|(dO z5+o;l0n&8QsN~?%s4N4i&CvF*H*z1&QR>=?nkvK4_fHA$vy1o`5m65&jP#FUZduZR z61d_gPrQEAn(k z5N6%`XFTl|n*N6?@eLxY?Nz_>usork9}YcmXU*{a*l2$~X;Np}98(9>6Lq?bw-h0) zKbzRz=uYnTzRaCG_`f+Bzx?EiSv< zEEZ7GWLow}hU?}X>amfp)x5I)i{vj}Uaq8pnc~uae}akTzJ5xzc_w8gW3$~Xc}1toOX*^k5Jqd)Ms1rrA9w7~deDfU{vS(fI5z$cL4&LCx=e1= zHCN^ww*;^2V(q_Ot9{NzoN{q|O1MscF4jN)@x17O0d zPfu?pZ(`cxV0`1I%#@Pe`xsmN{W9aln{m1QnFRhhQP5xrV1Z)4*lvBRpLp@!n6|@WNswXI^?|isVssCQ@XJMgUpB{Z7 zHRiR~1Ilyn4UgSVcf1<+-iPM+y9LS6C`FT%133eO-}zvSfs<+rC7q3TP%haS7HEhS zoy%wYSx=!C6<_SYws3xk#Fj#t$g{0!a;KbA?3Ru3hr0M*EI#|Bof7FE_99y!%MK4v z0jr{uSK_o6q!6lZ@Go?KyS^6D`ce0(LBYzSs~D<_7fghYIO-qypQ0wDm3Nhrtvc@T z`Y|g%OuTLB;$g^Q_>G={{I=ns%W$`i;JZxQ0*38Yq&lwr<6ZS{W-2-MQAR9AHt#AF zLIu=HKE*teM+BYruArJm#Gw%fdWmFN)*ri)#y*HBtTZX&xI zFCXh|syse5^C%%)4`cwxMULD>YcvmDPB(6;9m8FczSNWEho4y&l^;{ti*u1;ltdVm zq4?;U22lHSjP!-S`gDnZ*2X%51AfcSSjWbK{88*P?7kQ~T3}Dy0dG}+=2$8~na2Oy zTK4W+-B*A6ZCiq8tozOg5~NqdBO@+gamZC|+7|x^#>@nkJy0A7xH}5JgK~Ct?Ui?F zHF}%Vb)qO*Lv569kzV}2VkaKAwv$onTKDZseFJljgMs*HOm`T*Fzn*{Xhz)R6V%SBNM4N7fFWa{@eT zjsBwDWZ5-8&@DpDZ9F%p;YT|-)30o>;?H=&3w~07HKlpv=B;nlklfLCgBI~6UlEkQqxx^=@MB?J_s_O9Hsl;`k zIy>`fg}8-Sony!9?$ko90}E4;E5%YA_S+$}E`5E?#sg!cd=%rhlDP>KsHfFG(5;2h zV?l*$zjF?qceO6b{fZ1Rm#Ana{+!({Ep)YR<%o2t5aHEx;5nPYh*=o=amL@!a3(%T zky#>mluJyMdd1=h$;L2Q(32qW*-4oHwEdHO4@r1m0S0KI)Oa4#e#^1o=Aj?{haZ6@ z(08TiMnZl$XFNUM2k{|CWMpO1y7-2>m}}+I7LSg{&792Lw|n><+YKV@b|`5-YE~pc z0>?kTyXTnfnXQkIn?bUo1Tnny za7SiEkE@70YSK$()2jEF7d~^3S>dH;y)<*=>7ScbQ|#-}J3RYEi;cyjHR1qlrw03A>lO$cP`VFrTpIY@?N%2#)$djrJMqm!od25#&&HPS_bpwiT^t8V{>mHqWP5fG z-5F$xhq!B{MIy@XYv~q)b?OP9(-O0e;lIzMv}8`|M5Y1re$Doxw5j5 zv@iQC?zpqFGgwH-J$(2uE6lW+(YpkFFtVg1(VXogQ!Zh_zs!Srf<220;`!ovC$_Ep 
zS2)!Hr^&WPSWd(nS*OJh@d?IKg?s%>vW^S3eBYgo3)AynvN3%z%_DcT>i#JC&X2>F z0@EL|Wv82=XCt?OXlffGjr(Fj>{VdCKW=9wQ68!wdNEV{`w3}jNi<}>aHd;a;Jauxy%%k*eyZKxzvb4nL3sY=-P@`H%}-V2y~ znkJlBjHgqRt@5Kyp5Wo%``Trdt@f;V%PwR$&Z6w-UluM;9l(y5$$fQ^*3z^kx)z#Y z%WKQgiWu1voU4evc(2N^$cYm3!~Au!)G6$!NJRHWy+!W5MAkpNino_SKYX|Y{B!7n z(+VV{4$M!8PfgJc=29fN4c9Dfo#_vI@lD?Rtl3CV7908Zjc(gb^MMl8-7g!du(ad$ zOvlVI==N(zp07e9THA9~YH=05gNF72XUxu-pz0Ux<$;4`|jgy8oaT}D3cMZY)x2O6LALurcRM>VQ_TKT1~>%Id@^>_ijCf#?O$GAXmBs;(53d6wGIOM1~A4hRY5Y~+ccL@QPK*s8x zpp4jN$~}ZmvfzIHQgaeBK5kYe=;ZewjxV%JstjLbH9t0?#QmDX-@bFl*FV)y*}$MI zo@l;_W7535e0dgS?XuOET*0<>GKj_SMXl9dQfu{6~^MIe3e^+?|hy5)YvFoJShMs6pyec%4c zvm~)a54)tC;*)E(!Eq0}XCzG+eZ^}ukH{iIN-{>8pRL%~e9{vp9_rngIcqvt#6mw= zn8&to1LC;MnjFiW$G);BW99dj?FvzLwL#w!DlnL{uusX4MQzV19w0Yym;dZh5(#Av z|8N0*B{a}mqF6*F6PO-dZK>B>k~eX?E5;L&e&~CU&d$R#2SR=Do7X%YBaXC5HUi?< za}yJg=HE3iFi3vkU3DAMD#3Tv*>!Mi@nKG1Xned9lrf{Dw+@hz5CkhO9s{-S@CcWm z*^j)uHG&N=dY#sISucbGQUH&AQ%en2GtQ+(A*r+-dN$TBpkr^RF=z1=+qQ zyoRv>w<{Lme=4*W+Ac1lkV3`E%K8S*);!IP(jR>ouig=`Y=>|?4VQOH(zeWRkc+lV z<7N6GDa%VMYrsI|p`xT~{Gm6N=e`p%JW|NUw6e16b^wH@8|{OLR>!;O5BErXj}bm- zceim`hyImcHO_9%KU~M!h@9PAbJ-SnYs9wl9vD535GU-&b;Er9X<}N}kG?P-MD7rc zIzFeh^%ReQ&EiS2yG3Y15ygk93BjzP&+>CK5f>6~oUW?WBF%e!bo@6L_qPj0`fnXj zH?*m!G$Yb>B%UsB#682}hAh#eO!0xsw?5@oyixtkHCdpMz=zxrF5hlfQ~GnKd2mxa zT|Dc$f!}(=?=HcGe_yx-W%0mVeSCIq*2V5hTF*;}M479rr6zb&(@tbEY#oaTb#}~| z19js!hj7oq*=V)LIAhD9}g!|)asA`NoEs}SU>4;2&$!8Xka(#+s+L*Q?PbL%yN=|uC| z&dNv=_$(GXuO*IHcXR8P|Ni%O zHMS)d@gq*XeQCp6A$s(9xycWR)iWn$bYta03y&5A9NERuc%R|S3okwKH{Cq-btCk1A74z<3Rb@QbYbS)P`WU&Z0p_R?A@4N z9Y%P{Q(G}FCzcxYzNuSmmKu^stxCWHCk#5ojh1^mXmp(@g3KQ1!)~Be5mYEbVN~+! 
zZ6wXG6W+nhouiWOLsayylo>op*rms2qquBLXY2Eg^nqz(#IKQ>Gqyp}I})4c?>y@e zYBUtD%(A>`@i#j00@k0c06!Bvwcr+RjuVdnkj;K&Sk&E}U74K_ zy8yXsI=(1u??jRfyoB4*FsE3OssV9O`qFslgZm$Thle-ecU3pm7IF8yq2 z2@)Q4_O54g3j}jtU9OOd-2~75n@|_b`qJu;Pxx7*swBmyBpM!)W|9-lndo1hST(7W zKBCqi_56I}0z#j^7`wXgHT73lZkvpt_0X7maKrO!qH9DHhhz1yk0;%bug&bRd^yL* zoZ(zG+haS$Ojk99LcAMniR=7$9Gr+-B%X2^1t{&bJG5Wxwh(S~yqiIoXsbZ}vRT3O%}>1Wj1k{dIJvON7`~wB6VgHc>TW&$`~;b8lOZ zDN>@|!fvS+IjjG6@QNFlv-*5*G80VfT4+8<2XE_5SINg~XBuEH1CeSVgXl-ViUNv@ zGB_06_4jfMet}u^?1#~sJ=Z>p+2mN}dA|2#cDfJc6-d}kB1OyOrfp%MF)`a3u=B{U zu6?bUPAtBjbhz!y#@c))r-0ZGOx)7j5QkFz))a(|psJ?L<-E%gwiH%h?l^yPSuM&} zl*?5LXn4+#V5@Ob*u4}uL(`Y@B=r_Hd7*i)pcdu@F z);u;XdwML~E!!d_H9R~4yIRmHI2FB@5bVV^9rk&cfTDij;+f)nGczv8U zZA5dWz(S9b_?0WKfR?o0slam#TCsXa+_9?LSzEi4+C|8gu(!7dz2dK5PjKAQ;0(B7 zH3%-9=iwd)Yq`u6aNrdWW)Q80S_sG*qLh@B06xz;U(78gSe~n0%^M*&*;kua|2k$;kTRr1gUZEai zQ`4ewK7%3COym8II2s}HFy~5Wivqi)JMmhly1rCdN+aE+j<-u=Z66&SIZ{Jx>+5+U zHv>+QW7Tln-7vO0xv8^4i^hJ<%^-!{2zU`boy_4r`F^4(o=hVXafClEzd3 zy`0IPo16pi`RfYdkqGqmpd}b*^TI}PmV_8mm))P>fr=S91WhRV^-X^yX;djGdGT4x^rwbB#fE(-1wAS#{3i9 zQM;qVA#Ek@eNqE8xlBQiU4Np8D)^F7J$i8LjTj!hi9q7;XS zF(GFJuMa81X1}(_i5$slN#TJ@s%1^#hzwSYErw8NiBD1x1`$37q-vkJ61CN3QihM- zi)JUi&}zA~O~=D|z{4^Y6`ZZ8oI}HSu_GD1ET+HXJ}_lE`~ zpb0LxLn_bPXTqwpkMteL0!1JUp7Q*8Lq72$FtZh!61M1m3ugzxZ9hdZ>Mq>lE^~26 zh=Gp~RdU?y37ZCqFVO2)0HIUW@lRZBm!Yr&h7)+S--ELT9Jr@vXQQVGW?f*ry)o*B z&3)c*ez3sOG@i^#0kXI&D=Rg3W*J2!G*wxB4$}5(f!V=eom`r%3T$cRVOgavJX{klAT+B zXzhHgW{{A$M6~iSw!R@&y|ECDg<0GvCaUUE@B+ zMx{e8@#s?GNsi8qA7OW`*IKyi9;rM{*WiAhJYW?QT&-s;81?DX(pcD1b-B(%eA`cb zydgZ2KdgAYsCb<9Q?pZno~PV$PT|nMcC|j;t!14JKovA_R&rYD8cIy>^7chltaVDa z0?$jjq^-}(p>NcT9gZGu5dN>2YPjYrU3!hXc?o%mOM( zq1<>ZbyR=(zLskNS?YCWEh&5-FSrBWEQzB1)TYmvNY}VIyV_M>#Ib=BShpC&Gg#T- zwkr4BOFue459nIUu;;ngBI0sQ>BDtG4&+;%@#Tg4-m5o`d&KI#lpTITdFRWnr4_bi zLv9w6^~LjvUW|2@N^u5&|9oFQ%z8xL?-{@#;43HZnp4{qlIUl;Lo@$55?hm7`|b6F z1o`&)!8^xGZ%#y^pemfJbCIXZ#``gR9H@OOQf;!Buad4dR0>3@Y?5El{M?p5} zvXC4B{m@mTxi0M5L`~3yMfuJY!D*k8q 
zkF9QQUr)cI8z@Wsw31EA^UAbwazEPa_JcT*Yi#PAXg!-7Q}d%vv)@0zzi~BD_xoJx zUL?0|!q-0Pq&{l#FuAa%Z>mX=fuF)J5WSBGt6re$ji5x2HWV|r2>2>9;x69jqwM(} zR3kQMn6q-{8J|zL`A=JMcFiDxi%omhbMlvpmV6<8?gr5&g>f^!)CbM=qn^d2 zA!pw@-P8jX2)JODK+*jKx>pbmKwuR?>XWh77AIcsu`?nEN%J@y4tD3Y0n-HSr7LdG zD|UU6Ar-ttwy?0ksb5YfEsd5pudS=&cUn}39agU94H=N*$TVos8v zx(6#pQ=rzt=^PnRv;wue^wDN$4}5BPFu00?a_HmEs9Ok7OE)0aOH-ar;MRwY)a~{( z<(Xrv)#Z*to=2DT33D4k=quZiyN<=~%3@nCZ>=%X)4x}Gx@|jstBmmGTVPSK!B>gc zr}#*<1NpIBVxuy3$vzy^&&It`a}l?&>oejMQ&Y5eVGP8=)>_vz)b@<2=cWt{KLSo^)eg zkLCUCzp3Sj?nrIF3Hs3=f?YbIxuvi*G#V$L%H(15QWQ=C^{Ou(=*}N;GoImd%53L` z$~D3|*VcQxpXa9E&*7-J-uU*1wdeQjZVH9A(f!z^BwO8klp~N z`0oB7f|?Upl_0D(G%`}j;NbZh9U3a*=qLyQ3;8yB1p}>+m;}_C_Rn{S%gf6-t6{|P zRyHZOlp;rDuRs-!4^xRAeLGZ$GP?QHrq5QfAqf6$H0$pH68N#5rWdjC~R^fA}m3b1_$)+`L zResa^f31h#N`HM;^M;a%=jRmFz+N7 zNKQg8La;FcyLyQ6v_s|`6-6X95Nr(2FEXoRT>dB7`t4BH0Tzu&T7WH}d2tP_23uoA zjczp$@^?6HHN?KE?zE=ltsX{MII#ke`tFPs4Ca4 z-y1~5AVdLa6cnVpOOa4S5S5S)>Fx%Vln|9}1p#T0ZlrrDE!{|W$2xPNd+U4N=l#Aj z#u;ag?PhO9*1GTey5=?GH~-V4tjsydoF_{u_tlFRXfTh5&t;c2n#WuZ0AB}Z=fQj5 zxuEQPnH2BtP)RVPGRGH|?#jqfY>8&Dv{Iwi|F4};7id9EBkn7LTQIPsclFLt!zZ50 zF!fzAT_Mw;FN++iV=$K`_3q-6$JCkw(f7gtwPZw=zZigkB;dwYEG z)PnVZ5*^kZ`>JTDXZ{Ga@C}!InCx0hSP=GSl!lmTNuo1Pc}00~qH|HS8a`Zv`?d7~ zD)`DQ022{Wo0X3*^6lG8{Gd4xyux@xCqMXBCZ3|)`bQZi zfg4U(pYgjsLyOw*sdqho%}!zFh?FM6ItytXNF_!{Co8uayNo)o_3eJqTp-b=P9X05 z$a8bQQ@Iu7QvTdf&*Ipo%irMks)DXA2V{L3;D^IJ2N-#k*}J~p_mojm=!30&!mEW3 zk5gASwo?lZ|N6_Q0O55TO0->02kbv&@j<(|&q+OyI>A^7_wTp4G+{@5qIcY{%SrNm z&IUdynhifWG&7)ODzw5}QJ8r70+hyOVtDV^SQ35@85>ozD*9w;)G9j$aV*M9wr*FvqGx+N0`PQZq-y6jpE4Gpz5 z@|X?X^T8qitWv}baR>}$ey3jHghxoI_Xg?@9T1KXbDIz?FSDPf^4zt4KZ3evYN&BT zsF{!?gqJQ=Ut}r2EhmQur!g!J$Z45dod4_n+TTa1)eQ`v@4mR9|1_JBal|D+G&63F zx=cZ!T~Xc93sBu0@Of!uVhn_Z;gp5>I|w+)eNs|rQ4@NhUEZWW2OUjZj)(^g6e*A1 zRMR`=zK7?on<;)doe!53zISkNg@S_F;}?o`vOnC}RdC{GyRa=k>0Ook)U(m+H=HpX z`_rlv^})enggY{@8w(%r>OkvoH3(Lq?@k5F@;YxbqVf!_dobr`N&2t7aPs)%Zem{U z;(Z&@z~X~IMZ3BbyO~#K5HIUE{qhSj&B;+iYCwC)1@eB}lQ0Bi9UyrQ= 
z@ePj4c`%uFm7?d?B(MECVa)czz{gHhosIGt=>DL8oGRPx)E{ zso+}O71S@7{G}2IA?K~iy&2Nw@(mf9hf#GfTO~adHr(O6Un;XFH{#d7p5E`9k<0fz zf2LYv@_wK3S*J4YpakQMTcva)h{$`n3T3p_{%Si$H@6#TOxmK)4iE34exGowR<&am*2M&f9n%U33>xi% z!>d$aL;#iOakw3{TwFcVf?lu#Xiu)+;$C{s`=}_0J~)kg+w{tTzlKBx3;-!mPySzmXsuMrG{DcgnBzjZiZ5WNVGS^~1Biz;RAoR$8e#;Thoc`Fe&>!m%QU3lEiC15u zwFxPN0g%N8D|d;UZBKFHgkA=P z^SCP8T_20u*3CYOl@ju~Kmbg}mnL)B2|R6T9p7_U(+*OX&kSh$ulD;AL2g`Nkuuw2 zdJkxd9}j?XQG9Y_u6KvI)gF0I;tux`X}%_)5o%H^K}OSY4}m1Jv>&A4;nYw;q@*8} zomhgc%a6a%F*4=#Bkk8_bkMplUL3ovO~Rxjy0)`s#g|>_deE7!$TH%x(JrNR=FAzv zy{X`G@LEw{7vVI6{M;%$=du`RY8mP<3zRK`T|e|?8)`rh%5?Y*{X9(L5PICe_nJXpi=q83gfI!N!K2K>pu@R z-A%dTO_l2&O+2#vRrio|VO&1~0ExEovMV@jjw4mP!ErS+I?j$08}4rvRSj^8`i@Z_ zS8q)Kk{gkeD8{O&57F4zMf!q#%`#(Phr`@OAV}6-xg*V0#F|E^-lekQ7*Eyxn7n{R za!$bM*eDI@YF{hb(t<76uX711Hr9L|9;LRs=3i9bE2tD z@+cR%5-Rkfh)fvaY2Rx-uPqZez0<2=zE*`i3WE8M{*y3g>yvJ480smbxquQpilBpR zW4F=>c-{i9je61umIex_L4^n+)MtT-w=O_OQX(WFU?^Fik>f;1-1D_J*u^m2 z2E(IrCn!l-q@s1k&m@AA)mEtY;zUOa#iL!X~6Up z{`BNRQYxna5BsfvFIFB;YzuTB9jF7;bn3+!m8xlvl`8Gq3uX!nS`Q2(abiCPXpX)x z3SAjjzq{zo;3sM)M{!ItyJ9L|996}zl$bnFCG=+AllXq@KP#N(CZTb=544-o@I%Aax_z0!SZrnI9`f7#tcHnC5CA9b^#_R@H$kz5*K)#hPYH(0 z#=ESaf08V%1-eqeJoG9N114=pb>e|9Uckyfx;{ey;yTx;s9rTSH7Qe~QCK%>zEQt7 zwU@MVh5YhYciO{YZO2E>QZ`9Hv2{-yWHs17DI1?%(s7Gb3qhzDSBbQL zEYabrb~llbD3nnZtZmZ@iVRA9N+F?z{aP1j4I?gH#iZVcEbtB~VV(z~(hD0MJ zRZn!wY7~$^O_0iJJ@=9O`(^V<)0FQw56*yX+neQ~oLruf84Ziw8+*DT8RY4vRn`XFf7+f11TdIJsxSw!;E-ja);sZP}TyhoSO4R^)( zpt}5x^7094h*exFI8X2Gs<`mAtM7Llx{ZBC+9e}%8|9EEE}CTpxF(N2G3gA)iAii3 zb$Gs9+O@e$1};jZ@R#(~IrGuViq2dD28A5$`U@KiKzTlfl}b0_fh*huzkXVstkGXH z>r_D&tuDLjxCKPec)>5toZa~$%fK2HfVI@`lWa@M?tFC0+h+0Z3Slf+94Qn(4$iha zjxXQ>BFCaeQj`qa*q@j=uaD(}`aSXyawH=fiZ>A-?2FX*=DSdoqUC^}9)HYIb{U7w zeSrk9-ddybt~okJr}>BAx&+Twwg9+D^j-Hw>s~xU>dc2bN+H7z9s-+s1Fw*eGP?Ms z^OE_}M(2-2y7J91)GOUDJYu^wF}JKGJStbr(TDg2y{S7J@SAE!y?yr9KfJMr)5LOS zpTA|)fW5>8xfEJuY(h#=sdH{5PCX=)ba$bKXK!s)M#8piQ0pj7$G%4El@?L2$Awfl^TsuXx->XaKr^eYI{ba$IV_sp#MPwuWqYFjh?k!S%9 
z`us@Z4aI@pq8RkU-x#3&^3lb5ex@f?yF8=yrnAS(iU;BI+u42zV{^-2Yf`f}`n=5H zbg8L99^md&iC_dLd{9VL?EuSoR}NnVec3g&YZl?QNen)TS8R$Ks#fQF-hZk+mjiHe+Nw{!DD-jfllxn> z2%lG?JIPB*`wKg0IJL$tLQ76j;h#*1$y-+(pK-mf8hPZMQie&fOkj@GFnQJvB<8v8 zEgttA+_CJnkBS>_O!&WC42-?WX&l9|wPCY!?WRENlhj!Qg(7sh>$Z%YIak#~7kGfq z9;o9){~)93`()bQ#x=J6*NIv5#<1qcrhiOpxfmRWlq}}&^EzYF;`6xcO!Z|u2JRJSQ+Ik_0S#h~dX4ycyOWSaJW~Wujn}Jb{{n17!PJb`4Tre7TRyj({ zJ2JNAi>^#ZXxI`5Oq20bBD$T`F{%-RJ};M+HAm+X2{CdWRU8vsYZE#5;t}$8&8b&y z>Fx$wNe+$%-kn7^Aj325NTliX8CaRiIRc7L8{6>^vghc-UJ z-d|?d$&fXK^cD|Ex~igcX+xnNxIw#yF0XN3e_mhj>{~t{g?19N`*#9EnbKK<`hEAg z!qgwmtQZ}Z@QLs4+cwDCk}=>@MKsPVhUZpCXKZRbV1%-`@o2X@SFSpkH9OlFcY3>Jmhxj|w``?%V2-@W%1+-w^sLW@HL*oSZ)YNTv4#*RJXDz3oeJs&yYeYlXoP$Q_q_}d@GAa4~^>YsYF>2-R&%NH+D;Bucq}?_1Yb0 z2~*w)$Bes}FFteUXZd?75^qH&SCuMC-a&T(?QEpY<}v;6qbgVEWr$=7;M?So8oV%HEMefF$f>e}`b<5Fui7^17| zs}tY$98x+n{d}ug!D+<4Eh8rk{@7E$jn=ga@KK2#O1SZtMlU~P#Kcge8&xwc-RSy0 zd_!MhF~x{IlGk?5B(yW9$4w0^>cDKf36g94B?ZQhOGTld@}GZ52>M~aPxOe5x95My zd${;{<&~2Yl~Pb+Ru#-XD5VtedF<^U=h7c7{AP9(YE25V)3!$oi#y$x)-3E(?$#2z zxElyO`d;Y9R~ZDbshZ4z#MH|Ch8*oRPw#O#6Tw3UwB~1qG57e?rZ-I?U^GE4`Bofv zeLq+r*dF*LQvoSW6`%hp+bG?)!&y#jS4TPJ&y;?C2RHe)idf!fZ|RbQ>(;v$cZ8Z% z7AiHnojJ~Ju2HAliU-J0XM~{>T$3nCE{Lm^1-Iw?(IB$nq%Os>m{(QYi088rG|xJC zbub!#$5-Q;c#<;BCaEFUGU_(|kZompBlI81jnwjN;7KT807c_Oe_D#jHNk6jT1Rfe zg+75k?l&Q{iI&|At*UbB@U4C$fiGM2*iNjE`g2=yr>#Acj}bEsxYSulJ|-h+?_LqiQ#cJAURo#c?gNsVi-3 zNK|r=L(`x-6F+;x10+c^>6llx$drNADK!1gC(w97&fzWG^4ZK@(a$&&(dC!e zS5!d%%kdzg^IVzH%(u1jFB8-aF1uLJf`W>uo0#oOW0nJ3P_7SDI~RQ$A(cYr#+!cy z;>Lk)bLC$vL>jr&wqXc+zuR3^EBol5v~;oUXB5LR*PV!HX9h7eSwNU&<2&xg~`=%v+-K9~4wH0+Ms`Jv-US@m_u`cziao`JR&-H#&*PM&$(0 z`@}=FlBy!n1N%^*ha~RcKzm&1z5%>xRP@WBlp@E}QI$Z3qlL`3mFev3)~jplm2?P& zhf^yp8(N&i_gh=3*jce~?H5K1Dnk#<_4iLw{oTS9S@Rl&{Lw_RiOFFqFJnYvZ>Mj^ z8mB42VP!_WCh2}s6)GnGw{;eNzm78K7cVc5D%auu5=sBNaIl;Bt9$MDf%p55c0K+N zfA>%e%3c0|qbknDfD*=98wX-2hdqYR+8dM7<=f%@TuS^56pNUrOPnWFA0sHO>98RU ze*Yv5&ikpyhz}$&psEyF1BF!IprE2%IsByFRvS*fmoW)0K5O6)-jf#4SeVB9^ZANs 
zG8O#4YJTqILkMnQ#R}M0YUws15c)uW&-9V=+l)a@*q%<-<4#cX(`B2(o((;V?Wt_$ zX*$nhuHw%5+HRFb(idy zZ$Lz|>vZt;Nr+^{TVE+lke}5^7oPquyaVmDa+x8L|CW6w5N)3#=*gd?8|f`ag5)R@ zQy`RY0kS!N8w3pf5-yRD7~v`Y)rm1es0D5A1yOF$(*V;w#l*$Om)&fOr(j6^;%HSo z40e+*X#>#%z!Jw-R#tK#@|jjGwRsIG4k;~QK(wGtgSzFAuzy-YhIS-*2&(MC&Ax@V zFe#Cbx{Zp`8(Xm>q#KN;Z^EVjO5~q(D~4;hMFJE&7|@lcFAH#^$+29z@ygOo=ucEX znt1{Y^^af&PsOS|&&3+SbjuwYa&g7v*^b^}qfEg+55!%8roL{!u6+~vT#y1WERK|I z$P>2A>$x+YX3&+4%~HO43KmHs3@7C@>3<7?SsWlCghKYA2F{_^#@ZMUeg06}!Xn}H z<+q(@wVV(OXh_G1gdrtkkn}CQhXjG^WPoEf7k>l;tO?Ia4@aZWg!ryE78@=`Q&q*f4c!2+6h44jU3a!d{PnxuZ$m?M^RS@g4n=8SsLOwca$Ikg zn(@2J@bIfUL)N%Zqs?=SeSc*euV~nAH_DNN&vntLLcxXI>!^xxVu3GMiNl;wqf*Gb zdqWbvfA5HPN-rsA<3M(AzCyBped&O^i$HW}|5x+58kB5+_q=fXZQC9GnUGzm58j1kBi{ z!l5qbxB-wOznGH~zx}Z7RhTI70kqno3T?I6&nApKw1zGqG!#!xOiX;^#tjRAj^0oR zwLoc#sk3a`IvwQw+z!_yw^v4Gp+^WFMNa>UJrweQ^sGO0TI|n-)iC&05iD6q?&d3>6_-P~!;!37r>-)qqGWafdQvRKQ7g3>@6#}4 zX^x4tjGOv|!Cm{1Cb3?@lqUGy#rv3!nk{5Bf<*y#LMu8u$&P57 zsWqtW^bH+1iu#hu3gcac!SY{VYG@?#c&7?Q=@O2HWniPcwb(UFRQlw*(6ZL{{+UH7 z-ludUYaut7r$2Gz>rgVJ{<|?n&nYquZeZx}=Tos)F7(F?R>XZ)7$GVFzNJmy;T>3LWU+llxR0@-n4}0qVBpS4^HGVLw0!$ zWHvKoBJSP0HwIh1e6#Zb8Yr=_8TV#Ltx-`^mq5o$JHO2wPZ%Z*B3A{mP%tMfNW@gg{IOPbrr$Y)cU7|U zUL0FmQZqzon46o!aPR+tHXV27oanVL_DbFKcx??Z+rHl>TsH+ErQJh zc{nQq#n+cm)&&P16V;st!{2Anjb8w_3c8Xo5+E7aMnXfNp8$q^1xb!QC77SZ5c>T& z`^U$ri@k|yoWd_>kY&ye+U#LEkEK7$o7r_-79BGCwfP=o9qE?9lI1iXiGltfWm2^5 zf6DAN7_T6t+;u!qs^g1ye64f!vZG`7lrr6)=3vhk>evbQSQxnxXlLO|vZBXlQUz~V z;W=(cP9PYywvm2I5`uLY=g}j>0nKNdn>PMBAmOABYMQ7S_o%%BYML8kCNP>CMrimtd>0u zFqpc6RA3UVXR+gN3kc6XRtALU(Qc=7ee7F!>31XpnnL>OmuF#C1Q?@luyvvpMnHDL zO^Be;L+I>qKKq}IdiXt6-Y9NylQo^TqB^PEWoA(j^+iF}=_RdzP=K>ceuD zQh$9mBYi2M+c|pf1Z@mDDynNz&-fqT=tjGK{f6gT(Uu#L>j#t67&@X6_~)m$c=d5O z;+g?&1rPBHe` zUitW}wO^`!Z9CO^L>Oi|!;R)u^9PUv`h#F4ZopJPom{2W?d{~^;s{uW-{IU3x4y-F zwIswReMEhoZsxuAvE@M%i9g(wlb6+X{AhoAAsRz-s)xs&Epr1OkDPrg((XP?lD{53 ztS~d+MSmQ}1Pi#lToBBe0<}OlHWBL!0OBl1U3cV-RAIA#L8P-^m=)o#C{BPsIDPsw 
zZ-FZu7L2OJZ{b*%H*&*!SGS~Mb2i7<_bDSx6@e;n$l%P%3IauefX}3m9tsQ(C~>e@ zc`~bH?;?Dydre1rL{G4SdUtZtfU^I*_ z2_M`BHg9b{JzK%a4G-`38BILAR0?P}gzgU`0c)U8sk5o`qwRv{6<_&MP;S#<>f^teb+gh4}iDd~eUyQTgjb4pmycO$oX>NH@(O2hl!oZjtO@(9W(P;RAU_QxL*vrXcp}RU=>2N(FqN(1Jcj_`V@ZC za)al6eVgh{K{W=oZT`3B!>EGzQ!L!ehMd~O9)Gk`2l!Qy0GFRPu_IJ^c#dw%FqL`L2vmtijN<)@!TD`mhlj)LY-Lo3Z1$I zWfhLr*2k5Kjy{p-v&Szj4vx44M!Qo!s)!U9sPfbQh5V-mn^fX9y- z${-p2!vwZJm&{RDSJ&&nz)S^zv0mDYymt1(Gd>RTXtmFo9-bBq-OhRv|JgWtT!rOB zi*-or^P2YB*Z}h8=al?1;VB1-3^AGAHI?-$Y_s^?k6v+2&Wf`hjjnEmZR`vmG2qA0 zJ6H5$sgH2Qk*M_DPER)$&iePEHie3wn3qQ;CX8@G>A)(0UQJvXQ&@j+RNZ}p0J(Y? z!vz$%k5Rj$!8Eg=YsKrzEItt-oe)rX3pCq&`>`36-7kA7`_d^VB84cy>!w3FSw>lVVY7d6o+X>@%y~@Hd zM_$^d2_g)+N&8L5jt^|?1fJ^>_0`UMG*1nU8hO2+ux`v7yiVOiSHv&UYEdpDbvSD|U9i+{+(@@t|n( zDq+)V56k0hU3@fd|Iq0x!uqu;qdH6l6U%clJ6Cyshm=yo&*M#9qmrmb*&6-vOBB)Q9Hw`rF98E3{ zadkRW8FK`b)ZU^wz!bd~fwz+{9$!O>xa~%PMUJ6ED=-^UPT7F{D(1?gr)a~N)&6<( zip9gFXOk;5r-<;d&l?9E!jfi~sU60>vvOzb$(IxJec!ymvmXa%6LF#T7(ig(-h%Gf(@!eA0|)(i&TAoD*=pkxIIqxB4}b8DU;P zqQWb}jb;|)5#qtByB$eoTBGu9pJaFK663>Ho;_nrI{94wUh$6uUzr4lH5|l0M&P?I z**UnK#&SCyM((XB?9rYcGrJ|^T{~W9(L#e1kl?&pmD`dhh5g<#W$R}~H z>)gadnP#5S-ol2xT(&&t9k!2~PP(^D&)T2;V2CJG-PvX8PF-;C;2lA?{VB9%ZgZ59 zo5sU3?U`sQ`AY74x&BuX1CcM7?x1;S=H_S+YArnYj>|bGOzG#U1q*-RLTjcuuDy}0 zdPQ_hZ6xO$o{#%;R@-RF%Of=2U}7G>Hy1WPEO_uyQD^rj*H2s83~8@s$~%o!adVSP z7(UxI9f6t!x22)3KL1db?LwM;5W8@Z!Noos%e5J{LId@xnp@qcPM;<{Gbuk>Mu8E) zCS2K2kzYpf7?-5a)g`bHSFw6X<2ol<9MQQQn^%ih2paMK+MEU#4Zyv@Bjqvs1TqV! 
zF)8hKSH?jBEK}piuCG3oQy156x&Yk_ge-O1xxDE4NOeB{pG6$n_a6kNU&G zV#y#&>5P~o*bi^om>EgpVH7$?jXckAm1OXmh_i@^s$=psl1nD|F78qp;k;}MZEY&JQf||Mr^Z=jRb&X` zcZolFOL?%(4eCe4Rf;#d*xrQZ-#k_Oqr(U;XfOJ`%LnUBwQ7Lm`u zZY{C1F<%H- zONWOQI~M1`f#}J3UjM*k7Q=?r&Q$#N({qFR*;M)B_;;`uEUl1iILxEX-&SagH_1y% zzG|3rs7E%A4F-*QVU8aqHSkrJ72e+9ZrE;a*Z#_qvp7GxdP}ElHaPOpPDW4VJGO=h za}jpGw)?MhY?)LC+mpIi*SYuIj)Vn~$ZI0Fgzh*V6pVvKvK&8qacRn@l}%o-=#$J+ zjvigwEN$J&%>3iTWZghHF!#Iw34I_c`f73m)Ih+9q9U9(#;ECKY3W%tU0r#~*0KH8 zA%_8~Q8q!7)t#oNc`jHFNc@P2Mc5~f+LKuCZ@9KotI?y`Q;dcWQPcLXFSA#wiIDTy z^OQa$S?s1|uDpmDcGz;f;;y@USHR zc9MG~R~=}CMf>Kf?Qo#z!jAzPPZgir$liw8++eNM+J^bztD`<__SHOj)wNd!M#lv0 zRtqGChD>w>=2wKh^h^UzaaLdK3S6rosZCY=*^S{Hz#Q$Y;Qd{lxHj$uW?b43(ZA;4 z9YdgDYpMM9cC1;(lAxH_r^|&|8V?ujI>c-59_)O}&^1Y#a5(5rZO%{o)UW!K!BMF# zY3hdlfDDj?ZmC5E%3V(GY*j~GcTfi#%3=wbltT<LoJ(uDqhJz<3Hz=&7$kM$9&#@VA9fKP>^H3rtr9TIrZad;|7HP*fLJm7C6FWBZ}Kjxx`X_ z$iqFP!2-!W*~|f@e{bwNhLTr8wthK1lj0njJu@;Ad9~E~MH?A&dVJ+(zgT z$yK(dP%gPBt6`pyZQL;)f5otqBTG77K-eh+)9u*a%hKw}7dU)dO!LB&n}s9p7>-BM z>$M(}`^F>fkF5r|dRnUTKYJbq3|%H97&yC zFfoxij59~q!G<)b8y#Qv=@?djY*P2RmL!&!(WwG^rE~VvyO6@aNtFwb5 zd-ZI33DXx-h_c1s)C>;>!-+|zR7zfzbK5)H?VS@-B3jO~4956XKGKnW3gc7)ds+L^ zDqhbl#L)QTH!MzdS17L5QV%A-FPy%Xm*pOozFhTsP=pA(x?%3YGyY>86GGBHA-4yS zYjcA*9OnI;Z&swxKYS0&B#v0+{Tf(9f8?r5=4CG8SwdNj+4@n-`rZR?Niz)V>k&gA ziq8=)dB3=JPs&rSBCw|{#p(Blem+XUp^ha4sY<^859@x)XQvGHdKzGo;3P-UBe7e5_^ zCuHDomjP{HWH3}^#09Nt^_q!-N+c1te<0ZHbH-O{xu{E@yp3#}iEJfXB27N3Myjib z)yW4Evo7G7Me&}qmU-FAH;};Eh6NTB&KV!?AF7e{WgDq#adLJ-#|X_IYx4sBsVz zXUQv;^m?IC9jCPrauYvMlYe2y$i;^Ec4#`s=$qTOARl|eP!R7?iSMEW(9V|!dX z){m#OdR~z2lIZ+bfl*@jzc%FZNw$KXfr&{Mx+JlP**z690|-h-T{f^G;Yfbh2S*6; z^GhYg`dLT1XJ{FYj>3j2INGo~cZv*$B3T=zc_9#HEs3<)VLy(t&geh0{y1+HB&F6S zSZ+hag!VNsa4F_)l&3&Jd}WcEMy5`m;zG;-Se|pnB@jrF7Ff2~%rIL~B}9CwfwU#V3#ffiH;hD)7~cEFG(Js3py^-(H91&`=8tMc+|{ zdaSkH*Zjm=s=*yx?zRL3)vtcAmSbWqpW}PR>U!O!knq)AV>jVH7vuj<;lwS=P+)+< zO-NYSDNXKZNFGA-=@l}v0Lah@*e_3kkW8I+?%RlnpHDAk1^hpAatE-Mv2JU6Jo*yM 
z-Y;lPTvlnPUQgPUbkmGt*~HPF@A|`z#W=(H_g(*+?W9uI!Ga@SSdfzfBr5#$giZwboJ0vR)uK%>X z>fqQXUHMmRa2b`V`y>iw@|)MMe_eC@)#;CtuM-p$BocIB-I!_DflM!oKP)m+TZW+` z^zF9V2stoVhMhuhAbEl}bgRvfRM*;0cr?&$)Ua(1vBMQ)lZrOv=%N9M0nOI3)?K?J zo1vlMpWKEyPvXI3kSVP*98@7g>w{cxJ46!!R^Qrnw5PS$$mnK6Xt~ybIdxW|FRAgt zT1Lbw-$lua)`CRm;Bu#;#NpOapX=7~dS89*caPU_(IyY42#*F7Csg>UGlT^<83Urw zQi9afLp1R%5=m-b>sYASC%$zm*dDfjhS@Neb^0&{19QiBA{v^7@iQk zcUr6`bMtVvoduV@DuK*4zW+8*s`vhrdprN=iQ#MCrkgh`4+u{J zzcAX)Pk(9OOgGFgeZS ztoQysNPU~UCKXUQ=f7X~+8?3v!hgM{*#8Hwx8%lBT+z$SJcVvMl0Peu>MB9dfXr<% zF=n^_tr9^?OB);&B^AqW>+A1d+uE8wU`;U^DynIt#v0l5pqYxX5FK&{iLTRi+2Bxm zZR2idY^$a$sTD^utuWcUEb~pPQw?xuSe3tjrCmw9epbs=`{!26x2St-emq--5b|X$ zDGWybxfs=${8Om9M84TDzq-@h&C1=0i!&XuD#=GMq<5lARs^M53eEDYuKPApQc{q6 zZpN4^zp{hhXrJ`uxV`?yrdxCB?!`;CrW_=r_$rx>Ev%6i8y2)2Y@x3HMUO*l)0D?e zk_fk%Mxgt$U9+jkxDhK4^D=6#OJG`=QOP?!gUxqrV(98Pje1b%^m(B46ZGM`hneaO%0x6IT<;?yCK0`4VCQAX1m%1=ezB+R^AFUZCu@MeEj z-iXo5d%2Y{m_Il!#!n{8F@k*8bKz#aZg#wa@+nE&yXtoJ8v{#k<4(=gd@=RfvtaK^ z+D!lSsXyXi@qi(7WQF{%`%3t)H~TZj6gGnrBVj^fB069lFv9U|NQfA8%rginwv@Uu z5*_25iFE8|RM5+J^$WgFynFiK%kxFaq5WP9!ho2s+p=z!0g(oKHFOP-r?A-BzL8Rc)em>YF0|)So3VGUu`N( z{bb75l(7&$opxY8v5hN%kz=|4IP49Q2n0ZEgGL|gx$GC#wvloNLKOkcz(&XTERCRy zMPiPWy9Tl3_#m#ADY#~pp#6}6JwKG4>3q^y&Sc|C- zlb8-yBonUh8L!`4E{uHHoj>dOQAjy{7o?=$A*>{jx+TUM1as zn?q9W8c?X0yZVH_oVWE>?{SDxxy)5}bhDorA}L>Y4m79b?hRmX2=7kXqm7p1BBRiw zWxh@bT_W2*5K`7ea4T6=6KO=?QdZb091|vLx@;fyXE2WnYu0HLw>EX@dEBf*;@mzo zAUb4iL-yY?lcm5ouv1_z{C71sCCU7%6oT4?;ZQLf2p5dcM4Qt9ZU;ODI321<4%Z)2 z`}(icAc8ArvJs>8FDVQ0@UKW^D|++qb1wY|OBNdZ0yj{GoWVUvd*ucHX52oH3Q?J(|QLNkke%ZBQkj2|8ab)qSju5;iTWAhhjDx}PkEj6dx2V3(7 zb9)F(QW9g~-SI4XJwCP`8!{EWw!V|fdOVP{TH|r43;!&RPv6XKa*UHmr>?tZ5HOKd-jDK1p zNZ$!QGf|cE?XrbH`6f-(1(7xfa)DD9g0qFe@-A`;YB{YC`Nu^vwi*@ig!o zW34PzHv|P^pi~C>ZmLO28n})gP_IMlEfXTr>VmeF=F$hj;{G-*Y)Qq3PRBwg%G z?B33&uy_g*u3!Xu9M-OWxW<9|VyvOTt^-`D*a6wm!L01w+=X;o8B?%to&|5RSSy=P zFE2MZmsK2YxJn+^@|M{zsx`#BuDpEBQ9WE=wT3)mHBmh;w1QT3don}qf#DCg6h*S| 
zl;vkdYCKpn&x^T>(oc2lADxoTUPo{U{|Dds*JXh$6JUc)YmN{L0O?oNvIftO_7)9( zS-XU~5h0tJa<5nv4kU=1`{ShDoZjSKC?ZQyoCw1Y$z+Tgl2&@agcf0aF?PUZff#dm zCW*vrWN7qa<(m?B7Jdz6UFHz4mS%|#L+43?*w~F#p{J6a!f6P{UJ$L%j>9ho{j0=R z`w6UjLl3OVJ-BkoN#Ra^ zS8K_Kn0RMB|M_`(`WKY5Br3O8M$aF4*KB$<^!EKZkrgk*qKfAPUz9T=+V!L!1g6FQ zFi9@nmDb`E5Fi~~@hJrrDF3=TF@C#6rKFPOV=F-#o-5>%JISqG7-%OM zoSCDO__||46TH@P?Ao|^RQ0vRi#cg}stgf8mO-ernJV`}m?;hKu(a19-Vv+!SjjCw zxLvcQzoq?PT6%Z;F45)(%fdAd^R!)te{Y^5#jjjDiuCpLJl!~ZKbQu##n)oXNwoe- zR{_BCs%vX8%7!}t8wFG@qf?c3o9w0tbvVuNByWxlDdbLK3gx)%Kv# zj8-Z*98cNc);1xjk9T#n-cS|%CZ}?Igx~r2Jd5r!Re@3-upj~h6^E`w>^p{p0S-PZ zlt?tR;ASy@w zZMD{*ccJJhRt!8VIkaOhQ|RNf{}|Zmx_sSXdglWfo?bf&D8!ss>B~;G6_`yJ2MEZZ z_ls34d4=nI_dVehnb?HSpM6o3^SE5eC=)&R)gC2^P%XCb_rWpuZy#MM$6(-bcavb| z_TP#*qi-#PUZ2u-y52L6L*nb2s-Mivl}qwPog48yRf0{@9&cIC3EN+o4GGpQC*kH% zAX99JRjl3Msfcjcb$}xC)6j-}$IBs=W_PNwpBe-Dvx<+o3IL3V!%j_*e2uz(oBOJ!3tXDbZ)>kkG_ zGO<*Tb>dt~3OW9ABfp$vZ~ncz0F3;n6!b|--~s^wL9Y3z@cQ~XN9T$7GYuGy83#F&voDbVFAX0&3y3VPnN`2^~Wy{dJ|-9S5YzyFwhmHs17VXsMlJ; zge%$&prNccf676@sSYIcC%c*d-8(>)z4;Z+O9WA%h903oOpju2QG_EL9q6l*K_Z>G zI+Bpcd(mSw4eHUF5S;?cmc-rl=U(8IQ2okS{j^!TTHV!k#ogTPEf$GHm^v;k3m{`~nR5Z1D@ zvt!c#`>*Mq08OfoRaM)7h=*}IK05L__1fqGpg7ekXi`+g2xg>c0VLIL!VS{L{dxL9lL4p|_s^>&=fszfd17ct2h{f&O#o2eEP(LQ zR94yYsWFh>n1;Onln8PU7e+$ln~%oz_xCF||EU>>O-Y@wY%#$=z=NhjYEDp9l?b#G zTnD*HN=iy_t5_b{{dpu*>yjsxw}MD%n)#k|#X-Zr*0So_3BC8%@BGKrjk@T+U%&XI zsW17?BWDXcl;w;!5YaqAP_OP8IJ7cMUz$~9HggjGC#3yE^{e#f=jVsA;OUCldax9f zGJDxwp(yhMMgjm{mhOesT4Xh$$Vj#YL6p=5zrFI&R>p%_efg-w4t(LW^&0+z~?asZ&aN4osYyK8k7mt zcMUtQoeFLSJp^(HiCq@XJne5(Fwz>G$c(VQkj5zqO9(YfgO)Z$dJsUz2Qb<;4Sz=! z79j-%1@r-UfcLnz*(KX&aQL7esHKw0Hv)jBCBV8XE-TYmB?Eonb{HGo7%VHimr{q! 
ziyMW7_*>1=J3gKgU_nsN{0^d|XbRct4?!6$EmWWy)DiB1P$sd6o8Y^5??TOx94Ncz zu%^vz(tlf8S~_wYw%!-bucx5D#rn=hTEKvlaK|jjUR?u;7{Hl}1!ukNYO#4x!pHEP zI7365WjVi>mz85@m6I>jiNS)kgB>8|G`Iw7dmL&FV8nX9Q;?FjS|fxctK5!S0tyC~ zG6wRaLpFms{ZeHcjg`OON*1P@qkUFj$YWPl(c(xzhDiGKVK-*U-evO^?&0Bz)u5EW zR1=~pbA1S?N3cdu`)3n_g!yNsq@vp;fN6(5o_>=;tAqX}J7{f+890E!*C>_IrNTk_ zn6giwJg26f!@O1c%6CS$n3$%=zIp&U88)b{e#|uuAb&_mhy`dpe=+z@b-WRed`c$+ z94+kQ>B88RV|Di!5|mDA29kl0QFioTM>v^7}6mh zg%Kb+fs!$WNg9TbKuj9kSYPLLTz~Fm9plz;yvy-ra0v!>1c!wwj3aeW>ZPz$TUE_L zjwg_WE6rkAWU&6&1$7}$p0v8`fLt04KYyu|b-V(!K|}ix5c^p<8sZ{I*aM7&4@xFs zkm9LoP;ODm6#ORyX8vD%eRn*Ud;kAOdv{ut*%peBY$`<>G%0%|duL@;l1fNOb|sZf zHt~_YNA}Ln-h2I?mrkd1-{1T3>yL9foyx~`y~peIT(7&owl3Zd?U10L;K&r*t>*|d2l1?R|x%m2XTWN@A z7hgni?>w!WwF9qIG~L|9UlppkoLnk9udf(t4^3IRXsF;Cxb(=C)4cpZ^T=$VQ-$ZK zQQ5**g{6H`67wP7TXJy)uHUfXJr06PzC6*F=0M(#K$iJ+Q3KDB7@|PvwF3i#r6cL_ z($?>!otEJK1E?#W+TskDV(u1H#dMwWF;}fgaCPYjXo4kQSI52 z4;}v2Q)kYs!1lY>S6$pWjr~Cwmt^&l?D6KmBPta7hraI%eoft&A+3`BfI^~%&Axjo z_>;d1Towp^elRoEO&CEgb2oKVs^eROg7{TpA`j%JgRvQ@+BFeGsGM`Di{RvLNx=#C z_A#7YMC1TCsBT;#gi>2VmMI`o5DhiL`!MKzBf=y_0)(s!!lH&i-Y(77kP}f9Kwo+2 z=+UAP&Px&!hgn&jAq6yUcjDeoM|W9IugbPA@UT2)qmSNxtezrUhN_Vv z+a;fnk3x)LhDync_BGd|yhy^5fpB$>xa0(XN%# z_G^`6_#%ahB}^%RUJA=1oZteAWK z)~#$)Upt6(f?M5Ywucev&Q$c)<5$Ien)Gs^X*O&Z#g5CpE-83URCEmy=*%l7DJiL7 zIlzUuXK}0nF=tvr0BzFIg=ry2tZ>#4Q*4MBMx=7eJr~xkKSBKNh~!>oNM5sXtw9V+>9&thHj?fR>f3Tx7!BX1jCX(rN^@`{S?;FM); zYSF|%khdu2LnD$4dSW}Q2jL=8}@;)SMHtU{iGI8sTd)3p8+-NtFMVDn|`-LjEZg?scZR4K(JgpR1Y zTRU-JX-}wt&!o;p(6V*lran3I>hHUMrtmw?iFD_QY4j&(C2|!wWbP=!nL|+ZAkLGW8 zIEc)D7#$sACfeatdL^G?Ix- zu%N!Q{Fc&O^#JP};=*f)*LAWSo$Lbb7*ptS%eJ zBkhvXi7K^-BbRlXUaV0 zR+d=A_h-+~ufPZ`((yZae^H;%cmNdw4^M5dq`7%w#oU;mnLJWauKhBIR`=YuC?(GH zLZ=ZI+j+bGex1SOK32?^1A$_RbaT<9hUl6n7Z+yEU%q@SW;FG%Ak^_;$YLf|A-*`$ zW}%KeRuTC%V5x^QG&3WEwub0A1_i2kZGTI>aP%L0OHiipRqz^j{pjl}1eC+VbuwPl z+Bt|Jx1#Arm&wrup6JVco_i{KC!O&Q4lX46eNf|n+S9mV>3kq*+irQP2fiIU9*nq5 
zCy7Zwx?|Sdw(fy`D(T0icZawSg8n38YuwBJg{SfsWzEA45Z1f83+wOfT?Y!PCEk)lHPpL{75-t101OhEIDKV*tKj zs*N6bVUlBY`g32Q>s*3@3%=IO1uezJ#aGK_I7{w^>sd(;V#M7OH%7_VvTu^#1hmA& z={8hf?Vk&*ApbO&wZ3!bLwgLIz!lat9i>rt@z^KCxF~E-x=04w`RGe6#=Gv>+1Yuq ztFA%gA0`v`9EYE0U0d==`QB17IIwZO1RI1Q7#D@^_jA>_LkSles`O$M76mv7BvFYc z85YdV1S|$Q#l~`~7Y1lk5xz&Uhq7l`!#r4|`I-Y@ejTyF8HFOYTJ*xN{o)zqsLJ~o zk~w#nXR!5xkY!G!qg}hU;z3GEit3zfXw?Nx&EUWzcm9zIG@N*@nj2G>GIgAN^Zwz` zc9GMrUhb8fzBV;I)ulcs9$lv?0sRlCk>_*)5jnoH=etL&>2x>q#qCYJx*Jj}ui?bq z9A-f;&BhPXF$}IgC4QQnf4daL6(FDKU0yk+Z;#EI?frQ1VxvG~-RPjv8Slq2){`L@ ztQq8!?|nn<2CJhdUe9sMNu5uVj~=barOc?S%b5)ORO&gh#*>W!= z3sdex6;C*yN5;l5KEt&yBE^}{q$KP$+;v<^z79SZ{DpQ3{C!m5OG{k1Fl=YxsG=2) zsX%{49C{eBMf&k@E&b<8v%eS^N?~u`&10}P5L5V2(_rg-(!I}8K*q~OfzOVyJZX!w zeHV%cp7-XMbuOZKaEOd#!%lCn*N!xywq+M4N6cbt6Uo8;{-{jy85@m|bP_M>L`}?5 zda{+F1kKVkj_sL-mPM%pV^16$Dgwo}A}l!%{y3+U6n?l{aZWaM>OM)(sXK#~b9iKA zAAEV~>FL616GJ$q&m$9h8nz}DuEA@zLhy+ZdCpq}$NvIq@eO4SjHRfUXE~?J-%`Nh z6^2}LAN{nt&f3Fn>oKzlKo+2aAGmu|TQlq%(K@m^xjcXV9;fmpOid<+k)h4_0?+OJ z$^P3Phj83!-|N%bX4u44BPW7!3`t2zn;3+A35#W0n+gt=+tDhk(XK}h?EKYoux?yN z{JL+*$P~c)N5r9H7u|)Z0!7C&dyG_=CGutg3XcGJdFAPu3mq{|rZr0Qb~dFFx*D|d zE|rz-I!)2UzfL$uh~XDYl4e72tRL^D4m~9i;IjTlYkPi^qEjy0{x{h!B`WRXL9st; z9{OK@vEm^wf54C^na~<{-|)WrKyYCuYcLS@C_+lH)2~Rq^|RS}7e^+3i`D2gh~gFv z*+KZI_Or5<1Pg-RMhnM@K`yz>u@wPkL(Q8{UAwj&6%a8Hf*30y6)xjZH#!M3<6X2Q z#6v49E8}2!+>Telac3Ql6&j+lN3z9!5;E<(;P$)nyo;hFM$Xxxj8(1mdv ztJhk-p-;3JFBhwe)!Z@FVk^Ak@FfFZs=f=0lEMooaJ-5!SXXw&d|(Mo9+6KH2Gx!E zjTw(tZ!B;!j20qC5Xr4@TOGhrg}1RdQ_3luD2zyHr2KNj4*M9Xzy4z{szPS5V(L(q zp_>75sViFyc{qR-GFg8dcCm zjC{6TnrrF7NuDJ9Y4t|RM7dj(uHJEgQLu=j>vXK9TOGi);KU*xAq?EV-HCZ>nSZ0< zvzyHUD(6>EglaZLq*lcJOF``}Xg76+;*BXoYQj z(a>OjUk{GPsXz>Vsj9I6#3LktK2u0-@1bAu_R z zI_DY%9(gJb&WWEt|9ND3|21Rv5?_po*o@<1z1=qgtELSe-CC?$9g&+cPa}b_?h`mdI&pRXql&OS) zlH04^$2f;wD+L7vuvCJhtkq_uV>uJ}#-u-aEFAG+ZQ3veJj*>H0%x#y#g zMoXF?PV0{Vf=x%FqM|m;ZNXuGGgRs(?_y_%ZPJ*cE>3VfP;Dvp#EheylB%~w zsdZmWZ8MQ-zbglQ1^|@~wNlji>6l_g+z?<=aKJ_sZRrGHBR?sC*DN6+p(bH!U}8ed 
z&?3M2<$b;8xNO@_5(QiE#cF1h<)jUo?OVzBH0=?rb3?hvb;&)oah}c0W?Pc!YoS62_K5zCSVyQ6u-DUdr{Y?;tOASK_Def2+WfZP$_ds>9QfT@{CXyK zWsz+#w*(O$1Ke2Hf$&&wqo#fy6BCm;;+<^Jx&jw3(ZZsRb_U{GNPqqJ*TS2F+hjGUrx9aR%rn&A5NS3o2|h*3SzJKqA7 z|Lxm1e1;qR3UnByBZ!EGAk-PLT2;MYa<=0SxN7IZWka>sXX zBEn;E!qu4E|Mk?kH+d!#GP?+Q*u79bU%PT89c}!w;k>SHkF<0RbGv6CR{I?_HTg+F zf>X(hYXw9?L}!rX5)CL66%|Q%km9XuwNCXI6DS$j=nFL z?ldqm%Fv2asNTK&xCRm@s%2*Y=p$kV`+=2*mlvr?m(VvRVqNv}2`)cISDGiwB8z>9 zNNB#_7fzkaMmW6z(p2u+BTIbQh&1$S^!I=6J4(56jUB}iLBpK<{Po=234A5U$ia_} zoOfn;6M0;?#>A~K*o=4<5bjl)@GXB3FJE6@tYUD3Zucj8ui`Yq83i;pJzsoD@b9fC z8@m$cM1(l1=>39%ekh2EPOzruUtE(M)v_pDTL$NKAK&U zvT6LWe|;a0d5;6XG@nQ-TurXr(g=dNA6_h~wrSy>7B7Ey8Z^~oyp zqd&x51dWd=uUv97#)-(ujsMX$ZF8;vaDV^+u6aHulOiG_2=IP>KEUjQ0`sE6Y;AV- z>d>nDH;uz|L7WiNA~w;SEE=5s#+KG?;#sa^sTwp;-=@IEUSaI3>pAF^n(I~5s@a9a zfrPKQW7&CRx4F2^dW&h~Ua{alAw4tQ) z)okWAyH{VC`e=qmk**}hfTtnZdPa@L|%v}DCt9dLq8H! z2Jm2lr?`FlHU{EdI(*&B6d(YT@c=s$7z5I>!swpioyTd{x zbMC2?KicxY&w=x}6Ht(m`AI0kfAD5WUtQkD>78wrU0n@3F%6Rnrd(^1%!ks8r{|Dz`qQCq*KTm4&AQ;@sDe6 z1_(@H7PeB}WwXa$^fiz2GIi_Z3a?ycpOE?K9BF-kbz#JePwj9PW5);oPwk(-2flP1 zbLw9_@}!9&`-=I#t?82%61^6W^tXP?&-N1BxOj2Ct@y0)!fkur!RFMI#y~%S)5Wh2 zUF0zz-g{+Ms2L4$eH@+vQNo74|{!z7$phKaVQF4t|{s*ZaL(s0xM z+HaVyU7*{5S_bgYt?>EfjrQvw%j<1B#t&gKDG5}GW(qDSGtn2jPeaCn$gzWLY%av5 ze&xz3sLE%WhHRvznQAtutE*#0WaAF+0mR{D{?l5JchB682l5oP*_j@$iUP&9^t1eZ zA6eJDZF;HKI99!z^qjS0#H3??N;IprP}=1kdV@5gSu}xl^BEk~^U4&0`{(MqX5vs2 zi~;0uPZBa8R#LDUIft*U*5I^Id0o=5L07oDXK=7Y7)BL@^5i0n%?p{6pgnJR^dZpa z03q-o_99kwluS5GWgiM`$gyBfh~fsl5`;|~L5j1OB@DYB5F;9|>e_z}yn{;;~zTo3ADh(obj|3TSMUgLH*}I1p4+)vJA!IV?^< z3$Xcij6*!g#I)XQD#S&9G0u^Z({O6?gTMZQ*3_}&H!kc6PoI2S?955fZ;`lj=OB1B z9$pXIH)43sj1s=Mco-BFTeP~bcoB;XqsqbQU2mPg$?j9k!_RMwU?g?~Zsxc!=sO-_ zWUNTDw!|p|`bAzh=7XV^PBKy?PTRM!f8f zQ6K=Vot-ypG%G{O%C6Ac&KsgyIfn}?w5jFOxl>Nvlf%I-4wBjS?O1EXq+2N&D-%P* zcewevO5SncBA=e~RP>D~06vYa#(4DT3;af-;UA};Y@yStISgzaeOd6oRGimVE;mp& zet-cD@7r0xg`s95W2M<2R+fbOK#SMBC_W@>bl)-}y4!EHI3O(OF4X^kA 
zGJW%i61~SZ3MDXvyK=PdSB^za{4BR4#t;?U`tvfcyJ8Jvfy>Tzi*NP%skW5a9Ik zYj0Plq4uj7E!o1=J2&vccI-*+tu5hA*9&jU3qqZ;+bJhmLgdfR4G~}7y4Ubmu3EEZ zg%XF?8&35t=o%h7I5gCwI0LqL4t)%U?wqAllSvs|xTocz`$aM!@U)VMZ&Samugs;p zs1m~NUUcf_rgribw0sR2$EGzRPPxIs5S*1tTTA?_;I^tKM9dz^H|AHP%?<*PjVQW? zxEZVuWa@uF0-Q8|s!YPH>&(|dj{3>&nr6MxwavQh9ziSbjt*XSoj>-I{mYgh3ImN| zJ(OaX`+XECqExaP@VmV~w;I#>lYx(78qjuX>I*n&dMbl=z}iB>@N@i10Y--D42a>x zK^4r}{d(Z2e6l+}ErA>Yb!FD!h7g95RqmGd*dO`i`GI9C746^ZLjaPo@^EP$0tDWI z_)(KLw+GhKRQON~UH)Lx{^V9c-8XHf=$F>ajt^uX^>Fm)jdB{-E?nJ2a>~?GNw?gg z*%Y~hB+u|n2|w>S85Eg0=IQPpg&43c^aAI>h9e3JRr$&vsPR!wmH_MdhNJOzloAE% zePTLVoI!UG&MhK%602qJ-I$~dQw#%U1e8(T^(PO@wf-GZ4`=NY4Rrnd8Q|3Ze$&;F zw_~293?^@slq}^dikNJ)lHASRvLk2EMI}$r_+8`4L4}E*+*3%E19bLv-=id@ zg;Q6Ixvb2)NX|ofSihzB0*ZqWzf4G{b^0tCbQ6diQ`s<}bYH>y6F5B7rJyaO7`4fm zOo9xAh}p0I+EKr*NM;Xw(>PLbp+9^2G)i9BW}XMa;S9{4hnf!8{W(KJ_FQMW(0fxq zCrX-9=+`jX4K$Hozw##cB?tiCjdy8kY&=fwWI6&P!<)*=8w95a9fV@2F@FdO3^2{r z`Hlbjy6nr2%4>MMcpA8I357|&3T`Yk7=-ZRq+VklrX?exmk5u>Oi#Xo#KAqjn@A`M zSU46{T?9o@suO@U*0gOz-oKJujI6&CxZbsW5HEn|V>)t#5J$k8x$;MU|BKL279y4s zRC@d*VS7y(N>fGKX5ulb##F-nWk|7sbmZ&HwMv#`{gii2!+L|=WjqQ?+6i-kyGOsX zwN-FDh+mmfdBJ%j73&?G(*R*&P`VSCJLVu>03Xv|Khaa+0CaVX&@xJOEyS@1ru{iP zDqs9++@T;x{3i)IfmnH*()?hXK8MCeym`OT!ip{_Z?Hb7G&e!B&47ZZ|6Mb!OsY?X zp#x7c(?e6clMn*jMct=egtu!&}g&~=#%My7T>=%2=!(Buj2&TRjn4*gmw}g7mu8(Z;8LX(p7|p3tG@cY z54S*$k;Z^OjnQIUN_@F)eJu;eq|*A;YFbSEHM(hhE5-B}^n@uP#=nowJbgz~0(lEUeDV5Cn^vXY z;b2!Up#^^cB~(IOPgN+DS)<{=P77AgDwRXr0|ILp8L3Z{F-0ab4wvGM8)! zEz8_EuYi?+;Z%rpIBn5kPTo&38f95)r6s4ROBLyX3W!cx0CKu)P2*7)t{Z>r`0F>! z{fcrb`vxX-na;g?* z&s6oT^ynrJo#kM2@x2tV1&Vs(j+tPc1-jx$0d{uw`H4tdkVf_pkc@(x8wC*yRiKZS zlN)A!qh13JzpFdI`#B(akh7-jF?BBh;@z9ab$*R(W8GU5si;iz5A31V_1iz-+H&&O zVfG(ThgHf+=*1!3h2Y%DtzyDDF$--`nD=p)Moucy=Wf}6eEya!vhMXQ;dRB&=UQ^K zn3`I@;_AeB;QB3t1tOsVEB6Yv9^9BBDCzT19tZyU`oTb+6uuovOLWxsC)!Os%TEmM zE>aaPIiy?4PT4ZK-;c2|f7s;!MU?xSnW2q^wKG!_Z!X{UmG?`Uo47jeZxbmpCf#|b zYtNd63CW$i9St~kA31t*w8DW!^Jm?Itcu*9-0g5>ezO|VlRnPRJh=P_0Z}c zY=un zJfk_%0`yUB-&#jJH2$!T? 
zoaOr>n?<=)!oQ=tgq1C8qWEXJ&t^= zny3Hpny=ih)eC$SWfQLJh@b0SpIsDqQ1v`pr**i-a#*J$`+*r_S*Po|20bX>9tCBo z)J%HvTbS8Ixh5vmY_Cl^lu0R^;}vVUh3x$PV(4GaY_|%_U8?_@>&lH+ z(RBFXBYykq!)zDwh4W*tKOAj7RnWFMzHRTRCDzpQb3EReOViIQCEw35b@DJQnRztP z-gUG;$F9h}R8E!W^!!jyCfmrA*1A*A>WW-)w;JDB)Ve0zd|u(|KK)1;l|ES&cGYN^ z{u~b3qAg9bH9jJmjb)xDDXO|PuXP*J^BU&b?i^WQEd8)my)0S$FU^3nZnOEK)E0Bc zRkP2?&izbn87ufQqhqbQO-xuuWcHI!;(^$h%sx(&Z$~p4>|JL^L?w>Sd;2+TjoN!Z zD6{XnhoRY){c0m?=)VRs+`U*w_X8)Or3<6s@Pe%yHUnj-M`v{TmTtwW*&7~9-c&7z z@O!Oj%tt!M6YvbaNzL1b1OyaRx0KH_b3$|3 zT%XQ>=P}tFV34}xS`PG{2xz^GRS3$MP^m$gGVg19??mL+(G9rTIw9|a{JcRzj0nAi zNa?7;*EfiJ1{Zx^Ts(T0Ed_*y7qGhU$9;JWAE9F;JOOhP+&;HsHMU{pGIDc!e)w=u zcH7)5)X`wRx`&g07ejeY=ZJ3SKWpNx)F;-#iv;rFJgNgU_k>6R%ukJSyl3tk`Sj>F z*0D78PAQ5EYWyqe6K<;BUo<3H^ic6`_dC^aT7G?Mf>@I3iKMtgsx{;*UDHh={c|-Z zN%S%cvlnp4y2m&GV&<@rwca4e8S<4u4g@A3v{T zCY@m9!p};&ARtCMCT{O_2oss zxZyC%`$fpJ^2AbzKVCeUZ{}tP=5XZTupq{U06-f7&Fkjpr|amlhw}+4Yy$dGR^CV0 zavI{llik4ZofQ*{fiVMU|HWPwe_We_eaZ@f|jsFRK_N$PO za-bu?2n<4S5ZSO|IDLvSy32JYD{TViI%|7HpTqNt;}@r!R_AJP zD06taYu0?!tb1R1$cbjrN}po0b$+6G+AR0AhtXu3yfBY|SF*R#b5acvL-?VvI7qDB zzoZ~t>G*I*uJOR^su}KhYhNQ)yOLuKRT_gab=}c2;j3onztV7)<+_YJb{E8k>sYha zu(^ns8XIP7Fq?i3IM8P-et9gimnc3$-4xGRSzioDMP~oDojV_YQ;OnGvp_>qgyB3u zozUo44N9PHlVdZrKM~(-v}ORe0-ckH}t-G zbl^Gl-vwt9`*IG32TXFrgqoh7d`N7i)#myZ=+eUn)9i)m79JGYG4(HW(4^U#(eW>) z`9HL^t*!3*?yMAACDY3DWJ5Iyy>uPsW9GZmsNA1fa!nmL(0UO2O0z8M)1}7p8J0FI z%Ndu{bSs6ui?_Bfs0&Ce$y$!24+t|H7`SuXo!|6s$!bcy*;nI|s+a+`@<)1bpu4B2 z+Um|C-;MDuF-ENQyB;^m$+3IP%l%bUQkvOrYs~xJ#BDEMoK9hgpX{_&H)T#UZyDvr zQT-QHT53&&L(=9QM#khE-vWW_J+!;RB_AZP0e{9D(ClqkcjVZHwXfaCfvcxj-o2lv zyB%;*>lOJxsiaz;qThUok*337B$Pwm*bC8GRm^F#y(~G-baP|#r%wlsMaoKd<*CUo z7W){Z^}En*YGlN`G_!u)5o?o`KH8BRc=b%*@BakUvz1D z%(OB3xb;HF5nHk9ZX=%2`5WaW&uP#b?CA<{T_$rC)c(#Oh^-XcmP*6#J z{=Sow-DVOl^n%`i{tSkHeEU9MeAjfaQIp#S=r<1rQsLk-t%2b-31ZG$u^O(r{h%nz zkuIRsLfBv4@EEGZ%>lz${*mW&x?wF6HP8GHB>Z9ioi@d#St^h+i4eS=peFuC+;SrF zK~{Ao48=;hmVLVC21MT#Ck^DEHr36b?=5~;mBLGB_B7CEy9Re6i|$&!N&ni00J?%4 
z#-;q^V8C-F3G16`ON;>zc5Y&Bf}mLIvaZ{coA1j#Sia)Ae3s zJQgT|!zWg6dse^@oxbCOW}b%4QvHpSLvs`@YZS&!hZj=FCqm4G{b$DY$8{Q9r5+X! zs4UG_Y&!gPiTN`9qM*}PtMe%@^RJ8K)HZYSC(7z8SSf8!{V4gtOObS&m&!ux=Wadi zn&^TFiZ|S9d{b#$POq;&c`5%=?ChJocL3{_K zX+-%;_?rguI)U|!wK=kV2arMdP*BF=f9ymCrH}55!KM^Wt;!<}>@$R`qFjt&wGzB5 zgff-zO+j>k>*gs^0wx8~`aXX7@})QH!{0TAT=yo+w3fRA)DcsUpFNe6Y%|OHo_1Y2 zMLXBcO@r=QYDQ=Zg0o7pk$mRo=TxVIJCv$hi+(df0C<1^xN1saq0ah4re)>OUNUuR zVFHK6p1NBXip;qqr#0GN?cYHqdAF^qlFDqTUOoUjNYw#C&0b> zM0RFX_@%E|I$c?HC+1VB+t4-Fw+9pn*qNoya`jps`>bWn>VVVkW}oe$F$6|9Qt?_& z9_4PjN4P@O+OVimj;%l))nMrm+76Z)I)$3g`bI_^P?$183k~dNQb>FGXba}gd7q&* z#e4sVtCX(DKm?tn#Cwr$y*Q88(1(j{-!ZuicFWPjf=a!MDMF2Yv*3uvPmAmv{l9LR zlvjZn65UrEkvHG$KQS^Hrr5r?sIZxBjxNl@skLYD^!YJLPcECAR=t+3*IPm}5*=eW z@knaS=Wz^aH%`f1shXABBIX|uQoVGct9u>H%~n25(D)of#PP0<8!5T>YV2ti7XJa`J9>YCND&)ZtE|Z56J4TY z;lIB+Feg6WUOoy!R#YyjIC#)zr0YL${0}m}_)~~g@=)SH0{RVDYMJNbJx#y00A*Kv zZR4(&V-yfnmRQ6GDEs4)CIifs~P zLC}DwO6C6X^SdVhfAnRt^O!3}v~Q5eymEE@2Ax)wf4ll?~Y zn}>H7zKgRAX)=Q%kVNE8AzBQ)DPcsoO5b@q&5D>Pb_Yms+{ur>A1GGI-)!-&J$uBV z;w9m@h3Nx=dHn|QWT4J6u^QjdNwDwUWa6b2o;+0%d^0{@$l7_$!_9`KF-wM8>|RXl z-cjDis{Fa3QzkD?M1n{m^sB@~F2a}ul(2nxH_#ixsvM;(0!QC~3MlgrpFe-DxA=YD zLyR@CJS`-+GN_I10ifQ^)F<`|3p0=~)I>#PKcTutd>RSL!D?~;IJJobW8%{~p#x*m z7hgAIw}9I4ZIWNF(c*ice0}BC;jY%&8qJ;GUx;X%#EbB%bC=j{nE z1Ef3CYeZ|}&pZd$7KMw??EA0ZUe&n$lBc+tTYy(4 zCZyuz6;h#Nq_Jf`#L7nkr zX`I-;YZsvvZ*naoVJsa%{Q;aH95PV+!INPi|4~+zphKVniAJ$xR4R8Q!6`*raJG0s zxP(DmeTY#ukJoSY1O3pP@%eTV(0QYSe0Vj<+wbeUXcaHPN7WAB5p0N;z-5?2sscB3 ze0J`OJtCO@1-$(&OfR6OV{{u%%6^rOR3PT?K?gvL62oj4cJ}D{zNlZENl26ApKqb} z^)in7p9;p>2-s#;mSJWSI#&bms*o{a?A(`#YpKkQseQ@@9JS30+DyTVDK;!dJ$|XU z841!6sE?JEmCguZrvEj?Yz8sVn0EIOQpose*wzcsszdj@mxX0xV7UfMrL@>DxklB?!iONK zINuzVATs?}+Ks$B9NQQewx$<(aq8Bjm<^S{QcJ@6I!;Wxv*-&?fN+jj)*ymX zBt}!^&QXm{^BK7|#a#?=(XUuFw6x1Kbo=BL{Vn0m#XX54(6^jQ`6jQ)A9b;cjt-c~ z5r2!WJ+Q)v<3Z}w&~RyG5i2fIAyuRN{c5O4G2qg@XXt+_9Ngu7;2ZpF`}S+vy#D}j zPxGpjYsVH&YS#=4nfN(?W)_)uzEyA5`-}Sz7va01O>5?IKkjt};z^)GBtjo$79e$x 
z^LB8zIfgSu3^bI1QOKe)11=sSUIgYtn32p@bNoGp!gX}F((iOqtj{r>Xl1xxd~d?U-2X<@^g^9 zK`ylHPyc<7ou^yA@0tI;#jYf`oBdK*qo#8HmBSJpcZ`XN80iY{eD9D_x!70^yMFJb$Ym!h`qgW<8l6- zFWR1O-aN&`yw|dN!GV|ltdp~=(t{$12|C)I@TLk0(LeCwRIiVKIKXe)0nu6^0F+b! z4|A%jCPKOicKr7b;a@Klf`J_PId7_^;8K#e*ZFfApZ&nhw?gJ$|Bae(&Hu?(-TZ$h z3jD8Sw_F(h>#x%LYgxEWNJoex7!m{I^sAioDU3xqx({&*VE$Zvd}P<&z99bwqNRoe z{m8x(XF~?;ObTao3TsUKMD0Nv4rjgR-DEkY*?c=SHll(_6>A5kj*pGCfYe}%($_jM zh}_g-T;Y~`ygc~OStis{f4iRs#>HGygvq4%!@h?v z;Pm3f9Sk*{?~$}krv5#>EYIWuvD1Pp0UkYbbUXks|L0E)C}7154Wo~NGBg#M>Da-l z@HG#2k2&=E=Ora0!9-dkeIf|ZuZdrd8gZ51N;KF9mLSZu3b2qgvyx&77mdEgs2|1 zLw|jvVJ;oJ{UN zo5m0)_jXt^Ay2)8ni%pYQ(YqK0cAuc^bQes>;GOtMt;9^K9_bF%Y`)FKQQUP$}>gP50*I=eHsjW>-$Q6bmY?ojZ5BKl3WS`$AMTs67C> z8f2qfAoSAEi^>nuO4mbSOvo^BBD45-W?zsf;5DjXRf@Y+n04$h=ONJ@QpQ!Q-}hH^ zAMc`+;uns|FS1wN^JYUqgt18%8))D|3vImLOVm~3UZ)DjWIu||x}m((wtWX<$w$L| zY@Z4&u#m1O9cA88$ilprv-`}1TjU|$)ib`42W`Pg79@@Srb!vepGPAt)j%rZumIQP5c3gDFEFp@^<>mY!IkV_sa; z26GDm*|B5cjosa^0Kk<)^OmB`mO8#2_G*>X`MvS?#*SWL`b(8$YA=qFi#Y#v1Niy*+uMw+7tNpje~N6^*ROLhYq1u&Qz+L75Ktw1 z15s(UTS34}uvnM@ZL~OVK{OX1B?BbT9bjbze*XLvc*3Jck4S{y)J0%)-Ie?E-T#MJ zYuF`&{F(nnt0gh_-9YpXq8_devPX)kPDX2G2pE;RQ!@6=RI87caF6IWxc}Wy=$>oc zb)Y#1eiTy!X@a33^{i*ST{~b=zp4L>!tA;4rVQD2RbF`uLuE%k?1@Zy@zF3>iGI$I zRnyWrdODt}N1#>Wi7oB=2M`O}D&EQ|AxGxwaJ2XL7Bxk>yq_Akr1K^-Gjf(5{~2lf zKK<5Xw{NGEqg?}p$ zBcUW>Thgg;$6!k6=u@j__w@$jd1p(_eTPkz!)9A$or6B9M9Vz2Ro{cAyKJG(@Nt|* z=0^szD{?9XT_+{9bP_ZQ#m|L|gCDn8y<{iTkkZ>Xs<$Fgm4Bvp#_l(8cl$xDoHFs;@SlxEVfRGF9Xts4)V02?wdV9SvKFr*dQfML-FEC1vH}aU` z1AS|@lJ3so`Gp-`HcMw6su^dBnCRb((E`T;zH;YQDgW*HX_myKM7~&|o41oq1S^&% zD0o*}2(X)<;LH`$qZ+~|%aDmxYj5WHvT;;nkScvLLCA2ruP<-*#_Odh?tsI&E)K724wzSW2ncxo57&m1CWpx8X^1~YOgTUguU5aAFh7GxK~KnYVvsF6 zD{C0^Ek;QtcD|OKhVnqCMG@|wV#Hk$MsO0+n0N@qj;gQ2W*Sk5f`FE(Wj)=Nn3QC= zv@kWcI5)(<)2w68Qmw^S(qAASXCg+Pr03^9gDaHCt)V`tbF0&u#z_>~ev3{IX%dbg zRoMCF;f4&KG%O|TdiXn$O%WvXyA^}`hbZydq96}LoCjPDfdlpEcBZkLYrm*%#4T_L z)E$z%DwSVz)%=S;tU-H5@P~#*%nPo(|BFGatrrs$8$;?$lu4fyrkY5DRQEk7FWiMrNis#`VB{-rtzy2KR4mr{>B3WD!3YIUXm+ 
zBU*zP(>WN>OPiAXk88r%>()4DfbvO?- zB7WZBo+i9iDNco5mS=u~UVz4e{YTUKdlm;X;$>@8;+2@9YvUF=C7S&hbPf&<#H}*b z%a6)-G3*ZAdIhGPk(topsmQ>e-M2Fw1-#2~t8l}F{NPFb=?_Uow zIdZ@JVs01Cv#yMMk?p&XbJwtN+WrYQ4Vw$RCAmxTYTOQWD#}-jq>F6FR~rSYh|dmR zoKL;5@%*8?oWk_NMXocU;tS>nEY8_~P1Sl9uKz6iMYdKnO`#6cTr&n;rSiJxY_`6g zz+Y{uTNl6i#n)6ei6={fOAIA%L8yEedUkli&FRGGq;k%4`}oOnOLDh|+F0Fe&VFZ` z*eBzX+=gNr>JByPy^n4`PS|*0qTR{&)-)IYRtBZh;};xiJ~6is(w+sKE?_PuT+~T> zRZfLHX3Bi~+Lk45aNS_WZ`0EZ7%bj|j$$U#VniO_czVJ^Dn$ufANOn;oovO=vwzEO zI3OEP?JV#heEmew5{fslF(DiSc!Lr;Iy$@uZV-c6X^}XIdj?)%NVJzx*etpuX z*mNjJG3M@OT)sH1N5OEudi6?$Wsq-sWbfQu5HHK@;vAN^AFF(_0fHQfm8PQ;HsJXz z#Hhyx!5T!83NHlq@C*{I`2CIR2qiL*C9SEy3vpMP+=--f7aldoH&0A9JTh=l_#nfFzT$VWdZSDIO%`(QRf}tfOG#e}qBe!6mO;e_>xH0COaj1`%?8ifI_WCqe%0)a^d45EL^=(+e;lAUr zZZnjc&_3}x{a0-1+(es6D{tYLpt`?TvURs_#kf_-_S%ZIJvFTD^G+;BB9qr%KcIT2 z@oUHp{l@gx-NQ@I_!(KUr)0-pUd?!3>iNVeC#OR(YB*au_PL11dOMNm1%v(jS1dgV zsi)3boZ_aH8Q)BL8`aOcy_7a{B&g*;LZb3qUs>A9k)DVp-K!}RS5_R}LgUoT+C)KG z<2TNp*T>{ED9;iu(wnrTGE)#vA{pfKs%`{d%{(#+@d92;`hY!**h z985Lf5|+N~xW>CkGVpay%tR)_F4IS+sn4{Ni*+Lmn zQ&mNbZUzAYjrdv;(eO?q|7+W>T~!LLLhkr9FdePx?Cd0pA%IiEmDe|~clY7eKXY)q zGLkbl=nhJTbaix`NaB#%v3m9DmxY}h)|<-8%3}F5RSm$dQoT^vX>>m5J7sqD4aLjr z7mPNveT>L%%_jdCZc|jMYud7O-im=TqW(}%m>^xO1TDp>+Y;=XiZ8prB8RJ$)Os2u zyrDJE1ePz z5wh(r^0EVzgcq{{Eo?SoFP}nHsCDn?qhQTUV`(WVB7+BuKnXY9Uyj)_E}e!=_I7`_ z(K1_iq@O)odDQFD6Ob{PwY9sUvA%f(Q)l_q;E?yd^6ua&Y^*w{J<4lanOs zR~cI)64#WyH-5d9j_)ywlCSw+(9C_7qnqE0m|^svYfVf{E`&Zv**uohC!Kbt)t=UO zW{QGVHg0P!`Kj1XDmQyRCzeM(7q>sV=PeX;=EH$A*7715p~x#}wY zsKm738PnyV$mdDVn5xq_sI=LsG^IJWeHp&_czk`<*viEOr&#Cnm3La~)y$&z^{97P zdv207zDP=+p^+}TvAg(?`^Lh-4P+`0k0S>kPfYN=8%#TB+mpIs_u^9OiPpQ_>jEMK zqq?(B&l|PvegEX~1~upHK2N%B7d@F1St5eME-+$>Txf6MV0wg1w0{-}hg6=g4X z#_R64xx8ag_w>#ANCD(OjW{T-@h~2+GBkUz3t}H*MN9j1HEhaf`{< z)%D#lLqCHU{E^7~Sc&aVjLU9Kc5>PbUH-_owG8XJC0=`YF!`GBBI1fT1Av>8$_x}b z?ADLq68wSge#5gZZYJ7{fAP7V(m z`)jw`WLplje`+U@WLqLgZ{1gMm}s0=@RRX;;im2O(!)Ts-O`cnP@?4_hELUp+J`@p zk9bT`3;$(=aFiTYvDvPc;ki+s4+eGx0j~$Z{%kZ9~n5?(Ww&n=@NqEETs* 
zpWatHA7Gtm(7xfplQBI>Iibiha>xw(&kru|Qs<-Cd#K(G={iiRo(&*{=a#*`ksX;o zFRHtmBaoGqY`a}=w$Jv+M_=ryrcW@qn1E430)OGNU0EPWvg_v5IS z!yEnvEX&>OSROosB0D*m5Bv!Q32DIx<))^l#1QKZnGZ2vY96fvk7@sIEiEl2P0eRi z3g+)p&*<%fKgb*a$S`sO0s+=+P(yQ`agi-`6}sbFw{N$?et@Zc+>h^wp#;IQZ~)na z2=LWVjuO9KMz*zmw-xj-tU0h8?l_g%2o!^kM1&gR770Z4kL25P&?O;+kG?#QD=;+F z5l18V!}p59*_`)=5vb{fu8T+nUyE{Y;Gl_t#6m`rb-kO(l4S zi=WXny`fZPDeQ7BcRS5EweYE6f#Krg?#)LsdM5IxJEy6C{_IuynPC`iy2P*;v$cip zsfe@>Yohh1b(#0)RAb31?+eK(RQv5j)5qeTWEH>vEX^)>aD|kLagnVu^NJRqRaDxR zc@#nM-aqw*R@L;Bo?-ROEH2qL^jLCdfdYddeWtMQ!%E{N`*Z5oS819r4ldxY`Vl@P zlvyEo|D8{`-I=VCt*pkd1K196^Ac*HATTGE`5P@@cs2X%`r#FDT- zK1@v&WM*a_#;8$cR1-2T2QZHUhkj);-QVj@koW8-7rIwdn}6`aiS6PR+wRcXQz6}2 zA8^~tpsy$Brtq34qT$=H5IU>+k&*e>8`ZL<15PWu^=zG^at7%u1-tk}(v@loWX* zAqmMmXDnl(Qbgu4$q)koubN>BZzvsF>SJ(TK9?!F1d*A!M*S*%duX!{` zQ6qbXr1%KhNr-Uk-p8EeIdg5Ae{aBsgFp1@!;CKmDR^sA2fWy=^W4NNZWo0&ZdZoR}_KEjrx4ip?^-zMOW_%?V(-mqA#s=)yIx2OTIpx)KecJ+byhRPUiq+rS3R8;n-R_xbD{G5Yu!J=H-lRKEU@f0MB%w5KSf7?k9EIHPmLqcL}4;vILXg@u}R zQ2+SgGcT`&-iM6B?;o69+MQNm6U|!WBPF$P(R0YA&6xf@egJ>bBcHkG(VaViii(OZ z=!1vH#nC=|_z;FFj{S>y`VutrJn>aGY}^=%#`CAz+N+Uz^PAueMMvx=D8!&<$&w}e zm6g}S^7$BY3m;c5atogfNVDhj`5?mf&Y1@JA?ms$Dw^O^JK@6XLn z)6^neflL?X7!(u%ZF3VoGTW$UYinz_qHP1|>22gAu$9{4EwlSc2)eW|d|&ze`SZ!? 
z*7?;ZPeOKji~+?4hS43ex0iyHQwYvI-QBxya3y99>5u#rjYvsZiyPF`+S(0GHAmNp ze)#ObT;m=}62f%d06+f2VXk~WhK7dT4<0z;=E8J@9iti0Cqn7ZzGH_>@1E|raHm@h zXEqq4W?1i7fQ}FI6)q8^rx!6qC`?LAb4g1RIsIgxJFo?!hCKMs1S)iLn}R-q8}t^; zW>=A(f=|BjH{|g7U?*ZLP$4mgT=^mlEugnQW@N+*q5U$7%*m5ZhdFL}c+gP3K&W0MAJ{CKY=H4&KHL(a>B zFYTtnTMQ|&9GoK;A*-Y~!VgIc>Suh5dj~bw3tV^u5$7PLTd76;08aCrFi{1pqs=Tr?8IY|F zpCZ)8P8$3rh-faKBjvUV3MK__br2U35dk9V7#<#;v?(PA_3?*biS^LkY}jiNWf~tA zwuq91$pxV%`}b2|S9c!;JkJ=U2A@)=k}me^c~yw_Jklkp(MfnDxF}z+S&HZAf=5gt=nts!w*WcGUj?C9){}-JH8-h) zhs(~#w~CXKli~=dmJ;}}96cf@{QCCUiB>mUuYit|(3k)$r4bcwVjs8*f@kb#9V{kg zlajb*w#D&K&zK?opFd+DQgZ^38wU49>e`PVJ`6&2--iC+0qj7Gvlv9Hj)R^3z;_x} z_&XfN#o&?GeMU)XS@b#XxCHwLxQk%i!pp~}g`FXs}~{!7&sH#21CwRU)i^NG5083NEig-5OW?P2f{fpE2az*9KZ7Df!3|*!@np ze0z*|J%gi}YmvdFbz-p?;6zSEpczAeFz=TFc$+U}~TS-H62oIH5fZ*GI z#l1H9#fffR4~cSTqt8uFW7iXdLU|31d#_&Y1FtW=yG6{Ti9&IyF2*7kjx7huLrm`% zQKIh4RZpswLK(GY!-go|>&Bx%$zGS2(`hH;8s2!K1JCr3q9P-LA!EWsU*mdFQBl&g zMvS6RNEuBd@M?=u#J94mO176VnpM`q;>Cj|&{CumF640gVqa(>&TT*^3T~slym;x7 zGx86fs_29SC7BCKii$gD5~HK_J^Gt7<4wWfb2h{zowE%lD$O%dXH`{K0!rPm0ilYN zw=ikRFD^c+wLLv%D2)l}&c2Bz^GS5hf|@w$-@I9Z&!U~IvQvn9f1T*moQ=V5oNQ2N zNkxS1co26yyq}jm)#B*~RSM3kET9LV977FL1S2=@7p>WrJaE~$g9p2qf|AC0B)B>{ zKK_V-K}^#53;;QB3I@w8@d0I-Be4CIJVC*aAN&3UN0UL@sE39L&tX#{?|=7Q)gyuo zr9C^l6VfVLKyiq3Jie&TwUEt`@TjGu;{?Y!jXe9DU=Cx}39h8Eu`$23jyOZb7-{PW zhuNdC)z8hcG_)@3p4`C9tOKqR@d^RMY0bus?jZ53$H*d|qkZjPzh1=mi#seTBvgRU zPbgfWW#C^BHw4Hmh-d91L9N>lUMz707w6*FH}0_Y+z%0k^ERz^?(*f)*)7C$9y~g* zR)|wMRz=Z3+Mp_mvphE~Dalc0o59eDZ^dthupgXTae)2SzVGQg#)xdNs;cTK4gy}V zXuO@AEQClBSuv@m0HOg|Pq2`A{60S3!$6#Wkwv+XR07}RgylCg8=u*uSP<|9b|A&U z)@Vb475T)LnnDi6ox>bZP@N3@IIDRV$LoxmYFsqb?{7QITm*L2H8gMt2*^d7V60A1 z-^uF9P5ObpSn%I313Hq*pGmzWyb%t8t95jA?y*WJMZZFGhD<+j$%C2CYCF5>V;jG2 zoj@Ctz`9=m}L*p84)U5kw{h^vKt*DeN!xhbx>@r*nz45gwje~vp8DXJ4- zs0hV;01>#YNw^{tJa)u+T3J{){L!%|YtM&~9Z@MB`N2j-L* zyjYbS8p{6a)vGnQdY)?aeuY-G?Sq~ySmJFY;9-B_{ti3Wl6l0eT~1XsKwcESlCW1< z4djnW8$9pY?p)x*#O0by!p0$ifsPLPG29F@4P{=3IS!z!c%}TweW)lYdc&`NbQaeJ 
z0#r)^)vs+}c-V}{pI~0QGcGRfQ=+CcQvCLhVy)-&y4u?pV`sv^C;gp$cvu*@)1<^G#DB`Hr)T*B~>splS>^k$S%Ol2n%5)+m8>bwMtIMZf?6Z|R0 z1m+DJE`DjxrAaMMj6t1(dj;eR5NYC^P4B=x<0AsngC9?Sj2gh463L*IXdY!SO50vCq}dj}hCkL4sww zZ~!fAFWlVSSK?VUYa)p$T~PWZ!z4;QaoscF%QdWI75#qh|PT|z>)A|rJ@8hX0BU)X3ThI0;x zbo@rj6vfL`)N!I^6Z%cClieUb%Mh1ZzuuoIL-N|!B{21BzBIgEPsSc8?;msua8J^& z!xDDvE9UH-QLTXx;MHIujhA3xO>IlNUDbpG#$&0c<#}p@JNNOSG(f29pmMUE+5KN0 zr$h3%JDc4*%N8l6H4I;?Y-u+fnEt~VK2wrUzzurIwO;M284Y>^LunJUe^|m%I$zAz zaBzg>J@t?d+20@*-N(A@-z|YZ(uDaL**U_lFfuW5aB&e_QxK40WMpLJiPvyLBrR!I zSJ!TMz#zG4*x4|SX{?l|S|Xh|Rt+K#ktS>btO4Nn#O3AEb?&!m#44;CS{vAf$(#r$ z6gofp`uyJihslW5kAv2fk2VyAf>PxGDsOT@;YgsXEK(1~ofPnEaA^>-Qpcxipr#h#?oh@H6ZRn*kNP;%o~Qz)ng2#XjTS*L)2jg&9IGTxS# zmmr8$l^sW1LP=JUhxZIb-3vn`fCv{24SAa8?bo^=4xLAe4en=qWyHy(|h#865QYoun|R$!rqtT|Gz@J5-a?v zJlUi@vCF;yVp^9F5gEC4-#!+U?S-@E=H@QfsP+TXc3EO-W3va2rta9W5MU^wDdtzknv0><7QnH$@ zox3M~M)ztW*t8GD+Rdj$AdmeD=}jSSgT`m`oh=~?XS7P_H%?^>i{LG5-6{KJGJ_@U zlvKV4`uZ-x9wt>sRAOQY>OoUjO3`i*d@A|cL;)?|Ov_#-#3AQur~4RHco%o7)!iQs zuiqPK1zyr#fLz?R^S&luZsXA~$;Pc)D7`)H)YR=z&@?ZYs5xHQ7G{1P>Et*v&Ipobvko1 zQzwMrR|xZNl8?T9`_>Wji4IO7phKp1Dj;yerj0`)AvN)lmhgI!W( z%fFe(T?mqzX*RwSmB_3kkxYJ^J8$nQMqnF^0MqxQsESiCbi|c`*NRam01-N zL$c?XeFg*XX*N&G1>6s4XqZ==}II@E-5(GC^vRDIsmyu;_R$@xLOk9 zd2!9G>ksXjJ-+Y|S(oRfL(cWl;MAPxTtZ~z@+^(jPuZtnmnnkpznBE73WRSs>|~sb zqXR8t6@Yu{l}+$c1$S&er=g6MJpfJIfyL;oe?m_P!z$^4^Y<~D`JkeMUfmtuY^S4n zt$NgM#ED&)wE0B+(m~s=l-6it){5B2I)ef2JUg;2Xeg#T``1a;FnhfYnOj7e6w+Qw z{-aOlp*~9^@)9yo2$#GzZW>V1*~oCO2-|xT}hY!PIVyF?&GN0p1;GEnapxeF8 z5zeyr0R-bYN5^q7c~n4AO-ldE!O)?GKYMmqzwzrsSk52BiSqvE9}bO^$B!cfJ}SNm zU!SD_Rh4CasH&?QR*B-CKp9bJjyux{)cBom%%x>JyX=({G>8Qvqf*h`!yHIk^r~|C z_I>D$G?$qz z5wHE^imYZTm>hg;gREmc2gVg z>jc3!j!;H0L*^nVT906{jkF>BTsiz?k?1RBa(c@M!b=y9)mku_p9GxjBCu7DTR?yg zjXq#(TLsO&k#2oVTo6)s@QXz3YV-*T5Epmx^5qCcmKglgl`CrjkAi%uvXkltgpUNK zFO(y$gikXwH=v3e933^P$bnA-6@@zXJ}VA+FJ0v&`+jTOloWa95MV7XZZ@;$?rT<0 z%lVZ1NHaMf3Tdf?UCeZVn1tgbcuFD`Ts1Kq*fVstB})x)0Go9uB=vz?6e8!9_x( 
zyK`qhU_jg|1Plw^Cn&fE38^@IuHToIBH6u*9;Q;(!~^z2U%I>NF4a4&eAKzyq_F|c z(GkM6d1J1WIImFk?asc5uJPEO|Iry&X^@&({|TF82;hQ`Ji`(I*=64YZmM+xtA1|m~r zG{_KMbr($!7(fXxjRwhlNlXG3q zv4AHUSH=dhDRT4hXzA&#SiE>KB_9TdL_ka0b#ZT$L~?6}lMuUy#3tqp`$sThZVhf7 zar>Ff!5?mzJPA~tvEgcSHY1wHhY^iH(_FT!aH_nABzGbrigjg;m~)5*6RPYIK+}vp zp3GvW>w4)@mIs zz}jSDGLCBUUEU&kTjtod$Fn>>1`aie-1l`kG0&y2qJo~oJL(#o3p zN3<+8;aEk|ULgDI?nJB;aY+Vf#TNfk#RN|UGK%}4_V57=r9G3ick%!}P^Nj;YCNkz z4VTHJ!+Tfh*go23-cEf~5!Rm0OOm;jl$4Oiyg{pBced{3%iDpNqcmcFtk4IH4f&(X zlBDS9okqsSL@b2Hpf-$Rf3c{B$-qDx6Yw{LC@Xlr-EvhNzF#X63QiQf(8z*s-+rVm zw+CoMKCrjE`8f%+2qUh&Jc>)j&C_!=sgiYdmp|2-o>*SqttciPgV8|;(A-C`J}fW4 z23;axpAnXi=}uPnM((sE8di~=e&@RCtU4WkvN+oET~xL@6{WECa+dY({?@93vwC)B#%oj7mgzMqA%p~VT+KAfOi9995Ime2rjR)H_x9xPddB+ zx2pA+wb2JalWt`fwX&ij`_7%mdnXQ|q_dx!(gk?Ngjjf@#E_P#w(g z@B$u(5p~RX_$6q3`ZWL+{L-74cNCA47QlW&k1P_4=Nl)jD~dGkPY=v$ud#KOa%o~vCB(kR8a{O~LyPyLw|vknt^z4-j+mLH9Aty`i*WEGBe{_HtkYJ9a0TNf zDxjP~)qp8V3@D@2ywt0wQ_omgr8g+PSqP3*R)6St=l>@iFZ%q~>U{asRDsgcQc!eO zA#3gK?d93gXB^|Ac#Q%Y=`UZo!hvbsINM&$0Uu!D zasW*i5PgCkw?eQ$h$vVVAdCiXD}uOic>s_=1Y4mVE^UCe38|uq{9ugb-6dhvLa#xh zspHN}19Y^2^Kme^&`r-l8~ZOI5YI5`9KYWQlCuGvZPPF|M*oUcJ2~@+aWzD(DJfsR ze}@qYZRX3DhRwgcbSgB@%clKgeYn};U#eSf*Z)a%Th;viGpEC)f$qwdWC+NaOuCvJ z)8e(YpMN_|&l~?j!cJLEE_Fl;_ZRj`Sd!VWJ@yqYSXoZD>S+HHBA(&lf}*0XhdI7~ z>e5tGRXxD%cktM;sjA2y+0g)5ZS>D3_eyK2Q)R= zYHDiu0=Ih0EO9@)NI~JV=$*I&?RHV&_P4%jN&c8F$5TKZcG*k=Z;TsNP}rc}U-!eu zkLpcI&C??|X6T2!f&Ghm)xx*VITvFY^s>k$>A?>BsSJFTxW4Qczv+EDKji(Q_j!dM z%UP7kC+Ud>zS{p4R{VnwrY%cAKQYYB&)a^JT+0(Xvb9*{&2_1ySMq2z+TG~8>F{4G z<~F&DIl_*8IlixV+`dmTcRcrxuhE%f1s!V^Hs>9ZI}BgGLl37HTsC@f4+4fJCg%jo|=o%Hmx zmO@1A@^Jxap#X{IQHska4H-a_q-e9S!6G8Vc%}r3CK};)(cUPitXvBd=)V6rFNnh;2O@J8&6xy4r+BiUb2Q1nOGY7lJwdJj3t{CBE-|nB!kIb{{MC zUw<$1`(2_zkwfW?WSrs%>=TrM&0vZAq6VB(Ru0FfpvZ%*2L_LoyZ^#dZJc44Bati-2|ycQFIcS6Sb}Ea%w|7%VA2kCBVtz(#&uL|X8(KTBYv$sw|kK` z@fbkw^%#IzEFUuGMER1^QW}IrlgJf+kZ1g2d$Qo(YJV{I|31?lnN?57gZ%3?>imna z@Ym0DuPpe5Z2z|s6%4<3>7S>?5ys_w!EX6}>}z$Tg}`;>&zzwG=z(_1LAe2k`AI@6 
zfyFH0JfYABFah>t2J2Q6R@IfdrfntRQM?I`Y5rSX1@7B^6C#o*L@jiL&T>yflDmb; zV+e$4$>B_NbxH9LiewvTcWxjc#I}Y@mekU}=$HhIxWGBIZT<6&s^)$(J=0KwK9;av zjou*{>ytY(d=gKKrURE>5dahKkPt7lok(X1cwpa#q1Un53b$C>O1+b(mr(W}o2Zy? zABImC(SAHZSmG>=jg38{pfGYLd=aFLcOk{y0xXKOLJ24C%$bUw3H`(&%XP+k<}i!* zE1FrLlo|T`+bF^EM%}!&l&~RtVI=J&vuzOt-AU4mh8Fk;s%r}AwE!TcIHEo#RR$2d zgH!F@{UJ-E68*wzCvR<-o{g2Q$x@cTdz#brlbDcRdSYczk>GAI*IUuvN<#36T8r|G zuvb%NKt-#F$*UO#=RyaL(nRMhudYsmxmhz{h882kXo;E7|2Tx(1Rul2ueXPA10Ynw z&Mq98-x19`kJ{!UdI6Y16kodhFA^G?)PFNz7HGAW4#Ow6tvw(oM~AX3v*yvvQbK_Z z1qPFeM)KDC$s+Md(_FdVrRgZ9{ceXQRj%$6T{SDSBkyLXT-=Z?L#gq+d(l_-x~W3l zsGYHE-;Y!#w(H~!)s6o+Yt${a9w1%PDdD9@{7v*(oY1a9XKSd*ynLshF+ehWVqp5$ zq0Ls8`ox6tsQ}QLkkt2?5~)gI|v>rM#LNH3f&w4I)1KnW1B$_NW>O@hQT-!FC2mt{4wlO+yOoX`|+B z#ooz3=O?QDSL5h^AYD@;sshMou^o}y*KYMp8e1Y@OHXyxnTo=)*!CjesZaA`@)XAx z&sR%?%+qeKrKTUQKY86?Sn$%1y`EJzjdQiPG*+wT(E2+_uF~OAxTi6;_DSF-TAQ5C z#=a-m=zu&iPg9ZG!_)I9fE>WcZopvUi_^g+MGsSK3VrzS^qYW!dU|^ac_YMOj!2e3 z)Z7XJbiz4nbVtN(N28R6sOZF`CWV8J`^O{%~tHAif zo{@cceB0WzKIZagBErIjgik1>Qw+Q;{#~j8U>zh|v!+yrJ3%bN&T#^(X~GX`e9?YUo=5J04G2O*%28q{~2NI=|R%11;vUyzb{|DSlvzO^Xpi*n8K@J zOrdbF(tmiV7T(h@S*6I$c_3brbu)Le)x2n9{1JZtL)?C%Q{gKW6ci|gREx|r8D3$& z4<8=nW#Xx3-L1Cfw@!dzLyqZu7}Sy1M)(m@0Ta=rw1k=WYH~;ATL^f z%Y>qd?FQB{GO-LMVoDuRR54Fq0GIB)n}vh@#PbEfxJ{^+Sr|KfSBk{YyJ` z*ngB@!w;0PrV3-zV)7qzm;EnVGr8)_*gf?V6*5@IysDxtu=%vcm+3ETG=l%@T0!@ci`&lRD z*bsS9CH-5Ud)UwZOjFAvxSFS@$u&E?JJ@#w>mb54Ecg&&^;e9vRgRI+`?Q4=n%IwY z^vUDkg$kkpbZ?UgHCO`0pqROVSpg>GnRau>pwxf4I=Dr^?q(rGYg}17#+om**8m! 
zx>i?E-PqUlr0}FTPU}g}7!Nrs<#_#OU(ho%$NEzGF(rYBqp(s3fwQQm8{IgQhV!>T zh5+0q1dFkI%wnt2ZgByrK|UkCL>l;QU44D@k7MU;fVHE!7j1cC;kh2J8gf6)Cb4aR zIiGRM|19(VRb6eW=K0_9t!l}t=;(w>aZk@@-)z}w-xLd`2b$yWe|_Q@TYjUuS2H@OQf_mF|804LW#`LpZbG&KjCspQoE$|bd#}f1MMH0in2fc zlSlbWT4a=J_re$89&v6yHMeNhyI$n!)^ERcg@39jarpXXW!#0Fzy&D}-kEEiG7?5f z*Ue+pU(!c2N%8XgWnip9vPC)%5{>?TJ=yYGe)DGw(Mz)b+Cx8SPMplW^ZI3Fg9Pn= z*`mQ^+dftbJ-ww7u0OOD4jehM6!jRK(N{`JO5QJxzFfxkzWAN+x5Z(7nUxKA&4`+t_&9%X@0$Uq3r@YFSB{iNO69;67P+$EHEq_-7Zzj1 zDftBjw}4CeHFELrlm<=!Mt+~;`aVH`oBJ;6H?&?#!j4?lOacNzU?nti<^4}CJe8~n z{bgh@9{1k8fe5F#ix1!+!1^%^tc`}gv*!M(G}o`%?zR(SU7BCkB0nJ1;5^M>c2z#U z?EHMom9V{e?+WS|I?|Q7=EIM9rNHXN7W0rtCVOj%6P>|WGb{|aPWy)F8dq<#H-5Em zV;N_mtDZ{QXk`0x>wI$^|frR5+T_i@eg2)^Q{f7$@vWJb4DZO?e@?iR_n0mVJ{Oz$H^*$iyE=#S~7@=A7 z@4{rs>fME#@;L+W={L|uEZvt{{{fKd9@8(5mYgbzSeSs~laKD6%$#U@88ABj)@)jZ z*L3K{>Xwg9LUoE7TXG*fbP9S&!!P^T$WwZ%gkG`p8av-|x>inc@rFhg0U^JO*=Nsc zQ9a)~C7_bz2+0p-%ba`uQ9IcbcrR>Kz<79(t64L-eY+o138ph)2pwE}lKDt@2Uo+i7hRf7y>U-Bxfx@d7TftbAL@r+ta5p{yvJTrM6q_Jx!-Xz4ye!R^s(N_=E&N+ zI%OpG&=PCok0Wf-2M(Cf=iQ&#b-+5Uu&l-N+3dUPlcH_)m)rGOYZf_v)9JDqN#5Sn zJbl_scto7l|89;GYhrxRQIGW88@sJprQ@ceGgG$p`w0)RK4#IY&)s<}eS2Gfdoc|U z7euN8%kw14rps@57HRyvv0gfr+KH-g+Es8iCt0!6!SCHAZAi(8WcPByJo;GsW(Hpq zlBllkF=DOkwYdmGUqLQFQ&a|}E#@Oa`bQZ=i;l>FDCmd02EXH)sVULYK#qkBHNKr| z+qS_v9WQUp#zoP)QR;>`M1~nmVRmCZMG%3+ojeLk=4T*}Nf!%l8x&oY;akMMjm3V3 z?VfGSXr0%*Q|_S{Yx^@)*Qai$@`&&CXWB#cp|g@BvOs>XgjKOjlC`76zWhcywdKlE z{bOg@ro0@OmZk9oGd6KPGrBjFb7Ej9eg2?{mA=-^B2m+cS5mc^$nC(fJcL33-CLrN zi^)iajG~0hqytSpP4FcW1f(o5Ay@~+0P$cW6S*P~NFaw50=a?o;$mYBbH2%I)`}s| zfhLa5MR}!RxoA8cLy4#2UZv72rqG>`kKD&pG<*(uN(}XWth55M{8WL>twN?YOW}67@rosaXm|p+mCy% z%*;*ozrKm;ayam_p2kQ$Cxk}7p0az@pmu)d=5LoeuU%4K+jeu{S?S_Xk+N33-57k2 zmYhy~9X~%LB9FnxCFnkxa@isIl6%jd0EoUA7#J4szkWT_GD~I0`35-pH#@|?DE%TjL*<M1)Qqhv`bQwHUIhGXP7$&S$Py`o5vwaz#^j>V`GnWy{%M@J~r{% zbN;}9TYf^|yl94l#D`6*zDEn{vTM9`T<@?{z*8W2$~JU#yzQoJ^M?$ZDV=K9qD7nv zQ<}BA9Ogv_^JOL{6FW+-P5k_BcPujGVPGyJa(T~H%%Ts1f=Zz-JpvnhG?J3jF(23u 
z4ikK>HThETpaTemW~*9jDKKQ%|F(z zt+tx!(8Fh?P_W3xbnH=qH<}^RS^~4rfRd;E_CGhH8S_(83d^2U-0+Y6SKet zxNw0C+`@DRGGi3&!8bTZ*MB6J#NC8V(0e`#npm`?pzNIhMaAMr9sLeIT4vg1s8;kb zq%Cc>;dT-*7})|I35mw+C6bkFVFjFJXYSfj*2F4$LyR(#~KT+ znX%TvF4e&?-H2TlRQ#83%YE-_Ke+eBfQoe1K>Z>friZQL{hLgqFIt%%`X=3RWK!|! zqn~z-w+G@AU;t{-Q{|2iG#D~JL;THD8-<01iLwE8TurMxmQ8pd8^{lnM?_(LMlI3y zqMW_Yq`45$k4A3H2ZSDqjRfzTxTTJ8IED&z#flYCN-uZcJNZ59#!t6ZH-w~JtDey| zSPhp{MD&zfFfy)7-D67^6h84FL}A6n*{RB@CWDd*gMKZ^iRtq3H5uEJk4{` zuMa@koQy^_1<>;(h#VWmO`oR8$;&?&dkoMq8HKR$9;J2jYT%A`x^q2=`XO4S7Q#N5 zC(u@Rt5Vz4vc7SCG$-p@WC$zE3EfZWD|q=H&P9%nd#|ILPWNYxOn1HSrfBzm-r&N0 zhf7tubL_!SPs&aHn9V+~aAec)RITL%x2^VBVDuL55x8$RBAsJ}Dcx1F6)Vv308CHsiL3i3W~qkK+Z(Ni+Gjf+YWOhA;?Qyu~vkb4XniAb0k7P z`N5eZf~(sZkJL|lKqb$HM6Xb&}K*&<#4yVKQ2jYH-kFewEN zKuFm5W@In|W;T1EJz7&bNdX;`n5dB%yI>2dd4Y)NiyvMKvtbZBu-88fZ1%N(@D&U_ zT`&Vcw0Fmc4QicBw^gW!wY#aP3RM^l6K_;f4m(Cy{`)dn=9(jIl5BB5YLbCWoNBMn z(BaVv&Te3l@H4$P&T5dZ6r1W{nD(=yv-7Nq${l3UpudQiB%V5>bs%jdv$>;-Y1qh1 znxRSLL6@}sKd0J{1TMR-rRy-l*G&dQZ?^4bhpB;UYHF&iIx;qJqrgZ(lSsWiS(cLl zw3KZ-c0_%sQx1mWkB*KG1eMYk9bH{MI3(avkJp}-h=1@Nu~u#AErmT90z)fqWH8OWq|W7)aDQ2$=r^BM+)Ve{eyL74 z_kc3Ps@WN4+Zh~pG+$szP@Is>d`j~*CPZx5@D@I2E2*h51oDc&7l;vvhz>^k+}3`M zC{kf^SA@1!>Bq$6e;2POCMxM^ao09hugJdOP|=dp+gZYOx=hB%Fq@{sU+Ma**OJ-L zBEUJ}-^|V1uY?F)Ue>ZdbxTN4$|TJij3p<=(*T`IU)Wr~j{jDR#Vir{tbs|w$m9@e z5#2R>H;)3D93~ToDivw&;4{G{eOYl=t}H?D;*uMatAETSbapjL><@Zy*CVpRqiv8s zGtr@W*l34hheQ04`JKUM&WCLJIVCna3A_tfVO7i|4LSqw!P_KG-0)xx%p+c6S0I%d z9nhSkNjL90JvTRzr~C*LQ2#V|npTkO<{PI)jxI%P+eb=5E{{vmOm9Tm3%y$3w$IZd zR(tOfIqn0ZFv3$=oSxwR+6l!_c|Hk0E~Zhpfv;tV2v%y0#E=* zsnPq&uW`fxw)a?cL$kUOeAN`a3Jz4>rK2b1B*u?tiWe0&wu(M&j=hrqxg1{A?x$xJv9c`@Oz811HR zv>Sqp|7w0_1^<4x9)qEYo(Uq;=Vc0#f)+ar2?%-`wdxwnmoJCK!P%?re*siYtAAho zzK!w2C8R@ka_1y*D@P z#X;ae3IuFYM>Htj7y{~WI6eS`tv37s2${IR4H6?%Z|^gSI{(d#xYHDK?$oD&3o;;JC2|xY0E@&}c$fKKsiO$4veAa0TCx5IHFN3knJ_`UJ-vJ~3M%N7xa(k!9^#?Y=y0bZBzRa8>lM zNbvA@phLLV3qP^&GL!i2=6*>yfap3wQqIiI{s(UgVGHUaIj53z72djf{m4asZMDGP 
zzx!KTTLC1@Z$Ew{TvW(Cfd6y_baSi{g-eUqOK4f`_gw-~@fBo}D_vcDrVfi&R zHxG@B#J-qR*Sr9bA6;1FCQo4=ziRVe<6*ujg%Mz)N&W(oTe#1>gAnQt3KyNozrTh; z1zB6OLs{*f5}PEJ{`K2}9+GqM_p6D6JlnrtSx59%>Y*|^2dOOTFAyX_WyVyO*9C2n z(roDOZWjfvKSr2@=dI5Vk1D;orS| z3#j3>?P>6N$h3z)ulxIdj}*YTmG~l!KNuEpR{9z#fPJ8GrzW5Mcg6_$yx#M2cE1jJ zLhw67!3`J^ny;N)H;P4WgTmv{$Ijm1Cx3lYjYGRIyW&?(57qBKc%1F+?T0`oH)_DW z`i9){^+s3n9jOpd^1OwK;QHTJR?e>hn2N$dAHj-(*EZ6cw1Dr^7=wx#Kpv1wBMcEI zVpz@J1{Qhr1M>8R*SGO!zp(^Fc(`*o!!i}rY4`j0)6$7Mg0Aw6yo^e z6aU+CKL53xfS{g2x{ldQN_AZZLVvya;RNYfWm`NedDtO=u#A90`FGzVU2}w#3427} zSQ-vM_=xRg5(7pr9{)8s!$h~?*D#d-$A+h)U={*t>Z3Y_QWZSk(cW4w-0h-foqwJi z2S)}MnQigxAP8$fpxofFf+#yQI{F4uu?G7RR_j4{C1YT$ERF#1ID{WD?c%@xz1XjR ze*$L4AaD6MUeVQs9B#t0OlDyb@#p(+N&J$(Uncaw$HV;hjQ?+Y`PX!ljA!AR{g+yY zuQE%l$~y9W_Mi9NvkUO4ck#2|n-)VX!1hT+$CtIsaNj=rr@ z`oR;cGSR!;Z#>H^>yDgTuIvYo5RJ$*ek8`d*j73Q_tXJ+IU+BO8Oo|H1wWqnt8bE6_3yUz4hlS^!?gt? zZ_ z3SbIsi6R){)Yk2+e(yrFo@CcZA+dSr;C#^gC?8=l;fi00hfpqn(B>||gZG4j6-Vr=QU zNn}{K%p+A(uZVb3MYy~{eNlMF95e&<9&dO$0|4|2NGy%v}H#u3?G!2@oJai}RnYG#Yey9b|ZvWcX zcU~*+{gb>3y=QnMadF?A<iZ;iG8w#qHp z$i_TvtZw*i>&U~#>%F{L=F@UR7fJ#?7|t2aeG|H+7(RFK%;{Tc^9}DjY+h@{jrDd} zINw|CW^tzk!KaIB>$N^vSM0b2~Gg4pp%KD8InOy5Cy@)>6W$pVxw*V7UP z8l+IaXcJOO$yL`0L%{n$1f5VUr@(v_?{ENwZR4Ma0APtP@6Vk!fjdw+EE3|35k?5l zJ8g~#Np_+C+*^)tz_n$IBfQ}VtcPD>^zdv#jHHN&pXSWh^(ZY&YtLH?p^YCjNx=d8 z3WagU6TALUurHD3sD`wGG@XeSAXvH2fE6Ia#U*gQeY$FmiQhy!#y}Qfra0InUk3&> zJ14^VFWB{opl;eAZn_0$|I{PFTTmec0Slf7?tyYm28%`B&jwRgh!uY6n~g%+OG0j# z>F|F11MBYkHTf!rB+KA!tE3xkM6z77T@TeLQeT#R*B>At38R^g9@Q1TC2Z2>Pag6G zp4H>tIZ~Z+M8Pv;&s7Gl>w}UTe)P#SU0>rk6Q^<3YI|k2@(%tBvVx3^sXfVgrr#}= zIn4=s^UIBM&3Ulin68vtnmAZuCv?4yVYhy#d)6DHUG^6mwu_eiTyacHciHUaMa97@ ztEy09z&f!L9<0QK?NrbeiO&OAhRe`oS@bsr0F)quE$Y_2WnpCvkam!QCIq+dZ72kW z!5^#^_quzIsZ~2!9y4ITf+tFL{8cAt8)9Ozx z@+$C!8`iCf-PC$Xm?Z1{*}eRTvap+5hS+}qyCw$hRA_DXBl;M7w$cvNBGpnw0*r|| zU*QCI8FUhS^y25`@X6T&p8_0ibOZ*$SE%ot3-~DAF*Y(yBwz?lxZiKUcMYW*K;CnG z<4E2sa&Ou})B!erD2~i4c+ZMX`T6=1VFm^=o&paAxyftTIyvUe_FV;a)df+-+t>FE 
zDx1>aj>ZipE)zPCUK49SGHnemDwUO$-5>$*4alfs<3Z0={d100($*K|WuKnQ9Vy2m z4MM;+T%{m)4YDN^X|M6P-#07g{@V~}8D7YFQR4l|cK_}J#HQt+)6k&J2R4c=y!uKi zc{K1lEQK!bZMH?g8eQGa4?86m8~68?dyl2&2n0)N=uhkyKQ839<2~P(Z}Y2lgw$8q zJ+n9(eeKwy^>R-lr>ivA>@i=#l~OT_un!dzp>^}f0l=6 zdq}g@y?=N(IqW5z@3gLF-y-bXL7$g6Gz(WAh`C&G8w?dQU#;T}eFYH_1$Uic>Z=h& zT%#OlBLk)*G*I~M0PGbh3*}HOTCP{0vYn`+QDs5;KrG=E5R3&}GEqpTf>H|UXr!uJt?VR5&aG*FcW}`d6#v+2AZjF;6|Z(#q|g5 zAN4Aqt&~@LFbf^|`;fWO$W2Aj7#v0mVeDI^ix|sNfG6_l5Zw?=S(8t-lAezLaE=w5 zCaMCc^UKI~hUG9m-_^lavAs&yrOEsz&Qp)j?zsM4WRHY$<~?g;Y*{m4<-l!GW)(Fz z2jhwno_%jOhikn9{%weBl!%iy6Oi>Zi6cZNIo@5d7$z!!zj3yFEFDgJNoeeRrSX=n@er(u_Ye$(tVMYARtaxJf zWADX|%FKC{*BWYW%vXVUB!u=W7D{?3QhL@Ml4>lK44?qV%CF)6{>oWJ?*KGlHsEQT zQb;GsENZ;=4!pC8fB#jJal-~Zz8Kh^z!ppn96Vs%V-Js=$68&)-O`*l(XjD$b(U0q z_Nl4z=I(*oYDq;hnH0R-WlmI}g#}5~_D?s)sqQKZ$p}8)Di19Q=I0XgTG5hHyQ}uO zTz!fHuLc*vZj+3}?f^Y)rgeUn4X_cvwLo35X=)zZ(eUICA0~e8hV47bx{XNjkpHh^ zWK@-2VG%$4!~sSb6cFBPbl{Z4CwBTsUfx2=e^yeB4AuLqlX0suAP{?uvFuY%UBdRR z%F1gm#C)gr`ae8fV#Qbbm|<_2#4?G+$Aeb?v(EfZ{@XSEdS}$W3=U3EeW?tnoJ3Og zBQ+rNx=3`&A!pgJ7{ zkD462-dfyE??>aKp7wTh6k&CbpFVvBX*Z(m^ABrl?|;AIAPrvH9$do_T`U&f~pLLRr+Z{8)4zT23iCI^J3H>h_HIWW~gf3$NU-UYT)v z>`T)m`P?G9g2&&ZZsY*1oz$JsFWQ?O$GY%0OIli~B$nHa^G~r&KXfQ94q&UEd;-HY z4`Pi%sR^*EW!eeDC-mmpFwb~9o5PF{?#tBvAv??GEr|Oh^92aB3EA&XhOGbxcQngQ z5D*+xi0ZP$lu6}?=g)eB2tqVFIoie(XU_)?6B5v&5r4vtj@|s zv4TlFSAyN`sPuIB3nhd2`SeAm(uHg3>7)-0`^4+W zqT{BiktkQ`CDtHEQPX{8jXoJ2o!7E}*+w|bU<8l*rVwkK7UV=7?OxKIAY1JJZvRTc zy8p#@23ThV1u=tkZ+yR4bOS9wtKQ|%WPDZ0Fgc%AeziJ=fDj*o#OPA%jb>_= zB%V}W0C1X2epM~h0jD_O9P|<18Xw5DuQ@lo9tbI1aBjxkbfw)`2v6AflN@N>A=#tz zWuo1DC(}c8PF85s=X&ezB}aAk8qWj`?|*Ziv8I`W1=4tTdg>b8Cqy#%pDm2m}-$4tL9WDRP`(iCoR`z~Hc|FK8wUDlm#epg@Xyb=+$q5!*)W|R@PzeXN zJmIdkoaxaZTuahM0Ry$e^V-%t&1!HPPe6&kl9m?pz3;5ooY8k51Ep$lW$ zCG~_#e7plLGZy3&$Ya(pF&#zE7MpWE=H}d8MjpMES9_q0dyXsD0#ioeL{wwwNjy5! 
z_<37D43r>Gxc)uDACf(S?SSj4KGBr(9(_JkIU$5W_4R8Z{Gek+tiXN^kg(V z>1%7EV?QPGa5wLp?cnB( zvf5mmZNa%%mrhJ0C+J-BlrFWv`R6+_)|BsW-4sF-QQ8AGz9;gUX=!O`?EPgb_=T@$z_YRVoDafgs53~agTYAmf zIx!_l{Q`R=yUo(6>Yp0x6FD=lSHChf_d|0Hn73bv={0Pk)g9^QU+Nml!gN8guCuL8 zOMy0y_kYp#7GP0sUH>o`ScHPop(3Etf^-;&NQy`!2uOD~g8>K#h?KMih_rMLAzdQU zEhXJK%*?lLeLTnK{XgGa=Q_uuIC0Ot_g?E)OCqBERRs+0YOzW;HMUmxp`IdPOsVeV z1-f7{P+={MRFtXZJ)~R5ej;UJVge+-*$SLX=^G>8kHASk|7scuA5*sO#9fAjaOoG+ z)yODOWcGK%niBet79iVulRa(=>Rm&A+Dvwm{UK0ozZUm!etHHqUXPN52p*u$tY7AO zd3jl2J1~frAu385(xG7^n}j_LV)a{7Isnt!g*B4(Bl|xkmkh%%_{}~X;tsNNtiX+o zoN(XPA-@#KgoTd~xn=Qjr+4r-SR}&thZEl~5{HTGMBmSvtEW}EzaOgjGpq8k+uK}AjBTx%3u5@r#R z(#{`2jlmXR7d%tg-MhSvu&*#7M~B<+58onF_X`d$xwG$?Ju=8y`JA5~>;2D$E5iTa z&rAI0g(%;K?jiUD$PsvGgMoV)pPIS_i8oFu8bD_NBx}@e2DLHVmQH~lknDosfFM9{ z9rvYzYxJ?}zloY(IOsi4&%^Q>gQ?!oZ^5Rb59%!V8~~#HK+JLs?h&KT_Mk{q9ks#XmKS58TIlLTCIc$R*?Nc zjgHKP?EPF@SVj=(j#l}Uc-#9&N=0NqqDGa)0D+n9&i6pmpawgejFWMv`OMbB66X787XODc$;2V!MF zd|ulAUv!<~?^79Jv?1NN&VcYNP)%S@M*x()x`qZ7Xx|^W46EAY!mDbheEr)u8tGuV za0tGYKW z{_)mdoTRl399$rs3QI@`{(o7hU&NTTDZF}c!jnJW4iLc#H|4JVSyI1$`#-?_rG`#! zm(Mo4MZpPv|Ngv8@c$nlRlU{W-)TAtOJ7Jtgd98zaHugGX`>AEG;i&NJt|dnd!zgV z@{eKnTt?zc_O=#V;D_m0LI=A6gb4yS9K^#|F5LgUn2~>*|GxW>HHdTs(8EG=qY1eP zAocIr0#(2&(i(&763N3wx@Tx3K+-0uH9VLo;3e8KH?*vW7qlsTF|o5(Q4k^tf!;ud z#0WdE!ail2*>I@?;MZALZWSSJ>2NAT4e~L!>kk(JOb+iavKB$28J<=X_y8pZ=nzDb z`@lSbH~9o&0Jr^u13}G(%7vU);0s1fg?s(O@bPL!Neh!4OQuT!72%6XDh_tXQr;PG zuuN+;NCLhNok237a7F__Fy8=p9ljQ5<)b|+>kQ7ZupEZ7V`yl|ascLIe&Vo((66o! 
zcPL`{&F<+l1=T$P^=NNzk^GBN82)n}blrf0&SPA}#qs;=z2YhVd{;7Fzm#_XB$U4vq_5XPds}-4>8mpCBZAu$GS0uz#-sPq4E4ejN~5j-+7M)PYstxfP+01xcFN-p}_SW?pIK>(Cg~ z`=8h$FOrM{nd`F;rIcyr8SW~afkX}FJ0rnT$4E$~ zpmZzRVuDi+Qugmxd?C^(v1fr-gPnZ^&dACdZ;&4ALp!%g`A2pPZnMf?+m5QNy!<1m z3=!Wfd@pfRPvKLygIEP<);C z&xK?#s~pl8AY0@*Q0X9zF1R!XLjc1d-!TTJ6op4bNP)NxMm`;S13eAke2~kj2t6d6 ze(*owDzp?w{ZBP4%ykuW4dLG{&LS>OCxo)w}wJ3xQ~GTu5%0SX2N z67Uv@CV8$M1GzWMfC+%~#*;9Q9ct!z7J(`VJK&`aegS?Uz(kH?Zoxp{S~AJ++B$z2 z+f!kC_UnqBP>0ReTMDQ^fIpO~5EziLv87&F6b%1w5N>MA&e_Fi ztX$9T9B(id|D1(Qn1W!ruQr|Uq!>zGO*#xKnN&gXaRRg z!Ng=^g$LvjGS3eHY?E9fBC1|?7B$s;2846k&^&rMZLZ?B-MavP|{2x85bO7g{{^k5VnqPaJIUhAK!alaPexu0- zg90Skd;`+MA_>a(Tv%82va5a%bJ&LkcLyq7&EEt-47Ohu1mgG*92Arn0K*G902E+n zXlQTG8X5uy0fXl6#YX(bMYs&ZqjJnO#BjE(xH;$aoD;;J(DR`V51k{ym2p zgQb5J9yfrNM|hneUuX)T5=RmtBHF(@Iy+AT#CUBMmkD-9fy3i*O_43P7Ix42i8Fn< zYs*VkwdbJN>}wu6v9xR`?y~(u;eux#T6YaT9Q5@Tgc84nl>HktV1z^v4aiI(l--`k zx?QgXa{mGwTXFU|6aKA(S57L9#p%Ok|&j@8r9)t5gmK(kQ3f@i_Xa%4? zqrPf>pV>rL)R$^~=dyWBTW-3f(oyTj4S4PaHI{w6yPfAE<13Vq; z9J~SGGXh{3&{^`+r=Qgs=!0}k;1}`$Lj$EAJ^J8OxP@>L$hl>a1mZVX)YS89O*R#q zlyw3{rkZV3i9$Fpeb@H9{3EatPw_JR6+l&1gr#{%Z~sI zL3RMR2#^I7Ez*F7JPBd&h9V<^Mn)20>$V4a3W-9B^Cq$L`sc1YzgeBL-M|@@b*h3+ zlnhSX$N#Hww#!sQ|1D%llP~MHoCc&03aRm>CEfS*fyYmLO7M?>@i^z9z5@goMALe$ zwZO3;rLLFP>eg14-3vaGcCa!kr#pZN8#FX9<|^^!%a^oCdg>$$=-ll8NPDaw-10qq zRf28SRm^IcxIozX@;Ehvs7rcwKGcG&_~*=rf{ydXZz0bAX@aOpNefXDNHz7YIV-YghVdKymL}Qjjp{j3!UbY{???Btj2r!}9 zmlt#++u8Uy!3Xtrp4buXE4aO3?Vkc3I!|ACu@|3OQqo^N6BuZ*?cprGmb5lPcbbxl z*P%~{aj7Tox?%ShXwC^BcLfS1UqnGpL6L0%AZ~Ul3|r{E`cM5cr2CJIAOQq`YmLmI zJb*&N3OqtekK|syh9Ecu(gOB128rF=+FBSM6!PudgMsmlyvAX1?x%wK@1+du=sZdL zNG%so(wzG_JP$afk9MRibu2n!Zizqk--*<5heFY|c#krT>Pn7Av#wr1zkFC)am{-6 z>pgy8GI}iVg7D8=48$I!kj3yZXV@V3AwO+XM+bM8+P!-> zl$HBO=Pe+C*#WY7WfA=(Fb^%k|L6jX1UU)ee2Bb4O)M}9<8L*BWRd0k=*sx=vW*oP zG4Tbc1>p3SZYs)pWk8y1{KI21k)2bq)Ll@&xn1&yhlw0gInIQ(7E$_2q8t(4OVr=y~F8OS8D+gkoP3xf6G;?-2ix%by{a(SQ={+jv 
znAjJ6ib^`C;=-f^rGx%i&D)$Eqwk<#y)xluVaq<urIDc#dhL~toKWQL2~)W)x@Y-j$fHl3pdNY8-IC-@!*-;>d^qV-#!&fVVfGOg zHpx($oAqlw#i>>U9$0Jg@#`*Yqgct82}%CwP^LlLMyR>u=-g0J^BNPbA!b3;?Zw7I zkl5bLjGevXWY}{;j@UKNg3+YFC6v>8dMmVQpwc}7mB=(SaFeio{9=hgWU&f^nqC3? z2NMej=mabwMlTQ!5{N;(4UG-7bdzvW!>NmKlrU%`Bs#iRX%~&*LaKNms3Dm5F_oTY z7&OR8Q3?1eG-4M(`tRf8^9_UFo0m@%qwxXM5V@ZLbtr*gUI05x;GPv|nL`R>=*19D z8Aw##xVDACqEL|UX<+1IuJ|&FyRQK2tU6!$XTNI9y!DUp@Lw~b;e*QWZJGI|E{pO7 zaLz!8BOOb}UXESQ?TFKHuXl#i#Y%RamJ$RS2YNWluRf)cxQ=<8K=4zG_LFX`w)2BCsS;&+%b4Ck37^C{&-i3)3r_vxY}vBw$q2OsL-Nb09X zHG-ZJJS~-awKp!USdM49hszCnU$5f1gT5-jE8A)uRk!b7r-Hh@WTGv+wavZiw0)@R z{=J&VDr_=3FHpD z$$sl4$SF?z;zA*`(%x&?SKu2XH&f!}dD&hCoi2RE<5;hrI}7Pd40F|@;t}-HXmyX^ z^s9}Ld+{kv@!fL6A?SA#D$4soiJrmCdC@0nPd4s1=#4inuw+eg-}l5uxaDC$ZYHhcpN2YMeFd3lH= zeggMlQXp#fUZVTlCFJY_{SXj})8FKRQmU-GQb?fG)Q55jA&~($vz=@_rM^b>5F8_* zsSW?~MGCA-RS+ihTG+w3X=2%y2d2=#RFdluUIr0swk)|pZ6NCeuA{_D=T9LpS@Z4` zVyHTo+7-}dTx(r4F*D=8{^-Xg5DbBuJ`b`DdK*ii;fSCnagn&(+G;rZxhEspdi9DjL-|coMm|cCRoo4lYvF|nSsFRQ_=vIw}d- zhuE*=n9E%#P0xz*{usxX!DU6?c3-LSXAZu(x9L&v!m#V{154rj=!Jcq1K9n3XV+1h zsVubP$qFUbB?IPnatmc?qup7jl8SUCUTC&Z^6b`oTt0JKQ+k`B#w0!Z%IDr|Yni?r zL&dLDJ35HYUY$QAhr&MIUU7pL)) z>aCz;5*P@fVwItiaD$FoXrcPP6j>=?2iL# z=whE7W2X|gS;?anAMEdxHumsF!XV#A40jp?KeW>{jn1w8)U%nAQWRj;bALurqB+zz zbJ_TX2e}d%S5sNdB$_D;uev=J;(cn zg8S?i@}uYEW40ywcik@RmxlB`Hx+qskN@h@c)lUEU-dJW)TobLXT_Q~XP9Rjets>! 
z6}hL_Bo<;8E9D|6a;G+@=ln+TLQ; zl{)4pLA^J)g7q(XMINp-7|S`sQhxl(dV3!eHAQ`{!jYys?PuRu&oZYC&!RcbIUR_x z>!4^IJLcVA^fa9nADk|-^`mqoH6^T&S~|@t)i_}+!Dc3WA9v;5%^HF}dsFUYUuE4J z!dK^?&3Bk-(y)-%j3nyzEu@k^i764@NnB`QP?#4un`P%vIkX~vLcp`*wf9XO4DGYq z`mwA*Y!%AW5iPv>%Jj10TGmWP3`vb|?h@CYWmy;6yus}A?t`u4rPZ=HTS=euF|M*mHh|}7>6Qz zYRf8DLl*6Qo;fHhDdKeR=v$Dk!CS2p6W=_e`)4m}PPqzg(Fb|p9)V38jLd-Xx+b8; zV396Q5VEVe%CZ4?ZGWK^9YQ@sx~A8D^T&l+IrJVpIsBHliiX+kpkd%Eb;*i~7fdFe zElVuu(qZqrPi++!#Osx8xD7W?oM`VGUGkITRR|YGB{Zjyseg8!J0B&vt68v#UmsaK z9Z!F1fOU)f$^P8ftn!tZ`;r{5YR#Hh$O|B}+)w|dvppv2riW&}UGkt873R?t`Eg6N z1=1r-+53L!BAfP$7N<2sVY4K(-H@Uk#ckb89%SlrIlAmAn#3ZRZRD6#owcZ3txs$^ z8k{N_H?Acq$j%+zJ*dclrrXo0^Jk~b+x)8g#pMSW_?8K9}AlnwcBqSSlYD2_RhKw79NEK(R(7-?ga8Mw;q!9hB z`FkKV0VinM)l{3}aUyW#-voAF>omHy8crM8eDEH5fz)2!+}u215mcIplM8gnB?CB+ zpq9WS5O`6@G^w>J8djTi|95eGT_HSm>QeZyHKJCVoScMtqACf))hoW z44WPZjSXU15gI=Y+B8mSFXxbD@%p{l(c)Jlr!ozG@{Z4u8x4%Q(uh*wzKlDG_G1=V zwTKtx*Uv3wQVe3o$1lk5_|#39bS?Rfm@F)BZohrRC_6C|)1r}sQ{Z5v+*7Ko9N5E~ z))|c8txpBNpL5u7rHw9HZqCVKNex;Y$RK&oRrac9JNVGpE-%V(NR>GN%8h&b3>)tI z0<^b!YM=4VmA==k(BPsGr0?Q4(<&kieNkLju>4qGkmqO2D#vJf*jJxxTn-M{;jZA) z1zARSylSbS&F%1?ugNhIt`DyiRj%?o*)T~-Jv@4c?Vg)$pJ}g1-pw+d`taW1@Gl&`KC1T;|QFZks7%9itNtX+S)w_@}^KttVE|%FXkL}2!{XaW;Od5wI%1lTp zQZiDWK<=%=+~}N*7{Oud_vJh6A`&LG3G=D%CQw`x>;e$IfVe!tU9SjejO+pi>ha>u z9&NbLT!6tsJBU>1r71d$B90f8Bz7-=y4b!gY6X7t8%$$YM^=&%^o>CE)A~S#o(TS9 z!WD=+y8_7^w1U?1plLfuL_7w%_sk6su;r0K$V|rExj#Hf*n0fxI$3&ayLf@(>*tNT zlhK>$W(>V0Ukb5hE?@JF;$#aZCgx|92_kBjs<8B0_arT&A-(=@hqD7 zcl8)pV2D}4Jk|;nNAfbE!91yYJ&3@;#QN#pFv1Li z8XO2==^#o~?GDh`!NXih|GIbb{K9ZaFl(E3WM3g_n{>eO=e-B|0nZ!V9u1}>T|J=j zDh(a+M++b&b(s>x)(;;(jNDGL@qCJ1CE1cRSv(@89`Crhv3dTT5!T@w>LrTd$^QKP z5^vXsE1KEWTDo*NiOT#JW{zr$>K8sJo;~N(Txl#WE@rC6UD&)R7$}D7t@aBH?dZ3i z*-QJmlhm>^1j7$+$jHoEINt1}t$;%?-5wK6tBTzl%{4@Lx%+E>h>^+jDTl!I_Wb+n z>I4sUVd1@@p?;$!3!1LiJ0t4%uVzU4p1@-7v{6fV3PGQiy~y)tXa7KX2{ELeziQmv zwyj>WG|@YvJ1r5E?4CNCMMAar@A$K@*tfr4A)ZQ1T+CebnZw)-)IX`l=kM`x6fgh* 
z*O=6~wM(~Sw6!)sdh^d~0G@V_|JQNt3}}?403D)t%oXS43pa%%P|K2okCq}P4}&ZV?H1UvFOQ2>nZ%#>hxjD40&tC5X0gN zyJN2Hcn$d=8Q`H1u6y0U;JhSmhZ#Xq)zS;1-epK^mR(J@Lkp;WDl=HqNnB^BZO``3 zavTQg<{+ZD2I#7(rNt0D#j1ht`5mTPksw+V;FP+1{(-TcKi_w|h+MZbR}{h#d>9mR z3AxJ|5 zHK|E5<^f3X0Wn_~E{lf#I}nPP5YXw$7U&mfK|~b+8!%0VdLZuxC@IZR=g%$=$fNhy z(};0-y*>jc^B?vBf_E*^%Mph*%xbQ-duh8YHHMxf;m4 zJvSKK1+j;$z>fl6cm)Kfhzs0_eyMz25w^;9korGJ`{x{kk5=~UzG;q3NN5AeG7Z1! zR~ihMzHCJjK=*_g(Lg&5qs45~n3X!H7H{j-kIfb-vt6b(xy0<6;LH}||LwUYom^^e zba-5B9h4de&wlc{Nu-1hPEoHX{JI-LNSx?R_h6~AW)TOMxe;`6)IdIftok8n@(^nO z)}qsrJPaxUU9WQ_9!}i=n1*6-X^##2DLo5G5`uj?44f1|J-R`}6uTEaq%hS2C3X41 zU#3*^XlcZUSpq$R6!^=)8zX6$G_1>y%>}Lto*J~S0-$c2uoW^FX*G9fs@l@Ik|U1P zOAGIl**<$6z!uxjwb74;w`MPUDfNu=6WoZC>cm!7PhK#|F_~qJ)f@^F$!up)j$%OwMcb2>8vDi&|`m> zPKW!79#g%0oN|yXM(|!b_#G$_sr|0Y$UL{eLz%P#ha0H)ry>55RY0Kd%RnpK@(3FM z*Cq_4RG`uTOZGQAaKHKk@0Eb1``3mZMw|Sr*Jh1tLk7)(sE7cn9z@%5im(af=Zj9@drl(k2$0#RE1A@yL-?GH_<%f5BaeB zQt`}p`_ccDlRF^wgsJth7G?U>a0!8K1~mo@5c5T3^N`s`zDdV#>Vrm0fbfDCVZ)MtIP|&fqn)!YL!8K)jQPz z^p%w^Y5`Vt**0csy^DuzF?8I9Gc`#<<<@K-=wYd#w+ZQ23B~@qQ$4{)WKQLKtx8gS&`SiO!O1T+5cO0Z* z?aYG$*bL;>vfqN{3!*a%YhQ-LUUv-CdiGe*I`G*2kYYjodlt3dltt|0GRF`&6kKKn z41ei#?q8pp@%{IfLEb@_Bnxue{QclM`RghEpri0#owNB!$U^ZVG;DpA0+7|Uc*mCL`i}Db zUvH+Q_*W(H*NJ8~R`r-11Gm9Vpzq;~K7a1qTjv9R{gqSKERYXNhd47}ehJvvO%_g& zl0uM-NWsa-zdpcz7&!ukWkIot=&&J1$z1gB=SlPV{pWD|BJngJ?6>$|f1Uy-2|{QA zVUElXgJdI(Of}ef{cFcRF0TA_4JXOOEJVQft*b5ibQaRCVwz z6k>;^fB&@6gpwrtF8hC;h)S$%xV=8fI}51*ClCC3FZ*eZ%L6d$SK;m-mWfIL(iuu! 
z7Viz}BhMs*&Iy9bP8NSnYDi&8l;4YiUSr@Z6uTyfLY&P$5=V3ocAD zeB8#v@9wOt6)^lGaoa!k)8+iBu%NgefY1JZlwA8&R{iy{?H}JoZs)%qlLy`+Xav_u z*aJQ;{yi?^Llklg+Y<(4Xrl{no(x;RZ*EVn5L@(Vk!O-@YkA}?dhpLu`(l;WbSD-u z+5GjGyz#rn{qr%94}U}D+COj~tn1*WjusYCzJnI}=Tc_H(hG@epA?0XJ+3~tv)h}N zvk#(EsBa|FC8j!*W;D5+|7#`71j~pzM?Beww1^5A3U|0xsEkN~!oTafdo!;mcK`YF zEOqh6fzRU`_z(ZB6ODJ+bt^eNIWs;n59#zX{cI>g;)7&lFe%XMQey|RB=`2h;OSGs z9ls0tKbLMs$Jv-jcVvXRJ8TDgu$E_KZHrZu@BP3aAO-m)g6gkuHxZIXusURHq-N# zGq1US{)2U%Yq?jgIR~R~k>z0;*ImEw&eT+#$R@}2u!T}7#co0w0tLh@zGogJ4zv@;K``}J}<(cjQXD3bEreykko3Wf!{CBVJ)2YWYJ%+UB zDzWn^_KzCqY4c0R%kLauDl!hY;6B?U`MKH3QutMe+=tO3 zvQsWB??$z?BqguER1Yxi%PXucx%m4o%j-XWZ0SmrUzC`o;=xo3OI9IzBIm9zOZvCRU-#Nm5Oh;RQ zp%Hi%kKr)RCkXOE;l%SXX*Tw@lkcTvhKF={IAZF^HWj%xntg1*v(NEU3@?0 z({hwg>9QV@kj2A#q~%i6>^3eUk`pgva9=~0?R&Juad-c z?)>^~VnJ^QUQS9@@1J96GJ$N_;;XD~9Yy=N;YV4x!OP?$bGI9+yCM!{9sac?jLFwl zH3(9FQ+l~If_cQXe|))E=n0b-efCe&g)NT#{rQs~O)NshO3n+SYSsr_TM8E+85$C^vf*> zUy&Dck8qLPd7hf&nlHWQAVlJMU_QE(`<=HO4~M1h+9A7}x{h#_J%ed|V6`frI`u{1 zc{NXSqQR%n=JT-`nchsh=m*bCEFCe%kM@b_f|BO$S(kv5;hN z7olnVK(TY#Je(C(|BMb}E0$`0kkkA#w~s3NLZoX#n6f>}zB)~E)XS+m1U;-@J^svP z1}~#%P1rBIdx0f&y=Rhcb!DrTiQ={*E2eNiy63@i+C8SLEXik|Vuc%iYBb#NULsu@ zSUHYgRN3w9_4O4fkKf&&NN!-~z4W-7=eS+8;izuwzyYHKsv9`PCyn3lGF&ZIfj=Z(R-lseDepT>Hbi}(FJv=P=I)DbiS<84H$9@_v+T9QyEs@lz7^LHKXFkmIxf7W zJ_(hUi(61U*w7T$6{Cl5Z5}0CSw@qb6KiB5vynVMcl98JH*QaP@5OF@&6Xls+Dd@V zpMoNnn$nua>!7;c)sI>RSpgPnmHv$9a090{$`RoaSY+B4n?tE0O8-FcMl|o-A+|B!h20};YA2<;kUO^pdTCFt9-ozKo7@=<)isr#05K@GHpQmLTxcH;DDq&J$GavEdW7ud5x^7YY3Q z#P1j@YqaDB!)V2~Fw%)niH8*XSB~2;>%Fs{V0|ch{caAEy7-n=nvtN_=J9?E`O#oH z?I~QlUz>8oz1)cP7j6c|ow2D&3N$3IGj+!Bfv1QlOp_T2aTXY#_8x__>zHYM|u};kDBYAI$ zm@~^E%{zSv3kqS$>^+3WnLHo1JA6D=Amr^=o}))Mr~B%ej)}zhTr}oHELrbs3W+x6 z=VcYSFxH2;(OV+wy1Z|`$UO9~y>6~WJFc(q>UJhmE-WU}7kX)&(TlabL52^>sT8Le z{ZdWp@KQ#P9vwJIPOf9@G20MI!ALo04wD9sP=vp!dZi>qdHVFru$Vg!ubl;xu6Jpa zL5Z)ku3}#>q!|`%+SL8TEjBggy;_++*RynI(r<}Z)x6g@&u(9e=D!HVVAjsSyw$72>n=uZYvnilq78q`H{O;H5(C~~%NE{+M6fqD5AQ04--_zBX 
zU!eIwPJBl}frWzu5x~h7VQ)js2!WJzDLO0EEEY*nfBfXN7e|1?H22LPo=ce)Su z6nN;w$176M3qO2fcACXFA!JU00DtBE!KXM0rY`3L1nc?uC)Ga3)^|oN7=Ah~r(7;z z`Zmp4$n8af!rwj?@agvSDt}Kqmsf&@PhU0Pr|(KmM@eiB`6S0@EUXS)Gp=-Wf1yej zsU7ZB9mDL?aqC9kc>b7U%}{b&uGZpdEh`pWQe;9yK3jIJo&AP`)g_Av3;TqGg`;@J zTAqN_qi;-#eee5|1%*uAL$}AE#fxjriheArZfLg|a8k|lHU ztkJiiMGgykW>Z#3FXVc%lbySOpT6_O)`>-*v4ir<%J<$QfNdaqHbuI zUUUgT`78~Wu3(#0h2j-i#rJrsuU{|caiV3ras~E}eCzI44NUGY$Hr9jUU-jw4yMT) zspEl>0uF8lKW;)nUh-W+<(`V0{#-h}#H;6UKVE-KDDS>-%7E@Pi~2G>oS;!cL z`Wie6;nxrDMgPQ)y{(-}M1RlcPpQB$6)q3llDW$x>{e2~wvRUbRvf0i++k<$H5c#6 z9#-wNvbt8I+3cuFYSR76$WNc=jUH2>^=Nyl1KO6whrKl9@_OZEk@{pWt{z-YWvhry z>+7cKT7NggdU@kY3}J$(^H?WZvI;{o2=?QE1WXN;*!y`?i=P`+!2nJQBzFc@TnzYJLC$;FmIb8_SvF`?H3}2DJXTREy#nPxA=$6a?VDuQ0Yif$v3ZO zg$%)UY|lxO6j-f)itijbGBHl4FmZ}0@}h|XhGcnV>*0M|_4FJ$wN5Ax(+O{sOf%(x zB7>vSmDzreyUGHL8t5$)YOnH*u{42bD+pOo4Br7{31ZaYX?r){!*53!dfd5ZM40F! z1!bV#C3M1tBq3>yLT)=kUCYL%3r$OH+5K;cTPq%uWzQ=oSIYaJ;U0Z=e^@S*gsUhc zHJZ)%G3MBquwJXl))A&C?dhmaLEDy}bVJtbk6-p4W!gF{>}#Z;FcLl?Sy@&2z+v_~ zUvPr9zz?g{H`ppvk5=o_r+^QZyupa}BJY*9t+|vwHZ|$TSh2??p;ymb=6jw&HU`a9 zD8ro(CK%SpIm6+-U~rY3RLN!}AWeO`Nr#%uob5VFa;$pR(Z1s#StoCn_{4JM!ML`1 zNmj{*xIE+a$y&0KfnEKH(DL0mc0dqTH(w>oC*X*f(4t^rNaEXzzhWiA87DZ=eW?S$ z?VZW>lJbo?N8I|{eRX0>743CDU`& zuq#Ay>9w;hx1E2Z$ROW!fm6KVUc;L{Hc9$-Rnw1#-Ux6EJHDQo#M@ zZD+MEtel@a02gVw{ptF_5|0Hd_*{T^Eta?^^L$#z9K-aoxA zv5N#WUg68r2~cwBQ;=zCohsO!FDX>A0M(Yta@KhkKL1fN*4(8{(FarQ(y7Hz(eKd# zwWF)6JEeO&(o1iO!BA-30gzHcLZ{}8^{R@AzEUeu!7rhcYn7=z?>G}Fk6o4!tixLG z%$guq?a9+8J1bqn<=7=mId(5SQpCC2iSIKY{D>eQsEe4*1VuKe9O$6pGMa1QhLo1Z z#<1R1P!JqQ}^$-!IL3i_+DMo)t1c2m_4wxM<3lqNpH_Gy=d;esn zBT3zw?mrd3orrvniQOJ&-g{b#D#arg`&9{(TC#ct%y0?)|JO5gYt@P$gBm3 zgIsvf9G(e&4E_;&4_)`jl*VXI%DHAo=j`|;YQ7=bIpRlQaA{XsoOors@UAKM@lwt8 zl#SW$$3LEVYRPt|5N(kWv$=J%)63`*T$AY%H66fiIeeq~-b!0VYSKwtbs`F@JAHKQ zQfRPaCQ3(}IEc{0B_X%s9PdKTWartcXj?thG*ipW@tMsXYE>q;#eD5m31Q}mh$8h@ zNqW~{494NZzLQj^;dYSmK~wSRmXp{hk;OZ2Q&b7+xNu&JIoAE@!o3p=jFYEDexB4$ 
zUARGlYwAf>tK8eTbDnmam7DYMQ2gOVoi7U^a~gx&Fc}`s%M%*9Zw1Q)7-|_Au1R58 zAGPkp$4_Vqd0~Yf9u#1xGk3~yleb)R&b8@7?n!`czO{`j#}6fb$sc|B}YV?l*h z&eV#XpMqx12Mf{lK0Og~*OoGQ+Wx~nH#{_z8X^~8)d%+%c#!#rTzIATQpr^2=%CBO zqm?rUp-o8fi01Mv+ZU}Lv8ND-3qF(Hv$7z`=#q{bXAXes;yGU4gN8s-Pft3e*iRjg zWhJ;xPSGNcy3^ET*R1>Hzr_33YU~by2GNJFi1sl5!c$LE4p145v)tfL=!Xo*wfjV z9S%~$V-Ba7shRJM&y~_N3CA2FAv?jImp$yUl>-w6D<8xX_=JX%g^F)c_wG+|m(M$v zzFL`Bstj6Lu_cgxv2#L2&+Tv*KSZ>?&&he|3q*wF&%1!sf(q<<@&H#M@J0~F)ByBO zJA@m7(&rSjG>_YwBmnm=n|iU(Os_zC1B{#dO5UXB1N2~&2Gwui@52>u*Gw;Eu4L1r zTzz@M;+Q$x*jq*y2S(ScN7IMYw~r_W|HSIMdk}lDPMbC9`ct@zYwRkclaE>U9Z?#+ zyMR2V-Od-*dYqWBj~`3(Gu0!`4ZB<* z@sXpIy36*NLEwmx!r0_b)3^$AY|NcwLYZ;1`_UbQ7KI#d|7ZdFK2Iuc#rX0sI&oKy zF!n99o6nyxH$Idz44qZY3Sn|60S+6@w6hh*&3im>sl#Pq=+3=w$}soAY&2_8I5fM@ zs?D3ZxNUJ#Qw|qW0B@<^ZshxYleESl*M2)$oOjEMC_n8UcnHsAhv#Jlm*skN z%9wU>h;S)VbU9M8e1N>7P}yuk9km+;p8~$$@ThBYt9AUE)}in{n3(Nz?Xaile8}dAin| zVPb%9vYCBldz(2$w}LN)H&}@C(yJ(MdP%)e@_9??Tha!3{f(`y_L*h0GPvzxq&}?y}TBxn_#SN`)GNV}=|6I}deJfkho&}kds_BOhG1SK` zN#i5q{hRm|a952k`;#)gwU|*nbm)7vbrHnqO0N@4eNDLx+i?sLdD{+%6O9cJ9jExdTyqMXj3-!{fHJQoe_Hcyad)pYd2i+ zS|aK_Iu@Bd(iGOsJz>?6WzST%TnC$wh|_+5o{e5fud>ldwCK!HS#PePOqEWI-AvvY zde1p=SB$G+(V(43LZsG-;!ZNDL8qy}0;r z0nzfTiU4MB52$}TA$or6q1P_P?UQXqBR_B$rZ?c~6T=TVY|M3cEfxkbh&dh&s;eos z+m7fPfmvXnfoV}d_!xM^vn#R@d#psN&;>3>E3q19dyk!Y@otD#Pbo=5i$Zn2moR1G z`@t;MZuCfw-hEN65ZtE?TmmaYmYZJ!yMc@j=E5oQ#2V53TLoKsi3<^&{HD${IeuIT zhL1tF$+92PU33PsmGRav|N2{!E9GKo9`;((H$0}8OqE3UDTJF&n01;QC)6h)JZiB` z?V&z=({)1bSUQuxf3o}DWi<*50?`B_#ndMC(MX8{LXEkheWj1%g_fFP3+83yMIf?- zNkIEvFsqN3I7zve=g*#u$&0;3sWvlFS%x&cp9GT@b!78V)K3SxP1@e(QRhDtS>1vV z3|}Hlr1tm0mONR9ay;)_cL7)n{-S8M{wj_ODP~796wBhAWIwgP=2rWKPgWhbl=!~R zD-^XtKXT4#mDDOk-SmXz&dqC0sTV?1KYY#i+O^Q43K-TfK9tgZH*Dd5K4MkToj{1!J`^6=bbglbc zZ8DIg36`(%Anxf-*st%k0N4-;G3HEI3Zfgko4-?F|FgJKrqZe&S~* zeIGjen^nX2(CTs99Mkb=7V_C8jRN?Mi+5qN=S|Xa49=j#;&S&6xM1ECXS= zgWTC8E;jXOMIHKmT3!7(;VIU_YF zO)s>EQ=4afcsqs%$0#N)4(D~|pv%{+W%JcDS*5TOJjGu8 
zCj=Hu+DDJ9T6(G*a}CzNV#}TmP!h~>?9yMN>3bEQK@n-*#ls;)uLw8D>iXssfC3pB z1+c4#mNtVFmFEpgPa>(vM*)p*$8L`-I%EBENuFDstVPXmj!o0uR)&hd+&48bKlA7Q znWRw7Wh@ifh#(`(pzh^IFR}hyR6OW^tg>>1_~%f>s*Puzp}0yDZj+NBB!R=Fj+uRD zwzjN4Z&Eo{ck_T>@Q5To?^l+TId``|?iADVH*}vi2PMR7=^-F6XfeOE368!1ghsQ!ADK`(9{Bj+ zEJQT|e_-4k5sjn>=V)chdrh6mDlsnBwvb~Iwm;Ox4@m_eOL^YHRTiO{X98^rXRKiM z{XtOqB|`+K*y@*X`6L`In6Y3l>B%0w26JpqXDZo#t(P##IZUG(rMy|o^J7W`sm4#aaQG*^sb7~@=j3oVI=_EIO>!6w_BW1{ zy_Rzym0y<-jMlAE+bEN*6-daL9&KAP3hlapi!%>?%K?jopb2d929Io9k_WK#W(yJ} z8(IE!XGf0?B$wt-80~t_0fBH;em*B*vhVh21mz% z?k;r&dm*S%70A`B&!MYiy(=U4pWIkm&lI!o=yqJ}jEi{h-zcM059cbZAD97j?Y%UK z1%LYi?b%}wP$<1pr{(IXQNgY(FCE{h;nh$&(UrY@PK_5o*qb?W;|uNfP$W;&TwF5k z^}55PI0a)YL~?~3RxZkX`O#WqZ5C+eswzxKEW4jb@_Y5Dlm5Ip_v47r5>)w18_e3cEa z2TY*`%hK$$)c3e-83jf;n~W6N~g>$Pk! z;g5Ds8j4}FsUXgvYP2IXf1<(d;ePRDmao#zR#jvkq3hqG(QBa=q7$>q5qD$l)b&C> z%&e`(K6vpwDC5l;DDnaD_Oi>+%jSaSsOAAG?wX_H>DOPkzl}^z&qRJiAb=MhW6NI( zEaZlNn)Yr5&e8c*e>t)=F*~FCu-|3WuwdZl=**QZCZV4LdLz4h#826nVj3gNvQMG9 zGN1YRy^M*e`%Z<^Ogm(|N;2^by;X1Sl7p#zoX_aW^p-?1m_K`Bxhc5_-Nbz&(PVJn z!_=vb&eatYJun?#vi_RSg%R#qDQT&%X*WHLD_3bZ=cLPia4m~`YQ@-Z)2kM?lk^5Z z8lbbe;)08Ioc~txrL=u1K5lL!J6GIow~J}Gl!ovrO7d%VYoYW5lsFNQlH@uunS$qQ z$ajaqS~RQCXr85|vRn-g&lQ}ah2BTtZI&9f{uoyC1>_=tLF+(yz z6*L=eU@<1Ht>SqhTVO>^gx{{lykNo$(q6T`0bqi#`^LowWsdrt?|i4~!=j^SWO4Nk z4SbL!fy|#mAUPx=lRJCC;x`-n;XIacw7HyWfXDtur@Us}wwYeJL@=R}b@~=gHf|xU zl%fhpvX^VoowuO12~=;kw<8z9?d@fQgy3eU5CJY4fl3^7FR6i|LDJwH)4{xYTreWg`dR@29|tpaGeI*<}$s74tgj zrcyR#TXyscvhQ@Ud)ATM9X^Vivwa-&>B9kLHMMyt7T(#97O1DM_sZ=}I(-NwZOER? zyQ-ID&)xlbahIUNOoZ3g;9-O9#EQ8@?b-lmp`?qrYJ|$9D}$9$5l=e8=K`Tsoy#CwS=8Hd_OiG^_07V!zqIZ zYPLb?BveiHB3~lM9YyZkQK%cYJ;`v!H+808oTi0*1=i%R_K6Pq*2NNklUjA-9 zIx;~)O--OvIV|f?q3DPgOClZ916_l`6v48uUMp9n1T57I%4Quu&tmH6MIB$03YPo3 zT&p{_8F=53p#)R;$v&igtDG|Z=TLFC^eYR&?X1Nc2t zJNivkAeH=fwo1N09%%m{9@jx(v)p{sJgdU_1N&}Vv^K7^BN;pVdoGN9=~PF-cqy8S zc=QIl>(s`{yqMJ$hXj2O!Cv*p;RQoSs|6? 
z-(dJ*NRd+i46331DYYNl?rsTqBK!_w$b9rq(wwT^TxDkOR#)xdtEzcsls_x@iZplW zL3)zc{y7wl*hg8f{sdM-y}_!t%y-U2S$#k2r%6#d{-Am}Xy%(gA5&^Cr%mVE$7GZy zq<(n>^6ZotJ7NZPm3*Jg%5s?GmG*2(6%9gNWGp4~Q6$>NCY4nsx3K z&vAO9@l(|r97ImODny0Q$dc^Y_Zg=JWlKeiWl~A@kbN1- z79smG7?FJ)jBUou?|$i=Q}56Fd>_B{@%!sL507&sF|XI_`MjUkec#u8U6EJTJBrD* zvT}s^YpJhYTyiJb4tbOmCjY3fvEDo(bWWpSIP-Plhdok3-&?R-%c@fb#{76rieR@S zGO_m1DgCY>SPV&M{xw4SB-!MuS)LE)u~dLT@0D?3McPRTflGA6Yaz-G+EnT=%O zm7V2o)V+-4UA2@TQPEZ+t&|@=}a*#ZuoZ7QiP*4X_ z>2I*a@{jg}fKX*xP41LDNCo!X{ce>~HeFeY?#QzBD0YRugBw`uT|B?fU9{HMKhQ#GGa7dE(Y;G?RFtvjlZ;1y{P~ z2F49G@-aLoynCXp>|Eqh4YFHIGCCT0zYHuXe^O39W>Q@okPt)0CaijS8$72^M*hPR zeH#C6d#2Qj*)^fWmLFx>=$6@JebVNzv`Fr``MH`SBCCA$jhTVC@SXL+CNbQlx#%}G zV~H4wk-=P7%7NUqLT3gPt2|L_K7UhxkJ46*=(4`}Dn+crSZ}~>$)R_&FO5*JHRmAJ#BHy;!(O%RVsF*C{1=~0w|DF34et!it{(BoDJ5ffaUHMUp8eoTV%SCB zSLeRGNWB*I+`4dedse+q-}(8trK?6hy0UK6qt{Jx9d+gM)p)u@EC%M8#$Ph;9q8ez}V)eSeE&rlI{GOtv5H1_dGPmVaK(4J#`h zBUHPtGE|(3Vg7G4v*YVFwYTv#>e&F+2Slxv4 z-ha$B?ut)9TN46VOS9a*fWJEy(D{ z?=P-R|AR>U1L6YZ!~0q)wtwby+#h>Rb8majuvz!iHuZ~VJHNGNeoP|{zfAG9KG2eS zbS>AKJatJrPm-+fcg!%M-KvskGjUb-ZTmqRg8|hoN(pzwg|@y|Nb;YbrQEe_k9Z+# z#lMZ4m@zR|_}m3YaluYglwwYNor+Nvy_4sEaG&$uw#e}Ytv`bbGoyxYdU|`gx$`VI z!k8|~)8QYr9l~t}7)`sc*2mS-bz~by#1lvYNyS<}VV0WO$%5O9I~^G={{83m|NjtP zW;KP|>NZXF;+4m2sINcvDDeIO?S|@i@{CatzdVLFC&%s4+5KROr*iwz*6$B*=NFtq zp}b2NHd2<1LZ$`>?|xT((rxbu!Wdwuw25ttife1h$)UCH)wq7?B%ugN&ktQ4c_}(c zADqdFo*v%`R|l`{32)fns=SwlHpkDy!?VrJaerL&%2rhW8lA$xzdJ+C*EmMcIZLWq zn^wp(a{m1Hjw0zkDo%g0_RsZd-1>heKW{|dxXb)IPOKO92&U7>KmMHRH)$De<}ECl z7AwpKD^1Ls^lL~R+H|JSGmQ6*!=j?1^9`=EJy5%2^-lRdQ7Z-&s}?;pc=b!V~z64 zr+I%}Mvam&3ZAb-mn?3erperZ-eq(62i>qo!E5#77mT@p_ol{%nhhw>7MWa~_Mxuy zRs!=h!1=3c4Q-ThuvCm{&2PrEwQN2+j>C@Lp3fQ@>N&vPGTV#Cl-(m~URuG+&1m1D zjJnF*N>1VUmKfe&YF5pNvYCLv-|q}C!x21nwZk|-L>#5)y&!(1sHycHXm_ayN4|cR zkx`%N=s~tQhAJZxwQr*f6t>QN5RabA$zgR7A)n*t3^*Sd>0SwP5P!d_e=a~tDJgSG z7WKqr{q zN%=b00`mF{?;X`of1Qx9k&(ITM!9Xqr^XDo8?NI5*|X6RLYWgqGv9ie?+m}YDRO 
z!6e^5TH3KY_A_b6p%0iAPO5z`Q}Oa#icZZg?i0ej2I(zhrvBy2-Ke`?He_lg-aVZm zy@y~bZrU`E;Av63U7m&(b?(S0*A8Se{Hp%dbCk?fyfWhPNYYc>oo2pg&fP9hE9Eou zQ6-5lW2e5K_Fe3VT(+SQNk85mnr%ANh4P>0(#)KnaGdp;K4^Ds{zz$GCf{zoxSf0y zcNU@_yORXtqdaqY+RBo#!c&?cXDKDFa_!hk=Io*D6}j#J#ovU?Udsovl&PtS{#Xs& zgYxeyNpNp-EQ*{7-E+}>|{USdLp zc#E@gX^NGZikA@xEcM5ZEytS*jE_(3jX1lFcc+EkBj^nE99 zqONJDN_~P5ox)*=x8SEA~ck ztZm!>4W&Mu%y7rC1+f7_p5AUngv@$iE-*KJ8&4p8k@g z=ZR@Q!8Y`bbLouU4&=*lVM0zi#hKK;8qDENyZ{PTSx67waR;CLJ_TNm?_2D?_HIGl zvavZcP0TauL4WnUcl48!qlI#+A)W(u8_tG_lMbKmT3g?Cd9rEvazYa<4@T7J&_wXw z2X}AlrQ8S0Y1v#A{($9tgH(eRee-lU*s8+>xD?N*z?C(T{tj(0suWh~h}ywXz&-Rl zP%z@`2j?qnQP__g{HSY}jPwJMC8w=Gd+vlMN9UTZy*oC$u%LEZXIsDl*78+tmu;Lp z13DCYr{13Zn?`r|6W@kaxMbhtsk6Fyu^M-MMPTP7odb+gb_no?4?KSI+N`MH+-!H< zJZWyiQE(>Ov8lQFj>H>{?>#H?oD-Wu1EZt=U?F<)If_tExMP1@Xh1(R(> zg+yw@Er*OhKWrfxc4Ur56k*!=}eyF2s!-Eu~$2-)i&67Ymq3<}lH|rI0Mi z10#Q3RH{YE_3+wcVK|x+D@)y{vwF46j6;UJ7I`bYwja8HMqZWaO(9xmM`r1x&ZWGmW_RwYPWYT0vDx3A^?dSb!>l&-? zHy?i+l24rO`~{gT1VmvGaJZQ}x|bV?HDYd_u}VDk&tJQCI8RLe@#}C$KC6-aiLjiL zva+)E1b99qAlw>m@qhcZ|L6oaPf^W&Be>)lLBux745RqjkAr2)wpt z1o*4Ii;HUzbleUSU?ffa#-~4?*dM)`>VH|d5|N+RmtSnhfp+~tt(YKinJelp>75&@ z16hDOVCej?n-12Q73SfJ!?!6>tGJk$ttfD)s)lLYKL!nIiA^hGq99g%du`PX#uc`L zLrvhfWrCdY-)~(_t#>1r|AQdoR>8UVY**gINOidKvKAMi|RO4RQCDDN%B?G#mPc;+X%S6q2xMB(!LE9QMw+3@M zuAdM^$$R^53dN_*|55~KHTem>da?91Wbik28VatyDMR)3_1|tuKB^l61b?zkntSW} zVqzk`&88_)TOgZsR9F~j-DWxL?UE-?4uICvrY&2tQc4qOOJJgT z@#4j+6A!--BUTEeN{f6tN`7F;ZTk7{Q#^6Lb6?yo7x@KL2E}e-kmIxrX4OGwd7pQ4 zT)C;gW`YXX!t@Ugn(J@<^$3k-cLR>_NP02MT*2tP8G(Oh78* z1{v(}@o}{dv7<*-!GRIRjllyV0BnGtte%?4Q#e+spVEfRmqYg^{7*(^m88qBb937b zFfh`1c;^;SiQjwV$Y!tz$@1C?vil=xv(`uGDJ{3eK6d(g@tD2_O%s4by` zbZB7>+MqwYbIV4sj|0A1xP*C?Dd>Do4S?bjBLL%ES*+DH(4GMy;y@7Idv53PWBGO~ zyk(eK58A65APCk#Al-k<+B)+{6*QE#f#D9bfbTZ&N-!w)%xszaWtyCIZyica!dmql z43!R{JFK0&4HLYecMdcji&57?cek8t$B+dAt_1JfY(~l6(DZ(oPKSZW8#RyiBIYHa z*A@uQK|r04o^)9ly%bE!&dS=hYuCn{Ypju)_n~MgVw}GXZk8~7eReXt@%6`#J3+1H 
z69~51^W#99tslH`bdoNA8*>Fsod@UWWbYDEj#lD{u|yqu8pjj7&^$P>BATLKV`O)L zlMC>0nHQ&fS<3nR$cf%QL|<{vb77u;dp>OGpOvs z;@@szVNq6H4JC#?ZZKd3cNek3bm$Oj;gmu_k9ad=YM?zm6=!LC1mor=1r$fb!}R@S zVNmt$hu?Gtyj@VQii>%x!v(6r(UuW)_Uzd*Ms@{LJtE2!CUR%VkY6CwZ1?K{MW076G5P_*}jAJ z9@HQ$z*ZE5FLi_?;RA-#ADfyK7_&jFg1Oy&f`YG1ya=D{FJ25KS%sdt3x*WY&Z+6? zCsztUG^wfsxE}dl^Y2J*CMG6DMg}ZE8uf>O0Uv7noV;mK&^p_2%TLRJ@Ol*dLSs0I zhEXDhci>b6TOZ;(Zft`ZQ=(8hhj+sF ze1=etLcxe|CmxHhmv!hX-YxCW<9{`!5;Q5q!2;DBs<-zfhT$E~PUR81dC{WJKEq1& zS)7#Unogx217Uu3*ao1Vw*?k7OtgSz@XKClVd3OnX>0qvmx;PwhK9QlYnkHZf#8#Q zM_heBW=hgjhJg7Cs$d#gT45545N;$KdSsf`qCvyAJp&yM5doFf_(;if_Eo*#fq&v^ z)-L4ynbw72`9NdQYH*>nk~R}oV4laCN)(xF9#n%H10ou`vhX?4Cg(F z-bpF#Ft|Vm(**yH>prY7R{-3Z)1X!Xq25v-{nuFO_8uvikE6rR%au2V z$(hsV4iW$`-p{Xyk9`{9MJAJVQraGUvAF8kW_!`z{+J{WE65TMdhZyfv<-mYDf}GJ z(SK$_$}ldtQ&n{a)J5`+7Gc)pYU}Da`1nHMFzDPxvi+^c^f#O)~neQ*pH zAgR(2j)aKv-0zfVnEC6?b^NI|3rafC>vH|i$6UaX_XhBZ!*Q@4$6O*zbLWn@Ma?!q z00dsE%7?oiX(o7%Cna9U1s57d6fBcWIGM|3K)3`z-zJcf5EfQ)OZA!`z5${`ut@V# zO21JF-oAeeoxziLS;vymS0|QfTD}3iIo&iVDssp1prCvMY_{DyxL#t7!rA`` zSbyR8f)C&rYzA)#zr5f{j=2Fw>DC@bcx$)OIO=ukYWV0_<8(B1lS0 ztG;PE1>kXCgh9y6v;*b9Mc8=-1zl!da1yuLNFhR_Fbfn+$r(Z%6%bpKxs;5Iz^f_h zfJp5c8h59LX5>)s4Wqx37*IT+OaNK|2jnPv{d#*gfsJt96`qGS@plMZRUI(r*IDe9 zO(KKYO8B$u-d~ozA%r0S8}9oWxx?OJ21G^R>^OV=yeinhi<%T#=!))CmXg{9&dFyk zT-XYFScro)QX{Ghe)#Z}&SAt>4(0_xQj!S_QMN}KojbSrhqEjQ;{Mr^sI3Z@DEe#= zL^w}OXTwc}13qMrG#d!i08O|80Q7|ci0+8d47Y-(ICwbRqzAq=x zEs)0;e_8mFk;ow8+#n59deT-#!2H*7&fX~PDoe?AN8hxTu~#i)AsHu*AJ@5b=}$;k zqV2don~H-O6-%QMMFe0Vf`$NdbXW+)F9T)+mEhB06avp`0Z=EQ4k{pU!PS6BFI~MF z0BSI%5NE#G4h~jTSKp`vlEjD%*REZ=U?r4+d$@g%M$%=Kw5x+ZVJw_u7DXEk zpsXJg!wcx-O?*59xNWJNI_3YK3h3^CLSr3Qj6KY9f-(!krM1#t;I`C?}v4LbFjLiLJ)kU&voT#f?F~4{ci&zla!R~2P)iY zXvh|=A>KBV(JGJ+8y9;v*zx43u47UjT%hebf;1_(b{sUDeZjcRlS-=I8>!g@ zR!;5=ir{$HdorF1ry>++#lf}tiMMk>&#*#EbF*D~3!LO~CzM^_aNWvu_lpGLepp+p znOO0wI9nKvoFb#59AF{?B@Z}hp(M)!Q1KwkOE)(+D81$$c?U=xt{y)2Z-3zAqocD8 z_3GWbs^#o6Z%!K<$L#Rl@E3HnC%pavz|1aeW_XzCZuvI{A4m>BFeem#>EOO0FiiM5 
zJu%%!(-nS-GZqajH!XvF3Sxz+3x@Ba9VTiR{oeTClhgYmAgdMJzyFU@6QmoB*zLbg z=zV%nC59VVtSG}$qg>*qirc%DEP(=l%*NwZ-p`|{{;@ybq;Ui;tG_q zZAYP?st{r?s|rG1;7*VU4-IlXz=^LveE60wGc+{x9#C_aXvA5-WIB3!jIZ2COTl8L zx36LT1GNWm zntFqN=As^W3LmHfK7dTNA0h-8&K3O&w&MfULx@k{k#Pq+PmugQ>GI3TLC9|4j{pG~ zAx&GJ{mjnrQS=qKW|{(?%bzKwh+8-B2W9Ez;pKLa?&60V69`oq*a`@=%_dgm-0b{t z!*lM-A$az|khwy>xaHKu)Jvl0AS-AX3qlG7>Q)kcngqj@G?OCEL)vj_;KYZll=~`w z!bn?i?ab9HSFXHINy$DUEm83UF1_+87JgtSnD9aLhBRhVMOBp}g zGm=y?v-7J#2<7)%xRjWSEM4;M{00aT>o76wUH1(+IXqx6w&34>wluQqIo!@4f3X!R zO`U~qN8zC%;#8vKW6kMt0{^TFj)H?o%)gW;>AQRzo>^&j%ZK+VBfrvDwe&Bg%sjt} zu*ko>^lZH``|AvaPD{DbDah#G0}6iM0=X*l`FfhV^%CvC}{?n z8{k%eds!>rmpV>XR#wP}#2|gW55*nC)c3XvrI8-nJmw0-vd?2Ji5y`51M2mHf`X@9 z3$|&D3&^={f*v%SgCE+~Ci$?L^S{|sdT{=%&4F0yZK#w0HaUIp$zBBoUNC$rF=r`5 zmXo!0Qa|P=xEWu*XXgqIe&G1S16KvQ_Z845$+T+ZF9z={)HSX^s3nBIe941+1JQ4Z zFV~S6hNaEpUf152xrPNB2_ys{2UZHDOP*#n^6YOc927JyvVN4lhn#2|O5Cnf9P!5j zsmkQ8H7Mo7`Vf4EgikvMjen|X?nAQc1-*8}-svX_+SJn<>vJwz$xrS~IDaU}cJCp` z`K^A;q&8w{C&nixoR(*Am=jY{Qq-TZIDITN2n)e;L!JZY>1a!0c%@(Wu!Nu>3RI9! 
z=KVSn)ad2wt1bZwU=m;k4#Js&01KdysbXTnhw6uHAO1Bv1iKK*$j}fdTy20}5bE~g zq~ZR92hE@?1)f8yAf^kwov*o9^z{QlJRUrg8}se|6DQTOqaYRkFZ5%>WmOfj+W^$Y zFPghReF%IAQP|ZMTnGWME~HKu=5SKBQC~jgfyIr9Xa)G#49Fj+-+0+6V+z5QjYo^Yfmh}%m*r5tqb;fcQx$@mDqe>&xH~>*&ig4bHKMf+19wo<>Ct^q1c?$g#Onl|cHmC|Uef#DQt|VOxAr`;3 zpG`Ot+dVQvLbgIb2%4ySgwBMN!8~rLC46rs)Cf>0C~1@_(^s@FM4x0ueS>BbbTmyN zOrlWA%F0lC1Mj-V{iSfcf*=M|VP|2kSLPUgZqAuk8N-`>vwi~#StkHy>_B6P);%zl zajXDxYPsV!UXZvars@U3vxN}PKeFRxvXvWyIOWMt$)5_b7JGf037KsIGr zM4-z(#+`Y?VCsF|;ygJeL-Yd z29pT*Qxl(3)b>e;R%}QM&dL+r#8?KCJ7J)zX$~or=kN=wm?$HF3jXaQUvmpw#~!x~ zu=C071pY=@qcVLfid*KkATX^%hajIpJ_n~m$J|mK2#^!RBT>WpP@n>C0iU~B=^tp>23GY22aZxiIIjb~iI z&k_NO(F4qX{DIVBk!t*HSpKL}s;YOtC_#9xMAKLGAz(AyDzTqW%PZp)<{G%=_lkVA z7v9oAI$+6*5~-Pz`2Rpd_kW_H{*Sr8NZC<9~xdkjRR77FDj z49~_%m@^^x916|*Bqb3%aIi6d*ZPH`02eWWhT}zA(q&;(SU5fHYyC`jjxAycyD&xI z0=NkYXJ?5|o(bvKwVqWt*A(uWFX#5{tpYFSHzAs}o!r1B2UuS;N&sUhIkIyMC|xz|iQ;bc;` zvrFwAQsGS_LU))78v7kx*1yUUy;AGNq29+A4yr=m0d)Nzf_Vs>53gEE@BdauYQ2J# zFMlZ);oT^aj0Rbum1?S%6Qd41S>RAo;`iq z4|#o62b=?+@!*8ZR`XRUZ{*Nb$n=n>MqSLqS3)OIUNC#QypOcQIk775&D@NzaS3+u zTBg}XuyYu>9Yd&If(D~&?Ml5a1}NrdwY1LI*d)J4iZjTbITls4N^(Q;@!;^>4qG^yfot&za*rSley_KTSralyB`Sc8Z~Qi| zMwNC@4rywV8rz1^a3RxVUneDf83-ISqc}SFvTWHB-myhI4Rp2OZIS*AGg*xdk-1W0|0J5u@bC1DS^?)LfwUG4YWq z=Zz_BCNlezClVF!Rb{+s6hjV^){>vy)7@K{o<4h~n$mWyHxYUt9U17n6si^?@_Ku! 
zTS@_>Xr^ibpuv;`^f*j&)6cvq&#~?J8YLwu`3kNzA6I|?tb8cB2M#TPQHbD)6FUG7 zsKB~za~H9W9wCg>>|V=`!fPe&px%U=rGZj(a3pYKh-*PY`|i&4fZ-E4@l&b{z;hS_wD3e2HR zp=X4YtRq$c5VAwt5!$w6%BHgPusE#5ilIDS_PDZH75fK_FDk4tW1GYXT4R$5Q6v=q zZTSHoF>D%lLS~NkDkU7P5AbDy4;nKcCP0Hp6A=he?}69?g}94`StkpwA1$e1 z|A{ndA>2Vv*1RgD{Iz=Ug+)WyEMQxX_%`el;V~DA{#zG7WGl?F<|a|K41aIG5<#{W zP~jtJX}}p8pPt@e;Qnt=6oo?jk~Tt#TLO9*&<{}{x9TCX>fp&7LT0*l082-oJ2 z1VJZ@6VGr%6psMcI7NaP2ER7t2q7MD`E11wZ74aKL%ZW3*|Eg3=kF@uTayIc2YE!y z9w@e-0d-%Ckl(ZA(e`9rL=RA?pG8VgP{5N^4p9eClrN4$9PV2`vz&`T_e`=lK=fTb`f_zQ}P zvH`SKMGi~q7CyAcdHPolIV?;WaKNvHdDF=s?MI5-?c+L(YHy7(HewpVFPt`NG{7sove z{oXJ!YQjn*vy~2l_FO&PutmvK#>EK#RIU^K>@%jDF4({5v1{zc2zY& zcdxiO!qV9pi-TSu7)gr^BeDGzgbg^l5r-;hNdHM+Bhsakh*2!xR_y}`%Kj~ox_k1V+Gbfg@n+?mO`N`e8Qp@ZrP$d3kxG z%k5SA7icO|@{jS)7b7(fKphnpCL>%D#P3{+e@inpubv1sO(c}6BQJkz<18(>3-AE-BfGVMKR|__tw2|gQI2)QzM62eAAE_!i&zi z1|QLmr`vBrSsQuVPyy>o7lAq!z%s=x^QHmFUa((-N+z!(OU)mc%X`i=NPL!J@h z23wB-wRbO)`JTYc>Y>r$5Tv13Vh&b9K&?RTFPMdgz+s;FW8?yIwgU42RkmOx&@hTC zSqa`vKN(fk2#HyuHm(ZL@*inegJC)oVGHpRueW!5NMaF_(^+_mW}vEXP~?6LNui!T zefp8m*i2ATX&`LR1Ma{8(f))5oXAgX z|4634u!>_qfp$0dNWXjgTwYbg3fD^kqXweNSW z--=?al(e!LhMroFKh^=FR}z@vRiNP5_~tr7N~r`sP4El=;Xm-k2@Nk1cVm^2qEf_s zzQ;PH+Sk5u=hH71RrYyU)HT>w6%}g168CB=xn-T-lCUokBW6!HmF$zjN#hr(VR~f5 zirUrE(z2}ZA$O#giJ5s%-QBx)FBuwI`}VpA%S%g3KfwXT=C@+?;t%Hny*Zb6AT^#% zh;35#nddq|^Cy#hz`DYTJbnen$albrEKHjXR2Gz%-viaA*Mst(u2I5<#i~FZes*n% zI0zXZR5sOt0uFS6E(hiF^Xw={O{#E~$9E?uC#&W3R<%*A9z35L!CyB^g`lu>!p{m!~B&?1ih# zZjmI9A8QFmjq$vG*kcx8u*j{9QGjR(Lg}ZG)_^Be=w}QtVPWCVmokQjHNMW^a#px} z)^-kzE*p+x!Mqe4j}!r9zJ}mH^qpr#{vouOni|)3t*&`g5yKIH^h05tczoKPNV}ZU zR>gh{+FJL3&oQV3M^?Tk>2)Qy%8=P45l>uJ?y&d5m_%H=-lKb#a%dfS8d;) zP(FYTq<=g6u?oy4p=WhB#JNAXjA#u?TgAS>aU&&ufy3=>Z7)hTW_GfRI3CEws`T{q ze8rqNJ-KYO2U2`xXy^#RO@|abKsV{0Zc+u15_sd#@znds9*JLsS{Imq*+AWdhvEt= zA)9h7nhtw$uu)1%>Y%{;hq9#r$fJT}u@7%dEpNEjD3G_9QCP?WbxEmG_qSMkpbVOd zbHfJ|+CvD0DpI>d+bzkdRk5d7jryODL{?5Ys!ChLcK&F<&pomT{Ct%`tEA#=%Au%# 
zo(h*Y*4p0J`1|V6D8)7tYF9}725aul%Z(8$45{Jlg1$7)&pk+V+gy+DoJicJQQi0$ z*q=bYL-I?c4o(6cgnsa#wOyJ*YcvQnEdM+aV}3HMPshi{Lk3X=*6dKR0x^eM#Y6hh z0D(Vt~?)6;Ao9v;~OIv+oNd<8VQA7}zIKM=Z*vx)?D z*6!AW$iIO4HCTP1Zk0@t{z~I7k%2@c?!9oU82q-XM$(av*Q-}$`Qbt<4`7r23H?RJ zTP8irP)La7G`c{q0E+~5FRy|x%x?bqBrYC2AUzlfVYuZt91V;cjKRn#6;N(#>6;an z9I;J*xZ3w0wq`ITZUSyR6k2u%D4!0t|G_Pzn0SXc{dx2M1ZBYb(1mGB)1dOP|M~+D3TAruzNzqm2g%+v9 zqf`SP<;>Lj3~tz84RXre=O>q0LU9Rm4b#N5ud&Lg>OKM21k#BPeEM;k%8VGCO0dyV z$#l3e@6*zdKnH7B$H*u+ARxe90+Mp*e;_O%9^7dv60D)Snh(Wd`*EDmJfnetK-uaF zfkOq4RhbtXUOf`u*U=d)4?oqb-vT!1*QI%;x{&2_nDGHASBQxfx875@7buc)qfNX${;tDu&mvA zDR2LYk$)*{d=-`eF|lAcn_dGq3o4NJRpQI0dy7Jc!iEo7{0F<|k3{9Wm)~Q}5dIKi zv}R^*j^tifQm*cEGXP3gzH^P+k8V!1}r3!4CE6^&QGrPrOF?^UpsK z#^a;MkMF>O2P4Y_6B7x*w@zT4t`e_vDx<@kMqQ~o9zR&uM(KE$oV*1(e^a?EvC6RX z*$Rxqz?#hztgN8xKBe?3*WLGlEEcIh3JW8}KA^mM4n^v#Eooo)ZweAnA(?w*Wn~RY zePySMW|W}tp41!o*=%m61~~vFMx-@Hs+*S?2C9hNRJq#SBN`e(yRo)|hc5SZt=XE7 zQNNE;?xr8dM6GsQsUq^&u6aaGJUbxoF?|mP#BbKWM0l6LIe>=&M{^JLq?FWmNJ!5> zXSgZP5e;wB6g=pjOFPEZ`+Yn!x}0-T$)7x*_a=2c3Ot{H10fVA1*ON?+1WDa*#nvC z59r3aS3egT3701=2_f~02Zw(0nPk(l=b^*6hgVeO)a0xEPwcrG;#7 zHPk$gmjXTwjvnJwHUk{mYP4ql4R@@Q-xo7)xg`?9?B8dZMKZ9ekXBHkkOWH^IB){jQ4MU3*!UHKfQ2#uM1tOd z@L8av0p}Llh3@XcH{Hn8!oq1tbN146*>w^xyfAYy(VxL_^Lf!G*#>Pd>et}rRZVt< z3v6$0>d2T58(e2*|KL4P{jguOcn>t&H2$mE=8(C}f&zy98Q?g`%d-X-dSgsXJOuF5 zkLb&!L3qwL^#8B?4h1T7pf5>V)E#^EtM(T(I<4cJx@l4I_HUycNH$f-izMH_zyANA z&F`hic>8Xt$EQ{Q>bgc4tV4=RU_Mt4odM~sJd!o}X+Wcx;`EPx`4YIYmL5RXM-qG{ zc78jLqC9!%&_LRxHUoGG^bGm1H?tCGBg4Z3kP<=C{(4}YqVH4z$=idCES#?Zck6X? zHAk$-{;z)djU^&h8F430p#T?=XyXNt4Y&!A_5rcrBAJzm5#ZXZjtQ6#FoOy@b#>Jn zU@A0ms_m%IqqukPp25^zsnF2Sd}ww65o^7sAB+su$Z=>KNENz=)FPo29hq&9h=C?tD{0&cOUEPPIQ<}SY`C>Hd<(uF=eKAGjX@++ zg9cwl{h$jNF(cG?;1bEjvYn3-kr=~Y&yX%{A9FppwsRaWwZ2gPwZZ|z8R|n^z!HJg z<_k4Z#54ou0}y&uA`a7?9}Y11TH47lB9N^J$3SvH4zL@Nmq71a#%+8j#Cx^B?%Y8r zrNU^>wze5UmyfU9=YQ*BMz#$66C5HY29Sti{2bnDL49iH#phTPgv!W+FtmwDDCD`oU4eQ-xv4lpzOMoZ%Yp*>L^Wx!Iuj_os8bMb`91p? 
zqF0$vaGOr_U6Y0agMjZW1CoqH4|diXt{Cy)3I7i$QY!O~!BFtxIsW2noM^Y7uUs;e zQGtwI4vU!qF&wG+cPdnR@HzwLziIg03&m~!OA)z!7gUS$RH zf@8325!zHAEOqFz0Eg+vbFh{@V`-V7_uHeg9b6}LZ2DiMNPkS!4ml$Od>x`}E6D>*@vf)lZf-eW8rQR+#f1e)!*8i6DYhOgwtJy}C(gYUK45%EtC9E^ zdUrrOPqH_9S`0P9dQxvs457@9Zi3+ zkRbhqL2o9iCUAIgn zKnw`=F4J z3gF3fE2=Kh^E^gwIA3w=E5lytjcrho<9mEFLPT))**hFvm(LvUAN6P1v`J{Iw%Tbc zUd!vTmz-iePN{F(b$V9)wvNVuEd!r6Wd#L=J+x!H#JPF*DZ%{~+Gh{NCbYDf%m@U& zYZ*&$xZ*LYMIM<~AiE|^my7_DG0n(p+-M|5WJ({67SKogxd9nO44x;SzOj?~@GAQP z6^>qIsAt6ic{m87l5`k!9KJI6CPK|jK}&w><8vw7s~6%FWYY_VUk081EQJFLNv9tX z8dVTf2Y|^gHqmo435^}WUtY`2a~`UNcD-ndgP}zd8a<+1HMCNGwI3M098iha6-mUEzsDY#nwq+#r?;b;*X2Tt-(r^&boSYgY4suYqgWNj1 z0gpf%sB=s{LowgOfV_`yn#_oNTj!PaZ{u^S;D9bTH9q|eF2H9E`5Tn`mB@>o%(bJ!wH^<2#c(}`qqp8thk&p6K%b9LRamGDZ8@l9Q8=UL5H2Kt zDmQ5Jj(u8+QfAcU={kyhDfoIFcwM5n?^$rCd3ca2+XHUJ!s6rm_N5mee;vPfKyhXa z;3*nMYi~b8o%TqmE11j*Dp?AX84R(b_)TWEDiKmrf;}mO>^89!%{=;Yb&IU>@blWm zqBqWjL&3ToTx%%-i+e^X<}%Blxb}s~2`gR~?YJJTB(FqzP@9XV(}H##EyTMIJQ&09 zURs?`YF)hh{XF`i%;bk4JZ+-<=nhy-{X_duOz(BkYc>^u%DfUzW);sRIL<$FM`NmX zb7Xj`Z)r;}5*?t%pU4IP@Y`u=HQCqH) zEMis_x*UyvAT#t7EYiaA7n*~Z`GY#`^X#hiYl$@biqp*Ml2Ymlbl)vcfD#y_Uy@wx zoa->9D1-kV5Ts0(^$}mIRz73b^V|}`1VZZAa zUNn=tHy=`KZGAhEpoG)3SLW`U4G6+62SUsKLCu~Mo8gXhRxZmizS4~KG^69ZkW~ zmv-_u;si)3X!Vb|@5)Y0kSv|bH*$FNaF9mdaA*j`s@!*u^>Ns;+5O{8DFYEk$1O&Kv|k6c7+zedc=1o zh5F}5ZLFpV+K{YI*NUyq^m19L(@%TY`7tSLE;|ON$$FJSwj$*2JYG7hLZ2XaU>FNZ z&)cKNS9@Me5y!IXn%6v9-^bN;6ZF<7dgH6L&TCi3akM1Lnk{AaWZj4!mZ0%gAjGb> z+l^N~?ym00h#39&TFHEaX81O^Tc)F%{27v4Ta!4R99k;zrSM-|@!hhmo8XhaL|aPa zzgYqYQ)}?zhtJ-kdlU3K9x{9&2t?2d8i1Bsq$aVBFsxa|zOVb-)u5Ih= zR6Hd&{yu0TV`gs!*D$qoqE84x*9&fFsLHFl5FXgo*VGyUsJeNU5# zmoyS)F~eVj?y_qsLfJ6e5%1|Ve$4Mj#2EKtS0ZJRBZ=!adPvMD%KH)j?NfyTVe+D8 zYpdx)^4We_#^{8n$yhb-sTQ1;V!TzOR!Kd537dTk$kDA1fxM5q7r&m++Z2^U6&Xp? 
zWwgiZmmVL;!%wZO;%T-O%5)cR+Ka}wgR+aALAy^hlN`I6{nlt<*jF7MNmE=RXFEpV zy}4uNql@VN7FM~~M3ub*fyx&?3pqM||Kw^ngT>l}AJJUMR*Em0(~KI?4{S|J=_-5Zcy?1ZLrkGp8u`$Dq3L`O+)cJxD_hUzQ zHurpr?Isj4Z<`-ZxRFRCr}y2Rr|k?*TKb_(Gavu%+3QxqY?h#~Vj+i(4VRy;xsF%a z%bG&%G(Ylna`f!TzKKQHSX%<)ybiLu`7QM-GSUM1$BU@hjanT&S(anI+Ho=77VI}E z3zp*P8MJX?Qi&qjl-k{)>sX;4yzk(}+FYutfad&JWxB7;U1t8X%K0_dFj4b*Nq^=N zC$2vWv(i}VeYT+>Xy>%35hwJetg+$BeTrovqx5BFZ@;BHzf5kYcEhBR*?~$9>E5U5 zjZ=4PV!LRu{9gO-4=@Y#tdYC2TQdFTqdA!QpT~)r@Q_NEkOhit4|TOvljva#c5qM2 zE%qT9I(1?20co&hq#{hyPl=r4r=ejp0paAi)KQaYc4c1D6P#;rOLiRYGrM*jJR6j| z)CN<3j#QnqCD-%p&MBsPPdp%J{i#(S@-N~Pd%slHEj2eruLKhLra!l9!pLx5b1uaOEI!0pF^YG zyb)q`>RvhQ@&1R`ylzn#f2>MDBCeryYP2cI5V{w zfOe1b`6ahiim!0a+R^I~wCNI$@m|8pXMff`&BW9ZtBjInA73lh@2qgAKV-MExot#o z%Qy>b1LcrX#GQFPt#erV^o*|d*X}8r zRg?b8`KY%7%)UQ_dh242A6Vl{T^y@Bm64%KCiaEwUVGL?m}^+k%u^m#T)HqHdGnZZ z-AYpHDZu>)nSVIPH?4JbE+sL;cIh0B?Kib98>GWS{=a zU#PKeZbwdaN~i%^TJWfUncg9%nqZ=CZocJ0l*qZy#6DL&Zwm=aO`9hKhx3Z3Pm_?dpSiID>#z)6hcCb?3kbXR1QaCY*e2W ze)WpJSa(}WOL4ykF?IufzTtd{j}OZrle|v->uis)EK8G&DH#oNgB@KX4>E!I`HB2@aF>~nda3H{`E?`A9wm2j(sQfb3RkVzj~i>r&&SAyGArn3XZ zcsVIIxAn8b{P4~Vsc78rmyomZ<*^>_^?}Q+dSk)Vj?!E!S;`DCJ8{sd+Ks&wfLqX5 zOUwM`9nJHkK_SfY=a0xiSugFuevfig`B|{c%8!MOem*`GE{*Cv-nQdukweS1)^E~` z^ft}5WxU(wnb+H#HP#|DU2sSdU0S?!I|Eo;&D*l@RLiA~M6HYSt{4hojL5>}K4q() zX?ohGD9BZWm9^`0zT)E9d2h_Pp2>WML+3ZCj+?SmEd;WjX4HhOAX(u3>Z;_&Y3b6Y zD~sd~4PGC+#p79VxB?d+9HmGZ4n@nIv-WQsCmeP2vn-*> z1;){arCTWT6X?dPAfwiDizD{?g5uI!0X<$1<5iEO#zb9$RRo!;uxvW><}%!I)K(9O ztio;tdyXB4-o;;jcIQlmtPu!K$6lEsoKH(IEy!C+ok4%A6q4ZKYMWTfN`wVRmU_bAob=^tbTk+ZX@3ofC)Txzd zwHR+3{bebGtX4-s0(Pk=Nu$4r-9r_=Ixlh4*xq}Z9a*WdU|Mc!G019-ad=&_@T ze!>MxLYinaV-npoe>^z|P2Fck^lJ&{xLH5`@`+S%>c~h^qVR;e{I|tudFhQo%zRJ# z(2dGOGJmv!EO}LNu4ds)1k?d$x@*l|&4jPa>&(>pO1{!SmiBTUuU~5lKclc{Fy`&p_1K#|oG;pY4}R>T zwd8|_iNw|sy`-t^8gGn4sq8?=iL7TV@Fc{I|1KOXwYdA6d=Z$&%ikf^HVmYVvF9p#q~tBG8XDSc1$nVHKIVl{tp77Yw84dhd!1p0F=nY z)r&C(wA_S-#wXhEOaX5NF|r-TQ)Aa?#ziL;Z+;zpS?!i@?WnyJU>M$`(2jpJon~);!~#Oqh}@c=l4V!vQ-H!A`1A2gpV`@o 
z6EK5;dL<172SF55=PuT(sI4K!c|DI+(#0@N?`^Xt3f2~P>NLFA0pl7RT*i-v-}6nr zk_m>S-;jz0y8zx+~3{B8q>)vrl-yEj@nEqgb&7PR}3NyUG) z;UipRY_LI$KiFU)8f4m)(9YkIROaQWQf*I!pg%9WMZZJkFL1u&mi;I6!Ej&SUJDC@ zoIe20-~HxE#iq)y&b-Z4Yn!bNU>FY;s^6^?r_#x(}$PB&u4D7)Lb;&g-(e^-qCPiL@ z+bvtP0R@wzeB`uR+P?bnCKzvMNc+B;(?nwK;aupI*>#;Tak5z6}iqoTty;@Dp1@P7ZL@B9xxGV#zjmf zGitQ)D6vJ}H=?-L@UD?I1H_vvTSo_Sv=IUrCJB{^qDrEw)hWO9>7(w%KwROfg%5hU zx<=2&oJm{w+IVnSL*jD>f&HjQ5^#7`UnabLS`}?w!-at3-s`ETWV`)=S*k3X8+YfH8&sI)VN)8c7KJGgHXE&+064+IzSx0Sl;!nSUKz zJKjm!5I*xUFFY+dxx38bV#~qDQenSfUG!KFh$=v#5?3DzX9H*w%IUO7@b8f z`U>p(90C!H@{fy+&Ckvjv^V-f#>U61+S`EyeFtehD*!QjO$985>gt zf;}|Y0d+Vy*jOih=5`#<*c#&x7{9NL6YXmK8*Twb)aYWtU1GY8~al5=HbjNc!^-dl@ zq`09#POgrM4>I&|HCaK)pu}nu0vR}s>?%b6N>>pw7f!XUHy-`8f7u4K%r^4&R&p4C z{hS9$Z=URNxm(?RCmaZbCXt?UpDqOuFBt;)G0n zRx{O`jkE#_<$|7ij8fp>=;&wyP&;i!^6E!kg!7iDzYG=`X(CwnrYUL_=e4J=CsA;4 zQyTciDY*)#VTUX%ENBKf^1 z&*_V+Iun2?`qo`A>!~Dk_iZ%q^fUVB$j90l7-lPi+jD{IhY2*?625q>>5V&YsP{jF zPX!aMc+I)w};n3OiRQbicy=p!>8=`3Ls zzVx_VusNRTiVey46KHCRoQOE4>h8EiVHri#yH-}3m|0u;f5IgHGB^~hgSTs@mp?|^-mixnSm2ug-vB;%Q4-{`HR&Zg zrsSpUT);Nnu@7e literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_fxt.png b/doc/doxygen/chapters/images/eclipse_hello_fxt.png new file mode 100644 index 0000000000000000000000000000000000000000..b37dbd33977de7c2b0d75a1457198e44249075d2 GIT binary patch literal 236816 zcma%i1yq#n);5ZTlz^1fD=7_vl!Staba!`m=cp*%-6Mh^(%lR_zyPC+bhj{c4-7N^ zqwhKAd(Zc;f35#nBCJ{P#C`95?Q38AdLq?S&G;Nbj)Bme59mS4`!yl)V>HU@h*nH%VdkO`0u|6Z7` zwrinZ{cgU#Y_}9bt~>2L?!6mvd-A>UTs`Mh=ibcAmxc0pbYCVDZjHg51t3z=JnX`Q z%C?8sLE?UH`@?ghU;$4X7aPg*oa*cA{bVj!O%+!a7kNile;}1?S^1p_y{h3NZH1+x;GJbmUQX^r0fs`kj5& zUDIsLBsn%naiHH#e`NeM%Z&Pnk zko@BiJ(}LVe`Irh}Va6~SAlV9AHk#@Xi_m69kBrcM*&c`9P zoREAodh56Neo{3|gK;W;eRV9jRD-YQ`Vxl znIHqu5Z#A;3u-}>Z!6f>@Heb@i0LrVqMc7R#_ycm+eSL z-B&!k4EhlMJTf-=vo3fv$L~lj;A}6V$GkG_gxfTiw3G3l2gFn;J2v5g+%-OZmxC&f z%=+V8T*6}`PlMAf@_3YlNaZIph_yb29;?oRkDgHM)5_3&fxE{uSIB`Oj?T?f_b1kl 
z>WBR{p}zfz7DeQqo3U8OS})k_XMu75t((bbY0IwTNBR|S&>sm#L(lYt+Xi%O}D zHC~=@9OK{j;8Lim#=1RsfsICUh97saIl8sBuPa_0*VVH)!HQ|ZVllV~zoj3Jz$#Xx zEtRA6S>FCvM8^f;Q}R4!XRrPpdB6^>BOF-0N+A`Jh7qY-s~h-f2#(>lvBg($qHGkhbfs4r(<$89b$lv=P*uAQ9xPmtT=nN+4nlChDH zl6D`%rN$?g>%JCE)L(AWw06|gJjGlsR#HDdd*uST9ugOkgtKl0;rzH46{t#_tg+uf-~3iTFgzm$5t49 zOkx!FY5lQw`uEBThE3utrg8Ocd6ANnE85$?Gz|Y~k!3Ks4Ti&D_CGHvxby}*;q3#> zn<^q)EB5l5#gZ!O>guWX?CgU%fqS>a>2=G&r6!GNG_8>9f|8z|U-r-P&-B+<=S=ME z>}Ks278XtGY4SNB%*7*cDObkjpS&K}&o;jWem>Xe$}rowzxFwrr_UyIx>l;wr7<}v zY3x0TP@XxJ_nDgz0n@L`&CSgp5G;<0xH}i_U80rt^Myl@&uqi4k=G9vW@cuKEDC@B z=2voZ<9gwt^HMT);?)wb`P?-$Rvols;hR6I;D2&}^gFP$Q}vF*!Ivkj&z|*RB_)EX zfORS}YK}sqgTjba*`7Y_#%6jWCoCIhsYTpya-aL&>=abqfSbOZcd@-VFdxpnV#32a zB%l|cGR^BDd2ih{_67EYo&EPd0Kd%0%L|r`rbq^ZRh^u;fXfsaHdbg)WadexG<$-2 z`3T~k@9BZWkrU;%u(a}rl$2yNG_Wdj8AoxL-R7uqnQ?2}%8JFd=176=|Y}*tVFV1gunk*Iy?lfoYHx>Fxf!NF`Rjf zYYEMJ<#&;f4{wMmZfQA_vsBJ}`q894;Kq`d7#?A4@2=0dfyrsh)GpB;1Z(p7vjrc) zT}jhx=gI~j!+d^c*j7xZ&2uj%i=h;DW)>EH+fiD{izVDW2f}!$g@r?BdWiIZv`oA(OX_FLw)JYBQXnE`C0T zu(H`(MIZBuW^_8snV3kHl#~P=k8Cy>@wl{XxdQvw;=A8pq@4ZkXt`)1;Cbef`nJ=ywfQ3|yt zP``(Z1BOGl?G>_Lo`Z&V*EaZ##km-MjJREVCqSMzVgdT%SA}-TorP zd))L-aVr#yhmvXC6y)TL<~D3_-};h%l?=fd9Ar-=&8Lu;A3>DVvnwIiMxXDOcw>5q zRZD7|s_>Sos`!9~8j7KIbwBI_F^h_RfA25J>Ri&;wk(Cfx3yYhzwxBoe?);lzkR+l=qQ|~C_jRk! 
zj~A#~ZNylB+hXuJ$)iVMJtP~v`-5y?tILyg(bJ!rei#%bAHl^X()9@!m(@ydoYSZPEK0oyFSa^ zC_)FraqY!XlS)c3yyvTWPv$d6A;SizE&yXzMN@Eyv_SLt5B}ur3nLz3NpR!Bx{wPx z6~@z=w6^xJrsfQl+(}-^+jd}KGa3|qaZw~S)=fT4Lo0y;`xjlus79Gw(QP|tt_d=oLEh=~8_zu#J)@_gV>wOq-tgTu1G)zNz=A@;a zKFEnxQ3JTxboCAvimm#)AiLDLf)zGLbM+cs=wNBw#f60*6nWcI`l2b5(aq zshpH~-Oqs&CxyAy;n_73`fi`>FH#mG2Jv~@I%i!W%J4k<9M4gG0{Xf5DEgQ%b2lJyRt{4< zF1L!StOiLZ$_>NKMJ@oK$q?M4I65L?dEmP{BWq_T!l`S6<2J-z%;&N8MnU0-N~lcA z#h0H`H*Jy)NWsHZD3GahF2a3%eLK_1vc)QcT6-^1yN!#U`|ZRRi=fLbCtwAQoTJlt zU2CVCmcwWcP;*|$iPW4NT3}=K8(oxhtyx+6?iiVV)GpPTY$n#wOwM?IeitZcR5>ba zC{&(MJXDbQ`creowR*??qn38!=Iw8#zrN?=;sAT_;iK#wb(3B1(#qX=zg4iKb0axa zp~|_m=tZ?2z+-Q&LtgNL^2YPmZ;nF>nHl~Nu*7X~dSZIjYdkeDJRGLoKbrg8Z@)bg zz;ZgAHq?frk;kC5pSXztVqb29WGEG1?6B;)kaMMg*sOmlFfdGyWes;Ndva>JM4IZbz zY3)!ft!NHFOKgB0`!&~0M#idSwlSO;bUH5lLiXmAvYwv5>cYy(N+=xvNW;j8;Dw)` zNSOKEA+XdVGBTNv5Q;SG<TSo%4Ngu~+cjhBp@+v|$I>fvu zj4%j;+WcLw2x!t&6ckXE4$6qay1Iwp%3f0&JG&q_R&=y!P79>3w#l29<^9 zKfpJ8_y|D507CnT^MK;z5ndz|6!%luU;wfjVZD>hshC+=EvR0n;`d4h2e_}s3O^v# zz%;Amwpdc>q*`-K0Q6S+&6|6Dea%z5VIUJfzgH60I@f(3r;g2+=kG!pLNcJ`jTpMh z$&c2V&h+pG=UKOABd@)==Fw|)W4_te8X&e1m8J}!?&)2g^4hj2XHCO-j)lpIvw*W5 zeaF`FC&E(=+L|M`+<>wuVAF=*`RhkPHCL&^@Vo8Gz#D%Pvo}?692G@HOQ86_J!0_I z1weEyR@mhBC*4Yum&C(w_Cqal$N1Xy;5u2D9M5nG2jchgx6( zcj;m^-WH3I>A6TtO+8!=ujNd&vjix|hwyL#*9GysfNlGLlV3wo)#m3 zj*N|EVP}5_)ZK&A$wd#~d`pK~$_i&&i0qK-OA=BKyEl&MrG;_ahV^osoj>H_OKOKd zf9@7L*ABii10r;7ZD$jx5v3#`!(Lnp&^6N8X`$^Sv1G>J54p(>DQOv*(z4e4*}$1P z``+GHDBeqkpo0#vbE#XPSh}yfeRQQHMD zCK(Tp`{^bDqLuAu_f>4~k&rAun^@-4M3^7TrIcf1d~N!w#4I^+9>?AL+?`xKz1`|p z`^1w{O{0O~?XQe`du`aKV*&qeqTj_*CB}`|^we;@j;9Yb?*#ZMYCU}&*sMD}d;qKR z`js$b886Yz6fZt{eXe@vFQ6;HUQSv9tCXB;ETM0weZ`=mk>tPi#kTo!Tk3Z}NK4ZK zE#c7-tnv!n>;N#&wWMfjAiVAprAR#j*d8ez-N?MJYJUwEPs=aHQOPT(ni`)&%vCKw zXsC6K4%tQ}e1lT&%`~c-&(wkVy|>BQ4!WapUcG+pG3(qomM`-}RTA|iUhlP-n*f$kn6NWrf@`sF&7GT zH6xz){yi@K=>2VPMj(1YLa@cDt;)kJ39*y#|;c&Te*W{;PJ*t zN&P$p<)%=R^Ed?z;8w2y04phBk6{222``FA0_zbKb*8J&qjqgv@i}1VZ@6D94a1oI 
z-14gNMpAqV(kd*%EY(%OX~KRio$7MAQr!VkkwE0SX*JH?EeF6mf=7ZGzxhFJt>5v; zG95NWBAvv1CNkZbW`Wq``vb_)pkjU3Eb~!>uCac}sXcfL>kwS?$Q5_+%t^5(=G~dWaJh zeeBk%7#XM1x$Fj1W_7I;q^S@ybIO`lH z6r1PUc_qhYM&$#X;SeRBl9~y02c%?Vc*Mleb&kipyxHmnDxF(iN61Q=!IAm+c@xRr zI+(V{0d=vEf?_)}Tk(q9lzX!R(7tsF2Cr?}qAYz{l@%0?3kwU)8fORLi{aM#A3l6o z@zBTAJ! z2MFyLWIV5$KRK9Ct1=jZI% z&RN3S-G-IvRzhb(^3x|=!$3o-Rg!M4pHW0yIjO9#pO4ku>H7JvexOFzG&mV^eRZcT z?(4`q+`AXMjUzZGd-KUnO--zQ2MAUC?HtffHKF1J<^*xGNGa0*i4NRrF6ctu0@$

$3v2+^4maW%((w$)_2I6h|r_?6E2`YTBmu zw$a{q@#L)bmxix3VPze}QBhHE0u%BJgaGn=Bt;Oioe%(Cu6zcow)&k`x$m<#>N zA55eD_ai0t0LpaG(I91SZ?7j%lXSPVx!!3LY;r3XG_d^%bh&T0@hMgg{UV~HosnuG zlxRFNE;7r-r?k|(iJLoJhKflj^z6vmCi!IZ`v>8}z z{O4hRXZyx~->Sz4wf`27U#;EC|GzFEjc4}H(~u-GefVDrH>2FWD`Veze^4;Kb(zUX zzbVM_Dvk~Va*5AwwCM;%Y!wDOaNwv)oCl?zb91X1&HC>A#ln*LAiSXesMTlRW}6Hl zf+{34e6}!R*m$&x2J2;3XYK4beJ&`7jEJ!N@pM*UJvb>hITS%HZH2qNeHxH?xNy~W z)x{NZy(|2>u(;8zuAI&C)FI^BnJo9YcehO~s+iZP?Zd%Am5n2Dc`!jkO3zjXYBCvN zH(5b9C@;@lgBgM{PQOB_LU1>|4)wtow!t#JqRjnx|l25AXUMBjEV*3QE0O z1oj?R-!R5OO#^!ES%AuGVQE>aWLjs1r&63eFfd>ssXvbGKKwgVeusiw!>0foRS-(M zNaBiie>7_s+i11*bZE%PuU!)B!Kqu$%E{T<_nwfo!q;}B=LpnVR6}Kn9Hy_>H3PeZt9YgL4X&tY0YZnh37izS1gZo8uj^3vDvr5 zQQP5s^}Qgz+6i^U0p?du2F(TY8aB8%d!WWG?S@6GS;`ly-aX&XHnN_X-EV_Q?RBJk zBZV)De@D}NGDGKb6|SK1)``1M_) z2Mm3a!oNW4NE{G2Px!*N0@!j*&B$DMhTX^q>SJKk&F~^Gk zv+!M6UU!dXXk6AlAH!FPi*sn*LN+?M_HvK@dhWBsEGU?gkzq&Qwr|qnvo)7u5!w*4 z`WoXt*-jNk2z&2doNHhmvVOskeI{tw=)757oVSrlStQ`$Cw7iLW>DsUd~Ldi5R8n9 z`u0?!W$dbaw!w2h<*umPUzClr%2>>G6L$1DT1G98e>tc~C36Qt0x@5$RBAc(>A>t* z;_$R@KZ>K5h;r@74zfKWrJ9X@Gu(d|bD_!8e1Lun`>8#7ZZl#t zN__0M)m(fHt2BhX%f^P#fJZ{W2L}g_ple~V5D%!txkW9tUfSSRao$i_yyQkJHblqO zzn+eadphjBsLeJfq~mPtyX76T`7lE9CDITs^qo+8N$oGqDf!RC&4F;qeXh`Om- z;p7h&7jF;y`}-%R!%7=NFB|k_;`58Xx-v{o>^-VAMFKN|{7X^(3{RjtrtN9;+z<@B zQdE}D2hba)TT!UEsX{w2!XnI@E-$>$*~&1y@(sp|Yley<7z?N4s0_IJ#@m0{5segM z14*6VUC2QZxuQ2&aPs^%^d(U{_#KyPG62p2__Tkq)D!QI>f6j}lXyS=-E--Su5r(= zM5fHERdIrz*XHUZ$@?9pL&SgmC$?T?X@19BBIgvyz#Qe9bMj{J(F013byj_d%~4PM z_x-PuN}vsPf}W+*#)aRX9BiLZW>I*va0)#2L)8nT_Zf+~qWTwF^Hb7RgaJwT1eky8Vh$v()oG} zO(jzay5SVOCSYqYevYGrSjoFvlpyK#1k}3q8qk><(W@!|hKncfHzUr_MMH&Po*>g3gHYs9 zhCqVlyI@CyX65Xn!FEl0tL|xA_i^&{RgdNjFXXQFjH>baxQ|ku70tK(47?(A%eGc^ z>?4n19H0Vy&@F%qMzjkH;chW;Sb6;H)}j=0mngFSz<`~Lp^z5V?)K>bWg zPSQ)Pjmn;V zP-y@J$^-?`I!;vJnw{rK)K3T}m^J#ZJ3CM{*i7@_K>;;d+2e$CUd!jb2{EbG#Y;>9eqjjztP z2kM57(wsWihSAH6bT?N4WT>qJI!<7sgQqyftHd|@!Vw|WsSP;;c=#^rPYff8#Ujmv0mrr zH64vOF(0Yn{A^)Wuen6g?v?r3sq>Jf^@f79%UH`XuDap-|@ 
z-Ag#mLUv~4hz+0hHsqVq{$eGsEO3H=lV^Pc%RRXLskyb_j9(cX?T#8W2W@>Dq@*iL zbDpCcz*oN@wC1_|9W-W*b~>e@2kJl9OFN z^?UM!tE{o(ot5?Gt@;s8SHy-8AYffw0l3xGO!iHwva$llHtu<@JzN}mI%(3=c3X;! zoOd{=W=c%d4h+|g6P^+>%Kw{ri2kgPzdDup)_>I1F#ZDSY&<)$ z?6y>XR%Tu~X($O_2!h*{@8(dIr8$9sT9uHS+*2{%s-KwHC2E^@qqFL9sDAZR+;mvq z+mP@)?ChV7B`T7Lf5R{|W(_;^YbQ`;=#KzY?7#zapS_tr3~n=lq)3pD=e-l*zQ#rP zA;e`zluwj>5Ie)?#LUJDv=9g<_kZN{M38S2Ao3`53o7RY?EK(>?>liv;^94fNU`G4 zj25n_t*J$#V@4>v!hgNyC!pDkmCF8I_j7?MUa>E)f}$cj7srunL9qTbfZ%q%2do}| zI>nPd3yEuM%aVxUI9F9xQTSX}7So1CKN9|I=m-XWFETEQQH&#M@O-YXPuFPhq89F? zh5FqFEvVvOoE@v8duVNKtkwPa4mEXy^EV`w6_mdEX3LB_&&-7p(f59OhwHbxORZfr zsjK_e5*lN(ZK+!2)aUU#uSUhMiKm?26)4NAutGk){k&oUa_un=JQMudwmn-}ACKII zDlHboeXB(Sk1%e?b`O(b&t1aKPC%n^C1A`6rE#LEP+OD~P%eE5Rr4(E__KMhGrQ`v z?*+|G)H`cxYEHEZ*ACUSXRhzhHmW*X#X5Z3%@J`LNcc2*$j#0oni)qob9_C=FgQAz z?+dtse&cq$~Kv0zI=+J%9M0q*I< zTqBiLgp2$0a5>omxnYcfAeB^xwg2Q{gkUj6YwcpQrrezqB&?QK3A)O@uWp@1L0zgV z>?lef-`ejM3vm1jv%gZ9{~fneUtWIW6k*X^R0(7*{%#_(dohqd0lu=x=n@-!SX=i4PHH*x3KkAY7|42-VX17@pkc zKhD2OiXWq+0hzHT5N@(iB6#A(x30Yy!aX0h{Tv{FQW;{}x~4Q@bAok*lZ_*f0tlV~ z+PjEaX6$rjm&n$|>IE8BIaMDhtR`7@$?4iA3+a$cOqqIGfTJbJCfx%+X!9g)3TU&GfSI%=Glk@2FQdhUu z`JAU`%tEfy8pxL4?9Bh7nl0mS?<$5`^nD4ja%4B7ZyRyVwV`lS{3lp6<;?CvOy2_ny0)SFT<d=2t?qukeRv4aQIG}6!j1s8n2xrexH@OS(*U&{ui^Ylt()(Q45cblLtJ}!2w zwEdOPDOSbcO~6eRJ*UjxsJ_E`&?e zwSDL}ot-J~p`nqw`DB6(05mlE=1g&smOl|!-L9m)66&spCp!;Mk!5&X{n^j*(vEjO z>qlla_`IFVd>6odZQziVKHEG85C4BJAEesb!H&kkvA%v8R?(7%tZdIBi+kQMbaHv7 z?>Hfq5o~0vNJm2-pFSL%+1uc#yRKhX{-t{J0?X1BM9EOGxi)hvkf`k*qG0UeK> zp%=9mJ2*K8ikrSBe6KnXgJ?eLwdry3HTy8l&cpTT>t9k!yh6mAK3}H()n*s6(BVy< zJNuiKJiYruWrK%^b_y%i{&M|b{QFtGH&Q2!$5}5&x65E*iSN8UR zUHI@eU~&drfy-`nMw|YynkfiKiXr6Uu_ZL_i|b;CHxDETc!{rXi1+nLkupLMLW>>O}q8O@L7|>JZAX z^|OTR$B!R3(hUJGk77E{I^Z>B;u$*K=%oR#XL)^=86XFWWVh{ zbu(ukBD6UkL+ownBN#BY&s+c+w9I{v=7EtB>w$#F!!F+&izA zn1da2_6zOLDAo>!(UbbkflNZ##9({N7Y@8GVrQyeiX!5Gq^zbhta-d(HoicZ;Wxm| zwZ4Oadak;slWp70TjukW|G-G>SxD!UALkrRaB)7!Rm~bT%nFqdq{#l^!K8Of0|;vn 
zfh;~|3TD$I7<|K)ZghA1O`P-fj#M@1xDjV6H}M2w#>$`TfOR)BG(@j@#)vKRzU0Lo z&uxlc7Sb5JBB7+%(i-U>9`D%dP`--`v6dSf`HTyU3W$Xc{> z)uF*(hG=nse!tYWt7xj#o+w%}v-g*0(T|Qvc5VZm;QROQPgl7!NrHL57V4|05RE20 zxI9BDzQPT!<$m(S3b8Q)bgC16fCjIxmnI7&OnczD!$C_nVyMq`LR5-NFh?>J4z(%0 z@^7xkYY28;?8rUaoozIDS}U!gL7FS(ZEdB>vBfMTl=lAp`?try zXV%u%g+)aT4n|?-->WU$ufz_06P*<1M?bLdU|qo9|q~ zHak^RJPaxCt!sfr&K>L}-ruN%8pG`6=n(ztI`w8qkD?H%+=9Du9xjk4Tuy$$6p&rL z!4JBWhd)0&-%6en=Xe&etmpmUS)wz z85>sXuKlM7x|A?)3&)pSxvk905lXCkU-1pN@7SR%^K1H!7rWfT_$_`mzvNy7IB)|w zKdOBsBTak1X8G|Q&B`5_$==Sz^btBkMMnpxD`60<MuZ+U=lGDf9M^uXG1Ng#qVmH5Y#Nh|X%uS8lorcVKV@9CZ!v-`}L*HW}`Z zg(Rh?%V}vn1gzZVCtuZjp^Xdet7&cLRp*WSdgv|g7|H$H8m$bo7VAIP*8^P(>b_24 zG1G>uHy_ZG4`L+_^ir)iu`LYu5^#u;aP599JDs~^^{l9}@w=?V2T5Erzs@^-mCgDN z1Wd%j2&mr!CUd|%LF@ffy`HDlyywpAINC_)s`+l#v%+HGojb>ugfvuqW;j4x6#yuq zm+I0#?IZ+P!CopT+=pN;X=LsYJ9y=0eJd=CNQ9aiBGKB%dDf?6nI{ZB{62DTLMIuV zt~fY3naXar_D!-dGQO{=sd;+>duk2)5{8kp-W(Qsx*Mp;oFY z{E%If0X%rn(Df>-$B*xb8Nt!^+oE$t^N4XJCkh3kkomZW9ptw5m`+XNByY3FrI!T*%2(o6>wAd zyc*TJh+zPM&q&qs(8d=(Ua3IL=}jSCjB;Qs*hc=Q>(KOIcC?f=s-k{pXkM;KAg4Cj zf?B8M9T`t{SG)jbpqjE{f>nNLV=6CtgL^p}Ezc{evc01mUF?2d zc`dzogKlfOn8{Gs`gp+D^avi_U#~zqHMAJBx%nJ#OEjg*hc4nwjK9037BpK!!Q0w0 zahdn+4Q6Ji>)g#n!KtzRkQ2{evFdgrP6I(`ByYgQO5TZ1B;hB(S_%XmYk7gtNVDqh z=)Iu5l}kz|nIiIweaIK;R!1co{w5~++TEmVz(U&yg$3@nLs=+I0Ua4It+GFPVm3Ht ziX*bVR?STh^<9MDFk6dQ2FCk<9({;XS_s~k;OV%q$azde8&?$jIw3tB{!P2|p1T;e zK#Mi#-Z&`*k3q~N(t4^Y|5d~H>Hrag8c#Z6?16ax*}Xh*q>orXbG`SQKNPUs0X9(% zR7;w4*g9ATkmhQvVS2STgqtcy^kDu}0^l&M&fk z=<5zxSB>AwJL)-)eQUrRlb3<^sZC!8WL@^&)x!sf3Y&dsOO5~v;yo%Q;mj}?$iP+~ ztjpNlBd@U4jV5MupYvC3G$(Bme`%e?DhVbA!zru6tpP>?{0!vr>~4y-!$sGTpE$axlE^q~I$tJU^>)Q0|^_x+9eR(cVDs?T>*E)um-ANn=j|`yU=2UWc-fZXnn2 zT=*=qXRE3Y<=pyy%mx5@6i?48ry`Z4&@B|qD!F&Yw&A*&ue8=k(To+Vzx{odRjd$%Lxoi1;1 z)yPWOP8_`WM}2J{CW!J&S~`uK65}vN4Bv-q`Gq>`Od5{hfE-vvz{ z3tQ@^#G82VHKsKC<3hxx@NBO{gj@sS3(t%JocoKv8D9$CkyikWQ3{efO%PBRzSP=B z)IQ(mz~^yWWU^Xzx?^*PDLk1kE#Qf-Wcv~_enx^No5C!4gX+hVJ4f6l^*aP91K-?2 
z>h2z?Mk_`X_ojC;EH)={PY1qH-0mc2;$4ZMXA@*!@HNGL%x7vrJS#`{pM57$&nOyH z+y|+c$8)38YGVsD0^H^Yq{g0?1SSuCt6)Sq?hCygM;Wto{OX*_=YQy$1Kp#$l7F;N z);C`I3?o#MgWmnodS;6dwso?$c99;|*b6v`3rG(>8HG+6uh@UT%qBIIm&aoWxfE>= zDCBqOJy2M@4p-sOc@T+Ai`Y%b0<~(XevPA=JFKEoY5siI?bHOqbjO#VsSP^ogSYfC znu3g6U(yKMA4@Y0TEf}t39EKE<+wvY)ww-hYREzatDzEc`wJLS0RyE6uryw_<)vRz zG{R1wfT_-EoHP0f6Zzm`mrNL7*iPEG->OYcPR?&N@F0!X^iT?XYau%|5_mbZ^4UyZE)LdI52)bWY9djBW1#qee(W=ClB@$4r=jpr^BnMugyb zu=FG;1gWl}+4_KlUd2MemI5SLJ(AQ}G->D_NRB|_XD|%9E=gp+pv4gl^i9h4efx5Y zu++{V`*h zL6U@lZ$x%`6sL9DvdI-~1%ihA2n|4%v*}=K^$id6+UajQQgRVwwr_jE)>(jWP6^gs zHCt`Ie4IJubZ1nJ?Tqy#qrT0Bf(IShW+9w5Df7~tpMQ789LmTcnOY!6Sdbfnu-@7^mtyC;N$xVHP#qv?y-UiM4vO=3MX=*74-hK;fl6zi`mOgJL4p-p;Vn;ia;L z$-~`=>ET7x7mz3}4I7c7*-V?TNCWh{5Yo8S2^uga_VVnV#%JjvI4m?-g{7MWT1;bV z*_TGk#U-@4 z?Ob3Hyad}qH(5*S($9XJa$}2B&yC~}7|cBvU#$c&v@+capSKzojn~k~>G9n+8vI~r zz!s^N&pzw9rCAUm?<6MJ2u|!m6I$@lF{%9e=*(u7flYRNR+3D90tDWh7fW@v)d+rywdcf~Y zaz60H8U(?LE%y08jC?WnJA#nRd1sQrkliH7FVZvUMl@7p1fjk!jlZ#h@Q-REqw)3< z36 PMWVN31wE0Sof4IPhq=j=fufQJLtiuWTvPp%#HGtxe4L!Z!d zlheF~-)-{v*1_f`U0)uq873p~*0X`jd}|hD)4Pt4SEAWc)r+T^s~$o&?A(}Z^>wA- zF}m03nl0oGjOS}DH&rfAM5BNb zdu1g5ig;krEfYcHOG>`X1*YF#YbV_2cZEtH!R88m35yLi*g+^Sj(ww}H-@@@oOFfS zTn24ZV08Q1N;Db)ym{HQgg(xE7ASaoaA#f53Ta%5CMPy?w{h6Visz4_0(nh6rTsa(^(h3aH<-(z3f{$ zT zvVO3PlhwV&pu@-P!ToN>^?>VDH~j$v?0vIovXFyOC~2p4pKROVSnb2`B9DD3>?kk( z#=%N9n9qM(OTj|`by3T^_5I89YW3x{4NCVh_TsU6OAhk$A&#&lpU zUHiujZRb4gcM1{pmI~ix%!L~yOrEdWi+}lIWmDFb?8As{al40rpfehu3auzU8bZQ^ zj|_E)DnSSL^M9z%`JRpxp4;W}#?7DuySQ4IKay=%CF}kw8rZtM^mA{MZ2iqr`?#x_ zK{!3dVyUJ`{HT8_C2w1+@``t$#M)Xalxl1=@JU6}a?a%ByV+wdPhH?$m}ky@{Tpju zA_Jq`w?7#VE11cD3+f4`Y~pY49$?bfl< z6MBaycsIsiHpt=qc$UZSa0XeKBj8u3_pNMgZS!(-hj)U&D9J_QJ#@msujE)HdoGzD z=jQF#jmX}jp3F~FW=~>VCL@K@I&_h@4$_E=eq6;-R!=ec1@;v`+(TQCDpNyC(*a#v!9PMg@UMb7fHIr!{&zk`cSfGHwJL>WvDH zt2t}O`yRWBu*y$rll!c;$1`-y1&q!a0MySZA=z+(NZxH z_8DtwtE>6;jVkOV@SaFy?5Qpj9|7bd7B6q~}uSOJGJ43g3Lb zDjpdbnW4lgPOsW`4sA&Z8HMwj0e^h*Mw#X57)ZFS&!Xf^;$v7CAhjC(IBw#%vSnZi 
zo~&>Phg2I}TfS#wzHrskZ}p+Gw=W@=XadPg&wBkeWy99{*`oIA2R*?Z3c^HXGSy7YInjYG$``3pD()S+s$}&m>Q{n)9_qQ`tzD#(H zB~*oo)-HqQ0=K<`m)j#D)BWVt7Y1_W8?xH!see^CNW{5fBr*$ya<`^zNv|4)Ef@C) z1?_#=m-z=bmyNadXRhd1QWxi8DOZ&@mU7dvn^fgH(hBKC%616r~_F4Tpp*!}7O~Znx zJ^ijn8sTE_ltHr0r}h@HmJ{CBb@ZKpF`#%v*Hq#y3=|~EAQw;n1<)3(Y;4AWK9NJ- zT3h@7(Ds&LQMT>7H;597poBD7grqc*5=w*8-67o!Jt`$gs7N;mNRCK1NQuON#L(RX z4Ba!cPVRd>&;Q-tZGBlE)(7UpFbs2Db6w|o#D4tt2{5CtsHk{*mVCU}VI98@?aj{^ zOIp+YacaABX;0_UnU_P{ZBE06$<~9VSNB+oqswOM$(>>PaAVa_9x1lK!D++a2TS`6936 z+noYk=2Bf36&CW8A`OkN@S+vvp?>f4W3`p7C(fbY*QwK(eV6NgeTaZDsVbY(Ej|STfase=j{~I@{j|yyRU1yz$W?2 zqelIOb`z)4Tswgj%5pwW_-VZ+A2j)bBw_HC4R*DVmJ1s|k7}N+3;C`PdT9YAx>%f# zyNyPtkTUm?^(!l5-wh*Oa4HS86Sk*cEv+CReJS?8R*tf8nw7$*qZ)(zjP@A~Nfk>#fVjG2OoFzb}I=?E(!a}kF`oHC@hCPgSF@WS9L=@KS`Azu&q<_zkjNLr3#w?e=c~H6@)h2F#*ig|y3;#b z8)Nc_JI~ks=IaO|PX8s*uuE^;9OsE1{ zf;J`G)ORI*w0U6LyW-7Ijsf#q2x_8zFk6-*Y~fiP5V1F(Z*ojlJM{wGh~kIXlWwn- z39PePe5PqD$eD{DLvh1y7YC^A6%b0AUE1YRSh^&HQ zd0a}u%aLba20=-MI;;&Y1xbu3M|*?oDZXa%Yu{-#?aH{ey2kxLTI?lr?9i_f-&tcNZ`Vua%TG zlMD;9pTkhyAwkc(_dGZlU7)C!qG~D1A==Cxmz*0?s@F@0etodW zsk>s9Igo`r^JPS@e9fwDGj&w+qPaK0#i_+HQ^cFuTed;Y(ZcH?drLS2<2ZK}EEQK6 zThAjj+H6x``;A^ta0Bht=lAUx;XSmoE5p60mo5q>nPKYT4&jg+OCA1e9sWYCt;nbtSnbRdhLa_`hhHS!_l<)Ef!swpbYfrkCyIyUTEv{Tk`sqT4^zN z%w;4P#BRyGZ5aH9B7l*#H|X?p)^E9bh!XB)rlDt?4zbJMo4R(mSYI@XQ!lP<_Z`p? 
zErFhYrp5NTYDc&HUZOmc#r)9;AG&5K`PmZh7~}r#dN5=cri)ci%YqEm09VfOp3e2Vq{X{Z%bjvNWbL1(; zEgP~M=45l#De2ehs7u7&tQBT ziPIVCj2H5y3I38X5Q2jkpbPZl!==|a8vpyG`Ifq|P;f!BZ&+?;H_lTBmKchaPt^B@ z=Xnc+kgcn^4&?;Nuhi2H6SnlbTA&&2@21Fi9&8E6oCdt8fH;2ONm1mh0K;H$yY9#V)w zJ@pvxoPw)SpTW9!+gO{TBjwIl6;eqGOeMy$c%WJ_b ziY%?qrJuPm>im{^)qr}~fIXq!eyR3=O@LloyY7%3Z)R)0*X5?0=XJJkf@Br6_V2r zvEjMsS)d&`pqXKDk`hWkS%pW~NU(b*($WwaTfUi2 zw?0_R(I1DN!=|tnywcJ5RRnQ1o0rJU)Ty5=MqG-y26l^YeNhG?0LO{?cmPuQ{ADrr z%qTN4->#~e8&tC2r0C=ZF8o#I$*ZjVdW1v_S34Pknv+qn7bq&0g9cIk$-1RW~CTBh1ZB(*9JhV%7Hi1gZRLu+Bw8f{B-4F|3+S zkyavavApQ*#@Ebp0-mLQL7EQzU*j1C`xiEDQT9`DW;9q`!m9=KG7^XlfbiuxkWxiN z6p&Wwy;_GkQj?MWy}x(eFA#OiuE!n?*T;-6@VfdCKfI8Pc%gk7aLhy$sgjShhB^-} zqZaW}eSFd4W%PezV+MKfnPN!vs!(T9X;Ne#q-J8DG#Le3}f z=*Me{PzE}kz5Tw#@%oFWaLE%+)wnNqnH0VJu%Hh_q=5t^xenb{eJPO$eT6fHNg!CU zx|QwxXz_hc%7>V;RW<4txxNgm*Q=seIRW^F{9em)-Ix~JYLSvP{9?o`{}-lv~(1W zKRLk|`gg-zK<75%Htr2kPh@*l;paxN3n;y$dz79>KkZ%Z7oApo+FudFxA9=z!8UyF zu7&d*?@lDneVW9QoqIW}jb#UWGCfVpm=UXh!nuFOGCV@=N*^Yq(f^b9csH`f~jB%!kWyk1}!RC0?Vm@;CWc<-_ON zs!)y(NAk1SZo;_yKfY*4)YxKgv(n(jfkY&4=+5XCuVlcJY@-6+rp~6mbgaZuAHhaZ z2S+ieC+_zw`Rvx~+ZwSpcc{*vZE~36n00h706^Mn3ldK(ba4#H)(g+xz&sLA{g9LO z{p))f0!HJd)U#gP^-l>eFEWHbl?uszq`u?2PnDHbLJ{?<&z>4*B6Pxv>TBZA@ejW_ z{QDnRr1$6r9JhLVi$?e9y!`8NZI64pQ=;EB1yhfl$dQduMA|N?_8ABzj_tAT>}r~s zdgGGD78&b9V=JOQbQCFO6~n4Lj;hLi#PMw^esznT?{?K&pQ_+Xr@Tx?gu@HmP;U+G zv9oVWB%~@fx=(%xGQDSh{O*1+WpQWZ-3bzp*ECW=$G6)r(B`=-B;YrkZFi7$yNKT0{} z<(RFfja$ugik+n#=C5|Lz>f7FHXL|h1H9M@jONx(v{AFqi({M=H}tC#EtKFv)b-f2 z2c(>3Un!3w4U@@* zMDVStW0yB{v42d~?wTml^)#b0ios~uKdagY)8?;Vt#n`}?TgIH#ZkCM&O%OJK_c2b{c0#0c>*ZFo z_}*6U^2({$o7-v;=B|J0#B4`Hh+3E2vM&w|T=aBr`vq@?(ardN%I_z4^=An~Ju zdi!0O@7w+j7znzaooy+m)F0xMr45#*%|}N>EVRDE)2{almU>W%^m~ph`nXRv2o3ZV zx|Qf+_8bNj@Un%B3H^#YkrtWd1)!ax{+6^%Ei7Ofy}O~{eY$+u#+^yQB4)fQ+-0EZErFMap#hE zHbg9fT=6br&>oDeer1&M128R2Nje?O!5!bt7%PkLdcS23m@Gq_>><7tm^Mpj&_~E@ z;Kz`p-AFeGhk+Qy%AwmJ#h@0Y_Ul*nwfeW)@3V7%mJGbm4Dews$}y|LAFkIX3n$$w 
z=-X%;47@yShdi!E#uQa`Sd@SH+x%!ow0Z@?BM~h8^LDm3U07MJ9eYtvNTxbRQ{4>0aFb_ z6(`EXlvns;l)&*D~6` zm~1BwwtSL84t45XWkB_|TmRQir%SG#&2;xz&SLvyC5Ku8xo+|;O$BB56WILvtoS0n zgaLBdrLODJP$Vwtr<+{YY#1#f>Pb!b-njz@;m4SE3R>ya9o6gMHTFs4#0(!Npd_)x zQWr=r8*=KZhK(OPQ~jpa`MxS_hNM=*%!PQ7*{kcH=#6=7XN z(9N~W!^KxBHvC*--iy0Cp4FU&ir5CJPFh;s0{1^Fn+KO=w9bEZ(a*H9ZFCsUj z(8yRKKZPk7zustgbVNR@%E47rmtbhJGj9!)1)4(1Or7ehv2#EfzPLwO@Kl+&Q{1A~~i1z$3NFUbdY zkgp##P4zyJcxygXGN+R)zgNFLm3Jd`IPvb?(f5P9<9-o!EbjE7R+Lr;)(bu6h%{%` zh8U){?TWpV7POqB3ZZV)j!df15qTof{k7i@A|r1qZlviHQ#CZMDXng1CBBD8vwjy0 zd(wyxE1U)Cu4}_(6U8+rpSQy(A8u}L8z5lR{!*5b|2Zv9#dWubfpgHL4zS$*NP(ss z2!qpw@PFjnffE^q!rdAk^h#BX&8*lT^=(vTQrk`)l_x7AxNy6agpEibfI=-Fz zC#~1Vn0#y)b@;Q*nVa?FIt3diTkH0bhK^1c$*DNwD?x#o_LR}$b%BSWwKhZF!(z&k z)(0g+(fTxwlRu+>@4DtJru^l6fImth2bV-gL`4+4(c|9~R;>?@o;v0`2wi&}#2;_g zbiF{Hl|>Xk&i<9gW-HQLLr+Vl;`=A^sSbJRAW5|vf~pdVn(p8nHj&5!{RVp)jxi_8 z^*r4>j3o=lK2JD2UTqR~ArCYRHt&mWIdVP1Kl81t4Y(Nz?+4(mCv5C29nQ?G!kNAD zCjDR#vXko8J+n?#6&dmrVbyPJQcR;(T?=fT&mV}0BpD^%s_2aD-u_k4US85(GW8>L z7L9+`oIg9cUxc2dM7pik*jmt@yQ~*;3X2$K4`ND2Eu6iOio26N{P>s5$B^syoDF2A zR!d*U+XS5a33r@M9N?H6upFC-y;|4fY~g{4`CR#pT3LFEsL4>24QsL5#(mQY z%>EkBDze`ACF-8Bk9%9MqeLxL>S*Msm3hxv_-hI$PyNo_excCD_Ma}z z&VyHrVlFuuufxr9)ur;#@I_88+1JDr_sf!@DWLoat02A+{Chlwsk>;nN&lvlZAlw) z3XVy`tBkbki9*`!ZdWtWj92p%ay69Oqu`w5ji079gqsYmAK$3ttu83;FlNTR>~w(f z`b}0=H2#vh*ZbGtn3iDI&ubSDKw^Fccypo?yzyQRJ@jJ3q<*8ggdBavlEhrnj|HBS z)SFyrh@F?mdpemY{QA)*l19X7_#<65m5)8mwudok*`)mPt@Uv!ftu4@|W9~(RHx$`N-jp97-xn87o z0}Z}<)OL3;)`UZrc&?ylRGWji1*Jcc)TL|X=%G#{UvQT=nxudrmeHlQ|rQB zB-!N{_Ytor1xEUT=@5L0TC%sO3^lp(m%1tt3LCc-Q#BoG!b(Rk>my43=PI58g%b5w zGqYE@=;!Mg%mu1Aea?#SvwKXcmwNnk8N){^o-Y3|lauyvQUV}bDK)J5k}w&Zw7zb? 
zRtoS>*Kq*JP&t;dkWz7!l0)Z}fq~!TrOwB^vAq0VFXS{$CIafHwmr+$`T;%l=4f+t zGyzdPwS9;}F=f_fWwmDBS65|bXWy^%rQ=(DUfYCe8UE8_dejk%A60uUdd$?D?W_2N z$oyCdW;dStA@r(I6Y|CpD8Y=NXg$d@E5-&0%I0KeMms||Sp~?en=jpQYoeH#c8{c0 zZ-9LEu&57x(N7sm`C=i`Oue8LzCW5eD(S?c;BP5GPl#Z zGTKR9;SHhiw0LBoKk0s(e{rps#+;wxwNxB1=>Uer3y(g+Q~CrwwVJN?Cr1=fEoe`f zv8}!(Z=G*JHkPTW;-JRMgQbJv z_ciCsPXjKgL3DB2jz`5+#n-Shd-)Xi9?Sqf-(1Mny3;Ske^SAe?2Zbg&U?m*Ifs@| z3w@HFnJV|(lZl!q%;BQFMa5-3vim@<|44JVsj?PF_#QsVR&4icIlJUriHI!S%;pe1 zZ6gyV=?8a>+*Y(HQYiycqA_Sc%iBNQ4xHl7);}`oSDf1#)Y!?M#26<(-NAQ{vHD@B ze0@eEI3l`eqE@rMj2zL(fIXQF=QOR3Y7(olsMhmYPNv$Jt|E}MB}(@rU3xdx^ke`Y z`t5r6;NE|=0NhMspP@-Pt!tM1fx~RaNo*V*Iczr>b!i&Bw^z=mn3loda@O*?WDx&0 z>0@SLSt61}(pU+#-dfjb<~rMG<AUA^S?8sKcqMaDARwe`{)FfGsI1(6znwB@)Zga z-Ibo^wPr7!tXwU-V9Zi1PGROo)=MS$9|S{PM}H9YT}q~AZySGm?5F5F6b2VwKXh6_ zNl!I--Q;JLq8QvqF8K)O&3OehpRwk&QwV4mD&W{IGc2o{qC-Ksa8T_q3E>r-X2Anx zqCE9OKC;<8xgNB(T7dgwLe>nle5wsVr`nO4i0 zA}%`eFI2Ncb7|=q?^D5r22b33R|rmb4Qn^Mqy+RFq0LP^@O})9_A~w2-2ED*yuU43 zyMNHKw46d&IggJ%ICJ^hEizwP&nnGg+;6Mq$ogcdYbwPsS{=cW>&XTeChxtXxdh6% z@KEPT%rIm5OvQ>1jXb3bnMswvyd-uT%$_wP!=t(X1@*@u~aq#rCQMKcQ4B)NIR*yd>a+dxjr#1tn0m=wlK!f-b&#^IDtQ+r3tK>^M zzf>3M6DX;~!2Q1D-k92rEIA|NA4`5406;6B+{&`|L<+vG!4A*ODXR)v3XRS+7bL43G76;apt;)| ziDYgprc|%QLj7oqxhRP792Bmfh@TQBPt=8kO>WFO;P4U=2}B?jdY1>5mnq3;$knwR zuR)}C#rXJ0a*|3OcZ`({G2G|mOW=?cTET2s7?<0Vi zej0P?ip3S@E)r`&y8gT?R2ADYgh9gKvkttR4^tL&qpn{<2s`EE|JE7O78RM7CvE&~ zB5jie1!F36tr!z9l)a*E&NuKi$)fMuYC4ztjsEG*YGV z#fC#nGhfu)5koZx{XDTb8N4$K#XhPRl(}VTd<=yl}4V^XNZF0X>RR+4f1pQiqxFzY|{fz3Y1V>6i`{529xvYUb=~$Ve zI-<;&Zfg~>zl;K0*p1`_1D>8a_jvzn1$jq~yL5g?W#4^7OY~;OrIU&?Ov8G5PBX6U zfzGXMa!Vq()IFo3 zpzzx_;?W?|ADh(j$1aw#J-lK5PWH=tWvjY5_AHH(Pou(AaZC|GcOvuoY^O-S*)p;+ zXH%l4-Fc1|*;e~ZqsuN}FB?tBbC9b}eYy#R(_CJ69z7_~#_TbJi&(ptqD~ zmZ{pabOL?6`+}qCw`oQ+d)C5iJQ`?bYFM8YkGc21g8u4U^ZZg_@6>r1rUSZ%YkNQT zro6QC86b2b>d7{LH`iG5yTj(BZQ6JfP;<`%l0rvmkUvsX*XyN&3O}Ziky-^&ql!f}x7BW7n`K3kc(&UrjZ@5%x 
z_QiP1ol(9Y83w#)zZ^XS?Z?pGiEZDn@d;T}Tvn&0v7bI^9qy7Rg8$%LLwf=7|}Q+A=I`%1hyASy$xuVU~UvXv-JEJiccm z*W*vx$)6+!1^VY&{&*yj9>btv?&Xd!BIDP}`?>+U>WY_qh(aXR*4E$Few7YV8}Qu3PisBS z1V7Ut*&lkGC5AOIL4g-!48|9!8a$6pV17Th_zO;Ogr3~-{}bKM2xFIc`)`W!{Ux}; z;#gHJj?=L2%fIhpj%HinhU)V`xzJQkL@rvi*&-4XLEOIVWxib6M)sE`7OwK?tfN2E z8SjPDsl$cC)b08DW$BQRFbh6IAM^@HDtu{JwwJ3S5V|TpT4+ype%z2y0&lmqy+TLq zG4rv8Ou;gnMb1{OkqO0%2Q9xl7q%YWylE^l(;RuLu&dd!xgKzq%Y#w{9d!0;10;wcVo+ac)iJ5jU`IR-a98 z+rkDJ6z@T^y)SevXSiIbyYw3_GbcS7v$bYT)$)qi!_3p%qADf{BchgO3787^E&*KN z_wO*|-7I^Szvm>a&)X~{A;FUqlD*GkAqATWBP0%Q-gH_z=&?CKobqhl&6euZ)k#g< zIUb1~F2UxS+9PLDiHJyk*;H#Rb3S6`j$nl)|85tsz>3cG&*siHQf@O z5Ha2TuD^AT;$LHO*yO@PhIt<(uf2@T)$LU~`dRk;#!m@UGC1lv&&A;6H8qkLsF&e= zb8Ur9|3_j}n2(vO(Ac7sOVhXN0YpM-m*n*K6s;--i0d!r^tOPEq_}<=wlFKLcgr`Sq8m4QnzF(%pHX$vzffs|N9t8%ba%0RwupMjz^>F!SbigRHMz-_HS7fHt zz09e1Hic(;!&L79JiCm>!z;KH_F!!Eo8m*u0oU$J9)Qs{njL>ILYEn&1T$4T#^=Ts z?g;+7DSX%j6Np{04ardKh1Bx$>i!;U=2N#{A%3RnS%?$in3hB;OaS!7s4}E5I1z&( zyory`q}#VP>=kC>&b)9m-u0R;EL+n^i|~1GLiW~VGO&Sp+Sc|!MCs!b_9rVvLrt63 z#`)w99rIg?ahod5{-QWVi!b;irFjXZJHwM*`zqJpgwax3JBfhurd4}bv^E(ifI64( z!9PEeFH%7WM4L$XkJ&YWEiZ8j812N10D%2XHUJj+ar>Kz@k%c&sA#CM=w9p0mf&;? 
z;DwIYdDw0znvw#nv>ga9tKi8hECQ;zwYAg@O$)SRjV4#%C;=yfz@^+#`%dSY3J{?F z{cA6L-_IBn(T=w!Wa0K07A@@;2(M}96HsAAzI;70>_@EIy|g!fM~TU+Z*d5$t@Dy!X|toPr>Q#Kj{>%Z$J76F8Tr%&w^DG%(?> zr>*_)A$L@yJer{T2651-GeLJu9bs&x8#=nsb3I9qr&kx;vR!+R2!NGTzIH>`w&J5} zkjG-F9U&p$G#Cg@w7hrn1_l(U#sd;y4F%A9VGf)oRrZt|x@F55{yPuY*go*d+r`)7 zNM3C51N=UC5oj2torI~wK$!SQ{`7EIz3ynt5Y$`>h00{ZN%n^1nRqQa2mw6+IiiU+ z`qO>=0=x#;iLFgKpe3@|yYrI!uF5l~r#>1Q8nl&rlTV4kM2Gy#m-t|YqjfNFbbz&6 zY|nw^0*d~!eb(z$2Ime$zAPEbVqb|v#+%Q(*C}iSoU1~q_Jf$qi5}3lhX#~#{39n0 z`EJ@0g1qoB&K0m}(6DBCd)a8Yys~04NT+q_KI(}DsoUbD18gNuUD6y}oxoQt9dSLU z3<=b_U{JZc)@^BE($Wj~CiSI@QN{Im&XSkn)Ya8p!4nuI{O*u@td>Wm3cCy(Axz;X zbI_|F=&bS^5PyJb7!9A5LKQr<-v)5~+ky6%jhowfkZ&k7G;|AKD>5YfEvgLM5}p;M ztH(eV5h};nbLOdoM5Ixs@u&V6#L~-Ql{IU$jKPouP%*?2nVdhFw1%0RRZOfSU`#}7>kRk*|6h_QOH4b#aW1~yQ4k|P zF<$JHY=A|@TMGvjvebVv>8pov62+0RxgqSkqT0nGB5ioGbtVyz$eN6!sNVrn1+E-$b^NoW|e5-HW=$LK3Q<{{}CQW zMy7-Iy^xuOJr?>$_IUM>rcTv<6lht^p`9YiKDr5Addu z5S-I=CBU9&j{@o7hsfwhj}L#<+9?x02qIcG_*e2n==Pv+Lr}@GhC}h1-aohU|M!8L z%-)jVUj_VszT>N3=L9MIA7A_h=ia}&#s9p=hmRqT|99B)-*X#sXYt_!BD4P<%YR;7 ztB`bznw=Quy?UYrRMx6s`#vD%i>8S9@88O8d#$G!tduX)fOq+2@qU;tC-jZN1qAh2 zC|l7o9Q@(!MIUdSs$tCw68m|DpqVk4fs#sq#XrCL_rqN-F4IeBY}s6`2SEBw!QvUqLH)xS8M(NQRy?KXYXmEvoJ4fk^VF!6LslQaXR z2afu^B{e=%K=sXboPpiE%Kc@-vYn4b_?T@$TcqPn??4$ik;la4MySVh;L0fCY&bLH zAvbqS3+4#6HJz+>ar`8jVGP#wJ~wxS@og0;H`AYf@CEQRp^=;9Rdq=NcyW!}Xp*X9yHh`GnnE~sgt?6bEr?;xB zVm+c)Q!yiPE#JLH$9Daxbi@Krgj=lJ`q~7QuFZMCB~;QxJ~szm;8V2Al1g+W@mf*? 
z)Nxhpf3H2adQhh;e|dmj_&kTs8Pg_jyBCeeW2|XyFd63s^QrOPHUL7F;4&mYitw2| z$BObCj2uJkVyPApOBRm7By6}_e@`PPY&;; z!ITiOnQ73dT{C&&R;qF_3U_*RM`URqL>h4{a|4l-9Mk~Um6E>)Pc+H#C{PO!xsa8S zm9eKl%s-Iz-F-(;yHoGLk)J+^*>U~4u;*i6*;xO{+xr1GG1Zh?eb+>AcYcf3p8v=y zrv>CPQ6u&j1cjw)ptS#c>A!Sacy~k-dd=)O?U_otA63naAIIL_9vFHC{81$%BYJ6R z>Cxud1=mlhsmc}>4_P$(Wnew-{QUe8F)@-$2i>tu1owr6j?<8IQqD%5{={FqmmUBQ zFkrgEd9U}`8oa;1)1oS4Z+Z2|@7cf2CcRRf`Ds*PJwmJdDKX$Zb=(%7ZY5x9ByBCN znbjF$gZ+L^viQ(kL+};F7ksadqGUq8Gk=VM`Xv)^Bh=SU=3l!o;AyE`7vXLXi^Q9W z_w73KbfL*3ff&y!JDQT9_DmRM542{v$z?XyEbKEk1FYIfjYWhE#cReJO0P4p^6gnoH> zIgK|%Q(N-n$^j2&+O_?*`_A3}tSYrXCWVOc{vH&m9c*B*Oz5>Vd((p>a?63*Mpo=0 z>(#}6?&ScYKHPaYT*Y>MP}#?4vK@ezu%iG5+8YYy%#h-+=6(07;U*Iv`55|VmKixh zXCdx?2e><6%ChtY4rw@L;`qC)Cws>>_oj4YC8${X6 zbpir%8k(L96gGU>d)poz1g?t>DYN_FNcaGl3x7V0&gCJez*qf>S={@jwmdJ1nFg?u z7sT4yfFv?+iW;y0oU2i8kM4nVl@x(wKmuFv6y2Bs{Loq7hQ*G=u?i|(qo8LVQW$Db zcW;^V2Q@5A6TY|-`7=)HeN~B_8;D+s6n;Dm`ZOzlo8P?jxv5Q^b#y)>^;qdCP z%02wqzdxFwxZn=xs+kLn-5CZT{U|1gM{rro10EypZw*$xzMk=$Wr`FxavCuCi+c%& z9w$4-08VmcJ_Y37t|CcNP{~9icOo1DQbw!G)4gv61?2SfDgf@o6nJ>|$rC}2lO|wn z(-q4E2O1eLM|`+Fm59Auld^4`e+EvlX1Dw~0=i9_TX6TSm*f{SYQBMv8JSZ)hvv;eWN`01dm`0?oyalLn}K&0#$a#Uk@w(2o-x~Q zIKL5SzEtWw*o_;!SD=}JFa6Hd06D{i7Y0XKx9Q%}tG5F1Io>N~TOnko|Jcb;U91&Z zKhO~o2mFDa!lh2=LHVfM61MhPJ_ruN59iMPX3RNIW*c6uo%LlNE84p0M-tj6Dg4P& zP?Hy^VlcL;VPIgeXLL5Bbg%{&TSA7OGV^A#p&d0w$sUArOY&;}Ekx!Qd{v0cr?7z_f9^(fXD#{L8mu^=RJNozSja z4lPMqrkDNzVaQ zN3YR`Hzgkc0f@m9R|FDxosltuM%bDD>S6{qv1H(Qfv4Brd@$+$LO4@~P>VTr6q8g2 zNSEBV#(6pK-2yv;)b?f2m9bCS!E}XL+k20$|DA8%krDVBM(c7SW2Hf9f<6V?J4L5d zhh|064+Aa-kYS3XtDT=n1vbtB4_;76AFs6|Pc-&>VgjrAv->K{eE#U1e252qkO}hP zNDwzHh0&EsEuA--xWpg3Wg>co=Yo2_bbkC06JygA)v_NhCEoRi5+j6pzE5<2drQWM zkh@E0{ViFmE(T49b2G-8?5^&?GU|dUpjG`Am4=qy@9g&Qv+e^pIq*^hpdTax#u|v= zR#H|1?nIXTDVg9Jl^MwN{tp+x33z^G0zC;ItEKmgXUl&eCpeC?KSS+ErG7Cnq9tnP zg8VxVfsJIQTb&qW#|n!fW(=6e#Q_b*x|0n5WIu#4&Q+?r(yRN_V^sw#`GEasFZNz#$p2e3TO9 zvcKE|3=ZI&hSgh@Cau7A#vE`215Qe@6+aA0#n0DsjU;S{D?FPl(*71|7U+E46?dI~ 
z9zCnMf=CmcwX+T;xV;!mI%~W9`?I#163!#}m=ZpdcXwJmZB$E7Tl!WycWKvI7>tMa zl05@hZo~4)sSIiw_B8zF-gXx}U2D?U+g>P;$ixh3vlT_;2j^R=zp3SI>u2FNH^=a- zH2ED_K+Xs7m2qzEo@xa&$5C;u^voq|US2$(yO^^%Q`X!)9hkvGgV?H61Rlj8me7LU zb^+@Fu<+6c(UeQ$9=^W5K6nD`&c48g8Y~UZwx|7zz9e^31 zp<1vQyR|qFk^gj@-~+uVUAG{ZqTRPYkais~_aM&wZ?F0)%c52P@xpQ~YQZ)@@~j-S zoAI5XX)n2#@Rq25ea^C+*mF;<5j*&c^dubv4Zlf`qm8mch)!j$sYgF`2DV!Y%#KGz zMZNv^`ucG2DC_#}m_B5QtADMRsfKU!?|@!z%Fx0X)xcT(gJ1%Fx4&|~WMu7T@^71G zXM=+&t(gHnqZ`CbK{NpQ?=$VZe}KlrqZbVzY?uied$~|E>z4}r`kaAvH8jJT=5Wk)$x3|>$bq#kej zC}fwvcrpU3{Zh@JDi(G=ySD?&C_JbaJ!!}oKo5AHWUGa9WLQik#tvS7S)=15O8YwU zCDC@^o4h^y)i(ys2=eo_cf-^MD!uUi_Q%gt<%U_$$HG%jJjiW799_vwd;)@{D{feT zm6cUi5qlh~dKn)yQ!=wpTFvvO1%QA92`|6j=YKPJ$aSxTqn~fMy)sQ;!apInN`m* z{r*kZN8E>#fU=GCee8jfSPR^dI5DYR%kcPMYH~hUfPrK@0DE$8vGq-DA@z=Q$`MQGmn4CTENuL5KzHm(#l${rjxaK{0Wu z4WvmZA09dM1wiWe>Yo(7e5V#s{S`t|>Gx{7!sNw`8#l7S z5i|!j^z^giS3yE)?8In((rX1FFh%T-}L_rT<7s!4@SKOrHZ*;1D($U(cTCIH`X_xL#9 zxS{&_f--8evB4J@FDI3_I4}1`{>YP%&ZxNcfIWCrw@%o$PdKQWA<`O~GP+E`HePKZ zXv@v4BaV|tmaZnRq{N4l`0(L)t$DDUZyZbX6#-VWKutDx29(gSDJjX_tJDoC&9 z9)uKJ!!~!IZdtIr(wZRpA?ge;KeUInBj&ZP>>hk*4D;$ae7oYV( z^NN)oI9#|(_wY%IA&BiJzK{BH8lT^|N}0E6sZ?nLaKY}|wLKLYJpw$yWMGKlri*sw zeLGq*8KCb@7CXVi0j1p2H^9L)=)926#e|YeQy%~z?E`){Lr^BuGc#@wh;Z_cpx>Xe zDU}RATtvso;Ig@DvAK@ScBOL$Ft`)B4isdk#=PQrvxKxtY$UF~=DY>X>}p~eUcl;5 zOQmDWGj^6B@1=twGh+fwBH_giCagRXliX)1MMb#--dQiNTc9bah|j{*%Px%;Y1z%wi@3Bezp6`HQW_P(bbDV>JO_aA(yL3L1yZ5sLyq-0 zbEO77Aw=YMBei$iq}!tC1;2oxi}t{{{L&DFZwhGq+eE~~%Rv00TF)Ryq#7&okwG!e1SkP{;{3k*-vYy@>UX zC(VEQ`?(|aHmP>4K<50&DGuN9*d!X(gttGN{L)wi-*TijfT zDR$VS@OZjBmrl?zp{?4B_^BiqIRB2o_=_+fWQ<6kEx+zW#Rd!fwq`ytub}jM*i!bopf(0Z{BbiD*H@%0?S`M5N|Y0F#ZO$p-4bI}ggZ!!&HkVtIC-Qmh zAwK>NG!BMB`=L~W$Rm}cQ?gg5;txTsbYr-H-~5-)k{_y3A{wy#fG4fcp`S$V%b4Zy zy``>|I8*=V2p&}{%=;?`_sl8>_Sr7nY6<_uwJjNOpQ1(4(puNmkZf}W{tAw!XO_*1 zc)QhDi7*z{;AKa_DhaLHQQ~iNQPDy-03Te=#^zo$tsrLwfrEhoZDFC^NHLWo9|Tsq zaqn!a0)BRum^tAK58ml3;RLthcg+G4lZ=cp%J0_zf}z_V)&C8!oTooTJP=_cgq(bd 
zZaImJj((B%zi4~Quqf9yY?KgC5KuunR3t=7y1@czB&9_<2N>xX6b0#}2uLFch;%nY zBi)R22?#?CIRnhhzFF^D@B4lG&;GUlpa&kvGtYh9SDxp2>BKv+5&C}yn`E>1@UE_0 zG$SBt1mAuPvVGM$sVUpBx**QC)1KDzmhP}IH`;R6uTu24*f|wtb4V!rdUroS2qW?lJe^T@JC2X$1-#g+tav$;V~)1VI#m zh-U=iqzK5USQe)z?#n*dL#Ym(?D9z6PWRt!088t#0ukk5sHCH=vBtgW(@}b+tKR0a z+I!f%p$9HIbSYN-{r!BYZ*SeoeAFJe(CAAOuWEBNJeZ%K4_cc#@SwoR6YtJ6%gMRf zSv0Wz=*M!qa}9~y={{{fyAK%p;W6%;KM5T%WG`-J{CnoNcT`mxpB_wZYcw|mab|TK zPxrr_hkeodjl?BR@GriL#XtgZ*%(d`t}IS^Z5~ zTDKLYtDZ>G<+U~22O9qOb}PJn2L;}h*8F~lju;?3N0ZHLvr+)1bvkbQlcS?f1Onl@ z;W~ZDfygcy`)r8(;_r4q$F!KQ~5-`SXiGAE|&s2l~s zy5TinDk>@g#3@)kUU|xyTh8R=f3yiwI?epP{|5A?z25t~Qzi0}UEA#f{|2F>*y)X5N-B9yz7`G*Vo22jl-9!QNwuOZZ z{uEdibbWAwZpbQDK?z*v0)E)-CsUtQIR&ql7a5kwS+C2KFZZ$kd4TqOn`Q~MR+85-~H)w1@-Mh$uf4Do?*A%)ec1mLcFV1Sx3KJ zxvlX7SENE%v4(vycl*B~>e1)OM}+dOgM|xq-M{^nZI&+k5jNjs4+P_hcfQoGiT_u; z{QVSLLlZr3XaI{~M+nSK-dLYKiT`h6{6Cwd7$xBQ??eCFdWO;b|MlG%0ul^HvY}Sb zn{Sy=Px~)wMUO`TV#;^kU7NU%_2QkE7b}p7B70O!D%1ELT8?|7zTA*@nFkNv{f zYZEyXDcKh`V;;+9>bdTW?c3mGv(l-__@p*gqII$Xi+dQEvF`W z-Lb0gt`h$5T>t$s7=6qHYw*Q;{GsLQDgdO_8LpBhi@P`iX;Iykd&6|YKjCkBPUx*T zuI!<@yXP8a9lE}Cc-#}e5lmWzynf>sLr#|Cyj|pnCUpaIadKLp+Ikw(0)d_49Rhn# zV@(SM<;cag+B(|fh-OcjH&LnT$s|+Wa0Ak4gZ8993WRRj(%O9mTt|*1`f7YLTKLdj zqfQR~dP6lF%sb!6!C9&~{&&A#+uKLJE}wJeC=1AS7kNop5h1Mur5LF>H}m?TL1YHac&TvtqwCut>#MU z6r=UVAVevz%&GY#vbQi}^7Peu4XwSH?1&}7Hd9q`c>u?FAVzr>hIV2+!0&B#dl@7=FRdZ}4PVx)A{509$0CSX&|qNX|%wF0{fSquwh) zRzsdz4C#EnGaF6eEL%>JGx{bDF<812%g#9Nd&{#C%h&Z|3~2;?NerR#o%q$Rl81=P zdyrP}3^EKLKYi-T1!EpE+^iCI@nt8j4p$kT_0>m*6_5#Ndc&kP7$?(in`IEaoMTTx zpVq{`zHiV4uXiU_(nUxRc)m&Q>B}qMJ`@P`3amGEuzzhV3>85g9X@>1W}TF?LcI4n zue;AQSKw#+^NrxRY#PS8B%eCpgrASiGL`ZWE0$~MgwEg#i@ZuoJkKVk`ppG-Jo$OS z^c;No^yPQT)$M(~B)KsW4-t1ykJirot-6L@X@x?|;-Fy6{!Bf-Fnqxj6Ip%Wb<*|F z=+wV5VZSElJYl(_WAfBR4O_6SA4yb%2{kb(q1JvKH43h(V3q5%G2ErD`IjHQuAy*i z-{^5pdb`w6xS@q>lff$g#X94p$v65C|eAV17c3SWC>Jr2+4BTsM!_X`m2kJ z;{+JSsDjg165TdZP&v6GLFYc84Zbm5<@A2p=V-!^QPln#$h>)v>@H6Ap>DQPv~c26 
z(^zf`NT$mR4uZqrf|Q80uar{I2N55TC{gl5e(2dicg?He%*@nQO9YisK|nfxu;zfP%CId=qH6_?z`DkS<2dik{a7l!tK4hz4J6dI^{TeTBsT(IT+*3ersqR{Xm153A(j;@6%|U(94%Q6e5=o zi*)bgP9B+*Zu}gwE`S#Dwp)bv8a;Izxb;ayCu*Uo;y94Ba-+R{fC5H;wsxW~DI@uy z93ix_?^pF!5{lLGpcqH{`G`P&>CKwz8L(y$CsI}T;r+%*9NC-@v6j@-VrzlR(+&nZ zW|5KEA~n!a2Sh35x|P5UV~KYvT6DM_A8^T^&cN?*NzuK3^ss5VRo%~{u`K4rr_`k0 zS&fXpO71DcB?kqC=t7l+{0MP~1(BQ*XAY^5&NgNuTHUA0CFwTWq~hDoLOo%UAJ_ zMEU%XwP&Nfc=e-s`dY>l6xj}QTwQFk55OFWD*Z{*(+4PjpRO5OW!$^ z6wnl~uVk7xI+Jg}%eGi;pE4mvkVof1uIwdpavyWwXgnKG74cQHm<=z;L+SZ0=I`H9 zOLHl^3gbl3K-@5s`nU=2Zz!2G^RPD!?J(q#-=S(#&puK`0*z*`c+N}Ph?;9)iI7!= zPc{`w%gZHuXDc7L^37rR{X*uglUiT4Otpr`wk=qlPM_p&Y_x}E+wL#Z$APfILrb2p zg(k%_1*x-2lLQad1onisr(D?;oBbgSk*SI!4LZUMwYU@9&E;PD{pXTmuof#5dSYcU zhokGX{G^Sr{@rUuU{~({vn%9KBT>tjPQ2xQCVsfk^+V4-B*;Q#EVwIbn_W{gNI(}3 zi(MGpUM=&&Mj=11pYv$E_E671YS+QI%)ur*AjRjTSNW!Jlvrbhwqe(h4{8maFvCWd za%%on$j*7Gt-o4#_bA4pILP>w2DaieT&mifonTN2RE_K@Umk4nZ%&Xu${zFBHu&sb zS9CNpiHs~dVORUtmSPY3OamsrK{T*stLR@k_);!HG*`7KsF%U_H#LjeYjME zb$`V$JKQ8(-b-r3j_xZ0X(W3PLY`AJ;Rr^__n_$Spga8OQ(=bIo;g4)g55~^KK>Uz zZTFU#92prs{k)HrYj|u(Bt`y)&57S!z?LB!WC8x9efY0pEmZ70hnu&1cF^|AT-gKm z6}Q1jALvgf*~wzTypjy7E^79@g;hx03+F*IwPaH9+qdPyLxA5nX?v(Q$h)kwqFRXH zucXl+P9e}98$*0c+=r|m3wa*-Hy%#Fro2(qT(aU_o99JeCH{ zgk8^wv7AmedUwLq+CI<_?{vmCz~0u`!^o}7oqS33&V#wr6|3;rFbRVBoq4n$nju!r z^-Z$6C!JmP&{%^F^=LUHE;(LlCbUS(AKN&wAtNPyJ;N)q;L7tqhkoZxSU28DnNqES z^sOd00v_i=G{-Ve)9xW?{`o`y4oNOK6An_al^fWZL(-PH&S*BvIZ0N};*NUYC)9d> zuvfFe_nj?_&l7qxl9QCwRr#0yOeDk0)lAiIwt4qL00G@NC%E=J4%2Ysl zcfXqE!ui031gUTOO8GWQAYq&dHr8v4tItnM!zFZ=YiXH#)M8`#%L?rW+g%5wb;q*Z zf*v`A4PA0`_gLG|B=)TzQ!F2!>c`tWC^*5l6liHwI-Zqy6)m<6sJ(U^Ws9#MkJbz^ zuqY3!fY-p194d5uPiMqv9iOgIJ}MIgo-wjjAmz?Y@tV*CHYHo>0*rYV{Co= zNm5(s2pH~XTlfUnQgPJ)%$$>|E zZ##WgqZ;@VE6-CcZ>DFO<^mbcnAELC(|QSH+n1oFt~&DuvF-l-xq@(&W|c>NH_Ump z;@C11swKLU(F5@d28?w^3_{xNbA4ztN(sSHsEBHz6S(8w3PB7&-5)y6>4fB;CvCPR zK5o)W?^^9-ZY~;tXRQhjq3%WUAJOpE;HDlsPP%L&s02Kd{=bj@iSNL<4NXDuQ=)5Y z8!BXQ#d%+k4+9(ZB$})7)L{?S-y4YM*-(>&;eU??w|(?V-aWsu?s}qZx)zlo^byRl 
z^}QK#z*5fnqZ9_p za?_Xx<6ShuO_TBFsltf&njnGm7m@O<_%}ca$sORTfINI>!)|lm@18g*&rMpexI8)w zw6MW|lbVc-3(`Nb-SLn6di%PFA+NwI|JEZvE+A_=*w|vNHApor#yXGu3J7AKB zV1(C9`nTule14~6RF3*JxtdTb-JsJx5cx4yc4!K@KILgEK`e8Xpk<`>dfLv8pjRD< zV``t&J6ogfZiD7@c?O}qu0_MX363gBZ+eNYLuQhwWsr>6LQ<7eNQ?eVS8`~hi5{KU zn*D+<-3s4O;c9{vO|kU~z~z=}-Y2GTlhazS*)FY^{~8k!k?8X&rZr@wBP!;YJspKuG^v0;OPGoA#SX@G} zl5S>e&(swQ$j$9d-VNr-VFBM{s6&h7v^6W~VxgVH3VheQlljfuECoJ>$1m6La1`pQ zIiYoOJkl8Ji!7?yJ2&F)8kb?*G8;r-=bv1$o=f@uw=m6#C9f`!s3G^(=m%Z`zyGcr80~Oq4 z0=yjLvHNE}J~TImzhr1Rx4#r}Tb&durcHrOOrG=qzf2J^Mo+J7H@Hx%=g3rT%FgCE z`5sG;yLCKR+6&S*Nv5|KY0QdK>{)Ex{!}FYdZHLANv64a)}k1U{TTcO~fOc{f{XRUMZC5ct(-I7IVnxSl)HH8v+mz|Z3PiL_=Lokm3~<RdHV-g8;>C~3Y`5g*AwkPrizeZ&=es3uVRP{6Qn)9k$B@_M){UE@5cTLTm}<_qsDoTE74X>)xo zRKz}wfiPPa^allqX~*2eoHdo6Q@m#Y!Th-^>4p^J@89d0dOT8YD4~Z{o5G%dtP4id z_kJ-1TW*AWht?nBlSTPET5@XBNh_&O(KRs8m)n?d@qI|<0ezaO+aWfT+CEmg9JGI zWbIfi37i|`!8-`GtC#JmoT%7C?QyB44YV8++us=e`2%Yg&fkpyWt!S*MMpBS;6G-m zwlH}XnS(AY!f(_i{a#D)_X1}KoCVI&qsq;Cg)|b$Z|qJH9KIAgle*q-A?_}GFFg_< zA$z@vO1IVh-BrR#b)I7i1*gTNGc2^ zg@40_RyNI#wclJp-1ybBEHNp-uK&si^z6RKcnb{49$Y0bEHUT2d-qbb(qjNI97q=9 z;OD1=Kz4P;c7By{Ttw&h^yWOM10c2!c=#)TK-9HO%_6A?55j5}Oz_yaUNpG8JWyLI zOrfG8n4Qr&-Aa3tX3NWFrfL}>HN30q`O)t0(EcdQFW+&tv%8*wYV2&&Y#s*_vx6$# zYO6+Hze-k&Pg$?q=yo?8Dmj}lUM2k`q6?D7d!9Zc@Osc81ntUk6zXw)tpZG;w`Yh` zL?X^OUCf<#{o2GPk(k4j4MH%@d=vb|^~U3wxnuhU;xi+wW}}0H3xp7EvfwHf6*T6V1uL&{BU}Wb0GVNa@hi>{}+uR5iR|A@db> z)b8#mFbbrG!SLjHP^{{csmF@+7oO0ny+kY;PT*@yHoWF0ULJg6P&X3v(z}wxZ{I^S zrxbg~Y@4EyR<+ztNurelDvRH7MNV79TNe8F&O`S+s+rJ}y3rYh&~1vW?jiNfkG zi_)g^YF7&bjFyE$clrU@Mf+$w54LXW$ik|+@$khIDn&*%#Uf&EiI6Se#n^23kgHac9$f$;Z)}>)P>Icdflg6slG!dGM(Po&PM1 z<+JLh&zb&=lw*F-^ot~l8(-M+-Z)$~#K3x57wb`8GHBj(<2{HS+R{!4Z+ixs_F!3wgnx!shUMXWD zGM8SvPl}(@^&q56@lkK7VTCGrLEGP1x56}aCAqcOnBvk~Bueg<1+L!CZ9c!2HXWa; zJAC8)qt;|GCk7GQt2i8PyvQUGV1uMRH#=fjW%yi6^ms8qZp#;kSvLafj2){0*TJ~V z*{EkJ|DtR0T*tLJc{ae)Gwk-094YrBQ31_Cb^l}di6?RGcNS1-Q9Wf@)&Bk9ywMRg 
zzvnP9t!=KqpK{pZiS+l`IU~xet@s^w_^oU5=9n(}Gy&yC!jObduzmubAXk}F-c#pnPU6)yuql9(OQ z5u-oH&JRI~GE0<4H2es~eeYh+c)o6!cv14OY$5THLb9{-rDFU?PhB6Q^1WXIxhFm! ze$MoG!}S!^6fA66esyjsn^#K3F3b7N&B|S-rskHFO$8+zqd%FUP07NYjDY=nPz5vr zn1<5vy=X}iu>+Dd%VQEoa|EX5K~;-I`fm<>DmjBZowE3cQ}_fOFEw9J>Q7izPC{)y ze!Z>jRr2-2hs$vpDWVQB0FwhcKi98a3r$&P<$nivu347~9L}4grNPAUUS=UomB6VPTElP6iC^yQf{0DNMmd$;7o; z(+YJ{KaS`c$KJE0Awlp4&)I?fy(|5XY|-Bdg5Ul@uR?NSKOr9uC>|2ddj6?6WRa@s z_pAs?>--(t2Sas?8gzlsJ*W(ey42$D}Hika`$$kF0DBwf$&K$AZ>GqM_P)(yl zRNv0j_pKQa@P_QDq)`RQin!cT7?|#@+AUt1pRH)a$W8YrL1OH#8zU%-2z4g>9yOYN z!Jmn89i?@GPnNB_GF^$P2((A4J-;VA-Wq1nygL+!uc4D?QSxU0@IkdoYI~F~;P7U; z@d4?X)hpj0&#gWkjI)Htc8~P53q-@$Xpb&YQBwwYsUV%K=&8EIT zi#B<}B}4nnQ zq)syS4sMS%kt8!Mo`B7X1+*tyDeez6^)gW|t2#-(rcT5W+i;vr!>|q#cAwmX9ka(2; z0l*R02N)03&dN2oLjj5Q$gNN6MiK=8POEM^rpa1 za}hKW0fPULJoZr6(Z?Tpj&@HOEf~N3H%-+Y&!i9pbYXA=_M%Mk=X;Me)1;pQ=-z!! zI1NIe)MJ4TL`$lt>6TOJ?2IFUw>$%}_c;~*cV4KMezuAx1aw8UHs^!$a(C9%Gdd2e zy2p5UK7ar9Y%;Lw+xU^H(>c~*(jYxad^q=M*iiW!O@RvLbv!3=^Y8#;!jRO|RMoy+Oc>kBR`fG-{EpSz04zrI90rD?dd5NE z9RIycEK{{_IO0Da*Ow&ho&Q4Fh~tRvrm7POIh^yK77I@mT$e0-s@!J$x{-T3#!IE4 zeom$Xipq#&3M}}+?UpiGlmzgMK&sLfdwLXb+S4tb^B*&Lck97}kH3Fg8&^9UIr#F; zm(it}cei-SUA|)9LY4wwLV3yg)Eb?s_UZobQDkemLKVz6e7`uIll-jvOw!UlAYX9! 
zVke$ip+RR1CnHK^qH_~4nnJ1Bm42`GJ+XifzQtrP`4by{7A3f-aj9$1&jTh4jUEH^ zwYne4%*@Ypt)+aA^^`eghb{Jw6&H3cZVNL9%>QxSp3iY9pXb#-{w^9n)pCA|+-3Un zaIVJor-Xn0N1-BvKA@C#g^CI^Z{*{X6Z3jmgXj6{wP*Us-amR1R$!8|cJJSJdGS%G znH>IRZnJ6oHKhZX(Bj%!7r=iO@GU6e9_l9~{@wZd=ZmcU=YL@t>8&pTq&FH2--Ydm zZ-HzTC}0BYt)932|8Z8AZ#(a`91+O-?A-!`5>UMB@!o80Z3W%(EWLyh{s6$x0^wt< z$_b$wp$8u3yDU?5DCmF^3i%(;)k#ZxZT$rIw%|VBe^3A7iN$}6k}d)U$TiUS+Swj= zC`+}FiGQ2=njm$q$wA6loOfZL`nHtIw5aQRBaj-ojKN?on)Q0Tz9)0uj3TyC6B}Do z6=BNkft9nEi(@z;2K)Qg`jd3$EdO`7;BTcByebpMYoT1{l=a#tOl8w3C?mXn(^Vh`9X z9700Z?CjP(x^FNus}&hnxp{i>pH%-}@9W}Zo}0!G5jMK7C>a!%HZL7-?T<*>yMEQ2 zRShyA-!Sz0=#5`O-C7UyzNc!EaW#!QVMAt9mjO!Yu*A@Ci5R>|I#p_MMH0-i`cs2HR-Ky%?1 zC`ABYWqEtsYzO^chVsA1koByppU^Zty=6l}aQM|vHOX4v%)ZF3k#@?VE>?F)W_xSr zfc4y1f$iAg2II|$d|-(e>eRs4O-)T1#2je>0rJ&CbD*T8Bsghj0L|?LbiC-9^YeqD zizXz%3{55O=eX)?kAI62-i+3-Jzs80!X^B0$1k@NBwP z|F0L>0cQ0BP4$n#`*c^1KOqo!<)Za6+91|$d%CtZ-_7H};_^;ZK_a)S{CsV#R7!xT zap3zE)ZyW!Tio1XVEq*pi)0Is+5oBz+<>zMx~vyGDK>A~8dHE5*jY>k-uBzsI0xv10a4biSespXs z`mq!l2?=B3y?E8l$s)CYfX2>75JCa8zN@pd6K$rb7(7u%{+|=$<>J=Ta5g6T`i8PV zhW6i6eDp!}xeH~>h5Z8Kp+k(>A7%186KFR3#;weFkJc|(9WshhY10;et380xW@`WS znj<+orCHEWb|LVDHSp}Xv$gdNpalTB!v!u!Mi63Znu=N=4S55K8LP*|syUB}444zS zxw)C~AUy<{=)8=KjN{**69IX|VPMc33w45UbOkCg4h#3Y$n zA8HRsk00`#v6;|;0{P?p_1Yvq3W&F0{rk$+zJcr;S%tM4%CPo<&iL+Wc~h{ZVarsl zCfCJbCew)XsTX211U~x1{BqHzm>6b$6lZqUEe__XnSt$VtAiqHY3(Sdb(O(JjbJSo zHpgi?Wiqlj&sVxMQXZ>(I|96HSCH>gBAx0IGT^@PA8L7I^><`P8##&x1zz+`q6??w zb<^5c(e_tK<slMzRQyDbExGQ(4otK6yE8vEs3r zg-FBn++Z5SAdOxH6{RB?(O+NERcE*FPD?mzzA)v};3U>xZHbMRi>Nmv*`}%9 zqof`#fz{Cmyv3S>ubrmpEMl3skTZVxvQ?ZtXc6Yg?wtHHIJuS;@WbY^y%RHxu+p0e zz@AFnUXF1|%N_GPjDG9xtxFd)%j)J0ZJ8Um?(MDYp}Qi9L2|J%%E0?YJ*8QFeSHB_ z>0%liO3B;>6w#AF`;FlC?c1-nr{nMG=7Oa~1hn$nl0JKxD$<}h=L9MM1OaE;HGhu$ z{EoM-C;}G4uW}m-0+7*S4tBf7fF8|7tqsO*J%fW>U=jjmX&t>y+~B7E>IS|?%6o?e znB_%~cuzqOM0X4wFlK6vS6afqQ+%_YVqG&Rnem|0(2NA1__R#>kaFOPou6^P?u zcgE$Gw-uEV6_G}3n(S;3cAA1d+Q61{*qtFE*Nwx7P|}v@-#$U|b&0l~%t--|0i27G 
z;Jf!T&5t2MpJf&-5AsS;e+07PR?w#>l#S?#hVK+RlQjntCBkcwHFW{^lP$D%?b;9` zeN_PyIR3^auiM~&oE4yd!C+z`jc?C+BZo$h71++AAF?-h^ixJ+UiOvmZ_84EmZ7>y z!(S#F-WHOx{!0uNxaQ_r2mMN%!A$`3f(8}JqwgO%kqOsA6;hUKlifmojbBO`<#9bd z9Wtteh$y7}YH!R)6nWJOt4pVKsyW=X*+mzm)FBQ1G`Yqm1%6}M;iAQ`JDai;HGyP` zO0IO^o7Qt*x6j?I$x4dR!6=J?yWG1weT~d9TfgI1p|#1^+T%?|`ll;DKTaI_EHelu z_DCw1^=>P;e&+A@{S)j?y*0bbS0>q~dbTty%I&+EF8(_4j-g<``Zab6wsC2<@GNS8 z+lx7I2!pzxTV(dk(W_n`WmN8gr=#07?K|a=iDrLXb~a8ORwH6FA^S;KM~a5El_CQ> zgqFGTD+o_GvlE*D22s5$LGXW(MZI~w{sf76D0;?af zrlyqMZ3!M{weqWGgT6jCSuNz4xK|yF%IYq}m;g-$&`bfXfI3erBlmfBn?lF`#e856 z^V<0j>p>`Yti(bAVEZFgbMAnaQv7|x<)!Dp)NDZ(B8%#vxd?>7dO-jj_&YG-_S&hz z6eKh_m;g`?yTO7740p?16DJ&R)83USoBsaxWC{2tB6Lo5!2I2wEZMR$8xAY>1uo z5y>KQL1NFRA-z#}j|BM9rO5eL$PMDF+oKE1`=Bnne*#$wxn45T{G0)VmIrDJmNQAj z1`>x;Oc^=b4Pn)JT73d64X4~{u=}v~!lo|;7B8D|#vI%h(V8WoM{d3*ZIh+|esV@C z=?^8Tqu)4>cfe#S``qK1rIjU{rz*WM4Y_vF!icima~sAvf1$bLYLdmKXp2%u{7~b; zmQS1~pV>l0x6w4NiZdu$Q_ayp8bk=$``XRD)&ocPr9`4X)FMR-O#Fs+@c@A3S4H%* z$M7KbsqU1|Yx-<9UnWpU7{VcoA$b1*$w#0={@5&rUOnQoC$=gy2Mfu^eb$!`PGn;y zY^fDo$T57utt5N?q;5qd&sW>(*h2atnj_?cX2gB5AaGHwo~;MT&Qu-R0W9OLwey0V zZ@m?SU>=R536iRdH6(2&_~nJa_o@#&a>FS$HU}xAcYcz1hB?1zxbM(|X~pV*#DX>U zv<-d&SgQJ@luSGCG@4uis0Y2m6s^^L2bI|`HxX@bwn%Lt7hSc6C z4Y13eJF`hZc`z$G`wBUE+rU6HNW?!sgZ~6oK4&v{VDhX*k1O?#^o#DyZiAc?6an(bs#kV${E8fO=v|?vL{037AIDr zM)>KxuCHwP21ho_htX0D?h)vj9)_!a&3^7&tyP<);5R1Vs?%o!#2!nZ*r_jkeHN;-l)?BpM_>#+BqIu(!YUB zEOaHP=9E{PAx2N^@gs2MHHirWxgRaAbr<7|vVsV0k-MuV5?>nkAB$_ILs$%sf5t-t zPW#9w)sw??zTV=zpC+Xc^i*$DgOif3= zMWOLD>wtzMwjKs%(=?2*w0tM}m`qD_hfrng6(hQ7#tU84Eyg3-7sqXWdp$_AF5eP5d zmpp$1fshsU)vTGmy07Ew_mtY!x&LATj)$5G>w@G)cCL$O{L-kX#|x(AC2ysL03~zoinX$aXZ=t`ZSGTg0wd|+E*D%DM1AIzq zcuc8N08?5*$Dy91uWzDHYwy&^mF`#T5!oTxC}UeBHm^2fp54V}(EG8+t`S#;lfNiP&v=VKHB*3DF4Y&~-R_G>%@V7Z4Q%f&i7ti=EC5H$COcc$)afDutx)M3%nT}O5k_DR z5%KRx#BBF<@wa3I6G1fpy-wrLIGR&RsGWy!7rPz0Yk>nG;GS>bGI`rGXZurg)!MQq zkLP#q!q~*c8C+!ogV_dTzVsjDiTuH=j?3Gg!xQZpXWxB`5;aXFXTNg-!Cu3fD0C>B 
zBmnH9#w!tqtNnB5Vp^Vmo?bXWmAX!aMD^?=Iq06b$_GnK#|gl`Mwv*bEex?QZNxM= zXv_RXSrXXW`aw6K$)BT-yT95sZ(+sw6(5F)G8(I~o>Xazr@KPw+)}JSt`?{k`U_@Xj2AS%D7**ADgOr}Y7 zxuOz_Rm?*q8o^rvur5s6>=19Q7*lz*q0$(FbKJV6U)7&?`-72pK_(bl-y9Sm>OoGh z3o-89Z&G0NFMfkLuxlSldv|HvZu2jC!Vg+%!b!vkfJ;qfpX@NC3u(RRHCbwR&qr-x zM4yM3MVU#bO&&eo{uJ~vkY3A8?p_RdUojTgIEi#dCsW&vQ&O8<3;&LVe0Q}_xkE`% zp8z7px!u&6vs}}HPt*9u5kmwkebYmTtfs3#aiiStg+?5 zT!yXnP+nA@IU|}#%^Rx-Lg|K7(l=YXz3I63)bCxI@U(WZXI4*N)Hgrwguw`rn-~4q z=lYFi6nJ?<9$HDB-9cAI(=jq-GR_z(D$}}ny`@9I)qkd{+J-y){tT68&-)ViFLr0s3z%ktb_rnfn}tFzo@y0vMVX>Wiwr;x z_oze=vp8f9!V3EjGxRkQ8Qe0F_w6PoSt@_@G)cTC*=7NzbT|=~hWe zZ76YC6ZC<@a4=c)uR{p-ud?pmY?u0jzWl(b9XW0ILmdB-u zYQHBMp4zQw$4(w^s`sjMfO0QZxtU6_$(XS6hv7bF8DUWaW0{Kj8j!XHPh%3}RHS zk`?rgd4?Sdjdxa-o+3{*jOvkOZT@l~X_t_sht_qV0E(S~XKS@sk4G&Ueg_58qaaH) z>?Yy|GdxY<5x5S&oya8_+>_sDFI*xIo0bbY(n^T(P5InE!yq)w`Auwr6`k|sA~geU zzT?|nw|fE+xr9eo4v#8wzH{}`SUBv+!I&{*jD0U-4qo%@1hcHDNAr)^~F zljaMVhuD-aA046Rz#iTBm$;s*lzpezcZ0ffN=waemWzSQ{yja!$7!3$)y}Tm?J$Ki zG^2>esl+0kR{~@TR%b~e-&GkOomr^OcLfuYkemApr{citOAOp=vksCyW(MxSdIFbm zrC2}gBuwkha9;I`c-D}C{UXUyDvWht*4(I z;WU7{PY!z8R+#nPpUng?!QWn{qA~&~}vPWL+xiu{qPYCzb6dZfH*S;dn{+LfOpRMtT zrXjziK*=a^^FI2}LU3gl*`0_?r1?XiX)n_fS)WPFgWHmF|0#e+ zLCtOO`5XkYl6%KYNS?{6fZ~`E2(SLOh1>Tb3Pqn$IBC^xk0Eo=H+75F$##GHYfKL# zYn6h<6o2UC`@kj)y}E%EIpSIO$dSm86YYpTHQ${SYMx>q8^s3e^x6&cFejUa_O$(p z+-hXasdQULx|6e0ZSi#78wB0Ja)XKp?ZQIl@$r!oCJ!%^$l858mgh-Nge4MU zpk#1jBwXtMYcTLjh1YdM2^Y0&dqf2#&*~r+nNW=NbwN4rYgM_t z?;QmR&$_{#rI1b8dlv2AC1-}@OuX*YJyot|nb#;WNE>G6^+h#M`yN-rKF*eN5J31a z@2Cv|2O_zjKGc+h`3ZSSLzCBaZV5V@pBYxu(_1}Y@EF8$w0MJ$n+Jz}(o*x$N!ao0qYYq~P(R{fZ z{`hVUfDrkA5x5ku`t+g0gO9D2!D4@|v(Y-%SUjAn3x-%4Jy@C`x`UH%zG|P*-^+^Z z=^yf{t#&md()rjrsi^AeE^{i?6JAGI}g?Fb#`xg;P4r7-dvP zzu6v&9*m^AQllv%>zCE%*~Dt5@DL|G^SO`=FFhD{u#oG2;AQHsFVSi9`gLHfZu9iJ zG|EQfUB=0b1GPv8mhH4dr!gMh7?Dnr&xQs(&G3iY+eiltg=!?maz>g?D`Rn z*2Hu<7j$6pt3;n+ps2RjbGDqDtD^bG4a(qdp8r)&w>JokvOY-OoW~P`%@M#}RI=X% zgXE1x^`Z~;rvj@T#F#3X#F(PHs{7x_fNnY1>&|E1_{Fk4^g1J>ijK~wb}SHF{Z#{D 
z2fE#)w}6^?I1RV3S7C^aBo&)AQ0)fmB?gV@6%^8a*BD8u+4_MB>vj+<-*vXu{mE0K z#6}jtIw^tUPM;>ZJh|-d`(q~H^2IQuwlE$@^!*vW&1VNM9T*s}*`05?%Vln?J85td z`}-lE8YVs@sha@Yx2XVo2?BXz{xqf1>P>!{(ig&@2%~sa%GY*cl}o)bk3&XQ?G6xjbGOpih1HGk+zj3 zORgG;)Qhowpn^U_d!P@)nPyU}Af9Bxv7=v~4}2UM)GK;utY^^H!MvMOSpN1?gzzCL za=ZwLCOA!-4vZ?mI-SMi^uP4whRD=aYZyMAA=IQ#EhD)(xx3I6N<70jieYN-0Gc79 zj+9JQT|3maT7B!AJ_Ol4PgI^V-V9YOeZiB@ldW#?Mgr{^XwSkr)Fq0HTl9O(31gd5 z8zY@hdupeBmVRF9!fHabZTs>Js+RbUEOaPDEpWHXtJ%VJK;U$<7pQ>u<=phnEhQ# z>bLR3xb;0$RSAnJ*qLCf{EFQb*|>=Jr3}x zY$M;7NEDZvmE4T?3)0f~YE-x)c*$vH3r8`T->k;lyM;4?&qC*Li?#ULAMA>lDgT5i z=iE{I%x_uhsH=qY&G>NxdAr`jn_IF8wr7F1z&Sa~*GBFPOcP#XW_mw3l;L?+O*EOr zB%bS%=MZ&t?z?i37f~?OU4IPG8jQ{Re0%{lSbp>@@QmuX5lii**%0kgLTQJ*ziw;x zgB<55=6D`33;UA~(g4shO$6bb24LWUVNTV$XSZW76mCC!P|TB&pil-XnXEe*K+6fF zJWEUKwo?w{->-umyc;3tiM%kX%a=3D%gZOL!0yK^apAu;$!m!iqm_U@pTDZAD$tzD zEG&!yZhy1}vYl-%NFw1?myntjz?O8t+e-KbxJIF^&7kV*7Br#r?IINO>w}DOJViWt zYH{MTqZ2{ox$izOC^enj1rY=Nfi!B%Mq$pg0L}FzOz9dkor}cWj~mnjK_6#75vlg7 zff;1sd{a2r$dyLVA!~wkHry@2K)ga|GfvRBLp%%IBl}s?l<)chG^bVDtxY0DP9?Ys z{N+$j{bpROaZ#>C!;Z6TY1-r!jjd|ns(G2mszqG)cxrmhEQb%q1d!}Q?F-U1#rx4_ zr~AK4$q0Px7g^PD%lxh>ILXy`KT_{4jonEz=d=3hX4@fKdAv)$XRkJedi_RFawiYP zu#d4kXE#K?GSZH&5i;X7#=K%bd+SZzin+d?l3_Z_F~P;dCgh&0`sSp2sqj0hyKhGo z;3$Sm%)D)Jhhi=_lCOEsPv_~nvi2d>nH!cOz4XEp{$Hq!RWqJgS_m$4Aq=h`=w+)wypl>wOH>;CdzH5>ODs+VLEmVPVnx} z+gjc-M!`AM2bYvjXC{I13VQVYa)A)r**rlS8m9Lok#lu~3?o+8;@03j)rWc@Zf)ld zDR`XTWyhXV?t=B$=Yd%Jl<`yCfDvF|DAqWEcpp*hiHzo=aBGNHPH`2{uE2)5V z2uR0(bdHFmboWTd&`1r;%(uqpocFxvyyyEvFD_!{o_p_m?X}h~R*gH!UL6CUyFCnT znupV5M7&i_Fjy|XA}Rgb_Khp+-1Wc($tqGQcY=V2(G^wt%C|M+*V$sGm1w^y&V%r?cqxTU(fy7*M}ZK`pbOm7L* zYukLTq~Mceh`7{lSuxF`E2(JIbI;W-%_c3HO)hqA16{VIs|SM^R3V9n`9SdJw#->* z&TVb65fzmTj;!kudEX3fJJ$#mo6$`rZETd}i0Mv~q&>nb@FREoQ*F3J=H4F-%(l;N zvNoIw;D~aYtom))o1x!qCmchD@};*=ym6_1Q=@;uBKyaC0o2+GU#bAtB!lrq0tzelovR8FCx4;`g+6zdjZ5jJA>t1$3?% z)5X?c``ed}FjS^|NebBS`pfV(+tL9~`LKnbO+36873dd>GQz5~q(cYqn)=Y>C#ulw zMR3et9iJnnzDk?sDZksVz1pV-xYi&`Z`V^&7q*wbq2NLwQqI# 
zUf(%y<*7O@hBmS61-j-T(;ZscKUI(X|4oA1J ze?8`~wy{>En4%w-mLQWW&05uo;Y)p3MFPUlYCv_;$db|4l0u^55vXi}SmY-&gETy| zG>giYQ>LFp=jCkccu(2&;skan_qP@mZBsngr{u1k^S3#>0+T(+-sK=$1>P0-G|K}) z%pDwS5jgB7B&V}&5-U?>v+>CgASn(W&NFLgfryD9lhQ#a_G`IfrQTY&8-~x?mX04D z^VzfEzIv5BKLrZUr!_Aq5m3<4R_WWt$d)_f`oKz$X~YAUgSa5=6$#`zgX;_(eYMyM z01_g4^oEU(nUhZh`E$SkaB?afuEF{@Vf%w+JJ)D1zQITRoft11?WF8w+#j`fl3B^w zIeSpd#XyqG8k(0)W9L%$85-;_6a2$<`3Q#h@gLYHf)FWp&uE=g7!-@TP zRmE8KzFNkU%d+*N$8*bvSWcalPoCTHI7;MhA|<6?dy7uJsYPM=$**+uDiz7`qqB|{ zMN7hQEwlDbB6fUFz|h@Y!|u?@spO|$^&63p!D0?*zD#!As;o)=6`9q3 zMEMZ@NXK>Vw&X&+QB)k<(~oD*do~@$_HfhLk1w~!k3)j=rgYUjeq*n&@~XOgJD>XZESiQPqDj^#+g0dXnvd^L%#Y%KvYqz+c+Ki{(YnCpnoWu(O$!d{tk61S~b67I1S z0wVTN0kA2BrgWk8V2FN|hnNB~M|}f|sIOkT4!~UjvQ1iLWh@{=16x{E9zrt2o#eeh za4({1q%t6Hq66hUvIx%!!$3rC)*$BE7uT$|LPbk`+!mCFujNxFwx((XsRCC$h!*GX zYZY#qLAC#mI(Q~1UgQJA`N>VMA9UuVmAr8)^N~~Qq_5TVV4Lh(tLFW*U%A8n`WGVa zUMxmAL8YU;~{0gEuGyS zgOYCHD3}Lfe;~Q@0my#ON^zq5aexkR01NS#feG!gcuDsVKzpO`T2w0N$F4S^2(kf{ z{UtesCY1``7Ri?2Kwh?w=VOmeMGLv*;qNf49iRK2O+o2jhp+S2dhUF-e{=F; z>inmupsR0=?QBub2{1A`=N-N$>_zEXiIDbLojm@|hDHN>oHFg{ykO}xmleWfnARZs z6${&iv;^lv>ZFw}i(u;Lw%XJCbl*Qc+|>r9;abTvF-r1S7fwelO5X%p#%ZTL|0_u3 z=MeH%aps^PFV){%EM4x)WdAFaOh!eqy;jaSY4edGZLCb;f=awHYYHcoq{#z@pRN)4 zBFSHsOd>@&!?3jKc}u0$ef|H& zRgf?{j10kpO|nJRjN@h8Wdzs!o~UZ9^6SKqfXZx-*8jy5rMi?W7}0vR<8T3EXvWMQ zH{D$qw;x*`?aym?yDgbke!hC1xMTTbXD@wc+tt08E7s_aV5Oww3)xfJzh$ZKO`FE+ z$2P3#CMP)vzwcUa&N+4WQ07{1=0J=}6ZOyaCO(1wa+JcmH)giS(`;VQ?wXjs*zXA- ztsj3|aKdk$%;lDRny(4Ex1(_qt~21|ycw<0{L#B*&*nU1QMBR_7k;xPL)mQ>`P+9D zs%HMjGn~Ab8X-S95!p!|RGVl4_ZH&QGgnJUl;38y8??6t%2=F-v!Y~-q<Ux!oIB3muZFkP;i5FVKb5O^HO->%9NmYLsVS-11Jn)`It>;Zo~%SXCk7wxY6E) zf0xF zI_Nhf=EG#a1K(CwoNA1VA6YyuJ8m@_5f}4N+b-`{vPz=*9o@*o{*>_*TqCxms`H-? 
z?Pb{9GPBa(L~=CC20I)US&U1sC`$9NI~z>Mo<!yW?P~Jo~Wyb#K$W^4D2u$)NdCMy?D~s$Ho(q?I57WG?Km_O8Acp z*;SyhQ8yB;xu>Ky~7TNpGeaNRj=Zm8hbv-3-|k80c(9yLLz3)w%xfle1j)gxP$_`>x=1 zgUn!!vYpdRIg8YD8HUv^2ZWEus)b+S%dt`h@H!K zG)tddLY(1HnhMvk9WTe_ZfLWRxx+w|hB*dJ`VUp9*0!Ao5BUbN)0t9ddl4vF&dqgP z51={mc6DIMx6Z-E+kj3swAn(u1QlkojW@3TxQsR(&?l*xPkcnTot#Vii~B)s)af%R zctMf^wH^p zy}G!6zqzHS>!kMG1GkBItap)ZmIqb`#uL>5kFSid6}Q!Q;GbN8VdiDkM~}WiW7Y=! zv$=UB`l+HEcy8sItp2L;>ppv1caS#*kyn!~&NP*i!98VFGr^C@ev&bEJ7Y|9cCx@v z!FCC$X`7m-D@yYi^@jtilk|o{D3au|df63A!XUrcPi)sfl=E&A2 zV!kpnFF&@h;2InpynFZVNk&FXPb9kEh`)Eg8fz{{R@)dF8ChAA2d?J0GX%==L(3yL z_T)`YhS*6x$d>|43acxb^uYKfwu~z5GvzC>59 zAA|XVpu@%TDkgWn8?-L=tTL=SR%|_Z8>kO_m8%Cpd2I+UTQoD?2z$Uvk$i0$B@Ivi zwR(TP;zh`?@-P-L2FkvaO}?GnIY92z>YN;CZ)jL51mfvG`m`fp-^NV`BM51>l^;Gd z0^YhL;deLU5GCaefZrl^fp#ARjBx~T{%c^qSDvONa_g&fHylFe$>IE8QIz~u>H3#y z@3*x!uxi({d(+o8Iz>m?Ke8)EUk8i=1~Evnc+6dbvZ_ADu38S-VpgLvpfm&pqLX;s zQWX$j%Ud|a#fP>WZ{9>ecNd1cl=Q7_$}$2<_Hu;o-#?(e+(RGRqoxHy1Gr@M|;7imMVZh^7^{NfddB+)~d4o zy}Ryk?P-MwoiujcMF;Y^pz*3lFOARTlr=e?%gMbT^|To(gf$iu{Vw6c{E+Pie|Y1s z>m|X8?c#O0lSdHdr-nadosQAVlL>KQm3g4zew6#7=*4&JCr(q*P#yYa!2fl+!&Y3x z_ReR_W-unSEUk%;E9p*hxb1X-`A@#N{r$2;RaQF<3g5-=lGS6SFS5T205LgQ!gKTy z->&X8)HeP-^<<1<*1O|0Mh1Jv72CKyyi=VDku@9dC!JH#kztSXf6SZ0>$1l+J-4qR z79RS3>~or$S17eE|4O)z5t(R%W_M)Zt-tKscdErIV9IT}CCYv_r1BtVoq)`r<5Y@0*l_B~G{Ifovh_21~ zx&4FCe$m$y4Gk@8mcj4}IWJ*R+wJ6(?Y1u7s}KrBL;~|}R%a{$D~{L5c|4hAFN27M z)4m~y*`hD|Jg7h>V9QU}(#jM{T_p9WD(4&28~}i%WYmQT$l5^5eg%5nu1a@DkY&jM zQ&BQ=c-I|~tzQ+*irVi?c6xnMgE#5`c@jbCi_OKs?(t~hv2X`t)(sz>H?ldV-OuA% zv??MytKNp3JTShwG1ifMk?r5ihrgv^cqihf6S~nht`XC&4UJ=WV#BXu3aJX}ZW})` zkCU}xZ4z&i@_sAE9JQEEM5N#y6ifv!(G0csx|oIa!1%Ox2v!;BS6V98Y7uldX*DF5 zWPWMwxxLKV+6b#N9EWwUp%B95ejyx}W!p`;M}~Xdg)+Ew&Z%yH!43T0# zVYP^e=}&Y|7`Ttp9=DHb-wg|E?U!C|?Q?bi?$H)@1=EA@SE>CqY7pf9NKKMHBS@kf!*aw(Oysc>in*{Jh zVNxurQs9*Djs6p}^}OB`krlQ_sYyHUS3jThYnFdtJNQvfWHcCA#P=&9mL#lkMcC-h zkPH8YW1eP<*M7&t%lGQ~I92;iZK6nf{C>?3Ev>Ws6I|^K`}nB2x+5HQwr}k_exs^b 
z(xNPr+?;meKK?0Pjr+Ik!Hs&1ZH&Xvr!-7_O${bU^^x`uM=X&(NA@9BSYu=0Lxs(r zXcj4I;($JK|Hk|vhe~3Smcr?FtaO_TjjoMrof<)D+H`Hdg!Pf8xIhz0T-ee?BUMWVl3Whr zoFv@LXo`_o?5uiJM z=bAn99!sDNP<2xdOT|t!ZDvdnr!x0**N@=t2p9Me&m|Auot!(Qe||SI$EU3g9kTB3 z;Vp3$)3s$^-%Xre*w@(Zt!FJO=s#J}uAtFRU%I{fc(neYMC5LFKN%=YE%lA(5&ybm z&b<_>y}G|Nd3Jimg5=_YQga~v8UME${z>^nZb_jK;|ay+(vlx<6B1bA0Ln-^4ZTw{ z-Y7&~lkvDywQKcFI`Xm055u=i!Jq}eZ~XR(GC=tYJZSKO@`WxmP$bMx^+%FRvInzP zbKT|sKaK7<-GbA_j|*8<+tKDdKUBzL6B<1%DBC*{fk(QLznb?^`3C zHV9!)!q!$B_}1<;i`(LEbeKT&2^K!8IEdn|l0~>-GipbMY!JWPNh(9qR$I0;#=;eJ z2qnLO6nE=8eT%!b>hYf>`WzKwvTSPTHI~fyWAdNiTuuGU`h4aZ6W(sG&JV5Zu0&=C z2Fm>~5d12`Hd;KYwinf6N)sntGqQ$GRL^jG*0+&8SP}J9?-Mq3VTcG37Vh{oFiT3) zR<3syDBW=B+i*(nW0fqE{xp)H`w6Q?M7&jEo%O$;me_0VZa5}L#yDwdDo6XOgZ_O- zg%VSy_B93_CoZNrC<{P|?Xpr)Uj41AJz@b8$7*fX8Oi-)!ENq=b04T?_Hia;*v!ROVgAU@=)`r*%4-Kgg}S_ifs0~i#lYT8v98{_WOsCQ@=XAX7sRW{ZF z#o6p1g%un(D)(*z*QIt6p3oLj{3QXIhRx?@MT(Y6viia@E5w&-5+a{BHfA-IOalo3 z7%EkKnL$8f^WqbFkZoQwVWvf`^Rt;LnD*wLP$=6Zot<6bf~d~X7Hf)<6=yGa4|wH{ zc@^mz(q@y7lV_&A){~(tgVxM%y@{(i`My8RzPmA>@6V<9zII5v2z>m7bIi2_IGNjnOFc+^`VjNy&A}+qTi6c zL4}{|dX}6Ks)CN8I04K)6tmQGKxJl7rHFh|#BPKsma@k2amPuY{r3OmJ|#{5Uz(L- zD&yW2fRG|(OH4r^lP_|M;`wFS9|JECy{PsKCA|hA1*`wg?-H~O#JwDSq(dn1UkAEV zC1&Aj#dh+Y?{qdA-?a$9w8+!E=7NiUbL900jmL9Jy%!cb@~$9lHI;x10oj7JSmoQG zK7&2iToUfy?|j=0byE(aU%X}Q5LXBFO7#uqZ6=sH2CQa!mn zH4G~pN(>8mEY^BM7@*hrc6@WJDP6s|-Pf_%w>iH^oRr_(U^>TeTR+3K4JZgJ_!&9U zV-I4mX@cSQ+Cc?o;^)!5WnHAR6h5T}`nF@Lc9v_Az>|gCjB? 
zKg`e14;Vs;&bz8yvMT~qgGAz1w<^T_tUf}ZEL&7m6tI4ibKa_m{MiwYu~rC;48}`y z@BhUCa#_iG!VNXtBJz=T3+-@UzT5gJ6xH;r-9=XU)qGP7=T1zV`{%-eyozYU%$82%KhfjW-yr##U}1yn1d6p7|MNB zfPX!I8kEhh9T*$hVwSPit>itMWR%pB z%$PZ=#=f`L`3~tQ2*;sO2{s~lCAqy_b3qr10-(-?mex)T*Qz2cWf zcsRdHvaCI3Ta6bVu2m?W94(b`-+Rt{W!EzqRWY$hWc%|(L4R!G(Z1pev=!QC6HvM4 z$k2PQp-z_$QF!KL=G3V%Nk4kR<|WHr-QI-oej2lpSNaC|xqB5JI^r8XgNn6>gD}RH z{khCAfG*YKXZUf3>%O260*>qaGt$6-CeZWDJ*#Yng_Dw!7WG9ed)`^)mc0Qdq#Mo) zPhbM4^>Aot=E|7_@L>~iE+wW(ks804>!&bM2Z}E8wcjsv7lq|sF zLs~h+E^Jm)PY0a9Z@r4cU!QF#7e4%|PaD6*>N+&rDTZFF4^bY;)cC|y@!MoGH_%#H zc^V&7WwUbC0u$N6xfL&Y=q8y=WUI6y@mY`7XQ=3-(cpOmBtBaS;s<(2o3nmdTXui5 zmWIz3H#qPQdQN=Map(NAE9`bn+F9pLe2Q#gC;uf#HU^5h#4th4vw=9B_vOXGusND` z+qoM}2*k6t$}GB*Zg!&y3(acehge>;{C_>fBaGiOoi49_iC|w#kYoL6roG$#I&Qxiwp>fb5DjKR-X`B$O5`$QW46S407) zxNNd>G+$`b570ikuhtxi5_jeX?@<`yHBel&u#?FrnyHW0%zptL&y#d?b<}*7-@eF& zl1JKg7yqg2?|}qxcVpH_ka2Be;|{vtan!lrzE@2{Z!t;i_kZPpj7sNB#B`u-XqV%5 zSrNoR)S=q)Ew=w)r%(fT^|$5j|0U6kX#d=n_S(Mazn-xeFps z=cFVHet9p)-gw{T=TI(aQOl#ZSD|*~KH*@tDj%y%sNCe?qmkE4rS&)BY>uhg{bo&B z3jG*yuQpw?q$^h+GuJ@{1f8YSmz4cWB1sJw*;`SeHuD*WjvOkN^0spL88qS@SQPlz z%UaHBSI4)ea2BBhi!XFhxuRAm82vQ`d^G4K?AY(Em z`Cp-b^~s(~s%GxOoY&F_@FNGSSZ)};`gw6RZW~oEVt22+QUBmmsTz*&Cb!@`ZUrza zO3>o80khgkCZ?YdgW5h^y7RwS42Q`!w^ts9ccdw^BEYpzO`msc z5fT8&*zc03|6=TMmv@5>$ova|SBDPuHYQi2Wv;{)VVbP}5|DX2(81}okk|7w<(c=n zPZ5vxwQebxw_<85?R})ms&O%CgrA+eeAlj;k|{fzzcvH|{2Zg}>ndf+nz7x)CBKlp zKhxKE72#8t1P_)7ycT9IAE(>Ajuj+T98Yk|m~}wR6}(8{N{PP&b_m1`&#ynvpy^)Fre`(mz2zKVKw& zPq&nwUR_cd*v`*==!NHesxP9LZxMyekSgV)=dWz~A9dXHeM(r9CE52$bF}T(#Yx$v ztFF2>bP}>hOXz@=5~AbutTdruYh!!uN=8s}aF%%w9^AZmiRMM{m?$P*7rWUC74O`>Rj;-*<6i8~UwpbFd!bqTQcKu2&Z>U}%x5D( zxu{%Z}@hn3r&4PYKkX$a38OvNO+?w!683ykOhj*qBGK zhKb!RElv!jCxGx+xj)kVP)tm$I3YA7q_MR%B`@zo$0J;MEsUkN*W&m)6JB@-B>M$P z2iwW#%#geA6XH?*?oMeZlh~D6CCg7Y{!hxxD`iV?P8HmtepI+3as1YeO5shRfa)3s zrTyErH|gk7-e_;POUBkk^NHM;U!$Puj+OEh2D1i)*VdpJxzGlSU0gMIotpqX9uXE1 zApirehl|PJRE~}aw7l;E6%7J?{wopI5nnaN*doy7rSp#+g$3RN~@rbR{U?omHe*Qd*_IXf`Z) 
zAgT5}m4H2#H0m2%em19#cH`r8WWXIn`Zr(HXW9nZsXXiDbNaCZ{!>|T=qy2)22E}m zlhp~(1wMLQvZPuKJdj7LM=M-8*w|izgU`pv$ahLU2+&uAnyRFGltcMJUP@A1%idzqaFXx6rxy zeqChttzwL@-`3{lW)by*ddai#+t3FjcXh#iqmr;S+>#&zPZnIu5A17wGzVycAiVy? z#%9r*NyiwNs=qPfd@b-tqb3|U3r_n2({66~KEr_Y;AR{BHO0pE#&lC59}^TK*ln&y23U&~#!!5BW=6Fq7cA zw87jc=_i|$=x$x>k%g_BN%!VELaw(FK$f6oiCoxPs|aTOl3sa;?&O7&cS8>!B_E)Y zxy?tis*{v)yfk~5uJzvj*_T~J&oVD*!l*8kMoW7o6&RV)Ofs2s&Gq!ZfWtD2{|y}C zVtxJ#ZWv{`2;3ksAa&ljabqWeXLvV2mW@L>{<5?*i;Rp60(@n_RtM^uNgPgRc3r)l z4FDr>;3?F;cZv1V54vwTLcX84`C7hP3r^CXY+U`=pWL^T$j4P{o^G(Wu4B1hky?TK zwGt>Y_Q23EslRz3S5Lyh(NU||`aI~j$gnp+QG-_VWB@;+{Kbm{W8T|(P%0~{tDl3h zTx=2*%P7t=hySF&Tr26$5M+e=Tx2`!WWYh&<}sG+;PG?3yFu)eC;r9Ed^WK{`NCgm zoGVGwB-gdRuVMg9jKoVuPMu>O5#KV5PXG+44Cs}b_*q6kLZYf>Buq)`JP0UQ$a#>y zj^uDLN*=F2hTNq@nBX1awN|B3bW;Px=;4x2fQWFnkpK?TtlXsM>z(PE%!YMrCrv)B z-(tN58WCr4pRMTEUcWa)uE}Zr^t?Lb*iaF=nA$_~y;y^$i)27GDov(s@SCUzKJPfc0f!vUIFA|`8 zRleYtmrv}798s_I9n?JfP43%$dZq3}XVUFw_}}yT+;M6#!ZvOjts+&?spJ4Frd8%} z1^lKA0T@i@{l-}o#t&ORFRI>+hebhiO z1gU-3>496IOM&LKsk75(&RVSM07~W)>O*5w**>|@Gu%KT`x8R8)Iu;K zZJ6YBCIwkwhEe^dPQM{8B7DtHKKYi49y7Tm^Pgx>Np9_W^D*fAgE*uo!>di2_1loJ zncv1ynrACxnPF+Wo?SAPy(DJ)K2+GuioClCrN_(XK!h#{&JnM!#s`Ps-5m(ryM!9L z`bS`T3D;%MHM8AH>e1 zOTs5_?-P0+B1AKn9wiOY0~x`!F$y0&#>E5H=N%Z8oDv*4io9tcQc9jrrY)I`GtosfTR@yRsj+ zK2ttEGLV8!`ppU&jS?3ogBS<<`rwVVUi@|2)d#~{{|AvM=CY?XW-(cKvE7;8dk9Z? 
zGL^cR+BSiU_t$0}YiPXpX*%xcy31>TA5g8!^e-#7p<)1o&c9?M5w$C(3AK{X+?9}| z*!J+GLZYYAg^6w?eXq99mFbPkDCGszU)y|S`2)!AV!3!foB0P>OGL}>eK{wI+7taI zZnCC|R7(5)=)HEzZ9Bp1cKf(ue3=8ci2p2x;QRfnVe<(OkCKI!&{~#n_l*e7yYpzX zXRGK*==iLSl#uRab#|4s_DTv**`f<+d$7ArEk8|HnranF*Yt-yLH_hc#olFabYNGT ztjoZ?A~}v4=+9)5cX8IzraOVl0;#cEQI2ubCYIsC^HzfssA!KRrf0vJ3TKXT>L%>; z-Z^>pvsFQH3(5QZz3D4}HRz2YYw#aBvJ)_M=E9w#;?Ff?rBl;UFE`rXa{CwWe>cOO zJF|#RPOl6D5XjgmfvM^-Qh8&$5apIkl9dq#@aPJDOyl5t=X{0+KZc`Gg3x8tW85s8&Vm>K{23r(Bq+CD;nah@`Q*uyJhwGNU_8hg8(#+Yg;MZ*N8&;7 zk^GX9qVo9hZ6E|f0#;1vO9Awz`w9V@j#ZXF*6+|t-G%-S0`jKmL9>ME60DC=tvX7{1lt)DjtW1QkA#yCGqyVhcOCN#hmXe zV*T%Uh;`BsTw7*5=c;S~MAzdPno#pbeuxQ#>DRRIK?Z>xoy1i4e~O^B7SWcFX6ywDzFb6J(d&lyz#Jy2Zo__D%y<=G$47zET zd%I$LUWlEIRTgQzJt-2iz^Bw3?->GQx6HnHKDx6q11RCDEXlQAjkNZgXvH5R>V-dq zl-^>82Fy{wGGY|3p6|9R%^}hkkT^4tdT8Voj11>^I#KRpzF@FTW3&`%psgPiwq>cU zy+C?~GjUK3v}|vOGhbj}hsv-9))FpQz!YI}ga*3%Wa3+vB#5n*yE8Gr!4;{G0O8bi z#QwDA%KsF$B|mjjxFe6BJbCh_iOCiAP`SPX z=Wh43GyWkBl@^{q-j{K5V`GD??N^o|2(pJ}==b}TrLD9|icj$Z0xsw;H}&aba3_#k zJbt33b+L&Y5Ut2c$IyWQ%zBPhk}3W4$9QMIm~!1CHvx|sx*}AE0PU`SQDtpA zI~}STc0c$W^Ug-&7^C}dtKm@>>z5r=I*1hMpDRkc3*VVGgOAZ1yvQ#amj2=T@Z1`I zmGhwrfN{>4DHX`DF@tKx>+V6Xv&0&!>*oi31V8DNZyveWh-~;wQ#Iga9$`l3s_Grb zT9tSjnG>8N1>LDX@Rc%3s@4|s3&C8a<80d7c490ik{aU2!oK*hK9tj0TWZmNpV1}% z1r{5{7-zPA4Re;)#`WxxS?e8>uI^5dOj!He{E9SyMDti7gJ9tM_|0^WG1c<(JO@_=Iz zZ-GZR&XItCI*0?AmgTtpnt$062 zJy|`A93drKm$OSGv-*xN{&7Z-Oz9TT=gy4izn@(9%tfmIMb}@5+gUj}3@lEzC#pf9 z3t7kt8sE%YHTPkak*sZ6wT#MgeB$HUtU*7Pj*&MZywu5goGiUpAfNKxSeWAaf3+T{ zu&BAf#mV#CFn=H?hJoemZGE}Kug5&(yyFsn`QALv&UQHY6l+fSt)-mjekoi+tUIMC zRw&5fV}QVA;EB#$YERqh3N!q#TR@rglvg5XDGkaiIO7ejGhUBYuy8=53B0Y{hvDaV1h+s?=J z*vs0c!Sg>BxxZ+s4W;4J1T-uV-JVIMcCk7~PWC4rPoR4rTVyh>Wl3*i$`^``VZ{Rw z(x9G$C4v*`G$W%t&;obol7p>Pxf;OEG&SPGpcz?$;{^8L z1JHi}+vx!1kt36nla4BVuQC`Z)L3B=W4C|gJeFn*9Hk$+LT7UK&}(jS;l)LI^lOv4 zV!o$jX}y;Vd+OYWXE?#&WR7y(?SVMHmUfMPe1&Rt`4sBX0_zld@_o$m!E79pv>3?- z#PkhC8qSXH*8eglFuTu4A9n6k?&|(k=lnM1tkU{)HOnskZVPPQl|ZJ;&$#)r`n+{G 
z1TTT&k@^1i@Yse7Q=@>!)e1NO+Z)=KzV^t<5p?6Qf18kG_SFTW%(^_OzMMQwb>$V~ zU!QN@JVZfptC=E&`R0?U7aoTU#aN{J6Ikei-z{-(`+Yuj@8-#qbYV$uR8ogdD^N*& zIAHOmpKEXXzVSu&sF0{jx5nBxR?WADJab$>4Mw#@7Zv9_c?QdHRg43@HL-hEPu-hN+@SB5D`Brz*XL>E zYA%^$)ZtgJ=2^xRGmtJk?HGcf3g?jK{DVq+H`ex zFx6})TFVqiWWTteMF3I7MEK0qoUf9KPr{;e={Sb_!i5WP0*gv$661*==U0flP65Sy zWo2b!bF(3MxV+gu0a&#p`IK2I!J2|=E_nbY02pV$G5~xm{F1J_uN(XME27@pKqvuU zuW>N$1|LaH5KhI%3$2_X8~oJ=oUe7odcW02o5A3?3*t_V&z7Lo#L2_+=_tr_w2Z0E z$=h4_i+UEwu75$S;`Dm~y(Z|B!F+?W26!8LHnFb$0V+h(}z4YaT( z*w7*GOk)G^32pMp+ByYA?vbeLEWEj<$}?hO;-~5QUay?lVlW(bp1A1hvS?ed^LZZM zFgzN1>f=F8Vz)RbJ1IEPnbxcOYp!EI(Ohe965^QS;QdXJmIodcOs{=(#B^Tzyu6Ve z&^cEarXJppceR-=%D_i0AdTj?SWu}${X^?HRr)EJyX&_)SI*p}ciO6r^i?(#~V3xVEoQzC+5K||TeP6~$e&oKkQmcz6cz}eSpS{+c zoVaFXvs=uNuyuhn6ToVR`g$fCqVZ9dDAg6#7L!b( zkq52m{R!#QZE4StXncYRCuE-^$1X7UxRs0lTA&KzX6CLs9G4oh00n2WJQoj7O=rzI zS1|Ch-&R&iLTz6MA*Zpi(O{t`qo@nKwH4jn#h{Kq@C%5PQvhy|cgmM9m&xM;$RKJd zJ2c;*FVVwg1v@2H+}RI6vJam=)d30ogoW#eBec}wH5V{rUZ{s|Kj^k zGM-2ZET7WX-j-5yH-+tHaWhz{=Z(CJMwa+*~z};f>h=M!co3G8sJW5uYcdECC zE*D!xe?^@elL-|m_WF(tmkLZsqrE~m-BNAlRW(b4>kOWj$;okE9JCK!T@WGtELoFL zRnfZ_-0>`xmeELkuz&HvNPEx^LQ#;7<~|K^%!eIkmWfhK8TQ%aq z@$se1BHsI~eD!RY2a-uCDCioafPcTRw}OR<*v1BsO-< zXo*1oIS_fVdkj^aN+KMHWRx2Ev+aiP=dDnVV=|GoPY7%;h9_4h?|(p}a+Z>ygny^g zx;1-$h{x{8kt0JTc6OaY&!0a(boj8gZg@-#lU{{Oj!5g^U{qV&wEcmO~JAwNx4$|kN>lUw74`NGsa;~L)&W^m95H2bquEq4Vbx=au za|WrdcFRZZQuVTa;F!En#dTI!dW~9vtQaP%VRU-ss|i2;7dksSmA94AoZz!DzOZOm za`ceVra&)d<~OFQp(JCgsMyOYmyee(B|Rxj*yp*hr%%C^mNLvajW87zJ-N@y4=a9r z?#7iqj(l4ZV-{LjulUJMku>DFAHyiqH-cg%U{L%<9h!KTIi=UBnPWt|g*798EMNT) zHg=mfl_3iQkNART`&aXpYe{TLjGv&n`%VU(*?i0&Eho(aQ?ka2g7ic~{aOSV=+B&~ zEwt#K8}9$w?hAsp$6W94oBZuq5fwdo=$R4WJO>%SqOy0?i$PSE+{dKSOe@b1ybj$D z@rpFE;Mn!5VG6{*h#|j)NEFO#cqfw(_9J=y_`kH4f`LA9e=QX+=zObIZGd2AI@mI6 zysq&FIe9nyPV(Ttq%?{XYvZ)S&QvH%<)G-Xo=bmzhw4(auNUFpREx0A(5bp?{O3x^ zU;p{WKfn9WqodOt-02nKCRtcm;LK>tHQ`82Io-woE?mD_xOE%lnr(D*mNV&l`G6qS zs>^}DsWe&-7nwZtoEYahm{M6UnhPS(F&V&2@3EL5>PM?bQia+Xw-AB9V 
zMV^*wW=R}#YRw__t&l##s~%q&#HQ}riWgGDmXN)7;;Tiq$U~#al5HGGz4HkH`B6VedvRQh_&=@kugkNOm*?H^BdF5ARsj~a%;7b-nGM<;*wJ+nXFho6 zy-Y$tRsl6j?($T`*^YX?jzvk}TPQ8Yw`NCf%f}Udd1$8KBWfGC)Uf?^X+U56&>O~} zGO-H5n4`Y}kGQVxqOts?R>-sW+VO;8cZVLQ!_MY5gZz$1fxtA z)lK)yjMYiybVrJ!328wa^L#N;(yKoH?7o|R@s@FYb7G7+;V-@#?pNu2ew?xJB|=RN zp2Rhmjl5DCy&)%qd8Cm-d>ZGmvN}q)3eSm!DQlJ8!1u~&{cBCjf%~5IhPB58rU{oZ5E^w0&b*{ z6pDhm>g%yZ9!0-1Q4>v1_9DB9sz-u^qir*NQuSr}_?NaO0s&IB~gyHD^?fxS^#&@R>u@@G6 z0u254{;3Ha2vPXs9`*tRdcT&Jl>q-9ZHDK;?}vYy+wxD7S38vJ%ZsyXF57~*p~8`C z@wpAbyVA8~` zC^TlFOX_Vw+f!JS!pcXLXm$99FlW|!l>B8N-+IpGUC@6W}LosEd-!Z`97 z)=LNcJi+=+r`rcsMq1eiFMekEg6Y85-741VZ-brhDcRB(yCe?1dbYFp-s8{nTv@t& z@IsHJr0&S7^LFOu^bw;aqqZGvRAEA}tIg+1=;MqCy=Zu&__fpr7TR>aPkFv8^&su< z^M)#W(F-z0YuN05O#Yf#%luFJ_gaK=*dLW|3zE+uj(H3>gX-yztllSHP5rucmQLj`ywE4jG&gmBuvp!z6i&j}WS=gjss zRwfnCYFYf2Xx(mFsXNM*%V~uFd_#d%us(Z7A&^#evLIsx+p*KXEd(PUks-7ju*SGA zTArl8$Nb)=^87$f-cR0sHr03vU>u_eI#mBPk_9vT`rb-u%xd4${k>tefa*!xf?`e@ z$d4x{T;D%bG?;G3o~-z_>CrCXIDI-_x$?Po%EVeQ(sbMI$oYO(P6UbQLT9soKvR6< zJYsC%_lebaX zHD$y8Xoq}Vf9bECJ&l!TB<7d%!-*23cO-fj+FenaiX|U{en_rfPx&<6LYh*m_Q%#D z@h8w+c0;!j^;MgfNzv<1a%P#LElIgH;?MXez2c7knhGd*L+1wgwgm>?FgnUJy7Ilsy2H})(?zcU##ZWgDH{k5Zd_B9Gs;kV}fmLGyb zo0-}5?a7Gum5EgWMZsc|sJq63Y=0$dm2Y-y+v^!A5dx}wat8^+mz+GR>dt&i#kXtj zi{LixtoP4%mpMy#NmYuviGQpa#rG9Uc^}j zJ^ecPWbgSB9(%z;EK0YzqUBfa*09P%aCR9cs!P|0#9)+33XYYk$wXF&z^@A{i65axodixAmB6v{*#ktxvLd8orh5chQl{~e58R7X*Q6DtGL)2D;5gKE|4cJ%-;f!DxN$E3pf$h*4c^^LUrn!T9s}$ zHfGylw&KXxB@uJ%R`5|AzpwBT8%~k0Ji4wXF5!klv%iSg*BQCd9tv`%kYqs~4IUmY zyD}Cob+zxYz!8~ms>x=*<`|;W)cOU>eEylF$Vj~*yRYV9auc)~KFdO;^NxjkYr_Q(52)sYni@78(j0wl4?w-6Uxs%jj|_SeWXK+!D~ZLomzQ zvM}GDK8u|8aye||NlWqhj={Ucp#I2V>Agh#-*)RBQ~FSGj`ywiH)#xw3vY1V)(y9};3oM0#Ku66}&y^l1- z3RL%H8hC!)Cba5q5eY_taU3+)zxLl0Kh*adhN34wWwLgh6cCmU2vqj$$A%E?7I91$ zOLn-&mRTU)_Q(~?5_HIGp_m3dAn^TR^5Ny?MU>^TrOfDmw^;q)1+F@KQihLXK6q1VNj3qT1k zS(gh6;jdbisRNnP;DRZ+J>s}UJ3tkfdaSB?{C?IGwJ?YImX9$EDjV!q|IVXE#%tfs z!-!;Xk8-Kl@BfkZ 
z)^Sm_ZPze%A{L0Giim)8NvjB`bW4Lsmvl2K3JL-$A{|PMFm(4Q-AGG!ch@lU9b0d& z=YH?!`M%%xzVG$(4=x-BX76*K=W(pHjxaRsHR`jaaZv7soOe!hCx>(lIm1j0$;Ca*88Fj~6dM^y~1)r_5$IXogYtsYE zD+MAwKg-*TyK*P<6+HA45L)#zL~ViXXjgYPvA=&q2-{la1vz%T??i2FZO+qQeBQi!7Y|_+ME0{C zZ)Ky>PzOrDlND$^bt(-1ymkx>eP@84s5x3#6C=ARudegX%j<$HYc8!6_tXBN*+@+H z+)d`@ih;GseSZhcpTlsJH?e)@!Gi~S0A%o`HkvcPrw~g}e9`uulKOVvUo8MbhQzj8 z{DWF@pV%H)LuFO@Ci0txwC156?#8^*6wu^}m2ccb(e0_%gY6N~B^zdqh%E znR&$OYN!Xqge^?%?N#1aaCxi$@oQ|#D|VD8TSLjF>HK!S3W4u)>h6+J3BD;zK2S_ir&CE8BW@f(#H$Ee}SJl_wnRIN+zPSCV1z#Lt!&R7IJ!T6& z?aq@g_<0+;srby@hfZQq6m9jGBmCk!8;L#FL=48V`TMcyrpOlYy)O{@=Ntj;RF&_W zWheyV{>obVb*fKE(8X!WY*V}OiC(cDnSW^2_M3C6G6!pFYPewR3B;8EJK9Y)YRv5K zZB1FD_9)xY-9pt*M#^6!Gr8~IpH0&$EQd_ConbE$B2a1{17D9(kPqEWRRwiyZHTUC z9ej6=#SKm^_!kr?6bd{XLqWp}Yp)XG#3mX-ej7re2%Z%?$IejcHHLD;qtRm{BdvN- zAX;z%LCtOZ>9df3d6ASf({0NNIkPycA%+Zr*N>i|I{qEJr!Rx2FDMa-KW{;BPPTr< zQTWv%tYiwjeMed)X?eobz|H~D<$!=GE-ULJtaVvK=kQASf77U;hY33aw2DySu%=e!U3@Fk0x% zeMipm5+FFli_)6}i@$l^oCm#IP*Cs;qSKJgR>yN^!gWkWO8CKGu-!T}XPm4?+Y6RR zsh);P$#m<^s305PR1D$Aym47&U623EeyREX<^>$-EixZ(22dbU&#?J&na*>A8ThmX})C{SCV}GdHpOC zKIpt$O{{#ZQE|(+3|9A;jGFgbX%~(aX2%)s56bUhRG5l@FETHM~g zAbig}`c#}^r~9_HvX0%r>&VlaQ36Jeg`10_cD)Qg(WKafm;+a^@$a2a<*^YjZ_+i2 zhAz~Y;q{~%nnv@bj8D%XTWk$Bb(eNHNp5Jki3RC;RrkC5bqRE?XNHU8+-}?7csnAz z*O!B9S2tBQH_w|txkgf5QPnwLcS~~<<&h>lR#Z^(N!-9lK}Y)`0B0+oS2){zHD(uT)}z#gJdYkW-qbnT&Jgh4d-c>fF!1qVzA{p1H%On zwFBZ0Ywa4WGvgSKysko^nkQf-MhZFw@C`b9!ECm?j*gD?4m_1t;Y9%9{u#%9x-r{H z1P_lFtilT368Y?}b^sSkFrY}q^`U3j-rIxenF^S$e;_LhzLC5}RbZlPUOJ?F@knad zHtSvRT9S_7^@j=W4^Srj+Wn=h13Ny5c@aNhuq<)eok72S`<4Sk19-^#LSOg$Za}z@ zM-wfli^5)pUmf6O2voce>=pP+c%Qyp4i9&s?ZioVAF!_P@}1Jqji8`}I1~e*DG*DI z#&8}#s8S&M_F&w?)3=kVF<__B75)w%S2BMpK1xK(TElt#Y!;WSLTHTX?a!|63=74l zzt{B;i!1zE**O2QPjYqH&fI0YFn4-GU^I^qGvZ#VIv`~pGMbSLJLtgNgIwXgcMmwjA0ZlXpW>j#Mf2L#{Rc=&{awZZ>{ zLGzeN^;NU0l;y7MhuglAbF{7;!W;p$js2N327}8!r)N$r#neg7iRjh4aePuy{RzxS z^;T*3R{?T!Lak#|VQ7dr;57EW(eO{>r2 zF~Alr@t$;Vc1BqOqU9j}utJ{C4!j+sFK-=ZOLdE(J&*T_^ExEl>rL6#WJznVAZlI3 
zJRU-(sG;Dkk!mE_=GnWKuRqgh+?X6w$eC``o~JUf@Kc(j6O}m1r-A~+`t=v##nHH! zt~Mw=H1e18O?L!8Zf5WQ(s}8w`9W8smReui-2X#FCt)M0LwQAMRD|dTYiSvEcj%y( zi(@}sZgZ)Iskcd_S=r`aOd@_`D zxG2Ze`nJ`-P?B5OMg$V+x3d!OothS}-5rdEudBoNMcMQ7S0s6Pt^@M{t;`u3q2tfd zXa-PZK7smmBYFwY6oB$TKV$?R+K3(D&!0avroh*-CF39x=Z0I0A&htHqEekmpMm^n zWo0EkF;P)NzOzp-JU0rEneTwW>ER@s)a*lw&}jAUSWy^a_HXsi=^`p=1C_ ztwfO`9^vr#!Vt=#jubNw@b5&t>UG^$j+i+t@B}4ROMub|Ibp%y2eHoqty2Wdx%Z}M zhiiJgeEIT$oSZfLVA+;*+>&tntc=~-a)PC^t}J!mL+l7iT)WwAyC)=ZiFiJu*E^TT zjdk_P>7))kduIO?UcFxt1c&;uH(oU87g#HF!S+u$GphlEx z;U_2cq%MIX|2W!RCE92%8%&Ye3~5BgzZ(HMR^i31n?B@D7?s0Hlh%ixD- z)Ig~d4;jo%0#UL%AcYGC+a+)5uw2%?=_2wfR zn@otn&D5wlc<4|tu!9M*ji5A9?uA`nFAD#yutQSe;>22usM@t_*BS%%1Dg}(l|)c` z>ApJYYR&jfTEmSjb>CGt?Y0(bB~_gHy@lH+({9r4+r=yDXuz}Z)qNi;$Ak`8;*G?b z)p_MMw-jwhWR2>MQBn`ZJX~FI7FYd^g^uQp&ddg&F>cu`L*d&(S_8|^9JpL%W33w` zCM-`4E=H@09%>uhH%{>YSmA_af|1GMhWYHuRIDQqYI{>56*>H>y^;ee+wF)=-}!??%=!4bM14il=J zFuLmN>$|;$B{ac!Vu9_1?^R*0wO!tPfQKP31)lu5aXY|w%Jw%k({xIG(Z(`SFscTD z^)E&u1s4mvAjl=lYCu3W;Jr?%8hG^BF(ug7VEck*{7w^>cNFx4O+)>e zBNG!aLH(&`BkR6hvYex%V^rtj=i_tkm)y!c0vp$A9t!7qu{yo`=8|@Dcx1vSvFK%T zy`q+uxz4Es@xpJ^R5lJ_FB`M+_~|z{ zC;gpk*WcbduKz`I0?$eBnj;=a^qX#4XnI3au{*}7#mIWZ>;|fvR!(1HJyc?<2O^MR znLRGbF`Dav+I`S1eZ=x3k{u}oz-Y5)XseJT7Ry;MP%we*ohbnU{ zvu-#Wu+&HNTvbVY^hJ|V_k`n>3;*FhX42Kc#AD|sXEVgF>`S(-2k)pX7#sK+%u2mKaevaA2AS)CTr2q*Q})t~)%QLd5+x zd;_fRGQbrMrngNWkGREw$b=JxcZYeOnVxJ065;i;070hN4FbUQEiKIm^3n^6BL;I3 zcn2I8Y`_!`ww0N#R7$w=-fzv7IFOwRcFX|So8V4Q4`H!&oRm*wbP{JdV2aEWw<4LG zoNT7eXWUHXxY&=3d_(2PN2LZv{OZ}>OISiQICVi9tH10cO#xntF*wT&!FEcb#q`WZ zQcB8G_!@3j=?^c>8@)E@K;e8Z!Y+h88o@sJzm4p9e(Y|g<;JA)$AZAuu>;W5YoW=3`D1MgaOka54-FwZl^On+=YMk%Mo+c~${#rSs$% z4CbMf6y5dfl_?L64Jj_4?2$#S9CRa9VPxfNzfQto&Mz5UN-}5Ixyy)W!ioNGs-P#s=yi` z%UV$679pgD?v&Shc3I9E6`A60(H8R%#0BG*|6}nM(Eoapl`q1|$_l~Hohf#ZDQ=cp znGzTg5z(`AG!XFj`y9Oj0rg0MtzP{xC~09noS{Jiv3bDj(CHOs2m zHyWuBR>O(S4Utf3NAY*`j3}!JeH-oe$yfX-^N7%(} z_F0#J5Bg{C&YRI9c%zuP;pzLQ2zie0CcEf=KWmkiw_s=w-h5bM21rrW_zneN)A*o|A 
zYGS3mNu~QQ+TBJFneBjk$J=1Nzq?^S{qw~o9p^7yE1(oTN}Nov&PPYP`i6#*N7R_e zgl*c6aal$L!LFCmoC$qmTyrKvwDYLZD}!7p5{r|>&XRrn%^{8Ql0uydN;7HDFM_sy z1l+ravG_Uc@slSxJ#6SA8)VT2dN)_5f@~Zde^hiv;XWSh-R>k1clWj8m1^D&WQUQi z>6k7@nx!6PklZVIJ=e+`nUCFjCpU%l-t{)VG;mTUS|}z3!!NXEI7Ya_f!-&~&-0i4 z77>8*%rV2FhcjX_AH11d1ASh-dIrU=Yx``U#IA(De``4B>$7|zCD(p4&B|I@RNz@S z4xo&2{U=5cR@-bZ>lX)KLa~O0VDWDn0Xr0Rh=ecS191=$q9m^>ZRilJ;sTl)*;W-M zo=#pi?AM}?KJQSyFskA~)GE$+5p4?0+v>G4SP?VD>HVY`X>lOOzHUw*WQ- zibZhuyou@Q$4jDavemreVfT(V)OGLu{wSZ~;^@wtIpZAzOZZvNSUr}pHSR%)v+q=y zmc$#~D@)CuG!QG9ohH+QE$28$K6< zL*&0CJ<+SAMu_`JDzn-3adbM@NV7f&C1{^-d2SyBo`LLLq}k1>9=5?m5Lt{WUZ`cN z6<1hy=Uh^`rx~V0YMG3yQNxJ}9EQoZMLuo;>XFpG)mbsQk;TzR8dhX2?wOy=QV|8! z4Q%%D=lo(fI1{(@h#YS$OpR$%f+%m8&B=c_Q$Oey6QNE{7cF+@xKB%TY)bM=5Kto8 zgOz6vnBbP!H-q!i0^hZxT*Ev0x+@oZ%DoSXwwAX?jIJu~IxUJf{_?T0vE@}S8*OO1Us$7Q4v;0H5(=s; z9zr$uzP1KJHicERm_25y`&OKylD|g6*GCxrjSEtz_fgr^A^&W zukrv?g;dW$eP-*_E#h7Ga>*>323oL+Jo-%;MX8c0qWq}Xu1U+6>&ya$S!>V{3apozV^dQ2U zS`&%;64~~1XopeWe7td**0s&Ie4s5A!ml*U;}@=s?Vd>umb?FqTu^SF57W$7lx;wg zC=poQ+4CB1DO$wC%d!S0MJx}0@IWX)mLwljv_!LNm ztZt(-$Iy0jj@z!zHp}+cyoxuD=xzNbL(%%oR^|;u&L+oT;jE16P9Avgz*EECbDGcE z9Sqt2Uh%yko*lT{m6p;8iXS2(q^zu1o12@bHyju|Eet1zk2#yIp9Yd{hMc_IT!|o| z{P@1#G4{kWwG%J<1zdJq zcHgV@FO&Tso}uY|i|iw?NCRuePBTX|u?#&X^uFX@;fc>kc|~U|$1Y%Y)}`vmIGJvO zuJd)t*&e21j8I*ih;~)?>24KvQv3+hU)x*be$p`CnN|{L| zb-`Tba@#&DpqCUNcnNahIi~Bx=9;%~`VQp8`F{ObUeWi=^OT%*+GjF#Dq~74kzYDT zK8~{8L%0MK`gsMn%xr4KedJ=YpjWnerK z>-cR8N7?fqpZ#A45Da?V2Q239-R>9}3MwjuNt)Mj^!ff?LVF4z)r{!<9#0E4^&|Yb zsy@3#ZgGl9U{LZUA%sny%e1q+z2;d!ke68M32a-vEzarzwmR}erYI-c{dxK7+=Vj0 z;&PV1l?o^W7xbFhnuT$Q3V6WN7uF}4z~uzc<6pSOV>!zyVb@FE2fd|ludCke)(S8? 
zCV%mwcoHeLaD}G>j~`@_2PuJ^?$g!6PXEiIM8MSAA3seG@|e;jNn#AjQl$)y5%jBR zK57=tkDI?_ZKsB&>2F_<4a)q_&&(40M>Pcj%uvT50T4u~s#1?0JxV<>GHC*89T1jE zmhW6#oa6cVtiP`H!ylym*z>I9la5ihYon-(DVtCHn+m%udclseA#(MABRzGj2Ielh zJCn;))X8yHow+nPgpI#S4J-yTrFJH72Rpo|b^M%KqLO5GL(Z?>e6;L+&=1fG-eN8O zg69dr=F_suvR>DEV5;m+oQpa|VvUGp(O3&!iy$3P4z*Ralhv3szKK4xy*N#CChFl6 z=N3FK3&CEM0yIZN`uHEt1qI>4>Xr>FJp22`J24C9x5{LZQ}FjucrR4$PXnKS71G@i zlVgN%0c!>*Y|C3(0*}$UXC#SnFfX*+t0KMBsI2ZPf6)wKk!xF&;m!^dMGO))n7k7!sd4BNWyE(y&&s~^xu3~fE zxA}6y#tQgm`yiHB%2>p}acA;ZNci`cAKGsfRo4vog?Z8H{YXZgn{GdOnER46TxedUMGL2}dEp!>GnMWuG9T&ng8{;>!FO`1c^&ymw;@+cH{b zYv8?E;1}qrA=S;hU^-BaRSnObSbEUgvT5LUV%1U{%nF=dIiz+T1$;j#iD`-OMOjMXVZ<9=?&MDCr>_e zbgXV@XjnhO_boXkL!D}}ZN@>`V@aLNKTO>IzH;dlNM*+sN6nlkm;Iq28Yk0E@Yf|W z%k|*BXZj5%Nmz<$BVdCiO?>pWEx2^Up9 zftWW#p&L0eGI&M4^$3*wBjN42eQ)pcf5~@mC2q)Nn%N2ajnbHSKC5?S6)#8I`g*!4MH$(11#ZFFcaae zgINo3pZyFa@C#x}Uh;s6sVO_$whaj5jc)UT92Ks`bWb+=3w1OK@A5Gy2yvYNN`v=p z_eIT4-x>N$EGt_mz<;5asn#k8n^=Er*>NkU zZ#l%P4w$6LT+nN8~{|W@=lv|Y)o#Z^Q638qJU!Np5{@Wy7Bbf3AciR);yIt z2|4^rhrieBsuiEx)*?L}U0hPdw{A!tc${rS4Dhlcl)!umjZpCKu4fKhI#S=z0DR8vOM-2iCDD??m!g-2~ge!fC>`L|8L1Qbo%sMRm2%XnyCuko*VG|GbVDc|2&X) z-f*Bc7>O8npU`rf>1J$8yMWDOr*~Bj3iiNkrT^<;e-j~=K0QaJgh?R?W)dQ=Esj8w*_=!G?J2&M*(gC-TCP3teMJb;@|^3%#IKPcmjk6fIueO z#u{ZkgSiDH$9U8{SA#`eU45ha#6~wAKYy5*&zVfa1~NZb^9|6!CNv-PM#K(-zkX{U zP9{6xRwo*SRw^$b=5jeXb{jo>f3*PI$R32?6P=jJl?Px&{S}&5?=?Y(+4~wA;Z>R8 z6@wed+~uiDtjJ77?(9>N;EAf+w{MS(E4qbzzdD9jozg1RkyN*#S&U|xrwICXH6{%y zbw)oUqB$WB$omoTl%akUCnSQqTjbNbC9{<}uezlBcSqbrtF61cnK6ioM80j$t{`6> zkuDR?%^rNHFLg_+05KY`4y2WHcD{|;>)WWq05%1}i-*siv4c+x9hfbHD3pjta})48 zE^x>Pr}j`T<1+$L&7-USNc=Jh5*8}pwAh%TUtuy(l&fFGa0j||sByV)?hwNP@-D-i z5@`2AJ27h6;Gci*-o3}UX2I$%8(Cb^5FQsr@n!#yg11~UTGaL zXWZ-e*c!+k6@VNGzzsir{(SKL$B&5rzOdV7JkW$lI}OJ96+4Jh0Fz$EiKBw-Ir!-i zJLWdsU&saA3vaymZ?g&wh}{)LsB#(<&PK;|hW58r_qxQPDa@-o!KeiamyOnKcr_-&imh#?M>l2t!od<1#YCf2fZgN6B6ELy{ECw z=`s=F{xKu1HRwBpVpLXF+v0I9kXKdP+&l?0=11-*TuhXU34jX))=QE6cHTfa0WN;% 
zJA4hVLB?<3?w!AI0Ya^E#U~<|#4w#%4s)t!Xhj+e?WUZ^{P~c32g(_5dvOJ`33MlK z$p-5k3yWL80vQaJxPaz>JWqr-I7Ag=2z)8balL`T&}h$H(=cNG4{dzIE@{G3rsbW@ez zC77|-pK6Zsw#x@t5QGw>E|(uSxq4>cA@@?)QVGZj|r%*!B*ZqDi_V z$Y}Uq&A(H+IB84~kU%r^aF6v6@I+z?7aiS8Ukz`55W@9q(0E$0xRwSKED(%uLmGxQ zt(0#$o95uv(SUP}zUoamXw~voYkf)dX{G z^z7{K!FxqRxh#p32zU}7xOMVt78-yMLyT_kw1B1od|+>t9}pyQji3LrxPo-oSYRLd zvBQViUDr)S2spb~BjyDNzvBS>kq9*JGKz}l*|0cH`RwG4gb*D>^THwhaq#9J{t>?t zh#~^jNZnU)ztb~dCyS(J0An<~u+Wrc*xBH*Of9#IF=h2;>O9ghdx_>D<{+RP>bEgh>UmQ=c2|P)|=I1$K-L4M(P?e2R;U zM;^mtK}=Yhn=>~l#^Q60MpvsJI%=B8Zondk_m=2RXrSJ*VY1-^p2czYN;4}>&WX7b zchqe}WSgxD3bauyt~*C#+s(e+VoMHEQ?FCe>Mwa_Z_7cow!V^X7oEZK7bgJoEiPmg zfsG)Qu8v5k^)+ zkXB(<;lN=6uJ{TcUkEbT2Zn)dAE};^Ie; z-x-RZ#%SR_cGJs{!w4=@uU~gxI)^B$=I6DRxB@^{A_I#rb(@Qmb2f77Kd7HPeR?;8 zR}N*TQWJgui}`szG&lN}_?b(uYWu3yaVDUkXs@HLp`sxW$e^6?xfsFEq$ylRBHa<6WDN&D~oWvcOk2rxQ{|jO)dLr-FYy1dZVH3WQGAa7@)U&>)O66oUSbfDP0wI;WJY4k z|6FMljahJbVJSLmB@-K@AYZ#j;Fp%TdzF%QPY}2x@l>;Z`x>CkpmMqdw8C}p*aTOh zOz<3nDRe*z(#v*zp_V#FPW~X@YT!5t$y8f{l>Pj#5C588hVU;J5P^mYxnxyMP%@8& zgBbV%DXBPcWk?qQE7#ez%L2PMi%CbQPO$oY^X5$~ih%bwJ7&JdsS*g*N6TQTps1pv zGFJo&s0_O)*{^YNrbA_=BL$##0Rf*oVSfuAgY#5W7WRr7sjAtaeizxDrh|~m`*6eF z1_W5$XXc*&|7L|=txP>|n(?5WnL2;`WHToY`$i^X? 
zDWI)QQ3ti}fK+Y}cyi+VY$pc6av<6r?J{w7Fwq6@_=*E5 znvIg-&a-kx+2I zL0UTw0}AXY)x2&D*g*sDS`R|C6yJ6#;FoU;6b{cWTCQqEsfyYBFcd1?x3SG!%$CvD z0_KP+Lg4pW?stb{19*Z=Xo$Q?Xl22$4$KL+L8qD82p#_Sh9)l+Q4~y9oRyFy{+d$1 zD#{J$%%8`=W-rST$Rzb_d5}lt@4-U*W3qzjqi$>{LL&y zS)TX$@63|6mKKEt+WY^GT$1R2@CQ5LQ?OG%aGD4690mL*i11E;3nTnyS|!dOppFc8 zC{18~p3WZ=^#YJWkOND{AV>b6&r@7Tszx;A3w;qew~2hIshse}MbP;Z>rnpNY-&IX zqtbC3^PqpBl8x={*XQ-sb_hT8kTegpW5@`L~2Keznv2OE{U_j}y)pZ)OnTmGMX_l;;K zs1radRsj|HoJ>nY1Ckzw)D7@BM4VT8qQSI7|JuK36DCO_NS2J&Mh0+>GpAik2di6o zZq1CE0#O6F)E&UyAiv{s8@B+9P0|Sv5|RfbOzB>od9uG?)tD$&yuIQaDf5KaXdh48 z<1nfh6g>3ayPmLIlYTq;bMN7Eo#|gTxe}6kkzw3y^SvHHl$$y03SW2k5b%vp@b#z_ zAdI@*qLS>D!u#fmGi`m-R`GdkJ2kyqkFf;F?DXused(R-oIl;Ya1%f$UdLDnluO~_ zeq@>8Id~LR7Hq;zff$+Ikc(~v^X+F!^$1>rOp-JJRucmi4TA7>Wx`(LxHJg34`e?= zAeKd{BLomn!B@AS_{(VeXV?h*Dl#SC81?1v8*s8+=LyxZ=kbf_>YK8T>+}C}xSAp94f6lQmUHf|c!8!Q@cQ3|^ zO<#Y1_DcMRXqllVBc~zJgHqQtn~nIM3&c#VaPZqTH{vEQW(gV_ePq3^q^#g8p(Z8! zuxxy+o90(_ilOz#=#aX?hRcn*1dhG70KQ&h-B}wWPn<6o#&WeTfCp#2XU2Pdzx-GP zcDzj7Zf9rxE>UtWPow6>)sW8DiTh4BXh(yn&F5Lsd9psTx>Gr>njfQ`-p-E-p+7IZ z+ss$S*EMpVY>=X=?+fepN!ZLz;BQIK-Ig_qwkz0AFa@Ma>7xxpekSHjbwWu4 z?pO~QF%GHj5%OrTmb#h~hdMgUeomXMY^`RhH(NhU(hbf#TKeO8VA!14qrILd%p%&_ z)CnpRhxfMoJUkOBjhRvAD^mK*gtlbcPgW;|`bmuf)01FKX=ZtCl{aGX!9JBGt!4{J zQa6^TD~-f>9PV}n?&Y`CY@DH2s(q5hz)*lrOxFXKvR)>FyC<2_(SWNW$zIxqf|z{^ z)5R$?a<7k<`#)jV5v2^(nvBe}a&T}MpPWR}M76ZjmqZwE-Xywm{d)YvolB&oU*vBt zZ}JktYCb|62eP)tE-7^ThoB%aFfp=ma>{@2We>Swdec9@z&J$tqJd>@OyO++WBHwz zuTb(@WE2ZYE$o0GEh1IZ(UDhQU*A7Zx6D0^A;$e9bf_?%_5&~`Nzg@sF!4w%ARIZ? 
zLxVXe0Fmmz`MNkwk=scKP_U<<89=&b-z0vM*v@$np3YYMj>Bt@l$&MduZ| z9;-gLZcHce{K(z2het^2ySr@}RN&%*b1sx1hlv1c0w@I$%;udLe!DMWJF3XU=DtC5 z?!cE@;hTK!LMk5vQrkLUAZ#?QBoFJ?e@#$4D ziQ=CRbPBglDB<&ZTgWt5gmc%4d%f>JySaZ+rI+Gm$^Opt2v$k}tJp_Sp5%weptJ{P zS;h7`7nkj~2EbY1O-x#>1UdmF?G#PB#%{&BK`UwGxheG?eF&l-Ks7b>?k`Ml zm@e!P<;A4u)uUV{p0u&*-qTiT7A@Pkl*XENdj40Cv4rr=Bpl@z{+O8G8`*Kbk3UTG z3}zk4EOna)r3%*r&FUicl`d0qHIB#*1DwnOF`O_#+u7NPWH*B~_M!J-VRaBO`%p$k zJW2Wd*|SJh4mC>V2lG-85W;yU4GJm>e{b;iMRefLo@I27Awx|Z21h|Dj>O`DKGSjj z*IkeUA;2e+y9p;&?Sw6GP`Vx)q;V3R*k6~)$f_as6NGM}M9A4y`Q{u9IH})S0)N3L z(9pokRnv8v07|nO5)w$$aQeL9-kNbzt1$)i0? z6c5L9Z0}q`Yh&5FM2g?!wy>J5PuQnBMyc~Tap(${zR$J0WS6we84U(2# zt}3}_X>}vTX}fzp#Ky@X=u>{H$ioLWc`RS>v z2bw3elSNT2tkrtl%NxVcXT`kGj^I>A3hr zwZ$`#tLM@-tEsWq_D1}aVb*9^zPg~(Ii6e%)uq1CPrLka9x1}2@srm6xpNh*I6?3H z3F*z(b9oHL=yt}hQBAAX7tOcw2mJja7j5G@Q)Y? z5+=?6c9>9XuTL{As;`or!+SPT{E9e^5=H(mhDLj0dFU~?OpSmpcYJ)D-MlMB!n_T7 z!C;+I*AF2fl2FKP4Z5)bnh>mN?ezkQ@O=+1$5H`pns5A%^ zmT+{;Go3Zibb;4ox~v&xFBmfi0G|P|lzR2%;D<~lx3DqfSr1wDI)Qzk(DL^~h+G!r-+=X!feh~iX5nd|CQCGQ zPZCkBSd~wx5ASlsjM~4B9s3Xy;$~AbUVLP=JJoicT2*byk^j$ zx$`WE1Sgupgx7f_CD&N9g!$=@Scn$Jq}FT=CoNxzW8W&;hozrr_jI58_SLcj{sPW? 
z^b6xF^yzkl(WzgqPe|AT+{TN>BK(Rmlwsm7k#&X7A#>O8&T%WFL?upiY{{!rJ3a3f zIgUTi`*JRk-1`0wvAVggrnlpapHJ~YHK2dyMc#W&$@XfO@y{loJcEvTqMUyK1aDA* z-HVxaJ`*EKd>g$VPfjy*397Mv?SX0X)_?{_t)YN%Yi3W$weFd@+pZk1GfQWUspMUp zId7HG9&3&=vBza>d*1Btx3RM^yx)zV$9`N)+2zT_U)kTWVV-SEKEwzOL!-tfnK_At znDx(~&k=XsnfDTztYkkgE>YUxQ-z1DIySod!{>Gwk&5Z%v$)h>TZb+bJ6vl-dwOTE z(~`p1PpZmjEvQWUF31Vp1?G(i6lGU{Oo00d<8L4(=L1Xc>wy0Pm2(5Kx&h%$2}mqu zf`BTJ+lEVMFU1j!qD=phuk9peCg6>n_FY24?S@kPLThK zX;&~#tPm9XzA7M6+FQl()f}b#i0#_Jdot~k4Y#DZ==1G})y!{fV1sl)6 zr<|8*t@evrH}zt6bB!ZgJr%i2X#2IaTKQc{Wu$ice1}?bohlRL{?!swExHq84-LoG zw))mIHPjb7{#kLp)}1d%-OA94N-{F3-tgo>x#N`{!gp8JN=))`5!SkLCx6quSOEr{ z%+vnaSM#|5c&7a5?Qq^zi^=?8%ubaaObwX9|{ij%=15MXa5 zG@2ghNthYL?WgY(UnV7et%^zb_N{i*wH*BaoLyYhwXJVo8`aQ+QUgR6bxo!GY#0qQ zQB9lLr&)6X$7q2s2WYhlhSZt?h+E#+m;gESv3ctj;CX?5US2QR3`>Uzx5Suz9F7RV zeSaLA6J(`y%o40fy%_jI@6P1+EtSjxpKRkb)Qm?BH>V=WaN6><5{FAf$@P9Vc4}Wu zGG0aG>mJ_~pbnqq{G{XQznYwU^X`}R>duAUtq)APbM0SGwY@!~_op^kU0J#NMIQds z+ZD@-jr~5^%hmjom%DsZmUr9Lm8-yxmY1rx)udSZ#vv~2i^&+K?ZsNzGLJmD+=05~{Whc|Gec(n?p?(jP4LTpa*%Pz-AE5->;GlKX00=v}%ZEc%{sbt%O%H3Us zj&?St9aq(olB_;irF#Al!Edw+$weDDYMUA74EbM4*l)@t#m3=;Jl@wm{f1#WzMe+N zK&LU)5csAfwY9Bw3(HbVqcpIId4U~Ml1ghK$w@o;C;Vd*RV%v2+{n*&`-sgj@VlJus?Yx>EtR#X=D+P)8-ZpIbzBlm53}FV zaiQOj)8N$%gV`|<69L?6Hmj8_39THAOWaRYjRj>&-zr}D0jj?lJKry@ieKGC`K_|kE5egwKsq7RvJ1k`6|XDS8>Bmi!k zYd^}u+AYtd^(7$RW@cvcOu={dzl{y8Zf+(5BlU4Ct0wg#GN1r6$T9&54nBkP{RN?s zr)v1!EGS_i{Rnm`_MSDPe6C%)&|T zpVD6(1`2l~nbgm^N*_%wb#0S$U9lEDSuZ88ro!FJ8q`^%bso&(`IwCErw8zIx`AgcS`i$KKv72s2R|izVcBa zv;pc>S+*g?J;4S!^k!UV=U2Yzos~+R-JC$agx9-AZ3HkPiK8cZ9vUSY=PHY$*l1mI zW@_7X%Xnncl@n4&#)BO^qgp-r?kV*Rsp0}`?Hm#`=N^}d7PWucl0%>QiP6@I+xHYG zjOi7icJ+#0VAyKKcw9n*vDpIM#!hR0Hwso)!_!Lf9OKTxCP0GPvn{F2&)^9a}!=Un{KW%2M2pI?%Ng;^K`E%App(c z5ec{0A?*~A=zFllfZ$f+~DB&0D_Q!-d>GZ&x;dq!d<7Py>LNH zODocpVi3MN=)NWcI+#E$t3p2*L<$Gie7XD-TWDFI0;Uu0Q@cov`|1gLdU_&vz!Aw$ zA4M!6?tmZyF+`0`O!NlasqgX*Pr$e#b3mU}v44DgyhKewVIc=}L4%%qmQbfP!wjRe z>1LiWC@ui8MDnPJ05}xvAe*!pED3{wu7)SOun>Xchh+MQs&y$-AO0e@+$#G*KW6ID 
zV|ErT6j{x*bcucPO5wWl*AHA(pz6szPvlh=K!;6B1bMZQuKmU--%2+2mt$_-MDpl% z+b4wlj|S&bF3|l8kzfL)00;=$*7u;rdjvv;Y~z-rdSLsDc!WbrmAs~=6nIv^NLG7O za+*xB;+eIXfZW4wdrGR>cYQvmx>!tm_;0>7?eYF$;}N>M6hair@${g<`J-cVGfkgf zIIUG0r(c+*e07e_L(tyAY#-^x(^L1@D1$r`6IG?)_TC_Qk3X=UDW{WApeSP~Luy+X zX$i~Y#_dR_M4@Bf^Mdw%*;?@Fd)~dMmOL|96gN%|hhK}oOQ;J*WY{MTJ32dhZ%Wl) z5EZFVmayq0fg@75%MykgIR%xr#_ln$BtleT&Iu21j;)4#e%q@@tP?J2H@>sVQN>=W z5w4G*6fwjVXD@FkD`p#{urueSB+S~KDHQNvG6;X_#ZSvR`{D^W_KE_Hq(I#*m>F3ab zUA#q-@t>9^XX_)26FE`)HDLRIT)FzXP94EZUP|5sWybmI{olnBXndMmR_s;*acYpb+bil{Drzu*6 zAAzSlOF>~4Zd3?L2;BBqVBQ={2qLutX1Eqm1PBHC0@NDZ;SQrhWO!UDQTi%3_a~Tn z0}tMwOamX&cmU7Dkh!==39yL@0l*OONJVsY6<}{9n_?7Fz$*0E*}*^_X54?p^FF)I zrfGw89P3L%w5h6nCZ<_X@gc)rR@i+<=uQlJt*qVZCu~38QZFyC8GBsj&JWnd+Y1b~ zU=ML4_ccFQIgNtBLM$_}K-m&qxRvj`kLc!SMCS3lx{rYjmjxXbT(ak`BE>Z7BqKw>&l^Q67t4)hY3S25Uuy5%6#68a z6xeDKUr<0!LPFAh>~K;}I0d>>ESX7$xGLDpU04N9{63LkzIqHZBPQv((3Os)xiuX_ zc8WT+OCuv`COLy&vz1NFZ>GtQP0b9%Cq39WDh!RytDU1GlV$|B=K-IWPEYWt03LgM zgSdoMCJvJTg8${`XED|ano3DYi8;xU@l$_P1~e}EBuRVtu$u5H zmN53>K-@zUN=biN^G)2~n_czx{uYuSyZgieQ7FxpV~j2NledOG&yTt%;`ivwoNW($ zcp)=)*y=Crij~ul@n4AYgi-qQ)MIYky^bsW>Q>64UO!9#fDnyLbZ%#>T3WII z+zCnqZVj$H#>fvLg|~?SB}X(i72w&kP~Q^0*oTT+_ffxZZ+6aqQL-t~eo zp#!tWv=E;T*P@AWEFzSa*Rw3Cv6;*hDS{I zL|VINZnBlWt7jnRtMcDZTM1;Es65AU@Gi-T>I$c9=x}QlV6k;2GFjxVCSWF7L=pji}IAE#Bv~vF}%b4py0FQnXg! 
z?|qRMU88nW+i^}_{;?|D+Yi>Cg)y`*H})v01POW3B#P12P3mPVVd5cSzntR5T4EJ z_tiQ|1!|vCpjr5ePnHv3{{x02ds;z~(V5aMBemcA+wU{m+waYrE5gD97U8K0{DUBY zq0~vdavV$h_0aFThP>vVKO;AuIh3vpaP!4U!|r%%MZo9p?~k;`fB%%zzH)97`+dv5 zgKz+Zls`zsK*Y+aGm z$X%bzOqq~>%0K=!ik-1%iZSUHSA^4EhU&J9Ymoibuvd;zE6?nX4cnSt4UsxE)0!7# zI`r-(6<`GZ=yI<j&=uemqxqCqT>{ zEcJkMv^od&0Qwo6>9vCuCe;1spTon>pKxT|r2PXI|E2d=>3B<;99(iw`Q;RDCV$$@ zSH(g}<EVWP3TzBHna zjrps&@~sPd{v|ys=l{76R_FgdQn8y$LoFb-P|r0(yhO4E2YQ;Pj1Rnri}cl3k-JF4 z_R%Um_1?%N@WVEnMGKfss}Z!)-hp4iA1lKT^*I~tGvM}Wf>Zc2{MSpQi8dpJ>*3pv z?er*5mF>R@%=5I|-3Rsv_T9oz1)+%+^`;(gNP%d3P5+z10yq?lo19+LzX%9F|f=&%#Oa-3=eb6yq#A zR7exoW8`SZQ~jlu?YA=2(|%v?tl5+BkKcFh<*hcopbcBAx<^A;|9|NE%CM-uwqFz# z13?5tT1AlVP5}jJ327Lld+3lDR6?bc?hpZK>1IR(q?@6U?v81)>K3Ja@4P$zCmkTv;H7Os?WO66TZgZ?&K3=yoNGi|4l*DU8F) zJC#pT6sE)E8(jJp#6`_7f?)-HcDm*r|BJKNRXBY%+F%ajWi_N>yWuvKEd1u@>PfOb z#eW9=^6h`hw|vl|1YMKpxVSbTK;BaItom}$!9H7gjiNTiFRxt>)m5#$4SO_qAyY6i z{3;;>yzr!M>(rvvEzJ7HcTI*z_}mB4Fy614Tuj6@T@|z4naCu6f8UOc!;St}7X0>v zDuuOon4RMhWlG^-7W^I1C*EJQ0xuYPBa%4X`asuUNvTrk=q|n(qmwY2AK+3s1I0Sz zaq&)qEA#FQdS_sy8Jhbvja7cq^OSt23Tcm`wMzsA;*5?Qj{!qAI(vBMlxN6>eE1Ek z!%brJ*-oKYO=@0s_zC-x!3~cTk9y=M^Y*jJf&Oo?q-&U|5=$Fjs9M)Fd4pnMX+m7w&`)E zW9p9gN_h>tBR)?q1(sP)%5%>(i2isd>Z-7Dd1N@ex@`=ZO z?WP{il4!F!{Cn{o&%z?CkckXE#vZahnvs0Ph+FH2ua2|n>WlFYi?^%GdDg5k8^3;H8aiK`lO>eV<#|G>z=XkG78Bbo%X5165^t^pPg4db4d3T z?+lESa`d94B*y0E$OIK;(C04Sc7n+S zR~C2tN|SF0Sbtt5QEhv}i13C8!dHsv@Kw%IJH^|rq)KuX4P-x7OVLZ5dk)1cj<=pR z?DxpWywG``H8d!dBi0w`eOD+R6^XCL;rG_LahqFZy#6GF3!oPZ?@5qKoU=)vuM0r^ z&sZp~mwmjSWn}%ZWa{h^*meQnX1cv!jRR1&iyiT3ggv94&cd2I$9EpYcNEgWDlrbS z4}@d;8$ ztHn*Xy6UxuBGUsW4xc6@#Z>7Zim*)=H~cSfhZe78{;c$P{3&6?MtHNebp_~ zS&KMt$)Mb6!77j-@SEc{L1AkBU5FZamxm)>hHNPFc|(K9M~@#B*a*!tN-{*38&M%G z7(1i$)oqkpb_M=bx?)(lEE2GA#7Rn~f6a)ybav_6VM$5(*(M30+(PVsMYHxDTCG6z zWz8O68xx`B(Izqh84m}ze$rxD_7r{NguCVRRIBgNq0DHnQFr!_E_=yiyDapP9R4i* zbZg4M(AfSyRu?mDaN_k!L9sd(Ub)dVdPGWjR}PdMo7UV$m^icg+9ETx4%)-2?b*xG zmgV5-eT^}0kleEY_~J7+%wOCYPy(HI{N1y><@gS)-pgz1#2Nd$UyQ6T9qhn@9gEBu 
zX(w5~UOrua(7SACV$y!;dgligcCM$K@uc&lzbizwZR~)}I5>sK6%+_KAdC`4M_z23 zPAsp_4{i?m308p3%|Vvo+Vj)FdOr$q3GO0k-`MKGXbjKv_$%MYadA2-{s`O-Dx)Hg%7=`e*{b?TDii}O!4g-T^P!bCK(*_RbHQa1Q080p^x}euJ zW4iU)VfE&#K^MMv(4~ev{g8HKYfBF;cqgKA6$>sl=&QD=O%YiXns+DduHrx?aKVM| zwWOnEu2f*qhYYFldVQmT zm+N*yY|%6%i(3SbGpf0yCH-^8N%JcaqaAd2W{HE6#T+?{!&WEK(W;{Gs(ChMt~nWh z!7Lxbiy02qmr@xfm3kqaXcCI?w3MieRq0cGpi8=|<@HfL0rUj{LnpbDXz%8C#E^kT zUCcRcZIc|bCY7Vz1OIt0qI}`-LI>CLsm(K;f9Lm!-)+suH*oXYN!HLv)7i<8ehBqm zBUio%^0q1vRD-L7tl%C+&W0jLNp9I9Mbk%>>;2e4L5b(Ja`y+QgkGA$KOvSf z_jexxW6ae%>K?Hie>Q6fF*X$(N3}Ok(_#UT1Rmp*p%G)Y7=ypTmkKq_(b2H~jwyI% zsV_y?dP&vR)_kttfAgj;AK?G*;PH_x(|^#3#G%froQ#G`;J_YY0C%jAsFB`msXJWS zkdOl2+c?q(!1;0x?k)X~x8!_8$q?W13395(6rCxws&d9J4)-^fL^R$T(5jl()ES9i zm^7Jw35zAxP^gufIdi0J=k4XbS4Ciaov}`o*8LUQ-8+aNP?w|Y@7Hmq^=KJlpzQ>9 zb@2YcD!=w%Zz(d_5Bt}II@c4&oGjo(O+SDBWTKS<;1v)$bb&K!H^!gsnVQ-PP~hM= z6H&)-aNyiuG?M+XpeP&P@>v?{YW#I zb&5=t1(H>;wAxB`#Y6dC?nC;3(dIco^zkFGw)dir*`}kZ_iAIYc)N8RuP>z*M1WzX zPrK_c1!jV!*(-acP9%YiRrO-Q+HI8rW4I=3LbNjGaCW5kv*cUEzcmy!c5 z`-{Cv+1%u=k^WQHeGS%6H$?;NYz0755P+#TR@bnWn8T%v&X#CL2hp|)>jJ6; z*-JO>)+);xKJn4snpQS-6PK85D_#B`=@t=#X(m|Dy}-%LC2jQLCBoUUxY*5(whJ&; zE*@#7V%^s90O*-`eC_DwyxXV<-Uqhfhlzr=@0B$?X^|Q>sB4GPwSt38j}ojEJEv6K6)$EBr#vX3w+#ub$5R$?mb6d1F9=B zGBSPSc>yS+zJDYV(ZY3{GURIN;d{7;p-}KS!OWn}s zjFK#w)X3%zKEuRNG^>icNZpmLo9Lg8ZCiO5xz(ExD4lb-(k|X@w}I(u`;wlQH!>)~JF#79 z$}yh&DlE-m;bT`xV?9EE^$I19R=3rnFED|P==D44i)2gHIDRoYF{_O>)vs#egp1W3 zzFgzlI0ay+<4%l;09wX>R3#FdkNN~+9mFFe*S}|fHq;LZ%o3tHaciHD)BG-Z+))6! 
z{&|IYGMw?{TZ@ttmqHGDV^);UepcAG0C-**ja0(xzRUZX=h|DwSgEsK794k(l%g+N zQByWjkd`u;`)7dXRFuO;GmYv`0z30mjV3PcJ_X(IDOvJ(k zdOK%9l{Zv&v$^R);OS+dq+Y%22FQ{>U92DJ&2{LN4z|IMV2lzTRrYp+fhwGqX*rEK zP+pzv&E&<2Z$T>#^vnT%RPK*{b%nSXo6HG}_5ndDIw|RcM&84F_X2?8nv|53fo12v z5(vK?<+=X<2!!^_i7_!hfwS_bE(-NuECJlR(k{Dx(Ex z$Rd_LIUnOa^3*K zepe+?v%ux5cQj!3#C^53Hd>75?oC2Ck`dS0CqedQJuN`RGu?3Q+ne5pAMA$C;yu^X z{>4R25uNULCp(v`a)Px@yyD|x%WN`7%Yf+i^gfSOBsOrcD?a~&>4ZDg_ur+iAB|2U zS_}!{{#cTt&!ocX4FrkG5!#v^-+YWSUbvwj2Ut#J0!v5x&~ytw`3iLCVC1P+4cjK>4W#DCf?|d5XqIcQ zHTnisxFl8rzzz#}yWB+`(;tBr00acgUAP}EI+>aWNFJbZlG*gSroL`^cOiMQCx4a! zdnSe30s^a?hgT~CQC9?&L`LU_s1Fz8?%ZCc9=3LXTCtQAe<{ovnouz^Mj0o4r05CK^YXJG4eCLx6qXXW%{202;=?xqi2e z8u|@5_u;&hAIr#G1o-4~GXZ_Ww&&aC$3XW;DelD`O2H)u2s}U{jWZJfs>)VT%5XV= z$UXzc008M~ot{qp@k7SY&=AB>lqcnaHV;r^1F3m1U>a%a>2-X(FZ)aZ1P7OuDe#O> zzySvUheJ)lu;@ExiW8MlVZ-=Sd<8#Gg>w@5QWJ# z8KG!qnlcF~vFt7sI=1qZ)EnI z^tEe_*r&oxBqS+nZ+v?FvS;x0#!>0hE5QOvEB3A{u+4c&$+2^Cy6tVi4iFNdxBF*t zq@R9$C@%QW;@g2eotWCn2o~E3qmJSg{+PA`oE^pYE&<6O9>}?U4b)&^atsulh9+Rx zbc5s}Kpg>MwnXli7ek0>KLbjVWfRcprHFe+=@@%o#tV{ZZty(x zb*2Mzoo`wNsg&#Xu&VeA%%VkVC8ez4kJOxj>eSnEFZro{^$4@%r7i1*8X2XiIAfl^ zx|zST>FMmwFn8Qx_-y85zu3Xy3HzK}xL=yP7Q_7kERFy( zv!)gbc(XZ-@<846K~f~iDPQ3D8$-E$7`isP_{rx@e+poOKu04W%{-t1nF(s{Miwsq zjT?9V!dhT%Fa3`g{HP-^?v}*zEo%v}ij0+&z!Luz5dVR3jk7%+!1$pKK`2MEXgCKz zVx+S*=5}^K%jq9d*gFvAQQmH0X*mzXnu62XX~0M3(xpql!oWrmIrIuLrx%L(!@wfcF?V%bo$o z_OD|Jb+51e8yo$j{KnuMc9USMHlC3NSwZ}HGbR$p`TM3*Q?bl3tV4oO_^m^C zQT`DN@Ru3SUoEWXSU-o`A(fumNb)A8q{IE->P?v>rGV_?pN7&=z;nKo~@u5}#Nl~(MaxIY}j-|fQKdhx6eqmkA zLM2QtpY_9@*5$FVf!lVaKHdZw1x3;Yd4=f{NvNYYOPs}CUa0vcT3`vRkXn34M+nJw z#AX{)4I~hU!>2h54kNA0m>&MKq4f7&=>G@U3p5zTe>=%jW&oT6R|Z6X^&QUk4ta+u zS@$rs6h_{5e*#Q*fe->0_cS~@deMaY?pV21l zV6F@d;Er=Z*?S>Xl^{UBNDeR*y7_=BEU+=cA*px(7z9aU&e3L3WiSwO4aCpdlk*xA zFC(^%9R!QY3O{z1UBsJ`Xt1Unx!h<>Ajg20mXD`+gRf|r_|~o4Pus@v@c`jVq!jj+ zrH3nn5sZq>Gv{kp{Ls%`igLg%uAxEcfly~^x9-=}d@(C+pr6oDJ5+DqhW9b0Vw_*& zq`TAs^oaGVw(a@klnPt7DaaHaKO>X(81XcxK6Ix!br*0DJC7sajx4U# 
z$cZC8gFD1{>ONsVtMa$)oV4b5In^j@ts9$wte@Ut|C{3TFKUGQp(fi8bwXQsvLwz` zENU`wJc@oV&Rt_v9>*5lp!)4q3n<8oNVh8uO| zgl)kJ-M!C7p4!-zYldKU6tgcyT#ijEtY_KNZBKU2wu3!Pd*0P(Ih)PELPl}fc# z3fjT1XPUs#%xofOU?d702T5MPtRQl{_PD7oA!YC)pbS1!JoZ*(?NLJvNfwg7^he~p z;*H0SY+cE^K>2%RN!Bft88BUx*K&D>mk>WGSpH0soc- z(s^3$hBmU;LdCnluKn2_&-H8s1U%@YeZ}8L>`72L)O&5S!-D-m`l$;}RsswtcK1qQ zse^@u>@kYj({{^uC@3@y4V6Iwdp+p0dQzo0upk4q%ROLs4TVC>y(g)ovm8vq2TU?P z7vg0}M^;0aUE*`XU-GYRFzc*j?O(I4aCDGOT6f_WAon^8{yq5nEPDB?892*`ItWN! z;P|AFFmbX$Y}M~M*7BrEEkhrS!hW% z*RXWM9_bbo<OO`6?FARH+6OkpYs6P8y*Ep& zqIAp5IEJo-n@wz)UGO@XR0DFl)(=*9ZqwcBd?YsdT+3L|`fcxE)qN7&2rjI911N!C zlnRJyYU^AixkBSG%?)Bp-kKp#E45p0KpK_w+!?1Q-4<6{`EUv@>S#-K!#vBGd}2L; zGb2r_&DXAvbYZKHntazeG`%TqlqFWLvYO9o?{tAKm8o z?TGBn#z~>yRyNh6%KPx0wq34BD#o%zoS`DPV{KSq0Xn0;Pm+H-`8N78vvOsB&WQqy zM)~BAsE9Zt7KN0qu7?~#_)=U@FyCr(`?xg;MgZ?Zy^hVxzq`eP=8cYWLq|f` zPdDsxh-OkdLWC3iKSa`D#7xd`^Fe+Rp^eUtfo`1Z)nmRNAAwo&4{s#T*VmV3{V*&n ztfDcR@=pSM63lx-Kka(Z8!$ffdnRWRXdLkE%w0qzF)xW$7(AU@xwxm+CF#;6mk}4L zGFjXYwNBr5&Tf|(PEYL-3t}I9i@D2DdZ)fqXl&C14-lI|k7_%O=4^E2iyP1W!`^=j z_=cpoT%GLfQedRDF-Q;0-(+Mr#2~=h9=IO~%16>M^6-@Etw*~_CN9+r&OL}P2P>Nr z>@=x8z1PehBeFCr1NOQN8IDXrR4Ly)B&LpA`1PaLBr^07&VDlU?`f zK7Hd0nXyBi0!7nMiL6e!^H?Vg^?UDdtN&&I0)dX?f}tZ|QwPF$YT+q{kkLn2^XIw7 z%5+ZgMx}aU#){~L2a^Zj&3D%wNV}m#<2H4>!tRrHr^(v^{sZ7kD6jki=*zR0-Yhs$ z!#f8S)0WBo-nXtjIU=`VX?0h>`C>}S+|d88HeFv~O+zmf3!l`%Y0q;A8>rt7{EANR zu^It`OY(f8!f14xi^U!+p#UVC9rjO0H>82ckvGlGjVe$bfP8D9vkhmfwz;gtA@(-5 zz8ZSAkUaZXK&SxYl!7Z~yQCBs)*KprBRWd&}SXQ$2ulV`8?y{@A|_cKS8?AzD1pw+9XT z`aT+-dU!3E%Ra?WgWD6WdrKwB%q4k05kr&3=gImoyOcfO(j~bz#5cb!#_aRpQ^e?6 zp)m}wm;V5c_pZvge)xxqH_Yf4D3yj`uo7Goj#axw+>M#CPf}(o&XuD6`+;8s3R%F} z90YDKmTU`BQpGaRepkRmc;`5fgkvITAtxWIso&rRtBq8;=VBe5}BaU|sCjV(3+2+mOr0JIp?_WLXmG2DZoO_?L0TQow3q7)Jj$32#o$)t!8r|INX# zB~Bd2l0!yZ@2_%F6*xWu3o1-{;~7z8z9PLaCRHKzVKg1n5B` zk-~Z?gK~f~j;Ly^@WmAbyS7s7A%WB@fUl6VrZpY-2fJ zWG~WES6<36R&IdoiI80m?O8vcG45*bn>q6v%D5fl_BaL~-I^saDn_P*^%EO@ zVQhpRbgjt=;lFX}&>VkGLPnN2Cv`fqG&pVLbY=oP1^_1WE)C6`pOObF8Pop 
zg4^&%O*4>-&LR^lYbU552I%chWH$D65Zj3M{nsX_n2{F`XQO1;ZZAlU%WwmkARu0- zMs=fZw-_N!`zo4@HbgOd-{*br*WIf?u-Y&39ckyM?|ys|(T>bHzhTn%+Vhf_LVM|BCorP8W&0@_% z{SoLq!k;mim+a|%&i#24Z9^^or>H?&Z|XKgfwOP;?G5nc7i=5gT;i!PHM9`;g?s2= zzNT3x@Y=dh)@7H^cNzJ%eWr5VAT@TdpQo>tqDMB*;p4XVA5^RdkkGCwAHXFS&x!+J~x&LYSqdTG*U5T_zX1MF?TsR1q@7?894J%KE-c~_p>8xT`({X zCYi2`h(-~E1kEfrm_QgJQ#%h_+}*K@wKLn<6wP`%WdxZS{sODlkm7vLsWLezgZSF{ zyQE|Z!S4@kn1=l5YhQm(wjx;HBCY!lT5!p*__$WAsi|$J$lr76Yy!*qbqmAwk3-k_ zoesP|7K!0ylUoVOOogMJOMEI!q?YBBcu&=JfnW|GUL*{1CZ)Rhi|C@dZ$DRpxIC4V zfxnJF0;}3Ib_`k3=ruzTzO-1s5@BhD7t7b}K`ZuKv(ZPD=HWx;gm^jCrMx*(Z`+e6 zIfQi~>1woXi>8SKB#l?fPo47nw)QK1lcytEEg4HH-a$P|auqc2`9V6)3ti<^6*(HB zkON^oSY=?zh+~1v`5_ZS=czO;{r+edeyb%}_b7jNCm6QIS zE3-yecjoJ%)mY-JS{LqFbDr`gCsvLq>eCpyuVA{=3EH(I>ixfGGLvm|>0-M$9UW)8 zEc|s4@sBXZQkeZ}U)Tb{4!UIs&B!kt2wv$}+HpDGRxT-ljd97#SkWn?^!O~lv=b_r z$_8>2>$`<*I`>W3?R9Oaw*3UzqWH%QtXQt>(w`{Ya78sWE8u?HyJOXz@ET;>07;B) zk+wgkcE1G$Tnt4HfO}Ciu%m<=>)Lhw;Fv9S>hkQ}aWB2bo_d_q)lluW-jhoWpR1Qh zzB{xcxV`Gn2g-Kwy*2iVq-UM1hf~vLw{UY`f9LqH@z8qaLM3Xx+}$XtLi+NTG~s7m zi)}A9w#Sl*5$wmcMl+o|vsGevCG3900U!o#BDdl<6e%&+jh&}}mP;|*3|J6J?4CX3 zP`inh$a1VXA)2l{^Ze$4jbL?$9UU4xeA<3i0W(Sx^nJj^cZo^O4MHT}7vW>S{0 zhNzj2CV522Jx39qwGBQ7i#=xC3P0ipq+@%uhtIi+B@QnTO+Vk&-NxL0>5m$H=PVrv3x|luHiwMe#6+T{oCB^YvQB%Yh-mFT7`u zpihn3cZ#qh?$|%$qDDJW1tW#$X$@iB64mj0elT3hZ=NE3^?or?2y*As-|18F*NJat zDQLW9qYZYFrAT(lmaQ>;&?4~UmDlPIgI5{DtmNUhqc#f9H(uwSg@T+Jf5SnGh_A=P z?&mWoh!GO@uc=)1lbJsby#K>hPQ44*M&q>9oOY@0k@PD-XK^HgjeVzn?g1doa?3UL z9m>ARCR-xCT%zHZPk!k9wG;j43hxSLjv4gCPxn0NP}UM^{tc;H@@2(t&*&V0zc2vk zj7LR5wgk@AyYCboUh26s<9jrsBXF(f5v0%W18GRrsNQL%di&D5J=>SX{n6f=kJ1mU z(k0(z7JgqjM+}uVOqt{9{^!@>e$uJb>q2tFG^vb|+Xizjc)=fK<_1^F<9705_itXo z&s9zRTv0J>Q8!@ykdKcvxkuZge%yW;^nDYQa{|nqbO;^;lMY~Y&50}@w+F`7USapS zxFUhJE#1sHe%4;lta{Rqvz@k}^@*M{(VghbPLGhD4=rcI@F+eS<`_1G6XinoiOB|*2sH$}$#Cx&c#k~$ zE5#WsEqu@!agfe>o01e3bcB_E;f$SND=5;4cq2fu)qkl7Z|8|pYOGMACFtrf_Sn-DVTpHFYlm- z`bXC{Y51^}vIvI)j0&pB{mIiuro$9NWkZi0sW8k2O|bOO=gL*S*b8E#zRHRR+fy!m 
zeC*y>lBA05)>8arCuSnYdL-kh|0!#Aq)#F$Tnz7Hx?kANo%8oBT>jSrn)uVh*=CrK z_Ebsl?y_6rdrEO9%S!p}ltnx?n*!t3=4>?ceOU8z<7c7AXi9!lQC9aQab+Ecv3>Lz zzvDS0Zg6zb1hcUqQ8hs8>7dpxkS#0~P2Yqfkn`j5NG3PRssi86 z)%t-M-mfmv&h=Ipek|2<>!GDMNNj35F{CG_<~L#U{g;7u6;mFw^zT)aok>H_&bK47 zzta9)!L@*lQ}6|s83Cd;4_`?DbL0v;M8xmkb=VQp^B&|sTmTc892Lv8 zj)&G}8x5TQ%w(DKV+YX$*S%Sj-{(&{!pd(^llSJl6%nArlLYD4AfpdhFT4eYrDRW? zIo}sDUkA1y8Nj$^acPN!ib~PnzsWVg6F8(c?6px}`jfqhQ`g}J6mb1GuJu=Q7p5Ts ziswG?sn^+7HBWNSl=n(1j_wF_f0@kJfk<=?mw~q2od__!E5vk$+2`Q;0|h!|0_<69Dkzp!6_mpn>`aoZYIsF8u!7F!W^${rbHc zHssqb^K-Jpm3TrrrxnsgbGn>K734tv(BlppYx+ZagT9LBUDOB5SB=~j$iV=q=Vid38Hn_>Is7`J7-@u^0)Q{2pySjp zprmsjpeSjpy%X>tu{@ggM!}fjwb9L`J=|9!P&&rpGJ(?z!9urM{qGY0FEsPIl zV{(|#gSIcvc^p)*pnStwfq~diP^p5se8Fk{=QUi|rvbF_k4ZhS+6QVO{Qg>3tK+T7 zc3k!|NOJ|Lq`<+Ys{|!6;tUcRh-ierLooXkc))$d>vSm|XPQ71@)kJo;%G+n*b{lY z&!0a7Poui>O)z$Fi2`)~Cojv&%R9Qe%@0=xfQ@SMn>Xt_Hb+8mF$pN$j>lte;fI9v zJwNgo#JDND9%nybW$Lg?q=nW*zxyf-VDs4ahV0fAVNcX8bg)F2=vSArw@kdj+~K3S zCw8~Z-V{?_UQu;&!(NF%m%Ru7!=?9&w}Wg*T1#p#k}8cE5KnT)j?|xza-_?CnMU;WNm23idE?gUDp$bS;I!dtl_m5KG_M$XI zIG6M?=&U3Nx7U2M2a2tqZ+h6SX%viqYKl}7;1&+&&LMil)1d5eKH9uM1tbucS~ zh%ci*uF8X1lOP3?+-W!y@sJ+Nq- z1MVjv(eh4ctBkt(C&-acJ_8$DC{W>r)Oh~dqy-ToLs#i zjkWycslkiRk?e|A`$1AV5+jRx`03NFs0$`w^_!<6Cna6rK@a9KzF;K>0WQoR=3hpB z4{LO0K2ys*IA4KA-7Z;xAmNYQp2Q$&|Mk^%HcA$(VB_K(Z~Y$85wprwZ0zy!bOj!< zM)mv@XBRzH;|%gca6C!i5Tc^0N(6RRVB`QIeDZJl(@FWKkh5t&2O(ivT1zX`z^aCf zg5vB&b*?BCzjc6dNiKMTV`H5c8iAVKukndE6Z-Z&@MUbu_`(n8Qq_#pJcISpR!DaARWx=QA3Xwgw_k|xG`ubje(dofV zw4K5~R12@xZCFOm8NN&U#P}yFY-4P1pb{ zcX~z8vg9Pt!A1(9_df==;JM@T8^InbmP5@nNla~`a6#Jcm*#yr;=(@E%mg2|p2jErr00Dtfc!XWDfkhqTO z!5#%_pkS~Haf7HOJoY;`N*a%;KoGr5k@gVCodsvA7HVk21ps#}ZdK0~Yj~tf`gwuk zq*Ryv@e->O$ZBw<4y`^1%o7?(-!ZdfReJRM^y)gvJE%clF!}KUgXIS^6drAQu%)EO z+#(@YbN0_!d3)t2!cIm`f5~N1upkI626xYCZ4-JNKNv@Aw>m_ zm|QGX>&x69&qW>^q@;J0zD;hdy72q?IyQwbD00&%^s}&ae?=BTux=r5tMq;NyAnj8 zL^2ye8LbDkPZfK%cfCKbNirp+CQ1JIqiGA0S~Ap)N$o0#NlgL2tUwB+s4-t(QKfgm 
z-9%FwbWSSqPib%1PD4Zho?LQIuv}L2oYSzL5h=k`kTz+@BkH&TnfM>yiqSj?Z0m)R z|JD+QVv_!cckVcdH-{>5Hkl#m7f`$P(i|6Ak-%~i=eG{3Jz(GuwBEiab0NL4Lro|1 zH*moMs<}9qJFshk1Zy>5RO1aCz}A4fBw+77RVM?oep$Ul*pA?fJT1Bn-3n_Gkji1P zHkhrYrPU8=Qc3ioW`>L4HsDp=6>)h8+FIYud3k`HB+lm7Y8>$RECD>T>LyAhX#Wh@ zM}nmfC#c{w+l#{nQ5gRQrj+yP*z-q#Wx52SPV%%>jkQH{BQ`V6RLlypW64*(t%p(u z+EfGv+kn}?`@8p13ZHcStQ$SGPD2lPqC+(nYQYiM@~#Dsmfs4#*i$z1FFP{oWD_3s z;Zx-{6jVPx`an#k%Y;rgkAT}ytL97Qmp>QB1B2wfi?Wz_JZk1t0Y==TUs+VP_3L%|T7P9OWGOJn&bJq+8jNwq zol0Ddi-M7#nY&ZP;@iTh`GCu2dZ$1#>-mR;H_^3L3Lhe53*I04{F@FXGfY?Bfh8y> z-crgoTG@V4l`0z72K7#9cC3z0Onl#CnFVS-Krh$s($v*`1}Y6;E^wcHb6!lt)zy`S zjV%ilix;>eH;akBrsNe4Sc8~*guS5a{-OeKVh3JuoM6voTL4_;;Sxyts`bu}F(?wG z$U@%k4~mn8&^i{N7l|3S5Ie`Z1{G0Y-?wR50tn1NaNi>ev_dT41< z_>*>_ulwr)o=sk!tbQTNnd)6>G4YGH$TcHB_V2QK``ToQrA-ub^*j=O*u8Xn+#@-* zY~B?dL`ZHgqspa1$KRDUyu0ckp81l5^Wahx`ULk(ib?)=Yva!)uKsnq z|FlT|{I_>@frNNB{=KKaFZEO4!HFUPuC4s8&WxX`qq7sNCi>NmTY?a<%V`(H-r>+6 z_EtXQ8+@klj7ge$1KIS*wXY28CW==SMLi^$ecASCWKVoj-Y&ks+Fhqk(80jU{caBv z=*DM{Jy$IM5`1VqKvJm~1P%j7KczKpN7*C|;{{Ab$&`IKSe+@P)vgbxkEG2o81i@$ zY=_?vFuF& z2P`GfzD}I<_4fAm1#OB#-O|sh3nHI3r^yIr$QgCmPXh zgk;16n1`|PK4%GM`bKy9tNnXB#V8uQtlBs6@eN(IQNC6v5SNfxk6}ySfYg}@ba|E( zWRDv9iPO%`zJZ5%$AnyoT;3fg6_|i+qe@PY#7d`v?hHHU(ECXA2Bjd)`M?UA*qrWY z4C@!(XIgGrIq6%B&}llJt;@&Ss_Sg=C8?axmFt{ZZ{05Yus`OsD-TMupNUNpTUK;w zXF3rp=KC$LQ^ijP*g(HKWi1T_d7l|(b*Dk~edbgmtfiU$wEctklfYp7Z>LVIkS+bL zLg?~-b5g5sr5#7RD{tzVPo0#^Zv?y=vsB~@ML-;;VY9lYEO(M)ea@y-)3sCaId}Bk z#aH;PZn&OLv=8kNG`V>CU3mrjyAy7hZhqx2kimAgNg`Fua~YJAfJ-=GLHAVs1RQ8^ zR#U(v$el`U{k<&vSVPpk$|NvcPTUq;m=38`Tf9;cep^Ih^d;D&ak$TfNK60rH_ewk zI$d_$IwqdN`Q4lckETgO=D*Kd?EbVZSGQ+oU=byX4IMWVD1seV|CuT-g*EWe*0+R9_^sc#3%2rhjH8qwaKKzJ5)eXCxxB+6}q{(O31?;~5}P4-5%JkyUfG zBjQnZ3+MHrv$oahzy#JjA!lN9YH{odyI_HugZs)cCptm+wu;xrk9)9i8$nc?~j z;L2MbOChGRlt~}CS47;hnUFePeV^SEW`5WI`__k>7C+^z3o1GKQTVlzoH5B6;m*nJ znx`}VQoC;0rti~7YhCD}__Gk)1U$!upyCn~n^5`t&1=B=i_i4uRUp8)>oi`eH@dXH zu`t&IEe)UFXW{6yL=dMtpXttO*NlT+oyz2K_DeqRk0bPdcgwO&O{ZY*yqiJX187V< 
zeDnx$inj|E0NkPpbRtYFd!iV3?2Q+31V=wj^1s_x-YTk1kOB*xmRI1x8Sq9mHo)*a zI_S-id3wW$0pqe`)+;_UDADn&Z+ScjYEr7llfZiSVAKtdiI}F9cA7x=03GEWrY7k7 zw%JhnvtR@=_q?Na(dFKe6~&VCZntyJG*3sZO18#;aH)T+ucupmBESHxCwHYwEJg*P zTU{J1i$PDJre*pd`#ixx`;Is!>EOgm3AuD4QN5d9p=Q%uRgvDo*(_#_I2lLM0rr{q216;yh_EVV7`yPGLM|x&p(1^l6radLherc zTosGlW_Y07k}7cpMK*bP8w|G@h+RI@61EQg8nrNAE%IucAFPs?^Ch241ef_TjEFJZ zn@GGAj_xHcNRwJ#Y0CA2M1be-O-*ZFy;gVPEUmIrGC73AtwUfIY5??fR@-RIb)7qv zs&bwSDXGnNMDWV*i&gr(n$is8vmz#5Wm)^cs?HXhv`*v_5+90$Lbj#Syy{_pHuWTc zQRtR&;79j7@p2Mg0Qw3@eQ>3*8c3@J2T&J(C_h!HKEBWD@f(o9HV6Vo^;tGtPLK9> z+hzrix2Kaq=13e)4a_v>eRMRHyE+Obhk`0RAMLfq&J{-9!gf_PrB;gWu0pW<{-ny_Jw( zY&7tj{812lrL#nD>Np;3sUGJ6ZC8gmW})P>b{_5}>B5{S0Ln(@gwEYB$Fq(a`)<~h zX>@il_6Q~q<$c9bUgd2!i;ybLet~8>UrUn@=#>5h7J?Pb%%Go$zej5`QVMF>=BL(Q zs;;!J7<%w@u^(>RYM&@LPr(cYPr;*23dmnRCBKf8#Gpx zfRbj5kx6EQ#MH zY#yDRgVcGde)T5N+>pH0K8Q1lBBSlg#Ueeoy$@JuKD^P-zFvcQTul5_J)v=)RPK=G zCs@l}d6DeIJ*PKPlPjE;_q3Y0;PhaS`qWD{?s;JUl%GuS`sXqaD{Y;<9SwdvdW^j#B z-Gq%+MU9@cPOMxix6gGNMA&z=w&+)%Bup+n&G0SjC3)})Xp@wR=%CN4FJ5Tv-BPi% z>9>Sj_GLHi2coa#AMdMJA}bB<-lC`Xdy~6Je2bsbiNNqQWZO!QOFvXUJ?Tw*uQy$F zcxG=$Q*-HWwj-MbeJ!C$?nRP`A%d9uGL$w~)MaibS*S{#*E&~IvfH4Wz3z$`Kq3NW zO8dUeG*XjU9lVx$*;K~wb&YWHEj|0YE$jp`S!BBWjx^;%XQ`60@Ytb{ci;A)oyJ^6 z`yGa%Pd~;rZN0L(tqas)Ja0@N(U@Vw4bUsxV3A)s(xthd@$!RCoK@y%Az^6X9ZvH& z)|K<{#JuZ)x76rNwUB2CCo_(}%`uE5P4RrLA#||lzUb}7z9C$|Y`OgR1FHK*Lp6|LB^Xg)Ry?bBUx8)XkJ^1UZ)uH72ne?=@duHAA z;71ai4CNuUj0;d6HbIQo$Gt?WC~UwDw!&n0RqsRc2pzPX{wdHX z0qR*;ARBny;DAc38LttC#3kgw$N{NF@ z5MYy^0RGAr+la;h3p)p0OtFnZ1X0U*!){Ai;P{SICnq;8rElaD?eIa`t2GoTVQ4+a z_>+?_u`h<04wzo&Aw;}m9z)7K^P<(Lz<5)c3)6mbb+#&=BY$6pIU}RKH`(ta!+ROO za0GSH9O)4`)KLBeT~CG6*dbi+eEzuh-4*1i`#P~5^`zDj?%aej5(H#XfM0rDsi*L>&`cW2cH#CmjDyh-G8zv)t?^JGm7Zxr`zbZ|Enc z+h%Cvv?4sq8UU-zUO8Kmc6rWtvql9|`I)ueDlgf*`@P1hE0K)Bj4z3+nzUN^zV!!r z+;f&C!tG7OU%oFs&B6h zMFEQ>51xH>&nEzVCsUfpj$Y4#bLdWKl5wX@6>61A(2WF6EtiU%B6%EZBh?x z*u?pjh`KW$jHoiN1tt2rIv!}&kMee71H96^ePiSftNXtyK$E7RtvBBBcr1O`VEp}S 
zPnW~?ho=%~E^oeD?bt9^^(C$-r_#|`$_d?CV>#wQ#&wLox&>T#H*6l`o3@RJ-0w@J!eXPh7a+KN26fs^WA;V3LJm1F5@u9voN|WG}TFh zuJ^3VHt>MS!SXL)tOAR?68_)8$eLHfR)CXh#R6$Dua!!*ASmC}=UW~w6S%hg<+BcJ*H`8ATH{cyc;&iJ6*b|~yw*}&r2RKgrTVU(1b zR>Fk5`y0>Lsy4Br*99SZPYIOb3 z7H=n;DQO)l(Bt6zII%3`SX;bOohjUK=i3k!QMH(v@%voXm5_5`;|l_7_~Y}%U4oE1 z4R?OO#Z$O8!v!a*N(6CPIn~(x4xJAhQCFJ}tU74li=eXYbte_I$&`+tPaP3FmK1&V zjL_0>!BqE>8yV+`5F%Zi4TV%GOA(kMB;%)6+6aECog=`rzbVe7|ueOz)oef8*1 z1&5f(3)I)7x>5ruTLq;cxyks%^RMV~_hn}Cu-RJWPB+FZBmF=CH$g!|OO z@xsbn1_XMxS^bWJf$@)xj*r#tEZU-EWFJ4Cg2vmx#-H~FMi(pL5hoU!zJ~zk1KNm} zGc8f}zyf1&XJEa)PPR5^u2^|TJfHpkdJQqw-?xPoX#h{A9&-3A6961HzC}(ahz`79aPf(-Z5RF#0TiQm zht!H}ZU6vG9o7I{MFvV7nP@z>Nq{1v53U9{#}%XkVAFRUYG)qZJ)U(=pKF_1aNTl= zG6i)E$fRVc21L8KJ~R2Jg+HMfpHV#4RwKC+V7C^-3P%-Wegq4{Q6SX(xO{ZC^n62gHg=vBp0H2|5j-m^0BC+5Ck0}*q7x*hss$iwOJ7=0WlUU}m- zshcmj7@slo!hs0qqAj(Hqjk%-pv*H ze>A^0#=o;lqkXh#c9N$69QRgoFFg4sJgKd!Ro^RWZFLV<@TA6h5_!Hz+FJCMAW{J^u(@Bx11UNcvy^3h5RZi{=AZ zVhMRZBOs2s4dS?&QrEoi--CjKZ?ddTegy|G&MFITg{M!)cU<87{Ggl>1IHuQd<-l1 zAQoK5`J>iIRrfre&3QRrWPlgu79jowqkl|sY2RqHE51c=+7a1kGKQCQV*8G(pI-p# z$ZODBU-5G9-GsV;8SA%*RbJ8n`DG`H+2F58`w}y8#nw7Y5C}c6ycw{WYTQ>c}Y0C2TBTDXvJ zjC6r{yN4t-D1n+;o+RXH77_oIG z%GG+|x5hd9`$}lGiW z%)!z~Ky%OS+;K1;d|i>ndR(>0T+ZE|ME7Uid`CR-d;b!5b@H3s@oN@ZlGC4>zZGAH z=?FsR=d(QaopG-O?Up=WE!f9}TtxGf>|IDy+vj(erIyNgnW`|gXv_1^qSDVfN&4C4 zYqfXSA&gC$ayM8dfDfE!!ur1SQum7j+f&BAp@xG+{?@F&3gVp|RDBfcwz^_Zu_EuG zGuftqA^d@bJ3!xSy>h;rYQ&lrnHfzdoB&+^LpA@zQg;Ho_A`lI4Y##5w^^mij!)5s z0HjuG>kQST@;sFxI%RFguQ+jl54eZuDiq{8+Iv+p(QD?a^*bFTBX1tAC>>S6Y|b68 zyiVW5<+9wDz_J?pV_tZ^UtX-X4PvW7)&zRr-d`e~K79H`QP6$wGCYF_gLXX+siuR) zaP!*c2-<7B1w9nC)YXOV8bVKB_#!S z9B;w5cfrD2k0he*&sk-Y!@>@fbkSLEQwaQzt8asv!((Z2>m0LBU@s z@3%n9wxXi`m~`mjgVk2vdHkLYR&t`(p=plgqlXWxblHmH^@cu(;zDo(_ARyb%0%-C zeD(pIolh@bx>Qab2EDPCodhyea-3Gzn8~aUdKN3^7z-9T-zX>GJ!dE1cO8n#RJ#{} zpD2PUMg9-fjyvnrR*U$Wo z3tyGR2ktPbGMoe9860orpgxte4C}7f&sWstUSNtUNZa(RTKu3}ed~D;{FQs*n;P?} zB%P1#HeL%$=M4oe<2%d7E?|X4+FO0Um^!?+h*lmeVJS;nS4z;kUnM+b)NyyRdiI1= 
z3R#hjr*f0X)BhP?LQ>YdtIfl+3^{D{F{kG)=k>^^Y?qLkpaGh_lQ$m!Ja6b|v~Sgh zen8UoQFz4yyPuZNeFR#`L1`U@eXOXNc>4fo)a__s+L7kY(h{Hs?UH!n#Kc zr*btFLL`h$>1w2qK%pYaV#@9n#}l+o!rRUDx#mmug?vaB51KQjBiUfys~y9xACOER z{G9FTJ23L02V19hK_R#8r?5SLrVmaAp&cU-iLi0Fjt_QL-ZD!c1b}l5h|~$ zihT~)7|?;pD_LG!TRSTgW*w7RzB-@lW-7d|9aud|X7n%uXq<-&HCW^oBVy0sd9ni?OH`k$2Qn+dETR25wg7#Uap(1gV96LcleeqFBtwES; zGFP~S`OT%f0oNTb5KcKdDtkfLWapI=a7^=gy^}5WHkVCIn(`NucjLtvO;ev?ot%J8 z+)p7g`!anrjXNZ%M4N*mIkW#v(ngVnQ)M$25W zS}X81(Y+UQ6};%7D``l}5uIG3!E|FdD0NeM%)s(|wuV?FbCYNQRJ@VdTBXT9O&Xh; z^r~K*ZYQsZGK2O%VZw*>K&qy}bcI+P9DwPWm@?j4gJ%jfT}lAKj*?pJ09E88Fa`0b z0u%H#(F-1dJ&MB+Vmu^#jXw5;qPjfMT3QCa@Rt9i^kq! zPYA?EMX%~Kj-j27Paw5h5aM_%iGYg#opr@Q8Y~F4g?Rq&zNXM8eK^H zdGT5rsPP-U1(IyqPpx=$d0J?0aH9$kGE?t`$oY>uSi*(~ed#vPb2)FqE6z(k!=}V` z9cfx7XT(=MeH>dx$MgCcNI;P1|Iv>m!uS#y^-TWj*xLSy00FhbsuUA_M{lu^T^>Dl zitM!VBa!77oQh|755W^xL?wCDf)^kY9@mcW5~fB=YL8|;V{hY z@F<%RFEqX#F7{1@H5L5*J(YDDJf(?f)N`7%eE3ybXe$2yM}w1%k+Jms^Hw- zj)%^YV&Lv4#+B0TqF=VZStkD))Lv(%(h%d&Ys&o?A}#krCE4gG(ax4Ci?2nDlUQ^g zyGP%UXp5FPM&2CntWX?uT-7!l`l4@o)Z0MbYQzD<4>08EtllkOcc;<>D8c}o1uL`S zEBI;i4d9efetp=!1oo6^o&v;SZCa)iZXD{{H^pH})nr_7Olc zM`kLZ(}F)?@sr%#Tt-s>JVjG}Dr(SP^AMnhSH%lBzlND`iE8D!+BD7g_#&rNO2yTTfd>vAzKq1= zMO$^H3JU@T=R(zYQ_F4#evWqY^HcVeD_{jhoWjt~r-M_0e#M$Mhph7Cl|5d6p1&e)_G-+M02Zkd{Vqy;-vMLV#dD zdFrH&;{&S8Pb|d5!(6LxC2Jw^`$J-Duac1BKeP4fBc;6fNK;-v%CzFg@54FR7P+YT z)Fx=ih2;!k$og+xS0Bay(h7Y(?L2IY4+(a@@+dS(G94}B1HU5q$Q2-NJ|Qs2EPJkS zmblKw7@INUZ4CWP-652aQc3!`5?YbzJ+qbQ6w~`n zY29IlGO2QH9YYOp-turudT@Q$I5qt3n

$7iVr*`+0e>9+D8P|$mj5Oy#eX}Y32#~AMD7Fe(kt1CZV4v5LjjZ zT3Q>aotg~%uNGjf+i;|S2!yKi&-q}=QqLM174>C#6Y2p;L_ zZ-6Qp^fv?eo%^?Df}=r4Ve#x!D%AVJR80*pH+#C#T^OV+%jG{$A@gAfJ48FY_A6Ag zl}3f`EGq@Rljo)p;Avy8Qh&Cw_~nKW}ZpNImMM`XVN8$UBzXL zp0-n#`dT6mEp_Dfk9m0aJ*7(AjJ=L~TpFB>_>)1z?~7Fk)^2xCZ1={y;v=bHc(lHP zNW=(&!yL}nF-c^-@y&#c1wg5@nQ9-gd1qI(xtN0In$b)oXOklX;G+xX~i95ox( zY(}+PeM*(V)}M1HhS7c;=sS_chq%EsDr%L6W}h`htWt#RHQZ%D%XEH80%@!qUu}<3 zb4fe&z!!w;eOS0wzJJn7LHvxW4*S{Y*AF3`omH4%xjEp;Ev)lB>Msw2qxuz-aCN=) zq{DOLhsAOe;g;18$$T&haeCUiMn;0htWc(7NQD{z`>43(S7i6@IuAk&TJ!ns4q zzTWkAiPCX06gp?v0ZFj32eJ-<$y@_C+DRHUB@T~LYaFi8ut=d^n(msRH@173&Qn7G zb_g-Z)8Eai({iT1N4&cA)+S_1zC-}`pi~wMhSK*@0-DX8FUkDY_!1Wfqs3ZbGE(ni zTP2^GoMtGRM1V=)A0LtE=nlm4RuuL5S6A08x3}3z`K@qORAvB5d%pR|Ci#bt1P8|> zCPseb1Tz0}|5nBNPvsZ7o>^}Wb_+k=2fLzRQ8nC} z+&oDoY)o8TUq}4$`zUz(?5vG-pmR2i26&1J2N7PKp5dG** zRrv`)Z2xUYk+dx+q3K{S5TM8_>vnOM`+fWA$0@s{{=wu>)sOAOjsu;{Rjh1EpImH} z^Zxc{4fr7PZ_1EaV5%v5uq00p=v!T z7Ua^K=hV%wU2!OA*zpM%hg+#7MhbcvBl7h%daS%CIn}g~o1yp{Y z&vq|q0emfR9|F1eb2xf68ag`WTW=@Zo5}uEO*!)0zKqwwuab^Q~ z15gI6c*`;<7?jxf_ybnbPnynRKzaaWI5xR2Zporj`s~n^7Z;?EH5j(KtQsT~xY7O0 zXsu~HjuUBr5D<$3al60A&O?)rzdJ{i0-BZF;RnylV5>2~r1p`Krwes9U1=oTE2BT$ z!4w|Py%4+8>m1by242?-Cdiu{Iq-K(6*rFOeS?+j-Lg8bH1|e{)T$2OxHX{xndbc_ zv*A?Tj}d&wr}hWSIbBsLzKj!1jJ^sYmA@5Aoc8%4$(%NWb?4iEic;JdB`5(IARbN{ zHaJ(MyLBMs#0Rzz9M2_I);QjBlZy*^sTNMAy=JBb#q#a|5r{)7V@>TH$C|s3uGcd^ zd7U5Nfg|4e@I$2mD+gz7TpTDT5c^9&%Ag*(WClE#*UkCvYMdTp`iMmx03_myc2&~* zzOOFm4Jf7hl~JAhU&<2{{1|@bV(KSUB9W$Nj4e{fJs}Oqzt!h8g{L)N@rmCSj(F&) zfV*0rwu!4CvRi$=>I{khYL`uOCQc#&PU|tFeY(<~0+KGLOusw%E3{KG7TgZl08y4$ z_A4${NS|u|O8pQSiJegz&gb|y!#9CHD3$9MFwh2v9wX{wdwP z-}C;yJ6s2Lqod7esf=auNo`ewvTk!8>`)2m7%=(*3JEZ23cBv-9%_xG%ZK5vDA>Dm zLyfoCEi*GuKz+=SWYARmqQ49T+#qPtmCw5Zm1elYL>``eQOfI%Y&u+x!pRpv7<3IM2 z*FK9|g83nb@B`Gt5wZ`xz2aLoSfDbFe{Bw(G@tcS987}IN$mIa=Ip2b6eH#4!eCaM z@)aM_`R;gECdJlCwwwzXN6FGmgE}0xXP{!C!K34+@K;ELwXXNiy7X*tNRd9`b;%@# z+@oM;0b1rOe`64UOuK7;0YL!xo{|y8K%Je%N_4+aa)f`X6u1PbF2>;v_lM3ZGJ)gn 
zz71J=(vebEfxFWs_xO{03?`MU=o#j;fsGl`Mkl-O<#4~XjW}YG6#tjv#km+1CzMU` zN0^_|gs@}vM)D=>K_eOt;^m0wG50i**KrdAx%ska=1Rha3vrE3`t2~!jp)jD001Zh zX`GVo&EOTrv1qn-w?m(?AaVwPo26FW4{$!(Cl!a5XID!=L7AsfNKET4w8+4x0nQuf z5`$sfjyo~o_U<_n5VjpEMV-$Mfr;q;j7de5qEvj8&akKV_IRoH2)-Re7L4*BDq*yF zw)-D9S1-XsxJ*V7y`jX&baXUe65)8E!#t!ZnV`-b2AL0=Q$hgAkRh@30BlO$Iyv#X zsjyMJ{;gF0tC6ikV`HHESGhrO3_Ha|?1wxSWyUm`t62=^7a%cg?t8*W-oVh%_Nxy| zU{-|;xEjJ(z^mbIFk17L)?0c*pJ8my{GvME@AZjtUvZ_YCsq^xfYQarXMG4P!^5Oq zTQiD5<+-liGm9kVRP=2BFcyA;YVYDqQur$eCplknoKSLIuqz$z=QuLxVxwAmtR2Amjn)X($Jq^bzOue>pIS^2f|2?Fw$ER zG+$~kv~hKjRJ?Mq>9_B`&k*(Rd5Y(@u5z2OauVmCtv2#z1*BlUDEmKctUFET3!R6| zHs-u~ze~U1a?S^6#Zm!iW*HD7Tf3|Jz_@THhZjQB2$+Cg3^{C?5H{!i54PWdVS5dx zZ`DN_+Iz3fP|;sAN5JHW|SrD_pd&G zmadrp6qgXu*XuUdswW_vU1wVYR1Cjncn5u7nKTG(yM_=$F_@EhL_uE@I3ld9{$;vp zQs&^`1k*EtL2o_9KN+dbp?~R!Eq?`u(3bm}A6LVLr*wxeGFjt&`)YW8K#ONavkZw65d+N#duC1IBC$6-`PtJ~H!28cj2VznYvRVz-@^g9Y$T5T&^RLPYMz zXT?1i)D-&iOTLqenqcv6x;xQ{wKB9ao3Zp<`6G$mr|OLn+H0E^*%FejKXFw$xBCOP%UVWl-fU92KjGx@IE6##TdNGc~T!6+(F zP^3>7KPAM0fLIT?)6+9zLA8(|0G64QFJ82~f0VQ7iPQKH9hDdajOxG~S9*0K&*6gj;WT{Z=G8!`J(#qhWCdeQWcH zF9Jp>cQsy;0w@5T5Y9Vmj-iBse*({JA6|#s8=|k0GSVzoq4Yuxf;;*kbKEerp4fC!w#^2GF6vj`v&eLGb!Gty_UMvxiT23?>mirP30j@q!8c2R z<}!^gy{}E9*#$ef73-05{s5Y8M)ZHHvE0L_I-K{-YI{oU@9L9V+(5;D7yM09&Q>p| zQI9#CTV<3Z&Zhb6}ZvwyNHJhbH|a-mz7T!aeHo_12p- z=$r1a&BNeAVVwkFiah0~S4)rC-86oz&4^CtWy!*1Rk3N)SEikdH>_qK>s*3KGHx=m zDt6sJoSSb{x;_^%zK=4d(9?F2i;A(iEz!q}h=7DpXmafQ;L@XU&sk`S*}C>5{3vk0 zm9knz0j^pm>VHbdviBO~CFT?s$5c4@`mR?(&K|$Mc)CB7)*ISs|5etSV$oU?KfO%5 zybGojrm(PLNIlXy(>1wC;)cT}hZHyJ*Sp9)En}4oA!=`Ab3K(_+dJKaTU?~sgRp$M zf3JORynMOjDISVeTw5Dx4LO`eN){fzzdvmDC}x$d<+!{JW-YLW#DR-odBgQ5J8-_4Z#-|ri&;PxoD9@%5+ zcl8}&o1ePpNH}&a#FzcW)U759>F9z_O-Ph5cxTprzQI{US02bA1yY4z6pebCLpjjnUs+5n;TpsYo;<`j$*yjEI()UjCdo@M;Y7?(= z&Y!yOkD2_jQNqsCak36gH?M46UgX@5YkzRRpl@ESV11+h0PI_;UVKZt$OBAy-1vhU zD*Hb5(v>u7#Vfs;yfvgg8MPM$x1NOP}II5nNK zI?t9Q(7oRwy}0q7o6V)>eiUaETl4k>GUe^F(PqM1UMn9H zVPYkIe)(A!pm6ll)cwp>t6TT&$^qVa|SB5*?3$+qvNjb&e|CNa<-zG 
zrSdn8*^xon(*qIc@ehn#U&^z8`8cIuLB293q%C)x!hFSo&hV{^DV3`Ku$rGZHxRAb zn>QTmM)%6vPjpi-a6P*oVu3dPut-o#9TUx`9&WGrcKjOMW!}E@ZbA~X&-yC5>E&_f zBU$RSr=;Z274&m2Jzg8&R%>-Wf?(bV(+wETK37dtZ zM{2B}I+7iS1$*3=i$v>DnO}{9YChW0<;W>#HZ1WEL&{2BijQ#o{`)m_kVSq~eOT2y zlYam7&x&(TxvRX6crFXV|7DSrW?%bW`yrd{!0On$y9Rn9V`$+WyY0_aE{Ct%u^lMP zBxdRx$g%qLF)ORMo+bZ&1O1$qFlyVxaZECjQizpbU08Y{zIUGK=yol9Q{A^Efz^ZN zy%)YuExvJJ;OE`OueQCfkR_|FMbaDAE;wa%*YGhpRaUuZkC#hb=>C1TUL={zfOni| z>PbrWSS<=GEy0!$s^;dZ$jkjYz2G#3bnHZ*GM~;9;dJefeOg&)TiVm2OlW#*-C$e4zSzG{nG=QYwITX4ZX;qE2$j2; ziq6OP<(W>^wy@_>L)EHt4VUA%Rcu=9#fUqXk*c=&=@=P&h~i13KIjC8q`~P zMBH=X?0!^d7k-D+w7uvDyx|V#Ibn|Tij6oe3iDo3E&0Uo%8nJy6tCZ}_|Ys0U!&c9 z7{dwae`2Ub6|G$BEF^kj9^H6G(`r2_fgwiPfziGCdyYC?um)3;XX`l|&sg#Ob;ky{ z?Subh7nV!rbJfuw|F|=6H1zH6rA)E#KwFHU$%a1jj4#Cb>|d?7!5*~Bzl|L*GnxLvPsoF*pg>oRnys^%$$LR~G6IJ#p0 zVbAxs6X=Kvn}#B05pQ26<2KW#8%#F#DI62JS5 zWw=*ch9Udy((v3&8nrmj*qC9r?-lm!2&$rhyz($kW}8j_kdETEeiYfdFDh`PYFl5S zs2S^sb@zVGFE^v?7$3=b6`hlDswdALNIaJ4^_wNybBzC!97sOfO;;ViS`;?!(|4~= z5cgnR-0HUZ8A${8vg;!YVHxd(&S^ThY zny)O5ILXD;uPf_={&hO#R<%8c&qUTb#UbQ-@Yie2@^?1g-cA)BT%1(-JzZWPg#Zo0 z`~7{yiNa;H(LFAM9o;#fntgqG`3(p54uOei-z>pTT6PM{Ow@C&oXp=4eyXN2O0%8p zUS2%U?VGCE$Y#+cX0&Wv@5{YiGs#XAN9xR%<_g-hORD_0fe z9LBg=Po1_on`hl1sn^zC9K*!OHLCM^jpER>ol|2*n$M?CFNjItyK+w8$3Ny0Q{JfV z*ioz<8bPOqKB2-2DvSHMGm72FsQ-yw z&$&5eVwcef?WpRLF?*X6<~KABy3Cn*dJcd5Hhy!ld7<1+>4xBjv}DiP(`bzcGItw3 zXWQ(%a@=TXqi=^Y`S&Xsw8MMn*czCnR5OQjn~Oxhm^EFf;9{$r_=mj*FquD^E|WKJrVeEma( za%WHK3U8;}{=UEjo@1vzuT>M`1SBXxe_0NC&!{#YR6*W`H@+|qKG49HkCDVGsotu|B$P`}g9d3e(M_bkxblFFKxJ|@{((lT#qz_PG5B*NKJwDUq+ki-tD%RRL^89+^L_b*DpAvnY@>?YULg7>j{Zh(m1O@ z8JpMR`&f#w<0(evlM%F)p)T~F4H3l-rD9e?6_b@%B5msSmA@xS%!*-Qp=jk^BU_w@ z%LpZ$6d2@r_pU)K3O|k^Rv30ON48~getWq6qy(-Q@guccWgdqVVw%kNxnJ@20Q ztx39HY^KoWVP(GF;)GPMA1ft00n5*C)7Km(R7{MFG|+XP{zPSt=N4EI(1PWpws*yX z`zguyH)FE6{tOzbhmBm{#?<`0p6zKy$tMOd%@8nqkLPbZnX#NyS5>GDc-Uv+f6aoL zlSD1tnX@2pxStMyqK&?cnk#%o)Apn8&9}WW8-F`t6fdK0G_l*bz3?J-&ZI>3a+r^_KNCYRaFhT|J+d=ad01tUxY2yeQqN) 
zO}iLBTweCPkexQdbnL_<{bkL<)2LlZqLK;frD85wN2;dkT60>qS&34HuUf3*4I9db zzJD8!0+tWO4`WD2Ac{BZ`SYm;;;%x6j(W>dLiQd9w zwM@&AC-tH-9Iqr^w%?7BGSDS8<>l=*o_VZBR}QyZ&`SMKu9B(G0&kgrpj<(gDr6^V zo*J+cKcnO!vy-yQ$6nH-^F8UrEU2cUK3x!@8xR?DQn() z*;8{uKQpt!qS&^x^^1`DA>Q4=M=c(){OWh&bB&}Y2Adt@w0(?WwdLkbe5{eyOd&M4@^5aBwn;66?P4j7Tzr{#bFh? zqHws}&@h5+WoMynwMfD1D0Jr7-U;I2^nA?RfZgK@mO6_%C!8EJ%jFd#V$nAr-p54C z8CDva+*=Lo{A8A+xO3<7swy|m$E%W7LaQz=E2D2be4L(hJE7Wl8jn^Fa;SCwv>{=i zKnoPsJE(6NNbFd)w)V^%drrA$iIqQ{c=8Lq(|mTrK7pQyk=OU2iq=wdMRbLi_k>3n z1)F{`$Iv@z8yv;*c|PYcm7>cu_@aYmFPYXIg_U=7oTbi>G(6lWh?WodLRW4SP0y#v zo<6rAkL3snU%|O6(CB%x6Sf}B(2D7*n`EgH!wv>JNy%u2(<0R7+-jQ&T{50tqVl`c z5OGh-=xBK0cYkyTzZ+#0PkcK|>8EbA7DPp%=VrL+DOjJ*?7PeQ4sze0I1XK+@H`rd zlG{-4Ezx#g=~)T*rD)?7-CPLgrBk`%Q_{_4>|f&nScYHY?yc2`lIWCyU(PVZwbwQD zWHQfr{KCM{t7Vs+=kDa8&{`y*NkTE_UDu?aCuL30M?+Zfl=n;t`nJ|RMcG}E@-ve? zn(nIuWTrQi)rbOH{`Gwx=S}mhOxT7PkqcUOcF5xv+SQb3+s)A@ujjeI$ywg-y#w2q zPWqx4s%3SBS@4VB?v%pT#?2%11M-jF4R!ul_iaD^%3NRg{MJY0FT~%UJIugfpm^=t zQ#tCA=FM0cU63saL)2a880aY?oPETVp1amW+R@G;8Jxi-?fE=))wxp&lqtD{`J69Eh`lZ}~lY_K@^l zDn2buaD8npuV)SRaYlji5F9g@LJ+r3K7Q5bl$4YnzJi>Fp;L&K@FjRLUH^5>_8<2o zaY+Bt4u+P?pnIa4tcJ5uAB6s-PknvSd1%dLx{iTChM%9GPEB|3_XE`}AP1wZtNTF6 zfro}&&AlzG`fQn!p_87Lo-W#xR8pb`4T{6=vcBj<&yXeGCM8`5tvX~#^c+2_&p3Yl zxCh!|%)I@6vF7C(dnqP>yo|?YQa~N-?MMM1ruxhUvnGzjgoFzzpyT|yz?PWRg~l2} z8z`xVU!Q>fSCrTOAGaJ=9>lPVA?`6q&%eSfgy1IA(zDuvj$b{@&5eidP026l%Ux*8 zkAMBnCTX7ll_{q#LzmQ~q)%@upf^qn2zIfD$}fCA>Ap7X%gH4#CudYi8JItO=ukYg zb4@pEjwO@Q@f?1`{-PD?APEB&}ATTAIs@(626k= z-WYj);%%+G{peqyj+F-8wxP9->hSPzfxAK$?&SBvaWbe=-GGkhh`HShl+9 z5Em2EfypyhkBy8>g}d~vE!4}@E4EK;Yj4*!GRlJ2xOVQ|E$#*reF2h}##mW>Wb&bN z{ebzE+(eMM%lPo&N;wQI`8^XR4n7}YW+n}K%;#Gcy38AmjgOPMbMAp5wU^FUY>t|L zU|<*Ap5su}Hdsy%=rpSH9&`Vix3V5921{!o+T0Ejf=-~o<^`h9z3)u?LG6oI(wWNf ztTM4nEm%WCL+keKls=o%o6y4hGBxq5%g65{wfUevFS|Y(ao^no8=z<9Gx_ zMBp{|2f?I6(R?9FOtMQsh7_=Qcy#D9;B)uXg$o@z%xr9F&`w^WgHE2tcX89#6vD#7 zrau{uF&nPSGd-GM(=c?7t{ayh@5(=gQ6lx^CJ(Gd=-;^$H=AErSUBtUzNrYa 
zO2k&mQD)Bb^70BCJ9ddcAf#nxI!&$xU%=8lV)b46D{_7s+*#RA#H z%uJdCs|*X~^bxG49DT#V#MG2DAx|$Bv+hj*Lr;14PUE$hbsN|iAyJt{X}MWVRYj!( zWc>2&hChf&GW~e`xXew@vIB&*g6^{F71|8&7Iw^ng5v&a`183)6~?YrcoY~2(7%B_ zJRw>*EvF05q+q^cS)HhEGL5ca;Km(6@N2IYv4ekqOKP@iFzT zEW>;d)45(-e7ig_Fc5;@u1tgCHn9~nZGi>cSX!0%HB}_)&!B z)@hfsFud>s77$DdsRV^^Yn}b2F7rCt+F=P~Gq&Bo{L&qqf77tC^V9tP2){}J7?t|^ z`gdN9IC(Tk;lXDO=;mwP9CTVlgrF>1o&Ywot1o-UkH^jm0QADZ`2|PcFat46bh*cB zCjo`dgtiosji#ky&<)ppDlsB7IQRstI}yW@j$yJqbxMuA)UHN=&TOA6E6Ljdsg;oj& z-Id?GdBb+r_z83wO$V*MPMhbjvMpgrLv%}q1z8SKz{rgw!3kRF`HtAW@blYia{sFf!d~&iRX{87_vVzHb?!yDr2nWQM--dQgTp4u09q2yg*2L$*Dqb z!N>rd;McFl%d>qba4t=ITgSoYLLgFyleO2{On(<)_J{4|hmRcL<>toiT1I23`B~88 z7$$lOg0emNJ7+7j%G1+JYirYhE)0rXT#a;(3Q)72f|Up!z|6nkZFnz+X6SeR>y{7h zzh%^96T59c!zG0yVEt89RbuS&f`Vy{pWu4sWo4%$yP-vT7ap^m?yO9^ky(m?H!?GC zc<%b;ms7`%#nn&~3vk$oug}-EP@u~-1amn`oj3i`0zj`4*@vU9l8(Yd9E*r$))>)6b6DFp|9nAA! z=bd;o{_Wcn%kxAu+_5^CSFgzRtvi`iybX8ky{`^T99q2^>Z+;+1P2G-&$(@70vg~g z(9tY3Dk=@cVEgi+A7H7mv2kcjOlFsn$4zL5*1EnpG21J%)N&Q1;G@NC2m77*+n+$} zxEU>Ri%I62x1k$=QMfOABpT<`aCYZPag11SaftVf_SEW+`3br*L zKc142DZ5M=uJ#4U{3UA!`D%u4Zs^OGZvf1s0WJifQ)2jfa?%`DPky74MAe!7fO`}` zNb~>+I+`Ux-@<7aFO;)$foinGo9EA;JO6cX>$*~0$V?Ok!2&(NE~a9d+oNMD_BsrD z;i=O}Bn#`Fch^7^J(S1I!^09hkn>3KBS0+XpM#D=6DO_5C7_$#YeZRJPetL41O)_& zqldQ)DWw3z!_8sN!&W;Xq02Pe<-u`}m_w)6Rfa7A4Te}3xsUR2F^75oY^ufL~ z?Q};_EEXec-@ZNB!U~YE>uZhwGAJ#I^@xd!qd?-NWGd;k9=p`m%{%Tjn&pJg*%3iO zaUfC*M)T#13j8+#F|p1!6%gKnNvNi#*Wzl=o;_oeu#3wq%l{O}jc5#Bp9UQdPPTh< z{l)O6)Sn=b_^{3y7S;__RUHe9w1KocTi2+n$=S>*4uKmkDUq7($yG4T)rBAs=IeXU zwueTe1t-9Y69C(Vrrw>f=#KpM+fZmX#IzdY2TKjVgBp1;^zpfPr;5S4H&1CdBZoRz zBoaA$d-e27VUT(G_$-ztTlI?U-e?+ctyAwN?;}+(F|6>~eNAm`gOwQkOb`2VI~Uo) zOQ@iL5E>DY48C7Ecx9f5hsjU=SOBf-z@_H3mJrfWXh)}Sm9UxwPMo+B2-Ptjz?HWV zu=Qx?J|u~9njbXON!JFTQwk6N24&<{Bk*v`i(fLky!p(bFsylb7sf_MPr+u8VGP6# zFa|Am_Y%{l=u}uoUJw)A0U*X4L;;!tP^i}rh<|X-(+IO}0=J@tpfnPSwHW|$Vq&5@ zNWcP|J*~9x{W}Qtpr;@Gd|Fj+k+^LPc9%h9w>8VK^n&}6`OaOt#M0+;o8{8MyFGpS zQ~>fUm;Q&o2M-PTcr0#h5?%J9<&h0|p$VrbM 
zKMsmjHwzk?nsl_ZLLq0dHUWR%n`;(`U_EbdhNnJ0Vv`ZCU#CM)vgA+w2XoDuJ3$cE zn&>j!dDG`^-n~DzPT7)!Sc5m?=jLX_ra)%$rI<~BciiCpb5npWtS+nzDFJTkEp|xe zQ+d_W`DQB*gcMr`l7*IS0OKu}Nd%EOTzL5SNAs=IGGskMX8Y$c#D_S8pNIGBWB=(% zH=QM<4B0~G_g}T_%xDF-WNiXt93|&fg5=T&l6x-!IcH}w4wD8i(~7$Tj2<^9FHJ>B zyJR`2Rb57Vet#M+iq3*WOPgOcy2!i~R*!Xe_VBsue-Ggg$gZfP6Ar={v-PmiEi^Qg z$G^r{sMK!w5v01w0BjJflH~;{_LVDFkg)18UtZWc@-Mz{W+TM>Lol!M1rWLRg4hh6 zBm@zV_ks?m2YM0U^7Z30_n|*`pZ0jZ_1G=9UP2mL^;G4x*iFcFbfCXQrd74i0m#X3 zfwcMK74VX%+qYjC>KYrrg^-?a)$`7uiyw>xP)gIjvZF?>!#icEOYQEo^@)57f*_Ll z38Av`Ebp=UfSP$cJcskM-?y$NCHUXC!Iq4(@KC)cWRl_StS|&fd)V6Au9L~V`MNpA zPkr_ueG9U@hI8MxP84_b2gLU-2gPoHJU};irKP5}g6=g0kKVz%O2ME#l$fX-A*NU8 zzH9;6?FGqCkPN3NaJl^fE)VtaVGii9W&tkMr*5TGv)wsY=gU`P+yBKoAmQ{h-|^#@ z;eC^H`>%ouEbtqeR#sLXzPw6o@|cZlA!QJ{83XT~iwwo#@X)R-4LW5yH;c!@^fL?J zj^mPb&yzCRy6;J=?(J+qp8S)Wvn(@V_c`ug(>57#5_)mQfy|QZa)%~F;=-jme%z^Z z=QJue#zc9Q{&%jfw?O9N*0t>RWgt^q13iWWfgb>O3O_);*m?Ng&vjcI*RkH_wKRDv zM%F_ICOkLn1&5@DkWNMr&;Qi^>uZ(piO)(xwEq~?d{tu zkUqhPnV6U?EG>mrR$|jfAhEm!sl7i%;o7x2A`y)YHZU2<;G8pcb3cXu77!R%C{2uj zjSY})YU}9eq~H7Lx|*sku7m)U=A5Jw`4-r(2jI&0?%!{vLj++e#3><37YNfP#l;6=ejSfLdZ7*bwIEgI+h``pt1{z zdg8ZlcKW&+8o~Rki|$uKpS&Z759diWg@ejY+tO6K)9B~loo0KrK|qJaTH0g93YY@Q zx$9fE%D~BvIK)YB&~6KT^(qC)Dgb0R&f#KXvw)$p`0!+p^>2pl@TTAeyP+y&KI~7b zudx=n8L7CTp*xvIauyA!uX6M!&*(oLUm!nx9D?GQDmtbI+JiK7XzHpY`lB*# zzX7F@>%b>+an*5Jh;EAT0Skf8GJJ3kt`n+1Y3GDZoQm0y^>w2+%P# zO#-va1KJ8Kp|O(>8XzE^T6(@L9`d4sF#++Q@bDyHLj=J}fM9|IJ}=!?kOo&U?=O0| zmyuCNMddjX-Vw+Jj9bLA%+_sJ5k|*mO!q&2|Pad#70;GXpTRitjxLtIT8kN6_|umCr?&q-E?%!10J(?oF@wU8m2%V0(oghD>>;a zP99qUN-}vKt2X7JBMuqgiS>P3*QHka*GmdN&bkQ|8M=Nz#l{;9Sd@)T z7I-zM_M768yagaZKGkc9w_cj2=u1jhua1Zc3u{5zI_NgRdEn@|KLElcLeRZ*;-@Xr zYSPP*<+K9#heWOFGog`@0-&W0e2MYc#+nI7kTLxkPIg0BCSk_UO$&>4Cf{dr+($iBY5Q}G$aJLa93XnfM?xY z)6~2?t@%oN1+Z7VN|3F?@LM_=Kq`T7Zv@u-{Cos>0f60Kt*)yZDI+Tj6dCdUMr>>> zbbXG}$udZV&dl}3Ig_?n%Ii-7oP8vRpDxX%in#?KZJdB+k~!QK8^j;joX$Cix^)YZ zlKs{LCE5VXAmi151enY7+k3I+1#Sq$0M9vqrBkJVk%lB22oe6eoHZ6vqYOx|fJ_&X 
zkul&4sY~rM_^AoGA_XYLd&0*&n)TUj}uRoW6OdJAoF73<^d zTTayacfKvdcT_}TR@@FG$ij>=i|8c)pb#s?0vL9l>rdTth%fK5V?=G$pF5zw^5aWY ztB7CpW;`V!aT{jE<{=%xH2?LV!(n#>IpyDn_$KE5`Nw~EqB1+cs#ySoA;k!=-`@KD z)h)gGmKROGumXC$5qv{g*=cpaMLF1}d|9W@QmG!YctFyK<3mW5Pwcj#z%dOxM4(T+ zLZAiglhLd2TsM(8?}30sa9i3aR5DaWb}j#-&n(QO0ZM6Ene8LwTcU2_;o;6m)%BnAyKG z%P<|NhV+aK6+rt3df}ypUhCbnv$G;-G*Qf-W$Wxx2NC+k+h0jhF{sC?6WmNPJPDG{ z1NGdH4sx3SZ>;ldAVYz)+?=om0S>}+8xpb*N*pq3 zge9ZVRx5?TsDqcj`iqiYDsZ&|P}PQF)<|ujkgROEcJ0sb>hdFz1ke8O`FVlvz1zaf z0HsLK=N+cwod?Daal*8>N`qK_bNvzc>tGUYhr#XJr=TvO6v8)^njG5(Nrcs`3SS5k z7QnQT!1`d~Z+ydvRNID(A$nc<0Hz6!e;0b_QQy4WHG zJAjo?;Wc8v-Y0{&0dVefm2?bLD^)IEz6_~bE^9U^_cAe|fZTxTWj<|i3SkLu zL5h#~S{jv-u7;Bn6cz0PV!O*I`KJ>-{qzTM@Siz8FTwVWCz3`;JiG!=4fdQZi&`y( zM&&@(4lNaNg5L*7a9WR8Xp07lOpWNd_yVRSEjv4V7ZrSB7T~Z8dRw&%hM1%Ogq;tN z=;h?XoKZXv6cH@JaYmiFdqv;3*BGiIW#;x>4Gq)5%UdJXqWgn8`2P?Nrsw~K2t~<3 zC74szT??t#E{rwwO>ETDR#z(x{*D34_x>+qK>s8%-AE~&%XR3_T~0{h3#kNy^n>*3 zFB~mU2@pY77(k462fovBz^MzVcNLH!em;Hj`dKCHsa2o++=ej0hKUv%Vhwwqu=9mfE}qs!|P7FW`R`5LZ*7m=dL;A5(vIol4xyh zg*?OFXs9t87QZ!YQbWj9IKLCr*MaWuCLKC-2=R0#4dEAi>Ho;ZlE?OPZ%w%Y+a}af zghDr5nlHD7RM`vdvKU}<5I2X|)5>m3Lb0_?Jw zhWy;o-6~xtvkhwKeUm8240CH6{-rqE%W~vMokhMNB=Ovy-~NQ<0dn?l5Ywl0Z(U<3 zpsZIKKHm}jPr4zanhI8*zH=WlbZ_HQ4Yb}^`;LT;ywegAMY{K3V}%gR7Q`Iq^hctJ zKvru|>xUu77dU(N7OauHu%D34Y%|3)_$MG8P;P<+5~u#lmkxy5f6&ec?(`A>282ek zu(Z_Ye0`Vo-EH9)n2QQdPWeC(UIhM7#Gt5U81v8kY)i&Z0PkJ;1Gjtvw-2SRbFUlW zXLIkuoW8a;vj0GqJQ*LH0Qpgjl+%rU%)%{DttJ5W(W&_lzSTuHQ)d7}nI*Nx`x?28 zg_&~712o;XZ5yEJ_M!KWd6g)u^C(A0$J|dpQ{o@>6PABxTH#ilXVL#fqN+X<0u?)e z;UaUktUjiVQHel`meXkz@Q4vSkUb!TWU!XLemaz;AgI=#*$?A`#4;EFsH{dwIwZkf zf{PC0Dga`p;sfQ{UfWhog)aVtZvg@SM8FofNT_Q5H|YGb{_B4vIC@ApuN7S=*g!g~ z0_&+{BtRP2hHkF^^4Y$d)OH-~i_p#Nva+fv2Dg=Ofo!Eacw^Gt7TZTHBjl|(gyP?ZYGGH`>Utt%wv2vyZnKs*Bta*q} z#r*MPZ*1NDwlI{D(!x)$RIzzOj+cNh(<^s(@~0#xCQ60k^pHSd{4eITWfGSM7S=)%e%^*gG3-ZkH4>w8sW>UNXa zO*nb{aJ5fwy2?3frN`^ z&x!_8QrjmJ9_f4>3D!GwMR&^_i>2GJiVPx9SWT%506irdfB zyu!HS5`S9>CEz$gH=%^>X!wnzo%_z|rr&3EIu|K(-<4cHI>jG?Le*94FMJEWZ*P7d 
zsgNN@oMtx^EQlL(drIGF&zGeuFeeGx(y;||4LyZw`*3TpO@WPm1`-WDW`Dv8HKRwq z%jYIeTqu0NVH?9uJ7A zAHd;5af9iy9u9i4S<=(;=Gi_q9}AEPdKpf1*`L~^hug@M<&P(YS}@&DIp+{7C`U;Z zJU@}TQl)<`O|>pXmY+aSyl&_vzx!{>Jmy1+<@L0LJS`%Zh!*kluSyy#gB&$A0XojA z0oEpV@P`8d6?%}87C~~a3w{@r_0p4l^a$|bfVm2@ z|Fz^BHx3`CGwmSaB6Y)_+KbPI^QXTFMm_hER$K(-CD4)usFeEYjgi?cx)w8q(LF4pOuElJb2z0sCYN>}bHFe9l6>6G-lik2^=VI=r;)Py%)@Jx+` z;isOm&=a0_89i9;o~yraZ)L*4AffC(FYR-@I$IH}gkYvsKC#Em-SO7yWaL8GiQAoi z8rocgaU@xEz1iqIPe{c5g;1~>`=M7qOA9RmONLy@QpGA7rsN&LPZqqk)z}E+pDJbv zpK2LC>25Q1P6Rt;@}j~y6hoOjsTZ}vPrXOw-wVxCVJL30(of2YVPj=vXt~FUgQBYs z9`q_Nd^?l6vEe40^X+UY&!|g`glioTX&H}q&OxCDo?O|&Aex^nS{sEFZH?1z_!V;8&PKs>E~Bj zf;&d@>9lAzdndn^68)jXP>f~b+ zSs^r>=Znft>EFiJubw2-K#dJE<2^k{c^Z1s-eA4pLR85 z+ET@7;Tz*i>`;s#ag{~4!cYYNr@+Fug^-S#H?7FV`psqDUSxXz`N~Y=t+wY=y}9BF zXRCcJKW5)0&(712`PZC)opog!RB}&-2M+=7-CrWBj5S(+=l;vi)U2i;f<@RZ6u%OA zS(Ljig1(VFSF*@Rz2Ejn-@_!en6%ySZbWi&auZ5xa)OB(NXwxex`~sj=`@{QQ7O;) z$_sl7NZ2k+iu}fRU9WF*=+Wuv_<4`7Q}PyeOH4}(3yBNgzO~?pdWrn8w0I-zdSU_f z&*I{sp*x1+BsOT&$xe0s`U~PFbGi)Et6)2vHSE)N#(aNz&dBgM7K@dob#N~)`qC5} z<|wvQQ<@B?h;Rp|OLJSJk~qvhkq_^6&w(_D8|b>jc(KMJ-@>Dlv3zAgdGE}iA->{! 
zQaDw>@F+e@{rwwI9maA`V~HLLw1JYkGy9L=$j#wjZ#MFq)lAwjVl?BdBXwDBGDtg= zY4Y+yq50N)i@LtAL`A5w4{|MK8K;pB;(9;}SAyIz_7JzuOB&t31KyZemK z$$DJ0|7d6U@EYpc6JJX@YImNfxLUAMwa@;7mR_0YKlM$KKSGqBPh`N zETkst3{9cF4~Ku3Yo@<&ugXQ(ZRKbqt*2b-;X* zu}Smkz)lsV^GV9VCr+N|5I=Xtmy!GBXm`GCU9`oQ#`A})>qvV>YGI9Eg*tFWg(oC@feA)&eB3K8yuenr(CJ@kS}1yR7ar--*-TUY_(TpOad5ndVcr z&TI;c=*yobNp<{S`G=sJE+(1?e(Ff|s z26xLnl!Ke-(>)y!9cJHBrKHtFZP(Yxe~*_6fB+sQQ8$BD&CXuUU^N#v$2}2s=P#YJ zxZlBrGOT&%<~S=5;*0X~Pza~{{lrXCFI$Yt4-fLIu**>lU=P-0n$T{gU}GPqE_xPK z9igW$+9w#|lH^R&>1XET_xk(gxlN0Ckz3i;*9q8Tu5XKOj}eu(W5jzbS$$N)!cXxO z4HsmUyRT>&8pednt!515J{b1o;^G?!mAIF?czeZ`I>sroHaou*NSK$F!JSrkUQ`78$YC=Q>CXG(hE8K)!3nl|_l{0YrD9|# zl(%FiHsN#vYyx4J{6o)^c-HOnx5*RL)u`CZSMN*42+a@CgMFJXMaq;Y`CM~~9Waq3 z7t55OMc{udOn&`FIXmCG={OlCwX&W)Yemz@u|7=ff=q$rz*^($`@16OUkLJxE7MUg zPexrdvI{)MDP{k0rQFxomqf~t($uNs9C}l&aDQ-3_c112enWAhiNL>0o?IDd(s7hc z(%AK}^UL3d*`(Gi%GcT&(lYZEgGr?QV>5kS84|5Xv5!VxUDc-H!@`2|=0=v^-L|k+ zDjc_4>ctXQC)6!tgim+EdJRqhJn-h_NuTsi=_a4m z*Vh-N(6=78uJq+PpBkBUIo;>_9RkhjApMfIHUT1Hdmq+IZ3M%b3Ubi{?k|u8pYqjf(O#%rM@7n%HjEDQyRFsV-y7V7;a#q{#V0CXH>yo+ z-uAP^b}1PQ>$%)UExm_VzjXI6r-UzP5t*4SCWqcfOb)adx(vQa#_4Ai61~(O#^c-1 ztI>?adHG0m>QW^k*~RwgxeE_B0<~`GKIob+xfQ>JUZttPMmGd?RZ<$`3$){j!1OQq z5~(_BvXrtU-hqTz*{iTDwI0V~<4l@GEn2t&&bzsOF8h8S!4zO3X$46-T(Mm4OI?c{ zm1wQp+I{owwQnrxUN0KobV@kS6phQT=mTO%hdtU-+8Sx=m7a=8STFPb;PufNR>Sus zCi%9nCJoj^I?wR+^>s2ScbRUU=QT+4^r~=5pCe;k48p(9Rl0Fy=IeDtY)vaIgXeVsf(XDc;oDO>k5m(NU~Jq^Vcme z+EhaX<~=ts)78UF^TWDjOwE$gjpHG!=h6$Xc0taiF^w-8JErOM+$`aSnG3IVLB*(M zecf!X$HwJdX<8pyG!a^M6Z=A-p3`;1F11$PNF>f^w(DcnH<&@0|go?HN#CzDf8niwfaibD;nu$E#tOt zx3&nXc)z2lmDJX1*D@r1UY}MKNp0rKui-dM?I%x`a~uiG^{Nb$CK7L&vZxV3#g_fr zg&(_fymMSe^2OU{oznxhsk|KSh|$wXPiz-hXt3>D&&`i`OO=RGr8q9Eu0Pm91NY_= zBO=lR7*rzfcE$`o6h~uMuhzf&^8KOszKGMYZus2%7e6@8>xz{`wMrshYS)=-KDsV8 z*ab|1OTfDUE3^4i>t7f3%?2B`y!)9I=hv$mhp*X`VRDf%XEhy&jESs^5tV1ZB6z3@ z8CHg>UnIIcUUfE6)9x{7-q)HAKz5G9ac0HO`Q=+LEvNl={GMA3Jlx9F&#Yt_VNaqu z@!Fg_UY&^7bFqkV9=_(d`4tB5Jwto9c3^t%d9yWu3}=pS4G9AS-^WTBE#ri9!>Cip 
PHCzy4VB@`#+^e3mi3yq$u!9~Q)!sKEvaor zM@O4~jsE(&p=5H;GFr<#BV%lSAT`>JM*KW7!b|Doh7k3+@!@0p$k*;tk~oQDrZCMw=! z>(;rqN>JUzv%NUGv1ngCWWdikKR^E|IGCK7Ih251FrukJ>UHx6*37IV?*Ps>v+(bmrj~~v z_*Vb9i{5@kIGp2#mVDjT(UGBob>}sw{O>});3y(@uZ|nW=_^HVCY+U&2W{;g+q=4I zK45VY&}A?R4(o8+o?w2WAXj1W6=v3Ad#ISL5-Xe8OyO`E_$4l@ske>Lh?Y5|!I&(MdMzvyiPNlwG{(!0}Ni z95Lb`WW|^9whIrB;PG6%&&ey$a|d0gOpB>C-q^SFHgnzfLt^6Lfb+V3jaXyR7vkT{ zQm3bqCMR_U2H9;L99o;2Qo1Lkjo6}ohto>4edOC>AP5$B-kN%;bY|jCx47?OFJkcK zrHVC^qW^(IQM3?I(wc&m)%$+4=UIbO#Yx&56E1JrO?zRhzNz*2p8R`p6RQB@xttu9 zd)*aN(=2-ejdJhv8MAU;G#pHx;T+X8#Wa>DPu2t9_?)adSAG9}wa9$%_0Sr>R;9IH zOw7oOc3Yu+cL8$opdX!`0mpj{^$UmossqQ2TV9c^`Gs8@aOG}m+0KWrGW*SDy|vk- zq@;9eU2nZ@Y(%TAqu0}$|D8Y4weLd2C-`ca&B0;#$B+0-Vc(DNiHx+7M82;X{e`qN z-Oi5sH$9=q*jQx_v-=j_4U-Ak`(2c2{hwsjT&s;a2x)0(l9Q9){^cqtwLFyj+1;p@ z(X3k7u9%zQC*HzZm&jghYBoUggo}%-Op6iI?<%VM(mYG~^V0@t&okWLg_ptO<>#HA1Jdk9ZL;%bjBJ>SL+O{Il`B+R03{p7EC&|jdSGwRi3M4;=aT-^7k;$&PLr17&z^D>g{>B z?x2s?ow0?;S3KpRyp`7fGvx)w_Rh{zZtnXDa_WYq&*IqR>T0{x ziHk+V`yn9#d!o|jsgMxK+X(8u@GrJe;=zRTi;F){?55R@t85nKgpVFQ`pR*OYlf4Z zFnE2s?vbB}aOvxpqs2zlo1506q8waX9tZ1V*U{10d@ej8nf-{-^X>`2ZHTSZ)}DA8 z(GjSsVri+pXum`hRhL)H@rJNBgorP!YpKk9kkG@U#>2YdUoCBv8V!zgrrh~`3=HZ; z`wX|!;oK2!uai3#>qeSrr=ih}mwrTN4NK#@)x9 z$*+3|U7i^2E%%HTlVjHD%AX!=bVspLEpwQguR{0*)YME6InC8<)i;liC;lBNWOrWI zKvT=ty7AW6_u5MDtNH!a2ufm%=dWMmVPHL>pr^kL_q{fJTO?~RdX$V=N=b<=hPi#P zP|t^7;C6&`lAxDnxy21Qe>Pj45oVp?{4zC!w85pN*je)J_wjz;baAW1++X z(i$2!FD?YHqu0L`k7#cv=_BRgx$*XJ#)sssNmul*Z;IE@?-7US7Pj~IzuVuxqe6!} z<8_Gp;6cx?RmY$wJcJoaQnt1NhK6!`lMVu|^Pkn0y-rV0v2k#oS6U~GmYHL7Sx+{$ zWu~utdQ4{W5!1^sz^U}P9M`F`eL!Osz^q+?pP}T|w^7+3NG=fq3;lJW{oXaI<-W^A zzuP`^YR6b4)Be&XP9vkzd<{)a!Y=DZ2kV8$$15hwH6Jkj-m@A_o6I)G1QHp+!TJ;! 
zcr97Cj@0|yiGU3iWYh4fvEJ+ScsY)z%x2~w%je;-LVLpNk||gEy6StCg2I>fL;*z#YU;3vh+w&_7g>XN6B7NQ{{9$!{DXIc z#alky!u~T}j`6BSusr}b_m|nOX3?zn*_p-FBF9yw)YMe7=~`U->Tti?4dk4h(J%b5 z*j+YtI0)}HoRvKEYaT!A$Q}86>*h`4p09cYypH(NOFpbY0rttTdmb z)%>*+l^~&}ruKh8E`FcLD+o%8=T7S*GY+jg=;s1H^Ygts12TR#d6ALEd-cjn;XC{L zsq#tnKib-&vhxCtpyrCFg*k6%Jvyy+nmCXCyS^@(ujQSmQ5;!T7RSxUx3IVWr?m0s-g*DdZhC?CX>a$p7}oh^;m~hUe)exr#;MI$a z60!VV)|kr<>Lh(j%V6<{P_;NyZ_m|%3_oh&>~y2{5YMxn_CNo4K>~=aEH$Xf7##Z* zrKWllIB--j&`uo9M?y~K7G0g z=g5430w-S$Vi7X#aIQy`tGA6$S`xn-=36LRb1Ui*laqG{xv-2bFMNNrzDhRf%6vX& zyUR7>iR~*avoT&?<+ewKLTO&VK4jb$WIj5^lcJz;Inl1H<-eZ5*zS?A@axz8y!?C= zmkn`OG-H}v0-F1Q=fTW%r>$Z(bl)q%nQwE|u{G-4Z!EOEygnPFhc{R_gXSP)qS={q zQ_M!+SJ+2F)@4Es@5YVepG%mL()4>q(iw0B-oJnE>B!q&!+$DwNku?@$!UT1%i^{+ zk(!#CM%vOzRB3#mfRu(Ai@ei*bC0;#bA8gy%}q3pH{+3zs;~J)bq@{(qTg-icC~Fd z#x5?BU+$I882Jk_g_OVcN>3bDQ>Vq%7a#8!ZB5td)VU`>rHtiEN-laap#Q7XphX-q z717H$&ZU-nLZnaU=pY6D&}Siy>FPNxsi20fB2Yp5%BHXBYIAEgxT+1hkrQgeN71HhuBe%16hP z5TnECWP8JGe`+^6ic`^BQd073mE(`&xKGz$?kL~Qu;^JZm zLp|AqlUv{}n3PKeNlD-0(qjramW^MYOxWAoA04lVp$+A0AJ#Jo1+0&kpB7Aj{c)Hv z!L!N7@2uU>RZ>=#l9rwuR`;J>UzfG8z!Xn=z|3rej^(*A~j|0X#pQkc0^A#NnK|^+nN_LPC3428uV#AM|4i3Hez3 z-hPpxbnbEcR1Ttz|J2Hc*U?Nqf%hE<;Sdrbtmn42EcVq8xwzDs^g>MMn*D|fUPZtG zBrZQ`z>^XdzS3}Bu^Fvob4eVP{p0$|{QUdi;P>(IWrj966>qd1HYaaDzUuJ1-S>cq z2zI4uW+q9i+#)O~Ni{G2^s~u8x>T9Pa1bcI%bLQ9R>PW+gK+r9Rc&o;=Z*1Jb-f-+ zmLk0NkOck3pIluy+EOc(>0YCw-#|HG(5vN9u(Jm%XUQ!rFN>GH?qR{lv_AgJcbaF+ zMnK|oMtgcdq`x`IY(7)p;}wk7Fk%-05EBJI}g+iBkC?(kJWY%G46R>E<_xwVc? 
zJk)@qXB8fYx_b-aY5iy13YjI=rW}OFM+@Ow`<~lPm&8@wsXl3`&e11ahsw^nx`MZE zjks=uskAJd9{i(@JG+!-Eh3@ivNEMB0ePd(wSITWpPEHye z_p*$jh~o0{6uSkM3ahc+86TUgjlI1g5Ldsd?2PQ~2k_YzRl~!TG)iCJrHc3roAhpT zs%E#H#K&&`k0LtO6VXE5z9}_(sf2PDHq@P1+1AqYzl9CruU}*NoIU7RA9J{3Hd=HA zJdy|#hHr{%GYC}~lwmQPyZ-*Lj)*J+Hk9jXM$Di7Gz?A6Y?E=_@nV(2&DMAx_T%+Z zX2?W>Xa@%eK_o&Uris;uZgi_>B0CQA3kyAYnn}B>S$Zr^YaD1-ub!cJ9Ps4iLeevr zCl#&D8z#niQ&qIm25}WQGd&WGAajxU=or@DC{UpJGqTmt_3PB&&6}R3F24n16}sUU zp*`Ox_Ll6AuSMzY#}9;$_(mo~MTu!>kn;=FNxpnJ7o!)gkgI0kGsWD%0EY$xbca}) zc(9aQz3#I1XvrE`oZ!Ki>6yi8D6#c_0OEJcN` zxU{roY>WV$)8y0?6vH1qJ;9@+8U?Rpnkrwv$&380LF?pM?0HQu#G`tV)R;|$P6BcW zE2?pD5W8AOPFR@f#tpxywoYlkgoM&+%|sL`2n@H7UT9VpmvqL}U@DTRPOP;4=W1%0 zup5sa*))oZiz^j-eXnvQkLdW>QTlWw|8`6!ieRMLT>4%B-^E6 z+K`5la$$d}7$4LD9v7yNpdb#MfaWglV5-pW?x7m%zOJtC3mVeAob>d#TKQpTo_BS1 zb!l)gOG?67QB{B4=r+#YXaDe8T=}a=mv8IDxQ^D{)g@9m5{(~`miAaWBVcmU(UJEq z7M8V!X7^n-&=L%YVQKIMJ~ z8QuoUjThP1QQiACsh7+b;f9?ZQp^V)*48^SGO(R8t{a=)A$QTC!X_(erVTGA_Z!bG zE`I7TZU(SWvI~(nI9S^4@(tLfh$RZ=dRYuCJtIegC1-J&)5%ExN?SLDr(fFXjyYx$B;nuR1z5?y&b7aKaA$9e3+#*%wUev$L~r)pCWI zgrqDi)*8}$%khISIS3g^nV;0EwPAgmhzQI`G|JwUInmBg>hLF#YwlQW=q?P>Os7PfJ#V3 z*0`=W>T`UF%57~@bDC*V1&I<5XfRo6EJbZ*?0fZy^IX%7>qpo`UU{QG^-~O^VvR-4 z>UeDD8F~n>FZ_L#zwQ3}`qQTJev9Uw%GO1D9f3&nLTY}az9`g;rKdQ)mZxv|cxR>INur2J3v)dIB=USGT_GrlfdL=*OZ?c)c`jKS5> z?@sh%V`Hsqxc74k?BmKT08*^aG&Ix(VCT@(>Go|kT=srbOsjI;A)||0tH;yDrfWOe zS@`_v6HQD>XFeq&(XpOzE9kb~QdQ}{>Sj~BgZ|87=y_WIZeUU(jub9sY^pIf5fRkHQnRV=&lD9E$@*2UkagS)|Ng8A=iFm^R=ECTjUGf& zYr(6UpIu$ppO+sTXyo~M{aaHtG&17xIADlj)}!L#LFK9!g6t4+cIE_k^yi=Zki|3m z>L(Hs63Mn<&JbaqUS2x~2W=xGG0DlKA|fI;v9Q{J7|fG<>;pOlgbRi)_%YY~)|Q@? zhp6cF{{H?ltFe+vjL)}l(>f_paM=eN6YW3@MoUfa`C}2Wg|a!>*`>JuTjS>ConKp% zUT6z0uB>eD?8NjFc^4df7hdq}%o7dF9+V48a&lj6YPlk>Q|AJ`h7ed#stCmz=QjXQ zd$8sCh5!D}zjgO6?o3OyJn(~glxdl&6WUr>Ey|d&vMlSX@a-smghS z5LRn>u_Gc+yD|h$bB=nUTwkJq+rgOWDqt>ys_M_5qk-yf0fX`Lr=(heP6vdw zq@-lBSRfuZ56_j@h~RBAcjJg4t^C(u&d&A+<=4kbG5GlSz=q%pdU6{W7=Q}YGH!q! 
z09aC6S7-h^k3b>T6|R=Yc^w}B;mYY&Lp!_ypgmNRFGkEnrDkq8>`aCARA-XUg;X%% zGkjLV>kiA^1|8v)99EvTiw>I7hI*A)Ex_kU8NEJq4hO?#hsAI)K4Vq%)XJHP^` z8?=0+i5hqkqQgWD=Hl&JG+o!Z8vsB@Yh0Kgb6FcVy}tpk_T_NeBNX;h8qdh26!zqI zzP1D)qN3vBxnQno{fo0>Kn)+fP7jJb4y|)kb3fMA)eWU!-dhc;u8I8erSZ$t7tkaE zC+R(Rypm)JFB(KF@D*sN^uJ{%C#R(`(KP7(} zTHmJU#>Nz|SL9@5@0WYxkVCLN*CHh&10MJs&`n^Cw5n)^JmAu=_k_;AK$PwD30-cV z{4LxA2Q`=`5o*6`R9wdEw8mn;D7oC5AR{Sx6AWUyVwyOFt=QTxiWCJJ4G4{wUpZ1V z0Sr}F8`#){#>G8G@=mT=eyZI<8^lHbXI(Ls#L&x!R#V@jU?(EN!h|7z{DBq8uqd9q zCoeDm@eYAVrS+6Xy(bTlhUJVHUBl^Gx2l>N86sUj=%(!joJ44HbN&xM`!2e-o?em z#hzATW~(rwAas7!dALAHgjCi(F`>Qog`snLIx&n~;vo*s4-68|Mg&TgnT6pyQmN2J zQFX-gyM;ryjS!AlTM)sX#?<%qfsEVJ)6?B3c7Hfqz_HGM9thlO-k@t;8W=16?}Wis z&DKHe)2>9;2AsvXv2v%(b5^4l5)M%P;N)&7jY=A_K^M={4WZ~*a zO$7x7rX>}0G2i>9z45e8H1D?+C}h}x?-H%9uI_KmT&8r5*LiS(w`AR_-Rr#;W59yX z4f&W}yy6uZl6bzP{0vgz-EhEm_V!vvM$`kQ zVsg^2*ecx%upBF)T907g4K)F; zo~m(ChBdWk)V@#mo`Mkt9{cLmtDSW^`D!wd0j{Cn(H}1}*J z&~mgGAulN}Z`v72YsT^Qbi5CcX^)=Id8=h%U0dV3YXm&@6sLzs=ig7IsBH>{%cVrdaXkth(A-ZkNp`Xm)mX(Eh5wyf`)N1KZx>cWhJke{DF zKw!GeTsgTY>UZYLI1GUg4|%w`ZK2g-&!~K#PTG$emC7un_M@>{dxcy|R#r4aX{YV6 z4ZjLXj1g?uq+uu@FK?<+cDGa+|I*5ekU~QksR}w=$s!OJrECu7Dm6uLfzTddwyTwK zc6LT}fn~i8y^HD}8_`up(Q$DyP)=bJkQ+!e214TP?X6Se3?ce=WyQb4*zkCFsg+n> zPHq`$5iNUrI?ex{1#sXI78V{IAGbf+o-+*1%E(CRH|w@!jd~Q(@#Ej$ku8mS)%(P_ z;gikQBL!1LHUs`q@MgNY#O`&|{1ueY1cEXKJdS}OsIl=m2uNSKETjZLzJTW052&+X zI|rxTD_0NNpAZqbEk&sx?X;8dIIZD8PwKipXpAz;k&s`%e%T8Y{P-aXF1@QRN#Z_T z0EJX0lq&|k`X}XDub8k^pfCqItn|X6)Tpo`fFCVE&_E6~gwt1TPgz*IiwS~N5PDun z(8LRPL~n1~PJ5refw!%;ox2Kx1e8Go|X z{WxTqpsV1y*TZG#e4&jbsVI8&-HX#LYGIH_^ED0qj@&xR!0m63$;L>*o z_TO$!%Tr=d<~I993KYflaLPrI28aK2y;R9xWe%aF>!p3aEXn~%X?q4g|Heiq7JdaE z@&UT;n zyj{=xm=dDPq&u7n z$}v&0;*kH~N0Gb?!bP#tBjL1$Ha2~bBB1wn8#?hpM10W@X~$v2RgbW-TOdeLLOz0Z z?*E{%I*7cLZiZFIWDazv;<@eKBk7t}IkU*53vayAMjm>X+q=8rytN)MfAK`nGZutw zZ<3HaT%~4>Gb0F8C4Z`$xTyow1T;FQe|orPjDLQ4nx|X$HB{&#)@8;EaS7uUZ)D;) z-za3ODEVv3YH1NdTiO8Hzz`t)pcTAfP6kvArIXovg%sk?6v~jNSYgXf4!8OM|7(_+ 
zJ%BoE2r~YIzyBcQJ7_uwB_t$J>`+os*)RTl5CRQW9i1nDD{4>Ha_&dEjz@P-&_h$o{>yu?-n8FL6)816_Se&^6BLb$7<`KaPj0hx^sEP^>a%<-& z|3DDx=;(aYETw|q0vYreoD*bRP-$(wy;xDwej+K2ZEffg9ioua6u7ISweqRJ+2rLh z3=9lN!X?nseg-F&I=E_UYm4}WJC8Yoz`^{2e(H`Bns8Msd6Kd95+G`jxk&#=p{i{V zG#0nRa?r6UM^yHDhTe(&k7#NTwz@=q^Q26lS!+ktF@s^KwN#epf-fxrUsDxK1 zG5QtUKa)1dz@xdM0{*O}&|F?IuIjumN8dU=o z0f*e`M#|Msz1M{AGe}bq{D1klR;-c#Ja_ovL*EB#Y7+oe5)u-Np>M+ZTIGPKpF-8i0x79P61|Of2mRXT&!2y^x5MusGDYg; zOG-w@;5aUu@ULH`;Wc6UB46$Hb8yL25gj^Q$+jKFVB27a!MZfIv|Mv>ap4yb$bMDB zd2w-Zbb6``y4;uz73t4VO@}f@Y*e}BBe3bBVqy=lv3+53EE{m(0>V4xSyZ2WXK&zMWcH8^^@2% z9SV%B!~Bn1dwr0|fTX0l83ZUuR##Z6K7%d()_$(nA%6c-vRnsdTX$$=^f-7gxbM@zE}b1Mgn~_Dcl@L9@g9k827Txd z3_kI>Y#PH+2f!Zw<;zr;3={+zhFy;REl{T47>p>Hu~Q z?t84kM+kC>SnjY=rS+>sp9?ql{Xb2>FBCJhT0@P)q0h{qTN^7cbRPNP!*zrPJhQ-o z4rXg-XHN4%Hl!7pC|m@`Obt;9LVj;hUWnE5H$$S^GC`(Xf^-DcOFI6795}PZmLp6^ zf(0Tb1C9uy5$URe>VTT7kv)@)M4P`qHZd_4z^R4FY6npXiBiK>%-20%{lMcgH=MEg zqTeMWBNLhbL^4etWgH|kF*7q7aIJTq zaDPEU7#smo-|J5|`QS{qLwnrq{NIS3lT#G$B*yMf3e=xJX5vBk!j6s{M10PR=Z7=x zQ&X?jilixHx&RoGf+U3B0D|xWx;6L$?i>KnvbU+u($=65cP9$OLD{_lCl_v~1xAo$ zKv-p9^^ifv?Dq0_#H6DS#z{7gGuV7Ea|dAtn>AZNwwRq_4>JZrBLG6kCi0`8HxJq6 z7|az+ahPSuu~b?@;bwo`JzGAaYXnRK&U2B=mTq=S&JdGM6@nFS-o0xA5(#05o~Ij? 
zaJxmFRIwAo*~bnzi%W$|Nd288nhoq2I12(wZ`bdw(_~(7t z?trAEq#*-=`yd72a!~>voY0a%lELa=77R#cVC5^p`CZzmoS~wi_~46ij}1nw>d$s% zc%9deMyHEQ%gg@*Et`|oLHMliArUG=R8IR`dVxkt$J$Dy+gMV96$bFw3cW#)SSl!a zW~gXwp(K5f15liA2tlSS0>u%ANhPAMi-=jCcK^#LON9-(A_H<$)OYO4R9YNNkcjON z=S^?~kCx+XG`>613Z3s`L;V4fp`_Xw-3>t1Y@~pwzP=tAn^2`{!U^jzcHEjykcyx} zJpOmbRiKjHLSCS>8sH7xSB9s^b2!ziper3o!()Bra z8nP%KN{RY5K#8jJI2o%7qNkZz4L?a2~G{jy}Ot z27qt#mls=?#DVf`LPA22n}0yU`7_gSX*4W>O+X+8_^7C|(iF5aur!6WFT_$nlVLQ2 zU~zf*E4H(Vl6g45!A#JNM=)#F%eO@uqyF_5LvLSSC_s%;=r_YT1!#dg4{~c3A|f5G z145DjMsl)WxFcT$hYFs2f_ydr`B6H`jIM{z4MYnlN?*v++Tn^dO1E#_N&(D?L^5y> zNum(3Hi@%%pAApEAKMrFtB0Mr9JK22ddB8Nr5-6hr*| z{fFixmjT#Oa&Sa)O}nOb>T)K4ibq}uAQ-#r_H4U1;fLK!1sd0t1Q$e+fDHyjCqopz zFV$wu#_E0MO2}y?3F##r#5^>E0HZma?3+VGP&sig$))vg56CdjZEPUEo*_{`Ak)Nk z5{Hry3LE5Y|0ew}9K;TM`Lb50}zy36dI1)b3BNm(b!p8t{T8~YtIiR z)~9M>lDto(1~e#OUoVcx!BVEfWPy^u64fJST?#t70Dufr{9KZ59?K}xPobf}lsn0m zWS&C60>B!C*sHCrW%ynQUR9qB6+meYiVzW?{|;3};eFn{d*}G)7ox)@$8#b$I-r5I zgCK^2SA4WBRm@Ol95=>?Q%|@5{iEdOjs6v{&)#?*fQfrg z_@gOey0(t2wV`bZK7a>g@={4?z&DJg6p+QhDx|eyz;}T;Y}04?1@ao+!Y0~k?UDI- z87+9O(j)Ip2@pEK1yi7Axw^aWLLe4F6GoP5P#~$sen}dff?fu?x*Brr1qB7b`@<3M zhUc4ZY7QegLpiFq`S@z=nfU|-6Jf1p|NfN$y-pbX@t)vOW9>Q2T|5zR$MF+s^1J=W z^<*^*j`1}#O-)V2-`u-*&w9F63N8Sqq8t1BizgQtUMeWu?}}jt=2WlrN}4W88fK;e zrXijSwhNt^nfc^kLk7|;G(-Lo^&!VGhUt|dfF``azkr9K#w~!p0)9YF-ctKAUk#}< z2b)teFwSK@UCX23PE-wyAC_bpDvsyToIj!fkijf)@d(9*q%0G>VC}R5FNXM8Xi#pi z5RZYdo|Xzhk?sOzQ;lm z4gl9q{$lnYDWBT;}n4h5nUUZLB^1#WVEef^tqR*2X<(tPnb+rfaT zDTBthSCFy|e&cJUDX=T(2?3!{%}dHG<1GRGz-cqx4RH8quTN-ox{eR_ffOtP`BpfSw1;xtB`5N9I1BLhl9;yH@&{M;L^0db);jdRCJ@DYzR5 zr7M2yZHIsQuhCfTGHaE6g7PuudAl5F13bl$I@)anY>Dl1267$LPy5h4Ybz@&M7BM9 zRxWaQnjG-37j7$-)d(HfMh0|mJui+|0L;w8@N<~BAI^{YdPlnhVsK_aZ6!;H8$^ zMMQRPn7lcNh-saWQW|Ry2h63J6;k{YEqg%D~{@R%15Y zzi5t*j>gceTb{|1MP0?|ZTJpt5Rd>0ncn`puT4$cffNw%yW09~)o_>)BxnzjAQw|c> z7{m?S8cY#~!Kc0XK<4SorWp)%b@dMPWnkDa`O}m1cIgdV5pq_*5Ze9u^%Tm@3s?e3 z24e6)1sh8G-1r$`Fvwbaz{5~E#GyEI`&`tD&DTDLp;zd87;MsqgQ=hB%syeG8Q 
zODIR`0DIl?5IG)L`&3wHlik;E#*tyog_1SN;e-4;l&c;Jf_rXZ;RPJtP5RAK6BhiR zot@3??d_?jw880KY*0|CXlb)2yM|XCmo2A}8VA=3l82R*b-Ak)YA>#3o9=hV&!9kI z&l{m96X2CfaOWdtgU)xLMN|>lqyM~2R2((I`L)E^)JuWJ0+enForh%yqK6L+bv27? zaTAh2KH+*ynsH1!{n(#clQakW__Mb+6G$_qbwkp3Xky)iZymwJury;~tM7TcK(Ct3)Yly|X`26=wRXyqgpB}uoeY&0kA%?fXxrfI{G3udL}Gvf zr1WE$tA@l&e^|0)&jZz&HY_X*>8L`7Wrn}r06K_{o0BMn0KvF1^f~e2*#k5rbAaXp z<%W<%h&_blfqFEA1P>fL7=C``J}z!6EL^96^IK>-XqA~MuA+MaXGeTqh0Tndy*(S~ zp!$X0o(+gM==VK_x=|q+XxseZ79zsIsj;~4o290uAq7=l;2#cnu6RC|5J;(4kv=vg zms^u{A zKf!QF5lL6yh6Xis*#W9w7{H1v`77r@@H*@+N`V&!p(g<2xJybO z5-2>8yibzDDF|qu6#biWnR!4!pt-uNSj+)=AMQUyIs++ls^0c*pmc&F?R$IW+7Sf8 zF^qhsW37h4q_PI|1|U3)Kx}(L(^|>jSdEq`CIAtpATZe8jB|hnF&oZ_0?QRImt_Pp zb+Xni0xX#zXYR9~OZLc40aihA+mWdk=okP_HvIP!<4aUjD==-Kat1r|tqA7@hA*2T z&4vcV`YBXM1du~LfvQdBB#D=%mWz63i8oTHhiqF4r~?G~LugCDz2kG+k^*%yg@O+h zOLDD?QX#7Ek9Ptn` z^$No+FwwdM0u>q0_|MNwRBjavV)BOoiRpn*L|Uew0s;(S3LsCXdbdDXR}c%vUNmZ5 zSpn-Tf?V&8;|hgjnCN|Cp8cXGOWLj4F&E?t(s~0W38T1F(5p3s{&(n5mJiHx`&^!L zK|Y43Ws4!DN~GFL!X>f7C4-o5hAu%n#B~eOl#P%Vlmg4W$s6XP3LQ4U*>@m8ErU!t zIyr%GO4^17roN30vy-cd+G@ldoL-kx=I$ zTJQ);({1Kjh1L2fJ-kqrA8;Qg&_#s7wIn!&>=4%$zw@NPFM)Y^k#Srg4;>>&c_6i} z`8GAe_fQ<>;S^npDmUn&0uaUnFi|c^um=*>F$fuCH=$C%13y#a}afv%dH6*^EC`$tZ=0ym$cv*L_ES(n3d} zaRl&f1qNj(*w`Kd%l@A^FN7Y!I;nJ~168WFU%~|c4%3FEAiAM%E(T%mwpqP`OmYK~ zEM#+0A?ct{PIuc|Mp!0~!*Y@5u|1%;7I^A2SSgzVoGZl4+DM_izP^67UTS()Rvi6L z@W)b;l8rF7FHJfrVA!j3e6Zx~;mxO?AoE1U`7;XYFb@yU;^HFAAtTTig+f6P!(6~iIK@A^yE$SW5E6F6QR~pQsS^WzhCp?D zw~YEGZKfR2@aiZKa)>X4)QhH5?}>s#e;*s$=mHpIZJ|^N^n;Kmnc)#l#OT9jCIE4H z_~5~g&K#Kw%i34zZ)yKyN1m3M%x1N%c&K673P-fdpEjx=bJNEAwql;XY`e&n+z>&r_3+zzP_yF zWHcHbOdwCFzvws>vOu^1U{}@DsIN8IDtTDV@WASZGd_5#0)kBrPm}Z5?$EVj=DP4V z7MQGjudn!j{|w+yuN46yN%W;*Fj+@EF~(H^L!ug1vm>gsPByz9bROVP$~B9wK- zZoOCV`y}}Ppm%obOGHFqTG}-N0VE8g%Sb4whg!uNBK8Ag~G u#Cm&2AEphcq{ts$Pm%ubA5gfwa#_ar;xSL(0sN~DXi{Qwq6H%Q@BbIh7p3a} literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_hgraph.png 
b/doc/doxygen/chapters/images/eclipse_hello_hgraph.png new file mode 100644 index 0000000000000000000000000000000000000000..aae567b611adee393892450aeb120f0402c49d6c GIT binary patch literal 20117 zcmb?@cR1GV|Mz7S2@%;L3EA0ug^<0Iz4y-ELiXMyWQT;1Rb=lKvUm1MHqYDlcl>_G z^Bm7V&mYg{IPT+ibYJ)7y3X@`zTdBP7OE)!0tJ{+(J`n{?nkt^ z1_ol;_EL!)99*w#epv}&V%>>-*Y>!2w*x;}d$#I`6yc}I=(CerQ04FcxE$};gMpr* z{95~w#-FNO#cR$_=Z3j5McO>RJB8!D_Lme1273ig7ver@a((Oy%xn8@&n-Rr2uYF0 zif=jDy|Kg&St==SE-SSXQQ}~D>^-}6e(H4KR;yY>>{wI8`_}r+- z#O#_Ja?v$dIOxmjpNmVZ+NW-JYCcYs30`aWi@5u#dd)!3!!CPal01_-q04y52Hp7c zXJ+EDB^wlD*;L9SooI&TU`pl!qXrwlh^O1z+xQae&foUAP}wv;L=z(Kzxg2Z2uBaC z82`V1Og&x59PHo&|MdPdO zT=s}%GLz0CZTus2$JSO`w_hA`@zlI2>bLL6jc~~23JbA>v`O0^6Ms_zp)`%uTTHJxV{=_YgtTx_)69BG=9vgL3ekI z;ukm9wRW#=IvN$)3tt`=mo;GQpjU|=p%n0UAU>JKDAR)A-re0aA`-zE; zID0=gdTtc2N+tbLI;qPTRaBdq!Oq%p@z8A&ukyn9%SM`3ZkPF2&4(^8v}C@_tQQuS z?H?bev+;%m889c;f3B^qEMcIeq~zq4X1uS2ucoCd3{A15UyC&zoJ=2cf!mv^LW0B@)A;>Fr}rCFb;la}R|)`eP4ObY-zH{O_c198^4y7)fbpS4F!MrP}upd$7IDcT!I>>y{(-N z-!Xo2dd?0NL5aS&OY6j=qs<&bEiDvHn<#^X;EAf7+$!<3OO=(0TVAzmayM-|h?$>H z`}VE8#wI#6YVk0Hfa%V@i6TNo$2(9mZ*{-6(cAmY5td_}@BYg0bN3Fquy5MxO|I8S zsj{XVslsz7c~jfWOwC^$MhGz5wy-;Ve|@TVE9#t^yPnT(U0KnPikq(RRB-4riPX~3 zQuq0@hl>zVu*;fR7~BkCPENL)X$tIF8RodOv9-0+%W7_JmZDZIQBw~6dyyADR$Yxi zl=S&5jy}yDQdQBYF`05<5j96aVM$K(t~4SP!Ab5p>PNe7P*y5Zz3p|bV|$v2f%yLQ zVSRCN$r%0E%J7%x?k|uK9fu0rv-Ju^iWAmfOQ&4F@RC(XF~&;=I`dUXd0qVM%I#1n zPUzl9OWWpm#Y0^D^5mWs@_Re>d}H*9ZKju8$2f=&&2uTfs}UVb(1^*WRn%0r|GCu) zyq`3z;3vbEsGA|*=orF9!f0EH7NUypZkoP0^51pj_NwOP;5qpbbT{vQmm*8hh}l5n z`9kxo&o0U;#m*4{K{95;<#HwG%x7P6TE*y&k>9^e2hkiRtSh8Ap0OKQ%J(lzPs;Vl z$tQ0v7mZjHJw297|F-PkCMbBb^9aAf&Q9Wy0%l#F=E?%kv(cv`1+wz*qMMapjm^#~ zxvX@541E>5u<%qm^cEotGQx6WV^k(lgc5=H+4l2eL0*2|M@9TGI}Vb70A&2IKZS*) z!DEjR&;A_2+eC#&{$OR~qj&t}J+d~C8kwWH`1X#9BaMWl_$Vhwifn#-fxr(^d=ng^ef72Irm9z@ox~#79 zKia}=qYJ7_Gdq@w5@ha0)eG9D%94=cm8ut&sjAja?}Sij3OJqpUHGWpE8OHJm zanU%4HOSILV&01mkHkes#STFcRcd6W6Jkz+ZsF`qg-(L=!QOM4C#q-VYTu#4GEq5C 
zS(-X-borxx;Ou}xQc|4t;HFA^-0$C0I?S{P5t_u{U{o|zIv#86FhhTm$ShU*cp0W& zR+Y~&Kg9H8xjXVoE4{ASp5L!k5U$gviN{`2{;XxQ{V>z!;lp0xw0tt`j_*_bWy}{H zxvrflKiEj*Y?Jw=3T~mdtQ|hN|15w~fH^teJdTnKJ4BUA6!&eb!QqK^@)Q-x`GMRDZh`p!O3GAE-TaKa_53BrR6 z5R|CB%y;u)+%$_hFmtJbmNqsa@iovanzxd=(QEO7|w58mw0y8Xr1b31O)}pK726H#yDJ^7PjlZM6t`# zI%-MCkY>ozdKOVknS1y5%+y^R@zQVmY3H5q-sQygxYRd(ZJIT#*sx9Yc#)@CoY?*G z?OVe4I&}v{w1g}vW?wa7GvrC#*k9101W>YtR+TqrQFg5+B{elQ`P8B?$S)QHVnwS#poWNR=klol*1t3!bW?GLgaH(W6YAnMk=lf|8?)wwL=A&rlv161A^GrIB(kwkx=XGeviuO*xqa+=5idpU;WgPxK5bN?g@VF~@H=35fOhX84Z7Ha4;h?GO(Kz|Lk)El zzF*U*iuOJa7R5W6BPOf~0w~b~WP5w3efQ?;4hdKLo30o0S{T&S$hi2STV)OG{@K#1 z(kDlJ_*nIKBGfDH89-}GLz z{b$48p`LvA`;+CRXzW(E&OZSOiHSj|q&td9H9POgnM3#m%-T~7s;D^PkvPmUk2Z|Q|6~D5G)Z*NDsZH66Xiq+)(=8w0u|JvdkHBDU`dfxlwsfR4 zVf`;NhHlvw^}q4>{|3!*^vp()A3k!uiz@wt^$4+cLLR*v z-XOg3(jtupJplMcPRb{;UhK$maq{PsnDz{xD;Cb15_0H5S{IJ)TXWr&c!m#^mS+)` zV;)A;s2kAMB2G2bS-CFagN*r;D5~bsF~++Q1K0W)`GW9)wL`at-9UDqKfI~S8u-%g z0^|iFk8sf41jx5Ec3-6oC5>4-@@}Cb1-5jce=Mnl0aKyb+w%GKG$}4ZD(;I`DTiFB zIF;UP;6ul?J&(rxCUWd#T8-if>wdR%r*gl86R#rGuo>kX1cC&C|2&TkU`XZE!y2h} zw9_K7kt)LD4|K(fMcLvV_Y!ScIU&Zfg?V96?Ru;L*$Pmq%x(u~AX8>{zMB=*%fzm-2+k2-gLy0 zeiEdT{*s+NG%%;|D0%J^QLur4(7=jg1zK{@$Lu2M?`tIq-3zB77HPJ-e;66*Z7pn8 z?0J-8aYONFPENbZRMiQR6NE124QCYzMsQFoj=$H`y*LoV3{F%iMIW{ZF}S}>f*wG} z#8g^V_H}C8$H(W+ojV2w27^63^!j!U`yC~7OSLt{#l;2X3iPp3KNnvo7#lE_jCHTw zvoy@lt1hjqg#7|=qnf?QA8t37I zS86&V6QklI&N1@*ZrRHCZ|HXa^v`&XD67zzDMUnuy6|MKXwYM?Jk&XC3l@nCCJL*W zG_Bm8Xguz%3xHWfp>t*rlu7P&^QrOChFO*e8zTp5Kgzx6{{DWI0(lcNvyPuXQ~Fln zdH3#}Nl#KHzw1tC?Bkco%qxy{wn8X;h=_?_sj4pT?kbE_jaj~_)2^|J z&0jy-95*8Rw;0aC#lg{VJnIR28mwNT)9fP{pEcugNWJvfcqCtrIe9ZtL$~g#WI*`Z zYkfHHWfEiEn=H|hzl!Bm*Twi9jYn8mmy)?cbW)!6nBOq1xkx+qe{i&}*tfKbZEldJ zrKL|=Cu!x8e9V5RqjOp?ASwBl#n+lR9$$eban)F1v`}sL@^C4vyZDD{iwMro*~_{2 zOgves@2>_IgS44xd7t{{ud_5=SoUyFWqDmx%0iJ8eMh;cs-Xc* z42zU&bbMSPoy+ETl?74on>TN4=9(HyOPQFNZM)*0h-I@13peNGwJq&)aB%cy@@o{S z?)VZWMMlVH31ucHhfsEYdR6(QvNAP)AWQgqt8#!`z-@48zpA?W==fM6 
zOUQfiXDAvrS?9n&&7`$%ITT)TNr@Hw283Tjg8<@t1Tp(p+ZlR-@Ru)NK7amPIxayb z4?c`(ZITiZi{Et9TmEe2#28%p-^5p#Ab}k(~Ev?yVZ|a<9v{c-uva+4c-EoU* zzz3BjbDY{sydsDeQzzR$BHs)ggrW&?3Fz@LHc3eM`S{V+Lki2vw%50Hcwiry}J~;fo{zI)ZQ{1YKBuKDFTf#Fwz*;y0vR zUtOKC^XWUsfQ9`2O+rC2AP^876eM`q%kEf5hWNf&o(Al5I@m-^1V@EA_Szu345PbanPs7-V zFm=nJEp_X&z7x+`T{R8MsnTMS17efKo!vrpOol}8A*6ir0^Tmn7 zsy4HBt^GpFt+f1m6har#z88-f7#JQu&i(LVe#Uh!QHol({CG@5Z|2(@D_DfdN;7WT zX>dGJ}&P54>Ssx=iYP4zqmP^kNzs+tIZ86&JEgM4unxk#mJN5Jrvx# zgJq7{T1H4JigSyQ`I~Lsk%`XpTt&KuS1l7I>)V@7e3`ho*L~clmtlsO;Ysh15Xzx% zd+IJR&-d|8L!!wK-2{}2#OV_`xHzY*xd_r9+$Z}qVksSGZEbsHy?u6lc}QXR`oXfL zc^Vfm3yR^iU!$X2({+b=$-2-~5q=*&eE9e8-{wmP9Lo{&147~9;iNp_f}Z~x{{6N(Ki6DC zFn)xtPL}j87NOieJp#L1LBcpyFV zf#JE5YQfljR0nS?e7WPJqq=F^Q1RZC>G@YH9&Xyq;cZ{0wi_tk+XP?*>s>Fd1wQrU z%zpIa98a!pTTBBW$wFwoKW4iC>y2Yj(X_$`M1LPZrYu*S{B z#h2Ot`7@9n$2HKC!Q_!19=eJ;#|&xuvUPg{Q>iDeI(R_ZumXq3OD zaPxlSqPAMCNVjayV_BVMk@hNZaB^~TYRYqK;@N7AoPxr&ZXOa6Mp)RV;<9`?nSwD4 z)EZZGB!pBz?d(`oj``<>+xOf!nQ;j`puO)6X>Z8B?#wG zanra$^LC&?Ti63$UfyxvmvPI93>wf*Q4qC`D`Cn-s`QD#NVBv3*B=p=lrd8!_Xv_J z*x8jsC8zgMOJfThu%cn2$NA@51c~7i5d5p3yR@Beg$5PgcAtX6*BfsYcD1NhX)@bJ}9 zx$lhc^(9Qs?yjz(O}nM1UT5nCzK7{^P%I%qL7ZG%a9W&QTxh7NF-;{O)6lHWU0=B> zDaFp%3j1DNz`P80K8qHh{I)jtW&&Bn%sHjVy^}@9YUlb=+@#oUMho`5UX{9)0 zQ)!a5dWj+(ne3p2nT9KW{kttxG7EUCW{-Ka&$`BR0Tv7<2C)iQBsut2w- znVA8Usm;vI!^6YRPbx74kl1OhUjm3>NXXot{KmeURGh4=?DD>wWbQL+YVq>%*NjQ; z-k}Hnh{?#19r;^f)bacGZ>q<352zE+caWa|Y}7a~GGdW0sc#zE_7mdRK7+ zYj(e@p29(V{1^qHUo+Ka_-thT6=Twe50vm;X=z$2Dk_SKG4J1BO*;+jy9tzxZS3w+ z5E7yx{%mhYMn`MVC-(L|p{1o25D;KvJLpYi2V~$SB>edC(%wSr)2B}>Dk{(eQeLW& zz+`YXko~_e!Eb+`kwI3fo}Q54eD-&}r9~_)A7)^bn}D$6;ULGK?ah+1GOj;I9+#zG z-9SmOX2bfsTC=LG^ens$(hZcr!`3liLud9uk-Di=CiSK z1cJhA??;YAxGm2H`2&EHWjgiw3R&bNBpxAYwYSEL@ev{;-`AZTdA44cUsa~`F%np@ zq971{Ju70*tXcaf(idDj8P&skRwQx6Dd&4W4vR9&H>6#nHBWiY&vPEG5{1#wK4JE? 
zo=oMBb8P*cc>Fpe>Ri= z2|+E*mnkolCH%2s%G?1Z5&L#c&$eUTPs0?W72Soe^g+|S!MUJLX2{beQ{h27%3Dfwxe>(olj>);ws&_s1W!Kv+8pADT9V9-?ODJy zDkgEP38X|1Z#!75?X&dv&o{bb$VChkD!h+i@AOmpxlxJ|^TN@=snY8|a3g_H| zEMJH`5kC1*yC~eQH}BC${Ve^y@kpos!xoT=u_7ajy1KlC%I8}oid65r#;Xzvz0~u) z2LA&rqJ!WoKJF|4B?9O}l%tUz=jioD4X^ z;`D*e2O2KEGXh=DIO=hq3WNQCQ7kyk?L2rH5* zcF?doIOr{=@gLUIUP13hd}-;t`;l7P&UJu1u+AtEerO$2Q~Y89SUgd#Z(R9xn2W@7 zFwylZG=T%ul!MC%3yc*51uwjCj566 zKolXZeA`=5&$qeD*tBxepM+Cl1iVaE{6v{NGOj7p$~P{qD)@OZJr5k~hKwEgbbgCO zb}X%L>KT}nCkeIm?9NiSyvVY!E4rX2%2fD5 zkv~*|{0qm=pirFZz0eyOSB2f#OEenOk6E3&feNwNS z1RL@7* zik+am7Y!L}HSUD>@9b=b(>|d=y|qrjl0tZ&Ok3S2FS}wtNR45|@f2K33})R7VKeOH zp03oC2M>rHfh3@QhP%<Vw;@R1U5r}ZY-txlYVms@L5elDyB?sZxHCMfpPIn1=?QAVh z|5kF|4>caTa1WGSy6vw$a7>>!Va@%7a)vA^^+~Jt(9Rd0<}`-<3EP79`$0(Ycl0@M zrtH+iCk+?udDPQOOA5?-;Jg0YfyOGob}aq9MeO;hS5sIY0miM%OG!UU6kLVT?_z@# z4#?jWxEUK0r*Sk~-GMP1+D}-t2Nm@Vi9uIU2 zdWkRw}xBXx@iU-oeVn0Pu zzQz!|xGto_GEZxFXeg;)Q=A>j*a}5*l=z@kY5t>O`6p@s^}~O=YC5m=cAe`YBy(FA zR6*d5?pSdI(o^3$G&FV#A%ghMih1mTgzzWnds|z(rXf_SRPs=s76D?dpLTjF8RGMs z^Qh$azRW_*E+gY}&d2`{h&a6aE?d);HfuX6h3FNUTf4i2u5xk z-aDCY&(9x>(GQ?Zar^C$@eK3s-8%*(#U&IIjA)`tZ%2Djd_suK8SRJo3vtZ3dLq3BqC`VJwOIuRQ(#3QYoSb5-&7V{T@-8moC;G=P zzAHNaPRM95>KOlYjob=?tm*HBE|j_8kYErT%cnlawl26eimOYW6;-Z%;+lFnP+nYI zBK()K49_gX~Fm3R71--e}e zEB5Z#J0JgzK@H8Ji|&BsC)_)Qy`}a%KO@6N3(L75D!#KjH$JBj_pxW?wqFtemDqP5 z>2imbo%trM3E%}400YyR2uxp?+30`hS=rzw5hTy(S?RpIbQjQl?dcgK@7nia$QszR zdOIR5KM@J3H$*b`sba3Fi$|M7LwWPux-a`8Pg`?=1C_~47`21}3Y&-$#I3$+mU4V}Vp!3fl{K{u`m^kaOGs`_d33&c zs_X`B{1Is+UD()$L4fjpdvzU2!?x*es9}E_a#;Z(F0QyU(K91l1|H*vtw0eCZk!|2gjOgz`TyjWOjmHuAoM<~CC#1jA5~>mIP? z(W5SIeqU3*c_n?2h}Xly|BpXrPFe5DDX9X}myl?i&_p@4@XPtLHCbuGHt9xsfVaR! 
zf8@~!GSOU<_xYHo&f`vBb^*+WriGAkr`B>?bpmxCZgu$)XHQR0FE0Vrb#n*O#9@wY zdmeJntqKr+30gl_RNQgYShD9KV9}Mu`3Y}20&i-9gAsIfb)}dW^W*!9<3b+WH<4Xj z_?iG+DVwlWpZ=deVs>}%hYfOUJM+!h)=X{hY;QAxIa5*sM5^5(Bd0Jai6FTrD2EQ= zms2=DuP>AZYCrg3QBhIh;p{}1RI%>{)|~3Ju|wF~+n-Y|wM-O^t~;x1Xb{3fK;Qz8 zv-3O;bF9>|BX8CKHxVXeP73Kj*TBcP6Yytumsa}z&G|e#zgdW8hb2j}4wU~q&&9-c z>UW!f($;DB7m^^K7O@e~Ly}0yaajtWrKcx9f)ZFOA0MBfpw5k`9+z$rF|>{7J;SJ+ zv}Q|XBzS3HumHj?D8Gh=i?HtC>gbPSoEABvsmUm*DkuyzUtjJ9qTM@iYk2$it^46> zmoZy~W&~xYqSbR8acVepRGIQG))ybOReix)Hty1-pv3MFeaQ{F6ysmpgH>Lk=g5q9w$|KTmEc*{xVo-|q^%C&c5h_%kOb zXI#%$kdyN$)$F~rtnAx?l9;U9xn2)zs;WRAY@VohwR3U#mm4iaLQ3jc@TA+1@||RE zDrhP|HPh15eUD1?9`ZUGWBvxp=BKpz60{^++nv{TjVo(w$Kcsu1hotmyYJ}}5s;8r zvJ!!Kqw9S%7EZ`grd2JIryRMXPl`-zB>5r_#AMJrr$EuPw6v6$??3tW`Wfa+1qBcT zz!WGcDmp#Px+Wta;EMVuLiyO(S+Zfy=X^Vj(+XIwkV!W_MJnzyB24gq?@?~*x(QB{()QM)xvEs|~W4npZKQ@n|YIt!v?TKQ`?8!jVy_enx zMpsj{xJBAcsngK`7W(!klS3|DHXl9?CkF=t0hQz6urD>7`xHMcBO}B6{2!=D*!B0( zjmd~(WzNse4h{~$EjaPIDvcJ$c}w|jq(C7#DQWghDvsL9%IbJ)Qbk8+`^bZzkI!Xm zLP=Sf?LI%KFiZqJAj;5%kw^>Tbk?#`C6>Lal%!7tWdLhcKUJ0n2i>*=k0AU3G4az_ zDY`Jj%1I?9rNF0etiXzdWoWuMcp1|AEY5>6#=+o(zItl=Mq!4spd4pnjZfl`8~^od zb!{yw-Nv4WErVuqD57 z!*_4=Jl0|)AdiOM{?(+1KyoZll=}YD#BSJ+N3ZVspCGqtSjfp?MR(M1dm2?z+FffRg%;LY3K4BpCyhKq~S z!$3Q&BGne^%b5Du1qCQ`7v0P~pBVHYG3*cs0=o;mpRwKD*|D{-c*@KSMADk;9PEsS zpz_+{VvJT+Q<O|II_f_>>zD4Dvz@9oK4kbFs7(CAwCUNl%%pAqDah*II8Ly*f) zr2uR{aN11KHjg}#d&XvF`Wm1uXqCF`-MueIqhfEr*SQ4;&wt@K4`gfkf_TbzqGcFW znv9@bP@>mN&CJd1I`ct&agfMaT5fL=m(55WbRx)1FFY@J!D^! 
zdpJ$H6TmoXXlQ^_{%yvVJeu46VEG5>^xRxCm7bqLzUou zn{PqZVPt3s_WY!^Z4E#4PdfZK>N@=&$eHrvLqpk$bSZ;2$vufFDV=MF;9<8e=sS{3 zZvXT1^IPdjPEJUu`ua78?(8R?0_LpiiZkD9EiHX}%w=2S6MCSu>+9=nZEb-{PXj1L z|MT=t!T(rtD?T&R`{)N4S@85)1&KMlxfCF0VPLQ#`UMX(;h#T$8YI91CF7O~o=pz; zxSSrC%;`~Vy3~h72tZ^b8LKux@ZOO0QsHw0;kYOCu{8Hm`oMh zaUp48GsV@YdksQosCdpk7brE@kL5~Xie`E4@Wyvl!Hj?fK)ZeWOrEiJ#&)jJv!-X| zcat}NbO))O6J?A68()~mN}>jM{og>X1utjXuJ1GIHL)Xu^3!3Sf3f6PcRc6o3oaOF-N%un+>+wr zAM^8zRf{WzLiqF@*(8URuv=ZXuoxr|; z$it1V7#4oxB#*2+(}hJP`AEf8OYB}Sa}_4O?+>7zUFLdCs=e5GbJ;-~?wKw5+0h-^x1-}1 zZRX?SV~vtA`*ne3a)VD43vL3lBENsY$&I6yj-$TG{j*EE>M(171*u##YR=L7oN>U!X$|A zwl&1(A_RHkq&Wp!0OcnY>7?hZ|8+9q}-_IFY{|kCTK|LWe54;dceQL^zD}8Ees{Y$I zKsvBuzP`d?IiUBNa~zyrY_gNbQ?EPoK`+u|q>*N$qXUczZwn|)y~Knq71q|Wnsshb z=f}iltUXzIX{jK9tFCz3K2!FnC5U_HDFgcIG-iWTwsx5aW$jr2h>50^NC;@aUB>i` zjJ-WQMwJlU=3!%VglZ=yCKm1*_2)`2EiC+Vu+jqs2~iQ-8q!wQ=#Hh`U6xp>fWSb2 z0L1>_TW2e}Jx>HkNF$9E?*_>`$OiqgtOk`ERdmAI#m&u4TU%R3M#j*PN>*>HXWAig zn8T)K3S9jLXekE=j=sLvTU*xET7BkmJu9=Z+7QK8unS)P-<1&I0JqGqd?w6?Z7IXd2!JSW38+iRRV3boiP`K-wZ zA_>GAuIB^q;bXKxdw>VPuI&W-^Y&!^l3}p#mO7%#UsdK;R4&sNvrD5&+9~asgr(Zr; zd@Cwihqx5*^PXfDxRqiQFRH;v0G8VeNlD0QT>Q@`BOxi5$04`2vT_Mo7->1VOb&}- zw<9BKE`D})m2AbW-_gyduj0k=JOMZcvxZcHf9nD@EiJ@}$wXsrLKk3Z-dsLY zIwr_yY13l1RzbmR2FDH3*CmCbs% zGK-8VYRoo?ry#HZk~k2potgUNS1d@xq>`d}sG?pc3n8eYz3&C5T*)xe0{{mZo0<7S zlm%SiC6nOs&6zqUGXUey?+GR(C2{GRMi4-B$R1ej&334%an637ejX73wnsPiLp~0O z=kW{Pvt>&?I6PE%FTmTm^b9kd$AKy=2l8x9xxq65u#wS1XLQh~o;)#;x+NXQ$iOhw z=;`|Wc`NI0kb@we!OdM$UT#O!W_LSlAdAP#^9{OksY3#+e5i8A|)RqJ^(ZY+G=TGLHyB%CyM3O7vmoRx9}w*cmAkS;xOtq)D{++_oZ># zPxEsD=Yw$!lU!C#u5pC@Kvq*$Uw>XvhY;uzzQoGPN@ST}@8U8vsn?~y(jPiqU7P}8 zgDHz`HL`oN2uloxn(wY6nO z0O|C1cIp!#?zM74t{5H(iJ+-%a*4Idk#+4x4~QA5IXXIec$`7;q?%M{R6?19XB-m~ z^Jpw$=>%ZrodBW<_tcxbgf2}JKH?^~GCke9V87*Z4kr!-6CiObj_w*78W2qcgyH7K z6Kuc}#g0@6HdStJZe3k!4qY5IwC}0kzo8UBb{Z{G$y2s~Acmga9K6SEU+&Q8Xt3Jh z!N9_LI6NsD{Nvr3a$5fR`8h8ccGdue*VFRx9!7?HckVnwZ!zb9yy?a$M3UoUVg`X? 
z0H{sxOUla9)72#>Bn(+PNn&~(Y(pJD$(LDHPyi$avp$YAV98BQZ49v7_1^;Fo3I98 zv>6Cjfr2yQ`V5H~(sR$hRSt4A`d<40#G{~qahYO4o!q1L`gMAGI!yZ1yvGnIU}O6N z5y*9CE-tPeNQA+uG^`+M{S4VjI26MieHnHT@DmejS!fMGLcn*t^`D0%vfZ*;T~(E} zh6%pOf_<>_U89z6273D2ErX9-qi~Pl{K1yK%o~1G=W-{2HB}Zajd(N)J$r_WfQauK z2&d{@pN(rXy8=}1>+92I)}V=h_FM=U@9^;AzFY9N$m-!~X=y1$$8vdw#^>gumv|1H zyD^cpUf-bC6_6v0yW)U)IPqk_9T5oCK2m!Po2IJzcTM;@wRJ)K#S8WkEo6U6X6Buf z{UrokmIdCY$(Zy*KjoRx?dT3*>VP(&VgYBQrl!JthGUB=`uFHar^)LSnmMxnO~$XZ zyu5&po}C>w9EN{?I`{5~f-vYS??OW@EG^SO1&Z0BfeVu*V~{Kaf1lzN_W_=%IQnCYy?W~q{P)gQ+?D5bKiw@TK4QeeJ72QXwMf_4+k(TzZi{`&Q0a?780 z4QT~ae8G_?BO)TAHRPVFvooXSXTLtvC7vkC&hN1xnD(Ls{D^mOS? zcnYPmzrSx=^SXQ-4HZ?p&hZH)C8G6n_7xjZ@GVqS4sP!H8Qa2P{=Rpr-He&Q4xTeT zc<|up;&5Phs>W`qJrWq%Plsv1hMDrKb??^qLCR26QE`HaZbSq{!f~G;?zNV>Onx!p z%d~xagsIO#0x%Pv@>4QVoUj~x3<)i*v@T;3OKjBDf)5sFrngeppX4dmS&f&mG+#Kv z=++3?8s>lyF*IYCDxcJ?TwQDWOdm@-?#?zs^e~x4j{qYGUd}{MuS|yz?WZH^VXg%* zj5mb^psjW+LBP`pPtGBlohs3gC||tIAQXW`;)TW>zA zH)qt6goFfaQ}gAv-JJVcCgjc9F$3UQ;D317ABa+=33>C0;+R4b*#chzgdJl3F-uNS zoL*HkRaI4xZ=oYBwuOgyoI$Sw97#=WTo`z(la(k|<`ttl5Lb}Ir)`g89D|_MPj0&e ziGe5qVNMW*Arl!*#;dHZp08XK7axCldC5s42YD_jYAWeC%6Cw|&_h88;!3ySAi2D_ zxXq$bImwvZvnw8f9hL)%1N0Aiddpv*Ucrd~BG9V=B867z*NTc~j~*H9J%FGXH4P1@ zl{K(NRI%u-tZ=i6D;*T^Qgu-GN+ffOK7E2r8;nAIR#t4|IO@%f4OC?RN$dCQX7Uhg z5*0;?Pl%wFhHTrfU+O_(qF(MMtOx`)=gr>>a6|J5!t-isG$3uW;z;wyi>T5Z)}XoB z7oy)y#N;P_ zd_}4xL^sc$wY845LqbMPKZA)F$yfLi5GKwmD2P5ArmNGw0+bDt6}}w}56?-Oo?n}x zTqfU}f%k%_qL{k3A!7t}kA;OrEq(j;?WVn!yP)P!;(!SENxP1Xk`ls4aD^eUb>RaY zpVhBV-@l85wEE{1YBQS+M@duPubFCe+14WKfF(yD)JxP##=dDY`-{vw^8tyHq85WI zvvd`jWcTm0MVvC-B$158#E{AMqmUk}S%{0jRWE^7rC6jIxHX-k#yRdqhl6ftWK@1- z{5=vCp|AY|;rA$#oY$C7$m=7cIB-1V~;%0AR73 z)(AtzQD(|z@(gvgw;8bS-D^H`TYba@cg(4%S{CdFIU)^GW^7;JynUquzVpp}^82l< znewza=qFxHBkLTgvY;PsT-XJ^%_#&hJu#s490m zFK(fsnVFbG+E&Zv%)h3{RW4F5)vAVM@ZjK}!8-%i80xOxL7=^CnU+>oo9qJuBakk! 
z$E@TFINcPNl>9Y5&dtF=Cg`D>t=J14pNRZ76mfojK4|@ywh_B^q{KlxCO|Vcf0e?> z2bG<|-6j|Qb(1TDyn3$EKqQwGOkQVf!>;o-4^?706$v{b$M@9;~=O>X$3pppBWEGU&{wHZRY))pjB zlA;C<40&G5M7AXxR8cS<6&2x5_d{Lhw?pO^tLb)N-T-HF7=NPH=t&$zK_VKXR-z8x zryZCqrw1!Sah9E%N94SYKR1ucbQ?X$LKf-5pa9p;N9N;C#io-HZ(p2bIBB<|u$GL?5Y4G0oaP-La2_lRpnvQSGC;Nx>}aW(C? zlQs=D0+I3sIf^RQ99B3o63oH=7y}xM@p8TElcnhE{b*k@OZ!{KZzIxVpxfN2Z7|BA z1^+!hj#jU31$pqHkQa}z@U?66xsIh}arBlS)W!jXEnmENVK@2Z2@A_EGe$N9+UL9u z`vCD~xv!>b=y}86L{kXA2aVQt=G(-? zgr`9kKZP-e8c6lZMHg^?z<`yA%WhU%MdfA_`7epM`~bqjr5>==K*xgXP2@LgvyFYn zUxXEo`STF$Ql0vLFaNkF2q{22YU2wEE*mxc_p)*4$tPy{Xry8@v7- zzk$OuX_m!FgsJCqx)O{f*AU<0G4m^69MU%u?wU+Mq=&Bo3?=e86Hvo%lo4k6+U z*>xkpRI(WWMF!-E^X1avmIf)!Ctf~2t!zbf)PUH|2OSPztpL)4SvNF<3-<-!Hq|30 z%VS(O0bR|T2W&n;7dY5&pzd_)lRB1e-M-!4-VSXStVeu|7gUMcgGs0Bx`U|@pL6B+A@`svif^N^m6A_7qEKXm zy2qb}&(831xqCcwW_?D#mDPCUT;RP3gXS#k>Qk2{ax_t=*WEB!XZVe?RJ?qZ1&Az_0!VDuLBNW0u zWJE;s?z%~k17izmHK?2fpqUmp52^|eneGjo)l7N+-ZXhMgWt6b0>Mp9K6+ciH6Q7^ zdwVg{^?$4#!hJyCNi|b2+vAdx%_=k}veug=jEjKe0qT~H`}h$qYy8k>#mA72Q`Odn z+cYJ`#h=tS`!{8%@52b&{PSnx!kV?iVN0ypcYobBsY@}hcVmNo;4m-{3|Uu@({Rx3 z$=2+7;Dp$4y|%S2lgtH01xP|1H4`Ia=a)}(-HVG;nW&T$= z=l=|K9>?*`bwr#_hA7U>E$uc=H?FKNdr=*ZoKd+NBfA*i(#Me#S>e)^Xb_Drts>tx zHs7+B*{tQT2&+APT{_fhh_l$Xm9^rY?zfw{|KRr5o%zgrUZ3~t{dqkf&)4gnCsNRp zYRf5H?u>F7Ib@=%*=*$G&reKfPv>%%&$Z#R5tXvcp5TL)pgyjzw`tnhlJVe;4aM1c zTo%vM?)_74UmSM|*AftfHKJKDq*AG3?8T>ds7G^KJttB3LkYmMkGyA^g%?*e`kD5d zILCOo44yY49^&UsHpnAG(xAo!$9Hyic5)i9GQtHEbjclVe=u4Ofx-BEX~drTy7hCf zuZiFdds%H2jD24mtk10Aa^c!z3qTNovM2E^-^n`8mwVq{?{NHwe%^eDuar2tE2Zpd z8VILWDa4f#YS>|ZEKt6Nh`dVe&wo&b6@&bozyEfFG#v^ z*T=U$$=C2n_uNo@u!{7g>P$AxP)APB=?GRo?CjL=KWTMM`lQ2T)t&dkD8K3Y7tHpQ zn=Q@Ju_bTx-mj?L;*(~wW*gQNZgkb$zWZoP=ng*bVxjPF6ZQ?hS{p7ua-rk*XU__5 zL~LFBwCthg)PQrY(Gq@y_mR0q22ld2~# zRWPLvzN}j&Sv`2cTX%Zn`Ubn#-vE)obls-Q_TqruLH>Y0D;F1GdKMHG^6qBRCHi`L zmZfBrk7Z?LwY7LQ0C~yO?h|~AO`RgqFDO788p69s?`K`-QkTe+E~f~hHD1P+Oz~dG zh@fiN;EE{;{A~Q-;Nu}RHB=!Zb!ABo>pRAt?%*&yGSWXMMo)hBL@T3#>|thR2Bj5Z 
zJIX(OA#1fA)UdfQT~jmP`~>!nQmJINotvJY59jTvOhH}OR7JVX;Yg{~E*3B4XEn97 zdV70!Gg^yUr@&bj7GfxdZ`B`I!nVrF*HB%c;^^V$eED9e@eaB92}0@E zO!8fah7&c+5AUY0Sq~u3Q5p*b{uM*t<%#wM=aQ*sGC4T6QAQs&RI-v$Sx%Epg=xAfx1G9Ug$X9zyMiZTT$@RM3_l<)CR1mduogrZq28Q^#?Qv>*mXK5O#n#c6YlH;hM!-e zdJ*4C!;P;>e|);(yyxd%rdd>$-k@Y|>uJ0alWqL7qpyyO*}qcJFMCOBC8FhQ@?{T- z!+t`C2P3*W4ELf)Bjsx2U4vnSwl#a;(OqE#$d8fEh=JSQ5MG6Q&X^!ULfB#8%+es^ zN#ANSZQdg`EniQ<3$p5nE4dMH`7vk~jLe(QFMw14n_o z+-x(c@&FElvLmD!A1digwc&p(@c;35%Ni;7y)Kz0aueg8Ig*E~_qX*20@MBmxz>|f literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_paje_trace.png b/doc/doxygen/chapters/images/eclipse_hello_paje_trace.png new file mode 100644 index 0000000000000000000000000000000000000000..80f9d6a527280b5e011097c49cd3a835e75c8251 GIT binary patch literal 236343 zcmZ^LbwE_z-ZciTgftQ&-AFe|ij;JhbaykPfPjF4bP5VO#Lx{xBMdMgF?4qgT|<1w zd!PHh&%O8k{6i3D&N;L9Z?E54Yp+f8TUCX}IFvYOXlRcWU(0Hsp<#xgp*^I)!T^54 zKutwT;7d_DAP&F7sgsF5|AI2b zqI*vu{rA6yykhddpr8k|AsWk|%V4m~>dt^e%WQMv4zUYlS+?TeulAl`dDiFp0$WSlZkLBrf2xa%Cb6Ck49@X0y?}zj z-%ZGFifvo-S;&>hN>cj?iophw{Bp0<`L(n;ldi|-6c?9ix(uSfxopw2B|aqmw+H=m zX&as&P~uS~=? 
zV-a$!61!(Krmp*S_bogk+b4D-d6p@pEF2tGAFc}|?W82NzZ21Me$>YEmQ_yZC zj44D3kI+~Y+vv=6q$)(oxyW~>$GuNaL9%~jK|QJVQ(^TwLS@QPY>|Oc0MkKLQZ*G< zN{Wx}YpCZYF|0fFBp=kfP97J@tGLiHsQ7=}Fg2egO}Ph5CLX1og%+78BN{-t-f&%A zIT}lI9AItY?cG?mBf3fHKcPO!n|iMJ^JwHPt1xIRDPN=M^TCD2W#;u7ib z046#D605nF9&E06C$1wy9P9193`0KY>+9p3=UEI2IY@WfeyQp#sRUhk?XE$ZRP>R) zR{Kk@rOVVe2t#PZf0UoF6nWf+3;tqa-$KW^6;a#UYw?AnV+lqOOqE+eN=!_JnVY1O zHsKxO@qpT9XX?UUm$MHk*V53a&X;n|tTHf;aZ{aDPpr?ACzM)D?S?_d|HmiVH-rWe zef~@sM=f8#nu*4hr6c*(@XTwOL*>wpskf3#ii%r$(yb9r2c5RuOM+NmM%$s;p`_lu^YzMF#jZd z-Tk|a8)?07S>YCkX>A-G+4(mGqb4?Xw!dz9+P>w<>vu$~%tYkUDgvqK{Ha@0>M(Wc z(AW|dc0cI)kSr1(nx;StY93knsiu^2{b6pOMPLvPPyI{}`Qh18Rq8BvN-ntjTv|qE z4*3{^L`q63zo4LgF--O1;=&hNtV0_szchpJ}{JKh2ls6+GD_FyOv^`9zTlP9*| zzuTRBAxu?^gJ3rlie-!o$HZUW*+H;)v4a&5WyT*qemv2GEH0WAs%Cf8AmX{|OcdwG z{(@PzAWdzgM-))YR1*GKI}j*^TxfSJw9AZ#fSTGkfp8)V3{F0C)zKc`xiaL?_uQL~ zi{++KJ>QqOxIR{!+o2J0{{p=0^T7cYzX~%uds1FroBIrpyF2y8d&1vk+(|{U6D2xU z^+tFUXZrtym8FsP@F-!oWrl1~F9s&2=^IJOR-w?)dkhSYz3(c9i`2yL-MiiQ@%c&6of5rhF!&LL`&Y4?xu$d1&0%wQ4-af? 
z?5{pZleHWxPoOV&?@g;XJ99bB)Y5U46$QL5q*sWq^w{8fgz=yy;QYC$D7Xhzwd%v2 zs3-NPGbi&(TgSTZH1;gH=$wQ){5dT3uKi%C^Wd2Mc6lo8@Qe1;;g6r` zd^W$o%3z`mSo29r-kaR3#euOKl7+Ybs5^yThiDP8#5U+%YyKEYzbfare%2yEBDB!X1OZh{G6O!eVgii> zHh#il&r=uck$g3@`pfm`_Yo0EpFaj^_s(6YT>W0)kB#465C*|X*OgqZlXygZYnq-68uy7<-U&V#n= z%e%F`x&^t>7^tq(d@~*`EfEANnd`G~gH|>5vC!@ZxWfZJxL65`M7So9?vtsElJi+} z1f1>O%hN214?OH%Hn&HZ$kode=%j5C9T@?;JI3&Oi@BW60M``uFG4wJ8AUB)`Ns&>58 zP)O8gzis$y_%J`2DX1n5U+Ox|ypzkFcA@`YP)1-SLWKu;lE=!*N+s&~KGP*^GqQi5Xf${kOY@3XyCEXtgMz6f88uE zwwyW^9v%}TW4fA@@#~q|(!X_MK9c`fxuyJE82Q#ED7Q+*w zKVaD$E<^eT2iK+@AXfXci2CaznB8v%ESA7p`B5Kp^Fb&T41AiHvK~IjyY~gc9*T;TI7}PfpQ4YciZVkB$~&lfY0{Ujmu{ zdeLZ~9GS7QbJ_Qi6;?Y$*&GQ?!gXM*SoH4REtlQ)*w1}Lsa}*TgO0DW&JKLLqzsIg4Bq33G^QKY^+vwy9fDyBi`~+;=;v0G=hvHQg zsP-VYGMcXO-E)pF z8>BaQX55EHOl&)OY&?FoY|YDhfljWj#giE)E@F{?&i)rO8+xje*+cMjCTy;p*Dj&aIc$RPllGAM3vW)lva8BCYXXiHu6=yjE=b`rX)FM9iv-D=P}p z($CQVTw}etI;H34j-795dRMoR`01@)?w2pb@(DC?g$?C1d#mf~?%P^8oCdP$m6&TT z(3_!AHMlGE?iSTuZBJrooj}S3VZk|{58&oX%?29Q=adwAos4y1%gH3YK|6;~0uqD?; zSFLO`uLh$@M|3KwppS4V)2>c@DU3D++7@`6kZRkKYrb8E?hM#Pt`D&_3Om0aw4R>+ zJ+TTXq3IW7#H$5hjE_CbBgG%(iNN-GdCzF0=zny0#rd0gd}-G5Kebmlp4huL?1k=$ zr0seXl3&0Qk?t6-etOWY^OyL*Io&}-0aV1QJ*+M|&Z#}^g@s5>$iE?4bsX?`EIC%Se3hQ)apJj(7@8`_S==QwE4d8yd>egFg zDX^f_;wy6U%asIe@#_N_;cP)qV}}#pcq);wL2G(J!P?&|>M25~rf(w!3ZvhZn0-2? 
zoZ*n_=6sN*`drMs?%3IMor@_j%WR05T5Xfdhps>kMc_(3Fl!6Bx7ldIX03Lbx^Vw5 zYMtYYdbC*UG*f9i())5rWlqlB-TmUA^Bipx=q-p&*3@eI@$EMork*38?42M#q9BEK z-zPZP6h`?A)q;m^8bx0e6&5Cb`O@K3H}@)uN#(VQ%BxZS`~r*mwG>60vj}Nl-?ZM- zxU#|rFZM6fZ0{UZ3ok6{$b#rNetD2Eg|%3ob>Dn%RcT^@Ac+2nLRrBWk2ZUkyB8|G z_YBIKoW&*lJ$rk5qXsjD0D|-ypeVoKbv&+S!GR!fVmS?Ke-|j=H+pV$0q{N9;K7_x zXvRZ~e~G$SN#Jueqb23KrV5GWYW#3=vTPoRi-U7tW7n+=6SF^6=Uji1z05rsNPR?wglQkP%#}VRhUhkMDxB?2K_1c{< z978)$JTNfujZ;%2Dqw%M;UOkwBlJWHwab@G6MS$hR$Ek?`svfJ>6&7R+L`unOzmPP zZEN6ln~kyW-|wxit`d-tbdUYiRFb%>%fBktjuYVgOYd+`?cqTpU*ch77rHETjI;t2 zdnc#NE+atnZRye%C1;EE0W$HSfbE}mS^=4iL~V=+PLZ3RMYk(C`+H}{M@J&wyAJ_s z_sZ6mg_X50@Sr__1!vZKh8o}*9>;@3es?!MFEUkt2AVt*sU9006~6Gcd;?SRhAoXc zI_e$|#SQjJth74#>ZnlD49*03Mjeu87EF^X7+Sl>h|TfOhdd~8S@0#6dxh}ActrjI zEPcLje0dBneMBWppsfv1DSiji&fO{QgAQD7KBsBVKd9HK#68F)Ec{_?tg=tKW#A5w zGTk#S&>^5#cb;tVy-5EktDtZ&Bt~1G+hET_UVqs0baSKtlY$~5q31>B;6rru!_}nG z5wC1O+2j;MS~I?UAp%I9uH#f?grRdoa)pILYD}KpsO|a?C|bYDWT~`n{Q&_1K^uL7 zxN?=X>|#1{R_@{YlbZY_htLM!ct zAgkWRx`m(uQ>Y}r-IK8H#M#Y5OPivG)W=v@U;U0_5-^2fbNp{lZ@Fsi$H5v%Q@=H< z3_)!+4#@T4X)}b)KA>2K6H-36zHY74_rQ^rUEWGNK0kbsadbUeev zUPSG*ZmLB43;kd7Z>oJBsa`cTu+$k9(xLv@KA_XiY6oz<_V&m)Az(J3Tb>JpfJQSO zKME+b0tL1F{Cq_fl_h{(xk}}10$&zuXPlYY*fMe?R$i-0%9)wHY;a39iv1O;|n94W*;1Kv&YL9KC{ zo3?-yBvs;VEVJOfG^^U$uN<`I!;5-cEKm@Ms=SuO!olgZJ!!WeY+EYVHn5&Lm2iUO zTLAdurJN-k8coF9xZ*P48{_{I15*LeC4EDxoont|{^It}85l6+SnpIs_>QZ^B-PeP zN_T!b_x1IH9Ro1J`z)F`{m-1eW^~jg+}(e>ic){y{=OrcvEH#&t@-xTN^9&~=G9*W zN>&lsa!zR6`n7m*m~^s9s36^;BkHvs0)+jG+@kP8_b!A^q^*jYUp55pJGn<>KC=q4w;rFXu~+>k4HEI4}mi*8G; zZsVMek#lDa=U>F;$1hz|kW4J;Ah62gsNZD&+)nR!d$WN<1}F|jb{J4UMECFCZ_d$` z?fWJC{lSmk6bohF@Re4Xw7Rjbp2wJOjW|#s7k5&xQ2a=Go+)&4L!jjfO>P>E8XN zu0Hs0tgHkVD!2w&L_Htk`+fG$D-&#Fj20zLHhMiRHLSDWeLIBPjlGzUt&jPF>XIU0 zn(ztkvVP@IN6+IZmU591BT>NC0GqviT#cN}Vux(OL)wi$d~p1&=k*`vC4qF-cYYO% zxyHw7%1mM5;ZMb~JWLuMmIuh2;Mo6Wauw-2bb_^8KKPj~xa-XfXoF#}$|IG`2<;v7 z!M}9I|G2LojuX1ynWN6K0lZy8A`z2IS)**qXiZy`!pE-!!rF^MJD&XKd$N=&D=R0f 
zY`8~8M&hXZMM}3HmjBlyj@^suZ10K7d@AVL1cf%V^pEM1p~^pm##ea&Crxy66_KOR4e{*jva z;YBr=^v!HVI0$vOyjfwfAM72Koj?YPnQ|<&Q+|YYJnvgIhPqm3Ei34aC#0s1=(FXh zyN*s`PCxB8;-c_&3-1XyI55{KGm(*A#?#Q$#KyttSXh9Xz#KUk6BO+YD6OlaL+r=K zw>&Ekj=3*(VUWo?gy~6K0;JdWsp@h(_3qscS_P*;9!pHX)m2>P!SQiv)!Fdyh8ajC zb0|G^ec(HwGi2rDCtznvIed0oTL4lnA~yiZc}qRre#`I_x;J2c{53Ftf3B&DzC~ZdWIE8G=sRIfA}zXgD~I z!r5Hs)$jMj>hmmm8xtxi-R^mA1>Os4ZCy)&R~YC+%?nVVsS9Mi4KI;%bEpy*je*UJ zpo2bXNs|*+vd1)laf{}FfNW&rOInU#BFFV9Y?9EC%?fYc-~c1}DKir`e}hW*9gVwQ z$oBEAxjk13UQS&|IfMt?CX|;C8}w?@_Es~IHXqZ>JbB`GRG~m2;Ql_~s@f)Z<4}Rt zdh-+R&bO>er^Z@Ji@R*6je!&=>=Wcn-F&U-QC44t!4xoovtFpZS<$6JN|!3GnOC6d z&`a3p1v18yYmZX)6Qn;zLdOIpkW5SKjN4WNp0shNb4Iy zOoA#ySaoHOf;&6zUBFX|(nO!fxPdb25hI@2eY81?NdX2UP~$zKnwl1-i6V)BbKbjP zWaa7(_uQ!Dk7EbN(@l@<&M#5C$ye7HzB*lzhV>X~^NbA>r?h9Fn+W z0XbTW-#y1yZjB;LHC-R~ro3{Y`yYG&W{d?^{E(;Z1B{#dyx>P2FJBcJ*5k)N-}P-5 zvvH-u1B#9Y2gS9Y5{ERxd>y#uMb$tOi4}G9I?Q+u8%Neey;5trx|(oG45MbzFT9vJwD;@F_MVc z{IP6eO_ChExOnfXr=xBySPbem#w1z}PbJ&Ze*wLO1 zd4O9j`LcVjxRRV5#lfX91845d1fE`*5sQ8Wh@mJddis zop$>(7&IHqj70}012%wS5C*OWNSMQ2KDAM>m7`P4>2wYVb#YFlRshCn#G{9u%(GJ5 z?KVK%FMg?8!vm5L^9C=Wl854_xxolT<3d zt#KeLl(N&4xp6uNxgjG74WDZ!z0V zl(52~C%jGGNIC|FhYrqME<>PnZ(u{`;>FfP4K5lh>rYt)IcRKtB zrtQW)%F7wWWtQ_#U5-|`p-(8ENZT8Tc1D_X5hY0O(&X@qFm_vjptt*&3BA=|(7s*n zS<`O~AGtv78)2K;Fok!W4AO2@AUZ+JD3*%Di3m*w14~OFKw=?|b9MqqhB) z0KdJ`%gJ4DM|44J39;}C6StZ&2s0?)Tz=G3HCy$$ILClRGq5v~T)R{W)E4MZZ3&iK zQgS^GyLrQm#qm6{=wDfY*m*S+Q}lyA!9q|{R|#X^qkV$5SRwwE24vEub!pgsb#_z9uHm6jj;WM5B}BT=SHFfzOfRgLqf4@p1+M`8nB2nT?-2*J zJunM0Y;7KzF;(^i=)}9pj889NlcbiG)`{uKeP2-fYgOFPcfY%^(H!SeUI!wVkNRsN z;CCh4RrQq)M@aVD#R9q9qouofjKySx$wm`x%F(fp=?W4F=Z12wz#!1I>`Rw1|6t$Z zMnqdC^IUsK(2ZV&I|&&4$)bk$rjPl}-0Jhch|)jm)~j#Fw5^Vjp>i>MnNF`afg%pJ zCXEpH92K>=I_4f;3sdFrb)o@-kJ@{4d2TMrd|lcNYhoMUzweTr_q&rh4=CvS^(~rc z*elz{W^lcqP}dbMwf{^?Di?LD-H6cWyP)-R#hLNhU!BNwS*}!yL?}8EE-jtJAqP7X zB!<3sMvW8%2wk1;$-GYLSdte|+4xDp7mXZ`#Y95!0WvcdEHyNXSg^d(R3q;Fus>fI zA73o%BAyEC&hUDAu^5bth5NXB?RT2^+N*c(@-MfVTzJwyT3T6s1xNHHM+!5^?Rtxu 
z2HiXky?3wLZnUxgDEA5ueL?;R*q%IUTMd|<0U8Yh;AG$fsRCdLBo!&I^_m7*jr}6r zY#bWp%Q@CFRqGv_0hzex+GnU5za>cyKsF}Xm}U(M=*FE)!b&R}O@jIun2p; zs&c3wqCf#(Jvp3PEllU)ieV5?x-#7p7mVaoIJ$(IFMW(* zr@4PM-AX4e$c7UR{-l}r!mVAsbuO{H285e5uc~6x?aaQQY^MTXsanw=_8 zo9=&{=KbId6Srlu#nqpq49*KfX~haagOS&8$N7U!`w{_tm3sEnzJyJkuj0qIZ&Mux z(H8-NNhRq%ITzjshbKIRK+A^uPbad-Y~0|+K8atl24Ke~EB!AJjlo8eLCeCJcr?qj zNzUkPmb`Agr9}Fz?%Fkp@rZZ|6+PRfV|O*?Y$mr}yj!mY%X5Mx;FAJiFEIJy(st>2 zq>(?m3^#3`?m}R1F*j7!?B&Zin9D*LCkmfxalcttZ0J%ErjZNS^Xwsi5mivOHJY`GpjS*;q2Yxbx1z zXL{i68Q4PeXdwv+_y|+6p`9afyAOg|Qz9!Wm-AzNGafxH*OMdr3@7DZWrPD)c^XZG zgfP%JZnkXpo6jYk>#d$ZS_E=`;$M%>W!5=1GpptNnD8q2Ct7kfd)RQ`$9-J2EDYaT zV0hu$69llTn%`brw%eWinu4t}_U3(pxP+ndPvVnR2UN(3-*lztw8$0hElX9A-(3-w zrGY-_OMN>S+qCVtzwM2M1Z#P!oalgRp#WMts=R)Plj}2AY=eLQ{o z31xLJnb5$#$}oA*Yr@wLQca){z)?X=rKIWExcxl|Bdcg~jX3wNjg6uS$U3y>{gPp#ZG5kcQY5Sa($YTXGv5xb*}-~*m8Yu)Z&~2wt-`{2 z#BtiZ^x@#ZK3P{sruu9DP}Cm@m@g%%K)v8a5vh!BpXiq$j06(V z?$8YQT~uGb+?rovTsf_MO!jj-P5Yb*jLQ_>5%)m4!R;kW241equhlIGunXn!Oyb>5 zKPP18ppBVXB+b#{brD+F%_CtWQGLJfF0wJs8n^ULFPqTHcF3gjAa9CY`9fQ}&m}0d z(W8nzpO3p09+!^1^$`mCTS0s){O-2As#wd&(~h35l8-fYr(`a!g+d460z}TuWR@$&tjz zH-)VE&*}TPS)Re(v7XC#%1{#}^(g}r{3mK}kVqvL+ez8&zF)x;+(6GuL#u#Eh$?0s)tyk>OFbl}tg( zt!8vmZcq4h{%% z`8lR6x@Bd#xv>XPwE2T2L&%{Y$*z-_ks;rUw|DQu&PcdR>)pM~LHR(4iNJgiu;%Bh zHzZpE_%>>UTpNu}Vv#CMwNAf&4J;xSQb$`H5Nt`KUOx*FZHav*#t20fHFhtV_`h2j zW8eKzR#LTc9*tgG13NC9z!u*9i0DW_G;dc^MRl4u*UJN%syKzyZk#toO%g>F5_bP- zE~ugYw^Kw^g!{$jh-fS)J9{h~YM=~sj-1+)4O75|^xPc|O{w6tn<*4P2A}k$Hws@K zBt<-;=IxT$b)69`)o=0UL<71$y!9IVo7cjSzsMnVsQJa6Q`S(&jY+jB<#yVau@XDY zvQ~G7JJ9(JWisGAXRJTxW5*2v*nOqrCC>SE|2FPg!cQSoGo5lPiZ<|O4u6%5xV}hR zTNkZh8(Biq6!n>f$uig+_Pwx0(nb)pvf_6=qTqWH&3t*d zVOlY>XCxX(j)_kVb~UV<4XGZ~>o>nRCkX@I`N=|KqLhfqb?$xib%U2-4a{Y} zgknWL!MbDbE>BgfiOKls<3}gH%Ei+gkH1==&K^;S|Il);1UHgWc&%YM zw;VhBcb|UKA1zEyIlexz!8t#lJnWKHcwxW#niO;!fmrjK4nCcVZ2pHBp8@HC2iSme zkk?MOuUP_@bqk0@*^@x;D-l)snJeNfg<|ZgQip>NX1!GGXpJY6j`z)%#1Gf(=8e;b zYyl1X_o`K4?7d2lHa@*#KtSM^W^VU&B`-O?a#e4q|nx8yWu!1 
zlt7_SqktIEtE`HaoqoP}1lweV*`8`g%J4h~-=<$P}EOStQmcoX_> z5Fo(FjR5Pm=>3T!d``(x1@amVHii>IfX3+4Ecy`hv1nyah>>1XV2Zl_W4pDbLf!zFmJDPzEw} zcJn2E5fWbP#SrG?z2lx$6=R3(QSR1v;~4@2!A>TnuwuC2>Y#|G{-r>vrq|3{i{c-T z)zUnfeHQ%0WTK4p?ESTSOXzk?7l%c%*V^DEPbe(A7Q20k?L<^d1mjYc*4;a05s{tW zJ$Of7*@lEN1$iACZP+K9b~;=yL<<3C;Eq7U^Ldj8Ei(nY|wA%y24f=7^(&74-X24#WNh7?5y%w*1D?!X-~(O0h?DvX4cm@hB#Y{#OBg@Ob|Z ztA7m0acwpNX-Htx<2`}Es(ZuiM&;i}52^AGC*Ho)mfV)EYayx7F7cY=f6HN+itZm{ zQNtvG8F3Mi_u@bV7rOJ7;y&~}w(1BEAT%O#5&w8N8h^=lhzjiygO7RrD8#-BR66a=c> zhve5&R*|LWv%K7@@48%TYY%5&aAt<)g`{oT`k(H;l9iRs2Kp`C=YbLqwEGY4{|UVU zUc&z6&V);zO6J7(TLQp_AYs=X0eIpco5#88^|7_e)>wr_gqv+f4{foIuCUazw%l6~ z%3r^DI$gqst1}YsyILR;gB9tRl#W!xb64E49Q?ib+deSnmI)Q(U12B%VHzGk7Xblro%70R>t*%Ti>K2Xhqvji3Bb^T) zq7%+E8YdT%-*e~Kn#6IGl@%&BMw9k^J9;)w04LHO?^k#wlsUMM00;xMn78Z2fq9my zO-m066_qk_zQu03`e%SaQ1nAreL(Mg`^G>OACvl%69_0`W7F~SJ_&DsyX4+jw&E)M zT1}0an;SB}%nAb}?ZJU7;MIy@??xLThn$ejclVH>3qn8-%L1$uAeiR2!FRog++dux z_liEQGhNt5L0gP9Y#VWxxUb)+7FxmdfMm>MlZ(_Jxgo|hhtbBzm%Emmgfu_-p}zm8 zj@uPz*5=TUC72W!(x4##;TUPwcblPdUs`-a)CNR9BXWb*gP=$V6zkx;NbYzYt*c8m zKp71&9>ikjAbsCh)by>*464bs!3gjsDT1kmU&AK#!otFYJui5mH?3JudlH!W`QHQW zSQP+$fOCEQ?UXI-6|=ed17~BW(PZgVWt6|ww6eO~xTj~?Qv~4R47zbt>cv;t#Y5}x z`_ZM=L+g|Ov*?$yJ(f@{0yENO0+_U>&^I0=tzcMp>1|;4JjGOwADZk)w8zA)#?7&CiQG@4- zi$y(+nI$B$A<)|lAX_ln;IZ*0b)nIV{i&eiGajDu`ISmN4!{=h7!*JdP*Lr=Q528& z_ryI_R8(vSJWpWOiu3Y9%FB6x$j9P~+UM>qVm>6q#0TqPb$+0Ib|YciPXYd_s`03r zPUmx9kLWpP;BMjN2RwmqW-rte0gIniB>fj(OSFQCo5)@BISpFM^#zfdN^ZM=ZUWml zQGKJlvB>Wh6&GL^a~>4Xin^Igx>H1OWv3s(QEbML{2R1S$;lCYj!bO}Q2ArHki$gj z%0+b2Ebo%Fw9?70s;a6t)VsmRHe=&6A-@WJuRSAA-qC03-@ivM*PDZD>=MPI)+VM>p!a)ADE)b{2xSW_TUPHs;McpYso3pWM~khCrv5)8^riv zn-!18jAv+Wg-MTTc}*jLn3f(iNFp5LF%Us-G+FuWl|5Rdb`0c?_g8R~K;XKaH9Q@LuN2I{&;Mm?OpA+JGRsa)Vu^;% zZ1%Cx3(JFNL04@qI7zu*j9>4G%&3<$2aEW<0D6JF^NX6m4+=iKt^%iu3aC0?%ZOaa}(=~ zGB#qn#I2X=3^J_<177*(OLDO;u-Tv_Zxlp98+eWFG*fMjY`p>C@W*F*2NKv>w%FEF z+63T~lYPw-a&o=mz3iR;-?9O{GU1XuoqW}qN6B*V?nImZ-+o;6CYDJ4Ua|?Xh&K>0UaG3{hTYFq9VQr&pM;< 
zAv}bJ2;f`*5R0~frEmfXA>k`voc$Ktqw1F{ zVIOv;!e2eOYRaucT|jB+=w5sVAgu@3W%GatU2PYqi>}@CS>7EAU^>3}O-te7wRA8D zAegqp#KBfC?GqZ(^@@$%m}GXqKdvt6t}?uLJKLBnw9f4cg_>Xc<`?9rovjjyi#Umr zd2auzTuAlUJRp=rtrk!wFWE@}BM!~=O>8|p6u%nQ0&MAXsS5(`!E_idpN*EtU0`xb zO62Gkhk&Dxe(IyAe>%|?q6nYNldaWyJEP~TOw^z4Qy@@=*q++*ZlaFumez3R1y-kw zwzihGRo7bTpDm@hx*3A3OFQyF>9=E7Pun|^sxGTuzAH|j?nz0j&mODhNBeV_V|f`1 z`{%XlbWgPnven*HXsFlx>R#v;CPU z;DiR7T?+64r!4b5Qim>dJ%nd~|H7jZfBo|1%Lbdl?Z)`Boz;zv(F%(wdzU6IM#mEI z=*68Cg`ylpK1;F`cir>(T1bIH!f(LE$565+1DR zx`-w*vey14qQbGhT1si}S6sAr4exWkAf%Ii(HX25K(6w&| z+=w=&Y~#;+|LoJ(th$!#E9qfm7tX54qu9u0qY|NvI4rV<|`!BU6)NYKlPiDQKl=CBqDjWQ3 zIF*7Lz0&Rs6v$(1p&j8Kv9gp`_zxPKU#n(|J|Urq?o_`px*AnfoE6y;1dw{_WFr^w zfthdJP~R}1bVa<4YtL#05N! zr)lQolLWo=+sjw{+)u)Tn3DDDyz;f*Z@^)V@ujWfg0(>DaY3)$^9~HIfs+?Jy8}OP zZP-;tUyCJGW7PX@%8_t2c_8*9v3^Qtz#_}0)U}AfOS=?>CLSbt4tT`lAMForjsEH! zA3RjNy#=2NI$JKZ-T+%Ygg`vDNHv>$7%F9M{`h>x4Rm|vdIhm^ybfY}(9x^M=?{X4pFQhvzil4 zNgov~{LrNoz;3>SKtIIBcAD}00UxgVyk)T^pf4yvgKk`fsdlFuk}fXh$Jrd<^DUk` z6LG7nmIk$s`rBL}Idx(0U2;Hr?JnGrxdF~ay=qNzs?GNWIUgP2U=$cJRpCcy9Iz15 zZVNs;YjH9Kz$@--RysI17zYPOe2D$cL@M=uJso*m4^Hl={#Xtr{UIO{dh0!n71Y1@ z(>`x{9+R&{ZrEqF9ab5a3xuCD+>&*pS&I-Q zduz&{j7EnI+Jf(#FKN&fMJG4Sn<8Ez(=i<&zi@jS^bnIQ!F?ETzyLJV*H2;F5L%@T zwMoifb&4N1D6q*vj1Tf!=XN}FIWIGw%Plx;@vpF+^ViVOup7zu%l@Ki2-Ii1SAvE8 z0`3CSAeMWEI%wuokfl<1@9(#{n!&dX=tWh4ONN5F*b~jr)9y2iI}pW?VVfCir;9JN z|1N4)Qbq_0hBP;epZjSdwvW;%A7L1kfsx;wl;whRQ@nOBqTNRgdTM`+vplF-tF&1z zw{q=Ei()4DeX51(6*?~WlKX1#)jb9&FjKNGj zf;BtvT)sEwLF^xMpt_jO0zJf-?s)tc8XCDeoZs#UIRYDJzngp@qAer4j&)lX&N+80 zDuIt~2PjTi2E$=@bN`IYbv5h`FLefbi(knjwR8xDYPHd8XtC*c?_3#%Q z*1qIQF#9bK+1qc#chmc3Bf&OPd2A~++HM-aeyQgLhLu-4tdA(p?dh39af!45CFa;(ihIK_|KsrXm#?BDk(^PTOZ#3`Ae1+7z)n+B6F4r#Naq$%aQe{O9z$7@ z1mBHp&0)n;6(>44NDeNEFz3;6N$bg|g;c3nFbX`?T;jPO)BDYN4J8BN!7gTDi^(sr z6BW&y;4yb(|HaG=4QXq8rp$z#u{5sf&H84!t4d~F5K7&PpO~AjU%WS-s*PEDFzmza zKF6rC^8uI-nZ8U640fNZSxEy_ph5y| zks+dt#j30waKg>H`?VEeOC~g7QY+)467;s_4vLK%+5cgFLT?j+KVfu?^ z(VM$8(9ri~9 
z4xHHoD7dtA#-H9#LPIM5_ALSGY>%0(a+x-ky|G8zYi(z&qCjjaHzj9i#G+S%#wTe+ zPk78P`mxS=qo~cA*f*Ez-wl44`}@Is)bul24mV(8#^A@LO%i^quNeQkI!N9qWzG(*5ot3ox`Kr3vI7*#j5@E4O%?bWeBgjHZMGmdp+o?gfQ>S@m zm@T3HWYZaH95v-J*)is}H&t_i*BBp^p1mMB`^;@q)br^vuNbsBy$sRXn+3riZctgW zne++XU6uvMMeWy@^~}H}E23K#Zo}&{Z()T#PrbFm8{gF!M90Qj>`XWXqZkghzA*5s zh#eaj9Ee4MNrRAY;mx{Q=-TRf)u#$aH3ii%XG<9gw|x+w>r~gf<8$QGf2eb}puV9Y zZ{}bz&&{^s*hm5zn$Yeg7QPg+!$3qxf4>d-S=dnR^UHuOtpq=DH;xsA( zKghKoWCwZ^dN0epduOnW+)$A9scNl* zA+cPnI}&p6_PWvMTo?{r6jsN^ckc{`;V@5*lJnkjqP1rEP^{3*8OD}7s3KyCH07`t z6I*IyhzHpXYMG>7lQ5=hYG`Z%=X1VGgj&3NM_I~TuF02YcW?=x>EkX>a;eh%<4(1hLrG;XEf$0%leaG~SEiRzO?g4dgj?@i>?iHC{U^$2#dZ-@MIPPN2$ z=B6ZYi*j1Ra;ViGqJL9DwqU;Ip7B{fR{(wyz-w=z%#aHZ)a2lex6L}rk}rKO-6m&a zXVq+6Qimh^)oq?q*mTejlrE=n@0vHgqDIR?u078?M~1^92TJFg=>a5pp!)?S_au|j@4=Vik|rw(xB4aZz{%lc_!n7puI)mzKCE9Q#D zke*imP}K8RhqIHu+-U}*)Zs}igzxr9%55pFEjT!N?YrYlEwzf^?-xxEx}$I>gUdga z?8a9sy~#7XGl_Wvduxea)h+MuPidVLZFWQwb&gz~nK~A+7|^Tyt$E)B(v6}&xXwcV zGSlR-Kg#X8^5I_d;T~#MYAfXhh|AyV|R~x9};3MX5y)9 zcE)B2v@{ID#_i2f@zjArI{_GvsfP3QRHd94Akb66jW}90)Cs>h4L+pxjB>8Y%Yj$j zSDd>=^l{zHU?=`{T1aUTj9SL#uOveA+`AF!yFVDjz#jSQ-y+2_{aTd?E>`q%HKa;ePB`B|dtpmevBy;OhdtEmB z=f6XKU+(_=b|RiS&Nt0;ujKNpac=A(F%Ap{hq$h5 z@4fb3bImoU1*F3Y(qZv(+~8%0L6;4;il`08=~z$XwfohZ0WI=d6SY1WyHj6Xp?2hW z5uZ$71hMD;^)kZ=#b+&*ebn*j@2-iYg+T095eeA5d(e5TX8IP=H#t`C&oqV86re}G zGk+jCioEERUp~gN({V^BcdRYF+aH(Q+h7RNRT3Uvq=lyT=l7>$r_N9>h)QIM++A_P z2%T*Q)kF&4%>-Sfl_lcLkbEU+Q1(0h#xXGPAwWkx8lq)6&2yWArcjy3w%%=T|%t=8)Gi*t$qE5NrISaDuR^=t>DljXPj+;kj9Y(ehP$^2ZgILVxO|m#B}dp^AtnB z(!NVI&8j>kg<3uUj3@Obux#n8x@TU-PMg>8q;e zDw5H=W?$*l&mrKn*I9}Y>VB40*06T_L`6H>Gj2S%6Th4%CR8F7ZA7}!E%dM;s@`1S zCz|5QGR6a6)%y4&dqHf$OV+Oc*08%>js-dGF2#ra&i0E@$T*CgZoVSb$&2w=$L(9cEm9jC578?noaE;`LqZ zihCQ_eAB;|K)El(w_$Q?Qa4P}dm^LBmU@YTFH`u&JTT~k;YHxVm^y);>iV_Wu%_AV z7~_xBaVDXgb&_VUu?Jd)Klo8biAN?6zZ9V&@>W(%I+4igQSQgBo;%-_wTJnPPw`~D z|FAE4Y-xW*C+^~W@`?^+P52dvLp#QjL)KWzd3*j*Nky5PQ)7aqVfzip!+XC-vrzNU zd>yak5ZL;2JkyDV*izCh9sifxcicX2tItNr_fS2d1~{Uw%U8TQo*OyevCG3-Fe61z 
z*?e2JL|T(Cgl+1QBgm=}Z(m=uz8rLjfj*@<_Yc2=+ho2DoOSf=FsWH{5&)1pRg0&} z^6yDdgI2Hd1+~!)BV!YX!}?>@xvRk=Y5k8cCUl-yO{!3UfPm*^g5TEDu>8^MranTB zHEcGbuzSyT!DhF|-}v2sTz)9l*z)JfvKo%;1YoC1y3%)%DoXH#F(_+Jo0d$iH%hTPvN8b&d$aU(@?NU+rI12im6;wNP*DwykSlS9)RSJN8*RM}9U^@PEj3&C*u~ zU0C%O^QNPDcZ>}8Z!d>Cm-;X!Xq$rfS=5So_9cjlKK9rsl@sAv(GVqzwc99NTOCLaEuORAn2RiI zs)=ZSl#5itkt@r(f&EY1TE(p}pD=-uK)1aYGXB#SWi?YCrQI~QiS!5}RnY+1{=EC~ z`#cJwcZIDwOwuMJJ8V&jwz97{n*_CywYo34IcHjp5f21X<&?bK^5!fDZg>}a(CU`^ zYIvVu{if5NbT6i(yDXvJD#`bVcJ4)G!d-K4O|{d6c&|T(Ck(aIlG;Z2e66>iC~)F! z?&cpaer`_Q6L;g{QtXr38h|=m=|fal%6{Cu1wZ#Vf8_g1y6iEp-?%iDBhj)fiPUZ= zDVIARrnCUJh*1mg7x&Y>k0>%0dA4LE`Tl%eWN~q^2Z4c+x(PUG8=08Yx>J|SD~>u{ zUpGyP>@Grl})SO)r`Hgf0Er=neb0X=KcLpzb*y`>1c;2C+Y|ZPr>gSzHZF(!eiJ({8CVj5 zSj&o|O1Z}cap8gpC1uvmSQg2htF`e~+Cx4Kqp;@%%>cgIMk!^dDn(0=;)KW_Q3{_C zL{lYIW=x77&~LJLcKl>kbLFtz*kT{FAoDH8K?ngj)STJM6Oy*04 zyL!9XEqh3;7SwzGSU$f0G5PCjILSx6z~~P6;`w^KreRDZoJ4witEEPqx?btt6jIEC zMX|)rfYO@~srRpzEZ&~E&s6J^jhwzgLMN8_l5-QFpMST$7rzMS*SlqUk5+ba&%3;m z&-Y8Tw3S@_lHGMeWl6g+H6u^A>)M82==8*rkc!gn>4_ZO?5@PV`Oak>+ST*^hmulV zd09DW%cv-wHzcZuFD>3)AJF>f6=I+HM_U zD(Jp-=pNOl&f7e4BBP=LWTceW*TyiSZW#9AwnA@Y-y?F-+jM>M(}a#kML)X**QLN8 zm))+d3DHuY1DV|2xDGL)ClGuQxh@Ox9c@@r9UbXe%YK#JgoFj>^V)i8XfUYCmp;`A z8lT2u)H>dDNk6D~XyEz#eY}kCj>+e~87`|7!zbISqOS_e z)t*;LIf8Ywb1-4gS4hVQii~hiS!gcb8~?i4D);h)z4EGuA>-o}B{_IM9bX%KkOwsJ?D&X>7E22mmPhU} zLFFrWweyW!ZmC97T+U!p+Ll0>E93r@6c_^x2HDJ|7G>oSfo#WaEp=Guhu*zLJM&xxEnC8nX^B-ExjQ>%@5*@Bzm5 z;Y4br-ctlg+Grh(l!%kX5(Z1?l-`$+bdZyi6#R|#METm2bM5v;eVs;66w6#)=|^K$ z?3l4Um%F^>mh!O;tm4%Eob7k!yxhwGa{N zNW;lss!-HJ^UVL{-xqC65?ud!DwVjTvV7zxPU=sVXcza7&B3la0z2Ez3m3LJGSfvO zG;D~rH)C}mqt+oaG2rzSYgaP2lYsa;FW{4=ryq11h7Pk64g zY13X=c~5u=-^&NF4QVU#5al*1J(Wtd3R6HJx^#WIFdjn%ZTHw84=QI`eK59U8q?0?Cjp$GhY0R4= zn@MIHXKmV2ZTzfxbv)81!%(pN7Muu*30z38sPxmGi0S*G34{5%LHH%YxO9w0>;z~wcV+iFf zj}@Ik@QSda7N=4c2_(W$azKzHz+-D1I^LnAU zYZrEDWFSMM^)5N_W-LC%e#uP6V>ar~5xz`|ZO3y0wfRXqa@x+5*=%+Ifl9``ViEZ) z+~k^Bh~y+(KQQ>u@o#eXYMWN)1|@GIaB!nz!(rX2!LD&(rUXgH=bj;~(0>z+1(q~! 
zS~l9v*qoifS-L}^GSYfab*#>k)kf6!S)@;6g5zD_l!wso83@Yw!~_ucE3SL=gM|m6 z5mpeEYMba+4cLVuPglQnnyxoeUm6x|@R@bqn3&L@Nan%;G$r5Lt8YaNV-pUO2A1?N zfue24YGFgu{b27PWon|wJ@4vGl-7#ny-@Mjb{KM1JG^spRXn5s%+^YpctuERo-K*o z&1#7dC;SLW!mn;qr1*V$g-T}Y}#Piz2Ex@F!ZU&6@OBb;mEVNMJO7D$wAE< z?$%REMn-(vIgUwvU_}g`Jvq5es)qaWMnUVhJ$2%GS;>P=+-u>@WyB`v$|SX*6qiJ0 ztJ-Fnc}Un^Lgc%lC&qt*(0v3Dicr+h_+W3(PEU^^NE92VoW3`6aLE%w`}@o@QocjY z)Mq5=h)jnpNR`%PWtnXNTesPlVl#o%BkkmhNA~ft+7b`?BoI&~iJocs0X5XsW@~T; z9{no&?5wCvk4jf%>8#;pPV;Y z6t~x`GdnY%Wd2!V0>2T9NqG>iD|C77rPSar|0SrXLn0c&Q>@l?1{EHhWA=1kqmcie zl=yaQ2!Y(^L|CaAomAjknzkbw9fyc2(Qk8ltJ*p9*|7ST{S5Z@;gAqW;Q0^nIHv2w zYfNGO7G2jh$`Vqc^2E1+m>)thInL(QwOl)Eyv`p5ar4#D2}jc(ny$)C2Q5UduQ^%L z9E9(#tPCG|N?>XHB9*io7wHPW71H^QjNeqAD7{H=Sp2TQ}H$r>}N zOBJltS-Intj+Sz))l5A~0CzN96VasAW-}U#{`Bc=Gy54yX&M_GWP98aW^1eaxaaQ4 z@aXef2qBzw_F!~aVPOqkY*Oj^na4*ZJjhIyBV)QRcT9789(%s0z}?k;?$!6PKUaq% z5hcm=d``g+o~~8iP}ndVWIRkPZE#Goy{*EPAq*We%QXZlZ%>oxwFh$q3UKu~ck7n{ zpo1~YyuL*E#L`AVrg*mI>-#3MBC*wCat_uZ?IEJn09&#bhwEgoW7o~NV>)*%kG(XU zh+Cr15`9)2VZ0<8DG+uCq;P=fNW4xW{wFX0mno$8g%RzL)i=Fm^b34{D!6{83(+eO5=K$_*Es-MCsyK zOd*iu1T{NCq5NA;b?G^J3f5Qn7L-vu=X)Dru3pLEfVB3iiXWXu<7rl5YG-J3RDw zG&Hhe*x8TZHp2L>*cZY5&x?O@y)VnH3k#(9tU4X?qR=8)cSAzQ<^+Ky%Hvf%q z+toEBY~ZA=t`K{t^XMmpoh&F0;gw9$G&XD5bxMf)LqD_+VdY-O-AuqsqdfX9-7Rgc zW0zz!Xsd@)sP1O1_;o&vP8rE?8RA1nBYZy6q!U(>C2zq_znfQt-xU8;1#Jtn)Ysp_ zt2MJ$fBtZ#W}8bP0_wBPB7&`#mk>|}vKUQb=jSJqotX2|sYt>fjTC+oal|jyd*5~_ zbROTIB6uU>vd^M7oH9+%T(nY|Oy0TUFxuKF@%0>|SOfXZa$5w@2|3`XBog-EnhM^S zq&-5yQPuI~O!v9vUmjBL&^^2%LCV?0syoSd zC$1rmAd((m_|3s(e?j*4`>X8S+;JBE#D?^;yzI$7`h~AObUE*2j%4C}L}X#A zYu%+q5F~@#Ge-mNU0f7($ZNrGyNz;I2B`2zn7IZ=5=K%TMwO$l7-L$RTvnXi$GAmB zAK5~G$hAbpq64L|hSa9 z781X9cH{Qg?Hh!f2adE$v4Fl`uVfIP1lt*@maMyEZ8R=TXkZ-}p~3L_APGwS{Z6l!?FXmEls z@kd#>k8e0Nag$5+xs=+1dd0b0TE~9A`99gt?7LXlG?d*dS?;0=E6maKat9l%Co8DLqGqSeq6IJ20Tk+YIcNovUcxY z;Tx5JJMV!)MB8p3^v%tuO-vi@k!bm}Txj-mh};pyZ$21yqKAI6Gs3f_`T%`Z5pi)Z zjjBD|!#O{Cj%!S`)qAgeI@FDgjTtzz9W79G*qtoeNscFQ3JS416wNrE+PPPXIaki! 
zg_JY3XHrYMtp~_&&z7JSBG3@=+)&S5D{1|MMn9LU{|t{*?y$j7Yc<;uMBzOI(kXCE z`B_~Y23CC~kd0MR&XG?SvBs9q-?}2F=v9L0T$p~+vL))@GSQ2?EMDSD-=4V?7pMM0 zz4;nfU|>d9q> zXxE$@yoaeBxkn3s9kp8?`jtu`mdmY|rf|K8Y-;@Q>UWH`nOm>pyLX71bv7mOPRh7} zpis&bmnEo{uiU_mveNhj=y&Gk7p&y{m0_85^^1qKWiXeFX2f2%7f`EY(QxuC+s#c& zn&Xq|pl#mGNbm;lm8t~#{>bs8w!3GKyco6L+r_qsjjOwmu!t<_I&!Ch1OH4V-FmGW z2SoQysKM5`y!+0@Rf0#abH0OfU`VdHnb<-JIbGzSl({VQxl~PG)OB;Go%=_1-2>;pX5|Z)FowVH$ASv-PUEhlo7LsYBi1XjL1JWP9E5l<-;t62mhXzvC zkpzr=nO^THYtMM_RBtuFctFOWnk&pCL_-Y;p(i-y#oiyZmUg1&5ymF0mQUgy-gqaA8r6Cc#X1< z6=BeM2_}?`TIhP%(28#6ivvVVYv(d5{l)dNi*~%W1ACL}0VcnS*mJq!$Mv!dX&Zso z0S*!iy1;bK!fP$b1? z(R#0=qtkNXKba-zI#s4S{S%w;XLEB3P)7V|PW&x%G)J0%34ai!5T?`ymW+ZRu#j%1 zn1+GOhLWm?)x*4TWOeNm_8MhKXvojz$`dq(UYy-W<+o>^YOP-U^aoW1Ld~J+_zwfe zpWWa8ikpr?Uqsb&Qj}7(r!5CIc^nhO{bJWFX+uG}Yb2eLR&y186 zB}n0ubSQkc;;KAe@c^>no{ZEYHW9znEKF{%-eyKjEs9ud+*85KDvb_AuLiD;({Sq3n^rJxG2YUg= z;Lxa1W6qnK_SEufNNt<0@OR(lHe7XqxuvYbyblEN*ea_-bd^$+J=IoG}k28YRK?60Ji zKgc)-uAk|vsL-gZCx(k$b;s*w?0Hz9^0wJ7`bJ zqn0O;X)R&IBJO%jvG!*d_e*<0dldKimv`ohtgmP`%_>qiowM+nKL3ci=Bg13TAFe( zhE}=RcDfQW6My8P>c%5ySQKzgv&U$vi~pptPsu|Q^|xqwGf8aIXsi_4s|35CgQcU} z+3+fP{c^gk@8=#d5V8@0_~I3s5>)DhKvO=K_V__~)$&Tnl3^}yyl@E!rhVGn&w%VO z8hK>il0yc>STQ;#Bs^plz}k~V2uNymHrm!}vqx7|cB^mxb|u&*)_zii^{UTDPhXag z?F^qa%BDe=4X96`b2(U&{wReQb@@^=FsR;l^1D&&t1RgH*~j%UXN-WI?dyVB?S`vI z&ya85R?;NXJNGX1E9YLTY){%gdvf<=i_WI%M?$x_z7&3^yd!GG)quNLLI{;CVc*NZxoka2#MaMMq7fcC2w2ti=S&zTb?jTNoKh zZHiLD=|^eTc0vvglUmt7+2Xxj$Whf%^qrl7u%tU!xAZ!d;RHwn=kLP1 z8Ccoc966j*S}pnQhF&iF+*wbYi1)I445|IR#mu;_Cv3dh4P{~^3`g=fKMd$ymyqnx zV^gZ5xYXn~-RA?MrpmYiS3^`yp#rw_0jv~nt_FSHK;N|f*Ty|H>$xvt zuA!_HF-dya3(7sw+JkNmmlJ#~EI+6SzB%jpMuj>GU>PcyKymQPKw1(}Rqxj=tuk@d zAsK8=ac}>tWnFj;>Ed0j(;JR+IBF6ULidwl8Q({_A7*&dtHxoi`>ZpWYaLJU2qdjRI96P`sg26tkpv7lzQLVUK3{_WhEWQ@^*rYl9F;)d2~wl zXKi(=gXMQffevChMC$(h3`lrZ?lRXI_D0lVmPnzH1x>h)bNRSmc_8UJ!CPNASP+xW zR3_<)xX0cP<^@v!UCxru)W<1xhnv;?uGOdzc>b#SXa5!vOC-Js9ja5A!X*&9(rPa}N)B?>8i8l}(=Vn|0 
zSV39ZnX1KcOZMH#kHc4QmLmlptc>1uZy{^j&LbRu8R`6vn}{{_@GI9ls9$zut!B`s zOXbBNaypB|L?KkdOC&Y`aYLU-5wUM~re+LZVc33B>1A%s~a-&CCJ3WU81QP1Hk=XkV>H4`XX@|nm&Lo#-*VL4 zmv<#RTpnSbaosAWEh`rE7CAb5QU%mW-02GZ^wr_r z)|xft?WzH~Mps0@WU}BakDdHNBS+=_rx(Of#rm}F>##~#RUvkX7iJb~Tg;!KUt8j~ zva24fd^|Ur%IXo&yug%uYjZ1+wjO8sr=i1^yGRkWt%#|XJ1o0H?R1#$QE>xzSK+9I zf*wmoOLeD0yyJS2(}E4_7T27S&)F$wdhf(4P2rM+rS zWUU^L9x}C4f>`FV5?N6jgYleZE3enq{M+a+jMmQjCFeyZ%Hh;vU#=#QB)5pD|MwpB zhKiDkD$!w5x%sI*h?LLH@2T}hrAQer$iwGo0$4v7HQWkFddWZvq2W5s2w5$HI&H93 zt9`@ie(IeGF(NfeP2Li^OtN9O91?DdnH&OK3{Bt`?c(pn%RMUbc4J0O3C-V*q z>lV&=$u}ObMA0lBb)VTh34A2@?WAp6`R8e7;H37Gt1o{X(;(;#W<{SYzDy9eX}%co zi+62W?CPr4e&<)5e@@1SK!%dlKC^!+`@r&6c!{h?)r3~&#fzHdWJ)TvT6g9f{>9G9 z2fN0L>Ouah!I$L87sPE=>fwt9Jr-;q4;`YD6Z)4Q*%C9z{B7%I6CNWE(FR=*#LP6G z8ub_|h=kwhv{n2mJDh#Jk;@wr$~opfV>Iq4CqpvY^W{{h+-mj|DbjlO6Ybcy7s1Gh zAc4#1QF*IuB;vh2^nKb^u72rl#-VBJZTiDkbkf&lhl~CqAy!gW+`Q(%AKwgm_$8%%f$C$Zi(d`ksn>pY zszyc-5?&j0pF4d81%=ljLnZN+Q&z@(mjTK{$-`47>^EK_1A~K7ves&1=#zIphD{7X zgVgx}$!*}K9xzX-wORb&L*vGJ? zXKmU@(&A@m+!-1drh1#I1oyXq%z${(Dack2pUGw{!>KvAN>wm zBu@V@CbF+9enlr67YO*`=Chn7NuHQEJFJ2Q08yv&{;?CYsRSp{;JnB{vi$QxD00`d&VTRWU^AMs#sHrPVhKWIqj*U3H#AkJ4 z>&$-T7c3^3Kv$wEd0X^W^@Q9y&=m*gr0+2?6PX__Hh&MHT+W)NN2R99YG`y85@wAB zkC_=gJwAX=`0AZ*OYLcXyA_y9CPOabv6`MD$2Y=D+%!w%%6HCXL!`NjcCMjyvvV?q zQPl~{JtrE^zbs?s2_oA4c89>vEqjlT2?;|~OL`9jIq??b?SBc)JW2;fRP9#)~6A*0^PF4OaJkhR9IQF* z*ScsJRX_vv0O$<^wW^XdGekND27{3l{_~xQpzG^(=&QK2w2?ppoxZDOUnuxk-es<) zmdbQJS_&g%&m7yrK!Ts0on_Lf2IS{&Jk~hcDk?p|fISjxRNq)0Nor9 z6J9F%y__5keZj#z`ZTFbvOxZ~^;E!7vEyhX2%dT4$kF>RX9Xm8Z}j8>&pbViaBG8~ z6*3I46Nb}OVk-!4=YowV_vXZ-d4`g*T#_dp>k}+?^jwXZo|xH(mv`|KA19Hb5TJYZ zylyheMoKx(ny)Q=D3YG8C!1;a_M9twl;io9QX6lLllasQOf3skC`Z4Cio7T&g@r zSXFYBP(?e)RDQho(QoOME$niP?>Xz6rnVCP({1%#LGy|{Lc@Dp)=FrlZ{rearqr9giIem5#1ZcA%NSF9` zbgnx_-9=%$Rwg~C3?E2|<1eeKyy}vfky~VK7`Zhz6mCvL(LZ4K%5GkM;GMi9lh80y zQa8A3&h5qhuSJo{H;wn7+gvrm-sb-%%)#*U!{QVr_AfB99rUY%%9uLIDX$H(Ke2|m zXqcjj$Zl;7v%ru)uH;DDK#_9(ZU=yrUdpQB=xVJ>k~2n4Os!4yjN! 
zz4%a=6lpmnK+6hcMqWuFS0N{)=u|Bt!&Sab4m)$RM--_27^M&~(JdbJivCkZ`J2x5 zX?Ty8A*2wq(4kepM>711xp`rR7Em>u-IL$kRv-PTcPt^AGIp*Ek0@gL&%3x^HgN?O zCT*crj49KD10-m~^4m|g?;bGG39hOfe<~?Gq5gwF{PVfvDC+%xVG?;?YVL&gna(+C zgJPeiq@u(3%#AlSkac{WY4*d5u&>R`@s>**VeIa<>3S}!9m*HvQ?ys_oG~A{i_rZN z>vHX;&nZ<7E?`FayMLkKtP?$)!~Se^KAj;~0po!30#($;>9kLknzZc9cCq5lG|}z4 zd1h|QsG!l}ELs(-vZzneA-iK48c9v1(tR*N?`pAdr_SE=4bBp)*(r}i-QVRSsw!qB zFVR*-nG~Dhld}w$Itqg?8O|`Xsd$A^P;!B8-0|u~2-D2wW^pOL5z*mO@s}43n>6e1 zzbV!GTA?p~!uY|*wNI0nyu@g<_sTX@H*)GH1Gex)*wEsi!uWecKDR$KUnT@JgKOwh zre-_J1u-+({ZXF82aHhLYsGGDV=lonCZpDj**U%vFB*82ZEO#kEgSO1wQ z>tLqP{F$}D`6U&FdXq?#maeVR6viOkCpo|_2q7c{L*vd$p52l23O1xy#b%0r@yGXk zLB#9$B2ppICiFHWmzbf!_+HN1yjWIM?liX|!OLhen;s$0pO+2DJ-y$FOUm1-1?;a! z-Pwzmk5<3dVP);G8NWxsN+s$1@kEZfT|<~=2+9~s*t%8rQ{jv=6;JF{KXl9`mTq(W z)%PN4*zwd2)Fe4oTkV6_7%TQvwb)aT3eni-n|w5b z`TyElz^a;y`_D`Zvz-#CH&=qRjE5D^0dq`wMomnQJ?f=hhemHg<&dZb6Q8C1%%Orw z6&oajy;rQ-pTdlK3e>`|yOn(!^N1spVMNl3f+R*chx;aN$$R#urNUKqQf<~kY^9}w z6~5=oK_1PGAY&>GVm0RB_QECSgIxN&gAhpuA?~6d*g+B@Z_)Th6nbrj&NpIfq~6Qg z-d=nN{RsZTpJJkSHIsAHl0B%VGOI>><5zPV7$wlt-4=4vv|VQs%gK}&qnEETZ@(gh zigg%$CmbKsy1U;D(7{ywy{v1DB9KR@UO$ZYN#=jAF#nhLmmvt{f47SPx`J`F^6uel zo!U8sV@c_vh$b)0qQu*VkPvvv`mGeRsiUjYj{VBHIH+utUoTBpG+tSySyM7<{8zE4 zV6MU8QqAc^ZDy@;=m@thdG3OGYM)Gq%BZuOo|a6DF=(uPt}o!z;W*2}t6S`(urW2G zxRy0gf=p(bZ^*l;&^>0H97;w1p?znPBz2xQg*{MF_H@c7&fWvQ&HkrE4&yz}xk)z( z8(n$B&y!^G{}t{*#OJ#xAW?bp$Ta+WPVnjElI`3&t1#@9dwGT+39nmRxVCy`aHAg( zll?w0(UPRB=fX4P{tRkZf1#VsZmuH~^^skMJaK3p$AE9^CIF_sMS$kQ%yx-zZ!Ftf zqj(@#i_=ud`6thd1k#-6FT~;MRgIzHF38b%MihEPmQs2JB-71UBHnKJFd-F;eOpohYgD#pl6Aw*L(-+XD< zxo~=YAN9ac8Q;pfv9eB65FSjslj{Hy=jFrH1ZAQCv3GF+O(Fj?X|EspcYil5T?y7& z35iAhJy3^#ggr&V6dbdE)@Inok|gQ%X&?mi?ej0s&6B=P@8#4NV_&9io9VTy5XhDF zEcd+Qw5=+tEv$uYxjiv`auAt*dVt3#zpQksiaNWSc|GwuZdPa8SS=#3TW}#kDJ10K zKvH$OXv;plc7RXFr*chM!nLN-%u}z#b;3aD(f%W;yqDtK&owvsibxQw7aTlr-}JF)$6V(ww>z?V`c z$+sggw7hHJq>8p77k+^(bj{6Hi0wTdzRz&^QDHN0pajQM(Qm3`poW;5V+~_SO^4{b 
zk^KY(0?E^3BNT`Gx9$Z}+woQZhj0-et50fXn&3e}UNtQK*CUDf1>VR+to}RiEykR# zUBY7+k!Ei)3mV)mvO-qPeUtn4j=aRM8A-&gfcV*% zLku&cxn>CV@})MG5O`w|h~6a9QM-byZM4S#p#|qs z2foxY!mMA9Y@~DZKT*vwIIepJD%Dq)w~(*a?z}JBcr|Mxv8iF_#>FHN!W`sPED}AV zTRS*Rq|Nllm~dA4&w9~h`|u00{L+K4AN*XG@kq-N8h@9{f4{yc2332lLL2j8@Z}&M zVVRz_S##t4(yX=G$e6sL6-e>B(b~Aby<&m6e(=G4r#~+IWGt(U^fg%AC<{YMg`F7p zQzkB^^0mD?N$q5KP5+Dx$K407UhBXGY@QhLX6=mf%Eo9eCE4zq_KT3_Q$>DJ>q1>T z($Wg}@GOZR@4%x{RM@l~9jNCvK#hN26~^>7P`cCi)Tt1&ZykOrDmjLV!4L7@%l%?P z#xk>PhGzB=*2^FSRZr}>1u4r+WPJxMSOoWniynI=Kp~+iQjVs#9^TgYRZC{G`EqW6jzWUrSb|>0kWn`zDln%!_;J@$YpCOP#qy!KB@8z2(^;nj*^t{Cq{=zgU z^VF-HZIK*F^O<{@J`rIFjO19EG?*Ea_Z|N?ia+bH_5WuBYxDGJqi9=(0u2^7keIUP zGd$f*D|a>Vt`;#!oTW5Mfd}^A=xZvT)VO065xn{oo zpK+06my1jvx$=peR1?KmJ-LQ zBzaCQZJQ2wfMk=9L@;wePsXtn|d6Xot~20}c9W6!lh=m?Sq<@c+oJWTW z9urKM&;jtszONZWIJQ+b?AL8uj-1}xNb@{w2Z}AgHRvrIb^FJFK>_&Yn z__k+du8ZCE_UnWqR%%(xpl-Y?fEPLOirOzVsP@IIB`*CSkKV!Wpi^a0o~9A7aG}63 zefxI-PEJk^fbaZ)tfs@&qH7TLn-YGTyh$9 zcd&Jbbb@mCwA@t6e%9)?-qB*RJn_QQ$ab2ABk29so#s-rIZ9%kxg3l!O1prI&i-;7 z^97hd#{nn4WT1tj~f2bGR1Q~#xvJ)mXc7j#4a$n8oAJ6n z`uEo-pmT~7Dk6Plv*>p8a1~rWAK7H1-6C9j^bTQ=W5s9 z1yXfOT=`s~VXa1q*X>T&+?O-x*(SSw2V%|iESYj_33=$t`?OSx-W@-jUv*0tW(U(1 zMOQK9t8WXNv8$GDo+qPxMJf*mMs%lrRFN>i2l41$)bol=cj7_R?%rAQa5#>h19F*0 z-DP22V_I~tv`xS)P+eV^DN%s`PNPs@|Aza6^rS^trp0eM|FVxnM#xe2F`kDZ6nJgrVXwe2bi=F!|RyJl(APc>p6uqB6UFAWmNxIm(!uPoE@*IOOs^m+FOQ8(bq(;Ye z1$2!+b)S9gdB9lT(Jr7iNh6zN zw>d-vIEq0rF);|A{}mN`cGBgR0=smdAY;eRv&WMa{dm=dxaYWE#rV^;6xb3Rza2>v z9^6r+1rQqSN*2+CIna8*>5Q$gs?i9~r27l$b%zq* zP*7m~0#{CJ+cM}H{L{V=q( zh3-!7h>q?}7WJM%MWKLMrBDRC-+QY7g52nGkhGpaz^Y>e?4SV>{5>TlEImE_#*4ED zlm$rkrh$(cSEbK`2ZL&9$`=2|@2!Tdw84|>si~>IwJsp1SZO{%)98AHsrRTL0088v zsmh3eYu8bW-}x|THAraT#3eMsGnN@X*L4!%Dp?Xm3|>2?eyA6>h6+mi_ck2bx;|uv z7Q~vZDM=-%8irz3u#voX1TQ%_zCE-dWYE!sNl>w3Za=9-KGpxhy&rZ1iZ?@DAviZ6 zFEXDge^pRWpw;LU0<08&suY2#jE(RgLqDLP8bNNVa49tiBjQWTJP77V8r=PbK)@DP-tdY%1b^}o4(JEEj zH*am&4-e{4FfmOQ=50B504Bllx z-??_9(<|nKBdjc-`?R*VHwfMxGvfl4kx=jr2M}4cpnHI*ER)PLe6N!?ang=Vv9htz 
z{dZQkwTXrka~pu>h`)oNm?xJwpPQ3&0vxW7fxGki?(P7X%|s$zeA8vR&`WrokGMz} z#NvOiy58;;Jw5&T_E-RLdaeL2#b<87eRu`z{q+qE8FZ-Nj-h8M4!beQAj~Ck`p(x{ z4+bD(4c&31smwJvnwCBUUei{%sokmo$ zydd**L6YVbh)`4fl&8S7>;*<04c`DPiH<$48T-N_QosR;&q{#d_}`$ur=G6`&- zSzE9DHEJw?W(64TT)^Mv;&E4ecIjX0L?bnEqCdm|otzMQPPuLi<4 zkh3x~GY>x48)^`X9xV-L1R&eFxR^{8N4h^fNRbM9Ao#tYk{r*L!nxdE3YW-Qo#1yQ zWH*2SeU!elW>8>@KJvZ>H080FQs&E;#y#`mx*t?3kSFIgyYCF}f=eN~Iq+FQNv z>OHSm>TDJ%+1Q?|s;X*fYxBFGLfYHgL08Gj=4KDDQ||%dv<)r?#BGnaj3BLIkALe^ z?bHD(QSN8M|BtS>j;lKBqQ*fH6ciLeBm`0El5P+r6a*xsrMnw0jfg?2NOuWHOLv2G zNq2X5zx&QSGtcw>;*Xh+Gotr*@g96L~Ot9$NUJGY|c;Y zDPM)7cmlO$S#?ZV|KZlvDbOpC2)Kq*NQQztQecZ-qQ~V08X6jm5006?k<<>BYwd&$ z&q8-v;O5sm+L}h%+`)YXwA4h`k`!yx`oH{;rM6V6NZtO)vGM$;Y-1UIMvt1|CB;z=8G5{& zskOPl4Z6X)ELL@rd-r(O*83J`S4aJ^7P|z#qD~2RMpJA|cWDt5o6;)%6gd_S9c`gQ z-eESI6Z{j1#RpfeUQO4i@NS z-76pMGnvbroV7~W*|BL)PbU)waDAuy7*y=&6_)YaYHEu7r(%|{XilA=GRAFUV(}M7 z)V?ok>&G7yU!nhxTQ)cnf4@f;^I=LQ#%5eox0ke+biiAHHR@=Fx!sW}>!_lg5dn;*Qxp;Z6cBjf%E_6nHY|)!p=!hcWw)tLD^UPwhDgf3E1pKe! 
z1iZt;@7u;j^Ns$vWjc`Axi8?dU=NpZV!SUNia_B|+@JT#?qiOzu@6>tm({HR4 zLN^-BesFo*<&j%loC+stXmnJUCBkXDk)YIW85wY3&!K|Po0-{39SUs7x~3+|u6;)s z_$>+djDK}?HA2mOR!mH6W@_pS#k19o4GVX--36&XChQRFx@16A1Mc_Df@-GHpYCqUQtQ6^KcrKz}uTpU{AlP>cGUS z`8N}mvMe5i?KUXE#liPEfSB_UCC3NP^uDw?`5Tofq>z>g{=T1g^If`K zN4g;2iuK%qyIALr*+<6X@u+|_t&ph$1z`gI$U5F zp;?DUDVqdkF#9cQ(DMAe4?M(ToJUh_ZNd}*6^>Za9q_u;a9*ZBw%l~a@OJE_&b7hG$RRBPJm+ESZq;ba?lY%gV)+phhmLrT>hV;R(!Jas zqbD33p-@DycwD&hI4pUJWr=0<%N3fuAt50_(xr@yq=p2BdpLB;dZFLHiTm=T1pk2@H}17utq~hMBpw@1}7D1#FL!60Pj5YwPMBH8Vd> zN~GRgi8NRFklKS1Pf ztv<&Pt7746KBtn7O)Sk}>WDhNGBv+k8+%^eU7^xLD)P@RBKK|6d$)CzOmg^!+VFFL z#`C`P^4DM8WloMN$Ps}`Fj;+Ue0=zqKHV*111aC30t72IHa43@bgUitJB!NgOg0Aw z26pxe(nBndnE;1(uOYlUP)DHH#k0-57|D<{4G0urmXFeXC~%pglPR>Mxc zmXh<6)f_?&v-{wCPOn)-A}Z^2F#TvJqCZn5xXgYP28h3cxc0GDOY2~5cyE1_a}Xyd zF;?D!B@7{8HTA5haoLs*hoM36tB=IBwC>`{PJj1Py;|%xE%&Zgp;Js+ z8WP$buWvKw<*ngh&P+|$A33?~!sBKtSAL)kp}XUHj0t@mvtLt4F(tEl;(IR6&lCDh zWHS_S{~diHYw%4zpy2EXg%n#+rD(D=sy*Q_Zhp)^(1%DSOOe%lL}9}H^ugI#WM7sV zCyM(G%$a;d9JBq30zCr*YTDa!4qnHNjU>6Yx3NUAt zFqqP<2C~=o!wHYmbuMT(`JHbGMI@G896t~3uZ9J>_eY0xR#2>`26yCeR{i=~c8-z1 zyK1?7v2vZ!OhakzhC-HVNKcY@U%b#&+BZMJu0f~96LqZI5$A=+jvPN+4gvUzb!gubwT(%3ApbAQ8#-Djvu=z15{DtOK&TJcnB-}$f5HzB> zZ9~NZNGFLJM8E1h&Aq6O$}KERQ4OJe=1ln`9Yr&F?3cT(vj;q~z|qvzh4u(<2p7l* zAyipb!C`~e+1Uv#zbra?%I-tQcio&1rbhNx%_j4L<2;5w#Kb&+u>+reuT|GfR>6!E z%Sx&8#n|}(=A`^}B|M+Qt3SG{Ey*ArM+1hT0lh$gV}$NZF)=qTB zI!-v>4G?=SEPUk>VeXHxAanz3^oFvKHfuXOz1@M$W2+PKvx0M%U5`X}erNwey3pyV zshV#&)!Gwv&p5JG-h5~36I6v%)Y#bA$=PXcj8y-{j7d?fiT=0O;|e9`hz{>sv5j-cRq0hXxj{ga)x zGxwSIh|43#3GKC2U6sFGx`%SBmYA_$R3BE^G}<)z6Z*-dw88~%b$VbP$zfcFBWTu@ zznOFhNX___-);`%7agR{f7RFn!#q~nlFkd8J{_yM)-4_#Lr_}Gm07}ud=M&MZ ze@0uy0H2yt0W+QhhLe)o$xA%EI#f4uBWJ-ff3_UOE_5Jhjh$I*?dC?4NUq^Hfr2SJ zcB({{uUq>We1*)+-nfBG`Bdy$f=l{+-Fy9xbc(-hI#glIeO?i4_Zd-9+Pr9`t;3$o zDS)yjJTBc}M?Z&ZAE^`+4}9>LogF_!A|OBb?qXM5s!SKDvZ^Xzctajj56$Duy`Pub z47bW_mO6*XEV?!QzO1P8ta7u7o-ADDynYL--sATrYW(0TA)nPl`5%UFKb|zAUbS+t 
zxzc+nuaM--&Zu720M|7jD6K5TH<|WUO*M3@G~*`dPfTd2Wa2msQE^-os*lMtA^7i( z`-UL0Wlk+YRfp->Uti;-&2Bb?V!FELG0Xk7yiF zLpS})gy#sv@7oq*C7+>ElA|Ci7|Bcs1eDZf-9zbkHAl_K*;&inT z;e3@7lH+}=*v+o!T`kQT{XZ-l`Ih5l zjp{3ij>!JMFvx`F{eLQZ+GWIF!8I(L*wAoMm?<=NumlE-%s}al4DPU<;682!=rq4D zzqq)IbxW-m?CRitaqt=iEFiDDnfkMBD}!3P`Dl^cfyx8=APTPiG%uJo$!NFmP zC2M+h<=Z)HZ&pRdaUoQ$<1fjq&YPrEi?-?~215T7DC~~4yN$I*X=e1Bw3e6Q;_n}- zatC62XkM+M*z$0zg8wwSSIjhM1|HDKQK@%Zjtg3PYGu58bV8XjC9pPiCA+xo(Iqj zL)XnG`K>bMt=HA+XQ2;m47!SpAM3~Y+!j@gN3mMi>AIxcU+JHP6iU6s$_rL}3xL_# z#YH89-Q@}B5YW)jcW@;a*-)UxKj&rtU9C+c`J(67&gx}v#i*K-0@Hc>A!%2=|=%TWDym5dlc3-?Ah1 z;)PbDFMh@O!RWP>%Y%~QqvORmB(?~e(_}rKjoldDZAf|m6UsH4r=37-y15TmwpDn{ z9BrSK>{3mT2zu2ZY&+gCv1CjwG(k=F^2Lj%7`V@c>uo@{kMrdH`wzHwTerb9xqgI) zUkl-)2&Te!P;3W`o9sXN2DU2d3wuJyfQ3}Aa&3(M90l`$l6GAbk_`fg{c3vX4Rh$Mtv_#Uh zRclDV|K@U9y3G*D*@pY2L+YfAPOU?w27=uz^d2|1w)%Q{qQK%F92}S(yAzX=ZWLIK zhL#*Tg}R>K3Z960mZe%)ay+Ps9FW10)t6tmDI%i95@D6|8Wn(tNWQ*sDYaq)nuN^0 zWo0f)?7LJl+U-%ZM>@5xwly5yLB*W*tACIe@Py0a1D#nNl}e7yeG^!J-^AMlWFHF) z@dqu(v<_n{3${#SQ^prIi+f^I(uNloL++i%A4gg>Qr$Ui`?#P;T4oc6`u8mH$h zmGsktep&wqhu9d{WKCamet)w+O+yl7EmU#c8A?nrhzsQEKvd;x*Q0A7=J8bvjD%ji zxE>oD%f-Vzy}T?s`%$R3JJ%}I^^^fBj~Ij8yI3+w8dOX=CM z1yf14hf3)W0%738%V{4GN3T@_Ghcy#kV^8 zEqg%5^$Fe@Ox04zQ4A2f)tiCN244MOykx!ehXx7%<&cg`x{{8A8}?u1kC_&z76m1+sdE3=RXY3e&w*zG|al*?CtwU3t)3n zTwhXB8n(~&{2>lfr87XnJKB~O3zra32cB=}6?ZGXA2#7B&bv6n^j2~mCkS-0pQ}Tn#B=>Pe3LEAhp261l-q&{TJu$ zA&JyuRkj$=65!&435D_Z-=$d^#j){MF;ourl@LXKp93JcV>lt46BRS^*H-zbL?@ix z=(ALh?%Z=HKE$*TRh>LA2Kl<1zwaFv-f|WdoMAnduV21+>ab`VnntagMgiA#@H$`Y z7Ck*ZX1~mT1`am#QLRXGg=7r2{#tM9pE4V5n6;lZd76#S*Xm3p{E6z>pBSr_zc!g& zUT$tp^Jo;(HKoJ@zH-IsMmeO?-@?ORK);DG;Ef+~$eN%u1(FT&ujR>VK_J=SCldQj z^78Tk1gRrIDJm`&Pmznb)cc{BvAsuq!88Xm!Kv8j{e3kw#!3*uA*KU6+y2JK2euA2 zb;B(*T^z6_pz_0a36#0(;!@7SUnP-J*O_g$_`ic6sa?L!An%7Sqv^9k)DS$Iq%F&py((}WfZfvYIto!D_JSg%r`7FMnLTsB!PnAo*e)HyB z0Ov%F*3(?*8irU!8v=vvjF5*62ubAb0PcrQDSHdBi|#yKaTM4PP!lxt_Wn}1S{3wD zt`yg@`dI99Ah-6I8U;`6zt|3Z&ZlNc7UZ-{zWq?fx2?pM6+*Csb{D&vo0>kD^#7DC 
zcQ6P19<3(%W$-UOR3?>QXr7F3%aEBklzsqdz>xcp`SkzJnS@_CwZ23FPNhNK5{!W% z$p33C=U=;UN(eO(GH zrGRn=fGM%R6%%VFj)IZLm)eDwp1vThf5$ccpR%WpH90xHZ?(;tNQT0j{F-MP@3cPO zUGj!tVXPNhO5X`xFvMfGP34jC0M8paIS}2hC{Uk2M7Tvc^$@sQdbOgPhK7c=O_uP{ zu#1^7>Sz@+e*Kc-P7BiPV^s!OMABSLsDwrpYC7 z!2AHA)V{x7Hmu}z^4Lc-EiZjGY}a=hNs1-6ir1FxfZok{gM17=atrlap}5`1ACrdt zb9Ch7ur}nCno0$wP=Z{F#KKgLVuol^51@@(dwX-B;XrAR;tYbWl&9`zPWR}QQEH&b zY28uy>!bet7i+pK4RV_%_ZUHWev{pFpaEKGmXMT8-QrQGs6qbD=26H8>)BJ|9uAdRjWcehG+0hUHYg! z32n$o#rnyG%&&swu}A!AF6p8hwd3*e56kS9<>nAbE)TRAb0->bSo`LCSh}tmXY?(% z57+o6LOlu;04y9Ek(5{S-T&(oBqY<(JPD*@KB^|gH#W3gDtUe1vNw9&9geplDhN}9 zs-m}PxCZI>h5%=H|M4S;SH8~j_DQZTG*a4H5|-AJH2f1#{<{BkaJqio-ZNBy>5Ji^|BzNFr$c z4%5N{&^*Ic+l4Xi%&g3G*~&X*}mQnUr( z_xH=l9I^xQ9L(QM^b?pLFL3o6%LFW*uo$RI+MaGz3lG_@ahhBjzKor{xH#SX10e7{ z2GyyR6^YlczaS=o&ovq^W4B)&NHOmF!DjO36Try?PMsjtKmucbax}?c^MZUsLP0@c zS_aleB;qBAd$dOE&~H6x@~Z!M0}ID+G5hTvLi=IHU>4Kr^8E7UhswHGw^Kj%rs6iG zr+b2rk7w6ZU=JN_hBEjQouZ?-9Bk+wM3)YqKp1(t&|!TB`hl&kvJU|NKjNYM@oEu5 z@Nl6E=PJiV7%}JNBY_LX)UN31(RuJE@x#zs{}7;F>dZcwCtP0-Rl#rj>K46L`^iSh z+RX9XufNlbgoU50RGF}U-Er~%_jYN}<7I8y|ERxlIS(9xf|9nq)2g@xyHR zmrn?t(!wyJJ6`A}5gYaB!8U6XtMS>Rf}dxBn|;EvD5Zs&lm*+YQDe#fZw}FQqNP<0eYlj4SsW%V>|sufW|#kO4(Gi_ zYFjTyzt%I^?fX9B{uQwo9K{4bLV+G7r3Ht7~BbnqJjkAe*Q zpbo-ARH$FAIR^*sr|k^aqt&)J>-X;yzi|0-*puY01$&~MsYV19Fhp-ZKR<*!Tog@u zEFev}1zAA*HwsCZFAmjn4?y8Cpzc#(Aje@#2W&Gg#|;u_t44X2Gx%R!0=tQQ`V$G* z2h>8aWM_`M_XM0gbV6$=Jd=_hua6e7S690&VsB!&{_d#0sFTY{jjrUkHRwwG7KDRx zj*gu;Zrio98(WzSZN5}gFn%QUh{IfN>cU*LP;YR9|31EtjG>!Ix0g4v8V5!jj9n6S zPY+&2u@Dxjb~MLz&W{8TyPhqR2&movq`7P0%C=wc8WfDobbJ%_)aoA2x}7`Qr>m#q z3T~WzzEl4(;_z~7R=UjIjT8&ZPZ%_lZ!7THc{ini^M;nvrHOq6huKK`*Z6bMGD<=> z)k3q{s`F})Ta|iT3XplQ@k@H1{!OrG1!|`DhW=laif2?5G0Lg;yHB43^nqHzfE*pN z_(fhvoyHY*g>?1LQ*BDG@q9?%mfu5=0(SbCg5ny$Lc2wfkw^2{M*`Bl=j@iy%og$L z{f7@TYis_6h4w}3px5(Zv}X7>z1ZgJcP+yIY>X(r!R?$p)ZCLoVSw=1&wANN%E}qx zUDL5rs=FTF*eK;4kP?W|g04fFY_nu?libnrX{P8hh8OFZ{JUz&D)sgyX5D-=FIye` 
znAps(-@|31^U=~3Xm@vty7&)CYQvbqq*3nM;9OZGIJ@@ZWnW#r<;gS|pp>E;}=KwztVCD14TdmLBC$ zArtxS(vxaaSb8>-*MgHpsPd`uInw5#9_RQbg^`w_42C;@mNVaey@=3*T-bGv+Ej_Y z9&%R-O^^BtA1^cY{GT=FIOivHt4cWy!!0*NWpj)37h{SiAcz6)UE`raT#v)+kqB6= z>(lMGa8tI?)t@1%Y*Vjvv9b~n5P<$*XJK@9xi|f801-!gil!|I%Qec=y|vmOS(8y$ zYaAavIy#?wIe}OW5Z0`3#}l1C_oP9R+Fx^S!Z356!3n3kN!yz&4}B_Oul#=#4@j!( zXn!eBy;?ejfrlp&m})+LJ{$ACgtxqPb(VcF!_lNaQ+vWf4iAW=+Al&(+UDjlhKR*8 zpo37qeA%iY*{b#sM1_%4<1UBPam$CPXMC4uy9Gu)?;t(022Bz?7c_e#?^Mu-^prbV zvJu^aMR7tPdWy`|;0XWnX9nIXlHIfkV0&Lz>HlUfjUnOa)#k&|mgS9fmRFw+R4?Hy z8HVTS(xpDCFLk;UuqS=_hnj|(ogibzmo9C51D%HDV9K9!E+uAe)k!X?D_0f<}o)PY`g z=S^srmlx9>XZ6NV75`+GGJxNY==mz1ookXjMF9#51tdBkYMiZ0ksPZ)1UEE1iH?cp z^0=tr9y(5Y*U6gS=!G?OF^i2&BG8`AU;griQ%xEW152`xQI~qMh`s!(=h}z!*-VDM zkU6X+i|w!c$&^icJ-8)1m_7NBjsE5(`6WiOQt$o0NQ185z1h@*b3X2F9XLk&t9?X3 z;OS7a+$0l-hIDLu`Xd=G3CSxE5kS^vU&IWg&NEfjNHk>4{C9%5En*~S^{gtQ=GqNB z0uiw-&c|3>m1uS?sxEwy4C0i1)&2BS6!!vm>d}(#OD(a}-n;zz!7up1DI?XCQlU|E z?m>9FnyrS;!Iv@bva);ok|}6=HCgi3y%|SUn<&C}H*D$IzI&;q{>q!&!zC0o|`>~G{&D^FCv%UN8V zVFhxMzbFKJ^NG;W!DZ{rYteTqD&xLrb=EO}rfl?#82>elyytc}b9{$QDx8`hpAtvL z>$qN5c74|wh(F`wr!^XRI-)2Rz~%rbG+ge;gaYhh>%L z#uzw`#(>%|nXvEv!lq{CHfDvBn$Xp1Lb+Zx5}qWT&cn_l2Qz=u#-01}_?~7$Q(rQ^ zo>i<+2Tz)PwwxVnqh1h8k;?;dxS0QaFSzId$3+%$fAjsj>i`s}lshm0wW>GU6ab0_ zS*XuL8QL^-)bz)EPe+|g zugPv*cw8>r$0d^Ju6<52QH=w>of=u|#dO6CM1qIlLZ@bpCOP?^ z3^f8zfl6l<<5mS8$EBK>PcI<7nHWorhe!f-AyNl>?VW8@4Cje7jU};Y?}#A3s3|u-I(M@ z3+8ObCJ)hMg(`QXUkO$ei(ILRV0F$iH|r3ZqR?dIFuV}R&YWyV4#uI1bN8*Rc~V>91gTVr5idx#o^|bG%)Vo#=~T^ZusqA$I#Hr%m=OcZREP(&Dq`-9z-hBxtnQa&}`Qu(GleaoG|~mA~h)=LT9~kXLhr@wm!a zFe0<%`;fklT2|E-1R2QHX7RbX?6WAnzmA2&S-hLA{1ttFWm{!x@m1^*DZF>+_8GQa zG#*RSu4<-{@CcFcNu+LQY=%Uk5nzI$N<$`wv&g7XKnQHtn*tt@Q86+G#yM>MP&(XL z@Z84Xb@&z;Dz+ZU-myMXVB!PnWqHBKm!pQGuNnIc^@Vb#lUyq*)74V3ckiL^Oz>{r zCMIZu@hLkMf!9hYQKG6R#d2c#sSPc=4#*tisVO5t$+kEY1c*$c)T-jzaCwl;{2KC) z69u4q*Vd#9Uc`SZ#@X1w&&?~Mq~s`4Sj6=>WIS+ifZ+1z(PQqDQOhG}H%ZPb7+I?( zb>>rFl zLR$W5Ba_JdQ*6%!d)hpnBSsG2x~uZrbaOF7qd1e?H2k~f!=DcIP)KW^?!q<(!^ 
zh{{#L5kJ)YyS;uOHi|j#d!L~K5gRJ)#*h13xrJRBreF2-t{7_EEul9mP_-H$#*n_1D>v(NV)2XZIAv(x1aqU-FH|ZoU&s zTG(xrJkr}R1gWCc=7n1YkTI+KdpK4Q^HcK;Sb(q@v0skVkmKUL-X>bn#Ez z%UPOl^gBOg*6P;c8{>y7*KVad_C2%$n148}Zr_U_%eUDlQY|#9eODTHZ-tAtFml}G zV0*Z(%i6}~5me7vvaI$y7*J{qk4a}K_T{T7h32SA0rBT&cgvQ=@}T)u6t^Gt+j7`) zSKOTU!SlATw20|9JL;C{LYtrWN02Cd4*aY=uxGsBKTRbBdqx)ZV-Sh{rI+`RvKkEf z6xlcFH~JHgiqa9u0l~}-$`;?&5Zay$@k89W=3@|qp0pFfo%BLVDkZ{Fo`~J-wa%=I zlhf4VVlxo_DnJ=jUtaK5pC1rE;L+18OOlG>K$7@i-SvHF```S5+&4*LvZZvRyxP_v zuc?S_uBx55#;~yJxAp1K6sNCU1B@i4Uh4+@hHOs{><2AfUHK(l zydsZRX7n%;sHUwi|Fvm5!2%cbh>U&y1d?N104;Q~^fkYjhDY_U4&1eyde~NVLEGhk zcpE%%Rbwt|u(|iA51ICKv31UrVsfDaSs18zbQS!EMu+)B**}qm08s>#g~P#t^4yok zsIb0N87K`NK<82?vb_y@nM1QWfVn4RGrA4xbT*5z`z|gnNbcm>vuB|WP;esMnV>_W zNO|$`f5Lg#iQk*{9VjZG`(M1o<+PZ0{LZ8aw@8d`oq?2}}X zrY;(k=-SKavhs*QH23KFc+mb!IURv;#rSP<$~end36Z)pVGZ13A}T+p7D6qFiHV~g zU32#vnS?SEdJE+E12uA*n@7vLn}5F)%GUnCe|m72=$@D;6O+YQMN2EY>9JE&f$J&$ zT;$Te^G}6rg{ji-_%5e)IsNM2kSKK#<3w-_tXao?f)@x^YX-rfF5Lj%zZWXG#5dnP+-_3taUHq^+?oNJZNvss%M_ERX1_!HRlTWO@9=M9$a_|m?1?X}k}NwuxsjOBOF9QyettJTMkpw!`Xh-?HpNZ(+i4K?+vJZS5xEM zXF#hCfNmVfjcupvt~npHtBUb)afJiiD4!%2z!I7APx?SEQuZZDygla)0#2Yp@DHZ; z1D@2(p>H=Y4DOHqz;t08n=*TG#jvv}o}EzExFFiE4=8G?7je&Y0#d1kl$UjY_%L3rVOpFs>f&6%r{{LsSXYri~Bz4 z1f4%o+nLu93nFX-ET>~rGDz&A-``YSY5q-jgtax*>xLqb zu`in5H81;lI$y$m&Exr>)qUfCv;eoc1cNee?!^l+Ss%CJTTGM)P1RjnFq=yJhFpqI zI`ZiXX$lk-pFZ54$@}V2|DTr(*KUfgt5pkQ)WH*)4wT)#G^Dr&N@;-8m%lB-PsIuY zK~$(JAou<#RW76dzoHuWqcfVqf04a-l41+-&~FB!%)j9*%HZ_O)B}++n`sh(H~9jM z_Pl#ce3f+gCL~xf0+SrTP&D`S+`+)W0J~ujp6qRm6Judvp#af6ROQAC#2Fw=z}X`G z9;_W47`sX2ZqZzeY@T&+IyihX7MQxaSdTS+7&I>wt!W!x#EL#M{?mY_J$o-pvzinF z4)lbC7?%5c6hpmPsllXz3`hUIRLK`eHd!Puz25|6`qFTx0k1^u`v`SPJ(*%6No-wJ zrATF!T_IbsL@PbPUU{v8kUFnByE_=D@HUl7$IB4Jm#Ows=HG!y5tuaBFLeI@ohXoXkxj z-hnXU6zDj311&TEia?NWcP1SR9?(Xg> zZ1Uvqj|_j4h`{;)AN`6l7|qjGTVMY(K%fKWp^C#r^Wv7qA%BHp`ppLq9?0TdgSQ0O z7svqpNw|B@2$hGpjo4SNbj(^GZx`lpS1;rqpJUtr35VtqD|*1hl`vIO^-&jWVgi>B 
z8<)2ii8j=Sts^^_7;2KkbD!0BYHKFQrKb%PGBa5?r#@@8oUGyp-I^}2;?TWJAR{Al zusJDHYBOH~NH7pq2Cd&|V4e+%C;9UaNJ12x4%8nlg0|Zk5I5w{B#8$ZKnFf|5itx= zf;30J;nSUkOwB@72L1T|COYqr!kjWurvP4`+1%7za(|hZa8|0#@&RKTWyuUIOnd1w zqbo4^6`wsr0W?b?Mf6#}+eTDRJteHi-cC826poPgauajQ{uwZ<7vK7Ud zrRXP}+V-oX?$K${8D5baOfiDqBOuqVg9cq)odhI7l3|S2&~*+90V_vGLlC52!$yn3 z(@j{EjU>WB1o}$-++AJUEcM`J934{-dvX19%7termviY(^UlLg2HqE8$IrLT)O{?l zG^SSq<@Q7TBWV2PT4Bp`G9@n2 z1H?28@Fui(FAU}BQaIc?KR*W=*br0_&o(9>gUnM{77tu8V%*=7qPtNX(fxA-ps22{ z*S&>eIsEHde7Dfl-N{Pne5Hkid~1}4-1GphGPH4{8H(_hxW%(szabY1yJ;M8gNVva&+y;0w zkxJ+NR(mr^`=xF#XbtQo#{4$|UneJ(!2gAj%g*Z6H!CB184)JS&6CvS_k4ZrljLix zXp5g8~W^3FLi^=r>dG$kXd!Kw$@hYVm{ zh)Z}QXg{tzb8z5*JbZ1UG722tK&JK^2ubKgT?EApkn7>ugq#*6&=84Iu~ml!cZb8q zn97PWGcz*?40c9Lb5N(CDFXz;Z6xLJ$=i+~-2yGZ*Wlpi(BsobB}U})j_K3alP=J7vkEYkS%3t>&M$}hug zX-gFIqGk1*zU{$_N_D<_0Ya9sTm1{mN7I@y7BMvUpBk1o=7Ee@%h2#6po5ptEcBW- z1bH!KN`biqjN0U6m?yb?TkEGhZHPjKVgR&R$TgR_pYuSq^<$nBwJ+8mj+F?a+oTY# z%BxRZ4$R~ce)kxFjKzpI?21MD-+<{2tnSwSehS1@eB8~-R>`)Rux-uokZ}Qzsz9>r zV%+i+MOk-7u02u0|FaV!L*+Ew1XYDfVprSu{z=H6|*2=|I|Ol724k z^p=-WQ@?WrcaE_9eAnf327|-G#KpwWL2RK1cM}PZ-6#0CE3aV)3W!sj)(Z#0dKm-r zKz1zOFi1rZuITxufOq;7a76~zYQ@8mM?scPaUK_ho?~O`_ZZ6V%2-)Fh1R$yu$-Bh z^dNLRp7O?_uW&)`r{kTS9p|**>Y#4`0^B7ed}(F1^?bPcp8^G5Vd4z`7Gcf66A#n^ z=f^G9d21K*jy!woUO{ohbB0zfMV7VnQ}1Ty=x(f?Z4->}3x-8@#L^aZ!9%k+Zm6%V zuivQv620L(DfW0 z6N4JgZA*!CMu4LTaRJ<}TD-1eyc%=|9il%WD>}3|Lw?%{T}en~Vpdrg^5$_WfLuEY z6oY~V=K!Vt{Z(+1;I9FvAi1%rVj+0!fJeg%6tK~ z@GkV^!}(eQyN|E4GY|i8CXRm;f>?Dlk`RIVk{73*Gi#-yEW9 zZ7xv!xoQ3!`l2-H-ly^5iC>KAo822o*bAN4>WE}lr0_tkig-(q$dNbGZqrm4PxfzAHPo3rzCYjELFh>MDngrp4W{$|ioWHJg0#+8(C zK`IGdpY+zz;7|4i8c=fK+}rrQg>{|3Tj{~MPsGkjSMIdICPW^j(jOF4`5f`#4P|7- zn8clr|E_SWSa6t{b5eDw73Bw2`{y04jIq2Ox&cvPO3IBRr%G33U1omw&1>fr8NNP^ zEXYt=k3C%n!6j0A1=py-{G{vdEl`9d0fGeARd7z#i7yydM6Lk74gH9O(3W`hArn&& zq*NdzA%$%GqeRLK6#9u!UjXSdP;f;pRXXNVP*5t|QJ|g!J+rr5x*nv)u)FQNQ$0;nSq|_jl%(n@7|%!R_jrn+ zBo|rM34DOaW%bOGy0}s@Xe6@Zle9KbfaN0>Ae=6e@Oon6oB+i~LbTBN!4kjuW9;mS 
zd4i`PI#bn~Ipl!C4UiWly_l>l^E^rVi>-PbxQ%e}@EVr;QdPjRMH6CWX~TbOdE`#x zlQ75;6jqnx(Y8N(r4Mgz$eQPK@yVpi4g&nDz(yux=%ks{#UK6(R&b~Rj?LGM6&hXx zN%>pByliWG8&wOwKiB|%fK~yd=2L$3)2HiwPH^L;rBQ#tAqxcA40yjVvS$O+isbKM zoPqGOXP)V2;LQUM_yNSH%8pkpmj4*)e4R)4j*X5==PGkbP?(6gX5MYge1A`$Nbdm(M|dTF=c`*ex)T;_NNXwA7jxN|lE2Y~^}hHKxG=cI!YMrc`pWE0VFj9@HOvY8 zR;1?X6f?Dce<25<@jb-bvE}pRCvW3LQ^`dKa@2St#I)VgN7GnQWa9#p6pe^GV}?F! zgat`wB)Bw;DrEGBzpZv{4VY_NJJDx8Epiunwr;?;{Yo!A4~u-GS|Scbad>EY&3;po zOMwGf)Lon%<3IQS*^;=jg#{zrt`w}SccH`+e*RqQ^bfQP0!XE_L_|eJ1=dd6@z3#R z$Yz5zldSCR44f7<@E=HgosrLG_BfYdq|lt-KI!reJ}NTMV*Q1Hv%8K?sj3zw|yY^9cyV z3SJy}U&FeG$1B!a=+_X#uWWW@NUIy>9CUOt#qc-e&kEE{^n^SgRmyj4lZz#;4iBu< zpj@`ODaFetKK>!b_ksNN?I{{bOC(SG>0?g?Vt1(6k_=+EE@UzE8WrJZXRh&DGLaBn z!z-aLv<8RQKs|j#F)FH4W0E%*MZq5o;27{;`ny-x~&Mr2MDgSzl{XO0Ey?dLC!Ec>NSZHpUmo&(5KAKG=O5o|{r#Kz&kyi^`je#=h9htk{~9J4 z+(Uu(GM^QTuyG6d=+T$+YinkHGeqD@0F^LoMFLN#yL(HlbtBn~<<{;qkOoZ+=aGfn zZ=Pf~8pm>JewW2!DmV~JJjhi0_N5L4UHN50qyYPNqFGZAnGA^cJ>L;lLuUYxl95nA zf(sp#HJRyI(y6+zo;Yqc^+r!^ShvwJQ*aFh9q}Z5R4Ros#8Z28{t!#7(>)fzcX1S7 zQzyyv70T%kCX9QRAIHlPxS*z>%<3)+fbbxT1v1||+!9N8>fzz+jT#`WwzhxF!|<8& zo3*W7Q~hvZgLQVYMsr)^Epc9#{Ibjn5}RH2qY~%u%lu;*?a^j-==`yb4WI~mSz`S_ zX0a|W%)tXC+i}LxZ1rdOgX1#!IuAV01A_4#-(e*-sx3}W)Hp% zl0J0*gnC^mtha_Oi>b+{md384NC%0_i^FQro|}!8e07{aeB_*P@`j2@xnBl`H2`8I zBhxiAEAyhJukS9DF8Mr_8bM-7G78*VU8bj;s7MO~7|<)Ry?Z?65@@Gs$wfrJ zp&=38?N4Q=VSs59!xi5V`>=8pLNnaD$p#&fIMii3j@qNv95*%FJ2H$>xSY6{7iwIb z`u7*xyFMOoMEKmlbxkJKouKNrkFd~qY{#dgWqXuwfp2w;cRADgl4w+3E%vVnyhP&d^&D0d*tlI;XdAA8ZzC%+y^$3_rA?;!=(RDu5d z40x~^6fL*n;upv-HR#wJeAKVFW{!D4Jz}TFG#ggY+(U zBZYj%$x03a$5dWP3%4CpyiGBN+WHnBVwH%B?e^qqaDTOo;t9WQ-Lz#~S|R2$Hi>`6)SL>m1zPWx^b}G{u95)^F=^)0@+i?dbc+ zZ(H^2sB7@ssl42o+eDPG-I{T>Ve%b;%}a24nU2V1bmd#Pq`x@BbG&2-$aw-}2>3hr zK{JUwWZ)M^vp9Ws_j-DIkn@BZ8bsFCv8rwZ&UP2*ibViOs6!+`l8F47$3)vKFG>s|x`p{J*J`}XbER+onDTHpqU zg0%TEjB^948@yln`H(Z5}h!c*d|Iv>1lC^k%>11rKEY{2gRc0vGKL zc&hdlq#r*o1j`tB&hIBv=WqujWAI9D~EUUb28@&-@R1galRIrc~6cCg&QAs5w z1XMaixc=gXAw&4AGtV)ti#kA7&g 
zd#MweaXst?b+h3@>p-?%ZZJRb8`T4?k471{G51J@=qJaXhw_8xT~em~@v#Ro$zM%6 z#J(i|6_WQt=B?#v_wUn2#RXh>SIE&Hu}&3bSZxO9E>zOWB}~U>H>xQo`%!PQIyus` zc7HN-Z0oJI82$aLacxs)8U$Z|F8%z|KXhS|yx(SF;6XT_o$CXxTU6!&*$-e#ZyN+^B&r{GPqV~*7~*vr}Sr@j)G|3e-oLZ zRm#Ty;o7yVmLxw4FQ$k6zd1Rckw}j1@(y(6!1+wKIA2BO zxU8SgvC40Ev4gH%z%?**uZ;O%7b7qRUXKG8sz+m94(+*jV>zrPsIqfyp_GPV+$n3T zsPwI?OLn~)fnKuLF{Q z97i?It(i^oDpCy!x)Dm`mEiv0&_%N!vT>gI@t+UpGA)M`k?&$`HP$Kxc?)6`4+z<8 zXPMvG2S<|;MhHVZVT#m;3lo{sl@i2y)+*dtSyiPEr-y_OM~AnPdYbv#j9u1z=T;OF z5=cFK?%`38lj9I>i%1=T0O_6o;8M$_uglI4pE+3Iv&K{rcUekq;mJ?aX~P?y6*9Ht zzEk(Ok{tNq9IGDuG1X6mHsCvz&K6~6UKz)j9z9s_k&U;poxHZfh4x9iHd;CSsp7|W z>qm&}g;Z9z>_dkujYvECK;Uo%Us3~|7WF@X;{a^$xH!#fl^5E?86bRVFzM!d%Y8TR z2uhq4uz&00kfye{^MV(nA7M$nAk>NPgv7XyXoN3wp7whC_5!*&=&02;TvE#7r8}E@ zPf+^)Sg%Fk+27yRf4g!*LnX60Ao`Oqi*5;DZ{Jfku z@8;_-j076a9W%bTpK^P@@v1GccETTZjF)B3-)XXwTQ{#xJzDR2IwNIgYwu>%3d$8o?##_&6YHEJW!f57~$T!KGyZm?A;rx~>Uo?lmiRSo@*i>TPiz0E%A38cUmWobLG)Mn5H(Jx*vDTE~Gia&uTevg8Y(VKR zQrO!`mQy*67p1*Ns+hlxB>G{HV_J!(c+2Z&EUA@8G@^?g5 z?->XMbl-ec>LJ6dKVc!;ITZ4?%l^!)m15^;M5p`(5kFh)rl}eAfj=<0_TuFb7GP1w zSXh1;u~t95;o)&*VyeHqURYN+qc6H3@fG9Gn%Ur60n8I zxwv>}<*BESrRTWq8qlxfI}WB#G^Fz1P)+5iPA%6-T*W&dw`)_g?XylCWl+b|ciN0v0qA&GUVe+FUOrOxF;MLb#u815^_8Cz|GtXX zcyZcMsXFU(-d9)D;mCaiIsAhC5*X(7NB@rH@pIQv9f#8|JQSoMHPXAWG;( zWAd<&mruacJ*uMZc`_wsn`G24u!sweeV*Zu}Gj)KdK%x%@?$FH#$%-IaziDPCa-+4>M5`_h| zy}$qJ+Um*+qfkQQ^(tH`PMzjV`lM2nx#My5`A6MyN@o^p7?&41;)d7sb_t8n>l>Kc zi;cHP@U8@O*1nB@Tuj@av|)4H_m>|o%y8BSrCAN{M0I;+i8eqoMQgber@(l_i?vRk z)ypWk(o5f+OF3?AlROf@{Kk9B~ns zG;+6Ge9;EK(fI86()Wexn>wYb=^nGmnw*W&%p#%GaPfoLV7s)b`XfHflDx9YNe1Sr# z(y}=^m`*&*w(e|5t}p)74Z4;Rp5J`GvZkN(lpu8aUWo?=7pbwb%5S_#dD;&Tf6YP*7eORD=CY$%NS8D!G+TbFElZ|cU2C2Q*O zBO^afZ*$IaZyAf>l5f+zE{HDc#>(FNMm5s2ob!HdZEAA&x){^O_u8iDJQCS+bHCCi z=4!SnEm;K_#!k7nB|DQyTXut?lfCC1gf|pKGy2VDxGAe4%}>Z!L?AP;Mv%V(<&@kl zYDVYTK+byW1p}2W>LfytJfH z$GRk7i9g~Zzv9Tex128*X~R+r!x_UYB}3D>g0g%cY3lb=c_u?rj?Ni|TV+mfPFxgj 
zmihZ_?e%eR5>H>76U=IQLPba9hBth-c=iKIofRc$4be6tt-7>mWO$gEEui5r@i=k}VmF`9fp$fMbEpG0 zLF0XU#`5o`l(-_Sc^nP(cprZ0-^}Hm@5yD%m9X6Gqh!O+_mEE6&@GzWRLwBDY$o*7 zbFG=bK@ly zlVnMioH(u%p*g|Hj|ZD12d?G!?0&`3kiWR{;>H&*i}$y);wCd+uFbsCP+l!4ubz%6 zs@czKaj*WN`iTnTygFiOax-7`wIPkAsB)LGNR`1{K} zN&|&R9mSM8om5Ypm$x|`4a#lG3&{PZb$pKAs8KgrcWU%44IOm2NJo(!kJo$_puR|DxoaT2zw%nk>`DPG|wiVyzD%f9_he0u*CMY#0I`a@u z?*R|h+c$5{Ba(TasMGhFu@2COj~+epn%VQGkofZpDfrA}A7|GK@?Imh){pT0l=mpy zzk1oeSpQ^;`eNp<^m$>P-#VYoPR(jXUTbGmkN)wwiKpH++-S_ahDD?HK*^n{!VgEv z|4`WutCu+bKS2J1mPu~2U+M3kxO)o6QUq!@7N;CjuH!!0Kb@+`?xcVBR-n(cL}BW@ zy=I?E)*Lervp?R1gPL&BH?MnxrBt#e;c^9{+rxgOGq1gue6DFQ|5u>V0Wm9e&Kk$% zElEHdb_AKZ3#>kfa8njvQV-K@>$c0M{<1WG*W|8hg!<>YUDm=oxc2f3eYh1fx+XiR zpmy*>k%h7L_5b-wL?k|a{%j7L8j&ZO>DW<2hp-O7Z?LEN%^pn7hi-)!Lr|53bO$24 zg@~U!pq9K6{kKNutIu(+xq4c+n?4b+fCtht>yhk^Oe_;b#jB*?Z+I z@{(fi3uNxRc&e$VL4bC+MFZzC$T zU)dgCSy_3$iOAarA(s*I^Y22400@p|gBI@pcs%xP*}7W#-OT|PGT)oIhV7q=KfB0A zS`u~o7#xr_9mdeKv4O|TmV4mrM)m&wqVCvxR7;Zrq^c#m?Vj(-QxyvLy*!x^u(fK* z=A@aeS>y^E?a2%>^V2R~hp#6nRlOh5Y_yvgl)Lvm^Ki^{`win`PJzd?`}=2av#Dh6 zpt-_iSA3irXsylFU&9&`Ebjz|y}306XZ+se)v|i`WJp?HwyACl`u9UWvPpes+^XQ` z>ry-D-9IB%HDon87kWx+K~GZd$8o0zt(upjxE#r8NmmlCkxw7BUK&s{SckC!Lk2@2obHO z8pC_0;HjA{)0d6bHBZOukcJn7>FiwJIAnI3=#TuUG#ia(im9NFzrlK)EvN0)?bJHJ`DNUd5OQ< z?QP(bnFxxAVNkk!f+E1*zZkZ; z!jh8Qswz*!Z^MNAITOU!56DQMD>?i}k>$JPd6JN^bDw=|kXzi>_2ZyuKqlw6k&Tvm z_AtX{OoF8*P}sgG$%Md?|Jgygpl%Y|HC=Xn4n5WE&3;-(y=#6zPk=njWJlRg zrLCdEwff{0u^sGwQ14ebuc933a{NK^o?Fgm6|NX1_}l;lD3o%8bzeo<I&htg~zVZadtR;(>_ec%USZ21+_P zWkotcOXx6Dql5!-7t$P{hLgd)^Eg)}@JljPNN<;i_dw%89w$womF;ClLU(483OpD$ zX#&jvDUHe2KVhgV-hJf;l$Y;>?fK9UbESyabYAE8V1@p-eKWOjCGo~hd>g6uWk24!w?5VMXt~G| z2Z$oYnKwZ}G5}bhdVM^I?{y6x!{}H4r7-8}@T}-gYPt+KK&FY9>(Kfs5KXExE-6VP zkMS{;*QtC~(meU2_iX6NeIIXFTGK0x<>XShRM;3gYiazC91(X z>qF-Ik9|ERa`zMD+DD>~FiFTH_m;K(EO)kS8D1m@1!aV5XZO`bbbh%I%CkqMaUme6 zg#26TYTYRvZSH&9#G4Oz;bbbT+uJ%sq(J?Q3h{A?l?`(nsL#*e2$9AcpCp;7u(YVt zO`qy;k+~27MmFmes(1dj28~J&xdgA??`-+n&oYe2S}ul`umMX2X#}58VAJmF$<#lS 
zD%`TyJ-e`u`r-~A8)^YcI$yc*NXDyGHv$T!%eu$=nn>e+1Jv86<&ToyyN_O&yL?nn zBsnUt3tAnbJA*ec5AurhWkuA*m6#nN+bzU1XKV%j5B&-=NCG zc?M#tJ&wHE2{FUBMUhLFFULbh5*7E~2ghS=Z{IvJGI9TYc2i-YJVSra`4bvkgNsY8 za%&9@f6_c4cuF*L7jspQM=#hx>&G7X0DqDyth*+}Vxv-S<&0rZQFq)klNFMa6BQV_V^8|h-k~3M;el)+-S5ktJ>LqE2zyPxBz~B_5(!-+O+2tXs^W)KG z)RiZ1|5&eXKXMvDLPcl`&N$4Ugd6-yq&$!rMAhGXzVh_(<5Q-cmp0Nc7_l{F+VjhW zOP-IEe~VQNmSg|~e7qw-?$^J5U4^XbS7BiXL>Zq5P}zUC)w{CyCd%X%- zT+Yr@^H2p4XLEF~QJmJ;f}8R>`YnM}w=l*C!XhX_^Wbg#_7w4s5QH0RrCVT{lu7A&Cw< z0&_}q0jq0iYx_C&$?gu2R_gz_^ncN@793fsN9&FtnQUYMgY+UqX%-gWYQ~iQnkd)w zA*~J>h}fzeJz>~zJ5KTbc*qm62gzm*5XNvB{uOx#lR*y@$1hC$jd}v|^tyYtx8B=f zop7~{0P9)rcOGQ_1EP`z3N1Bfnpn^Jy_WIm%@I}8{@O!r`W2+HYrPM1B)ARJ`uci21T@GgiWhhM zr|4jCwBj?6Zw?%YIu|DIH(1R^Yv8>T2YZ0KdyS)@fWTW{-vS7w2*wZkg!LOY?rB;@ z>Hh)VS1d@%_Tk|Y4Cx{!&_&Z*H>aA0(|%XC>DcgxZGm)ln4{fS-EK2Da1y|BKOWsz z4x)!1vG{cU3d(rNwW^*G(nTz~)g2K&+vGm1rALnZ2-(MLn--Qo5!|h7 zcxKfy$E-+jEz`&Ux&HKGbo<`8_SRC zGh=R3$me7`vGrk~futVUC$(<+<2{e?%deF8U_$9;BKQuOmdB4nqn2bq0U_T*1X6)x z4Tka*x%r5SR3T=50jJeTR}Zad9~mhno?v5A|H^Ygi~e)huWyHbh$SWUKLwG{dwgC+ z4f763s-(du@Bci40(yR~xUbQZ-Oa`k_n02+dMkHUfVp{41QpeMf2hY9v+lp3qtt>1 z<(pyMHWwIU-j$M8!6*<$N!&BxLlE{$AWrXBQPH~K%P;m2l2Zs^5a&ZrUxe#%-=7zP z4R8=B2b)DE;`oUXM*9RTSfGV12@v#!bKO7btn;c6Y$rW?{sFlO;PO#sB8BTm&G0yP z+2`Vph6B1uTaprjn_I(;SUqqioYMP^fp*;QkcNSCA5u?>cO(2%?PFsyDk}6C@NnkL zZlQh7_Jw+esA}K6au+*_aLM_lC0|x2IDiCCXXm7RQTT^uzA(L7zlr-5E+Qh_IKK3* zhp?EQ9t)~Ih{WKnnml(WgYo}E=bKhw?GGzo{L&&MB%aqikH3K+6b|z(D-Ug(zTW=1 z4~PC6O8^f9ja`TNcFlMG6SsVrjnY2py@POykG*2+LfqtR+B((!3IhjMh&W6@&bSY{ zdV9q%@`RYqCsoY8w6L&a*RHz;jP&&4i22>Rhoe2fy7e$K^HZ!7W#Lg^lyu;W5F0^^ zLxJ`bUrIp?w4-;=AJh&*QOY-DeTfhm=N`QnDc{-CGjpBUnURS&AU18<>%V-W6z9eB zmoJZ-hKSQ5COUyaM)c-ONH7VHFyw0pEsv?`f_{c@hHZ!79%s=_C_ORH zfwVl^WI}MnigSp0NMU`ooK{@V5qa6t#SJKb?dj`saykN+ zQ09clqideAJ9u*zrR`q>RXeY3Z{=@BTy z10bMzteh`iEC0=Y`%jCxqH|-NO;3?G@^QXPQg(+k$&w4ScDo6By5DsUQVq`%Pq3f6 z_nx1NEW8mQKLq|gwFK-i5!}<;Yc@Nq%+13yF+EM(|H#^`?ZryL;KU6Os6wi#S;4^n 
zpKIk$SIfrrVrMo=Y|5`^Hh=GLdCa1)v3egJUoMSQXG+A$U~c+fl7;!2C&z>QrfdcR z`1c7|yDBWLB1;s;#0Z?NX%5#Mdx~g@sSGJ8{OlSDTG!r;AT<=b5Jr63r1VOoj|8K= zo7<#S2s1h#dU(*lX45&>=G>RJSA%zDg_sgK4(U80oWtmm`ZRN93<-LD?2lXa&u7Fh zLb)7enkt^_YxmRE-_aT_z8z81+eBr*_>g(~@)=Ez`nkE3qwxpMT6+&>-OHP8z;qVE zosP*sSdQqA6YXXUaD3j-vKJKf+Y~{}HzsD$-M$Uo zXR=9K8OD}E@9;y9mx%Jd54#ccKXN&52;JYmZ-Rwgdq8UR<{#TrEJxC%eB%+SFa1^IP1N&;AFn&h3A>|Ap+t+HUV#e~{(NVkGX>Iip&!`vz2xGJg| z>E?2Acc;b%)za0aeC^}&MEY|m-ZcqC4(7I{A#C_@F5Cb3_}fDqZhDh87JFvLM@k15 zb1ryi1+|uD)@R+{r`ksqu5h(x=hA`y!?UA5qIw7yH1;V`MUx*TAqza@W+DPQ{dP% zGgOC!)B^nM^1X0S4qB#omS2C(TUK6vC9bYmoj8TqPP>c!1a#kD8TLY75%wz{@S$3; z9{wZp!cBroI<8&sKefZ3jorL8ipLj{Wk{I48e^3sc&?FSb2BVWVpk?i1gumgt=u(@ z4jQ{<9v`@{(>Z$CBz^|SMEG6s(%2O7&sDL1(cDP&5p_cs<_yAliyu9^G)A-Sw(_Jb z&iT-*pQ{KX1YQxw?YvNKd42d|eS-GZHpjVx(BeQcV~YR3#ZCTyo>iA@zZ(2)J@NZo z8)6yz!`>wGX<6-tk)Ac#mTTwLlaqhVKJSa0SmIxL_F?)`yd$Eiz2H+PeTCW_QMnj4 z#F1bOGJ65*k>XcrsW$kI@N`Vkfc5VhCX_2HD-*UXOg_CiorTx*?CE1n>4LA}jDSET z*?H~444$>n(r}{Wd*0^|ujA)Bp|az@zH;K-_h_50tAF1e$I0ye?VVJ;O}Z)D;u#T3 z!`C}A-=~jmv{-m@;adB=dEJ6#k4X4S6N%#R8n=-Yz-7chU*wI2OZxpt&@RSgP|k!A zWNQ(zj}TXYYT3SLkB%a|AZ_LYl}s>C5HZ1M^Yide$Q(hy%3I-f{y*Jgvt?~Ls2gB1 zAxQ@aT!tv`gvz&wh}Zvpa;U6p zeBaG=Zt{)7nR%P`fmq%7{?U4t#2bQ*w^PTr+TEfdP5!uYtl0f)gEqHFy#L zadIY;;ob2MsEd?CYQ0{^L88xK;D}L_z_CU8pA{nsn!o%p^4cqv803WGWjUk6VLPV& z@5%M7bKXNkIbwKF_vee~j9cpJDk=9$5eK5<)4q~1J#7JDjFCV&B3teMYEwau(Ptj~=yRtf8+KdvHRjQ|?CO$V9ZdyP>mj=TDO@V-(93f-(9{KN7e;96WF! zLMblrvN8TCeVcB*gt)k8?(R%1EYI*`B;qV0QWjn;(h@sU5c+pWw&d>kyq`fB2sxgXTj`g0eQ9G68T$y$YPj}%}eW)2o{XU)}rFFu8=lpPmt=6w+c z?&NPu?ONs%;JRyNlkj6%?Wf1tHwEx{^lL2cv9r`uu$xSQ#25{@m+DK*hCKvZ8q8RJ zB_)zX`;F$fIEL^LcHVABDt264-15%^XMq5vu+6uCAXz#)_v~5mF>M*VELR&8*&6 z5o%)W9Km=zpV0CKZ9>ECHEf4>1QE>-U>j|tp`KIIPUn=_LA%%~ncqb&QNqpR=)n zg}Ee0k2mBxi<=SfO>55?hX0;4#3#FS} z!WLQ5&(O=^5$C`G0t+fLE9>Uq-&D7tcR^YJZjC$)waxyfWpM-Jdc|S1K-UPEh!V14 zkC>Pk{U>o4jhd1$@{Eu_JQL@_k%v44q7EkFRir<&S06`!D%OP=>MP`_Z@9$D_5%S? 
zfK6pCKRx`YMr7f%n|G6QWdBJwT9D96S?^?m=EBEUWun*xK3sr-xDe*87tzEF3g_jC zD}IXs+}dvXXEW?~K)4Lz;X;szqYdF1j3o!*h8n;^LZ^ENRr=$>@&LhtkjpP_W;vV% zO3WcIic_9=rtsV_UP}G5&OMIVC(+}@xDQ%Qrf6R4f7l(jGfDh`F`W*nprF8fVN#of z1EwGIGojbP;A;{NT7Z?Ol4z)@nXuzv!66Zp4gMOp5mEdP^#42v)+Rxs2*73=U><%Z zU#L+~QajwX9L9n;aP5z^!~?1^7A!Jp1UTUY?nB{voQtbm-Wm6u$m;xEl<{ZaVJ5nD z7)9f~7W#ZG_e@-d{*yir)5-%m3VV%zb7aI9?bH>zX@EPcy9c2L)(m?U(nRks^Y9${E5U^G5SyEeo8fbNGdX#V zy|R*}W((SS+F?CrH6g0uI(2(W)dt_!#pLajy#AXbCil|rxFc-ui;f)PbV=C8Ll%AJ zd6bGOLVwngtUkq7mKX?)?)!znK3g}Ng&tpkG1p_N#=;SCLzL_IWEg-#KHtbiUcP`% z&N?Z7b^!xRuMR!0_*bQ=rG)bLZF3uHgB2qq zKSw%}<{<(8X41xhI66c;WT%>T&Ci3E%e2BLC$!;AB51dy4fN5}1Ig zATO&$Zw0Z%FlmmrShoTCI1k|nD+dSfU-SjJxnJS2ZlnQ;y6>>WIii!Gi9iRXzK{@t z5lOK5V|vZuLx)H#Yi0=Y*vsI&Y+6)QbTi$86^RQqPWran|JHUbzAzx-oGEG67VgK@=pbr+e!lV%D5^@!Fm1gT5*5BR2M!fUWxfo{q}{_eV^~8@sN|lMHP`$7o57s@g+Pi1E#`|WaI zH)UVGh;e;WX|1H5L;WqC`+)nj20pqI)Bb{u-b?CL^CYc4^_<2Kky@Ypwz9dBbU?G#wa22bFMdNZJB6niQ_or)2H=lL`9Z{d0ZQ^ zkYs?_;kkO`KUd79(h^<+dX|Y6?%&=w!r8;3t53=p&kR&O1=Yo=mg=Gyl~+|&)h=j` z6A7kEuk(JOh)fZERTe1x0vRG0tgtbvI=7%;e>agigLVP=udcX_%xwyl!Te zkkbZWQeNj6mJaJN(jUcn@V(Uu72ISxI^hn=02K%~3EB)2gy@hl;dt%gq9aLTGgMq* zWIB5E;=*l0lZuMLX=#KX*diSrozTj3)$*A0+Ql&0j zKaQ|yLzjJomGwC?_~qo~F9Du`x|#%^01wELV26cr=0KZ=_;F#S&%t?BhF*&*i+AIO z4WMDN(Y>FwpXC791A9N}&6UqDJr$#Rz;Z;J`PK2nUujsJHk;&9-w>STdfIavM|Pyb zQE9RKsl0Kfe#@S^^1X*8VBr3?Wi;1wM;yg#Kbt^>0NqM6%`36G>PL^RQqh}J-(It) z>8o|OtuopjYbA!-VGq0XE-2u(ckGFTEIhq~@C{VtYEw2=HV7tte*S$xrz_Ici>Bki* zozBpU6gGFK#C`5Hx%da{@|}v?(Hypy{paen6+7qkk*+^J?`&^*6+>RlZYZ&^usgVU z%=nY#E}8U_eG-D@KDh^;hh9<@FX)YF;5OgPH&aY`U07E}vncz`%h6j5HvZ`bhkbhY zeCJ>7p)WttVZ`l`#UPfiD;UqqMQ#n*wjx|&8GW3b__gEMSn(jl)vK@`OKx>{yZ=4jizVr zR_`;`N(NjJ<}vrg&Kq)md8gJ{^hjL00M~kt&XH4c)Rp#Sf#(Fv6^G9(Z%x}!r{CHC zLuSlDXL8A~XkNZ#ytfP zu*Rkpogtyc0Q*cqd?AX^TvSvBnaxw+b8SF%+ajUMP(hk#FgnMM*y;Sf6{Q+l@f$a) ziZ?Nd3pJIZHnM>Xyb-T7DDLAzl$Z>fh-(C=_!U4EfK>0E7gLjyzXvRcDBb0GQyk1h z%nMGLxzqy7sz^CzkZ{yK5{+P{QOsYTt@1;hG_YNG!d?dkzCtC3D?@xWK+u}N-JDR; 
z0J6RaGbK_LxH<;=`mO*zCk!TN5I%*4z47r0U++wTJ{jh45{fTk_Y-+8uspgt`FMNB z<8vuS-#qW+B!Uw~VdL$4Nlq(v#E_ewD8AxYbO>I!KO2dpYU4ikAUVY^Jp*GJy2~CN zLcBd5T(z+y@44GobjJtBXwH3p{(#D6jZQ#xsh^Ryf2P04;=|(TcEvJ2s+)_v2bnjr zC0bf)3s|TGoNx8lon)BT+eGGQc%r(bBb3#9cCp0yVTd8WqzQQ=qeIH`SEZ^`O-05Z z7LL7j+$y5*FwBYJ#8uT@rSBuB^jkjB=^KVzQ{5G_eiOw*0jr~3RRJFs=T$33n+l(Q zYn!*IQ*`Akf^DI|so1SBv9Rr%N4IucyOtcoxk?{z)9w_xPtnf~tyCRU<6_e)thE$Z zj)|oTP}jY#Lj0w8MWy_Xwzp?jJCw8>l6M8Dm-jjfN{u>8b;-%C z9qr>_GM3`qMr|~&Q@gXW_7Z!JM#REm8bJm3|Lx8^z*dwOL=?-5` zU0G9IRTrk4pyzX^_Pr7-wUmGJeO;GUzO9q6;|J*z4&w$lXKd)JVv(M^JfN*`9&T)kIagfLxS(D0(&IZ zslKa=GNbGI;-x3i;5e<+>S#hqnyuw~Il}WS;-cu(g>e?w?=|k_BlJzWuM>>Sl*W=j zl#265eqimA`xv_tH$W^g!z$6cy=4I}TwN(4UC3)|3xYJt)8Ai}RRcagZ+k6c(Vs|f zx#5!)gcV2x(16H_R5)mPyJ#izVf<2DSQrztOkg(%voKIbgpjw{DKD7>Bvu#hKq@-! z^;}$W^eI~-!~sxY)X@K0IO_;RHkY!bscAn5Y-ImHseK2+IOIue-}++mki4nsDWb22 z6nGikxH}bl2^KVk9s;vC-fzHqk z7Jq!xd-t5zKSGE0tE42_Ypz$AUn;C_$UAstggaUHcQy_4_OqrNmRc8l9LF49+%0}0 zlr$A|=3B(A*JXJr*J3EIkZkpR#wdt_4Cp1gf{WG zhxh)-?xr~o%=8d>}+S+RDvuaB&X;)1Y#!!MibZ8f7F=H~I{hlU!x8-t$hxNAqF z)#0=DTH|K+tDofdj$yHaJv|aHKKE*QYpexj8D7V*Q4dFzDo)*sEI;dj#R0BH>ePwr3D@a;bO-R-U&D3r zz)!hU!`IlTC@ur?2soYNlaqV#_H$~Jt(JHxkBeFy`&0AcmukeS6K}H31wr5#9t19f z1S;6w3|JEedD-|L4JwpZfYy~`O_8g{?}Dh zRP^`UpUN_}nJBrUCJ>n4QJ$r{RymUExIHhMCZFAwO=rE*-ea@P3b+0P-O?2**TKRV|*C*bTa^cA)FEf*<*#rGcF|z$WpI`8J!oEjEyDdB^SX+($(vZn9;vuQl~m z&zP~_58N%Qr7H0=*$i32U!R_G+{mUsDjnNXx7JMK&Y9MnTj9Ocms7FnN&JLqyd?{H z)jaO^uN*oSnk^X$70z;$qt>qRtNz>~w1bt-aC<0-nA}{-h~Qus0J!WquMmA|jxaH%>k+b% z$o!`UR}1YKiYhT+78pvC>D!WJj1%NG>^HEk6Q_SqD&_-1^91LkhH|d}01Zu~>4%Cz zE++u6W1H95URywWu8UwQBn=F{oDZff@_pgd{pQNeOuHo6IRt{B$zMBoHVy%EJK@pm zDXvEDF~UPJ=?2Z%C)vg_B@yO;?xIMwIsNqA(IvxMYwubqm1(D_E1o*(Sj15jnJNihS=S_-yDY`-wPHTC z<=$7W0gs9OHSEtUMUDFAAM>1Ba7x)EeUK-jt$zOIySlD7$pOj-r(*CVG6nnF4hPrv z-RGwpBC~dx3reOrec8X3(L(mmD=4V5-7|=Xg$npmRLvwP9v;nX#=2n4v_#|;4+*s5 zvE#>kUrq0o%pliw!|Sx0mX@N&KC20-n9E;rY7qWZVxc4?aHD(8wr+F00z@UIJPd|G zp{Wv{|8FWD5zTQsn z1B=N+KiWB;Jc#8yvGUhQ17llXF(to=hhI^5@b(YJBYuK+j 
zTdlIPn!5$Q454iOv(OCa*D@N&{+BOZ8b`21O=L9G0XX3i#)$0B958j>-+&^#foLlp zST7jj{sxTX8sxq{wUT*!ZDq_Eu;uDSyrWc`i6g+ov_aJ$Pa&g##i>LHMC;--`k)8Z zK@e1uhzF7}fEe5wGW!F{64y=Zc#jOmi4UUC^npJ3~0T#%^eHOu}nTc)mIyMg~#s@>+js!s(@gxQ}J^(|hg(`)!e zne@eK>wtyL!Zn@8LKn%i6wNGbpCToGira2+dvUe2Fhu((4X>C_W!9~_d)d; z54A6EZDp`u`ZD+K@uct(ga<13EeZ<@52?+T6C@V$xuK%a3{$r8u~NL*sN9m2yg#+( zfW_%V-hwpCu8hj881K_4#wR&m00i#ELix!P=1SWqCW(%1hx z%(r>RKM~~f{(dNKGe>{4mAOc_3>`S&I23hStV)GnKOfy`ZIbHT<4r5et89Q$oZ>R> zYnYpv5jh-|oMhy-8B{Ua51(SNAC4EO1IP6D7HO9JQ}N;gyQX zN-VZ$+e)*z@L&DXa>Hm@@14KD?wQ~Z$rE&fG zxj(HU(x1lctEJoo^`4tI zZtQ?NpOoF)`~h2mh{tiY!&OxPpV5s-d9Z<%NZiAzMPgxPJq)xI@oSiNv?&{^7Dzrr za0Q+YRJwEL$mPqIp&d~;8xAi$6nQ}I5kD2S;*ShGx@ryd;1C|6Mar$I@rJMhXuKo? zG4Np5u3cE{)R0RfI=-TN;?!lpGa%VenpcEN?MDm!ROOb)#iaD9sM00)+a9iilw^E% zHov@_#sx8BAQkDr6QlF#LFOA3gU~+jFAjy>Z{O|=2@M^njoFVff8)#vwba+}K~+K- zpyH9UG7LI`$cu%{xOFJ@9~ymhoxOwvNuG_Ceugn>gYB8Kn&CI@hF9EQykiwGM20XJ z)jJ*sTbnzD+lv>h?Ceoq8c{WGeSE}lL$im)sHG(o@qJS{fkT7HM`-7#ziV6_zU}a% zRKMnf#KF9&nqW(klJXE)h@Y46J3&SFb$MlL6;Na%K@62bK}pF=Fn(=a45IZS^X-&S zGebjj9;z5b#SpXCQJ+9!qIv$^tnYuc0LU;PfzxhBiWrI-Qr!1Rx1EQCtcLeN1ri3< z_=AX}N~9oUmw*^6z$OyIXe1>+;dg-6v1q7(yO%;-G1MRJ$aDH2Z0`dikq7omDEgrX z-3JwNIKCCh0ZbE&L#{zii;?-}7?gS!Z3-cOEQ&`i6j5nl5c7b~KJG4q@O2^x0-7(i)cX%0?(OXAGBr2n(RVN~8?0ufqLF2nUs+NW)LS7cm69dO-bE0@6%M4Cj=$=m}H*=weqO=im=^uJAxZ~Fmzj7 z^N-0BazxFORbFeNSt?-{Vifl;i;~4c71ebtmpR8lYZG-kGOl3JBiMVwAVgRIK7Bd{ zh?dygHq#YXi1}xrtEl$#QDH_k9QTo*1Gyu(z{3$G0n8JWzp=Xr`#zQiD;rw~>O|!2 zKEWoVHx6aPz&>{L7jg1X=rxP~0}~9Qf>qL?R9t@mvD^gINkW@~e$pHdP1~y5H8?eu zctH4Q-c#f{EJ!T+cGo{;Rl{7)M0>aG3HgbB&Sz-Urz+g8$yS^(ZgzbaJ@~5N4sdW^-cIV16~}D{36j zgy$f~^KyXjFnaPksnuah3QGw!b9Q4RJxfsDws7}yIMR#2ZZANA`yA3d^s+=$CTeS? 
zLbCg6xgaqm{5 zDk36v6_-fk0FH>aj0MCyxsVq!$UGI#8U{`;Lz>;gWuKK2kRF zBoEKUxH|R%#HxOwPFSv)22}t)3$IQO#2My4cK7zK>q(SwbZ|hzIhWV+$B!Qgp_;RJ z8)QXji4Y5X?JU^BCz+X!F~m9`l%ja>JL%~3d4nJ`NAuc)_art&nEfF~Q&34x|S=7c#h*VJ_D1^H+2-AaD+{1A*o3IZcJaXsunBW?N1>CmY zre83|mYAcA>!v6kfTxhKus0(I)b;&!ag1P{3DyK26?EP@*UK@61G$R3kWTawN_P&m z)a@?VpV%3Gy!tzd%c!O0AYA7nE7Nje!}zTFcxDF$tw4ete+jP;E)my7*M|>nAZ8i( z{gD#;j3@`U{2=^AV72!h{E0tF$Q45e{wW!jJRhgg(I?TvpC%T1A+&-;Ux(g(=lbN6 z{Bf2o=*!VsP5@zMQ)RZbvDsIOAny_^7()I)f^e!G^p+GL*4mh*?em zoPmzB$^>4G*i>BE{J*AvT7}jac#|2O=5G`16TX_TI)he{Hr7bJPm!&eY)YIUT`Ane zdI5XnLcgRtygcfq8)*9kWU<1Q?}~&BVW2sUCpkG8o(ctVQM+jtVCm2*w6@zoc!dlF zZNT{0KqO*51yQnbMDfA!_Wk>JLZM9}G6+F2K^(H%6cPz975KIK_&LpF(OowFn-k~^hIL@=bIcrDP zKNOQO$p&4$UQ(mzNC(Z|>bKm0H{Y~ARq*mi*=p8HhUWiJyUQDN3W_dWJY0wIoK1w4 z<|%{SGf26sAQfVp($&(ESDA@`fADm&`P9WS0$KR<=@FPUm60xc=0W%kgn!-oW1ewA zmX)}KM1?^TYVtPCg^!ymFipgpOB1iZ2Z|1H6|5RRI8tDDI?T@g0=gqbm5|zZ$Wq=% zJ=rQ0l_G$8jBr5^_fbl#-3yS`@TEm^_^El~OaM79gSuxkQt)6WMzz+_+Z*0ZY#@yJ z!Tbg|F@L-}b(d=JW||ojq27gt7UHY@Epic@SX*>()!-P0=7$(^fg0H0_2-C)Ux#dz zZ{E}!m%(r`fuvjU(O(y&WbFL?{gqVirG`(O3J(q@Nk~Ybej&ZXmLVKcM0v*(?;g+= z&MwS@G4n8NC8aBu1L5dg1w(->U`9~o?%0)4i5>kchqO`_aGvSmv>F8a*T%3GFGZ;NJtWA7#%5Oop)A0{;U>uAz@<1ckXSPlwV zlG@tZc~H2D!7UE(E4Q@N9S6{RH4$#^owy8Jo!1oBC`_@8~{kei+M7a35%5;AGHk>kffVtSc8Q~Ix`ZjCw z%RA%2_I3c{U#29x-LfYj(DJ&AmdATt(P?*&QxN)M!VLRD!ArWIko&WbBx+%|f5HzY zve50xRz>WuzoWoIxq0Rk^L;k%=h2};Vkg$r)IZ`P!aal`PX90<*zP$*3UsoOU(UrH zApX#}*A9FDv{5lMvyp};H7$)*G`Tq%a~*1GL6zu25Vct4JNl1UvYF0DN9Ub>^*ckh zhhzg@`kApDjng{xv@?SzDbJift)P6q2-9G}<;%&+hB%P*>c4)>VUvIi=G(m%729V? z!NJ-6Uv7t|CsDhAo(AP5$v7E*2I|lqAHYb2OnyN+7UitmBOfNKCqKZhjZe z^a%Mc;!AmuEu-vTgvre0zvnpaj-GJ2uFvOlp6C1h zTJIaExu}2&ku1rs%9^^7uP15Cf{xou(3&BjWe`hmus6N$E$H+{t1gO!Ob}*V-hizJ zyAWGdS8MA-Ak#!UjbBHT5Q5n>xDN(RS^miMZ{4zKQ^>DKqx_y#dP!*oRFtPrS7Q(c zs@PU#3yXKhkENWDD)hy80I)b}(XCaBx=p*aZCbufW7M-YZE<2lQ=*01b5R%cI(7A! 
zsYD>Jfz>y!u<$7w;X_9CM~P6d)j9`w9Er)oQ4GE9cEn>qm_t;L&=P&FtGjeDG9uQ< za0&5r;K!R3Gfy~Xb0Qf5J>o5@Et@w(m>>-x1fB6^%;>>^069<4FEoQ8iHQm;X|`W0$v9%o`69IdMAim z?TMbL_G#4%xJj_ZDWi(?ZOSgn&CNZ0{mXhRaMsou9iE~9VZRzRYiCr%N~zoXix0B1 zV?4laEJn!1&nqjVvy-G`zo)^;Yw-Or_$qACQ69Ek8}s#&7$7aDLYJS5t3U#Kabv^Y zym>}~%M%@9G4&VSRx5%(gK&CX1{saLk_#$7)}b1UJLuTyu;kE=>}O?Ng&25nkNW1e zmU4LiFbXB=Rjs-Ugpo8q*u*?+{PA%MplB+B(=ya!2t%d{3Z8(N?#E+uA<7aGNz#iU zc#Nd+{)-C<#9W8<&SED4zroH@aJZB-iP&jL)k=yxlP{cD3_oLVtl66jkrpf!XYBeGg!6S$F{@(Qf)kBDb<;jkkDT-?xcGpuhO+2(Z3 zAhr-mC5elXR|9H0VL%3CftHjS)56eBV<+W4n>|9^``;yMO(Eo+4B}caPB*;OJZHcB zdbFMfN~)wNct4Exag*uqj?14ZMq`sc5XMXG#lQ$O z!S33pW~;zQkQqRj9QY#|X8|&!JM$ysD}#c-I)a>BFn-f=Khj z_wPOgISM&Li(=D&$IdWM8zaOJrA(5beUBKDljFq^L*M*%%l*RG&1n)6+7U^MeE@== z`=;H*6Uxe3@M97q10Otkr0C19hIktf>4G*@z-A9g82e|+$ThGAo1ZHWpQ8XIa=)aH|t&22YT(I%U$&FjLDt5&a5Cw_T zVinlH^x2P5G8Tu85(igMZEpctOseu zcJ;n|kYY1FX?ghQC@|d&m8q&IrqzX=+6NV(Mn+Y$LQqi9J^THOsFaAWzib^%;rP9r zaqGNK=I#oaGy~ zZWRND<~gMVaR>vDL^n4#0BW)3GYdV8kmHl5tMpadit_jPCLgXhZBMSqA5ZGL6P|_% z4$VjQ>}7J>OQ!WXF82XSE%exmIX)rbQ%Om$n&l^!Bo$TNw)Vhc^!e)QO#+@5qN^;0 z-&khT^hve;ja2W%h7)6c+uC}HRohYtEiAY0MRN^wasaeZHK6E>Dfq$Zu{=9mCXnL; zqD|(PD-4wV3ux$yT~Q;|myGG9^pTqk>Dc%tYqR!-uA!-fxZD)Zq{yR3L;pcmNrS zc5H?CT-f~r7UV-}e}V~L!rw{g72szXzsAXghOq3&iQ zSNe|(Wo2?DI;bowD_J7 zq;(uaUEz_PjPPFUtHk|h8WR!Wh^Ck7ijk2t)}(Gttp9Og^i3_0az`kz0w&Q#UNpQD zR3084>$hwx<*hCYy|j>%DsHH7$?V? 
zPEIzq-2kD04>WgnN?9!FDR| z6%pGztv58wAm1>hBtL_0Hl}JqX>4MOE&BHmT-OV?#c{dA{GI*ZfY!vsrB`6=L*e(c zx%nfamXL|bN8$vq#i!>9XY-+E!ojEwB|uvE%u$o(rQ{4pCU@lF%z=|?%KUYek-f|x zvzx;$-yLp_R??YKZddUHZpZ?hg@oBcaE{Ob;J>h`$mOGDqa}?m2HmLW_SItk#$D|1 zphX~wBv2PpGY;SEkVP4d$y=gS=%b+x`}zC#%J-mwUoBmMA|V%GuP&l7u?`eFOY-xt zf{4b$unC794tDYt42+XFW(mdsF4qc#G$OHxz(zBOgXcgVUW@c&sF5Y6&@L3*rbgC-^h&Ds_I6O^R4`~G2>2J7REc8@ zG#8Y!s5wd3f~iNQkZl8`COPHB#l^1&BJ;rv;QL8#8M^5NNGuVDUJPJaUx>hWoeE>K zRSMZYK0f*F>J{yMM#!KB3Ohi9$2E;@1b21wU#sl1{po*Uo@YYks9C6!aSF0?~B(-34fZas_^s*X6HDq9u{q$P3DBvWY0eI@u5eNOGe^gPG-eUDoi`Iq{5y_ zhxjbh%h6SbF^LWV8MmOt@X!Dyvz+7_oFAlFy7UluI0(ruMbd)90P=w8RMKRB9U9_3 ztB#w31_5iHFdHJ%{U>q4gug~AjdY|X;?ZDk-N3-0(iNXlg|hlqZ)*EdRIxzisYuOF z*c&KQSSI#Kzf^9$O`S2(=7*Ph4mb>*nykoEW=Y0Cl4=1<_aGSMO^_kt$1nwFB{YJu zZXog^OF&a%lemsX?byVcb}UAQUtAWpVOiO{T%))vcBj`jdJM(=9#V2&L`$dsk7((J zf6&rL@ulI5YY}Q-p}~%1vOnI9S~06QosMg2m8r(BxMBL?pyqi-Ok6?>I9n;wcz`_l z0s;(6Zvg!4htV;djn*Oy=qIqYukdS<@tf$BSPmY{P*RO`ThPh>LU31{$5t+rWyZeF za%#yc<^6NYo%|0+mNaVVRven`ncdF6pPT9^J$-&HAD5`IR$Tl@{?2nfKHtU< z^%Q4D{~vgcAtd$0o|z46dH7#SY#MtFoYI@OZf*Yc>-BaVP{48G&gPH(P z%HvyFUF`)D!L+xYKjb!W+&n}Tv9e#fybV+_R*(yzmIHZcg-;_YEZ>~a7%2A`myROE z>2G+{vrCMk$c=yy``P*;kAc+h*nq|X1Fq4^iBe!Cwl!8ezWNdZemUFBLc9cZ^j8EJ z!SM$eh%|CQp^Ae70MxBONrwSSCsD5{V^|jzz>1XgbZ*`9JxG9h3|j>p3U?-dhf91% z5nqvIV=Z9OeHPzLl%)`&(2Oi!wv3n@DvWI5*# zF9b2uVYfIdBhwCUhKAa!F8*&t_O;RA1K2tx0Mwupi}R>9x+z1GF}J2V)}kZo#fRvM z&M%qI?9VaM3P^VpI-k4!*>2$ty4mR z8D;XSB<}IX8|()TT*dL|`Nw;LO<`AiGziJXA3t!ELYmgx)paTS9p~i~sw;=;QgBfV z1}^y=7U2^FVtdSahTFo{@>5TSU2aL!xAszxsX~lu=pP)!cnW*a6#!MyDJTv0Vc;Bu z`vd5=O@X5$C<7r}%LxYr(jp$i0-z3&0#l6LT=M17^9MvV3pOJ|gj{35_}h=(AOoHl zh7)39oWSxWT+3O7MowH>`pB)0KcZ&uG#a4bCer4xH6aUYxtR&a&2A13)OROWm<0Y+ z{Jj8albE;}mTR2Qp5OKO*f=<}P(Oe>U?^HO8%>W{5tvo{Cuk+8>d7ew7}>26wK|R) zevp%(29Xr?%){g6nc7}-U3fY#gf%eSjg-!LOKEFs^DI}H4%xvzTjm0JT7LGeq=G@1 zv3}3>%3l`G|JAQ;{pDOS_50vdHd;~9yInptXTSTLjXv(*Pe1uAf98)Df|eYpSpeH^ zgF%yITNoJ`h2BBwut!*UN2Z$bq1D0y0={U>2A~G-M>$!B;cK|oo_pQ(-@@bvBRwW5 
z?7{+*^f~+xLnQ6>-=RI0NGI7$Q1Wp_XaEAfGuB{#OSEgF)?k5hYjc_}L^ zQyqf5D{F4nn)oBMt}zN`3$|J`W&4(7rxC;D7lE z)O|`s{|LqykX?dC-a2qdFpGw`iNNWN*ZhQ92jqAt*h>`4g5~ej;?6`@HO8lP7JPgs z!kQ%UeM%{uGNYEAtI+MU9sM0PG*-{3g1s-DN16aF5KM&p2dWt(>xhR&-Dbp7ya!>0 z*MN7TdZ8r;B{|p2UyDlwfW&@`w)HJk)CeOj0`2P=wD%a;4KkP)PYwW$C)m@Mup}Hc z*6hA@0eU+`h_v+o{6vN$fjv=V71j9~#5$IRsjNWrp1Zqim|F8iuh`y{Vsdg~4B{;b z>Q>*}JUUGKB7y-9j@*wQ@1sAtg13XN{95@$!-3pJQF$NJ+1%Ks&R$NS87R2%rnF?+ z+8j>cGDN-nZoDAh&iwFc9iCw9B8mmpLjm;{B7e_b(~~8@)cVaE;BlkcNhN;jxB?QO zKQxwaWNC5}GpiV5r`^9CR=gq}25I%8ShE>EqxzkKf{b`aAPhZP-~)nJ;zEHs4kT=K zczxyB#^g)L-GV<@w>~9)a0YAK1G^7NO#*<@S>NPWhZ-vg>gE?d;$P0JI4`|m{+Xo# z+Kv&?u}hOKNU_F$2F$yAyPS$YX)A~6;PQ_)g2?)|2)y84C;5{ECsZX~~6N@2D-t{%kU)6YEp{Z(6^tmU2grcux?zpx^wp8JFec?8c2k6RBdNoH_Xx$RTc z3}exJ$jq%}i6`e&lNCs*`xC5EP3GgRiZY|mS z$F=E!CZfUm6T2{k5NvrDSUo?iDFmcSe^B+(2QahXBG5UgMq2>bKZi=llT+i|NhO#w znDG^t?flnnWc(828VLNnaZdc;L#4PSEOzao>RhR|e*0V`zW=3hejE10)isXisVpQk zkfq_UyEhmfm2B|++Mv^ye~?ymHX6{TkgA%@>?W&sf|p?}C>1UAjQiM68tCdi4h#(3 zD9ds1An>`c>f(x71z1&L(uugmDJ7ge^*DZrZ*(I1O7|@3mXTix+EmH5Kfkb$ z`xIv63iukm_${O~Qxl{d zl?E9m2urmln=i4+Ag}@=cWhp`EJUvnDh(ofJWtksu_Ew4uBm$Ed?B{%oI z-1K{2O;c}vKqAAp=fg|Fzl&(DUGCkN5i+yf(b4hH3YQDxsGv;{UyCu1N=iy#?k*z zy{pZ*p{hn6jkB0$|IAm(kn{K7Z*6N!imk(Oj(UlhV@P_hL?}w~klMpQSzv2;0Ma`F z>j!%|6qnpzxKZC~wgx<*FGM?=o0GHb?%Y)N9C%1lAy=oB@+F_=I(HORD8z^EU{4-F;L$GjJNP9lCgg9V@qvql7gbXg-a)UH@`!ry5rN{|(eD z^z7cpx|wOzV9ub!fTpXoGF|x_-c{(4>&zuDJFm*7{2scE-4`LuI08P_)umUD%=6EV z*}Kvqp4fD&4B#p1Cl4PaE*-t5OPqXs>qt`WTV1XeXHHy za?QK?!GVEYSdoaYZ&k)7ejC%tAS!8|OR#Ledf|fglDuq(^UuUL(=RPMMzj2LuxWMd zzUN*RJJz2`tc+IF^U}JQnVejC5CLQ6t4FFNJ<>eoCB#D?hP~LdZpD2^$-}IF9%d@J zJ@>>PM@8`bKo4J0}U;kdcPDocEIFo=o5gr z0=LuxD0HlKwLfy6gR!5S7KvNAtCRCc+tIO42D{f3bv)bX?xOJ7k|seM5A< z^-10Cra;iTCEuGafUN`OVQ}LP+Ml%|$+6j`;5|SK{z)Re@pPJ$c8;~JQ-~_K_&!Wb zjLvV`^0)RT4ar$S!4yy3PjXD?->;#bB}#UpbwgeD+{b67Y!CNMXmSNDdw)R)pnUNn ziI4pVcp|YAW8Hf_0&MlSa3-lpiHSXGTc(BsgJP)HThAo6V%PAlm4u*18CeAKuD*U3 
z&)IVg`W?jX7!QM@A12P$=jg17OUA6)7g>Yjdwy(z(m1V;!k;tMq_AI6_|r{RyF@&QKIXklu`7-6Ka7<(s~I zaeVS*6_7`$%<(T?C_H=tHdcRE!cK$u)o*om2{_smSUt7UW5LX!RO28KAt+H7Z^Djpt%M~1DV-4oI5<+x4hXe<)Zu{M>@{&{o_!!5-|y@)d4Po_A|Vc72S|8V95aAC5!-!{`{_7#a4j6-C^Q%$l>z3Ac>vWf(~*Ua z^I|XFYs9wi^BVG$r@f<0@1=g**8fXVo?I_uO|?90Og^*o^ug6%(|d5GKolV*5+$Do z8iaO!05Guq|M>aye4MvZ^l10m6)SckZwM=)99=F&016NoILJYREP+N6jr>e_ffON^ zW8s=G>zuuooCWb1G*$^}?SSZ-J31mP)wgcliU+#$AVI|tZQpkUIjz!e^TITh2y|a) zTY%Ml#{6M$OL)9HRTueI^2%#8@VP-+9SqAXNcBsCi8Xh6t_2$AFs z>s3Xmwf=<#KUn|q5FAl;^}7Hq?)>Z5LF>cx@B}qvCCy}BC5m6D6yyH=Q~3E_QS&~@ z`Ou{)xeKPro!L^7k`N@0aQutT!9tsRPEXvMr&fEtcySiD9eX>TGC*a_nFWi2gM~L! zuYR3|1bddPg4Q7$O%r%=(|NX!m@6m~z>D1a6Xo1LeyZ$n7zuq9%Hw z*+z?q zxZ0o=)_JnNt4H?pw%6(csu~j`(e%^9Bk7i0udMy{kcZWAYY}Kh(9v)=Fev= zZKg7jFS!>q>~U#nmo{Z&$WZ~LB<6ZdB~~=+#pTAp`Eb$;uH9;^x`vkt=n`c4wZqgk zuQD=_R(nz7wyw#p11s$sa~yjO-!4u3Ye6ksyWZ8TNl~?2yHB^Yr{UIMv0%@OQXB=< zuOMiFz z(*z$)=R}>61Kkc#UwzLph>j86qi3s@sIvj90*c^A;P?BRpC)gV zhcLArP3EM;Z7KBP|i-QHn=6G9rTN2Zpa`K*X&wLdz`T1f>gl z{}^*goJ2@1#ESa}TnAV3C%%2;Nm+jWW4sAnTM+=W&|*d>Wg~zSWsxKf9|#T+gdePC zXx`W84V9CUeDK#T#SCiDhxP)d68Er%QPt;TQAZ>Vq@IaL~cah%Effs&$+@` z`1pHQ_<5EQc>n zzte78n&X3teN+&CHuSx^rRf``_yuyQVH{(i8LjAL+Ag zd1DQ`m5=_~aJlXqm+g#Rwf_p^8yl5DTOhqntLViCRKv)f+I#r0T(pZ^IW zfO;N7Z7m%rB84zKVoFhLKpAU-c9O7^u-_4o4!fDaR3QF>Ba28dkw%V1=f`JEa|OB) z;#^5s!>Lfx@e+z{JP5JolBCT4@Zlb+VUqi1*}Hv*z|9CHbO{@wZ>tKByEs&yPDPMs zs6)Vo!Jrd%aWW z#Zd8>m>6U%LBPyy@a+)LrxO6kBtqR_n$gXXA4!kxY6PIoiT;T|nnb7%b^y7_>RsmL zd6`E7@R}R9Y`LJ3k$TblY}O40?Tg#8uU-$$aS!y4<=<+_>Q*8$!^N9$jG~y(=XW9H z=IHM0zmJ^oGCCO=_2bItcHc?X3po^Paf;TQ3++Jf$k7UKgMH%nBHBG|4xRnu zzgo;P-iVH_|JAf*KO-B}OM{pdgMAX;n5`M=wSeuA?ik?)OWvcgf|$5Qxe2aMsN0bh zM{JG=4uw~XA^;4FWEe)&k6j3r8$hG|+n}_x)D?ji@o2sQ89jpV9(6zUdk|mf1&a@7 z0mQ#ERLlniFS(jaHRINEq9Q`Nsh(Dzz|o^ujE$Ru==^GtN5`9RKhK#l4>tryuenQ0 ziKY|yAQ?a#I1Kj6rY2e-Arijkr9Y5-fZOhib7ytL%c?XG6y*GpavQcVUsxg-)B~<= zIq*DUkrwRB(0J2j8t058rN@<(l?*LCf;=w<%tWd$YL(TxxTsEUw0nE_=jGso`+pWu z-?4IDx>JMk6nKfIDko_n>`x^aEy 
zcwxuIlUEi395QzSt^))S$#?nrLLvzz&j5#5&CQK}O26IxOa7h62-EXIJvFbe0~EDP zUtEpOgHxcCLnnUNCL;xr1c}9?aK#Bzw#SC2eY|&>6#p!7FR9zjB#GB9s!tuKJ_Hxi z`~ET}w{x9piOvl_ASozEG<0<%`Kz%}GIUuY5|oIU0fhtp1PoNE(oFZhs_Mc}6Tk*S z7dtt1az$0?k-$_~3E+1?&F299T&!AVR3x|E>>X?-EUc`}u;P^0YP{Qrj|26pFIf8; zrkONK<*wA7$eNKuX^}O#qA0*(c>Z$$3cN{5BEB0c1e9xNYM%Bx zl05p<+|n`}urWGxy`CC#W8^>S#x_(&dOj|!bMJn1gVPyQey}VHgV5GMwcU?hCzhcT zF`eE}RCK8#Se};3XzQ=5dRq5Vb12!y`st$zEQJRSq^VITetfaljIKIAj!bQP;Ym4} zWJP;;As>=m0nZLJ5*X6s07dx_*l%|{-g+ZAONsjnZIHnumbKC2db@sVH0>Wv*P(K9 z>YF{|4^(XvZ1=legC22RoHy|(AS2CMSt{Ye=oX!f+j7UCq)V_2wWU76O;HhWu$RX+DS4r zOn>mb5~2ef!%M~6R+AJ>qpJDXIUpxW`uicGfh5xu`Jd#;Cf>A@w8F1FE4vZZNz$(! z9PoBk+Pl>wBzqwkkk{V$MubZRig{Jnx-pL`C5_rB{p~mS99Dv?(6n0DkErJ^y%7Rs5)Hnd9xEtD zbC*c4dP0*OAtb4EaL_c!bfTkf)5D~kZy-#8S1vn->V#4g!r`nN9cu!c>kD+Hy8aq^ zdIcEXich{4WhykaC*{G!5y2==A%iz!u;5J!xZOtYhV3v>{C{zV4hta&E2RI z`2$VaZM&N8325)A!xv&HY~kx>mQ!ioKEUOWTw{HZ_K(FiS=qzvg3((0_U)^3yM~C# zoU*d#kYd;O^sng!ZW?2kXiDo|p*~Ymx6l0>Pq!UUtCF-@b4Bda+f61%q7A}^iXYG( zU{CSSn=8X`g^`0LvVL5s<_fdwYAg zExW9WeGB;z`=KDg7Q}5cd6NWeQu?mK&TtO`ddN^ndNh6xyX^%EJ1s3Od8>he%z)^w zpmYbYo~4or2OKtSh=$Xu2Qzukvuq|J7ig^^<^vtg2E41x>SxM3x+dExLx8KlG4mO( zOnT=M5&iNP?>ZUsAleq35aT4Y5W7?LcD}=h|2%lmi9miR8z5%Ulajy| z!tm%EjC`-!b<3|wqDbj+VHWns9)aa7jGlSgs~#aW+%FAr(feb zN9+!HO7+Qvow!&Kw0|xvVaLN0A2c+!neDUUyUJX=r@Q6i(NuFvjWW~bCoK+kN@KlB zX1Vhg-A884XE$frW+u6)N;rP{bSaybrHyj1^8zP!Qq-PEr{Ln|*1%LMU!O>DpFcp6 zT>-T4lHcqR3j6`Mc&h{-mAf@t<(crk{MOHw+FmiC(PLCUXO0C(Ky&qa>C$z-MjWO^;2Bk(#GqIEIP+C@26 zwd5DjvM0}O=Ba`1P%(Z8czh7 zL5SxorSY-JVk;0KrTci6OK1fN^l&!;o~en> z#2ySm?M)8PJNs>VomrvfnMMqPHq?^XuSo!RDT0tcqT}E_$4QGEZ!|32dJ3Pk<4;z& zYJ+ehj6QKGoj0jI^K)doq-3$Krmnnp!zxYwWi_!^G+XXQPGk-VYnnZE=64Y{BV#>J zIEDf7Vd3N?v+}N94NkJ`z)zdF%7`z^@*mJH&se z$ZfT07m=RJ36YJi+|wuIyKg)bcsTe$}bDAW3-U>6wEI5IfXp{AAz^#xg>|5oF3Ahm#00 zle-0bLy$|V&z?PF+5PL+DX?^~=^P)6ScEp}`_??~uF9#e-ww^URTVE(_`Z)L5qowz zv(D`1&Cg|J=e57%>U=}eC$joe8uQg7rW=F8`qt?^Eklk4lBF}J^rPPCG2}s}Q3}=WSrol)mUlg$-Z`g1g`Bz$x+i(}%aDsw5t+rG|;=i->?1>83c7QT57Zb#q-Y|SF 
zcsJWrNay#WJU3WllbG4uLR&kJ!2N^;Nrkcx&H~hko9O7iK!kvN_+W^;U>}9~?i}o! z3z|91pRcbEW>h$?!bD}rq(^)A6S4@<`!rS;gqrX)IU$&ikA^Heg?Cu%dk947DhRXX_waXx}J{qiY}J z>k$LT!})XLT-$2v{%Yz@%|KlnRpOa8Jb?7_Ay{VO;bNe)K-2{v_1SiQqFp4Y+ASyN z-wBcRS>ZjI*HLg(xsVu&+IVEG*E`=}=m99I)4Tpbz(NNjM1=zx6(I41%c`l7A!0;E zP6g9!ea3Bf(4fyzNhGB#eIvEpnre_!$wi%_XePYZThW4&gh6SmcAR6c^7iv%MCeCo zMn-*5!d*M$$&0a5H$XB72O4HtgzDE6wC0uuin_YG%23)H*wHZVK~6Gxabko6SO}HD zN%SKShj4$BtjV6~F$M;M39Q9%$3&Z{hlUXmt<*Js4vyH`jiu(EO&r|ZgiH0AAz(3q zOvfTXWiTXW!XD_Kr>aWkwpLyY)2IJk$IFwOJAB|lO8URhQnV&5IABi%?}Qv0gtp$O zIK`L#GC=}x2yofWPPeT}6;B-<`dHF%?*%9A*Q|6#{&u+oXR{@mQe7Dnnc+r<%Zh}H z!khHwLoJ40v7#w9?4MZm@IEml6H5;X0>f`2c`)-aK+%Hu6wo*^WGpv2iRNwKm%Gd$ z{xb#IcEqUu3p{^$GNLmN!o#&g>JX9NS}Hn65_(DR3%rO__p^FBU z_07G9xKgJM1hjR?uLu^MI7eoNsO&^dO&E8GB>~Sv_DtYAl%D1F!Mv;86~iRQwYZvS z?1};=Jf}Y1a_;@qf5Clit!sQd7iyVq;QK0nfLvy7Bbi9)7HhHC$>0v`#^b0?)-aru zA|@@U2r+$z7{*Y|AHpW2Hv%c&!02doy3I+KQrLiq{tA1me$S-_St!#a+uqI1P^zHaD?v78F6V*bF<@LZpL`- zU~Qt2+h|L3Ghn$r931Nqf|@3i@)UXBupWSR_|eg!OuM3@`&3 zg3uC}>h5#;iwQw%UM0R(?la@a%_~oRI_h=e@6!+&PGu|Nx$>SHyO6i~7cyy&F zqIaE?*Yq+_=0y20O=LvXXHJ5L19AR}dt$X`e%Ib}UiI$zrRhJ_m`P)T^ZD_fzoU`B zNl+FT2a|0LJ@kYQ@?Ye$$nTPvhgemZeE#{jMOxv(dUnTor2OyyKfiijJ$yFLOYHh= z^Xt^F+pn^O%j&QG`&)X?|JAwyoP>5*9ML_{ZD8!Ho+td~>lqoNM`W;@$tSYseQ(|~wIp1XGK9TY7KYIG6+ij0z$JhPnQH+$CCU$tZAf>i0c@HY~JDvph(W^QRw)Yt) zZX8(0@iNO?NQ6fEjDF39=$|$I&%DjMlx>-cm{li?+r6EiYF4OxGs|nWwpP=uV^cS{ zTnByZfO_#Gd(k6V#lE(pkA~yyW}b3p8~oyoTZ!t`6(MUFwiv|F1!X)JG8V;LnN(Zo z^cb5ESpu$shchc;ipCmy7MW5!Lc0+2;yCpU?gW0MnVC5R%L8+VOYSPFEvQ_kV~20g zPTvL}d+p6=XId5>7)+r#-ldEZC3|}CX*HBQVdNK4y57KTiC-sm_N?QFX3Epx+#yt# zQ0IT$ysNq{*pc&Ww!+Y-v42R6<*th)1}hkHlraPun4Os-c_=u99*5ll91nZCHy|Xv zS@AzoOpMex+bY^W@n`i3QHkZQvL7ppJhME5$-?++YP#r*^KId#U0QR?{98vJ&jD|P zkq)kJ*N&#S-UgyzW=k3NV>1RSS5n$|pjj zMz=aS+xxuAn(Ur?w~asEeV*(RuJ(ZbT z<|hS91O9_)l#vf7((vs^-=v-}FfpwdJnyB{-|ybwlVlZBBA~~6xsWwT%36Q381H_^ zIIXQ$bQ_*1Qi>B@N-y(=l^l)5zf`>A-4~XtlE>z;g{6>HPx5``{#l%DWHb==vMA5U zDm`eg07L^I((e_On%lA^Xh{F{asNjyn%PF#PvaWvn;;` 
zA4J^vx_{}f$tQ^C`Rup>HAII^tX$+C%yrOy8DvdB@7*}!aeQ_(sAE0@^f+iw^moi5 z2j^xkq}GqH4 zVZc0f?HaN&d{2m&5^2Lg{W14FR$rVg{k-)5?a`j6J?Y4H!VUT zTH}fe3eN%qIl14ixso45t4yD`?(XjNYcJ|%3>yQpR^Dx93@z%JJ;G*RJ=Ewir}155 z?nmn`eR2N4NPbaXj;E;&O>OE!?*@7wm>;z6WGH=D(}BAEk#kcf2x7g8q)BbbgM zxth!t0xfd|LOuwLbqkiSWq6%*eHfw{ojqnB*F$(G6Vf6hj(X7j$0Wu}-Ac1N~# zV@-(751)X3C+7N;#+w>xq^14`0%`~{3`BUZwY4>6X)lr*FcYz6+HP!ax^(UbvO*9J z0dn&?-XfAG7!T5%bT&p`1^HH$@+|qV47H~!5*Cc2PlM#5ztJ=2zmW;MI$3Z7)TOik zz;gljkCV#(K}+fw8oGhNLb4XBY?3d4M-)`S5(SR$U10U`?;tg`A)&***3n>RXoDF! z{xIdyqXQ!87D%Ad%H4k#;y;+-%BSz3Ysaanq;Chs3#21H5K>XvTheMf8#4vb;|mbX zrmu+^6gZj47iU5oWOQ^9;%ca|@&FPlv&Di}M_eB%3}q*T`ViWJ3=}e>3BkhvyL381 zu*!6$-vtOzmTIPr47VrV-rK;;)Of6g;{}Z)2`#l9|G6CR`V(Z+dYRIbDc-s0%u8?q z^=)P)Tm(PHFG(wuVQ%~|y0-S5xpsI?f^F;3fo%-)>bU%+c7c#EIBdfK{Ssw-bp)bJDccWp9mpiRnAZ^h|~2S~rs zC9?rjU?l-(r`-I#k)#TPfK?%G(u=v$LpGANklY& zo+_{B{-Smw_`co3!v07fLx@>YqZ4G&Dx8P$!00fgb#*q$W(llNwR&HikH$W(NiQ1e zWj_=j*K2oiL^;1`{Xm`I`xo?Y25cH^6j<22og2;9t-G$U&wKaBq^`=*Dcu+#^#CMt z(U>p}YswfRSU?IG5l&&(wr@Ch$jA;cF;7=BYHDgY({8{FT3%6s8QB@f)o&cC9G{$@ zIIG~?ic(;Dz~RoydC&B&oAQwtVgxsojE_W{7;5Bw+~xYCHrHjKODUoE)Gd4W_t?8N zfXkpSH-VrLCm>IB6X*gGW(A%dPInTk0oxsz-^(Sscr+oZ-T;>~&~)@2L}e8>+trDg z(Qly(gNv*Htmj1p5rJStgu+>Q+o{!ah{Fhd+ZJj`O~x?5kPP;-%1R_Q8WWTAv0Ffr zwGa0JCRshTtXTc+#0Q2+6Ta_|UPRD4x20gZGqzGVqxCwm%X%Z^izN8Nm?n)RL@-ftrEw8zy-5R=Bh#LK z=8u1^K4b1>KENoZs3J6R`?s%#N%)_?yDON%OXB#S%DVXQoVF+kW5OHb)LAEXCpI!p z?m~T?C?rn%K0g{ObGpMBEWyR%hBic-%>#~Np-;%0H%Fgwt? 
z0elR}@&p=#Siu)S%=I$&l;KQ;Ns{<^(X1i`(HDKEmXk6ct~A!m`RJ-DFGr#vfB)e_ zF#Vs43hAf+Bc=bM!CKC~HZd{rH5f~rCqob&S0F_abSqT~BBb%4{z4!?MbYKV8SZE> z1E6_e-*bTr5AZ(`g<}=OSk9S`P#?1Fy<8lR#XN){%zhC5I6b}vaq;E;L)b%W^gj{y zWb6o}gJ2{{mfI4t?jiIjhArxD!9@xtHts~mC0JsKp%Vt4P_#mUl!vOH3UbbC#PJg# z%oy>KYwGhRnfV_@YeE#qFeI+EP{kU66CO$s6^#rV2bu~?oi`@N2ncYiJ< z@aF>nvn#dN^?!cNNByPr+-}aUjT+h)Y-KbIu1PcnyMND|;H~TotqK^6Z4EQ!ojTmo zc4y7qNS|fG%cnh;2#QA4ByCx{dm)`=YZ25#kTi2)#1O2SiCPoTE$E0`o*W^y3nsNdfhN{J~tPj`UFdM=7}O?Kj#f1#t&MU1Dz z8KsFdAF$qB)}!45O-Ezh{QcMJh|&oN2ox_@5Rejm;Jp=!JGk+oh@l$R4$NrQuBZSVS)*9vqRMenNAlVvrZ zOZOYF>{$9K?_--?M{izjZ?&DDZ-a_n+TIt3HCI9F(u(vlGB5;*2S{n)l$wAlhiur` zA^`!)0ksFr&Mg*mNosEHJc%72A0Mw&IWymT34Md?;Oic{=%e42O*%})?CSs%aT8e$ zX0n6bCjP#QH2iQ_k?w;iXxh=M{5PpVru&nmk@GoK$m>Y`Tb1z0HI>WA&LSClL>O3b z)dlh(2Z$2h1P-Df(nCZjeM{2c1~D}TnQy)ef(~LFor<<<+jyv?5t8iY=XZ%R-?>#ZY1oa#eQ1EZZyNLQkN8d;-a+p`H`6yMvW;?QXir-OK$w~?yaXcuIoax zzWnZq;P*K>r`wkouv%v6vaIP-(YmKqS`q!*li88deK4t$#ff3|X9kBa?QBx}6+K+$ zTs+j~`@G<{8z0M_lfns=JqPu+MfVk~OBHh`C#ReH?nW`KGtL%`cKawhaU)WeCDBDw zTU&qcgW{s1#OA14yA+X`rnR}bhggD4={r7&*4RyGQ)S!smn9#%x^6mSM}7Kx+V&4w z!rk8$C$gy~l4owK7gskZp4>jQV|E~_*RE!$*J!$w-G1)DN9XBW*DhNaI__a(Aa4Rp z1N^P0<-Oe9PeR>>L^V_;k6@4c(blHeqsGU}dm2bjT@%$Y=$}yeJ5K`YBK(KYHz*yR zT@1=(Rn$7ZYe|Fo!A-9n;Atjd6VPqbAy5Ka;1bqNagf?oPS{8+>d{d(5@Qh|9zai= z?JKRv1F~0mrdloosg<~=FihMl*kb_cMA-DGu;xe=p(7n|$CI|DV#k-lFdSke!lOu# zWZ(8>D6*k$%`eR!p#sUasA+(_KGmYfS8-Zbw`z?2?YnomO-QB#% zt>#6jVB#DxLZtX4=DIK;t)(i13yKdAVs2s6@O={x5|{?9rxVAY1=`qC=F+{5MIHCF zTwCWqBXFfQs&@y!#M8nF_G!B8hs+@nK}(Mfnb}r5Ta6jF>xA%+y?>|iaz^-k-1yA& z^qa&)5*kxgKxUzei)$J{P6xW1X49sg_|%k?XV@`&sLNr?0oU>tA|ooGnY_lAPPV>1 z3$R@SsSi|8g}=ev8Io5R+0xq^%ij*j7d3XAiWBdMT{>Jw@VCp=Y3jV$74oCKZ&$B^ z=HV2Gz>Q~;qnfKr_wDoK*72`@AHrr3mvQH^)5A>;QOOmuGExtXU4GBDD}O5gDzzos zn|YXFMV*L#{R_QvkL1ZNLV1W&k;AWD)$S35ztLlqS`6|;FF z>`oqqJsh}KY9;>py`&_)W0IB%%A9NKjN`_XPkmZY1M1v^#F0d8k~9@^K!c&*1tEd2gFSdWV$C5z z%q0A|I!?V5^9C_EM4$Uik5`>v;%wa{a20quFh45SmhWS7)T<%5HhP6#Zm0vB$Jka0BAGG 
zVThD16?4)E;CrBABFPvKQ2h}hc?Dds3(zivbZ>)0pA1GxD#83s88`r5gJ1yz+|t!m z2<(=p6+iJO)C%mVr-*d|ELghEo=cNnOwzNdPn>{f^Ho8EW6_Pd(xdM}K3YOt_#-MO=NW=0g}Bv1}w{3n77 zIPX`khzMvkE}B8;-)B@V#A`UOelSMvG3uKj0Dt&k_So?*Ox4*JY&$@H4(WFi@U#w3 zHV0+3L>Hp!Nph>P)GNpqC0iyu2HbLVTvB(RTtu8LGo9tYOZ;`hh)N9m#0t9QN(p2W+D#|80RN|#xUvx*%(38 zO2Jy$GoyYb+e-S929Oj*#`38IX1}dy5f>V|MClnHW)knr*>p5@;u=o!*Sg`Dvzv2xT9c_ zff3w(pr>NEGdS^eN~4KT#}gxuH~r7Y`n!8_+5EKK{4AU;VlC+Co9z0UfmWj!J`3?I za6=VMB$cAQ1#EN-2j+;;rAP$?r9Vhdw@!K&jU-5Vh*2rRW>H|?P`Bc;gg1_HAA>Iy z&pW)3&A^t&CY(JzY3xkw-<z0PIFuweFyi#`hGCd;{ZcykVHpHACkJ57@nC-OM` zd)1b?p0G7{!^1wjzMWK4aYlY_)-P=akvy;$6Svq`w^`u7DCC&6*zqA0?p%=tb>2Qimotq`k;w~ly&}8OHX`gb&l?5yZ$^~e_}o}y!oQHiXK(#vD;oui+jw!H0Ez}QCUY%(#?^daKpi5 zw(>tMMnljaLA6mGGhIRVNGCc>e+28A+jjaE8s1nXYwX%+K07Y6PDo#fZcI%}LwJ^I z+^GM{m*YrtFTv)7$CG9!nm^LoA+bcyON|L8KA2avDkViVFF z2aAs#A{oNvIAq^#;}j zr+j%c!#C2G9a)F@8;16384=Kpd_YFBGSqu0l{9L3W{lnwDj7j81K%{#jNklaEX-c7 z-~0C#qGRw@X&V??%6H$i%3+^6GLPlV8T-%Y>)Nd#)ZJ>oKV)kf)Vhps~e z!mgThVVlvnyvQ3F5U3)?1F$7%rFqZs*kP;#9@4?jo01icSFc-VbaQZsAx>; zr$vx|#jd+kW;FdXEbQ!3?TV+V7Ih1e1NFFk>$h*0gfG5NCGdpfl;6T@K1f+zq~de9 z{p8FT^JS%aoWv)JpiDR!LP2pubJL7WllXWO z`^hO7w~wN4b`>6_14_jEdHwtX`sN1VQ}54)#lc6 zO2M*Mu3fvYLZM;Z^bx=mW>h|CZoY2xoIxo1)vu4V3cGJb#fT)6&07;WqWvuwvW#41h2 z=lvbGs8}|yg1{2f+m?~h8S?V-6r1~NH4!{SoY+{|0YEAlGyauad9oH^f>{1*MMUR4 zZ;9l6!?1J@S{W_Od6AXqm9t|Dp9wg=0|%EDH}F zJ}DaSpg~GDi2`+MmeI`}{N=XlO;97lu^7zRS$ZQs6~r%s0&ebFIknL!PUVVfNo8{01; zlGSLKhl1q45)qqR@#E04N(4Fxcc<9J2q5AI@JD-7-7&P=5;Q&0M%%Zk0u^Cs@ zszc8O&lbdWn<3PKYeF{eU7BTI2__+^wTwg?ZU?^sCEqnIEgI0#c10lllbQvPoQd%* z*nq3MtVaJb`iY?TBSk(;B}hq*x0$k#`VRj18z5CA@3&#V`4w!S7{~aky6d$oRP3nF zoVN43A)%k4Me2ViqueuVOy$Pg5+)wl9riBP4*tn_BDP-V+UFv;js!DLSSaFg)VT+J8_7UHpgcW>PW{B}mTS;k!}+V<39OF{ zoJR0tA>2z)T&@GX?A)1*jSJuv?goS4()k8p{*Se|foYq5I;m$f)Eu!mxP5R1{B?9# z{aw-z_yksNhujewAQ(R}utZj0rk3;oK=Istd?4Fg1#O;a`1#NBmX4ENM|qcvw}DK1Vs zHTW<@pfGKG?QS;qfa#2z#qlZf&u@3=ynxeI$@cUpSy|*9<2M*=^ID$0>qMqVVf$BsYcF%3HT>m 
zYKV0(1OhSX+mQ}%cEWD7S5PR5)#k^z680!ex$|3a;$o?ES@cUOZ>f6e{5ZPDsA&|-IXiXpWW{{e0`tK;MDHJhqSZ~AJ{xF zBP7G*W?drOE^KMU!`&y`{yLP)qFb5V3VuSW5{(ZGAEOhhE9$nk#?t82yq-88E4>mL?JgApJBzibw`pb5DY~o zD8S>g^saQc1|o7X-D*q3+z1@~dr+2#2UxA5l97=C_ol(ohY|o&YF`6$#tt+DU(Oqy zvRC`kIcn-|fi4r5kx8aEcD=WWj<3Z$T2qf!T_v&Sc)2m}N^Scr--)*`i2~Tg~pFlCHuPLZ{7Xz>f`iHj$dKrHpjM8m2l_@jHMQ88N9MxAc-CK zLBM&ZBtW`a14tH zrwZy7Q-t_IjyZM%!mtVe<*0!=4fr5B#plC#04S8cN zwWx{BQvxQsF9mwjNjfVo)x^b`uxx&!lK%lZ=0HfD=|alB0E9>1eibFev6(n+Z1K2f zTcaD3bjm(XXU4_FP1H768%s404(_p*PYKA?;9-DVcrDYpn>E3Eb~HT7zy9Z5T~Ur$ zrUyw*)It9rZ{Hn{_5S~@oOBw}RCWtRQduFiP$6_ec99VwJG)XTt06mtXmF9eX&A{$ z;gT6bcJ}6ezN>T2_c!kQ&-?p$obNfQxUSFj9N~ibB)}W5Hxl-#setd3r`+ zOLZAsjeGf$IN(6xbL2&8SQPQXh7q0~R3vBB)QAr3B1oxNI4HN5(h$TATWSd{9nD8n z;o$lHdDy^!6XjoA_j~?J@|w0j(EyHE#cbZUvto(@FW>ia{CS0Bl77IzGb=uyDge$B z6diyrR}!m-=!9EB#7h)p?<93s37>9A0>JrbHG~a;?v(8$b7{CyJ)T{J*9CnO{}k8WGkz#8@=p=>xygn8YR^(y5ffz zFGT#0$EE!j&!a>2e7=dXQYU3DtVLTgRx8-X<7nhpv^3mEO$8;& zJiCVr^x&x>x5d75O{X{^M%YczI!ygRL=<~;90iT@#1|tB^w(|NcpU5&VyA=dt4j?2 zFOkr`{XRoU;QBM8ijf@R%MJ2qEDSpFTkBX@8o!!;{3%RAIv3$8!%vB^TOmx$A0Xl< zm~?=2W+184+6PY#^(4Jz7J1{0ki-$N5iux&UwP`&;h70mvtCB!_&)Yu>CB)bM^%Vr z1)qE@YVO9-^WFT|NyP>KuRkWPisYoar5ues|2}is)17SRYs)5+t$L2Hc4V7hn6Qr< zKzT-YFZ|swZR?Mo67g25F%(1BmP`y&pq@$UgBl*M3%(w zR+GvdmYzI#%I*H+M~CCms~Jlw!%jav);*}ODL`AsFR3nx*MDw48ij0Sgp?xoCQ9sa zNhwg}(lpHA8)S-RXBU!gAsSJmhRmOdj*cew(3lPEz#I>>HWj1BmJbaeGsbURA3C>cmT2T<8NSi3Y3v5|TW z&HfFCo=!BW!$gwXXkpPLKgKH&4mgL5C$Q+L&kwRGt3LMl@MV+8WVV~)>KT($E2}zs z*zTmnIoH*1T<^;+V@q)-9bTb7LUc@^iy9soX@^pPgmN7ssBKnOWUYxQpUR`KFa<0u z*3&fqUgKJ3YP?@e;Y@1pzP@0ykj;>ao0}W_7bpQ3R^q8!q?N-e0wt`_^D5rz0E#7{ ztYi39lan{2rHE2Clv)}9B=r>x$1ta_09XR`49F*>Mz+p_8Iqvb^eQ#iLoiGG8WX7V z(&8L_d(VdNSG2kP1uB+mYP~UD2>H$3-lSc@^qR}BG`HY(wedG1ca=G!lCZ2h*FIZl+k6p@@75R2>(01^T`b8=reZ-XMXJbKV;EGM){pJ=s3cq=) z+S!dq=Nx_w5;CucK_G`#4WHu;%n6%wG~Jjd!BdOG1W*?l5@Vm6MvU+PQSohK|B0>?jEe232pKkna*MBbRJ5n2dwTU9H z@F7^j+D-ssL-d37`7TZrF~Yfl#2t1+I_`h|bBU23FUa}p@3}1jjtp8HY#sT0Sys~# 
z@o%Wyv42+pdAruXZbM7jm(QPn1J3^986hbIM0VRvs50;}V+0#3ZnU&386O)lqx9P?g|K+{K?)(W)wbOkY z7^r~J-$@9NH7Jb8J0zOPTA%|BLU&I8d=9_=uh4({@4wOou~J|lHEqqmo1g3fmF-Ud zvAW6MYUeja35Eo|f;Zh&ujc4g!9MS@BONv{(S*=`dV82g3D*l^E2e9_^5OA8w{0}b6v6r;S2EsB2#nd2K^p9 zxG>t?cLZHM-hfr6t4qm8luY@sjJ|!d>waR3+v9Ij&1&)UvEMsP3a{pUR0()sH?!HJ z&nPDRN$$?pwi&k=Q-_%U;a@>b>n@Ni|G;+F=2DN`<%|2YHD;0>6<1cdxn!2kAPF9r zXe70d642e%h3S9e*NM^5(cOG}lt~9f$*@t75#(|DwB&+6YPeKugTD61D` z#y6kwt?)0$uC-KuzrHm}4w2sGpDpt0BJZ%=Z_<6so+b6`2+x$HGzZGtR9KAQ|nA8IR(<~LuJNEvMKsVFV(mhbdO*@?uU$s2F3(LI)h#Va0zO70vTge zza}m0{{7b&`+%Q(B(8Qn5{1Fnfy^bycR|muiXsn8XC48u0I2>TF-oY1N`OiryU&0kd}jTmUqqPvy(50{6G=v&b9S;# zuTvbN;rX!gt~b;1#e#~9)C>!&ZTvpkt@%Y&p;CuGXR~Cq2nE(!NFG0)6aH8-oi@7k z{l}ce(g1#)^XHn59@Vh_zEz zx9K_)4j&)Oq1ee2hemdoFmQa3TV?z?lEZ!IvHMYV0V#Y!b}qAAd<_#@MO_(I-dZ1) z?%h~i@l3igoF?A+(v9Z*4SJ^i3rF~Nmm|V4Q11CI-oht|T)Pg1Xb#OR$M>VL5>Om| z*Ijk}=!&2_z0Q3c4V4Ccf&6EJB7Dh(l(zZUki*JW8Xuo?m#G=HeA4s3kH;gp^H5tH z!D&F|nHH_={vSr%=D~)?DsAiUmq+`KvX;85K`vctep%LVkKKlSDxG5a%!|y|+1_Oh zNcUMVC&WUgFaj@uP&Xaam-rubU)1eJ^xu%5L?Qola=y(8;x@a4^c=M41tuuH)3T*{v3yp+Nlg#a$8oVq6T)R8@K1oIc~)KI&r{+K z%SYc0Uji3YmJ}%7kuRmuRTQy%{NCJV=~Yl6iM2cc+J(`GMZzm1Cov7`1$lhCFG$;j zDEId@7fifLz9du|UwnIW1+g+fjTia3XN#!SPTVj_bOkuU{m@V(O|Jq| z38b*vFR}Oy%<;&y5>`CQz#aN5#(yqfVpIlaRid#2%5G#WKP}pQLe%!7$?;r;HrDz>g)HirKjbdm@YaQvj}`oHZ|SK*|W7{VS#QiyaKnUP>PH~ z0JZv%U(H3mJ~g=?BP;Y8ZV2rPJBv*peU`^;qPl)dbPj#YyPqZMcD9J}DvRbca76Rj6`LOjt{*AVd#hm|+j^%! 
z|D)Py^E1U1?5!qY^Cdy@mvt9;l(j9Qsg%E-C%nA)&STY{kVVUZGa(*n`eUgvyB?id zGc@;@gUV;n&5Qc6)?*e3_R!Wb`>@f3SRAw&{ULiN=ps=a=W$de=Ih zcA4buaoUh*yvO&<0(!ya1evaLQNK*0o_6rEWym&W*b|3wOu(?&@&R0lne}E zW24cDM*1KI07jSzs0k&^WU5ZeYLoYf{-X*#E1P7uNnc|IKN-)+L=S6tZ2x)InjfC6 zx|p({v{HP1$`{M+Ib9$C)p4A=3((dxn1;N` z^OJ+(InR5Vy7yW)a*`kGcw>!(jFOJVVRg&B6!D22B0FAmiOnBz>GQh%%P6ssz87Xj zLW|Yt6dW)@1he#x1J@TwXGmr|dgP@fBbWGVp?SD~1wUSPQt5EqkF`zB+Ap4ewA?EZ zIySm_%f}QK_YR&azTz+M=r=Q{#dStYIoEe()%vS^KgFY*CETwF5ak456JRy)oO<-3 zq3W%QnwlSi7(v>`%z)8AMMZ^p;G&-f$*7`aD6?bDApRi?AQDAMSJZW4QOPd|maI^a zm7C1@5a{mZ%a`yE?m%Fk$rW*ct?+vfZhfMYkT9G#@`1O>X(&&%%C;~qy8v0JA25~G z1;@M9d6XWSUBOk1d!Q@(6s3b30Og0EMN4jr%VCA>maqatb6#+y6L~5~fWS?CdBSp- z9qES|SAFEUHo!q@i;oP>?aTQ|gD~BIS5KNRrnlCx{Be4h)1ELn`wqwo2;{IDFG{oc zk{r`4k;AI2qC#c^m`@9u)|DGdI3)r*p@S$SP1TPE=7kJ2n(3D>O#Nu{@l^R$&Y9kD zadV5XoVI(e5Y(3GENY0d(eZCC0PL2~)G6^p zXcUnmm{JKSciWLi@6nqo4k~*Y6t_%1!6at{u}E zUQt*4wa2i--yw^rPt)lm2>A#ov8}Bkv9No{~+O=h@)|l6GH-Fx>xQ12!qcM}8 z0=e_$R#_|UdIW38IK~0GOHtLA%C1;XrXSR4or${eyY+IwdAqkW#U65sqScC^ zU5|ReFCqCOlS1k}lUt!V%sWmsHPw>;t~B00tQ8=wJFMH^A*#Zf!$=Y0(5#--Jv8LX z-wXq#_R$J_1=9%%>GDp9Za%w?0;GxtN*}OB~2_i3j4mt6;I)!YBt^O_)G6ML9;&vOjz{ ze2yE~ltif|q9Aem1%qMhfP(x6QQ8e#4r> z_KTfyXKL-`eQPW{J!%?@SL^k=mj$Jtr;p^*ug{%Ho@5hdKf2JN?Ehg|o{rIix~64M zUYv*G)>;*f#uWL!pd(tXTfF!$TJE*v&^Ou>J@kWVY>VK!2*WpNs??M^Q-@N6xUyjR zM}drA2Yb}Ix0oe~*fH=+y^Q0*$}cbZ;5n)!_5wTL&AT#&u=Ld~wzUSs+S&XP&P(@$ zUHdrqPLe=?y7TnZtr6KZwf0}r)=vDIHUDb*5kU)T0ms+wZW10=VSo@hVi`qpdciBY z4MU~T_%}~$SdZSI{n$&NmXCj*I8~AKo!jc_=8dOMRbG|^YI$UL@bapUQM!WW`Rw0q zYn}HGs(79m*)q|)G5a)|{9CozFB3H}H`U4uU03VcNwHVB>|@>&c$lYbuioeej<){6 z53kg?{Vcxz(A6FNpyqe$tkllMt;m ze!{gwSTeS@LNqv2z@MxAVuVJk<6}&0ohJ7%0YpK87?#hUF)9ith*c$otV)6Jzs7p&w2jn ziciN!N4qv{p%)suWYciKx;-U9vX?FT=-pD|C&{^7M>K;1i+eVR_p#C*o{Jo+5V)nd zkiN{X^n`I)XfJCt-$d~s%UWOE(LYgtv(iK#scO?UHEz!52cfz3+7;({A=WRmokK3Y z2eXGvz0%!Rl8tne7!>ss>nv1Ni#f85Mb@G5}Yim&`AO?n#X8?^b+8i7) z$fwlX%fE}~Sy&veqNKVLUBg?WZsi}d@!4Kld9|teLbtzBNc5Mhh 
zzFNgmiue}7~3n0bk3y<6?9BczPvs3-MPxYMAhmxu=0PBc*8zdF}49mO4f^#%kD z#Cshm9U|h|fIf`-z6r>lS&ra(5*#cOsYKvXNU#86m_0n+M9Xl9jFuca{naqkr-dJu zab6>W&GdRc#Ska1MR+wmefBK0jFqXi$iN89eLmksrolus6(mFgCIKfvSH%i7p)e(b z=7a$e?emlG^CuciiC2}*yx?&#??FE>gk=J(5A_ZNZ`rT+2hiuElw6BnTammu zY6-V8AeslS{@4m6G8(|5+97k<=07J(KxHL1df;-rVDgKLt1!rAiLxJRVU=)v1VOlx zh6Y5jfwc^V@p*)I$qLp3F7%~Bat=_>R+&fv{qAy#Psq#oh4DO3WH4;`@T}3#!D`%f zG>OwPNuJr+pfv(R1HxV+ia(G?z=(iok@&R0SXirj8s#mHxkm_8ArvDXUS7PKHd1$n z&AZ|hXo9^9O4N0r%;C*qqeY`^vulI_KBy2kU>K7EtG`qYKBQc9gWp2W?6+fl^PY3B z;Q;g=WTbNV8^a1yn__1r;73}H>~Rj=xcI00LKPw;yiiSTRuk%sS+;u1^m7L;)+8I?$!bH9Ek($c1SnaHH=D$4X02EiEPk6Igu$P8p7`At^{1F7~MoJ}nWlo~RwO9qUy4?AFJpQXT z8Qf|To8UN@^SqCpbjE`m-%9UWu0nUb>al44hP?chi%$Z^SVjwnp7CrsdK5GyLQRc|Mi}-?Z(QOOSkl2T9Bv6WN$u2F$T2=b5%!Y%Fg$UV zD#k9+Wq8_R#^=$%@J&$hFvcsPy!=kc}dZgrx z&mZLHsftq%FU~NiSlC{nQR}u?1CyN5*R%|;;D&_oF?mp}v3z5J_8nFjsisn?CRPXF zWeN@x2)_XBw(Z>chx?n4-z7x(jS?5VumLJg`eQ(ZE*#WC+30FPbylA?&=t?T5JYb~ zz50l3@$woCx}K=M`GxGvp-)&5Xw!rg-_K|a##vA2D}dg{&0S)}HShm0Jlq$vJXrk! 
z9v#R}N^|}K_22G2doIEh4&0mr7vBCsGy`!?Z**3p-2vNl*WWHS#yoy>8Cly`v=$yO`=$;?NKtFX@9fS0 z99IQS?3S?ke!pniZ4OSsl?W6fFHeJFceSbs*g=Z@kj{V1u+-E%8eowkf<5_I!imvG z#vR&q7)_sYFFC6=4=tiqZ_VzUjvmn|9mh5KR1&OZOP_!xh;9(lCp5PWQC<&*4Pg>ZT-}u zmU`M7ngYW~n;vcIhO+@Bjltr|RVS{h`JjhyNAc6z`(MVQ*6XrDpW_{p9K@r&Y*WnU zE0ZJdipD>&*Vr%4(|b+yzSO0fZtLZeJ4`O*zlR^A`TJ3g8mDQ1Yf)iKdSH$*dt#r*nIKlD;zrIbD`omB>K4h83(~R#T+2xY^+1#h_Vv1%+k%_^ad2@>Hw{<4qDV z)o--F=!`kBR_2vACVIV&1qFNI=&_%Bb^*RFRqeB7Zi^U~>?v-3lo%-!4ai~`*pPbv z=TxAE8n$MfHZCqM4v3pUz$F2Qkg}Rzx1LJ<6fwW>Cvq$KDi%JeA_XZ(KGu<29aGn< znAH=UhcGa~M%sDkP?L!`maP|q*+m2GhS_ zd7S6pDTy|^R&{p``Xssqi7s*E)VGBet$MwD9Eq2Z+WUV3lB;1r$lo`+czH5vI=n#PCU=aXSm$XRJ=5-b8)d6W{+xLsdy zAA`)jRH&4dcq&3DhD#A6M$9aOHk@J8@UW81xc~I&#6mU<~Z+c~;af<2r5 z+Dj|863LcjhYc=N4`l_c#4ms+R{mJj54;a(_#E19Z7vrW$Fzg^xV(W^gsk8H)cd8_3y-|Y$voFWDD#RsOVSrN)5>KSY(lk$ zAUZOq1iR`Y@bEQZ9L&t8E??dQ#sr~8cYsKV*J_-(+qFy9O&lop|sT!d1y;~*{p*$M;4*nAJv82R(}&(qA;*3TEJslaKli7( z-8Z{nbsU`v`ZXL%Bs`XJPiAW2jQ%}|VzZ3qVl+3#b@9O!3^dFpz(L8t0`Mhzy7wPG z=ppZP4R@a;0wPEZI7zsk9_?lVt^&&W!M*bhM3D%l0_5xpI3R(R zJg;3&1CH+bq^FO64Xocv`pCQkLLDEl+duhQpKfiZ952=5nLUl4p__4gPn9`M(ic*m`S$<_*NC?u8qyr)peLNl8gAe_tnB9N0^ORa~#V<~?&* z`lq&od(|F=c*JFh_ZqT~OR8y}^-VbIrI_iKS9L4Iz?L{iA}_YW;8dChA6WTm6Uo;@ zIu+1C0qqQYZO&6S1AC|_Y+Aw^MF(y*(XS5fj|%S+*g3gl2)t)czh`0H#(~D1e5I73 zPsl?-R7q-fs)}|(C$>DEj*_aX4DcWme-(H!0Y^sph3S;q)5TwjDwnFGmI0Ryyq^rS zyPFUT17oU?yQjMsLCiS*(b&l0*-~Z+4H9|6Kfoft0;-!vcZ8%1G0*pd2U3pRK(ENY z<=R7&9>3<&;GKlZ-4&@Rl6VJ(9n0L2hi}efRB{(%yZvSjf*6!t-rESOCa%|SAaai! 
z@h2ZmVZ@!&n(9N=5B=5~*9teZ*JpjcYxBJ+@5S4nhJ!0)8Om0ZxUO~?5C#lHl5nk3 zxU3R(w%&P77tz<%cRkc%euKzn7;$~==+G}<5D^I#Sj*pkZB-Rf8^##`c&87a z@7)FV(&)pk@y>w(Gy8koFtq{1R!S&sC<9FSF3dv}dmky^IHxG#L?HpRBJteF=7h!B zV=-seF)=wvH%v@UTKsTBNxt9&UO#DjFtjc#DX}hLvp-h@Up^W@PqF{{A>Qe`_M5Pl z@CqezsDXWp12+^-8BX~a6rx1o787eQ&*QCKtVL^;Ysz-Rm_nOzN4Ki1A2%X?g1r&sP4!3cnnmvX@;IL zv9S*yJh-kw>52_Ce_K=(Q#}v!&*Y1x>xmTy${1iVW;UU}u7lrigY~!V1xpPidnAP$ zfCB;d(2!sXSJ5doKemCmLu1dfN;(ITnJNY)GLtzrJ!eQrGqkwIAb>|V{>vu9D{VXY zyTy8rTNH!l*eg_R`(YAyN$n$sBQ!uc$WOr;4CQ$#C^)!0+i_?53{K(~NS3|!%5t_7*=X^4iRXDQH{r`7x$%XD0SKq9y_o?&9-t9WD3 z_!}L7`nh%6zN7(B!xf4jjsq6X+of$Y<335SzI6u9NWY?Dn{M9X!P4QJ#B;L>)g`re zh8-j0_UsrK<6lFqIwf($;bi{1@x@cr5feW}BNUQ%Sn6@Y5(gNN>qx*-o|8Qdw3|y% z=9W}c&=a$IINh-{8>y=YXDR%%7VgCUn4@}}EZw_Z_&~k$G`m75 zm6er+Z>FwY9gm?Z>IGE)eC*Ct#LOBIqr{r;RmU!mFM;D6ac;v{A*~XDypXa&d#4rG z9|f$Q26fG#(w_9*J{z&sbneH@>i^P?iAm1=69kC8=ImV^GA=?g(c=?7GPDLo4&dN+ zJhifW&f$`_#!Pzupw0rO-?kIgL0jl&Oh5^-oE$4U+1dVM3&=A76!6ERZA`LL!_ty; zpHgKnR1SSJnB00jChLOiOBQ_x$Dg=5paqvcul1#iDng6AjEwtAS%5jP75=PN( zX^qk0y?idB56=`fxRiPiG{?Rf068HPF8-0##_uaDxuELAMxe!=P>E$pP9E5#5kw5K zaMb?gzf>&@0)-Zdz zbC6p~Jeb{`OMO7UD%Sp_*vO9XmL+E&ZCcMHaA(0Sv^2q@_1jS$=A3*NuLu#hYq7Q> z=6R-j<652X+_94ob-BD;#_vI-{h}YAZtm+>nYV7He*g2ZPTi$^^>KdE5Yym;z1;fB z`?2@&nnB64Js+5C7rhD>M<}(D%q$%(3PTUBev_?Xm&DNFW9%_0x>nKUw~`IF>6q%* zj}L^V8aKLso08bI@e~kMa~6>ZMl+|SuSQ~4*p(Q2QIcktr#;=r??chu8yOY97KHyN zKa9=u`Z%6Tn6Nv(uQ*i15FxdECFl9=FGneE#^Qk#-TV_5T*Y{d)S7B^R_Ar^cBT~+ z)=ScEtxMaeaO_u39zE()J8*l~_>~qC%W&MfOo04oNdHL`LO8`Slzj|lH;6DSO;nVt zQ{aVs$H*1ckjYDg2>w22+WE~(?)J+qzNB%f7=E<+$GD9?eoe=Ht)rs@j$tv=Z?5{4 zUesCt<>G<;P=jsXz8zvbblbO}#10L$k~Sx!!(Eu;8@6q3i}WfFbIcmtY>xxXzpo?}*BlQ8DYwdKr0>N>Kc@@M?+Qgr#= zlyTp=w5slj%^78N9q%(QQoqRR>w6ChKWcdw8+}$xIfl<@-edHn*vyVas%D1`c5yMO;a-Q<3RXOLPAk>g-$ zn0dbXm)cqH#uYm5VG`if@?Ur%{d5@vgM+@Rah(7+VxK&rxpnjil7(n6L&3-9p{DDy zX>(u*rwc7D?eOR*?GqrdB#>B9a{4CByB~uG|I>yMAH5}jCtO^`0#%1-ui@Y&vL^(| zVfGJ+?A?3!x?emCUuq)R_lp$*)>ASr8#rno!MO%B%Nu!vST-8YGBqD((#N(Od}DZ2 
zsk7hnh;C7qpHwtu*OFUH6(e4YHRkqoJ?P@RyJ_XE=InpTBIR;CaT3l~RH!tbk)7Ssz0`!j+ik19-AdSRw{!;t0DE;q%MMxj8#`ca5oRz4nTXJtI>Zg$aP>YrS z76iTP`)6ktvS-TH2p_o}jboc=*pT%SMsg0oI~jrFa&?ArIYfc@5@>24C@!h4@NoD+9saCx0up2WxA;ybCW z`p7HAEA-v$XJIiJzq=K>xrZ<4Ftj2{0{EL!5Fo>oW121x7v`=M};>V{k0otr`kuV7MZ9q$^)2t6Qf0(cEpi| zkf)IO?IUvxr3jv0+Cx&QL6b-iIB$t!Rme?PAu*$v9Pe$^;rKN_S3Z08W}=0{QGVzW zpoYiu#HAMr$vuYmz#FbPO!s=35h^!KM@Oe^i!d~cNgiHRmhQuJyfNqqn0C{)RhuvC z+IvEcEUJ567Xf z6Evaoxc*5p01Ra%x+F|!=;)aXQJDuL@Q}3Wni=K_pX|}IW78{q3i#EHlY;;MF=xHk z0Pd6d0_(ey5)YsGoLC;3j3VYwxbhIe(X#oGdLT>|C zsl4TAFZ!anKd$oyucs&N8>c@6d~q2INK04a9%-B zYVpVQEHl&#(=}H^lWJ62tErkb5x-0)%+I4TzUuI$rS)mT$n79Z;juB=AXNSaw2t^2 zD>VE>TYnx!3yzXO4!jZOie06(ffM!G>o=F-(_*e21OUSX5+vk(5H3R#VUX9}u=cjHyqv&LWS_GG8M9hUgdO8m!MO4ljPVODU?T0f52A9oc+U@4Z z-{_I^j05!eCed+et>#00{n6X@+vPg!G;ImF8)&iie3XL6!r304Ud+qQvXi?8Jd|H( zft>xVE@^RJ)%J988O?t0W~ujn+d3Fub$V?+Xzk4H=&}h!?jDh@yRC{U`>u|$l2?0D z*7bN|;`Wi)>ss$mX2lP zWo%s3C?VzF9OqoNo5+B;dS$A_PjS{S3@dy0JCn5ls?1qY=7DaH=|h6v*?+p|E06v! zh)i}b_8Rsv#`&lc<6Q%km6Q%2IkE(Ja%Q$p!ww-#^f2oJljQUF&Ie}WON0dcYfaK> zU9SEkrmA=TSOCb0#1gJweiB&>UhtF3Tl|B1Bn||r0?8KAQT`Y?D zLpho1HI^62S?8o~$&NZZPVOQLgby3TOFoOQHB#$~m{;lHdA=*}T23Gb<_@ZTg0cC= zk8ehM82rq=-j=YMgChd4%0I`|arpK;Hm-f?-z)~V1+cU*CWXvTNlD4C@IoBJHK1mp z-7xtfW0D**CnqN=?*q-&neoV*uLa$5Ci>I#PccLn3Ng!_O?kR)W3cA2Qr z!^9_fWOpVkUpHW0{4Qt0docTv|F_X`Pwx1DqOokhy)Grf!h)8xzk@N%cX3nMo{Su2 zYL0Dnq#XOl%jV|c=xU;@i8Jug7S@<^7-y4=KGN;rxP0HG2@ln>jVrSvj?-stz}mV~ za1F>b5s{6nJ%FL! 
z!bC=aP$ruxF@%SAL(5aM7UNWn0s7H>KlOLNUkv@WU$aM%eK<<-e0vZlhfbW)01VVf z)E0Ov&{n@jv|%wUBEU_pN?VbB*L&edn^@n*XPdt=eRKEo>WnyAY(nd(EyU%X^kyDD zl-`8vQQ)?yLAbo61ncNH1{y?sI7X81;r$QP5m=ko0rxjbMpgpI2EfnLO8rS>O?d)BCZB?ThUctF9r^TmFs?+Q=J5*@> z@}OP84RQVp5u%h+l1dS9ngw&8~38WJf zQaUk&gZdHLA2{=rw(W2D42CGNal@sK+AkKp3V;k+a9K{_E0CD%r^3DCkbt4!2IvQ1yki$Gv8M1LSyF@IExy#Z^H&2J*8eFQ#e#N8!JNkC#VdbKO0m98s=IUx1`Yisk1bPm}?{770f;eeN-C5_RM(X1WO3Lxh`ec7Bvi(mn9BH z0IcAIZR6qja5f*W94tp!ENA6-J3CJE9op+*$q#E%l9@n*`2niV+u%`=tdo>%u%2&T z3LQdye75AHJgR1W#{%2_`Ue2`|58W4LfB(h`;$sYBn3n*aT5d2c=hmgSQa-8|7&mn zt>#}Bm%YNGRbHiyJPRPsfCn&B*d`>j30!t$i_^I;5f>MK@;$e1v(DA)sGvcGmZzHX zimNS0zEMxVZgl>L(Y%HU9JvTr8t;r)`azHP>ea`yuH(Hb=va7a?Xb=~qd?j=&F<>! zlRILg9qc`y+Ui;AU88vKsW%QCNUZ#XZRX7mm1@ zx#92Eya`P(JcEH}EdkEN3Dh4B-=H%40z?4O0+VgL9=0dD?p{qy^gjDSg=UxM8OlKH z)2FYH4~MlZf3g7+q@eM?3S3=~-1p(KEJZbNODHxcNaFD80_wkfV_tX$*G-F8=?PhG z3$PzsjDl4o0W#oTa2>r{?YTYwv4(0?lYe?qJ--zqY=%hozvSI;JNSBdEJybFo0b-K z#5x@CL;={Aw4ckd?JNFEQNm!@rBglMS(G?5VU+VT(FvcY7EVdu zakpm;zhhee9Imp5=M|zi%x4gm*@OP(nMOlGW%}Twf+|^ug}eG2?N6&ZZWphsNW3!d z-Wc%U%Ji6-_j9}7qlSCEtp!qOdx>ix_<4b-SYai&f?&itf`nY$rHYvFc-7{x zb^JZ#u{2_x6S)3@cg;xovz(yp_N~kLrphhuf}@Nnx8aP0lvHc~mS->PVLgU3i4ySp z`R!o?KEO7ObB%X{#;v(mbIYQt1`Esv)MPpAw-@K!O5Ylo=~bYT>Dulu@Xg?vYUV#_i9TxfC`RcSq7OGX zE#G{3AWWX>Gjr6t_+g_6GyONin6Ka3Z~3Pw+?uK0Y*PKrsO@|EEe$gja}Jfe)lc|$ zN{P_Q8rjFk%eiv+3(}$K&^gU~daurN%1i!D=RVb2eEepF7-Dyyuw!@kcNsiu|E@JR z`;uklh-bdI)fKBmr9CmViQi3?8-FVdJmAd%0|!$vZ>QT}>fsYGs7p#u=i0aL)h8F^ zH=Uhn0qGA`E30wTX!HobY`2@#{rt}=R2J=X^Woa9a_roBZ9M^t!JV})BgMJ(1=JT! 
z1&@gfCD_bj#pu7kzYR^>M(GQnFB*1&WlL12q$<6Jybh9w4f_X7U-YocoZ%xvlWJUj z145Q0*Lkq8g+Qeuo>^hIxV*_3!2w6_y-D|I8znpD zN+T@o#<`?K++W{kw{g07VAgdmXzbC!iTTV&$HY~?y1B8iyE@aY<51lG?cf2h2vVd{ZBf;A)Qm*AL3JvRACTMv4 znJHX>E$ zb)}V|x!MTFJ#1NhQ&TzkV?fM|2?5~|5Qi=zp9P^C>MNo?A))O@M~MduY)h)(rj~>x z4VVylvxxYDH1ueTqWXB?UhNMMtOg_H4qw`1EB}IuZQo{mcg5GSZ@iCZUAXmi=a|Kv zjFdK7EjApOdUJh|YxiEyy5{gFCr>bZmcV?depHreBJti8n~B59kD^qfJ*N+as6OOb zDeiXej#nsuTS{GIdUEVm{#9uj+9p^DN$R1JHofBO(i&EVaozLLt^79kA_6>U#bEtR z=fQpNxLoeI`>`0gHGbe{X)!1tOMI1{#Ovf8vs=I4GwR1@dkf{og#ep@MgG+@N7Lh7 z{jrv#Cts;Q+11}Nwf6|c>GN>g@W$Sicjl}D)JLTfb&rS5C@`_+|LLz$F`Q>QJ?-YT zWukvM=k32#=RI>7Cs&Hn{vDzFyI-}ZybF9-e>L7Xr(444q36OZHNvMvPtMPH+M;rS z3soWMXYH<7y{n9znt-=-bfCqKGl|mHZ8iK_bNURvmFzo^zHWqcF@GOc~Oi- z&8@7+`Z?exmKLAwzk~q;4e{{GWf+cG4Cq2Cog>J6`7^t9q zafVlf-oJk>B}IUpU8G8n{?9)T$JHXmO#`1padDJt9A>yT)N>#L!*!ohudE+Xm$+K( zUiKL40e9qQCmVhCsCm))=(J0?u$i?MGdI<9GAuOM+lS(EtLhoW%G>j;?-1kt2kT|s zU#Hi95!c`My_)gcBXK&Tj8H1nny2@+;7M(cQ&YyH<`<`D(Ollo`dERg)zo+XU)SUIq~Db$FjwSHlG`~roi*$x72VnBr{C+;1Z6uvOI16JQoU_^Zq z2N{o; zFc?51ibj1DqS63yF7U1Zc%M*E;0riEK03NXL?jSZV^g!&dPkJvRkLwmf!B=qJOIhbrh4xMJ5@?~3RF;Y{WCVeX=esF$P#ne=D%#qS# zuizTcW5g}OvC@`VaQX`;H&dDVk;|97)qC!0Nlx-A7R*Tqr%NXuGrxEsKmlD@pIG58dX`9oC`@V9{kpRGBSaN!j)lcYU)|JN>x)`f9U&dKQC?#g}pOJ+3JMT z-HHND>e7|>AGp#K8arF&71K1c=_=pgvgBoiM`X!>TO3)F+4bXK;Oy1OC4LAdTY0t( zN^qFn{o25U?QPmMO9Ces71S3(*`V@&&54 zU#z+A-fkI;mBL-MKU4cy_@MU0sj6h)Hr@dEA>aLDql+3b4++$e0t*pc?h|9LE<`A; zh3idJ)1h;=FgT7?$%T13C;aH}8?Vl3BJk{&>mdzEdC|;BIF|ydy!Iy|uFR=^9ggE*i5tg?Eox}$Nz0Y)2+Se#oPc6#|W zI9q|d7gKyjXCAh*>o;vu=?Y5Fn)2B&>s3X+k(1Rr<>8W7Y4&gVxBlTCxpu)(fi+B` z^u%3Wr%yChVoM6cc^h0cb)R?CB(f+N42HW-&4_e-^!49DFJ1OAEkr9dq-o{&1xs%p zAWEru~C0^grS^ydKVz zzIz#ZyANUQFjDbI+ymESmX$2vUJJ*_559elG+QWq2qHK0`uRvNZJNV8VlM=cwTd9ma+I)u5edl-Y46d6Gq9S0>&g~fLKtntzI&GcHx~du z2$ikme(_`JXyZVGT#5GvP~lvfiT|ZrLlkBa^Kt-}h)jtAOt8!IE$+$pmBLbGHfTr5 z@CmM{!Dba&%zy+yYuX}iw+H?>XmSE!G(d`A1Pl?5CH^!~z23O76ix_JI4F1oIRH

>79P$@E)BGM^2`_VFk5b5JU-6^g zduKW&*dxq+Rl408oTtNIU%Q2>-bnA6_#K3YS;H@h;a zn<0AF$_kCiwwYRM5xS#8Z7EX@^V{>CjduE0hHw04Fj9XrBPqpxZ^pe3g;R$;uE%SN zF>SWjJsfE{YW_WSx-)-a!ACP?&~NuS&Blijp>N(D(>*}hzSsI^b)%DCImRsP6_Ky` zI!0;&Jjfp-W+$lI_!n&BJU!?XjT%E79!pINIOh4jW^6uL!0sg1$e$uUB9ZUxwDWX= z4&H@J?P~uMs$y>(3CGoWz&mQy@m)_6wZD(xsnK;XY zKHpPj`JH9_);sD&7Tvs?eyp#c%ivF;#7_zKH?t*KCtdcweU$dg@QZ^~+mhny%J}k! z3vyArR#H(>Db;oYmUr#ay=C?b)3vD%R1aH50tcGwO;E z9~1v(5!QkJiVs#o(;L61PGveivYH(54~y_=jUEBHu=he#%Tr zRFnyO4j{=X3f2ZTK~Y&*+)>@8U&V^0_$8rbjIA47Bt2ymLV~q(oQGK}G+U;ZQ{wzo z<2ca^;JU+}`VeorZws!3V~NxaYu8?*P$(}j$2EkV^L-G}h)b8tWV2eA(I3~)3*sdy zApJqEX^ki!A3eSS;SJF;oJ()OuYE8?0^Im2MPM-;Nr-3>J=RyXQQkX)-DwTiLSP6U zai-G|qkAY8D~gPFa`Q438NV)S-khe=StrFz@0cu**wNo>W@kP?!R=_g67X!CyMJ1R zna_Uoibu^P6T)4K^0{}FhA?n{dSQQ%ere+azu=oGnrxRZS4c&uUHo~7W~uK+@8zE< zoqk03gk7=FV{MmLNxakIo5O@7<{sOyY9C5U<6Rt0ju{`~RDR}Lwvv#vqJ+gA$Da3` ztO5%!<$#DtM#zd@;kzvkAwC^}fSA8OMx-V;7rXj3H9yD4tv24a_xNS@Gn&Pvn19zp zVWdEp+E)5Lm}X~K_S=#4lt24hLzu(qd=ULn|NIg&~KL zH0BzfP0Pgi++1_nMP!Yl+V#H+42cSg@u^Q|O)2&>yv}{#cQu=Oo<`^1xnieKwT`|n zX^qodv<C*19SUZ?9w3Qm=CISt}ncDDLZh<4SWV!+F~TJivqZldy zw5ESijKQ#P*j-*H{aRU>aJ1@+lk$oe<~$OIOZ>f_E3IpIwW4n!Wv%_QP>!Qg4eQ^S zt}FX-#cG>`NT~WfsiOXph?gJdv~n3sUfCNy3<*0K9(zqz+{UQWI`O;X%9Q;wYd6TOP?*S=;qD9m# zkMRnvROycWAIkm%tmpmzAID!rS=phYtfZ3AqKT-ZB&4Ffr%I)%Wps>WwImHvp{2bS z+Dm&6O-iM`>vwwy@Av2Wf4|rF^M7C0x!%V)PF}C)^D*xC+x<3L9akbAg=t-1!QD-2 z?|;sO#!W@(+v^|>iyGK`5WFgR3@YxR_*l*r^_s2_=9q!lODOI&P1QzUS)ouQW*zVz z3z82zbM~w@hzC0LFG?8 zJzX+0oyup+eDlDSpv{7<06lz%AQ>S>4d9*=uqKx4<}`w*;3NV875;o`_%Yw##JXbD zsw33|qk;8E8yrj`zNGzbN8Ug7^V{8RYUUEasfpj-HBcVG_;ki9rc5xy@mYK9;D{ zr9^0wwtL<$FR!P1$?3GBwIO%Qx_}wU17Z2wq-1OA{F?10)5{7QLaJ;`KEzwZ?SE=y zFn7c4*KqruB0C5Ku)#t%<8d*O^S!iH+9dD4CxttlY-!^?UwjKV=Tq2wr%57nBB1@_ z%ZK_$Uvt~Nc8`4VEVzWRsjS+w zu~2zeL!u?CDud8k)*CcDE&?efwlB(@TJkfQlano$*P-B(`ZTQO4}+z<-8+vp^(E!L}%ev z*~*nGPAR?Z_?8n(=O4gUv}I!r&mA521u0$CwLA14>D50PJGahSgX$MsU(r3tOq*iknwPd{p~||UEHlr4 zuq0z+-?~D!no(G((xNS@5SyqP3+-kbC(i4 
zSKj%QFW4koIZ;kb+|(iYWMgkr?6@<_zGil9FS|&{<#g7R(GZJRFAC33KBs9Lyuq_8KMk#; zCF?C@*)=&gyAPs96NBpdp?BH77~B_1@pc$dSo-9dU-qL`r>>ukYVJ?GZoS?=YFg>C zT-078#c3?V7-Z(1yOx~R``A!l=auAUcINu@Nh?2*!q=IlHI~-qWSP|2%?4gsx~px(tNn&o-T}pDenu;K94#^;zKs1neZ-1A-?M%sPMq!E5j- zlyGCn0bm3b-hO0?LAD!Xs#d<>4hjX9z-^GTpdLG41>q=WN^QEcVG7)U{7@!8Xitm{ zQc8TV!#$|-J4wBQfNg13bF&}da+2s6yX;UF(2o;%?>CVAEY8JHi9vZPa2@!Hx zQJJbKwyMSPn&5;%<-ld1k^h6qDax1QZB&R#sLrPl&tAb}96L^5_-UD$%;FW5%6i7f zAg9-hS5(fj&niEmhEIRBC0$C??hy+yoer+SIoX<7rHyI^LP8H(ZtM|%2!aN)r=(7T z!w8gl#Q6_4ZbZ(6VfB|3%L_{d;2-#QVXNw)!38hYVFbWSVsCP6FN;9G=-MKhc(wMw zN5nb_a^-Jge|P+p%SuuYBrY+>r;IzO*>JgM?!f!Hr}*~6hxKqPNic0)dmQwh-y$uL zniYL0CZk`dgqY_xu<1n&&uq>7*>^+f=xnz9w9}#NOFg*{)K6cKG?tJnl#OAJUnQn- z;iHthr&@SZa0Y9wjLI&4woN@L1385p4NeA4e#(=hYFs01To${#x06DqRo-t~O|tj9 zM~cDh(fuAj)_5(9iayI|x{pUj(QALr#`Z8~VeNK+fea|4P@{X!A2Z`k{M$Vm{=EQ{GE7)LXYLgIVGNR?tRt@WaRWhuKbGu6eShqvrW z4cxzKgV+``!)n#F2OB;|9y1DySm-EsdOc-yT7QIkqNBI%PQ0h>k%F?yyOULWV^489 zucAXoT$LFaaNkw8vOJw`VCLWI8;P&js$TJLo3t8~pH3LiW?%jDU0$}u>7IjXMgMe# zx@Rgj&3~hriJc8yi0+>}A#T~Hm!g_i;_knz-`27}G-Sjgc4VZE+0tTO+^VnWy|B_z z#;vrQ{j6_v-kQ1+5nS@v&y7=9F-|8(hViU%RDGhKNW{kjOrQEMDbCXQ1iXKnPj0%* z=Aq_8cBA@UTxzA5{u1N3?{*kbLsid;oDp_vvGU|9FmNDCU(lW1BN2md*-WN(e&M5slk@!OXn=OBsOx{+e!(p3-wG~Pw znD)$nv3>0%aJq7dv3^CSX3uA<`Ldk}nLp_;fq#>eUz$@WD9wCaUYR}5yI=d$*WkAi zRin=*PQ{Ncl=ySnPZjNBs2S=|v}0fA5~rmuIMT1#xSUV#RISLeg&X@RBNC!{Z!*M& ztSt2geujSMnPFRM-}~x#^bL;PA5^IZ0(e%OzCgBxH}zv~Phrt(d(+i?KSowc3|+H& zWwgAkHNE;?q=U^+k@fd4Zu|5M_!xK*oc1Z9R z%myjovFd?!K_RC4m`hcD&=5m97g;EvZ6(;O!Mtks{{3%bWOx3Z2P|>!os63C;L|X# z@gQ%T_2tpDfVMAuy0r(@gd{owySKz^=?$Lu(~x^0FMCblvl)l9yhw8Gkq>4udDX44 z7xkvRSSIYYh^H^9oenlN4zJz(%$8-jw;fYmvi#tzc2CdK=TC1v4wDE@sE{ry*v-l6 zo@GaSKI}l_ijeA2Rv+O}HoL?%dt_v@fXTU57t|WwlSx0Po=$z)Gse?OPN%!;$xF{t z2a&N4iP4<$k^I~4NuT>%lta5tPr zdeB5px#^bF6aQmjJb*Vv zUUPaUcgm}84_mUbvLq9k*Pox7SU`<~0Ly>>qs?A%LFdmnwyU=`#3I-|$kZ`2rYa_I znwGvLSwZJh3P;uW(0xIzFM%gWi0@zQd&RC(oI8KlsTU$$v_j>lTk0Rn?2kVi5!qs# 
zVsz4&IpFuDi3c;Omn?iYc2QqFwAF|s?Bc>@yDdY_ZxbH>IWr7XF*;pOPk%pK{ZMKK z6r7-*Q3Jo;bL7ags;Wz|0wlQ!wkfrJgO}657h{?dmn4rozA#`x@Z){YR6~I^_~mbu zm3rm7WA@vp7#Y|4Q>j;%R^soKJR4pLS|23(K9KQsYr#G!g1rdkLwvjr81{(4@hzk~ zt%2!2fr*Wb>aWSIFq>GU82=d=H2hEmMI6PICTv>3{!6Hnja;~tsy6pXy6iw$^gK)$ zjqC)H1R$Z=uw{!l{2Rj3jsJYA_5jV=LGLRm-?!nzb#@xrax+dh$iyM+L8@1mpQN}tvGU&bJnq?d}i(}u7)NA;M$EJc=O^diSKd%7f zJqd}VR76KtR}QgeTg{c01>npEZ8s#DEM&I~D6H6JLR0U49B=Gx4%i-m(1_Uw!J1+) z)j)3a>f;(3y9kvN@B@5*!>M;ih}_P6dC|lVu;xpL<_|Hv3!zV9!mml=RVsC_?tG=_Q;qgXQw3&cNt3J(YTjmp5a<8MN#;q9Tp=c0A< z@}EysmSy_MXwM2hwHF>aEU;`!?!5zS5g_`po&Fw5{xUN&V~cKqy*_c!CZ{1h=7?d3 zXD^nn3G0<52e9g@G(m66sjc+}$9gMA>eJcfAEKkP>+5}BGbBi_8vnq4TDIi_lfm?e zwQHZqryEOTClgxjTT)Gr3{Ee&R*l@J5KN6fTq4FPo0?KXt5OWG?8;1?Al-ly*p<+z zdLp2d)KQ>6h+&y6AvQDMAL*`a0xJnlJI>{oJ+8vVy%vMIS26)2h!Hq~UIhMI2mJ2> zM2P|8&~@++MQqoAZvX9B{hF-Rl&v&M4Xw|<88dfE%q5@auBeu9KAPZ)+6z!2672S?E*5yPteh%_leo)jB0fp93| ztNRoRTK0a1^$`hwM3(~w@_HCw4GCRsOrS+@$$x)+`JVQR1qZ+Q_${Ow_83rlZ={-I z8)ws|T{DXdGgl*eBbAiv8rip&=M>`FCvLwaNUKa}WKHJ$GzXDt<2(vC2Vwbw-=b`> z>ZCW98d2fn)oj?bNenbM0l!C}022&6CT8jJs9Iuk$iSn29KwC0qrHt~rPWJ36?Ft= zYTqW#hPPHU9gVfdcnS05po{NtYw>#`BIgChv1r9UfmLhpC2$FrH=n1>3!eK(H{eFP z2JGC+4dXHIRNO9s%JaR*w!a7L!I+)xJ2U4jA8vXcu#nm}QS@Va&fK$fVJ?+8W8lCb z)Sxl+5AbC~`koL~7O{~c!&2<3Pq07<3pT|8wf0Xl{qf^ToNkzN3sPX*0HBgM=)ukp zWF7?N#8~w!Zr`zk{)qNJv=a8#=Rxny5ox=!d+**ipZky&fp|?1{9g>%DRA90W{0#{ z19M)4YWc`(1RS#nxxAf^k8l4202^;Fy_d|}|L69abzk#+<0%KOt$a)Zj|03{i=`}! 
z#yc}#bzNveUl6Y@`I3?ogz@drosg?5#0bJfow?~T5)yR1q_NQ#;_BxZ^%865Uid#5 zAI5bpw`hp+ix?~kSX4Tb86;v{heyyr1@6yI2Bi)c0@1_?_^qXIaBv_(({OM1`!8Jv z$L6HVtqPaZCsRU39DLUrx*9o@t%~m$R!Ao+99=31ojl)LQ(mUBn;E(mZXglR^P&j-gl$8*koq}jkB$lm#K64PZj61(zTKobr98<@D+81{Ls#Nj z^y?aWdqymUXAi4dUanXVeFe+Cp>#2Fg_bbb3=QTT5d8RVFHCFMtT@4AfGW6hkhn6S zpULT^1snY9r@IQrsPRhAExyu$6S$dQ9yMF)4CSS7WFq3mz!R*wblEecAipcGfTK9F zdD1~yy?GU@vzY&n!l`_%&&&ccqcWlT0)f);`jLw`dM1Tt{v1>r_ zeMjbiY{Q*`D`u!@KKos3|LJ&3tnvA;A(q?i7fMx+{L;lN|CM7?l+IS!AYuFrv{^82 z!D$Db5>dl8L%V^CBa6)^g7Swxy-V_S_2$d=GNV)b8F&qJt@?*^EO<=o6Hyhrjf z2PFSgoB#Fi?kS#3aQ^#1e!cWlp4XoToUbId50ngs{lCi0cK`LjwNN8qOYx~-FMY){ z4HT~SfdP^%P*XE3L-pq%AU}PQ)t&Q~zE|-eoDdNbhT77=N*K!o8Y*|lNCuiyr{Ebz zj5aq1|EYM$Pq^2+v&f$Ozv9^on{nm-g8>LJ2;rESfJHBY-?FgxVFe08*_q3iFVmi4 zUtFn+KjXDP{^C4s3+8G2$v{0yE%S1{Tj!u#07i=!Hg}!-m-T=A2QL=+wr_(W#~>&& zjKdDbmj+9DPks*#xC{aVu6ZP&j;|H}dqS+ZFmraWGiOBT4;P?cypba4I$-e4xFk;1 z#QMO$)J!#LV@aXkCk)b?^xLk)OKWj(o9z1Bzw6h-{Hrj@Dld<&5&-7}M;$cizbK8l zg4IPHhlb1gr7TW8v$NJ+tsKLi@@~n6RT@i+h*uhZ~Wr*Xr^eTh|u;Q z=YvID*0Z=TdIuG;ZL)K-7K}vU%k2qCZb=t1aNXW1ell=4?HrwZs%gJGIjX! 
zyH}4~=?rXhwo+!d%Vj+jG2!|7VV{VYsoD16G25WlXSv2k!5<~%+Br|&V_y^yTv+{o z(|edmQd$EqzWWKv6fbZ5zK?W&*Z2Nfe205NneG!-k?n1sr&5+p9!{S)y0+}QO7Pxm zpJOeLUOOW;*!`&PO4uRxRb^4qTG}`C1b7oZm7FN~w{_(5um`m1yGirv&8D_Lj{2G!It?zTbzr)dRU_$=?)Jb(^o_;rgDuXEoZF9X zjyTNXCt-ChM30-sU&T}N!O#*>B@u?MoUC>3`L%33!wKCb+5R>H@1o=7&Gwzh>+?O5 zpU>~LP3)Sm(iYm0&nwuu*mfS22ymnMvDs#))@VWrmb|K-qD*uoSzVHLVP0!5A ze)OE%!)&BGZqehSYO*~&T&Jab&1$t{yG|MWsTKI;$6E-ZgdE&_l5PYmT-IH?ZX$pM zpd#^vg|{?Pp&=KNb^*kI78h!p^N4o~cV~xcrA=vD^O>3y1l|ng?b}zaF*hWnSvh)D#Ix*fYRsW+ z+4bj2@SEt@_vUxYp1v<*6u&CIFm3MO&GEC7M_h{S_ml~JE5vetI44B!EKmER&LJ}E zl2cy2y}whP=ZsLqeJL(QH^Dl_h1busL6SCKvqrmDdxjDTzJOfgPm4OObfLgxAVWbWz4wf>cZU)TkZLH0qcq6rPxK?;*oJ=%Oq3FHa`^F zbN8SDp5Thj#-2?*OK4fFuY1W!fwjW=+DdQ|#d{YHX>rI#!yQwBB_|N}?ZdN)oH6;ak zJy&rw0@Q`rLc>UDoQCf2L4WVX^`DZvxt2}(s)Y6R7N@vhkTVgCZPQ&UZ*=~F-&9!T z?ezFnbRXKxWEgCfEUK1+(&#*=u>gPF5j_Ho?_PV2H zTJv@<_^sJ@&y1lJ6p-2V+v+%vaa`Ew7OU~SyeZ9CZny$>#Nv_3%X zBx969Q(tLRz&mVty1pXCL_W#GopP{f>?)ONX*Y-II_0WEJsYeWcmZp^-6QH>+9Phl zhchlHV%63g@oX!9^ckLhbz&Dsrj0HyZ>=*;y_Cv>25iP;wFGG%k2`@{zJ#&J;-#^2)H_fPAY>rQ! 
z-M_c}qyNZ@jqdLiDn^yhRB|WCUTAM>5zleo@zS+=^>}QTc3MK!(bQ!P+!+>QoO}N% z;(ne`z2)R3uEo{x_c%aZqF8s!XuCi6Gl6reX^9qeedol>S+X88?LYJ#MdV zi?WZ=r8dv}HCxsUy|Mm4WyQySkY($zlc=s&UB-o*{7%K{L)W;NR+@b-Rm>@UyOg0e z*~dOUUcNl~>5hPb(u2IE-Hr*P`i5n-cg@*_Ym_1cJc^uXlDPL3`=|x+7SHZVS8Sj= z`3Ed*HWXNkhu-f0|ETInJm=!eVV4MF`!MPMZR7%vVMXyT2ANH7UP?8cywBRQcM3lm z{3PdF3M95V>+Kbentfw*+98RV>x=y%f_ix_I7zXquUN6pinoQ=(=&%($4EW8tq;kR$!B)wf5biRIcLEc;- z(8G_K{|k<-{IItU2Bz)M#J{_gih2#^7{YS*!vC?i7eb}oMfx7}>07*8&*9>L`tAud z!NWk%wqo%Q?Iw2dgi3qtSWe<6UwCw(DDQ-6eCDRFtH|@b=M>0cU?1gx~yLi`rY>>LwSk8{0T_ag9~${iPt;)T6lMed(Psgkmd$b<;(Vi{VLX)da8lT{o}64GzDLnP=FuYSt*jrWGk*ZY;FIgX7Fchl@V#^IM{Ub>#c$gyywqRmXGv1jOF zQM`*lyKIy~@0iPQ4^82th3h{~=ZTv_HA>kpG!cihbh~c7(mC`AEJ8vrVR$*O&hj zJ|rw`Mn)sEDbXBYcHgSQwn7YD*Vsr!NCR-4Ee2QrLRd8!?H~bRezJp1WI;a$KV&T| z7i0zhS3E3H3g{^v=q{)qBrI(I|D|D_GJ+yw&@13=2`bd zm$6c*S20dbb@6VWQs)4tRh~~?Xf$_4N-2}_tN9`emM(_Jv#leYM!dM5R$W(|pS2M- zc~p8TC+~}1i-rDm#h77>p@2GOXWKojL)W9SDQQg658=w3TU+89_wp=tTwiWX!9q@U zO=*=xt8Li^(2m?BCf2)qIA1tyu35lvtC!}>fT-4rl`A?!-S3uvUv1F5|K3>amX7we z6~}A6KlxNFtP~fnTj$*4#r3%*_Yupc5O2YRO^18FjWuX*%&fbaE?quj{J$UuvTntj zgZ3c)o{r7YzxfU(M?;G*Pp(yVe|T}$905zX6MyyhF8$6O;R7X zRoSXA+%z*M)T7+clRW5a#J}36C#U@E4YRm2BkoqmZhLpjutc>{Cq8hs$xt$t&k%9n z=f>d6BN~4tea|CAQ{!) 
zg71uQj7tUl+WG5Yv+@c5`0V?Ma{#xxh>R#a{2|U=BKGQ<;e|UB0C9{imt?{H?-BeT zV%$bZC=~Dshu8*fu4ZUGJ-LgKtyqyWbAn7ARIuT z`Wh_CG06pCa(p1a00y*Yk`n+Bsya^T6c@A4r;rdOsA(~8*?8s36#!qPtYvY+jRr}F zHfmjBCV?~9Zalp1V?4lJX!8jt66_-hUEO_{NlO0QN^oD`7VdDX0)SPYYR!e#ng3fT z=*;k%0Az6S^5uUF4GrT@JWq{KO@DzIn*5Bcx3L0XRY{nPfd(X3(cU_{%J|ALr}}xO zsDbAtvQKwOzK9KAr4pfcvvzW#2t20M&0-Dueq_GBmHkU!xq;=&`R_aBAMvfZJi1lD zjTRBEy>SB9y992hSx0s@&rEG`R7u+4?Yu=ObC}nz%;T75*pWS>QsF3Zss>nqrvNU7yoOuN!*&k6dw z`}9m6IouFP8*xses)qJ}-yy$`rtbf!q%pZkpQgN14(rNKu6%igL9E7GDd~#x=a1i_ zJY%{9I0e#60}n==GpeW)NE{|@UI^8h!uWVW{t@%XdWU7_CZ^Pc7)=$d58}@VcQVZ` zuko*6xbmv=v1ppFkCAsw$EW?J2VGfa7B+tSzDa#}_SC@usfh$fZHHu+5e`Sq!S5Xh z9xs%4PQ5G1Z@Eprh=%Y?P=u~)ySbm&r=E+)hUzDjr>%tq()|aYwM!2;8IQ;+Eoil> zoa!ztc^mLSV=m4`JF+Q!nbWKBp3smf{&*)|pCdY}AoSP+tWaaPvw*m4plV{DO0nvf zgp{3^w`u@GxtH)NhcEyFQ=Htg6@^7bvtzZVX0nJ-S=L)%SKp8h)h~gT@YRxCMZ?LOK%btq?*%Qd}-Go zi0WQy!7n_k{V*LPqZ9}qK=$7&Ob?d8shy~*aZLoheR~h6e^+=8heG6fW@Z#?H(;B8 zGOCuu{}J2#;90_=gn$+@cnUBxB8_%$ILOIKybVE}qfl<#xIx5h5cy&A9myAjPmWHQ z9OfYIy@SoEi7~yCCLzn7aOmjj!Z1i5TQdGT%T(f-Ry&V`GRs$kG+*BqU!uHK5~%Dq zrAqQ~xXnFO5Jz7{AMi>>PU)^u-j-9Jr6qzd`ZRGl2en>m3RBPw(X3Q9ej@KZ?iPKu z>5i=T!4F*h%lwq4+WHme>=`!Il?HTDr=@k z2dNGA+*o%}-hMuKQ2lj+iTXH4RP{UN+=n*0J6reVY}zu(`an|ZUVY+iKjw&KZIaoR z?dt}0%hICqQxv1;#VCcIUNwcrk#UYlDM#!K1a=KCcq-9YJ+x0iQx|2L5> zvqrR(@fyB|KZNfk-OfD3c0K7t%Zu(FRkt6)D>z&q1U6S12kaGQ4fEPach)CBa;)FU zxJ*WRM9q7io*S}!2;_0<5WgZ)DI(pJeG0=9&%yi@$A94$VvV-TpbYHiYdwkxEdkv9QZOS z?5gJo;b{3H_BIyqJsEld;)U5w_NPx`#~LR=vOozz?Do%hmj)W@>gvXGXh}as$TV~z zIbDg)Pv9XBxL1t?8DHe1pc3t?X@{s=E&O{XFP)*1m@I zz}&rn$2-)&RXTi#j|q7b{_u!#^^J-YpP|5v*k)5Lix^pVi=ggU$-dp~J~xZ!h8Ff8 zNGb99l-d6E+sROQIwe=N;-Ysp^O?%(=W|s$!e@_-7_MEy*-#VK-!yAPSC%RrsNjVp`64w{a8ZFzuX4DryIUOpb2EPK<0Hqo z6K3)!gD>J8N?~5_{2=hB-;%GAAGc)5d>%-(9M% zaR_b`;MiE>KIf!l7Hv&wv&p^kzE50wNf*cRT2Ij)x~4-bR`45CJ&zH*H9M7PqB+%? 
zaQ16|?;|HyzEgv9KD0NS_Pw4s&QhY%p*%EOHd*}5tH@++skU;mvKC`*cV228;JQWP&!bX`}|m@0MNbE!td=O4!K=LOeo#6WBPx!8#G&GZpYoNpKyA zDunXUl>+@W#!JO7c%_b6#{>puwS^a9);59`+yYH#V0&6Vegs2RYnrVE1h)F?zd1kX ze?2q$9Wf~$7bj5Dd|pVs}D?8+wsqS6)6w!)hKNnzbp-u zT%VsCvD?N{y^8$@l!_vS39Y-nf+qDzpDFwi#Hw#IOv?Q>4r)!WB^6F1WqR7G*p*EtBD zHA$^_vBQ_`AV;urn9kUvqbCKHvm29r@>IOls+vgb?==x|MzP_CL3Z zk1_-*TPIlTtQ%z0bx6H*Of<1=ob%a5N8ik^=_e5@SefPz3x}^H}%5WokEOkC+DLWYm_|`x`tVlz;%o?D}o&R2BL?d@nm_}fjQQ!C-nz6hIQ;nmKR_WT# zKBgRPXyESXie7e((XF*pW6eVCD7%hyd5RBB#6F?Bc}X|HCu(65kUVok>*qDcN#3Vk zoBa2Te&SzoWN5Zo(1m79uk5?l_3`oGVArqpN?|R}OA3lDk}?|BwPw0}9jd*#ci+PJ zEirxXoh=RqYi?GGcZ?)%HKU(9pyWQWtxDsnBgN}sKyFQEdfYbAZBaw7CQ}l%+nv4p zgs8Rr?Pki79y8`Ao>7|A91&>Bqur}L-iNz7WWJV9bmF5&PiVrF)04SW*|TRKW*2c= z!iw=WX3~&hd3bpIM4Zch1CqciI{TA~SRmn@!9!rCS|@NE>&1Sggr9Imz#a+>Wrb0p z4)ibkyaV`6nTX1SxL8e2{w(B(I;5TdMBs4;Zi%r5mOr56oh0AhwgJEC(&n!4T?Eh6 zI2y;gQf;A>y0$bpxV<^j!LK*^!&N)cR5>n|oYRj_iT3saZz40>@87TV8}l*hDqsoR*Q))#e?&@w8W^O}aaS zIBGTfbnT5ghTD&rC@0;L=H}g!FflR3p5)*vRU6f&9Tr(DpQ5mOBw5ksc1?-UA=&DF zVc_j2>S zbcU&ks1g1c$>btgZq3VT2b(_lF$mm(W2Uf#SaPcxau-zhgYL0 zN=l~B(0+v|7La-p5)!IS7MyUVVuE@ZiJZPSjbt}!+sv<5C0GsYo~6kkT^VH;&y|kgKMw4@I!pziTw$ zoI`92V9Hs)QziIfd5AP)P#4frNp!|&l_U6sk%WwNhJSv7X!xsq`RuT336Zs)yeCRx zal&4u{=CZ_Xz|#NMnZJfF}OU=%7oJPy#ZH-Ax}y1k9m{I=JiYyLw}Fd zlR{TF{zlZ>H`IJv;^#g6)}|@%KlA^g57i6KtcxG{PuWGj!Q;(~@WSj8l*kCc`WHob zc06!O2H4<^oeqA+sle<(C^jgWC>WL*qzXUy!v**Uqx8f4`~)*c{h*)}g5L+*tdmm? zfb$3j2~>c+_JOM_EW=k*K&Qw>@FRHPx9w0PB!Puo#1=9@XmL1YCm*(J2G30!?}DBbO#~rE;>@+$qdr zX51L}9B7D~FqNTc`{=Toz$NhKCiWd*@op2c`7k`iiG~s#38;_lKvK@P5;51RRfM!& zYVmWw5^S|WaU%};uTppaecI)Ldnts%Db#IlVR8S#0|dQ{z;=ec)@go9kNC@hwP9Y8 zmnX3_Yh6C!x8iC;Cl-@F+MdO4yhNPHEjxGaH!u#m+kFsOb{M`(4z!hY})BLFoR#|TGW zdUqW&d6N_(1;kPQ0UAKXyac^6CZdN2v)jW6KW)tp0S`E~1kV91&{3wK-Ik;z!K@#? 
znZ&aWL>Yi=Js|P{LC$?MMLzcA&#y>ZcnGsqD0WLQNWoQp*uc@y0uu^Y6Aldz@7}r7 znWRXAl#TFjwZ6%->6y7W#)rv#Ay{fF+>$6+pqZg`mpO{>i$iO6EXs*YeI9N>P8Lgjxuh1f8`;&n~NY9=qNJ>rUDfm?^>q7&9DJv?qfq?KDb^7oQi?{sQ$3o&V- zp@26Nw3(j)eucfEp{A~YAJpB}XqvYG@8AgrD^F%0bt_a#m>IER{)Fks{#teg^)erB ziGoI~O=7Ttw&i`*Cag86G8YXh_lKR}fd4+RF~-9tDr19VnGtzMx0%YatWM~fP~fo%7FRFrAT-w=zFKB!Yf8HGpqbt5=6h27g58;hW)!A`m~( zI~l|qKs*?T>%<=F_O!7YB`n^HEAPo8%#B#^7YfM9hR){WQ<676(=D9KEVIw^Z z`Fk;)6k5Az*fsQ=9gdW2Z1(|?LvSo_?2i)$>-1m9DUKd}4r;SVr)rC6S63Ho&Y$Cz zsq5;B<2oe__vgq!0xcB|82{odsE9XGz<3M%ZVk8}?W=~Pl;PhN9iClL(P`pAUw!oa zccHSx>L30QTUG(9SAmH0K*34EJ4jst$YH^FG6%}9_a!$#%O+;jXbK+IY~8f!c2LkB zP^bY5`82K5f}y}~hXD^SuV)?}pYb`?3={Pxip8!8U_6KQzw9LQNh>Qpkn6?_1^4Ws z%4$1Jg9q0&-TdnvgWnlgGf&UhI(G!hH^f1PEmP%i7|Pj-xUuDEi~y^?f5VKlcziBU zmDfaC4>ZxkwG>vir*I}vaI$tGj8;D3A`@(zp`^l!ruvEE!v_FrECQe?#4{3^Ab2cD zbGvZy;zvAE3X=8+`g%%2f`D&u{ZS1z1@3iyu!ZFHS#;#m`a4b=5Fs86>O6!LBfN0H zlh_nub_PEL*q^*RA&e?K;xJ_v1fL74s`Fn|=l|}&JNherpMl^xy%5P0g5s)#6 z6BxfYqM*CP_X_ylusG@Yv`+L5J_fFHcBOr_y4ZOjXm|AXO2QQrUx=87x3|CUTnX>B zs9F%WiMt{I0%}V#8V_)szP}g`d<8y4XN-M!V`X3{1K|$IafT;j7Z7Q_d=w5>*QXs| zXFoC-<>n@x-VoU&p9D%%VLUdvLcK{+!qnkzEc?rfikcc4ONbp`5q@cg_kQ`l@m2c? 
zS0pK7kM)E_WxTg?)y6S~Q7En+xurqQF~-AwBbN+7EhqM%xM`7Ma|&kCg4P4e*b~|x zJ%0SFI?~ZWnN5H6+D>Mt+31Uo5Ec;UQItZI0|yRxjQAPJDk>IY-+1(=0AHa$X1*6< z5b7oB&JdKaargp3e_z!Fq;WJlW$OWiS6&@1H2+)6-D~Ce4S>G-zXPDpeSI01ok0Jr zUmLjt2Bz~WG|q!f7n4EWmRsulThr1`JTEGbj@hMW7nV)~f7e8g8Igpfr3rzJPU5sD zag9o|Gcz)}0;-GISl`g_0_QRjfUuOI-1|8?stwD+;sDWr4SU4*pOA8fc7q#|0u*@g z1h{bPj%S$ApzaSlOYD~N@7Kw(fF7TE?e@#Tvk-&47oL%ikjJa;P2ZS&X`6ruK=mOMg*OwB90D2`1tQhhCb6dotOw zZwnx15@wC(ufQhjt=>9VjSK{FEO3a5yO2rO7VNEhI>xJZVP{81rJ6@HjvpE#L;5Z zV4nKS$LC7?5=Gpp*ji9Ppm|sW)UuU+eI#UZB@6ZirKP#e&FSYF!~W|MGGSU+sN_IJ zJ-Z~{xX}xDP**4p8t~F#Ll_+w2S@NbD4tlhS$zGt9AZ@HF{G&`X0wS`+swpZ4}dL_ zzIOKcp%W`ZYINcJOSnS~%(AyKAul13NtZ9LxDq6oJ)?8o%B72Z zB>SJ)G*G~}q1065xABrv7Id*DEP1l2j8k)6NefToK!10~2lOUbSGT)N7*D z%5Z4H)BD{s)#TIWT{0(5+#LpO{KVAx`qoh!FbQw|p3%=b&|%$`hT} zIA03W0!S_%q2w@5|G%QGt0UxBlaLx*Jd*yu0auP57NhtEcD<{JRd|UffDB5y+kJ?Z z#kanT8zy)F03u22NI6GK5JfXj(?8f>-+y?@lfqAbb+tn@m*nMhi5vzu?T&@@jG7uYDP?bB za9;_{{D`IINF=VTSWL*xto9-y0UzSx2uQduJ{q%bNIy8xDB>w%J8(lX;a(`ua7dkk zRS^mn0Cl68!ZfUc&WBYOK1TTWGm!I-V#<%?n)TS$aDdsw%xr4jsAfO3oY*;+mv4Y; z9*lwu?yTJOusf=67Qx%gP(NZA`7Z!3++}hJry>pPCn`Cj(lkE^%;46|L2@ZfI`-G1 z{1-!PWA=!KiOD$|o1~53GLnVnW4&Q(v&Wu8ACTh1(I%rW>{DsTP;NfGTYYn zpZQpu8JissoW~|HOsTEIe!g zFc0{yTfaxiPBP!{3h}@h->kz(2fLGsl{HkL>?vF$Q7hEd)$LbW*=h_KO8nyzOu0=NV82k$@nAlKAlY$?nh0~Ty|4U0F6VBq_A)MW1c!sbH zkqd*GQZAAX`Mj`JqQFV%A@(2;3hFUP;>?iLfpY=fl@MSA+!Yk0E5l1U2w@j^!%MCK zjQ|x7j5vh|i)Sxio&`uDBRUsnE)CZ{^zr~-P@7$7$aEJEI{a2Gta{_tt$8@8u31?@ zhqu{HTx)n1!mnFsgdl6PK{J4&<&dr8*$ZII;I>3jeEr*(o3NH=Q@T{klabP-re&Do z6kk2q+p8ElR69B~BOKO0!uIEN-pjkY%$fL+=io5TCW9R|k@V zhSwX%BwH<%ob+%{BD9nl5T6Q@DCpijDAzFNhb_fUM5X}6gL6(PKn#>a56_I?6#N9r z3$cfzr>8GQ@KBkJJ2Je7I2 zJ~Z5g1o2&Z2$sJrrdZ(gLNUv`Rs^hr(AnSvyR}4%yl<6jcOfAf)knz*AcRDj}-Fg?V zRY2wRhmpC7R)jy?M-NnqLYv?%*uY|z$%?qX2)qgnd4{B7ab2+e7yYDRi$s@5C!MDX zd&6k#KkqKPKN~e;)ajS9EbCnz6@J6fGt8!xHc^e$Q-S5zGIktk?aYw&IpTsfL9}jQ z7sLIA7S~JUGgH_wqpHenQ*?9`L2U$2f-Xgv7Jk6MF4^Jj3d%Yrq375Qnp#?J-LdiU 
zUC4I{AFlZ12l58c9{%@Nm6QRYF#&Tx(9&CFDRd7!JXM_nX!}c0;;dPE1M{SPp1Aq; za&k)BnxKUMkMYR~rvYtDmPuaDEap4ZVVZL6bl?E&^K@E2&;_6*GrBB*t(6N15TP-W zgC6GvArRmXUDG>-Xcd%XuuxNIeNFo%%4A7^2MF=zK9$?%DU*v-cDzHCb zw0F?i;=8?$i>nHpfAVB~%Z)){qUr&72m;9ajc=j`Mr=dT@JDOrJW$j_Pp|4a;Kq;;OSjinsowh--{wRmT$kDKu6INm7%sj!Z0{LPr1&-SnMn#Ua2!1yKjP|EX_`K zJSNONV$cnqC+I6-oqdSf!(yhcMN%74(SxN;&N5=NnXZZ|m9pr%6hJz0^(+cnl84>G z02ouP2P22VD?zA(xEwR|a9PoZ7@f#1jU*vwEG%*>DYz5;5G!gY9W=E&9(kE!HJ(8wu<{Q=QK!Yd}s7aqE`*7cNm40@x)Z9^Op9uk;( zAIr<10@~u3LW(-Uwa_}YVX@$+R>tbdP!M4u19TYVJ^~8?j7DEr0MOKP_&2 zOuVSGz+56eMd@D4hDFZElH`V z3cgm>l#qR%jS1)&k|Y(wM5oGoWgPKZJwF!JhH;u|RC>&>o-dB)uHQaCQMOsRnCf_Q zg^!67BVi7`ye|_{5?DI9#-Xs&aS2X9Dj0;|ZU3B@=&4pvV-Xe2loU}=P(V}trEiQ{ z$ZMgZJvy@E9F)WBl>=GBZWd(-VdcJv~11+JgrVO8R5Z1UCaR zLFcD&J`cLtDS%}81qCE%8Ka|WgSCc0x(m@RB6{veOS&(Fv%_#*VeDi(c#!WhonrFR z^x;fCzA?eZWb?*0!2sW1DB8LICyG{--jdmnt5(?)6*B1kANr<;o>N=aO#*~VPVpMm z90F8?YnuXn=J`^mNNEEDHr%_i2_p%Ki6r|Aeeq#~ioLhf&I?le|aGyJp8$UB|A{Gsv&Y$*5`<$R+*`EE7w*o)l;i$kS+-Tc$6?eiy}a5i7f`e8 z%Cjw;0s;-+dfQTMZEUW=3A?bc@V$zQP1Ge_I;|A%ofciI#I<5#V?8O+htZ=F+ZX7I z+#AvyX85qs_JGX~mOKT^p9bgTnx$FzJS1)r16V-u7~Zfs!Dilb6rWc9hKX4h`&8kG z@~d{Cx?cG*M)V}-0PJDbiJ>P$oDw76Bw)KN#4CRUwt4oT!aK{p8mu=0=z-(31hCs# zPi{LSUuG@9=C_iQj{$&Sh2j9WBI3#9dmLLhv?j_f#k--RRKw%PU5FTAl7x3X$4Oer zT!A#B?_n_h#P%3 zOYT|(!E0;p_44wbwQu-$L0yo8@CrJZj5h^RZyF|^g zMS@!H4;SFTV`fp450=I(7&T$>Aj(?FuVDv(Y=Q)h=T(7&fgOfkMNVGRyM_I0R!jsr z6<_!9FY#zy9TWNg@%0vPQLbJ0@Yo*v=)nNN!U7eQQY367Rk~40NlB$qOb}2~q*R&# z1_Y!e3`9UcVn9g+0cq(R_^%7}Jn#Gb-tU{=^PJ!~%suygUHjU5?X}hpP|I}>bdj;1 zsaROSw}mewE(Xos3pz~CrK7Y!%0R50&%`P3G}SNat#}&i1%#dDq}2nq3+X11@tk;m z)RB4v0G=4-R*HFhlC^igfMJZxjkA5(1}3J*Xf~2LJTfyJckbSOiKqz_>?8(J`VJNC z_u_N{RmLX;Cu=f%b0Hp&9(q&7Z3#TSqb`jL-G;q5Akc8gVT`C^UR;F1V2za{j$A}i zxJ2GTi7q zL<3hu*-GXDTC{yS0qP18)4RRZLg*!v7KCMQeE=3MM-3}0EAit-q(F9e$bRe^I@M|s zukon_%|em!9%b6k*v2LKKz|XVcCJy58Jm!De7Z6J{JO9`ujS09ODwrMsQw5???+qQh*f7K?Oy(p}#+}DHJ_`Tlh-fe*U}<2tdegL>tZ9 zv*f|~?Nh!5CKNg`?+{-x@rFZ{4b`})lvNA=$T$_`^hKBEK)1Cxaf!?KuU}ga9l9vC 
zdgaPipm#eD9)tj43n(aH98yoPB42?LZ!Rl)i(7X zghPG7ftT mOGWI}Gq^}ed>y$AG!2%%^7yXQh;V-wf;(yb5dXX>-=>5}ujqtj5D z6ITdO;wrwS@8C&8Ss67;&B^|QYT35FSbH3*ZNjd|fJl=G$l0#ovWY!kVF!$!EBn!- z8MWgjWo7L+?t#iR|NQxdA`}+@@C{`Z%Ket^?)t3`>1bA<3??jl8xKnXR&WvCeE4wo zE612)adB}Ak_%Mkok=VHY3yq6!=XP%uOqx&-=Y6Ou%jpZhhUfLAY55d@oRW^=gysX z5OS(B9B03E|D>FZ+GN7<>&NUk?)Vw6@C-WY?KLVV!Fr`-?jh8eoO zxqUrs1pa7pD-PInBxB=ZT@$rp%$|#p9_BHyoGHDRN@OC>PN`}*lG_2+!Six`wvItA zEcCrxTm_##tpGLdZm)E(J?e8*5(!0}AH@;va3`uej2iBVMaTd+!Ms6}L}A#|F9b4a zTk=>u0-S-WaUGA$=&2P#v0NAGsdkJhWfc{uLLM$IK;?4{y{M~|6WM}Vrly*GFY#TS z7{iLB?=EbABfE3XT_pE>?ZAM5e}?1mG6VPT%c=Yct>1stPB~fuU41W&_=xG@NK1-Oyye zKJ8WM>X&Z0$2glbR_CQJ;^Mn@zPI6Dy41%Xhheq_>c(JiZEDvEtN0VafX>hm+_!%} zN|AiDe8fZ0Y9PrECZin(H2_@z@dZOEj_da+9Y-Cxbk!=8p9%k7eKXr15s^P=|GT)!x?ht>BZe0E@@_kt9C86KQ~Yo#gfUnQd~8m^`{3<>E_ad@y_CR zly`6+2+n?6&U&@}wON)%k8tUr)0LV{(%k};pBvKkcApQ?hA$%2dhyb&7)f?ac;e>{ zz+!{Ww$+Ivy$urJ+yGDbf!YOHU%5b2=!ih(81!_Sa=|3Rh8kg``t--Zdg^Ng`l?X+ zG~Q@gX(`aK!0^<+UuE^ZIgR{((UE++kdgq9(-_Zw{W=IS@aN6N8`u_SOP9IVpM)f# zdvoYJG%bH0``3CYYNRidzpN$o_F1V-oOnFlTHsl2)D7u3g_CNu`8nz(9SE>EN@RYsf9&8jV(Br|4G zOJ=guYuagX;X!W=)2`|3DjEmW3U8tOCK0hqJ>`jsyzAQMgAxx!J@lSs64Ee&6OT^z z?o~O|0eIMNG14?u#h51)PO047K$x{Y-z(}u4A1IoU$&c_{UCC4tcwd;IjFf0gBxKB zuy<#PPyePsP0DY`r)LiGaZgQ6Ef@UIMgu+Sc)rq-6&o*T9s9HPZ{J7}n-iow1PgFA z@zqdQUpYVTI_lMp$YDToY<@o6<0vuC8|-wx?)bECAVuk`%0QWwQ~uZDn?HJ@#+uz{ zp4D{BQ=0f7Ajk2mtHCdK|E5)WnHl|4udPK|byO&u8+ro9l#hhuBv8~OOTI&jHKXMLG}uKn2Swf7k|J&x1dddmgkJ#zk*SG=wvS)$uo*Og(}wv1w&dE>D4 zw444@#VwL?5;n4JxqPW5O+L@=z#!tjZlg!g7pwIwOE@DF*jx-HR-U=DGqo|amIxA^ zq8Mfc;{(AfQ&wEl8yL9H!S$D1yGDpejUV%fu|VleHj9)^j33*n1z$E?->i3jiLU9x zg$C31a=%$npyFiL3OmB3fKZcA>wsPG0YK(EV(Xl1v+D+;xiTh(qpBrGAkrTYUZhbB z`3i#0M7S`Z@Ksb+5|RO23$XvqetkPZBxOjmf#M*x0C1i-2sz|r1(+BjInxt{s1_jk zGx#<`_#P+%aK^2Ly(*M~1jru;=SY$*G-+H44JCvT>}c=MP@0ht5(}db9<0lcgI>jz zJ+u~$EHu{LfbNALm0?~uVE8mwoY0H0RL?HRL#_hLev`#qYyRc$Hka?1b9?msA7E6x z&1hfKd93>jF}*Xc^m{rD^*2&l1tQ}nmYX_pKh4<@B2!Uco4H2Vfzo5A5Oj0%Kh+O@ 
zbu74{`^A(qq=Y44aG+}X^)Ov?D+ID2{*=Rj9VB<*&?E88voGt*3~ zw<6t@HMb*5fkTuVu{p!fv-JY$FZykTz|4`gMj%#;WHghj*U{)CGJsTl^^iq zw;5r|i>>^9eUg8e&OFCQeE4K{^S-jnk?TBenEmy%Dy_CC@bFA(jPbN(t*dWFiR!U+ z_(F&lU;7(wx=!t?C+kWI>uOzA=|Jl-7X9z-y5_%Xd`1jyr(KsxC^4qb^B5SFa&j*= z-;tJhC1RKrO1Ac}3bZyr0R*@7nYhaTp@QzdTJc5l@BhZ}yil}NY(<0ff9ah$nB5mc z`vU;F2mlZnomF05j>6L$!we^@MNma!#o6>_kH$)&qTh)a1q276YT>6(a<_6LAJQVn zjpxW)acUV}Sd)(f0>pH6BiBwXy8rNDI4B|7Iyz?4Q#Lq5ijl2JJ%({M>oWFLOf7yVE`IQ>wDciK?m7(@4ibm{sA29+wyk9f^iOAP^!q2yx zbAOsR3!8Al>m=|Ff^OA=t zG_Fk=I(8T>q7!U1=f*CqfuH9pB%leoE-h9}Hu-FI33Dz(kJFMOZ47ry_ew|ou->~~fp^IvLr#d6npXQ&OWhdn zar~jHrd8v4FT*d`^pQp-Bc^A%hG)jt)ztUpko(M~cI|f}3LbIL)LU|uHT8#c|I@H< z8)gsO+L8J9=cxUaA&eApWvx(8ao@W6ifM1H{x=^hwxKOO2YiG7hWW%H%O$822T&)< z)*U-s2rVAn77AEZOP9rF9?r@^OHu;p5t=MS1Bv1qBShCIB&ZdV0i?gar_!#_jO|Ac z=K^2_Z$#>ANNrWhlu9RiU9?Fr|afcRy#R4muG8e_AEbvzRU;Et-~Gv4HYlG z{}D?9lJ)?bnq2Dn^SklT(Lcia1Cjb|{Zv}=#Zj~@fcRrL`pPdYcKlPJ3&su3t}x@Y z{yXi;hSHE#Qx2`?f&?Fi?o^4vc5HN-UcQXh@F6~)dqkS=XR2+Hcx=K}qqd&=Np~t% zm2~^?Ea5*!lA@BXw2N7_jm4jxT`dgbjrLJj3Tu=dpyc)EYRL!0T(eAW?|v*P?pD0# zwKWUHT{U0G+LHTqP0azth2pD?gx}mO+1)G7`79t!%iSTXW?M`bgk~ zZ)AqHyF}}gr`J7RbtsjnYZZU0(za18(N?aFjcikAyo8 zXT0etT<5lVeR)gQ_>G`~Q#myw89D8C^}_9$2^uF-U9)n^G~Z;nj*eDXv~?)j)b-Xn z3QO*i^*t0B5h0&(IQ=<@aq@wD#ueF^&Vzmh`pwjoloXllZ{NO&*MRr5w$*?4uIy}< z-hfGPNq7L)H<%T$PJp}4bNTJUo`x(%;BVZ!WobXe2dQO@lAoU#%a^4c57;Q9hEjx!`;H zx*`#~i`n{FD|=v8a836S)9vo%=1}B{R zCzO$UyWiA7F(tRGZ%tLNt4-#FF9ge*S)&4wmeHxTRK=^ibpMGA4wb{9#@F*!8sjM0ureXI0za z-y1hZgRie(QyLHQ4oJ8LL84FqYab4XRTokThGJvCTKWYvird)R(3*~wJS%WSZn|9p z0Mbq|l)!C+X6J(iBFI>kf(zc@KBhv{;$;b%g+3;d*?zmZ#3wM23elQ+l7n%ZV&J49 z)Gw_dfH72Dg?if75RX3aJAElbEVq)?75z%vZ|XYJY- z;MN9e0?+l2ii!eibpQ-Fi)>H_Ho2d*>ZpkLE6jK!XM0>_Zz>b@{DPAVjsw#N?tTBR znsjm?iYaTTWk6jZW&8<)x2~{sP>;pKC!7F znZE6Lc{*V@tULg@5<1?v$wB_~Nmxia}to0{Ub_|II?=u!P&PugH`y2f&b zE9rWoET;&+`;F}?0lQ0o){XMla3tUXze#)LeUC=9w#P3N+z>VzI?*Ic ze_acp5eTi?@28JtqZ$zz$#LvhU2|qwSQwgeSTNhgD!?F21(XOK$H!P3@d_lc25POU 
z{*!a7AVGL-9)(h$2PWnQD-^ax+#)o7<(T%AYt{a?Ht**<45&Sv60q$m6*;3)mC+ND z&^dAM(oWWMmF+WQ(t&24zZs(ac5{n#RHd)OoR%ZMDCA` z-_z&?spIzW05HT%ELV5;c%BbJqxE4(vo+8KT()whFA1KX8YBS04WkWw6}uIfXhofA zt~zxLBvfAH>1cD2Sf5cAv&;B|Jaxz`(!|5C-w6qKoJWU@2rZb9`xHIcgqG z)M*X2UGcED4g?i~2aK&AbQ+*@eaBQ+qwAQOk&E=d3Y9)QTX&h7nmT~f(Do97WknV~ zqYDV}O#l27o|t$LHAe}SfeboyY~Vc@6nB}=%tnJey55&;hvW{wbEW3k2~|dPcl+HHRB2bYWwZxJ4k$j+jC} z1v4g7d@Lji#9Q?a5B#XFNGe*`oIkalNqx^nDsw4&sffji4Fk(s8*5XGrfK0hJ%%zh zlZthtQG9(fzP(NkRm0=n;h(}cr>+|C3eG;Ud$sOQ`XRS}%4c>7^-r4?zOJx*G_Kl`<=j#EHD1+!qP z!8Q1#Lv&h7N>3omL&Usd)r2Pq%9bozV^yzh=!G*+t8^WQvvZLpLDfc;tpiD$G{$Ok zot%98>7�D3outa%yt=R)H<|)>VCvyx?ZI+hZ5U>&f_)OzSDn*w&8pwXL{Bp0niXSb3J~gJmU~S%1{Lq1H}l7&lP& zTbNmSxfzylN~mY6d`$J=H0r9&c{NDuDrzdG4^3CKF>)&yG;!m0#n5(B`qUOH{o+Ho z?Q+t#3G6XV`o%N$F(vIgczEPIg+`u`STGF8K_wTC z_3UC(6JmG94e2bzCbM1Hu|3zL(aKYnvKeD^X-}HBE;1X+vD7Kqcon2@gCtMfww*$F0c7IoWMl#Ij)%y-18&0{B zzl_LWIzK;mAr-th^%A-s8WXkH%1KwPE z^ro~ZJC4TvO4vy?w8%xokf4_-&*ombs?0bH zl2UX_m|NT1sn4Drhsk3m5R<^IGQZz={M5F2EAT!L?R;@IwKg})7_+*(d4@a)p(z?9 zaX>D)u7Ce~P0YB(luItsK}ViLNtS9r7YDrr=jKH*u?P6(+C0qiDk`y%8X2Ow%*?I( z5Vkq{z;?G7zXp4?wYN6`+;9VAsIH!#HL$5qfn%AkpC3N>Jhn4P z!`x|C>t4l!+3@tB!Jq4PNLs{$hp>m8{WKt*w$PU5W>ALsq(X(&y?%WAwpCPA6gS`k zG<&jeDnWeMxAyJ7H)}Pyu_niHGszVaa0ivQI@E*)^jKf4)P}0(WId5r=goz9*!<3* zl~BRsSYHFJ3llSQEJy$`xI3uFj9>hm`=b=f$~&`v_#k8+2xGif$mC{U&Wo?qRQ>sl zD+piaK{RnP!! z!6T(jvS0iDb$)#M^@O!KuHNilziKjaI|;i>CH>lSldPUs9K~*~u5xN>_P$|W*z9@@ zFng#?)+5ODjx%S^#=-{TO~KXJadl2+i)nN=%h7_30bX*pV=f3#UcGr9Ly8;s0`0?l zckMch!Ec|@g)OO(kdR1)Mg+2ki@^d`D2^(C|AnDpz{$Z8+c7^O%3}9R?R|S?Ys?&Y zI+F`x<1Dsuo-a{35@WNxfO)b=LTJbC)fQQGC{_cA*3r?~i;Qxy z%kwu^V&XYVL=GH~CZZ;sL~#c&PrPoY7*uUFfN9icF3l~4OWsOk@2#xMu!uqvv9+(y z)Qo}l2M7?{n*1AAEL*W6&OGpESN_Zx!(X_Y@tVCO&#PqO-n%0X%#4>AQ&o-YZ29dY z+a{*{H7o+3`NC7IfBd9{P203K2g+;tGl$Wx_j z@%)$qkT47Zu`H&yrHnP7l)}7ZD%!AnaZ=$4D4srj8_$qk4ABOO7k$H0uKVZyZ~-*! 
zFDHmtcPj}&L*YAdwR=1lEKpvP7W;?+U>%)s;U(Wai1fck95e2FA$gVMp`(>@jiaa-1_`k>=-h4Sxl$ zrWS(!f&JOfvZB-}Dz zf#D$tMS`OCQJ+VJHw`x{7R_?`9JIg_z6c8mslze^o$?7Rccv{{Xdq9)i9j(n3+EdH zWJQJi-E>n&5)g+=_?z@W80)M}r6NR3w6&w5k>*s0d&HiR znTgvVQUh4%c}sz3m&Ba~C=HD3`*(d58e8 zZSiu{9PAl6q?f4({SA=j6@C0jwoU8z@8t_0^F35$+Z3I`Dmpy)+Vsadwr6#0GsbT9 z_<_}djuczzY>A89eUD@6!^5bfo!?%@GYic8?dCXTVnVy8Afg!6~QaWeSEYJWI_5o3~9eUh+P5>6Dc4>tF`Kd9cs(PQgA@< z@d`&NOpXaO-evOr%7Lr#`e{zHO^%Q6#lzX<9V7NCeJ+mXrFt^uQ7B)Yq2av;r#hzA zpz!xX`Y$IV^8)zkVCVaLY_{jXTb+PcrzI=`5S)s>e$5;-p>1fdEIsEXc+*_I#vt8-nOChh&xLhV?)XS_qo=>4=(`H(8?6l*UFtpTGF5k(NCV_`{2 zybLKDu|HBywB)XJd9xKxgJMh;@m)f%rE{>7S0ilco6ixw~L1kDnekMU>uJV1&G=2=(7Tpv7$v9|iP z6XS7^ILl)F;oRFPh9OK{yxu=IgU?G=G7f6^DV+l=stV7V*R1*U8BtOGFXA;KGvj5< z$UmS@AA*QkMR3MF2BcohS`B62iwA}rga-d5rLKWu-K$S})ms=I%EzB6Iv^{MkgwI^bDj)|J4YH(%K#JBU5fNrM@n!Q83PF|_2J2TJ zY#M=U-<-ioiwrc)$&hK1;+?4lJ;MK}@`|}HC^wfQL<86{M6<{I+ON4-LB~iM}+2)Zs_TQQ&OeXGmg+IES*6C5hX7t$%vLRl?PQdvJ;*`-&x&(673fvTJ zQ&Sp=PY`^PD{}d8A($wQFjh$cYk**YP{b*L>=Gx}hhnr7`8b`y|6>Z3&}@Ya^ePC% z=6I7CJCMw3$PIuP`68a}1z4qFpGP1J{zNV92M_KH6Rk2Q^s{*#OB3YSSXih?gOpi+ zpVuav?>5x&2bw(4X2Kz-_%4K1^`G@-ZuzO}MAy+{EYjr}M+A63DQDwofIEO3RU!Vz zw@VG9Z&AIn?DL@RYh31nRYI!aqL< zd$6p}(eTurMU?1!w;5UaGn?m8t}U>zvXV#66fZrkQ6_lHo_YT`rx@tcw%thuLwK}2 zwHvK;4sNupl;W2UnDXWDssU`6xyw6L@m;F$OnUl}66l(L{NAkw@0VSoP!e7k8ZWx$ zphn{)Wfg^Td=d6A>Jw5v{CRy@Ly(?7uS+|~k@x52?0!wZ6aR9&<(!z87lCzqZS02ZA712=$*rA54EHr@;)@#inEk#g z)ibk|3`xFp^z@2LN=oeaga02yLG-M~B(rcpStOv;zP3qbv+F)Y(yEw~InU#jl1>(Q zZmIe9#)TRW@oY3)UZtUyB3aH$X<#&|52hd=7Kchx1^jJ#KkJce$HAYgO}y*^iQ+aO z+BhZm!1%l~XV$YBMT0aVk56c*rppKeXY5Zznrg_uSW-0SuJxPB$=PdN&%_jgcCMWA zE71CeyT8i9V&hS{ewC_eb_BRt9Wap1ExOov1bLp%BHZzv#s1(qs{wXa6m4UP*|M zDk>1FoI)id>pe19985TTynsud2`JZ>!`F9j(bLh1fjNyq`HM5DR+78Ckze3+mB0Es zUObC2TTIH$Px?F@{QLxpTwhZ>2bBN?KI299J}Sg6>}#pN+pJ6xfk+i|X8v^qJDd=Q znAKp9F4jKF&Y+<+Gi2Eb8#N-Gd94iX=1YA1yqh_W)Q2Y;S?E;sHqHU^gw@q zIi$t=J3*g3g9e6C;^glVRLPChSbRpfuRz|?hAJeuh<3ZJ3x_Vr$U(9q=@`6siW?f# 
zp$as7t54HRt2cK;XpG0AE44R<=M=M$ZAy1HEU|ep%?}IfCk`<2^XF@5xzh%Zo#SXQ z+UA6O1q^`4<@!d6j$9yW`Vna`DGJUD4f~+3jl(SxV=8Qi)=*&_r7Tr`1_4b*MdhiW zkkBwB2KWsn%|Xqa4TwwC9d|Zpe1=*u9A^(FH+SXh30OC%!G&M~r58QJLJy6 zrG@k2_v=3+ zJ6R1_0rIaV9d4y0Bv{Unkq}Z4(}SPs&5n4bkl}7Aj}4as85)7uy?%WE{v0>m5UZA~ zz&^KS)zfK1rK0GQbV<1VRLx!477K#I*S&(5V7uOc7gfS`fu4G;VSb-l%@jUtY( zL2{69rREfB##oOM8l1}O8%dzj@01U!fl^NIVic6w;!;xb*4D4`t$^qtyEH_Y^x`P} zr~w56TxYbc;t&Wbq3Vt|`Q1R{`Nt0dVisFU1=AKQo!oAKJFXrc3XsrelwyqT5NsQ| zpq%u_60|JdpQhyV&K4C3U2}S1LxXjlF+nHCF;_7Sc4YDJFTmr*&A*EI9KN;31Oz(K z*R3QKB6@uUYrznbROT$$RYG&-gJ`@P`Tc(NOgR{3lw+C(vyr1z1m9?1h+S_vwkZw2 z5T+CgSaVM?2^-EF#tB5@1mU9s@)@siIAzOYLHM#~PxfXJzz*k)9U}i#$YGu$si!Bg z9kZ51i$9^j==X-7IMm&k^VL#9J2%o}0q~-?|2y+awtAq5MgeCvXs?PI8X@3LW7q0{ zof_kT^b=Zt80v%rIaFafWp?p$d~=>DNgr^&k`woeL~kvD)h}IoMxdu>&$fCEB(jouNcCK5;;P=v@uwZu?Rc}8^G+|caD&q zaV?c;1CvTa+t~MoR_S}Inrr_%HB%-4Njx+eSy$E;Upn})u5Oa!%tD+tfY<05*~!Vt z@N19ZZ@h5|O0?YRp(qh$x+BtgTx=x>;hmdL$;Os2EUsO<07huTvVjee=HlWN8=s-FsHXl-At}cj z2o9Kc7CD(10s+b}ZcU^fyt|#j-cK;i89xj1JV`vD@mHkvL)9j1)mbHmnxYau0I@if zs;MHlO7e0Zo`q80y?F7WiV%T{35xmm-_KDJ5Ng0M8x?twK}|w1x>%M7r2pompBK*d zqgix$n8{klF(Ix4p$SP=Ts%9zeS3y7aB<&;xlf^7(}^ROd^cV*R|EcIayX9R-V+Vq zKc0yv$e?^n+5RA#YOLhmC0;~S^TmyatD_Ipq$py5V4ue(a2M!BrZ|jbgNh(P3P<1v z%vP8+Is3{p{W6~!;Am@0%Z_7Nt|FbMPMsoWC{!ikOVJ>o-gUvM{ziAFjp-_IY%oW4W1P~GR&8-?OGhmw=39wPS>SoDXP-3Nw- zd{FA235Eb88{pYWfdr(Iwziq%2K`BWDaU&f1rZQgkfZO0|A>v9ouc?O-(8ZR;#^*~ zbm?d84`zoy_w%?8c^t=GZj5}@!`BW+pQ-rk*Nm8Vzc0m^Wi7#&oEMkB)G3Z!_2&Xj zZjcB5?`syT{`cRE7S6#^z*5n!pz9^~4^s&?@2194!;2Op+|MB^_2mpj_N3hKFI>;?AD}`U~J4jgmEgzQxM>+~jFp2B3#y4Sffiob_H* zf28{Zhj}XqeW{z+e*gYwHWmCD@nT16;V?{B%)HV6f`nH%YB4b}#el!G z_uZd|UA0c{4;=M>AF}qSLuL;$dVK=$%FLPR$v}49-xqM4@tDMk#b7|lz*Y1X!U}NG z){c(7I5#bTCzK4=8q#<#-~RpQoj=UA&*m0$S>&oe`|KLiY;pAGdwp5@Z{Wg@{=1mo zigiIr*qiIf8YQ&F#HjP&HJ9e`Q&KmCXWyPT>^yr@H~wLR4@FI%L1r-jyJHW3UF5M!LIB+7sa%n15KLs_FD zZz6%RDhwhY1Dq%mNIPoT523d$3aWvzPYz4hc-9YCx?(;`6o)z)$?XUw8+CFr8}>Fr z{qNTStiO#Vu4iSVMcU!x++>)LJVz+jHNly7qn{J|LB~DPRycwE@L?Iqp*!o+Xmn$m 
zAE2R5AOQNENQm%C-*$w@3V(lQMkWLOk2A#=NiI7n(9sG&M~hCrGA106W*VZl7lL5; zx~oyXAvR|>99SQ7$IW|+@w#eTUQNBImVO=8TkW;FVVtanO5~2Fo{JZI@2HGsnKTFt z*2e)aK`-tZ*7Z+hXtuBcjOR#W!g8P^f1=_$kzPJ1h>AjYVb~y#IGsz#DT1q_p`pi3-)ZVE0g3jX|&K=27ZjQEXw zp%{vNHF}Ky4^9`$Fs)$HGQAjD4e)yT$L;u zeQSQ*9^Sz?zL^1PzW5bG^^Zleg?E|_iotcs+%-LB?yD1VXD7#zV8!+%F5oTlNVt7< z2(05Y{W?DtwJ#^yT{s=%UzYe5aryiDMj*Wo-8hjAA}1tM9nb{I`vpqw0=p;{G=Neq zxpv%Rj=g)!@$I8XdO+k8omJJm>#@`u2~-*te7s9oH>PfEdY{?yOx^m2geyULwx8oe_kyfPrt(AI%EB;cpPl}k}_e7>nfL~(89K`{Swmd2f(#&gXi^T0!6z8c4M?wti zg@>rypGyY|6k?_fRG5$1aTXN5d2<4$910k8N5>*2H35DFZ`yTabpW*XiPdJ?r9tsB z^l^u0m90I52*bS%s$MB5YmOc{vPVE5vzkHr9q^W}oz$$HIzUT?ftmZpLvNcWhJQ`r z{E|2?*nb;N1WaHZ!0#&>%7pb+a+;KylOc>Q)W+Pmlz-(4|nY3C7y63N9D~9twR?ZP}Ws;Q= z!Ulr#C)Lkv(!%k4j*T1~+zPg-{e#ute^jn#ENpT}UNK@V@X92Yy`wq6KB7-B(%Ruv z>yPr2I{6VQ;|2V_c8676n(UQ?cCEbd*8f;a|9I0{l?JwMYF1N7^!oS)d6N`Lr48C` z1`V3RvfLX)S2|kUs%ZA8?r?;M`W`e$aX`qSo^5oRaS)Tac=0ieUm}n;7{W4;^whyk zB<-=Pt3gei05Z1BjENEe2>Uq_CUfD)Rb|RVX6)l9Z@_SKNMvSEq_Z(Qi#R-Fk2Sls zEer-t7|PL;#gfJ?>}2Y;+nUCeDT>Rk!3$bW%z28SXlG7MO^!+rnnw z2`)}fS?~-lV=7UzEKpPbAxpmQMmDy*!OC3C$H5{6*i7ggPgK%hVj6dpf* z4x|f7Y8UaVJY$bc3{R@!)}rM=n`kK5uu{alrWt)sR4~2-%$jebiIadPXO&Iy@Lk>I zWd*3RhtMwkNe)uLNWK99F@Sul`aR_M<^aAYp}FLY!D!0IXM#H$op7eX9j60RT;+js zpzuo}WEaKxqrivZ%Xu|1eKtKjPM<4)cp)FdMZjxP8XA6;Ov@NpQS5u=06m2we3D6q z56+DA!QjH|XKO@78KX{$8khMd`_6ZL>MMO^X=$NQa4fT9?Ct`NQZBiQ(Iq+@{Cm4| zT$#eI9`oo_tGGHGWb6Mzw{eGThM1^gAN`GvDzBwuK1aF7bxT>i^J>i`i;q*0`SJ>% z_y>pmeD<1u5F&l=`7cfmUH;J&m$Mso9=4dIVTFZG{B{AG_(G1|iKoO1b~*Yjg*7#b zWW%CTra^6v+^eE9=|GxR!73alSJCTwN(?@Wi&NpQj5{j{l?Y9Ki-h04$Lwf0#i9z6 zuLjcUPCRtbgNO$xhsK;NHn$6VGn>tizk9wUokHp+iwMZ(G-XDeyoOW)VSe_KVB*m> z$wfCQwJBdip=_myojxJw2(%5UpA1&iz68zHWld-sAPI~iE-*Oxp{$BcIXQxHr^N0G zIFfb~27i%~N5Nfo1Hs19OoGA5B})f~(aKGS!;ALk6g=Fd)mSt-U|}B|6hvCNgpo<$ zUqCZeKh40765IlN(u`e_PoBXmK>kx<`9*@!ml-FmL7xC{cNY?MRH6PerG#l&^K41lfQ3j5q^+hDP&n?ERd;7lQwa!>9?8$ zw{_1@qlQ=b;8i_#F+e&JUMs8@+oX(M>>Sy_k*yS`d%%OYwDqKQzCs~e4`A`}u*>T`He4HqTL#6zgNORV!tQW!f~(D*hrIH0Y7 
zOk|{|SH*&^}EIK@~Gddu~EeOWUx*p94S zjQN;#95?{l$oIZ~zkD$YaQFILVTW4t2nQ`BH^?tP1CrG-%Z_c|g~I9JNn_zroTwqcOra58RaKm!X|S3!{5?dPW}ov6m#Rk1#sJgVE$T0tc$P1A`L9_9gjOtFN?ONY6Gs7*eWHm&k0zIvc{>4kHE9r_d@yc&tqhg3 z@2hkZ@tVoO?|pqVOyps%<1?&AYF60EZiu4@Zo$#m@IT%88$V+QGHu+bq$47qJ>%~< z^XcPQkI@*QNcp1*kfI%ZvvT$(Gnb|I`+_t1`58e;LP8S2c>j3m;GUyLl@T0?2m%}( zE)4=o5b`d%rZ6>uFa0^xCSKvcJdCY4x`NOdipisBre3qIxbt>T7-(U?t{N!~)wG<^25Mb=7~5l8qcUUKHo zm6rG!`LQ#Fa6V%1vCX}5to+gUG>%^*r7tZKYkF7b`0L%PuFbt+;nGM$?~X(>0ycwU zK&l5;G?JT~X0qYUWt?#R>ds#f@x>x}!GUB5t0l#Ko(~@a!gwS++09Mz52!gvuLw`x zkhO(q3oxY6otal0eW*&PHn8)Lx*6LdQ5u3J7ZuGtw5UHDr#0--Yj>sB0)G*3oJuDd z00T1=OtX6xgamI?Gx*`Hg0 zOv6>~%J~=P|IyV&MY)2>A@ZaHU`WrwJ$8NgunH#txICs;(W4b@8f*^uA) zvqN(ADRxjD))_ItgozS>+zwDWhB2wT;YBIQoIxY#Y0OrIeZmBOXnep9C;4!d?D?yh zyFRN*82u?eyHxbh^~IzE4nDqDGV_%(9YaMdrk+11gca;-INU`8+)Ug51<-&P;mtwi zLhyd7kXC8}7%j3wP$&R>@4?(_2u$|6BC1DlK-4XjGBjuW1bc%>NTRniRLf zx008WV@D5vFe5HiUQJGkdc=|IUs}m!tGXBU%aMb5**){88ymv#QG&4gu$fS@RexhZ zWJUTFwZ_i0@aX7sDC`W7^Bm2ADGf*gvGBvhBMC(i4M)tA6VD40c2EH<)V4o{Mdj3x zCVlFFKXB}Ar|#|n&B?<l+v0!?JAEs)T{T`Zd^w zXi`O>#Kfl+#=atr?6YU@0_FUq>ZytOv{FF%aEt}wCUOYvM(}~Yg{+T-pX-6H8oM1IsmO3jqd7IT#d*SjtO6RuAGbXbzITgTt!jrAY^n9t* zE_v7JTcVH72iFPMq&)JzH~e6eA?%G+K#n&_nRda|s~;(fIR&7_F%?Frq! 
ziW)Jm;(+CXgEE*wn0tN$|4)b7EtUSy%*VL?9HQS@wrJ$msVytYe zP>eiHA0{JGe8SF#HBK|AHdL!Gg!aJ&>M9D!l@)UxnP`z&JXETCRByOZzh!~GpncA4 zRg>PI82$%C#kVqAsJUbbT1^q*;nA3**{lLodd1>XzE)sa6S|wNryY*85;M#g<*1jfFac;WOELr# zSfeYe|MO{cgyg%cSkWZlLIpy&j9}6DhK7cYt2M$SaW4Q(kPTNJ)_@ieoYgoaxdsTH zD=T*YzT`XslAt;h^1|a__53ZvxKo6X2fMNDVx+PKKvo4yToh${ckdR(-_W8)Ngr2v z1EU0LK;a;TJ%SIYQgLW=U=<@F{R{>oWEm<^(@~NE zFK{k*0LudaKx5`G`h8i>Omhu2G_>MSPg(u^d;+~1#3RBRS+H7gtJt!?{<;WOW`-k# zL!=9gM5o5C8yB(%u1JlH-@k_vH}@bRLXIVX6TmNc=@Dv$rDY1bjw}vFXn+JrMxOm6 zc%@v}G}|A!fL04({O*D71NDDMNC=cJv{#OR73i?gT)c@KC!jO@S==atb!&dje9apC6&*Y2+0qvf zx9}y`!li-pyeRXO7j8eXzC3YHh4_ivt`Q1dGF4aTDvONcRra4zHP%jg@ofHZ%`Ly| zwRiT|J=%YJ=ZURVjKNxv0n%~hrENJto-l8s>yIg9&-rEiGaTwq_eZHkHb=S6#N^HEWl}S z(!RI1w;x&0%If~+(zTc=TkMh(P>_|;z{pMJ%Erg>ensuru^oq%^#nLS*B?Uwe~ zNP=VET)G>wHiVUr_x-|_ki?uS=FLr5Q^(D@#j|&ZI5Yx-ZZf@AxR#O=n_1gtiY+JroP>Tlo`$S}n>s((hf8ttvZ8Ua5G!6{1 zP0p}w#blXijxJ1aGKXgd`r4fvGaAs?uz}_b1BlLL7lDO$D6oVW&D*UEEFq)Ea2{a% zj8b3dMD<=KZMW@!X zb>sN+9<(jB)1?9$gP|XS_*Q%I7}}&BYDC^+6rX~QR`%P1KZCI!j{*aW0a#1HX#&aFwG6>y#{%n2vfKf6AcC$2pb?a? 
znoL$hLXy${fu*A{`xqh+85!sdVj7{LklT(4P2dHDMMpC@qxRC67LaE=k%cen!6Gp1~lFQz ze0IYiqsEBGjbY{IVEY3MZJVHA5ZHqLMKsY3{ySk3a!sJOwdNL!iTh56cN&sAb+o6h zZ{ef5KiRT6C9Tf(_N|ANLry>WnnO0bl^C>q>}=?sJQ8robY`yyK0k>iBRGKHo-gLUh5Am%98-J6X3n zG_b8WyP&+qbpEp=+*_RUSh^>BnIA5mFL1})y!bLlY2dG4F}5P#+)7)S7OeSP*cf#) znA((^mN`a=a&UV0_AT7BE!}I3_km*c{?fr;sIQN)dfvYuEtf{vtRXE(&cRP|#-J{w z%kx3fByP98y`>^13TdN>|6VfBs{Ql!Z{I#3@>V}?Jd{yeL|cNgryZL@4lfS39ID9T zu$x1}cITQB{YT6i^qJTw&YD?UDa$-V=XHzKzF4RCYn8sT;lPHx-S0P~`=5(WGR3aV~(g>#s*}oMzHuM{43}FwZsOV?qDn3W(+&Gz2Nd*vWUdd(Qw8 zfGw;!O*3Fl-Z8!?Ch zEr{#*swmVx0QgYCOdz0cvJnG6x2*g&*5*Qy{_E0p)76eIr^JMVr`A*1l$Rb)J2qj~ zMID`gLE}P^6Pw02%LF%CU)S2wl{>UH^$!Ty_t$xjcW)R>@AzKFS~LC}j$DUv267am zG!#HCAHQHCX0yGfAIt8g7#x_UJ{=57O} z3%Sw!m%qP}28?I>a%Y4=A#Dauv|@{J(6Qe1Fqv$)&w)-sk52l}F}w4wx+>q-iB%OE zMtH)Cr;4ge-Vb$lk!dD8+rv8jZVV5YTr^~*(#0`TV7=QN0EFOdcgkt7$l3%*n5h{9 zM}LU@3tH;y*9Mw73lLP!`Vua%g0L z#DwX%)mc-vKzK^fcrU^q?c|lo(+)M3*I7wa3B@Y5V;k8;SkM4%9?7h$7 zy{`wQ@&D4uRi}Oj?bG4PRLl$f^UmMzSDZ~k{;W&<{pzmSpFGxW{g(Uc@ylOs|Km5D z4j0p3{clQ!oI`-s?ZE@k{f=m$MGxy$5W9C`=p{~ZJh=qxXm^gQ)#m1_gTnh3r14q( zXP<9Z44w<7P^J;9xUuo^qaGlQ9b21s=gytipo1bo`3zGL;Ps+dg&+rBjeW#&rY>-M z|6l#_igjF?6T1s(3Ey_lOz~d4K4SRex+r%8aBh_kQGSAdFSZ0u;g zg=b~lx2}e<(Je8l(&dHrDybs{C6(i%+YIk4N{*Qyn6E-J)J(s26AK(|TpXuyFToon zuF|+&;tZ4IuVl=@T+US0l>rX|ExtgEJ&t|lQKGwpo7)=`B@muE?TIDw+$+O+@4S86 zw^uyGp^JdgKREbKpGUw$HC_StYrK`qNlaOJQ=V1mQnm5H(lHA?wvHR}{*A>3pGP5a=zBun?*3829wl!=gwV%8a_@b=@uv~aSEKMVlX-4Xr~}DD@xxJf7PR z+f=a3$Y^C8Ok(2V=9{@VINloZ0aRlnbAG0$+C`Ety_pZLW7F+&KFRLv{)e`Mjw*M@ zwc@WVrqPWee!kyy(k>;cZ0LJXF{f^O77JymTTfmzf8Xwb5e>=^V|?k^Y#R{<+vbje zVGN?f$fP!;n8auf$vx@xiDn?Zh29&}@0{`Qaq$xV8saj!fOJtP-vej*aY#rh;E(>1 zk;Ru#OT)V?zJfQ+=<7)}wapYDpSuqq_QE?M-^Wn9?`X-J$C@T7DPayadIbQ|U}k>y z+O{)q*XO&)gX48c#PjEd1&84Qb_0zA_@_PAlFCuoTjGRztf`rsEW&m>^yAhj137i%F{8Z~74LN$Oi z00%)E0fPP^yqydrlJhXT)gfX4lMdD}Gpj_|p&c;ydTe-aE+|yBioO0f!#aq@4$j7T z2QTGOut3j-JqG&IkATT1pRRbvUvv42vo2bFn$!$OJBC6lx~SVRix=0_(%ueMi#dBO 
zSEl_@U2sEz7`F^V049Z*e&{an<9@C2eYS%VU6dM#~F*a~-a0>$j1ZS?gWz`#g6&QFyTzcb9-$_+i6^G|O;u z-{13%|JZE1N?+wYFt4|#$Hm!sF?{k%w zqOCuQi~eWx^W*EA-EEk`6chE+(tLC~AmL7-Dw$`0P-xGdMA zpj|a))g16JaKKm{PtLBryB369zXf2bSae**Acf}DrJlNYT{xNVnwGE|EDlD|bloq(%41NBD8Bp#U-Fus}-#)&vBb)?d^G z3`xv21yivO6$5@WU`&(GZ+oIsUp%9Wre=0k*)@HiEWDuB#H6t5Rm$SVKhTn_8Hi(&SP|54S{b+O}iH$w$9`tv)ZOU`Gg z(K~(SF@HeH6`1Z*7*>ex3PW5*zN|U_!}fINT>T>#Ca`s@a$%y%Z!N&DN8Y@Tx3}hF zL51~Wirt!3tCBWdiZ|X7s-PBzXZrQ3jpb>6>*YrNTQ&6Vy*FkKBFG&$vgMoh z*y(4%24MVm7+M3Fv&Jd!5)*x}6QO%CP$^6j#(|0q*HVmg0CLpxg90!Ylgb|Z`JF+E zPPe7wuVc!|r?9BIZpX}pJ%K5wM6gKyaqCZ`^2;mY>|zMFx4OOvZH1HtG%v)l7^mvE z#_iG-*bVMXm66FTn9H&#XXVP3;LeYv)gT`50OmSuC(RX1xUEr|vvtaYB@7TX?Zp3d zWu3SD+5eVb7>`@}=WAzIL7U&}1%LY(qF9mQd-gAA^b-uro7Dc}5&$$s6KIc@J9TPm7+y7DRsUig+C-j2X|!5K#KxexvWocg(80M!@z=p0 zqZG+s0cev8b`p@8UEK?I{Qlu6hVSakB2bzjw>w>U(lhnr4r+W^avzC7+G)5K-{7=j zUX(58e&Np}a$=L`uiet;fF>fqbM+1pbJ#KJn<&zE?8psofZb(w%D9l3#+s9R{{A+% zlfT+`)VZu~6jC>M$cK*}wctqf>PAFNAoAcKbBMBf@b`O4^JmM#NiMSk8yyb2H_t8vQ=OAWgdS1b@)aQIeuHu{oeFZaB9`e*2^2; zy?v_=RNtpq2SQI?-*d#_g4-ZcUK^vi82Z87?Cg+-TLBCI`goE%yF{e;2pn0}rgV+nAUje)FNP#TL678-IhKqPU_st**|m8{^<%q z%U#li8u1^-Bet>EJARH^w21MP^|tCR+uoV(C2#I^_E^?;)X^6|^)TN&sVD3FU41bN zrxxfT)PXN#`jD_wNND~rAHwe5Rs$uX2ZB6z2P$YNG-IHT4P6ob9lB+ojeI)laZ3Ke zYk$4!>zj-J4{GYAjl&cH^Sq`_M;M5D6&=GyLF2k|=9r7s?**1woaJR*KH>c7v-%++ zud83zf8DUJe2382b?YAupE0u#T)k3$#PDbA;hc#HyVm;Bvcrqm)&_9z;Qg9VI>vS< z$FRLOeCzb_tIb_~7sFHkp|b~x2ie5ERhGYK9NHVO)cQQ4=hT!;IR#k+l|Budvm@w& z4GE&HtrG8dbhUEu=PJli&&E{N(cRh&WezxH?Kc~L>k!N@#-$KMGMZ^zxtxQvZU^6X zUGl0Qxfz85acL3^7=@dSPI_o`^nD964r+`H+q!Gl5)9hN8-lJDXKjC>k$ex57>Ge+ z0|V!w{*D_%p@rpz(orPGbjSu;P*ZD@I(qRH;Fc%=I&uH?E==$6d>{)G}>4NIn_5Oo?g%^d@DN-BS$7JtgKU$8KmQrA?TUN~zovu86%tLpv8jTx#Pt&RY;L6%MWL=h|I@{5LtW>gn#s<{Pr!#k}`hL3U zpVyfG$y?=Y;?@1~v@vg)UF+P!rNo+|5xu&P+pVQ9`JiK#w-*hqEY|jic@UZlR z9NxZmGxrmQrFkNXly0T>>-T1dT@I?ZJ-FZbUdH5xYRM6XpEc2p5~P{D1qqSDzKY(I zs8N!Ueouz)5Gl95EKuOd7IrC_Ji&bDFz<6aQ!RM4w}&bPpEn|B#M3hB+{>x91HPoP zB)6)_nYB+u^HAxS9+|Hrzx|`+w~Mo8Ni@62}}S;BE_%@`_#p 
zCFg@jk2Fvo)&Yo5OiF^~jR{MH0HO#5ZpfDnY-~kvS4iPd<>d5QLyybBT0D<2uFzG# zdHa@#m46Zt;F^&!Gh+#}0X(AFAd(MkasxR}v{z`PlbY|dDI%zz&?lDh6Jx2*?N|e? z?+$Q_=lliS@XTN1WyQSN4Isz1ZQh*uhL-X(Dy6@La4QJRo6clpA*_^(}C z`LNP_rT+xy4|`#dqC5QUen`9_?AYKHf%8kPIay;`q;}Og$56ZD)%9tep&90k@g0&^ zQj8;gM{6(M;9_5TWV+tb2mf7nFeW&}-9Lp*!gk6kXi9yP1ij2d$o|~}5_zWOE$g`=c;&~8|v0eq`!2$AylTynHT86-0l$LnEPh(_C0GZwG!X=$99POXEt%w=SNwF zyHB{3Jo`8uXK52ZsNNF0l)cu!{YL*XPhV@!tEbFphePRIlIMK+?`zt}PBcxDl^;dM z|5}F<5jo;~2}a($Ed%|>3#vyP;%1zE)%!LQUMoffr;w=tO_7sj8 zY$9oI-|;+kf~!uf&S!dB4;yDMUnEaYuwMh$1_U*G!-)k4uSz&NJImC;&rA-GDq*ZC zcizRikKGv;U;<;+y;mpw}CA{nN4=0@ZoMA zg}j+C^;dnmzD6fD>3a*)vSkSOxP$dvxz{PS`YXYE;6%eR!BWMc(Y=d=Fod6FVJyI6 zw~2`b!V=fo*S9<~^A=+@kwI%bGLa32OF3Cm!l7rql+J`rt?)P=Uo8Q9vC(Yp|h zN8HSZU(|+@9TLlDKm2OOM2ZQXL!Zd1hNutA82m^KoCqLw(a4+qK%EGV@S>8^qp+|} z%Zzc&E2j@H&Nv-bIk>BTs@ybhbpp09SD%A7Tln5!3)pF;r!ZBwsqY6RJ~$^vv)qZ; z2Ja0os}rru7k+xRYg1oHHW-g(-q9s?>0g4DjItQzJiDTKfK{Cy8p1Ap`1HL3S7(K6 zQd>>3ISYeFy2bf7?YH_qjD*Zw^I#* z51x!K-#VO2Nj-S|Wys7Em7V$IJ+J9MIf>J7ecjV|m1Bn!w=9|oGHvfn zBWvnayJ649x>NlRWB;Ky8kcmeJ?vFE>Y#5=;~%4+Z13cLETp-S_A*^G$y7gb#wzK) zYsQSf$0d1vjW0iD-*!WG@=$Mn!k)Ou{u#6HIoh`({F%h&p0oP;`fuz|$lur}R|g+B1F{#hfH$DwV`RXHfiHOqLB1+#pr(jX zV+8RDO%EgrXO114554hgs1oYXlyT=s5a`9zBIx=5*|shFVh1K~BF>>I-BASF%=Uu^ zOE0eH;u`Sr5;Lsgg;$8I=^>VjFc+etLVi5jaw5LB0&NCcl!NU^8FKGdcDemW z)8>ewp6@Qdieq#BxvKY^3hnIF|A8$)u?#{v%Zq7^uR{7BCL$H%868*_-oTsiW5$o| zodo?@g47DSCkx&O1)o!N-W;STHA|7v5_HpYoB|%{-3B2=Y_dV|~!E++Fw^Oj7~RF|J( z+|oB+iZ0;E)V}UnfEddWQxU592YfD|CRj}S{Ih;#i<^N4E2dw(_ramXpIh;b;+B)* zwP8_6R?PSSi>{)=xC0iezT@lyz!A-<+P}7KtrpwcIC1~mprUNY)Zk7G719K%;DAU?cdd7Oht$&9W6}9S&^ttk)BYR()&B*AF$5o% zP`Uy;!3m{}!Q6N6-$(Ma2#z|`;i9m%$y5Thd3%FLZmhBWojq-X3OVA$uTy23XO{A9 zBYkPNXTZQb)LSydttTRqBb0SU?N*6>SQGu86wn)z9iU+-z{}cX_*`sv&7RD> z@BJ;SbG6@u>XR=9-sF`y$kKk{M(A4lWBC&8$iCAB(*x1hV~^6pw>mha4UJM>$h{D| zf4huwVyJpxO|Ex|kqN2i{NjUCM>~Bz(;E{OvhyiPYfDHOCRL>;)2vS(r?R{wi8mUb zix``%{6XIkr`Mk25-jogZ0qVPqEri=Xd6}(v3q({qh7WSXwY@J;&&d4bq|QToaZ$z 
z2|n~NoN!*$x_E<1cib9)UKoj7Wf-ro>mg4(Ereps)AKlJzIg;$?NyNfj` zWxK!bgsAE#d^<%4en4gT0J#hEc@gXtDic@;wG$YAEmXUfk1~Ykh{2~mK2m5aA;TvM zdgS*&F9}AW9npa5NL(SXYn<)EI)|}CqcT>E)0ca*J&^P^-m$^L+#EOXZP^IW6yoEF zfd^%_Pq>c-hf2qN!53hH098YT>YF! zQY;Q5H5{?B;|#Qq%g(M1GF@ueC)rRCi*t8pM5kOvo{r@r*S?3vi%2BHjaTLSuhrW( z3Q-=Uy|iACWi(U$vE^#|wARkx)KtIEW!vixg)U}LnfjFXQ%9@pW1u1J&^_(wC`)1M zN+*sbOGdPd9*MnvFOhFe{a8&4cHB2rf5z3=+ez7Q&Gq1GNmKRG4wD5d@(k4!Y85K! zQF?mxqhsVeui10It;6`AU3DI28JkX(#5mAK`geG@=>MGPDUD{F7wK3%73`QwuU(f) zosigDZq`2Y{aXRk{zwP%GVe}SCO`)U z`{B`8vT`kHQ}5Cy%=&`b;R`8T;?{_PN0SjawDVcRZ%#E!$B*ZJPP z0{~2r;dU=agNsl$^7pr24TLPMLZ~P~d#I zt=8@O>jCkCThV1*{GaxUnRFfV$a*rGw*UUK<%=i#j7+{8=qWf%r}nTPr~Wi`?<;Pb zxF!2(nO^gul!Mo%iuXBcFXx(ZS|{k%b*0n7BS7H6bnjMy{fz|@LB(Hw>ZsRgX3rZO zH1B+wBylZEmYye>p|@*h8r;13EgZIcA1(GBrtyKJqyV_d| zvPpsYuNouU(2FkWjn%SW-qW!*`{n_$>Wr=>Q~{3WkjCgxBkv1S9{tUBRQ;*Ob<6uC z-qI!$ho=p?>0Fzf2BZyXy2}hU{9{q}lk-#0CV5R|w5$D`#$Rp5RiCFNyK7QECXR(M zUYY7vs;lEE7Po4jY5qT)wWn+~;<{}6lR6)`xP;)g1{}7yzca|@HkbwBZh-njP=5zv zkHB`FZxEf9i!9KK2GamiMd{NcxX<5XG$qm5TGyF57mc$Fg3>*RLm!A8Q2`?3*WN2u z1#r?C9JGNYj&N(nK$Qc{qa-+d0HU3@7IUg2CWXMw;qp8rM9Xp6FoLX&vT)g*=%sT2 zcm#-qtQkJt$%Sg;2@c?p6YD4Ke}}z=$ ze0CM?%___D%&c%K?=Qa^_=@k1IwSqtJMW3elF6s$oLqxDlShrnCgRf@bq_KvdAP*X zZ)O=KO*K*{LbIS_>MgHt0h#-=#HV%pm2BnqkjdPyPVSA~v*tnP*}~lt*0)}4qWr^M zAd#2!>fm&?Lxd>FXj4u4j60s8d2Kjq zG|dq$mRh%TKzW9b$+$<&$k!uUP&}P~=RJozS6i-12SmL+Z?7h?O_DNSM*uUG@%mpkCy39K8s8U-O~t(XG()C*LqLs<2Wa zqI0PudWZ=FvIi=t2wGGjv_gQ+!(;;isp^yjTp4J416!Eec*Em~d3Y)YMLm?T=oz!GI;j;*856FA*R8zChI@Rth)3Zn z(g(|Toh2i^<0INC9QCL05CTRr*)ucmD`BG9y?dWGBz&>C=3~E}ocyulvp}@+Nd=3g zKlk=2DbJs>(~eV&*w4qx%<5{XTr?$IRyUB5)Tn0kRmUPu8v899V^a0?NN1#j4+sX z={Tat#%k|%KacHv)(4TM&}#~vcE#nAq&xCXrj?&U-CBo-ECe=OlHKfdIlJUaJyorj z@CaZ(%tJiB-){&tY1y1U#TOm92G$p%t3=Wb1^h!CfGh30^6@SNWx#ZDh(UwMI`iXg zxi6w{IakDEzjo!+9R}9=O(c_YnV7zxR-h4snUvJVsT> z2P;Cd8hwP^RY{H$^R*gizAz4b~N!v-oUtrCfB=>~PpS>D0f=ihDQ++Fb4vpTVos;0HanJF|^ zth3;TkHFgg?xl36#yA0Q4=xj~iQ+UM1X5xHdk%fP>CdIcRt^HK703}F56kA#-^st;lF&D 
z%!W0r9WGglj<+WO?V{&@OQ&Ts2Qh3c#X>pO&DBqZWmm3q~z(u5(DuFH-97&V9x>&yN(>kILmjk zwPVo>-{hztxI=n8GGrClbj)_FPk(R5Peblcde`a6tCbID6fX+n+p>A0)KyQRC8LV_ z(^T{K&Fjzh6%;V-po+!2GaO%M(%2moymt@x5o3XgsXJ=w_0cBiF0Ox2>T|p-=MZPM zo%Sv$I3koD+A&hGlXjS#$)k3Yt+lB^h}BG?d`Lmi$w4WrO;SC}V6Y>MUo9fOleAss zUxJzUb~mBigQN;Nqqn1zS8F?C|3J%f5=Xe5+%X`qwbkPK%{pY{GrU*J2)Y=uNvIw` z9`O-nJ4+KD(uDjA(k$$0yqkFTPhY%n;bygAC~gP?G$)8SK5u6?W$Dilw2`iRhKbDh zVJue6kV;lf9_`CC5l;<9n%{rLhlRh~Wi&n5cZlb_k0g1E%5HJlLThR)Xn^4sU5}N3 zUs;0at|syhJ&6seGqHPA8vh%T3xcL)fbaRW*cx5+oh6FySqw49Q--QU#(!+0pVW!< zMHEpiMCND4N&f>4g~p;C`o*x+)Bs3v@WEiy^~QT*l1y5W3FF8{2`5oxk3kZ(YSX4O zFeb8hN|?7TL4;@%DvtmRyu(kAF>O93xm6fDj3lS+ml&;4Af=BWV+C1b)-@%)Uyv=x zf06Te^_sKwc3EoAZiE?fvm|E9*5bPSL5=b&l18Qm5}{q^U=jC3EbPiQj4PvY9*(xS=^aX=UA}Wh2*q zD*A}Aq+YEm3d=p7peUt!b!;rwBbz7h$fPYVYu4pO!CarbQ--INA7*SJ4~%T*9lv6K z+qN*eH#Ut&L3TRsfLFEWch&uw**%w5H|obVx@29TPWVnGrUj>qFb5{frJpjIA-y3& zBEANSc%!bscv3Q|P+vh4&izVNi>3a%4vNBHFy)Z`2c~WjJwnVv`u{|!n@Sl@|5wG6 z%BIcM^_CkD!EiBdX%k$4h#*@u2(DPbYC{&u$MLroU{1uBFBiJ~#jIS7z_cKhfND`1 zvF)uBIaiM22(@5`j_T#xw?E(IRVg}+wne(8lGC4bEl=Av3sp~*-C_Vl7FQ({FX-~% zZABd|R_M_Pd8nW$f=A>|T1f*BOWB@p&Zje^H#;3u%QLK>XZ_`1S3=0(a{Y(p8r0_Y z7^U|CVsa;Hxvza`tapAI+c~Kp?kHVmRjTIVJP6cPZpf)PTv)6VTlQ54dt;6U*0r0q z*})rb_rm#_zHf-wMJfyWY2SIW>QhZp?_}3E@zv>H@4w!+g!xuR#t)mAndVjY!`!=> z1RNIICu9xV#tAU?QvLetrEE%P;1F=Iy=uyk2yLqSj)tXcfEZ` z@vV@!j1_I$J3iQZzS&)m+N^zc0MSYdaXeD|CK~gOfciw~V0~A8&_i&Z( z-n~0mdbWh|i$=@`Nb3-P3bo#r{rlaT)H~;u{xTl)IYR05j0k1v=lF-%skUydWS`?- zLvf}aH#Oyj9t*`NJhjIlfq_)(lYaRlv{o@dNzt7XzYMr`N4sNlHFz^=(vpY$%%3jv+{hArR$_IW#|qut)ZJdsH@I#5;p=lW za8kcZ29#}tYgWpeof4D9Mr}btXNT#19fs~=FF9AQ%6zb!aToUUTWvQ$E50E|ez$a| zw|oo&UgZ`2|Av4+R*0t%emg<|1-~)_>gygw?zO#VgPlN~r4HQ}6IruzWhQ9YD`|H8 z8B^ua%IPIu6;Ylu89}OY-rnogrqWY;OUHFbntCh;3o6bojo1=$jkb62+_o=rX7l2m z<^?+i*9|ZKu(M4dGM=3KUb@kyiZb(hfa;v}V2HQHH?y!?4;FtT?WCvy@g|0 zLHoq4mQakG`yKlV>Rj~goh%<_h?B|d_;b|!IcJ(0?XQjKE5m4ImQNjPv76O1_k?+|la!SMY29)jc|g*`ckfv4o@ zT2m=ri>3YpLq9&ZsLUhhd^Wzx)82`|`k@*%L!=3W6AV5FwU-evE^45=z<4l=5S&rN 
zv(2yHhEVLJ{SSYDYWiiCT0cNYQR&O;6Ir-T?Qxiti z4OMFb7BQwyC0fi3KFCe#uU59d#4~Yed{y11lEf-U$JT3E5f?5k={(z4)YNe3hq%E< zOA%`NJ52W!FA@ljV_})kQ^cf#Qoen-{jr!R&wl zuQB0uGRh;^DzJBk-mGl%?vZaM2pccTx8p2No;~}5{4+v%tab3>#b(hX+;ql0P(+5! zl;G5J|NkLgh_B*_M(fshqx;_XvC+KD^g% zw~isP?-=UXC@Vz;xuY@%1pLLS%>|0YMH1w0ddPCzi3zG!+aRQ)Ts|!_p%0ID5ahl{ zin@4xz3Gu74B*;e6+uys?27q7gk{K8dWYnF8I~|HF%or<`vCL?gsV&aJTn{fB2=6l zn*Rt2R_@-?3ySF3N{qWU-)E*&JLa!sb64z^U*fxt?Uwaoy<^<&TNH{Ik7saMB(0p9 zxw=oHa_k@dN=5q?)11Y;`cK7fB^rf@#VA&eR6hF_!A;vtef(nZ>ze^`M~%0!@wrnW zGc}g&-6Iht)uI!f4pq-A!pZLa(O37S&!gP!dVj+H*m%|g>c`TA^KWM+%eOlzto}H5 z5u1D$|B{C3AGDb;rk9!uKGgFQWzpBlcT%(j69t?&Z5BBRFW{U~Y3<9XC(@VTLJNk) zCQr8k(^Yyfa;2=(0(?8fL?FNypYuEP5T4hM@b$K|_Az2#o{#74om|A8{aw??<7Tqp zeprFlSFVIMr}9qr^fq_Cz#Ayb&QbDj6$j#g1F5lr{+b@up4Cnz@`dv zM9NMh)#Gzsp0i2DO#G#4P;0w*67nkM>@a(_Gs5Cu1n(+@!WO~}Ao>uKPe8ydM_{uc zVzWZ(ii%tS$Hw4TB1EE-aN;B(J($B1Xb#5yAPA%)T1{dlOM6dYUsh16eLpQ(W&G8LcuIAu zZUh&nocY+KT%P&|(z<|{4DpX#PsJmyGB38eRY0R``+AdM$1Aj_(en>@pQov2P~S>BimNd%VqA3R7N2e}qt>jf_DcOCDd}J` zPF4`vs-zi9LQ(wUpnwGVA{mg<@-I=@8)URmySH=*6J;81E##StF_Pa zQDbqj8*;~D%-ajkXhXmo3SO0%1r8teHxw60M_Cajj1Vea@H$YVt^`4k`D6_Cw>5#+ z;_-IIvN;F_NOHLO6(BEXoYLO_+7fu)xpT}c64M_-9B^42Km>v-@P*MqSSLEke5ME) zH$UEJN3hlIb=f?#nR&Uhh(=Qd-)4hyrKG~NZf({d*J?%WHj8_FRjRMuR zBpfn=dt+Z&bY5_}a~k=GP*~+BD9j{+*q*eqE3cgX3ZaNB1?7 zeGfCHetzdVbsj_E)lHY|ixw4q*IqahA)~%TbX2sCZE>q+bGW_wPmZ{Rl)6rr=I0fI zAq~7A8Ap3rvbjYhu{gF2rD(KB^KShtzRkL;v9FQ7HEV1_g}+~1Xkvfes^vTRn%s9= zvPm$oh%&h}CO>;@d~R&g0UKd6ViR_9bGu_&9f0+qGr^H@;e8{vm>^?gWAhE(*Iajw zvzTaCru*~NNf#lT!Ogg(xWwP^sL`m6$t;iI{xS#GDu1=%a&X4n0bDx4oDy&Nag5`2 zniJ86-+vbA|FEmPvJ0-6>nBc?8||0l8CV}z0lnDTb?dIhC@x`U{?a#mU8#|1PTs$F zMJ%*1y|i*xk6d|sWP7`)c+>eCU9~}t4ikJOTI*M?UUl<(-xT#~BC17<9+U=pjQAHB zZ7?;^v8{mpJMp^HJaA+9nyqPO*1AqdT(lS_5b4};q8LHlSgTP1r7 zYRQ*g%9e?yFA~f6>_Am;=B8Xpv6NF{wymaYPwI(X%db2+q~MwRAY(_ITX2zd(#HPj z-F32g5*lw$AD2-3tF-otzifK)xI{3K8j8_ZZ-|8N=T*_tu+?CB^5WSFKS(oZ9>JKn z6Mpx@Q{R^um9o4kO)~gY23Di=XG4mb<~@Lt%O5A}de3o{QLd*ny-p_=Va7sBPmjIV 
z)?@+JdS*cI`rW+U`-O%5Fn8t|)WDbhw0q!gz(klda$kSHY7Y-h1hgvfXb%<~pkwJy zxVp%+H!N+tW0L7rZ~w_?tlqjfxUo2`<4b?cp+u5S*>=*)Yh<3yiPs*I<((+e_Y+H7 zqj$KiB1!ABNqAFxGG6vNw5n#j4CaAZd`zb9ShhrPR=PJ%=9xwLfTC?c%1216sxkdp zK{m6fV%wf%KfasMpL(t@8idMay;#E2LAGP8<5eF*SN!r1xB0!;2qSH=$1Dh!PhW7} zgoSgu=1O$B5I`{;7b};^UwJ07z zLv`$uu;^fhtM_wltv^EITQF;8z#t8PG+|9a?kruxew;(i(iSj{rHs`v%2utI;}>qb z7Ctvq*m^45@>9&E3$Nu&D<9?Wm$%znbz9=3e1B0NqH%SQ%MADtB5dSOq(U;am=(b+ z{v9|%wqZwsNDFq5(D{F5_fi|aam>EZeEqnmUS2u;mCDC0E&1i-b@&jm-|=W~ z6hkI9drYZ=VCQ>(Qhj}Wcx0rJHT&O}bX}~E;Ns1W!d(V+H_m_)c6P#m%HR_uP6GIj zFs~s`KW;TEs}!IDZdaIk;% z7bKJjnlXx1@kVUwjq{kBnwztrSCK4o}CfFt- zl2SkP`*oc-@x^uzq2o{7Od^qxg$KEbTm0e$3!pYQiH-vs9^gw1D}qAbWV{Y(j~dWF zyuX^@4*EM@{UlX+)84H!_JMIik5)^wdgc#!i32W zen_bCq~R;65ev)F*di#HaNParAJ_A$ClzHBVqI+5uxV4xiwP<}yc&v%iq30E!FJud zv7tY}Jidgof64B-xn_Uq>*EBPhxoP_`*C9!ER%&f{@lJNT>o$oCikrzueMpO`eVm> zAVt+%{C6qOz6qBL^Zxa-aofI}{J8`>$f^yPSPY>xJpH#3$OM?h6y3!q1v$AV{{Ak- zIw45!Rq+Y`eI*_|xUt~ZwZN8xX_lFlmEM^2^!Uk>qku%P1v3zU3_>5Fl;wgx+7`jI z$wHccUh=BzJagaw1Igxu@C?p~J6YdeIHJ==zJwW~$OMe*suphiwKT zgyR8I^%hqDy;)Ma-&f48pv&^|_p1#F{~pAeFsl+i4|kD%yyAq2vcDVb6PZ_YKmKi0 z)QX$%0>bJ@IMtxvB%F>eeE<5=lGn3a1QUAx`Ara8!Fkfs-!B74B0S|?EnojW4zKyb zXZNDp&;S$9p*rsk%YXS$%s2k}XBaY2*}x6Qjn}~U<#64aEnMwyR|0%#B+CU zhpfVzzlNNx9GPhmYe4ZmDS7hr>1PDtl)y)i`UJn77m$M_6$bV!_*J_m>@e5>AsEBZ zu2`Q;o;^8c|0tbzd3IaE$ho5gkxZ=KuC9j}8Kg7{_QsDkLoM8eGJoHV@SVABf*bQK zBV>QGi+BY^C#;F5{$C%7OJ?uy-|s!4e0TPn|Ni!^+{b30@xO~=)rRKZ&*}l+bC9iK zAsyAz+l5O5UG_ZecEu=+kQ+o;@Bt|c)tsp5nH(>Duxv%^hDznm%k(E6WQNT8pSdMD z(Uv;;Zc}|Gq@v}5b(xqUUvtO_BSsk#Q&M6PWFntHsw6ZPI4g%-)TTd&(;5@Tey^Uy zgLB*UMKqj*(ic9GJ7Ds~X%m{*yV(0k6WLr7n2USd$w?gjeG9ySs!qNktV zqbzm3Z{kp_K|?(6ZA3(?2Wfg+up`A+bEn!>sARc2R5H~Ml}6gHT%=zYpAf{ zdIOi3!jCRPD~5B58pGrcD|YXDrn})gzKAnj)hH+fxP{yFM<#5UuWn~=5>;?1e^j7c z**2kM!C|{$+0xOzQ>FA_tWPelWl$3-Z{YR zS#Sb}1~64O8c9}F)xFwq2S({M^k5`5&i>GX#Qkxk$QLgdBxchAl!8Uo(nv{30CrJN z)V`a*>jsE`EzQrhv0^OY5GRg7^@F}ju77-Y#j?b9_+{ahNJ8(L2zEL=Dr%A51?H*b 
z2__t&N3oG3*~)MvU}pk*NBu?~vmxcB4B}Z%Uu#kxd~>?!5JO`ysZU)?IMJ^r?8{N- z{l=rcDTL1+bI5_NwtP8-xAoaREPPb4vOpROFxs7TghjzjDbmZpt+x`aY1I8fm-vuafLM- zSQ}8BX|>mx9}N_B zjEep6>tQY3d&`fe_@Gvhwl{^oMUWxlcQq4&uq^Md+08Q6uM0T}rlxC3AZKmux(RCv~r_H>vd<1hwMc0x@{xHJk2&k(Xpz(j*! zX@qm3*~=er8W(}aJ;}6rK8Ah^A=KJp4-GqCVppiYskQY^%&SF$52jxCJg9FN6BHDO zvLDEetgP&DyaX&vhJDJ3kHO>Ac8&en{{r1_{@+^f3-SRJlW*+as}S}2-Me?+X$a|F zyMDbn(BXL766uUdk>|pEe6#`Uy0L;|PKS0@e_wxKmwV6i`qRM$rx}>j>{nOrPw~8# zK^uC*AiyT?;-$&u@W{kRqB4#5&KB$Q=DihhO79hmE9@LIl02;kT-YYbrQ;9T>pa6F z`n9KJ6E4Xrbxlw2pD0^QSC@)jknYg#C=;bfc0U>2(szv~Xt`^jbjiEr?~@01uYP6! zG*+MSaD>2*z2ZAqHlA#V@E4*CXXHB?tqJaQD2SjtB{Zf`B$!S` zbA6b4qFWs_b|r0skN)woc)e!6z0sPaXrq7jZGLdiWTw1p+K+AE>xlI!)Yc%|U{bgi zr*s8LoNp#|OJ1X4z-Lg-oCSIW49HM!8nq{%GyK_4k);K0s5-@(CpvZbQCX`X!g-KM zKY{QxA=}}95Y(xQ6(*CDP;CeSkEi`Wnm4oyu?SF)$LvrZC;Q6Ax(TeCr=FhKSYU*O z9S&M-K4C5y$ZSChY+bZsUazEriCC(~c5?Kh&H3cu>#fn9A?K}! zj-1(3eR5JO;BeOSFmFcQUFp%9%Ng#qZ#NW9^cN#dh$Q9QLyaHG%#r;nLCs=M0>$0B z0^U4!k#-Cv%?g*LjANkXV`NVfJPqLPzLE33pMmfQYc-&5hmo=n)?WsUe$zl!3K6JA z6y!2Fi2LNn&C*3M2LlwB2aM~4!di57bv>Uv$%06WfgoKW$7J(uYA5R9>%$2K;y}i?Y)p&A{G0uv6y%>n|?f%(N5lK;Srn>%u2heaCyafS7Xbe4KGB)SO+%4G~Vcba6F7}S9oIn zev*yr(?WA&F7L0y_8qPL9UVU=?4L^%#k(@r9nzRTFWt$fyZddd58EiUBc`Bn{VJR5 zM$Naf+PqcuW{&wimJJ{`1_52c?#P=QcPo1tZ+Av_=F}T` z238lvfwnrUs$AkRKBsqAHYv_JH8$NgAO_Kjt>QH>&QqsZ#4eL)J07`5%3+`vDZO!G zp(`xWh#wia;fiIAFoQb1t7nJcR$@Ra7hOq$9 z#Vr{eZM~q@Fn&iXtE*e%g=@T}O=6N}Y;|L>iPM0vNP2i@qbiOL z$|Nw{YK}K^eu71l2|w^(#-#Lx3sIBKqr5USuxi67ObgV4r)rfRajm+>^BQ)H!{R)< zb`kk293P!@^Vr;S1=2_`)$4*Dbc-j(NOH*Q{q8p>(|@BEu*Spp2rwX~+7Loi1Y@A- zTHL!Zo`9C$z1!n7>=}IF>eaJEv+?t1eATFbhUXYrjN!zJ{-xhuEa~7f;#t9|7;|r? 
zNYKI|`)F9};xBhEnI8;h+#CNv^Lqxy(fJB50tr(S%-#HrE%<{$fn9tFUDCOj-c>*j z506;bYhAKvVxqZUo^IV`clWNR%8hi@m+LWd2o3Oeg=ntB_D{#C;%0XJ(UCMQMt;jU zpZ)7g#U~%MZ3$sF8fw1SD{Qyqrp5M?hBA%y@2bLEpW3#zvJ{Bgul~@uC3U=NRi+dB z=-s`}G3K!|N!|)_A9bjwcBt{M{=g+C#v3n4s!is+`My^cKjnIY?FscIo{MH4gwbNy zUCO&3#B7V8jd(0|%d9N&k8~a{x38}rk`|*=(0UT43sK~9EAFG%dtm@?HGs>RX-$p6 zu`z8>rEa;`-_?VwK~~8J%;S;lMWhQa^x0t+=b&&RHUvJtHPE$vMjU0}BQ9uIfh5LG;?8yz)gt{rw6~fH0TpBJ3iKC zNCV;8NIg<-`SS4QjC&K(;$5-Bjr7KnKSC9SaGZQd9&1G#?|Dw91}3|DFBU6Iu}(ix zbyM$@=ncm-@jJ#`XESym&#$)+VCj6XK|kvsfc(k?L5d@+U_^Ec8_gru-+`eCF1*q zYO=*}>L1nW(G?8W<{cHat*v+Uv)*}!b82HaS>n6;d%metIi=JfwugcI3nR)4H3g-u zKZ)5K-(y1WY#dGQJas9^_Hm3_ePhr2XV)}+Nj>LI6{mR48*e<<7AT=p^is^7;_W2n zvEP|eI#MaPlHRj+wWi$xYH}@u{Yz4Nz)o9Q+M%i9q=#4OLf!`$quyf9!!fjAxyt~cjDXUgXc4O7gLP0Y*= z5OR7dH9_C~xsU!kwziA1@+WhWR^#q1eX#U;nM!(bcs@_Ie*DUn^`qZ&JN$&tkk__w zt#ehlIYKVj*e>VQ=CyV;=gPX?i(jO7rz@U9-+()&ap+V3_+XS>n^i}>`OeA}oTs&= z`OTW^*+$WhJXU#jzVWSMouAN|lS*=Cmd_!B#;?v>)QF^~A)*dc>}GE670BIO;X%SI350MgSz%nO zQURcLySD%R1q;mT;Y;EM(WHBO1b z%&ctTJ^PWf;r#x1aQ7@Kq)=HG=+99s@@>PwkwzJp#;TG=6N_=m=6x?E!@hDg^+?`~ zR`btuo?09qZrW?etj1%4#;A5(V|}q(`o$RO_&jGxevx>RM@J{i@W{^m=)MfsZw*6z zn22`b!i84rbJlmGJ+7PVg_<4BLv82;&l0>S>fKz&ZB2j|avv)EPQW_VGsD_1I8EH5uFzbq%m|8u1H)rTx~go|+a z7gzKr^>lX&x9z(A`{rRmiOux9N2$YvQ5ML_wD|dL3x;BW!XxB6c^uY&A$opVw|rSj zSZ&Z?0LQup=D9B@1)xNIp46X~@qtwB98}BnE+T^Q#p~C1q5V66{R4&IPE zP;jm`%q=cn9#LM?ahQ*f?@GGEVK@{CAyQwV+U!{|`$x^GIqV$m&caM&dbxeq`+|TJ zqyd0ck&RXZn2s))Aijxrxoc%YE?njWmX8<|M2p4kCH?Cyvi&N2GcqJlpN#cS8o48H z4@VYg&^X%?3?3tL&p-y)Voq5N`@b%1jm+H3_#Y7d1v$AK*b7jQFaUS_#b?%tSx2j; z)yPTxE@4o6$6E|^FTfjnZGjxJK^WhN6A<}yc#-D%LY5L@MRH ziC=#$8p^Qu@0q!{f}>>qUa_{tCugr%*II=0s07~r1E=P*xX)q=e}5XD&RL%2&zIo! 
z%ptDIJ|Jt5(nVmF$e-J{{%puhNst&JIN`Cdk~mjgy@S)HX@P7QCwO#=WZ}S~C@i-; zE>ZxXVw_zRAyqnon&LBc9f`>Q)BNwY zYcACGmz`aG_+wMp96fdUe~<%XQ`jhpiV2VYWU+};*~w~}=~ut8g=S_AqQg?m^(ppN zmcrik=;x}EEOrvH$_Ea7ADV(TJgNI#L=8Kp%ES?5IGjNOaVd|m2*If2ZSxR42D>y* zZK`0S10QC6ZsF(mf{x~Z!^B{bK*H~9^&@N+y?3dBuO8v9KoXHow5bmjx(z5sM20T` zt(Ts<)M}Oga&rsmyLWAxdTIxGpFLW}R9-IsIa^Fvn}(^Kw!;NBxo%8`Wj3A#FtKqQ zIG}on5-*S-?!SC^#2`j#%bvp6??Zuo;P45*K0>(#YcT9!7f!exJ3_w?)D91uBhCa~ zzcJN;>R2!!{L&@0m#CG1mfwNt~;D1XM~S|lERgc)X><9c!*hWbM~IRX6l+_d9>n`5Ek9yXAt!=IoS_j z)F~?BkUh}bA(4s4;=W32&IXI6%jJ69mSh*(F`SxukcQb( z(p#Okkyl;f&SI|3ySo^)wROTOtvk*wS5Qp~z8JFNpmE*NS0UDdtfa#~%FCPk8zu%e z!vgCGl%E&RGh}cWR%-DHvoy^R{zS-gqY_(|0dIjly6;0O5u<@cPux5JgXe<918Zt( zxB47m5r_C4aJnyca=N>Ug zvFeY${Rf>22&kynRK9re7W{M}MmRFHbabA2dpko-jfc%W9sQeNoRYgevsG0y)4uDr?%lZ zKer{nNKO`J0C?sDuLFPvBx1xE)e0c?fWbaX@AKWIZk)0`kL07d)u%?K7A^nC)z{Om zZD`6o^65HaC97Os@#);qZ7;Ogr(891P!}jt*;e*EVGq_-nc=eK?(xqnI{S>;TzKA9 zCk$*o?raruo{KB2lLKxQf})7OcmKZdIwArWPgE=hg+UTtsuv7*)8TZkIRq<=0mQo& z-z`tv{C%3yi!dKi;csA(_ zV`j1re8spk7s4jAUx;$N*C)5kLzzme;s0QAVWum$L{WoH=E~ zziCSi(CcT;Y!wsZxCv?8Q>^NP2>K>!>x&mx;ROWL717ae$H%4w<(?Tw z!jFebg_&~hl_0hbW6W9E0dsS6R?Liubwn-59*jwNp25M5Q(7VyWyxFtSGlT4SMW>Q z5nb~glMeoJk;ymz4M?#t-_Lcix3shbflSCrGoALCXlrlB)H5QNf%^)ByE~<&x7tsK zx^{g9ke5C0A{16qMh{(3uIsajkR6(4ZjG=lu`e-8-<1iYdGIrQXo822+}L`vVX>%}((u z4>ObQx4rRtCJwq4i-~|vSY-`N*Ni{s=<)wioe>&ET!PjKRxEEkjX=u7 zj=RXHyabHC-0IQ%*P-`Ea@i)ebmZq#M6Aa9_eWRij&(ce>%aFY7AP$p#=AHBl#^#4 zYoM@^p|hm?qwUDO6~!sUrD_ZdR(&0so33fobVP0b{JMae874}|Ef#j6Z$BjsteYv_ zgr4$L>!I>~iN~hU{A>>lL;va5u$*$9@)Mr!jUovCq0jD=}x8W zrDMJl67Iy&rk?4vb(?jp_Sw^E;*Qj*Zzm*K%2WhEeF5VA6U7GV9snB$!4!v$ zoJB8yTte4P{Eu+juH3k>0AJKS)>WIp1Dn$h0fF@dO5-8cp=S^AwJmgSUC1c|7)4Zt z{2pN^``iY;PGveNCw{LQ^Yb2kgSgE2es1pEva&Vc%;k%}ua=dzj2NDX;`KKQ5LY>__jYks zmiMxY3Myl}M>=xM`{J@L%PT!}8PVsBe^+?M%4#{TY*R-^U4M1O&j%CIkt{D#>yD`g zJsdP;Id)vk@4>eur?LNsvab${YJ1y8P{BY{M5HC8B&4N9q*Fk;ySrfkML?tk1cV`^ zL15^H836%d=uwOt0^U2&_xX&npZhafP8IJDr$ 
zkE=Qu4movgXImqC0III{5H)*XB7G(Ir~CVyHoA=0v-dY5IlH@lgJ5l*tHtU$1n+Fm4BDyCgarf5I{oh}n zl=mn`h=(9B;BfJW554&_kH+6_L$&l?`H^7VJaX08Ce#Ou_3Fd~5N|1HJyav`Zr@G= zq@5&J6%Rlcf`ae8!XUuW)b<3twXny&*RIU2B_)^3_^bG-e3a2%tGU^LeMAY0)3)fF zS9~gP0R5N+Sr&i(k)-QCwz}%Of4U6c07*j?X|o*Ni54Zylnrs?ws-8TG+E3Ba^7AY z^}s02+Z#^7{~P$+#nX<1;a^s26P>x%5znVjC3D)%+nSUvuC`@VR=O8tr2o0(4BUSj zS@0Bt!yXgFpn2{sivX`u@CaWW5=Efw0km9TtZg3(s%~)TC0P&UvwjNMU5u14T%MXN z&Fhb}(pInbKiB=d_2IJdTc4p+|DJRu->{3Ea6W003~`&kg4+Rl9Rb|?wq zNY*xObj*hje1?W~=4&=Kwh-`LOT&icuT{R9rO%CX_}UbN)d}Fgf4^}Zo9E1Gu{q?i zfWTdMFA@g&9BP4ybdk^<$2+E_VPC$$j;&S!<`54gRbb?2+kd2quTAgw`w1}wPOzZQ zCaN!aKAi6L+D5M5LCf-TEC6$ByVSQVp!4R9>Vva72uX$7yH|2tA3s(y`^bco1$w1? zG?*LXhG#CJ`c4uag2oGYx`A~Ml%TSI!8XvbAIty0cxzz?rRq$;Z^5u!^Z%#yN^1tq ziZ!3r(4&`dIFOJEoGeRaFD3Kskz2ah)S<0aekU5pg3bR1( zV&8E01h%hr^LFQ9nK-j4nJAKcI} z-r~;&@&Cr{ye!7Y0RiyJunF`LV5ZCFF#C(8qs9s3%)P&HN50gk_1#_KdEl>af`ZEs zFqMu1(!mWn`B1Srca;8!Uk`P zQ+FgD-+-LBms&)h&kpKc>^MDU5oo%@x@W;wd8J)t)*3P&2X&~7Z0PPGlbVXk^!oaw z`8brHQsB0sr>GHv!-11*$m~M{P9?5CuGNg0o(|#ZyzHlSEZ<4vOV26RnT3F@qdW51 zL;08SSpE`?{e>fUkh#>;zIyNZ$?a{=U~*ftnC?c{dlbefDQNMgQtha<9x4!jdhw>4 zydQ!&;FNG|oO$u2`W`J-v=(C>{oRu~x!m8HExJU(#pov*#Q4%N<8ugM=h10xWQ>c& zl1Bvj!D)DUnm};)0Tt(f&o5Vx>i8Ny5-7Khes?k@*#3$}_1nMRf6UBxr%W4zT-e7u zT5Jk%aI=SEU-D~- z+6x%`#PV+@1?c1e_Du$XYg9dXp^pKeZmnB4iv%S(F`Z)32dw|`U)zyGiw@b2DR3h@$8-)+6-OCcuV<#1pvR|>g(2ThtnNH93~9LLL(c$I(? 
z+0InJ_qzJodKfrVK75$afY(ug&T_7_n^96;NY>HuMdj4k1*WQj0U5|+&w&Gr!pA++$jsgfWR}!VO1$b$;0ggK>6H-n%GCx4fG1Z468Z#N zbieCdP+sRMr5v4Xn(8(!B@9in!Mj)K3rYRy^O5b9gUI;@dl{5RpdZ6|nI;QhKP6)z#Dc%}4BL+8f&g%GqRx zjJmy9e~pfPT~CY2-req8h$8={a@aY1 z@~~fDb|obSdAH2}2jD1MD)-_W69(U0^ z;@<|f8+GBDoGe(+>jsMr#`g!ANlP28Hd5-}uoZ-ey1@$sM;x}8rVkeD`9d&2;gIQd zIQcQa<4c^`>`agm@kIZ`XsCF4KYyH-Q7?My*WD*5vJZ{lU*P#1+n;-x3)bs5Ah(V< ztJH5t2w?Y9xWXU}GFw08>}?dP)NfMLHFTTtr=X4=9qoG`HPe4uHbBjsB7@f5R(zOY ztTcxeo3Fyxu{bDd2e{WozPS|Cza+bVSG2Wafq|>ws?$&5u;$h(&3<-;NCMjM(2aky z6;|{E72ND9amjdL3&~*T>$6Q+9~Cj|>+vjvc$}FNno$3<9rk(FpWx;kmS5P2zL~ET z`_sVHj8yuXKm2D{b1c-0u=(`KyYesI@*g7{!qoo|8vW_9{%`7xE0iXZ<*HNu&xz2# z(;hfhW9{Mc>Z3RFUO%?H$-%WOuX4_7!3BI&d@KGrAb*>vxeNUTNdLBSe}1pJ{okP2 z-=(Vk@qY}NB3S^UrU2HcTK2ZTqRzXv<`N}KIa+IOt6D>%yW#@%bz69M8(W79WtL93 zAqs7HN&l1I&jO>rB;^`>=kD$~3k<4nik@PQS*zzi{(NU=`(T@riT#Jn;={jfAP1X1Ee_p%x2)H%@0YNtB zH*5!504gI-PS$36UccrPEIIe5Q~Iyl?qE<(iw}aUlOCfwm&I)gR!X{`QHA}j(EmN* zloSl!Qlc;Rxd8(+W#UK~bS`8z5)nRK5+uSjFJFEfx2*;-t7o+|t)&vG-2IlBUa0Y{ zy?fQx$If3DhfJJzY0`$n3UuQ-O>YpRBCWt)r&$0B5+H*auW%aH|7lvTdr&=Jn+MB! 
zgtz$}CO?P$XEYzV`JI`9CBv+nCZ8M2G>^pPF;BdY|GY>mrRsP%bJEaGhEdlxTCQVcKubCH8Z)Y`*yZEgfd-U&YQ>I93sk_ zhD^#e0YbFUjZLk|wQ)GC3Ga7uskl>4n9DhQA5Ai~_(a0-$8nR_Qqh<-^TECY&sJ?p z^Fxx^-o;~Wj`W28~ScQ16RF3(F_C; z`Hce3A(#CgGGy;Fy5+cT*Lotpt;*m$7-u~JRTV>#)xLvCJ9Vy`Zj)l?9ZF5&gCrPc zZst)Gn}#_qJKwm2;#--(iy{|KhDN^f{x)p#TlUXg99w-Yb<3`PU!kt9{@D2R9_{53 zVtKf*FkO&`C*pAG5)`3!6_C0lXx5aZvvY)MH{fppwV}e6n-f!{P@u5RBJMIhXD!C^4OGK^B!gz!2$h9I zu9{j#N=#KpP@JzllSmQ0^!`4$%UKDV-k{XqnzkO|OT(TBjdR0U@WAFhZ{yfYuJW^; zEVLZd2BgCTV*CIxu~*l-r;^@;6)fmfU+#{js&+b6FV&D{t$W6XxV+oL{bV-UGkV?g zq=K*IL!~3m^YMs=;FNJ2aq0-T!Hh4(#&E1ND-?VV(c8XvsxRQ{*4`chYC4B?pMUDS zy6S8rH|e1besOM3AH^x|jiKnhHX#DhQ8N-NLY~pQtkU7VSji-iy(kQ(xhH8}203n5BNBAH?vJfA3iE)oh8sgb zV<7BMtfRcuA?Oyarso@-o&3#9W3f~n*Zr91A_B)i@ZnGW`K>@Y$%K-TT&k^MRQk=I z{m76{MK27E(#Bm$Rq$(8y-M;HA#0nhILwnvdw5Y!vBTRl9}GGVkR2WK!k43Ra&ou; zC^(I>0taHF)<-9RDVhiW)|S*Dr#u71BiT;-_jG7si;lZq$B>NA5br@1-# zGlpVZ9!nt}9y9Az9w*ZwxH4iy+2Lob!A+Yshu)cka22iZ3LJJQSmBAlZcusM-aNCn z(&S0nG|%umqaW#6S+T^*%v@aMD$rH+lOu&-a{X!NA-;!!T~xc1r}6Ha{A-@;W_O|G z_3xa38itYad6oLxYp=pOUn#IUASVnTVi8J(3i^^bbS3g!BunKyee_I>R5cQKfQ-ZIv-8Mp%7H((a#f7hwV$P zkB0=E*=SY!XXyjT$*2jm+nP)Nc(pxAP3Y*W0=xX~EJPUxcdNOxKd2}NuJ5lP{0&*f zKUDT)6N*X)<0RvyMMKUf@fRT~Fa>yps3V*d6@7TQ$Yxu6OxqGcHs@Z@V%%uGkx20! 
z3bZ8S2Evj{qc$|=2|@Y@6|NIbS@2kf62NOaEvX~ z<2m((Kr|{h$RtEOPQ%H7l^`=`$9%wraw{tyfn@v5Bl^$WeshT0d6s%Y%v)j^GH<_3iF~Df z`lnNJLH~l4$E8F@C&U}JEiD&rJFD^&B`gS4w zJD9&MqI)A4lCyI@c!G30-BT2j-JsZ|=e+m{Mk!m$^fLdKP!C&HB%nb9gD_+G zw)*bg4gMF)xIm4JOz{*1e0YZ@=>xBzdXqK}@icbljBpD1GQFn^#1u3{rYsX8o|b$~ zQ_FADOs4K$8q;kbh4+4pKOc#S7aLwytT3bXREag_R=4v;@b|gIB^e)B`yRx8&%wc5 z9V|W7k-V_bB=e4LojMuZc~2R`jo1A;C9dU1b%fKUjF$|X(qSivbzeCI)7_ea*k^ox zH$?7VPYp#*)Wz`U{YqEM#R>0mbV2#N-nC|3#`P40!-ZIot34HApr=-;BOXMxwVo1A2c7J4Kyp*Ipj)=$x&yU;lt8%jHZnq0mkxuKD+iz+; zjpvEf50^Y#li|ON{0SUMk293=r~W2>dYUh{b{zIWbSJ;twz*n@mT zs=+>f%?UN=qgt`3oZlwnw;63H# zkNKp~LWL~h3XEdvR}N2hl)KH>u${x4qr$!|)W8{{6m585G&rcuc-`Yb1!2toeNcVu zbTdp(LSCMQx4Igzu_B|P$dds7f__UbA;}i4@oV`?xjke;)`$FL_TuC5nfVef(_wS|AWr+;;m>F3VSQ_Z7JOIx_O4T;hIb}@`OSXl36 z(VRpUD=MEHHqQ~bo3IEbB#n+7Ej!f*!khK-(Jf&kJLB9PQNM~PH}5|tZvPe>T@@P% zqM5(D79`YN9zeN8-;wcof7NhWN>$4J-PXC_-(bgI7tPE2EPxGPc&V{j#=Knfwde<; zgQ;2-*&=5c{Kc|M0#|ih!MNM^VG_mMccGWgFQ2Oxn4;bb|L$r_)ml3BSZF51_;!r& zu%4(3SYD>@E7VDcK!@u1?ERob749w#BIMTEG4GRBF}`yo1mD|U%H`%7**g@gAXiA$ zVO}7U)F(P{Yg%Of7!(j08!L~F?Ib!o$;%nERL$4ytRSoX#%_@BaxzD!ustP_Qgk$4 zh_iL*#wYxo_%Pq0KY6|2zI5cXF0934VNqKqk z?Ec;iDf~qUZyZ=H7{rs~->_@l%vMTFmYw;o9QEpsyv8bu=NBd^Mb)e9AAqs7cD-i| z9718*XO_aiSepZcd|TQGA3z5D?B|C8IyjK`cs!7^ai${$NR(GjCt$)~4E(r(>%~p| z^)tZEeE$A@GRm3?=fhL*b>NR57;>%0 z;r@W=X}>6}j=)lXcsr7oRW2d$;0Uuvqcr<`sA#c{}jgG2{gY2d>2-82h zOFO?GA*)tH9IC^OuaiSTpauNOs_GbSqQf7A z5b=6Sv=*CYY*IAEN6S=aYcSH?+Z4?^NY!msu+!*}juFqczP5e6=+8$Xv9M|@@K$|h zF5)H6mWeYVYkGkW^i!&6h@9{K?6ZWy+NJ`NwXsq>U@0+shxk)eyx{2phf_NZRbx{m zO>ms9FSHgtY}6Cd4a9lJ_P^*&JI8g1eRmKc+!YPe=-E^8pN9W!C9SJAAlno7`P|kn`io0{iW+|py z=t9Sp9g}2=Jz~Kc*A^zJ{L6i{$39$f)8C&OYtV8rcNUO7Qm;A{;7kAYQ{F}0cMCH; z_ELVS_PGQ1Gbo}ftHjruqJsxUArU&v}M5YWOHAB!EHVDdg+v}FIA4@ z;&6~hyUscOqdU^edD!kG=CK^~At1*)`e?+)zudpGu{rYIM?qK$Q2zKSCfs>GF(E^f zU$o7fJU@bKTQeaEyIO%KM@I*fRcrP`S|TM5r}#QUyn3x+@hB6agvzjaAhAoT?SSR7 zs%JkB_F5e_TIfdTa}xKObIMmzyP+4DS{1z!Urgesr=JGx)?Mtp3MUn8F@l`-z&pbX zdTs|k#TrIu=~sKAjt*hN|NrUf(|Ka-jr 
zHA?{*7~HanFRs<=0v-|G?nbTX5tz*|?X$e-H9Zn?r6S*%_ug8`K26Mav$12asgBkgQp) zM$gxYwu$(VR$Uu({E1|FbIp>b}O@yRCEz4WpBJ+e(ktr6scTDDaB*BWvuZm zQz>scWmaKFjVgI=Rp8-=^WYskUVYrloEDXB4Tt36?ES1k^47Q zS&5~cV;;|E()u?>mf4DTNW0HX!cD@KF&=~Z=_5P6LgB)EL*BRc8~8di_t;IDDd`07 zvx7<98+OweO+65}ce9^h<3I7vNjyaCT-T;VL$c&>aBoqzG(`Ks@-NU2>!HKbTLI2B5n9NiDO``j zrOA&191#W(Cb#ibxecD4Jk{JraANcNZa+M(@dmpUuj?6TlF+~n_K6%Zmtj*Obmg)` z`AL{~h3$C579L8C^D-bL?xoj;KtG)v#?zmu6rp<0tz$wCj4Jn)sQ!I7UWVW%i3m3D z<^CLv>tbgw{Hl_t-PQ`K%;o%~ZV^y5yAy(V^J4Ogop18{;n!Q4KG zi;44T59Rh}^Lih>AQ>ve`q|wLKrk=h+2zHq5S5>_g{4RO3M^d=FjX_2HA|sHvIFFC zdEZkNrYUx4I|&4EVD!^aP;v)!f+bE%ELTLSA`)$JM~z5Ah^R8ZHsVar{iqiIn< z&jJ=kzymc15W5u-J`kWD%h4=n1dI&8Ov3U5atL7W)e;@yg>Y(l5S<|&HPdkKAO%Bc zuho|OYzLn)DYMi5?AI+8J@{+jvADlFwJ=md(-ykA%6jzXSO5Bo6o$3t7A?OIY1X-G#`5d9ou4FNMN4 zbDJ92#v)pl1JhH>#st&|KAed}gE%tg=bjg@(VTDfAYT2ko8e18&c)P|eCCO)4+r&z z?G`psgG{!Wbn+yhHEpJwV>=a-j>&W8dP`_ZuXXN6*4`AF`n5BEvJcUUH8$2LahAj) z4vQGy#Bd!*UA2(-F*Wb$rP^k?VR8P*f$i-tPWMP$rxZ6UqN(s>gtRlp)vbL^^fi09 zr|7K*V^l!98m&;4Kzz*;*@xd~h)VCC;_0UPOuoE#xlc&&NOuXPgCN)0Kt%lfi7^X; zSh+(joKxi^2g;l=&(3Y8N{LAD{lSorg9CXoHTZ3rWH(2JF)ck!QbvKbVl5+2?QPzm zY$AK-;_eui)t6|f)%r5yghuwD@szI)hztsbg3K(E9$J|Nxh*owQk5^=04#Qxp7f0*DFo~`aHA;CEDNwk6qL6X$lwYOOU!v-B>FKG_D>| z$6(Q|TQ1t&15;xVS^R+9XAUwD!}#t^;V_s^n?kVdW+F*(jlgtk7{9O{- zYwr9qj0vf`f+6I4ff@3cm6C?$I;f$b@@6s8V!&QWxIh#46(IQwXa-7W0&ZgnIO1Bu zNMwdtl6ui$YvYI%z|jUKl)rV{G{%YmyY))qM@R1UwZ_&pKlUR{?iZT)LFl^hHBw6< z(dMfAu*}mEqa_v=9r$ZNsuZ6yspR)7h3WkI7AIa^fff8ie`PICsgUG!Nd6feb@24o zON`qmcMEySUnEQ-7;3)j3dh78+y2W1$Upbrv*NAWdqd*d>6M_Ww!Q&^Z-trXJka`4qi zz{$?nQ*i9Vrb2$UwmwbLs;YkL@xzHD78!G{SXj-cB8=>4OD_2I;-<12Juepa{Ju@~?F{U}@BLn^Nb!-h5cV#7Ikc3{olEEynR>FkL$h@+s zf1AJdZP}bg%5W_^MIHG(_rON8A1-I*=(MX|XMLloHPme7gWUv6wzxJ9VZT$(V?yyF z#LZ}pS${FF?ftj?`JTMAm)>9Chb^`578jnYjg-5w3bW{%ll5J7yK9LgWC5MTJf=H@ z9LKxUDC?23_)~t@A{dt1u7voTFGFQRGFo)CI(Urw=9sgS0GZh+k>H|mcq;IvkxDA!zfjCoaffUnPyG;MqjBdLkQvVM{MBxKb!Zb;j_b^CUS>qUYrEx^QZ z7@X7XLDZQrOglh2bH|*1`8}4dar@Uczt0jU2bpGg$WstZ!H)zEWI4*MeYY5q?7&h>AAmRO{CMw 
zBSRQ^R8q^Nuq;QsCTPWG*M2{oR7f)E6aSOZwP(;3 z!A#XWU~e!JG>R(v)g0YjA|0SQH(G>>5S$o4@!5%T&RCU$@i{&F;@DvQb6JmIlt;Ef z;K1Yg;w5A6-<`)qajwONjpico(UX=L84VQ>l=l%v_FLzi(GhyQe z1r^y(+0vQPOxkBySSg!;O*ea*i8LRqiBEP++O%aYD)v5x)S(Ln=>El}+hxs2Z< z=Bn#`a90SJPl(n*X+!BOYL#IAfl416c11d;sR9}Rab7HnqPv4lT}}No9WbY zgG0g$ILZOso7U%mNYGOtXL+&@tcEwSCXA~2lHfEb&@4CJh|{A&vUB}VbIV(^9NfqN z1V~`Kt_bAQp{jXeQ8Yk0EEDp$GpPmWcqo8sP82j+BLGn zZ|!!;Y+6bbCO+ppKn~^rDvfaoFjUd40#&R2V<^Z^4O#3QDRv?R>}B&p6#W{|fVU3{ zdktKh`0-9Afzv|P#|OUO(!Ni^?YKSr4A`O??G8AAAVQ5wAFOiAp$u_c6P{cze~lfH zDC41X*KgiQ@6Wukj`;+Sk8e&va-Sb^+lcJM>>6i0OYJrP-SK;Rqn1Rm+VNRsT#TzH zTPHv6>sTB9EfXn2lwE(WQg`1q>lY%GQ#x%3q0ux}LNDz?3Z65mlXB8z-!d)-mzryg zL#$_CSQP5txQ>abxZ!0PEoD!#pf1=$d{yn6sx=Eh#6$$>n=!F4w+}1p?wq))$I>~A zpCa$v-OF}}&LS1s<8eU`y7AUOq!60&%YA+?U6r70l$qznBSnG>gmm*S!p(hmjj81~ zE@Sb}WpXk}y`AF}Wi-AGxKZVClDlE}FCQ`A`gghYoReurmn^#UVWp{aqDfl9A@Noc zstxs)!FvE+g@vVn?uK~t67p*I^}blEYVwcah`GsCJkfb?&V+=bl2ZnyP!+j7CvtaswbI68?7iNaDB%m?03{Cnxm;- zZLOkjcC2(0NzZtF{L<4I-RP$_^+MXQ^*o&oLeHom_OxUF677~n<1WjOdFMt%;U{H3lJfHxWkfDo*n|3E^J zPIY1bl!li&B}@SE%GE=_n=Alf=E8NXV`FQ3zX3Rzf=v}nl6ZLHgz)xfVYRFFW`jSx zIWGAzv9JmhNa8Y@H1Y;D`J;znTHUKD!-}=8HY$t+z;BHvnOAh5mgObn?B+4VdDtg7uj@wV;{G_I=VuR zL=^JO8(O*D(BF^q$H57w)!SLK=t&cYz$Z4AfTcfx_CYhawcCI7?jj--f^BtKI!!#< z2l4B{LR@NJKz=P$6zSdTa<0n8bW;my)Q|dN;=Y6wu692Dynp)f=(hL4hlQ+Ni~>9Z z5zUCW1zgY4b#6N3Lq-~vZ;J6w+gypF%uD(^g`sOC*BD)jJ+1>=u^u_Sz}3Ua@%g~c zUt1*w^V3LC@jkl&)NY{Nh!eG`E;%=5M35^yQ_Q0j7}@#t9f2n>RYv6l?_yOo z(OB#?JMF)W&tCS{lRz&Gv$KPzC>>19$hkDNJMB1$tD~G_zFYmzuF6z-Z@aPkuDcgW?rqXf2zXkGpO}}72lWM^Ev$$xoyU2 z4(Wk3G*yQQsf|iX$^c;Y{rE8u7=Mdx>I&LS)dEj@Uii7~H85HL>$Sg9!7c)YrTrAR z^@89~Sunxvu`CC!8xFu~xu9ki@=UB8kXdb7EdXf5c05r(6!>u=1n z9pu9@pKv=vA)i@N#u(P)3_rhDBq;M$Dk3|@|F-f7OvKa8I|N3=4 z7N;IFVXaFcBYQxYj^59@nMqcB>`|>!m#&%1f_$J*$>0}*=2ptKp&(XKarHktytCL} z>O>b2YZ7IAqA-O#4vEu|*W#s2uD0V8N6+&7fLdH69$H*<$P&{WrN%lcO7VTc&5>Kl z$q*P&A~#Jj54{*{pu4Zg=CxNUz+peZ9CD1Hf3{NMj;VfCjJKy*wffLjSkmy7e$H8b zF}4IyU>>1cKog-wsVC5Nz>%EvfR~q+Lf2o{sI`4?S?+?<(=@m#+jnxol3z)a 
zRbl9@)UeexHFQ>ntZDGI6uVLz$;XzGdUf6bejFSBJ1g#6QtB>vD{m;a$wyPL&I2R>&WlzSMpM8wMOp7GaWnkgAqv(>_S5pIkYtu0}*9CQ8?GCsm_id{;eudTTg7+4@xH4-5|rPvcu1 zQA9vBmS&XBCS!_rqo&oGx|7e-_`xGDf7fH@dQqOI$2gq~esHe$9PNi0WEXqdCRycq zi9jZY#mn6#dHIbtHG%qU??J{zJlnF&RVU*~WBF*&N#e*i5;xL^p^CXG+QMj3b=-kxlM}n1smm1Yd#pB zC(7#3ZKv=}!L&xL96KVgb5(QaarqkrNqFoQ#>%bA?8Z;B+?HdWjm9%H3q^O_ygOAI!7`uQ5#!&))hoTfPrgyy-A(Quf@C55unRfRR~~ zv(o2qp9fXqIA}>lP4$Fr7N4=kW6o46d5DRb&gc9yKJ@WZso3my1vJ5o1)gs+{TIrG z;zvCjUt3G4ea2x1z0uV4YOc1OBIt#A;ua*ODsm?TlE!zh--}53N2!?Mm##NTi7YGJ zG5qLG%d#QJ8gt0I-z5ua(y{M828>Z)s#jI>xtP1&(-AFoNec!`~o%S2u&3WS>#TF2kd)oXq*ThS~Ezf)L6Q%DRhRP>&UA;=JQ&An>+9n z$({b%BZf&!0_%CF$>iP3m@aLFm#e>->z6KWWWK@vGp740vba_I$;)28xSL~_=67iw zqV%h8I$GWV~a{`2Dh`~Uw+|NaHQnO%;*bn2zDZ9o zHLtd}|NU!5@AY4#^*?{ipTD2Ue4_mRhwyV(QmV!~PZT_#+@?L~-jn0W)fXSpcW(+l z4PXC{Z<`=a!}`00`tko5t{MC@(9%sCJPDqw`H#C<`DQkE&Jf4^|GXCPF! zqAQfTQxFbbQRMywK1^FcLAH8_JH;d;`{4ruNC=f<_tzB;aGrgQ2&o-RRoAI=4t9v9 z?JtJ#7k+g+nBax$90_(Na2`l|R8pB$8pd0!tmKauMT$y<-v^oT08!nq&E+tCsFx~W zkeYgLDP`-}xUsSEQ&{k}`LP&UyN{Gq!1)tCor1$fn|Hs) zK9`a@+g{QGc$-LiuY0+*ot^LoOJRm-#|4m3F`J<-u*z&E+gHkNpxwthG%{;!7~eUQ zIR7$cFSc*PW74e&jn0-Q7@xWf`?}adWVhH!vi-GUoXs?If*0nFemT8S+~3PJzFww# z+c4>TK_J^j&GQ10w5t1)ge~sCqcWq8EG2tkGJO2bMB=_zUbqo#me*l*z-BOz-pYfi zL$`K91M1WYn*?1;YxVAu%|)IHTLbN7z#+BS0>Q}TDTVc9MG}ZerlfxXNUYB8mV`qh-1~GWPy=t77Uqcnjt}6pA)a|Nx9V-PMBkpM;3Uv#J`^_%9Er<}8$AlIZE99+;WnIrY8`fA6hTwMRt5<)vBV?K#tei?U zLrG}t?-EzGGg#D5l-pfi8>_IC0+YRh6rwQNzzA~W9acm;c;}%0W*27C6G>@JnLP^# z8Cn6PL4V<+`!SjY?#YcbQc9%R{QZx=v6-=K;+}rnhx)j9>8gFUWt|cj8XaG%hcK-l z>+@)qn|Hyn5d2B7Ek2fY8|C1}?dlTCPR${^i8tLFlU0SHr%Io?_yq*9tD;HF))qRW zzkO|S%7dw`Rg)RH2KAwF?Z~k|v-A&E4^MGv&A7KqZ^X=Z4SI-5+1j<*^l7!GK|QAG ziRvIW>r#;Vm{YA8)A??#RyMmb%G<3@{NYrWc7Vy=<~_2OPrXLG9w(fq_s+WF^nB-v>beBI*nDTG^ikVYNMZ}I#S`o(dO%Gi=#!Rq#tXd&T6lisJ?0Ee)TuH@BxTb8TJAreJl)h^ zaIW%wURxXB($)J9nR8ObuV0$muWR1Kc<^Ax6~P1T)RvQ{rR5ttd6d*z=R8_gzOFGFCHW9lOP3+1_{hxxvLBd_@B7b_XB%l#SK6uc)aeu#z9QZWOe|?lj zMI8ch+ZkYBq@}rk|Gs{uwpkZV^1|<8BiSc{?FH_M3<(@&F0MhPb)N10d|hX#xH9vh 
zuMe66F<*e8C#^4=|M4ypZk;^Y7{_r4(dP{n*9ztsnifo=_;_|pDQ+uHYDYxC zm!u>>$l_lauO-a$)sQK~t{qS6dY2pn!x&kShv{j2%IpVRs ztNa1YVHf?V&x9aP4OG}{yVmL62d(iydgAW&zW>^<-a(z(KRR%m+DQa>s0#~_}G`d%G^V*Gg)lk(oGXVa(1D(2;ooZZ4y>}gvdKS zJ=hSt1oW9D*ks2yj$w<7uks+yg3QBY8Y9kxA-MY;eBxN{)&1+656FIAopy@i3-gO5 z9%wsoqyz)L7>DgZHaLikX6JLSHqT!S%JJMz+;ST>za%K?B>D!8g~)k7?=G=UFD*%! zo8zPl9e-<&VZ_|}<#cY*o6kVPZ6h81VQ> ztv6)%lu97+>j39irA&|QTd3@9_sBJVW zzVDtVnU5XmLd8=!&^$J29{pL|VOD0_G}#$jYku$RgpX{{MR{N|5C=X`O~Pm2)w@Us ztBal?V@o~I+>5d)mgj5B#dCE+M>?vkm+wI*ob6GbZB-}~O2Yf$WPcSCjIgDq+|6*% zYM1jR;TYp?8?*B({Q{S39L?>qG{IM-DcgA*wAmqQIMVJh6Ac$v7-;PQgF=o3g{%pK zy3bUbku6>>1K&C*FLI+|o>5%wVR!P}Tx7bW0wS0P*)PqQ+e}(&X}DZ9A1UhiuMbEZtO1ULky( z(UU=?OI>d>cfy881h%z(6SyoIV^I~Oouzr3A&j2+wF!wG-)^Ni%rCPwHZ`@ib(*xy zcUP5KzKaNV5<|L`Lu&Yj7~d6z-+4K{^!3+#*Rg0DT5-)MZnp0!#PG+ZOO*0m0kfxG+8fCzp@XlwPAE~ zEX;tU8J{{VO)T49Xcthsb&1%c$M1KAWNIhOSS{+ zT)N?+sx;M};=9<^N-ml{_$4YqTGn}c*p;BmV5aM>A6PQm-SHfs=iIo3Uj^(`6h3qk zYuk%5UX)gP_%<@K+4dp*35j3ck@kYjiXG;Mr|xn8@_Mc+C^MAL5DfJb9NM{hg)Eb` zuBLrey+d@GCPiZTZNh^O~R^)KR_aDa8{(Q@%!&p8`ipY0og0Fk~!$hvb>dcM-1 zhaL0$T_aEi5E9o6T633Lb?Yv6CQG@iKhg6!XI3`rFa$pUV|gm`PGS8S7?}wHJ`wwJ zSmm+F`OO-)m9@8m#u}yrRl&~dXBe#YJt-jSN7tkO;ND3bcHK`=LC+$C{zBOluAoSx z*8W^0?3xKw+d@wl_NCC9T5cO_A*aU**0Og2MjrRTsO4?CCs)N;MHqNQch@IIxp^6X z$EWiVy85<;d$U>HTVmDDP!f|_8t=9m?#+59gXW%QDm(y@aK^@Hgb5Q`SBmVK{H0T1 zQtjc$I)=QIT(kZQK#54)FWqs7Md;Q1FZSL$s>!rlAI3J04aR~}1V#Z-sz|S~(u*Ly zMMZk=Jz>UzQkC9O0YR$tPE-U$I!G^}NDTogp@k&B9mP5C?|kn$>#T3BZ+-uqEZ2-6 zB+v8Q``-K7*WUNOoCgLO@{Nlw_^jiLn2~>t_^e9|M;q1826Hs|uFWYhjkwS`)T|!{ z<8Mh((YmP158wf+(`_bSRI5AjYBZh2`Qv}I!M4mzG=YmQ70?*@Q zGD@v4ghSC++WAxs>q480I^wZO{!EC+ll~kjU9yDBZ_u~*(3@cNP2oOxa&Rf#fiss> zsR0q7+waGYy)r8@&mo)f!E+8L_`qP+^`xrA>4AZbTlAw2z%ov%%bz@>8wwLBDD zva|0i8C=nN#MM~waf)X^)|ka6>S8r)%)yz+2H2Q&J++#`uGzHOzj^^+lagm2ZhT)} zJ_j;^eFq2Z54MH6jf&H8OY}(w4vntAT6gBMJg%uncF}eO8!C9_%{Skd-@NNao!)o5Xmy&MPX894>nKhC zC1yiuQS}0JwDz@Iujgn z^+S2sVY@pO5k8#wlbl0gRdzQ$;Ac>`UTBfNhd<`1+3XbQjYqiG=wlowA}j(TJv&; 
zN>?F9K}WDFIbn*(7G2;>U|LyOo$arcSvrPl-u_fmgrx~=h>3}rxq^JTAhze)AJw>Z zAE!suOmRniA{~^PdAaW~RQ$IjAH}?5eAnG7e z&o~bFvCdPsA`O11^un_3`9fQ(V4bQWZZ7o{^N}YZ)aHZFpMR%ilbCmsX-h(=PYm2l z_L*0t0lMCE;YDrzmuEa@nuXM5+deu>3lfSB!RWW|iyil*x7S?G%D4}YuSDF@d=?n& z$djLIAe`Y7(^f6=ZC~wDaIn%xxZYD;V8}i{hNk})z#IqE!sc1ulx-#S!-1$=M)QrD(vWUhIV|DCdygoJ4d`dd?25I^GXOh)?!1{y=NBf&@D`# z?7N^rv$L}^Og{dn0D{f`-MfjpHTw09adw#1r18qvtA5pXu=+8h%k&k~Hlt`>V>NfR zQUX?Jx@8I?H>;}Q*SFo2=boaS+jSl>2Nx~y+adLmN=8!y-2_KyMy6$z{FIP?v{3dj zWZw2HgE?u$)GmDYK!MJ)AJ{X{RMV4uyD-1}BvLW*n{?);5l(Jc$=O(33^x4ivENY8 ztPVYpP{odopS|mmx^gxz$`C`Soxs z3BX_C^p|d%!<+@Dr+CZCCPnqd&2_z8-4?YzYUH%7OmXw;m%4^St8LnOmi01kw43{) zBiF`QrQd5yqEk^wrZ75h!ei#_H*Syg&L_9MZ7vTY991gu!FgXDluG70 zbA#%r8v>cJk5~Pc0J7L0i85$Q_45|8-Bf^Ow zXvZBD|6=k&zb{HRX%Jdc^q8Fm1?vO&4p)&`{B^SVdgoGmtUHY{FUz>9J9bbg6hp`b z4#n%aI;-Q~)g^ou&#E$uT>#$`(W#*0UfM`dP*i0X=J-0TwjzE8@tnR^Xt1yGpjN0$ zj^?%T3z=QjvOBj0qInJFhI(_~YUIXudq%=Z1EGiv)tUMknCCXb|M$bc%QLR+Q}%l> zfUlNJEe;LcfAGM*XpiF4)v1zPTnGJM;;?%Z3WL8WE`C@p)#_q^U2*95-`|m+9{A&o zE-fT!=H_n*<+Knv&FVb&fN$s47`!HJha)oB!kx@852d z^`FhW$mMeCj29^&0)#^|yteI#w1dUgE$!j z%W#RZQ180|fE*eE@%CJ!{cKV8kPgwwFWuqIg6w-NQzc#Sk_`IdK)}c|b*m)&r6#Z2 za|NH1%A>wjI@)tb3bZ$}RX2v4@i1%*%zVw$j_5OCQXqM|)wt)llpek6B)xud#9|?r zOMPQ}NM!nBwUaCjv2l+>yJ|)6{)=x!mHo6$^`~LFzO>F!Rb7@hF3N~T zmWbZeZ#LDqH8iC(lz7+6SrUOrAxai|=vnAa9XMc+{I}mbn=`l&o-hKwIj3S(+-v^S z#_-5{ds=9IkI*oM_L2vKY0rA^_0X5av9zFtwDPo?wl@L~9fGFBOR@=9TIZxgC+>s` z++P2|*8Fo8&@;2~;o59(R1J~toqL@e91Z9j)&^={$}J7rmG>VQDYr39FSPp|dSCuj zJaOkjD8#*T%TBTBDyz;j9?M;vrTG4)h7?_cl)Gw5Y8R?H!j3HV)>#U_h)N1E$!~0L zUwL?c{M?7bMv6-JNpFz)7^jm10}`(D{eLM=HkL0WhJ=K0^6?ovN#C{F?P=jjXrzJG`LwpHtws&if%?(9GlOSl8zmbn|vT%}X z2$3p@vX=_IdGis7X|~M?L0hf~%e$je3zL&KqOLDJQc}mr*#@t+dCMy?nX0&+0-dx6 zGvD-PF!70h%T3y>PDY==iP}?ZbbuS{o#@H0N zEd-XCYEqqR3VYjn#$HYdy682%{q3@EkVTmPBZgFQ=2u=N|}udfS%MQgcgMK;@%zFziZ-^g=Le}K@5m(op~ zT6Z75aHMg8J1m?-O4@^aidRQ|%b8x+_wixZS~CCq>!d#xjktyvaAmKDn`FctQarn& zZ(oOu`_odJ#OFAwDTid_{N$wgiWI7htP)Q0=7-Mp*BTdSR2=Rb(Ds6`!11Z^&ZdGv 
zF}BbWtarM*V6wt>?g~|AWvj<$Uo5 zoXW6-#G4(#+R)yNe(91Cnln`XY_W#TyuFsxI|l@jg0zR}=iIJV42)gE2F65B%#)hmUsxm=d=kYSTtY8OKf!XPrghodho`iV=+Q1&pV4SUm6p%{z~= zQoNMEh>|OHEA+Fj-q4^q9?oVt^14DbmDvC6B<%Gfk1x@##5}En6WU9eANy9%&_t`% z1L>J&hwJSRF>1o&uMwPygm(Y4ynQaH4WSU>Sb{Hw$njN7%*v!Y7mS=3PV9h<4aV&A{uPng{v6&RV2J_F?(j`4A!}jD?Kk2l*ncDeM z^KD{75oH6RS2`hI8z%)KVlnP^2CKvr^vZHZYOLUtOVth0f$Sm~NaCmCW{}^KtC;(% zbrryydl#S)qo}2QrAx!mQ|`Ld6$gduJ>@ykK!x9l2elf71)s;20CYY{hOcVgjK z&9zCKvXPO*g%D;_(&*jG?||z!Vf|i>i@F;S&XA;$tsTs;ey&c=_sg4B!wwu_Sigk3 zZj^1pwRY@4PnsD)v~YG{C6pTIG&*S%J-~|nC;jzdA2=?Gx|*%q2TXiNtO(7@aC0r zzAUuagoA?MT2n-(y9Je7dQ9CerbBH>^}Pi`qoGYbwG;Nfq2+@qN?B=*=igyu#GBnZ zhv6vc<8y4Nn?O=R6G>Xy8^Le6Vo)j1)bcDI4QyDOt@As}Uzg;0wx@5zMQ@LKSbCXh z3yseP+n<0nl1HlcET+%}_NMCO#=CD6g@2k2&IZ&&tSNI|;?-4JH%+f_sn?(uB?>KD z@>XUShTpCox-MV9ig4<(s+E>et8z)Y&D7^`V7Rp=d;J-g-677%>2~d%w)KM^`Dg6i znLlbso7GVHBTMy8cZqU)m%TfM_A-Z-=>w0wa>#GX`{J|xtZ|D=@I|s88%%N8A2IML zQ(gA_<;(B%yr82!N56{Je>6fo{XjGt^L}b2vW3z;pAsibX!xTwsggiF&BTX8 z7bM)U=x#1-fr3Y_x|ziT^Nu%W%A=@B5!~8i^uF6CQi>p^pnt@aEUbq^1hc4(U-l%m zR{4nX2mxcO-APYGQ#g|)+gN&RyzCe&D!kI(q#tLYHxsg5%MC~2nJOy4Se|;|nglHf zmzhGqcgFR{#@@dn74=n$%~v77-EMCCJxMeLyXYmtqzcgIyy%67do!MjxL8*a#-f3n z5Ku)^^7X4i@lqZV3k&Y@il)`^iNPr;DScQw95;WBNqbZp2b^M(_s3vbJ(PM?{{rXS z04O6?#4d1TscL<`&Y#@g`F*kKtK z4ceUppqdWl_sAA#cGfE~KL(^yS1#=o(iBQGYl~ZBgpIw27B}5+4zQ=OYU?{wdhjK^3nS*8X+4QWjfX32XL$5VR zRCcA|MB&mEyBsSF_-t zxDxH&lI}OkZ$q@XXJ&^j$tdpC)o;F~ACz&~V(6}ZXw_wh(q;-@;9LhvMpYAkcG~iH9Ym=ww9+}tZwr4llIg~0Z5TL zxG|J*mMGP~VwV?3O>opQAx)UROK*tU&N^w@wI9zqJ`w5gzGE?RNO*2Y&p8eBf7;$sX8IIsu#y!R=U<8!^reI27)Nx3Lu zbs|!LPF-4Kr7+TSh6RRhJCP21R+1y5mo7QXP|SJoK#^7qA`TB{kub$=4rRFI)~N7B zBp3Uj>q#Zgh)BlDBZ_+i=G6HxiVE}AMRnP1IvV-4A8|xTKSs(IH679~8ecO)G%H-V2 z8G$EgGRTa4m#z(WF7cCC8MKLJQ=znvk{WMmPm|ct?A#N%zyBS+cRc?v`ekX{mn7fT zN?nz;--w*}n59yik@O**#TvVTB(b4IV>?`)8UHQo0l8?qDtxZJt3^oO){(AHnFmxu zg!jGzBe7?ACarmRP2BxfX3aEUf;Es5<$Gs9jPt7}-2+TlYL z)rZE`hh=rDm!A|nrwe<#P8OTgv~c?O(h$_jrMt>p`mf! 
zMp>ve(x(VCB?b}F1r*8db~fQAkaiu7wbl}B}Z}dq?1WW z83}%*^1!7D5>_K;-2twcUv$u8&Zbd?v^a1779Bf6PEYSbkRp~X`LapfE##6DDG%(3 z@iMZNU0t};WbiSiE1!slw;nt7JYtmzwj0hzcH7ye`z6mJ7o$B0+r`^k@4qY;nJ5pn zpCsnX5Mv(Xk<77_p^wItwNJRA5n@cwhJ#1TSvF%oluP5t%)xBJD(R&iE+(f;-5HA> z#H}jIc#v@Yf~m+7!YU@J9W&a2rlC=Fq`ON)Bbjg`AWb7Rtz7()tV!V0=V~dUUk)D* z7I?6M74RdKEK6U}^d=D+rczLY;;)3}r{7J<`54hJ z4IvSz9n-l=TyWvrJ4x~kD)AgOt7?-JIvEbq&P*{DPOSyUKKwf^BiY(O)+*fT*<>t( zRSN^Vq6rq|(VQ?KyV&bsPSm%;{(Yc9@J3u2GHiw*pg-j5rkFvvgB3_+G!L;Si~R=YO7Tf-rl08 zmoq|d-wNrqSW=@^)}Rg-NOFf%D0{2tmEDIH=X znbhE&j~Ufl4?qa3=_yT@hV>AthA=vlE`6Ahf^G7j6(~PFKeUZQ-h^RUTmJ>Ikq0d< zOEnF$DvE+-Fg#Kpk2L?W?v<2WRz&Mrq)|a3X#+2m_rz)C!Q_>S3c2%j3m1(%@E#l6 z#m+lmiiSmK^%T(wzlDMZtHn`F&KgpvQB1e7(K}BTqi=__nRlSY(ix!QOT%Q92gB$%eMEFyl-}P-8Dr;x>P++0neKM`2q$`kKQ|^h= zg!Pii$bFCwn~o_O3TfV+ znqkIgW()BLnVEYD4KbQ`xp5|)%KF+ zb*KB5l>GK?ZaU3%tEX>Z9nPj}q|mJ2q*&p|E@jnq_jbg`|2i0aQqiWDf$JuI{SBd- zm{;uQGgR6QtMz$gTb^V&Hd>Qjh85*cC$)-o6`D0DzT#9KEKQy+ACk=rr86(bAs5$E z;>ZO)9;RM)xobmM?+da4br(~l`pQ+Nh8)Du*Q;|cs9ff6M!#LKoqS|gBT#GULovaa zY`0T{P0(0h(_x|QHwlR|PCv~=9d$q0M1WtkblD<>kz{nG%r7Sg3*Lb#rHYIX?b$fk zM8MU7pY?H}M;a#;k0-})kv7m`DD!uNve~2|aCjyuIvbdt$&9MWASs0h z5sds$MT4?MgtA9V2lFs&*!6Yiu-EgUa^{)6gWThOsu}+juNoMkOI!EpgFdK|*TtAY0zh6fjy7!qJ+( zSZlaFgBDFjdb5pw)w*=K!xIIz-*i-+b9}g>YY2%UZ!0&b#{Nlv+N9PkVFabaB6VTJ zDPV0ryFs9nlU=Y1RTCLunt0YxJZ;E6BH5=+2v@x*c}b{y*3e`4UQTwV!perj!oK8r zgw8rrxtUFt*N+e-cwOlVDji{^x$3slPAMj>Ou4zW#oVhB|KMA9Oz!VLFE#ZCB#4!f zSDxtTNyx_Hk$KdmUPea!JVH_MEjPupl|VPYq0zgZmlS@9~?bqj*TEZ{!BweJJND=QwbXxxjbx)pF`V zFzqkfBY33xCocV=+GJ0(B@UM=#30_WDRtLhurgbA*=0Jo0FzU=z4du_iP1i+2!R0N zl9-$VWi3N)!i-=&8K7q#XN4Kqpp>44HBuK^j*4b#j7e~aCgU(n83|tKk}mWJ3wD@= zO<;O{H6dy}?kzTphd{jMlaIBfSmn2KEpF?ohCC=@)Ma^|W|JqFNAn<>OO|5bRwM($)W+9KbICrz$nb=kzV?T7N@z-zlQYM~&*i%9^_V~M#BBAN z-Hd#(B`8v`0POFmQB>2`O{a}Tybq#MIS$s+sQk|nw-~3_<84yR8Emq)!w&PE@<}sc z!D}tvIC+|-{(HgJSrmGbNT2ZN%$Jj-#(A=h)mE40Wh%8ZZWyn>y6!Qt<+MU!L2ei$VcJ)L@*>in0pbAjhonh#4=(|4 zG1K_E&RB7~kge4aWGsT1v%2zo?2GZdH7uJMi>l|N!yH+ 
zm}n;UZyb|eq*~D;znnF74`ScRW&O_h$>;i-|KMkXpHQ@kL#j7T%?K zf)!!)uZ+-?MZFPpUMadgS*~k3L@ZdjymWeHQU{5^`|XG^Bb^JKh{3R*v1HZl)s&Dq z^HF5(8q%Xt%%ia}sA7A^Jl)THe#tV4K>TZfP*&oXJBP5F%Ay8R0%Dz)$0m4bVkR87 zl##rRn!-a2&AUCkkAcR&4bDw8H0Osh+}lxF?}|7CD)pG@n$4SW4}A4kR)y65P;7|F zh7m|?bwdu!g8lfeEbWFfU%fj!yGrdER@vo)eg5$3g3Y9EX+9k*|hD=H~v=@|%>BS=ETNo#0^^-f&gM=SEbvjgPlP@B-$6<6hmS$ax zVkH2dzrjTuSqg3zp83WQHhq+3a4m+M{LZ(c*d@aWo98hqi0VX-`OoxqpAc#ch}Hu@ zLTr>;QkZFHZVVH{M!r#X;dhuLO!C)eUQBI;i`YTl?(pQT)bg!fOJ$3*(XHV(4pv^{ zGvH87u@C-^TCRVyF?k?miI30&pYDN$rV2fF#{*(r3=Zac2v zCIDM4VzB*k%%Ti2&7#foxD1g-O6g^0WSM;fw$DVU7voz2@4TVQL+Tq@ZXH?f3i5Xd|Z{YMcWIUYi_ewS&~WZ**e&K0qSxEQ+@aWsg4(!=WFDxoq?b zibcvi*p<-JRqoUk%-FUX%xtl<7q#g)sr#Whnz6eM28$mFiQoW(ZmL#}5i#w*%&u{o z-i#w2wH(w?D9@vJ%Xz`u-8J?b5sz7xn>?~O-_ymD&(JQQcG>6tRx~w@X|>1r z)0hr?)tXHE6wWz4=|i-eB;R>>kQwuYj@G?)_&XcrB7VR$c$-mnxc^{G^0#*r0KbU_ ziVg1bgC&A{7T%I41A9m*aaM_AhYnxap8EQ>HRY;|d&?#P!!AvcyF1RGKVnSGa^@$t5jq>u#LC~&XX%Py2z2W6GMyJG1{XYC=Y9!kXnz`)~-Z}T;^wA z2lQoR74nENQs&!@O%V-K+zC=By^wbMn8QaxOf9Zn?Kaz35>soVKP5~q|1bnT<-@}U z@{vQ!wj;^2Yyq^4h(b$cTb;fod+KB7RzJ&@TPtfcca(%ZoK0$OS;?bfO3IWts+6m0 zMB!caN}#ZdIWDHGk_35X&UZ*Vdx$Yyq3w&s8_S(vW&+ysQW0#(P(qmnMX;Me?Y;{Lj01={L|qKbszC_bsrilENt)zpu#%}8EY2U@o0O$ zrIOMRZgWmOgkx#!Lj&g^{}c`{Pjv3X_zncHF>Q1NhKTUDZe=4CiR*4E=YJoab3jLw z+DD;?L(tk$zB5a)6rjSA1Hq~Z!ECR$Cg^DPxY8m>Z<1gR4 zUMCz-_X>nNttLJ_J4{Cx!vQMEbE7^>Q5+x*Xnd?4hsR=&FkooDJ0onddR2|EI5PCs z7xbJYeK+4dIm-AtT{))ICEFl6DJd897)QKuxyQOetu~#2>&q@L*L!%7t{0R_dvXoS zXoKL{{CG*8lmqxTcFWn>IWjzaLYNP(2hQRaB3&u{t26B&j-U*u^5Z4lvbuMM${hOo z3rwRD60$*A)A%Hp+Sh?1i==6)pF|_XeLFV-^OvX&5dq9EV<; z?PLOlzV70TWN#&x=@t#>)Dg8G&YYjO&NZs&vPy<=!nfq*%hcIb9L3MVq0eN^J9iQy zLF7@DdbLo0pk8gNd>FToP|?2E`+t6}e_H_92|RpmZl!{Pg2DmYW8Jo*2_R>!%7-*I z&gC;KwS>VP;c1H_RW{peb7};yhaggG1!C`&F#SG0GK5JaYuxMZ-Iv=NOZo5|jM~2M z-qOqN^HlaLMo~q@R-wQ5>sL!qxPEi;J!p?-<>qQ@X+@H^`!L)H1QMp-OS;ZJm%ndV zVjTq&rTP|9Gc!MHvB-y>?F1ccLW@v)d6F3XAjom`;LEV~BCfliQGVVPKMQ~NFM?ty zCl^=qxZ_1BDMKikff@b%)+{?ao0E~kbVVy-~_%S$Bv0; 
zQ{~`aA2@P469zi-&tzw2-d0iy?CR=Dlsm9*_frfSs3u&kVq}yE+F`{$8}||lw-Qh;7e?)P^1;{a1-l>oKFXiEu{;^!)EXxT$7l1R zCi5aI&|rNN6Y})w?c2AXg9g0t{ZGFuc7gokDG+me(J$&Sn!~9aEo@k7-Wq>e{$O8r zR+c6Vt4#J?QB>5lvU&>==y`CvmY|+Q-9FYfHXP=!Y%XCZDh&=F;`l=#UyZR-eZsGoe;9Q^{&~s%{d3=flS=VhkL#v-%o3Zv zdGJcdy1#t+B5c}lv^7@nhC}U^d4#uGvh-VZ*{z$nzI+FFnbqmm?qEitwa@Hfo~3&( z_>s3}3@TmUn)u*FSY*75>$gBz8>Bd&G&3;nex9p62i)-Y^AZvg&>H9z9}O)J(Cr7- z{lp5UF6ip$M8lR5cAMAA*y`@?rayi9O=RRLkPq`~&70|zXVdoetp>|n*pe$>{&+;d z5=QfEgUU5^GZ_2eOSV^<1rl07(EO#LeZ?(0nY9kHuz%b#4>beyd;#L2xUmlpxp{eQ zV@p9NG=xP;2jqHGr`h0=e>=)BCd_x~5+qjX-gD_O21e>CD#5Q)Q)SM0@>Fk51bHu3 z%&ZZy6q9V7TurzbogBS!;jp^#j%u&f$oTldbAo$+-s)A0TYDL3Xar8s)2H|M>kZhJ zKCu!_P=(o$xmoPIyg4!-SD%7rdL~sFkm9j4Y7F9cj`0=)g=S0=&T01L<4>AdSbse= zYhx)%`1=^Ipr8i437zzJe`{;&VzcJcaGh?hdf{mxJ#PiFs;$wv`bBc!2kmj2b!_C9 zT*;o-!TV`Y_v=}S*TKQTPgdCFLoGo^dV8U$eGOuv7q9#7$CRIVPBrmn7@5f}A)!wj zaARO#0D4Kjg5I^HB(GxEx26a~m6az?p2X)t6Q4TOix!wRe$Cbu$RIK!Rz9;Eyu16? z_B!51!sORmb~zpbtNiyaPinM~g*ITp-KU67s4WGN4=XK0TquGdTLx4a8xMpM1aY99 zLoo%l{T56~;{Vg!r=Kw1{^XG(Of$OUgY+Y4^XV7fgnbchdhqA{`1AbbVWfV7hDJP2)GpqA zariAr$?}Mcmuokho0(nW;J8UAyZKT{N$C{$tMc-4_rPK|=wgqIi+f9TB;UtAFo`?9 zZNN7`Z_oWnG9DmqY6R#S_v8+WWQf&o~d=gZeP za2lXXyrAh5S{BJP2oBco{rB*-frzhBYm(H)6}jDO{VzOOu8Ax=`wc1gg^tKecLvW% zZdAJCvtHc@s8k{UdZY$n)X?O59YmrHip*6LV${{uH|_!_`t#-RfqJpOUf+*X-N&!j zdwHt8{_|q{AoIhwczJojf_8z?GQEN20BoiPctXq5vm6v5a#Uai@;LydE?Rr?D`u!A zb%FYywvkcJhldAkK-7A^H#ec%8?;z6>H_F&K0iMP9&OIdGOj3sOWnFJ&lnaC7X9x# zadDgNi4v*1u98XAHkJyx0No(lF6>^CzwQsj`6f%g5kPxMR@MZbC6knU4tXU8V@WjwA@de9ha0zHEWthQ;QSmo#jUQGiNeD!(t9#>1`Oa)P&sq z+)d<9*RP?W5f~B@)oN}NYhE-E85?U^399vSBGPn6k7`lKB)5rAhr$}HZUL@?OEcj0 zpvR2Z_rZYPwav+}?CNSG2>y6hf!%j?z3}5*L4f$#u|s3=@l19Dh3VRd#)1GH+W?@d zB&`q0oEQS}3}O4>a&xvAlP?1&`9m<7nF^327}R)|$c3JLYm-+Y2Lbln@n3waX3FdsrBtUhK%*^pyo157oj^{tRM9jf*!isset zvK;^|94LpbU`objy0`tV`4B{V;#6nF$t8d@G{~E+s!O>-apY3jjUDo>V zQQ1^Ec)zIlc0K0<~E*T(!nm;ynx4Zejgl>2SJKTeR<0r)xrzeor zo&^ATPaQx0>gm(pq0`lC?MmP7Cw0UL{)i%{0DBA$8kDV0O;2-6N*Vx02oLu8CV7^& 
ztQm|e``x<^=R~^~iHXDagUu%}+w<9gQ;~BC4z(H}ZYOg0z5n{}CwJddl9QSeb{ql= z;s$ULbegXV(i6iPUpcua?hE~|F5P)ke8V-SvE$VZ#b2KBJtx&Zu5z7XV#s>y?$otr~mz)xCItEU>49 zohEKR4hT>V2RG!pRI{NJzF3LNdomMV?6G8mAFjC57|L=7LKm5Uq-ev0Z8cmRB!skV zvN`8&Jh7bXN{@ME)X(1-7sVpcfDZ?2{i?vQ-ZBcA&t_%kRVI$_ z-jILa{hvUTi%TB%mRdJ8Q-G~kH8T3kPih(%#KRo#d0`42>ia&@oI1J-LU`^>vX7c+ zL$C&zRQC@j3Hpg8UaQmVHY!KJO8*5zs(zt^CM{o@ABiU!u+@1x6n^G43F)KJg;Nm?P<8FoN%h&!FW9;H_ZJQ2HtaVmO`{dMpgfKWJU zJ>s`9GFWPtz`=ekF+BV=M0y<%d5o4NZ<^k^b!*N7L5!2{rQ6-9Ni5W??)w!onXIIs z@VwPLqSd?`Fv5@#WRod*7_v7+<97A}yV2?jV}U&~j$>B|3j=rHJlL^rsNrmWzn3KI zX8`oev2N-Q1LxAx<-L1X?{$BIV+gpJ7&0M$dm>2SMAoRV4ZH~p{tdHIu-`k*t}FJE2-4;}dQ>2r8g5HT5lrU5YXwrliN=>UWRLzP8H5qiudb}!cG?f;JvAqIFS9mu+< zU?yXZn)#2{1yD0GK7cDp2Hn+nGSsYq3*zSI?}hYQ#AR9qh}TxDTD+7`uzkhLPKN&KRY6=DUqr^Ahp)Nrx>RwfIZ*XgM2FR}T;$mUdB*~J2 zN(kVt&3t8`*v)FKxjvg3#tqOnLCj&@cApAN!+Z{cELm{0UWV}XPUUg~vl|Q%ezF2F z6d0UKoSe5dMm&%aQ)QrF2{|mcn3!%phFp0;EpfJ3`q%Sr`k!&BS0m;iSB6J&2a@t4 z96(VGF#egn4v&E}wSsKbpu#yF49jh8R>REPTr`sFUr=*10>+r7Sh0*9o4sU^P0R1zltYxlC>r)z|z2-1G}dKyz}Ke*113Ci5mOjpG=L zWbr*JJwkmM&HdM?I@YI|yw;f&ANB~sedvI%W-C&H?J=?!-Rre=^ z{Le%LcC8TNU|?|Y*9djlON!K32Pu8Y-4AgS$?ztax{w|fAc2P0~8J%IKU)${}G23pv}N%&#bg%em()3vFsma`}^WYq?fO^ERTq906<8qdhYr5~RC)E|Gm z1YHJ2=vX@Ax;QMMt)ug0+%Xe8Aw;M7k*YE{Uk-YOyk-v`P>Fr&wm-<``%(<++s%&u zh3VQtm9Ih?_4tiy!W#hU);pWahiPdSXC0-dAr<)2(()Cc2t6xn9?-3iA3d5K4`8#8 z<$?mqxpU{d0YPbNYEqe407oK`l=U)L%NI~E>7Snxx^ku1w2@Xd?n*k#Ya2Ag z;mYZF8-@Sx5WF)igx^XN7qxgqP0tj!*M}Um_4G0g9cp^CSb*k;{a&jCT4czM1qeS#&|JOk zk|f=ffaKM6aw_+nYsc@d#z$xj^?O|T@95gZ8B6_GZu`4IXH0N_x+#N}fag|XCa36C0J zx+T;LCH=NtZ^ZSGHo$vkBQYqd`9V$b4IBA;fFzpidt(bG_6s|VR-f)e&vu-HFr)<7 zIWR0t7pfySD&j~M7f?Jzhn_5|@6&<`NwW9++Y3{96Jj?1!gpUVgWrYM0RuMF=Bqmci4rctuCuqHdXfnt2oxu_d-73J z1#XmYT!>D13o5e*bPivt|`Ose2#y`>hXHsJ^kVu;Ah6Z*wJqx1gqKpvj@7 zq45fMTW(?DulQ}?rhyMkg>%010BES0OM{?n{X;dqOS<6Wf@~B@D7pvN> zY6WUHuR~e^Ilcy<=~2M`Pp)JW1>rCG@>XA)TF^f zH^HKaxv(jjfxx;2wSeaj>?v{2Fu6qaaG+3tE=;QeVLG@})i1{N4h<0Xi#?a`%dEBt 
z+0V58ODN9!^~fxLJ~C=O5i(=os*~hASP>{A*wuYu=p9qw>7YodP&VS8(3_83dO@I8 zR_mS|qgH%5E{TilK^0Clyn`=J*eV=K*&K@C+NnIz`KpmT$o8#3!aO&N0>zhJ1Z5SW z9RB+CYoJV~-AMs-Y`qX>($T~+bOc9zLxbp7CyDeoZ?XUz!|{cH3o5ZMp)#hJJb(r( zk#rpMA4L)ENXubpE$|Q8w{PFICWU`Zm8bI~5&Ec*7xToO58X% zN{ID80E3$Z^46`#fEXjAqo;+rcbSmy?mx>W9a=yf0JX}YsHCp03q?o{_78J2Ga2%1 zeqXJ~pFb;v<&*)53EOsjeEe89L;@;OyVI;5FRX~!YIUHdJw+Dn^QmF7)DlQfJ`w?B zc_x&WT}kl3Z0_uYe}(U5Yzvf)jg38-83BAyj(o?pj(tccn~9;2mXy@gbinMvxwwsg zJa7jWlSWOyspK<@$M7pa&d?var~zaU@ZRnWmdrbfgh1t{ABdstXlXk?;U25ubE7We~$ z5`>IL;ljaUsU+^c-;O?iz@goGLBrJ46wtDM&DL~6yo^t&?Dj0X-Eg@^#06DrH&WgS zZ@zjg28~widUleFDLE!O+WOPe<8LO!vXx`_;eB&W+7`$*O;lv#yX8zR1l~&^O|nre zwRu5rSg)gL2X@a_D9^a|XzN;Sejf*~6y9p? z63+$Xa6YslfeYz@9R($(Pp(_d0+<|X5f@@AHA{a&$2RKNZXyKX0UBAvLe5HPLJ9+b z&Yt1g{IhN2`X$&a^gKG$q9GOExXvVbh(eQ%8)Vd?VVW#r_K6@=lndz~4?r(?+Cnus zaiqTRckeDk7s737BnS!7hGq!!me|g$eylf+4=6{QmRP|`NSj6Mhs84_hw%uWVe)eCCcg#a<-a>2W zl4|hMtT+u#`T1|tww>QQu`coJcybMs;PexnK5f{@U0s^Skg!u7QO0#!M+jFC2ceku zY0|CCg9IcIq=(eH{?)5jBUVVn4I6lx+|Yhg(r1EOCM4W5qd*c$ts}wCzZ1isOOe3> zIMRXujv{TB%xQkrByJ#drH>D&pFU6&(2aQp zwA;q~41(enz=`IxAqqh7(VP$hgb%RH&cwu;LY@ian%JfMeCer~nqz&Ogv%TAw#0$X zdp|#i#=r5$r|f=3^WRs>{#&w`8aIBu{&R`#e}D0RAI>k^prQHCWrqK?oL%HWeMJB7 zAI?o(#V@S`U7b#_(Qom59_r~l_v@Ma`Xi?*ib6waf0fVSAMUcm`2UY9LI1HtS2J{- zDpj~bqg-7XX!8CX^&Y0?BD*iinv?QDu9=Pw@&S9s4o~DZvQV36e!gCIOZlwlv?b@x z&R(W5x>ijn@8;N3J^T0!A?)wJMRm(LNM=*0;HJi z`}p$7=kxpu>n_e(EV?5VOW(wc)ZH>Rn9%KhT=G3mO{lh5b=qzbj~dsHc_T>6k0D~E zg16^_1TdN{C4HrvX!%vbFTS3}k&{W<_NIIQ&Di1Y^~um1S=mwZ*5Aq<+hXRjxMnaK1~aTu*sv|Ik8l&$$)cX^t&7LDm%)L!XHKjx|u~V?k)VLEU?`va#Wy ziJI4{;DE4DuG-3tfRid!Zg+rZQEf$PYv-=O@sHq)lImQ)e;x@QhV60PqIn-N=sWBr z{>`zU(-6az~Kh7IHA|hFNS-q?6I|gD#2%T`(HNCf8WE>-86Za16-+%F9W?Ax|Is~sIzMbgmF^d=#6UjcBuc(EMTd+*rcPU=%c(N@h{w>OhVn6^BY@2 z-R4-|jDA4euGd}q4FBTB5}(CyypPjONZTjEhelb`y^XepJkPT|Mnc%ECwa`K#|SZf zK@o->gwIb)`jt=n<&svbDhbm~+dVEA!IHvC$pG?dA0>S$ge5zNLJpEOTCKIlmv}GJ z35a1`18!JhP&xn_w^K?extQwJ=utnH^zFw4p_1BYPi5k-$M0{EDDJIyknYq^Q?`>` 
z6hEt(WQ;1&c23+jl1Zh6%~xtsmQu6E9md8Iv^?frwt93F-P}zjPR6U!UxuQg7(Uk@%^WLE@rMmex?@3A%JIuDOzrMYF-}LwKu^-}}pL?0%-aha4pD9WcFZNHB zmXey{<2U<3J=c`ePl4#k?|=2z0yY1_D)oj-FXTP6@7@o8XT%UP@zHa?1qvTTS@u2o zc1lG`L?U13hpgo9x1OC#)?BZyT>I&?w7Lz`uOJI8&x;u$>t3JyURzpLdQxKb+x2_O zj{e$qP0X^d_-L{4%XcqBZRD*zq)1DuBPbBSvOZKT&T!v zaqh?GmG9(Sl@hP|3#>CUH_gnbh^T$}rS$N1(bLm2e=>jf*7|qosq|eWN3?cFO+Eee zR4iMAR?wF_hGC7p8XF8-mu)|}yTHe^S2|_!_LDcPG(S9PfAw^(rAqYuMF)(dwiKM3K<}(dJvA zO-TlZfU1iTF4o z*T(!_As4kd?B<(}dG%?Zf6SU+_s_Mx|BuDK`=@Vjd-Y|eaaFRNYQM5N|NrT;2pUjDPwjVlV@>CG6Rs%Bo!bn?7sU06|{=9Qhn z6RqthY&7Niv*u3xb#3$KnJd@%ZVEiv_GSMa#y|d@F1djRU#M=LHJdvse!kBtyO}yw z`1QK7YIE zhf0&b|9d9?&x_>%EIbpqB6h3@UH5w0ZXSlvB__qkJ&t7*T)3EDKj+fUE0vZr6?ys6 zj_v!SE5iuv<3qrXm#VMRr|f272wgU%SpE6VTYo<;Kb5xmlTCQ%t%{59jhZFZH6l}C zsu@xRSQZzy8@9?$AT^MXgE1*`vX4H^E2aN`tBf_Aq)(`EbhQDT^KkZ;>ZARqktq4erI6t5v*tC WH40;L|9Lb3q`}kG&t;ucLK6TA*roOW literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_plugin.png b/doc/doxygen/chapters/images/eclipse_hello_plugin.png new file mode 100644 index 0000000000000000000000000000000000000000..c69a29c66251acc40e38088ef633c9e805129c9f GIT binary patch literal 236396 zcmZU51z1$w_C6?rN{L99N=kP~sVE^ON_QhMbhjWNE!`m94MTS`gfP@FGz{Hc!~Bom zckg%a{r%7L;0$u+oU`{@>wVXH_u3QuNkIztG5KROG&Ecp>Gz+}(6D^b&@jjzVgmm| zM@#Y>c)@UZC!_ojI6NO3|3E`~jwbW|t+H$C?!1dD^PWOh}b;8~9y$@!bRe zj`{LE_Y;e48rH@jiynL9d@rj=M5CUDAV6GC}i+RASid51NDPxJ9E za^j@cVuLMBe#k>hXp#-|Ns05!r)2JCGwE!d@C}P4$LpN&_36KTc>iC=xi?-=aM$wY zF6V1`mHpanC#QS$jf~|6Pd=`zG6oEPvae@u(3*$VH41Re_!0~bye`g5$@^vk`s2TjU0F&zEbF?a>aI;=KEn1-HZ$v= z?$sA%m%W|OOlJ{3P#rC9kVk`KLNUyoOzZ8lRgw$_Pbx#}3^7@ljxk_}@O<7UA2`1* zI}6K5htUhsqX%_$gNHR+JSoVJH6H2z_nb$0g?25sZ#g;v(^PZ~YnwdSVeqxYBhV3k zqMI0@Cbwe(G$dn!VY!0>x(XgEhl-^XKX ztB5CTNOvF8sc*}1Lgrf4TH*iREx~}aUV9{uWG<916)iAX;&l+kdc$2!Bea~B1%_FM&$BHu znK)VAs8f}D>i=b>_r{AA#xnem9tEdSN#+-EF*$@A+YV#cI{DQXuGtO~q+4E~m3iu< zS#VrF=)-qevNPs%fax!Iil;_nfuwuOBU3?=YjY{;x0JEC*fXgX8qdaiZ5+~H?XsD@w2bZ>*w@8B 
zV2KAUg(p=v94ht^Qt2R{n<3o)*Qa&xg0ib6z6w;E`OL3z2hn(|mtKU^yNgoeKX?$3 zlS4;HL!)P->+OyH>eZ|6zP?F#Jjg%Bk1sK$E$g|(^Mg_5zAxI9!qZx`Q0;QZf{}Y_ zqqVFl8hY%e;|5)1{8>IRL_~i%ojLxuH@+-oiGdKrN@XAQ?yl{uisg=`3o_oiJS)o& zb$7wTpePn*S=LYnT{Tyv@jBJ>6!R%H=i(ZZOR|^C-q|yEDlY0<4x)KyZ*RZZ(q-_J zkWj+Z6!T_JAhDpJpkin8nGEu;u^BI#kS;m$bD%dH_2EsG>0rG<-68u22Q4Ch&we)` zyv4tM*MfSEvgb#XFk0z4ilae}K<;(RP zT=3WH&NmVfAu65sG_$geUguh?oRPkEe7Lo>Wv~;MnAoqPCcjz(RVb^|m5-Xx`^)#w zL3w0u`?eb+X;yRfNj#$>SY5t#i{6AmU2jcIO@$?A?DNd|XS-m$!#{TPl$HB`XJXmT z7{mx2wi`6xT^DCM2>|0wGE0}Yupo~&`J%4==G{Bb7LXw9TJ(MH&j}w^H91)ujneIh zm`L(-jlXW$soCUAr3JP!Ib3>cY+@3)5J@c>a(O8j&w7=VmG!bWj7(lz+v;l?5sT`I zt)Spx`~6K=H5OXD$4V(d+|!-u1OnQ?pg`l<3Ike5JX_zgEvLnVG&c{AX0y1Z z_r+sOHnFQUhkM`9f~cIoeq}1?9V}_^o@{-5(VA)W$z-aeJW1rr*3hu1&DGgd#EH*Y zMAHHo9uB486SO+Gys|Q-SVmpP@%B{DvaLOYO@M$Vqfp3?L7r0# zRhdA9uGR?2MBSB3xoLcEWzs#5%^4IZN3$cvx+AEBh4yNyT^MbfKB=p}S5+m>@VO(< zEl&_fUD*gy;$5Hbb(Oc3Jzx$>alb-r{0I$2WQDTRKY94`=MzN$uauOOM9ydY!zctW zeHqg)1r$ZkaN<>vEfyVq2f-osNBy*lheJH0tIkc8tk_WAdgQs5(00K9+gx_sO-~hT zo`PsV9E;jLPS?Zd3$CjsI9*?}h8kTCrhI%*NPGcT95la<=ZoIL2{-#qL2ufpw~i(z zXO6yx*VhYXWu?wI|NQ>_>)>Drdea}j$gM)&5!Jzmqv^5V?KNbA)s&UH0eez_=QUL; zhG(bHu_)W`Zt&NBOW=1_`8A(S!J7{|WhuQcUu3Sr)&EAd_uI!eNCNsd4)*ryb=LPc zQXO=euCRYbMQP9ySj40+@TX~NbycyJHSPCeZYU5pEWn~n`&bFU);1Tm&l3d8-SglpR8nT#?gbYuQVR`jEO_~#=wApkWqn-o<8lM z^(J{E!zZ|=MtQ;ehU%lNEMQHj>(aL%q3g};t#Nwv0#C3-n>Vrn2!!B}klw8@yf_yW z^5SGIM$utHN$PmUDAFkT5N4L~<@4uGA~o#}zG-a<4UJc_@y3J{@BqNoesLFG1&M37 z+x5vWhp?0x-`!p{-|Y9Lx-2A#oJ|-}dLCxZfQS2Ne1>Ywp{_?gYDt1e&#S5v4!a3a zFZlW6H_|;K?&FL6ygJ#4iH#lcd$4$tff9lIdh73c&~m}V&c6PGHezW?>p=f_ZD4{5 zx2U8ftg|!g_f1%sW2%B#sYy|*$Af$5d2&ge{`g}34IAl@?Mcg-dOKly`Um0h8W&Ak z^A|9fEQKg08Cl^_M%&Lf&)Z)FQA(PdE9LinfgU4R$VI3Th!2@`_Bl6AdBtUo%%|4{ z*%3>3HwOl=sz3+}>kz2Iy0S7j(;$w*!b3*p2p0^e@xBCWfPj;4gq}Qnf^$tK?u}#9 za>lHl(sFv$?HEigSt^<~ zOSK~g0OI|}k2^Hxa7{j&YmkMv$-VlmYc;?^Q41{|Tn_89sXliBIeDrPufdjT)PGI~ ztR^iZV{MzFkUI8+X(I-|4Y7d`Mh8gvFb*r_tY;X({Ug}HFmcIQxM6wf$`UdJ80b8_W+xhw_nBa@=o)w6h_Uo|y}nGzR$ 
zd(J19tC1(i5Dp&wH{uGfw=e?nY2K;eOG0@8ICMC?fe4+?&?brdh_R@awDYEUo{sg& zr8pA>1WZ2aX43*cAtLIEriYq=gVHEs)P$-i!EzR4_2RI{PkfK=Q23W4D-c73`fud7QP z5b&Wyy|QGC8w!o-v@RbU73Tn*k{X-j@_3w2%OyKQ7_yIfB^D|_`NQ~7cu-{g5&^Klg>95mVR)Er*Kq0P!qPfz~_LA4n- zT3w{eu8o8YhWzCY1ci`;i`5H(G&EdHf7ccwtA*y^X}yerNi{v}_D~M7J|Nyg7irq7 zN`8PR7mA|Gq50DX4?ItXc`WG7{5#%M8_3=^(rcGw638k^iLEnD(Amr`y&VT3i z-sL{2NkbSVEMwmb6HOnEITI!RgAA>!my?+XhL=ya9$NZWr6Zkdx7vGn4nH^oX7GL4khtiokB;y6=76FZf*{yHv`1oltJ-X$rt>b{!)iZbv;ELWo#Gw9WY@5+FX<#qTZ&i&aZk047<9*RnR6C>S#~Rb5cA z)F4J605h0ma%X2{U0$v#x~+!S0Sv?|Fz-m4!Vg*p_=%@O{|S^=f{TQRX!*iJLX?3G zBJ1e*>{Ak}@Y%$dcZ(@0cOTEVwZU4dNFcX}t+Y&1Qb zI_X`^LPMm`O-x5T9~CqgHhq+ldBo9t$clC!3op!7 z5Rtq-bOj!}3ZmpL)PNUnFI#G;mO2#=R+}x~ zcT}-*^Hd9>Zn0esu>B_{Vk>rp{dE{uw6C5fRrXj72ls3g-@a2tI1cMW%N=xM&?$Obz4bEs(cFAb~%6~CN)(~VMHO_=S~CXK70L=AvZTS0HY_a`weEc zO*-AZy%X?2=pZ?+}XG)8a3?4 zJ8TKOk@;JXcx1CXGe#Qt4Fm`B3Igc&`}mZDisEdBE8W3~c{0zJvbE}1_Zv3!Zg=!9 zfb<%QzByga;j-w39~}H>ZGB3fc-HAIXlMv0-F=&Yy>XK}Pch>g1VP8c6P};pEp|Gj z0wkaSto2kFboGyxd-6uW_ZVKh_(ox!7y*K(x~(Urx~>GTtywLH3oIBecLo4_M+1~+ zF_s^G*IF{*i`WVPSvF0i_#FnCot+&}KB_@WwVqup`jEKHJBg!i+I%pXJzho9h!f}Y zFk4k|k)9IbTvNbO!tq(=O0`t8O*B;vVs-?aaau1|uzSNPBof$Vo)PWsp1OvFp1gzf z_Rx>Dcy#sMzjtO${?*+_ErrKSq4eTn-i1S_i5xdx4Rsl~s0M_-kb^1TjJ2#RcJe%! 
zJOt5n&`Qh998yvuryzDkcXym%o|Buqe6(9st6hKX(UQ(E_;*3-rQ%>i>eMt3R1Qz=|W*~Z&jg5_?Q&PGBI-qeE83SHM z0R(stG6`{<3i`3o^Up+}Dj1=Fl= z?;eY!`~9wBS9^v)pr3GR1K)oBOsxOqQ$=>~xuv3jJQnbw#617; z@t1yY#fgZBfeN9fh8KX`Lw5)h>yXLqrGy?}U%Qa`7q z#m5T#~|ij560m_X%ef2t6DrdNMlG=-~HmnJO02Ro<4Q=4(tI@_IJ2bUJP$HuM^ zJiN4=gn&V;EYd(AWM$Dgl6cVlyH?zSndupC{zi6rM}C#|_7-cqts?e)>yP;}HFd6t zwDrc?I{U=uqG_GpvhBv2oyT+zwscEj(+q~eF@W65=xke$l*{lL3>LaIS&a4SRh)$A z#>Pgq+leYrKXydZhmr!^R#{nj1~ypRu&EC3tgUZrN%u&5kuLVd__;-iI~|` z)*ew$&7(kr;5~U5|7D*or&H^@JR+fe!Hef=8tIcIHF|2QCixd9<-Ag%eKzS8Z(*st zkYuS3CkBI7Pp-Dh1{SfG*BVpu9q} z#}X$QEZVs>HL;zYFQcOypOBD{95pjbdf)L=w9oNkZ$wMMC`E&@#GSbg`f4%!;`BIi zyS|yf=E61G3@tXfsVtc{QlY>UbY+0wsEMAvpl1n<`Qj7I$UJ8{mL|L~z_f1f~m0hZY>CJ9pfYnbQ!@VW#2 zv8=?t%Iy)q!We>(Roc_)W{|ZN-To8>l(eOkeBkQ%!-FN@@30uCeUhkAQECF{-9n}6BN7(H0cO)6h;ZrVzYd(iG5 zDWiaGcWehD?>txh>wOw7WE*B0rct+qqhYAE-yS#LmrWKYjZ2 zM|e1Bnp;?Q*eE(BAqc+rvW(0L7vo;@@ij&B`MOrg6@sKUf_gq;%sF*8S61T$QhvHS zcE5}B9n>dLAR{qxliw1b$_o#iAi9;>aC3*XIi8D6wzaiag-qU%TPV|c|KS4x1tGKu zbVoefkxZrAs;=7U=Y-g0ef*og_pP=vd?Dr(dd(egQ>w2M5t+JzHpX79O14(_GeDchK7(v(_ti_y@Lr|J@A%AbQ(eJ&go( z?NkzXJ%PJWjwU~cQL#{3JmH#QL(e;enydMGF{S-s2S93E7eO?x*HbzQ`V3y!H6uAA1s9DX%+LFFQz+>uxdAdKQihA!_O_# zdFZL}ExoS-#C04Ad;~8p*cME+!2ai7?<)=st}a%)*scA>c$y_|T7G(XxY2NXvF(kP zdVYv`=c@krOCUYCC>2X=*u*0$L(JSd1j43jRjNPBwRJnXT!?miG7Qpovck-d*m|nj zR|P6_HNfW3n@TxCYGo9F_V_AAOSNl581GJSXj)E8zs^F+TYH;mJp0>dyb=wtsL_-e zByAGCS@gVbU;Ms4SGlNmRs1vk+_&xb8+IdWBZo^}=6dwX>cfm8#hA1998VYD{U;2a zKE$G73Z8Ff?;x4T3;q1wp+(2UB&%0lC5Nz(;zY-!$Xa~-*y1*=>!nh9;oU~VGUGf@ z9rBn1<-IPn-*ke<5J5wdth;hFp#`oecvuIST+xl_(EphxB$Sh%3lv!I5G2GzQR(p^ zUCRZvo*vXO*%5*-9py}Yc!GQu9I?MBihLUi+txM3jYI9s>X8Y%^Ah$>D%4*gy`2#o zG0~}E@I5>&>-uYcXStNwjpcK5%hNq*N`8h%x>1Bk*4W;M&(nbuAgGGHy@Sm5IQ{N9 zIZJsZa+(@cAhaQ8gpjJjPw^PR@PBK6;vA4Rt;six1O>& zUu5SL@piRBe%` zVj&i!>PKW`IiF}|@6DfcpkeU-QI7VF6E4TiC)N%7v%jCj_Q{Ke3!BN|BcHTcoRR10 zc;RMvT~ecMXuEP@$&-$5=#6w%v6{{~-Uli&L`2hp$yG=kpgbU+kI`h1_+Bj*0FchW9K7 zQFoqAYg3V@JDMpd%v`sg+yxm#nt!TDGITAz=!uHKH=CMvaJ^~47rY6BcCtg& 
zb@Y68K&D=zLHX2POf|P5amAOAMN~(SP1{>JIk^@G`^jxSZI4xX(!gMl7Ny68QuEoq zhv(HIehO=OrbCt}{1e@V+!utvq;9gu2wI7HZAcJ}&%N@|pt?>-THXBMW)!Wa&Z2E^ z1bKi7%nIJ>KM@;Sal}AR&o>*m1NP55*KiokoH~rv!eEmQD@=P{;_d@!625fGPtaPJ z??|KBTU+{OH|L}(V2#peJYZ*qzcAJRcQ3$8`jNK^cD_balM7FcQ`anv)ugpW~a_?HE`kb^zY`F3)*+|6#i zxcM+6AFcjiS;`gF6?t`vbOHDdQO(i1Hte#ny2VHhI=lO!>`?ygkL?M2!;Q`b9606u z0a)J>(AL{w_)jlE6!_T@$5Lp5@yQx=QlcVV?93RD>A^&KDO1+agG&+#VAUZ8U;Kx8 zvIuG%BG{<8)t>zB}JwIl4h=pZQE5wbMp&A$xaZSy>56l?!r;G;y~NL z(b3T@A_VddE@UwH?9TJb1Xv{z$<5W~+GD=Efo%YEq;3UY&(pHO*IKmg;~n?Xb;!_+ zm)GLNje(#XAJxq};WJW+(_3-W^=`YL5=b5P6Dm$_mtKr9X4v;1;4zq9@EBQw%^_8P zU_egv2!mY&CDN?R0WE9IQ@t@SX$~9-KDUyPPveP-T~tpiI#+TSA@c>8ca%p_OyP3^ z`=1FGIh`SFzC8U>T4p2Yus#B;nEkHysH-@_^dly@+ZxX+r{F6a4&SR@PnRf-OW#}r zoKE|C*EE7As((|g^49tj<$zriVSB!DrLBYz%E6VHF|2_4-gQl-jJC%+Og!kXoX(YI zMuFpn&UXqFT1lsP>ipGeuC6YcG~-WGzYBxfv~~TucsJ9gn$yof^mBY%(U!Lt;v<%I z=K>aM4~aPjOt*s>D_mvxX#;VMF27;fua@#W#JC55ylRQ&Kn8njg{mAMT+N7!|4d_U zg3oQ@2f>`{4$fn2Y?+TA4e$H{jC1m<1#iso(gnX=VUyGj;J#MbzLQ4>YGd6UFTjTZ z{<7Kd9(7O@RRGpB54;aw@Z33(95nOCY;G3thwA*5+yD6|CxmRv4^SdAMoF&CV7QFl z=wNHO!uhB;&sjHN|1iX;Z-f$0{Pc-xiS`ErC8gr~bp+1V{qFK$=1fNk% z1-Wzfba&gHCz;h~G(P}fxo#J$=7x_n4jU(0YLRjdio?2h#Fdj6C8}WL!pp!)4VRm zH+(2D4DLR_OO_s`o=>{!O)=OIP@P^|qXUS)&qo}IQ$U$~5voxwv|Ml0Wo?NJ#I>3S zrvdpJwl6?IBO-efKOorow6mX#Hst6KQlImsN0WXZ=ciJBd`IKGNjGJq9to-&4o63$ z@i~5#pzTd>mge&U9}sfXkO&`%7Pp<=Ca0ZvQyqciS+il$pxI7n!s*(ivRUkLh(lBSX8W~P4f7LkP9&vRSbe>^yW_#g;H0H` zqvr#C+ok<<$Z&XN;pj?U8K8H_0EtLK1~D50hYbmJWDqnjz1uGUo5e`C=0|0=RP8{k z@3qgDSXYtSr>AT!nCbT8k1`#u*9aBbt3bqNW_22K|4G95KFr;B9okeHcE>zCrxNDG zi6L`Y(6PM!{o-cz$kpO((=y!7sP}s~i+~MD@T1^b_i1CkRku(Ou&if4D8u;Y$54a+ z!0VF0Wf6r2P_Lr71{oI@7ag54qb9Ic4`A9eBBBqs+8w8-HNSU&G~q{Z1YVQ16jjV( zhTChBbRX)52DjUtbPu=HJ(onWwQJ%y8#w>;qCE=*zqRr0OeI}+AK~H6EB06BRjtcw zYwI6sMlJWDVCSA(pOu4Y-M+1Sxl+EC!p3`);s|wW*Gnb#mNmomA!Pbxy`Ur>{^AW= z+pnMBFE2GmDHLSoL8@*}PVXr!c2i%maM;w`vIhqfOuia-$on|#{^7%i3; z9SlT`w%KgphY1OFRnGYdGXAU5VRlRSv(MruTM1rO%OWFd9wAo+jkKR0jWM)*neUcT 
zTfA7~#l7vA4OT|&kJKqh!RIf>e)DmC-h%;&8|7PTU1}|`wTYcG|DR+8En5=D>?qm_ zz*Q`qLhm~p2P3QPYQgAp5$98BVpi?%;!pGeV}?l&5~ex%LAoh6Qzeb~!0nuZj-g(5~Q(u^A>~=BInU{F5O8wVc7>uWzvl313!@zMqecq-txk zGwL{_eozt-1x)W?gR(0syz#S>%|n1|53(>bu`(B3NH|JS5hg(7-+78WIy#bTJ8~a+ zjk>2Gc63P`*JiS#y6!2Ez#--?&O3Az0fenH<0*MD$Aoy^mU*H4zC%HaB7C-gxr@|! z3BCF7n58yPKt;`WQ9iXj?P9L4vnTZOGDcD8ZpMf+>~PG~)cBLKc36H5?*!Zykj#de z%lGegHcuM&g{o2BH+-6R>ug%dC4}$2M;v2IhU6r}zg{Ml@vVl};HsH#Cu+pcmpBxZ zCSl%s8*G#2Hwsie-+OZTvT%9l#C*I;-$6rjnTRO-_hn*y7KWl)hiz?xT@% zS&{w$e?YDrqm=@?-pG3qOO2=Zg^~9l@x?zrCpDhiR60GFPPp~+6hm1MW8qQF8rwIr z)t-{8^S~_O{0&XV7V|%8|B3VQ_}RS#WV&dhL;Fvp+u9N1(arrgCRr~$w#NC{`VKB8 zDayD=amJg}INUF`2O#AwcyB%gBmmi2(-kFMH#wUN@xA4msq&#=OWY59pb14PKz_2w1H_G^!;| zDE_i~)b2@yEfDoybOM##lOjC#uJASB}d_{iZquFoQM?J4k*5ZDyk3wRe z92zMC%sMg!g1goB{)R@ys=1(QT_`NJQ_YKRrMH#UzJR_eLg)DBCd4fFg(ftwa^LD+ z@@K3ZZ?EkMixVy5&*E%Rn9%kDT+tH9uM z>3aAFulCw+fa<`^{9U|P_ET2W-nTM9t$uiOn}`?VbGK@7eZGa;eo9fc5x7-%j3|M* zw&-48?DXMI>k`iP=91`jtDr6vThngtgFz0Ru|^uLh9+GmjxEAadRAMF%+ci@uyP@l+3Xvs4diaW>7l-Qy;go!&ymjG+b+WR$gr#>|YI*y)px zkiq0K8&A&}quB|ugNOc@IQruqSd$+W_*JHv87W;@ed^afOGc0?)RpN_ux&(a38?KK zd{JOz;qfQTlQRJ9@?KlF9yy&3Vv2Uit30Ni52_us%EH|XhIauCL1AS${Uy5tZd=QR+NgEs^ea=6Ae@MeN^E!Ak#6INHXka{ zXrN_{Qka!M@k2%MIJwLI+X1V=3m(LRQ%Fx*EBz!mI zi`0POJAzQi9MyMKvW72pcz9qnCfHaw=C@aE;au|BW1}n5vkq2PtU$FQr)0;K)D#p+ zE$seGZzOp*5l;cF?$jm)NCF)0=zU%?4-S{zdRn*cXJjfp{>N3`&n7FWZda!0mNw=~Q!DieeGJzDTUexxMwf2XsDcEtGori#kyh z+X#m5os2w_(b3wT1QyapTo#YIyYRt{%|J~MmC6$yw>bWquMXD<95{Q}K%(23o1opD z8RM;H>Hej!gmy=7xelhg$=C~uPG9OK=x}}JMvpF{8>(Bl@80#kE>Qeyb#1$%eX2h@ z7wANL!!&NO)AH6vhAyMWW1FwxMU?LSQGi&=>Q5ErNCo2egq?pTCCSJ+I$}vO{{xXX z$#^no2gMAOuN%g$#Jzcn{a?Za0^q2dSuxc1j?D}hL;;|*(Q4js^YQG?oPC$m?fx%7 zK~ApmUs-wDnu_H6LxeWXxA!!XlG3Q6`^Czc(X}rEasJUKqt3^zCEh?<6RhT-Ur`dM zRxkAi3!}GT?J=t|D1OV7OFd#8G~{Cxn?Q|4v0L#A)YQyPiGdI2s;*0 zOMAOd-fQ-7hH0YQ&$e^v{&}>1-F6rsOL4W)6N0EkPB<9orHky7aI|nY3xXPyWh}x7 ze$=F)Qh&+<447xm{OF(N=3@%8zZCmpxS0kqdRtp{*OMsX$CpU*DeZr|u`$PAntM$M 
z2tEUr=Oo?T{q#O304Rwzwf_1&n14HIiI>YJWJiVBznQ==NHX9Hu-Lyy7yARyQ_WLd56 zUnituS=mN486afItX^MfGzf?0^ayXpueBSHKa10{nk9O!&v&5d+W+h86asVR^OLW3 zl%ii!jp+mrA6S8(VK6;}PW^k%-{&B$bWtq1=})z-1(cM=Duivu48ij42S$+Ivu=92 z-_?pJ1~t2TvCsB8wU+7Gs z4=B;no0!rPd`=H12ai?b9AE~)#tg4#QfZ!x4I10Vhng*>4?`_64M;Hs(tman>XDp` zjZSX(ns*yHakBiGW9DEAD@}lo@;(Cj-i=BGw2ZnNI3{TC& zXOy1dq@Y`LGIasKJu0AXhe3Z2Ys;SL_y5j1`2>ZTg6h zPh60BE(N;{$?(>xa=lT%2iQ!^%oZE{&&9;jfzGS3HlI}&Ss;%;IqIV^BE z+D1;mplZu$_64^MDh9>$A)p0uaq~Eom<{N__MsCKYn@+ZW@df~G+@OjyauZO?h1&j z2=)4Kij;x^9#9VM5i9Bt5fSC&bS z$4K#c0OIiE&>y6csoN2CX>6|XH6`j03&ig6isSUI(gCX8FyEJ5O};28afAN+)l@f0 z#jBHmMSS_;#c9e3@#Osh1=Ep}iEUU~lDb!RSXap|I(3Bk8_ybEM8 z9SFx3xJQpvQX*!o@`OT;N=w`N4Bd5}a0Bp?M+FPG+G&wPRrvnFkX8IJz+qpCc65}e zAm+}M&{eW8TAj@?9j4udOr>%19tkvSeJbD0;8$iyJxJz{^CK&dPJlnKabpFW+$%Gs z|F$zI?RYreLpFCLgT7c&#=5b<^Z6B^mEC$H)sx46dvB(!uYn6+<#Z?2r(r_C*Yya0 zzu!+f0Yi}pQDJtu*I&6ieP}UgaoOJ=@VNBm%IOvo^lT$#zzs8X@b(>5i*;D|?#jRW zm{SlMV9?G8(VV8Z@!hv3wmyAY!#w&xk?;}1Yd-x}g5+YaKG~ACoXcIo8n)1-qd?*hkR{Nt2{h+b*e<{02hDc> z&Xm|;GlKI2X4vSbjoht3A{<`}9rx3^0XEWTa%S_##4X}m+&TOF=~Md;0@~NSSJ;5| zM#^pc5Sy6wS60@&ATlwTMS^MJElRia!R3epah5m^)!3L8SYPwmWN<^1IGeF6z5s`( zUgri&Q4EO2G~wt_?L%fajBq2@?zTc*Y^dz^iBlqG41kY8W)A%vwFOq1wdD`6&WgQt zFWYQxMLC8&{m1>N)mz`oxv(x3QQmKSMC)dh#(?>l_v1Pmx%<}Q^Z^c5jp<(t*8A|jZ93*z zIYA(-25lClh-*}TiQr2qKMjO=)2e^3X7nLu0>_(!tbmOOF)3<7S$64HkUP5=NZ@f7X3#zwrJm>jP!C_;?(uJzEX2cEuP2f4Xvo=K1V;TvTESA_m*| z1W`#g{AjiVnO+v7oH(q-%YFSScK1%HICBObXc^s&E*8Q&>QhYs&pV0AMNZO z6|0srkm5+j@=N*W3FkEqmY^ z3?yrIVQT^54a9jpofIk5dkcvD3 zSOo{P)|Hh*i3ax;T>yZAeN5ZG>*b(19vAy-2*ji;8kYZCoGt*{Tff!9)wH!K@q+GW zj7rHlD$khAZo9h-u;3pJH9%*LwWu=dvtkpxuYV}24jCBTIkQ4u(!NM~CBa}fcKJm* z@?hqCUia>LmR*688j|yEjFP7Y+fHdf#rVuKXN5lCd^itM+>0^JpW1oSR zJY#(-Lw(w$52Z5wgEnNYAAU8&b#s@O`ze+7wV`ziUdJ3u&wwuu398yd&|L5LI_%)1 z>t(94T=uVlt0}S9D;1U(g0pN?_E#Hht-tMX68gwuw_b{79^)^NCcKS>EG?l=y5N_= zwixT^emGn#b|(enrK4KEKW-aN6jPED;*8%|kLMLEaWkt3omxTeYYt|6RI~~Pr?-sT z^_u)_UfiN0m-0aEX+eP|L=63*#4!T4#pIbP0oTO4Yt52}PyM;rV}v!T@x3%|m7lH^ 
zB*>tVlWNa}#L+>cZzok=^all!pb| z&0ez3n?wxT@2PK^@XVT!(;7oJ`s82AE(1r?MN;h;=cWr~miXFkI{_H76{P%wX~=WS zONC37VpT5dIYEF-APH6mRp|ovB-<@lS(QR>`Y?KAI(gJw*-qe)AatgS_ za+jud{uDFI3FrP#S@KlWNvf23qdtR#R9ZmSmW6FY;QQHDz>R-c_gNo>2Zx8}0$as) zn~RLS=9eRc{h(WOUUkx5Dtm?d!w;xBc!w(h(hN5s-4lE7ECFo6;RGi`Y99Ecy&z_H zU-nLKS}Nj)Icuey?7X`>K2HlHjqNsACESeSdX}+hsHOX=%?kNVg67UwD^I&4i8gY7 z_mkEzO^utG(yIej<{n=C?S8?swCL`OHq{Exl=6jROxwP~SAXDX^)&}Q5ixtvTSLda z6XyIJ@04rv>;(MySf%Y+^)|K)8=DeJ#7emjk_BJ%^a-l*TEx3$U1-WxPWx!dwikf? zZ~9%qNe?KRamDUm4rhE6^3WF1D`zjCjHzMEVH9LKfz<|%F6cfh*wrAn5Qk_E?pS|0}d2#kf#Zi=CMq6uZ_OY=s z)`7bQsQW!ER;;puz@U1_kMpV)x}e>R1;fik9$Uc_0XZXC_;h++b#!uCp*<-&t<~qc zBcy>q;&(nA!mf%+R*IBCgHKvjHZ4Sm*f_N(G#S1SBDWN);&b&XBgWP@PwJo@w|9khSDFLe4-CMPcs*kRi*;ILi+ z^j|J`9eCZj4$xDL6V~T09+-KHW!QL^-ct1%Bq!ztZ*kfs<6%m%F`7Vhj7`p7@-qYi zn*Wf;MTmmzJ*{tVHeEzmp5Ua=ys}ug;a?U+KL*?6{o>3p$WBokLzx}4u4XfxU`6T`QDbWx=WkMo6gi4MP)&zZ(^E}mMDZ<$m( ztzSqns#I4>F*B8-O>iOY2-;9T^PeRn}Cf4UowV#>Tqq&}*bB(V?5O;tBWrmZnfHLGx~Q*bFph<(&@t0} z4hZI_&nUw%i7eG|{4q~6qcY93;-*&_EDx+B*4F+65s?CqbbIzX6<1j==LOA;0g zZ{vK7#%I=1cg%wyXPR7SLQ3+DK5~DMau^t4dHyPwb}x%|U|3AfK;{*>hX0uH3-+@6 zlKvg}T%3`nG)qlAETnwrBGD^`DJ|Neky832(R#0{osBw6NE%Eo(H8FZji{6DV-|NT z*-Ron-ktZvgpK@oBiXt2Q%HOd(|afxaxAepIl@TGm8B>+iaMs2`0O4-y0FhHx!3lo zdPrz~h;`1mT>f+VbXS-vZ6!}#<8@udxa{W~yaMVQRh422G>IW@=Y)iuhiutRiC12F z*wMuyoj|P{tXsYTd?~;x#H?(&vuSbpw&uyy{ws#IiC{WQFZaXcyydr~wNFvEdEZ|Y zN>WqN3=-L_3nV#O4F=yV>t_aeNzKbq7#f``=5F#noyGxII_BU1AbNJ=qf+#ke>jKFpuBIG{#ZlIH^gvGyxJWSHO^J2|4~l?sdnu_AMVF@ zt%H(KN>u=iriUu`nBg0!f5z*0$N?;;~f!yF+f5tRK%hc|8?y&>*g9 z((A-jHN7v{hb>cHmaPRkH8Tm0<8n4_IhOJr;~#l9-#XWuSQ1A`K?QzkyP2@fZ)?gx zXdoCE%TY!s-43GiO7S~8CW=Zx@g%S_Upx9_fpPtaKD`=K@-i7Z6YgeVL?lQ_#2!an zq^8Zcn7Lg^lyN%P9{-*D>xI<&7+kgPCr>_q8Ak702k)1Hum1C4g4qgDS4z2WzORrl z&leLFW!YT|yRkd^T4TN3K5i&cZ?e{<9M!vgcbDO${5oasO&WdH^uP`)i(5%RzpGCf zo6OXp5?bR18%#?Q-?tcr?bjHST)#EuBr@Myq|OA1d07$ME%Z=C&WkvQvB{5F*zJxatSFSNXgYSGnghNDvbH#iZ8sV5 zB&Hr7$Ywr0#*}j49jqT?fX3oSXuXCcqwK#p~q-#5W%sJ5EXVVRrec#1DQHd3C^7S^3EEB#J34sj)OR$~u@b|BwGm`X#Nv 
z?vVh){iXF}T2j0j3LZ7&6n}U;K`e}| z9Oa#DLStU4ni*TRu}v3EJlp;%WF(5Wycqt{ql2wpZ4#Iy)^d{4?s)NHQ@S%Zv^`Zz zCPLiNb6I3j+UpbyaJ+e9Ckce2>lSy!Sn6(;T7>onYN_xUbYCPd zE6B--ix^V8dE-P&OLw!H(`dduxz9>%wT#imuxS0_&XDw3N{z+lM?qK!gu%dCi}_I4 zlJ|YN!1|rzr)RCPZ4415TMf-e`wb)C4a_1+HMuJRF;}@nOUl846@Vf}JHOvTX?^w| zJ-F$71UvMK`57J65oh)Dg)=0AzX;r^T9j>s#F1=G>@bi=B}oT8af=xI)x1dp*e; z;|J#-h$XC$l@&3M^P!o|HGzLtgFFu4m6yfiO&Cj`ddmNa~ zAU2N-7QHTnzuT1W3(Iqu*4C@5ctfYigHfKTiKuYV=i&)56lB|!GFx|7r4-D01e$PP zJ6neBXKUU04&_HFIp3AM;TP$oTympgT5?35UUqs5V?A(eO}(1LjXNZr0Q&$oMEQ%E7I%%-MNf|<9diQ7U}gtZwKfJ{pO-T7jp^LAeh z86Zkeng_=iCVSpp#v~=pY{j<`i}+`EYztU)9A2*3x!ct9)-9&bWk9(@f~@#(qSo#> zfm@Dt_29nL#o=Pyyme!mVB*q9N$pZ-Ww8$v4t>w&WoG=w9C>IVH5iCZSkIV@kW9t( zuL~GJ=n}XfOI=6H-nn%Z9mkw7(QLW7jz6_UwP`OQ`twtV0X`_n}`6-q1%fYP-Ht=_|yGjN6f$me5MHSo! zKDWzvGviGInh_|-{O)nvOV{6@Jh(H$#86>$RE^;For&A!42`!h4^1T5^*a4$+*c45 z8=Q=mOAj~RV7=WRzBpPWh*Q?RyG)~wl~fX;L5q@H_IemIt2Qx64z1AhdUE6n9qDL$ z^pqasrF@M|?3P81B*L4;k$RdsUr5vM!k>nKee1cv>OqD;rYj+G1;%t zol`{aW$MM5AEI#yMoRhaY#SCFBqhk6Y_Rj3a0=HbSXmMfDq*|V^|mid6yx8#C$)dY zKJiQD+aqz5wWci(3}aN{DN{>-s (R(j`$39C;DeDsU;LFm&5`hByB?_13AWjr1! z-BKc=o{q70IuuL`t&|H8XM5>qI_QSg9t6cQ$z|E(g)n8UHRk#Mq2dc{l z(CG*@B`m^7TA`o!zYjKFlvI6LZE%j#`~Bq;ys3?gBGN!J6eHK0(82Ds$Q7F4ei?+Z zq%QZZo76sc#$H|s8aur=if(fb4;#@VcfPm^b#nzm-S_wr3&1g8!39dbwRlQQtYBPa zBacy6?pQi|Y_QR~9~E(ZOoHEn&;ZTpSuZTgdMM)=9j1V6z9!0T-~DwYKa+R`@#1M! 
zn}7T;m%MLeT$)0f6HYH&FWjzSd__oed9q}^2~BYIHsqqZHE>*CmJ58Nwe$$xFf?;{ zbL~_~RH-GDA46?V&F}c4*=`igiTiBKgl&e8@e(V|W8p44n=@s(#SN`QIclCrEO=0` zRSppsm3v&T$>mGb%E+?+Lm%Y@tsnBEEbX*Pq{H@37@jSRKTwKe_`{*Cd=%u$tA|foXy$ui#hd7OXIK9; zp)Kc&h(|-+>*i@1Hk0sBLD;w$sa#Z|@~n8$?B?i>**0jBy9Nu6J%dH^W`GIiW$YmUc z*j0}h2MuZcrrzl`FX;Rp$32;Cv6gB`0&J9uCVu;ogCH|B$G*CAx>Y@D47EFp@9Q)&W zf6vitA-|#^v0r!?zp8KCy}f-8g&QPqnl7O=&P;iOY#Eq((dBzaW#`+yv*kA*8ZP-W zED5mnQIC6k5G!L0mhH73)iY{?;&(;8=1=I-I`qX4VnZ4F6d|8Y*0bc%aFojp+#PwW zMeTx@=O><%J08d%hg@yhxv?p7HeZCB-y6O6IN&>AcfHwKher(A56Y=4o8`IKqltTl z-@FTj8sY4zz|i$JAX!ZhUllqri4ndNS&ug;Q+W4 zm;Ff<1gpfZ7RwYa=Yfc?HkWNipU74nc~nm)^un{sFZO<*DLyTA%u_HoXkv^EU@UnX zQX;Dy8(&0g5mMW*nD^uqKj@Q? zX{%*_RQP!Xy3y%mKEd_LzV`J->vHxOt0&=SJI6^YIYHtgl@f#Lbk73Xx-=z=xr+Dz zV&m6DuR|*fW1SA?^jY?P(GG0K;~2+%pNhZ!?%?;K@$UfJOZ$edvsaqj_#A_sxMG4W zkEk&U%YGg;P7aG{7XIoKnwXJ^9BSm!^NY?(!;N>Z8vh87EjZunYj@GKWKU+ydt7=H zi7+z8ubZ>hQPIg$Axy_lwKDsb5$B8T*&crW7QL8lQ@7g*rDKb(4u9f!ZyyBu=^)Ze zyR7PV14zNBp(eIN`C$_|c4d9FwoYNRZ$fEbC=e+*hr5c%C2`qPx;3YpPW(KDR;s)| zI*R%g&7BiZZjknCHonc0;uuR4E@SVZwSuL0mju1ujoy42qUJ=z=O@A_JpP$ek!kGh z;X`Cr#RYXyiz}k=o$gKva{xXHCzA8&gYokFJ-;#BKjZrS%Zs>fi=m87u8XB`XS{g9 z=5gNus*k4j?Iid!8qq?Oa*?o-Cl8}KM#`FP*0i#>*rUS+l`aRlO+WQb8 z6-&y<7zwxmZiDE!?|p=W&}=VEpjSDqZ`$}89bcHRzTJ{}H?-?;c2_t_PDzP@hli-> zJgoG=^CWX9uIKqB#WTX4q>L6v!jTHTrg|F^V`!O8`z+tsXS5Sm4-#eJ{1=IxwMcG> zIT&fuurj7`=ceTGYR+gm2Rp>+8OS&&;Tz-3Effy}_CQc_zQ4<12&YRVFHs$QL&v-|KTVTcoS^*KykyA?px`Cxb*L z-|BKVyx8gO-ovL|{~2ZS^C&dA)Me%uakx(^|Ltk7kr5FQvvP9-z#v0UU*@R4Ah+sP ztJkc$Mx6I?X}OUX zxNsub(A866_Qc-#9*n8Gwg_;#jP-z6V`@o4|aCPKaKnCd1i zUBS7&P%!H*+B6`r7h?j7(c=CEqrLYP-=xl&lD$EsmE8(c@) zuinu~;DRAK{tAbndr#Z@?Ynw-X85)|Q^}Azi+fJ$J^VwdK=pre- zHlIfW65g(?#gyxfb4cCA96>*SRiY)&CF_T09>sg(yPt9}02ggsQbkwXIJH zrZ}kZh}>6%ZC(6XO%JEuT&Zy~xuvfCv}&=S>Z@M-Kvx*rdnlSS>Gv||@>uuL?|Apa z4n0d_cZ}x_X)|0DWsV9*#pS^}4uOn=PWV}8YWU^}>H0;H?J6w(+cQ`@CT{fRrO&;8 zBJb;T7(sz(s{FhzuXnwriL8nD3N>|$*yRnC35o@OSu$zSn@Z_Uu0=zwuMMGcX&%Si z?|yId_Rz@&`y{pcv06go+t6_^XFje 
zIG6k8R%ci8iyuAODb*OZoku@8Y0(-nP=^WSS!A^|1v2_QZ{uR35!$-aJ!KV>5*vMk zjE1rbGH;(<=weKQk(oaUuEC-I1TAP)ScvZKy(Lzn3I-u||3=Zz*YgLq`bw;Bn4Y(n zt9Js}(T>z90R;}@6BE~mm#wSedz}yHpR%ZtL?vDUYM}!{CCgQbxBXM=(#mk{GAZS| zYZsg>wx>k$DykYqiOd*71M%#7E?Ole#IR7qwl=}Z;#8Xpo2@luAr z2|_w2hH?!hB74o3-6IQQF#v#$2`QfIMAx`vsd493t_ zj-_&X-72FSMIMJcfiLf}h`NxR5|z3Xl6V*y;~HvqB|%`Q zjfReD<>uNfjm)o1RqN5Jbj5;+qpLsY`?Z0}NmQO9@@QGywmuE*^^|@hS&KIE9YIRF zj?PJ@q?h{r2mFX{K+A`OgamF*7=JiJz4y54f70wCFfsKQ!~=fO67sr4-`{%xcS%_u z&%?QDMP=pe>3wtk659rSw2*z7xg_t3+x2~1n|{qt&Ij%Th6|Bj3`(q9Wz@_&G}9wS zT=rV)oA~xsAG+z?WL(3d>mKjkHr5vN3V;&wk){V*K8w4d$<(@X=!9%|+!X9=`>3Obl=VN~mzyl?RxT zuX0hI^YM};MihXR7}2Q|VLHcaAGNHB`_~P0u3Xp;zY2a<<*ixi^XXb3guRrnkg!ve zb@P~E3mR-M&EcwRGBVE{GPR(w?H{K`W*pJse4*e!-&h_9ga&Q zELbnqTlh|RCw#UL94CBc{~hokS(%9md3R!d(@Y~Jh29qzuu^etwKW!8C>^?De>FB?;L@T0X{ zcVEj$;EI0yX)##*SKF#`FhbNvx*|ozxzjqr?+Hr3-wwH1WVq1KF!rq4k*O%H*)_(H zBf7gXn|o`wTkqGf04DFt6xO;0;=;lM#W^XLCSqE;!vJ`;NtSH+uZqDFbZ=cX3HjW< z$5z-CW50k6>`t{`ddYvcr_rKT_v!`XJH5Hr)BFh)YkCGHXpPMnZxG}9KV?mXNL#lZ z+N~*(3f#R1Vu7FDTe3n!>a5&EPK`wVV*!I|lf_$W34~F{lFf~m+vlN+yp0a1%x{qn zg)X{!yUGmKnH<6;L=(8ncDkcx%B-oF9^WU{T6wkQAHLl4;9w~c5IEKpsw-)Dt6F*v zt7^csHD;Zm=YAy3+KbWkMj9OtarkMzbU$>EadU-^?*8guE`YB;J(OooAnIFes}=+Y z_}4yMEk{Y`>klO{d)!>RQ1Vy2LQngxPHs>lNA4Rq@$>0=e}+_dYEva)_JE~Ylbg9$NTc&fp*zDv%g7^W(CsmU`6*{?pk$eyL znx@lZPE?b5aYPt4q1m0J6VimE@yXHIbh-5mW>XA3jkUF%P-q(2=(f7#qbAAEyq^_j zlZ9|WL=~#^DEO=h1;p?c;Cuy=;GnWH+18pjee1m{j51KD;^GFx;~)bL_rvPDiZkTk zq5^WtK6zt2TcUr!ll$tIu&~UgwBHPwG&=?S8DOLt&auyJ+7Qfn~Zcm#ZjK{tU4MpEhMFTV?rnv9nZpRhg6t* z&hrdqQii+sPYB}5;e71}hn?SZa95o{=$HfqgJ3egJCQHbyy-F7?9NeRIqxP<@&2Ql z8eXIGv8Wst*m^)K(__Gr_7iCZv@2vC{$pmWszY$J2hx6ig#^u5N}DEfh%u{z0YP)r zMJ9EEdCUN^X2XJnIqpxrCdd%#9(zH5_kjKl^}zq-!BW+?(QT%!TMa9b5_+9k=cmou z7-8Hwy7lZG}>cY4@M_&gVF|;C*1(P3`afiQpO;&qc4(lmH)P?Q7AT>8!tnBqC%0^daM5GAs z%=UgVP(ANSlgA1ys8iH(zasO}Eca{XsmYw!ufNKu+?fJ~5Mr>JcTpRc8;{>{aD?$txW2B^L+4$iZB7qWHwoN@}_OufF--r$tYa5L2RPNztRm zr)L=1ezrjulW4~-|*U>#a*DBUDp 
zbP}<)J6;=qz2kr9cKwRn6G>S@W+a(AFwmM`w10K@^G_FAB=l7$E(94o{OD*6yL6O4 z&Vs`I+U|ykN=lmnL0<|gV!~0DtyI-9=39TFZX`!dlS=e9UKO}2(M;YjeQt7@KVc6; zEjWyIlN^1imE?vkGo;z?ja~dxvo3xfzAFFSF=^3MF>!NJzY9>QeySbmkJ;Q{oYF|Maz>$&_W zr_(XC;8QGM+gC!4X6h2Y2+8mDDU7ak$E4eCk?v8l#+)5{+OUq8vD)@Ul2~qyT2)z; zlKn)V=Nk0qGFEFHM&LZheX7xU7*iP69C0rZ zj?}qnE2pX~SSKE(4Cy?4{if}o4o6$TP9B3}*?|mbeAe6A{l9(D$||dV#-!$k@izDOiW2j!npGihlYll zX&=v3Bggdz3~m;3WC)1Q&LrV>2nnlQ4xYIo!uXVE)J{>R1MW45gvixy^@47h(y_g1 z6GN6bb!k?dOT$HVGwYDWLO@5|YHRPr!cN3AfR-282t+~fV~Y(>c94?-AWG zNRQ3=p>w(SlOTDCZ_?*w{KaQ}LS%RxUV8q=nqE*x=r(HD)_^$#!Xf3Tq6@LpK}6f` zkf6jQ`s(>S3TeMvGcAgv;}TDH(d22-d6|t!<}+tbZ#ul9z+0@>#9wODJ!1?}-I^nh zn%&mzxa4Af?>bB_qPq{Lx-Z?Bu;N#N{n+i|ZC)io^vBbaiEXe6`|U1Oa@dx~6oe+0 zulIm1e%I9WbHzgOHWVPOQ@=GBzRq#PS2j3rb{p;|wYIzpyiA>Dh!pD~lCe@n^(8T` zsK@A>x7|OXx!_rg56-UVC93jT*aYBz1P=FZB3v~a)Vf$iMd!w1u1 zq?vu_8gMNEZtgC?sTP;hhLmJcAhutIsaqId-9GqlCnTaI$5^Cnoqe23g%EM|z`HVZ z8+Z01V=m{$)ZQd~2>!@wcf(rfGi4$S8 zVmGPxmZlc^w-moQ`0(@R!wNeXDiiI=dGaqG&<^KM2M&c{-V;;>sG?e8YoIF0$@(z! zt>I04SiLrBEgT5dR;kdPpp;c&xxJmLR+P}wXf|R9&DN=$-BbEKC6!rI#j`m@DlXrQ zq$3`ydO2tZ42SoI6XWA2r?S{+D%RiMO1t_m^Ebr^CN6x^IFuIgO$8{~Vc2wk-C2YW zxwmikoPgHC%uG^Xu)e(Ft^jEXMd!bd&d%23zu(dSDcM4}U#?a8d>HU@_L#l% z;T9~nq)Yc=0;MbNCU*8wl*fp5Rzylf~`GdYtA_(R4DE`+5 z!ViYhao;eCsk|k zP|wR(i!QRDnE6?{zIfD)Zw0erE)Ytz{YWe-Url5^e%yIIb{G=O@0tn;$c-fbW151D zgqSY|nV2TkQj?R88mbIA+Iq_YWYjAg3XBtfY-!C|aU*)T6&#>TiR6guh@RKx)!bF; znrW`AxG0m`$Ph8TKi7k}E3pVQYK3%P4e%mr9^F#xQ>*_TpBqwFXKnCI%*X9&D2Fq? 
z%-*ejB1~R?)}U~6QvOtOn|3FAkc{;U!mW(3bivr~a|6rcUvRoN-?N&+d|<3wXBE5K z9+y3<-Kqxa7~qZ$eAR(}q5Ij0rhgLmzB8{pt$Jm6xE83_*{gS_l~TPsB|9+gi~fZ6 z4SgJ^ngm#)D!quJl5*yE2HlJJD~!0Ad*(3>h0GRYaPlfEe_uVN_M-CEz)$Hdvgs_= z=5*n_yYAxNj_I7`l1zGpg+U~mJ3sX&^V}n%kiC;q!ou2hp|BfHYE6D|my=oHxh>t` zAbqtPvX;=LJn>@G{n%TU$8v8{Ws=~oR#c)v0j=sAtxpg-PkqN@7i010OSxULUH5K+ z`}+(U%Ni*W-wc4Vv~ze!D!L73w%=UqnuNPktDOO8NOT>(0Cd`+<}D zA&Iz@Ugj`nn-*#)lJkNC9*36aGov`bi<-_z(Xwt~TK0;K5_lNMKlN^}UEg!Zf91jzLqAwh7)k{*Sv>Wd+Nyqi$!PT0fg2IkvN04!Ehq1|t-av$j^nMu z$nu}#@#XWy5I4%7dS$a_+5~j{@@LM9E^?gyydePuBw|9Io7NK<8O#&hv(6nSyuWV}6!Ghy)Mc zq7Qpl#<5?u`E(kq<<${)i1Nk9mwnf1_fU=+eD#Q-rrw_G=KEhS_js`Ow9Y46F9~>U zV8HkZdP7-3Q9j9=n;ac2Ywe~VGLhhrzo6cvW!-GH@&r4UCTcfk?|V#H&um#+NM zzg8)x?#WYPxG-Zc?TqkjT}is&cGHE;X1MiR!s~x1jaj;Y#Yq##wB3BQln-S6OfC1Z zzf`n|P7bfZkb3vKhRcz5?c}A~Zei7V$B=uBtlUku6F(&T{aUf@OZc*gPMpF*SuJIx zk3xPCnmmy@)j!@R7thcQ(i9bJESx?(uV6SVXUHCR7|(q>y>!nBQ?VhKli;7LC(;uZ zspHV)JSJGlZD`4(+2El2aEl7Xh#+9MDBzHhmX>bb1<0Bg9v;J-)#8^G{QYSN1gCeZ zL<1&7i*LTh_fJkA{q^Ox(J-C4beac`_=hM6H~@eod-d-21u4MXLH-W=b-HqqNX{5= zSc&Y`Gp$VGB}O+iG_0~-whvmRqcEP;7{gHu<^%bY&U*u;yf&!%g6vIsMez{@$PB=On3jlB)LkFE@-7feOH4l4)*i_S+-+C(-RCm9GVkk3p*{tfx0XQxs&VMoGRgv~z0m zJ37*5KqjmvF5{S-^UqMO#NF?D35$^zrw#r7fnRy}m1%|A7SVYKM zr-hk`+QX!ziHgIY3rvTX02t>t|x}9)t)w(aHj$5Ck4Gk&!`%#_UizjwL zh>sC~30}g(=S=TwG_OvqpGL>U2?Jwr{T^1We)}mk<5S>+pEa@jSwg~GF7Ni&pK!!W zko*x78!HrqMy1ld19byC=zh^&djB%iOQ53l^nR5>STwUv?3#qJ_&MEB{>t=r{=XOorSby3|# zD36TuKQs`(lY9gAl={6wIt|MQhWggpSw9&*1%hZpJjLpzvab<%tmN^6!t$Q;K85W2 zt4a(XZRPkyL;{Ts^dnDNA@#k9V@g>0$!Cn$Yo5b4aj>2TC~zccPT~(FLLa`oN!=Jxvpd*iS$f zjLYxg;(xsFzzy7suK`F#!^C7gVU;Z*A<;iD@Q#e^1=!kh29{Pl);y@H@_qzZD=4)A z<0mjN23Z9oz?*(&u9}W9F$@?U=*#2=29uYOzMM(E*Cf?DA5A7q`S*qc?bqVz2Iy;kDO)&(X?mwh(SX)8Mm%~NLfFd~?z|i@r96;p1U)mUXc6R0y z4!?ja9$ThLkBjn+a@%yel<2P#5rn7_@_W=tVJL%fFMCS=&=C0+meS?lw^zNT_{_ao z3;f?CeoyXE{yTc0tk?o+3C19W@9)qb14I5UIuumpLW61j%dqd*pHlnF{{_pzTm8-J z{q64VB$yhFq)&+5?D};oVy8M3^pkvb40<+wGZ4|1U3)8mLa1L4|iAA 
z-N8Q~Rzhq5?+asWKnUs_D?|ikG@zoHu>P3a`2DM|Lel0Wd8`>=CV-#tifHk25oJ16 zGKKqB=y}@jDg2^a%N<7`*!r8QG0J<60$dR`YL~s%fkx(YYyi1J$oXbm)lh1t|v3yd7tDX8YZf1L}e`}FNNG0c^bvgUF1s>C-} zpuRgzo8RO${h>^jGg)PY^E-6ff6cP#$Cc!wwz1h)$B^^IjA`IJ4LtQI-#%_22cV{~ z>#`?{OC0u9R;F(kuXgtD%;yj5nbY35>nYhdIkAJ_2$H|=09;PJ-SND?*XdF)s;B3z zcB?K~Hw6WZDg!pKb_zy7w^sJZ*Uz}0#lpg}A?G28nwI!%^B&l=nN+tpo>w@`GX2ee zoNmM2&#z#jD{6=TCLbHu{o(~GXIv}tCyPg}IDO22nfa-G*Q&(1`9>kY4`Y?9Ltz)^R+FLU5torwsp3K-`2`NHH#$q;!_u6MCc1ZJuUh^(U zh5Kuk-Am#XT=gds%%v7eW;ES|r6n)-pl%>CZ=%??LhQfVCorD{7gSbO)?c%{^!jWI*86tLFI#1?-MO#dN4b~OYbVZ zxvRHZw{mtSF}^!0%zLXsJPrXBbu~vuAV+(dKR53udwpr=?Cd8oF}B;5!omnjdk%{Q z#bakxb@kBceF_S$*;e%bpsdCSwh+31HmQ!}-1;l(smv84rM(*L7#M7@K95+8sqcM@ zAdTrw_a50fkyK+|>J=~3lm~-#g`c@x``*c@*Ay0pX0Pe_`TLcrm+)IoZ`#GAq)Z!d z9879D5JJvM3JO+0f4&K#A}yxaLGoZHc$}h!jXF(O*}1W=>g;=axHks>eSpUD}5NJ9#m1^S>AW=|w?{xxX_Ao1=g)^1O-)U68AWkVs8Oa6YNX9K zZ&Vo)C~FmDRr^%VTerTMPP1P>ir{F;9z&@zJf$0*%Cm3(VafbcaXV{TX5yE59oH$o(T;+f56^+b~0tYIq8C9rgHwZdNZxMLWTlf%No zW_B7fIcnUtf)P#s5UDmgl&LbB4)fOzx%J_JJ>j7Wq2P%7CM9rLuUh@a=}|?h$GM*( z@xAp@(;V#zc$`yGtQ!7~T%LTB2^px{4o_{)q0_JN@Wf3`X`UX8o`IYN+z2=!VPV(n zc6eY4WHR-L`x`t=xK|{jGt4=zSN`bY(q9aEM>N6lcC+3COJ53F`3ieG z+z)PwZBsP=9;p90Ryv;0{<%$A8411lwTJEs*`KARB=+uAo7eAOwoZ?%L*3*i_sk}n z?=G3W+l5-5Zd+$8*cFv1m;{~zT%#b`BHinJ{92u`>%I2VhqWMgam?T=fbosNjS2#N zd?_RnR-kS))YL0LP;WR_Wy*VdBK+523)rT>p%aJCT5RhXW;8igoMy z=Je8vzw?sWREuhb1s@Kd2+oYBKEkj<#)R}jH@|syKtW^vn87-O&$PivwzsAtz5hkhrUF{OOGNVhfo2_ zh96W|xN!Bm6L1g=ce9Vjb{(*AaL7oCU@j?s)&O@7pWgK>&&Hl!O0K(NV8odw{1lGD zotHl;t@0+&^RA-K?rbw@*hsN@6-n0{h`HitW>h;mJD%4_|08<(_ttmr%jbdl`#ps3 z=c9ay2dRYY77GY%)5zF=-8bFT)&0*2B1FG3;)EqK=HymkiN^+WDc8>un*Ji1EjPyz zxWCrVIp;m?!UW^D7e4SL9%W0aZIISSZr~4VKf)EX7K?Gze8&j3cIM>a0|A?8b4dNS z%oUw*N=zJS+|eUG_kzMgGY5_HR<*YHY3cP23C8vMS^FffQO$sF-==!%Pa2P}kAg%3 zf&$B3&$r;FZLj-YKhBi-hJi%KryV(9f{({`MqgvG2!h1KE3Z)7^5Q5bDc>5ZS+PV4 z2St^6b&4L!=I^v4-AFsJ2?m zn#!YUD{)Y#+MOHtx?jNUA{TTHy7QNZ4##tokfi@TT>dWG^)eN$4?wE|1R}r`>>DcH 
zkaw%vM@Tx~jnS&m&#z8uIp?+3(~;hMw+JMjw=_>~d!`MJyB%q@589TAqf&q|1<(c` zPqn+<#iG@l4A(8Zqvk8E1uG7x#x!8uVgt(Ka*rE3V7wZHMx+O_B)@u`b}u;XGvHph zRjD)NQ5YL^o&&lqU}3A$C%D<74G?~go(!E| z|5;UB-0o;#yiF>O5p{o_0sRT$L*pyXkc6an_UdTHB0~txyw4RrW+34ZM;vzo&wFX+5Y;Yz51T+q# z;j!xvHh7}SW-Wm0L|8#zpXj#v!PV4-H6}<9F)NxzMndMYIe8@<#QMNdW4O-_zWaM0 zny>2aDTa2`TM$3CTUD*kg8bj%1^6taA1lu@>7G{Y-fJ5A3EWvG!PuFjZCUv|Xsb*I zQOk*#uZi3nI$ynPmjMz?r$!oDOEesKu;;I;x=xZLhcXAH7qhsYtxeOb;t!$-z7ArsxJ|rrQL_`Ws$; zdUOVi+be++m{VKZ&#_&g6#;=T1}7`+#p=~apilv9yX%|n3_Lb7EEr&2Oio2LQ6hnp zp28Otbd(Y)oi8kh1@@3XCe^n3dp)M?DSvAj^e``S`xPI!?Wm}*9I;v}UJiFObm=Ft z!x|2&xrLn?vX0K#l=#VP&b9TmzlZSXsB26RNgYXvvV^N^{W5bkEEoWD8@vzmy2lh& zf&R+{2w7PnZE)K46U_mgeYw?x8^$^&aOrQ?V^>gB{Ihqes65v{*P0e`=+#25Lt4 z_3JL%59oC^9oLlGd>$JJ&I{-0e`}+p;SC(Dla-6z>NhF1OAchUOSU)?iA-Wjij274 z{J@|Xmb6qDp0WA*9i>#dkb%of?a@07pJ{FHG|&q@11WHMiQ3Y4PKP@)Abo#mSt^V9Kz&&reG?hxTT;gkJGqN4F;@ z^FAF&W~>9`4^QSk44%X_ymACj;*$%f2(S&>j!zVqsJB|>%GGq5r8P^%{F)vylroaWVbEZinAt8r^2e--RkI$%pk_E81#r1AtA>8RwG^UH+w!&+Y3N~um0vc0)qh-7L1&+U@eI8&Dmx+i9kBYca8!r zWkC(nb(-^178nbHq#zJU?6+tINZxwos-R?NcuZZ@)ddIVWQwx7a?jtc2KawDNXrPA z;)Qi%oRFIf+on>Jb0$TvtiUasMuk}yE zKiGb0HFH~8SFMhFMns?j-=9rD6mi&}iU9t8Ie=xE6LWFl07Fkt|M|<8cWi8L zMC9HlOKxI217o6XFh6~7%}7jqw8;*B5yIPme?A3=lD%mIUw@+crAQ09{+V)pco2fX z$;GvLco-2DhCDSj1x{2&+Ba^^#7s;~m&eO88T_6}9M;4V3G`XOT?i~cu@9&u*N?QA z`)Bs}`6W}%qyxh#{{0Pqe^<8N=jW>6FVqwfqMg8s?ezS-(|~1lWyL2hP9bsw(r+M? 
zGp6$%_vG|+bqyGX-`!P$L*8Cs+4G7B4Bw26gYxny%&O-#-hnmw^GhL!R;ln6 z5m5wqX@I%X4a`c7jg7y4{rVmkMgH0U^E$I<$p3x=J$W979mCV(EjIzW$E{`IfelP> zl#I@7qIOJbtPtH(!o}W=qG8a5tlTff9S3gv?$R%L10QTtnvv?h#M6SSKuAakKtbU= zd0j)pz;>NBAem2-rs~)JpR36`9y`u*wqTVv@vfE+Ec-nc%`@KJ;aU27n=h`t@4!nS zitF#%TFht8^7Cl9yegtwobScULj~r+xyK z%TShZP;f}dv$m0u$Xn?C`q7bA?5aI3hZUi(ukXv3FTXcH#s|D2XThqYnw#&-Up?&= zLB?CXy}6wZRQFl}UWph#H>-jKDd%+v6?MxObqf^2?gul>k;D(EB6226O82;y}JuT3v$X$6} zc)H#z5Uc;D=jGRs(fVPRC#Tb0cUKh{jvDEK+Sc#iu|y-h(#gakf7e=TgF6cun=x0{ zAFf!f5l(|3Diz^^S|dj^qGNF}4LICdfh_Kn6oQ0=1XtpW@83UxG~)F1^esnh zWQFfuot>SwPEI4v-inG?KJ~XHq@#RSsW*fYw0wMN!NCZ;e0*d3(7>9gkdXF{j^`R0 z^SoEa1_q#1|CXOmg^i8Xz zBA`;z1|E&)Vqp;c5=5IiLM<(lldzoHBx z&h3OT6XF?G)}%{^S}9?yb23lZw4wye7&D&@BNp(x0u0+-ks6 zue76qNrN!omd#~Lul9I_ahEg@%ER|0?C(;ahtJH+`~;=qaJe;AC@E)Kj|mL41aSBp z+`~tjW$!Oe7lp4LxSt-`V&IUz)yM^%A}XH=jv9#91QLm_3ZiS+4o;>kEdK4e8_Gc3V@eP1rk<~b#dEa;WDc*UBe7Qo!ARn91hd>I+CM*3d?72F4tS!G(dn5!huz$Z z)3dW-gtstYUO;lP!wCHT~^CaF0&BZWMVADjK5 z27KLc!+NyLlAPb^1~)f1f!J5zHLoTq>a@(6oE)q#v`qpdY_~ZfxLcc(5ecq^q|I#+ z0ZE6$46lK#{zH63ES7E}Ejv~YC!L?vIfqVZv)>Eutl`ss6NmLtibvy1e8!IIi6WUE z$bD{)YQ1|-PEL^E=Of&mlm`26Bt_IeTSnuv{FH%F&pT%Q3__z7Of6iI4EC;1txc*7 z_8kw_)PZ9&?WH|*L%=ClZ^~h})AtY>QxS1-@vt;38QKWs#Kq66*hllxx~`r(MoC_7 zFKTCB85zx02wQdhn#$0w;wccmLPtku-0hWIKH(Ox8%B-J1JaxLE z(w>BU;VSML-ovfxve&~}OP&Wznq0)oq*(VP7i>>qm0a1Xlfy;3>CH%vS)M}YjW|2d zJlGgVt%ypmGd_5*e>OE;g(<^u(Zm5T21ul$vT_@mN;WV~G3(Z%DiL_Ri(UEVFf(LP z6=Z((&h@8HpF&{sKy?k|bR!SkH^zALv?_nX-@^5Yj3h+;1{@dYQ*bL$wiuXj4;I>> zRwDy|3_$383m-pX7AtiP$ktB6#wTIR5pd($mwHlRzBBRk^fWn0;|5DQS!$0`kf%B> z(^u1F6Wh+uPVAL0=~laZgi0I#_H6@7I(qs*NzQW7?ii56#vz(Y=kxJ?W0j@uFzj~gg0=hv8f54h%(GTVQX)z zF87+0>OWn}e3NAUJqM?9kHbM%cW#oyML!|lVn(N^s~7xy+vkF&bkAF~PRKW{tD;?4 z25XDAu?Cj9>z_;qbVNFRJ5d_!C_Gs%C5tBw^C>b}jNjJa=4b{|bGBOJxS{90t zauYbKkP{}|waf)KMF^CGZr8OCV}44y(-Q99{kz=a!K1{iKMvCM^KEE}e2z1^7stf# ztn|Tn6MlYv0H9FZ-ePxh2J9I_qoWZ~QQ}%!3qR3dn`s#t`BAs;91WB;996J0a9A_u zOoF>=ZGGLI+Eh?Lpd&vY68JT8jbXduZ3dG8A()quv_3EfO=XI-4xd(9Z~CYI4RW3j 
zkh8KdvYEJ^p0APH+}<7nspMMg5W`teSO^V$eRXF`O90d?FlW7E5OR>ShZaAd-MJ@9 zo2X%babfNzcmB@7aH4u#Aq)PpNCHZdpm@#mNoQ$V#afH=1VQI8&-!~u{h5BS z7eHxaI$X_rDL09}ANfxUGT-z)hKD@^FE{<-ZL3jA|7tc?NaeG+a#uOexcx8oE-o&4 zZ_Qm?c%JyWzAgG7$f9RvCZ?f53XL=(PxpDQ%I-`E%e?94=BAv;&vkTs=#%9sB|oFd zhW&W29ns}N&L3W1PU0+kruA*s!^6W)`R)0p_XU`x5p>MVxG>85F8eYW6&0Qlt?1@> zm8xFh)s{tjhChc@q#^dda;X}+ozbA7a}5Wcxwv^ z9`Ct%hOK@+6StM`&iszuM$FN<>(~1Xd;5FuMDmDwM*mKGmhL--UnZh_ndQGQH5+-)oDAyPCrnSx^E4pz#<9bcb=M0v3)2q+K^M*w@`h^~p3i3TY6+bHGH0;oxWb|(D7o{y68nz&${f_rz zj&`o>mX(xn7#PmlIM_ALoZlcCh8fQI#8h&G{QSF%N)qF3Zr>k<+ZJ^#F63%f@bH6J z20IjoErT~WIIOv8m?Tj)+9dg^cB*&qv*%$qi74wKUz9xHb#iXG)i@i1i!_uz_+fs7 zJb`~zP|%Q?CNYn!^y^2Ho)?!{1cdpz&k$7nvW(u|8iF5@Q@-WVbXA9E(%E#UA*Yqh zf43^1X(F)hZ=*AaVNp@lPMUXd6ZE6{5!RVX^vz;WrATH0 zg3+WfcV+7vw@JLNX7aU`*)ffcw9KJO1Ev8YhLOm}Xr&sVJ*L!re5A0Zx)S+Y-ruLaHUm^8x*rM;XSm{GVYQa>czU50B>kUEC7!%KHqt07fH9AYLM+cF4g6Me~7 z$nBdYqJAWh0Mx93p~W*7(JQH!pGU2_yrQv)n~5*Cn%-CtU!)X7x@=%_)LewWwO@Qj za=58FgmC6Y^LvMz+8aS}WOhiY(EA6fQvLcdylv~@6t!8ey=`=_vD(X+n3zy7E{{~Q z_h!hlUU(((Ul@9g6<+36RW&e}b;)w)o^bQD%k$cpsA>OpQ4LFrc(ZobDNl5@a`$WX zL38|fB?dShYTSjIjzi;l$g8_Ogw{vP^cVBS?-2UqnIN9q{5nTBB;#w!4yjhzp zpWQ!J;~srD8UM8E_3G-W>%pU4VrGVZPB-#zCPp?peP(>lD=`m{*fG%xL&W6LueHce z-uMlBZ`~E@&)QZxwDPDj=j60dqTBl^2IXS+Whp^|XpPjTsb?qM0U#XARgl>E`(zbO zlaS#4vv77(U_ICFHNutO|FX5?)Lutk(~4ZK3cKeqhmPkyCpx-DPqr!xninpv@p&~; zI-Uy$G*oCS{n;(Bi7o%BWQSCIkAvYEa?i@j%3GwQ8aom0PrwxHG807SaH~l?zf7~QID2$=l#s6kF;y#@Zu@_I%sawM?ndxH}o>9b6@B~ZP z^jdl;4el3k?Y}4E4udvHvBu32uI96MH>TmSc(M>74CnfMD9+c55_3_s_1kR$H6~(>>t)I2t6O zHlk)|7F*Ne9;0yK^+m)wla{qf)WG9T*n9$-PNg}MR1`DngP=mVcjg|(aphKU21?!M zWD0czdoAF|r4|EM*ce%5hM6<5@JYl|ku^Q0b5qj~a*zjPP?D%0H$``7Rs`Z?s}fnP zoqL|_j5MnLHst`3*iL?Q^KQcd#f_!WfOgfqt+TTA)4c{2fK~y(7z=JYyL#T-8801k z#E_V@R)=T_Z!guACz!Y%*EZ#Q*<~8p1DVD| z(K(zX)8S7)yc|15WrBZ3GOCiWuBqR^xJ;|K?B(v@R&?3j8<&=b_iCt0gMUc$(nzIZ z<}ty4JK=3pQemOZho2e!(qtQ2hj~rvU$Z(s4?1zTYP<%7_vzbc-;R^H%*;$!@uAzp z8_CuDoPoZ67{!Jyvhe3~_TAf~n~3;BIj?n2+G+t`{}7^{TuttQd`8!>%WRu9`A1H? 
zvn_IU8E!ZL1XbG3VlZizOa)NYvTO}uZWdb)7m<88SrZZHVwgE}2uk-E7#*^AvNxXJ z9G0;tbYk}C9D=M@zDTG8US_N0Y7yqZx|-XXZfI<7hVnHeJUrZWePT^+ z8TM`H7ZvBcJYg_ju}L`c42BGShYmjP%z0yM8lE30fB;)pO|idXr;OnZ;UtwK4(lV4 z&^HIdgCw7;!I@JmgXOw5R7h?a&tWD4EfPR3P`1p+-RIAzi@DC9F(b79O(7E??}dzy zXuJ3lJ71|OF&QF*Gu%1qjo1p!x=EC1H{{xL(3{>$Uh1R$!_F3AB}t-$wt6GXrQ9vj zQWx`UIfp>OVB-fY_DTx(kZ}v)G^bGs9u;fN6;|xje)GbjHV=(rue0N~Nhif)Esrwu z5!&^^{T46eFMj&Z+(3h%lBZBudn+C$(xAO2Op5(DQ^tgclN&cE^sbj;w*KWB(uy8 z1>r9+Ochm#hx?(Jn4Ar|=8$ZOh~_&tV!c~-e>f2{>rgW=kowGhHiG_`*JY`|6_Kn< zp{1?G7EV_@!p$s1nXBTLLjOuAG4gMICSdZtnIa?qTXN~TG!!;>nCvU$xmkar6HBT| z?4iqRGxs8ijP(tS7Sd;n{jM{PnagF8l?qS0*7+pO`SB{Bj!qIN2j09nXSTDLtfk11 zOKh|4n0|(d?cu#MN5wlan57)?%{^%05DHPl(K4z^`$J}K>xstkaf+(@%&Ae1$(|?} zH8Tm|5t_qcf0rA|eVfa-X?!rf?7rm8)^&Dv3TVR|jD&>X8a214XI3yS1+W%an@k|l ziUpzwR%u&5AcuK%jn?YLMMWvV8i`rymYVk)gv%2xe%38jXbGcuHDa=@rjUKtW%SnW z$*F?2hRkw=US8?QtOn!Z4@ynp4GE7qvs&G%APfrK`cf4Mx+xY)1Yd1O0&fJ!eG8zU zPk(jt^R&R#>gchKix&r!pB*v#LKfVw^<*1j{i$4%{c8M>rT+d`mzS66gB9i+tz>BN zQ9`6Vts3$8flEWfLm!epcCDYVzjSeR+nFeFRxkDv+^i7}c(=d0yTZs4pOJNL!7F&!cqY5fn%f6EY%LG}E9Xl?3NIkq-@c{CjW)NmL?e$NhV#m%a9zEpJL?^`h&SBE zlWm1p>J`VUP*n=*IOzfwAv!W^&&okZg=*8qM8lqNYr`qOJG&ZX~W8DtkyHW zu~F3iKI|a9^!7eQMwQdI(c{`es>N_8Y{7Em^QZideC6 zQ}yj|;Ydlb6DLbv+a2nR?gZ9+L_$ zWa40P*d9npatey;JPLm!G4#R=ggkflUXM{E%nIj%*OXH^aee1h4*2BdNYm@4z>Ceu zZ|BSV?on{OO+{REy(Mm8)Bj`fmRulEg0%->P1SYkS|;6+y|UiP?uW^0G-M+nC;-)i z!|I>d#uLTafaju*m-o598<+%?)<02_zM(kb!LAzlT{dGjmA4oxPRK7>=bT$gNIk}) zZOYYt!sIw-*kO4X-b0P))l)m=2(A_J>v?EIOj8 zDiQh|JFskBRl>3VCjSWQ6*8uWswf!wZ`^x(^jv}BV%{gy^~r`Kt^ap=IxoS%;`ryU z0)Pa+CngW4*W#bFi=J%jX^Rpi$f9z%Zw)uQzf>zb)^K*F!g-T&LPzI&KQG8|MH`)}{8 z$H_gD0!Ay4-#>653&Wn{CfeS zPDxR*fBv3&Dqnu>&L-RX?!E+mdx`MBUj6M&LMb9w{QuT^|NKE<3vjt0A^7(T!lGsU zdi6zMc+*6EW|85oizm*vXD$U+I;MVla7*O#=M;J)Bcl&Di4}*xf>eWEh&IlYU(8emg- zlR^(o^4D_P?`tpL-lS0~mK9v-&kqOl$zK-)^(Zz*|qHt6>ble(tU{jk8dFo*zzBJ#Q*u_?qTWua{>PTGZ$%7Mt+|`BI{R+L1bv; zYJ9lFo-~-$f3J?ef9+=YcRM*W-RtM?=Z;>W^_k<7mc9w~V^kiM4mHDg5@0LE 
zd$C_3Z~~(f|0e9|!&G%WBLT$jEi`NqnEdCF`TGMmrsj>|*R^hc7m3@Em@bAC;&qv(;&KP}V>!;c7OACUM7?+KU zF^N#E5R9v#64SCBf>5HjcHq^w<_~$II9Gt=fpgWc+}KJ@NkOx5bYzlWgUB^lsCpI_ znkloeCX{Lci?o{wkAU6`fcdAz=`uD>c3Eo$42>7ex(joez0?`srT|bVrVRz!cVTH0 zF}tzwPOj9vGK|;tm;*LwM-Pkma>46^HvfXahriy$$A2bjL`@?8;6OQR3Ld4TNyY28 zi!RWhB>I+B?i^$g7!pGFNZ`gx`6?BeoZMWgzVLr7(V3of=4y#;5*db)m^&wSXl2Zu zk1I;*el}6%S}>BYI;VF)74))pI2QuvoP~LJ3kn;K0+dUemGhH;RHG{ zuFWc_nxHXH1s)wvuTKE;*a13pzr_PMjW{k#N#HS1t+Al{0iaJmj_6oGQ}PF8>^9tI^LUzb_8gIO}E}ADJV|-=7X*pW3bvz*x>x z=UwAw*&}YWs>8KcU1Q;P6yOeQJ5r_H9^7B8OLfHY7IsRA85)ud1;(hzevo;<>WD=( z|4dBer=@IyK&xsyf<{Q_k`L!9x6SsSG?A0*;qR&8o=u+7Mm(JKTpDyBL#w4tZ<7Iux$^U=K$4g#^(;^d#rmr%&p_G0^yL%qjHDe~lA zHV6{YAAivm7C*PRQJ#`w)6ON;JTax`put4JUxFFzjYH+Y$Z*}={X}aqI*n(LT1s3T z1BLzqY}>oMvsU~iS<#ixVeXp#^eury?*`j~a&Bt`kQre|1T>p-xheExGSZFdAI!&N zhSX|L&M~2UcS!O)CBH2)*hP`4TF#7yiA!FAFWqmQGwOA{qZ zhJq(e@&nBno7KCxd3kv#13xga@(8c9+lSHC=q_7R9c1s2G`_o1=f;({EpSK< zuTu&hV+tib_6ZNUNndxgJqud~R|_{FqX1z+Z#54a+av8CpMU*&0Y3o$9xX6@kkY-& zimY_mzMmnNObc=VG;C~a5OxAxhzV_#NG@}I-wDVU>x%KF~#40C4B?6&@Gs^mV zCrH~9TtHz!@SSe4*d?9JcePZb^MuN)aXwqs^`fD&L$@N(M@4Q6dM0cNx+eaK%T-1>yEj;|gYx6LwU{sZ)|Ho-WojPUY+~yye5XgTK38S{r z$+;2RMA|ChKgZktK{Abv#rk9}LhiQPvt5JIY-o3BCuh@u+UI7H3hv!sVeSUTp z^_5}2q|uQBF{Wr!Rojk%dv~g7R{cWnx`6>*@PkIXdcdvv}lujX#tbZR{N2(yqN;cd?vdgwUDW`1r^{ zsad4{2`ZH4k!lvvT!Lll?^)J3 z=TqxBR58zQkA4Ki&#BYeoqU;7=fZq_r{I(=mETGjc;t21-Ql#LiHJTwyDmOE+3Q1a z7-vL*567p4>Z6wWdS4mM+SzfE%|c+}3Q9_@O#S|-yK}}HSq={q8ynl!&R$2kOtdTr zva0LxnKTs!^TS-LB;+e#tlEQjf+6*K^fqpbKLO>s^dZKjJ7Ejq&V371lk5D>Hk}7& znUCcJ_X!|`$QT$rtxCJAV)8t;GgrL4yvT*U5}?FM?J+GVD*8D(8V{<1+XMukQ^Q#e zo3|SAQ-NLPM?LK>_)aRY)9ZQeXJC3`Ou5|6_rIg>tN2o)$rB-83C=<_V zxcrC41#bw_z6&D!fN_4;+@S2=BT3{K|N z8L{2EMWX&~HkpL&3f=SPPI#crTtR_NHY^Mj@XZNp9xvr*JFK=-u6kU| za*5zl8ZB;UQ`1fut|5|4#)gQ|mU^>^Lx@=10gMveyZ zI=fE}dzV{r!O3d7c7B^=IATi-pa|qnJIip18NTI633zmFfDQ_d1QbC5P;515-N6d> zr^427q|_ohI~;UV5g$MPyQUW);KQR(Fr~4PzuhGEG4ixC3%x4={m1KEUebnz2E{vL zPF!1$uv;+n&=o>yA8{}cU3~N{PGxdup}zex+sN>aK*nR+jp7cHkKPKZlRxAur7sCO 
zv~!R%DS2?)T$>JZM$!l*eAP@Z)c^gSZCTi2WU;R93Hs&B>V*dWuA_dIUuM$%ypZ!) zz~Aq-e|!LXLE5mx&D1ERj|&@Q0xpkX!$K32JV*a#WO{d&GJ|(st>N^%dIKA-(p7|AK$)tvkvq%U7u*Az%=xqJb;~QR^(k_VPR4Z(>gHc zqZH}z!l;AT_cCVNUN@?d{3ZElqQbR3;pW<(OnezqP6J(=Yrc%Q!u>6B6x8VP)CcpSTm@0?`Xgqo;>;!4jr_G8iyT<;<;* z1KvewWeYlWf{E{eZU;+UEYH0kxLjP?bEB|CM<0Uf``zuv{SNl--wR%EY5*BT(?WP9 z0^D6-(oKoBGLRd;>6`=Q=R!=W*gt(-3LDLeUuA{(wrV}J{|1@7z z=#)2*o4*_Vi$v8A>63XP+$hr5kNWg7=xEBm47*|L>5(o3@hwE2Nx| zb32}f^I3y$1jB;Y;#==Q;|e9mrkFgmIZ5;Y?ckY=$mcOTl4Pgx^TMG9UU0);%SGjy zf=Fw6eqPMTh#ID-Lb77^@7ezUcXn+q$w2Yiy~c)2<4{r%BNVt#3a#|-UuI)eU(Z&W z-isCPTpt_L0@6Dz`B7V`j+H#Y*%1Q4`_4ZAzxo0V^KiXM@0awi@fgn#{^3uFS(ubO z43-HX_RN6lA~l?NOI7U-+-WquCy3f=K}S-sV@xmP(nEpM<{>SgKkYX*K33s*!UbF6 z!Z%|z7g0*45N8=Q`>T(PXhTyCQ>fyGc@xMf3QJ4pfBJ`9l`T%t<+RhkDBH?bY|%=j z_#3RuvrMuCOAJNPI=|E~3vMs5Rpsvf(L!oY4n)~IZ~nQ6?>;(5MzeOg^Sai#4@ISc73fW`Emd`BM{U6oa#lJ2z(G z^3?}W0UXo9 zH?AIe=R-t^JRl||jhjWKP21;0FV4<3K(bAR{UR>6)mU9a!)4H6?VcO9gw6Au*B{En zaopqJ=o9FlZm_dghvKIPiX0u!`AKPSYoZOSyc=tS-2x9pMUB2yYL{jyn~fFR+a>n= z`I&nXv@75U%#cm~s@_ZMAEx+e_P;Rg&G34?rpRp7;Zk?J+{+rVqX1ReB3JrvhvQRG zb%$wJEe%$c^Xi{Al&u3%|FXNgdo0g$v<&JB*7X{7hqd2H48uZURrDm6 z6j4vJ(nT!0n z0u_SSC8P>$1kza8^22iIdPZj}IoE4KOH*vRm5xbvye8SNU0AKg@*Jv*JM`YBr0km3 zmHkTpdn)bT5Q5`dpP|vBnPE%m5KoqxQ74Bfy3&}eOha#T-j`=mkFsy`oW#{dK27b9 z+l_-{l(`XIZ@~ntsa##0udd0P!=+3F*O&^{Q=Bz>($m~y-&>fNL_gTp)KXWZxPI@g z%if+_7A9$EoE#C$ykGdj?95t6VwH2|!g^<8G`nye+&ikSq^3Rf;^n?nlZDSeS$jCX z>n3*#pMCqyDrx7TY;B!HA`p15;oa==G-x(cG^4wyC#p=W*!i}2jXT_u3A0st%N#9; zJofYpo8>e6*G6ihk=VlZJv|?xgYF~c>`kwJ)=A3Qu@2RVr~AGU;1D;K2FpbUAG7Q( zRClA^`A;$3hSdiAG&(YSaBj4ge(=iODJ<_K5+QUreGCi?V+9R=R2m%V!=k|>wAtFa zb>P3&0;oJMFUjOcso6*cE1F!%>(&iVcW7LAII~nUbdx7uQMSagd)v*-Nx+!|6qmsC zBmg?3(drlAPXO1KE4Mm2)^3LfB=YJL0w;{|T-tVv|LM+*i7vEn_(~fUC%I6dKi_E2>D$+D}JlJlrOZ$EBEq=|>>*{_-PodcR z_U)UVg-HN&IK#x71RmQr;4$c}a<)c!IdowDhn6L?_K!voc|)8@8hovx;b9ao2d%wb z$?n$H7hvN&z~`FANA)*?HP6CHQ#sV4RcjMQTI*>4y2|Y##~{}|hTeg3y3CPuCWO_5 zXx-F3rP50#*cGuYMHL$=v{7D_u~@!>QXB(~c(t#3G*frRFOYTCw${^aK3{ZRCzkrP 
zXR{b;cSl9nwqL}&Idwyl>Ru$hNsK1bwosXtu(nW12z{3NMnc<|6$(P7{CN@lK>~Dh zWtN^h>Jv$YAGnx2;A*CKU##oTjG36N-fW&WB#vdbRLDH;lACn2*4LLzv+mRo&7j`3 zFrv<6!1f|PcpAHxtcuPfHFf6bT%3=k^|AM9GF+cuzsMl6EKgLb>Q$xN!;({mzl?ot za~lgPrexsqx=rIH7x&HEI=lqZf+pO2u7f7BgWInLF8nOeOePPe;_3*OM@kWD zg?2Op16*2FPNC7*F`n@m0h#^(%b9#0fQGcfb(a=o$1?yRB?!7l)q2{?KA?-jo3fr^RaD?$MEQ5 zM2106-23kw8nP{`yiOi$Oqw0t?@dIPKh=%@D0#KN@C*#AQ)_EKRd>l)jqMiV>gLT* zUn_2pDQwLGUDfEeOY0|^M;d!rDk>_FPfertgZa?^M3^pq2-gqbhJl0whh&} zbJ%I~mR%(hH$3&pIcv@;ZhQIS=rVT}YwNU5<`z3WzWZG@vw3sk3&uFvTn)=ZJ1&gw z4##k*h$~jV3uwAC^$sT*PHmadx!nm7ns7tzr>R0S1^l!f9Vy9{QGBveFV>IWBz>K;X9USwmpAaB(3))E6Q;6 zUms69M^uv97_hVuPo-A4O_5I~ncBZ?wjE3z*A7pb(0CSqs+-T1m~HNe;|u{256U7& zo8?n6&irFqj|ybtmXKBPIPMm77CMuiayKe@gK`vWcE5 z;#s@Xa7loKD{ah0(CJEFeZFiWAnG|80^l;*#sh)S%k23FVu8;iHFBf=p|nt~NH|s| z8oZJ`gL%-&)T{hkQh*-fp3Q@Q5N%wfx-Pr?+Za_u6g+Rp$?OSln{{qGbr3xHp>(XPEDam z*rf>KzP*HOT{56LY1z^1$`N|e68kK3U?877h0z=3&l(wt1HBR95cBG0u18^WU;n zs~Y;+vv|rtuxjHQ=wM;)s@ziUHGMWm163Z({00pfBhA-i@bD96YOBoGf|fI*SAr1n zzYlsdJjh!M9eU1Zppyd&Gr&?Q&>aE?M0k?ESYbO0dPwTKcfTauClm>M{_KGI6FB4Y z2MyU`;j7>=GD_XRc-CbZrBYc&*!=TP)^9HGJ@ZcR^YDD3zYhNi4KE1nP-)H`$-YgY zPqXETABF|*%u4Cn81FybJ#oIRIzNQ?Z6ZL5P0{5jSXsTuG0^4oe(aE0)={9MV*N^d zlD3Zo>(^%u2lC0O3Yp)>i%Gg)d*r^D1$kO5$Tf=~lF(|hHZ(e#7|N7X@Ia%Kuwd?D zbzD(;cY_4IF7Va^lq)TSn1wlFSGU}X0<1Zx-vMI|U~caPM#0<>7R98Al6l(qgGnOF z?92b{PAZwAyEFw8Ucta1I;oheusPQf_AhbALR-pRH#=H;okLy|GOh}KT9CaR9Up%J zy2>A$_+@s#bh1z)yaPdH@_&7|Yuo-eXKX(U`cel?zA-D+K#A`y`Ls7I{U2JLfDI6W z&*Xa3Wx)Ok!31!HdA!dB=;V^9VddcA;ejJfXHK+0f|ZCrY^U#aV}z3Bbo*D{N!vql zkGvrz@fSBjQhPpd@wilN!IG8rE>5PT3UAC-mvLiqNs<~3+eM6Lm~MLzYmuGk={BVO zT7+3WKo~4QN}_14$}DJDohLn8K&@(PXJ-KF6&R!&4>H<9>q#MIt%^Dp#HoVu;N!=S zKLC(}vK-yL5Pm?$Z3UPvY%tNT+8cD9G)TsOHm`e)vtLE<>p& zYk3*wUDjj1i~*T@dOw42H~np*^#Sh_^c8TCVQdg8g)yuQymx37KYlC>tGc-zUlcn} zPRlE6UJme9{NqmJ`;viNb9P6FunZKm!I#e zF2BGbe)|H)mjGSj#Xag+>67zoH{P4E-}~UR>Y3y!V6y3Xpl*NXa;#(52#AXmfLdkq z^o{@U!Kb{O``{ol|84qB5w&NZUPM(U(zE1tesX=3)?<1E94Q>LjDRbk0Y^q~{VKRw 
zPgiv>e(F6gfJg#Zd}1Kle-!qWEfOcMH<#7%2PduDvUh9kCwY3VtJl}G$(L)Fc=M9w z7Q7jq>>k|1NmB??{XnI|a!9>sPD|`1_|ztU;q8I7nd1#$4I#RY#0O$I?6+sc6vC77+Eo>U=N zYe&GaPH2~JC2s#9^1c0NYjDudPQoycKZ|*)x?SgcVg1|m1lPeHgtF2DTg6$bTk8=E znr%gdm#0RF=~EyVdF&V74;Q*0tli_`8QSwdhBub>1aNz8w7k2-8~97Wdpi#-`o2Gf zo%I?P)^sQLBuE9vSB~Jh3j72LA(ROgtSS(8%UU`*JbdYQmNKIv)+V^oZ6W9|`1-zY zfxY9CuSzik=bTG7=J_thy`x7*MpAlAJC2`+l8V6=4vs36MsVKEO3HERx9{)YzyDxD zbNA2AMkz^Hgi*jLhi=lCoO0gF^3~2kJefFPWcXI}7b2eizSJUi7J1jTY~Cw}Ol~z* ze;k(o_@nF%fXo*c&w@T2rH=&M2s=`;2>@dk7>CT`vh`&-1`r5an2!!-9KHiRvUukT@cnart`E4^lRxYqQOvv+&)iA%quFIO2~{Uw;Q8EYzx;n7|GJ zezLlpZrJm|vf*lQVolKYX4MuU8H?EZiS4jCkqG=A$^_i&h6>;K>zg$;bk+A{raXgh zX)ER5e_p&Np_kj`6B3T+6oxxAL{ey9)HKn5^*NsN8o6bs&WO89Fw{X;ec&ks*>5|5 zAE?_3orf0O)U7QWTnau>bMvg_7_i#U&dy%0+wB$x$U+aIc+-*?Vd5l2pn;rU84ZZN z7!EUqSVNR0_`D0Kcds)yRI1EiJ{1y)oSUEj+HdarW~URzU_eggoxg%t7GZ1o2KSVZ z+;w+7%lxQr*W02dsd;d*I{j1&U=0=%5a`&w&+zmkH^ttU61Xn zlODS`aMUkC3YC8|>JCE%7d7fiwRigW#egdk7_59x4sFu7HLjAc=28i_%!ylK7y@@~ zc4pi3hTHDjd@i{bNPcjzkgKn&otjJWu_ z%+L1V!p@Y-R8jyr9?1SA1qY$4v~hOkj-0$AZNY|GU-M71`)^ZF;MBpQkX~ofD7h;j zK<)!8vCwW_a-k~`xS_hi!D#RrsJ^H^WMC1DPp0#*8gO#IsO0min2=nHO4k>4?>cpP zdvs<}v9)UZ#+@_OsyqFVd=hSg>w4+nP^CQ`FpDK-z3+hMO@b{7#r)UQ)R?UycNf9t zS9tu_HUQpHkXgW?s8(!@1Ens?c4Iu-#`o{jeP;TlKGqb(3cwRBNs-Y{@B-0w{=FN=@xfe(f;>Echq&e8GSVaK@Vq7roL# z=)6?1gAxHW)L_oP_5=$6M9~-&cHA^_+8=8yZ=@{F-%l3ix7 zKnXqs{|{sglo@1^lCM@Ki(X*^0{gH8>V{_lN3-F=U>JZrk~gqN5x5HZi}?EaU50EI z2)+2C#1KNRs-y*LXepeF(fx=OX^{)l z#=x@u1`Kha-S9uQx3>=p2}y--3%N(J`}2#7_{7A|pFBZ>jhrTF(GO2Z54L+0(J*EJ zo1{EalxpZ%jd*9r2y+s@%*$VfDxHQVn=g~}BZFvJ$`u;_IjRDSdgkV%gqEzC6N0{~ zjK3@{3t5!iRhwOyCsZjsIE#GIH^H;x;5wAOPBKC8=2y4FdtG(Uu*N1y+<5fV2pTL%J1kOH`${Si4 zc4a*<{^5fvs4rKO7_ygQ*-c(DL;{842SPjzf+k7@Ulrj|;4ZjpEAO-+ej8qwS`CORwFror-sWrDqX=K-=IX{b)CR z$DM0a{Hs#xO9pxR&*iEYSD$=Ukk8e@JeHn(>zD0}ym|vlKBCy8noc0;;bIdGn}F2r zbx;#Tq$j~?1mG7BJSbqxpUKXzWr&XX_fro`uQdVZd5~1s-JG!GOeS+{a(+3w-31?a zIUziV+JG1a;?avCLloJ51Db<3iHX`D4MEY?4Br&x+!qoOLhDH6kA~%ric(O?QPU?S 
zyLC&S?=OWEL-4H!;!{gYa3!BfhFXC0y2Nrc3Nk&=(b1*Xg-^FGgN|ZveRS{#wvALq zZA9>E?mK&PV=o$dny}M$x)gt7Bog^O)lIj@y0?*xse?YEA();=&7Ex%)3;RHLd789Gg00;g(8Ec*T1GMfZrf zjZ+q8zkP45yVWwkJwx?>e|egtSso@Qd#j?t*!d74S4Ld(xt;*0{b!S@^CDG z;fE+@e?0vqJ;dbdP1btC1sHsENh<;Nd_}-1pjJa+T>9p^;7W!;;NZV5`!zcwOLdnM ziO$4_!&KybR`ihL`s(v>@x)7k$3_CBBBM4N-P%iz$fXieH_>V_pe%>acImX_Dmylk zqYJ!87q9eLi4SHxkP6klW_zK0e5IhR8P_D=on`vva9_4+ID8}}hdc0%DfPh~hytPD zX@q7L3kQcYR}maPyv`e1Ukzawf!NAsKV0QN@Au)}K|lL@Vs9n7t2)q&10@qhh9~Y}aTf@gnPz_EUE)^a}pbV)%bSIv;>m zJ0Uhk`UG48#zD$y3axP5-(fet0UaZ2qZ{ zSJBMcYDGp^q@m4wpKT>oGMR7o|3>w)DxA)?y z!{=$m0j|cU0Ym&h>|9z%4=lBti}9m=7amGc>xO+b3oywXEl@06BoGmBY5Nm^st9NF zIlRw3pgQP=1f1bAOEQRCyC*D6MXwfN41o^D`H+4E5eC!O!;a5Kcz4~6NAc!6(o@6d zklYj#GP}Q0ynXQ_XOHL1c6&^B+a1>Y0=?Hhsn3~lO0Tnj&WJ0o%3myM4#KBH8>;o> z2YzyHZq5cw449-1@XVNkb3fmV_OH71E&W;@(uj)^#=KSU8rju*Ti0+=2F=zX=iG$? zfOCT5d#|md;}Hj>y=*Xx5px>iA0oyBG3P$ zTmIbn77HKM_rKg~+=L|#vNT#yNkEPVyqXDVcJFcwkl)Xe#~zcuAA9j5OeC{$xSPUX zL4Uyd_ezgh$({Xo3X5&;elMJSVBag(WZ>tY0c==|h*ED8aZ~w`go}YP_npmT*>!gEQi6r&V z5^UR}+)!5Ol5xNgOloShhN7z43rBA8XJNFHCtBVsotDyeTmR9-eJJd@9HMU7KVMTLkj5m!#GduCQny0NzxADWk9A+QOFr#`ej2U8!44uQPpBL@Snyi#l>r*+^gmHc)wa0Nf;e)gpHg&*75w#%rG0k7^X59ua_hI zg2KaFMn^|;ScHToUQ648(yOqz82q-KDHQ;Qp)yPuRdT*)S2>~Xu0%wzPQ#%N56?u< zb4$Yzn8U2c@kQyoB#-1Z`H%lWt_4yy&eMK2H)B9}UNQfR%>%H|&MYmx0Ff)zONaT* zp9lD_$qV0-ss6cSvt?tjR`0x>wb~t<9JaOI8}gX+`iATom&dK^{%?NN#NFDSwLqh$ zLNpdFBMqzu>dG6d4+f6qqdo1?9zgBWc;tV|KJ=SzM|Dz>Y`BU*CNi>WWFHPjBRqZ>3ka~E0 zDm&|Tp#ZP_eODwYn|%@1ol$(BSictbbWLwGV^OAgKN(%crp}NOGLk*>9^W+liN^W^ z*-t-CI)dI_6+8sg z1tS+094rSOQTHg^(H`3l3#?hLxmfr0DNJfQteO3&VfX#UQm${USRmfBka`kRu&2$; zJpNYUEA_J5^#?+30m3Ya&wSsAbFS{JXYblL|4_7Boz+cop)-F-Da)+Y-8HkH@tkda z`6H2bvb`$vZpWL!7oBZW#yZTNnu1+BLDu@&Wra(}pN%{7I;Brm42$fFTq_6l7H)7} zr2Joey>(ny`Pw#$fvAX#Euf-Fn9h^S=I4(5U*zu~wdT0>dFvUC+$VCeClM5dn|czfB>Xy~eqGESP?x8{ zr`U9FENT;xOR#fiLDo)WZUbi9Z0SKu56$BymW8MD?o^awnVgh*20s1hJV zk?=dd2YG-DgEr&J1bm`>NKeLLf?$qsT4xdRD237R1{El)4eCGmHdFBZb78-!iP%va 
zzWZnFeOjt!n+B46^>w>qj&H(KsvT$+uAyYJ#h{1E7Fvf<_78ADqO`tR8`aNKFM8z) z90jt&o(u4Vzr((aqR(YzN9Yh>&*H-STd>RY27i*>TpBjxVIx1mOvZH~j`!(-qr_I7 z@|sj>^x&Pqy?}229klyb=4pi--yj9_kbU>$t<*7J#ldtw(be_c;=ZrR3y$&3Epf+7 zdwrwhU73;nVa6~0kzGBeCT81sy5oeT7U%7sH0SXjhG+FfyeLIq+Mg<&O-sCJRv5`nfJmx8${N}0A z;itxfKe^M`r(%_*b_fM)RBfCI>06B!4wqvd;&eLlj5|?R_bWN++BU&$5k`m+~o60{`H78c=8k#kJ-=V^gXQY86o}- z6dA#@dbxcb|2$jV=2e z+>kMIBynA^P+z>&@^?!k$Hw&gv?>{~|19{Yn9YpxlGCW+d3D594#PUe<%J0u)N9Cf zL>!j$2?BF4%U^l~8C$qQ$MWj)V^Tn%0RXU^M?XyXT`7eaZ9DNVdI+=-fpeV3O@E=d zM{ni&fX$ppam_-QezeR0SUvY{2Nd)>$jKq_jD&CLCs<~`GwNk%B^(EqN9G*Ymj;Mh z67sCQw6tDsZcE<%L^7Hs-yZ@F>y76UKUvC*LWplkS;gl~alV{O8PFav+otKx1?+eunL zM*vIJ`T!hvq5c&+>f~i`OMOKI!(eNsS;_$~(;UnBvj)oA1`W5aKkZ@d z8ypq0FYCJ(AhBK6x%YQT(7h+RTAx7PfI2dz)?LSzhG|1!W%T2Lztop%?)3kav$nMe z`89S8$K`kEd#UE9*~gMKmr@0FS_gfS;i5}(DgS$5(n!wxUEqd})w$KL$~l{!EKYIV z{P5QQYDWL+x0KA3KFJ#T-NF1KoJCfuT<1XgSB{&jMlRX(u7Oszen*AR^t-Gy z$mU6tqq9*zGqdx=xTnhIrZ2K$i4EVi*E7yHGd;Bi;Urn|+Z>HB&nqKa$9GIUxUS1eNx$}rI9pJ8s!e{5o=M?e08S(?7z+gCY*OLM{( zrvQkUD9w`|_kcK~^{o(pC-mIjne%OEv13_CHcbUaF1FI={yyX(9wcHw(`)aBSaml5eK zKE8L=hSkGX?#1;}Y(YMsDjUasILw&O{4y-I8tJ^o{>XZ?Gs8>F2JMv{vxy?L zEu@j=J@QGoVBf5M6ZxpUId@#(V(?3A-(oJea?5ce6vot^3g!#v(iP1YPV&+v{A_xi zSe5rF=SXTo%QjE$&hQ@AY@tkFuJsxrVS05Jqi&WLkFOT#nW1lm5-IFDV}-LW#Zf*V zWU7FwqsYMUJ1woS*j`m2??p=tGTg#0g~5T0ILY+evQoZHwL#=(L>d~JW10+HYbWw$ z+>8A(bW#>ilcQa=`oM^WmZ!@qW@>g+)xwpVFSo!@G9dVEAbIK8p8JRu) z)uex6u zI4B};URT&kWX+g2z(;0Q$1_$cgVu9w-;Sl!)2#?IC}3($RiCvkqY%`4ou2CC)p};$ z{@ygpsU@TTNeP5~XSdG_0eOqm-8I(;7MFCsM!gz$FEL)81#OPDqEgwe?4=kq`lN# zMsx8{=rminwoI*4|K(3S246T#Cfc6HD=>P@nm9QAM+*?dPtOvpQ?Pzv+G9+?oQ;vx zYI>T~V`s$shxf#lJ{j&S?gcUq%1CQ)x5E!q<)LLNKtyl2pXs@q+(sn zSH7AVd|q>AJmV~pSY@K~wDa}vOY!ZZxW&qj_5$*qJHuroM2(RiK|#a)Cc*B&Uw>V{ zyRPZXoAu$zeoVe);ljc~LRbt&jgt!t#MZx}c$4viWze54rn@>qfmtGN zlfw2B;|#StK|4uL-Wkl2y(q)z+kbT6$zDyIIG1%S>8dWY9$%>;^W%u9{tj&xmK2d1 z-#aki?eC9VBrcjm`}VnMBt<)~IcTy(hnqO)9ANJ}2$xnldLDoE@}HvV%7U z-3z|w%v~PTzMpPxbypAA*VR*%UrRc2MpI0tuW@7T*4*kP{`ou2Ny5o}gF=ri2X7fJ 
zSk1hb_~MtFH!okioUOvk)4b1OV3}h4XK+2gOIKYV`Gq3;OtPxCN%g92b15HFBIVus z9oI*;)wXqxc9eNbeU5jgT#z@PQoH9g?oJ;X`Z{rA?Zrd}ii}j_=5xr~{0yi}ats+{ z#2Je=lqN4?Mk3oepe|&Xxg%%i^v(T$ArI*Xahu7Nc7}}bgxV<9$Ij@WpV7u>qr8s(|r@8W4^uK9Q z1JMa}tO3C-aGJIqrlAQWJEm=yGl@ZG>sRnG?R&~r4kXTt^coCQQy*Ew!3 z&YdsHdn=n6tuyj9!zrCojUlPm|B!#J*4Q!6rcvk7IsBe9mIO%FOwSCoIth?zO;)*>vH=MN+Gz^>r$i{o9kIiEP3 zejRR+q!B~n?(^$|Hzewyota-0!XA6Dx_J>eLBR5Wn)gUiXy>9(Sj$7IfVdlc5*n zC-;>reXAZAOM4^GWP&xt_Qh+W;%y_B5E715-&mpQG{HE#ZeL}McX-it9~8^u&HBoG zpC4x544(#wV{l8i%Y2xkUYF|jLIm<01@D!QWgX+fSyJc@MPF8 zy%88CHxYmv#s@|t&H0Y5ZcU`bp;MA?wB{TL5d}y|oxH98^kaD0B#bZCQ(6~4n~^^A zu!%9M3bC*|a73=U!;)HL;gAbUDx33!WExLx)?tU#x|5R&Yzkf8M*~Lrv*}`NHy@zP6K0f~2&{Ugvykq;g(~wRr9tUPOt1!)8F!J9dvGIc@1a z&o5a4{t{NS1tQw_IPJbhYec?qIsW0cxIt&!^FW>~KfB;zq4lQLrh~(cLvvr%B|evb zqoCANjvFZOxksu@7Jcrh8kLXrpAB1s4g0(bEjAUQ7DPOowwY!YpPO_v9nvX5P1Sz- z(vW32v=psvJHds%!85}ce%gz0~ z7c4d9?H9CMnp+lce@M(qIUpUae49}>TR<)PL3*xXLt@p4-eY!QGpX2_Sf5`#@qg2+ zdF4O((0ELmRMRlBlT}U3*ybwz5+5D!l3&7@d;`$ZLF0X+9(8oo>0;;wq;1=7u% zzF07E9$=1SP_LiowtcLS$2MSNWeF@ewxYx#JF9($I-b6=OJ7gsS~~Rtr;7oK;k!2X zm@+gIRCRl;CSe^Q%UBp!EfKXQTxH2-mAI^E5>Qh~ zq2{~Ufrd0GA;*-tYjog4cjp#?*sfU72#(jjM!!5M8nbgIpSY?YJFE8K9 zO5eA6cZImp!i@GZYNg1@m;&J`Efn#Okq-R_QvU?yF)=v}^h{Qk&@O zN$)lEnxoc?pXP5ayzdplb~u_XuTH!5xPxfIwG(tDg)>Lh@rQVJY2MtTB`QpL`}H(c z0UxQ`9?^7*TcVnj_$nqVjmXXYyC!?D1wC5G)Vm!gH@SYCO~bCnGm|^&5OosCdV8mL zujJ8o;cJ)Tc)Gh{E}J!9`a-Tz@Vyi6?V>Q0kni;H#V- z%QNXtb&RfteMkDVmQ3mGPx8&L()6q>6pE+ zE%?$=#^Ia-5=t(q%_d8~3Ig=abew7}cHQX{r_hIez{fI1%{Ex<#JX4=vtkOHQRCl< z&;QSlNh!5s&@peb)1GG}JWlacBsraCk2AdGA9%%3PqkB=sZ%g9ZR=h0M4cg7 z=3Dm^-m2_vlw)ePKVotqtBFObMA=;NT|G@H>)&>crzO1HJU82v)OZu)O^#_CP~6N1 z1q+(;*S@~)FmJwl_bx`E-^PI+V^Wr;b&QRTAyCJ}u}G2Ir)Bs%)?XM!ceAi=$~9^{ z^T5q*YJsiQKKmQcg)J8nlsLj^P2O$ z{gLE+zk`JDQgiv}H_b+K?1obH_GH-u?|prD>hIMzs-E(%wfq{TPt~6PtK5Rd_ra^N zn@x77yjm&QgzJC4T9*p0__1*B5KmpE;pLnr7V{K;x%)sW7-M7h{zSq$F~ydUP@(5c zxl_7#dS*sy4U~Q#Ko2PH+`u&wqmM9zkL6^;v_iVhUtF{<@scIuK}&)d0)J1pRyNK{ 
z5UySl01ynAri1~@KSG5yqt1KmJ#c`K44uFG>q2(rf3?zt^W=>VHH*a#{REAt9a?v$ zr?m$uMETN9FIDNTTL#Z@c>B|8x8_!7ieK;-v@_fqWtR0KG|(eKe)S3UK>57#DphSq zeRvOrsO?CJWlLkqz<~c+lRz^ifBFOLwztI(>oX$Mk2=&Yb+o*ZreNc0jar^+Uu2FC zz3$GO=(|XXxB2yw_UDH?h+&R;A$E^|Bn3di2kg>hxD#|d)MNv((IL0dg4M;Vc!Snb z)cV>NvEgxuD%(QzsrI>^+Mff5@~0}p>LY6P*@qL7DpH(QtdZLK9g@c;m{EyHYY;_B zm9IiWWp3U)0*{INvSCBK7zpb47ZU%z_Ju|?R#fNjh8egF*ToWHn@wvw_MN(6<(8R_ zSe5Q~Oq&vFSFoQNUF`_<* zU-O4XY28c>Pb}K+No%E7ZxaTh`hT>1sdBQb99=A!2+jGR^Gi=Tw)=xf1jj+mh#=dXuBxg;pfG2`wIO&#j@XHlG5{AO)suz%~Kp%*6pXxa#a64Y{+)2 zqQZ{B9Cd6kRPylOY8j5edI*g!kd~taDTRdxiOu7k&!;;$kJN-5dv>+EcjVE#Ck(a} zd|Lwi<)>n4uhmuuX9v9g_Nppl%dggCKEZ=tCkIPKNjPb0-SPD`jyHLpRndYUKyyYV zXwOL`7Xvm$f0&|`|1Vg|?zzz)Xm%Kp`Qkc7!X=+P;TdA|KM#-m zgO!Ab-PYt{EF-zOt%62kDqu(SBCUb zQD-#7Pj~e6w2h3QHgg*4DGRr5d{j5y@{%spWT4phGriBI(VItX?{(cJM5ayt{NFuf zNDCwU*b+2`FIwN*HjsHVdR)G8`&ehqhZ!)kMBMU z-hqMT#F%st9#U)Yns=et4id0?jo0_9yIVw3@_BvZEhvZ<7Z-2*w;z*LiB%-!kzqNo z(?>jVNY{|WSKPop)>lXE?|Ufj<2CZ$y1J6!Y^8$Lo7QP|&pu2h1q%bNd77Bhgh*7S z#G9p%{2e~4MIs)FL2lpCib(_sv$8%H^FGCX=lfsst&lUm;j>~Ek9>_e8Hh2x1ep6- za1T}$M!>xG_rEGFy_-;Fz}9@GcIAMGz;eajLdBbk@7sP_&?QYfEv>|rFdR7g0MGaq zm!1Rt$`5799r}z}9F{=cphPB`2>BePI2}BRUDTXUFbtIhqKFc3{}uQpEX-{yKYrA2 za+kYbmhmg}!Z!7#wZIa%N?p;K?D^|2SCMyxP`%a(TMk|a*jNG`TN5I;0XTA+wA9rJ zJFnUz?BOD=A#*FMD-qRid>^OQeOcX~`JZw}cE6hMP6(gmVi~W^GfxO?k5XcijZ=tG z5I3%GU~rWQ=KBT>Aeizm$l*lzJLcJtKml+I=qC|BTjWNzMT?V~*ccdtsD+LjYbMZX zKyE*RRR*J0{y&7QC4UbzL5oF3dF(6lDKPHi`+} z<+l5yoNU`NX^lSX%IjUge^^rmk$wYxX$sse+{R5>e)=*I4AGVb5Z*CrH*jSxW3bt8YoQm41fkh!;mN$10d>->W z)2~CCj(Z5d?B_3E0{Cqb9fglSWc2$jL}Oc3$_eiwKFRu?|M%wcc0slN9fZdd4t<=E z;Yx?vW+GwDNJ+UEUCZt;ZwzUsIHc7W_K0}Ny_kRk84s(+MQ?vK!y}a9>rxa9`OJ*b zxg(t;S+9JgCuGQ-r*@9l9%(cTVVykY*3$7vd9M&1-5K|9>8JL#B+Wpuh4BCA-@l)(V-g3!gF>90{Z`ojU@Y)^V}x@ex6LAKX@{aHs<7 zCA$DEdPNAy`dDV?VVom46r*|;##*l~jJHE)xh)LbfMmcq_gh~c9muAIe+{`0nDO<} z;04qx(=`%o?79_iM5f#kMgTeh1RG@`ijr8h%rmM94YR>WQ)@eDvC25@eGwOe?N%5z zFYcON``Y4EvQN2^QBZ$nNy-($1C%V)B*CH~j^aRW6G)r3L1NO8 
z=WvLx17q%p0aZlU7Y;k(IDx9GqBuy{B?O7j_nyJ8{%3W^?E2i%@ucYb>?&ia2ru0s<-kZiC0c8N8{Yfo27pR|W zp++GWWrA}BwveFcH2P+N#Lo)y6bbS7PPzfe-Amh~Ue1A&GF**!&f`tjP zU$AR-$5YxV6E|s8oOM?2e$yydaMmQJahB;`ZgAX|OFSEKZ9O@ajg6U2R!(}AZ@+OCsP%>T+N{Ur<|P$2 zh$=2$c7dbIt6Kj(3L`{7RHtZP6maAC5T#XA;T_rZN3$frexcx z+i_Jn04znp7U2~_oKmQD_9woT1mZVO(TJ2!@vytM>8^PU606tt4x_JlZf@?e;ogae zujS>>pqahxT@d+S1+Txwo&PNJ%imIEB!+5YFfuwKYw{>?E&XPB;QD|NU@({^`l%1}B%z zi)}AtzDvn$z7We9E=PTLev&@<(w*!OJsiffBaL2IIf`XV|NK$^{R!R99~n30Vh9rE zxyeTD>y2WNEg0Cp--Dl$revKvNz%gi4ndHcX*e70!g>g+H!L?1!n5hVSYBD#H`@I1 z4n}tm4T&MzoY}X`xB`V3=F1jBpo&!g8)?O!|MSy6DJ#cFDHNoeSfw|!IqU5a_ua%L z`iOGwlJ_hH^RAfZ+fwDs;Bi{4ieJiIoFCa0m|6Y7$|YU(G`1UX&h(r4@1^n6TU%)$ z-TpX_Uv`jJBf9n#<_O{@l#OGl+*^jf?eYoqu+i|lq42u9IC+b(>=U-E+RjiSy4huQ zii{A>qo{(;iru{Xve@Q-w+_Dj>!phBT~S-Z5K65;cgl&v4Pzu>U?;#?V2mVCva&;03+e4`J{RQRJ@N4P8!Afl9K@J2j2XT; zJV!iCW;CgP)O&Zg)c;q#he2%+Ssv!ScM={xAl^dji`tl}CXU0B`TV{AtmfY@UMG^( z>HE=<#w0yFw8$6Ff)0o|M(s0n-#xqZt-!J|Kp>3EqH$b%U`itQXU^i@AAG?th?r}% zyyjh>Njsbt)KOSmfej<22M2&br#>f!V3VNA-$ixqA$F}f+*s0~LY1gNCD$^&cn^YPkf76xQ9M#5$3KsxOwHeVo@HkhXm)nG3Uxf4$(fK4WkD(q!4)r^c z+XsGs@PGbCRw!Pa#H~;4R}&}0&vw=&UvpQX%Dko&{qwE;yWc5ZtKU0-EHME>uuPO8 za2`O~uQR50{QoxX#Fpe?v>tN1xBP{a4akZsURU6^T-LF|_$p!s8^*8@Dt3~nhi411 zW?+7Zf4TU-c9DE1Y#Lm|nSD8JwSTt?#`dieLwJx=Lruxbc zs=xlyYEfeLRftpa#Gu`0J^2JHo0XHJ4e6fY%)$cOTvltT9^*CYfq?(Mz)q|Exh zMXn{j5G2vJSq1p7I54i@4w?#La4~)inktxZl78v`FR^zUc^rp7B119Z$cHacC5EH4 zApUcX6>Z}Bx*4Cf=I2W4q9(qU{SK1T?suFu=Q?k%?Ay0bsufo%au;sz%iO(tcW;0H z+j)$DmTbjaK)mj&fB?*oD*E(AZW{U3Un8zX*QVId8U8U)JX7gJ)lW>#A>?G>w6K~9 zyV_0<@QD9?T$k;VKWt5lL?mCl{K!u}<*s&~&MSH&59X&`V#nVfpNPu%J|#AgG~L>` z@-(O|y@}WVv|2IjJDYB#+$P0@N38IJycsYT6M1PQ_=KsrpX4<$JOtjqlNe+^I%<4d zTib494aPNssr>qdFdkZusUmlq>a5m%Ewb6TJTT;=3Jm?yZOm-2U1IyXNSJv z1h{K%o-+7*@UL$$s#}~GIB9=kzBq6i!5a7*0ZA2GEaZ%mp|M{5psDJE4}ToG0cdRqLlsdJxNj((nH!6wi~C zc+n1k2ECM^q>auDy)DcX2lgiY`*JRuuQJ3l*ID<~?zuAGZpy|6NlQNld~-=muB+qN zg)D}BY@oOIP5VQ$1PoaH3EAyAS^zW$8C(-yQRRdhQW!=Q`aN9aR4{Z9h*Rzv)(l+w 
zHS*br#AVUis3jwLlw1$*M?J_WX@?2P~)^J9l=2C}34hdy=Gi&T!@@3;rFYDd^_fPn`Wx&*o63y?0$Xxk0&Y$I5 z&|9fScDMN&>4a>cas|^wAtNIL6Q1N4wdb}x4Nd)A5KaiA{2n+2>AY$$FK!ShhCgOe z$KpRg&4~%SWeeOQ+v9GeZ%dB6$B}gVCsne=-P?t~7-T<;{=72~R@=X4&7wdgX?KFS z%=-Lr{WAc!XaSt2P2-Q$;_5MS4J6vyYX0{rVHd8a21^HDC)vNEJc1LSGCb`veL&{n{uB zoa6}Zi;$d>? zqQ$>N)bE=k+_CwC0vujG;#V%FD#xCkYj<8&js%^QA+*VHxQ+?1iWqQNh+%PmLOVl3 zT+IY3*OF!cyW3$5k^7F^4=VQCTLJi=;^z-U2F;tw*xDi3u}E;upw%2~%jN*|g_7_m z(rWN*K0{otZcJ#lx^VyDrs06U25vX0a;F_9E*p$XY;MX~m$z+n8FrTZ@0e#|XFqZI zi=T#z*;IHa?n0zOz;Ye)0Yl%tfLMd2y671tdLZ)F&^U%syJ3*t0vp|P`0$O@230=;#}NC zWP5E-Jj@ELbNJ7QpFaoA<;Vjc2b*uNm@>H3Gt*mhf4E(8AR5>zG0%Zgc=aW`nuMhf zuiyeYevcy<^ZXF41Fq4i4D7B${4Ve)W1b1d=e%h_0Lgv0ATR=1Q?v7LubHVSb`wM- zybKK`^4oQCv3*~K{_o%aFsANLANO)JU6xrk*38#gDegK8dCQndncpii5)& zXW~8#AckcRiWAGVwKXDZf`{i2b~%ZB>0mfPo#Vzps41>cvg1NR#2Aj+efe1I9c3AR zeLE+_VhmnNh4&xzm4|3VtSjS{D@1%E7?Om+vk#M7N(wHg z7?xBWI4YXbEuSllnX?r2rW~(j^g*7|jA9Eck6miws3Cdrqs6ME;%~_9~%$RVu;6 zzi+SpnLOa(;Q^z=J`@qj+9kV%mIvwJ7?oVZ$%VK-p}zqt!!wFbZuK!{-0ggxHQfqg z1a)81;Mws~^k;J$qwRJ#75df-r>ZJkkAKh4c8y`&%$!I+UMeMrWV8^#es~nlNJ!_%4-6kI5&+O)&n(rD?ox5jF`59|{6Jo&Wfe$=*uo)HOuIX8v zuwaicVo`Zynexc_>toN!Z=pCJf`K@a0BkS7MWsUjlt402`1*`Y-3PxN2kN z8F26{uFh29mOsHm@?%nqNp=$IL~+ znNCR(3d{p9wj1e=(bB}yul3B>??Cda43sqR0WaG#jyxGbvRb*lD@$tn0 zHO0;3GSuku~;gv4D}>DGvku3283BHNeW$SFk9oZXn5q5mX3a%v&((^PEf$QK5MK#iNK zBG%(pP*NDJt&3FlY^h(^SaknbLBkaw(4EE!doO-l!j$ zLlf@XN&A*F)^be&XjMr~&1BwEYv+0~tYM4D!)9b+D#9iw$Q-SVB+Tvx&&1Kf_WL-O z<>2sqD=O}oxyw?=XGolhg{wkd4YxTR1H)smD#$LFnw{lb{LzMj^hXe{_H|;81Pa)v z=&^gC1M&3qEMn%u$+c|>f5kN%wg5m0VRKDQ6%F^{#N$3eLB^9dAl7`TO2H=V-0?e@ zYOZcfSOZf`Dk>^`*vOGAgVXS@!-u!KiblKx?Tiy-m1#>M&?tfR#UAMH8HFpZUb(Ud zz7jZ7JX8~4dv_xcM#9R4p^%7(A_j%~tl1z%vv*&I!1pnOf*qtY z4k$R!F}dleD-gaAmvy@^$EB+15PJ_a+NfW?AttXBdxIjMa~oz&BR3Z|MqcQo&;-NH z3T^(%O}rP-XpvIVygs^fnoW}|?428zH|6QCu~Pe_uNKeLe~%or>T9f0 zJ|Tg4_Ou=2pKJG+k-QABeO0fnS8ab+&s4YLINkaYY72^68*3y38B%tw#=8>71Z}69 zR_+|(t!f#!7+N$Ie&lp)o2#^1aR_hV`B~ZhI*~g16?mBtEig_!$(3(6lScNC>J|KczIDf&tbRvnVM_zp+ 
z^yVV-jsfq|A9>Q>yBiI(On-cLy&x2_`l+wHp`1rY*+I?0WtQI;^GoAx+w{z!Cq$BM=c<>%-xY5y3LxG9z?%OEl z3X%1(iTFl>eFP$xt}$*m-CHWKJQ$ISNR)Ku-%6mVg1_yRemcdeq5MQ%h^lW&axxM( zGL>|2pd0!Eq#hRc`#tAoI0a=5Fkd1c1BF@H^f#Q)Fsc`$Dk?}+G{-SO*4Fb0 zjXXJ2ZrjS~L2^C??#Z+=L=^hqe^5ICvH`qGM5fShprQ;Ab`c_&#epBeh`jLD$Sd=| zJ2x{!he8H{AvGB$YM7jHO+rHcK>-Qs9%6hhFmWPA34G3tbVJ{Z!sTUU#OJ~kypt$7 z&KS1GRwg< zKDXS=s$Rj_(lgE)pSEPaIBk1x?Aa_LB(Ct726Q-62rZj(VCnZYD)J+l^(5Ir=ftfN zYXYjTGPvKS*B<?7lqlpjeaam2() zD(lu{TN(7Z#RYHS@1bNY;U4qSRj4YFjdRXf_kY||-xU0u&DwkY+nFGfu^k(R>91WD zs@*C&(_YhdsC>z;`nz;hZ&#ip-&+A8dF9i8V}}ogPIP}Bo;YvWzctRRBaWqKFsU?Xb+Pk~V+svSWqw<7q{dyOdgSFd=myTv;kMrGX}oAX zFNg19DbkEt`sZG=0(De`TP%RnU@y86y_)x8#+~_ja``x3&3_f%XEuy!hI0#S(Jn|} ze0+UV0OHU~#nn@@o>o^<5>;uQXtY5#HLw|E)J=lHdhO>Iwbe#SENN(6qdF>OW#THW z(`Br$rMX5;xZRH+>>`G*FDO4HNR6ZsM-_Gw9g zTo-%M%D`d4mN)zQVh0`t%g1;rr|C@S=gZ=I_yhJ{NW7jh8k?PM=R5Q*n>5Hv@WsMD z^`Q;xnAELlmn4*rt&JQfE)N}>zfB%l7U?b0DK=DR$&(jfsbv4;_<_A-LvdY0@aWVw zZYc`?tBF!8#W&va{IV>*;>_#t`b>i3q~XmuTk6_~1mUNLe%$S@;hiQoyH)$s<9*$X z@3iMQmupA?>9{~p`>vYe1)A`RMaPVd>sYgNt3!v3>M6C0=Z{wOZ^_v)G<9gFgvZmx zKC=0OoP(|`c8$$Wv&YS!6`Wif_MILnkYGFLJmZe%v95n=mR6jsYMTE3=?WT!?C0i+ z;%Skig6rfHF3EdH9E@o2dqnD9h1xRNKEK#8Ts&avIl*&Tg*!XP#^HxvWT3(ipQ+gE zjq8mlZ+OomW`uU(ADARMW!`c=MN-ui z#A@{)UVJ*+x2{ptwANTt^BU`N24c3KP%(h4k~A&2t>%hz{hUR=B>wM_zrV6)E2#n@ zdOvi?U)BF#z0ml{O#~<#eryI4=0QW?y`!VWkYrl4oW-BR1-2{RyjejQ!*En@ELFMq z^tgy(L1O#i2)J_xFQVCLp<^}Fu-j(rj#F|A|n-fIU4q(7)D+~TbncG zcEmdXbX8%R7;`XvE1uNIa&aX`$G9r2h*swB=_TJIwq`VB9j%YFM~VeLu9%%q`=I{v z$G1$Cge_M|>&5Lu3r?!FiMrJ(?l!?KKYrvXIj^;Dte<@lm`4}D zlc-dt*1p)5(CUB5R7+Rd+~BELpCnc2POC8&sYV&A9?igYs-9ohoyWjAccZMtESDR{ zvOl+vGFh+W7vT0mja#3c*z(UyTp$9Ez$78^=I+|E6%h*z_~vPVN~Na1iE&v{4h+kL zw>~4IKM&EP4H?!eoLf!_TmCzct?u$>B0B8r2IMzaa8bj+vZ=74;UHkC)Q=y1X~%%E zZPamiVR@jU)&mTLLGYf$x-tMjgjYlur7Vk$QC>m>E|+c+Q$NhS<>Z+&XHbpeClCRZ z*M&kd$sV%2*?T>!7V@d3$uE>~lCAkpIn~WYHq$t&P-vofPT9jB~v`e|}5f#ao%o%Vvg! 
zYtCmIT;z+RMbwwg%I4(T7aOBoxMFW~6~3vckfgtGQ%yPXCi~(73Mv*C9{r!LsF(UC z0G@jL-$;DQw6L(Vc)VbKexAUVppkg}^5q4Xc|marz~{=9(HXD80S*?%h|kFSCeZP& zo*wxX?|%u_@jcDI$=sO7D6P&r%*XE?&9a|khO_qiW_Qe41~ath(4ohehlx)BZA@*x z(+XMhSPnOIf=nB`MZ7BdAwfN9P*? zc9J-zd$u~e^dh|8wq+#b>}#yj*|VX-N_NmR z&Zom!l5N&rP)#&frhA!oacJ$@{Ev3WsutmUQB0IuFu>(#H37MItj-AYZAn-(u|UWugFFE4AFytMyfumht($Qh|cCf+@+ z<&73kc&(Eg=DbGu-_)~r?se*AD4wT!f5ax;;$-p{I@YFKxn^OWq+bWgUOUqkusc1t zu@M>^lxw-0v2$YnXTZK~b2K!x+IKai7m#8pb2j9Eg-0{x9I z-zTdPQ4r>!$w-K4G!WvDx&qKU>$rHAh>=8&6eu^4MQa!JOsSEojjsilCouh_>)5Bp zA3A#$Zuw0A#5iJ(PcpQ(Uq*ka&Eq|~M`pMxn)i_R6Pvq-T6S@pgf@k6glaX;T8##M z@|)MTR}!C?GCZ5@fA4%Mv!97M%XOnVfrCQJA?X`Q8>79UomXom6W#Sku8`w{*Yndq*>~AmFgti{Jh((hV;`8cF+L{ZK-rt_vhH@ZH7z<)jl(U?itX{t zXbHK#fH}zz?)f`63qG@4Dq&u!*k&_U*%p1OT6^At)5`J$!^E~E0d~LfjsoHRw~ZZ1 zlyc7oc1=`+#j?G@KtA2DB>Zl7zqXjqu(q2zV@-p!QF%Feo`wu0MsK)`{P5|^**Ji3 z&X2c0L#i4$q0BU_jM0 z+*VO`CHnN2jDV>~&-5#iZCSRYfv0xgXzFMkVZqj>YE1fdd*pg_q-9e7z&);wZ3%(4 zVM_I;CxWGugepg04jlf;?y7dhozuqPCX)}W|ncjk`XuRGe0FGaLW+_Il4(=nz|ZHyiL5*9U;O%rufn9FlxZL|}z+G~gu zgzh6|#CA;`1}qfdHMw>(;s)5CIAUv%!@#*1omu%7?gUnBL}Engn0n?~)^8ep zI6NyRxk2YguynW%&6m?`&Vqr@HA@l~KB(c`7#ET2-V?A1T6mEM7cIac93 zKmW#kLHo(kv9XlQBzrFv#!k9a#Im65s;&L_LuX?I2i#+}1XEWJXq2H&kxaWHhhkIgcioBxiCv%bcy% z*KhwC@ng>66KAYN^W%$BJr+EY+0KU48!OEP&}pPNE=}{1xoiU7YHk_7ya{2{aVtVO z=~2KMa9yUPbFKdpLB~Ud;hRJ*78)C{;>E|+Bju{2q-Y}ga4Czas#3!=m~>7MnSPME zUYk_@hsysdc;a^zEb?T79YIl!Kh+mQIbtNL_{gqyE-k1=7?=d6YoR+GX-Rl2;v`%k z{0!eNFnC6w4huG$jXl|xs zpq7%4n;1LSPKlrP-56{AmRLb0HxmgshOEAzq-FY3*xtVfrM-@*u=yLtkWxz%$n!2-@kuISo&uN@5jZ( zU37xBAsmbr!Y^|gv?tFxK+HizO5;kT{KWF-MLk(5?5Np`%H3UbZN>27G3niJ4C=4| zg7SjY2bRs+KV8A8NWfY)+OELQ%F4=^``|SI-Cd2?c5wq7_8(Fy(Dg$$0I4_EqG$09 zn)KL9+fSG>*pLwoUPuer5(L~~bKga3GobnaC<*-voNn>iP97&-3Z%1d`tx0j{-Xs5 z2w>Oy5d^qwd5oaC1O$R!zWfWLWKc8cKzB_9XW-P?eN15c1&Gn{q;2i(H&3=0B{6OK z42l2m%Rk-l8)#bJPZsy?9R|yc%cuDB=LZ-E3iA94>;%)1R)F9kRj-Bk;*4f~1sG1G zkWwId?*z1B2zDYqENtOh3qd*WNL?lL zd@pE)6SwyNBYc{33y50Xn5qX$p5F3nV~LA8efnHnkIj8$4Lf4m1-J@nUENc7R9ygG 
zb;0Qqlz>SKCv+>N_%}aLl|4ah6h3TBy&)=V__dy2blS9O6G6$sK6KC(Q~-x@(^+`; ziLw*-kmJf&CPpW8>eDDLA$tgsJED<%I76iO5>pvY496C%&n1-^7kaq%pvd38eLD%b zR!S&;@D?K~Uc_UP-7`F(;`VlR-VHK9VCOK-7J4Du(UcM&ZKP^#{e|vTetO&1t!Mu@ zC6r@BV6g>=+yrkF;;YJxf9{0d5P2|<(ea|6EGeONsQ_5)I}6Vk;~Z>vp*ZhEE?t^f zUIp{`8IGRsST)GuIzR%fIub_@qLd^{7l4o{+i8%P6|8Xs3O;%Aq?U;Zg-_LOvfAPS z;B0!x%v~KbbGZ~z6ehC2%;lq=Tj_3Jp##yOEJ;!Gp5n?OBb6h@Nc012Tp6JDF$ z2f7<5Ci{gYo7Xser;xQg{eyqvE?_5dmOW?}5NKZ13FZ9}KFjS;ly%sToW|E*#C!2I zIlS-aQFWc8FnBZAacbaW#6+7?I)C_0bwMOq#?H^5W9Y`}!j0`sLRPwtL#Sr`GT?X{Y ze123V#Bo}T7z+xLfqk%Q4ERhdDC!rXF6+%&4e>C6Aa>~e^gMAr@agsDZMj_Ke zWYJ4RoOsN_H$di{MS+cb!Jskm_r!@T z^Dki`AdR=;rU9mN6`0D6H2pm!Sl>W#IUpDX1MZ1bRv<5g=0F^WWK|6`=upK<;)@BM z8xZTZIWkhxDfroHK%+xn09{?|{)e=vabU>e1F_%?yd%VRssxmB+BZ#yE<{)?D#&?`-Zv-8zeeL$`-HV|9;}G$m zRn>(25h5IXcyDiS?R&z{ckJG+i{D4kNj^S4cYhUbf`UiuigfGuS0NrM;Pe7oi0oc5cPD=F4h;)M)O~ahr2i!;q~U6&Q6?i+i~-~!w4hA{;KcaW739Y zaby@756nLff=~;C;dek6k=|0Wpiy_?;bP};8`9rsa_x{MaKP|-L*NhZil#5kRpT7= zWz)h3CPMzjRWWpc;MGvF)3LBTLs_n@8s6{*LRcz_=?FsCJVfUCw zqobmVWevFL4UX*B#Dom+EvOi8W4QuI?dtE3>cz9CL)(rS4&c#?@qBg-gMK^E>9lYZ z1s%|8ns)gZ!lh0n;CntkYL)OlTqm#buuzC zLj8(@z;|oFIMy$$;iyqLxpkq$`z`uuA5NBBvcqD4nJKb zKu4$e;Z4s_etPTq{nLaTJF|&ABcj%q%%QU0kW;td^qO&0(P(CKkzcTaB%D^)GD~;ihy_mipHw;ozOTSA=+3UYyy<2{dJB9fiij7IXFQ4D2c8X zl>_F1OkA`X%Kzjf)(5AA!cG$Dp!2UlgdTGkO;N4nhNwe7y@gpb3Uxb?^l}`Y68Knp z`umF^<&iuCXYW(YF2{Lrf;bIYTHfM4AMPb`=dhG@@S0gwQs37sL4WiTY3Td*?74_g zzyF80?~dns{r{#xTc=b?(NfZsEu%?x*_k0bBxH}1qGVK3QAj15gzQm5*_+4?A$#xp zc~R$_@A%#K{dnAe+~@J=bdLD+`MlrP`?_AQ=X#;!UheK#7Fo2Vl!4#oMMW34%?LBj z1Pxq9pL`lT+|ch`u$C+2g!SZPuymF6zU$|jCqU-q$0A})l(O9~H;<0&==bjV%n)AV zM`@7H*rnq)9LBNH&;W^}FJb z*fG;0X8G%m^{o|SV=H7gDCF4mK^@!qRAO_?HZ~78DvPI-&;9(uBX?@+>I%d92@LOI zhB{JdG<>DX!tsNGx=6L^ARbSqg*La<(MdG;C4k-W zD`8E7aexCKym+y*6?xS-YQ&EuI?g*_-&AdnIAZ&Q7gMv~ro&k%p09r4LKdWZq|(CQ z3qeaML;@fW*9?7y-vKug_#~7eK4bTczP@7u)t=qd)O6WJFGu`Gz3v^u1SN0X^Gh)d z9v8kA6<5^wd;(?#0tzAG_kB;2Rk2ec#27EkoCo0<%kn9MZH!uEG=Y;zQNh)+HJqb95AV3 
ztWCOnLBy|nG`GF5h&{7+=}dj0x<^N%k(nS1AM61?u*E52c9vgL zMqzGl?s0So;2O|y>amk)+FXrR&N$d*leZ7_9y)v&z3?6c%j3wqps0vQNb#u5EGla1 zI5-BtS7D3B+1R<^i$TIRXi+I3{wv1wPCh$kC2vSIKS%W%yNJK!v2m<^jw;Z7EoRVZBW2G^>MD1;>;X6$dbBAQqEU z$P@+?`*0~~Le7%F@Qersp9J(F@)YUkQ?O(>3q?( z!Uhcui4|Sqp3Vtv^{41wr!%2%_BNC2xL2zKDMPcD4<9-oVvO?UK+cX*Fy&A z{-IqaQPqG!VL$zYp5E|+FtR*IKu%4~7PL{$vFhl*aJP_QwgeXpf#`_Fi5RUV_`{8# zim$q`k#ZF8CcQP_J-+8Jb(i~&gs%og0z(vbXpd6^iM$wI2>-Qwox^z;sj0&EP>=Vd zE~9f4C9T-{ammx)DD7rb>9$XgUOld6G;1K|bz6IsZSz4CJh)@G{QdpEs9zSBQ<2rX zE_gN>jTdm1$ii~FTBxH)hIT>%2kID_*ED$RtypF`)WAbflj5-WSaiQ?efd%836CZ3-N#0262DF2Lpkh7ZHzEp z3FZE3k-Ngb_CtZV*w_dAFEZ{1r2t5f|A#SMusr;dGc&I)DJuh`1W{m+FqJ_)#!%gy z9tI}vDW}mkYzuDy#U&hp^IE- zC;pmocgIF*fnNudrqrJ{>{%9%lJq#l{(^70#v7dlG6hp+aY{B;OfhW6HWkV_dv||8 zGk#4t+w39G?ZW39Y(7u1XvLL{NJ)3{D0)A`W?kAj{-U1D+WPfvqxY6Aj+Z=}Fz0UD zP@Ricr*6pj{Tk@M)vt&ycyK-iehxFCdGPi|zjL3<-+#Wobr(~`TD;mFn{I4S0LDy* zJ0t+(4~~vP#{(Y$Ye$BuSC2`%33GDT_hC)MhMh- zki@bSn0Hs+#cMJ<_SeQbqqHnV?nk>KU?*5AmSlbV_8doZ1#mlO8=?+GmN`lZ(q95z zhwzmGir-x<^VxD;-|rO0)sezsl#(#3pZSWVSuS}zKtV) z9F2T#)Ga&i_;J3-tKUesGE@Ck??zQaxiNHA;M_OT)8E5Wj3?n&o;Q;eW)hM?oZJ8~ zg3}Gr>Fzqju(Fn% zE$ZLVr^g#qt#SgYNAM><4#PL!Z2?wA@Evs`Lq>=oX~PDT2vqsHgU(K=nFCww5V7LSms?y?Ii9#Lw=vVNJ7&el zXV>RA?B@&+=YzyuIUV&T5%8Soi;)zA`i)8-SOe{rEegtxU_($puBx-Md%PS-ojxjiZXmI>si?4!0F|+B%|{;{y;m1{aSx=Mqpo9vY9eso18Ugm zM89bhiM!)fwSGH{6#b8wC=IZQkg}u;*U)Hj?!N9rBpg`AK=!T#$KUBx7<&JeuH7Oe z*kj&HJ3!{^u^+0BIcVA#`;I;|<6Y`h@nA5`h|^{1>O{%`xzEy#ob=fp8e-yo^>{k0 zx`LmO5ljF$$u!ZIkV^UtA?L;&o#nReh$28~ydHIK`%{=HAQ#$~NYhT)3_&sW+4EPf zye_i?xBzl208lSo5#jeionO@d+L}U|QMAQKG7#o@uUod3qT80896)Oc7i^V z{)&rM9Kau;!i64W*BKYOk(#zo{YU7-(c(HhFu8CnOJEh*VuHTzE(`F z3yRRBijj=iGcLXENH5?JIZa2LbfSB%Q`%D}Ok2QZ$52TTLL;|d;q zRE%^*m{yyGq^F369`@b4m!QCeCz~i(agTkR3;_=j;V>=59J1xCd!iB_Dd5zj0C>j) zNC$32-UR!uawZDruerEjGM-Ie9t|cp;1*FD{0xj8PG#)tm}R>$mn+~1**{L`Tj*b* z{DHTr4OKSU1BkHu8c3E5Nx>mOExx`YIA|y^y6ivH@NF<@D&!-!$_L?Jm~Raje+jTG zO~92@vxg2I)WTaJ8aQ-qh^1M}km3|B7?@ZOl2UMfChkBs^I|b%mG$xCN07+2r46W$ 
zU;l1~{{>$>uH1VAiIi6x(|ke3I$!$W$cfHC8*KshX3~@b7zQh?P;NW&;}mKIAgOTv zF=7xa#(wsR)Dxrvt$lv{iKWM?!W7?EefyRstrU@I{-R2qcRg{{C7 z=wf)lcl)d+qjeY$1$Vr8ds*hXR-KNu(l_>(>zv+SY)pTKBfPzi5uiTyAK82p zgtUX5BeW|8>VrqKfrfZ8KLRZBX;16z?cEBIAm|c;Q&U-^p^Q2-`V-S`!j5PKmaM|| zyRU={hR1&0>Q(t)#R~fXsHKFAHb$Yh{DYQuuBA$gz!B8&|chl`Aol8sbd z^#WRHE3cq>sh0C>u>1kr`R@3HrXEQ0`MnV;gxyRySXwUmR$T0y@}*0Ix%4Ij_;ugg zn`qmBx3yyAY@$f7u#k`@1d)KJRyd(T1I>OJsDNkyqvOm2ERr}(vv_>y(nrh@@_E6O zg*iXIV1AHQLHs$`jg~OSara;OX(dOr1dw#Xtc`u$6d_gTg>7IDo7gz>OV zj99U~DaAr;a{S-1%>ze{5aJzbUk$t+EFzt9&>6Xnf-=6>=W?T)y8FnuPN)rm&lq3m z7;`;cUe5ni=H72DKzaD{uO@D2@E`Xe&%PkJ|9a0#1?3S65LHM?r?y zRGh0$JOMHidJ6rz_^_c_SQ$vZ6dHd~1c`TlYpSpR{Q2_>G{#STMLr`|QF77zGe?cL zj_)#9f4RKN92>#bg9i_y_#C9LwKM_YLNxVQ7hlt?<39Y%fDPv@77A%I9x{m$qci^T zxw#8Ii65g=O4J&dFj|7^9AQ_b<0TEY5$(;5k|hC>r8l5OyaD#}CVyk3JFWDz zwU=K$c18`fRGtKe;NWN_b9@pS=WBq3W5JSC17DSn8-Q|zqU53ddbwRpL?kR;*hHdD zOB`%UnI1a`GCC)GFaQ1aR*!VYH{o3~5z{krmX2h#;^8psl!Q#^kpgrV4?|Lzk`EGv zx8Sqi3U9~xT?fJB=5L}mm;zg060+D(((GZAux36pt2{71Ms2((Ldb!!$hO@_0ky{E z+P4ILBk{6Zop56iUGxe65C3^5=4vl(u2N7B#+T98w15$Vv0?4aPZ;0QNy6TNt`KiA z_7a@dCBPQ(6n7kToFubhcR`kPpd!gs%entH;yJ=h>z_eAdmBwR5ubuGuT3;j_b*mo zadCdJrT$wGE&f{&t@9dsAe_9Sr%q9#0D1*o4zvia&Ex2(X2|uyMSyFGl1l~p2XRP7{*&0?porG~elPq9ahr0hU5|J=QKETw{?Ox( zJzSKJGKb4!d{r_f*o1?Bw)n6<_Yg3+en}=LFHgbmTxdlX$zCACgePUHloO3Ptc9*z zy?O{6F5&9|IuN|H(P7#MGEFh4!GmWozXnT+G>zVmcfF5~|5Q>UwUT+u=FR9?WqNk% zA|r-G)8MT{T;Ce)#`N%@8e|Tlbr35|g5Kx8{{2olwaj>FLSkYFWK(uP*8uo{eW^{e zW|QI{mGzl4JRtzWm0Sww*_dhAWz(L#324gxR69wJb#v^7BhgDiI)h$>S#Sd5nYR8+ zI04yAb&Pt<&kz-P`CJsFYq}u@KY-l>m~BpT((T*Jz4*uIAmO;!-3Tu{%9qH|WQEbm zG*y_sA2G)NI@h1Fm+C>e9aLpw~#34uO|7wLfDI#!4VO4=QPA zWV0YihpZmdEV0*GPVWPiY8O*wkU&Lq+wWbSBD7y9O?IOaghmW;7Vb)cnd@IVJX}!j z{iUWxKV%K{^<+lTvD7e} zIC6FEFvC=tQayv8%`+bsQhi zB>-g=1Nd=wQ5lM&7`a;8Js_6*QF;XwNu-ed|w zsd@nGG8-mJ0_s5AgxnBKc1NQ;T@21fN(PBX0bE!NRQ2FtYn6sc{;T1ikc1bBno6Ra0pm_5ugND}8=wx>#leUC}Uh5sE^p$&0ekH5rV4 z`s||XYd?NVtG2*#ei5=nY&2vWLEle9<7H3)l*R04Eh|x(;3JuunPFx^@y-%_03RP) 
z7M(*FD6Va17wO9P$88FVInV)IZ{6PtOIQbPEXKR>3<&NTdm@2a&kTwZjx11^&7mF~_} z`3>$ybq!lXssV(XKo0fwsnv@b*gJwDtuOr)mZwxCSBS)4)kI`SAb<6_eN=#xB6fuwn-9R%goh~X{5&=trh23b?wib;o^`nwP zAU=n-98m$$MgeFCH3S(WMtpD-1A~JaDBzWpjVC822xqZ@mi9Fdwk<9-n=U3byTNlP z0&=j7e$>P;UdXsvg6TML>Q`-zLtFs_D+@6~ldJuam--?;K3+s1X5}t}qx+_o4b3mX zB0alN$H>SdA;Ch+budd0wE_acuKqk0-rm-h4V-Wiy)*}nJIT&a)3Fg?o!#a=M|OIH~!Eswr9fnn*zZJ6n|~n1hQ^mWF*qe4l*PQ zfy6hNC3ti;A5mfcqP~Dd4`y|b7IVsm4_(T7BP2xkY{s%nccYg0+=&xOzWNQ)41`8Q zS^W*^*jVrPLB~SGuhrbVDURqe?#R)dH?zILf{(l-3Mq__4z!$p}XlQ0y}X0 zavU8{g5<(T#;kwr*y$BPG`~g+@?C}-B_VfRkfdL?+8J~s4nEqAjg7!fIs}BYtUUNL za{K1hXnLB|B@Cvf?fQ~Ct?;W)hJ6tt_z~lKcO{c{yY1ZL{-uy)avU%Jx%#S(fr{Cg zaY$B9MafsdwGHJC3FX4Mzyxv<(aRvEkWt|`B_t$-!W0=&w{g_tE2Kw*FCsoS17#jC z)DMk~0nlH9hGnm}KuqUdERq3G*fIdt^_9c%Nu{CS!J8dnc(uN@Y|rA5;uX8XFE*%U z(R53nBhk}SsEIr zpg9C|U0Jx>p+9|`4iYjj%x8n8SuMI{Q4_Iu_MxdmZ$xf@!MU*owP)X}>2&IAXKd)k z7yftQ;Qq9Orh*~sPsTZ=2%A6dEgdmVjf`{$8}vw&35y6|Rh5+_fE83C*yby{+pFC8 zZI>=t0yyX;Xh!6SsYFNuryxSOloU$x2*eNZPZ~cEiF`r zF*Q7pO9J5OZg_(70-HK7*!aSMp`r3r39v#>_7b|}&OR0>iBJfD8YUBn!N6_Y=&khR z7hO+YKAU`Qa9tidT-LY#v8!o_#A!*$T3KO zl(>LO8g4)6i79YQaKoyO%bD=+uXkMkCk46Q8divFCs+{{7GMEpX66^@hSty>dm-Qc zkDCovQ}XzNsZsAH61=TIJ}ahK5huoX0*C|4$*HYP`sa^$g#&a)74>S;8k{|(Md?9)=Dc9X#_D0{q9dnC$E+AyArP9WK$XERD(KP>s`#IR`Dn=*OX<^_G!<#J-Ih;-kNcb&D$>#9Xt_5~`J#zv%u)r+=+-T-q*YJ|3X<7n0i4v2_0fE(QcD|SE;@)<(! 
zoSfwwH}*cdFTh$Ygfbf3!7gZ0`ptYqnSeW1ZQcKaft2w`I3~(doAD?9{vOH6Jg7vJ z@+@YfBT~4~T;~!Vy&;vzzlkgzH9kZ?tDI241q1~6P2peAjCSoXG-4mI!ex63;UI38 zl9s`WbIYyi=$~mC7zbI{l@7-}+x;Jvs4x(IY25>1O9`KTeabemY~VV$B9L<8j^VvJB=sNdDB z+a)!_(M+U`)fwWOS*->x1Bp%0+1O-=^@wL^{rdGMQN|LB1vm=|J51a~UnU$ehbaed z6B9d8NK|k5vbvwEWo%rGPfo-O z#~WYQzS`atk~&lP!04)KNV=@0j=^@rCmt{3U$1wfzx2g!JZJky!Dl(Sa^4eN&w^?DSZMHEtoTmZ}x$u}EG3GnEAg;Zr{Ava`^?|e_w~Iz^T!qg^k;<(wkPxG`)S37?9uP9pqF7=4P|_4rn0A)t>Ed+ zM!T+*ZI%r`zV4h|SXhj-PebyPN`o=u>1F-KQf$^OLO0ddF}0U}QBwOV?7VOS{zZM` z2+3MK-KKkU=0xiit>sSM9-B_h?N~Y8*2$Z=m7cx;x5Ik4h>P;=yLXy+JmjhqB&7+oBmR0-dQNYqXZ#uK{JH1yW&ayk9;KO5ydc2Bz9RfTL@X%p11X1XWs1fXJ=y;AJ1&sop z87wa_k0+*cQc_c8fGI-`5jXGCAnSp`$D;FO!lc1caB9GE92*9Mj$QYpcWaQp~-5I-396-_OXuE8e9&K{rpHS$>Uaeqg))qAuy`dzA%H;711;LkZX$t)(uo@d-CXP+tP z_N)HANYvZ-+gpf?gZwvYQusyOQ<#JjY#`4Jj(!AkZy^zGREdSn|zq!=k~K)RH<^H=@e zx{D*I|vP*9O#TUvRsqt$=-@>e(O(~T_qXM}D?Xc{UigY7Z^K&?RI zqni8!h2P1^p)$+;t0L~j;;|)!q!VaZgBW3n?h1K{wjWKIf(W;+;mxb8d<1Pv9MWG& z-*jBt?M)%VH^JfSKjt`c-~a(gi2a6ctDNMME?bkz98`O-q@D9=rzy9B+fj0UZZ_gj z=Ji1-Nrp(fdHG|QpKl7(EfVkC_NQ56MOWsoMi=O5gei z*7RwGY>l*kXGi1B*0jBu-`n&m-H4#ykYUJ?Ve7o*cHg<3+c~`_jSKhFb~cC%yb)G! 
zA>ZTBcjh#s`bWXKVjb-Q{VRD@4wHK@t;4d(J`Y%ode~Lmf6~)J_oZx-E;s&X^(j+R zR!mm<4(%{SR$-w#Xt&3J?4zU`05d~XA8yI$vRxgn^!e$``2sC3ej%)nsNyc*MX-bA z=HeBU_tGUIV{c3j4 zY8{uRV|_Kvoe?-k6~=Y_66Ofww@HU7`obRq>-L;$DBpoK5viK`7;=y8P^Kaki2Z1D z_d~RvAd^6vW`VdS+FK}r^z3Qa5aown1N@8*PH_C(W*G2AjnL#D!-yhS(}>K1knZTb ze7a!=xd{3f#;JtI{ZVeGgD!&!hsC7@?E`K_S80%Q_3FH;oCzYnMV>LVBv{yQ&@gbo zZn3b7kkSSoC~1ABXkvN7R4SakdPrUCNP6(#4;&oCGXO@7v?oT=N=>$}j*Rs;skhy0 zr1@wCdtNQvq+o7h-bv~xZC|My(Ii|KGJQe%i2tWAMO&V2wj48&XQo}VHT~)9BxX(r zz6Wa=Jwn7UiD&B0zlqy{Ips2Ywp`;h*sl>N$#iv;{rNZEYJ)>wt8(t{p{{JOmGh5X z=%8Dk^VzId&+JpmJCc{z+7i`7O*^c!8L%lreV{WyI|D$36%xFn!b)&HgW$qososv` zyMXAON8z{IamMtbMpRo8qh$27|IgL>GqDEyjnmVA9M52^K5}?qw#S?Kp4ensTf}$u z)DP+cEB5u+t=eBRTfEV1snuBA7RvL2{-2&SY^H3UbQcVuRQ=+an(uv-y0x?{Atk#1 z)3u?!D2MjqemwQ_>yBizpE3t1Dc4)?KThgaAI8Q+WIQIAcpX8l48Z0Z23_FY-Ahd5 zf(nBTm-(!)uwhjK3OC$XM4$#$4>m*{Xu+kvT3@4#YYs9qXc#J zp9Fm7uWOP3Y(Rtm0g0++X2VK(iqg`jfyNM=LzGt`Z@p31HtH>QZpz;p46o)aS;x%%AQqvzk*(*y!pR%{N%i=J&4h)!?`D zg)cfIL%Py0(;4{qO{j)&r%vAAN2&7ROXiM^Hx8F2r-@4t?j0@~pD#@*+qkb?_7Q1> zRbMu8vr_m84$sWf_gYk}if3GJ5ip8n6DBZvYJXH6HwuMsMDT>W=FOiy|G>=_Y#N2CA^Q(|MaKu~C? 
zh3zlg5aF;|)Fj#p^>6iw$<^&m^s=K3rz}|g5MEjo2O)Q*T5y_IAZR zi4qtWx{VeY1}9*~WrxaUaNYyF%VsP2Hd)NV&7IWvMICpLWI!MvqnlpdU-4`8ob6Vv zjWSBy@kUvC3~CP-fK$I?$Q8u~iy%~#d+#6*K;w}Nu`CLBLY*NF0r_e;$ak@_{$Xip z>F(h{f5O~3U#y7oMUGY!cVyc$cW5p0E(ogB{M0J-#vC+pBZBX%}T}9CzE{d=M4mtC4j`MC%PwRD10A z;10eQC<3)h$50WOGgyKl-?i(Mu5YyV>wftfx}J)V^{sOc7t-U)mTjV?g@e}xK-Q|; zH{qZFXcO{ve}bmYwcDuw)-ehc0-)G-@HzNbMDyU&V#h8NNB_ydez~NtB(?|Flxe2WIjWfF+K%O z@z7z0Bn#|zpW;dc3dk>ba1@bhfC9J^-CJT;vOyvjCO<>s9 z`&qi}wg{U{u3e&2zq7*d@be*ijWn0>%L7A$RK0n%wNFvWz?7$UfN}-^x>;`>2SluY z!rO!!`Vxib44QK)k02Iw;JXl8L7d%)^nlABH}Pz+4KP4#ussbKyYf0QZ3(`lFVOSE zk3~gC0L3BkAW(McHKy@E!AzDbu*Jui-RBH%w|+P0$OJ8+H-H{Nj0ltA+T zZ@|=V^RL7H%b_I%y?5ZY9@&}U(2(G%>gkn}o%#x35LaBNTsqDy@Wr>Hq7DNIwi&(X zO$;n~gFC;ZKfImn!L<3XAJ_S5Sb&yMQEh-uA0enk`@ac8cMAv`X!kX9^GI0+jIo0k z2GzL|+b70NG*c5aJ9s}qf^^r@^Ko#(uXjatQAIX%oaafare4!2Td|;m!C3R3b=em* zV2ufNefneLXD;D?Ak)qtyfbOoPFT89i6NP9V&WJkZp|SSgR19MG%928j-|dZx_lo+ z8_M|kd*|%H3ok)fEJ_5X#UEz=z7E9ivIx#6dG(F2X?7WTTl_a2be3m42XBYb8-~Yde7nPkozhuDy)4J>W@u4NunSx(yHU0r5?P9p_Yu5~oj66hAQ=oR^ zL8g^xuyh0;q#j7%iE=bZNO|zmh_`sh`OC8pjb;qTHakpAa5*Pt&Ia0kofwmJPE-!! z9E@4SKsXfFP{5v`i6V4uoompJxlP9HAKMj^tU8JZlk~VZ2CFJWxilr7k<9hiO6EDH zYc-kKt6p>T@!E*yPTM0*ws8gZYGb0abs3C(p7S>Qwoe7m8Oe`Mv%Yqm?NPUzwcvNO zUi$*YE73Axf0Vlf-L@+VJVb&Fu;g86m;gCfgr~~7eNzKhEuwxufzgb199X=>e4mg? 
zkc9=f?Zg^??2DI@uQOq`nhw_TcyI~$fwxesI43Anj7pq8e;HOhr94z9#DZ2GOQz$Z z&5r6JG9l0zm?d}TLu=ICpxj}&_ckyxHm%=PUDt9a=6tHkX!dwfkHX|=Jx!n1SuciP zZ4x2Bhzc6i8J?1%6301jOm&c$1Woic!Eg(@!txpKW!GlEWnB8)dGxlK=J;|7clS+H z{jXG?a9Z*AsfCzXI%EWXRc+dD(|^vtJ>l;BKTwUP`C@+*j<4h`$o$%zdns*^2koa~ z?j@?<`o64o)4HbHjXVtYv}*Jrs$565sWGgy%zScDWyQ3k%h&#sfkBp@Au2ZyEY&!x zTbq(`BUXibxyZOmhRE;>VMlIm#y%(9{#XT-ll5)Hj{+$U~2XOe_w&tSaWihlPTrT_8A0AWHfef$`}^H{ z7w?`I*jpICX^%zq#fTOmA%??B%CC-v^PaNf=cf_t(P1|(T#}a8Uh)Rd43Hnl;4~rc3=;=1_Ta&vh)MbS^^)1V{?yTf5u7}4*LiDTVUQqdM0ollQ0R~$bA@n zg-j*%`@-m~C?U-FR8b)#gQz`kOvOvrit=z_kb)o_FT5WWhoSh|pwOK(DD%XN4##`a z*klZKUtp2jnmGI}Hs0W3m^fX);-2gmn(A$`>h=X@al%|e_gO~Ox!^vwO=VK?r90%&03oD7gFqLW532;+IBPy!@jOb7+e zM097QpCE$)(c>*LVEV-vsm%?|Fw&|5;d3IzL)2JXTN_DM161+~oPju;BUmRA)^JtF zKXwnmXl9*DVRa)-_n&{uidelpxhJ?~Pv(a#jf4}KN$Y%%-fELXlUBM(ZNOQXi{4nAwA35X zCZcj205i8Wz83~Vtg0BFtOjhQkVp8<_s<2+Pcp*KNQa;!@S5Z;Vrz2Fxg0v^LC1O% zNgpf)c)TU`^p;ALELL3-H$|f~ZB_O<3p~5k)#JQ%F-c`ij;ig_1}4njzIE#@(nx^xCj@vI8blD{6VYX;y~xeNhe1G^ zNmMRInFw9J{9>OqR1gNZg)y6Iu%0pCaSsYZX2*c>|70F*N4tYZRkHvg>NjQZLn=xJ z?1C0D$cJt-8;xTvA{t116|?|XK=4$l?YHMU-^R+trHh=7Dgj5P9ESD@frfTn&#PKJ zyuHCGY%*<%saI)(nc7SIxZXXS?7PQv%aQY*N5sCt3NEn&Y4)9`W?V}rKVj~Z$(1*% zS1R_))TZ=Ve3Z}p+`igTcI&faYkr-Kia^6jE)SF-2t8C#($R(aWu}dBz8Ru~)N1M* zL77`NuhIGZedueI7rFqwFKlZ9M)p#`5}aSDBH-;3k+|nf$k@K4KxZ49EEHC^TNM9H zy(70;?B?TTr6%T<5;r^Vwy3U98d_65R&>!vPAyzn?oI1d!m}i+_ivg)jlZR+Wz7u? 
zahTOx@ob3yzLj@ul;Q=xzU;8I3#C^7H=3#(^%ihd7_MQ^=$T$m2w%)9LXc|*p~CpA zE(~{=0dB))O@`Xzc6s~xi5eJ$>*zB9^@m7%0uyzBIc~to=JWW0KNuO@y4{#-feinQnUGnyISf^j{Vs^&zA{cJPv{sc4E307T)~_4}vbu zLUqpuBoC_{BUAT6T#S7fi0|ZeNi-_N7YFGa;M+T}^?(Gh8C92I1knI!=Ne?Apbk=0v0{NI@3;USCvV2U4Ak4fPjUj9E) z#i>qi{hP_uK0p+3%I!LSTw2Bw=L;F=0CgZMnO(bk_fs}qNh61I2}3VDLf&-7{B$qa ztIGE#F4DPa=W*$_&qX2?+wbx35U~wx1T*$RKH}?a!*!Lq=|A4-t=@I-{R?O=m|Mjp z?0v$27N@_KkF%`bZ)Iug_hmKLhMDWk0P?Gc^4(G%GQC)kX^96*UsO_ZlMefYo$uhO zF`$9vn63xJ5EQ&Nt|_d9Ej>N;cm5DeoAT%u(llpDtzrD|HT%y@@z6Bk=qTq+8ymN} zoTlttxA*?FB;iTGTvztV1#V%|-uT$1%cb^YLByHSA|3#G|- z`4lw|TUswEPB(R)j5$&oB~{hi)id~K^Y`%7!uQ%mo%AeoVKE2*2V^4L_ zmoInW1ce+en5mT#>j3<_-mT9I4cn*e-9NnoR}S^x0i90)9(dWLT)T+?rNMrF59eIQ+-LN?3?HN9X1;*Ssw0py>$V= z?A%X*fUV=}D3W=?zF$$%`5r&iukS7)-oARh`D-mAdtgwGtmO@^7;V+MBvte!-tJN z-LZ)1o)=_683%GJ(4c3r_Y$uEcL4%$q5$}#kDbU|RjZ^kA$A4C{p`igzIt)rpm7YcemIJ9A65)zsQRPGSP+Tl& z){7%+g;RRE{f_99RSd5OA z!-Pv8t)3ap7K?=4JQ9V3WzBu9nF2(J7JeyNVAD0<=fuLWu0KQY;KPp>=B_~*5$~z3 zqhr8y3$FS2*6>=#B+l6;n%vydqc7~X{@9ZV;!~F*Q*EGY{Fu(ZnfIH%oyd~fl9jY5py#-h0+W@{2UPcnEn-E3dObaTr)>hp z!WFx$Cr2!5V#V(>KqHq2jtRjano#i}a&Z;2R9a((y>jRH0f2=V+T;&p<()wl3t=7z zuAMGurcfh%J3jt|*kE{-0ck0*x-JCVI(+C*<)fzbkjRV$#`~I?S;WZhaez1oJR&nr zbr%iw=Z4doTU$>fG$c9Ml7Rx##gDPERV3#@fXs$$?NT>Ej)b9%bC5eihFxQ$LbP;f zse^pKsBP&@3pja6NnEK_UsI|#H>AEowop;e*horWWLM5P%el?NCOR5duAXKSs~0um zNPHiX#LTFzf48HyH!Zy1QiEKS~A}tll z67&Y8daQ!0uCtlMPC8l+&gMN(Nq=P!NE*uO`>IQw0>IY=9aJ3gSDdOXc$hmw@I&g z&Zm;>pJHRY+s;yDTriL2v5Rr&MEcANW%b&jbF;a!J}xt}HHovZCX-Aj*uX&v*KAi{ zrp3_Yq9=ziaSNs84xeJ6_&FG&M5K6Oj%SDx6u?l-yR@K~%mqN$YC_@%o4birp5DVO zEH^t_UYC{jBUS}t3}9_AWM%1PL&E|m%B4s$Am#!{L`m>05a`#|UEEw;PY}9=& z1)o;#r%z-ASa*Y?g}p4IqN)lHsR5CPnj6Uq6s1VoC}Kn2#I8UGrEzVGp8rd|1reex zZLdFbSUuzU7B#C=oqJu}s(M-8dm%P1v?ifc8y6b>xtIAv)cSlJ{En*06Z`km$lGUb z+xo1zeVjIA-K^^)v5qHZ5@q7yoj1Y17bCcrB04)crTb#Y^K~ zLYq^+#NI?VL_$x%JTF7tU6g$as%&CRLu}GIAfEyNYX~_?k`*58F{%4z_dLmNpO1RY z?4LgV%i9|5-pF@u`gZ6_MXF$2S3}^JqC-*>@-udaW5-O|1Xv^4{Hd)TTJ7_BJMN&< 
z-=SrbV{-K>G0KBOijf_ImaG})8$PXm{rkhXp(Ghx$pVdl2)+l_0YsX(#biujXSf^% z_8J<#GTbgiXi5MAOemg3IfDS9*C^qVQd7Yw7666kXa|qIiUfWVfiMiOBj6IH-Q`9d z6Ok|=de`tw*Z7*}(;HLxg3?uD1p)%sY>~{q#&}-jR?EhG^(=dnwO19F|GDGxoZgSd z#E$pdoOiWCmthyT&F$VAMs-!kc?^f(268|mb1~9%39R=^maynf_Lw=MT`)zgE;33b zQOj}UDZ^8Q?vALXkx!-p_l?io*o&ESZuq&I=-A6KKPWweJG6Jg;2k)PsPCic+lKlV zrw8!OxYG(j|KZsml6k7zLq(pETL`}qjR;$cSN8dfiRQwPE#RZ)WM`8If_+i}WLQj) z_1EkFF5p-S?h+O8f2u!h#?@6BHvtm6iO@aNFr?lB$fc<*mod|4+n!-AfqnAwo<5r$jK_b14sELgalHadP9}tJ)WgoHoV*~yehZDd!ZQZT-`}i_e=g^iE_(V-*oTu26dwo0u8PYuXpX0G|s4vPl|Jl zgCynikDR?Z=VAxj?p~eHjhQt)JRq+AW)^ zc|CyTXNR|sX&55Or)&f5Nr zX~|9&N^}HdUM-X)J?=5F^K%RB1%q79x!W8HDS5$3baj>kW#lqG_Efc{V~I2fBOZlr zznQ2Dv93{Q6OqkPZpnPDlWjb`BocunFe0}gJ)*xi!F&`UHQ8>P4kC$uC-76$VHl}m z7B!4<5wIMUN6yGFy68oO8;;_!Cax%~=tu=yY}&AD{ZL_jAcv1U_92eS+_DihONkD?$M_|AaGuQN>R%g4rZ`f6|`$)%2RP(B!l&tNZ7xTFKSl=X#(dsrRtwAAW-*>ARb zqF{w$;)Vc*Jn&Bowp_B{ ztCHU%>+?T|DlVlT{_mm+-pZ8d*t-V~Z?EIL%-(AJFKpyqHYdZF*P)Nrni%JG#me)| zm%qqeC%jlA?z%<6P`ovZ<`qm~{>qyc#vUMB`E9-#9G&MO(8uwRW@5i;7sIdDT;D2i z%upx4P->)!8d;0Kh)Wtl?8ivDGJjkZe{}@O+kLiy@O1?)s2cwIF6;^0?%(|~E0xDC zgg>TsPV4yn0)JDf61URk$XD4f!r+6nU^1^Bof@7~pT?O(=qw;%K%;#M5_#cG@nCD3z#e5K)EO^6uT0uw61st;oZK1LRjh zD|~>N`IurRE&@tPx=8+ixqb=z*%n=XhcI>+k`6@oz(^AU?-T|6ryGN%MZmY=C=d#h zZG{*B_6?{eA{7Dw-vWPbTHo?p+_G@xO1*!PBgZf-5#@?Kyz-5KqrZpuS}>a63^* z@eMA6-ty_wr_GRa617l6C&FGZ0HM6xFzwO32J_9w<;ZHr9zi*^K9jqbOM_p!HkHq>VEo`}%63l7>))s?tK zK@ww>!`WC1RbgeE<9)ZKCy_pP4Qu z@M73q9^&A2-dY)Ih7O{iLVC_i7n-DP!NED9viU3p_f`*!c+L<1{SowTLX3hDD8f#=8ntY2LIClRK^EydxDvqw zBrKRiFp%5yCWKgW_3G!(uYlPBksGwQyD;D@M7+lwjY6WrZXb;>-VS;L;u|p9R9XLN zikneVEkPl=X!8SB83Jf&&35kfknABjCj0;dT{LjsYoJ#ol4A(AyfHCI-qwF)aHmHe zVoOOT-{#Ho+G!%O!aNyLmXp4F>T6RZZNvHsE4izW(4W&6v3@r$B!9fgp4D}rL?_m? 
zYOlkT&Vk#BCI;Q}>v;nwkaF!&7=Q2T)Rk4rHR+PI-CL6FTD(}aHcmY-(OrPdKf`TA zHYD;WW1{aIQdZdzAB=;T!vgjeVu*yf{=hyOCqJ9iMU?FLX7lzvFU{$6Ofts&z>Y}tNjZ{S-!hAHiJIZ@JoApRK9AS5XQ zPdu31j*>w2UTf%@z~;4~R7Io{oWf+)Lv~NmesrGXzM|bWMFbscs$4{8V81VYq!77A zp-fGV&Fmo2!Xlg%jN3WFQgyZY)k(AX{H#k5>l}^H-;O$xM1%Xi2U~I%0hl;qluSXI zE`bzXNmYt?|ANa$1Tg^V|BCNOs1adAv+$cDa_2hHc3fnOSJ$DQX0AK#mE70O%GvWY zrG(2HL%81PY=-6ehGXLPCu0t`4%MCh{PJ+9toC-d327PiV3t&~qhVD)8^%^PHU#Jl zee_tCxm`3r*gGNZ-OGKyC7_rHF&CIVz0NCKl#zSR|I2G}-UEC0ZalqmuT#*TAm`oM z=a!vX`C{jtq)_G+t0N1f6t*nSxmoL@`p3Cb%PChc*{$mKBL3F06|cQTZH{}3PtN|a zm5zq%JD8A*u*tpPi>|I(&6Y_1N{;{e*l) z$dJFq6W%nqO3*PcsknFL*x0h2we{Yn{`GNFo&DRsjlp4J5KzU7q#tnZ$9%GB_wuO-|{eAeBe7$X|;o6Ob+Ldt3!=1c% z*e50@eS7$ZKJV#p9e}7$!(C_(wEzE$x$h1Ha_#?Copx!f2&Y9$vNx4bGE$PQNVY=A zu2YVXR91G8WM^-(lbyY@XGZpZKUa0m^StBt|NF=DoO7DSeP7r2`SL0O zU@=CcguY=Y5~E1J81~|Q6i8D+P$a1%xq{sg(t2<*&^fYGVE-ct;>vBHpYe=cK)x6` z*P*i4L_4*MnTlb;IE;dBo}PEu+D6tCvYE}($d%O4WwGC__Rnt*-&MZOW{;NC@R9J6 z?G&3Ps*5WGrClG*toioN`2E7ejegGUdgBy1N7q{DD3@M(tF$6d;ipn_f6&IYtl>iI zZxx#hdFja42Y^wqTttjH_rHf_O7ZaJVs=J5W_*EftBK!v+?-|1!Ir2fsi}E%-Q!2U zLKBEF2`-2p=pedqD!;=64nDW8$q>geK0kJyuld%e`1ud`OKEXELiK|ag6kk{NK_CD zu6mt)&9SO($!!)%DIMSde zJtdc^*OrkwXN5TPiNy4k<-zmZ6S|lRTpI?>E@PS=)=jDvT3SuKWug*VlP)-!&px%lv1|9}zc`46Ra612s@>k@f!1auxyi56^UBIe8-BM-SF^m z3UGW07kAr^<*Zjm5YIz6IX47XU4-^`74#8nprKj)CMf7HtbDab-Ci8;qM8axNGpWL z`bN(IQG==ZMzq`*+Mz^gRFbPA=EXFvs50XaH>V`p_2bQL%$F|DyL>KxT&9_|>eejB*1pft6|2rrPabH@ z^pyKPFYj+UOtby(nC!oU7qb31s}|DLupC26=lt>4==|@&ghY?G$M=ZSjZl}|sHS?M zdgU`s`nH$&2mWj9UcJ1#Q-s?Ho^w*1h0%HY?%nxJ$t_S|QIK4o1I|iew_$!Kp)D9Y zckJBRd1dGIw^zn+JiUaw2`C^yyimLrm`{C%w$Smv1~0m6|NdeCeF{kH4+y9&85LYv zoFL{FK)BoCJ;D;68FNm!Dcs-ytSm(Ad^WUN!H-Mx%WgmpqA-2JGZ8ZPKqV&hYS?WU zPBNCJn;{6Sgj3dXW<WA{>p3WXzl*7Tn5Q_jz*uG--8#|V_GV{gzP%gZ#ucu+#Zz_eC60ac3n=iy78O%*r{ar3SKUyOR8w-Yb_8nVbud$fsYPmwCRcm`0N-S)x9)b=qjv@nhx=V z8Vy%^XhOnn904fVa}W<%{U$uz337)T?jo#&pjI8!D|JpDa6*UV?{5tkgMU)cw7A_1 z_kGeOoHA5J<$;&p*n}Fo?D13)b&D;Gjqq4p3Wgg8SHM}kWjKtZC+5JP;qFYoETjbS 
zwltU$U^gjga-&|@)?>w3xqC|U1pv^pP1e9 zL|}`BR^FW;weRC3br#?LSV(o(6%0K68l)^+9-Wd}zifpAO-A|ND(TU)ekwijg@xPg zhBt%{(f<7Oqo1Sr9Kpvfhmupp?FkySH#@0Ysy*LkOy zE?2Cohj`KJ@Qot+JfCqyUefsl|HQmY1^{--RBp1K5robKAj9^ z$LFjDtWaZGd^=8Y21Z!`SJiN3>+SacXYM$dujY$BAG~dHCGO%2Z2oFCQ?+_+uuW75 z|2#MIC5Q2oLmb0-Z#IV@3UuR1U8#lz|Z*RR49%Y(Kx~{p%V}aZE%W2H$9g8>9hO zR8`9nP_0|TgLDSyB?{~7XLv9C&LjTySp>2C+L32B;~WA705h4EQOnIUV-P1aDO3N- zLjC>T_7~rl%XeZSavSwKJZ2?EuVfp0ve}8yEFN>wx z@gU_(UPO8)8rh)^hiTc}%c~kJ5fpZJWk%tkc?&@WtPyIV7s)&S&&ztAo#7XB!*vi{ z7m%349K2C6iV7?{ZdBbGUuX-v%B}?MMB1X^`0Re^+ zeKhC5j{#OUOR-fcs)XVJ!G$IBPkLX(2tIiAE=Ax*)KX!8mty{d;L7XnuPl4_It*#O z_j?z~UC}Xb7oum8_!wICBVKVtW`<;B_<_V_L-!~JJb@&am~@xd>f((g z1Pllo%0yav`U)UzfQV542oGi+(9sQBwj@}oV)X>F)dd&`tM^spDdIqenI$^047Cyj ziAS0VgPY@ex(Af+%BzU1Z{A6-Fn*+yPf%6ZV>HK)I zb_ExIeqpszT*s_>TYLDt5639QS;OA_Sqf9k?@!OJeux{uOkY{l%yPA^IsFM!&1cqQ zM>}oj>U8P1x)`x_pcZx;0o z?<3mWl&+&gcd4U8a0QNI<9KbSimWa82J2aZ=82{f?Ceok^6>p4y-O++`{4aM4{me- z1Z!}zP2y+_2*N51HdxB;7+HhaX|b7U1!VG3@Rl#8cA|VkK5;HK4k9b`{}S^akT!^y zEuT>RtGB%T3TG_5YNSdC`mQ++=ZMqM^-D6oF!oRYfhDQC)v5Jg2+IjMYjUeu^Ax4X z&tszHiCI8t+G5P6S)22>jd^D+(_$xX%RS!p31>ksdDNi#f;iU~*AqF3SD!)17m~Ac zFnSSF?(b_3%Gi%}W-g+KAqLO_4~?4UE!Z)i?l}A!3MCM2NfrUcw@GUo0$ouh`WWJA z_&cCVtZjOapCE-&kZnV31sS7a*WIa~k1t`paZASD%vp^H0E6epnHTtrL$@hNQmB<) zI@pnHz&}<~l#*ugSMh~`(aY;*=*PJg!p83pn9~~styeWN*m{JKPJOt&J-;=6BZFI* zW!;D3AG|(VuY4r*7j!qO3Z9ej&!)nt?G$&&T|A^rkeXUUw*KHGHSgAtua4pd`)LGy z#5!Z?9S2$SNA{I79=JJtxJ1TkcKb}p%;MyfUW<^`(GyxX7BmtH${QpZb}41cOK3+( zP0}u#Z<#-lrL?rgiYvQ_R-^B6asKnIF*_Q2C@tFry~Dq3;nEe0w&KzY&Fa) z3+*e|SGu85$(SFEpjv2sEx}M8q8Z!g#ckVQ`a~c*-{>^cX1C(oQR$+)z6NtJtxHH& zDv}TBn>5dUmBic;Uf6jI#DRFoAxuCZ7ZGW-I7E9j6RXclWKYK8hNOTbEZSlV%I-R` z1b>4JKl8+QwWx(56y;Emv=4+qeJLs7v6_xd&4``0LuNQN0!;w`Pry4CRo-*s8dgTq z7>cs&El4v9lUeR|lm3dO2^k$Wl3n4>L#B2$L=;YPEpYLD>M)5;M0?{HQe1AbQ^%Me z`+2K$GTi^BPent?7|(Xl*`F~!d}V1#FR$Y^rC-#_a;6tQs%P=EQjijIU@YN6G(7(! 
z#L_gw{@)PGCm0|A`^Vw4O}7(b{13!}Yn=jzFLI(|>NwFdiq?cK_?_2%f`bH~pl@^Z z&}(YTt)~YBZ)TJ|d4n_Nan`GeP+L(Y)h(OW1_(~0ow3DuXhVyDFGX!&MZ-1Odyh6B zH=5ipNE>o(>d3}Khi%ciMGO~e7)4j!cy_B??3|1Z0OKHq z=n!_{ZTXH?B>=KtF%wY8MJpHM-}fP405?r*w%_Wt30w63PESAC)5BXHs`=8ayHfpd_BNgvLnC#922cBJ3m{3Rw z4?h8wIl{S6bKV*h85j2oF1>F!c;UXAn^iGyEg=6R4eZT#vsGdm4h+zT@0!5*46H`c zCHo*^sHA}b!4ql?Xm!ev5sG^_VJB#6PS$c}5q1U*#qEQs&r?)q>akzW!j*@(qfTfO zJG`|5Zo&0|-BP~hBoh<4rIEZ`m%8nP)DeQbVX3K&$Vf@qhw(QsFtAMUbgKr!gs5U( z0y2RO3FrVeGySaC5s0^-)#7+ACL%%RbEl9ju6=dHkkbVGU zW+X_j2%`2yaExi*DBvF{i6l&`Qt1W;Z<02PeL50cO~-IcqPAUcwu4T z*~w&KgZwQ;9wjVrbz{fF$orF1IYClmM3#VQpl?-Min{p~{(49l3fi$Jb|A4FL8kgy zLJ}}cA)`yz!oUFZa@_;9D)9UhGp4lHgkv5Z&hY)iq`9BxfY8TB{=S~o>xjrQFKTvTw5g#HktmB+hlxfh9%I zSJJ7ja`#Y(RC{ySBp8$f;Tbel~gbMgLW}zdu6eXt6Y8Ia{g_1 zXpCCCHrK^n_EJhtzUj#Y<7st!r+zI{Pkn1VioGwh$;S9)MqCX$b>{&R{=9{1q`3nf z%e8}4ZbLg=yHBRGHyVDF%lAchZR}%Xns-`I7pjO*Z_L#8pKEYZUFlpqvrt9lx$0g= z@qT?Z|T){J+?PSh<9?D%5zqtxAsnTcQi5)pxfQ3CpgeC6euYto2B!0pBaaasuU zQHx-VOrPgbSuO3|vj?ek{U~&PV7bSe|bJ7o%EQLvjH~91k&^W!YE}$UNp2NT}o%hAo8HD}XT zzFZF*WpSI2Kp|smK>WCqQp~T2pr`jF~1`NYS zGm{af&ypHw(jC0$uY2t2PYCJN2oXs=<}aI)y6~-|#{us`R`?I6+0*S_bb^oeNs5X} zDCWN&C^?m7y?B21u)`qVnBs$q2N-%6$w}x<6;J)zQIY9f4Y)u`(_M2zk)DJNY(rif_ zUNOdO`rr|qtE*}kTuv(`l^%K|ue4y9835@DZN6bUIgcyJZ8ffOVp9qEI) z@K(TfsE+MlPHRqe8&}>EQFW6Xd{=zUPzQ2BPL9fEelo0YPnZtgK3HvmC8?M-CKtczhIVSPwVUdwi5BIx>Lb@?jIfok!@Ji0&cDyV{(TH#d zo^&&}_edLg3%-REGvG@9DL^@ODl)MQEIrc96w>Y=r@(fP^&qlM!}y&o1VbP+&0<8p z45-bTRwzbj;$g>pLh-G$_uRz??CXU0EC!r1otX^&ldINnfx=cY?~>GqHbF79>&3B4 zUhP(M^Y+`{Id=9jojgMw-d;`KmwYV)n;ITb`<-r8XGIiD|4RCk%Y%*4yq0&}!<4*s zU#(4n^9Cmw++Mz*O0MA*RT_3_<@$3=_WO5P6;tK>40q_QjV5qH1 z!22U!1+EN){T3((sL6{e-MB$@>I2UjgANmu;uRaY)}zsFx{9rIl3$a$`o&Lh&vb2+ z5Z(2CBrHJ2$M)NnwS92|%b#f{eQX#Rf*sk~R|*{~rhg(xCoZ?jmB^e&LsjW7c#MkUGYG}mkwZg294W@DX-Ih>nSHg=4S{ z`Eb1<)_J+PrA4hsMwF1~Mvm_N?@b;!>eQxEXieE#=&^I&0sabw$wy7$_ zJ_3X+1W1T?E~liVWN%eBH zHTC=mRV2?%!K5Rce>*;o9$DChuM3Qm 
z5cc82pRnl=Jz(PW0+^HWPiwY;Xt{uQfPAhHcjALC#SThDf4b-f0G!++L>|P8D9B8_$>=M`c*C|fB=>yTa%*x8O?jGIM zcKKbs-fvVET8o~$l;BxD=_7Vut6^3vQZ-rWu5W&F;4*!&<9aaC7fkn`H?zzQd33KR zb7Ib@^{SvH50$yJ(1|abQmwvR#-3f*Bxa*Nd~aw&9gM}y?SFo{Ff_gNp~j=x=KCu4 zsPvE1{$es=$^d1)O|z_gY7|jf*k=&SX14OD^g37n3P45O!;UKbQy#PH6W@&XhuR8V z&v_#g-?-RlLI9}DtXEH!*4<;_n{!H9DSqw}Gf{Op_Q8XNYjUrot1B;#_)PxgB0pSj zJ~>5|^b0Ef1u^6Mrq1d;`|a^Ik|RqRR&l-HLHC0MZ^xeNnG#J$aSxn(Gdv(6XjVy$s0NT%i&hjbbBzS_=Vf<@A z?;Z+2R)@Q}Vi1&bb8BoQf?YRX-^-BIVbssNd4~Ogbot4~p_NydBTcRz<>&VUT!-l# z+5#6UF=5Dmf@M#XEzGF$2v@!wU&Y~hF0ETMJNxwabTnskvr|h>?TX9tTo_bNy?YJV zU~zep-zB?5M2x?^a*RYzGCfyHzpKxr#tCi{x~+X^UWY{N8NE;d-l6(I)sOXgflJHC z7L8$<$Yc?KiU0d`tcRsuv1?c1$9ziRXt&ptI-Y(V|cZGWqf~ued7bm z^>2*Ldg_|#&UjgI=@bf7s|qWrK1&MvgE{a$cbK2SbXDcPh4#69Sf<~I-*}#^cG1N_ zGsib>b7t)NKa$mRcbz=O^hKm+SW$z%?b}B) z37?6L1#9l)Ymk|~s=2|SH6 zVmRrj+STS3NM$O^NTKysjN3pGfIO9}2fhb0RQEye7eD+pM)b#Xi2iCF|*}KVjQ_FxTUyfU5<2f?RX#^t_s-_F=Y( zA8n)1CVyhG>Y$m6xvGXP5A4BO}uyuCp31OA|w* zr|i27pNUQXWSBnyO)LKV`A;Y~?a!aHwH0KP4miTVulH^0*s1dDs!?V!>*+=B@dX`%hNW|;n+lT^NVGOIlYyNNmN0H0ZDhvwg| z2YAk7$$(EW7&8veVl|J(Y}~lP*b6{-$4T`qHIBN%IWU4U9WrU-7h}Uj(#LB&jH+}F zqL0(l`2g{2jeX0ZOpdy849q5s7AV`y#|+YlfeT`hd;N}6j3DV-29 zx7aRx=NP0Dv`&%nivB&T-|+%hXX5MuV>T;TPrU;#Q+7N_&fK$YG)?H(6-B?I`jY{1 zid3x=hP*TTt1Bz>1AiB*_`Xn$(E`UkYkc>v?~2^p0z6sHl@y13);C(Kv*=Dny&L9Pd$Y}M*V&%l z(oD6|-iYzLW39opmzxu4t6ZA*-eCMQVQq*tUzf0R(&;7sk5o$XZ#pA`};vx^MSZH>j?7-5+W3hVRw5`|}PY>@wXsQ{m6& z#TaP)3PEA+$dchzp%G(Gun{W+_n4nz)YX7aM3+C#v^30k9fn7PU^h6FpoC?yTkbhp z7YNW}B&CZIerdAa>Dy9VQTr`_!tuNdS%=u}fE5^XE6f008H0uaV!6%04nIN8NrLr) z3pWt{7Aw$@=}6$*a>_(Gp9QMo25KX`EE505lB#gzp^AzMJ}>i>mE{FEVs9g?4CaK0 zx+JLN$-qo}r*kM{hgyQg)~5gH(%mnaeD zzE*i9JX%>X^)!Fxn%CSzha)sM*|$jBaX1}bF!xO>{CuqmV{U%p*w7<^grSDo&W1U* zjS0{7)k4jbTlJX}jYAOm1APt9%3d3VnA_~sP?;ijvS7scY+ZlC9?Bx4kX(0V<;+$Y zuJI2OsZF`8_m3)C=PnLepO$5gutH%B5HbDJ*I)<6jT08@wN;k+t}d&$c3oue&IXqP z#TB$d0|i6PUq87B~hE7|~D=EBe zDkJ-V5i&}^b8oG+5q$r{GC@PEU-FbByIpS{USVpRj z#lZN<5U3-ms0tAkw_)417^6U9P{NmyltK}=K-VJFJC#tCKu`elqe1Js-ZwN$%UJIK 
zv=Y^c$?@^|v;-}AfXA6a7g_gh0i{LK;0ay;^r}=7>$WDL^U-oI_+cm6CT4e4>6{$2 zmb$9W&(wTD;Q)~lP5Y;D$jRepssCJV*zp^UFNIcYn8D0LJf!wQd2Kfho&2attF^$r zi72biUIfw6TU%QX&vf+OFBySzQ`>6eZ$k>Fk$4Jj)tsef zjfkbR4+hq#4CbG1&;Ys5Jms_2N&qJ&6r`vJ*GepfDXcACjkafKNKf@^Ml>`w-$%yZ z$wj21pwI$$D@aBOm!pw5FI3Pz4NupL+6f?Npd`z$UtZ32v9p6UACqsicr)( z!raJ+STZk&h%`o-C(dWrfyuVY6y81s&HLRMRZDbo4+q%QtC-hbJmJ&?AI|4H3b2A% z=&bw==H@o$Y!b*JoUUY!RI0L3>zPLv;dt(#H8 z9STVoic_}0FnSm%BRK~Dsz1r|bnn?O-;Lo_*ugtM%AqVN4znA%&*UOFyjCEwx5ilm zE8{&Jh{UE3u7IXx6l^fz5Q$I{sM1!$Uyjy)KGLq=7~)S(2vl>az)VXX2$<&(H^Dna zA$$EgiF5!lyC3ndSU&QQe1gG1yievniyM=k+P#$Jtg=xzqvhG_&n|L{|5h)Fzr7-F4Nx3w>e(sQ zGJtrMl~+tGAWOx0vUMaGkW1m!YVtSy`o?Ze9F<+iCe%`tH)^QW>#)#Zexl3L;dVX# zPAogw?`@E^Os)=xp?Z!&{NN8LHtQJK9jvEv+>R7{Pu2a-B}!j^sF21!I>^0-Et^|e zh^^;{W`PA|{mMy|5-&YV-$rB3?ifHc6B5KAZ?lLT;6xyhI7LIKQ zkEtHOL_(Q(OZs|h`7at?lG>-h>{J z6sx&lS>DV%d>%WUp>n(f_Wuu}l_1 z1P-oldkl?8;KX=_7lHEZ{~(^ZgL#7#bHG0d89olUEvS*i%@O8F$J97P3U3i~fc7>Q z6y7p06t*yaf>DO}#o%spE6!iDjsIqq8RE1s$RXNlN1`HW3?y<7R@Se&r=|ymaMq&L zXre*Gl?7R`>$cIA!f>XS@Y3(z!;HuypaHZKT{C^#50Ce|Vn4m2s(ALoD3d0^G2FJ48>w82bCg(LJe zfBn@Wn`D#vt+69?hhS5OC_(o{eB?HW^N~V zX{gzau352kXN8V$t!#K&U}mFKf0h0B8O%izU%o;E0}h+sdTbTxbqC}}LBgMilpQO> zS4`^|lo1^sX0Em{Guj6qX9y?&giFQ6{{1tJZmu}-52&EM+euNXP-@A7&w%xLgZO5;@|M;s@Ttq|!nbuD+ za1t!tVIjHRp-TJq;_Nv2Q3$}_Ru7QqYfOIXP)9>~L2iAxdwv&ww3sy;P<@w%X%HR7 zWUL|6Y=`~c*X%kwaRVk$yJpVAe8&mIQ3DD6`Gti|2d=43TfCB{kMp6(dhK>T_ipop zZ(*)RrW1d-Ieu;T+A<>4X%u(0YvAjsVAF+d?13QCFG}&}H)Ll!V)K+>xHc6jV7P!g0W6CHN=eADN%Hmz zpLGDGrV89qo1Deey8cCugniRRu|cJmOA>USaBXa$@I34 z<$nJ%*Tiu1bgP2~>;7U1!I3L|bqzFDw`_Um8v6!V#%wA4WXtc`n`kl1jqh0MR(x{N}J+K(vewGc30}>gN%xQE0kqb zRY^f9`qDsmpemF|rvVJ~@&apwDfW%*2}fsVHs+4%D@-7Ms(nfL_pkH(Y11DI%URjc z%TAA7UUZO3UhGxf=&}FyQjG2`z7rW3R7r~-FnX*Ikt4o(tllX=i?~e(ccY4nOio|E zh$C@KK6Lr}6p>lL*(vFx+~&;j)8QkATM`!w8F5EO3{T%B7M+EkHX%)y{l!qfzWey` z1quV5yG*0h=-VX!N;Q}~@SMKkQ@Lb`4t0~KsxTcZvx&a5Lvojm7gg`K;m+Q%<%F%z z|DT!%ab4QSp%pzENQT`OL-9p?&}^GZaa%))(}aB=^W=K`&WTkwq-%Be@gD=H8^fK(neJ@q^QPz`X9|P#07Zjk!mag 
zi!Hw7KvU5gl$~0RIjkYeh7Y~kIMFbhZKR|$yw~pK>AvINTT^>W{NJW`a(vco$vFBt z_iKq2g_`2>T)PGz*M}usk(lC+pE%Kp_EmN8{E!B}{pk%l%*-4k`eOo5G7%4ec^Oa@)%+p!Pp&)J|!@!xa%KU%ne>PpxLs{ zcD7^v#9U&*RVAB{xQ@*wNse?EefuBDq6^$~bg#Zb{L}S)HmWMx8fK&PuHmw$`=@v( zDw=*AnwSCCEqh(J%5$%hG;1B}s(v9c}U&TWwR+mXiO^Q+?#0E$TDh{ z{ZC|i?PaBFnQ9+@+Kjf*soH`s?2kAh!;`*9rxqa6Eb8`hhZ~i5MGTXt;t+`Rr}Hn5 z{tQ_3eSz$N%~YpuLMJ2&Ikg_X55Py;VZWwxCik@9#; z{NJ`DJgeJb$;`Ql{DHj>!4%X*t0?v1x!(al4PYaA6ire5x4G#AJe`Li@c=8gy7pu) z-oDbH@6d!|g}MydmI8+tQj(hpiJ6m=gE^&{_S#P-)^$oxM$!GUjtnKmX+ydvPsB9N z$mWc?ZEO)(I`%ng$>b>b(j$zr%*jzP;oS|(TYFj6vbX=mF0+`SxmHwo%H7C6X}5n; z&0E321HtDVi?!bbMe?3gdg1%U+bvy}cFozAj?pqUd+DX5J#NJpFFGsfVyu`h9D$Wd zt@_^c;c%(jG+Qm75362|FtY(@HoNXg$52d1e-^Xd=(VEQB{ii4+DNdz^n&ck1~?F2 z?9^iOY#QX7w{%W(+fS-w^Ce45N;WeyH!t|s$f?o;?IJZwBv~W_@Cxql4>HaFHumPL zRs2iurDhb3ylU9TK&jjM#tg7g5h4@ECMF2C32u*Molnq3K_at%8~+#uoaMv}2?QF_ z-T~YlcnIE*fkeA%HC{zrVaHELH@CgtWS?vJlaa(COSVzG7O>;bfvjIITT!S2hg7hr zi#jkJ?LBQ3Qc`++-0^$uoz6VzI zSu?sSL-qE>+V9>STklr&(ChAY@zD9Q>+IB%yZ7Gd8lb%yd-uBsdp&cU(q@zl11|1Qwy zYrbHe2Wrw3MNH4x*>)~n|I2Hxe?3)L9Xf*l!Jpr{WLWJ$B)TvfqfI&%3JH6mB?FEOG$!O!RsU-p1d zA~DwvB}Y$*~)Cdmn$Jfx@dWcHoDL$y1i{$jz2@Mygyn}$B0fX z=D`wtgF-v zE(p8^~m^R*p{v%F>MG`5{7Wpv6Kh@Fq?12ZU}U zO35e+2AmV*PL*Fiemj!&P5t#-tow#qSCCggh&YfX5i)ZBo78o14#+w_7fjngN|KMU z?c0*bz-59MEF@S$je3XJqXB^dK;)R$KISha2K1KNr8kY=bl%l&vCC)Xzx-VJF2A2& zu2>9%;8iZ5r?J2&Q&rm7m=(%I3T*O@VEKF(suuU_TlcF3+$iwLUhen!=3pVPC?iNa zy*h>~Fh|cP+s*6s#N= z|2&9Ia0}rq!B!A-@h=R=6A}YY*{*&2E~={slXKuZOg+68#HFte?G^X}af z{0K$?y=Wnb0^UonLmJ6Zn}|lsq3t?dPXj8wZk5pG2ZwDFJ<0aK#qzL7n8hDv z8-hz}&bjwc5iJr0yNhA@N6l?tEiZ~uqzT@q$@3Jrm^|>9cEk=}0mKnhzq_=nE=Eln z-({quaCxDZhv;Uc0tKOFTo)>}BxGeyN7q#=cyE!<&8EFz=hi)kPQ>n#4O4XX460GQ zJQNb+Y3XcfTBR07)q360P*F*V`E62o(;g8Sb;%2yFKsGHZ{G=Mrw_a3WBO{l%YunY zG(!bX1w!{2wDYo-Mv7){}f=Yz<&fzaqJx0?03kr^XU;1S(YE)%XIu zK6C4@fn}fqB<~Ula0JX2uypS!DXTcoxo->D<5>MhRuC1b=*WUesE1BNXVOvt@B4ZL zoE(Oe}Z3?l+DITy#iY(a89Asp9e3DPPdUm{yowgxb zyo*GTBJ`;jj{_*&F3j^6E<6dd#z&5_mA_F)mn;8$*r`=Cjhom1IXXa~!#zZSoVxuS 
zy1PM(gr^;9z|^6o|H8M8l0R=EKG>jjX_iwbuvI>=wA=|lJv0aS>+#@235tmDvmF*u z%qXJ8i6e{@hv<4)vuMf>H7BYYvqfzRD&`Ywz{U_;G(Iy1wB$y~T|Rg2Q5gT})0B30 zE1MGi-QAxA9h;q>_lcgHpGOp+{5!+5XV2QtAKM0oxw-=V*dpa8>9`CJ;3K?0REMeT z4Ih=54taC&R7Zr|{m+eZaAcjrDiR7(a~2CQg8T?hPC+UV%gQwI))L|lsX7RmL7rX^ zpvtqic8=Ss?GL*eL2+ewhu`nm`6VB`l$SQ_inc#j}16aNRRa8w1DIhm=#>ZD*L`E z;9)h9QsAQz$9R;7r*Sw9iEa4L!>3MdImX{*f$b5+PbHo5=^!`bD+Kk%Z8S-aK=#Tq zpPKdfT7W^fAnrp>V^1^!L!17Y~yzuF=F1q+J? z1~Vds0P;}<4b7pck^I?z9)lKIhw>sej^XVUOwDCAAB7LZePhc8V$BK^A$?#5(@ngF z--#|Pwt(TUUs5tJY3~a@<@uakvP3_>XJeGIt@8NJmwl8ivC|PjuIW3vGSB-43YYe@ zm#7GJG$|OzU;U!8V(U}#^>eZsS;uVN;P(>2@URauGO&&v<=`OY=YIbF5y#WMm`qJ{ zO7<*V9V(TW%bkq3S`(-~)SSwD#<)4c!>OnDM0>Usc)kY%Azow#`}0qV#I$~s2Sf#l z);&gEUTS0(03ULy*|>Q#ThwHx3L+^;?L2gskjzk7jY2naRd*OY9t`k_KtAEA7{d*7 z1@3SS7*?X7&+jm|0EA!y7_5tOMj29= zjb5T+b*#+v^zrY4Oeter7m5{DKDo4?{{d%gOeWen4_y+Ki_-u@};Rn zMW%I+-zpopzdsv#P>5@d)%v~q-~A=(UFKImE9@jpPWNqwuI^5Ik3`AT`KrCzRo$AM z{hjuT2G{SsOv!2>1PAvN@a2_?|KIjZ;T!nMd_xs(*_wUBhHgV7!Rnd$#a5 z0vvGjh1A8O2jR~@-{jgLO76yN8^kteGzz!)u0e1t_r zSo^F&5S!umBeijl_(Fr&8pf24Jm%vJzqbI@8S#?BAH7@T7Pj|0J(#D61ei3WK17^R zEKQUJxn9X4Ow_?%Au&mq62;1BVr&yEP7;Q zBuV6)1>MYTJ^L|i8GyVfx{XkqQ2-XGfpZn0F(QG(?p=T}2y+Tdi;~dnV&uce@fT9k z-c)JB*27V7C14J9Ah*yTLV{!9&EXdiP=FNxAx#B^R~46t9pUNI4L|!O(Pv+Yc*raa z)rSl&sBxbcS3hEq?s6*0^>MRMUuT%wJ+mnWW=8*q-wf|tShFtJ&ATUb@0(spaL?n* z8?*Yz;VQuK*fdZoH~SLb>A*&38J4el{Sk|cORFun1zz@FPq+F{o(@(exh1dh_|1EF z;Ms_0Tog9USNW7vwbh@~i!s2)%Orb6R-fL(WxR5_N11zSGo_Sa_npwM9~$%c=ezIA z+A*CtX1Im?&FiU?kN#*mCwqcJQIb-?p|tx^;dhGJ`AjWwT8&HM)ahP#*L-!;Xqd`7 z5A6j@|K_o%cK4zH%eCu6bJrh!)yW*I@m=Z7)is+QG0`n3HD4N@?@;-AO+7X#Hm@zm zURAmxr?oleiM)8U+E?Yae@{MSYM>a-mTwJa;`wujvjiCeg2llz3THS&6Y$41G z8knYfd|Z+JM#LqUdmKDG_MrU+nT#wLE)yCah(~0Dgd+SW<}lnL4;(ebQ;j_OFBdsU zQiL8=>B^-~-?#LGtD%waanOs2de0o!$rUa~mK_O$KiFL!r+bgEP;b4U#$Wit&gQS+ z`1MXfyQ8VEm-ba9ypok^nCd#O++w|7NKZibq}~q`Hbh_?otA0b=bJPlQq;CfCL*d* z$GCB_R^oKPvi7917?XPoeUk@RH}AF=@NHxKcwL&8pVM=-ieQIY8LQs1*H-5b z*JC!WDPle!B4?uBu4p^ge&}!h6`9x$ce!l-n1ZGA|5c0m^YY3U82apSfs>ZGWyE5% 
zF+X=@c}lB#T>H85rO12D09n3)cX_Yjgj>a1(8khnik6lZmm&eLK_Fmc!aT|q!3GO^ zwQf^ifE6Vsb4Y)j448pq<5C(&$8@aOcSUO(8_A+m(5jB(yZ1WjO*zIO_(!!cbA&uU z4nP&+G9pp)O$wYIAJ2gq7}-uny`}q!f|i1$2cmxtrkhXN?4CUIc&ZL?-+Kt7Kme#B z5SaVGc4G=SIW2$c@{rd%gL*A-i4BU)4Yw+XQY@Fx_+*^A5_ZpQ;&^;n@An1Yq$6@+ zvLSV464~WbdVT#ZGZxjJU+}Ir@a%3COb=I|{9$nM%hbiw{>vUEb|VE&>8%Ao3N?~~%c#+WQMDflFQzl^(r$NLihY-Qe&0+myKq1mws%S#e+ zKjUZrWf``22FNup#l#y;C96dKwM7gGs?ZDHi7=sm@a@sgAuj!f7f?9*AX4ioYBsCz z=p(0109j%B;az>^dP9mxa)Znb8u4RaJ%f(%FHKgHutPu&T_YpUr9uxgF_p<`!8JlA za47935c-c4VRgl=${5-AfnRkJ@Z}N|q=WdApWo2GU=Y|9nERtgLrUjRBPEK$DXhrx zW_fr;`?#`+&K{%$XyxJ22c zR)j{~f98~1KDzfszyaZkysb-E2l=$^0}sv#}{TqpO? zD6Jnp;3rwH+@g1@a_^hsk|SJvTlv=SjWifGbRL!Kz}c|ybs(XLheBPrrkPy&?Asg26QUNo7`H> zgt%mMGhXw@UP6Sb*fQlYd6Oh0Zw+vJhY@{@NY+Ce>+mx;<%F@akBr!F%mj;sdYAF( z>C+~HxRfC~!C{=p_IjQdOE&*|$t^VZp@oH<6bOLb`#O>=JjK}PRi(d1GeFH+h2GvWN1y}DZ8D~(p)wnVM7capr+~&VA1(b~jo(cE`e1iE0?t)?2^7XCrYh zT{(lKOPYoSd@ruKYhE3gskLi=L(-~UmY!K(B z>wT;xXbu$UxzOsj$C4@KKoQvM>=}iT#lA|r8$WUUtEXaA;mtDu>b8);Xzba{SMS2O zMeLCHRzUVF*ZZ29%TNNm&}$wO25h2dJEp5J5&f}l-47Uw$6;h#^QL;>0VJpUU^w)$Sh>f(t@b6A zi9QDseEf{3ZW%p0!QoVO*4FUWUz`Fwf0t-Ew~H>0meaQ1Q9N$F>>4Pu`>-huJG05N zqsI+~N7|;wBYP?wZR{QOgU?ksE)7|n`M?I|(|mLu*tXGNv>+^Jf)nj~aorsV{83jUOSfj==RlqsD=C@$QNx-gMNg*Ob zMAixly;|OsG@Q?%TmYH1FFwlqa?sT<`S?3qA?rtMmlswlH$T5pdo?5)6Xnx#>qC#By_JWT^15e6Ly}q-!c`(`}>m1b{IxS76SSi~1m+MzIeeLtE zclJ-7ZY>(CHjmk1Grqwos50R?Gltw0p6!;D{sqi5RO7W)c}DlAj|g!+W-Q8d_Rlo& zwvjzj=+HEywe+<(C1qjtCKn&mbD?D?C#GifYMiD5C3BjZJTv_6)6pzC>FhBI!LWWJ7GOx>2L>VTP-j&AaYT5|Y<``_*-#dsGLT+4>}c!PGEM*>Y%a^lhn zt6ZUP5#|nE-Gd_d91;iDvfeCR+^K#%)7C<5)JMv+GkHmTsPc3ztph>d)wnIXDb5{? zDvMUhw0(Q=_j(kM@tx^*ZB5Q^z_+_)eZ=-kVwVP;2A(NvHGHiaP_! 
zMKqZ8Y$;x`R^N*=`4r|pW2Q54O`&Nu?IrJlS&Q1Z*(v+DzV+g++2tv!)+=Aq@9Wfs zf41qdSaIiE>2ZnSMlgE^7zljM1UI%_Su!VK#wep<(q%k(ax*eHLPNER$BT;g@{De} ziP^doT7Qrqo(;AT`Jj0fifj*gF`3HvQ z%{m)TWxjH5DEwWmF0lOEii&Mm#jg%gZ&`<_4We&swHnw7PUr!@!EXD`e6$UYz6lsP z-RJkO`UdJ2(upcC#_DK3N4f^Q6htyqDeM_#hW{p*2W;Mm($5Yh3uaEv+Xj;dC2aJ_ zR#ncn^~ZhuZBTK#kN@(J(s>r@;&KW6|2?A_5rgZoQ?9TkM?RzBPPg>+SF+-hRHgD+ z)K_lNy`^bjBBoJVaw+tzuS0}Z>!J2tyS2)Sb`3O`@K3F2dgcG}X}l7XzJ8fdpqJLq zGnB`>SiZSoPr3*w!n1PorhY0mA7f80^X>KLbj=uMKcK=Nn|N|zA}(ZVlyOI>^l$u? z0fM~O1D7gLdKk~j#g#m`agfrn#q@7U4X;-=nPYwj)^B5Epi_Bxql*`-h2NAklPC+7 zVMM&`m^ua{mxaV$?=f$RBLP{3PMq-@`kS5n^0(}KE~c)jQPBOr7<&(}9{cux{F5!~ zz9k{$#+_(N5=9}RQb|iYCDGK<9wJdHv}qufw)WnsG`06$+S@^;|11 z6$n;ow{N*?EPrCk@cWxa8awlRMviwy=Sl5~$$mSMA`2VJooh$7E+&rVO@1MN&Yi92 zpSG;DeE9W9t`1L<_s0UW=T0x(Me6^$Ss9o_-h26zEr~yn$6ie3e8iK7IzEYeXdLrw zRr!~P%tEHEU;phkP0{H?BmNjAK?kQ78(iebfetCWZHs>`eyKOfh}AnoFUIzv?>5J9n5OSa zZG7MBz|scwJ(KO%Zp57^uIrt;v=kj_)y1WwzjR!(dD){c-MYnRWt8!DU*GY>j{E&? zR=JVOU9v?~BON|;w)>B8vO8J?@a?x6C^gK^>hle(?zTnWA||Nx&oNH_-0>{?D3@*G z4i5%;p2c`9tw>fcdvj-16T2oLmzZ@Apphs;=kDY!+eO)j2F7ihdcC9^MIPuFcZ zMexJxB4jZ}{`>F0$6=$sx;!P0$Bw%NFlGD5au_7%k72jI7C-q9n64%q|Q z2H6M!u!PCd{ndeTb-09)6tk>+VFH5eor?g|h^zsIjRDU941e8e%F>V!2t%gZ{Wrud<SQ+a5?Qe>pA@biHq z3cOdGccyyPXLtNGO8oc^8>2JG4TM|uF@u)(uWt{OcBmfx$~oJ>eO)2!+44d{SftGN zwbKjV<=9q??THnQ#YIyqfn@{zo5M0|o$RYrd#lA6H+`#l#dqL2L!a6jr`%0;RjLIa zxnI|1%5Atm?M}B}Im01Mm$qtF<=mIl0M{djlb2V#yMhAitHW>o(;>=!Hi+>AM}-HA zt_=A?w~QXUiNoD-89H-~&OLr3bYFLU`*uQejmmP|Gh2Qe?cskPRb6e2c3D0>b&iKi z_kvEzHc5MN{p0tCh8Ut*wiF3Rg|xH|$YtE4o(foyy_H&JC&BSLQRDc&60W5B9jAi5 z-fUEGJD_^MsEBkk-j-K@cjkN)lhvg8a_H1usTMbXeJjeGZ#z`f+ZU;=j@xV?@sj6V z8tPeka_reR&F(m_@V2M1&!{gT@K`C$&gOr8`rOGPf_kUu?zn0!RBXF8%AKJ&CFv!h zbU;(8p~yF7ntXhZ%jJ?8!L_!EnK`L$w&&pvd#CVfJze9M5ABaiscq$^BxRQ03|Nuu z=6uGM^=H-Sw`<+2w~-qUtnSawnD8VdjG(F)x*ZTlN-9A^CGa2E;ezh*T0T)L7V9?* zRafy8}jn<=8=vvSR3X(D74f9f$a87W%l2b z;c7178*OnvfeCL}093G_iYs&t3=GZ!rT^T{3G0zNWk)*(SS8};x$n8TYgbXXX?*!2 ze5kHh=%7kbh0# 
zyygG3FJ#cmRZQ1dE2w9gsMs>pI#!mvL}7Ns>Ij=jd0<(>2lA`NmPSL4u6;#pfk88m zMDIzp#O-jWTPtvy&iYwA$IIITPyu&dy`&@c(n8#HOei#jmGp63cXfK=W3=0woln#c zjEdvHg{-@;;bQiIU3_)Z+04AdE1wcrcu9RGP zz9$zQF`-`oOBr)?ysd2)*&XJ~Bdw|KSfWA4s?uoNk%gU7r_6GMEYVbBE3~C2IX+0l zdy>vQsAjM+(~=wBP-}A9+WZOAJrvaVb;ol+LKX$b)2=%(0pe9&K0XG1j-T@7$0>8H za&aGoTNE-aOkx{WmX}k2WyflK);-KRw!?o(YUH7srmVi8*iv}o2~Xj6aq^{~dw#9A z@Wp_sD6xbhxPjP&f?)x3OVBNl$SQ66R62URsHf@dt-TxDn~EQrp6cFv{Y@1w*}3SE z7sH^H{g9mB^7>*Yk7XUqiWd@zBhgA&-m|N!DZq^(lj?PC>!feQS^97GGJ6yPq$M^AOwP zmR~zJ8(jCuL7}HGq?L$9s-|%+8=II)OSI=LH-TYEfaR;AAZH!8wOcW*6SD45-tYlBWGAAdWdwGC{<_exo30>tV;*lsj_>*-T z`iF-_&=9^%qoAM&PLgi_5dZVKxk$RHAmVgIiY_s&-&)0t-#we~xqiCu^3fC>P+2X}KcLfBe5K{vQq^fm|2NF`)R&LN z(n)U?{}Nuyv~|71W_Hm#$}bJQEdp9kTf3lz^3zVIImWI}W-!2!Zpv2H*c)SbVrWg~ zK<4>zFS@SJF}Y|V$vB&n(Bv+u|2QeS@K10FeFei?nK$n zyrlY5yT0i&tm8B?`b3V1nI!5mdXpw&;~lhaW4CTMWQCmIkC8v>lvkg6pr`M~a#P*R zS?#~wtukx*gkK*xAiD!sJ|0c&#L?Cx$BuE9pOj5z*3VlJJ9=z&vaC{+`dRi_qP*g1 zlo%R0jCF8n)jkF1AAEoU^v-+F81_c3t}Ih7b)+R07{wr1<_cWm!>l9fhPpfW`S={h z13&x=dcoRVM;x4avqiG}{t%oC+Wr|%0#yPZHjqUKy+2QfLWV=cDMGq2GP)(%hilJP z5MtnhK_Zen5}Sv%rr&swwY=?b{{0j=$(PsK9bz_D8lpZD<7%7b+~K`a);DVFk!~%^ z&t834|Nion(*B#DVy~J{b@@x|axi(c+rXlZZ#!#q+1MHFh)6Qk%Ymxp#6t;kD;o_= z?&md^ob{DzsBvucxDXwp%j(vT{y=Pvp0qTO0=1RK&%*1|+lPT33lTqkDiHyHapa(0EWiO}Vt(eJpp>%zE@h%|fc2 zdFuYBDO|Ss>C<=W+N1oOpRJ>;Oh0GwWjV6`Rew=Mp_g8H%U`oof|4!Tv*T}NORqZ@ z-DmXNWozCadZJ`%^_cGJxUm(t3!LDonPom9+Yx6+g&WLOE_*Kka>YwmS3HWMf~ zVfK?kP5!;ar>tZJ^975F+Q=2afIDevp*o9eGU4Ur#lH&Pyy+7*3TyYTF!FFN`Wh@k z6JR|n=RE#<^#_WZ|5mzJf6L3pS4I|-bGcr9c%xN>-AHrk#( z*Wr^l##!|J{)4VA!+>nd^jkaTy8B|prjs5aaEZG5*nqiliV=Hjx>hAhrU8!G1*(Yxc3 z`?P9){H%UdC|y?2=-fGnX+M_8TCJ>>WEaOF{^AO5FRyf$YB-16Fa_Zii|Z$g6`Iyy z3rbjG-0R3X2jjn!%!IwtUfu^RkWv+bqkv@ud@k=a8xlc`1a1ibp&QA1N(i*}frl&j zbH@`2Q(_{?3j1^tv7f>Qgv6qh=7DCW`t60a*p3ahXYyhqNHW_Bdcwiu$3MN7{G-I$ zL}52npugl<|IV=9g8(_W{mvTNy?s&tbJ`BdxwAV_U32`p=Pl`{GVZqG1qQsLcj(VF z)WjOZ71dS7U({UiIx--5L@<-0!Gb|wH?m>Z3j@}@-aPb;sWPL>n08*h2zc%mam692 
zTC}|4UoPH9c04l@5iBj6Li(mPea_9xStM;@kae$I%a`a}ixKguXzGRY{);y~KY2cY z((;bGhi~CpK9f($v8*?ux%WI36V569ylsc3taD4Ymld7t#!$*OOZKDx{kh8014Km( z58>bB^FScQA5I)U83;A0`CcgqZ-Ok7I&BU+MbKrjCcfZ)gqpYVmw~;=J+6h_9aQEV z_D@{iyjDuLSXfgopAZ)Q32%ec#ezYqxSwGMT#lJ<4$tbjnnqe{cEq_y*Uz3Z+gP7@ zcqN^Or)%h&RM%3bVk?vN2JL&g0AZ+ZR%YKA_5R%UWxXk*_s-I&sK`wpCA0HJmzHY7 zzGu$a$NhTawO7Y)TlGN;P>wWc4@#dz3 zCAFa@p_GhQI#zFKJkxkMv+sC>m|=Kh7K=TWZe1Bk#zftb)C$cPBP1x5Cb3EG;?J9b=&Y3N8abNK5$%cKi&d(XHb7nzVDVysED|{di;4RfAtc|=qeBF3=M}h0+0`ps4qlS}No119$HwdL z-Ma;z67yuL}T_5u6nQWR6HyBY{Q^a=Ud+7mLiu)!B zoD4R@=IQtGlTGFfjQXV~bQ+Jhwzl3((vf>+0zU)%U@yVbBh;Hjs1)RwAn3@XfRvg- zbz7WJhP~e)xyu2ok*aWMW&-jo0l4ryCWf7a6-EX;V2FwT>({SZPZNf=E^7(OCBn}o=k>AU$C0rNk7%bTZg~0=J|x7w37fG&I4U_TQeAw?EA(?k$^|V*5u#j!0e4(3(K}*G zp`)X7|G|SJ+}v||Ph^Ah7+HE)hr2@fZIe@GjjTHP9mW^S1|DrZGL_YleKpmGl=Hc~ z$h_3z^v2yf>r_`dKWAv1^Gg)Zk{w$mavZ?}#M##b`blK;5QIKE)MV@d?s?_3!TS&G z{T>&GFa0q5qyl*mF{76u(t;-OAaXpr&3!;{f$sU!{GV5{Te+U?6PxI;B}Heqsz`dv z&dkv@!`1ky=#(85?hb&j<;|4$o*=syM3j=%ssAE2}Aez)2jzf!6&~W_?tcbi{7Z zX1VHAu&e;v>~a*_^1w}SA%(79?Zl~vyvb;F*_^QUa2~h?wjtQ;jonK>cm+h8ZrK#> zD}LgmktC^bdh4yaGDeEK@*U37oPoFT<9myCrol^uJ&K|U0o@Gt+9r28Z&NIh%@jC}N?!4RF^iLfW%Be=O64{YftsFYa&$~tuaCqLKW zK>yi&_3u}*mtMI0c1LWvtlMQwj z?a}+wea>IEW>?%l%GB0y*zN2^@R{X4sa|*)hoy3W<=C0s?8;@oB18+pubqcAI#U}w zJ){_ZsxG*`0u)5V#XHb!VBql*N!nQAGci#D1qbtr1W27=g#yH=feuZ}b+IDe$mD#m zjBmv%*AOfJaPq(0l_PY2MmCU)Mf)&Q--gZ()Z}>3YKIzP#Ieu1)(7sq8bCZEW)j?5 zeA=iMyNjxzd@I=@`1`d^ab&q^`m}@0Q@FXTlH7)3`+#hacw>OP+J>MS0Sw#McYPjN z{AuUdjrgOro$lg%UGE&}yJ3GuD9EI%lkx5SO7P{ne>I48JkWt+gYSU_pqp zSyWO&32q6H0Rv;>+aR&k?VrfJ5G+HnYr#(Q893BxG2H*>?DHXv1>8h5;>cRMEAIgA zY||kFm=(P}O6l|-auKHeLH{YcIH&*2WGFTo&mS_$RNPa0=7n^6#S8zK9tXzeHP=!$l|VA3#q8S&GEs;&lDG+Z0HIib_r(nqN+xG0T$p}l1zp+(t~3=m zduod-5R(#_grEtN95z>=i9{+-5v~B-ASB=gx75`oSvh2%u*sJC>TgMwtRPsy{^KpT z&(kUnn|npjd1nvNk*#RXJlmVX{rw$?vKp<@HpCMIy_s;_MFg5pbc7TW=0?0zX#WZG zPy&+$nW}7T4!XOls+ZuH*92Xe7@#s8tagAe=EO{cimY7g&n*tv@Fx3eQ;#*7^PHl~ zDt=q>@cHQ8yz26zdw6+m`R_ITE%q5MC;Bh7f#Rv-tqX@d 
zAKLS0qtoh$3Gp-p%tX7(t)vf&Ymj}SqdzfSa}UTDch4`2Zd8xsP*K z(X(?_FN*`kYd)w9Gl`ORU>%ACAws%=23(-V1e-fTZwZ6}iLKZ}A!JCU*9M~!-_i&t zzMacpP@wU;-JEdmj!Cp)>P1ymRnO0VLM2nf@p97S&ek_CmY; zfBlBvlnJ5#8FBsNr=RY-m}K|U0`2$jxD=cactEA?ApSckYwRZ=+MNZbVgbC8F8jDn zD2a%P9bsd0#cPSOv6+%GuUUTe-hU<&G#cdFaq9`;)BkLjVD3gRxj=2f9lir+g=3#f z$m{pJe*ewHeIY@8+t-cw=c8jzd6=Q@)`e3;7IH^}4C7mgFe%a=7eS&nhDnBk85;HE zf2K%Dt^Y+pAj0nRu~Nr%OnoffcR6z*J~5HY>3wVY_useDkMCcK<5xMALJ3D>`Bw8d z&&0LgT!6{|hI$a;Df!9EoNzt_>$n~EzpKT;OXZxIp|rZoqbzBm50klSEK=GM z_Y7IyoT`uv2}n%syM~rFViPK8`^zZa2o>6YTK17MLUP_{ou7gxh7uv zll#Wi9{=a#;Bw1!q14_I&yGLWVAUd5$vL-4Su~VnSu^gfx9hvwTnWQlR=PRa9xF4J z;&(TY4e!LL`GYt1|93syt^TkXncEj6cF9EE%tfjChHH!K3j3cAMD= zN9VGkD@DF;`MGcE0#j*88IuC9KXat>hF1+zPa10!sRlohXOsERzgvm)UE~rUr$wqn zNB-+ZAIHqNxkQJ$2{(y_@XokBlGolD!Ku3xtU#{;`k(m#xhj=ut1V9F%A zx$_OG98MR(M&rpN?1O%OA4h9mo-x^!o#MRXK~(qG#C1GN4H_1e9Q%E1;%^9bnp8G) zcT#UV_vVb6W>ntCqZuiuJiA8y1{;gFb8qCLB^PUasq{VHB~!FwPbXB#+1;b|IfiD^ zd@0oUxow|nwpG33O!h$Blwv^GinaI2X^UbCd>pfL+Fq5Zk!N zeyB0-?o&p=^Xqk%I4gCw8*gwpEFq6S@}H6O4`QdLuTojQo^SpX_t;RR@Laqb?>6g5 z-=-peZyc17a4&O(YE~=Q)iJ#MxqVpMrr2azgOQ5}iNjiq;vaT)HxTEM`ArB)LPChk z0aEA)=G%V;xu-knd?g(}xexoqoqB$Y_07bq>;uo_&TN%SeNh=8)K~1+MJ={QAxt_! z|8C_+Podk|&&w>UA87<-Ea-^u*4Gye%?jljW~@`of2R0eaz^Kje^F67C&!-LCb9B3 ziT3F&2DeKG^9n)}nNqp@?jdg#*^nb!YuYJHj?f$ zgxMX*N}ARkyBg!T|7t1M*Ngz&uu-zvA@AqeG6Q4yxF7M=x0ZR<8^#{uDpC!96QJGu zotky!;{JzuQLik|^{NUV=kL5HitnaISjqoFSLvgsv(2@+EEJaR*~wfx78}3Uy~|rP z=W$3ZGIXjHDd*@O){^dB6jh-wyVENBg~3aKM-TJGVdfvvi%aLhYlBeo(V!{XJ$SB& zqBJ7A5-_`tt_b<=((~U zONaMm?<2Flcd5lz6EzatoT<%uK6`xHkP;A9R4?rCCg}Baw*SO1ii}Lw-=euuM)p*k z+m`)>*E`+lH4OGR_2YZ_vB&G?>+kNicXk%}FYIKKL`cP|iBMSyM-*wp)v}Uz@wRR0 zyB`IHaw)AH9gOYeF;C0Wxu?s`tXOiD(bU5O! 
z+PoNtj6s~-CIe?3ieKxdUpL89%l2Zim7BIq_63o7qs~CXV$#Vk z{^4p+ZI)5kB$4=2%=V`opf9@jZcDV$?TY(aXUr#ZqopJHsV``f)0R&goD$zK`>%2C zO#03%ifIAc7!Bp7{MZ{fXFl4s4NMutMa+Lm;;rb9HH_SmB*SF3RV8IaYRQ-QHZINY*TVH$vS9_KG~z9`og^8o$j*0GIeL;Z!c1>Bc$YceR(~jg>BTcD~oY|MPd=}K@ zJrkH8lj@#z%rZv_q)VCZ#|2Sf-PQYY?K+2qQ|S+8CfDydtm!;Znc^o~}w?Q($ zz318b8o8aTN+yny1t9FF*ksPLmqMOb08)zly9b1{2PARkzvbS6`%1WQQHh_;g=-=U zTedq490=6~m@ETeTOPY%voNW_xBOioYjKD0FM&#t!i7j^V+dV1w|h6X^F6z&3hF>#bQHc%O;iQ$GY zMeeqo1A8=i^a_5(Gv2@CX6CZ{=2jAXIVN2bl&D&_OcDDZ*00jv+SC6g7X<6SJNEU2 zb)KUr#y+_whv29SRO05KcZj&-BXETSWdCK|55`nuzu0;wNDh+3RKSGomrGXHvI)`dG_>JHfy~0k&m4GuZVOewyme~^^VEli;RQ%zoZI@j3>)^Ia#0eV zoyqI{@W_#h%8e~2!hnuVkFR1eboZNAdBYpo1%q!?rKo5JYz~C|!k~8KT~24atf*q4 zvVXeF{sqzR+N#m!_J<|}@|pO$VL&Bx_sjc-oIL6JzpClT1}v+cAPHtZR8dySuX0Pe zb@aF@*n~m)+j_d1)|S@U%0)A%3`~$MU!INHkSKgPORRK3oV&CC+qFwNyE6MT?F2&^ zIN3L5KeMLr;kMdIWwCW#XSqq~f@-OjRa?f@@`|@AEo>}-%PRYIMz2{<2h6Q`Cu-kh ztivwxY+?`fVMg!BgP#KUb;+Dgs$`N4pUFD^EIXNPYnzU1bFgi9C0DllJ4#EBx*f1R zaQ(v5uKPYx8>lyaC52wwC7F7R$!j;m5#^Ygx)0jzao5tdnvxAORA^6b9I=V2Efc1o z()eoQ)OWVi?x>T~Tf5UFrl~X8{3#=6^B%6<{?S^wGT(iKD<*!Xlwa*>irYW}2t`Hu0j&-K;9VFRV*!}F?bhP7TYis`;Zmh5h7&80!lUk7@W z-pe>)%XG$^J4O0tjD)#@FL~o2>=d+iVy2 zWV9*7&wL;6ZO;s0HdQ%8;*AuMz8mI12Q+HWc$@i*Qo!dl)As10HAjLNK?*)|0P_FHozGhXNz z=`+7C@!~uvJi{KVr@`c+a&>iu_E0W02^4u`TS#BKRzglAh(VAr zrx*|J#D1}{rG?XSR)@hBYb9I+w}BM+!7mrMD}?pXd_~4UaKy+`^E@IF9~ecNk<`( zz=V$tMU= zLovssYo&uK+tqIAUnXC+4eDtmX(UD`U2{&7itwJR@XWBxZY>$#!rj%|;zuK!B^DzW zbKR`RYhtvab4n#ERNXnrPa*4BeChkE^V8Z4H=8RG83vh?KTW#GCFs`sO?xU_RY|?P zdHb4m52kyn8#w${wGQtkKkK@^cqYr$?W=aswFri@dWS_AuQ8IB*1e+9Kd&xLf1Zhz zxu-a_#!qgaP+@~Ys@%<-yr=q}aly?e5_qDNl{A`91QphY1v9#y(mHkE>hGucuIc_g&GSAj0m95@#c*WoUUGGk`Qi-cu{+VQp812e0Ks%7;qtawnQ6Ouuh!`A|p~T)xGBa7tnm6BRXSXt%5J$`xC^JMEphwS{kw4$lv6Q(p3i8-zoDMd z)6)wks~E7HN9iS2WQ4vIKgUH+bKMi3gIIT5A2$4fcey!PleJ_zlrT_I{L64hm~<0^ zgxOAiUQ<{2W)WI`s3d@!Asq&7yMd`Ga$KXDjU^>(7U?fWIXroC0w&5>Z#5wh6)cOA zrb4(pTnfIyffhdVgy{z9nwpvj#l70kkqBtaLnkI?yY4Ymp;(YbdL+ZgH!)fzw1$Y7 
z4$YF5mX+RM#qs20tcZ|L{L9B@Xk)ee%2d=>9nv&L?Txo9muu0TUQ;ZR?UWj&5!^Yx za$+Vp+hFAmU+z?U-(u8R<>9@>)tkp_a*8dj^Qy(?XBsJNJzBSoUiN{i#zXTD-h-@(U~aOGTxd#F8G|ZcmXvrf)K36f@SkJyqh-lU<&>Xr(KB z7Q(JKtTq^vrMSt7gMkONOcpXx8h|NmVe%B#x9p{?^S4%@x^m@8CcmbJMqO%j^REZl z0fgFcWzhtT)Kr@lo|w$peNZw7UwHJIoT{TCZPS57xR*XYPlJOQkRl3YR9d|{$87N7 z-?!ZRBRfgN*bs0Ys3sbJB3gx{*PihOL}enh78~X;junHgBug{1&Ym6tC>3Ft@bAf! zQbI-s29OxW!pP_m^PM;FUBXjI)b;W4k=px@!TtMSeiGL-)_Qi6Z@>FfbWs+r+clPC zG$4kxNjy^xVg|NGZ_j!I3p!|lfBUuz8#9{^qY!8?^ijm*wYGRxN4;)(>1p=xjW4%ZNvff0N#pFc*u(_2 z!QmRY5@WO{RHv7=t>K-Y5T4^tdU;ABEIe{axZOmiK<}R^a`*kX#I>Df&2r&Gf+iGw z%;pWA`~$shU$<=H*82M56|enrYOZ7p|ul$f@>TA3+Gm3Zzt{jLY$@5zN5S`VxfSmC~7!&Dng zE^NicV5_?*>+VFieJgw+rPdTAn!xualOIbZ=4@iIh`f%zIe^bW$v2~)adeDRZ?z^j zH#f(@QoxH%sRR+u$VMP6t`PGaTsVu5{|Pn^&_gC5nCBODKyE4b&y+v+>?g{2_93=Z z0^s*e)_~iGIRibG&zm<5KC3B|+qP+7@Fg<)V3E$;0O>tqaaHRhb%Lvp%US88~*$Y(y}4ERi?4Iz}K_-mUAxiCSCr z{sHAZM#P)GBhA*72VJPY?7L`qirU++K?ceKSVG@A$ZnQj!6omoS=wfY29S!D)>370 zOQCgkLvLs9qg}gquj$jwf=dPMN}MY~un8gs8bQQ*A+xa%QtyjU!A2DkiQnjSrGM3K z0*`9@mqaWk0r_L3w45Dz2h#+YonBQ6wZJ1izFJ zV;>uBZep?7H@jEGT>PL~?M`T_Cq{Z7T9{|3Ooh(ax!xWTy=BxWJj?!!BW-kSJi|^b zLLL7eU>Kg^R5c#;58Bw?>eYCi<(8dEqADT+BJoT0tETwu#exEg=Av$;xX@npmKohs zdzt@hU9dFo#pR{088_9DD;qwcqa%$(>H1C#-CPcF$+Mii{$$0{A~>$_Ze4+=7nO33 zNh=AAgYoTq!#-r5xx0sdgDW^(*?( z!TMd9`#gLgH)YkDH1W(fqw}xYrocIsy^lH)B+OQOH2eh5i7R%~XYy<<-;wibqUNbc ztH{*Eqx%OAJoXmZa%9#vv0nXnudgZDsWgScD)5tMOj2Z#Mk{HJX0}8GPx!E{=#&xr z^Hf`{*W`^&*}T{@4`rT}q=h2`Xwk4R7sf<0o;errY8HoP5?nGOv7TUzd9Y11Xj@n zK>1)*NJl?lV`c4IUYG)HyRWsZIR*ZD_4V~8OiRbXC*nkgH6|Gj&YB&ccK5gU_xBB- zOgLH8{5e{zB}Zo9jtpCK^@+$gd)u2+5Cqk(p0k7q!KON;_?+@I9SbgSzD(YM0_G~ZyXRd*Q22}-1H##yUzY-G{ zFG40A@kYas?lo79?U#$8HN>JiGAb&fb(IX#*(E{2IAql`ncf0igLqTt4$<_ zCso2kLh9!A;S-wT3{~$FS6|W$+<0|wpk=|v1gy7$b617W9{mY5byt3_uivfE5?<-! 
zFv)pH0kJm&i-3-X6=!dKpPnY%*;PR;@|5ZFZMmtcr#_L>9Ie$i-nP$&yC6%qW@B|$ zq60(yZZ8wInAo_~0nVEd5&3y#eo5u@BP;6K_Z*^vJ#(iwgoGq$b<5)qrh8qRIui6Y zWFeIXHyc+u`O!!Ruw1zeGXwue*(7-TH8x4F*gK-^Bb_hGsIBnFr-OrVQ-RjT;2lyv zx6+^W!MIDce}9<@c~Lgg7?2m(9^1TB5UGa27^6{W$uwNwVR3gT_mBVOoWnX|m$d2N z>TS5H`Y(><1&Wcy+J^VEyuE@MkgIDzt_$p+==Vt7=5%39HZu%{<$z`uf?Im9~&{zO!J= z{`}8G_uKfhrs5$Sj?7)l!#*Q1@xkpsSv=jhB9} z$A=b>bRTpx&}%TjX}a_XU%N&)?87fzCN-pH@Trs2dvNq1$v!sPArw^gtxiQoMi6nR z09Erq^?}vh$$c(WQ$`3mgz;8vTpWSW6joGJd|TiUgu4m6bO?zxgr1ADqt4HS7M48f zTw+cI>Pzc&WQpqNun+;%c+W_fRv$uPy7RBUa$pn#Wyz*T8@AmD=)hzF-vxW5v;c*% zm?`(?r2!cYey!T)HoC>F-?IBFd_b{r9!h8a&u2a({Ns5RP_gmyzWM(BJ{lkNF-e9i z0NBA`WrjGNxdCop45Vc{E7*3I1)O_>1Yz6>1hEHYb?e>}kKy?V>#%J(Xug26rlhB@ z{SFHbW{NxjN1V5d|B_GALFl*B4O2*j2#PE?n?An2=)SLFzW}2|={0GD*&V(D0?Ee` z0}~QK^8+0cp@I{D@(qx)iV!?f4hVI`gal$&xPcC?>04v1so*w?rF!}J6ccbR5P;^& z;^OOt>pMqBqt@9?O-&Iyk~3${z?y;(g4nDsXae|lc^!x^8yG+#vMYo{MASh_gO?Er zzJo#ip+~THf!!Mk6$y?>a+wxfg#Ra&&s?sRjg<{AFoY>v3^EW)3v_TKT>d>UbL;^d z0w35EBBK2f$9O`mU=xX0HvmcHrvDIg4pk zFg_tXk=k5FglPSIM9X`Heh!lMgT_1<42TpD>|tU&#*oQ`L0gO0qc@<48(o>Q?TQH=&^?gwNcdEO06QYX z5FVb)7gwb_P>Zu%SBv!@Sj*7~6L+3xl+RI6w%85e|6W zAIbw+DL!vIoW0#gZIdv82YVNNoqxV!rTr=(pkj4p$-LiJb2_@EvGIF0c_}*s5e3A_ zgP|ke%CscT{Y7|wOvAaVgM0@zb)M0*%%E5RD$+2SX+vvF0FO;g=;_~iLoY3emo0Gy z9i95Udtt3>pETpxT){h=SOsr25?)Unne12r;$r{#B+R;tBPQ@~1yo40w#G|7xV~#l z3WyY<#t~(^BUN)Q5V$Npr-GUqDiswK1lYk7b$yAfw}pkpWd08RqYDBTFYe*DSr!;n zfbJU{PU7yD$E%AyQk3*Ld__d|4gx!%{@&UjpOn-O&S~_CZz}I#jto&|P<7I06R5VG zkkCfeJdU7qF`M{ILvfsg;}&$i7AvXl98*T2 zs2Y`)FvnwV8BDz-ey`5OCQ`NaJ+gCb9nex z(AeS12A_Nnyy%FE_!&9EG&C>?)DG5x;Vrr_&rb#gqZG<10Btzn7=XaaG===3 zIACokTA&Ex${aX2H@S**x;{AEAtZPW6%`fnCPUywEQWAC;N$WH#6Cjb1tc7`OAXkc zU>&|4tv@q5JdeYYGT?U&GVNic>3{`4xja=#(0%2ju2+1}i@db`kpFzD1 zFM|X2L6pjJEO58h)9l^*3}XUTTyXW<)3!qZkMpUUn=}96#9S6B@ z4zth+ynoTknZkGO905ZSAPNGf&yQbr#0Ph*Vf;?na_b=4B@)+c<=6Q!ZzAn_U_kcR zcloY}n5_3;SED+)dw574Bj~SK0dx)zN0??}(t$T15gG@ugyLrU1J;fQOEIl;GAKhfxbB&kH^p7e|Z06%Uv+AytsE9 
zHCaSUEBsYGZa%`g9wXwHk`gua3L6Ic_3$`P?CH%klGZmN42uxy+ywqT!HYyY#=yiR z+;k%K2=;(^MMWD?D`1EY57xtQX2Bi7;0E0WkNgzrJzFAMW`=K_7UNexW!)v z23Eq5^SM?FVSbB3wa#Cx2;b*Dm{3*nEk!B^;4RAR)PPZQyKPitC-+qg2b*j)rj$W7PToBFSt1m9zMJlUljFlTPJ^ztb}4wv@R_UMLR!0(jz>zaX=G?f zK8y{YOR$@CrfRH4>Fyv|um9X!XQO{Me`g=UkNk?f{61)tL3G1hBLENfu&^++3F>$@ zH0vg&r$68WC(KP_x05U-p+q=baq;4D0+A@W0w6aKpMEmVO7 zAP-MIG}E5{-1^K^nZ7qonVO%UA4LK?ha-r`BbdER{P#O^m@2cRBkDd%h4aK*ir%!p zBh5r)VG8>==Jg)huMY^o6!9n%)BBGfH{nTve}El3_h#S-V~a1SsFrh>gWf z4~n`4J0~uom%hGo$Mhi*2#+fLI?uF+*5fzP0nNAHug+nOr-z5Cx_Umgj1(#6Q%BGU zsNuT?IKBbkv#^F1)rh5HCL}~C-f^bm$~KFGjLPPH)IlYqt*KK1Np@LpP^4gYBB+1C zYH3y%CqfV_Mwow5yhX{!Xg0Lo4rlnjp`jYo3ur2cjJef?N?WEuR{4atvN6h@Loqiu zTMx*6#bqKG^J2qsE-u0y4i^}z>r8XhGk~!8JO|JxMbvnGiIPvuD=2u3ZA4^kWn+H+TIhw)Qz0TbsD0=zt|RmbZ@BeOrF3Veu_iPB z0Bqly{BMBmwtZ6MN^h(zaieJ^W~TiqQU&h)U&#EKM5hpV9i;knA+mdaF8xesArAY7dP}LIs4@7um9{DT2{I+}` zz?@$Yt_!qXZTd#-j;;@_bS&(G305{r*a93`8zG56@rd^5sIB!N?9TMt&juwOJjKl& ziU9&oY)zj6O;KUtBwl>lx@=0+C?XvLC-tvAdu|@fRCDfi3PfwgpbOhQ7KNnefIyh6 z@%EnzR|2*MHcmcF*$~0XfEdd0Y;wD*U3St`&=0d<4V-nwObhDs3oI^l9V;h+3J@o% zZw~8O!b231n2tD4m1n26f_Kt|tlVnmf&XBiaqSQf5}asE0>o1@Y{Tq?rU)0@WvqHh zXi?FL9yKQ#cjCDCc`M?+9(+Ff3v7kT;Js;ES=n<8+SuMi*Jff9jiEudC9DhE@4D#8 z(WZDt6qs-m;wk;=v)!mR|DbK#UIf@r_$m|R5jXx-AMEt10o+3EBHN*4`48|7UsNCd zGvliSAatcTjG|1qU_B2Sc$GY%0)^3R^%*jnlw~N{{BUySMj}qN1NN9VTwr z1g-=0IWjsr({F?HZ+MDVWtba5zA!O4sn%WaEHU5>rm@VtRy+{bVjaE%G1g z9P>{TYwKlU!OwQzI-S5s!5%1P(NGcmZ%xP(;9iqh~ zZsl4J76l67XJK_&*xpWWX}PpGCdMEi<%Qux_1?W_5U^!Vci|oUfP6P%nRljID-kZ~ z8hx!cW>b8mi;rExd zR}P!vo2#ivo?035A_ROdJj|~DbPQgNgmJ{suG@c`XsW9}a&Zy0HqHNsippJ>ng75k z5fKq!)I(DB1qHniH_p=6b!Nt5csK8H;r5t%u3cJRpHx!UM!oOk-SfCLhFbYeP3L&L zv6^5<0^TXJ0CR-P01A59`6s>lV1U`rJ=o*F>K2hPwk;%4x5W#-QZol<`&T2zC(ez` zb^u~plJ5vo!>eBxlKnkzO9uYGBkY$O9r2dUT=@kXy_c`^U$1jFv9IStR*`JS=zki9 zr;N_OK&wlM^hNP{+Cl&KFJL+Lqtco^_iO#+Ex?>yXUhG1QYQWux1(};gbrqTGajeJ zjT`okj*jg!Xi&jqBd$IeO5*lX)6$X`&x}-oVrp%Ptom4UVubP%v3&(p0v%I~$Lw(A z%7f3+6d-G_#UDnzDu`Wt 
zflFD#7iLFaal-rN2)QprXGqg{ugJ{8avN7K2`l`+@Cw3WerAY}aKa7*WdVHz5p@8k zF%sH;DmpsRKt9;l#Zq(<1PDMw7~l2{Ab#>Cnn%KRqVM$&$)OV*Hz1t1y0%1W4s5ypHaB~FlV?ZrARXrO2h?h-=0i11~o&IenPW8yBG znDj)H4`^RC2~-iC!7ppjTCO{1>lXhyK>W`;dG^e&-mVMX2!NU^Q8H5m;M1Vrnv#oG z<&w00OEVZ%m*S9iixS!T^+!hIiD5wdk(d3fv!9s|{p z0CVv8IXF3yWAqYt4~#z|))@l@5(8w9O@A-yG}_ml^z;?*o+t)u6|1Fg%#XkvDePbp zO0;f4${QM?tiqf{Bxjc-78G21vUM+F&(o@H1O=a*U?YJ7!a(wapD1QS@Kg^1@<8J= zg`@_my?gJ~F5_+{bmhdAf|eN%oKPC0)7#6%&qZkOF~6d3Mv`3=TYgO8&{-jI&e$$>Z`rrUzU?UhZ&<|%O z5R?X_#N#`JEM=x9*D*3B#RupB)Q~}u^pV7gMj$4f`)ucdag}2vxLf;C!gEkyl|8bM z$BXnhnda1u4xmU%Az1tJdGPsHWGsDkj> z&YT8pi6RFkaS-}ZE8rervPlXZIAi$X#yI9wBI5wxN?Z z;j7S)g|G@Dnsn2qPDp9jmt-saH+-e`jd(0h|739V#0)M(2*Gy%U2w_Ja5@k@1ao|f zkI%>Z%#IHFIglT%;Yy@ZVcbu>vNaJszpG$Sm3IEu|9*IG8Hfk=>O{X*&65Bvmd#Mtd<>F8Y2(|P4}%|A^6iYmy>eTZ!e;X)wjx?c$0 zpS4M<@K2y807Syr8}hx4!B(}ZK9fet4i&bVFD>^(EZOEAN`AH+Q##!1m$oh^?BiPe zf<2o9_11Mdb~+G0?${BaZ`^@JL>>gwPoF>k7bgpOwyvOQOP6qi0gr`wkSQz3wiri& zg&Rg)c(kFhxp`k3pT|bKjobEJf^Z)&*oG-NV5n$KWi1A~#PCS_fUcz6!66hS2+p`q@tUcGPSMmGe-^z|5i(-U`W#*Cq_ zA`DhDrDNf!jE!wj3IC?K$%Vc9vfm9<`iy(ykXI$^s}LLjltS=*d-0cA%vix>1~?tz zDgfMxm?BA_5+G&qIPfPtW(N>aKp?YPnQ0JiLNNhYObl;0P(+>r%J^{IfjxVQ;a^9n zdhRwv)4`EKr z#{3E@AyV^B`pf}`N4-vOju$(8_;74;a#4GGmdwiWe~%u$x;bU{4?yhtF;WH~d_v>U zM>x~i;pxBf^?l~;E$iPbCL*%-t7TOX0LPM<^u3(?raYkcop)K`JejqzOPP;fpoe>U zdGFN9iT`Il>NkpCu$Cqo^aM0Mz*fP+17;d|gYW;5kUFEBW)~h<$qU{P!7gm`x%WqK zTX6W=PxWuWb)0)tz4Yo+K_rYIB*HTn1sq7ZT&f{MHip6|8CE{wCp!*Qz!HUUqXvLX zY(>EuVrYXIFAj(60e-?IAL9@zW1Dx9CB{x@uRQOWZd47IjMdf7f3l4^IC0sTo0Z&M z{!qi9S0zhX!<;jsa9ZKJY_$KE-tV?H4C89s|92`O=!2@Lz}Sj`r}@y$Z3h9_!!7p) zA21*kGUP>CC^+kbB!jI~JW^`-R_E0T3a^xuN1TSCC@TMB{4OlK05t4`g$um5;9)FG z541eE4Vm+Q@7otHTZDTS@0?0;Ny#BZl7iS)r?ovCuX+KNJNPSbLf+ubB(lnf`i`-% z)GAQR$F9lj&X$oe;cZD(Yw6@2e}myrrdcc{WPzg>H4-cy^*eVq?7(o7p0E|=`Ifta*$ZMzgco4-bsT^+st(0*PEN@R+h`%6 zSJ-+;CQT*5JqN1>Xcmarjm!-68PZ7>4@^v=q`cn^#OI-lc&=vidTB=6q6n+WX#SB?PYz|d*YCVh@B4l4`yI#s_&eUCw=HW~&-2{(eO=dip4WN#13~!O-kysD 
zhoBJv88!QDuOplg;2xkPmicQC*DQs6#s425H9lPZLraDd+l>|g#k{UK@2-Yiw^kf7 zx{JAK$BrGtMV-BH;W!#H@KFIpd<+|?7rZSkW3+Lf%aJiF+eX3vg_D6UGH5GwF@lQ* z{3nMEt^$|ZvTxs8)dtCu));xkM#0=AA^$pXP3I5X#eBTH*}1v5@$na4Lgs;-ItRC! zD3h_FW;M5LXWaS0K7-l88FvPPP>0A#zzy&3v+;v!R{Luw^2*q?E6l#cZItB9%baXs? z_%M7}?x|QbWIFo-7X1U%KM*h$Q&^oLqjgwt2Bj(VMp_s`iX?~#-(6>h0^}WlDMlPf zN2b1?apYQCL_gcC~i{NT`we5y#c+?1gv?U(n1%79{cA$Y6Vh=TTLgnn=W=SUeXY zvC!yy1TPtMm#K{#UHrEG9f1Ai%OA+|zjB@8h2a2%XhVhkv1;D`2MQPT&*$%~Qm&9nW&vR$!U3ee>% z>g((C-fOgkz($Ua`%wO%h*1kq_x9chn1Kz-_;-L74xK z%>Q34VunHEe-k2V^<3s~kme_wK3DyppLpoC;h*9*hyPpLR^#Wa?S}-c484%@9k(ij z!{VAUM_bM&W11;IfAz$V&TB!^%&XblBmzcM;=#s1|A?u-=cq%z7%tydG=UrzG-!Q+nS9# z)2lidND6S|x3st8nc0U7qk<|9ka0gAq*6?eaU*D-g@t}-yogi)+<4%H{^aS?qgdk< z{j51e>A)=h_W39NX7V5S+wbMeT8_JZ`KN85p((*9g`12+LdIy^tB9xUZ86A9xS&V)~mBUUm=vE}P3c!5{`kaJ|OBtVww%ca9o2`O< zdcB5~UQu>-O4mHhcxhWteAKM$o$IyiFK2X+_K?Bf+~=(NvnIF< zos;I?@}B^2@|wFllvSG#`KLSA&K(8G?h%s9zsqSY(`{^tWQ_g{xtslke;|sl*p-{L zlk6fG19ZE|pR4b8Wp?`zgV~(v-~aIceK0ix4{Kn@<7dx~qazM|7%bp_V5G0g8-NT2 zv0zo2WAkQyzMW@tbY;pCs9Uoc2hjHLPhaSll$pT6$s#Km<+uAEt%`r(?nh-@ZG7P=El4WIxY^5S#`JcB~Rk{ z2W_*BAvwlN7IB7bGj26Vxo5U$&Baa85f(i<7r548qLc3aGRDo7F(z#uP%uJus zh-j;6)4Ry&|LTeT2l=jXO{>!USASe^;}4~-eWrRT^6nD1ldP4lLlYJ!Ief~#^X_Z& zu?pD#ebnqj4f9@m6^)+7b4q<&X2M{4s!^~?Xeh2yt1R$r+hvWh@eM=cRl>}%W&=i+ z$1Byr*!uS|_Y|qd6bEURnQ4aWJ_xtiMJQ^=Slb$T|D_z#!>nL&+_l^l<$<8aQDpz^ z=Eh2))tr%rStmLb+1-xEu41e56*jYOEMa@(=4K3-5%%bTHBKP2jZr-f!=+5sXK%|s z;Vc#~707PYYh8da0^!sG;P=tUbOzYc%BSu$SLO^oaqEwDbXV4QzIgb86d5V)`|4_K zK^CVZ)B%VThu~<$t<%__6x?`vyd=R5_jlR#TQqT|4%3dazVAex%l+7fsRe!-OW&1= zRt2fvu#4S4WmK?-rLx5BYZYyhSAciXyM~-+_YD7TYHwTdE~j;uw2QI0^4LX1gJQ*E z?ySG@w4@*Y_91tMccx2p)!3-w-n}AUd%r%=GT-IRDSen%=4+R`X=Tk5Z4SeZ_xFmN z*jFo!ZQk^~HcyaUMB2jp87ni*?Yq?&9%@d)jb%3*dp}&y^0F%Ty(;8MWz2kRdn+Dx zQte82Pfnw>i^{TpW>T-p$b786pLkLs;B#T5q~Law_`!NsJqep%u2=fiZ;)gf3oAyh z++O)QeeaN=r%$`pedb|yVamZZs~nYjB_2@yemSk_o}F^*_Fq%4*kuBIq?tZ&4xS8h z9~yDwF#Fo`zP0To)nIj4VMd*6v4C)Itc7rCoJ{Yo|L>0s7R}jb8z+5k(~6TB0Ba@~)NXBo2X? 
z#vm%*vS?jVkj>zJk6}&%&_$HSMUXQQe|@w9z;ki~QfZ`Fq;0KVfr*lH9GE(y)yuOV z3U4s~?*?B7r`zqKyu9~NgQ;j~ov^fwi60^B3@F7e7Q8-vu#g`DBN9ywoCc0zz*hMB z{8-BwJ|ZqgfxBS^i#vztBdCkIGY<0nYsI+O=WoF%tMdLD7Q76%O}fYWXCt=&tz{}=G_p`aeL-Xb9KTE z-dLr+Jjuo>w>Luu&Dh-gch_8%T`rMeEb(Y2tNcJn;-BXo{+x-uayXLyYSOAEC1_)E zg6*qDQ<=e-hUQ84AZl5S5CUr(zoBjF;8cxJi{^Vv-3<5V%&A9_q5E7XzaKOBwI`=9 zeUQxyXU#6(bpIC*Iac_sJK^pnacR$Xxh_}BVnwX^mnto{@$G#40tZ{Kwbb}lZkfk+ zBkPRXGmdsf-FPAClP$XM)x&OG#Wpu@rcZ^iYfbzhHAPWZyqUZ1$&`6g^_%{K_AXnc z$-?T!!rF1wudQcne_36ccWJhvQB7?B41d1hxsf|rp95d=#(vuH&aHQ9kXmW?QQ5Xc z_72ZA)A*xDu~{|6TDPBJM^HK-6VFlN-g{ei zsirimcKB0e@#nepha6Wne&*ts4E=1j%dDpGhG*Jo-cyOw%@GS-VT4_j=LP!98KLyd66VpUo>rZ;OmNC zL?8URp_F?4bIkWq%MS@1GO=ovpNCXlp3`lL&`6Gf0P)FI+2+1J*6ZLCskNL{R0z9a zI7+-FA*F!F=m~lGh^JQIFA6ns!0WSs^Mu@SFNT4!pqb&H#;n1@-c$*!DU3aY0&)<7 zgHp7%p}HhbT3Kpj?T6d+^!pjF2kMKafUlBvWMfAP1RhYG9D>3V9g1Z|y7Z~~g6eAT zhYwd=kf>Bln|wDuIU4)r*zkRQvDE5;66)4VmqeVNnchl|O#1#U zCpJIps_z90&NsGkhc#aZ1&0m(_I^9p=mwpg7FGWomwRh|;X}w<&1C!O{oTZsB2Qhv zeHyM52eqFn9z1nW@7D!vZ-qD89_3e5_{8p2xOX3e(?TVW($B7Y;q~^STFUF3G7S+C zM)y*U81FXKbN#=4y&Zr5`oh2Bxr!O9ES0&m&x_yXGAzlvnRg_!YBVLYQP48LM&Bzy z*XZ^3@WDEr^_rm+PxlmWlWlShOi|67euo-nxFixq6><9TiP;!b2=;Xc)hmYW^oUDQTo31%@sskUh{XlImY0TRaNw zas|ry1eE_aLL=6@MYD428exp%eRBKw#Dq^Q{@??SCSTv(-Yx zqYGxWnvMxy-}coc^C-0hOiu4o7`mURV9j}_`B7xkNBfaYrAJO1WNGFO{wC#8o)t{- z*z6VKldfgiuI+u&Bl}N3x!9BU6Gt!YO-hTqby?-CTt-JnX{Dms9U*GK>790r%4f!z zRJnq3rs(Se9zSVs!S$LuXjdpNEB&c^Tm2(Wt_>K^`g?%kteKeAr|c}!t5QVujPcKk z{WB}>6)iYlXs>3~s7O6C)wFlolj0)n=z1c(cPO@R#;~cbMn%whpS~-1%0UH#spEpV zCq0iSOj3FbqTVnXZ%qbVP8RyZ$E8(`Dq%Z5j;7QKz<3c-9hHrZhKS0v;vi^gg}hIS z#BfE+UA>wJUMCESGMm*yP^r?lZ{5ThF*6hb`pY92dv9nwd55yU8rs|ZjKUOMTr$Cg zZ<9;yiAfWNQFpy7bGopos2ovWur_n|P-M|G?d{vhQGWmt`DgY3>QU&cp?Ru#ugw(O z>~zR>jByTu=C@yeO@8gZX!aShyjZY;<+|c;%a)ZwZSctuP}kGY&`@-~?1tLaCIb@{ zHo6^$$7A%yAlf6UO_f4SJf1SkQJPM63SDU_%_nDlH9CEV ziO?X`^I51mV=8P=$SA`ozliU1ylYIQ7xV3*jd}5O-uH$AFZkB%_Rq8|RFVn`(CDq` zUPX~Mdb>TTj;_DmbGV_!u+!^{v(8v)N&CKYl4C0#YdYFZuYRj3v%4|8O)yrYA$;M; 
zMuTy^n~uK}7B?vQW_<}4Fzl~f9~(pKoV@KTnI9A#>2j|+w7$o+{NAFCH3NMMoF|gB zs;ikZRXJDn`82g+TwLF}=*>jh)2#Gs(P`fE>dlyP#Q1nc+j!MgreK_1+-VLOQ8%Y8 zI^KE8J+)(^RlG5|nXcKQc8~AGMkcyC9t=3Z5j18pWi|Vtfek-?@ZiBHDD04F^n=i@ z4V=r#(;S7jD0pxal^uWnc@T_YP4ZL_jRh802^RumQ#JW=!J)Tc#Qb|@=Qmo=eC zM8l{uOebnGozo!vHL$2(uxJVTm*ht7TcN0!X!TTv@?f)E*>b`3KnN*m_!&%DOuaan z`Z0F*SQ6HaJYSHAiad5e!KI1>#h8q4Os)+Ii(9?fRo^sa8k&xo1~F z^56-UMJ$&#O@dXRlOHXaO0PU0(oXr>D>zb?|I0*c^@efV(aJv8`w3w-Mf&yh=~dbC zH36?nm;-+M1!_0TbyvnduuR>0Qub;{XSL)^bizL;QJf=`j^8IzgQFHan6Y?R%x@^w!O^HFJ)l}qfiVk*D2u9$>r zx35uad;J8neI#H8?~n4FK;q z2dV{4s4ALygk7e-J22$quwsyhDMG&I01`^%eEwsKW*Um};rPfif*35cQ);}`GZp>N zbj&J5J!%HsRm~ftI~cierzkPDXsX-V6)slMpkcs}okVxr1PYdq#vtQk_Fw!Gh*Xmg zq$=u02nTh@G5{vv&~#%dWtUa{^xS!%>3_CL&{t9PnDw(YEG(Mu`}=g)uDk4+Lu-*; zO%E}jPUN$_Vx4$Fi&a%s?~3|Am*2elYjvB+`mhiM1^bCq`ZWzJ#X+0jwt2SppPpWq zB_eKkDr#d=`s*g&g(JhwcW@~1GgwwVpvS7VX~(j)v9$~Ma=eNP)(hP?*w$aJQyjf1 z@@YVEzL$-89%aqlj^+2&onq81DB_;UO~c}TMS>oVx>c0PtQWb7v5H3+hWY`@`u@ze zepe&m;c3s{kHMTY)$K}Pg( zs+U&}dL{-32R%YN+DfWo+|i5HgG2DQl`Eh5>3@(8r3kZQ{lr2K2`FA0aZ=c8leqO! 
z+&}pfCr)A7LdGO^w1$q34_ed$p9w@N&s4qMJ!IZHt*E5*3{9{%YXB;#BP^Grc#m+4 zeKSS^r)H=}^DBoW@vA!z`C0x-OTIdF;?;Y{hKtd0EG%x1cwH0kqY+b?C|{963#v)55p`{RUwX*JFP##_pf_aX3L(g)Bv)p} zSI{G=dK}5`6JPfANHA~5Ol4?Ul!WdsVOo!6N!a>fCSevioNwwrDy$EsS(gp*$Sum% zn(0u`caesaedLRgdlv*V>VUZ;pGt~}?GzJB&c0lkTU4ZAU@(%Fv_8rpLkFRxyEF*S zCe3TPxnt2cq*Wnz@?-^q5yy-4E7sPOin<=5tlWmiz`(#INp5JAhb8@KBkR<;`@q7Y zC(d(y0SgOz0e0EMik|IR#SN?!Ce9m?XZ6=~HKj(2zu&%nRnfG2Ty%6?xRQD;vvh1s z#rJAI-$%(<4y)yK%wLaCh<&$YWTfJ>vWSHLgAA1EBF}95YSbZTH5=X_EiJ7B#Tju3 zwC%2tlVMKTP}So=ErfZj&1Jx3Ro`5E_Mxy`H(#oI5NF^oZ+;PuIDIxM?9InBd-Bg{ zmSjZ5?oE_-)eUHDd+Oy?rFFr?q^haUAD^2=m$af(or3O|Poo5oI zvx+WXY6{?-i?EtyBhy(bbAKm)o;mVeOqzwI>IQTpNYcVl?5(RRFDrWr{R-hlU?qaI z{?ppc^DlYFu0>+BTTx+Q86<8vY|Jk^BOnu5CKhoh8mvAA$Qa^jd|JEh=WED;e&)l; z?ViBgD}dJX8G~r0Lu(=8^!*aR38hVvS1WKL_-vM+f6aB;W0oZ>?{4>fd2SOF37+D)4TFpQYJXTypm#wI3_adD@hLsL6;EQjsk z@AE%?#^T}vR+j6m5-wB8Xl|=`t;hiym3PMuS(Lf*^70;~rfNt(p9vbqV796$y1sl4 z*vG?kY#6-Oe!I>emv;P5atm4c`1w84($XNbk3@#b2SW$61eF6kRoBpXjp_=Kx~Qb2 zr0zAsD40e{ZWwvYR;QZ^EK+uBz_G zCnw*Fy1Dr0zu_u=UH$IIPhvT4B}?VU*L62?r{3BAxoz7qv{y%3ONonXg8(Q;60feK zQ?ufUcHiFX)Zh+HX}6={Q;hXgDWm=g=Huh@33i?PAYYc;X;2gR6s9FDFQXw`Ey~F5ETH?_r_2%)`D~&6}Zx0SyN9(1iVkJbWCPIbw z%i_i8j#VUA%Pw8I1RXi0Sk%G}!=`eoXbC4rlQZnq)Vr0ug^q!|iyGTd`N9x7TGCb& zVe@Bh!ipbSfR5sI+fd5r=D! zS7+A6xRnS@W-RiQW6S+*)67*by>xxs4&PPbRa6mvMy z%y2vivAd1<)MC01JgyF(8evaN4{JeqZ0aLhli%DTGwy|>T)riAJpL+Pkpb* zXM{Gq4AZtNP&-@pgzk9dOtl89YsViCT&660!EO}3=IyV>RE15CLt-=6CUbM=;#WZ7?hF-{kIp~AlG4Ee@P0Qm=Zc0RB+CZeEYD< z9KD(pkm(td-h0meHSWxS_8sTNM`MhUdh72xV-E*Sd33szL9`J$U2S@Q9KyC^jm~3! 
zJiMDXAHudH>7!bT@fGNGqTANX7@>7ABs8?>ypTxIx~kcg#F9MmX7)V0GWGp(NN{i% zS^*HMRF#w-!OjR>dFKuMz9TV5^(SZOG;j+o1HmZ##3id3>9yuL)d}SuJSD{z+vJu= z-aH^N5w>&eMPae-_CwC^r#Pru`@B!Gu1a_&T~t&QIUPZyv2tbbr9d4hO|k%B!N*^-aX^3BHf@TAZl5w~`e?jL z_H?@8+%?1rM3H1*2@;fo`g(oz!cckOp3#lHpaabVM4!8m7tk#0bzG-fxIbBt2#f53iuOtg7*90hw*GN?g$u?{i9D#bn2P4RBvR2Z*4D{sLY79op_0@aML8PCCdM>Ysyy63O6;x37d z`*-s29EDcAozc?V487-m#Q^DgCrFkxK`x`$;k1)eI*ciYkrs={CM)j#Q|KtyA?cd2 zYIjPl)T!!$a0y?R;~AkEQrszf0mHX8H@^b;8UgzdjUGFwlzsM`J$$p4W5p6TE-4)l z#-B+G1Aur05sd(JbWl?lEm?62J{>GSAR~@oEne@bKn0R; zx#c)=ko@yPX5bpY?wI$-`TTwJ^l~?CaRtK$rj@hk^btAL{Cd%zL{09vZZRZ%5G%hR zeQP||Cx^R&VAe?jGQO{qdLt8^v%|g;sZ=Ob?NY;^j}Ri2>Hz*z z9eq$e^;sev__EDXUgH2E6H)!)8A z+QePmo+GFs*`mUkl`;(EM3z4qOreT;?W9W~Bza^0}bl zeSdP)wu{{$)`inA%0|NqhdPRFl0J%yRWNMMZp%5Q@t{S%P@(u)kne*RQo15?On~h0 zg?Wx%zqZcKYvW5a9~Yc2!|aZ>_V(?DhK+n}knezKdya*$OXIPjV9e;a)xXNS^Hq1F z3rVDH`s?4yySM|Yo%krX#?q^G&MshK-+Z=l_kfHnN}oKa0_n&g9d@|J8e=gjKAWNj zqX$xI^o!91U>M(v4{rk^O8t79BSLXTo?6g@U2aG%DB=G~32;S&9SnXpd1TE!Iz+>5vRMeaRh!#+_z8IP7_s`lT;Qxg#w zQR19LMil)MV>Ac4QMcj1j~tzbCae>kKxJT0qiO`_?%*Yn&9ha;1Hd|P z`il_I&2q^gZY*GQ3K5v`nhJp9wms2F248o^awFg)5&a7HTN{a|CV`yI=5PB?wHb~{Xr3^7@=ia)frd2y z^*;6dxa z6`0PoFGuO=I;% z>#ul%=Z^&PR_uiO*qD9a1sJ&UF;higzmIj*iNhUFrwRe!!jtP4qtL4h@wOs(8EUC% zf=L)H8N8L-0Pf-LA-Fa5}MTSLPE4(oOVi? 
z1bHhV#he-fiWmTUmA%FnF8F9>p(&CdzXJmvU#7xF3C|r1ugDJK|&Jp zHptSIxShV;IQ9@ZC*;LtfBmu?Q=kITYk)hiT|s*I;HCtpGrhT+^zQzbh3qKcNVvjJ zVEVig4pR~$;C~Bm#@ARta;kWz;d;A9-UA2p;9;WZA(n1%;M==JI+;!>h+onGy;1pT zkVbqjXwWg@*;(S+w?MR0w{~|&z%uc|!(6L_589Mvu~!`ce1aYolw+Sy3N7r+ z{z`bc+m0y>poc6xfTxdn#3dv&(%i1t;#ScR0tv$I(fal4OWbr*EDVR{dpmY@BYmw* zS9GxL*b}S$_Be1OR-dhNS9gYWHWfNP{{8!N&4AmeG|2jQ`W_ct*!r9Y`J5>PK7^4` z)6=_{`DnuNTlP{K+HTm^uI5Ao_@mcLzw=M&Kt&vl;;~kedUJFwec))A8Or z4WiN0i2pcj@yRxa51y$9Z-1!n`t-4ABFHaQysH5;aY_> z2h|Si{rN2%w?qRK{UyKT$19%k;K0Q7<2NY)Tc)acFE(mB_kq-~xPN5NF(+M_%B!Tm z2d3zeH+KjZ9;oTgkH)QqT| z=8$nohjT&{_O2YYVq6YpgI^}O)Fr>rFOmc-BqaK ziYH!fI5W7u-nvKto*DQx~u=~rKAf$&P=-or@65a3yB!n8I z&KCb%oQulWbNBX67{Bd$oxkAICftasTlKX`J9jm2e_)>H&J&?fHL=ZLliv$L-OyLB z4x=PNO*=R|{1if0ssvUr^xzQkdZKF7kKG`D|3UZKr7m{RyZA{v>!6)?ovw8iuqGcU z7SMFQpWF_*q9uCCVKf6o|O zv9Hhh)|S7#yu8p|&tBI6?HzQvWj(Q3G3X0bcAY2>=%%|6FEa&&^1wqk**TBcL=;Wi zT)K1$;@z8hn6E{-REs5zjK;ThK1=YKO=zy0|Bbf=Hp^Z&1C-{?IpO3AwABiq`{!=^ z%K3d89w?R4smpg9@3%9!^e<59k&|6rP0jRMQ#LN`l&5=FYpYsY@7ejQdqf{5-1dy92JULklQNxW3rC2%QBISzA{F_{#n17R~sFMoMcB$Cg zrvQ*fzV`~`)vY^se0wnS_5}WAuPbvbi>Z(3;UjKyAt-mh_esr*w7ow*?&{fG`xMgo z5nxLEGBSq90N}10kE1D!*s1_Nqerwr?@BNaOk0kaUO4~S?%QqRc;Y)@9boU^P*_`= zCO??OOAH~9nw>g$@H(11Dq359Jh_N1xG^kI&E6*dgfz1Zkkb5j^Ga ze0|L#Ja^NyaLN+D8H;KdzP0xB@XI0QmXo6b`5M2F(4hS7A6q$Y%YUr#>RYX`JT%x& z<8onI))905+K-FmpL?_yrzb`r9tc9tDU5TOm0dWj#sNr`DyV*xKrXkoE}nmd)|$DB z340A&Xd4~~783=@H;(kvs9%p|AUdJu(Sy5Bq81w-!|Ye`=-llq%*&I#dX?&0apzx| z&M`k^Z~_MDT0kJ#4_v!s^Y^m(SH*218(nY5Zl0ThfWkkcjk68t2g+vA(`xfS!(vVz zHrSWBKo3rehZn^gQbbbS^2i|~kW`n5%HdoWo{*r1@{Ht0#c@BDi`zdN@oT;WLIf$G z9e)(pWqsv}nw;FdS3yB+TwIpB8&QYwbWt@8_9pcpwjpw6Ubwe${sVlzHk-sBA~zSb zKrEOZoG)aVf+N>!h&NWqnCOoDv|;o7&j<3(GGWIN2nx!}-==){aUpt#NOAP@YyV$A z&nv4X}D1;2$(pPYqnjXJ01=hV_FCD@GHB&O;u**g3p zZL)A_`%0oSS1h^7$;p|37V}LqE_%>9s^MQ>bvhDnQ2=@(b zxrP};K*(K$(nmI^fAI_^TFjgY2`cnb@cs9udRjJ23D2J(mJaq$u7^K1S5hrIXXGTOr zF8HgQIwLvK&lVB*?yCH!13{m%W-h$(NX{%aZi^h&lXGhua}v_`ba4u=iI0D!%I05a 
z_3g|0;McEF04CJLBPsNga;$8wLGrP7&6*&@GtJs|>$dfTZBDVVRbHVVfuMrOB;h*a zL~Fxdg@+$fa?@`cM8v_$BK5IFdOFKG5GVqM;kDCQbyMwdt3`oD?l~)aT1rC}w8y3} z3INLSJafFAV|u{dRn5V{f$9LDiEyo-H>etKxeQMzl9>Q^>ehFGc>`Q%Usvwkoo?hM zy%i}8gmS2!D<&M#i1IhUM$I+{Q&W)8e^EB2h3oe*?i8F?#ncU|V_h4_AV!% z5a!E(@hWv<6FUNYd~{kYR}&!)j~NbCxBC#f;7gBd_Sk$5Mj7QPt>E1Q%>e_gY=_Ne zp4Sc>*~Yg0mTe!;j`b$$`ASN=i+}5>k;C`4ah1e%^1HMNPTcu$&BA4ze~7j!Xy&!1 z-uCC3*?#|>GK=A!%wIVD;cWVNkE&#YerQVnDl?ZfSz6aEDr+h?ZZKR}LrHD~jt3)w z^7RrS&9r4S=p*;AwzI3^p82*E+>fZ~3zBolVhYya$Ox(cUlf<8x0UMo;;fc}k2OJg zVEwQKz00I%@OX+!83JCg%9_B8HQ`GEP}mV$FR!j4JNpo3iUskQ0i(6Pbm{X}_XDaO z1JhGe72u)e$^gu?HZ=uyc6LgPF*-UzA?RRdxLORQB^SfyR|IIygPyAQlcIobr3U_4 zbKlJk6Tlv#^Y9fiBb^G^Yu+S~1E33Yn8Bb%+UGUGXfy=nx6hRcP~8j$W9te!4)7z8@b#@_TLRF z34$U6{wD>gOx6H*W!w^7dzt?U9K{X}m2QA1wFh`2VOUi|+H8)shv*s~cDi<`k!J3N z@`H{ZEpq2^R-_pqN8xLE2!1HQ)qolOW;Qe(oSxH6U_%>NWsbilm=ZYsaQSgWK$^?6 z1K}(1jlEWf9CwaCtr@qEOs>;vi}VXfTDdJCzH`U-Vh-cDr1DbRNglZl<@GzA^Hgj* zx%qauYw!v3mKVxozD!?J%|4NPqL^f>}1rC!ooZp@?(Maxm>mHKSQi~hDf zd%?eQx@{lr(&dWJqTBfd`cK^Kmxc8ZlmjA&1IXul&@Al51A&7%uzv~L?x&>s#c{3) z&csK^SpH^i3W=vP-}7X;A&YW=b=WWnvmOW);6xy$*&$7C;B*Od%H3hKHyrHIDZ%t_ ze~BM55%NAS%ua*xA`*O4JV^59{@0q9**Ky_!NA+@WoD^q)X?|X^uhaRc;F2mOfSXe zE8247TFMMqEGlF#q|yh%wQh`QJ(NbQ`0*T65eNlh43{$+vC7eYXh?p-XRpz10*Qf} ze*lSlZz;|zQr!fVhQZD5Q}c}e)S*WT3Udj-15hr-u z8gO52a0bIS~B`2%FD7|5CI*sQ${&s^=wnW$~D3SW|G@x?o=2HO9LHG+RHWtrZ{I z{mao?ogebE=#&1*sj1!IoIYcrj{s-n1BS2KHXaGN#Ai(Q#E8HhP(YFmOGc|f3q%Jp z;oxrjh8U>w^2))_P(rVjGLR(5 zib_G45R{F2w9d?RlkyKO08Xn{cw2M$v;)koYH^n_U`T^s5kaH(CO{Xk_I)!xqhRZ6 z2vh;3d+A$XLChicFr0-mCg|X*vd{8oAYi(VI2%mtxP=BRd5$!Sxo_YG3iQim^g+2kF z$xuS^9kQ}uCIj~-HR>5HIAe0EQ_7h~&ZJyP z!Q+5r5|w9T?~1|B&Y%jzm65$p_jHoQDkZ#Zs;Ro$9Ir&Z@H7cc97$oj0MROV(tQEk zj%RPx+&bEdut2j8X&a43wm|o%>Tm1rka7YuB_Y z%;55n6RYeOuV>b%h8i5`jWBHIAN0^OM3KVLM+ZLcE7=L|lJE&Yh-@JPy``y56-D#6|FlHV|RAjH)Qq^eW9eiEzTx}#+D50l0y5vTV{$h^ zK?lr1b`6ao`%QSBBoc5I5gAL05NQ^Tne|hMZxejB=JK5m*RGio6iiW(4&4Nfw$sKB zdfKnxrh;=pffG)%0Qi4i$h+I`{HNmLO31aKC|Q>Q^I@NTCXWzkBl|*AJ6ykxl{|dd 
zSaMqil?ly=uoi3T$Vz7k?9$uBU4TkLgJ2ryQX^tc90`c`&|_2}vw78FSc!r@fQ;Iu6sNDPZ7GN+9aT>Sdt;^1q>ED%z_Xuwiq8v(j9_+MhRKs zy96iB0MIj0;ySYxP&5h(4Iqq>L;F8Kx~B>@iYVWK00LSODd=tlxE2P{>jY}h9(uoy zt4uBobWkgFqUce_Pm^>4{#Hm-$HwfT7|Y2yMxYlcIt@B9S(n^Ikq06g7)vD1O`_%@ z8uo6Ya2Un&uE4O+VH9f=Tr)YVEz|lSTDR-3GowdG&LwRlvw+V{?CXT{1*?G|7Xf1j zvWDVN1IoML!e~2UrwhGHAs|o8-OxlXGIB>|>X54O;iklVO9>xq`2sU-2~D*qjbf$l zPDXUo%1n`%hTk?hKk!JJ+Ah~VB(kj_JN#B|)Qb>71~VzscjhGzZ)?A=ZEYlV+3n`K z!_JZxPmUR`&kWem8(N$+_PNO`&%EVrQ9^f?^r--|BOhC|ju04bKJRINYk>>z&z!V; z-~bEED%AH0pqPkQpt~~K_t?VZ*VgFQA!0lDgk_QT9l2Cs)IGMDJ&3YIS`3coAZ9Xo zWn^T`%#3B}R-_E%av&ch*3j@!RAAzppNvs5KZVcNOSs6H1KAzPl=UkAGSed@gXc7#W1a7EpbB{LO82$r6f*ep* zFOH(v+s^zs%E4m`zx-E9B>iF|;2d(GkPeKsYr$EmF-(%sRYBUM$XO1HA>{gxDKad- zqdo^PUKMFId%m11zC$*ZqgHNJ|w>$@vvlZ;2$Zv78EboZds^nKR^ z)se3JJ9?BU?*|1mkqu*d0&tA}$mgR~AT?Nf}IP^}1^a9NxT|povXfh?0 zk0BaIgC9XLV8k{;K_0D*sv3ZIUi!#)j7Tk8yJ15Z=tAC|JHrYcS3_}K^q`RbS z2I4~Gb3~+tV+s6Y^=RKY zf|Dl}78m!&FIx!EC?z%3;N@LD`^DqJ$lZ4qE!c}Ny)Xm@^*Xgl7pqkXX>4k08a?7end0w*29nbM`)3w{USujT(ub6y`(z(( zP}vrKx5yFRdgi-&kRSpiDDT{hsy6`w8E2TL67G?(`Fz|5eWV<(2Ib9RuC}vmP@p%~ zD_u*&i*Aip7qn6wd+WBUsT6okXxC;S0A|Tb$Tv_Ts#FY zN}u>&pYgxeg8KrB(qR7&nz@L;jw~!AA7)z@cEUWqx>|ik zg&Tg0#Pk6LrbjS>2}pZ_Sr8fqVHE^h1UgDKy5%l$8$3!qz2I|97hjmAgvt`ZVD0LK)i6Orzo+h>M^DB8jK#~dJh*@(}(y_th#Zt8nlEx>x`8CA>zSg?y(X`-XCoM|{(R*DEpRc>XI`e)b=QPs3#o zx;=VV(S^4udM*NvNLA#Nlrnd9DEXv3EO8M@%9ZFS@%i34s;SK!zN3@}2F(^-h^LHw z!7==Wk*x9XZ4kTsSL0N>=)+Bt&meECO40lAYmUZP)GWdW4)%t0dW)doDpVqQZ@^|E z`BP}Cia>2lY}{PXNnr47C@Ev_>VV%H*?YA3KXU4LAGZIh9ohox=;j+sxy~SW+u&C0 zdM6209Y*ZcHhu5UV!9wOzFVVw;1KDoE&_5j_tjeSdU4#{yC?R+7aTc?qi--kf(0X1bFs*)ac^r<|ozPKInwZ{C*#r=o^%T}(;MF8&ljxZmm6Acov z-G^zk5GtM7uP5eDo?VBbq*?B+b<2U6YeiQ&Pz}vLUf2f^)d)3%Qr?(g?MLo<#)TFI zW`|hfpZ%+*s!i@2J?k&fm$gruT$A1~p_$%nCs^b%Dta=<_|XKvi+4VL{;JdT=ftF{ z!3d_>g(Umy!QMp?ZUadATY-i|#glUjR`Hjyyoo~};{GOMR3pSK`%#tqfx%y40emSP z5XdfkOt3lD5S`#}*@HX4WTXD>fc+Ni;LU(1dBTDJ#ED-q(CO7_r*#i*Atm+1bVQh(0gYI^bm=1E$3+H_ 
zJmQj)hlLB2hws6xmr3o`j{lqB%{4P~%L9Ktxm~*YGw#r*3Svr*)nqE{f!W6x4Fjis zWFomCgXX2TjPFUojO>x)IISsF_ne$z*3#2*8#gj*AAF*0Z4X`4+n31s`p=fN(_HD) zuUO2e-F=JlsaVmU)*j>_l7I**`^j)K=OOJxhvHdsn1UMAFvlflv~`ptXkZr{Tuu$h2IFb+Q*Bsv$!$ zW7LRN{!vI2$`poJe_!7x_#_dtdnlbY3kk&+ngfjW`TY5Fkie~U}J0MAs`{) z1HB1k9IYq5Rcg3I$5&re=W?Hsb6pwRkYgt|DQB3*akKQ`a5Be&oy8xQ{`pN}GBa&S z(2eU#FJF9dx_jSd`vIqjhE?DHdTXD7#a#Nklt$?re%l>wg80PZ*$*LGx*8L+-xxKB-+lmhTp~ zBeN&&)QJslBFwjjy5lr5Pzh#?w~tbeg^@1~v5@MlGXeJ;yT%au9#x@& zP#ROG&!mp9{qe_*PZ!9*8K@U>2GF^YDsSuh{By>im_-1I68!G6QM74iIuYkftAq_d z&%}R3s%ESl@Snk}JqzGHecLFp3v-rEU@j}FwB1z0NDu-rXJ3U;z7`?z&(6+V*|7GL ze~vG1vfVD`ir@FfaV*$$q!Pw=UU%cvgXXAxdNQS%6$9gMx0A)Ri4heV6}a}@ zK&n{Jiax9R?>qZ*i&L0ejkDKBIX6#JjOqXw(%znt0ho@eeUI-aaxP$$SD8vB{)LEN zSx`Z08(Sx4f;z+iAu-fgS9V(%zm&IU6cpZB&+@r%G_!87P>SX0`lo}9ciJWWdfxXqru}VGe^={v z))i61shO?PlXN{xr_TL(wl1a^H4oKqT46k*9J1Xv)fe?p{2l<4_b48vlnrF#gE{LB zF{# zBiU7Rh77{*V9voejc)0D6 z_4B5BJz4z@|54U^0BEh^ro&VqDy0>hg~QQQc#||1hJX0|$GL4*84=*}^ObXdsam`3 z`p;E4o0#}4oBd9(w)nR$4b^o{r4?uYEni#Kw*aS0*~v}th{8P$HG)AjnyT~6v0o_^ zK8Q{cOV+Tl-SYLVrLLr@Z2P2lynklRa0>I;Yr@*if3@B}O>)HlNV#MeCgX{qBwdOU zg4ow##ug?e1cO&lWU~%3bu{9U-kdo?nQVK&+&)fio9b=M+SdIj(~Z;n=ZDO13;iF7 zRm3h`4A(`2Mk}N`SAE!=qB+JIKF}(8C7vipR?Z~bOE7nykB&RDU;WHmtsdJxbj@N7 zxM9{Ro}PN;Uhk7gHdg=fhe~*(#eNPf^!F568Io@YDKB(8VCOonj0iZ>W8IRy1$u0 zv)WA6H^Z;(ZHG)xNrib8l-{v>d7oar>`qD!(Jg z?{D2T0iJXrS~Fgv+`z0qGBcsQ*iQlYDhsB5?!XunnDL!bQ#(o~X(}n@Ha2o!2cx(b z9TwnA?v$DaV?+q@-6jU3#I{O559oOcXeqy#aR8`BLf&X$5QX9P1r+k|YFmFZ`yjF) z;uD9VF^?v2GQjEcF~lgIPoCVMQvf90LX?#C0~{OxWP0e`qE!_y$9}GYR?69^3>18K z^&+tc@(`8a6}S%ik4G(l;GTcFK*pGISfyu^x#SRel%U%4*;8}-Ud9gpCsuW`MSEPM z)IT=Se4^KGl2J5uA07UGl)VL5Ra@IO3I-S;Hqs`jAfPB6x>2zJ6&0mRP`bOeScrm% zN`s1u2!eE%lz@PUG)Q-ME!H~sMBVQ9{NMYX^RMgL<(B1IbB;O2^W61>2tLGtI%umW zL8TeXwr#G^>G+SDO|FvJtmU7DhQmgd?^4Nf{_C%0P+LKzcpu0Vv`;w@a1HDg3M?CJ zNc4bQfw=kA`qzt}c%1HsSJMYe&;nc<0DtK1HUameqT9yr3W3*^v~IM&?j9b?H2~k> zxO#%E10x6E?#q~M_mG=9uyrjK6Bow)Kn4e>X4SfN*6-s1MG~qM-x1~CPs8zO-hiKv 
z6_EA-7+J7$peo~0%kV>a1LZE{ZdBM9T=hciXdFiP02eP#PPKQ}a6vNhl9H501o zn+GFLj61--gr58s=8b|wle2>Vuifva1au9E1!TRBuZ#&lkI@$+UYi5w#Zp}kMv~eU z6yza{DPeb-ynrP7(@3#}DI-cX9aih4$IqVKK^cJQJsF6~c-9_-!MAX$Y@z~x8gnTN zm@*^-1C}HIJSpZ==YMVrX@}hzF`Eh_q@f(yiP^9~D5>UWM!YI-R4b#CzgCn&jF)i=K;daGnii7w}@PojY6xZch|Qaknl~6%C+l;tefIR(%{cG@Wn7s+L*$#+t^B+ATMyuWEYFo0*}; zB4R9kxC4;``KR2Gl8LLiad;y-T11-{qB~$ZsI>wck|wk`R0!NZ2-^;n8SVvI)Gsji z3EO)0KAMjeR9Ubu^*71={u;-l*99!Y*Z8mX5t|jj#XM|Jc3saD(ZdpNjRnd1FpXTY z_RQRf^hB_ys;!fWuPG@@ez1nq`l2|k;$UmO5bbI8ml6m80V038sHZe0R4O}d*Mm`gqIlkysFs)uSJc$@R z%gK2cCP}CyuEY%d1Y|xZ=3P>|(j(YMtI*eUq@k~_GViA4NOf}Tr;QuDX-+j~DhLpD zLI8JIMF?TsLIAwn^u zxc~S+f7XVZI=@gQ95gWdV|sOu@K#K(%GH49B?!!D_`HaS2&#A0)f)f&l(&K3Qk_+H z6|>@i_Pn=js;%{F7O*vMD! zL}m2r|KG2C z%8Xc!irc%CAkVRXi?VsRyXR`~3#;kr>Y5{_0f|8vb6Z$jJ9FmVKG~d0S8_AB$g+*R znoU!k_mu`lewaRfRBA1%Mwv|Bb))vcp!r?fvTLL1^3*AmsZc?)-Sjj4kuf0324&)b;q@%6Ez2D!O{x#Fm zQE}e%Vi*~2U*TkK~$Mi3xM!jk}SAT8OaENhrNOnRit1-KA!i}yj;TnUh zLiY3ng)%NmC%(xv8a#}7Kh3BelS*MJ^$*e_-BNBJzAz_gU@sXhPHC9W-oY$N-hWQ_ zOnK0rcaL128R%=Q-O5ksV^>jOXB$)hB|u6IG!&)_~?0T5DVKyPW?fdekgUYuoDx_ z^89sug<&Qw^f_^gwNEB1t-pYN{cGnk+uo5)ed{-^%W+Ro$T;lGTgxzfW~}dGR-^Bu zz;tg$Nr!Ee$5+JJXBb7NZ+KLeotlr{G`ljnGj>jab%-N_?3FOUT=<1T3eaAVA0N-3 zYdSnk^ERtf=~J_Q?08>GR;vq#VXjsZZhmKZU*5JU?uA2{MGT!ETWu&s#gkRr9@5M= zG3R~vEfn1tdz!;YuaDc{qf!)Ipo*P!k9z{^R?@ISoL49H*F+Oq=eY*?)yB$Jhhlk$ zRQo6m-)me1+9?mx!^MntO4o%3d73LX3FywglSfsckKx1cbfUyWjH@K#>4?;(>}}S%B^@b)sAhOehf&bnDiU~FbrpXA<+uCwX(#J{K7{iQC$CS)VzJpJ8Be2#p{ zdCoV~v|DEb#ebq;kK#wo#JakPt{qA%=lREGR_A+-xK-;HOgq(AN?bdBJh3}TJ^wU(*&&0TZdTr|qQp za|@;Ne&QB~WoC57$QGfgo0W}zufr=IPP)c8RPtM@1D^==%<31fb8un|kvFQyeoM~& zPVTh);WRI0`M{{he7yGAK=%E7+W3)nd8^pw=UNgc_Q;w2`|LI{jA zIPzjW_!2u@);XL)r{v|K1PX7cUfI733k!}mRGG)o0n@HyZG}p~>b-XCRX@xzfoy~r zqk^F-C{zRkAE}et8Hu}z@x2cL7sSI*`9;?vtblRzX4D=uSk~_qNxJd=pgm%7qG6e! 
z4#x1J{0rp&iKSpc5(UjiU>;{`n=lhvY;6&c z62@ZJ(_hfUm~dhwdgA2sU*OT%8(@)uWN9B}K7IDAHiE5)s(#a^j(3PS z!f4*GgoMrV@)ZWoBSmpf%-1>k$W>@wXqpSu^xR`Cy>r<%$yBL=@U!)yL2`qSqbIZW zG({lRVu-m*`J8yYz?h2NZejRL>2kfn^4%lHsiirmyJ{kq$6hlUY_|3LKxr?O}?|_Fcnlr#k6}?Cjc?$t>iFdHIB1K9zkzf;bHir0g+DziqBzLG#w4 zXKXw|#QmDGgW`PlLEGfXPQC2(XH6Y?r^wl=LB;#b_hYE+_i2(rKjOkGLb%N4FOsC4 z?6h*9DlH6Mq-Szj5M+B9^*w)d-y##fcII8{4*v}%SM-&e7dmdOEnTK~ByS4*UMKpg zJ%WN;srdL}PA0hnEw9#`?&s(p)PXQi5MWoQfeG~_00~M@y z?p`tC0_j;mVf?}JK(9s&zM^9~bQ=xxR-UWGu<;bA&{Q9|AXme4k1s9Vwxb{r!oD;% z1_FSOgS#7~zO;_c>o6Wk$sY%~9iW|mh@lk7YkP`u6GTOY!MUIrph`S|k!PCm_1li< zZzkAdY)26QVv0oGE8T)%hd&O^8Rle+2jJ4W`d)$ftFwr78{vS$_DjKSv* z)YDD|PL7$8=OnjwZeG5Ad@+;xA<(TD$?)KYdf?1_iP9D5G=Z65rspbp z`Zw4~aMS^hu|;rCP&6ap+&;DU{n=+P&ye_1o-4ZlLdoY2E;R|AGVR|aT%o4e%g4us zXa%z-YQvmK%!h+)~fR~lr&eUHI;rl7&b&J;_0UBbNyO_w7l!)TKtr5p?V* zTE6$qx&PeBKUd2*5Wgl10d z>0Wa-SbOi618reaAKHDgqq)Q+yvijS+uDreQ&uER?;6Qx%2JARyKE8Jq8%@~Sft`r zMniG5+!v@gs5_M2B0PORGg@&)uF+zB21z-KWJ}ADo$%n3!k#AKG+HWE`{bftU7e0^ zY=-l@@4ShLG846_&0<@%YU29Dc=PKhu^bzR`1>pTiB_A#79TQE8{ z*5<*N_3nEX=Wlp0oZ4Z0GQL8k(S><%bt|gPaRfN0=W$wlRh|BLruG zAtJe0p{x%4_x=qnatNV0JTly+983fc3zsHKE^__uVM26&E(=5e@hBy%im!j@>&u8W z0w7ydHy=h^0mNgXLRAnn`SB1KDj2!Y#X&s+XYUHxOY{zA=K|=md2er!;^@5;PVpO< zvzBJ=OPBjd8L+fp*H8|a zTA5%Ppz6;*#Q8iUBY=UD@`Oc3UA@P`%TM$%X>g|?Z)0wDNO69zuEtcTJBj5A$kNVr}7xd>_w$Kkry(7=1 zj1wC@2Q!Le!+D!E2Hjj19q-uDQ(e(kp=Wa$W;Z_P(!>5dEADxiLwpMMo~z>}%uP*v z<&xH2i^SAkIQn2gUP+iG73zUO*g~2DjwB4XHAvDa0K9P!sx{$$Oh_Dqir)v#F5t1x zkug~%gqzBM1p0&+=#Bea3Buh_kcF^Bmd-FB>xhFN4nqL@z0K)D$<5Zv&*D{)0u5`^ zrQmWWc)H^BD%hdBI*SO-;);qM^bLA2Y5_(28)%x4F!{gZy8p&<W3>3T>viz_|)Eg#crPxI6xd?5Khd_U&&6sd3g!WHQV#-ncvfvhT;8#^|G?r7ozW1H|P@ek4oPB`8m^OK{VnlLwWw^lFRqm@y0~Xgz zXR3YBjnppgo`1*hUtGcaNh)E>0VTW3`ztDVU!CBN*^iX_GbQ_^970|&Pw(a}mMX1( zb%>Pb7%A>z#N3d@IW;7u80y7ck)1>~xj*kIb4bKVXtm$dS*Sz?vo?2zD<6l z7F{TMHY4+IGQMO+PcU!}^ePxGPX3S6i-`loXl=;XA+T*v&~Z||%Ci*{R=0^-?gie^ zfm+b&DG|m3b0r*9eW3s7^&*t)>_q(i{SmB2joSL8S2h>|oFia~3{*=}MIl845ewHF zZARw6vd-Ua>QveJ+)JW1kF1yA^k#=Cqo|L-JEH< 
z$j=KI^B{7j_VL(Ey|Q|oN!13Dw6d9Kd||u~vmixOQRU1{^0aw7^18ic3_F!Du#3X7?0=0rZ0BzcAAI2C4-Nh_#?`_G8Cy!0{N{qB*}n>#%zZO?BQZ(~29Gcr9TTI(Y( zS>I~QON7i^R^Ga0OZDEf6-e@k1Hno3@puTq9h>Z5r2Ya=^@V-=co>06h83Dm{d#gf zC;n6?sT@-@U)@_eq_47UUg>(`)yM+^+vv7Ev{EUZXRc}*$xdxiH$r=uc>QEp?2l53 zCBouV-p$oFa3g;yN>x@mr!WqA$y#Jnx}9AT&1`BZ=D!}#`zoiUhFeY&g~9g=*phLa zzWJ9ZVnf6eGOId)h9mf082SadB9!j@+jx0}8~|QJE(wwMN(?BFz;mi7`sHM@Nymny zm>)7XHJOZdNv94JZ+K)+-r^#^bKw-bThUBJrnyAQMhdpGGhjmj_WSOkafRR%%bsNz z5m}|yj|!)!lzK&|j>K$Pdo84yPj8d-?qM>A>ZfumS}OjuBjg|NgS8qfhYCF*#F%STA3Go8TNiBkuEboi zUCOkT3exZ}z%^Fc1=jL+&fU!1okgO*zrrL?THTU9*7X5(AR zig61*_QsTrYWDe478I-1;?sI{@o${F6w}iRT{IXSYg6*v_@wTIUKS9)_+$rHyjjeQ zw$A>06K@M8dfYAjL{|)Fu#+c342*c$q<%<_raNd*Jnt45p!6r!<8ku5&I zB)bnQjLGu3>P^ZeHH#)jN$?vB@4(yqaP* zId#s#O68fA;V+b!k4AfHb-RE_!R)MkNAowkE0cU`S@AW(<(->57@IcQFbIWrY>i(z zVRtq$+tp?d8S6mcddbuMdIpV#f{^&#sn&l34pc5alN!tjr@~Y{M_}1Wpx}hDKS|Em zn3rI%(2!QwM93M{W-~wC>Q+EUK2~1TXmx9%t2#OI1L>AP>ep9q76;5tyGVj8K_$0c zTRj9`sk&4CO8?q_PsZ&kPa+prMd08}o#-;U%Nb6Q4$jf*g>vYuo)mqyGrRG9H-~nO+2G4X>dmubH&^$}%=RQnecu6b+MMSV*6~x>4!z&PmuD}uN&?FIAMxSd{|#048!gYx zpfu?@#*>tbrn-j%ul3CjhYTmU(`mYxi)I~@6i>8eZVPs+&urUH3Ch|WSS`6+%xwM; z9nF2VZ;xbJveIj2`5+hMQBHb@0Fp~-l>efvk%#UF#S05qb5OhZ;?y~)pg;4u%$=nI zbCHWFKc+TBz3T;=5OIDAQ}FyUK@k9_Q#TR{2nqYjwOfTZR$_Y*CynNL?g`_-DFVlv zzl1~5*;*6*)cVoEOTr1YAy@lrHj$GyJ1%GLAr-C5>*#nnn&5Q0UsJoyYi&;6oy)bG zqTQtUZaWK!aZnnorf=5?n50;f)%P5Y`Fpg$d||xy3b*5vWx-+kUYeA3vt_~y8!JDM zw)1ZmwT#FN=KiM8rkGr>7yf*6uIa*Pt2bj?^x3;IXAY4%>>Pq?hBPk3NnBe?@8WEd zO~vu$@79|0ehkuuIZ>&t&7agH_pqlG{52l%J|@(O&RK%Fq~h<~Rc{Z?l3X*Jp7r#c zA<^48hw|MOsi@oZ#m7ZEbjT?ppgrbM#FvnV)B4oxYK%U!S0WeH=yp`uS85V_@l9z zWZJ7$Jrn;(o^*St$NxVSPmHU##Uonnrxt+r6l6tEPU#@^T}MYpBT)(P^N#)f?NkJ= zK8JXVNLecQa2F>>T0+4azlcpg|C5@G=Km=LeF#O7CuGZnVjXx1YZ2d@T z&_gMTW5A*hm$7Dk=Jca;F#}t2^IN<+pNBF~<=mx@EGYax&}XY+ra`M|N4xu1{+2vP za#}Xlu^_BFFr6*&+&!Da-Q@h)sI83*d|uUBU0;fKHdwFTMUMUfxT#3-?4UtF(m+iB ze%tApW8PE*hKzqNF5AQ#ZKJF}V#FG}2lEl2u*-e(CRuZwL|7)o9rv(*FZn#beGO9P&B{_DIl&YzfLnLggq$d^-ix%_&CtlPBr 
z=yk!}*Nh6%yt#2?CNEq zU+!IV|CqVr%G>%)Hi@zIgNp)fwbK@qF6p}1*cx44kHK{;hlW-UlwX)05z-Oma-sf~ zb*EywG1R+yO{A|1k3sJQ%To=L^!~@0PV>~Wlg%DoTjQPT+xw%ldu8wU(GPfjYOuVT zusUPVM9McLlzXgpW{$e)V8M^@U0i_IDd%O!YP&0?J_$(OYYZETcQF#Q?U5hNxBXmZ zZbVvfr(tG~^T^cum7fzIUCe>n4(TmoX}}5=ptH7}Y4%v2Zzvd#q4(1xE%MNu zKsm7IEH4eQoPmnhr6rD2)o0kncQC|3vUpNQXE&M|pqm>uZv5oIC(5uMIv`j!AwB{; zb{lo&Ew^CWQ;3wVOSq|!IFtHQtQMGN^Y~{urDF9{so>8nVSgAFLueVn?-OMMMBvcM zenp%WQd#mr6%f)N{9%rWL6D1gk3KSHGsUTG!#VJy@m%2N^LOQ=Gi0J&Zg6u}u)5_^ zEeNu=ohn%kHdH)=mwf8Gq-ao|1(%|d+F@M{JuMpR?#Ml2Xo@}@q8WJ}LK?J5e&Rr) zE2boS2#8GIwm)^8DvxU>eAaT`f=`t@+qY*wl8@Jlkk^*lBQM}mR#LJMj2}8sHlPx? z2Wf)R!r(19?>@rYUK69vaMaK+N;~!ZM?R-{sGdjX2c`$l9X(1#;8ZBUVH(Odn-ld_ zPM4de*xO%LRa0^tTNsb?XbCNLi>duoq|D11MdNkLjxVq3(R#%KySnj}j^j?|r9wF& zj~i1;Bw6AGo!T`AW_+?nSk~kf<#x(wQsf-2sg_HbFKZfI45Np)izLD|D4@VtcaOGN z>*b8>k>pg?VLx-T;*OBzgYgOJInaVf>`n_Z0TC+|=O|CWu^n7ZhLy?Qh z7AcAuyH7FsZLBxUD-P~?Y^iW{Yoo{mpMM9UO7!f%eHNR0pgv&@ZAKCl`*y-7U~wsN z1t9du)%5fqWEg(Jb)TzG5?Tvy-TrUt?I&eqpdT&(Aamc{omvA)JTTqdh2n1G)~$uu zA~0b7s=^(I6DtzOY<63_HO%=0hsfN_6C9G-W*Jih;UTS=DffGGF-K4D2;xrUb93lezYtYiGc>sH zKFyvy&(r8wC@H=wV>h2=Ut;vrv%b>PqZujRO|`|d8QE_eoi;5MzR{;Am7Mx1abP)V zeBWMP*P`J#ip`hZe*QPq$5WDed;{9;Nskf-rx~hobFH1|M=Jh4mR#=}?>tsJI$1H~ z-sa1gCh6Q@6*ohnwP-Egue+@FJH@vV0W+td?|^+b4P_5`sdCDNRCiY0@FE8=VpI+JXR2B3cPgAzfd?_>14TR~cef(~(A=bk%R z8%I)cgp1N>OT=gMntn-nKW8&qjRYPE*vMbPPXCRk^?!hagF<2v?R1D51EXmmK@4YW z=>Zl1eSc2KHo(1OtIp8_r@u$XPUp9c&uDfYcUWdNK;xhl8{HGL@1-Ilo~Cyk zYwIH(^+dkrCclR#7ZL|bs(TcJ1?)yg>^>gs(F$d&S4_U@xO{MRmB0J+?cwFCr?0;; zVNqY0h&)S*-mNnb_IB2w%3^!QnET>_{MMsI(fmCvqrNSlO1y@OjZKY6O_akVM}G0y zdxecI=`Rt)=qF^t4nW(pA_E9~bzgkc+ln6W?YKL3Fsu3to9#qCktWoj`9+Pir;Z>-F2W-(u9WIo)T@?E4>3ZB>Vp1yCyyhipu#@0<6iC+w#g~|VqM+el}MKxou58(JA61;qd1~X5dWA8;WK#wKy1wjx5BX)_w z6+k@Uu8~OvB8wyLI*>am$WkT{)nV3Xlq}MZe0j_O0;d4mWAw#`)vA<>zpOQp5iHEA z=K&h5GbzhRkdcAXLpKc_K zY*~n@ZSAgHFZ4`WD@U*WjZfFQmm+mm3J5r#| zB9!dWqI`f+Jw3e&KNK_@0bPq1mwlx@{P2g!G_Kb#;4Ig##a02 z|u8qILg2H3$QJ`NAP%Uim!?EzPsgYg<{0;1Fe|BAiU7S8-RXvWIqoyRx9;1kH-UmEih 
zs*am(Ho39o>#g)qw!Vw$7ALOrIGgS9P**vv87}R~vsB#6|^B?;YCmoE&7c zJnLY8G_wz&zZ@VStU z>N41khgR$o{Y!`LoZ~9rDK^86n&>$MKeLH+ncxyE!|T$|ZpOMLw~=s!T)L|G26MT+TNOr(bk=UE8^N=5KO_7d*Ljt_$W zDPr!Ke)RJ7*DOc#z;&%%st${3C6Y8BNI5a)GpbWP$$mJdlh}re+c`w7>q0UXdA5ii zDeEx!I>G%V*~O|?D>PAA@lC;FUnBJ=>5^wJ$*5G2}Z0h&Uz*f)Dyk;xX zuDbz;CnqqHTL$udhd0-ferm3(7n(2%B$3>n4XmO1ImSjn%yoHWApFgngP>M9 zWhG~6RDNX2G+sga;^JI$lY!hIyR6%S?qSt&M&@bKvRf?AXAQgbDEjwHCCK{UwvE-k zP8zd_$fnSilph(MVwsztGD`yS=oj1~ z%V*8gpX*YE^2KPh#dEI$Sr+TH&Zyt?XTN(qdJQ4r{6<=WA@7s+g4G_)X(<%?w;tEG zIWc)G_&0)k|1%}trf2Me;taV+U7&tboXKs&?SHR-`r_zy->xe=PexeX)PD2gRAj3f z_fEyA$^CV=q;7t=wemyt(XlHL?YEAm?mc_$+0j$S1^?!G{D?;iStupEY-YPGD8yEKk=nQNd zM0iL7HsJ}lirb;O-J+=SH$qp^dz4^{g3U?`K4Vx6B!a!O{`orh6TH_D(pz5S-4gnz zj)8%*2n_oC<;!kiVL$ZCRM_hZ2u2L=mJ8QYU78g)+9(b{&_e+u0M*|jI0BClA&~}Z zTjO84wCDf)8`nE3*ihPEp|b&dD-QF=HoChfhtOy1AYf`_a)HtyoLjVc2*uI<6f0H3-G~O~rqII()E@pSJ_T zuC=bX?95@6@KO_+>L3hKIhJ0m(~eN7okxy5YiLj^8ek*rMG&$4lE3AR{rP11cKZo_ zIb}N%@*wnvbz3%xcEyU;uC5dC{=wpl!ydpUZ1N+cO@vE_+|KH~RFP=R-_q}{S~)sYsr9+|BCXcP$}%E(B|i%iU3BlLU_`N3o&3tNn+csLxq z(0VC&KEbX3_3}=tnkLo-BO^0&!@Jo9dN>G?loP($CDguuA5M0(d^bN<{p-==WncP3 zUn`*6xkt3@=r{}E-~ieOL>j|LYj z2V_ZhnER6^qW#ZD-q5%A=g)sfjz4N;E?An~a=*TF!l)@6AQqjUH((}fhw} zq{5hnBPYW)fVl*aDgukRkRB)Q#jls@>7VPkb7aH|n6W?qb>hTINF?Av&8g)4cA+BM zSMHMOw(OK{(eQaMTSrl<;gdp1eD)u>6%4+axGX3W=_g1%Y>6I2a`K+XLGL4AMjxScES%wS;P!=rlSj&r{(4{R~0%2NChI^Xlpqpz&mg`CLVES<@e{83k2H-OIi4TBc$< zUE$pcrGW!}vQu?DHCMEs^*n85>-6x=mJc-l_+9zk=Y4PSo5US68zfv5fM%`i>Va1N z_{w`n?4!3vZt+RYsvooJwjZz9vZ1%pt(BenK)H|d#Z&KGwr8pRpl{f35E&66uz`Jj zQ}mI9LU=fAq8~qIf7C;>mj@)KFN5=sM=ic4aWsQuC;?0x|C?|hf%F$XUO4wyN zQ^vR}Nt6VcDvroGNsQ}o>-IXOb&{SY8?w4(HSnYS!O!<<)-ms2kQgwl6UVvXqHrWT(xU)6mOQG zs%^meln`U6wD0+1<8fD7Ra7q zM`n!`OCvuo#LN%@2(Yjv?}IEBzHC>}kftb1#WJ0LZ)~wu8;EEi-gg-ptnu|bkyUX*kvrhQ10 zFNFSK=EJ6E1J~?Y6n3AS;k4g)K4Ug=QLy zt5H|37|t`64%^+Dpwtz%Wk>D#-lFDuSeZx1(dxE#>5p9_)m{-%A2f^8O@s>rK5UH= zG;q7RHL{>@%BU{eXD-rrtWSVjILnbukCNnFxtvCIyfY!c>0BLncB7f?ol=xN7ZMZA2&=J!pAC=n=H?SK$|l 
z@D_~K>V|mQ%$|gvg0O8M4s;)~*Ln!fnO&kGVirIne7&aFD9jPf3#7LIvLu%Lk=}V_ zW+*?7Vi&)bYQ6_{4*;@H0s{vu$Ao}01FJkTSLv;t`FrcmiurlMtgNnW^vXSO;J|eV zK2eljiB>6w!wHqD(brm?Lzvq^azocIShuB#{q4BEoT2C`3d_AiZ8K6ftki5xT(=@e zvOP=Uud_Cqx;ob~Xy(XD=^r{-!R=sCBqM*=j%0jxUDi#$tIgyetR#mc;_RshCWML_ z^L+RwRoPcf7cz}X!bBpIqV*bXvuxXJ(lXW8)mB{hw<7g)Sfmg~ zQ(k+IBK7*to{H~gwG)+kVz%3lJPK-Gc1WI0yQ->`YTM?})~m~pUO2IjWqL#hlQ!hk zPH({VEtWnRKRQ$!RPrILBlrmFsUo>K(Ek?gw-*=Dw`c+OC2}e-o^OHV!xeAZ_DRk^ zIXc=C3I9Oyw5Twk#1ED#j3nK_AU^7htfe9Z zG8l2a^oU10JA)~(7-S|#Bl}9u|MyAJxp4ef9oKc8Z=;&iR5f@%gj8U+0oi@H7wSJd zM1ug6x{jueEx=+tRPW>P@O4AwNxq~5^Nc%IOU!lMrEt#1C2e_@=kuyjg|zbB=L5N7 zpJOhJCC7i6>$5z=U13qm!5)_I{#Ikr+mC}Gb}8wpB1gy}*DK=oSa1l@bI7~KaJ-$j z??2Feaxu%vaNOp^3bm`s$XHl;y{94Tv2N${&HO1g6wbyqr4=x5+^0)Bf6jw$O`G90 z_ps)WsyiL7yO#4cjRh#ExPsGosmLgFpJfws+a+T*WI~y6=piGbFU3oci>pYIBF%6l zDBJ&)wlWe!8$w5tovC-BfQ;9x5M|p1Gd<+WLiIIDBplCFKulQ@Ty)p!b^g`@goO=^ z^!Mn+*YgLmP_lRbDgwu)-~2pnF-x`xhijyY8rG`d>oH$j=-HOy- z@8$Mgj9KImQ=fgT>N0=7*d?vBvL}!&7siTuVH)i!p-Z5*c&4Uz${X2rghefeN$(Xl zeSimHvbTOR^L6=`mZ0rYF3+16R+QS_s*q>pyq@dyT1DiEvW%+m$%Y&Jhndpfy?UB$ ztWsK^ZM5^yXSHn-mh-~0Zz?YyOMUn1`p1wnsv(uPqdiSY#g(StqoE5Ts!oJj+FMCZ4)yRz&i4{D1*Y^ z()O_%57!w^&-665q?db|4A0qILi(u_nL*0XOeO+t0lwA+@`pqu^EZZ?XzoD&w9(e1!d&{VbL@R zK~ndK4#k3z%G=Fh`QiPy=42W@+!mgjXpO`@VpYv<>6@iK`0^<`pGM2LtfBI3%*i3e z*N;%>#de2#h>RA74_?jiESn%#-kfl;Cau*U7@%4Rj;EG%PDwvPQcY9a(bl;+)ad-X zmHZkx8vN%;Qv>(pIP{uB*_|<|G;sfA+^q)v_<+_Y$dsg`+elq%qeY7Rgq;MIW9}^ zPah&D*|Aa8i*VJM&3-bTxSzA0bKU#~`9*D}t&!Cmw3V&%dN^>$Vtj=H8HR6;rp0#z zI*{4WDCI)TZHLH|k5>n0_A4T)3?lBkb4B^B0|TXK4g()nsidO`M6#59kMKn*4m00?c-yxe`+)0=>CzqlFp@fp2mr$S@N#4Ps~n}nG2FZnNKqLj7&wz z?^=G`FVpYHtLN_9P;blF`&s7VT;<^A@fr3W^(u9(iA|AXqZ9oJb1kG!>765_gKiwB zcNuq2(VgA)nLe*juvdUbbGsCWfcxtOlO2W)$JOdb*HLLw9}~B0Y>50}n|3F$JmBUMyf*OADxLXqz5g567`gt` z^Q`PNS>nzIw*e;VL4o)}uObAx$PJJmXT!L5ch13X40xXaP9W5;B~gSIxb7`}Pn*Q%`!b7DI+||KUT+roKicN8B`wVk5#D2=WKwWNt)F z4}YH$6L1DLNh0+~aqZjZ4uVU~u4_wpE#L13nsJ+CV`{aa&hbVH^+hzu4Z{vZ~z}$5~SxvUf|Drf4WA**jH)~b1 
z4n*+mEz(a~zrLANS=ilRUm~}qjbqD0Mc0XnVx}$Cmpgs861`jdh5WL8BI2pnu}SOr1}`pdcAm`bT?%*gxBC zJv;htYti&b^*HI6GkL{aBE@%usAyIgijP%m^yfYM%16r%4h?k;56dB0IlUcYRjaaH zC|f&hEEiQ5Nxy-T6-zkM@hpaDmP79g+6~Tz%=MJ$Hf{`auxNb8kt}o|bybL6w?BSm z)27s+q()vX!QTtQJVId!%IkD!O;wltIvi_Zk`-Am7#3^c)%Uzy2s0RT8y^md_y3cz zrQ*AU@$Td;Z!Y_guJCxBsZFhV{qK_^->%`%_TTTl^w0L43BiD3^6y|YHPo#UQBk|0 zi6C5tSwHBN;AVq37x)MoE$^pKzwZ;H2eD-bfw!>z{h61yG6&els#U9Ukm!I6GD1d&;EhL+_uuPQg}MqkrFw87<7u=i zBp!fYul&fv5Vj{4C~#v!`Y0Ia z{@_77CNt#VCn`tayCVV@eYoV&NPYtKEGBLPK>wbX_rGxA>B@CWc=siBsIhL|+=o{5 z`i~4d;D5v*tQVosNoQrT!>tt<$Z{nzvDrfR`t?}D`a?w0G4!&cPqY8^jlBL>G(Q_j zbUpzAC1Ab@@buDlV-IK)o*=!{-24#YU;ECJ|Me9m{{zowZklM^2+SHM7F5l~GeJmR z&_Qw(3`nEMF-{Qs_k|7p^)V2R>*q%>5y+Rq7lIZNh%fC6R2|s+wBqZjV1RDJb%A;g zEQO@gyyd&cAaH)ahI;-t0Dl=3_5vcpA5bB1aeO0P#FD94lpSFUsl9PnbWQ2+_klUa z?AJkx$J7?%RS@BWr0GqdDrA%)cf&RBf)wz3O$}(6`{Chw;a^xFxq{e6Y|n3lJg#!W zBTrX?%pp)v`Ulx9-6JFNh&l`3;zA8iN-pS?zLsn$82$PZQvUqe-$a+-Wp6oRf?s)v z#93e%$TVpV{&U4XGdx(jV&C>)N>94j%HN0r0Djh=0RNx2`>$wf3cpWrKJGA))J=Qj&jH*~}1p0xfKLPM{<{9M17V>j> z$+_3fW|(P^i)ocGt)5G7P(2|hw+=lOp$lxoJhto`6G#z#3LOm&t7jg9;lG#QVOg6c z=$+Wm!8mz>&ds!++9@F5gJ?e}oR}xMd3bbCFyYak2y7v}4UAiC^Anjk+uoi5ig%m+ z^yJ{MC2v}R2V*=CA0pBTf%zR6Qj=_fxU(d&VeA#caI4@34(Z!$!6JlF!Bd9a?skZT zgIa_8#%3u(RD#wjUpGg+A09tjn=}B;+5pd20=_ z5l$$W7@Qr5uSZhhy2vfBUWo^?wp1fdx&$W|6NpBip?@KoSs%do!4|M_5E_}%0iyz&U7+y<9rxqs} zyU7bZUS%PNy4D{&n7)@Ms^htG+C6kZI53HrOKBGkbBWsPg^e*vgVY=zmZhv~M}RyNzXlaFPMjhl{@`(Blo98G z;e!WoLiC^z!>E=!2)~vq9l+*J$1d&{8J+e3mNG)=tE#FRs71t3qGli>8VUITjweZfnYvS9?5Xw$biJ7PS6hO!kW_o4g)I0F%5nlfA z4lgEwyU+<9Ss*DZw+MmWY9uxNK9F8p{M=)1Ve;m|z%o{0Q&&hgiDL*k#+5JDJHjl2 z$$Eu}wQ7xz{ME!+1cS86Bp=85zGtjbl0or$GKZjCRZY>2fAsL-8BaDAIkg|B_H!TL zx%Q;$1r9WktfZ^hMFoS9D!rw%+jF~com-*-AD58L3Kz1)t*WsyT^39nee!H-*Z72t zR${}|__v??rWX6;?NqETGi1%{3m17Y=rtyBk4;+-$J|2CPsbt{RqWN&l+>)Pa_yQs zj$9uj$mF1*!9X)FB!R*!;F$#6>njG7VzL6XuDf`69s>GIxHd*C9a-H!+n^PD z*g?-m0j;bIDhnv930fG@#AMXOVr(tYc!Pt~4RtXXSh$;*K}yCkG|+Y3(2{QA 
zdxZDwY@Nres_+@?O9DJszb@?5YsiRD*u`2T%D};%sZj_@8f1Un#ZPBsaH%tg2C0`Lf}7h%N3zSX1?3Ky+t764=GK3hm03zBLz;jvZqp_jmW*%+X2v7#5#6Rp!6x?9B*&W#i&k3E#gACChv?ye8Gu ztem~7PB>&}J~D_diOGn{o1(z}8{0ly0$!3g8@M0+_`7l4dcG`%Lm>a6CW;5oc3m zfG~forkcKOpmy-YrxZwHU-zuY%gaM+e8VA5^<8QYL-_}W!EBz7sE-hr(uW!4KTBWY z_-sDwd3)C8R9315Nvbw#g}!=aU({1j zFh#%SQ%DF+{{SCrv5K0p+5D>OdX|Ts4F=Y(G1se)bLLU5@xs{i-Fv#Wqq=CaHyBu% zMyvKN&4M)qje{iYOj~VO9gv+GUyqOnS?3X< zT6T7Js4WSh6$B$WYMaT{$5M{`!iEYYPn=-%9K^^Vz1{^)PQtpClLV_tA?ygZ$`*WO&hKHNC(9cIHT|R2`0UEZG_V(2K z_U!<0V{2kPxRx(6dV3JJmJm^fhuD{xnHYihg@eK1D<6m#%m6|_L6c5UcSB}I29XxaeI?2? zAB+A7#&2ZMZ_x^p~cg}UL^Uv|Z zi>(`E=6&C_*7MxY9i()0{eZNCA>_DdP(u zt9qdNd-kseL~LOE3xJr}Uo7)H85{E#vfi@2V-~*Z!Id_eM z_(}k%8*MwRkRn0?vU8+sVs$K`8(DEc)^ZQ1m6!nO9nhUjdlOfjyn*L2+LRXrsqg^H zV_=&Le4bT(C4il^uFHf;OIw>EOxq3E#!%Y;AG8N{@GH<;A3X$YAi%LJTiGwm%iK@V zq8ks#6QoSKCSC#!Pi4*4ok#x^ToFaar`BugQ(uXVYD9?u>wwO$A7VrQ#0^Yx>qXq- z>%ZKZy5HVC)xJ^xUyPb;VQr(lLJL*eINZ{Sw$3P+Ux51^YR~jL&MPr#+z?x~F?}k9 zw=pr5LdDWX`T2#*%fpdh=Y5ls9Ba&`GI$pSD2jT~y);$mXnXllL!p#^c<>XOcz_ko2ss7EWTYX1vr_-qf#O*9dtsP0%hFE8(N zaIh6$IcQt}!yGho)yw3^eV^y1&l~Dl(b2+a17&ouN@yFH8%Kl73;Jfz3qWgDf!z*x z<9#OJEgi-$8JG^`%=usydpRF*-NGi?DJTOJ$y+5vp4orMWL%E(Kpiml8xDn{JI2+0 zV9E}pcJS^n7LZwM2Lay;m=C}#%G3xAqG(-W?jezk+}yvqcjtFQPJ0UxAkz$3O$Pu= z0#jv5U?0ncj!zEykJG-M!dfw!3xzSJN2cKEc@Y%UFXosxQQq>xz_y0Hw$7p|89YD^ zAm0vDKKuJ;-rEnLs|jX_Hu4Gz%jHwHR{U*1UO2WmQ>iBo)kxwsBUBdKe-Vt>0ND;5 z;I}Jr>Ay~@XYYUinW2)s0aLD-xAny`Pa&A4RcE3$q0+b5b-%F8-2y>K0PGkXUrebe z;`+Pyt>i#ANsI3PxsH!$R(5CkWuA9uHO|y|Tz5^+9R5o;={N~4$NP6D{QGkyjtw*L zK(c=iYIFe3g5m@ybif5726=WEz)2QO!t+G|Q6oA2Wbb4Q=%X(8!-bPY-gZ~oKNCAQ zSAgU&%%Q!r(ZNHY*a5kVgn)4Jp=@Uyqq8~vlx=wI+4VBs66rN*+Z=`@7ekJ$s5uN~5N4tKcbyN>&7j0u*h0}XP@ zif7*Tq}{Gte0NTNXY}w<|4h?`2k?&@0(Bt}!J*A9Kyh7D+4(PLuI%IA5P&5}HUhN0 zsMy$1>o<_)jtDSK1>MGIowYzmH!tzs#gD~yOH*8+%pD%qyiIhn9(hJYTd>&4yhI@? 
z@KbL|k%`scAKB94IygKv`THEG&mZSDTl(id?q_O;7^A91sK7$Md28l2q3z{Z&*2)o zt1J25v4D!Erm}0LOeQ%A32Bch)x(D~hhxJ4bKn6XVj$daq-funNk{`Hf}8Kmla;l> zFreXUK(Wk8K=tJ6Qu!z<)X{%_$?V#kD2V{~aQn>dLcq4T$ZwJo=eC`}1~y?Khvm=zsW=>DhyE`VQpMv^O*Rmr1WZUd9Rg;STFDM^ z2xfr%nc{^EV!3KfNpv!R;3BzZ_dK$tMI4{HGEO#%&;eE3Iw)bPfR(R7CQ=V=rhtoHPkp*Hc0!MUhv}n{2ZcMHLi7uP!WSs=;=5%;kRMZD{Di?REY> z0RIdKXsp{$FE2&F=J??10!3H6gFwcdhK@-xo3%m6g4f6E?Y4x55&Chd^1Ol+I|HG+*EEO*)qi5XuL{ z^y`^lOmLkdY98 zYU@I&#e_nB8S#GpvcK5yG5y8Mofb>XOk&fX)5v=FW}gSFs>rfpGSZ7i8m8?5hF&ErvJpF>~t5`cQL&@Aqx=iZqAUZv^;te~FI#U`Kvt~4dtk=i{Pp3I2>{fV2c zP)a7#Sl@=AZT4JJ+e4jCtuhZS?2wS*HpG2eO_da?s2`m*@cEl5t^`nCa^NMqu%PEe zTK@0RgZ=l>^QpFWneze$B2VYpAy?RQ|~lCDJSPf)JBs)>*%PUnG#!8mZ@zOWAJYk z-ikTe#uAe&z{f&8nvY$Ducgw1Jk~ZvL4V>q^8ua4S@ozULetE_JtTC8Zm!l8ti?|5 z_goEt!XTz1%9QOdH(dc5tDe;`8nQaTo(3%;Xn8?wv&2r&0-e)>Ba!k2hpJDh@bQH? znfUNesPdX1Q^mS&dR;CxrP~5*;MHS^EzgkBs;@$oD-sWh=g!5bgYYI}%`~ib&wL%X z9NPc@uUT$Q0iYBx1=RV|U!v>1Du#ccAV8=^lknetsIN5R6M<6ioxE`>Bg;`9D<99v z&7B2o1A8wt+aM?X*|XBx>EGG((K?@Cn4vwB5MAeycVx(KMc)@-!>M56 zq3lbxtx~oatdQUg7_cMp6BldrT!7NCbT{U2M3qftq3i!N-v8ETiqb0lgE@Q;t~`bx zfsmWarxc;mV2(9W1L6D=zwsQ{ya4|W4B&_f@P>uQ1Hj!os5y{m2uLt@6>3z`gsnhY z_uRoP3yX!?ed!xoT5o->!e(Y_SSgRRV0k{1+N=O2J36|lqVk;DW`BPcC;$l?5_p}h zr(KIw4|mNkM@LU4%+`Js!Vu#$LIZI=7!?9LC>!8LfBl%FTw@(9RYP#H<+RJZaOJW$ zyrV4c{M#O3|I**zW~wWN5;O&B0D>G_meJY0##|rDK}QGMP!1)2#UYmbdbBLIB#5!nBkEq{aZnE1H}DHl~?m=XR3& z_h0MJR(^<2r0lA7vk#%JO4F~aeaum`wh)*}AtBZ<_)mKp+Kc6NalSWh^ee0!n=WK^ z(u%0W&5@$9aW{nG-ne;K*!rJ~yPAY~xnpOxF*60~`OSE)ho7lzKkRx>kNWw$?JY;` zqkjN^0IUTjsV|UY*j%7+a6g`w)r`jm^%||WQ1+mBuU;>h($qOtv6I#lDk}uqi$8pf zA9Sz~Mu}`SdA~Nf2LFp_`meF$fWs9o!$+5%v4>}&^8N<> zz6K$FpY@Mv>fxT_-f_Evi+3Na@LUiskJoc8{lc?RrSnS|n+zVh(Fl$dk5rYzzprt# zX(`NAy9ph!r=Vc1^BTQu!LJA<(|=o59RzxYNppNwWW&HnWbtR93{m~h7}jPC-N_kj z%?W_KSN6$2<~_aj7gACGY?mD;Ur;tuFP492{JCZSB5~leuHS!3#D8up$MgSe9e=+J zVzTr<-&H*Urq~b&?mu4#KK(QPsrt+BWP4=h$>)v}wZRF`^CPN(8H)~xr}NQIl=YvU z6TFw;qIK)~Kk)xQ|0?>EJ<0#cz5kyxrkQL&Qw2=QU8N=L^z7=O!xfX^1aMf561MV? 
zhq$`CEzptsg%=xC^da@mQzPj_{%hKRewVts_Kg&lR(bn-M=euRRQLg5m}L_`HQA%@ zlg{rfNog9ZRrD}E3f=f4D*Mk1BU4w^dugM{uRkkMSyct8eSr6lYkqY#EUrQj#y>Kw zrlEIK1=NK9^R-w01h382)!lGC#Bdt|kV7#sabW{}m%+8W5!rvX;Qw5>#imUKCN!AI zR-ZH&S}ij4GEM$PV)p;IH{U;IEIt}VZP|deBU!bd0~#06NkTf*r*H|>*ZKK7wlymP z?nirwSZZZD?ZM~yF{!sJ7B*-c1`jsa*bBSYS7l$MQBBoyB=zF*)zOauz!Rno+`B7l zYAWp3iW+@-E$h}DJG1iZ%kha2-_Vy=9(~^<AA_hW z@Q3^>eokdNXDa za-oh_)23Yg;=GdTbf(|%3&Fnmi6>e2gb|f+<(Wl*0t)Ka8oIvgmOAV3RD&V+yYdMx9Rh09rkK~F`v-w*Hafe0!|ego3qO~ByXh_>)weGn z-rMyq?kwk&@D7pMf_ATru{R37emTGEFmKF=H!HF`O%zEhotl*T$}@iKWyjwK*W{M2 z#S^#t0Mpeee0_|waWFM5uHORz1!;+bIua9IimJ8@BGnp6=FOtG+PBZ-Z#>Ek&mj^}WzQ+S zjvN0d*_?kbC<{pnk^WxyN)Zp%A%&Q35BENf%z3#YTtX=+WPb_ka(?7ScufCx;~uzDA4QpYpt3* zd)GDg5IMr|S4*i=2ir|?uPJIHlungj45D?urGG~tGeTzW=N_Es#7eoEtQXve&PzC^ zE_6;^!rP5nL;s{UsNrw;JVHLY_gzD`=p8w^C*C`D(eM+A)XvVo`VxX=j2TQFF(vA~ zTl8HhNPR^EmG75nVn{?^VXlynV4R+x-k~FtfA!%*pu^>me0Rc;!M#_MU&oCvN=S-% zEL%Qc5mjeM9eNs2ccDIEK%t9*I^rgG#!rnM zjY#FMVr~(tFX!#*We9w7;>d{Kzi(-KQGpQ3HFX+0F^2SBjL(JN(JpV_pkpxO%wKCh zlhWA+;9n~gRT>54j2txuM1V^KR(VdTifx5(xMUxJ;B*7OQM(I-1-)?`+| zno(3=KLK#nR0draijN;dAv+>l*X5qiiJ(wGC;(Y&p0)P(z%2mN_XzNmOhV=WXBOD1 z(*fjA6aJo_*KB%X)lE8-y4R|>5G2=nzqGa%1A%a?fKE;bKlQIW)zQ-G)Uhixb?91l zDbC95A*Cl}t*v(9jCl)7E-rX@ZGD@Qvyw(ona=aq`yXs;E$P>c7UOj^sl^3tag=yR zHVZpb`_5@*sGk9GskQ#fR+IYoFORD&6_Eh;eq)uFSzaDSsVXi{d;Mha7JhP%zWn0X z1mijn$$PtAfi%TeA?dso*$rC2a2h9Nn9P#!p$!N%rU{ z=sO=m>vi$rr_d=KYlO{oKY9Pi3z?I)vG%~dT-RgIJsgd>y(UJzzfd!uuI=;YKF#%B z&-VPG40kZ@ZV%z@P1gjrG|=8R^OT+#QYJ3jj=}P}DfL_^)+?h`sJJdd>5V=2V4eG$ zw{KeqmzXOL!=_|wc4A%DVj~hdf2M2jPo*ZrD8c~%I>q@?cr8yj+Mtu{jN}ETJ8!lD z&aa1kS3i#@sW$MdJs&`>4#GJ4N-W;Syxl(AJ`5H}o3Ly+K*op`d+t%BnWiqcUNGIG zoDWXQA$tGj_B-ExiTLJmQS}N+#ME3S=t7s5SKjlHCmaX!6)9Jht2)$^&qbYFuX5Yg zoH4b9QtY@Ejv6;v?j^?7$F;VcV0byRLAlP@j&k0ofiE!FMb7QJYso;9q|Z>$%ccYqIT!U&EH9w z8!w+Te~(DbuR+ksgy}k%ARPR@9bS3fubfKpI-U>_6fNBvD=gix|G)}#n!z<7m|Q}I zh3oP7f!VtUpmF{w@Yrsu9?(Y4Bje(*pX%ExeTiel`SbvMT-H#F+DC$cVKC=Pj0y45CMdeW4zfs{<(iTx08U13{-Uy~!75YS5?;KQd3#fs|mV*v0eR(R~t 
z;Ya;-CwE|Z(du)9^G%fl3EG9>+~YK`vVC-dUR@Hmqu!>w-@Mz=9c zjU*ZeArB}Hr}bk&n<^?w)#;h3b9m%0%C$}aUQ2hHyEvBbNBd^EV~@Z(lTJ733?8TN zVRTAm1`nQ4(lJn&a!fvRw-jlfUDwJmnZ>_0X5JqU8}v)K8jneMS+V}Q+>FUxE!J2- zlg<;)`rA1w#rSyJ^*HuhF3!!(v5E`5S11c@I?pJXx$}{Oj}K$YiF%)>#kJ*Ch1<`` zd&qOEob)CO4{e-)*=}`VEF+~4=CeD0rH6D+*T#tEw`6MkzzG|0aGvvierRpCPT($H zUoXMYy*XY~2(x!sS}jzF7mK;r-7z1!2U^^y$V53cV+P95m6S&bT3Y6Ols9RP4tNuK zzGr6<(~x{AF5a374hc~}Q}(H2<6={l)QtULwcFd&#>UU3tUZLcoJ4qdcyHe(eE7KI zO787P-MTNgx0KRHEZxez*lFzy(n7>;ZK1^a`-b9N%PeYTRw#k{S+U6-V%h-peB}9M z>`sid@r%l49$g#BI(=nRb<#HxV6a{}8;%>#5aVpV@G|MzbSIt}XK(0!zWFup5C26T zJVa&*TdBzuA(KMtCwLki%~S;}VRXd%yHj1Xb!JApf2kW7c11Ewcpi`DAKOjq*-({M z&1-(Jo4UVe`tl%3IiS?5ozMj_`4VOc(U!bk3ST6m0oDNN$U}Mtp>bO+failp`gOhM znc-RpAQY&mr~tl8*#!r$h6!0rWaQ1K3Lys&p3agU&@7`_xp-vWcn!cK1a|GZb?z~=Gh&GJix2R?D$f;MJor@rw!1_JP(DX6e3_nRvs5dV%?V4v7_YID zq!PT6vsS9YhOYK>1Aw zMN+c7NtO{P(RjSkt)w)r<^LgcT^yzxFyYzK>-9BBz0kBY`ydr7LUA zP&cl0oC8FwQJL_8SCe_h)vZ^3N{ow9zdA~+L(>_$kD@d6i)5cM3(>+bW8|1-nd4XA zSgQ1R7gQ(8`#dNRRsoo<06IXOTNanFoFh_ z7~GkKW*7L-4YM*mO@TUWek;M%qVi(L#qeU`%QcQJjhbSoTkK1<6Y<-h%`r&-dUnT; zrw^{L*Ow|9XlIVInoRAPul9_F9LHEYr}LG3&hrot#I=o+#w}wlrVUG;2x8(|u65@` z0gIm4xpMF(0|?v#e-6BEQ%G)sg`o9-FsqT}tBScYioDH%h0LBrQMwqpmrAxAboky= z3sF6tPo9J%oC=e1=s(XNxl4Js=jX|6mf@!?d!+xf;-T6NaqF4-TXnNOa)pMMIn7#C zmi!llm(N_32xn@h+gUm5F&cWVd(DO$(RjGcFgEgXZyvifI*)6*_vAGJ5EMCBh&~X*2m{~)~vs^gl zX0vyP*7IASGmWlI5*b#VQunIjM2U?kmM?Lge*YfP+*%RVaaxIUiw;RAI37@f5RND& zFVmq@yn)=o(50*K{Ux!BZAZX<$Q;@*pDcTFxYo-kB0}lYcEY#tkyc!@+*4R2B@BFP zEZbR_iw(P*8>Hh7(6)oAqZqJI$})fl#ve_X1$A+3f&f&pi)qJXF*h_*929FHssVF2 zclT0H7P#i1eOds3Qwn$>AV-n#oy{c()?@wnF;&v~(Lbk|M?$Zue7G#LxGez|7%E1Q zb+Eine~x*M+-d%8()}eYtX$w`9H+r76FLO98nUC3G3XAACi}s357-TexJ3AICGj9p6=Mx5+au(JB!EB>;wTk zv65Mz^cy2Dh=wPPzJBUZmTVt{S75yUrH=hP!)7Zn;p}6X6_hA%3)z)21*xe^{)BYT zGuziV`p&RxGWRKGp9r@VuGHz?E@#RsQb(&5YeOrN3Ug%!W&}}###ckeeA?F%ZN8}u zVX-k1-ZzP>L{v%+aNhoqr4o&GP|e|8t4aBulYQ7|MHFpLBTjsu;+ap%IpjAi|Apzr z&;!lp*<=Ndq_WEdnrAuFGFp4DdBbBFlL*W&*%h6a=C&4{v<*TzOO)Jh?M1GzCY&H| 
z<_w8loo1qc2X%Xjd-A*ykB!QNIMG@uoujyug{wkpl5!jt_aw?J={6zfN3pF1Y8HH5 zq<6&s9C-_pEE}SMj{*qU1+GSduzzL- zB61H7Cy@%tJfZ%FYvVKrmeb##3>WB!_P2*hgGFb)!P5qn@ZU&NE^FJnNW)IxM>;u!}a@ z8+dO$UhB_s;*w!BRep)+F0OLj;JLjYd$L|Mcf`t`#7_^X@hg`lY`pfHWYc3m==;Ty zf1QcYAUr=T%{%~q?cT%?I8{7<;qdEMfo2&7V94g_Q(=n_A&P^FTM zJu^oMMDjWcIqmY)WY@n-DPq{QeVvx(jlu-3FVdOz)pS)g`h5=gt>JiDv9}2c9fbO~ zIEj`!293lfi#cf~XUVY&-{J6lsI$S1tvP8xC|r9!dY_)ordjJZaR@g&9`2v%N~vIp z1epIr;1&hstRl$P7tk>FjrjKGVIe(PFbHI~_2) zHauGAuDwQKZ_mP_fT`2|P6eK@{UtfD#OWyM0aVKSaeBjex3C5(?TrKq3;?%*Xo*>2 z-k%KX=T1eT(&0~s6uJ&HM* zIFT4d8YaIuxmt>lSULYxx~IVroyVWIz^QfY+cG30u3y|Ld2Q=e zVJ~ONgpkmUxD}k1qY7V}D05}0XVm3{Y4;}6h&&iLm_s(UVu=~Tu|M4KAcA^SvpF2R zdMV!$DAk$FMJkM>jC7*MQjUGA!UDrZJ(-Py-`%IySc|x`tsD(~z*@ZOSbW6g&2l1Q za-S&y-@G_{Q_%Lo{iapcKy40UvMg>KN?hff<3sO$71?12DlZ<66ah9FFtCwpVl-tVKQSE zg(gl22RZ|G-IhivnIPoN`bei3DVeZ?RaVZZ*wiq#?lPN^PsR0mFF=v{ek{=4>zfYQ z)<98jBA@9s0y~j2M49VFtEBAg6GzOHETR%#b(_bp2~70-r^=BG&29xo4B+d) zcTjLIssi>%L3c%tO_Bhc?kPJ^OIBE1Vy=T=lU|RH=!OX06aD;jvJ)$Pi%S==p}nrx zfQdVMrSm)85e|q?fR^?ZP%nVoptHAk0myLp%m;ixL6E0jCcG6-r zYnN=_!tf))*!5fH6>UQxYP>hV!23m3=`clDno#3Pw+5Q??Oj)G%Pt?;lUs!Pr2Il` zCN1%=hHQVklBK{1vE#;2ta%>{c5mYMFYebG%B*o#PO?*6mckJrCWj+D(W#jSkUa^*uQr?FEC4431uKPK^Rmt>l}u*t__Sv<|$x4)oi2%}UJw|O;^q>Jchndq6ZNoC+*3W^WlEP20sXv^L_ z#eJ>u1?wI9%0BizIHIKF=TLrG^7^c+r9GNimCRT6W)y=_?EC9RR=!-a_60a9<5Sbc zWew$U&X(5+P{%y=JUeI&;11;&ejJ$?s|4!OUJn5yBSS9AP#~+3N(G-@pILcZ=k; z#$irY&OOSzhKJFRs(31b`J!?I2M1gf3Pm`VmhY#fLQF;5Gw)qU4l`#}w9+u{DybW# zZGd~BZjx0fKeg&QcBR1F$`~{U?vKky3A{=t^pH{cPJx?KOV2mNibiERf02;77}1n@ zYj1@M?bhO zSuM}*h?xMe#DpSqaQy?NR&3pG44^!~B<0kS19?tBcq9ru?I$AV*hJy4nM1n`t9+mVEMS>J9qB*3~ISx{rZ;+;5xB`HD6#w#=V;LrfBAA z28$-;Mcy!?uO6n_+4<^`kQkxQCxPazx9lONCv-s^n#G7Q)U$IKwZ3~Td(Eok%dMii zFNK;UJ`#4GVFkDhl5;)+LSaxB=*!p99?f24V($u~-l~miAE$o`(6VFgHHnFa7vmPD z(JAIl96O|luaGOx4W-S7mo3-a+j>GqF1zMVGHzf07g2rpUC_(^;CRH=u<>0xl(X{e z;{JWR*X*)Vd9ww}g16~AfXkcp=Vv@mlG|eijZnpR2^!F)#A~dL5bx*{ReR@|$7Kul z=Ja`)UvfW2JjE$8=e2Qn;DL-nP)gl5R}78JHQ}kv2)q-vkf0C-17LnH7&rf*o4^iq 
zY=FBb5L)IRs26LJPGb;NZH!b%EuNPpT_UA z>72r*R)#<0Ef~5Jjx09e9FM)@k@x)R%2_XAnW(|pzMMw%8B~NW)hLXyeow#Q&7fZuFlf9wBB59)?!zd%x_ErsFaJsi?sYe265mBH zmnNy)I_pg{7I-w{E+W`EgU0?@5}jf!WXw^_m^YNeQGZtNO zoXSc{96%5{+O`!uHaApI)(8Ww-uf8gA<+25^KCfNdR*G~F{RqqY@g!p10O~8tD{Us zsp`8^<#ubG%xQ)|r_x%l0}Q+Y-Oy;E$?hfRpB)aI9pti+1@y$iV5!PYV|eZ_z9i*9 zg?;<>z-p-&7>&`>`+j{+gd6DRx7Vzk|N4g-Edi$ZhlXHa6-n`W{{!6nXw$-N4lupU zfcHAN^QI={GZU>y+SsJxT#(zx7!vkEevUaPCa%XJEv0J! z7Gmb5_bs>4uBj{N`A)-UM-|>y8`-?2e&f!A1oOe7M?c8C8HJ_~^mQ|3TUXP(8^2ZX zIZ#qVa3EFsUNxH18ob>Dd+D;&r*4nAZ@>NgIm`%|9Ny7i=@%Z3Ye0vC%PJEyYF7pS zxz!WdlT2oJWawql#8N#M5oRo4R6m=kTd0ybcnZwpr4(4uD1fr?A@pqJC5hgU&# zuA9GPKr2&lY5{Zh)|e`O=h1U@KsD>>d49<TIeQl%s`NQFn)_=V6~pTP&sfZs;lRMUm8&MkH{x&Uwv@&=Yy;;`Rj$(f`AD@kyy zYE4X_ZLvH=ofmbfa7{lANqP62*A_Sueaaa`T)pp03W)2c6K(3%{pc`-q9Kym#dl+rdYfL zs>f2>D=f>A0@+Z?E?SintkPv)hR%YE9j-HxTW4ojpmRAmB&fYm5!|&Hja+731L)!N z%)ze-5si8f{_hSea<-hTJ4bi5ooM~orUd4@BU|d{vQ@L{vu=#P`?Tv-9Rc+!X+4Z} zt<{&bK8-rE5*E9DMYXu;gsoO!aP&|w&GXB4S#a(yr9uvea+O~Fl?mx}!Td)@n06?z zY;J6P_7~P)iwHY6vZI}!Y|85)$8luj>&j8ty+WSrcAF!1^A7He5}pMQRW)d(3t4hY zb!gu`{>E|%jS2d~IlHu9Qakv0vy59K_O-n5={iSxYzfy7#0zx`FiRerZC{pdNwSK1^``a46E7-S=e9yLpng#N3UJ&@E7Q zDEn~WUcY8wP?1(0<&rcZM6{k&R^!Zdnf&?lPq*=L9`K8&jA0WT<=!Ynh~M5uF{kWH zqB;Y;C>OSjEM6z_e`6;ba?q+=VyL5PseIgePzHIH_5Jyokh=?_%SU7On{Jin*L0iw z*yYu=iJqaAStWeWD|X7^e0kj}htYn$-eQ#>5@(Aw;Ly=g&CIyu86*j3k`%Z)ircjYJC4T7PLYULF3VG`xGnLVR1$ zJrNu8{P$)R#pQmpd03I>7j5yJg-n7noz+MaR?QM?Ozpzh_;vGLTg^1#AtqIcqGWzs zvx^APtt=<1D$)K-BBaUhG72`GGkQfz!4kLP(ny;nMfwXtv!=zXAeP8)i@{@4!faa+ zfppUTLE(~TeihhoDQjysG^nsU68w>~{Q-_6GSFO@o&}LCudS&T+v=_Mq-H%H zZD}bLyX>F`O%t%5?g0|Uco7p^0Hnvrt*B3cn$4~r74*zkKiwflH1~D-O^qVmh19lC zE!ZY=j{Wk=iWU|00^lEDfncnIVFp;K=p5ta72#Q#zgMH*HHTL0oBR>bBQ-4WfzVp0 zsIw{L^z{54ALTw0*$U=JNlEf^*dD4Cwlup1+A+msp7QVBeS=ll%Tgx8v2RU#)2il+N)Hnr!z zuyw}{rZZ54QHwow`2Et;YZwd{OmJte!$r8 z?n}zQc}U_S84l1v0PKTGG%bx3KwktL0m;_M?R}EdZOm?|wHw-WO`pnWIlHNjOsBk*Vj0z5<#MI- ztSwpz1#0&-E*nrx2;NlNG(5!NG9UPsRF6M`sHx}-O$=V>{?otmUH&o?g;DX9oU!c> 
z+Z!&jjC*ER9SgdBk<19gF+&nfxt*Wc>W-fXLXMK!VeE97cEYpgg!3)DW|gk1VtU$R zQib)xf~`+}{4j@U2TE;j5?~eSD>4Z%X}@_Xz{we2c4YP9rSyFdqF-O3L)qB^%>C8Z^$K)><`=r0EDjBzRu;g{4V34@LWtR2lj_t|nU zQJ`0Y37}S2M4*V$I>#DxrGoQm58<+TBuOpdFl;e1Y|JzG-JbsL-D!{B*YCkK?n%9p zeEwVt(FQbEr-B3QBvxZZ5y{CtWU9WiNk?B@xU{DbYdvB;SDSr95Wa6o*DJgSUT82ew8-}&cugKXYME?Iwih+h$vHX860nRJTN8 z_*>aS0ZGodAcz|4h>7sxh6Lu`P<80A#aLVbNsRb|>zO9Zqs`wFnI;Jg38<(O2h^fT z+>pA`XR}=@#ewS6&`ncqt~D=hG4ZH?+upfZ)M}B0#XG48=ZCHTpZZ2ZumXI08%v>9$gg2JeB)>3g(k?Y_x3xde*N?Qlh_-vLCcFoqKi59;(V%Ea(#jF@~>WdOFZsrZx3X7o97C% zhMd^R5Y*UAl@T{f{4+yVdRy$mJd!`-HWYX79!+nOU2R)LW_zwSlF-n|n(k+zN)}Hs zT;wyoOgM0=K^o3^G|kP?$wmUted)}>50|I*`n0Ey*tPl0W-p7!>y5BMmXuV^Q2I#h z?1#FYsgn=qUbef%49q#d>utz3M+=+luP%h$w}l1=cf|{Ezs6Du8nVcej*j6>UQKsC zvL54n_3sbO2Q&whU~%B9!7H+E!^-J{<|2Ti+a zJ-2ao;7IhkP^FXA<&%ei?Q9n~an=D!t7tdCDqq5@8sx>4KtNtHRB%1Zo5T5jh2<`} zPMmbOg7nVv75tN#E)s*(mf)Ws``5G63Sg`(s0xoKY@jNxlLJR%!JHQoDR^0C$qy#U zA>{kFNoG7hlrq%u%Xfbq9Bc$4L9m*-SgJIc(fqpbOPXI+jsossHz zib@yRg~ZAd_o=I5laVV?URm9kPGx?)H)^-^`)iaJ0S2tgPdX(Kmx6-dbb{=*0YhgT zRdHhVLm`HYX!y>!rbv2Bb0-eGF;ds3J`xsgrX$}z z?2b)h(NZTSwS?llTUgzk{&iJIxDbnT+i~ybv*pnA9?z@hFNIS{+9&n;a&K;<_qRVE zf>PjZT2p%AdF)P@xq{zv!WqBcma0N6T{aO8)g=StR^f74SX$*2BwhK&6C#wF793lq zr^S4$?oDr`Y&Vb>WYPbDI})0=?w&Lx8>X49%6>I-gPQq|M-oZ0;t1-Dg`doA{)0TB zPrC(4<*l_qEuN(%N5@3}o=A$q(c)n=hseut-sVqJ@OLrtPe}s(_4Vh65Uk9dy%GIK z-8|C15nq4qsTjmQz&ZX0ru=XJHsA6O^tV0v61M0rOfz<<(%~8a_W;Ivu`7oPw2=X`BJ~CT{rG7F)3!N1|I7_ zL2Oq!dX}z>CJNdt#vItb?6Ym<8Tvh>Rq>ua5X}3;#F7WT-r0hvn)NW$KX+?-E=AKP zxp3GCX0OCl_m&0^f{Pqw9f2R)L#1S~)-yb^SJjCjoyNdf41kX->cWhAoDc#!@z@b~ z!AcshnfdR^U%im)WZ~vfo2j!cgcPvXx}(7X3c0BpA5oWE4{o4P!0C=PHK$BhR&Fs| z@;U>MoAa{b|-9ec2w?cZMz+& z6xICRy51?<#jGWEv@n3kcTMfPY0vId!W()g>MJ(IR#{T-TZExBh=}s9D zhHOtNE>1Wmi$Z5t;c_JRtPc!Mg&frNKVHGF8_{oYt%u^%O z8IKeDvjdXF#V$a@vdMNXubrM2i#yCQsA-!E4F9nIoAK`AlHTgT#1XSn>LM7@3RK|| z;y&Wy8nD%pTXc-&WQ5JV3RiVxi9U_hAHse4LT*M=jyq!JtHG+0IXT`=QL|5y)IChXv zA=(wzjrF(SqA5jt0vtPDs$ZJ+t4b_;wMXryUlh~MR8<%Ss(kJf6|2LljG{8zUg?dB 
zk8X3!-__WzqA_v_{Cz=Smx>+4F*E`jn-$ktaP5-Yjalv+ah8&`wQKkKt+SSqOyv(u z`vr@qdcBg)ZEV%xN;FG{7$2Q}2|8G_K1XO2EFW47**Lf?WL&wYMxN@f$amV^{Pe&z z4ftrIm-;kT5fd}LY$WowHW>69mtSW#!mKDTUcB&|x|+2XaO4+G08OtIrw9m1wA7}2 z6Y<8JFLuhepupPLIEk5W4!UL&Xp;RsoA@4$KN!3BLO`kY2e5t=B>r%7!yN#!k*X@6 zgF@JjXF64La^t@~1*Q6oLyB;eYb?dFmRK0bo9K)9~AN|sruQMBC`J* z@9M@^lZ4QuioK0^pli5rysk@`4(c}Mi2x)>C$%ypxucAO9PYyV_d(^(&!5*SrkeGp zw5LI7j2kbn-*7){JU{z&kJ);4ss<_vvmAcF#ushDYsQ+G(;)ff3%fw`@2EfYyrxIy^e1EJw!yqO}(0iIt36W#VHaJT91_>{f=Q>5ExWML;@_x zs>tZ&4_>(UXWNRp)F`8VZ^_&+YY!6LPeHBm0nFgoyeuG67dxgPu(UEo?^g0kwYe>r zwioHuNSm8K24s?;?vgk`8E#>nfZwjvF@?*NtLCC^?i_VAM&<2;$fp}u0d-3~L*z6m z-MEe}{px(Qc5AaEu%SWwZKYjYp)UW@>DjQ0?{(CbB6K3vE~KlnDBTon@hY?0x9wIp z%*KqB9S|XrSM4)anKfsQq;S!(2bD1A&l&sE9RJy{^hShp6v)j~+kBoCtHAZehy7_2 zAJ_I}wktbn&n{+l3qep~!Eh8vfm#&z4URoo+164kcF953G$M#hP=a{m;+IA*_iGa2 z;JO__SD(1-PWFZfJEp8EZycS>OnuS7BK)8gi+)bKQ|A8q>)9Udzcc|gK+j96K($p9 zBni1(3N1-{$(>aSF)5|Wf!!0(cVfVe8ahgC^E_(wV9--sU5dyQf}(DM0~|P8aBW6~ z|Iq#M2qRo_DlN7*>DT)qQwCYX<@V-yArqED?Xq`6tL*DYcL15HN>cUzUSXYpK{&Ov zCXT$nVcWq`EExUpOI)Ip9izfZ36wQ-vfQ$@tv6|q3V+?%y)E=Sx(zaisTtsXZgPe7 zsHs_2Rxwkmrrc9#nP%7$*1l!G-Ew>OV$W#UlB5J*53yw(RRMO|wi8zGZvftMsw(E~ zO`JAT-EaiBwqlw2Xiv7o4qNJS^!-!{uCFK9a2pyI*6TPMRh+r1?qMF3yp7`5o_RH(sfZ_~@BPMOj{gtJbZ^Qc> zUv`7lSuHZg^7dyao{;EjbjxxdQmXTpijaiamYBn~tM^PbNsL@?cCQZkz_k`YCRYz! 
z589kxcfv;BOots~Gp^+{RV7^do1_JYetF0)o&xqEu2bZguC1Vlo2dOk^h|s*yOf1k z*yz{@f6t(cgA0DmnkAGD)?c=(+$L_R`JNfh0s|p@#oLKuW?VIl2x_L8swQJ(f zsx6JfGpk|;HAIOrY1qWb*}w#B=q*ZDuOcNSQ>@C&=|m#^`-$D|=#S~ny2$e{VD+Z_ zS-r5(=_;(o!eS{QQNidHkr!IsM(sPhON|(*A)$tzz(^FLzp-@LuzKQZc zPj1e7Kv7jca-`vDE+eME&gV4gSEsck@#(?pdWY>!P#T6(S-Hz-PEvGu(FUy}x~Zqp zlN@)UugEcJXl3v8a8cVx&36GMOJ0?+6fNbB6$;LaRH6@CFsf+7*ZHub_YZOVO}NnC zQ>@*jny@Q}Hn;a_AMFh@!L8b~s>2Fdz0+-@6vo-sc>3|&^0I5CJ+I0)K+_4@CFVrG zDV0TUdB-a8j}`+J^RVF+K$E>Ef990(VOb1$4D9HJoR+mAt;rCUimH{>ueotjmQ)d2 zFY;(?4k5n9qI70(RHCi5MkH(2S+Gfr8!MBQ`s|ONdjaCg%UyD4le;C@(a@jwdbUvY zDY=VnEW+pp`s9>x_8%km#&6q2(lR{GO2gzfK#E zhmu}K*yg5`lgZ4gE+*1amWSI4EyyzdD|A`dy9xWF0a~w`_q7;df@{Cqu~9w}tr!^h z1Fpph#+<~~QX2WV>mAj+-KrF+>4e>K)k-WT7TT(mzPYsd% z;anCrp-F#HJJWh0<85w^V!4de(KEK2UN$+nrzyJU;=UZig?-eiu8}YSDX!x}RjqlA z`}Xhu3ZChNyhn(E>-eiV>Q!>eSmdjj&Qxc!z>DXefO44|2UleU~$qxC8g#8EvGg(mdKf)Dg&lprV zrjlJhKXXisTC~Rv$54pp>GK|P6 zo>NTMvvp(OBmgh(N3lP&vd3DWaNaYbE6B%tJ@f7_;59B9dq5T85ZAF?m*7CpHM#gl z=ZpkWx+yu0z;ls^kwem+PfYBu56WTQRb1n(N&Itmr`i-&cQC=RRk<}M{N2%y(T_(b z`MJ#6dl#Os_3ePB(Fj%M@|rS$1FjUO)rPZ~kn=8&?(GpW`kZ3RI?O)5ZK1~W?ZhE& zL3YzGPai#UGL*1w6e0C&j~VHM1k#mo2Pt;UP)oMBYR_CndXn3%3aR7!nRe`0F+wdY zER?Mt{9Qoouk3~fO*7|Wk+Va1&I+fwU(y*DKkREW6vzipN!LO(j1fvtsuVu1uxGIw z*zisGR?Y=&nAhBV^10-n+3VA*FpNB-$hW|0dL-X`>@Ee zv^>HC%2Y_6y>`K_zv^!`*D*1(R^u39BRE}9N+pPoHIIIu?Bvog>dNe7Jn@ulU;BXR zU7s-{uZ7mKs~V5?b&{PX=gkjt$$X5@-(R?Nd?~|gwxH{u!QhB>?Go%;vLav^zuq8^DnzbYA=M|q@=lL<<@m{D$^d@~NEKZkQY z;1Z4N$WS~ZFBMg?>ASQM-$iyy@cfaQBD|PCx{mzWrQvvi>>KPa>$iuTN|I zj!XLTR#bQ$biMUvBU~v2$u@9N)?X^mu_iDqJlBHizZ5zgi1#;W>2So4Kq}$ceC>YH za+v@2H>^n!zJC^OQcmHm)L`SUP_~^~3m2zUMnpYOO|$r3zDF{tH&-Vf8ok+G73O=r z^5*KDUi|jYHYK&}eo$PmBuOnUJz$L^fT(;*I~;3+-7X3z=w|`@Qw$>c9_-j{CdS^- z*t+B?f8$2yuLt&&ctyyy)-vDwJ1Jy|I=3yFcq04e_42)068X&XU*gl6VmtFRIBafa zk4bQyhI0n*zgu^D&8^by1}YwZefc}Z;mhYjg~DkM(**6xu&^DwcW#ft6d$Wfk>Tgx z9q^Jw*mkWjq)~e&fi$|52mzVv#@w=H1kGPMdflz`)?;s ze1!GKQp>_|#Ehnyt9OWlQ$y7ztI0eK-Sj)0m|AO92$;w?!|!^O)r%Gw`ThI7bgwo0 
z^@IJT4wF4{Y3AJ`_}@5=eCVfgY-_rqb;?{6ag)M*`Sdr{KL{* z(UhdHFlOZ$opV_be}BzTPZK>`4d(c8C`fBkWYCsjRbLgZs$f%`@o4aFl0w zT9dvQrKl~_B%3e4TH*;13NNzpxE))-H_vHp@8&N9nu%N=5MJd)^k!OW+ddpzaOU)~ z-O{(S&5?t+#_06rqj`5r>J5+xz0j#SZ+>FBKQk)jmXU<*)OmE?xBd#-GO8@>q0SSE zHx_O)N#x3J&EuqeAJ(P)F-a#naB5rAjcb#Bm6 zmbq_}ABG|)12T+ok|r{kWWRyZ9Xp3SZH&?ibAN@sFTblC*5M1|=anwEYL^&>{)YbmX^Uxf6A*~hZ91~xrP#$@{nYbofhd> z3!9YlVLW4Xw{1^eicjo%F3aiDUP6tqJgVt+k+p1WQi)x9l{3=Nr36GerPJGZZ=`s; zzZIK)13UP2_f{Vfkf*U1h_-T&IL}&#h_-7(O*}Fo(ky{tNHu4mJP3H@ zE4^%%dfk;k*w(0`qE{lq?f@=T^rZrgg9h$pq4O4S5BI33Kx_cc34`{R&H|Q=khTZtbW)B_ME8Z z*!yhX!guf6j<|l=)+!b=?RIT#;hDbRI$|hd(-Fj5))Zsse*&#)#Plp!bqieW~$C6jdU6Y)OW(dJ9{L5df{J*J~T-lShs&g2L@hw$T4!)8|EYu4;W9xOU zwD}a(CCy#)O_dCi+pSd^6+pPh!tyfDNrlAsmL19Dn5(X5RK${iA;Q6n)w}bSk}2h9 zg9v>aX_wP(^vTJgql#9dzVB0AiVyhx*Th4Lmo_VWY|vF*5#(`>ug;zKH=UCQQ@snZ z<>JJ`ZxAPxX|l4gUH(?Vq3EEi{wUovq2E{3RZ=Ns&zr%Q+;VOiUA?aY40DwtnvI)2 zG5tO_eNobW_^x;;j-*|26%ON}c(vW5M0Qw**o47W6G{w{guL<|AV@+%8kWaaX&ezp&6z; zUXd&}gc{Oz~n&Z1dPgk&3xs$dB4kcOVaVCG3HAY)a~k4B>5D7^|5t(ai%-{J$~Dnpf-aV009KR z<#@L0-E1tN>Ajq1GBI4T4`Ob~8JzFhT>Wuz=fjUDX~Eaxn_*Z!bkm#OVFpu^IHXL#&4{FA?o!W_6l(X@|QoctuG2hI%LL~HsTQVK2wWGxIQtJ2+)q#PY<5`Po;1_;gc91ufPc4`FS4teR^{!snB}i7{r?xz1+q(%Yo7fjf+S2^o54c%sE=>vICM#W=g(y&obTxJ`!YA z3(qq#s9N+Xo5#S4g~+7&g?Oxr4*qwsQ-6gtt3x=WMi{PMai+FO%3DV_>lo@mP+q@8 zwtM(ydxl97A4_9>fyakkT-fh--g+fQ`_4R4P5agv{!r9+@p$DDMGcPGLnC|jB)K3P zj2?ADPXAf@db(^WdCIX*w440+1mz$#R2 z*cFGpw@0h~t@^j}K(3W~xLjB72*SVH*D8Gu2!5TcSo8>CyFEkS4&JacTN(3~hOD3r z-R##x#2j8x8RAr8%dqoI?x1YbPZtrj$;H`$iv!~pAYYz1(?jIflxc}gAGgL(j;Ipp zxdFHej4o;}Rru9|j*W_7kdgaJp}lX(Q6rqmf$QJ?vMojH7Aidd`kyQW zA-4*-(+iC9HA_B?YSBMj$0_oAW|u9S*3i>P!3xeS-d((9n?D`WydO!k4meg>B-G40 zGHuqa&a7@$FO;b36nmGHF?$F16G%05Hfgp#k=*6*`O{d@N1ghL6mOw?A&?{t?<$r^ zPD!?V7+Ry-k#B;C6v8HP2kHseQZ6V|sKtsL;W(mWXO$k|J$|2g@=Fcp5Cz{hwlE-{ zuapubsGJNTQM?EWpZk@WAhr zQ@XGKokzcYwmR4L*An-N*mksMA|zf5j9yoJHO46?dOy4@a`2^BC!fc{^psDUSWzLc z#$;dPi}6ajOQ5`^zkgTJB9PZ^jDTi-!If{^rGzTTl_Z-V2WyuG%d*&w|rFd7Q$fS$6_Vnp`!4V)hPuN&rYY-kC 
z@%?k;ien7qWnddsP-rMM!{J_dweO!8>=8&!Jcl{Gy0w-3bq90=P#uG_?Ky@;4|`4h zC-nm7^F{5)?A8{&zpnZGxdXFNNtF^t*Qopcy%)-=e_mCK5%c|W9g2Q;Fl$eVd`}V* zG`eLeU(;CE1P$vZv8_&U8h!=3+lw#W(4GBYhLg;dU!!|S*;3?SGbs<8!+)hXe0Ist zfKX>HUt^S+GdiYMs}o_i;t|rvg~{K2l@_~{7BQA}#Yp>bWgzQ8 zKATI{?QzlWfjFd3;Z_V$xH+CWk8SeW@Zj@5^4o)sSkEbG1(06@ef`YQ$M*uabMidE(O{IHDGm zUl$v@ORh~ml>Oq18WWNpWLBU0un*;WvORrz$Rkh?pI}@Q@yGo&>Z71itgt(kM$S_q zDk1sT5hs6hKaN=HibGmqOuP|`mP**jfT)GfU^UlKD!M;bH7+~3ijzbx*NX9IPV*ZQ zwyn3lV^023l3rY@Z_sqr;BoqS%k|p^VYu^-JxvUu@wtKLD;k%6Q$;IzXH9I|Jm#6E zvbfpE;a_)7IK;rbo{bloLw1BeU_68MRwnbWKY)Bcg1+=K=(%1pmGEEufFx$|>&&Bf zQ(H#@|KlD9`sqKIf1GH0+|lv%x#cRGWu|g z@{GnR2GQ}4t5qzQuiyB6LwAI^OH!vIQrQ4^H(Bb7UWfmx1Fk2=!KiY%)B-c?fHSfu z<(Ug#wdqrevB!~f9o#HK^H%qDeazUY5i0SH`%G~1uhQoUs8&{|9To>)YXsq}6}Jh?{%ZS4YPAN}N=_eNS&sRhl2!&v)_@fw|dg zhPqzaLqRbG?7GIZo#|_+uxXT-cp9f{YDUIbFQXonnw}KnF|*`p#U*MPR_u{m?Cnk* ztr_+g7Z6Ad2RcMjO6+Ip=x`39fPr&)S|}ZLt1jSY&r&- z-EqQF>uaL6ZCsf-&EidBW+XkH=QEmW%SnBsWl`y6SX)~XZJ>X!TEWJ^4{MuQ>W#I{ zUn-Hz&KJq1eK@k7&tuOhRZy5766BQW@*vsT4tLZnDb%=qpV^(;r{Jl~*v2Gr>6hb7 zOpcN+sa?57A0`h5&IvBYu@ie~{{F3_-Q73L%{!ZNvm(N{qP?kNhmACxrBmNu;nD7_ z#<}*bE+{3JYH=@+Gg*%C_|YrYSA9!p78~=ic};?XY2WUi6)U{6KGcmV^Be5Ol+bqV zKVLJ5Bj)*0UdZzIk#EclA82Y`$`>@jRxgsv4y0q(b0r*%X^g}Pl-^PjZmonw?rmF1 zt4a^$6!crvULesZ7;0O6xRjEM@U72<$Bpf|G4;E}jj19H)Fr7-RBKXfV^dmp>&J}R);G@&3E?v7cRbcvS=AUzj&&&&IZImN>%FSJ5hY~X zrIiU)o&0b^Jp#e7@yLv$xp|%LkxF;*Qs*Rv17!u#OJ2#?itL#wsx@`t&uF4$?(5Pd z{w1{Sl1PK`dP@zX>;aBG8Pr>bTNcTlAyA%ORH$6r7z@L-G3&*)E#HsHMHdb$H25*B zbKCe!2C4Z+_(S}=JK7j}3Jh;WWGddu2W(@SusX;cqZIPw+qcve&$Fkl$oTY}X=;Cf@Z1RZ|>A>p#99F zLfLp|EA<@*q>h_M@?8u+|Lpu_@9~5+2DjHAYqcU)c!O*`HqqTXRkY;0-}t`YK|EY9 zD%z2utrSsJN~280_bE_%z`PBt2^*_*r=Qt!n^azFlhZ-_FJ>%O&#*Q0k9-LA(>1j5 zr{x(ZG9@aQc+XuWu9gwl%_FFEGjqoCOuSE-sEt2d*?oR!M#d3u$gFTi2tAzK(v?~$ z#*>HNNb~OxVe3$2_JWQ1rQzAc`)ri8SRC2$HP&}N2%02_UWt<~@S^pa_$K*TOfyzI zCuQLn7^gVi4zue(gruW`G%bifwZfIeAXJu&(u~A}nUAvz40&f4FcPuD6@%ULLk0b) zgD1}S_z**thPdgUrSF`5KA@o2JLAAG3~e)Ec+^j=-~YZwy+fttmQ(~g4#}9s54t=S 
zZKEx7te-z!YwRaZF6zfOcVVb&#g&X)E>z12v~fWpZFxp2@1gUWn_t3<6Fg7vcpS~c zE#Z9p3@_5-CzD$G@Rn`ta|(1%ngnJBt+$r{oEV=?G!xQKBIK0jUu<&RPhd92qMS3fAb8c?=v0z5C#CQdsDP#&j(y;drckb&&L00@4nFb?aW=hTG!6kd?yneL%x2&m5uD_n6k01 z-FsGkNZPr?G0FM8mY6@w#_D*-yZcJCh4C~Ly`Uy<$YH}}b>3W8ztg|yhEC|Snd-7R zZx5Wa0lrEuh4aFNI`e|&E~z5F!CXS~Qc*|Ecv7rpf*2RKzts!51cmAv-!XOT+1or1 zlTQ(OKj&lZ!6>@;0<;pN6lgcS8MHNR_L4ofwHA|NC@2h-^B}Q)53~He7uO7n>ZDKF zlt~%4;$~C_%F66yIi+uh4CeZEh3%sB>3FZ(6L0jRu|h&d7B7w(7N(k$tV3{Q;?&CR zhdBDE1LOH5x6e{ntiHY)o>6W2%~d*N-k>O?>1GYhgUJ8U$YxNQSh%6z*eIrXt7mOv zA!d1oN=jqU9f{OsYHyu5!^C(*1=-A;X0ept zy)sC3#N^bVuH~V*5p0OhUa_$W@uStUc6@$S7c@I)Jc*^J)5Y(1FBy;tlqtjth8TL` zNt7@V%B?t5CoOu|;bVlXkqeLVdVK@$Kle-oT*`W$KF-TiodlyIwb=0ZM}ji1|l zNg_f*zjE1hlE4`9^dI%})X_+{O`0Ub7WplX7@*^ZI8Gk|d6n z1AcUkzMQ6yzTh7u%ofxK13GO-{2uV(wd;@v=esqWfBlSueb$4cHHqo9E5_xT8)`V&b9i9~`& zeAJ6`v}wz_jD@E4=h1Z9(m-8C_?KHm*hXu{+dP_cj-blO$cm7(J5Sjotn%)i(mAl< zjfr>7O9h4gnDyQxb1FTh`TWD&hHGmRzpgDy(U(Y;A(SkQjTf?((<0e9{t7PogdtMr z6y33fXldj*d)e&4Vw=AOuc=$`{gxK1dM3F9WC&pKKc;t(Q`wzX9#jvNu*l74v^E3Mv_KS!iRsjcU09j$=J5 zVtMLW7wQj4Nr!t^bcAEi2^m|x@MV`N9wk}t`ImVUM)@z2ZRsl`!ZmeESkLkb^LWua ze?7H2D|8{xceY)*&MH#DFXY~3Awe4M&FAvsmx6jRj%nDVtQ`DTMF_7)M(WO7YZzQA zsPxaAq>+MFcx?xIkIXO&`T4o+NRa7fRBw2h>04Xv$xaFfrm>6wwb8;#-!V#cue1Vo z?r-PlXP_8LKcr{YT#Uas!)TGG<48#s-_{p(QZ~35=MSfr%*AAG9I(%3MOx9$>)mBKs{&CK)xEQS=p<9HcDal>(Qw{H&?hkdBA*rl5cYoU@9Oc+-#%G!1MnX`3isf2CJT-;!R z;@SX#Ju%f*!y=4XqX7KcQg~B1H@}u@uQAbhxQkS8;#*SxswrHhwl;A9mR4O5Rf-@% zA#cDWJ<1;Al^zUwl!0f)*q-l1#r4y{uo$eY96aB7iC*Ly$s)g;MXk*>WYGSCn1UNM zrf)R%2FeQgVa1!K*5h^a`YqG2&oj3;RC-tv<<<~3uN1pXQR)~Al?qWSmmHrb>Y-7& zej8F8XqQ6g^^ouPCiziM>1+CCmCu*;1!=uQb1g%2b1r!bcmwOC=c8q1VQaLv(`!ck zHDlmkT)G)U)ck&KpO~nwdd#Y819e$P%MU-lv^iIl&Gl2{gQ1P;2ObdjKkYNx@PJMW zMK758C##FqoLgOGV7wna33sg({ovVs=9u1*5wF!nl|1n)cR%=mtz2->$`p8UmOAe!H7nqO|?tfx6Mcmcq3)>()8^>E^oLG}xI_(i=h* zK|5?X{tF&TG^wu3{PlAx6||__`o>f&hL&w4m6C6H?>K{7-p2UwnhP)@?<|J5D`Q(g z_VGQpnV4ka%7(@KIc064+{h&z&qO=AGFuly*vvM5@t?R()PL6$!`AuXed^AAA(6ab 
z&<$0Bq&U%gkDi`+94vNN1HWPJ*Jl>amO|fkFzWCTd083tdD;@ew`guBhH5hztsgQD zPDx%HpGhX>)%1~Bf4kkKp>aV|^_;<7g_<&gBn5TB>E)d5%GcZDO$QjOZ5_kabs0&o z6hr--MpX_9I+VXF&yT)mm)@E{I`TGg3YB+jh{48IwM_Zp;$4 zp?^JIR&T(Px-yn$hY=oc(P4h<~VjQhGD2e#xN=epq-G*-uY z%z}4bbDIy@=#!z0TXVlvGzvVo=t4n?yJUtiGHl z&X1Q6%xx;x{ilNH1Bsgy7(ZkX&q-i&9N~hQe_Q-wT8Ld{URJZS=HeTz4@yJd<;?E9 znya;ib*eTtx;!2kf*$1pyNSp;9cv?SR+LNep~MDDiEN6p+QK*of0?lH`I=PI%&Nha zowH9)M&Pj@=Bwna$%e%F$|UUYo59`zLc2kqf8q^88)JWRzb%L9-2oFuVIxQXjHY`@ z`?3R0Bkp367td1}ne6)A4ekt=dUb}hUw^r=xmi7P_8dgHm;ZF0-v6uBt>w4dh~JI< z{b}JGRiQ(|oHF;O@lSqh6COB1BhKC-DjyRHYMEMtRhB(xFd9Tm>Fx~_K?R znC^_S+PR_&ePpG-K*SHa7{s9oQ%mBmzHiMBK_^Np;5z&3l?9VvkMt7Vckmd= zv4w8-nt_`_cX!wM7~i}}3peZNm}+YAMcKE}2ZmuteXsjd91ssye7bYb*NBKUpFlsx zdIy7sw?F!7PsyIoQBg4>L`#%-MAi1)?6Fq3i2O3{+$IyoygBfuK}A8yo^pxte5??e zKu`$!-7_7#q1a!kyGENCkUjj8+|Fn>q_1ib_a2XzWJCS2P|MBq%fxa2C2``+C@kK$ zy8I|inW5(p4-?bptSgFX7i{}wgX=UB?LyAIc|R1Zjkafqw@H|Id^pe{zg%or;pA~4 zX?mhl*?ZZ9$h-LV&yFQBFLuPPzc?mTeY@{A8h5Ta%pN@|$@j`U>!o$}3t820Y0o`M zklgwFP(7UKp=04dyfCRNsL5m-)8^;RgG2E8O!k@L+|@t+hN)Qi;q$F;;eP?)w~y~- zVlw5GlG0~kVPThYFdmW!{P4=1)ME`2ynC6MV|k;aq9*9;GqFIY+T`8aY&oT!E`f)h z>dua()T}#Dr~5iHeWB5W3@EAXI>4R^<>ykD?6{cd=roXl9&)bdbnk41-Kx-9v|9z6q&=wTYS9=B8<@G_nU+eJP!!q6rw#CUmmlH2#Aa8>|+(~ zN>zz`F-EBI!zqPx<-&HKYWf<99pztzh(b%Wy}0Oll$$%hC0?q`)NJrmTU#%=jF1OP z;~%Zp0=8{0cALC0IyyQD=lDBXqzHq*`)}R+5^*y>b``p2B!dKbCP;!ifSMbJg!iHq z$dE}q!-5LlRBwJ#XECUos)8cyO(ms=k`99zpktqFT+v~j2y)X`m6gjhdDNYxPsGXl zl!A;gN0j|Qsm4H=V=tWVkB*E?0dW^W9-b7=0s^W91qFqDcJ7^a zlJU@YO`{vc!@HmxqlDj@o63#eJd+FNjYrc*z3!Vg#aOS*j#NSdjXf&#xM(uP3y%AP zetI_aX)7`J+0*$G;M)MoLv@&qXn5iRH((rJPjGQZ%qb&qlZ zH-{PcKA`C1$3IT<^EZt+oe>mNgG0YyMlUieLG?KzEF+(7ws?t&6Ivy1tpAE`eBv9Z2RoD4zb_W9$J#f|Sfow_s zYnjlL2wEuoZr@+5vc5c{@8wk?Y2Tj;Vrtzj`UVD>ApaT4DXX{d*tywj(_1%ox^R1Q zcIpTOG^ne>?7&x?EGBQV5+qRI9yk2`YrsnQRQhq4{JVGW${ijUYl$zp*ASu91g%y& zzfXKMgf#_k+XW4(&#DLr3-^HP_6026Hy4cu^;_xlt9@)&=SR7MIJWMc zcZ?U(WwTd6>lXyFbsQY>K+;dW7dAb1E>3nvCnn}icBUhXiQ9Mf&tcyMjqs1radBB) 
zU0uf@L2}K>rVYftvQDeToRO6^iV%J82@1vnXV12;&*1Z+@t(>JIP3!|%DFq4w{EDv zO?cY@rU2GBIj=%!K9dHWX9T3B%MIKSGs#X({R6r*>8PuRP;fH}ApBZ#BQ?=|lgcf<3Vze0ap|i zOW^V_Qfg`owxj+`=vuOO-##5x)nG2quYW4_0#=;>?Qz?Mv6e)~2OJ_*vwuPJxm1t^ z){B?4@1XXg^F?Y_lHd8lUiwPNCs04}gco~-Y`y7c?M)OxdTU(`Jji(i=4q(-0EeX2 z;zT>BgjtxS&rS zcvDw5O~|O!3Py{}8N+K064QpDX`f}FMmcE$9p|CaH%M|;Irdy9*-tV*rR#26? zvGEHV8=GQSBz<<0Ltf(vQ(X?mOB zr&az()GM**+`8FY{vS(}I5U&~;K2^K5qz~I$d*+rW)*oYSg6GYD1cvYO_b06^5sjV zwG`-x!+`UtWMyPrhcQd^omWx0;p|+XnJAaJx-f1FR*o=(K*OKQt|n*`M$d?fYJ+t) zwzTL1P83a9Rk*( zZM>>h+`ctTNpBHwsJedZ*0Zea?4j0JaOV>f2s`M~fM)tJA4Z%Ew$LkvXcq+ga5WNS zbOV{W^_D3^utM;B;|UjRV*qx#AGZHvVb>uSm3ax~0+?#uo_w_*dPWwJ($mvz$6FKP zq@C{y{xNO}-(K*Xy&c0AWa{*?k=Zh8TtB!-~ouBV|93jXUwWAjHWW$+@* zp&q{`oIO9OC;_a-o!nm{6#w&yOn+DlXnz>Bp%_&)t7Gw%TZpFzZKa2bx7eVp$Mp4^ zH={bmcI`j@5>~$;AD>cw^-`SwLOcO+NmDS#LvpTT&F#=Gv&5S^)#KR30-b9YznF!e z<>mbYSc)b&Vz3^&GJ6LAQV@m`#D;qa^d+eSC#0`Khc=sq(yfOIC>uHa8NsEqIv0KSeD?joPxbPQGl8FSqWq0v>yhTd$B!Su z@m0FBk0{-^5fS|OF||WY(EhEird4Me%9)+++<7(afuocY4{qYx$lt7Tl|PC4 z#G~Vo;|hokdi+=iYzhpHC%)yi>`awTg3N^(AzWeM4e&$kvK~3d&fWT2@CATn6DhT9 z1n8jhel3E_d*b9O^dR{C_e;yeKKZ5g17q#ft#xxrxps4nhv4yeu-1D;eeflLp$uAd z=jY}1gs_Qs;V_Ou=$gTl0{h`A1ArduV!M{O3mRbu9S-WxEzc0p&{i)tDG5DP+?2F+ z)wkVY{~7-Nn)HV8LE;D%m7rlP!w;ykN(A80(c{OhF6MQ0-AsG^+R(5LLL@g}SwPc) zTW7#TLbKbczM}Mj6&j6(LG5hh7=gz5jcXvCCH#Ib9ySWvE#se?g z@+Wuc6AmP>E(omNy1WpA21|qYLM*=Fm-=m~qp|2IQa=7CO!oYGWb5f=c>gP!7jJ@+ zmp75cP`W+_9WgptSt<(1_>TqSwDZUB{QdBu4NQzsUQ$fV*t?%>lxg^{&1iV*248uQ zsPE?|ApCbQt$%zJ{_@$3M_pW9b+xr$ITP!;H(6epLqc6RcRmc83Yc0D1<&wTf#`iu zXW2Lc)GN}#<4nSn1&4+8U^nWqXOL6B@*rT`^OQ#!2|$EQDvJ*oj!grL@2~bL1NR>F zQ*Ys{+{i!0sFz4e$?#}Gn0 zd#bOm4~~mC$Hf4OgXifZFI8HzikRMnlO!PXnxO&(@)pK@?u|>>)h0C5+0m1LX^R^RZMM(|+jN8N$2$`l{+kbg;8SF|ABi|=-GTh#`$z_SOWj)~vCeJl1^cBr1K<7jMc?Sa>sYDD|a zU4_|jT_bWcVB@9USp@}CLA@NS?Aa|#eVhH?L#i9lt4S}QmAWP-oYVE1=T_fPdG3N|AtolI2=qMYTwe{-XfBy{x z>oYJZ>e7H?HUJxVqz=S8fo-cs&==EDlR^B6pMjU&G&atGrF~XjeyGFy=WFUC@ycfS z|DVCQWIt{w4>B_*Ri4@>PoC6VW-k5KW8`Qr)?gI>vg8{C{ShJSOsvp~(xHqLCQJET 
z)TWTlYU=sQK>?%`z#6r{dtVRdl9%gI*s{cliSiu~NSX41C%bUrLVuBEI3zMCRSTNw zO)~MK06^8V@Attp>A|9b2B#OpKaM#`-zyv7@bkO^SK?JcMhc{tS$_NNrO!;YKDg1y zd%TZyZroS|)0oOF8}6v*#^yj^K-(BcEq_}3i99Jkkmuinbce^W09NVXQMDo7U%qAs z>hKQTYM1^Eq``0hVcC|bYiDOSRE+#E^j|6}{yCYmEC`W$yYDm~-_#72fm2{?xSr1(v zfOc(TV`Gz5nC0lCqw64UPG`@mMT8@+32MMbYQN zR>Z?pwKp~Z85Jm*G#&_70in~Tx|p^2^zp#y$OMo=^4m?LdK|L2tz<(qEg+J ztvCGIy~&AK8(?uk*xy+Us%c0v&Y`SV1LrxHEG8~q0tq#ZIEl=5b6683z>{8{aJi0` zc94_}4iE2zq?P;t(-DM71L1%N*3y!vMmf~1S|mVdnkOeDqyfwuJWTs`X1UKV>EP!s zKxPzjITMgS=v|p3CQSWv#{PGZf8iO$Nm{~Z$qtB)WaU=+)JJ&M;!CN0e5oTL586Qr z@bfFzhjZ&IDJhwF{VQu`AQ@axNsor84MLVIKt~Xbv-`h;{N17$L|^Ivw}LT9c}Auy z7TbM%bjZDJ`}XbKWsYdjG|zG{gs1{?4eYs~@0p2Y3)U7VBT}K;iw(Fj&4GPe!=7jH zFD$y{pDY@X*m~|;t@qe-OF6E8l4w{gEvE2EL`VXTQrXnC0r68-UY;J@j!s|33&OBJ zefm_!cg3lw_rTUoYBF4XkY)D*5a>VQxOy*kx}aVHvWZm6&;YUEUo)%422Fq;NE!$T zz=VXrmuQHcRq8&C0EikR6j{#GY3Lt$-2VSD!%CY>PlAi7f#_ie1bZ(j7wgy4KE zk)2~9j8sooh}3A zb&)@8q4cW88k2|bFlZQD6@6gfDD5k-k-JzjQM4WVkECTeT9gSZwDN?RMH?Rf2}i# zo(g&dLs$kaw=@h?Hee0NR+s+QH7>o!TMs`g@gup3AOv8_8YG7$Zog$xxd9#;5);La z1O=TY&*dQu6;M_14qTqS{hs;QJ?bdjE9!8w`%lk4I$_vdTg`wL>q3r0PL+$tE zF{345L`arjP1Mk&2C_mB!`uW6lWI}lD4{~gseHE@sbGNZo3!cxTJk7|eHw9+FdXip zivzP!g_Lmz=sp=1`vd)ffkp_3M%{SW^A(N(tOad87T=}FEZ_CrCHLdZKfS>j;C}yw z3n=Q{8pmQ*WE|bjD~EG+FV`%~n_y{Fb_L&Zh}n^NreKsC>XQgt4wV|t0DeQ84Maia zH#)NtK`?N39O~F_LHrQe8+SHAvE*|;>?}qyIUgP#&`DD8Ljt7d!6QI=bpV?oUrkMz z@l}RQu()np2p|Z8MX+qUApMvI#WUoOFkzTDM~D%EgM!*25Rq{Cek}ohR1>g=~uh+mS-@NalKNnx}PZy?hzwi)`QkOjQ{{1&0b>lwOm1!Zg6&_k?ZF;CH zkf&(x)I#N^C;1z4`>(vx=>yk^Boq;(04D_Xg3f#_%{DxDJsm~MM8aFB5^~D8U4tCk zXOL!7@8*Php2zf;_S@c2oPfAXT?Dq(7(!rm9T*rSOA*_JxTMkq4_1O)9iRib0F)U# zQuegOuuIxKgWFI+6=(7xhr@Nq@gRSEyPa9OWqEZx32}Zfow|_I8TMNqsuas!7dd(I zGBU%U&#im&W-8PNWlq0@tQN4S&hKkz@5$3cp2SdO5iP>#+X8Qg{<-nIuIp{;8$v_a zh$twBy?=jY2!aXlvJTfN0{-G*oU*Tx65(^pL}yvYe{sf(^FM4WVChGx#SOBQ;0Lmb zi@U7d#sP^-{Au2>-5jN-t^E|9+?^WVivXO4DqR2XdxoO+ z9O2t6y}1q8msvMNoGu(7Bk?|;`m@hDqXc-UwPh>=qjKX1*%-`23Mp&zO(N9Dq>Vo! 
zj&g7qm4%cd??4&CWdk%J6;+6zdA%$c5Cl(teRtn`om@s8M==bb*Y!j z)@uzJW4Pg_ldtv|BICzC5+Z1p53^TVJpY9S|z=iMZPR2N;xq+7Vz*e0J+kFl~MHD5WoDhESP&vMWja>$}T*mGnN zN1$n@rKQPW=~W7A6Fdkrnd(drhjh?Qs4aW1!aA_l-iYXeWsyEwR`T`h*UE7SrE_uU zH~dQkB=v-)FC;;cOYi#iCjc+*;81l)v8j;GoCNcNy|?&$$evBqtW%&oQwiWT?ZE&( z`Ya58dcAEw^eS1{A}uJ|q*jS_Cm)%BM}s>?VsD3v(PMDIs}atO@Bgg(T19(od5+w_ zH$B%iC~iRh5a_ds>5Vcd(Sa+6FGzfen+(H)L%|HG5P=mypsL%#r)_3t=1$-6s)mep zoT%ADsv0y;IfF(uWsyadClk(Mzb)48bySSVp^klZt#|zXx7e z2Ydjak^JiS$cT^{=lSm{NLd}iDiC3{`Jc0Y6AqNFg6kQc(J{?M!h-5J!Xb; zfeWhC$%o$m>#wPZ63S9i_lo>G-3hD#lkfXdi7n(ubK~rSRSb`3KyyvvuC`i2t-FkBg za<0Llp}IieUu!1x=X@b}lZ1fu*>+`US3=tK0{nWqdHDcV@0%16Fi;PSRWIK(SKrj@ zDntXRqGg^S=yVjar{bgy533H30@A4r@*IeiU);mHDD ziTGwNG@eaw2@MkgF}%dRPrree+8``In68OnK6qS1x3e~x)b%96#7A4cYxe4D1+D_g zq;VoCYXDXw`_=aL_T19x9#emSHxoXOWr(EE8{b&TF&oi2WIU*&sRgwGErSgNCFM0; zYqT~Nt&dypxw2VU{+cCj-!GvNFPU5YVQaB7g%i2n{`{Va>Hm@w{*U|;p&>s%|L+(7 z`hSD-zm81)pXL0AQp^80mh-4JA}X>vt(pkSI4dRzhpSfnxjI*v@bmk|Imw(`8}E?6 ze=CFRc>2dj;eTnr0SEJ^?SeEDr2B{OFqH>;lAJg9@cp>X#H=Q#NY}_;eq_#F0ojkr zxG?`LF$El)*;rkW!Peve_&Bl5^$<1h@0d+yHSNOBFBnz5J#8%_IKI#pzk#n__5PU+ zU@B*)8C`#K<^B5s+5JrVhivOP18=&j6r!J`d`qH@A1;#hET4StVB%@3U?fA2L#H63{r{cpB%&$y&VnQg2 zbZVSU=4;h3wejy6aw%$r4U?sB30W1MHxhnX&u}F;Pl-8I9{ueUUifkZnxA+QRi@_n zgeeb7CApF5s$Kl&klN&d#+u%j%)AkDHZ%QjYc3Zm7_RoIuJ>%C+bIVTZY zR`PSx8B_6#q@?xKUzt2_JPwyV7O2WfGBoAu=r??n;m}|jpBYF`%TLWN%uW|liDSYH_Vu@O;Ilyk5>#VnJsIZSez7x`b-C8ofj9eE&5JQ#Gsxd zP-Svx=Z=oW>jU+Y!H)c8>OU9GE@$;5lj`O3kEsc8XBCN>>_}7FyO%OF{EDeuwRoeP z_$28=Ufk5+Zi34ie{?kEW4=;C<-<(NS>~Hdqq4}qBHx} zHc}}D?xq-)3s?k%gc^$sHKWZv@2_{>ANSnzoqNCgz5C9K^tWnm(f-Q_vB;vJaPp1V zk8@FWV`7Q0s;o!Yo~<61*jnO=Glh?TkToYy#TOZnb(QDZ5&NH?Hl_ zd?xOWkkP54zD#Kl)PaY|_XOc&UsNMGwinknp11!eQMwsBCG_2g(>)AC>cB~*<8Ej%9Jd1S=BTN*5j2B_uwh%}$FbGGsG4ICI-+#lN zz0Z|oYx1NzYQu3y_A1#X7|fZBajs>v&*}_ouPobTt{V%PL^0RM8gEvCKw{QNqys@6 z1L)emYnoZU)pL8++`K<~u&tws2y%Ckway;U?PiI-LG(_;vX>Z(kMOFPK>G%`$wflt zVJ{TwCkT|wvx**@kWxRe-!9R46aMaq#!Li?+I|fRpB`QF 
z195*$NP(5%Ih_2;P}m1T0M9hqh4kW}KC5kOrl0+NkMjY(_BPnnO1%%@ELh0vBZW&8g(Zjx#-dOwpNzRd_OWc0pA7UxKmWg9^vWY zZn1jlr;pPWiqD1K=HVTivC6?%%v3KV5%*zvB3FJXB0Ve!JN8C(DV^u0O^v*(Q4$x9 zTQ$hjQ!&c+mXFixC#A2mkY@PMkhBodLMrUN<{)gZC{Mr`07ra8*yk@^{kg%9Me2WH zIYLUFU1h-B$&B$wv@oN5S;iN9XAn|M#)(|-J;d83A%lr6nyS#t-LD81O|i@sXqX7e zf#UII<^H_sIDxQW{d)6Frgt;@5IfhMp$=5{SMU}}z^%x+C`q-WO`UuON9(Er;`9oI zF6TFV7d(kzJ29v73CG`*!Ds;oCfk)P<0aJ$>(rG#_K?9#bI^1^L-8j!laGFCBDlFB zi#X`Uc66g%{lQw7X7_n~?te5oi;Qe`?w^DKch6}x^IhQgG~aT$LY|*xw|n+-L^`}q zXl{%R92sVy?j)8pP;FI&O?uxXUR%%rkswfh#rNS8BQ_;Peq_&qD?UqSM4hBvwK6cs zd74{Rb^Q5oGlP)NShr|F06w%>(JyDo9>dEPUC-^MNc7wE0|J~fCD#P050kwDi?!A0ZlKiDEGUREE849Uqib SBVjWaON^_u6yKHP_sspX8~<-URbrhIn%$-VA5B=k8LzNbR%ITt$rXw*#=jrO&n#LNAE)n$h7$0Ga zz)b}!8s*h?r=pDFm<7DPc=<))X?VzgT>VS7eNmZYL1kqSbK?o3=DQ!3DaeBC7rjxK zAVU>*-NTAHlt4PZ)P#Q-3%L=5>r0SnjbTkHS&$+-$`Tddf$jMyJu$9L)f0;qhwa6G zTn-mC2>cxvPwn8}rYsH>#1({$zJzD1wbKVoj)aU~@xcxaFB&~73->$cyA5s$faNC@ zx#zg7QVLWzo!2<}di~x&3)2gBU-`p>OvqKAw5rCpO#}x{KKk_^_jvI_>Phol)Yzug zHrI^C+Fh=3vbyPPtn!T_%iPOPu(eEA!jX~|c`WE{2v%R`kR9qHD-R@Lua9Nw>k=zn zdw(BD{b|qqCod3D;n-^@*}L|Kzo&NZCz+^hu4yA;U>~vmYpSR;cU(jXq&@&6Dj)z| zoiAQ(;VcfmeWxyWTOFf*$hJ7RE!mWwAj&Um}GJ{(u0?YE4Y%X7)E z`EK5zE=Xo~ujKC%1v7aOlLz9SuH8$O>N1x}74kR+|GCVubjib}boAYd82{j^8KRC) z?jMk^AY$+gHE)j|V;{mH)|XReno0MPLKExpljQSH{}FmwH*v#B1zK3%ceD=;~SyV7-4$_y|-fkNc91V+{%; zsS@Ok#vy_gI>J|d2j;h=^%?vm-~GoGg-6gAi@4L^a8!ToeR`LR$}7uu808n&Hc_aa zs{bj*STQd0rccyP+eJ@{JAzO`HxjYSuHThiSjKLs4FV$MgfgZ#AX-<-O2?`8uI(_4 z<<|d+JMBsXZfWUIYwGvu#aUP!*=oGswQrnHEoyJ{y*05xgmtz1M#Lu7-6GkrHofq^ zT}XRF*$Z!Ni+i^m>I-lz)os@T$Q>IlFKZs)ie5Z3Tl4tu=hty*ZY_Ij&h5Bzev0jCHoKTB%HAm2e*E2@!WtQ7 zmY1S2+hqwB<%%LyL$_%&EYnD8+Vw{cnedGk7imt_RQ}Fa9)nw}9;0l^hYuefaJC#c zX$Ozxq-nvovTb)R2L}f)_QCCjLn-kMD^bleVp&NiJ*1eP-7}R`5*nJ`sVZ>Mv172- z9Ltvd(E!sUQc@(?ww^LDI5<8oZb(t;^f@0N-y)I)w*dWjKRK*cPj@VtYupy%<&Bq1 z7r65)SBj#zxS0L1R!!x!)vV7|X6*?0`jGiGyuGL>(0O}utU@0=S*j_grR5qIACJ{P zIB2>(X??Pbh@L1>|J@Fj$jNzOdBWvFmKt6~y!WjXlPzkAAYnL@YmA%mpNj!W*q 
z_CHZ*fmcCF!l~y!wn@*zk~BK%Uq?{f!&h5AA(5S&JXUl%oZaq=%m#-Zk5Ga?bdSXq z$h!OIY7}qe*ra?F5U2Mw&-lLD<`6EcX2tK6Ji>mSp({}sR!n0)c>(SmLZw* znVkS0-Nq#niTn)=SYE^e@6LU^)tmE$Q?Ls#ULCK)e0q9%qcjqC_r`ZpQWF}%0lMw4 z%*Cq2gLn;%lvci|M#7N4D`9_U`N^*Wx%BBqM^f+06Gndi9t=p_`=z2{Y^g?7-1W6r z5WWl(4^I~+wbl}DSdSp*`X-Sn>Uz6_Pi}oUJ$lZ@Y^w76Q0g%e4$dJS1z*2@7UZ$N zNoO2`3^OzHbM`V(;KMICIkUgwlE!9dD_B{v0k(yCJ59TN9y?1bM?>f&3=U8ezH(flv{2(XJ7uO+i%~d#f#jH$2&@h|nIv;6~ z1_ZNiGZ_%Y!ySGBz&2YQmEkn*jc~iB>ScpuK@92}9pp-Pfsr^v{J5&!wKv z--POu*xc*bK3qCX!!LR}NmSq&4@(47R&&+s?0<#V+xNX6fx$g5Z@c_x)4W@8 zy{)fMZP59G2JH0^d;wveb$~9sWM(#5>dLN~UL0V`UK$o$=oE<&uR-k2dThTdZ#jx~ z6Qi+dL)i*Z5k!gnAS_X@Fr000{GdRql|MwPpV9l%qJqogvHE%Bz)XEZf3rYg>WM z!O^sF|8+)WhS$>%+kAsjBa;FTV)0P1W9Z~tijOK(0n(?l~B z-8ENJBS{yuU8{zcA25M$#l;nKmHmFf=rD z+MiF{99vkAlRgVC{q^KR+g-FHogFGGM$$kDKPyFlJMX-q*P(`7fu@5rpAApdj` zeYMZT$A=e8H>5wWtRUV4gn-?QMd48wE~gRM0T4k-8XEn9I7C2N5Cx*kv?mPj#fuqW zY9A&r@ncR3ON5Sxs$^5_h{8!Z*H@z3SHoC2*4L8t7pIIBx~tjRu8(9G_^`1V>Lk$g^rQ2X#ovDI)|ZS7w4=%^R?iSX2}?sQrG z{1(_782Z7ua&(P{^X}T(nh9L1J|u(;Q#l9(&D2zp7Gl@`-Z8~^-H(#C%vL>4P8!r$rbnB@D%0D|`V1W6v$?hqWAFH0e z({F~MltY2TLpgAswX!Pux2^Xvl&Iqr-u|Z9h1+R&<|{ex{Q4kk1>^JYUgK5XR}LxM zE7qfo$O!%>US}Q zf0LrH(NEj#kI0csjb;~inPiS!U^bx~%@LB}B(X`8_`b%`;v^ODKM;c@FSPWo$!Swo zQIR76oA7g|zogYvWvpWE=*_nG#`oX{gs#gDiNFt&{hW;;40J_xb9rh!^D~)Sj%4n#miU;^`D_h2UlAvbYjIpz!K6FnaYDi>x0AWV?4FTY>B|ZQU<(86k*snwiE+02; zPL{C>3kw4^O^wU$mIze#V;Fuz+e23M@*j@0c?FEzW+Tx+E~o;a;^6_dt<&j*`q=lk*jT_CJirad%W0X}R?|l`Zx%?vXKYxb5)BY)a(+ftFWK1c z4ubvk&|)P4(0f0?($b*6(ePrzU5rL7>yqdjD4Q_3B!XJB?ihW=+pt?FddqFQ4BmJ$ z3O#gdNhu&7Z_?6|JaQh*OKbJrZwEUhu9m`()^$3S4}uH}OG}?d zGc`@ZL`sXIrao4a^WB2)xT&fC3d{FcC@3kvPG)ZE&bsl^3whZ~m^JqVB`6hML87X5$=ctwa^xjZretl4GT6TL#G@c zA0N8uplRX@Heax@J$ypLeBCSAkt%i6FNFaIAgjf24^y@o2Ry@2l4O>4fE#T$U z)6<$M1=1fsvHw~!W*#AKg-@9Yc^i=cqr`@=NJh?0!-%fV#h5Y4Of32cmhygG9kNX`T3{_8u7Tw z%1`SbuVM04YljaZ8$?~}*_c+-YJX#~4^*WBk-XiJ+C~in(Bm*iJYr?Ul|Oi@vpA# zto$64uXqgB?!H3;zS-Udczr6NL`Elf-*}tEo40REN}InUJh#S+ySv|^I4@~EHqavH 
z5_iFo)J3~x8nK<%sN>sp2mmAtUR#%NC>P52RD1nl*RCkogUd>IaF&azq8<{j?m^H;F@6*UgG z8;;Ca7240|!Iy>D&FHu|r8?V%feCee866!eO-)btl}LEy^x}__at2}HuK;rU!}Y<$ zg}i|wAqN!Ob}7Z1m7B(clinlEyv7=dRrh=!N!?Xk8kn*=)osyCXvBTVjKF>cND{8% zcET3oiFf}N?6Dm|gbICt0HaBg`#!Rl%b>&8=lXJ+=Y2Fyq#>DGtBr`$hqc2#+O^+O z*^7SIjJINvCNqKQvo|uz85k+gV*B~yhLD+gpym3=5nzAWseBeZ&z}=0DLrvtOBB3@ z>C_^2bv!m1@Ti1hZmyuTktR$LPy zGX={(-rYqMayf6cr|_&v>vp`*u|9EyYC7w5UGVM;c?>a!b0l3~w_4LEynY=8 zp6>705OFE>2K(0cM&I9Bn8zN@6s;e;0!ncIvt4U|I67kP&#M8MztPbQ0Qm55N6Oi{ zFX3xI?a0W-moPs5v^P>TS=%P{gOSk-Ny)1pY@|hK4s}KGwZBN)AN%9G1evo)rqy&c z`Ab^j;$jJ*vhCv_Lq4|~Vkl)05LCFg)jh9fTT0vU|R zp5Fo&T4{lF_HKV1bl_sA1?VR42*t>7eWrqn%CJ?<->3UxAN!$D)75?=dy&lRGsJxd$8*E_WJ2y;heRs3 z-r~qnCC-6?Vj(IQopPl&Z|S(?Xi+FlbjqAf{HPKsP<{dh;?eQBktWIW=ieK2lG?2b z$3pv7a*E8sboC~~){m&Eqalw!D*pBnqfyT^QNwtBJDsQ_?2T>@VJ&aS0Dik@!P>pb zzh(Ft8F8BDUcY`#0EEWK&2O@*s^Tdt{)z-g$McI7Tot_@vpW;k`}4b}b>hw9^ajmk zZf7=c5o(g+4{;q2Jj7MEOf*RfHX+;2%4%vG7-G+VRr`;NcWUADIr{Cncl4yjO)==3 zYxET(mdE*?w2qEWywL>;iEra>{YUGGed}kD z=5@K5+}+*%Cnu?Fs0&#E&>X>x#&@Ci9+0Rl;%cBZYp^HT*1l8%q~4 zll6Btscg=620l+$uQ_c`)|R~$enBORr7qi#2=v~DH#pYCDJG4Owy)NhFPRSN=*eb( z{rc5#s*=*h<7}@#6&dvzAoS9*U*KxakNz3R{|qO=BJKJJ`Ubc{LPIUK?Fc;ipLC$f z88iXms}q^e#TF^&x%(N}WY^sK(~?vgzE}Kn|Ixy?VE+HomiuFwBqXvES=EWCspa0i zbJGrX)F|P+F}>Lz({&QQy5IU~+eNFV(Kk0%Eg{V69ROd|TtcK9ENa zXa^+hTBDPORFet+E%-hbmL;>M_WdWw{^u_rp_GQXtajf&mC!!~82CH<>R)B-zrUS; z=>G`gAykb2?+sr5w=Da@A?4HmdBRmL!}hDaW||_~?-8mcQwNBBD@5BI2DWQ|d=}vf z0MSncxqFM7Nd2Otw)}fxmWZ?6ygYiMWxH_=A7r_%th)nCVTPAdNpG_Sl*A|PoR^hH zEjBY#&dG@nSR8gOQ|;mwg=$9!JCU~`RiAC)pNT^|k5 z;TK(y^q(V`uP(nRt{U2`o9zytP(?igM9-TsHmK1a>^L;iS_;q!YI=XAI2y^!w3x7p zy6gd3*0h8*d{3NBKlXKD6g5}G(LzY$oQ)6KH&!g;i5%FOYb-`3)z}lbB!V4)cmNKidz9k z>f7`Ts(v;xsn@9N>^_|z(tJuxd{3vXvMA&pE-7k3JsLA3p|BWC3;r@mw%}$aA~@^B z%c@>xdY(1sb+TzTdvuOSuZ?&+@KarHWtvofc?AYSE>yJjqLTG`p%rZ;CrF8_Bi1(5 zo@Z~SH%VcXaO1$?&u)-)#GmJ(c3psi+fj9cC)hL|f&7+mer@3igp+b@&zb0WRBAp{ zz0!68O`;86GhWZfv~*pBbFP(b?h9CC4o?%zEVoSQm`v7N)B5b#Cvj-2t??FE97TKX 
z86PLH%W<@O;6(~E)#W*F!yVx_@jqlek#px4ed6a;E^{83pFlJdAjQbA&{@DR4>NV+B>?1-qPUveeT;NB{?5r3T zpu%ixRxfRZLy^a^iJ5Y5yu2daUFTga6VGX)vB%(E12xq=q!^!5=d6sQ2f0#F2MU~c zG2cd@us-r9qZ>4KpzP>YO-*_R#wSmnEOSb5N@VAC-vydtiEG*p*%>Z_%xG^OWMSj= zaT_)ncw{<{o_5DXO&diiX#LsELnQY-UIowG%3-A~#%FnD*-j+$78GA)q3O)R8zJ^f zz&dPiuIXrS+x8$>S)tg`yK}r&0GQ#gr8`zeh+5o+wx+=n%d6UVlwbO}mDN$5R*)bX zUf_v#GQ4jg`Bi~=FR^VmKFU=VvfQElkx_rUM_Ze+yo`Fh9!BAT$N!$|s-cdDvPu7W zgEeB*UzlmKkYPsu^4Ig#9Yo(5wdm$(BM}xVqVc&s=t|z?h$HQq*$h<}W{a zUj1P0ZE6c=ValDA9ekEBXK*pMjAvEqJdzx?ER8Fn{V>8KTk;JnKyxWC*D?*x4*pP1P4h z_Z5O5No5GP>rFGavmT2GvZRq#%(YbfASVk)7oQwZG5wH@EsPFl|jqMifInVwx2 zpo5`L)37a%d6Zxl)$Yq2&xEq=u2?vrB(iM|umxf6`a;Xwl8oO+t|Io$jMEt7_nU(W zsArD*+p`RYXPmoxdzDmlhH#a$ze#CnmF&l(Z!T}gBqqvzba2RAQ}8+A_PIEevJwc` zH-jx*T2`#j){LwIXtvnC;3cDD#WpQ`C4}lGYPgC*Ttnjbh6f*NEJTBD#1=tiw&~b2 z1#j;YemSZWavmBOLfdoHfKU-KoK4Rbd7LLsZ&GVgZdP>ke9+{WBTIN0GO2xG1X%-m zCMMR>%iZV$v7L3__nxu>TA2>L7KPl$D+(9r_H4Meo^{_lnY!-=QK^@M6&0;~Cn>iy zntjA!Z5K~C>M7Mz!570jcfec!zs;ZppmbP}kwOLR8 zGA?4?LiE+vHuvtpFYBzei+tqq)~Uc~-e4lke>SlSeGnO45;&8Dr0t{liWi7h0#h9$h~|yFXb|!O#p4iR z-hcV>P~ZEK8z|k5j*fs7GOtlqrN9EHYrqT{=e(fXoNv``U0b+rpdbvyyz) z?Z#Q`WNIbDb8^bJb$08=_GV#5mm_DZPJaD7slN9xP9L?*ga&1S5?!eL=G_z#Nd zc&`yR=x{L3yGSLTDXY8JQWLRH2W^R(`ZKqei>O-V22t^1;gWEyPgrn1L6lQhw4fPb zEeB6*TaO&#w<-H)QO8w?U8lXN=LRi$*#i^sg-;mw+|kCy^1qQ+{3r4fr+?j0Y&4X$ zS!-*5b2Q>YUg(o>e5pDUbm})qs^~Yk7B;K#lDr>)P4M}b@2%OZ#m|8B3}9dFHr)QE zR$V=`*x?UCUABd9i!u>Xi=}wdX2|O4(Ew4-rdOS?nn#~xR=PU%J6EH{NwenqRpFb{ zc4n-PGBPDCwj08~kjLD%XwDY&Q8<7CM~7zFyr|!O7aOhQfnUE0Xz%=+JiQ?H_qflM zu+ZVuJckAssCq7w0z6;*bE(e2h2sdLC{yrIt7eARGES*(eOAl$H6ylO=kDPr4V6`l zx{eB7N8K)By4pFs9b4JWO(q(BQDik4VQWkO4;eEysZ-Jhn5ToABX-L`FC0U(fwmbPz;-}2T}t*2|* zDL(y_wVr!AMNpY$9d3txUqpdpVbsvYBIG`tpMxXmH=WeMLPh(n!C=UZ%^B4Q=s3c6 zd#2i%wb^BV=}3n2)-#|Y59bt;*f_dv5hV>1**P7)pqsKt;@*B@CF!@edmRvX0?-r`w%pVTg=bhLoS6$xO&lcWH>lRansN;6Hu*=EKIq(pB`jli+d*M}ewWI&cdSbnWW4ayW z^!jF`x#6JYP_+QAuI^jYo7FjPmW>SBE;re`)(;7b_}u@kqjlj7x&D}%Sar=E5{iR^ z&r;DeY5vxknwKFz-#KZ4gORaF9xUA@z 
z7~?YI$E@uCw_H=1^m1(|k(U1mFpDgo>nax5z7;z#c3=(xW%^pMyr#Nl`#NHn#3^{= zJvSaDGFmjaFdOVgE$TX2=*kIz2s0bYk!^0P=5%17kh$w3qXR&hIFb?VQ7tWL;t?$8 z3UcxvbAFaa0Ej>#n4@i(4SX&%D(r;_OPKWe{4j&MUQdJL$QKoqsO#>ZQUkRYu5bgP zGBY(%bbEHFtnU4N9VGWrw#YU8?S%ErY!Dtrf5)vG_v6gXf0lCPeYDZwibAy$il^;* z%W_0yg>cHsFIP@lp0WL#Z_ct2*cOu6>?&B)vW2Lqjo#ks8~QCpt!jn>K&G@_{?Rq} zm==)X$wV5?%$n4q1g8XazPEXsrl|!)eP37erac$GETS;EKc@Elz+M|(CnqOo2+JB` z89V!&)DFnawXys^it8Mgft3mn>8sC6Sp6*GVh$s8^m!pF8a&_XahuC+@f8TY;|R2` zH{S;FdEA_OlVXo$1>;i>D#Jk0aHR3O%TFG8&Su~}pSBxAfjgI{C4$0p85Eubr5QW5 zY~efJ!m2(obpHn8(G0Df5g(*H_2$C4d6uU=1&xAxG|Sk}evR56nm`k0mOsey1m)$l zJNgyina3lZ`*%L6tuMC{noTs82VMx(Ss^B^s(=pTAsQ&7NQhB|-l$VSw^kaD^DW8b z5#0~l^O5ic59BM|mKM~Ale`$(4RerhG1m;_2`?;Q82jErH6uKtu3I14+oX=KWjL=q zt5vO%fqk0jY`&1?I6?(&*Y|E!T%!!~v$*UZhTbs%$f}owXB5~+_w;Mp6<><=JO$(I z0T?LFkO@hNsKN<#^_TlqZkNS{2WJuI8rR!OBVsEjTF^g?ONw?P*i7FobaYh7Wp^IZ zoEVfvQ1E9p^AVrC`((Y>u$2q9wk+QUrv=q5UDj#7js9Nn!8pNuKnPE6nH zlphGM+k?TX;M3vyP0tfWdWO28msp9g&|e3RUNU4NZd|-k zc%{d#NDzU=HLuIRwjH%t2v7rE3%PPYMuUxNra#c~1d{KAf0aTcuyd=B9SR_fzrX(x zEt}Wj1YDz_C+77XZ85!nb2wq1S`9*j^x@vstNpd5zO2QeLu7nH!UvZT&?OBp$gB(X z8`cM-C+8r8OHW#r)U#RTXkIb#%;vp@OwqlzV2sZVyG?z%a$m+!!Em~zE#RL`WH!A;i=98K2`ffUoyx#qg_%kQ`^a^=Vp9ZgVNtf3Jhe0VoZ z=Hpg{t%^#Ax7LDyEq<%#+3JOHba7nwkBV|)AeZ8yo#ycI$i+ekb`fyR5xV$2&V_lT z?uRcQdCa1zp%Ld251LwDAu1RUpSb#Db=R<)@kxMQ*d*i6@1rJRB ztoV0e#YrP*`^x7$FIH6Hk4Pz{d|uxI?z!mKy#Hm?V_Vr?UZ>hgGXQqT{@lCY0EF3B zSA7zefU9`%j*PSz<;I=q9uH5 zcBTHGvin0Uysnn-j0)3EpR&LF_B2B$-2mp7tm95hWYdtPNEcMY#0!M*(=JGt7Pr#> zM*8T_kGC&A@NacK$BZt2`$lm8n9U$E!*1oaZbAx z{XPq(nQN}Hx$zLNo&NadjhKSOp+%wcux3KyQ{yz#_(>D8`-YEW>FF7Xh{wuPUp##*e7&iq{#Sh?VCdA#{dwxm zz~H+U^N8m0^yKr4#H6QJ`UvXh)@0JEpo%`s<)=A4VtdV`3AH7InrYu3_JH2RHy*`K z$;`n*-@Qo>_;?7s+WFEWHz8Ig%Nz1&6ApDVDI zC19187ZrsJ4atgS%_Li}qz?nKBAc|d^wjQ|R&os+E>L~~eObUQWKrez1DBAH>G|_p zg=3ovx3P1MGpPo~(do%`NKEiROaY20ZNzVJ9Im1-J2fz&vngY8 zZliZ)P{21;0eHT+OoviDJW~X)E8sE?4m=V!Cq#e=;)RHa$f5(_$21v;V*uQwJ@kx> zb>_0f_QP4?>8ZSC4il>C6B{^IX^fNtR)9yy0qB{iZN8ox6{8^p99*V=GL1(IYMESf 
zYKx~!odF+@o?S1fF0`?-GBfhAmG%tQQ@RX(A!k_mWTPGibQbr?gq~ZV`QEb3u)CKs$Bw<QKc`OeDGQ6%$ZXF5J#Cb`=C$^|f#s2h_*t2J8OxPcDvmlA8v zU`S_iVHRMJYUu(@0vmuMF%bnM)Y(~``-C*JKL+%D0UyZa*)9~Yw7z6zh1lGcAFc02 z$)}krye5E+cm@zR3mDX(4pId!3(uC)$i!5feX`Q`^&w4h^2|QX1KG zy)8%$zV|(9Qk-OKlwx;bg@dtsS_H;yJ6wJHH!5$lC0}^zt#K7OyP&`1Gav+x`ab302u2xb)9M(^Sd2({pKIHCa zEZofY&v&Zk!-&UR{f_w~FkaH*XTFCaxF*U4m(JJi;a_3p~ zA49i(wVEw>kXnU_Q<~KrjD<#sxU%eeX3pf8OcZ|m7IR7|uy~B}|CqM6y1Iymv$WAB zMfAI4$qM)3!|4VKqS)A2{w#h=O_C%NMJ=t5fC-PHWpDkb(Qj?%5kg%2LfALl%IPg1 zJsWF(YkfqWb*}f-Sy|FhX-q=GyF&R)z;&lxhGPKKNI>(mq<0Ny^G8PJfBW`q(F=G_ zQ*GPdcVM6nhndv#EfSt*Yjfg5N>~|*jHztpgO!z9l`$^a>Vfza;7vTuuy&QGv4Zfd} z&>wP+*08URq%~I39KxQu^aX)DPHt4W^5&`C%DtnRUdKHY)n`wCH)@NK>Ix?iP;b+4 z6bNgg((P`%nDPxXr<5MaAUULkUc`rrZgr`J-_k;B6j}Xlaplf1#G`fB`vetasEa6* zl=T1;6_`vkTe*dFCX`(M;^#g%H*xHn98IG@w8<^l&)h*^9W=^ZY=#;}ptpP1ACyU^ zsG+{D<|X@_C^Tbw+n@zXSc@VVUcG1H-=f4(i5}#M^~6cLW?ezSmc)A?)nF)`nnaXE|#J z?@i)h1xiUCfeAr+Ce!ZiyLWEW`v$CHX*mfY{>0%%O=v{s0nvi2XzF7#iRl9A#RVTs zW@i94pczj8v>=kAc2+5amg~1?J*ImPXb&1S9{ih|_|2Uy0FU_N%AIfZ+#JovRm)Ge z2iylh{sFA?UAFBvBa5%^Q0f*IurMqM%M8IpaNR8Uh_iuvq{O2BzE}mi|+JHk1 z4=g)v4xNEC`K`2+ZLuZnv-jyzly(4QN$9{bHjQOQS$npW2anD`hgk5JmPg3WjLK|F ziFLXv+ZdJDM36?q`f#D!%@6H#leuB*z25v;9?ORTo_!9Z{pKPJ=a60Q=}hIJ{bmxo z<(llnjwu`+vDFiSNJju(&b^QMjHUXBOl+kpiu^wB=)Q~X3m-GI8|-(H%{*smjc=mm z5%+VHdFZx*AQd)I;j22Mp9!X~}Ifeai$Gf^!O_yq^h zh`Y8=d7L&Lz0ls!5P!-(UzH@~8kQAEp$mR_hKWF&)YH3SB!= zU}a%30)ntquQlzsH#%3nLa*$JjLclk^zqd>+vT-w2KN$EoBJmH`z*MC^x|la5!wes zY@jKIT6D5$-FE;SatiVuh0{<6tc0Ye%YYHVLcQkPSa&4zWI+U(9b1>YDy|SS)5iLE zX^B4H$#7nZNEm%>Hp7LZll6^9jtBrs7!r31!o$dTANb!Q4e-VcvhZIU0A^AQR|8;P z#hW4$5)}n%7cRBWrB*dr9eQK(AIx_Jec~ZHJIq5bR^X|46fTd??$481SY3&X|5R0~ zVNWEaGr&z5FZ1-&sAV*?7Pyl>+{MSv)?xm+Yf4q^qqGINqT=tsz`F~K=_TSMrh7fR zr%O&#nzhD6`HhWoTKY1n&GyV-%n3J#NWQqYvM13>1oFIv7_0leu=M^)p7xbp))mTm zmWEJ0La1ce$lGDW@?>N6S^Y1$azxL9)J>>{nWw2OD#QBf_}0ekd~d))OTT5Z__(RP znhTOtd2?pkNWU#y2?~{iFg-B^2cQt7HSI;$QVh(YjGUOGTlE=sl9W=2pJ(kSU$raQ z2BkQKW3&fne`e|Hr-amaQ>6>EDJWf)1MwD0H 
zEbL+ER$i7_jNYH`Uf-F^V{l#^d`#+S8cme`bl=nVdN5{bZZl2I$Jn#~;HB)?>Kb9& zx5_?KAK3JuBC~jdPdDvm=6I{LiK=?+b*<|*?8kNaQ(`BBp%}6-!66g)nKwPY<>d0# z1O+%&p?g)cna*2R%_I2Sl-X6n#jF(ya)GbxupIK4-^hiQ0Y2_%(3QWQ_$ZmM zmqz|RI)I6oHKf~FrrYJyaJ%jgCY4~qEw>MNg*w0QGfCJNhiciAU=z(h27O|3ay%-on0#nbtewG0HvTqT z8R%0U_VN29#D=Zzf9nj;Y;m)9{5tmo;K^iSZd}kLc1*Uy?frqlZoc6=aEdiJRgj|3 zoFSE)Ew2@s03~nIO|p_BrLOWD;q$WggWXyTmw#}O{ZSh1BKTM&hXlg3X8pZ3$P0pZV78Ph$(L+<^3Q-gDHHfpiIzr z%a8>WS#hy$$OA$q?tI>GP}VO~X@jxtvWC{M&C`2B(W`<7S#r+^d^5yF`-c&*j*}o& zKAz`M2etj@|>>+9tb z*-_&38D~rRJM8m4M+`M&0+(_3%i61Up|I+gj*YMDrW8+H*TJ@`OwOuKp^;sY19m(-Tp1lT_ z59~g6a~bh^m~-j4rwrp>>W*zWo50&^+`;gBc(dn!D@~!4@JbmAVlXm*sG8cj>~EuN zpls{A7lZ8?ev$D(j9kup=~Qsz59z1nx=r5H7MagH!OV)!*N|(kr3)V~g{;)zSJZep z5|EIjJGlIm_VO#8s>-@0-qk;^|Is0j6k>`@;DA2{6bd+Hn<-?^n@nQ}@pC8U1D0U^ zO|y86o0HTVNY_Eb>{BH1FJ^|MUn7sN+kEaLbuhPrmO9o)3{QlW4^lyGZfiUv&DL_c zM&1V{7S})BY1FCVU#IMtLKQPZId}&$uWql{TwpcR-3^}me9{=LsFVRY-`DH8(AH)O z{q&s~;W#ix7bD_*90ql*WeOf&Bcye5~V1n z#Ug-}O=!2K;;pb4_(hwr5D(iE3u&b{6z7T_I$$k`O%dqIuA~7K87IQ6r|&el?$5Jn zRATuq2->Xvv3OraQJ-u&J_Q)tpCtSjOPn@-eZRi6>?8O?>;S+0t*JV1QQ)|e%}JA^ zROta4&_~lx=UY#H*OmMKsk=XvXf&9| zN}xP98Az`RnHU)Cm+p~rIZm=omTAMHC2smr$b08*B?aa?r#I)6A1xM2XrBk+7ebzO z+blXwJ#e+A^BI7s0>@r-`0#xRvxPIc)xFKuFc-og! 
zJF?!-(-sfi`t&J9!N*50KmssTzSM3NS#06ym|vkV6$fiU9N7QI3m_mKMbY((2s3C4 zT`-ymyZ3Y7`D47lP1qKW!5H5Fv_$iKYHErFXR#?Lp{#@rpaJmho=qn}{U#Uy(tWm{ zg=U0!n*z$NZ{3g54%?=ifuIS8E&MZdxleds!0jf|TA$b}i>D2Jp*!#4N}?#8DfGik zP|Sp3Dp$Gjrh@2!v&t|m@-UkBnLk=g4UIz zD)zb8)tXe^3HdNPoS#ioU-yj{+cDDX?M#0a{zt52V$#EPC`^$mZO|KYE!sYkJuGAE zJ($O={BmG;H6GTU^EP>}`~BDWbp7su!mRi3sA82_))KOhGbgBTGrZHXW8GYMI{9C{ zF3_GQTiNpPDCXCMU+1RI+~!Wq4ib{$QmZHwW~8sTb2??Owut6OrhGXq9@w%WI`%b% zTMm~>0^dv7WBBW1=5w9fguWc_qj)#tp2`-Z?}*e1>QcHAu(n{g#;q$I_0jO!7bE51WQIlZ zd5vp-1svmY@}P;9KPi*4%?KKlpI2}LKHC!og^74CUYMEI(Km03kNQ(+rqvjeakF2b$Wop-~EOfUAiP*l1km3Q`RH9z~`KPzi4`TELw9S0LY&bY~KL|X3 z?KNMa{!CO<^mu3Ivnldw==f%`Ohn+3pAGC@`*z1E=7dRHqT%>$SFVCUd7+d1Vp#?$ z*<3kMI)RzLk?(h|CS9g^jZ3{Uq7H3P%KLp$y>6rE#a-fTr4)Vt0WXIS0dJ|{+|@F! zm$e73D`R|OPr?U=ETId?3ClwGWvlAC!&AkyMgq%A%DW8bufomWH=A0gf(E1~ z9uiZ%cjWgXeCT{*;~+qOHJP1sE$+h?kThJV*)KmVbJ&t4EYHONlHq~ z-|~JY*h*xmaaZ(|e)s&6j8(qvvW+NVdRXbcEkLlEaIik3b4D{rE--89AE*|*YYAU8@%ahDd~mU zl9?sjQZ-DRiX#dDHK zg?gXvoRh{AtMwk@r#;vBl1>(1@`qN0&L0Lodh_6UV8e&J!u{I@{fnLqNh-fmR8?UJDbb2vNe3%17R#B5oTYZRu^&&>d)|?o z(f#Nqd*tp$I?;UA7VNg>dd{@ErZ9XRFxAQyc$01xJINV5hkzULI9*1$H@=5rU^~Js z=yilyrrF?XsB9`WAtFrr|06_$#m2O13rI8j?q(!}31Y+7xWpv1l*2adPS4-O{jRAWCiD z3nQ-XB7o}$X_ns#HGDcUBr30e7&o0mfwy>%bZhSu0O-efnku-NJ_tIyXp*>W4Yg*{ zCM8D|?p8EB%ipVZkrcPc-b-OEH+}kt2IpdkpA5+T$N@#;jEyL*UU@39m0zI?ZjY14 z(lT0gs_(~0*!8QvyFqoFMGqd2E9yQeX&4Z4`BKG$zBZ2G0$oJ5*X9Pj$XUO)82H#f zbLtDnxEkS+g;YJ)wsnC{r#pC$vbysVwcYGe8F(DDx7@BkNWGKWi>Eqoy{c%%2rQEW z|4@mHeU_Gu#5IL|QV@3E?`YE(5lV>*v927?k-__{C@kWZo5RLx&JZNj%4*f>Wy+I0 zrWPNY?W4hA7;aZLS?|tjsqD!1>UIAr`yd^J=#82SpGNi2GD=8j*w^i5PgoUi&bKi4 zMa72Ek+10@+*ag>7c#ST&UmyJkDu$x%0*htWMJ#r;n~K;y>Ro82*MQ%KBD6hh#^g- zS~_6#?H3pdciNpSm&oee5hq1!lK9oMg^aXlK$BzW8Ist3g(-9LB%|4vwP@d5<%(MT z(vt>-tM$`(dMsji(Y@kJFweE1^3(^!&V~p4{1?Nk*Q}c)*Uc4NxZ-nTE9HoHUF_p6 zHGgP?TJO*#o7K(de&O&N`1$si6f)`=;9;&7y9|sDhzyJ{z$&-KRD7E5`$8nA+((Gx zQ0qaTKU*f#L^RA~x{7NyBROIQowFRS5bA*?!!ppZoZup%Vk$W;h^yKT8DlFkF 
zpE*(dyRT?6EN2oJT#mR?{*=mB%GBn=_{-61%G@=6<>;P0_g}A`R9lxyjf<&Ir3*rr zCA$-9_PeAY+be!+y_9N8;lEjc-`lfbG5ujKs#VP2uM96I|NhC$xS%7k5ZQ}5w)?ck zzzh8i2dyu_jN>U_9~;f+2q3wq%5xr*G_s;f!@72_Zh+_Ek#jOxsps=aQmtMvjbSPf#bBN zq6I;4wHrub>;zw{?5@<~p*w}MjEHwMa4PQcp0iFvhqo*5Is1{yj^@yAUzKy=JXtu^ zM1Pl3jTgaDBKJTG>+5E7K0#%FjYWrjf~I2LbhgOWnWG;%^QcHqm0(%UOsKq7Mw*nG z$%EnLhOe~k`T=4^p7CYH-mW&`%}-1X>g}j7PEIH#{L<=d7pm-*gh6XnQTR@I%juA{ z;f`CgcFhNsQv8l)_@OaKREfHe(`41u2n~JDiEJ0@%t4PceNX@OM$_q`zhM3lgv$uD ziK@r8BXtL>OXNAaxe1849h$8M-0|z&<(29pJfU;_vdYlqsNY9RHk4-<&;?mVdAfZ{ z*I@EQN(8w`OQ)YS3M97%y3?23l3 zzOkh)nzBwm3VK?LA;_p~rscFcwfZ+EKbE?g_)hXRV~?En*`kD~oUAOBGaEnQ?)h!f z7~ZaspIz?;6t?uXU)DLSqUcLfCc|z1-r?fulXS00lj}YjjIbkx%wo&zf?9|ts&)6c zScw9LNpIg)O|%kyp{a;e_vd04l9oEf+mdc&o~6LMk~4KTaDEP=B{8mkoV;Bu zYOmiEAYO~#;FHqtT!jZa-P>}{5u`6y?#}P7?QZ_;xZdi#E=FNw^n(;ld`ZII7PZM= z-GV3id8*>ywJRjJ!u;OPt_i_-iU->qB(Yq_k~Ql(*Q{DChLQQ+Dn{;4;-?O_w~lI6 zpf2o+{BhE2Yf|5_PI!8CP9y2!Lg=RZJUrV35Lp#Jyt&TxF!0{G-xc#9{dM!v3%5Ci zI7_(S>2$8yoS))io7=R{rH;$6Z>*z_F1<>xuB1qE4m#HE|iaL_Bxm zMd?dEO!4SAbBUX`@64VpiTp&mgK#v-vIPrT1r^r>+_&7h6PFAcU3MH%4mh5yS1VV# z-7>CkD7nX>7K?qF41 z{N4!`(#T0He@b#t39TEOG3SmBDHekZQ~ zPNjG&BYI=0v&HmFNKl3XQiz&Zol7{ZOEqzHwa`9e?ZXK-(nv}e-Rl?h!M%JQBZZ^F z6^LD(!`1BVns7G*N{eN7YnJ}f4d{N zH{cDS3zAVGB#PGD3CH~u&7zgbJ!5_k_ww1aurRIDIreP%PE7dz-R@K8YqYP>rC>#y z*RM*Vxhy1zl#vsdJVFSRMui=M=Ff8kqk`rSd^2(kkV@^hrx*u1-HszOK-i_cL!w zybRs%e^MGY(X~->x2|5Ih;E7@`*gy(H@P{&Ui|9wa36NRHDSivyQ9AOTk(-`s7=jc z-`%0L33T56QJD1`4>Rm$0ghK#xShwI<4gR?%QFlDO9y=>r~V0$J7;clbEPyTCZ+rc zp8PnT{D{Y?$%1zR7!k74@M%tDJ+1{smCGH7=i(R(wX+|icfI|yQd2=ccPNs@h~el| z>w^O?(Xm-AWL5)po1*{(vMhD_lZ=8ElP$M=v!qR z-rvoLz6h#{#n)(cMnQogZfeSkTfIixQD)pnx;Yyk=1&eLloV zzu5d>zNFOSlJ~{>X_#8#g8qib#`^km>_C1SzdZl;wuh+c*NjtVPn@6HeQ1u!9am+g zF2+n!LXdeM^DOJdy-Z_>gnF{J+Y|Q})QMpvocIU*&2oFn-vV3&AdsLM7L(+0^)3Q6 z=q>|CT1twXK0~q`wq$@R|MTRP8gBGUvlXVw0?bUv9o@^AL6_D;2u{k4zK;NQjB*dAJ&-&vhb>4pN%<4=4El2e$gVC|TgoGey6Dk_pY+a5jm zjq0+6xjGcz(NqRntBcvIow9|#xlRRX!rhT=v3OL~)}ONHUO8qg8gTvmHP6&_P~7!K 
zh0yJzR_`#cp~=Bfx9Ig^HJ_qfp6`2hNXr#1qv#(hp~y0-IBpwO154U1IxC9cm)|(P zG)0ACgqO@L8T12=`0}i(3l^LVdURiJ-H>toBZ60-01kMiUzTg zxDvqsdC*;fkyao!dmXySx8-N@nP2Mj2y;_vWMGJ6LPCY9rLcuA=^E|Jq|cv8c`q1o zORi(w+Y#XlQD<8`Q1S#8F7>j?`(KrN)aT9hRP3prmZ^c zUFzp-=9aoc4q~W*TxIdYIN4sLMFxvJLs!Gn1aEQE4DM58J1=|k%JDnX-|ZUhkQtG- z`~mIA!Q89^w6nX^mwLN91?4`-0Z(W`Y^F=zguNKMy${nZ&taDc$-9mo5tflR6Rf1m|}nAy%VLd2MKXw%~uJ>kjw|yRahP# zYS5;0xX^Iu+me)rO;Zb|Ds@UoW1!DT;!id#SDv>h^L;I8eF*n?=&S&38nQoZ52_Ah zq3CbOsy{g*(f%US_wC#7b^bhfmpJQ^NpTj&`2$8i(`+IXeM&2iEiLoH zk=snbrj{C|;b>u0VjI?(_6&akuP?bVaD!|QDcAMzjfLG{n86#?H)!Ip%JY;5f-I589PV5jOiG+marlOmI)su_1rLdq~2$P4Zd( z{l=c={gA=V69ZX_!8TY%@m7q?|FXnT!?y3=%{0mly4M-xizAWs?8dWSBh~Jw7jiym zZFBb~VxLc@Sx~?nMOUViD0_h`Mr|#WYv$`|+HDyr4^@OD$Rg=2a1bp8Am%7M*XI%k zhfhd8;3$QGdUj((wRP9S)su!XU$0WZl47)}A5Cj-+^KV|&K@or$V3@97pYP_-H*jC z^y##zUyxN2_m0iuBu43;Xv(^4zY&y_nwC*LN5AD1JrMT3dU%*RFdEUjp)Try~oZ{{o7EC za#HvD>{@9RB^q=^WJPrqBGjM3sZ-Cfv)A$WChH}7L)^i+Y1lk3)n#zJ?C={(qsCt9 zSC;URF=;+3(2-{+W@CYhOq81H`oSl7J?Iu5Th|-!3a8{{rzmKZ5UWkCK$I$6kz44!@v~@;)i-bjqYo;*A^e64D zsPZ2k`8S@qyV%FS-*ElCbHX`I9cJ9{-0BVw`r9#fNn8-(2gk>9=XkBE?$h)#Y*@B> z;8LeFzs*DmJ7TSod-C#j2Z)Bf`0kbPe%sY(LQO86`r>ey0e-OL;&b?Cqb?_5sd+|o zWc1lavcN}tE)K8E?tPW2@b<0^SJMeKN+lX|T(%cMKW8QWxJ#@L3wKqn+$Bi1aZZ69 z($PKM?FPS}haI^-CoGJE{-Jwv+~*C?n;Ikj96PG73__&c8$XX%<-Xd#wVcgqFPu76 zo_fam=>}gg^d7H`qWB;h4N2a6{(rf|cWC{q*uDgV!=X?f@$kayV#MpM82*_3xj|8#gGBjRmI=I+VI|fphx}MhS znci`v$tuhVJu|hJjM)TTArib-yuY)Kh0T+#uyc{BK>&Hhfb?myb_stT$@3aQ8Uq{SVnldrQg;9%n@x2xOWU$iQ zn)V(UJHq0&Zb&qI=Jg6E$81_#WM~2Lm*DR_6%DBzjE+TWTH;?Qx zquTGGOeUsM4JIxm^EHK;4GKsvWN=l~)ZWVN)abO%Zi#8~Xri|g`q+uSEag^M+L?X7> zMv9XTje0nS$cK|-u1W1i7~YAkuM^kc&i4QXhnME>x%MvY3i~vF5R<-gvv7cYy|G+g z*I@`iS!!3>Izv2)2+fnu=d6+63&=xIa0=QkO75=?4_=yymqV_;?M)u_J1Sa*dajB3ky7KKD<@WUm zRIG}1M#e0R(NxCw-!r4Fc{UuroBVi8NvTja95iRryWzGrxu!L3JZ(EG1@j9GtwOPcmcfe>2l#MbsWW0zbaV&Xf%uv%)vddM)(;OOPYW_H4R9jN_ZJZ{%BO$n*^U~p9WpI)aAj5^XA!aO@DJ4H{toPhmK*x z@{k1n`vq|tZallVzI^wQ&B1a5bN376s;X)LIxo`VV!nZPxmU-j=MxUhlo;II^!n)6 
z5AQBzV<~fbB$uMYO{#S&Q`p%NH_tIw!^1nhcGNojajWK_TII*l?phb4GdSeVKd${< zq@GzQ($-mt)*Zh~`)SS`{^>yv|CzFLe8QS*LYC2EGTpNcE!QSN`L&%$eh=U4xlHE( z4Q`HP$0h8xr3*Tgg$ssSb3qO0Z8B;qNv!UEo@PWxTYx~{g-d)xF?~S02bBMfa;jT` z=4HQ$w$5;oBT~60dbtGhA@hi3|Af7jpb;U*`;hf^S-DPsO=SdZbpT?#ln?uHPz$q` zk*JF;8^w|(6(9}a{bY_+=lnqxEsG;~mDPE}<@+a-&Y9DpfVlQ8@&o%U;rO_A($~yK zw~K;S61*!;`0=w3A{aso3R0rdwP$A4)H}^4lCB}}%Uw!sY-yDW9cwrcf2>P2KJJJoKf4i%ad<^eYpS z2S`B1^ruYp@9^CHa3VHl4h}I}zvABa2W5G~t_O=`nKS+$m7vQU9ig`iAI|rurrgSH z>lgM`RwTq7U%vb#@iHStPEBpo>P*0FlYH#e>Vw#NU`Hk)zIqUtg8Aoa zc~3q08lK8V%@*E8w2@K76WyB{q8pLxa4`9Bu6v(hIR%p&wVLJNY1Yf_a4VUJ;SUrG z8Q;bp+ckI+USCje2EsKQ4|3biV;>426B2L6BqpjJkYw^F+w2y2VWjX9sB?7{*LA)d zL1qf*^q=w6PftNwt~5mtM8hd6=o9m~d>$4Cip`C&zkmN`EiWzwHH>QC>$*wSPn{AL z06A?QccoIC1aK@$XJ==W8uB6RfiR;mWn7+dy*7(u!y~LS_+A+WTK^elXcR7;Fwghw z*eV?kiLu+_v?J$gfbrt%)?{s7A`_SEKvvrdnLCa~C3xZ0G|5gdU(%m1*zd0VOGpL2 zy%QVtqTCOm5T ziAF%7UhS9<^20Jukvqh42)kijWUENMNuAdYGugjf04dU;=GkYGb6O0U@U(D19Iiknl&KWc5@Ddfj^@^X>e zoV9emX1nZ~`iE?B3IpdslSG2|S)jkeR0YP!jd*GnR6m#AOV(~Y7$Ug)$9Ajy2Xt9D zC^?P>^8?}>v;!iS@n(CAX#K)k!Wg z?p&JYosk@PAy(0+rXIb-+ZCX)?DRdWfc1!ni8?{Ok7+*uv0kek z*#`T{im|3jBxJl-q8;huCSAI5=H&a(!%lyPkC1?qllR$?vf_=-Rhu!d`jPZE_W5l|g;A z=BxTN)tj{kyS!G!F{90zvOlJOifj$-AGDwtO-NSusCnvX$78Ny(QnVrYJsLu?PfPB z{U#S$VPPPD(ctx`z)RZ?#JvwoO;_R9koHUb?&mIrZ@gnv2wUBuGoYPHvNKK8f;1Qq8dVN}d@nu}A<)v(R zL>JUGPFGFJr2;jsH?p6NsN;XdkoB2D*J6!D-x)QaQ#JA_5S@jY`LA?A0*CGxZz&J} zxqAVSJs%-&0ry35veZ&Hv4>M{K=GQIM!|PKC%oTYu{~^HAnrA5Q-2VPHx4~+Z*i*9 zK^EL6b?n?peDQ=M!6@CKIjF@%kbyH$hO;>^@EVhOz!{&2Oa`<>Ty76@EW$e~gv%!% zuAVlW{5JVm#HeA(#c9@VsV*uSS)i!=OB|rKKgj zA57wBLPDhV3;O15lCf2w0sPhLH&q^k6RhzaNZ|OzZ$+6RZ{ovId%!{lf|=DI>7zFC%4Y`E=TU6ryrv)qvMDZiHp;GYo;KoQi-v6 z+W{LG5DGP^ihZox{Kh??VnC~m($}^YvLw3pwdm|kuk*nsi+Ube1`nmV(&5^;o^!X* zdiO2*aYMn;O6C{{OOIi zXh(vEX;D45a$5oEQ>T;3vLa_x%mJfd-sKnxbJxHx=>SN`rg1KLjfrv-OT!&|<28R2 zv9b&fG8gIOT%Z4qSk+%<8@WrV5?%Is*dk_5=7RNHQh2JIo$p+>1~=b?*536emlWpi zGA;_5iQ1PZ$xmuwz5XRNO{q1!;_nl~6HNepRUeM_E&gD$=pt}8@ptX`dh6_WPtlg^ 
zEB2e&hpGO=KsZacwrZiH^EVaL9G zTh4;3KW)`(XnnrTeaI`gyfltoh{+L}4h5mnZWR1%a5(zM>m&zi!{K9H>ON{}k6L}- z!f5B0Z#$u{1d7EGsm=pEc~K4semuSpAL+bm={Xpv6Z=DqJ6d@YW0PHT*AW(qsmE6+ zD99Dsim$%>E@PX&nzaiq%R!-Qsxx|3pTHz@&5xbHVK~?~n%en%H(8^3=&q^f0v0U} zw0&mq6UH)B{Fe>5Tb&-%mMyVl;-wT?KhP2xKXBaQEY7kc>e!L~sDN!%D5lkHC#K)* zW?PYJI2_jwwv~C7I@Yc9{=@2J50&H(=2iv&Oedahmitpoi6It}77cz5HMHk521=I@ z&olMmo4v=VOJXxP8@}26hw4($47l`VZC)<55XCeH3Yh;fvZp7)=XmX7UvzWww{PFh z@MS>Uxa0PZpQL6B#&hqam5O558!dk|rQqRfM2eG&)K+%;lu*(9R~^k;h^UgB6x(v+ zmce1K%1ClMxjx)5%A=L>s|Cr_u&^Rh>AYJ>9ae#}V}j-xxq({i!`JSg*WR3OXYJyu zEqzjs=(Lf2Fr{Q#$=OPs;T8H8fO2iarE&+c-n8_F>w+ zz2gqR>HA74HD9p8o$K!oG-?|*vMiDRe)GKByKcDsWRI{Ihm82_e|2WBERuwmJB4TC zld1cfR*q7KLtmALCkzyYO%4=X+5CQ?u4TrqPgk6<8WJ)u`$5Ci43c!2*VCg36?pu^ zytuBWL@n(x@e{?Fqo|pu20HH*X8aQISLWm|Vm_{S>t>$uTP`T8E7CC+oF=7hvqIRy zrN&r%FH;pP6d0X-l#t) zQV=;3aLC{p&7FcMDn#S72abF;WnXFkbom^sn0MNkKXuGC@a8?q3kK>wH;PVzMCH=N zbJSqDiy-E?nU;C`wg{25yQAuxhQ(KaGpbhGFGLnYH{zVRq7>ESngbqb+8)` z?!Su{l98=_}4(+=ge z?WN7N%>vG9)8@-BUxH0|bUkjjV6&B;WQ@?w+OQc-1rN1b!uVvQt}c}3hO~`gk43>XMCT z(fHpUQE415E4+1hEKnW)%#w`7QC8G*^c7lNnXh{)PN#~_#4PQ@l8syUn3J2~*?@I> z|B@s-6v~Li$&;9C3emi#!+XKjV`_uMoS_=B_QnwJ`xCEtD+}G*$V_8?Z4d87RiAoi z27C7O;Xdg-=5Q&Ow?b3|v7UB+guH4G)wm8Y)t)Y#Wz3O{+$l1NCwU%iu3l=^qvC4> z<+ce{Ij^-}THV15iU#jdFMAS=HhLsFjbmO39E|*EzK`mqesbJYB!5SKMD;zlUB;y* z{vwcZLbTDb3At41X{NsX@L-0(XX#PW(o6>L+rguBEXTPFC(m`Ajjq&$3%$nUU~k z7q3SdeScP&$Mw+B(NT|gdml~LP`ad8Httnl+S*ry;b*q;hL%d{0zvWH??r}68YqeP zYH;#@Ajend|I<2O8fm}3rEzj{MqI`zCkYu#wU#rD9b8lNuMA`Px3y=_$h#K?Cf?mW z!bC%((Hz;CbPYCeVpuz$?Ngx$&QjG=dn7rWkaakp3_4q$cXrYW$wjcRvTPNNx9*$k z=MmWStRBk7?JKl-3nLY+zvc~p$^GbMZ%DGkNX_ntU~<=?c?V&)!+~HNieh?lLAymQ z^9(wfuWHF36t3cHeG>}YoNb1T<%vi-cm%|v2@74 z*bV@KS)ghSO*F)-s;;&I>18b>S%HyHF$YNgH@B{lPXm23p!v&ZtK++oGiBXtS6^*2 z!#OcwE%?++AK1rS9L`BYtnV1r)L+lIF51Cah&o(znD?!Sx~G7Dl1Hn-h{aj5CFZe* zX1b=`y?!t3Z(*V30_yW#r-qSl_a^Goo(R5c_z)CURMng9bN9icOeqI>05cie{ z*$&dg=w?$iNdzC;3-=~B(+3+Q{p)QbS<3F+6}?u!?#FXVsh*vIP+OsnUTgd87$y1Zpa2l9?31G2%2`(CWv`}e8N 
z8hFJ;a<6)V#y3>%KiMJ&(zGpfR+@5l(b?+xo3c^~Gfoem63JMFIo=Gsl#>g$nksW- zvwF4q)yy(3>D$<>v05?XwPwXAu%_EgcV9MM?f3+2WgMb^sYO%Y6K^yiLj%qD-+$$={Bfs+Y2v3o&gRw| zm}&Zlw-$PJF5l25F~cusTF0ht@n8TV^2Wg62(|TPFPEkwyd3n7m$hE#h$A)$z&L)R zr?=>-_;kNX$-;sW2o*y)(@d5Sk26L?|=+EwWU{VrqwE1F- zr*m2Romboaoj3TpK3fi|%LWr-Ih|HISnaxH?jukQ5cum=u)e;I24!rjr~oSU@X2aR z2r|DR=jQ*_ZR}`OF!x=czOwCiZFDFP1-$fA{V&IHi11=^!pd=sihrL7aM#*8UhBqrkNS}m4fIR<-Co5weBDR-e?FvfZbwS- z3L5^m#qhHvAoRaTy-tvdN~miZJNr)p42HC$xIXp_JZi2?Eg9p7=X)N2Q%#rVEYtUnte#!4xgYFwc1 zem>?(LtT&V{ehX*cq+4=Yx7plgy5gy1DHAp7jC^3L5t@1=HWRtn>QC^Y~)`eBk2EL zfJ(buIOgE!i{QyMNr~>+Q~sYtzY5+n=jORhZ3|6SexI0N8YgVx&zTL1p?-z9M3$08B`cRK$bGvW(`&#W&juXtQN@+GUW{@$Z=!l|G2Gsm2}^aT99)+pT#JFq9|ft_B+V>VDDjI1~cd%}WNLpW<#@hK%2uFc2N5=&b6?V}46g zv{E$Jo?K=vMaItxj3taX5qVE?t-x>WDMS6v{yq8E&)<#zo`+W$em_BJT)2~lg`JK0 z+Aa2KfAq}v?me3xWBg%VLwUk8eXMbgAcn}QtAkjC^j8^3q~6}9;^DFC>_HTEj@K0CYpkS=jcHQ_Y}dn> z?gqLi^HA}nf*2x5cyFrY4{9Q>cy5#T%*-gCvJ{%f?^#({oyQ^>wE3Wl0Yvl6X+L6= zdi{RI!u0OOlZdH{K|axG$<@?Rh!ROiPM)0O4efq}tomH-2dsUTF#VsW_Q73?s5av` zfeU4On#Ef?us^eG0njf3iDHQmoYZpG;NRK%%Jj0f&X%;J z?WSnbhlz7eFj2!0d2#AUXyX(RN8=b*eu0t|DJY|-O22ft2>BBHGD|H5^6!Kil6(IwYU z>LA$iJ(nBX?_31DkHT)XF#qmh$Kea-B;j*TyXJF^^^Fa{Dxehe`rgnW1a$n0d|SKM zo(c*Ae}`8Jc_~2#aa$G^7WhqS9;? 
zgP2d0J+HfRt$#4n<)x6VDzM*k3NzqnM=H0p0;cBnh72@m;4o6vv_v}Ns z*pvOlb=O~(lgwLw5d;QB)rU1@(Yp|tu*k@d$E4(Prt}_=-UHQf$TvvMYz{K$UcN zfjU56=BzogqI(tS$Q(I=iH~_n6+Yo2~ei+8-sOuI{35xi~6#1`%ALzkW-M1O(GfCN>8T=WA0}SB-!v z5ST3Y-xkaCOIPecYIuu|y*Z_6U;%PKq(z4f#VI77Z>!Yp6}y1 zSI2lx|K&w9Y}{8Es7#-1*x8Mp3vLDt2e~AEd=Xvteu+OUThe7_MNM=$*6(P4ncrsm zx$Qh3xB}deK*ZUO^B_YalEd-pOJMJ&?Byj4%BQbhzXpC=G%>O7tB>01?N@#QQpF3e zKg__a52&G?X5QcIM3RX9y}?7@S^kQhfXWUg78U~dKLP5`Fxd?44)woLy8TuAbR7=L zY%H{tXViV26JCGRma8%BBt!`;#yGj!E~Q>m2uT8sX=QN)Jpq%VNnd39V*&!{_r=pA z$zrK~w>RBJw%{3$YeugfX9bF0j!7Q^+dp&~uh4#K|1{%7y}!a>p7U41*sWi@VXYyl zb}dhi>(isD`NN|n5V_g+(DQ>_ZV*TG)+^ft^vU%Ie&4&{~FNq-=bt;F2-~9j`AJ`2D z8u&-CxGVhxVr@6DYRFsVmBG`kVYcma&sO$JYh?LEc1Ga1r(JEqeO}f)*JgWQh3=t0 zlPu(jFCC1%jo8WP-g4r0J3JRX9e^Qb+LueUmVqbO=&8AUSVRO!z{^_pi>ll5_YIsG zf$3XuK^r+(sDJ+aNd$>sDdqFOYvFA(2sOk%3oFZ1Eb@AGa-e*f!takis}_Tedt=u! zZ6&yNQaF-%PM-Lc9JdoA@(y})<%Q*XI!|GL9PQsCsij5A)%piNwf-RVeBz zrt#M{PszRv@vtT)&?q(syjz_7N!k0m{Ajp{k)%Hm$p@Ah>s9hxz>-*ljC$xfc8h`k zv#X!?*Ak!r*9-Cn$8MaNOh;dX@_M=J2FSrys(rip;QDK@i{k~)8PQEcZMuc&PvQR* z@$j3Q8!hKU5>pBnNsLuC(07c4+RqafTI_iWzr_<@%@4w2VP)SOTkF2rKXn+cUhB63Po0H?Xo3lkWhuq;vAD63wtnehd zXsB<+>R0@{d;k1O7s+&N6mdT?yT8Mx_E|+n`WzV82+#0N_L|X0I$UYb+#1$=?!2w= zmGZ@BV^ZMY#;=fhhuzrt*Lhm=np&Sz5H$#uD7rKuZi&gzbxIxByi&{3w7c9t;lrO4=JWxSzTQYy``aMq~ zNw>|9R2LV;e#?oIWWJUZCA|+zM}6{pz9`h0U1@L&&%23-eFi;!^C~5OGJn0^VG@ zju~VkZtVWo3o8KI1{?2cm%aDP9ve7SRaM}HO4I(+9s=cWadFP`jc~QUom2qq0a?VO zo*2*WihNPIJ!%X`KJy4*ADT6XeV)0 zI&X0131YLqt=l_WVmAt+_eN`MZjy+BX&xC!sVjMN?)TJ8#7<0R6f3(cwpM3T-KN;| z9p6aYeKICBGc$YmT^Y&Bl0+(v2ALtC{#O%)y_LA(T|>M|qFW#NR3vVFo5Qae95T_Q zusBA1Vsg0y0ozAdIK9uY?OLG%Zbu51m&e18@bLOK+~P7}G^DFzYtlf)vswVN{d{@> zxC=QBbJVLTh|D-eQ%NX!czDF0*b?C5v-(Lm96v28a+=Y1*VG1zSgpXXS9AmUdcwH- zmoVC~zH0vIesQ_u`Bcu{eC>f1M9|iWC#$T8734Xz2Lmya+rvXc&**!UfIL%g4KYW` zhEpSeZ%Stk{w6}_-bFV6bj{SBQo1SUahh-T@R`!=Z*KR>`*pCt2=fw#i%c$s2ve1L zgxea5OP$GQ*jLayyniomkntjpy0Eb2q>tjp^J;HNHR7xvs><3VQ&b|;uZEP+k`%5i 
z=rHpGVEqWb|JB-onK~UE9g4bovZ$w1JmUJ#p7hI?oi7MFzz_!-n`&eYXZD0)dLa9~ja)ajl}Z&ODeY{`~CG=WtBcr^JNs!U<7hid^>e<)koZ>1osdC{{t? zG6S&;gGY?bvhOnL%UjIcEq;0a8c9wz`<3R5jn<@u$e<|HruWgj7|Xw_&!h!`A>K=z zhJ*zD-4=vQPMpTkwe?KzXB89!fFbIvQ%HK>yl~3vd@+Qcgn3uvnx)T~Pq~4(!gZcE zG*vwQaruVr#L>~iqXETX;|(PlS$#k@+BneBthOHsZK0x$FPN9juac~KIo#Tt7C9`RX|d!!P^50oX&2^ZWCjR3$5 zmMJ>M17L1XdT;xjG@LPxYy7!B`_rcZX=!>qyu48-Ktg!~xSm#J%KgHzw@ofdSM~#f z&V6CKFb&aHJMfkAZ}o9KE1x%@S?TGNR4;0>4--YdW4_bzexUu$`Aq2KT5c!jozUt+ z0aIy+UtgxgZ7SNW_?s=wD|38rird3`$Ey)ClQMntZIQ{S8ATN_Ojo%Y^A_hHQ-SMc z=d5gCJb)_zV55QoXO)JL(NmBDfC)xg-|o19_UqR#a{+2}bo98?R2y~Jdh7_!nGgJU z?B9*lmx32)DD-9p!+IL!32aB4xGpJeM5wQ&rD&4jC3AmL1nre1C6Nwh?0;XA}h{5AsCuHy0uKAr2YBS*X9-*r3ZzyLn$GC#3Xee-D+>*eccUVB)}S2lE}9P0OBH2192PQ~L_9(P&Bu$5?)e zJFP7QhAMe0ek=7|q^Rs*;T(>(Y+974Y$nUfkI6s{Lxh9@kkJ4R(G;qA3fzq{pe?1N z2-@A%^_=CW|iCC^rOI_YlW|66#S^usuRCp^IKY8?5vj{>m~7btRF+( z{Z2UkB{Hd9F{$L9P z)zHw*XdQNTqkW}b(S&}Sw#P+r-@4b!UqdM#ZrFa4(w{-ok8ovUqml&46sd$CrGAs! zPrq9j&&#GfMjWDiH7GF+j@O*x;R_;aj6Ow zJtTY0{Q3`6dd6-^rBF{+PGKFSjyxAeoU(;#r%UznWMW>}2h-&;r5ck(=3qcpHZ0Dv z>gJt-u1zv~IR>U|uDNSgF0S&fJxoWgE8EGJ*o33VhXwD}bKkAzsuBBYusXW4THkPp zy|wSPI;|cY8SI|wrqY~=W!W}iZs5fWtuBuJSoJ2Upuq%ar06hwPvxw@$&kbfRON&l+**Ds%8NsM~Le+S>r)Qizj+E!V7{dTCI*EV9#kc zxfD94gP964j&6|JsxTA&<;ntSjtv-086?PWWo6gkEB=BDD8S81`Gm&V3HBdO9KI+I z5ztGg6cStb^BVfq-j>DEcKD6NHM>`9hAdahBSNugux1-w{rNv=d(Wt-zU5oA0R;sC z6#>aA5+o{;qkw{fC^`!0Ux+;jeS-1ok` zG2WN92aTKFYt^n*HEY(ajrVcfgH?0WwsJ=6#S0*UBdyo#j~PrC@Ys&AM=opVuYknf z0R83WwHDxL9t)x!t3t#xdTm=`29~=dSildvs7h5bb1Y-Mc2@6j_QxKX9-eLbMuXnA}}||wWwf&>=GzkBaM$Zx2v|q zUDaK9K^Q!er$NlApaAO3w}B}2+N`VT5F7?8>}bD#pZaI5=SDlC7MvKV$~i3DAwD8q z%d5Le$1py=@PPHR9mMs->L9+1x*6)S4V_x*ce5T`d&O0ipc2-AaQX^W5cUa7HyR94 z4#-QV?+YfX0=&g>?eiVZpR_}>nJ62nozdc$=N!MXG}%MC=01>)G)oIXSuQ_LR4` z_rb|YxpoZ|FRHaX#**7+Bqm!{sV5oklY#KkwGiP{=x4D$uaB3>NOS)XGr9#%Pz`=! 
z&xo8lD*2)$k#}AHil%M+QK^)h8^N6SneJ4DbvK}FxZ94^j)oO6rwy_WY;@ZUuoe2_-Z67N zjrE=C?(WWXPXhhhEPtTDj0&IuId$J=fvhk%$hB9w{^OiS?)YM*4ZVjC&nC@n!jkh@ z9ODayakbxT~Yzmt>JKer-KdpC{c0|=~2!PMS&ZryhB3H(iM z1mp*2XQspL$pA5t4Qf03n;vra!2?wzB*EjZEL9eFmtyOBkDbR^jiJW+=(qi;MfZkK zsTSR5ISVaSUMu+HMf#?@o@GHtIo~%^hgp%Q>7hPYzNuB@R!Wj)gQks&+zRFY+3VJN z30t4^l1vc~g7P>0+zlv=jXq)!k8Gzai}+wT;{5$yc%J6R5w)1@CJ6A*e7U!l1kd)1mgt9JrT<)$r*HZhym>l7%)72s|RC7akog?d2tm>-*WHH*S(=r|QoQ{Bl3;Q(LpcE59mc zY%kP-{aK-1TuF|-&d)&X6vNqzueiM8IoU~PlTVg7LKBmHv2W5A?tUG)-8?%=11XU2 z&apH#rx?QKi%!!U>h@=di>Tp)Gcu(q@k*05Pm)P7EULpi{wyr@{Sc) z?3Kx)ANMcS^n`Yz|0&x_VD%BDsQS!7{A%KY-`)b@N+|8MNws=!0u;3;jwf(5PooSJ zt(Ch8ad8Iu?oYzDg{M!G3@{}}uMuxs4T^I|SZ|!;7*l!hl9vwAUc}_` z{HWtB_M)YBne>AgTQyIVbD+b zYpp)^_@OHD)8y=gXu9OTfARbs1r{u-w`TUn@w7j9LSv_4YTnRJBrDkQ8L9-DN6!TO zdbKOUA`-roNy|206NCESwK(qSEb&bD|~xBVaR&qW5TNkAho$=`$Ks!xKgDJ&;p~4}lkV2dXOvWmdYAmGF&~6>hSs8GsAWV7iW&PTqeVM>vSvvp@RZ1VZa{l>|MOB z78!ndRBGq!(A1WLs;=!463aI%g!&*4r)x0z0SFyrVC9j^S-ZIAspp@l!^#X))M8od z*oC$1Ro<4GzeEA@o3?flv7*;cu6;Xa&N7=REx2`c!-NN>x-1`csF|z3dYUZK$LV+g z#9Vn;JiV+6FxN(m@KeT;4yF~)8g*D+zxPe}z)jBodh33uy5sDx%qFKdL_&O&%^SsY zI$AnHekFwm1;Gj0%kfgiTibL{CLd!r-Y4*fg3+X)*H%4;U(bDo6pXQW4@NO`qpq0( zGqx5$m3g=HujTCr>x6#@Z14Ec+BvHXKFPnA#rc}Vo&9tB=Y|y<_`w8O(mLvU=Y9`( zJii9oE7et^jW&_zN2kL@3pt0yEMs+Fgn-lZBHLSz!6S_gRl-trp2~P#+ zDR6LCMTLA{Av`iZS`4Q!P!VAyf(6HOxlobW32I+Qh!`1Oj-Jm>=OK&?rW=GO($-x| zY@zy}%X~@FJwu*z6zMa?43DFq7AXc5`SZP$e&5#n#(4B#5_WKx^{n8HZuV;E=@rGn zyq(cpM^Vy8WYTCAzn~*4pYLhVlmo{pwxM`x9m;KRFSycy>qFHJ>qnJEkYJ1yQ8jmm zC~%M6lT|Jqim6>N!sqs5CdJ1urxc~Vq_yE-v6+)uhOy2W4t@>!Nk}jrR`_zM<4hBU z>c~#e?X|I$L-w|T7k1?Q+D#of-kL z*CKyTcF8y@{aYmw@am(MxE=O3(+w22h8TNECR(!#K@Ci?ts?Ovh>jTj(?5_(09h?O zX?$0*M%ZK&U8X8#Ro^hNgf=SlGC-raDlyy6&`8jKx1p^9vl0-k7=c~8h`qP2mlOHCnsPh(p%s4pc; zAuoyh(6*6$I(n#f)Yr*os_CO}5(}}`4gVLeElkJOvw>qLE#_n+BNM(%RW6SLlQA(y z8uF#9jkv2Ke0i#r)>IXXoqZvTi_2f6@)mYR_e!?|gZn9^3cDNbNIlxx@ z%zSc#k$7EJ5W4aJTwhp?7Fa1;)I!m+){ryDY+bY1We_hkYk2Fqo$0x4ya!ZSnms?L8S4m%SBP#!A+n^IDfvVn^^=6 
znMxA%dTzql4?Ez(^~Lg&XjmocUr|`i$jVAO^u~LGGvH5Us+#jpkF$cIQ9?;G-=S!`BSdN4s@GVi0C3g z9_Ubda&&u+orfoKdwW~P_WE)tn%~CDi}Q=kTSfisCrk5-5?WHodObect4L{~JItfc zx$~z~h#}XHzvtJjU=6tyu-}eXH-H-?_BkoYFKwor_|wcG<&Zda5nol{IEQ zHF@edzxenxLCmL1^Np95SI$X|zjLOhfeOUJ^CIq88NIV@lUe8$O%A`{^yEY}{g!Um zOoa0sbKor3|HSV8Z) ziqYpE7MB(XeJX4D8fZn^m%=o%8D$fDV`ChT%}2wJ6SRLDXAKuBQT6EvnL1y{2g^VG zjaZiX$Sqh;3j@ltGRH*Q(>3IteS3I6C|S|pg(Z@w>&Npa=cp|g`81%~=FVDuy70d~ z72Xg#SNn8RH&^*L$?WBYz+;3RZu%E=6g`=aCWU7lW*KLY#8XORrn`N;aLj0(OmA6R z9u-+3SslE(jAXCA<`%$C?ZBFN_?@Ld6Ad5<)YL&O3Da8e9eeMRlWRZHmb-ajDTgcw z=%@Y9*>=6%2O6pju#@#W_z5qbFSOb43&vHvxV-qxIk5fM&G`)_ z-Ma%2z*sasT<%<6TLTen2cRpj4`;0tBSB4BDv0t85LRS~#Mjr?AIsO2{O}=0?S+1o z-DjnAsZcOa2nrRQt#LL&9$k72GuS4`W&Tw2( zEJJ;{1+Rw9Ssdl$(X;Rck)4v6uvq%J-@zKzP|*w4_+aZ)=|CG&kGH$K9&2ubEHs0g zJ^e_{du^A>If^kT?V|DUsK{XKn}}U>)2DdgPt>z8pVq{eSJw^>GW$C_`_rY53V&|V z876F%dzNu{Ra!EXJmY)Y^R``f#e4{7WaxDD0|~tPaGK>MClwB^MYSgdJa1+7jg+2A zj&pA!T~vgIn3V{Wbh07;oIO%ZR`=K;JGyJQs_s)uWjRu#0!1nyx>Aw>l^7ip^L*Rg z@W!|1u`x$kIdppI;^Wp;hYs?G+UUu6+@Ur}vWV*G`GgkcU!~LR?AUm|;zf%GkcFfq zr(=TUoeuxWghK6kRlN&?8;(iV){a`Iif-!)97JrC=2>mQoc6m=+g9Vg)_0u6ZiCHn zMCtE0Ha5KY_vpb8Ft=Qsq1$_~huWf;*_6=M%Mvmmiw{S{8&+xuDHbkS4V2;c_AZ#3gp>Rhn#9|+2^seoz z_O>nctDfFZo9Emd<=ZA7xfX(Ox@K&a2K6__Gb>9xc}4P^@nOA0 zDs$PIWt3n%qHlMhJAfd2MSbL!9X(HB&!1kAyQg_-(fjj5oYB=YHn(^m?k*P$Z~a7Q zZ_S3PJd>BmVViZc+H#Q9Nz~0*S#4YQPg>|Hcia}P4-}`t!=7KC|1R&kMqXPd!P7@d zEm2mtWW=hkKzHY%oA){^>1L=_SZyHH(nSbXyB)}K%EtTCty{*NEXQs$dtjvkGujq~ zU7tPSbMP@M;+1Fw?ZI?&cQ46Jd=PCP*mG`f#70J23v0;a{=g-%?6wWL(ojc5b?KvY zJ>jzxJ^vsmR*;iiQM`IFpY7CWcL`ryxRfnjxtQNtkh5^{ zi0!rlCkCy)=M1bTvR}_i0G=PvKXXuiidLRnQW-tb|0f%siHFYFPi;E;-?1`P(W7q= zZr;4D8tQOvD&#hAhCUmoA+OD66|9&eD@!QcE=c9TW9a3G89ooc zfY8ELL7MlkFz7S)EecHgS+V(`w`5zg&h0v|8i#o|+1kN* zXJL_#r24$~wbn%}#@7QNKqEOS9S;$JS@YuI9Z(%FgJGbI?_*v83>O@0U}7F1)P{t` z#aWos*=-Cl^d*UOA8!m}+1cA00?uK+J{+Coo~H{IqL?pHUK)5fN;xIYR20e!A0Hob z%a;0kpl?`q_QOMmQRZV@@cQ~q+x%k6)4G%TD53w&1+X{BIR_Ie}#%aYItWbq=t5(Z)4g 
zymGPs3Rt}m2G73IUu}%)xP9>M|Fco9pg+oajg4V2URRWe9X)gBxqnJ{i*w{-d3cYB zV=yInrq)TFYu%jnhS9Wu(;*wv`(<4HsQnBUrnVjs?`j?A$CDzvW(WRxwXo3A(5=-V z8en-HY_yM(2AQw0iZ&(w6y2vGZc)u;=Z`Rt^ES_H?#0hE_y9p%&MOM9GdjP2p5e; z*P#x(!8f^Zw(N|Ycv{%O5T4i9p}Qi}k|M}mu&2K|&>kORyY#uZ@Z19c8zyh48=Ml< zF2|R)HY6(qt z33A(5j-Myd@kc4ZuD;M@xrP=!d&HJ)?&ojN5PfVm7et3S4?y&78!C2u7M?PMl#O}+ zO&;f*-`%v~!pkl15)y^jdWre|oO$m+;Go13u?@HTJ})T#oRkXo`q)fjX|r93?xT~odpu_BC`0 zoAR|;{;Xp=#P1QQ{TIp$<8jyGuY)GGose%YXTyJ(FZtSz+f#0**{%ns?*nLP$_yaq$J^831ZG}m_fM;||bP}PCo%#sp@MN3l>RzR1Hb$*olWwM)^;uLNQ z&=Xj8V{2Z(w$>?C9-KDizHNSw|XN;B0k>B z^#^RFC*#{Es&iTH=3?fivLO^?7XyX{SG+(1T&2>%gTCe5ynjQtax}OGfz8NPmR63lqw&bsDZ;OUTX#bK{*CTTl zwp>zedYn7zXEEg1RirOip9F)6_`y!8Bs&;0sEFCD^+yk;C1unpZTbpL{OyZmDxwf$ zJM5AA06SAfxy{JY5uRdo>Og_y{UY#Kr$4k^xkWptzD%>^RrPto7 zWbYWKJ)@Id$+H09fvneQw%F=o-sV5a7fLNV!Qe_kj7p4{PVff#A!|Y(a=62MFw?iw zb(}*w%46>FYq<;e4s!|G^`?cBt@Xrr?$SvLH{&orqyy&GinGD@1O>K_hk~eo-JbP9%h^3yxB>3*U`rPLe==%j`6_Osz zLAO>%ok@saph`=cXgiT4`AV$JP_TgmKgnk9OI3muQCnN6o`FGcF@?)JxZA|qqS7yR zIe!p_ivvI_{`~v~?w$1zI2@KSTn#E!sK+U?l%if$B@5$m?fO1m1vqAo0wijGYguR3 zpJ(GuNr)h;vY-#R4BK5o%gh{VGZ zJ<0ujKyE`ks+_lrC_NzmvD!p~CojGkvYaKNF7aQn~Y<1$jC{qu2B%&z!Pab+fa9yT;mcoyLRdb;WFN$p7}y|uT7}_fjvP5U%`0&nQNH@aAnG}z-_vFqvQ_qvw>jVH8kYDjbfY@* zibUrP{A2%2U8>AbkbJihm`E~*;0d}}RIs|jNqjWxcPGxnt_s z*Z43=x#+B_vmz0>+1eYC>A@Y6l1f{DM$~iMg#SEak)}~O<0!GzYa@iOX0{Z_g97h0 z>!>4`&UCB7nm5bT<<@#wzV$h~dWy3F!u%#3N;SUkYFdS_l{hieOg_nrn^EMmt&Y6O z^r7$jt9~3O8+{LXaoGp|AByUKj`v5d4}Ip|-Zy^nvb|?AWNEq7a0h&lFJ_9IAUtcZ zbW{mBBbo5%L+qY!GDG#8-CURdP+Bt=aom*iT&!8q6+VN0RzXqVOL!_-cGoWZ6SuA$ zigGtu=agrxGiW7hh|Gi(G{2yy9VZa_OHB9uOu;tgwoRa<$&*IJ^ThaNXC&N0gz=xi zL*jea3y3I(F{g?_v9>79753*F666mE5mdVyoxsK8a$e0q>wTQHI3BPd(Qmmd=iwtG4>m{hq^HN^%`DAN9b8S$4#VV$6vW@86_D`x52niGMn4J*;^Rga2dGe(# zkc`q3Ps6kb@ey<$+;&93=r}n!0k5`kV>ruvyzp?CPjeQcqXc!H?{Uey6)zto z+mwqU=fl+!g1RA04+-W`}b{lKWMpZGpKS@<~1Ae0rZEk$jG+dc)r`W zZZ&pq$?wH4b%exQ!OqqI59>Y!MJJ#ODCVe=tr7lK}c}X z#`E07rl!yyaodG*Z;t9jY2~|9fnUDq9?aCn{7iKm$g8s#`*Y{!uVE4F3_)KlcpszV 
zfVSBduhbUJ*R2)rPegIJ`+V=$*zXXjoihEqxZ0fbAcf8J6|HA5$=T^r%XW;`qyr&Q z)z*m{zu>Q;QKP>xA9pWb|8-1QKt#>I<260C51n248u?Jf7WA=02T>U?Q{ z;@HCsS(LSp9#`RAQ&LXxCw~EG4-WPXS3y{czcdx!rbv(etg5t{=>%yiF7w z-0so~vfF!(S2c2kjq1HvI+`@J2(wWSg1KFpevXser~kAE>K(c!;{3Ne@2tU@VHE!nB&&DEel|E@Ek~B7h3!`3^Na>Ol z)`pIb4u4Zd>gDV6;kTyfvuDqKXJp-b;qgb#JbFcJn%qtRc|U~L;?B3Uc6VRw3Mqa1 z5wQpbI%wyr@DESqTtS_{=e#_=eLmUfv~emkM1zQI6x%|&J6)WUQ^9x3X!^WDyyuOb zy(=1S|D!ni5A>KjRi133(sA>CcLLdX#p4t;WMvPBtL?(O`e5jtSl;i+rq&CaXBzgl z3z{1!TASS-zK+t{1P+pxKHTL=ik#%D7}22U%iin9XrTI??$pJk=%16ebyeE+^_EpS z-2HP*!8hDTUWl6S#~wvBaroF%R#kjBe8wHe9L@kXfX%i6_n2=MSLZ>g5Y#B8x? zHzfG_6P0{yJLAn_`ltR6inZ5cc>xg?u!Bp$V0JPNowtUDhLJ0-JR7)>j`aTUIPX9& zxyJ_^@6@9HOHKJ&1)JqR_dh`Vn0ACjpQ%vT_Qdp4yLGDV-baOUq*K0Z8RV&VpLHV=WGww&@# z18wfP*Xg8H4N^S@nl=qq= zy2&`T{sJ)VA=PJcPOT3s?kqIaz$-%jm<@h8Kj)7h3dg-}vRxEan>i<#UtR{`G9DoD zt3@GR5(_uSZYkBBUP<4$yf}K-irIHK$}UCy4|@ao1&x0m^A3+b`)cd~Nw+oX=3C*= zI|f7N+9+MMQVn`Lw{H2bmD^4lQ;FMQE^@V=-`B#D1F5|8@|f(VK}!vYvaF~67y@#Q z*WrOQZ^pmOe0eWhfEozu`IGV{vS>_)|JK@SWz=nwdkEgSv4 z+9l}bk=Tn<+(I_#$P9RXYQx}hI5G{qg`S5ywxCpD_9^zqe>4Y!KLNKcErwfS7r zb@-nqa!{VllnX|=%K0lA=azm(LLY$16o!Ck!NC6SD;jAPM6Gm31J1hyqQN_ln^PfD z2;!~fl2I5@F+AL7}0EEBh-rgV(eX46W1YP;&v2*_C zZoz$5A($-%A=f`q6$4<-#mQD@cem00eA5Fmvasaj5UKQ>je-BYI{&QG#C*dRme+hE z7U+Hjbiiy?76}Q7TLc7>Ir$&7&szgXu1B2%8345hIbczhoBCr{Fvr*npv_$xhF{HiG+{I#CL7OhsQ4Wx7u4gkgi z)J5Nc0Yv~=egE-80x&WAg!Z1>%<$(8Pu6?#5sRVh{Eo=M1$S=(x-^Kov#xnT|J~kO zhCpx$0yO_TeDL|O=`L5_)S!%L`*!Cb%fDM4Oh#2}>WlN0?ZNJbfu<*{HZnvj^tN-e9SL+!jd(lk0+-aP$(QIKB0 z8x_Ud2aBs&(R1-_`XwT$P27>hd58?ZRHd`3Ed%()AksAG`YfBM%pz-5U>V(;0q+ zZbQz+`PI)HVz|=8e8TZg&Go57c;tNH&I$!Fa%D`Wsw|nnyh}mHFW87{fUzg$h12d# zENER^oS$#po-CtyX>tcZa5v)vic zY_tlzkC>(M!!s+e+-&pcWp zNzPk~i3Mf7IE8dQC*(bkK z5Gcnn(9^f@M>yKq72C%u=R9R*{thOs1iCXYF#$v(C@?Vapw`C+l4nHvuboqsw`qv0 zI)2@ZO*QJo{NLSFOKYd}0W04LB(2yBvj#WCwMQTc4okQl6+0<%80mE(6YLabt#|g9 z80J^0kB}aBq#1q)3zpwAL&MkoaK=fmk8a<--TqA{QpQiK zfR{tz-MbfPAs}lBdJRF}M2=b>8RU8=+rt=BG}wcQnx=6~cf5+!EdIK)=HXh#gME)`8LS(xk67-Km06-{J#VL;iRcOH8xR 
zmBj7#P7M|s9I8+yqu{{iWDh5=<)uqm4BBL&KZ{ zA;~qu1Nk(;L_~~Rtr=j5!6C(ruBz~ZISM}VaeQ_$*CqVWgF(048TafrOrZKv993|j zS>_!j7%h7nn52gwpS88KRTLlw1?z~)UVdY@b5+B#qh}A*7SQ}-KG4%VmdT&jt?o;PMNB&VilM2_dwn zxchh!h)n>Aguco}gmR4V#-nHSLFd|?PG%!tBnrNzYk+oiKt0kgnBqPq=7@wkDN~yk zN@LyCDiU32l2c@16-`ms97kk-wd7}}G1Dr78iP5A=npClVn&>yvC&3{x+Ivp*j}4j zJ<~%cOdRY1F%fQBeqR@N^td<2B3nP)f2q7NQpF&XF_e2{+viu6_j~PAoYy#gz6k!3 z$Z`A$exdT8-_2yZuy<{o?ZtBClb)HS>68szYb~O3Gee;qhDSD>u4}Tf zv7LZMPVwU7jgDX%BcP!J0Z`mffi_>}`&bE}kf3<{Y5`y=j2l2NIvN0TIQu)5h_vyo z4bZ0v0b>83hBM_L0ES!bhm$CP?uhl7>X)Dl#|TCNtgoAiqR)+3&yv75HKYS#4XeN> z{r>&C*b_7}Tpg<%9v$Ve!SmknLHKFClSa0+mo4!F6ckE5F6+K{p^tVv*?{GpAAfR5 z6lw;cLQbw7Yk^{(c>1uqTWwiM^t(!j42C43l_PMsAp5=>`hJ#ILKzq~l!%m1cn}tqxW`bCMUM{k8%>G;$SGkNi zKIy?{*DtXpu`NJHwEk4BNEB^^H^hlU4aLjbOwBKogVF8t#Co4rYu?ns&CT0fl5T=| zd70ps<0)MA_-7o^%=+ktie_`Q3bZ;oAci?Y^M&_W2{&=M{>Z}O7pubD6082ajvrz+ zznJRRY;dI)qeJx8={FdSe8D)d3-sA+AbRHx#9|rR0>HKxMsmngJ^sF_7NMB>1z9qndKu92OOBsOA;LhNuj9Bcxt-x=*wek_zJg4$0}5(vP&AvE~WE0n3y9 zXj4?;4J$at$NEfK7SN7J?ThsPT|2%@sE!#Z?MkTOW|HXy(Rvx?XdI0q+E-c7&Q z2WO}3n>Tfi_ads?PHiUMhrDn!pD;A2HV1u7(bbM?i<_G=Z{ObXzC2w4^*3%Con!wh ziBtnXg98FkM4%09lIHg1Fm)gO}{R$MJue(0!ypNUqPBX5JHjrH3Yj8qHs%ZSB?VPZD zvZx%0sD|mc&qt5g5Vg1QGQxb7vNUThnWRo0zW!ANSDWUXt__{};?>{*{Z?XXDrMu6 zb7He%lIq~_tnw_Y=6N!9Q2PW)4i{i8KGUoOF?i!>r;|+JJrUaSDqZV-E#OPKbfnWq z0jn~wMo*NnJ{ki2mmjZa!qve5FYcd>;BeoKqR(U72=&KuSp3ivYF(BI-;*Pld`76g zQc)qc++%vO_lB3TVh@L^=^*-;hG6OGv&9VY{<5-kT0^BT?Oc@>Gy@LR7%U6*pJ{Hq zZr4wDSDX}qV=k9JY>wZ4RQamckszxS{a3zS`1q+{eQ*&}zvB{fmTUhNH&M^O2AebH z0qfp{C86vkLiy@hkr#whlndYIwtj=;-hSUtkNpz-4paHSdB*Kpw}kKhxFq4!`X(sA zX=*R#xN^_cp6rK=(M~m;Llq7_Wr;%S&@CNSq9xJMxFQ4-E|_N~*B;n1BqU=?NELk+ z?TJMF6Xud?6zVJl@y!|ROr^ok{s&=`HFEA>p?G$wDYwLTINZV-QfRis>65A0pA;mt ze)ZaR^P)O(`aGUGxixhMkDV92J6YC|>qW(F)(9*bG*&LI_Qv11abtePdCgGi)2F8p zI_UXhpxic6q)+F1CIq|X3?>(Y_y80KZxIo-gCqk)eGxM73D9RJQD#C)m8Jlqf#Nrv z{=Ohhk(QPYr+s;P+Jh5fx9ahr$QyI7sR8YE{{W6ph^RZ-7MD8nX#TyB{)yg3N9+DD z43*GRXEhRs?eVpI4X%K0IZyF!j@}!X;EY+)=uaKoxJ8)V--9``ozc;$4vqSI&M=HT 
zhztoLq>HxETZ`OVEj=Ao_$euZc5M@G_KmlD1Q3x*<>wENy-rZ?B{oNl)+T+~kk;(L ziGFjYXsm*^$SuMn?zY6|k zduBHiR|Qf@W9cL$0LDBQk$KLbg#eXjzzbqR+{VtPP@7b1{(Ig@3%Yn8bdq91e@Nx= z5N+fsGT$P$XwI=DFR+1BFo?=dx!TYDMtx9fsa;>K0l}G-Sq?K?( z;Ho%=J3gv5@P`ys{OR28U|f7r<|~!&(VKl&UE&*x3r3c^7nT7TSFp<3EroZk(M5tF z#N-!)m1%4>&S$lY&Kd!V-Qxyf%6j_v*BO<(#)fVPoDF!A9 zzt|yH)pOn^e?GsmVm!3($rmZ}CXmdy{|K}_>e!_3PL*2*fbt|zh09Ye_4 z>U@E!$P({w_myjD<+euG&cui*sP0rIO)2z~(a5bLhbfM&M607IFne#b{J0qeUeb_f zrTPEGnG$aF;U(T*$jn$L^a!5-M~@>Pw3md&ljLB);G>G|VMyDsnd3NhT{MXia@X%n zk>vr1s1gzVG^?HR-ew>8emNIGn?L^z_o}xPLCD2f&&tEQM}PCcE5@mRsw*sh1=5bKmG0OW=Deu!n%U>! zq0z=(^NC5}@3iEl%3DEr<@s5@h_e#<8F?|ymV!06n*`Vg-}Dv|A?H{9lE*fa6e~OA z@8%HagyIcQFMbbv$e#6wW>-%r@2L(BP&TQd7rI4g);MC=kMrwSk`)(TVfA^wfLMM& z(RJI*86UJ|Uf%oR?AE)q@WqGlPlDEzUnd!^K6-LeuW305cgAi|*B&wz5mVUem~33& zuI%Z1wj!$Eu}TT=GZ*%801t!m-0-v#DDQWxAK5s0C__zNFK#O=Z^Kbo$Fr?pciJM& zO>EVAuzm!5Qn4FqT78eoF~1%vGPEns=zYW?fOIz*uXQ>STw5yKKTLT60SyfU72*$z zicm4)JD!6%_Oe>vxY;ZHjorIy(ONA-XuP8nz{bm!O^(u?aNcFU4qWah2WkGz6>~6|(vGVV zE;e06>LDEN<9u>`Dr9ra6~2Kbvp&G5j&L=pypyxzo9=sG8V`>H@xIXM0wnBDLE^i$ zC0n57D%A&uD7?xVj?~i68@*3XE)CjFW!SY#KfQ}}2kqye%JC&D%Q-#Fyj<)4{rk7> z+-V&wnZk1-l|cE;9%=lP)js&9_<9%!Vn-*}~# z_M+k`C{K1;lE?5DoBbgq*5JeafbyCSW%xzqZlCQM(J*Tr2_Pg8y-r#jF0qBYk=&sLMjO~s|HfjB z{m#R6%BgRAXSN)X^6qOPW$7d(IRQF9LozjA;b41TSv|`Q_|d8nRojnfM>n-|P0(L` z1i>80DLVW5+a^0H3L4h2!pq0`m_0F!#+j4Uu|P`{j>~o zk8C2l=W#o*_Mygm0J;WXjeyezw`~FPCnNf?@ZGW9J{kP)a`l}BGw)UVIM#pkb5iYz zF-&^Eu8BEkT_`kmMe=o9&ewW=!3R9&ETs4mG7Me!2^%rV^_N5;XW4gQg)QTOG?EwTqUU*C9rFXD;8`dtYEp3tYi z$9pRfbf8$(x!Y&L_rP7OzeUo;boGzG%lK`fHR>QK>&vWy(*_|%`GmhH7i4G11!?I=o8y4iPbrCRuH0Myp6e+zbYGc*d0SEs;7j`UutBDfql%GP%$GZy4e3 zKp-a{>TPVpmjeH@fk)h5q6n#Q#vnN|8H@F5`cpI{H23xBX{5*dvqf8&IXG7)ix62= zX%U}y!rf4rp45erJ4CKs8u3$wh|R5Ao^jAfL*2rXf`uB3HC#Pi;doo7epN4+@6_}- z)k^X~i?9pzv;u-x_x3SCqXD-;zVVH(FC}Y_jd!QobY}Bug3@*Sc~c^U&Wf~a2xSoRRPMom}^0xcL@I z`2s&&pswj#zv&3KRSiOqnvI-W%o0QPrYl1LU&|gy<^dh?;ty;Ig3kCp0JAT#Uy=eS 
zw+t9{XpqRT4rZmT0gGP&T&~RDzrSMP;7(+ib5mLqPsaC}T(u0yw7CntPraI`M#p$%u9E($g?Gy*-lerR z3Wvoflml#xp@insn#mPG;xv6heSt)`E9GAbdxUj6*VE{ViK7LlBl7Vuf_)?eb7|6n zr#hZ$u5HFnPTo~24O3qe3G1}>D8`bHWCP9b?<5{u4)fE$i#<$YxAWo_^4(nEpbKi}c%-VVB-QG^;^~PrJvJNsJfb3f&VHg$*;nv#cLqMy z%E!<+o=hWsHa?Mgbi^dg{Pkd*>1Zvq-YfZ#74x>Hp&go9V{lQq*Ev*RIm42bep$pS zGllUQy;LC0IB|>?I!n$iHrSUE$&D-SJ#Jmeu{nAS&XT)}rRwVH?Vy}joN(35 z3sgx#(IxDcow=N*_N-VKZW-H(29XIZ&MVvOPe-Y!=ZlV!2H zz0;E!2i?13+1!-RW93kFo+Pu{PQ}wuqj6~dTzTluMve|5CuF>fmXx|(?RKwbl|6$= zpdZ+Rw<1FKM`xW%0}q***$Y(!;yRu#?k#mM3^Q3+IgT}q()<;c>1gMXmJ(7a$2C{d zyd7>+=U>`|FT>I`K$WjjnXAfDXzs~_z#v&o%3||@gfY!bXSsFlvne9Gn)#pX*~L%{ zFE8~(-vGz>M5JVadb*P3KsJvH?762Fuuz?_nPkve~ks4%;}@2^{NuRBT{Iy@l3E1ya$r`8>8N9gjs ze-D{Gs(l{PraeBuFBlBbhqawmUa-Jr+9vN+jJb%!dH)E)bD3DVM?%>)7e2=A7)bC| zpmf~kl6ZP7G|D~#Mhs6jl|lzni1pCZbLL5D(#2k~fH3oeQhlxps47=wTtc9-a`rJ% zS_P|x4SWYH-{=OZSYBwe0iW!*<57@3a^z`&-)lqzd;EdQ^i024{zO+<$>gm6VkK$o z6tCl|L+uLE)jwC(WAZXm3WtHz_=oAO{nM+gibDaF%p!xe67K1}m!1mLACH=J0TY_m zXJUZa35w32t|Tq)8!3KaE!6ulf32Si7{89U=vX<-(*Y`+@dtCwA0e06)OIaiD}WU{ zURYasI^WRN7vLeQ(9VELQ5(DVwLe+o%=Q05wu$kWhv0{+RUumVS0Mr18&j4 zMqh99V73yy|E1}tPe7F7wcB~MrVAJ$@l&tJ_2LoqWFr*^8#_BcKi`@CBghege*RNZ zFMK7H9=g|X85|aN9~&E+nUysi)CiBaG%mBpWt)O@ix{$)ll|8kB=^KK$(bsp^pli+6VftqWbJ57l-zsz$e>NS2`Cg7@ zyG6>L>wLZkj_I=3jVd7z)**c>gsAV>MB;*yC&!D!(;LeddJL;#+C$L2?>U3nv(!_w z30%4hZgS2A7URuc_b*2r*&}M|==Y(K6ApC{l1+t_=q&vtaYi%F!B{r2F{S$bF{Osx zi6wSb2rRFf=wDz@#qS(c&#qGP=7}`ymjC;A>v%IU!{7!PJ5e^=ST4MRt=tYvqR4nK z_<&438g!`w8Vu0H*5D*akqKb?+I9jB_7#w2sQUOkq^BRryq1?0UZf192e)qB0^Jv9 zFIlAyW9B=f_lU%Cxji65sLI9Tlu9P5CdiN{G}>;a(MLQr%eGt0a~(I;km{LgA9>TI ztsk*C?&hIxWTf=$*-;>A20vcwQP7w^VZZQ7{#j-k2-VGpf=><~MUJLve+ujCpQ>DZ z6-y;;u&%9u(<7-OW8y0hw@7JQ33u6t`9xL0O#ZxQZ&;g5ZMCY@>$v*>tPnVgNznTRI12VNpMm6c{3t%=_HcZ(q0yrR z{!x3)JAP&U;DUM%*v(`!*zp3YgSyQFWaX2p-B2x^Cog?YZl1eeVM1jNQFJ}E{XWWO z`qe2(Yn25nm;M6k#@Jh3h42E@RCbXJmz27Jx{8wm{kPh$64|&F3N}0+j~L}&B)40L zeKt-A9dNiV#(RoOxwld@QQ3ldeik!lpRCv*0UkYwqP`h0s8YNy6+nqIY0mMy7371c 
zb5-fZx4k4I0wgtUPy>|LIT@jY)pmu92b%j{2yj0yteQ+OE&T$#j$=Q4It!F^Cw_v0aR9qp zY5N^jZrcJ}&-%dsIrS6Bc5HzU9cVpwls;&<_jb=u0bQY-^_POJ$b#nXEH7WjanO)( z5J0wJ=fG?*>?Qnop565Mtg%{Q(-g?ow%0!PZxVJrY9cL+9c0RrnI%WuHj3wX$A}E= zrNT?-yudd-eIg(2I^J_0R_InL_NXhvD@(`r{d(zzM8zY32;*(X<ODpzNddW!<%Y-oU$)yc_;m1cAhcRU-6W9-EIB*Hs+J)9yW6;1+MZt?%B;XeM?5C zIA%Rm(O@TwiD6@~BZThjovv#x`|kdVeJTT$xZ``996t|*oZ2(H`&d6zAw0ZQ+#KS6 zf0)=PJk_wOBW>m{Izlq!7?;f$`DDY}g2~T4gA%nFD7(qlU4U~lk+^ox`b$E`^Hic&-=;6Z`kpe+ zQx=N`3JbyvuF~hSuuC&*tN$qvxb31-c)Px1Fi~sd(UGq@xtqh)kfZLvBn`B}q=Qn6 zrtk4vs|jV2+51`P{ySP#rnC7S18{(x@I*NR8lFbD1AicZT=Urs=P`q<)H9(Gq%mOv zjix~83kXpfbO+*=uytQKuFQAWF7RMeo^a*aiEHei&i&=VbJRUIf)4+ym5ts=*DX;0 zDSX7(CVn)11h-yHJRhsric^raImh zL`z8VR|TF9YB$kDkC%OlQFk{Olq%WRJ`w9y-6%2v@{bL9G^gNS$zl%w-2X_)tsV2|{k<_~cq+rZ zi5=24IO`QUfTjE)tyXPckUHeOv;)70tJ>U%(!nVsQmE9BO&^^t((YyWIOGISNhVi4r-uX3e zPXB}H_QbQOLq(0Or&L-9Lo;zah4e9wkf4(DzLD-Jfg|Vi$cPu&()>0OObQ#iCv^E2 z?Dc`P%5vHD46VLZTP!_A2VoKETE?>|Hr776MO!K>Du@25On@4E*;rTzd^ zNJl&EktHIoVGR7w|20v9_NFeGHU5Wwa*J9qR8Y1V!+-Gze&OyN-~3KRe;gdjxhD2B zxa~2++v9mBnF{>2-!A`I^4I=IAQ03a@xfGUXZeE9o3n7skRHwGZ3$w$^} zM6Q}soV$PL3_D2th_11Z7p(0UoV{VTA`1P_`(FLUNRHl=c~~|FmfC6F$CRvBLl&SD zLx}g)!aH|!P?No6R+8Lq; zs_JHxm9Nuu@5c4#53?{hIK$>}jyI|AIVVCU#yV~hISM^m?{0r2n<3(~4-ZRl=aXuc zh|bwsY1*@LruEYUUinvB+lU|CUf-nn59}U2g}i_I{B<*L%p}jrfusNBhI6xvCHzTH zx`aaHO|REcSDV3~CxaPWFNF*R6O441QI44xQ#d}?i4~?;4H#%W@=u8C`w+-GrmpDz zBV_a}qy12qEeqyFLZ3%%#P#xWo9PyLY#4oxh<-xdw*-pgAvNUrc$E{#!G{C!UV(Rh zy%JkVhFWFi)Z+)5Y0qAqnS})yh4})5B>%@-B;cWbke1Z^gC8j{ELUD0JMoUTYDS1{53Oa#tVTbC{nR(FrP?44qCm!+O0Vs*l3&^P!fR|K#J zVhgeTp*KC2+>FCdC95m^fRA4i@w_RaC##?NZuX-aiL7n&OE=@XRyH@{4`nYwjd9`F ziwFX3b1=LF$Q5lK){=WI!H}Z<_vDQH`R&@F$ur-^G@J(QISz62TTR^Zx`J1 zPBFH){!{)LxP`AkeU*>R`Ucybw=zG+SLujzK2 zkR#ea8fH{#Q=yVGUnHSS4N-9Hk|8opSo zyKIs6xq&rSN=)z~SA=LmfzD&hQ|PmHc!%hhH1lanV-tWpv?h=|2_DDq=CIXE6$D2b z)*TsNAan$&m0M%ppTX;1ScQ$HfGJC70 z*x30Zm4&|Cm-nG${$ld`Gb5jpY^OgL_f<HwYeavQU#hz4JRH7omhCII$fMrnCl_L~lI%JDr`T6FJ%lMhXRZN3@p3Li zS2r5(`Z07iR13qF%>Wj8mSgJ6E& 
zF(3Q9Q~L%#s!!A1)6T0>%M%~j-z&HtZAfB?OTNP!uZ{pWjXx5+^I(~E|KG&K-?#C- z!MlCXkD-zKJEy<*NYZ82NSzG1{DD_X#SZeJby@}nik_Zgz_|fv7RM)%*j*6$5$ob? z5deq&I66AYhO+8UGq&dh=RW{H07(4XuK%4i`@)P08#rFjmTq2xiNpR9<`+%D(AvrQ z!e}u!yr`+V3qSk+*bd;NOVu%F+{@Fx3jl=`J3yzq4T9Mt z*i0^plkpL7|E*bwGt{eob&Z*sIVLu?#Cy{WNZHl}qsAT3wXtBb0`j@+iEDjcV@HB? zLlLl&uz%Zif?Wh6PLFQJ<4lcjI{5`nCc8DSO|>&%qHLm#24FfScz1Jv39(Chh64dQ zAbY^fokOO;bjM+$`g5*kHZbKq9R%iP{4YrT{p4QHV%-A0rZ_%Bpn0DKN^NUk>fptN{tIvG{#ZMxg@FoH+5dwDf#*=lqC7<0QEIE|#C_ z-u%xG$ZxCA&)@1$dCV{1kDbVPnI^UaWL+T}1NjL408lBVdN^Kl{_TwQ) zVgb0|xnALjx)iIpFnwZu)PpzTZHM0V;Fbv)F=_0@x=1d%ua*us0OAwaM0q|Qa6p6M za)PFVt|p1S=Xr4TEM0JCN1SZkGQf~tQ$`>ZmBln}#P|1+5r#Ff{j+?CuiM+0kiLKh z1+ZY4+>LVlJA{?NZoLK-@Su@aE>`R+2#_EDP+geYShW zeeh$}ckbOY2Uaee!?*(}y2XBYlu5BQXieeFyy^j`JCiBN(Q{Z}Pu-w?ypT=5%r6a|ZSwCqcI=7iC- z6)_cLZO~cUZm4nK2N-Y_59765%XZ&xjG2oLg1E{xyS}ioJeSf{xc@Vf2bnESgl8Oh z!5=(VQBRY%g}%qO=#z{Uk=YsBOr|Az0DI$AS-eX0bT!f)O8d@c&UT;&YrZ82TUoC=H_qnV%TdSe&u}>v8tf zNWQK>sQtLNVb|w?9;zcNTSb!caZehYFGpaN^M4xNZLVx}fva#oZrMxP>B@|iP%^+! 
zQ8??$gk1Nng^DJNn^o08j}UaICG@x+?Fj1 z2`jF|Lk*q=MR5TFup(=wAh3uSdM^#nwNVgI-=t0Q=`t&MmGuBV9hBsuI~aH@o>0tkjjq#wf%}b;q zfCLt**a!K^b=*r`J=gjXA0hA-GQMmZtCZMys8BArcqA0ObF+Js=*l!NOqC>ln{@D3 z*vL^eh)B9>=#M;Nb%DE#KDsH8%gfjgSCR72Ed&fiG!%ZC`}EGRXiZJ%KXq^x{^BH`L7hR)X^GKelfRL2&t>7J$nB9d4p=EzxDc+QI7*UD#icU$#eJX-PWT4m~oNA z&_F$MQcdCc&l<9*9V6fte@bv*Vvag%*ovqbsP{o8(~kTc#mc<`EEOFMnSBO3c|=5bw>^3H0$4$4 z60IC=?XMHRPl|sQv)o-%*ltRB`06_vU+~5#?ZE69zytr>W%kW@y??=^O!*lVp(_JYhN(&}^u>Jp?{`+CbJPZ;xIbf@xSS=A7gg5H z<^^WEmU*uw6YlWd%bEb5lvsYL`v-ykA_nohZo?3{2GS8tXEIMJTUF(~p;b7d$y8`- zgE2#T4Wfv`^jf;vUYH6`E^bs&uY2uLyXjH}pG#J7MjdjaQLf*8ip!QZy?*cS*Et{n z`O#d6K0coHHg$jQkbi|tIMROM3%cVnecs~;qg3ijNaKKoS+U|8gchdHPL=`?WjC%; zF5{c6CLwNbV|zOTOfA{fx(=$0o?92_#1eT^`Urh2Ikf+-Mw{qGn^{mc))Pd3cxeg& zYgpKAQf0WGs7!;M`L2k)Bog3)<=vt?jzhx>C4iog5OwPPBQRy97as^r%OX8L@Hu3_ z0q3`gn_K?@Ru31I$g$mq(6|u+_|oVJzuRD6Yq2p(2@$k)R`*iRNgo@--`DTqouXV3 znAjA=T`>FvfM9r(YN6$LLS?X;u^?cePj!F9j^4$bUK>qr;_%yEEUii4TVz8a(4|R$ zW){X(9;ff<>2A|C^c%C=Udj1DcN#YL(xi6#oP)(5iaUiWh#`6K1IhP*EV>)@|->X;N`u3AU~+d!VOnWhvnmi6MOaY zb|uSNjqD-r^ZhM;dlopa1Fuz)q}H3Jf^1{(&rYbdJhGCNaHtC0bn>mPdcKyFAnD=A zD!OOGscT)q!x07$iJf)GOq>&$b;$X_vVQhuaneHx9V}Ct5nORiwWw?TE-XkkJ8FCX z#?XLWxs(h{0_wE)`S>xklRZ=(qpPql)zFw43$-OZm{Xu?xKi?0AG&XggCLPjFkU{L_>9G zdChx<;z=Z(OgTQlQy}moIt*^MK1lrm8;edcg0$?uc1jYlJrBXQcRT;v3s_X z+d?NLw(il0u4juL`_=lv3` z%ErZqzv?T!;bY9K_Xv=vqgD&$$O-_K;J2N~Y3yJWU#MqYD$KFoB>*X|N{pApP{SA@ zLZukfy3f=#BdCWoLcQrF-P2>C=b6u2B}->0LDz&}&O2a2CK^V|MrhB~i2Y+@>>ill zQ4NY9Y|LYyKyF-!%F|2<7JC)wwr}d7s}<<6F$&lnHhEE8P;ff9Y1&sF{NSt*GY5ww zsRl6DqY4v`Kj4;>)daW3(7KAVv%#&jzA`cYJq{96c5e zkAkNYZ=ahlp4&_=K!kA|U?0$u3{JjrH=H)QA@2Ae6DmNnup;RhJj1n4W%)S82l>!{ z%f77To0(%S0o~V%4re4$ekkEc*S&js6>f(QkMCMwXV4Ci+9@$2a)>e)fp=-0YL9=- z@kOu3Tsm=o%rj#>rW~~3o3jYs%e{T0IRYPX>I_evVAY7@S~)y>WsvgH`mi zB|tQA06ieKr0`4ou#E@qM4Rd=<0Wh@BEKqAm>Q23v76p|kp(713IoIj7%;SgPEOm= zAJ^{b7P(i$Kve}`tMZ1)O2;ceVdq&4{?;RkG@_MQiDHgyz{{f&r=Xw!IzMZ&Cs0++ z{pGepHt1SWp!~irA(2#JkGyNgMBBa^8Az;xs%pC{z8epGd!pmwXtrAduAO$)m3p*7 
zvTKK#PD^WLUt09tV#xhH#iR;yL$tPiYeI!OI!v0vC8CjP!D z5gB2NqW}F*G0}U10q1eyD0SQNK9xc6kB=Z!-;>;bxc{vu>aoHSKm9{dWK(y(Zv~Sv zJXoV}CC(XY{el!PoxD6*pK7@K>JJ2@mALZ;KVHq9`eNWYzhZ^cE8piC=P@Wgu1boM z#BFy>==uO%q|0RLM6JjYLmoIeE6#1p5k~4W&SX6RobPaP%6d<9!Z2(9h732c0v@D ztS*3`d=7-pj;OyD$9CIp;ZtK&`w%5_{=k|UBoMGupm5A}sjpg7^;oz|3%9q}|1;b= zF}~E6IPbj`7QNp;PZ-DxS97zfcMJEC5=-YP4yZT>rVUsVfF2er znkO!YWE?<*-D{JEgi`d$74LQwfhBEQ+ zA7)|K{_h|V+SZU3|KqO%?}|4^83frPfH}iAB(`R)7F)fNm(TAxz)l3pK^cQ>zJaEh zi*d|DcZ*IkovCecf&+A5d8&wq&_iURqp}NrnL!gThhskLg;n z9G<;r{;2a0KIz!$phs2N&dPSfAL@!rD%CAsU4CbIYa-2E+VMgX^b@w9*&0{;Uz9v} zrWWrxqWcKd$R!r?XfEf09D?8susfHTf$$aKo=WA;eGeU4(tm5A*q%g-DJj%qs$ORh zUfNN!8Qy<|z@z%0d+G|=@T4_?i*`WMo)Sx%-BjPjZtc{R;#5ihy;pTefJ?cv9%EeC{ItB_;Lt5P;kZ=bG=GSFFk*%`^EdH_nRxkdhMP| z4Ejf{Hwe4=@d7_z>@I+(em0At2$`#wdItn_MEU!K-_Jq0=H%=pr$8<}@zPKj7}k~)qUKUDqkONAGYwA80B&4(|46gufJ z7%vKo+w84^jg(>r+1CAu1LTUV#80oBdLMyFbN>EPU|8hGZsS{$lF0xjHr!zO`cf7H z>pA>X9&UL14j9qd)upJ!9bIq~;N!EU-*xg&G8G4i3}xQ^4GDV0^rr7x?N?vMo*eDF zY$1LwFI%^d5RaZ+@_HG(hGE!3o|}id;h8g0sp=Yt1VLS<%OhOQJ6zT{x7i-(#27Y2 z;wa3E9o;`3U;q!;N4dglGS{<49CriAcV-FzJfWOA-q@A- zDj?*@k@K@x_m(2Iv7NidQ&Ko+WFJDOaG}MezxFhxxwQdO(=|0j!G#b0JO5#5$8{_K zs}q`?OgFi<^5pDJ6#U%qwpD3hz=uFtEA{be=0=Hj)sJu)*CZ%})H;I?4?m6S2dxi) zNeF$70xlq3-Tl%HG11ZHq=)zKe+ENR!Qwp0SWzVDKi_dK-UE!-0FNuZamNW=({}?S z#yR>L9wi{$c2L)sA^GmCs1I>^AokI}4tkem7i_U`W)n0hJz&c0#K)Oo+hVgF#_u=UwRYNrTZb-1U1)}+0VVV@!^E!6dwJw%AqF_2OkpZ2`?Emx^cpPOHB+CSouML zX*bI+-aB;p44cS%(6~HQ>tRn*S_2atB{enNkblG8Y#6Um^(XM+Z}4xRb!D{@25?+< zNMH(3)476g-9VCafqj)?@aCVn&p#v9pe&R0FM!<(c-{x6%&zbVpioG#VOs<(J>8N0RYVQb!gdp<{gUzik1-dL#PgZWMFXg8+6Hp zE`zb@V+~FN=|?YxJ$o7U*PZ&;mvnwMnH~KI0);r$P~nvCJ7wsLU+n}8P&FBkYDg-| z9w{>J{&Y>2|L>2{N^k0QG0XXSV}P*Mp6iMQKU*Q>T*PeWIO2_0V@}Pnjkxd`7eL8P2O?=_M}L<6P~~MMxk~nN<#=vKNBeJSJ<`g73g9$!)^% zM}QSmhbK>n+!taml#9^GnE4GI^H4#Qm&brP!$1^$f|0SJp%{q;XNxEv@Q6U?nUs)l z`Jg`z=MTt3Z~y|?R45$IMF$#5K_CEBBIxujbo0r_d`?VM0Mbu`!t_s303n|qVf(2V z%VPCXENPSBgvHaX-5k-tr59a-Yz 
zD&QZ5IOXbqLgG+eDWYK)G1T2GZ6G2Hi??%Ib{PTB5kuKLJ9jMn=I)=*5*!fg-s>_4 z`}T2J1q*0qhU&T0&VyR7u-Q34dMuSw@A1LQ`j4yL#vyiN5wF(F2wcun2sD}8q)n!Y zDG&^WZERg?a7IdA{?Q;&whA0v$GGZKJ1|#JP*R$YGj#d}rgKXXmon)7&Ark*`9VnG zqKoEMzRu4dc}+G1G}#?)i{!9px#h;~jbDQ!N(|JyDRF+FK%(IkplS6S2S@LCg*{da2IhXLHb|ZM@?c|% zU2jv$%KR1`4?UOhgWix~GD8#aWbn>)*8%A10`Q60uuGyn-vx_>u2>PLi3C6qYO$Ec z82oRnisAB20_)1F_~nTC_)F(Nzgh+wq=OHi?E_dw;sKi-sbA`!S^&k} zg@hifCn@2P`HDbn_!behmHj&Q&y@atY+QfzfpjkH|6(dUJ%HdByzR?Mz4xb26>1{- z1o*E1Y-fUjbEWDZlnZl|f8m;08hrT$gto0|TI-+VIz0Uhg7X&_Xss1uo62ZKRC3C2 z|M0%HD+P-sY7RCkZV}q{y|e;@JD)qb(7z6(7nQ@bg*;QY6}g?4e?wMc_oWhIhR%Nr zYc^>@VI`w>UtuM9iwnLst0{P!K2Pn!tZvg6IJ(hz8#=;Tp`uR`f`{BL>*9+(>fvM5i3m1aN9NKI|N`Z%Kr4w4w-CYEz8LU=EOMq=s#?UgTiafhj40tbv z%VA)$MPPm}pJC-$X69Bv-1Gyr_*i2=9m?_Q+dWclJl!j;x|dHYuIO+n(DWG144||G z4CnzCN=>h?X9qrQy2U<|W29rA9FU&~2>kJrr>EB^NH+nb-r7zDy+mbTG$N3{qkK;< z!Vk-jS8{ZBT6Db;&lO?2U3eSA<*a(hbk*ZVb%{OX*)l|mzzmS--(m~1JzOl)`MWO< zK3SmfR*dIIeZ84LNuBxoDgANjM>tr6hKoip!b)Ozn5}bX%==wkXwa za-#fzqFGLz5r?lU>8_x)t~t(9`#pkv67z{Q5{RNoBxX1Jdw}u6`U+*%b{|t{*p&_m zybikEK$Y>uz{6}ynJ}Zv^jIy(sTWl;O|S%wfoJm=JbhqCv9eFfV3cDSV^5=(yiHUB zwbZG{FmNXAOV0c6RFQnwMxpYc4#*%)b}TiD`(`$#Ew0;ALDAvWrcK@4p^$#2U~EUO zzK*%!cRxX?++p-R%ewcGR5+rL!cz9vA}2WOA|SVCawt4*s=`iqH-F(}N5koTz>+13 ztb1INrbHG-{3X-`ndc=&?Hd8wHh(a2#e;StHllXG5i(FlH2$mmv+N-N|1{Bj0l^#; zkTd|5u!M9LNRq&^{sgcbkV*hyAZVZ)yF_;kj18B%e}ANZH}70GP$6FhK7w?>gelTG z%_n3kkOV9)Qz1%A9biheW5!#W1)hCzLrAMx%~d8M!!>?Kp=#R9w2aptIeG! 
zjl@{WS{kII6Q;13}{LO7zpit!0T-PIo|-Of=%WB-~v^ZDa6~ zQX>7jWWlm{hd8HgOXdLd$i7tPR5a1BPj6$GC`kAg?_;r+|1|WeAOR}i0@{cK4S7LH z)c7BKlf7D>ZvFui)hjTiA2jzR`fmpS{CM-Xyug-hmxL4cVTZw_`UU#Sl(v?7cgziU zoA3TdHdqc%x+P&((E*EXRX=AA*6oXewn`%*#I z1-neZ8v)LM!=PvDmrwEbZdPnK4$`bc4I6{S{3$N_D=5dj(xU$A&@=cYX4abqalqIw zMD7|5;q6XRK&MRg>i0&E&=yDSCMZbM;8ATefsrDP<1s+`zPVPn{+In`gP#{@U}ppa z%QdkS5U?fJs)NZSg0Mx$S0?-}_xt<%N~sdDH1Sb%r&h6jYxuI4aMbCBu&XeR zUi0(b5BJqUW~}MFKOBmc#5Yg1vjFM~z&P8wjULK>KnLq!nTm@h5=&=uDf$suCzW%j z6vOtnH#mXhvpX@}A1EG{#!7F%IL8ccL{V_Qqmhnh*n#VYt82GmQ=I@oN56OS+gJ#pB6>5#bl8A1@jf&fUozay4LXhQ>;*~1&<4PkzUJlxdH1#U%4{JJmIN#j= zdZq!ODzf8~l#=T)Od;+ovSpXC@s2VEj;UEL#Zdsh;F@lrtiR8v%yp8h zx~3x=Hn%rhoNUoA0MSUR-?-D(BNx>R@D=Exn6i28}&OXC3j z32&?a+?1}1;ZzmKK)OK&*WbnNz!rFpAt(%NxSDf6X$@Qcz;eT0RrdZrVt)4qW=b-v zAA_ENO8s#dLbZjvtEu9u*&rYi!1E0 zQ>k0S@5EP%HT0>|I{ANc`Y8n(K%lHl2Y|lt*)5GUGBM!eJw?`0nGfyGwe3y@g7RQp zfsD-)%N_EU4MR@B8xsvEh9>k1?tI^5^GiP8^3`$sud}3>BqslfK}R}}!?&b1IR9wS z9mZoc{cIKQ-`aKa2uyVZDz1KQ(%s&vE0`(ued`0kD2{gg5Gbz>kq-%8W&{ntRoE3J z!XobSU@p^R`qwv~{;HO4aNorW@Cv@I*H|}(ffQ6E%67hDr#=ypO+H+Yq;h~JN2waB z&O_kX8h`PQOl9Ro>u{o12?8x9yv?B7*Xm?#7<#^PAPV1DfR5 z7;s-c3SEEmgqh=tCq4`bauHPgkNwP6k%y8OG~J(9dnC0FL>m?Pt@Z)4`Toh&_GkKQ zVDwSXkM$F!@4r@GEqZR3Ym=3mtIsdr%_n+qdtl{zqKAY>TA+u@xPqf!9;UOrLr7G) zSE)O5^}YT3H&3cFu9luEZQ&)XGr7hD=_jx@?KMILpsfu?Q|lEyy@JRIBw3!Z5IE9^ z-lP=fAhPo%`=tdh5_cWj0}Z2TOtFzPMZ9_=ec%`C2Vl`tS!D{Ck+pR*Bnyi!e~!lJ`k zYyPyR5EECR<|LdnMX92=f^6+bd6(Jh@8vF#42Mk#mbhQT2&uX8O5|}oQfE7;|DnTo zXi2|6@64hRl%e~hb5~8yNTB_e{qpde6#M;4spc=*W>K?5`}|EPi(6Y1c#yIwrB^OA z4kKPTE4>%P@0DZ)W{pq%Asu&P%}3-%3(NZLn75Y%SX`R#K-1L3PWCF1XO=E55IRjl zlPUSlO-=L_itoyT7uJ{i;gJ4nmpLG2Y3c3`18ANMap^`2DFn3ceI6^bE(7U1&GI=* zIXM9gC<2)g4Q;-C3aGQb=XC7Ej{>wx4&?SuPEHnI;=DIQ>;b^(SwJxc1XK_#0AJfBpDnX5hrlBanK3 z)p=${x!BWqZwJ%6v^(6-!duA8vpj1Xwk?wNZMRB0aJLn~ybDQ9l#8wOwcXwbIxA&l z)SK4h=IVY^qFxwv9?#~A3Yb0X<+)^4cJOtR*p5O+@m)bop#>UdA>FBGkZy z$3rz{86lFA66ZM2HdFo_`Rdy_2klS&9$<>Y=(A=F*a9Fue{-Ffip*NE&VxnZst9|+ 
z!;K?tRe1l{JYx58WFY)%?WgD_ie|eV2g@`rG%T9oB#VX}t}N~$5{c$Hqmi5YBpbPD zJ{w5%3@_CWl&vEywErT1U(gb$%0^=M`Q}odS^g0ZER1jt+qCW9bne?LStR|K22itJJ8dFXyTk#$iX#A!!Y;a$E!*cs?GZs*RcA8veUGL81*_M zm`zppxDiK_dZuh9^6QY6nzp)%kL}4@w^|=a=*cX&^h*;yfq|s}JRSS-@-QnaD{vpF zX`=wOuqJSR0IP%UuNeV;cwWGgTn8Taz=tjdOetaO1L4RL$WSZHfaEkBRFfr^-Istr zGq4sdcsXV4g>D5l@?$k_JfNVXIZFVGcQ!l>xa&HMm1a&}1|6YK)i3%`iq9VUBpF1z zSSx$L$APw-QebQWyrEvdetnu%=zGaf9x{cw_%l@Fxq{CfyNaY#h2u6XzmqbD;Nt8T zSO+7op@~9sE%9{Tvy&&om(5L5xpz4Fngu3jm-a8#Pe}0^aH?l&Y#t=Edt0LY{NWp1 z>>TV%licZT_hDDWlW!TyygVqi1`dfg!Rq@!UMpL6yIq)z;M)?uG&Q+9mFOM>4vBNC zs4oX!o~)f%e?nD}?v;U2$U{DsXp)kz*EP`Zzhj4dR}!#Rq2PoRwW9VQU=m?f{>{)h zNEkeusdd4!r{3BjwfzikGCnHM$j}^>em`oHC{FFJ+JvZ^Xe$tYoX9%E?x?eh_2lL> zwVr(q#N$!~G5X0sZY)@3%Ku(vi~d|*0$EJHpM3Hu3Sie#dn`B(JQgOLX7Pv63M@dHB~ul>S`Ge8vJW? zbed(!YZ)(Wj+bFg9S#hxOpR~8{CnZyi44ET1>WCT=gV7zAs%tJhYPLRuWp(xPb)8G;${t<7 zxL1aeroJ^*90d8+bry4+b06urX815P8+m^p{fzcR6FQcT#e%$Sw%>ei@ru_c6?RBu z*GJc|?&mU`s_{7{qLs!EqCfTsU#RNaJ;%;7k(Egski&Py+eP zAvCw6O9lLlvUPH8hMSv@YZ`Ruk(G9fACjHdHy!f=K#c0$2kwPB(sL*SaKbg9r8X+( zeia0dza9yUoBHOG|L>2^SA+on(_7Okyp0o5nFBAui(|;acXo%Vqwl8n9`2tg>g(GH zd!IL_L@QDz?smruIImuOd$#wz)PKzUiAZ;Dzg>skuB>DSq1Gm%I@oMJT8`=1sj@*dUfAlk?pX)$EXz3U2_x$V2tm+ePS`hlz3x9nF zUXA;!<2>nFgj@?Cn(Q128ymDU%rhc2Ax<#|CP*=96yqbnyoxel**57dlBn_t?4T=u zw2Nq6o0xpXVY0+-`HT9H2k_L#v#pwRPmff6v$r`O1}DV3;qKZKoOwtFoYE*yD~+D# ze&2n&MJp(Vu<7GqU$Ty5z?Hkv?%v{S{6*p6oHj$Q&aokj&g)1jF1>_^0E#YfGTp4o z2?9CB`{j!f8NtENXrPR%gfQc?0XOS*PS4IE6QsCb6hbqR<&~P`@B7se19MIU@CMWQ z)$7Xx+;2D?B}6>|ANJS1cp0Pg_mWWqz6>rdE~1r|mX-s6+^$omgLkWB%c@=8#6-Jz zcs8{Rryk56G9VpIG;VvX+5Q1tm&oj9l3yXPJjfc&6=`VRz8T;Esf@xuei`EE)W@%E zb;x_}TdF^$yXy4H;PGbzWt5+1(l{T+zE1Vi*XkQAgr8rU>z5_VO=Ffd&B5AmUP&^b zruNo8_tAd$43jWK(KcXIiL6>T^f%uZU&K;92lU90eKh7IlY??{T;ZO~42ulal7UMK zFGQO{n2Ol%aGIPdIVPa%m6Dn=wl^UO?#1YCZ%i9I49j#7lw8IT)+S}OQ;j0;PW>_N z>7B33@bk%`o(Su2NQJ@vF;T-wbqo0otciisK0ZF1V3rPB;D}vC{bt*hoi#Md>Hb{* ztukVK;e(uE_6Hs*s<6qPKlB7c5&8n%9j)bZxu^k~3INQVn?@gsRn)eJ*;+mX)6bp7 
znZmHYYdrkoWJ-U}yL?RlWT*wEsPh!5^j&IoejW(Co>lP<^HW1ptnpO8KCN^0u)VP;;;t-_`j1JfeW7vG<($f8$Twn_^?^y7!b=XEhp|(mdu4H=o6OI?feNl6eR_Ra z)0)hdPlb3NSdJeJIi!}GYqsUrzYX%0HuS?k1>47;;%6t<0v@{Jk8rI|ntDz^N*9S% zisn?x{m@*B?!|ZYvxn*@Rya{vTf!az!lE)bW0+gf)JT0JLT2x9O9pXnUX5ga(a;F- zkKNWBXYqGiQ_2IELtb89%`GjsOXgrZoEJ!&eJ2oqVTI+Fq%ias3O!WdHsmVoLkY_S znNW;QjPNGzC|wVmRN6qpG1f>)m*vy>f#XlXT?WoD8%^c{b2BT)9+|jYdm#pE-cNDY z%2_&lu`Qpew`FJ;ZRKIK(<)RO;(3){^mWf0ql?e_dE88R3M_wg?v2dr0#Qt6)+y(z zE4byao;fZ~a)DySfn<7Q*T*|ZEupJhXD&tf^lUuFPazw-m+!IM;pLk}ASkrVg7r_P zIE-WWeJ38iHF=#Pwsz9DGNRx!vc0ZRE@Nm5M5147=;n}M$YAqOGC)>|9xI*A$2nUU zvY4g|Z6WrFcJORSK(LF~8ByM`gEazldUfen{kdpH66$z}>-_2miT0`79)|k*wE*@% zc2Y(Y8yw%=f4Akcg5^>mG3R`qf0fM*ThhzDN~1c8Ta;k|-H#+<_&4^77}bSr)fvlNPjW z^3?LRwCqF+0_{G03#`hcV%-9^SinHY#7mr$M)&UB#~KAa;)0Y#6^4l-#WkHmryyBG zuDtkAio?29y%GA(T>;CLp94x955@})lp>fk=gLZor+#lB2_9gaSu97S@CJ6-z$mEy^qu~v;Lxb?B+@RL!n z%YS8Z|JcfQexFK^OKnf>tbMww8Aj8K?{5M{x6k|g0U>l7>(06Z;{u;R{&?JUGb;)N zuXow@ilq-k&@B>QK)~i#QL$>R;)XbT zse;Ek?+k6|C3~GtS0y>QGhp>HAXEE+qQ@iG|ky2k6c&o)bGrSu~(+!u}%@4p!c>8T~mr=eV1Rx0K#3re{BM;9XOHK zg^YbC_q;5iDpf?O6w9mcgt+Z~j0}@I*94GvCIk@eWjAZZ_7eeum zGS0Ffpof*(l3{k4leq;%*pUvbgxiYs>aH6)Ejn~ydH0ekUGjgd1f^^p$Iac1+26}g zk9puU(JDM=gsM@gIWQQb!4=rtOzP8eyY6mS;^9Y&!|W{%G!{4z$bDwM;o=c(buAbB(A23pdv6kb<-=z=O?c=t5S^_oq6WO@3+Oiw4&Uw^t zGXRw7v$R&r9r%4M2d^7d}?P@ude+N!hbvsQzl6~my@7OzrcLh|xg^2Sig zomjLn6nkCahvisdIVIoyOV%SIxmP6VX{V-05isH&9Q;3|y#-X1>)Snyi3);>g0x8r zC?O3h3JOR_x8z87mmU z92uGCxu5&KuD$oQch?NIIS*B*X}0%;*wy40uZYg@?753g7$yQXM>{ujhziT726vf0 zcILvsEA|tmL>I#;*XR2w0{j>r>na{8eWW%^d%iAvT252BN>}k)ml-L+!A&RA(AS~d ze@UPI)m-_2$55qgeeokNf|t?N@nxF&(R^l?yIW69@XPE*hq~TiGhY4XV*Q9K7ZH?o zq``ip9Z?Vc=k(BU#((@|OZgIIh%}K=c+@=csPwH=>x0RQI+GqwC&@QxNbl``QIMd|<|+21sUF(HMi5#@J{NV^ z^nNL|zR^H_27G&qI7-Z}9Z zpP{C;g<&-gyHruM>K*0U-bdqnVlbpK+j=T*dzsOELCE&Qc~RmHv2SJaF?952o1aHV zqjOjFEj$Lw55qUSCG>DaaLHDkw!e4&_}U}p{?DRu0cBEuuHoC!X_(XIr3rl6Gy*fU z(9_;!O;6P*G`nQplc5i)P}$GCv*EScB{pB5GMbvCWm_}QQgLd3DJh8ri9$_LZSCB4 
zJS1xvKzA#0xHIl+k7|!=g<0C4&?Q@jXR{Wi$pbRNRk89Hp5pF7i@|G^;dmj4zreBVN%=3JMCu@8VtE1s|y? zrYctuYSj3G$XBfN6?gSHwC-$IHA)7XE>fwRN2l%-{cOiJMzJ1SLN&eIhnB6yoK|dX zTt{zxphjL%UGsH3OTo&@T{;{bD(N?STR3r%-|oh$Qxpe6N7$6kCg8DkTy#)!Y<%=l zGj2ZTpj=EF*7yynUwr@_cmZ z0)D@Nru5MX$C1)AQ`0C5kIS^^tt`wy*lYj2CVsV+OsA75srWf;9>al;d z08i7gkQIO5#%8hMp~t~$Lt#-^c=)G~5FZe3|6ZkhAaaOzdbLc$8zFWJuI{7^hxL67|Qc@bjHmIVn(okb{bu!`~H8mgoycV^cm zOv!%hxmnBg0eS@F)aKg!HoByzRk!WnI9*C?aunN6rwbL@(p+Dxd)bI7cd=F{0c>*# z#jDo=)Nwidp>dbJm#)~%14oAOp6l*PKOxw=Ha~>&KL`kHPqx8VWqr-IetFMY+^1fgR<{$NT;p`<*T--pNaJgze!GP)5fN#$89nyM9vSPCf0+6u>6Zl{vr+D!V zB>wjH;j;C_5BGX8+>U0OhPPn&x*a0#PMWH zqV(kC!EW=RadB-F@*+Qg&4H=;-ypn!W$~6l)Ge~*j(|XDYMiZ z%4u`tnCvhWwDVPFAACws6n*aq9woPbe79i2Xpfo*+O+q@&v(0ESyn^AYMZe~UQ{q7 zE2k@D*k^6^&jWHKZr9Ci!rWaRdrW`r+nLo+qU9o$bl1@~C9Rq)gpB zc9AQ`$*I)xnE}fuIF=p;%tJ##84!10L*)dRu_~LTnb8YxRz5mt58^8_zPH*41d_woLXNs!hXoIQOV;xJLyLW8GO$8%f&7I!Iakur~5;q+0 z{_^s%?0D$_aYUj=NFthO3>d%)f)ON>+Q9 z34J3YeZB3uJ&RwpVX$aDIJK6#tOAD2x-d|fUVle3vuDMN39KVPdDey&mw)!`LVth% zEzN>&^|=t~ISGenjIeW1oj(;1=#;=;a4tX`UVvK`K+iWtMPp%aR?XHu`yJ+W9!pDe z->MXf~O2Y#yZik}-<40Dz77FK9raps@)dg9-9zG2c8u+NxpI3wrL= z@3+E_4njNsloV#zosf3|nhi8h4^&ktAxIT?w%(u9ZhFx)v@lnMf=E%3{e^tUuk-=< z`!>k25*Cf#C##~xJokP@1{@|-ERZC|iandIdiU*z5h^DBHX%=6Yi?DiV19@Z9*>KU z^rw4ePMK2GC2(a-c2e~~K;65+Rd3(%O}F8|+(edxib>0B%a_{%-lRvZwVdf{zZuz| z*XlEL1o3&{HN+h5vBSVZZDV6N@+E-bvYtbZ`Y_@@@R#o4dA76S4Uf@w{r9e2m2}O zO=a6r8%qzs+jVHI7~=?vlmF4&TBE>k52>$ljO&-PGu(rhDGq%S ze2xtdKD@jYJt$T`;N9le`;&_`@oxv+w!~uvBrjUKX4|7x#O-yeJ_9^d7$Lz^r%Z@y z!YShcvLulx!_~bN&q{*YqrmL^^X?tt5VD-{KC!<%wLa-R`$<2lV;1mH3rlcJ?ND9= z;De~1t1_3acShGoop&mira_m3BfcQ7LY4Y$+l@8&%xA^LSEfX&k7-c|FQ3h~%R%9@ z03^lG&LeD(<)oZ)+W`WPsHGoN4IWS*) zBjAt&hU4n+vH>CWxKObbs?W-Qz8HZeZ{<&Y-VWb{JEU&2}hcW+3*?RR8YURsg^G%uRp+5}b|gg#SHQmT$e3ffOyUiuy* z>VB87(;&VBe)V$|Bm!(~eh{z)Sq<=nUWG_diMM2T;Cb`#@Q8%`AqbGW1Cep(K^W+P z-|VFmap`yw3$g$yPzB>W)&obE`sR0eZ%DbrU&a8EzvIuVU!T6Ek$3!&qHNE2aPwv2 zlP9^&ezAxV4(t(K3mZGTMzIw=uu4b&q(wluw)3*?m%XeWHC61X=xfIppoK6UeuTZE=YkPWTt*i92 
z{m0YtRm3kt@@Yj4535J4aeV2P(QhWkSN)LNU=;|jq%};R1bR!6A4;XNaOKbFDZx6) zQMPqWwPRiO1A@=&(`sg8c4CEWX=HRNG(}6$tx7f{PQ+Roxe(PlmO%L;n8ssNu;ip;qY~7pK<=pUOlBC zy=RK}y((4Bq2D`!yL*^RpWX042%0)--n=fJK5{p2dxuxGiX)3_Yl+}r8r78OOxfdl z7IW#(j$N&K*J8k8CxykY4lSh|8o>%5C8Y;NTWk<_Kc$y-?u}2_30#%JJlbva`J)u; zKAc&(^Rg;*${L1RFcUjAI;y`@3Xn__^w9b{uE6q0LtSu@nVI<}6O-h7e}A3@|9}8x zIEB>`Dj+Y)805c&O+!^gkG$jBIC3k%ZLc(nQrUr`hrJ2&?; z5WVD&PJ`hm%ggxA7qUGGAbD+4)ALj@9E6f56b5}j0muq^iNXsE3|TYY-smC-=&bAR z&aYpE_D_Kb#(}gHLqFpU+xiqPK4N8Kdj@J*RYNTWg%4{WUhQpig(xm*5Jk^tb~EyY z?Xmd9dmQdgOB)#__LmYmOGEKZ)6EQu8*Y_$TNB=)*k%+<)3W0eac5bL)plFHb(Wvc z#10xxC1oVuQ@r?Sch4KGDo2kq6!5}+dTU09c3n-(8$%n?#^BIT)>d5}E&ZwSALftE z&IP5UyN3i?KlOs_GnbMwYEh=2y_oyPgMSs5r(oM(7B~~A%@Kg1n#RU?z>&<1xcXpT z{GZk14e(6JUNEmFvC?O_;ozLFnTldIRxnfoKcm0e#S#b7h`nOse;@kIB6;DI`%v;g|E_R6N z3K*_hSz(t=;Vcb^LxypfiVA+$oc^v|Xy?gfm;ZasG~0N*fC0D}N#VfI@K z8ddl$EqJ8;Qc#YuoH}I>@85ql8i^?0;5RvtT>vR$KkVB@QF(IU7##|2T|@+`B!4S7 zU~&d%Mc3Uxth=4P{nPr4U_RrhG%0m;^*|?Lm*205UDg_zcV|=3?*apjK_TN?E+z5c z0ShbZ*PNVl(3XGMqppEw&@tS4sc&U%4d#6Z7Th%AS+UH*!sR`~OW~)M9jM$fo)OK{ z!69xI%(2Utb@AGlzSlI9KUEP7yL7XpL={6Roi!QnWK)YBPEplh(DhPQ4gG>u9-UG4 zbapXS#J7L53Ywt)gJ!}W5@K*IA&Tcfznd6nhBsCb^UaBoij<;9?;$Kq5kH3s?|g8| z2NmU`%JPR<;&Iglk)3Bb^X;eGAdfB2!F01LA8471eJgzP&*YR8``NCWM6g36=4&b{ zDtvMFr3x zSQrkO87$@kOyej-QoXFMegIfXYZFq3fh*gkmoOiju&D2cT-=6XXB^DXc?8u>(Io$|*_+P!_-1uy1%5z+lohJ;+Up`CdeaH|8L6iw#d8 zXvpt#9y8-0c;H?12ns>VIHH%TN>9dqw3_rL34S>JN9FimJRYwpXnguBnetcPV?HV> z{B$P>K9IbBl@`52oA3TqGSjTj;Vtw`utgz4D~E-?k8-g>=P4+ZmRFI$EMw4L_NI2{!nqQBYGi!W6JN3{Q-b;i4Vkk3od%g-e$nfpX3T-V8i>4Kl}J z66~Rh$|aEFeIFj)NaFw*c6QaC9|$JSgMtfo6iBeYEvp^fvL3feT#;lNuzfh|?` zpXJ;D1uncFBNA3%I|6N6U!ge_^j`($y=^%jc3A8$ z&s|3j$j@&=YZD6mXS59mt)HBB@Lw`i6y#(I->tmF#@-bD5Fz=Y6ovoXZm)0EA|;61 z9tgny38d30hLhFd{T%GpZ+KFN(p*ng#%C91ehtC!r)OkLfJ-=-AC5t0;f6Y-wSNikDt4_7{ANbfddVJrD7#hFM>RqXC;`;D8c zENi6yV-*ikTHNW^qJn?Q9A*uAa>)Q~S*p$OkB*M+pFDBm1S}=x>8ebv;@OzX^X)Y6O;ca*5sxoVns2LnOGc9|*ALf)ee67Dsk>E% 
z7oNFzK`z@V9Dly!VvEkMJP|NjVlE4ll5JC4#_g*&|6VBPT7S%RCIIA8sNp)_o1Fru zI&x+CiXhhGw$1Zl?{d-g79N6g@;Vo!1QrzU0BKk#?-k zij5t zCoWy;6OjY5_>xMPYS>qnGlIQY&%UPSxgG$V2bk{sP|DxjbTeAy`9+`up?svt439Ba zQ@oheQw2=Jk$2+GT`$Ns!!x@-sYFWIx|;foX~N{ugn3PWFZCx$L#Upiw5M^RGD zQ-P5d@A7B-jUCP|-qSWa8g-(S#V<~NszOd(rrY^Zv>#!@k07=GU16*}_F+Qz`OdVn z+2RMJAi7ALG+8^Kl1c3{gIOZf*dPf}B%g~&g8SK{&(*q)h>cXf%4@PawClBryDao0 zen;MW@8se!34|F>3Y#q0F#ACqTp92-T`SP;q~w-?TGxF5Gix%cN;#dDDeux9e58yU z0r4TC#iA5jVd0c(V;Ya{#`JK9!&OGBY5)AxinbABroY4{bo|ZgoW%NygCl?Ki=joRcs824a!Rd@x3~8aXvE#u)wTG^Dehi4CU{qB)(-$kxac6^& zN%Nn(1A|tFf0OPnl3{17fhw1UlM_iS8yhnKuL4xD3nRXSELOJx?>zMMVYDv4CI6hKD9%t(uG2+|!!?{ZDX>SMFEsqsNK$;@=8|OW! zFcFxIOflV*-UJpmi72;6!G|F9bnuI_JdIfa?xI^X<2tO-v9SpwRlICNukGFW3+9J= zl+Xq5=33{0OvL0YwY+iO5xhw;zLv( z=lT6#2H<%j?{&cDF2`UXi@wU&^q%xIs`vdd6YQ=Dh(FsVA{9YHp^$`KX-V<#_gWD5 zJ>;gu#T*uf=N`0}61=_3{d&A!-HJUFA@b+pUa^%XoS&)7nwbk&>-*fSQo`@~x?FnSQtu<0Q z=^BJ+0m~E<8{0W=E`QM1v~iz&4Jad*va;^0ETP^{57|3MV_U@tuXMK&RD!5Lh#o!; z_fTwVr|FZD57m9b%O}2fV`E1Z?*Lo>!ZuE#Y}i=+j9^CtIiUL?(~-2^F3O~^oCE{~ zxZ|gZ$nIVdHy^|OPX@BEX>EPuYWBCon}5xBb7WkP$ezk!pS>bZ7tAx%Z(-$Oh&Lyh znovW7MAu8y{V&DNV~y-=SBhuYJg>*^6b2>b(QSANn6aN`fZP7rFy*8@VCmoE7(axWxAQ zV3DOE;ABqkXE*n|swB!bZ+pQiL~rlAxhN?-GqSw}7hz~bjx6ZKoXd!%1bV3K zf74*IEkV{9-j+XN4(39X94>(`*1ouhilO^G`gS_2pF2k`0~D)Q=P9pYd_t-%?n*~P z@H|5n9%&!?TK z5>30-+xM-r;C@yKmqoO;W8_KN=bluP2u9=@3H7TJJ~t~a68ZfV&%(FH&yU#~5QppT z7r(NcN+K&(Ty}Mw_{#rxwuWCT=pX9^$a^?Kd%`TWBE@ z+l9ToUTqNBmI#!7u(_Iae;3<($RSdJV3+8_ivAo~H_#G2p_12%Nw`zvzA+6FF+mTe z_pr$Oc@Jk>c+(1Khycb13YE`2ZyCXgWI=UB_1~JMEWCQUdQ)O{!$O3eExq`7xOKkJDdebFfRTH z(@HCL>p8P))l;>^FBYk4L$@j6tcOPpq{Jye~|2UpVpRFxE|z zZV=W*O=IWB6A7r5?5B)M&lKxp^vQX0ah@jAs0=*euGL+cB9H1FBn#8IK;XP3o?wwB$>lI;1E(A#+bpOYF9-gosxatPFl9wnFd^A90M5-rsTdYr zyt`{I=VADayN!o%C`BV8IJda;B+}c)r;WeZJ5&FuSFdYX8O0Ri5K9QAryg*rgs07_ zXQAJium4Y-%nzvF_3PIYa=X8Er>Uf?=bM~G#?8ADpA_Gif=r;<;Xx<8ixAT%k#J(> zTE@RxfX-j#7$9PSW+Ac}tv&$-eq(DZpM{L9?D6N&b0d?yFhDnCvjohl>&g$}{z~`a 
z);}U3a1NUS1I`@pYRMP8VKbWpZrf_%5jL|e*~PaLrlkHB1lL;X|Ca*UDfW9|L=}C) zg3zitNr-{uUCo8TO7DNw#6vW0kMzu=HI4DOkKn2TlUu*1r{4yHcPk!%oQcWF^A|4s zl`tNd8V06Ad3kxK&YY>nVzFs-bN)xL&I`I#9#IgL1@%Iv-!W{+8?b9ItbKQ$5}l4U z0~5jw9{F$O3~-0(#5^Jab^j$a5(G-vK z_EwPfElI@m9-!ITU#S%EOeCqKtNIFy9f8)aALES~od&YKE6-Ra;SXyL6k@C2C3YCM zGlp0WlQ1mM`MO@a#@DW2pT%LsVMy}eV7=W0YY29%0W^F~w_WiN@@os08@)g0fun@{ zIv9OAikO1IkT^VcK8WoXFdYC$o^9Bk?+pgLr)k9=Y^MM!ACGfi`U+S9GG?B`#l(aZ zKx=@4<$~)*oX6S|UDwG=e^^0fM(Flo7FZ0@3zV#@OjF;!!)a|FCi-agRha1qS_)F9 zsF%7N{hT&uJWgWAwYn*_uD<<)PL&JYR>Y$efMz2h$oK9M2?`1#4&R8buGs4AcR;FswzV+_?&=po_Ix8WIG=qy_#YJi;s9r^utTKK6pi z(dx#=!|Yzf6%vATT447C0Qx~X7LqIuXm)2d-6Reg7~BIu*l)Kw_6Tkw;A#=0eUQD( zV>!T!gxVpXAGAl2{MK=TI!57tQ?)tPxGJ~x*5;~v1GCp1m;B$+$U4=to>}LjElOg* zJt}X*+uAuvC|-2(s(k&yNl|34T9d(3p92&;CG-T80EBZ5UzYK!Zr2)NS_>L&Q7`$- z+MaO*ul-jP2Rn`Q|C~eUoX8Yh5eSi5M}{Z1z>R*frvI>BnLn-BSjCTJ4DC@hFhnA zx6p)vKG39e{T9+fLNt8K|6-Pnya(CVe({kR4U7?Y$Vf?z04qXBk9(WX1^~}8gvCjO zxClYShpy+Q%D?m-NL1=?KXJ>4bPy0q?e60UWdHDzw-bld#6i=QKq#`dcUqI6q(H2 z1(G4bo%2L=bg?t`?aFN_wb-4bKO9fE&T6-oK3={ba>T~YDFM}@H`n^K&-b6Qdi$1-2SD#4A|j_qNi!Vd;#ovCIwTNh9wd7N8oac$v=XL12QT;1ITH|{Ud0$3H(*k5a;~*5W39_w1CSssh6{~i}NT8|#+`X=$;Vop~5x)ea zXnc0|i9aQO92K-jTELt(HaDlMc$t_md3kx2l$X;M6`FR}uMogc-?zwp;^W-==4j@A z%df0hsq9{uex=clXmcKFNUarR@p~|kDj`tr(jinqOqBF!S3t0Qbj)eIaAuPBom~3Y z_yx_(JZb;zv9YN)2_gRa(JAuxGRI$A{tKGnH@fAKRRoDRi2O+GU`3)_?kKuB>##>3 zu(CBj5>S}f?wS=1kNv0X8EZGbH+U}3Puk3)nT<^!UZ*$3inwwUlQZ0aZWzh=J59kv zgr&%a^U+bR1Cjp1`VEJjI~S^*!_{E#4p1fuEx+WgR}Yv63199-(03S_m~?@M(HFN1 zMHLl8IfN-xlV;QBJX!4ZTeku-GBRie8C-rJftlz3PX9b}dzfC+x}V$BU3;NE~Jy3JwY z;L|J%hIf$3Mg(3Ew_kKzfwcz{fA8L%0<+3y60VAuk_kt?Ks5xc4g~uM#kSpk^7+%J z>b=c*4md399obbgZVzc30A>nirqsYB5h2Ex@Qf0LMntT`8FV-2W(Iq+vM7?~IW|6S z9y2~MQ46L)uJf722p0jQD=)(NCc?7yH4X$QT|Lj}G+5+HWoC&MVoU8&aF}WU?o0$F zk80r~9b_Aiwyx4xK1^0dgXjR>c*$*BPXy0?dVh{F8?K|!qqC#xLnpi1>92FUMe3ui zn{OW+9kknsC?uM3)C@XAi7u9tep}Bn#D(kc?7g+~Ae==xVp*U)%>W-Ja`A%?aF}(aDlr=v7|?W_ z+1v-^M_;LZN~huH2cB$fZ19>HYLm?E$e8970r%=1`$XaGvfvH-)5qUB{wS{toaRqg 
zX7hdO@v%VZtw!a-d4mYKM=6;`zwrbWlpvfySH~%B##Wp&T%9S+N8AH`=VX~+AMwVp z?YdNS6{42w+aTHL^$W^p7lOMuxv&hwg+P{;za3c*~uWv zhMKi0gBw^Pj;8Sh#@?^p^BwGN*EHYci}SrwbRKF29W#wG@qb!wb(8Ta4qpOVz>6;I z(xhZimKLz-We>3jT8Gt9%2rqrAKlL0fu4>=+Iut4@Dq7gYLD1Q&>mCT5AFzI>ze7k> zUp|W5pNi}5tx6G_-}iLgW^YOcTkX(KKPT)VXCl)60`nqbG@+^pL{r)(y|$3A~+G{2W14RG2cQ*TYJtjWe@6SKj_Im#Kl>u z$M3>@W`?vd7~-G_ism-i`69DP?N7f7GsDRe2*+|E?~&#HKr%z6E~&#Pm^ zJ*CTFi=q$0kt4DW1xCXtw5O{1;8>l%xh0P?wxYfWhSlrk?(-hu z{#!z)6c-oA7xy&ZX;Bk=#*Md27JY6J5)jvxzxU`dr%N<6O<=D8Qe?CK84wWS)*@jJ zA6?P(hX6_Y@`b3fYOhofIyQj6I#Pm-+pi%pd;ffYvY?Pq&u)8MNN(VHrBj`}Kj{(K zz(by}$jF~yAZ+8{07LV@r$eD+W0>+UL-yzUtIz!VGXf)-4DuYs;FZNgN;f1j{GT7`?^o}{Pyeks|NsBh!&;DX zB9cs`Ot<$2?fm}s@-%RK%Brd(pJFa_KWpUTwCE_E&gbd#Fd~l*UKpK7;(=VZ^v!#Hugtu5_|n7 zc15K$l*I4LOO~-Z$$UL8y=*_0yC3E^ zbuXP)yz%(F!jbzI6D4MDoI7>y!G*5;S*^TsjQCNRYg%%7wD)3r*H&mGIN}mDT z+27_r;2GgVHh$>VRlNzSl2uhBI3fFZI-Wot#%b%!M=I|bB>yX&k)~^z8cXq~u zA+GtUDtg^D;>hEw4@d$!O=Zx+m`Gzoc0VSJx_RW9D1V5(8h2#z62(@WsH!!((zGFR zt1-PTqo_Y+P;DJ!nXZmLV28`$M@-o^8lgl?^sP&khu`#$+OHE+c;`gAiSEU8TJ4}L z$>0%%@d7ukkU01E8S&aF;mZMA#j1zzTkBP>Iirb!g-?{qt z%_tp?L&L3re~S}9OcpjY|ll|4fE95Y9 z4-f~SmBIXnR_dB1G~BOWABTB?S3jOVhu!Z6Gjlf_uuQ4)Yr7Z2mxtc@YqyOQ7~aBf z3Nvj~MXbD3C~u$%I4L$*<$Hp>HCd)veZ9y`2d~&H8FD^0u#KsWA678*-w&;(1pT=a2`!H4|dJn}q&Ew7i<7|?zdo#=!yq1ot zRQ)u{J2~mek#)}+$Ha=$-6vErV664rCyy!5^S;;+f+_fg@OAfRb84xX_0M_73`h4q zba`#{`OE7mNjx#8{vmdFRt)oOEI$I1xs#L=;SFDt=0Cx(bjH6y4c=kX?VzH9BU$*F zxYS_7$hySbrnP_Od=2$3f6>8h%oPW|9*jj!1ZP;~{$S^PHzjMkn)@WWy}YHi>iWB2 zGtgqjV!gI_H`kSr1%G_h`}Abp1A5EJP;> z0Qewa^P30+4%RlVJp=0yV3(_248uua^)_dT`DdI6#M?LE6eDJEc?m_ckn8H-$WzwC$RuOy?V2w|4ak^Z|$zM|0YU=9F^1sO&+48y`SE;X;(0R{T z<7ylmfhn_FFK~v45K0YImRy_YsLYRzy>~PLCcOQJPK3Q>CZaweD6)+$pE28j5d`Y}AGe5- zH)vZuhxd-=STwk)F7KcA4kI^Kxpbpwz9nWf_{+EMBfAG`%bi!_&z85-T}??O3Tga_ zlCO|6QoxHRAGt74yNb1@Wlhg|;Sl|+aIscbY}b`fWx(vvYx+v^2v4%ETU^;YDz%DW z$!rPEkgu7Y`3I@RD{Bgxjk#3?G5!ZUrgu2)1YxCm#f@wV;pkcQr1yOmw+hw7epNkO zX)Ll%y+iYYny#&a7|o}5OrFa#L~(do|9Q$oB1-(urKKz@5y6T3Pd3?asjc@oqiwGe 
zxVeoAP6tk4_tCu5*4g-lTq<0*3@*{}@p0mA`Ddw_Og{(F!3_A6#C7&Uyu3-=__|(? zMhi~&;8|ajcsfh48V3KaE6px#Q`4=%VRH9;@YvL|eOse}6hb__N8_~oSN_ZIta|?H zWpp_y|K@GYroH5TpPSd?Rve{P@7sjkV8V*4E)8E;o8q0#BJaFKtqotAI0m8nUHIN# z7Q?t8U35_#`E$aqS@r>(BV!9;GT+#ncQDq3z;u6h&q3Ue{2GRE<-^vF&~|xP*>LNl z8((neUA3eoDds@6C(*e{@u7OYJP8P(p0HtFfzlM#5*d)SX=!W#VMq}U@en1}Vgv&3 z#H6IJ%4{&D^c=8L=t24#3su3DjDkt_3Kti7prnL>!8O1ym<0s-rfeg)^h{uuh)%%f zV-bNlDXr?h0SY8-=Dk_S05}nXaD%t8 zg1Y*P!U`z{*Vou|wd)bh$Xq9)!wMGNiX5&$yy^gxeIhNL*XLC;g)I^|ASVSYrk#m-I=CT(0SN!pYlj3NpqdxFWApPefI5Ra#orio zmyxm8fzky`5|h1kZ~5gAfyfiIVyNw?>z{%)SCu;j*?c3ac3&C`_t>+o(=mlK#*A>6 z2%4L4O}sfDe1ZC6{pWrX4IX#>O0Jah;@O`Ag8n;uQ71B@^54mvQ0sHpD0Z6!hjTB1 zybz5V=KS3-LhiwA@#<=UNSAqgRKkv%+xUEcPO(!a^+lXNUp`fr_>!EzHBYu+PS}iz z;*D01U~7|#W=u-OU*+l1i4{+oC`j&lwEcX2nDKd6%${&a9z?M6G>~eF=sJ8}?ztF7 zZo|3#Y;iZb@>D|OPyTZ6in!v=cd2c*k|CWj!J&H`eJg^aN3LeXu3t5ux>en)RF_9T zf3p}Zx;4ggIWVTV)u>--T|*dko+;$xVoGAB_{~DR<)k&cSE|J%W~E`$;oXax&w~Ey zeCwWaD4pN?6ZDQ_{2$_?8~so?{nnT_%JT&o1Lgg!|MTH1Cbw>#%^=~okZ+Vuk$6a3 zK8d?kv}7%nEw9y1oI~&^R69Ia<$>#Xl`V6l*F2!eI@1#&~9^T3Sc9sNqINf-k zD>K4WjSHssT)qLn!0)E=uRkFp1Qxokn~>27+yjIigrnbznHCeDgv`JwUukJ6bZ>S9 z{5B7$CQLW)(olm@Ewn=8ut+DSreJ!b2Ij0R8Bp9&k+7nH_<_U89$FEd^9HQIH7~q4xCY zS!j6SvDzBzt90iF0ClPYT9J0>sRZ1&_NJE*mqELU$I5JBiCY<|sXt!ues1>O=#+{A ztPdK*Yrq45+A?Z;xWJTxTy*0ce6r`j;{g*A#f1ou=R!i~_oY?>&oYE_ zW$DQUD@&l+->4Y3k@$>VO zo|Fg)6U<%4xtR1xd=n8drJ(tN90~H%x9a|SJAHev?G@c)@x}mSYo1AreIqMtj`wdB z+mDuwoNcJV_nsDlMYQs7h~e<#U+9IUFC^I`=R+|@tB~#EY`8|2%;oAsOeNY7K3?u> z`pb}?4yQb%GyL;b%cR5B2~x>pt;ZhN^<>yoR!emIDNvnZsT3hbjPmJOZx$N zzaOijt2Xux!`Q&B^OhxDFXsn89xT=^zc_T0I4i}O%$?`H7;ub@T>fO;wgI7Zd^3pg zB97!uw6GOt7U}Fzw5M8ktO};ShcQuH+)64|v*0=(JN-+j+f7;zGcO06Y@0GVhl|%& zy2O9ee0#f9fKU`?%|E|qZuWHZ;385n-7o1^UBG{|)e$;3r6eq&9vSQhuX7BRgPnp3 zmK&MTH&|Fsy}V;s|KZc8zZ#9kfX#)$K!4Nj7G}6>ac;Y%b9JiIyzTUoyuZmt&p`nyty=^ zB`o|Yy8KgH<)Kxf)q|RMtY7|KsE@yX)A3dU(#o)&bZB@O@i%aFS1KtoF(H&NOl|Vf za#v=&jTRnv$2ISaY(LXfX){k3p9-3ZH=Vutc9iU98yeH}cFb>YdU<=gI5EB@ 
zb<^Aee}8X%-031No}S%_aPLjBpINN$v03I>x=TLU5&ZY%tR$q=%Q4pVyTzLxMmm*dUEm<#@2Ezf1~M?v;q< zR(hviAZ@|R*>Y*4fJs=GHca{rhOmjb3YqPI9fNP7rjEP-4-W+w7gzNBS*eHG)+D9c zasa0Z3KmZpt3YSTMHVINBr*N_day_}1Q)wN>;M1ujlRG~0X1 zyfgmIE=pWnoCv&*Tz5tXf||=>7usxu>kN5mJ_@dQs#x^?SQy#RTdF{DQ=9JFs=1sW z$)g|&Gk#Q3%C7iJqq=pF>$pT9!&4PjX)cVLt4cUkf`Z%loD+Wkaai`EY3GUAhr~I^ zJ}%2`}4@jSUzcCijq$g4N}XRu+D z9^Nj*IML$1AzSA=;wVpQp;Vq&@0fAF||Foeu_)QaNYV6&HX~TnsCE%>T|t5 zKC{dMcB>9PQiQT5o$p4~M%)?)dEa%*M0=L3Q8C``?ZjH`#nFml0wd7rVgGYF&(mXL zhVss=pL+fhet~M(I*)AAb}Ujo9JZWHa=2g9{=-@7@6> z{|RW8^nGKL9@?+hh#T3;0I7L7AlElHXJ+qq|Nb~C%I*f2yDiM~e1drb>p^gBMsf%- z2WuqAxw|&eIE4wJ9f$)QKLJF!@NJ>eHx-2EcD5@jD**xUpD3XjbMj;6;tGZ^AIXNU zyy4h|zMOXpBn<-g4(z!)#6Pg=6&1%i=F>XO*c4oTjU-%3I`LCSdv#@_@+ueCG7u$a z%r=Cyv%QK{HB7vfE3X7b9@Rt|fx|@7lgD5Hs8W|4NTd7d*rSDd6ZLg zVFK0Dk!#qXrD2>Nz(m2mZ4ob>e&illTWOhbTUO5ze$Y%TN_lDAnQK~e8#K-vh?L3p zI^uOl++WmGDppn8S&d)wBtNPh%NJPaM*p@A4INKOcauPL$g8a6E$t#wtDuN#2|JY8 zJ9#1wpXmJE7mGApaHhsIiA-;-Wfo%c4?OG`H7m({I={;|YY6sqQga2Vu5cF3KBG7 z+Nc6dA&`+S!C#c&Fvs$~80R%;wzqgu9uxx3!;TTy4H*=NBNX}dEV^rXbYi(!E?~Z&3R=`SdoNJ8dDRp5 zBd!}1nM?xAO+7q7FZ&&rk+)Zw{GUc&J_LeJgEOR41o{$K;C&1g$KrDcYX z&#I!++4Gm8YbzYl3SU#l-oGNheb+W;!oeMZ6ylJ*(!ISnnUgWGP|wMhf#a`gD*;u% zWDl=1f6&mOTXs<~4dtSb->F?ePy}qQGJ+zs{))Iv&2y?WY%+@OW=JCp%iRQm@g@i& zL|W}YrG;?w3%y#L2w;kUR=~q6>gYU8k&A^frPN_N%8cWcaL=Ih&; z;c|Y>%%YJlBc){7uTVoG=W~G>wyC1>>mnK0$Zpb4*Ziwl(L_ z`aehCnpDqAm6#T`auh{s=~1q$n>+fNk9yG$b~fH$uXTL#LP6{dd6LqcA$JrD8yl1o zH3vuTvt=d^m~MVZd#3bM@ePfh{H3b6kuCC_*u1ne(&_A{$WJ*!Qe-cnuWP^FxRkKZ zWfsreg6$F3){d(izZu&zpU!)7PmbtU-7hJ-KtG3&?OGmhYB zNx^W=V4cHEi;Y($R5|?o9PPXEbgfEWslnyo@2UrFA5u80l}7b-DGok29Bva_zO~z2 z&}>l5?9~*(@F#c`0ir6Af@>5P`m?;z^Kqe9(R;hB z2$EgRSo3+gC)%4q*zKa`)55??4jI94+5H`>%_1EUXWwXS`*U7u>%Jak@VQk3a~sfj zz39oH785&581mj-GY*wb8g}&Y&W16-`1orOYt<5)n46nh0ZDb>F8vxjZZr#Cyoyl6 zw`;<#3cm$W>|7@G;N`ZT&ClqFKw0=4RERJmn|gnjFHy8mlLJP@uUxx!)Y{q<#|r4~ z>VSE6CvcF!sJ|RILWS6TfMRkgHDZ5bwgx(k7SCHb4w_lx)v$a{A&=5pOSW5evn7 zil{stuw{*Cd@+S{AgOU<4RsM*d7I-ul+;O2+{S{V_6XJ8ImlXHp&{pYzD6r3v_u1# 
z?Ej(bEuf-YyZ&Ji1%t2%K`>B4Na<=8lWrfbzzCmNz=jAmL4Lid(SWo z)(Zy!hICMT-@%XtC*YSkD=c$%bYkldw!lTfo)puk%HscQS&?zQgO>h4UQHA7LYglP z<$z1~abhj3NV>``bAXoW?|_BpP_cb`t2|slt;=8&&-oseZGcwY1cPvx{`FL$fuTmfa`Y}EWEQw3ctDE`gv*~j(h=e_6AM0wmjO&) z+VsQznpXc=6p;&^d}9lrVThTmE=%oJ?@->`i{Dz|Pdqy~G=Z;*hOGNcjA=BlX>hOM zDzGWx{=Tq?kxO5REh!BHcJzdgHLw{`2E)n@TlV8D-U{y{1ewqK@J@g->r>E%k zE_&hCA8ZBRSdTb>M8bk#Hb5^QRpC`;+(|J_aBGDsDzC zob}|7wwrphf4{}Q-snI7-3vR5bjAO(A4wVj9Dox}v@8=?KlT6K%6KUr`{I@WzTrUy$E}A^cLP z2Yv^dUpn*i!#TRK1)r*+w~|A;?y53}!`VrBQXSv7!U)Qmbx*F7eR4E=JLMofS=x^U zJcztFgf~}sjW_W$g^yEy6+_rv+-U5>4kz8YE}jy9i=BR_!lBaqS#%Bck957fui%xE;k(3LPR54LtN?-G9H zy&KbeSh_J&p|Ibfz@}M^V4E+GAy^W#e+yqOaKow1Aa+>+`fWHm+^=f6sQNi3u879b zI7V<4GaFp^vfFNR)xG_)0nGXF(5E%?&iViXcjYE!@u9kT4*YC1+rqI;tgsUK#{5oi zXmixUb1y3UFtMraySSWEU0uzFkx%ulxE1BilBqM98ddw2_6PCPtI2un{Fykr1p*rz zjW?NOMX1~Z5P;&L(wv7ta7OTS^ZU303%(W{Z&~m1nUW3^qaM9*LL*7wwY!T)a;)N_yXQ3} z%gJXqrR{`AuB^V$c@H-h<1g_jFc_8YcqjS;ucY)N5d*fq*{Ba@%ci*%54yrl(87CYlW;}!f|!0jJg@;!0CoXq}?ibW1LP0&o|X1my!-cgA_nkRiwN8di= zmA!Pd{SM>0&}JT3vo@b4isil4kc}NVSToEYz|zgPnPhnVFRbq;!vV^RGJdIhC+EKhK6z)r-sM$?U-u%^*6KOBn0~!9a9+sc{Rjhs_ zg(H#6T;t2>!oy{H5`tc=LDV%~Nq(=MQptOryP^>B$&it#_9s^^y(d3BPzhCmi3u}1M=~K+5?SAI0 z^TBmCAw4^;&XY<|=r;NNtx0ZUYiD7Ksv;y^-!qH_1h13M!o_XwcuB%Q3$r*fLq@v| zy?VK>$GstXw}s{BcsX)RQavqGNx5x+!i4a4I@)Yyv9G@h-PCFirsP7mihGZ{9&UD~ z;qE9rPE-wV%t3oSG*=bub>pZutk^j6leV?b!U(Kwb@5;7#V~A!R|j|ljIpm zMKmamb~AnV?ZU>seRP;pIPR$EJZ#^mTY9$wS6asdvK-zA&H&ta+(O^Ooc{d%jeX@g zGUC*@;@{gOiQkp4Ss$;=-AJ#CinRZTVB%BF zM6gXc9>R-6_Ko}a4^+Hlt=6H(GJkon5}~hI?pLVJnUcpoiL1fz=W*t3StCd~$lW!% z1pl+iVLj?D{`&LWja0Fz4|Fx#0b*Vf zNw8w5ryQ>%|9uwKzGcnOwkW(an)14Hp-MygGMGF;_v!Pj zn)43Uz4z@Z($h^@esya11<36r^zJQAUr3U53&GW`Y7ntg=89Wp-tFYdtK7dlAcWPz zSl8PG@DBCs0sicW1vCwSYW4)Nkk838#}AKx-MV7)b+OeTkg2DW6>}&q;5Gkcsvv#j3S3R2<#P{qXhAms}Sfv>WLs+Z-N;+QY)acAqoq!o-E&MM7#8IbhYMn3DA% zr$4525hSvrir{tz zbyTWyfYT?FUMJcPjU7JA9h>@RcV6-_d+$-T$7MUd^Z>L5hNzBdo)78r!h3>~`{*nL z?6@O7j*<{0Z>$+T_2Ak`5`MuZoT;cDY0rYWQCqQ<^7YHxHwDk#9yZ#~9kFhT5eaWk 
z&F0WsXjKQp8sz~5sHxF=doQ703#+s^>vz>goB*1!Qyc5N9f?oVt83(UlK1pj;MdyQ z8b6vHTe9!kFQ(RqX&m)N0nJ$M6@pv|#yOfnH9M@*N=Vty5+mx_!sR-#lFP$EnLoS( z-^9~xEL?HUEMGc{CV=tq&GaWt=#Pt*E~S-&9uCw5cZrFH6_%rpj@9SZ{O9()6=&i; zQS@i}&R<`Am-4W2`!+I(#nG*2=&o6SV1nmsd5%>0jQOie+yFGtRkdewgsn0>j@Ca* zfTll+aU?+y-wpu_?c$jvmKhp8c8Sf+qD5x{wMzrQP_FjZ)(csED>xqGY9xW}Jut^#`StywgVW+03CMP6QL zSHd}zsM1g-x20u6safOuQE?b_q-tf3-#d5=TgmUmXQGs<67n>AQSiVaO|8@yUzrW|x%tVjELurr$Ln*&e$N$ZP;nQV` zzDb}!??1^+V$2gYoZ=DO$bnDALn`I%np82{-_bJbLw$p`MbBfQdB_g%6yYW0#{FBY zyN!ovu1T{7^Mg94+9xRw_0uvNo|RogLZnrkvF4MgP!waoI;u=~e|Ls5zN1ps$yUrO zam*ofZ=>vP_Y7;bQyg}G&((L!v(hbJ8ySm1ad#e$hgPl`XFFzA48!ekbTrFV8C5FnO4=XnA!|LGi zjc4Zj!Q04E@tYkuu-?eLl-NCZW4ZmFavG$oc#|9!eR4j0>AML$82w8;Mvu}$&>*t_4AQq3Z9@WM__nxo zF3Rp-GD#-c=VN_^-c00M#?AM}YCRd-?9kXJsruotf<+4)txU)O~d1s-fXHs>> zmxW6iC6NCHBn~9hQfcROgAz~6MRZFMWQ&N*cV&x00ffFAf?6AP!Vd8`5CP!ZW|v0d zMb-P$dlwup3nAY3T?JRtywMp$4~A{MuJR}y{(x-)N>;ZXKd&u_&FDXZ%9790{d zWiSb+TYW`i$#Ro)IA>GDYWGccL$))9N+P{-CXtIV>6@URl$oGG7X&rh*6{ z2Wmd%^vQYR@>nsgUr8}mpG!g8SU^cyA}G<$QOqZn4psY=(!2AiM7a<}T9|2B!NtS7 zvDrLZj*)Z+p7brfu6&c9^Hh{|vm$k_$_<3XsV+Pd^+11O3+dju4md{(fu}Srh7MZg&?qPsMl3daXj{Wfte6U=JsCA@YW7QT1s_pf8d=|@Jd zeK$FHSJhG~g0-}e_Tu-La)kjrC*{m9o*S3Ffu%Uy-xGIi$6rA|?ehaUNynRZhrOvjirT-gp9x1V((^oXA=yy6agG{=>76tgRD^rGu|14pbxdPeMv+#y~b zgI9u{vLW|d_ZR5N^&i!1&l}siX-PLD%zGxQim|*>D1IX72;n(xU?R`sPj|uvJ8vW< zsdb4}Q35(k=TVgwD?9K41;U661ZHUn+z@L1Z)>66GQp4qVo&1dkuyS8g4uJy1tUA_rZi1S;WF zYl}XTQ^pDR=jQ{!V9!sQ>Vw@JNr16{R-THJlFrtGQRGikyTD+?{<`231N;``0r zdFa@Iu|$S_qhE$Grkv)U7w6iXR^D6;62mbzDN~PUt=&d8=|)K*A*e5U?bQdjgynV> z=*T`51U^OIgYDzR!MfbCTJLH;4kOO6z#IRz&sJrz`0W8P-($fNp!tzag5vCG`668% zk?S@OvQ$YUR69$7XFgk{_2r<1PKAoOXonA4Mj~y7oe171i0w& z6DQV@2uuK^U;^E3SymtDcWHnnRl(@EtgLJeoCphSC#8VDM{p0)82~Y#0`tb2ap$zo zM<5fxn+kwz9^-g++a~BtIEV?^H(6IP;t^!bWdEkDTItb7eWroi z>L}+&GNg^L}hx%_%dDdtIooYm1Ro0n(uWurBk*_G9JmN^7`k^&CN|AE z_FC;tNXne%arJny%xjG4nmzTRFH%o}L7FMb9x!IkKEeJR+%9Q?dvJ#CEMe$QsWa#C zVWp5ORLb}M8qhzw37rEeY2m=WJ4ba4qE_1Z6oNG7ojOgw{oIw3P3o&XT)!-4KSIm) 
z@xFmv-~O56?ks6ypSuR=&gYFNNJzh>wtWux4J;_EGF3`{nz5l^BE9ibi-JUS=IWn@rb3aC{WQ z;x;+i%6~*W$(;LeJFt1<`YoAFbNYVYqc256P6r7-D_nPi|7Ng2sk(t*^PP;l$5U9h z&FIjd=uvMBcRtZej?c6oUeGt|qGtbVMvYziabzKV&_|D1Y-I?p=;mgNkb#(DV(-_0 z15Viq0z{luiufxAeMa8U5`qUHrw2^b<4QqEmC|d$%Fce{#*MlO+nAUb5YlPNmPVP% z%aem+h>M$BY`1n%S&fI1grQv#*{8L23d3)6g7E{WM0z^v3+Kmz6TPqSD%e{EMTKk? zu=VVhmAfZz6&yWBbuYs7y1R6fmG$S1f@OLZ5fvKJyJk6y$U!Ss;)vC4Yn~WVuGOoz zS_0@)ZNAdG8*83>u_AL(Cc-tYRR1I_n%Hb5_@*LDwXLIaPZoOnFum#qMT~4a7uxeF z7y6f<2C#obL>4K+&E~KVCjH2t{dp|mlDrlFYEHC@>Qy~!1;f<0+PZ&DXx@rf#Yj}H zDRp@rIFjZh5FgxvswgZ!y@AY^+bwtj@@BYVU_JADwtp?ScBU;3fy@# zMFPG7rhuumZH}qcXuU8?pICgRq2tp zmRolF4ty065zqAv@_RX1$x_JL-ZqQ{=JevU^_@$X#LN@-UECe>D-jpkuv_Za^~CXF zng97SUb6iZ$49z+z9Cq-yohvz7dLEsD@ySchpW|xWyREBzu;gdZtjn>iSFbH@+7Z- znE=d(&|omaxW>sD3>BRaQcGMwOHU)}R@5fn4g4Scxel+~*F%!r0}xa#XL;rl9Ua&g zk>Vj$0`QH%=VWwpKOPikXAcCMjW}!D`K4>YOIpUI!x-rHwAV18ARh{SBio)ZWX@0gDttX9d3?<#$85BQAD&eB#?diGffCd0UY_F1=w03Fs@m(%2P>y}?Z;qCV2e`v(pNu* zRq7~xmOw+u{@<|aA1u^*xp02>>qP%%Rd^A)YI&LDg6n5Q9218+Okg1E4evVx)R6J< zaqv~b=kRzP%rjRuwoKdM4Z|V+9N$WUo07vqk3U$`4sq;)>wp0WJ3y;_o{CE9{(VAd z9GS($#!M2Sb-(!}5^vlCo+QCiX$BDg1zO|+MYzZ~$Re1+%kipr#|-CwrF_vYUGeeQ zDC{nCapM|Yp>>wff+G_!Imq61(K){hg%Wi@ju;X-Uo@;sfp7ON4;T7SSzn+L!s6IV zh7*XG)@%E5A+SqBpAHORCa%TS;(Ig>xPYUo9o-t2+4~}@gevOUmjsOoPg6vU^3WXyTQvWYTF5%0&7R_HX9io2nAdrAb?59`GG(T%&>z(C85T82Exk^; zMI7w;5js(3%#^%^g~M^5l209TgX$C!oroTGKEx(1^tTxqQqsf0&dTvoip;A~`;N5_ zYBbia>WRHpiKISvo}m`aRBfKlS+X1DR&!#}h_x|Pj$R0Do3nTge}53CvKlG6ZwZx* zm2@))pY6Yya(|yiBs_EnEFi7}l@>To;0(TyuBxt%XK;h?_vFcwSNZs;z}u+F@s~yR z&Hl8!$xEhw0Q&vXJ}7z&NRrV-qi${UpKe;!-PV*|{V8%g4l51-Kh>yQX2EY~Yk`;&e;*Hk=smN29m164|E_)|H(`4~h3&_s@>9<6$8CgQnKPaJ62WB)*i7cQ%o_ zg*Q27uR~ifujT8Se#_s( zRC!n>BEHRRySKfoVh`4rA-XE`&Yq-$!~W|ZfHcY6;8ThMx#^pT{U6|S#X4?xBQUalP1)h=mVT;ScdQ{)84 z$eB%>Rjk7eF;=T$Wc9%lR{iGvLMM!_;o?D*9pcm{c->ey% z09XDyOwz;OsKPoQxtfbHr4iboo=$NF(Se|;=V?Z}#PkIfo6(_)JF3QJ*)8ikWZ%=< zcbVlB8?)b^-x|8*aV(ZG>~`@we?VrJ{jao#C349+2?5Rg$N8N@+zuc4Z;trdhI{62~9A53Ry+r 
z7}Ayeb8^_g3dyc|>%kvmz%VCRr;|~9M5w2WIhXsUc71yT z4@qn+oE}TqyPW&LLRbqXbM~D4VTFR1_FP>--tu#R!Ml}S1u&)i^Lf8P%cvznQW9LM zjwqLHs47nS3b0cv{T?b_=v%6se*Q$v396RLLB1=)^%-fEKtwkm%WzPS7QRRCkCHR{ z)V4`6P~}C~w}ftv66XH}V`30*sk-xTZv8s7<-g3tI@-Cn8!!07(h~awS0)UI;q$>@ zG7q6;{k9M40~IyP`woW|JrTxtHYrldbmV}|))5l@<5jTe(u@rfa1RCd-KgBz zClMvFK%3Ou2{hZM;^f?f7S(hY`@SE^BS{=9>d)Hdv8(# zO1(hO{H05m%n{6Iju8=^A}u=FD9RO~JbeNlpwbu^F+j&>^|5JsZ_igSK4hex67}t6 z+@XuhfrcK*KyX7(r}8w#EgW&w^ukOq;_Ec^>-0Lapxgn~n(tJlL}g_anz*nkZC9pakq&*;Kd@r1<0fn!f=LzgY1g`Nf0Z<;;%!=ZS$e_8W691vL8 zzBeixGvsqfTDe$`dAbDG-;ti^P^CWB*MYm+m!}WMf0)dD{hc!CV1CJROf9!tMkcCR z?Fcto^4{N5`ZkEXZpX@sZ10PN?z}KL_`TTeG+2T9^wrsOm=_c=_J^Lp3J}{He{#?` zYVK7zq=)TW;F>-taYhJE?mt&BoD6H$I+QU+dT7sf^ldY5AC&ImNd9gL%P7H?XRbRl zq`t^B(bY%Lo*f}|0Xvd9H0mFvP%Gt6q!LcKF&a8Z)h)D5^EhhE@N#bK8p-v)b!iIC z{r&wW2gU9B1RzO+)CI7M0TTt=Y9G5JC0Z^IjMzEC6!!R|>tczwni}2Ns@IfQ%A=F& zxiZObCmT`(8xLrr>rQjM3st)|K9zl@br?^H08lljKi|c=C|kw7E0{a`kideJ7@myk z6r!1$JmJ^svYP6sItldB$^TA08WjQoEyN15>l5ePoYHTJPeTq8>_L zXNvQ)h{(-!;q2ru(eQepgFbPOGYn6x&+GQ(Sh1PMhNfQYFBLsyJ+Ze?8WgUo{(EtL zH2H{58=)P)?vX!h(x6`INhc$-@3`6DJ7=wjkn_a+O}%@d{NB&+s;KDb^Vpu7o^QpI z#w0TDE(k;@oTX?x`tdwgmMJe2Mf>-vItJ=lpcUga4NPp4lS!esX~^*G1Rz%!0XyMR z-!^BHa?i1?kjgN9dHSi+ZePu*{w@X$!l*SL-`PhV-(>5wakj!(Po$4^%8a$fa& zmHLY1DvL4o#fO+dwXc;ql`WhL1$lX+TymOc2`SGxL|s?Srg3}1a_#u}uEka($VEIy zL&JMFjNr%YwutN1-FWx**%9{@IiA@>d-ot5kyN37F-eJf+zEjqOGKj1nsX6xb-(pa z`cZD9l3-Yp93aC`nOoV?7`Dq3E4l>o#Tea4r!aT*KP1}wXij2Z4F&9u>1?dxI8!}k z&LpCC4T^|G6L=V*D*ss_v>@Ir%6txgH5>wLQk{T z`?!B+R8)>k;Cn{Ym!IZ>sG6FpaXH*heQ*(`?ydA73Br>V#k5{Ixs0j5qF(8#&Vc5r zw&9Jb&cZcsgv93@+-_CHZI#}sF|qN>;kfQsv0V!zR2jZtr~JW?GL?xy&q)9ttS zljeo_p#Ltm((k-=Ifg!hK+#})%d$HNZq0=mlT(JKRjm4^Up~p|-+y;s`sLHxRB^qT zLJLE+MAF>r80z`XDsP3=u{+uvOTzYn%qy{{RN7uJbnUA-C5g=4CLmR%IOSl~bN-bZ z_qtfUsq#fxszQvrCC3$Ug;}&NE&+2m3!F$Iae8#iL{d$r;&UDw`Dl6XfYs>j@D(!k zWq$n;Dwp~zA72v^Mt`C5#pvqlkF8}O+zW&p&j>4A{`64WXUyn#uVS~yEp*b}==u(J zA|$6%%(=0jE^)zGwT2(l5r|7A--RBV8?WonC 
z?p#g2@GI`qoB(KqJDxa5`S|dr9^N}7KqI%!dk2?=8N}lcnD20(Vjsj3J#jt?5foklC_2lp!Dq+`Nln$Z^t9HIQjU73WqeXCu3fn#DKYzUpoeVWo zMfocnMl3W#?rWJ}i?^oj$IgqEE!or7H2TPf^}8}(IxaW|A=j~EFP*WY+vs1(&zQF8 z+TVKBvUaQA-dt3O-kel#^;ly?c2$pOW5%gTILS=XFgRF%YtI@9<<$G5_^YbDM&^4{ ziJYD1rHwR>v#!%V$aBFJ@=qULKnW+VaOFjgZy5U=^_nH+OfFg<{Xeg+Bt9YCFD|^z z#1O<_kQT%X!-5<&8+*C;p0~@GGDf+5#B#x08@=}7>BRLk*Nwb&2vAf@e{n3E_(?3& z-Kj@?O|%h~x8nw!XK1!37l7?54T282Fxmo9YeaFCH60%#yFfU`G`Roqe$<`EUla@^ zStFPvf`%MK{mDY`p~2aeRXtW&dGTdG7go#3%i5!avYYmC#nEdl#EwgC`>7+e>4(_{ zRm2dL>SUYfsSPh5bYW3=%HGk_RVfU+EQ=|-QE|um$r(9D-|zZ1ElBgDM3)NGTfM;E z43-^hwYZ=US69_em1!;8J9<jW`IbC*H)< zt>~YmOWH9ayD*fOXyyA&miGV%AGrALytN=5#p=w?PIdf@vy*4}iMu|2Qo%@3E%QSo9AHr>eR7hXDg>ILnTPg)<{%|$E!sgj$U1`~%|8{@Gz z#ftyEwq*=W=YN6mR~^|cQPBi&bs9z`3EDG(0|a_ucy1wwz0dx^40rRo24EoXM7-Nz38Y-W^&crOmTN_D7DRO z#UuTOXFatXcQjUmw=i2~V}!*A8-@?0bjz+lbPVygtPysq7yH4czPQBBQXMbfl>={KO~lJg ziBWy=UCzgy>w3%jQ5h6Tb(Sq+&qmxk(s)(6sx^hBcD@W7w;)yG@OJ=KcW$GX!B~UM zqxbLApeQdA%efx4lPtT6rXN~1h=#cuH=!==cVu!$2X0vo2OYYRJB&8y>*}w#45KWY zmG`Y_?`*T-?}EsvzFw8hErj+uYmO4XqB&1jz9Jjb4yrMQT)lr5);aT7-Kqj6@gRvG zgN{q0sIRa~Ld$XeDF@PR#~4d{;B7GXGpT!^+i?N*Se8nIZdKJQ7q!Z zd0MqNgI>SRr>?qnio+o1ay6E$lX`gW0)u02?KxDay4FLNN1ydu{BUaXq!Aa*pZK2& zaTwx<&(e~?0$|-ahRZS!RIGBN5WFv)5&5jAfj$40*jJ!x^`JC0Z*sQ>w@`t@8^Avp zVX4vFPr%>+JNsd_h9N9VBT+K%A6&oR#>?FO!It>i+cL4s!e#0v1b=>9hxLxYTc*ck zd5$zcPRLpPC?+3|^*>3*_@hmNIidsv+LKfe?I zu~9cS$p3$FSbkc3poRx?K0DP#5yyp#5P<%we;+9sI8mDRMZnjw1-%x(o=R^y^TBiG zDZSK8 z1_iF(?C#x$v1<2CVB+-OxP$cI-PM%8GOA`e-7iJ2&e*W{E??5! 
z$W63t~syKQG$FTs2) zK4Wp)4NeeWHQ33AgQOT?dDTC=y+`kzBSZj^d|97AzpO~Ae}0XZx7(WZFTRI6eL&6; zFmiH$Wh0muEl1q^#E$!t1NpEu6)#`h=7u#N+w{$H4Gt6JVt#axS<41}vxB;?IJXvu z3E(LCTC$ui4t>ONy$>YK8TddvyFy`VjjBuLYP52|lD~{=laVw}9ClJ!kjd(aaKrzu zvBWdu!mcFv7_GlAC;o4F+H6hB}FzFMd~q3Ex{)wib?-6;26sd@Nn4?6puo) z&g%9;gTF z8XF5=C;RUH!vz3uVMv;pyhG)@pX9c{iho5Q2GqmY{TrO7o4}4DC*osFN!*W-?ybvv z!kLa%`kRPPR|f7GIb3(MKE)wSHdQjNu-5&D5m2un?$O%rxcSI@EJ1h$k!XBHR6eY) zTENxp3}1bNE=5)I0dj0=WgV%AvLzunZ708hNfqbbjzALpilqGN+Uq`*puos3LL8>p z=tics@vb``EK_app4XG`75<#AI`s$M>nky)YD8L+8Ix@y1G7mrr7;PX+i9 zLK5zA%P1_Fip9MYxhP>TRVOiVFVVQrANI$I-<_qUs@J8!ejKQ~NS$BSzPh0Ce(2ht zMN~&7i|hxNKe)#DK<#U}$;q!kL6z2P0U3AcV6SwXxDSIQCgJUV^!O3ewOb|ai~mL; z)!CpU8Ox(^WXCz>!&^lt@;|0n35IG<68;7Qr z{s7#~0lkD%+5m}5CZul<+o0s0J-Z5C!lbl!{NNGc28Va#qO-a$^hCfH!G9I7cnTN? z`<-Kq2FpZ<(76A{rM_8$LbMKipupIHltE-5u^o=rkH z(S}qW;X{wXH1#()f*8Q_$~mkO?d;Qf!9AXwfOA)s4eTXHJoxwna;+j}+-DUhG3`Q<23vr{KQge0!hIpq^k`ckbdEKjd?H zr+(e9bT;p{K3To$a0f0VQ7xN4DhCbiwqa7riKIRLzq%qus0#+9-MkhF~x! z1k>1yl$7A5Y}WJn903fuOknJaSDzmzAtCwz)c|}A2@-^zXTH5A@>s8@X4kE_7$jA@ zGFs`Gjn<{d+jRX(kvN*>LJ(fPapNuCU6+zg?OEX<l6Z<~>>e;iWC)EP{c^jRd|BCXK zku;JGZR8Oi<9W08Q-A=@Gs9KJH>t~B?yz8dM16#kEea$w9+F-&P}Hl}52Lwor;VlR zW$XnKe%p;TsvnG#KSiTv^2zAk$V)qFtkL!~@1HX=Z+=j>6`IUjPM4fzZ5Eb5zDAK; zy?Wa(u~gy4F`>O&z0dXGrnvFqpgb{#WF5 zca;QUqa#zDK2v}FF6y%`50M6bzq?;-gH8lvPy30wQ+;{5+8h{AUYFns%nJwi`cc?= z5LCm^N+F>x%xf!EPOlkRR*l#CvK({>W|GAaG!T5CI9e>wxp^Y=J6!7?%_i zU|M@P?n9qm85dyYP;^>=57z1gm_l_rL8o3-%^`XOx z;`IIFCw$LSDHit5dIT#fYV7vL0cEQ5voXbk2aNez95ih7=)t8gWFNVsO0!gMT~9sV zsk+zan>UrWT(NQe&70bjrvi#>0^aAs@@h!tl2BKaeWY>m_5Ew=ba1P+=UG1HD{b!%P6|a@=rVHr)6uosyz@^5vA?6Q`VQVXLRr@ud2WbQu&ya zv#YDz2GggLa6%oNOjBJ}2kBU^gSrI*YxS0F; z^>+<+sl#1j&ln8u9QR{rRY(u)X1yWi_v|69p{6alX_wRb&m8>=7j+yNigGr%RV+qJ z2(yWemoJ{sl{`28ah+j!?tNv6^uu~XKGo-U%yJ@2G8R0)FTA?b}H|4&PbAQ68H!p z;2HM0j+7-3I)nQM*E61&SJ|q+!Cilahe}~TEG$gWX(@V`FF|zu%wdV3IMq|l%9#dl{+Ln;E{v9jqXT}x>FlZF1`IbR%E;b0)NzwGR!acyd*j&igwxfBh~ zu@4lzp^Cle#JAomJccCnfWfwICuk_b7>(Pc#0u7DQ_&7qSe@##f1}K>*>Oj 
z*!Ffof3LYYE&Jd{>PhlbP=V^8?HD$FvY4IJRzJJn|enZtqm; z^SiGe1niU=@GOWX;=-v$HLy)#V7i{xANjH|ntHj=Z?vS7lHXe4*F(G6fhbK}Z(O`k z6=gvE!#5eM7-?~5TXjlrskUcrQM`}EzY@aNTY^i0Y2Ye3h%9jJMeTd+xLi-Y@Jscg zBGQ0vCE>^?Kx1TfcgN!lIxCprk?N51zBR}0RVaZ+YV^WoQ6{-VLRP|FW>@GdcDAzO zKm59l++0DUlJFTy3pvP&)-;yWaF5b$CmXtI`+lTKY_o|Tp z9$G9yI)5K}55gI{E6?89*V|iLy;@`M!H6=t+U>G{f8T0&fp^nemajdV1~e&QO2WM~QWI zCZemMm(XY7qoXw5XW?U0%%|-kyc?HeR;b0o)yBvp)Hz;Vtj;sn!mXlqmQp$7)wg7! zAVnm{cdMQY^yqKny_R_%``_;zkjKcRMT%yzwB6*%V7UK1h1|8uJdJH-D362f>TQ;~ z`SXKn0Wlk)XC6v1pSw75R45*%7*dlOz4qDVjl_?HnRRB=T1UROyD{;f(_41pnk5u` z@UckbJT5(r(f>AmxtWSg2*E;%}XK+mf`1fe*U7(c<wUKe`rtLi&bnK}V0h zC4D20L{1C+;Gx6zrO`yQmNS}yIKfQw1hbpGJhVh?` z|KznJY8sYW?$n*ny@dTj$88hBgoE)H=0d>x_t$UTTF~0jZ$zYwg4WCnJRq@vkqMRAJ< z>~nB~z(_@LmYcp+mhtAbn75xbl6KpE3U_=joF^!fc_DikPW8FBzj4zlDNkPt-KRT0 zRAimdo5D{!@UbT8yzZs8P};6ENog1a5f}RRbW6kGF~eMmh+sn|;3sEOrhB1NbS{;b zn5*9lx#4r?XoK#;E#EDwSgD?nCmSw^s=A~_bR7Hx4~2ex z>rupJ%X`g=NyJnHB_Lzkc(?+&@j|7=Iq&ARTakc(lxJ^ORES}!QGdp2aChYU8<{ik z_w=(PEWPIt!Nm?IF?ck~TBDMZI>3}4SZ}Hc4faB-mkF{iNwa=o`TmR##{9F}SoD%! 
zN8uUtgvtvlW-cS1#bLoE5!>Y8RL2+4>@&n|QfHd`7RW0-)zdL`Ok!&`-e^P?EJOvt zppe&jlD~Cyjgla}!cI?nhtMW3-m)Vp*7hw$p9kTqF^wjess4Uo{#w+trBk$Zbg#I7 zn+shY8R5>?qhy&UzJk9e`VC^=)4UsBE*;I&E{i))_8*}`TJ|{|c~@~Z+!WYDS3dgv z!9@&(al*@CTy;=~5w~H9R#jfV_Sg=(A{*zXsJ`4yM|L#6$-nP_!K)1i9i~i8kXp|a z&>u@e8fUN(%H^6zE@sqtXzu2aoqI(kRy~_-c3%?rNk!eD12zA%h0kr%djJa%r(7`W|9Bf_oC9zNIn_G6C*VMP4Z?AUbavkNOldy`~7&N`Y@@5Z0cfX@g zbuN&2MpP{9*YC3gzJ$XH2%Yv%guC44->L!J$fRn0KG=R&#I|3+ok1Ogv{}q^FzX4$ zS?Sgn)*?a%Eex;?Hqm@8ffylRDnBG(h&xm#E?KRw!|tjCbdF*=hZj*9Rjeq!JaGjW zM;La!I@HNKdYa0{%+6BhMV^q=grUr4IDeA#Uy{5+c4d6hramQ=V1@k9Upa9gRNNjJ`abh+aLlVCcixAJXoyR z*SoPeIo@QGA2hMR%GPeM8L z!L#ntxxk1UQuXVdJ>r8=Iy;%z_z92Tc;Wgjccx+dp_i7q&-tgbLg}z(M1_9|*_Cw- z6YI^MnyRkrdU_k=#D%AdW!`l@H|bQi84Q76XcLSM2*61~g||`|mIlJr`{#yZlZZ6M z=LN$9)q*vN3y6b4eltC;ccPi({d-510f_O))jhXX7+0bs8_mZmW&+a@LU{0H{+_} zi7#(nOyZwbpnZv~{ibX6@Z!_EYi>0xAzs!Mgvn}IbKTjz!@;YLntH`peuwKJZTn{o z-6#woQU0y(Tkijly0;9g>hIb`K@=4fL{d^w5JkFM1yPX@R6tsg?(R?|q(iz(q(K@4 zL|RgbMR#|uMYG3J|31%t_w$~8z1MZlhciFuf@?9?Tyy?njQhUFJ(8R5=4n>e_rh>H zFqG;|Jyb6pQ+HL_r@qM?oAz7*jBWRWzPjDTE?TOk@jRk-+j?MsIFl8~(5>)PpZjN? z6c3smx`!Dd!u)CB$ok>Fi?CYh2Vjtk5j=pvu4aFP;0*OomBedRMr)k3|+_q=(%#2=#j;Jk-Wv|DvU@_TH%arh=3<5J%el*|&DBH&qzU{P)v4%4 zV$gmJUCIJo&2zW*gYirk>cxIX`pJ8Xzq26!z zng_K*IcKW?*?5($l+(r`)`>eCVY6B5YLX}W7^lY)|ijgnE2VD;~@}b z8lNNWXA61u#*P!noLl3lhZG0}ej*78skLdGJ`Wl(__W8HJTvOg-K8FXz524+PKWyq z<{QCMJ#HYNSpr8EMrhVV`NzHob(C%*7dtbhAP^d5fQZ_2kFv1to1WJE02l#hiAUR)CgiKBwH%e!$#%XnkTBtC&C_ zguQtj&GM*|uP)o%b$#z>ICnL0bX46ilYZu;;^^4H@|zXL>cL$1q4O|@mBBJE*6!Ge zsu)aTLD#i9y^9XUKMuKBHN1+0f#_P^|=uP5^{hYm3hx+_*s=rX+-=W&Qv+zFs*J^5h9q2DL zcFQLV-J6@DdaWh8pXrz)MEsGicY8Gp%)r|_Bs5E8np?Lc)?f%qbAayx5=X%K+?WwJ zP!Lk?*cZ>?*_^EBLgK~4LkeZ3dU|^86cg{%^3tK-A@)V`OJwuBH=ZjhJ_65gS=nA! 
zl$EU@b4zFE*+fXW)fy&m*Zk0{Hz;;^;VGaK`bI+aR?;ik(_U(7_6<60T7`TpV>*S& zkXg~*euKf{8u&TVpKM$E-9x%W`LlM}bqQoSBpkp5O#~O5KFz=Ouwzi8PzjP*0)j9% z{zRnbskq{!%kI^&Zf;lK-p>^A(<^-WBaHMi^%VuCl@RzWj8_B&zpkNlZc_Z_Z+1n> zU}v%S0wr(Iy4jje^RD4fH^=ulz8bY3iHo{YS+fmYtr{irPznEytf%aTgf;P@#Ug8h zv}4@2&C5hY4yB82q(d$k>yc{8tBb>k>KodXA4pUfZhZ>d_tzn;eRJ=$>Y=DH(A^*h z;Wa0aMdBkdlfQjiKzyVCm#dp??U$pA+hdSt&SD3@h-}?hzMzCKz3*<e{1dbF`mDY1wV@N%HAAAFX!;j(-mHA%0k*3S}&5= z#PdFSUvi8io`gFYkqt!Y;W>s@f!sv)=OlUo*I{{$Q-Rryh(}NJ@y4P zL6Z2~=%3};Ub(skp)M7bLxPOsYb3O72L^yM_F9~&Hmb!P4=ev^;Q`}a&F8&dZ0BX! zz)N6I0VEiV=g;FHv(Rhr;ie}0X2(`XY6J#cQ~^SkVn%ditVZa_i5Qk#6QX~oKFODd zD!KhxDWV5EwlUrRPgUEp&T{98Ta%#wzXv4M6+immrn8 zb?X-NaA0>iv=iPP$HL%;W+uR7yo4quVEF)$EV?mq10`;x(AOJt|#8Tzl#4aG^-?A{2heF+7yL!R+E zhcS;wdHMq1x~=q5fA#FiBa!?!>ZwKPuUfHyhU_jHg2-bJ4BP!eKzN6adNMOiR7$OH*@v7ACg5?-i+(l40`` z-**q53y}pUIKM4^a)N!EE3xZiD^<;HfdrbGc&Wz|e5c&+pS$QgnS1u+k&&j4ucFGF zt@RyK@jpA_yN*~sQF(b1=K~}i;6q@}0Ck-pAm`No`3|00r^oL6Pk}XT>v?0>n$*%( zj#KZj&u}D-nSsPiI;#3@l)CG;SehEmS$COWbu~YUv`P13b^hg0EAz&v^zA_lZO6B# zPO+&|j<1&|9-SMmcZBE-(t2ZCFO>LVF z#d!y@wvljzE7|eyOH?UYjl*p9zTkV6PHk#y^QS^JiievvT2u99PJ=vT7r0VWN{o5j zOQ=%UyDK}cu4TTRasP<=a=N$IeX;7W3x6EDrVNPLpMuk+( z?DFw^>rpX1NUp|~z~0SzhQVjj?5Od9__ORBm!rPvW1I+A?2H!%J)|djavAx9*{~U{ z?Z{>_p1n#xx#<^C*4$dq)3~F6K$*&jsH63B6b<1s?K$rMXaWAP(#I9i_SBl!MF%Wy zls0001q#}F@j11_{575SXb<@;ZO}2|+20|-6>%NC zwzk-t1^plK!CMjvZM~qmN>Fg{#P+pAc(slkG9@GswcLe`_=%5q{72$B?4;V>Cvc9?Jj=cd^rLQ>2 zudr#J2I@{1P-r!(!2c1UEsZ+M&?X={`(6m@5`DKiLrP8_(hH$zfD5}P+Z9fFidCb& zds>GoWyl!+XJ+@^17Yr%u(oX>zav)~*vGt*8D12M6E{uiB|Oe#INWU)@%H(_x-&EL z3F{PyMUv#RRQo1O{fsNe9*k9{C&{1HV4vSz-HLc5Qud}~WYuEGlS9Aq%E9hg&^MTE zN1PPs&r}||G~S2lG+c@XmKBIRU=eswH>)}?vV8KYIC5_-M5NHU{e2!^a!&US8ny}93yb|RYe=KhnZn)>{|pf7 z@;c|`L(T(f(o>?&D~9FA+&eK4cRp@yNqjcd$C-l&+G?k zYtHmAtpcZdtkNTl#YuxEk~cKLzfRpqCH7SY1C0NC!HUwwCb}PK`pW70_b@T>ut=2L z<98o5(RE1d5G&NHs4t3kLok)>cphvh@2{)d2nw8Yql@u z1J@-KYTvtxcU|RUUOxW)i22)k(bLL%xh^8>$m5mwl?UnR330?hD3uulN_utoD0wCNer 
z?G}dq>R*ocmyAX#dKd1D!@d?89W9}xL=J8?zZC#c?Akc+OwkjB&vF*c={$w3M?u}W zAy>6>xMTq~T4;e82Vhx$L_#%@4E!y+t_%7uU;x0q4rb2gs1e&jyA4eMfJdkUfhB^* zdW~o^eb6AmdZ5!rrslZ91yU@q*Rt|%Kpf_7e{nt~WSTI}$;8TVI;){RYDz$V@FRZG z#1FB?8}@e!rue3Z_OKucwWs}*Gs|mp6y<@-{CUh2a@;c1;`bbO-1>^ZH%QB%7q>xvEZntZ5i{R^oGs}#&#w@o4AIgqiK?^qA+`e7~aYFDK z=@G#L`}aS;SrW}9Xty=g4kwPYRt!6cD9cx!y=&ichB;Q~+X_n9JvaH>Q;3R0L`)Cy zv7Pzg3h@edqfc~Zo%p@c;D*aqwmc)YJ7!_I!Jd?REmSI>PPAU?6Wp5)MdpF(CJy=! z#5--aD7hjejq`8efJPg?s$elt2&`~qn&k9@Rd&wGtB7%q#1u~zwll8COfUL{QSv)} z%d#@16K@DnSCh=h{bo7KxU1|6h?6KkY9`dWm7#{Wb@gX~na*!%t-*;ffHEH+U+fPq z!T%KPjA~uD&Y6ekN=GBWP{2@!NQ+XAAGD>tN!ZNUTbm&fiU}&$ z9Q}wmSgJ>oPQb9;W_6YIOc<9O`0 z$k1NDzF}@2p}?gNWEym#fY2wf={oe%fiC6HbIA^120PFILKhsb1+eA^bEw|MEiP9n zu_ihWyX`GXsUsTPvH0fFt%RxjeX@ZE55ydKb}u9{wcljm?O8%~?^C5D=bP+N7%A=H zogtN$&0wrEDAvffI*jXnNkOZ{Z2BzYxtJJTau!2wc&9S3WQ;7dpqvTHZ^f4sZ|JKt zVAJY~NXlZMzj4RsD@vgRKa81+UHZv~F+B?=vd$p&ur*=~^?<>`V3 zH}zBUhXUu-&ozucpH=V-a_TiE;#EZ*Ze(FdQV4p2sCd&r-E}EJ*y*yV3s2y|n0+|Z zk=(w$BM1c$>Go^${V_6qOxFF#N#|#KLq0NSq)s9<7&0-of)gRNUgwrS(&xI8yadOH z)OowW6=hewo8Z!RosT=1*7(*)bV-WoQmbpeg9IdOa{=j7OMrha(-2ZO01&&0#!B9U zW0E;9cf{#4*zDlC(zb7d2RlsDno|&5xrJQvTH`kLw=Lh3mQg<~U{ty?%y-ISZg6W% zo@D_X;NoORWS40fVnaySmj1z@Tu~T5^d?!tUrBu_CpsFxeG0=_$#w!&`hvMK8V8yKx74W#K;zDN<<;zP#K14Gt{``KrgCXyhXtD9<(4sysdszH zOcra?u&!3k{kU>_VKkfSx+5O)ix$%cTt_ZQdKz$1QZvQcYg=Q^A_QcY?lv{%I5Cg) z0{TB(Q&lcJG9n^0BEtO7w0pZCTDkfC^T>teNXZQ3CYE(Xxa;v!e=ZkuB}_H6_XWrV zptYr2T}VfV-+xl;Ah(y7jSfzTA6x@kvrR*Nee4 zq@z3YUj;nnJ~{jsQ(vUNiM+s%G(PdMZq?aXe?sbtvSq78A(XMO+zS-_kZTwBTl47O zdq4hW#9kJ!z1Se3P+hMmA6pkyq$;?%h&X*lA@1x-rqhM60QcAqgWfRpg12GGAYi2x zWFOU{P&G#&y+7HteU$LSHtzk|_q>Wlk?PVTJ-Z78vKx?<-O(X@L#ptpTUBL$U+#a? 
z@Ej{S%nX>0{=mxcL2p;N0U$vqu999_FSbR<_$0}#bt87hmFkmndKh(aEk4MpmbVHL zhw;;8t-l5I+~9*6;xMP?xkB!1VzsAtD9@S9XZbAyb_lE7Jf3Yj!Kuzdtvbecj*qc zZ*Oue%l8-GcLw>h>Js*k_-YWMCn$xqABnGLbp@&(D#K^Ng~%zYSzM&u3J^gdmXe%Y zXTpip3JtTk7g2Nj`HD;u@)KtoUUNg`jkWU$a`qTa&6Z6r%@KG?RDFgo*(*F$Mm}|+ zTX-NbRS@a1xjnps%zH9+0L$KOs6JNE>4?o>@JXJx;!lGtUP4UlosbMT*nEH*4o073 z2FU2q-RW2K4g0cXCnqO~h8hxfj0cNkK?t1!I)_{E9$b<;Poe!#n2(Q-fi~!*B%mRy zgt)k8GJ}uB_Pi|J?b{N7KKP0H;gT{x=Op)s_ol$-1sZ=IFp^jstqOxohk^cC?-~mo z9i7DE$L}?ZmBR3-_!&AyPQJiNG&#w`U~joK)#$&qXxDGVon|^#{T*7TC6oD!9|!{^ zZRzmkON)q6KkE#L?rn!YLc;NVG$EW%DvP0A0VM0oV+leVzU(LMY(5qnu}(GfTr+e2 zC+7XSEeC-NSDI{ijSh}QT%SX_uLV-#1|#;{StnZ$wMQvX>inO+^H|2e;C49(&@6s_ zG^+P0`{{)+V&; zuU=gHtmTNfjXUaam+i_wBH(kqiKF;7pzE97cZ+Z8}qL52cFk-d8AP-=C=+qw32!>bbc0O$dN z$^Aon3Dk2Jc;Qpxe?9JI5zfo z(4w=15OQ9rXIalVCCtWLbGcks^AMtY$3N@o4?ix!SUvajtle?~VhhbYpouZ_>yfB> zsyhQt(F)-!C8h^WyTXSiK`2HnJ^;O}%5 z&$e8scB?+zd_pf3{Z2gP8XsR2ge=@9!;fI`L-+TAai;}9)^mCJpBc|?3Lh+U zYK&Eii#9`4`keSegYp?bWCH*&8)VJMsKgOd$DsDC(W$Qirh3DlZ;IYL99S@~)Lwt> zvO8(b-fatNCF9DG2cvsMPcFV{#$qXi)Ya*@bK~8^#%CFf+S4BfKw0jPokGuMQ>J~o zRmA1(C&i12UCBqrk$w(=b0TREc*0#bZ-A_FzL;*9ZT7*z9nDf>s|KkdLeAh}KBP9N z!RgcSOdB-Wf}s%L{@N-|!`{AELmQ$8^IgU1 zGBfY&$WB+0`+0s}qQ(2GDe3V8nHA$HkaMM}&aL7Nb;L?el?r!gaFVw)H(#Zr^Z3Mo z!e4!vBmcYT1&xV8b7)5;SO@l+M2>4mY{#Ua7buwR(bIlAa{mnV+8-h!x|1K^rng=r zB_<9I3Aqnmj^K^3o}mi9T(FA$NPSEQzIv2D(ZHvoqGGXja&6_5?7z#gq{AN|Icivl zV*GtRI}yc%!Hr#Xi*9!c5p^O;1id_%}6{pve5V;c-k0 zsIg|K{%d(sxyQSAUAd|Pd7Id$Pj_X%*u0XTpJcB`9N1e5Ca0u0lQ-{h)v!D_;QC>HoX4MvckTvvQqqthq)zRKFHEf{gy>)ioo&=Opz|ApU$ zUH?)A^}KalE%$O6+s|H!cRk)y^1XLx!>R9#OYoyt z*UB-OiRJao;au!T>qViE+@r=_is-WM|A!JOnFQSfs90$ zlL_9Dr?7m=5p4?L-pF=m+{cXPKNDPhf1jf4%>tBAfms`o2XreJNQF8PFl(bK0hM4! 
z7V7=(l!gxZt@AB6#2PBvBVGo%|1e6G!a_Z{{a4<8f7tI7mbUl99_nKLaV4*-2EoHr zun&7v^Ztoeg-_hNWVKPD!Aofu(+*2X!7nB(2{b9zboVg`+}_uWOk z&s|Lal>E+fqj$_4CcEp=zLtyYP}Ohz>F<3M4*FBb+FR}H2w7AcAn}ToHaXpb02rsK z*SILq#e*X1Wsx-(rU4gJm9G!XhDk2-3@>1;=MtWUI>ZaBX00#YOZ`0~Oi&Y|KPXos zbfyxVEl|nUZtO=oj6)>V#(XYETxRT*H`Rg8$Al#Ebq zQg!gx?j@^u+E(UtXcE9R{UbB;rU?5zmUuE?9|#Q!a#PBo2S5ngF}(%RElA20Ztna? zWiQiM$jXVfV|Sw_NyDafqQ(j9TGFq?Sn@ zSR9;`l{Vcy&?rDho_{}etoheFC;t{9>tKL_4~p?H!0ZzYmwl2k%)t2S=g*&dy_vmS zzC3wF-o{PO>=Qdj5O<(t^3TbJoj|kQk@+GE)Tekui0qejz@?yt-{^=Rl4dVK*a+b@ zj&FlS?<7!8GXJQIBxGcCHzpg9Yrv7d2h+0s?FG={k+7(tRnj+ZoQ4`GD3gKSTt{5K zwtKl;7gqaY>gUT0N&Xh>U&}Qe@F30}4i>MgXAnmjwkFfy%jWWIZa1rOF>e(k)U?2; z*9keG3t)9awKH_=MsrQFDdGf_lx^U{`!ZhxGfN>4`|fYJdTX}*3ZF&lm@(mf??Ku> zC1aT|lF=G=>23h~?7`Li7BsLoD85We`ta`EuP*}DKeIcZf!tKLL(8>XhwbkTp4=}r zKe-~T7We%bYA5x+>2!{^fL_q083YFFbLW6fC|n7H$#VJfjsA(bxTpHk55VqzmjI-QLm*Az2))X8y>KD1T zzILr%(V(qUS8vqq+@31Q4`OeywXm3+iBQ*pFwBpf3x|d#f%f1c8LJz7L<-&{ z6}D??ANRz>Tx66m(Iq_p9Igh&-|p*OCdY7-A?6uWk)JmvB)K1xf5|DUEX*(fDiX}P zrFb^_lue0;I$K+DeH*^zl#VLpqR+%~8^hQ8@6ntvc&NM2mMBVyLA^ZscCT|AEAg=& z##X{Y6a4;1s`QRUCx`RC6}s6XFlo+`0g*=x5@eiKQlIVEk_v zsB~63{+8A8+To`vjGoL@<<%`$7TYE&Ti{gBs@czk-8WuXSU@|iLM6L9vH{3EHOdGBYrGJF|pLktsNco*REm4w2MRJh+8~2GebZ~h;B~~KA-PAJSLWxov7G} z0rEm0w+0z~xeHb_l8<&~=1y`)tYeES10t3V#~7s&gyWDaLt2Fcl({m%s(zG;VFxf7 z1DK@;0L6Lx`AyTD>^}5pW1^KKc#eLU8L8Z+qis-X_em}dhnbmK5H)Oxw*oHNkmr-i z$jCs5(_d;z11&St;3SA!TE8!WAov@$)wvXTH~c3jwn0G{H1Ly_OOIN|-1L^>SRY0Y z&U!}vo>Ga?8Z0BA#-*cjt-7@iUqljoD$nEL<>ph0iG3G6 zqX7d<;%$;ta*_U!HNU)2%y{_l`~Wd8|0R4uSJH#TZ}dx=9^tg4><>rBBo zsi*S2y&utO7brrhbg?NaHYVnYni@5Rw6rvgxdNzCH{zxMK{~9fvZW>?OHrTyp4afGMn2R{?Z)cSp92{`K-*&_c`NqUh2n!3lYS{eqoMDM^e2NKEWg46G(j~L1 zXP5qdkzkx;4)iVgx8wHtUpMLh;WxLNQy+&a*0?xlG@|il;4@EN@lo5zWy&*g1aHg|MDh?e+Vi(V$a z4VIvNubH6m@J85dm-CcMj9NH41)t^e;o@xOE49=Q)m|uFLrKw(bx#kquTA>e>4ue9HirLqhk;Nuw&YS_uH@QL`Dd^pd z!3#xa+%f*3De$U>Jn04<9RU#$IK+5~4ndDf_18{?1CpAWwAZd(D=SmE-6{6)VWD;_ 
zNjS5r(qe39ET230M8mU1wAC7M6VlMgn3Tb8WexfXC}B6|7*HEmxIVy2T&!*-o3$AmA7bSb{6f5?$v}) zcWsB*>^s+7`Sr(NNZF#ge{3A?cSz7WN?I};pL$oM|ENl!u9ZvX8a<&jb4O}@y`|oa zv_(feivq9VnuG~9s@)6Q@%5D*9sx{M^_@_ZK&H# zoPk7550Og+tx?es!f3Td?Wxu30~5y|_R#ABoM>CFHRULmPOh!Vz$StLY$X)3grI$s z)1|4t>f;XJh%{xx?P;L&oL1&^hvch;mwxXOdU9Z zzBmouJ7NzW6j;tlgFFf?w^yyOnkX4ExeirKAa7-dC<>2~wVFX#v;G_u7W0{nW0;$p zqc;Y~TLtP1Aq3B>IXQL&e-YSXPZ82fn(?mL!sd_mJ2>92K3LTPzL=8VY9VGM!Dau} z(f&@|&=3V+>1fItP$oXGBmw;g#NnXdP<)LCQ!@DaL$uq!rj{0LU)OHm&g?qqs<1ZX zHY(c>Z_^D4w4Chf%PLXMAu)gv;=PN8xVc`i0G-_&Ubc>ayW5R+$LK%QyMa}vQIeFX0NKQBD zl4@`%M|L~0-{helZ(bb9j$S{H(JJ0PK^KvPYziAaM_o5$O*6AqsW!X9KV_GNydKLJ zsb^Xm&2+eKbtBJC=xdwjlzs2XHSTJq&75F0i!0xU!)367%BILz$}eaSeBwhQa|#jb znPX<7*?i&F&#Lcm3hSfJkNQToErA|yl_yqab(crK;FiaJn3%<>wj+`G0gTJiWQfaJ zN$RJE$MBASoUiD43m*DE9%>~T8b`8|2hecF+vZ$+B3wbl;UHjz%hiZybmmq%<($e! zIom9^dgR{yiC9hiI=7gKQDwjK1jqbskD)kos9B0YWCjyqWSjICt~Zsvzz1qcU`I-~Sk6k_@GU z_4V}_U4S>)tc|uq8CeGygPg~~nS^zWiTfDR3<3F(u(;qY=;8_<>Q z=&Z@DU}PHt95N8;V4$T2fRG;^jGbWn^;SZ@OAGv|IfJ0O)%aITfNbmm(!pFuB6T3k z^4LFf_ADVOsco;Lp7WgZ1xkU!{(~=a^($2y>)U4%@7X=KS68kM2r@3)w!h=Vu{_ui=jIBfU&eaWN%Fc(}b0qnCUoWtSc9XH5m^-vnKR67p zyF(*XTEC@yr;(&1aEA-0%DhQq-|3U(gkoHCRM$4i?gVc(A5JjE*Q0L)_b=8>q}$o) zQ}^I=#Ger1qXIsU;jsj~s|~!{bKcGv)99qZ{8$*DOly_gXxbv}#F5Cv1b6AOADLt6 z*N6u+FRfmjwW-_GQ%umyHc=~XE)%w*yx>9>?qWccq)v_Rg$!dZ`MsMqf9w}?+(0F3 zsGD*((5q>SwW!|dIAv#NN5sE{sH<}a9u7h+2uLTByr?yy69ZKE1wolrx#gUXKMfg~ zo~v4#1o7t9*2<80j3m8RSRn?!(DtR^;9$iZd9;%_Oic75jsaPUoulIrZXX5&4RkCl zfsob_MGJs2yg_f~#KMApjXYEi-#^NhH;g(lo;S#}+aE}%EUN(3Q)`uWskcf@YjGu9 z^CN8=unGcLqZYd9v{I%RxL>99EYUsc$t#;`aOC^!z~hpH2@yqPm5jSdyM9*=-_V@6 z`SEjuVDn+;%r31)5nkIH?^S8?u3umM$-?A#-QQ={xi!A_W~xE(RO{k&BbMT3dr@4! 
zFSNCityI;TEv_n(+Wn?Ds#r^17b0B&9eLm!r@Fs_)`4*Ca`}iZ*dftyq zgBaL;ZagIS!x%|)D;>$Q98ty|>++Wd^QZT2He5N=Yfrj3;~!JUwDLr_`YMf!Dz17b ztJ=XX#hEGfYOhyQPpH#7vBu3OY+Fc5MGx7g%DvKm^vz-NT3-%##+JF@mf1T|wdMyl z-M^09G|`buA?)l_K3^YSiL&}cvFSBqHr-5sHYkCovySB6MET)kaC{H> z@L}342P*LWs06}c<$ffM`r!+vQ?u&}HI?<%b*EZ-7l#4O`iu%ozOHI;%*Fvh3eV@#_W?E>JS`fF{ zs1WgaDoX`~c~{w86ENuzDgGNa8KzuiK+UB?%88>Ckm>98EE;yfQq?9Lel7W zrIDwmu&~e`+=_8_hN_(f;Di+h@daKb1(u?Ft2-9)qrH!$_+Gx+5SzaIJDJjei-0OR z3+RpzF3<-NwOQW3|` z$H2s)L^MX%n&vA5yXZvnYD4nYip+5PvvgrIyVJu&ZN)`vn1%thpIpCH%rtU0cj9~H zEHM*{IUm0IPPuyvZ}#MrVFY<#D<2=Rs$i~7l3JY20J99K?>9PGt}cdpHR0Lig`dR1 zzM-LKT4(FLb6Lx{QsX=ZT3^-Duj7{??f&FJH$w@vlrGLJ{ph@$5Z&0OUV{D)k&>V- znU%yFbsTB&5Yo^qJSLRDa3w-F2rwp!1Ur^+#@YN=b&6FET=w<>C-H0$tw3419#j>A zaYd;*AUB$|Z_l9dEp#&jsKbWbF!;fU@6lX)OcO*i`rw`ek>f{a$EKV*3rovAXhKv5 z4=pcxZsDCf3ypu7T7(Qn_6Mi-zwZ(R_2{c zB_I1)Wmj!FC0shGoHfWb=S?>FCUxYdBButnapTTa4!*eIo;wv)770}{J0FWN``0&l zQky=GzZ{9saUa4QR~B&VNog@kpm?yY5JRQuRf1pF(hxOtLh!Gtp_$#JUr280#vicgLy*5h?R~WlAlIm6YDbU6QWJ zU-ih1Xo9CnsfJpEBFLiy0hLN852ypJZml`qVFAQLuRs4)(u-1s+}SF27f7)vI}VLO zEd_*@1^~P0mQKheXs>d|ihKP5&zKX(Sv<K?6z@_-7f1$5Hp6N{fH6>(Otd*)463BE(U`OUI}gvhz|!91mNfz?HW$NZ^k zmY!0(^O+lBsmp(`Cp>DwN3UO}9_;|>0^yD(G|fO70x35I?X4Pndy7>pR9ma zREn9sZZ|nx!sa(hiH(VFv7HjOBD37sarb}w1$i}LuV`aQbR9`iheJf9p;vRf5_2%YyQcD`e_1`dq+y@bZ zw6$V@SO|!on^z{ZgLNIuWYx+kVhKx~Jjko1Vz`+;1HR@m1)2tyokQBqxz|3URU;%W zmV2enP%;#3=sQs8zR$yDURL6|CLLXDq?*aQO8w$Cdk%X`YkN7feWWGsBVVc7be$K& z&QEw;;nvF+&SajM8!o!D*)cas>agSdzEC)&`>sTC0iET`67iDA zYpZ1MdYBGw57jHKBhQTah#0*5Jz9@nZ(3MN2+=#BY@$od4)1So)SZ1AcTa4(^O{a4 z-fvvFhJ+MfA=As_gMPMckb1!i^|2c4Tf{?6S)Md%Tt0!%eWv&KEtkWt{=QTH{5$gzjd1QeYx(rs6=NIwZ(okD;2wFN6zsLx;qaq8x#Ldy?f-sf6V6{?x!hyp(JxWp z*HZwqG5pR;xlhiJM>-vDG^M7cnJ@RVp)qa9QvY89LrTN5*ZvI*K^=6uro~ZQyH#zZ z_4mqBr~gdwZ*_I{6CR}VfQAOIpe+}{YH}E81)!AQ0Y+E=QL06|y^|_uZv1LbMGnEu zzNV)&%3?{vM7j7}5 z9uz90W8qNX{JAssCd>77l`WOm#cyXm4;2lJOJ0tZ=8RWw(Y#e{XIOYoE>^3J(cg(2 z+rP@e(IuWB<<3@wnCu4bW-IBlzfYa{2T2z?+x?J`0D}-Pab5v|A$0)(fvHKkk{a@Q 
zW8wPG1Z%-zA#Zw(UP*7Y#0o_FdDYy^qIdc>!tpTQsD~2ige-J!Bif?%A~QW5SUWHh z_Vr^i?`Iu+wKvs%{p*)|>+WMfqI9@BOllOhClr1Pw`_7zrZjk=w>-5_(9&=X5oSz@hts*X( z{pzMUtdZgSM-tL*rXrqnG_?;!?#GD-k>LgoW0enRSiJp0saQbcbU3be!EfPJ_NY(- zo~j{*Idu#UDu6xyp{du#QSCrO|75vAA^|FyIH7K+^S%!D1qysZHJi4DHvwH`d`2q9 zZ3NG8X?en(jzjv(33QIK%nr{KYOKhmu#RY5wR?VRNBQ0$LrUd3&r5d5dgrKzfw@MI8gGSB)#xv_&GzT!wY_FXOHcsHGchqy z%<;f2q?0YacZooBsL*9mVR&P*it=R@MNJn*MGbe`X!^aT?4Tusk)6mhk-iq&Hf+e8uU0X85hnEWYv$jBqCo+s~f?3mwhr#tp%#g2Af@^w?`ri6R=y}R8;)ZiP4xoY z*Yybvbz@oId}}aX-nxRZi{L1+%vGxK6g_Bg^gXuV_@QoMqxU3G;E6`J68MuvjZc~k zaV!6rTMBT%W|w>Qt*SAhYSsOgr5{sV-psK6RA*+yia6CJs}*yF&5fGv;oh9XxubBp zgy-Cpa#(8*#M)HhTHgq!v z6E6@qYwq@BimN$rM?Kb8Sg|yxGjWlh_@3O-@-FS`g}eWjc(}{xjLKi zjD}b6lnKtP(jdx>t4X6reh_1qdKMM3I`ZA=KYffzERyiggL$O=k@y!Z z_@O~F2@G1{@nAp-z!oK0R#`cCA>q<>drmo5EK#0zXJ%op z({vyrSL|{T*=8$vw^X``e)rvT?ao8>b9$6-)#Dap1)O}eL{_!>QJrD)LKYTp1y5h9$@m#3iKuABVo_FMk#ljByfq=iQ>*q+IO#jbR^lieajKVDd4+<~z;6#pn9}UT ziEU%@mOJjr2c`F_#>cHYM(5%j>nu5*P}^>(OgT;8(@Hw18lP2T+xtVY0AJ|+-^anw zWkRo-^P$bIqBml*cZlj5G({fU&Hc z8ArwTf3a zp}^pjZ0;t{>PRNh+E2PST(6e5SZDdl?3Oou9;LWZ$l-Uyz8$I{^XO^iGQ4Pu|3+BK zA3><*S&L6Gbfaebn%^3}dM6gCf?jW}`SLio`0(~~ErG=~VK2{izf)F^(YG*8%5!Uh zF7W*NSHktK>7V|8*TvxEsAOByQjamGrJW8@UxO<1`$ocy2x{ zl==LTuzXe+bS?zqYJFqlR&W28;aHs36wbEtGpP+9$DbA6Uad}x5#ivY${MifeG7aa ziT#}A25!}Ub=}MK?iRj?_cD))@9;+)@8K}A{3 zL^`8MY;9D!nZdSJ;)w3EfGR)T(kn z^f5-QqrSmP)Gw^Yb-11>>lFKW6UjxbJ+pjjhg?BL`0qBliI}pgBK#NYiM|!VKL1@b z`_B|b)4RXl^FNJm3knKAGW!hhD7XYOOG`%7|CNA@UrKgwEiIgKr&-CXl&VQcoVET= z$s)oSnCJCHN~7>8>`mFUa$hFe|Aex6w)B6$URcb3Wb}v(PQXES_i(H8F?8u}3ZQC> z7m5M33dD@7k1nr0kes!$60E5%3~8qBru3=7xMoZ)x7M+4(f-W4ZMOp4-{+vV2)T z1#}K`k31L0|cT)e^_tUy6Gwasjrv znCQ9_7gn6lQl@Wg!`&hR*^qAn%*Il>mLQ=nYWmua6>ncSjjFP|-nJiMj(@NCszr*q zE3VEJzKi6zLfHJX{}KkJrTm`uS3yF90ixq&1Ule#aM;L#RxkfwMZ8aAvkH{5728Q? 
ziwE+Rviq5*gQGq)M&A^v>+OzMEpmG4^7y>V z?raL<7}oB-=q`$~--cSA8so$b7p3QkRr#uyK+DO7-f6Q@doRZS?ZymBtN6n%67%}; zrrvd#EMj@pUA(Vd=B&G~6!$Y^hh6LiDF+oQ`u|Q#%`P0YHi#mqw3G+PFfG^)4_8cG zuLHM1cvE_OZB6~7-3yA-Kt~BJrMomxv5d9`_f~G;UQ9eNtXp;_w=$Fls$xKx$7d_u zXEddMBppXc=g{5qVMzbZ20w@@`iGF>$EUvjp(&y^zeI;p;A^z-blYgM3`#FQSh4Ys zLLkpLIxc%2rN%u<8W^{+J9{7+M8g&BVX@rgJgqxBIlCfjbSPywuAXwO2=f9Nrs@Aa1we^b7vSNC&7wAIGuNX_6MEr6LBKIQSn&m9zuwt=E|&j%tFZe`?7O{- z>!Y@f34d>x9Q!W?+;F1jMT$mHvw;kYjmw}X5p)FLh8=#ZPeF8){`%@23<`BsiOB#h z#debFy{0?I)O^+Tz~W?SSg=nl>l_j{RBMtpdyDvb)hKw7x?+BoW5-y}(>0n?fso1z z>5C#cn$bzF8D8!Rrv}{PW3i9FDf^qk!i`}a#(k$T*-kQDlyn(lw-DjXs*T4Rv2JR3 zVoz3*>uz#X+lt^dM5sJ18*3nLdXQl=V61@qj-!PxrZUxu`M~8(7KGea)Q?Ab`6$-n z{_bfXVt?lY45p^0;EAT^S_P*52H;RQYd05gV}g|2t=)j``tcnI{%+Gs2_z`>J{l!_ zq9YewfKz(z4|sp)7kJ->G5t|jpvYdH2d|>tn+K!Y{;=*j&({ zKZeyu>F%H9|Drfg>TT{(;)!V2$3ofZ?F}BB>Swce+oZ{cy8HXeo6=|qq@^E_26m=> zRiOFG^0@KJSB!@)j1{KlXJ&dnw8kgAacjl-mTMAlW~OcXiSqLZ1!BE&i?iayy!_1-ZZjES-Ga z<4&XKg4p6t7OVMZRrNsew@Hpgjnc5oe-3{9Cd=r_M{{ze}&IU>tm*`8eBe+ z>KNft+4krKj$+!EH*R@OG~OO%^eyJPfAv-e!>(G#Yj)}kQkyp6LyBx=wd5j7r_!&@ zwVIy92^44MeeW-R%av-}`tU4&!7Ev7+vr^P%?~D{4VDu_fv?8(ri`X^$7DDDXmMb_ z%Pu;FR#k@Z+pS*(jueT26oig}!5d_}r}A|>sll7x*Vh*p?g1ti_9aS6m5=2el#kF{ z!M-Xj;50nP)>u>}vK>X@+*_;s7l4X{)at!BogR^MDOG|jl9h6hp@no>zaCwUgp)snLn%tpY{o( z&a^gtk%=)E@*c?_8(p1UsSTo&DKMILbOM1U26SvDN143vv0VuH8qJEdC)=V_`BYm- zd%61P^!>%!sAp!%r1Qf==KmFJHWy#5WBGQp;&(r%9otrot>nfc=iB&;ej7;_Url#M z3{u;j61VkIN4zzCq1Rh5Po^6>;pDI|>pXtmPS0Xt>mP+F$U7x}RF)2XGp*inm;;%c zg{b%c4jkVZ`L1jmc$+w+6Z9-CEiFk9R+N=}Sap15U(QDy{!st_1|*Z4^^*Oc5jH_4 z_S=)*XJwg1g^elF*R>kV^$)A){9H%|ZFx#w=e|rgOBgodCDyICWghsZ`+KVF5w^Cr z-`a_s9~sgg{I-GnqY0npt~EC3D!n3w1bz*LY4O>qDfht6;?Ddd4)YDOGzY~;QH>5~ z_lSp9C)PY*Wh-$0Vz4{$B4w9XduGK@@cVa>L?e%>kjC}(cl*SSMTLJ?pXaP`zZJ?> zUt0TxA-ei=91s5bBr!3&&EBQBH~)J7>0jXXKl2*?&$#hhN%5~=W!xw_Q-qGy!$L#r zK;zon+?)va6UvlM>+0%y34`o@0(f}P3qS(!rn2(D`n&u;p9>)M+dr{$UKNu31Y{qN zq@_Wt%k?`}c5-wyGP-kewB`!%HM$AK7Z8!)t^D03Xg3kRW0kNT0JhKJ;G6;-qZ{Z> z;(!tTSz@e=;j;U`7<&t_D%0+J7O0q5;GUgrV 
z(1NBEFA@K*43U?Yf;pu(P%aSW=wMs~W&<*PVyyj{GkZitL}caU609Qr?P;t_ z;Z5EWB*bIt8PCBaZ&F$tSf=}4c~1_;wu7kh7!#H;LBb*8UvT99?yx1cD96d<#xL_T z;~VJd!;u0{4uVg6Plh@8#I4nU*kZLPZ7v%By2OhCD_;&a(Ri=lkk!GJ`CDbr;GjED z4vY^eXl-TjF8++^LmM`4&biU}PmTZQ)hkQH)v)c5t{ziK`U6}KXsyLCZw3Gm=FQL{ z8P~6i-9uTNW`O{y25mP8<%T3P8u&0 z_!1~EnpMV4GO3>%=OmfVI;}jVi0#4(85|^Ra3RsBUI+FPmf)jV%A(2>&I3aB^L9G_kwXTBSy8hSBYFQ;YM9s6Jl_+Qd{+;i;AgKDlgR zwye53b7!GvBwiqArYA7mlqM7;MTg znu!FqUq1#gL>Df~=zjU@1E?X^xz#I0{fvZpX4?jQ`J&NI2x1N(UcG;Ro}gQy`D)+w zLJU>-nwwC*#40B~Kx6h>M+YI$=snSSiRg(!tvQet_^h-o?{zMSF`5}Y-4kS~FauNje{q|M`RZeei?Lq}Y!pd_mrr-Xg@N*1A~e+^H`GE{N){Age2nt^GoMQrF02BQap3IP?4rrH zV+luK#C2FiB!q%9nz*&_Xb-<`0o^?^}#_rx;VzLhoV)tRCmI@Irt}V%p>RlNK1#5ml$>GB(wH(u4?7hDNp-4+VU&zEM z+}751M@K_drl+^p)z44qh4xlvDfSA(Py=N+!jf<1Z`fMF!Ann;3a%}<-|4Zo1f-;K zuTp6j|Fq98CYsrdcN@o7&2gm3WqX{$sLWFj3Qn|6UC3je*aha{m+BAYDNi?%FprfPy|Fs%fhASqGUp`$P-24q2av(PYWUdU`&mCf@aVs*R5-w z2M2o~HAD~nESze>ZUhO!vgsWn#P;5)E^jH^YHjY%y!w^zgF#&h1=DcQC+m*S9(jvT07YmZ1_;0GTjUxYhmh)Px|x>iAn8%X zXjic}vC6>|CdRa078SJw>q&3|bgBKvjy?VJ&tE~#=y>-r7dRC`9iOcC6q?)(;aVDH zq&b}%OpYE1G+G=-bMY3Z0vQV(O><~rIj37B4oV5kp14sRGHar$<((-sB55MhTh z2glM}#R-1?mv{r5O=9TdMJbSX_4n)0lEkjW91HP8wBPpIZ^VQtvQPqhXz$f@?eB-p`H#MSs?^XMhsdJg}cHW|IYK0cZRXz&kfL-qRZvbN7 zsWK~t=pG7B>?y7=2Q9rVQw|Vr4$kC&2heMpUBmTEJ3?9e1B^y<-Fx6fGXUa=3 z5qoludj@u3VaT8e>L12I_=~YQ*WiS11fvO#V!k*dPuP#$0VNza&dJTn+Qz2)nLyi3 z=zNIVH`XN%DYLz!Dca7HAH^&!c~ISjLVbl?FzoVAy|RG(y=FNd$^_cKK$!OzkIs|a z^GaUx$9K6q>$K%BUdfeG8pmuGLPzudwAlo1fW?vDH93hDiM0Ys)l+0&-~xPX$+VKN zCNU1<*v4%z&70B1ikht8-n}*tN(#bmoDcyPy*QwfWgUGf1Y_AOAjSd%4BsPXvdL)6 zXnQv~E5V86$E|e(^KI}Ey93Sg_6Z9I(-x;F2Tj?|Sh5p?X8_MYH%{oi!0}-jOIAMS z1S2NGwh(h`tiVWy+aMhwvP*BV+=QdB7iT>hdaJf>^^~PZQCv%Y%qS=8($i+^Dbq8( zA!l&vf%a|QV4lr-h_Ye0nw*7hZ+)b>?T?F%eo+6 z+OM=4BBlU*CVAzymBh%`w|q$i*EKdaM)*|WYuYF?sJ}`!P~o+;>uuEAl<=FQ#em$a z+z#VwcI_NQS2a||vG{Drr$n}?TggrOvv%6~dh-dc;-DrBx@K3K%5obk?scWK=jUu4XgiwRu+KmW&tk z^G=}i7s1Zo0O}4PH8y?KacTlUo(pit!1A+mV{i&Q)KZk 
z=v*Du#7DSs>mdpPYm+6O)+j=uBbevJvdCxD3d8(F=vdxXZfpd5wNZ)c$$Z8OXKWw> zmY-p8(p7QsTc}v(ah4*MVTxV`Ch@tqbT~o+J{jl4EmBCZq;Uo&*$gV8GP(jXBWWr} zB8JCFc-JxI_Yb!UUqxp0%+b;=619G}y&5xsZ4a!DAJDW|z~kA;$>~owx1%))Q6V%C z*&3l~#7L%liN1D-#qO`*eA%$LXHKZ%E#qWN9@A(Ovj?qlOvJH~uXNP5Ev%Rm>X6>%G0*4lOj(J>N#!buWhAkd8po zw78#Vl(`-$DhC*w+u*c}qA}`!j@AnOJhfW%{>G2MCzmz@brJZ$Cy0ot~ZT!1F~YQ)3Jd4rYpci_t4x z^v4av&0BjD?REr*z23!tP$-m3AxwORr_WL9?CTj0BK)J`fntd{o{^dfCaBDdlY|!o z@`Fs&nCRGa6EYU4X|I?Tx(LnJim-t-^++*KDGd78zYyT6C?t}Tvoq$ahN9?{ro44o zWUPqYvUzig78%;87vny?^Y(wI6mDtvmh`G{4}>Yu=jv*2fHyvd#&;hEyt*d%jGP=@ z*(v|k;N#w!W4saKlseF_m=|ts6$_o*-V}eb?Sts5kLikhbYE{*&=rVuM24Ju=t`;Q zb3Z^AT zy!J^$gEHORu5>zNHgsF`POgCr%frWKCxNx1=$HZdlCJQ!7)~5wG!rr6GmzipEqoRp zZM>JCc{jpPR`#pH+)h6V=BMSp9(R0IrrI`rFq4AYixfcV&aHup`I%gGEb8I>JWh@I zx1aKRku*=%o6;)Uf(jKhx=G9=tz8#>yR*&We$C}oJIW4z`WjtiAzt?7x|~=U^QJ&W zwb(?az+=?`vyx|eWaVyz^LKW6G4befor!1X__KrS_rN4>zfDt%i#K!1SnAx5JW1&7 z{NA}rZ^5Zzq3MLn>0Vxg+v6?NnLRKEDY~7gk7$fw??cXv1(1O{+0 zp|%Mmgu37V9DHn@OJT#oRg2VtJUlowbj&^fww)K>fdiLt;?yBm<@Hc;Ysg(h(2oRHII%fmL+pbCZ;hVblA<50ekJ)xJJHSG|~7 zSmFxXU;6qM-{Dd5z6h&8p52=*Ei7dG0-O7s9A4ta#l5E^WeV6RT#!jDKNwz|Uv)vl z)HHOvc4FD9k>i1b5oa%1|0o^iC_gIS-uqZ}aP`NtrR1QH+mvaUZH76_uaYM=7(7`yZ9BA8Lu$s5FfVnP4;)W*S2jtw*Ii#u+{{(y`6q^gHH31s^{5} zj7uO%V^Glxlq#=vN?$hCFz(!`?D<=H35Q59_3J_s@=PUEf>acioafG|Lphc>5;oV) ze)-z9CoWUj?ZvIF8rUfV)sdb7{0lni*82KUVPkXVP)~*;f9M0pI_;2UMX(SVXPp5O zzq*Q{?GHb^h9_1ZDo1a2$>4L1@qCyc7YiuiH6lI-505M~-5C9xbS5G_YkJM<)z--5 zoe>~r0T`Lqx1?HRcGmaIAkZ8_oM1U^em=d`Hc*mgG_w#Fmq1xml@g-*yb21U;xyOC zsa%KCun9@7%U6h#Vi3xWsv{(4%gkKlxW|dK2)ot+#C>Y72u;Pq!=t3EOu^XLIL>oV z8-+wM&FtQlnUxhI;m0*LF_E)Bn>05ynt<8J15iDuliR6_E>KS>LKH_7FxI8V)1zWz zuY*fc4RY&ZsTCGvS(zlnpLO?EtuYiz`*8F5hnv#6x)JjW3pT(7;xw`qL4hPI(D+T7 zj^GmG^VK8ce2str)ucuijwjBPR;1CntQz#hT05_uzvU(!9n(JV;xH%?f`pw_t$Myx zp?KhPoHWMk)iw1XF$oZIOv_uCRD;gcaB`?oHN*T(*chpjA~3%z)vWoVxOo4QGk34! 
z{|wvH9*xjb!b@6}_8pjuIvpwP6+5MU4U-0vMM>kYS{8CsTxMm5RelRE|K!l$W>>P~ zmwc6(SuRVrJa_fCIX+{*426%L@?L?jJVRGinm&|8VZMU!q$H>*d&8Qaj%aLw+g zM%H*bc*e8T1Ryp{gk3;@uyfYrNGloeNIzy6C$=%F1x^zhRsS+w7!u^x*@-uIS{5dh zjh_(|G=z=3DwM?G+Un#{GZS#1qVV}PZrOVvsX;njVK(60? zupgseZEhUXsu(SV83Pqw4g?2rJxsH`72mw&&g!6Fw93)pS41S zN1RR&y%Sfhz=OqTN-rIt#KJ_i6jp1EJ9l0bxb}*(XW~?nBkJ6Ai1kBfdgpP*nmJ4f zo-*t^O-Glp?{ndZlIx9q{5oP=3i@8EIX3foxEtl4y_>+VZ6OoXQ6%iWk5Nj1k7}Ec z`IB~}oa_0?1m&_d&PJx{jV+ChnEfYr^JeSzV`t8s5hN|Qg%z83&PZ8)A?e zRGY(3M2;Vqgbk`84xyl)cyZ^aT8-li=}6P*_wZbXgv`<+dgg8$t@b(YkUG{F@QbYe zh(L}ShF!ae;~tU?uh7s=27i&WXVqYDLHuLaz<{ArdrOOYcvu*a3rrWEkU*ia6L$)w zkT@!B@{eFtxCXemvshmcBi`S*fc_N&6H^RAuFXhGW8QjlxY5GGk>kf}kr;Ll4ytv_ zblBK@|F~8o!~6msmgZ5-qkH^t>Q`0C*tI*SM_l)Ri>{99pzTn@8)UEf1qF5XPPiZP zcaCu-WlOc(ciuQ$R~<1VfBQkvDZ2Nm`-E*p#LlmwyGO=huN5?x<&argTe|r9iH#{Y zclJoxf))}eC4^nFNoQYQ6sqr9gLztBGL^z2h7assS zw8;0Od`+dfoIY~~V_o~6JRQc>GU&+T2j2D?geOc}G=Q>FDD3d)zDVg^{+ZPSC@LpBUOt z)>n?N$*`QB!vS5HVetzu9j`F+D&mJ<8yxG+wFBAS`+jOSTY8Onk@+nCr9UnGhj;tD zpa31+GkF=A7lKiqF8E2~SR_Ue`A8@jT* zr=#PQ9nG7I$ApV$0P=8rGk^?=laU+KJ~#7!;aL%pe@J#iJM#ENKssLUOeo8_+YowYEj7IH?{I4I=bw{gEne^a{&_9 z?54XXIv{JQHo@pZxvm}Dp357BDF%{8Yh=4 z1fEa#z<@(z=ie_rB(|D(y>fkCLzt(bDeuib9KnV~;G53OU?JE~8^BPdVTEeCccR1eU4h zIb~u+^8!A|9vtRFxk{Xx2>u{zkhvVstvuu2!qS`k3iTJ@p=(&HOeb&uaqZeQd3E(; z-o^6d+0rW5=jiC_YEI%XU4G1+;j_k{UN8DzKa~IZ^Hnb-=!0l3(JI_V73vNy?k6`C&+SFQY3Z6~q%q!Wmfgcu`^^DP+( z%-72AE5Gob{t>YtUTac^o0TBbEGaIQL9Vk`ka^`7GTsp1pHAsAcI;lkU;h5v@8(#t z|2%lN6Zea5$h<2gn<3fS>XPfSqVSW zSa_}uB7$)oKW_5+>R>w#;8es+PM*n^^^~fncCMH-N%8wKT3Z9jeSVarqah-sJoM~b z1o~ z?(g5<0K!tKs;VMlJSzLWr^iaIpng5BnWKzPHqWq!Qf9)^UH37xiC`Nd;s8&4p3& z@Q;NbMh!eT>>VAI8q04d!u41@FE5Y(q&kNgq9%6U$?eOZI%5}c6wy7a3GYf^KXdjh z8CWOBk!c;Vf$*-;@PwdOukIu&#~VJS#-*glw>ynL8==r#0>?7U7UKkh3Jjao$gw&S zbBmf*Er*ot`h#gRKD(?j%3*;-wmWxtAh3$$buzuE$)!wt6E&h*c`$^Qs&~Pshd(52 zN=>R7H$0lB0LO735apPZ5#-)p2?+@!S%X}uCe4aCTT=&T$m1CLa{(u+`NjB@i;H2}+hE$oa#Jp*iAc9=O+m^!Pl=d{nv&B; z?8=pX6hGByVc4LwK}IxC4pjD4_q&t&mA@k?=|k5D^A+k)4o#qNX-a`TTr~`-QV?Cc 
zyStC>+jjwX9HpkiM3opHCT9nou@Z&nTpe0)9JvAx`A-;{GOhG-^Op(+1W(~tt?RLI z^>~?5on|c232zwM__D;)h2Q@=wyFZwoeqB>d3X1$d4I!g^zj2N87m^PX4*~!{V9Az zTl2+@#lh#tbloqq<>cnH$6wY7x-K_bNQ%p=p!dx;mg#sLV$(Osk~P{{$Sy9>zBeLp zl)gGo>aDMxaH;Nm^4ab3in1|3^jtaS8{cf~YgUo?MA9;NisMw)(lQB4y^L!pA#e5? zHXSCDDcgVH;u>hDkaFXQHa^4Ny<`-joZQ?}tgNh0JUtUCM~I@5Re@#qZskQMy$a;s zm!5cdyksX5bf6Z#<;n2XK^|DE*0Q`0=!QNmRZNK(8HK*UbyV{RFHzF%xJL9Wg7VuGr$A>+*u_cKt(jq>~0opb`xW*yd<2ZFnPBc{J zkazUOY+y(mb&!sF-SKWPfemo@_{3*u^obYvZrKK4%XSA!!07h_+cdIqBlJ z*b-6te+E$F)}JcM%2BXiH(LDzF3J1r4j=7inqu>2H!e7VKzMycsBXXR z6``psz9-REO86QYMAVZ&K8|v6-H@Ge&dIhLO*7c>+i$<);6 zpHqP(s7{$xM?8aCkdL(Jgt`$oS^{&2V^U0Liwk^cm}GaXS-yqRq=uzF)}K6bU@BQ_A?D4+ct#|DOfVHk-!8P-G&}C1 zb5W77VDiGB?2YXAg4BcfW&?XCJjE8J1|zwB*=<>1RP5+$R zg*fZ+dnc~s@F@mr&8QlYvahCJ*^*K|P9a_t4tJD`Gyhg=*0nak`YG0wvfy_b&A-2k zO=(-(w4U-HGPnY11vbQtcL28GfEi`=iVWJW( zZO+!?PV>_i@N8-3Tgi9t&1(O7_Fcu-aL0tl9;j>fUUV9AvFW?xC^GQk;mNG-x;EQq zogGannjL^y<WXui&eE^!5$8V(Gb+O>BKt==>K zr1o58`qx=>=#C&*LmPlXZ2tF`*!l!gf@5=Rax(4g;{kP)X^dRz(s=le*M|YL!8OkU zmd~eCvQzTa=_*yfzy68>#+hGONJg}xGAwEmK{PQUoA!8sC0e*UR8OkhK$+lLNI7+%C<`Ykw(vslwj*(6Womb7q6*s*aJ%; z4sD5yrY@Bkn?d3O?HZ+WZe^v_S0s%KPfUyL>2|8Q6+69}v zD9fwt{2C%Fqhkuf*ts`vE=ASA!7;ul!z`Do=xYNUmI*gRr{a5WZzQJt=y#X+5?}?Y z1(jQ_W2m<{*&nS$Om&T=yf5AgMl7*O@$q6pv;-_ZNwjRqeu*omlKPpIU%GK){_6VW zzkIE}V7vh@k|D}G(`%Z4thJ@`*2=G+e|R(d*Z;5b#y1ThS1hUp4EDS08yZVe)1-;^ zTlCgg4dl|ZTzfd(7wXy7-ye;nn9usi$}2L_oke!4AJ*nPr7_%+DF>_&!TaljwVU8` zEBi*n!66I6;9px71@xPAc7Yr>JYA#a~?L_4i2dW{puj&#gRHjI7>(`KiG_?b9cGi5M zWHJXfEFQVT?;BmC?-l+zFVoD#{Ru} zYDfVT?FneG7LlCHqM55U-f%IupSNzdmy}ySkUsmm)!O;YT@T@mqq3xpcQ5*j$+3%z z)sIvM-q21(!GBPkQ!F=!GqZ4!V@L_}kKfg$<%sBPT zuT3rhxdH$^cAranMwCo2@g$I=Q4$Txcu-)LlPdeamxvls4VkaGDj^|D zT7MSV8PDn6v39foi9wfoGo75AoLDUZgRhrGL#rYdtHjt8kXaF5?9t=Lb%QObwdh_U zJ>o6;J0prexBWeV9Ox#ipydj-cA%&Svw@r*k3UgbIMWuUX5s72z{nU0sU-0uwgdf6 zi%+Lc?`9VVPw~l9+UFfoM2*yxZWglm)03qB;>^mGZfQ8Z8ZNk^L?;HtG?7{X-c*p4 z-7juwZS4cY8<={>qIIO!4E~my$ijqogj7hI^ggYZUaLd5ESdzeM<3zT0t!86%i1Op 
zY~pOl=TS&%8lGcUor!K7GaQY)amk@Z;wjOjK?92c12@obHH2?S7Ce;!Lfom2C(#xM z8WKZ=Y4hU_{!=84{+}yLnD^MyH@|quTO3w{USSxErd;YcH4+Do3!l8Q(sc{PJOJCb z+v$|iZf!3o_9UtGhk}hNj@9x+# zlx*GK29rTLx^IsWQnXkE8`~%jT{oYaW~QcVbY^7m$5p%=_^hmME%~|XFEH0QczNZ~ zf#E-!lHj-e9qSpjEnc)bUCY|HoU{HG;&;hxcFLaZVm#0a?y9)ro-YtTQNx%E2@6^9VfzVf^);5y?K5bwF*(Wo0)kQYQ z?+=X*>v`;(!Squ~<4433Nl_sIr<{>Nn_h;XQdYRf`mkRS6XQH{Mg>5Spv(NU!ZX75 z7K)X6g`gcv*WfkR@`h&Ev*##@L=yKZzVj9Q7bJk8z&DHnkvpNJiJ@or_or^Xml5Yt&yL98PCgrm?14$L)nu2&3tx7hemaSYst%pJD7C`6@pW zuRt29jgykpbh>#vl`1Od_SGtB=eOpsBNa+9_Re7gog#MyWs*gu6lh#R924ZSIl0!^ zdj7l;_^1wglIBBSy?OF1U$^-~{yKm2y9AvJ7m$=Q&D;z$nTqqEyznnl(LX4!@)3Qq?g>wz0TLgit! zB5z!B9C*RUE$$_&U7>H0;K#rl<~k;q?3*aV7uh^a9;_2-2wZsy5O zhSYp*)>4=fw=?)0`NE6c%D{h-T2kM%K-w#>%i$fG96Dle^>nt(>9yi`eb@B@`_`$# zNCo|4FY#`;{vJ*_*UDN0jxM%{)4y5>dm{0)b2LfK4)LJmA~Kl z-3aH5z*Oy{9Ao<7RxS7aq=X1b7jfCG0SjRt&$=2=gV}6&_*8V=RZfnQ;=ZK~*UUZI zu+fMyOme+#bXZLoE6an_f$piUs+4p0Ye#*L1_*cEdNx;kQ~ay9dv8f?BZ-0eZU#0)$J$IzjCxcSP z9%YVMPQCLOubO3W(9Yj)pY7Op+a+Gp#XLz@_;jY?p!eOySk{0kFCMF81!?uLxw)3` zaH+hALpRD(RYL-tGHo8}E)E~)-qkZ0{YINlZ{C}qx}fR*_ML6e#6rbcU%9^$(u7@_ zewOQ{E;<<{3)kk8Xw4Nzd+t2I`r{pFWvu<&sIv93SYL&lRxEW+u9jGI1IS}8p-EH? 
z9IqC0Y!+}3k5Q?iM`O4 z1qC7n9AB{X7q-*H*!sbCgM=@gOEV|RPs>FGd9l2cQx4Iv!qhPkf{*nEC2HZ`tW`&<{QZ%us}XVnLHn_=FS^M@UdhaC zY;p>~Gqt~33=j;tHfwp6%lFJK`e_2`c6xSTFHK{IjbGGyhXd-M zc({&^p!vXG9^C4oT~ZV!7mF8dYPnyU(reTYCFdKTQuNP^RjjBR@A)m&cqEQGm!?*l z*On#|I=cJ!^_ONm$#TNMaUOB2ea2LLyhP?SkDtrsb?m-vGzSt@Tz#HqRGiuIDmSS* z4r)T6wcgb!)r5+PD^?6`XRa5AxFl4&Q>sU_T~m)3D#X=C9@;UIB_%E)F!25C`j{{3 z7VGauXI>WxxhCBbLW}pQczY*JIpwyad|YROfwSQg(lD9r2)?(yz<8yIUS4Ea_};i; zt>~Vq$Si7tiBm0RxjqJKH=Lk6`0E8%f!&|tU^6q1+D9IxRVs#v7|jv11sBRXfj2^g<6voI1du6hP)jialu3O|X(qb1XwUJvITyoGpJ;Wd zCTv<-e&=ZohJ9V}6UdaM7ygzH8lVfkZ zp#MvCvP-H;h{OOBNpe)mX3$K^Nx`4b=w(iBE~?UFAd^WFvwbZsEn}%D2dbg*fY$X0 zaBrgF+>{nKKY9nn70eAm4CFd^_^@Av$elmiK#0`ppbc;Wn~=G6>sI&A?2IU6xv5X$ z!on^S1QSSv4vNF`IN-#s2SjTXs)&+^VWL1Hh(1JnTlrQj91xL!CDaWr*B{b*ma^g7 zXt84gg8%9@YoM#UB{$VB{umV^z?(rp(e);)?-qqb>JMvgN?iytBmURDqvjgLZ8kT@ zER9>dm$9jC1TBW{LmJP?evXg2%$)5Fy4ZEPdkt!`xjD$cD|t?^g+215a2ysZRHc%f zzB8MgNs-#x;*x%@wMB>5{|*OdTqReOO@3I-!Q+9nF=LmqT;G?@Rg3GRKTd1WSG6dR z6f|YtU(c5F=o#R0A(d0MttDlL@{jY&W7lGhcB|`z+}_H|6pRY(Pj#$XqD* zOWQ+evupPv?*pi>sIOKXl%FaoxSZe1FEu2dA!Vz>9S1EZ0`Pua!=eLaO~xmo^uGQb zY)21piHndXA1*F@a2TGY&@+vZJXM?3o326RVA!HI6A% z8P%aZ-I2Ov70!f>)YKpfhvo>Y>hxz$)hMN%i0cn^MWQdHPu5VGrRxMwCobVWm`y&RtEri0b8dZ)^j&Wb4vcJ}rkN2)XN z%H43d^`fl3Yy-t>+9=SRvOVSrg@4dWT%#jlDOtp2m*g6*T+jkqCr<=lAS})pC&wh> zGS5A}{uiGLii`831AcKo(@n_eg({v+fB5iH_o!!x*36uDa6F0u2@a`^G`ow_+YcXJ z9A#WOz%s_-M@ju_>F9j_jf@AkP>}JWGBT7=_{q*BRuVP(p;KUdh5}Ty!90k4pYo2$ zRFK|GQ%1emXY9xi&zgYJAxU15Hv+sH_hgap5{5zE}5NxRXvxbL$eZVOGXQd9lLcbYlE)XbscSpzm(5fgnh^nFSU=a13yE%V{K@;`+IwRTaD_7N8 z7r!|3n|x&Lj|?@M>~AR#6-+#;800!BAGt3z`|zX(mMdXMcb9)-X7WqwmYOopt$rxC3Kds5{Jig0 zmlf&ytTv(julTa{Q$^YSuCujci>);Ni1zbBw0es`Ia zO;UR!UVU`B@<(e#(TC7sm&Y+JlO1N(!kls{Gntmw=9PYG0(~#v2Qp9#9;kZCXw37) zRdh#Tk5!^Gyk@2a%{k$M`OK4t!9|ux3XGgP21p4K`@7^Q6k)i_%N3TWX1h zFVP|Z_pap2ml}+SAp;d_6%+;B!-p9^gC}PQ#v+gtE^*a&H}A9F{kAIAtX&fXGW6x7 z;j-FVVZPiusJGd$oD;K)|6fRJ)ukT!#{8TBIZ;mn=zkd+B=(8P-F@Jo$E^G-xv2SZ 
z6-!$?U%h)$K_g>vH(XRbzJ%+H8LLsN3c~Xb>4X$N7c(-;yjwRq+qG-gk%OFVxK2r4h+ETr z_mRiW3a%nKhw>|}UG=K*NF!?eUi$W1J3Mg`3MkF?k8JG>DL&0z5iw#j;CMXBz?nVg=Bc_} z)c2TLnvB@y7nt>ES$(Z7u2nhypjrLJj8JP!cYlv&?;?_N?zt^>0|n+*77FIgwtjpr zk~u9x$bV#Xa=k-CAE>Ucv!Azhi5qO0QkAZrRrhGh5#fuY`94kH2OingtuVvVDm%lH z3o@JyD@7wai$VioFW{v1x)o-Zc_V=))4djb=-4LH+tsH=(o)sYFmIk@axT1=_1H#I zf2hKhtc|7zld#j>ziB8;tB90^E~=`f{hUH?-YGVxloG-9uCqcB+531WdzXXld+MVl zR4V?+;j^QDd2Rpp5BYE25Wf>8CAOHUc2Yt>;GY&i5IQ*7+&YDV*DI{APalaQ`Q1Bda2yikn0-~iivjB>6m7cN=D+ZL zi3bzgGGGSoZAp8(mKPe67*cr-C9<-emuX9e40@T>NceP4>}KMraiG4}imY!q)Z4O_ zZ`_Ql@KBn_Mt?~i4!(dpM^7~inSr~xI9(_Ll94!+U)6&Nlz5^_#_djJ4Mk3hmat)4 zPA1Y`lL4T_38G}EEO?XbMOJi9pB z-bT+)50(ZL?&6qqkE_kl4vSkaZFF_eexj8tlusi%{=ZSrYN4&%`0rNkZh=GDzw{8VftMujxUTN$4Lk};_ zI4B=`Dd@QkWFa20BSf5&O&s$Z$N-yYq&Gc%8eE0N8-q>-rWPs#;Dg{RAI24c@K2#{w4>uKWxku6D5v45 z&4;Q_Xl^K(-^7DWBp&2x2lqY8AN)Nv|2MmvA>k5Jn)EE#HUfW4H!KWP7Eij=9ayhz zT^g4=%v6}4YLJ_64SG1zrpsV5*-}sKVyrb{K0-UjrJeyg(C=qXH;|+yZr^@Ohz8i$ z60i%k?*F=U1E0AiW)zJGTdbcvnxZBZm?Y=6VSfc3UHxOc5wVQEiS`Z-HdpTZCW6q! 
zJva_cWF?wWR~J`KTY(cxA@kYaot?*9R*tIkYm#Wj7nZ*U@!$7+ZvWZLS!(vL{QK({ z-fm+%Q$P_%IV`pM^Y+AY(RW$xb1 zKueex>||Lhf92shcvBFGMQ4>Hb0jw>t299_ot z|L$z78Z2P()smIC_iTPlW?s43bgzY9;EKO~af;~~aU?;VYizq#%L@%!NFd4Z45$B% z53VLD@Z zC)@-=D57@+jiE+2%Aw_l(&`O22?-F^DwL1W2>TEbOM@E0H zS2nIZD@y7HJ@MJ1NdK$(sX}STT>(#kOHZ$t%z9wuNv{`|zV9}Ie&2>kB}~nTF5MiL zS5^>RhAPqR{pZ27VQHzY{QUpZD*VsY_4_}K!~Y-`lPeE_PIuOBqz1Rc7uSqV=IuU= z{8U0;PrvU3UAA+I%cH!8F^x;EGT~PK1`dYB=UQo#Rt}^xKYErMYhgW8i{2||T;Shi z<7ad5bth7*oTo)4$iZ$9`Uryinu~EDw)I-#;00B}!-dzl;e<212p9&S{h)$cFpxl6 zFv4slragP2aYpomo0-S-eQA@KnlE>z{wFQvc3gX+wyW^m&?z*j2BB_uKt~-XcGJCW zT5|v~i0OKK*1P?>!SGYfxtj>|R@<(fO^>Ewmg!MnWiyv>{tmqeEi1RV#;9PLlgC!u z{CKQXV)SKEIq@=jP7A({kgRW#41rG_OsDU*)(@6*3i=6QV8sz}5F-@<0z-~ygMuoM zACs3NgI#6~u*$LvL9fh)?uF}YIl3V+fJQ2Wre0x_3TF={gb{I8i=v5&H;ZttlS{|^ zID(A}i9i*!bTMdVg1gq7V&CJhSA(-0iPf;-~r0XgGf zPjx{UV*fu)E2BQ>O9(Wh8!4i{yT52p~`gLu0_?_b0Y4TA5fhF>Wyv4GLRu7IK zg#|Ng$Rpv4d_=6b0S|+sx}IZC0%k(X7*4JYSO4-nU-sur$rs%{5|Rp&+(-F{QY~rB z%IHpwUn~iqje~w|D$DU1fy0#i;h2t_2A0t za1~Bv{3atAom<2)wmWc$mF5M-eRyPKeJ1V0!}WY%O1HvOXCpm*YIa@(El)y$HdT+0 zgcB5rw{M<^pEZDH3|34#ItPt*0@@F7^w62bGRD=UHtK~mrl1^?A*hSbmTud<*^Zh@ zoSyeMKee~F&!&Q?0lO7l@4u`qEuoKiLf? z8w#ln_tvwElav#$vc=9_TP$<7*~hJR$#JUwZBaYfUggTmUVqrA=b^Vzs<2yc#lRbT?xUv! 
z)Yj%-$)Z$fsLpm78FqB3lc~|g0G#xDXvucH7BY%+Z0|)rabG z=I>fEuUmInW;REQTl0g}%U;LwbEndq?aI%HxVU#D$$r_wGXx zjFR<}?+$tbgrgCfB~{SfWANp#w-c44+n|7Zr(L+^7Zd`uqoH7F{=i;)o0Y``zqSU1 z)*mwiuoS{_zy0x#4RrS)#<&1;mWBfRuA&!LB_*%8xCnzS^d>a)VZEj!J)y@??H+K- z6PTEB>@1+)%qGl?stYQv^Y->Oe2Ep=sK|Nu7U17Y2EJc_ulh)e>+#RgRasjMr3vU^#@ZEN{d71 zxm;sKA9GTI9^HSmZM71`Em&^hQEgTd)1EuB{uf)tl*WP=Cx5>Y+qBgv!DDx3_wmi@ z^MV|#=j3UN&!%V7uTBir(36Z?Jjr|CzR2lv+@~_2JNirUOJ!sK)ZMMdXD=ol+~svV z@uKB#bgLaE^M^+}&AYR6i)QSYxx^RWPr1$Q9FaNTeaq^jW^Bkwwu3K~5C$9K~?<5LqEMHa3_i)>mKZM1kW9sU-%Nujk|AQ${i^eaQD3 zL)y#u=xCYKe!_x+OxxXl(bd&$NY>lRzTNG4i$Y{hV#QRJW6bRQ0?>@AWIZX!E7&p| zs2q?>J>uTM7h#$+IXOu@0o}M;Fr-B9g4bmAs-dRTP4ITqw>-OSr}MTj_pegIzVE6D zIvFsuA}m8b#@wX)&FwNqyqz~NIBIQe9bhxrIZo_W{K91zT_JuQFmz^a6mL!FV6dUuIN&5LaA<}&#ABz)^ABOf_R`t^YG znma975%Om0w0(V3G`F*koARgB8s2~Z_@qK$z32O$KXU)P+ZlH#|4qpsey;m?&TRSM zOnW>QbM5ki%P(S-hShXOj^ue}SRJ-7oY`8Gsg}J!eW$sQ*i|o*9@T2WyKBdK&wH-g zdbl)cTh?VzFPcPI`^@bC@U^Q>h9ex7x2?n5dy% z)y!fnmYDjOlT5$JI(72PtALkb$=0bRmL0#+JvnIiM$a(HXWr!KctMWaT=>CXH5XTn zh#V`}yJqz}mx9&$LP_)vmvm!kt6gb+52~EkeQ0?9n{RDxRJ@Ffq+q&IP)$0|jFs`r zL%Kzdl|NL%CAZCvojBIKz~JaA4|#MtBx|S%tTQ_1QZt$4%rFm7;5J{MU_r;;%BC z%(#8~YD~2H^YZ>j!!HJTttU;iV`F1?x>S^w8rf<|{(Tz(Dt%g*o7%?A{1gryouF>L zhvcGGaMy>TsL4!DE-q13Ri@4K^oj7;+P!zLfc-J<0WRic+tP{oTdV(qBJ3H+n#5Uf z|NbgMX)diOFaO7rCmZ1=@oPOCQtqL`7a2UE_B1*96f~#s^qj#-~IYL zpQlH+d%WM*bzbLr9LIT_#xyS#yD$GaH(KB%vMtaiH~* zQ`2yHmEqOL+&O6u?778N6h=M0Ow_a1AFf0ioHCL;yVK|N{9ccT0w>=_#a^_O+w8aB zkG1Z_E}nk!bEfPsgFQ_&&NamrQ@jcTH>GSqR*ys@Y>Md}R-o_?Zod<&f&< z4tg&Px_f!izKf1_&T>bCX0t{b9}=RPtK-v15grkCU~u{&Qm5m`39B%|-|(=t19l2- z@UC6ERs%#KIbyq0s{Zxx)u9ntP26dkC#9Xl@r2rv7up+HIk~V`uOz1EhTgn+^R23? 
z0LF0Qq$Eyf{zHd0FF4J+?-LN%j6OFB_!Fbt+^&-H;lq7Q#zjNi!JuP`Dk|@dsyhLJ zCg?W^y^f4r{Z^D`WcYz4WQ9h()m+XeDXyenAA=Dlk=BF_5}6cC;kXXPVn!^+Cs)pf z8;oorVM(d0sX4*`hSRSgV{-K&`y*>f@e@7kNaxOO-_=mxAy?wECP&82v?0niPhP9=1PDN}iu2{V=?-_KHn&{~yjR3^VeA;-Bh|GF3LQ z2DVR9*$s+VZ;-T^60)Mc?E7x2Dj;-kSo!GBZ*$`PQ*3@up8V*OdkY}wBog4)()$Vi zW)1nmH1*M=M}OhslN|AP+r`8(PyJe3Icsb96}>c|*2rFBn3ATZW>VJsT^rqa!9Np; zB{Ggwt5FWZWe$_A5Diw7Q)fe}`J(avOqDCioWzNK3j-tDP88>R<(%o6s&>YAFX*RNmS zm}$ofIbH1IK3G`iqH*aP+@ThmOtlPO^Sz> z+JjA$1<>Kit5@mKB}C80w%~C(mhIxji+Jw#0|%&hGG1G{3}i%G2`6!!R@kB4rr|DT z>cUw$Y?(*rA1-awo3Ewfd0DdN<&Ew`7cxvWW-}HK(3aOdTFsyGa(2r8T&LY4^MhA` z&cf+@WHy&$oLt_Jlb`a?O_*Jr!l;ajuse4XdmN8VQt{R1QS7qayy?1FaMOlghd0L> z)^HePt_yZ%Cd)zoS&*9`^L;Y>tWlyK-I14-*R};WG~{U)Mk7-)N>0e52;WPkHR!?Q zy(9GN$sf)w>ec1z>~%u2X5Vhe3bPy(R}Hi9ALzcRAAKcxJb2yb=`xMl3v8>_?~-ZE zRCTnFy0D=%RwpMZ#j({oeSuO2BJ58i`1Pw-98Z)t2GJT%VRfW(U_+Ca5kdgTjjUxiCug~DHalqdwVMf!o#Lm1DDrZ?VNbo z6dUw-&gh8xps0_2$kx6->NFKvOhzIsAtma-LOAV^blJXtKP3{>q0v#@sK;ai3)9Wd z;aoyb7>IrY>kIn>3UsCvKBV7OnYjr13{Int=#>gjOx$dkIn4n+tE$5o4$_3=oXzovF88S$DA?v(>{asR(~W8G}=rFM6q8Noo95D;_z)l!S?O@ zclvTZS$VBfR_+DE2SqK1Na%is#l%~S_0mzYkHG#(9%NkKbxWuK>OVC-y~g*+n3Q;n z0|sy|5G%tB=D<@NA7NwVc6V5Na%#&zD>h^cy%vaDcSxJYLtoBN|6t79LMPU>ygG*D z*V~`jWj-op`!zAYN`WO%P?xhQeb1>> z#(N2wu&orMWBK=Ofk%o9phtL#oCpu7XhDGs9~LUeQX??B=1}7&v-OCsF27~!sfbd+ z1zmpaxZudz-dk{LyPaHxRGX-E(d!wXBscfOS%q85++4`R)3di}0RZDR1PsEvv$x54 z2PfxxB=9h}KVB1CcFa2v)xz5&2RKUt8#(4}sZ>O5_N&b9;@tF=e*?WJ`=KA@CTn@= z>A$N6xVW4Szs->>h@*rTdI%52mlGaeQ0yE|caV}Qx|L$b2rma-C_#hJwXe+(65>WS zc7ZyiKBGbLaLC=NA3t4ArO;mPTqq=W;T7W5Gq*|bDc%&CQ-6h>{F8O}t=YSw>@t$O zp0m7L_~82N4ad=x6hrexuV?*%O%qn%YUe~K%|b#8e=lkmTFi`TzX$PF2fYRF;lr+J zR`aSuyu7@YJ<-an2#^mG68Jnkpkl7p!D?ar z@tNIBFs8`p3RC9Vq=& zXP%E5Rp9XZE(vA!7IzIGCo$V{O)nl~)I`oxW{`z`YVuN5yK%YCGdeVoCT#NB8 zv?J~;vcMyDu?+@6Bu6$;>+G7EEiYcY5H{=C3~fTM{?BkJy1RX~SEp>y{Y?xbhkNG( zUBnI|wtZGe#d<+-ZWonmoLHb#-;_Jo_F-cZQj`sHhVcE>Lr; zW>Rxf=Ly!=_*GIh1*#;yv&QIdS){0J!VcfdMZBuj7*8 
z6SGfgJmdDq8QLKc+Q824jmZSLWo0zS`)wxC)P!oL2nezfOi5vhx@zau{%6HpcfGxqGZ0webTRa3U345H{&p2L(XE?je~G%+16|=~ZZB1}&_R zq{9H~M$hlzQoC^0LBY9(^!m*kiGz%J2!W&_s46HabI}#30Vv(b#6;rSwP)Wf)YW$& znO_ZWCj`2kTwFAeW7BSlZZAwO6<=a%!&1sOg7b26ms-ofnw!ane8~2@p z)Y=v5PzcZ+6N(c~7n%1|?k5U37|gmFArYVf+wTHIYarCucbCU{5@^W>YmCmGr365D zRY74L)W8Hr2xo!Qva(N=MmOPLfwWi?q6Q)b!E$~4rm!#j3eDbj>_mEhEQElt& zbOz?N3r7q@8F%j5_GD(h>*(szM5hZb@o5;SlD6;IL530{r@(!Sx}Kf~VMRw=By^0E zNDzP&qKg=I>PMV2ffqi2Vx>cmbx*Y7ZbveNHu|@*v4sE|`Je)X-awMuza1LC0;F8u0SaF8 znk$yQc5SD)IJ>#Id9KcvY9Kp&@Jj%3poXT=+$2!8q-?auIDf)xG4W}vfsy{(%F9@w zz(DD@Y;lEBR9)~+Ff7jU3JQpQYGh$i1aDs_`14YBq6K2}%yk$BLu5t2apOpSJFA$j zFkv)E+(z7ClH=>wyRi&bCnqOMWPsnQKE_R&>ibx61@{&n76cwZg5rqu_VChp0KEM} zwBj=z3ZO>3%Yz3L;r1VPNptk)V`O+>MA#uYS$!ov7iJu#RA#cCmz!!*_yAC0;z`bV zm9OlJ#%>10>onih)s+X7z>-|HNph#p@q>(bZC4PMz?cCt1h)lT#qt$b&Zr8EJB`1pooV6H|n7;1YnybfWX?H^N6}0~;GH z6u6dFRyhDm;1@>Hk@x?Un%W9hZ)1<^g9mD$A1ihzg8&4?N@NZ-iHV77Iy&{Q?J?AX z4@&B(+1Xu3k1A=2BCQ65ybb8Byy9)x?^+JE-usbUP^NBZ2mp)@?Wt5Xa1x{6y!o{` zJIoo2pi0=h(E%l~PLki5-Cs&dazS~Lkyuz*n8a*TB}BZXZ`-q(jrYg}!3|Qq>OF!b z#vdv5E0;ixm4Ktjx)k*n34k)@ldJ*zCx*5D;K7}3$a4t)r{|GJ;d_OU4v!sYBQD@< zIO!2ONRD`SO?<-c`Xo*`6cULT+Bx@)jRKfK0MUfSUwl$h0iqd+nCKG!@natRJdLJD zv})_>a>~nTTO1ZMz6l{GK!*jq=J)F9>+^wwL*=-Cj2*9r`4Vg3wBQ&R$a)a{jZcP% z0~OhdY~DNew{1v@T5MOJaN9wukGmU#5h_K`g15q>Q#-nb77H@6fcF&D#3GC zhfi!jz7|?@b|5jqCBm*RM1ban`2!O(vpQ&00(QiK^Z5C5z!ua&Ft~`x#P;(-pGl>1 zGVlp}c){8m2N~P3{7zrFvH^OvN0{2M7HeGh2UREB=8%MtLk4pSH5Gb`9ub`>&7W`$ zg{?+*6IA4fthaCX!ImdGH+K~Z6rG~5b9f!xR3c*F+hf86GeYjQx27;%KNBI{29d7~ zq1jBuEJ_@aZ9P5p?`>K>TWun3zQ)>6Ur$xT9$7voasc0n#Nv6mYD%dDbr(?sMda%S zqcEavZf=HU|7rBVNKXUPgTjiIhlj_x7y6F-etrxvigj#lRU;AOW%$hd6fg{6g^qw5 z8Of2zUXZF~jb8a-ZFRuLV@ApKMQ-|*9OKE8yjyu2Pw{luoq6d6Gd&lw?VzhBx)_tVRGbBhzKPxP|!zGVVAI*Bt|}b{;bj#iu(dIuPHgVzP>(J zP+wm^qThuzkX;TL#7ivR>({T7vQg9;VL5<1+W$WPnqcNL4ZyQ#cliZ3?Y9>0Hzg${ zf#(6G;oNhYY+VC~_|eN|*w>D*#Y8n8{`vD(C>m&>2lAgckd*mrJuxKPl<}!kHD}2!Ws0cI~unaQgR0N z_xJnFd`L*3*LXj;o)Nb}C+;9(=hDOa>i-1g%I{lGLsNi*e;7uuIVB}jIIGYIahmf~ 
z{UNrCYQ5dv`}UBKhIn8FwZl(V`-hH9no4Z^hmS|7si}D(t;NJ3I_#Qtl$1I6>JL@z zr%%O9z@!fGe#5n$tSl@VID4N9>A$$r1>G%bwroV8cnqDt@SOL=8KqU(u;?Me%=3VN zFaTK+PvQ;yy4q#0$6ZDO1_xl`bVrocz*RZd4+8JTU|k9dQUH!+H{4V(o?~h#Y{*7Y zF+f@(En)gkXV_SbCAoRcToa!k*ecVy z`g-=V!^nZ(yy1X^kw`otiNl1a=wjpK2mUv#DT%Lz)QKS$dWv?Oe_%NC0UO-Qf`iL* zs10*JY8x7QqXG&0^hrsZ0cX*_;bwKS#Ngm<*r0@Awn>4Fq_RQHcd)XDPv~lEQ~I2F z0GUV||2}bX*omn~osVu?oP81)xE?cnIP~K#tz7cT>bF_%oe(B(FcF|3Qd31mX2227 zAgds}eu+1JYr42#x1-G;Q)Y0sOifKCUi)&n2~rfo%HI~{UTvD`Uf8?t*e%0~S!pf^ zBGwL-XBSFt9>Q!O31Q7?d`DocDfOHjn>KEI86EA8z^Q>RuaIueMTnumiiJU>7L|!B z1%qer&QkyAB$|Yf5H^^9VLa2?m*%~{kn*&By<-O9k2-9`g&h_|fb>2s=}kTpI}6h; z#u6xYov?&kp{kRhhNXv2Q%z;%W&(D>WH=hECdO`K}5p^;5i2pN5{qpW&qQ1RSmOn+iMfG zHzB#>gY`_?C!9+FlETrG01j{EbmQyi0V`Uqq7N%(tl8Vt(H`+|_+S@-Ln} zb0N|Y)VfnR?+K_=ef*}fFMbTO$~NS8_rt<=%uMvVr=xqqY&^MzkHbv2Z(Ghn{u*V4xRWM z3H2u^OQhAm;PfaRtsjhbNg}rZLK8bF@&^+(Rv|FudPhb^UV-0`07m|?LJ0~SeD&9_ zUpK&MW+EdCI_e;C2R24VMufuSL|KAd$8S9Fs5p)%r;H3y^byceP*8jgIwGTezQd)k z<_X~v>0DK}p7EfglhbO@%{x_;Ki%C=^w)>bEd!(L+!l)9e$$E+vb!pJ%TcqQ*H@A; zokQi*T}m93m;mUEmqtqCJOUl=L4|k{++3W23A~SqA)Hdaxq5rkNk~Yv_VhqVx86BT zHN{{X^c@7mCw%m1E-+eT7Z*bUktsQMjJG2UuwFj}W^m~-_G(i^=w+XDE2|W9f^Tx5 zWY)g@J<9!8a-u)?>)Z^fpVyjudj6!G5y9fy$HU{AmS*MayVH-MlZ&xP^%Rg^L}UUt zMiQ22E`09hA!f8{kl_f$@tSmw8fM1%sFKvRwd*>LfWZ1vUhd-K^IfK){BwN#2JlHv zzP>-#2b{rG&WEV+F_fy+(?#BxS3(9SqPEsnlBlR?gvmcZVIK4r#q%wx#;Xep3(ItD z`fB+exVrA*;)-b=jsE!I1Fiyuebjfck&EY-ip8W;ehi+2kH%8RM)wd%wTH zTXFp4nC+J{$4dE$Txg-h!^1=E=FQ4_^*Jpd9ohN$uaokwoW&~5ZXm+}l zRQ2tfWsh4%m!MS~H>#Acus0X3NR7I1@nY}wrt4DY&lC8bfw?Wx`Z5EX6@aU`gXc=% zT*$wGsgsqzmk#ukCuwHTt0wmhUDq@a@7uY2PyAfX*77lPadt4P<<-0>5A5w4)TYt1 z_DCMp8}lj2mHSp{y^L+WA{V^N82_&78PAhyHdWo06^sfX664aT_3-Tpo?nV77r)oH z>-Nw7t)8zoWX6HT zeF+Nhzd0bk#(pJ=EiCCB`}Prfs{ZM-moH1%+KOP^4Atw#<)BvSpS~)E!~<0*vRVq* z#8ATozV_9puwlW@ak+@Fnz)1oeLcMw0t+t94m=+Un?qNwccuAq=sb-@yB^D$_t2qS z#6VbyJ3@~ofnY{50`^CoYQ$HBN3pBMgkL7J;{+U7(TP%#%D`< z0(kQE-2~+OK_&tO9|Hc=^z;dN`7JOGApkJAl|23yEbbsi?DUR~2O!8=(c@72*@_>{ 
zBi`u9LP1WBEJhLwfJB1r8b7kSx7Ge+aFch2A6XkPkqxoHA=QDUAgRlpH#ISNU`*!d z>gxKK#0^MMO>H}ZV%y-LH>yZ=gc~B!(zSFdW-&k6j- zKu17}t~hYeC>W1cH$oo+(^^k20#w;SlVR^U9)~xbfW~7{^dk8DA{<_ikd%t~EwEZU zY+OcjvB%Z`a=*$tKG9$(xqBm-<71;^m_QK!Lr_eN4YpHE80@73F&Qo<4%L@{P-2)T zWUyA9fd=FusZ$VDh(uJtId~`H%fi?)Msq=Hr~R12Eung;?=k6jAkDbAt-ztmSFYx` zk|K?j=lfz~SYWiOF3xNLE)wssXn*j=XC|VnOJY^(K%JYnx0IUNOQY)t{4^HkriopF zGyZ`tz7EL|MjcdUD5?k$1;bgd>~}%q9x9$)7*GV%0b?^|_Muef2SEUwiW)227Q7)U zSf49mr~<-TP!{eG5!nic@nVR35e+nf@Ofm!;v@$(MLI4my^+}6V8FwgvA27-Z6VoC z)ZGT+bQ*)Wp?)B%jQvGX85jy984^+YLIh8Z6bZnJI{L>H(o7<*L?%pYe}P7wxTO$v zlfAv(W{aKaeDqym{HUVi*!TLE`uO^;1`&z` ze&5ro%F35mnK=uiqoeAWnJ2RA-M|i?Guwe%4}|s1pIzCS`gB<{7a^iKUA4ynUUolz z+*jeIxDsfaZa;q%Kn)oQXnS{c3@Z**9wPcwQ>ERIj@nEtYM9$a$ubI>iHiMX;h7UL@GBFKuQ%pMrm(+PMcX!2 zGQhR@MkWm@b!+hbip0WQR>e0+*_U&ohXSfCPkJeZndLLb7FR6;5OP5~)U8`3aFoL5zu;#UGmY=AZirWP}RK7@d80q*YwLN59}Kiu^J zG#uA%`1LzJB?bG5g2+I0(L)1TSUTMsp#rT+%v5fo=V29Fe{T22n_60w6`n3GE9K(HJf9A^;yZ`@$JaN&XxV&yfj zR4eQ8p0&s_w_@N=YBIVJh`BqpJp$XelK^=Y;;eh@>+6IfdH-&OkbHb?46cwFyxi0y zX$D}M48GYzBO}rWMxVF3LK>=eM{AX$D5s^CH1i_^#@ruGY@(6Wg?aZiUhAD9krb?i zw;Pr17N#3tLmGS0<4tHuMdulkGy{=im=8940&r48afhjfRCuRI*@~hf3JCV8YEWR@ zu%Fff6@yXatI&+QCDY?&?7wg}#7o3R*=nSw1pt;=MS2CmWgz2?kB`42z4aY_lajKA z&n#J|)Gs82mgHF& zsbhON&-b(ayTZ7h`N;Av!cqtXwg-Y90XO5la2Gc86crSx!xf02TOf;jgwKdBMRpQ# zC*4lohu6bPu+oV6_}_D4sIWaMt)cPpQxN>ds%CjX45f)yKPuzu%1UC^uR@l+C|a0_ z{BkH_{mX^Pu5hq>&Oqwl@_Vjq_OY$)jNkBQeGH9<%t;U`jjb_@Jx-@c8QW>otI+K-1pwv7y^4 zyaDTW{F!KoqxPAm<4j5_{o5h0{9dn|e`SN5{}(d;hCYQJI5T|Pg{`^b=C z73u3y#jweSd;7}l9Q#rQ?lL#8=MGnu24_IbT11Ky59@UtJ$3nnqdSd=xNp zuH9k1T1No=&SF9ajStb%NdxOK@N_u*(cz<~ps zuQ^x89FLog^tn1(ZXVkpH1)mfG`6Ppv9`kBQhuPqyZ`(l5&=@YTFxqhe!6z;8Wm@P zoM&8w$e-St+9_7+MZvLY>3@W*=PV~&p$C9H)Vh95IlTuc2;9nQ66$u&pvA`L%PLmN z1}pf4gUBE%?dRyxkZ2;nHD%F;h6c{@h@N@Hy=Zan_v`w5!{;=Xlyx+{z}<-$1eqMc zL&0Ja*!A&qtX^NY|mc%E}$zausImmum}PJ9>8?q4EWTd-uUCh%i^KIZ$f@Z|yRTt}~Y}^|MGJFzr`EJ;F0qHOl zZ$$S2s((oIm~V9WsX_%iG&YtG7ms*M+;Fb41O7tb9RT#5cb5==`I8;kThQ|ykS~fd 
z!DxmZeAB5o8lzOU%?Se)Y)rSHtE#FRkCm{M-sxG-Cwlftn!(!e1NdAL_Z1kBtwOOx zjFQ!9`uP4mHLS~!{4$nDnOj&$fhCn#Rr}@3IxyV?oe3wIc(|l}hRvsh6(;!hh^Exy zR*@bsj2!PjF#V~w=UZjvNoW9Zfcn6++o+`wcQ6-Y?h${BaPg}Q5bTcTkL+wuOc@o>tvqe@CNi!H~@{wY7kezh|bS6%-KP&Oi4znKfl$9VYH zsU7tDacqN+hu`lB$a!!!0}VWhG+LRzZ!aSY%OmWT9YR8k82q&%)*TvHhG%G;lCAxe z?1hg6l=<<~rGcq=Qdec-E5I z27d+F=u8^k)a6bT{V2w!n9x;e5FHC2#0a?uIC()eA4iGhSnu}^sNgl5Q#U0fR$=mQ z0`L-ONf_yYV>_nCS@=@=P6otDZoZ#gEMw!suY#{A7Q&+PyocG*voWD9c0{vJwYU5 zxOQa6ExD?;wTzyunv%;{?0Gw5oA4h+re3(}k|J|pMUmMi()r>A3&DI%kT_As z`Wv>O8Jr0yAz>3;c>@u<#cpyNxC?M8ktTA3KVL;fxgm`0pjV9iWxwBkeTzp+?&0tb z^Zw~9GlwlVO0K3mcpskorloMXBd9$0UVyUEt?Mf8g>)p8j#R}7N3#}YIB|Zpc6YBs z@&ra#69l%-IP^e6IwjIof2Qc0kPqu<|8t?52sn|O+Z)5?jATAsMZz8in z0Wl0uhN!CKOGsmo|2To`t4SA~01QcnSlCts1J->=_E>JA4;sKXp=t!`_t0Htah#Yl zN~{8cOU%h31JFRQ!LrqcJ#!dHN{cv8$Y-{0-Ac3tMR0kuvb2o8$sCThUaW%PcNGcA zNCE06HHau7u&-@sK%djqeRThkP^PA)W+U>UsgcSKaP{`xyVpR!g|zC*LyVv^LsR^5 za7Jyo5cqvST8FI~-!+Gt{hw%Eh4}v>zALrA9=CllTSNTvx(V0gg7Pl$Bz%irkFnTI5?OF3TO4`)@Rog6dr;r z$SE$4y2wElIyMhSnAJE7@^F#S+i74&q@A@=!~rEK5~B$V(U z4c&ra-~iB{88!E~cYr!XS`<^#@yX4N5*I4OShnXU(jBF`>G#YC&{b7Qiu!^A0wLDF z2CObNr##(jw;1r^MRod2x)*9F^!PURO`*mMY)Vd;ybZx3pt{lyo|WRCHJgEmsTJQo z-XNP3$an~0*Nb+rq4J8@--l5gOfaARIih4^8)N6Nbo6B%iiKEB$0n7?NTB7TrwLv= z_t{`i^X3@Q%<4?><=3UZs+>oZC%_d}*8|t{*6ANknwVb+0}tz2>?1;mU;pRv*0Z&y zAAeo{77cY!k3D_%EEfa>mfkteevqQ5N>(H5L5-d){Td&5kzWjvzpeC_yZfT1PoV%3;^gNjN6jT&-~{- z$2PBUtY-c74B(m3BPE6Nkd|Gnv3|#YzSeh{eB%0lS9LtITLN^+6$ORb`mn#$I{sq$ z@2|2#U_|hfNWn?jE5|94JxVhmK3A?@y@$_1I*uKROi_8yjlbUspewUd%r+9&<06CG$I0o08keZv!pZ@_4#S^}$^n{OBB(%?V4(ZpFQkv}#O1hrX};9fGGUlI zJt`l(DH)6cy1@!p8k?HtLEWUhU*bP+Y|XNK$MgO{!vFQ9_;Z(F@_$wp*3-IWc=Y(vKuj^J!Cgu{5fm*~CNr^Cw&)v7n=6ZMBHCJ_P;It9S3H(5ga= zE%YP|8cq%VeA4b)d=t7QD$W{K859NJbcwZHvG}pFb_zwHCr}3j_^{$m2vSy)@{5Xg z@$wE8JN@MfUkSObTx}^|Rm27*B_-tQxk;Zoct~g~By7Pfq0!;@OxP3iVVWX&OcCQ- zhT5;%z7b?orm#EkwB^VoMRnTzCeE0}dl7}3hDU4Tt|+Q-Kka(clzm&NJz=z}vv%x< z?R)ceMO4F^fu6;fQ~w)mO9rZoPn<(+4~IG9QHpwM2oKI&x>Wd_Pm@GK%LWq!dq~** 
zdM>R1L?fa~Cbnp&oT|xMSPM84>X_>>Lf{AyCnNMc9W-OaYmH1RPp0-4@~*C@Q3U+o2y_dc4Y_U5=OWpzCqLG?DfZ+TWBEi;Qu= zg%||n2;g75eVww`;ugc~$)@cj$6&)PYhnW)hz(M&CY|Ao9vRAB?uhnjD>1ySuBMKr4Z~MmvJPK#D_aaEiIomg&)?r)T2VoPY4=7- z9P6$~yEr`c(GUirC2CS?r;k9r*m55%u)reZ%Z~x&pz94Jtz^^yK|cJ-cjG9a5UHCY zy?pbAoPflk9Qclzb%fv%(nLw1_*FVQe&>uK2|*7MA$!LHs!h_}LdwQz4U{q)2&OSe z486uwdYt(RA8&a|i^mAsn_(E<_9I88qsA})jn+J324n;(>jI-qS$TONyb`LaSvTg+@Y(3NIB$TD zgfRX7$B(8Nx2sU8A^{pHUS;qK&9@h@Y#{V}E@E{E01*)8+75f<&1!0DBoeqLI$Si4 zrx=o`35<(q;>0aBLY*2m5FS!Eg>g&9t=SmZ4$Kq%Wj+A`lrPOoyAhemOWEx_K_V?5 z+sy}*RsK?X-$%Wd_f$MydM8*akiIxHmZTgd5z_m?^OK|Ug*Y-r`sroWgbr>}_Sa&F zbboBfNetgpGAMr1EI?yp<&UZfw~e~q1|OTGq2hQgt(_s>143#Q2{9QoD-A?Kdoa=n zjT-M|2^9dENVUIDv7&kDWmFVC=KHc>Nw9@&7TdOMi&!%Ws2;Gp6ABsRa!Ji%N}6*Q zhXq1-{vJF!%1a4PH_!3i8)2eFyYJw^&9S(xgfI;py)bKX%%Nm7{)wbsJ!X$Y1>Ha2 z_yr#%-;0&F?sIxHi~aULVw6v(r_ZY_PUoLgkzN zF-%!?ul2*du15K#Z{JBCyLE%&$u){}U2_Y<;+I26S`2xUqr02myi(p7Z(PL*&PL*YB)4a?sN}2>R?cB#z5$O0|JUs zZ5I;ewlRZ`cSv^sh*-ZkQyj7^At01pxe8zGxVS5MDpx}k!Ds+mFH z%0FOkE=+!yj+q}Mq!>cx=dU|l$0sg*c98LJ(iP>lF&oPz2JwQ>=MYUZzki=bjD_No zAXP|6wTRatK_z|XSe(P+0w{olQyK=BF2(9UPrSv=KL4BDR?KD($wzb6_@}#t&>s;{ zl@|gIIfHkXJ5Qox;&p_;pFkTy`azlg1{I55ZG6~S&9`M>(&QmMIt8&7B-4#>2KHbV z&s5498@mg{P-$>DaAXFeaI2oE$f-Q`g)8luRpWlopMSv#S`LYiCzun!#y623UkDKi zAOxhS@0449XD(n8%!bXIWqXDFbqLAE`DX{CENuVo3zKo)t+gqoKVsywo_T@v?|Aq4 zNowGwntGme8(wX?CY#B)8e#5O1G64^S=p0FQ~?yB(y!6=I=`QXGsbkQ{1g5nQ{!Qi zqOzKB#lp)MFJh#R!443$TekS04>e+jIW|lPhDSzr^76(j*(fQoK==n9asxAS-75Oz z_nh-HK=RH(hX!n4SU@AnWa)nRjFyz1>$NMtXP5ra>yNHQ-yNj`CHxN`sxV7$vwe$a=aiF;8KcUYBU|1V_rm{5 z)u*_kwC!Bh0(hlS@aZt#{Cg#~L{O>G)BBWmo5+(6N2=S92R-zz;(}2jmb2p-$sn~1ON6!evzcaHceI9Erkw8DWoT| zTfcj+@FEgBtSXFlTDtMqc2WCdrQK(qla0$g<@#5OHSPj^Qm@0mA)AOmEa*> z0;4*3U1^?78Omd%XYJ5S>3xk@xu$2fy~Ra6`PvU=vU~UMcf%Edlt=-RC8X452L3b0 zU;`1%E0qG2+=eTh~9^zw4Xfy1toY`At1)V0qhr(wy*WCD*mz1YRIf3Hl&+pj3MUi)EACM~&>;Jw>Ifr_6k`4G21 z%H6;I?s^YTg(bO+7Qm?_;XXr(Zp7dKPmb zSsqz8w?=-f_|-XhPli=0ySIPnaVv?=5$#tne%NdG%NLW5sa@!F3&7NfAiSS&p9RKFv@vzef0gl 
zAEmG*&JHuPn1p_fnk@_r1(|k9;%bK*jaHL@?A#pdKK;xT#})DTa%w->!JRvQoV)(q zmY<1Q_{b6U1cjbP?s0yHA&o~BPf0_0GbfWBo}E0|LMt-SOnuJ1WO_b3Yh!bUC``|{ zHV5AG&!X^t|NN3`_jb}D>C3~p+YG;Jen|={mP|9=MAwR5XrRWz;n*d^=vQ{nKL|it9LfNuT7AN`kSf3Zqpl~ z*2Pm5`~er~zQfFm4=7qpPM!(|kaO|LyNA_7Of1+0#Wp!G@ZPi`Re4P5kSkW~Xd zsjqx!CWnGOzpBa)q*tZ(!tc$S$$doIW*2i_&%Q6pTx6$al}Q;bJ)cv=uG09@(_OHb zQcXzNH;CLR>F7ilWo!4E8q0fzq`MDOr&9gT%jApC(=%1oDKRwieVEz$lR|lLxO!3I zYM5X`rVgj2wQ7hiJdp&U&)f}@$j&fVHVst47lkH}?59ru4lFJy`LO3!*3_@{h$4ja z9esl$@Ee_jdD~uI-p8l_p$5pqm2nOXRQApPCR=kD0vn7NpI6$H4ddKjpL%24;Y6!@ zA$29N%ZqZ6abW>Gf@>Tx6B#oR?}dh{CUqL}RB@YG%r*w5PIK#y-|e~)c40d2j=@Zf z*-!cPj9=v#y*RWs)(bxNnWXN}TR0+S+vdLhbo|8GnHj0rVYAyM?5FvryfTw@ntBT- zZsNb%|8(6;duRQsSw-vjSg}OCRHEikzS!nq?zV7thWmfD0D+dhFVc4S+#}xW6HU*m z>{d6s=OeD)se|W!&e|nRRh&O}?p0#qQd?AMt5hhHzV1r>5L#&B(8`~>6h{l(Lne}0&G2E-p2&$yDKK8 z`9o6R1&;KR-yk#Jy?fUq;2@)z#1Pw0`IFs+A~ea6^=RRXv9-MmAJbD5ysHX=y4s3c za%=bLj=wKhWw^lnyP51rKy}UfTe%$+YCrj=dS@@De;tjCRySFEk#&ig=klCAS+%H7 zi=yo4RymH|58r#~vTGEpJ4XD6C+xT<76ht+i1M*`B2B5AXcY7VyY>T3A(&BU+K>Q6 zjw1%0wzaCwXPF{h-O0)6j>_2w4cd@oeIX3=Z}JILjgJqugyLdU*0Zs*S3ta8(>lD8 zs^*_#Sjys_*Xh0a@L{8G@dGTBqzDJ%W*@fHVE5Y1Qa4pTxyFLBwNSfi^2<1Fs*84o zM~<3fhvCn&ExsqT(%gi&#plm4GBV`n8D5M|w$;C@xA&FivEhqz+*GFSx*j=Ic>!m= z=N|=aimdx&&J+1@JsCR2^E`jXw(H&Cqey0Z(-p7#$5ci(qx~?8Cn_O~wF>MpP5)$q zZV))UoX>H_H}Kk`hIICiVmT zBg2kX{A`pssB7h&BH&sR+)@4j}zjxN?(`iDJ1D1&#os43i|w?L2xZk_kIN*dc6#~kMZ@QL9y@!P0*8ulBV`NAp(A%{GOcG* z|5VT4&#^Z2l=iQ%MLJ? 
z+Bpw(88>e}351%_J)(Z&Fo`JP(4)Y$Az(QapXq(Ilww^wc<9g*Xqq8+CWMT5jO+Nn z>DL{ogU(^ zcQp!-HP70VjatskjoGnwg+6;>K!e*_2-4>S)XAZC6U;!ju(XS+VWKq2kB(5_IvC-M zKAs=|;EW!rxBTmGsiFH0`i<*}-pkkIp~{N`j##>}t5EX*tnh`k>q~QOmbG=aAquq# z2`Nv!)^(%3-JqtZ^;>qI;kMcX11zakrT|eeffpQ0 z5T=>R>=YEPESrv_oLRk{ho@q~4zd-5kJazp)SYsU<3G?-oIJ69Q!bJKbILeaVmEB8 z^nbBvVs!UGPWb!M;|`1BQ~iUz_68RYejf1|pY^^bF_knlx#*Nt4wXmZweD3x#jC}8 z``ho*-u+W8kT5vIcr8QPv>`gwQJ1SKr;L8_{od9T$<_t&`n|Tw>FB^kJ5t`>P}^LG zg;91~z+1YdWo24m<6r>MV9$XbwP?{^{rbcljf6@iJDVf;F~(%pYmBa(=PF#XOXoQi z2H8A*_UzVq5?CJJpr8wP?%WBkkFW|V-v7%<+#yHG{jiUju>U2#9f`&nV_m}~=Q*c2 z?64q`u0`^UtoYK>eA=iqoP6oeOQt}>@ zZ#4}X?}CE*;$lGU-@JMAabTdXg~-8!9&l$LEdlds#X>J})ZggaCU6ofxcnLzDF0m+$coTU zKx0TqqmnYx2D6se4G2RYVum!NPv{jhNu4OI?1OpM97~ ztLXO%c4*>mqzgS>an#ea={Z~9>;b<#8-C&VeqO%3*odNU504fNc6WaP@~eP~6s9xr zy2_9^41OM^MSj!Mo7GeN3G}@!D z)CI6lUGOYAIQY?qf;rk}(C&V?*>V>eACo7*+&)*G?~x}Ic2Ini)3K>=E!6+cx?*j8 z6bNw4sA!aTPIy~Xde~sAi+o(Oxxd+w&V;KQzOQDp&r!O%Fh|wPmd75mCQ|5|#kYte z9mVF93px5f9Fp51DAmfjzm|Hd@MHAc z9ArG76e=2T|M(Z|x`?)8NLpObA?4Pxm}wBvZI+RFUU8*vPs%vru&@Hj#B#W-o;6qe zG~QG5!x%SCg#nfE&p%4D^E7lE{b4qP3*#_cuZp=e4;YO==%9gzr?qv8>IExB?cny* zdcVzn!(YrC_PjD=^3JfJvnpIw?=sWcI#sIdu}4tQPuU>N85{x-=DvF55$CU6V?;E^ zaaLoa=UbeQMn*u=aY7STguVowm8z;OI{E}Ybn_-CoqO%hT9Ia;0Z*fw9bUWu+Nx*g z3Xf_%V9jft*Wa*}_TG;1V_79@n5ACY2@BU4&HkXvmhWxN(rs<;In~HDAU&*lr=Es) zAca?O;MaUlcv{QiZ#J;$p~wM|+8Mp=CI7hv)d`#g3BxJKv?wW~T2@;E&;<2a1k+v} zl|4j5CwLU-VH7lNx#mX+Zr3E~0FaBxBNP+;JCDAJYf1>$j@nnTeW!`mnX`>CdtM?M zEAd0Fh7K#8ivu^_U4Iec6n@XV%|7c(2hVX?+095&wnHfKzWR4(=PC4wIRSS-ElhmF z7jtWSr7HyYIFLcL`v-I~{!Z70Y+y+MNmX@_+I^{&_l%fJW;QkR9N#ve2M49_l$8{I$5~GORtkP2Fa4CSTJb$&Gkl^ ztuZqw)i-rXxhuSqB0CaKR0@BZ-&DUERr$%+X-CqN+cLA7zW-2PIi^rZ$+h~YO}zZ) zznA_X8~)!hkmV~F_1{1anX<*PP+{Zykam57UaWQchE*p6VP6H&N6!3YOG~Tq%F-r2 zLfdkfKL-zY-=|Mc8W?aRPdIq}(^DLII`c3{CPD!2u5P7m*1~vP0OS_fW11jnJz38G z=XF?4FNd;9Y%0V-((|%guq*qDq!a!fN#iilwjC+ni(}gr3V1=COekZZf1yBg#WXxM z5quoN3;#mjK!;{z2o&fHru?-i{8O@27kTLxR5# zv&faaM^uymYKi( 
zX5bGj0|u#aC;;eczMfD+Jz6&_pzJcXurT~_M9Pz;AkZ+g7_#gOAkuujQ1$$MY>6c9 z(-}@iX69mGybs;o!(YFK_G}Hwi%qlyDo2<%5lxtA#Rr0xgS}1Aj+H^Mv~EXEf$rBA zUFJ|(E!9;f47jY2eT9IrB!Oi{&*yJ6pYxCaZv`E*@FMh#XjIz-(;xDR zasRBjr78s$6hf!E|9-liJEiIVFZi;S5j@mm7Lg&i5wH^>V6oo%uMZ`@ zhsdUs&=j1H-G|o5LhKt9(a;JsOA9#>zwukJq*0)LLgy2s3$0168NE) zm|ucc)s&l-OCRsYx|L4`sjA3yhwo+vhO4050Y@NZN}7PMs5cG&u!mOvQ7jx=Qos$j zKM0F#2s;Y+b@h)nVxxXJH8#)`h?FbTyoeZAWP&J(7RUyPt4qu5jLelKbW^}hH#Y4} zgd}lse2!94Q+IeB8;C!KWhQQrIelyX#?qo5S--TNiH}9^6YYV9L&!YLF$7fG2{Kt) z+8I-yrT63sun*s|^sK-Cr@bd}O(L-l{Q?8szu$QcojV-fu-S}FX8wdOKqZZa4?T-Z zI5Nnc+Du#z{AYz+JV{Bwv)AB~&W3ahHAh{#1rO+N4G4~tBNhN3qKT^j#J>3@NGF$| z{FUF%Qd_=-(wMiFu*bCQ7C#izw@`%T7Z%pf5T`Kmha}mD|9P_?`H2+E`&@lbT#j0H zaWSQPfksW_yLXSEURDzd&4|@#6k6=kV`pHSl`GAgc67Fu(!&0c)QR=-jz~|NcWSFN2(iD`hTm z%lTF==6~DN%F2*|($ZF|u7${+@cIy6n9@A&{O_xIeCcXNJLagpL{f`wUsc~_dx!}B zCu3>1#L}8Bj8}Lst&J}{-U~l~lK)uAxLpb~$X1viG!c`5*6l*zNSDD0RdB8IS*#q2X-9gZ; zf=IL&{xhiTplCxoZaL!IAFTQHQryHv_m>P6vx6?76EY!iSZff9fn`8V@xSdY0JOh~ z_lCYkyoD|t0oLL$X3-^-p_rO*6(bgV+9zVa0hM>fdTB+^88!*J|5WgHjdy=u zT&vpJeh~ zrj-S_Rp}@p=K0q{n`LRye_`lsEo!LyXkvQabw|w85S}z9QTC-@!X0JVw+xM94C@C7 zoH4*EcVT{(FW^Ge3?z~bGoWd%$jMc$@~J;>d5fmkVY2$vtZWcxms0{y!XLLu9vRs! 
zDUc*14bh#gM@4?#wNi~J`uT5tRRd=)GMvAOYhxa4f%0+V0=4>n+h=A@|7L}!hGPE| zWM0$A*n35rE3|Ij9Qr0nXWXEll5n&5iFL~1NF;d;71>LSpC~uC7fOK<%raN4PVEVg z2=PAe=IOadNaz`a!ZovvrCK1jZO5zRxwg3wHx7@NqD#j>3>s@7n1G1;CPXrN^@(nf z*)~jAA2a<;O;``Y5rH_mpu%kL?L7}3g#;8Wyin%mtuE8p*VJ?t@)Gyw{%mG>n4@+l zIkkbV`@W5Gh5H1b{EMmKQPHQtM{4wpD_xfYgd4QL#94~I@A@A*1i!$?o#@)YAwhW7 z~T!>Zx__E+gQB+jK4JO5Fl`_J%x8`jv8Mw?- zkbe<&a}bM-8yH?f3!Ux4bQkP$gQ4ow=_eQkAWU>dx*uIm27Gwg!WI{uoqi>oWj=uk=OhDMc z%f?tLV%(ouXQIJAKXh^KHmuDEn=gWgvBXHdcYCH_On{Pv(5Q}w01L?nMap~g%VToX zVK$6_41o{FmMW9aw8J}?=Z`4ZdEpL&Or1qPTeiDx}?cL#Z*rG9&&cy+v4DT4-*T!>5$Vf{wq2B^`A1r%qaGGmAWr)v*HxCoTF%)Xg0~+ZWS4o92eR!JVSnCBr|YN&ZN)K*B90;-dLUV zEG*{$IN=T?>Nh+~OcYQ~FTVLJp@=OnFV6{(PHTHRvCD7jh8{L#11SrGA2=rk8oOpVOc^F4L%8~x$Fso10lHc_5R**dYibDMKc=&H#?-{>6u$Zo>qfGD#XLJrrH*bRmul;2zZdoSiJ--tCx4L%^LR@7AJd9;(qc3B z$}Z189qO^_Qahb>XtlR_^YiGh=IvWxHe~)7tddvN!lxn5dUPy~Alt3o^93;=4>SDEo;jn2E?iij-N)?qcg@C?zcogc z4>IC-hm?&19ampN#Y2Ik4CBbCE025k?nD2mZKi|W#N8X9UQv4tlJ(k$!?fek^;N^t z?ch2fs62sYk^q=nq4VZ5cqX$?nWNByfLDufm#A87Nd)9moO zsBoPT_rSbpHzacrGo`NE;@5El?nBmc6=DkXBEyq_1h>M)mk={<6}5hv=Suexf6EeW zt1O{wDTev=*%0>;T2n$_Ra{E2~rEY|%Ulbojh)!QYyYCVaU}=lfW9K}%~7hc6{`(#rRvK$$o~ zG8jP(Tk!-KJRxM zc_r;NXDH5ykff^2dCpV(^;M&e&GUnc46D1max{`3PZZpkaESL)R{eO*$2>D?Z?Iuc z@&CuyTfjxNZvVq53MzFF5hN7^R1^f14g&?HRFrP%?vPZJ5CQ3CR1lQzh5?Z-rMqFI zC5N8>+NkH8dw=(R`P_TXy(lxo-p_uXwZ647`1l^$=2V*RQmD#*PPt0Hv%~du-;MY^ zKC|+fojTZipmRbtaX1hh;mBJEf1m+RGakZ0s19$HL7WRY9CUC}!e(C8*B1p3?X$WE zKu(iEfNL|YMHz^cMq#>=E+_-Rr*r)t(;|cwOaUOM)ey`AlcD@!C`YF=F@QZV{!kq7 zK-?}ytf9U>E5B=MyOmRDAW4R{bOQYnV*+p;Y~VRyBLwET2ovv|@}wS-Yyu@-8U1!| zX9U;k$62A2=|scC#}@%fJW#EOfeX|Ztk58c8RK-{lFF|D^-sZ-Dd#cK3h0=?kLUx) z0h0NK;34x?Jx}1CBV8i03xLbmsGOB5LM39^+~+wyVQ^%mq_s6SKntK|ho$={T~lTW z2c3H_+FcCVpu=fWPGk(iMYT@QDwwt@*(_525x@Pb`M;;DdmdH^2)m&+ue?$?2*HAO zxF*m}kV2pg9Ma&_&t7PovGb?Z!!&NL63IQL&HXtqP?lkpYA-yyF(s%kD@r4-132ie&hPC_R4t=Dx%wsYas-ja9c;Ok9%)k%wFz-W-42EZTh* z)ZoRC+~5TBy*dkt(bq%}!F)X{NL(j<8DzdKuN 
z{LooNQJp@mFO>paKk(qUz(fi=wajUWryxv$2p6pZH1^AhsuDFuE!Cu5OvV~!ZZ|#! z0gu5y;#uVZNpLmY@cE~Uj#(B5EI}-3FR}i933BnTDs>v42mjQ=(C|2 zO?>yxV`O)l4{}b^YRk1$zi{qt0pi$oX(Xl{+G{y5g#id%32ZyKat8>Xb3E?Yf%sGW z8#iuvEGWUEGl@+uKxzK2%8Sbm`nCEem0DMkvky8FJd=<5aPN%Mzs+_FM27SU6Zo&sY7>y2bfJpKXrt4_EsAVcPtQq0k5Q5svmlww+a7d6i}{AE99o0 z!3pdKNcDOmBTL#*nXAZRmD<>Yd5sxESSalZdJsOTXDDZ&W*lngwLQ?V=nE8u-6#u4ygc_n_Vwxau6t*xT zB!dZJj?O}gXswm;GhCayAv612a`}iHbQKe7@Y@v}Dg0GP`Q*|4LWAA|0My>zZt6bT z*473I#C;0On9)%Pv{vscwn;V<>wyLUC>qimg{=!#R<@(x-aieEWv|gHcwn6odfPi- zZ2SlOQCBLzNWW8u_JdRRQVU_#Td2BGp1sC<67>`BYVJE|LQAF1%q%m@1 zve#{JZ9EPyl=9T7^56+kKh>KL!|iY_tC_2$`s0-2b^e^vAJ=TTpjke>@a*Li)b2^J z_B2Bb$qdA$LSqc`2^!7s!9$7R2F%xfaj;>?VWb5auRY+Fj;MG+_kSJY#NLB`4$}6z zm1_9xW*M#R|lw*hk&e5$nEBR-G=k3S%T439AseBJo?h< z&S|`zfUI+Z?m`l>PC>N^e|;IstISqRrb_;6Kq~-LMVeC>Zzcxlxmi2)A&`2~#wzWz zh%o}UIrcd@UI+3z0S_k!QhvY&D-2;)NK{#p94Y{yLa5R1$JLrceFRUm7W_>}Q4B#+ zig(JP55V>U(u<(38@*4VJkF`b8o|06gz2m6G^z(Uh`4xk{8(j$dboM{cD^kg^h}Dc zfW)vZ;cZ;(tx6QJT^d4bOcWIjmmX1}3h+RrmA*yQUXGT-Lj*l^$FU zJPH*uo7wQ?yi9wo-_I9)!ln*wn_s0sW)g=mex6 z6|l6-0VteWo4Dr$tO$szWm* z6oo9Z+&2qLOCzJWVRxukK#b4vxRQGM{B62Jj*P<}%f#SbYV z{xp9(Y{bQ9W&eM0eZV*YN2vw$aA6y+9yHm7ciB9suBae@qB01JVORPOx^GtE*Q*(W zEt{H{01tMV_vVc*oOeKa!w5@=YrK^&ijZ2c+k@Er znsElCg~!7`clh$6lU zC;<4KH!j1YYPR+-3gnUdKl~lsevsJW!K^PRwW2_xTIvS<8H8kmPYcT$QwpoM>txGs z3>yAr4P$z25wU9f+OI?WBhW(7jRHD~1XaRzmp_NhT}DnDD3+s5&7oCM9Y>JT-(MV@ zzga&ZhLFznKLiiPd-|=heUmnY09`>%e{HMiUvF)sau{x3A|)_y=g*#fS37B8gP;UR zGY`Yo&!0c9R^apx<05{*CAWWtkX%WK(0~k+qT=HGziD)MT}8@+Pm-t%Qr$w^WBe9UEX*~Sn*O(0An}gjal-?;i;s^g;k%@j6&LJfF#2L*QWqx& z5aQ%lUkXI|0%4o5xvF01PYU_jOk+)|`^}+S<_fnvK&(im4AKk(NKXO81bt_`NaY_9 zzeAzzg}*<{mEU@w#}G6KCj>+k2SaxVy&A}jbMq~RO2QE45Rs)ofs&=$a1x0jyE>lq z-R%aIAn9E9I~%myBF?eOj>cPbXlQiH06=SH9{3V%R02Ay$Ie>)6_w*Fj zu3q_r){ZSMn)fI?d1BWvsSkQM$f0m_bnNV})5+GlMzf5?X7Wlr4U32XtK4mHL@jKU z#)2+AyGbTsq_NG9KP zZ*ctG$TIe4Um8RCTeO4EgvEQZw+h0R2Q3tvfhJcW30>KR|Q?0+c|Ffs`Z|ld?!)F({x003i$*qY(ZA zP#QdVwI~_+a7>=+lOESAV13P#zfl 
z=Fy;d>;t+K!dEJxPcz>d9WXET2dxxf$(4|NcpsDr{!dcTr$m=gb#xb9vS8VnUP3VK5{vMReE}jCJNl1fF z`Dc^LxP~6&W`q-=`24@2y=MKLub;rO2TecZUB<@6rRL*;ZT|y@dsi#U5pVuRZB11+ z*&r8-Iv(3nk6&3RFO(6(6Fol9lV~A1*{`~D^1AkokXk;=(Ho#|y(cT1^y$+v0MOt= znl2!tC>)0Ds34L*uXc9)5$2@}WDfxUtq%+x9vncx&iqj`pDRV+uxW$>L7MrC94-2w zeJf!3i10oEbOJgikPyfSsc+W+%fvtfg@pCVRP5TSyKGHGfXqT|nl#_W2m0s9bvJbmH0qeyhch@NUlNrAbCn)iuwJO9(M3yRQ|vu4PStL1tIQn5|X27 zVbLIJ1&KOi*5XKXaIj9qU;8Yd`4wB?AM^D+tQt z#PZ&A0iB9xm6lW`Q1c)>eO4+82mcg|-kAXLxQ88j7{xXTh6Fh{sQ{F>uY>t=h#4N9 zemo?_+yzQ|r0W~bCIyS?86N%4oiZteJq6|6B%s>U5Fs!M4+@)z$X-s1dvWdGN)sca zlmlM=Hze?Xs)?g%Pu?Lb4{*L|h(At-ISbG?AUQ{0g7~DBZ*rk((zAr@_t(|QZ#wSb zzAk=T+ilSzO&_>Z>)_2_#uZf0{1(Sjiya(oN7gH;jI}xz_}TBoPrfe9ftha;^kUeUq8Q8 zhcuX7g&1nd6;;)BbeNF38OCm0hrg z7J+=tR&c=1Z(pxlKM-*shn3nrVvs!M*v@rjew+Y>Pw}P1^NUFx`5QZd4AT9#vBP_F z*8}`gRpYjSVEsqq)YT7y8RVrQ-)Uw>Ig=VOX3}{pO@Zthg@Q`{2r21x&ac2T{q7|Z zqSHA59@+^|mAtd*Xm1DQ3M&iCD{uuqLyai4h`38>{>qQ84@>(0ZXXMOtxAw$w)wHz z)6>yGS}ytxx%v=*9h^>RAP(V|GW=n};oG#?# zc#t@NP|GR{yH{LY|EzrwlC+qR{e2c8*U6`zGvJ^Q{`>C$7yI)6erMxgm3@RTI5-0k zxA4RS1Rh?KmEFLtfqjb(cp;Fqz%(3~5nuf317)#b*|w9g#$t~p)+hT4{e6G#71`w# z_u(nW#d}_MOLe=;9jb!og<#}83G8CPHUFzbQ@s4ULV_a#VuFJJ90%|jiu#XfY0cgH zz^7%yZHFA~ULXx&0~Lf@c!k<(b!WR6H*vJbh;G}~AvG4)5jWCqv7e`Zru2Hz$wfJ3 zy`xiMBs&77bjxke_M93oHAM-T(-dydcBa{MA!-3UsJEGTcmQla35PSM2{Fp;z+YLK zX7lTk?TT$I3JIrN!cA;CvzB9z91CK2efUx0#XAp<9Ev!~bWGv#Uuo=ObN#*fT2&c7 zXYIeU1Ra-+R-2 zs@mvnOzb_4+2h2?Rw~6E)0O7=+WfIk3c_s8Q-pmEA}~p*_-M1~eWn(7lp*gs5x&cg)E-Y&NtfNcB@Fw~^jmc~_lAndI2k!> z_e7(9Os{W7V2|l=-W$!()MT~ma7%Jnd9m;p z9hNyxE7H+*eD5Z3Ly~g@GRpgNa;FB&@=QP;99VWs^2S*_MPc6P<-JS;-9R_I!M*+S zyRN&DE(2mI!UZim4%S^A6OwV+#O|nIqk44T8vaC*XxsD8eph3f6ZJpZG{wlR?z|U? 
zy!t&!o2OzIeeWgVtD=4*9EctVom_He?e#SaxnHD}5;zj%yu-^;eoP!-tq1%{zzm8Z#YDucgyodpg582Sg8> zD7RNWXS3zbc8QJKB|lsV23Iw`Cv>k#pz0eHzj!&4>R|OzH{27) zi8)Tgo=*aVNgO&O&P2+qyM!6KMyW4r&&z3kSt~_-aB{gFoXmO7Y5Sz2^~`L> zy=N-ZNGK(+x!;pEQXclXX}ZYOEyen2o5{WSikEYoOMcZ(+s7go2 z3{&*hs?@dxq#puA2x0QQPuu}^Ml$hvQDyFT1CziP3SwBGzHh-P3rLPIB`!w~kz}U< z8v!d0=2CKTpv!bm%qyg;z}%T;Q1u%Pmk9uu1bA{Om^7d!Nrp2MWQ~>}n?S_#&{j0X zyL)>A5z3|4h@|860wc)ot8NTa4CH#R&=+OUf40K*{}%BWFmWRM{)P=_&ZXyt<15=W|o9^%rrIaGx2Vdj7|DW zIajWg7KmIS&{Qy|wc{|W|75|y&hw(59_?B}<3W2`SX#2&hg&axO_SW6QPch-gHCn4 zS)Rq<3%HNYdG0;aX1&&ye5zzi%$c7&-g?|9VC-{ODWBs+N1L-zI2`|OrPpXa@04!x z6>~+ARJZ@NlXTe_N8tFU`g(@FEu9u_FZ+et53|FL(kf(c7ti1&V&b*>IK@^v{1`)m zmAUuqa2pY;yNWz&JMIyzM55K0JzjX-?V}E2H zyGdiUJNgyHmJ_k-@;TX4lh(JfCh){tm6ewb_PfuAVm9}&{=|!ee%~t3rt#A9dhg{u zlbMT;54}W5Yd_no^M#2&UVT|5C7z}$K`dn2MN;G8IVpE#iCeaG3p5eZ{?6wo_#8K9BDnRC+`n4Z0ry zkOCK3QNd9{LLR7qda%fv03&9)|4(D_1a{8p3RjDuiYrl?`Yv^WnOCNh`6}MyXbX#H zLj#U+s1p?nv)XL2>qa)S(iH^1j9^LrnYZ$GQ!>CpKzu*j^u=7LSa5&bKKn_AQ(r<$ z8J?a9%yz=bW<)vi^{DlSX%ETDFq`p4HJS;DV2>o%{P2l;=2Vqt$ZU#zdg>1r+-K1vofqv3tJzr7Sy2zBZD{L-aVMoK?z z>dS89Ehm!I0s`MmS#E#7zw_zG?)%*AxIbEeuP^UHX(idz@WEXC+JoRz0qksU4it;- zUa!X;FuS2Cvb&3eVqtOaa^tD@XO0cwx)SeEAcrv!R#Bk3gmo#{<-WJgvd5|6(jVHC zu{kjz;Ie7?(KV)JdSwX}85z98cBN&V=NgPKlD<_GT3E>NdwjL{WLxreL@wQW8xUq8=~FE1N(nIF9es+zU(n$lM3s2EkH%IJmU))Ak@o}UekHKsykWEYf|?DrPa_4+e^>~3yt zw$BBVN~?;tb!{|vNzFJLQ3VE$wLN~g3y%iRSnHmA-xe-+g8e!etr5i*#PA@o9O?s* zVLAhHRymRNQ=}3NC7l3mq$%4Nx6FVd=O@sBSpXRXZ_<;(;iH+f2b6{t7)GGCYBI&H zt|ovM5dz&>;60;wW(Eo~>*~Hiww-zTxb(q5Y`Mi0}JriyaB5L;pqhc zkKm=@G_8JO0A_FmECUYWDK&vRZf68Ib=fO-UG;M|=rPGXXe*ZT1l6w{Tl*$Wokp(J zl=CWCLCkaZ*%n);PZH)OD}&JAcVWZbQ|TE_;xdC+oYY-gq4csjF@m=?{GKxMidUDC z9j-b@&0JsJrkTTag!yg#EPZwDO?ut;G~x`?p`hpVO|n!2*+%SMn%^VBvBXD5iyu1h zY(&2o_x4&9Z(p!=T~na>+F9e#v0*3CQF8nmgV3?+tH`56MpDuD#D1ibTy1a7v0~2* zAF2nnvFYtn_iMYaD}*=4Qf-axkNc00H-);|j)rS+*o3~B*^i^H8GUWH5)qUEQmmfKf_xLiZhc*M zA%po0ya-tTj*J##;-A8vh*!4SDuk$g0l5M6?KVKeXMka@t3$%)p%~T}l0W;q7&+`= 
zG>}Q_uV5zBX^+Srs9r9p@_zrM?8iM|zgrOH$GL7gnT~SzP(>mgvy?7_a${_6PSxD$ z;X`t$lNsb>jPp+w)7SOsG+aE7`*Gam#qRNRM&!w%^k9cUL;Qul?LzWv9yO37dNPo~w_!#**BO*tM%Z-zwi3FELV{ zw68VEQs}<>4kgwyK^yyU=gD!h6X%3KjXQS_H2EdjQKSsHc}Aia1$!T?ZwXMqw*4dy zC&nY{0J23229151p$9p4Eg%yl75*MU7a9lLa03MQgzi%Q>TiO0l76{rAAi=|G4GNM z5EXKM_DT=lEVy$aW!B41mVYErt~CH$Yg~rk-u3?-GS-TJz{2`ShOrLWTI6G0ZR^b_ z!=qZR3&}W`AwTnnFx^-A1&RfUg%w69ntA_B7W|JtZO%VPDwCAy=W-Bd-TSR!DCfT9$6ckB{so6Kk+;M;zHXns2m z>|7v4Pc$EzO^;9w0^b1M=)yt^*fdS@%>HzC-h9K5x}ALx72+XcYsmBm zWEH%E7Wv~B+CU8B2wu!hkcku#1e!_~9#aInEruu4 zik}WiZ#exSP1#e7t}UG%^*K+rHAtGSW}~A($c%Jef!3>f#tIDp_E|_YgN6i=>%p@O zGwNSUm;pd&15F1A_pDTuLGubM1eTxU?@v&iWq1N1d{=wnClGrs1mq(uRQ`~8A@Xz2 zu!|%fz@ov7Lj%|tfRalO{~*FcWRCh09;jGc;oc~kD!s*SxA?WG%{wxDd0xHX)w{?C z=BlflA7Xl*{)TXflMhu!+}5np<^FDZ!`WQvXO`-oT;$c~z|wVq#}hpBwGgI*P&b7;XY=K1ScONJc@GFM5EF@r{Wa6IzI$pV^%XuW`}W&yWoAUZ5$7 z=c-xZn^s=8saK1gWN5%U`{Ci`xWyR>C`~sw25#k=40P!H)MHr3eWj2CAg_u-PWUF^ zut;Zh742*#QN;`kgWr~W% zqrvn`Q{f~%fAY|3z8gCzP!3fB)n(M4cpN$oh_g`K`t&dXk-ovy?g>c6W<7WAeRF>^ zh*((Qv48{bZBWqjF5?>gTqimSaAg>M|4R(^6=z|SXfQ*He6_ocswjnuXeHS$S zpdTB~Ul{k}oCf6U1LR|aSRx0+DF90tGY|48sA)|V%1I^3a|i~JjjD|BSv?@UHNiY& z%r>c84r}%-PKkGnPdbnECz4Pq?X0@D$9zP-t+QSepd~ z1WY5%IxMqmhTq^?Q}bJn8tv?CZ7su03u|yUL)cOaXw@7?pA$BABJOP1tTMAB|MlT& z@)mmh@1_>e8w-)bW=Hg8<4TNSXWIOBnvmN-b4`T(+9!H3oCl3BVDaFb0iK+VpT9T3 z74|wBzuWgAc{VsI>K+`OTrn*SOK1pVRVgrS0J5~{4c>45?_yxrqse8_0klNIY7k|PdkYg{#O`AY{+k_>F-@#K<<~u=MRPiWqbTLnuE{a0lLbkxqXK1 ziT9!wpzi`u3Kk+{pyr>7`?Cstd^%4Z=!sfwK&1wqfj>a43@$%!_G}=bXuuC3W4XaM z8Yr@%bmi{Ff8OT$&0n-hBy?QhmV#(NtDmD)+Evql7$y zgM-%;PA6D@V6AaiLBhHLtIhu_?4QrHtNiQPkb_}z+4?Os88GFgpY-p$41MIuUmsc? zW-BH_OLK&Ppxgc*wGoqb`0vwJ&~g;dn~aea;=n+G9|Ev2=$sG$c+9fX{$J{{6}IJ! 
z1w6R+iPL?iyl`3|L7{jcR|R8smhH@=yMYY;5fO@4B>wd#?gYO+#BI2wprbCSfJaCK zayjt<1avU7;im)U2w*(e*p40;K1Y-T75~1w;>KPFd>_>(u!t^0u~1S0$t6q>Cxle+ z`PBcq8nMR@-iHGZQihX2@uXLfZ`}2oO2hGeIMcu6d19W$1^9nxnP2N8DEj@ua(u)L zTbNRz8@_#MdBN@9YbOW?N@bbC);J%3|MZ0iz~RUx{r=m$rh|!-nZNEwy+EURVXpQ> z(m@f}didb8B;K{1?LA~+A#>oAhdXS|v9hs|Bwt6Gj~Jw5k8ZQ4!;g3R!1hT%Mx*ra z;!f?`ubT^zGImeRy#F*DkIV)N4WAj22id$jxG?!?dNCXM^e=v|3d#KI#@!zZAU7gN z?AQAKXI(L2eqEt|F0Obh#jiT`pEVWBcCh#)ufXGK@Xy=AtNi}y)=1OL0BsX=tk z)mL)sKX)I2KuXngP^j-2EjUM&U*IT+U#}2PbWu4-`1$j@H%hii8Z4G&W~rp_ntdBF z`u6U5swt+G-X+(T3pVKUBr;6={D}%W!ew?UEFWcOugQGr5nD;Ck(a+D#E~1pne9*D z&p$vj2*=` zh%t9-Z8fmiYO)1E>eE%-4aRziwgyJLfG*V*-J7SV9kRnlF+9rWCo$}EQY+RY>@JsH zX;tpD;QCM-Tsh*lmubAvV=yYu(3Lq~v?5D4UZ0WaBS)w3v~J2)P{4{QhR@x1-4G3# z;{3@w+qrs_n1@=0Z~vl}U*IiZrCJ_;k-$%q&{%S4m^Hpa^!ZCklrh$!fNr2P7E(yO zHuf}Gwm)#>)3+YI_v-?^ZP8~*t2X4|rqZ#AwiK+=_o&wXVou!~$7;_|pYZ)o6`hnw zZF~N}kKuDcy`TD>CKNcNj}{xLIqkf#ZnD4Z>XR*vH7+qr)Kby~M^5OT3zxMp6rul#AcF%`J)Gm~A3LMG>h9cGUTz){hM9dej7`{leDX5T6alm|J+( ze0uhH5p7_){0Wsi)YE4^ll`TNxobo5YW)^DaKV0squ7LMbB|#d38L8QBbUz!5S<eyxQEQTsPV+Ymn4&xs-w0w9th@9Nu)CBEP znZ$^wVtdY=RqLi?=^YPqb-|8;bryrpHqDyu1)*ak$HzXr?7!!jL?_Zn$mrTAA1LCY zux-OHv-v6%C-L&fvz98|=|Yyk?@2f!bC%~)OAjeVVM8*ty>@(kB9)K+@loTs__TC4 z#%)KwhkdkzZg1(tYHG*XgdVF{fB`X070V3vJLufLLkEl`nq8VScHRt(!OE7NO@q+;~m*Eit$WTiqboR>AM*5KrBh!qgV>!d;ldugk0E(VH5TH#Lu>gSu+j z19#&#PqUJmqoh_h9J0*zMbw;n=-eW$MCccl79MKKCWq|4a9LSdJ(NYwO0(T35>Q;? 
zdbE81FIi1ZCW)at-E|g+{=$A+b-$0hez7IE<@xS}8jWt!CoMLI4EI+!TdZj!pGTK* zb)E88F-@1e#u84^eeR2|C~+KB+7_>k@fT0F zc4qQr&tg5x>>S)b+`?a8Avs%OE`L?4qlc`+wX^xsz|Mq-ac&=faPW&h-RbEd=Aw=0 zRFm`5xmI$+tE0Eilbn%mW9M)%%T-G6U2t#3x7E8GK2GEO_A&>D{$SDaWwksX#Mt}x z0SanQ>m^rguIWrWH#4a*SkKa67|&~WhGO=s7m6lLw9Y~$WyKPzn5kS1f}e|*FCTfA zbr$z#2dSunR+|FUON|DLxZK<*+fw9}lxaDFA8bu!E~)pm^BYa*{@g zhQ*6Kv#z1c$DT~;Fse18KCj+EP;E4NjTmG%vejox_Hc*(B23eqb<)WY7BD|=BQ@u; zIe4rPyQTTD&Te2tC|uK=`ejYQmmH_lc%-W2Rp0$B`%j20HtFXEMMQXaSQ#s=4xrLM z_NqujS=peN8V0l_tB94p`_hF>&V+Aw9~ZoOLxnom&-vDtku#Q{Y-dX)hdak{e1ER> zQ=3?IMcLEAtPO!LbZ%))4O9I;9&}IAeNs@BmB^l}!Adcw%JvD+ckKxGgueT9?6689 zN8#l}Lrz6gCl7MmW`XW)D*_QOnJSgYhA&2Z->@0$23(KY%E9Rw77m8rea^0kgSrD# z6uYF*o~SLX5aK&}Of$e@GwR!y9P~FTs4SeFpY$m##fzvlG*FJDPk!tfez)|byy?vm zdw#k^1KqzU&-ez)HAFKB6c@vV|4XUrf>`O$&Aa-n-ya4!IxFtY-LbbP#Ty&brYwkv z3cK@I_ho8%S(ytl@vYm90oDadmvu3df1RYe_24ar?8(OyY4#c_C4`XQX}-|)BB)#? zL|v)5U6XXRdic~`+t-;2rMxlzX%v0pFTD|@dtxU-EmN9_`3t*gLdu*$zDh$w3(F<$ zlbo3;O9_wclN+w*|HO>!)dpiE$VV|Y{hk{Y4eCSYtAW^0+SD%SXID7OHw5?H+}0Mm z5~PPx7MUFao8nB?&?vvgjqE;C{egU;=DW4Rk3-fGa#&lF-6ewD=(_( zKgr)i0~d72$JfeThPC=CB(<{uAO*wglko-}CuBes5ed|2*N;!ONSh$w9?Gf<`h0cm zI0(`WcDA<9n4PuJlhfbBumg+$M>nJ$0=vEGG{FkqKlKL9n3h^7b_nK~^_SGt)M7lyBcnpnaFiu~+$l&W$yRDIW#j|<{2Z-$Gsl#XW)6*C@&(2l zDjEdja%c~2>I~}E#J1TK!_c|xck1(Rwi!Yi&JMFjay-;;r+2$7tZ}3K-K_ttL%-kh z`bDQ<_sN=%lNhSEnh$Qcxt-fOdi3&+%Zt>E%s?tx)df}$8CHt$bk(LSxp+ISJ6t;p z&#zhZJ(qGGtjlO&%wCVM>on`f&J#QfNNv!yzVmy7-qN(@A|#{#B6RJDgq!Fv39Sw( z@eyk^yBwa}PgE0q+vo*OGf!=5I-&Txy5dJ1-k<|zBkV?Zo}^mw#|bb0#F3w-Xu?-c6PIYa|jU8=d7%DcHk7I}*JBf8Ecy%-4y8e7uI$l;Uow)a=&C@{f{T)?q zc88>n)K@#l&tHhzaM*Gfps%{kEdwI z=wz+-ByM6wluB%OQ~uRXnSkCU?*O{7mW%RH+f@>I=)5arxCzn4xSv~tLR0of@?^n)xs-{S(S)#}AF*x`eXH2}+_n=C zCse%NJs>G1Rjrea$j(F~Mg4#LFvJtIEqiklpOAYeH52GGSh(CK-BfGy53MyAo^5K2 z;#}acU#5eZ4T*``(r#0fh5h@Q67T`L2h5*h7e&CWfFQX58Q#*#ZS1)Z$%?uVU5w=4 zfN!2h3&uFJbCH(((E=#0IKZDCAyB~S9RTP8?gDfH!Q>%fZ;Gx!qUG2p-9yTJovDg} ziSKW3T5l+VFRV1U67&&QC?p`n1b-VVl+gFgZGX!E34n;?nm~kgkV$exYiamaw?*%b 
ziCIW!`$0W4b@l$;#{Iq7ZWiBL3Foy9AFOmK3tsA- z*&@oHx@b@EFx1Xx0qsh5Sc7ZZpF{TRu*L~ot$oG*_r+77U^y8)+y273?e@gy1MH+|J90Vb=Qbu zzIuC??$JWUZZOqN)C+PK?lhbHZEiZ4t`m_stKpKU`O&W2wT$cXBAbVS>-C5kM%PD1 zaWN7Bqfo*Ony*&;?D3)5q}=L$?G|8nZVrR#Lp$>>l;~!e%n`V>Xyh%fW~%u0sT(j~ zRK3rP=p;BE91WLeJ~1bVy}{3Dc!` zjEHDzj-|Q9O{bH} zmM@*Fs*;Z06{KKregQbK?{VzV?fb@;3Iov*a~TWX)ib(%g(E9lZ;Xx*9@VBMI1?4c z5a;;_g>$6#Yia0BRn0bSB9D*D^>vHT_*%6l>?RUf)y7FYRU`0uZ6#~SBxA8U@_XSp zWFFtVd5(g?{l(OXs~kFY`&+35*)VFEyy4>K&OlCO35Py+^hZ@p>ygbJ#bUHJb=hvM z>e@m_%z!z~ouHa8I1zhev%jjZIxc=}pwvLOuN3tvO}p$LA08ft{GO7NsKqSWw?Xoi zCN}OmlGt#55>GZUB^ec2Q8eRyy&+nMqS-iKdoz{X(K(B(K2sCy)@ zSM-_y<{7Vv&=`ksRo(TJu&{Va*wGi&lqtam|V40(WzHmDoJD50UJQ=I;TW;*`Im1Yjql>k1ckcn9zK z^XCv%H-^D@gYgOC!D{t$ms>-z$H)RlgQYkwMh#uK`j*+747I#m+%$GsncZRd^$0uN zA>o;z_q`^DM=WpkTJgGHJtkq*&}>Q@QPv#BS1tq3zPjD)*H`WQ*%$X)gLg(NhZnj; zGomyttkcb6=5(B5n0S<>XeS2DDEhktfjS}T`KZ6S(|g~o_oG4WV(yX zOc~Z1fvASun`hi7^Qb1AtrgJsmeN0bs((?V17iao>1IFB%|1!@qdJtzyryS>D0yh_ z$WI^b+jy6xKPj*U8QVDqS=fE!eIFdOu#T%K(5tx|y~Y2Xr1W8A5P^AX;<~}t`G9Dp z0j%0VxOKh2r*E@Oi&yHh>yA>WXOj2HSDY7Pr%HCmBQ&cFPdG&-v}rcSMJLPVR+atr zj^bX^bPmp~Zd$Vt3J^&+${hlW6PgW&=Lpu=n93&keuj2F-e452pS;(07|G4^XU@O` zv7xFWwThB%(D&kK6lw9d*Qcu6L=Bpj24rcEx~%nDxs4RW5iT-F=qsG|(*8EFlHsyB z18%hGllImrJ-`Oo$Sm}4<&f@{8S&t6he=aVXl633&(+xc_(>kkYb0%z?Vm5UFAiuj z;0@=JUIckOQC>4Sal3>yXI=6%KHHTs-*XF2lNJ#ttyc+!x$-8>R8xmm3D;^#Xmgy~ zG)UQ!ALs(PARJ3CW#TlA3M|RT1x=nJWe{v0 z2+=%zwsHjZ-ZOKet8nzEiV2x_VP9vu z4Pe0Y#8ws)=|1|5_2&y+bcfU~jZUuB1KjhbIqm*Z!tRU1fdQ#GHfpnhD58*t$!^iG z&$IoZZUy_i@6dBL-^*1M_4hIC#MM6yx5uKNy=@kYI^zHOOJw1u{mHxLONn73tVbPGGCl_ac&k&_t?rWS*h)3Y8WFE$DS0J(w`K zmC(L!-*)#fwQPXxUbRZ)$NN3|A|0P?MxJ5l!BLC?C9*e+B0h0qr4Kx?AdWnD_N)is zHa4Kb2czX72shRPV){+V`{@uK%50$)eiG`=Qh0{Y7W+X{Bh7WX7oGHbi%qCLf0h8s zZM&T4=W?dT;hZ()*}mhq0XW$gc}nyX zBVDTYQ1d&=Wif@~`ox4-y1U$9|C-_#m7%-ZR()-=g-r@81FSr`8Cj3&_)g^(M{-3t zROR7_-aX0RwibN7EFfUh(@QHD_C`%?tB*I9z-#RCJxP2~Q4NO7LuwCi<(K$APA#9^ z>C@#m?Ag0J`&FF0!=AV)d1&z=<8}JgSNDIRr4QbBd2i6q2H)UdweZ}GESXf&$r7qs 
zyk<%Lb2U66uQ-uZ)qOAQnfZRayHQN{+LANn^XG@Y_M|)aUrYM#F8NwlSD$=zNmDA24w#S2Lf(^S#V-@UuhAM1jbZj5VAyL{{Rt;Vt`dMcYshcvp@rKF_lVD9vn zb(^6v%V-}GGW}caQ^Gr2_XYE%N7wu2)R{dL%byW8m}q?IiC0_@y&niM``++^8;c{F zGp>^jR>d~{@8&UIhFntw85~4awY8H2s(RXb)+OlVx({c3CTxIH8xK@!?9%Z`Nl7Q^ z=)}Pgvi^y)NhT!2VzPbnON^u2_)5SiI=Rn29(Ka2gHu`*IR|ae|mH_DR#-x7%b5p0}VHnrU+#I{k z0$HTMZG_zu1j-!r08wa5ay-x$d{STp6+&5=8j$5t*Jh4pER2w)cGY^J+>@X@1|rgW zxrU`J3uGtrgT*01%w9IW-o9NtW43^@lbm){+&?z@pw8{&>`P0grkH%OG-j6d3xIF- znto6RBM?P{=s}fU? zxJWI&Z~tm4BNXErchTK~0w4pm?!5U6Jy?yAsup?)9!E#Mir)M;MU|2n6^RTcWT+~? zB`*q1RPbYUaitp{pSZe~RaK+qs%DON@!U&PpCxEi>B!h< z?FyidzHMhd0&&2P0n>_4V_wx;?)yWP$`oyhZ+#nJe4 zacaySak8G1*Ez|}Hzs~YH>h>G-sQa(Xx>6y_vd>XwmTYk$} zx`*bxQP5tqLgrv(q@irfgv;W_6MBVGvMK-k&Czx5z5+X6-knWc5ZU%!AVMJ08QM$WpKuWqhhVB|VW|(iI=e*zduJx~r$k>eA~St9!;X@x~=l? zL51hKa{(|G`TW@^7JZ_NmpM<-dM<-J+88YHqF0qT{e01mxU#hjGA^=fSLDoY%bEcp zh1X0c@Ni}6_k@OT{hRD|)un5PHq`BPN4W7vuw1W)*$k?hwiX?WWBcRXN9b zTa%@6f*`TaIv3wxs+z8Tf$2)}1JJCH>#1y@T@?tXTi?ZH&(C5puYvp#kTity+E05@ z2-r3%2+n)}(z?}NUUlf>DZP{J z8xI&k+{DLU{>a8@F?qO79`w|t)SRY#rp$_17~EJ)VPd@?EP}wtf;v#Lzm5`U9byYN z{r&mpzvyZ)@KwaYBZtgQ3IKsV@L!TIymeyQ8Luk$WTe5(Z(%T{^wMK+@thNFmR5-4 zz;FR$QnN2`z()bK(7>xx1IRBRqPYIKxVR`a8W14lD0Q6`=95>sNdu?MaM^Uz?TR;BT5g{I$-fRb$O29cyxhot9{^u(GcFtSv}4 zUA0f67JKEs!sF}f`@5reWl(-<@JEw0%}GqP2dRD`4a_)NVUALfnNp)(p?Q?))|`n5 z9=S7zG;?mxH}Txbc7BI5t~W`)SC8jsvWKi8MkW)hm^4hdM`YaBPwRuX4%a=R+PC8n ztLsy8hED_&djJiP2psN4l604sOg2w3BY&2b;O-IW_x{+Ng$8@sevP@j}ZC2A;-L%oQArDed)X7y%MtiTNB#Qqs=Q>1P{K znow56KA=C)_^dts?uRtfD9{cY(Rq~`gntUZ*yT_yM5o)`ciF`TkN?GuK2e6Y4#EF; zA3Alh+Kg#m5=5AfXX)yxN(%x&@x@_n4x#Dp1+E=ub-b{XQ^B_Sw!`DE@s0>F(W9+{ywe>Y08k!o z_}{=KpmC1TbVmd59wx!}_GOeI!+M)8;Cz7{KQ1mA69?4IGE0|=moDKRspC6FeawEA z6RR}z8_5(_H22nemMiO+d*?C|?N@SzTmX`?@C6Vh zF%z}BdyjG|@aInKy#qdf#$o(*Dcpb*+U+M{4iQgmnC7RyP2gW znV`1*gD>We^|kD@&$Feq2-*X)(!{OaZ!;5oRnW24R&xuBm9MMcgj9Q@)!8&}wo(ug zh03&ldI}=yb-lM6V6Pb?Q>6PX`M@6m1>MvEBcK>!)EP~t41f9@R9?tH$!%@8n4!WR ztsoGwwPkC%x(jMD$b&vz2qaYo)&>+eLV&6}gUFOO^#c(2DYbFHAygw*8mN!vT|ShM 
zpxq&j_-Z%5wDkd~Wdmu9=b1wf*w}8TzQB_K1l0a2Wd`5c9T6brL$NEMF*Lumv0=US zYPQ-&umasc4HDH4FH)ZZMLhZfLzyA-p1Jk)P+-z9cYxt&p)9Qb2pY1pbzUfYCmj|{ z`-|yPB0_DsWtD;lDFbe=VqQG+<@=UY2JoH_VPUs>zljCU&C!*bIudEWtp2SS8uT<7 zmX$aF)R>ykGqn)NV+RuW`6D>w_W;&`_{0aZ(iac<&WW2y;%v0wK51|aMw0W#G99E5 zEQ~qj8A2iNiya&et65Bi7HYPAiV_G_0~x=7#6-4lNl8ExF>PaEdH-8nee}+n-B1Pf zf{CeeroH}8MAy2DlD@0uLKy0q7#@z%WI}03XgDtcAwf=9AM;DHB%dnbpXU6UwX4fF zy@U^hEx6ZyzNMkK_bj#Roy#S@CH~h(u7!l}O9_7r#8LzY0J+lWGf_UWUbLO%NJHvA z7f>;gEiXFA94N{@tM#BS_|Ob@*Du}GQ@MQV-cs0q?rA#370h$_^=XLk5g|o{!TV_J z(nt>sbDIZ5Mh_mbwkgQ-Lx$3_+^kgfTYU6&O-;>Q*lSyzt0Q1Ms89;0VXO=E41Yx|E{;ytanJ5@qm zyLl9NIG?_96A`z=ngfE*Dd0*k6+m%QU}H4fbnI!^;3>fKLc-uTBqR(yy|o&M^|Dce zypKPB|NgcS0!=K)L2n!kc-o$em+tm*ZH8Rnh~iS?r)w1)_;yN{i0)YlV{tfT-3Z_F*C0$@Tm9ft{+%_GJ0I z`t!5E_3tBT9=tx@-V3YAmE^QXzH58NK#!+${46nv2{UUUZzpnL@@bX#5e4Y&#Rt;`__1^k9$*==8(0n<0!g|+ zNlKA7SVrv;J&tar{zNXP>6HDJ-y)-;0)ax}gU9rD`z_~oSA%6_U?o-tP~T&Sx0o;~ zj=nhHS~{_y{@K~2Kgy??+~*B~I^TeBFE;mh9;fqIqA`ljH8^B;`on8SsD$O z{Qqn}Z{hrmsPd-{!e6r(tNZ5&=aru4UBOgS5~!}klbgAj?m@Y~gmOyC>__liDdAMx z?`M9DSUgMZyMcGsPdXd)A{jqloKl!s;$D^Djc=k@mPO8&*FQlo7yk)6vyUdH?Ggz9 zF~OP|9R_Fkq0F(g!MAsmdn;e#y){<(XLmGrU9BR&xDFZQ-6p@J+8exx@AYrLQ&UF2 z6a`i7lI9sbePu32El+RY){!IZA7)vulUO(&GgSJO)>|OTP(%Jw)cIcR@z!&o(@^8F zZ6eSOwgU0lV|eBZD_G(iDZNS)U1`*9C3E9YO%4cuzU8sh;L`t zll>`yC6b!d#nT>*-OKVA6|uYeoIahhcGaZHtd!;dxOt0Sz@M++etC#HI8f~5d74j) zC5!t7(YJ*yNVY7)JCe(csVbW@LLN6p15G^&KFhg&t0^6LzT1{~ektU^;I=I#UlvT$AmQC@W3quf?ID%sZJ_r@P;USO z!tYg8Rdk}FR8h&$D@)wggu;fVc?Ow&Ao&kC{l7-aQ8Z&ywI_Eb=C!pifNJlDSwcKX zu(vyI6vFSg^HBNwSLo;c1DWXX;$Fi_GQb7;q^CDdpJtBT5AM-_R8RJ#5Tb<{(OC36 zp2o^6(~CHsg%Rx)8u}77eM_e2@o;Ts1?y;;v}W#nBDKnwUgq@V*m=~tXeYeD9cDQD zkE@Q}c=;X8$WVbE>!!X+;r;!8ehyfE{t_FOb?TayvHU>LuqL(Il8O~thaDdhl0_9d zi}kH+ljM4(9f5}ftzLsCHixdD6e>Ne|NKyL`=ZTspp({VC7BJN#o8?sv4omGUS86W%`9!?O(cX=neSv4V!b zp-mSF$Try{FuKZ!YleWvDODI;>mNw``M!J==F+~+7~xw>3b&gH(h(Pr7da98Fzz{@ zkx^)bQ!RH0sGY_z4-gn-4&M$Ds=Kbp*@}xNOx^E$s8Th0Q_95TuE`?t{6^l} 
zCc164`*;+?hx-AEY$D@X`{QT+7JPM>mt~%7F&_z*BU%aOIzy=&uhyH?Le4W>M|Co4 zFMV%2P3?NsF(XNP1I~Awf~;d++Reh!9=zh$fFerPmqD+}7u!E`<@}Ok^KnbkPdrBP zUt>{LhcZ2V3x0`FPTC3E~RtKbQ;X z)qdXuNh-MGRbB0Pvd7(UQYpGBEftF-`s*F|=e?w)Jf6TCg~6TT*$W+3=L2Ws{!MiS zrPPsMJw5pTSmpCO>)OT#^eKbmSbqZV-~Wl^N@+ahd6kW)rv_VPtwooM0aLN!z)S=z z;NgGI{^xtm^P;PD`u}{b5j4q2f;m#Z7K$vs8W45kGcPYkH<#Qs?llaM7sE1|QvC1N zy#F)&F_O(09IV3E0j2$prlw%U=D+`}_5frA2mkwZzzhA8NoMG29|7I2A)&^-8tVs0 z4Juz>ba#V%eUjQnZKw6Xpc#x>?pi;qt|7z(R%!nP5#l^VMc z7J1`M5?T5&-eP}yYW%332IVt<4M1P>>Zw6a;|pe8L~4};*4|#))R-X*hZNA?2XL~( z7)qYAbdsw3D~70Idpy);tT5H3J;J=XKp!6QKA2!}S^{&`Tvv{bJ-20hX_QpD+S}Yb zLk+a>e?^@bPyWXRSd5OVI`#Y}(cQmZ!M$NC#dLE$~R+!x2z;r%nchtQ#Jc& zO5hx2IjZGKz}sPzqoI6eKP`bZHhu=j?jW4^01ddNWH!q~w+RkteJq zUEfJf#K>mK$_|mq8Y8cq$2R86RJ)aOfvavc5%z?BWq)=jXL;>(E6G4Tk4thQ+CjS9 zR1T?E<9s8I)3B*0>ro3rPSV&ItFF%aS?a(zpICW2c?zp8jY5oQLr-UCTQM=SBWj5S zC}sR~-6gG;~RYM*ujKd=bj7|CfG z8X8(SbQP{Q@pV2R)^3<$VL4re-wufged_r7vkAenKP@F$FmdMPuMYU-GpRR>K(i%u zV+07;#mb5=?Q=QXDo9QO0Q@4822V4M4=yIjWpm&&V97ifGo_45KIr2~~e)q?* z=t~kLaF|TH6%m5QFXVq%heGvkSpUMD(d+eS^MtYv#Uk$P{IFJ zyW%&gSGMf_!2Z0M(xLCei(C?#_Gp7bvWTHr1gUm~HUDe^KV|L7 zPN|n36Vt)YZ-M5)n#DB%tMUG(H7BUxxATj#t=(5$T6QxWw=<9|6E%L!?z8`jlV=q!d{YXeD-7Y@ek9NqDRend> zIkRl$PKTX(UQ~p2Sf_KXE-6(#%(`!1nDvC@C;R?tmU}nR#X<%$Pw#*~mfi4eGIZ)= z`6j$wzEGjLXu!1S=ww?!p;YvE2u!~`tCz2c(YRyY)Sg&@0jv=wf%&Lxn%sBX^k$KlW#36?VMnAOYTj1Y`<9Qd9)?rN}ifb4g;a3oAg~pJo@hf zQ2Ve80UJ@yHG3vT$-*(pLKMfNM*#*FXK!TWoaogDDaqBXU& zLeiGrfNMZtSXh}4$kMT}@ys=2d*hF5Mywt_WNJqAkpZO1ycwwNefGT&us%DDuGt@* zUG86Zm6d(gE1R-h_mU%bE}pceW#zz}RY6_G?P?i$jHAR#5C=V7_!-{9XS&=fl_omYOp zy4p)tL7q*$x1xe2m9s28C*RCZ-#}$|1`jKVh$oeYr>HmGi-!a9|lLU2$Wy z^W_`riZs?X+n3Kw=<9$$Fv%aKiLTEvZQ}*4Kc1k6#|QD@v5czW(U;ZkMJ3Av(dmm*g_;GDLl48l zp>Uo>YTt5q>Xp8)_>fckMRQU9om-+uOHUw@!N=Qs0~WXUhNBjLj9ANY9H5lMqr^zB z2V4>!wTAUc*zhY3T`gbNt!%!_wa8{3r(aP|ytkgj=Id}IN<<={mjk|~@VI=PaECDm z$z5D;oumvsuyP%`ius9%{wDa@pt!dsamwtDE+!*uZbl;OPLwm~}i*RLrrRNclCU5G>#aXErq&(UTgg|JZ*7R2^4Z1Z$2ohjI z^S)meVWg=(mGHgj^smMAounOjYUj_+gG}w+t-iB}_ 
z3JP9a^beSN{*duVY-8cNx?V-Kh3{<*DVgs5Rt^FA2L8sFAORGN@R~Qm@5$ck1Iuy> zP@Oi}$@{uCx-9;4o+-wb7wlwn%}NsOfkk0J&in=7gv4^7sKP1vOLFTU3HhZvYgm^` zk;FIw6I!;rQ5@SvziW&rlBXX-_Xe230Z7M3!EwAJ5n#bA9vF;EsA9`YRV+p{VXhrE zrQ>%;yHCNTbnkX{?~T(7hO=Y4!_5o2SyI3`3*QuE*)$r7O32mU9~{xcRw!O z6XIxRv#xU;K4|)Oo#l@7@qzRPjAQDB?m?#7iwfKK0Ur@$aW<5E6!_;S+wY;5Xu{1= zr~|u8WYut4pPXCK1ta4 z4;mZ^rpsTX`>o4Qk86+OFXm-Vy_cOKgX+I!B9a;_b2U?9VT6a?*sj9VifFXpXuftJ zxZWD|qF~%+8=LW{X~sHUKZ=x;ROI-FP;!2h@o=FgApmx8qGWnET!pDbobDRt>k;fG z@E5NnmTT47-T)tQ1491Tkd>KZUd{roj1kVVz7Jz@rG^t%zuo#V>UwhkuI`~&N! zr}3Z^pykcAYW5VOl2KcXpFB^EQN^(yg3-4-;?dOKb?Ptxa6m`|@fN4n}V4WHl9JyIZJjG9W|F z0CIetd{@m6V%vf1hti$e**$lSb=su!M)U{bZLNZ1MgzYfJ2(z%9pE&|9|kHrVhIyelqZ`P7Wm@+HUUygF>s$R?i zVGtqpyOuP@tF!feJ&ojet7BS5`hBRN^(pu8J`xMENRpNsBnAZ5tsxKqmR$>fVd~3n zE9=O;5qO<}em`i$3r4|%H_LBx4MMXuLQE7%#7&;|e(hm6<^29g?w<=~v4@29$bJ)) z?AMk-1f`$@J^kZrO5r}&AmAi>TMUcfi$#uUF&*FA*3?3sWjeaM|4Q8L{uR%h z{0ux%?tzl*t9K3@w2AwZioD(@6AgybJ}g1qlcau0@In3VM_pL*{h5a4YZLePddz5ht}- z$*(-1T;;tj{7{UE+O@W-%4p%#X&kfqVC@Jeu$XB=NC*;;z%MV6(KjrC(GLJcR&f71@qhv+I^qBtoa(ed@+rM09E7w}AtA``qrOW`V(TsW%^xEhmlrm*dqwO4)$z zPDsczE6r{D`_39Z&NnfrfoByzPjT@R%r4yXsFkIy_RiG}e3_|S!g{mZQmCt3n;zVS ztP+0l+^T4%-+ae`%>HTaLtocZ^1wb)FL9-9W(FVRE2js0L}}@n&r=8J0Iu(;L%+0Q zEH645lE)j{*)i)@b#;`{KfcQY07;WAatNqWeHi&?=`qce4|LiIzr*7AP(y|^Yiw=e6i!fW!bR z{td6u#YioAKBydWssvx9Q{QVVEUW@w`m7IhQAKpk3bUx-vi-e4*~n!M&CgWR(q-9G ziwawRK>Fmrzy|m(B_J?^^lr^nxU^j>k+bx7@!TmhP4ip*sbY_I0ozM^jw^&g+K@e} z=V$q;6ijS6wvc`8(0gfl`GE*C?)v@ps-!O_R2utQIxOJ$`ZpkEuTUK|D}V!AzM{!} z69x`IOFkXnJ!GaiqSAikE-=Mt!+s5g!c`VWQ5di;Ha2u}N;^kJNC1~gV|Cl^aRU%L z2Unt>W1Bxmta)5|FRl_AkU6~w-xyKWdL00`4YhF$JS_JO`0Kh00t)jTP5KT$+FLL) zOPY$cQ{WqUudyeL$-Ka8Ujc0rh2O;n@?H{8cuL{XJb~PfV{diod)4wYwf%Ly4*&E} zLw%Vj&ifv(7>1fpqiTg3)B%U~6Db~Q7Ut&HAUS9ZH>95uumATFO_4#|a9jN6`0K!^ zb+JPblBF+l2b5z#hX=9uiAFv~!U!7$l9E z0L1#jcxcqx1loUb_ZkWvo?Nbje+`BD93RanI-PK;duz|pqcYw>X4q!Hxk95auU@H` zap*CpiCsbpPw2mEF_g2E$-06$WwVUKDQszyPta|1xM`~r@e90bTYEBwkVBWRy^Bi} 
z22#~0X+5tKa3S-a>_qw&>@iXGn~v(?6T{olYQtFuGg||n{2Lk`n`)U`iMgnKVhY{5 z(E8XOEmp1B8B;%Hp033a*>!b`&){KOR<@sF>D0-ashPImH$kSmq_^My#%Z=pww7LajfW+nF!m~VKvA6fR2@=4&+H1T~ zR_Um8;hwd;Xj;)t-~R4sbA-27knst*l7tXJGMA1Cs#k>|zm?f{%gPK@GFaipR%SQz z52VQLf&c&~58TnmVXbGK%g-4x0pnK}qsA+^Geyp{rTIA$NMk)Y8$*nrwbS^5W(mug z5+;y*hRqU+1^^k3LV+>MZbC*dYhYOxuazpjPW3+e62(bV*qy?~A^Z>rG!eCaM?iUJ zo2ZgM2jE3T(N}QFfcA!~`dnNbuAtz$RMvWG_s5pjcGHttzpEZ6;8;3P=s?SUy7auY zcLHbtGcqGcBXBzP z_LR&*LX;Q_nLzA3;3_=*xHdezz}79j8F95q?;(+GBb!8)qh&0Rb*B!1HCP($bLLJ} zs@|F7LTEdLPm-h2zM#_e{!{m>#Z$05&o3<4x~nKEHlO3)ncIbdot+|X=*x4N2$Rci z69VA}uaj|+;4cdO2Q7xIJ0XbQE%P@q|OStZ`SwDK;r7L~)9fS4HU~vaH1b2FGo)Iq z%++pM*;~0L-5G#+@j3Q;1i0}orjn+bn&w~54mv|($9QF0wSaA9;X;E_hbL8}Pr6=c zUVBQlD*gZ6dJP!^Avb2(@RdV8>Ftu6o?$5?clIh~ud`2~yDwb$IBl;>W zlJ!1VUP>)gno%-liXOL0FHV$Oa0z?Vk?G`XJmK^i)BKy1Sbt}VaKOse_rjzq-619p z<0AF9KQ1c<+Za%xPB9B3ZQIqUuzU;B%9gg)pRq95#N#lU+X;h~eCA|Uaos=leCZFO zDB+@f4wLuR*Y5-AE`E_<({L+sseQsMk&1^YwYO=p6u_n%`b0=*{<1)SuW3vB)US(> zyuXmkeS0G80RwGQs4Db6bQPSzM1mh_Nl5QfeGtgkSqNKZQ-1yWrljzm5I7E(8}Ovd zbT|&ZsZ~@|u^>c|f<+z}tWv~y4HbyA=V&z>jf03j6n>aqL?_uZ`$ud*i z#9dJ6I)r9%0bKxMMYJEqdz?c-0^$yvm*Erwjq|B8SP(BSukpi0B>|2_fd3ZTH^0ao zBJ|7Rx2o&Y}%*S+nhtmMPE71qEp|PCeF1nFBM}!avW^fhz1> zdCk0M#y_LOKvaP{_vDB{^iqhkhDyn&m?y#y)7_dzPxB&ec&VC*rt-cVJ!KhVa&cjM zfQ76~lUaSKq-`1e?$&a@|G)DM?1d! 
z-O{rKpw4G1ALUaA8iB_c*dEe%981k4o^om)=M_5J_K4BJlM6F0e-rg2tDwNmI6C^M zp{m>ytPfy%8VRDYC~Q8S{{;I(U@#?@|Gn_8*(3FA0*wk$9hLplQ6p}n%gp3?6VE3r zeM$Jh&+KYAEG)1;?-&s5owOh8*M72F-4${F1@bAdyh^LOo!j35s~={}`P7qqa2jgW z&ilC5?uv&AseM#><1e>JcJCfNK-sIkytpMPzJHA|0>A)&{8B zLq40y;qfyYZ}$9X!gLgwvI{`(J;?s1B`>|ay-oHSvB*UoA$Oy`bk7b6T_W4}<}Gd) z84nSzxf@#eithUP?#(vJ`Ft4xWfh#ypRVrK$kpV|?)0teBl%&G%|Y{E;s3PX zp5$%}5C^$@Y)4aEeiuks_IeKmdHhz`x5kff!7yEImrmQ~Zi{>u08OxX_({-5EhP&J zP4laitKkz^ZlHK9nL6;h?O<%37xdy7?rm?xfa=T(0OBniPL#%@FGbmP4<*82z9nVd zv3U3SAdAUf1ShVR4#(n*k#(_$(=D0Eb6tnUy@5q;a=fd2o%4hKbx5n(^#CwGrw6}@ zK0z0F!iJ!iUqj=?T?BB#$#0NZSPU0XhuolrT2g&aPKK#M(cv9^;;DK!Nkdv5$>fe0 z0iXy>t>4xR^ePH1r?ZI6d*ba?seTe31-?ja+E>plHbRntllc8`8K3C*Ce|kkaXh&F z#mttXPNY+3S4C27f2IdlsmwT*WY3@nz_ zR45|irIkLojMywq;>ogZgUk<>>VCT0RS-rEcz}(&aM2@i zGtMTAw1=a>!rCM{wQSeas@_9wXW5ylgeGi7F|_;&N7jC9o-PXXzNL5R%1CWa)4NK} zN-C2oKulT96uJ4unA+^b93N~?Fq1tvu?A?x%uoYoQD6tHcn!Kb{UtuzKe zTx(8f@^ninp1N(0--}B))-L!jt^|15ZclmaT!Va~y7++f06$>T^&;EUYbVcx`=l9G zY=OYK0t*DWXm=aLu)+1d9b8S>%Kr^6msRMSIvXK=4iZAw-H5Tw23wwBdSpkB(i@av zB|73pSQzco8Hc@Qkcqaxoe^PQdA)PuT~CvI>A(XZROO+r+n{)0IaNclQh!o_JjrMu zlskLfDe>cSJRxL~5Fy%*x4LUAGoY@8JU6 z_6nZ#5pk9DyA9(CgS*`Zza@)Wi;|ObkUxevIr-Bnv)FsiBoXaz8R<@}i9AY*jnS*$ zr9rWlfm56Yv^;{LAfw@AwL~C6>Az%abbhhu&ff4m0l*GfQg>aySG*+9LSUYRQJz+b?AnR*rZ%W@d~JCY?%;)Jk3BN5qqO1BOxY|>5a?8~ z&oxPk?s}_2lW!+7(!%Zwfme^GEU~v zav7$g7h#>OuxVu0mC8RF;jj#@+KdmG{u0w5VE2bCb#+b_)6vz}TpcE$S;hQJGwONu zSxhCcR|8_tSb+KYGt4D#IW?plg&hIV$`ldZmKevs{D{o`HONCZ`{_@=Mz^Ps`%7f3 z!qFmapm4jr;FNF5eH8j-z+R_Q^S$BzrPkm_ljv56l)%Jc~9k(gGzK{QDaQBsEesBv>z zG`}ZcJNZedboMFiqV`reRB(i;yqdLRp!g~#XL;gI+-pnn3o}@!YHVJ5#b}y!Nt=bW zN9$4Nbvq?GFCn*+1|u}h)RABjv%Uat^rZgumx~L^MiUkD7!U(I-nZyF?IDkNfn>b9 zO7?Eng6CM?`sdo!EG}A21$I(}e8+ES3;}Q?R`!<<@6fzFQvZcPYuPZeopy zKIgQ9Vns>ayOl&95o=9Vm{l(d@4flXqi zNy3i}BFT&y zsLQckQ6v8Qs=@xlxX8L|Ih^Gr9W41TR|0%f*Bd_@ntOiyKh|)6K5*yGUnbFGIYpd@ zuRe>zX!KEMiacJA?~vT!v%b!*t>S%})PSOIIK8&FUjm(Avqlx@r(=?m-ZIE^a*7Md 
z9d#w->%P6%#YLgI;h0?sI%eFp>4{>ePL?iImec$-$K{V_9-N$wb4{I#-=AP<`Aqu- z6G*SNfh|&&}S%M2jLYr_)2eaNDQn` zbq$DHAE_vBLcQz|Io1~$QrqY075-{L=%{_Q+3n`Z^2U7K#>4exUiiL- zc5GqFR9ud)FI`H?Dm0UA^ZdD#RQlfmdfeH8RpEfE`2G5`XSTG^DFLHis<}g};NX=z zs+rf|XdK&VORqn#?(BjkSXpx4kWu2e7drHCZjzslZ}{?bSS81Bv<_9``s*-K2E6(f zZ)1C=uSh`GV+m{E=C*e<2HY`P5$c7UqgbdZheqV$;3}^L#L)1Ezu)@qm|HPbyVL9J-cvUcsOyI!cozz92?&$66{Y3b=%1OH;5+O_ThwLa^+_&DpV zuJr~rHHN7?v;{+0US6Y_iOGa5ZP)m1yweAz*vof?K(49&3 znkbi1_6w8GT`-jkx@QIcSo>#W*|cB3O1cZ&xnp!$zjn?*mTYyVd9VCv$_Qd&lD5oN zy%X}v&}wpYBiQni_%1*K(x4S8x)RcznhL%FHK>mjFz38{TIpeX#MW}iw(g6d!{O^5 z9xUybm+3Bf=4VvO@aVQUU5Sw$+AJ%HFs-g+hR#CvO)|O)RLT_8=h29gOddecxyq*; znCQ1tr!N!lrlWDohgOkq{SI)4xoCgrK|8H6X(1};8aMeI&Ep+R?gS))8z+?6q5onlW`IRM>-5TE{xH;jI&OOJnr( z0=y1B2Di6eL!ZA)D$m!IH;YMN6^9CCP&YEXfZQgNOLmxz4#L{l1hYnbA9HB6UT{Vu zmUguCGr0Re^UWaRTo>22T$K9#`zm^gf@DxZbG*H2mx#n*75}A4Tq3z6c@_-)k!c@I zHrjFs$8+!L)JCC-FBHQO1MU0XSbuWO$!TFoV=#-zaqac`F2egZurpqO!>Eek{@8zc`#vIsPNU!5{p1+4*s1oFF( z*1FQcOo6i@UpM`m>n^TBX0`i%HpBBnPQCPHO&y7mGVtI-vHJ@0Q42`|`KKc3+i?l!Y=aFQjLum^Yr(1U2*9mPdE+O z{obi%VrvxQXykoX)C&xjXGTHRbLL!D19COfk-WTb`y{w%DvMs%I8V6h2vf%y4p%v; zbi>50z3P5FTQBw}v&>q&uJyIvtOVQHX6EF+ z=9E8cIhWk<5?q@%TeYw3{c@s1KqN!DT`B{Jql>}}G+B0X%5vg2yu)oaxBkQ(S~few zkzvEgV}$}gcKS+8hWnXRrUO2Va0&cb*$r7%&5^LKOqM$`%=WK8O>s3=e0FW`Vz-eN z3b}D(<$o`b1%@Bstj?kE@L1oNRLxAwJ)jh!Ll$dL{anmj>w8;o(y>H?UZEfqFVPAYa2-`}n~IE8?$Zdrxmd&>c|9Z^GRjrG zPn=DwB&v*B1HUR=q(^}kX6N7wWKHa*7KGPsig`mWPUMq)PH=(p2`5hM|0o5aR1ebHGM|qqPLA*n#)5{=?md*}l|7v_ClE z5#!?Hwf<_rI7$u!_%Smx>chyW@0>AlI@Tw!=(ekfj)iJhBrh8eGOiP>#~FMPHW09n z=4`pkRPtw|pM<8Lv)na&_HMZ0a0eggZX-|Vr!@JGG_(Cwt!WiHw&B^@;*smQZk05& z(MKrhO(pH^J$;YW`kFJQ5Xe4_iZ$P;Dd!HbLxMShH*W0C2Kt}E_dw(6k1g0$v)W7)B6nyUv1t z{JIvJ#@8@-o?U_JDnYXo!o*q2`!Fa5F{U;I;#^uQHiR9{F$G*m{>%p=87kLYX%{6d z8@50)(U6l3m)q8LE%Uvwl(WYu^z4VeGR&_%I{lHiT3F`Cp`vlYOn(85T_9c#K^vBL zre5A5no0jl*BJ5ik;nbOrXj0&9uy>pc}4i4TxVUfD6K_my&lNnwBiO~&=zNv&dhpu ziNhqAnZ71}c5&mEFp`%q-6r}Vw{yJgaoy%jnLa^+jBH)x{m-h!Vba0|R()e}Xv5h_ 
zQ_~<{r2`rC&36t8zRv~15}vPXy4FTHfPlzHTFh3~xvIv;o!QWE4sk1DGG?)ae9`*7 zcV&v-MQ@=FRoA0j&0ePY6}_-S-_57Z*p@M&e_dN~=4i7CQ}LDF{%n;P{dulxA8;3h ziyPeY5A9dMT!<9duFt)`dru8Tvr|H+B9XU@Kp38jJa}63oZz!KSxh+yk71?seK#pz z>+wNxR)zLMmyV<7q4iUO%HW8!?%+sHIA?m?Fnylwj%ifh&b(W`%p3<*Qpz3zehz6e zRg?RtPUT>gS7K6-__Un$;-;sLa}ySCX`#^#WdBY@qRRiB2zZM6$RhT@*;K7>#W1Z; z#|W4}b=P7GXJ;qhjAX2{MO7JUL-k{ji@U}x{F@gIWroqhCv#$4V{AL3Zh_dQ+wD07VHEyuiT5^P@p5>&Ww?x2ZDeeNIk;A|X`k_J;m4MX|I@ zDQC9{Vio(hrYwAH5xxvbRZ~^Prv%aTSXh$D;)65k(z`G@rMTIs<(oPj{aKNRH!IA0 z(rN<;saba*?rLZjHt@@LnJ>0@*LixF6=gB(Ky&mk<}zY$Ij+$V`f_*s{L*Lr<7KWU zwFIq=)FiGA$R!5Ok?0di#fV}5JpJ2v@$ep1n;L>n~GCVcISSGln}pJ=e`FMH$HWfEctn^Kglxxv2Io zKlJdfQHw=i$V0=HWE9Y9tcEfznL(+OGU;F*e;@aJKR1->Iqo4^id9lTgM-~3tyT-2 z%Q{vKo!nQ=F^qcnQfRVL$y=}DQTZl}V;e3ewdmp$c?q(N1WcTv95l@K>d;C*R>=0d zmu`NR^Df8RmrfY!6W6DFBom>B*32>7*-_CqtW-P~5zz2vEMut#lQ@Ft)PshE?3ir* zA=_FvTqxEESNQq-FcABp%53!e&H=)8gHay~QIhVezL%puc;fHhn5RAX@=JwXdXyJa zJIO#==&&;BU3xdZ`ZIH6k)}c9&biI_9dgm}Vvk@fkIQ|J9d_zD`{|qPhMax^+eYp! zfT5ep*I;=$Jqf>>uM%;Mj7!b~U1rM?^Ry$1 z%9_uCvwrNK&-+VTdec}S2pMGT&I)yzlje~7UpCAzm$6DeW^0)A(Yvy%ee$rPsXA?M z8PiI8kW0(LTKW7eiYVmHh^%Orva$_Rj#8y>XJd;qN_n@;=sJ8QjMIfKEeu(vMMd2v zm4$IYmxwl#d^8IuJ;+r0z7O8o8_iEY=6KjvA)uIz!Xd<&^^)l{4kK03CaoVDB$C<) z*1L@3h+44fmlXawS;EBa<&1PpM`dLv(=X==i1Sm8hNnIpkOsB=TKK+5T=DjIE=E$!9~x(OLRXTPw`Av5l|4zX~pTvyjY?`$w6dx-K+k zQZW5^fb^I>S%+k%L2c#VI1Mn}gfaa|N;q^ME@!E;ts_^Z+aF6uM<`J%L)l8e_8F=E z|A!b-rB}2L3=F;<8%P~|nz4Ry29nb0ajo@8wPVp779s8Pv9y=#xmhu;Ymn~v2T9YN zonHu4fzWH55^AF^po|J$Rk^?8sX}R~2oP>#Pwtu$z z{aC|LMdp$E+4{Gk#CEmB**U6H;*k9#`A}JOm7lJ+UV8fRb30$KM;vXcOs%hIcu%uw zrbW3J8@G}PstE)JO1?=`^A{J7NUC7iXb-0xlqNlH$#B1vyBh3rssZsFeTgNt!1KbO z$$$4ma5|-;+2sQRRd9jnaAW_^3Rs~4lz#g?MT8c7xxz5QGC3yN?IOz@^D`%R4ZXM@ z57i!}jXdYlgu4n`QIg$N@pbAROmA(Lj!*&vso{!#KwpC$m?Y8jr=Y#hIF0q~n;vSX zNI5ltIhj54=8b!90TI+T7(S2*Z7kW1zxI~Wed%$&uQ~w$2aghJku(p>ayvG$1eYDX zx&7sj>o-$6t)>L#eiO|_DX|LQP1$chS29~}#9F^aj!P&nTOv>jckz=e1gJB*kZW(Vkc2wW3>!z*gEZ@>E4OzhC#KG8G??Fk;1i7{Rfd2jM*hZdzSt{vJ8NQt281Z{xX*pMC9FJVh@4 
z?(G%Otpx7tMc{tX($ew+SXYw^yFSv>)9Yl%`Jac9G5zymOn1r1f`Q2+K+vz@Q}96` zwulC5U^Xo6=~>m!2i#6S0OOnoe0;+N^D#;J$Ye1eQsDDL5N-@q+T(dG2o#gW^{vMv zK?CB?=eWVWyY|tEiMIs>D<3EN0}qz-!}a&)N1KWHy5O^~x)29|_6#sH19caScm$Qu zJJ9zMr_KI95MM;g-_|N+(0W_tcj2DBOUY*$@Z$$v?jj357ofZZI9`IjXD%+KV?j_w0-faVnv9B`OzWJ5UXh%E8X6dstpvbz zl=Sr#1v1g>rt)N*b1$ebsj^^D5_mlAc{@0+GBy4-GS)lh6GzdHO ziJj0rdUO-mxXAeXH}qElbE02BnNiLUGpg$_KcZtn=^p{#F0k?$v!C}$KvMB3L2Vf+1lYk%yc)6MPe$%$D(nFckF4~da;u@X(~0J0YyjoU%E^^jo&PsPQY3$y5M%mNq{Fx^dO>d)FD{UqvO9-;$o5Pk^i$$*Kc4IXC$1y1iewLq2_K0p4`% z?v|6e!1pR7BBBlW#r4;Dxd8cD24P`}KGB0u6T7`hBEi7-hfY9%6jY%8PkUb;PUYIR zzuFr1ZqOu^!mbQOD48NO5;A1W%r<5$Lu70>&_IbKWJ>0#A~LQ@DpSTqXeCs}3}v>g z^*f)i_j|nGe!t`Wx6HaFtdifPh4CajBLujREDix zzy8vVjY1Kw2RCA`G#XxNcaz;m{`i2X!K{D&)VBG|i>_6TJYMBh3rmhC?Ei1g!YwFd3ml@%M;aB$qj!#|dh+}zv@3S53mQi(cjYPyY0+`-PPIa7VAWgS`i3-4h;pS4*<-(-g<*|cWUCTp7Do;_>OaOrT@L{A+Pr}yYuj2(Rbt|64-;O8_vF(I#} zwg$K4N>V$F)(Sh9VSkWsc5j%tqoZS&6`t1bnz_h+(uXh~gT>qhSj?4HR;~b+?A*C? zO@u57?e*Ew7IhO$qyZ(v@6B$RBJi-KReLvipVhlx-#DsHT?@KlTpr^~zlND^R&i&S zg!}OCg2p8Q=nep5#5J(`_DA~*QtxazYJpPk2`b^%7h3N0m&sD-hM!AZT6hlY1(w)h zUtBn_!`0Bh0GEkBv31s${n4R^*b0iq0j{wu3hDm`|(Bq%)0BwceK;403^>JsQaGHvv==WW1lH* zfL!2y1OmhEdKtnA#*ojiZ{R=kwQBQaKfmoFA}r`adG!zCo*ws4m;T(TgGWYg0EviZ zN2G@Zx9{D%x_O_a2|V(Wb+dV}|DQM=-0zM`A@c4J3uKzPgHOq8u?X>q<2ty3$#_qj z;AX#`h)+yP>Vk=zC3ywlCMk(`{9BvVS;;29!uKWbwSx%b5RKRVNK*k$M=bz6hZby+U1#kkU!;_Y5cBy=Y(1)vM4#9P%wyZ5Ce z*+n?s28?Xy<6Dm2lVX9qdOl6oB5e4>#m&7aNYWRdY@@L;s-EF}DLv)l3flPTUufX4?am6*339dL&&g#`t!kEt=s zm=k?iL`t%Cj@Cp=V+-9yK(HCwc>+QAemvB7wYRb?tIqn7$CwR)G;mET+i~l_9|J|Z zdog9twI{TqKX}0AeM^aMEIQN0f<5Rk@Pa#4v4qb$;+j0e7-(?IOBUqyKEz@aeFz(G@FJoCPEiW8&c8kl<&W@3aJ$brj4482@#6 z>V}$zPkDLyaTOK9?lqRu>LYE8PUFb^9Yc83IX?N%Ct1EuTi41i(7C#(oddMf;Cgrz zJ{9g`elUd!S0=+o(L(IoljxG{>}Z@1iEXZIlW z^8A@#9z1{l*sQul%9nn?Z#C!5moE=@xuLNeVZd%#6CDVH&d6ChUeZ$=$Mo+0{!3XU z)60o8Z)us>;+UusbsaI@i2<_6+p`>Qd9_KZoJEtJH!R!5Vce*6clQe<%stLuPr!;g zUz7G1y*O&V`R5q&-9B__={8~Eb8#fOdf}KX=|?ijbhIUHgX$$B$uw{*e&8?WW>SDDH8yWO(Gc>8p8Vc 
z_v~J+rD@t3hY3QA4_7a3%{gsG8^ALY#J*f^?occDzpqYdg6x*BC79H8r6bV)#M-{NNF@ zI@_|^>G;6kU+%^^S2SQcYWqZfJ{Xd~$Yfz@!X zWWM$d5qc`e`gWNqnLeFHDZF$8eOF*Nx5<$_D@G!6GIGg1c$k8n1pE7 zV=bY5SMq;_&9n1|OPgm^xLUEGIq7T2Ykw`)!;N{;w7VGbjzo~iZ z)Ty+uG7}%1`GkaYG&S#ldAOoZ+9@FLjZdpjS5#EA;@!I@i zJ4aey2Y@FV%Zw@~4=?hxZTL z&4qi>ABp86W8+?0}>T#GSDMh{of8JT^SRkg&g0g^Qw2UmP+$(BbgPqED^PQIZs2`fKtO<;J;*K!kFe5?GkJzZ?k7?FQ$FN7i<-W? zEZ3F&5@XPh2|DI`rD2&)+dQk?_Gpa}haIS0_`eYYR z7c;x$!!}2<;cDs8TG6bv^WQDW|BDHURD@#jI_$;cushYQ4JUUeC@8k4r-!G@pP4;1 zIXPL*9=Cx{K!6g=tt2=j1A>43#*H5a9T~&0Gq$1AraU^XY|KIT%fQY1pV=W8IL*Vbk z5Gq(a5o9A`2_A<2O+j)2o6crb{uOi8HJQNFNv=jbdcSibhcE3cusMx7HXqgpbxH+LM-2d3p0}9F82hWMgBasjtspx<^x2cbBZJ>=i%1+`>#Wd$h2$ z%sua8U~nI8x5wc8Y=dg!N!vF)loL{GupiOEr3kg@b+j^0N4M6T!gKTA)w8^)%O{vb zioFeFwLwwCw%_R?*omH~8+y(?(aZ3F+q5f+Bi#&bZfIY)r5AvEnK6U5r-NYp;Sgpa#Gos1x$ilfQ;fKQI6vJ>bnT-Ch z!=W8Q^KXnDn|mV*2S+9ztQM=Osi^pFg3LR2IKE&uS@?evX?QI$lNOGD2eRRnCd8aJ zGK%i&>$?LBa)*5@kJvV)*TG$#1t_insBcEq%d1d;luXUy?ghL+FpGkam;svI3v&@Q zlwEAaH6Ri$eT}=uhT4q*V#Q14n@$}&c4d5!${$vlfUVPV-U2C%mseI0w@^`0(FIoC zeB9i}FngQ7ubDr;=ltQ{R}bu6%Dknn&Xq)}<=$G+mgXZ~CT=4|zqMn!yS)?IPJfG4 z3pgXG!H%Rr?Bol`q!j>^UsK>{}sV-|nmZDi>J3(b?>#f~V1Ll+6%qPHC)Cx0 z4FtnbF$hIgCn|*Sl-oH_W9$m4nINQrTlcH0N{`)nV}np<_K$DjR4p)=MMwlcT*l!J zGdCajW;MEzXAB{#RmTsloU5n=N?1F6Pj9D+zL3WJ7s^Ib~8kxtf;E8Yi3)uYE^1V%1MCw z%2G88iv${t#=^$tQ8qnGqumJ(ez1XGyJx)35$vFrlRmsGkVCqF`ep&qN-pRh1eLhL zqjEnPDwiz^3`D%`NKr%tn}wB?EsCA%0ReX?HlY+73am4|Hl*1{K{u_idD{148D}FZ zGxMi%t%dA4v2uRTD#R46zsEUBPCk{h<~ZQy*wbh2sOc}z}D zF}R|#(zV@tLLqptWX55_RzVW$L#Qt@f`iv##`nIH8qzc|F^QiD-)rWl5+#`c;%f`Z zwE5FpwlpwA%=Ir$`FHN$|M2ji5p$3(?Ky6Zb&BQ8%uQgB6lh+yx-#=;=q^B9>3_)* z5ne{cKi)}A&$Kf#r+#!3$d;O(ZYN?1gJrgTmH}dpAHBwyzY1d^>|f}l0;4>HR}^)S zfL_jV^d$Fy4DE_S*NuU)OT_f$VsHEd>f{iIk||4X+Q^%K0eRaw5(}}y1%mem7M5pi zVs`IiFJE4a`d56jtV@p?I+%;vv}Ie}l9~L(lyU0u;X=0|ZO}HzUli*TUS37u$C>XA z8KApygrbttQ}gm*%bD(~@c4%hxxP^gbsURE{0Nea#kBkxW3Q28)iJWh=;eNqfUUh| zudN{jH>Y~d57@BHe`1@RqE(j8@y$q5PLfd^8QIdvFIY{ivy1t604*xN7aJQZmv@Qz z*I(7t)M~ZTHqL*#Jz!#5nB 
zCopng-&!Y6-bRPorp$>Y^Jigvh(xeFoP?H#M*#rC!66e_4yHf)PyI?e)PN#_K-e@& zYtPh+Z+kI#m3~ExmDX)tFp9uLUH0tJBQ<9KBhbm4p-kjJ>70-VDSwE}zs&B*bY z^~g66RH%nIG>R3$)4)=W6#jktG(5)#PLC%gbU1?Q^6~R)El^!#J3fW5mqT~%un@tG zF&Y~in=a*~$KnK)f{B`#jV%>Qu1>IdUK_R}^vV6XfJZJCKZ7;y-}vo;tjtUms68Rb zs4+6$ES#KKXS-gX25r`P+}ea~_!*!z9N1kLwclAzJ539P#ULFjR3G$_>o;#|1G-P} zghYGv?NU%TF^R=e?NgH%zd)H%1zu}uX@y4n(x+}pZk->GVZJ$i0FbaPso{LtSq!EI z=sMt^+pyrucT^0T1_tRB6^doAUOkIRn>hxM;M=)#*qthHJ33mhbdO-0FQmNwPcJS_ zW0?$@F&)(-&ktVJ=yXsnv9^W0YY4e;5vk`gf)!nrb8u4jAD%r{V92^L#5U3u>z`Ou z=GZn1V(DwpCal$xY3a&mcF+2Uh8Ur<^XFQt_5ZEZ|FoG1u-hnMC=|-By?g7ZmYe(l zl<_Cy<#q`Rs|RZu8D)Ygs|LGFP~8fReRVO-Y;0{seQ1+%--vo21Jj6#Ou(>Ul#gBc z2Np7uAO`e8-+-POGx)5F89!#!-MfB$IRRt{4d}wkO_OhA>2XNEsUWn|@7zk_Yinz5 zwH79vyXb$7_?FG^MGA5t&z?hde?)m2@^S(Jh+r(y(id2tb@*+`!2=VJUi!;-S|oRkF1 zYA}+*nTGm$O+CF-2;U-74C3UV5r;-aW#HCVIOO$1^CfG4S*ZJUcKlRjz`))(8CfNH&UxqhEoS%nB4)Jd{=DBbcLf`*0X}-~2 z18)MN;s!o^8EM55P};qF_bg{#LT7lYjVX&J5+ZJcALI%zlCM)8!dy;w4`b~%6vgHu zrlrr?7Q~A*>@E2xv^E#Z8&DmW=k$IEUm>EdBhnNA3NV(2l$Cc>Azuv9z7 zoX9%L^KA~Rt=*zBHmqB>y>w5B(+LlcX`P#}0sS|THd=7~{6$FqWSzCY>F#DF;^4)_ zC9PO=Nb45T&piP@i+bqbnTK6iI2ZAKWYc!lAoPAi6tJF&BNR8uQ%Kn zBTb4;>twB67*!JKzn-X3MwBV1(9rw6J;EfHZ67_T+rS0pk0>MEvvDe{TmSiYuC7l} zWJwmRBpF`KGjgLz70kcwlip4VpTi)1Y?9`W3crnpZ)cww)d*B!ap(jIJs+mh^YIg=aw_lBOKQGs<$|s-z<|o zX&7aAyEv<%cH5;5YdJpFS%?O^4aa(n_rz`K?365EiBUg0c%OcDKm$4D^C$}^1>XN>NY>++VK zDK{6c99|JzUb8*K_mMesP!&D+gn(Z7!Wvi4I%dxO4`S%FUO9Vyomrz>(xN*4%#8jf z@AJ!6Bb)nf^JRP`SEZ$8!zr`CH%4-noMYM`pOI=YL}%g?%6ewU`1984)SOYfw5FqN zv~yqDJU+$wkBz)gbxG_A_-I@#w~x`y+nnQf_x_QrqVWRybcdg<=1C3v{WZnZU}GVT zzB@xbDM>MPMcvU81!F4r52OTe($w;$+;%aB*ILleM2vI3wVfy^NpBBi66M4)Bh zzb{wrgRp_cmoL);nsJ%80&jDDlL|_D`093RSXlSR%|$~8qnEz+RYre@!F*_Y+zAN< zg4++@s-d5aciqCq_Ss2t%mU&YUskf3P?{FRf$FBuXCz#5(@(7JSlie1t-_cRC8e`V z;fY?U-nrUIMOW8z#nX}EIm0beii&u0Br7%3LAa^oOmcdWK}YdRY7FJd3N2ytWUE^o zfvWYp1eb`K~pI9H=MlBQ%_e}Uvt|97? 
zRyd^C+&EK`W#8sJ*8hdS$e}pQ*jw3{Gc-9VW;%tdH*^EJ?uO8;UJLpBQ>yI@Ur36D zR2S*FMynTf@5;@7N1Pff9>s)sGaA0{bJM7q-1WVmL(ob&-$vDL$F=}95^1Hb!qG#T zLEelv%eT^<%RdW+)69jJ9T#v{UC_l`euLNE21?ScadwH=els&ogzY=6=!SHE%bp3} z$@sw<+G>URlG*cYXDbu*h1xwCn%>@;51QJ*1Q}}@sHf)F(9~nR&ne<=_SNmsB-5r0$_zO@#~2-6r(Bhv zLMO&%F1DxsJ{=RDLoum!3JOo`X^{ix3ac!$?-mW;|2Wk((^}%3BxsPVKFuC7))meW zP|%?bkiAq^P3w7Och`n~ATGyj)Y|*m+4zC6u_w(fV*^6IacfJ3q6&pdTvBd+Y&J~# zeto&OElpy!rplk&aLVhuX5hz6N6mhRq#}#9b7-1(qG536g_d#67pK#2GN~Tp;l9+T zCND7>dHn6#y1Mz>w-@!Ze)-t>hjJ*WX5)@@9r>_dVsBW_xV{N0KlAoW(atwk`@gUF zRjub!fRZ#%Sk8 z_Ni0%=#JGfqg7ea4Krj$6c&@mEL3Y4p-W#gB8_wlPFZH_lD%ycqN5r7#q^3woe9pW zH^(mf_vfz7vmWn~rHDInCdD0D%0H5;zo98R$upp#w-LRw@65J3VF|I%w|iERA=>{Y zV&u7RUy`$Bzou#2V7_B&Nt91gXC|3`J;vL)yU|Fg5WI_hVdJUT#j*yZzc;!Q`G#jslI58kX?`aIfT z$WAxBufLzdD_l=yrF7T#HypG2^@31*`sAH=QE9cc`i$aurA4*Ja;6H*3_R#rz~!zF z9Il*u6a=$p)x*knm2-ud-c4w?rfi*2XnD&Ifceyh)fzDF&1P(|s0C6@#n3-lcvWdcA1Yb@ct- zS=U*AJ;%0WqvG^wE{0nCUX8)~>5m>M<$Y9+JfJIBT1ziVteH6Aop}5_r*HbG6i+{{ z=em-jsKbD`3yeOsqGj9f`Y!c)@t`}JGRmo}aaQ4)(WxTMUA^s1wvI(D)>U~8gPPb4 z!*zNz`6x8Cg!a9oK!@++cfGwn2tSrO#JIvAyF`GkQDIn$ohe!nkyI9~#Vk{ybLB+q&;C<$M%b ztz|Ch`B3+oZxY<>r)0A;B*pXI%8(<1*@lA7v)x(j>JuEoQWbR3yH*R8L}1{i=-6xv z3%dNV)1yI9#==jy$C*_4O@SS8LvkEEnOQPlRQayIk49r+*xkk#+OrGw(t7QBuYuMe?KBaA+HkKNd z?s3U2LcUaX>~EbZ=>P1LpEEht%_bem8f_&O>7vuhPQZN>-)@&>&jl#;HT25v#;}@g zs*_EVZDScDd1(cSa*neOp&X2snz8Qt6M9}UhBw&Gp4Ya$m)^98onIm|(y>4xc=Yt4 zKd~<6!c;>qN%s+FzSGb<@RJqVmki#k60V|}-@EkLik(aTSjjIh$wJPbNa0o6f;)mL zvy!$Z$pq(jH{%6d&Uvg1>91L0b7ZjKj{drJqlIoCDWGi@V#d=RyG`G#dD9e1WcIt9 z%3I0Vlsfj(Q`eRDSz%%bNG2Gj(ak4BWgbROLvGOzIltd``rt-EgXK$QCo7FDo~>NP zDV77T^Ly5-I3`7lN5AviSf4qLCCnqBU%9C&B3in-DQ@)1=v0|@-CYW{i~<#A`AbqR zwt;tj#*N3nI42h?Dk^CisPic-@BL_4>c2|sQt7tRJ*{3c=L_Tw@=W+ zzCI-pvMq7cCIn!*$<9s*1-@*GmiE}mC#)W`fz@XC`Jcvn3sX2hc;5OdOICAa&<(i* ziW_$-Um*k=chh-VP>jw?v zjST75IiRr;DwA)&kL%>Qn(xPoroM02W8KySLjzKLo z>AXI|`m+vKZO1(7yk~NIvKddlzKa`|a85k#98H5O_^(=L&O8DEvwatzZ9Yv?>dL*Z z8l73U*n~v-s`X26v>PLzV5fgtNy8M}x8JKJAc0xG3uzWcCEei`gWgXvY6&d+3 
zEmiuCm117`4!&|pJ)X+3VDd2@XYr?_thK0{`tC{#%okF_PRGj12D>fJJr3XR9M3^g=_!__rT0pcbw_lXFz*-EUYB53bL~Rt3$Y z?$11}-1VP+&L+2l5drt1r%`yH+$wOjE!gJ<+37+RRq$1wAX+mia|WxzKI(czJAQJnnJd^!rV>GMa;H=r;QztNXTW z7z;I%h>`S{xm_$(a9O3x%i;<)d1M|~Lpt!#yP3VAJI>)kqqGrOXtIQFHT~E*&nVD1 zS|B4ycjArGS5mpTwo<9_wZB!ONs{N0Vz#X}ZwfLU+*nWVzo{TRqm;NNDot#(c6Gg` zmiI~Ca6se2W8LTqf)#>(r~OKPZR z_C?%C&B&PY>UQiW7Ch$;yi2~uh~4m)3xcmhn%R@$548+^e(R7|*(a0%oJ@` zWi9!zMX@VJn);dZF8oXJJg`bU&8>0luQw7!eDqc3`K!ZUGC%hfEd2bxOCg;MrmHV< V#W;8yGKe=Q98)=(dD!IQ{{#E_l7j#M literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_hello_svg_graph.png b/doc/doxygen/chapters/images/eclipse_hello_svg_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..aec8a02fe334655b199e4a09215bdbadc4cf5005 GIT binary patch literal 232450 zcmZU)1zc1A+dhtfAW|ZNGzKA(0@9%ZN`unf-Q5gC0Z9Srn2LgQcZ^0_7%*VK=t+$3 zjqQK*^L@U*&-47x>$Q#8Iotbv*L7d_bv|gSzaqcMa1##?k6cMn{tX@;aTp#RA^i;^ z;4f?s@BRW_2)$k?Y2N^j;2YM_czE~ll;odl`{(Q|*k#<&xjZ{?(}^1B7Rby|c>C>s zR&S*xeou6B!PF}c6Jx!rmY27@@N0InXteJ<%(zaN#AfmH$C~gknSi*D-_((1hV9|8 zHEk$(d*y4(MAKN2))X`ZRwRf$sW_;2fzOD{)RWKLfizT=Cc|i>(y9hWN-$;zzK>4w zZsKC2|9voRl6C*s2OY@bcI9==pI4>eV&)Oq_k2r4|( z%6FkR72GZ0p2j=FmvUc3t;W(mGTcHxqriag!U>(y5%uzx(f8zbn^$<;Bs>rPb$G6M ziK#MCyz+_~cUJbSV6UKL52K7T)rfwywcS%z^PE)%~gRs}SUgQ1OXFTu}_7G!gT-e>9$j?kw z$CG;x7eP~ZuoL)@40+$zI* z|Gw*2!$@dZkfgeFo2*HqLj-j5QTj3OV$SW3e=qI9D}G?xn{uA=`xBN(TqFDLwrM=; zlEP6Vy$OyW+ zUx=LyhZ_4%W%l&GqTzonH=##jPW7)NDZhI#bpUV)5kI<-;z_kk74`@f&M->$XeVWg z^!9GAS*niS3d-+ul$da<<@`0VmCX7ANmfvjs;}?v7jgnD@eS7b0&;8QhS2sITcgW4F9g~N&=jgl?g;gG~ZfWUM}SZ+$!8}y5Ij4yNE16FKreDbkN z5xI79OC67XgDp3-aAY})ag<%`{8vOy>Nd^4Hj7F;P@E}YgW}8haHdd0pN+aCYEp2o zMCEv6ZOwh@8PWJSF9{`Og@79lOCbO16*UWsu7y!OxjF?Vp=p(qg3xC*cJ^Y;nuclk zByCVw?-4VFO*rL99Z&YGZ28hRE%Zd8Svmvv56Y6x>l60cfZd@97*r)CA+ha=hG@4eF6%ZV2j=+B z4b(c%PT&@o1xQFp?%%(kXO)Oj*VNSfzMV>|c=b0Mto^p3Tk@XfyOxuj+*P2KeeL(e zccBj-KEzWQMhK8aNba?^3|?qRpZIs2j13G}U0sqNZ%t*_Il+E4d2z0+uHq@Gsab7L 
zyBzP##y)k@g|_(dcXf9^juY_B;!+W)_S5`@=2$UY7g)ih`IH-fXw7-RL{YN{|7T;1 zNGtB9@fgIwQ5;fIF&Iqg21U{MWJCEnZ0x6E1@x4bNB?Ime|y*O@qCI{e+KAkqO6*_ zx_YJq3CTemi}cOJ!pgyuK^U))P_A0G2o)341dk7Q(W?qm=?}57CAFqWNlEjeX#c)A zdIO8L3H&0+-(g+Z{$PB-6L-{_*3-U(r<4xei%V3GO0l zLQ9u+wzj`Y^xV{?eZ=~iVNr=PmmQGZ=`wiZTJlizhY!8g&Vqszx2UL0Eg02(uWmjc zm-nIXX*}@oDbW#^0l#Vpq5ZScBxRSm+RR)@zwtKo6E+Txa|9sXeDWW1)6Smb_d|j?p^2A;o-Cc4QqZ4z-^V-0us;UZypifWIk>1Uf zF8a;h^Z@|@-n;W3d{J}BNl61=6=Da5huI!Kj@eyE{9@HlVN;*^X%Z~%71}4PrY^B5 z50cuRF0(t@;9c9;xJ^UT8+PrATWZL@KOwi&q$Oc~-dMtKR~`}?a@Alx*X+aSG+o-e z>@Hyb40QYU%JzKTcSz7hFfAhZNZPa_T}wl{7sUNzYmlccRLMx^w7ob))ADquiHtRr z;r{)oc7G8&sdb$O*)7M3Z%W?V)8u#!a5YU$It7LII%#_Cd=ybBB-AXxCftU4uN+Oy zpA~O4uy0K?%0(9bGl(Z#m1vLe3u&Egu&>~9tOCY)FZ zWbnJcs=2s=W~*(u(@jLu&d*og?e*R@8;oLA@7_L1%JBDR3kO{~sArxf%3>A24=se8 zwdtqPxL+C7Mq8uio*r*P(=#&k`C6QLpapG|9WzyWx0#u{u~%sQ7T>3BL1(4jE$T-d z;2A2^`F;fW!e?9;ua$+Br9vn6@!M`;kL%(BVU_#ld7;Z2hGqPJqxV(R+M14P#=Zc| z*o($*-h)v?=7Nm$F5gB`Aq$cpB%TYxtb*6p*K6#@`RRzHj)uKjk4Dim#5Hz+?cr{$ zV>)^!3H__f(?+-t(9N|OtCkwu5yO`WQs8gjuGtI*U##l|oJ_&!MVxQ&x1TcstEU&* zsE#0+r?s}WPR_{qd4?GKR$2Mc$nA;~xT1Ql#32%#9kv2jYr|oU07vsg)rqd01b&FV zVFx5D!t2-nBpEs>|9f~9t%t0pK`3&b?O$ib-&H{DkAC)&^0#&|y0yl`QGL}ss?epR zS1c>TLQ$|Pg;;oRHP6dZ#Gt;ZuFY)fqqR0kXvaq%@F;t#R10a_m1S zFYNZ`qM{<_#n#j+tBPYz(6yCSYGw8pE-Nbxwwg{M`f0t=XY*$3m{+X1+P1ULs5?44 zJH59i!;7>Ks}m~xoH8=uCFRXT-2%~Zacob0_*sJmQb%(oZ!<9&2jJ6bP}EE2jHl;x z{v3*_=sOK;y+d=IR-OdvaC^=7vf}gfjen|a(eSS?5!dwVo!JZMy;Nc z!O8(U2|CXRe9d1yUr%ppy*dr*>FX1)>MI1|QqXpojzc3fuVz#tmX`NH*os|=@9zA2 zV%Fe~pFf-DjGBW2#$?bu744T2c6k9r{B6gA1tf@&GwIpc**HeggqGdr*w$8K-Evh9 z;o9o~0T<6a-W*kc+OoVx={p-e8Y-;^Zp)yZY4uD4NGjS+Z*%6j->i2La+>DpOJp5u z*$eHcwCGRCX-(EMz2N`ygsB- zt7B-u?=P?DWrD@l_lQ!{(_g)OS%3=(4rWp>tQi($N3;Ru?BUBe^TAJC#vZ*i&hzJz z@OspJN%XTin>5_6E;hvTaLe)Rd2le9N*WIkx1NUsg~{33vpWZ$>U~kokI!C(h8D8z7hTSy_<^poVD{t>BRtF=q@BEi#4xt|SjT@Kg_TvwdDVXPkT;_s|eWsrF z^xQ%CI3wD<3)F%V0e_)pUZ@^)k?8W9z@;8%~`ck8Y1e>9>l=SrenJO!TR{tkJ0U=y^N3GtmeeyA< 
z^=4R#hRpj@6g(-#D=hTtveJFoQp6RL@8FfjG>s`))tLhRa*oo*JId4>Ri%S|M`I^c;D0EN z-VxHTwk828N4Ie9aE^HF!KxeQ!&Oau59DYr;q~{Go;HGbb~(Tzi77ew`6I3o1ise( zG&Gpnd!nSKX0!G)Nk1*|>(>XKUS6|plBxABt6b7=52IK^SHg)|4j}+Eqxb&(tK2x2 zmfwwzNE8t)Bx6R!Ea`hsS~@o_PIbD>NXsSD!{c?TYei47PD#lE@rMr{00v7>QJ1&| z)D+u-!r9fTQn|ardJVEMXC^Li^hc-irbysK0sy4gy^ZL`e^jy6JT-ad+O%Hq80|7p zb=w|M{G(dmxw@t%e1-d0EZEM+zG-j^oSy#SuMrdjSQ3d73pWUVj3M(YE5n#2>yO4? zK^s>STCVw@Ei^RlRCDk9w*rOaS3QG~(xJ~vPfUIoYq0y9%(c=nWSP(K_3IZI1q6jh zzZObz)bAE-e*S!qHIeh{pii!^KVw|0@a5Uwi|hoGYn3hpI~}Y&WMLLobiJ9*QP9%0 z*s_1VLse%4v7l8Svh{Mysn!XW_B#LN6PG#Rb|77RX1iuA@9ut}xHvyQA2CLH2m}l1 zGeyN41)F#=U|9xwdL>)i$G}#f&~o0-Rp$h@6DW3dt>|5N97_lb9s=JRz`YOe-mR24@(w+am1PCp!Qee1G4ZwDfof8ICKPVbI*X&KstRZqH%Kkdb{87g z)3QC9Vqi}2dg=m)bb|tGM!s~?^dSY;-D^w~P+5L9w%SO+A=4^jY>V*-x?^Cz=?K_v z(DDhbC0w_rzs6@<_a-UOU6h;Dq4Qgx&9&4fJ$yA$W9J9zYWndAGIyvvS5VxX+FDV( z=L0NtFx2Rc>*tV?uQ4P`fEN|0U)>7(A{S)|_sz`UE`fxS0-y~5W`l&6WJ^G#{)ymc z2MP=9fJX!%b49QH2HdBkL5t62vGrnVRQ6&m#j&m}nW{a_Ce@(f8`DsAhS&iWZhNjL zCoS!zvhrg+vk+!p!`geiyfj1Aii(PB#U(>?jch>{jSLEHp<~z?JlYU#bS@#3~mM_m^dI@gf_#?zg-)hArl5DS!T)>ED`i`jC)dyVMbW9(?_Ky`!F@OlHBr zWRX_W@2^i(bHqrx1?Zk`8S7S<4prL>Z9)qvrGtF!PPV2>4Qhyn1l`3b77owyf_5!t zD#KNCFRAc2!NG!nshi{7ow`Mh zc6QXPtYfGKgz3fwL}v<7x@ijiXT!B0KRzV%CSCjSLy?I5aXTCafE5gscj!=q*wIIF zeFB~#yji7;Tma!9PHiI-Buf+9X?dZzE8M9Dr8gQVut3630TNC^8n01uR&#m-B9yv2 ztfZnn;ryJiFQF~DVKI=P0j^+gulU|YtgMG^=9vEaHe@A2vg=4|0AowYW!}vL}Tp9l_6QQ_HM6TQ=K;{g${r$@#gM-P>m_HQlA`Td& zTUf1i@quB3`WgT)SF8H-Y`fll!28n`02Yk-`SZra#2_ar$`dE|;)N7Y@bO`$LDbZo z>Rs!e$scbG0lg#&v{Br>yi`5J_gw=lvx6|h*fRlnRbHO(Y(bO?Uf;BqtRjZP94V=!Cc} zw$AFD!sq7+>UNEEdJl*AA-C?_k+-zOfB4F@Ll*laZ)Qf{)AKadpuGC>F73voX2L%u zx6Co9^9LMdJCru^?7$>xE2z`KXFI#VU>lXr61v$ZxZPpCQ_>W?KA2mzOPQ>MeE<=qQw-$n`h6`%~dw8JG=; z!=qTp4pEEIEe57ohvvs^>a*1$cW7;r^gbropn-n7W{kO~%+FZS-@n(n=ASYEMDB+e zWxRW$q7h%e20wUU=`OM0FLixwjcK2M+w*!7c&~TUd%W64C0K?(ARyDfO@S`*xwiHL zbFZam%0;O3T-PiL;4A?(;-;p|^I(>nH+g_W`XN4^9SG&FE-6952|B^GJM*rh5L|l_ z0rBaw`yGU3bEUxEl8`$2;Za*pKj(_K#3P^Xu>sDgZkDm`@As^qpBLt{1 
zz*JQ(jOpCbT`%Bf^+2Dn%EiWM2V#a-TQUNz+Nw8EQc4Xg(5%l14po@Vo)*s$=f5-= zPSrE*1Rk4-5b;$m`tLx|it1iwKVpCUkSEJu5nTtq()nshX=}0%cL{v4 z4eqoPL=jao5o&a}voLpBmK|w}Y;BFE)(w&N`}c~G^d15N28!x;BJtUmsOHm;jjz! zNmV3g{AbCmb4jltdq2-)%g^|iJR_+Xm?DPgW1!vy=Ch`)?ZJnnLXRE|_Y|C~m`ACT zNS*GG*Sg%*+aCOD_2|iOYZMCW;O~a4y?PrAGxhIX;CElp z-Mcjwbg6uP_IV1us$d)#AsM*LxM95djzlC2eieuC6ZpJiV?N2&jSF z+!Y+}wH~{3L*@fljWLgDElEYiIcb4~n1C8*!SXT4qt2N!&1c_qD)Kqp*CP&c^dgdM z`*mT=KTBew;2Bzp<+r7xryo(c8_y+Ftp8ZgQ_`)7^+ftlF{ToK!>eC;E3sd0DDCwK z^Wf`FFkWB1z!_h6CWe82m#?*fEiY5n`GL^GhB@-a9EwYdox%8qoUtnDxu*)l|1^K` ze2*R-FGVk5HYgMl1h<0zN%7xiYdGJp59$4?vWnWESpVS;fPcaJR@4b)h&P|wQ`g?I z*RurDjy9R!kkk0Bw6?XacAR1WzANBF4#0)%bvVMVoJm_R%q2RhCgm;uG1h-R_h%B@!-o+k+oFHi zs`)QWD`%GtxL1XW-Xq8Rd zvf?4Q&UzI53Kt`#&?<@|08&xeZR%?ohbt;Ji=y>|D|7WBk^tq`m@|5FXm7qQwjZ21 zyjhq#_wB%SuKGO~iR&6%n*#DNu%hSo_V)Q{v&PDMDYeJm^R9O->VA~Kwr#dvK`-`K zHO{XRW$8YBnv5P7$z7W&xd483fYtLxTAUa18@Imu_;HD7n^G-=OeC#)Gb7+|cA(aN z{Ox&Tf0pl*T+Ax-=8R{?|lnKWTs6Skf~Cr3~=5 z{)_?-@8gWBz3H!CfA8NWgY)FpE8}?jv~2t>Ia@iUZ02)-HvK+ihg_?4AGgGz4-d@z zpkkp}Xs?LA^Ha1|(?Ob%L0uqu%h}Ebl)rtA71vzzd`K*(_3|=X3U|mIe7dfOLB@+V z?N?W|p7v#5tH7<7x}EPvvC;=DHsw6Q)CTrL>Cm-&rXg2qyUk~z?buzkAe2)I^T-KS z=2q!Bc(!@zm~bjNpO9(8ty}Gez`le7JGzmIYajig!ne9m#5(DeO+HJRwc0wPX)`@p zpD1;3XDE%Izb3)P0D}mjtPB`R3rshYiEPfHW`tgnGd3bSKiCws;X0%K{@eA62v-Z3 zw8?EnPAr$=Gxn+kV|TE;G6h4d!C6C#1+Kol0-ImNq+W9?EW**={;&6QRjdfiKF$f5 zHkdzd*xu6q8BRctSL;l#)16$Wr)4YyPMoY2w)~pUJ=}s$fDg5v`Ag}yN2=t}+_`fI z7F}G)>}R!^`PhlwoK0o>AtWf|(PHewat(QOF_WF8Z0yX#mp)_``qGxeqw0wmQ-a`- z=!T5=(J}N2Z9O`6bbdxaP3MCRVC!LJu+i1I0J&bKJXca8W0v|c>+J0Y zxQRnk>QqB>h@n=t-gLpqc###cDQMdT>)E$C<;wrDS@(M|m@g?S?WJ}sEzl`99~L`7 z`hLsRiQ5))%YGCoMLk(Px=>U>=xw%MJ`NcUKDwe*4czeHry|V8;5u zz_ECF1ITd9DhNPZgxU*`;@GO@v6b5=S4_^1b<)IvZKe|w&=2t+de?8BkMaL^S91wzfQg1~T2p7O+N|c|wk4i5gkIarZ;J`w;Y?ev{y{DI{K8+lE8} ztHirKP)@v!fokBAjA4UoZ@z}i$ehpihnScLoTBOJ9I|TZqq*6^;N#sX>afX4;oB># zCA^>sD~)~Vd=p*dKw=Hjz1s)o-r0l{Sno|gziioapr=tVyp&OwT({nB+or&wl+FqY zbPW+2kn`e+|Csj*eNyIU1ca5a?O1Mq+!$o7I<{ZCS=Gnq>URqLaGqo$%rtluf@wZs 
zJnx6>u9eV3fdNmEyQ=HVxuhQghZkvK38%FK2XKIcPsL?JuXN(qb$nEXZCP)wWfT=* zvMyCz66wQy7)ACmLlv6(+VU39?4nYl(rjviG;Tf_A5ZOLq@mGq zVdX^o;Z_w*311?Jg^J-#_Ee*rhZ~cP;{ydX&EfMa=qz(Wn>sG!<)|VjpcsuN=^-~OAXYL zfWZX2HkIxC_xEMPqHLQt*B(&uSX|OKdtLp9R5qRW)=QkfhX-6g-FVlZw`}1?S ze4YKZFq_$V+%LA|%QGanVeu_M^q=BdqJSjc(}UiVzVomilr19C{^sFouna3;;^+n( z`tdlg(aMoQ93xKSHZ}G6M(h|c9|lNd^Q_+dsw_+Jr$>(;udHgwK9u5#1L*!~ee4wu zD+?fVfHA;W&8uNiIii+?4U56lz&s#l-ncIwl3M{m=N-MwiFbiZ?f-h1I^9qm!eF1B z?y5CZ?Qo;O!t;n6cC=5e;nCUj@Gb-S6y~1RX;*lo=Knc3N z=zn7K)v$Kyq+xNnDcy8=0cKX>7s4&%)`<^%ZPZ+Cxv*))*h&_oYw1Pn>#ccXlkBNe zy5fqgQx6@RXIip`t_?mP(cqoJW3T! zdr7(@tF31biJG1j%3naZ@D7iThFU97;=hnbPe7d&ttWFs@(KzEv*~6}Hmf+#kGmZJ z`s=Cw&j0g3cKkphfjhV`18wa!oZYMrXXrIw)S}#Bu}+)>P6|kgO0(I64uv_xKp6l0 zIb7O?OnIV`2xk6%M0 zAkXV%`~j}9)e1?0ov+P=m>lWh0b>T()rqn#PO3szssBm-%i9ap?07+MPCvE=K5bYK z90kP={P^+c!NZ4H&fr!_9|q3cgS+bDPJ}?NxIQL*#kOo=;ll@UN*~1l9Y!$viZU3t z>ijj|sM;1Kfd{O>#3qEB`md`%g&Dq?;HgW6(xK++E#{6BolIb9y!mt@)&gcAG_z)~ zZH@4Rb~RNlWBk znnR3J=XPPqtMvu-E;n^@sR^bsuK0^UvS}5nL@{8$1&AAKSJw*%cmeq~K=W_aZR16( zQY09cyZpx)TxSJi)R`p@;DVYn zA9e?aO{vpKs4Dn?i#ll0sPNDQzOhI`S>*#4i6mo?m-iPlO^hnt*Gq04-nGJF&O0em<~ZIg$X)wZ;<{5d!ZOpt!zx_ulC4ePe) zUUm?C-R==4=VJrsSIBN<}JRUpb$6a)`B*=XWf&UibWl;74;q@U8SNqD+vh(OzF2f*V}4F zyWc&`k7*EBS5jj~o39i$U%u!KVYthvWtA;AE8=2*g+Bc<=Ft{_yehIOfNAT6QAd%l z9~9L1#{*=sA1$EbQQ0?;QLW`4*>BXtj)@n2633PSF3IH|z zqq$F~N$_QDvy?4g;1gWxT&cn)4v6NWR3>Z3c!tWSo=pB4+*{3)PAJo~y%$3BSk|?~ z3EtB~c)Fv+CFG)^e1O(1yS=TSA5$#BN%0%T|Jf_pN;x{QP;FZmBaF10n3xbRFw(=j zJlX8FGYeI)u(%JP5m*2k$kTsJNI}<8GX{x*hv-hWaRg`nAH9LjZxybG1N&7Fa>mB&VywWyEQ#;(6A~#Iv z*V110jR#OZ1qu%R5+b>X+kc36^X5&UTcn?I<>qK+H>!7Ws+CHYMzXsa8F77uX2A-f&%;56YCi8*sABDMJtX$j&>z_$ysJ4iW5v~Rr>MD!)tOx> z+ypn^Z8c73EHaOLwl|**aw9_H7or>`Y#kp1sWw$li&bKNvYrw)Y5NE9mlT8;8)W*- zR)mY8)>1MqMjC*?B7M|ecFDNc`f7}&|Mp$DnBTQWA?okuz^jbiz`!x%blm%r3C0NJ z!Pa~>L&yO5C++F>w9seJ0=UWuKy~-70-izIcYqn-ZfgLUDnpnB9oxy?SZ!i~=x-F5 z;5E!^328RM#$9s$_I)NIIM!wMcn@AUzWQI)RZ_lIemtHCS%ebh;@CqCsS)vfJ~lqS 
zwA$m4W2t&cpjKIrFy8nzNm(d#(!B}vg8MsKWWbavuVr_wJZNuq+YM6q<43emB)G3& zB)epO+N|GnG+#ry`_CE9`}bHt=U)PQZ8JE4o$fy3hbRNQlYfj<21N=85kQ)GbKA+&nu+6n|q<~XwCy2SFipi*hpmf;FeN;fKC2t{0Ih{Hm$ zPTjhc#6joEDN6{|2aZQgaJ3w~@@<4jL~?TD)+OVOo1QgpxKNf>Bmz7BFTg}T@^fgUim~P6{Jhk} z8R{WWRJv+-8MFIWEnvF~J^j5@40E+M>5q7LtlbfxYoCOO6bnC4f1@48zYf>fXDgeG9pF^(FL-2(#|Csrj+y*7b@YUG z`xxmY{BH%lc>|0G@3DzBl05vEr>mu~g(d3BY0#UZFbc9`u3r@s)Klwl4fAE$a3MCT->tYufonfuB?cb25E1EGPzEJnZ15TZT0h%CsK(^BZ@H(x)uQ%h6C|6wUFp}e0J)}pNpbiJbP{p*wo9^k@B z4@Z-~LoDFUP2vBdESW;QN$sf|?u;abJtd21ZI!aLT+MjuL<^+i`+!XD=TAidEf*9y znf`a50`E>D1qCG~O!2R5C<%A}vYh+x@}g3nLGrEbY^r6YQ{T4E2O^bKp=@HpaTzoI zT@Q^2YZGpFcI*pyF*!l>m}F zQqzit*Q+scan>DS*J>T724#66+W3vFIXO&k-n`lOFE%Y`C+ofkSfa5}jEN!&6sUA4 zD#@KvxqrhM-nV*C>4uOwJUnFJ%{zSY;sv0$k4$uP2ERLpG zdNF1S!#Y&hYeY)My79ZV#MJ+ErPfsp}@M`GWW zJ#R&ZcQ5yl`F_mKRZ!Qpjei)Pi?_)oISTSRAbMek&-ref#!={lO$H4B~3@g$q1PIH{{6v{SK%gk#v!(s@1+y445nuq1CSzs%q1OuFGjbKw1!UWO z(J!XVl>#~QAD3nmc-3`=4%sYfoZo#pAWat;K7_J03v@ImSpv-9=~>Mvb&KkU3C^&} z@;V?=TsvP)BcXBNlJd7_HEeCYz-iw&}h8eDgk8sGxhN{ zaI78Hb-t0R0S=6UOtftYeXNfBrT%El;u%FJ+Czdq``@_$X4474RJQ9KiE-$~x)g3F zMgCpWz{to6=SV7HTK;>PJ;t=TBM@x4b?=VR0@G;Kf952L<_Yii&aMYE5nd-2ojYYN zhF(eWTo0hB`8Fsq1r)KhW+3X4Z5(p_0v(NdnBSo#q^rQFi2T|Rq%kjG#s44cb6`>G*a^N5-jL|u{sw?aK>y(>gdrC!gjqpo-UFSZ1YR9 zim@T@Tu-8pDNirdhGtFBW>KUi5D4u<)|3h6Xcrhr()1xVS zLwsG$K-VVgt8}9GBF3BP`D;4c)rHQlf>B59TFyRNh?(RRy`;1}im>kG&mc&syn+sh z-NnlgVqulpZ%X(TXwVqB82%Gdk>fKEY5$wm?$=*Ux0*}ekzK6a2=2DySF=6l#g%$u z8D-tBFI{R$q~%)B>8Dlnp(L+K_@E3{-=0(<$7XphEv4l6GgM1l?Svf-o(>ES+IIKF zp9i;xCEIRpZZ?|m1s?A_x94@U%T7%l0Rs{fT3X#@gBtcRwUMzgWQJAqR3Mz(VSUA~ zB$t)x&Wp2lD}gvb$7PiI3=IQruCbpg)=6O(Qt9dK9j|u*kUU)12EP$710dEAaWDqt zZvYhnXc!;on(6B+0!kO49I7#-J7m>5KqHTjy+QbmYs8A+u_(XYRR1VCpJ5@=3FumZ#^fsoTIRxtS+9=L^atj-cI9}es;O|!jk$e zoxl%vIgRqm@;fIEwQ1+&sVYNG0Ptl7`m)T?*g_ctf zvF;UzfE@)jS^{BgmFAoqgqOg4spDH%T7`!5fRR_m`8Y=@-tCC{d@21EyXsLhpTimu zpHR9+C-J!Ok`$t+zPfrin3fdOr7fC_? 
z!rL2q0sjeMJijkd)&O&g@a=6fqQ^3b+cF92grofq<4zJ@DSWi#lkC%6nY1 zgF&2KOJxfqSR-WcS%c@v+SQy79mLU{2_f1786w7l3Jg*@Ah-kKtILz_W%o)4C`W>H z{TBDKsPjz6QfI`0zf`G7P2{(4b5|X$q1{A*XS*Z7-3AUoWymZ!Chs;!pQ6~<*!IzT zpkI6D4~oSBk%7SZ+IC3}#Bq_d)Kntglz^{y*?TGw9e!Hz_AUNfMi2d9pBHj+T#p|8 z@HSPdUR<`nq~cdEJX)H1K$Ci0czE}&@O7v#cRV10AnUfTAFqwH_~ek7yym9#;QRw; zUI)t8YrzNRv7AAAu>&3Z4R4@~w7A8ebmasC6het^H9RoWIY{~HrSNfU%2yP22lLYa^0z1;}N@TT0uXwRPUL7MK%V+2>Vvt@ox z&M(=F1F&@0T^Ph$KaSA#x7AT52rG+ydhztL*hR~JjYm$%kyLvSN;`q|2tlNTkIQV2 z@Vg5dILvjL4KU}wzk6#mG1l_gB9VHN4)XE5*bUxgbSBrax5mcHr8JG@Sj~k0hVas|esL;=v0B8ug@x zn`J9|yhE%Aehv*i6c(;Eh5t~N+trHd4j}0Dcr#3jhNkKlWukb)u7YT>H`z zE>hihIbu7;DqDSC0`fd<1Yhi?YS9Ze2PAh~h&N%jJGndS12(=C&0{-g=ps9Xs1a`a z)j4<<0ki&6EDwydnXdG6NjXAIP?#FYJ@(KsC|mv3Gke7w9wewt+@`;J^L~@9D*j#0 zAIhAOj~Ok12;S6S5ND#esqBM!79hofb8*)zGt}8sT&k@Vej5>Ckl9*1`w~Bhm$>|M zG3Y#=lFwSh_wfKE?3dV?~tW4*XlBTXHf)*8A~)qjJM5}B;Rw~Qg>GoK#LJl^!mktNx2 z7sHR?l(gYjFlBWujF^%XNdhfBE8G5cXXC0V4is`gco)1FllP(j)@H&+Vq7Yo{QDNA zt=$5vl3S+d>%0K7U(OWlTGM+uoFlq9F+C8z^MK+G&0)nC5c}Xl%&bwnB^;}YH&-X< z|GOnUz-y;_gx1c%Z!AYO%2xy}NIKBaHZN!|)tS&>P#Hpk_KM+utQ1}6kAiJtR@b<* z#pf<_6Vuy{DV?)dTGE+@AAWP5>u}?b?_Vfp&&Fb+8+DlhZ3(+byEJ69F=@3QQ#h(* zUa`J<@*aG6h@qsLEV|C=P01wuYH>P&eA31uRiNI5SgIGCB!6l0sh8P2$J z}VAZGlf)Xe9R15}VBOUjHa9pp}$4fTbJmp|1bZ-3|2Jn1I966J<_iF=;=`&ZL> z6bc*B1tfM@!5CZ1kS$gbF<3=n(rB^2&O{9>GPOz7OUC$k?~H5>Q>Y zV5cEHwxsh4bAj8@=sBgmI76E#my&#>-Q-@!B+9l8^Ey9xYpx(lDyA}*=k zqc`6Pl7o}nZm{-5zc(?ET$Kon56*fDMvSw0%!nMboCrI~u&7i#aXZ{81Wn{?9r%6b z-zc}SGrW&$-d&M!lLg{VBp54tX@X@5oU}t{OntSxxsYj z9z!oeT(#yrC@NsT&4M8#va@#GEXBu5wAZ%%_aU zN;lrW*9i_a-|Trk5Ll;^?5(V(whjolOMKbfEiEl;;t5X;DR@X_E`4b(t}CqFrG5R; zOg{8E)!Do^Ers5VvKNQ5*)6d&Ds}?yGvDjxLQT-=U>7Y-{b&n1n^Q1!z0X8xWlZLr z$8f0r2{Q$mbFd6)w}1z6lgFf0PbrGma=FXw=TF|Q(?EcNXRlTuWtw^SiqjMG@mj{n zN6ym#Y3JEv;+E5IZ!IPSIT8S&Hx-K^gleiJ9Wz|R-`;tlw>Yd|N8FC{tQMT9br4G1 zECA+SI~pn$XN%qvKAXzh@5J8t?pd3?J%70zs_Q#7gct+6ZioNAPbF80H7i|~Pf{`n3{BU=GbzbJNHl}w%aR`*)K 
zp1Wl_c0{DvclPwe+m+M-eMI6%15QWx-ou8xa!{QF2NINvQHS$w23+IoQi7>@yQ)5F1f5lPH@RZ$GJSeT!hpF zK);YGO86ucT2;tn+b$_~B~kp(4w@WRO1a$m>eTV?TJLO(jd8sGl&<7EZGgE5x|xlg z*#<-U>r!Qn_?F^6ni`Me{FD9MY~~9o_q%=*Z&D%Zrca9F>fCtS*ArKNPb8{cpaM>} zHFw*(;!(Uk^z?G*pD6M->MoOLw-km?>ezb6g~-V^2esST>)eN+(39Bmi?`B4dfFu@ z7-v1eXEyt84g;e?fizEilmF2IkVRO?5k*)qDdXlu(p;=yPhJ}!LL3cf0ls4xU7XxH z5+Z3MCXDY7sDUx{&h!K+$w2s?CgQmw%3U&HBjWhy$y(Hz`|1aN5dMw5wJIDeC0E64 zb%u)CVG4<_J7S+jSh*fCeZ}ABPFUr&>0}(V`xJw&TM4^-YrPkM#AYt#Pk56V*Qm$oSenw#(v{H2bHX42Q0&I*v-oGvZ$Hodm=+mlzwR4S3L|J1I7tKk40<7d$3dB3bV*R~*LIgE zJ*q<7uE9AL>oX;h5r%G%@~e_}V_F>z;E;|M*5P|n<-GmAsOZ9bl&|C;_$`Vg+_RgN zU$|U93b=~MtW}P-nL7NwHv61Rd8;2g*_QKU>*lCL6==bVYtU{p8S#6*L>4~Hzky1$ z^_YqWAv1ae(y;Y$&k4|a(S=XwBb7|2L-Rs*xoF)~Ehc}GM&I;0xGL39`i;fSw(Olh z+Byzj1#=lyXD<>Ez94NoOL!E0?=-RvCV9dm>+F~37kK(M@UidMVv8lgH^ubFZRu>! z0%zNWAhK!i2=T2&$xi|xrr|r*gEciZ(7b?l#Qk}H1^S`{Bhji}@vwU=QXewIbC=sBbhbOpF8@NpLS*>!4*}txP{2vS805-Xh8$jHY)h*VhpEPFaC#&WjHM z_PJZHvRB?~&qj2jA_U4LMuF@NU>%oWznNYB+IS;>o}sx)x*nA&zpc~?*&-)9>R=vB zDEsQ~-&Z09I+Y%|UBSP^2@$)K&%WECcAh|TC7vIfFoq)&(%WUfMOH!cw$FOX=ai=D zT67JQm^#_87)=RnZ#Iovjib^1bsF{&8cXt%Syp1q^A)4|E~>i`g+e66h=&Fm%Q>oY;2oW9 z2qB?dwcCOp>vJ_MVV7>R&7oYIsI2yiQAHRM*5JKddiRCWSVMVWcH=h!QJAEU2C^5^ zpE+5@$nA?F>Lt{&c+>AHC5t?%r0ytLY)%c=lM0`rA5(z@D=WPgZ(e_vP9)jFXJeVT zNan!o*jH#{jIX=U1wytB)D$A_o(7eSZa@BRdG^M68{D6_K+M}t!))ZU)UFa?T`2e2 zTC{V;#D1~B?fI8>B*!aH_e0ovvDl?N`JHU4eTxO$g?fO$#QHkTCp~RV)7`JFU*GO6 zw%0itmnd}aN2TMH?*%eSeU`mh>3QPjJjQP~my9p6$Mx0b1({#2%ts{x3G-+2N=ui0 zx&e!(S3W-d%7S%o{y($lePLWPB#aL#vT&&=M({v7pX8p;jEOyG|4o{laoAQYELxBC? 
zY6MwZ+In7KAy-gI8kS!5iZ^*uvhO4U&2ZUkx63_gN~C#Q&Y1PIh>O8wCM#j~GBn* zj)JIKt$ffflLT6vDm~mLcsfY-t~>+_Lzfn=53Tlu1m!--D9HDW_&dh@eCjpOuRW2R z`J7N?Fx6*1*YE?ED1wrnv-#z+;E`neniY@T;FY?|h6z;5!=+K?3Qj;mzNc#3!;8~f zfg571)F>y~0$AfFoMhey|BbLSsc8Fip;$Kzk8aNCvX^Egzj_lIX+JTRtzSbO!Ggl7jmsQ<;XH zKqR;yr;uM^_K>2aWqiSQzSf4l-sIP=BNB_&U1-i?7k9hG>8E$FyCJG^1Z&K0xUj5H}6o6@*nJq_|at-G~cF7Kqcd?_fLiW0c5Hh;9_vBW!=DH9A0 zc}GeeKfK^H(W-`5XRbV$_V7i%*pJWS9y6B!pcIG&WNZ==R*?!W2^cXoX2#{?&i}l} z9ZL4ovH9q%y^_61EGH~3+AF6ioXH;FokM1h8wXx2IwiLIhNOL<$%0(nwx_n6^FxV7 zJ-WsO)xhXhy+d-a<2S#aZ;Icr2-qyT4lX9%aVNJn*U7G*>)?OJlKm*`$iW1|H(maS z=48U>7l=wuu|Jl&_N#wo;6&d5YZh=E+;6SFg%J;@a8~avmlA+PXAqf>R^uuKl)KN{ zm^7E4`R#Y`N-Fdb>Yl7c_}bI=5-qjPC|wc!$s$Wtjn94wmA&1kBU;C$6Gn5dAL-d=Aj^a*stTHk@c3C7Ui!^JA`L_LFjB%Q9%? zdDe3^=nce5Un-+urHHWzAn2!?`D3W@1cB{8Kh5~_VForV;`4j0$UTNq3u9I=2aj%argDZN73^-=0+hgcnw~k zT($Fti&mG!4(i7Ry+f<3Txwm2%QYdWcQ%#H6hKWoq*~>BfrcyX$=@$uQpzcnXb!KF z3mxw?S)FKJ9xZW~fZRJCNK}$?b0^2fz5;i-L~bYI>~G;6Yu1yyC=CvV#fD*b{TS@b z8FE;(Y;o4}V_%7%o$;PqcTgDx4%|9nlg&~`PUyG#f&zo$pq5!SbR-+}=;I4!=Y?k5 zhjQxtZu=h7|9)56!)%YYqzKR5nqgWZ*0P^i4>xY*em3o%*RN`_nmswz=Z%v@t%KXf z;{0mMd;Zt%M?+l0?Mx#WGbJhlE1r+S;yku?rl4AonAZ=wUS2b1c%{a4gw+lQ(KeX8 zGYFqfl))8m#vvc@#WMIDT9qNFQu$ToXSFA;qZRQBHwE)EWXIYUJNjCkiHGmP@RR=r zZhy?a=mD7iE)CM|zJMm7!b7aWN=V2jpL|KBOq+#2?LdKS(-ys_;dOh`iBvI06@P`LHiFINV~h z3af_O^X^dh^`Q~YE?l1$&^63>&NP*lGrl#l3zrXdX`xbtwTFetd~S2!v0P_HSf!>Z zxIk{bedvb@YTZS<(Zw@Wgd%_>Bl9gfPxX~bVUHmw!6>0$Krm|7G&t2~uJF5(zq~DY z$+CJ`$@^>Zd3o+RO@fh0Mm#3tB|f`)l%>|xUQ>8a7WS4k79RF+QK=f|O^Y4%d~J9; zKU18xJv?5z1D2N81#HX+S3(Rz?XS$B>G~J6;Zz8ILg~Tm%I*8=!=^^R$8^ZZ$=B__ zW>V2e=EuZdD%y{TZnTf`^mjz*oxa){_Zp(E31^y3*6Q9?KvAh{j6@)Ah+w^F+pc~i z#^E_+45~6BU({cf^E?yU(9KsF4AW4gc(LXa^EaT6Q^7qFZ$I5)U;CrFIvv3I^VR3z zTD)1vqFWBpeY#ofa&I*z&ZMe}q+D_vOu7jqRR zD0xVp<;iiyUfB|(8^U6MyHJ)P7#ja|uCxK==6sP^&Lnv$Z2E}%C3_A60h_t@uLqYO z_+DI%#fDY7`Jt9aY85t4FWEI&-*h2HSL>l@xnI)9&$4{G;;&i1wpQr)rHu)y@fP(< zy?FDpOLFQ7Prhu8MW`uLzF0C9hPMM@3@rh$IHH@^Y?ZH 
z(0|s|0&7e5X+*CpPX~=EYYD%za;3s0qe=aO&uf1sr%d6(O;)MmziWP}c0y1=HdQ(| zwf%IsuiGD;TR=lbo?l_t$7a9eYZ9pBqLM!Q!yBWR`2GWl>zdJ`CB|QDL|^E_#;>C@ ztfPw42fqZ}Db^pKoRL=HRjfifQ9@BTo7M2+vzE*?t9?B++-wH-x|~;I0#ZJ{t;*{o z4U>?0%gh{8Qc{9Hfq~Bw42U09R$9BauT5T9mme=dqbFmAlD*u%a#g9YehJcpIXV9K zmy#kPh4_5tyO9F-FzA%fi(^du&RrtTV(e$1p)%H*Lh+|y2J zc0JONKj31+2wA!a$cd-jE(nX>SgiL)tWEx+PGP2R$ikyk9{i>V)%Ej>we=-%sQR&=GW7HD{YjY& z!h6r?6#@=docO@c5o-xCiLswu>#Oe^=LL1>Hmvblqtp+-3?r|C3&C``mezp;p@IV7 zYn3Qrk4@2f7c0RK1p4%C6f%wOe_zkD%%aowPx5x0rHhSk4h}18O{dg)`ze6(;j6QN z!Yoxt=ez-mv+KTG$#O$z5V1>eyviJil51L{ZDHZ*zq5=AjosocIC0oI9%&DbsF!Hs8PJnxR4rUw3rR&e z^?of;BTn#GcGrYK7s-^H=8>3m>FXtu3kjoQa+sjYSQ!3X4*#>Z?{|s8OWE60c zhZRYYS`0=lpCD(U&P2RO_?lzpCG%||v%AKGxMA3C1w71V?_~6Z3C$>o zcPLq|wIMmQ+sw~0@iU%LjCedM%0_>I6wKhZ_1BwuWlYKimYIPju)Y%uQr2yhNuxHn z4so#_$N=pymR$~H!;PK?)=7qsu73N^N8YIQKgc0s>bGcNd+9^48Dj08_QzgqE~d0K zw>mEYz$#=3N4r;BMgT|Mh1_KmM`>C4rrLV&V}~S)8$P!Px>H%FkD?9|lLa;u=ZD=q;YHJ(11(X}9ZQy3D3mPHeE#WtgMf2KFQ+IeA~S zuXO9m+*%*KP1E8^$gtF%VOjKf`zxdOs*4`+xfPl-8Mk^pf5t{Xyc0h)e95rOuvUk{ zzFT&GJAX8v*-;v+EM@rXRXdTGt+T`y`;F6@FaZ0B9>kxOEHuCdO?;NMMRvQe73kY# zY(u~LK8$!@acBi|pVSXq+;mZJxZcHr7W;@NfE$YINYZhoWtGY2<+Pa)J?=u5wq!O} zZ@~-o*mSMy^!e^d@mrBAyyfLAJR0He7Om~ghBO-!U|c^FwkWMvBx6z)Pt*jy zTy_L~8gQZqEM;BjxyxUO51%=|S--B^ovet*IcPGh&J z?U}3g3F9S;!Z}{Y3BPsu7wDP+67tLH{TcEWNCf5GPW7`+HD!X)y@dYYO@x2r&dYxe znCmy?a{1=C^y=NC)jN~Bunm6zi3Q3S$UI}ZvG=Z888Wn|(4XfsY4yWf;<6^i)xY6S z{>jzXbM>6t5kKaRe6py5;%HUULdE9m2Qq#PUI`t~UFgfG)aW;Jd%Ha<4wO=sdyGJ3pDBdQq^@58g6GD-3yavxVq*B8D*(afI^alDR-VYw=VDoYzHO|>=a*{l8gdzY&6dIa+;W5^*|=$0cl=v#YDkHSo_fQV z;K^@cBg>)L0rfzs-jpcV9m16eWHiPTX_b0?>%l7cP0sbR4Y&IQKnXT_XSn^|viagf zxn_XDa^k87MZ0=I8YiGGwaIo{#Yk4RA;?P#LdaZ5TB@sy2=^c#8&~D9fc`NEEgML# zYJ{bXrgzM!Xo}y`GX?bHX-Ixcg@mz=m&jZm%_Qu#d-hDHa@L3(ea)Agd$65waF&DE z?r0PO<=fI5F-1k(QmpK|+qk}lixr0R^J0eRG(x4R6U(V3bivR3x01CmNkv&S$f86_ z#U@E)d4`O}r_uwCr2-1C%}m_hO@t#!NI`+h#QN) zDk6Yu`KitLd%o6ecd-x~^l~bQtPVckz#Xsa9&Ql3KT)(YbdQ++%AIv=j 
zpeGe!v4{_}604Hu1k|PkhH#Ry{S=aHjeCZyBZ)JoBR>-^LgO{%$*!GMn}|WbSh37G zNDVr!*U(`*S(aw17Zu)^W=c4pEMcF^J${#eNbmqn(v-J;4rkMBzL_Au>drm+Ak0Xa z(CYO_852cqxurz`c2op(k@vLUv0t9EgI zNGcuei0fx?jPGj7r;kv@4_;4KK|Q3|vOPUuB^myzcS551Y)Ly-&7~_k+rJ_@e5b*YGO$@byq4j1gZ@FS4mW?|`;?*HNg9u8O^Ms{&#RDOmI~D` zeW(5nq*7M3yO@R!wvRJv1#v%zHeV}Rjb;le;=Pj7S*g2v71wASN7(voPxfH9CahOI zXO2q17&<23xEcN$d+8HIZUm@#qmxt{cEC0XE+fzV9YT1!J`%7 zw*j0V4Gm3&YN_47z)s$tW79Wb*r0+Hlp(Z|Od; zY~E$&nZ(A&ua^kKj0g$}0z_l9rw3%+e)=i$bd6>68XL*0Ha?vurY#9e6s8#YgK|x< z(#m6G+EP;nt+t$whjiw5hMs_b&U?m=eh6r~cjo{E*bsHI{Om15Dij$V{l!KE3CL-6 zkLDeAn@>Ce8Ge*Z3J5}A1LensJn zqz$oF9O16x@Ww@jjuw4ehF=g#%4bnqmJQ0Q3=c@)cQJ(Dqe~(tCP=AGB#&QOxU5!< z`_*B8AS2$NE-dKm+c+fVIpU@Bze#78&&Ay9p(niWwo_4glgwsO6lFif5_WiKo|T1e zt{tVgr6|Rr@C4M#sYzINqm5Kb ze?chybdlD|ZlzQyvp5qOyNi*r(^<8(%$=CMhkPsJ6UYj+eF8|;Fle|(Pia<`uFZ!<^Vek!M>Q?O$vGp8m+@%uPrMkI1Q>pc%2e^E0``btC= zRl`7ta+NlH`-jJ{@jj597VX->SaobJuoirY2wzZO9remBhnti_FyMu=AF}9JUz-Pn zik^;i-h{M0BN&@9trL>&*_EK>6^2IAzeo4m{O4yAnU_VDzqX1xzW&wa!<6HNP6e*) zkQ;X_r)Yf8s0;BC_=b5hHvRz@)IP* zxWQgp$=#ed>UHx}dGPbK>e6^UZ>z9vTToImV33Y?;2mY%Zlb}BUR11Rz}z)W4YF$v z8C`Pu_>{Vx-jt^sUWE^Hn6sHT#m#9Qet~ZJ-alK`gpX(uik^?Xa2o?9$qSdK#)nsdr~n|XBn9tI)dOujDPHDQ33jNvjQcqmD<<04RNJ+ z3ns~nfe0hc-Y1@r?`uTx6ZH}dxaOKeGqgss(rbF34t`61O4jpm9rR$u>d<<+X%}z0 znkJQ$Q&%5KVJz;2hJl8K$%LyY&IzuyYs=oKh%s4mV#y53iIs-bE42F+Q>EvoxBbxZ zUzrTAbmHS>78536#!8?}ZPY5)*$J`et|1PXIqKT5_0uJv-OdS)X^%@fXqHai?_beo z^zPG;Odfh&XHNH?jXoTgt~%)hn}WiaLl}mpbLve*bPprWi)7Z8i*s;a&go=@UX1QwRgmiM|XyZI|v(q}V#N zHaStgM)at^zdptp=YjebfsfQtKha)1P8S3eMILx1HAyJonPWTrq ztx2ErwFJI7QoXrd^Qf)I%w5RZb?>y$p(Ax0vT(ne-GXa5R(!-VyS|pPSIWsqD-UA32=}K3sGOv22+Uk2bra!&oqw|TLdhR~{VG-$k zK%*L=D>#C#dl+A&<2JBpFT5viFV13=?(|WO?PRrd2G4pe*N=buysJ>g{+;}}aAy+#uz4$hP~PDdt1Moi557%N(S*SE$& zF>~|Xyab)3L)s4%x{C{6tv8$v49FEW zTe^I=50qssPw+z_?w(KA%${$UQ(m@mf85#mnJ9Bcrow7ClGN3&C!|x&BA$GVW6*P3 z@zMQO`k_pgW>K>RItJPg9@|3h4BmG(d*hAQAGm=J$e)LQjlF}doFhtc8aHTp-je#D z+K}i7x=5SNr0CP!+3p$_lq+e{G(EC~MM~7#~^YLmP|h=($+s8 
zAT`|_3XI7~*8Au+6fS)l2tj^Jn~FW!;@@}q6%oGiFi?&9;-F!P_?6(V6NhS@F2j+g z2SZ6h{u^4&ye(7M8d=R zb1VCoD#Ru4uAP+7IsF(EN8VB^ewK?Xf;+A2Bzo9LhRn&;;aPa~wCE&mCcJRI-Ta-f zfXO6KfOWfd@r2~v^#%;K9Jm+jafVG&zNa{#@rkG*Pk9}n)B@~@y338RM z#bh~&$<_v8TALx5V%WYvjDUT5tC>I`sBqTEd&>)2SqPnJi_k9aknRsw%J%~vrMC0Y zP=xK_T}~JJ?sjsx_hOfQB=<7gKHG)ZxtR9aWqgUE?8x<#3UgMGXyxPOErMj?$Ib>*I+3>(e+{<~tNBWE{bO z4|mthP3hN+ibixxM1($8GpO!r%M=!q*00RmvQk?)&Y~~w?zg;TTg-4o<;@0A9M^AL zgiZP1AKu6&Z{O3)E2*8&D9HR+(D)|n>svU#x5D3o5`W))eKLq}@tZqTCJuJkO`gA4 zzUUDJd32GZs9zVhC@F_{rWj31!Nw{n9s^&=!ui(GkS zR$jqicI@3y8gs&DKX@z>5-n06)%5MyVzb|5iA6=Msur9Lxe@!{5AH9Q79*MR^&>J* zUX*`)9v*ySyUnU^fB0RegBoLp$YY)BH@+vVN|)IQ&3Y*Ho;LobrF#EEME>}5mXgRG zrptM~?6;fm+MOlOOTP+2Ey>w9{i1of2@ZJ`W-xc?Kx-9f;ZQ8~f~*ggGfGs`+%2?g zwY#n?n(TL5k$ZhYB6q{Mh>1%IW?0S6s7cA8qHUV;<$sIQ@$Yqev`}NAPTg&IYAET7 zdBN#01LPZbca7(3OeCH?AMYMQ8K~&#w~w5b*Fpvvy4DcO12U}tCLb|6r*7J`uI_sv zBjoSaK1GGB-DK_1`TiJ(Lw$_IbtPEn;g#Avs$J0u`xtDeO8wzN=9m=(ZQ9w&+{E=S z7~hc!0o^!lPpfo2Fu8Zq?I)M_%t;psM~&6*>@#??Wqn2o*cY$?P@)wuOZ7xkA1Jl0 z{0b?(vxq-R$Z>N(6C%QacGnWuR-G<*z$XDsC#=1pWYoz~X_BA|pMNw>jJjUo$*f85f93 z)M}3tST0Q1r}~ig{&D+lqg>)B|F+fu;rOYKzmik8gQZfi&u(eBzM8w-JMlGTD*`Gh zCvPflp@vD!QtCq|7FP6xu(-(1e6pp^LLCuRQ`lwfsbv#vH${&1pPUJ^#|0NWEg8GT zZWj8M!vXv*@pgiQ1wUr`?-L_w;;QP#2x&>P$!bl64M+=4(z|(&^((}Urc7*n-zzsL zVKOQGqhqjkmy1b?igapKcgLGC_|oyPUxfBxnR;^UYIk)$DN zm$MNx(<6=+>N1OH053*CMBA(;#_rx>vw?WYd3z_{>R-vur@>dWxoKFq7FwNM{JcI3C!ZI*vS;CJk$J>-d3@ig@FvqO^W)dA zj24f!eknJmEa!8|aIJO{XR&I#<7-V?L)Y`T?(_K+>8s(Qe8dLB$B)yK+ug@SJ)0WT zV@bMuAy-!(`3bhBi7`WhePo|Eyxc3Ij*4-TGppWUoYXr)lfLxA)P>$^6>_(z@uKE2*m`~4t^r+7S^ zYU}%(ORf!1EY%<&=TKXrFcIdTEC{EH*UYFDg8r6nM@qu$!L8a7ZnT2ct7%Oz(vTCp zEG1BM(uhRJd3SID&onhz5z~GZX0hnac6ksRI78CSvUXo+H=7!xc6Fmjr*^t1(FJWi zYrmdtA%Uw#KEcT7ZNsa^T&9!^+xnP9VYf4EbT<&EopgDEr)l8jp{rKoj0DN%x$dLLbhjr%-tz_(`S? 
z`0#%N5(9~3_HzIOoABRo6d_H1I>dgw+8}0WQFSvpMibKx82;vwn^tz`FJkc@pNLaX z!|PP`Qlfk{l)hJ~7Ue}f{HLf<9TFC{6)Q(Ebzd(=-xkci;u;D-AIq)~qLFlW;&F)D zu&HsB=1S20g95rsY2s)19`5fC=$xwXR7l^MUww6{r^&gKt>_x2x z2l3P+sy{?I@v8*u_?LZyfllRh$wjLYC(5u0DywCKc3?>xFx6!CHjT}GP0Z*~;Pd5G zSAb~)qG>-|iAu)2spN#XsaOT^!!oUw5(&c}0$#S{qexM0MDjE{TU$Vp>NKogySOXD zk{+HwsObNREi`_zsrH(+hP|?KTpe^rK{c#fVVv#qTdkfax9Q!N^oY0Kgk%p@<_XuA zivpcsq%ve!H;5U&@9DU>1c?Vgp8wwF1GCMPw8qne)?V!*Wx5zH;VjPGR~th z&$juRqfg1EH`}@!F}rikcb36Qu?gdLzt3_UFd`F@g;dp>vnB0vj^w9U4zKq;Y*kGk zG-O^KBGj&xAK=b4Rve8j%hNEy~(s-EpfqDxv>dXN?4n ztc(T&uZTS^Wq?;j--DK?PG30DSQ9H)yt7A+$L;rQ$F3JDe2oyKZ}!GL4egp8H=`{c z-COKIW`U^z3p;CfE+ngHhwEA^{JvSQ72?{-ari<8w7)KV=CR)&9wt1}VUdmjJ&9`< zOeWS|=Xf91YQ{7D#l|+?dHsqmT9YURGM0>$(esn|>DP6mM(do$iDj%q8YkR+n!k?M zTZa~!e$*(_qSVv%7>+Zc#$+Kg+--S|9I}SHNwze7j>r2hJZ&W60$%w>oQBrNOitXr zHM$+jvOeK%!b|_OA?hpllwWka{G+D2bm#h;&|Q!1N3y!_C9n=P4n2L z&9=6j1_MD*H6~NhPRGR9iU(9^waemN9R+Pf$5M@`Z+I|aV`>j(CThj! zPFv2}*^2`;Y|jK)@1IKL!C`?w)dHs@`x~*Uj+Zs|bf52}q~5O6uh1dqy$1@lJe`go z?8HX13MY~Cg-=qlBqf3o-xDd^uP&!%Q^K5x{v?(t(uyU}8~k4VMMfU*EWKYzhAEA~ z0vXE0>1cwXM&O-2#3+!3{_v0L?kgR*2{j)65p&sDy7@gkyj451Vq7D-3UW!Xf#Kmk zMS;h5WR%@m2rY>Lp%vep`bcWD>kqwdCycTRB(zFgUHf!WqIi9G6JOhh#_gm%rPb2z zWSJq9Nx=O2xQ<1TS0IiM92R=GK0+d|a&z9FD&&hyVS{-X!X|e6>zarXe}cgW&9zKB#+YfE3P9hcMBwi8`R5+7(@)t3;VG< zKBIN{Czo?36f^MQ2*nb0`I|&Y6*4UgmGgCQpLpOaKDT2oUr(vCPXebl-TvoaaT^l3 z_t&qnl5vignIwZbk$r54i+Z5*{}N8)?sn zR#E;6g@-!dXM=`6 z1a9jQrHe(Y+11aN`^$8Pp}#>_(p{t>NEMo*@~f}c@PS3Xi~P)pW<2gTw3G zrRA*m`8NaAYF7l`dUPm*<>?kUka~^W&0O5JTa}nz{ZU$5FATC0`qjPJRdnVX_-l7V z@_iZfiH$!frwxdTfoc*(AliW=sH6+Z%d?b)d=wp|O=L9rlXyvd0$nnGrkrg?foSD+PfjRO81yqs=f?q$zfEco z6egqZ?mkoOYOuA(G<&6*(o(OMQ2p5qYk!rg*pq90ODOAxJjLVIYxfQ^-_bHYk)?{s z4Y&E-B&xdLq^Kexb8t+b%71jZPC3;x)}`+6cYl7o`LyerD|MoG+D!2kSh0oe9PC(e z>)(!FWA0blr~Nxcx|x7onRP!o9x%sBXEeX9=v5K+pSc0N2y)uuN15Uv1qE`xr&;4O z8cKxP2kHIm+4mbWc%L zRebOdSDwjw5fvpSVueE&e$zE^$HU!)h?rQmT)w1)>%}~aM4kCuCt!+jUQNpR%wu4M zWNFu`f()Q?sUnb}D7QG}Ntle8jf)Ns9`gO8D~UwJXw|i$g&tpDL&nv{nNFs 
zyHx*{{{F#lMY0f-*#+KK+26vr z^c#2we0p>|vhYq`3 zVhB_W4EZfBB{DEC-=`cEgXIlujTA}%qr0W0<(q^tO~@C3y59rM>|vf6W-+m`-6}?@ zJwM(sE7zJTWZyg**8eS7QF+{7a_%<(ebQgIr`WhSR$;G>plm8+UU9V1S%_msBdg1N3m89t4Fs1pT5xa{BBA~Yb(jD>~C>^ z`?vu%vFSoE4U#8)t*ZjsGvmOqMW(C?}G%AA6(oVC_f#N)%_+MtVH_v zQTt!(Jn-?GyeSRmDnpO_87E=-?EoI_OR(sgr52dg zKClREx?dfXcr%kCBRk$0r2xT?$z*qW78N)(vPcw*N)v|_6#=*etE#wA78P<)7UtHE zLf6!^G)Yxe)#`3Qxv}lG$rJzCDS*M-O`V_835<)7;Qaua?PA&O#0D%bX7^dfI`gG3 z!0anHBD71kwBJfWVVpI!fQp-msZ#kve4ew~7nawY0BNI=-mcYmZbVrrxUC#6@t@7Y zAJgGNsG%()Jgn;twt%1tHEuDE#MJ&0p?!hD#9PmF5+iZc@k*gl1nRPo^Ii*PMHZwz zJP{VW36|m{g>YLABzTzHN4_80-rp>vn&1VY6((#N7jurK_(&e?hk*&g=#tY4T1RI9 z3g>adM$tXY2mmcCw*cyHe~k&lSgjbIYK<{q=zLO`DsrI?@y#>YZ3>?<6`Uc+e}nL! zaaTV;{`aGBI)~P8)C_c)Z@n0Z6HvMev}7`iOuupxlI7)K5y{o-e(G0$`271tr7`_f zvEEV{vhQcu2^L22n7k3F;y{x6KDmQ*5|_=FmhF#@0GI;k9UviPvuM-DWHdtP`|B;Y z)A8oTDF4n7^C%fFFH!qSMBV{MDriFz3)8R4qEg`hcm4BUj{4gVYTzdy*v{7=25`phB|;y9VSLUu!r|IG3&Njtq7pbc3bM}8$m zg!coj7ik`DHy?F6;cRUk^(vJsls|x(omE8TkCg~+lsa93%#Mf3_t!_j>5LXEX&^#_ zI=?yE0GmReVYN=>ZZy>2jNR@+K096-|IgS>eN@uf#dnYNwMVm!y=I)a|CVjQu&&>H zJ+?r6*Ig+QB;Yjtg(bI?U?wVhIZGx1gh)_o7jHZ`iH(hg$~cJsrjch%46fNRF z7*}r)K;soKbhFyrO2~^!iF`ZNp);S5Aa`KRiOXJ^$8r6?(Bk#g)@oyx?1sx9Gwyn8 zOBj1>ee|=;t28eiRfTh<2OH{;&Tb@Z;8f6ITB}HIt)NhP?`F^`E2CgInl=m;!5dCn zGT=F+I^|C6e)@TPBsFx{5HdmOVJ0Ul+lexvxmm`25(!uFf1eI&6kNsagmQ(o4&b~c zd1q0N{z#iB*-L4PU`bfO=Z^|?>^RjO! zBgm253tpgXa+CjeV*T@$2+hc#KRl7WQpYLVM*qU{Qcz}$rF8#h|CvNwK|y>%|2Tzn zD2Wrhi$w-1dAP29&*(7{t_rcRpa4og^?zgX&jRSf5`jW__hgMOIXNBQP+9_0CXTE0 zq&1gfW3W|m38DQzKQ?#;`=9?U-6=9noB~%?a#?Dj84ZW~?^62D3wNLRka-7SdYMLV z;{4w)kbHN&l=(j|_|LaI5DSAP_-D5GqEIU?D{?%}C&OVxp_pF? 
zV~|5=_jxRJTVn20n&Ei@;CKZ&qUYD>mlYy7Sxdk|0hUcD_8OE73>Xo8eoK~C#}!X3 zhAsAgUOw|}7g;4!Vg2tH4Q{f5_}aG}AP`O`nIP8lcPw%XMg810&if6PLIgt=^6FH% zx>!b5aJi@IaDSguU;JzcJszKDw~6PS0Uz+ygX-??{gZ3=ml*qA#T|`(*`66s)0z)} zwMuSU_KWY`N-T#GnK)8MTt}aufl3sR#|$@vUtXmpV7@1$sN1EI{v-${v3$VTv;ULQ ztAMOd*^ch7->HxYFXOPR@m9r6BTSMKKa(6kuqt>dvCu+I&x*?rPrp4W8F}?W1B2)z zIAbNVK)auKJeN%XH$ceE%2ZV+>-GK5TJ#l;$8wO<$l)UQIdB`|x#b}1BmeI^U*CNi zx?#&I!Cy${D($WC^m@))zR&>7Fc2C6L+)=Vsn8KP;FW4TdDopjLt?{EBm*;>+#WZp zsrOsC3Ejw1OH^ryW1r>b@qiDMQ6+##kD8iVrzZqw(Qb5U(ea=(%3s>xdX%5}+JEDK zH6qYuYNKeYTM|Kws3;v^y1jj>s^o$1b2hm!JBn)bH6N8s!!ss^#Z*Y zjiG08Te=Zec9Y8Z(Pam(UcCaI#k2KRB!J@rPVU}-)^{B2)&6vU;FT~~RP0@-ooHC< zRm~M+yin$r8r^ZlYe?))Xd1o|P3ZRDdHz8BsB&VF*b$$V)a47YvMDJkXoYFWpKzGH z13Vb|)gu|ndNb=7!;klcXvp<;*t6yD%OV|wl^yHG z#oNaxuHkaNP0b+8zCiwY&!!QiRqhkFqWzohIL@3Bj|k-U&Ks+?pb7ZLeZ~m85x!)% zlSc%olNJa)@b*K)({O$VxQ}2*zCN52Fg4xRDINe?6mYah>A4>HuND9>`Q;U|h$J+& zo84|KqobqIAva)$g$C*-+sUm|eiz#*w;!=9CzJp!4v6z5iU%@%3{+Gdrqx8(tGI}8 zv$bY;V9-DxYSFLk*8KUr!Z4Ghp(NJO>QTOA;3J3a4enIIy_+m8lYTVPj@ z0#vy&6A5AgIg@a>lW{m-njpw*=?EEA)x5(-@{W{Yr-K%ehbO2GG&TnDrUlw977vey ziy@%g+|u_E|8GX0hUdBwZ{D!p5wb}>zdw9I!tTlvs8a!4tBAeUk(NCnM6%H_01LQPGa1oZ<-%EKp5@%u4r&%$T@y4N-+%+xUt?`xc(&|*;id0%`1%^M z)au4A5QMffXOw0LOs=6)JVqJ;lJ<0fmH~FG{28h@jA+m(-7CDP=oKE`#e8u&BS0Ri z?ld|sanS?Y-s|I~P=K}tmW?gv=VA&7ZJv(^j8EOzcdK0(;6TxGc7FgCF6X*ikwwx= z(!iM}_n@v{KqUN{*Odhr<-G$YesBd`RKrQEC_x($6A=Q<*GGE``56CtJjByM;(Q-` zx6zA5KmpPRT-o&M7y1{fgSG|}qS_hKKl>p>B`5cVA1z?~1S0xpqm#C-?1DDsd9F>5 zyOPN%uaM;8xVc2NmS$%3#PSV1Ea-o&U-@Zc=Rhm}oV%s@)V1a5D#|7nC?aC{Od&(n z(Yhu)2=N+qq^~IL3W{KKTyzHe4Y*{2aJ$7bUPdH+exUhzCbt>q`aW$b+wtgMRreRf zGizsg5V(B2ve6V5s^Hl_xL12b&5MF>9M3xW$!eB2)e3K9WW;y73Jc%z#P#b}%-QK_ zhUk>(hvz#Z)jD%%{cV^g{u#g1isB!@;t@dK-T>s-4+8!)04T4vUSsFHcU$kj1|-TK zN}8T;ez!O+x2myws02@w>6S~+7 zoIkP^iwiYdxq!#aM)j)pq$w*H346~xltIs@pHGkXV8~oNRzK0)DXs6v4H~oHgJe3L z$l?bb9i2<}rx#652fach5-`8(HL}NlAHX7fG#>l3>~+E`D<|htCWKL(?+K6b<_mCd zpKUTgp@|C_&6m(SU48dZZ6}o0SL-%v>FdW|-BLe3d%E0wB3r2|xILOYxcbhA#e6n$ 
zWx90Za84RTSUqYpme;o)6DNoFMwG|VLLUb#j&t$0L$WwHEc8Q4S4fmUx#P0d7ciY6Y2d0StsT51bu%nnS@bp5UcAkSdS ziv~A|!M2k8xal&D7^6pofG{9OluQDu@zmLBBMk6SS^$U?Ta-!kzq;KVWu!NKB(q5X9u-<^#D z)Mo&${XD2xdJB?CuUU*SmCDuoud2uA8+G9=dwW+m4-rKPl31+z3ab-r8m;MOoU4ho z`z2;9mtM1568?2;9v_dY{}!fuN?>JueW(J3PRP+wAd`*gx|0esDkd(D>gnlOAWu`K z)h=mjO2d^}*5R6)$uqssx}u->O8+@FTi2?>x=WLdBn(FTIYHp#;2RWAM(x4rQt&tt zj*pMw5WIS4D>r8A*)LS(Qw}uaJJl1aXb-O*Eh7eMh5mc8?b?uOtIt%VxxL3bm>Z}% zF1&N$z$q*R_qr%xo?EQQIRgPjS)6a#Q5)~x@k~{c!uvtn)os-b88X8Q7`}JC#?t~M zV9#E-zee>8+jHr^v@4oU48a7^8;qB2FMkj2CsbY-8(`LxdpEm3AwZqYkXoP7xS!wT z=Zf9AK^(ELvHk{;-pf5_XXn+mwO?TS=zMnc5$%*^Yi}Zpsy!10qCkp1{t2s zf1=+VM}4JydQ8Y*I{A40cmqa1(!dP>A1}<=Sy*C0F^buBk_yV6iwHbqO6y%|D4h>Cdf~IY1wL3v@^QjG)nh~e|m(RJzOvV z&h;+c#V8Uw;hcTod_4^_XdG%kvY2XC@mV zRGcDKlpM;(#|OLxXzQ(kxGzL<*IX(`n|VO#@)i__A2w-$6V*K&u^($d=Kc_isuB@h%2)-0RF@{iV3ji}MJCGeH+5$!)WZbR%O zs|uz}|9;-+xnxR*5!3HKZd`Sr@^%4H`$giAVXPq|b%tTsK8yL7w~rsZ&dF+Um}o!b zBy6p+H_T+Qfe8%#?CfmD%t4`6J0Cs$$fxD>u+mgo(Oi5tH#J$=KURHqJ2zxU+g?z{ zgdT-BW0B=;_aWfXZMsVKh7%12za$0AlawMm3JII?Tv8fS02UT{w#9`x-SeLMDURl) zyEQmZ*;H^w);EzgEV zZ}2&~&u`iQJ3~C32^MTxU<(d)SQ?a_J&*+}X2tHJK>dFQ-(ZAP$?W$m4RUD^v#^Db zwQKjWK7JQu%7ups9Fh7H*oNkJ9qon&9Gp8F3AkAYR5;(8aUAEqr{!x%FQD{o5H z2CZ%E8qL`I%d`XpUv~Fp_RDed@PvmZbp4Kv7RfBwX4=@?+>2-Qoc(2)DVv4~>3fWU z+KWxi5dWW}qoXG7)brqG(~Z+8>K4n`0If)1f>A$c&I>HO3{XX6EKF&%2DvZd*Ee9WhoC0BfBjPeaIa;67G{ zs0Sb@4CfO<#ThiQc0218hLF)5L@xFGIj|-t)Uf9_VALzNz)@1Y2h0X97EY7Zfg-TB z>FJ_L<96{knw3ueFg|@7xa!?`Yyss1N9YR;>#LXEkGD!Pm1{9s3HbeH|K>t(^R#EW zBVy{u+sR7h<;U4wa9KEI=?RNd;!1XP1rl>a@>L-Mn>}-6D3IWwW0{T9ihxS;{HA{J z#W^GI(C*b)FH}opvss3w$1lZMa@UknJ?n;%Xb{uq*P4@MW0UpXk_7+7(Y(4q>_jjX z6>Di1o`~3WD*6%{LiCpm2K6n&_`<@%UGaPYb&2FC!Xac2pFX{gA&e1b@kGbcs0Buz zuTE63PP?C)=WXiu8nda_d64q(@Idc}?fw0>vg)TAt3t2yOa(adP6bddqW1*?DOp#2 z2vFnr+DuHdDS&cK?N#1Dh5^m5Mfbp#_;)tv=liu+e`%U}()gm69InQg5EW5nvqzx< zOZ@wphnp|NpgB6%mK?P0y`qfH9uZMgng8g+owQu&QRTkzB#y|we)-MQgOUqj-8WsU zvbJ)tF-fGd)XS|xgNQw;9`TaEWBWCk*3S40n~?j 
zOQzEnXw-f3m?*b?-`Myn&VnkDL9^2MZE=d_>p&rj_2Ki6#l^OFyCkRJzh0tUab9i0 zA1$({^*r0b=;qxEREUOo0Uf5rqj42cZl#TO_sPl0A;lAU^QNE?pNuS&YnOT4xxJR% zO(!I?V7=|K*z~Kx!;`A>i&I48wRQ4^^zbaD{9wiG#j|5uW6KcfjugerT%51SZ3g^R z^6P;6Q4*BRQc?9tT?{;lUY*@_pun(so>}Ya>P2%}KS~v@`)!(EtGaRmV$gRWbJeRH zf@lUF>fwCQgNAvca#dvn}vF)SH?%5!YsTp9(lhw-d zzNHxkx0)X|;wmQgH}2>;r&%sR*}l&^FB-?NL@>~4Pgd>^WUiSW#}}5AJYoAI9z6e> zh&guHDE!qSSPwK>-dYk8HKB2-A=4r!mv28yU&TwKoy=(b+#x=V9=>nV z^VjD68m4)O>8jB_rS)R?aDo?e_`mtBjyC&CeK(W5QyR{3-F$^!yS}Ayv0Q3Src}$? z&1+Y$&dkp@K$qON0~w)%wOWe#%}q@{aC=-~K{TX&pIqf(XCKOP^I+AwFD$b47dU^V z^7M~Zg*AD1>6zISahE`cCncB0&!r{-+cWVkQ@Q&zC}}vUy>HCSuuzNyU#b+llo)sh z5e>c_$|1z16#pUK4?5IyLBV%vfw{Sn3babv2yLnEWX>?)1jOf0K5r!97se& z+SDuU{2A-c+Owtsgv43TH$x|RE}xt>eO<(q`!QJv^eSxpx*uLrTEEct6Lid--yERs zT9Um)^HqJ~hFW`h;c79}DK5)+yh;Uk6zsURUa8!eXE2N4wp_Z$?dL8o-2FzKGjPc- zJMYkJF)uFq;Jc^{JsBy~!#FrNi2R^7^lqQAx+h!Ne423}tJCQ5pSLa@rT8xUx_e8% z&8DiOUr!t5nX2@5hX9YWy;o%L!*n?KfRuZCInlEZs4#VPMBv9*_(*vcc>}p}zMJD^ za>a%c_b^}2u6HMfgojhuuMhl`G!h0h(|x!hA}xIaK~I@e6r;8?rM(+JIGHJEr`NJd*l5a0E$@#ilZK>T8FVwV zcF0D-d}5MI;b5Uq;F#ZhGe2nIa>^y3_k|uI@0kV!NvHMPOTB@W-asP07^o4w@;NN# z7aU)h1)&F9?{4n+55w)5pV9I9VkuOOE%Wno+s0FW##x_!`}WOh{$Tp4H9|k?OO*Jw z<%&!ZJReb>!~-ezRz9Gr+pjt1LPige#R;=;i3GE>Hj$gpCfg?4I1${ zSx=#6C$h^q;1DLZ`%5LvbuLK8B}`=2uO>cZ{kq*N93I8t$q!bTBC|_?-G*IJZJ-Yn zl$2acg1MBDA50+206}6ni{8@G5>i{SO=U3~t=C)j*B`bECJUoTMYE0Vx}3L046N_d z)G?j~=e@+RHQDcOx}5~&c5uZ-wZw9y%ciwM3`y|3aK^EGf)*`xady;jSk3HoJtA8^ z?OhoMb77&`)Ss%sa5kc#kBO0VI<23dR|*`Gtb3e3n|45ry!i29wn11r*X7yS`Pu4l z9l7n2+uK#goe%-&k#(I$Df3f)J|h50HuJs@`|HDt&b-CTojM0`T#DoD7872A$=Vn1 zu5FCg5C;jp3?b_-+)IqF=C^tf%VA#hIB3k#SZeP+eebGS#eMf6MD6Ji%R4R}fJz|( zZc=p~=PEUZw=y#`2lACdEbn)AvzE@0$!UU4OoyMgakJM*tAHQKs@BaZsk|0_{^v8x zh?a`tWfbLNmuGe72gG1x{5HKQQ~a;ocIkJbC1J7&dn}jL42lCcPlw+qTg_D1osvQs zpiQJ{B7E`^qoVwR#?V{Oc!7U4h41zYn&3ja4dLv;IjiByzEQl=#0@wq`VA-hP|g}S zFd&bPP1afMyf;!~icck#&}huIdy1&*FHz$NCKu=ez|=CVOJ@$0 z;Wpl}J$0fzQCK*u*mL%{?+hhRuX1Hmx=RhS1W}_IB_2PSoC8c!AHU!tD~t1KoWiF6 zPqe>7glto5C-CYD?=7TIHO{VD 
ziR9n)sbDzYU)Fm3go@Q_re08M8gcOSA3!~WGsV|Y+}$vEgYMDQKnM+{N$iX zpM}}2@y&xhj}MypMnDW=k$V{RwEL2aFC62$6u#{@gzFSZzsw}Rj68pVsBZ8JrHBId zJ)YZMw`q57nKMcMPytC)ha|Y}I94jlJs%(8u{HVKJz|O%r+Uunuyrk;uxc?xV0YrM zDF?>F@x?7K5Mx?l%Fb85aNWRi=ZBnJ4A+MZ3t zEO~zzv!XoZ33XuuIVuz_7maFVe!;=t1${6%xw!AI6QHJ~<_Mp5#YEdqE&%Ver((sV zl&9G=-Hq*f%F{N@SLLV@VcwI33dn(imzN}7@~a-^?~Uc}1xTjl41zHvM>+RXx>XA< zbjqJKN^#hwiny+ zJ0Y_kgQEMTXoS;T8`aMK{_AAvBw6Zc?tNPr1xs(RCAopzw-D+fvyPn3zqLNhYC1$m zb0j~r&=#JdV1gu`%cea#4mLgKAx4v{T@*Z~y6Jiw5m^%Hn;K3bQWl1ip_uCuZ1b9r z>NA%v&*_eV`@@dh-#OiFgKfc4#AP-?{qT}bWKANcq0bL4BFY7Z76AoJ8=iOo<=!!#c&r8p|4jyz%L>WW;bs;VLevcq7m z(f?_Ls8BG8h?5g1OaY4Q9&4DKED{p+xHsD6lbd_HhCjES8#^V0%3XTu;X=E%yf#b2 z67_qg=wzKI_(Y0pztk8}4fi(pz3~Vzm9HFa)$I&sD^Wb*YTK<%GHsk3PEOJMT6^gc zI(T}Phd6c~$>;l9Nql8JX?e)rS$|ACG#*%HiIMQ8%iZe5%hR(uIN0EFt6Qle zS&Sngy?O3i$p$DA%|dEkmB3>(OvOimsO8J~T%<--skwhpg=R`A0w1n%z%M&sys)8Q z2-fmE$Lo8cmzpdZkrinJozjsR8d4LXPIMQ7(n8U+FrgqK>kZ|fBKva#!|VOhneuGr zV_z~_-()G|OXh3Vj9eVXKdXS6L?Iy0NA|k~n$}tde@7Tq@phctTCa$Riav7#4wQ}P z*25Nr+Y4@+1tt*@5%HKmcad#Dhq$WMU;%w^lg`CEzKdekALY8 zD%YVZ^``#~et%#JVzuAb4t5yfAiR6^_>w}AMUB_{` zqeILe%0CSPI9fe1=ls2cRX#_#aN-2NVjM&kSU_&1mk<#`Dh6azu%x7VwNofO0kot+ zV_`dui6(7h31>ou(!qGm7*f+6GTugZCO)A7@Ql#2^GDYxBBe9{9@W4wh@e`+%^>9^kb^{b(^+Mu=RfgUeLF5RmbKkc6gy)Q7+I%^}IO7hc`jmG^_WA)SMGqe#PN>nZvL+ zlR-vcwI(Jf37ND}L=8~=S+AAzEXM_(9|jSR7U*2T=mX3^_P=<$=OG zJ3qg1^JarKlXR}h($nFdVxyg%ou9`Cs>LUN>;U<_9Z5pP^hs&mNBu?dsK=PCysFA} zXYTb6ReHm4e{j@X`TQmBm9jD?P{G2&7+Lapvf2Wpgn{Tl-(WqDpiuxN2F!q%Qc~R{ zB7+$##z+bqJc!nIt;ZkKbh>6Ii*%unO>=WI^5*DEekpumxzMVc7_Lxv_6tIUN}UC% zLYCqpWV&!isf;~cv8_~#(d-5*<4B#S05QizG+6iDczb1JjudnJ;^Rs288u%SnFd(C z)*7I(&r6iBXtCWobLQpav$nTx8+BAA`pTe@0wA{{p$eid*4r$~l!GSy3x*wraODxyfVnVxOuV&W%z0y+*u&n68!- z^bK!fsjJFeWm(hE*aJqTS}?SGQna@ zlQBzJs7`&eP>`rf{g+>9(gF>iz1#E|Y|!$bo=0_3_Vu%-2b9WW+e&=6uc*_d6DcVr z5R|C(8fcVE)e_9}Q*3Q;Zp8^WGEBQ1$N%-)lTK(=}n36DYlw$#H<&-L`)=jNK%`in)#`s6CABI{Us7ZE{_cYc6K7j(R>7x7ov?4W?MrWt_^h{;?8zj$X^M+Rte5z`_3XM 
zD%x&KV;Ur3NVgbzc1=2az?p2h(H|F@*U|fC{QX(}C0EEOVwRwTBk7B8!L}9u$qEG) z`W2HBB77sGIFfkGyr>YM;^)`q-bsGFJgImMT;SaLyqwAABrUdXr=Ta%%a*^r0RLmn zX-!0$dk%Ap-sFh($^Fm&x`WLcji&m)FG1KQ!$GQz8{dk=l{Gs_b?g#&G34! zamgZSfYa5q^mSA}|6sa)kAc4^euFVUk}2g;##n^Amx^yco9@)u=I8wf8Z@SY{p<^Q z#qh%q<*`Kdql3BC^LIj?#>ew@F&r)(b}Je=N}b%DlabA~XxP0y>ut*o6KluK?JFOZ zq?Y`ys~$Bo++Q0+l4`@6uD425Z+VHP{c3Ke6g~z=P63eKQ$WB9wj@c3{eBh<6A^o| z6lkY_#}zEIXPoRX4f*@qSPS&0r`@mU@+nTYon&a6-h~tSRV{yVu)RG4rt-AD$bbHbUy4kmBMR~nJC$QW4@PL(Et{=KUurq6 zIW_yw^=PC>n-E5+V?E?R>SHsB#|_&9Uu>VxB^y{;O2NPxXWxg7FB^XTOs;-;=P(ln zD&dflBIh$yKFTxyAJ=1we!JV%?c+@?Tis13$sE$|w&`YT!}HS*n=v;gL@v4rC!)CC z6Zcz;obiqmvYQc{9wHbWNG$8lp-O9`b+5+hs03(cc$c~4J|z6V{bFJ{EX&!tyqD+ZenCL*;KBOm?}jx>6UV79ruzLnlO?%Xyl$8# zDddb0k5b+c*T1k0SoJ0MU9R$B2Ags8_uh~ff)oIE&qvSm6MGaemw&=@H8Z1A&Al%y z+@d>(f0+LXkG4p+2@`1~gl^*^qM|g2&mWv-IJvp|iVVa6MEmFGTRQtvysF?VK0wLj z2s@gd30@@q^v|VSkfyw$ovPxE&gxB}mZYfG&9b?4&Ek13)l7Wi9Ex8xPIFu9bs=}h zK-Pedi=$gV-(%ds#6)(ZhlaHV`yOU)3SGBv8KM3(x~ zrS{gc^KCZAo`E;OxHsh*)DQja?=N>l2p9a%3k=7)7)A;y;$t*_eL2Qt-K@AaY~VQhDFT(?s#u0 z(vhH|(s~{pNIql}J*u%1AoxTNc*2d)oB%h|X;yPiw}aI`St^mAs}#c>r?EUKUxpS- zyR+&d=PsYlmZja9J_Sm4`k#)ny5F%Jwlnpp@KBDRKgo4W%(<1`RFpr6=|scdF@?6v zlJYcoAbTTUfNNu6oYJdJKLvjA56kI<(PE?2;q}#_T;f#W5JQ+Ndu<3G&gYge5-(0o zO*y%^*sSzC1b-PLm~$jLDyzrKEU73d(ZOaV7VqWdg~5;Nw$w=o&GuS(5Y=*&o2TsA z!STWutUggJSaGN4Pv7peDUPd^Sx9JWKcJF~d!8x3B*WuKtC%wb#`hQjTSNzx5Iz{ncSk06FW@P!D%G%)JGgqs*nUR|ePSXYCaTZmP>YKa)mMh< z8DHC2cpaBxPOj}ucfF9_``CcsX1oZy9JLbplg{qw^eb=eOvzCm2Gi5$BOFJ=>m3em zqX3dc>2UA;vfwK4e&Y`zE1`e~FAzWC(JVf&0ptL6>6U#pLMw-DgyKx;y(fBMSV3Tf z{c(Z!@XDRy1QiMwmh3y;Xvk@;*x1-&ZqPFGry0BLYTpxZ4>kc}_74n1DuLNbjFggb zKsx87NooQ;gv>jTBrb|-ZY2D%ufo;{N?y_*O(!9psaqIG7y9@(u@^YlzDvY5!&OXA zA%Y6t6%nu;bkH`kFxxJPgPqY-#13C!cmJx{hI#LZsqN(Sp6{&l(C;@A!iiCzRAWz- za-bvCBBDElNNJ9JFO2WT znUg#DhWdb$Q+7ew;ODqVNp&0a%eW8=n3GP={pR*@J_j(I$%)IZ)LmzjB^4RH`9 zlls~MmslvMzI2wSjkr>GTn@}eAA3DSJhpn#riD`T;#QgGDuzc?m{c3W*6et8{6$4% zCqHSiWct4}u~rZSWoRO>NH}}4+<^NA{*93dmE`VS@AvQD4+izXjRG6oKPZU)z~vV_ 
z;#bb?q8DhOW6#XYJhv&@|9T%Z0XH{ZNl8iUrYll#fr2Qrb2#m(W-d@-LT;)V-w;k- z^Qy`}dfKwql*`-nq0;qFXXtA9t|`G9x|^n2IJT5%KC&<3@nib+dYOJma-$5@FeiR) z;-NS9Fjm$XZ24KCHnMvV5`{%cMjq{IgP#QAAkcwy!&kS#{n!Kyo0Yx&A|$Iw_dy^j zPwB`qMO6s@i63L_5gO!mP=&Puy!o`--YxFggAxFKf_Q37+M0OeW@q6iZ$+WX1Hk zT`c%>dfANAnd{RzK0|+sO9YEQDoeWiZELf)&Kfat&!tY5c?`)X1WBWY2(-Tw`UhgAvfwppsrnQw9 zD7>r(PK7b}Q=ZqmJ(YJph>DgkDytF8I^ z_$2olV~Uggg-_l=*YCTyxG%wCl3QvySs4L*?alDES3olu!{AV^*UH?Iahxw8jsy`s zD;m?Bk8Y99_;ObEXhF98u(CUFZPa2y07k0NN?6I(@cM z{&(1UvXn z-wc1s5D3VB=;=%+b&}X{(UroSQC4rw-1pv7owv=r344ft8!KQgk+IIqF102ag&WJ% zIJw_5tkIF^KsR8y_J`J#YWBOr_WB+pPJ&&4`|NCWkpm8`)=|0qSa-~UGjvHomWnh5 z;H#BRYM^gT+VucUgPhE|FQUp%$k_if+>1n#9K!*k#r`{Tj~1-I!?EUTFM4%Ai^AQj;H z`0(*#k=?2+jeN#)Kn(57V_|JEr6bMFCsiMZM1)I%ZZD|~*KAc! zeByTLY@t~#)Sn!0eC(hEFE4YD&QB*gs^QcRaLy`~nB*^SaRV|$%^dbW^`Yo0^Wz!PXF<5t#?+gS>1XQZ3Y^9fGv6N4 zGt`Yb?>Qd2G*nOr*n3iKoT0~$y(x>afAfsUYIt=;u^7GX!$%gKWsa)?%s&QBRI8l| z6>!R{UkeKtThAj10)@a*l8;RwopOK~KS{UII!)*D{*z}gZB#*Hyo(gu&;IF~dLOA- zZ^XsvD6nsw*?VYJe=YMZNl}^q!g{wGfFU4sqZAtPgu4wtaZ`*D9X^jX;mh4T7?Cm4i7%U?no0 zNSb1J@l8jhUa2n=Ota2kOHr@+n)~9`<%H#-dd((?Q zVq_!!2!%m`1vfEIVNa9)QT`E(tIt%gu<>@s8-`LD=hd{Y*Kgc-2<5Rgno_`(2HQL{ z8E7rsgr*o?`!*!bzr*@qO#z;YQk(pDZNgZu%4knYn#pB^DWJhA zrXegWIbHcW1X0IO!p}s+8K0*=;k};2yGw05rS_tAOFFMwCYJWC(CxE?Ta*ojK5|vhB1L*TAH|>_d1O z1(n{a6*dOo4j0nZeFQ4t(b3TiSVc!YkG6HPPaNwkL2*q^PL9mHZLnqp4+)z)ysZ(o zL1A(6Ja~v03(gvt)_v;g1VE2^KtVwP4n8k3iP57aK?bv1v%gAlkIHg}BH%GbL>S1qhZ=}o&$aS612y+#e&s0I5Z5Xe}WTI+$;vfqctSiC&VU)Nu;y1pv^Ij9qVQ75*d)%+{7Be}6 z6H~hA_a3kG@7PU@1#{)XKTN2xr z3dCuuO`(<-jU`$555+FpSeqMsv6s0^Y!Op6tK(&iV96ZFR>B5L=R7|v zfb9Vs_grRr0?i+1nWy(aBm-;p&cVSF@R2Z2F#&*9iS)j^yE`&;0Ki`kV62^iu^>o! 
zBbd~dVGWYa&J(oWK|voz)yz4yybNMikWRgkIG+MNAa&Bmue4+r7k1f9iSiq$yf zXy5n08`0ylu&{8tZLsaHaZp>II=p0|@il15VD7 zq8czV{aG6ffOG=V3@YH;fCd1p0S=<$`j8d*O!?~FcG?`ZG6rEZ0+w{|+M;zVFN1R} zl#<+AjJYncgYHY)r`rgWjKSO+r0M#UZ~84L3#~`W6wM845L&5dg9#?jecNh|O((i@ z$aio3{>W#(x@-Zdir!LZG*>Cou(vbc(hsZx(w-O;WB>qF@rE|g?Vy6gsB}hPt|3Q{ zfIJ7^NfVjYgB6Qjt@NACBGQKd?JX3wYc`0mQYNI>J_dX6_40z|C?fr+FC7zBj@(F zdkR*1=uMP+(k z=&Kf+n92Bi3Fmo_Yd3g5ZFRKlb`M%rDrHNGtJ0iOP{@pPZW}Di81xxM?rP#51-*EW zgy*55GM!QUxv$=9px#RlCTAZump@T)L56_^JMM++3Z|<0Aug+KPX|r$>36b91|vA{ z13m>pODOfOl9G~qR*J;UR4Fm~2u@KoW8j+pk{JTvGrzSJoSkhNs@Ez7)=X<_>jAK=kH~p| z-2zAg*c6YU^L2{nN)Cl$PzG zXYPB$!vTXzkrh=8HtgK*KP3d0S5#TCzVv3Mcuz@2<5GGaDAVu4nr8VPXCIZxwTpy= zG(JN*86HRI)XPI=xFoCRFAV==6p6J4pW2e`1zV;E`*Liaudys<&KmSd4ZWGpk*S45 zAAl%|7m{!hNp!AoJI2)Nyg+kx#c*?XBj74Jl9ScqCRZ(`_>1?ozLcejNK4SJCck?1 zDwA&G_1mP}uK=xoQlCyMGAcYRwJ&TLdV*-&fzAa-C+qop617=_CMNI%Hwbo-54Os6 zt8Mk2+1L+%Xnshwa(vnJ7GpyVhAoWqn%PWc8+Cv%a)HtDDNwKv>Yhxr&paLP5$zyFb|9}+mU+OWGi#efNsFaaXr zm-5Zi(KBaTLUy;S4Ie`asu@j1BQWC`Y)hL!;RWEVnQ{On$H*q^u+Nda%b#FbN&BNu1<~Dd@i({eWeZCS$>{9&C4w@9Gm2xParckX~zP(%aV2bcQ^8|1~H0?Ji6Y?2s|v5gC;H|@*mgqWcnW#`xa zYHRZctoOz7>3_b&9YH@E=`mByzcc~<9e`fNjtI>>jjCp#+F@2519-P1&*AQN+iAkz z0OvO(*f5IXcAggF>ArA#jAikfymN|0;o8>G5kBR3ZS&|YVNtFuS1Nk?TQ!$Q(&_wDt4@->FiTqUmr$N6af~Jx#eZw@Nir( ztF{(T0C-U;x6;Yn|7SgXdEG?+-Fje!7=~QzGC6*6Rp_Fvw(X!8Z)jvp^AN)UI!7@kOdq&~(TqxYUWLdJH|WSz&a4G=Jh#Wwc&hoA#qsRXTeuGjF}u z9Kr5&@eMaicRKN}Y@0~?8yVJhRRjqiS&Ci1bWOc7b&Gahb}+l=oEQro1~4>(4HgV` zY;XHM;L$2F*{w*EkdmsvtZxW=$pC1Oo29QCAx^70#_6S?A}gz4tt6uF_%3(+wsk^teZ zv%AoSB!`hEC$J?TktOuUDWQoixu5b?X^X>)m4(I6&#!)X7(YlzZb2E6K&UaS9~vrc zwTJWT_p4`ppH8bb4q}J0#En~4P&ELxF9EP~st~n$K!HHTPzi4XeW?j4u{2Ohw;4ww zP07!uAZ0gXtA}_PD|=4)1l-k)Ht~q$(bdb68FVrl&9SA(Z~}2%tBI<-{I!5=?e% zm}~Npn)rCec}GF$Ugj`+avh%7*|GWK1WoT^T#}1Q*V4pg&N2_#(0Rbpu;YD!U4lkC zx}zTZYYIpW1vnPy?n@0h1_5X{C0SrYVlWxZdJT;q;Zo268aC1$k49d*JAp6qP&k<6 zXG257nmd$H!DL%{pehZaj|-BsWNt73<4nqj2zu~i7Gt`;BZdSG@KGu z2{;$P0*E^MxJVht+dch)*)di}`EqCY$1F2jEuXg#kDNuzTcB 
z8FC=Y#JO`vn!z1;iT%A3+NbzPQRC8{Qh3idNXj65LNhiL=EmW?g1toT96 zB$znjQmO=~zeF!noIa~&tAXE-9A|?Rf59503AZoH>ffb+AIgXA91a#q-@z^%-#WfN zL+3+!BTk2Dc4gInNYXcGC~!#geQr2~Lmq?iCYz!F;S++U`IVLH&uwg2;PdJO5qT9I z9fy#x4f5dC!EAh_FbO7mStC%wBB}5BViCRrgF7f~*ADDY{=8+-tZoH^77Cc2kZzr( zU=7%6<*^`H149Sy=$=$z?7sefXf;+Wex4&w1G*kNEblKfc0D$tV<6a($nrqQ*k&9~)n$w%y>WRphUnXj7(`2Na{bQ^rhIhL>av zJ|s>0KO~O~zY!i8OoW$BQ#-wFKM-ue0ai{iyJ>VZ8mNv`sA%G}N>Yf1(pOw^a?9xU z(eCc>%j5Ac2@pAeRX?hhuSU&w%e;0$P5plKMF{1k(+SWry;c@ zIQ|jjo@b%JOKP)3Z}0A=z65>C525)vJ5=M#RC^hfWLvvbPw6rHvSy)pv%|Da#~W=T>my-A~F97uK_AObEg=U z3+i_(Wm1KZ^#3L~sClq-Wn&H(XU;(5APd+&O7uzym)Y z3cbsKxJNe1fz7Q8WpgMHS;K*{XEN+0fc@{?X@3tY)656EK<=!trm$6Q1E(-5WOo3@ zP%@PA$-wuB@!;9Bh|yw1qa3&)kShk|7693hEhYi>E_)!)6#p`i0jqRKntkoO(K?lD{uEia5 zQ}VuxH|6N7yp+#6L#*?4wO7nX@&$SvNBsSR~ITM zfa;UZlzoDJo1_)2P62idf1zC~=y%5h`H`pgI8t{0>!K!mRpp4F1lZMb0QskmrDZpm z6#N4MQb9z9g6fV7(7X3W^gcYw8jhVx>BF;ds_NQ(v!>XDR*CCg&(&_E;cP(9`Zf@ zERA)?1z#}yQJ-rl!yRd?r;9;}+b56E-pOCPN%NXoR`ozT~XL=^u(wWACC3uip!P2qWSUif*eF1m~~YLgBf; zo4eYBFMiYZnw7?n)H&7b925%Yy)|p74=7>A^`1p{R4cca+ zJb*dZkPF*Dkc0{M-lA!9kVYlL_C5R4FV&1U0{p^jD436+RT9cow1p!J7C)q^d!aSd zu<4omiCvz01+un6q8(Z`!VNncYgvnQM%)i+nvzJoE1k-9fx7=Y>#{%0dcbtZ*Nce$ zQy$$1C%Q+HiEh=bUi1~{?} z#GE75>z_mU5B^ssjd8*kau=Q##5m0{Q?8^FV2qL4ksO-;QSL~ z!n5X$u9$t?dn5Cz!Y*^dO&)^ZQ>|AohWBQ}yI+39(t)OO*CHVZ!~;TMv6^`fH}zgF z>)XD^u+5tQWk57Q(;muEK>}Ymfkke|HhPngnGu|_5@3Pa$Bab6BqRVZv_q}0VPN!w zj~q#J?(MNATmmL8EG_*G+bZ%_y+&?j(C@=)&3uZz0|}BiK4}_6_q!p}XDj=ay8BN) zIN6b%;`BtWG3i`WS#|!(>Gav1T1tNC`0|&#DzY^QT%ERC6I^a?ZX>nsJTG3nkm9EM zN2mawSZ2{hNXO&HM!52AzAFP}^T(BV>c_0y5&NF%-yOrfgo$24InEKx5Iq7D4}pM( zTf%N>il8hLkFALv!~O$`(n*|9fTbDp)G-8Z-6 z-4Gu6hZoQIr*VX+F{nP4ZtZ@1*`_4Eduz!;xoz;<`j4Dmub!>pS*)uacQ+1mZPARI{nKPe^h< z9;u0xb8^lu$!GO==vq?F&2GGVpBpctBIH1+!evq|T7kDtMyBYpMEb*pbn9B9B@=;P zRfJ!a+|)<_5xH064>iuQv3X;q?e3a_9qRVWeW%@3sPt+!$dDX;HP=t7V5H`^2 zvgH1GShJ+pv&e_?ZKMIj-QE$?M_gmk*d2-QcAU{MS*yRyu zq)qr;ZNH`JyMW~dSIAMg44vs2nf=GoZ>Jm&QC3R0Jx&p-!5)=(@j|}ss;}2q&yD#r 
zeO1o!7ft%?x!pM<#qHTeect7eJoEG{B*Vv`dZe3T%wdi-%4O2W!n#d7de`gixiE8z zIX24okMxE=m!5rb*qK?{l;`F!`#JadQpeI(KxEQmjNo0?fbM)@&8sJk$s8*;E}iJT z4LbU_<9`m&X!*x`l06j8PS$LCJ|qK15bkn|)zpnavrZ_n0udIM&>p`*SNY7Ot%w<+ zr)OY3bac;PzcyF=WXraXmDQ=r*3qXJsC z&FKY4Rqkxw+Zk_g`ej%6mM;qmOTQ~g6Y~=vdm7exU=n%S&eMv6(gJ>9q>Vs6Pdyqg zPU6HQupy*Fo(XA&p3&D+?;fD*V5aOWz%i&lgL+M5k7lHwHCYF6tA}I+JEPW%RaOmign5eaNBo*5XeNs@$opv5UF7}X%D;h43 zo?JB!NZAaFmKA|Pf@?$<5FRMcaPn_pV^c9P^(Y$wR$>Fc!2&AjzasV3K!o&H5glP; zg?`_Gz*AS0X|)Sylf-vM)sa7tothv^-{}=5kB)`%ep{tzUVygu?GLQUb5zlYunrz6;*M(5 zh}4E(I_gLJkH0#^eZTNy|B|OcHvDPokeG>}XZ;WIe?)K(q&2)tew-M>(KR%<{%57c zCA<>5Yk}|N*x`2fw0QJKR5rIgJeIul&+#rTXBPq6tNH60O3a@JacvS$YU1vgibpCQ zdU}r-jE^|kF@_I&v}q+?d|o~|*pJ!vi`b?ivU6L%#+h@nOmMqnqexO<|MXuYg+;RyowR4GA zIaIv#$#SYHFI;Ryb>VO(^eQn(e$_T|$Aso1-^C?T&9avuI8G_Qv@r;WxA#gy-m zVAcoAd2U~f<#TTLa=p~e`vyZF9X{+e=fa?6d2X=RKFdUe&+%1de7){(hee&#g zy@|5zmU~M;Dn@G0=L z0X`yGM&xYg@z^dlY$#~W9u)m|V^%*;f&p%?n$TXeOIB~PTeJk-mb`Z*LO$XV?1%>W zZp}~0c3?xHz^`^11aBoWLI79}udzXF-sOeFJO)hQK#!mvLEU2%pDR1K#hCbiU{)aR zLL(ZP?7=tz&je&Xi$uZzWCG?OvBwP#4h|$>eg{r0yz;qTJ|#wy<)xsfYh^1+#(cJ1 zoE70=cDb*{a0I`QME|8r@h(FLIMBjVf&4jUc=jbOP8>i5aDZ#LetG(t{Ve`t_8)09 zhkM?0B|oE3Je03!t+iHO;aQnnFetJaII}S$KQ&^w z@_?Gphcx|SrZq02aZ>!3Hd9Zb2P^-{s@nX%>4D{Z$&f~d0>7h(wwh_I+gIi;sbIwh zgi$avV*VLQIX5>eyh~{UuYMTH$6qbWI=3uF+se&s=4qb!SL#f?j9Cio=4%S7%NUl@ znYR>iyM~n5psrOsfj1f&F|fKFS%tgjBl!Y=UdRsq(DVKbZljIq#8XEWYHEKNNNcsw zdKc`mHm8V@-i5d4&rFB5$4v8OfO42PGB*eD$Va7cabaN_itGXN@}j_1OvvlR1WD2} z5tJ*{Cu`YIym}AC)AN$o94_@+jEs!MEwBFq8-AYQX%88T4Z95}eMp%TVkZUsUgTa8 z{dNvsRaVanw{TnFZ&KigoYx=(e1%Q110CICK zJyco~eLYgt(>~e}Fr~d3!xQ8s`B&jhH!~P1peiNM^6{?i#t1WHr{a-6ZjnTlpdO0yV~y?zOlTI+i7YGRniU!frngKrb`3I1?UsG39=Cl;O<1-HLGVpQ zc$e4x&0~`RSLNsmx`B**2ss-!T-YpLQ)37Z5Zxk*XP2rGt#uBUJ#?y?sN-P7=dk!T zRdgnV$+VId+^Ja?H?9(GCSXtC+1)qb`q1(1MAQN?AfIM;*6sx&fsj+y3_Z)KBuO8d zrZpbs&HF(7_vMi_+oowuiU9eWZlME1rpeTAk2bdxl~A_italP7x_%<|l*v#0)jS<` zggLhqc{&D!898{b;P-}4tAJlY7n&MQ)Vd=}Y~~RNOLZH1N=nMIOY3>@)5EIEJFns9 
zv0oct=1IVKLP@z)>p|vJ|J`o24+zMGOtpc8rX|Ywe=&kbWkdtqNsTzQ*ke_W`Av%; zlR?_O$VE;<(h0q`{62kxBOqO<+r(S!7qHgd=K=^`2ed=OLHELaZA~-2^7`FcOAiYh zUaJk3a6SA(mcWGCNhkI1g7oM?r_DLFK6gSw{?d*8mcKy%g2_sIkQSkSv)gSQ3bi3c zSnXT9kt?C`j;1v4=SW9lxE({J-1OE`$Tc<2-Bm?y5AF%&<*C3KjIYKgR85ml^!5^z zlKK}YtBd+s_P+3ZwO?*t&DMYXZEhjllVvjQ;^y4iN~eus{tq1X zZguSpn9PVcKj}YBp1nY1hA&Q zmXrd&Q&LPm3%T}LS;ndYPoK^&F0RYC@qJlV>5Jje`QIhb=jm&a()$)jK7iaNyx>r< zuy--<{>8Vd1kjjXt;P~>zhaGjAp``;&CQJ}45~QM;VYLfhtv&yxYhH0uFR^|d|8`r zmq7oS^)jEtH#+jMTLqT))5nS1X1n#Mj`en@N@=C3mQ_2wEay5uIdf)*ZSqkGtIE|% z763Lg0VP8+OXoQZ_vGfL#eQBCuv#!ooT%CyGrqPieDrXFFDw_ zgrQZ5E<46wMc(8wv)#LTM=1Srm+-9R&GM!DD z&aBT^YNyFo`*d6PQjdnNzEIQ~a=A|?P6=ZuQ|e+3o)yjgV4&Qa%iS~0Qem~SmvhyN zC&z}6RfAOVEmfZyz%ED7-0UpmDV&hYYz#bOp*sBYrz!j|9{^-sA>6PAs`|ziqw9t+ z|M+lY3DxCM01shdC%mW8i6!eaxMG@3)vA(dei{Wu(mW0JT;$_|oyrma)93+Wx>Q|% zd@#w@~xtyQjf)l-&Xt#yqju+Nz9N#JT&#Hdu@0BsjDZ!il zTz4_FW+x5Jjy%B39s}?ezK&tPM+w|C2s)FTqN2d;0g;v%ys*RvQARF7+6{>uG)<*p z;xVL^+-v1q?(7I2n5wFi9#RS}J6zd%GIfYoxtZT!qvF-Gp;~M2t;+mG2Sr;_?uwu3 zC~`UOcJt%Q6JKt8pc|#fb^Zi_>MB zLKjvlSla>u?i=*4W>O3{WgT9VU)|!t)D(7SD3M;1xfuK2w55I=T-_Q}75h2j1Zz(D z#UB2oKx&SGZ2o+X*BtI_<0>a2BG__9E>hi&l<%DNys)>oXpOpH=H*>Uw%S{aKE%1BT0I-z<8(zU?Uvl~6_4sd7IgXb^8#h_<3otU zYYWv5_rTi4pq_(KG2zZeG)qM#C7UE9B#356z&^{Uv=7iNv67`G?8Y8I0YVr!qOOVn zm9LnS3hxI{Au(-uwv=fGG8GBoM@y8RZv{6J(mz01KvMQ~7UIlo-oE|R_|AXMh#*PG z`MH5Dh4HF(L8gbUEC%;nZ1GyX(3$wksqSlAMybz-TvyR`Yx7$7bG~^O=S(*jxoXJ7 zbB%av#*f8S*n0gM*hde(xL8ysBUYuHap1;9H8Z!Cgp{PSrSYy;0t08HKTn2?CX1r6 zP#e265zR$GasbFHG;4WV~VNW6azr z`H9Og!f4tY^oo^ROs5n#zaM`@Dmr|@{r2z%qm#UD#wu28Yp)`!=;;^8h##She>qzy zoNeL{@a5(7R8dt6y+np=#Bm;08Vc?GZ2`jaZL2^NUnW|dYD|%EdRIK4{t9tZXPH*h zw*R`A7#%C?bI@eewYB{sl%g7-i1m2<__0x^x`6|)c?yBM?MPeun$P!-_sM*BrV`C! 
zdBxov<)$YQzTxai${Xl;7qj{dJTqu@!rMwCzHX1*A#9vX#%5B{+EZPn@GvSnG0&2~ z>27iG?%y=3bcjZNNj%(5{F6ene9u9-{^vhc%krN~?(i(i=_qXOdpoCgI+}cwBJa9# zUxyE~D82{Xqe4v{_Q^6);z7B!+gqLOyra6r!JuPH0{rtE|E_Q8etUs^i<*Z@Y5VC+QHH%;x0X)#Og2xCx#>k+ zJxE=gW|n_7Jg-V2KU1Kc?acR4DdD2W7up#o44f5ok6aL`wxKpKnt9(CeV*A`fi?6o z@3^P1Ui98&O25K-F~+dF{lN{Vm4%CXs|@NHbj5_<)0|YK(`YnOA9HR?!sUCv4zJ112=a@OaLt>S(cTkp+z_2UMYKM%0z z$n3UWzumCEqTaw@yK0d1-Av()6{AB{Pw$-z9u+RU8s6n?PqQ2mxNX8vv{Xu!)b?9p z;3JOHWBc1Khq8C|hg{a~2va8#>XQEATtp67P3X z&x*r8>B2gyz}9S)J^~Nf5;hvdrk;t!u+r#?2`NTz=)D2_u8r}}}CgPE+NB+MD6qG)~ znvy56)d#0YU0+`e^9&<%b9=!W-+&8J_D3d_D)xwwOkh+8Q>d;H#jF+MZM*mG-QWnY z7L<=uHL3p{k5_wXYl-oAs617n&^}&RBk+v*Tv%t9=-u-%oeN}vG|7wIost!&PdgO# zv5af(kCw@}uNl5>c(L`B*-DDP!n+q78@Q@hezYlstI4rz&jwZZcl7p&ryLr*Rb3O7 zo7S4HW@h1WPn(yn%DlS53!T?zDLoX_hxlz@0W63Ir4W;JY#a1USjMQ=7U)3O`}eyk zDT`oqBPu9Zu*W_9?n^6H$E~NM_K&puJ7Ks#nW9}4KuW0Vut5wF!-3-! zw+THsNsI}c|MNwIwdw0V)G*?dx+|J*4eYoWRV+3r*XidMu~wF@8S#ykcYb{L*P>jy z^(G1O#5$X_V~x(a-)D4wMxQp-b2`6KIR9j3t-IM@ zD)Hg+bi5i4xMFX!9#itt&;Ka=P#aCbC^jVdah)|@+7{!DDS@&lXSG%K)yJ|i`5{TR znx2RvHE=pQqncBJQOQtU1O>(?`%*BR5dsE*)h1SDgxC$h&S_7Pe-f%!BOViI`Eiq$ z#HOwG%(T275dPd~exxSnAR(}eT+}t&`jhM%C-Z@9QXM~&aPtgL_pxmUwp+|?k_~BK z+^Qcj(ZnU{sUexn@y2ScuzBP@PltlPduYr$iIbG22YJRnOiWx@Zb^Ge-?TP8-%U<# zmz#45!n=9}p`h@b5{n%G>Ly{= z2xJK)oBs#)J@V~;VBh!Rfn~5_0WF^cJex2Gbb)Feu(7dmD^~xF%RV87KJP||82Kxd zM)(+hDdQW6bvOq_j?I9GFnS$Wd02?<&RTnkeGVY6xtDC(aVP{!*ou2FItANg(h_t! 
zX!bis51fhtZLGR#yIS_Tz>a4VejOxa>j%kQj_t|o{}jsk0O{$(g6>o5As)Bc8L{$O zlQCb}7rFS=G*ABpW<3Ugs}zG0LpbtdN<*AD=!(0yTwQpeL#`+O*tzh*PxDK`)!S9F zWhGGXX6ejJolQSy@uHR+tq#c_+;&596#2!KUHXIZ4-Sxb+|xB*1C&TeZs3rGp&)@9 z<9Al*$Ndc?x+Sav-XXL+!kXg_B`s-&5|Cem-*x278Bg%IFxy`!?l#)IZCekZW4t4+x7JHzK)z$ zU7Wu^Sx}y0`MpmeVBUIvTHJuL;K97c<{mqX6g{(!{NdAvV-|1loV51li zuv$&s1$V0w-(IYP18vnau#{`Sz6beX9?DOcDCH((!J>lTz(UFF21kaT(NWRMmmd}N z5UX2Y`SG7l%2Gi-(wH_#mT4~r6g`FYBuqZF^P1tOIs(!l%}LT}V1kbN`t|GBJiBT; z+pG;qfC~%-F-|@1F(zZaa)hvT2g%1{0G~4N(Uv`6 z?IJ!Wf}6;L1#W880E3Ui(SLMQ4}JCmC<^fBHcJ@4y?JukMr(H-Z+nhq?NQ=JR3G|{ zWc|U$il<>qM||rzRe_JPFDY$(WFk$VX}mW(3&5mk>r{V`D+= z1O}yP}nTAKmUAhfRZ-P*!!8U)1l$4YZc0e?nXwc5% zec1-CpCE31$J?t2MY{`--!T7V%^-#pa`1PyAA>cXQumIUtv7f8q7(*QOI4fdK)B0L*)^ z3o9!t!_2x5?sFH%y+45ROTXp2_Km8;=T7Ey6g`czc<%FpREUunb{AWPnk(}a4;>5yqDoBzHzn&oC+6m zYT0*2{}6~JBtI|T=H|_%aHtw8D)q89*1O83@!E%pi&YOtL^p;h%x769TJAZT;rP3y zyCHca%%yQInBwgb$*biCY$S=%TV_+{|~T$O-k(ZT&EZ;`@a?k+e-u%XDY*|Y8F z20kH?i^MN3u%+@SPT&6Zzi386Vq!P3wH02@{UAzTjFE->NE}WE!O~wS=y>UzRE2y& z_p(@4praLT7aQFd-sspLu-3PM^>MX{)xiCarm+%zo-?jFUqw+<^a!jiP{8=$-JK%W z;%WH*5ePh7qi^QeTH&&TbYE(c9TRgJyItro0;K;%Rn%s;U$*q-fic@L7MA$WcmC1f z1(98n;(XUMKyMS{BP=az;b)&JscvRr9v^5nDM305yj|v=bhcb(IlQLaiiXz1Z!>n_LK90 zKjan@5)#xwn{sX>cJF|OMn^}tk(!zsHchl5dR~Hz0u=A5>=}})GD|xT1lM;_zvoI`z|BelXS_7#V#J_fzQ}iOVfkV+py* z0OH!wW5){Qa>0}b)0zzf36SaqwY2D=t4>bu1O=W&?;8sY3FLEZxp7G_rrqL*?}H(l z)yhoMX#A$(N@vbeRRLO-H)&uEqMOVZe9uB6rbU7 z{yYoq)sNB^YZZYjQPKIk^X+ro7JYY4kUnfUty9uim2;wyX`h*Sqtq_qhkiGd zuGazsH7C#cRbT5}YAz0~VmA#)DvY*nrB6*u*AEmg0Yr?IEicci(oj?TAH&qA0VDD6 z0|U25H;p7ECK8^zASs^b;n^2Oj>8f`fEhs-9QR1FS!z!jMhDgR+<1&l2!H=NqFs!* zZnQhIUouh}?wK|3bV1OIl#OW-n$a~hT>FYWqL=N1-q3}Z@vPs%c<{ds`kzny$WrUV z)WN*{bGDach51N?!%2N4y}bRoR!<9xBnNNVL;p3~#sX8(8R^u2;_Ki4bf?!F`7K=p zBL$V`9kXXXyCpn4p}!cH;PhOyGf!w?ssM%Z-q(=c*#loHDlUiCyp*%SXaC&e>*G@d z67_WniIVpAK#YUlNk)ne6PVApZ*o{T5-FebUf!ndpI=0LVN2`RJ|rrLua3<(x}0e% zu0_^rI6pkZLc4u zrgn>nI0^S!`T{g+`yT3eV z0!J;vx`p6Yq1-_6Spqe|iT~YMTg{Th{>t~nLo>3CoChp>Vva;di>4hkpChvf5f;gN 
zmN_v|<$Wt4%QDco3uU}uZmk^sy8E(u$>(X29>fYpr5 zfFWF6t9aztvA;m3g?Zs&5**Nj5N|$j@}Rju1=N@XNud(egXoJ2;&j&yqlth;+AX|W z5|Cd>+_$JZqTmH#X7+OTfAAcDi)^kbPh+@k){9$?XtLE-h2HQgNf0!#7YHh)5I9^N z(O=A`?R%EIDoqSRg`PPuDq%*u8;TC|wmD*T^6;>0OiavV0KD&>H?@oK3mMd&KY0=> zgHGCzA`@GFjBL=s(<2Og+>`!7NkRycQ67gP3L;1C0p1!FHL}N3mN*=LN0rZo@#jsr z)$&NdbiH{udr>Cv`nSa0)y2vg2}?pMI=N4H`=Zo()Reo~o?<2-IO zqe>=-yPXbe7dkpRa43yX`kvey`Ol{!V%-b1n%!Uf=^aRGt5+TsM{>}vOdL<V_RL-eL?~zkbNiem7}u19>JzVV5S4k5F}irHOD#+{>Nx;k zU$o8~x>x?!`B36)(w;ou6Ame_8N)8?6}7ZGn|#;)@lh$L_eX=Yc|&Mr&A^RyB}$@q z%gYKyH{UOFnXYY5|##klvXIQzW4UNG;9Y7vS;7EtIk^~F-JJT!0;t(8Kft| zrV`V`+7#W}aOZc&9wPZUB0BSpq4>e5`6N5LHv|aSdn1HWm#T8b!3i6dA@d^%;I;5X z=)wqNK1}eR0_c$?JxZphkR(!hofLhQhwYGV?J5Wr1m^Vc%cGrZvRbD}^>0k=KQh$I zG@9M^z@mZG^zx=?AB%Y7_$v+zw@q6-N$!T`SJk|U8u0#>+|5u&&q4R$()T1!xB?Nz z3IC=xs&W`tsY|92!k40=T{wcjOIXlxD&F9Jq}_w5DR?edKOUd)}k6G5fA3CM6C7%CWv_yIt?7g>C4^!Zck=-@_zwf zpm?XJ_^zn)8@I^$)`V-+yhTkU?8@QydX@WcstM^;h)K(ur~5INd*r4is)==6a1;ov zef6Haaw)I5qaz5V(rcL}SPh?uTnhO=a`3fM2mvhL7+UaG=X7P?nh|0GkOIDxl@)+o z%FLX!s`#%r%jCC=mS$2EV>cNm&Ke9)e0T8?e(ED!R5bFm$Y2dk2oVS%<#%&)oBS_~ z-~l?l`XU6E1FhFEzQPn!EAA@P|8Sd&zz}pQ!((T`v3+Btda=>T)L^ISjU66p&OM9~ z(XhK{@*BMZhq0i6s4JoGymP@nEv~MHI0NfO>2c=oLb(za5s_rKWc~@uP40{lRe$w5 z?6Cy#s!#0SP!gmd`>{>hWy)ac_QJ(=a^aFxxzR6+M6BFt1!$n#fiPbjTxA5)QzS6Q zLaF%|t447bRexnk8J3IJQI-*l$I!5`+{bcU!hn^^>o}%CxGvz@zeB-f3Pu+y zaX}5V@CR^(phnn$>Yh?I7Zk>;$=BXsf$aqP{TanV=6b9P=N@PH%2+lwf)6^t;4QnLm?! 
zwlZsuTk(!`hx6f8NdQ4bmLjS@$oBoQ+85jjQ~2+ok2=cCybahWNzaR#1;`+`BTt}K zL;Br%5h5U{JBknH;H=cTrXBx8{MI0A{3C)bg{p?ww3)1+AH)MHHq0!Zc%PKOn-0GC zvik->QKn}=&5>a%co-unaWjv9CF z5ISv=GiS~aDYUn@SB>QW$^W4@BmrbbDEuFeZ6YI+482MYA{psLsJ&3cz5^6j+&hI@ zU+$A>WKqI4$W8y)Kty1|ao(hVJDwRplE>BjOYVT*G#0?+UBDte?3e7pk|x`QK0o|w zzWgV__P~DM17^mvdOQVLZW8i5{0Qz&rr1QReA%9PDWKDKYS-o2-la}kTLE&xO&4CI z25*umoCh`KrmpLp&9KpD0pl9e#B)0IriU*_kS(0uKX)R4+l0&!Lx^4b_g_PYi-*S+ zS%+PQgcu&%vC>!YGWYWI1?D##2)gz<<4z@F2QSJeI5rZi4gjYz{1hvnhDG7w7+JT>Oq-lOBwC0R_?8w(22DR5 z(-wxyZZ~w`p`jWN&3?y2LaH*<2Z!{=M9bI`*-OiA^~jXPI7ZRN{H)-;H^bAm?MbH^%T5dCT^12p3H#6S zlRUkj_8>W>q$aj2*hala>gC$Vq{7=i9kCrx?=#L@St>41JHIR0wAE`(^yd{&U~ae# z&rB(4SB*r(PjYePEE8@b6avdTnVFe64Ld4u3J6Omz#D9h8FQHqke{PJ&VM&MQTpY} z5q9>ZybBes#)zx*5<#MnJh{rK)K6G2%+JN)3!-(%$fdQlo6fQyC*20G04&ELPa9h!Jekq`~tE4l~Nt(Etn?|YHOf-Sp4Z9EosvId5iZV*oQ{}%pkT9 z`#(>uWLc*SZ56~ZrY|ABSk-kDk0#fD5t}W=FU)^W5qz6hR#rBzb?Fz41E)}*1g6%2 zSqSx$HXG?78X9pFxu`z4>{)VQU?M0em=3E%^fdGHmM%LEml3`m40{R47iC;W`r3|P zeJG3~9R2;NVOVzu3v)!@a6Co*ifPO>RJQl;-v@e5%A()G0f`xD@0qpc2npI3-ri`& zb^x~5002a9NQK@@0_tSUbg;91E4<;bt_4+smu3L=O9;S%G^adErNhj{LukLzlJ!=S zSp!dZzQtWMK6c!HhHjUFmlhXt=4??2`0zGqgU`I~*@HzU5h~Fz$r5X@7k4{!voYyX z9@Q2oW{=-q7qi%-To=r>7I1C3l)?9$y%&#=V(p6mPLiJrhjUjfPG1vfcNGs6(u{dG zGpWe<;BPL#PkWBw`oic{H{)|dJc0a&MBejIhYMbk^?Z|?E9d3rv}YIVtz>^rm(fG# z?!P`b?=iYLFL^ceKuzi<5~;#{Novzn!kLy2K;H?ZPE|AdvKF6~5gcY9puhc+j84Mj zHQxxp$=^w0! 
zaW|!(r78v=qk<~fttqZXdpr|7FJq943_KJP^(vN7S`k74`D{aG@0_K8pB39X#@m8l zI~Bir$+c$4l2UeCYbml26tJ6(?QTvxJbgSm~?sG`M=C1b>8xSnv+h|kAO`HpA|;gb(R>uL-w@`o7{jdLvZahIj0M` zA|$~ai2fVYDqgX|Kk%Ty5ezh_5#CXI6U_%I`9auEz?}89d|&^v{n^pE3Br;B6L2&X zm_QM8Ffh6SVK7eRH0YeeoU{wv>rHvsNCGf(vehtGKC5jSmbt_RKf|m^mn%)bu+U{i zJ^yd3&r@O~BR|L`yt~r$t)+#aL%GEwu%~@Mv9;IlPaH2HR!*ZIkpE=L2;9E7&kY-^ z(DouiKfnNpSa0LzwheQ1iSymvgk}PbHhno=jB=(g(fQ5QZP~Gd{>TwWTt`4=KYsqa z3*VIDMU3u=kBW_;b_9fjy6DZha^->)V62~ui;3(5k7oGf(ZP7*)$+yzzcmuSTf0h_ zw5p=<);-Q*@ z;}i#S45`j~=ByRBWXf{$oH}&%VrC0TJ#CH4j%|-?x;#?H)&7Ntl08C9x!}w8o1R|ahCpc#LuI=GoA}8`@h$zHr>L(hq|kOs>z>F zvJ~IFZ??AoiE^rkrpEi7a;fVQ%2Rh`>iM&BZ~mI2vZp(@Ip6s#*Uq`I9jl(%1yJ5_ z3<*UvC7Cvn! z$+B-K@?=*<+eEDi%48|GeKYQMoNQ}ZX&r1$J}q3--n6#AZu5`HZfya zPfv#}d_L2vCAunD^NYpaHop~KaAiRKa746F#UL5MF}Ij+20+3WV7ycJ-`)dNwHtmB zc=0G0Ua@O?CWuBgyg79`rf%r#(0FbRmwwA-(ArVJFE3B8C0Ps@-MYQhQ=(evsFx6T z%#Hc4Z7&qY-@H8b?za10$t0z+uVum}nJty}nmXp?v06HIaaQNDeHdT83Xy;F^yxO0 zBJ#?+&nwAanyJ}4>AVG--sJq%1#us z>&(TT_CBb_`mqkH2dxId7Aa#=wPPw7>FX=PAC{xI!olxFP03}jF7o`R?@;}-ehT=z zlQ80y|0R>*xYO6y_tMw*UCq7tKj5ZMSLc?(gJci+i4U{3)jsZ_obO>nUdGn>e*iNABZf^z1t{Yh z>T1~IC%?ASG{2giNnp_}7;^7im>J!|llC*z$7W_aQo<{h^n~*H!b!J*FK4r!dW2J! zMh-;vQa9_8Tv*%OX2qR36y`C+B_}_*ptp83+OxA_IYO1=BkiL5Fm-`THt*(%$@9*1 zMVHqW_tN9)pi0+lnjQ!hk76ihV7caCPY<1Xt*p4Z611FcR z5gc5N1K{Q7x1T~lY1c(;DY_Ca$%>y1WJ@EHsjIF1sC-cO2|6ZJ zGX@3@WRKd~+Awa3elgd`&P5*HpyLy=5?E`Yb@NL}&(qiOe`QbTYfaer8|jTzvGcBa z3W@amWqM|{(tXDv(%x|8;)0G(!ave-Dh{lkx^39ICufj6wxgnFh(AC26~`v2luxq- zd0~_jiAxSoADP)~S7g?86%k=P9kuu-+2EtVhY6jzfp1EqP(Ya%%|9Gu{baa#8`p~e za>2SBfvIgORR(-*iFb4->)WDT=hg-$MqW;uE=~tDCYQ^b>WnK!q$b6wMp~}5u354F z<)3~0b<>&KrHXaSZRnO4m3=f(E@tUn%6^eE(TP2w58wy}-AV>Fxqzd=0R$(n zK6y5{oRgDsA~P$C4@nLn&OgDK$>SV~X`)?>g=Qt^0Pv3p^Vz?yo|0M&1 zGbDi4Uitfb2L{G0*YNZ5EfC%v~I*UZ9+{B)`0LE;5c~gK;DOxSzFOp!i`E! 
zgBrS@1~Le~l~64Mn;{G`LhkSzl5f7vb|U0KCU_s%qf0B}pQ>5DFFVS~Z~&1CBR7WA zr(+Vgre9f`%`zTW?G3oz^JQ%%Zjv+$mmd_K&7>2BQ}Df&@B{j%n}chSkm zx@1hVLxewxv0qDkVLx!S{~V2R9MvnfWIH?DCArCNJy{n2!U2h0d=6XlA6~HhI1t== z-T#AhLt~(~g^!F;=Zl32%eK+Or04A?4>g#DzIQYh?+8|m9_UxD+p?L9^<3yr-y>@? z{#^0%p{IqCjpt5;SW%}Q`c~P$KJ#Gv*~>SkO^stL*Q!_KCVa~)4=HE7GKnna{8{Pc z79&)&w6;k3hUffx%1i2LI*W$)@>UGKHaQ6OM@T9!8h%-dc)!5)-XSHj?fb=GOtt7P z>O~`t20B<+SlkZcg84{otz_Q)}X+Rzr{+@Y+tCJXsg3%u6C3#P>8fIC#=&*Wd4X_-VlJNH4f?j#AK@$DH^TZbBr7WblvShQdPYp~zCLTu(I9rxfWAtY5(437Y{C&lJKYWMW9ih4 zs`BzW?h&AXFMwhiqtOcK&iaDFaC%T)$az&C;ZB zYu|EP)?fakVyh;r$CfrE&6el~q+9OzaDHMo?{yE)%!og&NfJF@ea_?*K#3c+- zfmm+JL+uF|Davp4FZIre6mHYOomZ}0xmfgi0*8RCGdff<@&Lc>d4z(H=xBGKWmCX7 zchS%o{FEc6J=k)rZOZ-kh^*T@@mJq_?-4eF62ndO>~abUk~eQ&L=8kxQD2-qapKY1 zQtp~Y{$CqkzI^!=D|0kwYMqz>a6@Goc#YQXB_Nx@uo(Fy7wup>SW8%d`T=;L3k9;w zUx&d*1bOB$Bqa<{;i`5sG6479*Vka)0~rJeXPdh4`82`YA|Z?$)y4b_$u?rZ}Tq8tzaMyM5>*_26*Jr^CClH+8Q`Ug_#@uaSDS_mF2&%mtHc zq9G;G13!%JyYBn=^4#T-_bSN&u!EKrVYZOy*i0Q2md$qJ1?k$Widc=@?jO&{v&s1! 
z=4A0dO)ciGm4@o`d@YE1E#C6xcK`HfiD5Ut0ZA3->|LeG?+r&oUz%tMW&YHvlIj^^ z9pT2I{Zg4(93^zErgE;@U+`xoMc&kYtzEMn9B*=29*u0|+B`hJ`Ighw0~g5bXsmh; zo>8vs8}{IBozV)UI=Io&+z)koHJOKMHmKbTh?(I<5w*MB8b2K+f%+aH$#%N7?F zY7pU(aAEv?7rx=bEnsREz2cS+ikZD3op1*h@h(glizvY8j_q}(i$`NPC+bMpfpQGGE zJ5RR^8f;r=ZI(*b(N*edXe+1U4PvRNxx3_97rg1jz^)VisJEZr9N6kPncOnjV8*eAS zsPV11;*d&JZdKp1ZnD9!^V70)57Tw3HgDv=SF?slSs3i2>>le@p_8~|v@0@0p7;@k zYS|Wx;g(9<#5mRb)^NssS~9tNI5ahtzEtIv7Vmic*j=FT=UjluHwA^J$kCOM)P;;a zE?Evb{%e`dxmW`B8iRTyahAxQr3ugp(66iqV~XiMA~{g|+D~S>^%nP;f3)Ou>b~&v zyLR;|8IbJ;1G+#sTA<28XfG@&d8}1wYM2N<0R~HIy1I&Evui*=G3$8)HT=gG)x0Jb*2*f_@bi;MZ>6Y+pQRb%nqwwl`TVqdbvZ1b~M~shIU^#E|xAw_{(dQb}H4QGcYFW&!Xc+1{ zWv8r8Y>-NM( znl;@{Yn)wC*`uhasHZ>EnXY#KekrDM1OXIv58reJI}g5vE2nl1e`P7KduX6pJlf z7Op4nC4yu3bY-_A!-LU~Ih7R>;)sW1t*-%M4jN}_rHGx~abkb^0UzMG3JS22Xv^#9 zAZDVXVrFY~s>mh3O@~xfJIRKJR7=)Y=L9g%Tz?TO>Dn_bi1F96XBV}!v{EzyzhJ9G z#*{5EDI%5t&}C|Jb#{!m`kEU|Cp0L^^q5-4_H8sy+_ltdEBo}Yo*n+tw zK>_CFnGyyv2qrc7Qhje^qo720L0IeQ?|+}{z)ZBbi{)iyFn#TV z-!9D2!2Web49TA+5M&@o@d*jv!KC;MtG})V*30(x{IJEq9HJ)2&IZ&H$EP=nwl=&e z)~*;)4E*b_bp+-#GZT&h3<<$zC724JYZDIP;QoNVeJT9oH@1whut%6+)qx5PV+6Bd zMJzjk%g1|+Ag@;EuZ#kyia?W(iOV_D+kXat77w$XH-bXGHKjB-bE6eEx7tU}eCe3} z(eP28CI^hlK+}bLle9hc`n~)2OUcU0(&d7258rP%J~7?L-sk94av*|;6IkNQ;}BZY zX>m{6qdtsagZBM{9#c%^N)JMS2fb6>$(F(kpo#?Cv|0W?h`Ez~!O90$l z54@<;P>is_jsZOg4f;erD1Fl$`_( z1fy_no7red3v?Y|VMpLw6O9L4Yw$O)o0PIoMpW`gD~RoTgna0{!zv$EZ_%URL}TMZ zL0bjX08Ek%T`*Da1|T^As#mO;9_)ZlDWyMyy|`G0FaA3@xdXt~g_nR(l%0klfZ%WD6$r@JexEpXE%$ZF#7=K_Z4#8{s94Au2J$xh^jL z%LOc$Kk#Ib! 
zPjIAjR%R73S&mEd7+%Il1;Zsq_%Koe@0MRnXg%sh&Sr;2lSOYok4o<)?$-LR=tj^t zVjAk6$dky*aOMoOAk^TPX)cqCh1RgZjsx6%EA$61brCa2y3nOcO__skepJYX1>-%Y z85G0Nl2_rz;~U7zI>{iFT1+w(@lu_UE;}@)YJh%vl zeTTKBPMqinOnOhrCq4pbp;{`{x?F#kKnyo-OaN6Rn)iLHe##G*{T_fw_`ZpK8>6t?E;}VUaRs~ag?2%J>|b%4f8X?F%-1T zT?Y?tkzS@JmT+QVeQF|WdIw&}hr~V4uZF(OLN2lbQU{nnuetekT8Fh8lA6){kMBCI zU$ya#8X8f>rFkp%5M{yD;l{2A1m*gA)WcCqpmRmBewhj(40?%$Q>ZmDX2z)iSjBTQ z;Zr^KBqAN;3a_HTSF!x=Y*U*C(wCH++!LaNtHgf;tT1#KTNTv5JUfafk41u^T)~J? z#HM>-wARyvZzXP%Omk}b8E_p3;f4$fv~=uPTZT8J$4(=}O+^J+YbEh1J^g?*C$`|i z^J6M4cZHl}&z?P?okiCLD;{JMNP{K#q1@#ux2liwSmTH;`cmIWIk*XEyRW3&554LE ze@F9Ea`BFM@9-=-KzBsp23j;UI*ilOLpVsFJ4;GShbg&vctD1KZNcdZBJFyz?J%tO zL-V!=PA15uro#(OxvS2gfB+{A#S1{d+UbZ*T<|b|jd3fs%oPLw!Byd3TQ&&?kwBXm zzW~Q$0h>kx>{v8ngY|?jiooh51(7tHVh^M6llkihu|fg=tM$MGj2NR8QiKhEu{Z$p ztll-ya&7s%Aw|JMAqL)*%=h2xr0fPB$71^NyerqB>g&;2@D*I#1Zp0 zIX+K(HF!UrABQ(6% zZ%*7OAyG|L4xbQ0RSEy)*DUJmB={yb08--6aW)3U0o;?jU3m}p?%S7$V-9;Y8T}6A zl_ZBXdr(DU({d2l(U(GsOV7pC)MYI`>k)q1cO70_I7- z7SvYrO>dtbs2)Ia+MtH;U*XT?h3cR-LF1aMo7)1|5D#73$q;i0Tn<=pA7~{qi<}Uz zfr(ayRv`m47h-MI=7;Ty?k6&MMeDneMTw|GW<~B z*kh_tP~{i|@(9`w{GEJ~acWqg&%n6hDhfE8+0n?vD>3{y)^*gq*=90^Vvv+ofj&GB z>HwkJyobF8NM&Wdr76vy%$T9J!nzgWvSI%iK+}TCO0m*9JbN^`sPlo8z63fxQlEgu z2G~%-i&5Q|>gpz&_cxWXiIQNX$VL$&R3ITl6&%P1fQ2W&yr4iZ(4pnqxBAGjGJWSt zgN(XXK+|RBg(;O@{AkGJ8k27E|2JR3uZ#_tQ1XBA zVmH{P;d)K_fC`aYP|P-LAzAN!h+9&=S*iW-0wx2(v{9M)cM!A5;!gePJla%0N9+v1C|nThb?IKGZ@`SoUIEMyOAI{CEIb6sc(yH5p+I2Q%gH z`1qR(V=@_hqy_^dUC`f@-wu1_Bi2hXzhOP z!V=f~mX3Z$c_YrWCPbW2|E8DCBVT=acf}24T|zR2l7eK>;U3lW`ebPQUi$f& zz`M+RybZwTb)PBds`^8{_$6t4qQnj#It2LIOy#N4!Sgv*)9;OALPA1>FJ`1J)RZ8g zb$8#PTLWA0Alu#RJ|@0DJ>vLrIwoP3C+j~8)NZ=hTLQeH578CbfjKexSG{P&kURlp z5RTQBR#uZg3MtkUtyp#L#kXHd~1>t3OZ$HogDE_Mh~86EU8 zWY7=kMn|Q_R0G2^sZJaJ!*{Ny*d`?Ke$CP6U%yL`$fcSK%-a&(yYBjlk?znjs& zCj+<6Dz5Nv0(Jvb|EvrSmoAYK?rf;L z2u&FfAe=Zzfg4M;HLxDspBl=?OW~3-+-Fk}PkxRxc>x0sq# zNl39(_yWp<4^pZV^~b&V3WO*wNcU-Zf&fHp;p=c%vm=)ELjMrRWD#OxWo3l5iozly 
zQT46=jyk~6{VIQWY-s2yVgV2=*p)GOrKg{#lMquhxXs4O8p(Naq`UhskZE~&6*GJ> zG1;>-R=PD+3oHr9DydSk|4zL5YFZyxdk%F8$gX~Da9B>dGZXi;>=C_A&-y%~JOxPr z!!}n`r-YstTch7C#}Xz9sJ%azmAT>%cB4w+Um9gcA`{0!n4FoBF=zT1)!Irz)PVSK z7SeI(!$nemiBzfHqFl%71BiXGz|a+I=S-I8jgS>$dl{LTLowssLPkkR2@L$Dz5%sA zdJiLbEbgG?uYl|M0k*rF!6!7k0|Uc{7pgI`gxwxElld@O{n)-^UUuKUeZ=Y}q+wK9 zqU!2LK_I!h=12}63#d)dnCv=q2#Ip`^b0Xj(fE{pyQM??1 z<`_ljBo@s~0aOW3vU1>4SkMQ;KE8M?swX0o=zLMJ$E!VJ?i zRh^TIiwJWXpuG`GbkWEXfr!YQA zsRJfEfddGZjTx6xKXIa$*On$aIOab*06~bp!F6Lr;_a}TQzqJ%THFV0yN~MS#;_RG zh&K2`=)G9c@jKnV%zqEaBr**d!RaI1?-5xEBT~dO6v3tvi$73o0GDy}@}j_VBT6%9 zyqwC4KY#v#Rr3rO{(!6lxtyu2ddDy#XmLToUzi6{)fZ!sPz0b^-e|QNsQkzGkHNqN3c&Y84#zt z00};~Nk1WZhtwb5zQ_6bRU$l5cM*$j0J~OIRq>eQV%-4l6VNYiyv_Ngr9G6Cnm^kv zU%SQ*4OjLMxK1gt$Nq%S-xXj6lVLjaB2jqSUP?uU&<=X0q@2TxB(HspK`?Rj2x34`=+u-kihe+3#C9-? za0kFDpt(B(iRFLx@H4_2c3>7jQa_yBvIe#zCc`~rV;;`VSA0y-7JwE2m3je&2ZM_a z|KRn&a_Dhekw0{*7~ zczk!;J=C!8Sg|_dIq$8B<4Azuxt%lR`bk9X#HneOB$$bJcIJv}BGQaot4-LGcTkt1 z69pZf*dzrBoW7}*jm?tIZHwD%=gytGr)Xw}Z4ARovzTAv-bS}{ot55@3mZ}TeAPeo z@=iex>)pQ=={?$c&Kfl-0S34F?mB+8t+1m$*M+7NHyX)%Mxk?sz?9Fur#Q^>5?YFm z(2jp$SdK%#;S|9qR1K#4;&U2&DPFkHs0yVKd#e8Y7}frNe&)gx?RKirwN4Z~!kMuo#*7T!#tn#P+k_?kmNY>-yfVOI>({mjl~*U>$Z zra|+T>wo^*bW3RO@g;qd0h|OQc<0-BB8jesHo5>2f4O3 z%gUq=nl?rJOGfnBGO@@~gsba%H7zX{SjXcw*GFpOcB3Ubi2fWO4_O}foYWE34X~u4 z&RhY!Z4z8t7tq-8w53J@2=$`0^ktB4U+RLhfFgkS)&BnewW~466{f>ynVFF(e8GF^ z86Fmfat;xPthyTXDP-3|kQ#{$eC^sbr-724sN;Z1K_*WO%@4AuIVN5)6YF8`pJ|AA z-xQa+*xugmLFFJ8oV!V0KUm`5qe1igXi!C>?Y*O`3wVGWdbFc}2%$~YP**3-L#c|; zP*_ytjAAqq3*Crv3k;gtybFc|3`eM1fVgwr6F`J1h4g@sbViBIS%y){R&knrTq;>m zou0N)JTN&kl6<(O=tCmm{_&dv866XY72a6b?S!YxY=tVXXxrH=V$IOU0Od@i(f;aS zf=CP^7y)(>d?CzXfrmC@aSsXj2C&}Wg7Gtnio=Q--9<~36OcLF0~;Ign9OihWu+$I zAj}D=O-xLX6NtY-b{D>A0hpg4f?xqL4#8*icJcu5z`Q1s2ponz4{-}(2tb0dFeT)Y z*_XK1Bc8@X-eJEq#?_|)mkgM|0`+1Xi-?F|?pX(LCv-PSRduyCkT2vvIA?vv$IXKq zCJKGdd-nLy=vVH-_J`QsgkR|W>eUmx>`T*9{y5b*hmVkr0(s2tpf@H!L8Qti9)2O! 
z&NuZx7aL@AQ8efG#Enf`aOt;y@*&VJD4^PX7IhbG{}JE5GkQ)!_>0wC&xVEs)*K{s zUFK-~HN$nmJ_jq11bn(g%gH+^s-nZVu7dgo6B0FT?TBca=z1J;)FMg^vQUDNk z@3?6DE%#)TLdi^hIbW~V<4;e=*M@!keBUyjxuPg*V491>u;8#Zv8Jx0Q-)V)1tG?3 zZj<-oB-urr_@6**#bv$q|S*Ng{iLnVRS<*Z#DvRhH!`=PkaV)dAbI%?j zylik`kHFs^J>yUq8h9$Ga;K)Ih|(ECKUO4qsal+%sCr)11Ml9wgIPX{^%(;$ETbY$ zW>v6&1b}){Vn$4;)BuQ-a;Xa15LTNpI-@tBPa^Um}DoN)TgHbgaXr5S_RmL zY$6T>f~YBxRB~QmBpq3b9*|Ie!;ti1(bfT6#*y9{F5^wWzp&F~=sX0*A+m_nCW=!-<9p$))zbQ0KtETSz^; z+7=0WIu1#n{LV55u5;(EL^UuqF*)VlOgKG^X5lkLz7zLpN6PUPFiol7(gg`5-hq&n z)s^_^n6!=pTvLEl#P}68y9myxkXeQT>L<)oU5QR#ruN;*=q~-b>wQL#S% zl3V*xWQWd247l|MRgrX=iXJocznW-~uSK#-o%1M7?07SaUP8VdSQu1bMY3Vkgo&Xu zF>yzW6epacUg(klieI2^Q=BR&m^)G0_PvRWWSpIyB}=2AB<(_vi}QUMOWrzc!36<{ z=OL64gO%*iKcm`^)oktlM6A5iDDfO^%aF=QMRkI?JD85M9;*KFfG<2LsJ7%WcC5L4*Z3Pt+Y5CQ4)mGM`-P@!pv_E_=IR0ihgKkbwv# zF}y_JlSMdC;kUk*s?J@VIg3a;2)|MSzQiJA5+aok4V404Bq~ePspy!d(7`in;$VmX zdb9+BLb$i!v!cbj3={M&wbe;4hn)uwTt{yScaO;3VR7TrR#Ax4P+xyk(7O3giktg; zC?O{-ycK~c8pj)_1X?Zo)=ne_WIZfZEW9wTzB1UB!H$wfuQgRHV;1gC#EKuys5<;M zVt*inTo@Y#@L5To%ricTn|BSQ`Y1c;SbmPrh@K3mPKBYok*VeJ9Ig!U3My=A@SM_- z{o$)ymSNAj-)|Nx)pG>6Os1|cbI_19d3J1H9OPCY-%b)T{pbjp-OoeHDwYLZIVie4 zh{D@`*^;m_1CWP?NW{k$IZzYXj;D!ucrffDm^8rx!H~}lwYjT30Vs)i`WKWW#4PLo zA@057xo-PDU`=O5+J)k5Dh~4W&-r?t)%iQc@Av&4-{bgv-s^+qVl~2m;66CAYAmXCpzlCE2OA81 z6^$+KvgaK=u%p%!12LN@=7COG^v0xk2m_j+K(fFAHnz=3VdqJ*MNvoLUzLyhBpKTT z&1qT{t}lgde_+;71GV|^bT@r=R%zhwCKk|IVQ3w(uRRS2a0Mic_CR)c5JQL{4H>Es z7choSpTs+Z!J@!Zy%(rEal;{64N_(y@WdFTd!ptNMUSKIdI_KiL7^C#n=?TNhynr$1)(DW{gn~!Wc9PM6SUZ%a(qDOJ)3IEHkt4{#^(eqn7*7DrQ0q2b~`}PR9Yq z4K4gjAl9pmBV%KE7#O@1arQmanhR`$2stdVEYYOVX3+hz0)J#$;c}C)XBd(arKbaf|s7NK(B0IA)7n= z+|b=)+^2NfDR$2?)&a&CWV zslWobIpI?rl4n*Lv>B)ln7O&-@oq@mS zIC(USilRhVbH9G2ijmPX-EbRc5=`MywNA&iJggSsLQku8mz%^iCE{BK#qNE@f|t6N zQ}yNRSE=Q>)7W!2Z`_!JxoK3o3BbFtjhA6RM~e)^G8ntJbn@Ng`1qXSVi#mi1xZAJ zU3?&B1@|O5A;Denk-FLX8TU z80Uef={RJ6==?rGL%96IKhgtozSotitXFCLpPulHneUx z9pi67!WC>;q&>N}1_xXR4%NcP?AQX(DFC5V#jOX2dId05&2qO1vSX6qe7GyMkbz!k 
zMx^&2hw+zq6l!FKM@I<|i~C4hH2}N|xL)*lu%TX84}!Z?H8t}9E}6x8*B?1BRAB*W za@emPYKnGC<>{>sy(u~!;U0Xcdt4Lh+;%8dUjdOVEwj?;?9J=f&!7tOxO3|_j!MaW z_Uu_=qy+(~om6KeZSYKS>T^+%WI3rQ$eDnP=E1+i$}txCZYUA7;A900f|G-odMtV( z!Xk?e`(_b8bR6FS7wJv?w<}ZfbUaxzkFth(XJ(}#LPtn|2+XHQ2z@h583^#wKjAjI zO1bpgF}DJ4;m8K(@-3q2AHXvfofnxE8;?lJ&;cCV&rvng@s0QJ2d4B?gt-k<9$o5@ z2be@Fb@9v|I1u^xblh~QAKhWug~+M{?8iCy+l-;XLXg5IbaP~y5e~GBp?vy9xiGqy zcmQYEk32%Ofno@QE=2XX2aFHq9aN)OtBfeqo*YOY#7*0W2AJP=I;c$h;iE@Cv3%N5 z&=NKu_OCIl~#k?gk;jq zjsz~eMkPcBGlhRQ=z%<4M4qInK|1H^Zv+&n6ucqJD45H_?! zpo`C-%OWi+BHrBtb#^+aCOPAG3G z+q2X}{0+ULJ+#ji2imTe)4bHIYb!mOY9pLE&Ioa*Jkv>;ZZI`K)|R1p$9WA!xdvH4 zRkWKof7&e0dsjY2CUy+(n=JRjVHsi}!6_fDm{uOdNeG9m$(D8ad&a=$a6Vya(n=+l zBi?9H=fXdYm8PDJOModV2MLF>UmuDALN*g90DCl&hQ7yt`c=~jHvA_ZG3<;${zTDj zzF^mR-Z-0IQi?RAA+#g_4lS&$*(`_Km=zdm*t2Ft&01?FrlMXC&a6^JF^Up=6NKd; z;qldE2)c+BfEvty2?pj2RSDQZAsd?vv0Zpb_wU)W9O6Te{btk2{}okD9kb;>jJD#l zm+>NG){^z_(undYTkS}h!)H`Y*w{jPli$621qBp|4V@a+#1chmz~Qy#ufLk{fua>s zTrqyW89!Z6@SYd}a}J-Hkczz)z^=0idJGEQh1n52W+SMFX&X8f>cSQG!#rMsntDLH zh6OszU>;-kt97xG-i`x{`F!3_Uz$GLrf-^}IoIEJNIpqlONy^!o1?zHZ(tzGw2gqb z#%{%ajd}cJE%iXbTnnxr&@j0lCnoGnQ6$^k{|M-E7M}j;Z#R#=P6!7#Ot)15%ife}ryBiS4%CdpAT+j=;KVU8 z`u>I0<5Rr<4Y+N|ky4bJi9MZiY6-bzmwI{q!$*rs`;~VqOpckfWg3O_euo?Zq;ETP zvb@8e_mU}P!0nfz`}++20cHTNWR~eb!SfsmB%5~bbjGG8Fh2>!ud5AUF$%1RAxymJ z-kH{p03IYkz-WIZmJbyj3H`YIf^`}4eUNe?i1zC4<6SlZ7AU`I!x4i zr}L=&^=lJ@6B84!&MR*g+;I#t_2Hu=%!pkL@jPa5h`3+a9ftHq&zkdVYfagzi)REDB88rYX2vjdAw|TRBIzwu@io?WMyO@AI zn)#~O`B)8IT@f&Sz4Zc&jDbAfG5sT!xVzYv{{YU$O+E7++X{;g%PjNrc@e}pr{1e2 z$??WMdVMl$l)~q|AfpgA6^pwLU=f~8#As~}DqJ`l_WJb(hzr27SabzDEe<*ai(mC6 zOd=vx$!a<<-9jfx9<8qjy#?mAnz=@#%~|Z&eV^5F<>FY?-2-fZW(>QIRbEu*)9dm# zZ!GhxMrQ)4u`!Lcc$)0!o*sEdzn27^K!-q-aZmJ zzc~rgI&{8Jv^oNg#gTR3was?z%DK>?n*v#^e9=~oH2;WMz|ZDQWyxSKeo~TA`=&-7WDA_k7aA57jCw}DN&Mf`3Z+~0 zBF94q1>P25prwSp!W3%kRYIX8IqY8-$Kvhhwd88O+ISc;(!|6qE6LdMlV7!01+0QzF_eB~M^}oIA$taPF7q90&+*mjQ z`JfAc<#^?u*wXg{TnNr4n|fG))ngzpP41ikg1#i`RP@AT)hY+pHPK&*PiVxJY#Pf4 
zdj=KliE~;EH}F6q5!hlNt+OX4J1HV=C}V!Q_a!ud2Bu}IGs$uL=2FzHD&kbWs212a zWjgV_&;DJqW{CA?|_7yZzPgy2?>$*AbZjl~SQ;F)%rowWg;=mB)?A_d8z+_o(ricybuXySQE z0y&D6vFfJU${GvkG<^UCGz=P1i(Gngh^3}Wl;v1PPF@1#;B~hFLm1otSw4$*^RM>W zVdk34Qs!CJ5KKRXr3Xnxo{8AJWzuf{OYEOpQW7g-1w9?0N<dMLFwS?na0Xa$oE|YXK-BJm zaSL2HB1^~BDWk1uxFYq`NnYR4CoQyOtYCgz#51G4U9NLDRsedFLlv;V+TW;uz{DvCFtJf306nRGD}LuFA-) z{s7WX5PiSdFebq>0z5Wo%#dq+frAOEqVofwoc@wJGGiVY~uxV*+;N zNS;8mOGM%T79PMX4JTLH88b7qwb>v+A-?j0O1`pmVjdT3>{Zprzq zX^j)~-lA&#l%>BK>^meZM&3WSuZWc8w(cvP5V}20TRk@-$7w&my4PE*ImJD@tG`a= z*`G7>&%!lUIb7qFF#Ss?VEQIT6OUi+pE-!%c*lMS$}l!w>-_sI)KrjD@Xjp!NFPZ( z7&UjdHSoHp^toqwXV|wG35V8u8@%yRj#1pe;SqbIR!zC+U9_FqrA*(3_bKnW5(86% zDvPB=j$ACx-SlbI)|F?@amsDoDCgwU#5lXd|HXBcE!l%LPh^K*j=UVmRI^=hpan{zr|Bvj7&QSK*=IC>v42+BvxiZRwB=ZEbZ{{ z3x*2-WnlIpVQRX%%!HW14B9);bdWr}{u3EjxI;|PphC7^b@C0iv(}|te%w-!#R#YK z=E)0}i1?<>&eI=kG3MlkP=nZgaK0~aB(sXKaGCB)9tcVMK4k#SscayBAt2s%& zDTD`!`aq*0(*FXo(yYao$BY?C!QoZ9kdy2{-XsdR{V4B{qY3p(^k(s@ruWG;LXb6) z`IzYQAtqUrK_8KmXf=y;OV{Ux!UV@)rD{f+ocF~n3qeD<(s}hSB2SnESd^@f3|v0h zkm-0`_r_hJke?rF^o+V21E&t(K4G6HR@t+5&-j9{f=Wzm>YRMJ_Q{OSy}XqCj~9kx zCNpZAu`i^@HM^UzQVqsqFD z+75))W;~?`xn6%7|2Z@=QgN)+4>~Wj`Nb%Kb$3?aK7hn*2HIL);%a7O1e03Und6)( z0O7Q?R*8dzfi{4hPBf-)6PTc(+PoHM-$$@ z3(*s2Y!R0|@ba)!`@WYmv~hP^mwhh1w3ns#1dXfzx;ycY4)LqsaNj~7JVJ5Cvo|e5 zu_)_fb6ElfD@TIrhhJe-`L_k0uUsUeg)nB?TR7ro~Lx z7j(wIE;#S9pFj08j#k<04L1o&!rIEzT$rxOPGqcCKy=G;!uw(u?SQ4GxLjDzbPIgnt%3N0DpZbebaqs&CKo2UY@kwNMp zDJ7K)w#an+1qU9}Re(ex9K88fOiBIb8tF&c6zkTl1LYfzf6R8Seeg>DmJ!@qAo8ez zm;fArwN48WR}>&r)FB^rTAwoU#LUD5`bQ5S%ewtFTd8RM^?l~qB&!Lwr6)jN3cAYlI|Gvy+6?6f{xRF zaL`X?C`nK-;F@?}>;(#EXNCC?!iB-w))wrqdc{nOyR$;->D zRmXSb2q4NjqP9R0L(FL?aVidpi)Uwkje&7+@7@*g_UiWIsC@^=CCQK{a3Q!ui{`tO zrq6v5`0yJ0&;9%N(ZO8n?NbTAJka>w?2_T8x5tjlU%S?&=yqztBPK3CQM&FVpd?`A zmE6Ro022d=F2EiPXM?z3qAAKVD34H0>CfRwfhcHnz*w=S#t~8(d}cESS=)NiR+50i zS(nNG-jerzWQND+Yqnf}{yQ}vgnup+sA;N;Tgn{|-JjCb^nmLY>j}0)uEth&(>q&Cr8> z)ni3Jq&y696#E;OhL2ugqEE8Jb&BHue#h6@AdSnO?`R{~Zz 
zOe`_Xr?<;fZ?is=R#R!u;R@r=@@>H-D16QZK_QsusOIKq5iE zEiN#>!buq^V+hWP0IY;`!QhYH7ZBN!x|?K=Kp}LxeYnj0z*@b>@hDVamsoxUD!|sU za8dQzNd2AgNI_*Q(z#PoQu0P=02$W=^g#<)Y3&KTK}fPfl&Vt^OaTw#FG%IpG^F^H zWSVM(&VrUC1(QR&d4&bu(GCt>kDTGd+wD6m%X;FPn1%8WSd7}gsMxr$f2>@GH?u&e zsE;R2t?7r0yYTJhL9thihrQ!0f@=QCOmOYT|ERh8#YcT%)oE#Ekx|3+US6-6yZaW6 z>zq9$G{P7^1K>sXVmxed&0Sp=;MtVt$v7|(Tp`Fw9Z~_H>jXXz# z{JytrP|nZx8``(trDT9mCrzvqR*vVW0H9dSyR&Qs9O?pG22#g1l7w9N-b@W%A>eoH zfUckQED``CjA=V-3xS1;7#Z~*(f`xz#(cSOJ@@08gvlE@y|0x@JKa^pp5EpQG~AX^ zf>&83*pD<^*U-oX(lm(-2SA*ZpwVh6jm9pDh^PTPNli~D;!UVlNl9BW!3ymlOxeWN ziL*N&e-C*>vDIFXz#@9&js3zr(f6ZEJOLZF?ih4|p!hyDHK}M#DWApBPE<(z{0um{ z$ut^La5J2YJ(N$xwwQc})#Hfj8IMg&lK5G=3K{)Yq_M3hIhRSEso=oL+Jf{9Gm4a~ z>py>f2;D~3!n`%Xtf3G&en^R|EEt(zS5syVIu3#IF#;-wg{l8m?6wu}qfXsM;aa_K z77{G}AHO7RPu^uP*c5%`;0~@ij_0j=e@y>WyjNZNMo_x&b3tH5ovUDQ^G_x(<7308 z?g^~_d(vIY=vKc{SbROpmg8El*fS}q4ElkS2tJ$&;82ZBHLJ z)VdmG1qcd{_A{hC`}oj;lCoM}07K*eTg6&VMG(jDUkogtWsGjk12e4ScioOoYS zg<;QxAWycz9eNq2VmM@Y z3e3YCooRwzy}AeOAW><66|-!4!~|*fx0(Bi{L)-cQAd2lO@c*Z1elDDcRmk>f#3;j zBN=uesagP&V}#DTS{D-&w}nSc0nPYV6{k8ia}!8-&|*<0YJX;U=t{^-EO^Ep%PZ)a zGYAPCrqUH1(>JE`oz0ZQpfz6NoI;3qg*+=E1C)V^A_-wAf}6Nl9co zXfKj3vAE%pMmv8(N8*!K{MivVCWOMHgoN3;PAE@qJ2}RVbAc^O&NZg{B{WvLR z#hVWEQkh}pGxF8I1FVoN*G+~A**M&L-n2;fUDV{94R<}g3ZRPM_3tJiVR zs4J!|csoOdSLiPBW-Ezp{7)D(zrVYvT}ij;qnL?-@O{o@ig{ns=}kV$^e8F6A*r+o zq%Tt)P6lK0^DXsGZ#OnH4a5cU z{`#;2c;{PC>hgxJV?H5p-geZU&%-+tfE-o?gvnAQ_xSH4)wb^}FXA1pBvs#pEwtP{x1$hdwk*t3Y9X);T z*WAj2j~`!^^mFs@5Ia`N5vcG%;*4~KxK*E;TIbnBzhinJ=*^DeuU~t9C7Oe!2l*G zHtRaaL$jIp_AKf*)kG+rVvBbaXI}bUYIOYkdDqE_67?js&%|p3>06r3)Cr)hRP>zA zAS-W~m|$y!mt+kW7nie(i^8eV1JX~uC{H_4@~jAAl~AoIzFXOJOhQ@eaYTDRlkhGp zGaV_l81C_aDo9Zp*G-5?i<@lQvIX$I3)Buc0&=R8r` za;(2E+7uIV*799CTIC5>2)eEi9C{bEmMuV$=pw)TJ(?XzbT*;QA8hn2a#2Y&S_Taf zU8&w7NY$7yC=#g)cY&P~s~sW5anooq`NqB2)E0o^3mA%_OC}NLRuj^?>ruy)AMnB~ zn8>3;iV9QyP_kjD(TN$%09z`=B+%b!myg2`H^v~#?1=)o=V(QW^Qb=^ox1w;7Qw_L zzgvJSsK>BkiA(_#?|`_&^Hnq8F3-W1Dx#sW52%hh`kk1qw-O}Qk9{a9zT0(Fpdu~q 
zF!%9@Ew5ERX_bYD){;Db1aGv#0Lcq{TeKR}S55FZ890>69^Aq^)#gq@nL1EU?BnC} z2Ll&?EGMZbGV~9G34I2o!CA!o4XKj3j9)n<1u@u5VaK*5L8?&gsIf67)zNHK$qKaU#; z1{p~!$00g}`LUrkIBBi>?KfX1t!SGjZ9iu1_ni4?ahc)kUcDP!Is(wuf*(L@(Wtwo z8KDSIA^N*DH*JRX55>nR)C@ndm)tDhK{bUBSb}XE4CWj32@n|~HnLK8Y}4k=7~BwT zK7(o+q+X*%E456lVnl+W+1c~L!U;(8Av`Dq!X(ZDUN|kJc#r@Kc+tcZQql@A4vrc6 zN~&Z@g()b2{AP7-E!|;qo6C|9s+FA6v7*^gUu#V6{KSLrUGAoX5$Syc?e|aC^I6wK z?Hakg%`DO!MHuYPL_Uomo3Wk{&d~jNX0ELU&+Ye9nkZa4!gGz|(hQ0cL^3~vjS!oa zo(q01Qp(KUjU z6fOlu>d-3(yM82uHMX%x9f*TE#!mKtfWpTH{&`S_WP-)NO>2h{<;x=L^J-PnC{=TZ zX7mUV3iIgqCvFH?%O^VxCw~-{)qdn*jRkdvx_3aP$~v?Ruj>Bx2XhMuUz|?I%+$ z#>a;ywNKJVD9DD~J!`$2rFT4jmiO9;v|+75ul&4CiOa_4ehz1*q`vE~V&RDG_kwt{ z=H2cVr~DM1uE7cq)M)ncnwR!2SQM*5O-u!<69fodG?Bi;cJ9M#`7Y;pC3_YscIX;3 zl&vl-9tS5%zOjEV(NvMCwQzAXX$)1riO3&8yo4uu@{YgaOI-UG z?u`~X8fFR3Q_6PQ@L|gQ@xp*kix79vA1Q?&1v@kalo&n$&=A@m!nbYLF6z^;S&)7(8#7^)fPciGWbaqs- zLZII8f}~3U?1&gv%5MLG1LWlS7#Z3lEJu&++y8T7n{(H^dU@?K@@TxW%z&x%;1Ui2RIpY6n(PatZ9VJC3WJ(|ho}qwP(52yPQlFZ*IK60>N4$SE16}xc! 
zPrUp)%S-M~sOYI$IiK#GG}ri&s!zshZd0u*Tf13O-M@8f>@@a}H)y5ituLC#H=p zCb%zLTHe0Gv%r;cC5xd^pG0Psusy4x`OE}$T#m`{Ep^g=v;c;8Cs<$bQO&b&t*w(4 z5D&Q<&2GeMH1*#4?ubvmTUZtyeRe3vdOuqYbqAuN6L00e4cW9sbYb>t24pAs2=JIuQ1Dre1_3XqoyH#s6wnx7 zecr^a0GTq#4)Mv&J?DR%!|C~}<<2gm$2n5Iar5668LJ8gRu8~ZO{XDP-bdT&#G*Pt zJ7nN;)}bwHi43LqS4?>L-vRHa^AM7Rpp8KaE`M}#`rG*3(KyiFBvZ>KX^$8 zAL*G17#CJqh@0zH=T}bkuVI*{Je+U$nvZMx3NOB=@b~S)%O)o-sgR##Gi`Ihza9gD*@m(@HvQ4&nYtX8Qp6j7*|}eH3{TIz@2ZMI>m9LYEPzBoldTj*E#&_E#5W zcEixOUxorXFD{y$%1+$T$8#@j)eB$Q1Ki6hdkSA4k}7ev&D|Qnu&RRcOo{ZT{-ne= z5A6ra3-`ZVI2k3U!2j;nquwvdzuG7)-U-xal-a0HhX8&Kkzyvpl3{|rS)MHJ&x6sr znPSViFxC|ne*Z%HLZ|&aCH^6?FKBv@g&KyX^hHfOa!Ayl#}inFLp%AbtZ#kZXe}NX z4>`}oTutEOe;FUh@6sLR#yVaR_Jf`=mg|rrBM{*eAWGn1_5>-|b5s902HBH>34?H- z2{YutvV=ZtndhZ(q#&_kUM#WUpFT|iljYbz_8Wfqyo(^(y@xIZ5mtYZN1is20_r9p z5|?LYP!J-6NYZ7l7#0+m$T#iS5pH6ysGtBrGx>{!eoJf1=t`{{pT+oQfcl3#r~|rjIGbYoG2idd@uN^J)!#`-!584R_A*e3Vk*LeXiS>dtc5??Wx{% zWJrno?z0L?oz1*7q61EYg#~ZpQv{W++C~_dJ@D0&HEb>qVcQ!<-PVz`$MwEctWRCu zr;SaXZe`Z@Pf_6YT=RBy=K#x@u`80Z(&RKZHjW?f{(j09*PWOKNB~54`F>DCk+#EW zD7E3Ij7Q!b!lp1I489!E^u*`rve0<_OzR8)xZwi}g>KR3WAK6opLGHuDTbkY*kpOc z^CP90{V8dgnVA#08j{A41AYEeSRCAi54%<6tsBhg1-r$y zz@BtMXF4!D=Uw+xf90u}NG;V4lMMk>_A~~yngQ8$>)C8a7c{D_?@xI1u&Hs`n-YiO z-m!_R!I6rbJnXCOO2RK68>A_Vk*sJ6i|;7$zXalI zn%O}s-B=>}g7vPmGZpB%3sCVQi%Pqka;LAZ%^%{^61+_IJhRI3r6XXjl8%F3`~j3jP)y zop2%mgTPuh!red;lPDfO95A)Rrx+(|wAsV|2Q%DV&sbCKg}-#BRplDil=pL`UaO`I}j$@Xhuodk{o;T9q(Kz@>pFl>XKuu*{2z}E0ij90p8m6UA? 
z;X5#eP7uy7=&4L1=CFts{vc)MWy+3W?(JDl4s)Tql@;b@CK(uYc0fU zgyy*iddKo@Ge7WhK=~7B$kQQZM$Nq(siuRavs+L3WUDdhnyuP6vk(#!CnH;LcT|Uc zXXD7mfEA2+_ER#m%2fNt$|HD`<;t$STS>$&WsVr?3Xlg={ScniweK^7{j4z}8Xtlt zM6ZP4@m+8^!mnKQcws&IQMW^cD8LlD2O3pL{EX0I_Lg3q>brb%reWJ|mZuEn%fc_C?<*P{dgIKb_jgjf^zk^EfY)}&Wdtw0$ zSf3RPGxN|#y*O#C=8#{%1E_de$=U#X0svZPLdq(6Tm_D)1O3n_2vlqyQ0>z#U{+5TC`*j}K0_JO3zP zuqIjQx4@y3Y#=Dlsx@lb4d~r~H-bl7-8x3w>QuTzAX9Uj%x%;D?z6MqMvl&Y;twKU z-Zv2#`5v9}hq@Q%m7L;pS>oC&o6(;o^zHB|dnc{NsksYkFhwK_> zjQwL{w0A5v(SnKB!L*Q>U}zXYeZOWas>#VYGxM{2iQ4lI+j71l!Q3EDw6E4YqFy}w zm(??|U0M@j)Q#zh{z=?x_dd8isA6`$f~9}1Kr=Tg`da*QsQy97IAS5fN)|o#sFfMT z?VSOa;E<3Iu{5EOGVTs=UpG4X_a_eCUg0QRB9duD=en>mJaWis8=K{((}%)+-OHW!?8j30;FNrar^c9k3JwW{a*~zcc(Oi3=y$UOYU$(51ns36ZN|-or7r(FHeV zlyM*7z{=-Y6p*yc-~OL^1FSTX4$sQE0f-MtIaXTD{V#=pVm~iZMVx8bUI^Ipqwzcd z?j8CbE2gb_Jt2=JEN3DV;aMRgC541O71ZxAZ%kLXc8!RMu^Gp-UOBj>YG4go=~8G9 z&=7**EYGqf<7!qALkI{D(=H?H>}x3D@V37c7spiF{ipsxdd3j$j{YP+#E4R$QlL&< z?${vOdNr!biZLVRFjO9o-1r}YT<4xYd80$dw7Z(-j>0O-7rt_;2F5d)+UhkvYo)pO zpRbxwe{VKI*RhTB#fwI}J$3Ihd1veDm*z~?Q-V8#!N5x3ip*52yW^{FrP3qc1}v?P zW)822Px22H+x6h;CxMqWLNhtp9Xp-RpfV)d3PUxtY_25S6k8X7)9kler3}an2$3wn z8DqFSyr<(=NsX4Y#13Y|V_VlgD2a!e-v9nvv^`>q(lazQp$lHBH>vhANgZ1;S0@gp zlQwAK%H$;di=iX4iS+eWA1Ahd$YSfOlbcexGkdDBvF6=aO4Wv|*&hUyYZgrQ#%Z>y z{GyxX+F;PZWv4hnJRJy?1?^}3JPv!B0wxenL_sYKM_ zmotbkVQ}IQpcL$d-hhS4^jH$M%K(7e%Dy8u!i~ARJH@*Nh&puK&3%1TQU|WXmYCFT zK5$4u#YHD7*%;~2GS*M65f%jbxJlQy-LPl9;4%ytw<9Gs%*4YZhD{W2Kmy&k@y6T@ z3U)2X@MAaI%gc}1J^>?zM4{M`)SgxYI&m2puiFQ#&zg={{3UI9sQaSU!&9~tad+J$ zIMj0r^8<$8sfk=N$lkuruuWCtrr@cf@v7Q)@8ZJ}UvH4M^cnviv;0$>c^P|~pYkH` zhpKGP?%ijsE1m&@!v3Bl?hJ?Ta}ST#N3$`V5C`fg(pRd~vKZU6nluWUK2COgGCBd1 zMW+4s%XL**dxdq3xrGH^o!qqP{M9pS?u<{&M*Eo!G?kReSPzt*3OTuONiDMldPrg% zM#jn;`5)*C4jqbcjzLuk@Lf&q)hSMR@j#YDHJpC`{ymu|2c((m^v#>m<2ribiraKh znB1&S-FOEAWsHu>0UVQL20czsJXRi$G{8)2!+I}bX5$XoTA?s16 zyL(kTJ6-7AcNb4h7~Y)03fs-f`V>?Zi6`qfejs>PF!-C@ms|6*mN3aj`NyGW1JK@_ z<3Qb$eF&Y%abV~U?Zvipi|BW5rXlneHf9H8{8pA6IoJjz##dTpx3HTFf5UPHO*k=J 
z651S8&e@Q%ZwXrBj#RWCF}Wp?W>;rtr+Z+a8)_T$T(#C;(amPTG-)-VBFQKXac~mb zlYf*M6bK?0wx)r&;%fmTmyKM*qJjq5%;%+~JW9B+zP=(J+pM=58KNZ^N}i~R6b1Yp zCxG~j;q?r4X(RE&XalC2aD-;me`{Me0QF{oCZYG4nj9mtMVob{I!rp{J& zaTfj6wZIy-cm7gOXoZE9TZPvTvs-yt3G_^EeJf8Z1-G=Zs0^Ks+i1l&nbh0%SV?%l zmh0Sh3M|~R?Ecu1Yv(>KVY}K6C^_LyYyr8~;y}Ys6nB^fu4HXOTu0bdN$|4e!ra7< zM4inf-vNs!W~XIcM^AfhdxFmFWP9ATIyu%}mx3Ns{53ISQcQA(j?CSbVbP{MVQTAit zvs{=fvR|P79qp-lLM9brv{zZy@LKP#D=-E8)Ke2s?}yR8lN+>8LK8PL|q zrThW{%~c{od&C?bj z8(Lt8KySnx{Z{;ev$yUg_Dik8b@C%yqpB!qTD_TK#@v-aWi!ORH;68Vd(3*iU`rq%}ae*2!R zOJ9I(g=D85JX+NdhwAVS5=W7#9%~Y(uAy!}oo&Ht`XOlps>- zpFeY8yrehUN`?G@hmDjagUP?u9(U;mv)@&;UgR}@oqs!HwF}c&`JtDJ*4OKPj?Wjb zeQ?jXLS-*E|MM8elKt9qH<_hEoo!~hGy4y2S<7IjM%%e3d+%f`#XSMb)l9Ne>))jv z`acMv?4OH-Q13)=L5BZ%|DFa^JV>R&cHl-1PakMj0S-z@Nx{-a={)%IjV$F7Lu4mw zns7gqX3n3%LDAAok0MQ{ru%i`r_Ym(*4tQ6uY!Qygn8izRTGngr5QXP?3Uj^!&{|b zkS5%HP&UNK*GZ$PyB8A^Qxd=}jd-C8qN1UI49D#FNTSlWC|MTdV<8nUd}0dh@M`;8 z51Q4R`&PRu^_bf|h&&O|)3W-8)iD({s-3m#1Z=3AWCC*=dUT&Gh?zKk3Ji3Vd!iRB z%UC=&VK^{8GCT}tG0<@aYWZ>jj21vqhoavR!JbO*D+gEYfVmV*+7!UNvDi%;V1>t{ zL*hT_RrInKmRKHHeD);SX@^Tg@mAS&+7Y}C@*%Oeql3RRF-u#22graUA=1`qd5VC^ z!cYLm;lnUNFDF!HW~NsbKZxIJh`C3A2uUzPtqzA$sEIN>PB4PidfL;49I8EG8uR9| z&qu68;zW|QMiEb1OeN+iw@8^xNoG&@_bMZ7(O#0ULoyc|(iRG=?VY?9Paz}F^HyfS z5CAC0HOgN*mY#I#U?hWgp{uaL_T}<8;n-T7%4)h|r!LjW_sWD99!j;_s;5ACxw!pQ zUdI-C+f9}k@<11~KjFxyvOCFs;zYUMxy{8zpRb+K;+m;mACiNQVKG0|IR!u~s`ysQZPyz`=U_9hE+e(7_#JDgoPPnf~X5_k}Y#&L`p z4zPP~$MRvZ77PtVEbX+4Dq+Bpe?+-qtVYRAQClk8j@zcD z7=saiu_e6QBoo7UKv|G_cdh) zLfP5BsTic!ZX~W1Fr;T%L8Gk_;-39&wv{ASg8zy#yhg3o&=_1|<|2BzyZKuZZOBIR z=d_E8&2_rnv>%Eu-k_*alDo)##;`B1`>xn!%j-Awoc8{%1_dc5I;fhhA+z*tjCo8! zvZ`?941U#HrLwz|i(x6tD{*HfkZKyc}D{c0d5akY0f~H@IR? 
zT!+}OiFXS+tk%}n3Xf0!niNNV$W_We{R%@6yk+sobim%5xJDVKDGCOwuHDYg z&aB6e2SWylM9k1KTUS~(N9<&XFBj9*)eWiJ{IAceEe803#8go8+avn{b7f0c+h==& zUraD);K3jslIgDSx)h!KTbrrj1A1}Ey@5ca(de4GLY7N1eErplgCDlY>a_JF(iVTZ z0ri~dHbZ@1U0(aSUKPY$YrAgf?fX8@av|FWMWBlw?d)U~<-Mi9e#Mr5lju;u zYk65z)NFi>0W*Xd9Rwi;hpGtxOtcZ>nt!j#bCNum@`6*2nx75DrSfF(7Xq)_surm+nA*Ed+|btxCeO)@Z6kkm$ZlE z1`6Eesu~*ivCC&K%%1{$`}px%qGM?fjdNE7`kC zPl;8sL0}x(0=GVi#gbQ(i zpFgWazW^C}|C93xs~$nKNTxXe%pgvG5MX4$H^u*ZHtOLL85k}3hmm5pnA4>bM$il( zcar6EN1T7^KIT|<=bFLd)z{aD__pW@qQ^K0>fqZv!`7|mfh@t3LIora?jr>SPGnS) zVw_m(&{vfvl=2q=QPj;48#Cr|DX6c1iESEl25z!8wxzb7n~Tn#?K8CgJ{LM5(eOiO z=%zfM%kaBGjlr>(Te9YD*6a>ErUpUtlwBS3W5b4T)lFL`Ppr{0%;tPo!SZe{#IkRG zSo)s8)H-jLSzT7ns{+IQ(YXF)tAEti4x^eHfzD$H%om9RV>RixjKdhK>H%=f$fs$cb~7L=FK1mm|+ zQ9Zo)y6j>z9>~qKv=;$%;hRJrKR5y)F?_*i&55aji2Wo}y6R5|;9(I06RL$AaXVL{ES zt$i<6BN-|YEEAP9Cjid{8Jh8r_b=Jt|36RME6(2=6D#&q}okJjwppIN1+4(grD zRd1?2E`9N1U!G7G@98V;RtC&9@8Vc3Mvwdf)*a#>O+Q=o#qIXZ1@wX*sRau2PtXG= z{Zs3lqc{0aT)AP~Kgvh$4-~u#t;7ymT9On-uw>-%ll*gp^dRM-CH_p!aDo-3)P3Ls!odko!y>`9?G!07PY>W&t z9z-BrA)C&Xg`80 zBm0|}+Weo0kpeba5)bqc*>hjPF~bl`@|0>{Yud(zAxshxr!|kzTgJ%q;d#l(%p`e%;8!5 zn*JkovU){;Qp=wUO0{d&s;8{g4U}f-5&EB%d}?noE&rpql3ZQ)4?e3N0O9lE{=~$g z5QuaE4agEQh6@jScuF6L#0>!bYR1!qBM<%nZSS8)4bg*m9)uI zVXk(XZ|gsQU+FeaW5Uyt{aTsFC2ZoPzK;@`7NH|M*`L5SBvw0S!#~x&Qm1BQ z3Z^2wx^D0Jk=Lh)xyO8<4OJ^ai!inc{*v6L`SFzZdHMv(VI?_>pe1nPR~~v4dF3D( zd-OlCrN_x7sPs`nU>=q`(lCi`Av)%;catq3q`%gz3C5BGxO4)6ajR7v79mm;Pv}-= zu`NZESb~hd@8raWV;rI1v3$x+y*mHP-2~fi#xmKQw|j+MVGg=W*IQ5BO2SW|90J5z zr|RD~YO$%bD#I- zlwA)5b3Tb5X;o}9UunPd^e?8;=C;~&G4+i}lDRJ*m5(y8rY5zLGkIWLd>K=BW5uP; z@4w-rpZOU|{g;k*wzVPeK?qVABt8MJT!#@L6j)9u;KOgbLf? 
zEdx^W5{lNL0;yXVqbAZ_yb9UEIeK&pd=F2}2*ck3h}#6=h=r>R7Gdv8Zqr}^>z8Z=xL7}QMSl}jM+<`^aa4!x(U#fx1%H^4XXdt7?VNx^9}NPxh%R& zJ8?!2c78%ULoy#@?!y2tKgzB<+1c48C)hcjEutTbMOzBw6+JKD#+!b~c{xS~ncuGH(!YP-0n2NQ7pU~goP#v-v9T%wcGdk()*mb1C#+Y97tuzTDNY6 zRl2ab{M&zg?ZDCh)gw~C;AIR72#2;=D%#kYE@m`T9Ie}3XDg1k9{A;KS@bXe;69Pd?1r+ z)AKdKD>sQ8T)t`L%5#Sv|FvhsGUM~P=g&R-JM~yp`utn9>+vh19^Sii@6M`aa=ISx zmp|X|_gzzoi+=muH3|paK7V-O^X2uG+=&rh%L@bC6B&bPCLuoD6EFAP*A_dmIHI`z zKEW4ZLMBvHR25n;7k76_e=8Kqnd^W2o)f(E$x`Jt7NZSQA6Z&iAsy5*v*J&~uy|LE zT|P4q{O$L3Ab*;y@I^C#yhrWPDb#%Emg;&+x$Z%@3Y1&Re2zYzrX~OCt>b?By~^*> z?PjapKJ(bOFyGi`C%$?qN$g+W%=pt20nfbX_95Q!9I+cm-I(U3WD*k7F#-XWbUNF4 zC~-f3{hByE;u3^lSV#|Rm9m3vaeru#^rERQkx(RCbi@7m+rh}Brk%%Q&%1IxjufHp z)}pCX5;)bruRV0t?V1HPrJ|_w|LZ58weIc>iLUoQH&M->dv0Y#W)h9wMA&e7jj-QQ zN3pQIe0;n7#kAsiSyd2T7aqGDHiQ;r=}UUghMMm@H*2h~)4-z6+7`QXjI(r8hI7cnG<<|@q@~r7tpoZ%d@Oj%kYfgUGv-W^`Bi?Mni?9c=t^L4yZ{#funpvLxOa7R zS=I3Zv{27?J_92(JDnx6=f2h&Xb0Y`3Pfd%Z}%0U$nf+SLrRHLp^!)a^?_YMe=|K$ zb~qsC9B<$2^pbykFN5+jDecD(A2Q_LN!NNiQD>g^BCw>SVcP3xP!XU%Im<{f*j8C62jrWM%kCvC6QlhC{Dn@3xM-pHm~Z0<&8 zBX8bEzPxI0>2m*5le}|T<<8a=yL097jK*dYZ!)5v-u9nno6I`7rLcC&$7`DWV4bS0 z?T6OPTs`O^rNdD__Q6IzX^{FuVGU#A4`IFNu?B%~j`LI9eUrn|%HZHI3QAxR$q^6>lnKhYQ86MN7mYJ2L zcZg1Can(NblR`8#Fg8^D09DA4Q)bVpg=sc6wu|E8WH243dqdF~Cztw zViS^wL*}Uf5ilH2jvJp#f>MOXwCglPQ^l?Yd3nYN7suF%p#0bR&!0WZ>5a=4gEA8g zz6R(=YUXU;wd>Xq1bf*IE`yidGnYuhFN886W;G$mt~nCtn5sM>Y>Nh#0`vAj4rHI{ zR6Yz4^bLB|dgFp!HK(>N?LM1pZBKL#XEQgata%pU)7<*=qstD9WNOgP$x*iBhrapq0iDwXnNHeBC1i_`o%KdSCCMgxhwM%Ey6&&)obUJdyKdJ%*B|HHKIeSu zRPXoe^?W`a_er`jId&-GFss-zX7Vp};?6aDnpEV6ha^QOn;dS=OcopSHCEB{2kT1L z$tXrYW75rk9OtlIX>XF`P-Im4`@dooP57mhY@fU{U~}`7m5jd?si9dFE8lOswBUPg zmd-STX25UT^Z%$WB72aSQH7;79zL(}XR?b!DoAkjeno z>vWxw%~-X3B6VosgiW)9&>faJMuV>Izp4y=T;rqXzHcUQAiu5F`5J489TmTDmFv93 zi5|ZcQ=U0Rw%rJS1S7MEC|_)z?>pJ~%OI2^NCX%$$xs5+9XxOo8JR>10fYz6%B_VO z^G!r~0kK!$&<3cFG=FmrtCZCULKTPd5U{H#5X<}6r>bON@GC6;PiR}R5-O+63Htz^ zqf`?eYU})Q-zCT<0s?iMAK@8qcB{%l*oFj=9707=G%NSpOVWOC+u?z?Ll-#m{A`-t 
zPQ=Z1brlFOUFp(r{{wFJ{V15b4dCqg)29l{EnjnOi3(<%uzq1-vcMD|hafUf zckiw*j2nW@hbYV?`oGB_$0sV6Fqbv2EJN&eAMfQ%qox(CBYzQwE0~dINkOPPhlQ|O zY@U$}5GJISLBt^&>`cuw#6Ni^k^CPHX``R-OsgW6Hd~x$XWQ-Q*cN_(x~8x3S&q}f z#;l0yW<|wYcea{r{>i}06eBryc(ba5RjBq@$oATojH`EB6>c85f3jq~yhYccx4-+c z+=IpCi|%ZW=>6P*Au;0zzdar_dE~_jm2aHW2sjT`;CgSS;(Lvi)FMQc!q0L_O?3Jko_^ zkW#n?xx>wTN9;VtP@}QFT(~ZY7!i0YCP4-f2@P;FRt&>3it*GKInH! zvO{zi3l0XT+7TyKk>ZBSz+$|`z;}4q6nqB~=e{&GHJOTOrg|6}9)+zqDI@r=4;I-S z6dz!z+b8bo=x4L%*KGLd`L&3yP9v>cCexEXH??vZGhoSSXlNLmbq2-{_cVd;8_q8O z9Fl}X!K9`+7{4Zh_bS%5AxJMSA>lEYvBU|Qy1F`R2ZwAm9o-PfRqZ-e(T-X6(@k=@ zzTWH_&9Sa)<8&tjcTozYxs)qv+_R6IsF9W}XVmN+H}Ei*wPyB_oqtQErOEk#Q9 z2#Q*{_35B<&uGsboz&=rZ?4sbm)#~O3tVMhDSRK_ciO3?SgMbjw5+3}@i5wtvnqu- zRzF;vyJp0+M&*Tqj{OcJRr#wvfnAq%Pfcp8yzn~v>PMk&p+TeNQ{l%P$rO79k7e{y zCm*Ai{>@Yh#&c2{@l-10+swfMb|R#E2sW4{1&JY70cDCqOo!UO_w+yBaD>M)<<=8( zN&k-*zAc((sBIqc=s&N_<+9h}IFn}W{KWeo0{bkiPm%3)KUM7#)}zMwlP=X!_WEj0 z_8X35zJ{)qLCW`U9`n1ccD|Y?O(o11y>GD*&I`}Zk9%|kRK{t*ig0ft0}Jp691D$C zQ|g8s!QP=UBo3?b!otENO#Q)S$ec03%s>={Lf1f~5)#>oADz_nIo(kf1M&+9S52XZ zhenp^`3n~ws=NAMA}Ep#Le4+g@c&A)xYds@z-%1rxq*lazslj?VpBYug<^6U+@ZjI ziHaC(wph(ARM%6Ih%`+|*bMAfd~bSG;=V?tc$(9UA%sDniONSnE|tST3+_UU3We?` zT+{44JOci+6I~Gq40`Dv4%K1HbS2(y6qplETO95HsYPe&mFxonZP+$xJJdp818-9W z%#js65WdtY) z+TX^-RpZCRbnzDj1yCjhyn@6q0n?$Ooq7HmL@^6I(xV)Y5yWY}c#pS0;NqwmfQrcO~rdjm)Iz$Kp{_K<$1@7w# zaYq2BW6aB#zeb3CPMvCD6jDxX028bkdR-|QTVKebcV&)o(Yj8(R(Y)(H#D}gth&Lj4X=#=2ONy@6=y4ORheVY(!x# zY0H)SvU2ft#6A(X&8rXO8PKCraw%p3w-J3w0wWk z#l9iCH)}ho^jB$`^)A?r4S{)c=@u};zycAO2?ql(PhfvU+X31I9-ykcF6m>PNp2?9 zw$q7qO^c@c7(WbUYGnxRD9Vezsqp;$m=M{X`vy0dF2yRZvuU}u*D5!>UAvRbCRb_H ztVvVX`|RUkce~qR)!NLHkHecIx-YN&^`MgQ}WFW(RKxXDm6pi7gjub;kti=s~k^ddxi< zP!%u3>7-P*Fga;l{)CeWCLRzBGmyZwHWXf#3l%zp$ppWHlOk)aYfG!M9Vmy#(m-S^SocbSsjHl;O-^;i zPb9i{2}(OX9RWSxb=Q{y)!m)*!=aLy`AEq^$l+1G2eOZhNR}X)UBOv|gA2!@Z)6=X zF?;i5I}0d`UY7L*N))soz(@keK2YhVubhfac=x_mRS9%>Ab_d|re|MuCbQJSTH_yZ zB3{Qm{l|hfH{M~kH1W}&&{ty359YS|{l@SF@#QcTxSbP>hT7S2}^)w2QwAI9F>%k((kXTLukJRC}k1Ag{(3+ 
z!VrKqPqN>YH(FL9K-mdnE>s(2xKh<%e2QMaA-{?v8Emh@%S)SkEf~C|cwQe-tBfgFnk(r1B&@!Xkf7x}+DAo!$Gc|e7R2j~Et*oV6i|WeD;50_ zAly|QH7z+xBcpIP(l}M-{q-lSZbf=bAkFC5jlyE526!FT$Pio!vsQ;^{2wR@QjYjvSwZlIf zg!TH{B`ZQ*6z0c1JbqayxZn(Y7Lg3w#`EWO2#r>Tpa$_ek-LWX>Mqdz zH}1Bfiis#u0+Kz0tP>1f*EurcJOTs4?4ULI^-{rg^bq0;2y0#g_9liDJHxMX!F-!x zPKdNWJqPU%@hq7rgnj{+V}yquQS7%dyI{CFB=zJwbS^1M4)CAG#<+rGlqmr@CWKZy zNUnMUG%X1*T2;)-rGsj|1Qu`jo3|iAHp|V%p7idCgFeyC4r0xA`o~2?^x=1#x$yBx zOp^q1Ty#BF{ZTDMmUY=_exj>OFBA`Eofv>0&EHrYU;XqxNws{=`$kR7$C+IC1s+_y zn>xO{3!e)dXCINK4O)%x9O6Akx>_Zq*TZ7E@mEpvZhBPa-t2N(SW_R-5w~|<=jE|$iYVl~8_eFcf>&r~tDb`yT ztc&!-oy2GbX(Twxy>cg>HQltIdLt;MZ!_vTa-lI_kX$}u|Az~Ca^!DP65cMhlD}!2 zz1FcTAE|ii$}4ugugyj`Z`mXl;#C{W^LR?>ApZ6?cH^UqO=5$$8Wt$CV%J~Bd59=~ z{9bEqu03;1j%9gASW56i9`f9mAw{G6qW6$udknk_M9+bM@5+iUW5rCbpe6@L1+!pVNNkF z&syn<`rEFRi=DfqyeO+`>J4wUjs3{6*W;J=8?;&PzV!E%XcuyYVD&t6Z>zReH#7Vj z@nrJSg+(gl|_L;b9G>sqYX^ZV%jzgyL3k+;8h|n(LFYx|&Af)g( z-)a3Q4Xc>fT|ev!z=U8@kjKk`IpbVvlc1A)6=%z1kBe$*FHIDVpf3-31F)OI8dVt} zt%PKmS64Y?m*;g(N!Em~ZLkYC^X`XKVIj|09v}XA^`>@XyVAw`mxr}-t(2hW_*%&y zanU0S?iwUPK`KIOvA#U-oz+)PRL~+RiSPrUZiUdC(1`hO&6`t&eqavy3uU1EdLy*63JdF-8ZOZ#M1fwt&dF5^1hh!dnYHi ze6DTrcQ1+yDHC(kI6mC1igzV4YV@FGxVz9P-Ojtt?YdpOW+*kC&8LlW|LxhGV#PNH z){S?JK}k{ZAHXO=Ld{oMu?7D;;}J*V&VB0V8GSdZs_A~HsBo5rI;&K@TfPF@p4a9Q za%Dcd?#+(fmcBVg=Ba8t#(bpMBx}lB>d=v)zjGFHnnV^hey9DuS&R8AdnsF{az_-{ zLWt-;p)K!{mKGx}D`FTC@Yijv<6~uwtIEaubPzH^XxNC@dZgxQDoLdL=PW$blfhwOE(Zteh9^~`!NZL zuF+*qk9DL%RV;OcroPg^$IeGY4+M?XWg_lC*e;oRa0yEk2BoKfK7kPt^kCSLl+-wh zrPUpfbU42oi%G_)$V=}{&3vO0*9J751k{5#>M-C2J85YR)0i%&uE%%^q-PwPSarw4 z=#>V-Lx#Nl_vzmL1e61sZul4$%pW3BF^|$C^~c17Jgya)yM=S4q0vzpJje*Y%51g& zy&QX;jsILONN2UKka4Hsfuiu6$tjxiea5R|5ndsC#n)cNZm3BNiVpES*17hI+CBBP z&8<&@Z~a@oPaK*1Njxj_liQL`F1S3pTb3rkGVpGfB#*U?eG%(v3xStrmnBEGE_cxR zjuK#Bl)5s|G!%#+o(hsdyc{T{27O4SAo8+zbhH9rSQ#iW3>?Ts3#NE2oJHMzR%b_? 
zeB931ZJ#C~EDK5@~Pqd)s#KN&=(M zQmyX-$>igdvI~Q&EHT*wVje^{rb%pvzlS4Kh&Dit;O8VfqF*a3iQ@(_DlL3U$8hID z}cyMhFEdvek0iX|m|SXA=@f zh{H@A+K`KjkUH2*@E1C_p5qQU4xLwC70u?&2awYa$aMx66=8cMN+OU7JX|jZ9y)_Z zn`l&qQ@oM~(SpOYoG2XC0};?9bjbS9riq)J<2uwH3a6DcJ~VNLlhkNC2E^aZGqly$ ze`a||^^3z2JP;;Vg5?I_I;K>`D_71S$tMRvn0H#$bub|rftDgv(N~p~yHT_RS;+0y z4R;UlhFz4aKEi#{7mw6<3Qy)X>itp5Pa9ZiqrbFYYAsMAi(M7maFxgBi6%>lD$QER zMED!+*4U!2;Z#mB_GD)@Uw60dE3a&Q_jBgO!)vXb`zo2d^ja3P`OTe14>?!UnLgG} z#nuzRrLGq|OS@-}0qL+Mjv}mmgo+k}AQ>ekB@qR-;W*HN@xpa?j6WA}sUk_kg{!Iu zP45Ok)Lfx>BPcF>{B2@n<~Ww%AR7R@ff)YXC@ag@R{?#GKAgCO^cjwPfx*`1==ia= zItl#uiD3<)>?2fUMc!^6Er$T!A#3kCAH7K%iF+%pmX6+Pd|Y@>O5Qk8jfQ1UW0fp2 z4KaA7AS`4cwIMPojEs<)!I(!mLLfEbi$8X0{O;*M!SiuQU(6dBH-O!L^yGMyFEA{Kcj>LGG^k_@uDF!?|k24e(}`rE$^O%X;v^8V=seCFfhRD>Fr659{=rHm_w8AbF)>yrm&s|Yf4LFt01`wM}w=n9`OiI8)7U*)JxOInJT z6ggEn{-wDsS_5Y(hO3M4o-ZpJFYDwvIb#<}gmH;CV zLMR>T!QxMn`61E?TrnTt&x9Y1jAL!@3Ka4tz9CW!y2JFGOl7x}fmH)v5L;2T#>VjM zg{4Dz4w)v(2ngCD;WJ!bN5L?oDx|wd(P1;YCSfeEsycf7cunTd1%GVqo40N~gdqe$ zrNZ+Uf$(EXG9;qpMC_BQ7D1QAU~Ixk`wi&B1G@@2!KKw#bwjX#6?b)Y4J-C|+nH3q z#V;r2@$_}m2yz{W2n)gqgeI4lWpfi<)i?@KdYt2`aD;e^rzjO;lFcVivB&M=b|)R^ zr8>LGS|1<$)juHTNjOzP$n!46eCs(~)$n5%bMv6Bwrjifl*z(}$|Ho|&&8$9r@Keb zj-tokg(UD9LcinyT**Py6s?bGvNCSqc(Y35Vq<$#I%Sd8=q_2`R0#f(unt1#!N9+D z&bd=}ZW3x1M62lq5fXi)!q)PVl9J5VPH16wYH9B+MOh2F2Y=nDAXFOFKP`dT+YiAH zrUp$>2_$}k($>lPsGBo2K?C6O+_}%nCT)o5tzFMwhjsKvYisBG22&evk6odpNv&d`KGhpgjo{TF8J1x-x%F@KNTCc^XrxS>Ug=Cw8AZ z1iNJ0`#{207BbM~GLdgbvFH3MaKRf{Trh!UW_l z-(U10gXix<@vN2s5DS&^(ID;tcy4g2r)s($`0PCbzA;OqQL7NS$n%DHs4Bk&{3qE+ z5#RSak1?mry-;-`g1caQ&+xOZKiCR>jg@ej#s7Zvf4^qPJy9MaCcOPMRzKwQ|0xT@ z%{5Z78|1>FlP7n<>IYo*9|%eOdT|;~VR0eSHi+RJkz#&r|A&_6I?HdBQ~PToav4+{ zu|9)=;L?7#8^yXD>jc~epQ{|^egI5;_zI$78ZWiRP*wpzCrHqW8T+rf&Hw2N8f7IX z#3-L-hY7L;NLfF=%mMuGb8&UnxZ248brA3zENcY0i)u+ClnNU|2qQABGf+qIPn%^P zwUyPx(d^%bSmGm?k_IeP0+cw_q~bpn%gD3ngLAkyZr;=GULMc6>gJG;b=S%zSJKvoOLXkhRQ)-KQx2zhbOO*MJ7 z|DiRv&HF1?l}#^5xHOM1wV1H7ALD+Tw-zy^p<^~ZL^uY1L9G2ydJz%+0|U~?SJ-gV 
z)|Nc-{d>4)_mTl9t|CL|za;Q;X}M|i4vZBS59xD5d>%;(WsB$6G$TYsL%JZL0#-zp^psrJFN9rQ>g5!&kDYA2EGXv%9;$`|jPc(s6ORpTcG3Tde1Q zN)Oez7TE_Zxj!OB?WUxZW(?Ej?pM>%xE?sG-|;?XDR zsIZYo)(zc+%;nl~GMI_a;LgVP!0FKG#EONF_#AunyI3p&!1ldG5e8QJ&v3PdByRnE zN)T6*U)!YZmQ#Og=pFp$B6nl3kAu}%QChv8z)I++=7KT$jy3FsYA6}Tr3a)Gc3k8+ zv6BJfM>>T!P9Y?!LAk#fqFDlxf#VYw!g)};`1KLx9e|)n#BwRfpR*7jH0e`0fai2bdMF+~x!c_(w3l z!ZUXigGVnMB{%xY{OwvV$O@l6Bex}=LN=XFf4!0Bl-sDZ==+WMxOeOjd$_@&`r1VH zgblAmSbkSQiF{i?w57{d$?=HtRtFZ=_mWms;mzJ3jjs*n6sEbYmQ#6YUno}zg9KW< zTrhF1s@|xH9E+$?Q~!}4hDW=H`(^EvPerdy({J`H%%Y2qKlR0G_2FxMiJswcSts=a zW5TmWlWx{*FS52yX$%)Z=o6#7tn>QtsProZa1b|}$Y-F#5hUZ(^{6pqNlt5YSdVPn z-4~b9Tdo>kv-?$3!}i^@#ZQ+bJ~rgfz4CmYbaG{~*egP77~C+KtdaTP0^ z67KC8F0GlW?%owidhRv5)R+=u%=|}2_n8pAVCgze@6Dx2dICI*SMT#CvZJ!Wp9$6m zAR4&XzX3-B)8-d$go2=gIMZ;>;pCCEwfi6MriTq+Y^^obzXwfy;|%}t_1ubo=5r#1kVd`>=S6EeM$D7 zBJkk|LOS6%c@3lLS!Lxt80l_cHcQgUr;o1visZgOZqglazKM+I6eX<^HEuBRW9CHj zBRP?5pcE_9jT?}5CQW%AhcPj7fRzgXWu558@l{NII7pI~^7gU*m}fuBw=p{}DTQi_ zelR{a)Asyhwy*2+(4Xg=sp#aF2hYUXQc;Kjw43^%+x4-sXm50_nuuAwu{)FO$&21n z_O7!W;hkq>wBK%vx%;?mdM9L;4Ro@Vyu$&neliJA$9%hEYH=z=Jg9o8S7K*_{+mki zP8#+QD(VFQ>?b5SpzXZV)Y1!D@y8yU1Db?eC4XI;Lq9+x4E7$BLr#Y+D zr+Lh3WKxIn1oF?{@XZ@MiTa)d&X$$ZjT!wO?QdVwzG9*iv*Xnri}~GXJbV&zx4#)^$r#m zd3a%nTyUSv|11>;gr0^L#U9Ek7n=|dQd*v{(hc!>ZL|MWT&+c3ZZ4oy!Y7Yn=5wzU zw$sv5oJO7kbj1UZuzV_wx4+m0Ssi$JGJ;gXP27ukhXMEp_XDGa426%aAej!8HQg{) z5h1Z;LLwp=a7+@7_aCqA#|VTJOri@laxNnrCyG)V_5*#SCjWoQL?|2E3aa{XR3dH|5a{G2ezivo5;r*#N{-XLy+$Vtn~ z%PV%X#jA?dln6}+&rAgnH6bw(el)$iJ#CL+tGw^&Nm)=3IIjoP@sGfP&{SZqMNR6} z&ID!7x_L|pZ2o1!C!hZoM(|=+Gb~}yDCB+bdN)_ULQ9UZ`SUpUWs@DPcM2p~__=M( zz4HQB)A);>goJaO^Cc|&Z29P0NNWa0Uy4&QUhE3Jw;QlXn@M&t=EB{({PL;KKji!Q}DbeL^^AhqMZJ{G`|hqGH-sn0j;LETcET?mWMZNY#mnz;9(}( z*ic1LYkUI)93(S>r{-dg82meFZaNTL6x!Y4WIwin!ftb{nynMf_siuH>oa%8Wlbdz z41fa%guLTeRufTghhREL2;hA{E$N$+3qgS8unglaJq|o&--D*;n*FyQqG1INd72~U`SNzL1hozML%pgU5{+XZWu%Ev{BXdto0Oh`s zpfl|?jF))~ZN-O-m{;2Zg@hXlwgp!@8FTUrER$v3O+PAisL$X&UWSG?X*2WL4iM(h 
z*Ck{Ix5gyLcN|k2iu_bu$>1O_k&sD$i@)IwW0-q+>W3tYyR{#_vU|x4GzDJHpB;1L zKG*woXU(t47x^1r#K{ci<+$6@Euy;$A+lDj(GWMpoDr>6F?;*>(I0l{TMqC|F8RduF} zGnFW`M{@7~5U!E-=W|fJ_cDtKX(5O&dXthu@!{I>x2W1k%&k|IK*0=nh7h+)BDOEr z$1+^G|KAjBh|Kr?Zs{xTs}Y=wS({k-8bB!jw@PWnrLu{xP5#@MqrxArIs%TC?8ArH227}WcydTlI>)SiqdTr`c^%*4$|7Ir;KyBl@M8uU z_J%xBq)Yg@-9+VwI@V3czopcXB_whTyR9~iIlmoWWmSj|#R$uLWAtRK`e%V(Gs7~a z)6D_$t%N-MxktxcIaO~D1+yhzr=@JAJygt0C5`;+JwMY}YKIaY4W4-Fl^d^`xhiOH z;T|Hd<5k>I9x9Nb8l=p3CI3g=BFM+EQHUW-gROcQ;^H%@;0CC@LF z%6x-%FK7pPy7ycoS&jMQfHUSSpZsZg=Wf|lLciK8_Ndr|jmHYN*OC84N>Ug0E^v@N z!WFgmU)HI#ILSxME%S>?7ypq(9An%(&yt%O6f|yaph-QAfApAB)mA=pzU6$o=;}=y z`wX=>_Sy4whnXcfeyKY*S=#|P(w*4-KNrPtAF5OVk(f6k#fPX@s+x+7+>d84_y~;| zFBu%+N#O&(i4Y6YSYKa5L^jMYUiDF#$^Bf~l6;0d7qY%!x&PZTavo5$DfQZ8DHAE;9OisTOPICEC*vA zjyM74*V9oCG-IiAJLCV`yw66eBqI2Mq}Et(5M9ioTG(C3U>EMGO6J$^s+T77>gn>u z`sX6VCb6*Vg1kH{+5xMkpq2z-a0*g9$k>|>Wb|i`EY1S9jvk?vICSVPv`0jC9VFM4 zBLO854I&foi=e7-{$Jjn3TUdmbg}2qLrng5!JxXcsHghOTKPeI0qsAgr>E5lTu%{- z43Y4FfG;SeMg3wAVz%A`KEuJT8^;wQe__Htleszm%E0(qQy`a3(B~m59|7Stj8-*R zO<@y2kcHqorppD3{S1(LwIo?|nrCW^$<7@AW#IesReK-!qax?UK^1HBBzDukS^(J) zP#lyPk_-(18pfe(5?SI~ptK^&9N;0$86&l&$fsA{_{l0?uNikZdb{s_E5^`@_1!1x z2cZhPR?k4sw2t|hkWa&zRnMN7l5$i;gu%+n3RxQ5;Z}U~_+z1tmj+Bo#)*6#-HGpp zz&c287%ft9BFj;_WNF&iDsFM;%NLi>tgkn#Fw^i`N_q} zK*ziWs!sHp{ZrRM7TRfZZ?AINsLP@mS);exUHorX>#h;|5*DcCfFfW!Byy_|&ja-T zg#S8%Y7QgZ5FqeQgl@fHSAx2O&#V{B&PT*JKpC@>p1uqfk6TV7P6t09JBu~)ZsBOg zKdd8UCDs;5(Phvs#77QAD2x;_RZqr|7(9ih%o=2}gJe1fNCL5{qJPslPcXlzGt z2Jw?{ug2!&;hEm-zU*krZJAbR!G?uc*D>i|-E8e2Qdj%DvL#6Lk%RQz~|Lb zjfw0mfL+gnf=Y3vn+g;4b^F1XVHLrbf68%kKa1Oqg1zZFsYZ*_eMAfkKBs3dUfhGn zv%@X(d|VyTfc6xT8TR%9U@stk+V~y90zxvxZ$=A4GC?>2_({J}`rPml^>Gxw;8)%! 
z;V$Z&-)WkUA3sfGuiJwSla_LgVI$Us3Ik-G0zt zj!hKYe^Z+u!e0pR(q?7BnxKFY!G_U(o~k;I62qJFSRMmR?e?T`j-rO_{fNS0IY3{T(>SGU@T02$n0MjRt88LULmehz-m zU~CQ0@?Kf6k|~DP^GnyRedqW653L6*oOSSzRiFkDsDw8P#x=+>ufV!zXU9({Hd33w zQB&2AHwnY!f$lHnOpWBBh03y)@;&zzueUBYMVhVX6jIN^lz-B)T?lE>a$rPp$O0vN zSq8@yWFS^o+dxBLI>B8eQ-Z$%1dUe`Ja{nb3h!ulZ?FDRC;mm1doj_a0QU*3t`8Bz zrjsRVPE>ke5N~!259v&Vacii>x(9G!?O@T!Ug0Qk!mEbvT;9 z2$Pvo?pPl^d1%k2zsaPc)70DkXbgV*`09gqFzMfQ2t)42hCnxqnkoL@BTl z!F`wjPLGW^_1&I4!qG6hJTsB%vC4_smtH_WuGoc$*1jh0z5o8sh+|FV&#|_dkTbD{ zJAV4ffAjlk9G^tNBMK(Yyi%w^>IEoazz&2<6INmH1UC6fRxgI9Iec7m?&3v^Ous;W zZ7G*;B&uV~@xfA1mDtKh`zp)T81-o-w%w36ZrNfQ7%GlVNI)vQiMtpWh_Iat7cP*Q z&X0&WPDZMS0dod(MgU?WmaO;ubY)iypnmI zz91|u)XIn}jw{c8z3Z~X-q5o2ys$ya&#H|tB6Qc4j;3u-U{d^Wb3d9aHAQPtj zU}y>Oe-YVW^r2=Dm1I=!6i0d}b~nP1j%%$Bc?^V@7`r+U%xG^Su+}5#LbcPBR z7y{9snD9+awF(m^?DLgB^A+*ooq}!?S|Pk@>Z0>l&oYCS(eh9V-U>Nt0wd}+XphZ^ zAo^$P=J82J=j1det$-!(#b0Km1y7o$^)Ij3$9}haL3vxCv$Vt(f2tVf;t-$Yj)=1H z*Nki8u||GvP5+AUZ>16y;+KRL)qcb6l2sBW*G{eNtk#Se)N%8NSBmzisx5@g*Pl1NQ_U zY!`LCsi0oq%k|5dtfTTUFHYi;XFtQ1%=-DuSP&pb=@}d~^1wy|Aw*j>l&qJh(x zk#x$qxP6~qGWMe4%HLD;LkV7JOHQBuFe}$9j|yNG=pI7skQIzP6>lHJ7i206%OJl7L#BgiM^CT%7ZVVHh}aO2;( zbBB;QCrvb^rgcN&Aw+}y}p##okH1Pm*8earc$sLSqTeC z^2FMiWcD(7qmsZ>#aHZSQxgxcaXmgTFL5`~W$f+_Cv~HRZm+#(Uj6mtoIS&I9%-g| zs?Tiqo*R)ix4slN_`T|lAmnIA_KwUZYnMwL`onW?Cu;}IPW&Zku`<#j@$c2;4gWw@ zpNI18lsQM!qYI8XZ;KBEytdsIjI138XdGTxz zV;3mAJLwHyVT(#Qv6$F2Lc~eo;R7Re-NJ(Kl_Kc{9u$B_vM8Ebm@J|JVnD<=64WDH z{Xu*zP*?RrAa8`(3Yiu}OAOIvhFyZ=NnFBGgzNNX&=N*i;IMureB(9HDy;C|>d7fX zW)!2{*FL2!I#U+d+${W$j{L_6O$rC5pT`t;>6x_jna~T2SmsE_)!Fo|Tz{}`yKm0N za&UGjT{+sh*NHFbaKlK;Y{%J9E!RyES({_-A1S_{3po7Ja!v;wL-w$wXMvvr2e*R% z?wx6dM+fBB&h-YkB`_BC*;4KkcVf5S#uK{eMk!(C6p>a}_~Bbw>@crN?BleNS!eeZ z!wv4#!F9Ro61ArF=DPDo3&&Sj@;$3J=7)>znAr6#U2trTmpLopIw=#MA*3E=jpD&fjSXBYAsi)iZshZ8Ne$TQC^<3)h#6rA_d z00J3E5p&bi2wD%r96>mtiDCmX{d}w8doW1XkHAs$cIH^rrB&KgvDi+9JAoKX;bJF#T65?*E#pY4~)T zB&xqOwA)N=yCH=9LiisAHNuv^9`e1afx@08S0um_TNOUbAYAsLMC&|OqJ6^)%U$j7 
zRN}7^;t4Pjxul=YVnnnmA+^GU1{MKrb#Pmqs1E{^ON7D0VnWQ$a17F`kgFa&a9{%^ zrRSFFepxxWFSWHh1hYeo(vShBh`?Wj!ez}^_m%AdAP#u-xM?<^JIw7bL`*B%m-xF! z`!83s8r>Ye_E({~_Ty~bL&86UjGOy&PHd}+o!5Tq_rO0Zxpbhe)BcIGSZ!AYtBBa0 z+us}U?%`fx5k zWxHAbvbL@y()EmSKJ!K<<%H+ zOj_?15-+i_mn907s#R{$v(>Mtywp9N+x2pxHCs(RFn`5B^`oY=V z{YH8dgF0h$oJt6L32ncUh)_Cqwzk&b^qGJBX#<*D^S*!oA!l-N1AW$(%bX(;z2(PE zHRGCijoU>YEGK%!v@RWePfJ7I^2@&ExCx?@?R)pCjyr?If;Q?0q?YpsI1Vt2my@i1 zl#5BUPaNm)e`y`IS2e%n>0+Jw&c)f)ZE>NHh9PqgY=r%t z!&4rl>dM8P$dna3I+x-cW28(kXeon%6E2O7Wu2UhxFtA~5ixuLr-TCOye$+B$oU+c z6{JzoMws#SJrh`|An`{0bC#AShOT54!V^IT-4$Gk@ml#17)*lP_CrcBmYGB*C7d;= z#HpJht|eB@9tFAm^MrfnBiNHvv)q* z;75_;LTS^d)iT#cpY_w$BZ`Vum+QeFu@_<)?0lO766t5|=7?!+Ih-xxn z=i$0sV;IMJbLv)c+r#ySphF^5ocF7l*J1LM?r`>17Wq-Gm>TW;{4=P0!uFf$)MqE@?wGB7Fj>}lqGF@N(RC&AAqTQLl4i&iF zfBG@mOnbe}y{n{+LG|XX(sv@A_7;=-E94((-1pW{j&_+izv3spc+A&v@r2t=x&ljP zhh!GpFk!pLFg!y)d>^V0Y*VjazuwNs=vuAUOtN4=g_Z!Mc=<+df3mAAHTe5jlG=8a zjCxa2H_`f-%B%TCwwXflM!3k(+)xHhB5|$#`1%anHOrnx6w?_YFcC{;?~fnnAZLKf zJic~cI&$I>okEN7u4gEw#LuGUbhX;Ed-rF6L|~L+u+rk>N7;j%PWY61ETLb4QS=Fe zV3o01e;LpLChh4oO@37!ber;}D|+H8z0_tp*+!W<*m)|tA8nq{(;rc@VBvo>RZF_t{Ihg- zsFoX<*csWpNDl6Vx!HLd)6!V^I6V^+*(+xR*6HRFuJ5t-*H4n!qdz}s{kmyVePyB2 zm1i|=uea2FPkAw6iN&&rY>q+RTf!nu)qN|EB5I7U{w2}qoiw6qX|l{ech3Dou!D5> z_t7)g-H&#e|Z{+^m+73%Bku+_wVT2`j#XICG}X%^|}S6d8XgIoD^ z1)pfB*4ykfehyAH_9!*w$AKA|M;yAeozCAkzNOL5`A8SsS*ZVmJpIMy#1NL-neAMR z=A$>?^t7ayx?8HoYn<1x6j)>KG%PGoSY0{YFx3=P<7uVi7RV#38IV`~VjwPIQINmj zF&C;ZA-TfWauGxJ0va0#P!@O2$;2?g@O<>tpbPv3oj=u7!jR8;g@(DFC%mlJa7^5e<^)^S(!`x6!`9WzwGx<8kc^a7lK%GTd5 zOM`p!=FQkquz94BNCGeTE!$thK|~~KqVnU@Azx9&zOGGVNK6HO;0s`2OSWXyFTx@M zZ^h(bbbqov{`oM3(a;i^n?LD_16TjKsw(A^t)pxZg3UzHpdp2G;ii4c+Jva>NQh)= z-Lr_02Rd=Zr45uZ=PMf}hBN7>ZBlv%M@?>7jwN4uJk7>lRZ?2au=R3c9=$%E)ZuRj zLpw$STp}V5js(g@Nlx6{_)pu@2*1bCUVQniP2aae@x_C725rOU^dvf94n=p8 z>6lLKx?S(5s@~#E`*BrM@oBJR*)t)oB(9g!B|AkWho^PRuQ*E3evpvoI@o6{n%I^v zH#61Y*ptCdHpr*p?rb6EDc*WVCAs)h*+0zdOcw`QBxQ#Sg;ncPdB3mgM7_!UE9UH( 
zw1gKKELLBNJD)Xwc3i4(yR%c7J@@?I3-ZG>AKI_@1vQ(oky%-3eWYw_?C$M3W9Yt( zyzSQY;uq`JjNE2emfQvkPA^MUJ``HnIu&0!_*2-2TV4L4?tY6ku>iw6A6FcE_H%d# z2h2(~i<n(9+mm&3WjekPPw%NxvCnYt@(fFqCDFZ|o z4PpUz)-^NDIq5M#>O>p|`q&9w7yMTMG#4RcAS8Pa9;8F3A3YOCpum7;@56?LxsqtU z0Hz0s^)lc^Hg@&{Ae1~j*3%awL;@jF-*^)}7Q{7#ure_1RQpccLtZ&U{^okTaj5f= zhvyAj6xM0{;!;ROc~$lLqMtFGIAFNmUz{PxQ(&e!wphK4zrB8R(Tka-O{zO*0 zjAeC62_*|l#Flcf7iE~B20QYyTS|4w0C5&|$$RYCzFlF+Ew;ilBO^ookrwebzPj2o z+V9df#>UNMJv*70?vtEvTRcFf8hwp&pza%oOHXx%uL|JNMxHN5>OY zagj~KWHlysgM7l#?*z-pwsCv)eCV;Lx7c+q#A7b@yN1A#Q}<)%b-jJvn@^W79dH%P zAAJ&@Q-4@8J=5eg`E!+tB%AxjT>(svc7sBU=3{JE5(lT3&WSIx|KmZOlWX@}B~HH~ zh1*qdMod`1QcmxSo#a3A(G4~|(|_}D`+hzf|9UxeK%FryAUAqlT_vS1s&3>->$l;| zqu$BNcfUC1<)p1%pehnxA-_GlY=0t3|JaMi>GI;99-@K!bv>fGmgsI9i9>4_qB4DE zfr>23cl`^)NZ_uM>nDT%rcg>yv0mue=Qm(9{P=#RRsEAaN^6pG*Tu&SeT>fkG}K`f zDto;cU)5|$PS<8ZhH*~}VV*(=(iYI3Xb!2uR6|CT z>>zyLxT}pgbi!boi{|tQXd6sd-(8xrFHER-Ww3gG2A)i0Q%7fJzCaoec@1o;cuNwr zHi+5{UnQ#Q05*ZSA~FhqZvKPVfT{i!vn(Rdk>CgMgwuBLv zfXI$vm%2IKRkjP2-HJai(r+_X9-^%%;2z#V>gD$%HRLwLr_OCSE^KpD&dysR$V6`4 zKP@Z0-?dD6o35y2B8B(nY5ob{(rpJ`g}%4BQ@7dr$KJg?H;m-X^;~nEC-AhTWy&j8C@dkwyn|Kgik6lPcIV|e|5b?~ly<|zPsAghI+dG*7``A0 zkG0B$reyv09BV#AJ#e`*Fo2`eQ#41C6CjKX(;Ga%S=5?O&%sfnyfsm zh97D5Xw5&0H3u3+t>4uopLu>uM|Pz`Zf)a_BL;dS!wNi$E^!sEHli(mFH*V8E2nRr zq3eh_R?b@J6Z0sw??>NE^GwN(2c!|7jC}GKs2q}Gwer6U`&+I57_+f8*Wt0$sxCd@ zvEP5wNNdFXkfx`)TIcGg$p3O&s!2c7(umlUrYmf_->zvQb(iEbpx0v(JFW65##Zr03HqWwSTTAP=$BJ2H|L^b5+LqI0PRfVzQDLFP zmiwQKm;2%BM)=4D1bc!HMu9?2x`FyeVP4*}r1d~uf${YK`5%z_)V_DJ_3O=5*3i&s znXxUmw?LaA=&OK4^hjtKXvQ85`y)2r_~DzPqG(Um6%Mnm<@+=z7srU=kNUHSn1bsE z_y-vq8(ZYtw@)#%V+b`f*qGz)EA7N?xo^nmxzO5T^~&5w`)%%-ch^{CkJsm>9aKwg zQf>8#dM;ne@nTEnaQZ#fi{#K6lx=cc>jP$71N=V4 z5z!Ovdk5R1$csegPSjuF*u94o79Lnx#!|>?SIOh5h4`?r?b7G3U-egzt1cICjd&KQ z|6)6dlzBEeH)wkWH`j-`>H?V^KStz@^B>sCu>P>SQg}o{ ze^=;2g024jab3ir6GnYp=0qDA7uTra`r4Yos#TZHP_8PxGQeD}wqz^DCoL>aO>?-o zfBOc2YMVm9?v%k59w%F=|Csyvf0Ej0apRl5Lb4ezj@2Snh+$y2(g$bX+!|oPo7iBA 
z4fzZp4+TZVvnD1(t5nORbA{7K(z=-c`Vi+NcH4&2DQu_L=qI8|OOWk+R8xzJ|<5 zgp!N=ZfCV8TI*gWy}n^f4MbelruioBUf$cJJbK5@4<}i z%;PPO9_63%h<^9eVC-1iDZlNMFTxyL2LG8-Pxi0$_4eoQU@@yqV9BOy)hRsoSe&~x zMC?IceENcHP(<7HvKgg@m9DV`%S~Ii>ce(KgapI8M3>^?;!=moR3aucvo+l|9#w-V zPs-i5{^kl1zIoE9tcttQGz)RXU*D`kVDud5l(X}@S~~`uWeyWnkPA>*r0N!iBe(9Q z*aLuxljzF{c8mBA75<$3TPp zY;{H&%a{)HwX1P0gH@rvqGx;V%zR#Td@e<HPDx12~bloF7=Whm3Fx$VBCBQg)n%q`v~ zKk6m78Qka<$w}(5Z{oFcn-g&m(^kvj_8z_1UTAg3R4Cy3g_6sd67n(M4d8z~?B z@S*?Myy8HX?TZ)QA5sdm^cwmS4SLsfpwQv6DA+h4Ec&qJ?MfS7TmOTLVz`v0LQ4Z& z*UKvRWbdKd5cV>DdZgov)9Q1i$wwu|vfEMBmQ4U7Uyv#kJ-#$`k^LMAdQ8l2BNomoEAANgbLDIFnU=76Uzu9{Gy^ zeOIU>83YhL!*YNVCQ?`9RfUOB7+672EFHJq=zUb>?|u71!)5*rI$Gh>S^kK1F1$jy zoy?D2@Zp0qLnR@#reDAIHFZDfUAtc2KwgkH?Y7m47)D^T4tqK?>PYg z#B&DJ4YgI`uA`jY&p}PczO!XlK!pj#V2;Osc$a|%+4)V@Y$kKKkZ4^6rPZx!8>T;) z>WyNAL4W&fhNoD_H_U`Vh@h`z<}F5_Vs`s>+?QN{#OO)b;3H7|?n*^vGO<71-}L@E zU>cqFtyUsw@27e9RpDyUxqf{mu1!tE;_Wq{Q{HK7q%uTSntv)%$F9O*9Q$Kqr0umi zt2C?p#z<7?St9G$g%i@sy3|gttl%NHHs}}`(|6M=jQ3I?QbhrC_9bcg9TIA~^mk zX5WteGN~JA?RxeB?Fu3~2_D63>FD@8FdZSxO29u%Nb#V#=CX;7_Ghdc-Cc;gIy*bv z;9|Pe`smvvnVe4_GG?Dbqyh%`dR6j&GlSAR$2c5AktTFA1mrtzax3+Zj z_HI%+yYjQ={x75)t{mLJJA!vOp5AJLHENBhQ%s6uQz~J^#t#f;?-c^%-3p~N5L3tI z#SME41NJRn^S54rskHzS;(9TvID++i@@B^}DkUJ`3oEu&o)VTpZ*$BCL|D${nn)U2 zkU7ms-OSC$Doq6qrgGsExKbSx2KcneAWtFeqPM{|@$>M~_ylnXxD*}ChJS?0Guixh zA}$|#WoG(kw5j}ujW<406P0484U*bRdN=mn*peu+o;=wAUQc|=Q%JDzMK4_1bRc}o znQAg5G?BWxy^PbK4O%%5U0AM?Gyx|9w}Vuu1X@hDQiR9e!=XS0YAi7p1heW3c#iX% z`UAKuDryY|zq9Fd_I2HJNgWk+u`uB9y~<+~TRG%lHQe-(ve-`1(AO`)EhD_>2&51m zsch-Ni0i#@0V(m+U>b)2KOeYq^{VFW>%Shiz2}UNEeUtgrEz}EX#cTD?vy4<2m9MhM@1pcI|ozsqXm!iVN1;g<IwboeK2`b30Xp#hsn$B%Xz6B zw`YE65E*>qBo(IwqE`oS6Tis3h862IKzn*9G9< zfcHr}M{MZ0;ii!^rIh-MC_2Cdd?V|%XdZ(RF%YVouyg{o@TBe35m+1g zG>#|0f)f-;^(p(`cf8QqXt+cwoivyu*A(N?^@G<$gwTh` z6*%(IEPZiXP0=fQK^gv=qIBEE(#U_PFS(B)>1f!kg>m8U`@I&A_sX~XrBMdNQQUjK zm8!mZ>M$dneM`&|wkc=eXW&Hd1w8@6q33>nh6i?oX9B&eItbGky&yTT1Z!ktH|Sd( zu!sSx$#-GF4)PWl5b@Zuz-H)?x3~9y-tQ>$bqwFPN4r&0RI&%RQ8@%w?h*+)^NNbj 
zf5_M*J08GvH5k^gmr#p40zXyIB%Tr$M4&~Eg8zRU_lbx=s&ypjJf{K;IC9W5EhtL9?M z-MMVVf46BXLCZR84WlrSD)zx@3^!oq%9VPJNnycuuv7xM@Bu24P5^t**BGQW4e=Aa z*LmkZk-98<#`#8ir$wKct?U|iw8CCH(&cSBxX?~g!i|N!1w8hcRDwW;N2x;??);?= zt1zLtzK~@uE;59TnY&}-OPBt&3TkUdnh4cuW zkU7!tzaNA^45%m}4ge&I+MCNG3{y0OFoB^?6P*B$RF$K&$a%sT2Hma8nJZoNzje$H zNw3%iT{?Qdoh!UMUORcSv*hw#;t*TV+|IdC|1YPY2Q|!t@tE%scqx>#3~_{qd4(UJ zITQvTcwiVKvNWT`CIwHtw%pOl$y}}DD8h%IFURy<*}!8^`w;CDgM$dX+XB*PxHcb=EAg;7`3>Q$53 zTyh(p@3(t+)->3tau2m9`DS0*xnoBW;R_GfE5dU3`n6@YF^I%JMn}IPhRvuo%aiCo zTFIEY7<3{@20Txi?fk z`sD2)dMbQauIr{X;WU_&XRB${DO|X{LiCcNw#9H!Ro6Xx$;(uHOsziSFU0>5x)}(V zwpvd;TF{q@h$)ZH8C#EPI}D^2*FiAw36KaL6`SbkAEILh(W`qTJu&ecVKxRLBgiI% zI209zj%+VvJy5r|B)UV?uPN3cRtM1AFrPl zq{X%94Blirc3_f;mpAM7Z0E^)XMBC@p1Q}j25gKL!%%DYLwCwy_c{K4ElR694}ZpCZ+h!RSXp68m)EYbTyp*TYlL-tddGh;rdhs`qo-c}%!vG5 zp)G%V?Up3!f9+b9|ChsPP3cyTu3?w_g@KV%`GWXETsGUV`vkP2NY;p>-`hvx%yqw^q5z(H99-fm!vwnQBG zj}He$%(|pK~JRjM4B@NOam(TmJm~Hr0~9hH{tZ&tG^%9T5If7s)IQ;TL7U4^ppfgd+zUn;&k8+Kges$s!X z|Mku3()YYWsQamvQqs8_^2;6<4=9Zfq$}za4;jRt3;QAy8YQxN!~Q#hiq91l99HB@ zC4MaL>pJ7)xBpQiV{+HKm@1}4d2SQK4b(dxuQ%O%riJa-U3m^OO5~g27@~q);sA*4 zFoVOe01)+ROgevd(HXnH{Tn?dJJ>O3`A~@Tgzk2%Hl=Qx0Cl~#4cTm$UGB8;{=0l4 z{zqnJ zuOBBbJa-?d+BMW$Ws}%HedcY|Hio8l+u-{m&zdxhgY)Yp3d+ez{5GlEoE)Atn(c`z zd~U%~SN|f=efW-pan{}<@j<0zBeylqUg1|mt7Y12URt?kb&b3!Y`R*|e`C9;X$4#M zKi4!iN)9L_>tzTEoLVt76Y@!qR#r)PP1nW#>DSEBnq!CkBjVhR^uKF;9FQojV3;m< zh^w7Wx}hU_O?9OE{kO8P@>Vb7p2O_--?{w^hg0e+Ds5vTta}J^DG>Llkl>Am0|Cx% zXMzdmFl4?FR!sBTlm30S!)WtVAIKH+p0NCINGwgSwCwIuXwUXzc9UmBm;*`*8pP}P z*BJ%Neoo8d(7G1!LhY^A$X1ghy9Y}mY{%~C+VZX#@iOuqs^|%Exn5G6FnnY4MJoht z-3@Vn*{vnaKyOf$_DcJa6rQcH_Zf?q3(_tryt;h(#nav_0j^l@4OMntt4}?lVw&nD9c$~-BQXH~Yx8;adJ71g2 z_#t-gb|(F{d?x4XjZ5O2%N$ESv?&!tXasimd>i7~&V0}4?&GaF1#R*c>(gSnR~S`h zv_B~g&Lr`uv3%++{@S@70rj>1KZ&nG_rY=ikF>{~uZ=)t2oWf@zb_D;%`^M{d%axh znZH(k+95e=d3#lUL*t7V9KJ~-`BTj%w&P#Uj9o01uNomK+*74fe!6qz^VeZ}_sX2L z=R8bxG%OOgQj$_U)Ia`?S^?6xI=Z(X(mI(KPz>pr>FU^=tVV8<47AwH$0#cadg5f;T|ny=6*FqFjMFS}7x 
zP*9KyLNS;e0L3M`H^adwiu9iX?|w$ml$zwX?o*B-hIy4wjRZE^HXHYDYrmB5WiZ)Y zxge*XUw`%P%GVSR!AW6`lk{p54v%OLZ=kMBlZ#I;JDL?{pUQ%NmDsOm`9}O%C#T|< z-Iuo?4|%rX@Kn#2uooQEVKz?}GqqE^Us0bwl)G1h<|fBA=Tw)KJ8kzjI-cxmDEo3R zN0WK~fM4&bh#rI4!!9c)=5BeLUX<(Fai&H1CzisZ2^9&FBwESG{ypuY>JBCxjkKnA z-JVZ{e^{)Kv8~elVU-@L{E#Ep>w3sPa$X#hmWlb1zN>D^#r5is-sD!bo*(?UMRj+F zgr5!X#`USa4`j}_e5wfFsh5~8-*F?Fd*5Aqe-3HG$?rQnt73Gv>6xX;Xlg~ik(H@` zY(;(8UU-+s^{T@`^m)n_R{V`e?q^&`bNY!*{XT;V|Kg14I>j6Ca6eDH(R^oZ&Pw9z zn67-=9nPwB5-uyR%_mJr*h^is;Y$U2BDL?G-qqsLK3}m2_PI-&t1NE5Rrw1{=tJBd z`)?RLl)ARHEpzDXqWq5afy3VmIeRZ2x)h~aQG1g|<^vaV>6YF!`SoJymPf9|Ur;f; zyw6LXQ8z+exyUNVe8ajE8&X}3Q(ZP$dHp;=|GjX9e-Q`5E<5{eZL}<)fc5S2|r>zsQdwIKE|=~fng@&w)~OgkNjV27j87at2rOTLUzF&^qkUJ zZPi@zcV6K4ip2X@uii3=E3fPS=Kjg3N3{PkneFyhg@YL$;XM?GPmu}s0W58^d;LDL z3ekTqC-&-W%_r-|@_g)4(_}x#<nYN;2(V4c$geZ$2lLFmwt%1wrsXxeiwFQH~r0@9_45W$k(VD*vU!sDUq3lJUuKN+r596Vf=`4sXR%m;$?# zMRD&yg%1H$7yU-z(Aa1;R2aneQ@P6@!CWM>vyHetLOAW)=Rx!5a8*$d2ykcdOw zCIk0qIMkWQMwoRK!HHC!;TH-Hf{z(&2GCmq^5;oOHflUU2vJ&ETA-KEBrG!m zh5caggy-fgeC!|yfL-ce$E-%KBYAPHhcV?OA|%k7qqMz&MFy<4>A)8-RsK8b)hA&e zc6YH4eC>8J-+1HytD*p!4RQ} zWRFiunmlrEK0R~~&jSxuGX(|3W8Kg8-*K(&2F&@x;fRxCpu)M*FL&hAZ3B9`vPTPb zWk7RES(s?Dp|wx>D^}@=izl`4lHkkAa8&_h3G*w$$#yz^U*bl`Yoy!+?txoHI|M(!K zpt7mxvv_+Z=jXgDTz}J2&e$285XgP1b+LYH=gxop9G`_Lsvi>#dRp4!B9IE@Sz&V)4KW`!ed@n zb#HH$KI9R4BaF+TUwOf7^6&NA?$;ZB3U}lztScz_T$wb?WNp~BzGavHr$e8#>Sd}< z%tz`<`UX8|*h0V5cIm{}tpi66>ipAe665fVCWa&Sl5fy}U9Q!dWRjAY>Xj9ow7Wj$ zB1S$(0ojBdQBo9p{KQT!=%jqxUsouP9}K3(f z#8pPNc(jw{7MH+OKlIn?jc?w)TZ=~<3TMcEfLMn$nW1}!Y32tIt5!JSh4`ipSgM?` z&F;mt?Ms@ma-idqb@Is8-#gus#4m3De>Br#-Hzx2GxuWdww+C5X#nCeELGm^4nE1l19;_uqesKx zW87W(R&91mtkQ>>pmxpoZJ9zMsqw+(*^R6OCVp@3{>sk=noqq%mqWOsYzHOU&X&F9Pq@P*QZab^#-wQfRL@O`Tk9ps zB*xu~33tJ0&B)PYGNUw0lrL0mEU6A(LGzDN?yEY^oC^=tguxHor$2f5?5$02Ov`9P z?RHeSNToktu;$i%b#%N@YO|QVo~3~NaMSy-lK2;-y{_iF9iKgrQCxZ94@Gd_Z1REi z5|2Ro}xGV-s{Vs zGv&oNoC^#uy#4R}m+e2xC!9|di|C%|HuqPY<2V>39#S*dNWazi)iv$z0 
zu;R-jvMwhZ@qx#7Mmah}`8vvUMVCU#|24;&3-0PU(9++3@L=oa%~#@7@eJUYj(p=X zz%1(E4~!ZSr*87m-3Y7T@jy~!NNgAgCK+la zKdIVY|AFh^QRjot8K`MAgyIW&Fw`?Nx*XvV5nI7egiklz7{uZmYDkWb!1zr;n#;0; z;^Qf_{wvhAw9W!{x-_61b~O=74;W~#a)Q?&CUM%J0q4wgJ`uAd5)41cN5eCbEww#X zneRhX7wN?QyLM->FtAN1C@RV+sQN0OTZoh4W^C~VbNkVmNUHqUJU(Me-LreOyN;d@ z3jO9J&&@J-$HjOi?Vb47rwt$vvlUa{1isv0VDbHu|3DT4p-#{?ij6rLbj$O@&Z^H* zr?u|c9S*E2wOG5Z`@81`|H}_qNg12i1#LCAc9qrHX33ZeEcs+?_%U((5L2_Ulg7v1 z85U=SRKpGC;*^>g{zF{hhK{_2=eS}%h#rvl++ux)bG|2b^&VkPzrHS#*^gGv3zbKj zm$DqAFPBqBdUm7^P7U9<{_HgOhK;N9i%NZuN=h~}s)k)jp0Cx|!J)XuDcX6fRyK_~ zc-}euH$ASjM)fQc3aM=Ul0w1mCn$xPzJjH%2`Uu#(!eh&)FC1g07o++5heV7ULU)Q zf4emaXAc?@YsI1?|B)jbpPFO0c&4cjdlP6^n1-(bNC!5^sj#brsVWSIAc2tcQdUrCA@6++I<*@F5HFEZBrCfO zx860`9dzbps@_{JUBTs({gF;?99w_Utgs-lbbga8iz4d z=Vp|2*NlD_`*=c)v9kVA*(1dSm)F$FH|eIjeflUJ_4zsb0lYNQmS?^w04od8PA9u92ck< zT}2wKpktl+zoi=5erJjjm`@XcHNRLpsOY&x#y%f#*zw6u&09gLLWDygKo{PmM4&Rp z<;B%WbE4h>{-uH0(1$=`{s>~R4LgqH!;E?;@%3CV>u;3#t-!BhelGxA4Bf$Gq}sM^ zn>2)(xI5Ud>bxk-Tl4?d(G%30(8Dz1qzXH%v$(JTpO<$@bYWK&HSQrZVgtZT=;5Fw zT~Ye-eLCBH(_{OCe_VqmpbO48F$wT@eOp~uK+xq+$;Z)EIg1bM zH8R$Bne*D`Xri$q&2dWmF?i-@26^^%er8GA)pS-NeaZENBON)Z*Rir%BK`W5`83kb;xiNtjF=QOonkXw)1Q z`E!V`0vUox?@e8tpPeOqgJj-Q5fUmM39HS0A?s z`vjr9p2Lm{GlM!W{<)Fc_({imk7o4xE1~-zzUA@saJ$08Q~jki)Ki|&3lzb7c!uJUA1 zRjc|T#{HKGT3$2Tk8sPR4YseYkq zcv^Rf*eQ^Uzjkn*idW4|NJ{F2phQ4FTW!|OBc7J3W1yYo6#Lp6U8>AE0y%7fp><}V z!$LX}na08%3pPuP4;l)S$*rcA)wZrQVzKhclabx~Oc!1Rc@;nBr#xX3q`Vl{J+RpB z_IV_bInmhI-6!()v3CI>T;AiiQdXu?ttU*7#+IxWWP7YNX&3b-G7XKE%-F-WcgxDl zCH5C9+?!veSDh2kJSJmsg6f)M1?$KR=VY8?saA;M&S@IKXA6bj_l9s6s|@X0H)ebP zyjo2CP>{04{Eqpn4oswky#+=K#AreIa2FS|Gk0FENxIwMy^qcf_ zKGDUAtyECvAX7c=LeqvKds0I;E-GTyR@T|(H8(XeE+#!a6Wjr(I?c&}I)wPVfK!@P zyL~aNSq59Z_nviyyJ(7y_n+E%gl#!u;YSwkbb`G-;qJ)3xpw_g?+5tyLT3xMe zu{((_eAK?s=B4~F?_7(ss{NK!iHhmXgZf=jf=`ZjkoO90*<9`!I&L=~A-Aux`Cew~ z`Bwpyaf-vU!4&o%6kB>#uAC168dm&K zKEA3vz_q>X)!KRWz{`qS7AAuiihZOyPik?4uiwCSsV+2^o7Zhvl>M_t&#U`KtRJ0X 
zoo>&S(d+n!)?e^K`{g`~n{{orh953IIr++RMS8DT$9%)1J4@Ew>qSb>f1boo&`d$t7GJ)h!q`#A5i$xc|{m|-o~javpa{i z_i~1YRYxjM+tg`kG-e(2tJ|X;wx8~t;6OA|ELvKwBWG=*@^)NI%y&dOz$~pI&u7xC zzk0usu`!eElmi+r?TNlGpxkR}1T(91aO6}}M3Xd4j3325&~o;=8(UFf=<@DRbb=*c zPKuefJ~g#>oK{W!iVe--xunM??VY9Z4Q6SUdY`Xm&3<%&iS8j(jF9 z9Uw4)h1DP0nrH8aBfbW`rEOZpDV26|>cK;YIsx;*J$_I{Lg4V>4d81K_C~OxGP7!M zC?PzP;6RJRrLCI+nusK%B><^vve%!*A&oTz$mw&c;fE>A9B^#tJxr>8Y2$s;k@q~A z%)760nP}?qoi%CAGSSpQ4HT8c^afJ#5n1wPl!m>l8_2`MtsP0zgt(hCZ{}}AFbX99 zI5HfQkdmCG+gr~!K5e4o1Wam-Hqohi z-ml8d^q2U!c*4w;tB9Z5l2)L-dzGcTloq`?rIu%DI>{cY7$WV0hIVNszkonz-#ZF5 z9cV9@YS(vUxnF6Rwl&7RPfX=m=sbhAiSSs2Yl{|)nifW%Io3*mKVs2e{ZV}ugxgo1 zo*&_#G8qn6>=US_*Zs<##8ud`WqI^|eqz(HqfuTW zCSzwu2Nm)gPl6=^XRcl7CaB;7Sq$`&{=|)c0v07MUk(Se4OynL=9dt44?qUL)XdVd z&YyBwBJ67P5{%CfuR;*-;BNqO3!J^&S%NU!1#$I(yE_eGE$oUR>WW z?86dw27^ys^Xj6Aw((+g)nr{SfG1ekV#Ik3>H^w)uu}OQmxNHu0`~L);7ROI730dP zs+D+$>(;K#|M>BIbRNEq$Z}CvSI;f(IGtuUdkdLi;s9^W`Og0-s@^JIpi&^#$~REi zAGx^|78Kltt;u=w1&j_l@Pe;RO$JZVwfsXh!!ipiN=1-7U<-5(-7o?QMPNZkNWicV zNeg2esDK2YhhNR`Firm9_?)-vHU+}x0W`hOiCW_5g19YvySqngWu!KP5#-`RT~bm4 zVb=>79!j3F!Y~)EWYzf2IMrO()2v2W1aqk?;NcS9X#n-1QRSl|P-!r9huTS`$~f-o*j)}=$DF%QCz#oUQ} zSq%-1OTOHX@OZ%xv>Z(&ZXu6>-M}r_oa6i9K3@deTGXU3FmeI^cwi_fv7zBPHtax~ zfrCU08(y;l(y?CnhjgM8#T6R^)a`?$oJ}k&wE4sg1}0YER=t*2K}v=O&;_M->gh6Y zQ|}DMrt{zW{z*8}5$@|mc7{K{*_o}c|E#h5Gnkzc+trtQk#z3v_GO zs^c}&B#mil_?-g52LE2@Siw+fsRZ^|aTL-3JZg4D``le@vnA_~f%Ews7cmLlbASz^ zC|>iM{SAs4+y4FMxtLN2w|pqNKy!oZIvrG4j!EEVNzUN|@k}0tX)%nPp;T(>TXJXM zawRQvb3!vm+*T?;k^Q(aa31irBechD4l8m%wZ%^|o9MfOS;&eEheZMPmKlQdd_*C^ z$#J1;HyALhX&4Kk`-F35CYzK>my91ltAvCUT(z~bqRK!!>q@oWIToq5XWh`&e&OL! 
z2$O-gmMxW)Pbt%srQPwlQ;blh0v~;d0?r#&|J6vch|Sq;_hSW0Xn|sPq=TN*)(+FT zJ=qE?NjwtRW;O92Jmux&&O?J8JBc(iO=IKO%rzSWSaIHjU5&JSdfIvQ3i#Xg-JvYc_1$cmtz_nVFdmB&4Ep z$Mj%Tl&<$2LRvxCH5hx~>N*AlxWzkd`;HxeSMObotlb4C*AHMHsZPQXi{B_?ssBXQjKMy1Kd{ zr8>A=Ab260lIqy;<3#$r(lDe=UFH!eO~)~NF|6=~`##<$ytgSQU7r|D~neFi-5>LSXf?00N*89702ohG{{H{n9P{&GIUGpzb zO=+diGHW758cUwA00omCpg|;`6sB3&U-8i4;?j%Q&Dfi0*F>Vyf{iI*dWg-NSjY&% zBa*0id3g~K>4SAZAcC0Qf-e6JuC!uxxlr;Gq~~0H;0su8wMwdyvSSDx-wgX8T-U0t zQbGlAgB{>GdlJ^n0K=gHKoPNyU?(Gn)Mgx)$O9u>7vz^S#Bd4-FtW3=gP9CNT&^r; zR~!&|bSjE^LI^}sL=$TxJy*?k{F3JpWUixLu#FVBIz57PlSa;pvA zK@Scu(r<(bGKvc-j4fOs0TD~~7{e_gGVHNUtnPT`xWJ0|$Q}S6tJJ4QS_vm}9GXZ$ z703PqfPdv1h^Mhs3ES;^)EfxL>dVatz@g{Axd%N@7|L8C`5Pzb2@qK%193hAhz0Z_ zh`DtqP&atSDn$q4gl=G@Y6CA1=G)W5-E_Y(o-+@7_AFN%Z(WcLrvbLNKv_clFRM-5 zLePg1fV>srWE$G{!fT-|6CoW0#(%+BS#<&VhAw>`SE)8Z5j}=C0Sq{ zij4)+JRZaPW7?(LVg-nZL86_+hr|p+2RpD>Kr@jD15k5Cs!|0fX@RKatns!|$IzA< zP;=sbq%5T*ChC9AH1yie%g5KSiwl1#nqWQ^!P1Y&iIkiLQ<@9fQ6lOoAtB-O5c#_I*w`)X@H}Of&CShQSy(O` z8O6v`(W_U7Uhws)0LNFr5=W1I(>pTt6KuhZ!pYbF6c?Qu`+wdq>MUdl`}p`kTnt~* z)9{Y&?(W9<(J?&ij!CinCNSh50L(GN8hIcy%a!sf-vkq0T!6fS>#Hdtb#sD=v%cL0B=cgGIf_ibOQ*0)rk97`V0N znlkU&1yG(Q@X__H{CSpxUOnXWgm|6S-&_FG_WE@P(1E(*m_ef{hE)N>OSsolqpHWb zpe0N3t;Tea$X$Vg;mZX-VOw6J2!z$I9rOb^OLKz)*0lzyZ8i~?eBOf*4I@|`)H-Nf ztD#zm2)N_askj_%w07K-N`RcS$JXn^%=zjTxL^O=3^IyL17Y# zM3`AuU|=)b+`bfkfegVtBoTFyBptSi0Nc~oa+NSg@rTCx>h4MN+8)u5TOD%vmZWw z+zC-foiFn+jwqxWU|Zzl@5)WvpKkd2_3J*G&jNyiSZG9MH|Q^?ETv~Eoo>=_B{xy# zG3<1H@IXA*>)O8soSweYlBo?2sfX>sxX6$_1kVzuBIX&`8xvAeLStgwVB0HstOs`- z?LGeX)-jpQO-3dTW~nn1HByjbY`~19c9qN^=1G+(I42$4)-6TJ9>T0l45MLbVVOzH zPOs2Ib3SpjQL2e7^>(hH< z)8C0Q99k58*?9w;LqD(&-h`b#q*9jEhF<@~&7z>wuO_H8c$7cDQ1_edXLc)FT82V8nn~A%j`s)%;7z)WmJWv%?IHZa38NuKL zDUcA89)p?q($X}^&~vaZdZ)a+{4P<4>!!Jo*AhiDu|p%m*YIg}Mg}H2h468E7ZtUV zC;)K|>5rjVNCcO?!l48v`2w~^>4ptYfiRda&RNEG;hGS`KjcnIwnA!87{yI8Gt%xB zxjuNL{Wt8_@UMga=&={eb@!;CENs<5RH?+T1J!EXU_-nvjvidM+)TgQQeel7L!LQ~ zMTtsM5;o{q+8Wp$vAMT-^BT41G2>1X2!@$iaA@d#3^Ci^C4tdSBy1~jrSG;KI=*c^ 
zHU6(nnb7=DGB$f6lQ*O?(P7bs*x9dNzm7UeL0Op#0SE%N(#Zs{8+WXUI+mjWLx^j9PNvL z`KeU{)&l+nIoW5$EtzS!OUU(-H@rQCGY1{ACTKFgTIAem7Iec((+?se4+0JKJw<87 z=1T}Has9)vs+IIn=yX8!;cNdMYYXzH&!G?Q-#_NKWCyttU^rrj0_qtnfan@2J-q|K zY;f!K-4_Wl!n0=)_&G=&=o%a>!#+76%!@8fsbDZ0(uRhUPl!JCXF1-f5y5Z2kdT$_ zNa`^mc!8tY&tKZzr&USu4$FP+B@tmVxgkeMA89O;-hD2I>N5>m0#l9vFPcQBZ zf(}yaH*ClR!{R3+mB-550@WedXZLY#;DoKHc7>1-;P9j5Wb@qOf5NU3X?yPO?m7p* zg?o8>7sHFD?`vY$S%!ZA*!}|M7*i>ut>D9BYxs!S5tOI_6R4h#s^&br8i~$2xNTA` z%VM3fa`S4>)<4LYBKhSMb}gvVvD(jIyB|~`{rs35R}}SIqyKIlUTEBwOKN`3zpP2Q z`tsQ)mgVSSvge~m7o6`Md5gixP(pID;fN0X4Nc9rT;t;o$!TdtM1iO+dFj%KwL1AU zO&b%Es~^By=Sp&rnSEn(^I$-$%7^lDH-H9w8TJ^;(Hyn?(Qz7q?fN%G$7^e4(NeG$ zsQtd^XHxF`LXtM6DuHuYfO0Ksw4AG|rMR9uw`Zh%QG@~kW>e^-@BzG1#&GVHlt>Bw zSO+Pw{y}!W*zKb_vixrba?Pswt~$yhmwdAwE9@Qq{GPR6lKxo+C7>i6&}bP<6Zv1i z;giQo;`<*y_~c_sd&|OJ)0~e8X8+x-jXJ&TUk#k9kbDZXNFo<-p!UMx9h}G@Pq*Y) zZv~?U_87-_c#Om4dE-$Dt*lEvK22_Kk1Z-fO{DI? z)9Le(D=NyA0dsqr$lP}`tEkw2;=~qgcB#_h#*C^m=Lf{#q882VffrmQ!BmK&tfPmQkolZePL2OjGGv$C9 z&;jC6dMIo=ML2GQmttHyLqu>uGYEk)Qn_#7b73qvGLh$`-;l;JExJa}; z=x-4W98xg+?EqvUWL~Xd+=`P>L8iCaJb@)f%d0sxQ z@MvR`z{&xZ($|)oEpPKbL$Q>|i5bn1@XeH|Eu8_<2>t7-J;#r~Ky%bJG7?uZ4P)}b z;hp)Y10`{H3ETstLnj>le-*h`8OG-pp}!?^n4sw;2-k#Z8D`psW3@aYsn#%bUAun0 zCYZ6OCaucQvZapE4*)bwKk6E%rZU;E>YIDNqe_*U3c{n1ja6Hom*|$bK zQmT>fg=F;T)Fq&dD1>>Y9|oJ_!3mZLuO`4kTAeuKS@;nC4A_?c}?45EMs5-_3|Z=yWI2I$A@8t6JcALIm;Ob{FyEZ*j|c)wRxF4KUg^JpvJz~A$m!yJJ&hxX*sW7XEE#b+VPuXf zZPMTYV3fVSPKH0Bv6^*MWXRsJt6dy_yxsowL0`4IV%m)v+}Zac!{eq-7EMgi%gvlOk|t z>`36oh9iv%6;-VF(NmOh>j>D@5gd3XF1O#qwEK6QN|s}F&u!R_a~S4|7G4b07{E~x z!Kd12fU&L#?`fpHOUEldp?ir|Xq-qxHS2`EwFEAl4&Ju!1kek@?g7+%^jGjE$5UFp zua`x@;sC%`o0<{Y{MvAxMtLxE0#cqnoBuLuCQ2LsC}L(H-lk0XYl_bIG0|JB!Vk14 zOLyOvSyS;_+|v0$j#8MN*hed|#tY{4+l$;U?6hhD8pczPG=Y=K$O zA%22Nh8!Gupy8mJECDnA!^`~_`|4wv(D?Z7e>Iq-RZI)x5yA@%i`dxn@zUrEq z_Z~f3hx&(L0adh&x%$Ct7EJLlJX6@EzQM~17P8#^KnT8L*rbjs12ZTfP~q$D^LZ)7 zhT};s88A4op}GFRrER{sj&h!p?Sl=^eT840J$rWa#}9}l`c&RGO3%l}#N1h&Y_P_k 
z64WEG*ud%Rj9EbGyYb?}LfE!^1cJMYAf5kzU~8ETD*$_!U~*)3^CppOL?r*7;(9V} z;`(%O>!wZc*Y5=I4vr;}B@!?OZtj=t=~z(vfTRM@3#qb-@Xa5^8sT;%5vawfOdz3! zm^>sUohU6W1u{u!KESF?)Tv+$nQM^Us3NMxP~&)Z+SIt6lW^e;mi~eL!_YHxJnVT) z%#mEKd3Tg<%P3jnAC$~$q$E~KrJb{Ab=bfAuKKTicO`~LD0PYf*-&LzkF%j$&_r(r z^${})XAF)fE2(T1Kn#2YXqZ5})7u@Qd9ulvBM^E3>_n2Ax^W2v3N~S>0n{sKG;vWo2di0$jc-tba8i!o&9II+ybo@TYf4I(^-Q=>TCZ zpK9QFr8mcZo)?cA;k*M!87iBJzS9;KDe_cbi;7mGD3p&;D8M7Dm}|r1IN#jP-4ANZ z=WJ86iX*eWi`Ro~L>26lducEm-;YxPR3$)dDV54QQBUJ16##}TL_y?s^jZky3c$IZ zqus^oq(ZK_fbB#*P|bNl2w_M^|Cq71L|zW~Al z1{<_&gH9c|Pt3HI(T|;PKo-M;)L&kBlq6S#6Kz>hAv&Vn7|Y-Q z`@>T_0ZaD=0TX;OoJlZ@sIAS_F57_xu1L-sv=t$9-}tl%+6{R!fpPw9!AI49V`tm( zkyIFelTm?!bR#(^Z(^N#Aiq;S}&Q?5;$n6eghoV`#zbU?KydffgW= zoATsbO#=f7J-v5qn-oJP8vN||4!JAV zVqmCTwqMF3)iRQl$cXUX;(~$|Xlzg~5%n=V^!cLjz|#lb>0AdWho=}`P5;-39C_^P z{}GUDo*5i3VGkm}RZRYV0A0fvzuf<1jXG{!6@aiG6BC565=b3pO~J|E&5RdqKlcri zf_vj#gULIc{%?tgujHE{y*Uqn7?^!=b2F2gni?bya%2Z48xccb>;DB~qZ9eq-}Rub zb#b1@N|1nB+Fih%vgmu8dt}fhMOMIaP&MA>$dSSHX>DVmyP)2CRyqV*B4S;?`gRN5 z?CdP)a)w^z_M5^$Ny0`C^xeHFX!3NDBgKJ8QM`34scjT2`tO(x00jK@{d?_3j+g)( z4nV+xcXoAlRv$6WupHch$q|a>23t3PdDxFo)aZGwH!(o+DNKZ&L8D0fbV@@jhy=(1 zEEa;%gAp}2YDq8A$-b36#50c|Ayn*`jTuWfW(8J3bcH$*C~7d4&Sae0@?$IHUn()E zy1mb&w<^4EvDLMZ0p>CVz!g{3?4*pnSFSV}_RZ#429@4V8eTJ#Hdtf1-sJnYzVFxL zZOY4iO&k@_v#q8Uh&puGFNJxUfIcL`#_qOJs!>-02Sgu&yKOMiQ(JaT&Hf{_G!{DI z^_Tz@BBNE+cCbxqy$g}-hmOxLmWB{#H8y6R)(VwA004N>!6!t-ATW5%B=zGWgQ0O< zef>vQBf05!@f|;2fTIz$)zp|vLz3Gf}7xTeH?X$d(?_GW1n7pN`M0 z`sqkgNcJH@Vjp_3@1I@EVcN?3v!?NtHnv@ne%~jdqLtJm@(aPl)jD^Pbr~VTmj++9 zOlRfF{KwBC0J|^MpAJv{+ByD|M{B>F`7}7Se6jyU@YXp!e!LzuB8xFaJRZZt_9Q>{ zk>sS8>+2U64>wuf*656us`U}Iu2P&b5qvv4+kY)EKR3E8*QoQX?xJBk)iJHvnu?Z} zMaq8=K1LNCl0{3*M+ANa~ltW z9(VR4lMdpOaUv2)7d1KH7W2?{zL8EF=k?T&?JEzCZ(TL*rRtnP`ZJ{&p^u5=jxs7f8kbl#gV^{RTuF1(?QB>7F zmVZU@%K2}Tey@Ff)XADMgPA7Zw`pi=JqoitJk0yNlZ$r-UahKVO+B!i<&o>fFt7MUO)gd1}&?{ z!NWp9jU}mM{O)8Ms4;Bq*Z7YfeF~aeg5B(x(s%&B8M>k!$8azPaBI!!>e;RM1?WsJ 
zU{;Q;P0QHW_}H`HQ6|XorCS@WgoxGIu>5)Ug}(l34b(xZcwbky9X^e*R6kk?{R;u7 zHm8$JjSUSBit7F#XSQFxv&cpzpXRlo)f{9tdV3$9ei7^bi?xS%sG`-HT+ggK+~?ak1prims~DJ#E+Ti?Q>?E=#jcu z#84+H=lR%lN$u}-Omxz6k?(8E?~iiQ=uQMhd-qoez4|cMkk>OXD})DS?&*(iyh49v zqNve@&a&y-!BJO_+dUo`sw|5<8RIz26c?(t=FLS>&Oo&(?$X()?nPoorn+tFecn*&>;WG*H z3t-is2J`$j69+KJ?1Ci0ZMP^3s%dPp?!b`16VXECm}IpGl6OIX`{(?5>YkpgyA){S z3Fr>Tnh)rDI1@Snob&`y79zeZa&hNjnnBPQaNJ>>#dA>#o)51vt7LJ)2}exqp99TC zA4LzdOMsUR>%TYfI`9-d+g*iDmS(jRVJPXe;;$6dJ%nNG^7N$8S=?*c3`OZ38g6V@ z#Sb^L4wU=BSq$1|=>xtFMFWdT#{DKyhvdo|f3U00gYL~P`u2i3c|f8s=ZeMd>$z)g zbrg!+uVnhD^~%9-)W|@mlq>$(_qwX3!f~?bNB3wKyLpXHW?ku!sH^6kq+Y!)_49o> z>MJRqcX~>=%sJ}synLhUONHl)Dr#vD$0wX#g$5GQQMIyi@>OeiyCEGl3=-0C*BPUl?Zrsr5 zlK;s5K%SZ3?sS&8O4Gv0$icY=-X$x6T#8Uo-YCcLVv9TN5~KN2(+KHh=Lz22NoJj% zaG`89u^ksw4*fV9h`x0LY#IUWfq*VLlBHE?|pS6%`wA z44Ye6z#P~akFSiW4dOSz6~O?8nCc_YIc(h$V-2aPDE%j4ix@Q%d?zT_@NK?O9l;Lc zuQjy)4`=TI6;-xvfugqB<}8Wtl}t&NAc$pXBLYeg34(!K6o}+( zA}7g`Q3NDQmi*@?bieoCeQ*5nYus*IE(LYY*?X_G=9+UZ7EDh>24#t+AeJ|!{C9mA zge)M|6*nCrT^t~~)xU53Bb7NBGaoblL9o`Pft9=yd{td^#7DyPM$SuGuo~PDlZpCP zzeU_3QU9U({xuG_gIB&h(7P{WBp}<&ka0|;=Z&JxXR8G)j?_m#L=9Lu>F>+yJi1~v zURc?UCH9rpu-0v5&(PG^@n@`+=_ijs>-yZ!#+1 zZ9lli@2OwrP<{qCZ!GhkDc-fv$mFIOZJ+32ac6YdJ$YuI=O#rFF6&(~=XRV4Y4A_o zmaHW&)_rE*X!YVj=K4j#A^f49m9d>&1>V}+2U0X$?kaX4D@e}U#xo`Tp)`3@aVr1q zx`J(8(_>o%ESJZXQc}dMHK%K`G9=pRmP#3edu@vNCmbE^UNqZ;WYtkahJ)lSV;;Z! 
zwcrowtv9FNMiw}J*Lm^o9@Yk5{=!b*Z^_pN^g)1p6cE77fZ_-01=G;~fatw;HzNeB zphxJkFFTN9j*-b5pwNce{9}{*dUUo; zYj_L%V3MUl1dDSHXU=J$V(5>d&!A&GR@)GYIQANqIP72rbey(vb}_#hy>g5*JWIV(n_4&PPx#2^1eGbbk3C4Z&~4f^X**EW(W!0~Aeh*GsMe%w;FS_> zD4vU*{cLYRQ!tsA4O7^q=q=uaQHh?O9)Wp&bk-%s6h+t7bsQaiaZ7T4NN`G<7${Dd z4)_^ha}jlFfEkZJ4?Jz~H4^tdSMy39t9sstVT`BfO3kFT+uG3q>4D36Eyrft{U@q2 zB;=P&{!@8d^v*vLXeGc1AWV4%^t;_bW{8I30qA_>G6sxsCqX64Wk;=Fp~-qN*Z8*j zM6V<_|9ly3Y?7EoEH`dEc{`fXtrzB9Vx#=Hx$DD?%x4iXf*V`wKNp2L+xSqd zWx1!RZ3?vA)5g1%_@5!w3voVSz3oPt(vAb^eHl0V%&e8Ys1vTP++&Noz1{g&XB6&= z@-9(RdcEU0PkidDJ^=}x>@_R31DYM=I#Z|OoT<*0CI{^(r~h!urne3V*#6QlKUkpV z-BIWelGQ-vYo@knSnuK$?)EvqQ^$V|&{zcWSGED_;2Faal+t}aDM=81xlLUcP7ur& zry%AKmHfda^Fk;{V2hD{@OYyaOg$EuW8h%nR3gwKG9gGJ;T1p7Ze((cK)^nZ@kqsD z;|Iae2ILY_)Soq2{Jf8ki>pLw z!VLf9eY32NlTPIjdVm_n8-Ptq*oiodLDcfOj$jWS3kZp>`d3WKz9O8Rwoxn=8~SBI z6SGrZ`Kck!{@xE)^)&;HoU5&HATrQR!n1#5c^>b7xBP+Xs5TpWct`y`dYWMXvCJVuq#(Wf^k{#Yrkb^Fd)wVK zr&Spz_geAxtYnnY`u4?kfAud{!*l1T<87B~&sXpq)T!!f$XS$gx* zdr_`jRT=SpQJ0MMZ@=;7Xx3k|r|q~{BfXlUkiz6JZQX;POIx)xT{xH8dMdiLCF*Mb zmVxVo9Th8p@wqUIN6L#q2MmUe<=9p09Uwt$7qOFNcSW8BN*T+(eH|WA&c|N?^~O=P zRAK39oy(7FsZ3}*BIfC$u0y8RX*$7Zw2-v2jZ_6K38!CmSwgdX-o8=9cgiYw|j$@%@yN&0b(OEp-pm_gt40cFRp&$L#LubH|e&lwMYzSC;L(V*k6ZlkLT^(PnhM6mGk*T__EF z8>e=v2U(|e-uN!FXzOs-9t{!C!i+YFzQX&QkK0JlARes(5$goOIlD z*pZ9pWOihWoh7Woi;`Gc-w)I(dJZ7Rf=nG zJsz;Pw{L%`Pwv(3-om@?)gn{br!PD|aF}o0WH_3uc#eFA!n)IM52Tx;18t8=lMgM^>b0uDtfKT&%icq)+tGZR_!O;2GaqT29fF#_mkVaRpPI zcQm521UW^%@sB5yRgfsPPg-Ww46cDxujTi|akenZATa!7|YS)0C z!u#g@ojWjpdgk1b!Zk_h`fcyvAduJv0~=KgF!<1Pb^hAP4H$9#W7fiuDYu}`&BkD^bMXm>M;Bq52j=lfd(` zDidIs3dRNJ|9i--S))2LNIcP1cbVZU*KjGl!_u#0Umgx?Q>$F#{j6d7SwluL1pQ3s zc(}NXepwdJmZT4|-1ok`yY_X9{f@56>v)zt)hSCkMx; zOg#R<_=&f-B&ur+nk^Ob_w+o4ytTEp6K_=njx15Iw! 
zok2%SKDM;BuHU*<5)HS?K0t*7z0_%6z|H8`{%*Rn204RKu+qO~$GHE7%V)`wYKP<0 z2>S(GkP=BEhep2}$nQPO&Le{l5#oJbdCnK#GoRMy(aeK;u(=BsG@#Ib2P4T{Utj+o zYQ9zjUi3E_yK?kS6R(OD>q6 zo))=z`{mNL7ZlcRy1l3;h&8Oa)WhhIV5^*9aBF#JlAe43Yps6pyNTy?9V2P)V3&Z7 z70(>>lRTbp+x%c#Uij_2aPym=MkhZM%kMv`ddk&xyxYc9pg1nQ!K~qrKA|dW6Il!c z?Cpd!01{NPUEgaUEum@Ev565>S2xk%Hg5P@m7FMgf^zWbc=` z?`7aSx^N4_P2ZykFcp}Uk@616r1!afdiwg|V8I!q-V;4Fe^sFzEH@W0BtFC(rrhl8 zkn~UUKcswp$k^)nzu-?<;WG@2{`vdOJG&OF{O4~Bt8bG}H~%{FCwW43f1ts86?!4= z7Bri|V(}X8ZHxj7(EdTh@QzyZqfa+9mgQZFGRiLts5rY4M9wPkhx6$UZ7*zL3jdC> z{X!V{TkQZE%3;+W0X~QjIu}vB_Of188PJ|OVwjILH~;K!-M)0@>1aUE0f``FDF;&e zG5lSLT`59VUq6@-hrs*my(hDY}xI(x$m2e+f;ruhTF!EJ_d;7bM zn`zC74NOT_?2}9m1sl+A-S=g(Pt>xA==w2WIbi8UahFhhbm`m2yG-IYQpxBT)5*rE z;RH5kWp#hYVO42=5mg}RE`abh?c5nTJ<%raBWR^hFOv7IQkIi@jV5tYqQaE2QcQIO zI{$Di%D;HHTIp9=mSEq8m)>n@vPe>-XMo&VxfVwU6QZblQ|;2ug%d_1!jBwaHo7ktC)>kZx&?U1Bh zT?y)uT8zFk<`-)Pn}UbiKQiJ~VptwiET%rWg1!V7f)0csG#GAy=$9MA3vsN~*DNpg z1-IU#X9rc~S_F*;#t`y`im6lb0K5~%n?8CX7M@0W;F+r{2ehj^$;tp5_pBR=D?M)S z!N%#Ss>&PC>e+)Dm;YG4s%jdFFfEI1ZL|v4EnLT;XAoESo!tvfS!Ay`(1 zAGl3WebSk4{C*|XC&`y0Ciza~;218E7fz+tU0*N2oNNqQp-mXa88(FnAO!Lf-VLvV zV5}fI6uxfdfsz{>VO;{k;l1;IM_!@-ag-=eV2^}5rd_423RJQR4b_e-*Jl?`-S4jp znQ*7#ph+15J^oqOBps9$8umoZ+e>$*K!RoC;E=}LEH^JNWzmyr!lsTGrE0@mwq`|! 
zn^|IpDRrWX!&2UcSziC6sx`Ii(5ZqX<;wLm7rOY%0Og+YC$`Md+p?o5>Y}kzy%jZL zM<7QBDbpA&Ea#}U1OQh1F%{dg6Q(PXHPijxu`qusg{UA5CJC4$8d?`)EO6!PE^X~b zpT!!dAcQUj6Qmq=UYKJe2}ZIT^jMzRTe0!FuYoTZ0@}+J>x6n6Uu+RUXRHgNGsiiT zKiLrK)Y%xVa~}!Len}WI$RlLH0B*0R>DC>LjJBC$jW?WO5a!|I!v-B9r@F@6S{ln< zei0Go3j!`eSRmPnyLUsF`$GQGrB~$F(KayK|J;qvPkYAnWR%Nx!PlS+g&pcqtew|WlxT5dx3n%5ppj>(?OB2cC@BZ{ z2m1Y3pvTw%`G&mDj6iG9_C6P^<~(h@a>*TVdiQ#2QLKBvnTfyug2bA8Sr42+S^4-B zk-I3P{7KR&&mK}fr75C4cSanXIU^Q8DUn-NCcjO<)Pn_wW;>XgHDHB6F?kT2T0YY{ z6;N0*=!B(E4ljhU;icE;)G2eeF!`CFynwASwa z<%5a^?|REqb#8KBcK39*KUUAo?jU96jC1*Z)Ejxp%6jMrLJAdDN~3}oWgeE{_~9a1_KLVqMn z$#L~BPqgDtqEdAykr-%$5r!6bAToPLcXp1kQ*d&`>vGLrp6t$<7#>KLhjph?Qet9` zahx&ESuiwcy>C1Ic2ZyDti^LWK>XLUO6hS~w~gF`{Pm+9cjAOxG3gzMAPtmL?qyBT zssF@zn^#cafv)q4%b}ZZ6%+3nU>XWqTK+q4{Jw;p^!i*1uA7~g zH#V=is&y#tO0^tDfj=gCJ{pX9@+PUA`}RpdDdCYm-Xna?&hEfpe;o(etOC5VL5Rws znkuY}kBp+i2t;{8EJe)`G&vZitF-){TciVrKkuxgtm?H?cg$YE@LqcNA^m`w`pX&J zJ-Ir4%a^Z;Dm@Zu8fT?sMv~BhfeMTnN8s$Xy3~4mLd~gJEl;Z839UraIaRbncLM8V z)lLx=RO`s#((Lb3aHl#zu;gZPHiqindkk3&>%oKPNJ7V+W45_!93ELo@(4hR(1rUM zzf}|@A4^03P)?L(z=qS`^CoMTTwB7BDE2wCYXenqAw$!?N^Dm?vxd|>47%}Ferfh% zFLiIOF1HcnbYtk_bzE`j;NG$_x#gL9p7fMdP;Mtoh_t+Niz(miGp4XK(~v{6P-jyV z^HXGtHZdhPC^1xhc1|Yi7%z{@iw`p#*{Sqjv%E#S|F101YbHS~cZOybTfEv6`E>5Z z!(ST$kXwUb2A-s^kWJSCW#uZ$j(t;9@J`)Wu#gOKqP4^eQm0bM^Oh_HN84n2if-F* z?X<+3=(=RE+}Q*LRk6jZAe+#nAIX@WazN4?s+a$)qJzo@(z-o(rJm29hr)-z2!v*9 z?lm))z|a&*lGLV`#AX7_*R9SH>fX7!@Alq=B5A1PG&4ykp&-AFsq>L=(xV|A9@^9| zZ2|{3A&c*S!$?Hok}sHUC`QkN=S32l1ZixqyALP8nuDv96aqoG<#oU}a=3~(@Yk^X zaO88^1pnERym$7jxu%^+qW zjr3AaK)(jDTN6i@SLS7`8>d(}5!E9Rt2&bBdm0e{e3c*V?W}-p1|TEzpcK;#A~{b=A#*TZIZ09TZ;@4MG0qJyk_ z@#4$wh7727`-g@qaL&>p+LnHP_!#J3ahpFuaP_0Nm-9_YD0)uum04+5@R&)m0ppsD zTepTmBSVAlXB__&a(9x};FBVrOTqjD8E-H>oqMGBg>A!AS65dA{t$!*%GAWDTOgFl z$a*Q@GzwY%cmRqxc`tNtIh7+I5_gmaIRUs2Cf^S3Gdv(;w^0!u?7!V*Bv|_0`}gy! 
zs+6&|qbWRi%p9uk0xNq~%sx?BMN&zbUxcYhC7#|O>{$%K0Eq7g0~g_(ED{upn2Z&H z^3l`VTh+CsA<;B#ND2E|1F3Y3CtV{+4wc+9clq`xbvRkbsj2z6bS|yQn&$%S6b%RU zAI#COhZd;H=mD9u{@(Dgckzv&}#?Hzb=LC7Q6xw+-!jQW$%YG_G^ zn1_vT3g^A2o7-PlMvfqZN=Zsa3y1tR9>JV8X?qBc2Ek+OgqA}HA`g=x2Rpk&ylwE? zeP%X1N-O}pp#=6hmpjExYIw14&H_w;@WBIG{UOM=zGfOmtHfSym*db%RY9f#ot76K zf~GI5#X7+$53MPjE98yWsUIbK_1?;@f5G7pk&9JKObhke6R6oxCMsxX9F#wDg0hkxet-)bZ~tS z_ZBeoCmeXQ1`%uQ7WaM9ayB;cEMZtb2RoMMYel zNnpObib^O>17l#mZY-RfoX!Z!ga85LrK`qeD^FIuwEeZV!)9OK9)kqAB3WW~F}&n| zJF~r8Iy))qj>g>H8=08G@oc5oq+KN4s;>ZKjJXOPD>m_aC6{oApRex6qIV8(h&Y%l z9X)yz>rkIYh4zXUjK|E+pMQcd!tGm#Zd`cw16pgpcM8rjMOoP=NYfy=X7TiYs17D- z4=rA!dnza}@WA22Z`E`{LPGM56BX4&1?Z*V^M_8$!VqI0fwwz7PMwxnBRcqmerV606TtFH z04QFO%p$j5iB@6y80RQrU>^Q=@vuhOZ}!gq<+Y79>5eHFuu*7PdwM2mOMJ&>H4WAZ z9U-0Fr5W>ITm8=lF+V%JWu=sij52D`y`DG3C-`??_jX#5AvT{QXU*iuPpYpd9Y21X zqnbnklBikcbGWe}Pa??$EM-uVUx0lVvhE6WK*v^&-y8XIVvSL*+g?&^?7aT#n0nHO zi%&jW{DMHq;<=9#?M?My^h+|J^oau{j|ukc4T!5L`~>C!0TuXNSDvP(W(8DA9Gb}ra_w8I!Q3v#$)?$&0s|fz1cpjU%yVaj zy2?iehMSA4TAd_hk|T=KyJN1JcsmMsTp72yB^pSAa*!AqSUZzMPYIA*-i%@_KMrpQ@A z5p@ExT_s>5NbciE!T9sbF;qL@ah)_y&m<~Pk|?0ETTh)l847P9fXaUQ#r|~oN7dB8 zsz8&hIu(`YEbYWRM8{QhpQJt3@zqAhkua2Uc|}E)Xm{~J&cK#!4`TA&d-p1vjAs*% zn?79sG3}y?{hUd==xX09D(EGOwa}3r5EfS7>~yN3p~2F|=1^G{QYy&pP@&614N2R0 zCuvuPoJEszTX8@Y`Zg$4uO#fX)g$X3z5C$Wu?KzEZ;sPRO*MQp#zXV3A3 zRsAa&%pD&%RnOLHp6>2titZ{)?i?%h;Y*QN_aQs`G>XoOo<_{!BSj&>5HXf7NPK#< zSg|9+uJ1BFqao&JR=B(pQ*50=K6K#7k;ED+sKk|0%&uQgZ7q6nwx4l!oeVu9xZ3+w zAMs+f>t-OeW z`8g0=Y<6hZ^2)E-+vg!&scgGyVQ~SDe00~j@XU}XL{i(P_e3y?6FodI@FcploUb+x^0g`$IH=JE)DCV2X+IDC5lY$f0KUTNd@C$aS=`qC;bA4Xy5_M^JBjNV zC-`h6J;U~}7Ms7DFAtK3@lr`-s9YYG{e`SB?5GOc`AagFzr4Z_h$}mjfT>2~?)jS~ z?TR^Y|Gb)Z{df5KB6-Vp(hxw?peqlXUia`87D;5ELiU59ctGW-`2d>Zda6!`&i$C1 zYO_guoJlReo~~|=A#Y9Ixqa>&3npqPQDF$OB$tH%DiG_d1DsD9foQdN&o3+E2QxPf zR`18bt~-=qicA9);$4<7JPdMBkg^kxEfa0dK#tdr4O!SH$>V$R*OG%NF>z_yD@_(%B4tUap zLU0kcbI%`h7gfG^=A<})J9o{&p~`lCf837W?(vRBKD1RK7y(ZH?X3j@hs7w&HVjNWIO6rd2c7a*)l3>mNu9Sc+ 
zh)RspOp2pQ&>uF>Us3v$dJziu|H7MH==_Rkl_s_UVeTNTY2;i z;h?UTXnx7Fv$GWMGHKj{q>Wu3!@LymI~1TIO%1kCJmo0LtXziP2`LuPybQB!r05}? z9viMT>lB`CBAG5fG_(WvBXk=oCV5=jV~IEP0$pyv31{3uROLV+2xi+)q!u;r^qf$q z?gN!wl5|mWJz}Ipt+W_9H|2b)m!?l%FE7ftYw|exOZe$0b-sK_mwnw{U*4gapuJd? zkzKS0Z@p5bq=nU4WaYYf&Q(f<#(%` znI%)ccdB?BwmE?gAfm;y>}IZqfX_m7;@79KvVf^Tw9ClCCs1Nw>5tywfYt1y1&bFi ziB%Xe-ADfD_g+ymRdy-G^RuEgm#8`x%cjc4kv?AOiJnZ$&PV+#JZ<{Q#B!I{$ zPney=Qmz2N%-yojT@q$I%6=)4n!UyMk>Vg<@OEf~iZl*NP&F%Ucp`)Xlw;-L8Fn#L*bsp6K=7vY+=e=Hgo?YXU~WNv{oR~s?%*UQ|{B!`d}|;w1LVk-MXIk z@px@+VcJuvA1neR63bVZ?6-QT*tfr|E9;Twz?-RW+~;ejhwjq5K0PoPdN*!Y%a&NS z^n6F!FG000-#P6ojvS*%WbC#&pH6oy-{Um2#>*leuHV-Au>O&=PQ4z;56Yd>huR+2i?h75cS2E6g)QaHk@GTLv4*9_Dy#q8TXI;rGTWsRjpL)S%@V)M+W#UgSz}!(ZMz?HSpdV1kPG&9_hY zYdstPEN(SwYU(@iqDlg4Rput<^OlNRNg0T$=1gs*Y;xrV6q(y>;SU6vCFY45K>hvT* zeH!eVSCRf087oS-JW{TGMZsna9WilDQMjxeLNpBim5StdlQU~|fR4OgO2pa?#& zl?3~dh9L5FO#R^I+Pu{KZjQ}ogW9OTr1)U!DjC$~oDR=kn`NJA&afmw#=t@K+u((}nN45-!`#`TdC5_VIln`p4wKTZr8fv%AB8Qr0pIAXm(N(mrmss}fxfuLtI z(wo5iAobAz|h+heJ zk#h;DUu5eJRti?8)Y-E!qSX6qm@9FP>sAH7j>Zw~--2;& zB!YdpTHVZYdU@TClmd51qX(-M4)vs6MOTSF4(^FfZ93SWB*RA#FfI>$%JJc}G`J;e%zS?EQuK`lZkAu6!f(!nR z>6rSe`RKV-RK#AT9qQDh;!$;ByL+ESt~a3??2pTcaUV|4IF%IAov=%~W-I5}OVRd) z>B?#PjqKA8);fOetY&Kj+UD~U6SluVi#`N0D zsM*$)OH6>p5d;9ZJ)PzI&TBu`bvLA`W0sr(z|5W0i9R-0(*5L6d3zMlbQM8aYA3ze zY#nFe?F$ILU3q4Ix1rr~4FyVN>a_tv5x_DJD_CBAf2aK!_@B5*ZFmRWdnqY%g06+{ zL28YIvJ}1v0MgOc4aZsRm6ADvYrLX9IyJmq8wtw}lvWPIT_pf>2zoE%n(}V@w^1@- zmKT1N03^Z&eKa&H63iNX>NX3b9)>${30CH7xHs_`m7YYFr&r@SG9)7UFm3MSRX$kF~nr{0Zp+QG+hcgH89A)C1XW(Mj5hoCG- zox=|cvU%>)J+xmD=B)5B3c(qY+t`>P(O32CQBQ_KQm4DZx9WFQ;{$Eo-5iokK*Xy z1!A}L_wi$%1vl7S3HK~f@`#|~gihnd!}Ew53u~(5*%xB&kxPp1aYvOIlx}KB*YoB& zw(C5q; z-v##<*VC6P->~*_cOlNM2$S8jctCxN=X>yF58C=Y>VRs4Lo%M{E?tQRRu=AxV%U$~ z7A}}MS%bi|ycQVj*YyXz+LBXGMd2B6ypm@$h^f3fk*A>zEE?IBtm)+riEC3h^L5V- zPii4Wug?I7;Yu%RB17ByxKX}AFmrHv0k@6?WK>H*gGsMO$>YmVe}5^u|L_xzK-PeU z#JhscRYUC(RIz#Rci03de}odn(>(%OAHu}4>H7K#W<{E$NrhZFso7&>C}W}#a(nIK 
zdvL?Lnq-BeH0xQ@Y*A(1Hun1rP?AtZ{TNkpVgnfkEDn>7P0Y+uOLy8Gb;EpGrWUp|pMQ95$!af+>z4B{|ZJ11miIER`aQ&M!A>l)BrxFhx+* z$01zW%zZWe>#N&cXUUOBFUnAUkh;)uQ%sjIU+TFa+g5K>DSx-kSY5xRhD#L*isn&>cYVndK#-IR@WQmY-(7uD=L_{f<3%u4q z_$`Mt6fms93sDCp*dEx_4h9DWwZnJP>;C=w=!{%Nq@!JUQ(94jo*8L=Hc+P>CmW|H zwVivEuNe#7(2I=1E(5)?ilmirV@{b8=pV4HhN6(% ziOtmX{~LmEDC}xyJxMg+8bN|90VD9w&o$KFcLGcQe*Yp(=%-z&>fYeM&dWD$i7+LF znRMzeH!XYpu|gf^DZ54XCCPU$qF5ylnXclsFWl}p^X(vesh-sn|`}Qn@vO;fbYcR~^f8JWU^UDn56d{*hIvR;K^#!0{V!xXf zkuZsx*B-cER}IE6D53=pYH-~~ggM{Q#1}HcDx@%-`aIKw)yqyrTDFG0$9vd_Q1pTsLM8<@O*{PC7XqmgNy6zDEs@l*-`6!~Nq&80p+Gaa&fPe(u8kKq6j6amwTpIKL zp^t>uCG6^aU68c6f8{R#BYEh2RlzTg6kKRXs4H zfH%?L5=v4_&nHh_LR;o3`Q8t^9Z6q2M;JxjHsL>Xap3^jy6PHZ(9vf^`e_j&LI#B6gzy~-QVD`ps1{g-#JYO-w^5- zL-fmV7Y`2jL1h2{PKr_)QG#w!3s;|rZn1qXtWmy1~)yWu(RcVxYCPa}U055xN* zuy&Z)2|TKR^g>yBEsmd)$kLJ|)_Hl>i8q?uUkVGmAF=O(tyFgr#}pmOsE4|i=V(;m>pt3d<0=DGMu<0);6qlZEp|u~ zr=k&|hz*cWw*vEgxT7i$05YL0#*HyKXR+Tfd<_!@7)<`HMV+)p7bLu3b0wtDO6b(j z0I}0}5QQ&<6N=n2N{6PeC(ibN&SwpSSRr7cy_GbVu~A_7wg1%V)9n2GN|5x&P+)yY zw>9(L9}EV z#42t73|)e!z%F(cKv&PE@ip7{BhJaKQyw8z>&IlYDFD7CXIa5S+8BV{7~}~wEUk95 z?hgzs2t>EE(ry-SmK7ukCPRlRcFSFBPgM@kUL(=vud1D3h}51QK&f#_)5MWVI*tH@ zjp&bO8vw>L@X1lp@k!M*0eo-A>c+;I>!`b~QaX>~;tQ*9yWS!E0OTtu3k;MYV%~S> zgRE&SkX;mK6?P3s)d*lgq9u%WgAmLs?9R>nC>_NeC-ToPwpc=1BeU?(Szz!jm7t5x6Q(I9Mfm(liNCh;%)KFW z&4l&l|A^XOq`?Zg0wh{qP%S25#1qGF0G1qx@Wv+p{<^0#*STsd_enG!(YTPqh6l}V zH^gSONvW_UQUvnd($k~GK9HG}WsFvIO^ksnEV5+Tb>{m3;rC`V60#^3EG^k4FPQyH zSa2g{EsAdE16|Yf>L&92_yZYH0sGPZOVex5Oylf-YqwjAB_QcV z2Ku6Sz7cT1>3?qoD1S`{*S7=ijPwE$Rq%`}@>J%o`)2sJnWm%%ej->W6(}^=_faXe zIM{~JGG}99If;;#qCYTy?@3RO!C3X=$-hzNzJXB#P_5APPjjpArtfmn9gy7*(om=H zeA^+h{q17j`5TRH$Bf{DLDBqozG3&YImPk*{2Fc{UErH{0CmcNhFT9Uh*I*D1V88i zaAl4;pVcrt!9<_TU#fgX-mWCV^F$j3YK&@6PSnPYnD&t5i0a@Cv8>GMLx`Y&Ys4gW zJ#mIhX+QV-1^8#1$^U()fr}v% z#wK0OSoYrflmueCiA?3p`SZg8pK9k}^lX9u&ZYD0oV%{NuQNQq_h3DgBZ)JvYJ7mt z6pA#FQ2GE%1Jqt6Qdj=vQ(WEHxR}4vrN-A2$GxrkTfVDJMXA`_Ca4X8mIPL@5)OQJzfU*-W=ZA9iWAtyfsWzE68iGC 
zsZVo{=DqWSnXUA1$1Y51(wQ{HNQ4AgtvgSrYGOw3Fids4#6EubyJ}jOSVO$j$bXhc zP?(2uE)xw!_mG1_Lv7%w{~F4r{L^vWqkNmPj$HoyC!yiDRGR$%gHDNj#WPX{e&w1F zN22^77#jePcfQ(gVDSVJgC=Y{MIa~8pc0f>QVw$gKU2*;y0(b z_%=EBkYsuOBjl+VnNhMGj~ZPO!nf1B_DfV#&)lZ8Pnydu@NAovXL4I&%^+89Vg8BC z*1zaU)2sAeMF6k^7nxqbhdG357$9@v?hK;@8|P@@%m!Xf1S<-k#!Yc&vf^81#VDZo$Kct^|#rLXK|my---_%>`oYe&b^^1 zd|bzt^>R_#yJsraS0)!srdo#FOC5i}5;pnbMfp_G`I_;m%`N^#vD?Ir3{nr?&FtVC z*BbJ1jGEYUeDG3~LB4Yd23MkPey2v)i$8M%-NGN=zU>a<-JOC$DGe=~I7_ogn83_K zTqe7uXLfuJ-m=wiC3k(*2{#XSOD(q_ z_349kns?lfX_+{BPBdTGq{&UY`SG_*uzTDuebMCHTrRo}|YKx1DhtqL{*;FYuj3!rroyrL;5giI6klNy_V78PW zJL!0Qgd5Eo!mtDvn(*o%G~2Xwt15+SE~te1ZJXvy18ZU>`u9*f)*6?A;)G?|`Apq3 zOkvVF{NK_(zx*2Sh=Om!z8z}OCj?pBOlrl?x>2LnURylIreAG4X4I!|^1P3kU>n)d zLFzjgwRkDL^lN@=)1@0`h<9_8UmM88}IDe?9X5@oF&Ga`V6Q7ufk6ZAO)xWWQ zg(WtdxVSdT61Dg%-e8ODRM!MM3t8n}Nzz(Q%B}7&Zb(g9vU+<$f?;33K zG5|rDR8K5~cnfuk@4=qhgs6Sz{Z!a3<^uHrrF8j?Rb%)YyNF0C z_7NKdC(b*B2~^hs`jEGMSx&8IV)I*;>b*BL>zE7&eFVoQZY9K;*4%dseDW%xy-1na zgJqL$YkOzd+3u|5i5th?2GmSPyDyod_{C2-231bAEE;-lpJ3wO=mcgNI+*!%u%EEr z(U4X|f;EI{p|%6M<*<~V}h%f zU;+*Spdped+?QC=0wZKTBilJ1opIOPdc2|ce-@;)XLi=sl&cl|NK8P;dzN{boNK7j zX$n+6C>W$NE$D>jf!vkQb7`GGoTt(vKCet-s;|hYeUs<7;@zPUYLbp5Qq=Hr0pxRN!lHBL z=_*t0VBYnW3km^DB0Il(_wKk&+~i_j`V=f4RqV0)enumST8m%EP<0=|;~5Rn1;pOm z+S-&FJVG$20Bm*=jI5GPHR@=iadCHR%Wi?FM;o$%QrM5w+eXR3xoqb73z8@XevUuj@h(0kW*R6yy( zi-z>r>bX^BZn%k4d_Jeb$F7t8?`)zOG3@rdZ=7%9ra#gDA>}cYulUbIyM`#O}2p zH~-19n?=`^<%Is|Bp3JE80pP_ejXXc9YZ*X(E*+n!#Sh`Ce1Wq088K_PGs(>8Gs!c#XvF^B$oAntNi7Ygtj=wHTn)|0Q))YTL@NVrFyo0%+nVyq~TPorKKfpdSdn}@$X0tDF&wp4`OvT zJKcqP0T~V#le0^+63r6;$PLhE&Uj_#P!j_R`GS)>%H#;^j@ssoPJbjyy#*@H#PI(;VYhYl}E{q5O(ZL*3cOFaZ!7#$@ zN6Z;e)7DQm$Zwf*7*zTVD8B>yv!zmOk02U68Rd+rdMb!tp?@d5=UB-YL z!7kYk)HDwi%@IoMDAoQF5~{h-2E(8@&64p>@7A`f-D zfU238V)!Vq76#%F4BG?%ap;m_s%!z!VM=*|%0Y`BgqY_ z@yhmbkZWdNPuZRHXBLS(1_kWGpqw`)XYlRNDeQtZ5Qf)=(`XAYp|kbje*gEzaql2~ zz-=Pttj#&TZ9dz+C$`=@Vv{AC`Yzn? 
zp&~OP$e>V6i^ar2OApNQeG78}1_{@lh0t+a`}JXIb&}P7)b1d8yag%@xEvdMHNUSj z>Lng@In^vs<9l~TrmVgR5WkX5JH>_rN3?XZ?fgwxmt=Wh0f3Ow^O}C2F<^YRkdP1X z<8|A%Io-d%lmY*lX~=t-m#Mum165I%5zU-)#hS*|TOKdTZpcuw8Wu;saoV!})PE2*1Wr7?<3+1#l`+!T}#P?mI%M!JQE<7$mjUrDF=+>`P;}2Lqny z=)*+2H5U;C5T773FxgV#nSC*KFBZv_i~ z>f!vSh&8BLM9$S6+}`JO_4S3;KLMwc48KM<=e_?*OmNpn^6>Qv|G~{G5IAuv#_N48 zwCQo!Agnd*wj+uvsb{hHX0x^G`KM)7eeLUi9n4Sr;E!}1B%>4D|HQ1Yo0Ct5L;fAeSAL4WeKftt9^q_XSA0{M_LC#iS@B@*M zZ)UWY3 z59Wb};L7oluD3&54=2)_*J7bao{_1Sbk*YG1N!E7OyO#PeNm|IeS`ZFod_^U*`ier z5(}vB-{lO_K7KrobOUS33&cnwh$bpkENfNa*@s_`W4`pKT+t9{ z1_0**&~L!Njp{(FHOyk?Fk>C569!bLsCFYF;gwVINkOsthn36Db5R4f+wyoGJkZ!$ zDsOANE|k{xm7V|I**_!C%*06Yp1D`-hX47M z*@gWAeZ)VoN%>rWG+sWuB{VX!b;hHt{cH3da9OnK;0KQ6XDkdhK*}8wq5I8 zMlCYR-izno)r#LUx9d=|!W*KUW`y1WQg4QXMaA(+52kiOm^}W4Ky+a z_^lfKF@}V3>136wnqhCQ+$pl<&IfbvJsbbIj;ED@j4k~Zl{|=( zm}^SE(2Grl@uPlVK_*-XoVyPEQK}aEM=W_KW*gitF`Slhgu5z|S%4#z>-=)Ykf9-1j}02;&4pW{BoSdAI<8Fa8JT|6NswHqAal zRzy-X_to$3{`W_DJ;m6Le?Jrc%V_`11^<_?{eSz4My(J7-ju`(Am$jVLuVZbfwrt% zlHCc26<@oBnIxDY1iL1Up2I<_p0m-eHD`#uwkno2rC!x|GbAstQ@8BCFizdk>8z3< z+n+%z`@&-eT>}`*NAfeSG@nuHA^uw!?2J=Cq?ioRV1RENh!h&(Uuw!xevXc0Ogo2! z><0-J7awbVhTZ`HSRd;XYbu_8V(yZjZZ2IpbNe8AhRItc#w>mRtrJtdy#)tFD!64> zw6L1WMc*1TeJGzEaHer;jo%wzA|hkl|0;fUkhE&S=HVMw&-bLr9I?Iq;buw2nVyn` zR*(2#2TTZ?!DI(otuwFKC$m|R4$i&Dkn9(|Lt_wxi>=5^7{Ci? 
z235Qna9En(?szue3JJ*@f=k{k$hc^>QsRWOrBn6oQNN&uKkg^h95*_)_)w(8#EEVG zt28~7?|HfI_uo-Z$+~#wdzs!Je~*MC_Osw`JM5*b4rLh9a9;4Ek-P}-<;@v z*TKf)+30I-Z5uW+wkgxV!oup+orL?%UvF}yZ9N<;&^vu?Rc^1D+2Qo$qKvf;pC%jC z9`)z_TGjD@rrte%<>%;;fr)LM)6Wdsf7AHASvkl2-C+Ox7C9I)RrmV0^u7%!{;?*?Rh++FpMV?>@-~cP)}qyIMX12W1IwwC zF0VZeP}ch9q?TE7d+@#lj=623yn1&3aA6Z8DMDc1;>^c5Wo~Vqnb~suc0p!lP=)(G zFZr&VA|@hRKo^_M>}9PzgBY1_{c+?b=~@gl70cRDz}&LShDlfM@` z@CZ+Fo*eFeT#}hnQop}%RY%UJv-(GhG7=gCewMO(2u@Fks*cNWndPmI^!?4FFV?H^ zvEDekKX>|iW=3QEqs%LrnH4!EYn(4l^mgSB`X70cT`E4s9OB~t;Ue$hpKZ|v*Qbz5mhx|kL!#RnRi?GCjQcpIj{X*J=h}7q zKBG+3(B50gvAPp?hyMJYQZa5f9wx&T5My5ddBODredbs73%$oPCy##WbsZ?s=Yf6B zZn&F7@ML8`dQS!}BO#DgP?&Mw-5?`U^81t3M^shWfQQ=4!!`i=VA;Dj8UjG54w^#U zpFht*XdDGnl#3%8g#+4G$Vy164pW(JAXi`g`Q;Wsc9O?~rclq;2L!DT^_rZfrn9el z_h+h3-+mUBwTz6};9MQx-~bwP2**!Tn*o)IWyFPe1dgp!iD4k-mq6+U!^&*FDveL* zS0rLUJSBGYceASwx3?X{W`L>n&Q4HF%DBgkFh)XcVKo!8kAg ztJq|tQNH8v@Ae1v*^m#_g#Q|U?dBu3#?hC!Y7NqBt_+VUISue!%24au(b+imERyRIS!AY3 z)QtxDT|9>h+YY9}@!l_&^-}+ahmU_}7Jp3=t6xWL_g+r+N6(JB=MyLY${ublPqc~4 zEugH4d>!%2L4UG^@mv7UEgc6dySDb?7mw)C125XAvv@11N=}aYrfvSO$Tju9j=3IV zc|R?E!^|3##TumMU1+#I5pdPD^ZV8QD(~TKOjOU7eM%xDoEr5S&xZ%?r26x_dFY%S z0rwi1+q&yhyf6V~$x3rQ>d$#Dz!q8BmC3Oo5l~?uuol2*S`WZp_R5TgE`WUjmi(y5u_4Kv!yFm&H`q z8TWx}=7tai*R9%!--Y@-=(*_wUqDiOIZlmOM!uPV$%P(NV%vpn9z*B;4iYqUEEjA+ zI6%9TR?!N;0&}^SUIaJ$Q92@Js3{A-;AofGUpFlAig*s?k zJI7p%G@>Z?@-pKuMqmD3wgEFyKN|A7J$?EClFUC>tiX8J$_v>S4c1z zC&k}*w*38~C~10}h`GKo|Kj~lTlw@3iw6j9N7$SOWt36vk-s13nUgZDj6YU#|zD}EP z*);t_%p2j1vF74j@e3#Orn`d=M8!&6J2AJ20+fd)1viWSKb(C9R8(u*HdyEZ#W)H` z*#!cM(qW^bViD2^h;%m$98nP^L`0OdQM!>XDTz^#ZV-^}9+>~ypy!AeN zy$0jV-p_uX`@Zfg68^|q&$*!A<9=pt;O0RVr#S)6S8=(YYgtaRh&t|DerBr6%{01N zBsp`c(6-xeWt9H?KKr4JLNMUYDJ!$lfYls1d(Rt%HO6Fh$;O;m8YqvD-=-Sgj5!#) z^k(aU`pwSh;2%GGR^X>bT5}8)>zXx*I=2LC2xMKtQ3N%EdVPYf`_`j`s2Xhfhe$|< zs>brb0?7VC&y@cU(1p+~Z7m=OGRaEZQd6bE#u}6G;IOAsGm;!umo7?CeVu zE%~Uj*=j!Muj$47pXjjMRe#my-X*`v(%yEp3S;#VSZ>NeQaDO!glAqqsDL7*B^XbC#|!-B5@((6qySfO1IHG 
zUFT_?o(j=Fsp~lnwy;*cujgN#_vX$=WBu}M6G^EX>P(VmURy3-ad_s;*Yd1!t9nav zbxC1-`43G~RTi2Y$;R-U+uJ@pl9Za#GLH)L$F@MT!ZMbZju$MB4bh28YVB$}Z>qW< zr|q}7`m>?!MyP4_B}ul7b#QNsn2c*G&y+L}PzpM9OGl?hnEz8^BknznZuz16Ab`&o z7p}voC#(iKQN_a1u>rTyN0MWAT$GGUGa$pb0Dy?q=+j?B=MCRNA9POmiyTsit6og= zs(RKnHL(NHv6%;3{|aKLF+074Yze^ensBm#_2`qwkF&wVLq+@Q_3JxC9DB=b4no(ApPRRp3! zpxU8;0<2H@*0|mV{s_@A0kI2%-VO_6PH8ivL6HuL4y@NgQe*tyX`X5DGGn7lW`C3Z zlRMAItz*!SLMHFTp(g5D6 z2@EAy4ds=UtqmxX8|wcyi(Y!TfYq1?PXN?RQwu}4pyHy`$^9^$;->)&>iM=C^BkhF zt~_)ElkKO#IF>zr`t&nAO`ZdG!_-J@v}otWhglK;kBMY$prnN7Kg#=a7cLM19G~;^ zF-^|61%2xL{17NII=&ZT#Ec4FK9_O^TP@9LFfU=c}N z2XQrw^V!!t0GAQ&8ASH0x{LsjBy29LYNtXk$jC%{ug|P^K&fCAg`>yg!AzS3>1p2Z zkXFd{nX)hJ4)Zzdup2KsM#nj)8gM%0U0^X!+gO&9M@oIg4`v*jwp6nI^mMe^6Z-^b zj`_6KE?t>VD=bFewQ6l>iD=C6&z~5Iy%F;2+~D4HuDNWM8mPZ_eC5mT>?mdxcLT*lmomS>xu(ed4|lq|okOFceME_|{0 z{C2nN?cyu-Q=;URX1j@ zGgL?AXh+VUM?odCiGKqOzG4qj4!OI8RNnUy%&w-hoi z;fnpz#Zx_%beIlF#Z})vvXPUMEX|wo83#0x5`;p~qCbai>((zYRsx}M-Ihm=jj)M2 z`pM7O5&Zsdt=uhL;~~FmNRt23s*shJk9U|cLxGLs2)Hn+-+|{Fx_f)S-~@tywM=cC zHa*5ZFuf*CgsUZ*{wVWvihJ&ql~#2~_i(?zdcnM%jfcodpV?R)%~ev?AzD-?&kCpO zyt67TKAAgTGmnocSUK7{?{!)@FfTQcTT&BWe8|FMO-F6BLEL0WZbQ~dk2+ts_$bZJ z^u+Y3&?;roS86`d;znI2f0#?xjkm39w%ft_=FwkDoNUXT16;YLcqp5tIp!ss=PLDR zX1xZ>c&RNy*%BwGnc@r4=1>BxLuT%LtGsVw(|xZlkVAE_aVpE_#$-##$U;aB&E$=^ zd8w(W4S8SJ%-1IO@aYDu8*2`!{+gn*`eebB`qZeJYwUs?C6q$HFbp>hx;@l0<+dJ)zHU#1=%_QiQxm6 zN@A{`KME4Kt=a%32tx&+tCi3;;ST3vF`;>X{>BrCJ{4}95E`t)RzQd!OF_y0HVwvv zEs8=nFR#6a4{r=8D}Xa^|M>j!bm*}z1PH9fLLlZb5PKl_=eQj$9s)5a}Z&L9IFn;G%ZilqgdK!V~{h-n)c^KhaJfJ4VC9lj!$4Q85zY*p9(L zPcNBy`+(~F>#{x9+5E0)r?^$zF>B0dT=VmX(vWJ##D4K>v;xEQAMM>*W&;mKx3|4= zde!|hiZasFVzMXgdFyKG)_Ap?%$o2j1556@Mz*Rw{P$AcP2aPr%xQQdXRBefpB)~8 z7q$f-jDFoaXTCxpX0NGj+mwTvqMO4o}>R}lTKp|KrG%!pVm$7{H@Xy*y&9!4DP5N}#Mg~8Z!47MP!?ZG*}T>hhytn6m+ z+AaF3*IHXo$$4c(M7V?Yg!6l=%9j)gtP<$>h$M3{@qo$Q&>sGYHdO*uWS@K1yZObP zUw+RuS#2M(upM7Ud5hsb^0pmj+Q8U9F5#`d-)iZ=={n*W@&~pv)@N7lccK-FIr~9g(>zrLN8nH$R!Y 
zbK65+dQN}e6w2>!o*6%-rbaz_`sCt1t3a*Euba~^o}Qx&sl^+QZJ?Y;7xuZfEW1dp ziqiRrf5;0%Zh}_|dq7pq`#rY6^Tzvo8Y2eYgWI7u+5HU?1z>PH5rqV#EEY+OMk$@o zwp!x&vVjuXKPVlc4+g`{N*g7eqoa8i$oUZ4%-EFxOiOCD3)AHzg@6#@|`(=Am^=j8lx5cm`P| zL&2UXZAV|-p{bVUq3A=m;!~Orzxh_%-x-XbK>OcGpxI~V-YmYusVExB=0}sE?=xDQ z?6a~8KBe|L3IF=Wr+7j`CXL?^zw*Cb<40FZ8Hq|-_s>-R?WI`}jPA~g5{g++cJ>6|Rj@O@FuMj?Pw}F!#RV=mN$){NS zYIe~%7sZLxZsASTN4%M5Pn=kWF{t0JRYc}IVm->)!2~U?zimjp!8q@l^Fr){1YO2l zgO@ysv4yl|r_l---nrO*&BsA&@{COe>nra!P_0bwZQg&8H>=|4(FJY#ZM)c>@Kg@l z%v@o1zPKg7C*VsdKa#`lr|cg zGk(=AN{RN$Om=FlbrI%SHUq4Ktixg-i(PxUt@kVd_#w(cK3uf-$<%x%29Ey5;0tKr zyS~0k?J$UXpU}carfj*y_SaFawmd-|HeR8XjPuL>7~^GYNZ1i;BTIwPM_yh!aOh3+gd3Vq zU|=cmPhxhDQ8fXw^*5&Ypdl4_kYI^QFGtz1t$xk?^NIG)B7L<6mmN~woiB4T6wGe= zeA-sqX2zzyF19P;XG>{XsM7*2}l=r`J{6km6m{6!u?U7BGDR~%xJ1zKlB@+HTu`qcB6udLd_KfSVAtofa2 zV0aicz2FO(>kDLR15P|Eb)t;pb9xMh8B_Uo*1wo0hcI2G4&UDJ=-tSVAHqKK9HOV4 zUwI}QJCeyPRo&YpnH(tDf-idp8VoMmY>kk!=d9YSrWxp#71iAxal^@AI(q8akY%a_~I(|H&kFV6o#_AsfucreiD7uYN>_TRB) zrY#HLI}{iQRD!BPgVGWN;ZAHW@`lT%fx2G3Vz$LOG9)x^K+V2K_yenx!xYbfl8dFD zRC;emQB8|OW)@TD99%?3ho7)mJX*WFRjWWZI3(dYX{Y@atM_+0=FCh@kB&BWPi+EP zYaP50Xe`;(aocz#Q#i5S+R}Qr*n#Ks`=v(KjJL;bj$hrk;Z$#d3#H?gY;D9OOYI|$ z+4U~8z;?QFYTgTMrP|_)!p8qG;UktxMW=xjwQNuX!1F;sh#71#C=|+B8JTW8B5yHv z#OMlr3e9s-YaU|7(V}*xBNz+FPDOb`G^{AOJ7!Tl-$EM{i1^&0ENU>u9KJwbMK*rJ z_R{D?o%h`Rh7;!q5U?9ims}f2PNXQ?=D$L-rzq)r%6#B|s$y0$dk4hd3J{X=sg&xNilcYtXv9?{`soqkIZ z%fCAJPih@Zpv+`UvZbf3zaw(|+;;c#cXXIsbm;77+*kZ$r`zAxYa}U1Pm-h!E5DPo z`u+LocIS-77v0?#CVj!8r6JpiWlOA0sMo9x!Q(*Q+`}ndfDA z49E#d0p*|xo<|xBm_k^Z8`vE+oEuSuro+WzVvjQht48TRH+X68%zLzn>R)^W zP4l8%Y1JV-xQRC6e?y+lzisZfoFqA}9-`rWFZSYW?%U&@b*U$fJvH%zZJTO0gy&Co zbj2!0{26rM9j1>PjX2NRe!G!*LszXbL3-%Y1>4a*v+fU;ZEKtSi}?RhAr2ZD4foq6 zjbRRiHGCM^&58qAAZatO2|b1+i3sF_$DKPSk2GLoY)AAa*5*cb_KQeh#OdkzBfRXE zCJq`3y!pCO8WPt9Nbkpj39Mxjt{>R9>)?!_1K^fhHNEzVkwAccsX>wgH)_SudgFs7 z*IvrAmD;T4u!-^D;i2PEboh()-VT@HB0(98D~wZ zHM_6}Y!xrlz2c~@nIx{!w6lI;tf1e&Sk2j8e0YX5#~-JtG4O?6`=BFf=f;8VC}Ay0 
z>QCC~A7%8NTjCw-+Gp$477lz~cXw@}`ur7zOct?zW699q5dIOtsTtbF!+Fi2f)|s> z2@AwBE_HFbvzN41u&`E#T5m8s8`74w=QzpJykjrl=S<#_2Hu*cGn(^^9$8eS9iRYV z1;T&i6fP86l-4pqUWWDYb)}$h)MYtIqAdYSL@p|Qz52-zGLguOjDzJ`n2LJxYC%8y z2y=aa^58ve7ZQ37Hrl%N{~eFNZtl#$a`-NUi#7Hka1#9iFKk0Qx+Stg(PpV%H3Mio z1a=Qz4>DR&EFh@;49Fz#%_N#BV4{aX@*ySw@L2MwJ0%}*w}I}-3;(kF$;sFFEjopeEp&)l@b>WA!%a691OD6l>v$7Ge1wI#?g`Fy*~sZW61>ur7u_1PUgKGN;{KR z>&7>Kyf&*+wfbpa%MljRWZs*#eZ9&Q+r0z)*Zk^T=`uRhTP@dynD1{q+>!gFaNe~} zC+9?;SXM}WUe0x)bjD0xS5Jw_7scfXjFVbDu`Z7%*~eM5WV(WDYRB}?Q_5^f<+t}O z6pXVoP#ft3YO?EPW2SAVJGtlLzS4wV5)vel#`ZPV*GPO@UG1yU?z z7|6x-LYgXM*Jov-g^!^dZkskEILSq&OG^r;jHfL7|0BV#Zlta$QGb9Iy>r*6Gb+-T zqgB$L!R7SU_~RuIMp<`}X`vg+v#{IfZz{?Ibb>Uv6~HHjWP_lRMnps;{V^4{ znqkU(m(}vLX>UMm58LBc+?^5Wy(_g+(BAl*j4>m_85hkVweWX?86>6LTjAn!%&oD7 z9B+^`>;x|ufn?$o~ zha9c@_{dRBRwue8P&?pJf1Zo;qtO#g$(AX~>1=TUbNAleSL&D%x8`;5V-UP>*E4fc zKOj6vpR+aD>-ddBVh(BZLvMBYbi5slI|_o;W_h|hj3c#^og!XMv&PR!4ws)`HhtqL z+?SH&TIggb!$4N*qD=QxG&NI)cn;w&7(lP6vKS?m&7A|)&QotP@iwUW zLHBLi437s^Zl=W|+B@8JktZd&foX`Ap><%O3_e0^(UQ>hgAmPYt>7n%(StyMc-_^f znjt}b14FWJJacMG`=d9p@kec4V0Xw!wHi2o<;sRCSYO|wo%;{W$JhdXfvp~vduX7* zrxk)hct$Us!z~OY6;?b93-)k z#5WGk4^=lfrM*MHPq1Xr?3+7uK?oRqE<}5+qeS6Q^%Hl`dAAXGUHw6fDdROPpOu*FDtVvStrD_zx13H$Hd!BzlNvJu7J0y7gh!As!wkQj#K1 zx|L<(P*cOxk;=5J3^j>qsj%vl$5}njK$vF> zON{4y8q46Vp^5Y@cV-RFZP*A2ZXj$ig(hWrxw!OAcw1%xXTHg~k!d(G!?jpf?(n<6 zq<6S%X?YYrcLdTZMx~5#tE4`~#8mW_#WI3P4+p5jXze9tf9@oWJW|6UUVZ6&z++5U3;C>tW%f>r7f0t>j_xPH(eM3*7rBU2kP9~e-fGeN-Hmqns;@xp;J}{? 
zELZtWIecKBAJ+Z@Ot}?wbn)P-WCyn@0I8+l!u=ANQ5XD=6ni@e4JFp7c3^h!7BfYxBlGrpjB|QL|fG>O2>6a%s#R3QxN;I(44@hS48iC=@SwkiR0p#1t*(5;620r1_M&mW!O*LL4Vq z07OY;bQ(_lUN@2C7$@>Nr4U+|29b3jSBBgj{~k2{bMv*1+SojnQ>O(D56Uu<*LhUO zDn>#zax+$8_3G8xo$b1clAzgyg7E{Ri#^nennmPV{!VO6cU&+il)^aVT zO>Avm-#UBec$7g5#Xx=U&YjB=oIQogGx9|agXm4oEyPNj3Dx`(!29ds#(g%|%W3rF{|ztQCJoYb{=3RjhWq zZfuNE??!uNL(M|yZk~yMeb{37wScS z^?>xujS^0aDD0`X6SoP1c#*b+unvuyo_r-ktGR3yw|y^vZq%BtYC5T8OQEHmX@~rZ zGH`8moffgTQ>2ZJ;!U5ro%SUDix9rUudyn5BxvaX`~BbVi@b^@Kn~u~B<~2e#!`p&`gNY1eEU{b36OazYAXWq7suB*X=!M_oDl>S3g469B?jso}M(<)j|#Ib<7jM=3Y zW<<(hUzX`bgB**swC8iO?*WL$*0P zIY96H=n*|Z>qc}}z9H6$8C*=as7d`s!-0ZdT59Hy@N*S|Poe{;i>HgPyMQ?O%21iY zl9ESpafhMWg0~|vh(%HkP%ALI4NMzwRpFtLkgM|h?%)Dm(_PdE5c-U@NaZ^OpOY0x z%lZ>4H$cqU63dyhu`|J{B_4!sNMT|WxDOhMqN|0pFGxmW<}PULThx9JDDXK0LzgJ- zP=(WG!;2W`88{skSx!#46?#wME=UMkVbt;=EX<6}*bM?581ff@oh1Y(%}3F{Xa87u zj zqBH%aBYcd`7h)uoXrN5vi-5tnfrVv_GgJf^9Ad1!8446^2zV5+`l9m#h+2^ui)!B{fjZ(Yd8D}yMpuXMX{2lRG3k@=Y!a1bY-AGJo^fE$6&c}4Sm>_7KfSz__uJR5F2cMGE&nZ}#9nDLZ~~JE3oW8ybov!{d-q zVl`u+Ec68dvjmF|@I3bXWQ~7o8@B%HC1H?ZgRriV(b3m{rHQ+Z90y#IjVFP=6%y#o*keroDgXEd)w5DVtg`>|u7yE9~a@z14HKEL$wPhw%yFtCgK`R3b$@Nh0TT7J4pjZ>4ni zSLsT8h{$70`{kDw43gt@kO}Z0Elm_gLr1^Np<@80t6`tWKT9p;&!yW=tlxT(761B! 
z3kh^Mmj3lREBfkRTPg7e|98LBR$onvzg+YV%w+)gp`eptf`MTt%u#7yqVB!byUr?*xF+^TM_SZg8V);RAF{++{ zwKQEeB!`e(?D136g-H^zRtdY9YZK8)bHDGJE_rEQ6$DpWB8}IeIsFAT>M;m%eL_Np z_EB*P<$@|#0?qo3ERuM1#^hF9;k37h9L1-022TZ+=kQvoR#?@C&GRN1-yGgq$y=Nl zEs(~~FZ48^(_-G&tF-%G4`a;4WySaggLBl-w{IT@1vT|2$zzgB&zqi&r5_+mdRK=^ z3$rG(RFZyU3zM2d4vxG4$G>HdwZF~*B61ZD5idk653F)|B{zd&eUTbQ$g|%0son&i z%=3jqnMQp5KJvj_^&{=E8{E5D3MqWnpR_)EQIv>S!c6=6md1LA;5Z^PGu1NgVoY=6ci1*;@H&S#WcGV<6E$i@p zQ?uGSS%n6P>!;t$4RO@J285^zt$ThHvw@Qk_|~%c*Uw!?Z|O{+lQGS98i%~5NGhhCibP5LY=nXV}D}Onx{cCTwYWM=2ZP&mAoKMN!nut!J8=Ib3O^I%^tb# zVineG`FSNSbdwopPo1D&=o0q-5UyzEG|F9J&7rA5{%FB2=&aQ2kS(ke5X2jzSWxXJ znQxTX(bk#sGHd=B>6(#cIMqL}NTSSxYJ52V+11iLn|5n&$+RDEyfYu*ZPv?tGW6o> zgwhiv)9t34?l0#v9CvJ->r z)=@q5OFGX{{oSJd7(6h)pU-nS-bj&L7Q-J`n=KmMv@?nt4zobPbabC z+3$Hfa$!$fF&P6XT*XJi#miLDv6lADGh-J{EL+yJRmpOoDP>`>HbHk2S!|2}iwg#baj^MDBN#Lv8p;p29nxXf zVl84!fJ{qQ5HONEIkr!f+FOq;0VDm(kfH)_+snhV&KZszrbyyS6?gi* z?V46>Atdg}HG_ML-aB0%hY?OEE z44$a`WYx|mf8)BJTq9q%QT@7$D_Z^S>CE#2(|Q&P)znt2GYj>vTjv7>dsVs{WIL@A++S?i=hvUtP3pJYTd=|P6tjieL zu1i(TxBl2N*5$|P+m|&O@1ry z%JVj3M!1kVu||o}v4kQ!w7lH+O@~@Q!F=8sGm2VB%=kB^+Q)I>%9N4li*F3ages^R z4-=f6E}go(y0Ur3uc0G6DnqoUg&k!9QLv*YVp~TC_$TWtb$TShu;Ceg2e$h-#|W}~ z;zP9{5Z)^w1IKT|;Zz7Mhc@ymjgr-XxD)FPrE51v?1WexeBR`C*84e_+@(+#<}lb1 zBErBiX6kIOv0kW3BzV5)A-I)4J%%(9EG*avSc=Z1!G?_p7VeREOq%$uv7AY%_=6B1 z?kUo;YYF(gCPHk)4`X-)=;C@*RQ}DjEb2TjBI$UN4}9X`p2=rZ%J5q!Xsz?L zh16HSOeEprpEcq(%kQz+vfW5@ofK3(S*R2pdF0GkxY8KR$-^w>ntJ;k8(zeuj&J4r4%I2q~>IJ;QW#^jLxb+K@9vI7TynFff!7)jvtC!fr zFAoVix_<3;{j6W|U^Ke$Ijvx-9D^6D#U^?=r=kSWq39~1usDl69d~w%O)Bj{;S!GN z={+qetgVfyc4f_mrFN$KX|~h;<<4TVGi0!_^yIlW!oEJmnVX~(<{Oo1Qa>k~m(gyU zQ|1Y`a>{#_MRiT!uVcQSGps**cs|1^?P$_$l64K}kjh-0C=iQdHOp)EVycQW9W>Ek@am0P9cm|^WgeNvwF=Mjne0b2qBHw_}7z0`3k8P`!S#;bXqjvv3y1U zl-wR7V)+n&BRJD~G^rLnCy;ls_=rDpoQKx7Rq-h08uHNAR=5%+5?)1Qx5a|+;$tr_ z{&O}^YibAt0HoQqXU~(sK;@`l40n+Vm0(c!1dPj?aB^F;yH0+Fd7V(lNrCd|$1zrA zGxK3t>+MbtJo(}sbW=ZwvQo&;ZdJ=;=Rb;D?vH*p>?k_Ot^T8hb8qrbHtR{wg2uV` 
z?LBR6a&vQCnc4M@qMs7GkKLgxk5PzJ7)bhKf|PDJDsy_J+H)l>lYK92b@U~@54;U7 zeyiWN&i9#iXGlID!|RhgyDDFw%q=Ps?KoiV*^_IP{Vj*7+(z8j$E{eay-iTZ_Yo@- zM`v|yZ$q<+WOI3KLqmfl#{i1{zN{eiEOk%Ov?c|THt*|3jlPSsz6Zv#n>52Y7*X!IIZ}5@&Xuw>x`)NG6GnJz|u!PHR{p93$ z?(Cag3ZyIBGq-De;u7F~=u;QUb?_0xANi|GGN+#kjbn-v`bo8RduW&0h|d z?&dVBGi|$q<@+U6&D2m=g@u<Uw%q41nqN?iTsci@mS-@l?k zIqHi{PLXJ1xh<%fo%#1G1#F%H2potY*@`&-cB#|Cn|h!hiztvesO^HX3-P@(09>FjCNwZlmi9CNNFJ!<< zlyq5HC`F3D?vU|O)=(#fSCp`Cj)xdrI+M$8lG`T?f)no?y+&I zu>0X7g`_s~`>IX!CPuPNDV>|s6K}1g*&pf|JCl8{s_ROQ2E(A)LGpq%kCkiQW=+lW zGku&)9QU%kc=acZxTez62EtPpCg7ovk6NB!p~N->bss>YwkIe|&yefH26O?t%lir; zS>L(oX+D$IV`x)aA7-Gt1FfZZl35CZQ5fzMOavOMv>+HXrf$vxuz?%*0_6m~Gx)h6 zh%~B?StmQfV@1fa_fHf7Mr77NCF>y!Fe-R=|3VW_{qQ@x*Z}T9r{f0eAk3n30E@sC z05km6CcztEFw29W`f{Qxf%6ujAAvQ_2XiMKuUn(dKZVo3bsmyPowM8_QcyA0##&O< zW2Pfn&p33ZzB(@=@YCag=x8%hQix=WdmEpEu<7=)*IZG{2Ig;XthvN?W_I$8gV3O5 zwOArUG)>5=KI(uams{psjfJDdk;pxbXT7X~Qa(yi%`EK_`bv%G*1oHB+3ChC`g|t5 znQq&TeRo5pnxu#JTUU0rweZd6haO0tNUyW#v&zD`8Ul{CRg zA@o|L4Ob`o-(H;a> zE@)Lc9n||h+0hYVM{mqyp?e;lm@pikM*bMwo*sPtni`~S7+*(JWjKd@3qJ5TsQPKWM+btxO&Y;ihIv}6H>rr>gf3RTd>|N z9}e~OoC7!z=ApKD9{!ENE*d%Q7&SM^JL(vAF;lL4#+pg)#xoS$R~$(yH5$dchha4> ze*@f*> zci#ho3hu(WvuCqAT&rGUkHW0{sukjqNJ%qTh-!D_2r4sBrrm1J}(7xI3S|zkWt@;MN0V2#KiEW zA~9nqlb)WwFT(?tGrwQoJ>-ai*OB9L;hUX7X(I;280^P33`D4;B`J%$=KWQrmwziB z3;(aYwh!hZb5lb)IQ|V9k~}aw{;p5?*T)k4RbK%dq#-6Ju(1ID6@tMZa3SmLg|YBo zpnVeZ3gTPtdZ6DCV4SIe2qILj72pBcd8qwfH>XbZo>@9}kYc``mp9t1n<7`Jbr@Pw zdR*4t#%_qrYSV4`0jhPNiikF6`{&Yr6kS|&{@VcQg7DvvY+T64&~GJ5+;s^F4g$Dc z$0^~i!AwKYcOi0n1}xids)_gryPy0EJRKBcwO<5JO$qQ6E?|Tm2yWMb0}o-5OsgJW zXM=D|SUr6vlfCe+pwE0EEcu2=Ww+wFVqur|`{nm+X}w&>!u1jxT@4mM4!m-pVUxds z)dvT$S1(^WWBLOSVPtaB=rIXl_QdGK<@3K>ed@n0we!m0qNn^A@`5NIW7#@&@F8L3 zHkL!jiO~#Q@%lp1-&}y|D3u0v(|>)XTZ+VJZO7U$IGCV-E;{GT{^Xuh*~NdAkN)OT+_(ngvs#J55H>3C*xS z5=Q^Io1=*r6t68Dc0X6|JmXOEi1)p~qzO+7mLCx>LfF408Pv7gEs&iS_%Xr_H*ZwU zu=|GI81Y~7p`-8Hu|kP{3a&GEfO(EQnGgl4L6G5qM)TeX`TdEn3I{FD5&x%$|GC)& 
zrdJ9v>a-a3P26gbjiww{HVK8Atd<8edAn+xcLb2JOZ(tx!c`{*-jGamZOya{`gc&Y)5rw<^ z{@xwGEJ2HIvtWO~8bN3L=Qm)F=lf8*BM$CVvQZNbo#}V%>N^iHoRyAWzoqM5mQ#M& z2sw_VP%$+Nl?8K_qw>ah|l>mm+MLtnSIFV%~-+_3c{~T+OltWU(CFeX2ELM4Q(bMdgtPKA#hH#B)-+48?&Ob1D>YZzCplKUhU#eF$ezT zt9ET-om?nHU7Pe_a^=BVDGDpA0=-y!h{*{aRq2nkU#@(v-W)lq!06ASh-~T z%BvO=+)Q{g#h4P(Idr5IVAUX$iAB_k8P3gTYTZ!p7l0Ksc>L#c_z+nUKY& zFzGyoZHz_4Vjmm`qedZYA_g@zWF+UcgDh#DHiW2SxV%U~`~*_TF3?QyD5bGysb!JG zkW?nCO7-uu4h{{~iHr{Ty|W+TUfKj-f|3Wn1i%&=m>?2UNhn^PO0Qso6(md|!ThOs zurtF#E>fg~pUkE+KN~J=m#~+GC8MWdD))eLK-o6ol*c=z?4CTU4#+(r8m|*_Kz`7K zISPH)^0cOG6)rjD(|UTmCr)Nex4LkhyEu@~6`f!%iV`Wt-g^7ol$x}ioiW=wm(G&- zrr?$m-q3H6O8lAB%)L@xlUf}G;Ug24L-IM`n0>`_bUy3qHGOMOQ!Fl&xW3PjI1s-? zh%*HurIy(!D}Z4v^5O8NB7{NWsZ)GNuu+^)`S0*qWR-0a7GV1eonQ@EM-Y}M5JX}e z-*+In&Hx9dIa~m-qG83V1&Unf7xs0oeL87Ps+}*Id~d|wTnPm2?*03t*i|3<`-85K z-frap%_}J0;pq!U%Z9|jn|Y2BBUD4$?C+ zT5}a(LRtqi9z0P5(ciZk)*J49^R@9hGFZo}jzI{3YjYE`V#4r;c@@eh9S{JG$gf1O zTv7NG9E}2OO|UZ{rTB7^QT&ITJ7BBB$an%qwanSeOXi;8^FntPmynRLR4-ORPb{k= zw>PF`hNUjFHdAQepI3m~#M`L0F!Rj1t)%k1n>bswG$OnMb`8y^!PGvu^rBQCyag^@ zTGa}gVK;b*a6$|KR0d!ClTa&xi$NUSppF!3r@?$mN-8b*k*DXLojdae zn6XL_l$BU70y~7LLRcJvD^*R3VY(t)!`T?XDJ03w2VUPZ?i(ktv-8s;}s(@>mNfd_wsT>TNsN^qYP(}Bsut8peZbvH5t{?O|*=QkBll57Opm1qhV~k zNv`kS!53p=14hQL`aNe0qIF5zw+EL7H3X-aH^l49Z_m3wpj{htdGOPlt`$wVCC~YF zF1^#{bR73qa;qf_Kw%Mu3vY`uCIuX2eFJk76K}3{}yCxU0#|fiMAwCzcL0yoK&o!Ry{R0C7iYbichK=F%2hwj+{c&@*#ic(KRB zFNp^F!!lGfZB{LVAB-KWyXZ{1N?B^C{Jw;(E-ZenBWHi(_TKeZ7A?Bnz^~ZB&+mf= z96Ck@P|FeCq1UgWVgY+#Flyv&B=+wQ{CG|2TiTS7h*jeyi*dmS)gYy)OKDalWz&2) z#iYhNd9+@V%@wi&wr1Qz8Gi6JQ95_NUMh=}BRTL|wU;&P{Y=KZ)t+zP)G8DE)+r{| zpYY=NtI@*8zI#ewS9~2R>7()#H+Y1Mz=h&lx~(}vB2Qs&nL%n{NfrD7+rS%E2NI4< zDLI8iWtACpJLSDq;kr$!NAbr56BPum?12%K6o-)cWj8f=5s$nZ{BRLL)!Cb%U*pLY z_2~{Qq}ok{Y~a?Tq7B_h7noyjkobe@{9CkIR=slY)+)hF(Xr*-R&BS;tCE?~1Lag+ z3t0c#;8Xd+|2L44xV5k|_ZGhpDgEiN364A7U9Y1u?e<=KpP*Ir`0~StYzcau*IpT# z#=2Y!J+E6+4w~DP9J%_;$x^ptg>sfkE+kBGWyn!c55^5GW3BqpJ`1?mk 
zRaFk)stI~ky{{wWBw?WnFKV1U)gLo}G+@jFgLS`0rkE1)!@>ba_K7Q3HXt!F>}wLx zQ}m|aP;oqpTWi1gkF_dqZcMe*2hoF|Yb(4OK;1{cI z=Ca>+!SJN*stw*HC%*&-E%pZ$-;uWT>deZOwqIl4IKA2JL|fp+Nv7j;IEH#pA9&rE;3-O2*AxojjVN?bmimgWCCtj;34NiMAZ!vZ z??)HrqKSJ+BiOk9yn{p2lZRwUr`KQN?yhh;U5~~FYqSq4Ys)x8f+IXz84Ap6E{UH zzvN0Tm_9`H%EGtpZXw4b{r>I6UT|eQY~FPNO>QZC2#-grW+-b1$REEcQ&dc&&th>l z{i{)Rh(_y0lY#8i3HxrwCL5dFCl?cWjSn2YloLR89^Yg#K=WxXR=UR8GI|J2GGV!b zZKWM7+B6H7-!4zSI<{`CU7RY*v+EP}ixi>lt^Fh5VD zrnPAg+gn*KH4euko?T@btoA~q1ih^RbR%4%h6uwcR68uNv4CYoC?Ie@E0f>HOybK$ z`r@Vu7!z)QOAQ98;TL+*wFH2jt=jH=lqb(Q;RwBkhU)a3yx7_|F<&p8b8{&=8~vz) z+MID(Y~Dg_evYo@V@7`aHm;KM%7Ki&84*=wWWH_u@eHoZZ{AGjGE@s$DEY!rQF(9$ zo&CnGiX%0pcb18&s_t=cEYs4y;pRRbw!WiLSX3b3xPn3l#oumvSc6bnV%pdHJ+pL~ zPW(0z&-2m7ZNf1P+k_%0W3iF1A`*H(9rX>d7NDmr3skezV1De|#TF9nlt0>jV&B2P zVv17pql;fVJ39zQ#O#mH1 z6on9*r?gmZ0eThPyROE!vYoYVYG!G4mHH69ij^r@obKG&+B4nd4d_*PSFW^@xBahu zSM8PLIgUf^O6~1Qk9}neioECC9yR3tD3)sUY>8Go%qV8^+>r$K4TJl=d;Gjl=ik#&Iyugscis@x#(478ht?A_&q*<6b@z~6x`O&4 zf3nQyOm?e~&gi7KcX?su-ZW093zE|f%VpKjpmp=Ws{hLuJzI_6a176(NTMFpy#rT) z*g6AHeqF)NMYD$biZW=OmDfVejZ5auoW287;8qhMq6i@l_s>_!N{&b2MfIGMF}VAY^vZ2JMATA5ug zj8SjZvPQX_^&c9?v-CHs6fMlx+!Q!DH7I%`Q!HR^@R6xLi8m|oOtMj#Lfq$+$dPYv zc2CcyQg3AY&vFWIyBBgApF8GE_wL=g*(sT>GC`TKR+m^IwpE##wm&(9SE2SCzo5?PDo(-yMZIougWvUA_Q9bgR>F=klSRInndi&HW zDKEQfn6eRTm0Q>IdzbNJ{e?S*SN(AeITb+la@tIY=nTMc(pRr;q#;bTaPpzsS;r=n zjjwzg9)|bKWda_)la6N=74YX#3AFE+oqo<`kSBKQ!Vwaw{O!+Are!x`S`)Zv9fX$z{mwZNET%X0@#`2+gzW(g>-3kH@y`*K+KJWN$Vy62?x$uGoYga&_^AMwP zfKow@Jw4AZXI?s46(JNOKnSZuu~bAK1WAL)KwZB9;|RUN5) zQF#Uen&={s&>$>aC)-`<_6V&mVLONt#NS|<(9xuF_tdgoGg!UFH5kN3BLpSQcuw-{#~%;jrr*dfYOPM7EG%N1=F z$q?lMNyT@2`k()y%@p?xJK|-&=XQ70-N$-ccG=qr_p#jP*XTF0z5RBqJN`!bL3i46 zEnk_{HM>N%TndTEyks=##nF9i+G}I3YO{LUFxfTNVfdrN-T{%1M4!~{37#!KprK&c zOw7{#X6dG!^`BI;!*)x01!5>_K!cV{_B_6rh5xaMKuEHkq}mQ-A` zoB0p9J@W0 zo<2^_8Lp2nkJqRDgPc?D(b;#pP&k>dIqv-^7yURdsE*@FoPL5d|a#1O!Rx2BjngMUW2Z?hffjMY;r}ySux)ySqF0 
zncL@mzVH42bG%S5x3KowbIp0@xW}j^xiDwY+>A)qqP0t^yltc!a+vZh!8+WiYdD^dP38r~!qpp(vNxc31Ct0UaD(*(V84DK{dv{|OrFdqV9@}0KnE0w zxQH=fNivv>SmL9+0-;z3Xc?`pdINO=Q0{`^MSh5@d;CpEm86NOseD=Oj9)unF31Gq z39PLtJFWdtcD_Kj9tU|-`0IkHAZ;N~)8cz@lv0{r>n=urUJ}i2a)N?!4&2oes@s_O z-}Jq5abPXK@{QsAO!R1GF9F*tpLlcYL)W=D5fR7Zv(aN%E%(Y%6Wv>-Q==@4aK$`eim&~g0E_QM`MO9nEY)d7J0OMt~<=b2F1cU9Q z$s7{&+LmhgYCTl~=#_5?ON4d{wt+RztEa$?MCX1gzv*x=a6e#1Pn(&45=P7o8n|!w zz5bnJU}GMASR)bJESM*9AqETGh(haPKa%0rX+7xXaV10))mIwh{!+P-~WB-|Mkk&-?0d>i}?lhQ2ze{@f%V6 zE!|b4i->fxU0{o>8PL(3vQ3`tEKaq4Po65*OM9^RPjmN&6eG=PSp|$!<-dLz(fz%^ zf4@Re_UOMFk^yXS0LKM36QK)1BZ^*>f@Dx|(U*LHQ7TlXj(9A`#B_IuaCC8OHrC)S zc}RPm{v;)A|&GqANKY8r-$sj5Gb#)^eDl?HcTy&HUB0^j&OQN`&s8 z)b4-QKt?$>{nr~QT7d?4&{>-hBuCSF1Kf0Ac;O+|F++466Ha!*-~Qcy{lt4K9F2vA z7($BYx47WOm@F(LgV30qG$|Cg|N5o>xurPf`0*fr|4z~1L+y(u{c??y29ZD3e?#&A z`JNsyLyVs6dhQCN5b;C=a=0b6&=j>(kw*T_WFZ5Ppo<$UbW=?{nOq!k= zJ?KQ+?dV?D9)iZaCKy?}ZoFL39_v~h%F%n>z19>!G@LRmbOv z3W)?lok_T_8EzkTzkDTu24qcCawtE!+AY+``&cXBO_r;8zq6tE-Nox8vLJctx(o&f zlYgtQxjAX!URaJ|Z6ijp=7gKNsX@N)(^>kqYu`z3F-hm=Om6NI`92llnSt4q6oDA^ zj=8x+k{6enR$8Oz1;l)bMN=jq9c}za?MBTn?=xC6|3P|WH9>BxPb=JKggtv)j@G6P zW~-%J_?a@}j_tD1yjiDTy9FUM6lLa|b`*Bp{B9UJ{H6*(dA9uTBY+UG+|ar7BOX~6C&;)A@p z@i^FPzoJyS2D;%=9hVnGBtzqr)*l)@4zm>nqiYJKmKR8v6bxTyn3aCMTg`DnImx+S zTu*{>`3KzX$IMk(aQ5jZ-83H1H!xK&w$^_wX@mxO#~WC${=sm4=z#EGI19gD68oFA zcj@&o#~vMH__0>6-Q6;r0C9+gz(7PnmPdtKhNcxZSJ%ixIcBQF`b=R7l7$>_3D!$5 zHc?R_N0+Ct9hgFXUVH7N+uV3P-n9GPaIs+I#24xmpTzGxwB93ozO%VWvEC#Mj}Sln zroze{M=DJQ@ocZIvNZMPKGCr5CEMO#q|{ zSb}Dq;-m4Ia{@o%+1E3cJe1yY0_viGG9Tz6g4vJ&c9Tigje*69n&hO(i?v0l#)~{V zAOiajFF+Ij<=r=+%Tj$lA@6#3d^=}1sSLIXtp9a=l76?N)L3w;w$rQ6sq4@bPiC4~ zl=aK2<;e)^z)?pT@A1(^D;a{zGWSSW!Ed|G;`0Nh!nWimyk!_UH3J)DJABZ~^}2^M ziHYbj0otoor%07NP~(<2rUjf{Ay0xMr{xyJ)WmTY51}zvvJTd3r-H0o50}$-hl@kk z1%qHawJ!fI%KGlr@)jbAft&A^hIL82NF`i@;`lsZ*z9LdUwD{S-m=eV@Eo> za8IjdW2Y0je%r_^0Up&Dx4W)H|LgVIz9*PTj4ye*;up-k0@2Rxa`$I@7t?Jz5#0ry 
z$3#i>)`6h#RQ2tM8{NbPv$g{l*&y^Q&F&@1=J@;8$5yUt4EJoHRaGk%`gN-iiM=a^somwIID61~w*URv?)>O^Lc4<9oPoMRiKwk~UO9Rs}0m%Lg09=5nAA~aYb;`z z7@qaIq|#d_pmztV?ttkX{r71G%#Z!?(p^km3aT&z_5AD-Su1&*MKVFr6-7I!rQv%c zTIz`B@%?`O{>bvf!+F=nB;4Ev5(~BAU{D*($^+gM;IDdm^6YjDt}t3@%as8rgVE!v z^}emMasxDA+f6yZ;9-;3qtmU~OssmBiyYWaU1nZoMi;}7YO^HZLI=(0uvX@Ps*OR2 z2!g7hM}_cBuLEVQOT&OFQ;2y6V?EeU)h1=HafIK)E(fAzWEIFyHJe$*5zjy$SUuzq+T#u0e<>f*G- zycC)q&lPVIl}T1`yO_@yr7IdcSAU<3IO}%J1107ZLo8;2iX`fA@97oX4&}BQ%T9Ce zDK1uOqQWhwT-jy2MmlMA z<|GjJ^)XD`n(Kid;^zmzZ0BP%3;=i7 z&}2F}9fZRsR=XD0Pdv5;uUfUgenpSyjcPy?sY1WDnVbe@8{l^w3vh(wV-uqrc|f2< zc8R{zRYk77_AL5%x&YUdJE@&y-q?Yo-wCYjT%GINw_e8&%g!8MU25kCV!?Z5>$5w^ z6ehPTbX0r6x4eCKWVz~Qo7AsuZN^Y{mRs-2aqn<-?WkBaEO4V^QK|cI#cxc2MAZ1; z7p)QR4;0@JZq8|yDl;yB)Sg;zvfpes2Jg%O&NX68@#;+u$f6PE$+bI2+}K5I~!|j!0|xN%v?003}&RjT#ibaq5Lv5BsG-+ zz=L4wB3xWm6U81t-Wbg@>(wO`1I*q4E-V6efcoQ5n(L0|C=Mtl_+FRQrZOPhXks-12=13kcS@WeICECB3_x#a%p~lf zgtw22_~;;~{#WF5(XTJPZh}f5`hq_mHXSf?N_ZSldoISYCXXm=-Ya@q>=(9QJKb-t zl7InMc{QDss#uH`(1SOR?E;776Dh)ak|M6Pj6xr&EW2@*t^3a8q4}6`vQ4g^b{S7^)6${ z)>A!47uE}hg4dUW04SGQloim@qJtV7q+EX1ay`7DLCTdE8V})!mHq{YiPQDcn^+Es z952h`CAP*Aa|@IEn$?4^>@O!p%CE=MNw)L~$RoNH|8_pt-fX&zM-mHK!4Lix+Vk~w ztS}FqUL@;s81Lq_iP}1vp%SdMuYS_X+|}1Z&GypgeK`O7aC4guT4I zV$M%Y*4C5L`c%X^FZP|znr|Ot)_k{|h0Y7((W`*MPHI+GOebla#HG3V>wQ+G4$f!+{zBc8dygg{jJq;AI9;bl8!1(_96wh+&NFG=| zz&^EnZIC%qqbOCmVE|or-D&yxhQb@+==fxX#z{`8z6M+X+$|O@GUoAtKUwXQCiePp zW7fjB0xLZzV}?#mW5Uw~wYJ7Zjh=D!f|VcdFHbic?;_*2VR{<9In@2qwu;w&kN z3gDywUPg6=d97dU>%_5`~@JG0)Xud3B$VJ$Q{seE7v)) zfc6H!IWmJjyoS^Ivq~N47{E?d1FBmXBMG3>@bUFM2At;rg!N&c{S@@`foFs9Xo7Bg zG(f9DlujJLD_4`NtsvY3`LpfxzWruUcgc>!MDc7cx6lMef4n&BN(RxjVLJ^t(KSDt zK>2>X<40m-`*%&Of=Nb+TKhv70sGx?L!c`l=mupvLhw|Ari)EDC)Gwn6IiQV@B2UV zynOi`OyAv-6=?0n@b0pUJ4|D2N>0!E*;-MfVi>dxHC@kJxhoc>U65tvOUF`r{w$@D znBG=0Z+NoJKoK7Y|zw18slD7I9GQZuX1VQuF*&)Zxx-kcUW0scC74*;B~aG z);L~_S(~U3{HT?=$8F2;prjwzP1DcuyS4qscKk^i{*g_J=XMu|&=+M{zxHg}5o7xj zVp+VmqBV6NJUDGQHtFT~ZEfl|l=0!TQ?95!tF6cVh<~sAjVZ9dZb~IauBJbJ{t=kT 
zF=RWnsLPRcdw%hz&NVT+rcu!8UAY~9Js*qYyd0$Qau|9wSh7AoQEf6NaX@TWJIg}j zUng=Ub0}|^y#p=QNSGH$)b(FTS1lyQbg#v*C`Bf@g{BX8>=j7;_{A^x8b+Dzo)RQk zuO6<-cZ*JQ;Nm$lW?RQ=M*;8zsOPM*>DDrDrSos5O5A7w2}2uOI8p*w*MWKcQ|%I8awJMkZNkya{^k<-zK@N1afqC*q~() zC&>ZPNUWlZ?D5?#Z}sxdXW~J|7uSxMvj!nwZ=i{PZX)Dg-<&tW&9QDU0)@_3bd35t zW%yUd_hRn+h7g3Ci{<>v=pXhsY{i0ee+t37Bu(0Pf?h?(g`f7($(#p^W7GMa``Ifn za23n5|43;Oyc`~_nR{~1ithb|sZsXRS41hif%ehSh&!82wLm$CDfF&xrj>N83QHHL zEEl@B!NQ!-XLs^j**~Y}y|`-UJCw&cufJ3qvjZGCKx-Q>ikBxU*-aAAJ=8Lzd+Wo& zDONZwE!18gGzlM6*&$9|))$>jS3inn<<%H9v1xb+oA0jUy0-wu>XNnitGhfuLMXZ&T?}p&A&P`>U3F()+b?Hu3Qr)$z_oHW7G& z2SJxj5>mzC_T@!$XOwMn>?h8JL1zoRC_WT)bRa57#whj-LZx_xN*6%#XZE|s)AaZ9 zLMmJDJ`QB>>-7#Znw^qqzJu{y8}Q*`c}cQ}@cdZP#C3KnNKnwxOStS+ za>pqs`ANKyF@#2`LrKC`$F@hOs*G@xZWn_=N-73`Az()=ZEX07g%S$qj<4)mOul&W z0vmvtV0x!@_4QV_QM%&GJYEY;hduK*w~uOhJp~=vpYYJ^L&A=vnqw`*g$tKyN)u;J z$!}z?iE$Vt3mI}qc7nzFD~kYl z2{64bEN{gU6eP8@y6?w?CkW@Dgil~QLgU#Pz8tZ6LH&s&R)S%;_C=#e_yrC;?`-R$ z=$HV8y{RigGe%7sGyk*ijz2#;bjjEemeFrN3$=+Z;IE1OmNK1C~t5y~}HoHQAXf6@8XjrpqaFi#DJ`cRzRufzXvmg3i__?Zk zN4r#X?;gCY?0eC%_17`yW1qh2>wRPS?GinpTzO5iwz1xPIbc=Lij3tmn3$HzV_$`p zIhfQs#M`OpJDwd{RK!z7lI+=mK_rwmzH%i9O_l0I{!p5M1wtI+k;?3u z-5VU29xkp4btuf++gH;>T~FplM{#w&sMWN9CqWr<;STU~m!w0RSwR{5a4Qgazx;5k~-K@)k(OKwbbG zV=(^dYCSj%Ab|}5hg}CC?*U=iTM-eTyrR&ywzeg}s|sXqn7+rvpmY#`sbUx>6rewf zU#axH9?(4P)`SqS==ikPHz5jR!@jgJ8!ePRlH)cBZDT5}4v&h3I6&$6c`kbk8#{Ij z2gLzI6fp98-`(*g@Ytb(e|ZB&4?=chA3!R`W-@^e0lq)DH=0q~bxm3HxJTIFLu-?# z{Jq&Bq`?xr_`0v!!wEzzARG1*584x5l)gT=iUE2s}Cw(=H34ko<}?%aICYxH+2 zGNLIRC3&P%4%nM=xy986kBw_@-{wzfiG>$@Xg7ubuxPJEL2yP`HP2nWyWO4WRIMK} zTm0=C3qtaDBw(H>ZZWl7aC5?B|3q!I?SE-Mt7@D~%OZK=^ZkOg2x>ArsWaU={bn(D z2$31Kz@LOsrRIi~zwrbJF@7pQw|lqqB9jY|2jMp>^VajD!|Io1CLf9L2a`k@vNlh z)7?MOJ@bM`_#PXnNPRF?#+Eg1oTq#q*?zhq6aV}7?;*C~^Pg4YGOy5xoh_~Yex~_y~2t99)P(&n<9o`VQ9w_T$_Y+w(nio0EG(|V1uKP zOZ)~D6hXzAyxT)xpTqd(N_npglO=xaNpEA>nXRL<<5az%5~{N|YWbbp(TNYKa@Fz+ zp30D>lVsUAdkEQ$BhN}Wy>Bh!;4)W;k(G_jI}8hpwuL$>Lvia7DyncuuQmC2xU%z~ 
zUkrk!B}>Z(=7eoOZ19Upd47Vim_MF?=c_g%6BHjonk+0Nv@O4VzB?%nW;Os%t&GD_ z9NF~V`nnz=#AK?79AM319OO&enaN@x-7d1-Rscuqo%NKM`9j^}a+Odp(67RG2sHmd z1^W?==VO@BeSy*d*4pR+$5ou9TI z@ZUlp{c#*t%y{g8AR9d2(DvZP&Lv@l-@{uluxy#xBqrcgF$BO;h`_bW`iWjhIC1c2 z2Gv(GNng>Co}nZ8k$B83!0qLR3ybW>b@kY`cYDZ^bF*WBpOTP$|AQYcAvqFarj7LW zmM`=-vpc_>cLn9qVA8X$L0P|W4>O3T)xeRg5}Hc=o#=3f?S0iI`kT@dmeb`0!20s^ zjFYEkI$CL)%O7hjVywdEhs$VPH zIbOB?$UN-DbEd`4*Lr*tR)ihWJ7F?Pm*iu-*xoZf%UfoT&=zCi4$NJ17@G#Ajuag@ zl{~u_pP2EdV27(I=lgP6_%DdlqNa_CU zsiQbRXDqP{Q7b+5hgyh^it9SrSlOTL$N5U3a=v^?V5VAGwrk!{%*;GQ26M2lUWtSf z-Zk`=ok;_JHWlfSII`M4f_Gw{@!;X_v}HbgcmX)67o1iVKRQZo&!-VNUvBvUFL+mC zhP(hZgsBxj${9&=INhVX`x zmirM6ls{Bv(L;}CgqwjM;rcqA7nPUaQcwVTLX=QT;ERBar7N_kk%9Rm2HFJiSi^rR zls4QnpC|{PY4B^qSNQ=9&!krUY16qZSF?5C`O~058X(i-^*OV1&ni)d*AoND?}}1t zx$r`0ByjLU=B^E9`|0G5G{eOV_A%q~ERyFj!qzo^)^N1GF#frc4(2=8{#=zxoUZR zqE}|zP9ywWx5{!E-uZIxo4pxHE%La$tuzKdg5QOat_||hcrd%w?v0b<?)`q<^& zIg%ff0e}$~g zk(4$&Gd-%4bQm&XBOJ?H-nM<-(%SNYiGojLgyTHg*C+d&qXm;4;Sc={LxV*E2+CjjmS?~9mA+9%yiJuZ7 z1dhB}IBR)7bb>)uYw!V~B)sd9=QjqgcB9MpmgBfa?l@(mewJEE)#e&3b5+!{6v+Ze;M$_5AVEJg~({7rV zw^)Tlel^;5N4pn^j)r$!zkV4OnN5;Lcog5UZggJmaxPClmunes2Z!bklQJ9kjK>?* z8OhPPCe6dYBj$V?vu=hR!wGu){_1I(yC!#l$E1L-D)NFz{R7%>g=y#!Cjt79VL33^ z5)#ra7Q7CtH=zXdPKRcY75kSo>Ix@agtYGPclX%%$y%J~*?PUtyY?lVt7Zp?fDZt} zUQ2-DN(7+0U||2M+FD9ZPJB?q=>&n>Fc2ezRY(8>Ya1AXL<|-4ZV$%S1+>~f52C_? 
zgAsJm!AJm@IwAu@0x$|wZx%lQKJG=W1@u>2Gv%*gSs`d;Gy_3eByTu??!m3=Bq-wB zxCR#oF&iQzCTW_NH14n8@p=LL4&XmUl-mb>(OY;qJp&n&)C(Ng_B9K44N@M5od8_u z9RgW$T!wE;l7_}q>7j!^wP&r}z85mjTzKk=&geJ~@&iwRPn!t1>lvL%D==$z+?ni= zYBbtmI5KtY;=i8CS=FSaHRZBJ5fsG!if-h0Ruv*}40Ug8xOnWh!^Zy1b`41Qf%E|Q z#NiA@DG{H^ND`%ie9a-VA$n$g?r($WNOv|{Hf)ByG@k4{T5VO!uWC+bIxI{V_2{V7 zRWG6beR-~Y+}ZfXm4t9YzHUSt+EZz)KcaE4#@(3GTr4)FL$=Lg(mP3b6>`Uqj*dLT z%Pvv2Th z4(Qp_%=)B!rL5||>g@RTPpf+9#Ui4!8S z@p$!69qCP_I;f?whG1#8j4P?hH%@f?{E@Q98lfLM{$gx5@9o`bj;!G;KP=LKX&a4q zh7!yKo}}uvzl(~xS_Yfk-Hz_&IETPf^O>MaoWegXRLDMRjFr<~_9t5%d?%hVY&?|f z!Z}w%VqlHmBbWy4hQ~MB(PUxIMnP+9o8ql}&JSr~{O_Gv4M2qj-d3z zGw5zD!Z48bk7>% zc&E&jfkqZHSa=POdpkZL0FQ!!Aqr+JVVRG~1U-m_!Gtp$7)Wi2Vo(M2bv_klvn#cS z4G}L*fKCWNfO&&a_OZM#xPgIzFpUs!rB8H`c}3vzp!w|s(0PDw;VEbCSbSbQem_3L&Dh69*s&~+E;Han^o@iDXp!oHF|S>iQu6#xNvVco&&oT zz!-pyi|Y+MDHysZ!XRX8GkD$DwD*N|xGXTOQ?|GD_ol--=FWKB;hi3O2wWdHtwLkf zA0=eM{Bwf|0mGlAjZN!yFh0)bOb+AN^XBvq-!(#7@?dCetIAaz{AkYu{u&76SOZp!--b~q6oM7Og?JCSpbuwwB97NDfWNNvidX@BgU zFOD8e7Pd4b2{ve;8h#aWkFf*s*qd@j4Cm)2wt^;zw?1Bcv0GUAVZ<$P8*^zYhM;Y} zDzG6jF6JQGievlvb4!pmx%?>*OB0q>aATgtN9*ZLAOf9wA662`jp94%Nm>^zoh=g0^^=WBUywZ(i@uSfn==kMNt)tZ_ zkg++<%?-LNZxd%yFzr;@?o7&6US?)ca51B3XjC|O1+QyrXb4!Jo_=`Er96={6PlaU zv{sEwpOmP24dKBe3H|OImFT9=yfl~0`@{7$CPPPok1EExLs8K5k1<>BpW>3I;{nPG z5~!3^k4b7SN)D4g`aH=e4S^dKm{Q81yq-649hsbMeFn##o=9!EJHcQGVu(V`MK*%3 zvS6GNbhy7Rmgi3HTHRilZQ1f<`X+*8A2b&Lj{$N+!-~T@qSH~pcUHEG?`gzYWOWi9 zxR)hhvmy~&llYIDs;UjFcOp_MP{-f%Ff3OovzR5+wln4>mJ<`BZcweoWOsA@9v|&5 zPwAJEKHaTSxw=y1)H`@|^0yvHO3V)5AJ1;bB>;g%rnwe<;^Q)|{-w<+| z`@~Ge!*Zs%`FX&B;|utqT(6obzAR^EWTbv(a8hD|Ytlq^XqlgXp#7{=Fz+Y19+a92 z&t@|6v~m^qw?Z~H!BRM=Q}zVycJ*j)v4VDLJrXx}b(Qlv50AsL&1-*;>u0)AYRHhf ziw8VDb}%5bxb5DjUiPR^_6H|2WfUW2d;2xPkVn}c=FyKom)6x=3vTnm?c>}vuTN0G zlDR)ERbgCPC6X*8K1~tzGh)zFOVD@1Z2Iwzj|rG#3YK_CLwsEEo=&NmzHeiRUXwM5#y(S&c&H=NqkYN-49qtzU-! 
z+Q`Kzu9FXvwP^Y!>SDz!P<)p1MVv=~<*_%k5)bl0u#T~Uk?;`~=W$x>&SW9e7AX!6 z`Ib2rVC`o!of^v4zP-7TjJsmjvCsiHrUu(RR#-6x8lJ=o-$bRNHP1OK^FTZeM<2y#aC#wEjFP=Lb9U*K zY_XE~xW%n>2Rl0L>Fyl%>EdFQz1qySTsx{gq&7?O%K9@eVPb@AnJ&cpw}O!h|Dy(>YUyYf#bTB? z=>e|F{u)oE!aQItH#!A#D{v-}to3AEaJlb#7eY@%mm&Cu0HbBiK?gvk?{QO#3iyqB{sKS>w-8cH^2JAzq>5ymOr2jq! z;W@K}yG_kJmHTH9t%<(pTO-vG-DdZGGE>zqSnvq&%!lVn@|6X3k9H*OO+G&z_Marb zS;Aw^s8r_aK}Q!{^tNsy}$#K_@;SZkFRid(6}ez7E}pRjSKv* zRedCzJ0}OeItUpd_l>t=3HOVoS|!Imd-?32jR*deC$-(4>(&{WOb{oF4FSK^nx81r zhq}lKuP%HPS*4N@T-7$5ter`}Cm}2wSZ?|M{i+H+J}H397c;SPEicsy5zZZM>7C7NS*1W};b8ms`Tg(Z z5v_pO`0`^1EUaHo@r4T~k16AxwGen~6ux_J#WKgww{);9zifqrs7=HJ|8MT{zi+t0 zn3<6*$VNr=imp%G%Ie1oNex{jYJvGd*7l~R{@V%)@V2S+|Bts7NtXva5IuOyNhuT# zgP%8K)k|nsH2lLn|L+e3K3|p8f7&Gfzt8$2pce5zx0M6grhOmD056O^9QT)_UG=}znM-TfU7p-OMcRsLw+CuC?Mzke@ibZq3$O32Fkea;2h zDHAOGwfvPK#b{Ai4p9XZROefUz`A$0?B9eM5?`G%C2+f7e)%$;^|6*iO`QP?k;iTW zy^fE6d3~K~T2Ayw%r74gxnyC({0~*t)!gi+2D9OFSHDW`N(YPGPxXlfou1s^Uf{S+ z#&q_6c2!BKpnYaSROSzg6f@^UMeeKAYSs4nA@z%)662xR17q>5YuSc502u${V33C!nOpr_*=wC&=ci1c(K6JHZ#5G~%-f#7QL?SgeQMCVY3MjUtl@dmz1Wk}*|t_~ zeXdfz^X?Nnf1GZvCga#9pX=GHR9zh$m(!?rxqIhx`ZEiSFrOAy>yt$ftEEEkDl1CR z-5B$1R7{>ANNUz<`q2pOSnLd=6kvhVE;nhNpPye*Pq_jrYa;gsU-eh!s!s!gg8>Uw zRTif2LZj~vAeNw+=- zVt#V012xN`9a~wn!ROmuy@Rd8x}AwHaf(c@QOk^nTa1>!?B-FN9Dpvn#CsXqNv^&? 
z@aT(iX+Uh*X~nYhq(jafF>@e+rJB=X0tUs8)gpi<@r0g*p!@3;$(AaRxIVNS)2lYX z3qG8nHGC;WlIwTIdnXpwQP^k}@hoh_wtKy;J7Q}w6RRVP)XX_!-1xXd6^&VMCW6a( zj|=df7F(U-p_8#p(?H&?X{3|EWcq4Hi14CDz2(Z9px0sSrj_Vy#?!uaT;1J@{i%8fCW_9G<{{yM zFOW|-Tr)^r!=cy}XBz?13hUWRr&@n5_x8SgG$_YA1-;&Ln z|DHO#V;C`z6Wmie&w!P1Zw3lAiRrgJzxmEK^PKyhyP<&Z{YaLYvpr0>>t3y#Y;zhe zsN0&+pOH*O#?V@O$W(zc%Ga+0N@hHK&=cai;|X}RdS{GIxuiCHO*b>nIstFLrCD~t zC9v^LAC7542v{+PSy+Cq_(3sMZ3*Bo{{ zB!J)thnV>5%$Mkhc(jg~cGOAEMaOybm~0~>kY~VyKnINzl$b(7q!E!GwX_=hW~FU| z90cqUQJ}Zaz_Ax5Ah`TM$*~tBwMvtA?#*>H%?iJ-RroG{%v7|9jIliY_ zx2q38LF4zR*+EMXut5?uSN3j(c1;xxo#C0!% znY0h>kDD2|4+*>b`&*me`7=AMAl_BD@pgoh=$`Vk@Jsb0yV$7%wIC3Y^iE~YIS*)D z>0B5Ux{d{-i0de%&Yuym4C)p$r}w-pzh|cS^m(nz;rzx9c;n~uwcUdG!tZ%sQIWj0 zMk|MVTT?M11E2_RKtE}77mbXJtXq3}B%VLkXXQIxczscNnfAp=_i#-aPmd{@%l@~q zLk!jRKKbYS<9Zt?Zlzy(S54=G%M&kpiizi+7iXf8&wmz=FH1)#z8TV(H43A=!OcY` zxz^9iJ24i?lCc-Ld@E5*R&AE1cK^7WDH8*pt`bX|LC$Xbe+<^drrPHD zetOTC4*NL8;n!7uV6G#Zun^koSG}_9rM{Y;dTvUx;8(tw<}{i?T{+Fv;4`l25sZb; z^z&t7473}UW@(!ybNOgEP5EJ3QuzKon8+F;cXYofP1Sz?Lrz-Lx&sl#%aZgzu2%6E zVBflPUy|FKD@B%%W?YR(m%{#>iMgP8LY}Itd;IV-UAe;bQH}j}$mN1N+WYQqN3#dj zDyLmMKH%~FJm>0G@oHCpxA0n3vE%uAGz1bZo9O3HBlE^ity3m0#NEx=21*DEwvW3< zsJ6zA;UITgnIyILKwJxAWMo7z)hLKp$BkuNKC^5qqTUdKzuD)jl8>Z|PU(+TIy4K6 zt=4F`U@B0bCSWmrRpYR4JiLvoGnrJsHC*qwRu>}6DReM(8qfmhh?g8Mu+_SLZAP=O2) zz4N9SBBBIQ-G}T?1RP(P2a%L7Q; z>Bh+VYe%DlT<|kmA=+(4DN}c&gIBjqM!es^;uf!GCLiCtwxOrChWl;T8-?dV#%D`# ze{=R@Ju`Kx+<_^M!z{=kDd`*BJAX>*3u;1kQ!Q&UPNO%@wugR#n0H1W2Xzi6lqc=F zX=#;UtcyfYvbemf?5}wAcm60j*1Xa7f zHF7^qbSQ>~BYenJoT`L6^f@~tbqH{&l<{$=k^CB&n2DH8;+M!y?z1bko$j0}cHw+$ zGXB`ZKM=}1*<92dIQkU&#Cartx5g(r8_1^B#m2&I+J+-^e zLhp(6)T6&pUi9lThO^oo?yoasy-u;F7kAj08u9&Zp5L0w$YI1-s_gb^%syyaE^i($ zCyn@8|JQBe@3UJO^JK}egY+}GKU3ie74iiSZ6^}4M2Mf<$=+s}XZ5%RP&A58`yssktI=$|U~4IZ{+;zyo-7=*yp5CQ*k zf(5Z_T~lACU3aV32ygrI5`bqTzr57qM|we>9MkY8>@hat^4HXXB%Qyp8>oo=mboX*C8g;P*1;*C|c&y=xShXpn2D`LESTZJer?78}KBCv2-W{;-L7euBR1tx0wE;+ROyXYwsea4RRa6JAVG~p^IAZ0&#aF 
zH(Zn;@csL}*~7^qs|e~+cW?g^;9+%vL7MHtkxFtjMpCV`ilDD2<#UZW-xKttdP!YU zzB^I+LznW4yYRQ}c@=|-5mQ-EY}y?v=dE?xjoO}W!HOz!vS4DqXUI_6<#NFyXdAe5 zm^ixxKqlqOhI^{a<6et~@~hc|KWkqn3Um$ZDS2kj@_zsRUcNguO)$$-o?EzRl0^Js;G;p;ntJP_xMfXku-8{gP#D6>%^5()^ zwz2u$bYrG${DXkUGKI-xF>7$W(`W&iu39}ZrvCBc$3O35Z0S* za54qJhPdjEFkLFbv9~i!>eSJejC?P9Oa2-7PH z7tDi72Zrd_WlnL%m-7t_)dMKzc%(9SR4Q*unbfeEUD<5tludQO?Ea}1p zcM}i&(_l+4aW;-!ZZbLg&repr-d(5^tSz)r;c&DXKrMvr zW5exs$DHeqFAmH8+SGfZl07;+wJDF*Cl^!QYVD`0&++pf_CAIXvZ$SLtzP*cf~*Sf zhi)eYI|ByrrofkxF^Y+j^YYG~Ewml4;yfilfn+>ez@?;I-c`kg4AmN738lXwDO6#i zfIr_IelnCAVPr|ZD;gcudusmTP}sBOR2Ii z%)&wLxe9W^nCr2RkgJ{5%%p@M!G}Ak3#++>1$GETw7vc3dK|O4>2^Uk3sy^0OuL)h z1^CpWu5aNPqfRGqJ=FGTA!M!*KR$MEk!{IWEFXM) zq@K^~aM(OGoVK6wg}x5V5kZ=Kv!uXgRl2LTzygbjQ`o>l23qgHD1<=Aoh&Ho%+>k$ zj8fm7sNOPRW$-@`$Px<#G(vei%>SS)v3sE{!MxY2kX>y+{yQq`}v2#yAQ&Lf?ztt22*wqM0O z!eVxrL;!eB(0MT~CoHUcK+@5%NOh?!I66Q6_Bm5j*1Vu~TS7|C8_074poomQoiQ5L z_h!?f`S$ilbdhc(MB!aO>B8k5uw)3rG2%JWJQf8E7xH3@_?|dgj!Dp8Z|U;!o*EJJ zxgrX%yB-Po-yXj7@1DrTRv0zJN+!IMEdZ3yqa@=_*VhowIQ>hz=jhTOE@S3DJlHW z*Q~E`0a+5;{4_Fzyt(4=Lz%zT5+^s6=m$9uQl76o4bnNuK>$s?kCLy51|AYz)8X6R zo`H@Zjo~R4hZ-4|ZD)Ii0)QO0wYRr7BathOPqJLjWxaL5EodTUvlzz(X26Ya$iDnHA6V^RhHYK{T*)mv%L8QxIqY> zU!6?J%loE!XvBLhVTRkF4B2zvx#`Or70UJa{wTf;u2dHmb?zG(a_U?LA6r!tvk^6Y z7Jk(7M1^7+lfi@e{T(Ukf_LKKEBOu!tiIQa-K(j`-R;9CbYu2i!}ZEgk(~RkeM-sk zZPbLBNZ*lA?VYQI@BFRSn#eLp z`rZdCd^h2!dcjG;lZXen|6kG4e)C7xWQqS7r?OEGLcG0&S}b6tMa?xx{V<+~3`Tp< z^Kh&3vONV|$e-(Qjx>f3UJlH*M=yx^?a{@<9zr$vU8X`?dHO51=1XZ7zLXI_6L@?< zn5T2rg_9BX^J%>bOq6!z=SXBb;Akuvc_pHy1p(98miJ<8paYEwsd8UM0)&M_O?e+< zf5g((#J7`5>Y+cz6xPAC>$ldL?7yxNV0H zf{~F?Y~sGd>?b|(Gbt&cNe81ZbUi>vZv(0rKP%;1{b1Ym)*agVOR$2=af>;sp;00# z8B`XuyIu$sSyS>5(mY%v4-UG2KCayW4Z^+YS}c$~0YLX~#%8{1d0PbuFFm!V?-_mj zJZVZwIjn=fx3~8OK`m^|W`b)}z?XSYmnDHA;#UcUxPS{vWTpbK1#;wssL&T!JYq7B zPQr)s4jJn$xUUA)TLg``Tf&FA%FboiQoXc`mG6|{`9Z8P%T`iXI9uv-?;c#|7$0d*}vs+B`=Ul)0 zveh<>@X!VU9vvJ3xKihqnt`GTawdmE3cw(&nSV34@|2VI1ns&ICS&V+1NxB_&P`ZpX+3mtX 
zfE5M~vs5(ZwSZ>eF&2NwR6HlkWjg-xZ2?KlY0K$P`wjalsDN6H>z5KfNYPGj;Rgi; zFNZ~ET(phB);os!%KG2uxX?+tX)VFLLt*CDoH3p5sHM& zb7HNf-Jr>kh=`SK$~=>C*~vUC88a>!Gta}CtoOWEd+*=(_k6#<=Xw8m|9Fq%xsQ$x zEbigDuJdz#&d+(>=b6yWlaZ0JAHv^GdKOoW?#839LK+c$@jp5us{{DHw|uWe`9Wui z?a`W=!+>+&OHSF%cpTof?eDz+Pcgo=JMH^&Hp2~_*uUd?367n*JZbfHBCy!~fWRzK z_ll0s(fqkBD^sy9aTnGnOXk?-HLD;zKF=)2ZZpLtezKT|D|=wmpR!NYxO~%-C+DB(UaTnol}{w0 z`$KI^;7Mrg{4drZC9SXdj}zisG@A=-6gPx9b`J#$9r*A|0><%$Ij&~?5a2E zsyvMZYpOSPR$Re%+3C*Rw_KiECvLnbQ9&lgA*yqIq>c2UZ32aY>V{CC-R)ycZuM1K z=pzrom*4H*eW`zOlQMniTFm>0vOUIjK0Dj;^BqI<^@2m+??BBjd{0uh@i0D7&EG6< zhbULbDi^S=e5GI zchu;FnQcx&Pz`%wp55-|HwYaOazynk%=vnf)1PS_0bE?@*rLJ2jv+ z`k}i2j_S5HLY{)_m1C9uTz?Mhs%{1b42%HY5IO6uvq6)RW32CyV`T9_nt6XqCVE4~ z;~=8-kkibpa@Fm`n+o>J-V;8eDzjNY$vx|n(Zv^L>E5do2kx&v33r~a-F1S!66pBQ z=kqw;oa+bK20=I4P`uCC7IVMPb6wUNQRn|)2H%DaLF&Y~lrOKFIl&)G!y|j;BTw4C@*0Sg`Y{LQx6mJ%Kkw|N0qs7K6gXNw zP5pjxbv(j-N=6;Hl?Bt=lZn5D{q04!);7vH8+>mygo3 zaF32Nenl((0+~K>!;CLO+!(bhw|;{?gwk1f{N;n_=Y+zIr{!fn4U1qg1z6E7-1~32 zJQhIZId<>%sQjGew&tkpIgXP(q@v|g=zl5UGskvvWoaSL2Xb&MC)YN$yM}V*X8F63 z-S!`2WNW}|D$V`5!s{##fup#7eRsgu`1|5t(g=KvhcZQ_@;f``dB&u(-MeaP=oWI>}0$Gj$Ip>aEUVPaEqt zP;@>%o0AuE^^oesxWr2x+MGtTYA>M6zc}Nn26yE~whM z`}f}oEwd~sISBJ~>Z|mAW!ICwSUa_(Hp_`Wp1@{Z(<7xIl|E_VQFowbLxuIdZC>J^}Sv_{`fMvwm>ZXy=@wmhwZY0HD=;c(Z zIBwNO8?!{|jqGXV{os4q?>A14=ZZcrb|Lc9Rq1olDplLIFG}Ea(3t6Vg~g?%8`Diu z$1xak@^{5c4=!ve@u11IO|9h6weCPW5;r6fvqQx91+Rxb#bwZVHQ|&_}p&3>Hj(JLh>uH^yA{<4PrSFLPv~1 z_J0}p+zNICS@p$_1iVJe+27G8Jm|^rLF5udTd{B&$eW0Xi1^~zU*2@`V8BH8NKMQw z@P=7@i4qut07Q*>NaX72$c-qu22b1#FTP#Z)O=p$rZIKejz6CS?yY-_fU{ut56GI3 zd|NYm=#7ef*glcP8$wCSVzJ6SznBykB=CWZ2J7gThJ;q%TwedU#p^yK~FUadG7>uP8wBXgG-rL9wBWL(qJ7S`^X79W_CfF!ci6nD18l@;hePt#i#Ns93t zQTp>8*-7o_*Rq|>RL|Ru3vuE=t48kveiFH5-u#1HekX3qK+yY92Fu>3v;oq?9d!(1{ay>E7mre02obUdadC}Q;q$+i^D zJUq1)A|cihcB=VpzGBav^9?t@&UhM#XO3OV&#`;?qCPVv@ny=9pwnt{*TY8TYThvx zOA-V#n_IvITvN8z;eXhWD{Fn6Gg;ush8!y^|3XtsQ|-)^tg77QP~rBindX|icXdLk zm^hOz$uNm(QzW}SI6iLDZE;QO+Obp3(U>53FR-GiV;?}~79hhpG#s%U3#9y(`)vDF 
zSN^kSAL^9Y739F)@-GlP#Ts6Gras#dEg7wJ`^Vs)ovM)9buy!@>%oGwIB9aLRiLH}$Cw@u5LVtxYyj791C;rYQwXUOFR{RwZ; z$;VB$v?sGG)3X+{;!jv}op6n3EFW+=$&7zGceJ?^@2&rpVlwb7j9VzoU_(3wo0_o_ zwf|B>xE}4}HnQkG>K{c3qpq#47rV_U$o0uW!(5)V-qRF^j$!n=?%86?38;f^n+< zd*t?s9oytNvq>ig+Q5y_$l2`Ivrr1yo{2wBKN%-*6gaTUIpoZ+p1!Ke&tHSG9{vYf zW0+ie;9207r#oD3swO<^4p)is3FM9lL_1OFmD~^g{9Z((By|}_ztiR(&Xu#nO9I%j zVIhZ2_j_UQnac+4o$Q$_JFgH`nY4uP5p9%_0ppp=2q$#bS!E3XeGMHPKK?!{-g)^b zp^>-pEB(#E@;C((=BW1haVKvfL)BMD9mxC%fAkdAUo!WO%kve>=e8a(_h`k#!-q?; z%R3p~^xdazplNQ^CDl=czY;Scl=75;EE}u8Up%hQv*Wn`;4QX!zxfl#6Z0L6V_^v; zX568n-?)w9*mmz>!(ddit+=cG*6*uc9iDNRcuOFO-cwhLPpY~2lBc3OcWACZeIop` zcqV5_cql44`ON+M-ODd2ylp`d_(P0SHWf;17S9rNU-jKQlf=G>y`fk$U&=1>6uTo? zl15*3_@EWuN{qgZxwNR;NR%E>^jN>-ZQWKRo8^`1i+{-*zA=z5POdwRFSnH8oX|+a z1^*t5W&2DGw?I=UXKo+5lKOVkEqPSqqUsVo^LS{FyyY3SO}Ye+?$^7^uYX&eR!`Z` z*ME5M)Oyg((4M4Fy1`-ehs#90DUCzZuyQW-%4@h>puG*R(bb+`9#Q1lDwb*~GU4_V z`<%jijBts_9okN~Fg-y!KrxD0-F#7t*uF~M^<}#4z^F+YL$3$-@=yY9FBP{(eVTUG zcD)_rd>nVin~HLwiDl015wchf3;8@#|8)*8LM9sgi9>dd$h7>msk8r+$99{xEk8Pc z{n4@gqX}E4Nyy~8=b z+UuJH@0!pVsYXR6HGviF$!C_v`W)tOH3)1`@!16ZS~nQexfKmk`qYFRoJx3i7jJ}o zW||KvhW>1rN5XG)h9X=f&9K-=_@QweCq~r|Dep2H);Ph3@9Xn>vR$ix&z^^8AFbbU znCeu{%*jotk$Oz|W&T@sg(zX3>_i@O^mCK#+)7o@hLtVwgw^Ugftm1r! 
z5m$1Zq__9Y3<;A<1ntOQ8Y8}!DtuWMx`w%Uwz2%nvQ(8Sjv3vBcIZJdsCBvK(iENO zAqqK_w8rl=Sm#H?YZ@VF$j!9^j0%y(4x1*nY)!3+qv2LD!z9YHBxvEvl zP&KTbsW+{!voSW}>^UkU^N4}{VyOA6+M^3qg0ULa5`uD3UeBy@l&DQqEG213; zU$>g(q#G~n8zuWza?}nDQ%G-g-%`;skG&Q%Oox>eB+Vt2&ZKm)o8y1=UD%BNRP6eM zd!5bKV7@!IB7mub<;y54enH|L35clYsdMKjx zUI=a3haJ0&yFB{z*8+|&0ts5YxZ}iAH9<>kT0+(&MMpAimCdQVuEE^>BvH0lgS)}& z@n>H3WNc$(l~<;yu(JR`xo(C(I1w$fpjBs{u=u>YaQ~%Vjx(Qp0Wa(M4pDY&iRl_A zu}hdNKAd+O_e(d?A|41$)J?_$Ziy2vEt%Ug#0-hgV$U&~A{R7OmbSi~BXU#+3_HZh zwn}wwh$k}>z9qHOn`%*8b`0)W9wXOS%%tw7h3OHub_88`Tuz3Q-CJM%u<_8DdFCk2 zI$pOC7g|8DSe3eKPEu{)9g9!gq~x`UwgxhIg0C7w{~vtdENR)*f;ji9{QyX*c1((Y zX0ub@^%$mO|N4jKs+Yd=v3IpqH7%RIJU_EGOZS@cxsS%~onIUxVm1`ejn19I@At1g zpav}P1-+VT-Tdr%@~2M;le*A^<;)%7tOuvepL5MW@5a*n_K243fY0dkFPx3U=!#VK43e$m`ZP_9X3LrKYB!8qaI0Q*@`^ zg7z)ZXGt*?1pFdQzt4@(*m*XaBIemU6%>XUKdF*O`>LU4ny0s;}454^m_3clXz)b=wb7F$1=^StD6UW zGM9@&R}G#sUUWEFti+vGBRgGc6&3MK=u2fhS`0K#VT5+AJ&(ZJo~%IQ=k+;IA8hq; zn}V;sUtpw$zQ{o%?-1?$>y^*R>b{~ghCFlhyH;#(M3=%w^yaAERNKK!;`-pTOetb& zjqj7J*GIIc{cq#uj4yJ|y)xPhGso8O(86nNu`H_1wM(@dV;h4uT=S{V%4VJZ|-VV5p?x0n`~3@ zS$zI>+8(#~=Rlcnq2@(S!toP-d?_CurhNTf?dVqX=h*nf#j{u4qM=QlX|Uf(#?b(D z7J=9t%zY)%+&Ygt@NLkfpRp;GNSQ0pv2})AuZG1&Ree$`U5Z4_JshF%x{EYUTGF>r zQ=69hobx1fxxtn>+6izecD`KQ{}U}x!1wp|qnUrt#KOYo|1cM6*i(9DWKq3xrhT30 zK5Jj*v4q3Z{=ab!2|hXUb9;R8_c@Z%V{_j>ji_7)KI0peKHobv$7%fLZS@@EvX11; zezQ^=nrSRscwSyBxvfotW2CQNLdpL0>BYd`_70or5@oZJ(9-dfq^1CjWt#iYH7@8i zZWJM9e21c}{NBIFE__*NX{6-?o4E&W{x`B!Xvik|y5+}$HdFCEHFi|&QaOy=&y9hb7ukm%KjgPd3>8gG`u{JT-t0i)xsw zbKP?9RLjR*nBUWH$d+$kpa0&v^_F$FuJN!!`ygKgk((^s^8lC{YaMOvo9p%(* z>ucsXr>^P%-}B4>zUH7BLA^;1kf5cKSF>G&u3Y;v6_juHFjifnel=W&w*K8n#!b*i;9$`IJn2~;E#9H_BNPX*Czd9-4SE~6DT$lCA8{vPLnpa z<8f=!m-6`apS<+F0sckoN#>VnJF0s|93Z4ix87A_+(%|isnKU$cr7P>u35&;WUjA%ec&d`ekU*byl%GNB z`W!ft=6NSw3^3A3QspBWcKNF6(QMx$cM7bBHQiJ8wMLaRnu(x%>D^zQDKg}hwj2=> z`_x%3k&{l%bNQP-#oRR267`F=!l)f>VuB-MS1vE_ljPtoQ1Ocl`E0~gdgC^}zRT03 zU|#89i3htG?j8E2EzUY?HxSbV+)lOUSk3$(9*Ol|e9!%Y8Jg`dlIVX@9~A6+Y#;jl z&4O2G6IEO(qpplkp1=1=dnzv)<xYK 
z^J^0Hj+Nk&>ly6H3rdly6-BF-+ofTCGsDD-0*&*BY6yHZnG0X2NzGa0`aJMeoS!9A zKISG}PhI7{R=|(24i{bLi9Xwt_C246x<{9+TVhi?uUr+V-@n$+QM!5%-FNKhVDvS+ZQCi^29Ic?c*ApMuU@SvaG>As z&$VtSCeKV{&K~_r$gOQ2tI33S=vSXPNYnH#8C)dpf1pxWR*rpW+cq+DE@k#;IvRDG zwp;xL_RihK;bRUR=u4w*6Mhl<&AcAm4_)zLD5Bq&xF@^2s2iv!u{#coPCg?rXROvCSTT}N)${_}cAKqBBH&6IR48~5Z zlm@(fcGQ#~nr0@<>QR%zY>8nG2&eZt1WMK4Hgi!}7iE1XbU=XTz}Cy}^v>?SRH48| zZBY;Om-dk(j5vG?Mt{BA)E|TU!*^G1X30CC-vY)Cy^C?D$=)2FJkwo|{QfMZE>NK2 zYaBG~36t0gLf9SVIq!uobNVl@9&u3A@=9FT23209fsuaP))>>olqazEs?VN=_g6M? zkW>zH4l!r?eS!^FqD42>+HOXnTK#rUS8s88bofOa+YPG#MJ@64wutAi z{LC5RT+qhRqS<0~lkm5JAYy$E8FPQ@r&Mz zbt-d7^eU;x#EP#T1Kt`Xn@~MlmHgNSh1w?tDnb*hs~KxyVcvcFP^dg1PZT?<<}iGU zQoSU=NaaPL_{AR+Q5D(sa(hs$!*BO(zHRWEFL|Y!p1y zv)T8fdR|kO#;yB4xk&UtnbG&zmNkhBm{N=_&bxfQN~*vHt>AnM zxx3>xa)jYUdtKFq=epAd7}jXejzAFr&pXg56B-boe=B7^(6_qWri)xAIW4Wvjt&(K zqo5N~p-stnjF0PZfc)K_JX`Ovch;>fEx}MOBd;pr&qo|d#l_Ux_vgg-zO1?&apKT$ zO%OSC4*s-ZiG(4*Z=Hm_YmHD8O$Mq0#$6FXE?tqWf!cwZsfGU3GSDTmy{k*Uu%oLh zsi2_1V2SVGK^f>+f753sUo)gRTFDx2P@>b08#=hh?l>SL%zM-MPc!WJdpDh>HQ4d+ z-n~`Rd#&FKQ@_Z1s=ynYZlP?f@7dW!vG+zRdEJHHGU;4?Mww>OiHV6j_Mi9-!Q^(n;t=*>Fk^LzdxbT)1c3#fnAzui5!$4sP8IY&1VlZ&AFoqL;Y44 zJE`w)qR4sxm)PO2k|>l-azTL{6n56mzTOD^hxN?NVxW6=-aSWkb@dPxCF{A;rRMgI zj?ug}9Xm(N&2X~^wkatotKD5mx&*^?mtKd$QD}Fg1I2nU?tM^LMAotI6Vi3ID&rRG zVOB|8$irL+n0 zHQ+6Zbee@pdbdYM;gGhrcF_iPs;$IpPMJPm>7XZ%+)KrR8nRrjI5;>YUVaw>6GLl*kc%pC-T&nzDo-7r&C2Y- zM8XcVs&d(b6~&K5_%$zlIJBFJ$1^t$DBpKhrFD}MGcsm54SEU+9Qz}oG>jEAA)4sQ zG(UFinBLW^ucfdPS{fxMk<RCnpM8-VIgIlh!dxpc~vz6Vq3&{4r83g1`evj!FycYPOf){5*VSSSix z!otIoGBPB=9L!_DP|K*>pQtq}48OuYC2GoeOtzOonZcszoQ6R6WaQ6KzK^E?11zsV zUq~4&by+@RIi--u#GLHDV6824%+``*Af*QSwr{7{;>3^#jNiE3&m&s6nSJA$G#!fWSDE zRBikA?OR^~)c63P5ZWWE(+CYWOYli~$2`cEZzo8zWDNTgQ0c#jT{8&zy3E>aIof#fGISDcpikCToZXuZ#S@h0D~` z)CO=ROBL+g(`yP~7&;yv9=-c;JIg6ePGfxJ^$Q!{(HnTvG7?cVsBs&m z`R2&z6JE1gUEJauxl{qv|%8vJh9%{YP+QWF&Z|r1ElAuRfQWuae2#&@_{KYFCT6QAQ)Q z9uAIG_m?Y-#&1x(f5iHS2>&lg_iyMH8_@ghG(vi_!BP|+JY&aJD*n46kL3#vRA5yen6XTiIi 
zJb7{i%3IlUJ+`}d&-neESNT%(lmN?9HRY!_)u)Na78^@pE*v}F_uM}k@ywX}w3uKf*VA-6bA z078Q0Y3u8^vDHZ2{B)wDqXWo;y(6EJr~QknA^&C*#CuAa?49P2c3VsNIiBe$zi(9O03Rr3LSJK{Q_3vh!hN!hJs2}(RhLcen9 za8u9jT_0yF6g{US+H{m=)GN)Aze91w0J1vom~2bZg(ZxuT}rzAuJvgL>$4WKZ*Wr9 zapmwY6+gd1i6DOEuOF7O*ckX0YpCLNK-%J*n+Z5|aGGH+C5w*&5^IG(RiZ;kOXrXg zXaH%q5hFZDWa1Fl1woB<%ZIZ-@a$Tl<|I$eyA!w7prG?R#OHEDm3R&3yAseo+~-Et zt+SqU1JSUwc(Ccoco<>exS~s|y-wpmy+{$#maRo@<3WpiS$EYV!Uh8@1anCuZ_JRP z=_%RX(WLagOLd??6r_Sq6*`d1RbUcpw@Z}<15o5$w}ciyi1lyb7JQE&DP@YdG1VBY zBoF+-Nze_jc^sBY<8TlLH=;Yqefgb?NMXOH3F4^C%m#)^L)g2RFd0_uAFo0kC0%fk zZ}E&SeA9=sk6Z`5$FU>hn~+;;8DM!wAGGy+NmZW=*pnc^8WPjFQZ)3cno!iYHfAR+=2(zvT3Ehn0i> z#tM7$nk%K@SC?mcy@(97A>Pu`GUmZ#GPENy?f&Fq32w^y%hYD%krdu?f>}l*C*tx0 zNJZQf|1}gHEdksa`pY*Ysjei;CC@;O?*_oVPb3mrwZ2G#ew%?5HGptpaeX+*8d@%$ z9)2b3+HDoS^>>!};c6fxVV$=LoR$7@9$APILc_zYzdYN;QHB`Rf3d!vN|%X*@Whmq zdju@_W7{{D^nvn8XcOrZqB6&3mVeLSARnJw)jp$! zu&{v0bF+hw7H+Q)poHWER7wZV=GKd+b0KGSF5QEi^_q!El$d@ZLL`!tlWkW@!4>j3 zl}hu;+1Elx@z+2DL^X@uCqe)bDl_cBLmt53=(p3e0k-Y?i%$WHYk{f4_EY5kpaSf$ z--;K|R%C4o1u2I#ND-9-0s<)EcuM$9--RlFtRr%DR6>Q&2f*Rt#d`6jRxK%n08fA= zr;a!VbBTpXIy%iS)_T#dwYIysTpXUx9MAu)4DyUoVz4 z(nZ|x7`Q(i;10E(pg|^Pw-w6-`QQBHZm`*Oa&pp@j9r5s%`vBcQ1p{@nqEqkM>I&5 zOmkja`>HUL*9LJga^NdiG;w93O%&z^s4;X6wypf>`D~=%e5~L4UrFBIKO6uEov9tF zdYKC$&#V2?e-yzg8cR-!sXxza8j533MKlrG$LILzkfv+DN7pI5ZV2Mx6T;2vd!2~U zaGUfA!iFu(W$Tc_o+FoQ>%+v|CXz=ClNWNU7~S$7`Vn6fCP;BPPT)i{N_3 zfFv|Snco=bQyLEqXqy;PM7nDXxL#yX5RR$3u@Hn09c6itJQLPxmBDY&U-8LjobQEO zmi3(e1_6=WKZyC|`5xH^%RhLAF>6JnQ38`fSOV9?VrS5yqtw3dQ+8DZ+_efc+^Gl0 zAJkVjT?78Q6_^*CRZ_m+z0Hi_RJLN+e0uzSAD+p$0iJGS)rl#WS-N=1jmcOe6CidU zFC;P?5W)dYOqNLP$W*P@XAP)LA^4$|z*h%kkBA$l)h>=UMD@nts1rN*~{ zz}`m0GkF}4hlRJk38To@;Ew3G@s~wS4(&{JKBB)-p)t?bWyo$eAf0>sgVH zO&9CNkWg9OpGzCUJ~tEi4vojH(M4s&H=J^wE?+kr; z&XIosu7->lc)~G=oEvz{=b!uqypT*;>y`&-gy3DEllm{nJFbDDUOjm5AmXJ)&6z9a zu)qnzb%o_C>AsW(^JXB*xv)f$&>jp#C2g2xM4{Ud4=bCWpMQWwSS*l88E^(x_t1w< z8+-dks6Chi3>J1`={krtu?;I4b=}S{J=PMbH-IF;4UtKU&;71^um`hWln;b 
zCo5-MIeMo%#|nvt+8|bwQ(}oOewNW1X8`kLT}NuC2g;P7rR^nn+Kl4K6rLat#{lun zQW>cGiytEq2oPe9Ai+XZyWw#j2y#A=bWMH$H4YgWi*nL|rcbQ^HojxBggV5--qAgy z3{es52DqV{8J#|kA%VcM$cta#b#V1X1|F`?8u)(?>ebCsuN)l#k%TSj_=}55Q?{SlLa=qIuItXJ)0hKHg z2^@anIbziqCWVyivo&H#95(Y$!vB)@&7S`WEbCiPO->`EFyxYk0G_&$U6~ zM($m0AV)GVUt=2(@>oG=2n6>syd@2;4DqGHn&FbBh`7}UN(LhI>8Mgh><3Cu15?K1 z^K@7?^}%$3(*|YZk3^)PU9bn5fRLAxnyP=}#=AQtp->|riwG|ot`0~B?~luz#pi)U zb=&;3$=(Z9yly~J1Sa&jyKR%A3&_refa>TTZe%W< zOLKq@2c88(t6QFEE$W3Ur)I?^sfP9t%^M2E%YM(@p=;erD zWUaO9>o&o9yZ^ZPE_nbTw(zkVPdp#vBkfQWO# znxHd4gc8AnVbOQg&!_qU>8XHGMY>H;kyX!P@TslIXRjuZoq zGL{AgHPL-1^$GVn)`6uZB`1p^!M!e?+J=bRATfGFAjw7j#v0RvkN-P;_Q8fjVgFmG zD60g>?6}k;S+?8?4rL}cQB~>IQbxV8_3;vNX_2?9%R9wP6MBc?EPtJUM!~Z zAJ9BBEPpm=+M0ls18uPZI8r1a$!LC;Spzr}8N1H3Y4C?xc3>B__V#__}?n(mUx5S%!-t_C0xGh|CqQ`DPJTTD&zWhQs40mg`v|a4RdI=zbA|L}a4O%?V1g zpW-^)yr#ds1_j4=RNP!M1QBik*K}qfl1A)D^u6=v+a3Q#(wQG&M7zxKSg3Rz4c+M1 zKLnZsIK`;=+y%%xO`FbrBa9Fm2rPZCOtA(z^cb>m)nO{%f!K!RFCeZpB&7n0m)`gn zxwTN!6ogn3Xc*kyIAH1l@&gfw{XZ9tsfD3_BWzmit_d_Yd|VnJ+1>0*5;cxfcod9X6+k=I}=Yf|Xg1dCn!lS%kBm9yb*Q-DGLB_5luvS0dgkjj`#7(TG55X1H>hyan-dVAyea! 
z5#X7P9Y*HSh%$(-Te#GLkT68g2JxhFms0g#P|rgw{2wq~Fc{)WX$yc*-5nd`Ug1RB5gZpPBhLO zNi}EO10I}ZM23h?7E~NLm4O8tka=KXH|#RO|E2aKU^=wcS@Dov_D*duB2JJQvtCPG zpntqm@^XaP!+p7j45nYf$gUQY^D7WB^anuIF6-fd+p4dxXEZBaVa|byuL{Gcl6H{W zivi0R9NArnQJWmnlbLHv5Wx4wcVZxn4u!-Ew*Uez@EeGG&&A`rGEX5?c%Qi82(U;@ z{4nU7X(ok|02tj)SdP|6>Y4|`cnZ(RWQIjKm3tGX1a%ly7Sc#C1@^Tz=`&k2z6d;S z99lj%fwRd4o$Ydz+{}j{)-up|2@@VD4X3NaQ>2(8i01ut$AJ(qTqNRY>SQ9mbE#QL z3LF*=*76dCgjWU1W#7@qN+4O222vRf<{Pf8R8Xe5zWy78j5TK6u5?}PwjPIUN+Zyb zo|J=TILaszLm=@Jnz?|shCW2;kzEJ1!X>od=u;lKlN zL;^zcWf0UPAUPG{DHzigbn^=2_-gdvT7YO{F+Q{1sm#hrr*wb82S=f6Q{86Xo3K70 zl53-v0dEqGcGnvK?emc_i!}ruVIkccQQc-Eh~a#Zn_)7D%);|z1k6B%lt6}bvLm$( z!b5t8Il~}IfJS1{Np4^;R*(TkQmkf>VnqCCdZZF$o{{!%KKUe&q(MHgU!eks1H;uo z(liKr1jR`U(lT84>3_)~@Njm9Y1dz)89y{=J5F+UbXyamK?EEvAu}8|en4U!iH1@P zQeE4$*|PY|B9V9!~X#z1W8id04+*&=^ZH^z&>+> zJC%3}F;gTb03N)Dtw!=+L^H!o7OB*TLh>20+EH={%|+C=m;KR*XF?=Y<|=u^1j*nb zPn43HQQ!DZws+(cLiXEqG0~735!M7W4_g?nncJr}M-mCY8G>Z;G*yB6-(4K312@|E z`{wPgbt2*X?NpY4Mc8P=UkX9iAOvtKg9`}BHfK`1C}`#|URd)fmq1TxwT3M`4ML@h zJ!c{zc?<2yM>TuKr~&+^Wip8y4{*zI=uu?-wn454lIwW}HM$`%9%F!TESaUcy869m zk8RUjk4npmQUsNIl$u}d-$6cz=&Fd*1!at;UgH3BiqJvO9ZaUTA@PadY6gp>eCtKD z2?{* zZW2u#i$s`nu%4kCp`41(>Sf4YL&m2gWmvS%)T=)X0#Xcq)f)d6`2xwX%EB6TpZ>Ps z*X8v6^ON)p=9LU{JKyUJj>YL*(%pelZ^{yexrF4@GRszt5pRx=A0%D_4L@Ys8}Djr zYWkEL9RuLG_Ut{ⅇU}*eg5*8kTVV_+8N0(A10lI?;;o9bjYPm7{sZOaPb5u~%wg zED&8p*hj6S^XW%m2u+ZlUwnk7|Az1v5Xv_sz+GOJEE)-UWX7!ARL1DQ14|XrL@au2 z4$+yxcKTDOCaGy@$Ez$BK8)wDQfEkjId6dZC;)r5+c-@_QXP*{5@6@RJ8-*+kbQWv zofm?)Baly~EGJ>G6;oa(Rtk>|B&0(7~w~}@!-Wp zU@Zzyk!=h}%2C#3=*!qzoFni)=*nm-osp7q4CcB4jG&CU-Hj35MF4YINSU}qrQttNIY6q(C8;WrnHy_wgaL+| zKsP8PokDVuQoi4}b9nTTl^}r&P1oxov!ujvDl@;!2@li@{vksr?DLrn>9ffX_n>3? 
zSrF7--ytalfZE&5-zoTmD&WX)_+B}QK@w-s439{00}(%=xL6U%Uu+e9-$Lh_Kxkht zdBalf*5h8QtO&Jxv# zI!)eKEkv?m?lV270FoU&n9L0_Qwc&eDH$*$kOaola+~krtexzTd~dX(M}6=ibv=-o zA#jkN`_19y3MOp>3=A@_!pJ_41J>_fBY{NI_fhl%#!45rclabo{1G5ks9HyR7$Q+S zIEs%QZYFMw)g&lRj_i~`SU-}2K&%?T0DC2$Iq#3TOvEu*8Wa+$!%4nb1hAVDE0c+* zki9qYFr7h|jr-+{8K)%p8kQ26k24};|7XRy=MENGChJG{zNIgY4t0;4I_FKn?u^f^ zRoi8Ly!un3$BfePnZ9s7j_W}>5azp(sX9Fxs^6M#*Hv&VyBkThfyYbyQ@I zXi2r~|Ua>LknG=Qb;fBkKY5r5h1L_E2YrmfvRwE_)b5qQNr3D zfVy<%c5kOdRh;S1Z&4_J5+tgAF7^LGaQiEYQy6pDxTQXT^=bMl$*vHX*F_8b=aaG$ zM}q<5@@boJHhztsDJ-%B4>|akK(PvuFp)#g$oLIk^eLNjSMFgYMB%fHpq1$`iyQuN zrdyo(PnXhkq`UhgrU%|x3>WN;DoAR{6RH}T-Mh_7%=1U2n*ZWnMNh4IMNun(H+zlq zU0F||o(82~;P_yAlxOXWz!SBpO}qUnJ$|f3TjZ38b@vAkWoV1Zl@d&yHckdinK#bP z%*ChCzql*go>Tm_Sni4BJ~8*nckVwT_tv*Y?!5kYzJC6y*sPArTJxQ*vAtfUZ=RmV zhjCd&w>k{&o0!j5we2ZB)N805o!DQ}I!Y&Aj-7h0nItF{>ak(UN&k*y`jG=hi(Gs< zO0L{1uK^V(npy;I&beo{MfrsEH1y$4)`w5A zq~X|?0h-2znMVH%HD1gS-#d0NPnR?t#I1FIrIQxV z@>;Laq+s+cNwnWzrf>IC67)UKx+T<1Lt+zpUfq# zeXe8gH}ee`8_k>7{TeK%Ai6 zz12G5dgm#zspXQL>lObsW*H4YF>i9SK>KfW0sJ^?o?3ZL=EH zywSCH+rB!E%p+)K+g`=DqE7;S#`1svXFbpKbO%2b=nedPoSwj~iwM-~|1f^1HphJ5 zKZT1?Q#>*etQKn*X{Zu&?dE$mikN22qROxdh12FQ*Wujg!&E!sh z;`6_T;walA9gGsBP83;xT~Gvnt#$|mhdLs$;IQ4mK(K^Kp>=OTIZI1cGotK*?gZbwrwZfNjkQ@Vtd85&6|Dh-rsl5xqt4ne$+EZ ztyy!_tT9Ht?-(`1f5?f$f5rX^0s;arDIuZ=0s=t*0s>|S4f!eQks8MPY#>YpWd%V% zYGPqu^}l>x6BSEtasC@)rU&1*1!un@6HGZOak%z<{D`ORd(` zIJbG;4Ym7bwPmguC7zPwP~Uj*Q9P`V$ER~T9^^bt$oB{B3`(GV4HnA(@%`^c0}Ege zGKJtO_#fg|TSJN8a0LG$tLJmh2(0z`pF98RwEw|q1>{^c{tw|5e-!A**}pvl?dw2~ zuyV@zkK@39mqrAJL_FpF`>oq~1^rO}t{ocGH!{9|RQ;DW9KXI_3I4D8pF<0Zh9myR zln|%}x99{63z90e2WR%E)z9EIBOIaP?3yEsew-pQ30;+m(r7gV*XBjXrwL}#bzm+v zp&sS*X>$Y%MK}CCQiSkmJQ-hq=hg$oM;5y!Y3B~Xey<~B93g^hK3YV|<`1SD{#E3Wk6Fp6ND{Tfa~Jg)f={n*Zo25Ho$57%L6v9ri5#Bv z#kuzIxTVrYD$@=3R0$9SGKM?*rs@s+!^;vqn8l_lo`0Wc>j3yi64eW;XwzenKj zwY=i*A0>+Y)n5l*N-m)-t)If=Q*nkf<+!_FCc-Y2go;`wIB{?8JAt|YDl z0?mHl<NAom?5rw~Ouy*4(M(8d)cdudj6ADa+w(&xudNbP0xpbLWvp$GZa! 
zpfxO>Yqhzbv$lLNH=_dpyK<9pZ*n)9w2il867opDle$(dM>VEVYf`H|2N0=9%m zjE90(oy6o5ydkmKwgfnVi}Z%~>>D@2_7HVWe3NT#2$m!3`+s!r27J?`ur(W^RO}VZ zAD2sK6^`Kk*9$;#?Ybqi*G)G@v`alReZ#6yOINQkT-DRGD_(f$j#q;;Sn!c0+6l8MRV0FkuiY_2VJ~7fS!@wyA~l=bK@K_G9T; z2{^7DYVR;g?B}C*y(WDWxz(!>L80%aIgRo)LNyreUt}Kdm|qWO?hpN<-n&O_X?l5R zg5#GOqal#ETJwwkEjawznKErtdq*<-`gLf?eDac?-lS_sHiZ3ZPpRwRdUkN8T+`G6 z=#+mFCz<8=7TDg>ygg+!ErB__OmWa+gSz0rm(gl}vDm=Qbhq=Zd8JQag0IjKoE<*& zY%*J3M-CLr-MhtpMW|57{br2`?6>^^xO2R7s%r z!iAE+ecZ;)+gAD>yI2w9d&2Bd5%`joW_8%v#KcET@}JG}MS?w7_SdLHKx~m|6W#89 z6IrRZ?Q;}Y@Qp8bx{&E&qDB&}Pbid4dzfM9ZLu~GVb>LF=u)X_GlrtHhOs@CiSt{# zgMR2*U{zy>A4Y@=Vhq+AV`O;>~9>N|-ZkP*2ueQ=)+UpwZ?hT_rV@*{IFj z8e)^KcO-UgH{!sT`=UH+46X;fo-kDUZHxz=9*oS4V{a%s`Cgo->tWC4(>eRw^M&Bm zfSCFd&NThRiG$Lnx9FzSFC0QbDd!k3i@x`QZq$x^Ug5s4TR-Wz4@g7cPo-a6@PzYF^i8 zc>Y6OK(tRQy#{4r;w7o!hHs}~(Pt#FYGd-q8gb5wY;X|qe7{yx*HF~Dug$#H@Xl&o zmp62{s$-mXvTNvK@O*m^Idk0mKx^nmw2hKtiFLqAO+!$up=z43)&c*cbKl}TS0`wB z(_#90oaWtvIkL24oZ6IkNHbPfL@)^c>I`H`B9E@v>aD!Y%w z_AL^Yi*=2&;ACTmU3Gn+oEnUU#=WR0t}H=lfks>W-dxpQsC1!)LT6$L7VS&C^ZkEd0q!)e^LAjzlXE542OS@+Gm-~Y2YL1+C*YoV5xUxa9nk*1 zB@|&OD%19m`q~LN;PLWW5xnbofaeaaAuIiVffQ+hIcyIWl_0+GwSQ{o?-$M=fj zIbd07nmRY~J*nJ_(X)~dwDo0ROdU5AJjD21-(;Ba+`jF{s5uSJ+<}HE)<0K#@xhMj z29ij8Au+rmL4iw^w1Nwe&ty?YaczW&pX*3iMS^mh-UwnzWb9$R^vFNX>P(IqDh0g4e9%X2yRkBZs>hlZD3L8ug>W z7;Baa7Gs(5Q?U*gx zARXS9*PbT%e;TAyKOOGet*#aV&JnlnAxq?<+q6id_Sg9arg-tkwhky^>GVHH%S>=LUh$2Z&=(DY@_1vhGwndb{5t&c`Zng4 z0^{FgXu85WU(QR?e3uSrCUl-OjiB?0(&cYR2%8(i_C>sCx5EombXmAw(S1vzpyQ9- zkgY0mnLy>aKQn&yG}u0;K*8sV;Y)O+IA3NFS@0Uy^?6~-lx0}Sp5Wi`NE&Uu$U4`x zKWor>?iS5@s1k)3?c5}oq~FW(gq)tkisnnb8A@E7GLOX@Fd2G>1jrNo8%wawahu_JtaS{me7A4|Jc1~n;Y+Zfwkd(!Dz3a zAg$lz%*l2nLyR#e1Rs{-d!JovS1}_F4wW9|sThF(J6rHZ8#-m5j7ypiZ=bYDCY^cw7 z@_~cMlH~=Sm&ko&5W1$zcOwj;)8mXt_XFLwR~PmZjZ(V=ISMXZpt+m%e*_Y)oCV z2DEV2Y|w>+X@R5gWMgx}cZ`$<#yat=;#sgBS%f`TjKoQe+V_QpeHiDYflO_LRVHae64o8iLK5|u`N_WaOTIyHi1aG`~%tToJ@*lIvN*b zX46XRIp0%R+v~$!6kLd$9Mzj!lGJlL9uPB)K*6K#^YDs4c})9M7M%#%W{Y!Unqk7! 
z1!s<*-uNZa%`BGv73m?jk^lb8a*BEaKwDg)9-;!!D>>n5IpW?3az;26C2STs%HSWR1F{x3F(Y-cvPe}S8 zC|8EKT$DKnyvnn8C_G9M#c!bO;y`h|TXdB&y>DzD-OYp9MFbt2?Zjj}zuDi0Wb}Z+ zu=vX%T2!t;SDyhhNsS>-0dH)7O!w@7yr;e%};OXgQ@i&R%RXJx%P zK&9u+lGSnez^ii{67D(I9g;Vr-4)wiIAlr?$|xv#l)c`Lm~sf0HKY?CBy-%7ofX>Z zxcV$oUb_i@OaF0?-_*>J2M^&gCoJmTNn##E(r)F3r=$M<7Xpp?B+)s)r$`a=)}SuK z*hvhvCI3r6Ln!G?)j!e9Y`424u2*>D#%AYL$5Z1A1}XgEj8rEcdU?u#s?wP(kBbEq zK5&spj<%SLh!l$b99)BYqScP(Vd6O=uOVjm;-eey4eFv^hQW8UW7uZ7AuF-PPRIh5 z-fq(w!`zTlzd4suH#nU3J%_Vrl!FBSfr!Bq5>_^R-JFO0!wObOf`C zSJ^{OU+&eJMZ#U&cHjEI~hGPj-!SlJmx$O zI?U5UU7qV=pq6C}=gMUcEv9M>(9?6LqCT4v^zyBOY~>%AFFJTUc-Q;U?7XPznEq5p zJzgL>`;0l3+!6NL&L&m$)eCRkBMi5E%?du>{w7l5lu*B54{tfnR979IiQJ-6K}VQNnxRCI;H4kmA7Tu)By6B|ot)qW7?N0A8ONS67A z5{aNRK_5)t8!gRnkA+M2?tLG}P3fZDD7}1v=ghmbjI%nEMi3T|Kb2TrQr%dZ9-X97 zmk%Eyc`v#C)?#|er^sv$Me-IFTGEI`NCpfn4pd5t#O7n(;rozzj)vprL;t8>rtW6( zrnA;ZKduz9=Hvd;$4(~3yU=>xZ1ZczN%Q{cyAi%3B5^j~!P>;dJ*$bvXv;?(dnP&o zg89d?s`Is3Vrh?5oC_yeG6E@qD5UfM=*cU$jyvg_*wjs zMW3&bOWRJ%Q9lS}5AOEEFVcgQ+rKPugqzQWVo$vJ$Id4BU8>pGOy@ws=`aeL8RfHFX%_sH$ieju9Ub!C<3 z@kONACBR~o$yjCxt^YX1^IzEXd6#c)_=hkfd=-y_iA>PRnEkCqtir1Ov zD3nDgm(Y01ebohc_!c&|;Z=Vt!^E;=z%5dt{2YZZ3u_qAm?w2)WNe01Z*M_(kTxA9 z%X2wrk&vVwu68zQ{-5k& zfq6^;g(%{hr?bmEN?RD##-BZjNb7w(FCvV)J3_nfAGgjWDJw}{uUxrF+=M7to>$AK z8{3A|^V%1^=B0N%;A$tGo3jm!3tsnMPH6pI53g`jDM13)6UnC(pe%PVylR<7GA6r> z0>2QO!Cx zQiwxUt39nfM1zInWAiv&XfOJ*Gu!$|)~yO44DwDZw%K39$6gMw)7y#4BZ5huwX?fD zAVHK^v;O5%+_fH*u#0W^l6Nt@M?TFWNPp)-2aqR`$@y3qhFwmz$(Oja0R+Z;egqB@<^A9Fh_m&$!s0Byu)|l?AdNj+=xwPfEqu=qnwfe*p z`AtSjEg6DC^C<}AecJ8BJ`y>9XCo|d@nth5Bl7D6h7+2fn#+v5>L%g)_3PvEGr@pz z%48HgY_*f4hY4UY}@_@0{!zgFeA>!`)$Ct3a$-roX-oxa?_DOFVSK z?M7%Hk!VN%zYrnvVxn)fS8>8fs2j z@BG(Mu}a!`>Y=D3kuurP+Y)W!MXnVZ3^u;LnGzVD$>ENm%b5BcRo2FePA@(&G!bG<#b%R(sY<&Igp3i&DT8} z40jbTY3jyG*LQDOT&F5xay%*d+BcH&es+5ZY&4kwSJ>H>GoWir<6Hi`s>ootmc5)N zpO+1+izG6c-{tN0%&54X04K3V`MHPb#LKW9z7|WOggd1%bi1(p-mch-2d2ZbWWDdc zIa_k7BBWV5h26B|MRb~B)_jUYRwdtPP+#f!N-dldGxvP%c%&DNn8T0 
zr5cALx@L@LMloXS#)V!g@dM5I%dGfV#~yRUnZ-V@10|rHJ1rbFWV+lMQc@$TY+JS> z?St2G4ED)vMH_DQ!nQoMhy5*C*V~=(LkFF_@k!Vw^A=A&CcC=^oGU(adjRHVwey;y z$+(ta71aLCwL9e>V6z5=&wF>a4DU^>m!^5`Q6P8P;~FfkYcD3G(+F|=_xXWLbv|3V z&T_u3Wa&C&C=dBwUB`o=%=E=K4mnKjIDBhn$!n>CWS~0V4axeG11wwK0UjV6JhteO z@nzrdWwLW{#RHeh^fh`mq;o7T92IlE9DZ2t8xDr-PT~dI4{K>8pJyFUf<>yjfAXCJ zIvlk*=%Q?=28(H>!NgxIHjd+b_eJ zk3Nfz=I#~CdVNIb1l^OrM(GPCeLp4;OEN7P^lvAd>ks_doloYp3)hx0n6}<8o=6D= z-8lCvln$FmE7yrk9ZX6O+>JUiiCI3PU^)2ikl6R~#f%(BHr993gDhL_f(MkB7^PCY zhb>1tSFKeqLhh2dcFfy0)}%{!L_fB?59BJ_543;Ia|Jcg8XZ%^rng$wpe)w#fq^F? z=2=jZJicYpZ?@c{zq1xX&z2e8-9D)CdN1x90HF61Dh6A+Fv1GkB?uT+#Dh5 zyuRqR*96eJn5W8>vm3;pwTj?&^Va;NH!-)S4ZbP4Ty3Y6O_7v70?Z?B4EGCX^c1sl zbidC=)v$Gv2>HR;n!eAergxnTHz-4DUD4~jo!KGec~booX&^o$4R}C3%)?a=^_klx z{{^%6Fl1TVU0&U^TkdWC;^na`#m(Kx$6iBhm+||x?CCE!eH@q~qi<6UroJK3-0`q4 z1UZ*?Vv~{02Ddw}&zVSyqq}ptx${|@@8cG^pNM9NFI-w~WVg!~|5s>SUq{ktnu?xe<8My~@Hxm)7Q@X1s% zU!WM%(W#^n-X*VW-UC@IgLkjI`$O@f+s8@EYqbr|UITh7fBNlf%;{*4G-MVSjO)oI_YHeG^imH-`ikGy=Xtq<69x|FEg?Zx4F0NJ|S~2uBLA5ix zU$v@ZsGPuiv^_bKvXXHcWp>zdF&+OcgyE?FKDw$a_d(>s0pGWMa;tCBl)-m^yipY* zjDzdFNM^<9Xq*v{Jt1H92xGWXFIBtH*X@6syC2m6!lmtdWqW>78^q3^F>(4WIB9-` z)X24H@^YtT6GPSUD2;7o?H^`YBQIin6hlH|QTUexfnwo06qEl*|EC({n(ceog)jS| zgi_i6pmi+qpWTxGQ9b-{{QoEx(mL#n4^b{%z^bI2bF4aBs~{B7y6pM>u|^15uuz>Z zi6HS`z@-qlv-~HjPDlPfRZxXxy1(t{|2DYFNqhgGm48hpA~2$+@YkVT>VMWQKNyJ7 zVW^s5|3hmi_`aI4K5fYV5PtOiO8f6Jr_{d{20yX4{SSrh|9&XSf7WnumEnF2|Cg#f zaK*g;K+k_^&yj{H<@rzk^q3<`|@Jy6o)f0b?xaL>WMXNXBj(x8)IRtz*2h>!@f z$oB0>c3(bA%E1TMt-9rBBmyj^Tg3`g=bVf*7QtBnaUK<&+AdAJ@FZt(aAfF%Y(2 zbO=blh70HckzxeZS#XqGL?2^`SZ&dcIJH0ZO49>~RZ zu{4m`w=~loj}8uexx^m-K%ieiBe%j?is`Q*#syL*lb8*ud`F;6F|M;ZLwLoHzO5IL zlD1H5B$t__d)hT7-R3~466gOcVSg_B0CJF2!!m^8%Asf#2{Ol1D()OiM^eHzo+jnS zC6(keps3iYK(@5=P|^^2oq3uE57`4!$Zh9EJl^GtTm=$EEgJVF-+ZxezL_O^?=_1< ztFDZVPLDnYk%dJc&-;Yys@;)!`}4949j`sKkL}4ud21hlQo}+jA{D zBn#qjkpc({3+UhgXd<)O5)eQ#K#c=1lwhLEetms|t&aelu=F$&f{7MI@Our0oV%{u7 z(`I9a_0M9EVtMPrL+PyLZ7gmEOI#0B9F7^k>=t3IK6??>EzUcfP(HcA7YlrUZ?eY? 
z)l-M5Er2MkMPo)Y(rFP4ybD zi&S%f{rJs>Lo`j*B})HFp)qJzahCWSiDYQ#WZ_vgA)&zfZISe4 zQBDNI3Skh!sWqz;$0e!&N&d~x+A{v`GM~tq=-4`kyI>=i7c%!R*@%(Q3hLUU;Pl>N zZ3t(Qve!yjEKd)a8+Lt9hLYD!6W~KQx-^@0E#+8Td5FvEM?9CkrK?^&HE?IZ24`xL zHQ$7UL`()dLSU0EIMH?`dg%TrNpTd0C?V3__NU2ne{_9|P;7Rl=Jj|WLHT8o>>zH) z`18-2AIVl-EdOAmEF>a+za)hWDK&Fq(Jm=mp8#Q)SYeP5QRBtZM5{B-<6$4-;p%VL z_aIQ24;SkdUHm?`xCY8VUD(kpWnDNd%O-VQ@3)J7-Rw6)} zI%>ZKZJq~+_CaDEf#UlRkYLOi=lm<*?Ur{Jqj38l9L^z%`^@HAHQ zjCfyI?e>heGS&E^`@ZQhGcN|Y-Co~2jXhtfSK-1~QT`#vO;r1I=H`G3C(@WG+ZYk4 z()W&8f#yG`_9y%#@hC;8zR|7{L=a#|8DWSYSE@uvLV4y_i;t$-sHEhN4750uWPDYn zKJ5$6@T8a_Asc>jPwBK}wwAH8o_$e+VK%_9279uC)}$g4ehmvQ4+Ey2Aez%M=r_pw zKDm+pdYay8%vSo~>jc8a(2-521)Hyg7HOa0ryDy5mQ6}cVz6D;3PHe=>GJU&o|x##jSH*9v1uvhX1t^zPsqp-)A8Z!c)8UdYP259 z?D9LB?KF4|g7JFZW0%3?4+;8-Ql%03mkogf;=P`Q;k&#oxs5?Hb*xdbHYl<&I_V77 zLqJO5rGSa4%RL-crmS)@ z8v}EB8^U#S69Dt5b&|K|?hd3udg~$BI*+Ykk$~Ono_583YdHJCz7zM&0Jf}E{%Bz% zxZs0CJI_IUSER9maS%*j z6~T$tlAS5Ht8|b&-7=C9kifCPu|)N@WwHDyo&?*ma+TWleAvd{Gfq2w$=Ivb1Z_tm zMl32@cZht{l!gWlawix&xWE_+zpZZU##ou~@62 z__@7tdnA(ldAY75Z^-QzJ3jU!bU&gG-d%72aHsQS&a*7p83G`(vq*k;u5AnmJyZ=4 zEJsM;8I|j##^8&v9FNHKnjz*H6&h{+GutRXD>WvTO|u0{<+3GYCq-uOp2OX;Rl+q( zTbodF7o}4tvWG`U4W|nRydK7#V(|F=tGd#MDJkSQPU%GX8$B8!8ArF?Vax!MNsEU()>n3Iq_GS$2fg1~ z0M)8I;%eccc-29JJHdvUZ^^SQ%`bmca>V@Xg> zI|tr>QGe`!6iN$id%`9vE^>LJN_*gx^ug2h;mbHDBnA>GnqE?L$ImGr?Rr=T0idon zM2o1wjx$2^4@t=$hlZsn9>?2_0YZ%(31oF^CuR~Fff_k3_}Uq{* z^NCnKFCx5P;P8O)C_L@=N93JqI-^Bhe-5TcR~YkMug^szGUvHN1`TMjXcY?Injt(? 
z)hbqgjPK|sGus-bh0>c#PriQeo+OhXYcKwyS_z1|B5dz(@az`QL;Bd86FfF3Ri?)7 zgm#5;LW1AZsu}f(-Z*>rjcJ2{W&R;l0Fn<|z$(ada2P-{aJ6?`vD}13Ua3+U=D)mpUknzX{rjaym$$iI_BG z5rtLuw&kLl*b#}-G$#O1x|;OKfwlodv5H<(0o2p51cR5Q`iLIk5yuu53%&tj(7B6| z-JpQ{hJ7bRsjsTOvWmB=yZ2{)^wvCJsHY&%z_nw1jbnE1PIW)1hoAE>NNf_39#{Sn z9haM}RzW>5`6t-n*7W&>rc6N9kErnpC8txylKC%T+uJB~&8HUVe^&dyV7vwaZQ3Jx z6Gk3CS{JV|u`#lEGSZA09ebqS-;YyQuVzs*jX}3K5H4hOdx5$WrXnBbS`Pd6Xer7D z+;VSE#Yu4ZtGWDZ-lz|+_pVuKwj&06g{_mou)oxGY6g7?)PU{sC4j#Jpuaq4jFER-36d zd4e)a3$;26GRfg1WtI83oSd>__n;k)gyBfASA7R$Q5ABXSiK=e>)+?={!JrJ?cb*8 zBuTzEG}C+U&?_?RUk+gy^Kkyj`Y}e%J22X9$j!{o9-5p95dzY6E-DlmWPprFt5D|o zbfW<;EkxLbw3hA*6e;Q!9LPK#^>FEnKxHMmc;t2(oI>3*WB7m#lM(T=aYRnq6@9q771og z@YI_tM3haEt+roOvi>0>3rn}kJ(*m&I-ivecV^L&^RDw~Z{`B(7KGO57yYdWSoG+D zkmhPXn0^pMi4MV6C<;2her-CJZv;lr(Ee}*$izgZEY@Mw)_1IEmA>gQYZ?Gzki7Qx zELghEB=;K(hx=gugU^#(rsAS`!`_ww$P?)-D5PHOzMP01ubf@xAs|;1^`h-hPdp*F z=VR%KVl|&(ZFXgij2tO~$>`C}=fn5G?!kJ|as3O%&ADb(Qvk>or2E|V#43|bXE_Y~ z4pO=*cMR!vTC^73@jx7jh-bBR=D21P>aWUt%_f_Ab6`x-oD;Tuf4y042ZU0BfT<3PI4YXw7P>w2?Io`UgJ+z zLy{o+qudh-s%6G=Sy1vEbJ#ivBIp(OkHE9DXx8;3#-IEgJ#wH6gA_<`qi<5B3p zFNh5p!(cZRXSS4qC@oQDA#64yxabTQ>o2}I?Y#D<5KGwSnYh4-T0;z5$Cvg<@(F@% zte6Uy^eqYY^7&0%#`CY#%=YF@Au>UI#*-V{+n4Ec1B12mk_tDn=_xCxUQB%8o!2ks zM6tmQ$L`3H0U*19vaX~K0lcjgTW?vICbfuQKekoLi428r$*-u(Et}O~5D{)er;wUVU!0Oknfx&vFTu`vE(uu1 z>3w}E@OrK{*P;3$Iy%i79fJoXI5%Ly0X8msLG&LpT7rvg_#`DsF!I@A(kcf-&k3oM z1(Dbn()yFJ8n}g-^BH7dNHc%GTQb5z@Tb+BE)LYke0=3Y@$x}Ic_k$$-CV21t?@z! zA;uLP9qzZ(EF#|}YNRi|h7F8Do#;7{{{a^C$r)cI7-EvbM+M0g!)o?)Ilyy@xk#bW zCd7vb@yCTw8SSjXs$_`qioW$k&G*X|J@`6cRQ$>J=(E1X{o&6}iJ_nDkBae4Bq=)u z-dWSB^f1pj@H@yK5D^D0Kcvb{!azc4fpd^jE+4JcYyXg5Ve7YYJFv&P+M_wgZ`7$9 zf{G(bP)T13e>DeX`hGBoi9ww|zZGY3*`razMurc{-0y|{z=>h-`a%Cme_>Or4yx#V zde>BQ0yf&1hceGN+V2LybUvKe0zF*yd(()lmUKdl&$yw@2+{JTyLwjN4!Ul3Q70XC zF5ZWg5Z8to1EEgEWwmQc<=m!N;KNVAkfSryp7V!Yh>H6g@r-yD`X@NRYXJZUyL;xac7>IZiB z_NzV4xIze+ALHuZVjFU32WrVKXb~<$hl`aVXyJr+(hR@;I-iM2ZtOJm7I4(Yk`cc? 
zh?RD?5hL45BmInd-8I=uN$IlI$}F@NU$L95SgJM_TI5(*Wne&oh1kO#tqkPs$z;rI z3O_CF`%Zvr^nSv@o1LOw^k{d94^#3v$(&15mrsC zSlBc34dLW@qN`Ag8ak+oS8jmOEeJ>dZ66$(z!a7t0aHQ}zJ#s6Me?0jgf<)tRv#ck zHG56@6>_P@9Tp!{C)R)Cgp9FA+`)nAxA~Eg_cd{N&8;@;TMT1{Yn zm4-=NwtGi~k?>!5%Fn(i&5D4fsK4$?&?xkTk_ujLkm~Y~oP%6QtUH{@8fuHn*igK@ z=8pvnB{DSWi|GPgvC%9d{Y?3bt6L&#kT#@C_JFXK*VJ}_;F>8iBrgmPAYlEA2rB3( zgaGywq!E2JyZ;m3(6u%V!Et{m7bvkGIoKm}hm*|*CyuXM;(A}@(#3p;G4%q&RuVHN zV>A6*TcfeGD|Wzppuu09$S`B79oEp#Jx!<{w4AYF>_NAqC-<*b0Ch); zb*P1*@Oq94oQ!pHa4YQj5>Y*U;+Mczgxm9#94zKiMkBos^u;C{fGY4(h!{m*)=2II8%+)=OFtI#r(2fCrHW2wy)gmi)8zww%B>>w+07N zP+FkkrPx;cC!oKZ>9jhXM0XoGZn1^H^WG+2|CO_lvYy+DRT^QB+Ln|fpT3|Vql};1)nJNj+|p(Mw~B^9mWRdrN)jeqZE#67BVzHGMmxza`)c+0b%jj5muCVAw{NJw0=KXkbIr z|FLm?m@$D^&VD`R%YSe`D!18Zn|i%~adWg>jQ6=&8P3Kq6);-n9{k~M(mymRa&vpl z?_xN+q377ia%|oo84VLDASJC|y=~zeGUPe`OhB3g4N8Kon|!zyO6+itN`e#NMW6v& z62tZC>guA34ooR~acN2ZiX!MjHQfEH$m^pGG2^};xEelcy1gOe#mdRaiS0I^05Y-!18Z@Jz8@Mo^gKbb zzZMjK#T*5J34%2+29$`UQv{Dh@klwg?MO<4cYx>gIxp3H7aYweXAHm2@={!J713qP zj)>4Ofcb5t)PQWmvgQw= zOA(s3U&%o71Gt)s9d79HANeVL0kRON#kOAZ969Dg4W2Nhu)LQt(C`na!Ri!Vkwqe} zOJZU3W1dLv|9tF z1P3$bwpUX-lSIYB3}|ZyoTPa$f(*F%(xr`uA#w`pthzKS{icpTr^_{=JX|lvp8wdZ z#+a-l_Dv06l3x$j#WHyxbIWjSDbsO0(0Djm1#WcZBEBdZ02`@!SfW1sejy6ES5LEq z4bm)L_qx~>DQ(1w3zdz}%qX(R0;aq%JA>SUe4DiJ`vR+boUEc+-`A8i1|X~MG9fgZ zQ5{A>`%=Re4*+j7pWt0iM)P{*75n|!{Yx?tPOP};lpn4|Tccs|@0JB4&%StNE7)dq zS)sVyMJ%R966ypd7!mu}4It%q4(go^+PFyn8qcxe^~D}G*b?@<#L;@Zg0-R=hat0| z5(=|5RpY#X#fZ+1EVN$n9*M6JV#tA&@zsptLQ;m-<*Lg$7h|GyjO0M!DnpDKm6ViP zgBNGvV63ZBEB*kDwr+$qXBJNhj&a@Bc%XC|@+ir;Ut=BT!VwER=yGHq49(DQ z-(fj``L~=BqdllCsp{G6G&0LeY;unHX3YTjnEt4gowWHw zd*634X0RptZ$5w+p%tUf(DOuS)Ub1&+?RSQ|6icAyCusqz9>8-8GPw*ZI$2Tl0BSZ$ikjKP9vfQs`xjj0Q@DiH$?{ zyxcqBckeJt$X(xTmCG}JS{yF74Gl)NCs*IIGZ8<1+kmr`6AmuBl7D}6jUEtX)TSfq zJIJ(JlAg0@AX-Kq4y2AQF^nW7e67~f-2!F-o$aQ6Nl7EREVS1~CWb!I(#5iMxWk!~( zH3L@U>N@%SF)JiwN&#l@oVKv6g0OW|VOWifOiD=4>l7v2G!n;CG4bicH0df5#!K5d zl;6gdarb(d>m;uT)=+^Tq6YEMzHRp!C1Ytlo&??^vB)F{uSKEpz1twA(Z@2*?qm(w 
zAyd}<_F6wULRmMwN2R|3>{IUHe@|Ojh%n77L?4fUs1ObB2k}^VB9>@R=D5{;T|$@M zr!Z`dOH1>!Y5u9xXp9^!^7~I^N&Bxbyi9hb@3_RHrqd;5U`S@el7}wc&OIPTvLGm^ z42ry@*!UKyljJj=NFFBpM5uRv9h}UyGMEg`gYv1A4cU(Sv9lr(XKsuIC z<3soHN^s?o1-KHE>Ogq3X=6({KHOw|^+$A3*NMI`^ans$bN#&OJ6;?b>2&WBLGRiC zTf`UGgCksfWbmb!6jj&LfT>S>* zDSsL=7KPo_IjtRLB_t(389IxsFe)}|DjkLR!$6%jhmHE~M19OSvMB;v22wvwqEjxx z=yb-d_yQ}u_E(uw^J$aCa4&D2)Mi$JG00fI-CeYT=_UR2xDU1C7WRFcn9CEy5j9nd zfFvi2M46trm{%lbLaX`3-;|_aG_c%AgW;_^E-x;W{SM4(rXEv#V7$~^2HjNkUXWd-0!a|d5Majx2PNZ`E{Zv(g@{%Bi7dFI#s6Gsehn`;YE;OrS z`*?!uy4bs+c#QA%E^K|=`0rMjjU?{wKflTM*AA;S1-tsBY!p1AZKds&R$(YC<>RG> zGudF~P8W!fihosXbr%L!TN$aa_Fe!^;1khk zN=(j}UWLUA@z747GF5zY`-fgq49F0-CLT6+T>F&npI)qO-J&G>K4M!+2={tbmtJp& zan(s`w}-^?Fr|Gb?u@0?`9e z9VvG1epBF0@j~k|!Ke@j$9@!Y=_CDo`pMHu%_jf{_^KZqIaL&GbJ(WIedwdOA|s^B z{AYmtEML!Wu1J0@%}7D(D*ty7G?)Y*Jc{qi_u85etrv1ZI)p+Yd_F&h;nWA;vP=_A z6T~d1aiGb%_Bs6811ZVLY%i|B=kpRZHEbL7hNFuBAq5T~<1Pt%?hQMGFWM4J=}yQa zCioM;=k;O54sZ-o^=vVwEqQ)U4gZ{8O?B8tHw68AWfN|b!}1^q_O2R80L#INnn=sR z5Kxex!nRU_+K438XS-rR3lqq+-0h-)MVo0i`|Bem2*;*8B1uY1rCm+}!QwEsv3sZIpXlSVb~75G^d8=tsDZ0$z*zr z76&zS+@L!Id`Y}!doaE$;;PZ9c#Wg$!EM#_>3n(r{hV>u84Nu4OrHAbmArfBo&5Zn zXV8w)pond9)0n?x4PKv*c8Lk-y6%1tgdm}7KhDhV$QOCJ%>Uw3vLoM9qf5&AkHnwR z8O_LN+|X`xZkM&kppd$*qw6|`$Aib`$D82C>+`Z!NXovOjHf1q$CE;$$A>}GUGND< zAsj|GeE6h?nzcV*^*`pIyI#oJMLhJg8|d=cb6nc-s51ao9L1=YWGq%k#xJ z-jR$n5B!XZ#_TiI=9tl=;_qam_xp=hJqOQ*Kcah6VCWm*u^>1DExaBrSO~S>z^2#T z2UK5sG^-MkdjcHuIt0Il9S?#Xin|iTMF%G?JXlc(mO|#q(Bn^F6_HZ$0k)P&$W8|t z#)?*9MT#&q-Hq?C%f#H1)Q;4SXX8n1i@ip3({Hw*zGX!#$<11iA#t^X$es?@pu>HQ z-6y1k>ZhRWM%Nuf#G|sK$Hj&EjP&ukOYnT`iVwS@XM=GPgg$8c`i^V;kdd;I%F^{D zcIda?_cSrV&#+^L;y4bPH<>p+_<{%j^e1k*^KN3MMQK?Dju~cnBd)P#cwG8X|t5KuQ5Z;>7>Z)D`N!igj|LP23W|frtgs7$jyw zq&h@Wa$+mjS=DoIlm{Jt|49XtmxXYoB++j$Xh;DY^h9cF?MAL7dqO%AX*)>E!KVun zQxhncI#J7YeUO#|dh;zG+>6EoK#IDSZO(DR)p4c7v6^rB;7Gu7khVhqxB(Qkl4yvOz_kLc*lI=0>J0+8>q(eVhe_?htFPhnu zdy`$y6us{EuLWX=ko6jQwc+Q#W)5;zDc6okb%PBa1q92=`0$s%q_fw{pkN8kXRk44 
z;&7I~{0a-kj%C=DSGMBS4kS%-%Z}@qm&Bs-A~LeG@%cRQcr`ATWfKaA=#c9tcI*gN z6tu%BE~b0WE({&ioz7i4;PHBzmC{zz1pWjcnYpA8@n*gC%5ZVGxjk|o9T;RG>}x+E1b)AdjPw*{Py3AJ z%hoaB_~W_stVxU=F^K&9oYomD;wlOWcMV2wul@;V=C6k((!q#m? z)C2@L78z-Y^zPmX+p;h%tC10l5Q0vfI?}`AVMkFhLkIPvthAINg9dQ(P1jLfQ^WkZ zbCHg0y(r=P(QfIfX~ZHSEHg$*vSeGCBsE#YFLgxSmlZY?Q)aI@0rVzpa8692@ent zjA}XfbPyhcs+dE%h+FbVke2OEc$+Ie)HSFOZg!$(YY&<`CdRMXlGS}_*{+FzaIj5_ zs+h#*Gw_)F5(f(2=0b;+my;65z4=N{S*97z7qp{;YlL#I8krML>ma!5^1!q5#GK2$_wF#>@A zwc!ZCU@iWtLb|m{W8i>}6m)2V$J5}MshN1aUedF2=+M18H5C<9Y*|H8a61V}eqy?T zN5+5a!cj0y46lhVT21YaEjR^j(Y1qMVImuUs9pFlKfiqdUq5y-Iifkow7lIF3PAO} zqZy?=v^vJV+H>x_g^V6O3d@W(dfyU6WHbI1KSj$q9{NvnvoHi_f?WYl6l4rHFhT?O z2?Y4l-C&Eg>*H}98i)2p#_3>Lc!Us2o)ntL+WNxh))L#ni)@Lo3!Rh^MT z!>!2+MqusPkbEL!oDV6Zz{uKp-jw5jHW1y~#7%L-NVq{U7vjh(4|wwG^ZOVwurET> zkwWLaPiOG#GcR$?b-%!}EGjCi8wFF`QyVsXZL&WJf!F6HJ2Rc-3+A(S?PkW08_9X6 zjAitQf#l|9x7s-23Q&#RE=<$JviIdm(wIPX1$o&iXqrZKO@Q3oY;;Ya>jt4vh_bRW zyk0NfCd^G8#~~7pl9`#ob=Uop3x0AADXFRGx=vzJA}616GF!Iqplv~Zs|4!WpSDX# zOe8lqo6TFc&?h~YnouUCAxKOR@%Vo3)ZB@;S3@VFx@$_}E(o|^n}`LW7}SQKCJ4D{ z;PW^vShR=>&prLH{1*IJ>Io3GBrndZ<>7B?2$>R1hd!M%2&bp8qRMF`s5A>H(KRp~ zffIFoE2JjCmPi~o_@mh}%Lgf?>vEy(Gq=W0;@K{wl&;%FGY^y!w~UT!X(UY>X-Xov z_gEJa+q97_97k7FK|!Z%`lbh1xv`qf)ee`xS;lSSQ~CL@WD<;nH(|GI$#ZjSd8sy? 
zDz6XXDUGFoWS>sVc8FSTV@p}re|O4h^*>Q@%M;U8k?bf;img@-q-Z1nFdxNc-J z-h)`513^m3s%M|2>cbBi*0(SIEnA2FDaz>LOj3zSra1o&<4UpM$<`bm&~9%M}bBNIHMq4KRg-y0n~VjZSw zHaYQaDG7cbL;Cl|kpkW0<=xL_@y1(|x!@;fvUsIQAW$2(fY`tDyRH^qarX*$)&bXHR~p8A0I+S_4LWH%&5|B!1J&eaCZX#(mSrJ?AROMA z4aD`kh5AQ(S$Ex?B21kyWZO0wnVI#0I5kbf>+_S9lh3y8MYM0%rq%1;91O#tcdwp& z^7(Xz4jn>nCal{m=-3`~4`_x!*WJ44qH*`GtGR<~1z-VD8$wZcLS9`9rU{+$B-^)d zM@N#Gk*?}_5Gf8g-2F)vpRbGHIFR7e>C-)fO^%mfpm}ZO2xHb{UP;WCNt^CNIsS~FaOLHv(l@Q;-(y*`gxCK4B2(usW#zg;wpZ2?u{07h z+t73Hv7CANbzE~wH!*}=kTpYz2xGnu_`Icqj;r=&7SO!&#j>p-`Dr*YR~cX2{?;|s-#=JC#puk!XM z-?3z45!F@_IURa3a?+Vxa^x*l!eHpaxeGC)N{uwu1F^SFzO`JBgk?E5^;oAjESh{=->kD^KTv0UzmgNQ(?>Hhzx>yT{{rQ+w144o=*kJ{pY@4VIB&*`TL1yPmbRH6$mOAwoboP#iDv zuK3fyw@a3aqFGQ9+?h5m9_pcf>2z<)r*>V_Y=te)!II~|n+<-G{eE(? zGuXUgBZXUw7&mG#=bbp35rcb@mz~in<=+)aU2sQ<5E|80m2BR)fkve)E4ue{F17o2lgANv27>nSCCw<*ePQ>rKqIygX^Y(KeeQ&~|hky!P^ zPfJVIaMecOuC8NhZnRD5PUeB;iw_9!N>}A+X|@Z!f%QT&WfV74haHbG1emT6foxo_ z7Poo)HE6;LGVres?U+bg1czCOy57fX5g^mN8`^0iuOU(1m^J*SrEcKE4AAc*bO z3d=A6!>w<_g4U@Gd!;VF^v?Y%k-S`zK;+v1zd1Gq;|LNUV&&!h@XKG4pOi#rI#KR0z@PjM9cExaV+$>)k7VQl*CLE z%d&{YVu)yjtaiC1r>A3h4&460FbtAYQ%EmpOKoB*wpE5DH5{SUTbN>52;IIjb&smetkYbMLheGvedB7`8vLx1Ca^|Me<<@$mN)Hypph zMXZ}Qm38wz=J_YabJufEbI0h+mW>m!A9&}nm-%9s$L~PNM&^IEk@=s$&6`&}$47rV zf!szXjxgo+6ZzTXCZ5~vO17`9qM~z{WC7r?{pIU8;WsnbJ;ZT{@~x|=3=YIhZg>Hc z!YO~?vKyXbQB}i~#Zk7dn#DV-X7TQ`PjTjbFYwe={i!=t%OAHc=ei}aMSyni8dV6%yPD_ zE@$iN#jMUclUq;kwjVdTbvSDJ9v~~{^6*tx^5)JE!)hZFJ5NRmv8d&Ie+K9 zCoZ9Td}gh88q2tj2J_ch#1_wFcJNBhZE7mzY-QG*t=NE;K8o=J8wd|}5ntSU1;2P~ zJ{5L@3&%=WJNIMO&i#Z}Ub&oS-hPlX+cs9OtPO8*=Zz1syeTWnNDW16=kd|nc}#iz z4X%FfEgm{4j{}nTkAgmTq_NLmmez2)kcbANf*Ic$T0NMJ1%`dSmyRngM7OwANbKrb zQq+kXP7q>Sp!#dDf{iQ%q%d%DE=KSCXEaY*}koa(()?CjTp!Yqxv#pP)~BR(p#7H99uGdB z7sGJo^cIL`giHfUdbhWi5IS{Cp-YaB!lDQ} zs!XnWqmpYzC3D@FBoaLbHX*Yecyex#7dB{AIyOSsZk7rfi=vIh&PFbR?mx1D$l9QF z8}BM*qrX>4$r}*^V`eQkk2*43Fq~+VHNX2EiA6

e&-}-8y$n2Q=y0u~^|4vC>*1 z2^E;VQpxMxjl#R{CVjvFQU?q;+*Q~ox{%nhD4jokh=c{{hz>mp&>xCnd#$V{2g7B5cbQ9vvHjgHKwdBzOo0 zBc!D4Uly+Bl78GDc>1<^obupeLQ5Xu>OYR+>j%e^-lA?9V%~$7aN$FX2ucv1YzCZk zI+F&p!5`knk}sz4>GE>S9n<;CML$LV=v{6f*!;A*BCQ+4#!V!xeQ$bnYD0EP0=iXA z;lfXN?frS|h=y4H+|ArQekRYHp3(H=Qp=$C#7T_p+nEmeX?UVlY+bvYlKulp+w}n9 zA#cDWP8ijLPHl2X)gzP@uI7j6vGlKBpOTb+c@Jm*;sw@3B?v!lhn>P1<9m^(*RWyU z=Y06}I;ta^n0)6?hJ z_uFsLhW5t8;pJ%PdqkJ>uZu6@PqS-qgom6#r*YAlBk7(A6>Dbl?&}}3tTM*N_iyK? zoqO{2?}w5AVDx3;xNQEtt_Y`gHs8#TaqfiO7dK_u45lx&072rY;}}_2G#Oy#pDyH* zN0$?p0)N|)oO{8^4C|PJS-gU2@4U(9YikLvc!{fj*_B=&UPq56XS0H&?#FS?#D28P zNXDbtgex{O@AFUiZeuM@?Q)*^)$I(O@e-F5>|v}kx^c;0?%>2-EPk}~-n7@?XzFuE z5{Ja?LZZb*1L+D4POO0d5p`WIXu`@t?rH&1_uhoaW;YImmIl6DaKbyS9wHlDi-%o> z2Tv|`m%m}>T<8`hE<9M4MKl^E91atSM2JR0bS!;}<>CxFB_}t(o^}jPb0{u$brG?{ zUHRPgfmE;DjOU55=$fccQ7?j9(IWBpV)w1lb>!}w9d91na^KqyIJUc}xTnMsdJGbB zKufZ)?bgpFli>3*pjQ|C2?>N8o$+IiVcfuOShhn+c@?2h2-|k{c^)AIKA)GYj8u+2 zrVmv;I+Br=!lc2S8QiBMxtVFLUiOAK(!q9YbWL}ozYFmCdrq^Yu9aH-MaM*7|=M#sA-z`yk3%%l88hit_b1tVcRwykDE(9DJc;lgu5ywM7;}$ zj86iB!5|swDfoSd*Pu%XL608Y89JyRue|yiH{EzWV|o=Zca6@D?U0ZRiAio6Nv{tK zedmI;?SN%MGz#I6YYh+yLZ6N>uoEnpzkr!DXL0$Z=O5NO{x8?_{`YEm^3@Qw11F70 z;f}Mi+2q)~{6m0LL!vv9T3Zscnrkbtr}lFNX(=Vr0O?3K?t@(?2(-ZMf_S!zmaL-J z?=M{!isrgqAY3Z|+i}ZpxiKp0%W(jWA^?veNHzpXhI^e0hh@-7)A`#AWqdZPmIr19 zsVtWK`NHN~<26NLmBouoqm)~20NtHRcB9JfESL~((j}oaFyX2D@rjQ^axcg4Qqw9S zc)7;nlpa3%Q6`fX(ugKx z;2Y8(!{cc+%L7f<@%a6e4IYoxwFi2B4k`IL_;nMiilKZP1WS?IwjwJbHOWJDRp6kD z50Jpn+aBY0-<`@mGizA)+^yU_@e3Y5Ijh+##IYrha?`&S6O^E3j^UoyU*tCa?Gb(Nw{`Blz|6PjFFl)|W&oC;k38{xfMW-_6=@x$oC6u_h`JMggZj@CMIa)t8-y z5x?VipZtzjo4#*Hga$d&7H}@ zx+P?%f;n^6U`_1TXu=X+FpGsTSNIrl>@n`7*D2#0tVvBkQ<7 z@WIR1GdN{utNiN5%X$3Lv-$1U71YjtoJXc#&SR6B8Y^_#oPIZt{omlmlihna@#-(n zvmJfCV!J*FmqJh{z!gMA2d6>O)9dnni<(s)b z@XQC_^1)Yg*uK4pU~Q0SG>RFEVVb7<8jBH&#fU|tL?aO*;V{A4TFT2S*i^WUuV*i1 z@yZP>U9py_GnSC(KQt3=DJ8aJV>vdCbZ{I8$FZ?23(K;w z4*Md(?`O>D(RAt9hQI&)pA>K1z_>m({X0Q42qin*t+WVAOX6Fx`?;hDinl{qG5CPv 
z2Em|C4l`$b$FgP1x%9G2S-EBdmLuv{YE?a3kG`FCQaTB|?F6N|#k94xl-EY_W0ERN zQlv?W6T=saB5T9g)gkQa5LRUnt0IV5UW-{4z$^=3mDXUD)DSDKCRS8Utf&gJs0yp7 z3bUvhtGEigv<9;*fQ*(cQ>;yhUPrqoj2#H$1fw_+6B#iv%L3S?HCV+}SVh(G zx>XS?s&eaGT#Z#ygB7o*Sr))7uf?heVpRs~+fUYp@x`K~I5ASB+tz+e%4?%cTU$%1 zZsBbwNa-Z#+xfuF;Ior7PVep|1sh_PYyDx`jkmh29;OYZg^XIrh=~lxw}^?1xPK>R z#rKPEmg8EnSoUt`#j;7k;?%A_auN@Jj{Az)v)TNA|3|z2{Yfh^t2PsF zFRp+5Pn?jcA?-q5z3px$7dQ8Q2KeHM7g!#Zpk;FMpPy(v`2*0i7<<=~+&3W&4M}+M zb38Go49g_2arUTFJV3y{^{@c0W$sViRMh;a<_yV;UfeLt77M$H{#(Tq6+>i=f5Zsu%?8m&{AEd6da zRSpQx0LG7wTYSh$$|_uOTubXhx9p~-IHI+?_x^Jv_r2&`o_=~BAt~?`g=nm=DfUk~7v@Y%=nXwl5n+lh<+?;4Kr3czI5ci->Q zzgoc2*5{58w4F2brnkBC7GQY1yS;D8CiP^9YzHmw!cgaGQJ?8zm!Oi4vx^|nU^7eH zOcr(MDd5R)T`eMOpk{{qzAk8=k&V^$QOutI#qkt0it7*z2Ho^hwYAjN)*`DG(YE9c zHk4-&%l|1II=06!^k&yn(>*wjhGw|I=jw!r@=AA+VMh@Z7ehs*MlhN}IOMK?S{B%L zgU#9b-idEbeRdk|Yk}T&1l}$a@pYYuzw1Q2?T2HTU|Q~V0<}qXafg250gR&O8{GwIvA8;e(M;?94(MmQR6^|CkWzwtifG??(apB?wJbg*s9 z-IvBevP9e>;)F@Z#Zv;h|0kuyV;J~-zWAcy&SP}Lpl7e1_~XJ#!wewdNCc$7vaGlO z;<{}36B4Pat~$WF)^T9%m7Sf*#0ldW(62j>J@y~I`(`>_v%{QnjATG(NYJ3R3d)P2 zG(JJED1&G%WF^Ar-f+?oNY$!&{`nVq`>nU=-mMFrI<}`xyY_6@yanBth$Ro+J^wM* z^P*u1ymEUQy+?Rh>x=Qo!U|rRT}_81opi4x#jr^=Y*Gz}B+Ws$ZDif#wc#W-X5pAN zjv1enS{5=EpM0A3&i$h%B5ER|CQdkp6N+L7BG@(Y$$m}9-Ky&*>UDxg5W6abT@`ZE zbGl2^&9AHGuH;M0u0Kzc*QmRGU0DJVoKO@e9BWkey1G+0$&R^oCf){)S=UZ>vR&j$O+N;>edrs3GtFjG}?wpdjaD{j7-wB%&k*fvBWuDeBi%&rq48iq)9 zOq*DFEmm2OP)!l(fUR=7WoZDB>D*qVXIA+?d*4h-jY+q*umq(}e9t2?T_>jlnSV!bAJ|i5>ea~F-DW7h&0fPZ&pGi-nsr>}` zLx<8q2V{g5%huILg%YXbIb&S1>$bM;OFrLV?Neujjk|3K$!An*}8Eg#}3G@FX=uv?smYKZP=+#ifIFj63} z5(3|YQ%jvWsFN}dtMens#NLepO4h}7h(@FCYdA4R; zs;kGBj8*ML_k-?rGf7C>HM{m0U>NR#Ldu|X=ycKj1eSkA&BC_{Z<>LjnRq0;ShsNty}EZ|Q2$;mT(W|@ z9)5$r-g*&!U+bIjN(l}UM>+^cAca8Gl?$!@!StjPj%U)uW3g=;P1i8u5ygcN)CK}{ z>eR{gp^on#iA2cB%EGiP_d!?Nxq2ImM%`TNK0nzx+1U2ZlqrtmVA>|ZV334_1RTfh z*U8DLR8*FemYUWobv{tq6trQ&_~S@QO6Jo~KIW^hr*rPvXE9{RVEVMv2%Can)a{c( zfZq#AUiUQ|4D$J>)0z75Cv@%Dj%$ALGghu%jXyDwMT-`(Zv6(vjTy~pr=CnWSc~T% 
zq=Wyl)^l`wFK=F8@Y^qge7PaY<|>Qwf0Gzn6~ zeZ1VDt5WU^zUrnRtsBqWe(p#>*U*H9=HMFJ(NCtHk}A>+9sE z0&H%-jaxn-09_y&1lJMusY&X5-q`sXch}H#fu?I9C7Q0)dwfVK-Jh+xYs6oLXz*Fv z`Pt_YB>6Ow{TfLL8p%G5yi@~2=zO?5Ol{ObXo5*yd^|8IiQGgjew_om#{%e@sdL#t zKeiBjvdN-4#%@+N;msNy`@6qmb?$qn12J1-IiLxNM-zB-fg$2?KZT%lg@e{+ch{T@ zpWyiRI+qXd(>+s17p*ha)OC}V;Fu4J>QpvV@5H^^Cq819mDn!n*2K_~bLh~m zJE^6vg`_lNn6mNpcPsZ^+H<$*)}Hn?*Ro_q6bWdV$1t*ovFCj=j$u?UgHINi*c+Cy ztQPvE0iq2PC-udcxd<~bmv0xvIAe_a{N`+*$+v4QFotmYug}6acRn+M^Y~_7l=CLW z7i3Ci@$E_rFz7sSBHgvR@@8_{wU^TEt7lmoE8@eOCoy{J<^1ZZOE~Yi9;6?}XN9Ay z?@20GtYDiBU^4%YBT4(?zG`SwR$PW9<5_G-XW|)SNuK%%0c!=HeYuTWyLYI!%!tna zoG(iq5MBnIb`l-pzX$f_6|AUnN8Rw}H_<8MrhT@pU0On^1)V%Q+qYrEH~AZ(wV{21 z25Z2ns-)5e*}xdD(f4St-<>13&mGC^JKIGp+T{C|;7#qqrkuyfTXP4Qp$*XcUI;gNAiOdDw+1KoWV%lTp(U<3;W$Vw6}#OX*af#DaRZv|8c?!s6QQ+q z2t2~}>I_!;|3vozJ;=?;X%yJ6xg;f}Qn4eOw4^fh1kimTZ18&G;zRsZi`C%I??ysF z4|0yb7Q3d1;No`)FZ%#%+Z?cJ-5=w4n2bHed~jr3lW{=L>4~S!pho+Jw=6~2wHU_E zCKJLHAfx~z0lcYDW9E_WYe5j2X7JS8(|GIixeOZEhoq!LR@YfO#H}1e{GohT@u6Xm zv&P~B4FZFjtlhAM?p@k5a`+$?E?&VM55La+H(y9{V(ZqUzRhh1N4n*d@c@(3aU(oy zn#RZxLjVV_*I?bc^?W()8wL&RkERLQwQY;f@2elYq?G8IhH1HWXd-@N48vga=8aTW zRpa$~`EuH~l$Dk9``_J3Mn*<`{Y}#%9FCytdR%~V`?t`&Y}-+UK#-J_NLuQF`G)T= zwr#U@YZXh@RIt7{n}9Zy%Azg&_Wz!uAX{hHumSYw(T%ok+r%wDBqg=AY}!=F%2lga zzkVHU3-Y-1g0tz@w+~*g7t=Bc)CL$bcrb-qxAFOxU$LpMkgKlvIgYdu(mv#7ZU3wF z%uUpI`t%fDTI%89Sz!V(hsn!BjOgH}TaJ&-RWUq+Ph_?8ku zT#%6Ns}{HPZYV$)hCoB0New+dP6>_rV`QUqP=C(4aYASsXikd*PNK!*v&1JjJJa1Z z792!_&qCMG4b7dL>H-6S*IOqDNE}Br(qCwFPEn_kAcP>ntC5_bx#B~DAla|cG2P3? za)&vEu32oVU+1<_30ywFkKW8lN^>zZLDzJR%Ln-IdNe-TU{h(zMh{qdeFV1-_u=t? 
zodp{NMw8sjU~dxi!hoN+yuo2B&bN`~C5_t09_@_=JVw@!hp_Zr?UD(3xr{=ivdpXAKF)vc_QG6z7AmY2)k!jp@XJo1tSL?uqSd)&Uo2aCe@ zf5T61dX*KmyL9fV?W~@^{m`}%fKyIMxjQ-3vh&H?+vG~mB|j%VMmpt`R5+04rgG~% zaU$LSvWOMd4yI3Ai8W>*29nAd(^(Lc7~RG&e$+T3-F0RxFXNl(OED)7!-J&iyJ^ge zNze;8{)E1bCNe3<{*iZ{F!}kfUuJPtEo(k~oLfG7g1g&|;M^;(<(6NZLeG>#{IIYu zj<~+(P*Gk%OFa>yja;P!nJ1jX#MDojTy3&&%BO6-=4v`<05RrI`HXEg2;Wf7J!7XR z;gpwB-qH)lqO{maOC*q#7@uocQ6hFrPq34tvdy!Mh@OQfZvun6bS61DdB1;beLgSYSQm0~SK&zne=3+U z@We%jx>s|{9AYJF@#Xb&BSEJXkT(7XQpa9{T~$bQ{C68O`hBGe6!r*-16+K$63Q)cqcm-86dzZc2L ziL72%h-q4J;X%|}1T@@$`k;*sO;UF^mvz%~nWoA5P21?wu`NRe^=0X@)!g&w+dO#7 zg{^J@Q8($YLulG=s*o8ok%?{@t7-!1hKDPDemR=1VVV}f&`v))%eDc59nT|=5a^o5 zmcl}+tE$m;9ox3C9ft*rmh!KE|A*iI?oMpmb?=D9+;|e9P#D87To(|>!RzyroRoyn z1ltNXA9C@*wjGu%FXFwKB_!sxVPK!0OdOI(f=?$9GTFE_z<0CP^8Poo=)Cx2vNAn5 zj*KtD86+h7=+>nZ#~(YA&Ye1vlCrb>1^M}Go->d3ow_n^%xJ!v_BF-DB|Q1`vs`rH z`DABi;8@W^642+zP)}WhD+c-*oNsXZ7eUrnIn3D-Wm~{tTxUN;wJ{9c!7yA42t6VQ zM;$jCMqT_x*&xM-D)NAkY;IT}Ny9K&Axo0r*AVfH90Eu&;DmO9pY`+7HBB6(6q}Of zx;^Yzw1~O)-b>);|Y?Z1Qc38pY^+Jd-;7ixnFz)_?sq8}1%R4?V=JX>*B4je>FG>F4W? 
zd0bomwtO>Nrhmg4dl-H75Hr7+O-O>4eH zp)Ip`zT zfEEuIdW$l2@13Kw&mB$h) zxx$0pAV^9>mmE(P$RN@V;^g0k)#h%bmQ){5vZ3Hmw>jY*bf|rVih#`m{dPL`>Q6yI zK3?yEOwJSI@w0~l=F?Nb7vCIfaH~Bmy*lN4-uP@D1N!wM zIXQ{78@9xS2hEM^P;d1h>SG{?c-sj9DJjW>LT=WFx)l+25g_VT`c2bhW8ro>cWA@F z{ykZ=bQQmQ>|OqR!}<7qt(VkBIy=P&A>2sxP6LY#hj>PexO;?cyMZXHs;hSjSq<9P zvMh|azz~f@@cDc=nnp!Md3`nuAq3KK<4w$(GmpZ;t)!=?5((mKU z8jfRA69}Mb8lg~Y1hi^Cv6#iPZ!M?D-;VPynMkUk5skUxm>B`T?xkOsB!=|LW5+oo zc;%h>q#Bz!^Mo#LAWgrI#DoNrl9JH%-Ir2QQ&SlFQ9v`z6>5bh-J%`V>=FS zz5O0%oPH|p+qT8A!UuJ5|FP5)+%7vf-{6h&Quy-6dqbAJ|N;LH|rGEDS-CUn9vUNKVj6@(J3c8AwOxgXIyzmV>5ioY~#O@5UyOlX#HK0~%mx zf-Y$qm-O;rYc?NjbO<{dj+6*}_om(bccSrUAq2wEKzIKqL6e-+LGZKQI^EI_p2b6R zi3S3EaLqMj_!Ah^t{r0AHrEtK*WEP9wp}ke)LA`K!@;2RMRy?o36yP1iV%anxP{+9_79D55MP^lS|q5_8<7muygk~x6`t6$kL$DmQ)uP69)FwQnIFq;-GsbEi;>} zeSQ~A7QXrt^Fk7|bjJUA3h!LogT^7Y|DV14j&Gwp{5bx(J8j7;cH->46EX;Ug@iB) z1xlfG0HshmfVM!P`$utDEo~_+yA;YQEvwM7_uh~M0)gzk9q*}i_xnTgjO}>Iww&+# zg|H*tot{U}+17paj3WuWdlL_T(Yap9oqLIL8JKHARUEr;8#@xy0<xLmSSU)fAsMj(n@DnD|b4l{Padn`AGpwpS+VlUiUYiSXoYV+pApl?VmF8 z&3|xueqXv2>`UjKN%nx`eB~#xj|9=%Ebig=T6zA5D96oAf*9de5FaeMQ3c zJslc<-x-!=kw0_jX-_qP+3_?MMXuUTN%F6=NVJZ^6_1YhCITeZLb2lr47XrBu90avd9~lD=>Xy3S`Pl0`{L&((O&O2R?_>44 z%|xS7EXzzzGUZB4B$>Q<+%DWM7jCx;m&+nMJCg}xhBIvFApCwWE|*pRcw(F6&F#kR zc4Ju}to44cfc-e3EG?s5H;GL_TT4kv36+&q6crUGCI+*thOXy^S^u?hz|&Kr(HMWZ z?^75*hhq+&$j%)mD>uTrt*~Kx^0p1uZGlxA45}+UeCw2hDH(JKuP>@)#E9Vx&d(<^ zGqc4D@;ZYC4P?%o*{oQ;jF~g0Gj8lyVzDT}P>^Szd!AKm*W&hta8mS+es8H~a{~h= ze_jya_wxc|_)KbThbNZTu%*mk;vgT{A&cygi>#1ER>(!bXC^LmtmK8LX(om^mkj2D7KDLw@Jh8lnTH7JRXY%`b0sg!o&?CV^okRd(aMnbN)5ci@Jz%;M z6GnEJ<6u}8rrW~rF*tdw#doH69zG0%y9mF{`qYA;*0hdx`$Yng=gpp zDzk0#^|F!qCrj;()*2rm0%`^dr+9mJtQ5$bNsu#V-Cs7A5 z88B^XlMAjEIaRD&v6(nvxDVmXGnxkvy~H(veWrL4^PiL~;@viu*l>zIO*SsE0Wxeu)tlIFZFX{o;0NK75`RciI>khjQ$}iIwJj`yb7b15BLg$2|SQ7HU3x zJ~3B-na43FAIcurqjL>l#wkDN{?}jTZ>J2$0_F@ICYc-PLz-S^91YSwY>Y-Gi-^2Ip7%1I6kk*?^tW(BqsP1_e2-J z!Q16M+NYiHyFvA=}G+C3=PC}=x=eI`Z$KS=@`@a)HP1}gu4$~%& 
zW7Vcz{Pvc|sSS7R4O6QcO;&2#@m2$fy5SFXD`F&eb?V<>)!j_f#N+W03J zSp))pVzC%CH8n&c5yIgxcJg?kK!E)GL1boTFlx*w+-^^k-XjimpU~EY4?Pz=IF7@k z&#WXoW)4~2Y*wr@SicQ6?0`)>yuD{w6JPZ2E24lBnh1oBfFM2e4k}%mh&1V4dIHh| zNDW9Y(u;_K(u;JY1qdCHPJjeL@1X^x-Tcn~ob&3wx-ag2GI^d%X7)^GCTs1z*5045 z^>xF$X^#p!Oacj`Z}&fDytYmGtQB6oNS)SzY(P@rTNGcV&>6ko3(AW7WQ;kA3EB)b{FR=n^PR42>j}>+uq|U*X@`@Yd zrU%8VdnJV}0A#}h7<`wPhZ|uF?jaEU@c==B2sm(Skvysh}k9V zI45Ik_!NS=c=%m_R-k(WhZ@<5u@?q98 zS+-p1ZyApc4Zk>QbJgM=RMS-)xy|>>uYiD#6V~})`%cfEb1aSL`gHNcK?rnmM;IH< z21vUUAo~RSCdXMhIV6(Z2TLP+vCc9^PY#&cA@j;+QpyVcX)wCqgG#z?90|Qi)E%Q? z>-wwr$nCnix|Ev?&h9oYwSICB;6^1IviVxMuiTYxkE~~uF$ois-UxuRWX#L0CWXJd zYUh)Le=IN8^5o7R-Tw2ESMi$JHRwLkEd%()Y02x>hr6^4ZT^(YJ=-v2{P2fqoX`7( zoi5(M4D3@yTO|#_9g*OHNfVf`^CSthJwb@ErNoqt@@x5#@!bpbSmdATV!9h|$wb7H zmJ^hK#HZzv9x1ygee|(b`<7NKPM{MuU;s!T!x~Kg*Hs1LC0nPL{l>|lR=DP9^MGkpR;R4=YZTZ`LCX2OV!^03>`K0c>H_y3E z+Pi3Q(tqHwH_ZOtq1M4%Ag95g&R$I%>0?;uW<=JE?PZb)Aav+`(?H9$kE$*DG-6t2 z((zOG(5MiAE#M zU2qALUdYPr)e5ePo#qncFf{hEoe6!yT&;mQYueAwS}e~t?)eF?m~ zl!!Ct%Te6^<8$DJ77IGV>wBp$Iy4+=bzXD~bS9B@AbSVDN{M5Y&ExEntT|zhz~UH0 zo*&t^W#X5CO*4w%#vHYbsoJ9XK9IIf!OD8O5OHP}>c&1#A5sT^)tG?Klv6EtDM*jH z7L#Xah%+7>ACU>>5h`Zdh$y74@(TRq4=l2@90^-$K}L-!V7kKY(wcz@>U5-^Qt?g} zD3J}lqE867M#cB% zE`I+iTjwD)O9TIWJS{VjkMm zQ}}Hz8$`&Ol_cc4?1i{Ri1%nrwO8D2}Ltol12ll`C3 z<;zn09^3jmU_ay8wB3bgd5q^%ML98yFV^#=s>YA>FY@eRA&I2o~-&s1O)(5xzc|$l4QUYsn1^{*`pQSKGzV>x7btcz(PWFQ@Zw$%x*a zK!@zW5@#+LNG^-1M!Ow94BBnF+`d%TIbR`?T%wpd?2iWE-u_F<`H9c_cmK z>k(+?EXqLMcpl_SaxVDhT39}ifbhr zkPGBB^EOda3$X_ExwsWYo5Q9#Euiq-4k2OX!qyDPD^NevjoaX-gHFY7L?w5h5fr5? 
zcHglb34wHXbkEk@BO1A9sZ#_8CJ8@su(&6hEg(KIUjnV~mAaLuF&cOTp3(lG8f4Dg zc^Dc9x~KXa0-*We&u3<7gXturkkl-2aK&yJhRHm=V5 zQZR~wxxZbMtIh#VTiL!tAAFIAA^9geW6Lev7!|(wzF8CggK@@fQc^{X_9;Vs$_wMT zvGkxWo{}KK36S*R$LsU)#9-r&4ySB<>&t8)^Z?IPiKW-d7x`RX&M)CQ``q7O+TXwV zYGs4&_`7XlSC>Y+Ps~Q5YLRigp`EZ+YKplm4+m8efF*5)L$gW#blUK4_(t_nUHq&M zQ+V;-<(d;cQ$VNKwChE5V#c2OJ>CIIZaw!{zq9ui0WP8?P^Xuz`4b>5F5PTpu}AiZ zh7l&{pHS%GGJ9!R8NGrlu=nYA=)WzIV(mPwrk1a!3K~oL@{AN-KUob^qfT8DU4^c0 zNLVj75a-u0s!z_Y`6dQKCofnW$gwrWQZ)=wNc$DfxwljEld8L`oGD^vNm)b<)`wf8 zKzJShI^eXnj`!7L4(a6YO5U?v3|^q&o-u#^{nwIMyK86Z0Tj=kya)9qXGrjlQ7c=` zikA8FyG`aq1XUV;GK}Nd`QD2>+jJP82IdUe*iDixs`H2Ici2QZeH@zr725sDnGIRE zLs8%wdptM#4!|a4g&n$<&y^YYEawsM?PA{UE=L&1rkuc*Aea2}kXtuJ;`<|KTmU|u z>o7B4Zx%52yWFxiQ!+m|`?*vc3Uqi0JvSGG7E-no5FYkn{LBOaRdkI=?P0Mjv=rb| zF9c!R)NO56Uk_~1UVuc)lIwk$h0Ho-NgW8TG*8`H`;QCWU}tT$6pqx3$MP3n{@f0D z`=NbNwyt2!lbT5^^REVs>gv41I6Z#(*e2o{I5pM8A71W9lOPl=%oc6(F0#axV<(;8 z!F<@J`>+c@FA-t&EMjP{?+0loGwkz@uijhZZsVfNDaoa%`c>^(c?tSCf|oBO3FCO> zRBcp}yL9LskyS@pr`G)bj?|Y>Fe%|#mfGk!;R7|dSEX)H7Nf=jS-j_6$5vo-eNQj0 zE;VQB#CFBzO0e$vCW`Aqiosc_H&+j)dJn)OF|fmq``qBkxF_djjYn8M-Q$&w-X12< z%S+F>(+$y)sr)l9(aW+D5H_LXNaa_Uq@ZFb9CiMXn^ofTvWWse;jTKBhfsyny9QN+ zo6|s-&u@u986hRFiw?#70Y=9~#CE?#{>=qs*#FSD9ooZrWcIoI_V9jwfUUq<1EGVU znmIPGUHXz&qAzke=ajz~G6M5lx-{6nES4v`Dd8 zV+65|i+_rj*Y1pz<9@-#X5_<$%6Ei7KZlT5QOCe{{F?8Vgf1|WC^V9yocqo1+}1JK zFvlB_jQcPhdpmbZqwS_6t9j6~433;(-|@dLze{)}Z}F<-p6rPA8yGRE zo6pUG&H6z=o_->5>`vV$H$KJH?~-*FYqGxB`r-D#(NlYQ#XPX6MHS4tsJL#?TQu2i z2K4Jp4YWoy>m#1_d(x9noSfapU~=-UzRBnFx7EePQD8TL{Q0p=V=u;?(qg?7h;(Y7 zE|+iTM;(E}IXyyRa@##b3CR9$M~x=)@T=&``x)cEaN!2ck65xvKf73?KS*tp_h&T( zHdU7h_9hVa7IY7x5VIEwf|4=P^9;W&bl8XCsigZ*5`gGz%O0T5e#5&o0fJobs0w)r zsem_^up$Br%5A1vv}0(P0E8Jsr4&pIRi5L1I$7P$VlE>s^<$e*?k#nbxMaZ@UPMLU7oYkiEh$fXyD=DfrGRxf=|iXFGeQlC0v1M z%0wid>tizxWJxU1n^^+-6A~xqWWcO5nLa}k{N(b_dj_@I#=UlXu;$w7FCd43i*ZcC zaw?9e`_9hnP(7A!V5GxfgNN=tZPl-30X8=e!j1WH9QYG ze6ALj(ekI-y`4mTSX7TSE^H=`ggI>B@QKiSVRrZk-wwO0U?Qd_g<5}FfXtOEeNC)F 
zH8gUDb5*>b)MWKyQLdHurG^{RkVsaRBY3LNqIlnbUC60qT5xsGFP8u>X#yRohaPnw z-*d|eO@OmmB{`*RcqVW7q~hOgO(Nu2+DuGATD)imGx^R0()`o~%Xu7May*1n8QuKi^mG=~O-coLPUZHgbUM{s8#}_7a-ulY5@9 z2fZ42$K`(UaYex85K;EvzQ9!>+mix~1ii?a=%>MIch7@>U3UE9#V*K0v1T3}yfOb)nS%rpQP$vf6roXE%-SQR6|J%c%`q{L}^Nf?~YrpkS8*+6~KQ*%FDlT>hcT4Zz>E zt1?PPu(n*h#~!L%&MkS&_{2oOKGFl*pWW#9?R$zTq139Q%E&@K{5gx5|Gw<7q#)VHT^YHVl`%h&L2HN=Zl=I^C6SnkR+ZAk?7#rx0&ovLI&Sh0RkFSP{3z9x z7!JuU|FF#2kvO9*L7zyS5CL(OrGL%YSJ!?v&zS4GBz;N{iNF*W7ZY`$3VR5`zZ~BG2~9E$Z^eIn*d07_DsC9nR(la0MqkGUwXqhJ7e= zbkrIVZ$8jesJv$D<0!+VsWRQvu@6&m$j1%U85=!<+tW7*Fb;`q2a0fp{OI>&jBpG8 zLHOMo#%-uN>yeoQX{O!LEQ(>lg8&ELg=a^HGy7I+5yO49;fgXc8{t{%4m$w~oT`?< z)8$Z$xhat&ojlKWoN4dvcqlTlqd>%Nrrz#Y^B=8}>n2 zWBQ|0dr`Xu*Y!Lw!v=Ii*!RtE<@_)JCID3rRejLC$c<#gmPnRFHoK(G!ymbXdxe!hP#< z)0Nqz9~kHe`2=sj;Jj_ZK`i_>s102gR8%HAsfXD^CjE*>WHO670zSwZ9&7?)lTl51 z8L)+s(?*4E(pbZq3b9W#{nnci^h}a}lKj)Sb&U9KQHJSMhqz$%iVB6aI| z+g)A-*oZECu(i-xbLBLe+oPsuB;f^r_W2*KZGxjN;KM%k*!svBVV`Z2yoanzh{7Lj z*=m-TX@5&W)T(6mYf%-@ngEXUIWfQYo7aE9$?LcPpr~AdpWNtQnjLyYxeTR}M2GHZ zx!SPcuaKNm(^%s+XTF1w_R!a$&YKCu@m8sJn&I0=i8J>p;sp;=;DX!k7lg#1XF@Qt z9N#2Gwp42J{NWRb(qWT~L`=3D@@oB)i~nXaVn?w;f+3OuNR;m6Irm+$@8H-bn@cl8 z5Tke-)H2YM=-*7}x7l^sKQN#X%+A8i5TBft9O{&H%4Z(|r{d-1ea6enE+k2jIHMm@ zxZ~6M^G7K=6S-v8L6;bybWk%>D%dq+bj}UgC<+<;!24=ma^k#J$meipLp#0wpF|X) zg(M!nqd>ms3m_*?*aHR4+Z`Pf*94q0NVv|Yn7x}qk8dbqLRkG`dRJzPbADcGFE6pK z3tK%`ZOw13+1_?mxOz`jILxHg2ph?11bGiW?n_{X7mws%FWOe^?3Lu=TEq8HZ{!yR z=`7{%=~#dwTRID|x_XO3Zwb#)Lzai0ASTep9DLn<#oeWTkway%F>esbWtO^{Ps zEkjOewYGhQw!v(ngfnIqWc@LRxj{vx=gF$ZJ;~2{FbzVpUD=-AmcJxx^%9@!W1avF ztV^D>by^oJ6DTavz-_aj5W~wTL+xfoElzh)zOmktoU)&$4ikGKDWTyS{f?438Y?Fny zJ}0;jn)v*ceQ$4C2bLe6A+On4-s9E>>bCw--W{z>Ew7=f^L^cjLQjwJ`>$Vd+4Jw5 zImVU8ey^e|#-_!0&ks1X%FGgwZYw^f_lBkDRe==ab`qJnOmiE(4e-0CPK z7vA<9{rK^dt?G=PXi{znQC~s@6v@acQcF!v9xu`)m#7;0gLSNNm~znq-ZC_3ugLGK zR)6ITkQF0mDOZ*BqITu|*o}JX5OorN$M;Bo?>*I|dsz0%AMuZ^hH*QfB^!=+44wF`hY0!C6Cb;&= zM^Jpvk$4e|{4TF!;dRTB|MI4e_(}PlWP8 
zddqh~w?|LZIUi~8-_j>iB_gWbiYgr?R8)G)Eq-T*lgp4ye5l^*feiBTKB9)SFa|#s z7_vckhMw+xA)HNsSAJdTiSQdqcPb~FNs(o14b(;Z0gMDxqy4 z!o)vh=fh&=`y)pjSdwYH-h7^ep zW-eely0=d+;+D=eMUJAnKBijDq&_bi@Q%~+QHc_5K6~er-UX{H5Ox^UIyyea1_Ua; zpg0#j+_(*8Hr2H`tloQW<1J*%2&Bp;BKLms66EhiXb7AIERV$8|!A|ZAjz_+&wr9Xq~PfKY$3!Nv2+SalO{pAKD&4(knVhQ6tD5 zuh*H4(+ybVCcSlWmvV8y6ygSWNtpyF*jSKQizRCp`{x&93&Ks<;k|p%%r-^)Z0BV` zx!rA0A3g;pG00aM(~IdLYYkp$#(TE@!f>z&mu82treP-{?3!EH0ZguRB$8t`+?e5% z&E&nhe031x_@h#T|C=jN)pOW?@td!p&A3!T9#BCzU{ZbbYI^H5Q8qyd1hhg0;oL-| z3++v28??Q($)%If%U*@1q|jaPKq z%Gt#sSBC9gxfQ5C*y~KY{7xvmv&{^zoE?wASTD?umz#DFBqFsr*Y5}~F#A=mpTn|711I+gB)@X{qZ_18~FUtp>ecx;^I<|t<5H=j)!0zEKAMb5PD z@G`%5KINHUM);5j*hE6G@qv%cXhi6XaMb?BiY_bS9QO)V8x5x}<6sh57>O{8_5NPd zqfq?9e)h>DuV`zr@_=!RVuA#XIAuRHgg&B8^_q#ow$^;NRM^kP`yP@YV`wFZnoKZf zZDlag=G052X~DtfLfgZ}bKncY$~7U!&EOEdHL}gi5dNDqp_R$VFq)(Nu%sSSupG7RXTPeZu*i-@zM`Ha~nzR@v5?)x{S#L*J zC7sh(^RotbP;hh19fPFXhT}4-xkQx{0O6~ z3)^bOX8t16Uy8D%9RPUsu%PY4+0GY5b8o$f^J@=MWZNhwJ{v>A@;E)y`nr^e;=^F7 zjeRrX-by+vVt|Psfo)+epWPl=20fQBo}*;F8~Icz+J3R?%Wl`=o5r}0Feh7>74We8 z(z4n?^NFqMJ1qv;uv1f?Nf^0cMkH#(9&8~YE^ck;vCwyJ-OMEA&541nr7$~7lR!+l zY@^m~7|^wVrE4f5E5G?jR~IpKU|?EqNEd`j zYhqOvbwQr5s_8C~51pvxHycJz3W5P z6o#I?2h+BlOrk8b+A!q&Nev*H^XQP~fA=dB+>|~%?AmVJes`!AC?D{6XS#x$TQ+uZ z@Rg?s^0utXUmoI!T~9G@yBLr?J=KaUEM;V%(H#o#$@oYbERR@4cM0Ac1m!S`lOMRp z3yZ*4Itwi%jFcfJL<)%-4JaFzw10PmSEKmOydaxaIYWxkb!F4bBO`i!cBZ?EvqODH zB!WX-o^zARxvPKkK5Y61v)|F$WjxA>k!ES*G6@b(&SFtmV6ioK0>D4Yb#xgJ8}kF* zrxg>Ecg2&LtD}KvZ$`UI?I_14{}7klcp3-@aCA*%#QI*uBa`d1R=qmsV5NC$_K5S6 z-~4TZgYo-7mBjQ>fuiq-xZ=dn(7j*3N&~+dMcU%_KONxHz}EmKiMVcf`|!VB4`I&Q z*f;=%yJ!OBj}dr9B5IE_eW=28j{8EgtQ{AHh|dxw;Yc%%BzjafE0rFD@my|Nlq!yG zT4sObtN)hDz9{4lJ~^Ru6TN(XH`GVBlaHN!tLkz_WLuS`F{iMo_|4|E=(50!u-uV^ z>U$|2J7%59-j>qw@(-hSn-M|uG9R8PcpPa5v6f97QV~~sV@6*8<2Z_<<*GEgdj8#Cri~^!P8e_e?*u+zT|=hx;RpSsv3;6fSs?azu@6PS0?QqV2ag zyMR$$v8cOoXO zfFA|HBebNl{!z}b+$;to6~`i;vM1|W98xH)fO-l7<5 zT4F#19|ed$CplMZl0aQcBxhMz+9#Ky18EoN9cS=VVyry&4UIQQAUyn-ta4$@?rpRJ 
z4)+69!yHA%ssiz+saai6tnvH0@zO@{OAoI)1j)tX`v}Ro@t`&p*So_d)Udbe%o{^F zn}8f3btf#_??&F6P$xn-*l3vGomgwRvMvX*Pq=gA%!WHd-KSYgYogm+vZi{-4*jJo z0zM_*I_)4zgg(6hN_}|tbUL~1dYM~G2-W|m24jWxTd46{IY?auS=%k%D~q`yBZ{)7 zqFAFGJ6-|e2j7O-hdQlf(k56_dGR~LAr1S_tnt$y!%MWdJ^HPn!67T*1lMuV(YxYi zLDxp@LwgQ?{?zOqnT-sdHj0!V3fuTd6ih?2vUjQR9oM^ro}Q01pdNG$;!#~m$zF4{ zTY~J!;LbnENYM4b9d^%j{inf;FYlE_m;WQ#nUPnjx5SwtW(4o9=g_zt_M&pDgV=d?P=jv!Y8Q-3>k6lWN0 z^>wWLXT)ADP}=W~1d+ty_|XXejg7#i4)aj0xGBj$IsAx6){yq}+_*c(8*z^FnNI-B znUU7n=4DmZAlGvcJ{eP0hyHPKIus<)ehBLUzUx3)DzYlUZDS;z#iztwm)c^eI8>(( zI_!_Rh*sC1X8duLa?sokc#WsueR@bbFg(hYf3-T?`GmI8dNta5@o&SPLX7`zKkbGuvb=Ux@7wLq$&R-`J^g~X;VT?Oi3*;K6)CY1Hytlrey6Cp?eQd)P z735gAiqV~|vF8YfB|6|UGI$PZv>MhiO6JUL?}ww(SlTM%)`xP(hjD-|H(1Wqo>vYs zL&<U#fpl?i)&$`_GonPHvb8?@DJ!lhg*?eQblA@r3nuLZ(?(^- z9=oJvhdwJwkBV3@Zv9p1id8cpSc*OvG}YtbB!7@^B@5BwR_#2{ID>*yhTLUNHCrSW zGMI9gIbnEYXGSO1?U|`K;r-KXjZw>;=>$##V4t2^T3!w%WeTA7VDySZL z$~Whhxgo0KMhiF=ecb7ZSUL)F3zagxe`NSBf#@xno|aTcT)hyK6C@q7f4cn!=e5px zvwue_ol(~I4q&usUpvMY$VoJ7qRavDahvPIyS2XRm^h$pLT;!(-j14IU-GlFJ0YI& z%_r01L8FMmwzgb+ROGd}zLJHzU^ooWci?iH;vMutZ(P;c_bRN0+5*m8FI?Ic{ggHC zFUa*P1gIzZ>+Ykj@Vo_y^s9H0bYJy3aW!zFT?vFzpfW~+(HBokn$K&)^%9AM zy&%sL4;|bl@esPW%guuMAx)hrBUA9CnVx5NaPCm`*9|pW(blypTz2xG26wFHs~!M zt*$s8-P<^G_%FqCch%Ed+4<1nUgp_%>Si=pYOi{EQNfJ&QB3XbvArwGM42s3<6A>E zoLstEzeDuwt!@S~o=+O4CdQzR3@H9QJvX63xFj%T@p+D;GyN|CT`_LC}sZY z83FjpikOX7>={49aQfe&0oGeDk(Y!4-`p)4V*wqBzPxpS8o+%ipUfcHfQm$DmjbY zd~5B!^Yv!@-@z4uq7Q%HC^M2?-1DA7RpM|Qpteo~vLft5L%VWJT+Mj;5d=v6o5CKZ zAgM$|VTbONTM(4Y%A!w8f2<@oSY4>p^1c1Dfx#*{lX6pW%i?8bao4x5;OR62>VYhs zxlxg?^#*t-s=EaXO^unMtHjSZ2Trb+09cHQ?fG$(2;}B&LXJInG#L&ySwvZ++Y5t) z65mS!eB!P*hWic<7L>~ZOE;>m;lSJi%iBO6^PQ|$97@$zKf3-ohxdN3_G%5UA#F&P zZX#yTVz#UP@;UZKm>v{@|N2$<)2HMG74eu|kZ~Jt0^e9Cvq+QGtII*i&ckr^n6A<9 z=sGeB{`l9Mjnb~sU6$k`nV*sW+J!ts&!LQy`* z7hudD?5>KRz4y4Rz;#HS-)UQng*v~jV_8$lAFp-OKMbHsM7}jLqKd_b3~V`DF8Y3! 
zmUgp}hy}3Z{7td(6bVa9OGDS1HJQ;0X5_r^Z_acmis=Z@c!pkI;izpIG1Yl?TUH+6 z_Fy0U;KJV@z>=#A0&+6Mzj^ysS=Er-z>hw-^X+prd-Qgv-pqm%h^PwVrgrx-?}*kKe2lAR4Io6tcL6Bn?2~e5!Lgl2J2>LV?c; z_2vMBgY1+Km&wi{&5KUA?b^!9Eo$?T6xWf~K)^6#V(Mj}&*!TMw}R`OVYJt(T@_m< zjYj#Bkz|QdQS&Y+REm@Pb6?DJ`=1#rwLEX9S#;me&d`g?br=af4-2NChHb3iTIOnt zl7dvpI2F17tf31N5ca~nio`=Ab0&se->3+IDb#kab*d-I-hIfPfci_8d>1-(MRGlg zp(IzsgFTB&Te3?_c}q)6=M3Hs56@jr`DRp=4WtX0&f(cj_z7OGLFynN8xZh_pz|H+5O=n%WXI50q(ye0ZT?W^mYMc?=_cQVz9RWtqxbT?y>6ih6+3IF=x z;x{KdyF7>6I&|}Z3GsyKXK@=;eCqoF+Tno$?%my`Jij*U;Z<%=p9Ri0JKtsGGQm(L zVOz_qEaK#3aoQ>*s^sR65zIEta5{zRkn*Ux^|$MIleV>!xO;|%dvV7`I7|nx1a8c) z%1_IIzvA(DMyYKC$A2?m(uWr9>Pe9a`WA*!Lu=g_ET_A>q+layA`%}6>ss@)HZd$P zsVx7-JIHOyXJYuML~NhSy#+$eG4n}8bgcK+)tx6H|H!iDP4Tu)Mf~tj4lS~Dd6&bj z&$01#Y`oI>9H}XoAB3=85+*kq@j(5J%;>N=+9oBkl<7eeliIA_pRZ@UQ>IC_A2ed8 zey2UyFL$9`vPFf{o_!pM^i6;Bj-Cr0Y8`%j=$FFBFja+~#NCp|Myns_o&QmjPEZnK zDUvaZVXo?`u^o6?X=W^_7_3X#IE6##^tmzeMbh;);1#PCnZsab&Q3rTOOK7|*@o~v zDPxD*VEvZHNquE}`BoSA*BnWs5dA+zeefhcc%J=#%3lv2C?(bZD#>YY&e{I=YM$?h zFaFyVU*5c04*%bKeoN~n{-15BGW&lKmNa`{_6kh*+%~VP;uqx?#YZP z%jTYE>;8TpH&ce@d~eGx_?M2()-z-MgD%LJ)(O}T@F5>~Q*iv^ zQQ!3A;OiT1Gf@Otg#UgU6;p`O6P@cW)uU7+BkT-^-19iyhwIg-?(aPe`4t*-HU*DC zjvbNw@#DO1$t z*s^X0|BDMAc}z#?A0Ep!is>WUo7KZ#=Wk=6z=I$=dBvjj0=NoYZdc4P&e1G^(Kc}{ z;c(wxUwZeS_^;D_KJcIVw&QJ~tSuL*ullEg9ny9z9i|NX%^7pzsgbdYD~*>;DfWH| ztXgvU9Uk8EvjbyoIUVR;QZ;LW8k!l%=85IeizBz1+Y_~C!JA^xJ3S^r@Z{a&iN+sp znCSI>Qa{MW$S{r({T93GtLk-rTHy18H`8>%!b7eAQeB zt{@Opp{VM4yLvsYj{B7wpb-5n%#GRu*9TdV_xrso<=tWR?;-5yZ|y(DFo$QUuVq6- z>9b7iCa;<6hMw{0AF{CrrQWW#8g~TAB9TpQO_z_u2KGh@H;!ie8X8gmwhA(L_V==5 z)EjrUaLJ)t(1R}B^Q-TsO{;2S7${k{JB%1tD?5#+^nga z(n)e8)VeoB#5e9#9)@RD=6mM5v-X%fL{@Y(ZzdC5cKWkv%41Xg#+=Bk&nZb8`chyE zFADRZ!#su6KbULupoDyeGb>E!zD`b$*(7-)+UrRC4O6cotulqLuO2vy`T~mT=8xUy zZBEXGn^JT7W-9I6E)}`{vrlt7%?_45^^WnS6fB0<7?)w`t5&nXTT~gfOsFTzIJCwr&%qlw)9b{7(I+QFi#CcQ;mAk4Z+pGp8( zxXjNbE3*m~E0pBsX)(u%8B`kWYrp#99JORiZR{LUO?; z#!;UpYatH(pXuGR7j`y-gQGVQ9*6EGNH4agVwM4_aHQ`8C%ci2Y~+~ZSLB`6XvYw5 
zVO%?eSh0OYe2^KmKz+W93PuP^%hM=IrL~3f@{W(rT`09aSbE$b^`+o;R`F8x7Wu;O&aDDB7dNyzHh(QS1D8K({GajK0W7#1!MRWI^x=lj^1wN!V~khk ze7@dYRSC7*eV<^duJ};x0nJ;4?*80I?OcP#+e|)GWlvY9_*vk>84NS9S9sNBfm38L zO}pwG63_Dp@enMO*bz>m)>ZtJM1ZC=x#Bu75?}g~EqQY+ChK-5HJ3WLQ&O!yC}uC; zo1sKUr#<>iXDP@vEItW-ZfUrxrqnMAK&P z(Au{965jXndgt#Ro_pU2e34Q2C{fm+?WXFs_U9bu=g&e*gNp6ZTwJw)5lo|G{?Pff zg&Iyd7`%6X(w+i)nt0f08aVVJ74MF}rN(uYKqg|Vu=6heEkd4fqn0x@IL|*J(hwZi z8oAYZ)@dvPF9AwTPv;7K8@o%7T5oXNUmqTi`KHi*`Xu>+TT9* z=X>aC%=`|bfi(0^C@#`E8*-9c2#D?8AIOUQZ_ECsF#j)p>Hjc&ZCTLqG+ zOnM4J{1q87L>NdOc`G%VA=5aKV1w*>zA>p#R^ie~D8~v!OL>k4pp1-{X zb@SkmfhaN>1yv_~q1o@$kH`^M4AR4S7 zh|3Ei@lv(7dW!Oz@AWuBdRTTL1q65YU!R=lN+07c4^OIa+!Kc)>wF#T_0t_$wlb*$QGdLjszVLQ8AADcVK ziI?>JVRFnG?DJ20p{AA=QyA!XY~+<$V2ziFu0y9N)_r_;a~vOXbkm~<8Ja7q0f3zq zR$55i(G$}S_o{*uU4lQ5TKaFj`%I@l5`eizgwl-mpuHXav+ z6Rn8w*Y+`YV=?4T4d1mdYKhM;vJ_Vm63a{#zv%awX0Q$ktAuP67=?Vs5)i^ABeITl z9UP9eIZXXebvGbqG-I{i9|G*e|NY#GwEcP8W7NFx-Fz3f3 zj>3QM=Dt(7nYe$2#ej3h%rDko215}HHsn_1>t&J)!&w{w>yO0S_JswdT_4ry{*Gg0 z4a(D%q}8K|p-wc^Z@VB}@?Y^V3tsQi%D#5HIfGo}EAD!}^1F?im~}i##!pw0m%Ci% zZOE8;RELS+^QrY2(7f8wRa}n<;;KVSj@3#}=f61)AP;$e{(qX=^hRb7o~Usz zzQ_OlW1`h>_1-j9C?SRLx!nr{^Z^*m)CmED)vq=!SAJUOW?S?bG#{oY~L)He(u(z*d%@JN++T@D~XDwcnADMqfY>qIeRBlMhXrTV``w9_BLqcnz)n_5(p7>5u zsoKhOC!spWXFTMh<}l$eJ%Pf}FnZgK)zITcDz9@fB3J*OS@MkF_zhzfkv+=elmamb zxi5Rwt|)oVBuz$fGDS*$v7Gs}3P(fWvC=1dV>u<@Wb)^;qG!7ijOG)Ir?qT{b7ID^ z)^_ifss(peC5x&?yE;_Gi|WOHd?|ix@R~0#%y@;wd#}GTp2Ph)kpr9f|QEhmbnA3_>EghG6%uwd@Uuqf1h~buL;I$uIgnQ zz@2g%5W`RdcW{IuNNF=bT1mYf^6Qw2g(OxEb#}+4_e+6 z5JlTNNyZP%8~Jn$GoK%OvcT)Fx*WK3E+*5?FMG%AItA8duMgh9{S0kmVO<|{s1%rR zYQVL(_(;jcvefokqOaGgz18Js7mmdC+E$&*+A+aWYx#n^@Eh8qV$1NDZ!~>Dv$GAQ z;dzfcQx8@IJ+?<)_Ew3%P~0vZN2)@OttcpisUX$eimyUb)tCfo?kcxHkuN<53qUBP z>x>P7tyQSH+jknKRi?H%R3Wvw(UrZIAm>?>8{*%Xi%|?eWMM!I{_mQ6)cF)#)bgn5G#~XQPMB}c?;E%F< zS0~bf|JqO#_n^f0C$y1dv_ave3ro{-zX9=h@#iwy$zaY3hz zt~7P~b{&FMyWoOkN~a8^ni)ksf9OSSdozmf2`@C?8-@-njB*pOI~A)5LqS?ANj4z( z#7BGHDi;ag@rN2GN9%&6{CVM_Ab8ACwjSJRq5c`bE>HNAfYn4BB;G4R<%aRCyUJo@ 
z8T9m$14W$4CnH zXd8(h@!k_p`AX<2SbM)|B-EDI@@ruJusM9387!{XVzB>FN2ed2g`~-%M#dzHaA_g3C35AKF;FBxf-i{km}!K-L1 zKmjZAN0p$QX+0pc4Z{c=4m{P}$9H=ytc3Nm)wegI4u1Abk59-i$%5s$CuK?pnBEeZS0!+Qjl_DR8s2(uvtt{l&L;H}I$y&0>6^a&f9)Ph`> zX5UAEBc=nUmYuXGb9gjYmvEZO`^>Z2U5#}-khq519ACX){s9sJLFwmEkkzhV1z1nZ zU(T?jDbDhdX>yWD@AXY&^&NAwXc5lB`H&ixTSszmXl+Ay1IS7z*%kE3_bH*#g!;F_ zRN)GtD&a{uG-KCq#|qxt0F!Npf`(A(EgKo*{Y8!-_whFw`*5hxV& z?<)F^0x=HHVXn&ay06pprng?qO;4G3Fbrn)psoxo)N#k}X0V@uivm^27P5Wr@eegd zAi_X!P0ztb5R(U2z`n$fIi%LL&E;ucLHDvAdGF%ils8{gt%S7kw(f^PLEfH&Fp!KV zZ&WJn+gnCDJ;mV9FKgSy25LbfhSO+nG{R&Xp@m?|<;~--dljpprr`62pGB9h+L6RO z_CKn2OAlDFU=`Z`XwV&B5_O|INH9Mwc0uDe<;GH<^Ir+75;k(%A~IaAp`Z)*YLA z$|aL~H{_WtwF-ref*SbiUuw0Oo~N4Hj+ogz-4YBl&}`#K6(WG|!=RIDo(LIG(tM}0 z?ZFi;4L@7{DKc}ls#GWvvLf#WMSk-AQN2H=@CbR=`84}8Q9gW19MBc|U66jPgB4r} z;13XX6yW&EGkw;qt9ZaelphGRec6NCy97^}Du==@6@+g6nC?2T#ufDr3X@bj)pyaa zfr7piInMZQDF{(PzQk3AA?Sq)I}DpXyeXrNZy$7Gk_{6$bm_LVkNn!e31#e_KG*D- zUVn2WQ@j;aw$ku|X8iiyYXMYCxf_brBR^gHXM+Zo`fjEMsTKFl7)hYG?^Dfvqm9`d zWSUiEYF|+X!68SHXQ1QO)S$bMt#LEj$jZ)DAL^ehaf@7YJvb^ZcU^&8idq$?SOehc z`cb4m$VxZaN#hfJx$#^h>%pKq(qn6|5+tH$G*Ao@F__}B#$q(K_iFpP?XN{0+%tZs zN2K(rqc(M$_=G$@P#S$nbL9N<5a<6NDYm-Y2 zl!4&Dmz#K-xVgt2c`0oc*98XGrmCKxo&Jjn^HnF@?7R3>Z)YBi|44H7ebBuX_7ww%3b{EhOH+34g(^Do&A)4|ngEZVDLM&DowE z=D#pxK{oYT0ueqBjJx9p03X-E5RaLVG?h z@Nc=^fdI9q+E0iwQ`k9W#AM$5;o|5>lXQRGocp~fG zg>2cQq!Oqk{`x#!IA6D+u_dV&rIzCgcdBZj{>d~x%zgK9d(V72BZPw(VrV)zE59MQ z#`?m;YB3R?+t>n&%RuLY09gc7D<%=U%#T~nQ4>Kh(Y1De-##uyoI<-hw{BG(0- zEG(jq=Di;$MVKn@dGK2>kolV>DJ_HA4W?4bZO$Cy)Yyh$TKa0B>H*}z9J|A42flNt zNBr6PUC8buutyhM+oZ0WmS~h8+(-j5sZJB`tvFaoy|%gHeieAyz-V>ux-ShOoNFr} z%|$1=huS3yZnRwYMv_*9E7+PyYa->_mei{eD>4rh99eryc?617_Z%z)DFzkmN3}@U zbj|vwx^-F=)@65g7zgq+D^*MBZ1=bVfFD3G=Acz+DpuC<1`QdKjog$sEuNK~K{0?Q z;`}AhGY-vPP|SxSUl* zS}i)kcjJ!bls0?o0`T>zkI!d(Wr^}UVgBc!zw|j9PEBZi6VG|L6+({#QxTk>z_|qBadA#3H0wcImzwK;X6LP(ED-5+pqDoh9ux^-!`cf_KWO$dc zouhkx?Z&m4zC6K=lee)NFz`r)HN1CNU~-(FW^Q-Hb-?%YHX_HSjQP 
zb<_}LkTOmOWC7VsR)gg*R6{w2$9`Lh5$47=@7}3%+C{@<(l(!>0MYzjN&9e58ORch8cD))XhnGEj7CHbmUOTr68~m6wce$k}zUsf2zgK4qwb#&37%%hTWVXBcwgg z_^cK5ZL_-ipuxIGaIz)-@-@yl$#p@Z>AFV&xSaMRd}*7lFKoPEMEWj{lJ~nzo8CW% zDP)LO?pyGCvNL7$Q=apQaS3Zp?}%`D9OEADYOxo%BnOvE65LB!EKm17VEk3J^6zFQAnxESzGTln?<#@{Z&Prk zOjke9ovs&1ArR#IE@4AdHvL{RR^0B@Qc{e^2CjyO`V@e2ufPJP>r-1`C^*b@KE`g* zqt<`6hF=MW`VzFOn2KtCwNATzutZ^AJ9xA?Xr^%L#j$(SB1;4ARNsIYgm!Mz+@9Tl z;RTzyXVfp3DFXma!alEY?A*UY@b^P zXpup#6;N78kwo!U?GIn#eH7VR@r|cRLG#O7R|4`Bo!)YIA~GJxsjG_UTi_xU(`9{e zFrFo;mXe#Jz?OV3P3YxpO9yx|(J+^{myV>2v)g%#vf9WMI?FfxUqZVtt(GiI0`*cz z2=Jo3PW~G%X*JBfOvS@?sN${zB)zShSU8gLaT1;AQR?+1x@!qK*AgC(gxz@DY8&f) zYw=7X#(ry~r9g52VJJrs&NPabcc`IfZnb%0%cVe7BxlUEzx61)&()aw?BG(40?t;zZZ#QuipFX&n+SOE4I#$N|BR@>t_V8iM ziM?fp&bQSpH4I;ep@qD6Jr&*;ubbR5)a;sugq0tw>;t&3Xb7-QsMUgE@r5^8RWA(M zOl2~Ot|7QKA8+c1-~F{`m`oK?SMC$!ZP*jQ(JK{gh-RF`fqDC#5>EDs)=#P#?wP^< z#;@9IEuP-`yDq*-1VWUE`nv@J=&q0@H?VEZsyFxx(@hZ}u82Oxq3=?E z^;o;4al744q{-mO!A}=XP zfPhos!|f< z>_*W(KJUsresz{<029Zu8`+jw`;n5a$;N`M%B+)s8N%78b@A{tnYv(fI?z7gzU(>; z$~Bwgezk~7z54bMrOX_Cfb?Bhy`0`IvM;Ri*RW?+Fydve*lD)|)XTGwGxtq^!<&|~ z3T2D+jliYlK(~E!v7>RTQLU~_VQ28UWz@KC> zJ8{_3E4rU0lQMT7+sV?g4%EpUt`g8w6!c zTd%GvZClN4=+%Ig!<_qU@$k%5Rl+06=A0J34aR;3NoD8-#&M`9d{U5+}R#E%|v6DcvjnL^4{{E zH2sWCZcaYT=uN~~+M!C&BKc4VSDD_u*iwPPRZQjEy*!9hGy@j&lb&zwDMKspD(j<6 zBg~@r_yPJ~K1p4~RP8_U0jN3hb&tE)*L^8pwxTMBVt0=5Jg~P{K_PUV%}gQAZXIEf zUB0_&;>-pl*VHNZru__~4fC6sv`Z4K(55 ziw~sa(|vug?gmv=x8t%;1^BBwI5-q6B3@dd zYAW0$X(Yu4ebMDzI`fP-N9Yo!=af7qxgNXZ*ff(K9BF)kL7%2~4+ObDXI^Sdi%j{o7wtzc5p zj?2Jj%Rx*O5)MQJwYLxt3JFTzfW)s8bYjUkd-|`&xCokIkB(>VP%_L74KN8FYkv70} z>>-Qpszcy7)|2b5wwB~f5&ZFK2x(vq_f$r)?hk>Xp}+sVC=_N`vWq3>(xIYhWLMT5yKa?SJ=g#qGiw5KNbGe7|W!c$7oIY+DllE zQ59l@6`r?Ai>1ZyiDPJrYEBKTf5$}W%z_{@{ADwT3$~!)ODd-bbxMIrU75)`?I2{ z%zp||Ji_CDg^kz%;ZKn>&?awL;H?Y_+y%1Q7a>*1kBri8B`)j+d{ z-Yb9N_h+=o(`BSfvNhe%9;$q`L%`@bkqL+hEJVM=>cLsO6$cB`njEmRYfgevaukRcr42y z{}Ijaz1fINN}+H|zlgn&Z_z?y_Pc`pvC*5#n-TTWdii>97Jxqjo&t@raya#2(PDtv 
zMP}>X#3yC=)B#z>gpVCQz?-1ifiGo$K?oS#GlV@bNQG$C8vGG5rHoZh4q&bUZHwkB zIM_zofkD2ROxb&g4%5h#6qm9brUG@W`yMz)6LE95l8oo*g%FnakAaw*83rHZ4phg_ z`1unb>l~EO%jfaC@SWsn`S4DXG4DJN!wP6j1kHO~Qc^6VF?O74#TKu!aiL^4bdgUp zDR;i(xS^KbAcWwoG8VM*j;9p^7=>Ho&~-C10IU*Bo-f1f5w7W~tlPAAuE5ZeI}bHk zUu-WPQxxF#N#5@!sy=-~-oI~T-bTDp!aC(ZGIG{lw$no&zx;V?6ow~wI`a;!#rDu` z_Dap+K)m~rv)-}O{v4L;(BUa%BZnD^dV>m@bYt=X;lZY|+X92!x>MizR$-R5H(S1= z|9|rGD_aX2h8xl^BI^Cs(OU3tb=&sR5nCCnu&bp+ z5VOeRg!wrouog?p zox3aVJDVo$QAuHjWIhBsCkDbQ_;=Dp{H5P$V_g}B%NTY6J?Q8e3q;)Mzgl1k&-@IC zz)}E*6uV*yw3a8R_a0;E89@J^Y3P0h2=dqgs%XnfZoY(N|0=+MwP!VKUsZl%X{1Iw zR@#*pf@!a?p0EbpOZkt<-4oUm)nAG+VAop@{C?*u-WBYBTxcAmyeoY_{U?^5aVbDN z6|&!aFd}jw2Nv)C0OuxlB~j?#l5pVQ-E+gv?fC&sN{kNNA--I`*tFh_F{%ybcpfI~6mN}IW0 zVG?@bywn8u_|mcNKQqzaK$ZkRZ0}R*gnAmB78}-ulqA1(n;t%85%SUs129}ZEQymN z+4k5hdQ@dL{NQbi$3C9sL^V#kZ3zgs-S!<-ZGR(j-ZOB65^8BBf(y3+N!0f9gQy#K zk%_dmS!Ejy2D7Dy=M9iiP@js1h8*w-7!8e>&JDJBfsHr76ECtf8S1LSyB|=6UiW^l;L?bL5N7wEu=4eB*5O zj@_U)WV+A;Sjh=0NTeDe;tbP{?FrLw9XUIBT0X*1s`$IwZ_U0HRr{;3tSplD48rQYs@?9FvNknQ%0%Xb1RHt>G#>9}RYiN|sq_Z84?K{jl8orQ|{FxWUE&^#ls??Wd@H^@%DdEJL`w-9&%uA6|DV(2)s;rlAQ!Sh~Yuj9Ue#6IvSl+f&_nmG-=v5imk zq2q;hF)=a8ZW}QLja#{l@R7;IUa`->!MLxXsrl|Zv5cyYj^X)64e0j%MuX=p)mslE zGfKGDcKz|4vSlu1q$VUbRAYZaaiiE;bNKU7jX-1L;oiQt7p_36Jvd|JUEcyVbdB|z z_i^ClkJ`O&e#v_QIeH$Dwgu41J=h6K^V@HcV)Idu@;daCVxMz9s8PHUUP?Im@<~si z^2T1Bh6cgzn<=Oa6;I>jzLvT=xAXdmW^dRb$8cdKDr2oti)Z5plat7>)_Qehh`_ob zEV3O>aAhSY$r}ES5V|*k(i>}VP52SzXtg>4t^rv&S|K0jef@fWr&&Yc=vS1wdbEJ+ z+CgnG@D>k%ma{hG*pADBBBZuZ7^WW320x!qpH{PxN+4JujAm*itlj^0XO;iBh>gd_ z)ed&tR}FrUda{r!RyVa!89L@RNE`y`*%$@czsj#9|c5(QMDaBFr27- zY^~eyft=PYNa7S}KRmMI6mpzXLxZP#4-4*&uIze*la}7~UPQ1TEmN|KTF@Pb-iVy3 z+%z-e-bzF0yR;A;%BB;TK}GnSo_8Miv9BjL%Qs5_Tq8kP2|6 zwBImHiV=|L-5S}Tn=;+S#bwfM*)uK1o%WL1?wH;;E`Xb(ZWeD18a4LW%XeMJEf@Sf zL|=7A6kykMCbRX^06w-K?L20>@6@wT2-^Lm0=|Lqe{y7gL<1iUG+O-g?!-p#XsRI@ zpnv`+&fS|$cN05hhiU;r|7TCI5Fc-PI&Pm4^zQl|m zr`QfXrx))8T!S3zRZ06xZX%r8e#<4>R*<&@ZcY$vY^pkSaJ^UKI@8Pn=6{iT% 
zc!SAnXkRRBUMIkY@rY0WIZkrE`>9R1Ng)`)A$o47Vc`8K`?ZMcR@Up;yZDJB5r`Gv zK_+H(Y}imokMDl9Qz7beZB)d$%sOZAhZ8?rERx}xc833>$7&?Xd!&EkOj2=K0bUAG z_K!HDdR?Q#E7FmKF?v6kOTU#oq7c`fk*ZO>Km$mizE+bU`8=Ea>btyVb)iBqLOp;a zN;^@!3`n2)RK8vy3IN17h{UlGGYIwzMnIbbWZ4g^{PgU_Go|x<|H8Qd8Z>ycnef)v zfu(o++nWcU2V85JYqq-dJ@9#o6SZDVz|*T3mLcSTVM#*KasIZK=$9W3X&&6#Cg!_T z8ymSj#RjqS%=z^MG;ZM*7?0tX>%x2QyPFc~FbbC5=C`R7*`$hmOO2pNH^d@_ zGA7JDtq%&qA5nIHIPAVVt~-Jogeb?IyEdB--PF+l$JL&{6vR@4PMREQ$DKzuHM6d+ z122ZvH}7If05ceg3zLu}5pa2bBQKre+?|q1$L7mBq=4!mlV`~$_N5;~XJ$Ks!d}^E zqNa!#P5Q(W%i9wLBR~`08*ach(wngme?trnvd%m}V(jRW@S$;P`;wM~P^$7&QqR!A z^9GR<7Ra=>x6Sz+-=gxn^6h=1@^doyC*!Mxub99}<;5x0#mIJ`!1u4RA5cclI&oa? zwjK9C*3G$8?fliSSgzx=E0N%MxMz9{_3#*(_jN^_&c!Fql%NV>)obKn)KKJ0I-P1N zuf;28b_?u`ht$L8bA*c?8)~0Z#DG@RkzV%!59qlC9fi_E7_~i-gcrxKg9Lq(Yv0Lx zc$gEh=ee0V)fQp?tYX;4`1zaI23J8610I@V&C59wREV8b9RLyJO9Zhvwvxq_y_?xZ zHZ<0UIXmfL?X3#P%!MmXzdQ?~xTwq!BFDvn?{|lQL<9v*|GGM9r>&LKMG{JEd#UBw zu=rs0{HJA8$fH)c`a`JrYN9^&_ay4!0V#%v{Rtc6A+PD+BlrYy69tAv(MJO!)X zh&3L6Q=@X;nLF8Rc**zC8@yC=hy$`f#mrDVrX@k6HxHkq3ykhOo`?ImpC$J*@jYcu3YE=*Qm;3Lr+FF6dSh>0yV_CFHQ z4HV}JU}3p~AAn^Md+#KTu^65YC<~c#eNn;YGHy~tABh+P49}&SON~hKPgrx@^hUpn z#CXRVO0hd}bQM$jxx)zX+YIe){3=Zf)G%uHYYB0ZsNK)RiI(=3zNoD_hTLBgJLUyy zt7@pdZ~v&Ra+Hq#0&1)7eafsD3h3x|-7=SajT3>_=bKSfP*L8E?`-ENFEn8YHK~wr zzcl#O+on8*x<@Rg@2k5FNjZHV$3U1v#mGlD9V|Pkf%KjDiSY-_f0?2Ai5^ZjR!dY1IK za?qt&!**)W5dGe_a-c9fIJ0yI5w!BuAG4A11v5v7;ZvY_D0A}fh=)!jum?KNBhJl% zV1*NjhfZ*${vHPuhC2*pKV0Mh3_!wxJtAMG7`|1LzL#mm?4r{%W0|_KB7-qw*x1SAPj@B+$M)?&tin(byeKKV_^@hhQu^4KqU| z1@QKh&6R@5-W|0>aK_}OrG25+`{fv4>UQ#&jrr?D%bn-N(QMRQd2qa|<0Pca3fNe* zH&7BZWr~$Uynp`>Mgn+6m7nn|9Y(oFumQg&%HLq;%ci#x z!=MLE$z_Xuf@Icp!9Ajh4^b25&Oq935_vo}KgVyNp%UJ|tc~@A?&#VvuTF!?i_qy` zbEKr*E5r8m)I&I=)YNXOD$zZ$GJRHCJUJn!N3AAGOnV5Tu-qw6Sa2R}CFP47tkNiz zwQ;4;Eq`iKb3<;wUg6m@oR}CzaGdD7$F{dO66f9{e{O&KT57Ivpo&-?PVNk0_3()hU$e-HY`Is4`2 
z7#mPp_@{8OCWT3L-Akd1PCqZUp9T#0mVEgl?DuS~Yb_Y2l-4MX_G#>y#MD!D&ibvDggvgCpz7mibnyhNmwdJ2w!v6z*5AWxsxZb(HihF(}(JmdVIunD-j(mMd3XR=Kn_lhgD=o~VhB35%D&MLeIc#m- zclDB^`1Jwb+#7MfsM?DLb&Y7|_*^i>d9T{{7`E%wgRK-WXrKu-*7{Xq?1{6w3SJ0= z`@hYLvRaXr&mS#m2;Nn+$+8}ODSx*~;Iq7!P$<=4`p!a*&bxt|q6daI9?l5PM82Ln z9#QjHNCKA%Ut4^io9zBJRO!3n&@We7J^^fKUJEQb%!KQZJhvTHx!PVd6G%y3%35>G zPX_kx9!#?4X3y^K&?2Y8MglK|5sv}nY-&Sqa+97Z(aux3%!QC--5;#Cr**UY&wQ~Z zBH3}(K7gqr&k><`FZ4QTf`h*)P@26bu^3~52ktt#X377t({Vt(-97|SkkfSDNVw*- zWpRRhPzb!6mPH$@{zbrZ`C{GoHRtF1uh_!okZPCqt#$=i{r4=4V+v}CCD_(U*OXc2a zhxZGSKCQBW#klC=ft;8!Va~+PRm0O&hvx|kqMljNL__2mdgU61V`$7ADf}T_|ESTv`s@Hdxu@%(v#!w= zp_ZkqSx&3=9$WI@u-$dKoSFKFSLn2t_u#bd(w3wA;W`hI-TB*A>^2^qJ3Tr{;HE%L zMpf^wj>Vg;9xTX(y*qzfXc@6Sl#i{_2+IQ*0h~gJghgBMgFb!RY?9&c4@kr9?Fpd9 z*QqzT|NFZY3s>&L@efOz5C>BDId24hP9!|EI)Juu*m~~;eaC+ym&66y|I^V;c_EJc zYQkRwn+kd!b@yS>jpx6W>kSmpBqN^7$Ff^Gg*{`Ao@AQd_uLz^P&`9A0dFba+6uE_ zU7c+WN}M#gjc;yiw0(e6Q0#90>K(X|N(|^ka;OdF>RGJ#cI__bUj0dcbz>b8i7W)$ z?ZkU;y^tw7RV3+pR6+%Oa7=fMp)>?U@eh}I-JOsS8`_1XS~vlHwjq%xM_u^*ObF3{ z_>4yK`Z;e%fP6*v{1ooyBz)jj5OjU@mF*d(1{xCDAAGS{I4?*&y&FjXGn zy-2pC(2)pkY4-vnSW93_=o4T1Z)_m{&+6E~10<3nodyf7U&{PDXkmVjE#Z|>5+q?P6k_7+JUrpb-S-_%~fZ z4O?1L^4j0<{r?PF$m~@OM>d@b=jGP@Gbf$XeI+&{5yw>D(EPcTDtAGfUFOlH&r857 zfxhJBJ#v-zFfuUcEIung?kl~T(omd#3_EVngw)AEYV&uUgd4%dBc(UZHh!Vvs_yz49cN8=y$?_w=%BGoz$iK~ z0`DK}`3HmlYjhfh@z^oi!ey4WPD>g0^5TT7$Gu&-#?O|*8}c(QB;PRw=vn5{UGdPG zm3$B9;J|*kSp*@lvxwo}fDW+DrWh@DBphWZz_X%dFFPfgQRRvdRoWacs5`U`ZS*4F zV$7Ei6zsnTh4lwLKy{c@A0AiVwWVy&nvloH+Y}7cb`wkT-;S#MYL1MYLPwPE7ZF7} zNM0FA$LG&xBNQKK&O&sS|EU=A*pSIcBJ&SEzU_I!F4okN-gwF?k!W4E+PPGH)H)7* zjom#VMpyuu^A8-t#P&i7ijVoGIE@Xa#7W30&IoL1-vO{#r0&_w$@Xx|Si3=M!&E8@m?3exK>jlqzo{=5u<=BYtvvez5vz*A=UKV|pxrD=mq4 z>X$oNye}{lHa*`g0eUzAe->$zVSIt>fdY9Zyqbx&qcAS){@uw=BQB4Su($5D(gTB} zDi~U1O@JZp)pmz795QwX=P>8#52~afs`f_rIJ}Sd+%`TBWbV_oUiqAs_WCjoaVc!X z|61u)Cuzx&5BKQ|+%`1aiql8bpZ-{|nU>)Ez>vAbN&b7^by~K}%*niKZN0p_9LL)F z@bx{Zk#W%!daTiGadRnKJh5Sqwflt_8z5?VFEZsGE5DJ&Yr89=L+Q-)YN>HwJbSA> 
zXXi0?yDNe++=pmZmrx|G+2s1ZoW(sb7;mxMhaU1(meOfWx=m0t#U6WWTM``F?^#ss z{C3WPyYFuZ-uCJ?J-@cI-c`K?oAZ1lL=8^r71o<|u&b9{8#L|+Cmu^Q?|wf6;q>WH z1ht1!v-sbg?;?NF;Bi@49Dpwq?4L4jG`e2R5=}|b&k(1bubFqBPWd=~jhJ>MZeu*V zjFR#YA+YZ_J2%bf=BSpZSDf-aU)92?-9y3SM1^sGXU`!5>#=OK-yYr{VG6>@|JC&N zo;E&xh+OiZV8OLqCF8r=g?9=xZ{ER{MTl)lD*t#*V%W$zT3_l$!4;m-*^BPYsGZSc$1|i*iwQ(4AB3I6J(c!xH$o6#B43sdW9> z^RQ*Z`Qt&q7n1&C7Qkyyu$E3^$5p$s0K1uE*-hAu4pV=hKY7QjCf=Xg;U_)xAe)>K zUJQLUHoz6xQ09xBCkoZS_m?MLeEZk*3|h;(7^wmuZTt6)WkYi{F1CSW{^K_zU;QvR zucJrq-&ejr9LmFjhJo1y1itLlZBo*Nz}(v=nwtJqO8J*A>7>g^OR5viZ@j#wc9*6z z&Zee#pavVPq5%%ret01D+(r1NYcnowUA7|@$Bg{6!lu!5bpN6lQF&92k(v3@JBE{Z zmb0E-?vYFqQgwj`-Z&GHUP^;E@bZ=rig8Q~PQnQZT3koxJ8p?{-S~?=oi5JVDRX8P z+#hm&Ec83Z`Z~^b+8GqDe$QC=VL5J2xY(1i%Fy1WnKCaMRDCuH-r1 zh3~=qiuLwo+$v@@4a*yVUe#@~9`e{s;hRSvQtSU#HF7u}g-c`cTr zbzzB=IJ682n=D z44k2+Jf`cfPsQ~0c zKDgg0_~pxu&=(6)kqvRx$(x0D^|VR9$+QG2?+!*O8NviiM$20_Z9i^znV7h^ng3

q#)_IgLCz<+`Dd5a-wo-88?hC9X0@1b>@XW2hOEwUAL7HoVMR0@l=}!$zJ} zM)oP}eucHwBFD09?d?0k-*2$u+PTjqiXK_3HF(Z5875I?lVc0}(l)Q$kWck{Ujm;B zxYLwRT6{9};i#Q|tJ%OvHFs3xfSl%qFO7YIs~}B=3XWmo0Yjh0hz&D_VmtalgVd_z zxJL4+Yp)2>y0dn7mIXs1P|74!ujv^@gU_=$(`1$q?1a2Lcp7s(T6uFD2!)`o+zW>x zm-*YbDmmf6^5$UgxGI|tjT2IaZ;r_3cb7Mv?W^=|1K3~_e6acsRxz9b-=N=1Lnc#k zBffuaj3nW?Hz7ZNkys!>Ni2(8%2ko*m)yX{eauK zMYLc-DIOLgwS=jSbM$B-}*vEWeD z*}LOMTG6c>srds(1{=yOgs`E^CQ>vI%UqGK3;*NQ{v9^31OCLL3+Z9v(Lb;JFU}FT zO(fgU2IjsUJ%Aoz^@RBU7A3L4923(zAFnvPME^g(2?BDpY2jO53m6}`{Aj1E`te@VFpb$?e6Brvy=2y=CuB%pXiHxkE!wb}ar z9;Jw`>*o!MyNr0ujQCy|nt<_0l*Y)7IavhDa3q|!Wc2us+cjbiX-Ivwer z-P{@InzbuSfvGQ+U&=adTD+8WV{7r(bk~ujN90W4+^k~NM$AmX`OL0qi}j60rciky z(W#GG!p$GaItLZ0wA#21r{z%U!(4}Uvx5|I4rgU&xj7d04AER>ZgM{$c0!%QK?!aY z1BqO}wzISib?1I$MfO$1#@SHZKBFF@IWtUgqj7)yAxkf+AfKU2UZF4c2Y*eO((OgL z#A&;;TzutiXV>%mMy6vq_KfpNOGYIVp0q0El?C=W+&uuYr6gi%5=JZR}Oc9x-;w1;xnB%Gp2&|fgb}c$7>F!mmR8|Zi`tQbq>z!O~ z7%e?k8`!-S*NKcB+f`?ba#Yc0g>$g&O8uxn&DQ*-~7S%L11?EQ(4TNkyG z`L@N4jSU35X~KU2L?fIRQxuWJaT?5aL}a`Vw9MxBb6qI(;_dW~YD9nyPkCFpI?wrX z^~Gr__{-vVQ@8r&gU;#2O5m4&1+~7@=?(sg%IPWS-RbGL0qm9jUeUTo#p0z5 z(tbW?5T&G|BIe_BWg^x~X#fC{H^W05<3tYe0Z3U!EziIqDOsE&d@>cu0D9nrY&5?U z5T)d%qpt*5Hb@6fR}>4Ax<9J$N#NV!djeyv_DPCmD#&a4FhVsxkLPMqH(XLd+mb1WD)54Cn_iX~u`b2>HJ4>>@^K(zqd4^|Bt(?TW z-*s0UpKVG4MmAqVLb`Jl^O^f$3wIUlV~+hYO`v2zrkOzSyy|>6;1{RJEVE+_m6ohD zhFrh&tW?F?+=Gcz&JA?sFhM2XqAFP3cIntEw>Sn^Xv5dttPIvxaCB^ z!Y_LwQZKsb;Ni5`Bd4QpaF!h1qXB2B?dn=ZnOdCAXU!BD`qz#a8|nj1`p_Q*kEXA? 
zDpG;(K9AKC*Dv15RV22p4ZcDYs4%F2stisFNyx0$*PXNH<+>MFXd;9AatK4kj2d`UP!!dRc3A zN@$~yDy@N^np7q9YM?Vuh^>hik`CDvWPDfP zq^d45tYC4=tjWUaBUEj%Op;N5|JpX9A z13so^R!f0UO9C)~kB>jEX~!qNMss3Pz7zkOq6{2*ZKJx{`mDW${l8!rMuX-?^E|3a8cv z(}?-+u8tTT!oMq=_?h=44#4`NZ1kMo4sAu?x7N7b5~J6ar;*MjViwL7f9piATn|sXi@kW90SX z@0!5G`zcz{cppDJ!^2vYZ$9{58wO$X&;H=52+E;2djB~j(hn9VK<`h4(iR{3vsKnX zU_C@6!BDp{Q$3RM3m^-7O#HYm<}KWn9tM7{bf5X_rQb~yLbjO)VwJ`eA*{&Cmti(l7rDw)-#l|2yvdzbxkecklubvH>_-HNKNj zeregsNK4t`ne=>irrSZA!{(+j9X*%rRx=K8a)KS`t3C1X%+Z1!r5u(kLVzT5n@iSGPT++}VvJ6MV_4#M6yhmw(dy%CHAN8%M6|ozy zv#ghQ=eR@M2}%+;kF)Cf92%M}+y5HEX_wav5mHQl>SqUXCAc>+gP;qNHLor}_|6Pi zPfCBojq0-4EA?!*aiZPMF*~o#HQsfANJ}S^!;EKU9F|?k*F)6>-i*dSySYS6O&!wM z*q9UKWzRi#UBvdi*wVHdO!DEjtFQ#!NcCQdKI%qww|2RmP>){amO}(eda9WVvg;7> zj3H{1 zPDdf(#FB6og(&^Q8#iw(*f~_>1bNz*fPSB5|H`$K!L6y6NVOMsd8Xeq6ei;Z1h<*I zpy%YY#{$ZmlG}o~7UUjJmc}1{ov5Rob82@JmU5NFw=(CLFSv@I>ObMFWl3EnP%u&b zJ~$)<BF{e_En{yoF9C<{qeZ0^zRHz$OBi3l_DNVR{TPxd0+ULboT!Nkw9+0?@NAPdXR9n zG`KF=gO@Vu^T9m#B;!ygu+rdVjqUn;Wy7Yx~WOMh~y@xEPMF`-gAdeU`3mn{jK+s+25U zfil(V(V%?~9)IOiM$g{HIaOYg8Q}~0?&}pCi-_RZvTyiyL1dDV4$iC{&#?DjqtC-{ z@y}^T4$`^-<@O!7r|1?hT zN9bxsfAS5h;w%I{*4M4~pt-i+++a*a2J)=v1JfD$+7QOBJ40N`%@lr?!#mD$c*h!M zkN=Jjsyxk~ONP=ezo&C8d75t#0s{<)Ho+)J@nSwnM%vjsbvU2>dKQOIW13`iFO_au zH=f+CEAw*QoR{+^a#b{krVZwa{$H{7q65>rKFymH&9E!O=)HgN!ox4}%l0rFS9bNw zoY=95ac?bS!G>|HocuVI(zrG@BR28<)4h57pZ!E7IaH#9|6|U`Ei7KRgMU_y;@;vZ za}v$YA9``u%dD1G2)5;IOo5W-yR6OCjZg@ea2(2&gH!6P4xle_^EdV zJk`ZuUac9jAAE3};h&eDK82C{uPiG@9^VM}@Y;n$dZ+74djx&Nyf{645Y1q}d` zXPWtNdyhXj?KsP`*fC zPTFyj$piZ^a!(vau}*wG{yiG2^<%2C8GNu4ylOQ^@iUc?@~2agVZZltRx^H zAm<6WQU3h-s9B>XMT-{kOa|f)^3&^|vntFPcqo1M=gb)0o9e!)Pi_sUOzX$qqP6E% znaE8dm--+`xY5bA<>lqPdGZNvT5JdT`S~}Qdcuxrr zFPuntFW>O5%02g#WbL#yggCsot$7m)rMXWT&JdGk3C$Sn`~G6W`e-Q7l~-SBLedz5X?I`(hJQZ%ujJpx-jRKH?(Y*gtmS!R z)GrLI>G}5lW?rka5@a*@;2`8;XhJh6E9EMd&&m56MC@C@_&=sHYw2bVoV>s#qX5N9 zl%_(By0p6Yah~l}kD>`1IQGE5d^2Vy>$dIXz|m7&h>XT&_)(-(WokF?Oz#2xc;NQJ 
zIjeZ0kFx0ZvHUS}DeL#0B;>L|K&dL!Y1Wa4`}gO;dPTGU{!ZK(*8TGbe@%ALWS z3A5o>xHu)sR_69bZFso<({!wqqLoO@30C~|JAckx%$i-tIClvGN>t?b79DutktgWY zybP&d3p+T!Z4R@RY-Y>$-RwOS%<(f93A+@H&0)c>P)VxX-jus~_T$+H>ryP;Ld7y? zV0DTNOflG}6<==ti@v22Hjv{yYkwWZA4|5dbN>;Joed=_)`7J^AQfuVr+xQ640^mZ z<$bTs>#4ClssGk$;w(jZY~ohFYoFx6HN)Ana5nSTZ)N+=ee6GSoHG}~h>VWIZuk;V zyaKfvwd2932hh7&DbjCe6VHXM(-|{%3UgQQ;PBZ<;(d!y;g&mSd*9;>e6l^2Q)@Eq zxJ2-#rOaQvinW`zvFqS5PF;v3+HR1qNNFlouT9Uv-}7RlgyybhBztC$<2;!{yC>&UZO4V!8uuV?} zJ==z|i7U`{guh?*!{ zZ7sVt@8QU)bA(&W_#tIdc3AXG0>m92*bb1t?a&1`XSErO&gE)1gvE?~0g29$3hP zKc+Ez=|=XSJWt4F6YubKlZ-TL)u(zX*FZ!ebmvFeCm@0?#5J#j9}{yW5} z@EE)bmZ5sRmUMZnKmG5mneb#QlgvmCu3X5H^;`IF>kfA9Kg!ATp+rX8F?mzVRF_hJt*Z2jg%hO7v~fra87 zKV-}+^^)A6rjVm!JLhR!J$`nwYu{mlPoCvs_+?_^!8;(3%C%e3{plAOcyBfGUp~t6 zsgs!U?=n_zJ;14O8`gm0RKC3#T^=9Apl-LOQcp&+HX2hEFLPKl}>szdw)~(>rt9KY+C~O`29t^Ipn+U^H+4xDT6YkgwT$jCr&E zwcFLVJQYe=Av_*O$az8>a3#UGO^kSVBB$(NDS9`b|JXlS;K7;2qEsj!fQ48bju0+{ zIH066fy>)AvnL+BZfQ=_WbaMHv32zZqD%|L>ei=n!tySy8$$25787FnQtQRv82MnC zJQ4uQTXj~VY{m>M6f7Ko1+Wv(?oC^X>eZSow=OY5S^M=so*uq})5&`93gOhH5KbN5 z&6eo8Jk_h-)lmoM^24t)2~V~@qquNvCrgj*WZBdy%=!8+etM{4wh9-u`wyOfaxgQF z#wOcjA;)*I?D#I0Po2cy@BhZH1M5&Ai)qImn#vnby~Tvxk;&d)i#d0Uuye=Qzx^PA z_x7V>r6l9_P2%OJ-edCqsATzxJ-&-&lXkIe@>qU)_;Y?6)}3nCT(!or>YL|zZB}Ys z!P>bTa)O;Jrm%D63?@wffXU;YCdc)Z8P2+iKQnP_^4_uH!_RPF<6I7GoX=lVUt!{; z*JzrRD@@xCetzvu#+^y+&A({EPVHmOjD4(~HHp7o`;EWeXqANS{(A2t(W zPqIei&T(MlJPvG}&!2xj&W{s6p-b81)nfdfQ9OG8m+X$eHrY!TjcO`wX%oXW-YFnlu z`^Vm8w*UAvJ%=pkY<$u=BRGFNg7e4svgJ%=dOvgZqCa-kNZy~kn@|UYuKb1%Cq2lV zXDhn=WN|Sfidq4_oQX`ox48J|#Qi$=K1X4-TCrO5e$&)Tk(Y9wNOqW^%$qom^A5o9 zqv^mGX`jOYRebOw#sw!Imbtpi&dH^}^YcF`dd-dxn&msT`N7M-8X~iH*6UvQ}noLkGQ~z zhzp$9zlEhU|G;3mHhS+9q3o#Q;qzj&5szFEt;qwPr<9+-y3yFz<5)y|X&iFHzYqUEs*p#T?nXguniLfNv*%P0vcnul94R-c8Tw z(PMb#nf|%B#Jpwf*l|4l^i!VBh6|WPuKk?nzS_!V)4*E32VZ{u2$kG&Y`Y?j^pBs| z@H6aP^*5ianaAudcTs553|5_rOMW#FdWzjE|6%v4Im}o)nwg(>phVLC2Eyj@`A73P z6lFrhd_MeQ4iEj-gCZV^8hO?3&fA|frOqSWX;I!E2xH>v9oPUPUt?ON`OZZ&tG@r9 
zwUR^rJI2T?24Nqz83Jl#2xcDrg4GV3dT-ebcwys15CD36s-y_WIy^}RCB)y^zU zVlPKyH@VWuWFq^q34HeZ5$x6qJoH#w`~ma)zr6bR7i_r<0nI;P!l&(U!?Ubx$z$=- zE3z4Uuu`REbIMzHaM*6=#Gh~SLhZ49^Z0E9WO{|iutoM>;gD zMydP|b>SGhHmo7MWmA%NK%(S=O zqE)jA^eLCMQ<^<^BK;nGi+SfA7}kQ^(&0fKXnhMMydnwSIERTd|Kn`j8J2w5pTRX2 z@a3I_G93waoSnnVJzwUpV>V0-d`sL;`_8SXQ=u?EabcV|u$|T0%Fw#*)g#PtavIO| ze1qwy;xVyMpi(pLy{ie;i<(?Ku#P$Zt|R#JC3a7InI7@JEcou;w43EyOVY4wCz@0$ zK_P!Hh>PUZ-c`(5xPx{W7isv)AbUv6pJN;>1 z(;ILQ{QLiSV@5Es3Crp6(A_kuTAX}b;_Q(C`5t=^F&Je4gP?)Jk3f7+#dSBgqh#Y|G5;H#s@>Y&(Bo&`{?EzmdN(Z6EAG=Xw<> z>SH7P%n^2MSiy#pb*Y-Pux9f9!QJ`P$gp^0WN?pfSe;wSyz(D6N?37LFDq8BydQk< zn3xSLS?dh?8a|Eb(WP9DZ;}k}!c?u_m^*4$r*f&n_{Lq}z`FU&Sap=k>D66WsZ_fK zP48;T?YC5+T**S@_c4e$x1SZ${${~}NaD{eD}S}t+~4+g$`&nv zkHyaAi$~bJXg;fsUczx{7h|7)i#wK$<>6~SspC9f_xd!+`MO1CIybIH$@~Vf5$6fs zyN-nmcX8gH#ts~}i*Fx)jG^lza2VbcuX{H=yELI%Q3yS-hN*weV^?S#M`!$xzLjdQ z=-oTAe15PLtVWwo&8bx`kb-^&v09ZoY(5=J$flp@^RUU}Ss&0UNt7sC^-yMcRpsHK zuhB6@&t*p8Tgm)o&Sf?YdxG9yZY9bD!#j}Lo$jH1?K1dBpJdyjIV{+H5!>nIeEQgv zSf@?ql{#r`0mGZ}&F-U9{c;o!@WX1x5O#bgOXeXKvBhz?gP;KhZcsBVt)P?n~ znT@CY9}XSLvEXC$@82(L%gW~Razj@0wr0&5PMkc!`-2C2I_o(rcjz)cc=2cU#+ex2 zRp>KfXewVTNRY#0+hu7aGJL4i`XTOZP>#R=Uo7@0&g@volz(?{K0cD|Gyet*EcwdO zxMK&J-BN}^UY9w!Wj<5p?Ip|+&A#8?U}(qXeAh8a^QK(9uE`9eXgdbJnRFd)W%Z_L zOu(m3a~h@f1UGUa<0c)&j)718{yg6;-xWTc)M-M6gNu_91cse3qI>D{rUBuRHD`c z2h#hK2f6Q??ZiZ`X435AJdu@%y5lT=5B`t^=N%Y6)p=sfM26i{_G*5f8oC8)P&qRTFhiL$7-oK|gKi*p;^?-<1J^GLMB1JPy zlMmK^M)!xlV@tG+{lEXrycZjAf2vJ2EtGE1h3*ez)ZW9N&L2&s^EJc!@nXLz^tXm1hi+^+~0XT<;DElCZ=hm6TFIG>C7cKV-HQZuVaa;)8;sR1K&!` z>GnX|3|>!h#kS1Eev_kPFP{B#=CU`=1WPG;jhf33cc%;rzJqC+$%gs1dXB;Oujk!a z$MJ9QEbsJk-_tk4YQ?ac8BCM7C*@wt2aDzUX!+nN_Tatjy_E2BVWs-qQ9S2+hPrNeRdA>IC-NrLxe&@qZ zWs=t)adYWc<7xg$_2cJu4vu|?&o@NiFbdG%#b259P74CBl<_GB_5X)w?|X^qC*#;X z?mK1-7)P(btUW&Ry5%vx9GPzji-<{qub~l)Ad6-hF!qKR)vqgO{8mX75;D8Fm*dKWduL!CCg|p-i(BqfV#$ z>0TlG`(WJmk-YNlHlj?h6m8Di(AH$E9ILuA&tfeL@u7pzZV=} z_|l~JBixeayBX&=eJnDOk{wkOr5 zO=+?3pU97k!f{}wa-T2wH0jL`09HzLc!#&|E`$Y>xGnQocq*fh|0JY+^e?`faSjIt 
z-Zh_L^tZjQ4Lkr?t_E7{2PZLdUP!_`{rPcN^1uUtU)6{B`nx{PCn=6|%oz1IM>ChD z#Fa+jCNJ_jdnjfB?8n%#D=cfbNIsS5HMl=__!xkV zeXG`THraV3ie*3j!nRlwjQliv`zJn18FNL6l=*2y!%o+@*9Bt469ckAlchKuB>;nP~Qz4_*{~}L)PZS%;r3~ zSeIvcdLSY^f`J2{%ii)l$|PjrP+p&S9J^`YS9=iO|F22fuR-Vd*mhAG8SnhF)lUo&Ne3gzkBw-32-@aODNPQF`t z;-^Kd{VRwkTUH>y#Q;oVPHkk;X9KzOj#fPQ;SBagrO0&LCjMP?9tU78)s+F=T+=a! z5m1S$#hnLb?8S>jW_n%OIl5>eyKN>|%X3e+25Ceq18C5w23~*}zmFaJGjqe`;N-%2 zY_*wSc+>dFJ~YU%Uj+xp7A<6_^SePbdZG_^q|uBgf8#zpUe6maiQ6)tMW?eAeE{;) zq*Z-<48Re>;X@&AJil0}RIQTpiJBvn^PySIYX*F7r&W_eSO7EbFb9t%**-DrnY%O$ z2L`@PyU{g`mu|hPHQ%p<%6a;(;Hqtz(__y{xr*hm8bJI$CjGdOQ}LNzc&;$4URbQ&>Hk;~&w8fY z|Ce|Y@v>U4iyx1PjlCMc_VZ8DtF;?=*P>;@&EPe4S0?yWszP}yV48$n2qmjWqosT^ zT2yqdYx|Ld97($PxJ}Gid_Li`+MOBrXw@t?nY3+Nz=D%@z>C`Vb)`nC30Nq3#~oC# z0%i<*b{)obhq8sz_rJhn)tv#J*p2g9bnfbzE`H|%796(&1_AB+CQNrNqgY3td9aeR z9*SI@BtVkK>!H|<%vlnWaLv_)XS$V6{hG^Cj)(f)?FmjfQpoF#t!zQI1?lr6%{UO@oByNRIO-IhC~o)Pv&Kyk-+ICg5F<7LC#fjKuF{ z(9``tC{}SB*ssg!^TrFn9;#3{_Z(8EZxB9i9@-ol=F0FKemINp$@>nr*0GK zXX*VII~TXl=8uW9ShRLK`-0CA9t-|O%2K69AWhXB=g(cnB%^*GZ0z2)$Jx1-S4A2(ER~5si;X=y_Bl`5)`~P~l+HZX@-%H! 
z7VD;?*yH!IeV?7I_GXo@Oj(Lq;H2FoEG!%cCA0rE4}j=kmj3l8f6rUVrd@|Q84^Xj zR{$j|R->xVap!tAmiMzO^6Q&7MeSMIN*06{?qbCBxM0 z7tV3PPKqYHnP3#*o@XAV+LEyxh&#jVS3A>c?&A!6>Jc7jQ=P&Ytz0vL#WR)>Zi11& zIrn!AOumohH2th_`?k#Kv1g&ugU@i^sQ>uqw2ec5_M_FdE<82h33_*FN~Hpx?{C=I zvLS`#bKec0}u9836gc&(-l)A{_HH@7#rgMg8T2{ohHz4I{f-Rj|$$Lk^cv7PJ+Pq^l) z*EG$C836ejHldF9WERCbxVUQLDX8!Y%qegzu zYp=gSctph2d9$>vtSv8BWixMu3KnF<@L_qgOlg)if~B7hX3|MJ82;3MX&5ipP4C@Z zR~*}}NF%enC{dy)7A|2o!w3mCp-hTNj3Sf>^u{0#(|(DF%O=jIMtNNPKGv?k;0!8M zX-bpQsaGmyNH5B{~~}mHZEPA5M;QGR?Uk!uah0e*|IGmSRPo9`sLHE zydqaEWpkVfh98YuHzw2l?Z+Qv^iudjBPqA(2ui(y~} zZ{+VG-Pswjkc}g<@lggBNH1VGY}n@vdj5qgzo#EubnQOo!ZV_UfGVwc{Oys<-?*On zBOarE5$6-+$eq71;JIHpl<+jgaq*n92XPDvmMot64Q^gw$%psTxYIy}{QNJgcOB=# zrC4mXSR&3HX6vdA9Ei-u6Ie5n3n8&tzF>=s%S1TKjO5(;7-!m+;*`$Jd&6cV=Pz8H zr%Z|T=CM+|bn%2I&{3Q}pOyByPDZ}`16$MzZMm({13drDJocy4EXNF6!t~{lm>A@1(S=T_y!V`w^fN10 zwxtV>J)?LRzMl9g-K*!vFztlwoWbxH9;9KNR`hu5XO@@p5Cz~Wb44X~d%$!YghT%=cb0hke+`MPIoT0ZeUBd4xl>ydLrMB9ju zi{gCnF4nHz!^NC5Bs2WU?>q@$#>Wtwq2GAWbHGU&J+l}nDy2eu`Z8J zTPRhwM8df;obwkG+lx5}x!`OSfMKO%slaqEjJ!&eBG8!#7tdY1Qm(vQ4>>MgNZ3CN z3T1H3WfUk$sf3#u$Hntp$Z|89eB|>>*q35sh;!4uZQ}iQW`Bkc8%Bll6(EcACElB7 zb)MY%$!gv(p+flz3?DWu+spHm$koI6-}qz1n+AUMUf`=2YLkAqVXio~T~j|~j)9-Q zZ^C)NMw~6R6>h=LFJT&N*xYr1xtMuz18eu$0Skp1H|MrAf(rH{`#2P50#?dZtDJUS z6}N#|b5A(~)Rpg~V~vDsh4@44JLEipc~!1Xp$!zGNnEuP?Lt z)5myy-UUpb8Vnfs6OWcT#K^~<;HO>D6mI%5leb?aCN7Na-`qtpBaXe}eqe4yrV@9< zickN59{Jt3jQsA~TtE2T^VrL$1dW~;#_akPd8pf`tch?CvF1m{Y19$lrIZKs~5?{&YCoql-c+2lW*h}Edi{|+;QogHhIXxN>; z54Gm@a>WSnHMxB81ly;5&+xgyE^JPlqA1ohN^+$aU<-vhzRRzJGLA+Yer2eg{VrLl z^1$aTYV!oMe*c*v)n57fO()Gii8P)eT zJn;sk#mgJRntq*hF}yhooc=YyW&mkN4Bv&%%n~hJ3!2ewCk>L z77iZ3N6XG&$H2FIOL{+XKh0~DqG$nc?3XTZbp0fT|FrT-9~D_=O}l?3hvQ0+F}q(! 
zxw20i-ZXvr7v61?(IW=~?-I9UtzU^uu%vMUPP)Exa`@%tdMMSxW`B}&Sh!NHxV0|J zdNZ-X@4dsgUw+L-3G-B^2RX`HT9$M~Z)Et}zi}wu#PGe17e0TEI+^IO z+!e>RtLlf$Fp^ZnX)VjJTn8;3Qi$2KI-!+;5A~ZjOf#rpKY5%}b^r?{N|#78&=a+M z0#kyWZR4u-m;V^MOx2$%?q;`KgZdxo^Rs^d7w(Bz8*lx!vFEu zM;o|Aej2|so?)Gf5;6T{K3*PyrR<}Op8h2dCVZOi_P%{+`_6p+iQ3EVU^~>#{p+o~ zdX&4h&27Hg&+y(O|wp;pff~0~udr*pD*hx5b1y z!0@ZfYyV8)odzj7b!t&jlQ+O6LY6MWMa6&UCMcNP;#JME%{eG<|> z&zTDjD3ifXZWJW2m|wy?=Qw@dk*Z6M1q&AP%!2bZqEEFzU#CZ{E zLX3ZTnzg8rvse4F#leNdCy|x{6f2r|=VGB~V9|v9**SOSJPy+C8=8@4ICCi>R9`gE*^y};u2UuX|6c7U0grh zl;YXFW&>dkz^ew$niNlSPqJN#boL8@fuFx$>eD$+@%ML=IAI6Fw*h^gsDI^V){Klu z=^q9RaRz~R4dah@8j;Vu!1&*0 zaL(aPy=ULyfuz^-z6A*I1!Ci|#e1mka8;dUTn{ZDk|bZDQdz77W5*;SJOXFe2Q{gl z&tQAFN!+$gY>n+s>x}9V`(gGTipKh<99`UPBA4SG}kmdbds6Px`Et5zPs-n5pB zE{?pZersjCrtQFH4`$=0b2#c`)WN0~x87bIuPK|c*^jY)!)YA#%BMb${W$A3IGY|B zUNxy*!?l-mhOaL^2E>{sG0`!Y1f;Rv?VR3!kZ@EFXM?~#0Q9RzID!KV|ZQa_n1O()6Fea-C6)8lG>eW26 za7$_KxxWcPYgZC&MzM6%&uo6+9UA8HH@xGI?B`&t33ySZYj4^V%iS9QuX0tXA|C|@Mh99@wo?hphIj@KOe0>s{V8;+0o&9>j zTH!Wo1z6Y`;vjbGIyT4jpnV$q@#W3y*%5C77K+rmHTSoZNDB7g@x1r_HeyV$mhZ{O zuQtkL>mgSh+wQ6#ZX%AetY3EsJ76i=nC7>ny;cmvaK^Yylek#tYv3tYb>#9-`FKtk zCKdwke}%p^lUy+-{c2!bT&k}Io9CwT!H9Lv{i)JpywWQZA3ccM!*@>(V#-Me)(VgE zGvwY1ZOs{Hm>{v6|wX1pI7{{Yb3~N~`l<~}ip1c+=P2}d|B9;UDjwWn; zgF;0TI%o86OowI#m^v>K`>7d>p7A0rduG%Tqml3wz{HG;&Bl9#NnUxd+fzKPwNkZB zYbp-g$-($|_WU`9Mg6{{vr9gZYo%(NHdOjz2M6NgSo`a5YHB=*@+G=zEVhh@)2pb6MW=2c8aI5(>S+}^Fk?} zxHvoDqShm>kgpMUHqFnp1($GyFXYER4s!4F)pI|GK9%S`up7f>kK|yyos-8-;UIC} zH@UEI7AvAmFbdGR>z!ozJ|msvW=1xt|ynY$tN@uZ%x%A1|izNe=VkLPk$!BpqXikH4Di!ad#VBMzjA`e@(G%E7{@~TTxPZ3?{=^}>vqSc` zhV)4GH8@M*ViZm2@HYNLFemJg?3t|n1e0ETg}=`@z!FH8A@9&G!=4~7v35vr-d-At z>3cgb|2C5D2_0sd;U_o|?f^?UDwR(bdf>vMd2EaWullWN;l@`s)8(pq!Nv7UwlATg zR(#8=!~S9Y$>=nyqi7D#`ha&QolbbtP=S`s%R8e&Mlrhf>+bwCm?MO_?>@uOC8tyG z+|S`~Hj}hgqFTbIz~Xn#WYV^%tYz5nr$Bx$XFx4(Kf4Yl@65`(UT>ai= z;n_E)aU{j}6r&GL=A{>Y%$X6iW)f0qyfdwQdmTBrh?#sZo zF7BV*rS)!Z_2_8k?(*_--aK&bh}k%jk7u68fq~y0&oTIc%zQB}2^SpOE=eO-T;{)# 
zk5aorIm*<3l<&4&&gJE&&bDJU8zP*o_ikz4g!CHpdey4OZ9WOtj`L>lUy_S-``IN7 z?)C^j?z3T93eoC=kqk=NiX#8*)UBRyueg0CvkoPxA>)oR{e>>{pL`NK7D{&cg0Fg) z&0v2rIY0Yl9{*x1(G;Y`knw!n_Ucy-4KFX}gEcdp(6A&mYSenZ95Ihb^3kHtqui4B z-*C-U*=zY=hO_XGQT#SNj9*9Or}VA$xuaHP$_57DXT)>q;z{=Xx0VeDLYyrW3@fF( zzR9!ou0B1@-}+64Kem#`em_iH*hap7xFu5?ci_&()hS!hoA{`UoH($Jb-{P=-GUEj z>RpMhJ?>!G*7aPD+rgLJJFvOi{j{r7o?`jDoHzF2+gTr)R^{kTtvWUFnz;p=Z8zWa z=}SbPp7eaAD|L$)V6DPapTEJs_YY-tn4NP=KjEH66KLALH4UnkqOdkRHDzP|Ks2Lf>;r1XM=arIgW0ul1HS#{ zGQM{phEam=bP4a zXkLr*f%)*ZMRD%*;?s)5ltgyYIe@&$etOHf|>) z9_qrL9-XLPCLi%($Jn-FF~{5e!s2)85NrSs0%RipTD=3$~75Vdg>pGRxu4N%!AQ#2ZAinK6$mWgg*_&M3n!1TYwLCng~VYg|1^!d z*P(?AdQ3Emb>N{p3$x_kQ0zy>)4TmX?th>I4XOlEz}o_5JeLlv;H2C73^6F#{cYZu zxQ4;2BZv>4#(?%a_`B!*w5<_{CGsS@*Dd1T<$JkkOKa!#q5g}X@!XSLN4tjADCHl| z`2!o6zhFJbqMU)%f=ypx=p!jFIIUE9@>AZO+l}{Dh7q;>XC7&_l(u)@$sH8~u$|w_ zihmcd<*f7Roo~%PjCiwIlBVC;tA{e;*Zf{O6`RUm=N+7z^(q~^ub_MDsuZzAac2K! z7OyJAn=8lgU?#dn@vZkfUkzG9x39NzIbc?9~_J~!SJR`;6IXn|H zS}5GG7tgdWizUa$Hsrj1$Q@?r9EN|fj1w^?oLI(ZU(TWT?+=pMw|Cvc!O3;&IA#Yd z#c9&4db+RGTFP|cp_cz+>7q--@BWepI-a9%ml}AV-o(s5XR!4`Jf`7C%|7Fp@LCKpsdw1Q}-hPzT|4d}w-f--uK|q7om@uvnl`{wm*pL23 z|9-!4DBhsteP8qY>vfYaGvHQktK`M({X3XEb{S87(wcn7m+;+-PcwXn4ewen^YPP_ zJ$7yBsuod3p7`2@{UoPD{qQy*)*i*FovS#tb5$BSESoTY7!Q@58%N${l;Rr4+qBc%6T?3}HoxgG&dO@XOarl1$)5cGP^&?v%Y@FXVn;*B$c0;DJ$q3}`FaBbT|FgXE%T_|{ah%(^h(C8O zx@J2wu4okTW^AU>2*RoloZ4 z+}DmY-#o}x`T$mH_WO`QvmfEBZBf`mcQNbdUCg>V7rq5HbIwtRGCAFrS=n#M!Lesv zV*A#E#Dqk%c+y@Lr@Gy}Dbw+NzU*7&>PgQqlAR3lO7GE*gm)d?$utbSu7i#y^9-W^ zcf2s31(m;R{QhWS zPi|oH_Zyg;o9n>42G5TDg|I&T7_mMSN8|w(j2+H`tP~@l34{Oo1@C8H=Id2wu*HY6 zZ`oA#ElWA27v2Ys5zlQ|zCbYy100bYSv!*>Yg3N&qIlEi`FY#`YNZww@~O$Qzl_H= zZ~&hyI!SE!K^9Lsn7mFgEEK8#G~bVXpSFdQcB-4rdMKmv6zs@5AKcA~m*;RgK8E9~ zC-KedBokOlv2MGK2U}$(N|2vMZ~eh|a{vRrU(b2lMRv{`&(3+tmWX%B7QFV;kGxvX z4X?$xvfj|;E8pSTcS3wc>N{kVoe)|=Z)g?d6}8fN`(O}dAfZppELz>smpsuo1pwAcYGNJvutO~Pndi5`SlBCWv zyi3vMwQ)@N=th|e*6MyH=IV;$->nNu~FJt%eC!knqs@AMcqgI{hmE}*S7!7 zGGW#VHtq@LY=jL 
zDbB>(FMy)us!^wLTe?5~B;D@_ym|nd=a@Hh9nmHj1zXXreLA1W%uf1=3N+%qX|t&H z-FJ-rcMUs_Uf{CnL%|Xix%G|~-1T6Kgr4nlzAZDq-#C|-tz*ydr{$o}c&F4n7O&gR z!QJNmd{DwtS8cdTip|8OO-i=vm(FRl%dJ9`gD7 zQ>btuaF}3PaNnmdEZAb>C|IZ<`TTSDo%`#XFO|CtW@d-Jte!rZf99=V{q}MPEJ9u>B7=nbSfG?$?p8achM1Svym>A zUH;->pdUTjw%L(t1Rgx1q_G{i(uo-qz&jZ>qF1fEg59&~Mma2{0JH?g`_8Tudk3dt zhkJX3zq%oAuE?y+2v?}1k8>zKhm-YLi;_DFqiOD?uL|{1^=F@+QD7=EpJ?~WdUSAt z-Rk_zR4pjV#LHzbgW688KI@D(ecp@YmbD*w87r@mnlVNok|odyY9GiCs0VZo!_X`3(vn5Nbi< zh`8OZVU}?s4duPiCG5Xbk1-ZSCjH0-YtmeJFJ=Xh_c1l%wA4o>24GPdnT%Y`s?&bz zPB6wZ&R><+(i&;6OYOg;N?CZ#TntkUBR1R;ZyjFzzzd)HO;Af{u1nKbya)Us9Lej4 z$*hNd8u$YJQb~QvwQS@HamI#nL5@|ow_5*ui7+Yn%*tKeJKcuQ^23;iksuY9R5qc4jsY2e>| z2-jmvDJ5R$$UD~92*o$7@2=>Is1w$k49{#m^=73%vcf=?F=vSQ(v)Do(cb5KWN1dT zE}b{Kl0}*>vrLk}h7a9$qC@tP${R0U^NZEgO6QNzr?@=30k!NCC@*@RX|UWk|D>}= znV`nP#}pVvMJ{U6&87Uz#Q5D74OGpwjEvKC4NAn~ z$?LM}VJ)H%*hduj{#>#64AEM~DMNOrkZ1aG;%>m!-_O!+qNc;f&L5DHm;TsUt_cl^ zIXLN;uGgK}*}N_9jgewBjM~J2&}^3>qQ|F=TFTCw}l&Tp61kj^ei z#gphd!?Q8n2G~&EV8_7B>m!je(!C&~ENz>ZkhSDpt^8~oMT##7AU~g=B-@Nh(F4?t zwURDfSf&xX1mn(2_MbP{wp`WNv!ixzjbpl9fHT zL4K#e>u7ImZqx3C5K}9wzH!uJX%_mT6=-6^^i4Gts;6LwP4Xo{GvDM%C?B$kJbfJm zhw491B7MwtyS8gWHRQyLTPFJWl@j;=>9T=J+_Gf|^lM6Lm2*8@sYYwn< z{C3oB?M+)RQ_=CO%o7jFJocc>Q}j!ArxHB+T2ZR}rLUsT&wbUt6Y)wVnP$YRYisMJ zp#k>_n2u32oVtxl@M#!5HHUV z92`r)fs=?XpsS+^7-#~z&VUgd(1QcV)d2tw0Hr+?)$alV@m_x3k^ddQ&QK(S0ScQt rIN`lR!aWH%5uYG@AP&rkB;asC!M^wq;nqe|aFK|uHNvXW!Xy4K^|;*_ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_install_cdt.png b/doc/doxygen/chapters/images/eclipse_install_cdt.png new file mode 100644 index 0000000000000000000000000000000000000000..efe24ef5c8339fc9942c531a2e880d5d7cab1933 GIT binary patch literal 109713 zcmY(r1z1&E7cRUIq`RcM8|hY3N|2Be5D<{=Zb7;`HYtsCcSxt?raPp&>tCMpo%`Rr zpXGyl@5P#Pj(EpALPM0_%b+0>Ap-z_Ci_lO1pr`g!9V7RFThW+dwWMIbH)ZDo^9l!Lkdbx z)2d49BA04T>O4h17ZHf=XNXrsm5p(g^rsIGF+OU1ocRNTTlh8zegA1;fnBgzLiXEd 
z#BUUTFZ)8Riq8gb-@bK_Kjr$S_VLK+Qz0YQ_7mkG>=-Ng#VA4vRgC5Y0|RaAJ=xPr=CY$WDIA^m}o!c)+9W`cB^ovmgouz*O>Ce@)tR*cq9cB1zO=Wy*Ao@XH;Hs!^| zdbQ?G<$A3erP^~vs(tJ0Q~(1j>q5(I0hx$85z1d9$Af2@&Qa& zW1rLpHV2~A)YS*d!M$$uW#UE5G)s_;iesrb9WS$riRs62(9`eC=-3Dxx16FS{CkSZ zTm$FhS}7JMBK{V?FPSkc`Rm#0>_(O%AF7AUjRml9RBcVH|t?; zFJInz-CrdbI-rNWVlOHx+MOyg0yVp|KCrOlrd4fn*c*{nTvFoT;DCVoM_&zB-?Erd zF8MOoO<^!sHm=EW_XPmzhGaBcaxbl387^BHo>EOtQ%&vSb|wa zEo4gu6D~xWI^T>AvBCm+q?pF5?|B-qzK#tvtbPky_H>YDF{!h3OJ010`+?P~X}AWH zB^3slF44H$ovhEwI#~~+A5K0M1Qe2OwY~onrD?UZ_PL?F@1-B*7QBOOH7;~etWRL+1D?4+8}2S5~u7X$lLToI1<{5 z$AS60H`Olk{2-Rws?%~qJdH2)V?52~$%+A2yUo*%o2d16dxbD?ywWI$+-DI0WXP9x zFQNLyg@xeOqsDUXEKW)E%kYK#2jia5iLIjOc(@$qw~>bO`P(!wq-8%loUeHYIIrfhek)|>ur4a zNxZM-7$pKx&(zfTNfc)UCJUj@1>p6aCnqI3r=ptJ;8&Sr?p4Uq$ z&1}NoZ@Q z{(|m6{68g=m#%03bYK&9d+134mDWoYL$eD5p#8RGpvX_P)gyn4IZ&)#^89rDDoS8G z8AJ4DwZjjVKHYJ`>snFtv>OLnrpx8Dx5D-MwVwNK-g2u4yLJr&UwvUqi^4JktE{wi zq-Lo>>NcLnC%0ZgBkPvae5o+}*5{{##l@e4gM-92EvHutHswV{dTpLk($Zlz?T>?8 zJxxuiXJ^P*rgnDY6$TQ(=YiO*_&|-HnuMtEN$mP_jSf3!XQ6Aqf_8RZjbw^lZzbt7 zv#=x!oVV`n?SUqLJV2i*dO4-4Q_~OctI0A`G&%0F;^o2*!AHyu`I_mv9s(+ne=9yN z)9Xq=Zfzq<%q|SHh5eMIA-3id&~HdN*Ih?%59X?E8n+Tbr5fFw7=gsh;GZQIQ|rYB zPTav@Y3htKqho)H;H)-L!_>CEcV+s|k9kRYTAx0tXgBFJ+^_$7`RWt5QLn}AEt+2E zGGaja7y+?}dwk1H&nx|JJmr}WsKfZI_Ve{(=Dp!iw)ThOZ{Ng$*=CoqaKdt@y(va9 zF{Lod4gq?53~LHB!&>r@yS?V=W|*w9NGbD?TzGB*n%xu_aY|g|MY|Paw0>sLyttV^ zdLjUMd3hUsQKrM;l2p^v(?ZViZ2C{60L1fb05ssDB1UqKEK?el5z;#8X9dP|4g+y+ zZI8QMdQjV6M^btCmK?npaNR*~)AM<}98DKWcbaJc;o&B);mbfy^HGzAbK1`^ia(%gb8u0x&})GraH0!H@vqJ#_0eoSsH-zdAU=A7f{ahZlXYy<)9j z)Yj9Zyr{5RsI%qwx;?|QmO?sgJfE$zs+?LV$b8xqbALC!Apt`(@)opFo`!lRI ztvnlb3r}!BdE4Cts41d|UIHT|YfJF}cI#A59)Yniq{5PtyeUMuZ|fEmn7LEny9nt^ zoD*0G7hvK1-L#gH$N2G{d?{bT6j+4-*D0xE*>2NbO)INXvl%rF4cIsQBu9&l=RdWM z*)>mAz!Q4itOZ>!Iz0Qrz&@%REVu3}@-1Pa2Et@nEkwqjQA)s%KyEv0%*N_1pjQXg zV;P)Ay%2#+->rQaNny|`#=mxR8&Qp4TsX?10f^tl1q1}}@bVz%nk>kG-31hr;Cu~!1=@|~ z5u`rM-cuk_y=&2HS1owYWzqrmU!+$;)xG!g(9ZPfHLsv{!P zyw{fP^*hWXTxZpG;-TA-ja){}^D|He{?&Ah-y7= 
zL(AKrij^`&Mcf^A8|}A1@Fk$8mL}OxF$AHu$!VVmReRrRZ{GrZ>T8E>1bxuoUcQ2? z6eu0r^0&C2u7f;P)a!0)<|8V(C^swX189z5%$9b$T~1)u*xA{^AmZ-l-nX=C*!!d+ zSD=*X{dBvtV>=6hE!-Z;PS?e<_hoFZKE zf_uxNUCZRQ9uh?EeG~G0-<}3Tfr*GpD#(wCSZ}HIuo)L6eDQl@OoR^)0GZ(PNzel1 zsj%%64KA{=sp*PcUn}GsB%~+9Tw@PsLu2}`YsQtfiyYx$>}`dhHm9fWMO1&5m&0%< za(Oe{uXe&BQ9{l^J61(*^|*dG_DMn~7lo)0g-L1GTi^cq@i)*Kfv(C9IPOiIf;@O+ zV1SLe#1Y9XL|aAW;o;n;^=@zfxYb34gMfjN(J`#vX2p7}B|g#Gl-3_9&CCBHUR&EBH64g}F}bgAFuseY@mj(8+VU^{9(XZ!5__HtDC=FP&M_YM;aD z^*^cV5Wn=TQ4R~Q5ts9yzd+wG@!EW&xJ1^gDBizogs|ed&dM5_ILH!m+Q z0|NsF+)(NLlH#k7{TRfO(o%LtMo!cpIfi_|=5`(2>izXGRY34qvx^O(;KhwH!^PwE z{`K*S1wl3h0=c^b0}t{q#F=SMEC+Y@riNq+DKzn~*49XXnnscgRqfo|OTRY^M5QIg z^X#`Iy#iEp zh?&`}2@wgv5Vd9tZ)74RRaB6}zr(ma9<{ib7{DKX_w*=~)~Xz!mXP3KSXme!?J$~5 z7$&v1$FxQG*87Ya@mtHydWndKGgKiU4NMFkPkzAz{M*T+ZfBB_(w!o!0nFbmqkOzFb?#csXl?L?Vwb8v0di-K~DH-mfu z+s}EtKYnd033{H*Y1?rU^bfydMudO;f*TG9?=v*9vhs4%^?Hc_4;%_53g2{>FVqQ< zAmC9Vp0y)V#$$F7Ox79AL%ovdrKzxbN*ARNVJP+l?d6IZa!O~fLsqP^~KT}ghcfok=8yaVvl+iRo@je5ZGBzeDu<@bS5C8&dtTZls>NET! zMZZxXtwP+aGcKChklP;@r? 
z`zt7aFs*<1I$dlGZo=JN+X2NMabRFzbYOgJ;j{(u$ju7`|362xet2U^L6P28A5McZ ztqU2-uM`*wIPTE*JeL$ro%fIIvWUM?yi>(OH?m#GlyO+iJ*u- zA2g@dIZz`0SGceyFk11frXWDw-rVG_XAwj3^zz!@NmXgGz)G$hta(rDOl*9=PaUHf zhM#gMRA)IYD7M>%@K4nO6HWl4rltnw6vYPzmY079^jTy*y--FeKB8~~(l@`u0{*u~ zL_tf7=i$5)EiLT`>$~%~)bgnRee)~?GM(=zulVY5M=-~vH0g*q{8q2>-LQ(p9c{D z?nN7Ww{E5V*4?qsjoA?XDLZyV3-3mRGQ(7{u{@A`He7w?pqia`GnvQ=mK!?Xyg{qB zy1xn{^Et~Mr{^w3W&T+9@DxP8P-`WRh^5h{+W?j>SgEOhU{#VLZ;=QS^ID}|Pd{ui ze@skB0MVO|&7$a}A4jR~>v&E>;U9{Pm`j7P<<>P@e(F|(<7VqJm1x7H!#Nz}ms}Vr z8S1>&^DRybb+NIAr%v~g=|hR-Ul;Ou@^AsFnEnTwHdZDU)bHQW)Whc2(J{cC{46OM z8W~B@z-Q6MMg9)P4xGiyb4$m+!kqX77aN#-)fFb>a%4GOoNyy(I9t)wbf-CttDodk zU$wy5$6czR9SZ#Z;Zc84!_A{$tUg_jf*(3oEDZhRvyt~o-*Ez|k56#dct~UMv|ynx zLO}NunDP$b*t8$s#Psn{@v2t@+D8+fZfrpd+Cdq9pqBn(Z0!AbSkK<_Fc_YsZ#hpx zBcI4hF6esF*~fMG{gu4DJSaFFE%nPs46%p1y^`{Vi`Xd}8%IVGPhD(74GoR*`=|RM zw#J)hT@S^L9J%z`{Tao_dofYZW75;hOTJ~#XZ`A@tKms6_v;rRFlj8?9eZVtWV}Sv z_(vv_daji=$IH-Xrm~uQIO{v|FdMweyvGa9Fwt`Yn=lzWroks36@F42X`;L=n>0 zM+-@rNw1K<$4ciR(=o6z4_S?iH$%DJkA;)uTJP`eJzqs7k@et)R6ap&68+Vn9WamA2TRSy`*lC& zYhU{24QZf~3x3%R#p{CVH+f1$Sug!s<8UqN;W8V;r?)M+JBgDE3+^#A_PD~u$A$N2 zOcDC)Faoh(6?R+>(fD*-mm9(3#yWdJk0zSwmfDSnW^DN(adBSHOD?ry#k#79W|hv5 zcVbUlY;s6D9MUNZac9DJo2qHN&li8<3o^W|=cC`k9zUFu6UPh=&__Ob9wl*!-9UR@ zK?)wZw^B@>A5ZDs8XLh4M2dP(>07r)sNBsobBSf%Z=H_`({}yTYChs?fBy9|#ipR3 zpd?hg{^?$9$?X+oyo2){V?oRP6GG;rN1{IDD4aXQx}>;-z(`9=>rAfhagNkNW;-Vf>Un~U=? 
z3~60fc{%M`%SOlBGf-z>Dfjv5?6H2)@mJb$i^sHa(uV`g~tigmT=^`sj8u!Wji z`ugJM^PWWw)0Wf0l}rTRzRah6G1HNZ^ResKuV0HkT~xa)->-fz8Y2mRv6MSOMfb^R zq=HhSE%}-@FSn|AZ&~PxN~ZBIIGmwS((3!no#9RDReW@Z_*}V8atjz8eZGG0T_ICn zHkpwj&ggYp3q4MMSPh%Zuz+X|Y5a6u%hO=N^7kuYYmRsDewK*Ju)8yZKJTT@&(EiM z6K5vGpFOO()O*&OPvhaR32f!!5GSz#%HlRRm$lbo4}Y|b`o20OZuEvX-UjZY{VFUj zYdcNU`D+Oqy%6s`S^7t(y>olI%U@C3;eytZAbnyYFghrcz0UO7ZYW`E|Jl~|SB`v# zNJeA)ON3_FOZfP>sAT-Bbz+Lbx836)BoGO@DqDx)hiqKqDt)nf;zQAUy03Wv3Dy_B z<+EmxhKkfIr5|l``8}JyzUSay3~@ zQnyo`EPT91)L#n};g97$YLgf}jNa2Vs*7alw+kJosp;uuq@^`~J&j$@$uVYbBj)s+ z&7qaQzusTqpDuC+0CNkAtsOP(0JBtmug4MjF|nkywEL%a6Giuvwd-X_OyYJkAJX5r z7%8kG7}Uo-5{$M%rY8zmu2eeoxU8guBCZ07;UOET*FzzX1vEiJjr;x2cih@q>a$cnT3I?ntw4Jm#;Pe%Z z2Q+!DuLYGoU)DA|AA4I4PMSizGHi3PyEEUkoR5-aW+ov3LN14&zyNMz)AZqIOh4$( z0|Rwk5H3AbdTDYo`^jxZ$hmYsDo`z!>hKrK9B%$8yTpd(SPl83koNAPkgDnuZJC)@&$|o*&&md+t@rRAE~)eWz1_ zf@YA&rVj&vQSEr8UdhBndV$w!rtH~!YmW!Wbr~aLq&tMJ*I6&@745aOEJl(FKDq3I zg1!2vuAc64r4^RM0^>_7pM*}vpM^yTQ$SkyvpCi$cS@&Qz%&A<*7%{pF8th>`6Ksx z?7xV7OHP~T_V^YSfkwBy-2uub{wFtxEJnlq>W`kfI<09Wad-4v{~Fw(tAmk&(HIAN z*0@p+(RbD@N6_}?$?o$p0Uf;u2Ek+RH`58f|>soIfTm0?L}o(cJG7=%%bV2>5q?a*YY>EA-wVREv6UhR{cm z^48?n&2T;83kV=}Gcz&al7p6`s3d>>iHnsMHR#v)*Kp36KgNGdBS_;?V&>`;(}ss{ ze&I7pn9XHeTwEMj4$0WeRnZVdd{k2_RJkqU^aG_%`zpg{dMha*;n3}5yl*+UAk6UZn9cf_PW2nI-36d36WdqWY$i8=BSLXSJMvzGU=vUYYoez5A( z(}pH43z4L<8){tk436@Q0xl0Jsu^z1`*l`J%}(dxKHy)x*I|5zzu0~FV5O_;CMC7e z>s0i+D)-A9v8bMXmB5^?Zy@Rp7Bj_sU&=EE5$(GwnHbOOUXd-3BjBP2R#jJl*HP;^ z`zu}VtsAaB^gmE`5_Bk3M$-4}Oj`6t?hUw^w3*VU^PZNfDv>_NT~y%l?!pbES9|;W z2v^6M?!w-zY<|TEXapzSlVjZq4*eZ4GA?@!HqUKJPv>sO86uDH1QB$X_>}1$qi%D{ zBKFDuWESn1sRz2+hx2v2(?9O@Gt=-RdI4X~Hp8t!=|21j=_%9ee6u>Sy}~P*T93z( zdLXN;>}kZs#C=&aI7X1caceMk)w{D-V?|z zw&sMYcRWeLq@s2wdf1=f){AN^7V=G_j>Z(cMDfF5d>N{wgIzdCrjH8WlM1z5?Ck@X z5xshDUU#R|GAernjN>6k@pi*k^APEC#_v$|;`5OBlirvxGa4m8x9V#bi$K=t6c7ZRu3W`hbZs z(dYHuo&2q>Eotfx=D)?B`<79E|2GS;RtGX`SYOkJkZO{s#OV8@mW>Pqx+sDBmTwOa zXJ==o8J-WABFm@#D)z^uK8+j8_n`X2K#%_+$jR5zU@6Q0WqZd=`rTQLT7WchKX^m%1Wo4Y&q;w 
zBs<=t)c&P>GpzB}9J7Ez#uWn!Yqx+R-Hq{b>NU{Ze5+p%CT&h1l3h{Yw zvl=%0H_6=eA+54mK?mNU>~O1xPfzoBTph8pFepRS`S8ZSI^IW;f@m`0RD9j`cn>xb zL9zZAA><3@TlT&lx;-xt70JuC;|;Oi-`{shO3cgqj)H>R-D0zTqx!0Ry~8gw6t+}5 z(X}mg^`xS?hMNSvq{NP`b#H#ZtX12KI5m?9Bkf`QtL09aF1U)=GnWB&4S#(xhFFMB zw^Wk8+lEs+8Sj^qQQc-aIk}qaQ%cZ^`q};ReHs-^OnS3V&R%wYu8;teTZS{W{;?q( zgm&)u=i&5vqrOPqQk~}-<9*ORnsy>XB7&=B9f{F{dQP(#do!C{S?%q$H@WAsD(wo{ zO9XAR&Z^nBt(V$b_c?W^IK0h;Ezc1AVcMx~8e_70Xua5H4JZ=uvCxCkpkMzc|L?fI zvBwH4kStaHns?doi9gpx|fA}YzBl%k8;QDX64~9!Ezc#RF^UpUVDOvxg)*wSSv%Hwz?95Y6 z^4~9VvnH0eJ0CV}C%^*Qc&ksJ{|Nw?aAvR9O|2tIh3W{ml0`gt0aOCsDneua*P2bM zC0*FZF4y4H!rukj8Ax{ap)+cHVVJvwDI>9L|1I0Oy8P*X&-#CB5iI&E9O3V1#1RD^ zurbAR#tRJyg2c@JyY(9q4F)(n6NrTvC?0@0>c)-DIYIzdD>^?jfaSBwuelkV@Ia?B z1191NU$fqv7{nb2CN}DKRB8aI)Z``#At_$`_uLeShClA^7P%x9ou=of?_FrR;6 zSa@sJSN*hf@%}I%TL4K&pIqnIME+lGCAy(}tX0>G12r`+eX=-L{k=2zdarAR2Eh4u zF5kBS=k8ajBA%6hj@;i}q@0f(y;aU}inw6A(p_}ufdw=pU-#3 z+W+K)4Gs-GPCSCO8F+LO*LT&4$m>oBgoK3ErepZ9QDDpDuXp6&bw0> z>?dSE!G?S9FP4x^)3fDI81+tL857`q$A!REH>4)|Eh1%wt%sDJGd_U(#nf9vJ0%UD zm*LRufN>vMzFce3?0yW6Cb%3e-acNhgq5Ffj}UX247a$QZ$c?00&@!r!pgxuGWZJF zDk&-jrV7qFYTG`WUP0 zInOK)?zPbhQ2Ga=ng>n@UXr!Aj`NvjIQg~IgUgW$IwpfPHqJhhYLDxqiHV6mav#sZ zct#xh_i(;{#YHAU1k_p1y*B6yxZH<=KiciE9NKAX+{m_AaMMcSlK*b&l3m9sfgVMndkdO*W^C^I|;nXt{W z@GlNB*Hu5}vIfJouJ7hFkaMt4Ut_bzc4^`x2y#cgECFz{t%Kp z?z5Bb@*dpd-k!PLhtVTGztMk7#?f>+PTZ+l}|dpK*J9pQ*wRV1pVg!ap7RJnQJ_$RiSp zdfxO#5VwO`0z3Ujvz7i1-pg(~OLl#v9#;p9EG+Rcma`QGPY<_TrlY9e#Q_SsV`!-k z2m&i3=|Zo7uC)_TaCY-%1+7=}7V#=T19%RQ1OcB9W-A@GhZ~QY_JJ(hhhfk* zz!AX-Nm_epeZe~_?MBc40inRR5A0R+ctMO3c@bK{Zx*^Z@;X=Ksu`yB* z834{I>Fevan~srTDJ*9sCVo^_rZ|?9mj_)~miEJcF&DdrM|g9Gt2uL$%Eo7|taSmp zb$NgYG-~-(krD%|XHmyG9vAId&qq!Ptk$VR%Y^5WJMAeeCy;?%X_>GT1atmF<6_h+n6cU zk;qmK9d&04krPhZFGTTBk;VEq8r_m_t?~hyJ0}evhxo*nny%+txB6OUbg$Lz0fzEW zyxOB~ewL-SY+h47pyD8u{Mr)McY+l(zr(Fa&@tY-KAiT+gcBGLzyskV0%vJ9ZCVYs z*(b5hj=QfxYgn*uRwPOOMUW=u(}pjR#HJgFPU?PtSTFPA1Hi!k_jNp9YcZA~a@_vx z4S>a^znxow9SAl42@P$6BrO;|@I)W~dh|c9+R~=~MYov2%qQEDt8%QEcNn#&qfgx{ 
z&zV={*_^IqR059ub}Q^Uvv!+`L*B^k_XGK~#IXLOP4!ix69wvhHKB$;RsyTwtGbXW z=wpTFI7l2CjR-{CFTn9DnFu1}Hfqy({Bv)$WER#q1DV{nkL%6JH0Zueq$QVjG{P-(T+OHE*=2I3^+HaB<{u-n)* z{;K-^>|_MzJ^x#h%K!RANE9HV1HL_B8j&Odv4HI;5z+UB#>|HsF0^Q0ObSQPMx|I^ z!=n-xJzaF(+Z-;|``i$|0$I@tYweHXi@I`h-Bu5Woi1Lbh==o;@^+@-QNiODBs4rQ ztLg9qUAWGCdhnR138-)Z=S4lP>ugp;Ng~^`T=r*5!SUD#9rH}NoCuGknTePBc!@RU z0q_1NWURm{y-bI!T)Ux5^_{l0^Sibv+)*N*PI-34N9U^r*!LvF@5gYLt%inkph{kF z0M(__(2f_J6OXB$y|Gb19oQ3;UA09)Mn;~BsQU(rgmgl3X}QJy=0BoABP|eAu(t1- ztHUnF{Z(3wF@6H_l%3&NZFEsAnlP9tt%#`+}ywWFA z*`EJja|{lP`?R@+s8UmmGk)THtle>{Fm8J+^Zqc%Ch>S>h+t@NaCffxm`6i@rAcwa zbmsr`)V4o+d0Ab)pB&wn4-M1F@HpxTqB8l9uByoH=JrcqJm03@{x=8xyij<47ol0X zf*a) zoPI?}zmzO2%nA5nQa#T@H!>T?8l*nCte|utQm!NKKsb9rFAA&KY#n615=H2mt|6G* zsKhbMh%L0kCcf3mrRqYI<>yO>u{3+9jOV|Ax%EzcJwWKg`8!5+C)yVtOdR-O{lY48 zXuqP(LcaD$i6a_?bK-~f5EubdSuo;=*>4aF;$Lw5LR2AIh4kg*4;X$Fe}hE%h95{a ziRUy){eV@fRxS|~A5w+$4Yr_;j2}jVkzX1SrmLpvgoRph?R7;lxw=uLEB~~d+@fJ> zeuK3cD z1~+b0iNY@K5lt34W*{tNN&&z$f7OG9UO+7E8|!P9dU<&n^Obc2i-HsDjm68VMQ}WI z*!mURfinVIH>)@efa&6z7?z-A?%FHS`QgG&L}IGc2Uw-CmX=(oXNoc-#Mip+C|Q@C z%6}CV;4m88ukjG7MC0uI^?~cMpJGkhwNFDS9VnP*P2_;9`2lIQxqdqojN9?Cjgf0e z;FY0Ax6)7Kf;v@Tb=X!09jFTY6N?GId`f(Dl2!PD9X)5k#}?1poMUvurDp+=w#yLi zMYb{r2_hUneZH9rBJ%!V;{XAdmfFR+$*_4bm@cuRsS>R&s+E-#|2Vzd<|o&wp`N2c z6-Ql|_;w&t3XhWi2M$z@LOz>zr~&4jYBCGmLZqckRaka>YJL%~G}&>Ll*SMuC5gi5 z&rg`xG|udnLEJEJ95t)hD>krtC5xsZq0J0vK#1Ka%aW}e1HGgq%KL~psm+7Aw`M)s z#wXm{dxsM(iaytwy$otfYAqp6+`%re);rR9o9Y<#~LY5AA}Qj~hNbF-3RLl%0CFW_cy;892D? 
zpG^kZ_fTfsX>IpUemJ?H+%%1;Nt7|b{c=USo@1V!>)7=tEuRpTXwmtIaSK9B7iBl9 zh+j*~DX=q42Aj+>NGic7uRT~)69IJig>2_Bag>qdiJg!8JXAN=6>2C<{y1zb$(c&a z_sk{-J}<&$&KF8E$6c|I>QuTjuRpHwAVj)@plT;2TE(Ju}z0`9OVk^1Ssz# z+Y#*F-D&?q#1_>uw+@wzH;ae|tbX4!tRS6NXzG{2Y5BrJez)7bcm{wROQ za?Y?YvccqG8VT3mVMqB>Wqr;GD|2VCRd6uZnWSqKhFj^IAyvlUW zpe)?@Skqy)e&6Oqi_=nloK**nZ{K0j*99DPSiI036=Zn5xUYg7c<3>Q(uAnmLw21s$(vU@YTi4{dU4OKO>nmb-M}^mv2( zj%BMmhWY(|76DQFg8H%6biobFlHJ-Le}`Cq2XKtkdQ1tki-D6NFPw8Vz`-sI63roy zoLB?lG6_Xw%$tCk(&bH2sM!@EsU9}e7>WyI;B}~GzOFVu628prI4I^b=hTFyOnG}x ze_K*)s`!4LsHVTXieY+UDj*&Pv36UCe+Lk^3H&p=9-|YdX&02i5b~`$_T3s|w+MVf zG9jlTmndlgJN0w zE4x>)?>kdhvWrb^7HA8Ww&8-!jgWSlN&y2m{vYv};(o*sZ1fMKNJgJ{#?GYgn|=(z zCe6u)nrJs3#>nfF#}^;XT}^xNNWcJozLaFy?;bVJABFQT^uK*KF8;)DIXL>J7{qVCRNK4MB`WVs4kz^?{~D&bIgk8U^bsgw+6G z2OS4qlxu~^I<4?&lh<=zHvlnieeEN*j_0MGSp&}0m$9&nMr9!aEy1=)9LY^7@1!5E z$&Rf%)%+bsje9rMh+N7DVdt8ao9Tm}hk@=6O{>@4A_<*f*dFN7FmJ^!Bi0axujfU? z!l)`*%4YnIzZyX@rw1KVSMXllAe-Vtfieo@nFMIpSM&`BHH*^>}k)g0q6G>ty1C@V?TI}neFo)e}95b;GCu#inkYZ!x>%C zT8aDL*3%J{kKBFDmW=yYtSIu}{U~-8xd}JuY>@+S#NVcf*>8&s@P8UXah-E3!qEYY zC*)AA2~-baFXp$-q{==yIwO}6VjkiXYI3m`k2Vf1xoGgZL^Ju%1TAiT`H`~2VdA5mhyRs!sQd<&{1+I=7d=rkE;Mo#!~x7xJ5mgqZnf% z@(Q~J{1Q7R!Bai5{AmUrr&_8aCxQc~!s1!;Mw&t~h{Ia6j+q_B>W7sERisluuF_l< zmcwIy*?AF(3pJ-qtZoUVtF-UvXvaj3QCXMEeC!L#sRb+Fe?d-&x z!XCRY8IJPCgl^fA&fr+zg$vSLudgiJ8CLC1MZ8Ef7nUz0QD3~|%o8V|^d4)w>?1vL z&Xw8cLD)4-dAtkrzkrWlc;prCPD`lD?jd5sF$9!@dKsdG#JQK39EL7rl~R30uzsZ* zA=$m;$8yM4;iV1Fw#-@Wl7uzrCK|xmVjCrZ`5anahF^AsNXPK9&q1m$nMt%I9vl01 zaYAb`08_l%h|1#?8vRkiTtHz)ZiOd2EzB=Ot*a0Y_$h6l!bN&jzrd_L{Zxz6pjgr% z1A@JEO&6NiWFd=DFjsP{y_ziB$!sx|W_hbexr(<$5v?;B2->*{$&YYxV zg-_nroqoX#Le;zX1R!zdd&bbOTC6WU^qyB8y`tZxsKmVGsgjMTx?j$s{Ruya_2L`qblMx44z~+nA!|8jdR;<%ctU&RmHF%ybd1OL>u-m^E6%uprj` zkKn`I*Z8nm5FBxZ-WO98b;g_rEXvl$I$Myiv10Aqhj7kDj1bM`jG)xKcSqZ6AAg$E zEt*4f&L)bjnML-~gy84o&5pkBw5s6k&Hfs5IW3BIr`=hc&&~@ltfHT+DY(rlbM0VL z>}`xzW+p(!)F)+LM6~(v$S2ff@e&~p7QtFXngR}+p<9(T1ZDJ?O~`vG1ST_dGjfoN 
zwVVcfEpNxaXBY4jP4~fcy~CNoL%%XF%dO<+rEm~{zKrzZS=@q}>#I_lrJbQb2A#$- zStu#>#?XpySE{~!XqHkn4Fh3{DRaY)ti3GNT5j_&vPY_TZVe)@k&I~_wqq;DLft9# z_TBUB9`00fEqo56Ukb?=9*T_GwB8+D-xtZw`ar0CE_{5SWLFr~Y0Aq>X#qtWf=*v7 zI+$M^5y@tfotulB6d&uu4rNaRRJyCT?;Fu1=d?=x4D!K`#D0AY{-pl$W5`1F=PZ}& z#a+?6Evfyx$30sIH^?57^1H7}L|>4JqSW^{5g|fl1>xF++_iBxEuxf+n!hDv zaQQB@+nXcAWADxk??|t2kLzaM^_?(ZRvs&?=0#ikwK2n%)D`~cL3 zsS5aAW4tnSYZu3U2S03lUvbw8N-IG+hIV8B8No}qu8F)$zb9>@>A+1!Uq8xJ z(e{F~CpaneQdn{xVF=e^S1^vcto-baFlVjOo1ck4;3?@Uy<{$xU(x!{EJj??dbcIw zwXejfD7F?WnW0eAktkRv)t-DZK01g#6}BIRVK_|@#pIjs!UV)SvSf{&CTSrB*w(;to=;W@f!4_r}WQbaoa-x3u=W1Db^6o-{oxV1_H}2}H25rZ{{0sq z$FfOX1Jitcz%`(Jc&m=y%{%gKqtiUKt+oAv7T;H*oll#3nqTZRiH+XKQV<{_!x&Z) za4GikXWQ?h(+2cg2V&Y7rDiGDprt|*Se~W_3BmY>=NsRos zzv$9K#2>9}E4vUZL~O$`O=c--CRPU0Jm9|2rS5iJW5+y z#?GtTi(Z|oYEAZ#!IwF5hng8yY4zh%k^#z8qqBL#dfq>7??ShyLyJX4I1sUf43deG zgOMma;=EdT{0P3fmA$0x*3&4e<1e*UFIhKhZYasD6{9~p!KL+Ra6e+Aym%9AK+yS_ zqu(!;u85B+oEQ>v79IH$R?L5uJ~mJ&xmaROK@FutVCQNxzVOwapw1IgPnf@C$9 zw~0*A^Ih~T_D%Gd;#YsX6#Pk|pSZ(ZiE0X`oo|VE=Jop}>^!H4CI; zG`~;M_#)h0OQVf)tfkti-l2E(y*G+_Gtil!dLDO4x;wooI6ADLa4O)&f98g9-a98$ zCNIEOB*tvsh%e4zRH4UQbyUD1y`2Vcv`2P$ojLFGbvfC~P6AaZUx~x=9>H*tM2Qd* z99mxRme3*i@1Xwue*sYK3bV038~*7aTw4S<_&F##Qq05-wSa56-fW$I01k+GR_R6g&UY<~3$G)_ba3jFpA|v*NCWxILNYnX zZc&55uOkznNB}h*CPmg+$KS$M+*HN4B<0dE63Y?MG#1#i-`_mdvkK zAzDJT!TfuYER#tI5n<=|#wcnIl+URjROgX{mLi#<*qu+`!IFRXPc7GyJYwtH+Lr$I zL|Pun<}Bdz;YF<=sqspq@f`zY_k~W>=0rAYi0955VMsI1BsU~0ZBpVJOpbZaLTj@H zrau*?jDJdIZ`IN{ZF@47=udAkHES5&iE(cwHA<0XkQeZS97Y?B#f#=ux<%Fl0JeR2pq?CKu;Ubmw4s)X z$NPQDL0UkmGrm7hUxy#ScQ8+|^-)8f5se@|B|cSQcA0?^K3noiCL@oeo@JE7Ji@*P z{k~ebqF+~^TSfw6D}6A@xHzWNanvv!P*bX0j0FqjP&njHIe!#CZ5tC{NjmaYH+Y^x<%kOzXSuF}5s(#AX#1t1J_bQiXG~nr`IxZN^HuE4%dq zK_=7%O%j7Sf|zF@bnBX#hB@QQ_ay$@I_9Jj!71$`=jM4bTo0?2SzD_@wAagsov+hF zbNDIanLX5}d3Pfxm-MlX6W=xWtj+On)U)t}&2jqP#nqNg8A$R|qB}^(4`5q7vn4}5 z*s0~|;~%Jz=-CMCmsOwF2>z(&%$(1;YB;+zfSe~gbz^Qo92#L!-;gmCFm`?mB3DHHibvCCwPB$0`@U9h<2CFBjci!WX3bIUMw0yUj0C-+F5x% 
zp^UzJhICYH7C<1`mWuYp%c#4ACofok40+W+`e9FJFFoG$9iv-l5B>iK0YU!0+nT#c zYNp;Dvl@e1^ja=t{F?CbPQ(rd&IaYyr2C4t&{XzHnvj;=wUhR1q;6Z}-divJ8b&}o zTI8IYrYT=i#<|IrJr8o9y1Nt-7z!=0nj zr6`=RYO`i&v~o?+GHUVLw>y^+6OT1Beul=+5lOK>cH_2{n$6xnC)1G0(XBO?iqVJE-m&;QERDVlVseL0AVOYdx?#sl)3lFP@T$k)=xioki?C!5?GT)NhofO zlX@yzOHo@8QRZbFeTVA^f%s2O$z?nB%bKd>cu4ug%r+jDCc!#yp0QXoT5B@vK{4aN zxhcA*_%n0OW;VppS1)RLRP~lvw{BzNS!q3MNa9+WxQ4q0<9=%_eWUf87jK&Re$Q@# z5aQjLH64Chb?;}huY1RndYe*dDiQ|} zeWbOqRoR^bwwoTWVbT>Kaucn6;6nlntGPk$6T!5Q;IIzVFbpw-0PB1n5xHY=f@zv^ zS-;&AfVu#N=yP7pgW6m0`1Uoe7l~H}JIk&S3FsmH=br#TeT{^l+vo1r-C^9n z%I;kLRrBJ$(KB_m-h!9sr@1fj&#Fu87_NBx*2$cMhE*QUeJ4<~a!Fn-Ipx6Z-I>10 ziF!pG2Oq_)y(>E&&JAcvu$N5{yTkBIS}jvGDoPFI)vyNVF*|^MT#V}1dT4{kM=Oc_ zo&@5_gc1bsYcGkvCXmP0jH9DToR%%B>_L*CdgtzTVYxN&x8{jA;F@a7@~nhBzPKFZ zvPVxL-SYd%BlGKOaa4(f$ojw;;hYg-nvC!mBPQqEWIVlf&TCkL8uz%|celS8R z=kPRSOjCW}!0+$A3^;%MzUjB#btxg_@q1ryv=Bl9&#R5go7=?rHK){_EtmV~F|(+p ztF;ZcpOIp^NpsGtvdwelznHM-h)y( z7Sg&*c`?O?sc9w^8{!d3Qw$LSynSxJ6s^yc%QXyYzpKjZnAh-(gb?>8Z+9^HD}=Z^S~@x*#&0021{5ILg6BX9~e z(EY2HyTpE&a^3N4*8P}K>*KUv8H;IV$JU;QaskSQ9sC&vX;ib>ZbW(@G?*W-{zXrSJJj+?OZtbrK{EmTNA~Uf}9$ zYvpm7ZhPvC({jHQH=Qd>A>{r%>BE`ap3ml-#hY}z+p_#r>o2E_nVb_!O@JV!l(CpN za))!S?u!6xJ+R!jE`D+-Gl8zuQb^bl=(l)wT}o+k0D}+!Vyz7z#1MOz6?fKSI;5cu zShcP0J}>z#%IQprlNL?V4oPGmZ|_&qV@M%3$KX&~*5xU!eXhSXK6D)J;gzVg-CC#U z;gI>n0BY(!2{ow5h;!M4Dku1u4)s^=F^?riS&i%?lRAY-ZwDfF*2(i0LB4N++j9&` zlX1NPO%w#v7mKUGh$Du$k0mS&5$-;?Hc+M{KV$KvJ=W1_a;oDQEf<=E$=oatC3xOt zU(o|3jz5KrcwhAFceEO=I_DyI{OPWjudkX8a2BGXG;x8-C7R%DbZQ6WHWAFWW3r{Y z`+axn@~b;St#te?j3G^*J&7Y;>*i4hyqbPW0;t#3Uu9F3P z^FtCi9u2aMS{7vRgS1NxpYONM{ssV0$L)uB4UeCGXne)8BP2<9S3_}}!;_-2r%t>T z0G@o{P4RWg{Bs~#q!#-^+%vy6^BpjRBupcx}@zLnPf ztFC7-3FU+nAzLiQ?5DbpBQiLbUWJv2EsQB#U&lETj+qG6i9)uG|GJ zj2C0fWKlw5)`?PHeSo{jQo$D2d4+n{aH%w{^LF>mGAdO@5uP!oj3s;z1=G4x&oFBD z30|Ev_*RcZ!xC8#c0G)@GS8NqKyoWW=XJGiq+&^u|LPo$#(&`+x)n}s>UfSTY#ir4 zC5XiB_asOT0OWw*Cn^AYJa1J+ucdU_U0xr&99E#(yRrPDl3zJQ2zP8V@DwBNeLDA1 
zXkqL;Cg1ewmP9kN32sl|tb2N>o{73IYE4qh5dZ*JV~|4{@1$kSb^}!3KMm@y004wh zE|-rQT5PIv)Hr1%-1hZl>RKsiUp4pI9Ufd01c~+Ll+&QpU@Q{P0tagn5Oh2peJ$6T zNa_bw^ND?i6q>CgA;0s}ygVO|FjAjtk!g3in}DB$0O0#RzJz;N4~7Qy#f`?lT#^{W zgHM;M@6An}8zw*T`?F5^)o;!QfES+r;8(vn8wh#fsrTh3-P(u=07We_8X!$W1d#Z( z9M$k8)|b0dYM-_CsktWhS8h#nB&S+U_7*M8UDKL9A%rnU z->lro2%&Nv4r2^}0bzs_vAA4z>LOYZQZCRJ-arDEI^KCZt&+P}ka|eHnQ_HY=A4=0 z+%d7_!1gN$A@(YH(9a_9Qb^+D;^D!g_Np~DsdZNFRc^7wW7gp}{!($?fHR}q)k*8Z zm9AaP2RrUf&?h;IGWp0^^B%xe=e4TqS5l3$#qJ(7#n>9ug{O|D1V5t+0P8t^kaGSQ?~-4DiMgqZf+gd7~kCDq+2zZS~%i%HvZLaplIxz{>= zw|DOTNi%eA`TdZk&>-Dv9>2Q-)hae2gisDZ-RC^2cY#lRmL@`;ZJ_q@E`t`o#naJG zSRDG;Tg)PfheC>klu}B70>oral}vezQ4<*F6x9~4B}-;z&S8x5}$Kb?HjfD2{piqpULEab1Du#7Vp2X<6r>5IMwQCC>-qG zvy=F`WbqnX1Dr7y4hO^G5CDY3A?i>chdeV=`pukkd-1W`r*v$7jh8iJ00uA$lv8TGRf=;8rr0mb#R__> zPI3?n9th>i@+bAFC0K{0$nV^mh_V&PfDiy?oh$4z4A${Dpga0ncGObv?7gHm;+2Qk zuMDuHC9=Ryt>O4JE0|SGkYLfR%%DMoGCK~xhpNdb`<}VX zoHcc2)g|lLa+hZS1`x)%sT`z32_a%~+PQ+u1P~R@Lr!o!!IQUeAseL>lmny*G2+$6 z<-yt?9L^9^&fQX+9$$Md+NCNaw_mguZwro$SS?PAkhFX4;N;HbVJSDqMy^5}+85KkM#DQE8Fu=CLdpa%&vG*ksLgeHN00B;cXmhF3R{@Bx;gJ`T@zLt9@0#<>X~z%m(W=$L%^PRs z?;I69YX0A*g~A~K*jrimw~vE?T%_$RGc3S1?%hU50qDxmmgTacGJr5eyk|3e1%3LFuyQ6!H1+VFSKJ-59n81M$cs|R z?wH+o=~pa6%mK7le<$(|wXX>}WNy2lvzxUj{ z+K9B);b|p2b3};aP#q27-%Iy5K_v&T(V8lrr`Xc-vrf1MgAjud&i_Ai-yNSt(L6qT z_oR^C6Ck1Y0HH$?daqKXHw6LdMN||OeHBzJ*g%ScC?LIAkd8p;kOHZ+P*O-pZzOr1 z=l1u<-tFzRCr{A#{r={Y58QLNWp;LUc4l^W7Ly%oQj0ju*>~Bhih5Ooj#OcLkkK7A z6&W>0wiH>q*@Mh~4H)^O)dA#tJfyoL4WP>T?wIZ>ed5*kzwrw3DaKk4I=CSTDOX^ZOUkb zfDo!zzxGS7jO;rgA~WOZ&zle4x&2_+h~68veBj~fW~1Vi&=X=uON!P!_8RdlfSCe zBT3QMLBRf*Ml~vp^bDFr#Vu9VqDp`nb>ANJY?NuFv)WU*R%>}a9PG!_D7~GA*7MmF z2q7A-$F?b`Te2;j|A9V+_0)ZgWnG_@igdk#6}A{-jzf}rg^Un=JZ6;#Wj|3s4Lp{=ExQmq%={Q9btqNM7Z>-NiS zR?!C#m(z(3#h?e$N`4g}`}GNjRhc2XBrl`j$sdJTGuJ-F}qt8*v3z2a4SsM)kx z{Wq4sGH2n0EgSdk-E~xJZrWfld-zW?XRE)m1I%~=uEuo54)2&7$rx*itphRMUhJ+Dd{xr(iJ#n<_G`&cJhrsEeI()+h?F*F@2x zLD<^@0DuIL1scy>=p>mUVuZRX)P3sL2OHy-1rJBpxQs{c&JN^fR`Ih&{>@z6TwWYI 
z-!Yd+ck+S@NyU)@)&fHNO#+Ca5~16O^vv`N$Y1^GqG-~6IM!;mf8SCdWZ_Jf<7 z%fh8o2Mp=<;d^US@1?=NF=?s5NOzG(MOAs}ppvG4koZey;f7@SY}Jn-5)dQA6TJ`s z5aP)hSt19OgRqKuhq{Dvhej^hOB? zk-4gDMT8V)j6Eek2iVzPHj=4&Uv>X4SkjgLw-G{RG*VQV?r{(jV8Zr5^Z5R0#_ix+09gj zg^~*VM&DN+-7hG}EBEmZXxOm9&Y#cNRgjDbVC4pB?HJZ2Vv}a|HvY7#Nz?i{PxBVc z{?D)xeO`ZSW_1;P3Qe2U-}uu<9XhpCX>z)?>voR~(X6Xb(livj+SCUP9Put}x> zMNcdJRmJMg1nh%!hLiYXbb>UqU8aLM!WaZVm^{eOSW6bGa=VV8LX4HU+UdvDCbl`J%2`u| znC-^eNs50$F%?!e@TU)24ykT+Hbj%^rR_m#PjxDd^Fyf!tmTf6Gks{%JSmo;mbfB5d&<42>&8(Je)>St8~Qpab&>@ZLj zt}Xewc?ik2GV%3_+uX_K77~^O#9@Xxj%NlWCv6TB-G`XDC?4q@x5diHl|)2oWR2QS zqiQ4lDhPrVtUS*%Ddl4*RCG}7QA;!p1!N>e3SdM+y-B#q=h+#uh~kXoRc+8GoBowO zORC|O+)Hz*XEo9hB>FzG!&@oV!Vp2g0s!VX#39bX!O6+V$-%)v5CklgS~xSgei(6r zDsVMhjh){#8zwsv&HT_)-mO?Sl$cb~>=Y^y3i?7%8Qh}qu?kk1=pf< zW~S~771bY0MQ@HIhiOaOY1eyqXMlh}O z@M)cg^E{6bx9a^J39(Toz6SYZu@?IUdf43(8LCApFK>^p)-Lz(bhB8j@4orflxc-f zMGyc0AOJ~3K~y9DxuQ?NWa3w>oL^K_a`|HHa~FaTYTUG5%a%>NeLURUTuV#Ka&q$H zVv_FPdrT9sa#<_65CZ=IpSJCq`v>}XdU_xZl|QS@NXxo?=V9vobmN-oNJ9vLqoV@= z5XXbrQV9U09_JcPPEIBcaR{NJqB4vzQCS)^3~m+H(9hq;#nn0IY5x7Yk8fN{&_>V) znZJ32kdu=`%hrt>HVX0c_i=Hp@vN-8(rnIt@-#gyJ2~loX=#bBK%_67oE$rJY86ta zrl*&?v$JziVQFS&)|HE~d3pH&H1ZlK8H7{GaqX05V*+F1*6GVNwnx}7$FJ)|@b+ttx$$8V}MtQ6QE8JYZ?77t5m6oVKc^>|%^L=5DxZRivtv zhPlL8K{u5=u2OAM5nq-P|7C08-v!&fP^D^aXAjzMKU6D$4Gq#rOV-8NsA{by4Nyh* zYDXX4B_ahKR?L7(9|}#cwhb_I&xUI7>IyNVHc}@&|3-+^6^$=T7gwi=lV2D$cA&4X zm$u~ev?o7q+;jBLvsSBOX$&w2NOX$xJQ^^#>%^(U>Ng11MK3?E@XV>J`*s~q&&bxW z9ToRj5QMEeS4($kWsV!UP~6uI_^tP5^cxUKQV#03yuPNcqAfq{ zJG}ptc;*$e5}Eh*@t8GtY`=jKE-p26QNqX>3e3(Hy5m{QNY2pcP{0CG^ahSBdI%g~EZDI@P+1TO11xYH05~9o0E5+P6$GfP zunN|4jIq^<5%2)ywgeLfWVKp3P6|Uo;!<-CppaIyUEd2 z511q6Ai_NuFbD{Epo0t1Tc|A@G9gY5EC`sG794M)1ee=_Bx49m163j-qNm^0sB(zG zP8xa=V_?8YzOhUCNY@M*yF$>jsZ>@Ri-4gyXMiW$OhtEbI-SeTP^yZM6gwZORz7)I zSwl>j7y(C*1PB<>7IEZaK?Imbh(}n8%Fl~=RnBJdTs|Q%s~5_W907nFj!c%c3gw2L zftL2GNc5NFo`xZ1G}xhv5W?6>dXJ)nn6+OTCQc%$Bwr=H93eyuU6PD!AGcbG9!%G1 zSv8bgR-@=y>cAo^+vKG(0Kazkxq_Ee@4XmWOPazVgZtyYWGEC?3FSuw^O 
zG7-XBtpf4lOZ`HG5^$UefD~dC1T_MR>ZD%N3UH82ItekI`B7ASbpgrud#IYM`<`Z# z9u&u%L^^}aMM!35B*(gt_#Yuw>mXkhbP$+OimV~Zgw4=K=feOTAPZZRm|Yux97#Ko zut`}6Z94)$j9>q`%&7rGI$(_V?K+>G{m|FnbN2k9?K(EDRtc1U!B(%x(0@?3@W{3R z5Pd!2^2L}f+gDc~>ngy_&E$=>3lt4jkP5 zt=B&NnKa@(FnRc#ay=udtwG>y|@y;syYKIa&W;+Jq1qJ8|Hg z1>?!)8BJXtOP5U#k8JzyTWidfW?)8X^Uwxgt$W+kW=KEWzYBf1ptPU7stJN%E3qlw`p$poUcT(#Vw6%?c zGGi4O>iTKeG*!+|h<(=EXhEJ-1=4P1t63lZjVVtqV$eB6@AAYQ7JT44b!TB1wEFNbWO1SqV^J#uwQDvps#nriXaA4ymb$xujK(yBdqJ$G? z)%zbU96G!Y+ss>W$rb+V!Q|t#i=+F1x6~_AhNP3eP9K;ou%n!*Z#y93%Fb`n~{Zjy9Gex5=T^(aw*GP z75?;%Ry(m1Tg}9*2))79qN{7_{#4Bt+Im^ERq|9ZYm%RWkq4;CCMq`%NCj#wr78uQ z5i@8M7dDF`jqUQ;2ZRt3?P{XTVT>^#F+!_Y7v>O~7b7ftXKAL3`KKw3+n%A$-uJ%>~=!b)49uHa%6SvkH9$W5+>zd(Et)RI<3 za2tgngpjn*f-zQwdk2k`g|XVNq56uAPsrFtL;I5zJBm3-sXrJfmsML7RieQbix8r| zFwt?2L@yOb<8&a7=Xn8J1woWH7_5MKvf@DGoRNpFN}N?Q4wm*}+0Vd;v%@!}$2nTl z%P70S8{KP$1?5kgMc(M88dD@-hFluL;?2m)YHd#qy7<@zj0KFXvR}XY664%;Ql};WgtlxvZ{g%`zT8T_VT^rzJyw3Um}<|tIR%@(-*fCplxnw+ ztD8&r9_>es?FaUad@ZQhXz5$i5JH(5Pgi}g@n&oaZ4u9-u@eTqI)4H&ExLAZ|H8;V z2lgHZ003^%OD}kNxswv2&P9Lu=});ic_6L8TCG-$1ORB#w0{4AU7I$mj~O4v)aj$B zqW|n!`TFm+AJ}`$Vv*MtY(# zo|HXsQXo!+Aix+)JN7heX}E@{Z-TSx5v$u8Pu)62faoniOlgv?h6Ic)0uuv)saKxW z=xirBA2mw9@=O(d4|;5djhz;3M%!=f$t5YBiWceSRhd9cp%hQ{l)1gQX`0FGxBwmi zI7U}yEehR--)R))nLv(h#S zgaNU7g(6-bD_0d#vDzbE07ZQL|2t(Lu;PMM8!+<4{^ViX)D(Zs zB;QX?zROOr4V%=RyFgr)DK0L3^_6#Sr=-#b68*+pVgBXk1E~)luleR(5^;ag(wAqv zv>b?g7ytv%vxzFuRZMk1Ns5#eAx_>tuG|}^Lq+@I&8upuRu*juMx|klIUWPXAc)!` zQDiaWmoh>?CGu2i(7%GM;=Pf>iNRCcBv0l5roigX2du)ORmvm{#i+6DL2yI`x}fs< zQ_D(P*A*vbx-D2jQ?2v^j{u3y7X4;K?N)fg*jBT~NtMcne7l0iyCxph1~JfwHZP6A zE0=>gaxuH)^y3Lptya^@6&SY{#2${`hL9ke?W*Wf628C%e zDKb<6m4>EW-Djf}Mx%q?Y@^Co$p6^YgHoCC*kL~FNLEFj(yMxuPm zd$g$5OMdb^uMDM1siE)GP`#|W6$am{B$T?zm`ZYWD%&2-FN`C1B9NbI6Xt&i7^>d! 
z3;^}&2RCU}9{}?6i*CiGEPj1jdYX8BuXH%2s+y{7p|XVb9b5i=`m(L^5b6@qrg@8o z;wJ#8Q?FLfKAlOL%1ZM)Z+?}Pl}pQ4 zRYx9*i%P4U_!Z3(b#Wcp_sy5vSVf;L;nxVqLDIJn2Y-NyhRsHQ(j zbK_c~#Y{>TT`BYw5yqGVtCElu5JFR59!2*cee}UvqUZzbrx65l%Ie(TR}bz#_Trd< z01(!ud5h3S3GsI*Vj{O7Rn#-Z*<7Q!O7)VR*o+K=e*#7V`jgM!?mw{m;J#C`vT~|b z8nEP|cV;tRwGxLRg~G!@5yR3O(1z`;Fc&!6n)@3V8;;jh2k3;>coBx_V8xE4Z! zxK$UEcwM~Y0^|fi0Od(C#A%Poh{(vcoN*9O_N6v~Z6&+PacWecolvEAg%JAilO=uo zM~oOeJ6jUk+S;-5rTsXFqE5S3fEDAnx}eSY97-hl-=368wq(XHQYQe3KD2x-xiH3< zDFkCgHj#6jiI}L!YDGu@5Fptu1;h0QguJHT$iz5 zUtuyS)Jk%F4K11HI0pa=##W1k+*T>>%2p405a6mGz{3Xgs9!v$b?#e# z)lGTotirKcBbL`&TM>M{YN+)2w1+0E% zKd4&t_|cQoCoXHPSo-ku=7U7n2Y`V?yY1g|8~}J}$DXgR7sr_}HcL4rMIQu&*eDhl z0C{eg%yh%>FfBNJ_?L^L+{yVlECb~YM718yP}x^0`*?_l#rbnUQq!^#uK&M#ZK3acHo ztfsEx*)r1ARBvQCF7N;#MoH33tde(F;k#lbpt4w|U#86%cj`pc<3|}#9iSz=GHb%g zW9J`Km7OR7y3B0D#WHO8fYo2TXQSB2&M)NW7wQp{_4jZ9TyoMqcX#*7N;BwzjF|DE zCogfJA9xm^Eu%NP{a?|%a6BAN^aq>}ADK{c9(Z&$df(?^TJ(Y-NsgAY$ZvlGV4PE+0sjN+@bWEw! zOAk77Op6{c45<-T;sP489t#*FtJO-=a~wybqp&j8P95|gLbc2}Rz%PVf}IBmUn%Df zsTRGe!4_e8X>}X`5Ec+{92WPhTCG;A6j({sUozzduwW}sdq%1eGym#)UbIbS+Y3ys zwt9hCs8xiMHi*XsxC(;k0_{F|XkbmB+FIckSli6Q$xIae^diSUl0EawP2wJX+77v0 zR51_$z{}Y(v{_+o&&p2+J$!j@V=IK>5k`;BOt)l>EGPGsWA- z)5S$$>-hP3^Cm?w86&Gv=O>8k@TDbX#l@v;Pvqtll$Tei@S&{0^L)GZq2%SgI}cOu zKi0*c6;;=++$6ewa7d7!pLcFBtebN`70_cjNsn@6>J zvlec~CC0`i0O+g@l99_+Ew)imyAn5V+O>JpE;g@DF15Q(bZE1r79|6y*iW&KpH)6U z!>h5;i80X$2!Vrx$;8{h4% zm{IRP2UoBqQv={;Tyk7&G5{f(cUYU|9Xqw2`tryX?|gmrN?esy8~G89W)1eOwgJ^r z_5QCw%igTIZ%+v-HdYpHFB&QV&U7wD2*j;qdI#=VRI%cYUIa`4O}f=YD;U9mKiTwl z0!u(JmQ-vNy4E8bl9DR=9LE8YscoZhUE(N#1Hj}9A+jI{h%=Gkn*z27WTjZ1$!YKz zR>Ubf3n2tRPW_6>bURDce2pv>O9aW5GXT~42FG{9Yz>qF>a|M$d7=f<{V1!-TdPHTAr0dn&$6}3qYu9)-nQ&*?V)J zuZ({Ku;$ual_)3Q>ejDKzCK7zd-sERUAuSqAK>=xvM-MxJx_nKA_HUGq-g^eS24PL za?*XJ#!Vf0sx1R6*#isD~9F4g37%_JqJh-K=Qi*H7tn%pc+01A$orT#`yfX%kvk%6yCMn z_FwkXl)=F@L+aGJclTk-R!!a9T}z5fY4-4l4jAJL=Wo!>-()8=Fmn=98^>-cr(Cb< zCPp$U`xHs{_#%aVsa5nTS#%UrY4Fnth9(GY*%Sb$a(CXPU_oZWr+p5Q!X6SXgAW znx!dcOAND_?y5_U!5J 
zCMMjqy<&|x8-8dV9ga;;eQ_87(Y?4z*A?OfI(GHMvl=*o!AA;_hXG6=h^`D1&+{g; z)k-PG>Q77s96eS+=p_}Y6JePw+sM@_%hUzkmLP<9o^T-RS+`p z>dqGHLY(9_5&!{0JRpRD<2YHhKpX}T03oSV4g&`9e7u!xrK8tV5dk8IE({KFRxBVa zF2RZYE(lfx2xAZ|q?sHb5QIvM&0^azC*HKcl8AL+i=vMM5O|*Fc^-hXTFq7~#(+6a zeX^)Zvqbl9N`w$dHza^+ymN|ah<1x@1sH4GSe$D5qxb;B6=X<@IB0AMQDR$|pgIB- z`z&|03@ihHKa#wQ%N;mQZac;pi3_9}149~~h2{8<6t7vGvRej~K^%K$gv!4PjbjzTK@wG2=rxpP9^<=Y6j4hT_W3V{q-}O zg&_cN_i&}ZJw4rh{k-S~o_lw6mIbt`75jB+ zG2Z#hAAS2r`1yMS0LO8C2ZZ+>5MEwh5gVO+D>ga$Mtpqy?aGQu5qYU~HEYYXTDojT zMMWimB$V|~0D$0-Ao9c0!;Qw+%LWha-L><`g-fRQAJDx<4d;~Pd!MXad;k99g-fOn7}Uen z)#Y}|{na0TpPYD?G>Kp}YwpBu-8=gFd6yKI#YQJ=+PEV@gVov%E4%gR3;^pktpb2) zlV6L8jt78RwS(r&n-U%=A^kgy8=EzH{ci+@n{xpPx^0acN9+{Koa$;%_CZ3YWEKJ0PfL zz@jBHBD-~Tb#qO7oN;j9U)#3rw_2@oQ2<|jwPMiV-n}AUeD{MTefo9n)pgALh0}WW z3HR`DPfvUD>-K|tcl`kXj*bo!CXF02taq*2frvxt=}-Rn?bNoP_X$$aK!1PV*>fj% z>(SYd&^9J<^Tu5X@wZ7`x~ym$1gmgh-_hcdlFz?-_pNv4O&Pz?D5Z^o{QVCf6Q}#M z8Dpo-7`t!xkyZcsu4b)(*$XCxN3{3$_ADtWyA_x6!^Yk5w~_#JtG|4!?|`m-x=mR9 z<=buAg}(dN=O>Rv8C9WpkBQlU0Z^;fO{Y#D722|~rLBqW@cn>+WJZB zu%<)%&&+w6M@t9@^gaC7#@LvoxieOTwrVo@rQt0?8+m!T=jIl~#UyR~ephCO`es90 zL=%MH4xTD0DO1)egnKAWk0Pywodgaxz5#8GP`g)g?l--O?-nw~zQo>yT*tq4R zR;`=9FzD5sr}+w$j%+9<^eddc1UEBKjdX<)x-HN;Y%a+4QiTBCZ4?kJZt8eEQ`px{$CretjY5LKM z^%tUI#*7=-Z(w-Mntq7mGBci@`0L`HUB|UU1FQYnR^6^NvZ<;I7>kOMR*$hWyV_J% zT3j5@8KJ6Mqfuk&hJLlORsH2AU9d4;R5)V!)T|7|ayk`D0`m6Vgh-zogHHo9KdRfHH{G|lM@mW0v7!ptN%UVc~eeKj+d8bY5r3q z<{2fd`EY|@y;0HCWR0>j5z#zzphITx7Fp9gcua}LBv~UX4DZNqOep-0|2gWE`lKBR&ySpCTe>5W_ zt3#)-C9ltH)VSV?NIeD2+S7#vb_;*^nFw|_Nr+8e4yZCby*u&{X8@O~TC{gRw?H}&2_+7WZ+ zztpf%y*;~r$AHF+9rEVe^NLGK4;?t_<_o!Sh06M?_K@6LVe-QC^x@B1?&?McVZZC-zCPNSyv-+Aj(mP6>Oh{a+B zfN$1)csu3(^4C|pd$>)XId1uT3#=C5z}}VasAeU#V?;aap8}{r_$4(1P1zT-}6n48qT|S{_!9+JtU;&*zrR) z|NQBkneW}W9%nY25kgB|pMenCvT66j)GD@M$pn<-ZS&B^8#b>jEGjv)?|4p5UT{dw z7sn3j65eL)$oa>PoDFN!tXH2d`*#0DOX%G<93gb<$hqb%8m`;)K~Z7Jp#vv!pXLXL z1dSRqpi`HyiKCYk6cp*SQKJ;n=hG)HCBJ;HWveE2>(wqUF5SHCQ&%^a{kxAoe30%R z==BalUw(PG5dZ8*#wNhvG2JueaCa 
zXFA2PPe#u+zP_OBd(esq%utuXiH1jiLce&7mbs17$UJ;rQv?iAJI*6)>J&! zPdVS4iC?o|UY47fxiEB=ecBs7MP|n&eeT74mI~-Yrxu;t=fU=ST0mJJSgHYk?r^^s z4E@E#QT2M%wzH#gGtYNwgdhzRhza~cg;a~t_^hf?QX4?jRH8916BaU#<#cfZY=J;# z&GxwW6KKn=)3BIb{;(48xY~&Li5wl%MB@@_h6<8Dtb*vycv6Idd@G93O-T4EhW5PM zr1*kj<9;5Q8b3tSUgvNqdw&Z~<#Bi#Kw5;*TU~hD^!}Kx-hT6bGT)pt&-e0d*U@~; z{#M+dKUOTOv(#|;LTNLW#_COq2fwV&^}B01j3ufQeeixP!oSw{>&TBXJBMF6LH;&s85z05c zMiM8l65V1<`w?RxxZsBqnFaX$UV1Zy&F;)I>78R2gU1;K%C~u`KU{usX#)Y}FPrfz zWcYwTORMYq%7!rh%l)ah>&G+rHQsFn0_`K z$b!Shrq8;#yyS6sJ_W9+AJJsF(XbsnPcDY?YJST&z-U#d2#7^Z_u)2U8s>hh?nGrR zKa7@N1LP(lbIH*7Qo+6r29MxU)63-IMhXz=f+p?!l~bCwC}(zQB>x6-ihJ|dU0yve zZe(y7uk9QrmB5oC+dl0?V&_I`2D9PRBaO;Pt#dS4Q>77bf?V&uzKtD!1w9_^a(H^S zCoyWcU7RVy!BL}JAHH`!%s$MAC;v)k-Db1@8v>kGYpIvG>mI0KopNKF@YmyM95RxU zZ|~{n(;OJXFXhxNRb6a`-8n=+EC(?4`7WEbrKjY9&kxbxRnufMo9CKz`_K30?ti<8 z=*p601H6Hb$8DlF6gl{u&w?W9?|$)@V^yY2%Y}A*v$Da8zO*N}$FY=WXZyG#Dt_<1 zwJ0xL2KV#AW5o?_r<<8Lj-vaM*`T9m=;ym=X5a$Z?LTaKC;*A1>QcK;=4Oq{c<+E> zv8*ydTo#*4Ik3;HN_YJLgV}Bl&QgE#pFbMC=h4*Zct?{YW+sB4O2?u5j*j+(eah3h zz4|YMC&xUU%6)t%$C5XtC4cSv!ucBdXfHLI-oH}Do5;A~P|(dSE_xilDq}-H*mk@) zOEx=!^T8Z91i>tPo?BxXLAw?Ucd8Qx+|QT>D$@&dW@&3dWA zKhxQ29cqQz6MNkXf$GCgW-oDfe(L;Fjnb*(% z40QY@Ss(lvg{4oC*{J%{Er*@dNFgR4GUu2kLK5c|7YP@CMkQJ0k8sLgZSVwrdCjkL zQzZ}1b7F{Z5c(13i@1WfzO{ISbxgeWtF~OsY+ZjM6G{!#-U2%zUL;8A(int=4Gjzc zSCNHQr{CzN5J>(4J5(y_a%=(-n}gR>d@L+1{QO;3tLz#`n(+vUiG1Wbw!oQEwa~Fx zpQ4fOR9cso?p&H0)y`+K~oJ>S|_oZvtG zUac1{26X+*h+fa4(6CSAY40k=g{g(}CAr^jZV*%c9A74jW+cW+sf@ck?K_~V&iCgl zfFXQ$o6C|7>9M6njDp$BFhTX!vupF?`zMKNfm4b9SR8QLKPwb30Lg#&Tp&exdy9m* zs|d=u^tJvACN?!ae0+9`&!flwH@?&l1baFgM2NKamk$)*USU?$cKyj@Z0Y6};RkeZM!u3SY2av=Y?WuD`RwYf*$&qK> zj;l9MGpqMz%Ny|oP=w}Fl4?*6`V!p8n_mfMei>bO4h8h-xSZ$u?N7sGnpfZ!S z7NnwaZRVev>#diKl`j{??{6MPCPwI@+3mXb+Z_B>8w1uP)4f1UOKLvSR#HN7AZ8%4 z=qxDsZc&guiW&#_}|>@#}W*PEt*;%eN_ zFc3K&E<}(56)O+`nF^%n&*mt{R)^|Hy5K>OkY2)?YKBSNS>d60(E^3mEGRo_YnyQ30IqJvJ)~0u(0;e&(8rkd~tCB zc-pY0zQeQyLqymC-pA(VcJ}sh0Re`q?D|yk`aH{wlM|z)t=@Vuy3Q&WA7YEZpYxd; 
z!)7!_&g*~_5*vrb{hLBIxxBVDVmE1EIHK_<&aV<)@VQdf3xpUm!t;aK*P5x5>PwEz&r;{p{W5tJcS};>vd$V*Dh?3b08Sq+{^@B1B zK!14N@?x_lpMZ7u@%olPqQ~z;EGz3GD+|W-xtv=VCc3@4zkRr} zm6mGmTL7$OQj?O74=;BQF49t5#VP5Cf4CAXG>T?&@4%K?9h73X@~jwt3U~?X+O7iPerffGyl5Bq%9O#nPqY}AR>j;xA=x$ z2=Vc{#)xT%VEY5vdIAEO_jfXD(>xRlyE1h;=4=*<5;7E!9Sg!9iJw|}KUEFOwf8$8 zQPu?lLep{{5&wkiD`%gXOza{malt<<2w-1<5E$2h;J03C*^9&reqZyvI7tg$0 z`@cUAFQJhu3BWu46cjWRE$S3%HVdS3gkKP(w9$mbqcd)AZ>iiqINt-Tb)L_5PxlWF z;^GoUoxchlUH6RvnIq_6=rAR(gps6}#8fB{uyUvx03`aW3VTyX(zS?X*=PBAbq!|M z~h<)TpUD)qjaPb&*$0o6JD6qp zS}`^R2V+S!j`TNK;H4#J>uPCbq^7z7zB7Omk$cFwZr~R-3m^}}VL8>kx~jPvTl!v9 z9+Q@~&}{AnD%m7)-GA9^X1e_Yv*O?Ohw3&&H{~R; zpC^Nx?NFoF`K&|u9I@bq&8`>t&lm)=>~S2jtaYeoJkGWsH7!WRGHz}ZPjfzcVBIF- zPOlff$g0gotABtXH7ACIHBJVKW?F3)21+eqJXQ-SVpnR_BFVkg0rst6f271=5daiO z(Pl13-8EMs&e23g@qPZG*G=K!(!Jy>m&;FW&3=b~<2pNiA;sp)#c8$x`T7j;zoATR zEenu=v}R^mq}F1`L-5> zq`K%;r#zT6qS5rr3Iv2=0%|ojf*#qGh(O^SZrXSvXzR%G+4yv0mFBM@H8OB48(d9o z;O)fL@2w{N$>6X-}Eh^J8yBF?@L@ zM?VuNT#@a}dV8@D6*ZpZ(+)xEviS(v$(sq6p9V3T_7sdTMTUg`e1Gh_UzW8W$CeI& z?YZ=1o-8S3g_Vj7{4s}!0u92TI-NL&JMEfhX`h{~gV;JdF2njq=W&1G`PeNPVYlgx zJ|J3%GqFjs@|u|dw^_B`voLs&SaJdl+8;Bc9LeG>%h83%c~yt`U6L|x0=l|$gz^RW zX6sjP4iL$fFFFpM48z?Z2(;9&JKVuzPOfvd-r%v?3Dv3>QKcRF@$ioj?>KC|I-(V| z6l@9URUz@DhsunFK!FkB%8Tjbrd)`5TzdT7y@&)}%v!yXRBBbMW>I(EHtkOm`B%eZ z6$HS;XWqogOYimB(wsdcR9(-omjqfMSqjvje!REBlEkI^Y!0)hADU5#~)yEwy5e? zmd{+~uzA zk_tq^NQ}>^$Wog2Pr2J3$s|IIp|O<`_mfh@^^=1tb{AhhkYkM>%Gap3to6A>zmY@> z1RyrIGs-UKT#K}(Alh)2xBXG-;S+%K8^*>Kgyy7hB<)NK(@!vizA#RJ|9D1u0rj;0d>ld&j10hplgrqUU|IsJ@gv^sIUo($>(0%*#uj<#}ngQLGxJMRYP>!vW= zbGmG|@7VS_M8~>7m0dUdw&|o%tzHC+TD32zdwCfhB=zr3 z*^{E8@Cey+OOlyoMiC%;3v>j?SiU`ll&#O z;>(BgRl>5RoueZzkG=#DPxW#UYm8yDN-XmR&O>G$0`*sVHEK-S5vg6Sh?BR;OoJ!X zqH-z^XT48HL>=6%9?c83&L~#%k5@g4CUrqe`~;IVZcaQ(wcYc@aUn^%8nj<-CnnfZ z(%CBZ`c6XgX|{k`5txU1<38YBLMB>xn`M(OlGV{? 
z37>XVmr-uLbU>x!fuM6i(~nS}@8^ruHKMI$mT>agD5yJBOqQ$1R#{(MD9oCsNQXWn zK^ZLf(I!^nl;aNyBSADK-b#1_TYlYo=b)-_H&QhoJ-m@K}45{PMg>ez@LsvmuN zwXE1HRz~VLS;)&j{UG-5|NabhGfFAH;(UT}aB$FS{9+mvh4}jR>Q!Iq_2S2Q58-=q zbU3L~{9;FmtZayy`NLU%Wo&wHHexv+NRVb0j5>60aOvGFbQeYf45D z-%b+-7&?8Avva02hEIs*S3iHBqck64tnjyZHa7VIv+rBL9~^nKzjY+Ak-_6YsD>i! z?+YQx^WeKg2oDmd2COF!0G5Q)+TmP?(!MeQ=` zMihx^cYa=PvoWeWi0oZyY{pAhQc{8h0eBebcHnff@Nnm-YI%3_UO9_2BJ+J3|b|XR92_ zIxJIVqR9IlT&8LuAxIE<_cM(?b>jZZp8`Htnal5g6*+JLli#`{3M+QaO5MmdXNJld zOCk<^wfKWiyEcU(_4g0MOvBW!_9@AYzTwFwWn)CE0<$t1HwFT}m&frq;fPY`tmc>@ zlPUSg-3$JLIY7H#+ZNeyM5J*1@8#jp@juegzFR868NP0Lb4$4F$Q}Ai5MOsH;C{2d zO0f&zdoh}gJiP|_!q9Q$$tfv?p#cHY?$J>Uz*&EN2BgGVZ72+WQLJN`kEaHgo!i=N zPB(Xs!tjJV7(u=`->BBVsir|V>^;vu5NbexAW>MpFz+zOT@cMcjFI}&3!|~vDwDTg~gfmkOVQ=^A;<$s)G0>+sX8#caoW$fUyu%ZhC;-3# zugW`Hmt?T!&DQ?8;KRHe_u)PkHdf-?Z?p*4UNM5y$&ljR>0RVrW2X~9u9tukm|9rf z3uujzkivw8AbEeRg@sEW>wJ-A0>Ao`nVX}41iITnRIwcM!T$Sj?uY?)A)v2TmTog~jgtSM2XR5Jn6lJg20))VF%n@6k*aq> z%!1*Wu|ZHBilWm_W6&bIY=-p1mA`+P*s$|*WgyZX!vR)%BNSqJ#oV0FNOFVGFhEcenv${2uGe|6$iB?7Z36B^ zbmS7fHqY8>2$;HKP|GtAz8d$_cirMb>4kj zZsAnQlEgQPn$1E4`1wD3jCOFkt}z%Qsi@+5CpUke)Ng;k&V8ds6N>tUviGdnD5JsZ zWovJbjCezh2cTt9czp<)4T`GV57YgboHU41Y;(;p7H3=L)P70Pbu{Jk@^1n>$d194 z)|W@D`g2cZfF1-E`Giz0h%VXCZ08?NK7`&{KTbwwX5NHu;iW7A|KZUQvv-_~fQ`*0 zFC0ht(}S0?6TmH8U0g0w@B&7xI)N@IsVuaI-nNyddBH zrSWl^Zd|zeyJFXcG9VQJ&_1>a1aQfWgGmVq|E1YC%68+&GBoX)(1ieCMnMjlWkSH* zj1K~$x47S3B@KGvW&|82uiacoQ~(L)_4cF^ICYGfOh7;=7ELTL>+0rVx%#$TtlRl( zQ>z_KQ9)sWGVf=!4q>(tUu3vsl9|&;&$Ry7qhMHjTj%3lIET_bu=eJMhUz?`qwanJ zeO1yLN_=+R4J2!tPuMI)m_`9{4wSGayHrPZQnIG)!5kKxc<}VD8uGf&#?uIOHk16k zP;0*N=^)(utMgWj^-7PCC4&Xn754rwM>TW?z6^d| zBfsdx5-s3t5+znrk|6{%OlRMhOJf;CaNHh>OCw94&?%&ejt87_RWOI_eoQ)Kn3Um&c~-+AU_@SI^3Ov(qXOIle>$MOC_!(d{Zt3s(PZW-sEGMhi%DmF%W zAUBt8f*@iJ_66#&yFWhm7daqT;-EY}+z;QLK+GnuTZCN%C(FhEe5z*Z0fW>D-9;1( zXL<+$dHIp&hAu|!2gIl6hp=UU`?g$tZfjA9f}4B@49FLDHA+W_htus!t9IKZ0s&2A zeye=k?Yr2-bLove`T^nNX2tRZY-92XEb!=k1!Gmpt`@To=XDd0%yp$u5gS9!7M!n7 
z6}zS5&*W7O6&0Mhiq%sju>(90(ADVJbq^_9zD@toCXiTb%WeZ7y#n$LtJT(z z_l{ryDN)H1ssr)#?L5BkF}l!~IIqF-P*TzcgFMVQOhw98=?%6*>(-0aiggdiR9+hk zfgIhj$VXq$W(}s!)s^Fi)2jCyHUvmu(K>T&eccf=pm8^7M(-d?NgI$n`FAAll1Grsp4vA$L7iyQQw=jFwL-|I?#woN4z zc);f#pAzRu3|gpK->48TR(GI+fqnu6;m7PnFoFgxM6cQGbayU6N17L7NL8c*SRKetGJ$ z0WvK79?up{S%g~Jzt0|@TomL*MfU;u^04*!rl+ZirK1ASy@x5{=xsafKHKZwU1X6R z3f6_!=Rr)}{O?ydXy~}zI>Ta>Ur{I^Urs%y) z>;Jy#+@W3pd)C7(n24*j3Xni_u8Y$&rPGZARsQD-`2l5`k>{E|`BooxeH*~#izdGi zq8(x4!~^wA-z;SKQC(CYyg@AcMZADMtn$(Jy;3*&hs1b^Hjmy}pdV0q?FmlwM6ZpH z-%Ybg&g6AiA6d2=Ylfi0{J2dD09Ee?zir1_>8A0`@OK}Yj9KyfRBh)!1G1yDRR^sB zhCNbBnug})v&j8ffFN#hID{aX1De_S1+)QLHV4(qUt;7_h&lyL#l7!mt(S$_e^&RM z9BOV-^NbRt!`aK66)RN#ZM1%#vPtmkpC?>#WURINRRT|T$d+}S62S^t+ApyZ*ymkn zXQ}kek)Cn{@{sHWY{;xMLp)oZfOB^ z-YU2hS=q!>oqED+8ZraC#ob#zVQ2Gf8zK4f}aT-e*=Mmt50q){v^OkfPsBxvzjik zYD=l}(BbU8iV=VF;q?f{)GJ${kSl`$B^#49w9xGCJ9LHfP!1k#f!{WD9_p1tbVAyALHpitb5L$6vZQnM6Oyq zY25q3tCy$Sx8UP&>gep#m5w?eUmJIK6@yn^a~c&wNK_E0c`c(I!SG-#^+O>#Ynxo` z6FvRcPQ2vD-6CCpjaBPcY5*i))7<4jjBJQEh)mkee0F0QFyXp*%+=H_9N_TzQ6FXeCx7@F5SCsEOkuHGOc{<*KTl|)L zf%X1`&+odatK)FXaA{*sSo8ZZtIF#bP<6f1lX;&BXMYTj`?x*073hiQ#mz5n3g5S;=vfHoMZe_b3TBY4R>P{D(Hm+ye{MjlO5nMIzX~I{3sh=4Y z7q{6gR(5nf?el`)uWQ0?Ldh30o35;GdmYx-sTU^^HFCLx7kZ$yecqC@UmlBX+TXR4 zxzA4u+4!HPZFGICitS~T@W5J=6Gl{lk6k_xM($ad%V4&C}?0kMvPv{qvB?$9YrNduvqob^KGA?glVa z+k(uOo@gQjL@eg27eI?sx4JVn+0>UbS>cz&tH2S&_KUTPADh_a{H zFTbBr+q7L5{hs`Bx);vG6OUBmvzfd4ad^?@0|v0=vyleYqbpQDO0=N&@oS*3HP#;w z1gy^f=%wkn=@=(}{2UOxJe~r#UZ*)_+9@>#SS+$5G0^R&z2Ux+u&8HLS`{wqNjf%G zw5pcwr$!J7!v!WfSv+=40nRogI;CWyA1N7^XC@RV#=&SUapqARYv>cF!Y}BU17q}5 z#2#@l+`l>}a3YygiqDN+=}_K>OfMOvN3nu-q+c1TxjHsF0t85F|51+7Y3QFJ(GkIb z=zS7#{M=C`?V`t6t-{iL100C8VH`r-hJ9tR;(clSLt}oqP&LD0wd<7-)ZV@`f9ZKT z^x<&`PK@ed-btLtw|<}6#C&qe&*<`ByHHZkTX&tk&wfqslG?Df`M4(be$H>ZAs(35q~UZmX5z2TwIQNSh-v*x%Bby z7r%PHHOlIGrsfCO-BS{Oy!&|SPqZJi*csPBWj{Ug{|&V|I2-ADf9sw$s{4qAyz9kASKaaE)uNNG`MDPLY)wA)^rcb}f~^QnicyMe=f`73PHQKb z1<9O6hHOLv!SzkS*VxjioKAZs_>oymZXAD~i#d2dQ*4)}+zys9gyFXl2#_2+=^jeH 
zVID50FX;0c3I{UTqWm4)C*Ex}x`T8f@bQ>$)r{y9A1XVc{hkQ=RXPX>v# zWMbXu*QcAWccRJdZ`!Ip2U|VXi|Qg=@DB za`rr0)fE0#xcQUv{ojw}0vjFdt&}7J2MbNn(1WV)FYxY8@h#Q6AM&87d5(y>6`2CJ zFzEGUPX86S8Oif5c3426^HMwY7304BVI@?!7MqC6aIlKqEjjo$sPH#|yRut*SXL@n zeC0}BoeIcEy2H6d3HuttMCl_=$FV;qx+I_S{Dx&_7%d|I&4Q z;F5m7{dluq%0D{&=e-}k`3c+mwg0M&=l#xy?``~(z5y;VV$e=LYRA=?B3*;zXV8{Q zm)ns;cBhrYVWaCMFYnupH{R6)c8TL~%rV3F`U=Ifd-ZQL(fY$SU6lAHxt-&>Obe>9B+q|0=zrT)M&QsG{i7Ghq~{Mc6slF_7M|rU zN=m7qnoGV;>m^=ZAz@+-g_n3eu5=6UwA*o@@N=4?#fN4@;o6;YO%}#1g@OB~{=}b7 z=R{6@c)u30u(AXkXtpTj6L8QXzRm0USavx$EU#o1XT~^`DzZQ|wQ@#V$qeaB&z&*03rU+T?f7sIg(liFGST7+l)p_{H~RH(Vv$DRsoqn^aTXDiu1|2<9ea6jPxzmCh z6oQ>c7{O7r3^xlH#sqpFLQyt#G2I14TNks&UAzUIy1^96Y>-Fs7moM^-rsck%~7G* zCaDu%w$?9F+Bph0VLlLks?vVM0RfGcr#Z8k{L-f#P4*NlA>#QOyDw>w%gV^Gom`>QxmHKIwUN5BL?6$f8A;eva{JVqkIMhDL_R5cd5ko5 z@Mr2;Ba&12;-izBGLOS=MtnR-+zwvx`>;5#6I`yaLxRV6ar@qjHh_dnXh?2`{fOLGm@b~A9+}FXU#&8K2vXQ8d(|FR)K z{*)rI;Wsq{_vDv#$a5FJHJv?o4ys>hha98cwf%>j6?(k+|ER1iA*9O?3+5EK&=jm6|JesxXP^&d>f*lJZ0ju z-NY>p#sV~6ZpU+GQfI-mwvM!GcS;+r9hszLb-pEWKkYx2Fb|_8B1o}<1+a4jpbwe( zTFaaj6q^YtE?7p}0xKyYo-irAASouD{nA}qYOg|r+AoYl+w2gORa&>5MRIH_N!HlW z<}mO+-?4DS=yiP~jvEWI$eyE?iCZja!>_%;#Hqw6ji;SUBSyvfle@qqmkuGRXL|}I zzm7!u_p6`g%NiBPNW!9bivUK}?rTd{hoD=PKU|fbW}$BQiaroNrd|weE`@+9CO6$z z4{bVrR*{g^j+oHzuh~+#p(2V1wJ@DTogpjVlI*KuBXRQLbhIca=F@6Mr}wc6B zq&KXje}d{>9VRG58SLIu(!*2xaYD$I@(U|7%{BmUni_|Ugu1z-FoUS`XA4wyvtA;}j3O{LXTr7eB}oq~gOE3nRBLWC|VZFN-C(ejXoRd4mEw;;#j{Ws{$DK8O-r=(Jm7L{QkO|{NeS|IQk|6;$z9s zeISu!f`(oAXi^r6(BmaXJdl_2Xjl88;riC_$Hw8c@cCkG32G72xa zx<^RD!j-V6AXmV8bs9ikClZYw6n-MP_!mQF&n2!Ul6!w-IzQsO;qx4-5&nx z6YI5Hg{n;$EmCfzAXY-B5d5F=1q8khpC|gs8;n?Mchaf#<}cW2u@b&6lw5(ow6PL% z8!&A0O0Ut|w_=X)dGl66CkTkoEWIXs12EIW`CBV*dnOhxJf&VvT*?KYD^hQiZ#}5k zlTnQnQJo`XQoNzVwMq&`l?aElh(XSm8z0+U?=R6_uS_QD&U z&u8IvTPwxhg;i{H`TerssZ%*EwEYb1ME+_jfdu4?Wf8OyW~Q*e$>&=r)yASD zQ0zbwI2`+Pq7U^OZQ>ZZMm9f~PAyA9nPQruP1$%-fb@_vcaRI<$SeaM*GfQe@yKxQ zkMh4!H`}oyzO+N9_5P_%DL9#a3gPpmb@)Kg;J9kmf(xZG2w_BX6GcT3M!NS;Yt)yK 
z>l6@@23mk;59fdVf(*CrhDm?ofu8i`X=hzN>>xCI8GkraN50Rrp`*Dcs?e^g%>wRZ<>9Od+$a9)O$^SB< z?Xu>**s!Q;f4qMfL9mRm9~-j5VcqfqM&-YmsvGc}29NyhRpa`E{x_aXpJjN*q!KSa zj2{e!uEcCTtVr;(Pu&u4U&QV%7|cGq#B80xKqO#$+*anq(tXA z{17|&IZSl~El%l~(X+1~tS$Y}D&FJU zTV+=Q7P>;7{1K79^)6T?G;1hg-g?*0$s8ad7)Wat+cB!O!>oSoPgtrZcWe5sRmmXOqaF=LdYlk6Y(e5bF zGQ9}fS}ohi5scp){#E3#I!4EuRa9YFMk*P4d1bQigsF#3mif1i<~s}9j*)m2bq^>} zCnNp0!lh(ePmU0HT~TaCb*N_y&WHb{v_(67@bL7vh`=B&B-wD{^0SxIf8S<_yS`Ds z-?~?iThFoXuZ)d0t8ep6ULX7)n@m;BC+)whx*jpJpUXVn@0N!6p9_^}8;-0Tie)mt zJD-)T$o#C4I{}IUjxSc5^>0@i&O3M~?!VJpF7hE}5h<9Tx;LSx_4-bEeX{sI7fi8P zH>qy>Q{65gN{S8vSyebcHoCP6%oTvhe1(I;wTEce?S)(f=gKg^$-?3#^hyTA808NnN#Ab?;3{vDzetc=5 z5>IELzMkb`fRij*t(2oXYS{ShcK0WJTPVdJUDqAr{RKF|Vj{6?J!op)xcWaS+VCtz z&;+Lp{Yz!stOKU;m|cUHr>-#bESxi-MR3uhXutEib=uKTLAZ0s7YK2~YxU4|anf>M z)I|#-*hWT=5W!BUGn=ZWvLv*khP6^KiDjOX?aaZFE0yKgpiT>3Frwvl(+Kw)(&>> zK`@gF*v14q;X+m83ey2aUqOYnKG4@uu%WD2(T};Lr2>CoN=Jj&umUg}iCp76gZ>n` zB{!U*yR`~`Bj2LH=$Sh37PX4|I0v=@RWqx-sudNUjtN<2qVT>R~pNEk8nu}apcD4@!Z2wXMTOa zKvuaF)TxvmwJy3))o!7@_ka*UNpfcm`GFv@WDx&3)Bf0@5EZ^tVC`@V-xnGSQ5nC^ z!T-W`Aaj1pJh=0Fww#u(K1s6fhsEKuDo-$h|Lm7ibT8u=LAN`UPJhS@VXkm<-dtYh z$4=Ti?|uKn+o1Q(jDzmuR_Mh1U_N#dnkW)dn?-W6RqV!dtuTsVSO}pdt%MX3NR^fb zXkNjm=aesmJ9uii$v^9F zt)B;9cDcORB817mbORpP&$N@wE)F=pa~uhw6trDK>J$-ZqD^Evgm;}X^mK@%k=*pa zgB6>4n-Y2msv3nuhY;F{yi{@LoOqF2>`9=u#YQyu6K^0R4!#{?T{DQ<)Bh87tK5hD zhxiV6)7#T8kUc_>wCKr=Sg0jmPE}axXDMocltcBtINRftY$@Fnm5PX?xCse{QjM4R zO?#Rle^y8cl$y}+>nJHq5SkQLszkHA(u|DA&CW)3gb0?$)lZ}B@ZuAn`hH!>qg~ID3%^EfUItOS?jy60n%ZbunJ9ZUa6=^|&ah_p%vn>|9Cv)pRsWtTQm6z1re!DbM+lh+G(wDr*4&r`CX=6>;L;dhP8_$edf1VL(IvdTYbYa`26@Fc5+FP1}CxR>8m#LSq&Z=a{pK{10t!9adHM{DB@C1mplpN~ef) zN{7a_H{v?(RM`cl*Zo_ul8Z|K9ZnaP|h)-ZP(>H8X40ObCT5 zHaKnvL8FYuEc3Om&c1Z0G_G)T^Tz>YeQw5v4y=h!OkR!gi_A_oD)_rU4Bk!L&z$LF z);qoGoiFqcYiHjHDMXF2Mvmey2YENvHpO7Y8x*q{Y&V~PtXh{R}~F2ctAE!aLW;!<}DUp)Ksoaz;6vBkZ5oD{7@3slVmb||;%oS4%6 zjJ_w9hXm;QVvkJ5>trL}MKOacs2sMetxUXxq*0arQ0RLeuM%LDkD@)-^0G6>Sfg+Z 
zJ9;10a*X>s&Cw<;h-n~PmbC7^2{c%st31Q*?ZyhSE7?^~*xLNK+^$(Yu&KTpja#c; zp!@>(3)EJ{w^BHjU|;6ESgdGLh6Y3fG+L@MsEv4)sjPygOLLZJ)8TJStEMIF_I>D-3GlJ)$C>c-oVO9pD{kR|X;MyH@!vF%!^P%*M z`vvT7-T`>-K)Vb%SVv9Nu7|IX>MMNdZl^UkL-+lsBnC!K;MrTW$36UW;U90{NYz#} z9l34AwFGtW!f4s3)|#YHJ2&{--6jV>dqT!-D$$29-h&7beB}*xQ^ixR>?j45gvL)V zv0a&Cauo}xvio^8YsQGTXYd+La@c0;#G~t&_L;&YJ~Xy&*$`}>$_D*LJ7&1+tqw3z z=Z1T^yUpH_)|Vk#pNg>|hNsIFp{vHkJX`rOe1o1)KYkv3yY|B}|~~r{>APi0ZbNSg#`((~j?>o+5&eVHjc@tK=<< zVAT^oikh~ad=Pv4Ua=z6t;3S)BdxE}PuY))TcSoyKN+3lG+V0e=u7ia z-sN5fhWS674vFn{iqeD)4N>^x`64g#Z(De|t|&bwH9odr`6BOT-MG|>O@wAqi`4iA zdCz8=a*)-UA=vFSIx^l-6+Fv`3wo!5QsEL;qkDpoUJe&Hs=P$TYfPu#%p0uKG<4>7 zF((=b@Vac~_2n%)EcOV{;)W$!KbdG10mZ!zl^;t{tZni&kl(elukati9(%p10)J_Q z?c*KiVqjZ1VK>1eaZI3%z>SlerHIeJ-TigwWCl7Rv6j+m2UhN|ivjQ8sF0^k1`Id* z=!^eII1RBA`1qQ7qPDBa0uc{xz}9>N3v=fis!K_YSI^>JP3Go0K~gEl3^~(J8owVb za%;>?L>$F?YX=z2*$EssTpNLm@M#S^2$;yak1Cirr2g zh=!vv(@Qa;<2J)ti8-DVDEpz|3}^)$@VF2UexTQpU6maN#Y^K~7HK zU=r&~z38o`b4BFMxw34OKP8Q(QA|mW_Qg}6TSJ=fdR(Y1<_#@*2J0lhG;|NZh%ZTQ zcO&aK`Yy*rry!mC1 z&i^Rlb>UJ>Oei-ij;boxvh%wQL~<;a9rVJ@tcCBtHlvVf55KZ`loeySE8i>Jn-2@_ zEO%$lqJY3CH4!DnK1*OjogZ5e9_|nY6TD8h4Ci1rvjIy9>I{zCI(IpGOUwc< zk*zxgWym?1_8T1cJzMRXPRVLeT+Nm(%p zW6BqzAyF4C1}#UMV!vGO{3sMR=u zto1W4-w|CsN$K9FsLePodYx>5L!B{8tSmk;#D6%0JZ0wl$t?@oj8o5*3+uS13(sf3 zy>(E@5WN{wkvSECWs-F0j4mDUMgjk<%)}6RD9)aSTK*W`rC*GTq0qbtHCA?9mz2xU z_jqKzd5fi}oVl{Lfv!u^^$1szE(aXb*xU|92KigCc7yQxhl?3N2`MzRcm|emrKL{Ps4->F2s-@(`uoe~a^{F1Zz-QN=FEw}F(^`2SV?O@^r zne}7lb{=X{DIiuQVYxKeGDu*S8+5w<*_!m)&wjx;K zu<-S$F!|P~?ICPZmGJhC&mIrLPdy%T5#PYyH-#`nfBSutNu%q+$|5>0+j9O+5Iq_a z7tFTwIND+v79X)!Jh$0K{rm%)ua`#gq~KL%&m`Uj=daD951Ski(n}7FHF7cE1F;cY z%ncpoAF!j$D1palS$P6)3mvhanciq!l5NbAPGGx&qrBbk-rw~`1{XG&S&Tm_dpwlY z35lK(n^0hZ@Mdd;N0@%foTb{iFu(MUs^tVT+h6owkp=rdv>_u~4Chqvd&sJtd&K=V z{m_DMvXN)~j7iJt9jtJ_`D987rH>*=cAr4nqeS;Bl%bp0k5c_QDjdvBPLkUxp3Er zLK@D1v?Fl(Qjv1OkV`n3OL>4%wD)BW4b&mlN7h(#~~d_ZnA?q#>)8(uKt-`vj#07 zBYa}poU!+0+~s}ygh)Cpxk}el(^%&z$A|N(PI<;J+AWGo&21mPu#hi{^$A?=vVBpc 
z;rEd%P7fXfKhwzJl?LXQ1u?IO;#)4!ERv z1`{ojb@JoPrazr~O7vAu2R0sU{6$E4XRI;}%o9#gz=`7v+W^vIS1Av>Med}S5!nhJ zV`C&fUG8)_ulu(r&#ev__bFa?L0(UXn{TnNkNDKp)ZVpt?ruZVZzH-ytBs*g3(aS` zwH7p}RV-yDZ$?bM(s0el^%BxGwHFNZ3TbGB#J9N1?TXJY!VId&oP|*--$q8t1+~Vv z)c1uYyFHNAAq^MyUcHUdT{%>j2m)^gAHB=2Vr~33jTn5XLG#kI`wCsc(z7tgdwG0p z{Dq{>=Ikrv&EdzrevCyjJa1-c=7FVL5Ef-a(q_dQ!F0Jw`B)*6BE}nTPMl zksk>rMU{00zpBr z$7zjjfsqp$W@s=5R0okH+E?plw)L!j-YOk))Z)F={CRRL2p>|-tu#c^>j+N}_vz&|o(?1|Nl`u?N*ZkC$A$uFIahDPj^r8)+ z$=A?^RdnZh@w4-HYacW9IN5ScVFbn~)Y@ea0t8Ym&ZN6&9m(yi` z-lW~XR{E{Q?Jm1NQg?1+WGlHP{G>LOTS-A9C0M%oy02g|?ZwCAK4h|>S8Ixn_T5C% z2WEsbq%;La$V_77tCJ1&K|M-$K=(mhcfV3rSBQ+x#H_t9I(}fq`owz-7qp_vg}qO| zVz!b+E-I))nhDLo8+{r(^!PlElR;LEHMzCh7|jzy4bJOrj-XL|0sSzp zZok~^ZY1Ick;lpSuf`*Mds7ZXAbGlk>BdS-kgUcchrx6;$;#P#_~N1wdnf3faOj-= z_{aR_s|#SiIQDQzqOM4~C-J$6ffwpejG0O`lU6E>>{~UK|85pD+HC1&{oT%BhQRZp z-AEeK7vZe1|9c86=cij-O-;?Jxy24>u?UowEjpj}epj$y^k=PPc?+Z4kP(Th8^ycn zWh799Eqt#NGAN-73`S0Ocb{wVf;y6rs1mZL+T|oh28Z<>xPn34JHlKv3}A&NceEaY znV?R4lBsK{PJXp^b`h1Hwo7gIsZ>mFq=d`pZgQK#(&@lJrn;Lg1Kz=4RK}zvM&g!Bj2Nv3q`m@}23?fV|yC1zN8> zIE2VC%kZ*Dx~Foke1>3p04qcI!>^F8iI88*x(c&>%v_` z^zryx@sDiV!(qBuYZ)0C&lAc_C!XeqN8dD8{pYv8S0HDtPy7qlvu_t5kF7;!wfycY z8H~;PKqjv1R^1te!N9+KYh6_rva@6L&RPh$xSw=xr;p4I9@~iY{or@tRwlbbSH!b9 z{|!g@TXTep$7ABmvRifRYLF$WD)L!Pna5khp~;%Qy9>^$=~>;f45t~svP&q2KO$A| z-5t^~0Fyp1nhw5G0e-Wwsby6@5TJh=Z|9f4+!`L8O#dlO5_7-5sZ_@&O% z0sfe5!t)es;fApPSWD=n8M=n9^1ay~T-5$J6?IvN^UgxPFulhw+T%|>_bgvnK`XWKvYqdh&EzVA9b$Jf24YmVF9Z3*Nywip|RX7$L_q zGryU>qN82f>O?pI8*PZ*_4T>#&AM(J_5gtN@Z{;@7Z!LhZI&Ck+1dB@iP`9No5?R9 zd6sQlu}S#%WDK>!?Ag~_xSaKHCfDu!u{YglKK#!D8BNqBKEF)`t>^lzO9q7wlC z0F{3c>G4Ilu*bZQinjJ9;Bmb4fkQgKeYm?K#APt2$U-Nazql3t{Q2wP4+RytXun^w zmyx)CSmfYbLf|7-y3!PooV$3tV4T=ehYmVqVO*G^vcNIe(?o?KAYfg2jFfwmq)dp1 zmpa>eclQ+p_GSGF;|(THJP%_W#KQGkHD zJ-}A1(}V-!`VG|eTA1GkE*AQTmoob6g%Ro6uUJ3Qjec)dEh}|Z<(6U(KXE_lnpqkJ zfgI-;;+^gFUaE&oP5;uqnxjZ`1SZACG2PW0Tu;?X=)yDm_U-*E#5fNla-ZJ)1F=n8 zJ{g(pxE1MKp9@=tnT1jLpZ5UxAlhXXiGB*+_yqv|UJ?=!r2r_}KRBRTO2hy@;Jnw2 
zV)B>oN)j@e_%*RtTkCQ#T>=#RK?0@Pk5>a9_v~Xzq|Pyg({PA4H17isFkm%v>dO@I z%P%lio*(Mb?_o0K!8yET^ZMf^#b)AUe#yy`YRxtqm@-v9K5Zx={ensixFFLU*$77^ z3ZQ-5Rw`KgDR%u?yHpIB!9TyqEbe%cUPVjGcp+z$HcT79?JAQ`u#8N&-S(j8?Uouy z@96=OmX?BV;D@h6J7}{zAdcK-ks_6SQ#%x}D$!dGj-Z6rDNQ4FT*bDkm5H`DJ>uot zV7+|9do`I9n>zvvBA2#jC&0=Af!=jRlx^z+GCG*aV*ly|M7e(s9#`0~YAHPYJ?MvI zj#5Fj;;0jm_3%-0a(#002QqjN*@tXZz45Erc%Jq-W+o=CwhFyAUT8FkG)?xsHWvvZ zcccFYSz_S|_4)FjX?AY&)o(%a^f)tRR=VYSo)#9VF!qmJv0(E2Gn=pu>qjBM?E7I) z^bG5-g^y|*`}d9{`H9_EsBo$I{i?Dui1eW_trDy~#A}p);CXu5TTTBC!5gBjOBY{S z&_`M+om-*JPzWi^rH&oh*+lNf+gPNoRC>!jLNu{3`e(@s^0S33E-x$4;~ea>pyCYD zWQ7y*oZkw&x&4u-Y+021L!38auXNk2AAg;ymKZ3jgbAS6J{635K3hGHKJ3rWz>uaD zS``x(Z5p=ey~t_yX(t4YPHI2rcs&&1SD`I&0-_4rj&K?r@mY2U4u$*8>rln($eJ2- zQDypVGqVHyD*AUS47f#zX7R63(LE`R! z+FA%9hyC_NfFBZuOsaaX7T|96 zojks>8JJ`QOM@8`?p|~eg9JOPMm{^AuP5~=X#HW){_ft~%s7iqdPDpONKo|g(Ta^d zTsp_pqCuO%iJc?DUT(Ki-$u}M4clR6CQ@&(-;e+`KwEd^XKt=SLcC<2SxA@9{Jbj& zoP|~VWd4Z`kh(Qttp#no@qF8E{Jel>IlRdq%-NuCm%9oD%95@Uc@q{Eb{Isi2EYXr zIF$4uzUw~=PUnG-dTmDP^VSO>aZLbWYm9Met2soy#8k?{A&cF@(uY{3wf*%@z-D{;ak}kTzrZjOZ;a{a*BLf- z1aG&MDl4!K{WXGp$JV%+nez#9?ke27rv$T>hZ&Q9z!qHx=MMePHW>1O+D>iwoe`F1 zyz(njFpRR~is=L3_xIp>!KI_#EIJu4_eU)M8L!{y((ATcd;DwW0NNb;Q;;!uRL!=e z1Uo`s@?WZ;1>%Ydf;eax#F5J(v~%*bXWhz)2di8>KTrb?XgY1w%KrVAh!TSEMzS1v zl}Ay0LdumtX#A%OSLObPJs>0kx=E`rrF&NnznBx%iPi(9mW8 zf&P0qqxzuICIrZy!FYpSd{n^k?NpGOjrLx<``=VF{K&1f1%{}^dKXk$agx3u7;uD_ zM)S-6OeAXvR+!7Jlp)WI6M;Q95?c3Phkp3)Bvogj0{lF*(B0=FFXnu~(uPBN>#Hj{ zCZT4Gzym<({XO1H;OS&BD$t>7fiThEmTw^b_g(`Y4&5;-rS3ZDurpy-3a`}aU%p#U zo@SFgz66~ji_dsapSD0tTIT9EGVc%kakkBvw7qV|rlzJQ7EnRipTY^o7Tvjo`0j2t zauiMWVyHHH!Y4AYT_v90BF>pR&KxfROsc7=q4*Sr$MaPhu%H`p$>+;$Hjeq+By+|) zQLtsT8o6BT{~?YQJMTp+MzD9V|2^&`Yt#ugCJH%ul-Kvx3yG)=K~P@EN>HN3i|{-D zya!+b6-am9FL*?H32Gh0XDw}fIPX9)?HNuB5dI>hm2$%UGjTH`tK8&bnDwL!#lh zvYcV1A)AixUWqWHid3nEhX-XiW3&^w`9PgXirf2n(7%5G@T!twyk)>>%& zR3Dt2R=qM>aBP+*B_kjpeBV8J4_&c_Ls3vuOI!?v#k?}Ex^i$RP&RyTM|KVehmMY} zKxjMyH0|QntAl}={i6bH+)cJV^ 
zt`lC+EW~W43A&=Irsg!E`+W6?!CP)O!?3qhS^NuTGBsN!W z#sIR@e4o`Qez^4eWNs&8$rj%g%Zr}u~-LQu7)b#Yz-m;t1(K0#+ zct)bMv^>Vo^;b1BFNr?DgWjo?xVl{JGiQta9?Ez=ycm&7XWP3oD?8fTyIom%s?E*a z-p264!}N0PAqo&)TRL&y@ju%zsC2m>@k$@rNo3HdtZ3p(8Pf4Osu_}zQL!`pTR^M_ zVWL6E!E~Y|;IZg!DIY$r76fkuQgG7bh!D9jB`NGD+9-bzEEq)Fgjy5j9NKRExCW2u zU^x#DvimwRyk*N+sy?#qpFH?tAUosi_)|o`}43m#^o)B*MeP z_lqnRyc(g)<{Uh3)5RtexlNT{hX#sNa}*15^3rm9VkC0_vAyaBju`>86N@BZ_k6ek zIF-TSKnIE&*b-QBvtmdj6!3hw1jcxktD3H zY$yt}uPWybe~e}KgyBz2PO`a-QHzhE-oGsElo~IO8ROz z0)z#|4JJiyB_|gJYBRp#`gNb+8}0g%#_?!Ae(3A_?yti?nBAu7L9 zJf)ER!oWbp;oIpMorxC9g=PQ(nh7EQYpn-rkF`AS(qt$dk1&-6by)_{ehK2*fi5~_xB_jMxpx!~K4a{p&3p#5Yq{~sa>VfP%x{Z@N(VDFr{8El z8DgXJFb(ZlHoF4@DUH}&&NqQySaQ2QoU1avyWB&^$qt+_05&V zLatnjD&Y86y5CSX2XI)(k(5>)Kb^7V)X-UHDw%}KIR|R#NPK!LP4$MCSLlH#TD9gC7CiBipFQ_-Dt+QD?xTB%2fkJ{I36Xl7)esc zCUF~d=MJSnkC(KRlqjyG6%!CZ0m46fF{$Wh!Yd{=rJaM3D)!Op=ELC8~ET5376+ys(O>e0g+9h(C(v1Bqz7Y1Dbc?ECKcprMtsora1w>lIIHpC z4}J?^^;yVpIfi`4rtaOj0bbt3%uK7r#(+QLH~m4Q-Eg$B7Fn8X%SHV`y2sOPNm&y$ zlze}GUsk7@7H{TIqssAkv0(C1OG}FpSTd8&4*=b>XPmKA<#dwX6!;o;g2FG@*X(8~J%dUf=1hGw&m zx-1?a&*RZ$ukxRR=sxiJgW4YvHYG2+w6sbiKYw-ukt(^V&osP51rELIJY3sFIMO|= zrY_Wec06l3@z_Dtt$Pj9)S^K%pr12W1+%;1-+pmj6TZB@=HOuK)V4EQaP;!>0(j=( ze;h8#pD}{_!XZg|blj^zo49uiKzT$|?Vz3%^_Pf$%D-Po=a5H#q4+y>~FgT z^NV77a3YU)KF8EaeSQ=gxZ2YA!T@Bcf#q@ZMX$^hB0vAvIJPH^HR-n#V^}Cq~c9bGmj%% z&PSqxj_ly(+*%Uh%caO)QkQL1cb@gfmL%4)jr3gJU`lwHOlwZCETFIcXPcjXg{v_Z z>I7qogVlGFXW%i$FfdR3K3z&Yj$ob+ysMr()7CxAiCGm+UV&0?`TIdQXa47E<+tZ- z%ded!CwP))mF-=^fqbl`&w%5Tm@wWMdgJ%s{xgj=n~v_r41+2^0pHU);VFLt(#05pe{O$A;9tikew|VKI3;JT? 
zQikta!w4grsL?ocKuh5M<6lL*>CL&RwMcY2I+<${XgCS?@qs0wjq}O;S-Ks*rIj0I zmBdvGfmFzXt01ee2#{pnWO`kb3pakqVFv0KyGlqo0?L~DJmwse=$C{E< zZe$*`rC(52d(m3E)O3Z<@r8{c{hg`O-d(-T-sKPXX!Oksx0lyV<)+3SpX-l)Rcw6X zjF+F~m~ZFSJ_xyH|K41G{6brX2*l94on?rcc#OUC)KI;wC`$4l50Hxm3^MXQH)~i{ zZ#rweQP``JlQkNL6a6R{p$<3tjhQ2>;)|$A$L_X*-KS7EK97Unhs%}O2i3JGd(}p$ z)dRlRAfM{qx9``nX$jMYB?Zr27BJ6L3gQvAlXn)?Qq^pY{Js z6KNxZWR=|=w<63-*^F(gx?WHsdP|lPxm!zbi-*5~+2TDTKP8Vt{Prr$f{^dNVk_nP z=%h>r_3K&XP$&muyh{arh>;FmsPK2(fkyS(P@@L<^#H7qM9~U!DI}wmer@X=f6I@h zx>RE{>oKj?j}*a)Y9)6aB+qqDE6vNz&1y4cEr83jzH~BIMUygsPL|d1bQ<(t>-WpI z;nC#-Mr57QsL!W1HwEVb`U^k@xhhWy?Y;79jD}wz4d;?HxKroqa>II+x z?(SR%Dzg>=&%vEPqtDRP)O0fiby%PUOIZEp`?xNxdV)%AI2k!HxI(FUje86FHhbtan68dNr>21-+C>;&mxpQ1##n zk?H-T$X2=hLFIthE;-Q(81&~0N1G4Bv;R_>RPK4-OX2#16La?CI#u-mWj|?u#8eul z!;tZi#_2uH^vY#v7n}(mt>WCZcEGzUM|jB0F)T89Os!LJa4;$q`DE;EkO6Gq0m<7} zQdH!H4lWp=$?56-peOU|u^y6>z2jQN0%>Zy+nmJO+UlD|F-YInDmnhg%jU=XgZ+IV zl)(cm@>A%90HxABjv+<^iE>H2x9gz{;~g%ZbCnSh5x^a722LOt57_@8xZffI&~{Y7 zV+A}mz;n0zI`jdWY6n_{I<0t|FD!EyUmqH$yA9)d##}l%I0*pp72vm0F8~Yjv+dZ& znDNKEVV*p>lvlZ=7tErk=X(^dQWnkY?S>ejaTO2*XEq`TW*OqT@LxeO$*$_g}D zJUtlvMET1_dy_#5vb1ySD!R{=F@L)t?%nH+J%Gbxbk?ouqdtjnlqI^wdrFQ>ScmRQ z!J>x-Hrc*GF>^H6fPC3$hhX2^DT68dJdnqnyi7;ZsoE6iqq9*vt|I@yDNj1x{aFNK zD4pGG-5*6MSGr)pa5Jwb!&!fEw&JAu$yHPvVmh@U$9uL6ZS2|M>mSOF6j+{|yd1;u zn$@Sz7@;uO$#6Sf_PCejxl)#Z_xK@TwX9*B-iP-t9)QtKI^ap%UTp8Z1XwW7^TG12 z6=_qEGG(YAD%|^;1-_#!*^}J$vZ9AtCfWXY&e(4F#Ot(eN5^@sQi=fvgr&bwYjNT> z;s)&Xcn~qtsblE_r8T_H(xH<^EQ}@Ex9ycioQ=>X;H2DZgKrRvm5L7Be+m3iMV`r` z@Wc85hq}DPxZ~MJTrvY78fM(GLDVCT{g)-rWhn%5>^VR>FjaD>NfJf@w+WL9r_5}I zP$-7Y~M3fvxIzF9YLaMERX!P9yNsJ&QgzBJe1 z;8bc(A{0K2jRhcCNZame3h07UX0>oVxIOfDd=3b&%rDIR2K=v!5gx|G0nFB7d{!Oz zy&U(SnX0fL@K&xe{qbA_G&@n(k{Pitj^tA$fx~L6&vAOXEwF*2+hVoCPof}|qnLUz zqvQ1m__08tTLS|FcW;UDm5B+v-EhujhsP_Bp2qYzl@#DCd6LJRuL7~5>6sZgZwBHB z49Btdfb%cYVfaO=B?RV$!Ukv`nX)Kvr!eE8&CSiD^+lROC>?4>gi7*fp4ryQ$x(ld zj5Zt8m#sXyC&}y%Y)GQiJ9uv&DkL3;1PhW9tC9%7hEQXmC+Rt)nJ;09i-nbX<5r$GV`2^U4S6z2idtF& 
z+>Fj6Xdpp5yNfPdojS**`ai)K0J_F$F5CVn*!r+xk}2s=<_w9ldGW)FN=n!}jq@6PsyJyQH|Di?x--Jjc>%moST%mmvPyLs#Z|&|261DPbHV*}` ztcd@fI{ri<#)C|8!HGCuYKuCohNZ?EB3%E#G}!1oQ|}r{g!9|Lh9`AAIw6@- zL}~>^Gh18MQo(07FZ+3TJa!SNDv6W~k-}plS;ArJNxq?Tkj88J)tpqJDb#DRK)o(} zTANhj<@^wzoIF>X) z!xI2D=ngY6F~J9dW2is}UNVq(@b%%+!3E(~ZcdKTL!He)0%IbzOh{K2S_=T?q6VHH zi14K3CP%-D8r&|nhlDui%xeWw5dH)SzGKta5?8Y424zq_mb`yYAr=8>xz`|TG98^7 z)9x_*vk-C#U%o-*t{^6Dl+b>+Z%bXu^2+mp^0h%5P#H^MVC?_^D;_0*`eTC-^MYMyf?}5*k~T5lsn@S#OJ*Vih$~pz)I2 zDKBbiv1SWiwwXwH&q%MU zqo&rwE!GLdx4aC^-hs3|`>6h`hDT0|W22){16$qUL~cv$Gk+FJ$YqQavDfOXnCqXFf}ui(`vja$jk6F8cGHNi#jg1TXa#F6rbod%U3yD?mycpVZ#ija#tzS z-IDJg9k`vY1+muD>?J1yn~zVB9mI=u)E3h_%(Kc+M$gTP9uBKH{e|m%g=_}?`@=%2 zgkS0}X;f51@DlvHHu-?6Ok&|7g#d`qOU#1~|4@&|@t8hw00?klZU1do;E8gm5%AC% zFaxR4Im#q3`SP@JM=^2CHs(o8j_av=u^~qHG*0}i2553q$Za=-9&Tb`g-9THQ6C&~ z7;-@m>KbERXma?%Q={vF;IaTVOe^ElV-YDAjE>{+0s<^7R%^gcYJ|5hGH68L;HaM* zBW=}`hgeT-h8Wa4$w6`yCwN~!6}sX%JSH1J0+gn!9bjmNHl4Vyq5g&yG&4*6ROxH6 zFlWvYFUiivb_1WEqbM;iR5o^Kdk5%jdD>Sm0&;V6@!WP^0C32$brH3*i-S)L2GXJs z6qJ-=Ut7fSeVMm6tkQTFPY_4!7^4@N3jKn{O?T4vN6+}Zcox6m``5&(^F^1xnTS1N zr4F|=H(iH?=~Y;V>D_X5vOI}nYxrq$-VR~yuI6Ay6u6nf&fa(HiKqsZ7ZjLGhjG)6 zT#^IlVy_wwo%zh~h~ylc7_JqGl#ePhm1ys0DRw;YL?Zg#%8v_GM>E?6uskF6|5U8N zcF-{KurhzmAS-KW888E9%yTc`uBRVUkh=2^bO0usoWQ7-u+vn+Rg)NrjY#bXkO?>Z9%|PI2 zIl}j_Xut;%xyVzL`5^P0(ZmyJm0b@FdQKZ_o=bSYP?^?Qu|XM8bwWsKsO#fj4*yN! 
z=R!b&o0UsN97kLnD2F$0>w2cSU&PB_u@3$86&9)Bwa1DymhoA0XT_#U<+ah*bOCBi z;e=OIHsr<+E-mLlydOT;ZHMW5+1qc2wW47*Z&b`tMj9uHG+l7j6*2(K2Pn z%5&Mkvrt`xU{1$D;K&pfL-+yedYp&;?v07mbE9$@4H(Uq?&D=&Mvq+jYZ7GNOhwv3 zjU>sO^==|3jKE@`C@auJ(3dO*U&+2WOp|BP1z$r);J`6@LPC}X^4YgX1eS5a1mV+i zUoY>Q5p0o8CW816tUOXhQ>U;aSR%fHXalTh11z1VVsf0}1gU%0u8_=3g|6mNsXAwg zl9`=%JX*}I@Tv|J!qY%SEs*zdhjpWQXXasc-T`_x>ye`^Pu~Vvq}Zto0{Qq}62C{a zhK3)}FLzMLn16Zeeui$)fLG6^vB=V@A(u~oPWN@3$w+= zCGVfk7YF0cVZsY>dE{2+dkG|kL8@P3pukd`!cqj2Fv9zMQ4ciQd_leao|6jpAMyqX z^n#fUBnOe2EzHZS`|Ie%Xk0A47Y_c-1!xi`6{L)kd~awdUmvYtwkhe5OZJgy^WhEd zrSoTPK`^{dYEuW(dSMEE$0$w8FhjDcbKAJ8#)6^6cNe6=;#6syXla`Y>9P^GZS$^H zUr#hlIan$B5!6P~ax{wA0da=JzSn5XlsW?RCcSrm*f5F?@8g_~_ldzE-YyTve8KDM zdgi2{;A`ZlR#dvz25gF7sk0|o(cU31c6A-$2Y%5|1&iM-t3W7YnT_I0(q5kIVqMJj zf3?#oWoA9Nzi)VTB=T+zb>)4SS7~9b*UB62ci6$fuV2woy-Td>#F7e=#?eGmK6je> zhc%Yq&CWR%`jG)7Ttl_VT9Oiej!E8h)Jjp`HbH7^197sXFnAxl^{&(*CbWU_PB7zX zn6*uTOj1}ywaatG(`1!E#{Sj0B!s%>vT4$ldGcc2qtbaz-^f?JhLlNk)7GW3ibaEa zpx028CGZPAma|afYsRaHt*huO7~thslTl!88u2plg`&$K zECg?ddTHasxW4Gr*|Wa0(au=wy1nKVVM+M;A@rbwapZ9ag9c}7$EC@1;9+O|CF!N<9UUUW#!b0aslm7)DpvG?`)$8En9 zuB*Fh?%O3?dFxccPHc*Z73>kz;1f0Yzu{bp(xV%8r0>4tlD^{-fgP;AeuD{T7kY=s zd0gVU_q-IB7|&v<-r~7!AFi1@0q(g-ll5v?a#&Aw@G|k!?xt_HIQ@nBsIJJJ28)u~ z%7Qk+O3z#z)=A~6Zut1l`)@rlNMspf0oukA)X#fcFV3PHn6~-CG`} zU7&|AL|RZ5vf>f-4-ifhsSiRs?s;k8A$m-`XKzLjT!8LTxWP0&vt+m1WC+}p*0_wt zK#>r5A#J`O6lZ*L@<57DV0hVXmQFBpu^|0GrR}F~yVTZl|DTWkWCwqE+&Z-(GO&OQ z+rh*;0wj*b_Bre5!Rv40iajEzexd|3>Lk9SP}O7b5ZA-oULP_?2vip${Uq91PLIjV zGS{VLR#9A6idN~F_0KczdvNBIp2`7D)-a+d?4r?tBlP246vFReh zg7?Afys`)AWb@(~s@7l(FSP0++gOEziCLk)OQWN=#Uca-2nZjDnRw*iN&3(0p_a5^ z&tnm|CidZ-0R%lGR$s?P>#LK3yHD(f;s@dh>EV+L(YG^oqIR;tGBOoAv(l%3!T;3w z4u=x)9|ZXbqQ%NTef<%deeO%Dof`GLGy{uu! 
z)YsF^$@~!@Z_8G2U$7x_#>{eStv##ZV^N#|2TcRYpL^CzE*@!pmhDppu}gnG169Bo z{-z@67s}r=3nu^ndO;%n5V2LJd&6;#XTFJ;bL67W5)}lxRHPp$oJ>|Ws8`L4ROvpl zpS3vRFZ<2;+$JnWMH?MaFF|(D@w&X`H}_#}aVl>bS%A@54j2T|?w*V7K}AB}yEb)6 zht_?zc(0lS_~NF!}IbL`v5%vAnla&U$E?}5GDYG@y$p4%e{y}h#Uy3?dg3Dncu@14=f zw~>CJ&L$Gbo7gwLkAIp}#AKyOdIRr#V z%1=hr7p%emr4_}oD}nm}bJ_kDA<+NV1yAU1`CReP&`%PgmtvniJa#`=Mr`cwAHewt z`qmsRd1XR!wh9UDR!7&UL$_Qf-}JmIYqZ^;EI3f8+MTU105VcOlY|1f9O)j{a|abG zoXgG|yJOiiI�OnHrgwrpgcU4ovD_pb&rcy{|visLSGj`YVA=tt~8$TAm+IW zqIUxOTI7YNRiiauzR$!uFO(L^`?cKd7X^`c)-^Uh03?DgIqAB%xXiR$HQ#RgBA|L+ zq^tnB-yhuF0Max;mm9pEpDHUWk$|)dfXI+unVv!^ZjTZNikp9tx!u@^Vg-JfxQhqP-y@|SN5G$QKymmfJOIwe`39Yfx(TUw! z@6isOiitOg(XD4pn>h&m&^>bD+p)LFbxc5LHxSq70)ea>MC`Z-?bcJ|4Rl>ZFO(7W z*x-axJ%ArPy;4a&w&?-R(A!}sF~G7nEI^q^Q`1g^hz-EyZ%4utHs?rLIBLGWrctk2 zIfUDsn>aTm2whlUiHo)e(rfaAu%8k#zop?^(c4?7i}GOxB>@dm-(w#%*?i2S9`-r7 zx|`8)JK6($?dv&XUDv%FDyf9Vvu?uA3yvpEP|LZ>Eue2v*z8kc%YzYsULcS?51fE1 zcfUQK(e>hcVuro6*&f4YH>7^BBVBPr?kl541D33ZlNnbyj0xs1YOQy<)~(iy~s z(+S6=%YaZAhLOD#*XtnKM(xS&YuH@IksJA*)FJ`QY!&mtmj zZuh&<6lFQs{r&yR?iVsGD<@a3x*k_k6$U-c0AktM*~@_MT2@W$U)tfrKVd&@T#N{` z0F>*CrLkJ51@vH0$5}fnqjVBWKroQ0q@ttq6r1rNTI{kjoHmgw6YA^^rP;snwq-w zzy5CAs#&UQ&Pxuno!GlN-+^CWokI>LZOTjaVD{kebxV&@!5leRjGgm4o>>N}L#CJ? 
zcz=E?R`6#5AJGtnLj%;f^={74`0y|f5FZlQv?5Jgp}J)KCQy;%X%{y)H`WAJM#kmk zB%}U;0cQH*(X)jpuEd0dN72&xBd1_YEC8pg&X?5%6ch^4(L@ zj&KTKGuv;zM^=oyt-Tsjs&eNMiKW~6qb!g38gR@KTDl4nr)TF|rNBs3O zNaqJsJh^2}O1?)wvsy_sG{A*yxj$T5ofC4yYUPF;?6__0pC6VM6-I~CK)Ycz)pip# z1{de5ykB<@CS34D9mNTr8oVb><}K*5 z=Cia+uB?P@O}FohaAEdwl$uIM6W%tS>Nlu+z9LJ+7LDyJ59-vm=ZSV-uD^kUY_FfiI;X~fav>tz^j1YN{p zZKKbzbd~tUn#rgF1UBb?9&oi9&~3&82JTu;to``+`J*X^*wGeDpf#|vuy~zs!E_Q0 zDlPhoG>XScbQmyOTn0of`BMe#rvcw5BqU^()I^MRKeLdenu1Heb{9;FG9&ox8*a=% z{1iJvx7hpCES*mE9y|V^l5u2YB&hk%9+-5T24}mic;~)NoiqU=yk~?$iXcYMTZ(A* zIUSLy8W>O=I=0La8VGZY>8zdg;{>FJ&@c_`$BG_lUrI#$sO4Cg?JG1X^MRr6#qQbllpR9eVE;e?Q z`skVC3u7ts1Qv#zhHU3Q?<5nAB?lgmYx5dU3nrRh_+N)Q@N24Tf_EU023lT76pcy` zc!YkdWed$gAZimV)jIIo;a)aQ)%kdQ>#Fi>_6;?bQ%)O)`V6E;J!H&#Xixv8b;Qou zxtBC5Bs_(k)TKrz>vit8+^U*tJ^_JeIPnPy<~y6VJ8NX3*2Dx|W)&(7(qm(zA)owY zouJY=bo3}9J3XJyhem#)Rc5asgUs~Nx^@ck^@CgNlS*RW|2wg71l;RWRV`nq*i&vaJ7_Kyutr`sM~pBMva z!$(4Byq4l#rwKVN1b3Mc$}ql+1)6SAaWRemi81}K74Pv}09?Bk5c(Ard->v*ECehG z-7?f(W&rPq3{XUOw6wGU`o5~FN{}+fyeE-L$Td7XhVnTNFxEmQX^JiAS;32o>-ub? 
zT)=Mfb4CWIVGd<+$UHd(1qf7N_<*MRCQxm3A)cnDMdd_)rnr%=?pd+XrCio){J0@2 z1zFj;VSDxBQGa+qXAG5leY!E(Q_PM@(?A*4BUNv2Ap&59=I7@ZJNXj?0hH->{Z7;C z2lM&taVd*CD#T#;9g|d#5I)CNnW7Zx1w?Vrk#?G4^x14}wwlYtEdeqd*o^yB=G8%d zJhSrl9Q?W!PyBIqj}iq@M0?wu$by%`MK#8Z)zWw4#ZD>z1UXTzH5f1kkJ~QnPW;Ti zk)ScU;7Xi2GizKKF}K7c+N8HUUFJNnsnXZKH01KM4a*Tu zl&goQyw-dKZ`~$$C;QVxZMfI&;gaF|89jggFeSw^qo=2lc4n@8YjC-(LBZvKe;6+l zfC5U~uu$j5p5^^RLB<58om-lJaEAzUZmYUBW!T}DGAZNa&{9V(> zNu*`ol?P{QvXWWdQuABa&T$kC9WANZrG3r_66fwqMZ40et^QZa%-Lz1o6lDJ?*!@F z$0UbW>DtlaADmyBhyYa^q{IGl8_Gs<~Ju^p=*a3Fn;PCLLxHtlpH;6I=F8GCWn`wO4 z?}-WI{%Kp>ufev^-d+XR)Rv=Q+aIfHT@tJi!DNSv6BNMOy4+-G!1wy@#;wW8Nq+lj zLEjU{1lFuM0;siEz|}>6x-iUsh7~tPmz^}Ml^{Q49wa`V837xmFv+I^zexa)1{psbLUjz#>fR!7rLURGvmzEh`D*zitRChvR)O{1{4h-zv-D_s-@%pgwjLM!~d=AM( z0r)j%`s(lV*VNQRC`GxD($7ZD+u@4$LcG~ElR*JO!A^8RC!wVb3NJ03m%MK=R-9Ap zYO`_xoW`1phb- zrI`(YOVTmZ=i6k?J>2nW(Oz~F0$CaMa$dhY{JEPD)d2SXYVZ9A&m`!%!}5ifzw|M8 zcUM$aw0(Wk-(SNItE#GreE1bCIb7Jykd@Du@dq=nyc z;Ukba@kJ1h%wkpWn0<{@ggN?kFdeiZsjcluXis9q;lHgM%-Q>^15pp<(?ora(miH% zpe9mn&q;*G<{dp74WTZQ@@Jr}N5Ix(wPAVE>M0&rpT_hYMy=t;R180(d7a0!~}POn$_}^qqrif)5BSd ziuw(8sRhOsGfJ#p%!AU6sK_RP^A`=D$2Rr}E>tDT_H6&pJj23cWeI%tu_y*X0iOUvEi%N8Qk{j1msH5o8jkLGOZ2UyFxq?~c_e>lx^i_xF?D4ind*u~Gwbyi9)-WP zE(9v}N=j~uc82z!OTo{WueiKv@6;FMcZEeogs^hM#xnuHc&(Ec08L!@B}L=8Ddaj*hZ0qNehK zbWL@2;tlp-0oz|6=~S4hB1Svb{~3Sb-t17NK64P202c*w-_nhiUOe>HD$#Zt5Iu%8 zjD)WND#d538qu}|@IC->!XDd?v#(ELuR+mZ{>ngR`RgZOlu4kmzgfN2ky+prI*3et*wXQuS$RPCIkrlOeX5= zQ24<&Q5yc36PKiRNjczbu{_}Nv4LJJFwi6`_>U-Z6?2NKiTJu6k9mO`aIu#JPvd^GBM?H5oK%So_iU#58Vh2ktm8iH`YkuZl;DR0|0+Z^hRZ@Bvg;uV2>C z&=5Yf2AHW;=Q>?IJw>MYlX;Fdd$4}FS58n~pLc7|{Mv7*9d4MgB`w!(+@$96$%S6* zTwm>g)e2-`TV?qyD1bbn8}pDEEre42rp~L}zK8?u-#j4K(Q>sWe7*~|ghmy-e8{X% z5Ys6JT+ecZe)<;(8A3TwP$$a+yb1k=S*~AK@n<+NYd%&=E{|sno2X9VbRAOqTvXIR zzlKpVjWl!w>8dhEvTzK)&tGCAP{B^&rg13fgX1wFHvPlR2H4*W4Gj$pX2ARfAon^W zJN;XmO$@vfjA1t;1L3U-OS(9I=3!Hn9*7^B&yZdE*`9oiN_{AkR-AZ)Rs!(m9Fq>4 z+EaOQIYX{gb@ju0eAU(IxR#O4Zz`vh6B9t!Nsjpe>Beu^CK6*|W+_5$)>h5&1uYJ= 
zBu>v*Fzaju44J{M^@4t9u7J>ymhQN|7`Vov;QwI1`F6d`Z^qT3G_|Z514FCLinpe! zW^}?l;A2i`BcPk)dnT|>cB>fVb$jih8d4>;r0LS1z2_Po${tlG}o|JEP85CJi=VhF@vK6_Zb(S zyy5hT*GWjzW{TpFl(E_pPd;P3m{TownVN$jp^=UCmKAHY^gc@&1hzFx3 zqmKv}ywx6j&}FaW(eiwoy7>i$1llj+jT<64;bskO%;8{QOHJtdG@MHv^Y3a=m;Eb` ze`NkO4qwA^>0)_zHgC>orJa56PcxwX;NOutsJAN;TPW=Got+K3k>FH;mgWDIMz^sc zPdc!F50|nQ1W0dhZzm@d$lD%r=t3Z={C4Cbo{s+h{uLBpg9Qu>#KVrm_{ABIIZ?3k zy3MqMFDj=C@p&Jt0kUdOWas$2eG8OGrl+URjDD~wnZFBaZoq&j4{^F&!n_+4VYJ-0 z?+a3=<~JYx0{HZkdZZvi1_ue1C7L!863rkZXrlpfG5*an6Ag;$M_TGH10R6De@9)A z(`sR7k9#*rD4kOn*j0wwSbTv9b>Z*l(z{~s5Za2gb#NHy>(Y4A)B~NbulKKSdPKLu z+zc|enaRnsLxhpTL`B}$ue1UJ$4g!DK+ySqvU$3{3R0JIP}FS%c7z1AR(b$9BtNqx z3q^T1$|th3u(HzA*Uwx{SY`qXGYc_RwQi&BM8$^>A9f9VuTEFGY zP^xL;?#@p@CG2jyHkgGK5*U74;tAkrP_Y3eqt>xh!bj~7jnK;|$PP9TRM&46!1FO;N(zwL#`O)w%%v+R_&l6pUBBRlBgjS1GZ+BT-OLaMGphpFM)S zEm1p64#)vgF2jq{)l5rE%S=(9n(FF9X7N+m-$`IhyubtZES6ea%;Tt}$;)dYad7%g zT;~QOpu4E@KWUo>Hh)E|j#yW*_uvzB?)z$whyU3Jbq5r3p~OjA>cN(N;dgYzPt{pR z2eV#+Q?Gf`e6t+k1IlIx$;1hwA3c*U9wi(Cx|+x4IJ*BpT$~DUa)a?5)=Gyd2}U>| zF{bTd+M7jguC8r`pkNugkVYA5T$UXw5p*}2^0_lW@L&`J+zC2iA%@k_k0$Qo{X1IB zqVM8!S>nq|NjB&G21iQu3QSss>XqM=!mUtaoV!6INKkfy(~2H`Sv-JqH7q|SuWXvJk5wyeepA~)-VsgV z0fyGX!oqG2xXte}l#7R!R+en1E8Jd4NVscS7}{s(mJ()8{!0Q=szF3Ed8^(X>2^lL z4v#h0DM$2h_kNZo;s9Q>Fg%LaukouH0lje-j~syXvqPjyS+>bStrOz#CnJWA_;Y&c z&OVFEE;Cj(Hb7*L=gP*nWMY*jzl7k1y(~vi;M|qK2Lk2|w`qs&vZ_a@;%L6K%r_4L zY0;Mh2<}pFQwj>m1RdX8Sc7QwRVG>7(%nnS9OzOSZq?%WWTMV(UsQ4c71TZ{;|^LpTx!Q3{+-f(NHsuDI`fcX*N&@Pk1Hc)T@rOr9DS7w1GW%B0;2P~ffiKdC&XRa8_^V8mYou80W!WNi|? 
z1PtDSvg?U>zDE6#2&7Hp;F_zyelaKJck?|)>tLCvk7RoVkD@>P+DB&&`9Ze>=?og0 zov2+fq!kzZM!Eta2EE82HteiKG6OUS3XWflDaWv=eX||^ZhU=p2|!ToYKBjEL2j-Y zkikIP5&ZJ*2P{vXc!IVROU`v8$OfQgv?HB7X50`-M*gROl8vhLc6M>0@%;1N#-T$) zJ@wTCAnS$U(EvRufIYq|GAhc(#zy>nvyukXapAPOZlRa;wJx&IM?9XpKsDJ)Pfun7 zb|t_=M7S_!DCI}#O4HJj;Yj=5e*u9?BfT=pOUn2y&D~tb+c=WHeb|eXGrI)hb3B8% z_%*bX8hZ{X^f%xC{PNV4UXGNQxHm&|%Azvw+qY1$ySd-Kp*dYpeT#HmN3&j12JD6n zf(M?UBqQtXvjuWNl~jV_$?$feMaoyd$+57oGQ8IdUnhfL-iiEso^OHtZtm9iHT#B% zVL3=*yk-Iaobu^YLbBKHZd_qJGnAbTGqAd@JuRK&Y7$NSse%U(7DHWUzGs$b-X7xj zSuRgA2RgB7F!fr`hzql&cj{Z%_WsD^AT_NOM5smG9s`3^Ms zF)OF9$0rH+tbBOQxJ;4|a(Ny$H#lO}&AoP*prO$U_4Kg)btWpQqobp%I|)`GhMI>Y zBq3w-Xaw*ID?!c;sKXL5E_M3O$@;~k!I}s#6|h(OAKbpVz&e%n98=!@+TLHHgy(M1 z{80!tC~^a#c*E5sX5gvioC{*O%4?%AEa4d(u2 zzxc^@ef2;n>!IY>4mC^R+r*Bu7e6yML(yMsN20_8{|LQ}Vl6>EaQ2uHTaC;AZ4}Td=K2MV|qF;hQ&3;J;nLC3ALn z@hB2DnJ_S>OD@F9Rr8eG*)VcfrVEG71a5r7L9C!}-GB1;7~`VD(2Jb~ccIN1_e?~T znRGF$<#NtX zxX}}FP!GUqo{9>BgB%m{iVwZ);EKV`OaJ$yWFm^uNkn9Ze9p*uLxveFTlgPm(gCT+ zfN6|BC1~=q>200{nu>wfTgfpvK?-|f%A~FYzNFqd1^KkS2Vv@M497~rxpAw?DF%Ms zlChea8k<6Q(3m0mA*a4Pz>cCc6<5)YyMtNf0`?+WhSZZ4=2QGOE-wH4WkE^NWTU0> zN#YyslVvRP1j^EDs<)u|1?<(0A1a8S&LkK-;x z=?@n_*t8q@cDOad+yJ6Fm~$!{z({oR1_MKYp`<(@2#1pUYtfIh-^dZL;>LPfgNw23 z=>ghN&ocRIZ9&sB*HGhdtF2BNz_9|t#;KY1Y427PX}X|SMN?Mu#vv{h&r)KcTEs7; ztNrYc>Q;;Ro@MI<>+Q`6APLF}ieS9kU=n8Q`I6{f*5JFjHnfmfx!V7LmDNOte{APx zHBCU;T)B~yIU^UQi{(IBS>ban!!iUC+-v}j0aHmh1Q$QIZQGp(LRWY}PL3%kp0tk* z!@gTtS!vmqR;W_k?u~VJTwYMLAjnCE*_$cA#U&u%C3*wHvHjzr1950FXzO!CB`g;i z1%kuLv-Y5Spi5DNxej~|z8%HYpOEwrk6Hn!uW8}|SF`3l0OBRuO^tzq=iK{5zzPF6 znd?JTCqN5)-|~P1frIUtOBa;~H;&7}IFw@wHrMF-_EQW##vgAMPEJl?Ki%FlrTj`% zO9*JZL8MYL_r8Jl_ZVWX(KcB#=dF!{xX<=99*ytoJoQMfhZ{?GBpN=*;Rh?~!B&=6 zW_ir9I-Mv$IVNc60a$awp5LS3FI@|DOT~Pbe$8H!f2inrfHyiDf2*$Mu+l z1ci@LiZXCh`W`M-{|-W+({_8-I^x&7D`+gVy0XMdJSR21J7WMhIM)8s>aut+Frcic z$RX-PZ{i~@Ep4Nx2MkA2=I8}TyEH356qgEcRpC$IM&k6Wk9dN^0l_t*ss;=KAeTsG zPy9sp5;UI~L9(;6+X4%TP%GF;#6rOH&x^3uAu}lc(8S&z;3(Ckr7z~*x5~*?kv|{+ 
ziA>BQ+M<zwCt;id6NP}gt(&5v6<>()%dnzRYkF32T!*CJRPGqr zLf#kS?^|X1(}eZB7nZc)QV|>JjmX4n*pPO)5b?#kpSzcxceZY`?0>9<< zBgz9??kSnYtou}iTE6?{WI{snbFbmta5~YXo;=X9=RFBM&)Oknr&psPFq3TSP6Nxn zXGZ7b(jW!SlP6>XD+{0Niq=!oeU5%DR{+~o)=ld2q!!i4CuBI@4TcG5KCKld!$C=> znfwMUwWn3&96dacHk1fWO_A~KOwb~1?llMk4cXbAgI{msR%yK$gJ(Ir05iZ!PG@Xb z&G^<)%H$-?^_t8=8+Cr`Ja)Y+yI8s*@QN%*MXAqSE8oGO6@3{Q1+$&v|N4Cp>55A( z_>uEinA>-(`Db?adnqY^5`k}}LowAK@aOZ4ZfH$!_w+JLR*b#JWbpP2-ml+(PEPB4 z@8y0uOy@)#rPpt!yC*A@sTNu&W+tYj$U(rMEGj4&2C*U5@`Xh6QUuWgG48|S4?WA1 z)gDIjHpRN7d*`>XSNlYL%BC$Uw}e6LoM!-H;D|#VV>ZC&>#jLYn*}N~{s+)Ld*zLa z^4a@3*V3uHP7C=PV}7tPu~y8$KPYc7w`F8xq%<_(O@2frtQ>r!Y3Yp)*H=i`Y(mlHI4b_|(f*6KM+Q%I^E zA(Xp792OjiG3Zhf0aipW~WuE|RwQk|-TK%%O z1|2sag(i#IjD6$KV=XQUq)dv-@bCG|g1Gd=u%}-59R$Xndp*b43P7^;GxqNb0{pMy zOgkG5Gxa%F1iq{kjj@eRAm^RejpL{H%?|CBk=LY<3oH zU7)#jm-NAS<@L{#r>5=6;*FGGvHZRiBis^@ChT(r8ne9=SE^r5bTaL{dvW!7$7ucP zr{8f1^}~Z58eQ3x@a?({PSkslcbcM1fKkxBz8a*K@m?4&elRJg33{PTolyUKq1RWv zeO+ElGUCpH={E-`a6$)d&j-0w>3Ph}L>%Y*l=7b~EEMc|gUCj6emYRz>2%}3krSrJ z6c0=%HAn8DSq8?(H9g(DrfuvOIOlT(U5|O4EQw8hIZ_q-XBQ#$Zd8>Em1r)nAZa@3#{mI;|7Fb zr+L6V9-Md+ZvC2!GWi1x3<3>J4G^liae$)kX$Zis65`tF78cfBV&ap_D+lHD$qK*D zpyL^ee8*QMJU8z*zu{hx$DoholN;FgHFv9i&Q_e$%4FZgYyMeFoqmI8Z2C4pFz*}# zX&ot7kI(gV^?>_rVt7)>f0K+}PEi&J>>{2g;hq$4(fXVPJy7Tu(o2EhujI}@;N*lA z7Nx<&$H>Sw;<~ohzU2=$YCu;1lb}%O|9gT0TzZiw4&80C4^{_!zT-dd_*;EJI}Kx2 z26R%c0uw;+&!4Z*a3E*-`k9N4?yCn4#zTpHJ z8tG_|KnK0J+NxkiwDi@ij1wGL&zNJPlU;2p^ay~P?F~2Cf6;c_kkFLK%Al}lIaP`q z81~-&nOcP{JV_Wcn*iAczHgt`E9w~_#wB--9=&_>28#X>s=|Pt@OCrO871N{xWTY3 zF;wkHPZIg>3CsNREANd-DMKS>g2ugW8{p;RYLg9$lwWc_jkcZA_*whc53heCM3Mli zD%Q#!N~YD|%8};zC_#?o#{iW+RWIT%pZ$^PpJG8Y2L127(UOun)3MXqIDvstM(rPU za~gxaXHSF%dRr{`Rk;iBUq9Hs2{IE5(KNYIAO{FF(oH--W{KD*$(72wv;BzA6{`iG zV!WU%>^L|Yv&dNdH1jbde6{vyYz1U7{~11)DwPZ5rj-{vJs0xIPez=-K8StXzrB=9*h1MC{q0i$j_y2Y>a!H*xI=SRj?|gog z)U%kL^oD!f+RnO~-#o>IA*15CpACQdq-So~k1lBls_C3ql~UmO#z}Ft6OS! 
z%Ye|2Px5A|nQ-FZJH{F2F{Bb-;k2Z3H)@Y4o^=ouK@VIkpD=SZ6 z^LFTM0>As`+q?N$>c?c5`uuBAOU_blBAHyc>X!&358tDf3TRM8P{a3KJc??$(;xX3 zzphSTL^umxy+XjWI+m z9eM*7q}z;CpijWbd|1G45DjQ4ydku;0jQ()_I52)OR?3L5#%(?VNPQcl%`bT-OA2I@i)X7~ zj16#x|N0ePsRqt?4P1y9S®2=m7Z`?^6u#_U><@Gbb@YrS|H3v+0A>@erV|q#o#Y`aSE2NP z(;g#G#XoYJ!u`K?rGNR=1r)V+v$w#LYZ6l5UmvgpC}%PF8YJAqK^ky zPv4Am-LAZ9@;e9a+W5r8qkiF?hSQZ)v=b%5YP3iLxCVgO&)-vF1HMpgJ(|L26CS=; zJ5%og2hMgR5-Bnuel%{X5*31dH)tW`8Z;UPB;E4a;l_izLA02CfQ6@Mb(#jQVus1q z{R^N{Oo1&RL{V)-01QSjXLw#7BDBg4sjgbI6T*(J4n~v%fae$R)?b7@%1+cvOdD*| zll+WJcmiqB?stnyFW|Ak3dzpNdC3&dILM8mq4E2@h!O!l<%l$W^binr zv@0V{E3%`~_)K+%g4VrEs!&E$GiuKR87Mt5Q5y2hqPL)|jP!OE6bh}X zazzg30l-EYWe!?80OufoDu=q5sAzgPC}-jFNRJii3}B;A26Bh(mJA#zAJCNs{? z&OpHd+KvgyRcA$;Q@5sIGVGLzZoxG}_AX{?HP34%8r@vFOg z!Rx9NHjCB1;h0tL-Ofci`mfAxvP&AkodS~LdBnG@5D4Y2*h=7^LjAOZ?xZ?KG_pvmut#0Y6V0;vUT3WzzLL4j>X8^`CM zIN)PK0?8^9Kn;V!>B1Vi+`HEHNX+?$CMA0BH)BhZ9;UN!j zLb{U2#7TzdQ+mKk0m-&#iYUM>y|`G!e{u15@-LN6*7%f^Nw5w>42#uS&yRO#u0z1> z90Ua7F>T>nWa$`_T8$28tiVkwoV&WEiRz|HeyRaK98W%|_+1a24Ub5@0aP?ChAc!v z{n~RewHuqCo_uVs%2Bcz3cvbktMJ4f4UXP@rUl3QwwX4z@(ufeTk{KArKfe0an=d~ z=2ALupicz0`eYpH4Y&S&Hx;YGAFqX5!NT3%;%HRa>aX06*(@Xi4utWq-6QA^{wq7l z|9seh;rAI!>F73E+E%U}*VN87D+IO^4?}g1}$C~0RYRzc;5WFw;T5PNbmG1L0P|w*(%u9+ceJzyDq40PX+&{Z?#rgPa=iya}plk zxeYZ}Jv;v2@fGhpwvS>zru4La0EZQLWxgoOB(JIHPAK~;?EK{T9{M*Ar4^<7CqsRu z{OfPJk1oyMti;zg=hk=U4v5;bEf2VUkxwri+Vg1_0NCmG_3c;jEk%o%zR& zd}*P+zpV{k;c5Tl`TM_XSnSrl3%x z-A?q$V4!Fb%E?hlTy@jbIdnnhb*HCZk=gtX)7mPAOic)=M~s8EN0MD1IjRrzrVv!} z6cKxBYzXKSH?WFBn$lE(`?2TuHdog--s&2v>Qhes+}J0OER)+;w8 zarR4^@>|j0YH%^+%-3KY^*guzAZ_~6G`@A-%F5Q-zV2hD!vxI3!PG_GApj1lb3H|B zva{Dk>qA0bpiTW+8X6}76t|=M`WpQ%0UNq_m^f}*?`~`K{u_EQ|D?KKcg52UTXJ)C z1G;=t&%o%Yd1_x6jnFFkjb_&R5gu;PtZQ=><{O|j5%ceEPc zIVO^H6`JAULjzF^v1mubfJb$r)Bs$mvszkMvP1%5Da^3Di~ruu8=&F^+EZ9*NfmQ? 
z)22zaa7nPACLDGB{v%Fflf_Pk4ORN#>VRcxAGnULzAosoJBPgK-=k94nkTRsgnX7L zgMR>99qw_EMrOMTQFWs3;ZcS+`SM?srR^)N4(|S{l!#os@L`cJd7M1+NVAi5Iloe4>75({+VNq@5l=k>rQF zFO}Tg)q3vCHzkV<#40Ob)m!nAhnX6BDyR?-?2Ex;@fREg?`{SywAanpCrv40s<0*` zPV2{*CzYC`kH3oy+CM9FU0xq$(<%oRh(;w#=#nUEjp5OwM=kQl+r@=8Rwv7BCzN^UpNW;p33=@U#TDAERV&WyEWimc@LQ-X=};Iq8QQa>dW_TWkr)*X47m4uE!aI2YE$##E{PYGH_mj)wT8Qx9~Az8p=J-#+G=RbceLG z6gfXPBEGbq_9g zA3@s~W>4C3O|KG}%jat6-F<>PQ7i98>KxZMb&v@SFL9|n`i=D?&DX1h?KTNT4MXIs zI5605*7>hL#i?BDq^4V1$m`K6tC0!Vn3;4x-13;C%1m?pK`K;VKPQZOzk9KEQhRJM z3$N&4W|GWoQ>FA-t@U%{K3G*Hr_WmpUm&db>=*x&8>iC=Rw^jr;ox9&{X)vi3u@QL zLqagi$?}raLGH282qEDR9?WZPq>C`t^C6ef`UVYVz>5B*OqMU(tj}XmHmVPgd(HUL z$$nLv3$+HT7;#RXeJB5@KKRvUg95$>O1`~|F|;*QsT zA)3Xz3bgo`bMF>CV-Y!{R^}CYV-07%{&o7Gd*r7w)dozI+~=5O(D;fm^zh6#7(ZH< zgM|mJ1_E_BsRtBTJxVktD<8;LWd+OQuO`*DV*XB)eXL&b(68mawS}YYSrVw9WDY(R zx325YiHYYnO?f|4Xk5;}7_ic>91U%KBFtPAgmvbOSesYWuF{gGQ=`tzhWuZ-u1Qhs zb$yjD+_#iyV3^IOUr{Y^$bk{>tnLa9;}M`2GSDsLXr&62YX;*3u4?kdh24|OHRekz zrY8A<@L;B_Hi4$k-%*kgAJS{r79-jgja_#by?q&+Ccn7qVQAiVzBCsQ;ZfCojqm?_ z^7Pv`n{L+{7}N~Yj62kY&1W6rk*{sp zJ@dJi@J?D%&s$(hn`^~i4UgkdB)6v z&JKgSuOUvsA7QaH@ZzGP+*-j1mGH6v$aNczXIay%m>@*O?59|**GNZXMTJzodhXU! 
zSgaKy8x@iS#yEMZ9;OsvvV5$IUS1;}#PNul97TNg|B{^ifaa_v@FohT+z<-E@s;kR(_?^#at=hJ&oBO=N}GwB0)z(?%X<(2sLMs#1(V= zqbsxeE1JR|4zFm?|6;J<@DeE_9wAc+Hd#*#rg!MYk1$>giG1#^Dked{^6mirv}NB~ z)Or#z%!=HuHKjus87AqL>s#A#aB)1K(j*5PklOoimclp6!ndl)6`R_f7h2T-I42Uvd+8>V*&7o})}9Yw?mHB3&L^4e=wupe4jT2@w6 zVA7?}%(S=1Ay&6$>r8!}_qGo=D_9bVq|RE z9p3$Q2%x5F@uO0gts@w0-GT-S{i#b(<5?FgbQyIe{kNvFru^q6#% z1K=k5Tnx#m%Pmj*__4T?IbLJ#a+om#P#qYt8XB6Q=JksIv;RT7CogtVi|muVj*Hw| zc_GmcSy#qoPj5$jxiuUcHq^+Rmps3q_`!1@|4&TF;aLw*!L&~>Hjo>|4uS(t`Ub71 zgPpUXZW|^mdiU_A;dc8gcan;;ph)0zdvJ|eQdEMkdJS-|3+_ESvsq+Xdg@e_69XNf z?i{8akN*tf5A2gyPkv5*F2uk6xA;s$YB_r7spPMLIQboD1J7u z@kGhH5|fs~J%0xO;5D3L#BI(--2uPUa!f|DYz`2tqC)V_6Lasj&G@A=;~`k=tCx~N z+s<`_>BE`sT~sJcI`*EsEp5{sPsPPo9c`z-XMm#)Zj6y3d~ z9Z8?Cyc~?5)BJ{)3`DQ&u((*$4xs|3Gfl!d#v$1!P*)aX)6hr(_b$J{$4(*&1<8S6 zP8#ZAY?}#|!#`*~B-Q+!;C6gODER~Iqs!Eh-hXF4&&{KBrpZmdx6zD8$6U(GTip{wLV+3D2Kxv27FZ7_wV(Xg|kWMBtTFnUVbvo=oF3Dt~h zPjPszrzf!E>h0GwJ+0Tj5d{g?F=^X719xht;rJ@1RL|%9ys|=}1(_@*6=vXA_vCMW zQqzUbPu}mA^U!|)(9^z7M5+5Ae1f^_@D?59VvAw;Q2dcqcfgd;4|t?PtkGyGz!>l5=0PMWN#jGZyetf}i?8s} zkdiWW>1h~YSvng6jS;wi0XO=fM-njim#Pv-?r|jUD=JcUF0blUC@_^Hgx|nc%`8V? zhVhRHB_cI+biOGpzMGjH9jWxU+vHly(;qG#K?=r*d;WR%%lzB?N_v0Re4J%0B>`!C z?ev2i0kl5^mPiuwCMz}X@z4Mv9)W5&{6R!jPG(z9a+|@c!1&i^6kbCBHndX+({B+$ zBBf$iBdE(4s^b>NJ?(DPni@YCsFC zN#IE0te?s7Ja+tT&5JAZA(J4yK>FX3&ROffNoS<3@_)`YTa<2xJ_KDsLH%vrOfWv! 
z$)T%nS=)W6sN1Z9P|pbr?dYMQis9klBC7J*-4$kDEEc?1ka}K=DXAJ|UNO4d;(VEt ztHyj-60@)2Hiy=GdF1LBKl(-*`cT8AEe7a3xNTK&g9OS#Ip}5%YMId)FfS)*#!#EbgCs*PNY68}-RqM7b&%HTh(g^qnwK|Hd3H*p>5f zu&bMg*5`!#v5Q$xtbf`&G!15J+Gr;xsHh)q8vvn>erl(@P|U5&{mo!Mw{2pVdZl0N z?kCAC?sA^g&Y5aj(+d={yT`S5y0Ri zfb9%DMd}@JBRw^GdgSK3&tYI4$7vMHvhHN#d_1J`%IgJ+GkU@uwM7)|P^~@ddk(A8 z-$r@w6qb?pH-_j{WttvE9QcQmdB?$G4?>xF^F|^L7&2UmrA9X={Pun$XPf*J&7GNKa?}u3=dZRC?zYB!93af z#g5nk`)7JYN;(x|aDhf>LfL(!ZGDxO<)Ivc{*$9=`5#3f>CJ!w8-9W<==+TRg}tJ1 zW?-VzRo5p}Ih zZ@^7Pylug;47&;HMQF)*&CJ**<%f*DGCb;LC_By94(SD4l{KmvNtykKRjFUoHp;6t ziMCd7CIR1Igzr++apkK__R9x^#w;#eee9Rv%v3!U*qlk6nwFf<%=jtMkgzz&oZS7b z3agDzPecvJJ+l^~4dlcm9^*m?`#wxa$9Z8QyH`C3CBe2UKA?b|?{C32iNFvq$K^u` z&D;GO`P7OdL*kERRn)y?W!n;Qh(08b4La932E0T*p1n!GOb{w9?d^RI;wOvj!F@Wc z5Xm#qU%)6XJz1U%3CnxJdU0?;$f0W&(Jo7a4k)j7hCe$f_bewzJ!te4k2>fYo}(p5 z=>5`KbtwT1U|zE|o1@1yoA)q=J1p13?-hn9 z>Aj$=sXO5y_KAW{i{GGX*LXZT0T`n~oHK6cvrk zuIQc6S=me|!^HSm(Mo`a@kc3??;lO={~Vp~7;yP}(aYc7qI)?lX}5>xe_`%DfTG&g zby3zbfPxZ4K%#);qy&-dB1=w^gCIFd&h%0N2?8QXvVeeqWSW+&kB{h;mlbcM3 zH(>2`_Bng+y6@I~Rj+PYR#7xPd-j}Tjxql4|AOL3v2#?etI5nc*z&9T5~zU(2Gp)# zaG0@&X;i3lYA8qNvTZ?CzZk64_56=^CSHn27Oi*;rhD?uM@ln?7OchZK1uPEUM{)$ z22?*Q>rkh|@66?h29mM2C6#qH&OsQTWX|K&poPWJ(M6u1y%MQJj8~ZJ|@!>q550Z7 zu^qD*msPk0?lei}tOP$XJi&(X2WVf*=~PRegBXAcmc~<4jn?8@_6Q+<>l<|+1WtlN z!Yxy@oMegWo*lh~6QY$R$Bx)_{J#5-ey<4-L zcuHDHZ}RqOsKc$(4Z>R7Uw!}8zX4``%8+&-h^D8L$bsOdX0PK%P-wvQxkGZbNrT$+ zjG#@E@BT80u|l+&aEAxx!|R?0Ln>)3q5WTJWV^YseA!vMvX%5Q((n&L6u8gWGY-Dt zu@5f<>#iE}pM4^7`!gMw$(2gVVRwHM`nJcLq9)YV;r!go1k@aW!edcAU<;z;(jXY@ zQMUhSG(|MQpHo-sa8-1Scer>tiCPGbf^Zu(l4iC zY;oT=1O)7s;6GQ6v1f-4qs1doZ(w(0GXWf@0*jA;Za}uXue{j@j|&9!cOvEyA))Qy z7kd+|n#2v%hNZ9NO<-7e=9n1w!#M&J&JU7 zKf%`Hc6rdB@S72@uRl9IW0KmhsQ3;Z#mEFE#*ZYFJt&iinz|;S zPCveTyz`pWI#umIfD$SJc?EX1(vrd9xxju|RW(YYz8KSv8jW_bk!FL(79CmPI`|aS)h?r%LEo4HD+6l!bCWNZ13Ad*35bUrhi{o*en`Wc8 zyR<4twwEAj8T`0VV6iVxLqHPK^fLJAsV~2TQ5JqqX#5p#2h6Jl2Y(n0x^f zmkE`4Eraqi+>T6HA7|^Wy<@68GW1e--Sd=Xi?dVOIe4EOauyGZuW#WV<8T 
zzDKrFRXun07I5Bowrd8@UN;JK%s9&xUvS8+U!iv3f}be1O|b-UFbGe);K2VFT?haVkJ(eeE0bL zC17rLMu)F;3kOAE8|6jpq0=7AX&jgVna~*~i6G@teP8dR(Q(NBXacZ`|44tdyWHTr zMU$P;7>KQZpRTk6oMk6$(Q|KZ>QIhNj{DS>;VRqSgjR?ub}>Ts$&+;aYsO}Z@!KiWT#!Jcr0p*>w;61J#tr81?_(wd*+L36De8_xRrRLq~M z8ydbo>QsQ+IXWGPV^gw2K!)*2kaSK7*ulZvX1EiyUq1c?4jyoJ3Vy@(FlyyQXvL9- zr&rYV=eLTDGo}p6agkj~$f>EzhBPT1EifHLAt2S~L-XgdI-LN_+#eI)mSR#4>ft#6 z)ZkCIaRBR1wvnh8hv0cys3-S=o}*i^8yu;M3oW=a2(B*r4M#_Lmkipr4o%(jrTKCJdtr-4z8>6fRBIZkn-xs!<@Z(M`gF`6) ztT=z>p@b>HY?%z!6m4SZtv(q#@qppo#oGP|7n)d^k+}d=x>Nrh;jRQb$itduE%crN zqT6PXqp+X_iy9Wi*{e5dNowLS&|!{RTKw^|+Ag@%8QPM0IWIkCR!Vw8WWA){;Fhep zLxx1nFj@TSEm^}@Qik?Bu`*L@0Sj~w6Etu?2zBvCZXB^#Qj_ga`~X$gL<^^g{i391 zsKTS!6H{czAKRX11`J61cgX%1kUiA!>cdVa)$X{ELp1K?%k1WTx%h5^3Y`kj54>@M zl$bi|VE zm(njlaUM#x(%QN)JzWq(ca7PjE@&aVf$&ZE@#irva&Dg9F!a#h0Ee>kAkZ8iw_DpN z)O9Vm*UdOZMHLhj21~c>I6hSfQvpHmlvI>jltsJ(9HBHfuI7FJ_2QBJUu{f7!=HK4 z=%fIznM4(vCzYkug>T4!9DC#Ndof@;H)RZl?MC)6Ou?@8g^W5vD<0nMvrfy*+(a?0 z!lp!M)%)RRelcmDkEl$ETOpnHbztV^wmkDM)cpjPlgra7qAGp(IAE&EVIhBeH=1Jh6TgQL=!FLlhXFoNUeOf4yoYaZODtg z*QADqkdK6aBzf5enk#RY_0g`YIkEuAXUNN=PbFR35!1v#PZ|2clDkgr;y3^E#=@#$Yj9|sIB5oK=1%k3L!3zf@i}Yri1B$0zD#X zJ2S4Y-;73{Nz}S-Zf?VS1#J|9GrmsqrxoOx1cSEFo9E5n(#rL?#bVKfm>+aY=o2tL z?AI#vOUM}`j{NQIKk3okZE~5Yad!k*Yz!UPS(1-eyCD6Jmd1I2OY6*}2^ESMY=`CZ zb*{Vyogcx}4S^Q4FIep*YF+wMHIwtp-Ie0Uzw_rqgs$i(}+b2{%vkE+PCH z;#%mAAl=iEJz*2DY$peodGERZBPdLPFXI<=O8(ig;U|5qKayAcn(wOa z&J^z9t3wXUku5J00npFeX#c>#X??_32uAcyb&iExELY($1PdYt?)l(?lx&l&ILO{JA^e706V?RiZf!=`cY{HnP}>AQ z+~XU2%_1>IQ-gmxh*T%>Q+XcgwC^^UzrE{b@CPLpGI#_8y)`V_YQd0pEy&MhZhkh| z>8YS>_vh!%$XjOtvqS0P@Xx>eaQL(Bu&V=nDKn%RT{n}Sf6O!=r(PVp9z{kGl9vwE z_kI$)?pJ~yQ|vNud@n=%!Y^-bN3rcy+!+L}2EjtW=g9OCFoMK9__5l%H+Y8;4tyMn>A?r`5TQjp7@MqU4-WlK`#^1j0iD(Y~rsvaA(q(@IVI zJ2$TC+Ac5gxSY2F%4*EabZd+ai(PyB>1ppUhu-I<+nw<>(2T2iSMV2J zd%(oa&mqxJv?Oc4bPBqu-OY2rGp28oJrwXhr|jEHOr9#(^yT-t!U})ZvfIim zTBoNAgqT)72A#*tp-zs$AxzBeb1z7K65eu59DWAy-E?FA%dh*v^gWgy%=u$wxV_}Y zyFzaLeKX6G;boS_bEK(tp=hdPnv? 
zC)tH({Hoi-2NJKh(swuz{eA$V=>wPM?AKZK|Ii2A1D2 zKDwJvm_b~YP^bx5!C|s2GnorB+dlhn&7zD#5#BOQX>t;Syf3oWP9)o6t88X>n_tk#LtD z627-jM!TJNz~9V#PFmC3>pA)5o7H_Q?HICUagb6Thn=cRO}mbbgTU_Q*mgSQ`I#6` zm6^A=FGO}RJ}F}+sF?mdF`+FRVVtyB6h^uJU%(tiUlmE)Ww(Icj%{f?1{P@_cVpj+ zD#buSvIiZs{q3=lyL-EmMt{r|u)ifEaTH102`k;LP3&9Sxy^h2Oj4QhXX?{s**61B zV$Y#Gu{qqAr zC&AM!GRvLtr0GErQI|VtYx4F1*?)&b-h-xZUoq3`PkTdK8-k`se^1E#12vXS-!&h0 z{XVAK`7ZqPP?4+Hs+i;kvSm9M1^La$jHwMbPQ<^~V{!Qd94@Zwyxe8n$+$b1m|Lrz@j+bmpP$hhO{cUy(0N zZharSGCtJgD$NI|Vc{g$8IyBvUwnhW_vId+bkcF| zI^FpX|A!pduPq_#yOJ`ZLQ5Wls>ghMc@9q>Dro-``U58n(7@kAAUH=l^=NN_w)-Qu zo_h-E8ZBo`>5HC!@bShs+N@Dev6X>3s$KbPGTD(DF;VzZMqG%4>)`O%%t%OSoAjf_ z-GDF-8S&XiVj@YW>+EG0IAKugD^GgFI5K3te7;HA!H^}41n(^w!R>*ccVJkXsVF8)$kME_;{_vtf zs*kd<0R{uKMzB`?tPLmg*vMQs{QljB8}*QAnKQ)4fO>HP4PC%hfJPxZVMHm6`e=*X@&}z|V$2gLWa3wqp!#r&&FB z=*4_Y)I{pM5Jv#@n$(6*;Hcx%ecRzAQ?JAYsOstB+bvAQB)d2s>|^~TC)3;Jj?S(z zgT$9QM-Fi~V7T6sErZxy`-=-hF~76u=Mhj}x%B(_6h{Bmw}nmMg72t-7zZ{H$u^!< z)!RkL68#dwtAtZ!wKBMFOWE#|g5|QTECCs8qHNA89nI?MM!^11+gw=Z+s8!yfZUNg z7KT0i9zh}Z7v3Z{%wAZak%8IirfwnIg(^Px^mMam(I`awHL*RMDr1;hxE=66DJTF= z{ckuJ;PKo=e(qdOM&d$%%^(nRDJYBu`Vb?e{soD%v0jB|lw|r2_AS+@o#Hi0@*12tiP%r+f=ypbZ&Q(&)Y{0%>Ir;H^KDKOY z_T)#rU(#EC1(gOXjlq9Xb2cRY2Q>$ILJl4K{PfxY9`J@t%na{8W3o0(W?B4oiLlJu zD|Y%TxMe(jePL^vTF{O_@IXD|dWV`ds`*b$mV%xC|ClTW_uyjmQo&eF2=2+83?FDM zeH!|2bP^us(8T!No}LCoIZx#T5wm%=W~26hq$#9x9pw(a-^zxIRr8wuBxacz^exizksx zP}0Lp^0=;Ed{+^WZdelg4vhLh|pZgY8HE zN;+v8oumyYDz1`M#v&+h4~Wr+zVk2i^l^0^>5K^_0~R3eZ8M-&l!QBtI)d%`>d&uT z3%i8BgYGdv(1@7xoQ-)qT+78_S}2A-?^kVK4e?G;QP{o;+G}jOPPK8G|7vt z!#6T3gBDsp7EbQJg1mH9~OD2|o{H74!yXPSrG5qX4 zEeSx5haG{OjF%|`(X!BOUDX@4*Bj-LwbBp}AT2&?XsGqhdNxrGrA96C`51BU!!Kp-j`tYh@F?tP}8a1r2OHh6IC4^%E|xl9*UuuL@jL#BHL+!OvR0NKf&LcyyVI z2UdC=M~n%6z~1P5|idD{w7>0 zOUgsuuzJoz^fKn<>Lc;Y*oEWQgg50)+2}PS6D4@zK=lCZ*fA&e;3Z3%Y&cWaGP}X& zeZ_ba3Eg?J&~28cH=rWttn#$999HR6^=7N(nO=YO>NGrjxNzRyF(^NVtkr!wskX!^N}o8o zh0njvHEv2;*cxg`#7w1XZgK{VxFR`Eeh)ZmI@ftkY#tK%_PqV&(_rQ*$_y^Dv4^Kv 
zvifH&z#4fGw0G+=OAJNX<%5&pP_j2|51ui-MLY++0(Cy>P=&<{euknmD`PPrRT##N zj!gC4k`@rv#GE|xzAtKl`Fiu^d9{1*{QD|+6|^%G%_;Q{I(qe8M8mqu_ z=Rt2|Ef4VN;i*4s zwy^;uoS=Po+TAckQ{lF5%kR{u#5|r7cC-HU7 z%9kUfT%2gr)B99ZtZ>DDdVcrb+Xoy|-~uJd2;VZ3xW(=DF9HOCADnWEicb|f(z{~U z4gy+lO{L{NyGtXq+;rx+PD>kd(p%DMMN>V28M;K+DoRFb^DoQ@4UcF^rjh?xTqerr z{Ghu%ld2Pb*NQq)Dri>f?EHualO^Jr&ABx-o0B5DDvF8k?HgD?tZ(zdp&PiHcLJJ@ zlkeR|%M{3$@cpdjbKyieGaAithyTQ%VbtvBTdUCPsK^~RNs6Kn{W zcMxl`B~uFP@QRJx@6;}OHS%vB2apo~SK;+bhdZ^MKT_pH?;b3F5`@Uo9b+_PyNLyY zw(C}!3-rzj*7NKDiG(~^j(r8xBuXPS?b(AiAD;CO*w>kUb@=Gj>c5q*%-=fHt*q&m zEg~ah4jPg>uM`27(1MdmBS%YqR^P&k)0iLr7V6fxvY=h0N8Zv5?!X+? zU#F!)Iz>pBNz5;&82&s22Rijfdb1vF$M^o{2@=d#n9CgOUtQR5r)g_9N0PPP|GpOK z-o)%0g((92l?#Hyr)Lvwg{|wqJFO-^(#gVum?1TQgW6l)qak6Ge1TtYZA~g82NEYc zO2FmFa(H?;-M_!ObGPk2OZu>Isi<|Den<27!9s=ha)JCNhN)V%eT!08;1BU{&g<2ae8$EE*;&LoRYe!jAJY!uY z;?fWZ=x~JGy?ub@)VrIpo z=6{Dx<>{Bq--u~{H0<)kUOX~-B7c{?_L-0CV453O#WFEu+1gHE@^WMo!=#$MLVtYB zPes#y#$@wQjXHXzp=gS%Q5js!t$Kp9n-qHX7*4D z*@8kljVv*B@+gi^Up^T&-W*m3OheI1wUocP0Ct08gAyxC9zcLL11s;e7gp3O`X9ss z7ewslf725f2K0Y$Z|!i3R?Y^!x4&fv7JVJWvPttXUn|bu6vDMAJAiG*6L%$pBhVF9 zmKTv#zZH3FhH7oZL3=o7UVY=8=fMG>4n?s5aeypQUnO_aDRFk7=@*{)eV?xpjDy`n zUGx(XS~=!1qA$p$GUUl0IMI2&>e#5?*_v#0Sm^I}_IpE3MOkC!0s;Np(06F{0njzD z2sz~nlN}blKQugXu$uE`A?8ba?YZ{yN*!*mel%bSkk|bk99Gd)IYmVawz(~Tx6AMI zrl;Ima5T$W(abIxfGuuLH)q;RkZNR#&rVmh6r_>0?qa(CT}j}xJ^esh%pdeo*dC^n zWHU!7cu{k)&M|BqoD61^9GnC>LQgwa zKWh{+WPjk~<)mR?s9s!(di(cXK^>$F5AE#akL$}SYg$+YxA}t*ZrZ^&6xbWUV-fQN z7}dW5!XWNW>Up3V__v0!teC=6n=s>2me4PNc<=ehYMkSL^l%3jM=6=x|5?;HE0*d5 zzrLNd7mPnOxW=TbNi+{ah>X;i`uC; z9p!Zp0H~V)IyU5@GZs+obBJAd6dPyQrcNaGFO?7YUKa&T&Qk4z1>Q`JGk1+Ae^J~e zJyO;3b;9|*#!)g)#Pdfjzc1x->X=wC1+a5W(Gy*6J$oouUOv&VVyt9`Jwbzd|FG1= z<9TpVmA%?)W;w~&u7yYh6}fTdL!|03>(*br*OyRzp0(mXS07QF-FrInILL|vae1wY zVSUbzHGTS9eq+Rk*YCX$OD4RaHm2_~=A&|IUUGJsX&hNQI05!Wk*nSOlgX=>s0j$Z z@GhSAMdW@4vTUx@FC`7>WE3jTo0~sL`+z6ZzG=d`W1B|_d2@YaRleB%LgP2=o41yPYb1uvD6^GE4g~3 z%U5gnGMQHy)OwV8PkX2rdo+~ue~}V 
zZrtmnjJs9NHy`|dsQk;CmpB+;Y0O}4mp^P)=&w~y)u0G_qhe(xpz;Z}Fm?~w?LnX3 z{w=>a;>GZ5mL?PnyABXNH5UPgK}?Wpau2X6Qm_6=RvvB8HW@-TwYX7tow6iWuk$9^bW^jiBv(%Noxm znM3X%Ih(`NH;ov#z}N3zIT-Hh5Vi}sT++4BG!ieUS0$4W9;-sHyGL#Ni?86#PF zmIlE7)Ax4(LNI_X>l322^{p~ao?DF3oLv0up^kN4ML9)J^0Wcu3)bbQ3Tz76|6ZFh zI{r(E^=}&Q>*=5!C}v`S2UJf-VcP83BS5yZ0#C5FrY2W02{@_BsoghEs3vWJu zQ`2FwMqxQ8pG$)eU*uhIE#1YRkdt!{M>cWBBdN$et#u_{L{gDccxruOy2T=H3WD$q zQn4W|66RAoISVC&_ExMG^p#d`{#ec&i+1x3ye7Iw zjNG4(o(;yGxZN6Nq+4C%<&SIReZO*oeEG6!embZ2wYLk^bXkn>mFrh%@ z7L9zVq>`ZkQj&KpNQsmHb6p#@n%os2m?O?fc|BeNqDz@XURZJ~ii=I7u{L7=iR{Mf z*MnD5m|5E%$UqJzp@59a->k_?ac0dma(7aP-A8RVz{IqF;{D`Hpra*=|Ko{2B7gmS ztn=JN9q4Mh-Ph23d(9V4`TZZ|9{O-}{U)sb8*M!)>AeAKL100f6CeNVzTeI_YzZ;3 zLA~OdV!qDgIAy69A1KN`wfbX6g-Uy~(o-237_Dg(t%cGG4#L$&xX~^pKQlo?i|*DR zvKPH9k1)L+(4aiN;{a+oi97%7V-1?GMV%LT-|T3|cg}zK59T!Q!453xy$i*R4$D7$ zYkGe9<7Dt?nUv-m%%Ka9B2CyY|04!~cMEfZ=FrIa^@yK?V`Cl|GkfTL67&yM0T8CGJ2-G;%(VQlv9aM6j54vU zu4`$*=ckx+b91MqrVg)pkq_55G(e8qn1Fwu%nisi0&(=33G)mX4F(4XyTZ=uR z-hVU1Pb~ZUrSQu3xA(mMR3x;&IWRFI6#O*X+rVZSB~w*jKiZ{sU|(NcvV0bK)7yJI z-N>yeDgR{2MAJ4VHa4Ko3=xdrZfRtrylw_^A6Yzr6=dAoxBUYHrK9gJP=a3<7n)Jb zqg{Bw`Qg;eOp|juNNzfJ+LQrdFH8ZeIUq3i1jMrP%;rOJ!*L;At3%A>Px5t;maik^ z^R)9vivuWP6r1P@98R$~J?`6aT`4Y4E?jW=x4;lM1#wZ+AYg0S6E1Zys>>rX1)}1C zSLMjsJ_6iM5Jb2FLh2h}m|x#t!$EK?NcW_Pl+Oy<>)B)TGpXVWvqS0|C9~l&z~~Rz z`ts#(7s(wv1@{tq1cKL8XOBBYfnRBsJj*V28zMu0)53E=wxPYaDfe( z4L%B97Rv6itp-WxJg^CnKntD zt`s=XpaRx|7;La5W&Tc04HbYP=sd+w&?TkB{>Rv-R7^Ha*36~TYp{8G#$rO zvEwC&QYfE~;z}X<4eqtWv9S-N`>ofbwVwVp?1^0Y<+8IVPSwOBk`~?x-h*=kvd8K+ zBRzGgMeCD}`7WE*n>kAAEiyivUH#*(n+A#|b{mrrhtr_Lp*(QDF43#`JpcGGmA6?^ z!5C<9nj>FXX+}mC&dhIz3enx5h!pWaLgY*i>qlC2H8f03_63cyFQ$f%1_>rqxcq#0=FsHr&EloygPy-mEHzn%jN~aopUWn~@=iQ| z;UngkRBbTrRQKU4X7uhOtcqV%?<5;=^UhG&*;&NO=E)1IIg}Bx2 zuLvS%T}8c`%{0EliCG#x0|!>laLpo{D0I<wx$hR0x*IIhdlgeRXQd#~fu!p!?1qeRU+A>@3XZ}hqX z69o!J1r5fJw93zyiP6!G{?Spr-1TJ@65V!z_!*f;{@eW}GVm48lOW^|$f;=dG+#Jm z!ma<{BHQNAv|egrg(>p^S``viJumV6ZG1anLea+BT;(=)-ujel<3RYt=85L`WnT?Z7gRi(W&|I^? 
zT*qdlpW921CXX8nphaNR2cp@h-D~lSYs352IyiJo6Fzq{^{>n7XypY+i!KxH2aX0c zYUn`7kz27F{9-xQ(j7^-^_;a$#P9CK6rrVC^OR!gOMOMh1xlf)`q+i~vW0rr zNAzXBao{jogtM4s{C)K{VfD8AZ%<}biUN*MvR^D3seTwoj~mzMO@qN(3gzab+~gM? zGXAtbyO*5{jj$)*4)^u+S1G1@-zWWodiJAnfT~le8TbdaKWc+r+>tVQf$RgGHenK7 zF1b75d#=0#rzL$A)Q+Vpbw*tIi4@x56y!FEIQ`<$-||UpA_PZU<0$eIEe-PtwQ_6h zw+pCBn(@m1rhw3sg;-K8a95~9xQ-xA}>|> z!=l7vh?-AjnPU$snoy2>R8%gX=3s77gTmUo3-|&QlVFI3WMsuA*oKC<`pmJ%hHor) z*6o!}eyR%_vc1_a;TRcQ%(5RXn&Os7&CO*K>=tYmdp1Atizoi%B#4|Z|EmBoxrKvU zzRs5;_Twg3Ge28=l`VFxA7!C3jxgqNGoST_Ud*nhIV2-G;Ov*_xc!mN<1MR=%|}j_ z=Cfxdi3ckqEfM{i>w#P?HA56^OFcoUq$pyjqzasgFtKlp%6l0d%yk;bM8xa7 zM3FtbK3VELZt(#@U8uJRjJE#diL*=#x4ctOl<43mBI%TUpn8LZYw1)+JFb`nD=357 z9$11iQPSkL7jupw%mYjNL@+GHdQIh@B#X8gu0$C0>lUAKCGGUXH!OHjf*x`TdaQy$8Pb7r&C{#Io0ni1~ai2hiLIV|wV2!wYDYLq zWAqbo{UyV7bs|M`@jtk(C!ViDrbUUCQF|)By)sVe2h{v~x`S}ED%G11t~a=_;t;Me zlt6!93W9vwO<*!5?q=gp6aGGn&pQ^AJ;aH-!usP!u@LS~GH_xBcWr3HEop7}Rt{$0 z9gGR4n|l#GOT_O`872)Z^K9o+;+~4Bq@?Iee-lhNdFtIzSrxL>*t{B@84ce2=dv|t z-~C>;vvHf=)3Krto#lpKh3kuZ!&uOW=`V)4vlX%@T9&ELEZprYdYjFWJ}HpbZuY1s zPVMjrDw~K$B|p#C__Q!V5*yBZl3iCC=+|Z2y0P5u=AaTbiwzq-m+E3yJ@jW)gu^F) zY?w?F#l^hxElGI*B3eq3=Os7BgEDA<-WH%ZcVYXO^S`q{`9I&;{NM9Vql2x-d9-&J zN__#GM^LdmIoW!!)tb+WvdBedWd-AQpucsB-yPE;+Vgp#B=HIG^Lx#FH}8@BFN96CMaS${!?Yi|Cxs zd11Y}h{8SAlBc-?&y6<{CkCJJ-m@C}+JvOD_dd(8AtxoHv?thOl-}3hFkpGQVKA`R zEA{CT!6;&5-x?8aZl#p!kd{doU{_d3ld90mTD@O3vDOp2b>lKtuuT{!JPl}h4og(1 zf0n16=TK`gotW~|$BZ>lqg3zF_N{=JI~`;`j8_xor_6tx)8x7d-al>}^nTfjnIRqTH zph?OdABcpY21|nvw%EbgpuKI>a+4kryiW~p-s!E>4Bj7mcW`{#J72=2a zme3O1;ciC$;KstCui4pLaA0{uz|=TlKr^nCDT>JbyqZY)v#QiMaBFDTfHWJ|+5!R` zx2qXezT;23afKEh%T68i2<+HPYNpe1QkSksr8fL&xin@33ult>Z*C3-9;c64Sx^YQ zN_+gT-Pe$V-QbYyrTQ&gp}DxXkDC19j1Lxpn*r+au(+-1dTsgGY)1dnFq!%?y6Dg? 
z?9Qf)-$sx4(fQ8eM#L8jU3jyXSJyI9J=o0Hsq|nD@kZXdE4dk5EmdDTR4iXGK4{jw ze5of#yL`agz-@%(jV1{KxZQO2C`TRO=V#=^Y4I_CLOpmC@OR6Su<$mxD+PBRi|e@{?Y60*}~ z#O7=nxtSiQ&~D-p;D?FtA`S3uTBtW$vXy`x2IGf0P(x+}_SQa?pA}MAmGcc7q16pa z@}^y4-WX*^*IN28=ystE zmyH{en8@{dSE{5TABP2Iajn}H^miD}h6s@sH;*FQtxwFm+^Z&>IVp`#$EwRpgeKG; zQ9gVtN>b%ARTmU8yywi%!IJarMQN7eK!?d>qv)tg&3LSfL9_3}{btLnBi?3RmFgqH zB>m)Zb_4N{fJGD@+trclLfdIE9qb8lv#T@WrA^DI>$bBbAc#YW z_tZ@+Zm996uc4SsAEHvg=7mHKzJiDxMtfF|6gW)yJfL-3TSHN6vsh6_=drfcsdd(< zA>7YI-ELf_dX%x%y-x2$V!)!*reII z;XlqYRYZ3P6`IcuO)dC29Vds|@gp3u3eR^Jr{DTOOJ5k#f8f+>dV-q3;g~|6@=pHNvJC__vrnf)pFY%OQOD=I z66~-$lnNCsPH%XLCy|7+?vB?stEU^J*$&zW=|;Mjdh@66o?WW+D}hNOCy{kI23$+^ zgA*ehE4H?cC%AY|Iq7V-sWK@QuC7X+*I&$dZ<1z4beaXZ( zLfLn_)A`{Ha>9(UOa*9QOI;YiD2S|#A4dB~B59pmh_A|t1iVja8{L1q`ABMJrVrWR z*>y-))`GmRm!ow&ApkO)cW+7fI#ZJypsL$er2PJNHGsMJfW7}e|Z$$eG8Bg zS%+INtAHE>P_pfBzeEX1eY|~;P1_pzqpBc1Y4LlZwBL~#G$_{TUiuS`PSq8WB35(#R)H6Fag1UzY}S(5%6JJ!IAJr;O~H9(%J zHs){sesHf0B_6vV9y>nC!?=@7`09HQzo@80W=1CPeoj}9R<6|Qw*3Lh%f_#^Y5H}c ztl51xGo`_>sdIhZ%s~--b_uuE6WJSvL_0d5Dxz!6com=X)j4<4MJe+#y1B@RdO3Pn zeStwf_V&{E0qW!i<8*B`&3ISolkF|l< zuWv;H!@TkiPm||KIW`XQ22Lh#9E@2gVqlbe(eQAC@EmBm%Ai;*D@!umU@7V|1^QHE zY!=}^-`~B(W;dY$Pt@p{dHA#1>pFgPXtfjB!PNLWL2u%hGTVFfk;x+`-8)0=Kb0-; zA0yt9WcRF~gJliW3%5J-CJRlq)ozqShebqas!* z8b;goslLjoKz&8NRC6-v+Fz6xMpArDtUJ+}*1hqx=hpRb`Foj90>1~xV>PA4goLJA zD&gBlU{@xv$fM88z-U;oR9}W#*Aurf8qbZwSaatm()+C!?Y-YB8XwSs#txN?P#6Puvh$XCV;&m|Pp{=M>1EnU(E8h#+8!d`GJXG)<7#zsFBObWjCB2L`VtKD3quWi*Us0rd&uYpC zjDr5%c;$BQgG}G)xg(2J`u99qyhz$;SbdpL(uYSfLAFzaQU_bwx<&~6b4O`Wa~Nu@ zt+0!|U`AAj1m&W{l?Ks$*O|HoCpow0SAxm4Rc3Z-Vc0onn^G(zTCvEGmoeaEX*cp(e&V=z z3OuqKM4|WS778>LCHpltl%P*$@q<%i{qr)+*g9}wk5FF1F;A2p-rx^4#HZfy(k^fED>9T2yOAHR=qo}EUILuyMK zB`r9m_m02E;ZL&DZiL(xBKUl#?f!hJ%kKEYDSrOI86omimA8RH)5%NGo%3KMvdp8; zOr|8jHv%07sIo9pC$^)Bj!+R!nkjjh=3o@NxlihPqaPl|lOG^@^KF)lM9{J|{#UZ2 z^pnpikMA#@vlfNOYnX+cmBQ$Q zmQ%NJzv7i2W%O8sK;WaMmDov1sCS7WF9`6QJV(bia%W~5z8?PO_CBDGu-W7L88x=K 
zd%W1$`wpx&jdma!rmO1euo>k8MLBNTBZ9ZbIs`&?e_PPh>=;!8=0lc1Hn)> zldgS#*0XFTz0z2*UoEoIezl^8n$h==^;e?-Utj=Ow_4iT6XTxJVuiSe

H zK6mEi>s=dqBhgsXf0BLyuIpq{sl@nLG#(E}q9Ve`c$SXLBH>6hpUV!PIn~kC9g0M+ z?6GpQnem7LxS=004k$sQF86Hb&4Fvi>SH%)awcW0D#L& zDZ|VA%2V(DRD-O)8V&K10ssJDZ79`97vT}I{>7<X#6q%uwHo9F1polRnknV{ zhRKCiy*tn#loDOx=i(2>w5TL^-j; SWXtgY00004fA1UHKy(@|jCG%C`J>zp`^SWZ3DE-1RDs*rC z=g%MccH{s2$iwh_y%jeEl4Z#z&+mFL*L8KsZ{M;!oX)Km;E7=RaX*c&0ERXcP~H8t7@JW zGGNR%ZJ3diGGZzbLp)HF&5W))mL({ILc)h5Zm}r^$`4hNQLHSN|495GEapH))@gfDgqxO4d1Nh&AdrLK4Ft&-0*Zoe_hlHTA}wAaneqa8lC0qvwO zcUB)8v>(Zu03CP9z$Wz?yeW-Z1! zjKDNa?&J2h2r-1ApFUSOd}c+?O)&r6hDyw+&QIOBz7QvDw5WU5Le6tU{Z+jcm(`VH z+sp$stEWP0e>8*DPPJLlyryz+k726XvtF_HdS<7xc~_s^qDuDc9NO+iiyK zL2lb64})ZlFWZJgN#z%Zi?K{vTnXvvVvdejP(OZz4veiR9RKy1t1)5sxxXTR|DGaB zS$q;LN;4x=Su3pZ81>zcX|~nwrm9yqY5|1DY?m(w#N(W_u%e=3qAfAem|CtmOr&%6 zz^S>S*42hotrRzuGGcGw8=F2_ z&(}d48^*J4F*fVwK8yJ)QzetX$5zG=$Xdl*UEO0;B&QhS9;R4P^5t%2q^0K??Szx{ ze5oQ}>^dIpaKy=}empuUn2kL-Med>tKPaXTmX?(jxE%{+_+0(G*quz~{_BqV!}mDJ z=l(2J;8q$73v0q9XmD_ll_X^=UL>v(>CU28I= zCQFSSgn&*#Ma9dgqwDv`bkue|cChr5eWMjhozYcgx_fIxYp%tO41D(7oH{%@DUN2m zIll0qp!1G?`^}mR7Sa~B|mYJYx0Kt@I; zV9g-|p~|$dYtH`~da>bgyzBv$<8qXcZ|G`4m4a6T4#wA6u$7Fkx0mW*F?sC!7kMgP z-kmCKhh7|AKOdyD!i2O1R~l+!ghAG*DigS{@Nm=p8K#S4VgKOZj}@(s^fF1U$emLq zvlwLABI4q_T8*}ab}N4kl9sjM3LUvAAlQ{1S5$hH0|+9oci`$8JVK~ddkxuhp)liI zpH6>8+j#0y@DhCd_))($6;qUkelTWH2Nv-SqkD{5_T!g)^owQ}J*SIpDhx#XA*M#7 zdd>MJ2c5@Dk`!5LN5`7^HZODxl*9JBy-1ZWUz~bzzs&j}^((5X%2`^{4<)nousUbC z8$v(d&B~~ktD*FWsNz1|%_8tTKO~7YT)q!E30Ky;7EmTC?jnht3DCdrU*<9QUKv zTrw?(^l=%oFaaX!djEd>_;IkDJ_Z&uI0MgrVN}dElYKn}NhO@T{Cqklrq%k{Hy|FOmu=BY55|d8{=MI$ zr2YxFhcWYsIaMG=>I=LWuck=Pv7Oe9250G9+17X^89_i?M$B*;C&R_{?RM(GXwTWl z1O_f=S3Z9DKwjB;hMdXgLczp@np_GI=ht!Q?d_lYNp;I@voLqSuhn?8@45(1N9v8Y zuXEd_I6El#8+)ZiaTyt+78X*!!!2&^BNz&+j0YHg^*7oee+q&%!iP8N;}9Y{jbv#@ z{_&M^J?_`9j|tzWQ>kQhS7$r2+C=>jN)?~*3*nKY{^zi6MK`Y0WhARbym8z;Hds8KFFr>b>Lk=68o7FapT&AO~(52IZ zlM2EkrIqc{vlTzJohK^sS#!A4xqQ&!{B-2fIFNeAPwyX_w3#C!qz8s&ZS&gSg8E?c zuM?V4y$XsPZdT`7E6dN=h5*!Cg z4`OLq-=<~mbF6Q}k2?DkPr+gcQtdoevl*9`qAIp-WvEDw&0_f!KP&vnx&j7fYQUP% 
zaLxyv-@nl+h_OVWC;GDKB1Dt||7Hx74KBZ;DxylDmdkTHCe1~v@a;fo5yW>XuBf2K zIKf1O%f}8`Eq!i%_&sshyt=@OB0!4XeAKt;2;D{UuEq6m5P^)*WF*aQzTJm^D2ZwP z>7LB^oHN+zaRf7%AI^uACE%Oi0$jfo`%;M^aW6B@{i`5Feh7 zuH@lvc5>fD?b9bjx9ca6V?tAY1nb$xw(u8}1hE4e)dCMbb&mDKitFpRR{Y&+C3@ zH!g~O=z=a(=!j`*;*pk-ao_!cr-+lnb;uW=@HvI=l2F(CR8PJ9O@_Rr`X=cqwq$Sy zKBWudfkwo2D7yP$xjC2J0)l_k~Q`Sd|Ety}o{mi2sFxO-B81u8JX=lQ0e|MWISNIrj;{9Wm^w2BIj)7HZ) za<~KCR03FR_)vfNg5X38Ec zEa*nsYcHJt8^Ni;Enk#j)k?qn+y~xw&RI8b39qOuJ8g4ysA#HAb4@Gh=*T@k!G4mL zUp-k>e#7J$UREX##i)6K1!7so(GeLI9;Kk5fcC&-YVcsb_E+|kv)OtJG+xp%Y0w&~ zw<2$xUwaj5EDgRygO-u~0=AN>OhShkYVP{^?7yod14I&WrJK!HJx!aVGT~7uW2Nz* zOr7eEr% zqooffCcM<2?bZ9Xoz`8y7`@jx@kI3&A`1@>=OK2+Q4tGC|oxIwj4tuYG;U(i!!&ZK$S7PP~?odH4fbr!Ih{AO5W zWW@B4;^LgDledre=*H`Pmlrz|s6~_WmZ=PC4~+x}@QRQteH|BcQZugeB(vIwb23qj zH%@T!UNCQJv1{Ek&M$_P=%g=v6>ECm*efrI9rM7{R*P-& zC49r(W54`|v8kzk{gWL@+q&b=VhDtqiz_57EUaoW&FJ=QjMp>&D+Wcw>wu?Oqm`R? z9l|WHl8cC}W)D70qQay_5+>uRa^}--Dbg5(L4_y@GZ)etyGpNZX(WHz7C%d%+0hS9 zp0(z3I&O2rCV5;puFG+9w1zIXA)HR~Tg|8qsuU41*$aIC#mzcm;ial)xvX>R{h;reZ#Fw#Z-yAX8ObSx9)r zGJA^qguO)6o}8ibu#~bG1@GfGY_T4(1u2T#IB>2F4GoJ+oiIsAd?3BN$oDFgtwo`6p7u@T*NOZwffvH1 zO!JcS3`Om~Opq`yp}79q+8_Dv#Q*oBFrxnbl}g&1rkm*EtsTtS)564gPRck&_hyc! 
zu_||FR2W1Clz(iy3bKS=I}y4{A@BzO=ZOZ!0{QIxGsRWQ?p85|+84*T`O#KqQ8;q- z^>Yg-f|va!|4V3}RXu9xXrILky*G5?V$o9x@I8CU1~@4Y^txbLS6F14O8 zaOgw5xZhL{*{gK5 zitf8I)mQ0cR6EcooK_p|leaHiu&qZkm=imlnt@9okiHA#Tyh z?Kl4Rrvk@VT;%>f%31$EjM^;$h5>gz1UUrbW(nrPoa-ZhSWbqz z9A#HYLc#3X>?_iA1V33gTN#=>EA6P0O3LMqn0Be4n^wkvGkOG|Gc>eVSDqI} z2)zGLDfu_d|EHc`W#f=4Ox!EOOAzP?lXJt3%diQP63vnk;>Kcjk!*{Kmde|L>pGzDT zPJ$iV5qCNmlEwxV5Gd!Cj}QHJ=b!uIomBjjdbg{-ZQG?~Ja{|qmygq0g;a@47Qvl(oDI-GV~E<%nsnow|}Pi&+YgU{`$yqkjKC(Ysx`97qy_4rJU=VT1Vb#+N2Z=Pm4%i zd=4*PH8nE1MP6Xg6T;@O_G{c_rQIiXAm4%z0iE<6HFe0`+??sqLaPURb45Z*io0xD z#XioPH}UcDfjtvELYwEe0FCrn*2iaLL;+HE%cEmOK}rgVfPkRb^2TaP&-d13Yb0IL z*_ngP@4?}>hXr!uqMU{X0k~RRT)?!Y4Ob<&AP&yAq?v~ELd`!t$JguUw}^0Qb|Osk z)opimSH6o@UM~Tk!Bb|6`|1@=K*+0X%n!lEfp11#)KnBzamhOsafl-%SV>_c@-Phe zUD%Zt9u6J*2+ZuCAFo57bbN0&bL&8J<^Xu7b$g!QqodT!%rRD#Z4L7^@Ti0zU0t~V z0fO+BlM}agepttCSsc8=%gg(YhUR>bp%SVq?00$NUup0N^YilsMMc;oBqQsw!ld+0 zfO3{iVTCT5l-eH643CQ1$nrY+Fj=SOvj6ADYZn)nR?x;k37r1Iri~Dlmmg*G=#M6n zG&X*xp`o#{wS|R^-9zHGG(oEnBm{^uVQ_WuYK`SIM8YE&V>pEkwsu~&!}pPvo*r2y zHUJhOM6(X`H3fURFMF6bg295YXS~)jBMXK zIw>gttl2F7yc--E;VQ;#E>WOM zi_ES0qpzB^Q0HA+=mcs2IaD%UhfYr=k4Rlj%qP{r=RGz$m}x~e3z(62?$-0vZyw$F z%`WB5WV2D236sPmM1O^=p__-oI_->;qmkU5^6WEIwqp6m3O}WQg#xfc4|Go;8V=v= zjrl#XJ8caEfq~EMNLEP+^SJd~-n?mW(@{xRS3XX)w+LV*1}Ve`oLd-4#vKVUyQ zKSzRt0neQ}lFog8Ija{6kV8fLo#h)QO=npGa1REH4K`7PoDtxC-%?UisxEJa^q#sG z>aAYZ4Nu!18eQl{f6le_uwikbOu_2-+TDf)l*{<_qN=i(#@U0_^JDZXQ!}>uth=yC z)3-}P%4?JaY}LZpwaW#GKK@64KG-~oNW4<~PNqZb1{JccKKF&7HvonkT4%e8+?~aa z?0EBVy&9dHEzgU-1AjbRCS-d}AV5skhWQp`$n7{uQY0O~E;cBcQ{hr=EGXMG-a)gP`rO1pO}avEiEl- zA<1sg9ZbEy&|pIZHLK&M^VFb?I_)&Zo!5k(vGx1i~0u!1O~j_8E%5H*KLUX z_2hu_oqtYsZMEfrFam8hS6$`m`%Ju^=Fp#duiJYsBK&FgXsI4Y9zPKpzPpV8E`C;k z*M9&0ZFFR2Yz(ESrY2!vV35#*v-*eCVTKh2Y(MlT1%-ZVuJb-3yAyE60IwqP*Xvj? 
z%Y6-LYczA{uyIA+(~}QsVq&69ucLipL$_mPW$oP}d@~?zf|0OqcJgDJZnx6KWJY6* z=W9$37V0BG?F0>4c+aZ-!_6t9UVH0+&G+cwV|DMPFbG z+@O{|;pw^uba!`Ce^Kv>MUw)>pQZhV0+8I^SMz2ze=zm`6i8)w9sShyI;_`rnO1GR zoY4Z5q`1NU1O}4F?Pf9^1B202v65^$SGg*GgJky1UpvWp-IoX)NdS?ewQ{f(J>4?z zQcRf4;$Qe0113^~in)6b0N+%OI`+q!weUKvFb7S{h;V;?5u00EVpRqu$~-06H^Hk` z4a~0sSUF(Q(8ck)j1X~H>!3J=SYL;+BKnkEi zoOY+JJg>3vBN$^^ksgcUDU8}1>^0jM=cR)zH<;JLeg;vgp)nu(DkVr9C>m9Uy~r;} z=mo-L@H#=I@VQu~f4y5i_U#8591!)rwv8`U<=gk~;w~L&0rV zm8n+;rrWiry8ZPC1z!mM1qJT_GnQ1f53mS171fXV3mN90!Qt36XA_cSFXvn$mUQG- zIQH#H5Bg%+ex1c6=xo=n7VWk{F8v4g#H2eI36u+vIA$vK#`>a(G@9(meLFhhU?($3 z7V|;UYs|O)PtqeRIfpNHxYwwE#E5)q3HrVmaRTSeu`F`K$*A_4jtz~EM*O{lB2C|= z3=Q2vKyR7hd%7L?CLd@NOp%nll3fRqm@}8+ZUhN?7bPcJ#0O1(zU0@nP;%A!$~M35 zWIov+_Sn7CPbqJTLM$^4ST-dW9F;Z=4AQ$&SQDwWvWfnm7vM*1rJHKCrzAn|v@K-W zmmad@y?$10t3Y?%p4G3>HraGnb`*8=T_S2yA~FB=mZ$$WH&E|Si-ebvM6|URuV^jM zV*7iZ9`D1WqiyO+ZL9$HQ`XfT{qsZSr3SwchLn^+J3BiPw;%!1varzb@ugi{Tu@&V zIql=x zSElZ&k7BQ_zNz$fG8`n%^sSxoPn0Va5N^)lkO`Gdr5Vvi9EtmGy;2`mD>G$(Q;SqO z5IkboWyxk#tB#Q}r=s42w3!WFC(S)8z5W-y{-f*-j}H{w0T}yzAvvwMDm~96|g1Hq-1A54^BewuauN=6-cwc zv^adsLRpk~hcz%3Fe(+N(((vp-rQ-x$>wU0hKF)AJl{6?DW>}KKGHu_;-5b{%sq7@ zb8|KFiU-;ves{+z!qy|!-`<9YC|0ji@is?T4)fH*{=-c~peXJQoL7~W8&>2H`!M0M z*EsN+w5Wg?EC=McKUSXg?9!$!!ws4J@B&LIiitBm;YeTmx@8 zn$rsq1=)s|E~e}G{hyeZJ?QAFH}X_c-rNFPkxK*v_h4zx)j>3y02^vWSAOM>HepkRR zr4&@yTtp7>51cr%kO%{O^R$>iQIUy~^P3Y(c;E-&e#}F7Q#)9+LWXGL&ka^wRj6ilF;}|A29~K|pf*(f|pIuUO}L|5n{uOLbe5iQzNX zq6Dc5CCH>$0~S#uhK7HqDG4UIvC}6-O*}}RuPsT5&WFMs;X?gmoP|-~LNI7WrzCj- zpHTv1qNa<*4i23hBG2)F z5;s#0G4!WN=Q7i;kDJU-cUPup#E`Bx>p5RMi4`8M7}qEk8V5@L$0{IwOUD|SoXr;V z4o1}~3$kk6j{GY{Dlk(W3t2vWed!m}dX( zC(EC(!g9kBvfN!`cRnB+^{;W=Q=hE_|1y-F-oL_1WoBAQS2d`ATaull?9J@p|D5Ih{^gxR=d?lh#4@s`<7WK zz<(vSs4#7Bfv?rqZ#;a$c^DEXoOAZKu!nI)JIavLKuz^Y=ulR|72PlWd*QeLyG3F0 z!mR1cJY>9UMvX|*P8q#}cbuaGU&Yu@t@vDY$xKwIo7!wq#BwYMtCiwV_WvgQN-?Lm z+@JfbG8nKHGzD+%Z6+9ra;}yajqELLZ|3?6R?bCnD^k3)q!Ti$w0J4HS2NaMc8}o+ z)h1K~4#9!5VeMkLoILrz%QeSorEN$LM&R>N@y3b=0tSpMq#O22*2EUBq}=~VC||>e 
z)w~a_$&InC%Rf}6@7ym-m!ozwKbpwB(p4ZK&6cQFfD>cu*0;BOuj*nub391X(G)Bq zLYZS#>`9{a5xqGD-m&nMmmVG-IRc_s%IlyyV>r2g@@qpm%?UHami*7>=-xl9cH&bk zD_r@-YR+PkoJ7C+CBLO$#2%raWSi_4m!2C`r|xDFjMcG`)GDN8!2b8FDnywj+mYM3 z(KVBYxoZA2H0Hv@R{7>@wg@aLYz85mR|3);Cd>$1{;zY>Xh++>ofVUsyO;o7>;YOBhBY-4vCQ3v z6HegXAw^ET?me+5-Kjjk1DlezlkFEQ!%Xw$t_E0Kfs?IfwCVt0$odaShHcop1-o?b zNlL4Kwy*6V|Am=ko}fe6AE8(c%bt3k;QJZ{MzQHVIX9=ON#e}yoYtNMHuZt2;yBU{ zpXhbfXN4`IJEypxII602Zv1RtaP%j1hzFa${gGc+YW?nx3yVXPmYbhuiMhZPzuscN zjxa9x2S{H@_(HqyGvvk_ON8KPEH5;Ee%zyYFm-=ze zr#NjC?HOZ2$rqJII90j z=nNmbG_R5osx(G;m|#kEM?>%Q;rSZB9WZq1DaG*fB`>5L5$_2=>o^$xS(<`tRZ**a z$IhFu@mqU|87~fk;N{HM{#$>1H1D>bGKdJhGWx0k5%?zXIPcv@xF(% zbd*5FPuBnCKpK&9nob**GD9;ZPNBwZ=K0el4=G^^YijK$CXJWFMFG9C8{pNS-qHd{Sxs=(-=y-9VGIlXb%Z zJDdBgU)#Ks`Y4kjO+!Js78VoL6L$Z~q8Dyvm!4|D|~ zghxhF($lv(9yP6_9k|{j36A{C-$B&ylfL8XElGObxm%x7bAxD zd@wfN2*++hEz_(=P973hR{ld*(ox=^Q*Vh2h?f_UI$A#H`DQ&9&{HTa=-=wHms{M} zjrvfi6|%gwH-pc4n?cVb1)8&a@A_;{Jxux3qQI8qvejtSmXlY`6q#zIDSRp9Kf|YP zorAHQ-=IveI&izAK!@Eq#XAGEN?#cx8BHZ96mGkVWL#J{NyQYzuH3595^=L<&xk|FCXe&ExdthKeBGgXpiHhSzz5Vz@BYV#m3Lq;-??87t3M5?jJ3l$ZuL0F=d{Er z{ZeUq{I?BwFZ|EPgoO{bzknqLFf>Lo7^5c?eH+N7ZlQ_OI5yww={x`_6(Ew6FK*Q+ zVDp1=(=Jv(vLmRWJ%QJ03$UsW0l<9`0!Y;KvNA@{B=$OF-L*x+=nmqdAHpz08` zC`~tOks5X8O8NIdwXM`>akW{5vqhk}JAe9|1A#;Ug6F1bi1*HEInk!$X6x0 zKv{AMNmL>0+pO%txb%7?yo`yQsDiChI{rjQ3yH`p29}Rx1sZmtc;3$;tT_sVLdmh+ zHGID3&q757gUhDrbzX=Ahps@ByFLR(Ojf{jj6Xsk5TmoCq$HSc85zWC^W~dvEz61D zzsF@|#mp0{bZwvK?t3QO#{L3ym8Pb&x3>Tr8(V)t^7rq+Y!q{kXZ-o>g`SezvMHCB zn*@xeWq}=R`R-)A!0OCO+QVU7C9emNa(jDwUsT}6(CxU!ucZ~u5eQ`782(>$J-+Tv z7O?{|LHV=alA_ck2cUj{zo*~yH8ZYtiCn`4TDOnzhy&kZgWK`nCBNrK*-So8&$cU9A3C#V4vDqQsedZ};Vd`FWB*Si|H`wK`0=Oh zKm${Kf6^ez_HDv)mX_y}@#V_u*>nTNk>&-eC={viXWxm0{k}_)$ydnqYPEqCM2NDN zF~K6mmy*a+*4r76dwMbj4I z4&Aq8#h?w7kXNn}P~02S>^8U8#8JV)segJasVc6DtL+=q^)pa0! 
zFg$aH_vX!e-v{?UlamDeu8h08yVieOQ&ZW^YW`z)%8)WOr3UndUCqOyT}P&TCf^HD z3QV4TfcPeq1`G!gECO1AUWe}sZHRpuRh^3s)>QuM$H|;9DM+ThwN!-3=WsL&5 zS4v)fYr(1#b%@S}#i&1~mlfsea1rm18Suruu+WxMv@>NIla}e$yClK)zG-Vk`ip={ z!2l$B;H%eZBa_NRUgsm+zz_6^_88oDM9*qYL&F>Y7aqrPV=3TMI`B3P#;>Ewe`_qU@y=?So3@d*h%yxSRK*GJ15&CV9J&Go$vlD-vM zdot%|XK6OEl1|OnxJ-h6HFwAM^PjaxqQ`usqWfd>{B7&2T(=FLZjaz+4(a>WXO)!f z7s}L`-pE3b5+M4@dzpv{$Pt0$oveXoR9`b<$BhXBDsD?o@8}rjN7}pns(ojPaEO@; z`m{n7&Jh$;u(}NBF$w9BNwvmL8o{xp#!Y}2D&wlCb{RJOtJ6}%c~!ZhKiQ6K>$Ph2 zbowOGniepR<18km*^wRk_qM{OFICVh3Gh!?xVYkZ<-kCw_U_%guyosIDMsB^bz&j! zCZ#*@oRqd5kFL2EX&9nm;J7JfIP?+ZUXY80gn@HMRrU~K5H^u783~+lnm67;D_rF5+hB}52t*o%kmFP9N%!lK8NUk`s|=@6-Zh}X zAn6D{UQBjdb47x&gl|HibH@UiMKmlYTGKLtX?Jes+NsXEsKsWCcbk|E*qKj(rWXk~ zBc;!uDT5JxnUk1x2y66YY|4O@zZgPT;|GKu@!1NU-WrpkU<^^&Y$2T`msuUTnH9{# zpOUepd7#YZ-er4i5yZvEQ)n}Pp?&;)g@FjCS(aZf1gU?js(-2khbNaySxbu{1eJ)I zf`TG#s+_4o@J@$&+E>#uR?Y#0RJovE-u-dI$w0{CTGagWoAAjesD{tP~g{rrdDq7PPW2}&sVWcBV{|fOz=ymf#?rm&&-P^qU zio)*Nx7u~vx7xH{sM>W_D6WxRuWUJ$<;-<6vbBwBt%Zy${LoH^E~#VsbL+D=3)^+{ z;;%1SjbW(w5#oO<9x3d;w5zw&K0WK)2W}oLf%hHb?YrWI*}36GTzb9k?RyY2v@-ZP zr>subzO405=)txzrPjC}|Ez!ZtIZ&Lw{!2LXxjmuL1wGSNA1h9-|Yr*DNn!rewn4G z9a9pSpL{VL%c6?0LK~JO^ZBLseD~$*cx7yadqozwl3z42D8TERYz}_c_Itd*LiTzJyGFRAaXNe(P>wx@cVH5OYgZxmF`Zu(?up zOj!pfpK?2(#a>8qU7t%;U9Uq`Qf$P z%g)$zbaYS~+uMzE)LMT|-B%QX7(u|Xw4R8*q}BXbAx~LND;q{$-c%uU61z) zQSX?UnG=N>7+z^!e5Jg80KC9ex_;VD zcC$Nol^aUB_NC(vm)&gC+F3Y*i_7&u{pmdW8GO`BfnM*6UQe^CFaS9R*y2G=->EV- zxAPKmRE>r}jE#*Ife*3S&DhJlJO=P6_IHg*e`GX33!wv9u213e2ST~u#qlKxcz zzC-W0wmqn@!QNy(Ld;2`1WuTAoew)g&Lg3g`1qHbc?%P42FusL||Bvhn2j zjM|J0w;ASIFg^F$;Zk$s;!vtKDJyw~T&dM*hls;do>p|lOS0mZrt}OA`Cjc^#SI2< z%6`n^enO)zG?7=~40aAtE)aB)nUE0Mg6nRrGGyqE{{2K8>r-#3a)&8_M=L)qF zSh_h#ZcS(;gG#`XDr4SJy~lj2Aw(nFlhZj+{AlgA7vCrH(d=N9hcO}q^8^mvlcQIE z<({6+>rGx>0AWB(pe6nVt25VvcG>(TU)<`Nd5xcQ2EsrA#~csn9>5zB$#*955q$10kjmx3U%{l5n6~2p)d<%HNTVm9jKlsr zb$36g?kD`wNUh+thw|k=_+h>L*?p5=lT=l8!bNZc(RQ&R8caXs%egRX)`fk2jYrye 
zfLb>LGIe$NaS>An@?t}u7`osuFbEMew28d@x+ppmKntL{pN;Y#vx2jVBW?iBy1?0Q zN+vW1XB98sqo3QN9jz}A7p3RlLG76BdpoO_$KcyPUAlnf0wm$(+M^wcGu30X3Ov|i6(#Xl#^_= zb~^B|LLg_L6n#rf{3sy-4;;2H$|YE+KP_%xv<%G6=o$fV2YR(}0I7kIC6(XJHes_q zNlytUiBY3FhJ^oTSxg{AI7XHO10_Mej z+xeaoW%=Ii{$sRJH35A#qgF!{=t4ku`M2P+<2!VL6R66LCqCei0shz^V8!B)wv)@c znj)wWl5?mN=+}Ba9dxS`&}nvF1$SKl93U@;4#@vbKpbr>8QmO8K3@$ba}9eK_e{5z z>ThVzZm_4fUL*uyb0aU38Nkz!qpy>Fd3kyGT-Nw6%B=@FXhMDz78drh%!AVkGP2RX z&VZMX=Rax+0{U+I{0OR6Y5or(`iz$z5{xoE-EMpAl@#qP)H48*WqX)?%vz~2rsgnw z%eZmdK1+Dwk=t3@Z}arI=?=2G9Y+CYs-yqMCgr1m(#P?E`nXZ5%DC1;Xe_R!nDcwy zs$Diix8Z(6l^{VhaW+My!=O+d{EeIa9YS33&egT_wz1RR2D}@?MPZGOLuhF?AyJjp z(hQ~g5IShGby(y!Z{K(r(t~Drh;%utQP{th1OFAfXvG(}^>k6h4;>b1b?@3v8XMZ( zEedwU!ong&w$SJd3`W`muL^+8F{w`DIOUrxEG$K>tr=xyWop;iJXkujbhNZ0lN=x? zrY)W4R#jny_F$^vj~`zrWM?aY&X1$!eIn`%@`Q-q@H!wzb%D=ed@AY$55gJz_D6 zQh&OrQ5Yr0`q{(kORbdud6%g5})z<>_T>h~lZ9Gn2PAGkQBfEqBqW36S)eLW(9jh13fO4S8P;oRO$!n)H1quku)pAq zyKI#3+K>TGx$--nOmiU96&OjjWyjFpt1Mi7tgntK9$K{@2doxaIfyXOLVyTMADJ=L z@I%gsxLQ)m4o#SENEa10{k|Nl^p_Hr6awW8tU9KzQH~JC1%sIQPX}5Q z8urx$Jo*n6olyGGemZVG)|=UVC;#esp8xoyIHD4|qb2?Cr(^P9#e&&S ziD)p&IG7(5Eh66Pi(!6HR8qmH&Jk9`#K0^-go)@h%gyK5)sdnKxDefIxPQcS9Eg`| zFc!ngOx_kUKF)MB-5OqUDw3wR8lg>4v7qY)BqJCOehnJn7qrH;SWw!W!eJ3F*I2mH z;U^5bfM3164hM5=>0h--04wz!M0?7iZvI4-eisaA1^a-vbL7m()^~|H@<+{lGqIdO zi8svp(yn8vg>^2s=r!C{9+Zlrp$e@ERr1-^%A2ow91{-tO#}^)U!UlSR&0_*ODOcr zSV$5cCjaLJV99UbRDG`x&icp@OUE!6O6&q%x`ews&-wOP<9BP-yw64Q-oP9Qe+{OP zZ9Y?4Z4IXq3wkDI^1Id8uZq6hDp2%)r(31=oNW$i)S99y2tRVf3f&cJ_zsW60_z&l z0fl9Wby8wJn_yMq;wG+oX!-ui;apysP52K_BUxXW{g8hhksk?K#Bx98D*okQPHgm) zrWYqCdMBOV|L!kGT;h+AI8ijMfWY+7*Ex!`O0+UjcsTTl4W!)kV&A!g@PXxu-|c7v z&{!|E0NlcNtAaXBrt0cy96Y?CYvs?MrR3x|2~B-{g}`foAX2|5V+2h;E@@@OwB@EC zLX=XfbyCiRIRM4UZZmMdhlk%$QuY+z0}AlA$;(JRE!N}~OfdmoF?NDAdb1ZW6HS1} z+L^5+vsrAY|0FL{t->W!EQ=0Fiy1s?Z)!@pxp5ByqoV!l9-3)e3k62{y3L$~sKAm3 z|6Ys2$i(4`%1qLr`kT)-G39$fY_|*$qOC+D)%}f7HVo>k@BT2x3L0r1TUmv>U4~C0x0dM 
z?p%6;5ou`i`5G#~*3nQ<1de9%zjzD5>^8`-ubDLKazNFpvsp}%NoF1}Xay{F@YksZ0?thX2ovDms5m$hK~YYpDymnfSUfZ~GaEwS z`1@D=O96taTxdyaHlGW9S63IP;^=;Ue!zcYc4GKmEi0+uCr8vmp;6%M#zq8|xr#qRy0%bLPb|q0JC1oSkDzRm8&sWU`^1PaPl7Y<% z)rDnGNho-9(xEQ4giOd^_X^2BTMjQBi2rCVH zVMOjgrQtBTx&*q`WnT@LTg<15F%{2ns{n-x@D7+Odau*sY61|#M{#k7$2%9WDefQx zg4rJqDBuq^0yM_2U%%|{E-V0n;0}5fS)rCpKu=D#dN{pd(H)*H{i5;H@yCO8e+&sA z=TZP6rR%=>N*7qDZ#7J62#g-@E-&_`BVSyv;*oC#uIn|Kz>_lc$C7Q|UG85TEvJM3 z-@x&D*)1V4@fQf}+lTY9pYEp=o^4ril#+h{6AOCKKCnd-mh~^80l%sdAKN!>Sr6ae zYE`eH`hs_;&*mRwgcgl?h~Cwo3wRSBLd7D|am81m%b2qEDam#H4?~ z5Npe*z5PzenwXl9$H-}Ht8=D}`*kqqi+D*>NfGiZTIuMw{30Ig-qYnLn>@CtA958F z4`9j^{WBAmFxL` zkG|0WgZ|z^*Phw{R4H!fE#e3Y@aib#y=}j9_ioFNA1GK@pX_*fWE0pW1k635d8ENX zgV#oiBA)yM3#va=Z~&G7x91tVb>k_OeA(a-72#1}u7lr7%uU~eCH0JQ z0;&^Yfe#2?^Ffg+8koWa!(OR2D$N&qWj8EBLK6lqJnc=4j9E8nQ6+=0kmsvFrn zH7q`I3uJHIo2}qx4%I0(flf+=3D5qSSi2&fh5c4ky-{fc-j}#kMPt71C>Zm#DnbrdNlbR|K*Pc7I+;86R8Nc4!kcS zqi)y!Sdub6f|^H}Cj6}cS$6l5yxDO>U?6HJAqQO*ij%nbhYM#dEn>;62ta}fs#k~d z9zsLUZa(xOnqIBU^w$$A*(7lQHts!QKxDo^6R3RBi``AA*5BWsJ8bU2lPodDz`y*Z zz#W<@yNMuPZf?K>UjVaUWo5M&eCIJAU((#10dO3<$H%A)J!1Aog*41o+g(V#F@vgc z5tdyMl<=Y~#=ZE#-wVxoq!$}LeZoSpgCH_QR2n%sxgqm!9&@&|Mu;bOM%ME8Gl_^^ zN$0U->u5{AX!$^$clgy(r6#B6q^H!fgWw8-);Mf56}V zc>}%NNlIFlCRLauMi?DN1ZjmErJKaf)uk(zpu|BhTs@W%6nWYg>B?Vz+V+&t@_O7d z+M<=GYu3p^J-}Hdu1PO9u@*(5Fa9?BsrT{TFxo~m_79Cg`yqz(ZbA}LQdtlv(|PC| z9ZdcFDeu5T0()iK;NW2HIm|{Z25ooj?Ce}%gD2o|$mOH0E;FezT%FZg>Qp7Ug^C&p z=>$$rmBiLyuExZ~yv=-9YQHplp{l$;z|Tc-^hAf}S*H6W*m zy2r}Kwx|^P5F5L*yPMUi0Z&7w-RPg%3P_;HPB4VM5QsN1qnrUf|%ilOSIc1lW z$RBy1dpuNfoYo@fQB+7Y423cLYFrf+bS{AvZs(HT&$G-i(bljsGG#z$bCif{EZ~l!8A|8Ut{YY8BR3+o9!}$LrcTa#OH~`OH-bEV2c~}{N@}fv zM9xn@kSLF)4Zkn=)2W`!#m!v`S8Q@}as?dM5R4%@HC6QQPNqf|z2d-gy66Be1zJ+w ztP0g=-gLvIZ1>gQS$3Z~Ms&SlUFHCTVB9tOMdY5>#jY4a*^WTF(7!)dS!QHR>>ek! 
z*sJ)uH}M6tNaI^japPGxLzQoB%RaIu+hw{_h4o5cRX#sm)_c{QdF5I*r2&W8Enzv9 zBQ4$M+7{ol>K_k{&aNnjWHew#OMaxlB8bLBc_)h!QP#^Bn^K}kFFfD)crhq&(Z&Q5VEnS@vCe+m(e2P|Wg(GvaVGF4fVcS+bdr(hdIG;&#HgV~6-+o&gI zc4^5tCIrH9p2Oc{y!5+`xT`BGrhX+d!>&?2*=5f=68H+gUR37#HC^o8gGai2YzyUE z>Qef2FUicTed24o>GxR@eUWd48emk(P5U8A12^`Ae>( z*-t?R#8Dc2Ny?KG6ycnKVx{|sFW-876U=aD|Isu2E89#>I`t}o9kMf!>g#>q61o_H zTgYy6oWs}G7k#M@9x~T!-<=&BVq)T6Hz(>h`Bs3-*up~3<*lZA^G&pWTwG;PL47(W zGZPW30Vs0lfxOBq$MrbwRVD@oz`w(wiD zIZN`UEL*fI_XV8Xv6E$7FG0`fhUEzHCWW%)&!Rau3m?*aY+&MZZtoKX?|0pukiDc1O`P3? zhy07gzuw#+9-_IytS0964K2%hg5~TR#?#LH3$c%kv=+@D%vQnF`}qEal-Z*-k#c$F z_G7dDk8$irNm5H48Kp^5ov*|ma1PN`QdPXMU}bk`66H))_WfRLdlC@Q#OgO1xg~cI zibTOkLwv8)zF+i)gE?_?xGcu%pC@sZ7#<3=+mqklE55(+6!DE(nuxJXP;0C1q`L>- zr#dog>8f_{;v?*dd0P5rx>NWmZM9u%U|iDUq)5W_&rzns{o;}wl64-$8!cJ2MuVy`#=E4&3AtVU!}#~J9Zz=kxOJTMmHNEi!CZQzBY17mJrHN zQc6>KC-i-q91Ar%;(Wq?-=&HQKH>|X9I4QQN_=5_mVduJssb(U(6N0^V=6C}qq(@W zB&HxqsNrRcw>*vBtL1jS`@=qWys?NkRX7-}xY5zUF+|Tn83@f{`s57CFFF`L179cySqovSN~+* zh>X>3j;4PGFeC{KHi>s!a)9+mSTQOrM*b8?=h45NY4p2+d^S>nKiut29=f!rI}m*B zc7`(S_cMQ7MAVpyIxIC!``n3YW`rs*9Fvsqg@c1bGKUV*#l^*ku&|}kUxplvLH~X@ zw4L3fZ1`Q{hGH2)7M{W)St91Flhf1m!Buk&^nbqPt?0k=o0fmhHKgZo|M|r5-Au^u z-tzUvJ2Rkgq%vjw?2sl;+tPyS-#|@6Vy?jh7vkwZ56KmWcr9NhQLKXQ=?%{#l+3xe zQL+Lw9uu=a|GtbWdXQ%Kj3oTU>gwvBkLv}z8XGHdcEc2(^FwsmEA~508TrKmr!xk%j;qq= zm6rL8VP$3vlr%KqRH1|_nn}2})W04qsF!M72d^K~!8J zC*yuDy-;^()=IH+Nx6Ls#%+qO+MaGVxw=hqi3$po5mGk% z=o&Ps&XL3wB=uS;9V-rhJ#Gn)wFdB zKVKg06`7yTboR^-2nRAAQRfZ*y4c4!Vmh?3KElW5k(Z>=p4vGu6YuXbGM)4Kp57Hn zLwre(E|&3&b=%K>W4taFb94Lw4t^)Rd)#qLARo6e`_n)yJgKWES^A@c^4D2 zy}!RG=^WDOG6tl7;;|17l` zp((j4ss14t)S<)k7BNzdC&hV$`#Cz=A4U&?O)2He8kwlN9l!~>xSddUHel2P$ z{=q|#mp3ue#L6>sN@puOx6{*T@m6?3On^y)+w(yqcXwU^-GkDLi<;PMTQ%rH;!i5N zN_q=BqO)=saEE8h^y7cuQ#v*Lx`{ia&EQ49xUi5mxVm$|t{hO-1PwNV#j18##fIs} z#ofK!>(VXZxg{b%P6>gbczJojFv8}rA|J!3Nk#7t2}+Q2hh#BQB$6{&3i+9Y&);@} zs*2m=gav*V0J5F)?WV!6D)dNU#N6Kj7eshWQBY7I5(4XBV{GuNwAt7fZlt_Cvs)+U z_WYF5oJ)1^S;N{^l0CkFF#r+*_wJZcy2ZEcOd8!>4#w{`n6 
zUL{r>E8gftLS}6Zzu{`Q#S1Whx@x}JuP@y8I*H`5vxbJL6lYs?V-3z^l3Bh)uCDbQ z?vIWgYKeSqU||?tO;(vseWgUgW7b~sHyb={O0C2Xy^g$QG(Nw+^o`A-Hd`e)*q9Dg zAIwQtrdvGO3f*dBZrT(o9$`BU9jmaIXrFUP2(q|rm)RIE|HOP#T@nKO(uk>n5M{0EE=Jv7}Y!9Ap#|w+gspWK7bc1=_ zb6>@slon0Ul+2oJRgjd4 z#K1wLjL2NR67U?YcD>hRdEHfT!IISzV<5|4#K0e8AjvGf_=&dR;gBc8b3JGfSElMX z5iA;DxHPaGK=5GWNlZK&E2ss5^MU|HX4p0Xs*44wm;H&2#PJMZIuK8WRZ?F56r>XX zEP*kd4uCk+D34QX=+gmbd~pB1k8Dl`2%z2v21X_%1o`A6ciNj8a!Q600NakG`SEZ9m z*;8@-m(1gG5u1;nf9zsjt~dEuK(OE(lbTu#25ZH5drFE-$&n4LwVBjqDt=SM4pF9cbXE?RlGSbJ7HAt^R|;M6Z=|t$oH;ouB%W|E z_7(84h#K*zsB7VNS$(~b*8h4{&vuP`dvfNJNxPtgzCO8zB&_tIYd#>*IdlUCyFE9quZ+}4%`%WC+Gf7eQPOdai1?Ud*0c2KuO@yLGrq_fM) zyO$v(hqs{e*RxpGj=8JHXZ-sx(eUHi?hVn#cwUBHO!QBOmCEs1DKhtVkttagr)?Py z3$TC`eHm0olmY@Ni0H58<`=*wAiqE;%8$2Az()ynDl$AAGFpYX?H2BU_7v%VpjJqM zm?W$p3;|7pDG~{+ga`{E!e_i-B?N6-eUAgZYzm+H^Dhw4(OqM|98R&1Wv%gTiMMuo z zBir&nUf5Fx>>7$8>seb5L}sMwjZAyq4rmQWMeWix_d~@HVwOnaa_pv4Clq%(R72Nh{(iNFgn2a!_Di#k((nw^cBLIdr1md(x>6v8nSGKI zfBc*&25t|gPkA}p(*QvOl?2?dE1Gob&V`?>R3+D-qff_Jt*ntdj=n`e$t9bt!J zv6=be$_DRNoe~jL9vS&-S4GK7ir#JYT3+!12O*9UEgaW%{<$Ubp;g`VPLP>>L`9QMny8HR-* zz2^idi;|7)AtxtiTHA{*1WcGCKY3P$(NM!f!_PmZm)jjPI>GyQYbgM(nRSCeu$Wk= z{thzo?(x1bl%p=x?RhVy>9UnWY=04vH_HoA`8e4Z34TKQQ^sBIZrl)!wKzLHL*$5r z*7K=nM-8!WY;NYNiC&ep2T@358fEriywAvZ`s0WBX-#QXc$Suoq`}NPzl&PS{K;r9 zevW9ODh`jO!y4rV#7=k4|0cuzgta==Dvh1r|$7p|tAO%xv}tX>!{-2dh)T z^%owEmsX5yI^L0+n$HOwHC5<>*PX`t7@LAvqMbrTrM5ze5VYjO!$ML4B^r5`iuZ1e zW=FEn;m?-upFRM-uC`N{{IRiik~RmMd#aNP1i84J%FS>sQp6x%GLaJe|01GR=wx-|851^V;+dV&!9kD-sWkI3A%T?NSmNBGK z^JV4b8QY!>r$=>;#N0=C$Xna=f^3}D(zfpL@xT3`7cI((Dj20dHMb5G%`edu*dfsWem+aMh--#Vwmw~`dYxgq>``w7j4Jk5pz%?9~4*Dd~# z@BJr1Vqy z2O-S#37pL84du|Mw|^p&d+`G4Uvl9gF7A)(%bDv31O#0$s+&&La$pd$zlfj!rGO}~ z9R9X+IA|b^=LOxW2>jK%ckeQ@vUXt=YVv1H<79xiRgBb|3Rigczghra!wxx?Zmlb_b*hokXq@8NZs8`tBK=b#l(oOA4a-PY!78VA~gbF5_YtIYvtjVohQ%* zeB=9rAD^%?lYObK{xCQ9#8(@YwOS-oUH_o2n3FCmVxUQ*Juq0Hb)+K@$b$QXga&qb zwj#Ag7~2hp#uAMf1fqIAf_`Vm%XNoDy?Yk)+DuusYr6@KM6b*HS!7Z@4{j)s@Wc%T 
zckgvXP2asDp~;r>mPmMJNQfjGM!HChz?;%r1ETA6U%wUy1_oruKwuM)%=h;7MRU^a z?XaNpQd0U)PHH`Q`m|3c>~8qy&o4oPH#<9PH}a+C?RfQPfU6RnUf(!9MaeXI^LKsz z=>RRQl&lpKH+KynYEcs%ZBe=wqm75-XIIk9f$n0KuH5^Xz^*~0rcT$L_*L;UDbOU)N5OD<2mKCv z?Y68+&qPGERR>~tCi!W~PC-dMzrCzBJIx!Dd7a4U?y^++4(ZLCx;}jybMiMp= znA73Yf`CG4Aj9L_g<8E%|LHj<@!x$p!P3rDM#r&7ZhCi@RzzrV7eD1yj;J0=>S@x^ z#eSKK=np&H$I}o*Z66zpr=oJ2IM~^tF{zHizJLF5F>Bc^=o~y4;(Cpxw6wM9I0Pg+ zx(jzTAI52T&aJGxo~>LsI&#{xQ?TDT+KW)zQs%7ukpDH`hwjT0erP~cJxj%$Ikj}W zO}CjMy$U||{PTrMs;a7174Dp#Qs%2;M<{XI9H6_Z9^oj*lE$@eyB|T$nY6eK%kJHe z1?p8P+~vCET*R8$v$B5-bG4Xz`%=e8kI>%warG+5hq-5D>>d}p4mlIrM@aQ zJn|i&WnzLj;lQ7IJ%Taj37sEqYgM~^jvM9j@Tfn1Gkac9&n?Y-|MV#Up3QfdrQ?+c zpWL~nVz8Hs>U!}51;?VIx`u1Nt4)D}G2#dN9jxUs6RJNI_}?CV7QCt741lXQrG-GL z)%ULe1QNZj>iX~xFnRMO#uIe>fAL30%t&EzM(dQL3v2!JBN7fqUzyXYtPcEZfA)kP zZ+_;h^;JA&ETQ5xDk_Y)a+Cg_5RSr2F7?gutsv3AukbBchKPQ8wgKnadpN5Dz}U2Y za#esZZ??!#NC-tx*p;zK)TKzj^dfajE)q3T{Hk1CTRV$~9}<`Ybln$?u=xA0#k5D_ z6QY{F)-wL{xvD=>NNCk1ypUDB-J(BRJ+ja0Cf%LL=_AkdYV+>!`L#mPI2zg0 zZ||Qum8%}C_PFGhpffJAMh#1PBJUC1F6&pug*QgwkKgZ-6-E|Jsr;dd z?){7Da+m1 zxPxi&LPOF{Cin@BFkBME{|xvemj5el*jy?g>TkbR#Cz3zk^EoS_=+y?RrE2z{QmBQiPUHbPC%Bic%(0+m0GrG2h6uezE zCcw9SS%(~xX*&m=7QCd0P`2m45kWCKx7Iy;!WVooY=%t5U;JiN9G}^@Ry0u_Roxrq z8)}1WU|GO2vvPoBmXKQO!&fK)@Cv`y9VpwSFbG+YXmB2_J0(B=4<4@^ny}%S7liKU zhWQ_?{+j9w>OXvq?=|bUG}Ly&|8gDPakXZR7aizvFrigmRfImwk^LMQDGac!=}0z* zADIi1pl)fg??2D3BMJbP52@uoM<-j_FjH`SAgj8R29f^iXnKQ+F9n6p;^2>9buRA| zl`9$Br44m~s{=N)U@Oa?k@4|e+jYk~9jSpXyQj;hQ)O8q`lMVoJ_iR5NPK*JrqlJ@ zuzxP5(Bq)NEPnUiy$s+AX`;K)T4|*qZ_UQh@tZi+%R`tgjh|ai8~oNTuiKgn9Wfr~ zy8?0y=Fu*25(OqECbpD=JcAWPR-p0LKRDiuT0OG zZv_ySZUD;hS7KtCBmrq_qlzYyU^_;KRCt6D060xBZ~B#&+jC^4xv_8-Ub`{i3k3y# zT3(1R2l+WR{{0v0F~GJSeHERhYd1h%@=o=Bsc6Sa-qNzyP^8VHq^$g2H2<4MIFTzS z7kqWq^WqS=$YU;MGesmYdm?HVG&QAWJ&&$B*krw7D5oEi**`uAb~e>KyOcO<#DYBp-yRTm6d!pPZjY^OC_#^BFeJPoF+<7{vAcNv-T^@1a-H zjoKDtH*K!K(_Ltldt|$DVvBfwb5e;-6h=WQw2v1#w|8La{rK8_Gh)N;pr6!=FlfqvgvW78I=pLva}l9aCLQO 
z&<>gT`ANjY#3rYvLIs%fQz5=H>jx{m`SrR010|bri^tV4t85Za-`Q~f9ZXE2-|GrA zI17`V^P{@n(f0r-M0IyZ)zbP}tk!SblbecvOaXId>c#o2nAqM}#DJ^K%F_@xJHDy& zgZY)xCU{*Yt+Z=Dtlfq8CN4X(`Wxd79gpkz&yV&O%NC!1-dxC2LPGa6E%~)99LV?P zZ@-4y@fL_BsGmRovtsWr)Pm5dL6GGBlFER5JggVv`U34k4142&@2yiLFn{W#az}p4 zZ_HEW2J5geu>O!|UBzWPcMFe6E7vX#>J8|#0|A;uw%MH%L5L_Ps*2y$!D_}PCPqYW z?i?SRfP?2Yb2^~xtl&QfJqwAI2*D-DbifsKuNe-#5C=*Q$hmA#LGeMxpo(l$q3Ofnk1vG-BI4s! z(i3}o8Ey+4uvmWKnR-_9HjfQJdIt5f_mCt%*z9s+WwbP7@?0^#NLjfudKI}@*ZWcg zl)WH5L`WUrV3K=s1Dd>l5rZChXnq6Wzcx}VcYSpUyk}%8(cl0m9uO16SST%ltopjr0Hf8(|OM+{k7UT*k*)_uZR3E6NS4fw6& zK+InTZv~3oj_zoBu++>#YCOEt4rI(Kb6-|I0R*TmU(MBR4<~~;6CQ$;6h8la2?-dv zr~i2lOn_?zLlit`%4qc8?fw|;ouN?H(71 zw%0b}7RR=qP8L%R#Jnim6!V&D#E|fni}g@ov{N@~2Iw+q*!=Xjlv~CYspp&Ax8yS; z`FLsu2H_MRN2^_m+S*vXCSN0IX(bUHj*25UJxpfU13rF_Sgf!0Z<6;MMp*&mcsp28Y}r#kM+oopVA*x zWov={pHn`!oRfLIwk%8HaSQ^@;4_fErDc|atJiu_w4M$sa14BTMZk(c+_hLdSN1=U z=NC0PF`)i@c>g}qV>wkpxZMa7(dlw30@Fr$3<1b+JrO~W5DFk6F0QxoGaW>%Z7c7c zkYG~;)9xrlBSYFTXgd?VFw~-lb(wQ)GsLfI?EI;z1q{Q(AVh>pI{v$$TG9PqX*cL& z85^Fmj%k?e?Me23ea5k29aa?eBT66%mTO!bK3sy^0N<9s+@ARVNJEi;gG2bQKu<-0 zPt$%ds727dmRgw0$n!M*M9pR2aAK-Yr15zEXcOJt1E-ag>_<7XjIhv?S@S2?ZEZV4 zqY5}U3+XC&yF;VDHpZ&Rk~298K4rb;K+RTCj#9VRtLL-slXh}&c$M)hZDd_SO0E_D z>7ldHurw1Y^nS5Se11`BftUE)dNw*XJI4L0HKq`$!eVfIu0*P?jUPHG5JHoZW@9Zu z0N{6!7BuUvfvP?ZS?Qi_N%2V%W1(uWsHervG}x8@+VX4E)?svj!lE`3J1I1;9|Bd7RiVX;r^Kl;nehSB0w3+?m~%6&Dw; zDT!rJ7kc&T76cZ5-UfUHcs>Hoew%)E=pP{JxqYOw^7exV4+^wuB0w|^5!bJP@Pgx- zZU7Bdq&RNr9~+CO%c$Nv7@4zkEU1PrXol7!pc?{w)V2uh^kvL3e>MB|uOwj?jcUAj z`0!z9%!9aMdZTAqSYQ)#adU%7lSYt!A|oX#>V0A&k&aH)G&c#unScqtYMD^>WD>gy2|6izxS#h`*KQ=W z`pz#g*fH;7ZE!{qjIbH3#zvVz5s zsJp&#BP(d{q>fP))Pq1(fO9mg>m1K$VIoDuP{ z`#)fwTm&w3?E@^C>U60NQ+hGAitCh9CkV5}9tm)UlSVL&=V{pl|z##v>HLL7# z9r7^~P2o30Lm=PSWvyEqoc#1t+gj~w%mxUq7>pXY9p;coi z;zbIZIwS``NhyPM@d|`;=;(}wXJ$p{Hd5DbB`kl(2i?Ic{?wF8R(HIKSj9+_T3R;F zjz2oqRdr5J&lKeWb0*?kC~Hbd2x@5X^mo<3HD6%J1Z)OIag)WW{2P#LV{ka`c-!iT z?7|P1-ja>{_sB?W-hf~rkTTS=DF#PL+##NBH_MlMKZoKq=nO1NHI<$I3|%ul`7 
zrTJ%l&;USHhWO%Uknb+6dtaZEAw-Pz=3nR#A>`ojQ6}AI4pz*A!MZE*3D`CUpq=bP z8|mu$2uW0bmzG4KPw49D$$-`biZUA{e-D{+$nQn)*!=a`U}j+nhtJg01d9MlA(CW( zd!6HYfJR{vF6yu79}^S7Mybp(fwv_Cks7rw#z;ILs08-YL`x!6Ie=5Q0S!zWs|}AX zLV}4P{G`;Zm^GEo5bhFKlP+M@g!g_5B^Ov~@uQgvGbN1*Q}Sbj{9$v*knad57Y-r( z-p)cPtbSR}!BNPT9pFSEF|e;hRq8c}bphv9<#JFeiLq;7NN@`NrJxXM75eZNzlC$A zP((4I<2$B!1_xDX9WBm|nxmsj|%Xx$A8zZAc%%9STG+?;L zFi|3|HPi(=IO$E}o4L_ntJ&}VF0QTwS}>)Q#uNK~efGBVz9=S{m5uen%ClaNON=sm zyUzYD{q~I!0&X{H6msQIaJwdmk-D~}-6N4y^#av;r|>P67PwEqtP@U!FhCSF%sano z9WCH3en9L2!SlCqWBR;YTqS@u3keB9C(8_-RDpI~48U$TZ{9?hh7f#wGOyav{#t%< znbj1JmX?;w$!s9>BI*A_t4nwJwFvw%w+T6e>gQWUvy@}|`Hn^n5mCF)(14a!6x!Gu zH*RP(dc;HL6rY7yazg1Wd03D;_87=uP=J%V{Jnulm39?K|Njeo;eav_I14=E)y{rH zlkYwJGITA3fd0Lk<953y+?CdLvK6)!@_;bnAD4cCdqDZ~ajwQ=JG*_Xv2JWh4gAF7 zus{>Qcg95+8jv?2lNX~R9<(~B8)4i_6OX^R@Y?ID9ZsFaH-9N?bQelkz621?DKz#J3JVRz?o^W=(yY^hr>OB4%(et>?#xkSf z%Apxv5lKljxSpVAPPjC(2m*K(yfmWn^5I?FZ+Mc!KtB%dG${LAT*`c6U4&s|<8tRK zMcE_?6066va9h6E8kqDD&~Ff z8jA-SEQvT7-8`q-0t;Gec$%#k;lP<{iR0jL4&*I1krsTYm$#FC#G+F++u*+y!T306TY7Qp9N>6 zXh{{Tj`!6;^Tu%ZB4l9-;}V{ks;n7kdkBbD=60{3mQL0;n5 zC+Kk#HqOq=$=vq;kzGcmgM$#l;nK2yoWv^DQGHL83jMDD^KmZP|AwhpzP+WsS^nTZ zV$*B0wQQ890pb7Eo31%vg%oRcsa{Y&9In79-}JhF^QEI5t3qgTVvt5P>V!98mZZOI zPX4zJk0Il+t9%69_F31u+%vgAqTpQw~Tgn?@@3+D^g7n&K@?Gkp0lw z!CaCp@*pa`Xg%bU?ql_Mvz0@Rk-O0Jb#{way{=k^FW4a2vXDY5y=6Adlma4Lc>*ia zLz>RxrDDP3-z5_EfQZ7hQ>+kQp@E@Ju-c|2ASeF%7W|I6GU+>VM{ScyzO~t<*{+%8 zf&2IxWSqxt&TqM-0ZVay8n~|Z@D4f#;(aRq5j|JF%2S>LT)4pxS>o<9Ek5^~+8EyI z`5PNH$GKuIZHN-vprByHfATNx{pa(wkcj0E6JA*VSJ3@`lS2OoSp_Iwe^<8N zca55nF*#|H*v}o*gST$qN>j|EbI6yc0dk|GZy1A_Sq?00W3#c83=HUE;0W^a^YaBG z>vLZ1;UC@BYWAmlGjKU~{}2_ARB)-ZC$JXw010H=bovOX4>)0v%$mP`vi>3X6MVWa z@-!dbj+RW?hl&b&2;D?N5L&1x!*o*|R8lhCYKeQI^1KgjOEWG68}*Eyv@Y2lgO}=9 zRBZoyxsvJcYK1JT!>0JtIOwXlR-Fc(XJdMl?dY6sPu7R0qtJrtg?jfA`5- z1j(bJwTT>%YA`h;dtbDCRLVKgztjMIGTnh1%#$ge=VQ))9~aAb`-=q#c%Wt#oo8yv z%_0yhI7M`%gq`vOYvB|hmucGD=C>22h4iLTUd&-^>!*-oqp(~-fN 
zY1G;-Q!rC~7n(@-dje@HU_TnJFss?I91h-Fo_(5`o%KcWD?WRgVBOKi;?U|KY#TqD zbala>gE1);(KeeQ#_f294#!_Nx3>DJGHU!>-J$`27ihW1^t?nS)Fx-sBSc8avK^Hq zyhyoZgraqomfcD70msa57^>D@vG( z-vKpMq|v1FU!-Mbov6zVL6`8+6E1Gctg;PWoA7-+CQxvQ`!oqo)jE+w`---2q<4LO z>Um5mo66nFpUh(`=Rh5KYAzAK1`>vgT~w3)lng*W{-!+!d7owYRoNbTb(3zEtC39H zQq<*P6i(&#s03)dEGH_m-A973J`{$X&<{4WiiJfaqy+diAYYyza)cXB9Fd5NzYlo- zGhMsRuFR?CSf{?e9=vP@%gjwIn?LXt$Y)?;2T%&%=TTom7eB_!moIZSlyjr10s<%{ z<~mCalepaxazrRackZHnF$!LSVdPdxu2)>fKCW$@2MUV+MeX`A+SKvNpHMea(&u?5 z?R(km$uWQU?GJ?HkMG}>CPBfUNNUvQ^a z^XwVX^`sq;cwOs(l2~>1nrYfUm0_;+c=^}jwEJSUlsfzz3CvjNgg;XOQj6RBu54gn zi1GboWpE&l>xwqT)y<>E!`8;;Z|5(=w^NlENYvES<>>e<#nQ#vP5t&tx^vs!MffL| z3*N!5CjE(aq%n*+pda|kUR-c@PZNU?pd zMT`ttUXVd~ON9GuM7RFx&jeEaVf(r$27a{U4<9d2^>Qoz_;~dT+Z7gV-kE#N{kmQt zgTArToc69Pwd_p6oOmGal84v(^qbZRcooKKZIOLM4qJ(ni$4I+(%%&`&(a>iF{3!p z+DdiU`+EAd(sA#vdoQP66$pKO3Jrege5~%m%)58ESY*+AKau?wn>XEr$B&*-U<2 zHa6~Yyj?I7XEDA;b6 zHhL*PS2SZ*QW~8tyP>y)9P37SWFq7-3&uzhLk9BH%s#bq|MC?)D0O#5m zT%nTP-1(dK(&6ol&Ji1ew;*OqD9uQr`Ru*K0Jj6scs_wSh3TY2!cZcSFR)fUR@Fe- zoRjnxy5Q7G>m1DyCMZhKpa0=V!i`?dFYa4{+wym1rE|Tw@6Fh<6jT>}z!rMl>=AOw zN6XP1`U!OBr(_IKp|P{A0j!4OJxSco{iYiEPra0CITu<`7e1~Maaw=Ayxd#21~UoQ zVDU}Ady?pZie8OlL^nI_JwUU!GE*hze>M2~x6%A!Ua8^hZw3UZ3>Q8U4^v{68=`+C zo#}`5Ck7|x6Ul`UFzS9n_k5l5dTVYjBsMm7%r@bxNqb55R(uAYZ|%pGcX^9^*T31J zRGEL%Xk4D1?MFtw9e1$4z07>=`ILDeH}Q9@1T7|Rfk4@NlSlICLw48qPziGGFEFt$(AbfWgD?(Qz5>+S9qI34GZO%cF&G17m`x<0&$1f}QAMP}FS z+1H6wnk&*W4uzI{jq4l;oRH z=+ylC8}{N62$@}~=f#67FE8T`b#;C=QI$0vBRBHgx;x0(F-k`vcc29D%-CZM$hu`u zYM~fXP-#bBA04qyn>Ml-u|g*i>CMZ-rTTI_g8BRJB8vgHLvVWq{$Y}{!sqdaFKJJ{Jb4oI_!t8% zH@AtI=1I^^U%RKi#CEEfH)?2q(wmL5TP7?`CeM(WRZ1kh;k48U+B!e%rtj+C=w%yO z>VkNulNLRe$82mpKayVvl1ftJ6^$S07}rQoc%2R}4qY}dh{ad0Nov1-oyX4q%`;p3 z!q~~tWafIbf?l0U_n_fHYv5c?p{9by*N&8ib2w@1!}qc?d_EhWiSxJUD-(DZjf3ErN%64engGChC9+;rkA)@tcIpYhdm8do&q zfwgo>$UQt}A*x9I#j-5Sa@yg~CPp2kYEAcDY#!jmY4lHiGX5g7-Z8tqynN_$?RHtU 
zu^jcMP`llAC7sH2rqVL{O@-K#&s-aXCHxF;M+bJe2Chbh75?Pe2KyTq9qp7V7vc^(c|bxERs(QV}jx2DBc5XVqa@ctG1Rh@6opMu!YA>)20rc z6VIiVx&yxqx`2yn zBl_RhWwL#@Z=Dzpbw04#u5s=8CC&=B1sVsHhm#nL2^?@1!I&5uPp#GCyyCAU*DYxUz<7dYkw9ZpzuNH{TX z+_)O8ahLP2j%JLcJ~`q?a1hS<4M!? zY;|Ihz?>k5>{iAzg_2G|3cP_E2-M2|^XJU#N7~H&K zV#~!qM?WxqzryG-&d$e2JRr!4t?ONPgIYGZy<}N*_VXjgG+MgvheVvV%xn*Jy($SE zu^+mQM5m^vj+og!ASMo}Xz?pC+kZ6K(<^jMo(C4w?|qt8^lgRG@zsAPj*-%oIxxk* zYN8&qmg}(-l0S;IPG4??7RdHsU>>)~7ObW}oqyhEw;sniT%#n1V^deXFHxGujo}C@ z4A;{zEbB8qval;xVsB#ZZm-lP(`QS}8oge+=Zl1?p=>5l<1HKzSO2}X_%$m9_FeeF zuX=U15N|Vv%FF7H$4*lTbD?*OXOlUp$)7wShH7InklZnBF>?O_DQ4;DS0zDS_Ry*6 zypi>nNDP`4KMiXY^KkZ!J>SnHk^f4JgqCGV#^P@`I;zC8pYANR zeRL^<`S69;*hb6u8hfqw!mx|e1ODc(41BzAY0fY6`jhL&?IsqxsNrVxdKIT8X|GxExFG5!J|h3qj7O)v+$(9F*YG*<*S$M zFECALhmyd8kNzPvG!IvR58Cr8VrsMtle|1~R3+PY6uyhYy zt#kYNEapca=DOZO5rrt6=CDDN-=%xy12>S;$;sn5*Pj*>)1lqDGlcGY^XcOqGJ`XB zC0H+`men_LSC6FCz^L?;i{=glQHbchXuf5hJD1GyVWJDO@)#K%7D-x$RXf^|~sRp!M7ms0!USdGG^7GXgO9xAte2SpD>Lt5y`y z3DNnbH4iBw)`Ah4_f0?wbn;V^LX3hRr75%qBps8%n9F7|;7ITXmXpWvVJ+cj!$0+> zCyvWZKiM~J8dh7e^#247?(PUE8jdfzjvMz`$+65hSLJS&~Co~-x(wAB0KFp-l4vzpgw=LL#LM`C3@#wsS+VSTIF zYQMCXJ$8GDTfb#BT)P)W_hZ7%$;HA*S|?HN0nG3@F5G8pz`;K?m5g<3vL)ZT{v%{b zPMay<_ye^NsHXSg@o}g?#Q&o0E2FCHx^_28cXy*mOLs^Jh;&Pr(kXMU@N1a8n^VBEca8k(|9=5!11HeXo& zvn{uH6OjZITCEUY(dW0u=Z-W~@q^F~t>v!soFS?4#}C-*ZRTT547lBAI&BVc?1c$e zd@7#vW!yhV_7lLa9NV2Dl1+t2W&$1>L{&UP_&og6UX7>c22QZGCuvTI7#RJp1ze1_ z_8Wn=dA7=0;D|#+OkB+Fy5sSVR%&IrwZd#!XL{~yr@@=uY8t>`MEj^mUZ&F;42mR= zbc&S*04@nJRfykRCJSwdtJk5CwD+Ijyt02hYCri)6QkNR5|LEy%l;YzKv(_HEql7V zTP}5Vo5JVK!ygmbJ3HFThf5OEhcol$`PjowujmjqY%x}|zjlj_9t2cWoqy+AN%Cbu zdG>6XdZgnwJJ=^1JU5XcSs%~v5i#mB`0q^mqpiM)D12K@&zQ67a}lxXN_x_cIA}k7 zk2Sw07qM{=1tY!14GvI##Hf5)o0G%;f`f|M0-JokP@ce6_Ny3im3} zy7?}#EXtPK%3zX=%`;iy7cM3yrk$iBpfAD)#62)2@^i;XP{V+bV-nPFJBnF9v%I}( ztS5=%u&^4u&uGy}1!3a{1Km&vHo!JM(GcfBqoY4)s8A;6hZMgYD4#E`sECO_n%PgD zwJqnp+!GZZDUpv(27TfH$r~%Dzbz0vxz5vYkdaR+xkD{xJ5Xw!F<6C^Ua?J>BFm~R zl;us;5}x4w!ipw;b@;Su%cP?x`F#>p_u^wd~eV0 
zAbu)yeb1K1*N~9(egh*$M7`}xcOR76f2$o2CU2NTSbpQ`-s^Th)4&yDT<~wKX7icA z-oHEGx%WE3hVaf72Y$6axu9J-^LdKW^|<;DUi@t1kXHbah!B&*@`#aC2?RCo`o;~J zrkTROj)LGN2 z&Xb!PPyPNKd2Nac0Rm)_#7DAT7)`&Ff4`UMN}XZ^FZ?QIf7*RZA~WX;*!TD&-RI^i zP~kq{g#c0(4$evjD#UD}BxPo&XetO&(p-3pN~)7R!l+)>m2cVBks=avT14YfH1Eun zUKR!94B%q0Lb8VA)8ll?^%8#pjt}N>ph1uJFwil)0gcCV2#fAx2v_i8r6mtZ{;-Nd zWoG@uDPz;sPROCxZKcW;q~bE|q9NTre2B?UEs1Q`9s17Np#UlE~a*V8)OHIND zPEAIYi0mmNYcJyxo+vOQdSeGo3Q(78bgiXl`hufGD#FQEbRD4YZsOZyzrvuFlX*+m zlSM|>v{6^*b3)K8tWF;+KzxJIhz+uq*nP`ta6xqh=`drcUz@Fe;wib^ca(T53G^lC zz|MYOWIy?@+4uxt2;7a&3Qx@62sIV%E7=2KJx}ak+H<6cD2e2U&ep{+G@1>QG2p?)`Jf zHz7CNnEE4~oM(?SgF|D`Kfi8d3~I5hiwEK;_@et~tbw9n4=+cx^`e(@+UkE*HfSOS z#s{E$3Jpb+(y=O0_*j7MIpxfi2)@YIm&a6DQDxenULPO3{!~`progWBgzXH$&+TyTC?qU2)-O=oW8*ZHNR473gQx zPh8Oe{1fy;&ik(#=>N|@TgduVk|CPuezYQ{@V7f5>4LzdWG>Dau?*d;+phLm&AJL<$zv$Wg6tjVhXQ2 z);l|mm4ie=g_&<>j)~=7w`GU+(eDxM8GW4SSP-6g_b;6Q{sHM|c^h$Z)%#v}e1Yt} zhG*n?``GmSc|$`|N)Y|za`|Uo3L8@sgW>T7K>Q#%SvW$7191?!=hv&&BGA=!AxS>- zT{)$(T+Ms6vx>yEpKCO{_NTZurim$#DDWkz~ep zv1IyQ$R0GVV1+}UL8Rin?l&~!OeIdt%{#f|vlE%mZz`yqEJ#m8-ySbZz%KL9UD|%P zR3AjOm{{+h+54 zeAaS$`kqk8LE>ZYR?mX>@lNXNEJcR61XTj9Qhbea-TuIn4c5ybHi+lSSXqI^!a`Cf zKt2F6fvJS6MP)ADaXC}#bjk9QIfLB=lGw-uQBC_ zd5TU>&>$3}=^pzs&|colaetqs^Vk7 zcIN6IJDp0s`FeO*Y{X9esMxK ze71=RusY&(P3?+7K$-_{c?&Z>%Bd=Ygw$J6AmS2pmjZ<~A__`?4y|K@@W%6*CyB-( z;~4PziTV*^#bM$7s?W_G85TD&`LWO<7_@icUVwsvMU3qa_TqYnlKXRxA5TvBY3(ba zKbo9o_drSnDBk^tJ@I5-e$V+%89&H73}aUx!E-U)Y>FZ4d7uXAfFe1@m!znFS1^=p zcKt98=jZU+%slScF~ureo@}q}rmdi^PYUH3==P0$5RD~=rK5K?Wy0!H=P-LQO4*Vd zn1^N2LYuw$?HE8uGv0pA4$dJoD$KB)g7-6`au}>`#=g$Zy(My)gFB6}+OKtv!}Lr% z-@VJam~m6oMoS6>b4yNQ>5uH->1jwzJi5MkIaSu-s!Xdx&*X`V?eT5WnI~<-T(Vc# zOQUM8teGK%r|S00`=r;#H?J5CIa3T+36D>{^h%pg5<_B(6%FP?IPA9%b*~3E5z&x3 zzXY9u9W!INCK#2%FPbc)!*2yc>;<-H@sv^L;@scg*#X1Xx*nH(;V&bO!CDUS^(oWo zwX~J(T6=yscNCxB)<6AE@e5Fld$+$Xkd6X^expCfYx^;yv1>HP=PC+DM~^EyU;6=R zNVqRrw;8ZUfiq!{jc&8ibmR_ZSPA3zq;j3k1(iJTwV*Nf_Hq|k9LH-jxk28aZ6&E> 
zV;gv_n4=MMn9RMIj_g2)3V&bokx@ALG9%#h83Z5zf9G=}#K~L^XJI@p4-iIKH3fZG z&>!q)jrp!?BJ#aE-Zm%TvZ4k6&@BmCbTy*8SNlmsG5jkmk$7-n>k1 z}k+ zuSRZKrrR#cgpUI9#(>NPdI(l`P6$*z^YDe^(UhQ&5=W$x$Y*>>li}kxn#p*-RjLjB z!#{u;IV!VQu_0mOC?aWq#(Mrsq=3ufkcEDqsCY=+z5lbi48g>1=a_uwle*=;XrDx8 z9;(25;GyGIvV>!G+h${`p;~*8K5Vej(HZb3j@vqH!pvmDjrh5a)9|*pi-Y3@oBj`x ztxfws4;ZG{@q-fJFu=LXDa8%*UnV5+JfvfaP1{~apObtW@*Y>C8_2OQrkoLBD!1<-DDD zcxRNOy2$*5=<0GDIih#HvbB{={PJL<@bv_z+%L(<7ZFL!ufP@LBp$p*!Wpc=Fy$TC zm0KROdUPYXqwZ1apRIDMexKfxR}O42Q&gL=%B{D$rm&;Ek`J2CoLt~r4}ZCs{@9bp zzw@}BZMGvqoWbo5$)g6u5XjF`OO3@QFOon^laHnw`!{FOWP(r#4j@OL_ii~^bldhP zlCA&7c5Z52Wce8&ir#)WLTj~I8hZn#&j2((mNl;Q4x<_D#X{2ug=Z_Koaqh4{2~-0V3{ODi-ePT+-ZIWyl*% zGdd6Jy~~5YTn3cxXoR6jdTygsj}>ZJQ0or^iGim7{_qCznv`2Kv!|k~!sO-j)m}4HB90>p{7zv7Hd!kInr_fM4 zPAf%<{K?5|&0UWDUFO*wvl^gE2z@xs)t!%Qx~ET-2eK4XA92 z(#BH(><&aQY;(acPL;|sY&f+GG&bH6uCVR0cm;{U03UfhXJQ?ML9+!xqb zChS;U-49sAy9nV$1uq&?OYPeZWmvZNxcE>u!e?g*!4Ae%tWM3#n|v3{etN394<2rC zAAtRQVWeMpu5^Z*-H(M;ALjG+5T~%UgtU9N&UtzCr06p=Z z9dq-(2gsW2cTmq0Vn~|?cTsO+`SHV=oQFq5eoao!cQAVmQ3rnMfWtqigBc@xuV`-YIK+|R&s=_WAT_o}2W4#3&u<`d7Lv)nl=c!MQ**F(=#>j5mtG;_krob^LlQbOR?XsY4dD@JgbQea8twz^*?c>CPI z`gyRSVnO9&^77a~N83Yt<=WuIE>)wRkZ1Lp07P9}+vBF83zV;W3e>-}kV9vXuoX;-mmDA^}0qQRTgyYMn^H~2dp0KGA) zw?_LXg*>$Ld(YI&mP?4*G+T!bybc!f3}y$EVuv@OaK-&x^{fx#Sj*#(D_|^{+}oei zA&RqB_=xDVd`>~rud{>sG@2G;>V74uV?_#&_BY11ec^BX=~Sr-Lbq}VrFD<=i=J1$ znEn8nc^f8=&{3p1lFCMAE9XWa;Cv7~VJ0z7zGT0##jLmXVQ$XVa!3`>+9*Li>2`CI z{J894vrq#X5X^|Tniq4#Oj=Egw@2E=ZGnrx?ymw5K5k_LhC zT0|Ya_3>mMT1oqhHscPJCG}6JbE~_?iDWt400ROr7n|1>li*S{!FOt!Y?j}vK=xVE z)$U!^(sFQAcY1L&NdldAfQ{c2m8$vifmU1Q zB3GJ>bKs*es5bz^fdr6bB0`Vt%)-Nu?c!-QSxxaFhxP|@9;z>y+cuUP(QoSN>s@7< z-)%^0^4g;3wTY13T|U9*OoRgYfzjm%MsY&^>+C%FbXlOHdj>g<*ZBPdq=WpoN!#66 z;#f(M$f0{Hk<3RVCj4iOI;bhKsFkyyd@YBc@3Uje;r!%O`9vaaz&*Kd8W98!>FI@| zp`x1Y6wb>VQdt7xnu60aEHb>!iD^TFdDa%b6Q|Ff--SSjj733!;^Q+e@&z>Je2tBu zm9y8}bLkB}^6AJHK{sB~&{r)YEdymcm^Uc`uy?Z^1|Z2L`=mfd-*B$GO8@O2$TZ8G 
z_{846#cLffF~PS1!T~?wrFtD{qB()SWBWp5Q3J_y&%Lilgbr{T6eSk75HL#?13sSX z8|$R|aV>+zRS!$rMOx~@$)leDK7ojW=2)==xlW8Pk5rS3*t^+8x1C?m7R)VGWLSHZ zbmxg=Ib3)8p%#psNk4UN*sIAM9?iXE@@nK}AHG|ta=n8#( zwePZ#z*Kx!lqzX>BFme-ya5b_FTQ-y2|f@x7O1MKR;rgLGg)j5xLC0^c|y7g9|%R9 z4xPpW@TlC{ROudjqypc~kmgj)b)<)e>*R8qY%}|KU20Zel&daLv%9iU$xsB+ z%}s8sv_$N%^$-eN+<@C2M=$ZQGVbihm%&(mn0tpWDm7K>BtG7ayeXXy$U`FL3ip+u zKBcRFFx}35Ej*-Y{lhUo8eM?BNCwR8>~AuC?Dj6L0sLw-T9oMe<0DFD?vsA!%)z=P z`5))911CmG$_}}xv5tmY`>Pt=RLo~leUMtMaTcPvI`M@Bg1bLf^$YDPxRm%Rls%~# z7u9mvSfWhuh=y4cf9(&ngo$~g&v|jWdjs-XARP(y%jf3qV1l}F@Xq#{bM%S2&4xh# zjI!R*x?CHKSZpkM;?j$k-d@fu-h@x=jQli1MIVaLWb)UvO83Kx7}i%0f_Dx_gNsAl z=ju;7@UGYai}Jb=&EfJ?hmrk}>#$pW0wawcbXo(d<=tFKbRfBo9n%YABW4~Lo&O1L?+r`0qEtQW z;>;rD_)R|Nd)}%lMt;v0_XA=U-o})ta-}Kf0_*X-dD`&+*(fHe##?+2!#Mt)Y#51UZa!5wAC4O3-IUpPap5X ziZ2TWPjwpI(QsB$QH@hLBs$!SPTrTx`7578#`D%m_50iQ(YQQNp)3!)M$8} z;VKkFfcGH=E_P4?y*Y{$&4u3i$g|O3*1 zhc`x@?S=6o_jYC*d7H9+;jW1Gk+ZHZV2n$hRyJ{)|YK%;h2s zv|qvK3N_n_<@F7_NsuS|q54RjjVP3D=VMOi#l%696w6({?w2v;3wJBhQA8*6c6wq} zPn|qzM7a3GlyRXS#C1K=2s}P^f316HzNkJ(?{Lltd{F+v9Q}YAxnU*mb`PFvRfZX| z0k@loaV130#`HKpLmDZni@CVJbG8s#S@+>J(p0K$DT9k`OIAuJI>{I>aAHSc96LUZ zvX-6exebYNesB>Bbm>mtXte@;27)Lb;{zLoE34rofF+h-{G54sTJS14^^IU?&o_kJS!>w z-r~P1%;p@pnKZIHl9T$4kB?2+)^FGdxgBr;fo+eQx$GL0PvvcFNaN>+5hpl${T!`# z>wTlzHpA;90A8!`++?VC+LfKjXq{8>hgr>J)HqSCCr4lLxS$9n&?ArWD-^H1J>&X& zea>NZ4MpYvp9THq?7+0G;psGy@vo2pCcs7To(KO>i;XGv)HzryJ6Ng>adh+KV@byf zlYsIlpl}{TB5R{ES~{uQ59NCw%1Ac^fn)u!1( zCC6PrtL2eiB>5dSq~Fd~+u3trO`S*g4|Nj%idTmJ9w^l8d z{gQ~t;!C`W?CF`)j|xSF@CdXYKYFCk-EqKz)MN5JY)}FxqW6|Pt#lY$|*&w9cqR9gqb1FdEwn|0f^F7X?VO`*- z+C84oULyd0eV?)z1iZ0$>-Iyt2A`il4PUfp`6gx;ZQ5=9H^>Xq=)kcN8IDBQ`M%g> z@D$H#S-(_QN;2AQ!a7B$*OV|g(#?(3?WsGZ$E50#qE<@|RAel}>&RvNPG7eNO`GDY zx5uhiUO${+?EK7S)YkZ4kvRu#VcWY8YKP#@0XCg~$)hJ4IKcF>ghNmeTaC3x zoh^neiR3LBxmYx*kR&3NNnZ)yA28F#7Gt<%+^$<^P;nA@-Ad1sB&g%&recfnEX`-R zuMN@uH(Q<6^1kw`${j`!Xh_TDO^svE;i#l#5v*tHCt<`jUz>?n>7r?Rc3>oev*G?L zXZ`KA?)$^Gh4DT@RD2WDBwxeD@iI&b;?w0;7Ys1k=_%gmgQ;*oBfa_+!E1LM+bxp4 
zCa&_Kehi3IdP^7T-L^*)5%h_^5%HU_c6FCZ6=M)_Vxm$>i&%2CS^jFkD${F^2Ee$% zc-up0WN63RR~tQs=H?|XGQx+ot%;N4;~2E@NbOJdgsI=D6>)prlShWe`oQ2Oj^C9P zOf*2n_ILWiT&KyCh&U=wOiSm4qq@re@E%Uq?T&eR)D?)5`U8%1)(YiY*QO6p0VroY zQm_rt6#5xWD*WxNt>~P?_Hewxk)Ynjbaj(J9|8dwgz@hV$F5U}oB-j!OO}T*q3o&;=%;e6FB4D3QS+Fs2;emN7Jmj0a zg49A)0BNxnt*wc3(CbVNom0Eb&0+UD9z~jkf&=^7eeogEX(e3BxLJ~UehL%=UE->h zuhuGN43}?_eMiMvPAWW2jsk*J2a+(eKrJ~j0fUHSFF2j98Zfg2<~)FPioWgoqNl&x zgk3a;UQ8nJqMu2S=^I+o@Fo~~11hUgWrJAi@>Zt?yy?DBTDS!Dp6I6{PfW4n!#j#q z=RUH}uHS{k{QV8;lIP-m=kAQZ^L@O^BOP;~1;pAzDUM3}N!k(J3QWMr?OuqdU7t`d zjg5~-3IERO7`@>j4~%h@N|+1n;1M-E{{_kEn+|)yjUkhVPT~6Y??b7@bde`KK6eX5WINb_Rpn_;(?eY zeeqYxpETsw{s~YfU~buqz+t+s_lXvu7lMy;?R9G$(?w@)JEM-l@5zPTZi<#4(S9Q< zwA9TJh=?Hdte?&$g$qRMY^{ER&!awcrd9hEm95zu{2fu27M_n!3!o9a7+xhsVmM@Y zQNNUhfFRl9SAke|ARxkOYHp5oWPSU@(QsN&g6ihRaLGXhTjYrcboa3-i9zBaF{^)% z5N0R69k&^k_RkG!=9q3k!}513M*vWJqXF@%MEi-ERv|q&;O>%=n;Rar+W?fdE%L6L z;R=Y)TlFxaBsWeWoZD3m-MZ;bVk}AB2AowZ2Qz;+fGP!;l7YVfOnqP~9vw@W!Q?Kx zp@9%^M;#h3V*J@(k5fhGpPrbDxL9M@)k`@=z85|ooKb1z#UUZtsM9~C$iF{P-9DJl zGnn|bS)s{fHp2oJMZzWFrxhi1cexSwm3h{CHPTVrjX~vO!Pqt`f#+$FRqF#u*>#KjGqjuC;c?nM5o~C8cprqk z16hsC1aOg7omq5@Mrm-a6-t46F)){M`hf4!P^_ZCX*CN8i6Mo70Cdjts+*R(L{Awz zmB%vpV`hnjc_BGK0cI;sJjU)V!~eKn#Ojojx6K`sLH2C0J#JveH#eY50Jy#(ulqK^ z%&ih|ZVyBCw)P73zMcGj?ZqW;Z8zn%*G~~5R@mbw9s+$t0r;NlL{jo(Lo@HD32$%1 zSXQ~kJazMJ&0j$;v9#WYhRf*r#@H2#K`k2_ArrdPGv4J5e&&w6g4yvy-k#52yMlBs zIbP$rFwAwc!peLM3m7MGDK7%BUxnQ=DJh>js?hDF6eyDbQptLeUeB1m6=5uCeAM<} zwy^WA<>Aodo13b7@TIj1tJNPmRB5vCmtbC4f{{-{*+Jp7_q(&=HD#NY`B%C^S4= zEj)aig*@3<6ut=t0oL4klU~E=riF_KSSa+P0NmUl7RsA3QYF{~PX58~RxnX`%w_;P~Ev zQ)^ZwuQWXJpoCf}rTwde&~RO8s^O5SRtW-^R#YKja0PrlC))HtcpehTr-u>WJS4OE znd`fybZt z1yetFJ{QC%Ywpawa5+wlHzb_C67$p{wLCYCkA_IrKk># zq@XizMvaIJHE0z~POOhVR?Qy+hy@_SIS|!7 zbsP-^_q`m|3y7RuI0QH{=&AJEQdsw-?(uaya1*Zi~JxTn3&bhq!*Z9m`S zy--U(%PlgFLiyKO`lQ@#g(YG7uN&=+(Rjo(__ksjwwsvvlpDL3P2(RNC<*Qn;%)Gf zT+Qbw{(UofIKy77l8tq3JJF~j6{0JxmJ4XYz|y9RLMr+V*tnozMv>#xC)MKkpRutF 
z%!Z(YyKgSMBqF@y2=G1qfD%ka$6U)xHul%*&d6suwpFx3?%k@(Wo$9sCdcbdf#{Nj zvV#rYHXJbn_KBYsQHW@X)ie7m!;|=!1;i@L?vwdDfY}{c|EhIulCwCTKP)WyZerw) zIT+A`m^a&qPEut#QQ@WF@&vtaJbIsFmxc$Q$dO&^RBn6e*5ld4T58YX4e@ z4kJTSEB*aLyJ{wJz8-tonI$*rx|ehwy?OU~cOMI?W(9|gz3}jf7q>Ljz~oOjnGXfZ zI|Z~MN=8PM728`ZFL*Q{AG&hQg*L8qX9+g#B4+m_dw(Xm^bM|-TlcTF^#~WKHk zV>%6d8%I7Ql{aQybms3r!9{2ChJiIp!dn#P$fM3?A=)hhAI!P={PxXHr@Fu4rCTz42Q%6E2t#h6)jhYjeO_xH~LC|l7cuUg~@N|vVcT4VN< zEEXmS+0QjnD|FmX6iP};Ru8G8pTJ1)+tssRiY3t6 z!b8q?=AHu+<${|1XU{N*xz-W-mP2Ri;`0|TdzIc*x~=D$!z)+*5R8Q22N1#6qDw}R z%hbYaBqE!ktqRHtX9C+)UdB53Yz)ikflZA4_y&d=fPrbrq z&GW@faX+IcXc=cKJ$Qh;1IQ61%gLA6tcL_x&YA!~Vm@h_^`^jSJ4QrrZ?`A$8v@2dXEV8ArNN2v;?O{cfA9N>`}jn&m9o-d zGG6FBKCU|?2aF{kEtUwjd!_&o0Sn5j{iP=o&_vV4A?lat1x2NCug#v|(6KOx?Z$Sx z9;FXL8W!f#OFW15)G~IbpZ97pXONsQK^eHfiXR}n0WVST4SI{zpz09nI7A#hq8NK) zeOL(eOi_}Ux_8b$v!Xr+V!O6h_w{t(X^XMAH($+z%tib!S<@*aK(4M&tEYf8ucPbb zu-*D^dhiR4I?)p}QU*YZv_wwLyWcng#AC-Nzh3@5H8JHrAjqVER-e!f>hbYWlh*}3 z!+ZB#WZE%5Ao0OLu0L+G+&5YB=#gEq>$%;$)T=^46dd^7KqJs$Ax@1IL5ZbOMtU#K zl=p@Z9v=lQMHRJ+m33<`>ouJpdp%@F!*=6*YOf(N1une}ml|vJ=#^CWtrMGrtG?jx zA22Vl*@hA^hk99FJ~Mrpt242t?mF^`7>qT%y|FXh*IU8*11=}|2Cvd*gQv&j`>huM zOaRc`YOcns)fwGpwgR72SGND;0*L6{cwSFffeAhEcDsO+pvHQ%U7AULoUpRG|3qeb z4LaYG;W6FWyFF*X2nfJbURxC+0c^k49SZ7PCreo`Q+DvGF1y~5F?~2;!yl{7AeEiu zj3yNbrr<7-GLeu7@>#FVmH)QcVt+<^ufdQSAJM9KM<^|e)ZwzYip6F z88JAXd}i&5c3TQ)VF}c6th>FHX4B_}P^t=G?gx&y-$Rv&qQII$&-=g$*-|>I1dvHM2Ny)*80;aTU6Y4Rm z*|c)Na>s3uh4PrvQGW~P!X1U_^NqUMyb+9e(vulzp}&Sw;VT)8;A6(aady8k|Ggm- zN6goUttkOf`4nr!nW|X8(v@-^kt}q?abL9oftZH4^sH(1+IckKVW!q)C?8mY;Toi4 zN3IG?%NpDt_?-{t0-#STJM}Yy*Lr^vq2uJS43U`ModI$A%2XQJZ|chwHh{tk`1yZZ z1A%d@DYDlOG2K6L@%+Bi)$7rKc)yT69-H+k`waj%ltz2(yd~vz9S~I_`uerc{poWC zRsovRy37~|4P3rZBy*iaf3vx33g|C)whx2yWH8}k^Q)qel90bsfb0+2qcP`8_37O| zxdeR%eRq?rCO5X4uT#&#ObQb_Gw>yyTDGH6v~WMo6b>~*%wiyk5l`jxU(n}nVR zVEVhPJVEL?4zs#(4zRFsVXJ#bGPFkCLYX`NV(XOso2>)9Mq}3AIqm)Lpa!0)G{BMo z@*^Po;njbjDMkmfM#kSQoziDdYm+$GsQg@nR1uc{=+E6&mfNPts*8pt41Waj!EXni 
z?R!HRKvV#M0L)|fkF3Es#+N}8R^lk2i5XD&U>XV(p;*L5La$in#&msvoubkA;X834 za~bL$I^;@~B@{9IF8&B0YTEe0;%fI);d@pH<%=KpE&Kr!f2X$wwddeq$t5gIkOsX! z^qT!(oNdK~fa?l@F+@al#~I0mjg&#iQ4KVjM=X_(QBEg!55SVlL86T(;*ezWq%vJXtV}KrSUu+-MN-0>bYrrbSsB$^9F4+C5?#@Oe8Zb{Fno(b@H=B|Y2ZN;&gBAc~ z!1+omF#x2e-Cr-&qjSIhR6V!X{V;U2ePwl`?hNCH5NF|I!7ETOnUI|IlQ2%#;Ei(| z`PHdRoQ9dNjB()D8*%D*$`_mvZ+4_eYUKK=vsiltKb^P2vY$Axaj4_Iks^f2f=3Fq zP&G1k_02TP>aSc9;Cu77P(G$q`${VcgZ>S5WrmeC2p=2lK}7%Bxw29u_+b<7{^2Fy zL5QXEQz0UM=@R3p_z9a=mDD+s^;R=-P>6szBWJ#}M4i`2Xe68+8hdT<241e^fS23kywkPNP{;i{jIm+Yu)l0+w#ln` z;VwNaJhG=W5_S2c)jXtV8iPRRVB}?Sah1Rf#f*#oWTChe;0SgYRT}&fP=Ts?k~w^+ zAb{c(-v6>fo{XOos3!T-(J9JraY5bI2SjZra|HV98*0VHOU5p0CIpzSion8EPYf}V zmY6#NH$@e;QWG{O#c$fcuaimAHJM^ew;|wu5xeHyyWsU;m?2MQqnz%ELOHL+hQzQx zCL)2sL2pdIQfJ#>KMN4U#7l!j1a+=<0=Dtd9&7UW_umo*oI6;!FG7MoVd8Iv_>^sT z8&HQvAO&W&Bi>k$iTU4W(f#a_c#nzwLOfnV7@UhP3Z@s*p$ZTx*rm*! zaP0JGja%Heh5#T*=CT#eIGiH^ta~8){RyPso|W!+rmkSWSWacf13Fr$cnaix=CxmC z3_F0)JkvKG1eKM{Xs?gWABeduLgzR{JhOIt@N_QhKn`NU?)x}?GtAdSdZZl0KyBG^ zi?))zfupdxksBe@ca=v56x=Ggw`=~z>5V2%YR=oY8@Gp+&z{|%Ol@Zx9d`BddK?M@ z?l4B2bl|d(M}*kHk_1?bPz5PAAQI6Qj zWe?T$odb)6vPg|9FFAiYj7yc1*%T8#eh&Ez2lAUR6ISh#1=PJ6w71f+gl=wHfguP0 z?t%D_Ho_GCrl4E+F30ily&~?7!jr9u1i4-pK2Q2IR>xI14TS2<^V0RiAGU z_#bczna!Dt=w!|NIi2i(=pD_q#%se5WjG!TDj01&3-?}1B03G0+jqJeYN8ri;UEdB zr?=PbKz{(3n1BMn&W;s4DHiuFHAMp~l#1zAbLzqYssFb?DaUEf0|=D3YN(T^y^#AwrqB_sWI0;tTroles%OYN+w z!GRvb2q{QH<;Cgmmjgr0J-!+A1wO*hInfj&VN&>lj?XY8Buse4x~UB~Cu5}n4dMZl zm^>`@*dX*J_RpTwHXRm8=Q48IYBjf^!Kp5j%B&9?bcB(#$>#LP5mBKr=>F8@xZd6l z5>oK1dg^S1!TzC`p+k{>C8&L$!Bq$O0VYb}D=0x@h5B9-1@Zo_AT9@X|0F)V<2sCr zQCs+nbcr)-onss!N>l}_PmMWNkiQ`FBd;ggwtPm%)R{MBA9W<@FC^?rSRZl)soRvZ z&=_Q2nL__ORtoqWxS>Ts+&|mm+CLk+TbVC@Q zP@c;fBE8YZ#KT53`mXf7HcP414m44vx%|?}nxSv07n{5>u&}ZrbT4`&-!LR*H4H#x{qa82z@8MT8R0skIlpU6m$B%MmFVuSR8r7#>k;U`Aus7nnCnF6QbAn;;|P@9z&F z?)_-2EXp`(D*X3vCe>dSEwYtx$-baAAT-KvXmAFBtQCku5m4P55WcBwNVX`OH@1e3 z=9KkX!MufW5Fw(Ao6<*ltAwcVPV#vGQO*vor}1wp8hxcL%`z+a5Y6;5tOWsa?L>kI 
zEPMMn*ULFW#bUJ3NYntp-b^GA>)H2ww>YDJ-X4)@wFORw~ra z398t8vQW0{Y%?r<1P_+qb! zJ-tX-!bj999Wp0@`^V(-_iHrrp5h5Quni1svj1`5aA^D{@;ED@r-y#EEI_WHpa4`X z?&&a5>QSJ=$$J)KGpbmBX=-#HP zN*cSZG~gyyDohx_L#AJy4uq?J^LkO-^JI7W8~#gO!m@}?Uzik~==ypBhep(DB7u$G z+qva8m90D}BS4J)eZnIyxN(^)Zy}r`!-#20j}bnSA)vTU@|fe6sn9Os6TKCkO@U@8_4`EXjSIC_y5Xgt|KXLR~2p^>^U^l|jSX07pb* zet*(mUZ6@046KRR|A?NRR)I8L35oMRvz6(%c$@G0k~1^$gT9N>+Jgu;aB=CR0uWAy zOxPEf)XX?#kWtWp5+Fea4-i`;-hv>oz8M~0nT&sTS+IKh7ax9^S_p4hG~PX|Xw>G$uqfFt^0gYgUYlqrG9 z7B>MCcDS3HM)3T_rn-Yu!K9{csPWuv6}Y*X(`0fG z6!MB2GyxZalepLr_~gTDhWMVlKYx7m3k<5I6cSPkSE(%hVj1ND6B@-MyVfLo527Us-;vN2C*%CSSQiZD7lZh2JOBe|W zxNF-45=u&77R6mz2E-9i1#?V34v8pxXIoS7kLT(l>YFP~9SR8(BM87e3l2Pi3s-3I3T zz$JrpET#PDrJa?JEI+g^xmNQz2?QGEjTxDBK>Pz*l70goSO0|MKO% zo!xuMsMN@`w6A~vivHO)%~34^_wp@^28cR$sk7DZ4@MK2tTYCb3$(B>Q>9`q$d^|j zF4gzP4@dCUf@%aA*#QZ~GN`%vmAUy~E_|fZf?xcrtM@k#_hN|1L!0+EDj>K8gi7Hq zFF#)2!ml4ugvTgAT$U~{y}hYi-7SvK937M0RA)q6kdTl{v}%!4WF1MNWq2={pcd3IhUFwm`bsW>*jjScQ~zbVeM%ol?Z|Y7mGM$Fv|~o6k`Qu%J7tQtU88=DZndStR6RDGVP?EC+mh^Jr}A~ z+BrI!&ZzMP3CTPf+&Hkd7N}a#dxNNNoD|f{qZQFNZxG|+mIlV!B#qYTYqT)_IKX}4 z0B`x9p~MKC)>N@(XI-<&JPva5<@0^MXK%rP&Czlj3ZUOC4=HLU zL#qA)zBG&!?X7aMLDJUI`4$jh-{y#nAb)@hF*7qGHUOD%08U~02_-Hl1M5a>vcgV= z(CZC?l%kd~V#;a*hzSID0wys7+-!b1-eaj5PqRroe`hgV6A-C&@T~l0(tplb*R3g2 z`r6_5P3g$Ik53;m2JS~I3KqAg8ZEe^5mVfNb%V(62W&mwfwH(Z~^u1+bgId&R{- z<+40@DD?FB7$fX?Bm8TPdRyD14@4MWTE3&&kOd zby_geJ2MujrS;qZEyqNwQ6=K+%n9)c9UWbFJfAdxiekb->a~A}Y7nfu+%hi4r3Y&RBoo8p_Wobv>*yotL*{96=1Mffm!YOaOcj+b zT9>=Dh37-AZq4eV+N&|q(ZNey)tjsqw!?81-@SJC_Y0{9fK3C#EbGpCFMz=0IP($h ztFLb&+!MiUDP(ZoSEp)oB_Xotg#V8>)u^Uh}o_+S=)|44cMR3Tjs%8}qi z!jKuTyD&`KSj}G0O+Q~rslt}h$jHccZSYR^a7Fh!nAhl~x45*F&}Rs6hVXz>7*WYg z2-joK!olHTc1eu<;Ri|E7hq>4{jc224S?A`K|Gl$bV~H<>+uPDx&bl@e^2v&6Z7NBN*{nXiwrgxy7pvpe?(vmKyeb0vHkOn%AbLH1MUe3%H?AlybO9er|3KC`1CE zF(OY-Peto1yQ}U|h2E{!pmVcl#VJqV?fCO&=5M}E)$x=|u%c1(cN}|-puMyG-Ch3F z0dNOt2rNfOM}dCBb-MP{8)&pF#i2$5BWpA?oo`8b-GL|+2P(iHjNw-fxVr-3cJN^DPUZN+R-d5{0DKz 
zJ;pmmm&YCu5m7tcKH3=l+^n6Cj{St$Fo=1vL@_r?Iaeq6G+W4kIPV zsqd|o5b`MIs3C4zd#CE^Y6Mh}*XPR@Y+T$HV3a4>TZ|Qb1A3*RaZwDr)hD3J$d3(= z9sMr6_>XBYEyh z@hh87_Yk4X5h3q4q={y#d^b>v@6{OT06NGKF8=rFaxiJ10T@MXF3_T?NT-mkA_&}8 zHO#4OKX~JOqXBXw(n=W72qY2_=pI(D$W|>auv=HC)G5^ONP;$kpGPweD(Vmi0M)-_K}XzWmDn8Flw;Jr~B^ zz(x8Sj00qqM4YzEmx&d!liR=pqgpG@b z_8&Z2*m`OsxJh+gQvw4s7@3%MkB=SbWdJmwTxLlM7x+^enk=cc%J(m6XkhvSHk^aX z8QR1?5tz;f;2<1Av(Q6j$oD`OFqB2eiwnd4h9qkE4I|*bxOrodNQKj?tI=vAJf$Pn zmYz%nhW54{nQydMp3T0~CKpeq_O>S{9=#Phv)R=qhD%y>m#*YP+`1D@|MFfU>rspE zkO-wonHpV(LTTq#p-|#4Hy1uv*NMlv_WECjgt6(A=6PdY5qNC;Wq^$iPwzX(g-*JeC(I74?d^*@?y-3W?*tz}%EuKP&&&e9F=;(0Z)=eCU?6|q7^4U37(uP)Z z`|wzw@~9b?-PHF7rHRF#MQrR}$@0{iU?l{I+B;S^Tm>w5X83QQpin*~zTAUuoKn%Twc?B>=XO;39x0--LSips=$?M!M%X~tH5=PU* zzt^=Ie0u$3M6|4(7lYdNZC&eb5VROAJK$xfCLwZcDSk^2|d?aaI;R;9;7z&f9N(P6Jfi(m7z z@xSH*u+46n3uUt=4z$tZ|BR6M-v67FLk};x@VKn*aF!h1y1abQk|9AM`dxa~Ib)>6 zT^|-LBqJWeXo?gvlq@i={}Dylm%VS#Wt(n3I=2B+jc#0g-|LM}+w(@<;18@F4AF~l zO}#<}!;L-@DYhRO)Y0{4QUB3h^t)bJgY56+WTx%5-;MU%!)c;MtjEVX4mJsF3nwh$ z(QE5+HRs5#rT)KgdC^m^aIUV!8%&T$KHBO-y~WC6Z;nz4;w<1ypCFl|S}b z7sf^Y1pj%V`t0s|51Y{c5}>or*R%TR!tzFU;rqAr<2dKmwO4Mmn64x%)v`pKfz zenn?e#w>?z0rvvL_$XP_P4;VpZRVWCn^ktmuV1}VDT16)d;K3Oh%3SJdHeVXhgv^> z{(NW7mu_!Bnr|qB=TK0foz3OQ6u_??00vgxwqwzsTwKqPn)DwPzVUHpcpg^im4{Ta z3?lr%z)|Iv7}c?Ki%6})YnfUnE9BDWGT$%xdHCmVcX)W|Z}0CK&Z{qgL;3CN*Qda( zZS=v~@A0?_xrL6)9y79P8^hxQ5mh&JZdJp4AfLHn_v!vJ#K!*Wr{L+>w|PPk^dqdl z_P&wGDR3*nvA-vys54w`vn*AaO_?F;$$IW^L=#QxbZ-$RruoBLiN-|~EW>mrzKb0c=*VaW z6a}y~#TU8v{ydZ6RD{h8{w}-v>zJ!6iF#54~U7kY?~)C_F%*mt!9k` zG&(f0uG^7nLWm2CpPA=sV0XRDcb#sp4)YAu09Fjf$<@`B>Srlc%#eC?0f3vq)qnvM za^(Ydf}1%~W93p3$8 zn2m*ng#}FTUgKgIJ^(*4E+&MDtxl?@jRJZ+w4qME?YWAweL?Tt9QD01pL`!RT>`GJ zR8NO4Be`T$lOND#fh=sS)&mg}L4x5|2gjxUkO7&SH@8LZqRUA{|d_7PsnGU}rH|5eg2`Dhzr50)fMrx<+@# z*#X@4MaDzlxwih9cnog{hRDLJ0L~`AGU1@7l~)u#O1B@D;(8iww_o1bQt7n2K)PIi zjoZ1{z@*)XI=BKiVx=gaobUJWIW2}NCXGZ})T}b+LIm0;Orp0on2zvDV8`SG53Q0J zu*tu{XHQB_wgTulvFi>$goR|WAaG;>E*!r7kBEpr@Ct1(G!Lj4)*ROmxqS2W1qgPi 
zLA+ZS0M!S-x#IeIA;7MW9yP&Z_!bBV^|laZli{rP3bW;oEqrxlbqeV?-O&QQ=PzDZ z!KKnaFc8UR5jWr-&usJn!qpfq2Pw`Ha17x1Qg2qvm273Svl9Aa#t9_$y>yG?b>m!J zT?PKIH?X(^h8UQoS7EOQt{F#8m`njgZWpPq2iTla#hM}-b*%Br|-M9RB zCVT=I-qWk|Z4f6uY)@6wE|(%arb3rvbA?D0;~taWhAJ6av^0ixzA=@3NNt1%Gjf=>syLIP-2k+~`mic-h~EE`<1 z#;>N6#lv@YcGBo+Qwl0NTFvC-72)(LmzWZexE}NgWV{Y#v!yhvwzc z7Zes!($WfL4&4}@5hDVCMyoDkKEOJ~<{!z($|fJfo(F;9Aq`i0SI< zL$F9qFabg<3#~MGOU_?OlgPif=_bGo8i+U%=x$INMr%FzfRJ4xuIPT7l7gbuD25;0 zI;hQ^L%^4_nCy5jb_7;kf$j^3UTy-4pUA( zz&@q7TN}K)o|CZwR6z(12$4W3*gO-V4TtUVJaK6-T^0bs{fqV#uM}!hb;Vy->DAvbc^Vtt!qBaKd3Mi_y1MPjSY^og z+0<;1kmWv5&A}=p0B974S7L8(ugrQ8!F#aJ=YX7u*6v(0;^IQbqwOuw6Hi57lK)v- zvsm~eGE!7upFCToxWKnQ?GDb69%Ez~T)skyKM<#ha79HugGx1b#AdeP7RXQLcucy_ zcz9x~s<=T0UC!&4(b=mWo|#{~Vz%~R+7C=4FK?rDGrjPw?J`<}4QWZ8Pc6j*wCqyFa! ziHYm46I4H|p7Oh$9T?TXw(allzkToC;zu;%Jfc0s%Cuvn`u*y5g#AUNdXxWQ0(^=p zQ0V<%dB1*Ym@vVPGK0v^fJ;nEhJpW||0jw0Ura+o>;GmNhWt0vFzUaVhP?~$;{W{P zPvk!ITK_>>GIOlik#hMqe}YzHHVR+AqK-#6xLT}{YlZDX{5kJtW@km;8M05+T>a54 zAf&rLjqj!qzK4$R$nWoRXSu9RdWu%t##L?5G^i9cw2qK??JcQZjOa24M~4WuH}9V; zEcR#VMXM0d-V?qf*`=wXZtZ$Q zX0njB+h)VTeg_>a3QCCw%1_$q5W$0R(|+Do`pDC%goYavJlq!MdZG2&`7>Ly{8k_M z{W}ahc{9;Z5kbq$S4ZMup2M||4ABw_dXr>#QC^cb%K6r132hgu{QFvjms3;tgZukQ zQ_sGd3epEVkSzI$tJ^b-LB5HZvyO4Ixp;j8vtW5Eh_aQvr zXe=v>T-h~`+vzpbWq0noA)-AtlDR33Qsz_ABq)1lDY?z|SU7sNQVl-dI*L*=*ia0u@0POg0QlPwhR_*(%X zwZYck{LjF)f!T2YRnO-N4+^S__qY-T9RQd%7(}doFsFU>0?_Or(ZhJd65DnCp%0LE zh-R~B(a!G$Jrr!>X3^9@s&t|4s>~F~1FK3Pj|vA8gFHnKEcevYt7GVc23rV%AwHun zNbWzR28Q(b*l@tSw1K%@9WTut0C(}4o5`U2IoTX)Mc{37Znd&fU>1C7NFZxyd}9Qp zB9z z6ez#R%y6a5fg`jGUkL3NfWE;s+^XE>)$fUugj|%DT~8#Fk4Clgs2e6ay3SnFHw0-J za}{t4@Z=VM2oul#`SA>4ofS2llKV@B*d-5tgXikQFOJ+8k<#CXqb5}7LNgwB*n*uduU##w6-dp zI)ZStVH$eH7pYtuj_vQEe7k>((l2!SZfHa>MdN8kw8D?H3nJFvmb9!CUVL(uishBu z*qV&-4GeD=6A{tv8k^1>;wx@8WVVJ^f`mn5IrL^AlyLK$ORwC4)tDSSFzSerk1t-Z zs9IMhoMj|{NauRcCk$0+ppR(+qx36y>6@j~_UYZL1F7>%OG^hAL>5)RPMU?^KL_3# zAhBaM||)4m;III*waZP3QWRN+!+M&zjUxH@D)<1#WG3_xlDS?;SwZg_y6o*vvMtIU_+ 
zJwe_*H?Qh@&Op0v#>*Z7ES@Hi1eB!EEqq$kQz>G_)&4QuX9C zTywPa8tKlRZG*sVR1=DTDDZ_)rTGQA=uHolS|pvnpj|VD&K^(o)P}n%luHa5 z$@26+FKL)4cEN%m3)+>(FVCpN;W+hMepAVAneM>?PY(59dpYRn)Zn^h&S9E@i<+od zr3*-`O7tG))g`}qMMNZYN49uqle^lx^*1XHF=`F)fx~x52J<9!FV7C)P~8Vt5g`VF zbINYJg8gr_3mP`Erluwht}ipjE<}ZSCL^@CG)i^gjwkPd5YP?E0;G8mF#@T8_f8kv zG(9*hRC?)Iym^E#5otz}>l2@Wi@RQLoa5u-x?yFJ@L5fjk`K+NYhquBhvk9G3ohpZ za~H(oEeL63uZ;o`N_haq6iwvlN~8pU>Mf&Mg=}q^iP+3-%eQac zP-KYgg1mgq`7VV}Abx$MjR6NtBzN5hlLbE}I5QH9Gh2)kHxT66{|Df_IJCOF!n znRI)f?(L(C)H;X$Ioe$POZq*izpO8)7h9i)CFIx4(}IG6lMCP6lilSZ+wHN?dK5Hf z{ld&7Q4G>Ul3tSXHyXcpWf0?))JRjWJ+GuXm?HiD;8+LC%8H6gHtVu}E;siMTx-x< z5_^IdLRW5D^>Z_P(_iKp!08*2~HRd3(4ZYyvi}JRy3PSQuZv+HHrKED~$8VNm4mr6iwI`J_ z9MXk28P#*+hPJ4xcm4VGD-TI0y|u6`DWbDe;?rjoC*ziRiKv~YxHR0XZdAYsM%H=_ zg#3NdXj;lyr654NKN^JyeWp=ZS0#wH{ZzeejZ~~6LxT?HmSZXMs;GM zdPaeREi*%+PLMJUre36^K5q3MRS1dmSBELb*b$8@E_kW=P_`*DYU)aO#H)7B_S^QG z{2I9aLXBVVH?GTH71Sq#;0O;K!eO`q5g|ADIWZ94gIrXXqh3ix(+Vkk$jCB6ato7$ zS3*b#6~+QV=pDY0X71L_?s7x}M|^%~woxLI(QCW*npk<2j?mzrn2*mR4$STzxE+W% zHvJXIGNjpG z9b)(3;HTdcPq<%U+F_afW|VDK!g>*7M!ODcG<7xa%}>TWd@+xG4M#B5Meh)H9g=ES zcfam2pTuW9pk-t#JXr~mb~4{W_rtg0qjp!igG;|+E5nalNC1D08XGq;!A;K19lEUP z_(S??U(;Q^G5|CWl%V)<&3p+KBWa%A=iWh(kbcoDXuCe_4+T&q*Slz(vE8JxLWv*# z#5(kYgDUpj7xMG*ny;a|>c{=f-cThH)Xnh{;CTjx#c5bHQrn4Ii+%5ydPxLj)qeO4 zk3-*lI2JD={r@T`qD)*1)-I}?|4Sc%N)Kd{JbU{-WL|Yj>cb3SKKDG9gPK{QVrx zicq$~I01j?d)Y`(IZYPSAKX?Gq@}yJV`shPuc)CvTU8$8AGjRI=djl(oq6Ki>>%Md zbJbrpygsJ3gw`Eb=9ZL{^5DVa&NY%+;cSKd&JB(Y|8d3X65&|HVw-w6yWRQlfmF&L zFRz;zPz?OJbm@5*B$^g)K7BrQyA-?-$816DUH0 zobqJvKgvu&_nK<0I}d0{IrP^&#}fje1Arok@GwA;XM}Q)O41~7a(>ocZdzn%R10(qZsw>c7(kO32=jJ4=|iI<7TDu{f#1K znGy*1#q8Vn?+GC9R$ZN~dlqk6R3GAkNv`ei7M0ZFw46b5n6MHq-oIo1B;)wPl6#3J z_7*z8fj@MAx|O9~JJl_|IlD!gjCHRV^IrCN*$)Talc0Oir85d=0xv1p!sFeqyrg+A z{mS~arV9>3!_zrmWk?fhkyci`+H&ppqUwQqhbnC=8e9#R)9s7-p^?!FH!iB=yRjNz zfNE|#WS{%zs~X~v>FTLp+l_N^MnOaXrA!Rb*;}TC%{1l6TyF(oQ2iIHgd3(Ckm zfLoaax}Twd&gyhe50oUb%Q25xFFZ#OnjN>RkMS9_Z`NMyp~G#Qq*`hY&KjX_bHmB* 
zym~-KS_b3?j&?zA5o-}}G9CEn`~CZdH+Q>Ke6f>^X}u*R>+LzKm7z}e;lpQcS0MK} zt)-`)fHQCk{~?9_8zW?9?$u}q|HYtZfjL{M^qD0}qC0Nnc#>Ph4NzJQ0JTR)B zfaXf9VbewVp~wER#} zgc5&C=}K_BWT;%q;VPb)o_^o%Gi3IA!h}jkhlQc%-ghI0+b#-5L-oQ=fSL|b0lj;q z9(r)@jPCh^aUYdD%>D{)1L$*x;NlmnpR=U*$V09g+J_<0;s?Q*T-lPip02L0<>uHw zBeb+RjT_;z$LDkBh9pnALQPQ-GaMd}810JBD;Rl(Qi-XaaRAy^VSTUny_phtji9fm z7bBCGIawV#t!&MHQ_HE->itc9NxC?@>JJ`HQrYdqJ`ns{CmYXD67!y7MM?w=PsYM% zU2B_t5Gp8ORuFU-r5^}~tI-P2rwHb}dn({v|4Ud=#xHs}7AE~e?>cEcKa`&R6brST zQodZ|Qxvj`qxhf7-!O@=FJA2kv$j@yuo~yBLhu22EY`pA=TeX*@c3Xo(ZBwP@;o(3 z&ZsUzLP(-nsEUUts4b0NV@XKAv-&u}Zs1q07Q5M>AX4DTWsJ3myTQnDLs`N*+f-1U z9PE=wx@Hp!X@P!_VGx4sv-w+D{J%r9(^O@r+>X`5Xw_9VU56s8<>J~8#v{Ji~y zIjrVxZddI~eIBAHbE2f8v1C_J(%N=njqZB1ph<#Sq9pMMcVRw7A>kfoo%YAzgf~9i z&5Hpa4^Nj@b8Znx^>Ag@UM#mHTsb7>2pr3RuLp@HKe2Ce8~;@cfX{G)P8#dGnEDKA zbsX&Y-U>+~`^_;yclVD2N;iZ@2`M6qsWR3Ip?I`NrSz_W6CLuLVwxGs`i9GVi}W&uK2yjyFOg(D{$T+Gs=NbKbdjaUi&Rdz6~LKQ&3U& zc+#ZbDI~prpAEg?H9~-^&z6GURLEohzjd~B(j-;1J_AtaEcZ#bHtj?&EnXlLRrtZX z*1=3Pp`3SGjFIcE|4|>4^0znr`Q7-W*M>Z;j(3uh(y$%y zQkbV{em=tab(rSlLR8sfO#L67(`duRMd#ggPlz+iEtF4h+Pd3=!pdq#8KNb14K$uT zO zn(D5jwP4*4Rr+Rqi(h3;P==OV^qg@*0;V)oRk;~H9KEA?U8mx2#VQm(l>9&64(5YU zg?{hdS^DCwSS3(2!**q2l9pqC%zIaKYlV^_ChpRcNrs)%-RNape!oVRwMkz+j2HUI`6FjFA`sGfMTL zfC(*-Dh=kkC*5b@;qy2G4^sNtd6P~Umwv?If%IdSHB#JZ?RE+(}1#?H+2FG#LP`t3}yS?$( z|E=IAI7A2AZ>RH@8!l7u2gT zoy>SaS9`=6U1F*S(bJckXl!8-b^-KS=KS81c6giP#RBP7yF=TXn_gn+WNdU;kktfL z<1w%EPfZ1PX#_*%6!L1P^F6T_1eLyC`^&pCWoT;>DRi}`-Y_7L)BbCMLEx4hl>-c4 z8{v0b6L`neKhV!%eee?L)-5l88p%cGWETaUcagL%w%bxZX)n8Dcsdj{JsLkb*$@dd zIrFRCV`CS;x+Fz8JIvgHjsQ~gYxe+RE-80+-l?f6+s%nc-qK?sS_k5}M}Sd&lm42t zmNzS(!kk2rDJLO_8lOd*UA5F~zNt+Xk*eEHdk>XvV?*WJb;vOK2>fVF6$ghXFR#92 z8w$YDQzSD*cQtKIbwRs0q-DvuY5YDwDMzS0j<2SNKl0@n)PZ)!;A!ZN#PPuU?VX?9 zI}aRbh`f*e0J<*&mKQ9Mg+E{fLR&a53- zsXw2NPFr^y|9Z+|T&2$qFi|INji^W}tQH>(=po+KYE0=`SXk<>Hc>>q_qcBCa2**M z9~Y0N`a(;RGuEH#4~mpr=6_CX4M5DO`!jotjlWW{m5~^jkrV26j%E3R=U(!g`jsEg zk`ZMSc0{b&jL{ooYSAV=x{SL98?*?y7X*IN`g)8Cou}C 
zyd>A0m6C*Q|4c?X;Z%9`%tYaU8ISR682T}Pus-19?sh(?6fUoREcrgX(Py$Nvd}=e z(jBGea+3f;)r>(a+1L2=k`bWrF_?67OT}?6TGHB&D?mmeX6anN`EJ`2lgc`?RvDgYLvSbuP?D+_U0|EdT|c}z$aiXmY5HHpH{fMT^Cx!e8qb1H23hqX20u+ zNBqtiHJvz?p<;sRY*`$T zzC_?IMRGIWRyVLlaXmZr;IKQxfTfeUUa@t)+U*%Z!hgu5!l)$`&(Sf@6d$aMjL&gF z%xXG@Q5RJIIfGd%qXN@@W1?*-`#PlGWA~8l;-b;v=E=F@s5w#2N@J+W3+U6wtvx43 zdKFdrOLc@6=`pYMPeHoc8+Gu3tX#Gs(dh#GwAobygAm)Rs>xlC%X6JL*9(TLv!;2> z%jI^5rA&*khov5TVEp}~*$Qn&%qcgoODs}tS_2RtI9W%$_Ut=J?PXYA-n;Pgv?__v z)gJroQIN6dkT-I?qUJI%hWd>3Mlw{g^{1~;E{@K(5jsF$u0=fIdO^l|4dq(DeJ&N0 z(n4<=#>RB*n@g4#O*BbFF-~-}+B=iSBDuRE;P>&08Y)dy%LW6*;>(01 z2K4wM5@Nub5nIgo>lWXUwj#0 z4n53ef;mN~o-*l0`>aFB6F5%Y?Rzcb7UC@(0`skfT3e4{B(Z`XuOjrcJ!ZH4N)X6L zPVfdQU$ZWZ0TC&eDb}-rv_yd)t4y57)pI z<1VR&Mp4k$qfIEmL9FuVQ7ehmc}BsupU&7m_u&FY8~V<2w-SiY=;V@)K|dO|k)P7h z#p6&bDr5$>kH|(|wKO$hyN#R4oJG8U=489}D1~Rw(#uV@CO+eAO74Y#ovjK11f&4| zWm*aN==Y64^0jB}a~(7p_j44C?Z9?E)z=W2#o^ zn44NVPaN`gyqu7VEJgxa4S4m!S*fX`Lo+ulYIt01Q@1xu6)9V|EUNgoS2Vy9GM<>P zcb!~#q04t@B6lCWk2TTz+gpivLab(^4Hq1R&^lpy6@RF>%Hx35+|iM#Frwl5hKxdL z?9A@z{xUa~lFsd|_#JGd+KUqJ%B`vaQVYH5$UWlB(rL$7bM?xZ`T0iG?JD%Tzb-mT z^cJ*GV~{;wqC<}l$&EL~=V>&ds}}uV&?srC=HLW)4&5$sdaYJJv@NN}2*GlBlB}Ym z)sz~!9|?t9jz4Xt%_JL+I#sBct7{dM3N0B6HEYbIc*DhN8RvFJ32ORFx8bzxS|wVI zisw((M^dF@hoE3&_3!V`2jy^PH%5ujUPGbaWTjoAG&H+xPWNPJDu@?8fFW;=u4l}| zkB^bUgF})pGCfoq*QSTr2W=={G{10yJlz38wu7r`x3jah;!#IXjB_=n2cKrD5!KrC z_ukbkR8e_vDYFGy-&?+o`3pbgR^3`LN9IM^OB#g~>~Z5S9nvTBg;S-{`#qtdtKTv- zHxLW_AK&IT{2#hL^Te(ua*nj49Q!XG9}@j(>v~~-Ox&se(8=j^*e1EK-q>dIMXFlQ zc=`Ncjp=v_i@CWuRM7{;Y($i1Y)dGk2+@WQz*Sqxi?q@|y2c_;6>3;)4oSN7T$BEk zvtCVQwbQ9Gvd`3F=w;)>_l>8mo?1yhBl$9d7n;6Kd(4vW+6?@u?+uxl1M z#B3Vq(#cjpSNg1AR#xc(e9x0wtn22KSE?MAJ5TXwHRd|YZIY5x_gluddlOw8xJDCP zCWTT10;SDJ2@{s%Pa@y)LMRiJ{VdBfi_ig@Up>Wm-+p|hZNFt>+56TFQ3-M@8r>k_iK~~x3mqUib_iAEoQ`X zSoW^T*g1^s?JS7mELjcL#z8aH!1!=E4Eloefm+YcUmP~>U0XZU+t%6n{BTb%%FCy4 z?wsUte=!i-A%S-H?%lt@b;JsAfpEj}O167I+oa>%f-WcQv7Hz7*~&z|PH|2s+7?kq zE$GnS+B-D0dW_{;JqWRI7_{-Qz0k08_x`wED2*&!EN@gkpHez8-EI4>wr`+f5geI8$ 
z(y^|+=)&A2fggA*9U-zT^#lr7m(*OjJN=!^F^;s?Rta~u*?2Om$ zt6$JmkJGGR6*tf7Xd)L$^|eN*Q|5Vfx?hH^%0uIfjYpn z^3LhFsBd6sG+${-DJYaY8YF^|$#32>4Q0qtfwD#M0y6F9qWp=e3V`w^DQ^KjOswGKJBG%e>nRVY~M z`^2~{p6+gu^yX^C^HNla%8u-%#{meR$MkgidTr1%@?^#lrVGA>I0xn+%)@hW@tvL< zts#QiBN=7T}JK>t=T z+GP9Gz+iT9v3YO1X4zQC`_ax{2n=wmcW%6j;&PVDfTg>2xa+poSIs5O!|_?u{n9^^ zfM8*1k){2Uea=b|`QY2|d;B^p*5XE52XO8V?YGS^{!2!5{%kwIe)S2oa+LX&K(v{E zWW?hDRGfG3SitcI`wpiDmIOkXtg-X%&X~ijqLhX!b*{cuTeULB`;a(J6_>`t!3nW? z^*JFy8#48`?_k<Sc- za+6E_Jf&hfvSLR6f_I25XQhC_@TI!GfdLb@J_cuXoHXy*jJF0#N|yibeuliCbpiSN zfBTLe5=TTM!+<*qolQOP5JLk(D=JN%C=}XyLp`vbFRmS>N+Q!QO}?|k@>m|LbET_E z2k!IE0Xr4k-2JG&cSkODpAVDt>WMuUQ!m>T>DvnT+*MBg-SUXcmmvp-8 z^{K>6MUh%s{=YvWOP`)V@4E`MtI6Y)1Lw1YEjIxoIu?F>`WyWBP7dZhAQeVr=A#EE z8)|x$i_a7*=O-)dXOr>pu<@~bUs~UR4t!q&{j1Q_>$4DH6pErT zU#8*1w_1H;e<+DKx5bwp&!3YA!x(Or>;$_^1-2onLN?Ahu4jgKC;Ys9??IIy(weJK z(61{i8=DZ^t_RHA@JnoFTDDxHf0~Xy&ZG#!hx@0hFzT`P?NwCW(LD>0s)<9z+W4+_ zk6Uns5%SC@BFx#3<;dU0Yvb*R*r2>>$#_}4)ascmNqjwQ?I7DLS00g(yu*JawHXZf z+cptt@#Q~#iAaYP8TtZ4|Ivq-z90|0EpuJG>k~-)g5^bM)u$VetT_;w^*=NGH&Ipa zc>XI`bow_~eD?>Z>1U3iA!{^LZAH&hNJ00xgrgFEqc>);Dtni9e%}8kN?(@Y`R8j5 zQZ#rPk-1(qhi6@x`8Vz!^Im*Co_6>BczM3Rx_rGjxXNVjR9E5U(wX){O;mi|qA;F< zx85CZ6zh8Xp@zqzo{?t!ExOBC|L9Wo!J!IyYIoH`ozVY$(Rb5`V&7iCY`rz@S=bO2^4GrO6rA6K*_R)KE z?-v>Oed-CLU4)^^G|s0uP|x;NB{|G5{MulfyD>4N1oa|LH1c5TrRWNorJ1KKb`ZJz z>oPJiVL@d6n~#$3irl|Rw1&g~x_tW9{iM$_ubxU$QU%=MLfE_ zFA*2HVRNKmvRU1eHEQNh#Pgjtmg`4waJ!~r?t}XTceIKh|FaRO2zUKu1@PINAAt*_ zcDgSO#TCegfAf&rC@J5zHU&jRwGTALk+D&^8DcnGFd+dRSAD*s_|nhKSqOj?<;)iLeCn09tXI=A9REBbIn;)a>7)=F zIrCn+@~C7F>pCmgmo#G^lqMwlZ#!&E=vi~FDM(-xzj>uy`eSE#)SlC zNdF?DkDEo;l7Qu(`paJ%x6x8iEqTa3VU4)msH;_BHh>S}=;XY+820MI@&F&{yEX@w zfq}vNxxy=w-=mku;bkeSZjwRIzhOw*3e-#`O^{cs=5d`2L@ zFMdKj=Z%lQ9wFWW`^iHS*XdGek_y|kFH%sqmc8;|q@ui%Xqw^$1;I31?X~;k$B%p0 z57{|58Qt0!$bS5!Q?K!Zv?dhTe`$JqcI@`)${{s`_Tq>ru|@jr`$uGnWJVKdzelYR zy7bdDj4vp%D2Bk7fd*vdFy-S>4@jzW)@oKL>8hl8rbrSJP1ka++i~-7JD&VTQ{LYa zi*wPxsp(q&EnYfS|1Z-sr(c5T{rv;w&YPUDlBWxe$GcD{vi+-`89p1Nib0N@=^QQe 
zB)wrXFXW#U_RksR7knl)myJA_t{y$28U0ZuoBY))|IKN4puXP2Xa42!&4k2xho78> z??=##R$U%;>CCTkN;=88&^EgH_4SS+labMqc~#77%P;U9ArJLBcC6US+CBv)(q8P2 zXl_h;cwUc}#QsSmg0wHaMK=+1n&!0ezZ-$MzcX?m6bcTIW)hC5tm>z#&+!>;Um`8L zbJRaGYR-m{(PFH7lH zG2-M+v6-1253iXnt{vmE+I}5+nj7k0Tw+Ph>0M0X{-@m`OrbzGd0xYO9EZQlO6x%7 zpj4W4`h<9~n^I1hf&I+uO?8}zopW%?;9{=o>D#y}!HZ2zQE|~{T-12V?}L!o928wI zt8S++N;_6#6SCZS@$zr&7ze9r&6Kh0^+>TvS!Fo_DTxH|Un4LSwQ(Hc=Otzx*_~Lt z8p7ahfB5hl3NrSVH5DV7t#0TD2i};O@Iq&DzQ!pP>K=&}wPV zzu{3xY3=;_xcD+%kE&4tr;RDGmC<;C>A)g~c^SCY7ZM2FEN$4Qs{Qh0e`;)`JxrN+#%$%8@Y_tIKyO z$A>u!eG0fqsNTdyNfkS2{fv3L@CRk(Y9fGY_VM@UvlBB6hJ4YgQO+^d*d4#^3{Ljv zJYZQMxt$O!=SkW6mjAu(<0nsc4s26J2k1E+OhlvYUm5l@V~A!r8!YWh^Rm{&v#;t! zu~o*W)79$ksZ?u7ztJY+|M-!eo&BeXw5aGUKzNTBm%BM1E-V5;3Wxx1RmEB$s2@}B zne=FY&Iaxw|K^72;aKUEmux^@^z5iFgg}G$$s?vhAhaa*n#>Tc@4Jqdm z2Nt>xQJD+2V&kfeB*YAl85^y&Jx&?>q>)R(ON#7)-a{^a_!EZEfT}d5p#ejnBDxx7 zVwDG4iQT9L;ykE^Mn1kj_%l)?a6Z1AHb6V~6!~}d><`h4(4j9+G~;H@FUP<7O1=(8 z66CL+DS7#Q5zR}AA|xuRZB;YJ@+_SCHA(cvOGSHH^_tGg?QBxb zL{d7sz@d)tz6yD9q^+iAhV!oSi87~$c*TSkbaY{K>JrsEmeG6WuwM&k9=e>Dh@z#a z=JIB#Tw!AIoIFKRygCWNqmZ8cJNvn%XTYHQP4M$ag1;@|r%W%WS>3t!sz(`x0|+|4 z&NljEP1HBWr*?P8g`oy(SNT(*uv`}wd0n_aog^2fjFro(# z&rVm1l#j;>iP+c_E6s=qaGEzbEPliUzo`*wM0ur>=UC4TzP`V+z2S5~dbyKGJj`{_WB>1bYDqYJb~)%q0fRkO4du`R#) zC0KaQVXZ}iX1|@#IzJi7Wbv`fiG;%*0}TzWVJ+(FMaszf`@4}U{QRjSC{H^W3#VOu zNu@Ehr*kYu%Qvst9cFKwRjw}4FHqv0y3V!6cdi*y2bP#OAAY?Jeal*-tZvRnb92cZ z6IFH{j@R9jE-PcnOy=e}3fpu6G##B$`_Lf8+i-k-yw`YD4gGXsxhm>!dmuBkKh#3^ zcYl*j!pZ1?4IT{{9UZ+4io}1_0`x>&?WfCLIP8FvZ`s)&dKQXU9|0zIf0*BT>EQ`iQ72YSLnONvqld_g&TG=S5Bw!>JAT?$}x_ zR%z8SXsp+@P=&|qgpSAXOH=d6qcr5n^|7WquU*Agzpyn*GREN8%HzD_@h7I|XVd|Y z|F*6e2uBSL$;B$K_mmtQH^>!nAKw`2{@ABE?XN#l$lm(6sag5Wk@=T_5KU{wONN#} zxA2}}cS#`?@4nNQSdW@#Iu{m=|CSuOknleTBX~{!V8iW(4B{7x$OD1+$2VUq8NQtT@ZqhH;3$oz(_i_-zTn76BUfmDz!7Cr z^kyN7*7@7}`{eu-8Qff*5cTX;<;e{~l6Wm^xnyRyLd#0JI+fU4C+GL^NGupMGtGtq zoC``{y`3x*eXILuzSXZLGm*L{z;P_XJUb4q2EYrrdK}$EVeUE}aT{U?0UKQLZPU6q?q{`xvN!4>LpXVg0KIbhTb;1`?qmn`ud|&n? 
zx+yx>?N4#7`R;|dfTdM&jdxgVY# z$0ge$10KOQ+Bd-NI66D-jA$xmkLhbc1+i-R_izozxWrcJnb|G=4q;-{yz!Dd-@ZXM zjIj1OvhhfUmrx?q;o5%Yi%+91)>=p*ias8yhR0)%7kjwymS1P`PF= z&!mzyP(=*|ETT9otFYy}U%S=W_Qfd%c7-POKTWK}2H4O`&u?T6Mbc0!j3BF+WF8HN z@R*S@P~L~ZQ?R~%*O@qb$9Ofx&UewXJ0Kn%EwH2Z4WmgGEYz2X-Yc-8njjoL$S+pM zeFx6U-O$vc^>gC~LoFZsV%@PO{~yBMI;`rp+ZqKGP!Le0L_!b*>5wiJ>5}g5R-{uw zK)O36q(xd9kq(h=kZy%V_nGT?_Ph7__Bq$}{r3d0*7~jco-yW_Vko&5>we#pafQzP!gWt#*b z?c%xD@spJ=tY;^4RSUa%`nkR2WMw%Uac|)~7DS?%jEg=MYYFN`r|)3ReXN7kPUn8` z=cBC|d8kZbL4<4KGYm5r)$3y*y?wPhvPIwDw2B$hokB;y0kRj;rly~FPrrS0Nl8iB z**!VyVW*@VIM!9b$BV{;Ew`^)V}qlf2H8^MPPSd`gw!038m@+n0ybc75j`_bV7TtCg>}ybwY5W3xKtgfNC6UNP<0J1(B?Mz+-* zVH6k-g@x1m{HnYH0{ozm={it>B^h-I?m`C6q zD!-$7Fy&M1#Kc11^Cev1{0`J5KWtBmDv`%>Vwu5vr}GD{24hcJm8G=c#1P?zUw{&e zKniv;*L8oUQNMJ-(Tyj}LTUVO{0~CO$^9eAi{-2i>vCL>McAy9#sFd(OSvWvQJK2t zrSU!slT;ZmNM0nnLk%&1s(0KZTgS$Dx7d1h*$!WH<;=IK%wt!~*z+J=ErvSKBzSJ+ zXDUza;fB$;%|I4yy2qt!P3-RA-n|LuojVN}w~d=4a$Pb+2d*EeGV8RY^1pHTI~340 zq?_~io+rJD``jt^1J?=>+S-GSADec4KZb^UucpcZ+e1mNRZW?`y7D^r*mw$=>RIGl zj;+dKsT{-gDJ)*E1!}>w!_{3OPdM;KD}_Tr+GB?UqqiThMWKqM8J-l3^m@MydviBS zFh+bP{Qd#s87}G4Kv-(x;Ur@@K%ez~VKd|`h|_~DjpQCSGb639|DM_dfKQHhbJK$v z>nqW$wi{8T9pg#{b|4Awuba4zMG4xBjJlOoGJyosN)u;5@zoi{MH=agtM4MNJMS?< z*5NTjI?5Fb$%Z~3`EGdk2wBZW8Ot9DwQcq$2_07T1jViDK1g!&4F%||1Ic6U0@yt%}6RkhAGiWEouw&%V5g=({r&*0}50#VT^ z$L{_6`)jix4wp`MRL@ij2Fk$g`MyEW)2pi#SEZSDkUe1jF|a}Q_3MZU3bB<9$v6|J zvpcImm;F!wPgf;N{*Yyk_1f`IeuhF@?2yd9=VyED!;{8{ z1I#d--@jI9m}$qB8*;D|mI}yR+1kt~JVl~E8#s8@JE{a5-UE&~!Qt}q($Y&8^@Oep z-+(4sc<8W)2&IrO0EOVNb&eUCRS`47bvSEhV*@D4=jRu0IdQo>1R6&px7?jIjccP3`Nj8iPUKN$B)N^xAN#NMtIRk9)G){Jcy$U(R5UC+yD^((VLR(dXGJ zcijw$N`3d|oR&u-Bld$&s}JLXm$^4)-`IXn?1wmkRlxv!@Ves2Q7I-lp{qTg?A;w7JJ?7n)_;n*b6USOX){PzM~bS`H|GmoE8_s9)e zTe0cpnI8zJ>5Z3enp{LbFGZ`~?-|Ip`1a-2U(5VhrJoUMDUAk2X=ogkzF?(T{IpJj~@2aBUJ$m{<5Qz}9xk&4u)Tq9zsjbLPu4z<$*U%qScIdk>KE%?y zZkJ!IF?5wKvCPN1u0XfW{mYk9;9WHTup7QSx46Iqt=Sf57ARZ$&#Q^v?hcxqdw6#C zcCVC&3uV8_^1n@f`81WU0;xg?Y1Sl|NFmqB#%X6Ym)O%5mQ5K{d_+?`+#+OpasQbD0NZI2Lu 
z_a&>R0>3iRf{A+1_{;w6E9LZ%F8l~o;?im~IH>Vp0r{-NQa z`8Hed~TnQeO#`jczkk#pz~UB9*>CzisnhP{T*4MxeV; zWziGcRArJ@(S>$vmdiR{{@i(r>u_tPKYsJdsQ$7#n8@`c)`E2ULMIbBbE!*;m4eQjYpd*$g}dx3zLr=x=rLh=-G{7KZA9ux(G3ofE6`cdElb(*_6G^Mm88|JC1mk5z-Dawc668*6IKIsM=Y z8Y#b5OA$C@ct+k;L_?dIQRgdTle10wpesOq$_akLb3&)xVB=cU?@st|&J)LxR%UBA{q2_xWw@2hxw^^QiMHE8XRf(oDZE&~PpPU0s##INTnGZS82DC`!}!ROr4F;yp%YY{iHg2u&Aj18~k4U( zuZXP<+5DI;OpldBLaN*$%m~eW1)I7p=903_e$8|k;befXruCUA##_@qY5?c12jG;i zWA*H&KP6#;@m+gA+R#*$z*v!iT=nU}M&h<~4#)_o@mGwp;ES@!5lBZFv)#<9sZyQo zk4pL{TS&fi93xVYd}l^{|9;KOxmpo9)2gmtsPtW^k_EL1(Lq#?7(UmV_-e%_X*Tjl zf?n=xN5@JY`3I!<<{sW>hh)@};NO&~hks=`kP_0%2r6E#+XvhqLu#L&Ioa$YcAgau zkv!^7YSBy^B?Y;Tmq6SNI@io6KsX1K(Okh;w$yr=Ue|5jHH=*NeLx+hrj^B#w1iA+ zUmpPri~Qo%|Mb@8n=QE^K>mP)B`G6QmRHDI)J06F84m%s41i8TCJm`z)4l|*Ws^nL ziiVhgzEAsH5|XlDt}t}$hjxcpf2uaTCB}>lzdgw_D2pyg_M&}k^{-dKe*p>B&aSaK z-c|d7^CAJPhbKg?P#Zb@oe_r<6_UlaVm`K_5N)Mz?fOOIM~W@kR5kZhywn*ZLy41l zvEJ5Xc|Z*o;r$+7@9y(Vo3}Q_a@&*3r;PcM$_3XSE$0%QqVTxRFK+i`P=-@(9+;96 ztHkoL(qX#Y!wIDHmk%lG4{%?5?7be1lr z9AAEYbGrO~APrY4zV2zp_tNCkVaT?f2l)crJ%qI0T;u|%^e z{w5aoGC)2Mu+Hia+BcycCJ8W>b!`6n>lHajU^tHznfxqD|t-Z@QMteaJwTJ z1u0>;LK%jDfgwQ)FoLhJnrcrFf7c{@S3!Ym_10^MkuZ5Bfq4-9>2}f@Q*B0~f4Ak^5S3YXFa?cA#5GVej z4(?l}R3cSVP98n)E-!Wt5r(VIb&)21ak#?PDsvF$nT5zj74iu1d;$Ut9M-+(JbW|h zB%+k5n!3N%a6WiyJ^s$9BYfyfUltf=5OoQ|`oT(HQYiqTN~;ku1$;M$zrZ7{ zVhd->N;COBB|ZMlf&<*{jShlL2={J)_Zw;a7j3qQCqUxgn@jq^z@fWXC+ObQ-=C}2 zPp(}@ZV=szc6(zk+@N1Tx=@__iG9_F;v=FsGw93{PfkgZ%q&QHp(LEf->12Q|I?Y{ zRcgvA+OtCm#eI2Nm2GEXO$-<;1BdCt_Of4H@`GEFlD&zXJ;w~a%3iSsZ>70h)=vWK z_q!d!(ssXc%T-mKp$Fe& zo5-YXvb;6UOHC;fw2{7mh|_fxqDr+utiwMu9TA^IPO6W=|2E1a z)hD?8#`Y@S`e^#WW>s?93&rMv@3&&RBMT-1sXx)NYkD>Y;{U)O%=i>OQ0CZ1Y8exk zCILr`7V`ta|5E7`?hF5dLCebKMeQ;e}-9*^>-&M2e8e`lQ#MV$@?g-BO)|QO# z>48+Nx5hF@K;_7ZW#wdcZ`GO=&Rg~ZGll=){ogGfo-9YYk=k z0D6*lk+1Lt%Q9nL>yxIYyfPM~thAoWp~{=>buqD234XN**aG>`XPGaSY=1Pm>#&JP z5$a@ly_XuZCV^fGZqyKVMAmP9 z>aKrZgfeOU?QK$T#4Po67{`NNg{WsVyh;2cO+FD3CNnb@O!+Q&Kjq|HTl^*c|%@{?6nJo>1`pZ#m_VTg>9;c$dO+jM~)3Y$3 
z>&;&Sx|smtx9?SFagz@sOELUgEHwenyiV6p{_TN0RGWPZk{gyc?TF=^RLK()c|~(HRDO0GTX_n?7ZT9RK0XN8gK6x}~Sg_^~3EcfB$UV5jz@KsA- zv~W~RiuIo0l(R<<5^34N6$eupX8YRWXZk@>!kuyjL+G)3_eh7XUKT37%Fz{#_3)>> z`qSk%6i-((*YQq?DYe;g@wgiw{JF2On~<5e}Id=l>9sC5%0po!zUb4<-`pQ@la7O+xM;y za7oMz3rZJ0AJ#mHP$=%L18T!uyC?PBO=E z=FAE?z@qg&O$^z0@W1U9rBNb>@f)&7`CLj5tggLLDBOj@>142N7z>wsWel2Y5ib&K zFm-|sEd-V(%8Vy4VTPezX3hH-Mk!S`|!ChyP87 zAbzy2d%1}Fxi2A5D(3}!I0Amgf~Q<%rbi3vgWfRx4Ivgvp>;c=cXT}*AB$>4I|LUs zn^H5^q2=8)V`Ec9@6eSgaBh&I=+M9{gPfM-wx-;b8#*9L*{ihd23y1^w$t@pgUbxO zo-4P@7#9J2o||oOIfzvYK_#m@*buSQB)QiSYSQsjrCWPg^A zIs6Y7ApZEER|CcA&+Lycg6^&cEiO<1|9m`wcJx>UPpY|e{S|`6<~tiddZ_OA`B9b| z0uE`p;zwq)AtR=Lpu`r`;B|hLzp=2j^ggqbj_xrTwieqgi{zRKcH+dFwSAIcRO&O~ ziA1qb5ow*zG(Zsv_Vv9E+aL9OjzPOuP&ND)ZvbY}@M(0@bOEqN1M&pOY!M%|P{WdD zPsZg1l=2gHjcpOM7;Ny!5PN&B1_&6ddqxU)3poZ|+0K55kEgZkP3=F1{al1}bi&mN zsjaB^-qKfZ4(m?+jj_UxHBQIL7^FXEtql>Tw z2DFCu?xWG2xjsEad(qgpohiEzGQl8dkx=g3z25}{jPa&TEf|g};I|nz^J{Gx%eF&r zn_QXrA>=g-x8!`e1YTY^~izCdhCk zZ8=n~L_E&q(r(HL$utG#v^~)9B8}Pyw=O>m%oZ~Qc=NP~r+Y|UHOl`6So4@jkBW^g z665NCg|Eu4BTQ#*ciXv6QbKBBvC=BUt-Gu@H@zfRbkFpu#q>=$mB&Pn0Y#NBAs1X5 zZzR*RooMlH9Df%a96a!xUhcUrn($LBA-nO4#7p`8`EcQ@g%YbV^;!=!I98uU>YS!) z?uk3d+&*ae;kx!6<6DX3J#YxLiPCv(r^vbZ+V(k*(78aF;kdz4M%1tL(|6As{$~9! 
zDvqQc(ruuIGb4CkqjbrN{Z&4xf@GM4^CQ*xm7FM2h5VFX>j(+3cPs4S+^m;~3KEmu zKx~bB5m=Cw-+sV1>_VR>m6M5NqPiZ?1G~rwU|u*+Z*~_%QT_wMbef_3$d7vDkIRy{ z8bR5v$Ck?Pgd%CU7VV27f>Hx3OH^66T~q5`zo(QJk^~Z+^TL8ly_OoxVr4Wnlh~T( z<+0gt9th7CKY${sz1%x!abW=lGV^Cy9TL*=;!)Ivw9A?;g!h>yiWu>~Bu_d2QIE;0 zKj3Op@=b3Y8)GeIclq_qSacrN(f)@{f!++hMR0v}mUP{1jIE~VGKx;M82V?=Z3f1) z>#Ew)2-?04WVvh}Oxz}adI!bKj8^DuO+=MZ#|NbJ;!H9#ZC@jD(;*hO0=c?uC-_{STo9tt{sdNh6im)~sBcXy8YgYMd~r z?#)Uk>+-3wV{;BONMaSDM1rJZ!u(Mfn#6=>RUS(QD^0usfNzy)Iel*sjYWH_%t;n> z*={iG92Q-o@>=E?d!(|jp8f3&Z|zz+OZ%j*d}_(4UkrSM<%YjD5Ie82TE2gucwLJ~dw_NYENy)oPf$=y9DPBQ5Eo$+kZT~xv4 ze?v@BX$BwfMMfq0=d{bIhh^s%_Q`HcO@RJMSh z9G*(Pvj0P;KuLfZV4+8aT35O_qN!=RxROC>e3A>3&Z;3k*VASMzBoAIR1u3gzSp(%yF+noi}Juq6A zS z8X@obYzVbDLCB1BPfsgUwJb4@Y?qCka-1j#q( zT{}BFflEPAsfqo{*sV>o&L@hCUUurM_|;&HnbjrqWVfsiizI?rj2-FrYj)mpur|Ne zbOE+1V5I=7+cP|#Od!AWYX9l(HNo3QBq`T{FLYUV(vumCsMIV(l?dKDx~P&pn_{T@ zJ)0>NZ!eA7_hy5KsCeO{)HMuVikV2>+gmCDt9thXGbIQ|tBFZ*Qkm2Dd(ESUE~x}L zlBUm<9cl%BOcF}^N!dS(B7ODh*JkCC)nHd|5Do7(N&bBW^NNXw`Co@WQA;l5=TeyU zw!SAY3>bVUiC4xLon(592>KVJYTt)Yl*Da?OhPbtkcMtDxynOj1M$CjCQkQKns$== zT%ldNcbCBAoD?Qsso!`|!1cuZr1OWlqvqm|qt^Rh{L4&$TOj06Lpk9D%0LIV%Kj7=-=wmrX|f5x3lO6yGj(ScPqM zO4Kon*y@@i^kg#VOGBB1#C&MiRC)5&b*R)60|0oK3Cg%SqNm%9ceb?ob68GR*xI~t zD~9nEYy?EWhTw%g2T`<7Ze3-03LdaS&$u zj*6zDN!|VFz6@z4Fk-B^il+QG{xFj$5%WcSkQ>eDk$p(ZeOGeX#o05&*B~-9GlP!3 zEK?*7i`+O)Ogp6JktE&)y5rKw#3P3K@<|a8q`LipJ>iplc!pE)Bwf%8(9`bh{>i0V zZoiJV($tXf$Mf_K9%)Mk>cxruUYueqtt4I#VN)NB5{D|)D1OAeRHQ;KGeP}o({0q7 zXQr`ph{t6+DHGtd?Xt2yU(asz82HD)ijItkX-&~64{;Lu7walLVP&vlpEN9AVE>|9 z8WInMwRDNh>RRAEy0Vt$%ZqO=>-PD}FePA$hOc7iU#7|b3(pE;?e(_8Dd}|y{d>4Y z16(4&oL+Cc-O$&nV>fx^KIzYS5o~1Vdbng4+!zN#FT(9xJ9{j-u$yhrnem*Q|oeovoZ0mbBWf5T$hza?Aur~HoQ)Vq(<$g$WO^O8;O(f!B}20iQUPx{^KUAeW0_WII19qaVfVi{<7 zpj-TGo64u!FE;)^2oZx)B4{#DKc)f9TVKt^O$k4R`(NINqOpYA@Nr-WgqwvtWh7F?r{I=!H`J3B|M zt?hW4)yxXqwqM`u5ymm5=&i8N%*^y`eZvQB?L4Krs;&y4fyZ^bqV}+BmL>86kM1T* zh0E6>`xq5FttExR5kTPkjg!jD%O{@FrG_>iRvAcMA?TpIkS?@C9o^E<6&SCT^@~>+ 
zI^G^rQepDR2+9+i>&3&tl|>2HH22aD;k&|_KDp)EZ=Uw&1SgcF;b%~rp8fJsVIK8R zuzQaPx@6Q%Y^;UYF|l7+@?H0It?+=8vV@3bc==Kc@Auz+;G~R`#%J^G{LS@Ot-Pu7 zLgU~MlAEF<;p2_7(4+bAIN5TknfSH&Fm9~3(oj3*gujG`aG0;)EPy-(Oz8cwn zCxk$$6|R2Nc8*zUls}fiz>)r-?V~}7{pFE5SW))0VU58mhFI-{~T-+=EG3b4*op{NmSje{VYWAYClFj8iQd-ytf} ze_iD-Erum9KHrGK>{76~;QJS0ySiEMlrZWaulyAM2uXehGlJm~ zy(Vo$Fj%B9Fch|4fjF*LO7Tm_8IXp=71%+MO?t|9QbS+*S#92dh=|B?q`)seUM=zr zE(+KbH_%mW`h}zsF>kD2X5~Ujtu8AsJ~lJRbfj;!R*ik z#QC1u&8C35^II?=0Zm$idM;4N1}Ei@G}3YEWD0DemX`G3;jq#k`T#a#B9fDPKtB8h ze0XW?I*${3u&t2-g<4eg3iG>t$=s{pH$Y>Ic06}=*#VY8uogu5=#ejo6CjP5P(Wnt z7@Y7x(I5C1x1e@{KPvwE^=k_m)7a0l$(eMRl9H1D?92I(guT`;cmiHA#-J(>FO1aZ z=dzh>1#c@0@Sd6T*u#d!+2LX>qIV#2Jy@W^0Gxw^%`w;x4BssWZ~(lSV3agkq8AQE zP@v@bV_+Z>8j|w&OePv)3Ncja-d7jS(PfWmY4_kU?SL!Q;>Jc$CLIt1?*EXja zgc1D<3mHJJbOjv7P?lC!><_mzz-r!{O&D-RIk~Xi*yLoaXOJR5;`;V|_Ew&Q#rgU7 zb#?BECXP-{;O1lxjx?|a9!Y%*SVTrOVY4?jAr1J2ET zFlO_?-yb}IS`s9gLS61-x;0cyB>58I(#tw{2 zzOM~rDHL9XeVYL{B9OBOKVKW`NmmDOHv#c(?ImxdpB=a(hLG{c!4qY*8f65k1JZLE zz7Wta&Xp;ft#u3r3$6J-f5L);vB1jk{A})ui}?;YzY`*kHCe|DnpbC?3#RNok6RiPL`zKK82TbZf<%_0BS z)mp>5cAQIaWdU$U1<7C7kmw7t&SjQ}r@jRz14QO){x+ULiEO&}Lw1%Ao%zhk)Jt3V5=h>qFECAs0*n#r_B0`9yMR_dhk0o;?ec__c!5JhbjG-RPMN%F0B%HDjcF zsbP>NgR_Vs^`d~(;sj-l1?H3&Snu5?7(hUwU-$Be6gta^($5qUJrm!Uk z2i>aLVBPeW!=Fvg~^c@R6g~=wBo77;0KUARw`6w3eLI@ zrTsY=i~xlSV~gOD6f|t$Dt&{~k-%Yw1B|)$eYp0B5j8!6C*EL`Fb%FUNQ1TJ=4TL4 z4HZ@(oxE!D@&xCS7jO?C5{XCdgB6HDdbz1=;H%r<9@yT| z5dc{%c$sh%^&uZ$66lNzovuAv=}T5?bww3XwtcHmT0BxhLx$P?XQYq_g2HF`T*F^< zl0gPNDYp7sAJfPQGJ%_$(;@vw!R-&+0@_S#WZ>3eAzj?SLty>ar|Tfl9V~jXZ4(X3 zud~m2SUKN8AYE8nlgJVaWz-!5^9Yau2O34=u6IW`h1uo#N&8q7jdbrU*!mz1q~N_i zrlRtJB7)Vlml%YKdUZ?af_BR@>DoI70k6ONNR0K(!pqFe%wzCBLLOMQhToShky+r{ z3h#IsLLfN$u{&-k>v|rjlq2YaTHq1g=d{Gt(8b2b3J4RhtNLd#NiQjN0wHrMqtRKoLadE4y#|!mULbI-UJ(x<^6L zk96IO)?NY7Vr#Y`8NSljXNNY(G_T<0K9vp5xz5P@VgkQ}v?GFFN{8_D6+9T=1(;?* z8R!^$B#TGH|MonH=+XrlI)p)Tb88@fCwz4t4GsZ&?GNlwz^hGx`k9fjF*U7Riaxk) zAnzGGC#3s{T)L1fR3Bj81%3|!P)l4x*Rq+Z9odlV0R3njU?jq}+U|S+g*|B6!{Z9g 
z;sQ~3)q{#kuj2JPU=;*Dsv4D+v2QGCSXu7~3kw&rPDSqVzW?r(1vcMyn7g)A}9SJG&p<>Df& z)YLIfeSwUzn%9-8ZFLsify|uB=lIR@tje>+NSz2OVk7N26m@KYTlgn<&W?^?n3*xQ zIZ^LgK2Qo(CF_@HqlaUQNIHfZJxz=1Rc|6_->U*8L~!RmooK@Sa|oUBar5@CtFPw9Th+7{|dCp*WP zdRGppemcH?|1`YZ-rkO6*0q2YQ6C>U6j4w&k_dynB*|QWzkgIpij13E?ZBs?k00qO zwc#QfE_Ft}Gw8!5Cd~xo6RNOKtr#Oi!;m9nT@1WtC|rY!8lK2Bd0iwxFhe>pBX7fS zoL8x+M+pWPpl?QPDsDkW&={#0G$51n)#F^|1)P$sV*stoqH! zRw04knHj41$9#N~dAbm7(Y9ixXaB+>6g%L~mB+l{bA@AgGv6n3XdTKXa1iqY7gOY~ zA}w&i-UyN7?r`%7{o>wB6RhELUD>^Q@b(}67e`E@KqVyTma1-Iv zK}k2kSXCepYufzDL6phjV1sk?M+XJ=IRS{| zgyKyyDr7??q2cxjtYNS0OTK@$GY5g1AO_4O_07xzKuV^h9)Ss`2x<8V>i;X!+>kCI zF(W(=|IXRH`V-93C;=uKc^c)9k%wudKn0z);wQWhF!oE#$arWuTJ)r(vkMu$rfc3J zi*KZd0AvrU{-Gz>AO;9I%5Z%!N4ZcH$aI#!AcLILzxbpNs%FTax3jZ@dTDoOC*sJu z%LK}?t%>pg@QKDB=jgzP!`8gJ*l_~s&E;e{UIR>oLFpby8%UrY!cC3WMeTs(xSv50 z4hU(bdz}V?q%Yi0S?E0Tp|ss{N${ae2EPU+$e}Ml`sY^$D$;ouIz5q3uM(!KJ`ex* zW1%qI8Xr(JT=0Ctw%ZsJ`GtOG;<$%JkvaEDi0;;91AL!K(Cd`PL`G7AMUf-hk6jAM>ZtvXcLn18r8yB(A5bp61S+dF*t=331>J+QkTMLk);WL zBYNqS0S-W1D`^7lZKFLFDXb^`h}HUKFc>WF?eWrfWPIX3xi+V|viQ|ik~MoS%LW8! zF12oAtlY|~_llvBG=w7rdbANxrN;;Lf+xiexWV2fBMXI-0KE=9Yp-%79qv?&V`7fY z-r$Pl7b-IIrJsaruQaumnfTHO2rRWxmxUHZ_+|*h07MGhAgkIJ@@-> zSsVKQk+qrs7l~=tnWOp-7XV6k%>S0SQT`u^o0k8UxB>RjeDRAxc9l|O(3e}VTZgraoA z#j?;HRW%4Tgy`^j_;{5#D#*uJbeadc(72t$4Fl;xdsg({w-ynTeM2z{FaN$Kh2*tj zt_Q_jb@e0k_efJ`@)SB^H{<&kf3Inq9!0aKUcX1QheC1ntYTuqymB(-4$1kMP}B#o zpiVAs%e8;_hV6(!H90o(@*jjqLc906$HjGMz19w{8b4*#uO0AFOSX(PS^uRq*=RP- zF8u2J`nf-}H@cOm=sr(Es8EI5eYY) zFrs$vR(%vY{52_7w!-PDq7SG4^t2Co?>Su=y~XM|_EKEou)JcJHD%6Gm_t~XUnMpC z11GWFJ^P>U@FK}{_obasW*_o@J*0~&qj*8TdF|5gBiFy8SmtTRl-WAo=L)m*$~-?) 
zh5G?4>e0gfrz`6Yp>@0YgzvHc+8Qxf;_mJqaxvI}_%OGfA5;*s$xoSI3sJHK`Uc=; zjck}t)IafRY(yJO;d>jm$!%w7XD6S9esF8&h{Z{#H}ivt1fdib4h3(JRID75bt#$e zLn`D!3}6$$dMc(S_-%O#SuWsk=JKaz74t$D50=Pz?U!!C<$Vm4)_<*$KP*40qnfL8 z4u=}~&VvWiP%X36@Ayd8$w3z#?Tdj6yZRq(MfnBq_P?PgtUFeGQH7n^4HkQX( zVbcPVI=uN<2DQ&%H|N*g2xi3)E&=^&M0p5>F%oh6J1dBc=3`&TR0@>4;#VP+fGRcx zH06*pEg(xb?g7S?BJ52r8MOw6-;s_Cvi!>UNN=0p-+e8h|NpcK_Njee4SqnzI5irT zc{RWTvf}Ia*Od?VvNRkKHU*s%t(%EChx%Ph;ZtA2sDOp17gS zoZ;wq7xgCydOv@^SN(U^yk_LpEsuB{ zR~Y9m|8_*Fd?w{_z7zntJ|>P6XkrZEdE`(PLVte_*pTTmFE|V;C@7#e#ovGe5n1O! z1>6tMe{HsbA0p#pE-qEMy~S{0MaaD9*d(p+zmSgu{>&o|j&g|!aFEOv3ni)FZGYf# zzT1KHM}m&9)Zhm?Z{NqekJcv2X(7(tcnBCHAZ~+M;<)hBe!021&riUytZVh1$MJjs z(xkFbtpow7MiCk*DE_F^^`KOPoCPKEy$^nVyDeBOtZ!F(JrFQazt5_VVpqTGKTriv z8$VYshSP7o0;~t&WMZ#aK$?Ld&15xltF(oh{=|n)hlVVGlaqe?a98h{!{c<>bW8h_3Zs~Y4FV`B-pZKw6&M^KPPtUvkxf*8Vg=wLgb zAVa2F?~4PXbk9SL$TH!JeI;1xfp&%Z@nghBVM$bKsts#8FlnFzgIQmEfl48=h4At5 z!IX6eLnwF+=si4_jGz%Q0gE7yy*}=(>6&Qpj9g)`g8PMpgOC8FNl#oUAk5I)2I!Vx z{JuDvqXh~Q_=pasJnju(<4>Am!9=ps`JMlO()yAS7;%9gI(`!L-f4iD_#$o}EM_{e z$K-lnU6S&=es+5sGbHLkvkLJy!Oa7=Od~_g3(pZ_v%QCW{;*+sa@r?aIP|IOC-ZAk zq{0I*75)gjJaej=AA2A`qQm^xc_r5K#^;2yDe>9R7sWqMC>0AiU(SZI^OocWV8ltG z)oNc~<;@*i=k-G1Z|k|BI;uWSDq__2Xk-GpKq?_2RO(~T4mot}wE*41WnE?bjUvWj z4UNy6SHx3{Y;1wTm*YQ5lD&{N6Vvssh0WpihKh0|DoF)02e{bSCU8=bbtgOwq?{~Z zIZJzBr9}Pk;k$TUw?K#-9MH9*fRm0OaE6gCTRTTbt6+(RVed}`a36$^>mc%r1e%Lv zBX7rI@NyqOCraGw4k)u&2=RB0et{nlVk`i2MS)O={u>O>z@rIer<6B4e!V%%L9=`(;r5mHgSnpzERo+)X^$NWXNDt#GHn=&$u6~=|K_*-o)p+;^dtEu?gv$ib2hS|N& zT`Y(5o&&xFqmzY&w%~d|wvla(x3@RaxRcjYU7Z-4DLIEKT6 z2iM_Rqn|Z~hGIh}#Zvh(=XaB6#~(rS(ORY+RS%K=%j@g2brdWPTP+~y_&(*LZiPit zX6i82C0|~cuW2}Vb<%lZRgdc$>)HV~kJI@Ll)|PfiayQird=5MB_4Je3bO&WCr{>vlyq?@1nF@D;1MWw^D}^O$`X&~BiuuNa-@+}< z9ezY9mk-0jC?WiP8^uZ3#PFJH^u^hpQt0X{me~hw8>FutWEGeqd`AM39HzGc7DJ)~ z$oL2S7jnEoEs0?LF_%@w;Cl_#Jqlo1I4H0Zc5)8SIA67xWF9ww&u*?!R#jD1DEA?Y zp7#YO%t^SAU;@~68C+dn!1q+15tmT z?|{a9b3bB6LX?oP5jB2TMC=~NuK~#WT3KlV_!O997IoV9exleE0QnT@`!`r+qXkXK2M}F=0~$igtLVfWw9z5D 
zUIBBF7kf*C-`*GjKB!{3O@I6s+^M15I^P-Mt99DwjI|o8@mfSrXVHF{Y^L?*5*9(N zr$01#o6Nc{Vgobj8%SwJ39SZJ*d&i4xw`irNQZ8Wn~+H>qv92b1Gp#vlu_|&Fw+Dp8eIfIK482WV_;;& zL{VPW<*ErmEBaukuirqeqzo2B4!4d|fjr9QIBJ&1K1k7XC*+CAbYQ(z*?yS`+LR0oSQr=qRL;+FZr{#U%#jj12n`Bq>FCI9eglO!EUCkQ z12PD#%Rfq2qK~DEmHMLF&<#Zb5HrvoGsJ($+)_{RQUGaR>>t~ojAnDBiSRMe?3z;d zK0x#6hzu4G5dZ{Vg(;X!Djz{-V2Di8(?H}o)H9!XCCrRz#}?wq2yawAWAESQz(hVF)+5aw^3d4t<;Jb89QI)uZh40F>cMQ-9LHENQaH9wI zvM=nP?ELYLqstRco2!%2!8JUS0Mut=@J(Mn>IJ|7+fBHs& zxF>6Dv&Zx2y9`66axq5-KBh3-ze*@jXyrcO%D#ZD`-q5UAC~x?%g%v{&FR=m;j7zA z0?p@pg%3XsS%7|dc6Rol+3$y9Atwnm((&c4J*t0Z`d!@}q%1cUi?!KYs8lH>qh6|- zq91IGWElnEoV2g*!5l-R1BWb6t%O`7@qU||U3g(IR_yfjP246Xir{aa53sPXfNJ=) zw2HEMj-o-ArDseiWRebF{^i@d_X$kwE9i-!8#&!yPc2y#wnYbf-#PCqS#YPfnXLE( z7Zg!5eL`3RML;wFbFa6?*&2@*uwcd~1JSUr%vE9e{vr$)kp|`;Ki(8NS-RJFb$ND8 z0oFor!6mQ-T$p(Qf||D5W6qgv^yJs`Jc#vB3JVH~N{yIa9socVWxB~beSd%7!vyL4 z4lk0h)r!3TH6-ZvDVYG1!Fn#F0xH0O=c2Y?1Oz1Wz>gnUC*|5%+rn1%gQLK}Jphs% zFRz9W;r;u8m6eV{w0U$vg-S9qGNqZ)aJvw{oXEx1-(Sm7{GOu0G2Y}(#L%eO0+LgZ zsXXddyswr#$L;pSNN_0xrHqyFf%mp`VMNm5(u;=T(3k=oToJle0Mgd5nM zuQWi2OHa@4X;M+N*<`9?3wB4O3T zz{<+^LmG1`-&fBuX5RkK#Z8L~TQU%qWeUVKsx^ii+)oCl#>elFlFGp5wgp80_pW#V z>^f{uqG4gt@HKqtJzxpx?38d-NrWjZ0Ec2(1D!;HfT;isWbc-v7$}z;O{?bb=U3`= zajM7wdY+76T}}^v{oXQDBDiKwP}zh2sQ3mS7`%4k;Vn zq)OVeb2=E0Gs5sGs%g`K4{{gHm3dBfHmlvN0121%$MSLxFjxT7Sr}!K)Yzz&=!U{u zk{3L{kKiDU158;=?57CqKb3VA)`m8QUQ??kS>w#?iK;bE!`ot>4r^n z*FNWe?!9B&`@UnmAI=w#ZZ^;Ji?!yQYfdQpOZWc%s}1ecp4t{-)K22{1*Fuwe_AR8 zlWr=__zQMTIIb_0x+Ir+AQ@bev~0z1Lpt1_+(m7>^`8Ol+CF-_PuNOW7+u~Z{rZ%$ zcjbzZ6&32Vf{0EHtnsI-jY!NbEXL|zA}Mx^(B*$_|(cb6fc zqpapfh<96)#sR1n8v_CY0?+dUWj`#EUMP;*;J+ISlG|sXKDjrX34r4n9Kq;-YG|{d zOv42R(eJ|t!3KMPX}?211ZSVQxjDFo-zDKo0Md7IY6@0jCZNn`XD`a`a^_|L(%cQ; zbWnNuOMtbZ^b^){DYIP^N0VNh^^n_viHQl)%?~F_aPP~d5d{t~iob^{6I$;5tr?gM z9)XZ}F+gzPQZ4Ub{+ueWq=W-~MbDihW#{RS{aO-8Z@V^(ujp^IA| zZeY%?mal2l+9b|YSPJG2F`1mHsFfQx|49{zMYRf?9In6aTI%Vtzbxv1!vy@8$=OjT 
zWYX9$e9>tSN!<6W%KRCt@9$g_yCQwxD1_*FT82yQej=Y;ND-|vG#;-3z^tTDHGFGKJfgZ>DQlF0UnNQc5yh(9+iC7hU57nk1if9_c)c=#~QOrFG6Y zR!d2!Hf4++^ICmpJz%_@C`GvyCJW2uc{Dseps+z2g%)`FZwAUIRsU>ne?&k4BQAGf zOZPfS!X#3C zIoudn8+i9n^T2`pb(KtuYCI&gMauU9oU-5x*&!%Gt_wH5`BY8$Pt2HabGA#P&sJIK z+)?XwDr#yeGE}fOc6r-7X^?ThSITxZXe7E4EO5)l%{jGU8U^rbaF3W>`4d7WX1lKl z^_i|NH9z4(XJuI#85=s)LFmAAXtdgn65W$kRn5-ogaQ=6##V9M>7|hTy~_PCtpgQN z$JbU!z}{bNmwDBsWRMfJ1B?SdURT!M!(;2e4baEvk-cEaOZ*(=!nQFMBTko9Y?$d2 zCW~)f2x4%4O3H9ODU%?bwWFzaU1se+6ov$!fZ;T`B3?T=?3T1#)ig z%X7)_w5#nUr7t?~aW z+l7ROn7+1->an@p9WQ)JA_?_ z)@%6aE+LY&r)h3oJQ8hnjZ7jFJKm&(&7pUGlAvqoE8vrwN(voVI$w4}8js(tpq2cU z_4PN=y@|59EPYvfR-6U0M!H63iFSOnEyklcdh+)1L%9TM-#m8Z2LB1ry+usE#rhYN zz}m>jiB~HtMSt1I=ru3L!0!h|Ky<#i=j0AFCg|f z{4_2Mc3z2b-*Y;8`jwGFeq06(GCsZw_{7dZA6YdOsF=GvL>Fv4?YTS}*zuIh*l4r= zh1Ngy{}7lq-xmX!7w;sca65#EH;+JEC!C@XN4^N6Mhb2w*Te$Je~|W3ZVtXl!Sp)+ zH^eiPw1U2gf;-X9(N6J^3jY!eKc|}?C|-Ad{ro96V|7S3bjxUZi;azKe|>B%kWqoC@TJ`eQIFaPq&QH;ZmW z9TDPMNOIH`T+5|^gF^SPGG0yqLWZY&d_LXX-3gVN{Bu9ia+^(&$d&UnNunnrM!1BD zn$-^+!+;e@Ele%Uf;nWl@JH|`4@7X4;s1T6&*d4ZuV5h`W7{63zR)TAp-vN7`_U! 
z8aZCF6u+qO_kMfp*xzu;=H=zvINFeBjmsN&@>9#-1U~jQEiJA4+1btx?a96V_k?fI zYq7a8l=8fi7leN^D3QeI$t=l&NBYpS0#g6vq}Pp*+c9YTU=^*v1H1|c(pn`*yB_Xj zp~Y!eeV&r|eSZPDap1ZP(zTz^DgTdz0XpnhsG>jE>2+THkLIF!rQ0j9i2^DR=(%<% zEwVMLLESR=2ecg^SOPMhB=8aGe+dxRsmyOtZ=oPZ9nC94BP=c>b03&SG(`YSG9M15 z@P7dlx1Wm1g~>ialUJrRAZI`ebw7Umc>n(WCb-NOMT6kAvHZXOdaDYyaN!7ALkfDD zYQR~_O^2Wj_a|Vt{9670d;{O`wIb}t=+_NDj~Vn~#5~Tgpe6)6Cvt$2!54_xPVc|$ zil(O-Z*)O_0&E4kcmnwry6}W|gJwh%pNNHv4+T(1dgyG~sgOYuAsxeRMGP0*e3Q?$1U?UZK>HFB6G4gbIz>jN+II2bng$JKP;#=i z$)|tR84C6fAN)^lhwuuZjiB=kPkjMk)>5L=baQWS2+%*eP~eLw+iK5c$xs!Q zl)xP06>w%AyFzpCzbE)j6um3}!$Q{uAa#RA`2om_ppxrGqp~1X0!^txfd(Fk5cBf$ z7jd~DR}GH2;?mLBiQeZjL^>vB-h}By^9uh!`wAg@Mv1~@zj66;8PNy@d3%(-DxLD z?K4&>(zyas6bzmIG_hkPa*u-{WzfFEH~a-CGKeqW>%yUd-f<$?3UC-Krlf~I|L=P- z!$-9S;m+r9!9 zUn%}eV|j`evZ9M(^h6wjV$Mg9ulr*XT}3y6 z$KPMfc%Xi4NGg z+U!LL$VhJ@w+L)-khKF^3;l}$QYb|R1{mR8k4{X;r;9%Z3566i$O|c?=vEj29ver; zw&CFlh3L2ZpRIM0UIC&dCib?&sk^rqlf-K4DgnmhC^kk=FORCI$CK~LKDV4Hcq~^QbHX5Lhyfi^v-u+)iiHmC$l`s@VF@mSGT^zY(4iEJPgN z%f=_9You()hA&r*M=S-0!|m%wM@KT5O7wK}J+&U`nVEKnXDGbq8 zpmT8+ly>(qyC3};$yF-08Cbg79w|;v4~J(9Y9>|qz*H!s$Dn1LBuh2bPnDTu@qm4X zg0pq|9&0q3U6dOs5FUta0qaZ76BHAiDP8L~D_O@U;KYLPD3*8IW~^Dt2(shqg>kBA z@CVr4yh$3v=kk!Z#)*a73nnX)LQ{h!1jJ4(O@QZ~aN4-~l$0g4n=odh*0ET3o&qC7 zD&f`8tj@89*t(+1^!HAhzkvsX(d<}_Mi==cf-Ba}Tgeps%XwORyx^NfYh5Qt&JO?P zmtB7HOR?Uzo<=6GKhM!yS07?MyOa0uh6Ds8!*{GG*-M~Wgys3gKj^mGi_)T^CKy`> z{n6~)oB*I`fEn@tX}67y{Y@)D3!W(eL80U4OOc3DlXEWj zy87fNv6ncXC)l84hH#WmAN)hNJ4QoSCTf+79Y)<|F`!OmLsZaI+_f`Co$V>y`I~xx zGQEGUvENDmyFQu$$DmPkZLlLRR@QY1q{#&qlEV79^D!auG&aQ3jxi77-)rFG<54WV z0kyP74WTeBw15CH$rzs4`dUXlBO~RsAW%23?3IpTpqYEe{7eq}1*!*?R(qQ_Np>gv zl5Onm>6C~r-!2G$wLA4o2}bS;>wT!%y)qN9ayf7io7KgrP`ZYL?p#eslv%@ z<_4)DhFB8LJjcUBg~J2#GAWB=6B;&#kj7Awo8PpIso*6FFUC!Td9uXgJv+B&LIW!`jY{R_OxFJ!~(22*GHSwWDJO z40^J9WCS!@R~HsC;zZ%7P+j7M9G@Qjtv9b%ETi_jS|5Iw+GvUB7ARKI)sYa+;JQFN z5X1Y_=x7nml-JqEc6UpHwXX^4$DQMMcr7s=yQ@>_@*)@GmC1K-8T;!@u#Ca?X%tp`q)`YC1K%#c#o*B#~-sEL2R(Kpe39W9l(8(9qKn7u1e!!#eyK$?wD 
z-AybKHkBy1bn%~U?Yb5QZ!zJ9M&+|F#($H_Ohq)k{wVSW2zxOY`+lvMgH1iZ7MY7tR9UIN5jO;? z-eQ9ZwhYN+G1x2w?3Zn8g$;TYmX~bAL=?>A`Q_#1gu!s$QDJ{}FT{GGsAcm2Q}Q~x zkU=J!wS+ZZ-gwH!wiyuaa@l-s`qUcx&w~fYA&f`1e{aL8iOlNBxBF_!YF35afRayD;mG`h)M=ZsU*Fw z3JZ+U@$vpTRr%7B>B!s>=lLn!v`pk1@`g*B{MWqEg=c<%J%N!>_(0YWel?ck*0_+P zPS6;&vJwMrqvpM*6G&ONsn4HudXalqZf#?I)0&dO=uP5A{LCJ}h(R0 zw$D*V?yq}u!kaL>RtF32H9?|rERV+xjDbu^w&f-uzvot0)s{KsZ5(@NjLKc2R%tFNZGQg#JjuSL->vWi?>sB^q%eabG7sz8I@>bOJ@1^60@3O(ee z*Yjt5eAinRNu*;wDJK0c%FMh;FmkT&JT_x@=VJgYi*GVi&edN0Ildpa2Tl$kYASG% zwlM`R3!ckKJHt$muYjK&8y>fW~t-20X%;&saTD`YGl2zGlhxQVux(n87ZHHO{<1vr!7zd&LhQVmU1H`a0G? zY>QbxN^a9&{P1!4fyZ=03dJmburS{eU@?`wiFeX^&Wn3m z$D3L$LL}v@U@$wHLHowHp3?j$+=i?e_f|(TjT$d7%*RV2k;!hod1by~`R7K=I^w1@ zczCf8z8DC}vPcPB7>-sK^vAe=vh6>DE#A)tzDK(=CO6tngU5_#e6A@J;=K1->@-|# zd`#RMTZ+WUQfB{}9*Em%BF@!k4_2qUJ!{h+Zk~CpbAVpy=`*H2GB-$w3WRri6@7{( zS&UuvuJptF1Fr+z#wxZ(J0`3ebQN&z)>;0{hc1s~R-R2IG&sRY(91j7H8&PTAH!iu zhXEJ0v)!n5%WeFCsCd0hpYxOO*sy2ZkIzr{!xF=Ppy9EpI|y)$kb?8eA6`4qIJK82 zcf1c78+2Zr80O_!Ce{{t&I>zkO!tVS3hNn}b!lTIu48VD<0X2@;e%At{lP15DCZ!j z5bRAm<+{AoRTmqlRMphbYVht(8X}?Vp+tY0c>X{macl*O6SxF>Pg|zx?Wddkx9&mA zjoPjgfm!^+E>_=PS)#{VT;&(kji%Q3(Tt;kL5DQwQX2hTZ4#0=Hj^JvBaKy=l-6;z zwok;f``0l@C*8YyYxZ1sD^4>yyzK2lz1M!hf|B!c9TmY;wOjP|r-?fGpTUzd7u-%; zo$Yb#7N!cy^0G9MgRZO8$8L#gf_5>rrt-IUoG3=;~U$6KwmBYzgs5+lXEg$)PSLXK$MF+LnavftbEJ<*PT=y00}~_LgA)h1 zN{{R}d4Ri-J}_<7#!(HgINm(H?C@d-spD@}p|RyckJBs8(@op5vd4QBYpJqv;cz8@ z0l{vBqf%0)qMO^zi1je#%C_L7UgZ6SU4^#;Q9kjeBxYu2fP&p7>}pk+DJ?yxw3_BJ z!gE$rrn2=5DI51Xln@scrBW!=5Fe02hh1ceKZf{`7N~)Kuh}JJexp%zuF?IV*L4r` z_IK3*qodPN8#c>nI_^@aSOyE#D;4b7{QwZ4{D5}*868zo$~(b<+9yCh^RvB9j>6w> zpg|p3bGc7MPZAmsEL1phjT1BPo8EKB9hVrQvQ5hQ zL2A3C2~X6|F|oG4z)soac_wwt#B_9_hAt}EYqzboW#L6-eLYcO9nWtaVnMcXr&$bO zJ$wtan4|d194%Zze!_1J3ZsSu%DKZ0_fzYbt_mVfJ8Iy27rDlP1${DW70$amKH1qH z-bc!()9p{XAnlK<{o>eACA`5*YUU1R&htuS!?l}0b#;&6gvw>hh48_gp6x`TCd$so zK4ZLNy7@I9aq@jDxpUWpUjb!=9QHASA|V*RYyEeY1B**;93b$tYHbhjC!ChnedNdc 
zqI=XUI$>(Z4!JlFtNQB%v2B!MGVdkAgdP|VabaL4QO5JX-UFl|E8EHeyNGi=5C}B4yo!@`}Eqb^<&@!Q$my?=# zkyin$sMe;_8Hl1sha<)4hJ8ct`h&OFZkI=@jbQ2$#7 zEKRCB=P8^>NmYyO1Daj<(|tKT5Rh5G$Lj{pECkgActVw0^*j6fvrG9uC_l5Cj(!pJ z4~6Zr!aHk_Ku9DeF2(SrcW7vcjGI?6wI;!+yXtS?Bl?S8d*FC6&FGxhIIVPt8+0ui zhXYJERr4gd)*vrWiC!I+0AlJF6&QA-!57o=@3(4gBVQLHy9K*WnHz^0m*_e+59FLE zyqK?e?+4vIc2zXH2ncz5xzCc}b%J*hc@BeDvxkYbX}k%ibAL9a)|4*SBj1U(-E8q= zT3F`>u!v=Ls2Q4^2+A0Lh5Swl&DtmVfwLWN-st?5{$m+SEi znMS^r85i~G1V-|gL(3F)@TtcX(v=ni<7uLi#U&tX0z1FR#!8op$D5Hfc#*Wvz_#X2 zT`&hKW>a@Cmul75kI%il%>vzVfw~HGlw~f5stu=x>0;a`!?^^V;n%g{JxW8<$8NPq zx9C-&C;{PCa+VU_*U3fxf7sW}ft%he?U+0EECUsmIA`pD`c768hyn9EJ*_2l@jEu+ zP!?7-lgZ@1Ck8KD&3R=X0`wZ2$Mwtcvn@$F^^m`J2aGQ z5q78A!nNJ~^$ZpiL@_iE4L!DEEWEn%wj4CQl&jOc{R1q}r8N(cKn{5EV$oq32DAOOgZ?pMfHi5Wn- zd$?~O>@ZR1XJ-0$wr)!Mdw(|qX ze%}7;cdbb1uF~tLR80X9nG4a>Q0KFljSt=?TERZ6{b!Dbs_CZ&zV7J9 z9tht5EG&xxLH{#}`aR`sG^6nT>L;063|qF3?hpOW{(Ar8eyH-lkmUb&F6aMj7Du59 zQ2NR0mLy)cDz8gE5DVF_^j`-;WgZVbb@TN8nOT<1gBY~fE7gkKv%dPP`h{EU&iNB9 zkB!Hm3G7sV`42SuS7_MnM?Tf?5hT*YV)1y2)`u0Os{+rj+?(R8`y(xt!0{SD%}(`J zIo+*`Yc~*_9HRqeVvh`aVkJ-vnoy_i7ccK0zxOS(81RGp-t+llWX3w22N7Qr?s}(w zxTctUWB$*k*79*3tJTR}ki!V?A>kGt^VDf)hSn_{df9a{Ao#vi5@TW>LNonmaYgm%6nL+r-Q`V#{UGEnj@?5z3V|7w%d6`c}EVv3{FslqV#UuTJG(B=#l$M*Yvh=2mUTE?OK~_H{IM^ z)SXC3tkAV%hkT|Qm6fJR*7|sH1}c@<)w& z$HYHYjjg{&ZgwHJrW&OU_#54i1=a@J09iM_(6Hx96surhNVh2MZ`0tu=juTPm(QD1 zTx}|r#C`=6m;`r>hGbmDBcy#2tiH>m|YU$Y-RG2Sg_pyH+px@LFr!{uzP)=sD6F<(4e zDDh=~CCal_ulfu%(K3@MwV~#Ibm8WkF|=Y2B?~;n0AF7rpS^$pPC>Isn(A1E3Z-0f zHU>;-EUdh|yw52fo{(7xXl85MIM>WhN3tXptB33XXfDZ^sQ<<_G6I#~y(m?}vC#f( zLAPm=Lk0{e3Eu7Qj!^>0T8Phh%XH<{~k_0e&O>qf|RQguscy<(o-b~g9ffFibOALyYcP4>vxW!CWFQoDF69Z zWU|rtLICLc<(FI!NPd;ls~7EsPA%`dE_rfAbb5Lk9&p;N?~hF)svkdSHnKzRw0>`8 zH9NTr9R|}lB6#cqe7}NDjOoPFAHvYnc(ptqQE&%)6hEWis!jn>ihH4lKGab!EQT}D zFrK(&L0WeIvEi6pj`AJqFBL_6GG7^l4s`9-&Z(*eZTpy*s%ol=E~0}akB(BWGM41Y z=SIE8BR|i-A0zE+qa*M>*N*h$mJ;S|m?H4?_ZM-=RVaXM#BMf{*ETd13DAh{#rM{{ z%N;Ka!2c6}kCz`=E{=dUxk2@u=uxJW5ddnwo&7NV&u{%Dx#1EMI1O<0>;AgJGFEKB 
z0?njvMs%82gM_HD1J_d{%XR?9Rv<;x>K+kzuP#kt34e2QzDPbhai75=;#4#Cu!Qgs zg9%4Ri}QQHU66@Ar(he_sf{6>zy=i7+t>HH&&9=V@qiJc#C?%{*9j=?X~Nx_j^kjc z5b)S-9=^qYska`}2i-c~lb3!jcW|H(xtoe4X`N(q&sJv__Tsn-91J)YTh(zVBZ8+#A2ya;_at)+QYnlpM-<>a_I( zG{$LZWM6MQf8l#@MX+YF)~oRspSflI9vEDyRQ|z*y`QaAP6J?&K)%ud6soB%dvXb| zpO|U7X~>|-!=`f1DP;sXt6~GeoL3m!)G*R;yD1QKJ^(Pmvx0J>H-TGZD~UG%Dzn%2 ztY{QlBAPKmwF-HU9y+d3F_>8;oT!Tyvo1d7tHmdUDqJ6a!!&z{M{0M%ssm zae&d+KE?Wy5dI!u_4bJgy2QI3Y(JxBDt9`+r0g$GBs1c8byS54Bi)`48<&VlNJQ|% z&~V;aTa_J(VZ>^3VXAWas_n^zy4Br3P$z;WOBlibx!OV{)%9h8R%0*ZS&SXGTTap{ zq9%?L?S+%Q_ zKWS}uj(iRjJS2V)fIRK|6bt4BBxA!a|1PjJCXyRa*u|Pjb#`;>@S=?rujAnj2n=Lb zFNEwSuo7J0e1MTe+}4uqw<mR>u*5F=}n;vOMxgddAX|%|0&$7EO9gf-?r`9%W=c`@UT+Ej5>&?%_@U5|U zsx>B!rLeqf=i#DgGoJbP5lYuxv)f$|oAy9yT2juCl2gazNz!`;^TcwPJOqi19;oR- zs#{(+EBIhC#xo9YOfAQ1^4D)Z5AbF%bICpZc^@LHpeJrnIU@&X0_W?nRo9)LtS-zU1~4Z@J%BuiK=wZEQyD*;{i!kv`N^z_KpFNQLauLN?G3>H_n zyc5V1X9|v!0Eop*mowQvON|-uyX|`w=wdnB5P5v!2hAk4DC}%2o6`hNppNZmpMM|h zyqhEE3q6d=UQ6h!-kjLF^U;6STK0!(N7lCas>5dMORT%MzQO*%@vQ!9qh+zL8u_;M zpMBfE>=k)_w%k`wAfcQGv@rAT2b5~VHa1 z)!0HT=JDCtSTYk_!gas1AMs7|p%Zv7&#Nj_4gzYNLc>UcTU*EQpbplXpEv#GeCt0g zKr9D?k*~bV&UvWW@kQ7PFaRG75j^rVyTDtQ8ujR`e+)P)cEf_#XF4Ph!)FjK1~*p% z+udzcruLTBKHd?tXM-oGM{9ADL?V+cWoG6MIqjY>A1P)aPJ*_)j>4jP;!C&ps>x~O z;kjAtz7h7^XKJ*XK_~1Ao;m{C(Ur!9*JWi&)Lop)MBHAEC4{=0}*uT>j)= z%N-!r;a+N<-4XUcum)H>xpyQC!51i?Lso`!vD&4R((KwXsjzS3Flu(LBari2*$+E9 zI_i$j_V%|1Cf>!qXEyv_drk0Sq9+%#$wkASiV5?HJw5)gZy!O?54K5ZFbm6zLqvNO zR6U=oJk;f~6@)U@rfUoLM;gxMl$4_E;Tq`t_3H(VdTO62G}2)}j0b0&&x|LhU6wk{ z?7USxzt+yrZV|Vs`E5O)nef)HhiRR1GHmbf$o#&k@WvTiD2lGx=UbJKjLntz-=4Zv zHHqMLei@Q8Hd&b_HL>;b`si1YhG(G`mpp@7i^L_XMkU!Hh>i~@S=xf!Ci~2_Ta8(B zY<6}`*N^vdv>0&~#9_XyO^ErCge%-Vr(o*sI=iTXpxT-EJebN@Zr zDB;v<(*=ar9XH+r!i}&+b38v^KQ(+7u9h*o-p4c%8PUF z0yK@BgMsa_bsV;s%B{z^c&hE`6ORhy=f|7OXBrY6V`Ix4^b)6=QUv`uaNR;+Qd7Rr zQd7yZBs>f?g8(6kn3zoidrxD>##Bv|>AAKlSVP0mdj#Z$hk?mvZC6zP{NaRVg~zjx zW>qKSg+Xy&asLpTYTWCFL4}pOqe+UXW!B#e=G%Dow_DIZ5uj;HQ#Vo&{Ta0%61jt^ 
z3VM(L9F@el@n|#nSUS=t%*NB0B9c8j()FTqo7rvGUw~_h zN-|f0JEeCNz53jWNr6nwA3uT^UL!UVkFy8ODbLm?%OfTy2{)@xEg9Nc_mU}-W??p& z^5x5+#baE7ThHDM<=R}4l$Cu@fxO)?PVx%$mj(FTDKN_dlYEgdksd!$>tqD+BfuS^ z=OUyMd0JCL`8we-7OuoR58?277Lq-XhnQ4*tmUEqx7XRq8SB-#nar%LPtttnLDUFo z{gquOLM^Ll+CZ9@Ny0+CeE^rv>Z8z%ISZL{DFyJrj` zlXV(Ati)(PAF#!HM50~Yr;!Nnhyf}*ZDn96qgN?0I#iJ6JI_-(#WxG7m&qftCR)4h` z&Yw?>q{U;`eeQ8G(^W~(P-|M>TG-gU%GgMA(8<7ZGb(jOY3NufjM>IFhoiwj5yhKb zvchSrSrs4WoSbJ9?3{RT=l*EXGnhOpuFd;Sh2BPBzyuPm>mkIvo#3f}_Ot>C-~yO5 zP~`bZez0W4um1Y=9SgsS$x@#jEf?xj>6+2Ici#?ne;`IiR#8GiLS*EWj~)p<#W$;JJWy8U~u^*tSX;xsfg zFjeNez5Sw#&(P2yB`0UdGWD=CZ=G+V)5v8#U6$DBu(pm>Dv7Vhf{436-7fQXx;`sD zz}*>2Z$1ujhIY~h;xVcfhLtHLRX|xi!CV-clXF&O!Ohr{^r5nn-3hg!uUc?9b#C74 zwRqR4vwr;ogokqS17Ja2zsggoL~&T~OdT1QI-;#es4b$oc0g;V={;|tLF|UoaGT+_ z^Riv4%V)48Y0A4Y?zH;m-McjTwU@@fy}`cXccC^v4xP#?P%ONDOjy}86q3l%ypWAZ z=`)s5jJ$9{(H?j}7d@aA*HJyHQs=-0(WI(%)_lPPFZB+Twb6PICC?)tgJY=dc8(&X zOV9ncb`B1Ts~cWICo37x_hGP%_4a|QrTKv0v7Mo2>`TX=qBV{y-64s!FyaZ=da2GP zVU5Fz9(!Z!+RSwM9rX&!7GvgOz>a`QZOQ0~&(_Svq0Tnh-$Kz=+d_5gt&fNLiAXLJ|%wek70|6*zF_d8&ghJ*#{QscKf4DT3&Y4zOKC2 zuV1HAt^0(zVdb!#;9}jaA{_ig_iyIBNTXB7-X7Uhs@vx$`%Bwlr0#XLirE-#iV2)E zotubFUbnIR7DJG^ib_c>6=YEvB4R9E?6oQ+ap0R)k9?`$EiW%fJ(Z80HXC3hH4uLU@f=feA zV8=T-|4Dv1kF`W6MMcG-$w?lEg_hKz3Ck^ARm1fo%0bwVx_IOVA3~M0Qi<_Wb8>Pr zGBUC^xEy2=t!-*i$*9if4kvkInE8b{Ij)z2Sh+J+(|I{LF8f_6q$1gYT5g((ihI9O z=P7xzrQA&X436FW@5-NI;eXFaPtS%=GaL5Eg&6G-X7R%M{Y@ffNg@vOs<`kt_+L3m zTpns0wjy8SA|nl`BR>9ole=iZia&JagMSW*dz}D$VR|Ym8Ie$eEo&F}EKGu9?;3+O zw%iHE*!Blld})S;aeMnef>cdW+-})s4Vv&3bJ~*l3oF(Jg@Qu@Y^u7&H%t4B%i<9w zrxuLCn@)#vSj1fBE?)abHsxJJvY1H}ngpY^5;;WzE0rsFy70L#{_b)x*zhPOs&&j}uh=ri6LLg9#J>5q+^xDxpT&be zpQX(=OY`lCc+qbe(AA;u-=D)T@{*fd9sOymC&BS_YhURiSd0+kb$;fNh4l_yIiKg% zmB!{Z6H!ei8aA4vwE7?qsdp>L-=cN(vR$alr!x|n%eS%6QLD72?C)|rZZIe&a+q&_qwPCw zI)&TTi;!p8+O@?Wvpnc*b!`oXE{u>#^$-H~ol3l#G%_hTI5Z5&QO7Wn7)co2nYGF)e3w8k zw)_ZoM6a+&YL2OIsA|V<-miC`THQT*u5m8lVyo~CtldkeVC=RjI+rl`I+YwnAGe3d 
zmOm>pKp!ZibgW++FE1umYe0p%s3myVIWk<+YkjaouQc`V^R4Hspr5%q=AEAzF4QFT4aS_KHU}miuqGzW#wY&FD_-N^+4Q5GT6Sc zjEx$m^&oPIh)fg&Cr8x1WyJT9X`Q)8p`}JM(nqJUMph2?=Ri0_pbyA1>uZC-L2qEJ zAuP&P!Fe%bHDkhZNk2=elad*tIEh&m^)s#Y_A2S5r>u``C)ZVh=hTH1OBsS<>8SP z`zLpANK(lE`}l&=7RX>2>=soYMarH9k>>D?Hd%6%h@#@f>5srS?Fp$8NU6V0PW7pn zRSYc|x`toAD|9Dd%i7vLFbd2KwJKh1xEZzhdF@B6n}$m#`q63S4sm&SBLROYqrVEMqI0?j)S9Y3LTjN)!F zgCV*Y7O!WGmpeBJ8ZVRFPG|IE%ZI`aB#@Nv>+0M=`Ay$NNuec>JM@syeeos_HTBr8 zm!hJWlo$oL*$3>4BZ4L?ofuCJH-)66Flm)MI!h66^+$&3mGc!LI}REU=zNM<-`C!E zN*9SGU7=;TSo_h(Ok0gmv;nGu=(?|OlmjRWBrJeMp@njoIZ!*6~O zHo;X^SklNv#289@imsc$P-SPJmnbUs`=cf8=eY?OH#!^r2&uJ23i*+T?Rj(UBRdD0 zE?9J3iX*=H%oR=>0>|hpS$W~&uYyMD8iC-hFc5@Z`SS;RR8{J_q?VSFx{`WCa&l;& zai0A}jvA;=jVH4n(DI)WzHkaYJL@W>n_)Nq?M*OI_U9nI>&2717%`F*m_&Sfs%--U z5w)j>x{H~~I}<)L)s>gLQu*3k{P#?T%cx26j{d^<(A?BctNg&BohaRM+ib+D=yU3jEu=uN6vie z=f3i%1jIOPV=*zdd-HT6U3qCa91I|kH`r?M@OJV36^?u`I}TdU zN$k}F7{IZHOESrAha8g7d($YB>B-iE=})hS%e<!1n3vfcWkOXsQocd2$3c?-a!%kIijig zF3G{u0izz>lRbp`#e15ya^E|mqM{4Ep6AY{yV#r6waMw~-$++NT^62sPSym6Ox3U) zHzK0r8AUkN49OK+ZDNm(oGZ*Hq-NZ<=BJ;vR26s2Tsu6heSWGNT)L687fq7wG4N~!APOHYof+)J+iQLMB9PnfAjFaXB-IBz}WLRY8CmX>Sz zN@kTegs-sXn3fNrAf2NZJTC)61N8@g$)Y{8hqT>azj@PR^;My}t1A%36DBm;5C|-O zexuBWyrYvtwGJ>oh<9+Txv*)1 z0v?8cF#)c`n@0Uv;v;i0L+^A`QlgTc9dg*-i~MI>M9NN~UVsH&?Ai}4JZ{^KarCKt zp{6-fAU99^43;|#ncon>VT&!et>byR!A|VH{Tc?wyYpg8(E$km^;OP9b|^amqkq#pZNlgpOa}mTctulwIKgZ*|RJ6+R5}`NPyt zfMb-r&vQmb327p4w9^2XjkB09%Y1hgR;}3};GrT>_P^OD&c{15K<>gp1f6-xrNuq7=jdJDYu zZg^cBQ>L~DRY-a-gPR$&$;UG!UN5{Z&TMNjhQT3P$bEOkNYcd2{IOJIM!LYh2*d0FJNOz!=PF|`AAzJCL z9#EVy_@`ShxjU#7)n`n$(@*~VM~g%c);p_OXz3sD<@d}3mk6LTDJvbeW~>GN{>g3W zbtC%1irui}WN9^5uGc<3Q6|?q^=T301JeIA-haUbq3q#AO&w5F^dfgnOT&KTed_2yzbl8h1S&xj(-%H7XY6>i$As-;h z;9ECvJ75$I;W5>wE`YPc&n$ob+#EB{ggI7JRFVwJ6I&^K_y+_1>jig;Hgg$Y#7Qb4 zSD9iZHDI*vIZxf-q_s#W(Y4u4zWP`eJ7M2FhBt2&^smJL&0B0g#@vr#mP$;hh~3-X zzL#t>Zvq?EwSIaxW_iu3fr6AJ0>yvyZICM`$9Cb@gW&?rE`XJ<_Cn2r>@t*<$# zsyAa70B2*Eh=ajd#pHtTFyyD$P)Obrv&RgAg*U0Pqcs-XkBCVS-%dBl!PwMf!01a{ 
zlyHg`BG_X5g;#GLZE0}Gla39|+9K$;f+!vZE+NuA0#s|dCM^FK2*K6aA@-=E75^R> z?){RG(5)#e+nvNj1XSgFp+Z|W3cu@tID04Y5-axVB1^q-VhYKgb;U=IyMDfFmp*Ip zN^*^>=dOs1A8~b98}DwP8=27=Sn3%pPz?M!$O<)HYgx8@UQ|jx4{qj6sfd&3WJRJt zebW2)SEJ~(-uNF@UbSBT`8S2MNAE622^u{iE^Q_i?uTS$$p7{gP6ci-{<dHf;QpSy}v2ADWkDwRD0tbX4H5ML!zu)+9#)vO?sMJ z{9O)$$(6dgr(IS|c{f+zDmk)Y1=S^Qv0FXk+UWorm(n1_816=E{UWHG)S!mWnY)njeM`o?I@g40~0E0o+vAct*cE zl;8EEwGgVhYf$V#nTi#R-|?}%UHh>lE&=q9fJ_%H47E=7ojZ0_wDX+ZhmL1nPv9z; zO422qGV{9G8%+XBv4yo~+&trPd5yqpwENLK@%&CZu>;kXz@Yt0HgA)$5^`K0jRH)a z5}!vo;>gXZYUE}u)04YY(V34&t4toj+MEa9m~^Fl+Vfe|=2RDmM2^7E$Hb|NJ_wK3 z?_Zb4fSjBtPAaI~BytT#Hm4H!?EG2_8`Lyw?>0@UvVgDlQ<&zYj!3%E?WM`fe@@Od z=}L=u>w*Iut$ajg(NQotU4mJX?2s{Q$rdvCc(+vXqpzRV0d7h8#??AjGfWG$sU}L2 zI)8|tZF!vh!ef|FsZirelgG6i+v{nW=+B45Dv{)vrZy5b7VB4DTJ9H(gmCQ?r;?;t zZD~bWz(j+%RQ&hvateCHG-fVDsN{gfrFnCER`i46x15B84-EJn;$}7JOtp@ zFl*FAbaUE`olUudJ9y#v47B~id}pH7?E_&vqcr-8im`|NcUOl4-XSVTV89PjhEjL7 zi(I=CsI|@3xNSYvfF_Y%jEb$_>mT|YBrHksXeLjy-P>)`a7LQn?m~bND+Es>m!JSy zkd?GPS{@JZg!dKH{O^mo{&i163S99qS`zu3%mYQ69E2~59`yJx&z3H#WskTngG*kr66rQcy z-I$yTeRP?yu&^>fr*TFDi!0L%*$d-?w$Ki>1>@W7y=AwB<7WYh7$n=Ma19rTA|H4-?Fl9DJoJ^QRy7uY8M?&IMnU+a%(tEISkbyl>>RblH@ZH+dr#p zyj{Ld+k}SbwiisLUyb}D=&vRWrfFOpfI*-K7T^7T3N1Le+%CXVBjj}q3JSW50UnpV z{FzcaEm8F5V>Q7=zhy4K)ku6xzN1b{QnV-*3;bI^Wx|bGCnsfjBqVDd@^nQsNd$0- z?9Yv~G@ncR`m%DwyQ@`th1<7BWBM>4@p}7rR+uBoXms|qvT4hWX@4=rE zN)UA-0iM_v_tsxJ8pLpfA>CpGr z@5qExAAsXBnD~wE&WGl~r3k#rE-)sg;N--Rz%Q+!fQ`k!{{kiypn?e!5>_ZUJBCO& z$d@F8pBAc|hjATbiS6(&v^8tZF!RYkrH8OknToW zN(7aZ?(U(xLsGgy8pHwV?%H$i`~N)0zS(zs?6JGPP`rkjYtHjJ>sQ~;Sl6FeQ$RU6 zIzO-bNU7a-AL-ez%FauI3AS?JlC5lNKL@9Rlea+o7APH1epv{gD%PD{JFe{=KdCMO zhK9n9*~!O>8MJOlvc9h=*)zlCgr+E0*1jLW1ORcvV~i(qZ}?{4#yx#Xnm}|e3fiqh zo6pAR%Y&vh!#cG?ES_~*G7En~vpeP1?dF4+2?uHcv`AB*Iy;>gI0aL{e1ejY!UJEK zs3oG9VkkUq`$9G8>A=!G0z|lq`)16R;{j$eaoo{olk~e`CMi)-Mso)zPMTgU@nYw(B%C5ti)3V=LjCA$Gb7&m#F&Gq zgwMPVHgFxG|1h$l|1}xfD<;83^i;#f@M(v`tA(AizLk~D&k&RU$_2=f{DOV5jDFy< z=(K@j)U%u9(OUF#HIXIG~HDX!W!P{&KIa$gba(2F_0N_Y=pq6A|n>bqsS 
zUgye62AH;=vVOkZM$b#6_N>cu%8}u*pZXe?;e(ZkY~_80uGQ=tm>0jTj3a0+@;$vWgWQ$xZ zR&N8=4&vdA{?bneAlLqsSP0KQ@Wu~1GxQi(+`m_H^a~acwd&ni-F1mOvopu^Y;J%C zx1c7nT8k0fB>!qt)LR$u9ByJWL@tpe0uj)eQhi-=&wnnx;2D}Hy3L!zG!*(^c@RAW zkI_psgosGsz~8VCg&+gJVlRvIk9prf{@4(_{%FmO#{ZwU|G)7IM>X{w-`cS>WX0fL z{w0fjLuK0V!5$h1!k*C5{(l<%%f<0Q#Vx^!!C#d*4IkbOk7mJtq8|Oz1H9j+I^aFg zxhKa?^G_d<_gNd}KKb{>umAO;z%Miz=s#Vg>hj;exD`YL|Fn8)#4z~n7bE_=cS-)! ztKg$jV*%n1a6VzJ85xv-RH@0DkeV8%WBfe{Xv{#Q-F}_kU66@;ozVFWV5}o{N zhu!zrEi!_tsP{xmqYYAEd*uD_?8-;XR|zBf4b3Y{aq|Wcm>Ov5T9xpJ%V$6q zobhaq-=?iB0;qNYkHLcbt|7<;zXdsrwIdOrNk`?Xc6Nqs1~-`O%HGd8#D(TtEh3jb zfJ2+pkrSwc2JS^Ua7qAV4r#DO>97H!IM|U9fXxmn(*B$#O#+Cj)8KHuUROJ@i2lk2U44C<(2t&g z`FcHM#l_79S3d$Wi?Ddj{*OXx1ho1Mjse{Y-+-zE4l%KO+PrI-a^HwA0e6z3l&ow_ za(qa_Oejz;giBC?lB)z9Am~$~TaN`&s=%kn zjmp*E{!Gww7@p^4(QZPC=~uU&1ZH0t5#Oi2KJkcb1_T!Es?Xso4k)WVga)QSoRv)6 ziOj$DeK~8|!(bFVYZ5m-`IW6L3orqS)bhDJRt*vA>=y=(-4kL!&V;s7!Q@d#A-ad{ zt_eucrW57>iGK*F5g>S<@2~B-F9Ygb+0?Q7k|*JbLAr(K!B1Z#K*jq?Ty60^I{KZpH}G=y zk1E#1rhrGPst?UY%l~uiF0<4he830rXT9zzd|V%YIRXuHgpThoCAE(FpA_$yZTn#q1;_HMz5!p5FQX zBMa~X4JC7i0uk{XB;7jO4VWi8^6CNQt1st!-@?OzA0!z+qvQqP?OD(%xfMQrliz$= zng<@^yJk0XHvKo*A@u%qYFVh0^npKM+0A_N`IiA_+|t zwZS5vcZVw>jg5`HTcIz=t+4%#GV)#^5+t->Y_Mk9%-Z{3arQ49v|uLMP-&ED$pbY; z;EM?JzU74DG?Hs=h&XS3#jX!cKmq;8kTy4hOKE6nNB--;;FgDd@5>wXLzw3Ks@>8& zgpcP}7V!xAA`$UHz?V6Ls zzUkVDox*c=akkrlXJi#V!8(_KDw2l{WCjFYo>r?gCOSstCHOVmobJP!n?=%tZbkP_ zJCS4p6&c+M31d4Ls!Ez|DsqYpmA7%D=-4ERphpnj)NnQ)-;pX9^cix%bHa*4->=7~ zI8@qh0&(Mb${iqb#rLs(4cY<@RQDp6^cRahDnzEo%vmAr;>XXTxwVV6cetXiU0t}!0CY|t@;*%4`l56BI>o{T|S84 zob7!9ODFK8m*~|-1HX^35($5nXZ6O1?ZqTu=W49p!dG9Wf&m*t1OUC*@|0BpJ#0@* zY5sT2h;R1l;6zQ(aTNgp%tr8S6;hsppq-&>E5N|~*l-J-#Q-|=z#o{`HaIj;b1M#> z@TjJrxhpBC4d$Dypkv_~I`&ghQTY@Y$ZDqoqNSw~F=|8ORSZcF!0#TblTTxDt_DOY zI%^2ED;&kZA_wQC0(U%8CX06rsNgfcta#3M0e>WpwM-7kYKsJ>f5+BrS1QRlA~66m z?!wCUSEDapUKrftQoO2v3pVcOPZcovW5Bv_r zB)?#>Pk+^d4=L-2aO50~Rfzc#U+Dse>EYY`zvD6EIej_~`rcR7EsM-cF@Ssh80!fd 
zJ^*ezh`v8R*6Cb*EeRGa76hPsD*_lXxdnNFPn1acfqEd4KL+8*dg)1iS1CD5dGHzO zar=rN|7?EYGa`??g|jX00=W7(H#KtLvLmEv#rKBQs>mv{6QG05i+HWZTTt7?+SnLF z>B3XqGIxq3JAni3U_!f6RMsQ)pCbe20I!2TKMEY>Ihk3Lc4@?}56CU2zn{+6wLa%? zHyRxtF8w7)#qQa!U~n9rP@8}ZH6u&>eFk&@ou@6Jaj;}wn+Onn0pLKen0~K}41b-Y z`ZYYxNs=5NmW&;DVv;pB2bkklzZD$)Z~XW@jBt%7u;jgkZgCMILB95$>4Yk>3Q z_r&k7cCWP5A+T}TzAPkwk?mMmNC8e&EOMcrnLjQND;$4N*|)cP7xBcz#56jLfdrF} z{r#<6!=2QTVU>-PBe6l*;~Z8KrUuDJR8Z*g8UOprwdOMvRJ5qn=%La*dsu{U_FEp3 zf^(Pi)Fcn@o3YH-K431$D=5LHtX$j=fp~lJ!uc;o7x+;p4r)eYU~zSQ&1G?}r+}Ru zL;vs+6qW^H5Hzxc7i1)Ni@IN+uiBn!)2aBv$+BMOVa7+}7!vTPwT%ul!MlPZjgW0l? z!XA6Iw4kC5#~&OOfYS(Mc?d;C#luqPMrTSF40#Bg0D%7d;9(f21r&Y8d%6(*8fu4c zT==m-nNcH+&yERUxv8U=K;UThb@IwoZPN*A;zCGwkNzuEU zbR;AsA7ehQZGZO|9IaMwDEX2e9tLbcEo)qNHwq|aw)|D`9N@;W9Ez#TqTP zGd+n>lnvs6fP>L$z6D5~&vmjdzB5l1;Q|%)sB=P=94fQtB)_%BU_X69T?GK=$SyUV zr49uC(>$mhTdrSD)t+}QSrE7|TbSJV9R(cd4sunyWc;rxBV%*GxP(NQXJr=&xL~4e_px4e|lCYpn*({|L@h;xoH2f z0K8tp?T!EG{9E*|QLz9H$&D7;#*ob}THg#dTuk_pbbtR7@}L~G0r*kke{D4MRoYL* ztrq{8kZ@0?GZ_Di&+a?g8XlYdNIm8;Dn8z;qrz=l0aiBdlu>MQYC6#v-JNOkk9{eu zD@hSbQp3V~Q|GaVVwTB@POf~c%4tG9aNrck>IQ6TA|PmnXziU3=KAwaxF0!FJ|wypg@R z9jLEGy6>XJ8;6NV$w_FEl&)GmI9`aoh-sbHF5r3-^!M>d}wMV6d zgUqt;O)swUQ*X9Vv>Cq~vAv;|v+154{A$c)^bWrRAsA_@MNzSTbL~$#gV<}!g|nU< zvuW{d2CgKJ4@(4Mq8>E^lg@EKf1Jzq7YA^IvSy$n)*FdsetX5Tfu_ul%*y`oKvaRv zP$mpSF`oe-G#qm)a{kL?x&a;ZffTHP$Deq&Hhy!5<+q`-OYhw=E#XKW?#VhG33@*6 z@6v2l^;bV~odrEAB_OINTw@Qn_!NW@IjwdtLj1lmS6fBp%uepCm(Tg#8^_tUGubXS zL>YAj0|y>!MiA0_{rh?r{7ZB#)G5Seqs@ralbJ}1Wn`!3uRCdrrmg!VKz_0 z-#Sjar}@J=xD2rmMJq5k4DQ=d4+-8be!hJ9qT>yTir5tmh>;iAaKqcA9`FX8qf{jF zsODU$W!vItCA2)|a??rT-9`3^IwOm-5^7{}hSFMDT;`~I|2bcV$l!qZgIj+cbP!OY zK^{!QwhQYRRs*>fM#~d2gnP7!!MS*Gj*fv%w7>va3eV5kRP^3j@Wo)dz8cOufA``w^}0m$f=0Bcy{ zONjW_pELkFfVV@>5a*e_Nec4d)h!Si{&+eri$7Wd7BN*!Cj5@YvL#s)eU{A@sw*B> z9>0Uz7=oyvOpold(~ij=cG%*}shWZMFWSEE*$vw1LpF>clLEh{0kO&S%8z4=yy4+k zp=YuA(W+3#_{gRbd;5UGs3lMQNh$bvr#(mmVRS;aMa8~ui$plf)uvIjGTCeS+ZenR 
zi6@vOdOCOS6d_0VR@AjpwC?Cv@X+e`)d=mqqzIxOTbZLVA79QqV#n>!q4nrCx9GP>9VN|{AK!C zBLA41rJab1HjyTJ2S?a4YR{dn^NHBWHm`f{i4+kbIL(QSMBF2*Q5g~A>_c0%)tJ&O z!p^~N&wrfSsH6J}C-eP@$j6lLAKpC6@Y|dWlx<9mL+%I_73O{*L1RbQnJurM3J%Nv zaESB_wI#4I<&-It({Tr;>E5x>TStoM8la$Ob*Rgw|Bq{DN*y1|(0=khSf1C3st$|X z9@@u$g8o*Yn`BrkqjP3GNV&Xz;{%z)ohyMq+Pzqq_1yP99e%o3JMs+~*cdmbhQ0^; zC3iP82we^>%XwF2UxD<4*1aE*%b4nf){AGC2y)0H`o5)IWT_veuOj23{YN>(W%a%^ zVk$Wm?Ju`BA1A@$C;^7*t_(q-?a~MNd|ZaV0O^0cEd*5jrWz8+EeXTf;>^)bIk59qKIH6a;RWw zzkTapUtcOj21fkv`9enowJ#_ww`(7k&vq}jSOzon9^qkEXo!4F{?wRcC57pwjN=KRn#HJC#eF-Rt+A@df{oqg zW(x*#LDvEIQ`m>@mMOI0)Eb?TU~$$goUd%=?j^)5exLXyV1Q51H)g`f$kNi2n+3jt z&1!&wW@`OBDMQXA(sI7ey6X^CC5QInc%1?@{P!2Z)j|)}FhHVAoU(nZm}&i%=gGLV ztDs)M9_kJmhGCnSPXICb?iXYxM?UHpk-)pG3zsvrFB5FVK*Sq}!S6yx!CyZC%j!wT z>3prH;}0bJA0iq<hunm^ zk!JH96lFsRN(Z|GzYiVUtVK&KpYLj6{w%E}BhBI$TB=+{?U?;BpHT(Guq}E01%0l0 zfKWOx`8ZuR6r0=+MgZT*bGxWO%iq&C_dJnwgAZ+BUscEv?eg&@q6_j`G;l8lMMkOw z9kala91qOf-5g6g^E2Xu+2Yw-6~>g|VSS7&$KAOkhhrYk|L9}KSW7{|1!hY$ zQKbV5WE3=fsnFCYNTe|e@v{1+?&QlpAh<1#nV%)LJrGIfAKL^q&0=n@7c%RoE7Y|z zZW$}HS#S^B?29hUgu3p{$AKLT#UGg6*Fbs#lsP>jq1d?>DKN zn~MEG+|ORH9Dc?`MQIc-S)5f(LP=GzLb|tPmUvKTrvFUWmzGa>T+ZunMR*tz`Vx+; zJgLK{{GYhG98Y>p@UeWA?4Kb;SW#vFRDS)T#?)l{2Q|$Gp`)g$okVNPn5X4mTy1n| z#PHg}d@@7>gG(!B4P-c4bH3g|rBtW35K>G)L9wxh&mAoF>a4KoF*Y z`2%P{pT{k80S=GHZa!8VRMLaE64>uw0kKuIz^o}U1v5-au@;FdeE6~`*+Y8tyDPu* z#xlM3vuwHUa6q=oVTi!%ldoD&WiUaoKBwsWq2c@6kIC8mrbg-@946s;qFxMrjEX6w zk8p)*MN>eJlRwqB?!1TL-{0FoRXc46xxB}aoX zr+l%WSMaLgpzY>Os~^B74=4fjMDU_hA2}XA{@RxInwk{c;^2zgrf(i}q%R77HvcCr zx8Oh{`C_-bQh+@1lqj4|Q6?yNiaXb=XKn5I*Mi?)baH=aeV$H0dtXBlU}{bumFOHz z6&TY0D3}U4IGai_idA)+r+5l$Wq~GoM-u6c&kQS9*mSG#Y7y`gc^nVSifE0F|MoKhgq4bS9nVXW z?ImLP;qjAKszb;V`d06=GmSg7Okc3>un^A?f{FBdYc0sDaYOnDQ)a*7WY=xHGH$YC&cp-bJu6b!cFtsRLmf?b4|!^;LmZ@uAHEz9&$_2Z!G0 z5XfT;ZWVtZN8MlEe*feZ7Lk3OA?uDM71c(MRXn3do z3CJByTDq3bXH>YnsaA}RQNbZjm(BWt%l@%{B%Hqe1?GEw1_K@On)EgX5^P|V1#)5s zzD&67{BRnqSCcMg$&+5V4eR3$`J_2T&EPvS>SwBy@RQ?yp^UbG+u5U_Z`cygWq2@* 
zTGkGYy@#(y|4);~Y}UzPW<&=2x5k;2?wPX->duS#^FM$_X90+$d15JCW>3Hg9geF5 zNvv9!>ibPQW!nSUVAELRN#Jq3%v^me5vRW2c!{i*!frZ_%q?&>t%DOifG7+~Rv)}%opAjqNGKzJeZ}oc zA@ufQv|S)Q2{zW$j2lC|lFb;P!+M5vTZ+cT@GYd)YB8;r;P2|4N1RG}C_j zgKv=;2!XA|>)~`1GO!H!ebbwcyjgFj&E2ChuSoQBqueZCaT}+hqAdLqQqYSFdWxYC zYMt)ZA!1){ra=2JEyW|T;3|+j)>QV>ym8~+wQ~)a5Qw8A-85*tg|dl}{B1g;@78~y zc<(gSDi0?r6y=5zvU7PJn}Rx0((Cg*@6u}ch*VEhc(^st;7FRgPH4E7V9BWW_}-j- zyCndn)b^dlO~ZdJ5g$_<$XI`RYqs;c^++%w1G~@RaSbObjoybkfEaKn{ z>C%rnW~H99nB@^{Uvaahog2NJ0z;uR)87m6{_~B8rjtE$8E~u53Sq#FWd-&Aa5M*M z;F~3omwK)Kf)W+V?|?7TL+@llw0b8hx^p*D8nyXrx&qzL?2L|;k&suMR3+~b4+&lx z<(pSTZ#rsixSdVpo>96r#p!>Ow{7+)TC|^{y-QA&xT$z1!t;cG`bl}P9^7+*1ZD7N zjL*3~Q8VH{)lQ!}is;Kcqm$JowX;LEjzUDGW+1@}C#W$v=hwo_-kzd@Dzi?Ud12+5 zegFRcm90HRQBY7J)90<0!h=l(QA$doU|~LsXfSwR>J<-s(GKUHre||6^c+|p#8Y7n zvFHh&DQA0WU?4T!5!?W8siV`sPLr?OFa9f9h{EyD3&J@62|x+}8I)GxoMItLHYdk+ z??3Qp-Lp6oe*ea`UbpEexNk$^(U*Se|AZ<3pN!c4{}Ffozq%D73T+?}uSMeuJ~%&D zFw%6-9zVQ8R5VQ!nocMFXAsHa;hAgSvqyf1kEs76F@KRgcB;=NRn=Vb3i8=6*a>6$ zKa^IVJcP*P{OFsF96QvxL`RFeRvu_IuXHULcn86UK>GVfXhdm~KHZ842D9aZq_*=j zY8n=#2me=i(yvwVQg(jwLR?cD#dg8dXpkDnL*bfG`&P2@-QWKO4+X`nkitWGVwj9` zRkd7L`>WT!6l=BAh>RD>g)Js#;ZSHU4Q+Hx(reQ|85GnnztK($Cy3gT*cKdr;^umX7^`9R~p;_1?|h~CS}K#U&Qadb&MA< z17vMs4cJ{?Qd*q6vN;;-!s2IZuB=%?38UA%o0kuQ(m%vseBYU0{8^Oa2Z+j&hK^sS zrC|mdXaBT;nudw>>|wt7D_(8{LzN%u29GxOKJ0>AZw zkGY6w`cdlmk;k>-s6fQJfMs!x=w6|y^@1qd-UM62%jR8SlFWHsn34?QM$Y(VvDl4t1g;|Be*?^e0W(i-o<+n!@^ z<3nH_Yt*(Yx+9hA^p;k&z?~SK9?g zdy6|FfL$Z5o_ko?iUs|6rclP3cDtxZFqGCk;sa|r-{)WauxGlV&n=d!4PwnNrSLFp z5KfL}NehLSJ0~N>{W--Vu$p5c+w}V8hrLymS9B?JW?lApHJ$r9i4_?bP5-QGw_Ntp zBj9vDddcQH!Stfr^YOxq<Gev-|J5=@q=i~eSbp3y^(%ke5ji!(W(z6 zT)sqU&;D-7dGbrr{csv;M^*;K);$0G%vf&B^xI!5HVUK1h9AYm{jQuvu~`k|QeP54 zAlkJ?*weD4h*|>fIHM1f>{r9o7w<&4(EUS_*KHT?k)cpyB@U+t^4a5rJV1F6g)bLU zjTI;yy60rZQ(tX^7bcL^%o5@#5e?M?!oBU&rDb%ua-P^OwmHhI`aon!khbV!@v;%k z;#}8$W;cB1;o*6wudCo{$66~JIpKW#Cv)F#sW+U5j~8=W3|&`JlDa@gm;KkK!+cfH z^?ILV+kRPAR#fu*d^MUrg`mzC7le9!EO~wRm(qqsCD7m1?D5BIeST(Ly-FU7`(J&1 
z3t{f%ZM~q#pX$U@awuuDpbRFZppfC3+w3AwiMd&F|7egXQQw<6QOr|sjGJa-G_~Vz zrydv-uXl78gpkBZjZHq)DYZI3Z9az#>`fcLs@q!(SlWL9#L^)UAjSC-goC9q2%iZ* z!qu3brL^$aAG;@As&hftPSRFP`Y`Q(akfRMK^&U7*|crcEav& z{aOOxszUh<0u{bz^R+|iyfL3{Z%+nqX$LkhPBPkkqCln}m(_1Lnd(=IE9q$a?UVAh z(@xZf8`h=v3{Df7_hTY@3=3}inXO&Q+0!^$3H&11?e7H(HaXH1t}O*D&SbTa5}`Ta zA|4mFnZ*U!&*$*M|9;$fJ$s$pL??FZ?DJx{=5bEovYN<5?@dNIrYu2^boahvW)aB@ zw=%*KK0X0jTZ^axI}lBJ9ydK5M#0PUT}Blzm5tCm=3C;kQSqajyM`tEA#-jopVLv~ zA}Et`L3Q6s-@(PYbK}>@NP0k2s{qxVJZHPibN8_;jYRROL!hRK7^3xS^M28uAE4lf zo(3cOv2}clCy2(uMYr>79jD^pS_c#&^I=8W8>r9No-; z{b76;Op8u_!p#`c3En#KPR(0{%|7xA&{71l6SYWK3d&eI<&#@u&y%W%LPA1Au6>-I zLq|rM%zL%1B&XNf&hc40aXNmb*(>%o7#uW~7YdGqKHLzCWn>VwrZ*D~)9?$k{uR{- z{T8p`El|afojSXd2W<-O!pG;~<#*}2^=w7sun36^?K0#*@p%j%ppr0ru%nPE(^F%} za9C^%bx5x4Vxg~;>#CTzaMR^H2h#V${L28qaTxoDqCDY1ql9MM+H8iGJ9t-qEvMag z;Ongj6I1b9Jk<7;(X*4ysF)?ss7j|n0;~kh@81o3VINer1fp%q+LF&w_?)1%ap0dy z&jk%Y|dejW1P!dQRH%kygsJbyYw_U5|}0BpZ-ke$={{n{bhU zsbYSy{BFPHOap<>c3E-mhop$diZKZ2!|>za8K4Wk&L`S8rQp{d0+7Rg4w|T|KJRjI zK|5_eWg1$U1Km6t@7seLDf-4shbf)gJe!q_6A2Sj339b$Q(kR*^#%lm1jZ*iQ0;{S`x8@BLjaKAy{K99NC5#iXZB()_Uv4i`S9wtyN}r8GJup=m*=+R;_oX8Gd3g; zaJSSa|1!v22vRb3hZKA92&)gTuY3#``T6*&nrraD9JjQk5@d|(j1qSX2xDV2pO0k( zIxsB;>7F}t=P+npR=r$$VoB4%~G(FPN zxfc^>yr9jH^&^i4+eNJ!`^`yii`T8L<5U!Wr<6dRQh-dE!t3(j%I)fny^iDERt<@6 zox|qCvKP?#qgnRYvGaZCrS!O|)g0#uFSs{(%%{u8+WJ1TOadw+PNNu~KrQp=~y3Do2gd+2l7vm`@N^~WwWECMo5Tc_99 zWOpPEm~}qGjyaZ}f$@X+^ygk1*!S{r)0Do2I;0IJb}mv(*18GEDebR7Y8x8v95chj z+aC~tb_N~L%gR`C;pb=71F_|7U*3}&eAN1dz`pg@lj<#El=hMI6{5VfLGb``rl(VbgH*0x6{bMU!$}~`LzLo8Zv+fP#SqAf#g!viPfJz^LW3T!_qkk< z*H>GvA)N||4r-LFvS-}JSA2d9UxR_|1((LPRGj| zR3-TZX#sJ+>CWx8KLP^Lt?XgCp%J~v)CEfbYY1Q7ySL3Hf{5Cb_A5+=c>4-j7yaZDTNR|N!jh}Z9Z#9Catsj%9aZeE| z^>&PM7NCz6Lqn#m&Gt*QcW>6vfs%oojF-@4z9JnnH}{`5FXr}uWFb$Sm5=qQgT3LH zItPwIV83}CkUMP*Yy`0XfCsIY65e#}g$JRf?~_rE2#46rPRz_i48o|Gt2YS}_eWZA z_LqtUg5<97ic1jD}!Npav_@q#WQFL>~ 
z82i?>&Rf&fZ`;xDf;TpZ)<21>EivteiP*<0B4Hn=)I8nuqtoiE~v>;Q&qc+ju*IeBDtR6<}0oxT(`rgQW5B!d~o?q3hDo(mDUJ++@ABv)h}RnmG== zIl!pd__arR=;7DmyhYNo;My~>6lOIkpr<-g1!Hb7y8mcwl5-Q_2(}YOCbDm{J7(@% z25q{HAsOuV#2wlJrte|=Z|AhNP0h@!Y*ehqysGEIMmM|G&(g;gm#@D6DqD{=88qtJ z7zPDeWvpMOV)69Y`0*iersk%nUa3MtyRn)Y?+jD}`gZcb(t}ano?bIG_4=xnidI}a zawzPnM!7yA5#{(&<+T@|?flTT*~&9Y3t%0@w;bZ^?UBFUl1MA&=ALkB)T^nhi;9gU zs$pfOCWk<-?grV7YEkM7y_f8X>7LPnbt6<-OQN(0SEd@AmU}nU0t)wsq|t4cd=d}Z zR>B8iTb#pLP?MTEVPefx!U&cShFnnhL@Ji*>9Tp;G3q-|*hz(@seZV5vAk)ie{h0o zl}tcXGBdTP=qYWsYAsc;v$*fp9551)1uW8l3p!d_kOVXmZ?}LOmYhKN0shrd{;J~2>7H4Gi!NaS& z0M~wb`P`?Zy+DcEe}ogMSwVr3h?DA>>0DK{qX3z@=Ir+v6X902l!?|GuRXE(CLdu+ zpKC_2z9_bPkz72S8sw{_k;Jdk?xxWt4^U%2ghT?3JNC9~lJXUhe6+N+8hOyxcVAUC zse-CuWm(v_?&Q+Yk_u)ZI;^Z<(XF&{uBx%bljsf z%=OjAKAZ9dmQuiW!)RkV^ziw33G*UY5!x)dc!FriK}4i45;ESC<0z0?sB;kK;%>zV znhmB*;xL`epG|JN0aFB!;mAlV91j}9;uxf*_11s(!o;7;VP8+9{4QO*^+L=%=5fNp z@Gw6cjTkNMcTg73{V?BHa^*|@-lsV%g<$d7vBp$ZNo#8$IDr8gHbvkM|H&;=a5xFr z(=^~rKj1GZ_^@ISyt(1r)#B+Bzw#Lp<@n`K^VF{FqpZ)`an!46c2rk{z0g=}}Asz19wbd3`lRH-j zn(x;^GOvjiV88C|fx}y!`@07)-LbHa9ZdR#bOZN)UZqMLo)Pe zs9mf-Mn$}f)%`9uHrA-7uJ&l)WYkDrZZ)YO{ZA6@sMDbSMRa;{FE|f_T;O4hQ8C=q zaRutON6qkuB5~1;8)?;kf3}W5w2pft=WB1^ccF_1SxcAZ{)7}2GLI&v)s$XLA7Q>6 z#{6k{{pi|bGlmxrw$N~rUwvRhD5)~Htgk1=dEI;s&XWe^k=;@@KacqABhe!l!h4K~ zV;hx6X4~`^5IQyXlk<)07gSY7%O^8@kM3jNH~b>UGBz=ma68hMyK%EK*~Q28Ueu^_ zCk)Ezlcu4pL_>CRk|9YZIr8%PRaHHPXb?%NH2IaE#LD5ombZVnGRukt5VY%yT{cWy zKB1QP1+J{jH5Jb8aMEj4y4Ro@^pPrk`0xwtO|+nBst1&%;eRNoZ`8L>R^f?BkB=C) z?B3gM;qEo9TRrhSl+A=y9ghVVME3PKvIU+LcM(cBoaNBls|X_~#J4IQsm=#&R z@}MnmZw~V&(3jMCW8tqJo6*xQ;ykOp70Fw3xV*X3V62B&&)RZbvM2F5rr&hiHIofk z^AzjEfimnG;M*40ZX#x6ofh17=9M?+qFnlj-HK7lYbaQ({&qvPyJ%Px7YScecxErSALmSetKo)w^mX&2P$s+>{eWn~F3Q z@(4cQi3mGF^L1U3(6DEKx%=HFM~`~~pTP@p`n1c2ksrQn^$8*K>$}VErH3nV7rjpu z^OYNT3StQ)^t`C5EFecx85cQ)=b`{$xxLAMb%H(_vrSyr?p+*}r zRqj|G_syzy>i_%KvdffDp;0!V%mhDV+_ZP^!{It!ebr|dC*eIJ@1$b6ZFSQ#i1;-X zVi-|HY|T@W?Z|9~`!?KAL{&U-%)V_o`O)v-mLnn>Su~gmu&I7?b;viv5jG_#)Da3p 
zk?)caAAPf}{^zv7_f#5*h|@5>n+7K)Sk>21+^#@5o5`7r!Zsv(Y{n}(BoJMWB>q!R zG7U^j>g+>_1ShQ)7^sZm6_Ir_F$~U6(e-DK7{n{Et9wEiU!LyPyesHD(Ax$%c~8TL z@T`*Nh?LBAl1Yn-OXHabUgyvD8FHoc*T9S`?=OeA7%#Hurz*GQ6~oe}!dRXa02eoKE?Q3cUJfTl?d!L#yC(f@b&oWKyT73*9L8f>IKsN9Ql|2avgP1F z1=zUqqeGy$yVKhf_N{x$JYLO9Ysy?A1n@^*T3M=0!tm4o0BQhrlK=n! literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/eclipse_installer.png b/doc/doxygen/chapters/images/eclipse_installer.png new file mode 100644 index 0000000000000000000000000000000000000000..357366ef2dea473a898bdd1c51f30300889432ad GIT binary patch literal 85596 zcmZ5{1yCGK)GZ+p2o@YdaEIXTvS@I3cXxM!yF+kyhv4oG0RoG=ySx08@BOdp)tiE? z+L`X2mV3`VS2j#eMhp=S2Mz)P0`Z5qumS|c2N&Q!7WNZxr&UPu2?D|o;)k%HlH1Zr zhieAzkn_fSB~rq*)#L}xkEDV>1;-0Y#3l0Tb_ti`r;=$$4$qwUmbN5{KP} z-_4^J2XN7VC}a74GCg#pw2{*jVb(or7>%(KR@QUnYgEQh?Z+wTTrIU|(Y^S*e~qC+ zhcn~Lju{*rOes)6LPDx0M8_+mKR(#xr|do57le!s2Mrel2?~|e zWTm?F=rBzLgJ4m^pq5@f109hl6jKmVR7_sC2>E$qLq>d}9J;~j9 z-_x&VekTV6^p$7pVjlXwxdq6lrEj@8LkEWQ;}(}P1{dk9G&(}H0WE@v1OuN?R$Uid z9_x8bh>Zp2joC4m3R=3oP{q7U>D zld%;ecUj1OUW;UXAfY4|^fT-Fuo-QD_smC0JzAzK6!>iYttF4-8g1CWnxs`XBD4aM zD7I8c?c+K`{zE*Qa>Fko#`2jNY_c;nbmsc;abrgMD}g<;6JTfd!yqOiLMsK;FKvXk zG5hP zBc-M9x819Eq%-7R43PSzP9V1pRvC;O+uMiHJ^oL&0leagCOs8|$z98QMZX~Cg6d8( zH{ggI4dv46W4uSwr9{BaP^hzYax8e-9S;Tz(+haGxRZFQf94}Kt3~?f7a=uDN>X0g zH3cZ{4e_3s9G+Ux_F*wNT_G%ohI%jryeBA-$JUUBI%CW_QLi`eNs90N=%YPmP35_4 zMWbKP)n$lT_$A&~hvQ&MWnoA|gQZ~yphZ7SJFs3Ye5v`=8S=~HL&K{_#jCCW`Ii9t zaAD%&ED@O(T=fZmb)S*+2{eRR=5<|PsJU)d66)!;~ zRCkeAyf_5!UzWA1xAT8QD;lEAy~1#v*Z6~i3~xlnT4%w1KCDYVjpw(2k`!7gg#V1s z zSt8WXVo0JO(G%3T-!M5JgbL)LCPJJaEMv3%6i_WUj`flS97&>h98OG!E#@(ARG=-d zs9@=ZZq~v8X;ga6u(@@j3Bt$N`<0B-V8vpx8BcH1qM#*b7zUCAy(&L;`78*3hr<^Yttt*uJO^+b;VhMtUz)!w8oV z)5%EHlm*ahx(pGL*H_~qis?JAPxdc5zZT&=AikdP9RKbP0&VWH55RtS zzQ^A`*7c5!J~LsnQV+{m4dHk|H1eYTt@C&l`1X_6PlYM82L{kbaQpJS9-T)pv3^^m zamcB^6#u#m+3Dgy?_GQm$sfE)w9jQQj;~^IwTKjf&*dn&gneT*T)Hq>qB=`7Mv(F& z%Wyln)r*IyDxKfrvIBErthsu 
zdpaANP~3NYMd0RKw#(?p=qpmxF6O=-eU}LY(R0aAGPuI-wsiF^-R&c5%D0^tj&hAz zX42krW$tP0J7yzQ6O$eYR;)FF)|ctpn{|x}cx=<}poyU79}*}(u0xM?->~IbKSCHh zLF_$D5%EVi>@$ZJO}Ox7)R(M7e|A}BqdQ^}gsq`z*@5hBN7SDGEk*>D%=5!+oZaSL znv%CkPhDkoRZL!(x1lg^VV}CU^KRr!gyM)SZiWwErgB+*PIo#Mue$b_-8PtUtl^K@ zDqpEa&8sN}qE-*uy?j?JWU~2kd!cc5G0c%a79mQ)l%NN)K6u&iPj4qV`mV3ccinl3?My+(|?|Y2Gg#t_xFJA{@3obcH($(~OP$j(%aa zP!M?96IQE@$&*!kPxc_#(Ly*+R|4ayDiTuDSpXj_H9vl9s?ZxKs}hNz1$F1X3gNEs z4+l$+9iG~nzONbn`!;WF$%q&+M6f#2|A64r6%wVXQnP!d+{{~*Mwo(&hrQckddJMk z7htXd1*yZ;rn%s?D6zzKVxDX+5smj%|G6IKSK2RVqJtxwy;199iy?v4!M7KFf9}YY z8*W1B{%BdD60}A4j|K-std*9^S6VJ3x{ucTK&^LAY;eUf=8a^e`7C97WFsa&u^5)chQTc z%)SIPJ!Rx=*}jc{(34PJH`WS~yN(ZeW2c6r=D_>k7b(;tLv3JPk|LLNm4$yl z-zEU5*BpIE4=T=H=-Yq4#pip{UGJ+)%H+8`kl}l`y|=q>v(>+k{!moBXV>Ozf=bnS z+mL+~eZMkV-Tr>-#zTki{Q%AP`eoh7!z0>^C3LwmVGLtrH@tnDx9KXE-wUQtA<47z z8e8|Vw_}`)IgsQ-L1KX-r^nOMLkI9wRjXwX-)=nI+VH-KklhD@295CwR)I2fnu5?9 z9i%O;%WNcu!p#Srup9?|qrYe(6!i&q+k0Pf&&~gCb=s5xLg4fJO)<5fBjUULG!I2}lv2 zP^rfPe+u+ak|BF`g$+j$G9ts$@XB4!w+OSj5JB<#I#Fc5gxLQ1gS~0!`}mEuQnO~e zCE(?`%PU*( zG5fnjsq7akT7AE`hWn6}kkVC)3`El#d2g~Y>d1zE*Q+ypU zQ}4%L9(LA!GvDtozf4arMnX4of@s7$^UXs0bqXk=y(V;*+3ZC^wh&Gp(6|1u>z{ny zspH<<01IlUf!}HS!GERL-MOh{ua`a!&b_1g+zo|>nQnyh3lD<-dQBBpZo(|pRy|63 zVs2P|nzIfu;lBol=ECTke5u=~`R$TRD2gw1@^*GZ^?Z1t zEUom1P$Umr57N$@Z?x5t*%zL){feRhB&4nrSR%KV%b~}`DIb?5r;n&T#4(d)-xU6 z);kU&Z?|acdm}@x^*%!U_iq=UJv^5m@?$z;jK{c-T&_BePqo}{xt8aB(yVj)f4*xf z%*?I!p1<_0{%&L^r2XM)`yImfP4_*D;C;g$5OHh*$r=yqFRtyEI4s%&b)=x~aOG8T zs?^LM*B!RJKZ~fn9%9x9l2oh9!Cl4GQBqt}zFvDFJhu<{XFg!_-+EL%7=Enh!Gnm6 zdWo11X>lFZA_!hz_C8r@x;5y9{Ah!uuv$p_Lj2lLCj*C{_TGNMt%|Hsy%ngN4E?5Z z(1IAft{U;=$%web_TByl|4;j{bJvGlI?oL`tvfEzepD#QIsk5>!L6sy6?-)*T`;XS zb=Ai-8C)D0tO^ozT`vy3n-^bW97{3HPd|Ir^ZQGnI&OUx$NO0ChtSwl^u>>2knA!X zC0x&fY2>YANWG;jefN7CCA!4zqCYW|CHQ{x+H;|6AG=6DehkyCFB3ey3=Y@NDBH~} zv?K??R<(QJNl(a+YjA>K*|ZzutFkbIqh({$#sL2nn}em7cil6xQniw34O4p@i-&H2vew{X4JL1VMDQQ+m1$RgxD-Kg1O;FBt>We%H> zT;7;R(R`z|+}Z1HCAi;jUM@|+7mdMIDmjV2u^7wUxlJ3?uO>d0V9Axi_${y0x=wP86q7@ABb2 
zp0D(b^P!8KTV{ySj%K<+4;7$Z%dUl8Qu>+S@n+m@;bksrSPs$D@CVD znmL-8`_)(){!8}KLOgU;IBLsLX5(k04bR3>lD`2bEfaS?5brZ4GM5{l4exs&&_a77 zJ5v3O{nUBcyA1O6(j)4~`VD=&B`=ZNBHt(c40xB5ZP^d z-ztrqnVH$BULEp-r=hO&?9r6_+ajIU-Ss-{T+E(twMzNF*7Nhh;Ny;xkHG-71-}Z0^_8ql- zJ=1pcwkb$tVHI0+cpOzMLV#RwS^bn;ebNrgI(S#4{_bNmWetE9^3XW|MeP041jJ1* ze09&s%;>oSCQz6C*+lUnu^q02s-1X;VT-BvCif&+&uxw#KH_LN^F-h`I7y2q5aGA^ z%$=%7;sJ>tOsW0**STfXbSbB~PQ3%l2I`n)Wxz)W5*~4;3|f@vS6p1uIPn^nQ;k66 zM2SL=w3)|iggya@u(91dh0+{KIJ%VbGbGJYr9aMivn;8ZnPfOokU7UsB8Q$p03!nV zCq=}lSS^83 zTJjkG%sOG9o`b>N&u`)q;!?#j)4d3NM&xn5r}Ljbc?0^is|l_#ACnj@WZE`l=6Rak ze;oB)83K3SK%BdsTQ_TQ;blsDZ3x*8>|fJVE@l>LPOB^968?=sa9(D6*vux%dKUwj z&Eg)eFMRPHW5X0iAHRqcPeA)g-|%gh{53%eA1la%k)HkQO@|ga^q{ILIv^ItAf1qvh40XU3&@z&N^_hUiv)Rs{!lbt z!K^t`Dnh9eoZsG_$%_wvKtMoAnFL)52{r<-+mBvZtd{c(?CfG(nN?F89-f}1fE*$s zen5^HIy^dZU~vseL8KznY_#G$Mr49esnQDjEj9U_uW*p_vkMj31TEv!(=unXZ>+yi zU!!e{@!W2_++KhUl;!;Gmcud6^^VTk)!nx+_P}HK!U$v-tb{y#oO5>hYwpSbt186dhgr zSeGO@|Lz3exyc>F=@cE>Qtd>Ot-AFl-8GgpOD zFX@yA)#T8M0Kb`JctK+M*~EnYx2`Agg>bj};*spV7fr}VAd4;I(z$Iob6x+Z3lzXC*>m6B8j(C_$b2&qcVo zX{@sm;$N;(pURJE?>6OsJ4MLEIda6EWG&QB0v@*ej+@m@^=!Z-F|t>%YnPjJRwu8C_^;z4VTf)ccS>Eg4)m% zOw$EVo+aYGIqQO~Wc0Np4ezU_1c=DKm-ZU>k8`E~q2Kzsn6fe`7r8|%x~E4l5E+P? 
z6eK#MNfs&q`XEo8R6|qVX=_A?+|6*J9%`Nj1^K~?kHDrWU5_CYd_1u0ppPx;?Xq#} z0R)2+CCJ)!Y`rPc4QkzF?t!pfE$4UsyM%+&Q{%-tV=R_g9(w*e_7!(LW@f6+=i89S z*)P8F#^1SMgI@qEH7HsK$f9q(99m52OlkcFS>`880F-B};wA8js z+rzT#+Xn>+guap(g=8{DgjpBm3T;(o47x4{#CxxOdHnZ9nY>!Up_nLUzO;>^WxD-a z+s@?8o*#VAQ|o>4@mXsspiPKEgH%Anu!BcDea?}VxNusot-;I*;`URn1iP|t8)!H5 z=Y+^NaZ?yyzoizb(Oj>;U$Lt-<&rPZe%ygf2w}}0bS(|8e$1LWFX-HNq#OxobPa2o zMtb>yC_ojoa-oTQ0tr>O&DB5ciA?VWN1g=@MdOK9ckKb-Jkgy`&wO16Vr;fRV*GGb z`f2E&#{n5C*=C)2{%`Pnn!|UN`q$^gcQ4#qo9kl2$%|((CeU#NFuDn+jxjY;4U?0) z9SWrk%*+A$*uRRx*3T*MW9nVcb&~0Ib+8V4^Z=yk+w2Q5x3mOQ3r<#wL1k zaj_Lo8+_Y}=iQkw^-f#Z&mtW^LYgZVYQ-k$>B&7{U_Z@>ghWrVZ*17L`3V@!NAUH& zedexph3gwTz3IS_jgxeKEWP}2id~y_h5GuCw(I(^GPq*s)E*@wq}l4&0tbf#JNO5m zrZDlL?tIVjniFC<_*Op^vnMuttf?xug;lggi3lCZ?s0w&V6ay!v(IjCivcftzAXvg zY&(7)mU70AAVf*aui5Q8zZ^a5$_g1m#ek(19nrg{Bx{w(Q%3Sj;J&fZ?t^`wqgLFG zcT3X3mro65jlZjP=FKAMmCdamJJ$nh?Aq}vTD8!K*|Z$gp{nv3XI^lYd|5*uov0N~ zgRW^6N|r}N6B{naoVj0xm`5I&IxkH~=^ECY*zmjlt1YpoZTYT#oXdv16^5F!Fqz>O zGujlP?N3JApFC@S$BSS?bL*M)*)IB=fNFj3o0jmI!jY_(Pd@aOK|pFWfeNNNqvdk+ zEAdWpT9N14GnXZumzjU=y3To>1ZnHcikfLnMWmVo_J#ogV4iq*cr0ved1cfI2?-Ty zwU9AGhOQjSWVlEP6DB`MQO(i@!!f?YehO$_wIL@br%aN#xx1UkG8D>1PBxxvxO?=^ zSEErVRT?&80yLM4iz`+E6)s3l8AC+=2<&X<#`!s<@8nsRariK)InDk)R_OdqX9{jqeiJntt^M`KXonwW~CVeQ9>4C zmXKPm(Sj&hW#NK+--c{2|9OI4Z$tbW4YmuSxq9$hkH>-&yfO>UbqEn&o#G7^ITqCZ zeobd_j4{ZBfwYlukEp@=TMma0FZckx9!0yY7R#Kp9Hj8o3FL@V)z+YhcTqlS+8 z>{r_PTge!)ezBeejTHtKc}I^GZk8F#YyE{leO`iu6kOlpQl|iaF8FQTR{ZyUCnzET z`r&Ke;qKP@eo)>X@8K`9z{1~F`eo0uxJ@Mp>!thjWf43hPQF9VylPe5&agbSXbI^AGSP{2OlAQrh6b~~+AeH~9Y02Q<_Zs2 zr}PD6_1m7jAdyCQaRAS@HF&J)kuJL^_3(1mw4c4AGM1-`q!MEDA_npikvECOgLYD(ix_$eiKWk=3B5J zU-r=U<%UaJEe&Aue=eW8%tQQ}>9i5k*R!s7pGMntnFfu*($X;Y&daC@_4>Wv-)1tt zU@67xW8dQK(tk=euC)Zyxty;})EW+P_`Glgf&B5fUF7IefCNo@l`G%=gzf0R71}W} zAOxKIzCZKaEg1eIG2Ca(A4TLJR`&UZfUa~$4U?LNO*9d&&-dH69_F^TL%R;S<~Rxx zWpftZZFLq65)B$;*a-7oWBb?QRG*ZQYQg4Ns1i6eq3G; zbY#jvVldPP+`J3F@q1{Rk7>nZ-JOxLv$+NKrA?%fpKNeceQANW2G`pLAK 
zF4I-cYd?7tM(j4aKS26J0AK|bAjoFDB+Qp>dVjWhb8}PU`|jhUUA-LNXtmgTxjpn1 z2L}WzsHli$39b@>^#@!xL`2|dzT)E-0o%N|sQ&cy1dwelx*`9MFo4GqGZau(hRf%L zEg}ya1hQIdPt6yLVdCS9PK~UsnwfEzqYB-3|{SyeZ|Jk(Wum6 zA-igqW(4eCax$h$wa&y~)VI`(3{n6%*w{WTe8VVKpaNFd&kEM+ySZTjxGOG~Q|Q{7 z&eQWVr^gNDqSbs=Xx^XcX+cRz_;2hs!V*8o6H0BNzQID7Sz8<59?dy9In~&%^DQkc zsWn(&ko=b}t2|fw(rA}W@A`%VU(NoW zZk|@4K&3^8RbO8p*ryLk!8I8y7_Po6ABcCZm=N4L`QVs!l0M{?qQ`?tr zW)>EJXPO{W3{)Us2MgxP0M|Oj<_s&TB2;@g1HB=xdfHN{OCV zTpR-I7N9SyS)EGT;O_y>NvRU`$y&!Z>Dkbs?F!ANs;b=bG7a57PAyswuWwJTWo0w- z^Mh+0p3^K*&d2k$fLZ{10L))J z0l2iZw1rM@?o_GZf342fV~~HIot>rR698D-K6-F+vh$JP0RmW6;+QdTII#*>EmPjn z^xlV(UJrc)7tZ~E{^VF$nRV~FWILiVP>}_JK1k0ZgDxJnW9sdxy~;n6V3!*i9bE_DW>wo6 z7by3SdlG==Ne-#~E|X3*_%n?IfJe>!H0UQ-)?ZcR z6Q9cH7fg~QQK*lB@iy#CGD!9Q;e0g{ZVylxuyH}1Zxhalh&C2a@9OT%A<$n>1l?Hxe5cpN;{Sx`P(l zfA&RRJcsJYL^>PKQ4zfGT{*&Q+UZ|GNo9#~0Gaa=?* z;9HlhNa<%Qj?4F;;e7B>a&Q@PxDkt5Fxth zJcSyFiMa*1k-X!xg)5LdG0;V;{{CPQe+mr zT6rbZ($*6Xke$%^kx|G$N2Wg`oh%F)gS7=IQB4kp3F%T~Bu$qGtXdU^O&!K3NX<{J zgec)=w-wPHpaUUdz>*b=!9)8Z5@Ct)i-%vC91k@-CADm@CQd64=)C1y3PEvrvb<9} z2T)`46?&(}$Fhqr&sIOk6w&KA)3`OJ+9I)pBz_n;q*+nJLju;Sb`V?VePGo^9v0vr zrq6~`e;O)6)-b$Nd@fwBGF0l@^gWoNdc3vLCIcgVb1ZH{EC3Pg8@j2PX^Y8h_+up~jx9*zVZ`MLmalZ}` zz$s9vUch*1c|PCXRO`ThTnRQ>hO+br zo|Kzc=sYnU_-@hd?7%)q=X~AVd>8myHus+=H0U_q3Q~u9ry`>HOHcP0r4vl?ed7JS zLtsUFmA)%GrA*GT>y8~wSROl9fSr4IGkEgV9^6Y)B?^`HHYQ))UiqF>?&#fDsH`2Y zUg_L|O?hm4?u#vbVM4BV^I-1=OJsaQzEo>++WON4DjYA=onbOpD}M}pq5j_biu;v{ zA-yWgRr`AqTv?gJ(5h|s_BCN3h!O`f=NCPLlToY`wNQJh|Jg2YBaEzduBto+*K@0# zCoe2v!XyJTYXI`k)DfsSwLqXj^NLMmSx^5kwxVG|GcW#Av?Z5w>-^*W2cN3->ywt-ZlC6&elD!qG+Yj;YGHj?RDNK z*@?)^(t`Q1)ksNa9I2vDxFZ3ZbK^9#xDGJ4>1`4zgqZE?=L3y-AqHH)Pag~!lv4_l zVu~_Wr_pV4m`2FasFEAUI-cT56=z~(5-pX>E6h)tpHwj^G8ryla-u&LbUoaeQ6CH$ zGCQ^r15z2mgW?p5!doc7GVNQmBbGs+TDgLjj-=%^2bcj+-0IZXPS5$O7$JI&rwBo^ zJ`*F`oG;EPRgO?L*6IjU7H2oDJ8UgZ8M|+AH58Oh) z3TQK3YMcppI`;eEBa>mI+%XY^6)}(92)*kt%Urqb}^hYkriU0 z37)`T^n?WKr8$YzxR0KcyNo`vPwnE 
zIGlsedv@(Nub)#hvo^1e8y#McZMM26yo>W^T7x&vg(BKJO1jGzxQ9NM8m4Nl$ncP3By)KziS+YurV40U^iyQ~v^UH!q#o zoI{*&k6s>wDV1nOHV%nWwXKV+0G1|p$CGXp{KrjMSka$5_3k(tIPS09>ybmpdo5zw zCRS$X(%^%W(>zyW0=gzk|2tY4gkFWKlc_9&hXZ7~rpbgayo2fM_bPCUko=E5A6IV6 zU1t};18zq^)fPb3G;DWhHSy3Z%)g5tU=PEg0CxP?+%kR-p%o2 z{*V|)^D~6@m5w+g5$Y3z%or-lsZ92zyk? zm5weU=%Cmv>B~9UR~XpPe#7=%@VSYJLvTJ!wB1-{(`=UsXR`U`m#42OhIj4_*V29c zOcNd*V}(rY?QbS3)*5|^{N5SfQ|J2IN$EeNWDM0YHG!asqh@_&7+lZVY1>E<^UBNK za*4_bmbcz{ZoNlpVWQeh-7)3`N|=Z^*}lcBd(d`B^yOpMGeWZQbW>S2_u8AN7C!sm zCA*0|X9PrCid6BdrM7iF_-!#?f%PLF8;|YEajA)mReG<4e3(yaZN|I+!Bd`XK2|*^ zM~$JwJfg^@gi~E>jzUzm$%`Gk-mZT21CYn6-GMo+iqY)>JE~F|;e=P%Qx;}ZEzl=L z#?TT?v|-fn{x!0vZ)m~eRkvGHPBzWukxfuXq&@5}?ssrt15r<985Xun}*0?*o zi!z2&L-XgSzlwn#e454=!?>kJUqv`K-Su%kp$VC@Xy>-%g8n85!=jo0=+(dzYM3>NBEUv#dzB~b|tN46I14#4m%>p{0VtoH5{*-)t z0sS&AMLukL&2v4hC{8u)gqZJPQ$z-oa2dv)#m}^IjGy6&tmQ&eOK9JYJFc&oO@% z!6LtjBmk8Kbd@=V$Nb({J70s-{1c_V36r3rC;W6Uc5Bt_%;M<7AM9O)uYPhYQbLv_ zJ>sEvYcl^NY1d$cZPv(v=XI8%rXZ-uQtA60^So137KXR*wmgk_?c>qd4+_PV_X&dO z#r27~CasX6V_4S^`0aLRPo9cdY;~(I7?67=61Yq30Rkjz=;Uh$B*=D#O44lF7BW;+Bhu{PDI!cq28jiq&P%Cu2QKqaTGRJZWuF% zykc7#apl(1mD#~5H2d|Tn8#(EWaY85Y4dJD2k#$Zczqx!(np1buBpJf2de^g!17e8 zqX;MbQq-Jcrn*?5T4+x~^a9@rxV@lk^Nb@AQZi>m9eJe{eD)TpMi+Vvi2-8Z_?M_D z(#kqZVo^YLPVW$H*{dVgwjBXxJcIY050y)-=*gAZJ-MqeZZH1X0Lqx3+nCbD?M!B+0T(D^4z@T9HTv zRdiV?%Wt;Jm3-NRG->7A<_N)z?!k-=QejIcQkO&5bm zxE7vP&FU#i>RmX|Ieg@+RNOR^FKIS0>44c`Z@j2NeZa}?&HMw?Q(nC04*Y+3bD*;pVVq{wZUGOGY(Do1 zqJ{?EvgcM}09E#7XFu`_kehj?!_i0&V;@+&f5A5)M+@AJgT^T_0@yR|B7e7k=ChfZ z4Pp9ZoyV+3Z|yMg2WlWSL%W9+QBv)&WOcv`1NO%hVDZ#m^I4GZjli}Y875P|}M=4G< zLOLsj5S1WpeA1W@KOe0EfC7x`>Hd71P6R)8PAolPAi$9AjNT!(`waWjzsX*YlNtu( z^IY|1$EHd;-gN(Ckm@b^taK^qN#9gzk5ln**S?9lwv?z)TZL%>2?N93r@4Q)om=0P z9-SpaGiET>7}dOsJ{`;y0%P*^Ynt(@@wW?TFSTUl?8?fO=CqJbPyU#<2mR#?7P|~< zcHM}f?GFH995TG;ld52xe8ak{vzW#4`oRBvzH;j!Hy?MUCC1D!LyK#yb&Nfy&8dx^ z_JE%|%OlH+O^gs%c$T$-j3)qy1SC0Po+yAwb5QOqBQPZk19S7Kkg2041T~g_{wg9x zcKE2LL<`hOF-R5Y%h4uNVIo8$V)t*s5yjAAQ|~%dZbzWOSAz%>2gnJih;aYF!myak 
z&ec!SqgjW}%&qtB3`3!YEpHdu8uOp>X0E)=gD+PMr|it`eKfe1i~$ir z&;2vKZ2sJC*yXF!g+&Ava*tn-TNqbv+k(_I6tKK)VqlaJx7{cFZA*C`%a+mw1560e}Kph^SOucfd<4vNu{M0;oe%$1c)R z>k{CY0mOR+|33bdvWC}HX~M?_z_G>lQ>JKq4iE>X{_(Bq^q(0hg{`aRMW;DTEgSZ7 z_G%G{AEfxZPvF)5H$2ag!g{NP)~`)N+6070V0kFKj?BaevfxkA+qcinPxHrW4fcrs za>#y_>{deZ=+OjR;l@8&pW2SsxB*vb0T|Om^=|Xl^Yk`kmsbiGPyc-l>sR_^Yz9?s zoAv>n>Z%lRq0#P>vXTVGT}KVfSlvI4_m8LCA={?kga;7=rnSbQkGiV$&;ngV|M4ja z2#x`!Dr6+?vw^fd;Jvp>7P}hrZ>lZOe0%s&Oo7YjE`Q4`Z2%(4)CZt{n2)~UM z4T`$pOE4x==C+6WDFy1eH^nr50vlNK>KYFzq6u#isZ?2$;$AW_XBI0kh7Gn;hURZqYmuTrdYWOtfDOpgIz~iv4ctdBI-_a50P%BUvTdY8>fFjsh<<1?K zeAl{1=NET~Dkc`86?rcJ>#tsaXxZu_`mIe4D#QhEBlmoDO4IihkEM|{mvWT2$7rsd z7>KRYzGYn)vmxf2$j(oFf4Vjh@ty>amEbg%igDK;iogOsDhGGAk3Y#X(my`h)V1q; zd%Z`ZA&dE^`1@FiiXbe@=}FMy;md4!lErF;3l~u79)dZxA>R>D2f^bitO5LxP`Vyl*T?K zf1Y6(DUN97ZT#F*N3*;wP=>7>02G)|>E6dLZ=5WR$U)|(Hte#iHfL1FAd58IBC29OISAze5`T1ndZv!`1=Ivkd~W zbtsB;Xh+`J`esUqXgaUCbxZBnNeU4;IM=E9B6JU+Le`qG1?l#mV=6{o?(U z%aW7Y#{qg$K;xj*2u zO9=JQ&IJ_20TFE=N-s)?oC6fTnd3Zqb-L^34jCZi9-4!h(AcrVG|ITy1D(5qZB4jP_N-L(3Zn7gbC3_ zpH#1*HxG{6xHM;R4aVkUh|)|#zcSX6|3aH7-)-P#!#z8Uvm&RR!tOI1Z!V~0wyGZ0rBIkLi6<`_sFip?K$ljXqE*i zN#gT`?W@Un{fehEosmNDtN@#1Dw46IlzekgfmabP?#Q8<*G+nK6<}rF5}y$#pxD9k zluS|}(iq=eLk)}WWdRj~(+jIaMKi^i?B<`97hUrBl4bLBJRXiW377k`Ev#+rQ83b? 
z5I^)c!vRe~IqFRa-o)xA#snw4%QZ_}sKU?X{JOVMnLD^&c+x~;&66QXoXOr_g{-k-nfw1%7FH4A(kyClKku+8zBvL zV)i2K-w_WkEk#J`#!c3wP{$$Lu>h%RDMjMS%fj%{{uJJFEgc@EX|V`bGDc-bq`aiO zw5-bG?!=tiCC82+k#+ch?5#0*>_FqHN z<_?@OFzd`Bmaz%jNP=Q4v&nQ>4z zoO~8%y1u$`GHJ${cytN!=uw|i^%~_r(J=;OQH6;=l7|jz=C|gA zA}3D`i&#L5l=0={BTA?{k(^H7r3O%cseH}GT?ZZPi}s&na%cnu{}@HrZYohy!^-P)VHanF|IS`8!CoA_|FP3xd`AL|%eoqbmXTRX3@?6Lym(8Gmb>l1s9`5D>yS0b6R6zrb>_1G;p9bW+;g_7>@I_(nQ7p}1po-LqP%XWu zO`DWF=~gF!w1vEvKLft4Ss0V-dLuYuOk05huJ@WMF`PJ(zA>$ zUc@WA+71r1BdW5~D$b-{iIOsmTs*MzH3qOk|IE}z8W}{IwPQ%T64Amp+|kiePIuB1 z4Hxf=OCilHwnZ=YQ^u*m(>2{!tl5v%sH#C3RHau3&GL!-$&vqY)V| zQp7ZtyEyc93Qt&Gt0G)?YH*NyVdmviV?$?VO^*Rcl z?>jRE@d-+!F0qoev1CaZIalb9yVzTj3J5f4veYJd9AZ8gnrW`I83kE zvZZ9!FR#|sNaeq=i(f5r&N#YcB17hDl&-J8I70Z#(~-2 zD5l(FRw_3JW=7iRm(}zs<3RDwu~G>R9bSFL@%lW^$Y2MVWawUwdaqe21I%MUu3XsA z6Ln8R%ojv_V7)TFwj&YzeTq@fs@N)d;jbIFwSUsXEoJLop%q3=LBFAs04u1Ir@Lqm zjB+8GJl|b20jf`}Uo{ijy8CqT1R9@+Mnf1fH#;L{WtQ=<2@jj@QyLQ?zEt7zm}g-7 zy^ficEYe<@uy^;?%Erg$nOf>%6Zt*eE z*x_K@MMbqxljiSa`^&{mIFjEoajRs_2QLU^3u_%IboIuutWq?j^6?Xt{Dc@r#wVUF zibbQu*x5)?YIwJ2R>vlEV&-pTziHuL_6XY0P{VjBciPQYL(|&5*C!-@9hI+Ny$=g~ z=lzkv)WPaoU!t!6D%W=y93F)Uhx`yC_wT37;w} zBiD9wb676=$JprVWw3-?G$}^lS1vj7r7zPn1$|LJlJSKg&MG6lRg+pNxcpKEYg~_C z!8Zyo{~0=QgTd#&=S6^n6pdF~v9dzChesk-QToh|bt6j}_nd3#;(+1nw1Y7eh9Af? zIk^!A+cmLl_O(P5p%=2O<9lnf(v(2hAjQ?|RqP$DMmQ3T*U<-g&A#Zx8?cadaeGgg zyjb)a=W_BaS;aC8`^MITSEaH z&=^e~*NM5x99>tf2Elx7Baa0i-LM=qODn2`^gQ3mFG_=Nl&aC$*j(e1@$*!M-@oM? 
zpRM;%%*xEVy1v&>)%1KH)|#-@GeC0RiNjEjtw$VDQY4MwBFKp|eaWm6&L3E%&5CE#5R@geMW=(c-3OtkSsaHN%3 zshS=%TdXqj_+&U?FJp@<$#5poQjcwX!IhchSmgTC(mgBP%^DNf*h>-z&cJ>sQHjIub0L> zcfB>QaYZ+QeHh(zL^(As2Y}FL&cK4^K~rUnGjo%|;L}@wuwB+73EWy<##XO~*X+e@ z1{=>7eDS3g%U2T8q;#xCe@8Go*faAY(cK}FI&>PFbZRDQe%;i@0jP$>h`T||oN=1n3rLAfTYLml=9AXTO6 zxC^>v5n%3RqL7(5dE|tZlgd62zY$G6fzOE#1&-`V z;qzSM``N(=Q5WiLycepB%bS#WMQDpE>$>W22YwCu7lsanrx)FLeOMeJ(C_g-Ej~Sa zjE$WaFn5{HCIhzHzFE7s?if)NY*Vmq&6@D)`{hZJ#H2#Uh&tC#zbho`=_RM&>RTL8 zPxL`kB`1Yc*}NW^!b-AtZDfW38ta{a8Dr8YLyfGTbJ>K0N&*}ob=1d3yJJ(4A-{;- zv5msY;q-!r9@q$RWZv(LsFl{Uhyei_=LKiiugprKhK!RGi{mYd-Qfs6TY{x2IVQ`@ z=Y`r`gbEz3lM}WOHJC*3t9_{Q5F41hXwGlt^FGIu79X;y8$R=~jRel`Qvm>47GIe} zMu)5E5z?axJhd3pf`S%AqsjNXHSWo1ebQDRi)={(i%m32M^$Sm6^BoxQRQcuB|CUO$kZQleD3bbM|~wqE%D|?1}%C%I;V)O&I!_PQTmXX z9*;lHh6Eg0IY)KbWObc-HZ^TY0a(KC-}Raws(1S7M9)#S&%K+_g}llxshgD*s@3e; zgyu9S4s6jJuor^UJ87R00Kfq#X8b?kqB?06EnrR(V8FQY@sB(GeJ0X&E3YoKR)kCf zQ$%&Otu)P!^<2m80J|B4r3(u4^=y=t}vud0fY;Q4oucBbGmv{Rac_16Vr+<};}y z{-4Hj_iY?f$OE-g?P^j6E6HSJ@yXKUfZ<1v;)qE}d$z|A^nMU`S~3RwGGBD>n!pmE z+u3ko9n{nkHK5Nk=?Bo_&+tFgRWrv$^tBCL07ghK;F5-%xF+f=i5#GZ@uh{d=o^`22pgsU8E$M376LIP&Uc;0YY5$Xc0tqP}IaA5&P)98COC z7f~21pH~OR`-E`VZX{rEhW{~vdjJ+6zy-8-pq#>6lo~ZgR4&`PeU}A-7#5C(<+{Ns zmv?cOj}7W^NdjDRA`T`I#ixcwN)VT{DQJbNYC_KjvGTe&4HipL2{_Ya2tYNs=0JhT z9*m&3LJ@zn`&ji{7Xvs2Q$@TsultdD1LF2T(ws4p5B-;51gUOV0Iet0nB*@j`;aO7 zqt_hV0VV>7zvWTE8gW3G{pdy$^US<}D$3Do#eK_!3P46b*t1!TYGuIe(Fb5~Ul6ha z!*jLP8lmc^!)i*J#$si>mauKMWAiVvk2vhqM-r3{1+)zT8JX-G#)IIVP=8pDP> zPKt@!CBVs{sx47pZF=eY^VE1$Jk@P{L9<*-AsRW9yJ<+4BjM){X=d(E0t|5h)oj#R z^n2obXRGZecMme7i33;>)p9W;-wXzYRN+Y4UHZ;}7b<<-YXGO;gh0d08wCGROMoWT z?ISLdCRs-3rbr9DMD`>aM@A~?Ox0JTlAeu&8_WHYl6AxY`$k>_61zt4I?Cdr2422b zoz#AI=`yr}d`Vr()gv$nqP1#U*!RG!S3|nh3k0>Mr7(IQwaXI0WJoe$^b>{rkdjkm zFgd;Hc54*`tAkMQg{UHcvS96~sOJ8VM@Q&8h3g6+jWkOQ2W}C#5_MsCUElF?$CaLM z>(J}s_%I3czYA0HwMyhkDf!v?!DcN;(TpKEB_N=N^;guKOUf<6Z5j$6BO=X6rHo6Z 
z#J470T8JdY*sA49nh&2Qs$(~REpC()S`|y3yH!*{0xz}~(9cJbpWOP#he6kLl`RmXSyPJM&kT>#qn$hQRb^wmMMfgy;~?m+Pfv z7(w3nXLD5+$hEL*wWGfE`#P+P!BNUKwTBkLW}xyBgy6utBJ|mNdeyRc+BLQU7zCC_ zqYT>OW+zeDs)IX#X91*08;&$my>w+6AKwJxNl;N)p9h(vBJ`Tu=$TkvGN2T1kOLPh zKpi!7*uRgm>f$L$MI}ioW^Q#@MsNT9$YC8tWWpKbPH`3wi_MQoJO!}2IP8lUUS&75 z9-&68oCXaP6e_AD6io5?XXA9*8IF)E000=QQ1fQ-uyY@AYR`n9M^w?i(ObeiMo#WM zf*4IwxJ)Dxs%@QVUh0L-gsI_8MCVE2UdX@#=a#Ep#RAOsdn>E8PleIBxF z1hHfv7@LFVU>-W~&$yP$BUePKlLs05F34BCfz1Uc4ayR!F{^PV-!=_( z0|*E3KX@jp5U|d`bk1);{sm6E=6E&p@qy3`qslRW6#8ra@O1TD{Pb41&f)bnOr2Wh zJf6U$lhUAMfWqM;Zw1DV4e*iKb}$8s6rVZagizo0+Bddb`@dI_6^A&1Up)%+a3rL> z4jmLUKstn4?eT)6!NrZ{pozLv?E->x8N-M%>TTH)H^DCG8=SP&a3|WI?oNN$BgPg! z_5%@1|3Gvr&L@e9PkHNF{qz^FzFfajd062+sD<0fO=ikH9{#)2K454ce>mxBox<;s zbGZ9-nThRVs&^i6d{}H_c5Y*zV}iwf!?!(@k|j2lRQ=C3{Lklb7CDPeIBO=~$lfdR zd~$BqzL{|!O`b1cxkbh+Fl=)BT)PM4{IxbILo}^e-cClL_MTEN3s)kqYvV=C$lC*1{yH4BfzLLeHHn1N>8;ZqA z%u}iy*pQ7)p`C4!oc*m%jm&`2U8F>Vc~DfTIa6KUOoLVBX4!V|0^~6bn5BtvN^J*L zpx0Ol#u}8f7ZHbr?|{AK)twg>KoDeM4$CpS_J_W818+mp-F5}}rI?}7wbS7{0%b@3 zp%oM{Uy|7Ovyc1TI}DuJ60C^uKZW>oq(i^1qKIH`TDZq3F^|v!`*$d@I{T)j4b@K} zJ%17wq~bw0)_a6LIb{>s?uL*wa|v^*U$ofgIFo1G3t8X0d$(hyzDrG#GMh4|mFE<{ z0#($q(w)@bwQl4EaH`!o+LhFa^fJ;0lP;9aGbY%OBD;feM-b0M#K*?0dz$ZS_8x5$ z=Czu#4}GzQ`+7Y?*cSw?zGSzhVCD`n#=Ugx!1}srkonz{kP5>_0J7MyKnTWGU{0Jb zO3NKs-V8ux{8=5*Ex>wwz5xOQ$S1#HGK$2Z86i>CtO%p?T!#Yrq|I5LKl-+8zSuM% zzl?~+-Y`M`epZuZn4n5C36$V>uOeeJogwwcMo*($?@JKLH=WqA9 zss0Yqfc$`x)LA39wvFD1@kp2EpKjRDX*>}3Z8lgoYDopY6Yh6``5{SRW?p?hpjJW= zcJmHo?ZwHbWL%#p!vg7mt3e(AHzzHu40@u2sWBZvW1HvLoX0hfLiEXSfoW0vSy71` z$7gWPrnC_O>wq{?DHC|)ifJgB?Y^cYNRR>`gJEu9u)#!ZZnjH;6UORfM0NfqYArH@ zt@d*AxDr5MKCkgi`+&6?=Q5*>Yu%Df(jdjob;Q4u^?nXCdbW^@1q?plBHbX3GA1uC zMuNpsbBjZm1JqMMCW1K3j3AzhAcmWQ!wtd_Vl>;_eD4on&}hmo4Zg+k{LJHsJfGa0 z?D%_yUPm)ZcwX(@b{I{f1*VuJWRo@jNEEHrB*nMP&Mpq`>}CY9G02_@Q`5%f!n|Os zgQC6VWcpN-1(L9QoZ^<-v~04bHZHG*SU{X6SF2=VVu1i*+1=3)ws7%Zhv5M@`M#5S zvScyZq#y_c%wh;;Hv3dlGo1sc50`en_5EsNw0od`)m?HjQ1dY_|EU#rI!OsQNnyUU 
z9BtZvYEP|gpnE15+-{1>D}b!_5nDo}bBVKL9o&x5@#oIDZVvF#b72s&p9^HFwQ-Ls ze?&pnp!R1Rf-JpvQR?W<3Lij`6aVJ=0V@BK|7x&@8km%+(S+K=SuDNf!_&#QWMkH9 zsEvSMNZF>vClC$@#fVXgM-#`8Q${P7#iwNQMwzF+BbtvaXa?clJLDFDMipX^ysXJi z=u0F7vBSSA%BfePY#HjpJga8tASl^){^x|9(Yh}=A_r1rABgw@%IEd3!g3hG?tiIB zC5r`0{L1HeaiH(DfeI4agA*^iCMXRe4|kf<+?1&vzea#pK+-z*`<=PDmYA?IXMsH_ zu`EI$NB|F}C<2r-EQb^)6EQnyORukNVlx*7n}JV`8U@m{{=1=u>WO}gsM(c`@V-oa z<1rZ^&RNjq7lz?^u8zpbO~xwGfbRjx^o3y@fJzTmSoD4bCE9>;6*IG0VPXi|U$C-k zwMki%ero@jf4s@~Xl8b#Z)8W5;1JW8!W@9=bXitme30%8Rda6GxmOy22}IqfcD&@p zyE)MQENf9ynUPYg51zJ}*&c6rL5!Q1;3tpiiP>htXifo=ED~H>3e>RqRcI|f*7@|f z6Shr7MP>>6+&C}+0jO^i96%YME_dTTTiN0FcLHPr%L7nNF!fl&sqBkNO?eT^YA7=; zG$@t@H%mE6&>2mr06o7abpI@l* zM<+|EdLm#QqCm3+CDFC=&1~RdK>8FRXf1$LHhdUSik>J)UuB=0MJ)O-Td5A+U(_x@8PF6p#r zz3qnWg4H3_C{U{nLf`CW-ei*mPzH4ov1WQJglcU@g;NFTkBY`8W#{oztX2MN(KN&W zITWU@18HGQR3R`$W%Y$i$l1_>9iEOtOH)^gh+DEhdQ z$;uBivOY#iPKnOwO%UGUix5q@IE#FcS zT=|&Ki@?*ui)!;T*A*$F)zaS6yUtuh!1Kz&bE4%4LO-~z1WGE9e^b#zhk}|W^ue;d z9G&WDTUmw1F_L3HhDHsxiBEqs#mLi_$N3bkLZ@$}PgeEXDCzxF)+63(1^?lV|kc0s+)-Ko$<OIMAXnDKWiWN>Gb_7DV>}gV#QVrv103U5*<4LS!i{FWL{S{ftlXfLE9~eD z#^ljTA4%4N6Kj_0bMA(TwUt5xXmPgxXcw%e#~@kj@2XQ_T}F&;kO*KbSp7%i~wBlv=4c7YI~jy2_t(Z4K2M7Q(>x#h_+V8L-*Yf z+fNfAZe@(Fl;h^Dm#VCS^ux#AQV-aNgESV_{Zh3S^%NJA`^yCJF@_igp5 zqo>`BFJco@W!sc!e|sI)iEcC0ZvXmB#&z>3TK16W(}#?V-9<5ic|O#!wvx*F@I_j| z=`~|8I%AtQafF=r>S3$g82z<`jso=WOxsk60~CvEQ*?+o?XGU@bA|wa?IYEwC$ry! 
zfpI3ZZEGk2ab9G6=5P1EYekS~A_)-kq+j0{6x64hcHX^9y8LWTkbQKbw1xoHQ^0@P zN`jFj#_uPJ_?lAp`dB39Qwy3sXfEy+vM{$-l`Zylv2)8`Eeibq^acNqM&bYa!k*7T zRpQB?S=za0{tav*8j8g9q{VRG(l;)WyuWg(&s5W3hySDI2!ySdCd(M7#&o*Zt<7u@ zg3+3*XKOGEi0K$p8O^7V`I!iN>G{y}*3xDY5n!`!lrx zU}aQMEqgkA?&`Ml(PjYzz@R;{WRf;WA8@i{zO;s)OkrpzlsCSaoThV56G!dv5Wbbx@=zFxF6^#RUE6iN-qd zRp5PxF4M8`It@@=x_F5~$IJWPFwzhaK*&>05lsXbH%+JCmp6HQUX}P(9IC;lL$-w4 zmMrV~HeV^lsz88_P)rfl(GldI-@eeTNK=?;A23Rg+y&q zV+0!m9SDHMvQi)N7Gv{%u2z9MhC82LxMA_uJ;8Ni{)MgPT-s#?VCd(&hPZEW&`I8X6Xw>#o zsuXG=@tV7`cC(B94*$CGb`A;FzYQ`aEHz-{0C>1wEZAfyaD8)8%4*@d(q1d3f{xEF zcAa5nFq)n}3E54+Hk+qngo~C;?unNDG}Zt=%0PPdFLi%EJnT|kouv3a>grs>zT)FM z@w7Dte3{aO{B~x^V?MXd7mWKI+gB%nV@twxgj@B#SM=DIeDWs!=fT%=Nk`j^k7Mws zHw*VeRBgR&i{hsqi$x`W&Qx_;9(3yeF3yLUYkJ+`@vnAzW_9hK1NKu-Q*_@G@#CCg z?z*CdLc-;d<)`+F@u{V6<(9ve8}LUR9TTX0+rq~l_OI+c4+NQUE>1g9S|e-s@8mt7 z+nei%@@mWc>Y#nqt^eHZPde=R&UcT`f}al6cJ!v~1&ucyMEPz|^h~=8HW!?)WRq%^ zu%#%|wjo4MuYJ$>eXL`?$`?9#-B+~#0i@A@K8!O~Zu6So_tYKGLSK)IFZ&s5dL0g- zFr`VH_7iy$l{D47SgPNq*1JCV{7V<&DAs9{N~!mB2E%&>IlT5FR1wY2Ee&m6cK!&c z{&W^RM=`F~+Mj+*x2;-;|8|V|Z`$9&z3b*GT@OTcwib|b|FV?|xNPFlDyBW>kX<>* zO<84RhtE8Tyy5w>;B}_WtSw)3u+Re%Q}Q_T)KtUEW8<>S)PgF&OxLMt%CKR5sHcia zNo5ksDFWVFWZ5w|%VtOhaq}je<4HU5SbPyLh50%IbrznYfnrSb8D+A;+-rS!Yv?=1`Tp~@#F>D_w)YU;Jr=38{b zH-9#1GZyJDeVkZsv|~;yVGGPBW)~xtk9(`={txlBDMaPt>&4Gy*Y}wo-UU=~GV)aK zg*s!arX1x_5!^si7GUkQS-PLX7bd-*8XzW_4e3KUsSYg*r z8?d!6db0AB0<+iA0Sla~t)+KCg4{MW=8$>;c}#LH*lc}$W;*Q}7e3HhV04*EUWe{t z@PRIkL5&t&&J3NA;nBPgMAG2?%=gW@)-^n~o|GIr{U5Xgo8n^|8@a1y>B4dVx!tM9 zn2iMwKOW$-!Om-h7xuAf+I&kI^8(HDC-b|w3g69_ZBK{d3aSZpP_f7Ptf#)vQCe7F zVly+uLpTE(SGR^{o42X69xhy&>BE&OBlMG7p1!*}-0jz$c0s>C+aEw%p084FcSkUP z9f{JC@=rTa{;_*{wtgE&Ns7Z-WANB#5i-+7bBg5lNvf&Y9oDZoR--(*i`hB3h%h!LtzW^L> zk1NqjBTYyTUdLJ})$2FHMt|Mcko0{X>X&<_+no4W+4VIBk{XVJM(#(Cmu0gR*3f;g z*w1cAmE$DK+ib|xY_$5T<&W2!@aj-QU*9Cjdxe{3&V`pyH!OT2x*J-~tFMU6da{Dg zx|=>$W|v`8-F&w5+MW*u%D0g@)R!Ctcqv;Ik89@}4sf*jZ>P1}L$7>So~hsYi7}w( z@V*y-GuP{ks7;Owf93h3_mE}p$kyiQ7vpnN9a;C$dijkMZ9Cw^eoXu0@>A!8pCa8* 
z4j{AUznQ661LTnP^4`G{t2BQ$fddYF+$6Z4@bx4*4PuJsZ;JDM*r>1Mtj8Fps4nCM z2@gQ&Ipns}RwyYmc6#p!Z)O$-*r1v1+yW94xmY2NR&8FGFPe3^h6^80@irVO1kR4G z7CZ5dT98S;GNf$UKJ7_6>tA;+d*V$m{&pX{Dr?P+(bDy#crmSAAG7?lvV6AEDu2q; z?#ubin||7UBX44=lw8o8cGQ%qb=(l!e%G$I1=zsP0lmGL1;fzOZS|N#_ehfc-G0Ru zUs0P?5XK~Z_0B$%IOmpuRxb* z-|X!3gtFXgtfj^ZXh@5reb;;L+-IfuE+$-K$1rb#+aI52Ww)4|bI}8CpX1{E#`o@J z5IEVH(0ESIc~XAW3T@f^+q>Zp+Z-SJ%;K5qQK`N$Yno+f<@HW=a;C)`9gpKzMG|L) zU`7<6D*KPVCnIS@huVKI^uyzhFcD}@OHDKMx|UxW>A9u@iw>q`b#&Tp5h;yXHe8+8 zsf!-{N8;zMG0Tljr9haw=<_8#PXf**2X4KGwFj?FM1GTfRm*11)~XyDcm8-d*tdYzy1ws+63}`aUwGF=`E3F56vlFN)jx;t}Y`zQZT%tRgSTIROvz zIz#V?KRK}%kaypdJvmI1TAS?GIa|$D`G=l<@Y%jv7GuL&Q+~QeYVkoKdW^fWbNU+K z``A;jUT^>5nFRL2D3&ZZ_+}uhB^a}`Sw6)DF`Ehay4?1=E9w#)aFt8-A7APIk{kWBZhMM2;oPX!k`H=)8c$2A~s$2T+R298+!b?uVCoQS^n5lNL`fJ(vu&L``M`u{%4jZ zqgGBE-A>M@a}s0%9E0O>GVQz65kUBQ+66YIwIw;u83Zy97Tgy9<3n)6B7@Jl>(Lre znCMk&cOz2Mu6-@4NR1`k0!31*zAUNrT4lXDNnpHBMoLRUN6u;+d7(Lvr%_oIZYbUd z2;%`RyNZ+BzHW_f`M#aS(*Yh3s#dwWDA1^gzTe2eP~lyf@A%z>Tbt z`QGY&DcfCD2B;A%aqnM?<72{C{EmOXzQin@{cX+-7?{!C!d>rVwFPIkar|cp)96no z_`Mlz>gCu7Kbg8a6*mxf@*Urt8T8}x`=rZo49&4y0>aOt)IRpR{@UO6yso4 zCQ}$b2!}BBtASOJ)FkMkuDmR9{*NMhwh#&^gh42yZ9l0A_)l}Yl>t+{0A)t5I(5v* zmOM?p0yQfeS0_*ukeI|8u1C*jyLA$-2MCEj+DLnIBu_wC09R)x8D?8h^$$U14Ud z{_&gyOaq7oJSktWv57St-Hrg&wi0nF8tqxHysi|L2Hydx z9+ET~Iv@UE%Gm~U?xu3b^FyF8VsWQAU%ohSK1Q$-o^|)WexWU4{)DhDK0jj zD$@e}E=zIQ+6>d)i(`NjGsf)4)#L3xmaC9P^_7pl_Px zy#nBmJ;N%zcYVZy3a^1MuEl6hgR@0U8k-ZmMx$>0bVXcCXBW^##tI#uIOpNv5_1BTJ z?qi@pYm``#AP4b1*aij7g@Jgsjp-6>>Y(oFBbX( z!8g6W^&D$%E)9p555%iZ^tSq!bGn2+I~pvzykNVC8!E>i``*2Oe9&tjd!~CXp#KOg z<5$T}&#^1Q&3!%hZ_gLF_hD>ydH1g_yTbZ=T}QJxb=M=I)@<@|uxaY)Yk$6}^5gB! 
zyUc=Y=B_5BA%s8}ugmZ&7WRY9LRA>8i%=DLQK{{OM8nIlx>AUJ5E{yMLLutqMc4`9TMF8(SPb z6qc|esn+rfWO*5J#>VUL@bHCODLGzF@6*Cdu^_5O0M_ZWW(QAm4T06G({4)@$_^;Egmy*rJI}#k zyDQyAMN?QnCbLd_pU%!Np!L;4q7Y#oP|qx^{R?w&aX7C5W43nkU@{+YoUJZniJfA59O$m{wI{@6Qg!wOLT zAGR6OH<&!=T5|pX7rrPQg8anTQQVr>}mLy4;#X+t|R+iZ*$;Tt9Wv*2= zg39dJ%HVQtDb;)Cmt_v;a$N#&;{zZVC~v=E`F87nWSmryqfVzt7tiHxj}8Q5B#Bpy z&y7D08Y4PmPv6{WTnlUkT2dbr*0)E(A9;r@^1wG-W%f(vJ4k=gn`Q?)#ngqe7x;>R zZXck>U~Ur{C%HXSV@&a)^LN%IULy17guG%3IV~+1K(#ZCHC9--?e=@>n8;^`TXP=Q z%p!`xeIz9XV?OJ12TJ=Dj_*z7*ve^B<0iBWWO_LC>S9GmMd=80sv)`!u8nM9`vBwm z;p*vHk`^FLp%V9E9Sn2Y~z*Co@-3tOJQF_?Y- zcF$4+z%E{=C)VfXKzOt3;^Cnc+ACz$W@UW%pd#OrW#cMKqH(ZuIt=jy^tPLQS`4|R zg$e88{Y5Ga+gDnp35LGbc!i79`>{6_5lFW1&oAqZ*I=I{&xc z?0&I|eFDdOKN@^QiH5`i#21p}qjs+57$pw5f6xJfQ!3XRKC=$2nW8cq+omoXLW17{_*Kq|5sbh z{p6_?ayL9W3|(@qEmb{^HO8A=39^32(F%)@@dHZfE-ayo0Q-vn)fQ9BV%w{)weCWV*Vmnt zeh&L9o*oyycGgi&+Mgb{#F1-r(O{Qu%crYdgkfBQ6U^9rCKg1*m_|l!uam}nqK#O3 zzuUG)CbHKC_$xX%I%2uM`tZ)<>^x(~N%L@dJNae(be-@ZLHoDz=wa#1)zr3!| zZMU?R-Qs<(`JdokE%@}CaM+k@Ui|tCSH|7!=Wz_f2}Gy(JNX|M>#3=oAPY)l@~=ne z+1bj5-Q*{yOura%E>o&MO8XA zrO&kYUgZ@-J;8jSDj)?Wt>_4yDC#$0KW#%<8uImdc+YoU|0F8t7mhiSfD<7dop#&j z@TIKS5He-LritVd94M=mHnHohqJsJIE6a!L4)LRP{&pxr*2A4G8A+wNG;3NNhauCR zMr}b9x;T&f9%ry!la<%KG7A0f?arvP-Ms|-)3xhUV6TCZs3WGW&ptzmB`ZVdvDb0N zwKqxIb>lHvC|aa!!Y@r_cq3>)Ns07KQ>*Awc*ABL(sKl}e#|dQo6xuV`)u0x$;OI< zwb7kBnX%<*{&=BlW{2(RAuF1sK=ey_`D(W75<|4scftO+XrdKte$PyQQqJSsxaNu7 z+69+>lRm7dF>M~-^WCd8hk4dbMmN*%59s%6renEZN-JkaS}t`el0Gl%S_iDIlF&>a ziWN@lIkW7WJyX6nw=fA3W4yKB`)96FGK|JaC_nzUD%Oxn?tBAX-o89HO`SaG8?NH3 z7&cG|zQmh8>43R1aGl}yedKjXOK>9%9kIx7Y&6%13!gA+q15u^%_PQXvil+a2fw6J zUB=8A_pf=3yX`j#@}NbVP7}^Jse(`u1NC1Yh;Hqu!=&=Nj5zGR9@BuZGV9Fs>6!`y ztC1NO2`r-`l~c7^f8NW=B$vg3Q5XpUW;KIt$yjiuWMH)kn7Ny^8 zr$dcH5+gx^kkTzqS#2NlSUf60ss&7@`Vqq~8(`SP=$yCDm-g=@TJ~GN-T}<%NpGN{~}dQ^iQErJQC% zELq`O^CA#a$FRG;j>oJI<2!@}80Lw-FYSeWH7w(K!|w}qc3EnQ;yIdqjn?3BXjY3W zqDYDn@e^Xsq>ZCJi0EQl-5UZHDn{7a6`W$}B$p1yjY2B9ae2no>j|^T4KC}m7nq_k 
z)uy*xlXD}vD92mtSso#r3$2)n7^kCfA-!uG8#AaBZ*mnG>%l#XqN& z?N5xbgVaQUVv-Lb&Hf6Ts~COMEOZXye;nfZzu=a`GpUYo;{^=m}n;!h{%Ch8YA(6In6-)A(wC8c<9jXc1<;QdY$We1CYonV(g5lCeymXKW7?X1Q<(b$&kBsot8IJEKr)0Fc zMCxGl8rM22VgUAgGEPR6Z`s-UQ@?aa4BH>+pB~C=<01+MDjS(T(1+v%ganZ1pSpj) zKa1(-cDbM?y1Nc$P64Tf$hfQ|dHFGSd}JO>OibxHWB|k1Go&W>orf}6R7+JG90|ht zAbV^x&e+zciPBbsvD9FZ(J%~x$X~9Igwu96zxYdG<1pTulGCTdoR@YKBOf}fI2b;D z!;Zc5^eMK46Kq&pMIUGw+f8C7qhD}qxzKR>L@$bnJWBkHtiNd>!J)A!EH>avVfIfu z`@3Ql{{wgO`g1sI6&vAFbIE0ULDb)K!_sw?Q8`D7Js7W3&G$(sR7_eh@VLQiLm!<#9U4Et{-FOFg})zb=+t=XzS|7ht(Bfy zzw+BhJCL58f)gahM`q>bmR%3Syx%fi?ut(>+}FnDzwShN1J})U*hsEy|2LEz4b5Cp zD0h-9Wpb8tu_n_tyv~)rVCfv%LD7d1Ua5sG1e}gzSNdLAi zXi8|3TFuG}<=%onzE%sUotcYlpBG(%pmmTPacH#^r`%m~&lD-5f7-=ZVjW1Q=)($jtfv@GvG&g)rR}gcBuc`8qLrB(OIBiZpf0tP}*JF zK*zMxaLWMP{UWPSgwIun|oPcUfo*7s68$F7oVUEByNrIG8DjsMO>EvcF zYWde_09zv{mfL7{;H++-e`YB?bA78Lkt0EkPnse_o7(8_@86|rH{$PIEGr`!`&)gO ze2sb3*(G~=pPGh7Fjnc}(m8JbOrM>*(Hb!%6xF6Vv~VuhiWUW}|J%@yuge{DtnZP8 z%wiSXqA=+(MacBU#92Ed-XbC<{hLZEjeEX2(Z^|grLV;F2Q`=yu(_rhwqGV*Vg11xzBE_>y- zEIVSED5fz2(QcXHpp?vTw2Z5qs9>fLeg97GJ>1gbk`j@MXE}08jv1Gv6t zam~ri1hp3X43W>&x(78FvjDWe96cUFJqXnhPDx4D0*1zE^w)YHJ2Rn0iI> ze*QdZ-fm)p=EIspzbSEeWa(B=p&~dOx7u!$dmfRZA(ACa*=<*JA2dbAo8yT|6TFuG ze7=VnK@rxGJ~Y34Lp(V-86c#8;qm!Y{na{VtVPk}Ug^{fjNgmJ#l^F%wzf73!zH@@ zM#t^s)Fy`mCm?8r$k-y_>U!nuz7I4ifT9M_2$}Yx2l8dx!zru|)Jw~zuV25$AtX$E z`pnNC-M`+LeFf@+!=t0Guon%0T9UQ3^-xeB5K@AD<)@Y}3p6~iYbAj)#`gJ3)bQTq z)YMpcUr$~_w9p|kHy2k|Rz5zT?tRKIQq$4&&f{mP79emo?L_}=df(W_<|t@tU-NEc zw%W8}?}{%g8VYp*N?Jf6WST(Mg&XKAI6693gs3e$zkzP%&{))J?pavw9v*J)OrWQK zTI{>-fhD@`fOk7NL?OoMdMBIFxqeKAdya-Ywtg=EKH9UmjB$gE$P;CMx&j;}f&eWF zh)-P{Pz7;>gcgBfNZ^Z~?20LURnYqa_S45&e6^1=x? z(c)kVLbQPHRnb=}vE*_Yp8~1FFfcLcHN-w;Py$iN@LnwU8}KnYETHLO_jvD~Ga+MF zX4O}8@MiviJ(hehr4$r)7j}U_Tx)BqLo4`?*F)ExUg{{*od&c?dOt;m5-3cC2V_%+ zh5KZcY0)Oc8a9iQzQ0L5U^U8hh>DCHL^q~QAVmuuh!KSBUKl%%C1Qt)VAbwATnxw0}J!GLo5_o5q)qnYlBZu?@ip7Ao&^hTfvfIAIpQo#mVPm5KUl*~%TLodE5? 
z0m&6ciutJ7$4u*XGd5b^x)TDUd5KtZ@Rjmh_>^d&sK~*aW4RMW3-Hz6G+I+Y@x^p3 zlY%gM=-91zziOH?PLeccAg7HmQY=&~SK6b`c=^=ffszNg${2cewSIoHv8*>2ETTw` z6~20YvjbpBg4F8Oon*e!*)bM$|KJWWxLYK5VhAHB(A$gzXi6R|x9cSe&ejb;tEKx7 zl&*B5_8naRS=2hfVUbFrH8wWZ{IUYWT65Y0BG7MY@+o*vI?`t9tUqplYTh{}AYVZt zL!eDRqP5sBEh;VTw5exhViK`edb^>Upl1u*k_fJ7f z3(<%z`0>#vU5*OS3;_8RFmKVs2tc|_NlR;JFplc{(h`T7}X{@Iz`^8qfv;8*kO=)9%iW04gjBjBl<>j;184y>A(yb`W&ku~Fl2=kz76t04V{qTbM%N<$#eL`g+esaXaF zxn{=&raoDQG76u&>;9e>D?D%{jg#zuarWNPT=(JsctfQn84*IVGO|Y!Au}VCQ1(p9 zsH}`q2$7K;iX>SfdnLr%Op;Bq60)+__wl|z-*diy{m%J)zxO%!Irm+>->-2!uj_F= zuE%v<0W4TPhew>;IoVy|QTpRVrF&7SQ66i`)NvnDMUY#u-t&0o7~QXmCvNVt!=kJ?@=06wbR9PhFi%AvyFOm%K1tYVP}heS=Fom^NlhUAGe}9*gxAA1v*c5sYDpSJ4bf zROI3{<)_una}!xzb#441m)>C*ZfqoOpBMhEriNtq?%l+Bj-Nlv5*GyV(-P0n$B!RB zST@taiOVyShfqGna%0K^KGbRf4hvT^Gc&QVY45&$ zhz{6Qr0!WY=kgK{1Dk{J{35MJhUV1cz?e>zKDo8dT1|>AUnRVvs3Z*ZC-AjjGukQcULw&dwP5I@+=R#y1K$z z^^|_!lYk-5xkzF!8S&bhnht4eYt!s%w_p!~H-rbu(dEJ2;y=>T({ZI{%Hh5$DM@eM zJSi!Wz`4Ql676c0|z`}bUU$#61XW|sN>BzDiB#)>0|H;~*u0iR5d9bAYp zUvc(z(k-x?n>Of+d~AHT?H8N;Z@s;jBpHcL$lrfIY?70U%M?4lDk`Mjzkg3lN0(Gs zC>DOP*2V&>g|RcMq_mVKsI{z25>DW$LNazYB8tRBM;l{%)X3;42%OW1?HgoxgW?`* zA2V)Rk+HhL58?^lMf`?ilZEf33-ZN%;v)cf4v~>T7;~DBFSVi~0mf!&Wrf>IDJ_lT zy7Le_{##zgR=4W0W}C;l{XV{aeiqugSZ;+w1Gf(&991@H zf2*bx_N3cgpy7>v=G%;nXFWao3W|zRBBLcY&d|}*(tZ~%iVsh8&SqIBsH7N;}*}?KarPk7LGP1VeYFmf6azZl(Aq8cHNbfw1z9y z96x@XjrM>LzqqPoLWr1kRGu_1Ll>VP9>J8;Ic$;p^~)3%ER}dflL`sxDQRh@Or1-d zRAH0w_)pp=Mt)X34z!KwCOOF-?(ZTQ&Ybk|3*&RXZ{B&0B>@2k=I7_}5%Vgq z-@GC5tCrE!JPPNI2!X4?>b3a^!+3R$&b+d+moP@0|1$^4cEmONHd`iVW)jVp=h#7n zj*+o3iXa7t(tdFUG8GjSyl9*oEN>ef9sPZ;L^Wd}&kBbgCPSd)c-TAr78erL$mVq? 
zU!NFFupe%I-m$nJ-y{(qEM(Pk`RY|4X404EoYn7SPnEV!F2bWg1`Qkn)KK3un8F_D z!;Q^-rvY@mnJ*g{s2LeCd3bo>K{Tcp;>T2<&F>k-iD(>4%vdE@3v21<__j~f*yO`k zM<*txW@H>fWHm7vO1c__SO4nO!@J`xuiLD3b4>TZonZN}%B4#&ad7~m{O8UE;#X}& zk<6Ut<&EIT%Lrlw_i*=D&3+gQ)}lH}^YZZ2VC|v4zKS@50+l>)$?EDV)v;qNq`Pu- zZ9}`Y$w{!NSTv6K!0@_pzi@R}?5DsI${eb6qw7KAh*~{6*W9W*;v!xM=b1BbEJy`D z=x4r&L{cOnbihQS8M_Zb$zv^{3!(hE?RE(3P0dO|aIuS|`qit{WPSi8rAlvknvNf; zPuD5zRdaC2Df>p)I>Y^L+UcBnYB-`3Vglk0o^VK1lwb8wNMm9`!q^BQJh$8Vw2+D< z<2e2sN0j1u9qtQH6MONn1VR@@e#oZafu@Qhl4~soVCM9{ZYIXScapbVWsDJ z;u^?I9RwQz+C#L5_QE^BKf~cMknJOwhhbTvcNH@tfq1++j2Q0?>%)o83su1e2ln4d&CPWTHJ}oVn--0`-R69-l1Bch z40%0)U;-D9O@{C!76n}}L6|L}v9a1R?{)h`G>lwKj3%|+A4mH2>ql_I_4<}hwIU)S z^6D+M(ZhJqKyBaPgq;Pn@VV&3T3Ra@YybT{(=O_E6x|00;_uahNDF zT?#Tkyc9Dhr%2q>E5O@196j^tYgF=h#-U12;9=k^fm>-0`O43*-6Fdy&H$8g;vr&D zvEf0q-wMP}8zn{Stxn#PK%_DsWm$hvZulGvrl}4P;*y<8De9RwW83#0!LdG{`!~8;k&4O5$q9>ym;hDDD<-#n?defbQ#*`+ zi)0Vpe)MC+HQCK^udOsM!8p}-BC)rY_FYTw2oyaLNyiX#yy(fF_bT%X^8l~^77yDO z($mV_!rakmh;(h=; z9n4t`2(ZdH1j|QVl}TfizZvsmv8Zp|@N2j)_)oxCWO~V1NQokgvhsfRaC8=5Li~ki zh-?0WlmTG+LUJ1n42d8P1prlbjF1&E?C0c!A{4mpcCocRRBqUcZIxKG2`B@@Y6M=9 zmzRghIXXIi=$&>cHzYeDQG=8;KR=%sTyG~DG-FBa-oRQ}#8+$@B?bdh5�>B+lHM z?d*s4?#2rb3#0g`&x<9NsqM-D$>1k+Gj)Km5c}YmaJ?bj4?lL>0%H)kV}^!I07yop z(%Y^zUr&@@uE0`VgzT)Reu z{s0kJu8D|*+riJ{DdVEwb=VLA+4?4e$z>fK_P8!#y5^)Ac>GbBxZB+DoO*i40eZ2z z(^Qx)EhD35qAdE8OnL`JU@d?u>Kcd?{Bh0Sznd?m0EG}!Ck+kl%a?aB#~C+S=Jm06 z=OF%8fBhPgapF=KEB6k(HRL!5AQKbl>OO=;vMql)(gAc?$Vuy$5q^p8&B+|E3kHd( z`|Q~>5058}ln6UhjSU$X;C4 z(2x-kFgHl@){pNK_eRDn!}7sq{Bzs1*klKBF#M{#JRWfk(Z#%33NEs(O%qQ6#)|LO zMtT8wP+o3OQBhPBt;EXm{9t-VK>GwMU6840+70aG0plQ&bLWczH<DmI6dw0YM9x32q{`F@B$$^B`GtWk z;EJ%(EbiWaD=k8XE24ypX^lG~=eyk=8Qm*>Q)IS&cRdQCsZk@s`Uh#A*7j z)FB4PhybLqB+pR4rSWFxbMFbS<^1X!8^gtnv%iaF|2rhcDl(8_?mT9sQp88q)osz4 zl31xPdqUUofo=mEnX`=dGm@Lz(-h zadzOK8{)nojV@+?O%_v7rsVE$8+H+G~ zy~QHnQI+wl_wNtHt2!BOJqctBBjriyuG^hS%;mO0=P&&jUrl+o))}atS`9XA 
zXtyD|-t{3S({q+0g`+yu2Raph7$Pe}C{SNv#DwYnq6<<4s$d}ZbL}Jk;vWt>!|#JCai-3p5Z=|lp7Iff;pA?oH$q5 znbbZ(PV(pq7aLP@YnJSt8kLjul)t`d-#rMo3ioxAbQgl>weznHruX(cK273W-xt?B z7WF-#dgw!+WvA>9MX^%lEUh#{o%JuK0-uLIxI6t+8Aag|yP{2VZY*qOIS-2mvMM7g ztbUvb68CaQc~EOA@NME>lINxkWsm}JY4W>wN`@kq&4m;9^qs8A+wPa+EfHjl&tB9t zM|#`W*LQ26(>32kp5)w(zC-nExt7~J&qhPpdcSs`SiScCR6tTa7F{DlKSfSTq)`x) zlQ^~DGkfxHopqW5`E+7_=acJp<@ehjMzShP5sGG?bYE&49Mr&>OEPAnrTqz}+qUb& zP;3HsDP`rX?neGcLWRfrNXP-~kml2CnVtRM{%Jz%+r-ByMwba@MB-7b#hwpcK^(v; z=B=>5nU@64ooh@u|G<2EDDFvNy)SJ$M3eAZI4MK=&f-6x``| zYhEO=wzEr3P6pV$bo1uvIMLf~wW_8_S&TOST$?v}MO>eqp*uH!&@-l~we>F82cpOG zA*SZom`&k0*+G+419Ag*+UL}J8!if4kgMOS#bt?;@v+6L-vy|1n(BH@ zy5!SG>Rd?gGdL5aQGE64)wz<+@?Zqy1lO_Sh=|1Sgls>w^qVXYM&)X=9VZ{36*5bF zVo6B}mP=&l=In}Ul;@{?lbrk*q51xO5u`6b$*!G%C>GK`jQOA9@8i|mJ34B?2l)8-jErMR-8o|9z~duk zE-ES`$qg_;k7ooLLVx%^W533uX21NWV6` zyuP}a?WG2AOD}4xOVyf$-RQo)dp#G^jfe~;SO=m6!@|M>SqeV+t4%&G>#S%UIL8m& zwuphiijUi@-+*4gbv#ozW#>Ljk3?erWOQ&a86qWiqXP)R;UXXcwysXe85t$Ilcv0S zbs_i&aZR?GSpUV_x2Kb~=zdKoE2PiHRz}qrN^>o?~XFrrwu1fnKYJ>@S1OWhOVqpCI2R(o5h# z!RsIwL;@MAM1l}2C%4<1IYgCRAV#(CZtHoFZ-@jc2(z1;o48#rZtk>q?~H+{K?JVy z$bi=Z0wvNK_atuG+P+AB9gw7CW|quzh)vnl-24fC3UCz3!snS~)N!z1JM6qXxKex? 
z_ECqavh&@|sQ+@eJO36>*tP7Zk2$58H@} zvB_O*QvU~kopnu^ZuEPaHo{%5^NiuD`LEDZicc@H;J1#H8}fB#f(J-!t;1#dUu1#h zfZu2V++|}!{8RhAumk+i;x2 z9#`H+l@HM#;eW*Qqg5*-ZU}qcz1{0?p6s@Mf(&`yJd#?+?+DYE@dXDsY=C{-2kddz zqaFK>FxK2DZ13+^v$EmPfpD-MD zJP9TM-c|^25gWnH1x-N4IF68dRwjaMJsP9{_^FWE?tp?GczlFS?3s@Pcq4Manwo84 z+@iP{pxCo+|2XlPpO#4gj15HAkda`u`wa3)7wEce7RU;#B z+z)k4J2lG0|4jM{n?Z&Z5*HU}m<`Ik-U2)ks*jp!SHA!FfpAth62^b4?%%3AGDKt{ zfFX__%aQ1q=Fp<_g79+o`$U#ODdOPp*Vl56ZyG^$qW*Jw{cdUh9U^3{9@4!xRf^*X zHwf4Q|0$Q(@_khp6^~ganf@a%!ksc>ycu z;j$NMC?OpZsT~<%#SIzVb4dYriF5wXt$gZ7YWsw>tt|>NkKmbL+am?erTvZU_kda* z8Rj!?*)0j%0ak;-Kw(0X@rYES2AN2yN%dYtMHM?czSi^0FWu+Gs1AfGv5x4<;7}sf z)zsF0te%=&S(ymBmzdvN(AK-|!4mpu$i6<+J@0!gB$W95d#6d_0V84?)S9)5=M z_7CbMEtKW(N?{amGowrOKcu;f(%-*-mz|yZ;X?@cM{vj^+D!q*+vly)E>|PkAbe>e zgS+)1C@5$?^ls&#mtNl@bWL?VpsVM7Z~X}5=$!*a&9M7aI`5!E-3clhnmm+J~hx+cMGD3%u}CjK8)TkR%GD#620p8+1cX&E5Hx%ou3EFA0QSWOX3Tt{)XBcgyKk< zWmDS?jvaaM+WFgO(WWnku+4c!!sqkUm4lUUtgaM1%gq%$OR8mHZ~`FM^KS-wR_7!L zWKb~n?P_8;=45bWu*a#sV(kP{A%ek~z5WeuHx1lcfh*&5a^teQTX&uvSdq=mF$0W% zg;v{&Hs+YYx+nhpNdkGUovIEpev~w^TQn;p!yG=vu`l9MA_u)LU+I2BC_WUIn|#Bd zK{2WJJ>2Iy9#4BHhO#&6&MuX(iy#t~`|cho?02HaH86$>kNRbycSuJP|WHUKJEX z8k9N_D{A`JuaAm07swSzn-4zfwtb77a&fZr?s&X2q9ifYA!-vn_XTF=I`!5t0}Bf| za%9`zP)YId*i2dxKDhiXcRKdGK-{L9DANTe@8&PAtzS--XxhOGQG9uYSeBC`aQ0hlA?!17;*FVZMF!NQ0J3Q>XEh_?bgB>hUKxF>&wYtu!0)= z{r%a_>*PN2@4F)gEyinj`{2{KrK2Q|;8wxx;$sH}4B=`(Pgx5$0Ar(=Q8VlSUjhzg z4-$0Awcp8LhEV{{t2aAIzbQcf>33Rc0*HVQo||r93Bfj2`yJ8N(<6NSN^a*;O@HjB z*1_|3h?>aT;7hgi^&8+pfOxGETzw%cxb^1n;lrqiQH2vOq894S3|5Fe3dh11*9<~( zKCIVVkU*aTWmsS>OZH2Yv%nmps@hmEg%p@rpaXKnhtIz?bGmFbT$?EFuuGE);6az? 
z7#I_Tv5gIPkba}i#=>;CRj^d`ZJRiYsMV)}Il}*=(lPSz#=-Ls8X6kJkw4&y5J8Fc zwp+Kj!LZ?uzkU76EZr*m-DWYVH7}HB-`>5TP~gdN4sh;3Pojhl>X7L3K~XNv3_fqO zCRRTW9XiA>C@2ehA}L9>!mrsKvF3vQ{dbJs zto~Va)7xRS{u|iBv(iZPL_VsOi2s0Ha6qOw55(3*xWuy?v#4J~jNth4jmP?u!}@d~ zU(X;xA&LYc$?@IP)JWy*L!AOIOEul@E=imG*kr}$T9+?>!d^SzYrr8qI5jmjqN!cK zQ#=~`lFRV)oi0MVG!@)RhO&j)(S!T0I_+&Kk+E#vf$DfjBk{XjAS2}Mm6~=col-eY zN}|L{fydB2O(}Sy3%<8!UG}d0l9+y`j*|>ZosLCqK7(=!akR&e6ARGL9y~?>!`raX zI0{-sC57c!AVJ_?H~|)+!RX!{fjUq4bX>O+o+WTdh7LEuTS(oGPF8&ZTO%!4&vohV!xCqgSO@-rp8mi?Z29$d!) zO>%VkV^rrCc&H+~ZLb}aZe8lMRSHIef$C&;38t}ECgQ*>L>88f=2pBk=r+$^&G>n} zOQ+kZe&9MjImQ>4=zmf6uVVMrFQ45k>X>fy(Swx&x-3q#ni;hh2e?6Xm?iY-#$Ujx zv?~Wn`_I(b4Z32z+Z5#RAIXB=hkJIAAk^e!o%mDq|W}C4fqnExiSn1Kt)CuuQ&6Jv7|^5 zyRxvdu<$QT2PER}Xf`?fbRMgep4<;?x5tOxo282yZm|D!zpzxo4DD~>Q$Wq>&Z9VpTq4wJ}0GlchoY=aQ_pO95eDeza#CvKiD zc}>;`6@pjpuFY>M=02s&q&Rf21Dkxh2o=4{ocQMkF^qMX$|UQty!jM#kxCxA5LoJu zuh})SCW-2)#C*2H2IdQIaCDY<(T$g{Y0Js$9S7%Y&a$&?m-qP?RnXE(DO#NdkVoZ4 zW`mlhJ!GBVL$4(*zzfL6=gIs~Az3CxzlUUG*)N)5`Ld?o)2eqIj2f>fu2|pC&Yp9wHBQ%y~F6hIlLb~hSRj%&`Ev+72;Zl>}WeM3|mO()x+|#IbJ&aXA*X^ftmjWm226raomXMgtEB^OlK5(b?MmKUT4H>kH~TbbTuls%mBSl{pH zXe;@zrPr=Y=uB;K@Xmw>Yn)6g^_#yJB|Mjst;7e&6nl-;tLxDHI{B^^MN+EzAA>jTV&|eSBU+el@214ZZ4)4J>jv^Xh*#;0~u`Oz@m1 zH#qSa#NGYdrA_} z#A?3aBB3VK<#CepaXY3Q28at0UzEYt*H0vTQiB1tHecc=KTYKFeXBSXN}Hg^lw-~C z@FI)PAZFyNBx~*0&0trx+#S*;ncODse-R*F@!w|=J-mv$jM9%TxM-HMDSsAYsu@3c zW?gdFOh|J5`%%+2aVYE_ud545iYpsdguB{>*tsv7Y;AH&pJE7LQMND1k89_6U_UObFg@S?&isuvUi7lP6^0Xk$&@j zBiub$J4}#Y3|25@4L^-O6WQ1G7K_18%$i)?19d`kbiFUKD6=v&eiz|o37}XQU|ty5 zG4U}>OlGK|474`)Jxc$YM&3Vc{&hm$K#~XG$+lz?DtEu&X2L2{KGIZr??!Z@)olXs?f5Puso=?@SaY=_w7pC zvR5M>0arvo?@ZU{@*`8MM>XcVPCe+_{&kx5=Z{`d`@uLnh1AmT-&t=I6n?mQODJ(A zu#hDHgP+f}&`7U)6xiAgjdf-!?)pRDxcc3w(xZD^>cg^dtMpx}nU{$iOwym$rSCDN zU5RCK9~Z-8e=YXAXfmj`c(a8UwLs6UWUKuvSX8Z!?yuDvqcZB!Vbi$t%l|a@?qgqR z=#;Jvh`4&;%fGNp=iW3!+1I3K){|qU7#^2)i-fgHZI3=Tan^;>QIkqDT}Xds7kKfPgt9Q}aRKMFJG@5LuVkr^wyR($!-&0KaSPs*;|L=4IAT^?^vPtZ|`IE7Aj8W 
z5cR-&iS*{Lycl*2x`(7DN?w#q`1$vN^UwH&g-kn16=90;363KUQ}+9aT`NRyH<61H z#lGdjxpt=W)g?heA2YH>{RTb4v^h$$r?I%pcAzBeSJ;h~gb6t+`72zHwic4z7bc(2 z26;V*hFyf@Jm6!_{-o^rD7>$X!KtroZ?b0Va>b_j?bO!0b?19IvpGlNwzm#S^sHZK zh`#N)`P-1|eL&GfWd85N62%$!B-DD-7Vw>~-&S1NlAO$qRZs0MOpSPpEnSTVXPeeA z=gW9Ja}r}K$w|K3hcsx*d{H4obX!p>}uQk`MRT=$f)2Xd_Bg6Zm zJmE!`R#5hJ%(7mbVyqFpu(uAhay)G>C; zNkqIYw0cp;I3NdCYLGox^Yasl^m-fC)qk&>n7P1suY)SI8F8t*T zqfDkwXw(eQM|+2W5)42?dX3%Y&zS>U6JZP=kOy{n@2~&+Vg846c>=N1m2S~}VbH7U zcRdwb%Icf2({3fcuGI$DErOP29|^JkQr@T__rGY_J?Qy1s<~Iku-yy}`I$nwtsn(T z(Jc4wlZC1WzUz43sLHW;c=XEEC=}6(7Yl-#TK9>2z6md^m|QHIsJcG4>iyDY`qzm6 znLqU0J9Ntn5}td`^G4l1X4Ooqer+qv@X{%{c9`<7a-1K$26Gl=!O$8T5rd`P?&D0A zE8PbX5t>?DT*nF(>mBq@^YR&Q5469OXsiADlU19N(b8(Xcr%#yJ8vFGE;-r?IhEgh zW2t+7kK&L5sbBK(1wBcbUIHVd+xYaxzkRLt8`c~tqkEcETNg???aV)Ty{uS3np%w1 zSU!b9yuU|{hfcAGx%(>Dqr36DFR}=1jcClIz-=(32Jw0PU5Dp;)%H$+Zn1AU^12J2 z7SV^ddu=BEi)Vs%NkEuiMqd78IAtDWJ86{tWy9y4nRguK=341Wlm_Mds`M6VT)(Q~ z18L{M!pDc>;zq6}dwBv_bBYBlkLEG>aYs}}BVAW)){8SrNu|$x68*MEd ztdbcvJ$@}vJw3FUQ$c&IN0$e~!v?&&O-KBVria_N$K!|{yW=NjiXWqI78Usz)z@wB zi^nTY2xn$gj4dTDtVGi$j=sI#X(4Q`Y2IAoRx9YV5S!nBiuGCpgqnQTb7YA z-ykcsUi=owh!WAWrj;QoG_w_6TD;9$IB#dPCcu{BS~gtn+%R26)qVOJ z72Dc_I{vuB;ta<=4{_S$2bgogP5(OVeA{BN$1Nb>u3mmY@a_j1!}q#5v$@_l$g}NR zidRq##A)ZDv)cSZO%*i;DaQ!Gker0VAAO1{qJgwQF^9Kwd4g+AWivICJFTe5%MAnd zli3+H3}+oFWXPY^QP}pBgyjZ!KPwF1k#(cg-M~ZcY~toGMVDVW9vi(;o}bs$xL49E zA9RW=U-gk@%)0)8PdN34%G+uVTANL3%IVg&$;MG@#ekvL%yZU1vd^C*VPkpCX{hm{ zYUh!a_I*xY*GjIce<6x@iJDKIg zlCD&vTQN&FnE7EV&B%?q9c&h>IBBxkouP42)ip(azSPtiHw>z;Rm`4Rs&olhA8;2E zxo`3B-rV=w!Wr?Rxcaj`d4j z9vMB?o$683pDhd22EOn8+9N{y%p@Cg;8oU1W= zusU%h!Nl>K+~j} zxi!{4gNZqAcRc&uwiX5%haL=idVgjS+ny~xVy?<}(7kU@#KRA>gWjK(x5veZ;cY+3 z=dwu;XHDIau~cbyU!$#!Jveay$m@(n#}VyLINPOc{&U?L+IFXeN7YFk%HEnO<)hS% z@$hT%AQANJuG|_mB%3-Pq{0?r^z#?r*4T+Foss4w-U@1yb7uw?zFLvX`_DJ*l3Hl> zQqgqy`^)j_E^ied)G&fgj$i$~0oZQ6_aQBLAd> zKeu{q@I05wrQi8zLzd=9`LL6vWm(zerZZtiA-!X^WqH8Ab9g*&dsaHSS%G4XRcuDT 
z&S-18bjQ1_)1&Q)o6PE|x7$2E>WeU~t?ahWHom5lcBKh(Cc5@jKGjK9u>0yTPG5-r zUZFc_g@1d9j@o@oXfRZyUt_DImy^d4lOp8qR8Y#WeefJUu;UM1L-F`l+)DS6#33Rs zS7I%mX)gTlDpVp|#Z2{+ic?~VgA@uY=QyR;1_vnu*<8jS%rIAe@*kwRn5nY>&>>z_ zc{q`;{)F@Gl=meh`~8V%W4UsH-g7Q*T$xo%a%HL4W%hOhimh}Jrx0bGMc>=BPXan<%Qcaqf$KkSp_OnxU&seLP{z-0nP&TRiD5VPOn@U7jcKB*-4dpRGY-X>Q z2l;_)cBR|Q!|rZZ-!%@wzju_Zg>7v9s5ppcJgL?!k=QrayzewuOG;M7v&-TQUG;}L zMA(4r_O8`^y_x^wDivlc+}xVJBt5ptRB>1 zbktnshjOQ5lvpQb=*=8$kIAGYz3Wm_4-NvDv$44(Kua?y{qsm$uTVheWSzAjIFgIX zHwPO3)+0CG?O{ekCYRX8SYE#@B`Sa&|;zqnytm@rgvR_`Gd z)%But;aQh|J^DK7VMeVn|BW^|c6tG=%&q=Xuj0EW5OfZtcWj)iEc`qp ztrO?Z+k4%9T*7erJ?BQKLo?k2DC-%>j0{6wnY#aEn6r{r)+l+zH1$J3bZQ@os-+0i z)(Cy~$NM7ED}RT}R&LOT+S_RUHhd8L$hpjp#t#4f#{&B@mwlYRK~B6>w$9<#?a|P~w)Njs>>pE~iEFdZ+hi`m;cqA)@cD(hl;oi^cmR4^=F7DqS z8kv6;R~w1J$O)-T(vLU~P(^m7Wu0b${Cn`+BSJ==%E5OZ2PZFY?VZ9?$}H%uiC0&=yKrD&?(cfc%jjQAor3|eYh(Uji)$|ZS#gon ze^hdJ6G`pBKK`n#AJ+UO+?UnX zm489mft@y8#C5c%JPAsx0 zvkZ}#(w*guR!C3EC?gNFSQY2-X6E6}h&&ae3U%KFaRy6A$1s#H(SC;s2Iwz<{vUGD zd2=w^C<0&MlK-FPMD*?o2n*Nz?Jq@38EgZBQFS@F>ZAug)h5JV7yZmHkfATSp<^3$ z?DnoMB@>gAX!wL43J(E=wi;8y#p!eRev?CnUOH2O`R5lPIMGRufNlYuSj+v+Oz1Sb z!sUbdDF#tNnEfNmfcs!KYQiY3>Xo}KCavvxEQ3a0^y_wa>!I5Q&46e+ghT*+KD9&k z_;z$_ooil9p~ofgkez2<~eYT&aQ7R9oc5Sq6$h3U~KB z;tDnDe!37`nf!HcsZCkN^~AuojFaK7FEGdN+ID{yLm`V6fQDD^iC`C%3% zPA?=#uXWNfbP6?ORKz8Z2L%Uz>*{)1Wqct;3(ZFm@DXj7*ujrR6UbR{1yFO^|Lr>q z(aW*X!1EBKp?Bne^anf9-ZBgwFFHDiz2kUBXb4By6Q_cfo<14vQ+QJlTb4S{U{*o> zG_gYu1$tDbUrM@$;nm_16BTYyVyw7JsV7MKV_*l45329byrAjI4ASYx`;MSN6)&4u z>etnM+B>cD(P@v*t)G}MqAGFjeA*Qo<=5QIq1U>ftqpiFd;HdP`cp?}b#Pd6kB&(r z&9SCaHpQ%@CZ-3$3Dn$;Me74`T}7_V)ThoJ@P4SCip4=*xH~Ar;K0!JmWR%C)C8f2 zL9?=exVREOZP_2EdiGx}5dC{BV#Gu79J7RC^fKd?up*64VB0b`2xE=}SNv2T$it>9i=S#OUcGfu8mW zR2L9uL4GM32SbDcM-XB_sAGs7?B;?P9r3W~;tP>BHC0ujYDx3Tl^S$N4-XGR{|hm2 zT4ttvdWR#LfQena2?=i@(t>A~WK>Mi(!6?A9)eY98zF>(k`fxOYm$jnR9TNErCi^) zS)+fxW_ZrR^W+d0SDoXsV*SLqz`Gyfa{rKO=b4mM2#4B-xk&m$jb;f<{PL3arNhnK zlLP{x*z_k~y{)0)Mrf&;*Z#X~PzHup~}FI1t=ioS2;@ 
zbq02+KmX1=xr17Se&oos;mO@)e3s)F468vwXo| zL-oeghqAJl@I096qnBgqGZn@akhH-Aw~m&2yj5sc(S{6tx!41teXWNuQ3Bc;6mjt_ z(CS!FMk^!_y^XtLireTfbsk3d%^R)&Wd)R(ZWJ1~5{1X*s^1P@Ko4HfmCU}2%9}Q^ z-kxFpto~OpQ{%BtvixO>27#Doj$J=9b{G_VLq~(_L(F<0wyVm?JJ% zR4}eD&(uMyMX6c9hwy?I2CY#wgv|W3;&2vFGNA+4(5~?6<(1^0C0fJ0pw%In&*u94 zOIE|rXWMY4`x2ZSB!Zo`*IR#3hh$@#9#O=njp7BgH(T4bx@w`=+6?2fjkBvsQAw7%TF<0ywn;!6_E%p#JM60$yGM_ETrBy zye=%!HY5n5nZs%Hqb?7gsO;;Iq#SNep6W)Gph7B~R3+B=0Ke9&IMQWKKGF<0Q1H)~ za9KR(qRrx;v?pX;@koEWDr=Ib2?U5ne z`7c59YmlAeJ?NBN^;Cc1Tb;LB|&@ZXL8 zKPg;NIl?Y#-VkIDVkU)Es-!dt5kHGB>e`hLm%^TK1|Xs!0ZrmGWbdOna38vVs7uf| zrIq%Ad>`>AJFB?ayP!vdkmcC##!%|{?m;wtfDzWuJQlC+r?CI-e^;*7XWy%c**5%&@WV@_q6D8tIi%?V1{+STQ<5 z9fD)@jCa@=ImK+2n=zdI?rHMs((ixzcY2B=lfwGgyaw9_h?!IH^X*kRs;H6q-nt{E zIk80+oon>n_o4(Og_Ti9x>ITmizsZr(Cy4pld+Uu&@`atVB?O z5$k)_?dtT9Sg)5_ODjYbz(uZ6r)Z8iA&~l0j_wq-@()Ztg(Moz8HrINSNf(Z-{8jD zDO7#sk~Pf)>8M{z{^wZ`m^X)tThY40!JI0G$ex7{e>8BpQ~y48;-ia6 z;?G|X!&oOik4M=Zvf_}=ce`owqg|w_uyU35dO@LaPPEHNYO9c_pq!e)={%tYk`0rA zt-oTkk%j%fue`?-&wg3ZU`i=Yq0@Z9$+gT$ zABE@6aK}?jOfXu;iP^6PhW_-idOp{jE$w}(PQlQS%x|d>A1C=QxOb<^AA;P};N5^Y zqrzZo))p_XVa)`OKl~l}Z1DeN)6#M>p>Ywx#96mhj~q~NSlyE)UpVjL4o>j>drZdV zV|u^SD|&l#rmk(`;|NVr^vmcy`=#ayh4ziN5$(_TQ%vmfkTBaa*EFJ*S0?K(zfyU4 z@8^H_jvT%Ip`lc(5=!5D5_ySEH!F*Ew`%8d`*a&B(rx>xvx4bax{Q=xC&-7fVo1oR zMvA|t`ZZ9tBv#CxoN_v-mwCe2+TPnLQKF&gJ3~i)O5gh1_!u&$lP=}Vq^PGfe0IcN zW4JLu_SS#^%5=q(1Jfg;=k~8uPkH{OHHepsR&^_wzR|oE#EtX|g_y3wbym_{I7Y3p z_YVi|)mw7Si~aEAB3Hy1EO@0Wl@SAPXVZ-;>WM`9A2=$v+W_jZW05vni?5511PqxwDgTW_k zPMf&7`9kJ?lFpxRB1J@0w0hfp;hJqqxxGJlc&GCf*=1|^nl)idJhe|0d;o_KUF~P zc=7P1e{sSO48>(w8O-jP2iS8qKzq}jo4F@nL55IdfBg~l z2bM4XvgnfXdDg!;6=SHLIQ4e2j~TXe_W5(K&A#RCWm)bWSVYJgo+iJj!KTe1W$z2Y z+sNR5ikK$5Bhb}t{E6b5&-B`6L!K}w<@s>NbJ78Gkn{4S_O5@yDOxHptoTna7=ee{FiH2cC?!m7UA8CW!92Xp2epYot4khmJky5$Ay0l`IIaR~yF0^&8AI&um z5MFXz;H33A*mH#?^w+qwYowjmf&i;>(!$gZqPG`>)Ik4*%TTwO`|3ey)PFB1Oj4Z5 zHsU!pZhQSLFN{7^-QUG>e6_`EGq#SY{~(3r+5*kWdgYde?_Q4R_=NFvqqY70!RcVZGSQeK`Hdskiby8q4z$UaLn3&Pqcc=suS>m^^HY 
zHlCMeiswe1g$1MVC|DyQ&zq~fBj@j8^_?+Rm!kCY&#T`6i2NM{(v8Y`Vnu1+7}?- z5fqRroiYrsdR1rrUHi(*#>aM*@z$)7u3Ng^+iS~4vs2oNUu{H)x{%p=jReqDbAgHF zp9qu&I~e*pzhryPLGT_sxM9Pzz0hp*T5CVU*{+t(ig|6>*H0y-^3>+>OQjiuhscOS z68I$VKEvL>R7~f_b&nt1{#W@@D&n{h3gQ1rkubR)exp{bV(l*CHEMiB(a_jXq9hRw zF>1_iZ#F&`cTGAN-hH99_~fpwuZz#ur3DQmza*%n7;3$!($}1ejbrYEiP_F zI??lpLbyzYWv^Pg_{Yki&>>~1l~6Q>vJl7OxS#<) z-A~1-mT~2o{e)Ey^KYJw^%KA@0io!*KAQUK2e-xZ#DlPuDh$0&oM=q1$lap;=t=wn z@LCr#4Z3vphp_jq{mL1Y>TGJO6|Ow5$s+mvBbIA8OM1)(`A-g>G5$~PAFfc)lRKJ3 z9;PuDWzK2i2hx=xGvt;`6~Iuzi%ES*jC$qSiIIIL++^IJc~6o;{@rx~%<*c&`xqo2 zI_5&*tM0nwiGdv_xSaB>e79nFgp~c|?9_7g&#)uEmx4V0WK@>TtS79F*7Q{UxD))K zY{P^=K8l_3;E7_-b-F^GMD9wW4f(vBPvaYhLReGNpk9+CaY0eYOBKM8oSyi81_usoKJ7nxM?;prO{IB1cZPSA8#}jW%tF4M49NZfu{aWnvFq; z!`>D&{{M2t99vG>%zRni8N^n$?8CED?TY5*hz7UD3D&UBz%Rr5aNoJwCr?Ce?fd6U z$l@1)+@=V0^o))7$@iz49%8>?S? zK}A%{5VDgHny5DY(J)~y{I||gA0RSVTWH;&H~(i&ay$)yxYs*U5 zn7b2j*zYM9D?=&2SZxjTy^0dausDKhVzOlV^6xpQ!14R(aQSooj;EpBQ;qNu20{O~ zlDv-|4*`9>bNT!uqiexkQVO5ne7 zcJ>35<#cy-qkoomW#Cj%czYZ*fku5?x&Q$Sv-D+--9J#>Y$aAEg=b}Y;*uG-N3%{!^Zkjjs&7WbL0u2oF;Z| z`Qo{H&Gn$)%JSy#Cn7(;RGOza`SEv<-+3zy%72xQ9$Sls+3lgiV6FW??{EqwOX@#gH9T(@GnpLXN zT4v4esm{;oI+9Pkd1$c`Kv|s8icxigdA(t@m*)>(op0l3iVJKB)c5kbz3jUys_qiU z))N<1hv^h(7N^G@9yqTgFc;4yA~QW}pj;YXb1jLT9FI-W$jp`GZ9AmxIsIo)7iArg zpr|$24u+(PmAzwxR;HPiSK?jznB6qW(1H^!g?wt`%KE(VWU)V|3HcL^Ck`(JtZ0_f z5EVLc&by%Q$hX=5qVZ>wlSs1!>JhdczZ~=fj=XwU`{pnUm2e%8vR4#4^u+->o?p1n z)_vmC@Qje;*fG)vS2OedYNV=d>DrlZJ5TD`UAnHxm{YD{gw%NF_P+ZZTT_o|78$zX z5qpv)Uj4LR|F==WoT{k$ft4fYL*2+U+OE&yPVn=^Cct6kA)m)q2_a)HAx+~h`yHvr zgl6adh}#W{XO%4^pJyhJ=?aQne{ZU{;sn8@nM zZRUs$0Qyiq>*a!n6$uvH^5nQyyK934)K-BeVd#%R>(N(Qch2MD&J?&>(lHMA)#hqw zJVl|uFzai`Ckbo^0H)vbO0;|4l4wD@@LL}k7I?Z_xIkz8VTwSdTo=i_k zc{Ic;1Z=N#n%Bwu4;rUilU6&n=P(_c+W1!fMz8m*vfVC|f5WT^p= zd)`(|m-CEa+qw4v=Q7T*eHpzhFH*~4*Q-@HG`c(OJf%=-Oe$UNT zhKNJb4X`#kKRcUn;nS&}CBWKC#>4({DSi)HvD%;}`K}%*KXYt(&1`kt;(EYpXpT#ByIb}RTk@c;t{y3H%vTg5) z^87_bDyL07*0MUpSrLl&V=glL@j=<>ZSsV9e8}}ta|w-?XDo$+w_QyY^7-SiP8B0x 
z4OEZ+f`4y#yX|0)SofD!(yXTRhH7uMh2ZmFuYebossip3=8$K6){`nA4I@2a{ZAln zH>QWbvK>$;)-kS+`ky%2Fl_71ib&9fOtc~U4fG}ZI*psdbEp_?tFYGN2 z3ih96xtiMIF}&qY?6=y@uWc@Qj7b_dsrpSrihI7#MSs^ewz45R#K6Mvf1fGqdH74V z_T{MT(3UY`$5WJ<&aLbk3>Sod5quGn90+~D2gUszV6SBuN!I?+JtuS!x$BTxFv zd_~T@-_nXJxxc654$$PWk?{+Ncp(0ND2!*DI~*^ngl5m39? zKjXeMU0*hc^VcUCU&e5bfzf$u|Aj@WCECaf`#_Rmo@!c*$9M)0e!{lBg0glzK0(4a zk+jm_us5U%u;}qJ%R?|F^6qsJl3D)hF#VORZlp%JKAzL#mP`uEOQ8?{Or9O~f1=mE zYs=~K$E#b0MZ)J~tbX{f4=4TO%&)ui@8C8Iwn$yKt(uQa$GbMOWtk*%8rDTCi0pp0 z#HPqNFLv(JjaGGy$ro=vDH;b*2dXQx#T|*EgNIVaqbk!4I-z`_H^!Z}+RNVADMzt` z9h}D(cqzI06`-NX&jnH?80m*mV%}ce!gw;%rumu(sxDM@xhchz<;IunjrSvL$CXUwieM?`6q*I%t<5(E-?n+4+sJdXJ`+spS6Krh+2{Kzh|R z3^bg6ps#Gd-v8d#P(9>W&FxCFwCk24qVWfSPm`0|C*pei)sF)$2F{nLRbr`;V-b3C zI~tkhfjN2K+4lY_J%%uOpVg<&TRaGzel|RYKolYQ8WarhTdx$=-icTJtq}uQTdXC zSb8w8D&mW~x&54}oF+Yxd2ynGBd;>~(}B>XD32gIQe~@QTWYWU$OkTRmg*UtZE!P| z$|rFl-);z9?tX`-}IK-g>obj?z#|o1yM7*CuSrM>=#kYQTBsP4BzgaMA+Ys@l<1P|Z8J z_rj2$vlxAh9QX>0lM;#%EgU(*Ju^L$x&@+|=^Z5+O7*iLHT&Dftx&I};KsAG3DyOD z;}w)wdJJ-@alPdrB>)39Boe+LBg(G$h~^$;W#xH|?9OnN9EB{u>)VZ^*gy46f45>f zXX<9o)HLTGCT8|Kz2=g*_YK-w-loe8)XeXHt!scT_pUE+`pje|J+H{^Ki#uUiFQVZ zDs94qAl+?Di){pToAm+0`XSROL5BPPNA@Y7ic4Gl&Uq<1gx&d&YD&1Wv-zTSiDXSJ zrE9)iLuZdOm3x;fhWz#!7l^jVSbw*3vNeL({ySjs>Vq#CC_n*BF&XmQrXP}oNZ))Sp zUC-TJHWlce?{wwNoH6SVz2(Tlzs%VOV)2b)>|GxUz+s(Pkrg*uq~h3z`H_8|qnj4@ z(nbjA`m`c^!9&!@CEUMg5x@ax&Kpt@5uDO^Vt*}{3kiZNhsU+UjgvC*r#e-Fng z16R5seZ_Qee{Nq^{l_%IYPs??91>Y3FGKh_e={b2TQ#~WFmj5ntc;je`6j z(uXLFN&A2%^v0Umk_#cm1Q0y`>m=QfBmsMXxMfqAdiYxgi<$s6^ z@svjwQUji`Na}}cGt^@Ka&GU)>DG_?NnVM!WPpf>8XGKB9Y0DNakYTCcsNm#-kp;e z!o~i6x_!l|hZ8zjbh}?fXHeBt7wo+ELL-h^r(ETMQX2XreRJCZU7NjY%@&FNd2>+; zeE&MpZ=oyf_M-whSy#Ba>)C?@$wD64|5)MNMP|2+(zH_@A026YCn$7E*`QNu)Pm#h z^Uj#2tkU0XJ*gak1{Q zAb5{Ji!jesBg&MRco_~7S1YTP`pcBhIN&Id%pSO=38o&bFa(WH=_fqRI6*Vz*GoV^+Ic?PH*pj^%q~_Trda;>4f8J;eq~9a z{h-!%@srlsAhWxl%$d2b{LyuGSfXo^c6}J{&|TZvyL7sX%)l1Oetzxn=exs-$K_Zg ztdx|fc5XipjJL0`P%&Pd@VV)ef=}bQOzQn8FZsAUhTWY5>!lPRx*!cIGtUZShSkL)xWd? 
zfGSCcr$UQ6x@+jh{-))%^-Rh;j1OvZ{+6#-K>a&*!Xf9!8Yqlc3UmbHuz>j`C;$Tj zgBxm*)$U#e1CvU;_vxJ*Z=YcFZ&S;IzLYn8cokR1v#n=8` zUEPV$&~6YB?=~73=#m__2L%;Bn>sL9)9W94p5N`181u_~--l#R8swW(agSvZ-?WTo zlP#AP7GM)1ucz03NgBpe_01bDju){dQ6iQdXhC$mRwX~EO8hoCf z#Xi0Oh(ScB@ugkfm~Qch^)`pV<*7IS)e>=;&!`YcEO`n*jC& zs9f0RSq;?2@G1cE2binhb7Y3_j{r#l^Y-f1t0Hp~D7^Z#Pgy6MlCm2$!{zf`Toe>P z8EMH|bOt+4kp&BXwC~#H%ru##05E#8Fu!!?xv^&B+1UwQTYx<<_pox~ILrR%6baaz zlx8|V|0g_hbhW>RIu0ABKnM}v1V{(Auc^H~-2dtU)qxZdp(o5w$kzi@2WSQq%Akft z*hc`J)+D{cO91!q!#{tlAhyg{UWRcl-U`YhAV=^%pwWhVf^smVWuTG)DJ&QxK)LCv zu$4BUW-lx({0&0Z0R6>X?w|31Z4-vs*z7=-KqMbJe&?kQ*NiRv&o(9fo$&thW~hoo zAI09D?|&M|KrX`V2cHe0>%l!EGaf)*?}COmv|b<_3c3D|rm%hy5D)-e1L#%oMfb|>%V{N$nlJ+r6AK1X z1b=|U9Z-Idv#T?5&usabAQMMh;_m500?_1IkN+Z zDlUGn-S%-OpLaJ5IV`vEudG~TmjzxMHw5K5AU0D{ZtL=XhWe6KzMh{yUIwsrfD~a@ zd>EdC=wWD`1%wkdCiCkoXmeT{utSIbS*{pDR108VAUPop1-0gIc`T+_oi@BL=%+&< z4fhuIHz7vs@B{2pIcS#xas%aSNZmmo2lz5}MdA<8kpcz++ZUJ#JQ=)Bogb4q4Rom* z8A!O7Ix4`cjM7oMJbXc>gwZ=Yko;&LiU0AgZl0Z0SFe{jo4w>%y{t+x-K3z%A=tOb zt|*&%CD+w@E7ULIPw<`i!og444e%O#Q(qpihki2@vmn_-tQ3gk!?@?`Gl}y6u%@dd ze1dUazNM%gasq*h)6#;}47e#N08(L367rwdgu^HR5`jvT=U#U=H(~=Lb3ly(P=uH4?k)y(yLW|! 
z4ee>hJ~MJqyn;9@b^gN-7il~6*GazIDf}c5a1_%ROKjCczHI`^~I2ZuX z31M-BTxgX8P1`W>&Y@SD5Xl2XSUzzI$B!nsv}S%I~rMQ!QegL ztdQF|0clY|>=5jSCYcSXkEVM8ylEMI0nux?#}K-w!UrL`dJ9feNZi2ZN^Pvp;XOhS zT{ z?U5MNb#&bIdcE`xdTW2PC`dWJJzM|$Oo^mgoYO@XKe~z20WFh;B~2^jjZW{W>7tzZ ziY1})r33HIIc(d>@K!96o9?*!_xGiIV->A#**OOG=A0`1HZ^`u6XJYg5mzN68eCao z7px|XCiF{bPlV7`t2{gs(j#f5W_4?{e07rb_tZ|)ePORYCp68C?A!Tewf@AE#PFG^ zl{3v3ANx$IJTAE>pIK!vFxWOzUbZ}|=}|w)*P5xub-2Dz8X{}3AhZxXB`Ej_$omrv zVJJ27uz6CNUp*n0vr-0a3H*3gG(Jt27mG&E$f*168?m^{hzwvi0MEnxxa`{C&?6@hyi85WRD zh7%o}ZyEsZM*Vd52rLevpao$tas*gLw^fua9G2T z0ESzDI(H171E7?5sIEcm9-?ws4>imVWgqcd7_s5Y_ppfHf@woe4h{57k>XI#_(=0p zB!U<5KUX%ot(s6lG+L4*NY;W7@{_-GsewXAgD-Z!<)~9JVeba)Fr10BwY5>g`1CIM zg3Vwb=<TDEb?vC{o^iMb@xe9LPi=i z3Iviqw^H~jv|k$ym4jF%p*~z~9tlUVzCAJQiXP_&lZM!Lk`2EWQt% z$j^86-oHZQ_Wj4|x^?2G4%@YJ^oNAvb|X9@=tCJ1%LV5xm^=iX)5y?Ty8mhO&frR{ zMC6|O@84;W%WON|jb`^oi^&ezG;F|H#N74mdOj8fG|Qnee5)WW!S?qRZhEhBkMJrVLSnKN_@@WoTq(P z(K%`8;`NmOPt=<5Buh@-oZnuSoH)gP$!85#`f`xvmi*Ox8?TTj@isZecx2q#I+pt)copxXTWuo1x zObspfRESW*0}O}l3*MO-893oV6MLiLGDopL(KgYTg*Hjkx{om{IMl}uNXa&R_C_u$t z3*5ki6+-6N$b{Pr z1ajd#p$JkFl(rGDKvn@IfH*t-&v{a;DE*hr9At4NVC{g=3t2ln9FUiwgSq!sf~30> zfcwVY6CwyA5X{G_hs@m<1XQu2pWx%zFMVza1RNN`nTAC-1F!%P!2#)p6Cf;d8WH(H z&B9M{{vqgaQ1(T42KxhbJgs08il8(N5JOU+l@Hz!l#+Fgj4Yw0h}3)P%$Y{8y~3J? 
zug?8x#7PHdJTQZxoQudJy8ATBa|nmek_jbG^IXES^V;?6$**54J&M-IxPaG?s|%k3 z5S_z5svX|;tMEM88dhfwL;a0ny&!fp_w?{lEFw&2n~+pQ<6*r^pKV3=?+LCxp*Ds!g>F^6LYW>8UpQZ) zU0r9D|6hMKT{p2lJ|S7{;%+vqotPMUqyvB?q2Ju+d^x=ZZ6C<-q7hkLEvK!c1BXTY z1V;uG71##=&%Jy}0f!s7NwMrf97PXAmv`#A{iUc20!$WWX6I~~&{FNTXQVidQ>~1Q zs&rEN>(_s?13SBGhn$GkLc>ti+}MH_EQj=rjTxA4&&tQuS$s0riw;gqN@BFsMSX~B z%DymFi#skUiSuAMD#nn8p%j$hsQK|Q!BBxnInLtXb|5LIz_D{sO**Lt(gjV!@I)la z9yfyji0}ug);9OL;W-Rha6kZ+3yTX{*>_9~?D?^lFR%qv_U%3DJnKGqMF*@a42<7U zS|k-4`r~9ZG-QdohapVvhaL=++}yO^ormoNaULIA2z_5vF~Jj~geN2?ta>^D)<)Pv z;V2>O3-MH{zjR&`4t<}6TZoI>x4weo8*Cy#lmk+P�t|v0{2VCU$p+qs08rCx1KC z8DFOQP$fMhC&wh3ulPDsYoQ1#MrGyjn@wpNCm`;#YY1pN0}kQUQL z7z(S>gVBG$EA;#~nl3Y`;5m+R3cE__Lg;bYT@i;Q7bB}*`FEAAOqL@5A_sYyB$?>x z^&#jYYJ^_q!2Iw}yLD1|BmG(}wPe@Emo>AhjsHnszV8#_eXLXn8QZ>a zrS!E)uxbto^4o(2rX$E^dUew#6)MFG=d>Io z=HC7ID$P`Fu6{@I@$$0gt1m4pNKK)-&c*pX8JhJ+HNXKtns+N0nmVHtTAfn+#^+o3 zQ*=r`ecA%L&scchMXi&Vso*bGqqb?toxTPHHyB@Z5vA<0UQ^5+a8N}vYT?6#Onf8Gg`tgB-oM{-b22MNT|f$j z?+II}tEuU&fPl>}U;ffkG2&xH_(c~LZ8_r0p&!ka%`9dw0PY~}3KZO*Kd(S$7}kdt zo}O`_G7`NA_=>^^P;|o{Zf@7FUq3%6kIH;>85_}%uLgIdow*PZ_#1f)ClQ>Ex)u^r zo4HV?Tt(i+OQ()YqzQZW^;NR zb88_ko%JtsT~rC!??8FB+cIqE@iOo-G~De4sHo44{MPEl?2+g?3vfPpPoCTrb5=15 z^(QYcZ`b21-V+@nSmrpKAaoHH7LmTOsB6+RKw88JA8Erw>T7>Lx}tZ{S%ApGy}lTP z8#WeY2JK5Q>&?#2D=Xu*DhiT4cSnnlm)G{&D0+&rd}kv{V=C()oV;AvItD{U9FG|J z3{CNb(Pn7gMV#o-L1EN$MRNmT3^C)~(K%JVOa$e~0B)O5;;5vQR%LxGa%aF<)AM@0 z`V>yOFP}{_>!ghSZ`2hm5(WkjyQFcKl@2(vp)pcp?Qx5jwG>sMracg%^UbR;Z=OP)l=lS zwLo7PMK+f3WsXXm)Xu@Fr=&#xMU_ITy2>;0b2#z7f3N-G^Mmm4+_JLE_*JO9qkITw z7ONWkn^o8Yx1O+JJwwAm)T+$dzp!Ue$u$^t9<2q}Af1bWSQ31thv3_w>#g$0TsU6>rm zv=??dy1N%X6!qv@zSx??TIg27u85-tn4x!`KJA>~prf3bZ&8K2WwG1D!T&i1!`FKE z5%b^&tTKDvoXB-`aUq1GVQ;o;+m>h2z6ZmqKfzoS$6%-~P&IRTejZxzl7J_+$6-=$ z6P7gEJ>cZSPNFh1ry{grhz)-73tq$lTPMdDh0q>UC`b0TnclMu4+GH^9H^1!v(|a- zyr>`{UI@}4x~n+$Nw2gwfhg!_aKN+N7{`OV?Y-7AD|+ZUR(5qg~tV01EZ1C8r?7??zbx5(9nRIadT<+*O8HtyY0Ad!!i%`vX;Mpxlsk6_QBJE zv^N{Ct*dJc+|E<9xE;k=q-Lu2<3BsczQ?G-Ck`SF;G#3#+#0ZnpR}5%+l|pkOJ^s~ 
z-&N@F(?;Jz_Jb=Tag0Rvgh2|59%u_%>*?z5&CA6hCrVcY$0W3rUZMW}o_2}NSSs@3 zG`!yk&p+&X*b?BDLE*hs5TIc1jG%)arn*erWeP;k3k4{O!JxLt*E9AbTShLw(Bdw> z*+&FWa8>WUkS6=0Zb8+|rm2MeM7Z&0 zL&t?^B0z$@>TX;)>KW*b!|W7K9KV|5J7EJ&1RUv(_V+@u94uY9XF8H+EptOVqu9Od7wh;@}J1EItj@I!@@C1_ljc6*JVZoL_8{ zlR0zP=HUty-%&ph;Uaec$*_9JX_r|lX0$doV-&eZ_57=>@^`t)NMt&Bdg|?5hR4r6 z6lv(KV*e37-smyFM2KiT;N4ZeL6&Ro0>R7{v0wHJ*V0614d|@Ilm*>cbfz$u6L%ST zcx_cMPqZ3~u^v@COt0fd)F^{)LkB*;sHm1Fr;Lr=%#!|W9Ns0u8kF(bd(ec(sU`#` zWBbAZm(wUM`%xdBv(SQr1eHh?D9RCs&v0weTPEic)PG~+ilgS}IGn=Bf>?>La=^ku znT_M;ZJ6Le)&U)uWNWl5?GwHJoS|Q> zaNulv*l~z5=uVLl*t@X}r!ZWC-xfN@Y}LI}OJOmUSjwn-Ag6#6Ip&e)v$XTyzCFuM zdo-yDb^K#$MH?KsP@Q8P1@Adzj>=}eUzNy_pGscfN0P#e<#v(#K!iK1TBbn3M zKUc7QFHRq1VHt!^!fA8~;G$bwyAy`;FHw=2=U#-!L$`bH5b?KIeMQ}qo&OX#qS2y5 zAJ2ZpAX%+#IvbIfZLtei$^HU+D-T!G`Dq6E19Otyp-v-CsOD%xzO}X0!Z(XEDgxc) z6Lge>t^qpY0!*m%zISxg3pGh$Fv7{S8%pkCml;scBK_c8k{37yzg+0fLzr5|$za}W z&d9Q@uUrp(LAn!T*i?pm3Yhi5Y6yZAjzjhIGl%qe3^TMa?m^K<2#cUR#n5IGt`Z#& zI?5om9wDaPY?8{4Gh(O}MdOE1wwc-6tJt#C>piu~KL9gXe6+@Mw^F0!D-O^IYJqPC zMbeU>*KzbClGetp+hL{-&6e%ItoX)a+b}f3Q%0pwYn3l7DvF6Xy3#3tf$3677?H{< z90w?nBS^#2XRlPW+-rQnp5XJfwb@J7ct~g(YG+<4>c>hQxrTp$&7b<8TzkI| zcg$bWggxYT93Khaa#RY7i;L7Lwt4sOtn!X^9~+Alv!SQl4%Z3X{VBBev1f2WC}gmE z#NOP;;m^L%;Le{vP7?63JEldoW5*6LsmwB`lNiYmEe||5q6q-WlXlxbb(~3q)r=;D z^%+W&`!|r&@X}@7bPn4xA3O-VT9gTxNfKuAL{|sB+I?|9;VgwToE+_9Ln0j77Z$KJ zJQb*;CCf80V6fybt(Y7PL_-*9xbH@Tg&;y)ZtwBTt5EKN0>!nhWS>Oz1_-)3({4n> zWRqh&!gpnlIwoCYzpav9X`YMWBjo#T-8^vSq7*qjXsSf*mO*vVrsi`)dW4n)FiozeeFdf!N27(P*SUIOhHj*gP~Vn_)|* zl77|Fa_GJH=z>?Wg74_}Ays|(a$k3StNEPXuPM}A=!amdS8qO& zr>>9Qgo_Iu9x}yg{NvpaBLl-79@+KM%D%Rnx(d0vbb^IQ5dGgM$U^9PpewVRBJfE; zeXDb^A;zqb$HCP5jli8eRX^YIx(%q30!4svCnl)2o$skPW&Y$=c^;J{8F!g7i^cQL zvxXsD9MP$1X{p2<>-voww1FLP!S?uLbhVFDf9G>rNOV*eMUM~)nR5T<$OSD1=kmNt4b#yL96Erkxf5vbCoyMltY zV=93NPXG}Fn9%r;-Tn=3@o=rj0Rr(G@<7CTd3}B{xgqeusyz}n9I-IMMg&K<30(_B zNE}39M;TBv@UidaJ?b(v%WwxwtgMXOPcUG_Jgm=cmh2gz*mzaX@s_l&kt)%f_En^X8RP$nNSJ;0j8$|po8s?A|5lV4K8 
zy?6sdTs-&S%A;0wd6V_4t7YOpEXu#d`m|l(qNDYab6P7g$Pc(^QGIla-n_Yx-qJVY@$E)|`OhOc zk2pCwiLPl@Ryfuuf+7MYPA|kPR5YG`MInzWy|_4zxXE(9;w&$z!>fKI@wH1mt>hQ^ z(%gR{trDfb5)+cLvM*RoBp!+n3#h$;oWN#k;{4Z*VL0I-g?$Av4&nd{5KqLX^S-b$BJ~;;I|V^XD)-d-Im901=v*=9 zoTiG|dYvPfb8K_xvab+rjN{TA zbhqBP9o_9+@i1NO;WA(JxfAO63)M4G+qh%4G%8OVHEW}$*s3(dX)>5W{0B;1D!IBx z{O6ABH*1Sg8cLTf&-vevLzecOJzZ*L*+t~m6KYUj3^D1ChbYERd6(|39 zze+ya?ixk0BONd2=<*V0$pCcqxnKfzv5Uff4_D_%Bhl1krs*hGDrSL(_a4B`_i!u$fOu+T@!x#z>dO%H#|5*UfM0wN3BaMxGxkYccT)arj z6nq}zA?K%>s+o=LPMN;cW$PH ztnE(`OQ?n`NPm+*wrhPR)1kT8W`F78JO5V;1}Rm+b;pFpF|gI*UQo-QUpjBR8A%6a z?~R2W-h&N{E9AA_b;He;3X3o##mY8vAZGO{Kb>F+$j?Cek zC3dVOW>dwY^+{_pt>j9X6)`4dul4x{6(6U6)A1E1XbFZ_ULUI2c+ix)@l6}yx_7zb zgz-xH+@l!D6H2tW%+-F@Ec^*mLp1BXIU4GH z>b=IityWo?Om6iI`0;CHW;dr#q-=6CY|SFtQxU>K|eqG2!__XzT)8b%t2U z+m6e}D~i37TKB;YgeE!R2{^wd;PoBJ3@dOp5NN zI8_;v4r-R#(Y8gR|dM zv8Q4+IthOFbHpR)B{y!=O|5?JAiS~|2FNULeAXcf&RI3x@#`L9fYkv`s55MrKO zJ5tNLbuGHcP}^;y{l4t3*6iIazl>G&f}(vYK7`vG!*1#jZO#8p<1|sf_f6*3?Ka+0 zv!z~`@ID7poge9xwB=>ReOy<3$>9Tb!+U#&eO>2n11K;VU63 z*=k&fYtMX#4<oJz`DLeJ{8``pT@ti_0FYPlk1Y!r5s3}K^yDA8}|xJ z?%(A49NjeDpS1Rc@kxU5?+x;j|2d9?G&Qc-35Ud?-i<#SjtT>QW40Zmc^bXCa=uqF zVdnGu!|4}BW$pdS!O&YJOFOXJ{#+Eh%W3Kx3-Ya3TGKZF^QsA{(I{JS69d)Bu%fNW ze;O;-Qn{?X9^V-|}n%CFp!KV}9LP z-}Cg%H=p@b+>%XxuFFNMYkWN6>vJ`(i^qkyhGxt3Xb`o83x9pfEr`@~FgzMvXU`j0 zaQnjzp_Xrh+n;AwM6LJiQb{UN8vd$mDAn6AweeNwrufNiTg~=J<%YBV%N{+ZPWq-D-7d&$!Ychjn`_^mUNs;Kx6S={*EJNv)UEh z6RRZE<8!3an2Qdb(j|evp#^hKUu;ogr&FS*rTq0SkpB6m46g1%RpW~EIt#B?mPHyF zrXLvxhy7+{Hvi7z8xZqbg0SDLbDtsq#wue)RRHIEbuK!`k~g~-RV$V(cR3YHkq!MO zlHk&t*q`pV7$&tgZoj^mV>~cu!{{;UtSTTenTqAGRc3ZeW7bcm-8K|6RqlLwWpw?+ z{(UQo>3OToQ~oi|Q^v#I{=eI{mIqf}Ps^(h?`1@CdBlBOhmU%8h=1)M8X~?s-cLLD zEi{0=Gm~JQ)TsdB@VWd;t7980HD{xiO-OE0#INLU^smGv&d)V${CiO3ksW)rW+__H zy7}6V)z4F_yRR72e0FWs&(z-@dGofz`het{ZO7)jmdVa5Q z{PwK@XYX-FxP)3oHvI7zSO_a~{>u$rbX-W`Z>RPt@+9c?9VJc-jN0&AS*`3P1*L!a zs=E4PxY+6XEgh2QT}Bz_d)?k*Ew0H)tgDfXBPDGQWRp!1OjZ1b*)Cn;SzRTq(#Bnm zkXdaq8R{AxWAXEk~1`^{MKSO9uXG`XQRuxy6j0j(^MOa!PjkP 
zXC+B9GM)=DLM(nxJ8)fzH^o0UZG4KoC*QwXD6_nSFLBJWHPMYNQv8s?LGtj%-yPWO ztCJ4vB3NUrYay{2Sv=5ql)4$ur{1r9xC0atm#JFDAX+(&+uWz`(wUe`ysk=mOM5Tl z;!CF|%3ft6mL4`zh;Hno>$`oVlh9`bq?UELI4tT;=hb24mxd=_DCSR}w11F*-Tm#l zh;47XS8Mo>&W264ZbFS&B2P}rLmB4}-aj0#Jo|mXJW6C`~eD;GS9Eq#(Wn?wDJvaKvF5SmjLuKE3Y-~DwbVS>^HdO;An_ezOb4H^?Ni;XX59CpBAGI{rT(#< zvd@Zvbx3s-xJ-Jxoi1|#<=QHt*p}I>uWVZyW*F<#3zisak`t23?*r(zw@6+db z^~$+i-8x*R90|u_^d+}R(MC{hxcQzxPMRr|X$YUh%$U_LfwUa0vrI!;Xc0?2T~E$P zT44j@)R6B%gPs3#o!Vh4L#fulUPcQ8Ybs1G6}JR8)KB`xPm$M;1hD@ntVoSR-lmJ` zLRABb(1XDy-EK3PP-^^I_eIIe`$T0G_|vLK*O-=R_i49D`TZL97L z=Qu-}k(8-?x%e-BqOWHPp>VW9u&NF8J(kWe|i21el!m-}|@Co0Upe|g9<{_EZ-)CQeA>$weIXdXZl@J6FIU*2K_d{34(By{*_ zW;r(&SkG?W_!sL>7yjp%Dkc96(~Me{LWEZF+_?VcAmP8c8$nKgUHP`_FGug+F8^79 zYtWgw3r>{#cHhx6rgU7G6)115S#D19fyzg{_t&l=`au0Gzvs1k$cg?7!#OFfHqZc* zTKOc69r)Qobh>wAdThL-Q$;#gXDm*jX z;oG`y#H-|Da-g4oW8UCPBcCH#mR0pocQxK?Yoc-#6LZ{<>c;ZeMY&z+4fS*Uhq?qj zHq(X5Q3j>x+OeGeUMJ%_VD!-R#&=bY1k0tKMlO2F$df)|gG~N0N}ioZba}JSZ2TK2 znI+#ANMy5Gwel`9pjQ%;krWoF-dIZSHTQH33LEx9omG$WA-BN4 zF+^H+jc&fDlvUx7#lOye=We)^$-5kQNBfv<&-aVYHq@>QF;FRj zI~DXktee*N%ocB%@dz#7aTDqCW@KHm4WxC%w>9_Mnr{=YSgI?aB<5=QrY()e`1#7a) zCswCpO{#1~HH-}Ig528?1Cg|ss7=Nti|7N-)qMZXZSY>CFYClsQ!B3kp((?wE*_8n zJU-|*U1R)V?G{fF?eR_Q8C*{ObV5QcQ8g3tnJvj0-M!nSBQJKfQ>Ut>%qE?8Ul2Q> zA>Ej>rW`ucT)d;+o|oZxgj9K;KO8&nw_GnD{$fOE_DYpdrx#qjyXACudA?Cog>(4E zY^h0~nE%x9wHp}~CxV|Fx@WN!8?HyR)(=w+=6Sk!O>nZU2u0UHY1KaL1z9-vPk;1e z=A^0bp)!&ShS>_-O4nSciqrlp{=!@NX2DTp%+*)E_O398#TjLy&*p^5wK4G|{;t)o znpiWV{ub5bhn(?sdv{W>cF}9{GEC)3>6{e}y)kZMA#!-VRpR8a%41ygv@}uq?20$n zFVje>yqoovSsp(llhiN-xcLKKbvcHkrL2#*=}(t4ATTH-y#fF5T=>hI@Zq zI%xbNAFLfZY)cyQ8~d%|nyH=nZS>b)WAE6N8^V8Apk?;GLpDTJhQ_vSihCqY?fL!7 z&nXvac{kXUqNC3T7fL_+c#m8BE^2l0cnx$BEpbf4RV2#^hfg#lv5se<{iRjhCZWJdPCn zeHr;!w05P3T908lP>EU!z7h244gWs!YxQde7j^^|mIyzOUACB2%YGus79c2kN!_mB zuvIFZ-!)n5thCONq|%K9zi{m!rE&(#8S<28)R`5r-j*VNI=!6#+ECZ~0;pHI!{2Oc zXQB*`>)(a!*$sYXkjn{tl=zRK~susmdwBl-l6t1<=Upy!@s9{*% 
zrJ&P9Nx7YlO6kR?wG~p#Up7k1eL=Dxooklze61UNi?5;~h6dch2VYNRZK~4`_BhRvZu?!-BlJ+HEzq>eR{EET4<1w z;cQlLp!x4{I?CE%Cs@zoXyH{(u}g%D4u@cY5!WTlwo5fnVu;)FmU?SOvYCxwKRP-- zXi&lGyW}vlwnLJMCDZDL`QaeJOP!*O)ZstBG&Qz#iiP@1pXH88Mh%B*qOx~&$o*n^ zK35DO(bN*{$MM+*m1XR&_Xh`$_1eoNt4T#2n54>fH3UH*$t|_>YSh+wMtI3iWH$AkPcol7e^>buIMpV#i1m*?Z)Rl+U z62+5dy+>mkvNDYQpbTAXIGMP>Nm_Yo+F8^4b!sW@k`&Wef%)OKw>5YJ>k0fXTtt$|FB=*r8ZkDVSQ?ue*^pjH+6CNu*jE?ULu{5^Gpjf z^6uEh7rY$6>X+CaCx?2fkSphAbBDHfyn%d{c4|v^Z+ik}DTcsg0Oi2J!J(_Gi$gk^ z<#$@g*4o0MRgJDX$}*4uM@O%NQ*Sr*J}>Xn!dR=A)Lf-Y7eGsCmttX5P-b%*5YNF+ zK~x0w(yf$LY3qhrgE??WpkNWe(20(|Lm24*1B&tsFhkH7WRri-4mE%_vM*d7qaAmj zBusaKKL^s(+;&LxufC)!~{f4 z){M_B=>XAf)Cvx_GB56^=qP`X7Jb%^y^!p$(D85o4=ug8#3#ss=7RCl+OaeLG8x5} zzEYzgwmU>!2E8x$o^knPwQui>6Elq}ea4ur71Q&dG;*Z$ZHwif?^RDXN-g&9V+WxC zn0Y@^62Oa~b%II>^Pxw>w zH9Fn+G+?f1_XGzNTu}w0)X`Dh&>CnP&2*Jhl9HD&=Fp!*0~`=9z`8e~gH7PI0>^+2 z1T+j7-Zu%62SdO%hRAup5J1pLO@)1N;NalV;$f}&ZQK>SRq*j9MtYz{U;z3*P9=oX zxWWJ`!PIDq1Dy65vQNroOxJN~z^?yCQo*lyJXq*=LbCvfKFoeF&LS=fs2r@Ax}jgf z-+ZN6ft%*#=NA$Yk;9Y^8~_YshTac9Y~ZJ(3=uTa)ja~FE*BjJ+$o6zGvatX0M`Ol z6K5!yF2b}sG%31n@b15ura@|oKs<&}K@fg{u-OuD>zkg^e@HoSY3*%N^Yk-(HqIV- zo2-KNHxPZq@BVAB$_HREx#%e{Z%t0jRNJ<^dGiJ<{z9ozng&68aotA1BmrM{bcBR_ zLGnpVxe0dK^t1p0|Hm`Inp39$n(jiXI^09lp%Px8-{UoQ5(u2uH-YTA@7`VDh(<#V z)SwIgj~YaPnO<6v>x^mmHath5*Eph&VpjRZg=}4HQg3-|i5^g3+#Vez+KzP=PUp)h z8(il+fu@CW{f~ey9U%Cyc)^SP4|{j6i!<(WXr7^ew}({4N50z3UW_GWllKAymR4Ho zeV!~sd6>IPJ+=8^okb7lfaaF8$37FzT!(fUPU^knc>F43(Y(-r+`n`ttmpZ_#QxT6 zb~$FvjMZaVZ118z7z|jUWqdIHbzHn&-*>W^@WI9ov&zu$Iw*Gk4cOBG4gxqO1n%n9 ztLH#20GWmr2|^|C;MlGN2NaV~3=Y+K834;P0|g*uHpi$G^RkrXc%U8tL`4AL=HYQX zonWpFUUYkzrw|t9-n%w%Zy}(hhDC%S0Pr>A-KDX>oA~<^6eHjOh;Plxss{b{)G6Xo zz-v`E>lbyP9x#fww)f8-A^tnye+}*_K761aQx&qbF#sosaX~BC)YFTd4yi4p0QVDEJB-N**b$yE*pr}PI8FaP2QUyo?c1Nf zfB*RD(}RTh3P#c8u)hJV#)o2}YEpWU^q78a z+~8-47+aDhlPGDDkgWx!G)RL|O;V{8?X*!DYtke|Dlv(YW?EFVh)~)kOG=bbNh+x% zmGXWb^ZLtRzI~tPzVCCNbDitD4rCFuYo43G80$a|OjT&Z{5zTTd1$Ftd6) 
zaz@QR^;^tR6ghlD8qb?Isvecrp%XnGQoOhKZP{kRdY^Gj&HS;8zlUnS?baR~cf81= zUdE?)&vp0mO`F@yeU$GOZB1%#{N6S6KzC?tPhIW)kZ?c!WBdAVwYTn$-t37xa-HPB zyJ_Eh(zEk_a|&rRtuC2-=ksTKciQh`ZyYf?Ff>wH2Uf=49&bm&RuEVLFh`p!lj`Wu zH+Pxb@!9637QWsEIx;*8z9@e+!9Nem@}k9yA3u4r8V6i4e}U5j(+jmB^d_t~gt0MhxRquyU#7YqWiNW9umTE5@N`I z;(vjB3SseccQ^DSI6lJ2BQ|NfDl&h#N&FR5YPJw3Kv2?S4A=|N1pU{K!vQT3Sw7HG)iy zcSMxtL&-M;{qdaB<}NN!{d1uog&pnb_u&$V`{$pWwr<=JWd4F7E6ZUK1kT&HZ)B4K z0z#1!bX&|dK@^5I5pKJ{qe(CzpeR^JGYTa(uF37f{>)#kQjKVXLMt5#b4;K_+`L%~ zxm7!GMgt1}oL{f#$-->3aJ#Tpnwy+7D(cxT&ySgnj5(Wp$P-;o{3e(%0)g@0XDQ?u zYz0UpOkKeM%VDAN>C>l$2qy>wn1IAj!jgpjY&v=(JYKdhUMSB#oAB@{?J6#rH`3A=O2sA|H!p=4xkzHtLd0t6oh_+lkeMKg z-mi0pmnR4lG0+frq|iCY9+;98%ZX-_I$szve9Zs56QJ`fuXxq-waB+2YQh=fJn40R znTl1ZliNqTH#EgIDwV63Hf`Q!a?5GR(P*tWy$SwvewW=MT{q>}iPUfX+auo=$E-@J znkZV@pwe0WblUd^KgBD<&HBqYAv^G1!vOcekGTq}z|?li^UfaQy%JKTZCfv%GEtBj zct_&7?>1~2=0PR#lT6s3Yp!w1* zav@>QxKm&G^x31cx!=;OTUdMe#_Y5fjm19(&A4Qq4~zOkUJP1H-X?>~k3UuKDSQ0# zWl7O#8DU*mrwNrI-_Z?l+d2opcV~GOLYvd{?_-U_bURMn;N6Fbc=@ev$fNkVd>xd# z9?c2qgy8aLkPq&-o26-8j^egJ2aIxvoE<6@k{n32(Xp}G8L=>2nKs69E*NsUB!7qwHSB&4=$<{PpK2zH{0~m97OF<}o&}>zBi1Y&PVHuM$6VNn0uwDpe z{qM8l4LmmWW#7)19||01!wyCYmu`<2Juai1136^ zQtk02$Q;n}?LZTnkkHWE)qMAbTskH@$P=9txGhORkUm^O#_;*4p3V+UPORU}3Y$gT z@5zBPyE7SAWZ>Y^KYZ{{3w}>El%Lw$cllkEXjb(yvR!Y#z0Ve(#oIamoqgAJ$GF(G zd~vl}k;hJlR-3o>87G|BGAE}o+kHOJ)srU%{w4OeJp;NwM(N2C@WzV_?7g`Q&hG?HWJi<&A zP=DpMx)B(V2TBBJ#fz)XC_C0284)odRUcOs zZa;EbVW1s_r}?9XUXue!-6!{(luX6l;<;U#XfY4BGxKlD(5v6Pnc-8}Tw}JhveN!P z%?%kZ)W|UTn%;-aNpr{U{h=t!)n-Lc5Xf)c; zT+G*SD=iQo4L1-uGIBtfptXuPs}vR<7mv<;qlmi=R}QO*!0JEJHr8~c^Kkp&9wWzK zBj<@UR@|BfN4u=?G8EGcQ?y!=XM9E6&jhJQmNmBZzlrdjv|^grxj+7`%DQt{Ch+6y zp3pCy-`4G4HW1!V(DeCREzKWbY;&dF!Fx_S$6f164f91S{n{-HAq_+r6op}#q}Uv* z|EzsRN=B=U+7h<~o~Gm12;&563>;8&k)2Vk3tY(cbE>kvSHQ7@Zb?`r4L}x198!+h z;8km#3h;nO8YLYZ(vwI%39n30QH_@a%vQ2StJalvr*%e~D7j@_ za9X-vDLnf2?QLIw<{j}*5&!-0XkaNg=q+6MHB+vNm``iCY5HBBO7l!aNka}gyYQZU zui#cg_({#5KY41W%d(<|ncByR!y|{x3ibb{!t+0PQc_$j%U?%S@(d)2WPseq 
zSV&Mn3hI)IicJJ)XKicihjD~k>kCH4O?a#gU&W$95(}xRQ-_$5%kX}v zAs8eaT9S8VL&TAwvod=-8cFc-2J|5+bFs2~gl3`?kAsbvLa%@Iu>^y8V zx;?w?)FaLdsT4~6u@i@WGKZeq^Uyt8gQ|(RgL3g2VkP2(dfHAA-yc>CM1LSJ)IEa5 zkH!3V{qLKaNE?Gma2Pa-7t&jdANy9OkzluP{Qh|(u1xDkdkjj-?v;b^FF002B~E3# z8XI4Vih2{JI<7e|zwhp`>1xaCOqD(aP6i!d^ekJ1CzISi}#FB3* zKZuXLa)oTCd+}>8LztCgc25FVU`q#@1pWR{;dZTNw7^{b>QFaiy0!O`deIFyLM*?K zt$wG^o9gSnwb2h|YsQj5uhOfJZ8T*Klq0+&7j9}hDc=%<7FEvEZ+x?nWSXG14e+{n zH7x6wk}-u%uU&?F-O1Rt%)pq@dzH<3^8LB*ZvjD~8xPV=dfH@YY&JSwV}0;xEBq;ffU{ zB&+OnUh=5Ab7hT3kB8cYRem2+@#O4|WO0G65p*yD4IzAJRN z)~AHtiC8P`(OJ3#2LnN{SY&y`=G^()a-j*|+PW&cdb|X;k=4H14}Vht=~N0Ab*>?5 zqQ-b}1{|jA7xl7^4kfBGscKhm5RQ{dDk={^x9<-?I=-2-{A{y9b{V=vAexD7^Bh$Y z4;<*Z&~Oi0@LQ*)np56&dYZ>;YiSi)r%J7AR=q6ME+8_fAG2z}j!eC~^g*9Nz(KFE z>1TVApnVES@>+8wbnvZ#-e$pxgW}AYJOl^N={sJV+g}od!e>K6k7_~3t}l<<4BRm; z=LI5VvNKL82r|QGk7b`KwRUVya^LmTMamkHHTvYhf+6wtiz3y$&k=K}w$MpogVMo| z`yXXH8F;XCd#R((k9b8QA;4~Gk+r8}I<6WzK37j^-CtFB;QWdffZTPV3dz)m%=`37 zRg`|hyd>|k^UDrzimOM$G|}7?RjvQz`IrYI_^CPd9y7!v9(sHUz8-Z08qB2!22}VQ zmXSn{_$+Aov3C30tE1@Ulj9AgZ@9lG3-Zg$&oFI7LW$5t{&9M(G*AH*JxO8Jjop(m zRCsXa@V$%gPa(M*pE_D6I4ms83~@R#;DLUuJd{B7kB_-S@a1OAKncbTV4?R%Z4llU zptV#}+)_;~Ey9^4N@FrcSvhrdAp&s6TkXTByLaM7D!9Ik7z&mmm|$Vu)KC)2M|0`0 zcKjk|!Gl0(*&vI+cscUy?f4hdg1Z0WO>cm0KhlQhu4(6Ky z3_S109BxWe+1tAy0V+3aW z$f9-bl~Jn;&AmSEiJWlQf=!Gdi6=gh^0~)L@F_!s`qDrWsEzbaCFXfBelqLLC!(M(N+)*aP8mYpb3CAyF@lR5OP6iZgtNZR8JI*lBeLqU!&w6Mt6L8G{J1m(9- zKE`d1c<6VnMA8#QhL|1;s1P0pWOYtk7~8us*oeKKuX$~q;^tX3w@huZgs9L?k{TCPg{LfQFmOe2= zS^B`Dk#Z%K#p@KQDmmmc0=3_pRyXx3rJVgMe&*cy_h#!{qCOM<6&D44{g6-H&q#d1 znYiZJo`l*Ud%4&ZB{)Y4Jqrq$@by|58n_wY6*@uW&o@ru^h-!zK~Qsc&P7(nR7a_w zN9f^t_k(2mw7gu?$|~ml*A#z^hpX`QMb3q-ld!546_lCi&dIrjf!PuPlsXZtM4tcT zC0Cbz=-TIe0R_}(ZwzP(V@~B=ba0(qSYq#)FA-RPr5DBNsLKkwJ%u1jhj(1gZsiEi zo`%q>o38T&+fPboRLM%C8*)u87Z-Q!Uq`e6)(N^HF~%3j3WO#(vcsoPNsL{JMl9%p zA=YZtUCmv1y&j3b7C%5_w6(Wy*b~_|jnIhu0~jj9POjjl2CA?r4k0>ECw)D*K228L zk#G4(C%9yw;-}4DraICXpkpMsFHRd%Fnwq2kAXjcJ629kdhh2J9mtxDMRc0qF+y@o 
zsHp{)hj4?VA0^@G`PQf&h_~2%zX_`kO|CGxl+t^3T0Nx>R&EojGH6EGERs7?YoEjGbmm(;01QH=`XoL>w$Fy zQ-)uH~3div8Ao8_LEnNM|i3Agp#i` zq?5H%2bK#qakzZNWM)nQ{8YoMX98@yv@fA+=ABrR0wM=(Io=;z+hmWk^SG=Ug=je;s{qgt z&`=+HqXa!l&eWd}LSOyfwSTBf);1#}lrabmey5w+xAJBLZl z#ED7Q@Kr_kiKnbnamy^>Z3FbX=s5tDZ@Mw~RTYah0?1gXc?Xt<9= ztW6XwBKcOla0^$@-3LNuYgvvQZnSwjBbNnzFqY^m$;CMRIRWK_Y_eeD>(UDkMcZ(h zJ9?t=$&)Acjp8aIYUOxwNU;~NaKWSDzU}-Jt#ms@-($ycM6fuG$*bUyg{^q7w0ZhV z1OfmkqcRC|F1Y0!H9Uod=ZwJ3+qMPZyFDYYu4^CdO}qs~zB|#Edap=w_1<6Ti@!|7 zL3sXrT8lX06wn3*LRy`%PHgHw@$u1_nMMyRsWsX?ybizel`oU)i)%PJX!rQr9iy|c z|09q7b}GdysoIEt!~M_abJ~6*Gf<8Qpv#1Z0>+gSD;bqeLz372fB;2*ND8|Qx=~8u zqq+TSKB?hIh$R$p{^(falRiOX8&7q)^OrQ136Wnu)?j!jgJZkC{tfeIz_-X5e%n{u=3lAvnSa+_b>69B_IO@p2fc6P)UPuR!Ql2HB+WdyKl2d#C`%v zF|>)ch1l0o?1Qy{N(pYAlxZ?*8+3I=M5U$}t=S?b{butb<>Atyyza z&z4GH)IzUd)N)Vt*N&`+2j$iyV=|$+IT^v zR(;|Bbb>#M%YX+uu8{s?;AF4&u)EZc3ZWX8AKz-5X(T)G$VM-4(C!;i=hx36se}R6 zPN($v&a*Wu`SX`E)BAi1ACKvtpev^>THWMbzgD8BT(YOeGVjicnyq7YWNr0KRG;(7 zv|(0clxKPwgAx?B-PFV@12pE<%p1Kvu9?|?)r+pGYPjZO?VgnK4KQZb^`6Sesq?h2 zk=i`1uJ)?j``Jq^5HmhH|0;bgsR-=3&VTtLU$K3&!V)9t4GyQ9o7ZN~ajMj-qFM6F zt8;@X_Sb)D)g3Eq==6j|Bc#dClj}76%uet83ZsTjv|vUSG1l~Kb4XuAb!=s2Rd0&N zBez+z%5oe?Cv$CwWp-tt4PM5FLT)nsV4BIYcNU|_t;o;_Gf2&TlRK<2F zbpG}$xDEaKN7|WSw~s4vNCkkRTPo^%+-r-_J_#(CwYtxt!E~o-@y-AmjH8+=$qHNC z`evzLH1`hg>h4Y@>0coim%Ux3?{fXBz9RR9vvUXL*6aQ|!lUit!LA6`*=k0GiHCd_ z>o#v)9{Ay0_v$b02hP7KnmyyMv1{gCx$j$2bM`^=SQ&-(qWZ%U1zm|}wOvd1JICsv z5gr@UwLt)JRII(WN__EBqlrHI2SJCf>D!ei%fwt&gQeee_c{7s`1OaVSgO`FF$_)w z`~~c-Fgd}=I6NXE7rl60TK(D~f;4XH*2xrHcXs642p{bg{54UjC0N=S;sl)(tU)jR zyy_fPw(pnXDtFM%)#W>zD-tM}Xk$T~GEqohWqeT(B&+F+$W@Rz>Rld}BuO@I`NS6g zeF{WGeEy<3rKOQT-B|sQr??D!^8{GM`8{USFGmuDt0vvCZO1kP0lwOsES7%fW35ZZ z<<(J!sXl=VT$C^7*{zB*-g86Na9xl=+%wrpzbREcI5+3>2HR~$75cgx6~z1j(0`jF z2$&;f9rC~VJ83Ul=U?D8?4Ln)2F=D#a!Xt0k5sn2Zo6ogw2tw@S4*sv%{N%Ceb+C& z=7(oo-Rg%{zsCm8J?l7brkwEN#z*RIDw_CRyc`l_n6+#l4HJ3CO zD$Qv)*NVNL-}8O_&iUh<=bYDRzxHlh*7~f^aKG>Sx~}`W-~OuSWXZ|s$Vf;?$mQjv z)ksJ-KPUc6x*4zh-TsRjzy2~it$dn!=Wp3YRV)e 
zuE$77Zh4WAEaRnHqa-8_JR~IJmq=Sf9s2wvH7 zLr%wzgk@=Z+-a zmn@n2(Hr;cZk<@f!Cmz;f8oo&oH>$4yiCf?@>}qW^zWoi`1RN6Cj6J=??V>EU#Ell z@!wmAjCaM;zsL1WJF{T?zjWpctY{)d^hbzl6M?YkU# zs25!}vu$>ATIj8~SzcPYL#ISO6 z>+QWmZ>=>`8(-R&cf~j8{gum?8_S(0Z`~qc-kEDqQRcC_tfQlo%+Z{onfarq+$z)A zyUcLmM~`lyr8n~~AA%3tEwU&08ea;gvl6Nn78YK?j^8^^?wVohUR4NVvnb~K5GA1K zl)SpS(ARk7<*h>&w*n3c3eMvxe(x$?`1v!z?p(C6&GgqdNl8a=>bM0$mMwmidl(qp z$C{3Yn2dYp^kT%E$jHdX+j9G=JUwM)W#_)j%E;j6j7|^K z+`fGqH@E)XyTzG$DXH}pJ3hW=Le{;}lI~@?#n&SvBQv#gb99S;v>AFTle)}}x8B~e zJyz2Fy-jp(?&Qs5)zPk&bJuAl*)`qa| z-LnU0Aa%W4n2(Q7^FTmPV#01laXA$pJTML1JMCNpui((oP@D};v#-1R`Q>V#E!%hV z3ks&xMQE%M25pye^vi#Amxe?}>SSLmJfC>BF+N}R{9`R~EZXGVV_ z?1-<=jMRVp@L}`j%|7G@O|7gHN8-esf+%e#K4)j^7VqBA^P;KgA*GtC>QjUiu61Z| zkpF7?jrOOPXKBM9K6n4+h)C=2?>8_oi1S!ZFD)${kF&M2BRgw(c&;bC~%2^3Har z(T4u+QoGvP+B@4Rw7f6r%s0fJY0EVn5#~C6d=$?j^&lcp{Kl7418?t5w4^6qT>Lpy zmzS4k=r%2r*JC}|Td{lhZU!+&4bF8un;40kf^2LVh;lqv^QI*K@wgnl(w~UIddXkk z@I~7x7(aacn5~}r3Gdc-)!$^k*CYP6t>U5px$J$$qwD1>Ks;Uj1^gPpDFcmLL3+V_tU3OUX+|hnXZ9Xjx7umR# zgOd|q{b|PS!i7hfne5E3J31nNc-+}W{`KqE(VuU^tAb}|?L9p`335a4!`$86_WPVf zAZfN#+;ocSc=Y)3O=stZp{id$#0Y+PcK7H6*fcYwNp0-x6qS?)T|7Q#>F_J^oIIIV zR1|dY-jIvrv12J%++M+0rHw=SdU`A@EDGcMksu;MLiS$cjpEuxFC4rw8MUobNTZD( zYm{&-L_S%~(9key=eDg|Ylm!OM&GnHV9432XMwSq^dk9!E(jg>eFF(bIWg0fC zA|20v@7hayaZRWn~$=m&NNJKYqM0(WWBMk*dPv zbNO?wVZ9A~J=RrRaZ%Cy^mKftnW<@KSJzEfS4r1dzmcWx?rur<<+RFgKi7XXIN00M zBfW>Ps$kiKDI{lSXP1|kkGmElr6?SEc>2z^M)@dy6qU8PHbe1xc51R^ndaVJNnYNE zXLsz{rQyB!g^`BW>4%BN=gb6d@9p-?{`B#5fdm#-%#xaguMxQmaS}^EN_mHU*GsC z;1muzDCWo^!E|Qb;eF@{ui%0Leo^_5ZL+d8h-79#!SH=gRGUsU=XMoa&xpr`g$dY? 
zsuJEUP7f?m93_9_C^eVF%+DW6*@Tm7Y0-!sQCC-&S{mF-c;~&7c62jhnfK($>FH_z zfPjSZnV|^7J%rI7782K6Nt@ntNAZUyv?pCgaC-&YjeJTknyF#(#O3fxNyVMZU@DB0 zxOtXztTi*jwHwKJFJXSNuYP7_yft$NGm4Mas+Fs&=&EC8EI)B^c_cJ%_1Vo(W>(KQ zcg^+Y&F0C~mGy@{HUn?d*O0EVy@F)|_oWYWTrBXTv|ae2&wNwE&$a2P)1(36-NU1b zEwoQYI1QdVpTD{kh#b$vJcuMdzLoe`$NIb<`bx*EwMRMc*D|?joJ9?Yk&KE z)ut(=A}Yc@jtfs1Z9YYdcu_`;Qs?6w2=DG5;A=5+9BWE$Pr7Z;pVnKMYglDaYCD9) zJ!+gMeksTZM>0L*Sd=AZ5y7P z9zQ^_Ue$FroKx4Qdh6)+AF;Q7=HRWr0@_P#uE^Il@9i;kpKC>#c=+%kDKipHjNr8> ze}DN#Zzgf)Q>5`s0c>&Z^9PuZu(141EFhfe8l8!<6s=XA?d=8Ed&(LdMIFB#52@AI z`K>+g4rAY#??Pvx)tHd}oMsF6WbxCdhrAkCH>K6bXPOttdVBf$X3oob|KMH~sWM** zOh}cGT%B!3sWUb;-9tDVL@#35l4e9pthzxAVxzv9YUu!D%pXa}zE_edlj|0kKS)Ty zv+<%__P;?6_T4aZP|tZkKi^8&&!mRmM$=FeD&syj6e-)ghg{oK}1Gc-?h`86DQKIZz|X= zza`X3GehHms`QEC7lqfl=2PQ56qJ=$haX$(>FK$-yQBJ^ld=j7@b_o-O5-}8G zM@m$*v$ZvB%|2Ug_OiO9q~v%A$`=5ZbyrdLojYe*Gc?EEka=sJDEMeJ;@5sy+39Kf zPg-7=pM$srK!7Tg<)We@aytGeBO(eH*N`FsQm=Iuq=$uR#)vwcixJg)|8}f+rs!Ii zmf9JqqUPpi#m>X7TQe2nZl~or`aVITTsDVYYlw~c>%V7FR zg6pdbI?FDuuCAu0rt#qk_vTP9qoe6t>0KqZzmUD1w2AG>j=jg8<7Ohus;4MDkUPSz zos)&knHQHo8yK}rr6VBkd54FOk9{`RD;U|2VAmogV^HP6uz!Ea^&ZiK2Ok3~*1Uat z?AWo4>k|!gj22~gWa-CQO zp}Z?KoyFEwE;GYw0UEB?sNK`H-23t6i|V(ZTeof{M74K!M_+!FAoegML9%)+E-wS(OP)M= z-u*UkL4?{ZfRX2TX23DBLG%as%_&!ZT=+JU$u*xw=}H#g&M-SSM|0s8ke0Wvudc2x zIK_H|p@)B9;6!6$4d5+UhrWG(p;g-+0;9O|QzRbXJrIuY%E^NLz#DOC>3{}~j)D~y z$?eD3XY01Ea;B%Xw6uVyupT?MytJeo$vf1Nt`5*4bn;|;n`uDEg9o_JR&ydiZHJG> z#rXvU?8(=;a^(uJGl5fv6l`U7ex7Tn<}isnp^1d%<9(^M_5_*0gF@FE^!nS%|5qA{u*P!G^JRx3H;3e)>$1^h(|D;D-H10D>?Dx@m?wn`2 z1e}NbS%)%uf%1>)Y38|IeF;0%M6}QD}EvCdV6`NaH8CXl^kO` zZ4NA?QE%F`>5R`V5r=V&wjBL41OgWQXM7jRGm3n(A=h8~o;-Q-rCCL)2LUwJoJz}a z;i+-$!zcl(&)K>WpIMm)aWb=A>#T8Z2>Y2gw1y47)JXc;SZfazk`W(?k$d&maGtXk z7h_{%@v^Arnp;=Vwc=}Cj!5gcI3yj*S6(~38S`Y8mX^pFL}zhJP?A7@e&trJb>e)C zns*Z9@ILUrBCG{ay%=GeuOPNPWe$P&?rG|$DD1Rboa#q5BFiE-GaKR5E!suLKl>hgWA;*%J0xTfU`3|$4`#5m? 
z*U=(&+ji5NTUm(#{@t}Y&V6Icj!@m%9d`kKxR+Jr<>f)gGRhah3HI#Y?>zLLMzqxQ zBi&q^{i6#69j)pub71G;nZjKifAg5nENI+mnoht{uiR1DA?NRrCD2YBI)hR1#mov$cgvP7 z>nk%VtZlU$zGXD$A*;AVMe77%IHhbrpc$0&i!&cQ?Z*A7dK8b7o&hBVuKA)V#Io zQ>?_}&+!Ty1z&$188HTCc}D}3*2N_ z;Trb%@x;hTst4_sx_pkhs7H^09V9*1o`V|+3JL7;5Pl$J2NwL zeCtPCvXPF~V%T4F-y65v&@ar6se_Hr^(pfc7X?%lZ^HRx1scsD850u|36GWb-d=-3 z6rc*k?%dp*=8!1zPX4oJiBh^i58#wQ@1RQRnq3OXKY=sJykE7oX}_!APUig;t9-UJ zgh@{rESZbz6F6g6S9ya&5GXHB!`8`(cJUP-jjM}vqoSTpxckyi6X1J1q(F%n^{Wf& z5lG#5q99P>Omy!LLN-h8z|ux_p`aIj;@WarPcQoZ{dywd-y2CnvL*(ZE%eyo!-?P( z{{H^ZZX8aXdV!21c&#%uCWe8Lk!v}66UnXH$n#U1kQ=cyD9Fi`m6T?urp#N?8bLv2 z;s&>q@aXx3@?Y&K({u=-IduRWZaJTcje{er-FIK+fJ>~w?j2L7&z(D$;q;h_0BIvE zCgz^yrfUFA_kuHB3O)rh?%%`2l)I+kcq`v|X87Ln5%+_(c`AF=PQNADOhZi#Ms!NT z!g=Xu^0;OviAiWwlD*l?t zt@66+E4i39>Q<_kqruj=$^JVQ!gy{mSIm?tXChJJ^Y#wQe~~DA|F25_#3Tca_qwI0 zbwd?~!q~OFv?_>hfF(f!)j}_;}rNgp8)N4mPDX-PXO337XIDYe4cq4EjE$2mz z_nXBWzX>CWo!B`zH1sh>4B58`LGkNn9rsRmY6+xdWKVNcA@P-vrQrAfBeVZT`&kV?aQ_^3q7$eI_Y`bhTvU^u}sn z#^rklg=RyRPsxNmdL(T6;Susjthmch&@geQAKgIiq|8;T^Ib@p!}W2V(5>(iFl%80 zxz?$EhVK_c5R`lN>^XQ)7h)E)g>-}4sAkSzUfc%WLNNocNH=Aabi?{-gpP!2k`b{Z z?C*gRtZs+zopZQ4eMV$jE5s8o0l>wP`ly z5QMtzyXk@SGW1Gqfnfit+%ZHfS_g$H-IjhL8UYvkkQYEEh%yz9MZ_rFW5+G(@Ps8D z#zP75kgD!13r>QCtgS44`}S=g=SAw})Q8wPW3C@aU2{Op*Se`4d@Gur~F3$t9K6$k2xi-tgHYLY3As4 z)Ye)wCdl9rhrC|b)F8V};Wm4CcpysYB;B0zw6{^e%Krv-S_!Rj`}XbW8looesp;O- z)mZ^e;@db(>X@0C`Ak1Ng#y9H_Y1U)myd5uunlPgRbE9!MNzTNx^n64V(AY+%5S|D zx^se3@9}xS9^h_S&xSJGkRT?hUND2oO}s~rczvqEnF2>`*}U1v$f$C8oV~LfZ;cbO zHb_EqYXXf zPN=q9gBV24D30~<;8Uv9Q!PXQQJgKh%It;W#9g`gwO(p{c4j6p zAb^hFJlL|o>3eU*FN8~2-B3o_{0hS`5dalXVZEGa#fG_R(Wy{ zmUnwne&RQ;d2m=3f-n#$>jSwm)X_|B?#^dB$;eWaA_<0nMF$5@K@Gw}U0wS%iM6I% z=_bOQ1RjK|l|m9k1bF>LCw$`)*t<(CRA*O+!H{dA#Qz-JQE@iDofE9dV`lgx9{wmA zI?S*8`aGe;dj$hied{Wo1g-=w@uH-lxQ~jcDRA|Po(>uWLVa${xrR~%zmSk-uBhPJz$iDMQ+?>aSaqE9W zj`9k|K@M|sTceO5O~rYxEdeYd`-&iiGv9_}C}UR%Ex2H(+o<4ojHYDlt z)63Dm>=Y`Px8eGQ?27^f(rTXh`T1FEf*h9DsKfA^>mra$#Kd$Reh{UqS>L-L*}4}l 
zbOTu98X<2eVqMG0z0ju^QiXg0u!yxaItcJt30-cvc%1g=&Ye36eroA={r&rebC7K8 zJ#O5%0gyFjJ*_p3UXqlEh^Q#AV*?Z2hWd?nC2>l5tdt-OGU9e=_;v!}UATC$sn_A_ z>m62B6D1|Wx689sk9({wji9fPg~-JUInC5C*Pn@uKYMg|SM@g}O(bG((MQS2M7OV^3jG(Tv@OKvBuoO8_BAN=bZBR=$ml$dcmS%9fVnxK8u! zH$@^OYpmQkC?fLK~qg3_1E_8nZt3yHVI(7U;tGsKX-oOr`~BsS8$k}J#g)q zwp?Bh7b~mm>dPBok*}B(a@URwB&Y~*Io3e{fk3Re$6uNdQTzTqsNeX)H}d^FBtp-^ z>U(9@BKEmGy@OWJqsYkgWgiGuO2_uLzm#k*59qP=@ThdnRS)B1XIDHHk}movH#hgi z!rzrQ7p|`?PU9JXEGEWLPMzOvc+`PMTU&c>`19BP%+YX>AfT1;n7Fl=u&_5gcU=X= zQKU6roi9UM5ut)?Zw^3;Pvhcx^Iq`nc(|^1|Y(oX3~g#^hi$1Yjoiq#H)tA6wpFJYn(q^ z6B+#WjU!9}J;E{WRSG7_?qX|0`M#>2?rxNezKWZ&^^~vQzRl-Q6g%5-Q#()B(aA~5 z=|{9>9ujS%RCiAglHVwenO87Yiin8Fjvcc-PJOLu=dS?Vc;^&s?O~|oGOWysT7j?u zLZe~$BT(X9+Oaq(&m502s}2f7?;?CXp7RQLN0ekf$3YsJRw%GSLPD`4#NK0PI=BVa z^2FD#$TW)gp+(dI2;}OQ7XfvR&!C`1L`5Ny8t9qoimr&qGaykC3l)mHaF7^?FqCsP z?Hs+PR{#pa&Qq71ol6Mu!r<@Kz7%3kKeED%sLM^D$^qkGRUJiFCpDE@zbtg^u3D9c zyPe$(Ad{JyS46gg2;MLTf_v)swILUvG4c#ltj0|S&p@i;s4$_TR9cN7%Ja+h`Hp@||B zKtJ=<3OL^sjy1D}rXALtFp?r_MFC%XXXm!<+qo}1J#_TwQ2_x51Q!ZOvXE2g4-_=0 zYCCr9$mqq(GO;5n(ZXPVVSq@P!H)t96d1rguMhP zfuh&DOOQlO(D7<-N1HwqOVEoFQJ^Ln($w4x_(N84ANqr8YcmjIhE{d}SG){ZFBXeF zp0tY8s`1sU#H9g3su8c6#&iR3GhdjRpEtnw0cekWim@^=fh1+vXti0vT8kiu3)L|E ziuI{cQ$}&!1+@p{8KE#N%q1+mn3-E?Yj1zVi}-MSnLI{23)yRI9L8JXwf|B~R+Eyd zYVu5dnx8KUfMwZKSC@%?I)ttB8$TIKqZV(BoMVMT^SMCSRt&q)pVC~O2x~PlGmDs2 zH#$I5KD4sp7OA8)8)|J}5Q{XpXm`omX86N_njm4G1snNKUXguAok8YbH7Zn z64OP)f*G{_!cEq#`qXj^tDsZ!VBxO%zKz_***yB?!na^C{5eniFxsd)#5bung)1&U zJ>mEqDsppI0$E;hF|D)Har2l54<1Y%dD5<42yTyOc*1cB=#sWVPhqsJqE1jo}@``yNVec#RIn~Tb`^!5u>2!RU$L=?TCek?9{e^gyn~+A51@?*2fn ztc7Ik2=tq_ESk2wd2$*Tug})YF`ATnNHHz*x*1H z&ZlbxTnHRlO2{qRvp)*{3sgz zQcUa@ij=n&QNcIh^HEzDj&%ziPP}hMvv9=*9laA<&M43U=#I|Jd$&ne^RSd@ySkR4 z-`j&z0OhbB53U#S!R zv94~f&Gd6Ut_Zm!Wmc656tX{OX4EA-4V-8aRFA{RBCY4h6;YS0cA}zKi(*wLUbW%0 z_L?t6h35sSYF~rrOE3Fp;FkeTq>h4Oq}?x2ozGD}#bT(z&( z7um~~eBc8ey{NC*~7iCCNO*;aJB}XIXZ~HOI3@(~p@^#r(-n5w~D|Qm!1OJDZYrjs@VdTBP*P>v{X7c{b7*U8t6AwhdsEoV~U7(srLNW2DkCx9xQon4@wu!w38| 
z*s_Cb_O?DKrQgk@{8&D47l%bdn(xV7)jW6o?qrn|j{0*n_VepMFpUC|Z0-z<;PK5V#B9{Y}t4sd?dGql5iX$tf58fBpz@fsR_*)>pkNk}feh?Vk0 zL1}>Nr!^X_O(Bb>L8wy~??dw=A$f7D);J$Fp`d{lGvGkJU5RrSHe8y!Ef z7sXO2cug`Dy8AESg=WOZQ&^w^OjP++A6#)gLGhCd#hD0e%f>c!5^ z-kKB^)(Y!qEA0h9`FJ}D0Ht$^ihCI^rfcx-I{fvlCUo!fhXO`iVn<~3KAniD6A5ZA zh)$@^Lt0S+#PPeqGOnJkVM||=pOrGy;xIEUZfJfEq;JZR`-=B*>VThG%rezfgx7Os z5)@3~CIb5B?S)<0SGJ?}%wz|n{v}kLqoqlxUrwzF0c{I#60fbRQ&d!BkE>2KCwH6q zCUplXShiI&#qresBw)nb#neNIkJ37VoRbZ>(gz#%s};qJc#MMNS;mLS^}pw$Um z)ZxQt3NN6nrlzJY2h1u3E=NOsY47ORL6NO`e810wTK?V|1Tz|gRCi?4HPBJEDl+*Rcm6~nyIUYwuq{bd1V-m+-IepYF=v7iy z_IBcF{8Mr@B803om_eM`!p2jjni~QBHP(FZ1COVyO3zN!*4L-%C(phCU(&Wuxp(j0 zRMcgi@DZ1aR!^NER<-nk2B%j6HsUAYEq(S3cSH7A2+jM0^b8DG(h3wDkR%`@>_9UH z%l&y0gh38*aW|NRyn>-Pprr%;Dd9ZzqPdxmjZLBOg1x;xR(p#!hpPsSO$nMHX;Z@= zBKPjyTRrofEpd6aS;c`$ZiJQq@qDpyy#NXsS^;QWf$YP^qhoGIDxHf>fF|PqK3+6e@cg1b#S(g0L7McA@cl!^Y-x7xA-$ zXm!EkDGd7rmNC?mV+1*@C)nk31GgU-o%0Wdx~jGMwwgytPVODVO>jC#2*o&HLO=2l zf3oUvSbmTW@skJf48#B}Wvod~7M52vHL~S-vr+u$dS7+7MpqJ6_;e=B~XyY$(zF#_k9aun=P zoY{R6x2=etd^Wn+5nO3OrOuz@A{fudxj~ObYZ+&fGk5CuN5u#=R8 z=$II*)(nSV^Vj|@di3=4aKU~Z7|^ssLT5RA`1|+os9L`(Cl78f$S>sa1PB56f!d26 z0@l>k-sh#CuQ30;DN9w9`QM zbM;CeqWGDafzdpJ$SEo&hHr~)#zVUF0*y#`d*Izcwfcrem+6Q7;U-W&h^gzJy4{Y2 z2Il0sJeG=O5ET^#3(-w9SD*qy@@;YD*f^tY97m77fg|+wYiR-B%lkp$@qho`JruS+ z7+zMF9a^JDM@M1n`RlL0%q8}5c_9vYE(jgBf+v9dCvIY9stkT_#pU z+(>V48DJLY*j%mbDCmS-n}6R|XSAEVg0bwz`N?vA-)Us=q$=f6y5Bby)`HhBUYwHn z>s9?;T(-|OXy3~_yf)y$U%0P0x{`p%Y;WLPEuGp2j0X zbdvx1h<=5u=A2MnJ{Ymc&G{@0P<-CfPmn{!4mfix1X=CVt5;v4Cw2CfTyFIy#8X!C z)X#|5TNRKcYp}^=y_tEQa#Vu??TgpmSN5Mxq!9_)n|FtwzAm5Tl?^>>!bNEzuaZK3 zR6_5k9PDKT^R5HZo}bNx!%fDdwyFW9UNkupr z%0Xy-|KX(SD%3h==G*26Jz{aiJXe1&KQ!LZPDGPIH+wtp)1;U0-pMV#;cO_{n;%l=Q!~U z{WB?vg-l^ioq2X5Xml+LXAx4%|D83#m!igokdfT}^PMqoznGj#+-UY%ovk?4$rt2b zomp1;Ul~jDUuy&=MzyeU`L_6R=trMs|8(t8pn7;~T_et=SmQU+-lhY57N(yxTIcxzK8TJqrL zH8wE9A|kjAo*(i8Tu4q$wUYRgxiJ;dg(Zs*gW|P!bunRd1JoHu%0}3JzS2Lk15aej zzJmwLG2(OO$^g=QlL&lnP@>MLss{g)EiOS7#Wh0YoSdWlgN}jM@WahN162 
z!|_jYxx`VohPOgoALk=tLRdexZ}$Z%wwnIB?e_}(hLVb&CRAKi&krjoUC3#A`ucEd z6zu==1rRhHy`9ZF+Tn_tZX+cd6_*76(FzB)dHv_c1PO?ACs-v?rLj|0{ z5Dq-_?qOHsoK-Q|_{%3O@9OHxZHE-h&CM~K5E7ye<+zY>qx!9kJXcVn@C?(t>^K5Q z0U(YJhQo~;=3PbK;qKGHVf6pnpf2kop0Y=?h21h=eft!8LLG1tps^We8nf+{E*FZx zt}eKzFnxpEEePNruOjjmW@=DMvBS- zKc5SF7cj|D65wLXoTDt+C@^18V7{Tm{ibIh9}5}6bj+3TWh1gKccJ-?CN|13=2@s1 z8R6z~cFjLM@wbrIYW5)$AL*--;u zzwV)?#?aN_f3q-3APShBot>tga1c7UO+U6Io%kIaAKKc44jsCL#0WheEkHC5fECdP zLQ4OK**(oh&j8BE!nQvtB-Lq--55NLHh)NPBfzSItgTXXlo8B(2mWB+qoey^I# zx{zwKb(fvPw*Ah>U>~Qs%^L((pgeApd7YFcap3g&u<$vyupGM2-7nwIR_U{o(0oSl z&Jhv7sjZLRk7ukoA&#+NI)Q$S>Q9Awuf>wq+UVuYgei1O0*tC26R*!)Gu>|rizI~6 zi}w@mC-tMKWAVe3@T|BvPSk;|F5i9)J~5baER2krlH^0+z)Pq_I7_#~mjiC(y6}VH zzyWzuoeI~&iVAmFV+t7MM&!?+pM=J&6pZp1R@}>aj@V}G$T#r{Mn1$4jT>ZhkSiqE z38d0V(9#G0re7NjieY)6pU7wSX?A+L^yWg&#&OL%3&vohOo_T1 z82F56{fFPb_-e<)Pq6Z$g)j|6ZNogykB;bO{y$65;h!dB(-5$@8n zjAr&ePuI?kwLA&`33~q-xx(#e4(}S7-O8J>#Wo zZ#2=)E}+>p;r!4rcs*WoZ8cCt#Y;OY$5$o(;7*uFx(cm|VaCCO`%`t0o{04ejr~SJ zi5}Y}R3YF$A75W+Amy-R*vGYYsjxht9w$RqXP?PyfvrO?A8(q~i|MXD3Uj5nnlH3o*QOVK}PwxYgQH(J%6il7nO!Sg( zWya$ujqv!Mo6hPH^*p)cCZmO7Ae+1Q?nxyA+2SYsHv;(tnjfycJ{vSsu*tGvQ4}` z@K*~)5LiAZ96oI?th}9~nrGHHH04h8ZVlx+d2V%@^XD4p=f1FW!0;E78lROj?DemCDYb_-Vk59 zG-_0(O=>L`vpbGpQde6$2DS!AyNbOE7NfE8Ag!UB&CkJF!empLc5W}0^jIH#4KRQ; zRzHvn34i}2@yAeAbqeVps|JH}NYVbnOozQbh4|n}V-^gvsBi_>+Xp2kUIBvkB|lgM z&ovyDWPQ6!$1c31g1|M7%RXRn3wGe$l<}s z-_zwjKmMl$)Kh&E(gFxlqpcBRV`D4^QuC+($D~sYA>h~3 zxPeH349tUYRJM%Sh1+Z(lc#VcFM^j#FIbzN+R%90F}X%ZN0;GyYNMO-;_0=H{5L&4 z`V@aeg(4I(N&>Nmq^1VRNm*Ihf%-RBzZ@7C2)kGZoIs<-F!?}_f&2j(B zjInrtRH7hC)B?~9hu}Px{Gjs>&V(gZTuRFG<=55PZz+j_^6U+}W zqQuAuGovj$ABc#a4WgKhPk^f?RjEQKn(QuBm%aU4EO?4?mJR&}RSftGX$IrK#F;63 zs^52E6DD4XZE3hvC%=Dp2kt<(9$|@N@bU7h{S%f?A3fR+&mAB#M02*Af^! 
zl=f6;Yr(+{aDV-m(qwF00LNRY-jA*oPur zh^hIPFHirYL=~YlVF(tPx&Y0IvXao#TWJaC)osX2aCBlf1bPYA(8?kO+25;=V6~~%xKoN7W1l$t|9!7;o8!V?^)pdiS&eAx$z&Z(@G`l7+;wtR z>r=iLJGx)v`YdeH+^b&P`PWZ&@pdn-`&G-n3pmR8wgXF#}7>>Xf&jqA=- zBArg|jk+)BmBa33qC<6?Kbta~^CFvpH0OcWQ5LL%xKW_k@@LktH4(_p+c&9TTg8`%P{vyYF@yU6Q|ZRw(%C|I+%SvPmVY zu|YMw@W7{;-{Q%ZQ*2?pyAm4%>}iziYG=f2Y={8I7B8tDm(~c3eXP<aPC6D{~G9^5`kPB+Knb9uiH)ik+5_eHd@M_e{~f*yqr<&H$n zGurf<8?paqPtacfRYFC=)Ln{e0oPw1^11Klhf^S*eR<;z*P(WwpwJyZW#SMt)=f9XN(y7-UUMm@o& zmG0iTXNjCkb-E|0?q=LVePUuJU7ktzmkIkf%}%zhNf9yAp3D|6Hl(Wm+|z`pt-koO z>iTPSqQEnU=PA$Gu+hJW42Z{>Tb(TS2yNQ2%#ML(W!4R;<3IOK`J&B)X#cKjBQRt# zE)}8kSIHxf<_|9$oX9q0*7=J1frt6)F58mJJ7{ za8ARV2znv;V@z2!8y};&Z`#H$Ei1dRvBjPP2f#tD!*f69iD{Z4C<4IvQ{=?f9z+hI%rBgwC@hq6h*Wc*cn%oh7OKuz#yMBUbHYBdm9XZHBN zWwjo(#mt{K7iFV9RMP`KAY6N?K;V??AX171UD45MwncWO>h}_2s;Bk1p!-il(%in{ zX)u|^#Cv|VUpDv8Kl-bq^@*n)2WyzT32)@`z?jm91O8)Gbjb~s&68WcE2hAJE# zZ{@xmd8kcv-LmkM)ttPQufq4F6potrNxTHo!w(*{ccyV)VhItn`!&LcS}>(%usX#u zkepQ`r7cJ&|7hQj_2my?wbbK=b^cg4)Zq}FF$?9)7j4;gDYM+(Q!48#|leI42#l;0JN0uP>|E1Zv)$wV*h94pRIDSH}?acZu z5eKfeeSSmToy@TsnIZs%dB}6|!i5XqywVhLaL09ODi~sS2qzgcSOQ6N4}_bz#J1_K zMhrQ#gefR1@t2Pp($<6&Ihshtwlr(GkGn3uGtR#sCgdkiH)^(oXs+AS-r6c4DQO@O ziCKVJ*0?n8FJHf^nbS}acZnkDWEm#Np(m!If(?4!<^Khr?F^Q(g=jbih0yp17>k!6 zE(XpAPAc@66M&)uZqqoM$`J-;iQSxLC;L1xwg?#k>j~mNQzx|HC*6A@`O&i)hS?ST zs_5xcp7YIFI()ZvvExap>zpO@LU;++`38mmEr`WMyUTKbZII z8OA<$cj7N!9P(45JA0q;UKtL)s6rLVPmQ0O)p^n^R5s1T{l7vzSwgn4vZi9>BVh)! 
zpx8jl*Ag3nr8!FFLTrqY`-%lHc{Ex1iwmCY*L-)pzqqVHt$Y=cN+H8NmRFwq?*02c(us4r3B^y57{BtjyA{uRNcVa>EtzXYYeOs#jnh#xqdL1ip}v5bw5KM2Ip0 zErdZuTJOEqbP%g6%to-qj*Us%nYgVL2!{9oQYWiLMnz>LCPDkR?#TBh8NCIgp)>JE zIAe1`!stjd7^f=d6B)Qjn>|&}F6y?mK-&zjERi|N$7hFUi>(Eqe6bjW5gF!C^t+^Z z?EC_xKW=wqk30l1n5L1Ow`i~<9lklty8(gMqj>e3gC9SJ0h{Q)z^oyZMXEMO+|^t0 zl#%>5=!-AC{2Q{cv6)%32rxadf=?kr5i81zB6>a^@H3u9w`>_gLE zXGbp^Q0}vI=XXC_Kwoi|Qw_NQGC)-CSQ6wLPPHf>_T$GP-ZbkbImf0J%R#va8W^-K zjf_OYhuXa+>%}l~c-*ov#D2`iA=c{byycGUq0&AO_@my3*A1$#0B7oG{~0FBg_r#y z!NE6Ri6&6}LX+U~<@1sgRU9>Y(O(*?$fucfP8b>9NluP=A>4{1QGs{w9x*v|%;Jmi zbsQSyWbH@!;7bCYp; z{rR(&^Y*>^%w*mPPoA85eHj8DoW4}P%r-ynEew)Lj%Mb* zn8=WjXW*U08SAZE$(bbSRh)2` zz#}p))H5Q*Tv@`q=K{0GCAS@=v4blF!AC>to1nf_xLGNHV>>(5O+?*Y~3~vg? zG;sys?{Hi7dHl+5c!&qk=f2Q`jt)HF*cs<@b{7y#1c#38Z);Rq+HnlYLC(Q85+7qF zx@u~+D}(Q;N+JPaIL4mnWT557aF>*%WG^NmfEB>2Im-qiQ$BzGoH*OFge??Cnf@50 zD(KOsAc7NIek{-B%~3$?Pu$p=t7AIBygXb{SiaM&R7LZm-1_hNvnLiW+gDvO2LYTi?%L(HRLc zWTZ2HZgzLZTCVSMi@gloI}mrZp~XfCI_Zc$lAJKCXTZ2K)dkKsTu&^v@6`@jmWmBc zUSl7hmTv2p{F)doaE>k(L#*J+g3u(eFNFOkSP!t-6^1U>FfB=2;XaRohhYFGY)}AO zoxS}mq$dP)<3R48m0 zaktA!>Bl;M<31l)vgv-WzrrMG6Mr?Ilc84@ z4%$nV!?D_->O%p(8?HV(6`xD9_GOAFQ;YIpT*_Hw?zbF!WRLQ}oUaTjv)yiQdYdzu zqK=Z|9%FO5<25!!CtD=CMaW(k3(U{MlmlxZS&xXNg$3ed30Dt*i`EOa$-CB9x%$|; z5-xLX&mql!>{ocmY}D?x507F_j&9D>rrzRLue*U)W?L-!8(1tO5!ukXd)pD{r1WMgxFZWlzLD0XL8*NpXLN(P4S zm}MsmkWV42uKyJyERPW5NE`Q8_glacQF6& z3{p37z))Sf=uH2hAV*tUwjq&n+&s)r-dtTSTo&1a7{$bx0Oqctg`yoPucFe2-65Eh zfFAIo1$Lytwj0Db*{wVGm6Vh`0cQD9>%eR!rZNKFA_Lq{t{)v653xITFIw30 zi)SIPLel9bN=nM$@bG<%j0VJA4`@lr$JF9b2_k0G~_%liU*}0+wKY;pR)0mUMgeG@)OF%y5@cq6~5^ zrU6i?P>8{{%Fz79Zh+|MsixxA(L6L2%EodMhp7!dH2^Wga>s1T%Ixg#cniMp=vP52 zypM;JPHHRx#T(aXTS|1N;a>I&f9mQI!&J%ZGkkc@i(7sxNkZ5Wa-+L0OO6z^A}gm# z+*9??2D$$KU{BGFF6@7@{r?Mb>Ngwzzq|2ioC9l`>u_VyqCU^eQ5T-m*%#c$%eszo#^%+O$^e?^iz?wiAy)x1^c*oeRE#M*o{^@;j-|x33cho zCi6G>EmfmrR8&8%%#8$yx|u(r@L_k{bnkuo$a0&Vr=Z`!4ss1i-}@yT%ETiE*YY2K zN{9z`Nz~N1qWk(HM;&+_6r=A3`~%^&#!!rK?nc`;Y?=1@t1YY&zY>avn~8b+)r|Nx 
zVH+B-3ZiL6G4=E=CZgXxfGA(tu0T8z3^@zi%a?yzIG%TeW7C2@4Bc3Cd!4kY?f>}( zv}~kQxv7nQwif5E#JMU+v_RL{+y58^(QQMPgbTGC!#19t z>nE?geaZns-~Rl$_`h9#01O%ihE;6aBXG67p{1n&1rtgxuaFQnulg_b91SsalmT=z zM0QnU;266-{g;A1G4TssW~{2rVMrl;B{qk_(f`$&fA^ioVWEH+t4JD&Rvg|6*c`YK zv1Ah%??U3kHVLqaRL{7N1Jxg&c@f4BbuV6oeGQJAX`Cl^K_QL<<5Pu#@rUp7X2*|%n3vI#(OqQ z6)({e*SU1*lDT=B9v2g@u^$)jwvF9{Uo_~K+9snC(AQuqA>ydp-eXE|I9KR#wYmOr z*j+;d3mzHV%|dLfhTBdYs{_4+8yfz(KRz>f!Ee~xYbckMl&qlXwxJ=CP!l&fg-Gwd zzPd2m*a$qQa_$@mVZ)z8Z{rXVal!sixGgMR9C{_el(y*lVb~pl2t0AW4Gq}ci5;VU zunX2l2~6M@eGRb(^Zxxfbe5)nOm`ous}EyyLP{gRY;3C0-hRPfS`Ej=PPYHX0}KsB zGb6Sf#YQwRDWI`k1OKbEG^z%^?*Az5%)@$4+cs{7A&CkpiG)a`P}UX_Ntu*F*_SB0 z{6Z<2(SnfdMT?@{zE+kQl_+TwS&D2evR0-V)%&?C-e=z9Io{=XpFie!W~hFBzxRDD z=XIXv^`VI%wt(Z{$>-hCWxyf$!zF^ZNEAhU`GeqBL1#;9hAyz*%>ARC{es=q(pj2qTAxS2>%@b-T2Y{^@mSe$KKQTjMRRE4XwhH8DT{d1;Np#K6BkOzDCf&Ax4~Pmpea~yq(S0 zplXZT=Oer)Xvmmc;7cb?^_Aya{Ct4CvL8V24L1x72{B|~=jGJ~!pQr!xp{gWvFH)L zva)HB$n3UI_VQIEptp3^*c(6t9WR*B2Leu=S_LGAmjg^&gJ7pwka!XjYSFXC23=^z z+1y-R<&A$JZ2|ni&bt9D!U8xyah5l@A8?1TO0t(d;eSGH@Zgp3;9v!*4qv)!`CG1E z{+Y$F=O*FcQ)f@dfA7wnX`$1nP1_nm8lHBCVkdt9Byz+R1*kUp-Cc9{1_q9H%nqY+ z2Fx?_*y8Mbp3Ad5_u)fm@^&7-aTahTw)eQt#5ONC*UPu5MAngsa0-0moG|G(jSy)6 zil2L1Cd|3N?iih7&pi%yJ`2Bm`C{VL#VQo={uSsOjp6B|{w1}>>lqh%U)kJ!kO?Hk zE7Aj%(deBvdgoemc5oa~H+h|ksZ{NYPXoU;)cjAhe_^^(>dnZTkH{2n4EU0P!QA81 zKuAtRMLm7;gxC`E*9B&JLNTtuyQltMQau_>6!T=%3&*GF$n@Rsbj|;wSD&{2)-f;% z9!gT1JIJX?ih`*@x9>EjoWe=H^9}&D$i-X#RZRyer1Q{C{W|aX5Ol`zI&g#Z#|7bo zy!QNzG(leE6q?d4b;GBw;8#g+{Deh8_QlZcfEBmmK?!-|3c;AZ^}V)q)*-xC*C5;y z`Y64jLq*Png*SF=5+bc7!@Bnz#!J?A7z&oKV#SIkMT6k^QFed(`gBd~XlAVdDZDI@{c?YVa(gzpbO3aWhC*iZ%5LC0w#Q<_+&`PR5Tmj zSS#JmfF_)8+)VORX8{O7TUd&+(XT_#sydT9^n-4pD~-jP0t@dDy2f)_7x~ca0UDq9-*|v`9LHPq8@3B1~&&g zXXxD8@f3eL3&0G7!k9LOZU)ofv~@K5h-V}lH$G|!y*^?#OkE1M3NxvF@b(yzZeNxt zg~KH!jymRD=uW&D0V$0-xSo7XVgs(f!-Z+>N^<{0rQE_mVYTCWC@kPv11jb zr3F+1Ter5FYZ9Pje~Ab5$4wk&HdF2A++ zZL$c_AKkB?Xnj%fe&_IQz3yHMBsjgk&<55t?GS_2|I8-$?+;<%K62ztJ>OSX>E7kF 
zs~cK)wf_%-;K`2%9uE1R`GHreN4=sDQ=m_+)OeakOa_rK? zKA1lJ2k+o0#SjFVfPMS03d18SirEEN?!gk*Q|(%)ec|EZgzt4w@nEk&WJD(63BJMO z`3I4m5jzb7F|pXyP*?XN2NvTXC{mu)`&Tg15VS{g>=pQhtnP~xkZBs?T(2x=iBDIvk=F2cB z_((03Qk|89Ge~C#g{eCKTj1buaQ}X&w|6VuaRs<^>5@r!aGZS0yqv$N5S)OGEnZ#7 z>S@H%A3wGm^qsVU?x}0AS(Ywc;Ge|E+20i+vuDqiZ|wJT^BH|8*n`7MGLOz=nEt+P zOEO1A6<2rEL5zHI2N*gva~QYY>NVhPr<#STY3Hp2cgZbUy{PJG)r~io5m#Nd4J8@A zqC`Y3*h@(%x}^Y+&IlM3dd+OcRQsUrJ!aJ6QTO>}GljF}FxN815@1Xpx<7Ei!|#>Y z)cS6K&0VD@GO+@O<$An~Kr94i2o69EA^8&R5#Xf%tAWt?tT@pGTs!Q)jd zs~(3|yXp_L88NT&ZT8&CnDpC~?r^3d$e~w(w6PhG^~8x25XS_xV19VWu)wcIFFOu) z(i!pa#b;${@81_H$zSRZkxkkpXU*O|Gfoui8sz;UC<#Ig&}&GUT}OCvmGfJKt!(=j2W3>PM%6y*RJ zP6liK&=b?0wq?9TilHpn;v_hrM@~WMH-tHh@POl%uY7#|(GQI;4YtvXoyTNe+f^{;`XpT~o zdBlXENjOg8|3uY`57vu{if;@r$gXYLw%vKPP5HLwdwK%MRq#=W4pbI2e32V%vHakF z@@!;Cc-XgzQ==myG_0&gvqv?d13s%rV-uHQCz~teYu_&~qpbYrd|nWcHTWf>O#Bf! zU=-PgVuK>KM=(+5#9x_1ezVcw8xzm`ckOym*GvHm3%9%Bo-kxYQv=tqJ|s1&$gM8& zR>iOMJv;BuNt8HEnsgg(Jdfs0X=!vbng4~+fMIV7eT+WX9w`hYBj*GXGm&bcGWl0p zf6jnOH#h$^1ilhj6gf9GZTy%~qa25}ug~A2;l+KWwrDs zkO&3+>(eK^u8ahIm!AN05hT?>O>O)SG8GELbAywZu|O;9Vl4DtwQqaSCBu08PudqR ze9W|@YO zHdZWN%+`#%mDe!pT^8ywN%4>MqK|*kIQ59m@dKWzE&rBNqFee?OS=e78NzCOuNL9b zN|@z|S7(N6G*OZk6wrCahK_4uVNJ`4pPMv7P{od>Hj4!Rj|}&yhG}41z^4nrP<>>BIgVdK0v;{zCUCnJRec)eJGqBTE={S@!}u)?WZv4Y&{vR z$QfU|ew_pnl^WMwqxopQ=OxD8goDOdUxI9oY8B9;iJhU~8xvE;=>yw{2r+ZSRH~KD zxLW5CdP!Rwf&&8&N9a0V9=hM`v-(eAw_dt3+s}W^H^VpMQm$mO@u-M31Ej7|lvT(@BZKF$jXX%zN_NV)0wpx57U!iD%D`<8bKMu{pHFlnR}CjG!$~2n$=lEu#pcM9+5{Y@pr|)Q~$&+2#F~#SCIt zVPvG*EE6Yk;H^7)542gp2UjwgS_&l;t;oA`RRO~9G(SB(mY+^XUiW&xrA^W~tOGZ1 z{+nSPO)W!#&K(5-e&FocPq|rI^Ka;*O)V-CcHc|`xHYT!%8R|s(cMlmF##2=V=f?W zin%%UDMnEqf6;vkS38`Zk?9Mca{T$e;l!MhnhIKSyq06@h0!c*+9=$B7<88%gVD$o zT1`V^^1E}A5M72$kMY8Ulj|i%+kxr=rAa^&W1l#M@!&qWFZk!+8~PWGAR82nD`{z^ z*wK*(JcAy8o~-ybI4(zX17q(3?PffEdQ|QUMpHsJ8z__f0AD;!P3b1KzdT`-ONnxx z&jG)E?C8;g1xrRs+}ri@U+s&?Z3A*qiZ&nKYC}&4beV@kav{8nVw{JbU*1ut(9yy!IMLoCn9n z{BEkzdlOkz7TNq+JiX$L9pCW34nY|a!L+k5e6Zbni~>e;gJwKRHq|^BThfs^5L!|g 
zLLP9hN`(*VbwAw zM34l7ZtuKPopNg8O!=O_rp#2we&9s;%Ud~fyPOMW96UGlW%|G&$0(N$PLo`DpfM*W zRjH$&{p}`QVxRER;=Yh3hlE*;R`1+i>-d(Y;=`lUTCHA|{Zyaj_37aFzF6WzxXH*E9Vf)J~T@l^YZG>;kSf7c$aK0JW z1#K$vw?M0muNfM!oI8znSJ$ekz2+)=0-K_WLvX=$Bz(We zBsJ_kcUae&tH{jeYiZ858xM!EM2D=#w=P%gwA}B$XGk<;g5!QON@XYVA9=2TZfkU6b z(+*cz@=K3d_p*(0k8!f|qTa^NJ2X`)rfli9F+WA?oUnFc_OoeI4cP<^IPY!HOJ^&! zPfb+z>@6ye7FjzK% zpqm%9x~2QxnvbpzO#q=BWWZ}L!G?N=I5~LKs9E3Z1)V^P%h-1TK>+9(-vI!CoqYjz z^ybZ*-=`V75ag6F9KS++DQ~>x+h^0Bg)7Zk6m4VSXM-fRR~wV5)Vy|X!WE7Kz{Wc@ zxt6vS0HMq0rLOWVjPbyuM>Ce3=cn;upg9%((v0aRcT-eE$#kCAnR6GA55rVI3d?sb zMDzcKFOOIY&UUjvpa5bb*%yO;j78}VaR4Fn_X9x20`v#0-yg2cy?g!kd3uCl`*S}2 zM>9)*%L*mVo}a(}0I%0?-wIbzUe~Gz(E&gH5;RFhhMW0YxGhMjq0>TdAs|>67c%8D z15W9J0_c#27`ZnsO7ru}0H9z@3P!O^ltwZ_)Q^R%AKfrdf$p3Ez&Z+gw9zf9xYpei z6kI5!sg!sRAD>=111Uh5Dp0GY7ItY;BfQvlWbfGa<#|(MpSX@ba@cnt8QoTZRtGip zvwVGghG3+Yl_kEAAteoCU`ugRBj@wj&EQv$N$P|4JMYsa0|R57AN+UC=fxo71YbTD z%1@Th*ndAGXDQKV##lR`O9LvRFBOa&JTe0V?bfb6`~A_ZB$MfHs8GtP1XNJ>f}Zm< zH8qvy<~#F!1T+W%G50?e9}fT^=MY)Qo61zN3T)vxBOJ()n zOIjBcfKgtUGNbpc=jpZ<|a#_U9t`!9H_?eAtUw-7Z67jve6i2KD}+e^#*Y|!@_IF4q5RbW>)p+n;pNbyEMG+%tPOYFPf$} z-jlo=|HNuW`B|!`k?`S?j-kOWpL7gHBj+-w#Tfr)#|qTqw}G z0D#>IiJr?$HMk#L3lAID*CsE1{XBT5cI3A?1J!fNK8?@qePGg>c1_f!J4=*5MnKB= ztnB>c=)`z_Z>S!+Qe;s9dib8mp!o z>nX2b{3zeWd$Pks#l^a@>Ta)xAwLsTOu@o3+G$98ov|2wyn8#R{{6y@c^EJE3{dW< za=oYJ5^K8vmqy#+swz|N-ORY{=$w*W{Ym~W_32F`+UNoFR;x)GW=UtxkZgZk-&Gp;=jywNlenRBJ>d=ARU5_n4@1WrK^~NE`EgaqA zhCyb@uU`Lt;F#w=pXGb{I|^pT?tdQ7-%I!aTmRe~sF@W|8GAQna3`1W4&rS8@nTid z(c0I+#(H+CJ57U2e{PEX|Bf?XKk}E&yZeu>Nh6+TWG=3>*&k`= z{9uQa!f>ZM`zmvs;_Fm;%=z5D)hE@j{mlapHD=`pgw$vbl>X;8AGJbun!HK>LBl#U zj4o6@{Ac*Dt;PwtxJ{eC`>YR*{I*}?s`cAW!_L(+AI;6Xs{wvSv z`f4eeQ-bzZXT7;QZ^XWWU7iLii>7WK)NXz2zsvUsAY@zz=dO)zmzEl)#bpg#P;1;l z>E)}F|CAZj9x<{^Nl6jjnSvbOVZ>m(@g_N~Gss_!i( z&}`D!OR3Sv%w@|Z88b(zmD_&7YoC0&LAAV1E5p)=V5b5o=``7NY5h9G=XHPi`nA}- z6&BW8m*K&<+NzxUx-<%>FgtIuh-MurkRWj_qKU|R%6~&-F+Dq3F4jy@@vxBiYU?l? 
zCg;Xv#nAaf|K7WMP}iNt>o+ybADOt8MW>Mk){U3?siYz1eekX4dtH^xP_O9?{*lF9 zrGJ6@bvjjOVpP(c5{IO%ds)6;48CR^QaBgBeQlE?2LFF94atcUTdbo8bj=&uV9@5k zl+)Qa6*W7jt&44Zcf28Em^(7WCWj3FIYSh$&l*4ew@*Pi>jl%<`00lTRZD4wkdlP( z@kTn6q_yTdZc|l}xaVkn8@njLUQqjX#Gt*q zN3K7txBg0a=onzQlt|s4ttARmOjd256>u8U zW&F;2>uu_7pQVkTntSE#&BGt(OD_EEYpJH|2jkNC>*U$K+VwS#X4~*=VDrlIVOm<5 ztTXZWjmlcl-hpDCy5iP^>@yFHgHG+m@O1ajF>X7=MDAftXGy}G)k~K~mc2Ocnn*9S zeLS8cf@|9I_23L4Le97=TqtoEgR|MM^L0Jdp2fFjb`@Ms<2mh_QQBx~4Q&6=%{x2K z1?&-`IRN}}K{QuQ#9_?8Ze$+?8u((p3GMbY1CTK`_7x2chy4d6_A-2V-t1J3<8SP_ z#PcdT$i1r6>Ua~sA^o8LAzhj7!(LOVL+Kvi_3d+-Dl2{9Fc>$}x>Vnn?7+tuG296H zKC&%NRt0q6xOX>m#p{k(dZ9b~Ktl}&4VQfbFnC=iRO4K~20VARgSt;a5Ye40nr?LM ztk?tn??q}YmpSk!dQ`!ZXg@#1&~$D}MxZ!b9-5j1lP|oiRykcY)A_)>&^|(%j($tO z*Q`gGGpiI#kuQ=jcf99NzYEJ(jmpHRcqqP4{70q$CC=vr#H!R&iQF4*oc?Uq$2k`gj( z%IB1`K4Q@(kkH7L1v=0V47@VZ)1RDMY>Hfh%C+H7Z@WXg{rvjYr6u3Q9Bpea<*IHV zA#9Jk?!5xW1}wSiZmYpsz&G7GZ2a=t%=iwKYWk{nJs86SqZQo(Ro@}E4-v1Gum7b> z)xyQ9n=tw#{%Bvh9(POClEesVJwa;6?%ny8S9#9Hcj(m8*SAY*jD9PM$ou{piaSGjnt1#KVzcSYC~fAMCTR>T91b01#ZE>(^5;LxyI~q4K-h&ccW( zHnJtg|66Y1Z9{$kH+Hwn*VoXd!SCp;OuPseB9da1a4#p7d zBeQM;r5+{7!i8!tL#(g2tm6J>j{Fk(t1=RBu`9J&Eb(Qtm=n$n5Fz-Kwv}K?ye)sh zDVcZeh@t6>=|u2%z*&&`K=c{emfeHoiZ&QDXj69BcxZpDF&U?l1~Y%es8Ls$YJmrF z`%hwy@dj*(&ZY?J2FD`ykamnx&?39bfMTBCKKm}eFN7NSFhTSH0*qUF$VR4^%>z~w zredgFmT9wcXw?Gd$IL#hLn9)j69D@_D8=RqX0UwUEoYw}I%kYmis>$;97I^$;P(aR zCdelHN&|t&$b4IHQ|r|yw(P}yUQ^fIuunSa&a!&+nApAhI?Ydv+siKsDQl>X&RR_J z2oaB}KUoFhFo!##i-9|&q==#qYNDT-;n6~vaaJvX?g(j%UnSx#>ks&Z6 zie4jD&JxQH^k%3lOt{K_V>mZmQs`c2PD{H(Z*B8xR*s6vv@~)|6Sm7H0w{m8iBaE z62ZMJqXF0Cj4|+>=5|x-@6RWg<H@^|-2r*meH>Ey{$!6#&;Js3`L#?(8TIXe%_s+@9C z_;k`7X`EEL#nbUyy|bhiSfGnBMs;iipaEn%nRy9D3Suj^e%4_dnW?M<5qsiua;y&O zK8TyS2BBfqM}T z(3`+vEPD?A02_@%Np1yNb1Y2M3l_`aax=N@yIGpKh389(1;UBKDb|@;@K~2}lL62r zmE`3EC`YV^dO^00Z)>#zVPG;tmw!24E z5c(!bI~N-@3Rg^mtn?&srq}|<6S_o<=M80s8qL=;-I!bg(-=fo&~*4ryrJ>S$|W_9 z%aQHqXQ2YH^N8G|dE!L!;SZWia0iHuFJ;olrnmfj+l|S-^aq;aOF{UVeZ?#-|F8*y 
z4E&e#L_xF{T8zD-LJbqQc2H~ljMm7lCW1L9W>e-R=(+4%Og2a)6og%G*s*elIy`q~ z(VLPIa!>?wemKHvbtkI zYO1%-pcp0P?n4))upJJpGiF|(WJT;+ue}ufJg}U-NR;>$#C}xxZ_6K@o_e#s>r)GL zg;x<(LwefY;T#HrL!;rQPcPtq-mH1q0XOQTq|N+L8UXjosmyfPXJ4u|&y)ush6j*= z0Dvs(AIm28Fl=UliP@4R^b!WK)``Z+*q4lj=Vy zsHeIYNjHe688d~^V58OFxUtFTc>JHIj~lLaNR5`iV@Qg%Jyvh-XV)7f~Eu;PKiq z%7)GfB1JI*9LtypQyEU8KE{v+?sbmx`+Ph{jUG)264M__;IZJ~_G$+;ZLR0(#2e0s z0{e{^vxg}hykBzAm#V6MLRb{H<|{`-a)A^yon$rl@=Ci|vkG{n#x&AI%4${Mhky$g)|;iu^%C1d;k<@NrIAp4 zWV#ddW>^nDXtT>dH;E(RwHGVX2eGypXs_VqP>viQ?lM=6+ap$eP|1xYdlCr=-fSk} zHzIkKGkBBsUPRo+ADIU;e@pGj^3z_R{B7H`d6wv6Zc32jpr!V@A(^WNb_TCH(XAQ> zE}r*=4?bD&U4Cxen#`8~gNP9xjN0eS7hk$^sMDHvk}>m@2-$`(n6|WhR2b#|P|c~v z*WW*j!q|2{*8!gsEnOo@C~7+<{SjLJ%6jD-Xg(9SbA~}t0ej~e4PUYV-~p3PmDA|k zH#se>guf%n#p~$){XO?B%*`XB9!Yx-x~y~y=H1x7eP1!gWQ4>*9EeL494^GJix+L) zPev@quTMbaJAI(15mec!&MFYkXNu=3Q~%eHLfbo>8A2Y=?|sUGLeyFX4e_bckC{ne zN_V=KcTum%h~iftJ``ch_9O|zP^vV3VA&+ePu2`~Kw;*+C9>SH#{J9P0aVx*w>d0X!lLLsMeZjeiXn%bV*!12k^26-tTU1P zRTkm#&9eX{)q{^AS2unPiHR9_eQ+bro#ce78LDn`2i0$;Amu12PsD6|nbJ}M&_?0) zC@N!nd&K_-N^1FKj~qRkH|V{&rq5d5C{RSt`oF12X&4#$R>c;Ig;piC)TUe;A~*ig z`*y1B!7;l0!w!lA1d0=3x*SaBM0wJO#QORAb(?UN%}TTEFkGu9|AQ-o;)*4X2bOBa zC?!tP_Odx+(bIOasM9vQk1a}~_2F1i(CQ8yDwd=DUGsQZDB!pi@vV?H(q^M&>FOHH zy_ZlKJ0~0~5k8&>{s5Bgjgn3KQnSvux!KrMp(TTJaY5Iq^*`x1eyd$i7Jv&FRVkCr2(8J=WNkaPv=Y>~T7$K;s6j23rN8+?y_b}j z2CX(WTma z88yqe!5JJT=&ZlAX%oHXI4hNTn(zd@5m?74Ii#$6Mh_{a28F=N^ZgQW327FQDaD|6 zCAbKDCP+YcMLDNzj11OHxLU8ik!Q zD+x?IeiN@TQHCOsA+^KvJvN%KC2c*63|k#gy(-r^>?Mo4A2Ri8FQmVuC8Bosu^Zy) z;)%uFSK1HL-O6Zn-!cke_g%GuC=~pm*IO# zL(k@<6{}Vi;(x{Y$We!4cH3X|OdPfaF zGPh@T428JckU5*k#i;$yaG0P?aacQb=x}tHgY;Zy>P{kJk6}yha4INE$mkU{jJP^0aw;h93P-$@cHigU- zmJ_#8O&qU;N}VJrb1r`{a7-;3bhDdVxZY3cJ)i*BWZ{Du>4~c!&TVo1fi$SR%4QFz zAY{5p*>||nh#?#!)JDocqpS72$h?{U)Vj8@uV1~2a8o+Fe}9)k_UVv^>C0r>w@2m* z$~lT^fZsD`OshBD_qE;Hc+7aCb=S>{CrTu`YLtAb6`nl6iBizNuFtq^DK#4LFcot< z8KnihWQ;CBBEDR0trF&*jCI&F89p6>gbF}02xsuHQ*v;Hl3d zgW73if%%UcigIK+>4Xh}6$P4wlz5GW}pu^D7hp%Yl(@&=57 
zH+(GGz0tDOfstIsR#T{jDb?YHD3>|@%_0B5=B@JXe@qh~Q}*u+Pl>ci6;B~q2@mj3 zlbyDHY0pqO%;H;peOZq|EYi!kp$C{-*PunXf1owabz74W{W!TMa^1hIbOuZPzfYtZ bWV>0k9w9r-`wzanmD$8!O->sx`29Zsd5+wO literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/parallel_worker2.png b/doc/doxygen/chapters/images/parallel_worker2.png new file mode 100644 index 0000000000000000000000000000000000000000..70ebb4edd983c63d478162eeda925a76f1a42717 GIT binary patch literal 3412 zcmd5;XHXN`w%+tE)k6`G7C@RnBy?1yMWh@lDugDzBLotp2Z3-vnqs7gM3IiPgY*u@ z!wE%-fT09|pdh^q%FVrZ?#z8Z-pu>?_N>0vH*3vWe|EB^xiJT;5Gw$H18!n)o2nbB zT*gdCm5zQ0BdVhJyl!?KfZBAnQx^uRFXn1;+YEq*(g4Ih0^kQV6#Er`P-OsCodM8# z4gh~pe#`GV0ML|J-m*44J3FHVwEsss17`?;pvwOySs;4=4yciQkWZDL8PPv`1$#Gd8vCa1ctnTmpV2P5Y>R%2H3C#wo*U}4bar#?eE?##^b|7 zLw!6v7D*&*e7vrf*1PKJ{*I3E31V?CKD;*6r`+RK0UX;H-#^@;`&>&jRfIE<)4u5L zyT7UxvlKuqz_n)qM*yfdFaW><0P4X|00;rV0)PYnPyo0AkOv?Tj1!5#hPv)EB3?O` z+TIR%+uRCB7+}j=QABQ=jaiQzD7P|E0=r=obm%b;wah^ow3g3O@+5LLYz(m6v#lQG z#7EGeKV#>~%~PvCHrw_}rmJ55dXwdRgzG-pJ~&<6P5$}q_@VD1Y92SZR{oZk+J)x6 zsj&gTGKiQ^9hM-Idm&W)>|gxA=+_q2U_!#p44LNH*|_Djl85S&sAVL$!F6l&=k?s< z7$L&>E`F{_)*{xRwQ^OBDn&u5d;Q*{M&^01lP{DQkDB^F@XkZ(UuUW*T(*OMaw(ts zRnn@?rmd}ApUXh8>b%I6JF6afIGCp%IHy2p=T2jzkMGV*Bwcdi7igim!qWhy6#Sbq zeZrH-p}T2F(oIu zBU{$HiNfP?ifRg&9gXXruJ(_ttrN+}VD%?v)=#qAM;b z%n}kaO7d9+!gLM$CmDqs7AB3FuuYzRTc|rT+Rj(utVI*+Mf zY4?lq!Qo~O&C>k#tGglIr~EwvS8IxS=P-}E2kJJ~Gq($K_r@A(i$`9l_L(}natW?! 
z-yHi3_dzDwmN3+u-i?0#$L+m!!<}mtydKX8b}p{UaM4%q+sWN$!9ojiObbK!70#+_ z1zW1~A(p*9Gxv~fU%oY=V-IvPZ21mc63_FXgcLXz4%I4FIHc=x)I z{yAk-vcBR6VtcRS;$I>HVVVid^VY#zh8AoT<_LGe#Hb4kkSjBNxh0S`=MtBxW*Zas zThC-$k|)@Y{(heF#LZbeXb);l^f&8zUjeqY7T!~0w=At@hUCc$ zJlxF-GC6zxCgugTFh)XaS`gCxRe5s;cENAVEzTcBgdlgmV@jNEnQuuY4BC7RTlq9K z)+h1AeX#Um_7a2yhrC3>hO5{W(9c^N{Xno7ZYh%cZwxGqRq&ehyth}}R#Ha2_*oj1 z4u0fqt;&s5_>dOD6kXJyCvSG>H4rEKWA0j*B5ufIztFP-wYA^b`Qvl65#P?wsfbJ8 zgEM^E+Fh_aSMA(c$K^C<<0rlDKdcs@_ZC4KOO8cLLZWy1*@}PK#c}g5;%huTM8kJ9 z8PZuYI}>=!q$yvb-n=o8P4x`d%V+O*ok74A1UMlgjvR~GBLavo6a>aLhY8n2aGMlH z#YDv~9WaF>a(gJn=~D~mrW~ezG_|0L!-$Jfl;*yIpCris61%i(}apIi}WK>`A5v>kn?0Ctv<~r`=ko>nr zayDxW`c~<-aUD%h%^|@FJ@Q$i7`-RcG84lX;Rq9|pSwGUD_QQ%u|iLMm94XKV{dIb zr=y?^#r32mwnwKcZX`W4Z|MFl%#knzrOd>_S75~Ud(Kbt8U#v1>M4-QkZOJnqm^Zt z>rBK$VK>EUxk@@1zo=`w+aQ6>K2<26G)LpdcwU#r$x}hndLT_fNw42b+DhLw@4Wyj za}enD%*oYY+dBxw4buY^2kaXn@9!7JIXGvXh!bkP13!gv%t+_8GMXL?-i6#x`uLqx z`d+osSNWOfr>4hOo@oZ8k*4x`;mtq!50gj#RJM_3S=(}xZlM2<&wj0U zf2Y=Pp5x+I95hN&YdojOWk(kgUFQ_Kq*re1VRk3*3IiU(yv`UGMV)@1zT0KK(S5k?sl$tj1 zYtr#H#lXkf7HTlVvtF5W<38(3eAi1~r_N@QC4}tgw+&)Z5VT6Z0^Lfjk7?TIdh2KV zloN^+1y6tC6YWgQB>*jTdUZ%yoa1H1(LGvnfF`^yVxYDn%?DD6#smitrZL-pW{n41Yw1Qr z>$PltLGF1+*bzeKqP~7N_k5tlhZ*c`P-$s6> z=SHeDA8QEVHFJ!-274<^k9TjiZz9pU`0?&MoQwvLG3RA}ET$h%)lm;c9px{_F`=mzS#=r`<&sR^piaTiMGm_W8{A31x@=k>u|EvaQ2R;W?Y}X` zr~yPSpg5WEbQb1Hj)?ruwkFl=f=;yJix^)X@}TKI`{fRK(Q}J3y_viN5Duw{yT@wa zh9olz`-MfQaQXQQF>d;!7T1AeQ?zYd{ZQirkb96&k`gy^m?12C6RY9%1Le>IVx?qt zQJ6Gb+s(eJ8-KP*nMgVrnvD61rjTKajiht*9mGX3_BOXp+Fu>`<^CA~=8^l?E_%L9 zypvRl(k+^2rM0VGcXvX_l-+3h-t;*wf$NJIkB10tt;Ge>aHFn^wrv!G^>Wl?3ul-E zFU^RN?X>W_@mb!T;NKg+;q|62a!tP2E2WM2;y7zqVNv~-h3+eB0Jy0ccDojpYGeG{ zi?t8vP-Yn*?Bnm4?lb+u@q!h+n)G+@eYWRCD^icSyRLbSn=Cbnn|2x*b8VBh+wgC} zxNnH$hlk(uLMG3A1KdRP*K*L-Yef1Khj=JoN+7 zKto+g6Q%-1Yp5uxUV*C0E6Xcy$X2vcF?|2Q*t!RYcn1amO9LZC6(uz>1_YZXH4os1 L<_2%|oiYCa#H{2E literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/runtime-par.png b/doc/doxygen/chapters/images/runtime-par.png new 
file mode 100644 index 0000000000000000000000000000000000000000..a9b89df0f1cf6d4fdf772952d2568f9e5958c053 GIT binary patch literal 73678 zcmXVX1ymbd*L84rC_&PH}>Jad)>+DDDo$+Tvc^`SZNrzp}Ds z-N{Vm%$0NZ*=L_$%1`pEq^n*^A4x~Q zY!eoTIXUCMkt-AbCA6Nmlfjqlw=0ec8BWy8qfQQ ztoL01ccDf&c@Sn$wFvjlxk1WSb~*}bLm^$+U_x5INiiHni(VY<|6NE&g%ow5Ael_m zPR9zlZkGkq3SS?y^r0h%;xXE_kfxCQ@3HtXu^?L@kBt|nLXy^!%QaUWi$`xVC%~g@K$B9*9AgcS>$Ml@A{w>O5 z{h6(h@lo|7?o&S@NvZ6^hcZ2qlF*X^oz#($JL4-COddQHoao=~jqavu=p_AT!0iO9 zU@XO{KJE>y=T+hDO}JC>2te!wtz|PXN8>BNoehDE1)3d|WV;m!*NoV7`piHVx<*pA7UcrsB*&$U9}&B2N;gQC7i7jZgbx z5?1L`#I&Z#^QR>R`P9B35U;%%-oXG-%m9y@YXi&Ak?lfBK<#LL>8;z~jo?`@bC7eQ z0PveRO~)AS^|E1T=V$bU-!u25%`aq1r6C(CK@!(&;vm&2z~>MA0)Ng(0ee0p0^=HD z86%wQ$;7gGwhgaQbeyJvw{+Sx*vG8U{jtgA?sx@8=Xt;d^e0p?_cFUn!(q^pPoqfA ze2-6Kz=jH5n!Lw8EvS&&c|hvexW@^wA}+BMxMmpy8xwW1OGpX%7cuO0{^N74wrT{i zE^vT4Qa#>Ds#$X+o=`Z^`CS(M_U&+xJd%ng|-A#2j`4bgVA z+;hI2F_rV*CH@5*7OB#`e+NmV@pHvgLKO&f>mZukk!OWH^k4F@rGv~FPNX^q8zB>@ z(}&OdQHg0KiQFW%$Q3thTo@u=>sz)gxzMyzI+m2yW>Qf1jwRwOs0|#}YY)umna-YV zwY!myPbBRDL5g?}A;Ur>F4v85gLDz5_?1C;G<8>p(pDHC_sX4Ii0~LRsRD99V`H5@*{BhNb1x2d2W+8mfO{>-Au#_7;2S+7@w;o+G z)Ahj}9bgY&4FzKkAjnsZD6;OU_PMJ3nKkFsV5G$l3Yj)^RB_J4=ah#OH;XIEy(u6H(&VkvR8hLaHS#38!;NvZ$`Z7+kql-@ zA&Cf_$V-oR<0f%2aYO}?lSL>QC94<_V&7$m?>8xk=U5hXW7{DoRu-yD%@}^$C z-4$yp%R56>$>r!1OefjR`Fc`a7HP6LUCwUh9n}f!Y$YlDeTXrc@7AgRi#20{C)(6Y zVv&Y>o;opuaZxA`y+GBenWZm1JT>VZFxV#CmX-qf;uDLT?@E;Y$4l;*1(1=pe5}D+ z0@)IdvA$xjpj7N7*g32aXp`O~T0b`#q1zUrKtrwfhYD2_yYZ4DK16dI=G;XbV>Be{ z$T&UMsw!|d6$m{e5;N5|oq^1pJ9KAfJibSLh3Nmh`TwlJd+YpMIijc(j^v#}$f}or zUW~x3I&|7r5neyvQYi96yum|GL$EG?%nXP+h)r>_jEqtT0)lAmC-c_QY8s7S4a3U) z&C1@v`dbk$b+fh0%}N9d#b=aPZ_z*8hk7`w{xV`*J{taM?C0Khu1MdYB8A{oxbDoj z_JtH;SqTi-_LJbP7b%ns7kh`DF^kjHnk-poH~E=bqx2dzG#Nof)~Q#{F-0cxNc(G4 zhNNz-TqrjH&{j>m%0dsIa(jREjBO5*tI?L0lS2`YwkTpsMUpvxU444Kld5xvKNgdL zEKoK!Xq3mZY6xw1NkRAbVcD(4FGIK$WvUld$+8MX^O1_jp-{;tOBg6NBaojy&oOW!6YA$Mlr zRm35pGA{XgCVz2QAg2JDQT_WEiv|M=V5JyNJG>Y^Md)dQ?dhArwgiXsyyc)ESqfb# 
zP)7#{mP4lL<^ccP{H{RS&352S8vLSVwlA-bd}#ad}|{s3p=Z{;7_YOUUI?M>@z$NU>u2KlrRz?rr2z3YcDn``T2}6* z?R>c&aAWV!-3VQTWSGU6BwUMdDJE-j2t&0sth2a^)anM8x&bGGa3{I&VsCwj{h#T! zW;3#7>fdEK42GXi0Nqy*I>Kvi{6L~d5goWoWu)(YKu_fCA-AvVA;OfO|6WxIr95m2 z(civOFb*zzh!M&DTS^yw*xa>2pvMmM>%an#(}{~%WlB2Z2GBo@^{7v zADPQpEOwbNaZBVy(@ZA@9%rcMiB@>PAJJWZ8R=W>Z^1Z|Sv~_MI zKYmq@x5enVRp*)G-whR1jI1*F^pF0{yY(n88*|kfdJ$Hi{*cc3z^ONwb6~)G7pG|Kxe{rIy@R+V75#kt-{5S8-2+Yqgs~;l`;Lkkk;Y?@mkol)V zq(%Q0%k}q^(md7pRuzhhY?7z`i*RXdW4Q50cCqWINzk_FWy|SNGMh`F^U3Hz&*s)? z#1>yJ_;heqR$XLTH3P1{OQT*2(vy1#GcL>bX+&o8pye=f1nGcg+u5V>tKr8Tn0EYG zEHBqsh&sS1KeGFfAm;=%ZsT{+brfA{Eul@{in!12s9TmTG@WaTu%A3=MHAh?Mn8_A zLomo916D3|H@DsKo)MY1+mtI3J6AA075x3a+90y2lPku;cl(*dMIoRx2mBgO@b&w- zAnO|m#$h3mXGG_^ zi&Ma-AXBqSlFs#t=Y|J?D0AyYD^ypo!m(ScYHe8@%mvIu(EHc;-ri6{1b5D zAn>p^4(OGgGEU7;cS+Q&D=wnjbuxOLSl&dy1ee zTsk)LM@E~QVL~b2g{JcUs$76uBuw_Uq_P|l^T&r;9S<*k4w_C4W__ohe|bdyfB4q6 zEh%HLn5>d*8!^bxQEKcJC5YnB_~#dVuWeILsa1Kk)bFt6Vw;*iSs8L^5C`Ye+<*Qv&Z-KCq`V63Y+b|| zv$5`B(oisE`}nqmcG~WgN~s`zA3UJ^G|ML zHg@*eiQ_oQCFvj)xu&6hZP{r@+ z)?&(pHhPncLVioLJ~2+YxVg6V&C zr0>7ma=Cqv$lhfPRPA-XY zRf~0++%A09o^yb9Kq+;YrK-Y5EYLEk?3zR+{-p5isrdfe^dAVH~xxyx^xolU*G5A=bI?_{UuS>g!e9@{2 zmEO>J*oVqKfPK#=BQ`B5Fz4%vdSW_udwZ;ybzb}8tK6WTzCmaCaEg5qw~h~@Ao+>% z+n(m66!KX(t^DbMqX?b%cPlEsy42)CfFRF2niekas0ejf1XUa6N_ZwJ(a3|bB>!`f z9Yo3{@0&K6|EjFmy`b@B5R?Bs2m096t=q$$Jee2*K%aOb|520(hJ|E;xuU@lP9zcL z?$xlM=)C&=?%O`Ro9G&j0nTiMZ+Xwigx@B4vq_OCPi9rz!uf(!}dBqNqA-dC1ql<(r}kf%+uoiPfS#CQJp zeYe1``!nrT9lWzv?=aqlS9mK@N`@Dw!yRdL9Y7#pn1-G3^M@GobNGuuBlzd5s$lak z46)w$hvzIZ@+(O#Q`6L_l^7F}XVk&|stO1JZ59bele7 z315#3^JekOum1&Yex2)nswalXy>E%mUjBV#BvYfPp)3b^a-9^0^wk!uel5!e!j|Wa zpzZuLnNp^Nl~3|++LPtJNn+}`Qgqe+f;O++sfgc(dJw9@TBAlFSmBH=}y z=N0A-8R z&uo$*cWL1%P%B7tu%E~;wx|v;1k(K}6FTTrZ1KVTr4~RDMJcigdJL9Hrqr`m5hp*O zkN71bbSp3IBp*;?_j!NE%>l)b3%=^*k}f$Fq}Pn9wA@H?;8lOJX|q*NcT&0fv^D{v z!RznBT9Z#z*|y1NK&gOB$v8-b3~st!U=!d_(MRZU?M9H|&8Rx!Z$b*%og}`}hiZkK zhmhESt_H7M-LgY7!p6fG#73^u*N#8H|CRMtU*|*ci|}!f(|k2T>$orOXZMB0KtCr5 
zx)II!);4OUxeHwh{yLeRir-u_3%kSII~M#Xlc4;&C+|Ww7wa72<3d zuJCuFFX=}AJ))GdQO=2@4`C_)Uo@s*{;iVkv;y2wwP>eg_J?bRQp}=t`cC%aXvBIF zCmbd&wa>;lG++FvXXdRF&g2ciHFevr#U0KN23nkP?o{?l^q?{vry+uBP1PxjC1;&0 zVs&7is25dt@1;l#M+WJo-`~E$ z(zDT1#Ew@FfdPv91XVA^t}e@5>6$?hRm5Atf+Nrj5Ko<(Tko1YLf4;!mrTBrquZ5O z9VHD~BafL87$+&0!t6+uaX99I_76WFjv@5Wkao~uR8&P;;1hfceg`r=^FV-ZbReRY zr-ZQJ!7+byxK>c_L7+MPW-?qYUuQ#K4qLFPmlI2AXONh3D5iog;ajfpMP zXqL4_k5KN{)S~jW!I^Qmc6?|bigWLc@yj=;2hZg3-t7&XCzegIzY*!FWL{*<#13z6 zN~`3zu~A`;xK+U@659haJ#2^BjA9wO+!4-jCq~O$w5bOUgtr@)JHw~au3%N1L{Yd) z?_#G`(v*y-d{05-EN%t-B<>k`N$(pKOSosEpf>%jBRyn(Eg9qc)3WMwj<4{VqS*d_ zjFB(IF-0-5T+=M22rBVq>rq1qU~xW~C&6ElGsyS{bt5g)R5fEAT^i(^N*ph$AORim zKdrv{hM%Qzi)0zkI=Ocj#^mK=4_k0-j1JmGPAiwF%bng284pud1;pcLDUMz}IVH4i z65S%S_ZKwO?v$2~o*~Yq*p<$G`rAk?HK7d`v>d1=?DR*4^(zereWTHy{<9M{G!pLl zBVb?eZP7t1lE&5e41uyeL&li!a5I!J1dN$NlA^{&7RtiJQY`?u#{n!o-oK~=-k9ku z4QwQA4nTt!HBdVkbM-=>u4vAng0v@N1j+HDM|mTHz9AKit5|J*MQ`8Xtv~zuj?UP> zbzaLeYZIC@A`hORkei&81Y<^z4jgrtE+s@aS5>bIbQiPcBVK^Q(i;jGhBXm9D7D=g zMk}mb%2I2PDbyf$;L&eex-!ZUlA7Fa15PHLpK00c49_HEj^Eq2km7X3Qi`AO2TrY^ z5XIpSOpJ#aEc!FB3d|sPOxLQ1lh{0v0XAu;4f&sy#x6-*-}5SXe=@D(FyUM@d zBUr#R^+i7B$RS7fsxSwpfw5_$Stk6^W094$J~XJ%o;`zO1w_5hf1@K6u?Q^N^IeJ6 zQHz#zAqq>etSY!lUCvydKk_ijqT2Pn97?qe=0N4R187fi@Qk@onY7KBuC${e*W;r8 zo=fD?6|V2c2Ni8tHId1eajc`;jzngvJZNL0bHdYn4H)((1#yA? 
z#!R>!8Fw91wshQm3QHZ7LQA=H^a;m`SmILvLBwqqJmp`@)>A=GCqJ>srPc8>==kQ# zlRbe%3Npl4_?e@M4`krP93~F`h1rY`LAgumf%EohgMKXn3_=%XjF~eCRqQ|0X}P}s z%#*u%=UI7X`Xl^3EVJZOe%S8zrw_(}n@3J~Wjm^8ly@8Q!fU35tTNRZQBewkApSaG z_8Pr%N8mAn0k-9^^0X0CsfvTJ2$~u;$0lso@0+%Z!yO<~MS9GZ9xI3=U5ecZvzcWh zt@6iOw++1Ap$iX99C^3ZPmeVU0=(r^4iuR|2j8#9iYQxW_lWr{-v$rr)x*TAYO=Wo z^X))ITq^bqs^r9+5_`<1K4d{+P5k&0N2cE)2#@5cMbXEo%IMt0x4&YU>gGMOOTr0v zgi?natARKXn0)%jEBkQWLNv66pFV;N53@h)U7Lxta+5b%@O-s%QxvcuD2^zdC(^Jw zU{>|561zRp?eJcX*-78&qM`QG#Nvc`PXQ)_=(m5mk?rJFtwlv6`xYtmu?bZJ5grTY(7HVsZHr#YoIe1tcQYsH6WcY* z*$u!Mz!Gl415&ckk8-wzPw6p~0?agD>zDXoh>t~AMc>R&ue$s2u8g`R#gpfEaRISSgjV_#0G(kg$Ukn2_ z3clO&PDKi!&4J6Ns%peFQ;mo`;=9*Sdq{03T@@CQ4@$AV|GHT&FL6w^a-WI%PRv`C z1iNKnW%?hd4>4M5Q`_ zfm6xshsCH2iTG!yjqGO_cWXaoI_Ck@mT%Vm&5p$-<#p{8IMLIfeTW!C)`Bg=f#BsK z6H}zl@sOj1J*G%?*G)?BGn>UMS-MAHsaS3`u8407Y9G*sziK9d9IIAzlK z%@XZj=Pf;boBzIwAZ#zIQ)PMDE1qUYY_Y1%|+-!>gTRyHXeSmvFN}|5_9Yc>XzX4rdf}eI5h=M<*#xAY5 z*CO;*wIj`}9ZnY8QI)lF&FZE4Cf_H9WCGn1I}y~m3qXr(ryhIHNntZvf-iDY>+%7rtB8M<`(BbbCWPCXX~! zokW!dT{@MJo)fHrZc6+GBx$qmr$SO8m<9juFPfW{Ss?bdPdcNTEpryW{SKVg(7G|E zO(QqQG*9qFnfmZ*-U4e*mhoUTU9;2Fh$uT*EVTUQzUv$z;1zwFc>jI|C&-*`fhgz5 z1!DgG!%G1&JxJH{oqOOnU)QemV>X-CkmShvk?-2s+yM*ux$XtTOub}+Rl>i|kS^Y? 
z3Z5cPpCoCy>u4mlPG{Y*VVS+kv}Hsa(oGOu-&q1!X-s0k4>iv%&;wgX45j^?O~#}< zV)ja-eSN8kmsCrJ$eAP}OGASfiM|3c0>73?BZ?FcYyRh>dhs8d!sqOSr!Oe~jZ3Hq z*Pm{mQgTQJ4KHzSYp>Yisl=s+P_!9o%h4DF87D{M0DPS#kkvsLp~T0z&d&iDjz|8u z@tbm6U$pokCjVNr?E1UaPKi1~YW+{d1WWMYnthBIR@%*+C6a7Qa)?{>1xq=vMsBD?F< zWf88cR=tm>X)X{APxkp=DGljQWiKy~wK%sR6rEWB@^87^1y35~k z(ae_`j`a}(mxC4VS>>dt8IXNCGN&3n0hHW-#XWs6*W8AaNha?wk0Y7GqEa)NI??5g zJek#);p#_M_&1%b+Uu46U;OFWQ?buRCTk<+{~)bQtRjn_Oq!hVOWt0qnrcWS z2vi|>l~uS8QXjc3XRvM|KZ8U6W!c-i<(?LcFuM%yB{Ns?{-ZQ*n>A-!E}5MM$+F@u zS&~TPRWV!7aCfe_>Eo`WcbX%K2vB2Qv(-}sE0$r>42P}-tV~4Ry+@J!7<%aSf>Of6 zYJ)ftJuwgDbFzfGBcRixfa@Y1Sn=JaSUQclExn~-31r{&@Y%M;#J^!j0YxIjHL+4< zg`ZT&vRT$R2omIpMzJ!dQl4TFVyaJ)$y)8c7SLe0X(YYF@=H7NS>O;*-Hp?vR90^M=$$X|A~xv(&(1x+YgskGj|4Eai;Hr)f5l!9wkquZ6z~TkZdDp z;$LLmK|;;2&*pH+M*3@!2K?+L_01)3*@+xKM-tD0x-8>K86dWEaE`%?ATzpFt%>~C zxtIa7c#Iew%;ZTaWi-mv>|el+S!IQiK#&g}G6nxv;bv}W$QEvLwlHV@d-{^ty12`N zF)J;xCsXO=BN>~qotFdwYF>+tfR*}K16N_{OD>$M$rBp_Bvo+0W;k^sJxDf7UJ3g! zEV)87dFEseb+TZ6d9RxwOPk{U*`y?b6>U2E2&JVplHwX5%0PW2pF_@JtN(u@#Y zc-bv?_N2gJ9o_3$WxVrvqKB=`aT1omE{MA&XjzcSqj*YfG~!3P8w6GomfY8GriV~h zVYVFaf=XjJny_N|cY`&vjX=Mg_oA(bE&Sz3nCKVeSqUr&tlT4NL*ryR_-J`uLye5N zKI0VBd-ksu;$lFS*%7MvgiuHG=j3~1EYyKg2=F=zt71u3G}2s>sFVB)a3b?=esaXJl9#{Nllp;+;mP1uVYx$;Y0D}v`aikx?4zQ<`Q{f_yGD9X5zS;Flx zM6dgKcHPQCY)b;gnGG`~qyC@QG)wy^UPTT!;I@AcY}7<|Y3MnvGwic*{GDG^!DnnR zhz#7%Q$F@IMR&dSJ!Wb~xeHjBB>04W)n=bXh&050+5INP!X~Rm3gp*aDa!1`OXuHB z9fEMTVPlqEkAZd!yWaf$_c#s@F$k;mQTD!I3y{Exxb}CEl!) 
z&sB(3z48ySX{)^DfbX0PXgzZrW9mhX95Ff@1yj}LmznJz&gD1a%y)0iM?njJQ>zY) zM*n1?1rLgBD#p&!d6|C$@UQgkT5S%q9gkGUVRDtjaow|E+#$Jxz4j~j${-iV>$F&P z@La{IcXE>moE|taGqKR4S4dmzWqxEmhmt2_qD%lObRX^+bVyAJN&m-m1%ry!+n8 z3l-`M=$?Jle*4aZvl1|tr{P)fSrBUN6D0REvMGeZrNk2_{QgEqIVq#EM1t~^fa0eT zsSWGAX-Yxy0k0td z5(-2*nItV$a4NGSXlAkU>vzAPHN=@uD_!gi#&o9VZOC^rOo>-YCZ%y%Kf|t}aWX=odMx`@h4opQl21 zAF~SVS*fGH=|TDJbWT0v zQc+Z5GMz^NXqP6)ERh?(sP5CJk41B)ghGqjvyp}XFc?|tmd&uW+~~9x&Fq_%a^P{ZkjJbN?fpfuAWXBw zB+exNup!rzyBC49omV6yeogJ)0{WCdXVh}bHnv}G7rIbL98650ln7~ZAUmnB%C8UD zslz%7f&uL#+D76w@|_^;Wc;ls5w&9grEgD{abGFT$kb+|73~Yde2h)Q+TP$y#m_lt zAFB!LYL%Nsl%}o|x(oI+4U%=`v~S6Lpz@DivF)Ln9kQ)pc~(V)IYEpfU<`JQ3#Juv z4)a4|MW`erpQeETU_F->QSi+6wpcLGAF2J{N9bk^)H>exT&cy970h-o7PO_Xnx*h} z=aTbgfLGg2hW(zoUhwSAz4bv};+Y5B3_cmgZecjhyAD0jHVal#3kmV|@i%70MvzS|zJnyfeZZy(O5>m{5HvUZxFX znT7l-Gw^*d)L8m7%>!y}DN_a`J=4B<|4a5gHb#u6;L=p<*^`Ef=QH-*Zvog9 zN4vBK$bLEALoSiu@zNi9LNim$1ezlpLSKXK8G%(l{A@=iH;GSP{}Z8X;)vYYJ;b#~ z0)1q&7}F!0BfmalsrlC2&(#$jJL6nLE7MI1q8*1M-hGVEvZr^P$FcjWs+S{GRBshu z{s{KBsh0xY+UTuHTG=FdA}=``QGAlJQox!bN)LVu9Nnkp@i2OE+uoQ4v`yX9_V6>N z9a68ud8|Li$R}XNc=lij-dcVC_N(7^jE(^*oA^X&y@pHkzkmju1$f^*?ou|2`6904;~5z^yQU$u)jGC%hs+9&tWUFC^JM+|HhrX3O#f(8CUretr8NH~ zw2nLTq_5#Uvq_;Emdg@3P!}T(p=kPUzJa2$Dms^{Qs7SQY*7>Hl!1H!1Wa0`Ov=>@ zBr(050oAW;@QF_$>Se_#UVJ!g36G?C_(EfN26FK*ZYjs!ZK0(+N*RKOISJDHPsYjR zIhz4f`zWU;ImOG5P!fD6pP3-vRLvZeLEu|g$DpWs=Zh{bjN^5L6=uo$Ob)Fgbr7*B zY`&h94^IzeYSt4(CIw)=zPl((ALEEfj2Gg99$oPgo01uHNWC8tVcE;^`zq7mWCRcs zniup@KSVq`qh;$PN>*b<)*;9+<<~&7v30XmV7AvLu3sx;$oq(iV$g(GZ3u59l3pAe zj#Npu(AU#?$#}yVAaax^?>C1TV>M_AlweFwQpt1^bf39yss}40Ylp&zZ16y{(~m}< zVUWJxX#3mPVToUb-8Kkiy8)uNv@4se_K)=-TfkC)WmL-T-IJoa69h+_o zs~h*VKqkL<5bt@Cyv%t)6M~g6f2^Y|IV$wc2%!omp zi)}4WJ}w7uQ9-5s&9F!R(ab0xol-tZigF^%V!eiUSQ0QS2;_mH^irxCI5 z0CmO;SR&t#5WyHbP34{@&{`s1Gg}vK&m13(7D~o zNb%gnb!TQYL=-3Ypo?B`JBx%&YnWdf@f2kZ?-&Ozhojg3(ul_}Q&_Cj|8Jo5f|%N{ z@r)>a-RRfe=)y|*Z~BbcM?Y>p0Wi2ocGXj|(88$rVy`wmLgrgiqyIl}*o%NlBLa&zkQ%wxFfpoxTn3KT6AS 
zt2C)#o3=Bg=7;e3BbJotugJCzrV8Q)w@%yVffV<PtnU(gFsHi! zN@}l|H{!1`UGOZj4af7YF6|Gj>_Ez065+u5_tp2EIQ#`@Hpk>oRHjXdBc%##2e*I| zLEIL|epX7weOX)}FQV8r$kw7<2-0V^h%k^dZo8gF2fC!3%{!c?n$~%;0==-Yu@nso zF@KwKlw7}PN)`2`FzG7|2b(Gn!>wFmUQB*L7H?rgPib`^V&ICEp+<3s1g#wprb`OX zYW+`Ha_Txi_uHQ2V8vV%0dPDk(UP&hRf9If> z7}Bar$lUcIKwG{_-wM0}1)mhQF#_#9?UXzbjP)@@TnhQS31Kp1>f&GhfB6-7k}mn` zj+^@sE`GU+(s;G?b{0-`t!-y=MVk(?H@mJmEpt0BqMcI;44Zyp4$EqN;h5sT>pVY3 zh;UnbDP{ARqPwiOnqn$idA7h)pcGw-?l4E_0r*?lbcZq)b$ zvD7E%{8rH2kQKbTJtbbTgrk6b_MwZzRcr=hJ&Sg*XSTaJY+xLB3#R7$n{Mkm$l`Q7kUf)Pra~}!6P6Mb6Z9bsDl8bNFxus+!Cil%VPPx(s20xcZnhgrL~+Im5?EV){lrL`;+<-CPmG|q z1k%ONVEMuWTko$#kWKzJOfHBU+cOcD%J>z7A=t7+k|nhv`nF>k3*fmwV%44`z?9v{ z^gfp@t7+$vjF7CZ4)8@6k)Iu2bPg)s*b3HLR6~y?VS6NOK8&ovw&5!?ER{97p$;e5 zA%WQ6d$3R6u>$s?VweJ>?>(GhMHvJMh~k#H0Xl5q>%hi4V)9$5meNYD+HT=vHRNN;~_@Nn(i*5PR&gd%-TX!XWH|7D)@FXv*fb}tf`Vt+)t*eR0=+M zjJ6bZjh27XYUn)7TFO#|`YU9cG%Ucr+TK#vl8_BVnc+sV?Viv%Rk-ymd?bF`q^bY$ z{GqwEb7<91xWrARutbU?!VCW(E|d|FvqVm)z%fDa!l@L%aCoEMXl}jC&FP!p(ea-D zrJhv(NJFY;Z&j%|-QcOEw=|R}fFqk(Y%Wal+LxK4QEhZ+?3bl|jWV4DbHv8=Q z%?R#M!|2k%9vDB(v1xzSJij4v7HTnI_N9p@i#e)YpLf3`=Rm!P%dw{q2rUAVCqhxsJ1%U?C7Yqz=fe-Fsd3U1_JmQj*X}>jB479wF1b;=uIxe*Kls-`-qXPJUTYtX-o{V<`nyq_#YWqi@Qnrs5l4G zqrSk08Fl&~pTWQhsK^}or_r30iq|1YDdPo9@X!%GuXH`Rz7}z;l%Q24*$IWWRMbXdIyjEJgPJ38f4cu{(5__( zR(ut08dhEa`OPhl8Wrug;QjE>H3H(Luj7L=YFoXU$USyWD33NJ(c$q$%~Ycl+GQid zwt42ioHGnsl1&%V$veP9WR?2F#ve&;Q55(4s|3veZsXv++Z>kDT)P%}#-m|MOTd^3 zThcTbwNQNB{540_SpHTBWdAo1hx(nRj&0%?+@~2-@Fs$H;ZzmIcI_&nve2)+OHuA< zFm5{ogEzVY)rSLXg--NCYg{91u9gQ(JIeI(kA&e`1a7k`+rt~sjy}LtH?jeNIljs% zl#EE9-)BFjxT^-sO*k@}+@{Z-8|(pZJ;-;%0k2BM?Q5rhg^{OC(d#th_WaV!w|w09 zW9*pY$Jus_xqq0>$`=?cvfp}pn*C_NBEq=*oPj|xSGhNf76X9W>`cJVHvjasL=h8a zm@azEo(s#s*f&pQfdX!g{_vKDIo_-B#n`?UR`*E(r}Dr zpQb_u7X!n?xs=&cheZ`|zOHr-YviwL26m|ankRYPoJI`#BK*8{ytA3P7}b#9Y|7Yi znXRIWBxjC~z`*s-e5D{HgivWQ`a?__w37APngseBZd~I{?2S3|L1jv#Ts_nylhgdNIEs4t2#k}oke-8rLg=NeA} zLOFERd)Gr-F``<$?+{|iz%>deOZgf3ftp-F63J1zihjB&N&j 
zfz;QSmqJ~3A+$P`Nh5aSh$+<(cd^-R;1L$7uQaBh^dQr4IIV42!}*ay`;ZFqZgd~2 z-wcDM1(H-oYtje+DvaN7LUn*M?HV7b77C&V#!JnOoDRB7REyBMF8b^Q3SOUng%Y_? z9<-n;4ZArzSwYn^oeSoews5%AFvFnFw9ktGwOt|dlvp*P&&M{{vxKjo*`)uKz|>&3 z+hro_2j=z$4_qq_Ba=L%L^nBtS6fnXGYYUI zwaL8y&Ap1DZ2KoeVcgC*U<&C%{XW4aTcM23HikSdI(L+lCKSD7dB>EcIFE5(DaNIS z5Yk!8gBlo*_|X0N1%Bm)Q0*!C>F|=D=GN99A4=yeI<|UvBW#EaEl42!mzk8 zkCmy~x|O?ISRL!`6Z}C0vr%i_$hY1=YrhdG^?59#5v7J;qq}eFjZQ$9f5Z%2Nv4So zD-2kVbU^F4+ArLQyXy{`)WccCK_7c(^}rj?Yz4>Fw@*dvaSoe@_-Ljd6NwGm_*0PTEEtGGdfAhwtzc!s`e8)=>gXoy z!9$s3_||h5WuPM<;0@s`jadlnqNxg^R=?JiNuIN$Zo$L(>T%H;qD-{$*=*|k@z8+G!Djj&F}(*7zIw4}hPK)A zMWac|u}*$KL6DUmrI9A7APa$?@;!a1ZX7_cB&+0S;(RVe=`rE~&ud@HG>!T=1tVg| zXE|weClV!-koOY@-&Axkn0?qok@I1z@QMe!)2e=R)ly5K8g$8hl+>LuS16pBnY_Pv zN;j{zZ)G-4S9%&IRDJLbUYS5Xn^o z$kmwaResG)yEE-nd0%u&gml`pCzAXOu3w%lre%!Xw-vq<$9@4e-*Q#~>pI>&%@jOO zfB|xF&j>-*CPX1T-dw6iKW4IpGlfE(DS8Iv!!(7Ss{_Gp+0? zy-ZH}umkE)O^W)|7>C~6KE$pSP*r13nxC*G3}-ho$>%i$d~g8Ga{jam-{IjBkW(8_ zQ2buOGvIj9{d`i^5j14~E{9Cm8$V z<9D+2aDG20Cy^ANcsE>l%T8cO3fveQNs+qIHP6BWF;01P661YsT8e?geydSMEnSSG$AE~7kHu3hNuon^R;e^G2j6-n ziGQ5{-^~igMSKlZ&Y~ui_HjlBw24LeAHnburtu4zvioh@fWQNGfgctC`X;AH)Y%M zwu_ZSQWA~DuX@)HH|t2vKB;CZf}JeD&OEdW@$~pJ2(MY>=RXrW-lF4D=&YqOcs&)l zq9=)Qjx&!vf+hFy6-=V+)*Es>$ag0qC>2GxGgq-+(^bekv3D5z{+g=UJJammH|rL# zsnRnuhv-XH7x230@Xj21M-#}<<=cukV=TpFsm0(6^>QfCG7^z$scQ>v1HIT}-`tcyX#p%|B3cB+ODgxj!^`WB@e@BD_`yI6;MAZAeV{x^eOPB=CJ!^Fn-eb`V zYwAYHC!`Ud=8vBDb4;$<%H5Tu#W8j}y*WOd3$=Rk>GtIl2;rPe22PGR7AY=EnDIlWoaym=dPb|^# zsk>-_lXm%0)i{d=>QF;gcUUgMORjolVy22u@=K#^ssh*2KM*Lle;ynT{60B5EIKuq zPU=b~F50Hw#;W#tRO1_M$)C~*!J^WmQ>tazwt}%p!4y}T>6+&2JCK=(Z1baDnpRu_c{;|xeCM^~ z2j{%$PnD3F1?>HxH^t5?HK%QfD{Di{jg8l&{z)0(5d&r73@8WT(=yZIg}+ueQgAx= z#)P(x7^%k3>A>H3t>qE{58l6dq%IyVa`M+Rd()rxm%U$4yyVcY@-K!}3%lW5Z*Uu5 zISxeU_kQj`Uo!uI*7>Wj-|G7BVi6Qr`z9jU6#eM{#sYlab$%fcOrwscvKB)DeT#f6 zE2prywEp^j>9Uh_Fh#z6eNX@DF{r)-RWOpDjs-!#DOB(ZFnqJGvSlu` z-YFbT;V0Wq-(5c2L6YdwcBg`BBk49fG}K&X(o*huVRMhg!ebLQazHFp6Bq*-iIJBc 
z>d1Wz4;3TUsWN%wCG)4L9ic3FSKAOq5?$}L!WZ?Rb*3&r2VZr@ZZVm9E1t>%@s;99pm8JNaQaOeU>@`;SWE*^`x0Xd}F0A$IDCQF!+_BbVbS zqd7^0c4K=~D4fssu8_)u>eXbAsk-H4x(Mr(GBHa>I!*ud@W;ptxv#BcefrvJmcK|k zr2DTg#t8RMq?pVL8gh4L!*T(IQgB8*Ro_lH()PE21T-5NLsdMT?!yRS2 zNV}a%cE4mm6oeuuu=q4gO}-s%b=K%W0+w;>%=rnF-6})(1HM0$X;lW5dH;aJ{mE&5 z8?Wt&u)-B)XqE(JpfNt+)Wt8GsG$NG)o{w*Ozl}I2AdSsM+SeS#ZxoPL^H^OsCv3i zn*k9Z7YFWGI4#?_@!gcJfw^S!-#}3Pdu+)er|RJmGQvVHPf5k#R9q?e z`e2X0T42P0w(o)q;gep9IcgYX4VCY;!}9vy64KAaW@h_ooUjq0@jzS)%m(emePt!2cmZ`vP+QH+A{^ub8)n&nEU zf5#otQmjSh*synh3#O9chxf;+V>Fj*fK`X#b;R|MM~5_%Qr{3H%ID*@1kDw)-4qg| zJ7TXjjOUnG0>aJK4vdw7WZ4k;g~v|*<4m`CHeQtKX<7RaLH76NzITkPc3HdV3M>A7 zBgtEwzijxD5&cSP1_vV;LnN*=eq;i?4~O6DtBqTL28}6v@zoD96G*6MvzKD(>ymC4 z);=%Q)_1}^sVY7$56fm=(RJBXy!d-IQEYWg&(V{|Aytq3IevVxh7cmw+B>FH!CN?` zcxGQp)1^fyud^|#4%dMSD+voutklwZAFVD*$@F|BXKL#L^QD&~LocrF=d`$>Y`*ql zeS%n7!*|vA4@I8~ni4hGbLg_PBZg!EU5XoT9s;O=T}z4W15+j|8!HB$rgteeWhmyD ztpG!UXs4ZBn0P|R*tJvtykM9k{S*)1!QN0iVfaLxo*{zP&=IsAR5abP&2(5%#MPLG zwzB14spkH+3G@W~-~(P*0Y5urQn@UHuVUDP29-ywbtDN5Zv|w`R*Fh}5n<9_fp;OV zryKe5>~Y;KGBY3XxO9*!n1;0?yhq`;h>=rmhm9kidF{plT079@8(PNOIDxO^AV?-% zWr!{=r!OxQ@j%~+8X2%1DqGG?z1(iKAU(v8Z_L*9fUJZX;|~1`beN271p7`PyZn>i$m_N3NM2}z{j|4e3Z(J6C<@0MqSc( z_BU3FqgEqVu=F#KnEJc^G!g#tra=JoPdEr}PYL5mWs3GCFg0!zh=aq1f)EP^F_Oba zlhoQn?gvY!84((zPBfi`cxxaz3la*$}nM+Mwq9p6TJ}1QY%|TBz4h zWg!ng3t!P07a^|vg5PgZW(BLkmem5qVnq6@{Pd+DS@3p4Vp3N~QfEBg9>}Eg97!FO z2g^J;AvJ3Cat>CE*5sq8n_7z_0*%pq-C4xrW4z);8$^9y>ok-P1;qVg4Sg;zkF?&+ zs(JaMmD3ngZT)gW;8fzy*ZWyj4s+{i``UjPf~`yqZ?Ov6}==_m3v^KMwQXa?TKiLst5K7LoQI&b&!MY$AJ1S_7?tbddjleiQCDF`s_OVHAIs}!XY z*eJXk?mUM5nv2>SktE%DUXU8Fh2v4_-miEBr>D2n zqPNUH{7iP*Z*uEl;2{=8j9hwK7r7OAz=d~s)HwIk$|iyOWL6&06P!&M`g>}Ip_13` z3o900xz{WS*8Qe=HdZ1_p^s9O&R_(>sno(wVijI1HY4gPTFd|3}#c)BD(1>DOYX zHNh2ZjLf+!4WxX?F-ktM%W;YI^ejOSYVdTk-UO)`SPuWeKY|!f<>G7(#Xo1kLYJFg zeF52Dy1#j}7Wa{rptgw)pJ)8=KP#kMjc)|1xI^F~$`M!6J>)3R6Zv6-(@NZEDV&?< zT4)Gyrh#Vb-SyqA>siz!(t^fu7ETe;FWq*xp57%+YzmL5P|xi0)5vde 
zS?3#q9}ta9O*XV#;%b?=3MToSbhEHda|l!RE8)bS`2uS0Ac4k})P4;}A8B(G!@y08 zp-e&S1EWVZ;2jtW6}u`s(Wm~39^i@ebTC(eWI@~PhH6=P`_gXizhbUyuM5<;w=$(N z3)lwA{@w|KXjKpJOMVlPrSmG>CuHhdnVNlJ+nqH%rHVIXXb0K-$98&m`G2j*=@lUf zV-1uaq5#|T4qaQsI{f+f>VIy(QSqR0RT{~zT$Y3ccLPNh(1$FB7#n&i>*y{OoKQt| zx-BCGa@yiw?a$1Ja~u{U`s?WFEmb zZ7u1KFl`NG`K=6vU;HsiEE~GZ<un-6~gpJFk|OUnDd;QGhHh>@Y} zB*^qx7U&$zBR!(HKDHCdv zPI9W8FR@7=F1;|i;HW8O<_2pM;q@1t{p!MIzL9p0zZ?2uk0^i`9iHP%g z;qY~(RYk-%F&5w1_5x+zcA>`bMRGY|K+#A2-PAU(oN`hip>*Zpqhx(`EeN`Z8j{NA zlEGXEHKT0xsm2C8xRrO#%Pj6Vw@ir=ps4J?{Z##R^Sodk7mbr`qn&fxg_eLs(dt1T zA5z!xDV^HrRoVak75zcjSA{ta@L8pwh@AXLL6^_;iIEqgh4=)Q4FlV@C1u*vLl62( zK5x|pn2{rT<$brZKQP9^w>u_plw1zAlvFMYJRK4)`dCA3_eq{&& zdH8(8E3tc3ZA&}1G53^-3?|BpPvtjKl3fXh16G@i5Uad1Cv_Ur+XE(;Sbb7xn=u8w zG$r#m*`bySD*bFITh2WsVs0wh2U)fW(p@=pZ4Il;MjvUoklbbn`>lw$M**pjzZn*fZm(Gl8vn& z7)nK*_wHr^VL;gVTMhOxoX(5bMRdrV3W&FS%+`hMpPmBZps>6?{_=RobwAP%vrzaf;-i?U zEqZbnHC(b_tt{_CV6kXaPOkwcq3N7@bDAlUn94C0P7V_chr)XE5f_GhTDYH1W8;A# zVJWWyf0nZ%?tMd6QiEbJjH8dQ@Gq}&2bUGWgS#9uM_NRkYcFyDL=fw3Jl1mYhacWU z&iBj_liyT`81<^e;*n&9lA?CiXeeD&3RhlB6?_jEv<)s{)tx3_7W2JD5u%m@@23wA z%tK(~%dE`-0EDY8zuE8;x8R~WZ#dcg6cr(Bxum@&JImm%=XuH?mlIv3_-jW-!x3EF z{h2O{*Kx%xftRAltH`eNpp64$f$8S{4#eIJibl$$S_C|@(O3>4}ZaxOZIRmWdbyyq*~Ir*O0h-SI;s0^~NRDAWl|l)f|A zSpW|zEcXHYW!LSkr`Fe9`PbuHbi5m9Ez40Od0c?YV?$Bl72*_Vsee9iju=jVqtFTquZPC)*V$2+|zBnvNytP+Pa( zv0&rf_7rh4YYV1$6=8mpzoH%LVj1efdK6jo&8tUp%4oWs4cwF&lyz??h))xh728hk95~3(9 zb?XX$mttMM;jkRsWri^t`6Lguh17wJAAfSzabIs*(Q@3lB)tNz+G5Dl=TC~O=WGj; zn&9o&U*jyKp(IjxX55vvO~H+JIntiQouX${KLX|?O&GH1TPMF=GC^}rx`=<8=}{T& zNteuv!4t%s0a9fi<@^Iks2(%zF06&t#lKZ`BMAZ}7;rS?L?O2JGHHoUzzW4t!TI(U zS*Fv(`jV|(oy8rk;R6lRX!zP?W{JyrA+=IAmx4ZdkIp2Aie27!L8ALJ*>aU}loEex zs#2N=ehzFV)3++swOpLDtJm(gu%=NEo^2Zp;jv6&e-m|aqaTX4hP)f+Qi=f6_hJHk zvseF)XXZNyLgamHWw%7GWO|PI5B+Sr(`sl&BihzYcOiuk+>G5!{6pl_ch{)GAEro% zU}Gv#E=Sdhv(IqXk`$y)F3v07Gkd&_$yoIh>{_PMm6-2KX*$|{%ru0fK<@16B{ga~ zba`El{+3At=3TS&u$t)>HK#3iD21~^e$|gFIfBeV=6rJJyhl?*wXXgN0lZ?Xq42UJ 
z@@XS{hcRrYt`-%Dmf-{Fy}o_5w5EonY3bHtam&BjL8;+=Dp3BoY!ki5a&uOJD#C_Q zX3@k)0JTn7uY|rI9bpko{8H*&u?BF`6mLPQ^tG%WboMtHdu=)2El33rtiR3VzTn^b z?Z68==``KdspUM*2(QZzB`lcB`o4+wgU`NnnxQD@W$kMBEi^g(P8I}Lkf!B6ZN;7- zD5O-5=^2tMk{uo>#+wu3hQ~ix2cC3K%kI*L(dL|v`E}2~oUQEkK=YGTE@U_sgxe&}goo3xN z>{nC9J+=hV%XC)6J;iZ|hiv(TnrdSk`L#ejeMkP-I?IjI^l z#~3QB&`0)RNIt8ly2=c>vBEjs0rr-qAW^KEf#OF`0 zM2t{_F%iA4O*%pzmbJlC2|o%IR5|2KaK> zjYHI!D1$vI`D(>pie8!(5RP^~Pp&4OtsSc6>}&d{R%Cs)Qa=-2dy?*c=cHFJug z2KjOxEIF%X7SBqQOH|(<=v$!Im;ecDv@+9k@rL(sGqWBQ5o-u5P$r;ojqc2?;N=MW ziP8ap)I4CkpE1;|LJKs+*f<$0gspF_xJzcfqV3uSM2K>ONvmYpMq~QBXRN1bYahhj%$iuU7^0i zLx}tr&tkXK4cWW?mzC$n_bwhmuoTOof{0)|WfL()ti`!?d&6|!Krq_ASgk30g_1BV zF|I)f(yjWzhbbOS2Cuw(CPc%rQHKiPU_6$5qtV%du-+j6ih=;dW|eKNkOIC1KKD}k$rDXOaaL-#MyK$rRdai4(WIZ(w)=48Oj~UpC7`TWH@G^^mPIaV|@5^ z{mhxLY2D+W5-n5PG_QIBC)oj{`&O(^Tmmh5=l)F^Z-nH;=Md}diQfItM=Da?2!kS+ zLvNX@&&=??)iHH@hqvuD0E6q^j(U8(HmgLz1d`3eveJNm=REj-JT+!KAug+A)I{l> z4)M896k}CitV57F>bC5`==3I{=n*Lm!Kw&hrGHdfJzV-7ebU0NJRn#RSM{AKC9SVd zpV#2a$QNj3g~*COlxQ-!^jUn5!@$=u#3+OG6{Y+mCD@^Ow+S$roWI^1^d9Gzvy?g{ zV<^^s(NbWsH#6l*TJuGF&SCiNniyN%c13O5Q^EG^w0){#xBh=pDgWr)QD|-0 z3(K*UJ&+o-2SWlN*sgih`>+r!cEKQ{vRh8v?{CwlMm{hPql2sb4afPqzM0zPg5P2e zr??>>K7p7)56qyhY$rh@20h7o?r#(B&gs8RC|Sc&O4=X5y2W%R8s1jBrtjP5vCbilGCTV%@F; zJohQv4|cH@Gp`BRftA=AvlrmeP}hn9hAfKr$2?a8@{Rp1l3CEGH`14CD(?4&;w1WQ z3=Xj($qBOyBqqux>`%h+(9DSX?AtU?3OKnb;$WXJHkUEX#f?+ zDQ|N1U_Gqzhx8lNOebpH$=3}nVr*ik$>$Hh(8tx+((kiIL>JEjIz@^o9~GrFoVG#) z;%3*#FrZ3Yn=nP;XRy|GJDUfByf2LL|Dx<($C?qI1jx}|E=4w4FbPx#GgjgY+2vXV zOfPQ!(r7}A56o3!lqMXm5&tf$Ff;^u_y}6@d9jma>hO3MYXut1UsdC1IN{~ui?9GS zMD5yS+N3s(ZE{G;N=+6*w^KG1a=GW^$6<79FJL?<`=d-it|;r!b0zIA;5&n{ZK3*q z+9ogJKV}A2cRE_zFTIC#zj)FvPFwYj2-;tc13e4w90(?N{D<2Q8R#%pJs-C_$9i;C zvKrsmK{aP)opFeo65!g6XHDAUEKC0|5g*$sJS-z;YF?ZYk~kLr`#4D=Yb`) z_QBIC{RVZ z>n}3W{YVVY#na*8M4256Q3W4r7YrIDQLOah@eH(+Sh7z{pzyEBl2Cs&{R^a)%_CJ+ zpTo*1_xi!Z>OT6u-{LDRF2Jp}aP}RPtHC>inBXpTLmP{x;S){bHW>DHZ%O==$7~#& 
zNv+&BjI0yT!g}pC&KzZ^LhfZpLZsX_0E!B3++{b$goxPO^osJRC3E}`(eZNoJu@UJFGPp)Pa!sbfE8XZ9o1Ya zUqdJt{?nE%nURXO6wm=zKXqG-E~f(C+2`WZbm!V;WkxJDwO~OwLvEH=8Ap`s{qSEmY?cp|eNtEE0lrIUINaP+>>eM9rn zBZ=s|!&Nd}3tBnpv~8L`K?ylPY}s?!qyc^uE9Yi*R6rG3bR%(qUpcLF;3IA@+)a_v zBd>7+9w%1hIqxG4)rRH#05nnO?`&il0vxN|+7Szh-xpy0psL5v1t!hUjF6uN@_F0y zXVH|bRioi9qC1THqIrS*JgGOy<`hVV|K53O;SzU%np=+<$KgYh2v%;$-06d0Y`? zO$P(-CTKu{5MRP42Uiy&bIm1x&@9)y@^v*iN!d;07zLD>C;EH(K*AqE3&X_&vpwx} zGRBk%hI3J8W)bFnJ)ANxz26`8R@=yCe&nZa>(9+&d}8{BHT4NgT4TNdDwv~QJ8g<{ zYMU>xi<>QS&_2Up?-Y_^8m%gu9KAAx`66JTfj=b7H;k=MKZrB5Nbc^vRwxHObquNJ zbBvdKXj?I)zH_(M;D=yjY!2eh22#$5@$}3*>AQVb=Jf8dW=1|!!3LaZmQyuXJ3Ol2 zJHO+ie}4y5+tHqfDWj5hTdl&pC4mpdHehFm)0l$d4!-pRqEjreBAWg8p+L^)qw~~- zA_{ns+NRoK7P^hE5L((@q_^}C476ThM82l<_9^k5)>ic2UdrLyyj2+uePSl{biUBRf$ofn!sXF|jylWy2Vu+qabg;Al|+tXy9eH6=;* zI|@d2WU)`usi(qC-zo3tYk=h8ZnY?qc;Mc||KRe4*icXJoi|<_BS_$- zq&JnVJcM(P6SGbqliK-JqjtWubF@cLnsd*B&ohp8pa^34!qBB%fPhuO^7*~I6~{qp+_A7Por1Ag>8Rzmex^1*iV z$}iWD@se2&rYI*MijT4PL5y$;CMGm2s$% zeN;T*Z=?6pBUCz+1t|DJTt6i$9?|r}67Wu>pnNRvM^kzdZcgFU(FNVZ^-}NzDW3!(Ont!oSl(T+2@rbok@Rde`xqEmbzKzcD4p&#GS))cO)*~p z-*lDv&F}jAw+VHSkcopW2L8j3t-~!BH?dtK%>+DJNGzO0sy^OYi@RCUYCK#OF{7ss z!IQ*{?e}4;Tp#z#FIi|C6i0f zpP&6hfPSDxEso-;5_|#_Vr{I1jdsQ^zgkA$YNhb920|=U9L&Fr_%x4h_q-oRhVm#5 zU_N+dmrIW{JS=Uc&I_KR7N8L_4RlU7_&OArN1Bqd|JCmpf?=(^Lj#cMfT;dOj}t!< z%UJ4Ury+5ubJe?V)smYro)Kf45ea%a*Ah4@461H5SS1@59CO_Zi&3!SgASnFA;{V=p?s_VC+zxpy z3SxCMouwmAK6kuqdj}#aur~=c(l2&7@6k|D?gb6OM{6}5cN9&FXYKo__J7)kKTYg3 z6DUTxRa_i!R&nluUQ2vQz*7_!SVFQ{I{L)mv11R_{g-@6Nx!Pu0D)~)LHgv@g34_O zz%Wvs3lXeN0sqo3TO5;*@_`q10nsi+6JdV>z;Lwl68zn}h4Fa0rg$k=)ESq7AT7Kt$&Q>VU{};sc}G^#dneoxRPoQ z*W#1>V(}`K8A2HT;yYLCTYHqjLivY*0^PK>C=qB+EUJIgX}d2{!btXhg$pL);q5@Z zhaa4;c^9g^IO|d7A8x%P^WX&06x_~E5jxX|;Sr-pyBtv%K)d}(x1MxaK)%fLb0E7Y zwxJ3Soku7+`*gCp^5eq0xwZoHU_h6X>bGgUu}YvA!w{v9nX+x=;tv;P_N>@8pS*Q+ z4k=XbK3rb+24y^(gEQQq{o_nmeJ9?@zpR@x-AF)td<;+SSi@~ujXlaj;Rhb6W072t zb`+`w5T)qdg#uOUvv~P5U8kLrahkf>tnwSt|1>k*cR+KPQp)SlvVuqw%TzA)ZlHKn 
zqAG@Wz9kgB4ID5e1i#;_LJ}0;&zfR~bd@;u_(&4wUD`Cq1zHy|wd^`MW;W{`Mz=fn z3@=!od@MiL7oGQuv*;w4#&hFHz%Iy$k*QuZ5MBO~*!d_JGLtP#@B@`@<^C*%?-%;2 zl&NoQ89PQQPu5~2UeeU2M(I->Y3p(d)emE>$KNDz6E8F<}HQicyFIuC?bHZqZ(v6qlm zk7HmGe8ZL+!(#mk4JS+O%y#pfwQ&uggWYAh(+#<5%_SP&0*~R252LTa!vwFVzW^qW zb*uqm%Q&@y8%BqTs&%)$ZnKs1x(i!P%qm;}VROX$b8y|Bp+6_Bg=Qf>P58hrGNZow zf++6(8ZUduvR8A$m>Y5hf2<~o>1yzAa>u=3e27chk?#t`n4jH51F zrAY~2e+lP`LK z_*MY&Hy$R?BjO()@;O$Tch5^qDAHEVcxneSzsD{_^E5-gM^|&i6fb|Qh)*M31n(0k zlIiP!Z6WyUt8+CRogK38$O&x2xL8H&HMXWn`SYqblsQPT<|R64NctmO2%$Yk^lueN z^=O^*WZ#yhYEsmP%Gjla63N4Usys<;=NJWEvAV0)KcP2vvjm^l_iQDYOK(~7*91@A2rNm(*FbSI`^v(&rhG{gs%3 z-61Kwk^rd>J%gXfJ)87)OS}|`c~&W>g`(UP z{5)nIAxHZ(n=1d^n}cnCmX+!2uJx`iJBh49CLJc!iSp^w*79g@{m7YT#yM`=*6%8e z#;6jh>#(hRqWktT8l%?(^5Q=A&dr8UG(azTY+SJD9B$~$b&C6W!4Ets^_pl)1-~tw z)3Jp23kKOpey!n)_gL%Y6rIOOOvQ`pr*g-$Fc|OPK$jUzYIn{LYf4_Q)kUrLx=^ZW z`VyNbshq8jzB47u6YZjmlrdmR@pj6{+SlRP7E4IMCYyi(dVsW9Z<(;eluG+RU7WPg zn4-5)fkL1%J&^mftXtpw{!7SQG7ayMZh(w*c1VrHf;Pc0ZeZUIB%HF)S~JYAF|{y? z)pR%KYB7cfK$jcggf>+!^^dS&=aMrdgxVFkqsUe{*uJu)7|-`-MK7jW7hx6LiJ2FH zQyVU>4V-Fk&+#2kQ>oQA=)2Cg(vc97piC&oZo^#9it4vN)gh%gN;7*%sfX0dQ=Cv7 zO2d_=_X}tt@Bu5@`1ts?yJBP?C!2qD(9OC|LVTYNocjq^+Bx1OYv4lQ9{DHseaZvq zM?q%mi)-^49E!t6XII`l4V|R&V0vpQ6sfzbX~`%M&D81{>)YjuhgCC~wyf9x^nk$A zhOVw$%z54-`Pn;mW1-5nM^A+K&~-pAaOFr2g%SL{BgR9Pp>c^62w(`O+X^r?P18q+ z-lJK#5;GhdUGgn__NOIby@zLN&BV+3K&I1Z>o^3rL`moYTKGKRUD+!2dtj{gcs=vn zT+;B@gN-w~>SDR~1-1u06zX^cP>$B@1}SynYCko?nvNCPS={*U?`{;ko+f#75n_oP zAMEmn$+_X|deJHc0$B-HT7r|EBfBm5)&oN*VV~sCPUWiZ7FYjiNlg#pktr4%?o2?( zsZau~C__BX+_fbE3}V|?J!?)1VZ#eNmY*aFOEPk9RnKv=dP-iRt15R9^akk`vsRom zte=9^-`|a6vq@&emU>#w4UvVS??w~{2oq-SykRUCB^u+cp^N=69$c0TCLaIWZZqa^ zZLJS1oKYZKUm}gDJzUtz9RO{4bhxX!Oz}S>Zsv@``CjV6CDcsADU1lQ03M5jnN^ztx0EjM&_mSu1kFbj0Y*c6Me|AEs)3TmUk9{b!3$ zKk226x^~iy2@%lKcs}w8o*r7F^W(~7Cjy&(#PDoTwQ9bV#I50j?q$CUU|dn8H&h#6 zLtZBDyammgWpYwD0;5J#aHm$%bR4+oq~zb`hn)R3h>CS6f8p4owjV9rihHiMyGJ(p z0|W$4U&z-I%}iGvv0IFL-;eHY!=HkaPu9aszd=OG3C)6ezWN+VDggu;K#AW7NJ3gB 
zNMhJ+iEd_h`qUb4SVX8%b?^vUN8Ho|#efzGx0drJCS4k+=tCMIqj_#aAEMZ=x`tVP z!NorF&jo;uF0{lB35RXfTtq5fB_+SEC3qw!;Lf*GaH+-JYnaoPwMxkjlzP9(QtAAb z{lkmUUd*FDyt zaVw<Kb1r*7qSbn7l+6JJis%Q9X{t;U&mLHT_|N;`z{HZDgnGUNPo9kPuDS z?5S>o!Y`v(>GTM);UvrW08RcE3ue8RYFYrGOh%?dJSDw&DgL2?h1lTNiHtrr7n`y+ z9{({y6E!gTj9dYsFz;Cms5kx6fjMJ?_i+;I48H4fA`Vc)m*?$&_TZvEm*q#Y)kC-hF?s7W~<2ViKPPG$+Yr=Mb$N2Uu)G^&XdZqn^9TP%0Glg z&*s`)chvXX@oxGE*LAs>R;BiGSD|6QvHJUkx*#V;)d?{QZO7%h)Tu?S!}9g~2@2H= zX1<*btIZSGSPr>VE4dUBSUM3g8Uc|cyubBhSFN$An}r>0Q_}v)YI4JWn5?`$Xpz#l z^c|_V;L0k-7^zjZVU}Pdlz7wte+2BnK#HQPgwbCct!NQL^27m4u{XOPLbKGwk%3Kr z-0T?75t2ssOIz%38gD*A+m^%zWL_}{kLpwB0PE7qS}{~KoyJWz^$hp+iPq}}J!HX6 z|2&L|gE`inf~MSFP{2KLXmla`3<+pLXT$fe{^r{Tt`7Zi%V$(Dn4E=qxVt+M6Nt0q zB7Ph#wT*8`Pt{lGLlEyPLsrzRP?Kl~429j3x+e1XL$h{I=TjQVd9J%Dwt>F%8?pW* zgCtau;sRSWwA>2=3J&tO!cii=Y_kz z4RS(T1o9$v8|scU^5O2|Gsd${aRhiRBXf$>CgW{Tq}Ypu*2dyz$&!rtP24mz}RjqfnYA@brazpuw>4#A_W z47K@p9g5`T4FpzkF$oiISlVZ^$&l)6O}131=g)HUUB2S{7-F zxmc8s&?T%`#*9eos*w?F<-WADo4gwFK`to>M-lQJSi_8-8g z9JBikOV3+z#mBT{b{da}Vsl|~5VOw?pjs6|&0c@De4xBCzmn;yUuZxEim88VuWczZ z5!*Qm1{)lUlQv``pNBXxluDvKKO;twrL!50M2d4Yw*yjUO_i&P40w-nWQ+~&qZ&T! 
z)WQr?f1JhW7A&}$m7jl>5R-|rI%bQ#%(pXMmJ#v6 z?n=wrrBJNXg0Tv$P5+hZA{ImrrI|mPHV_n6S1h1wI}c+ipkNhh#hmU+54aG7n4iJ= zuJ*-T8hfonJ!`Yh%v4faEdDvYb;X?neaO4u(^<>FI{5}#|<|=+SVqeAS)taot7Au+(E4{nX zeXvc;(5W)x(vL0Cxxu5mz$N+fF4>nwbGMS9JxPoPkMxJ%d)u9NXg}qjE`-7F-V^WY z?emT~F<4jn7j28HMI?tX`<}XouyQ6H0z3L3wwzcI)V{j-+0ORxzLWcv0h-g)4+mvg z5LOVLG5(v*=C*l49hgZBt-#U|tuw!iJDN}ebYwjD3nn{Y)D#w{TRnA!5+o7;I- zRXnc+@6(%0ejv4wZnITk4b6vdNOY>S}e{Z7Az%K{l5*4%oSIZ1p z5=KhF#}wTvY*&lhF>=}RbvNaLdjZr>L#gy!mrnZ)%AcQjeWO;oPRn)^q-Nba))p=% zY)TBDvc8-;UXLISW*^|)onIss^`0W&ZWHCc3DNI5;XUy={RHJU2HigUA31M&x;ZUI z%WGL1t0XF7XfGW-n_&tn`i3Vh&CFCvEr~0k?T!Ain#PFGb4dfPWm44diD|RAU^*%r zwiY6PWoZM0SVgSkE`DbH66DVWH>iPdWJzjEAEc}J7EiQWl+Rayki9HoeJj17WArNP zSn1LGXnrivfq&(v>KQGyXAW#b-Dn8Jn8LPqU80zoIE# zCaGKrUD9a%*|zbvqDHfC`(`M(`m6Tqo?ZQ%xiqoGT+*4r@8HumcE!_xfrQjTNt04( zEe`^xTdG1J3PSPHQ|8HRRD|_l#eBKoL&YiTK@zg-F*j?#eY(n#l=6;Tsf1@I7(!Wk zQM)y-m$zsY8mM9gtFXXUn+J)^2)WS;0@#a2qwGC3-HS#ERpqvHl2l2$BU>k6*laJ( z%N1b=*c|K)z5mBVg;DrSfVtVyJOD+1Le-C*^}hmQ!U2v;M8g&2arSZkHS8?FJp6wO zkXy+pEcFOX5c_Rgnq#gSl_s=ikq@8i5(DQYg=*|NSISA(8L~OHZSFWQSUtjf~hz9Lc99d{u_qhEpZse`I=PkJ!ck1zjeNy+FPz z{a{JTCzLvk`OgxA-O-U+NC%YGgol!Xp2emWI;M0Aghv^mH*Q_?kkW=F1gA3w6Hc^2 z^37~lfAKDU87JBc1T#|Qi2Hsx2h`2-rxRQT7^b+t7Smq@-Pn|qiq^3lDkX$xJY)PIEAIkdl@{YyptW}(3b zf&4+T5~3Q*ZHS53!mAATan#2rf2n zp$+0Io*E?|l3uFopv^0dU+#yNJ12P_JpwZ0{W$Av<{H?@0ux(F2DV#hQJ=eHoP(i1 zKm}&hhuj{XLpKdUs^K_kF-YIAKSU-a=l*4*kitsjDgagj0;B}#PD$W(t>^Jhhp~X{ zTHp3?YblYy_5Zya)FpI8{CvQ{ z>$t-S%$PFH%ga!JPV*)7Wnq7{4)x)b=JA(K>s?1RsXE))6d3@Nds!g5RETX_ut9Pc z91t-ywCu23?bu0vrDl9Jic`5*mNT7?_H`i3W_bGfiXA8X=?*Ba^aOVHD8vQ^)=}M8 zSqmP1!{KyCrKq5^_PIM4S+9d7x_y8+vWeD3on5VkRifm}ko&4P8~>Oi2f3Tf@sU`> zdZatCzN`ePs1Ie-OIADL(;B#QW? 
z@XKcBsge_=GO8(}o05=0?)W`su_VkTF7hVYTx<-6@1lQK32<5#;sa}^uQ_PLNlTR1 zgw&H^=e&`Y94^)D{*R_}jF01c-1auM)7VyHTN^u#8{0M;+i2XNL1VLFW7|n%8;zgo z_xFEZpR>J# z)^@X3!pnPy>_8hzSfaRp8}25v;$gWtsaWdX6(k%f__0;ZNJzXk`jbiG+i>Z0ym9#{GMxl`AoIV;!^K0B6v(J{vhO!<+)eA zi*q1V3K@3hvxF&BkARZqHxei$6+iav#K4wl-RABz6{!MvbY)p>v_)%j9m!8T0vh{m zYK1mMf&Cd7)88*qaV+!D-5FYA)^VR+uC@rqkr9m(j}IKm7=NmY4A*zM4~z|zd;|=w zLkHhf)hQ(cPGVX$T^3_lz6ua9mUq1+sLK-VF2Vei)6KBM9==)cFjNHJe?o)4hmn}Y zSKSklTl3?{m^HSQrOiow;5)s>pKG6A=(C8+aw=Hkq}Z4rd1kkw#2M$K0y*Vkhn|>m z4T0g9uRn?19zDbzn+9%!7oSO=&&FxS z){n!e>fRKN^qm8;aK?D~PtMnse%`V%Iy|FIBYqxtDli!;PL$g4Ef!2Z@TI zf2s!vb!Nla5mohzy5m$|JNh{MyJ+3Wg(q|F1BNDk(~<4HBvwe1 zopj8|G7o}*YTzqqQcadlljV8~aMCu&v|FC3f?1R{0HvPghJE01`6>sWkRrumS z)&EevJ6v`yInKhsk=X7F*-3!*Aj1~$Hj18zTge(U95LgjKO~?)NB!_Jp|%iC-`WF&!Z3j^*6omu1>ZIsixJJd$G+HIwl9ZC3L zIit(RWP<}_o;M$zI(u+4eTOEOegSIWfZg9asq?@ox<|uWin<}QanGsLX0rj!C06NO z@0!%kuI;V4s2AnCzR?$qhYL&wp}8iF2WLv}m|%;a{fs5lD_@4*`b4yV1|+8On}7^+ zR$(Txs%4EiGBBk=b!(&F`?V6{Nzj`mzp5{hp3%halP=?DN*&po;)rpxPV92fW~YJm z_9igmlg9koQNOQrwqiIhWwxT55veMksc_fo$0yWO{u;`P60)pA=i*&xwgWr_DR&2Cn&?Sp! 
z1ix-O##??y1`T@WU>C4L5G?$lt-f2fM;B!^yA}4-lVGv6i=yboCc|}Rfbkp%#b8<_ z_!ynDUwsr#h6k&3|Kp_NmyG=KORmU6SZbp%`uYm$Z_tB}qSvO2;J({Mvt_$JW?Onk zAzt5P&5X>-pG_=4oQ={F-Gj>{7fc((v$l#3j-KdMK>NJ5GWsribA>*ye z%8Ta2a;3)q+c~v!g7l!PstgkP)TF+xhD>M^E2nA;9L=$lM;;=i07bl49cb7uV7UhU zlDl&PVIZWyyJ;2a=*biXm|YrfJp^{W-+A{9!CjLf3MR%gGus+7gDCj?Lvv-s(8o3^gucRuQmx$mESlw%p5WMZqw+jTa zT$ToIjmpML%RFt!3HOU1i(&(N#8VY}&JoXZ=_WHD!o^8%7*s;aqaRc%(-mf?TVzY9 zVd0x$e>HLCHcn|!?w@CW`~BqzOHJDCEBbH0jybE1Zl3)x79KnVGdYn9B9>++z?q9_ zrH}+s_fe>puBG-lO)`<@Ifol0>{I-*-z?4+)2Cyy>$llFLHME6RVOqj5$*L6rf999 zj<%KM$LMb6ZdZJm2!e)rv3!OB;*Z#G%Gnrgf0Pm2CxP_WJa3);2s)kZBc6+(IkKZJ z)_1J;`byWRPJS$th|}8krp#qTVCTRG)$Yuv4Ay4O*=u7(m_*0I3@4-I0ru0qOd#a3 z$E@OKU96sH6w(XdszJt9giyk8Q&dp5=BKzRVoM;hg~}?RP*5ZIH`?n$1B09>_%ja< z^^n}VO)!LaOL#-{C+=k=f<5mft6>uMnkxXb?9d;DmEz}QQZq}$2>F|)!Ql-y z!m9a4Tj*bf)T`R2+%uFnd8{Y`3dIt#tiPCHg{e(*;A(^z9+Ijx(}@~Fl)hP#B-Z+B zX37#Mzw^&|b;a{=Q1htIPa)zKza3Yx6LTqS001-~5s~_e7DjK|C#7&0z6~DF;3)Hv z8aoc>vp7H-lbWk1-cYDd8@g(B$E`>lR|e!5mw`3fQ;K+q7+5y zA#UL-G5h4i_e~0|;7S#!{Im;$^^TyLY=POeIQqKyGDeuxbAJJg5z=yp&Unl?*iK&R zH2RGq>+6@vs!G!x0{9+aMh!nDeiEp=0KFbm8`-yo&kS6p9hrY8|f%ZBq+9* z5b&(}70@wmLW)Ti{Dz)=%3Y%b9|tNYoU!?+D=aQetMOhP-4tCM75?fenWXtI)va%4 zs!n>C8Nf%AT0iw%xMZh2SSQaWqE&I9IK^^Ww^l4mU#wdv*eXg`(j=A+&WG_GDR@A7 z*x%+u?h~x$O}fJdG^Hy3v4ywGTQJuEAH&G1&K zkp2G4gN95`93%w6YUC?H!M=033B!HrS_d=PUkx#22l>l2vR(x(vR;hdNR5~|3$|=d zLYsr_N27($!GIyu&rhGNXhSS`j0FcttvC~TsMv`23gHY<77XWD>FO`SDpiIpxB)N0 z`T3XiIz<3<2%DY5pD1!%7sflo{{2|eeQc|Ptpm36+pNaGrO_Gpi2WiY*a zxAEcoIFUfD>XgvKonjZ_rb87UNYs4_bb=TNG-YNjnJ!N9#UeH8 zFkw@I>B=1-|7r^19cv+S5krHGM)&UPTB;@@jrp^-?HZ>|_kJvnps85l>*RX-1I=SVUq0v)X)zw%vu%poO2y7=CEv6n zpg<7%+k5Xd?YFmyINf=h#SMAjEQnTyIC7|FbjTvjtvZl^4k;_!R{W;Jq_{g1&o^_8 zp340uasUj}u(qnAiDH5Uf$BhodW-fmUyFOwqxyxPANlJ<6Y(04%-8eK*p2w}7VEu0 z)q`i=KuxcXv_z;w7}(xZbKoLKe!qPCfP<2mA2v-&sGIsz)Kke|NrYJd!LZ&p{1yF- z3gt=u@%pW?Lw|7L&*wVgUjIpPAt5PrQUD73vAg7h-fH&PWG>Ay>brj>crB%eq-1eD 
zaCT@v#b&R+z(=aSvz4@(X*F|9lTT7`Up+o}{N4pb0&#g~nOEo>Q{jUGXvp|K07c~Q^iF61pmkj3s|Wt>VzR*apslhtC&8)eNio#8ur{ymjb z$_1qTTiV~ZOq_{o^-zDZ*JEWO6Qd zi;DsHOHN{X2j?Y$0G9TP1ySyW(%$Cpi2G5agNK(W1Bfn{k+zLH8syo2i;s?TyKS~_U%1YF)8 z=FytzEQEIa$>wmCoWP8TajBuARaQ)^&`et+5Ee)1_bOJ*t0&qd^s-*h^!SqfkoZ~J@KC=9g!7S zoJluOr>>n}xyq@)F<{op*!8cVe2h^y5(ZV4tk$ zKdt6)k>0ZP*kKg{~|IW!Rt3(z|emRs;vjj&dBc%fak$>jAWNPMJRRXyG)2mRV|de6IV;i}(# z=nxUS5O$q%iNf8_TwAq7n!KyLt~Y>joD1Cvl8O0%L{5C+zog&f3)xgbko7xoLQYLPn`28teDtT`G)uIp@n1`dXHRm?72UawFv8KN zG#lIVt6Fr5?>d-wDY^LI!g?;e6G0+te{XfM5d9&E?8F@H2!@KKkja!q9yXRB8@^_8 zpz#gXgoBWv@-<%~uIk;Pz>l!A5BQBpe4PHMNFm(6Vl(2WOoXP%L2cz+LoY3#24_XK zH~9@CgSJ!IABriWB=+si5oyvx;D?*+@Yv|}1wTbvV(9{QB{HwL5O;hSC){g^$p2pV;~pBDiG>SkBk+d|UuWs*q%PPq`-W>{7%@ zn-!k?h4{MYgtgM|=#mVRLWzZR%B@oUPFbAvsPM(Q$={!D^3jn1J%jeHB`@VUKh~%8 zhLz-Mg+Ca8R>{btpCy9#xfDppEO?HuycKNgcF;FmRFODjA=1(Pd+f5y(j*__h-L=d zvmF-|D(vgor2-;gjs~D3dT6x%uGBOuaMzG(%RbwiIX^;VHY&O}q4kE!d@; z1mF(Ke5b?zb>XBiSv0Q^H99Tl7KmdJ5t$;=_^D8l-{Gfv9c6Mx3j#=iV9|vYAJrc# zuB@Ps_C^RcF*||?(CM8CDkR}$q7q42wurJvo%~o!#3{KSYC>UG@hW+dAbLe3AyV-v z+=)>?t}F{6bX*_M)-=T!&vc^1P7d2;AGs68xys!JqbQs%&(*&*M^t~0lYzht4$S@YFth$GnJuY4dqf+m6680EOeCj{N_d_*cKHsd-ND8YF==^&4ZT|U`fMLepkD> zv+qE*@?zhmGMImq*5->MRfF>zy=-o=GgI7&daIGL3<&h@P?eHlaZcdsNvTx4>-E~g zCr>)02nd&?jat;sXX2}Lb&l=iO!D^4&%gPl-Q}TGfn&$2zcMa0IdL^mSi<7Y)P@ZO{ zNcoW$Pr9mq8ln#ryuN5jQN-@{q@B?;;Cz3A8d7Z-y7iQ~KpI zGy-J6wU5aH7A6ZH=HYB9>0zx_90bT@D5+_<8@=KOJXBfavHo%e6Zm5h{IH^ew(3`( z_Pk7{`@=}Jwcg(OF=vc+vQd{US*}|!d9L$;8Q@HYn@6e;nnxKf47!YAe6@%0iGV<~ zVX;vhG0X_dCf^B`(6*-xOD${6QcCD}p0?ZBeT*r-eCnG*6}i+niml>~9#AZ9adjTv zY8^pF+Ai{0liJ(Rhpu4&hlk|74mF_QCLWAXWDnWS?B<`z$Y&O^NEY;m8|$<9V=3aw zMuAP_eO3$Drh2{oqsC5?CWo26$$X7LQ`$rZDiI`%JOPT97dAp*Ja~Q`yn7v$`XKe# zbe&lre1XM$#c*5MyI_2*ti%f+C0QA*j3*-&GaF$=jM>ADu!v7G)_ne|gq^O!bc46V z*pGxVwJ6ds80wgX&JQySqN00?G*pwzNd8;PWM>b>9Cf&M zB{T}2C}oy~PNS|;?oTxddNNONe4aYGRqQJzLi~9oiE=0TTqz{JmlQAcGO-<&`<$o4 zPF7?=vhbwoV}*fFlVR($VZDHq^F5`Zt+r@B*sHQ+oU{Xft%mQKS_VaIhgn9BM(vb? 
zX$W218)_K%qU~@ZGJNi{*{~qKFFOr|$#P;XI=PcGuD|wrMrMx}okOK%Zc;NHaHaKW z?48s>l@sc{28W}UsoOXsXsmB0=rv=0$}?E*_D`agA9+v?yA&uvHws0t{?8Q5e0 zJV<*yJ3Z&0oq~m*Y&BVV%tMW)P??h_$SrecC?nGfTT7&H68Je$wyDbmK zBeO_AUAU64@79?sdZ4s-0;XO{BaZGdCe(g*wR%JOvjZcKb{p*2;gNR&J41rQ#zC3m37k&> zGH8)9YHcE_vrue8NWRH-kh*rDq|R%dy6t9yD9S7iCNglHLOzaOf@w|CsIXdIOL zLS19HzH(0^pD0C{j3XlN8uE(5bXbYcoINJZIP;dp=?mMe?AFYfKksJmla92my_fZI z$A305@QM41q_8%9Aw;uxl3h*Ajls+R`j8LN)*p;p`y8(| zIKBx1cm(omlD4d(M^?W_k(Kpi#ygDpN|Q^&S;(l zb&ie)yaH4eA_qu)f9yx|wZ}%6Yoc3<7aeRf>0fYL0*{?ZzSSy=FzAKu4!lbh89)Oc z*&=8gy!GAfI_pOpkJ+A^d8CvzsQcu@DXgQOP`einj3{hXke5M1Mlz-oV5X6wqR`_~%q2J15F6h!e{yDkaM`gs1xX1dnP8AHq^`XdElWX|$ zslIjh=R8**qjuoxQgNJ@{&Le{yTVVel`UJ99%S8Aq+ZaINjQSa6 zMGG1TPUhDg;HcFs2LI+n{2@IgSJ+s+UzKXS@WDFonJ$F=$!#H^vKsW?uKe7n`2Ww_ zWD*1WwRe^~c&zfqU(qvobfAvGFg*a?u#)iOU(@QoiwQYN&S}H?W!&pmQ_u@k_fN*} zFQbEj$a3dOP8S4$S(_+~Q?aHL+jM#i(#065M#T#o&mTI(-}o^xzp0SfKR!N_)FRnb z17$bEHweR{#a(WB9$06`12YX;xnv+V!9P zde}(xGJNM4C|}f(1pLIw<{TNM)X4mY6HRWU!cW_c=J1}YbX$!Ul^3ry1R3Yh`p33^ zPDf;3x7>-KdvZW;^?wgmZ~~eAG5?Xqt*SbZJP)^_4q^8Wm!Vz8Ia36Xd+1>tVD?O1 zo^OnnWC+mx*zx^@U83Q&D$93P0*GNR-P>PKQrF|zd8QD3Z3&c5qyv#8#Eg^Xaw;Ua zb|?$r7-HHP!X&>_8)Np_4l)7_;e20VV3#t8h8SJyjMzx)JQrPmE%kuUo?A#03!A(p zPbIqB^<7@C1P1~V)E%x7O+@qtw?@5{;vSFIdC8tJ>Vk5bPfzgH%il4HuDRnFTxv zdH&bw#<{>gu@Q6=D(xx$7U3q|L(W13+IVOX296k2C=K+QPu_h)b_Cn}z%H?2qdqC( zaSK*3L{R<60a5>p{Pg>4e1wUEHcv!b+F94C3fPa{Se=j5BtU6(KhS~XnkybC$X*t9 zL2O(p!Pg{`_wd4Z{Gg<#7gWf{>N`MPfp&hU5aV61*|Oc$Q$t6vxfdeq;0hvK3}HWg zCL``$>_qat=>Z4|Heg#3h7&WMFwXLoF)&g2I9egMBAYmMeAf&8*2-uQlP)x?Q}DkH zeP&3-ypq?!ygq|YHGTGh;_Nlwj@;e%UH`4!&fX6-UZMb`cFASp6-n}jVJBV7;H_)( z5@$X0FTbIGHthj0vWg$s({4&TP^>OCmg2r zsi+ryHS}x88>c#bHVI*gqphUB4grV&2C46f?D%Q8hEPv`Q+$poJ$dVpYFw%o|cPo37yqIy66cT;`dChO|tNo!RX3<)!u*Ra192h2f#Xx55Vj}O1V5I-AaB2kvr zepR8Q%aLdO`vow0CrbG>hp9&YF6+=6sb|HwavAxOWqLtV6kAY&xblD$T^cFhu4*hXiQd_^BKr$n=a(Dn(uoIqWNlymWI2F=A zOsYcs5W7^F6D(Unj}+#G@nzexAUeeEcMiQQZ-^5(RS?cBo`DXMRpV^T3%?^S)t_=X 
z_*~WT)V>lZ(*NyQiSQ%8IkXpRU0z(m0fsrB>;@ZduwK{fYlmA&$`S2}m^x;bJel`_riy1IllK!^{y^ttK~H zX&nW5+_3L#rwEp3zZr&c1b*0oI21@xm{(KW&P1b2Q6E5|XA;tKs-C@Tt`R}z0J*1N z>AgP7c!_5+q^L-qmW76`@R&dcyM{mUg`RacI~yf%$YK*|&^@kP<3zm~j4>5xd8ELQ z$yq#wEYmtL@Z*Ip@TW_6C*(Dv+2tffTY*0$+s38;8&<)_59y5)WofoTB%0)b!Xp?; z|Lgs`#!`gJgW~TboIzwKK%0xloigJO`#Lhv1XpI5MIRRw@EwW?9(8(^`iz4H77b8t zRjr$q_t+hJ%L^BLEyey8-%c$5bGl=vv>B`72iqk)C@($`4wDy7)#TXprqhF_7P6qp zNE17fweqiev`lqtV7c0;z*TAg-Su$qMh9tco@i|~Di8sC+15m(3pK*u3D%N%K&Wfw zSaEbFNihid&XGx1RvI1C=`QWWHUmsW$sa49hJPMiP=rt(?W*`~Dy%3Kf-w~rh2jUS zvP0x6(~nq10pc?TRVGXg*;fyy!jE4@$vb-}`Goc_i50NU+F3K@@j0kt`(WC>+u*ev zHr(CK;_%_#M!oEzIV9t3%d_JBX*z`8CV#X1GSOYFpH%GF9s;{q4l{BRC;6VXo}F%^fI^H_Vzo{Dp9`M$_SE+z8R z^0OFRPq-Z@6t@g2Q1c7qi`cx-d3eSwmia9U(67GB{z_~7`696yGaaa~d3@qa2Bktg z0+vk5D~>?T-b{XUPU)wZ;X@))>s5{0KY8K}R>Yo0Z6$xHZfqs!t1d+iQ`u7k9eF!{JM8j|i zXiLBHvY>dW;X!;Ni;PBiXs2SOkNQqd3vio}x9Ho+n%%zV?*4qcQ4#BxzL1+ z08k%O!J(Em^~Gj15>k)s&v!%+NyDlP0HcfDw8|df0Di-2QI~{C3zM}$21(u`^UMQ` zuzBpX(XgXMLi?c!n{-~N)yO@$%l=WPx^|1jj8^Nz)D7d^4AKlh7jioF&~Q1&soqGY z<}ZI-k|I+V-jHeX;6<<2Hvz~ z6A!ufEPXN%lhRJx!GD9MqLPY`4tuGaS;rb7#~=~3SK!qd|BxYkkl2V`rSw^slLX_6 zfa2oUmWn(4a?3%f@Opks>*Q=&fAH0Dc%$6@q!op`(P$ILh`}+|bF0jYLUk}CEu%85 zX2!IlY*Lg^?j3}NfC|5P=Oo$DmyambukmPjemp4S6tD;tGzj(no8Vpg(PJ+)YhNZAcC)YN zXG3VYX?G?>@d>5d?Mh~Z?RqiOH6(S8&fKRDzVlBD)vvuTgSNuhWUQ$tN7Dbpr>1Dq zk>lY-8ov5Wqni4iGr>MAEgXg8TCjBBew|i3iqXoY65=+zCw}v1V#Vm2L-mq4*(#u< zK2Z_cBcAO#?24u!b(PAAoii_!6&ywPL(|Ta5}*2zIQ$_Qmpq@|uD{Y5j*R=QNgIP} zygToJzjRR2*Z>aEhssx&q{mNs=Ljeed|U?f;St&MsgrQ*7M!R*KbljTKV}5(#oo^l zrwz?m#KCg!{&LPPqWp+vw_=9f{chstbAgNB(41J8505h;eCCi&R5s5Cq(Eao@v&@K3Y`?a3}aR>BR8h3B-5VqdzVD z>Q6;9XWn@Qih7uLPw&Qn=5lpLsbC7V9Y_fLbSV;5;x&EEC)B%%Sm-IXrED%s-LxVkuyP&&3P3x-55p@Cs?J$^fM0l)S!)tQ6%{5tyz?kOk%n!JJ1}$@ zCZ_fx5p6)JDu=~`(?-!aR;i1q@K;U`wrC%hqNGR-uG1zco1x3r;*xS$El#5P)(=|X zu(BifAR2mL2D);13=sHIzi7~>;%(VnMsybv??>k0aJcYnc@`mG2Eqe_HgBj)VZDEE z{g5gP!gIG7q`iK54Fogj7Gp_6{4yWThE@ENQ<7vf^bM(pI<5KDIuz?*vXf@=Z8yiVIaxCbGo6#bH_01k!;@TmlYx;zW_FL2pDq$8 
zvR6a@>`$5-Tdy3mGx~Z?#$_MQ?IK+;{QZ0ky|QUc3?H3>YYwEIzxr4Lff+|Z&TR64 zyla2|6?nit)ZK>KSoO%e2C3)Vs%*;hl3e};Gx>B*Lqi`Fnj$m25kmh3bnDmwtsP)YR%41U67UT)@f;mY9QWQDJ$z7rfL#BxRf#WJJ01a z=of?tI}Z?=M8@7ykV^|TYfGnPqTdupPk2#jy&pi^?6)r_889lYlkd*8r_U(t9134kwoec3P~}1 z?iKo+0)_n)TX9=<=Qot@i{C>N*t{9(-~#-$j8w4nKo2Pc#mly-ALKXLjHK(j$n;`v zi(eZ>29f=Gfu4f7UUcuv-ZT6)mP*e8bsqt??Z?Jo zUqu6uqf1O!){2Km;;WEUTm@Kfq-I zsr2ewz{hSn5DDyMEN%{KABxRb_)*a3(1s-3?(%q*ffpCFJHdl!W`!bTy@uq3 zpq5#xY%bODlZBL=cp34+kL-~uR;Du4u-1?&i3QdpW`$6RX-?fMQ#5fISN+QZj!sha zS+uO0r4WDNCA>|tp|Z><2~FM(K>B82;U!htWXm!0z0QNF4&4;%NI{sNx#XWP3TNK7 zkpcg+EXnJrE{WXQEWuQkpz)I*DOCR082pk zgKNC)gg5)i=dlmAm_uEKTyQgnEz=xz4a-o-*?jgkU{@Y4Q5kWZiVn!2GQLyHLe?_} zs|YZ4Tz`b{3Syx%d)mJ??SmQ+Qh~KLa2C2J`M@P?@-7G{rYr64Z3p%`B?S|#RsGLB zdtikk^1$skW)7rDV?BYL8y1Rud4A8ANl+;}SkxSLkg#p*NQP=X&HCHqITWUOBrL5l#t4{{CvFKlPsA9oYA8F{5GoWyNtQ5$G zq%2vc^$v>gg{-7^BCY*?ejVV2P1+pGVBn#x7*V6S6a`7=0`#P*@j^zZSG%wzL z)C)2+1KC1&=TA!F>!w(Rv^=pfGV8oHfHp_6%kkCY!67 zF)O`F?vE{txX)(Gqdp`)qC`nTRtuu?BL;EqwrDUCQg+%iR!I9LA#=B#wrz zgw$&AVur}bTPMt+S>qnD$uO&$KqL!LrOmlF7}3=fDbZMnf70QZkEJsDk_{i!Pr2+^ zh|K7@KG^=2Y|I~$sxfS3DsQYBSrn=qF3z%_tXDJuMcoP%InFHmR1E?17B$ONRWqgJ zxq`<*;f+R;d5a1}U(V3>s!x&zH(nZw^27fIu&SY=3bE$CH*P~T{@@`nRXk-`LHmKsKPjZA$OICuq$9Uyr} zS4-JY$XVH)lx5mz+Bpu+x{}4p8;MY+@U@V&*UQUpM#g{y1CmYR)EH%PF=GK2W zqF{HLSI8_!-ub30Z>#}bfb`uQDKUexY0MO2)?bz}PPFa^mKKIt2y&}y(zjCb2WUu} zrUd0ayQD?I>VS9~Dw4}KUf`x1D`#ISfcp+}$3atTMva+quVJjxRLJ?gJBgM;-f$*X zz$@UGD2(&MhOHZ4ZoA^UN{v(p(L#W_TQW6RrPRd{A1^uMvy^!+%0G{XHYIax4E)&f zXJaGJmap_RML%@df~-)1t8RuoqJ@UR9_P3+3o~7^Bay!E`Pi`c#Qmxtya3$2rfCMi zIP(!K;-$wO-#}eiSar&V5cS7s_oU1D2#i)%rgN;lOhT)WvE&cbNYq`iw7?Cp7r_XO zE*zrGjR#nq%BH2MS!1-&z;N?``(h?)F02tbrnXHPNhT6n#ZV>-Ho;8X{&NCea-xrL z;!_cx{aowsL@E7uf7fUOqJ#!Z%4A%yF%p*=nYu7=1JJ9^s-1f#P%*G3pIN?sWnzIiuAOa;mz^fLO{T2%Xsp8Nc;mC5JiQv;o87;A5ZqdJzec2rPjM za6)mxh(_=jVt1LJ)FjwuhL-BbETJ)jO=OanoO!Q}B%l>?{K*r^7 z#{IF-rL(AS)0f1zlp%veQF}=sc9(P^1 zPvYQ9;#QJ75#uDomRgtS>9K{Ytk&&fE=Icl&g2}vt-e^P?ABUQXaAR?fm}1NGSz+w 
zQ6&y^T8I~jmunYS;a(ZP)!{SueiOd|#=q0Rx>glX?UrosCZbx9C~u5gCiN4|TAw)J zG%4f%hR~9&NbT8aYT<Jkk6`&qyx!{wYa^nr0_&gM%io@PHKEp=)h+r|0-UJ8rXmrJ9OS`LMf6Vui97CwbG zZ4r&DaNr)qI;MCYaf~iZZ4r78&Cx}wK9QQ#d+aJ+=b(Rg!00Y`c^9^?8#ppF`S;!L zV>xtc84ONxW;;xA!g9m^miyEVd}Q2f)~duuu(L{H-IhA)M~m`e{UN=r#gR+V-A+P! z>|}g<70d{uSUuqU{lW19317Fo-QDFj^bGv_^n(DwqY=>nm>=D+cW?YlFgtNz&GCYo zld9{{9q3(HW)E_DEG1LY1@N63ctx_UOdGU(5KcdswgeM@ul@jVLDrSvbCjQxcvAd5 z`)=7xL9aFMt>RbUBXk<^;P7EQ3+Eo@X_2-qWyiinfljdX9me&L-qd6l_hr$I$< zo^@Lg)hblQ!dnvDv5&6CL#n2oW2jz~{>OlQl?p_j5%=hkLveOOt-uUB8eIQA6oLSRS`*Fn&>=`zITJ*(XxNp0UFi}A1kXFazvacE~oge#oIyh z5{ByZlVI#_v_Mw>oES6~OPYNke$mL@{e}Hb*xL=I#RIe?2>CM2czHD%NaNE;;tkQQ zTbS-J2Dt>8-(Q{b2ZA@h5DJhC{aO>8)#K*qWjYDu=LV$Y;3AUi8l7#8Vtw50#bLNW zrzKT~|EAGF$Z~hWZPlntT_`PA{N0?`TO!Pkvu54(>$~B^fCKVH>svp|SIi)%QI;$o zV;Wlhn*VLC6(ATEa%9SjyCJCP$G}@t)@we8~9c+r$D)>k3?9E)tQiM1gV? zE$1p)8wcoKHQ6j{q>$*HqtFyAiN~W#*4-O3TqXuK8YlqgcPpn2wl}u8bo_;oKn6%<>lsJUs%BaF#JZ_h1)0eQt zZeg37_-F0Fh^f_ET@JZ1wMQ7LN4+^GjwTt zU4$+9^uXb1%YmP8BoVZgfi+XC@AkC#iDsOIBE!RAM!ZwrvX9y=W z&}Ku1`xK(&kR@icXFt?XkufSf9>d~ebBhH=-=$T)%-34Cp3|~wEv$`p5eOYhu(l$7 z@fTGYClxjdgoH+)^%y`+2rR%?ju9m6IBB`SssDN7a=Z;SGCd2J`?o`rj~A(*uS#{G z#m!QX8mu&t!EblLR@+{;6$d=_uU&JCnT}IF$pPCA6m;C6R@6x~GeyL=2UH8avI>GOqa^9^GRspnJg$FM@33p{5*S?)`3 z3l>~?V=WmvVDskD^_!}}>j4POiX{{V22bh>E`0w#<@SwZJYrJyZb;`;OsPuU{o#K? 
zz6HqB6AE!=t91>PDlZ)xE>RR13KV`i#B2mn*wd4p!BNUEba|fQK;kn<$F;RG)Q5!c z|Bt=546Ca9;zc(h-5_j2x_blCEg=mmNK1oAcZ1}nL8QACkxpsZbV+xEba$VHzVH8k z&U5eiaGvLWxF60J+^#ia)EwhC#$0PnA3CEL%%9}WYFkFSe*G_ts1lLM_wc#(6^w>+rd9aup%hKuBz=3%fyz6HfpDS{L>y^cw z^tO>F8#!~N$49V%_hV1nn?)a=Cksym34{9T)jtU?!+)hoYUNx(Dp#?JD)jr}WI4@j zh1}uy_sBCWAG|GT!knSz)60Ed#5#I|!iw}nIBfmhLy9z{_LsR{K-sZG3%=RATagF& z8tSXD2SmSeI_&V40fX+y93bG@1!@%7&0lbAE6q(FMAfa>=K8r{eTUf(o%TBhBZEY zq(iY*H6ob`2uAENUAZwKIMsnD{>_hN;F%A|18_zwyO@TIplOybE?8{4)OZn)Z0Gki zQqs1sCgoWg-yosyxfRjx;1|u#Pb37qrut52CyTV#VcyZ3h-RI4WS@BHoESJc z$u9cKQ=d;VTp_J$V0s9Kv8tmE(%zzeC1T?qfnCpg;H=sePvDGTZjn|e4oENM(@~r{ zb*|hy)j+7`&s032U+8lw_#MLlj(HUcA1EvM;lX9H!W+CaN@VFFu@#KVI|F8tA#ANn z?=IGY7v*6ICfYG+9zp%gj@Wtg@;V7#$OB*82u2a%LNF!=dLG@y>9)<9+Y}pe*!liL z%L5M(UYDY5K{wGL&wt)S(ozOnLk*!u=CwZzKTithMsjX8-Zu=w1J9D?J~$z0B~`^y zA^`|elkw#`_qddRl6+97WRC&e#BGvPI&x~;OE#`gW7xS1ThE_-I7g2ol4&S;FvPxj zAf7_@;>Hs<;!n47MlDu;Q#T4WuI`gk>y<8nlVFm_i;CCO2V)@RT3hEf21Mo|g|UdV z8sN1XaDqdnG-$XKwSk)fZ7&%^mz}tPAHa3s|FO(??d|Nuk9Qs~$P~5G2LE#l_$Sox zPoqB<0MY-aSKI&38|4v~(ms&??)on$co7iL$v+RXf>c+{fBt13Xn+@CU9J5cwq^1k z_Gr}qitX=(S3z%iFYm#As6N8~Z6QMdBAEY_#O#MB#{D0u;L;o`FRuMB!u_8m2*jZO z3;_7|e>L#GiuwP$Mq4nJD_lT*!7LaEVA^-Chcu{|?(Sjy-~PiI z_%}nlL7r?x4`kZEm0*47*JAPqZiF)8;A&5E+LNZsVTpQP?XEvlwp=zrmMW@`V(jO`HRSsp}wYH{?6JbSz)6J z$NFi5h5KTq#QR0fJ+?(U%KG2#z0SxjB43@QU1kLyZBxhILXNKt z67VLAGp}Hz9ZFB>1*u~p9_2OAITdYp+epJxHVs3XJ_ zTsh*4v&6{$vG#6@Hkqd2KQjgYnF%>+>oEz77zaM~k7>yMk=85<%<*K*vl{(N=>aju zk*I$R*n{K8@K)v^Yhy9Y-C(b~ZE6fZdtNM(x{NfL^6}|=@Hr#856*8PoU4-_< zNHrqlP=SEo`esk&UuVcshT0onZjdd6r!JD7**z#vYMdGpG+dFDcDJX$^^w=_%aY}K zzJGMhJ#(h`M0e5fdo}-p{ijJjz$hOb7jWWLBe}UQ3BP>|xRyy;02@h1m$q}SN%5Lf zv}w+leMw)8=wk-s%l5N~kmthN{)gM|P20&A4F@}}$yGq1p)1cAoV~fh5$aM#bwb2| z4XXJwt{sFpW?SPeo&PwMT(csb?+aYM;kayMFWg}YJT0bQn`sm$pIw6>4uf+LJ&7tX ziYJx_Fy@$d^{$XoaWad|lA^Qh+isxeiUoN;2io;$AsBezGa=>PM@f7`nM;povk-Gw z8GEkatiX)3${cQjZD&>JQa>Ijj3AQ2NwlOp4b9LAEw;YX4K1sLI3(ERo 
zu*_v1-@pftXr4gi=kOKfJqLSK!A;DTh1MprDTdel^==RzqU+%E2kw`gw-uh!6K*qW{|f*`|Jln;Y3mWjnn&Z2U;j4>(c zIcC+rhKQG4`vLd4(DodH(YX!7*}dA#Ws4A-$KT@}Fo~gX&dDNO3*v3;D(X9XM=zy= zdu1ru_{~BTG_m7I@eztM5W-6FZTT5D;0@!zNXAc%qs>$!bM4op_#)<5U97QTPQ*dZp!S|L{Tv#9M``fPzKR9dZl+v)HsCBlI@H8pMT}S=vlQ`k$N7JNN z3Cm5#T-ivh4cv;Toa0a%&FA@bBlomjacO{;!!jjz>RG}*7qvm@g={f<9S3`=bANc6 zk`$N1*GKuE`^X9xKWWk-rrWa_jH8(po?1y>BSY!Xe7VCDdXNM-%6|V17~m?2jO$2U zrcv72*Tq&3D+i+Fp^|W$-Dsl=uvHF^l`d~nm0DQ_?=sVHEot!8N(%7cMb9x$v+k>5qEgx z{^5@Xf+4dV%Mh|i5*?9!!U=wlkWMXzy#J*OP`Dg|EZ|~zgF*#x)DMMGJGTS$yH?ET z|3or85c@7*pctK&beudp27#f%fPv*9ll?&uotA#R>Fn)AdTQW+FBe9Sm;%uJqXpt2 zYaisNs45;BKO&8PAOK&2_3Cd`MhtI`YP5>8_^j zzdOcFa$*>$-Z#(`c*_5L{)p5xx45bz1s3TK_zuK>xPPx9I&~GsHi)1=2Vl^^U*NjM zXNQ~&D=u=?HF^xA8bWm)T#u{mae@4etbc#~jotrm3+yFnU0LS8Jpf**Qa-MA!$Ips z=%B=+rd4gN_Ww->WUhPq7`JuySxi6#YuPUca9+)A#r#*evC1Zt+GRW6&&!cMBup6X zeY(Du6Q2HdxA2Ab`3sz3;{OhWMDu116!AjJ5CqyOZ+2iSN%tmNsIG$}O*nnW?}yN| z3O?{e!Tuh%*o%-Pi54OX)Kvc?`XCHCPQ6X-X7Wju`QTZz)Vq=Abp;NXC0`YdJ$|nO zU$CVW1drhc9#AiR5!d0-mN=+&Veuj$clADCLG^yPA6D>uuq}QhM<2)>e z3eR{dB}_>RgS&?BBnP9{rk9gOI+!-~O)H6F10hW=tNS>ZwWkyliqd1l=|U5j*I^;D zf0ujke!khM)@HF;OlMI=J#ma}eqVG$;6h;N;?_<19459z@uLb2+$Ora^Hply?{qAj zUk;wp2!$#TM-qif8<**mTJ&{><(rUGw^5fWS5q1tXf|^g7`O*Cfa*&<>8x}+_ikDk z!@Pho7n8(fcu@7Z$>6^DJAhmW31tNl{k*A(d~;N(Z_X{*f4Z`>b{%Ie9BBD}f&h}O{B`IQ}gZ?-M=@WM@GtwC0*q`I6osnCV}ZJaC?Ztu0P=VS`mciZ~LJqi!DR z!r4;)Qal`Rwvw25bX);+Ej%%+{=(K0{6T;#c1-d=E%3$sL_nNT_K#i1CCHo#ujwTa z^;pUs;;o3M=|HJzuTTjff5aEaA1));zeG~9K9+kw9Isg~zi0NqHnqYe(G$l7#~sm) zF;w=N*?@i)^q2Q2V-deQkR>P!KV6!oe548O8`7iLj-PpL^uA(!_Jq2Uy>vz0mG8&N z-62s=LNr_~agq!Oj^Sy>zszq?nW+?kc;F7BjOUyQWEG-TEP8BqH5|RssTD?p(>2`K z8EF2H2CKh>V|`)5D?LCJFWCtF5S*@QF$mj~B?p!6x|g563{eQ{1lw7@_}e;}!jFRhvnrXg;mOq`u)rlkj2Jg{oxG8*z93I4H%RUMC{>S5B+0gO~BarA;?D>7X zm5B=L&m5KC27^*?Xfho;ePyhlqi<0`muSrn4J6}KBF$dNg2!T=U6mlpz?g5O^H(dj zFTlO^p{XU|b#6oaoqB>}(cK!u@Poe|4a&p=7J5{JgNQ@CU57eXLpv(AtwDXj$vF2fx42* zGlq(o^$&9dN6MH5`pNh)9a1i`81gFBg*s5;u7bCNCb*-1-LII`{Y{w?kkC~*e~^4D 
zeUrV7&o_s!%vyybOj-^xtJlVnNTj&;j9Q-Zr@~lIYa^~jGki@WLh{UE74LBsd_g@; z&GK=MO3KHD#4ZM#7JV{WWR8E}&{!Tb;9}#Jj==nU=A#I?slY*h(N`{92h72UaIXjh zR+#As`BMTn}P8jN` zV%TN2C8zNnnfd&+XTA1nvW~MP)UQe+PtTR_&0l4lUlQJ42nU!q zrf;%->X7Pyrn%TYhag-il+j+I->zUEt;_*LGdGEXC{;rQOIi0>knB)eYnQUD5xKdF zKk~xxIfhoIOCS-|)j7|_;T#zh%AKm7e*Xwu5D7PLsE8Gb6{jSuta%j53MUpSTqUA{ zKOCaZeAt9v${qsH`jl2V1Bg?lL+!%A>;2gdv;H5YkoPwX7dFG)j$m48-g?$)vtbjh za%s2TfN1g(VU`8Lh2ZCLQV}XwKk-;mGrF$ci9~MIrj68T}s?3}C^oCN!QaI9TZ~HwF@nwl*EzN)*mNsdE#B zz&M7MlJ}h|H=2VdmAlQ7rGmM`!1(nv8_wJ*O*>^J!qBYni*iAKlpla{-GOVC+ewdl zffbB(8~w!huwt4D`;I_bmM!a%f_!3bY#C^$9bUb0rfIzv?78x^(Ls{xp3z7#?}>c{ zGg-&*C%n(XS|Nf5#A!XtcVwA>;$Mdd+M>#52#4j;+240?j6)t*KGayMA27plZqhyd z-Mwl3iZ${!kMqcsufh7pk5^G6nBjnw4DlH-0Ew!6WeMpT5sc{I$dxR4W0+cyU< zrdU+%*U1L!+3?Iq^rBv$RbfOd><3B6y1jr@8XKQP4UZN1s>$Ee$jLw#jn9o_s&^u;AQ zkh4Qlb{p1CLS{Q#lMZ2WbX z2c7fyfIpyRT2|@LXhWsmJZ|^J2Bmdw-ICTVll)X2RGM?rYl|m~=VlEBCv1OzlIHzK z4ebKaCzWSDL%0$B6%~r!n|+fvA}>(t368S9Uv%{MAG)w8Cj%$#C_d#eiLDF%;EIAU z?WBXzd!oMI?rmJ(-ITr^c?2g4tzS0zKoGgYS?8YlNO-cu>DpBs{}xi#PVz?vyPTI- z0`&G^@d1G}M`T>5ErFPOqy;+`(XNTF3FgYVopaRazrLAZ>rLJ=b#w}(Rh|s&C5k*M zQ<5vl5clvA2@Lr?&^5lH)yc7S!B`LBp{(lFzoelIZd~xIXg*RZqUY`vnfj!QU)n#Z5^F#SA5{{wB@<3-)VN_cs0Y|J zi}tAYeKpbtjUN9y!lah8T|Da0M3PR(zIOE4H|3SJawoIo8iR0I7nv+K-!L$89P4Fb zIB|X%uD`4t&$}xEf99C1HmojO*4{7J*qWd=!CJ{`ARaj8cWHXPA|D3c;|tPVBQuNtV?I++ z;$TAxYI77LK9@v_w%D9^OGLh^4H!F=SU+lwZ!-Hn31iMuPQAmCn!BwJ8*m_YQ9kNm7l`Lxy@>=PfIeXm=Cmz01!)dECM{voaX~A+{h}XPI zl57QzYeV+{HsxaX>usaf6)FK}v>6{sqX6q;URjGn&ChJMW^r&*2-$ zF9=Guzd(sN!)xVe+s?}h`}qrq;qbAJrg&st>@!d$` z8jGqk-HUOD;Z{?3gbP(}tP@|lYtOohQ`x~BYn%aTn9&Skea1C%h&-1RTwEaXi_b1!&V+|!)Eb+1pEY$C=9}M;h*1WIV5Do zig9>+&`4*ir75UG9G;S1NdQ6hinHToKKzigf{>5^0x!Yuvfyo5&r9sVYH6&s{ zzT!7;`n82BVv3Jc!2^>;TJ&M{N}(PWLBbF`5&?CHQ3#GCqY-O9Qu)r4LUhTw?k7=R zdbC47%8{saM@S6yp+VWxN!~YvHr_|VgT4Sh^0|C*TZRoPQauYzK_~-akG$&f8i3$R zm#Y`w<_dn^zR(PDUYE76T9XsL+BbIF4H)H5_eJ2-{Ri+NCaaB6&e#NuX?*9WOSZtz z)rSq*5Y67+5cbWG5g3thWilaRcVJQtN|R&vz7^G#+`Y$Tz_|Y1!4y4WYdW?aoILsk 
zcq}c%#*6q}hGs-ZCUoPGZQZp6!l6V%Ek=qfNZYE?-0cec?lm9w>hib6_6v4ej{_XZ z5*sI3+1hoQC5q+TX+FEvzRZ=;WWlj;-R^2mex68X(9LViO(3#wQVA3EO78=a!dseM zH4p{Pk76an8^C}`y`HvRE{|~aQSj2OX#kkY<=2J>36-rrybH_E4Q*}*f~Pya?Qhpy zp%!fKtG9M^vIzmc%nWS%reb7H6^A^QlShKT6j^iA{C4e=tUuw*z7BXvU%EnzKq4B1 zQnn)z;~tZ*sj*cnrNL#hTMMC{8s=H$!ak89GHCmvX-Om1!)x^oh3a$DQ=Nv)u9J<| z?n8oO?zT&vdNknVeClp8DABF^@?H4LQsT?!pwk>aS$23KD9m!<>Ic+*9o8!kF9MwG@XtrumS*a7Dez#AfJ#$kuR=UjObuvsb~~5AXU8w z;IBLO!TPtkhNXBLhfD%Whl2Hz>X_k{2{~zQF(;n+jd#wv zAmnu&`3`cG?g0QeXn4`!2vMjvEoB^swbGho`wwAMDn+9*{QQ$m2w#7$y?QyX8Kmy( z(Ee;bx*9<5(8WGgUJ@0uaJmR`H&Ic48a+$V4N5GTCxcXEKOPvPL@!Of+*eSAkD75# z-iJ|AS(4B`)FY>5Q^72~URmLrvQ1>Xi#|g*x}K%X&itQvb`24a8$nKW_ zgks&HF)lHQOU4B6_u%*ND|fvw(s-^KNIJA-df~(o9mU#I3U0lmRl(~(SRRYq_j;N; zum66nZCHNZ%$mEz4@mpkJjOCr^4XZ{NnKSyeiZjPl>Q=?=WQ_Ke?oAf$2?hoa*Y2^ z>hF6x(?0nh{ZAwi8(fVq0$P5S%H#EOhBa{!%yrf-BUwyMKIS_%b+SNO#cWpn4R9Mk z00i8F3Bdka7D>cox(G|J9PtIw$FPPVo|EiMg>cg3xplemn+rwT|Hc!>hUf7GI zI4*Nhdl?#41w8Uqn6n-`Y6qeK>U*qeT&Fij+G0E9RBkdxB{T*m`fs6BxTW_16`<)5 zZxNlcj?oA++ccLMkO6*bGVtX8Zh=%k<3eNuLxDEhSLr~M1z74;4#MQhsL-!iUjUhY z8Ym7wC}Dj8WYvr9o&rYi644Cf7)ZM{vDvw4V60)4G2?hLU{D!xD#A?}2c2)_Qyt`t zpRXk#oBln?F{Xr=^jCh}yM2aaq6zZ8d%>PLK!!!JEW0sje8>ErQ-@XIgVQHNa}!O5 zS%L#>z}dNhko6x>DDFHc&d>5_45YTF^`_Lxr8w7F6{fp00CXt;)MBa=hB8Ckb7Kj2 zN7p;G0C#v(R=Ppsb-IJVf9TP8+E4)!9vxzVWv|v*x0?$O6tJNJtWaMkd;j3 zZ;4+(=6a2N8N#mhZUz)WXVOE9yZRz6Qi)@w1K-^K+*u@{78E=D6L2_!K&k4R+w=A3 zqXQPAaGaPoDp`9Qc;-)d`oB}kXp_Sd8$8MMa<>em@hhW-;NE+dPpTcECp z`>_ZtJ(e24zqt^kw+-O4BsOmaF2id~=eI9GV= z;UYGf^d@6-qqzMCoXFzk?B%(5<4`sf5>03tBNAo*R%Dw(iI4yD3AH{e{xkQYQMZQj z8wp&-nyh@G@gXegO+h~da*!L+kW*o5nz$U`aVCLc1JEgw`!x@S8KVM`Z;rSp*=h-Z zges6ekn03WM~`O@rWS#S6uQ}09eNeU0x8M6>b?;VNN*8v1**zn9L|D&Vkg($$-xQ zR-J!>ip%#|oB;(wxK`%k5gzKLkO%VE9D#W{vR(swTtbl=fP4i9a^z9}?RIjh<30w( zzNzT@Tiz=7ZM6pr!io(2(D-kGUjzcUQJ?g-bhU_a%y<^TTL2I#jE;jk(yd51s%sJ1 zFY(BsSB^pOS8NNukOE-tdVoac!#(+SJNJ(PA#rtNfA;~Q12qX5dpV|>w?^y6Tl*Ap zJe+Yw?|}G0P91EQ&k)Wu`n7m9@)ZURn_-vMRC5WHb&< 
z+ziCgx>T?wCm7WOd(fZHa>aasgz=zr-@#s4u_y#>5Lih-)qXX=R;$RIs1G7p&@$vvHq~U!|)%+Nynsc_` zS$=_i_Qm&2O8HAU*qdCzD#8y#6dl8OOibeP)G`qXKvM3AC>u8BbiEQdA-1d{08~s` zpy3yJF>fgSy6Z{F)+aIzzuJ&Ol27!;&3@QNGyrL`kp3YD;vlzMCyPudOzW8ggp9)g zKP0z6I|hjA;cu!JVSEAen}ek<(%+Js6u4j4zXH|xnG+@l@mn`D4QKu8b90}5L~y2> zgdS)rPhR8lY+?0(&f#&w+L(JRTr9S^mgF%@k+&k`ikpbNMHWOlo!S@$aGr0c7}3=- zM~V?4L>%`8RDi6#{E3I@*QaTn?`wNHP{g&j^VeBxO@Iu9Y&xsP*bHiJX5ooRSz#ip zZkNQ!_&JjkQ_*Hk3@C$sxHB5rRZ5)LS&d9!QTa^%Q&kzKEsck_C`@gi(5^m!*vJf< zUbeAmL`7*@LP|%Yc_HFAQ)xO7$a@C+UaEn1Lf`!;)G?Uf>K;uNdDV0vn}!htxbo`k zfze=IpghLf_Q?Rq^exTmrCrJZ^UiTGkWmJLaZCFFIT^6*iJAnyh9zwr-Ckv`Ze#$q zcrtXkLCq%724qTCm_C(Dw8}q*mLg`x7Cl>~2m+IaT`>^4;}An^5GnvSQ338NmZ;vm znA|P}K9+N9_AzIH(a4H`Ohe9yK0gB8s76MZsLve{2&ERt-W2jhxkGwD^Hbv0vG|>p|C^hUfJ$U4)3*oP2-#)a?&vgZn% z*eh8#Jjxr3t(b2qu8*Gs@RwL~kQUD<hWQG>k=xEAah?yn+sXdI1i~@yAS$F3r1B*EG( zL+M#GKW>jvIn8h^6*6W9GjwO2d$4VCZd54ssSInw$>;{25h;nZl`F6~0XRF}=Oz%d zP5uK2?f0lYEAe}tl}0Y=Y2g!?paF~X1wh#>+ON;~0C+|7YEGdzq#dc&yux}>qQb95 z{)k&;Sc{a+l`2E)M1P zL~vNE3Xf&}asOo;@d2=X(aQv;!D0Mqo$uR2d{h;(xWFDNus1ss-sG!VN$?g5=}9_nQ29OqOFqOa|kt!@s-K5Q-z#vd2L#XZG;$CAjbe=9us_ zt}C|3@oJjR`jk~D0i@G)4XAYC9qizBcSLwaCtjxFRo^t@vDITRHtSGExlsk0Xeid; z#5Q?hQeP$(Lf&M^c|-~Qv&X(g+E1F6e?x^zCW7{lF&F@bDn9q zri4sL8wyZyd>p;UK%1*fk+?R5?zMD3d5Y1D;YFl_Rop1LSc_2hL!KDjXiX#@10I&_ z1zM14H2uWiUQ{rn6av9~W-=^CW3j(f!n9Ojf9kl8#kD`>a9~%45zl(JRATM)6~M;h zY~^7qZ=@9MbI|{7AYYdj@lUURcfP4)sNN|jbZk@pVLou`+2H?U+`xY~qW?S9zkA_0 z|JgwYDv5tb!1;GWAK1YCOKaeNFa0A1s=)tH|F2|#4UJ%npXf`Opxp!?hQC88O7oMM z(P_TG^xFab1l>oUdXj$&no2U#fO{O+D*GK!S8QVlP2}{ufQxw9;`+K9Cw1isC)N_4 z->6j~Omgvx19V=AiI;i;^2-EWB7O762}@EW;1mUQ!KK9@VgzB7fAgzn|4<7wwp{Gt zeYvYh?spQ_oB^;Erv{Jo2`a!YAc970WDa-2F*2mKwLztU4ViWgtLdKp09+jXjx+ZN zIAJDfezD7lC59t zU%h_LiUfu2EG2?~YX-R3^=5zjl;A5@8$!fs zHPDm3N@2`0LHbbr~I0ei`YkTpP{!$0$Go?9AMYs^jZe7`xK<2=YqJ1*B z#tLw`f507qwB{@6^qC za-cZ@7c8t{2mt%z zMpP91BZc?A->HMGWAgkbOl!Boz*pwn4H-wg3CNedvb6y_VsU^Ra?25fZN!nkbeXZZ z3`%|XliQ#@agvI#DD40F0xqt^cZAt!lOfZgQiR0jBvSyP(vZpn@a7+)k5o->lNn`O 
z&m>iF5rT|Oh4TF%I6PmhvjD7}PW~`cQOZ#)^T8=V5SknY5d;wVjpBa!aZpI+g#0HL z2bn588Yzq*#ov_ge691qzjR>T2+C;0XkejmuwRHSDRFoci%3)@z>%`Ofjc1@I9FPc znt_cg`5An+U%5W)3o3n*MDIO(XYn)!Fi;22lZXW{oza@TyqI@x$=-6&>0vq0*`<(Q z*}ztMMhWOHY(7sbavdx1x`2SQGgHPU4+@88zWi0kS#3PE%(Q?n>+>~wlfmEBTN7|y z==YKO`oJns3xkyIl5Zg=ip}J6H?p6QYV0yVUcqSbxQz4!5>o^!2dF=8@^Ve_0oU-k z^zm<<=(~b$w0Oh74E)a_m^WyCI%PI&RayY5RHi{tu}(L;#Ayf=Kd8#m2nXlKTzz%& zy=+~Nb`cj1zj3q0Us0Ky2fQ9qUa2$bqK|tLkxVVa zZgQ`Vme6r+%f3-wgHIrFYXhoPLK!K>gBz04es?3Vc!bLuG;(G%nFERMm9MaAY$ z)Xk{+HDl+R#cUT;q_LOv$hLFisY2UIQqFgWtK8~tWM9~-9(rKD7BxL~Y}ccuKcK!sfN9C7XPp2 zdAy_8#));O3Ku;Tc|+D+YfP;&h@iU>PogejE4L@Q_ZCk?bTV&U>C;^u4*y=gcmX)1<9IG#L#>56u)F&@0@gH;Z5bHbOh8pMS&<|h1P zM)vw#>F=AE(Ob-_Pb=c2_-x+d4>pVl=8 zml$p8!e?}lq3xR-A~e*`q0$thybB3dAS!CJA`)Y9o-&=e+x5=a0lVKGzaxC)ohaM~B&MJw z0j8VVcNJOyG&BkQtIGSG@)~XC8E|4M=_YVy}ceWU zvIoSV3y6L_$zX}_JR6iATEdsuZSEjr<`NwY zO4;+A2e}~jFLh?Tet6R-EFbbDubdi<%ksME545iq7++e*jqB1>Q2u**{G z=Se3QXTulX-ltb)nb<-#wtTyS6YFD>U%%t4Wr{R4K_&}4(hG|f$1e!Zm{dLWRBSo) zryBUKy{w&|Z(DwN-E7l!zIXUJUddhWgQrlGi&LK0vz=EhKgO;_s(mt!1=(s8)`35- z^$W*ciWT;ptOHe?-PVLH=Kq?~~T&(uwXls_U+eEm@Sm1x>Zi znxmWr&YUfG{$w^YJ!m2A-Mzh*@V`I?L%)L1F|`-_cb(78N{1#Z2K9HGixs>}N8>Vf zn(83-_#%^@5C>1%-?kaITx&lZ)C5}67COv+*;J912giouHaa$82N(x{$)1H>=@=@* zp_vn{+)qDYOdEg$%|zz)Va)v`suYDO>UJq&blvW8uPt-;ceDg=tZ?@`>rUs1Krlh) zf$hsOdO&ifhU-$z(s>lAI44!|0SiRws=+So*mrn1_X?Q!nOep~_}(V!S@y{`6Gjx~ z*B=EAJ^~u7Us`|swCv({&!0!?3eH(fcgdZ%;4|)+SudwL4fHrTOa_%1Nd)D>cLMzI zJJchjzis&C*z|V%c}BgdnvkG$(umDqRVq5g3cA+hvLq+TrEgS2j_(5j`m?8-4Tn_w zz1tnd#o>0#FVrU$m-OebIDQstLT8ty2_Up*YEgsV^Z=4%(u2VE1W}Ze@!>zGpVYWw z(csfqG+i=#NH+0vO=VyI=)GD$>}Pv1zoI=Wa0N(>=b}36Y~>p`87#Ncv`si}3xtP?rir3P%}e-@e6>Sj~S}A zRmS*yTFblea~#gE06`qCB^v4(R__J%#kOHp>AR_)F@~f$2aQja)lKj!+u2#6d?%W* zCU(QX!pyF261n!0mJZq;tNl(id-QYGH_;_|CEmV^#@F-;dbu=(lX9bNXHrI!3olbR zrL=RbG&7KVq0@NGdr|!b%MTwHH&11n+F-YSAv}1rf#>YQ>v;R2Q1>2KI{({TPMA>&AAypr8AxwWb8=n6lnwTS?-g?OElJF&vLOSswBmnU1s zfjIa3Rc5~OVNmTw&ec5O{G#j0q}P=0wt~>%uZmI&6C*^9#yeMwrYRfErl*7KtzsJY 
zV_X|Ht_|;-T1Zq5n)cz5pb-VWj~(CBr0eMohumV6N%3SX1Q9JP zd~g#x{WcXAl!```iJX#}1edOBLAzuSaE$PhBs@wXc$@RVCeF4W2QMx<8 z+_b6DbU12gCZ8hlOk~nP;k2n_*wvv ztm0G7<74lr22MPEC0t`ub6zfpKc?+iZqJhVVo@yCQ8EaFX>n;PW(zKUsliq_@}18~ zd{XO+txIxtHfbB1-IV*eIbC(E^7_(_Pk!=;6hgj!lSL09RGxUdW_%*qfG=s7X3J3F z^_bAMZe?8D`Lr9s_T1e2O!uJL1ocf7X49;ZMFNM=w35oMMeW4-dD7iS%b}ETS^Wvy z(ZLbZF7d@6Y&NYQ^xZukqczjO!?nu1PRo*~+2^32L<{#qqe9%=qN4zzG=1isGt(yU zRT7Y=^D_(NmDoG!&7!h`&bVd3iU&^+?wQmoZbiK3o3Un&!f?_2fn9i|w+b@~c;ZR6 zPe39B++gd~(-KMOf76ZNoonW|53KgSJ+nzuQ|{txgKzNOkQ?JM^7yuLsw#c2ajt-{ zvuY0^=gRXrT6CXN_C$*i7s_-g!m3p7-QP?s;nO>k4N93eN?ReM);OFbYKwrAVoR3` z$ITT)Hv&A7OX^3R8k7ph%BZdTjMz$R(W*u(Zu|L>X-b{YB*2{~M0HA&#Y1x8G@`Od z)LUFog-_*G*jx+b5;1h)myiz$DW7eB%kXn*nwf;F(zlpwVjHpfJ!1{v1PdWM%ImE8 zf}uM?nok(YI;rySD3Q?D_g*Q@tOf3^{3Hl)(%!Q`Ae|Yu`7uAlTRj^VhGunsm4nR& zPPfrytE^z(7A5_he)d3j*YGeVYjHbEzB~Ouep@IiztyBO!_m=1KfLqWo9F{fJvf5P z!X8f+Nytfrta8Jnbe-mlgL$#6%{jn8k1Q*PTr>w9Bhr%c@YTB9j;Jz{f&xbzt=mr$s{;suD`L%^uriBA!oD#eaROi#$NEc;T_V{fqn za5^O7W#8sUlB+W?N*?%4vkt_ZG2QFm1=g}+t3Q3G#Po}Uy}NleMXJ=7xCtC08e%vI zI}5DjY^TA$oal_J3A}I=yCk3Y>UdR z?-&FMd3Qr;<4xyA5_a}h7x|@%cc944V%PPA&%_BL=9u{PIPYbe+XQP>rrHQT%{-}t*yJEx=G zUx{^AMcabUHW*yJNa2L(1$bP^x-R%e7HxYCaD8bX9w+7LAewy$D%rT~%X^ zcLaas2vD=rDy$d=fZU#0#{8Qbz2`1t)4{Ki%DQ&)wzn%PfvzKn;O6AIf}diP~u%kar$NDKPLUzH;KXAGOaf7(Q}% zH{@xIhyfZauIFHGHR{(oQs=Sqz)1}5#NB*0ahtwdCf3D;OJjz-OR%XT;F&kEM}qmr*k{`gnWjbacnii=qzC__t}73RvVGg6 zdXo^z781%fgE1J!lB{{{OUT&wb;vUI7WIZ=dL{eFPPReDHb{xcZpPLiBuf|}Teg0; z`hLgpeSds^&2c=>aox-FT-SA<=XqVnGl-`vn1$)FY!%u(tQ=2+vZMDWNUb1ME|MyD zVbq6XvHxw+<7DcZ*JX_oJ7at?a4kNMToOe8n*Eah>--EPG@Y~pP!=zeo{-P97A<<3 zUyX2zao0dBbRjGY%A3V^Niwlkv!?rC&w0B+sHV5Etfd(K2~FQUFV)ztxUiUsYMR1@ zv=p;PTCKcFu_Sv8DHdnfbYgWPPf(NBG5c!ChbJPNUBy|G7~k>=l&$-Q=eCx*_C~G^ zv+Hh2u|-baTM`5hL}U(`@T0wq`N5jZMHszyGvwUnmOm?fx9c4mQfN(gD&0HCIVFH6 zOSboZgD_<|hOQU;?(8?GZZkSkf28DNmaso;*H^4OXL!HUG;{PdpIEVTRm%&6nIS8w{h(JdHr{Jrsq zx({a)bNpw#L6H5Mvacy~I2NFt*_M$&l#zd&F1yoy=&k7dpjxj@NQkabhRQ8(u<&+v 
zW57r+`wYLBBi~C@`k49qn3aMJu_KJG19LEI0wawD+g$zC=3qW*;YCV3CLnH-KpAwr z#166LnU$O?ZvCERbN>3+Xg8MI+r5)Nla87k_stz4P8Zp{JGy4w69>OEyY+R3G$7B^ zvVCJyG3I+}%Kj1WRP(ovmyfc5GhszL);dp0FvbhxVu`%A&ZT~4b;__)PdT(N1K2ft zWJtRG%W}ni1I_~Q4t7;J z$?BSD;-yXJ{fgr;#W;`AqwbqeoyP6AUmv_t%C0EcUE-~V2n?jST{S(u2g?~{d+^3K z;w67QRH9Rh{7iX_0KcCMnnE_2oI;#W9GPC8Ax)<)?NpScMuW?P$g|)ExleUsXb{sr zl*ZXV@p?DvU8`tse%_;E`PkbHri!YG+DH-<)#^2eH5}TviRs}xB}7{|@-y=}UNQ+c zRa~5Drxsn7Lw=%&oJ*pNit6>nHO!7la*5|>-PK#&5z}%Ga3>)RuiNKOmQ|wwBSud& zDJW1S z@aYo>$o&yhP_6fc(PqspyDcADB0*;PaXEkE?#1Y%sq2Oz@tJ)wFq^hl+#Uq}fziA~ zgAjGAMH(^`g%{^&z)$RYSwPPzty-iZe$9|bV+H4*(7{G8n&4F}QNKRBG$o z^x4~7w=gnc@|dD=AF&nhI*#pCO}Ejns=a^`->J|E>|>$MH|v1$}6e4jP1c)&W+#vzkeD-1B4y2JXE~6tXNxw zp`p5^KD{l+uz9^BC9rC|+>guu^=G}^9S}!XZAc@VPW>;!!@1>@^NA8|!Cg055_{G) zGc|IH!x`uf)iRNb`|!YgCmRP@E6LjNYiq|$a%Lq;k;l%)2YU9Rz*PhS@lET9*k1FI zci*fP<+eGY~~U!K=?Q(O|&pn4*dE2}3zBZI_Tzk8Ml%$q_q0SE5=)Hl>8 zS0KO%$6ULXHhV!RlUOG*P=T|{8P{*XS0dLxL6~@RtitaF`FEiGr;Y1&i|etznwssr z0$TCCVhbsk8il)@!#@q9y@DX%@HslvYAj$6dGoKk$8$|beaS0gOZxrlxm20^s*MW$ z=(<+Eh~yxMN3t}W?hxf*NOG|+Zn5LCzP&APZz6f8P`Ho}BdSIB92>YR6`EaOc*U`# z@Vu<}c^xtYKP6uiaJx#ieQED*-i+^} z*b~AA*Sw=llRv2Mz_{&GRPDYSgaIav&afvJppDDXOr;(_e{7#o`a6<1>N|Jf?|qcs zPx7B5`UeFs5i(>krKRQVSn7BlGsS0b!nX^|_103Mg(v8#K=%g<6Hkl^p;lk9W{~$8 zpjMIGgY)DXRN)r=-;Kk=BOeI*J`aw%9;G$)e8q&Y0FGa{zt0q%RoUb%I5S-P%BfpZ zZ!)*Yn{DxW!SX(sa*%MK(A`}SENr9DGO@uT29ihWwDvKf7=HPWPe5^1-fPofNI#dq z;n*NL>@S{UWWdoV8o7qmu6^(Mb?Syt;wy-xVZHD052M zwl;!3Gy>b`ePK~iho{&@>AZOjv|KyOO(BM^;(y zig6(1?~as=3|scvdu0d3SLeK}j&M1&rIRa#JNslbAtQ68aD}1l9A~i7>q(=kF|Y&O zt~z`r#VF^-F&hNVp-Xp|u;!_bXl4dm&yN6CP-is8V>sXLZgno9zb-H?Yup1wu)E8m z?$yY@EJM~75_f<0TIew=wJmVlWc^dkpeLEdl}S(4Y8*t5obok46{odDXZ}QuUs~P$ zgI?^Xx!g${@8YxQU+2IpPc>12yJ=kWt0+Ge=K^X$+m`YqOEB=jCsx>V56sVF_g)eK zvK1H~qqJdBNUzpEA&m<=V0`aJ0;kCC?Kwkz26R~^ioCJlj`-|LCZWwoiWjGPtXa3J zyRy<*Lwt|YnJaGnVkWQt1&`#U7GBosk7>Ig<$8KAXDwKtMGvfEnzDfMZeZ3jFqh}i zGB-EBaQ`$-_+trccd6y~;rN{|fG_g?@~b=N#|GrPbwCZXp)Pr+X@9^Ilih)Kmb1QT zpl(_aHlOV9>|y|R_T5^Le%-5{oXR^u67Dm 
z`>0F0Y9#cxGP3k}ik`)DSlZ{9Q+4+ulQ*+F1Ap=aI|wyVUSIVFNLWB}xh~yQS6JLa z*(NS|W0W);Bh)V}yYUq%}{9S|gDE-hcq!I5aV+%;0nEUB!eueE*yp zfCX`r>TkLnScwX-V*r%XdFaqp>#cjunIv*p6>o1O)FZ`s1y!RyVtBlExY$6}A6=NC=zh_k*5`3%EgOJxEVSA%Ent7IW*dL^J2*LvD?N@bgn_I+Nv?Q=r4*eZ_IpFCt9gjAHOm z*o#3dlhH{&|8KTWNwNuk1rVPmiE1hy$TpFP2o^!RsKN+JbvD%^GpIm&+!K&C0Bi z5?v=~8ma|uRPwqC;XDz7oidG+Ny85%b1nm)C}EemP>>Ut7C4m1r`fVCqVxMQr z_QT}NU?)Ua5D+ zRb}%e{pj`y$aXEk8hv~ppv!B)u2v}iy9gostrUcyeZl#0g8qY|GRVw*EajJ=2D8=p z_#J|izc{TXTt~C5XJ|QoP7wR;Dvp0+Bz=^4StIVfY303=SdKkgHKQp1s-MP!{5?}mou_;8{Niz3`%1vr&2jz z#Ol=O_7ZG`4HN7vjk3iwkAUI@wwl*(VP^KK`=Dr@*oOyx<>`N2&&H=*jHQbx)OJNc zD>QL@-Sw8On?=XMBVzunAUVmb?}|X1-sV_V5Ft-ci>-vJ4Myd_C(X^3tX6cZtHVZP zz2b?h<)%fBc$BemPKl(Ojltb}tBnG+?69tk2`r*?1KEgF5gxkAd--?i=}uG3l{T#R zw*Iv^BSC(0!|G8~RO+%hSQpXY=e?cM1Eum&4;!X-;t+GW+`o2i7MDaSs=dPdz7mCg zw~O&$N78tpqu@+)CxU-Ev?f1C{x1V5GRDfszLsj4vbV1X)iDFBoiYvF!6}sBL+c4L z^T?Vn2FpP~H8rr-A2TGAk%AA)XLK%3_QmQvmadLVJ_jqdF48U(D(73iYaK^zhv z$5NJmSK)5D!iN=Lw%;!QsmWbwlL(nJd~w{)`#+6yF;EtP5B(+o?U&=Z&QFc~Q*C>6 zzG&?pZF^wS1Ycb1Iw&|_4b-#@ban`IK|1-nfFBxZDVQu2CJlwb%wPzl3>+jWaVZ#5 zN@_LXsmK2@@bPu_a1H(c4YGRR!@xk~cm_XbSL96(6PExr-ypQJ2ii@|H-rYRAT1#c zmkWVQ!6c*+vf@(WQr>mn9s?V;|Js6SqG^mYw4g8vS^iVj3<}^rjgF>)242lR F>ObOYUFrY; literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_chol_model_11_type.png b/doc/doxygen/chapters/images/starpu_chol_model_11_type.png new file mode 100644 index 0000000000000000000000000000000000000000..ba9323a1ff21529fcc2601949d7be7128158c16c GIT binary patch literal 5871 zcmZ`-2|UyP|KH}Cdm&1WrBb=C5Hph`k<;aGFD3#o@btySk^%~p?w4QNlHp;YHC_oSa^DR;&8aEtgN!KG75##-rhbr zISGcKYiK-Ncvw6*MYp5V0AOJ(2B@efEi3>75HK3V9el>gCCRBIIk2yj_U;V;r9dDZ z@tv~}mP7#3*$s%L0yt|r9uJs6w(vnS>EZOLRd@N<)<%hVB=8Ed5VP~^N{bLTaGT&v}Vt=K>i?t4Z20=ml%$zcZ8iv#NACaFG zA5VwnUAyge{Hv$xjs33KAwa=aT%WSBwfQ4$TyK8rY0cDQvoj-~R+bK2=L)t}M`ccK z3sr|tJIhplD?VQ=){C({mX`MQN|N+idMU~u8}Y=$)FyOx`$Uglwbt6*-&A=VE_>?( zZ&U#vmD#a;s_2tmOJQhf*yIV9jl{5Wd%!%Lvg2W=Tob=?CVBPtV8ioj^gHd@wLC4f 
zSy`0cHLTks57`Pw@APp}xUtEYuXN^v3y`kNmzyaB&k&FC5U%A7uV+y&!%8#x2Fbp< z$jDzW$Ro;1*|Qw8`K>-bQ?+tlj|@1vA7{=^@y`Rq;Z?cC2l$+Pv@+hyeBR~UMMt~* zkz*k|XFPz@+PmN3hNfHI&hiOgaMaS_T+l*CecbYRGnrx_1%6uAR10ZHFsDTScf{km z)ESonpd($n^YsCJ@5|kzr978>P+5H5k0!6&mUj)IjR<6IJxHlj6|?BTI@M?yd_2<; zkWdjQFsM@&B~ZoM5g<_2drM>YH@P;nJ1-J=JUFR5#)FHC-X}EGi83(nfBYp znqSVP2(%4ED7XTp&x+RYtD1b{KF_c-Jh=o+*}oi^4SLr{Rxqy~rj6JHpszwvwhs9Q z48KCMMq|Huh53U%`~0!cU(k4Qv|?Dg1xJU=Pg|&Rsju7CfyNz3S7b%5jy`n;MN=!_ zq-B~J{atO%_`;4R@*-TN8xY`E=x*<3%Y+#Nk*#t zB|g}RZ?{#`&KhC%wExh-n<5Y~r3Z-7@jtMXJ~xVq%xq%TW~uimV}kUff>af#vA~d> z*7y5)Kk%!!7*nA?Z#cqr=?E+@(U=nD=AkLFBZW+?`M$F}qu!Qf<$O1E^!er8*Dla}K<$Rs)dH?EwH`=4o>4^9$GHBeRnpWxJ10tZpb= zC5aDP35czol|O9$eZIEMrk$B*M(Fc+lQlN~hO9lam7aH!8T)W`Y}ETzHDGmnrfZ_f zXwAfn>f>bQ_`v~dX_#?dMlW~5#bo|fMg%k?b#i+?SzPDTRaO5Holhd!<_n=(qc6N~ zpUq)9N2a_?bf)Cbjd^(O6D=gHHyLfa6qhUn*lc#K2yTb`o;6+BK+=MOLc;Z59L%C> z`h+$=AG$Nk@H1^RxYu9##J3=Hs@pr^66J#^iTOQfzR75+w=2^<^dfP_;z6l1)Q}7Y z)<~7t=9JP=uZ`{*Hk5_d2Cu=@EV3_>zKdvYUoadS(`K}mc4wP;R&r(U9J7aYx}o`d z1sC~rqi#4;G;_nqs}Fb4d%C@c(+r9YFRJvoNBt!JuuC`YIIJUnb#M;qApB3h0d210 z)b_Q36lk*rcqV{dZ(hZ}7e0~ALGm=c6uRN^bX@n%X5Xl7X%xd_s*+2RN-90htu&HB z%2n&GtVD6=YL(0A?GBKtjtpByLgT_Iibi2LrHsQvHQut4RNXhj#u+m;)%$IZC&$E( zz}7YtelJL#7tC*)TAyhXl@E_&rTNvpvGAEKl=|W#vm`J3z2!e|i$%-v&OC!RqDg*( z`qhEf1{3yALb`PFc%;U}y|slMhce3TRV_33h-G(iWY-mR-D%gj(a-T<3hH3haSLI? 
z?`>r}F+ZeDUW{ptP>F8Mnb1wyC@lI|sr9Wt*3UvJO{Z}w;LVPC$>4bRb-zDJ{wlul zZ$JHleMa-$owhmp>5L3*Z(2j)KhV7Y`(ie^x&DBjhr?G6)QfYs(ohslg}DE^I3Vs9 z>_V7nq5c#-ab@mZT)&qA#7S> zVGW(6^U#__@>VOm>Jk4(C#=W#Oo=s16JLAHM>n!u=-*|!SMfMPq~H!KvN3+vZPd*1 zpx#^7UBy+G?Y39Y>bW(W2)9mLd8Kzh(q{U#Ehx2NC4=d@vA64dIZGS76P)9zAJHr) z2einV`Pa={PNX%=M#_smk!=dM?5odPc&Pm&Og?k~(*T8jAQpMi)|O`5d#S48g2ixTNR>1b07 zQy+<$NDA^O+z@-%NH%T|+u09$PF06b3o*+eQ0MxE0lvM^`<-@@QLrs((UR$nMd(tO z2q*J+a5HQ0XC^}K z8fVm#SEiphA4-{}Q4g^S&>VuGi?E$*Fm4L6(hc0rtVv*PuY&J5o4lgyH9k|MQB7{N zmOO4ghM;S($s&xK(rhvp^AC&-pv*tqY#_t@!}k|(GC=^|e^xO#Dv^~^XABMtjzSvB z611>EXEYAWzAZbW@qdA3G>-b$g2x$RSRKiQ08#9QzT*L+V!$*!4_-H-$g({a)sw00 z!SJmo%d)GbR8PkK1xA#=V2(m2uq{OtGL;Q{DP&o8c%ccof5>2t3Sm+)_&Ps)d{E+@Irr0oYqMkYN5X_zO6h ze}OPF7;J#u@*otr#MT>%!AP)XGvzwHi1MXv(Gt(z|xEny&7< zoAX92tLctBK<;H{#}4V4OGNLnaC(((`Y4-Ydm3 zem&HL95>Q1-qU1{LqfJeK^h4V94Vzy7-h){`&@{!Z)Xm|BGLWFqs86xv(9+J7S-m2m(O;ctvM@Vv=ur z`ksoKqZkqxMc*$4vv~vzq#DQw$Bf#=4Zjf%IoCZPC&gePyikS7f5&6O za6Ij1t)4iHyP<;qrKV29G`kk;C!EP2w_ScYcl^lFcX)tMr)!>*)1~4 zJAjlX9uyXNC}jQUFmKBq$A_l8@Ng7(@TYQu0#bG%04*a>fGy`|LmL@6uG%?jf*9W-3Z`G;8EgU|9U0a1iT<9D(FV zBk&d(UCRZqMXpaqPh*-W>OuRU?^P{XE+jw~=lsBid}%cRC(`@TE_z{?J3{T8-!eE$ zU)rQWV*;ylf*{CD#Del9L3t-t!5tQJ;N<=T_U}*-=q2=rI0ATn_$mmaK+hsz({6Ie zfwl#FGX9s9fH8%UxEYCI)^K%$rPzRgWAPpYI{VK-mljn%A4TqrakD(LK>QxA&*Xiq z8HIyDe-{DtrO0yn6a=bQL=k`=H&|;q5Gj= z?w_ct-2=*jNY}i{j;pKRJ9VWqI(U#8e;CEip;nX3@W0 z+sSELp=_v?(Y*rm8-JjezQGU0N!=WLnN^%Q8C(;lB*VxtygK(oJY_3kpCQw3^hb!p z&j&zs-B0cD%*8-C*8!*;5+)O5fw^^rEJ5#4M^Z>fdVUWSW)>&QnqUG>54AJ;r1SO` z3#7;)s}6OU>kD6RQY3JB)kOX0D=uEyzN@EFAS)6|^KHLOw`}n&vS>#!&bKSw`Zcz% zj%P{R>3!O-(9z$oxwS{ys8de~RF~9Ws7`}fubGiJ^Eya?_LSPK!|s~>@}4r03ZCMh zwiaIX_P5^5enKjU)()I^`}MQuc~Rt9tLDkHa-WUpV$*&wApg-G_a$+sW-0E)JHAcB zpW9=tzO={0-}L{k*?A$NHv8AW%4lNYSFaO;pDj8Q^mYxbW|Pu%`i@JW_xTz3?-cZ_ zYu8`gAMrk3U9o;$y^S{6>DoVh<)-&u2iuUm*IA!kE-JtFAXcK)>&Ch}Iv}$Yd<4Q@j5{+>l z=>4x6XjjNMrIRZDoc>k(s&8D`xhwc^iS5jd(FLUY;gX#c5my;{gVUDrX@!tFw==%o#{hIg!SjdMjq 
zUm##X1IGOuSN|Lvf!$@u2#uyDdJL`Grj{e9C+|e`g!j1sFSo-(o!sZk3&JORwiG2j zZ+WhN%w8UgGI$=|7s0&FH*z+*-quIy?5uy3onc3-elhugy#TDA3c02NQM;Xwz2>KeR5%&0XHYel?{yq#tVz=ymB(%G)doQ6#ughROC#D_G*) z=5EGPwYEvwxlgS@JzeGJvNkohFPLWG9zbhIfxR2`>TPb6DF;8#W_&@GVK3?a$Xkb` zNHLywXASu}%Rba9*{xNYQ-=cf|Jdp|8s>Sb%N*;VDal1fkKZS~g}v*%CG+l_{56&K z$Ga!5X&u~b=U8VkQiWIWW6!X4p?16fU0n#Is?*TK*g`~PWcC`%YGz`2vgCwo%>MvL C5-FJg literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png b/doc/doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png new file mode 100644 index 0000000000000000000000000000000000000000..4dbc5594965108ed854d8724f409d652c8ba1f59 GIT binary patch literal 6958 zcmb7o2UJtt(r8XdAT$ZR2?<3Jkt+40g(@mlL3$IBDpi_N4n;w_0#c-d6a__4Kxsih zq=WPxrGtdfJNe^x|NHK`@7=ZTdS~r(X3xx-vS;r-YtMeBuX~M_ih~LQfzV#pQojX( zKtTuuz#+&;6e!)55fUP+uWhJ7BoaxK+}zwh01JVDL@a~|!Q-(Q$oKCM6{nv>EE)o0 zupm*)28@9~Fd&2o5<`eXuPged)zkfd@BqTjOy{f7Ti^X<# zcP}g~kW3IHBogt7{#7~D2tDE2B@x)OG$r|Jy=^Mfu!z&VF{5KxM&h9~S z1p?`b>|KNWaghk=9Uw9#5kvF|AV}1J9D!adFCP&WV;gE*YM)Z??oQ=`XyR)KJ`A4> z8Jy0YPSnR3xrl;H5Ri!p#E=>Y+F(H&BKXID3^A9~Y>2rYL@bs_8V?cMQX&aXF+uFP@Jr<_8#`xyV$iUX5=Z?L!>n}th<|9> zBXksh7fH+i_26ie^NpG~c``a~K3H&#vLL!$HXs+k>!=7M=_Od^#xUfybMw~nQE5EF(>$LZ3`XaDB8DyUH)lUGE z!(=|fzjHGKOH)g_k})$9R$LJ{+HKu4XQ<##{) z2)HN4v#J^LD4^yoUUyHE%C{xkd&OJWOGc-mrGAFmac_cY0&j*{TxL?`N}*2ip1b!O zzy8Cq&}jR`4y7FX_o&L*jm9`l7pD+WW~-?B^kkomIz^qMD_=CYn=CD5Zx@+q#q>1Q zT^l@=PjW9RlLt6=)f;mD-h$uCz{295a3K`!PHpw>+a;U}hWeatAfdE;Ph0^spySU;en|X-wu9 z_ixg-*rcD*UC;XI{Nlbmqgc+xR4I+ouSNwz!n?&Mi5#Y31_Q?p)5WQ4r&TYO=YCV4 z$}bbwY1{WH$7WX_98$lpcCINKoLYBU?VBn2zP+uSe0^f3F%SL{>Pvj#_1fs&+4WL7 zy|X-DXJ=!~<1xZGYXitBNW*y>5O8{jaJ=U({GHgmF!cbl6W z{nu^B#@+$j`1(|CfHTQ%K%hW0rMo%)Z}s+$YmH_`7pF?^Nz_yiK>xCXNdU)_=45-6 zEJs5$_F0Q=dP!7iOaZ}XkrF;VBl2c8KH>&O)12nDX`~8r;^RJ$uN3^m!4wVj< zZl^8^9bwI|7FOBakD8s|Pu(^rt8@|_3*PfG9(?`jdDygLkAJn?@SG$o{G*`c8#Sl5 
ziUQ&V6Sd2yR;KKwGlY8nAk|*#Ev_`0Znq>*$8n09n|jR8_JL{8=lx&YpY7FYDUFGq2>Kr)qNg=xzV|1hEQ z88{c)Gn3EoHNG|O#$5&p(PeQN^R|h=V_3KQ&&D+;fkM>tkM>y$N|{9lGut(jti8#l zF4bQ*qP}{6wv#XJGF&ikZ!U0pI*>YvOLiI(3X{3}s*BUMYsh-(pV^@^9E@n_I+lE1HWqD?@KaeqEFse*Dtyl%)ekvwSwcAgCte?AeC=B>((k`<<4-M4u+;Q)RadM!L?U z&y}nS9dbCf&r0?K@BIpt+Bc&!@7wFunJrgrh31F2SqlSVX3ZMPr|vgj=&^`O0n?}? zyJ3lI_gC_#EVUmdhnWTrJri;D65$pFBfr+(`ZI9Ru2nW>n}oEB#Q0R4_T>(S!1){ ztCzKliq6;~(x}HMN^H{xKD-%Rr)jxz<)JeR%7^4V#u*biN#*!7WMLz?t!E%Qv*mK3 zuf6W^m#<|p&M%?sk9u^PrdAz20z`7xUG*sPee6ekk2CUJt5#ME?qHIzNX1n+UvA3- zxTGIPvcYRN*tNf>E;2=Kdq7y?>bbEyr=6SmuEQA{Rpy?992FcdnYg$nW!5x?*RQy8 zFX|UF{tysTcaJ9vR+UE8Fnb7e`ANUr*u~Sy2C9Y2^W0Rh#+DDO+EJOH>hnA$@nrwR zOB`B$7rR&3_LjqUblZ=z6bCa}NF5&ZhF>|n|S%c=eE$cy*#&}=ELwZ6kcDYq*)+9JD-Q7(W_r#<7HnrHt` zC8e1EEZ}bU`17#2@b_r&eM=K=r;+a+8892^*KwM4&4PRzux{Toj-6V%sg*`|pj2r7 z3VPp9Jn`az{mIIRPmk#7gyng4tPD?zM9|cQy0c6x0xgp|c2ul(}>i&od;h|VJa$r#tWmAYi_^W5FX%^8FIxeY6yI{@3LAs*t zjhu@<*W<&%K8>2htRTHRNC0(n!+I80cTTB86Bc;+Ta${6IXy2fXgi2@q)GgSDy-hU z(LE2vvstoN;YJBndSOo-!nZF+f_?Xg;sqGRWHb0^Lgv_@X?yf*Kyl|yaNA_K` z4xxqZBBkr4Jti`Gg6+wZ!$?r3XMZJ4Fn=;wu@wUgWXn5KY$HLIN9*C}8YCh9P$~0_ zeoL%|+ZGK~ifJ0y38&4r`MYab&`l>s^0aNX@z@pNUl)k#o$W0Z*_S~AIjz>!e)+@e zwCY%a&6)y=d=>1gg=5FFC-EwE;>F4Q+=v6Vvs3w<+=Cv{FOlf?r%1x#p%QcHKy!)& z{sBonFukkz#jvYfK#LF8aOex7Kl5yXPPWslNBPkEmNGfu-r5=ES-AU^?&gkYul@XaSUr7Ljz|6ODxIn1 zI#L4$Y_xPzoZqBoOxCWm_{)ceo;<=OM{1C_L^wGBrcPmU2Dpj%U#)nHAdWB{# z1O<931If>h-S;hdahF|C4=sS8xw!0C;8A~x++$h2h|2X~!v{@92$;pv7b>x?Z+;dh zim@Ta%YONYxWpcJtJQL|c{%7iPfNIL-+E3=)zwGU+`5c;cgo!dYMOPDcyO0f?||7; znzPcs9u)TdWC%5|0YJZfLpb`Oaa0|rueKio?OBPg7jtnj9OvCG z;ozAg;rj;AZ#1z-g8YVG~Wh9bs=nb>Jnkx413zHJ_w3fEL=PfC!SS^1cu=%^|4te80VNSB z#{R1B6piEDZeTV95oru&xL8kYav9YDEs>2vo08dvWDR+egtn1DlC*c=s7T+^T&SUI zjQ+yzqPWBSv2O{48Gyc2ZB0c2^!oI?s<%Jh`kFtkB&jglmp~SLb}^IRBD|!K`w;ymZ+qqG4ntl9blvDulh#m*U+@6+x9f| zv9pP&_PC~Lm3_p{^8LPJA=q@JgUG;LDhs+gx`bd*30|@vmww>m*(eGJxWd#gy(+`M zX}Xv%gq$M06ln)!?#E{{_N9yR=vj;R6|-Ku=zh zMl>!$jDG7D!hj`p=~$^A8a&n{M(_1})F_}aXefB~ 
zAU{KB6LH{44AUrCt7T+Rl0zkubd`{Qb`G+$B$DD^w96DYOLy{sSc##hdOk`^HqaCX zao;%q_9vNC3G}CVfF%l(P&NZBCU!tXX3;0kJ7LlJkyH0!1ws2`Ff>u3MBS}hw z{R8Wk3gD0mqAX$`=%yjcSK&3Jp#3%Pexs-z-kSJ{1{L|Znf0erC}Z-kU^Grj4AFqWFqQ>xJxFg-f!AV6 zzzj z(9q>=6)ZE7@X(*dEe=i3p(t>1HI+Gd>4WxVtbLf|rTOhNFA?olrSI)77}!VBj*6)^ zXqQrW82~#mzmUtC`D)JVQ8NVwYXof+C=_p(iY3ES5JxB*7;i;zofn@?1;Mq55s<+d zdn!g=)m)7z4`M2fJFr-cgtscElD59PjR@~KSg<3l=U>_g}l7T{7zh;?W3bZg90MH{N zs$#I-X~LA{q=kX-XZeU#LE}sbH8{m$aix4vHtqV%YvLTlS;3U8h8L2m(g-fF}rXOEc1@lTvONkOKV6Z3gqpz62KA08RNrv|T0sDfp>2da^R zlw~nQ*>N8l?e{K84_+I7oBz_nT{mC0Z{r8*v{50CKf9uj7@XNGE!)#)t*uI@Ok(72STSu$g{}`jtsw|DO1N!y8({NA#1b?D!0txOE2sRXK3V>=-5cfcEKn1Ia zws9r6vm)3yVN-DEH*&zQIV79aIwr?1zJ!*e0?+NtyuB){LTn;z>!1zPX7HZ6k>! zAP{V14cwCC+z_AuyU)u72j;LfVdr80!kBR?NHMBFp&kRI779prS%O?#q@OnEB(*UR_?{ZQG z$MpTEXQ24lYQ(*(0Knu4>+V$WerntOv`!`=g&CXK6Y28)DNnbN9SYDCbGBYqU0Td~|$@`Fk&+2BjPqTSt*n)8Z;jfnUazo1ddPc~!e`>I1b?rZ^T<(59{*itX;2_{ zt}Z+*Kegb_SF*6hfyZHi0h71KeNk4XhrPE}+{4ItuM$7a`QEGQ{~f75vf4H%CoR%o zjWyMG+zAHA!s-Wn3Oo+K15e%; zrm_w*-`Es??)eOj`;gne{G?=;l(pR-di1N#EE-a6Z^Cn4J7|1cpaA@QRPU?g4IIn! 
z04g)fIe+n_E2fk;Jm3s`na#|eih`Z*JDuQSVryXf#L>t;A9IdwYpHzBD5P@lLhUYh z4g-6tHEc=-sYpu|YZYxqJ4>^r5lL~ljaP1dr)QdS-20=7cWx^Nly`5_3ICeUS}^jp zCR|}aKCC|At$z@!ii}IM!;0zif8MYCfdn11sGhtuLRjN%=9=|DdwL7gRsfZp zxcdAfAGDmGr2tjMQ?C-?wX)^F+b=)aCk=bOdre>Q_>+^yLjA4JoYD9(O(nLG_U}&K z0RpxkDuy7K1f~jujCob+66t#}lU;?@DOKX@LMe4T9}=#vvdMYY8iZJFedJYO6U+^+ zc$=hUJt?JOPAbz+-`wi?qw%kF31g}^Y==9(tam1Dzzc=o;s;}48<98lZkE7;-e=sC zpx0h9OqR!QMGo$1-(KBF{~>0_Hj*|BzB&34_q&k%yjAHGt=hD2Wk=eZ`lA3-e~|(g zjA@>e=Z!uo9O1^Ao5_5rr8aRsO^3y%V&uh6t-%Tx7{$mu(S zZ9_$KLX72~uR_;q=fLShDx-!u|(fQrwGE+`UMU0KuUYcXw&g7FsA0+#Oon0u=X9G&nCk z=broS_rC9q@&0)kJK1Z``J1xV+-vSN)}D!aqppC3L52YU0I-x4WwihRWEkR$^%Mm` z(Zk1>1^}KhI>^Ypaj>!i02HIr((K-(yd@quJ`>w}FNOE|!yB0-R)*lz7lzD}RpnB< zd2i&LBb$)fq9gg`66KJHjninI;&AL|L6_!3BKj1vjt$xUPPR zo8)k})zs7qyk-gY1QZ_H%#JE+yj?39QN>Ak?6%W4_-g!%chz8RH^}!;WxRv^CVAsv z%p{Ny13 zlE3tedB7D78a*=7ATq=pof#l!PA&AbQi{VKpjJqjia~Z0Q##30gwP3)RpJ zsV6HTs82(`z921@=1s)=h`lEz|AM?Wf`aDNAUQ_p>mqyRXT!n3qP#b_IvBdrUT^Tv zpL=3ABQJ-e6@BF`uycp=Y(?8`07#FfaF|FGvZr1}!pAa(XLWY^x4s2N}q z?;_h+Qd9I{B4rmuf*BEFVKK@*o=KxhVar!a;z2SM0f{B;RW;xKYVa9T58dhgMQ4P? 
z)Hl?5jHZ81Z>3?eroqHQOGxTuw#c=O<(5!pQFQ-E`{`1#UKvYiOl3rANm)UeSh-ON zMI?TqyA0xqwO&E@W9i53>h!AF;!8!!nsL1ZP05cd zWfC=AlTUxnf0?M@8B4Z3F%bStG@Hrzc1oU4x6bseok%|B3t^3y!9lg)=Xb@FB_>}2 z6wqgRW};?^~PFCLNmNe5N?x#|h7+4dP%nvm#jT-IawD#=4uMdIu68AsQ z7}1`gA;t=;B7!r*H)1&=h}?nvB(5?JR>N51ZQSk1(?#dpxru>wWf2W?b!_vxYa%ZG`Y5blgAZ+V>2HsFY}rsEp&GcDg>c(YYbl zw8s!1Qfe_4o))c}shgMfGuwgEF<{!zzQh^dR@?P#RBuMGyJ5mRx}+~^BX_g4mhUV6 zeErgx2S<}pQ^pZ4nCbTQEfRWVSb?8a2bEJtQ$7Ds-`YpIR|(@t})t zA74LRusB@seEb4-R&?!r?r`b5TQW|v*75^chgmE69Yq#dZ=}vuW7SlBN1fSU zHLzZK<$4&ugZj5B+?`x~3J(r%kTtfnUx&~;YWZuq)>;C+PtBv%m8GM(XJ{|XD;0mS z+N*|5&X#$7n#z#ud%zrvH6h)|n$4QYI11 z1JgV}oAIt|&J{}-@32-E>(soA9vX&@)R#04X$~V-PmJ=VO&wO2@{mMZ&InH%cQg)x zi!)j@dNj-=%kLm9m*~?t-y|^)-?o(jf9!Neb{ol|$!*ES^xdr+Ns>JnoNP?4l((TU zlrkt>T3j+5kuSK`J<3+dI;ijUBEeB8ZANRmYa7vDQMJBxF*XH9x-oAge(nBJT&48| zXtrgkt2*OWZ{lSX=RcO8ecglm;Y0j9mBSa^a*oP-AO6eTFp;w6c)z%tQ>#p?tbzPj zSFg_Rdmkz)G#kE^pG=*6+~~$%Ve-ov&Cbr=%26@=l^&a(mGYA5_`&(nL%+(!$%MS* z%PxUXn)`kqS5Zf7uDAHIM?^sB#pj!aY;Qxb1^V>23YlrrDV9TweKN5fV0GD2ZOgrW zn^{7EzJytT^y%v%x$eHZ7?lhl=a`8)L=4PuGc5a~KYu_UrDo;$VxO{U+wZ8~Yv4ft zRwy9D#b8+|Xli{MVsvPf*Qn+SI?p+8e6&3^x((iw8lbEbH35wsd*f7-whCGEd2Z$3 zw;jNJ;jTC}lvETa**!*c9KHgQ_P2H^42fE^C zV{pYTCBP-md@p@MB~MNhzDp7@ofV`MWN5Ic38;~)6%@GVLT)XWV}>$VxQK0^KcrsO zZKrKl)dej)%y3~4sI?Qdn%=Zr#f|1{xW=tsj{1&^2V`8iShW9c8@=kgU9v>eOnE&6ORKPUiNCV6u4n)HSkmLm@PX)}F z0N($bg}CGf$o&PI{Tuf8?|%yZTf<*ce^mW*{AvG#N3{RV`kOxc5A1(q{sRB)&wqmd z-Tt>`gvcN9|Ize6O8$2EU)uh5;~&=lHu1NHzkmPn_HT0t%zyC^fk_97ThepNL^dmN?On; zZ2A21+~B{Q351aenG< zJN$NQ{c53o zPzBIdkFi2%9J7-GK9|4Dle=yHXnp-%*dt9lp!V~8Wy-9Vk-q!-aOHGv$!tA2q;h@H zovqAa`MX-jL7>HwXeQ2?|HO^Z+$K*-Z|x^g|RmbRns*POh{`Dm3hwz@5^aXPP|jloc@)%AK<2O zNe~VGh*79C@BQJ>uOaVhL|zg+pjDt*MheGERlfm_8`^FEq!cn+UXj9;9h<1K7d0D? 
z|5Ba@qnU1Tp324~7`p6e8J0>O)c&nrDJX}#*x=D_(l3MF`W4-zR+Nl;(8Ssqsz0>c z(ZLRB9PE%gOetVL)g*fwDj##GVv3GM#2t?!WY`T_%`_0+zv9GJ}qN4A;TevVdzO2H2&YIFP-Lylu+Uai{a?MbO zW)cJ-ctOMDta6?xr#=!s_tV0sF!IO>9gtR^&U_ zu!*Ok?SM$s>}+51E~^58}Y$ zt-CH6t9}i-tMnS>K#KUAA#j+{`$2d6(W@aFVB`3^iSI3Hud=hRfP2FGuMt$@ zIb_u9oqLszo1Q8Y6dqsjAHZSTOAX)$ag2LIS@i}qM;eeG3idRT(aYpLP$Z?4k>HVH z(Yq2l_OgPVUj{Tq(0l%ni$g}DWH?%*e1Z(*c4%*9g773l=yl4H^6F)v>K*br!ws-( z;Po4sBcl~mV2SfYAW0j>RVp4o?V@Z_~XZC}WG!TFdM^RaKbdkjJn0_cO zQPFwnEgnGwq6=DEg+lw~lOoc?%Plj7ujfDZN$lf&`Yvvj=0_J<(0w0P^(7MFzQu+L z-iYPt$}hc_K#JjIjg^;|paE_390Nf|E!YVjJ`gA7<@ZRg_m@xoAyw7xlh^|4$8UDI zA`U{m+MjY*^452!xBwl6hPa_Pmj?HOdun@ zpCwOS>AYUFB&8%D-~q>vALg!0O6zo{P&7fxs?!LIU&!$gqcvpB46-XBdl2rRm zw@P|mAClWV*O5klp3qW6$E&(Rt(Xrr3K-*QK5AM>X69jm7m|edn;6f1Y>p+iQF7s_h74q6`q-APzyyX8P8BARZ`K~j^xmy9ela%N~5+p?rVJ$hlgkM>;H+r5Xb zO7Y_x7w`C|Hcj?*};fgXH7$B+BJUEXdt+ z$;W(MF@nck8$8i-m^a8%DrNl7O^e76{n$E(+f z66S+2AXM&dEE~AD8WtBRil)&zFPZ5!a9!_~?hm;his|YLuC*z)Tpst}cn_nu2&=2N z$sdmIKMlsa>_r$qe1f;T65{kp`epTEx3w%ITp5$uYpLUlw#5_mXKn@$u0E-^9TD14 zvbJ^x|34UQga3FCR&cz$eV)-mM)7$O9jHOTS&E~}P+MZu{4!&%i67sCNtYpDclijm zo9@zkzPvyx#}`EzJ(0KT8fsW)lY2tX%iGWfoS8W=rKiSQqPr}3U9CW2JnYl^pl!eh z5pZW|U_RJa7`k@_1{?$SW` zv%hZ>m$d|D(Yqotc;g_}=6I^Rf+j}#t@nI#bI%$3y9Zl4IQwOedkwp;-~sMP<*+_X z>^VBR;@GD8>^rvP-KS%^U5SI&^NQ-V#>H;*(Z#ivJh3$A!Jq+d0m`M)1+cY(ECGW1?DlU8B!77vGaxBK1TsmIrCE}H|@3bv8s&xF6 zkby~u+LoL0#z_aX5a|7_ND@Qm`Df92jZl;F45*Q}5vdRcZ9h@~@j5DH@yqDDLOE&r z5=B|sSAxmx!^_f*JA#|)$yZO+fLe#1^)4zCH+kGVQJ-;@m{J%6eq}Q$)30c4>dmD0 zNxz~X-Jcfa2giO6BDq!i`urkU6zhK0J41yjh7rvoNimXE(msiLh?y*DFfQ)nqG8$c z9Mq;Lk5NeCbxQ7c^rHsBM4hN`&Zl?-lekeMxGAi$J*iDzKQWH+dkT;S^XM*WmzB<& z+cyO=j}#5)E!TXcW9}5?Cn9A{o$1p^sJ}=vy@V@TE076)*BFqI{i9_|zSt;SGfAtF z>OEX;g^PqGqGt^~qG#hAH9jbt@pMP>s>`q*CVs2<>Dy*=^v{WKYXkaMfBA^%Pw*tB zs1h#IEW4$?@$C(y%G9!WfyP}G<^rhh6pE6Qi(X}w+oTHoGSPhY`W03EBl3W}wGo7k zUN3i-KQDf^B|7nG%qA+wDIMwikOW#FlTk%ZVoURDnDqLeAfNTj3orjP(D^w3Mfp`L z4sa*FBB9#2{;}8rSk=`4oGTA2S!F;ATgG3vNJ2y;kiNuTmX=E}Sy>Gh__KLH!M=2p 
zia!3+!BkE9u0AsnA&a|{PXSàRQJZt-EQzj!}W+?MSBvK*1x`k}Uh@U-~`a5%^ zH9?f(W}m#)k;{ni*5U$vyQXuvtuidoQXsur<-_1&vUavCny5kv?fXVQ_w>8l=E&Z` zq$q)j3nyxcc9+w%mL8%Z@gJ`yTm~+D6nbb;|A~N$GrhvcFo`IZ%K2jX*9(2}2>)OV zjQ#5LCLaW+k3zmI@XKrRvuYjAs?!HyO-%?F=g@7O;P`KbJ;EPnD!hKs5P1$_84}3z zDx*r!29krOQx0(g4Ji$nRVg&)$gZqJXJSRJt{R)?tl0&-@t0e9{SUd8xnmv1w(`5O z73KE@I~dsT*_Lp2Qk2P&o(i5!Oih^uu3KeLQx{}zfAj^K;SZIQKpJn1Hec_(gfTNQ zmb#r?%`!>&``>Nav;Gbo)6rV1ig0LbASG_swS1$dYQk-IbJ2;IkPwt#mZo!dbOYXS z$vLMNNPt6QVTp%LCJ8TpIc}zNk9#x#O6O=wS>xj=S3$l6sW2)etkg1ZY^*mIX^@g1 zcfgo>tQ2_@n=GN?ASR{EMMy7JAj&KRD!q}Jxjj|TeiNJ4jtmLw=MkW~@8swq`BWnX zEA)-EEg-cs4PK?>kIw<+i%Y0A!hW}ep#ddwWKBQz2ScT;xYf?4{kQeEXNL(AMeQ-0 zt5+d`x3KI#7kC9Grk`6d5lYR);PW*SPM?jOUI5Y6x+XKG)^6w%MaxBrE`$tk4oe+Y z_{Ju(KmMVVO7}@Hn9RJd)|jJz50!4?iLLq}>33JeqP4^aeGvsUYv+;M##&4KL=;v#; zc!uBL!ioe$zvvUmaRs4lVmm(1@^7O=Fxj?~`|e3lfan4?B36e5@`(P4XBE6%T73cu zTU;|ZOiyAf723q!?~5G$*>bwxkf%dDNI4x-QN8NB-~asDB?95E+vdaF(E&s?Yn>3Z z3G*|(ThYA*VxXgUo*rI~7jP|yTW(b=>cGVDRu&gNFJ@>72?02Fo5`+QdBZ~qcjWCn z8@<6Z?H{|Pd%kV=Uvg<#rLur*W+a&JaNh?%^AAS#@Xmj_# z@7qb^!)08}vLr-U@|9XuERY8D1_`s4^pUcM`+X5yuYPb11u0SVSF8Y40yTK+RYB9) zMU6PfZ>rTU+vdvJm0hLwS%95tzZj=q7>Skb5|g(a+yrg$qkK zn*Pc{q99cpqD8@<^QKOltJFpi{CT_%oL!D;_cy$yV8L|T&qo4S@{l1YrmZ3rf%iED zl?RA28iH>k^F#sK5BZ^xC} zeGxg1#sB>L+^XX~>D7b8v0v{WN-S|r&^3rC2EFwF7afTHiJPb6;-={%3B@&nE4=HG zcYKw}j7POtXSnx0i_oN9Bo|nZk^O^$gXedn3Z7BvYRV)HqA<~5e^Es9n1a}G&W`x z_sS#JCoOr<3=^SWBOkLPTE0?4*oFe$G=0SLYo$hJCK~5E=Pgp!z8=2EeP6iEilmdK zOa{|&7?#+qW`qaQlJKc*hK`V-IH$70i8LFW;mNm!Ze?CO_W-FbH^al!5cQ!iEi~S! 
zMQo*Ts$TD%drYZs2DkMgK==(W;&DFUCv1x zZCH2`t=^}Ocx=7niR3x^_R1ybW?xe1(h#mZzB>r0$?9wn1AQY|x!tad;JNv*LU1^& z-WMM-r)q8#(c}79;u^H$#r5KMt{`kCFFNlD5dBbSy?00@KJrPhVP>dlo;c1F*$CBFRDIYd0r%r{|{ClI3=Cb|++ z@rpy8LrlbEjWs3>dYJX`XYku^VPbN<+t61x-8K;rM6isw3z){zGq_km=@V5YMQb39ow|lgBn;FImPA=z*rtd}xu?khNLXbpMWeFcb-;$9GGi zjt?;<)r2B$(W|p&3bFN#`A7297Rbn>s;{q}<`cyfRaQG+kO| zXpY;ZPw0HJdVv(lS6~#nqN5`JYq?nlmewue)L(u%BL(Xm$QH0nJQkq={o-+KUwnx; z84AwZ!b6})HiltMzAY#aly2V(88$+s&A|Cs%ni61f9z_GQL9reF%3M-uGPLBWfC1I z>C<0B9jwxi0+e)SY<|dD4N`X9ZwjGLytS~(at+uikdtY9l`}~Ql5G^1(=53aC&U)C zAU&gI_tCcuO8p|Vz_`@Fq2w(JJ+5&38WpqkeoA-CB<5bTdR@msYO7i)w9tMnkn4R| z+-^Te?QFv=oJSk8k`@;Ncg}LTyAQ-bCnp*6oYZ9~tTM#>D-|F%f^~%q27Cj$i-^#2 z)(t7;2wI{?lLggvV9^~o7#TXR6W}sJvY+etVnCFa&vdhgY2o?SG0`&mQzAwgwR*6G zMCX!rvad{|TV1V&Kt$>w*$kxjHe4xUe<#|=&kCAZy(P97{r$RtQhA3TJ8a#OakL=< zUDqdu*%-0QCBY1JSoW0+ma*n_>b)slnzV3SepbsmXu?u+#2INKy&Ju@5;hNOE$Bkx zHK8)4PNS(?fNfqR(%PG7S}Bnar2VxT5QZXh%35d9g{IY47IiV8=Mo;;^^&FTyS_`8 zHTz}wxV`sy3r4HC%Q9siTQ3odLGIUOczoaskMmlKhUFGz6;U8KR}R_AR&p0jj+imR z<)j)@f1aRh@v@Ks=f)TVy~R@KF-Xr1J)BZ7Hu^xA5mYl}soWyBWIE_i0JCwnj4SUFA2w|CF_Q;L==hlVR!8P-PeZ;Da1ART`~ID zjVWZErA_fi-JjgPqQsosjq-ZlSM1chq8q(+`N~qwMfk3v<$VwCTfv$~MNmtnS`@eq zWi(##OKzjI&zKG)edc_f*~5-0H_u?+y+8_WnwCUt7BEvw|Mo1xUFWu7#8O_pP@RiA-}m24d8S^fg_s@&It;y8XzwZ8L3 z>4-oD=9&J;m6jI>yi;+m?P(ON)IN*tm;y{Ckuj z#&@&@EiGb!PaBWhHd?M@qyDi41VX@+Du278g zkb2=Kf;}=fE+!cb2k$fXY2 zQT%{cH&5uS1-nayDps`!imZt5v!4NfXeh&oGuyx6ZtqcUdIeEC$?ApIV1EcYSLj*O zFWKQvvEqw;?%D+8@{>j$6qigPe}!jfOLQ!vOWmLL^hR6(HVo)jwGjgE@5wsFXQ<{_ z=;T^2Ws48uelCsdmd<`I6ZCRLK)|BeHA2_DP5s#k%}aQz0X@*vKN54c*9xFgJd3xV z4#`x|u<$2AF~F;jk2MwzaF84C^0W&+PA*RbOW3+_U5Hs$Tlteb_s@{+t5t^lwsDa& zJZI_u@GjK%EdzU$Ap>IwF8*G=B?5v_5txgL)W>LX7Jp*FZmu-F=#G2R_~Tw*ji~>_ zw@#3f&R_tzd}MjxT(q2oy8p>4gXT;4QB*?sd40l(g(^ZInBldI9jzJ?QhDl%K2Upxo%0H-D4Mp0d?%KB^O<z@BwrOx!YJrc zosDacvCz{d<1M4ce5q+{`OB$h(a^^GxEF?e2-6WKP$sG|%rgnz*r0$B?q5E37G@1# z9hh`r!>t^vf5nWvC8oY9869**9(tKw1Enr1*~N{I4T<3!3?PO+BfmXuQP`pyB=5)M 
zo1F}Wg6zzqOO#!4KaF^sx6)Lz^4Sr0?_VqbfbN*I8Fo_Wdx4-4CWjcWU*(=)KMVVs`u*>N~(rCSGHEBz6V-=QX=7tHBiYzRy!VSCoHQ>Ld||5kdws6POA-o zme}@WT6FjFqR4$!uwuj~;ls{RXffc!jAVH_nD=VUpkFr~f~{T3Y(Dy%G4&QB1+`RF z+U=Dsc6*hGDSP1ErR7O*;9z=veK?CYCbXpz-E~xqBti0e*yj#k+Q;ErQ@w$8E7i=m zK`D1BSZ_^Yc{hgQ=~+-oEY=4yDV{^X;EIT zsVc|LRF)U>Flg3jXxhTqz|?`bPKbflvx-BYaK&Lk5K0NzgRb~?RIX1F#vi}I^bDX^mo*2@o@CcPZ;&&2`%oaq zF{qRTXIh83E$l6~9B$vKj0WNUo-TA*&8IuVOkF?dX83!1}sP%U1g z<$6(Ah)Dx)GoL_0)?d%>JJ4TlTasajQ2iD4-F!F|d9xko^8%#hxkemuOwgGqOpGxm z`*3p+a-1a@#-fdp2p%$^Fv`6Q35k|NefCA<{-<#rh>@bZ}tYhQm{`vSwBkkx_N~Y z6oviYD~lyX1W}f%MoGaKc(S-EzVSw30jb|_v@_z`ZSay1L4x&5RcYYGMR9cwpGn%u z1@|<$)P6P_$*5{@;19OhQ!Ja3*j4pcl1nd9UUU`2+1E+VTG2R^gqTID>6(A~I5Eo^?M{(Ja_J&uw1Lo4q$sI54G61b3-Y z8-ayM7;NR4*s_{k7dtt$SrJtB0;8bi4Dd7qi$TP3_y?#8YS`0g8FupB=_l{&+yTMIL9sO?mr&gmLP3^XZ86 z=VG5~^>%i_7JBB52INW}i))coW^Xp159?cBT{UFy&pAdFMTQVRoeFoHW3ZB4Je3p{ zzwgn=&ksxmUElx)+oUpAUO-BE@ic{1Ta6Z;ORpY{!wGZ1KtuklB;5zmqLPmq9I7?)l!+eA?=#E6?yH;Kyp zNF@gALvep}tMppBe)ZBW;AKLSt99)g5*+9rv3`zf1`WcP2)X`h)>koESOi=xb|+>W zAbA{&5+5;1B4dSr8BJL#Ypnv89~!wk@@?f~__UGLPG0P^rB!$3ud)5vd-9PCU(Tv~ z1>5LbA8>7P$LxG4R9*r*mo`DEHJ@JKC^X1zsCSCa+{>gjzz4xjaaoHuYz_9%X9zEw z$oVE>yz&ZKZ8kVnoo?Hz1t^f!N@KHj`^W|ZEVH>Jt+MqFH`C)pf1!Nf?na32Gz>>S zFRi9Fo4T0yS$;Wtf$9izf7=xBXqd{}jOcj@7%wMS9v!|{tsK-6|r)o$SI&i9>) zIz+U8bjh_pE=vi^nVZIq$JVkJgL>?x*~WI$%cs6(1Ux zRaoVSYBBxXUCGtiO@Z&;kn?zL+Q4)rhKw(gLXW%&q#M?aWd`mmx3glbn>ZiJ#ox6( z`POa^J&+aHl)!nH`MNmTC0VpStKq;hzLo1E-Q_NZxjocLj=>ouz8(w}|GDla)Jjq9 z^((>twpEULig)ZJs>Oa!C*hhK;oOJldDv8@mz#S!U*n52;Qd8bmFDGkK=Hv`{Bkhw znWm>2JwHsKm<;jYvrOBqixKmco0uC}20ea$w?g?IAL`mW$SrXeE-gw@XJEj<54#BM zfGQ}N-NME}8<)*h79j_{`}16`;zwM~Fn^)8RvPNv_4uy2Cvo7hU!3qE-Yq||)tPGv zzF7#LOp@!H1EH^v3jK#s8!D5{Q^AQ{iQFhEqO;FeOlBUrN9-*?{J7m;w>55dap*f-x4xM8$>uU{@ZJ z{Go89x~LXbsxNO}JRyt&=lxpM&9eh9fXG$2>t+YVR|@R}kK>_V&K4HVmq&iF9?hul zq<|dyT051A5krn<26^1k4PEHFb{U)PRa8EI&YJ_W@)HyVv2q78{Z4Ei86??2y&&`L zdg>i01GvPJglKpzKXAnmx`uhX`wG1~9%fL`HZ9O8^X6%VN3FRjV%I>iY_}kdan&o{ 
zu4QInEM)M-E)5vn)c3~&AKfO1@58qzH+_x}a;1Vmc~_4I^6!3VYhV`vd$CXGF?~#C z9w?idkP`PA>Q-^S;b@h^s!wHZH^o)Pj5~X;8Y*C=4zhu7Th)7W{Z#^asXDIrPBK4# zkPVe9cCR776X)9>W=I1jFkImm&pI11ABd*j*s?23JneA#30l?Od~{6o;rr9;pdX{z zF=)32q4Uigk8z5;MzE|Nd|USg#@yH6v2gs_+o5^lWN;BJ6Nm;0ay7X3E`DHBrW9Hi z#J%3E2Ded@{8|43Z7|2*DYU;WUPm8h6wB~*OM)svQjn^H)t`%QrV=Kl+Qg#J>Y$ph z)N#=qfWKzDulRn8b2PIbua%!7@K&^Et?InT^4ryMOrJ{Dm)pvu^oi|&><6UGRe$~? z29b8h6JKj{rb>1=?$miR`o@Vy$-yV>4BrSSmtON}&$jphx;gN|yW547i>M|5Lq%^c ziFG8;m%U3gsUC@4AEw7Umi<=C;w=C2Qqq83VoP|b$7ADU$(5EynsSW|{z1-t*=sh+ ze_$0OVXSOOP~3joc?`_fkr@ku^e#L|8ry*h*x#DS5NLk(Jb>*cupsYwB*?oIz1rQQ z+R45RP}t!0$n@7Eq=EZsIY1I(xA0$KT5U}7jYJ;M8^LCskB-9+&WtTYZH59YkM1o( zkjuYADYGQHYU$n1>pR%S&%*a=Erf9vTWbs(7eha^@#q-B5W6ZHdP04c4Dug&Km)}& zm>d2jr2eknKbpeX+D;Gv?2V1hETv&*s=U1-m1W8-s*GDSZF!op+ZQFs*ulyp0oEjC zH4N|I-&x-66DR2oj(Qf4VXHa$x@yGWpydBiP|{L?yH5&yg{Z^ecxlDkT0O~yYqkd{ zZ-%~)$!w-(CvtrhI+^|8$uHvtw)K~V%>qlD2Dqv2+buf1UB%Ke!2WH^1+fD){-;qn zCyf&E-5C=&;pYT#SeU{R=wG>5xj-+4v9C5OVzyADl4J|#DBgeAzl^{Mzp*3tMS)O1 zNu!dLn!xYB9f~r~%QM-%*&}WXPVnHPNP<0=oR+i-4!0`RDeTt?e1Y_GK_DR2fOm(F z5A(`icD0ysYutJ`QEC3`GEVURh>2qBq33(C>^)7$2UkrQNDltMrUjpk@yOGnC2|d{ z&3ps4PTh_QE>5$P*esiKrp}Z`t6xGK?G<%uxJc-)u{Ns*#MfQU)Vi-9zRl?h6bE{1 zz?#|MsF(yViqL=wTyeMd5D4{`4cap0|Z+8y*p4r+43wb4iN6)CdRGP;m?vZ7!)!o}^pW>q>PrruQB@s($;%eBV&!n~N zv_Bgq?g@oTCmRk1knyZo9UD{#Y{;Wvzw;(;5WwuT_=XOF6EM_%zR-;CX2RVNpS9I? 
z>KEVer48%zvr80J)NF??HPA1-drWcOVG(nlWcnsYfzU9#d`tFZgUL>$lqrOMYs4hw ziSz{2N|$Ui`Bp6Uj@-2_N?@bmAU}Xw$OZGW{}ZiLPIx{Cm>gX<2{T({ak0};2X?>3Vv_g8^9xjJ|$+IvsT zp0JLZ-oc#aE8Uv!`kUR&Hml4{cdmXQrIkql6Gn}P84bcwf(fVly~|uPCtb7fq+sR@ z(0A~6n^b1I#KG^@IN@I~3KFzjvjFqaWJ3XdoaPTK^HGQv5DgNxT%L3UBRM8TaLvP8 zj@t8}JB+hkDAx$7Ba8ym$AI`WRIEaEV52$@E!v@M-X^DUK(>{QG)j2_G>Dh#@wmY9 zHP!t48&BQ8QWnY77iky7x~h>Bz;2F-G#_P%1t3CnkF7rW_6e76m-@6a3~-3HiH-eq zkdop>qNos!V_h@}K(sr6@EgZHf+w|n_kSX2KyFe}uwTK5w^vArjdB#kTM_gpXt=>L zA-6Qh-(lv6H|hTe^FQ_fr?jRO1a8QntHOD(YNXfy#Z-y(6%03n8J=HUzPC}*jiEnm z_`T%2g%hIg#|8iAJ90937|R8W`bjlt>8zI6KylC%oTwv2@z=|_e^n{5eAcvC*ZPPdO-h=OdE%;{4b`o5 znhQmkNfcG-PXzZ%7NP<5@dn%>j|K1nAE5wdalAjpQHdi7ipOSsJtqmKjNfFAT|f$r zo@Z`1u*2A5L~R&#JdEn{mGqYd-fVHefS?7PYf9dNw%&))wIcxh1#M#dO zyj@g5HbvNEI?6xgsBn7dIUTcp;8Z??Zh6Q#iyRnKh4Qys=f+qkq_mWq3VQ^wMY zqYSH$18G-v`W+>Y>8gn)I@B~dKW9SILU|K(zn8bp*=E1;C{^{CgSLIEd3q$Rp(vo= z2F=fsZU6K=>@EcA!~$Ay!C9>6_6}^fX#eyyOmZSfSF^e_6FT$XG>~(6S%b9SBbG=s ztBfP!|8!+oOmCw2)HKIoWi*R-%*0~`e#*KwKx>%#w+;74>9xW2^;1k<4#qPdx#z>+ zfKp*TyAgiZv9<r>OTJ@qWa;G1phL@4xPt>7?m5dSQuKjrvD`J|9UnLIAu0`cG6 z|N7y_kn5e!tUzQSz4Oe1|<;+mmI>#K>2k1v1_wr)VaO9y*MN`~M^` zhEPwG8i)dqfRHceG7vTwPbC2%YBjpVXU=3M%;Re6bY=7EY&G`WC(<)+TQKFt37(`{ zpIwBTN0XbBA|7K#N>2iG-v}U}XuVXRSWk`NFD{52xa{sspbtCu=a2ifa9J#yJd)q0 z4m{wue=T8;k#ASZ_wzeaQhn+7o9uFy20S5J(S}l`VzS(XQe4HIX(IG$o|2!LkYXB~ z@9oRs0&dbW4iJxMqsl{du5`L%MR4V9muP`7v}`F<{2n2Vx1b4T=g1aQR4DL3#+nIdX zd*6M|bAI=^|J-@HXI8C#*Lqh~*Qze6nh;fG87%ZS=l}o!OHNi&9RNUt!B4E`NN|nz zEA&YK0FBm0LPFK%<3|8MHY6dzN;OW8pzr8RaQCa|OYvY;i5Ny2|M(aB^y4LkqB|L? 
zQg+`O5t%~2aZ5!@ArKfQP})kp#t8d@i0>M}j5yfjjHf21=d?_eaYZW9@|5B==J@Ec zwd68(-!@{ zzIf;W5{6r1?5Ch7m!nWkzAxvqh5dH5%WUL4_B9;d%tgsTIM16jt_$5j7 z+0tj^=U*yc5M_&TM!(F(-W8R8L0la~Lc!ZljQ&OZhc!Lgpg-_OhAOTmx|SG7_0>6s zD|Qp&Vj#+o;9q>)NDo*(fw;-fCQOlak*0&^*iqfS`mA0!ai^nix8EEa!|8%mI!TZ5 zu6YD8F;volCqc0edTMj(_9d`Q4hH zR-V8j1Soy;%{SxBQ=csna&ZHIM z?=-G{r(}z`*WC>jv$q)~Uv+7e{JxCZP(mxLulVoo?`K z^ehv4X14OGDvv6VelwmcULao2xur3PPe)MEyTrZjn*T)YgdZ(6@JC>S;q1cl46@F-lzEr3b@EtpBML)u3-U7Z1o91XI0}C8 zrh|O_-9raSyYU{BVX;$*7qKI;LX=)gZrYKoRt$YfOz{FNSF&V@Gl^G;%gla?Z6?0T zVrm_7Z>4^yuNHTfQcC>R=1{IuJjq@Qf2lN}&!AnTxEni;kxw1hrT9Uc-7AIC!o$L9 z6La(ZCc-8%USepLe5QQb;47^!j0W%I>lEu`v~u%vx5^Vsrn4_)Nh?ORqtrxlmkNX{ zI>w*>nl1ZX#4!?Ud7{f-ia(u9t2ZIdrB!2m)`l+~R)$vr(%moTDYefg%`qzTkwKm2 zm?t*f)<1Ph;4{ZB8nqm6|@y@hOPFXnZ23b_q{Q_GU4ROzZ9WLZfWPq-kk5a z@RI9ON0W677>+RRfEJG=p-hxHBw@N9TsmILKJc{ptNY(ONNJ ze7XRXH<2XL^N2YTZbY=6GMzG&a#V9)Git72PG@oDIKEVCHK7^#aa1KuE$t@F3z*;n z-iUNuwJTacx5rwZuTcUSJk}2!Dle!UP#k<)J~7A?Gqzb;$UqQiKEpe0*j70J&QGaR zXj9M=F4{wyFHt9Pszfjk^jZsmW80nIIt`>yq&B6(dhXW@MBY5=o~(~A6}BSLaKBU~YoS`1frRyUyXlC|hL7#e$hbE035uIwz!E>SN7nrxbB zDNZ@n8G#HUyhk!qZ@O@UgCl3jY|69>S&ARrxi5DD-WN1QdPdxyeoX$D(wE75&3pdP z{a93_R$oPKkr95pho0&TGB| z=e-`bA3wv>-Gmlhf_(BXN^j><-SoXoQ75a4BqoU_7!J_)-Ux03%L^8&n;&#qO`@`N zgiUS`1B_lZ@_F#X*$>{y!iv z0RVWP3;+-~r8lVw!lzn9s*0LYPft&9%>S>Sc)%q5NB~U1kN+0f0aAhh?P$QLH6YOq zP#g;AQ3g200Dh4JSUdn_k$_=sfY*Oa{@V#$X#t3l0a*N1!oU3hb-{p+e;46I)A4_} z{v4rz-v8MCqxl;Q9Pmf@H|#(1U*TWHzkdGt{uBRy`T1iA*ZiaW!}3@7kLKUP|5IzY zE!@vv{a^5Zg83W9zq$Ss{J$dk2mBwpzhVES`#%7Gi~kDpuk(M0@dy8J`421^Fb}6Q z1%&AWeE)-l3xB%-902bXX}vzg@N%WAy0QkK88?ONo0g?yF(r5yws?Mda&UBbd~L8g zHQKjv2AXgC)_icYoE0gVQgA&ubKG{KTCfGW+$!L*OuCEeo4)N^%o-WSy#>u(_4gk+ zT{Jwv3~NWuPg15ID7DhYR#Ub_z}MZyhv&yOQ_8lq?KQB&p z&U`oXgV*;D9k?+tPmH;(7;*0(QL_g1dIoCcm>t9G={+)%8qjNO-YAJm&tFbu6kpz50CY6O3(41uTT;RWn|D4X!kH>HU)m2VO3Uw2W1f2lyT0zAQ0ANCtlN8f* zpFhk9%USOQEuBzz5X8>T*B2XApqwvCO@Q@nn@9nSdOe!v=S^C8@ji)u{uFm4)b64*4RDZ>pE*2+X_d5a4D8CS8VefoL 
zUqt;=yGe7CLtrzeU)RFi4F6K<+s_)Epk}+@;Kgneo~FZ!k}gGkMWMtWVBTeAvU+@= zs%O^ccdfV&dzUgz)M=U()e1k;jdXaMT7T;wr4Ee=jcf5_{AO(sI&ez;@T5S<(~D=GmdlimUx$pGQGaJ3@zbL@ zZsj_GE(=J{t-A^#K1=DK6pZ1GWh;T#x2PrpTw+8F$KF_%>+FQE_dJG1aD{UO>4?6m zpU7R`n5@)ZQly^dJ=4i_UL9Um+fW9MhoqyHQoX@%rM+?~7-Hwv0_d)T^OGQd~_Yug&`q4b_KadO? zBGRKoEpS~OwC-&ynGUu@!sF3r81@an5YqXZAbIlw$41B0F5I=~f-p08U$Nz1YXa(n z)Zl~nJj~fNk?P^X503Hdri9Y9u7gqsjp@{>rn$29 zn)hjz_YDtwg5-vu4hos4JRYU@7FPA;LE{x*70{*kr$^A8W#=Ar)+l3SzL&AF?)_5! z)n2|Y^GVzz9__m;jjuzC&voiqBp{B2QC}Hmh&xh$$nb#L-Uiz)usNLB^ZQ6dvTa_J z*ilEt{udo#M;%0as?T4Nb|M!h)Bx7Dk-W?SQ|VuAOSw#?H1$;Lonk_-LY>=sV( zUK+~Y`O(eKAW1Y}qr|ZIJI{aa+HQ?aOGU23Gh2C)>bY%V=HVI{*x=Uea~2eXpWOiN~X?dQe%%DbwP*$o5>9V43h7j<`5pyE+ zU+w(v{?lQJ@(&=6OdcxJLZUD-(K6jtXQ8!kNHLk9jwUH2b?+cfpn$#ynJ3_H->|y> zY8HqNnSs2oi}BK#!AQI_Z7f#nmr)zmyPObue4SFP@w4#7@avF01c^5Iq#p-dO$*7}~Fe-uT zo#UmNU;e~varom6M5W;$Q%hBS^sMg}Gla=OhFlGqW7}WE;3N+FghbY7irY({I!=w6 zDkh0oF0O8=aOUa_P@j59BF@U`;Bq)KJFN@W#6DDt%TQ27>$3r&^Q2x@-%#zE+E8;` zXH4{=;|sDJ$SCmLcb~|sk$crODb0*J?qp5)TT%LVxTLWIb=SM*j32v7 z7)og??l?-qb+Ez-4w>Q~)sx7ktGfKO&Ma%{WoPU(*rW{m>l+tifDP7Y%NrQ8*P`kL z@vt&=9j@QyD3^w;7;TQVG^Ge{Y)3kd#iUZdQ0!cc6*lSVF)pOy=d|@A@;E(YpIIkeF-^Jn zB9oP7oKg*}S5W2|JwWfR5!#>8THG24jbH)dVDxzctEHlQBbS1Dt0{Z1r@Z#pDXu(! 
z(L>S%+c^$6f;$csPHJUIr*o^d7st$BkH~IpOLI0331J#sx#NsrL4v=2+zh%ep+O~h zJro+x=jxN*=4%LmVC2dHuN+^XYR(A7?X#rfhYT3)Le!H7=woC0$0E!dg}%a0@9p%t zmn2*yU}C0J+K~^Xzm7uX%Zf^CT)3IWZ&gLc)SV1*QV4B7>q0cJ>0vT$DQ?Zi-2$oj zazv@sMH({v@J4}$vnicTNs6VXthSh-lsL#b{l}rN>DGbdAjm^vkqDAMPlvBh5SgSQEw-#owfP3| zi$v=cw<3=^&KuUN*KX#Vq1`xFHFcG~?keJS*`vW9a0=+INK?W?W12YySD4d_}_Dvy+Q!=AwUA~|8M8#MgeP#Z_NR2|F?_( zt#f_}NdffxA43(9EtZfdkUF`JI#Li45dR0i|84qjS*>;rWhwh;`)IMAxsMOf%%JO> zIz&LKqorrIYd-`GN2n4i>dOy#HCaFJuAanxpEY{{lQuG`FE$Pp$Q4@<279YZO)5+0 zpkxX^uN8i3MndoM`AKXRl7yWsMUlhpgPC(V3|JJLgMJ(s0@=qf>50W*kEe4idt>P# zSym{cnDiKFTvcNxk9Z5dUvIgmMeg}KYQDjn*h1{NM?$m-&8T_H5*j2R&VpGfo#^e= z^ORe!XzQ2h%pd1#C!`=CuA@@mKD(+nl zgmw&w9VroXr^v5IfLEFW;yyAfZ3nV_YzLzj^zV-XJJw*1?I{R!kr&qY5S_v!Wxhr& zWV!*f9)fy{x%INI6ksRB(&rE^s_wSRL?*f75~ym;t0~uA z-3>Em;UOo$26pNYQ)2bd^}Au_Sz?3-X@*|``y*sS6q?? z_G4PpzuF#uzXOo86#WQER>|$!U=49td<%|#wZ^khGAJ)6TEnh6P zv9ToTKo;GwH?ZGy-q>v#l6hdm9R!|L3`dQoqCx^_kN2N84tU%4B9{43`bj6#zWP9F zw{$kvY-`q4P#^(KmGVnL%K6_C9XTau-e4{T=F?%O@hdH8X zfkIS_0z|4mzY~a^dtRTRVnVuP?;NQ9QJpb{!|xfs;^|17D+L^byfOQTkm3&&V=1J> zg!e>GRXM_H$sVk0Ta%fHT8n{Ru{hn?%bS^_gE1*pfCYoH%babWmR3Uzgn5q7`Ze>! 
zu;cQ4mQ6ljP%ZsOq+va`mfZNrh(%Ga*csoG^gB2C*yi#k*WciGkFkb{wj|uaMHV9& zI=vO%Hwt|?i~Ztyp_S-25^uL(?%MS}!8qH=52HVuq+r9G)KWs|<=n04 z5aIy~)#sy%UR7xPUb2rDKl$(W90lu&cSw5~v|#8O^;?&44nF(El4ANof^*u7rK7ep zuZDw)EFFt#1K>XEfRNnYK$-&iR;U91nd2tJRGsfV0@=9)0h zcU-yyM=|g9YSMv=ynBd(Vw=vxyHadeFt$6J`O7wJ7z-q=XkR&5>i35_JwDK+A%gal zu(e0ZK81`b(yk2GMt`+uV>|3?2qX-Kifs;$9)4|yJcHjid{6N0?vt+4Q=bwASd}v_H_i8Egi+#N?s}f?XMF{))*~mK3BWAw!_VY<+n&tb!lHX( zJ=hYJNWnkg4mH1wa$Ke}m-gv=seos?#mj*Kv2+nny~1@?c=BrE?`K9`^;P)pm!0T- zKl(={%e)#(!@NZ1-;YTjB_()3uM7J&?5`fe3ww#XKOp<3>p$e`ee=nWN8G@c573NZ zyr6*B93MjrNgWK&y6QsOlqE4ZH8tZPtJ{lc0s0V&hc{q&Rpm+<2X1|z!=H!ag&)_k ztx)16;1^KfUVbD-2y9?S(w5nZ3K7FoizFhU+L58<>?32e0AHdKB+jG;m|lPgA0b~K zWoG$3Vwi*}({R<5)}Tf`8!4?qgM1B!&c+OP-_$+_-hLwM6zK2@+q3-l|kPhvv?pxi2l@PY!;6ujWgTsp7B z6^+nOx?#FHSsidBOaYD#2r4E8qVii8&%Xsb@kMCCtBRcu`4kN8_)(f#a(Rl!yHep} z#G5@78XmBg@=Y-l%xuB*pZqq~HYa(lk~3voy@mVe-1iK$R;?2l?oe^Gk$s*5b=qhlGrV)OnOOVV;$k5hW-& zY_}4CIqT|q$apw2@TjD`RKqiWPG~li=`i{ZgVX)hsA635G~3m1hqav2Zi$xj;ot%(I>$z!L^UfH+Jm=<90p;jMb4{N_lli4Nm%F{dmdmXLOk3^E>&!3txUkym4^>2JiAlbM$0|7o?MJ@0=YDNQ^89P`orub(WL!Q1 zBD|RbR`Aafb4D+J=V9DA0PSd!l!-XUhe+AVq~1v5nv|DDu+e}1wLK%_T%+X zMe94VtZ!6t4;*pjiDu-ZiX*eKI35?ZnKx=@vCi&&*7RI~gs2J5iLc^D_oyq?cRc@+Mb%Pm+s_yEh!lwk{Ot+%Qwfxh~{m1st?&pH*eH zxn0t_EDI4l7P;t0Dqb2)!3x9r6br_tf;F~34Q~4SKGt3NZQmRH*3sl9x@%tN(7;GG zR zcTf9CFGFF+k^PE3jg{XlgpS|ia&r_Iqj1Vt$|H^{l2U{xJud9JA}=6pEP2{3JRV&R zC1tbayrckA$bAH6KPtTJCzT!FkKAD@eBR%B@d9||!j>`kVG^FpWt2*j^*(n3bX~oP z<#~FFB&25Y$_>sx()PE?Th`1u7J%+3Cv+rD^tH^%mO99;a5!Yl)oLW~88C z2;;YoHYpKaK~wo|B#^pMxH*T)WrSYsChjY$lC|5#m#1RluqoUP$yUYp zyAy8ihh)hw>qtwv0CWWemCCkFpBF#KPinsy0NeL%^BL6*B;g9uFm4!QRUN>z?Svx| zT@z1m1!x%awTc6i=kw%Bg|ZsVbW!QfYUSa62D+zM<%VPfmh!(h2QsJ`h^il|a}&<} zEvaN-)p^BXyVo-Au!dy5!~?QryslG-ef5mo0M9jvvjG+)X8Z z&(>d(npN=9FEYN%!jUEAte7lnrg8{ALn@PL5r^>;MJ?YzFD|(5Xc`YtRUv=I)bL?$ zG%H%(36UnB5kfze^91hDVmvFt7_`*rcLKZ$iJ#jY3})~70G z{M{bovWGs-#?rG`VDNtCt7}-@_TtI)od{uH{xARiOw?-_*6L`SM=1fJ;gw)J*zq}z 
zqtdKPUdZwYLAjSP#TD%_XAHiAEpFqn)8ZK-gckwUVmLhJ1L*>dVK`_Jw+HgG9Yr1) z`Ry!Lb%Mr_9kkLqzAhL0x%Os&8(}x?WuLex9w+m7^f*`H7FZigfG_c!=h`4hJgrFJ zekMVGE5ft3+b@H@klL(}w#+R=$V`+?^+n`}U+w+xC@GT~CAm2|eae0aanTZIyIZ*bF z!po9J0gmBp7;}BAqh#v^ufF|cDhjLn0`{gB2~`ScuwLz+v_OE&geU|z?C(5xOP8`<#OK8QS9_oc1t~G-_!Qeh?~4@>h4(b9VF? z#g?A+G}d2U0%GHMW{Lg`Sb?N-6}M`ZYxILp~n`_=Guq zijl}iFPL(*iQxGLD_QI*i4AFJdRSYF1|pCkGGp2|_=l{bfGTx#vo-$NWJ{qK{maiO zlwx#cK8B?zyP8fAL2^=Ex1g$N1zY6lv5 zP^I25hPzRvUFZs5nwUgGo;d3SU6iEBc8FRP6f}ir*`jPmG!erL@}ZMp zh|Yi~Tq9z{%vYPe<4I~ZBNyS9K6M3wx{$wfvBLNSdy&*pE=j{ttC}l6Tl_Rb%#9$X zvQwy{Gvw!YuK9$>VcLx)q}aEa%0CIfqxQsADdH>gC4y&U=_w3vms44#6uYm|UeT`8gkKoatyJr-XFu6rSAIoK|KUll~bseuCjv*2kiOIVn|hlm1=lFQ46;{k9+~yIW1qf^*&6G@oQ>h#S)wCw_tK zU_4v!hvHcA+?`y+&3O^u^(~owMf0`A*vLfPXD*WoXS-s^7b;_k_QGaOoR-d(HPa&( z+|)xaXZJ6v7lrXBSL9EOZ25Q|dT=-`?uYU9{Pq&K@_ScpimPcJL%?k*R2D7otLf`Cc17a20#YweXsn^QjG z9=C_K6?cF_0Z_bG#08l>G%rNRudkl1Kj214oIN)OZBiYkd2~Vi)|7*+J1Z~NHe@)& zUgQ+j%|BH`1u`Mf`Cd4byl>%{0df~-RnzQ>rQ{`1SOncKx&m)irKx&G-KI1jo25z` zZ5j`y@68aKbX4sqAfl=nKorKSjfHF~_(K4g8%xZen6Er{eZ-FBUaiq4UIcx8ZXQ=y z1Eo<wYC*93 z+T3CLP?C%kJ(u5w z#V5d!@_3VJQ)myo+>1McN_}|9Ey^AXH>H<(L+2Lhv@(d+#SwPXx#WzPrhB&OI*?8T zU+^fJtChD=Y&CU3T`pE;%gbY7(e0%M*3ILGoac_VT&!s0rPE)eY1;@)1f+o(o#(YH zp5L7DmWO2U9KKIzl3>kt!FYbpE{e=84NH4feDv9Z_d+oF%&K&~MJg(*^rGc0=oe!5 zppdP3khAm32b@cn$|f`0?U{o#aWdpn6T=s%>W<_Il9q1K{IJm~?6W10Q0icQuBUAu z@r;x(E?Tf`Ypdo<@N$P~?_pO{E8o&k=i|_y*F^?g7r*i_38e2>Eo2SIWHJ zD@UPpxp8;F$8-1l8zLL=&S$FlBPWX<6hZ{WMk)WJ|q&C${3;OvQxX3|rfvJ{S#% zY8yE8;IuC0aCsEbNAA0SdmEfoia`b097C4SvC2FmyGQ>XK=thxH>AQVRLQuN_#o#{ zFlC_>b=KUMmlX`4U7k^1`kk=Yyacq4i5&Rw!(H2T7A2 zWSce=tN+rltN#7t17{o9=VA1^t^L+tpXy%w;yZRbLW>7@+#COn=5qo9s6da@A4z+( zK4!jA47u6nJ2Y?ep<&`qS{(L)R|!5#o=TeNV#82;LUtP${3omSk^G7^0NB|czyfH` zH|ppx=|NJYV^QHZ>|wLC@D--d$9mnso<)RyDXw)GU;`P5%7Xa+a%RV5JO{vyl%H8I zisixAE1?1C(E0ZZ6G$KDzj*emNnj>E6!HezfzH zs)^D3XPcT(Pf|4#Do|eBl(EsYmH2@KSm6SKFLi0;`SEeJITTVgJX??U$|#vY|7XYw zDJ5>&gK?AHDOB)&dG}L(DW49gH_vnxk0Y+U9KEjK@i6#g>2+Rp<5v2w7YY9C?8n?$ 
zf-w0n4xL!?-tW#d7rgfpe)rVm!(|G(?DwguuR?byi)`3!Z5sE5oaM;JHLspdfM4z} z72|A>m0@pWYIj%;7^#%@Z9YB;C#)d915xQ}n0&KqYv*+cvNcfSU2aQhbLMJmf%vu- z(MH`)!r~mX|Fxz6?(fSJIZKK@0)`4j%C{G=qR~v1k)IBKHuck*OKS8Mbdejh`xefP z7bl}=HrIOK+k>AN^kC9vLbPCj^0Ti+1}=9YpnlSHYM@H2=lSFJ1)~ZH*ox$C+LlwU zTG7z{pKHuPKXS{FRFN}^GiMMeNQ|a~1J+fLM3Hu);_XN=0M^2bnZQ)++N&_ULr25< z5OupAd+H>-Wcu#-BjJM@wLO!t9Gu|lIfj{_=PklLzaM)o(&&4%q0&IT&mrQ9H>53B zbM%G!0h9JeCAn#nDI6HL8=oY874Xe3H`=|TnU$ZHWo2Sl^Dn9lxDqDr(PZLr7nk?W zKt14(^4ek8k6iv2(qdMA_8(+m(f2>dubNC8?vytmSeE%dOau~o$;-MR$zN~A6FY>i z`1<3Zzz`fWd-~HLBXbRepi(d2#E^!E)mIj~Ltd5Jt^xfoEF&$r)zF|V5P$EytA08C~F!8bcY_L_>WAMRFKkkNsfFoAn#xuo@@J? z#@3PBjCyF!L&Ok|T4Y&r#6~sZtDfZ3?)oulv8mI|CA3;2u`W6?_=>l1@8&B#U>l<58slLis<;uSi2JE4r==}E^L4RZ?&z))+&ju~Mh5D+W ze1N)~lKtY6Jx^ZfE4_#-%LqHkAxZc?&>_c~QEd*^0}o_~P9O*&{e2r055%X5Cr6&r zG^wLhQ&5Jd+^~tgczo42c&u?Q4GeC8y#Yrw*#X~bNcl0S3V1_?x|FTx73Ni^a#w$` z?|4#xdq~`xsG3%fq7q_ekG&QN8{aT3&7^=QV>I3Bf9w<^M0dvtkOy_;M|_A3UAnq; zx&t0nXD*h4a4uzrj=AUsV=|t&I5c-dZtdL1(3DA0-$c?~AvlJPeBMs7CH2_}0?xD= zhDbMx=u4aB%qj|>Wb33`DLdORRirtch%Fv(z~oZ*lI8TsE?gHE;1ZUFhfA^+Cb}iHE7RQl>waW^3vJzIycm3=J}P(P-VjoY03Eh$(oU{_ed5B z(pB+y2X~;3^e}mWaub_H-gXf1sM%;Q#~rwRmt#S_+>oTDZFu$oa;t~!Mh>Dz^h6E2 z%H;E#Rr7!>tOxU9d!`iC`>FYi#H)|iL?MgmO{e`XmadsxI*GM0{I1P6Z0P<#wU*C4 zp+7u6hS&9Tjkm4ZU*GW#5(dTi^8D+b^5H&;2*gn+5ff1l!BsS_Qo63@rmhz6KLRb_ zKLAd4P7W4!eilwXO%C?=oLuiYIhfcv-?OtL&NRgQZwB^`<~Eid|FeNhC=LtU;O!p< zCztoKHtH5G;*KB(a~lUMaYuIm7Z1BT=Q~cOckJ-vk=5KYaOL{uE5+ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based.png b/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based.png new file mode 100644 index 0000000000000000000000000000000000000000..4f41a250ba47d31002b373d0042a43eff3ee79e3 GIT binary patch literal 8553 zcmZ8{2{@GB+y5D3A8Qy2BYY)Uv+p9L5|!+-q$1l`vSb;fY=uN2ge)mEh-}%$SE!II z*)wBH_OTC!c_01W_x=B`_qv|BX3l-i{kix1InQ(E@l}%xtjs5v0RXHQjr6YrK!XPW zMq-3RlpJS?W#|ik)%b=%RaI4ppWF7_KsUY|qTRiFx1*z@u&|IyrE+p|#>U151_svG 
z*X!%+r>CcPb#<+*tSBfbn46pb`t_^0xL8C)Br!3OnVFfEmX?c)i-CcGm6cUcP>^bF zPAx9}@ZrPl+qa9U#pYCVHL4mjm06G~D9goV4geJZU#hQpyI^sG+U)EsG(JT=Wj>`= z-0oX!4%9}t)ke6O$+T*Vr%tV%FfRt&RPK?H5iTm19wbbU+1J;XOP0&n*f@e7u?OkR z3hBm8)Eeevt9%|Fa6|HLR{Fy{F=3L~kmc0EIzz;> z_C0tFfXV5Bcn%;63>mSUG>(yg?}(yV#A4=I9sN_)L!(M$;qBO&-h3N_eD#IhM|*wm z6f&Z-8tQelc<-?^vIpkyV`DHz(UK}H=2$_?arSa&cG~s~;<1>lhVz5dfDq;dP|V(Z zg*t(SqB_#(`ZQ!~rKUeM&~`}n+~&jOQYlA-%D$tfyKHf=L7PpZhf0am;u|qHo^OQ9 z*yYidhU4hPm4&$*`}-eT!t$*Sh9@f8M!p)181EzTu^77FSBPWzeDeJ?Hr%Q z%w2~5WXuEAGOLob)#7jKw==z+mkfTI9EP+;oo4&(uX1<72J`)WR)TYMuJdq}eQF2g zq_^?n(oAulM}eE_sOrcE%2oSHSNxyFB2QYAli}U!L+VZs;Fa~&DM*pgv3t4r_x^m? z4%l$25W3tHHgSC3leM)`Sm&bnfl}kx0+&n^%LdK*bk|au)bYBu$%b1v`?}2V=a+Qa zeBG9BH`f3e<~#lg+(NrUDLD#~wLJeChqJ%8k?nL2C}?UK6Yu%L+qU+8Y&{OHpYpc0j#q?-V^!+uKc8IkUpV|0d96!xu#j zd;No0fLA2^_||@MzUJ)>R${|BHO)?)-v5^ANn=;#2P5Wr?MVw5TKHXhP4Dery)dk; zc;xK1T6rBWYdU=1nT(%O%7zZ6mAZDpQmb|wk!`F}z(iVqL7vcJ5GZt*UVhFC z<~k%1cBo?yxdavFfDPN_Qj zF5orT$k;9Pp6Us4yPOvE{6MVmNK9=+X=aXwT#Z52y9GIrNLMZ!qimx5#Ns(~wDY%X z(eCuuK9<1jxKBHc7K*hl{0-JHYvu7snnx9X)?t~F4$a(#}F=e8{^S1xD>p-(9AV4|m zp3BZq&GHIO6`^tOZ&Efbj@8F4-#=59dFWxedS)=_$us|wxtS3k!q`Yt)jg%;p)M8P zV7D<0&aay*RYgu`M|r1UL%pdn_AUQsz4(I7jV{GoUtwDf1MgNu0yDCe-HR%R+y7nB z0Eu_Dadx7q0@r&I;=g3Pd&_F;sY>(z^`rmoP<<@Ko67u6{(5DjCkqgGYvU%KC240% z4~{-sA}%*ucF*m>*HZ*<(;N?Q32^SvsrcDChhNvr$q%+8P6qrUN9(PcFcQa&*o^$7P6c@ zFr~yRqGQ)7F=6y58zyC@I#l_z^L~F7_nh^u_0IXON`aL-0lp~{AJbWIm#Xbw6oXUW z=C{B^ddO~FU?H*${+`WpzT5WRk2`yIolhF%bApn38d4$2w;{<7vYRWuxnkJ7Q#=|^ zo5PG>^e@7ssteIMZ;(6YdS&YP1#fBFAO+S>Txw6b^vze+MYiAlKi<~HF4qZ|cZzET z#5?xH_gIXRx*o0_l&~}}4@}voer{m(PM2=TqSx{hR(47e*XnVxzwBfGk9_ls@0u}H zU9K2G;^w*TC|5(Z+%%x7^jNE?tw-8BTbd%p8oUCxyf=CoAvL<&MP0tGzM&Ztl9WX` zfwwMVCR)7U3TDDa-PtTaswm5=ymNrr-93eVt1AnWxx~HOyA6x8^KRDq>^@L z@Bg-D+mY9J;Ck1(Hw1jY^wZ(mSgfyJ2}5n6?!_PO>0kHzUb~3N%qld${BAevT~_ft zB!GJPOv&}F%8ipN1~8Om;EVz~Z)7j?&$s*bDW6n$G;HYcmsx(-Y2`f7!E2>|FS))` zcD!R=8j5aodrU# zb3)a_z)Q|rsj5!kq zTNa8QSpa(CF{0r0vfqsuC`M 
zSMUY}%Ch0q&!={hkAC=KOHNn)IK$v$b|L@GA-tA`B!)F0(udrM$V_stmuJM$*2dep zTNCH=dS6fxEg+Q1QALi&9`~inlQS~4Bkj2Pi;SCXuMz|jkbx%oV!tZg8L?vaO?Cza zcZE+RxrcW>`tgJ;ujFvmpfsI7uNMS4xfzts>bh=`EAl>dU_Gu&8lG@gR;7Jl}fG6fe6^H-INJs-S*z#G|O5^1nWzbQ0weHjkGPZiG z>qMtaa$u99DfS&q*d(^2X)kgB2Z*=$SGMR4gpzE-e6Jn7L5GmdhJJmQ)t2kK_;VTo z3>S&K*d%2N%B$sn7;pcFu|pP!;&$mA3*gNcV0hOA2-D`1`rB|AAMPO}88NrxEo1`X z?-K*vuz_bHZcc|`KzuYip>$0{34ez3{>IU`eLBzvTN6F`sFu4+9KiBQIO<>r1MXN8_% zMZ|jvGzoS6D_7~Ul&KD>&H~ity3L^)+q!)*RLPFsG6j4xgAfNX) z*LIPE0!3h*VIL%*UC5a(J);9YM$bf=)$pxm&h4L9zT z1^E}Z*3PRvR^BGDVtY3!O`VU4Oi(lxC7S0?%7_MuK(=0a_dLTet|L4Y3U4&bu1-Y| z((Kf8Zy)>HglO!p|#A!f{JE^S6tZM zcc3Gu@<{h_A3IH1XjQv>^SVTPN6{JE!0ycAleONDyiQ(co*q{ zz8p_V1-ukgBycPkk=6l4-EL@;Fal?Q{E+`^O>oq<$lfB<&=I(ps+b2R07#V|ztl+F`f- zI5tX~fTJQCeu1aLcdM-*3iEx2o;5!U2O#&!)W`jXcy{F;1jZ<%uN)lGz5rFdbGKe> z)KQu$W8=tQTdx7RJ~m=9A*ZdA##k|%ne@#cUB z%CG=1w0koi@Uha?enqq3jbyPKe=5a~bhuAdmGOdLjq)X8*SF61+zf-^|DdSC$gdZ+ z_x=?xJ6a;tBHp&@BaxU0H zY*X*-g9bxohM9+#rm|bj@tQnlvekh9pt82vwh;_`Y+2c2wM4xO06FEwC_z81$v!9QNu*Cn6M6jIIa87*PWh3C79;)@!sdK)VdNgs>)$J z9Tb=Va2dljPYvLBFNt@W-5(VOWj1bWKG(xDJDKTyG{h^m=Q+I42Gh6_24d8D=0EBI zl+i~loo8A4MZ2ystDR z?e%0Xo&9oD$}K>M-IB|=5rwaQr5+IX#s~>yd5P1x?!T9Q!Yr0X4SD~Ta%ie>R^CwN zb*PTh=^dk_fUNZbU9^0lzh=R^QV{ZVa(Ht2IY+=h;V)}B5cH}SZ$1*DI>+w*9Eu`! 
zcMq|X-v1u2**Ashm+sz%YW|o5{ll7t>E`H>}%oIx3f(EOH)Qx7ZbleY4 zA}$}Wc`*ztWEtHmvG4;Vkr*_Uyn1^Xtg>j(4J)mF@enb~8}~I-RB5JC1|*jzQP?PV z{kx8zoU_*wBw`In`_mVjM4UNV;;wkeJD%{IvniDYLH-offK=p<5Dt}JnxtiAq3)tq zT^Vf@^Qk&$Q{h9Ipebwu&-p#GD5$7!O_dnz*Mw>d%<~|@Aw92;ywHs=?#}u1!hzn_x$4IWnk9=T3+OI=l%V;I}raGtLQh*QZFW7WbvALE5GCF26B`0f>x)m0`WA66*~V>B~bW8R?yNH#HOJ*|+-Pm;#X zKNK)xq10Ch9FQ)b)B~jCLLs*@^pO+LWMudR`6fH0&`Aths1_y%WL@`!F>M+EFL}J_ zmlC@D$h*#DBtd004sy9^K)8c0gg9gC14KX9d=FPS9}bYN>q3d?XYgM3@6dH)~HSf7|SIfJ)?x` zir`BY`#`gWp-ipY^<)NPR`KM2D7(m4lrr%NIV;7UrAz9Fp!hMaWfSj$6oIdBY z<0Q3{)};w3+?bBw;P;K}#%>Z}cZHtXgyuItgd7z3sabyZyk@b|Z6poW<-X9dEy6wh z{mzqY&>*}&*H&c0lm<#7(;XEt^1*o^2Nu;s_k)Cn_}duhiRt4pT^vo}E;KY)|5=qo z&9<+#oSIMqY2x;;YH2<-pZa=xoKF!7d3qOmdn)zATm+QevY3@bPD@+$O*K|M==sP} zduP9SbDsNI=KdyM2prrgF4$-LgeQ{>Il&1)|x$4nk0e5{3Z#?6<> z%jh=CK+V}96>T}-P>EmNNhh$UkR}iOe8mN^FmfJyXvo4Bk4>bw5Az4;?u%KM`ixU> zjN0L+`euZZdY~Lh^r(L-*8J8XJq!@V`%6%_$CI@Vt`;6>WhKumq!FR4dzPq!`i{Of zTR`t44w+yOKkLQU$fV>CJ=TWM$N_edAdc9S-K+#&X!662C~eXosB$yo-=PHK_|Cwr z6jW;OVx|oaZXRVYkdxfd*x-+Ws!Ygb_Nugo39?@T;){4x(<+=<5IY^D8*UCchF?;IQo8NYbmZ>`T$q7RV&K*%+)=iE?JoX~y{z=A))DWLMoVQtrL z?5JD_??;E1=k1Pc#*q&zQ$NsRt7yql1g=1G0NJ=elyWw3eB=`BRMkKi`P((yg=?qY z-Z=@w*K*WMW-i>nM0M_^(=NAAA9Owyt|k4(w(BoWA)r?9q4++-NGA!CRxwUY{p^qy z=Mz?I&OfKyElhpG!GZ^&BV^^!7!MXpW9U&FUVgWo7glbuiQ*+(mXZTIb!;|#4xx@< zrMxlY2Lcbq=ha+i$dKz*1Q>9Cj`9!La3KQl1%1Hxz=9Sd?cHmJSKg=4gbs|YPr-+7 z(}i>nD+P3(=XVJMLe~TQHy*%h9h#ipzV=$EZwqkeE1|J6c@d@|U_pAxij^EO+-qfB zsQ)SKE(nDDO6oIuPiT9K(G}}ekK@?pS<8bJ*RCX8_HK29S;RwcKI?zx{@rNXBRmuy z)sHgCH{N7y>veUXS~5D6TZCW)fj0MED*qVo%ON`ArR+)G*GPybs+J>0A*}O;U=VKA zf!;^Sz;^ONsrN76nsH^=VNPpMai!&ym2<^KmzzO}U|hbXkn7DWb(1< zXpaJanW3q_U@*Ci3r9kr`=ltK|2xGV&wd2T3H5ePs5Jk>510Fwf!^_>HcSr5hX@@z zeo;D#T{G24nhp<1Ji>xvFBhx@gMc8YzXJcFQPgiiJ&6;D+A#t{NLlEz+?wsomtNXQ ztme>eM#R;|UVz!-4K^ng1@UM!yTfLyrKje9=q|!YZ%%Y#jF}hI=9jB$pq*J=Y!r>h zf0AO%_Ja=ydd$1_4)K8=8`f$9`!x_aHimPugVj>(k} z#r@hwVtajI$X*L_Qf4sT3sMJqM<9dGp4U${?T`o@vjv8obXMf*p$ShkCp5tb9hZ#) 
zJND(e`T*(3KZm0=%E>=h5Z1x(wyO+n(tZ^yeU$uF)AX*2U2vzV$E1p=c4cAhGoR#9m&i`=|NvozMVl zgodQK>t5i;tM^-&wr%}nQLe(?eK;*KeD{xR4fi3v*p;#{cTwx*re|+r+#K8s{6}y$ z=j*SJ@BJFYU9%WWo~a#t-BbED>PCdL7dqF~@$DgVmag>uKqpx$=DoKKf-+oSvxQYh z2!CqcUQYNjdLVq6eV}Q=Xu86-Dmu`hYU+fEW3g=Tn3szBQsv@3-6@`lVZD4_~o#_ZFoE3@Yx#%HwScq4`fSgecoZT z$XIoa;qFLURYJll!*>q&kAA&RHOF1`zCkD+G@Vbzg`;1WA#@!*yJq2b|82@?@z;=B zBSwe#@7Y0aC6&Hw7B;(r{BrTpXivW~+G7-RC)4#DBJAd~)^k5o-oWkJnY6C&|2BMIdi6~U&aODDD(Ux~ z8aLLJOLq^uPCg0v>EbXj+BrS(S}igg8GrD@^tDIs2=D-q5nU1N* z_0b)rb*nGeO_w7QHU~c8B%88K2Uh}cqua!|KVFlmV~&AJdbrSA~8q#)HaJrsqVG}M(i9Ime0bEzsbb& z@S91ScEL=-<7Pf(Q{G7>tzO#IeVs?gni452w-CqA33}*ACiGsN+`E0cS9dwy3lPKh zWZj%>FgH?92N+Zr8jZS$=8qK1dg%jAUi{cC92`fznVeMF z{g<;3BS)tUY4LtAyV8T?ph9~+E1LJn*k{UQbh!0|70*p@>6DZWU6$qakSmTCz+wHTz*l1 zZ261jBC=oCPT99a`be!YsYrL#q)qVem#uvK#hbJ$JI>OrXZv-neH5kFI&N7ejOA_S zyg@k4n(7z-^^RFnP;Z(fho{CElA65y`vUKX)fMRP0dhzG1(>C7(FBC-&X7`G{yokA u8>+vj`MGPbL*7WmK_L!3M&U@WAu60FI?3bs%8CQT3oaU%=$Gm_KKw7yVD9$- literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png b/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png new file mode 100644 index 0000000000000000000000000000000000000000..64324b66127c69a9246e88b9f45f8d6f84158881 GIT binary patch literal 8061 zcmc(EcTf}Iw`dZG5Ks}MN(YfDARPn}x^xf_5d>7a^p2E7L=ovoQE5@>T_e4P-a$a5 z7wI+hUhY>FMdLtgQ9*^%fQu)6>&XD70R-%L!*Q#Pyzu2LN!j8UF(&@Ly#F0GPMc zl@$zqQ#Mi|pYr*xy?;p)>xIjMcyrMZ%dy#X>-x!Umha{ z(<{eMhE6L2i!=jgJs#NbOEq!+yZ`I;3n7Q&+VK1F4%J6t77LjEdJV2eC#lxf{PW(! 
z#UFJ)kJ}mK?t6(qygn!8Og(e@1)noWt~%E{ee*%NEAQ^4_AtgVs1a2z*?wy#h|VvT z9wTA@!RL_tO`Xa<<3l;I#^e5}S2=swQ|rv2u{@{w(ga5%2@|fa=Df)`xH-g5_j>7Z zSpGq%KoE*IBCacgqz)>ikIF9B$Bq(7Bv|zSz!W@II#JdPe371sLe+{G97J6ifVA(e z-?4whp1HJ%mxj$|fRp>(OM70(!tezCgN|KfA8W4PIlalv=sVOGaP36WK~v2Faqy!~ z;V3`laD`g6BlqSf{V$PA#GEkyJav`+_^*k%@&@TswkzvDj-#)9hwQb*+rOsAu#{;m z)~WO}uNJc0AaKq&5Xn_MChBrm9uhY>RM_M_w-`WLP3d(nML8Zx)d)-Lq0sqbon&uG*DvS6A<1yP9xHIVJD=>vrUsZr?&aZh7E@O zA+SrKcAuVWDW|hqL=g5~!!$E7$o-XaCZJYeqsv&bzZJ*ksIA$xT)inv9D7@9Dl-W! z^_7m@a6f*OvBj{2`A%X9$c0TW4RIU zhyIKpgSwzL;iUu8#VdJ+`|^jk9Xsp`t}<@dQ3>)_OCso?mchP4V>h|RUa_Ur|7@{UBh-z10j=KxL(-xcr%IsK*to=l@_ zDwy@Ak@7%)tsuHd@&j+({ow4nmf|%w(|k465!MZrc9;C|-IGJ|h}ql*5@W=xhK7Q1 zak;A_*_IM{-#rHyxo^-|rg!SbdkrRP<2W8CCv`Fe?_OEGUedNMg{&XX(0|YNL1@k%u;%rc!`@XR1$vSjD*t^}&Uc7; z(OKFk%|N81zpO9nj2bbMO2>|A5$A_@n`8#Z4E_ZDxc(U;O?T%7*Ss*M%c(p@M~zDBhC4XC#_iTfsfX_-}@C9a6VjYU$ixrvX@MI$N;Y=fAp z!nRMN4nmE^9yc5;N75L-cz!zz+&}6|TbQ->G%%Em`YQ$hC`;|PMGZHb52R_;+X9aw zOExE!VhsHT3X{O`NM;^Kl-uy2J0y^!WxArPx_wm-QXJyNC!4}nIT)2)j?}H1 zvYAKQ=wBD_ftSssF zqeNK$U|NVyo_~b?k%k4L{$i!6K{!^JnvF#XIL4iv{8$Ot8$ndI%D?}$Qmvoy)y1oKj_5^C=r9KXKCZB6;mLXTAL!ysu6+9)fdHi!CLdsCs3)t8T#uZ#M`IHu12J3;(A}!+7DFP!q!>Lgx)hH%aIY+1BN5N~Le2=f76*4^EaAThZZp4yE(^Q)=E<5U z=!gD>gRVFg(w_B2WJ&R|C==roYbFU|wwrL=G6>u`uhl_BvLw@c3!(Z+{M%Ja%jv{g ziXGp8)~W4!QGfPNkhKAMbh7^;xE#AWd_EwRL4!=Y*mzxdSmnW}b2|cj_+Ybhj%<^@kz*G`tbgl z^;=9U>TSVia$j@9{{RXQSCf7I1`iz5KC2y{sX2t;tc&;7!iR|ggl#|V)v9nAOSP1< z9nD(GorJT%c{q78JkVKh#^lFKjt<#aX15)FLGeS@^7)L#+CZz@HCjPu;I|h2e`_+HCgItG~WFSn(ik z>-`EeVH|h)dUy`9rH^v~SIBU{Z{38$l`rfP)wt_h{&_&(d3bD4!x8IgRZ}>W;p4Pd z$9e$0+))nt1&ur<>QSEW3z;vDXs6I#rSJ9_+@0Fls#MsTh_vl@c&FnVxxK(9V$Pn@ zvVC5HeFAgPLM(DI#X_&gZIU!~tIm_CEcEOd;b^vhm6Q5nRLzQQxbk?p!qE>x^2`{O zTTj28H%dEuOKm=l1%wWNpy-bYus~p@N&v$p>q(0E+<(q1Q%)z6Q6VT|tEU<$4K1_K zWrDpxDH^;xijYk%dB7iXNLygcw+wdou$`nYeKfqPH_&9o8p=Yop*_DPlt3I zf-ATS9vZ(l<~DPpu47=Xx^u^I{5^>&EtOQyl|Rx-sf9&_;(~=^UXWBv=)50s@fTJ 
zY=HQgWABn)uasxeF@(tixU}XWmcV8k&SYE8_l}v3&a8*4lR-5M?o*~gW_2EHUW#le&xkHM1V7Egh!e<5B{RDwEj3x?5>AFM6Eq`ShGY?XSs(88fnL@B99g z7K88;0cGHtqyG;x>6?K(O; z?|^rh!f)Wa$eu{|T-kteJ=I^Js?8~t;~z~_nm08GUd7r7=h+4I%0GAsN!zjSqk8!O zK)i_Rqk@3nzjbhI=|Ass7*3p<>;TYpu#H}P$8kOa#_(4W(7k@<1%U)*~I z0HFldFL>v$*o>)poBwQ}CXo6)1g6pY$Pi-V1xqR*+RQP4|L>E%zVRw1g0-;j-LfKr zg>n?)+Z&rdN(Slw;-uW&zD-6@pJhoWAe<0aENU0}PU!p|J%XpR+4p@Go6C6!_)nJ0 zSe4I{+407yy%auL|4X18mGxImCijTXbaVun^fUj$SdO@6)tG>n2DBs;%h?y=<;|>i zF9j~xQgP1Jb_0_>`sbXiJy5gbxjn&tW;dHaE9qA4a$$n(s69}(TmIiHsNz&bNPBK4 z5fr)mG>iFVbB)$3e5PRD_Op8aE6F~}a$8rb_p=F$;-f5&*|_?*{P_^u{Z=Em##0Tk zSldECkW{{U@wxjf@-Lgv)$=di{^5TC)gm$AiHYp>7OG)7o&`>T1EUq%RnWPkIN`Gf&@u40o!NE_%IvMt=xt zGjyea%(`I+_tNd;j!jRWaD^=D8Jme1gO=O-YQQN5m)~tya0Vj*;8{c7dl>Vx$1@IP z!06_W-kyE4Le*Kg;r!XC0iw%w3~|U9uwt@~$rIGnPxLC~uU3sDk{GQJF?XR{xm7#) z{X|^A)5qwd&gi}P(4jFV&T=TPVpW8e;}=|=VS z?*Ou&>;*#NM@f^`Q>KKq40#i0;hpT~;)ahNO4`byX>Hs(nHYdcTqaqrg6Y4$pFY== zZ+hyJG(E5M^9Gr)B6t1LvdfTgx3nJO{mAU5^oglHD9sRXA7L=x$A9r%9GJU8Vkt&# zNzYktyW;X1py{nxH2?WTX+1=ki65=BU*0&n9%A;AVZ}e^Ld07Uy+V9scK+>paAc}i zu3y^4gLp)6YRvl;7KvvIhsu9T0{QXU^J&}FXw+;;M|u&omt6H$U#f4C?ds-U0_9cI zGI$=-sJylyc1&?2|0xB0Elm(r`@tqUx(I8Jq!ukM%f_J8MmMPt1-9 zX^=q-9-)Cx*359PQT)UevrJCe{fUTLPc_y;LUP7OApxh1E-&_zP?0i(9F33Hr&+%u z2Ys^WLU3Sw{FCjaNOV z?gnL^4tGYU#$+-Hp)X8Wg%VlsvTI5|LR1}|zI$6Qg}|-kPGwwB={BA6S<4gddC-Xo ztIOM?@TP^)0Z|&S>6X)PWUZjSFSSRYwS~jDe{tAverC^lqAhu5PU(D&P=Yrahd=lY zAg}MZztp}b;8Ct8sXcOOLY37E8qcZ;BMfwO(A6N})g6a@!pvzW^yt~-v)*YsR~p;T z)w+f)qF^a@C|L|hD>|$Set0_?qC;h=|1L;Ab<%DXRz-0g$+Xn9OR*r?APp+$)Hpw8 zR56jYK#X!@YwphQ_1*`*d)y@K)n7-wZ~4_(aTKbBfJaEab=QI9DB6E=ui5qmE?)Kx z^N0exPJTHuQ8aq%tm*uZ4=7q2RB|>L=>}5NGKo_?!qcr!Uug%zcw!H$?kcj~? 
z_C%^izYaY}UQY*ysLPn+wpz=5R=tJ5z7Sn1%d?!1X7XOuNuU~H7*I88W4Yi{b?x=l zXN+4M0Pu$l;)qTcg~pqdF4vGnFP0L0J_CgQn1dpQ;~T!FuI`lWg%>-+faai2fWru) zC~(eD(vl|TAWjo@edcA8aMt?Vyt@^K3tbsohp|{p2-Fnp;X#Mv9u}FprPoz|{!zYa z!-&(v4m4p5J~Rk#^%?tpXweRlRY&xHvq?mJKtfziX(oDjPr@U_4G_v{mDA5W;Rzs57uyQ{;Wd`F35}|FdNy_N zL%I!3eF}*aiXN%c%9sH0*8{cri+kQnWYT2UVlqGi$p@LdB#fA;1kjn5eQ|XnsD@b` z%rhad{)V1)?ISJHc_!Ccwe4u^EkOEbFys^9l@rW#U8R~q0Jur|2>^vRuHEzG^k+V$ z6-ELYu5oM^R9taFuXAc~_Br(3tvlAa1142JG2bd@ajfuHa)KS(6@<;`PwDzIse?yy zDC<>ipGlt7e&&0afEed9A=`SBQG5M9p-%WT1zdYNL8`|!)9QQ)?5i|`0he>l>?sD; zq#sEAB$iqSpXwo^iL?>^589s61l+}xNl|K|+Z2Ftnx6q3nX+1ld1pPu{C)hkCpB9o zHCc>VXDx0+D*aUK->vv@NQ^AG7_hhKic%W*3t-!kBMcq><_Y9T1XuM+f%95~;X<9T zH^~7Z430P1@$>1web3ksLVCts!`Cons=qI4ivN2=xLNxqCI0>De^@{DSpNrg za9j_dI)kyagGoN+7+aXN+wS^WIjw1l+cDp`>;J>kjl5r>b-Gxlj(xwyqrciO4T-oQ9p(wUGKYuqaBh!wy1XRB-F0SuZ@E` z)nRW`H$M(1a*%@AMj-QBKH^i)Q}}{Z@@qceq0w$~&r?QM1&P>*{2<#e0$RGp#*V=yq#r{m|Uc8;ILeXipl!(XrWWu)5kjh z{r8KIYdgNmR?kPpLz?O3UbUp>OWZ~WzITSKY#Tn%VcjX~O^Z-U3;&&9POQ>b>b!bi z$ikY-xZQ;53sgi(ci=4K(I!%EZAmA0q}Gn!PfEl3b;`xiMQ)@8J9ofPEnq>cWR?4k z+Ir$tjEq;&F$)TZB=bmf(g7SO&YxZL>2{zYanzo{vz|0eZCsUS(gXIJ5%(i-vMM%Y z`@=Qt3n*W5ems9TtTN~#;g!xe9)9ypiO@&NnQ{Rm#$KVJ=H<7es}fp_daJtOvAR;6 zD4xI%uW$WQJ$JfMoHF=@s)^G(|7)%mV@YyX&-dtVQD>sOB#p~sh}ZNi-%On!xuc3w z)=0yi`ZpMvEGOiT9cCRRc&9!py{chEw)7UB*1Q6SM`9*2jYuJLQAA~NyhCHRttSIg z1;ZFfeyE-8-ZX+<9T7jz`#He(Uq9H6Hbe7T0~Nc{RsC>ZgaPU*I?6?gmcjoAZdt{K literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png b/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png new file mode 100644 index 0000000000000000000000000000000000000000..2cc02492522594ab67efb46f3b814a18b26c165c GIT binary patch literal 9366 zcma)i1yoeszv!7^0Edu90g;jtNok~{yQPutl133mK)OK?K|o|cy1NymL0TF?N@C~~ z=FY%(@4Mf6>;KkU>&$S@-uu_hoN#qjIeZ*S8~_0D738Hg0RRjG0MOzs4Acsk;L!x? 
zK}=m)TLy_lqE@o9ve19p004$;1CYSZPMZYK-3>_E{X(|U0x*d-7?R5r76||nFaQZd z1|Y}a4>wur6g9vS^&LPTNzW0fd;J_qLE(yG1po#@2R8vUi%4K_ z7)hFl3{c;N!H_b*Im~S%`Q`3vRCBFET?p>c(QFos76}J-0(X*t(YdU-ICTjf2Tm9% z03#)ZNuVwm%(M+=iiDy0mq2ErE>mQdE3&N(iINA|Hc*?fi`>ns8E8kg544r}Atkbq zv@Ifu98lW#I0vG?_ty^DRhBav9QF3GD6IlQUD!nv$vK#W0*y z?m*gG|LIyv_V7z+l^g6bog5QkYtm@#%JA-Cel8j$tU_|XvE8v#EA}$XQjP!T9>@3# z1iw`!4bj=&pZRvgx-dt8fbj<)jpK=9s1xWo|D@@>S?TyeraPpLCO(3u6mM<}f603x z2gN{D7}2J znn&EVmJh5;B$*Ig&2Kg?M_i}uIcdq67NGNka(vmU_H&OL3`5U!#fyB+Re$>ho~?RV zH%>XKn))}=u(|rTfd1s_J^xlHQrj~xgM89nJR!zcF|y4}LbMUAp6&8;J(k(aWp<%& zBUf^)ec%CvMN#U^Z1wGnr2}mHRkgVYwVhRy0R92Cm|&1``H9iG?_%^T z)#~=GNieVBHbrjzDv_>7dYl}ek%_Md$CmTQA#NO<&Wv>up*w`+ALn=cqcnO(uIAk` z4y^I~@JpRt5*s2*`mVZk1}QSJIJAqdc9M4b1Np{4@iZ=LS0;mhyJ*Ff1^U7FR+#Mt zwQ!BaIlI*+e8E+r+|E}Cvc_1{>1O)+5Ej+plXf9$9b3?!;=v7rTtvN=k?L2>GYi)k zkD7w+az3@O@U7dI(4yvfv_cAFt9PF0lC>>4czC`_1Lr*x+p%;h-Bn03V;3NL0kOr` zk%$5W5QvrjOiPX)IkA9rjft;I9Qkk0Fsk=ssvAgctUrr6+Wp3BpcE1OR#5>_m}^)} z*Lu*#^n{vfraojdMI7Iq=S891uQzKA^TKka8Z!)PV=ufyuOR0xEc^^q|P^h(}sNUZ_1dwKHt8YB9_{t?&yei zvMf=@@YjXnh<1wPcj~ZqA#aH7;E&xFid<39%MvQkAYYa(Zn2NKd673uEG3(Sdz09k zOHss}n@O^8(58bAO5#J~L!*rBi`LTzioC|@B{*gH*TnKjI-j$-^x8vh8@^|zzy?wH zmx;?#5wB|L+lGzG$MZB)7~RVlB!)a0B+P{CUtP?<7kP{4u0+Ksu}1Mm@Xckj0r(F| z&7X-!H&RhTIJ!;(_UuY(Jn);=OS~aIg~YC2ZeA{*@xRnMR)L~{lWEvsdtWc>eJ>!LaFf|5DoJWaTunkVnm0T-MqroOgT*X(@1`jq{db+HLbfwa1 z#{lu4hvyveHJrPpH92(A=!)3*eJEs~BBK~LdpfT*T6(uPmf4VX^CYD$7yG59gSyz! 
z!0?NX;^FbHuGKUgF={5IVLtRV4Un(rIWs~KNy7iRIzGoM^qa%Fd{wO`E68urXG*s8 zOze9Z%|EDc(8qLm#U~QMD?6GG31`~lqraqwik6cf% zQ(Ao-zkh@O=)Y3pUEX?Fxh;~NlANhC;A5Na;&r!%z!pb5j0uzgRH`HLKJ4)_!d*g^ z#2zJ!|9#-i)fg0;nM#%)sFb*@S*oadmT-AaG`DYHzBK-FigzP-&iU;7I6p^)gBLI9 zU}qZQiaW8c|3_LatH9P)XO!o3*ag0aVag*=LC;Fa_DN^MR*LF)5m}Jm3KDWV^y`WT z)jW5QYjJArz{vEK&v9(K0YT$viU~$G@Hk?{|3mQM!&|#mDYOKkUv20xyAJzAJ_My6 znU8ZcfoC@xPf7#xG`g+0+zG0_&Zl{f76vBf@{fa)NZvatt2c;9Z8XrlbO!2&KRUO) z##tH2_;ZiqNq&Nw>Oin!v`A-u09cfzr$#mURE11a`{q!Q?lRMBx!9>S~-l6lnv?X8v`;K`Sz zqi*q&L~2BwSZf3q+c$1HGru zGi#Vw=01M%n2Uwue`$H!kt#ce_f}4)Oi1%d$e!GEXW+qIYm0N8$BYFIDo$+>w;9;W z`ESws0=k%WBGvgEhSF-@7XN6E%RTsmuVRXw^nk^ZJWN$>CV;RQr_yeozhYaI}dI#_1bqs&&Ca@+U=5KkqTAkO^VTZ*JQG*mSe#!3W``2+R!5)T;%+py*r9s zIHTqL5Ee?st^!=R#oGxUyn!R)u7CTn4_1$08M60=M#_^3-u-)*w0@$ znYj%cZqdABf|-c}w|KXTi58fJx)5i{iKNXIqd|H@Pe+1`cDnLYiR$QF=3WG{HYMh0 zcuQ}6Lyxmf5BG-=rSH4Fpx>NfO9UUOm6~_L$(=&hG)MXPj1;c|S+4r0$`3cc+YVd% zaCnYp-JcC7BkEfD6e>T!b7sVE;n4Fd0phFYV5g^?(XWB6fG# z6JhUG+TL%|Bw1<7p6y$DE(Zw6mLHAjFgdI~Cn1v(p6~DGR<2E#b5QTt5}5d>^Vxpc zF=Rc)qHaUa)4D6--ElIq)3C3yUjT#L36!iPd{j^w&ZxwZ997n{RzYFd=WliYw^=dV ztmiTws0J1CiA+S4%_FL#DZJT;Y+4wKY?aE4{v0`Y7k_D$j$S7-P!dbg0yhi^XFn-o z!B?`1cETx}-_k>9d`*pWW6#9M&SpQ3wDRiTuJv+hTLmaCDmB3wq9PLt&8DS)r6Q_9Kj4-% za{R|`xNi76A7fUA>TvF&+<;iH_vb^_Hp4s%%*R(?S*jm!(%U>Wia5K28+yN@Mq$Gt z?i=YIMJg)!qH3vVYv4-=Pq?FWm^&*M$^XqAN=qFnYUNdQ%0}}edtle7L5zBnyV9cA zZ{?Oj6-Bxdu%Q==hC2nJ?#z+UN{dTYu~r?qqICp~UKOS(2PnrP{Bp&nDO3pSgD!6c6bpjYaZ$Duzqed2C;wJ+{Bjv%b&!)<+ zG+AFz=^p7mgY6`OXZD5+en>zG8K@&=oX~L0DX>jU(|X%RHJq&lqlrL}BJA#lUeWh0 zy)_(HseqO2Z86ref%zg#cP184V2;H{9+77QCcDX35waKI;Qru|@`)*1fQQ(W8x{Jo z1K_d};D8+Ogp?L;v0YZ&Dh&QxLGqQ3q&hryY70ghb2y?}bIo5k!puBX4Ivrfj9V8d7*;ivC|z=Yh> zaogaf$1x90JM>U7>esDthK&x~FajYIJKeTY`|eIgzwzRPi!y7;%y)FJYt)O-HkcB1mDmL2z^PTe~f8IAn{0 zC?B=NGlAOc`K^OmLh&chs2dP=b5jcHzQHA5bHiV}Y|B44(q8|K{jJ(z=v}978>tTN zOzpNXw21PH7?<4LXvyv!Xkg-9t3pa_?1ul_XGP!;bk^=5%KUS~4z`~GND9rWdFId& z3K>dsufJnW3!H3^6aYKIM%;ZKqy_HBhQ(RNDN}sW+1N7JQqly0^h9h&R%Q5ZLbz!DYUDwKI 
z^{GlpTBUcrRIkc#J_es=;Lnd2E)KmI(RhrA3NrMuIZEfzOB(Lx<3Urt6 zB96Od7^gk#ofcKT%@woa!j0+eH5CKk*`nRTRpWdcf;_AejW?KOA~s5602_H&YzRc& zNI~fG9eLK=B9zMxO_mSmPpK+%upP+%C-hgG3l3qjJS@{o0!OyN;r$60{MY+(-*dc94a_LzseJ-&w4HI`lrYNcQlf{miPmQ) z&nK1{pabn~!MSBucduRi0piZ9$9pcn3@_=>dU?8+j1=-KMrl~82KN@z!^M-C(S?(L zTnLk36U z+Wp5yZcJnH4(&-2DgGIgjn@G&#dYaqf^b#Jzk#_Fu0HHSJLE{Lmc})KjoSSDYqKAI zh7e@1qx$iCzke8RJn*Awvz0oI@=rQxEV+)}kyv8*b7(|zK>qOk5BZrNex0@v#{n|wlb4S4F8r`sN~ zQDnlkRQYLvp#UZWWeFf}l2Wd)#wFuqH&5kxvxRU3&9NZk=<`t+;Y8VYQ$KZAyB+U_CKRaV7?;+_knPl>?Xd|;BN;;8C+O~3Ybo-2Gs5$4r5h>mn^4hNuO?4e$zsL)rI=X&-HrdH_CW*5Qr6JH|vp@fbkKZ%2Y3I04`78gAz z&^`Z(xX@Ho(yG??33Ru{=qHFM2db{q6l7aoOk-wATIvIyHAI1*-ao?{F(AGQKUXRW z6xkzu`TnBNu+rVpR;n67xB){ zR8*SI^&%g1*c7z}dsu_slW%+7n~@#tFHemsJEAqfjk_<6k^TH>Lj}qK_!q|J0`IG9 zX}ZZye)x+nghS@rj5+RotyOy7YgV6m9)gCX)=T#%VfPaF$@@FU?mez=>4Dy z@JxD~I4b*B7A0Q2p8grT(5&88xuU=!%pY(Y;$zs0!o}2h6UCR5Y%XS3WJsnGsgZ0^ zZ*8IFHC~VcY^`jcd=<*sE|VjQ{adSVtwi2%iUQNJanZLUHjDF1pcc)yD$RV1*J|DF zbw0O#CQRUZU6m3*cTo}K{p@y?nkrO&A?fg*I!@ad_V7Sk*Ks?yMi24FFJqw7V7|t_VIr)M3WN}l4MKS1F*BSejs9<_hMoQs*t1E$EK@ zdw?SoN*4?{6$utyRGJS&1;7>FFiG3v7MvYI;>Dre+Y`tgP0UDXiL=jn^=8h>Iju@f zx8%b;5w3ws+TdsOgtWmossAOAbyEUQ)>gy=Lx!(YbAxE9?JWos7GQBCOJA-WZ z-h2abp&E<1dvNiKSptQdoIJlC-aT;dxB3m5+V9H2rE9cZH#xq6?->k-c#(*gRG0KZ zg_f{~6NGMxHoiB#XKpK8kc4uUSu4eT*jsDumW+yqdS2;NAqsqO^2 zb(8fyba!#DONfV-^GyYZzjDLAE?ICx`UOb?NZ3CImHvVWwQtyqS)cm(o2TTeSEg~> zM_ih1aN>>2*8U@VeG_j4@So4E=qGz@VThamTFgF>xb4f)bOFUUs!6w_Be(ra+F%5#xoxQoL<(MiIDEQ(A_6hSK4xS8-WKP zF+x7v34CGNz^~v#-KOFUKp=*7V+28r_r}y+6WtRqc|KLWkWH{zx&yl->?YqM?`_WW z`r_{QqB=vWmX^NCBxf?W4LR=fZ_2P3!EMPZa$3=_k|=iMq2c?SP({}1$Ebu7=;|4n5urqibI>U0>>~7pau?@1$8*$Cn}!MP z#-bWgRA`b5l9injhfry^Ut9D3>3QQwA|8M))$DcG{h0ovWp|}Z$?udu;3A8+cKOdO z(9Pxs*6fW{pqZ2V*$^?y+V0kk5}{hSoPa``Yp7V5qm)mTGfd@-e6*=|}(NDS#B zyR4Wwng}s}0q{3%28NV}VFDgd%1fN?OhP*UeaTy<=ivWa5Rz&nQZ~cmgy)x@YMn!TYcc8_^;LHuXwb9qd3P<&Jy%Zr8x)&g&zB8)C^ z(0ppXxZPM(GJoY&SN|8?$7sG-0y{T{*E$}u&vVv$l+G6F+#EZ$|0(ZT-g&{8ay5sj 
zcfSsV-j%wLIhI9!oTkO3%B!98Ss^f*%!(WDqvN%~G@|g~sXF`1OR>z}4IYZAAaHEzlW&-h3oI?*UA=kd(plBJ{vViF7AwZ8c zuT|bW{ONzTQh6Q9z~74#=^MDd)Xn*m&4K%0B(C!aEAlElcfoKd;D{GZe?;f6){=$w z>Gm5`oV)p~dXR5JGYYQl?!nP=3>hl)WxuAzYrYM=Uv%B|X{QB*j5n*@boLJi#WS0w zp2wn}&RW+J{RK>hH~>0KpK1(dQ#Zrgz75xN!N{a^;H%1{7txc;H+igunb_!S_s-_g z?(HxDtx>WzoSp45hfggR%Ny4@$7qRCfOzf=uuwY55bDoxtu21l=t8SI-P^@$Co^RM z%_EfICwY2RGchMpRm7ZJ{JYT>ij5wO2JT9oo0~o4@rA^G&3c*)d(fl^ufCb`UlwJ= zoc-sUr{~;oDF;z()&&S zHK*vwhFUjj43Xn_IGlT+yY#mH%89b&rLtWL2V*7X$%M+&y;(Kf*lCzPUd3%EPezAl zOPKtq7R1#`*DR&vfp3;Y`7M#2DVsW0)Rex84S7E_RCV@L9!B`3;l!S^B&!U#eCH#% z2>r33GXkgj^nKR<^S~W9FLbWi@q8-yI}QBKr`hCX|F*!i9|t4D9ACdLo&}0VE@oYe zQoT+$qmPM!pZLHTL3TAjb4hznIFj+{P(3|iWJle|otO0FC(fx={WJ;bb=>4N`IUHK z6QTW~s(15!p>Y!kHoOXrms$LCaR~W>!^DAwwg}PqwJ%BYp;V8WBMJZt2h@mYk2Vft zHYM6C!oOglLVX5Pn(TU3*T6@kl-9=hzo5Ao{}VK74lDp&&z@++6*kh^pzFc-f6-ni z{*Q=nwxqPxl|BLZ|lm6EhT^DxvLkV=|`d@;*)Kxk#@Gq zTItrK{v(&s-5;o`)xpj}x-B{Jskp>&pmz{VCC7xhuJ6A*5;U;0xZGn+cLq+U1JRpo z-M6OG!R!vg+__rZzY{y)Q%Cw?Dj?iC-Po$;wcO#zeq{j#LG;1E?Qh3)va(mbgMn=% zixJBUuZf8lIiXywBwwolU17QoP7PUDk;D4~RBhXObQg4<4B4Zgd4igDqwY(SbYQQ8 z_!!|;sB+e3%M$aYQ}BiU!tdeU4?L|Al&|Jr6Hb0w6D5-y%0YDPk*K^cF~46qG2>Sm zKV(hKx*HyqDd2JbH!v~0aL=*6Hd|!6_AG^A(e5Iay>Z!sz9BY(X9M*=-4VK0hnD!M zcV_XX4R#fFuN;X!SnGA@Mmwhm)Z}K)z_xQ!W>kyrz~rpI&ARosP7BW9(NqcIW{*~h zarLaLhmu2c)0om~y2wA-g~<4guYbq1aadg)C)ACQv!?PqiBHwkyc%CUvTM+UF{x}~ zB(hYp%`N@(n6d2RZ>Q%PC=V;4oU7Y)+_wuLosoL1;o-axqw-ik#ovw3>{v3flJpwL zN1ZW-mo)?R!o@!NU4W^7Be0J$)Ii`@oI(fri=ewd9jmd#_#MQXr~7S;k9gkm7858h zHHBD4T8ug+vd=&w+jzU^8w#3VjNa=kdEUrAr%0A=`u!PYOqcbt!P`u8{_c;u32YJ; z2^?+R^gX;r`%8P~-`!Eg3&{+XzbXsE2fI6CQ4Rics5+i; zl~C;pxwXxHF(6(h&ojq0RW4c)rjp1$QDX0WPvC`eBYQ2ENTM3%`*~>iqXgy0B6Qct zKN4EI%v#Dhk2?3U_YeNo0$E#mVUn}v8Zyn)gp$YGHFWmsyer?*yG?O}z8rApX#_`- zjy;Zctc&R`ELD{@XS>c10m1+Ly^*hR$ZP|L6C^|M?*? 
zp+~vdB4J97v+!f1L*m8v&bl*xNb9aBbScKe*$cQoW(*U}3CVRf-7XTyugIsG zqUGU?2G*LFJ6_qSAxh*|E#&q-yssfr@ECaX)yruD6sSiCz6V#PEP=4-N=FxV<{U@N zDmUhN>q3qPWUUxR!sP0Gjv6B&!cSp;_)7Jq3!DUMw1)103YsTWmOR& z+=i@!_?W`jgMk61GDR3ZBF)SVlf^$GoO3H<-Z;MhsMK*6npUp0zzxs(Yi1t%1(MX6 zQeYWG>^e2t$QEMup_9q+{Py~`KKNcKJ_`;nW4bdOziumamMl`xwPqL*Kb1(_aodDb z@6b_h>eI@{1>6iaBby<=*$R5Xm0wN#gZx1orxA&_d3m00EsJ5N8@jk}j1TsyFm&d` z+I3j^h#DS;EpJ8SYzyfQfsRh+b5s@28UFS1$`a@=!JNoj@|npy38>d&00kLU>5`|W Gf&T^6;g~c4 literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png b/doc/doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png new file mode 100644 index 0000000000000000000000000000000000000000..2fd8ae14e35f58968af1d01b20d73c6ebc7a0e53 GIT binary patch literal 6923 zcma)A2UL^al21Z_5Q;RVN)R`-81*jy)nAlcWB}4a1aPYt9Dme9|VE` zAP^V_BO`Gj7eppWgt)GzfeMjGByqB{v;VxXAP_*rf{38)Z7dqp+Y3^#UnF9AKmZyG z5D`{DEC_@KKtzBTLLAQ*7AC2paU&!>c6N3Y3Zj6U(RZ1VaLjNZg(aBQ|tZZf_H(KqPM<8`3j`FP2X%*7$F9EE}5}2?_`_ z7(KKJ`U8;&8X6@sClNz*2>?J;0UZM#>u>xC__+2)$EGOigM-;@fQOh5+78=(4H}!v zo=en48#>+qm_YzDGk_*F0IFDU_?Cj*`=FZB>T3A?+mzTG+v;<{(@=?FjB85%mcGuJs1fuKtbAyww z({X@6Y=LUZ3I@LEKQgGoN=q(*9$(%roLYQ)b|Z%z>i&^oyo6}^@HN@TaTSGkNl=oF z|8dQ0ia8uu!1d^EO(g$Rcz4`hro5f;+lEdWZ&ZpN`w}w>2&K zha>%UJmtY-fp48c{lM239tD>N^+m~krGxk__qYEe>y+kZ6y^)i$xe!S#P z8(nRQrJMNivS9Yw0@=6C^to-#0^Hc7QO*5b?i3l4j1RszHExE0%WQ)eV%XM2i$E6@|PuTXs=Eixyi zX(>>N@}tvsyp~-?OYNtOiLALa=&puQvh2hv#s3*e>J=U4GDxCL(@(q_|?NJ`^L*AEdvZ}8j&y=jeQ3{?i=YQl`|iN=FrGH>yc8XLmZeT>%U)hW)G>s~1x zKTU1T)uuE{Hjt*$9>dGA=#_>GGAjn>kXa*P`KT4K}7WaGSn2$$J>39KVp--)E5%qLpE=*R9!h<@kV_ilgijWv1WAWwF5eJkpaqmFpk zvhPfB_#mi!J;OK)>0=q4|E+uf0XvxbRNcXUDDIepiy`eDHv-Z z9M3R0Sc|T>=u4ReV^!m(Di%*8RM~O$um?8nG7|9mK3MDc7TL+XQ^q$x!}iD99dsIh zVpyp;zCNyyS_ak{^9`Y0Bf&MP1drh<`vsvQS}`h^_zvXFn9JaRRu|~&och-K5>4Oh zR;|`1WYR%UB-x3aDOm%X1NgHG3FSR9budP0j59jb5gqUpk&$YN zxXgUgE&eCrrxXRCTGFtgHX6fDAPvqBS}X76lR~atDTtZcp>(>mF|)r^%suks8J8E` 
zRqd8!&Vgv8t0VtO(!pLj%Lrb#_h{UnOQ5@@!nn%R%s_EEvuQr_ZQKCG+y%Q*6Ehb~ zs2|IABiAj(87{Tf%DgYF-B5&rVPzq>38>i&0>c2=2ax|v?sT03S6>#tZ&czIpr#j^ z^7%&3at#uw;#oiydGz+Kg&7`p~-nzHy!!h zdVgpn|4f!%c-H5rlnZtshds{;xEpjkm%kbYw!D`8sl18oLy*wE{RxNAe$CQeU!enC zNZ+|U#cEft^~Shbsgk3(wfbW&jr0VK^jFF?N4I`08~COG`AsYMjH4iXaaWZa&Tj-A zylZOo9M9cNo_tBYCH)PF+Pu?jhE6cjNh>toN=rfIo z(0nP`)eCM%_!_AM3}e5AGvyrx^RtoyXXnFtg!8hFEQy3Q!Z^$+bgU&%&M+b?O-N;B)~sEkcipY^FY(_JN#@Gl25ZivSWAACFz&GYd5KJNHpZSj+?Q`egc2{qb;Xe zVp~JT2wUHW9%j}}i2AB69OpOfpcS;cdd2$=HYw^+yw=HJ#Tnfw6>GPfNV2CTmMQ9y*0TeQ#czRJCIrbE zGFS+=OH#CKOTcqH)51!U@Z>rE>83~e*JgLTIJ>J~WdeezNGoILFGH) zCUU<^EwB(5PeQvcV}y=+)E@o`-s@K8Lof#~UgY6{l7o4L{xN|#`6%M<_4o)v#;U6Ld7d_;?FB(Sk|9bfUZSmmr z&adS7EHx8M@a`k75;AZ6C}6%Is0>OJPvLs6i-j^G9iPWurZHbN;D_gs4{YM|OEMxgp2u=Q%~v(~ zkuM97o%%L|j%hyBDa=P8gj+HyUg)zj)!X7E7hd&NH#K)15ZRRER~U3F9B|~p)m!1< z#_(xTb3^k}9?~bQX&lB-d8$N9ZH1FznNHkrd3)2o)p{UIbZ>Sk6GS2A5d(NB-#-1z z`Ws^kthZc}h2pA6msP}P1*`x)!QWTe27GR^NK1G<%XwP;-VBZPqbJbWkqw6dJ^)b5 zgF8T2PC{V5)YB4_8Zw$Vbn|${ zZx=T;h5<@#_AYA0&Xy2@`pK139R8~0PDZc|duV5HJHW^Q!E0?oTn;P^$fyQfh zH^)Bk=IX-MJIHV!zKh3-`v$)a^{@0X1cc9u+m(MUN{WhRt2&2+0;~fD2mFeo9N2%% zGlwVy0NKg--r-lnl}$(Cp<-NTz>~gJ?DWU`uw>1t648u5n4< zlTK``%|bIk(VZIvzAO{hE37wRpEeoZ8R)0yzC#JVe7YUc3#Q_eUo?V2)DczT>t7)N zKvD378Upd=v4SB+!T?@0ni>)XbXwT$BA41Q?yw^3_ZHzP3=psm1(?=Cb>(aMhaeF2 zrZ3w|xS!y=#muD+kTD<{TxiuT`ymJnb=u8JTb^99xBzMR#iqBS^+WJQRwx6>Fngy9 zs~$^kGoy^8e5I@V&T7I7bhzd9CS592=JVSZ_Q!sSE2a}>nP@vTZeJO?1gJDP=?1 zhl8;c9uF9qcG^&yu(0!MPH8uSm;cb*v zb_&C)*6Rcyh<>D&;nf(dIck_=Z3?Exq@4Aa9uI^Ljox`}s>Qzo>2S$D%Ao(s(2e3< zB_(dJYOfUPfDiX9fn!aAbF)r9?FGP3KN2hQd0S*#Bk&tmw7c3@Xq}PXEm`rMY3i=- z7nQ&n^bYTDDy6+j2snso^nx#~}gz) zpU}Gg`PS!ELdV6FOi#gxPe{Sv8%%tssZn=-l>ip1%*{nFjzIn_M_O_xukkGmkwqID z5ps6JOLB0?P_BQ@*m6R+@1-Ndj^jjH66H#)U58TPYy+(rF+8rWj=Z^x6y1K8hdo{NQj;g^g$M$ zP+Id)sAv3}Md zC>mh6c!ah+V>9O)%8X|~}q<1}6hc!Vv)#s+Ype#Gb3X=^Lk5K6t=ajE!~?bCxhuGX5ACs! 
zAaa{m0*e8rc=|4x-cc3-;loa#&(+d|^_Q7@^`69iLeJrQnQpXp*UK7Xv?pzoERr6^Dk!G|(#cN4CX|pd zAoRUyD~<&U1V2(FoRloHR{+4vnmgDmzjeb<_k@`{Sd*`ERU1?7Efjp&W}I_~j#Iw@ zldnpcDLPd*8gmxJY?6RichBdAkD-R!k(N72I<%7aJjiQ~S$MZvG=) zos$^#+8X>7dc^&Pv!y%*;PA@q(`)Rp6pnb-t)YhYBXV%w<0OD7T-P~6(qY6mBka!8 zY~qj;7`*)bZOD&abXLF9>mE%b$u#QqKzf9JTc#^Vo!=eIQ|)_F7pCH$?=TJvNKv*} zFMU`Q^a(h-D!~uY+}7t(&a6LrlS zw)Udf-sgJ_f zP9Q0R2j}^_yfKVv=f201$gIv!Mlh*}kIk|a%qp3i#0cM`I^JA+Mv-E<<9zMw3RQiI zZN;lC9~Bhm=DpMOshILk3uHpN?M0^>SEgv%;Ly0_d`y#k!p*#%j2LZQpUZ5$XlPt` z4BU0DQ`B^m^Tg4q)MxS$^pqyNwE@-MnQJ!j{*mH zvIAt-ni??Q67F=rPvc%5f{g`fB1kln}{Fo!k|1XE^L3v1VQ7HE{X+Tv~nSN=w#t9&<9)=VOX8BOe%L$AOnfm!%Fp`3f+14~HZ6a{E*! z4{qIj+L42i#Cjj;qaD7-2=0_(U8F^@g2VP+u@mN(a5Dd**6w{Q(Zuk-l;PGgJkW?; zfS&bV6zBg&>HN>af3a8+2GjYVVD&FB{M(iN|F%o`??6&GULL5U(QBUd45lxDD(nYU zW(e3i#xb5WGxG6>xRo#T>>#k`WNh=wFjx2}QZd57&~*E$!o1q_s^W5B z^^H1aExQ+?t~sPFr$&9{o~BJP-!~@Hm73{?meur^Rh|k-FR;DcsrWcfXMCc4Dra*d zM)j?VkHxS-z8@8M%jvIRSqI@jrE#zIJhGYLzsl1bv8l&40yj;IKvRmf_zic<;>S!W zKic;8tq{_Vk4$)n;dfQOZ$eQFK~yg(KY?xy@yQ@lSukXHLf14% zj63(M?o+Ngvl5y)zxC#ic3zVWRr+Tbb=gV{RlVTFpVk_?TzX84X1P~2B-ohu0#k|| zo+LiHC5ZE1+hX&Tt&`Mu=sveSLFSE`etYkg1-SkUX8z71$KAlZ!ywMkA>dHs6@cEv zr+h9gR!SZw%~Og=sj;69Mm)BAc9_zNaVe-^Wug`>Zw)9zv_$D z-FAwazYKye;v0X;8zfN6hVw&Q93$d#+q!*jD$3**bSH=_G6tX>1Ewa4y0T*XG%sLp zi&{50{QD^NIu<>xKkt_6B`u!oq4GCr3?3md{q6?=azgtwbW}~%{Qj)pIpkOxI6K+J z*|xdG@A;qn6sPSM&iXH%4vc0I|6`nm$%3dAvc^s?Oe`yc|MN(X^u~FP+vZ+IMYM0l zKW5o3oCzArO%FO$FyA+Twk6MA?$tZU#HX@UN$-z~p(#n*;5xuwEGx1of_?kCd~dxG zR+^>Wqhc<1EY$}78xhIL%%?9ak;vItfg(3ilrmT~)xX(s?ej*PCQ@qX3vAj+)KO2$ zO!z1~SIw+H`uz;6l#Q746%jn14K#8WtdXxKTnB7~%Pcn_*ULSw6wK*2b6+_$RlUql zNt@!Z+b?)=k@hb5K|0 z{Znml%0L(AhUw~N57qRA&+ zi4*Zo$bx=Xf1^tl=pA-lBfI5JQ`%)63H z2%109NUAvDd3j*{v)Jh&a*U9iYCFpo`gk@Ni{B_OZhvkorIoDdoMxaMWGpauDRn4V qmpPffg}bV~(|A*J_rgC%#G+50CH8z6?p$>Gho`2Ztz4mK74{z@*Nlb$ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png b/doc/doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png new file 
mode 100644 index 0000000000000000000000000000000000000000..0a8ba89a492c7354798fd859159f0fc9ee503eb4 GIT binary patch literal 7629 zcmZvB2|QHo`}f#Vgp^&QCrV{$ND-2&RY#V)h|*tNqYphuU%qtBw5+TwJkV=-s;{H&Bg8_XP|eNF@7}%Ra=GH- z;*TCZ!eB5&BGJUeBt1R7y}f;IZjMT&YH4ZN*w}pd@Ig^gv8$_#pPyeyNNCTVJ#aXj zYh%NO?m|LbU0p6D!Tn&vwbAA3!ntq>t_1W9@gWW*!pFyzkl=D*Ft`mV-~&m>rbI-y zcaKdC494Z*UEi%d%@3cH3&_{C8H3q;fN^2E9}*`gCt)9Kls|;?iY)L5uD}8k1>w^l zNC7q^_y^s@fB>$t?u@7?S5UWILU&L=_nQ!WMp>CF8j!)myTLE_iyIICgOBk_%qrV- zUkVsh4k!_{`3MsP`2<(@=nl%saR&zn1qB5!UAiRYg4ux5vE%M}`#=~>c=zVT(?gB; z0}%y-E?AffQU&>hcJbb+{)-<5+k4@H$=PcmL$g_)sk!HaW2X4@4(-C93Hj_W`0|-g zftT`2>dW!DnQy(f!e^%0rQu%)pFP<`#%ZJ1XR5>NgHlpIMrZE&Wxo*ebmk~IUX_Hu;G9?G#>@zC zzhDkb5&e1kf3{PW)Q=Wr6kyrG$-~)g%0E*MjqEkc(J6QrGdd$RjeJ5bz(S0x-kF((k+w)whz{ivujc*J~laPFdk zji!C#?~5yE>1nn-eT|poQq>0oFgJ#FxY}BtbI{WYPq9An(Dr+em2#47Op>deSZ;S_ zR?i2bRL|U{K0n=zpXfgSIm@X1DqL021&(ygk!*+GLA3}g`>>sk;zFtJy9Ji=ds?_- z?gtO3#Wh_$sH8)6E;^)&!WUHBIM@0*u*~y(-Gr4{*=>323#QlHj;T^bkb@78P=r3j zUisB=8rwV9E`q*x?-}{2x|Ag!D7uU0c^D@cI{Pm2CTUm7=QGxx<%*Q@d@~+e;X{ot z)P4?Xdd|zhUenQny_5hZhgVNio=R4{b&vq~-?!hkt(FIZduB2l4lkni0iGC>t*#I~?Uma_^d!fbf>qP-<$?m~oMgHHc!LbGuST(^T&Icl`AjC4gPnZFZXXV8c2Vljm}3{z+lX6}|agx+=i4%m|5-^Ic_` zbb0=#LW{<|abLf-7+)QFA91B5|GwW6gAgivy#E&K6BgkMPb+G$#IJn(`rY(59L`2eM5O`{uib*5MG?)a1tf=eQ&Y8A0y}v z40}$6cw6S6THMHs@HLCTK{@y}5XBFqHepbCVRvk3*j}gd&cpkz%KY!StjG^Qv%5!z zw#Wk%>DXp_Jv0nOhv%Dp>2W`I)PB z@1^#69p4i9Lv>^=_!4-hx+S&9u%&otA!tus<1e+t-sVmcntnLz0KgmxaE;AAHb)EwA5t<*YjKPy&4`wX!I!xOrJh_6=Q?E?5tV+9=+Qt2$feB6UJAE*;;4;HEbx%^AG&Xq|ejxW-_#;Sn z-j?o?7dd~3*tvz6suOY&ci}IRkfyW z^UM$jR>;qzc#X2qB(L7#Nk4XnP+q68?&$X4!oazU)kLp1Z2MyzjORSl+krK_ilR*h zzSe1Xc4?}(d^mCK+m3T|VW7FyJciKj`CqXg-%kizXG&vfLse~f5EXsVFvIBUcVmlY z1-)6LaS~?tMEvc)$~9Inr`0Rn4)i9v)8f3hoW_IiqJ^^54(;8~GI;rT0e7hP!P+si zg`x{9#E`iKQdL|0kfh2wVmOG8rbTz0y7A+tRMpb$lzKwuO{|^AtH38wOf*~4_qIuj zM^lCCW43dSf9F|Iz>Xvx$y{O^*m$Ko?+JRT)zWg3kM?{#{6pVoIjpMeZJbtTO0?B+ zUTnj1^TpM-DYI9<-QE%HXS+YKRmIvCp5|ZvAsAiOe)?kg=r0wa1|o*sTw AK_^6 
za{Jz=HiymJqmST4+fRnK<(Y6qYUtraF!*rqp)$1wyBg9^D)>$^fKYwRtv^!M{e)BT z#$wTL7@)L_3(re&15tj#R;m-lg$q(NzGI!X7jGUhOc|Vdy>8x^h+FXI zb`^>sK=l7k{jY>$+Vi_#87~8~dg^gPFw#)f( zeca;svCcZqPH;e1#-hM2(eVDwZL+fN^l%Gorga;`ru8qS@ts)0V?^}>g&3~PQiuC1 zH>Tw<8GfuYD6IDgCw$u5ck)EHk+l zoD}qY(2rX6ZvzluFbm$9>Tes%`iRbn7Nt4P*7(=Nidd!dR0W!6{`?l zR2rzTm})%NxAeBe)O zK=&k+7$}Yx_Ady~tRE$m{7yJ^pXr&ge|$y`Xy5~lA^}n=l*Il)P}GGUFK2*c2_O>? z;G6j0SMDP~uRB<2XokojS*9{H)lpS}oienN^PkbNOQwE{f47GF`YT+{VzOWWJ)SLq zdqfI^T3=Ot(e{u3JqL$aD6?pNrl z?x+*NUWs;r*tn&`(E6YW$c<)xSwV#7Ag7xmAPaCkfK80}EWP>?IU@+j&|Js=Wluuv zwvP?nMTCrdhLHH?GybUpDc*0{sP&@ih$xY>D)$jmwPt0bY>=X57pBXpG{& zB*97W>*&Ox5Sjrj~bJg^O{IXI|j5DPyViMVkC%f@|APh|4_lY{RX3C!gS-V=vFMRmT4*3#udMj(iA(DT)P^d+(nN+ejK=tWcB|5h1eYxAmH)KOYX$l zPsdNF2_6wDCd7jUkD;NVBo4^a%0zL3$OOM4HZF_@8qyCQt=$75P%TI6GG|FntcfDa za#09}Zu4qAQbi}`hnIN8)8ZsX0@JI&oXN&%0)D@ocR=a4NMr`9LI9vIcf21AQ-c0m zV!=leGSuxLXzi@W*A`VE+KCf@l_L)XqcG49M_B!1=%w#DXmPMb_jwcfAs0xXEjD)c zIfQb5A9yr%bGY3&KDQ3@1xzUql0U{r^T%V`jI6@yF<(d(C%wv@{#)mpYy3uvtG@r)Xc*kGkhsq#_ z%U2m>!C0bD7N?3J9Mlzar}&h7|oXW{DYh?PENH3dq%v@1J^`fsE(qwAr`ikRgI+RR(WfnY< zy-s<#$+NDQI(eTNQu=4s51MF%>@a8b%{a1?8kU!4DM-kH6ZM?wQSC$oI09tbo*;B> zB4b1$Q$pgp2iaNn?3;{-3ZSU6hc)5RJqc*dipxD+bi-Fx3o0$Lyn7zaV>tLKHa{kZ zwIt&B;1myS|1Z;Y7k|7zC9a5&$10Qj=nDgk{rCC~g4qhRG$;N1f$g^ziF?M0TJ`jB=jPFa&Q;$lXHS7cPggbk6^aWd?_|6QY$$G!eI?k%>OH}#0mkq4 z`crgSlHad^PlW+;AL{wk@<=J$fDg!?HmPC@(C{Zk5i5+bcZp10U%9pAxp(rQi_34f z(a835Jb(ez7&d4-dkc#RP|JVc<-y9HAii)JxT!RN$oy)2wgO%e*DLxel8F|ScNxgs zcN%weuG)V*oCjOaxY-~=p{-N>PD|z=C9*3;Xq=Q%>c)m0((f^N`mz7`?=JPW&p1~9 z*)GG44W=P3M9yk>-~j3%t99_Tr7T?^E7!(0+|Bl_N)iD0$n$Gf z=AWeSWKCHb4jhX1k;ldsF11~KTe6O5zKzv?=$lQ=$FXW!(u_MA^OJ8bf(}U^ci!R! 
zqO@z;x=!?_Cf1`p>dD$ZuAR7EJ{r%Oh!r}JnfMhV+Q}iU=BIwp^{>|e1D$(yMaPNz zXrX@Gl|1BGt%MUd4wtWu$%zfJE12tnwr5;KYd>gVXzEWjhR(o~L)u|ZT9wOUj!<1Zrk>JpMe{Np$r$k^I zvbyfX(!&jsIr)ZP{xp>LuF+}9?ciJk=BLSk(>C8gRlSW1p{!Ya{w$dbT2WG$A8uf+8T^!XJi^>lrSq5*>7 zOQc#|#RI~WOcEUGSLVej7<_|D!uL(Ebq-lKzc!(Vd)5F74!68H6g~RyZC`^Q-pF_+ z>^L$xiY9qt9eevF0Uprha@*|kcI-X@(3YJgTi!@M0oBrtmSFxb;|UZ8V;%CN_PQW8 z4v{yz{N76m4R;XiL={girz;6z#g2V>{W=MMva`}cV^04spK`p?oumQqSo7cwH6|uA z_xEWbR#APbZs*aw{C)=bEnWZ|7`>y})nHmW{AKaIfeSguCp&f*76D^=mCae-EA@B8 zGu!sMPg!FfLsnGEyV_i469zZ+^)L1&kB!gg-;+idP(mf>j>(9?S6S8+GHrdsFa_sm z4`rg2V^F`>eO-CF%C*RryMi<q0dVMqB){is}AN* zY>G5_^edRe{NlSYHNJ{1x&<{JUGK%ZYX3Ct%Qn0Lne9#UDbansbkG;tek7mtZX5)g z&?gN6(?l`a%syxaDJC#&*l_au!Sv}SFeNBEQj2xamnezVZLJgqfk%+bngH~2YxJSV zKcdu0lc6;;6(Vjmdz)N8K`19sC%HVPcb`yvmNFI*xc~!(#?~gop;vYx0(`)1TG~#u zYe>b?Mj|PEk&ZRz?a1|^^u11ENXr1^c-xUWPJilV1L!0OZ6@zD`n=-XY7apdr?-sz z2!DB~iQ04&GSdW|-a(N-?IRk7dVDhXPeUX zJv1k_LJD+Gx%1X^Zwx9Omyz}*#|4+)a( z>bFvSEcQZCbqgYC9}kv3@7pE6?H^{*U+Ab|sVGEolo?2x7&21_T-7X(LvLY8s0!xz zLDCl}J5kyHzFHqqf`SxIH6Vf#r5w0Z?0Cj9KeMUlZk}nME=c?lW}-jhj=il>EFR$ePXHBLclZ z#lx;|XTeVauO>*97CUItG&?JDWFUlhPg|NS$e4lf*@T9*)zCwD!XsPycpzZ;0lU99 zYsVie4nl>3*t!+-@zT!5YiiZ74$`EXIZles@J$hgzoluAY$Wq8RFRv|^U~TNDOMAd z=FL>ng=?VS`*oNf5Ipr;l9Nr4^|AlUcoX^zUEezlD&0*<&iu{xo&RIoXw&>(EVyg!rw*(au6oNs=(Ilt~P1-CyMm*vcsv?rmx1HUDu6%gF5px~d z%R>=&Miqx8L%W_;h63dI36gXe)F~UNNg;veP#6+dgt!iMvWie$LRD&PGO<|;P#HnZ z+1B5Gd7IlWmTARfCU&GYk#&>W^7=U7tFtXYyOWxTVQx$Ilj#wuzCs{oiwRp~b>oXW z)wa%rkg92pq{7d7{*^a^jQOy*hLlpvi+Jk>k9tg3PHOG3jCgr?*F}f)&6BCs(K~pv z;lDq7MIts&lVr#;!((1MXvFJC37bLhjc*=DeUEEj*-$C+F5N%|8JGArI<5b?w?>zb za=5Xq4lun~rtfQaf7~FXF6=50EBWluD4o-WR|2v}-Akox>!m#-36I`=O?DcsUQq!R zeOODE4n6y*X9)-a=jE}{e&HwZ+da3A+q*T`>dLIW$eJf|efb-s9S4?5!ult3sDz;0 zn)KB3P*o<7R_pBV~&aOEs&-Uon2VqhF9ZW7;#%$MpRzFaI-Eyrc1d#u{st zQT;sT=D5paBZDFMWf90U{PbA_B|k z+~2QDaa7hzrBh$Ji=K$};Wz$2a26NIB1JSWef?wQn*(fz4n!<0TDBDyyXWSJbUn%W zae&b;BDqm8+0CL(@-2P1XQCCiV*dNzJcxGm1#jlz32o;ak8c*Ux 
z`m$Q+6BW1vAYaz?YyITNLNoj}9ABMh)hGfZK}TmlNMCaIqGrvqmedd=oe@X1Cmy~{ z$w38|BpnapU%b8s^@BWE_1+!r=}!E`X=&_+m?J=d>ea5_2g2h8Ui|s%(E=NrMV=QX49j4_v$ZH!NMn=i7n+Wf|vJ9=uOc|SRdA1pSVKY#m%WcH@k{n-4$ zTUBv)ep9Z*5b_lErs=JW7)Aal(NIHRUL0~|Uohndq`&Ii%P{|b1pZK?Stp{06g&?p zibIPgHilkaXa!fEKZX`fZ`>3rfjO+ECzig{u`DHYxZ_J>twx{qu@dSQe|qiW*sBW( z1f#*OB0>?C*3!y>ge;T{BZRKR`1;q`hFleoWagPEI5oueojT?^v3iAmeyYv_e!cDP zCE0klaAARG9_r7Ks#EnB=kCpHIj@-~rnJfkHKJIi4C=Icedbv35a-WK>=(0p@b7oN dQY2Rl8Id7A9CO$W`pyWuU}|YnY;@zn{{al+#P9$B literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpupy_handle_func_perf.png b/doc/doxygen/chapters/images/starpupy_handle_func_perf.png new file mode 100644 index 0000000000000000000000000000000000000000..8b66f99a868a36fab6b9f47fb2688a2fa62af795 GIT binary patch literal 170242 zcmeFZd05Ts|312~vCSVtC<s;s0^I6xmw`sN3^M0P!bl~e|p1OhP4z5WrOsY zQ;HM{`R&S!zplbBm3t#D;1?lVNp)K#OG8@+eQN{CIel9zGfP`Dxgk>68DKMTtimQ2G}QzD8K>g30TP znmWnVrER>a){Vyt>o^@&?%2EImu*d%uC9lIN8g_~QQcMDI6iuGQ_)~v;^0#2uJdEP zS!dn!cP_^={`ImieA}?{Uw_7LZ@TZN{Q5us>L%lF|NWc4cjUjj@%L5`Yx%Ej{A)Y@ zyBq)7jQ`rkzc=H*yYb)M__+iBjS~Mhh5zoxe|O{09r*u~CB$4DTuynj-QC^YH#nG+ zhleNd>;bKS75|vR8<8yg#=DlfR+%*o9)*WU2g8}X|eD<7Yd-9X)8+V0(NhVD}K z-fU4Ud7azwTK?a*;B#Ncn;|dyf3FMcdnNHdR{G;_RLpcJ zluug=4*7Fqsm`t<1^r-qYY zkGg!M!_#y9QxQi+>aWZGMpg_I3@e_n_>?FQBwMs)4O-{iy0u)I>fuplN52PqgzkUqHaF6BY zvQH7`f{tjEe`pMs_8D{%@hP#@DGp$c2;b&fx@D*O0(Qdhi`&8?w&>?e`!rVjDdPG_ zu9`|r>&1?DVzZ~7@JD|e+R7?(_^@EyKMTVTw6wLftP>yk<>ug9f=@H-n3m}Z~^NZbJ!@0b?ydEbDh2}TEuQ1>ZOu;ExPiv%G zJk`mg{j~`#C1tV-iZ#qIj89qHC>aK@A9>>gav^bRwjQ`L`lT>n!-x($E9>c}#~d2E zOML7;zgsCf@lhg1Dbm}pGB_Gf^7X@`{oChPuUa(_pWa^Mx-`CrOXKyaef#z~O;n#w zwe7DNyee~?O~P%xLa3PY#Nu>Eto=~)Kz(e0u>H3vxK72>V`{FmWqQ8;{)W}X{=XMJ zeE6RYI}aUSv1?$!=mKJ7B+OkspxwH&@o>-{eiDWOH9q*%KWQ^bAx}fm+ z^Jk~I{z!ofZzzsVPP+BiS_6WxNiWVG@WFRn83e=f^K~<5qby}b6u)_Mih{cOW6e~P z2%H?-(WB~_nVA7Y4mgenfi_<~0 zh0fcXVw~rv=H@$S3HO+szGemQu*k{1K3!T;Qqo}H`}#`tZxxRZONfbyX(nB~+ni=@ znk0JoutII5T!4p%hZj3%yiT_L_T&tB)?kxSXnkL(Yr^M(d$HH2hpp}H({z5%VDn-S 
zkil1|be=mp{x!|K1(7?B*oaMUd6Jz_y5IHT%+q+g--8Da{)r#0?`vvY!{7pQG!!qMzWjtzEXgN6|&NH9yQNurdQuk;I z(!KV1Wkran)?j0z+R2l*e*5jW(%pT}?=jWHCgwTWNp`)gi&EIH6>y>c-Q9I8boauR zb}#vacXnPl7a}q%6`-;Y2J1AEJI(L9bR!}>{K{ZMJY48$eA&BmMI9Y_OG`^>+yya+ zLT9MM8C@H9saelob#TZx&KLhH8=3L2_C2ntZ5({_`(}3b($}B-p9r@toMx&;o96KFFamaC*aNW`iF+H_4jhm=;8SwZEvHP#c`k)H+G%p2 zr`fe3PJ=h_Z11=r{#O+tt1;Y~snk{ErF81lofx%5?j1XJv|TM`bed{1@(T{uHffa& zC@3#4uUp5H^9)X2R#GB;smL?)q7D-ilgY7DtKmP;SGxq(vJU+25QBU0F*|4HP z?k+Vxf99t`Hd6bgJmnEP$@=3o_zH`PN)LHj^E}cSYuyB|r}7)uFpmpxW*VwFMc%%B zyJF;Wi9l+-^P_MiM3YiqHk0ZXcSS@+d;0sM8{)N0r@l3*^Z$(gKf=+g6Rh3*qLc-dO8yhY?5PccyGAFG#DjKhspT}&xI5)X{`VWJk zpLzRD8sfO`6Jf3SsST>yt&Hv-v60U6A^yoW8yp?mVh8HyW2C&6sNpwn?pwPtB(Afw z#N^X=zFle?De2n{kxTiq?T4t$ckkWPYSXRBgIDtwA6hSgM^0ndn_^1a@FXz5IkT(y z0f&f)HjFfDyz*Fm*y&Al+qbi`u}OzIPX#uo7>BuS*yV2;H!)#LtY8PXw*36mkX>&% zz4@nCCrAKRP4fQvdt>C{tW#)M8+GB}$Y7gfwjUBvhARx0tDsoH(kOAC(DS^LW9@+WF! zn^R2#kTz{QoTu54XO7u_`zXFNy2*KdDBaz|BiV_I!zodGY334f-UB|;I6Acp7oL6j z@+DzFH{dAzjs$U(9;v2{=W*WF4h{j#iwombPMyE6wjFL25-|HDze(i#&GVsR@u&=q zdrJN05cei$x*o`~>$(dG2;gty70;acqcF~+B1kYlrkPtsSvg!c*U7lRZ38N1MP!PF zg$2&3+39>*Jjn=T5f7L|x9!=Jh&05pZ{G=Wd_h54nGe(+B~=Csg&|xIAZMVWQna;A zseF1Y5&>R!^(H3H>BFL;q9j!@3frCu60~e+bLcS7vsXLj5y3T57OfOnmujju)Sj!g zY}qn=IaPmW^J@c-RU3Cl;adl9ZxTB=orH6KDJQYeRGr&K)b|(9FA$ z>9R0-dOZ1hv}N9`)E?)FN1AD7(IKMOO{Rz2G;^*cAwGpo4mJ)BXOD0 z%euob#C6G0f;x(0)@sYM={qk$-OR*fVKG=6`C#;c>q5cI()@@QBGJxGVl&V5c>_Ir z$^!J#{DXsc@87?_U_r=bA!A?3hYuez%<{i|GwYh}a1}&#$CIrW5p=BaQ23Wb!9ds( z1J!fcidE`uVmK`cYLfksrcKLWV!p*0UIbzl6B<%V*T?4)+s&z-2>lwO3Cv-Z9eGmc z&wD$zd@1zUAT8CFWydwQTkOUsD|`DR!B3xh379rKZW$jR_d*dFSR5o|W0+a#T>J8z z8$#W_k|^EWtiv~EtS-TXiY)UN4dFI^O;yda+9nlG1f)z%VkEWn^`9R`h?JlnzVdOe zhxzFTOahJVFVF4jYR$BXD1y%#22S^UA9&6!G2tC5;KM%W}JC3{C`55$);BtuJlSa(r90Wf!- z_THWJ$v-5-B2Fv4a-okp;)rMK$zr0@PtNcAD zRG-LQ`Lv~!Bm-HUOGBFUrj1WL&0C)ATD{5p@cE~jX5BqcGrwKs<=g6+YHD!V@PPJ% z+BdESmoNJc_H|G!&T%eV|HhD8J7cf3su45QJ%G(`Nl+<}S>VdY;{-W4O%KPrEY7iU zah>aMnU80>@%?QXlPQws*y!kCR~bX=0xyNvwiW^0Ix^<5TImTVPo69l^#}eGTlg;F 
z5jY4dj)1rKe@eO}bM`EMdKD^IM@L7ef#}HcvKumre(Wj}@N?a(tMbJ?JU#1Ra;RS; zQPd80JrHlGl@pIgQN@1fko?vIXXT}&r0y2^hM#{btWoaCBE50rMyJKeMnqdKF0Hh< z@;sbV-VJBxGP~foIL^h1S~&u@fR5`UWOv+kb5qS|cc06r&5O^@%&4biW@M16MF~qt zvF%rkU1WDkw6V4(^+lU2ZL!y$m6a8+GQNC-PYuc81swl4!sGTKKErZ(85wufabYlH zEi==m*AcqchXiyotWMbURp!QY>3Xp#y^I15FtBi4n(eEHGmVGBPAA{3<2ge;g8cFP zlAlA|x*gnK%XspAf${N>hF2L23JL^J1g&LkWYWmyIp#222fwO|QQ^p)AI{!hbqy{% z*&i7yLlr|}v}Wu=9TLENF8HW|L%U;SW?OOb>7=A2l!|lRK5}u~D_hgdwHAALTo2Ql zfa}J(O9c24zYd`6XLs6dPpmB&WT zu)74v_|N2~3`a#pSwB7YXV|554s`cK9Aq=b+Z37Us=hmne%TihezvhaSB{N4A>m7x zM2ag--oRi;VRFE^F0M&YQYu8H%s>Ai|AbzQZf-zu@B;Vs-9~-K>th8KDl2~%{E|ZH zOnblO4dq*D2W`n=adE^o(JCK+@o@ng)i*Fu!*Smmj~x-t^9jzuwUH4txBV&h@jYtN zW77dKYWZoG_)VB~4E{5qdFs@uGgMv29y*W|F|q;N5zAHR=m)E0gJcRne-ynwb&!Qc z^6lHV)No`WSt^tBM784l{M=lOR=R~|$`ysX-W@o7$s&@YpE=TA}l0hCk#da9TnP$UsZNW`9leK5~gL_F7Nl6qUBzoXdr z&sw-_a-qAgj|0G!DsxwXg^iQ*EIKmJyni@__rY+l{5oH$LfBqvsoP_${JThm zN2P18fwLZ2nua^#ei6b`jC> za=u=T!|_7EuF;eWjsu(4tT}woflz5EXHqRYbWt0nGKgeRskEK^foFS$#}N)q7G_2P z6%s!tv0EpC^ujF4mlh6DiFqVoBbFNT{3UTHoEyjjIa1KZDdpxqJp|=VR5m7uWw1i%IXz(lL$gLHQPrx zyzFjcmo3TMEpqLS7z2Kj)PY+Od4IztHjJ`>+@6IRNVP-Fl$_bk6wK@Fh!ku{cerg{v$_J5ue(g3qzd0=nNIf6Q$S(3t$!rrpAa8MPf1Ls2DWkceEk}QrORLujWrRnsfHB{g~i3D z@QEj=2UHWZqfrq1n=ZtziPy124OB-kEpS?O*lAKfayWFLmLiywIsqf=|9v%+Jdi>+ zNYChNj+IDlO}lfll^qQg%#gv6p;QtyQS{n@pW>Cstf4K>HdyJk#0dugS3~#^1n8IE zjK|pb?tQb{9Yr|1%~WR4XWalhFWHds(@c)tuNz+|u;ke^w8wAhaoo;6?$o}1y4OT| zm_hf!;lzoB6w}6Gw>zEeTRz1{QEk@z`s*uomzF1`3-fXpo~ zUJz#j*4<|e3=K<*BdIPng>8#-3?7ydxkTMue*15q)=uYxE)PY_B%sv5z(8X|xPN(#k-5;|>vk{d)U#*L z%9!%IwPEsQj7PWwH;;Y)T+q`DHu=ZP%0Nh{xI#bQ)AL+u>70tq!x_{QU-7}s znKnkzL+iG4K4b>!9bcv9R>rnwV)s$2XLV^QDJi@wp1Sr94aK82AUtbTh^U|DrTf$# z4d7~Z6D^d$s)@Qf&qw1b-Z?}AL`$GRxKjp(8Y{u z+J60bf?68<_7De$tZ75sBW8q?Jnc)m!wY${y+IthwPWnjeLxr7sa#xK@$aVRWU}ms z*!x|V6^|WUL-m{FRBp(&;!;&pu45CqGFbJ50cev8p z1_WLo`0clo;Ej?UdVQSdDFYqM~@=y8g*n<(uZYZl#DzF8XqWFeeY0?ZAhkuYo>;%;1An&F9;GY zvdCx^C4+&m`<}_k$v{;s(bo*~P!5i#5uBavFarKSIiezgg^p)gYq<1Q1pSzrq8#pp 
zMXvnzQv;2OiCK1o1C4sF^$0r^LmCqu$cD%1j?MlvbiF*STf*s0W?PfbY`f%f5L>jRMKmOy zA)ayw3O44LBdgDW%wwOt#tbxZSXu@TvUwsbEC#hzkWs{9 zGBO+CMtT@VGW$Y&m=R#q%Nd-&)DVi$P5%m*Qq=(F_7r0u!}7=a)klEo(2pR6M$g*$ zys2hWc6N3}P>yk{=HM%_Rbuny3r2wJqO^r^YWO}0H{U0B5AEMC0a{s*)&wRPJhAYN znO7+I6V*5Uw6a!{wd<0>+Ay#j$a}7nJ>2;6oQmJWk07ae&wvkN zIz*ebbe#_(42gNuYe`awH>X=BBKaU)#S-{)a!unv7U!}XqRNRDvy%t-@R)$4r@(3` z+}p_54b~AQMQKZm)+W)3hmvQc*h79FLFuUq{%AOBkQ;4;{;$#6<`cw&0Eq4e+z?|tdZqm6BgD4VyH*q zM#sPqgLp@55&jR~&qN<07T`a;I4pqGymdbqN@r%4HflsI3U}xV_I5iX4N2~ zl#wBv0gZc@Y~VOfeNP=yZp>N}J`K59QB(6tL!urcyOS}BRKM3EmY_r@fkLi;mL`po z`sWg>SN`(LL1blwLnT-!hw`-F%-ozwe@(u>?DAC;=xaftdAs@+1Zpbx>4P z2fU|hvS9_YG^&5tM=hSku1a(|%DDC=>4##qFT%oDR2JG^uO=iUX!BIij(6|eA>0*w zuJMV0DZkmLL!_gJeK5fA$?xGDBQU#Bec(h^v1CbX-A&ydnMZnj+M7}L+ zYtzAA)WXl}v+PU>Nv=BY4c-#pSI-)H;{`ffgxSkqoC*R_U6va@vQ+(>&z|X9UtPm_ zDYTo$-IJ{m7#L)?w7^4Ms}kX!n3LNQW6`!|Z1lcwfEtI8aF&h=Bk*60n&pe&r}2&G zjPEI@{Q9gWSj<^-bab>=uB0X;AnmD`bMuP_NKy?6IWIXh7gsx<5PuzO5<6zzu|PEjl}OIgU2)=OCj3v_Uu;)ltIEeM_#Y z1M?5EdLS~?rRIgChi&(V&Cn%CBeW+SF>hvu9Kgdx>E+q#D1|U(#8OhvBAyHKBoBW3 z7KbI8y#He}9IYPYS}dU40ODlrCh>*6tgMlxxqHzyMbiST?9#sPinfK6?`eLB229!jgrxZ1_+M`PrCfS7$2=0pSu$TJb| z-=8}7_|WIqRY6?Cvd97{24&lG;n*fM;rCr;dv*|bjSStu=+OEcZQoEQ^kB+F+liqI z+4M$9dTiIscggkV(Mv#m@s%`(;V0e0!xpI>q@9BEDnZJ`y6f+6WkpYU6KV=qRn)j~ zQ%O@TF9_j5B8-^x+(D4^vi_Xas7h4S)uT4=m%7sQ+F<*@qnE6b`Yc+CFH z=FN&HZ*608>GHn53z%IQZHB4gw%G3O%LtpFvY0#5_<%z})yoy!+the^kMre0Ypa#0 z({i0AzCJe!btc#>jXP5D)3*`FpO2Fs@9D`k*AsANNyf~ zUf@f0bh2#sEv(iy0yE+M#~-h*v}D>u>N@qaNZ0tti5YvmmOs&wZW*AQQ^)&rEzfSC z2W|J|{XJm^iwM#l3%Y)i>PsEFudRKnr=LC{MkN~^hs`mHg|$WX%PHSBL-&&Ihf2x> zF5la6Gb1`2nh<8=5#6#x>NY4tPTWO46?}5^>I*cbmC>UDasm+1k&%{u1PuitF|`7I zZ5>0j10(PZ5u*%=8u38|eQfkn_)Y2#N=ivJXPwe6B02@4JirQ|S@D>g=)W{b{kibY zmwo{bQuOpeXi=Q#@84zn`5h0{U1rE==Sm>y@ZeyH(n_&7r%Bzc(Zb~v3)s@KPQ%d> z)|zl>fAfG!cY&)fwr#bMkj^%8FE9VmO4^&SGSa0b(#j?Swkv;G|L89zz%G?dlnlDZ>kAH< z>HI=zy~4srF}!^FLrY6``64P{+lj9V==bP>LTyZsk~N~FxDi=Yl(OK&-?nC7b!dH0 z_ihEJxm7lu=-L0lTv8GL{D$+f55ND=Y4B?!;Al+9PiotKL$`N 
zj!8;pmX@~~hP3hU*(9%B$MfOsTb1^9^b;FC&{i#Jmcg|Zz5BmxiJF_d#BRVzp)dpU z|MTiRA}?=sA~5i3z%NT*F7M35_JSQdRv$kfL-wgGHe&Z~Z8IHi3Z>^H z*#C!vmX??M66HzR&xQw9ZRKZeW*-kkg5BL_KIX}j1D-zzBnXH{lJht})l%w$7R!XfIM zdp=ZmZ}f+OyxG}EU%yl>0Ta&bB+2CztF7eB1@Heqsm5+<`RQ+C;}}$f$?v~(du+Vw z!{wOYcoO}`rd>~YdnZ_5jSkDnQI2?dUhVjs|KM?DsX4hLsY7tq3;Q>w;fUoq56Q3H z#IxjkXNf^@Nv~yS8wFG%LJMpCVKLqS+^?GNvX}xgK+#!dO7`qoPMg-pk9R^S2s-Wi9GxqpZ=I-V1E*HN#T^#C zgY`+5iaEq01;dcs@gBQxpQ>NvcI$X$mb|={+ni-FHPZ3Z{4AK<6r#raw%RVtWY0}D z-b-;z&QG(nOenagr|7c8wsO@*CG}4xh;0eqzlYYxHL01jUQ2_b=UjcvWE^v}YP=C0 zK>o_rH2j5K>L~p(%7tC17AeP9t_Pk%SLPve>Xqt~=$!Gcmmn1}Ku|1@>cHF8dz6nK zznS@<Z`qM>Iw<$~ak*z&%ph#f5G-Me@3NdZX?Bj{NRHD!4!5)B`)+6T082*n0~#+Al; z;D8A-iS^a1NeXu(a5j~{nfXH?N^&*n&$(^dlQ7(tUDnjpWWVyuOOlT!hg#HrPp_>D zi`phQIBofJ^96Mk6cTv((ia84r|Z(wYXLH{tX}OfJ97#J`BfC;)N}_9p`*&*j5z)- z$Y&kQDA?bB;-T`NQdcvbjin#|w`9xR=-Cqy$1yMN*puM53GHeOl^s%PH8!(?@0aB) zpy7H(Ml1(;FWj?e_}eZL^54BPGcYU)THzWPC>|Si@U*wL?%1etWM^k%ch^=Hp2S~; zj%v5?Y{|~}CB|j(aVZ-((_?)Xpg*QWJQXvaoiuz6vBIAQ`f*_D_Z6#4!LOlVvVBNe zTAJiWf_oht98d+HBS`j@egPU(E*{Q*3Mk7&EPy?wJa;|^--xzg6sq4|1IKs`xQ=GJ z1s8sDc7A@a`t%-T(R<)a_d~up+@mf>n|6aDHdlMT>WP3Xgj+Y3@V^B#JHs6FQJs^j z8ikEK$a^CGIyRgkfr2=W>f0c&8`?&^&MZv?r0^<8BmFrwUf`U|3JN6f ziAo3|wjdo8i`fU;GM*$c_d_8}PU6q5Je+&SYSlh_U=6nb|QK1n@nNVdnGy12(-fpSl+yO zGpMv-m8YTlJD&(zh6pz#QV#DRWev7s@D2js!QF4U)6>XE*n0n;%tF4!JR*f`4-HJcv@J52~Ij>)bzZ|;VaOt}C3 z{ps$nU!&(HJCv`@9FdfI$;N4Rcc8&pLrg3_QD^}gW6r}X_EK8!R0bz4n2dPu5l57? 
ziin8l2SK6?=@5$jD6me>Pyw3MzVu=^Vy2#uR8w0^n$M&&4XsaOAfFMTDGB`q9RiW> z6)r(4Qv3MurS8toXB!!X4`O2hytDd3uGb|SRjqw`8t|A|8UoSkhx;TzRyj5;N=Qf$ zwGmNtLQ&`hiDY>T_fr;A$eT4Yte!)9Rh~RL1dSq*3K6rwS92UZ7*#ieC`Hsyp#4EC zY-3_V!5-^Bs9hN<&Lb^&IrB?^u9GyvGLc82xgkbqCJ+&G#o|lPp< zITVWh;B>G|BHA6O4!Z~bfTR($IUx$sftzwnkFxVNV9)E&hi4SICJ&C>-P@axrxVZ_ z4WiAc=3Ge^tq=Cf39J~xk_4Q*1PBANt~#_cC*cwUEtdHz=@xBqOhyPNP<1~cD(845@cazCG9zzPA8m;e(GF=B;8uL||HW1>2(}*yw=L%b3gMBOiAdkr(|q^ z0%j-G6R)!2fTvH*7qAj3d;6&kF>#+-Gu`&lx#l*->0~!Nyu5zrp*I6l_-4Ev`S(eS zuIr*5v{GQo6z5r4Sm;6GzP{pf875LSaFvA~)*t}5Q{J+jN7w8O??6MmX>}i(LvA3= zK0_J?W}&DxBsA1=Jp=y-k#>Z|GP~iRBj&ujckf;=Ernd+1=aR0c-&!^`60nD2rtTA z8Y=q~(c(F*sHDWz3=8y<_BlXL^+cM`FgxZn5tTUwzG){&Q4Rqxl6+-3a-{zG{PpXV zGrSv_M6M`AGIxPQHfRvO`sICDyQ`^coVtE^WTaLKd~9cx$hap|Q~bu%!nD}T#{2j0 zyMb)`4B8-0JF})WBp|?OVx%J<9i$otCYPDdiXAIJV0t3f6;xMOKc0`=VTWQx3YN4% zPEM{1Qz;QeoP*6e71;(mY1x)wnuhYuerj##0j z!^WY&OS0?`vzdo$d%-;*=0lT1W1_!3Hya8qk!!CWVbd-2(#!&9pJFAizbcI-oDV)) z{bCVbc7&N8sf-_?seB0erPq`wI`x(=E;gdPT`wpoNS;(Z%NFu3P#Qb=46$Kgj3Vl@ znCXdo70z<%<+WPb+VZ1|EGuf$Fx&@jX2YgUn+j@cYe5?C;Y@Re&fg8)e4>TjkDf|v zp39;W^cwWKy1GSRAg@%jii>y5Af#|M%M7bYHVym*jNdRp#l;7lVYpl5Xv3$x+=wLm z_}lo;6M}Pw>r*0z!ACcnYG=GEKFH3VVi*0!Jn>I1<~RFIl*X@LB#n!`O=)qOEY7nA zqeer`Uy>RV7WG({Ji@jN(2BJ|B zA?!TfvUO|45B(GRzXU>zkHu1zBV_1^K1nV3?MM}&g%LZDMEh~5b6_hcA;XFV%}!X$ zJ8QOLOiJPTVHPw4&c|pZzmyC@Y9}5A&Qiz9htek7kqPp`w8krjPS@x=)rqCo?i zx9hKwgelP_q`z1ip)JY5F1DLP<3UCP$QrnGvg%Co-P;EtK_nCxB$)Y`Z9p`LEqqnY zY6^HS>0osa4471lT`F2nW(h#(4Rqw^7p4(aPHLlW?cH@d5}f)QV-FjYZo`^l%ZEt& z3{vPzOUY3AlTilg8e|c6+qf}rGP^?h8E7HYK>*E}a0%KRv^-Rvcj=znK(%euafrsF zM-*j%xM z3VK=?!q&X`;bS)QaBG8|Zo1BsDx|{M?o8@Qj2vl?X8eR0><7eYaWK-j+pCMO%uj7^ z&dGc-_3eH`w9v`onJd+$^sB|F3~D%D<2OhHL1X5LA+An_p%zxj zQ`(_Pk#ams%KmqLVH3$2E8oFIM?FgC>#^`a7f$LAsPi#HmFw@e=J(3DWAhwQ^mG z4L0q_a|WNmf6pIr>i@f*>yoK#>k&vYL3UCVpLan!2qh~* zH<^p*y>1tcf=CA3e}I=^hcAV^qO^?P^&F(1SpXPT;`%NwuPVGPiqi z#ztL*ePbq(Ad+h2h(&7@T7n)!z+1^#Dc4G7eLbCpdK~rIU1`qdZCNUaBClY;7#0(j 
zuHmZ^u_+w=Nu_8%&B+n>4axhQFFTi@rm8y1Ba;m_rz3#AqN#xT-`&bSlQ>eEG6DVD{FLVpbXXuOv)Q6N%YFY1@Rzzh> z0)x=maqR6CBvq#_FF`0S_T|nE#FFy9R20gG@riQ2s;>~I8w}e3>nbjLGgh}}%a#`? zKBd`1HY5V(kggihU97(6PLm4J-$_+|PW$MJCX4_O9OQy(U)AX{8!_WW4JVTCp-Wsh zRGAEPx~NU7X}kUBlKEJ8Chb(iq&9d2Zi!E_7me2`8#_3n-Z;n@cV>g))ek&KlouO5 zh#zxZS`c82v&a)YJm`;mFnEe!91UU%<0*&k z)dPx_20ITMW;BxR?pr`K%>0&mMfrYoOBvUsW{A;q-o(BNE6Y-?+xk15z9kitVIf-LO4uU80vQ9ql=SHI+}7C z$>WPnz#=TtB)~*}m%}U}%Fx8;UubNZjo?>QP32@{Vtl)N(JD^^+n0#54hqP~Q*9gr zYQf+ei=WQC*HH{FO&KF9njlz`*rC|MG04av)c86i)m`K^%ZM6CjZ((W+~8-=_Kwe2 zBb_R?@CZtD_*JqVJoweUI1!(|agSIWQCMjIE{ViRcmT*9AiW)%oXoxWQ`FUIZ$xH7 zyw$O{HXby_s)8 zmYSLx!hBkLb^ULlR$wxkq5l?Yl|I=Q_a2N8v){&QB9MCa?2Sn%6yVaYnW9==>EB)= z&06FHVcV~Jp}M>97^BdTDxRT2A)=RXn8(4T1MMfeL}N$4x^V=mfM$Pxf7Q>|omn6{ z@SDiLdCAfDmxWTN?ch+d0-T50TZ`|C7inZj~O==(^Z_`4IB! zy6H!OaA}Na0Lm$8YSJJa1PZV6A%Ic_3|xRU(0e|L>9ZIj`9J`TNE_(!N^!!l5*c9RJ6eTA~0#t znbr6o1_pN2Z}A$*QlJC)(*s0m{Wiby35Vo^jAFyv$u1Nsj)N>dbnjbfL1CdJ@OLc~ zdSoIoHhEhJu)iKunwdqrcrX8f@kla3>v7W(LwiIcPpAR#WvU&6WDo(-Xlg_@s@ycSe8vb{ild^of90!iNw1sb-lH z&!5v2CkJx7yKQ?^gm71bGf`h1ze{Eue$IQ>fiJ*Jwh3f0P%Jb^Y!bb^S(oZ=)Oj6( zeLCdc?BOg~3kwSnY*9!xJ>a!qALChd^!PUjEwnHT3k3F*%ny=;gU^9$)Dl+YhpNpx ziwfe)1Tt9C7Dj~=vuXpw2gnK#>IfMLHpmvrp|1c7$zOM1UYyK@U|!q=qet30I)uc@ z#SkKh9{9pJLaku(gq@dH5iGxEwtYNU$plE3$xv)#*O+e&lqH~~&$%uR>XGSl#BxHg zA}2QhlGqN^vDGK&Xn~_L$SmH?f8}AjV;>`zUY_=bT}PBr&|t*le876O|(5QOc<% zDiNT-K)Mml6QX`Srk11?p%f?g5a3zbY^p>_|7bTOBUvp725CCi^lxtg4pM<;f6nQX zxmi$;M6``{p55azqo3LFHWtsdfpCvxxO6yg-okFURRbs0oN+Y_oQ8l^=NlOfLp3to zvY)V6q`yHFKj?3>2?=TJ611pGt`dv=^y!oJ15!MJR+NKz-(?WAh$GvEIVH5SYQf`9 z0z-0OWp#?;xj7dXKm0`g9S0EuHD3qw>QZ4ZZ^pAc6M|GY^ z8Nme*vXtKKIYE-zWxpe@6Sb?;m;0=BAc}FUF;}~aFqZrggIuZL#}Hon1N2-DKfr8O z1Ti(ZqY_A&`qu`sZH|3Ue^6TRZ+MW0yd5wJ5u*S(%Yk2`-n(}X?GXWx?_)Yvf4WZR zs?v&c3f5z+Yu7Ry*b#6%c7EIHC3}wjVV9*lHvH3iW;tK?XGr-$DJ zt>L3Ar?8uS?|e&!6UY`pdS(h^CptO8e$UR$`8U(Zj54G>J-Drb=#e1SMiuN!=upU= zNXH}BBGsEoRFgEuV6k*Nc0{6%Cj220B!VSkE~!eqk-xYd9d{!3fr?2L8d#UPepW<_ 
zGbk#EuT4O)Yzl9H!=`-|1hOUyP((eH(uw@w3dcFMD=RCB9iR!b50e;VR<}9ZAu;6o zu+ZZ7f=wXQVp`W(uS8G24sKD0{P`3TF{0D~aS{b4ij?KhUq?Es=8t_JAFqXrO}yVK zcaVoijsQBGkN^tc*s@2urb9#w2&*E)ws@F+5H70P+QBC!BseD3sYLd(hzN)+3x=ew zVqZFHP_X63m~sXD^pFe%MS!t;U196)KZyJfe3KUEkXE%r8Pm-CyeYm7-1}n~G*J-3 z$j;Ih`osk+T8~QixUNoYfa)e3HOy&vZ6lbYaBMr7R3i8scty>)I1{BMSsH7|wUbCh zMCgXn4;cL{Dc0y1%HF;`L2@CWa6L5DjG|7;WFG+Nz-+`2x)iO@gVWbw{JsR7t=@A( z9^>Bx==y>i0B?Q}WDlG452=GY?#bPyn&D){ZAd*lRixo zrSQ^JdOo3^$&$ekaio0vDekd?5&kt(R&FjSxf1e-gs$R_>co60({V#_7Uq&AF2|MeFl-R*GGS+Tf7J5<}NO@*1!;ze)w>Zt|3bq z6SOen80b2K3N8x1^JY{$b;p;C0(HD9XjjEyLB{tI#>WQA773`cM zJgZj=I$lqI1o(8rIgCgSb~DbVb5H`*+CyDeWC$eva){FDZ$I;0AHXuMr(u+eBp7NqWN|gy4W3hpNKJ8cZ1B&Z@n-0SNZG7R zO;a)lW+E;c811Pq^eD_fp6yE8D}CPDE}qquc6!@3{!_ZT@o4(oxE+JuQu^}0l@n)b z+H?K&SlFPSxb2QP0!w}fmI)`10|%plP0t1i@f+%IqJ>T4-ki|`1r+9Dj>D44zn<5wstkR*6_lG=t0C*kkiIc?AB?p@ON7tGbHN7- z-?x6GTC4%C4kaa2M#fR1i$#nfEhf_1#V{dxgdYor2=N4}Cer!GC>l4)Fp?$FbYS7M zUw0HdC%F4KG6p~xLDY8^e+jv6?y5Yr9-@|)Sl>ZW=XfYUqMvmJ@7~=?qVE%d)unIW zN^^6cfT$~>uC9OWuRDKJYrG&$O`pz969u}ykd7;C)#n9PP%tOlegtogcTPq#wW? z5jTT=`IkJZrR48qW0(|ddH>d}bvTL04p-Ncl9GK~X>`yeMx!GXi0dYD$Wopmo>a#8`)iFBrLPAH6Hf1Q{M+PB+`1>a*-*hc3+^Ig|3i2Nh zWoG#5>NXNNP=+2o{Vu;h<$WlaFWbfC)#SPV0V1(-tH_s?e${z+0= z+NXPD4;L~#z0`9E%pxWxHqbUKnk&038B1>*n=k`z+T(xw_Jj5N)ey%1xAv~RjH&id zwW#*A4G4yV>wb6$xr~a3C!%}^iVjs*t$(gTgODu&gP{gl-n$(o0|T{`)vxIu@bQ4G zNaaE&AA!l1BS)a3j8&$M_cNNSm%uT~Rl8zq82Z{2e9%Vbd0*Y~dsm6JE@ zts=8?7@C9K#SfeAoP`TU1qa{8(LyN}!lU+6Xa1YG4;E^V+lnZy%-Y&AJfnld?K5e? 
z7u(THMyCT)>M3??jXEfKA%(uYQz~eVZG z+$5lU_H21Qoy*pYry*N^t~JB&XG()A4&t(dxZCZi^Vj?n`Cs02++CN=LWcn#C0*U= zgL^Oh0i96nt3z5%P4g_$xWhu`+|vZy>|$-V!JpajdWY7qFh*WAyrB8|#E4M+tpR*) z4B>qccY(xS*$KTlXa4Ta8q(m{h9M2o0YFJbCN$#9*lQ8XP@%JuOR!MpzqSLk`tiCs z81?FqEEW5iX>Id>jgRj<9EpWa&cAc0P4mQw{oVM5iRm1M;WQch;J{>A7`V#iVkNhM zr?dW83Q-UA^%)KFS}}fW%T~h_P8xTY|7%n~WOfAw@guYx;>)79i9km9<8>j_-Q5~G z-qpg_r?iOR)52o()TZd@+MX|pY9S%|us~c$Bn7=%V;c0Zl-6!I;44`o*oK&w9~ysW zD8tjZSNp{QQ>1Z!_P6=+eocM6*0lc`n)E+k!?Qk2zORb*Vqzi`BS3O!5Qo2bVa74v z!-kXs=8f&x{2w6Ql)oPRTl2JnBi5GG&mp(t*Wo5{0w~C^Hu_big@uI;J+1cWezT$u zf*?e*O$>4wL>UxKl`lkbZ4SAK==Q1&kfbMoswBN1QVW6bT z_v2oagBWH4A9R+~YstOnR*(@pa@`E1$Cyo5AmhY`4<81=?$N#2aA-X$csa0WD&!kJ zf8K}CLEzXNxkCnh^Vmglog6A^Xr$%{-;E)m1LuRIFbrMBWC=V=i~%G7X`~Z*kbpp{ zcB=nZh?PLGuP=~?Dg(cn-7k&)FTQdB|4_eVpc96RxW;!9EJlPKb{m1`P?f3{Luu4V zbQs9CC5?@0z;SDFr4Q!Md>=mC8$Cl0h~>j$s&KQ4Yf73`}HMGct{`>DtQ zHY8)a@eOwULQ5#Q31$XO()0QA^QfqUn0zRlEBrZpe6vL!LD!qf3#K9yIKR{FxyO#_ z!R``Q)2a|LG@7z-D;vfON_YcffCEd`Zdg3?_O0c?qt{+zZu1Nk|0BQ0S;y)ZRIzIZ zdB{G)6yYNe5-2HF^B0h2{SKfk7GL;&JI$gk1}8dzaWASnZU2Zh_Ei9&r2_RJkTDtT zCw&CmPNqRLB&jqq|Bl*y5GW`8*A;CtW8-pKH&PvN5sn)AfuIy^G3gWo)i~xPl(Eei z59cHIDdG7!8`TNf1i=pO;CP05WDxE9W^jjY>*=A9`3;iox{EZ}kuf@z)>c;XP#cf~ zoSC^s&K_XS=x!yN$!cW!0}A^(Of=rTcaPjbKrZ=#>Vr&$`L8Vn!s~~|p%G#-2sTti zzrR^Qr1;{rS-C~9SO$4}$fPy7CkZ?S>CWPUk7@wlL}(rcfRMNyDli(ahOlT@6~cwS zw&>W0{dB^89Q43Re+@?iPLTVDpev6-Ep3A0x%c5SX29ZTJUn?S5fPE{eQ%%?C0$c| zg95HFLs*Tht5b%6irgDUr~^k&jPB?qy~LpGSIj6W;D!#cH7qDOiROhiQ!GwGFVW+` zdpPSYaVHfa$Z_|GE$A!KdL!S3MT?QS7|69^eN9oW{CG{_s?whc*L-tIS%qk{O`i*| zD<-Te_QB0{j?suD8DhfXMW$emlAL<)fBmZUh>=s`j&jehUkH+x3Rp zh^`IoMFNWV-1z=`VH!9$HgWO%)=T(wWq3STnB;Q4{a^n_hi9_o~+9r@=mJbna6m()l*oPLFI)cj~%lXFqB?A0o z5Paq!Qa~JMl?Gk?5VnWQ0ksE?LUZK=Fix5$umtixYB-?_$@P7}iDf$D@L(3Ys_?#% z5xwE2jb(!fmmkqQu2@!yn?$0(Z4q2gcpT`jf~Ha8rr-h;qBSE@9!TV~99zgNEQ~_B z26rEk5ko>TfC8tBaYUbIuBBt?07`T+`jiD?1~&u>h7$QQCO62j0}G7^9jC zNx=}N)FQnwV^7IQ1uDBK1J+_!E@n~D@(lzdRGKO>5@eYd4SYd7c=g8JA4z))?npQ{ 
zVO$=D8$8XDXk^3%n-Gh~Zye;eVhk435%JOgKU|a3N9a2Ya0cRn0mF@tAVmAx3wMo< z4B@>;3;(jWgIs7JSLMDKCIMF#-4gCv5$EhPO) zlU9V^ROsiLj>0TrqN2`#^9(M$Mo4e)F@zq5aDX(HBIa-xh2|X^b3f)_wW4i)mVq~0 z&S^Gp!Zf-Ny%1#BQ4 zjABGUzzPEjNR_T4Dj-FqBgGgXHp)n`AswU(Qlt}!BB0U)R0Kqd6p`yn}IRk~H?ZAfvKoX(eP+|~4R#P7sNusEMjO_T&a>7-}ok1R5gvKPMG%p=EgWUF`GZ42~HLl2uj4;_k!j!YB_NW)bH{){`8;LzADkZljPPb?z|5)=q| z?gWV%5HoggPC?zvClCf>Z+$4iqH<`DYH>Z-L`gFS<+{)gJD;fe36m??qu@o@2?ArH z`9pa*tE;ecM@`hQ2Akz?4^Nnmsk zSHw7bY0+{mIXSuPlK5|Iy;6P<|Bn{6aq1;zp(t#FCLgJtJ7Wj>4$!*ct;vAz=keYO z9u*Pl?`Un^Y-D6~J}e_|8acUEw`pM+lgE1v1D-$9_J6~ja3oVk-wdVSLZ-`E3FVZ&f>W zs$)m3n}EaUK^UAKb)1`#{_a?*yR>$auJn0*lNU6{d>|`67-d z@=i>D{>{b@oxBPcud;u(l?~EmUb1&;_UwYApp4|g#(l7;8vWKXWRymsF{M4&Py6Z7 z0d{tFV#)>yfhJSNuowFsT@cYaj1ge1bdp|-sc|N?0#6sBYPI<33W$;-FdYo^{xX5c zu26LL;vI}5%>;WaQ_;cOGNKtKL1Oxkj@h}C9#`I&f@LSy+VJc#;X6`RML~p~BN+75D(97c<(`V#wMB z1w=IcalIzj9d;zQyJ_t&6~5*T3r4^|*$Q@c5{H2B%+l`NDu)gQ@L&x0-QPaw+>Y7Frb%aJj^`wg(b_zZ~yXe?#5-^{{8naib|7v zE?>TpZmzx4;;jZq@SXMH``k6{NA`?Q%0_KDTyW&hZ+x{m!$)?Q4kB*cSqt92V4NRf5Q2C>7u}H61lkaV#cmMO#6S!gHL)>Y?ZvQCLt^Wb83vp&Gi+R^p*lhRTL~sD z1w`8&lG-@*?tBGFb3kohU|BI2T=b6r$;QH+c4PgHTryJ;WS38Vn_jAy>)=q6YTO53 z;{m%KWz{cmiX|I(ZQAsKW9@`k?frp2;n_{@ek3Y-G2TcvMqg3pcGbk zEROe~iAjJxo#GC$we5xg#u@gzq>(k?@G;o}{rtjEwvnca?0_JqV+f^*QX+Z+A#_?| z1!ED!%)#0gNnQbIcitDxBS}NuVplkDcXDufB!ricr-XvF!iLU=n#z2j;hZsb_w%NZi#)IDtZ&w-?Lo%%Fdf#edq^qAf zbH8AsTZnL*EBDA#{S}t@ox67NL3v?hR&lRM@z90CCBC!!y1LAH=?fOjP97LST6A}E zZcu_@{*MG{l$Y-j+D3NE0tCf>`Wrs}Etq`->TsVL@J6$l1RVlFQfzF~@A~8`1`3i& zndIc-p6tc2tp>s*faPxY$ZO-@BrjDf{;p#3;=OjYAq+~2Qm4=uV2zIeiLCkdW>zH9 zOU*Iiu#BgoY)eFQ91nTm1cp(2PmiA2>?%XYJy`t%f`fwzc}9v{AZN}l2l8$x zrxwzJGeiPG>=Pgk@zK#FWdlwin41|q=-h0s6I%m9-|5g>)d8~_0$WrD5epd{;9HYM z&LK4jRof02wULKu3Nr-J1>ZvoE%713MrKw>5)i1AiAU#*ah=L2|J30uA-cB&L=<>gA|^~tqRQ$e7tZt?EX5yFOInXN5! 
zcPER$%1LaSs*uoE6J+i5OU}cgGWq&ShYAt>JAna?Vj~(|G%0AY*HmFR6K)dYQ|s+6 z)(TxTj8w1tuU@^u;{V{ohrR5pw2>k&BfuAH2J9Nnto7={-P1hudGq$Mb4u&{_1BHs z`&A!*OC}4*&fiP#KezjSp8ig7pc#@6Zznw!SPlXh4MtG5eiwmU(N`mf1tzwA(V5Wh z=#YUK*hg-3ai`4%L zMW7WeJ(Vz*vRZaxwO8818(oX43C z`GKQUgep0?!(%Wmv9jqp+iajb>8@jzi9XT2^s_SbxR{7}2{IhU_V2wR^2 zCkbJ00$m0^d~w!mx=sMDQXmtG+e&L5X2)yyiL4LhAD_q!zM(#@MfOMP^b^`0GL|43 z8G?`B6w5YgIRLcq&}T#QrF?zkWrk*FWp122mG=DZk3ae?Wq3TwVDs8EfA3ERERtSp zoN;H_@-{q&}lE)B{FnP)PRv=cHX)UEV>HaW84hvVl;HhsVKvT*6$ zTf*Dvr(X6gKW4_0xMkO^w&4rXgImwP{CL+XRE9h5*U-@mGGvrvz0uIoiNmgA-{7+F z(w+sE_B?4Fu*^N}aQR91t2!+{x|yj{LX9kMX|HHNiF&6(rp5@iyq?SkOQP09D9~2V z;^Sn*+S%D546z>Uv3cy~xsp`vsIXS-+qaV%8n!Qfp7pV6$kf$sI+Xl*6`P2<%j6

St`xU%rrDLqse*$PY?F8{aL4bujwJd8o*@ExY zyD8nYPp5UtYnuw>z5GQ(t=`S_Uce8&5peJG`A)C}aX$Ew7q4^BF0vDI8VT@$2KTUy zXWmian1?SVm1ZaE3vI(1Cb7w#fm}sU_|v$`1+o`vYHBXxn(a>o>gOQ&aTp7Pl9(wF z8{+pHdZn&niVcEDOFAk-Q_$dYS2Sv~a~MQUO|9{ku81A+slZY(gs|O0VU1xc2qy`NTCwLXETnEU=Qb`_yS*d2$p?hH$Gku1Z}5v)yKWuF58pFVEE?({ zEKEd!X@F)-iF-XzvYGJ;cS*koRwNn@0;M}s-(%D=Uq#`6wX}=c{CA}F+DFHJEEDHT z$eU7LFh8*DCAUbhd9#K&k6MzK*J%xXX?Yo=CqFll`_O5~PR1Onk=+PGrQ&jM8h{x@ z@-XLzPJcQrb=?JIJ#z8eRLfok-z(^w-C~@269opBGAh9wbr+Xh9>{xNwD3?@QzX#a zyHTgEJT0+(aI@bcj`Q}F@S@;T@KbaA64NizNy~Lwu>EJ*Aq9Vu2mkd+oBU*K( z^7-YA27~+6)eUX-t47Y)3vLqqO>P#PYmnN@{j zo}^ks250@m+xmizW7P$GoXg6^59#mT z?*3h;J3npyYCoUhRbiQ#zo9%-!WYTsJ8!^4%$4T$Mb|dIkD2l{eMmefQoSW5!|oxE z!1nV2_|>;M*cZ%K$GDTc%TDw7yl>u5t6#qcS?}|EL$4P4Wt-Zt{hL~B@er-=;>ur- zQ23LFTT!OPfk?d@4dY&W%h|7627)CB!3iYiH4SHsZ*`n>l0L93mT*7L7`9>S|14=G z<uI@!? z?X<9Y_KDOG2BKxIX&E*>nbaP?Bb?=8-M5>(@BIc$cQ!rr|0HdXun@hyz22M)PvExfXsBEPAyvod9%q-p1gEG##cUfh>ZIJZ~*bS0&XE%@!A}^1~SCtQrw)6`=Igx?z{~(?uBlpa+ zxrfzq($bVQ&kekr_ADgJVf)!*_}gK=5o9tE@FYGlk%VI1^_km%Y1aEM@6X-jZq z+@@0}q{wp{0~!J=6~W!RwGFMZqwH(zW$*v~`2Ckto6*b}jC zoXZdyQFTAhFy$Y2ix#Z#H~X@bBmC)y!YkE0i-iIe(gj6c@YUQ+OiNoY@Y8*0!94Yo zueI6vD`a%{tk*a^g5 zC7Au#NFq=1@_r3d)0p`eO9h1{Q|DidD=S^IIryq69iJA)*MH|XzlgN7v>_5Zhd2bu z@wOY9T3Rt5KAgzbPrmeYtyeh0_ayw%`hNWHHnJg_xytt%wOEl=KCKsL7@f!Fu4(92 zKf^I|uzYK^&qRTxL%5=n)r|&&*O$8R8$4!8A6PWlOMdTRJPN|EC-2M48mudJQR|J1 zxANz??w8;UGQEve&8@JxOM>+@XhXXO93Kjs2ks4z#`9$Ju78%<5O7aM;i6poOiCO? z6o4!giUM#3c7hRP&0y^c$8VS;kb|I*Cbz!iB`fVK!D z$qCGtkD|OcZ@KgMKA|a1r7PvVqN*FVYA#r?o#Tf?np$;q=7q5DArt6yh+Ry?qeAX+fwLvxR@#6&YOSYe9_k{kCw&B1LQoBI!7zenr z%b{>GBpCtRv8{6-MDxZ6@w4lSmPNT|uqODwe^d^w3mx#_S;nAa`qQME}1KRrM4v6u69hSS}k}R@>&7EvK3p+Xv zGk8+64#nuiefVG{^wUkjl&ouy*W+_5Rn`CmQ1+%-f6^BXMpbiheapUS?_7%pY|8_V zc{^K&b^bc0ia!I6`rvYdOmqe(rvf-u8m*dx3aUC?7*UCdD)@x?fx*qua>uT` z`y8JCGZ)?}CrcLLpN(b)gYAH|_XMab@A~YLhv-fcNP_=bXlu;UywYe3lQT0VYhOKA zcbmDUyg;rYljFLjQqVM};gu(5JoGnjVui%|Herx&dm&(xyGV+IJVd!Gk(hpe5~zY? 
zy&^p~Tqya`-q7*qsBm+A9S!vK?hqn)_f&teGpeqDJ9ioun=-2x8g2E_vkh8G<%JC9 z-g~vz?<7CpY4Daad88mNI*B>Di%cf`5;7ZpCQK?hSZx}u0y&XGB(x%yv<|4EYUq3+ zQQ$4cFUTC&^z#mJ@lK5VDiTf^A1{di;v~)8XkEN?0`aGPLy>d$wemFAi=cv~ORsV) z(~d(Q;Qwqb9iD3S0EKh~#ScQI>Ci_|tUN&yK?@Cf3@ z;3?gD?b1`Ou_{q9vHG0_153eLDnZEE`{KT%2Wiq+RKzgwJ>v|%<&tJLRt^d2}_lsmJ2?fj{q;&@DFbU%m zE@znDUJH-SK_rKQOCrzwoaEPsED)EUzXvmB$8R|Gq4NMH4ljMsv8>HLH0Js9pSN8T z2P?Z?Q+2DJq17#((!TY5$IQr&{_aO1Qkac>t^415e5W_DWyO&Lo6KLmegxsjrB_u| z8|XH00ytWZ>`P(Rhth3&iDCOgArv{gPo zI%Tddb}-mRRxXcwf3GZ}E#wK3V=?p|GrdK1d7I@$HH0(wk$bko!tZ2xtH6;BTX&4N z5BvBXUmcQctYn0}TJYxV!iB$~S0tB7UUH$f;tgYNuNSSCj@j)p6LZG)j;>yFAdkfx zoBZnPLnfz!5+02eYN%~PdEu1SPwfnG*XD1a-c0^65tW=du$wAP8&2w!bFd}(nTV(a z&BzFHXT5rrzNfbr8jbFc-mWT{Lu8NhQ^YDg_wKNYhrNFQgizy8*AL3r8(wLXk&!P( zw55l$vugrFXAiuw_pYvSy>c)suE5QojqOx|r?#m|P~fmkbF8>+r?$y$^^@rN{eSHYo7}&tHa^7& zZNRRpoWC6Cq>mzUwTR<5(UKp$|tUz2?_am#pqgF#fPvG%A9k+;w~ z?~v`s5`^VMP3TM%WHx6I!7VxZ?z|oN5Jd*z9!N4RbWQhA-f)t+LcLb=pGcLgt!s*n zh%j#(V7K4(G_%#r>}{CpnYgv zeCOTg@DLS!nPNF|eLuWD0RFFK;gkFjcPZZ`mn`eJ0fEuD)ni zlxxG-Z{pG}`VC{w(w+p8$z4xrWhdB!Yv$27Ly1X5cBU0p^?S*oaB~nB^Vx>a;vIm+ z15IyF^cc-{tCoEpC4sZ*p%Z9}JRliEBttmx;#XZnDoI`*qUq^*(Ogn1AuB)C*&h4_ zmiVIel*yp9#U z%^mG~`|zW{fyf=e;M5ra4vm5XL{l6d4StbDTVeIAZ#`dd`FV}elj|_ifXGM_NJKZ= zCK^Pd^|Qg|c|=$rAKx9VvAZv~dD=3zo{R*d>w%!Gf_+Zm9JcC7s|cHS0&^~5_ig)W zvlRRF0=@QQHGy}Nr>#f2Tq z%xl_4XEe(ethz^avkO;}ABDqN>i`yQZ>W#)Sx*`&1IHH6iC!!B1dK2ZqwzBB9L_J3&9jEYX5l_n;y$Dp1F*6O zIr1_w=ID_Ka0YM@wi(r6eVfau0b__Sh(IJbbb^?IY;$4?3Jg6xCue>_9~RR*)vZZH2iA1s}020-7b)DW5nwf5)2Y+f6s-q-OeNv2y zo?$^;waw!9z*LCmF~}q?Cr9`6-E#+Hzd$BXhX+C&v)8$#M*RBDBYp#?h?%@aqQsHY zi@l4t9aaxADeW6wKWxXkX1^)Tf(Ah3ibEK3Gd!SVCuwzc7b?6yvok^Dgf{l6c$ z-0Jr6?@G+g%orsvcYWkEaPEpGojS+$gCxDEhs5L%^Y`mf-3P}mqM5EgTsn7Ar!z8Y z<6=(T8~k$GR)==2M2}-xN>gD@^ES`ogaC#8fk7Q;FRKiG=Ee~LlA33*vZq_>EI5SoYUfhBf{I{ymswMAx?0h8JU(rJo&?QhwN;{43o&mW#(C zdp}g*r<)OvTpFVz}z|y6Hs!Ly6vH7zuVZu8zbGP%C7b$U(#tyNqA|ffgg%!{E zU;TZ#uYBt@3VTO+6G?p!+@<~vy>*1y!RXt?_3;s~MSVs(fH*QF4Rv+jTJ-HgZUE5~ 
zwpbsHF|}`SW!=~JNB=%CjLC?y0`)T!UhV3d=ag@vs8sH6lArhw;Hu1>`fidnx1k_n3gonnnI2*a3(&V(qz$=>{{ z@l#vLSui~c4AFwI6N{=@Usg1bYN3ag7Qcm0$-Sym3!|?t4#pjA6O%9OKO91L+-9vy zU6vr}Jnuu@swO-6rs7p4w!ljZ$(?)DXldZmn?VkKBLQIKj23vBofEMk5KRHq{OuwCtk_ zxrOW$m_cD(*kg(H8$p}NFL#gXLvD0GEKE)`xF;+XXXAjS-+j*%R3I)+vUAMa7nEFX z&90RC1Y+cIUSAoZQqh|N} zUkE`*Udx`O)j=OMASha(oRp+@Qb&xct_REdLO0ahjgSjdbDPz{RIrsKT*N|&uRq+b zc^neX(QP?n9 zp-mc6iw#=`0?Z!&VUbV$Hi?CxM7~>x&Rd|Y*S-ftr=8@Sczno-i|c0~$~Qa<71}i4 z*dqX(OpmHRf0p$>?)a)IRh|)!p7Y=3W>eK^>V}00krLMq_mrRpUrA99nb`c@JmtV2Eq{l485cVG65W75B)|=R(1iCTcyQ(R@~rQgFJB zPqqq2?#8X3>GviO6KIyZJ^?F!|0@iCJ`6mnIYM0kjb?iTYa_EsFHH-Ii!+Vn(c@OR z4Ah4j2&!?r}{MjB`CdU!5~#%u^ltVi&?Dj|u{)Ylgd?s>10 z01O+RhT4k(uG;?#aPiqAtR+^id+%L!R&@JPs>H(J05w7ZhJ&qwQ{T4rXRQLCK9AvQ z<+4u9tXut>ESP2OHjuD$z3_c-e08GZ=+9I0S^a*L&ly|!*vCNgPSMp)bzELAFtzl& zgaF=MIY}V{nY4v!ufW=N+Rh;ao%Wd)LTel2RnMS&HEI!C+bBrK!Eb-qoUc%K8uvvT zeKdrc1Y6%zFfIzDu5WLyPCXlL4xNGCP9gTUS&u_aMAU^d)fq5&R+W|C2?qvv=zQ9{ z&wu5AVczekAHE?c&w`#Y|KmSV6N&q@GYvoP*jae%U3Gn0P(o6Yq1x>W1#C;#4050w zxYh9n_I0}s<|?i5Guo;p{Qr|A@^Gu5+Ux+~Dbz!?RjJxu{u|NU0%M@`50o5$nus)- zfG75dO(;LN6mLcQyd3P~ehf&dZu6zZl{m!3c`E@i@$$Hs9DY3Yaf=}R#{-*scnI*Y zDgiP2LGw1UeQ-;P7C)oT2r4fbe>FHo# zPxa1+f%}{M3RYA8Ub4cAnW8$Ab$NJZv8hqBEvu23{_t;GMAb zgu(g~Q=+(F4q>|Ylkd**G_BIKW$ZKYCmSpjRhHxTB;;Kvz)aY+%ec*^_m2Ia`{QdX z`>#<&V}kV3eg9_(bKD21fMyACKvh62&YHG0j2drb9f$p5^Gez()Lo0X9X;m1cbkLb-*xdb z?}S`vWm6M>xwDfyxQI>uX7nGk8y9SQp}3UGIU5@$h}2ERTYdh=hPnDZE`M5olfOi_ zY}wN9|MyTbtgp+I$2Esn7KpgsxwDg=);_RAonku7vDh6s)p5UdrXiw4{r=!!L+*~9 z)HO-021^?XAKYl$=;YUay=Y_V(R{M0_t#es#_*K(aD`*)^YS)pq80`tvHTaRH}*S0 z?rIy>7CdYiQ6xHF&*S3anmZnh2-0?bV2z;8?;@(WN_#Kt3vWj&DonahQW6CQxfx+e zG3?t)2nf79e9Pw$<5rvOldHYXHYm@0Q6R25NK$6i5$GYtbBJqECM{F=D#r@ur~9P= z85hs7!+mYe1jbL*L453>cYeB$eH>-bduAE5dk$CK;3+-!{t{zk`iK?cK_sDc*a2c+ zv?Qm|639$f;=`8=)WmN%{BUky_ZlQ^(SwfVK3>EhlA`AIhw4 z6Lv^cqjE+Ff-KZhkP}XI56yw3HR{ifr z!n=aY%uE-`f{HWPJtkE(X}p^%JeK;c?4@ zLp1>`LWz0Di)j7|=>jWs5~vmVmT(Wplh&WJ^>1NS!96FR7TR|xI@&3O`m_xT=D0`J 
z{ghjNxg-+W9)7IH(N@%&aJ9PH1~&m2eSP4)Hih=7+ho5}E@Aet0$ynPSg%GNZmS~_ zdENUw3kBcU!_ztsm1bbz3o_L2-re42Gr&jnB((m4{(4pmOry#0`~!hfX*phEZ@d4b zPRg^i5ssNn{uxlnPz)#4=V0j#ym9{alFdoMtV(wY`&Vd;Hs<%WKD%~_&4W_6u<6td zf`B)ppEEhR1d}8#d|zQQrVO~MXIu4ON;5Bg^;SF<)~~Ss!NS7jq>d1J;2RiH>NOOj zxB{IW+u{_m%shXN(=duzf6N<(F9+Q*oIF5EN$0~Pust;+WNK?$VRx*P>$;(dogKpx^ zNKfaq!6Ee9mRy><-wP9qExH=2s(8wElXo_a0d09E|8mEcLWgW=blQttcqy)dR#Zg^ z4hSMdPISYvZCiCgb z<8tB+9{A^;n3*^5#_bagHga?Agd=$MO-}hJT6V2N^ELWeUXuu(*1Nkmzebf7MKYT->q~`9B z{6=8yh-@IGG~oOIoCN*rwilN%X`u}(Yh6=ks?==P^VICqYrC^uWElu+ zK2FLxs<9yA6mons6j_}J7OufY`ku&x$m{(BpHqvQzoVA77x78c*gS5XNbNPVYUZWK zr?@K^TE$%`*bb5X0#fxyM-Ob&q29N(AN+>etJ5-H)&S@trrFrLHdheqrm3d3DY)FO z@@VnabmS2%Qe(}V*^w%d3$lrkn-i@cDYo>-PaA1sfPWg4$>dTu@9CH}>N6YbpLKXW zYC@PMyef^Tl@y|o8_gxH=Ma_bLJHy>PM2c|thk3Tu?M|>+`iL1qx!}w5dZc&DFh)4 zx*$P;aWTMB?7s3Np*PUP5PIetIxx1mAe6b8x_rJ!eHV>$FX_N=CcT{`>w2yCgDo}D zN#S4VVs!ZMIvzUmtr-?sbcXc3Z3x;o;N`6^?+Xn}Oho-2;*h;sdJHF|0{0pozRFbP z9Y~R3j&eDp{p7=83{jS6ep?ak=c~1GnIkeF70y@I7y?(?t!?kku^G;A1E0 z9yXOV8>lSVHDjsFtZ?9Rrp;B=oLXOj?SsJ!ibh5bGIe@{aap0mCy+ceBt3p1RLwcZ zh+E~>S*iGmiABCnc+OwJ)n~l>scZ4L=z-$&1Ie`}26b`(2T2hobzmxSfF&g7D1zLz zlVA>B+QglsiONi_O{b0xh4hxvpaz3PteBpj4}sza2HIU&QmkN@Wwp3@k*~{QBNp1! 
ze2Mwfqa-Mj305FtB;T=PM`>nn+Wuv)cJD>ljg>RpZ|_jK)VAr$8|G<$Hqhud~kwmXp_% zrc5bD@V+78;u&l6ALPo8A|H@Q+xKq@uW*=oPa{KMZNCAQ7DpBWjWarZlKi)O8$3T93zmH@*qf`}=RsHaX` zEJIMI7L@<$KH1Q03+SbLvC-J?_Z&Z5HdKCno}5U5)QPqp{EixBslz~_stJCoG5UVh z2IhToe`uli;>Dd;mHl~Z*M(~Hr|-qg)4gp(1R%Q`31#;WeiRel_Vgucl_iax1l8CP zy(}rf(}1H9QVz}^o}{Cx5vy(t=PeJ$@rms=U3h>|aQt z$5eKR3u{3vZj-$YBA{Bd@l|1aR`Cn5Yh)oQ#x`F$+4D<5!WkI*1CFn8PNX|%MTG1BTX_2CtawL zAop|U29Bp(FKKlK{%`l!f>pK+=cx;&Ma6$z;2{esg5Jz}i~|^m`qHHp3E57OJOX^D zud%TuBLRICdKYMZB{*1ZTL~pBCV|_C0om+Ba%k~?IB%?y!8g_eWOO}Lzd!N_d_RZ5 zrf4CoPc*x21qn(ZM&f&}BRV7y7EQiH+c0s{lLSk6=s&hTWiB9{J4Y;fCZBp|4jaVm zkI=MqxQOlbf-t^CIME;B-C7_Z>AFTr=CD*cF z)_`sM?9GV%5eFcBe5j(W^Trp$Y@AZM9p|i>-hFQ`ggs5ue=&bRf9mQH|B?3ZEBT12 z12m4;+O=zm;moy5$q4n?EuJ`Fhs2ges5JyD1JzB4iu&9CE~wblqo4frXjvHneHPzKKgw3NfEvNm@<_`NA@4lyY_~wJ8(Z|#UJ1_kzT!ACCibU9?xb~V8u6Ve4 zcDlKA{=185ji#|7F@&)737qvo(0<;g@-%Uqwr7{h+!?lSOo9Q5A0~ojNeuYE3 zbBeVSRfM4!eLxln0L0bUT1i1xTUV#Z04F4lYz;(&*S_(PKmLDtqPA3!YOXDdjQ}xR zCBd|aQsW+QJ&}4eHtEmemj05<%&05t=l>ETaN8$Zl+?bPawe7uAr8>cP~mu$i}RU1 z^PBZERo1W{>9Wb5nT~B@GDdtHt1A2}aKH4Kld@74d)~xF*gRtPzey1yXOl+lOICAy z?+A6m;A?UQJkH()!Btp;j39FKFr9RNHXM8CbG%VV6>~WG(7$e(gbVgrrqp6x3mLl* z$a2&PCs%ismBy*M%`8FP0D-SxeS?Dor6C41)`t?g=m@@&D9{o`E0F)8#MtDoF!-7r zpo1kBnJ^A(^`I-pXtHw()w9e#faC042kj&{>u~a!j4}d_$b4u$K}<6xX|qS`rXs-`2QX=6SEE2D~aZxyuX7Hc)S+sZk^}jy%z4*kXV{W3a_& zeG>9ohV~1~bH=whNI)~s?HfB;%>4VDM8g8{=9MS*@!3wu1)83CXwq;5_eVW`&!v~z z^wMDzkWfGJ7H^fQb79IbrdtoPMhY7lDSzU|1Jwtqq$fG>_b)e6d{KP%{bgH%qtoOW zuh1*Bk9TyOqBAN!_5`tP8LlJl6rM-(drMk--AI()k6Sc}q%X5^#Q)bSNe`kubI;_`iHjaB#jbrxt9gQw6vj}q(R?)Y_+ zrSZdZ_p##RMzQfIDwG%~Fmk2c-;ZGtY%;&ayoEV-6mdU+xMa+iDZ3xiocjbhKq7I~8F^hl!dT?uXp= zX@CpLyN5N9L6fZanfbUU3Q&dUWi=%qP5k_zHKHoyZAQ;INYLvOa-4tHO6-Sl;4&2F z-e?Pw!uN^Q^U00}Y*9|E*Ewxb6&E{0$1iXHqRFW_UNJXt7w7h{xc{w11O6Ht4v=*g znWi!y9>M!XT=TovpD6?tql`ULrNGkM{02{mU7gN94PFFYcW&`qm5zjUlGKCqwQ-o> zb?`GJkBpS7jX_qQ-(?fK4uy1l%ABOCcaH)9yDY!JKJJQw!N!8x={o2R0;ez?6d91E zV3q^FaVIwyU^2=P8T)0-Qq}#_B=a`Y7Lmm#8w1?r- 
zqQ0jVI8PM3)18E`CTHS-nlpvh6%1Gbj;?f;W@2@nvuoD{cV1e~4fI3N|CuDf53d&g zictXqJ5%dTXrSwXNi|JtK;z+B=6CHhP(Am4ZPgc*;8?FU7J8(Pw- zpU5ful_O%1x?q?9UkIP=d{F;7Gs(*B%bne8grdJ^ISJ3jMpEzcuneH|{#wSZi_M_w zP-uJ4#l|B5z23X2+me!xp=4|xvQcE-#4s14kJyXz!+;3#n1Xu8|HJ_=Zo$TlmRV%i z!LdNDC~0NxOG{MF@kVNQfZAEQ9lm~D?0)LN0wC-VsuH*%JNsi%wk5ulqaYX3 z!H=ZYTT)!xMT87r8#>Q@MC^~3$7iCgI3ku$N@ljZEUi?f@!N)dXu!5Cc27#`Q`i=8 zLmdb6>44K!A#7i?i6j^6L_+KD32XtzRFZ2WFHD-0PxI^D^;LjPV_Ayg%9+P4I~Xg7 z+LO+uqkFNvhVlBzUr(9Q(y5=DH#iGGVUdK7l z-KB6Qjy{>+S@Lk}gW@^oXW86Q;Xm5n+q?eRR%y2HxAcik9oANUxq6LPdDngFDxPao(t|6*u^b1m4aVjk=8kLjuugrdHEjU_l z&S0RSoQC=CfrYDZ*ZtXpAxL`n4u_`iR=HTT%O>}Wix?9GoLY1y(4XcgcEU@z&7-FBd;SBPY9VV&eClJs7n-uk>0PD;8s>WxV-%nujstA{Y|h7G z^SX6wGv^^{*OZ%ue4ny&b(Uaz6}SI%6yn3zUHq<3;Zp+5|E^6;j=GocUmu8+0xgOs z_^N5<;*#RcHPU(}CRu>Yq~W^qKOVJfZoGBYV`rmd#R)VM10=%R-7 z=dpOb_3*$Wrnn5dj^Iv*oI~ds)z#ZVt}$U{+7bzXWNw1~3jC{=kRuxB%%l7s< zHrh8>0j4-`KV2k4^MtMNYm zW!Tr>wga@UeV^MQ)?Pl>;uU#Qa2JZqdZZ4TxDco_j3Eo}3Sz^H^Notm#$0dhf6~c4n9OBe*2L)p@s(dl^8P?;O*hqPd zpR6)UL>M2sZ7o!*<-evLJ;bzcZ{scUWr-a;G^*d`B-OA%81oxl&3?o*L~HCktQhN< zw{b|4EF+ZH#HRw`T|1Dhu`dYtL+b#kuOKbl*SCQAt?RE^G(i0=$o^KFC_e>OCitJ^ zmzuO~Z-4O`4rt*lsb&^X57|8h)8R1GYNTfSNZ{OCRYcZzdoqEt_m0>S>*_>cZUpgQ zPuBPC2QMvhm(dw3f3~{KmesM)Tz?>o`-1uNkP$xpXDh;Guw>meRA54PK2HlCH|OH# zn47y8F_d=gI(?@9z1U~5sgR^=*YufoTHNVQLIVkOK{Bbo9v=TvUP#`&mQ9S3PAeDfv7I4lE)e3Tpp(I0kO=s%H*QFMlFi-9PPZi`fBHMZe#u0m4s6CP z1bKyxg}GkSqPDWAC!dO?JKu%5e9o5lI?n&o4`*!3Wmnnwhj_STk+#ab;Vj z2PZ4k&JAS1cXt}T?1yul7NkJfjyo5Tv=o+4VB&8+nQ-9y3XBt}H&3LlUP|dziaanP zSknA+s`ISjq}!L)kGuwLX-fcD5DtYg7=6V46>61NU7fd#1g29jP9kf2+=P=)4D4z( zQE@AB=OUiu#)2YwZm0dtV^4RIWTGy3ENun1Q15V6+*B0~6Uuts7AwyW0}s9lLnpPz zq|`-&OTJDtYX!o#WZ?J?$S)6&gAT5(*-p?0*FKl#9`Oz(qX`B{XhI4RWBvy6>0w5@ zzEWfi5Kk-OAaxvh%S&jA2Fn}A9f>9g4ujA5h42?RSjo%@h%N||?;37hKYNt;zX=pZ z{7Q^vg0xeoPn(l=OzQ_~ZidN9ybCHwJ(`4W6{qVEDXEN2Yn?(Qfdd2n+;H~<>xd^pgP~cv_wtHShi-(rNCP{ThN(e z4?|c>(m(gG8>~gLsDvsl$s&Zl5sOfMz5(=?`F_;s^|)6HJS}MW3MdV*!D4hA{tyxM 
zC`#Pq)V=$ET@njK#~8C~54Xy>!MCVf$(JxqDR-TiSB{nC6uBS7_6a%t`g89ft<^kw z)ba0g)L)R;_?2rV)LAqJHnbzyM5;0^zCgLSY>%}DhL8{$ZKBo%qlwE=4oNHWaWVkD=1a-zr0lu$CYZhFyz68`C6&$tO40w)p&$|qYht1> zNLqdXC#a&aTYDYvB%<4b@fTK-Tp1NDOqjltTmJ3i(a05-?cwN2@$z!#U)d0TW@tA! zdSoheeQ}l-ggG%V;DVm1sg8Hv@&$6#bMtWhD!(G2mp;K*q&TvzkG-Ph_{}YC7 z5^X{`=`gVF`cN23XNgRXXP^9&6Mg87L#9CM3;O@bJ!pvEi9Y#$`cOx4ff(y|;d?OZ z+ewd3q9xE%kvvZPFv$NwZSCTs0Klz9f43glu|w1Set(u?>T6}dP1_H-&3=#eg|{@? z$LHXhgE{|;j|-mT@)RKbNTECP?VnHO(G&;MSugdFdc)5cJ87FtXGW>}~> z@fAEGp754_GkW#Y>#LA+vPW$%Lq5hS22mC92pa((jW2lDo)75`MiACi&dfoUz`(

wx32Ph@>crqrlMLzUe0>9A zD(?90FON1g46v^KSP`|?;@((YXkAC_pj1dQgu5Xx3&~lQ@J}V_IS;L0zvNIx`eI{HDT9s}KYqFP((l5w;J$k-uC1*xd>lViiFTDPTc~=|`dD$y8 zOJ^iGN=H;zujj1EXK^LF_snhBB?Lx&jGGige96Rj*V9C8=OI=g6*F zw!J6?M6akaDm7Qd(<(1f7ey+h&-tT<3+7)Gt(d$ZLu}_cI1Fky$;vgM-i92uLpC<_ zMzLV@;t~@Nfe?F8|L$A~7r7|nRw9Aj;I0`|8$dF*5H*r++f{9-K=p;L@^kP2aB1*E zhAzFEAVu|mwXygavIhWhe2p)KF$#lQXhAIM!;bI@z#laty%XtrfBb4&2h0hAQ%h+NHo4w6U!qv%tHMy`&49Tb)xw4Y8JI_8 zY{F1W4)UzH(6OEDP?Fbg;biILyg|k3b}{#^hYN~OA1W~bV&{MsaXkE@Fe~G2zDpKX zm@SBrGp%Z{Doi5hrM-FchB7U-yCM}n49K+IXx5SZ_ z7);XBn8IuBsRK{}Qj_tE3(hTNy#$ay0sk$IEJQg+&a@y_EZ&rQq2Tgb(?d8;lTaA> zu!8155W=bGObEgMuyb&khKr%kEUhP@4fie#(!mXpR9}Opk3o8tRp@y`FM#cZeeHqo zy7l<#BHF)e(gDYW$1U+ETf{gGqaE|;qQR89NN_?0*Nj#aDD?SPeb92mrw{#>-MA=t z$=nb4j*_O4fXuZr7aq9*;1=9HT`60}7h=nRlqAsbP~Ddpbo0A+!t}xM3yp(cRnB=#7Ua z`F@>a|8aGR^zHnYtE-%9HHVBY@4o}RyXRi9_|$;|5E!0p=v{REVXqbv8cJi7 zC@L1T%TZn^@_^)!ccpLyCyG{~I>5qhiIWu`mH!IqRxOU2vqQq`s7+4fPUMBs)gZSF z{!x zE@_FBKZZJf_>o=>4D6GfB)NstA=HdUiH93-?lK7>DG@<=2Q%gU#837lU_$xjP0_5O z>-*z1jS3Z5uw*|IO(H*t@}Cjcc{dCx&>qRegBB#gM-1wiY+^koRe`lcA}I_=3>=rT z%NffoJ!r0yKIeT=Ce&($>}($V^Hl!yKVu}zC+Fyi0n2PMKRjN66GbgBS?)HeHExdf zyV#n@;Eoq3Yg>NK9vpuP5zx2o%AU=tAEr>RkF+FVAOB@`k$(JI7cJ&ZVR&G6F!TwK1zH85C_JMY zy1H7v9ptg# zf*kxf^lY>58Ju0S7AMGx=JoF-N7?Z5@;2VPL6IgNuU0bcgg8;SagSbKW>z19xx{rQ z%uSv27&x>_7eEcP|IRsG`>FL5zeMj>k^@CNDGE$WPE_XdRkZEf z(Q?ObgI9p(Q!vZr&ze-`3gKZ{LJIL{DTtPw)wizJz#nOD2T3;kRu5w;qRcw*Z0aCrbGKGZ>{Z_O-n}p!U7t!LMGAMWDch864vt<^r8tw-8>E zhqMH%|Dh+wSLI{K<6np{2%z#fZzJYHjejU&Z+JU3dES6~N=W?l+k*KZOG7pmE8O5g z|3ZBpvGu1sZsDWej(-%bp&UN{_Inh0pl1e-|Hf#a7@oa;OAg${;P!kU7%;_4KK$XCaBA$*4t$So@djm@ncB z=6@s%bh>!ok;d&`p%-N79bYcXTG%TcsHYB(9d*$;2I4u}bwV zHIA|uAzKRF83-%$>6u|Ao76q0-hqEatp!3V`;%`;22SgEaw2t~&l;Hvj$&KLgWFn} zp5cnH5L27LLL@u!A^<6JhhzULAZnNrS_cmny$PH9oT1I6tjR=qPL4G|AaWqu$K|_l zKqHRU9Xdg!?@SxGxLg7k0@Tlij*>}%ftWij%g?sgSEtIs?f}HSCF<)38j%nDpZR>P z4U*;?{!c_3=c=!=+0DMO{SS<#Bp<_EXLj~oR+5!^%|bc?Cw|7Qhyn8=^pnKLKS$?w zM_Oaa96#znI*_ESCZdl_xwZW?+-|$L1fu`=zYk90KdD8nB58@*>}>gntX&1pERa1I 
z%GY=AOtDylGv##3kAC?FQ%^zLa2caL3^7133xZT3UR)d<(9TTqnU>r1v$}+SH2D~O z8el?%XaHqo|KV;Vl`!}cmY*NLc?fW~1-D@%A+ z^kQr5y1ug@%2E1Nm%g5!#o#1p;fXfB*|+6gbI+`3U@L1)e+@wE3{E7#CPz+h1|`~M z8x^qPn|jVcl5}O2y>NXo<6z3t47D~@2^T*@{IGCJmcFT}VQ=FkWR+ioc_`s&F%vtW zNtk_ghC-33B$n6zJJISQp!sJvbBOycp?a`+@gd)gh;HY|fY|h@+KpTrIrzJ5k7X*xn5& zrr6-l9_I*Vwi0%USxWeSb0bYnW0SusPW7b{ZNfq&)LK#!KgqNFBlz`le9u?Y1iImv zsz!ytlrg-QI5Nh%`83-4Roj1%yF&TPq4cFxc0_q=%SmpXytQ>pp9${@k#8SZbk6wT zJcT0!@=ea^{(ne&>#!)(ux}W8IYD% zkrwGLk!A>qVd#0!8&`Lq=lhQD`{U)HRy+Ey>?7k>21i0ywqq|WG%Ji@6&7cBCQ_&g)2o_5?L9sJ{$ z=l;K^oe8vXcATf)NbfoMA?H@SH^F)HfJnJB#Ao%rXymSg#oj%9eU}0LyHJrR#jY2u zQDxq~ql?7Hec|8j{Uk_y*fYLU5P{$R(4tQ-Ec0j0 zxE`I|1HFOU_EZR$u7KD_@0r#hFc%{4TLp8?8H5{wpj~>tJGkhZ=D~aOyGM5a;{p7Y zu=4Ls5%H}fKJq#V_*lY-6$X-hg^Un{cV-mA(2xg-#15DKcyeO(ZHt#UV#CJ`3C8e< z^1VBc94G=PXQaOcr^yT8fEJ>-9s>@Yw7?GuO8oQK!rFAmOyJR3DEI6wp?Za|iE#Az ztt#+Dw6wGgLpO1h$wFpLSBuZuO+Cj3#$Z$AVh4{)W^TAJhFU~1bak<}doB*$`}cAX z-~RT9hz0lx4A2Zkpiqob(1oi4^=Nq=v2KU?70yElKIDg{mos-2DOiamwao+TNY8U^ zTMm3q0Paxs+~SZ@)*IRZ7EV1^Dk!n?Sy?yF{rl=)yL63yKROM}MS(>Ngv{QbZs4(l zOlOzZVYGsZ`yPVS`O2aIRd92aCjgrv+yHh(W*Yl`;V-XE+)8VU_&fVcwJ?|%U?dbt zOJ~pH2H{Wq{*g~0KZs8IJ&|W9nOFo0b`E034e=@Ock#Xi4S21PfHs&@@WsNkbx(Eo ziO8{FU}Iru%MDQrq9KB7ZvZalMgAG_)zPS2CP%3D$P6;{V$^Sdwc|6| zTjqaMnIcs#Y4cuVh@cn{6@=cAp1b5%J(w&Crnd6D6N&N?6~l%Q-)n#@)tTVe_FD-& z{X*iOmk*w#4uH1j=}8Z?Qq&k-Pfkr`14FK`&f(!blQ3j1(Fy+L0PrXwR|rD;<{bWq zrSl@Pc0KZ8{Ow2RQhNa`6zoGy*+C7V3p8~b{*(ePl|4Vkv%q@=-bJ9en{!F@NWz^C zaEvpq%Z6_E_Z7MS>5p6<>;jS5a)KvF8zRAN4JfaYCKcM+`r#&$c@vJN5PV7uJU_8L z>lgWOL$pij?!!heiL$OgmGr9l-lC#IbbuuzDd{c{IXfW%`?u}i>P-oqESWt<7$}?X zadV$ALB?<&@FCVpAcY5I?@af*KLwxX9+rz7Ao?`2(IYRsGe~WITQ=_~{$)VJLXrTr zb7z4PBIY~J4j&EOa6WXy(7_>Qz`#f%58-huiCrB1IONC5ULj}4!?yAFK3rr+KAw`> zDguNj*genIRbym+XPOVixGEgzKXo%qfdRN+Wh+yA{0X|l@6!dz_J1w5oH_?K@}iNp za_{F;?jDu^TxzW6$W|PZB&d!!4}7ZY{-YweDn^$h4Jv{(Rj48wAcB|$QKdl@4Jx>L zP-IXc%gyf}D40}TpiG5&W>^u*uRSt=*HR4v#|+vPcOdN4*LOkM49}v~$|PaXU~Q|| zx2gyw$pyOfU8SM$d9T0c>k;sWlgYtuJ8}+6dT_R>hT1?BRMQn?Y^MLadTeF?FAv^X 
zukxk18@Mz^Zvz=c68NW%pf*K;9bdHrg!c$%{%e(nP#706f1mh^jDH}^vj|8;VQlt)bo%fJ!PbB1jA7klyc5J|do$B|q1QyJHnlN-)0Ikn@T5ZXY9pt-Of=3jX zkV=M_{lD1PF-l3yUMT+m99zg6^FSgF>}mP2!LeS;4SRu-Q)wq2!S|+rg%M0aIRtJ&?#|VL$8vBnY;@(x>X(*pvS^GNyriIGEd*zr=BREg~CG4p(>*6o5#PSzGJN zN@I_Vv)$d})%4*Ep<62`kPY}l-GD9NHxB}2a=PYm?kUjlB_lGq8*=#_8-Twd-rb0J z56LJ&sQbtwhiJF=++=cmaG7>3&K+_*)E(=fwLzSJBN+}o{cN(K5SCrF>wy?=YC|F_ zP!Llg;YGT?xIf-hD|X?`alf!BxarDrGEHVMlhs2G76#(&mA0M%q*Yq(?i6hNTLtg+ zH~Ri?^GWrU_6SUSkIK&8Q~+@^L&AxnC+0-dyY5vZIWT%NfQXBR{qea-|-`sBi&VhkL^?2EEPML?#&f+$ag??aG}?RBVu{i zbe&B-t2NKG3G5y}j_1P51vJFZ733=)qNAe`qi!v|xE91qOpGpcJQoSnS^8O13M@nf zinWAbDD969iAa!1b!88G@n8}>+Cm$A7f)LEelVoPlfn0!fXfcIt*Y_ZX}E}Ik7E?+ z+Ok$x9|4Hy{N)&)`P6Lhh&z&}1q*&6r$uIfZ51CSArUHOu@o@<{e(q1@& zZNfBA7G=)$BfLr^r4CsrYzDevlV_F!ogdhdbs*YdlhqJg;C6XH=k-74DJ7*B%lRB4FA<#GvQz-_5ehuAHO9>$aTG@xHki!sQh&Zi32a4 zNr_o48`+UZvT68Zi*0oPS_cob5Fr?8%Y-4iT$qW?K(paOw|obdG=}+s?;qKko14QJ z;#+$Dw>evaujPLoo{e+yAeuD-Xin2670m_4#jtB59Q{U+9&z-v`TqFy_oe2z@b6+O z0WJ`)zf`WoE?Phey>@{`D5C!_E#;F^2K6bETr?Wdbpt*j8B7Za0(PY{OsPL100&iy$h=@9nyH9{ z%S;S8EwG5K2_ym<3FDtghM5X{W$Nmxu6qSN)Ig+MHob1JM7ZwxP9b-Mi0GF+z__=~ z{(t8!t?_{D096(s=W!Z@44MfxK`q1=^#DCcC@Ug3V=p-JH{prU? zXj%iw*;XOmK?Y%#A>s50GZM+`t3F2B{9AO@g@h2m6+`wV4KP^Slgh%dH5||@BxdrD zc_Ns458L9Qv`tS0n{v30VaafV%ZRJlfy0mVUykk&W5Y~|QTq42{v9hsvvM z&QJ@D+q#8Szwio3O`eD7{t;Dj0wkbq1&1e95^nFt4V4XGJtGdRF}tbmL%CkMq{pE7 z`T6xbN*|1*mOI|`8Xc@T$lZ3nSu2Uz$|l#){>7sw?F)*EN*^8_J#ukk&7*H^Kdu7ck|3dcdwOsvkSq~$2Ct(@W=}+T zIFi5Kt*P&#B`+^u=+f5Krd{b(f|wMW$9t;-I;aBk9d4NLBUxF3Fmc3{?gKxS_aE2! 
zsRh>KPSSW8l$6jmVw*MY-IH*cz9z@QveK1U%_Sy*CwBcuV+eOr(s%ZA~O)^T)BXZ0P%wL6-ZOuH3Pf z&cteGMh4<2!(J-yZ|&_i^TRvOm6a!(MjZ-`aDSd1F4~r=$7?}EmrkXWy3S`nSmiYI zo7rwi$gXlHPS z*?aBtv*5fuoB7@cF??=uO6)|M%6XzkNVOkV4Gt0-#u`vV3#tJpuULP2bOgD8HD6ax z8cqB;9U>8jQ$z>9s21A|fo)Fh;bJISTRP#8Z>1r9x#`0Szy2yhP@tjr%a_H{d2H7o zyz*H6s7}qdEqm-3y=neZ&p&(-_`m{tf$ImpAj~XWuW`r>%qF{ o>d4`gC^j3muNnv4n5K0}!;JwzzAeBN#7Z&m6w!HV|Ab7p81iq3z zZiV^xaMP~Kc5pq_JNW1mJa`I7LkjuiY5El&$Tk7#M9~bE@L(b2O@!&>GTVu4$g@z} zIGi|12C@@S4ZjBMAyk4Bu4Skd0lmn@orl;wUU-_32J&a=jm3*lA+A;74k+q1x1^=`(pPPSolgqWb;*F<<7Pn z;!tvSj%HxF{-A)*_1(F_ziaBHXG`$SBk;`7T0LYtGPk#b8k_Y%kwxxUCLGxl!x_UaFC}#*B7#dyN-CV3R`~8MoqIL^YYA9F zhMIdnM`;BJhx?14`}6EO6^KZJKx~A>3@?v2Jlla+uk}re#-T&;8V7eBorJ@BVy?un ztIJP4#Lc?oLvS?P<;!}vbWA=p{RVcCPYk9!Nc8yHo^t{K=f_>)(Mh1bAs2uA_${@W z(M5W9fs(vFqjBe8l^p#ZHWtf3SdtpeURtd_c%P&zEvJ`|#;n1nzQ^X-JHU&A$X&>A zTO5X|HnHmk!z&Q}4EGr3GM_mP5k2}~7%fD8&ga2Lx2g%a5G)~6uZgQKr>@+?O)v2? zGRmnN{XCY}cTr9#q+<3*z$P}y=a8l(Y=?uTN ztEcn7S0X`gY=fs236TAw>h50hoez7}8=_Z`VFAEu=FG5m^qTsSAX2!5F!J@SQX@&A zo{+P0wuBnqV}(}`xxC2bIa60}7jn$CpCA1YPrk{<))Z-?Vf2%^M3(~2An)Dx2YCZyPu%|fM56t7iB26= z5sK&RWrPC$l4r zVE=f$yxiS5#tRcak}!0OPz*zSj$+u^GaUwcW#846+t2dGj**jvp8xirKVEQYKkRkx za32amP>I-PoD2{}Owtj#&v1q3k+OmhuR}uJ-bIrH*Bil9s)Q38qdxROs{f%RL_Mk`&ml5*>Z^0^+--fh(eN z@7^$;xi*&U!!}j@#l--^=3z&kFQ9+bWC#!V)70F>=;cS zXw_%H)3_axeQW5u!zFfBI+Y36OKx;&93q6m13ww)5A(e>dpqZv`tJLZv`m?r{M0>0 zhF7nf&yoN4u0K19+_v6w_k1YURoJ(u+z>Mrq^g4v!c#?9Vva&Fy$=>#*8RRv?1Xs* zzM4xyqhHJp;O(Ibf`pDSlk;ohv^=bh$(@~t0s|cj^_TuVgS&_KL45%Wf4cohWnuOT zXG>F)byFBDfApxxh3unS6O%sZq$Ey8sgn|%u%<*bxs<@`$;xE*h`8Ef+5O_lC=a}T z?_IS&y9gYR3$hFvMyJfJ^?MTK_1WxoGm(t@h#qOfA`%uxn{j9th+E+dHD52jM-H_M zD-Iv6=l3Wxv%$KQ`S zM0Av1C|<(PFJ9x=w)fSLsYk^0cVX`*4cK9VwdfrkefSeSjz1Udb|WBr|J@_M2!FFz zxh~+~g0OBZuEyuUiic(Mz&AB*-P5Ez{$U>kJEg1V6L>7(!k8f!=I$5CKAYhCQGd5e*jMP0X~QE| zO!@Zf!38z)$5q?gvQqkD_ng5#{~m6}DHXX(9CpubZuxls+c%%$i{?pThO;?kCM|=v z7^$QH>XJQp6DeZ!holU=t|RKB)HiQ@W=9C|5UwK=L>2XZEZ9>_&{O~Yvu9+_2*0m{ 
zwv=m8fkPsWGWc%dZc$bu_xCR4I-7+D(Rsc)^G|2LY|!{i&)cd=r0H@JU2NXFk3Z5N zM-kRu;LN8&p%ewt=leESRiYrWTo;zKY}gE~etf#Z<*|wf-c#yfQXlc5GK0jAA3xt9 zrth8Dd%pT}9ekN-j~m9NEKp7R9Hffo>xthb?Ce%2Tt1tfG7LTWadmlpol6)Z$QVuS5O0%0w|JO zKuxj&hY%$mGMw`GKvbGeaY^8d8Mz;OJG0{Dv-|Z%cNK-sei4ux;%VJEdJDNEN11A` z!y$_F-jK_I{T^A%elKa76BE2)bJCLM6UWD$P!U{8@>93P#9fd7-ql*|Ow46M1n8RM zIZJduXEQnMiuAm?Fsdc29cig9FJ#cXkwj-*_GCW`Pw}p8$UFNvFn=QLahcEoLhdB-(Mp2FIqLfc}?@B)|2LXInS9u-UCEWCZuwAVx^ zZ)I&xP1kSqM{=7D6G|<^Aqu_AdVOb+&3byofReAkW5=brr&vDhmR(jkYzx-<_EkRZ z59+jpjdOZDFnk6Bc{!C~u5)P{7p7Z3M=y3Ym4=XXmAPaVc$abePQNbPZe(Lj?cj@Q zV#Lj{prRnH6{qduGbNAR-SFzhYv%P6-ki4t#tkL`QY4r1> z>DC(QZuTsE+K#ugdSdyL1wFHHyWRkdHX7p>*hwf=2`!b`8!0}KU*-iyysymnn>KIx zc2gMG?7U*YR^n&X`nuPJom}LC`cjgMnws)=%dg2{ovuiQUy)H#>h^LjI7>V7anU?s zG4p%bkg`uDw}G}5hJTD0yJB~~8=oUX(O?!nw>F#e`07AOhZP%fb*ZbA$FDmq?9I`% zg4)ryU?Y{j}hXMe0+f7cY!qgKn+;y_fuIY6N;#@;6yP9PewK`B>X&pz$%e@`sFQ zPP`AR?XZpC0oTRIVGkKToGSKCxuy2Ln9HJB^A-AR5p2Cw?3%OST)za(LQe?swwn7*%#onj|Y*e(=}` z6Wy2N=g#|c(~_Gu57?j*563hgzZnswvqBT1s>xhaqqQif68NfMO*qfh_q=4@cgR1V zOy9hjZe97|!|s;N`R*yP3>vO1dp<{^snE`GqScz-e( znL5=E`y<;c);;Qn6RplrvBmm$;2a>_Tf^Vi`V6_LAE%5ACLR6N%FVmd-x|ZGcxbQ0c2#|@u=OkT3T`IxwF$kk8KF+W9q-MIgTA`pNQ(&;k8f46E+lfM&70+>oDF%&i3-l<6yLZuN~%z>wsqi0))BZs6;%A|E` ziEG?ge>n5xgjG)ZY@X*r`kYcz%UC+`X$D5A9s>i<97p`1%FX*q+|xgtrF-;s^4G*( zvU-MR$khe6kL;>nSX&=j%xaO<<8zAyTewY82a-eM+D1a>r_w0<(_4KhrKNjH5_`YTJIBsljj#mo%AWL=&k z%O{+*^srH!QMYV9OuG5bcZX5LmX1@{B@;V0a$zV_WYc{}B=i-$;Wf(9wZ!dbmg>`4 zS^b7Z^;qXi(Mw3Hq;?P8)U=I%nGyeZeju4q1}(#JtUTbuq)+6(8$_IshefB5r>iM7 zFO}=`ZjUl<;4OcIz>PvNti`vM<3z2F)B8?|lzk;UB6`#s9v`3l?VB?WizkJ$BywOU zba{oRo~wzRtPK{__3G*kc~!4%hd!HagFRFc!!THvV{0lDPyV5?xzInL(>J zGWxT5qHy}b?Fv%rlV!Y= z*9Cbk3b{=QMg2#*{8j{sfk&AUtXMMqo_Y_NNlk(0(1JYZLOb+@Q-!{SOjee@#GgNH z!_c5^7w(&1LTctH?D1r@ZuZ{hsiPB#>|B=)a&t#f{POARYX_^+n`ReX_K9>9c!*|F z4z9G&Zd7KwY-|Kw(C(N`-icYHCYLcXLf`y;^lAh}ebLDP@+a0A@dRi*vPY}grt;(Z zl96+FsTxcsn<}0Y9klFV&0)m8wGrIb4ddV!)S>^baQwxXy0Cocg0Eck)=Rpe+f}$V 
z66dPP&Z7zEw(v62*WzXj5~}XjY!ed`W@w0qQ+6qgh(z8~^P+PsqggqInTkD8 zg9{<3n5|r>GBaHaa*9R#{9-1S#w|ZR+8X!>gA*d6#EkTkauIyf_ZO}7+6f!HWXB8F z4Wy*1KUera@p9Sec++e~5;7Yf7M2B5OSY*HT@4qXY7dooTQ1Y{DQl*?X_Q7sx<RzO$X6*3O~}VW^EhI>xA0N^#OXU9V{S$o7>$22(do{PKMFs*Pij&NGKFym*ZROZJe*#l-}I+wBDlSm+{br z0PUHL1KpdQs&L}9LDzL$h{uWsIo?dIZDh3+4X{f&k~3L7_TlI$%#ki0*V5DCi&b`X z#9j9noxyD_CJ&WiHt`Y`k82xU@RTzw9oq5Htgr=pXU${``Q9?Kve^12JA=((tbUCR zIL)qTUlO-{XOp?VwC=zWZM^_>Tl;9fMZ&i&FUR@o1tRaqOF3mcmKm@U7 zGv*89T(Z*+XeanNUI<51=PJ9GbcshqX~gH)_SFV;*>!_BD`C*cFr#v5h?@h@% zY7M;L?#~n0E&KD1JL6sZNy4y+oXI@&S=Ng^Eyu++ghzx7Ok)hA16p`xv$ur9_4x)# zfY_-Xqtq2;AT7Lo-`mVJrdx>6%Ut1bTP;4)Z0B3_4Rse8SzDLv$#>?9`qt&H9T9B8 z`X`)Ab$QAecarNjwcHl2CZoA__K=@wm&ChFOT;XXqmGyt*DTg88A>c zYgY|=K4OIl2|}GdW9{oORF9{Lm$S1kcdR((=sCtV>EYHmLQeZB}LbHP%%6kshAHwW8f4)BcHBVV|WrwEdIQ+-P6{( zmYYlBySx23uV@%~8SL?!SY;)pA>rq}nwbNw8Ht%0E0~@B)>~z6Jp^hVVzz@$CRAj0 zA!a1iR-yuN_chvu?%gZcj(4&ZEHtxiiqi4*@2nYpT^p21XaP}95U-ued4Ir|ahLR* zoqw#5U<~qWkAD!qCn>zk%&tY(9AkdyiXaVCX;_M>jL+NsUp7gaeb*B~VS>k{q(~1G z^UUifsw3DW&;Cb5{fvNC;)nVo@B*_yz8KO5q(7mxRd`unAR96%+N>Y*8EgiB1$vPFG2mk44Le zy3anmD=c=gF?TkLv3&BSjN&jaMUx&4o14}v$0j4TNw#6yIJYP`2VEbR2OW(;uu(!1rr-+=E zTLWW&=WeFx)c1B_f~8;r-UHJ^pIUp>cUcPiQYi}VZ<8#cBE2aTJM5LeD-zYgGUTjM zEqKIs&;obs+yzU&tT-R-(jWWNH8`=ip^&)}tLh2jZ42(RVdOSxr8$O@@5QwYe9||k z@YSTrI4ov@-&ImU;If`qZWn!&q2!f!mq5panpz6*XJq^I!Q=&QMMfq+vMpb!`pXW@ ztwJB)fsWIc--ienc{Fj!S@v*1=pQ=2rD*p)7qMmUHOnv0W^LtdS6tXL`-gknDum*3 zN4CGdP^VJS*Iy|tGM$QJn;(EBaG}3zQ|uG%r!_p?!}0A#6l!E^)90MTup2>echNVQ zgenUM$JHW0-~872Py|C}Gw;Uu?7r@areqgv79O;lA;S7Q@2#9QwZgSy;`5j#iX&AH8(P$2dq5ks(FHOH+!3Uqajj*F z4G1rMB}231rKo+I_9h5Cg+Ouz5s8h4P9iO%xV6q+3 z-Y9j+M46{4n4q~?4Z4&r?7XLoL>*E$01G!Nt(2oN3Vq43oxH3XH1yJpagwNq^b8kV zx)pmqe8iEw)o}sZYgYGb)rmXyYZ?Ko^-jm4h4~G#KT~Jn5_c1L&eEtBtC3(_;<|6! 
zIZ-C4&)+KWvwFrjxM;}SITFcT&{=X9(`+%jYg!E@Sm^Z!&`?#bMR&QY*=3WX!_kB-VQYq*H0I3W zG8Pr|`uePjSjYp+C*j7a7<+B{AQVcftg<00B7sm=E*V#^IX_|_bg$ISe!{{sH&>ak z+x}=@dv9*HkGs3Nf_H#VeOGnMm6^c4l3AhKQp!Vpk22mqc}K$UOt4Phoc~5Wd;OZe zUjA@<0;j&J?wT9l8^`jK?^aLfEltk!hf}3~S^gMkJjW@M@n*4C$zwXP|EX778;Y8t z(7Gc?bN9hSiIRC~fq|XHsM)go@lv`|_nZ3>`#OCv(H}d|O}$ZT4^l0l-Qr_(#_2 zw2jVd!oo71Ygd)<3mV)R62bx1$-@;CMZb_~T0*jkue)L~=3UkW1G{5oMEBQHE($uv zaQWH<>wYFCy0t_TiPEb0F+XT2C%mh659h_8sWxg|z%kDQ3*qgoY{gc{p35 zFP==lU3^cuV6ufxso0|1)Z*x&Hr?c)4J=&-K zi<2>V;hJ9e%|4Iy4F)||k0SAR@v4j7nUO3HOGWTiOwB_$ilB$s2W%S;zw@>;UVuI- ztU0BZZ~xN6L4tG`XB+kP7&o)ym;IEyXwQr)lJL-XLAMF3PF9P6< zBdWl`aNymQvj}Wu^tVEom$S4i0t32xFe{#L=-iemf_j&AQ{3G>bHdd(NzgMcY&?DC zA&z7MTlcyWiUIU9Nm2*H8Z1g$hAky)s+e`B|3%~WmIUPk^_<2f^&R8vTU;Z(^V=)z z%-FZ=((yMX-x7wC$fiiG+KhRMPsS^N?Xh6zlX{lUEK? z&2tI3F)Z1YV?L+Y{#q5D*8@kStNpb`%eS{16ZE@g>L+M@Gzq6lq4&*s^56mL^Bsa$ zm+6R)bNg6j^kqT2Rt}g{K3bcszZTb{gNDh8W0?j(@n=eg;xE#a_M`w%KH;z$);tL~G^e}3f_A_% zq=)QYaVGIQxew;Zg`JA%W@RbgH^$8o5`LYbu%NRr1y{^Q9+R6=oC3 z`J_~Ek4vqK>Bg0gP^(^{R*h&po9-xenxd*c8piMqFeK8L$Nm?3lK_QT{TpQLZo z4=BlYP=#``uexyWn$6@~J)1Y^BR7c4q$d>9$!u?S_+t^YQ|jIw)e0Kkw@R!x>DeY_ z$dea~srtQ{)h7Zw6_R6d;tkHp61K|e?BZL3QhYz=6CCp$6C$UXb39zw)mpMAX1j(m z;{;`K(lX&eu2OC{?V6%n{hoR=ZffKTm9VBLZie)ZQbU^=&lD>RyOY|F-EuSSyJ7IsXg_i5{s}`}Px4o0@{1MJ)fK(u1X9WEgMh zVN!gBd2igmjyZis0j50`V)s-DFAWn!H#QFmpgC#J6NwProd^kXSa_3~?C)d?=ys`Z z!j|b`NT;oK+J&v+v|4tq=5wTjT|D-Y5gA!u&Ha8!+XftXm_a)Hd{ixBNT+0Qp)gY8n232tj?kVj45Yq~E@ z%(cjQz_$I>1pzKgncYz@H&`$2@Y1LVo2WV`$uOv$u>g>~52jXt;dN@dC%y^BW0y3% z&hOptJf4Z^qJc1OW7t))e~>iys=NHEGqF8MYL)9-wYo9Y{`qNpqG(sDTnw;|(LaqW zeL1BF;T%=ErB>lx*f%X#I>ywrN+>TB@H@I^1=62ZwOPFHx5&Exp;{65d$qZj>bWzD zFF2)ZhMCDd=?)wp>zhw8x+o;ZFI#huHqMB+<||FyPn*D(*E@Mr%$U9ji%A?Q;-LG& z(U3jimfbc<(5^_H@0)4+iJ6*C*~gi#qkxSk4-qbjNg2GIqFn*KZ}7KxsZ2iK@vYM0 z{Pgm{l)Sv>L~Egejmd*IW>nr+1(IuFiYEBdjfA@Ovl|Z3O*{`fP2H7mj+0Y!vJ86L zJy<2KqdphfoHj+HPskv5VB@gcTwNdQ{#X&F%i6l1J;HiX+;VYrc)`dx*>j_cUzl<>Sz!_@=qVH%swn1*^a@4rev#_6yT`YYmj$)0*;%T~} 
z-Odk%Jbj<7QPkGIKRUmnDVRSbU|+1vF_b2WN|$ktQ$Brye1csmqWGgxBYQz+!d2~1 z)-F=BX!K`~?dTR7+LarF3NX0FU_+{_FQI6m(1<=U9XJQUQJ?1S3QZM zrfKY=5INssb~E$Yxd4x;V>^>aWNd461J5sdGIP_(4yHKpvsq1l2d~o zHWk&4{Whr4;`@*#ULW{P^SeKFDWOnx$93`~V~8w@A&7=r%YHH|S(eaoUXBpXZLlc# zDyT3e1tK;4JpqCb$yynXZNhmpG9)xIXw_y`4M<|1dRQ+fHY5&1d0 zNTBEz%ZQ1QvfWxc4vZ5$gv+H(&3|jV+G5tNL}^QHrh$H|>8Z!~#=YkFO3~~wJ}pw= zx{xT4an!~a#Gb&r@~HNgf1vrE?-$%RIL7rG=QXW}dcUd?hT5GKa)=We49q z)p8A;q!&=InR$}ZZSD@^>Kt?R2S*OTp3~UI#%PTd&qb|p6Tz$0LW3UHhBGyAJ)NrQPE(QI36$UO#AOJ? z5x~kPbsQkHJD<(N$^)NIGr&CrrULQ{0BTKUFEfe^9G7LKl;!0k(E+CCqrgS=Lx7Fk z4hN4_RTD?P|3r8MX0%jJD|%`8Xvy$^W{F&}opP#tcW18ggqQc?`=r_1qisR1#ij*a z-Znui1)bMpJm00vji9~nnjG!@&|N=_4JO8Rk++_3b{8cuwvApxy}ndaw|SO!tc!HS zLv1>UxBQlNSi#i&0_TU$(i-=I51(wohs~4III)T0zgL5oZ2vyw@48qhlbhSv z5Y%2V5}Q1N_f*sK>@{s{O_0*#9_>FfzM>^eJ2L9&Hol$VYc_)o!8b(|*cWS%+6${` z;Cytl7yNDf)5jEMv(kIw6|Zlm0)l=8+tX`kx+B`K8Q`ktqmWXm=`KRk(ONl_LNl~< zbEl|;q$sMVw^hbrIx(&E8Di2F(j#Nkf zbcC>NuAL}U%0kN<=y2c%oYY~HKmr7DK*qKhDs@5RR<85?An8pTYX$W!n51hWCN@6X zOA;<~y-JAv?>Pu`1#V%n*vuma>}Pb3wcxeYhgl86v~#9748Ma&V^cm2cnoRx-0&%P z5#06OPj;ct!JdhlPm+Zth$<{A5l8EL6=Pr@>$&A>ZIS{$gn=PZj!wzX4rZp%`WD=M z%@ z&1RuXa>s@Y)*49IWpghsqOkQnrICjUHPV<)l2I1*I~>*Y{gs{`kH0^!GjP4g4&}4i z5Y72bK9r%kq$I%RCFT*T+Hglphx?<3X0L-LCtrWo(b*aa&xjw)8eXdDKj~$<8uXLa zdyQ)9C|Ex;tCqjDm(wriM$szY-MCW|MWg7|;8Q~}#hlG75nAKkzT0@lJToTGbmvY? 
zGFho?$c&O+23cbmLy172BWA?dn~ZAnCy&{(+lJfM0`D49KFv5qbLxho1G9Z4gVJ*6 z$fe?zDe+C6ql))Y4dyQ+E>M}vwO!w=yrv}_>56JOm&n-^(iT*60{8j>O8#1pW)HSG z5-SqdnHthr<~CuyxUbtwI~r$hM4cDcm*hDmnNM+&_~Vs4gCVUA+T+N-3z(9Ox;i^F zJ;vp#*+(A`COE}8!(@gS{YnR8ukvVtZjo z{a3?S`u4V5w8;dES7qa)aDwL)uG^cFNxndzo-Ru}EDZhew)L}-ygbW1Q&QE`1I8|` zeHxB36n0f|ZHJECkYnLhaHKvNyKWQKr*(s0?eMT{yh3G|D^7 zAI}%7FMz)WCT`B%Ac>LfNbBZr5B*#YY@CA1MDN)XKVQ*~)RS6KWmro#Z*NEUguT_; z@pdl2X{~rroxD_tPFIPkwAULIA+&t3l63fTFtela`B7{c zJB%A@gtabrTNCbU%z1YU%1Gt*i~C9z7o}%?qf~bx>{cg!xSLtjA)o)vG}^ zS=Vu>b_*%gs?*LesZLt$+-b_#S|dTE;jK(x(8aCS9bor4WXPP1`w-~`- zyOFz0+7h>>$LBOHUvuRgvrwYQl3CnOL3KsW#O;KiO{8ikW*H;qQ~(K9`iYXBWx2*w zyZKl>_UBlLW+YP$wT@F+^+0LmwsN-ZKo;p_I#d zNQD%m2~oo&LNnZ!U%%;7LME#AEsuf>7d8!jJMTN&k3S{RT?j}GG?mEGPznw9{IQQ( z<29$0beqFanjhb1-S=dCgDQ3*H0>dW=G_GPMf?CuI~wd7b-_C5gq<4?B4h)IO#nvG ziiqeE@(eB`qIV?wv7mqpwOK}=Sq&WdM8t(1(&Rh{J9u*6kHs$_mRkeyTIHx*H$oB; zjkX2{brILIVdL7`+6;{x{pL!Vf*{uS?&@ij}Dk(O)jcup2?HQT8sEu#ZNpzcOH&PdDJ`fIh!Lc7Q3hhx*?eU_yH)eT2! zf+!0$u{|T`-d;Oa&SJ~{g2YW=syNZ|nH<7d#TZnU&;DxWkUEjsWn3(C;IO9Qg@9@g zT#NmX)et@->C(a+KUj&AU2WM5(aa#{FRXfyQyO>uaDQ<(qk-6YHCs!JoIZ>E$S#hoOS9z!V z8f8o)A{&|6Lor#c9_;LqO4@Yq(Y?_;6*o*8nG7Z}o0;hQFYCM#6_+h|bw0r6dfN@7 z_oP*|_0NMs**V0;Z3Dw)HKAUaAvS?)*`p)m!7eHOkPW9 z4h|Qn!F_x9&8?P^GHhtL@FmTj&eqMoe2k)GOW z+y`x3QJ3#;^;LEB?&CKJS8&}hxml4nP@Z=|sBccFZz8mLwSgn2FlZ)JPQS02bx|&- zaClX{ytsj_Z1K5E<+9a4CvJFhMYG8(uF=+OLT%WS70de*rc;F#lwsqf#yr2`FHz2l z<-Bx@_qg~f>q=G>b4`gA=}Z^-+{+-_xBBV|LM4v7Z`s?+@+v>=Ths5cQ7c1vn!Q|& zMjLD=E%0p{6ny>q>HAH0M^k$D+ZJhW&86sDiXww(hO!K#b*z}%CuvgCw~mw=q^%PD zXw$4v7$s#DCnG{IDlymp;GO?MyoS1v>?bs@*Tk>fqBQF zacVSxwi{etwHDp4$VIENb7>;_8YEl*!IBcJ|D%COARSfA5qWws%|JbpJh16oR@spX z#mHE8!R6;%zCPHKRal(%)N;W7oYIViP(wzx*AbE34)y7Y8-(5BJh$@Qo&41t-(>7^ z)6GKL7+kfqJdpXUqkfKQG+a|pPq>c)#;DXX%Z(daz-0<#5%T$HQxt7XmGhqlHSLqo z!R8I2TP`Dy*1%t7UTBfen{QIvmUjo=k3;E@3P+*Y%%(E7exMIq+c>3ICdZt-m?p&V zKy;CcU-p5q|5bVEj5dsUmuFnQVZeefyINIjiD(183Zq}IppD42f%2l#5Ve+kZ06v( 
zjc=S)PC>VPUt{W*>E;LYll=HO+i16hGC9((hZ#<=5xw-CUc`1m06o?IxmX1cM;NU$NkWpZ}}NMCbdz2$r8a{xLwVL;X%? z#uVJpcFWXc?E8@2_Yo0KrDb)K$H&!fyqXU~zj?FUwcq#p^&L~Lq`J7tgG8BD-_6t0 zx4O$lbnKzN|a&K(HM69@t zNR9Z~%K@JiXu36T%Cso6olRL(=vf1f1_eh>e#czo91T71O6eV6KQ_~DLsc2rBYorGhrF5G77cie8A4X z%b)W*hy+$BAEh{yWR222-H_$Ks78(AWNSK3r0=5EB+N!Dw9Ss2E0JYnF(xa!jApp6nViwp@X#E46#nZ&b1fR*Han& zpAnn7dQRN#dCbgWsjK;`ih#M%y)ruNrr~`R!*_A_1~G6btcf)VofV!q{7|WuVdkY1 z(lJx6I#Q#iOl)DH!|fi&f~x zHWiATn3(l){UCGfT@zzrtM65-$fAs=27j%A3jyFxsM%g03kd_H(xbCmhSQfh5d$t1}xb$}NWNfCW_OHRaCGu34U8tERCbX&#*VSkVrq~X# zVl)k#X&0``hmq&awkLZqe<(`0@93C$_?BK{_gz6c%S!aQ%}e8x+WV^J2QZgSc2;)N zBQHnm&6*ubD)ObX9WiF#9wAT)MFD50)au%=`q$w2-+h*x5pF-bBCxS4==BKUh2?1U z@&3 z_!Av&wICtc=LHS}Sj1ux)|)F#xTdts^z!v;cupR#z3|BBw^N?W1gcoa3+d5-7#9LIgf2iJ9duIv4Njq~+-ou_$Im%h=yp~e_ykwn)h#qrPI z6k?@s-BP}g6c+Zg-mQ)_)YPdwDhrObX-U2A%q)ChfJL!3X#0`b@o=vF`|DCI%o9Uq zX<1%~$($G-zj^M=gna&S-+LE1b=>V z$j&@?``c_mCR!mTFXeb@tG~hzJx_~<+Rx=r)Qx&!k)3>{r$i)~MF*t=G&`-uw4T7h zDleJ)GElphyLC4iA^L&Yp@^Cq@ekchyWJxndG363D)`g5Usl^i8fh_^6$VzJyY&M` z()8~P4tp`z@kDFnsD9h^#^tr{=7)vr*Th9X5)}77(U_SsaR2Lxt>;sg(<-)d?~3`> zvP3~XG3=a^Y}z+hEltg_k*UQ_yF>N_+h~s6*D^P6e*HFHw6yl1^~-#f{FwMEZSCO0 zg+4n2ooBvVMo)G{($otiNk469OLg1$bNM2@(&w71RwZoB43n4|mgxn)6=<$x6?x*o z-}i)4QAX<8m;P#NV>@3sTo*GmRQUDgHaD@~!`X~47#bh0rFOX`dc?0(`SL)$mz}(x zMSlEy*OC&8hmXb$B)@Wty97JtU23?pACqey^weHsPPKb>Jlcy{Ev4>fa&*yIuiN@xc;2$H)s^QYhb$4F{$xKmxUQ_c z+YgXd@yXuW)~2uJE+#Z|4qf*ZqnEcDQstYqMU9l3Iu>v%$_6TLiAdnq|EyTL;_XsZ zwPQD}${(y*!;C>~n&s4t@xf*Vzh@v9VRlmDZg*|MBUViFImcV&y4V>S8V1%{MnTR| zRYl9mN)j8Vc-5n|(dE#KR;zXxlv~Xp92v*)>lYfbBSBJD)+)2}jpq9fu#-Dun%y0e`>0Z3a zXAisb-n%{?tJz~dt=v<)VV?)DFo#v##G~IrQT5k5YZCe6%pUVxGJRy_opr+t7 zJYwXwWbkF7s%EXXxN;eqP;$JK6V$6V}K^lNm{xmr7hTIQA)Fk=@*@}+H?L|GY`MqaM~;VaM&x8 zHgnRDgF+w7$)+PMDj@ms$@%2R&33#ml&c!udhgsxU1UC2KWv`x=y~x;p+hWVE!z#o zQj$VG^;yTASQDou-aE39PDx0~Nvk#c=^le7f!-Mo73si(YOnK-x+m7hkIdD7QSa^4 zd_>2=U^B=}+uL&dxH6ach-^ZUfcw#&S{c&r{OwG_={tAo62vWE+N6wr*iGYJcy(0f z*BL@v#3*p(jJ>@*qoVpjmo=`Sak+En&f%nvrw{yh^?yD) 
zKK1#W!Ib5Pw$wKYEOH~?)f@!%Fj>F&_FPNv>Z9g4Nf%`8vo~%vy;Uk~?I&zysLqp; znE0cW8;h~K6%q++5UiqA8GlQiK_H8RAy*Jt!ioYOQsNd#7<=swP)?&9&%g{=rC>yzu%WmIR?F10{6Jb|oby^6oolmB$|1 zS`5hgLyr`?cX~&zwkIH6QSQ}Sw?TXC{rE&%)h&9}Yazqt1{K>9=mCf%xaY-(h6kEF ze{*xJwldPa)RF6DKPpIUJ}Ldtv%Hech8MoCjICuJ7FF#{KtFbhW;| zP`JS+jpW}*@k8L1CFwFZ5(=8H-94a>Qc+hAV(kXmTP-M}W1$IVAgOI1YRL8<-h#=c zPx%%5sh%Z=1gY}QXa_tDFYsSUm+^Lg0AIe*BRbgx8J#Grt&EJtMzZ(0g|n^fMXr3~ z@vc{Udi{EN{H<9Jk31h8Ow}tLc|(nPTEC0ON;)}|b#!(=bP0^pSeFu)qZhMd`ko&v z<}#Iwyf}x1t_*ZGcP5B#7PL;}KV+yOuaGpdbVo&~WLUkj^5t7p?zHJ*g`DR_8rLlJ zvn?#2C7y)70><}DY$nFC?r!3Yx6KF7=98E>P%)_NTM!(qmJ`qY%L)20ATCG*TL{Rc z(crnFF;&~WE?Fa?y3*n?5kr(fG&X9y5x2^@6A8ru6*I3!dmUn^%7GbVRM|jbWv4=y=`4 z>I1QP5DQC?4o6VRN!^p~L3;G7F)A>&@Y)YeUt6iB{}2;og(%C`u9 zvBl_myKuPIJ>Dy#ocnq~hwmn?)z){m=ylF;}xC@HF6*^Mk};oi0DcqX_kO!8-c z`g5=R7M--{1oe4cDY5>OYH?*!c^;Io^3JnoEBy@%CpK+#&rm)1svp~h7hKFa}3!ND|M(Ct?H!_Nx!4NS=%*4b+43Xc^u-0GESuP=1 zzaT|Kj=FZ&LO_GjSp7#}Inifkk-G+t!87i(xftEus-&8IH0C^)6e^c{+#_>Phkj~Q zKSDSyFadBl`Cl$eYSV>Iam1XME?1Txet?*;q(1{HxaDkiM$L6?LbkRatM?bpBp9#y zceWa3vttxQtOq;{>@j|yrTR7H#pnqsc}q(z_IWm8;e55E2+oC%tGh@flqeq#?F$cYRr~Ho} zJN<$jy+DZ|EqmxJ?TJiPZy!u`kx7PMll2kzQhptA;s36YnB!RJ0SHU_HFNzD+PeGq z@BiFE*&ZIH)A=n-tM;3t6K+r#2yQwoy&hBxonEmbQYW+MK+j{0!vEs^*+nJs`>0`b zLxf=jK_nh(F+-V+LVAIgLHB7Xd0AO;*aHRWj}Og-i`WIM!&Vp8MU5Pj%<>D!mN|{6pXGPXeE(PMbBryaRgy5F3zt4s7$n_g$ z3n#5t4F9`9Df*`tR-!#+31!>ONsFoFw67BS`qE!(E zJ0I}}hmGYkFtPmmy1AA(i5J_d4=Ab61g~8AJbtXN^9PeJtE8lr%8?`4_M_-FTwg`@ zF|B(-SsU)QL3s!pKGt41EB%C$e-A~WTy*_k&;0~d^=RD7i6)Qxq4|2^DfO7)60~Qb zou0+Y?ew0mnLY^IVlYzH4cw{vWr*RhaxW(?iERUeM<#MeF6uqP54jTxG`x@D=U% z@Rn0CALics#(CYkJFmGI|M<(WO0r(wgwXwLN}QQV+$%b5>}%i;))rU{Jl5Lil0G<= ziypU6Qd>9t@pqI`WGtzjem!E--;boVrlUH5nv+RQ@V z_nhjFzf@&{{pyznZv_l&`RZn70_-dG$73~En z5m)^?Lv}oSJJU&b zbmDubUfi=G87a*z$8Y}imzRR0V>YIR*trI58~XPfTWtULYS%{m!^A?-=hxM3XiRMr zuo^fNsYq{OxykHXPFmZNVq==|`nwrVe42Nu=p@2!(C;#WqT`&Jl zO7_2&4H(mZzXERl)3x>cE%L?9?ZhJf`TBq5JK?u4fMn+Ng@71Vc5ZI&VltBQ&#S_6 
zuCryX^DCv19-p-_8agae@=yl7q5dy5C}?aRg@$s+R&VL!rM%c*$U3(wj2T*ML^p5! z`t>@Sj|Vs1W8jzWtxqwnw8;F`Og6B!7}Gs==xvR)9{3Umz8cr^r7}j2v`z9DWK!mT zQD=7~yg96tdTaPlu#wl9IPsvSduR0d7W&jHOkQ}NnQY;gd|Gol@O7(P!z2w+N(Q_)oyqAv`|RQd@p)<_RVaF z`;l2=rK3i{yEit{G6QAD0QDr^H{PzCe*k z_+4S7y)*xA$+j1jmnqa6`v4c>f8cy}REpeQfz3+dGZ zm)=qn_OI7mt0XoP6003Mc92y9da-RcIW@8s-7eNVR!%&(uCEGzJ3MUO3>DIz>R4|k zT6b{FLhSV~Ff?sjaw_CdR$VLK>cTVD*4EET7O%Zd1REj9dkiHIX&ISn5HF@gL`2}1 z!yZa5$-##}k7Q3RQL=pRy)|WadVF~f6JceZqCc+SM z$8%0Yyfg_PI7>Zwo6^#4in+t6dLA?lA#~nE6GzKsHl0xKqRm-!xOnGlt~C-Iynj1y z%0f?c?SA7w%GJe6RMbk#QS<%D)6zKt0NuO(rCA%Q&a)i1A5u}%Ly7iLNe2QSzvK! zXmCj(dd#%$;D4f^E4;WWTE?nMYWKRi#TBe6c~kLFf*~T%yq}{f)Q4HTPXLvx^+$8~ zq+{MTr6plW_cM^k`cg_VTY5n>~lrWlf-N0`@Y>7-P)P)akbsV||?%Y=2M~lK} zUMK~cfpm~vNa%^XVZqrjPM7+wmfRdH_X9e`Yja1dWH{}%P#RCFz1RX0Rk8#@5Uj#3 zCZ@S!#R|f)X$9i!x|gh!q&=JGX{R85y+>43!)f}b@f{gTg0=R_3p ztg~~Tz?IKoM6(|Ba=J$9cyMpl7EoN5EoiX!r`H?qOBwXO@9VoBl-`mMOZLITzqw)q zp|l12>FEz2K9IdJHQKGvn}-3zo0%CVe5~Dsy`4x-8-ICu7CfJhch#4>W>hL0zYh_0 zKwBZJly!~raoZ7DXe2c z4asL*d3nN73`iojXdLJSeUbO-&q9{1BEz_;Cf{XVIY*4bnUFNkJPNe~|Dv`^5FR#F zEv!Pxz#rfmm#bH|xJNWi$g_u{ZFT9v)?W$((3?{m8Kx}RLOjW$P0Y+mu(HDQC#27x zf0Aw4y#u6piHZBbn)s@53I@@zI_4B%N3N=_HtLgG<62M|&EMpS2?!(brxUZUY5Q&I z+;YaN8}<8J`DVDl@t>rgB~NObl~YtNlKf5h@+SUT1uY&U+>jphTeEhTi44{N8BOBO zpOH^ZP4)2bFdXY$RAJxPV{?xPQITP;&tDgdQo@t?8$B^o^c2K|Q{cko$Y{!U$@hKm zU<`K40Bnd!His#yN=EY``>lEu>FDT`jKnE7Q|k2jpr{(-v-3m`q^uG?Lq#E^nwVrf zDP_<5i@tVglX%)SvK$)jz&&a+O&F^BihE*6dg%+8d)M>%pL%=kS}$b{$5dGBrBe}q}s5&#~vCHnge z#s`{vD#CBDxXkZQgUjja1Gst4b@Y(7E!Mt7Y^SX@#4jcO-VEoT`OLcZWecb z<xzDYsCF7ZWbPR*`}IxiSoK8sFjcCs!TeHkrk_-yNK$hU(Aawq7FZ6AUK|2jIJy; zDd|(EjPLPM5o$~Dsf~l@SK8NpwCilI{roWG1{C*|0F(askb_N%e2S^w6hd&#&c$^) zHpJeLLuK{0#{pHB)nPzYv&^H3$nGpY=@h!TfL^%YaKBP;HW{}Wb#)@TOT#311_BFc zN3I_Q_j8Zd{FF*B{pxK`7a8*j+YBUrc(7Zg>GC@dI&(oLe*eXS!u%aJ_bkeUPV5f4 zdfjdHmWZkTQiMzUTTHlNB-#2 zA%}0b=npv0*jf#?ON{hGo$TFnY#UJMsV~UJ4VQ3p@bf?9in|u6uF&W?KR2F_ zi~>E=GV#VTfmBShINNGwVZ}9vhFnShfTIvWT+>%S+{y&oJnOhOOPs+~QY!NJqnkJL 
zTySM^=-W?dr+uTo6}tDrYSb9SqSdo@5jA?fwn#}yktH=oq%t}~9Fi|-Yg=J=^_aR$ zt1aBofT{5Z%{ewkwedi@p8gnYKUjA%S$W#`~Xs2iK>0OzhjS^(uHSL15RHB6|lr6ylQ zH2?j@?{`q{py-3M{2qvyYv5aA%f+S>-%pXgA74udFJ+kNQjVjpQ_7^b=NjJ5VkdTmSjki2ct7Jwj+q z275?t2#o9`!B9?Nl(_nT%%Xlj8~s!NJ2iT~l%x)SzSauKPPPn6l2R>%BobUJ?t=RU%*7DQ-qOO}Lkk|agYzTDDcXG6t zVze5W9waUFlh!97O02v^uek?go;l52IFw~~)h6+B#Pw?ZpZhDZuOq=QKZ|68@R5_( zwr#|8k=L8sP5pJb<1LJRysiQxMDQAc@4b~7EP_`h#Ak+X5IW=3D~PCkYq}^GEr_y| zJ(7^xXc>AUYiZG0rj+kI+t~i4|CEfIvgylj%cQDcCae0ie;xk?+dmKH;v1gT$kcjC z@Jh=pys^*xs^9rnwy7Cc8mcZI&U(Lxld~3DLjdmFx}bGqbafg5y^Qa1Zx)S(3-Ivt ztbtEgP587DS%*FR8`LRd?lB$&2ku;^KbJ;4CXX|xvMLXO-2~TFixA5c8%YHa`oc&3 zg$k1|Z?1ryBN|UYZKj%9=QfdRG0ReC5WE@&b9qBU`WD&0(?9#GabG__?eASlp+y!; z>mI3Om}m&h2(BLP@nPwE@m#k+A&c+D2TqC33>E%!f5R##R0w@9vLPOG#43AH1tsR4 zmPxSUsS^s#`|y;IHZb&Nn6~1H9k3g2H!Pv&Qs1-i^yGCfU+$?PO7>@X-#}~ z^Z{$!vu9_}RhWx#a3eNXo0rIG?%YxNdn1YGQjfS@)NgrZ_pz+34wcuMbR(5_Z{O-e z8;NKt#v6wQ2PYL5pB(FJP|!@ttm`E^Qg^f4_p;!or_)2wyzQ;epGzm66c1+)u6OZr z+8Su`JpJCKxT4}B?_@LsfGB;eyZS`_bn_s?tj73=?IF%@H;)Z8tHDiV%2xZa;-hwd z;w#7SAT!<8twhP%5MQR(0lp;aoWYh=Qgq`c6=8>oRuc)quV|$Q69qBs%1DF>#G0n* zl_%?4%&ab~AoS)^0ld1>>E%)RNoYWXh?ggURgyJw_9AsA(H#QhL{Cpo27XV*gQtm- z2iYVKL3KOl_6z!6@{7KnEZo1$vxr*L9&vG?9LPv!9g%d`0xPxT~>A?DI2xfKow zQ4};2EAz)cN+Zk%5L)7oAqK%DQ%3$>0;_Rzwh~zpL{6r#P zQ_=90O|%=3T6F^ysOqYWOb6$vhjF=3ClgVm@r5m@_i!Nk3kQ1CiK^;p(J{qYXu{QE z$w!iacE^q*L=f00C_xNzv{Mb`&M`z?ip0E+_iG<8Zako?r`Jn>D2Nobk4$0+hnMms=Ln9MmTvAebO_$vzGvK_;Y=0G`>;j@6T8LPt-pBH{%-2x13F za0wi}9!A{~g6_n&Wj0#}#>hUA*WQ}He5>P`&L9gf7!UGOg29mK;M2n0QHIl>f<;td zJ|D8;e&#lI^5KWRmNSnN@l#o;7}k++cb52bnrKJ@@cAKz)dG@i-=%g3>ulP%Q8Ce_ zFSSTk>igR}8xojJCx<)eiXd#Kn(hQ#Q}2Bdz6mV-^yyRLxj9Hw1^QxL>eW4U05E{# zzy?^gr0vW_Z}Mdm6O(jZ_D)gpKAV(Ym>bp1xi%#>EL9`4iN*;@(f; z1_TlWtb1fKo)hxugQc)Y)tA$hrn77hv0%N2T-MQv%EEeKC}KQl62$&N9TD7(`f1o^ zTtZEaq6MsGZ5ezqqikGM^TJ`*4Sw+6QPqJ3_QM|!JF@}@O%BI9mPg4YJ$tnhpu}dbS0Tr8ZJQ&k1PdQ8JUC(Edzr%JAj%`d=0-;!$>vx z>6{KZ)bOdSEIFf`?UDK=Jc*az+zh8Dn&Lix{%jap3L(ysw;SXRusxCS-xrr5=M>ri 
z3sC*->SKV3-@%NdWvl_+l}XZi{<-MDsS$*8_}qxvjgoqQMNda)2m(Z2H=n?Y_du>u z5ed-%v}2V;>nDD+S;0UnzVb$BWa>P^D77$h;Lc)y5_mgZZ)a&>EQD8jVJMUk>D+5; zxFR4rRIs@A=N3#rCg)8V0ss)5_izO zo|FA(KT+71g@M*x|R(2 zXBl7kv^Ec2M0tQe9^u-FqIc@6SFhAD?e`G1F$C6|E5`a7(`8^spCxB~XV8GO^FnRq z)i7vPBn@Es(g*l1y}d2$9yV5Q2X|TL&;nQ64e`MW!0T~$xw21^WDuNO#i<341Y^3E6Y;e~_OGWV?m>%f+dad=wfQiNzTo#1 z%c*mky_SF*%HN}o;Qs+Df&0%kabt+>$B!RmY?zfD(Ga?~k&w$r{uK9}PqT%-EqDiz z4}nrt`~iF$0Lws2$9(vw?vZkF6|4iRFJ?ux=S(-d0bJV+EjCzrd++p1?^fcr_4+BC zJ}WCLdv6S!7^LEmjs#HE{cFe8hM z5+NcMCsPCvkqM_)WZtck#pkV)C1O5tKZ3?4&OwlsO?yjATXN=lX;nE}LdUOLtB_8f;8Y zkSZDI++u@_PM3eS~~dae(Mzk3yohhYZHTg0@C!*9a`pQ&Ms_qtVfj>4tS8y2V7`5Ha06 zQI9`pFwu$uccM8?U@G#AP^&$U6_-Fg5FHg&(~xt{>FB(`i#az&lM2XVKSkGfcSe&* zHOX3lR5bKk=VF%xh&kCyK1+?~NPc-F<<+nUc?bZfq)3^F)PjJc?qz3>tE@+^afPU1 zFW|WdJB$FF+qe6i7zM)6WrKk&g7+V1jz!GJclpy9OwAFkYNQa+?*-<{t5AlKH!3%E zMlryoMrgLMzK-rw&_Pqpdl!9MF__O#$XOnE5FPxb4_GCUB5w%c*H|frKZl2@9xqQpi5dx zD$XhgNS>5D5|evYA~pk8Y8kwtLF{G!sNR|e#jjf-PCw6!PIW~x0Lw*}y(k9F^6S>B zY7pNdRYkl?X|PCUV>Y4%LO4)awDK*IslBkYB=?Wd+n+z>#wt45{-8EROFVaGxP-vC zi&*9|0;8Wcw-_$yxaV7N78##8jyPAWo7@?Y(}3lyrCO+(|^@6t5fv&EN4* zaW*zd0v8cYUu5L<=LQG6lk>iXGZwnE68Ijai;Ut_l8mAl8g{-mdcwPyaJCz`(t`h38F5;A5{U}24mD3V^-3$AC1P(fXIUF^epRycCx zI;wdtwL%-?N;-T=!j=9$n^ZrQdPSYpXW$*ljPx! 
zVxa&ws$9!@*NoK&yy_zN$%xyL^Be4|w5)96x;ZcaqpEH-ptso#1yyro%UzfdtOt%S zPQ!5E>1&o+hD)2>P78HxB+VASQ3U&c?PP*Epj_mNNXR_~;9-O_al-?mK0ed6m~?nH zjbfvrh6`C)-Br=V@DT$B^VS8wSb_Ta7-9nmPaZ=`Z2wLGkE8ayUT|&Kh}>hI!@i>s znXEnc=)nVp%BBkzF1uh@Dp0)#)}>WD#Z2=SH5KSxj^P~f@VL0!@hGN34$Ba`(8}$g znuqw8z8|&>x#dXiEMG6BKk1A5-(7}Y#JoG$pFR79TuJbdwmkBp)HMc?Dk!$1#klpv<0Tz30* z_VSHS5GgKO!;%PP_v5H)$b*3(eJsYn5D_R+R;$P!ariCW%%*gNbIURbUdoSpmxsh> z>PJzYu3$B(0dtux1wItwF|VtnO(??ndW1eP|d>641X_G;B8u72`KP|J2 z>bXeoN2%DO{Gy7+BN5_r6Oq6>A%yM|i-tU6+W98Gc zk(ew$Ov=rA!B+kVT2(rS-jjitotem3SlYCZ{B{AM88>CS+J(wekktql^;@osZAQ+e6ZCq zjg>>6v?tVNml7b8JOKp0#8)8!up;uwWO)+E6R{I!C}&CR^un}NY!)poriv+c2-S(A zDLS9RNjhEE!pPQ*RbGu$%<==GorW$QBfkhN~Pue*A&AR;^uUa4adRyG)j7`Gtfe!#|M(g!hyJs%XK4_hMpV;`l9z zdGuH4W05S_LgJOs<%P)d(aKe;$Ug4|cv(w2Gjtw*&;EiqcA)@^qM#9=0-{H2KR%Fz za<>zl8rEwH`djXMgiHkV0B@`zFgv!^IJhF$)MQy8i9sJlG&-Yth#De}o`kwQ^c7i9 zgsLIs=%65`drU&|SjpDuZRmLbHVQv4g_!hGn&Gd7rqA(Doo4NKAXi~V$yqUFV;BUP z!kJ=`5^x}LCKM15_9OiWq{d~e@-&fURXs>U5e=OOrce)oZpgY_AOR#K&(8)=qf3ZX z?0pvvtSEPBL5kY@YNSj7;h?6=tQE=<3qxI2x9k1Q5r^ZEwp78UjW+0ocEhQhm6rAp zn;AM<`gnij>sdMYd8sP=CtIF2$m-s*gLYcH3T*!kq~JFKsA;F(;sMTF&O2aaH$)c^)y^H z?S%9X5Li!ZzDs1eQedEI-g(o!$rz&GPCgEC!Cl`>@KJgI3Z{8!)sg^!Y#3rY(9rV& zWi!}7beQ^QInYObRI? 
zLhko9S8ORf&|aKXBiphPz}|HYS}*gF&hq+5Z?X8&z=OTW@WJB0VEHKbj;wM0^%b=p zX)MFSL&no%y#?{82AKBZD-a z=JuIi1*A7CsU&_K@MaVq;c?W6;$lt*#}lZMxL`UJ^>uyERhx;iA>bwow+{>grhptU z_4!`!L4Hv0O0^iY$C+pZXL<(|hfAso+*9k?e}y)bMH_yH`L2yRdU{6iwH|lwR01(1 z9eR7j#Kb=9re}liLzCPvL&JE20|O^qmJ=1`ar<@!E`CEF?^Qn-sozz3a>cH$TmF0i zzdi(ovcwG&bN_s^@b5y8`TzgU-*@2uV|Qal+`)!LMJavGY{o+3@)Ei;UTK%g!mp?8 z2)K0lPK;HFQO7w$d(#*i<;=H3{i0>t5FFI8?rY*7b-qtKdud%iuAUw* zTH#AsD(T%dK5HLbe<%H?=Ja`oP@2!qw*I&O=gaSiG-V?I#sBeQzvV|kS>k`b)cD-F*d%kDCA-}&F;nk%d>{EBH> z#bos@jEuF5He|K#!D$9$$ls`@8`jYt{qHHGjs*#tU595-6!jQA$Y#bCvS|M}*+bw2VZSK#oM5aa_UUeksCXqf$XI=mNo zpP0PVl9pZxCq#=F>|JH04^R|7gp_cuN?l?~@rvQHPk1M$rg-(rf_#JA{^ymSjJv?Q zy6~Cx+8ySmU9}1H$l*WLJLj76q4;XqQ*&o%=<27-d{k6aOd{5vC|o0vpjZlL?`=gN}ln#?So zJ+<(`NOX{_nJbOhO}{c?DKHXNPR{=96|MwPwS^}_eV8UeIq6MpEuAYck^PrHZCiUEFQt6So&S8q;#`UHe8~Y*tZtvC_m!1DaM14VZpFlNKFD~{ zr%s@SkU4|_Yd35tgxdAc_&E2d;QYADe36=IuB57}s*iR~gBJ9gw?R*@udi>%AX&CH z^lEBq_%4z8k6U{3&Y4AF#j{DC>5JF;OxOE!s-48AFDlxtHN3R|$m?NQ+0}RHx${gG zeGR@J5>l3B(RuRp>0Qu4k59!_7bjU`cjh1Sy3yUsFNu54T3R<5V8|CvH?Sg`Wo~5T_eKcoi!ygs*`zVwi+`r!k zWm~E}EB>=_r8yUl%_Q#32qi`3nOD-(@&I4DxT}gI665=$;fDjVw73# zGHYMHybGL<@2&`NOv>R@ESGqnT(z<3L@^oyhZpqp^!o4r=iYggE|{K;MI@v`--m>` zIWHP@V1YW^KDbcO-M0*F@L4GvcAwtZZHvxpilCt27x^!E#OC84q^6KmJ~wCmkDWSr zf{r5j)=*tTg9=ZN>)AZ=0g4}|+E0J)j;~*@%1AlxdeH5!{WAQeU`Ybc<+Khq{5jiS zUNesLe{O-Sd&FKTDw1yz=yd&@W5W%+l{ZaQdzy|RzJRgJlWafc^0VXlW$H@P__lEN6 z(NYjzy)5)Of2UzJ*fmscEc4G;Ml*LM$J>9N|A}l(;`!>viyIBm{bD)T5{w+;lAfM> zZ0v4iw|^nt*To`=ij7qhId=Ke<7*#uK8~Qo6=_%-KZ+jdg11dU{n&oWT`#w9-wy8p zSmNsR^?Vxhze0dNrypgf8~8Z_m|U_xS%Y0AS=~o0)3+6I9M2P3Kwsg-)8zk(E%X20 zrAwk_X8w{Dmv_J5G!Fr@dOO`GBYjDssX|3e&mP58Y z04pwHIVvWpKGe*$uT0fGf$Iclu?dCmL-gF5pVGYfH!wIkI(D_@izB6sMfny`j~kM! z>TD|`kY(#Ud_15c9kO@JBW|r-f`X4>&%Snc7NJCS0_m-pl~r!s4@->!XEYQP>hO{? 
z7`gv@-8XMA59IxcKWkdMzD1z}1Wb zo+i!N7fnq~b15Ck2ax-RI8PaTvboq%iN|OG+GK*O0*Nr_zdFDHcuxnlH=IFDu!=J&BcpX+o|7PF7Hf4$2g3h^JXex0mNa0R?Ri66m9qIq-hn&f`27qXHh|S8$a+K`TEm z{pg;GFIg5;GUhc}59TR`b%{@QulNZialNAU4-cM*ItZY+cI^1^D)fvSxwF}9k&}}< zBr2N3eEiyiAwo)+#PngdsG(spd3J8!zt5>ock|VuZTr-gnZBZmixU#nrJB&WNOMl# zAnxp9FXpu4emi!I@8YD|w}b6620k{snKi2%mf0sPB=iF~f;n0sP;9t;_ih_fQT8A0 z;_O=vs&ZN2H?jCJu5v$CR6xKVxvWB_iAGRxaNL>jMc7D}0HXI{-IZr7<$U7MZ^o_p zsu*;g`vXg1w<^#T)T?|K5IqaVR^B%>kk3MLQ}mJ@J9(0N*8_IBTd#UQNL?5k9^PB^ z6PSO24sS($cii>vPbv(+iO3wRE8t-M30PoJ&~a%7k}ZQHG8HKB1CLGvu=mB0eE*wB-#^f}jzPn#-8+$DtE z_AGN<_)+~YFZW~fY<4@_&i33wY#jT2`GuK@A6cC|ok7iAA?Sm+JRthS;&hUg1_k35 zPXx_&pcY<;Kt)AG1?2jr=*n{_965U*SL21f*Y^FpTUr`lUS6JTkf2YE_&m4FHOv*9 z#!iD_h<2PD9vx^7Y;~CzZM8lqOpnf@uVkZ-I9(a-s#He z%p7+wXR;3Y2J<{yh#b}I*YMGQF=z^-P@`j zgc%c)lV(5uk|bsCY@5N;%F3J29_R=>?1e$K8#=M*3=+{t2y0OYgo>J)`YXEsuQa~6 z19bR7aB%7U?MH{Ni0gjb2!D6$-aQ&L1r{_l1>U|*jix#&Zx&Gxes8!1Djx8%11R=P zNa(f7Oi_rX#m(;o;0A#A7BH_JnS+gcRnb;Mh2qb<`uc;J46W$1m;_*Y{@Dd80F+_o z9Umv(%SlOX=i#~k`SUr1j6X(3B5{u;14@8qPz@BDze5>iZ;o~UoBDbX@QFMK2#~h4 zJOG$=JK9CIA3F4^z~cNDqgiYM9>vLrU~0 zOxP==YH)~%1Y)_S8dUEh*E=@$uCQ=txd5plK8C-->4|s6&AMD3a!OHgBf0huAL5FO zILgi;+KNS;h3Sy(eVlCr6VoTS4qKFQgn;Jxcz5^oNW%dMS$3@jQWcIsI;(Twa>6;) zk?bH58?UgiZD{iv`uTGg56{{{9_%p=^Z;GJhH*r%v7}fXYB7_5Ps}VW+tFe5rlH{p zfUY6bH=cw42yyg->4w}opsH^GjS%jk?#jsJ@UNdj#Y2&tq?)xICk6YcHg9GTe=^7D z;NSpw@mj-`%W=!DY+X%a%SThb5rc9?`dd?<;niL2LomYErg@kzyaMS%aOpJC_{ zZTP6L^1+NmqY(m)XpitWm9TnW_ABNp{tf}#%S7xq`g3P0PN`*<`7Sw^D{MJPJ3q^P zsAhtRi<9y7v5eDyKRi$JbCK5kuD+jyZE*5>ac3qaYBRX`C-&jWD z2^+`Y>ht)qZF9eZygZphV!#-r&7L>np8|9@U$DzBNz}@3p?CtCr2h<<(Pc3Y@chuG zpD65ac7;hSc5Eto|DFMLm5C+WT0f1=x~8RF-MWtA=N(srxB~B_$=B1+F~akG-Oej_BhU`3;|@ zI}mDwdr>BccSADJx}Xh{wd}%bCP5y#R*+IzXq#5Y+%rSX%Te1I(DW_MSd@x4XJlk7 zyMV>RBNyaq26J;noB5#07poMP!$@T=v$C?b8_1kG#ZPeK#XP7YhQAb3|5kD=$4R> zV1`~zN9|44118^Fcs<2a)KKN}1aSYoiJOO~g0dPhy*BPaRzE^Ee#9t7l=tvPBYh7n zg6Hk?OfDzFsxZ2)V`2*GJak-AatngigRjKr_I9R;qK(bb>DIg8NEn*;WJb8osvAuT 
zRGUe+c1ZaIAUjVqax%K6;#wAYPpSZA+)s&w)p(C8g?ZD4_3J%hvKd{GZ$4hsw#*Z2 zn>SNbM5M_UVG~V(K1`fX**nCAS189}aYr2GKHx6Q&dsA{-O-?j?%HJ+Fr`@`NW!E% z*hFnqsKjuxw|h% za}Iik=nsv5u{dB@L!H*}iP7}cVO&P)`z2k-&_X_*qX-BHpmHFh7$<)Vc9Sl95Os|s z$OSztgV31r9JNDKOisb0;M&50VZ8Ioe92NN zos`6>5F`7+f@OAQX0wpFPO6t01D&M?5cdxNp9p~QVArXSBU@4w>U8z0q8K5g^I5s*vD3vnf({C9+~@3v7Ip+8okM-apC6`{d(H(bH49x)mA313Ml3%5vhJ=EF|k~d1El& zeb=e4IfGwE4FnxFWthBU;`KATd4Z*6)!Z0&3CEtU({d`u+CyU4TqTd0D4Fxl%uOeG zZ`f+eev9Urvr~)Bvl;9D4lARgr&?Q0c-QqPBcNPN{KO?ABh=6}hS?P9^b+&Aj8~pP zaElT6Q|jtWNRpOZKvj@(2rU(p8Qdz#OfdPn$Wh)2JHrpGpryTxxGhm5#~(`=J+mxj zA)+M!{R@JGEIvQiU4?4KzPgu~-nalS($|t3i2e5b`EyD!vKK5D^;fn&NEg~0imnI{ zQoLR-G@G;zlT6A8=2QrEKaJE&ujmj^UP1>6KGGm78=FsXFpX<&u2XnP!KO}C_glB9 zhG4Ken_3DBZ=nA8{KD(&mx}s3J5?^oHv>PR0GqS9HobJo63W^wTh8}sx2%5f!Lm@- z3tl=PB?*}mip$CnNvCP1`;yOV0sd72y{Hb2baJzy9_Mb}Xr+OTPpb#t_J zd9X+kt`VzkIl@@8Jm&`=j^R2!XIbn%d-m)h;^1@Lp(fJ{+KY{w-UON+8Q&EQ2W|AGYw6!^q2rnTz@!N>R7H=9nmxlRwrj77mQnM{R6+aOlK!wL5`g&BJ@y&z}9- zsbbznSBX^qT)H8h`1}kXP=*2knMkB7lGF|^qhYw2nQ7xw6c3>BT0iEzmu}$GTlsCB-)*{wY9hKT%b^hV@Ey{jc83! 
zNQlb6LkOLG{(-hONfC-?ue9cgA}8G=zYkuNb@%QEK|$(W0zyu=*cfJ&Y;6TGx$?TX zIhb7`hU9Xua&s*nMC=6?q(7FG^G%iB+RH+$6CzG;t;!{5*T3pqnVo6Au_Nt_Z|8`~ zOM!!J_MJo4o_3Mta>%s?Pp(%tTW@`rLyxParNz#_mFwii4{rb7RrBe0MTNUV3qC%) zI=UxFe4awdXSNKpOOp+FBwC=rbyq*bhQ*P7X+y)A%w)EmJbQ}e`^%eiCzcBYavIUx zy#(6TJ?BR&u4is$I4 zrKVj69qx3rJ?6V$67k-yHM@kD=8>YqwLu0ww z*lr=){&M84%g3xbq;E^#1NT+iQS{+#x}nm~!PasBK^Bi*<|$>Gn>HtXa+%@Rx|sTC zG$`4IHtqRylb#W4kz|>&+3ExpZZe$MH=EM&?c257Tv6m8oj^pF?uz=Z*v+9Y)ca=O zUeHgZBUU~XY)uD1vZP(^ zd1_kPPCmZ00w;8$EZG=zCKE{*yo_4nW>ZOo?Ua>C7i`CRl+Zu3Tmmnj_pe%JFrfa3 zF{C=;OPO4W6$4+tzxq>Z9X+KJB#E=2N@MYt3z5Y zhwFQl)d!i90aF#|K20*yF!?^BuvtVz1V){v0A_4t4CVMhSo&{B-B8FL8KbwPfqI`s zzUlXU+ch7<<;dqdJCXLKuR?oo=jV%~w@e$^l%(`rUM2tfe{b)}ldh6?zjizdH&7^f zbIEC{peowG)?Uxa{tgY7_u5-q&rbZHJ4%`G*q&wVBwAnKE&e9#u=L!9b?lN00P4Zv z(NELP^l>N=fUTz!poTU^j243O4)n@xLnhQJMqKDMDQ9$$6!Ca|F+9U&jE8|7h7V~7 zLMp$ekcSW7p)us88)pK0eW1NrkJTKN( z9D{O>1`P|q$}o^Vd%mJG(FZvPT}m~wdvjMQXGw4?=@d3-!%!%Ya5mqZcaH-lymLfx_Qf9WK;Jhq0A!n=SyOhR*gi@Og$cmZp8p}tX?@2 z^cS6&fnb!kN}yo^WeY!m0tLq(i9kObW>NOEft^}9+X%D^cf{d>wx}m|S5Qz;G3f4= zx#P$%s1UI00x!{O|6Uh9s=@*#_nRk5h1v#LSOs@fg^Qbo^HC0kR2{!HYbWAifA?Ht z+P<;r{j^jG&-1Dr=yEAe%{0nTw$-J`IIXbjlzJ$olH*+Q(swy_<0DcJ4(vO7*0yzU zGp<(;X$x-vJ4dDs>-YTm4G>L@^)+lca-UIMa0pZuOTc%_4k^<$KuF;? 
z`Vw=n<(!-(Dxe>WLCKH}#g^^(5@zPlP~p6-s`7?1sRM8#N(UR@R;Vpwz|AD(kj&L@ z)o5t|3VXP`&> z0YXoqaJ(b;8AQ-2R2z_1OMuw;7|jkTK%tX^K6j;9pW7O)*db z?Anl|M~~D15AuEFL9AJG2-kXa#y)l{nI6|?n6F>5N;t2m$OF&E_ujoc02}NwS2mH} z&u1{76Kz~OXyJO04MTG1hnIIRH1rVCFG|~uFc)a;snD#`2i$X|x7KqW96bWYli!JL z3(7oNeMaP}w&=jKkHGMeTGM-LV|W{4L>r{uQRpg3_5${sqFX?RvluRZdMOEL)YNsF znMWZMs&jFaKIlhp-nyj=JOlBY)%HLurgwt~haQCHgl3`!Eg61~IOBD!=M^hfT#t&Xnfk~ih+KER6DmZ36Q~?Yws9*FDr?*~wPot=)*w^1r z#$BL73#;D!Qv*bQ@)t_9P0^QbW#U{b$dH^Id64zXhsFCC%O3geXOy%S%YR|TG?U1* zd9$$p%(ZK6Vb+nBJ&H^2IpngWQ5{iOJiA)m(g^j{ofCvCaoYrxKvI^1@aclrgZ_YE zo%LxZoQN@H>9goHIS|vZTKR;9?}DgnInXRkk1&wBmqUEC)vM5IrH9ffVq) zup$ql5;K()W?}K>^<-VWx)dbdy~r9RrKRT(mC8e;yio^K!)A%N3xQj8HzY(We3pz< zVPa5HUfu{yn=rS{xtzM zeZaa1Zi*{UBsYQn1M3TtHGy|}ltAVmv^q_D)}6Iyl9Ebq-jMzix($}lqa%n#N)^UA zIM#v<)s8L}DKdb3)$-)GuhI`vW8Oio4L~KieYA@N!XUiy?C}TPVvFzCv8;NF=kpfD zjk%?z9v(d5H9(Bp`7%Pv_;esD*<@^Gg{iI;P*D{pQn6IdmBx)^QQ*M^jhD3c#! zWI#Khw>fwum54bmHD~xBj9>m-&ap~ZJrA-7a_N&p?fAo+s>Xq5=%QeO%=tRXjugEj zW*}BY_%@=(1EO&vy#I+xNFiiXWh4$lj1mNONVuTuU+Z}9PXHicqcn$Ff_r+)wKS85 z4z@O3%Yvj=I5@P8&U36$=PJ7;rPz}d`-)@i8GY)HN!=fl^TOQ}8;`hC)3?=21Z`sq zw-c=Tre{#aSh(z3!XL?cKWo%}=D~v+nV-=fRB<*d4S+bF@^b=i|BV7dsk`_cdT79Ef6f z7y@N2*)^H20PAaB*;}7=7~TueB%bT=t;EJE+uvC%(~NbTU7U~hb%6-$Ty28K%|*(T zieh44>ffn$Cmc_q`NpN~d9lJs*{Ur`?`RC;3saM)BWx>aL^RbKO*e=##~i-#aCiQt zoHI54vKs^h7F%0Sa4O8N^0D^~pjhQIU5b3oE^%(pfY=U;L0N}Mac*vI+h@e-YM5Z( zkYK?L>`|AZPm{2qAcM2>yqpPmfn6Z*(zXQ~h9~V)S3QJj1ck7&$erJF_u}{iM?6$Q z<_=9RVexpeRe3nai%a+YhA$sP!`uVY(5B^_H7QxWl@Sn%-B{v=nX7$^^vhN$QlvDf z-bOSzC{K7B%_1Egb8+8anQ0TuW%={m9Dy_-PB#!`+Xn}aU=oCO;v5PMG?(pPzm|YG zY|y`S?|>}=LjguE=&jp6e=a1M_x5dG)DxJFzen~n_SjT640*LUETM3Y)-8Z(XnK>Z zl>S;&rQbdV+;%cIUz#dSd-;*0{q;EE6<%J|aU2el>#i_vwYk@tsFAfg;JIs^ltwMp z_{cTUGZ-R^9B>9*%l;t3BHJ+#LyAA0`*wm|QNgS!6}7l$jdpvp8xm;>$o3eqU=^*~ zvc*^M_?;X93J&9VmrT3RjCVULgceM0R z$&QZMF_y5D%Y@@7C|$f3z6)$T|DR+QZ1iNS|MCb`8NfNQmA9c6bGv;_2PrY6^Z zAS|F?95%cYRn_H~0Bqpr@A&*#?TZyAD{JKK{7`ANfB9lVzaHm($P|g9ZW0|BdG9 
zW8dPl-dSx*E1mhiuZXP;js~xOFQ1XIc0Zz+D%`J?H6Bl^ zyKT3U(yj9SV3{m5hjlmaq+~PbZR=%m{qQ6G! z?IG;$7e5{hN?h(V&2)%=Xu?CcK=|aGW_p_NcOk5uKtdxeKC=)Ra)##M%v!YLPvpVh zf!JecyxZfmNTe=%zy>@G;RKXulB~~8Uf<8{inwL-&o|2EG+~dEu{tw!$^nrLm!1wx z6ml+INfRDk`vCk+rKARRS&C)LlGtTdK6)gapphbMl43C1u7C6`R-l2z*1sl7jGM{4PBM&mEv4^T)b45=q;+ND+Xp1MPPwpk>AK@Qu8!ZPu z-pdj$m^SEq2@}WPDWmw+I?+m1_4T66&riF)z0>}dEx(BVTb)PUS-f$ZdB^$`NuPS} z9&yQOTf0{$sM7-MP^7rR)u-UwpfewD$X+$G1+e`X^uZ?qR#r_Fo}njPsosfR{W z4$G+~qF)?^;)xono-G(WOE6x?NRT7|(tPWA%ew=F9Z=x+y#=nHI-hRI&;Eh>a(5{! zqPjvll?QDksSq`TU2)h~z;`A_8j$jj zU<*g>A<*TX0U02AWd7Kw+qK24MULo~Ffh_cyTpO!t)j-cuU;bNNQBq*>(~8x%Mo`6HbNhsVsd_TKSbqLa58Sc^q>q%s@oZRPjc-@mDax)K)V z-;m%zHMM}$-9Ekl{BI1UJXKdCjDM_U;?4n%(u3flJX}5megkM4Ern=qcMYMFKd!u{ zSS3U2uhpUS6@I`z>Dw zR)+#7p_W-}gsSHnCfUgt$YMzvnbapz`j)%_7|+nh1H2Zm8zcU%i{74|`Y1_1#A~qu z)+MAF>;lp>PEQP0rNm?9k{KZP1WZ~Oc@);9G~7kWfXcY?OhEctFaaK8{*Ruo6Ez4l zMkSDvE?T@e|NLfFTj^R@OH+VWeEY>L%ptH*GDoHGmjvvH&!*`Za1hB1fV1I|k=MXB zpw1cXxFcqPhQ*HxLV5e5LkWg zx+`sR*-jqQBQ!E-Z+Jm-L9L`?X66kX0k2hbtY;^hGs{6(tHiYT_TKUJT>-coK*Ec> zDp7TX6>v!u22qf_n!&*;G7B|wa@QqI+6a`d$AB_`Uv#lLk`km$0P<}1+1OC;Fnnts z+=2ipJT_gP3SbNr49B6Np*#?C&?hBS=*Z@Nshq{LNf zJ*1NRv8g<4NxWu0OG>iF<{`GvKeAsxuy712T|zNaWwk-M;(GA+w%o5HR$BFGOuKeP zs&Mkn-?~}sxLa`;@nV;AS8#VuAmNtf0Sp2sdj!IsYIGQAUv7GOb^}Ty6txeSNh|5` z0aA=n;l%iN*Nz)6;mw?();|b%M345COw8q(< zh*e{90oPzNNH~BF6S5o;772M2U9JeGR+w0<$aJhzDQdLShfoKmP>`h_ea(4Ec5fMCSjKtob#w3gJ^8NrJCtz6(sm5&hf-C66F<%73 zeG2f52B_4)m1x7uI?ncr0Asp9|4Bw2L`8j(6P~;`A9L4>qXIkJtD%d^~1=u?l zb1q8_dJiCM=kT})#}@0)!()HA2}T?95}2uB(gxdaS=2Zr!a+%ZwQ~b%Y9H@Yp-C8R zA3?JW;?i3DG{U0ABTL7ko+Xe&y1iu#^A>gtOD{` zFOW%B8ap*KG{}vXZz0DFXS{s*9^8S_K(Q)>Cs+c@;qen_p5;E zg_pt%+DtT3aOQOE?dV@wxA;?;c-v6BqR>pdwwXq1T?LcO)oTZMIB7n5Mh~=i2h~1z z_30JnoOvi$L20Gd<~tfGjMTz| z)*)bDa=N_Bb)Ki1MxrpICTD6WYi=@#g&z!S8}g?wv0y#;(!RIq$a@MU;i$TBB)wm`F#E=Yd3bmv=1gl z5}5krfN|oF5(gRGz9})fa?j;{wV>4$wD?O#n(s;l@w(89SOV5_7JcK}z(E?(l)aSk+U* zIrA87wN3rRv|BD;y()=77-&}hy7E&+vzGgC&s7-wd6}aOaG0j0qg(CLj;I4XS~7hW 
zQ?`9rxJ-Eqh>xSi^HJ@px{AA!2s6~;h^nKZMc7mYpFYjHPwYQX4zX%sJwjYg5|q@s zgj$BpIyKOadXC+ty}#dtP#!Y-0srnpd}7Q4ozx`U5qOpX1##hqU*X;6ae#K+EtUvD zw&Nw4r;0o|1E0(y^K7PgjM8=A>C>l8?ID>Nkah%PD8V^-?()o}c>x3$(4$BK1`8r1 zAbkurODPERijAF3htP#=Kc@gfBaa){S>;5q&=)&$p7&*Qa|r}YPcm`(s+HrAsf*$R z(X>KIS-H%lHN*Td1Wb)Eo#%~hBgFG>ev@vY zC?J0o=RcH~trnj=zH)C|%s)^Rb+ts1VMkY5nTAX{LdMh&oJZ&7?R~_j$imXHlfZGu zGoNGcZ8dEUfZ2v-jGBYPH=t|g1}+MZ!TUXOKc=P#Ir1FWguyuq8#wh` z1hpbZf=omfxa&L$TjbkBAhG)b*VLGM3E8|HI$w^>)F=U)@;Se z$w>ss;1rZ2zJpYxoa1=r%o)qUk88*w4VX^SU@a36UrKHc>VZtV?}8wtmQPqlqJKc; z8~T%;`o4J_)+ZDN;)Ln(1#oq^BOd2&m`<44oc`U@Cq4ggLOp9$iw*SyDSAKy-vsKj zIqb$<_9{kSZZA6Nmdaj{k(u+NzkO%0;q>eA{^}`^Wco{?SO& zTgV?2WNLz0D)tVa_O6rWg+F@V$50?%>f?$59UepD=F)sV09?Jzt>XaAA0xG5u#li$ zG}x3bmY<(bv=~U9q>TW2TY!f?MQRzOA3`)n@BDW41UOiLv~QxB#?Wa8D1HOhx1Bzjcnz;*`7^-5=`28P7cGT zsr%O{p(%jG&=AyH5N(ItmH+8!=5bb0qgB4&OFAQL$wf<+JjN~9{s5y$v+aF{^k@mk zwB2osTo^fRgivJ!g@|u(a0#TS6gF`GFx{T>E3e;d=1NXk$4^%qmn7WFxr$Ce;Aj%> zm9JFmJq+K!)!TSdFF<444+qZk2D?5!3+Q^4q}Tq?qwRs>ZPB|8LcU82ub&!XN{3D} zvyRAnyH?H4$?UfB_X&0%t>gkhcLjpeC0|MvH}?f?z~Rd z{nYg8dOUEE#8>D7T=+^h?mujC7(g^b^Ak{;_bLpeB1wFyOVBpmTtSuz(zgwqDhVeY zu7|6%VZP*3G>xgKEp9CX!=Z;wr*j;nwW)NDbxaHoKSYdjiC%pc1R@l#YoUSQ4C!E_ z+QvwoxFMAFBcdsxC}Cl}l~FT)4vXL?l$amybDdu4=b*OoMp$R)x9iR-0Y73#r8Qoq zYrtb_^u$47rJX$~k(L@O_j2`GeY*bk7Nzh3C#-7QXZz+pqUJLDx}9%|qx_a7N=>Qk zI+Ui)1vGh~^$d5`J()Wy9P?-b(ggbm>w7l+5GlC$7CM$(Yrm& z6hRdMMlJ@UCc%LKt}YVUvX9n@+#J570kzpws{M50Ct?YdB#~(1_O6~cxL<3U4kDHWta?Mk!iXF1h_IgRdbD2v zt0)qMeWxH@KsaZ`qNM=CO9a(MnM!t++OcPw;Fui_;A^D-;dw#$0tKq*m5@*cHnu=s zPYxW+3Z4g)nT9`0?`39YgU#ntF(GEtbR00vs3mWrY6_ZI^G}w?a%s$7_KJMh^sCx+ zAiOH8y>X=^U4i%{OmGT8E(g7x9TnYUbO)%vA^5aG{Dscuoa=GPZ}i5Z_x^pI0y^c_t=~Q-I_L6PSGDrEGrC5` z48J&_d8;$#r1(XqH*Z~E$-gi*d^%JYeJ5ITZ?M*(lj0%kHbiZR!T3P++s~~bE!!IY z%&z8%*8O<~^`v*tP@GaS#H(3@M!ajnq&AKNZTEWcxPVQ7)HONiJYR-&Xj-QF6s<0F zES4Q_789SwICZna-7NAnD-*sop^#BJa-;_n#&!(POY58=ds=>Ha3ADQrKnU1iQKiP 
zx6y_nwsPVWCaCmn2u)RiGrsrlzec5Rx~@a9`*bf{PpR~P*NFIZb`dxdg|{IVLZuw?73W5h6YB@xggVX7T9f}H=vJcB3{S|78b}JwTs5vCRB0&QGxtn8*U4vv zO~Mn@6`2$JQK64~*G|~g>iHS!sBTW~T*5O`a0ns?SSc;O6CNH8P9ISQfw{Bk)bnPh ziQl=-r)m9qYTotphbHgbbcqxG=VV#3(V&~u^O1WM`{dFRTxgATIuy#hFkskHh2d9Y zdb-?EqWx=n{i9n8K;May0~XSdiRI)VTP)||Ium6Mg?|v#R)DCGQUWjBm9`Khj%`xf zJ5D&=6r2v1g3s0d3lzv-!38_wP6wh-*V~0m8wnHdxR;aJ&(@hj+`0_H?l?0 zQ-B*X9H3X0sptf~go7h+W%$X7*`{UHqLEZmQYx+s-x3jOt9`_LkOP=42gy2++`9_we9zHY;5@H11x-Z+Q93^dhe@`;ey+CN>s{vfNej z49WrO+h)FQ+-}YM=yCgF??~fQ1R=TEY(ORinM-W#K0D@yz^UL4+Kf@;xg?y7&^(Su zGy(&tl|hpuFacu?>PiQfi?M(}Jn4MIefk{gEXbM~z|b(YN0J9;vmKreZ&#ZY-&>ar z`bwyRql|Xz-0U=>2cwS0s&P&9_k^-b%m0qxi!KrMMK8Wy7Lfj;t^(edykyH_(MjZw$1*2j1nVTnJ_POO{Qzp88 zSp0!@M?xg&J^GsfvOr$&)x?{K6tLwIv2FlApuZUn7QICyOToj#1E0`?7)Rj=tP>Iv zN;iWfKJ$Rp-uAY(Je;#qh{W<0E7$cT17WqnQSDUJFp;|fqXWQJ>Dv|ptsKOmB{Ha@ty{lefUKWWFK7}myxG*# zy4Silmynf;+VZa>5wj4T-^Y*sU~pmUenIjN@8U~5(Pj@5${}!$^67crl$6}U4T84l z3SA(`fh{JtYA|SkwdYM_@pmAxU^MX*x(ZBp37-lca_)Gu-D+MheZZ0$!T@6n6c%V{ z_n@jvN=@a1h!%1I03|B8V_Q)ENBh{IizBT7;4>sb;+cUBIt=85P^A#$2-*fN%0m=k zQ86*ePP2A|WrN(;fM&RVyZjU(I|1?=s&bz5ZEMp3Nq--v&N%UhsIZBW4x4rav6;=b z96MqJ#SCe0v5)p+$7k3K^TOjh3SAnCxq#x4Y*a@bKwJ$F?g=R$?&y@zeqsDgP&a(B zkXo~p>hsgm=%1BPa1q#@@FoGT$3V9Peu_KL287%oMP~*4OS(v4@t_GNgDXM=mAE5x z-+VyOaIKM(6KWUHWAP+HjR(Goi?=rmp3la0>s(PLNL=}Hl-LmSwqBQdjH;c0oM6u1 zhS+BacyK#>9Z*HUco+3SA+DTwd7f~1h;nTyFzPRx2+P|n4?EaK4T)5yalGA@H( zgkqdDZA1|?U;`L~usw<7nUK;DN4MZs_y>UCMmC(Jr>9TBqk?zXd0}Qg82=tVE57*S zo3Y}7`Kv=PT;$CIg4YyF!mIJsN5|}k3>@KQ=YV`nmMfau<*Qch!2$0VY};e(4iOy z6U=JCd2q*B8Ot!gL#WYsc|`zneneAlf*zBzBC8Z&k;j18(}jrCgDM4i+5;JA6sElhv}!b=|K@&hgL_Y z8#US@-Ra{6!7nB$xjDXgmMPL{Jo_O(FBhjpA1In%>JMnTIxhJBJT}b$FHVJFG^St2 zYw-lm@9hHvM68%|KHeB^*A|$Wpsw@}5w_a6XaDtF#XWmnBU-s+`yBa%gqCP)YZK@m z8+&+sJPm}Ti7+IR8W?1v9WF!>YubOgYZbhd+xq$rp=-c@^iC}hSZ@56&NB0p7x!$+ z>(<89%fq}G89R!+mVS^36YYLBa)%1zk?ezO#$qRiRI`Em`?iQO|!ZnPZh4#J+Qb5 zi$_Pp>5n>ZAWk_@XVz|^Be^`87_FpT79f7kX4{!(Jdxis_tu-K)6O?CUc7iT=@DPU 
zAox?!l(DDM$G74scJVLhc0`x48QN0zH3d#^v*@N}Wj3l#q>jf9!XILbBSZzR-$vy|}5XbeX`5LkcVTocfQKDgw z@L&I1PLMef{iGnx#F>Alzn-$80lp*ElErH;jB&_Bi#}dme2t)S*Y6bixTtNIqS)qg4^5Cj0dcyVuah7=M5Pc_^P5njs zvZ#_^tWW8wa?ADPTqn&TCnx-NI^0?C*4iwl5A~^w0uo%s&gIvx(oDVPJX3IgsO}kS zeUp|w%u0DxUmtSUWv^x4wWsT_^1*|zAwq(C6oMneCs7DcvB^{cd0CkBLH`Nd8oX0JHY>m^L8p8i5B_cf}T#e%ulWA^vx zrC3S!HR?A!J+@vsSvyb^6UEa#(m^+yWpDqO(b_Yh@{Bmz1RlNiw~0^jD>2c^WWf`c zFSi)(KJ{E(aJqZLOn-ykMYG10;r)RsM^jrbI_s`G%y}x$&E%dBrN7Ybqe~GDtw^9N!qUwFHdIv-0A>rk#u+XmpuA`JXH%pOB;KlDWB>uU3PoY1z|u zG)jbmQeNm@_(hG{x+<1A&igw8H*-GCvgeNRH*O25(J`cl+%PslbNSo@DUZ!|DcZ#4F%eL*{rp(Sg4(B%V_n5vpH(-Kqf33>u;+1>BHZQHw%0_&+~e` zPoO1xa`kcB@XvdEI@G;JJcSB^cmTUap4;1d=nx+BK|Zo0PQ#Oj3)+1s6KP=_^3xlS z4@X?m{_+|7O)T@{y-r_nYlUso_`vLFuDrbNCz*eyU0iqfQ7RX<87J&K$UyOKYp2TD z$I{UB2t`p1OSXO0ur^(er(;nCgR+}`n!r~%0ZUXM&1{Eu1TI^v@I3gI;Ntvn(%L=5UWk8)$m3DK z@n2wrkK&uR`IEy-*UT7F>0A;n4_(~qw8Vo2Jj;M{PF3>ssm8m<(w_|RwC$YlGaJ%M zqu=l07ZT5J%}mM2~Cs=gn3Tb z`~WV~zpDc|$56@>WuxWPh&~E&(_WAa_H>O~qFTh54MWSn{GF`tOxon#@mtXooY~}S zmYXWv-P=Lb0yQx^J;odA9j0g*wHNL8%JZ?vvr^Ao{>?D2(|dP>!q7x6n;m<~DgJeL z=@Z&yCAW}G>p{S%gJGzpi3X62E|46p_j_H0&Kd~mKRCS*P$;a9T75e4bd^ekj>jD@-+L;wQkI27-HQ$wEjVCW%!8;s!WBV zZh1SDJ6g5(?k^boI@ECoJo)`SU_|m#0hIAs{sAl$=zB;x459{U^PoLDV;w<6K`?40 zhC67OP`lCUypK8nHkCUXpCs_;oBLZGMpe+g?t{sUX%Fb7e?C!uB^rK4-;ILgIj#B< z;|fFk&R=wHqrC8rD%_4i^F!vInvCv@yLR07eJO9oa2%ggnj7srr9^kQ&ZX^ZZSXzZ zveGh=I}VojCgh%t>#wP)nSxIQ#UduA3Y9r(c6qz4gy({?bHf5V-WhuT3QRglnqRk$ zGV#RHt&Bv34g@+?E&*Tn>r?D_zWZR1ESuG!d#jZ`B~rezK<%k$1`OfG`j^Z((RF(d zUW-<4f5NwPnQjSt+^MQ$-B)&(8#z}|$=mN7Z3&=W3F(e`=kwgpiTKW9lPIL3qz91R z+zSeVaSsb3U|HQfj1GzQBg*OFiHRO?5ir>4JhfwL45-y8^aoo2LCG$A`Rl64uj&WX zR$cnUVEH-EFhV0>DmAg<6Tif}5kVej7f~x;oi^3&w#!eSd1!p9EcunU992t8=5)}h zLUB{wMW5SWwd=M%qSaeq&{nA6HAH0)4y_vLI9;e#{CIBSed`uwB*q+YlToPJ20gS6 z>Pi9!0v$`Q)`RSzTf=L+z?0gnzpttJdvfwx;qv=qQr4oO(s}~TY-2BWLXcs>?}JezSih1L=h1Ac!F!qDMR7y~G} zXAVehS+ULZ#fI665c;0VovB6ve2x(g?~`Q`DzX(fQw4JK*?Ch(B>Hem%oirq@_kWd z33uS3W2W)_TIhN~nkW^2n$W6QVz^C#RkEbd+T5Iy7{#VL#6h}q9i3H=Q<$IM9snt( 
zwtwcUBtlC5$HhQBw*?Q#SpnpDN#oa+RT}r_G$&8Ld2w-lc?-qvx&pJ@Gh0+s6kC|K z+kF^mOlP5E64YF)S$PWgb#NWhw9?a6%a`v`S~j=tb6cCs?seVGzj@^NEpO~LSDvQG zW>-vN44G%foVLFagBiKuA^jQOTD`_ZZg*3cGg*1!k!m84kwK)NoRa^EYqUlA?z9Ar zW$WSU#B}{3wfHz`NnE*4(_v)7bCS=WKc^tfyLx8`k13F(wayhWGBFXgO^KcBQz^k1GHeNdDM;m9jpgfHjx@;)|=T1+Ag0IYqIO+RydFrce6dTT_>dmUDd-Hsax$5vCVclJJ-2*eR8Y^J^ z>xQ$hcr8vHM>V3Zg6TcQO)oFeM8zkc>gp!E-u{tk;CJ1)bLaZg< z*3vZW19Rt1OInR<%iA)a8~!t2WAiQMZQCBfcay_%HBd26>e?!t@BK>I8ZUbGGNHd) z&*qUQH6Zc-;BNUIzGZT=pe|EJq7zcPdHC;VIwvT~Tl_Q6>UUG|}jzASR;}#N+F?C#ClP`CWJAEaPFZ@o^Ll z4`NSf{@v0c4KhKMLjZfk4+*thT+;^j!Hxq?>r+XhPiTm``)z^_< z;H&X(ED<` zC5UJIRRJztV0LUqt7|TXcrTxWmK4`gT<2t>1!-uw0xYRuzf}cB$`m*)1mIy6JfQ=I z9!d2`lP%*Je{bFmxqd6_UzDf@4UTk&s<9|*zwAC{X}~X+J-Yks^mmm_r6X^KR-Q{H@$yW|e5SqN+|=AkGQ-3l z4NV6aZ$D`9_;es0plY%aW4NH=tOmvSsEL zKZD}TTJa=AIJMRl&KiF36im9_Oh$34ExicX3-c>EJS*iqM9xcNe?S^%H(-aT&w z7v7E9g8eReNWVVC6@NDL=md}Sh~aCT6?J)PvyIJ;qfJ70zvNzj&Sr4jws%dvBjxGS zYsHuHAAYgnyQC2f|M3C!g}R*mXI(zQT&YT)1Ou6f|NVieU6en3xbxK_(-qzgeQ&$V z5}jrv%6@Rm(CC-$2w4@JJy-9ErVOLo5@`~#=lf0IkYQ5lB zn-}ZK+0eG;bCAAENte8dzdx$C^@}3tvxDYR^S|6m)L()Mx?huyWjg%x{DbFDYThN7 z->F~Jl>G28*O%2aJ9%~?)aE$6V=>rPkYP?6xSDvO`p8zEXLr}_UyGuPLMKJu zF_1i{mkjz7f4^t^&Fyk=okGSSMIL4z_X7G9rLx4gmD@t>KHeX;Zc$cj;#n*+!?$6h zR_%0ub3<~JS)l0Al$g{fYhgXSX+hFxEkD01^9X4euu%WAhz|PJJV6twoYw0<>3lLhzC52M((q=VT?}IDl!sRSCuB}FRBg{?y&|*K^Y%1L{xaErwBgw4qRH~wc8f4|ZurMx4U$$dNuy`%Uza9|K{zREl$~N`6e9_A+JWp-WJXU52c%Og-TP8>nC66 zb#yy*My+OQWa5R=t;llyBg&6jgYp9=3O(}+V~Qn$9A7Qspr5q+bXkkMzRdI>#scv; zE;n@e0SlR2_NR_fLw3zyuU8=YQEscp{1&F04^q9qu}JC6{!sLtW9FjrEy-)$5>VL? 
z`_-Y{urZ0Y=YhZQEhp&&nX7r{lZkjO%J1iL`JZyQl?C5EE?q{cp^<2*PfNk=l(enK zS9n%3tg`x5xsk#-ZZ1zIzlp8$K7CS4sdC(nKE|;xpf1hI|J(N4b1LDr%SJIvnWR1* z5r2y95ncDPRJ`xnI&2x9s;N8JG5HSHZBw2nuX`UHcZsv{ zLE4u5pt4TZZS9@ZazhmF7N^=;b4-4Z^U@xD5D=!7nr!feta=L`s9uZZc|o#A&x2|6 z{@~3oQ~%xvw|m=m2F>qg7|l7t-w{c#UmUeUYbvh%oSjvMOXew|FXM6_W?6y`1%&0a z(wDhxjBs;eid&sa?yuR|k8gwOL6g>;FxY-3V`>En!2zHrrV+q+F(l)IJ_L!}kBpfl z-e)QQ9~N8E<^q?ncMaw99gL%Ffw~h7x3}w7dwMz;GWWa-*Wc7Cv{c4{=I~)!FWvES zoi*!%H=meK~Pp{I}FpJnmKJhjly*mDEhG70d?e{X%-|y@E;|1 z(&$m2%CzyovKeHEjS>>e@Z0`-HiWej`mHkoc{B5GB^1kj^-jIoRK#cJr;*?3lq5HE zm#tD>2H?(KPVqld?)W4OEmrRN<^3X=Ql$gG zx(3&bjWm~co_St9JVl?ltFIdSX-$CI=y06yqIzYy9-p!;7R)r^cV!(bUBp4}Uz@|s_$wj3^Sk+&BQNLc$L zu%O)gS>;Th&iDv>rT%-Hs0!LS=}kBGM_hh%IowT)28QeZ78)JzbyUAkl?Y zaC4X90!Ie4QwUn#Acjm~Zdro4#}sI0#&4AdMgJZr|7E+rjIy{gExRh>h-QeF@OK-p zb~=C?rz4Y8xIRlwu|y)0U^4oiVcFk-eg^1LZ9I1#9iDC0TMUA{H+ieZ` z5wR1K8%^5^Sa`Aub}sw@KLrGMfoK>|NJ9%UPzQI!)A&JfOW0Or1)lI>5B^`v2)|Bb z-DSLabFKZy)mCEL-+cZw;KtFvIK|Q9xJCQX=q>|NgUlZWzUfc!pGig z;WlBjv>Cp0s)RHikGbpU<1Q)nEH5LEDTwpwkA;Dll6mm| zMRDVk<>ljF;iI3sVp}h}ZivaEJEQH9!xR*$r>mA#cWtJ8CAa9yF|U_b^Uo(+E7G%75++FLIspr8)fE7jVqX!RzF@NLO0!UBK1~k z!9vva;lV?PmI2G>1;&)b6;-u zdey~RRHc`C9vAjpZ2iR4I#;pjcrG>O!OR*9i}LmXJdo%UIG%ulu!zv0gHER|wf3)X zONX%Br^7d{>nKl8ZqBGt>l=*r`BL7ED5`eXk<4~}H} zqQ>kyZ}bOb`$de5=P|3PWsW|v_!n*_{MS4N39|;QhC0zjORgt+YFui$_qIWKJg7Kk zvHjP>?3Dg2H91-u<*zh%wsp3Vo@}AeiE{h3$X05Dgmj@HnU9>`iDOFq|EgnVSy=wu zMP^xa)Qq;H8qfLDR;BsmLRA2o%xy^cF1O`T;vg{) z2f$6v$TLylui^6g3)1VFTgpa{qjjnu>TF}nRx#j*&j{Z;v5oCw3fz}2F+Mmcd*sNp zQe)QKu4$pNr+yC@<7F{>zIN92$M>D{5!wq6g#li+KB`908{sexa_0LGY_hK7W;(xp zGx|^8J|Of5fi5=?!x<6lcsr^XoSV|}KgPCwxlmo&L$y2BY-8SlVPk0c?uHd5)yaHv zTr@i^Bd;3cjE#}Fg&!F8VqpuW8dR(%$1dQ+2(S3~_|$?w>uXu2VwfU}O_KRZLB;1u zCnoOPzIK;!)2Yfx#{%W-8asMQ!=AQu>s8G5s(D59c2+kcgR7se@I!T`u@KM}++TcD z1!ZM9;fS_C1zz~~_={iUBjK-EMy$f87fnwT418&%xEp=~)?Cy)gF<&rSL{}-xwLw3 z#Tx_T<$hu;VX_5^{8|Cy@h5vLRenNM0~Z(4D2<%T;3u1h*PrLUIWiB^UIk#Vj1eF{e-u!hR3UZ}JxZFw}sq#bU!T3dof 
z@e$>wP7Q7tz6{7MJom1gg}`bAAr##6?-5Tb%>7oZT)D?z@Fc8S1TIfQr#Ha7-^Shn zTn zFB{9)>T4(K{Ms*=($uuA^Lkhb&ASDw#R_9hx=SBRHGdAdeWvf0h{Hx>=S`*IhWdKA zj}&L#P~kuB+Qi|Gla+XdgnAFVTQ?mFmkSm;^}HHfY;aS^p%^F(iCG~0rCDu5p-q5G z#?J%V`p0SiI@(5U*}pr)HgM#X+QHj=!wILvlnx0y#F@J-?NH$0wp#?OtTODO=^63h zn{!S)b5&c{8r3SG*2v&mxWr7+U-?o=;5!aLSovVgT8xv}l_g|d`T80f*9D(M|G#0i ziL{LE?26piuFoH$$hM$OGri%-sawN;^RC<2mCNSkkx!e<9|Su^clbYc2>LE~+?F!J zf=y}cpBvWc{NmkI^^Kopy&AS$$BcTs&(Xv1Cte3=&_`8h!-hAF>zXCeFMuDOXvO}g z*szegFjkd@6l+AKo^&YkIsomTjzWvGbI3wYiX9P@@U=?o(rhGl^y; zD*Ny)UEALM&V|d#@hLE*fRSQbOi_J`Aqxby#C%sYzj*Hd^^9C@+Q!a~2pD0eP!1nJ zc4>n<^s? zb$@hoCb*~M+k0*8*Y|rA4!z|lTb*}9++KhAYnvg4oY@sWTeHl)Aato71e*|ZX(d>7 zR>@DS0WGtrrDe{2I{L{ILLP^ly#(jclM|J|q_tjq4zfF(kr-di@PBihA*6b8g!c(- zd9Rm>GbJ2g{{2`?+&RnP?JOl`6?e0!tNZPJzC;^|zIsYbjnYKq#;Z?q$0mBZd$8&K z=&;1r=~MnQOVA;+g!=C+ntsXG$+uZA`?KZEq0NPft31f$TL^6((g$MnNmvVrJy5!b zL+oS$whZxdg%+d%W|2r`Q2KFza1BFgg4JB?f7;auV}D?iu!-$bo};$uxV4cOgF*vR zr$T`vg#^HBsOV<|yz=WaaPMR6I$kPORh_!kWjsOj4nkNPKt5^a1xoekm(raW40uJ@gY`D zf$?7eVH*VZGDM4l*?630DuZu{pV@TiGkZehhXTP6h#}RauswO3MkApb({~|qc1bn0?qvkc=PaX*a&VTV6YK50S@65 zp0f)_KN7oRGHt&S+x|0-ke{!#&h*9Y=cIDOC@S<+Wob61nbEdW*SGRL7zsSGS#*>u z=iH+2VO~D!&-q&@S@McDM&6rG4=wojgn*2$jX&cxc#Wjhs7=(}4SC#!esR-Af5kbs znudnVJSUvjML|san`)yDtWcO8I}Tr0*E@HXz@q3pmW14RyFd&!LX z-%s5NHu}z@-Io*jD8;v|uo{@z(ttAjrZ+t&=Y`r0cg|KEURr&AMZdSx{CeD6unf=JM^zs(dZgLR`e{N;y~`~x zGtrB@iut12HJ?9kSjp>yFL;#NDw(D2+u71gQiOYZ+bH5S4MumR`_iz7T(GoRWhz^n zpTH5Y=T&9cx(j#p?^$%~?BDilS@y?kvWhH9;n=n7po8hpo3dU(fbEay!Kh%+ds=7y zW9)oYOW6KXXIiItoLguXCEwuURbEPCeQ#|@pWD^A46BsKCzIY@ORKdqlGURZNE@$v zdswR`;9`=|lX#Oe#{PVlk`FAs76%?M4b2TV$#qf_6GcT!k6GPsK0V?PIx64VJosa- zQo1s;d-~9n?4E91GkyFiG+ku3{7eq z)eKx?zb6L+8nNo=D}O@LMT`4uY1P5K`2v{Zd|D{#ElpFzAWUn7zC3qWQVNzd!zC#q%?| zcL$p@bv4o$4<1x-+5;rwZAqLXeeL^eLN>mB?H&c{E#-$YMh+?+R%my8|A2LL>59mP ze;Djnk*_#-o{oa=tqPd_6vY)4@}KA4_d~bPnjizSi=qtk&P|&(t(BBaZGJBF?+?7S 
zNsNVG<(h=v>o?7v4QcVheW2jPo#je<@?`h?+~oPKJ5fT>>p`kl+B9uJElX^bt!mI9BFS}o=&q^}~Hk>RH=PTOU}Qz38;H{OPND^|dL6q1>wEYb(!KQTUxOJ?k{3 zkDAJ+YZMf&TW`;e&D1M?b3k3v^T7DI^vDz#U8Hgk4P6644|wc6go(CsqX*b?Nf+Mn zHbOdJDrumv?}N3pbu}^0n4{YD^RiV_eZ#l{;GcI=dfL`dYf{&0!}1Y_SM~)a>4jfv zEVw++MsPES%X5y5K6G+q-@3_d&Yf|w2<3X8BcIlrH8Svf+IUGEH``!h(^KlS=fM3a zqqod1s^1MLj;dzbB9mowddVKPDd(A=+;0j-?&CDB|HamqKvVg4?H?pElUYcp%t|G5 zQG}vIluQk#%=3IIQzev$GNqKLOeM1;5;8^^GM9Ot=i$HabB^Bk`~Kfry=#5zTR-PK z=eh5D-+N#Cx~>g#+theG190mD5;omM&w#}*1#Zjm>&^hkERuW)hav8{G%Y$Fn)r=3 z=lo5!I3I+BW5?OXN~%nr`*0t#r83&^TB0+(M0KbO?W*6bVT*Cp$ z!0{7~;f*1?DA2|F49WG7m;!tRAafYFaG!@;;JE&koMrMegm=-DFUsl46Ctn zY*f@L!MJ*{o|MylJH@rSw0;EHt}nF|dcJRw9x<91#?Z#LzSv;Ppu2a+sM@|jxW>xB%r6awOpx`H3L zk?wRLL)ga_V2C`G8pQpq&SKk|F8RTIbw1OkV`t8-^4lcSRs51Z1`jnK=reH~$1IMM z?2S%emv(P19M(H*z$)%yEW6&Y6=!tF$_k`x7Vfm@O*W}RU}5@+$~3XuPGw*wFT)kD z1pp|FuB*II33L4-z`+;>w_0GP`9b#v?oN!Ms(+0hpfd{pxLZ??G>Qn{JsqMPjI(Mk z``Kkclb%Ovmrou<>+i>%d(pdFEwywjnpV#3ga*}B4>JQQ3IFy|*Vb}9=^*N*@ynOP z@o`I5b0_BYj)hme8~5P$0DujItj7lrK`{U@9#DhQG}6;Y!S!tqXocmPNG}uCBqHr) zk$j9KiE5~}EMWpP$zVR-T zO{X*DizBpX+jo2BWdd8g-5)yVmmha5_MlgcFkz zkw*MlfvAvpxjW|LU6~LF?84jI`;;Dm0)FS zhc-#TEUIb)SjECTV?UW!$ba2oc`VH-Q5$`rt~mO>QOHffPJ2ALaso!QF{M2wzTx-} zHm%QLc6N0J7|?J^2bK8tvEgo?jGlb4`7-?5>ZUB$XM1|?jh!^Nc{e3@ zY>5}r?0fvA=ZF~XHxQ@CnW5FviGq`-aUSB+jFq%H^XfM(ST*ebq4F&dh?&(wr&s9l=Hzm%PUiDr0<+J!P zaiFkgLmbrvN-NcXwV%5QmM$QElO5pXO;Gb&f4Hm-M$8b+o)JE1QOC z-Dr)+L8tfkO|0ED(EFX*_xROf#kE7odjM%g!R!C8uGy~vw!LVr;2(>S1by~Lm0<}v zF1N`5Sy$7>z3htb1I#A6lbTnpP7T)5O<>%of}S5u(i6$+C3OlBPwBX8k~Lqz`^ZsG zC(p7W!K^a_K570?$=1^}X|mfocRL^>2E79soB=80c^w^P2&)_*r$rKNupH|ayrk|& zLR<~q1bm3qC|v(=mE*I;jnd4dLqYCWw5Jpvf1*pi#6r?#BUZ-Y_S(N_p;nn(e<9a(rg8;{;xGv z9-N2IlchtKvSFylzgHKb8`VGXC14kL68<151w`&RC}GPZqS-hX7p8ds1}{yT8q~F4 zU0ifNW&DgxdxDDJ*L&e^hVKW_Y5kP?RF=n2cmA#9Rl0K!tpgK8Cg(C}5>?gIyvod+ z&a0fC>cKIIq!mDqzv}zHuf43k45@*JDH;|P zthCMtH@5sp*3RxQbLtz!FXD}krxfw3Eu4CYP`eMi!R|n<4+iGGdy9&#V+yhH{e(7^9 zKS7UQ5rtyF#veA^3#hA#fVvGj>f_x5#G6z{`Cdiuhg{Q9-I&AVgY|)B%9hW(ZX46; 
zHwTj+=`6f&ugZIq$t3BbKX<>h#sRUp!^;D9)4^?M=Ohb%#k`1ofo8~nBOA>Y@ntlr zk9SZ=Sv(Eggp4~flATPvy*j+G;riE>>Z_mps2+!hHM|zV=x0SfynFLJruJ;GmwV*| z2jNY#qJv=#5^gXPGcKyZ2QlH0~QZl+B-u~ZnJ*GD%a8eEvQ>k;` z{6ME3M0>}aUik=>>u6~NlLuqoq*vbPH-{f^mRAVBmiq~M7rrYsCW97AYaFOc4-)k- zcNyYAVAdxmm%OfedDy;&^>~KcF8|-Qy+hn@4m)kw=Q(s(AGD@=_H24$Yu6aH`0~%3 zrZ*o<*F)$ZFXnKWK~jZf$G-rr3j(Ym6@QEZlAGB=Rr5#Q#2Ucp7L-JxkzZR}cQEBfj= zbovDr%YIluG+s_cr|G^#LM(Mx!I!4jj~-u;!Ef34q`P-X!Ii>yOHA(~y30R<`W`t( zfL9Hg&y23n5%w(ymiwm+V4~3;;r9jWr1%wx5D-jI@ZKv|_`b%(nCGXu(Eg7D)~b`( zr;LT0U&+;DUw}JrsmWCLqX;K2>F+iE>Q78-+0SUcdt`yG$nPP<=o0y&8zdf(^nV3x zRVHfdA?Or^lhVHSZAXRFQ4yg#(%lEgW^*ksJhVJg_WGQgWcD`~)k%+tl3MpC;Lsn?j48}u@NyZuBSV~3)VLnz>4kp(ZACI_|MU^MXP!gb`4m7 zF-{#O{_K&9vzM?{0s0h&trudGjoR;8GP6oR)+;2TcDX?GzJaK6>u>wnUn&iU?xxO< z=7m33p(uOAm9({WVq?|Oj|#v4(iG7dTm|hs^3y^Nc!&=Gx!;U9aeqdH%{F!F`pq^l zz0npjTk7UKl6Ijqb69usesS&er>IYRx&>WUxYpgzXok4w;~!Tr?hKTV6@`@QDN__m z%OuD!0=wFrXbB_sV9rf4;1e+2(s#7&j6fOKv2GmKz3j@u%=`$(Uqf*TJ`MM_kEl+W zw`bY4%@v=>9Nr@Xb=>FL9P}%{%CPu5QG~{KiAh@}0qrCMjAB_rDg}z%vW= z{k!vpL4OS2k-C0h&;3*PR9{7%*ZUQ{UA9L;X$xFb!a~Wa1O{eWGJ;%B%0#F?=k?M1 zGb6kQqYK1G1dZ<*8{TI@IodCOk`t+3&O0(AUu;6r@U1W)02~&NL#5>2PrV(yF76I3 z%f87=zWIjr5Y~)|BE-7!xRtcOruj6#amu-P`qb9sVP5d!Sy&{Fd-^uWCI!712 zP)K@%!vN#;!N`5x?UBj>OzCH@d=V3DztM8}6z=5Fw{k@iG%KW6*BMUjJ5X%*`(`MI zk|~{4bo&NA?@oTuPiZQ25_x>U0)hFQI?9H?lujs->;5UQOE4$D6zySiM->0O$)lD% zif|Q}*WG<~k;?P>Ik9V5l-y5RA9KWOnTO`njQZZ&&+mQ0eE*FbCuqHQp`AA*gL`KmJXas$-VUD|Auz&>>b9@I+#|94mGQI_G}QrjaJPeK{}O|ctO`2Vj}sF7${O$= z4lqZe0S%bczA80w+x5{xbtUt~{vko`mnJsiJ>hi%A6JA6=%;jnGrX{ z|6SB>bd0@5Q&aL_#AyEN=sRtX&X=b{z(-Xqx692?P;WnBJ z5b$S|`W#JGzU!kY+xOvY>0Z7@fTyC?*L?usWtegz?%=<_i@64CR}M}~FG$)K>r>yZ zuYOncQ0u_Q7N6B6v82uEcYIR2(~FoQp8UP?5&kCm``VzgoOD1WOx(a zadLoTFQzte>q0axHMrX7vOk2lU{0=T(S&aSRW>`_IyC1PIryeqz4s(16M}(y1TzK7 zAv?XNiY>~vt`pM6Z2p&oM@xHH9^ALkia%U~IdPPL)qQ%O_2f%MGe^zY!*aHKzWuNH zrlotaU$Ard^h%u_*8V%a-?aZLMI3;xiN7zU<>kd7l}f(z5Eyziem^L|_YkrS7=y!Y zMclsvdY>bh4Dt`}*fg)ZH#-`~zDC!w_o$}?*{hGsLQ>Udy`N~Rz1htXvCE(L4y)Mq 
z4V89xAJkKV4BLYQk=?C#y32S5mT6KDpN@}D7>jKglO%#Z#GiHwZ=E_#u$ThkUn2YdOW z>EJ_lsT7uvj)vzV?lmdML}#2zd2-pCPk$-nG<-`rf|O7Npd=BaG_I_libB7;m4 zD*HdQRR`>< zWB=uSz{;u)b5J};P*ag={ifRGJO(QI1IC}_m%2gS`J+82#}hYvGD+V4jW^D9?em&*|;lTvdd2?^EH9mBYp19dN`g|sC=faaEslCU7d82PUXVR>y^`_# zBKdO1;>X5nd(-YLyp^$GxY11>ZM5_889~MSgva>-RWV@|BkHX@J>k}W$m!~Wk>m`l z2r(PgU9Jv)AftPAReNwDY|q8B=bWx{#I+s_+{*oH2&4W6SS1Lrh!g;8;?w;4wm2ba z%X#)0X>&^XWLM)`Mw&ATYEysO=dUrwTGi+;li`NM4_A@z%~#$1_8IrkDB%yo02v`Z zhf95ReNfyUc(tu2^c_KTf%Hl=SohH#OWoA`swcvE9?36YI=7@frWBkqR)t0HJ{2qF1FWX>#mWl8Drl*lY-=ZJcKMV^{PNcAx7O|5C#yM0=4>pf*shxqVeKU z=PwMbByO$nB4aOTARzn^E)o_1LZSIC-~F2K0I8${1A&b&AEEuTN^tKl{qn_={x8f_ z*d_P6Y!>!2vCF{3EyX=@UPsQxu|MtE+I&U5N1n3PhysMcni5~8`;w0;)Ozsj`!Le? z-IuUmlACW(oEX`oud92?$;qjx=Ab?r>38n=5t&&}@G!aRSiu4i;|y+V4d?j&EMZBa z%to3X{fd>MF!mO6nG>Pi+*q!Zbl7{xm45egi11OeMko+NK;xH|jyA8W|A2kF?y*b{o`mM#ykewC^F zh%Z0Vs=0S%sIPd*^jBKu#5`xT~LvN4Cq06bcrGT8bA-0Y$ibIU(V9X$!lC016Fe(RN!`S!e5ozGd) z9=}O7Kjwa^fp89(B_6j`>&Qf;!Ay~4eykc`h(V)hBsUmY#mIXe1`WlB;aN-`3}p2L zi!7y_jZ-;1K=(m1d*~%>HyDoDn4;m5FdPuvtbV|j{+qB>W3m}x?>lCQylX_#^ZrDR zisBnd#GBedV+f^85J<=Iof@-%sYTr!6{rG` z?(vaQ+8EIe_q0LmMd+i0FW388Ch#Maj+b+P_Jg09ur7el){9riE}%xrc3ktE?R)i6aj z;!m*t9{!yp0#~K809*ru%VDM%z7}RHqvjDq3_lJ#-;#BMrBIyKn z0?hcm8x7;R-(q^9&d(cse#;VOjY8<_w+h@Myd&Ki#pS<ayLUi&IKoCPz41))v1M zP6c6lQ_pDn&-^W4{lv^1u<$KZ(%X%ajBsPblfm8b;2oGql09;aOSFl$Gs(JE@!0nC z>C>Gae?w7BMMj#C#>USp1_lPIh3*qSxS?i3INkBOBRjedT@T|e5fP{3-UK%$t7hdC zoB@q^9;RN2SJsWDQekGtLmpa~$kCwSo#4fdZ=0leIJiN@v8yJ!{4Tkfq51x^&zPPj zD3}d5Uc}|&e~S*w4Jg7HzzY$|%Pe3kfL#mMWqFJ31Z*~?)ez9w-f~fSpV-IT?mKWnjf&!v$rq23Zui= zb?+(jYkq^emKO9!teKr$-Fs-ugSZu1Kt!|yQyHdE$5@i`ce;;ZdaaDDxA)7drQ&I2 z158shF!`v234Oj)Clswt0cZODseM1+U*3Hi=9k3nzHYxX(Lr$nSfye4IG81i8Sg4w zs`?DJrGOll!L-(J*<)Z>M_aj~)-mNf<=u(h+E7zuV`heeE%u|35g`)(77`5rSfm$# zG!R%uL?}xYuTApkLE=;fvL^MS*H_3~fdK5gvXrJi7FiGUDs7?ub=YccXG zwTASB_13$9%9NRE(~gY}26?1OAc@3^OmAw2uI*B%?^DOA)H(V$;#f)jrJuk&=$#puaQEo=z 
zPb8JC0-?;*UBPcM(A1DX<#y-#WDg9Y$G|m_@##5X>!y9AhgzlW?CS}X%_wWr$TQc%Xdze|iR6U-q1^6)M{JTpsodhsk=n%YpX8r%& z+`l@xjS@6rKM+cwv_bIHH*W~ykSfczGh!d>$m0KGNy06ALUgCLJY&kPg4Z7?rR{^_ z;xZ?0nfSjsr=nRVH+fsaZ{}<3@v#oojQM za@oj*@gX~->MGqH%23Xz_$RSkq_?AFPl6j2j8L5{nrr?*N#Zs^)mNHxOIr+MApU`Z z_<;zBB{j^kW&u+nUKCL6rZaT)COleGw0Y4qY7l#(nV0(D^mAb?(Q}XklU}^|`?xMN zQ(-A0J!WT3%XY9I74GkT5CR#4WQ+T^hYXw0n*=AEsz;KeunVr_rYHZ=iq!I1vt4e* zl^wP>j*;msQTSxFo9RJC#q{!DN9)tXgRnEcT~+}^rV_YwsfM}Y7_e_W2-e3)d=ED9 zAFfUgQJUhP+9h3NBYwDNi*EOUpqY}D70?h3k1st*4Cw879jz?jWZf#7yQ?7a>;YDm zFY7BelW^|cFaS(t-%M6z@AF&A%Ne?Mc9fV))*{M;(2yp=BzL((QeCep8n=EEuR5vZ z#l}fsS86&PE4k$teqL0&YYXG1#lf=89J)|wAb~1xV!O_r|AR&#k%h)iJJ%g;-Lv8Y zF48;c_MFkuqO-EHnjdQo2N)Y{&sAXTkHIHPM(|LTXqDAe!wd}#H3K&9R7`+4b#AT= zDfQW7saNuk-ZI_VWSG>{rl*}xFJ}D~>JRARRZBZ@moZl5+wU#~GUCX|0Z*8I)({{l zwR`lZ%*={rG$#gKgvsH{sEIa?@@f>;s|BN{2jFMLtVtA2E2FWv0LV5#^z`NN9?#N9 zy#=D8{{DU+>$S>UFd>|K>kCMmIoMsx!w3)n#<4)zG=2f~ai`zf1Tto7i8_o`#h(x% z$FJA;#$h15S#`bQ;vueM0XMs3^QKBP#It{XvYPI6PU_WrSN0ihrP{uC>33u8g9@h< zbKC+3TJYT{$Y456DFE{a4@QSB*VY+$DDK-Cehcpq8yTF>F9Icwzp@MNsQ$shY}h%+ z^{yOUp{68kV@o=o0DTiN>WvSE6hcA(-$VAcY1bsMV{^ZD(2rN($?ISTGwNL|1z8Jy zy(u@(J!3LwOdL*3YHn8ffu28E>d(JR8I4IZGwZvMNOH`sv+6XPk;~64po;swZXqBs z$+KC0zL$IL7snY$f#rf-%gvrYq0(iG@aqljcJccf<_4XM4U5+zWGqb_`09sRJnuGa5uOzvuw(}42JdE%1=a;w zCOG{dmruBF{_2@WJ=q3Bk95Drr+~@UgEu&2Z@=6ZBb^~Sztm!Q{?*R;DSD;llaw1q zPSvUBYQ*%3wW8jo^})KEcW@k1rc=h2Ei^&&!S-0WPkU^-*N4Y+&&XcDuW0J(VYF}1 zA%#(3$j!E?xj7$9Z>=ElQ(FE)Z}8Ea7V^M9F!h}um_uO5e4u;a5(jV%u6O1+s$JQO z`k25;dV2llFy^z(E{+0dfy)&5uJx|ZskBY0Dc!9-dpv%Z7CyJ0Q3f&RDO@COP4X9ibUO+$KI1_u=!Gqj6Cp;O&@R zA{(%&8U~|;|27UxJ=t84@YwWh%jEmCuk*8*O@;}|a4Nd7N=K|+MqA~hj@?5KmE4Tq zIsiDtUq{}=$Yi+KTN)AoG{}BNJXK?s2r;M8w72t3=Xy(v8q>@TseHI%_7lideDm(D zMrKzgNBW0_zjhaAI`GxTb3I*|)AsT4@}i``-{XJ26InPqIT70n|KBLM-o1D^vqc+r z1=)x53JFxzjq%pe#q^4le7L64p6r_pp~`cEjX10U*(aM_T+(Iz)qM~UsXM$EX{t}7 z*Q#?BkWoUn+Y5ZP+2I61b$Q=iXxDc0IaPk5gYB>j)D<^ZW;2V=aUI`6!R_J7MsuPt zpNhWhEsqhMs;Yw>3DNqji$L`OCw&p8(%THHV?DgGuB-FweSeMc*b}q+e|3oRVUmYB 
z0F%&qxe0L|QLaeHc7pVv*&~{8+Fa`KL>cHZ$;d#9?Sr9;`F@|Z4oMFO$}K|Pk#$`} zNCl{2@2_5FUG`q;$}GN^!Z5#9?_MUWP|WmzKnc+~Es1tBG@}v$RG$9EWsj_LfZ@=C zae%NW35g2-ol;xX2Um42N8P_Ydr5oNc zgM;blyXks4vq*{3=y&aVFqjy9^;0Kx-(E25$`+g-CQ({toF8LTOicfDgk5R#D}Ny2 z&ek2-Ln11e2c5sc`uG|!;sz=FF_;LKlAIh+rMA`N12v))bc|3;Mr5isB5@}kdNZ7Z zb93 zM%>@f9?gc<=6$R8Zal8DlL50wj`i^2Kl+9ESP~%|#|_D`>-$oZ3l|uHKn8}0a9MlY zP}kNDchA=gBf^@gt5tP@c3m)H@3;fX*Ia}mBTLEIN+0n7z--BI#NPO1V4Sr`+4{Pq zd)}v-p3k=DbL|eoY^uz-VT#1p^hGYI1l%ue=RdGk7xJHj!osA$|B?Wz&(2N46DdZ- zbCY7BO6RfMc_z)JXbMQ!Fr2FkO}-mx`M~w~j6i)9;lHU+J9#qZ5%^25 zuMbWCus;xfPqo}@%MX+mlLhZ)Z^^%WvG)_7DECC6tH{sP-Awm!t9q0hIk()+TeT2< zNw~q9xTR0y8F7#_Qv-}!2O)47uwfMeCgh<$KhY5lhW3YHOT%mf>%9cbF#aR(b5`b{ z#F^}D9LY%Daplu(78;_FYJQ#SedsM^4Ykn?_UsgYD4z;${vUhuS=4P#hDR|=8 zzyRy~hCl#){mvJ<39=CHW#^OI9epXbkO~8s{{s^dKms9y#7iKJ*nR&L(3W1?_mt2; zrxMYdX2f$eAWa2k?*D3iGzqn33+~t$tfL+S%eO_hi87BKzm2kj>uSr(_WPWjlMJN| z3YZTc7S8V#$q1lVJid8oWCYNbn|k<#l%?42WDdnNo#=&uHD3@DQOCgG47?MV%G>=~ zPl(7FO0^CF=&2xk($9G)D61I5X0qq5-|HmYXIpKb4;s{PZI!>v2ujDosfYbMF_Z`s z5l$3)nRQMbFl40TzH$d2!6WKj@q{HcjONfi0GtlN%tHPeNT-GBmOwWkRsTzm`yZKM zn~As@1I>-%0-zq-^Yjk{YWcwh{Yn+yYfnjI(aMOC93sNek2&e-GJjjEk6~2ChOns| zp<=trpzI6XVaR&U#RmO#c2>cFq(j4wSOp|IM-k5zZvfqGXso{kdo*HXcYsl&&VVjd z^ScyU)IUPaBSx z^XT2$T834w!#jTGDlM)!#-7Hv8D*pKuN$A|0{X-VE#r!700cAwOBYUb725cKQ9&{r z=6)X^QqpR4!pF7D&%fsSE<0?g*{&|d>&v8?tP6b};}Kve&< z4@Pw{tQO+g-zz!7?^%)vCpJw)4xO?i<3MuLR9C+bq~q=1<>Cd!6+Q|4IeW`{DdVc+xUDn2(% zrJ6>pBRFBYVk?BUcXG3H_DBw|4`yyC+I89RGtJ8jmMx+I8ye9f>Bn=-J)Gha*GzVW#fn=L=!{k7%K!T-;SEcTe5@j1dJ z!$54bzuu7p77OY5gPhQ}1KuSyXd!O^)e(V|H-WrT8*|7Xf{Ak_w|gU8S02#>Mt@o! 
zbbo!>0271z@J6RUMbb&K;WkF_b$%vgHg9R!DjwJT(P9tvW}YSUQ*m0Ot?Z9xeagpu zjx+ZpktM$gqDYIVASBG{7WII;-Yoz|64J=U$hr!nWHwB0vKO3gWt&Fqf%S-NS|O1^ z_Of8$x_jusyCAqQp-Bp4iT6f@EyS~xmb0;Dwm6>M*yB9~Go8&Mr|vOIItg6$5<0c- z5n>|+#u=%q7Ku=_eDLRxQtq{Hc`x}S57E*ogXm~AQ?&FxdEmFm_i#AVqjl)PuD}Fc zz)Ww#(BjI@9te6!-H!%Q`qL{`0U_XWHOr-Q+f_gb2LI=}@n5e^55@x3*i{%NFrPjR z32)V7zInJm0XLn}2=6PWS82?E+jDi{Q5;RN*Oyit4=Gl`D`3Yf!a>tLhsPFzdHv9F z4);mdd&i1F6Fv~ehR!<7+5rai-wT8WaT+NVRoQTNQ)ACy9ZZIY@ia;sR+JmA}xhcpG-iH4$pOi;amXb zsQK>n08&iep9dmgHcBvM7#g@y#stv%LAU|UQwe%A9QaOtAw4Kgm!HpNPjd$cXQ^mT zYpMW`g|g^4F@$_c(}OH3-?^`p#PT;Yj1nuHqD#UZv(?)A&0<_c_`S@$2)T2G4FIEL zR{0aX=Z8;hX;pS#w-^25J_u?2EsW&jJhw4#hmq?CJ7OqAl!Oe-D%}|6(7C~ly)u~$W zA&`&|Uwg1lF{pK8X5-`ZXjscR48-x4LQN4otI?O#P zks-7!LJ(dyRN&>o;3jZ>8N20fd9m%2RgnmhOTwhHoi@xav>4iFc`u>vm@-%nuueIp zB@-BW)d0^!E;A#phN4BXwvc#GMM%xJRdFM#EOwU(PTBQPc9LQ*IAc+|=Z%feaE5V^ zSy@#o?q~IP9fRWKqeXdw0$=KJ%kz{QwA@tOa>@ca>VfzJ?_!4+HT-vc*JXit+%kYj z%u_*7d9-E1rFV?x%IAj{9rN=WKNz0bXMQ8ncB_{x=IA@o`CKzHAgeare|-lx*;Rgw zup1`v$>!#}SmJ&<5KIs7JEW|x!lEWToPS-X=uEb}hG$!g zZW;N2_m(X>3Y0K4r;tz(L9-s1a)bz+b}IH*U}jIGkfihe@hnW0KlNUy1eR!n)Z2F5cV#EYV?YBFZ>{@bi*O zw_EhO^(`N$@y&_$k19cv>vLxPXJRDzj?~_2lkC>>tL-j^0)ld*E{4Rugu|TCeHde9 z<eYpobR*4ezT0K|e%RsO zd!ni#Ds0hr$>{B6sk1fQhxUP<04INAG%euIHG`r&h%%NJ+Tbr^PRhF+1nJtj=aGQ( zZWBTobDQwjXwwfWDk{iQ7vRc2_upH><=C}DFc&sEt0bd@DlqU-0#BJApd;&7H&$oT z=lOw&YGeqtssa}kl`>b^7k@~ss~$DZOnC^MCc90z;ncS?M8YDb0Qv*l$L7=2x6nrs zq+~lw!?Ammv1~@Vfd%Umox4yE(ZubTP4}MPOvb)PLF>ae?fy&eQJjx;t0um|bcOIi zI#H&W^Rg81Vvr9IKxdBOFx|;k@DhG$bpAkoJMbMIoSLpf{~;&v(t$Gy1B2kn zDd@`pq`iZa?xyUJ!0oXs5OP@o%wY7F+{dw$*I!iVqx5qK-xKlu4g`riH!UpYP}9HF z*gF&k-taEvf$|6D3kzPt*8w`eB8YYH0Xx%N3+<|USeCBOVmsD%(g&HW?+e(Jb$R&Ok3o1*FPT1wx?QSkC*V^HRi>0A#- z%Zrea0L=TBxBSJ)#PVtB9 z+8HG>g?xE_6jofu*>ua#vT4U*DHUbM;q#Cp)dv#VR_bXFQ1LqYF>yl_@lO#tfzRG$ z2s=5A*habY=~MQ^JN@#WP1dJH^2ev-f!PayCO`l`Ue#o~D0qK+?b23KEIKWHeJ3cs zgoI#0TUnErK#Y2XO&)vJ2j7n4*39d}YLY%4 zmT%dle^{R3e2OZc0Jsue#~2pZ8n$SvzYf;@HAvpx{ph5fgc-+Cpz6zniNe4ugC5Lv 
z$bT#4k1^fXb#i671+*B{2?3@;Hp~I@h{j8KWvTIC4doQK2*zVsvS?+$s0p1!5vJy8 z;I0A+`k;-#z@+A$(`+Xb75PluMe*QoYA}sRH~2fj1Xoe+X@sWT%f7 z+LWusGpg-itZ`77Y7N>i&c)#!nS7#{k&&BNtj-lq`T6rQtZRM2e8c*`2kbXi&zoi# zPlwtAtt_0!Y%`PaQUvrEP$u{s76_b=Zor6IkhfJ9#%Z{|KU9hTs;`fcyd=}vA|{L) zAk^^=w$1d*FrX+15}rA&;U+;Jd2jov5?@5%f zr|0kg#Ixr(1Lu*qrTd6qJz|NnK|RnO0e3KpdP~fxAQI*-u}T=%A~hx`9x*^M3>ezh zg2JG}=YRtI$aB07LomO9xZMg|2X02gT|QFG%zKJ6;AAEl_CyFKDq@El zvl(&u`Fxn;ZZ?!um(FB~=szXj9IU5x4vsxa-L>&8jCSmpt#I!kF&?{E;~^%PNeKE+ z@$Q>G-{Ff8#YKB84oa3o9@D0X|8eCCXRDDrH`g(nUhq5ON0Y*c&996X5Ae611!~SC zF|@M7)RUJN3^FkZ-r}OM(!u*ZIXEDiLN7yp=9HkztlZ@!jA1T3lzHuK=z4>hIu;A9 z2<6r32O_2Q^Tg*$qeKg}3L4`xiHg3labT=n9qK>oXYk;8?^Q}+*O;hvbFS_8W-+9SkL;4V@7>>^;)bLZ}z z`fg57&rPLo!mUiWok8+Oj^Ey30)QmGpmHi)nM>TN1tl`99gBM9h_XY6ZLXe%5xx9$ zsTH@0uI|_CFpB|34z)a<<}F3X0?h&kH>2<1`2k3uVSbA+%C3Z!;Ew&+0E{5qCX4F2 zu?HXO+Hbf{8JC34H8u4n1=sg}9jJlv!B7DH*awx%2Z~@iTDCp%?9Y#h;gkvWy+A4{ z{`o-5p(Vh?#~N}7jNO1m<6iq0g2Ur8$xWoUQ&3FkhdxV^3I$+&tEn4^GXzcCUKmYG z%=Pq6;j=^Ff6z*ro?u_<$QHcn&JEdxL;KC7r0)B1ZU=BWDTkZfe;hDDF1|*$g@89Fj2^zT>23aqg ztnr(Iy92)P+T@4avJzjlsStoU*W6%A2$Zfr(>=e%uZJCNlI9F#5mA2ZJQ$#b&1y?& zP;jAad4C7QmHIeY|5##euJa3W9gXw!@jp5qr&)mY90rEIug0ut=Gu4W%~X8`9@H$% z1SLgwV%=V2VhPZ4A8q+i19aJN&-k)3WyC=85eL3?^eTF2#RM%VHl3-VNKjh)wKSDY`e%UIV#2&}Z6b#R|FxdqOo4yGxfQskeu>!VX z__?&d+EEm*#D(|rJrNLR$DFBGPZ?p5rOedB4j6QjW@Q0RY52sBsV9qbxZ)b0HqviOQ2G+OQe&T6gR+GWV<5DBzv~P@G9uK6 zP{Fn=)#LG{OC?JRlen!-{zPgjDoc2;o`n9-uAy@Jz6>n1c5U04o)0dJkoBzb&~Q^3 z6)HP9k+V2tUkVs&1r{w7B0$qRuQNMx{Xs~%qo81lqBk+Qil`?+J46~BB~i;8&`0jY z$u93&)2rKe>SRCIZczkWD5=cGZV?uWuhQXzDnsf8wCCB@FVO&F*G6)hb+P~1Zdl14WLcO`7?XhG8NsJ8^E`T-* zC2Hxe9f?~$g3Q~NA@{|Y9M*N2oEK#X7M z+G@lWEeom2IjB(%sqH3XW?}hJ_e(fxtpy3I=yD)_A^Z5eEFs!muIzD@HIW`|$1qTJp2` zZ{JcvOUH}5eGzWp#9IKVRt-_``aA+IT&6>xGMWj3^V?|4bL0mODB`^W>!zr4vpuF{$b%qMfY=Vmz}jMz>x%dE7j5?yxyPv4Rk9~`=)=`rP&JZbGm3ZqI0zHj z%Y(jXW2yXip0plKX{aGEMH{lW;d%i*B@_!F7ZL&N)~(;!%S#qgQ?cs+W~zP8Fe-e? 
zc~p2xX#v7G&~)ZfCwO~4gUayL;xCxm3S-Dnh@1wN5b$6;?m&0N8QlA%V9+hN6dKtr zAdTvOsYmqm+}vWG7Fz8F>EGw5&8KO$hIuRIkh4ln&AMNrod>Eh=}f%4DKxewAW0V_ z2e*I6zi_|N|I-QR1m6D&+WexBM=q-+%M}ubod?g(&dvg%$b4mX011x~1-=!x83j6z z8C0^MQN%C=6G5RyPEk6XnA-n=T;WB;Wd=PfbA1iipg({9@+KzmycxRm{Ldd~KzwJ0 zO$HvAe|f!SD+UI>B&$oBpOyK{PvEGeDdH;?{B?`IQo?scvj{{>4`J-yZ9}{KK$LZ! z5pe3y>+ioY2!_$1OZ=l}9o0_|lg?Z+pwxUCNsbB+;3jmf;|FKFaGPkHuwR;#6m%z` zdjiF1KG;fru0uZ5_Y7l9v;(xbdOX- zR1}%ZLQYQRusm(%2UYtV%$&~zI1d?KyO+#ygYt`QNaJ1D|3JzK>iYWcBS$hiazp~w zSw$&p#xhhcFam@MfL@{^bdjp+eP^+6{$qv{d2yz)U>I5~RjB!Wjr`QUq_>ax%bPxQ z{E!M3Nj?ry4*tap!*k({TXKl6V4%;VJs0%}a-rvj^<$r+2IdyuFp%{O;ALZZ%Dky*&=5mHPM%Z)C>UUNHta{NbgC-|pF5(9@7`$5Hq=CF{ zh{n_Ke|7hjKYDVJbXN)?K0i*cx=r3*C|$tDl(74^MNvuxM}+N8)7kPxmpZS&#?odL?$9!oGNc z6-sT#q8fEHp&U82i22IlV`rt~v}siX*ek%@3Y}LiESOD+T{%%1mh`L}6gh z$c^dD9vCi#(ZFWjDOynZtq);?K(?Tk2yiOt?dz3LwUIL;!4vwIW*LTg%GB27-cvW% zpw!@wTQdSq=9S-{Z8Z^{%IK1ECV4jem&w`ypFr1*6pQzFXnaFU!%GKh8uP&){Jz8% z^ryR0F1)x4Pxh-(Yy#|AdDY8`a+Q`Hv$2*~bkX!R3!#Srn$3UgHVA7Z&>aN|P0wHl zxQ#f5ho0KU%*LjF$qGGQq$CUA&NUbM5w_QhZOK&XcRaWS>}ZDjHC%YSSL5mr zOG^)F+bagFVpJXC`8xph+p)R26fEniHQ_aokOYw7h8MC=*mP~fZ4Ga{?o+xQ!L+=P z(Xehzu;TG6!M{vMe$Lr z(Gv{&KiV2iR?n4qR~M*h=KzWzDdjw>9A>vX-8hipJoM<}yYu<^Njbeuual&Dsu6sf z03@6nCy8VeJ;Uudd{+YmdLWjVodG%#fJf6~CR@)TJGf+e^ZplntktKG_HB}uo$OF}_4hN($I5f4 zh6e!?WktaoT&P*jx1}2;eFS2b8HhD@^O{(xpCat2YTydG;!O<_*E)y5jSjh!fv$pk zE+!&^2`MX7Rd-^+>P$#j_|IKx@fTY0vPI6>Y?p?P6L?=UzC~etzoAYSe6n6E0QDbz)K$15%25A*yEW+1Zavpn;&wB~7RP)nxPVjxAZ&w12-{Ks}|B zu~g?SY)>{cA`bvuaKYJ%sjQ8a4u@{yk793kWABNh7}&4n(8&cVgVD{Wh64n~UmAOs z0p$u%Q~-dZU94A9a#?~NAk^OGb;9{?rF%rjrll?D#@lyap4sk zL&VIjmg5ca9VCCm>$36t6x{d1V(m+w!NIUAoH%AzvSR+*=SZ0K++dwNbUP5+otc$& zwpOYEh_%v>KqD2xDwYRigk?aZ_6!}kCqdXH0~R1PIrIvG;NpZswLt9z3|` z^XJ?ZQPx+G&aU?#Q0jx|A&Wsa6i94YRa@Ji_AdQ_TD4w~$!!SA$x`ma459|-TL$pd zYH=gwHv46V;bsb$Dpe`1FdZuTop(w!Xw+jm*iNe$)HyY~P6_H9Ow>#6Gf1TC z_zc6i=ci0qR}23!LaVK6}GGI3@Q^M+z|Kz2_W= z{wVFG0ZmH=Jr?_yE7OJpVp-kuFO5kKQ^jV!n=4TUKf}2BfF-rsQNRN3(~7>mg*|~g 
zX`zB_baN=*8S`J!?b5X@;<5--BqA!=B1{HGg5YfX9!15KL?__T0Ve_}17aa@&|!Q=J^pUfA^& zSbZ@)+N&OGWe*=}2ooWrG_QZx44w>BvT6<28WM2k14Y0Jlz4Et0o(vd!^VBEkWPSt z2Zpkg>ggM?Ohvp4`2PzG*%THwm=-K)dxd?h(Bn{KWZZHbRs2h?(4bOdt7u#xb<$f( z@B~KPA;8$QUO&m-t^s!eQl{sQiZ0jYbJ8V5b#h~wocW|G}zUZ+>)D<5WYHRf^5Zfrj#3b&JRT!DM67fI%a%;X-9gIo29{hpYy* zenBjq5Jt>%c8SptB51PpHvL@VJn~`&qi+acQvsDA=?aIoTsG+NG}1&55nLaK9{8>S zp1`midyKZ*lOp1Y$|L*b>CCIHdTop9EKR4T{OOe&GmgyX`uXO0W*7q{=msT_rH|fv z+DuFQzR@~FcfZ8nO@2mJ3c7BI=W)eobLzhECqc>Fgd%?Px0VbF} zn9i>}xO{63D+!ET8v}K65Pm;6$UPPtOP$4j)#ngkk-#=G#Vpf@Z||)rIdm;p!w?Pt z0OUp-kxmK>m)-~9Z5s6=i;5T)V-*!{i~~`%z}0Np)MNe!!+PO#S##n`{2h=uxT6aQ zlvyuFufNPJq2rB&f4JBoAR(`YK8gC~SNRMB z(Qin8gOFlqP-J{f)j)T+#h=EcNAE8cOmbCNd4I=;)dsp>D^ev<3A zcPC?{?ZGo>+$KgwN`Rq3PQqmod{r5iA!;;h?pcThe0|Qs2}N%*|eR2vAU2lyw2&84*RdRrvvjdc;|n>%##@MIF)Y$;%Q?5 zBoPi$Aid0m0u40Hfv5pUr2l9U9lBst$Qls=l0{`cmEfQq*EBR_*x*ypzY&*hg6u9E z#H}0AG`R*zD&~~p=;sToi3=?HSHFOI!$KUlKAZQ;K##zCoLT%r!Dj+x1LdY|7w4=C zFr-U?fab6=z|yIkaRd|dge))+VjP=9kR`LM3#AzjB*R60s-eP}1kW#cwRk_%L9np7 zV+68!B(nt?og@luwjDc50pipKOp5)ugQ*WSs!m9{LQd4!izn@B6K#3RFu6J;r zbD)8HOUZfUwn#cy+JHK$@#*ya1E$1UnMz#i-w|O$N2H+lE_E9YizR$VT8`zBvp>XW z*P&*ON$yBe8Ak8~4JWgcJBxK?SaxPlXMW~2^2o>&A*_uVyzAo@X1*0HYAN(Yp75iy zgsLCDV^;;DoFQCm7CQY8K@Hao2JeJMvL2V==M!#jc%lvvT^Ghc_pv^z4^>=}3OOrq z=vEh?Tg$oXJrtjrp%Tn6U%1qrSu(AB@6>-5JAzsJKb25ld2l{b;DbGQrXHkYj~yDwqv4m2c@lOC0^hcd@nDn{thom*>skrMA`?IQcng zc)HHV9zAVLUTP6V3zOVSsHWrvLNgLE461KxWpc|=V4|AZ8=0tACKY1)?{^Zmh-wh zG^3veZa}6Eh;7+4oVCUbEH-b|+VbbyyAO!gR*?t)U1v}axm>A?j*QejFL<9I?pe(P zuSWxd*CWMHJO$a#0Q{+{7Gybc&b>#`VFf*tWf7 z!kadIP%WZ<*UPG zNh?MUX-P0I-Iz$m`_lZf?d|uk_;#rWE3Gbf;M>(F(5@|kd{WZopwC|}v-qc%NTho{ zrQ+8Cai;+@yzBQ$EqDQ1vQ}=ZPqz=@uUH^7d?}j!E`ktCWRnc}udMrIEw_8;7jj^< zPc^}Rnyj(yI9y3LInhq`(Hu$uUip%)?zok?@_Rk~H->*7-LV0| zDYe|Z?)%I);)`VdCj-4#{p7$L!frW_T04w=$Zg8Gr4P-p8XRcdP$hyY0=Ny%SX)~c zX(E24@UOXsa{lmAXmO00SPKJx4{ZF921Gwh8hS_q)S6^iYPO)uWjVK0fHRSk5!eW> zg9C9#{vGnwBM3S~J_?AiV%7ZS@v~>1KTG5$^ZZj1ydK~se4w&*Oq0x7MtaWVw1FFc 
zeMmGkOlNO+tPDu%xp&DPJV?3r`_ml|xsRp$JN|v4>bcQr+rWb`n7FA3L5F3BGaQQ( zfHiKc23`sPdArL+!abudOOQpNFa{@rG1|zD118FC{VV|kir^6Of_u3#|F z;1m1*$JtweRh_o$qf4<7yFfr4K~w}pN2y-B^G$ zlF~{_OXq_B{UXf%_CEh>pX(g2`L6j!SZn>(8&BNN{oIc{S0HmI{T>?-^?iC_<0jJi ze_0%xuh}SXi8D~eVp|PgZI}KVEmrUN$z4#hx7snhsOxUrK@b5VHMbi~K9m(c@s7J2 z@*J(LzlcW)dvfr=>YnWyt6tk^+Avf{uCDjtUtM9g%aVeaIMe2(LexT{&%JZFKE6|@ zfq&b5i)JqR>0*ESfz0h=>Xz4V(uRGnHD-zZ`A7S3?BZ3CY9|_>yHSbH&+9r&b#q%L zDb=M~VcLMm@B<2Vg;aJKGH-02(0~-MWnO|&+~i=Zt<#h57TNTzwnB!7dg_FX|47hh zJUs=z?=SZlfSq|IP3;}fl=${eOPx-3KNABBO*(I~Byc@2S0{`9wKZy+Kib<(vt}>i zS-<5*gCE41c3yb2jztcA^Xt75V>s(%=L`0Mb=6f}x_r8CS%V?%Tn z6O-Vv2z`pkBv{9oC;H4O?VwO{sX5y1|@I0`4U#|w|9in zk2p49$m}=ylKr<3{*^046|qWgp64Yoxmv%GDbB(uV`J_NX4EqEj`qx$Y!T}Ap z{1-Xf&oXTDro?^lJzPUO;`}x!5FL#(K>syS+avfAs8*NLoaa;Df*Q`4_vnGwtNGYn z_3|t0gIw#rx)1N~U1R%oYCDUApApgR?>gW-yxdE8Ge83#`;ATZ(?8v=W!sZZP>Zgm z0{V^%6!)%%d3<=7;cvIyoA$g!dyjH9z1|{W#{S@fYsAUJOH_ zzXhwP)&f@;QJFKU)L{*z1s&l1U_uEQrFwtm`deRLokGHHj1F=F4HB)#p%@3669jcw zwszN}Ef-1Kv2LbkOadZthi8m-|9%O$6l1;(+?$^N4oa> zLgJ5;S@!DGS+*wgoqxW*7Chk1wvb-oO-uGb{L9@JwZiWS&OCqt-{rB=?MBh{?8N%- zv(p2e`5b|5P*%~x$W~}CFsy2vMUmVktpr*$&b=rKm3tGtFTY>oPqtpO>r0Hav-vqw zlf-2^S$L;d08L(d92BF4)Y0e@lg{0ckg}t^;s^SP>cLD64t`xK;TpWqK81G@hT#fM zA!Ejn=|P6?9x@br-8mG!$vh>?t`JY>WfW0V@POu;RT_t&W9a9rCp@62bTOMv_r)62 zf2|jv72b4jZhY=SpNfu@=njeR=gX-Wv{SW-&WHZM~~^pfT%J!g7Ygwi9>Ie9KETrGTxje9u>U>|~xZ3z&v9;{kbS z8&w`{`RlL0;-fWDe)yU>x$oHUIi(~6C^P(MaeArD=Oxfn#L@G{UBCUM;>zRsa`*yH zPp*D_RW@pryH+XVXu>xA%2nJM8@g*GXuk9MD%Tc^L)0q_U|DSu@)a~#8iXrPrk2>@ zs+~z{v>R_k0G!=o8aT~NEGa2lm!lm73b-5@HH&&TVJ5_dLIr)EPv`9a4@xPc-Kg#( zg0-;dOT-uh@D%(kG`u3~qf~{@RjVhj1Hkw)TT$~NZTh9uW(e+!4gk4F(EstBwn;@o z!o~USU1G1~1hmM4K^=a*BYNNHcR8Zotf)fT%l_FXy3@o2{Or-1Pf5q`18+NAV)34WcJiyguD+G;LGSkO z3oOd>K!Tf#elkjBd41Bu`?{5mPCSp0rM=kuaQ=g-dn6vg>}OuX5`BWjMu;Z?hA?~IH=XkqI6am(TJJ9t~RaF2~=e~K^t zI5iq;Zt%^#C+b?`+e@pa8^ITxQMVVq+iN-V=e6G|7toH=9?{2_`~8oIFxm`S-T>yN zK%s-x)S(OQ*`l8HV*i%`I9C`UHuLol2${dRz#uH@I9$5kD&w@k_|V6vQw_8nv4pgQ zJ$L@fMOhcEuOIz{uVFY} 
z{(M;>L44Zpe?pO42SbLWkO%yf&wB%%o;;!atcs|4VbDA`JDv(bF=tzQA{q@kGyQh% zU|@LvU7v=i!xkpy{+k-NTdQLk@{6&JsU11v#y zgQoPSoomYbXX$%wIC_mLRv^*#I?B6jRwXGiVL?(5&nTDkWRcopX!-wVu1)oS%bK>D ztjFWY9xU!=5m_eV@jgN4xertB)agG!E4>3k;zUh|%+DDMHBs{iCH5ALTQ}X$a}8hx z8PEB$zwAOAtke_H1#JYWDk_PxF`5ZomdSZmMorxv9h6dk`f=z?t7aWzTT`D1*nDonWy6GC#hdG_i&P2# zI4EyX!`W3K^%C5gKd+L|RsP!dC&1SZxi7)7>itYcpS6}pp-RyIj z-Wcl)9?d`nMRGG$9 zvzWmVWM^MS7n7NE=vNI%hVoL*8uNlKqF3`stK%G0)6Dz+;F>#@%l9b3dv2wF_hb5| zJ9G`^^~T)hYkrfSJKil;2&rz8XdVw!8QQ)9RPb9zH`07OZrOM#s?vDgeG2Q=qvvDS}V|neZGAB_nF1?*Z=iA^a43TT+Azd zZf+0y?c;AfY;X5G>CE%is`6dB`3KQ~_uqKhA4>!XpDr^RXu5MpPHsnSeb{CgBp~v$ z{gta#lGm|NOiez%+bIa1?_uk^wO-8uVw|%-{`~@XO8_zk>$3kA&Up28KWL7l@CV_1rX0f4%IJjq3%!hF#?~ zu!uR7eibFH$J5fDV||}a$(~whc6sFeFFNDD1c^PtRTeIb$hbkR9MZGuLm-Y&T|ez; z{&4@3iP^x8#N^DCE@IoL_XaZ0^v_%QR=U;6FF;-MBDvy1;*G8gkNhxqT#-my9r z?|61_Ju;~OD0h+KFm-uyo8Nq;HgVA;Qtz(#uU%%(%Cx!G-+;C>)UNydMcmCMcJiO!7^jRyuq`D@0)dj>C?R~7=6@E;p{J3|JFHi6GQS8m?-{_Q1@s@2wlRo6bT?T{m8g4FT1 zjc3iYKW|TuHQ4yV@6?a?&7}|gm~lka*udRR*4Pc!DYpu?BW|8KP_%9Il;T z>3?@u?bGS(G?&Es?!oSeYei^>L2VZ{pOgC;HNjR{`N*AF+oN|VApPaPFMpS{U47Y$ zFJ0SBz{Fl$3;8_@IW24h!Bn<5EMNYsz8TC0aK!ZMHiZ1Tb0lAiJ>k6UtCK5J##DAp zC+-kvvWiEa7Aa^WFPEH~MdHk@zFSzh$z}17%Y_UPAp{J0!3W7boLARJ7y*1T7QE&p z&(%LNZ%b_g9eirE%;ZOWrDYrIw=5O_D@f&ho7cYltvsD_%;nz$ab+v)CApaW;J^;H z0K3MZTpJ6oFE(8vd-BjK_dH>UVU0)3>ymrw8QFsy<`QW;XWryyjvUgET)h$00lK-_ z_W4HUyR=2BoD_B0(WOd=l<99`b!DYiN2=T0{T7puuKDRvS-kdqAxDhIGO&`N@YzT8kX6UC23MF!H`4kKys;QI zQ@ip@M2ge%)5op9>f2*)oJl#`n06;;E5K1U_Twr{`UXp4C^hQq6&rUr_tf%f{GJ*% zzf1B~2w3Gg|2ywt^$*uZnuZO7LJW6Le@(>phH-bh5yKHdu_xQ9X=7$e8O#B;!)LW6 z>JsmBs}q5U9&Ys@yXg%x(_RTWoIB~{_HThxZ|T5D$bYCLOtW0Ajy11g(jdNB6>;P9^^Hva7Jq9zYwpN$dpHt3xCv|i zJS~Jmt$XCyY(^JUSo5iwMZ`~CQDX7iW7bvfJiOxe9{2!9_Qw-S0uj6vA$vZ57K_uu zSKQXR-LcAiHsZ|vefHZq>tQDU>%LMVb8%RMXAiiW85(K5-M>{J$#r1gGH-h2Uj3bO zwUqVSyZgnQ!#H$U#io{i|79d zHg%jYSsw_v&bxoVBNRpRoMYW)X-zWv?dy8FvVuJLWBD{~&7SUVR? 
zUl?|3W2^Uh;d25GRh6g@=e0_oE zZS8|o43Fc3awWGh_g}a$o6{?*=IfJxDY`qsw^We9HU0``Ai5Ta8&ei$*8a0!yb&JNycHJKcMHW(L{@ zrKAdu4Cb4eG&Sa5cx?E>-@IS>*f;KU{>gSa%KV@6Cn+10A_(*k5vT&-vOeisDAMO& zKDV72JfhtdQ6=bLUp(6r>8hVGSYAI?&oSNW`Z3q{rgQ`sUw@_s&$8EXFJ2{ZEKgl&r0R@ zBRF{!W?|vBC_tDxsWyY>q>8P%#VP*^A8i0Gei@LY%HRzqDU4V#s|+% z=I?nNX-~oY;dY+X>8@#6@vz!tvmT$9Sq4c`V-4~BSDRPtWQ$VaN_O++b9JC`TJ^rW z{>|v)gX>o9?fE659C^&~;1_&7T_{$$Mt-EnXJaROSB=@|x@Et=z1y-Sk`#*crwl7j zAsGmxZ1wD>B`8Ead6^adU1Ojrr&3*=Z``aa?&HPgxGih)cN$gtH4aR6I`yHREh~Ke z&q4e5j@MVk9)~QF?C*D^L~cb9m~vZuy||L!CLSpb>o7})a;cmUGl|)hl-a(&mgih; z?yVJ=PSTY>S0%rEJgM#=BmdieEz9(~2TFQt-f!bR7hrGQ)ob6HyLI@nR9vHZPEfP5 z_SONL@#togtc6eE{DUmgFvcoT4_www5*w{7&+Ccu^t0COQ=Y<>q-YrKeK6(G`P$`4 z;iSStT7c!NqrBJZlk(r+%a6U56WbhgJf~>S?>CdYv5B&b64bSIO@8yN!rH;}-qgE4 zpXLA{i za5L23C!=9GJ@uc>o{wm(Hiss^-)~7Owc}77(r@ikni-PSj%RBMxUE(vb9liFl$1bg1lBB2TXZcID`QRg( zd80t>)%&*&Z2bLJS5wJmDP^VI8j5h)HM4tiy4fi|ZL{pTlB0WTO$zI#<*%@H`X}pl zD&0G=$Ew{u+_L6j2lvvTu;KtQ zQFM=#L@LLZhg}Zs;9eP|<$Ls93je@mSuPu$Cgu1iGE+^~dPlCEFz#!>a=A!c5V?ES zygBomMX8u8ZA@>3+F8ouG2PW}8!F3lK@hV=x8JvyPi7TcTnjhuW`|F8#cLfY6t3Ft zlB}Atyvbjmu9pow+^*ZyWTjneCq8?0vPaV?Sz?yg`!+xe8f7i^N zekl2~d;Wf?XLt_K?tX`~mlJ?s9uKR^OZnTgp7+L{EYIc8SlAkd_MVGZqmJlSz9j|{ z(}{KZ#8j)-K|sgA}jMc)3jCcvrK8@Wgkv{K`2i z-p)ClbY_z>r?81BbIwBs%8WTy!&Hs{C8n|Inry0z_`zEnrblH!JGh>W{Y?6Hojq{n z?~ivBaHQ;xRbUjiwZx==05trB#tZD;sdKvMDdr4bzn)uA&)K=$^d)fbNc0J`pb!Ksr znV79{*!syny;n|uVY`%6-M6lJc*1o1lCsM1rH)V(md5m5pL9Qgn$Q2_J1)uDU2#EF zz(SxbMPbA`P$;J3sm~fegFBZtz@I6ld z-KoKb2pfaIq3`$ePp)T=$qqPYZE{@0I`f$CU3E9J`Y6{EFO{r2OLADK|_$Z_Ih$+$j;5V3^rrvSfZ?tM_HiJxcUtq%3h1P`zCAr}F+S zp4I26yf>(4cBwPb{RR_^7UfaUH5GrSFi51 zL9bOy+Pb5=y$@fT%u`-{IW#d}UYLhNR?jsp;Dgoh6VDkua=Yht?HWjn{9C&WitS{9o%8dWFrM0}*I zJWo+Qal|;?Jm-1kR1Z%kJaJl`I4#9)x@F7t1hvbd`#J*ynN7|PFQ<5h#2xV#j52>a z5T6`Qw&~}^wZ9CKl#Q*^Qcd-C+2m7ORFYUkw{12EiPo!O%lCINFJ`Y{$21|{hMFw9 zxXyt5obB6BTH8%m+sa(gO(P`vY>0~c4mED8CiqR zt~=hIow-@aNh_5XD6D&O;(noSPlGV%EpwN@ zZMs#acVNxKmby6lZCf;DhC6F$mLL9V(fgIN-6_U@yqRZgE?4Nuc)a%Zac|+_3cadr 
zHNjDD_pg{1Y)tUdk61G$pNQ`@)a4+Mc%-5xD*DNjwv1tgxpB?X)|o+20@obGewfvRll+RauAhk>6%hC-*uetEahi$_Xn2^aTsf0(Kbx~l%;gI1#5 zP=1_C^UF`&#V5W&gSq6yw~K|@f(h(0PxzYgv-0WiJQr~lH71L%ui7%lsSdOVO9tRz zbXJ%%*q9cj_Ix1TfMN6e-rE;CQnzAQ5 zhFow7c!yKMJN5sj{k}xM5GM7hOeCD4M2;2|P)Dmdn*;~E^B(Fxb zX-e|H5YQ5dIk9JQqE$Zd-kpx3mR>Wn1Du?Xn#EbxsHMd2h~?zrX_6Q|7x!sh&W5&E zr=;?nbHoejyX~xX>f3c{xKW3*>>caV6OT4ht)jWg?kg2jRNI-v{$RjI_g2&zI# z$>C0pU3P9W!-4si&ah7pXwHlR1mm?ytGJL;<@|W9)Qa`oS!;Kut@!A`cT;zG38d6 zXTf?lMD=JB+h(+Iecbjfkql}hJ>VEDYwwzea3}`s6U-~y)dntret+E6N$itMgLzL< zi=B&@p!eaI6PC()F&iIn}+l{1*J+exnbByTc z<|hWvCc*kclQW%8E@zx0Stx#jQI<9%Ut1y_IX6=dy$jv2uu)NaD68E3P_)pRnHa=m zzdLVa1DhJ!B|kxQC1gpqT;TB-cD6j}tNRLsn$EJ?A7$iwV>vfgU)bzWao)8vkSBUs zW`-2=%Jmw?9(T3e-d|qz`+Z{-U?8MR7`6Rg5G~XT)R9Q(cNDG)6*p#I5rNe1@U}Kr zML!GMo~Y|(G2Mb%(#9cOvoxm&j+ktF=%ym z5Ng~SsiX37t#zauBC6q1*>%FfV=+CA7Kkfxry%n8ttm#+!Aa` zKk>cK8DxNOp!L2R5R*mb6!d8&Fy_tz<6r{O1Ho+l^CiMIlj!W(DKh?!QNrFfsZKZI zboh?yW24Fn9&hlne7f~)2z`k;tG5}rSsmHZS}Y{HPGo=T_rGp_c^<5qJQeu*WjO@?*z&bJ z)ji^2Mw(|jg!rJh*-eA2tT(zSj8gN|&^j5HvrRQ}Y*g}qN>f~1q+iwQOY>2l9aQ>9 zYxJ74rw#kW<$q+8U@>*5pejQ1&ida^PP2HeSogeH#0{)>;nGT?6l;4#RVDjy0A+;s~0wix{9)t;b*OO;CX6q!qYYx`RU;BKQ})Q zK1$c<#IwzaC*%23CikL1P1C^iewC>d`G~gRK%SD*$r-AhB~r@Dmo=n@l@j)&RC?XZ zXsE5FPjgOM!!B`0zb{{CN;9MOmOz&hf2V6%{uURUkG{I~#pw03YUj?i3;Q8(pR&8` zDuw|Uvq_<;zo$W?N?fbJtxBX6TI@&6d%v;eH>8>lL#MqxsoqcqLoiI=FIibxtBrCc zxEseuOQE%_5ycb)aXK{cK3S3-9;7M%)2O?$MO;fOXLsE?Q8qvtcY74pCW9ewJXz<= zg4Bn5#Wy5BO*Sz*Ol;a-wD5KmM_82%4QV~56KYE7DDA5gC^3`u_d)Z3$xyx-Pj>o| zZ9Es6>}_;X1}ct2GVUzKNQFfn>Q`0p&`;AjVxUqSViU1tP0suFr;PbkiZLo;g>si( z_*pcCYjP|sbEPNwrs7~tu;p79D(#I2+e(TmEMH+9^$P72;%8W`ZjP2>E~JfNHf;=j z!h2mgWTWFtF$ls9{kjf`CG?!HTtx|MSzX?F--5;}o%wCm*eqdU^4!VjxlWBx4&d(}sI%Up2?MbXvJtatKxD5j!7SBW1LOY>wgc`>A51LwEuh4!b3|xBU^;7_UFMVhm6!Q z^a|iQ2splxZw|J%w;$l)F&Y|{o9ZZ))v!-BF9S8|N#XFm{HdPp_w8uY9LOowP8lSt z^F~&OS9SYm^Ln@&RE6(o>8TTd_w&pq>B_IBxLaFo`x39m;)sTP$U7LN%Fz?8YCF?a zl`3j>u*CdZyf*Li@h-+0>#;*CI2QbkF5?WD0Hs;03GvOcYF+#wPm*+rJ1eA2BRY*jk|UG 
zwX{fP*pz9h`XZeDZ+OkLqF)(SiR1NG@3)wIJ|Dt+!fjzy$~F-LaqBUQ`B^HMZ|_R_ zR=mF6;JhU|;xDevh~lFE8Hi|HHAI z$Q>V2irrnvB#4$;FR;{!*|eJK3a!X?m}9EP2RtTb6I04=u2{RHY}r;GRe8!CVp=Q1 zF2`4&_{6z|n%o?nsXsu^!?w#_sUg&?2RA3=M^pY6mih+)y`%*}Cxai{mjk&g!hNjv`WoRaAw zWopK(bzkj_hV8o)W&>)1fvZ$h;!%5eIti@n?Zbs}%4I*H&4z@oVT5}nyijpX;I4+I>z{N^Smjdei*IXb;R z?M=F)$bVwHiOz{Rz^vpI**OB$)E=41b16N~7?SP_YUYgDlFTj@dI8WNK;AQ~sdp7n zp?VTe`+Dr)K`IPKX0yb?<6RU&UQ)$fu@g$XrD%B8O+eoO4<^)HGK5@o z271$K(X>g(<_ET`-8r5H#izfw#vDUF^K6QHZfY z^U1#6&=-V1=BfomgW|RhAZKV3c2&`rv)_a7JgZ86T#0AIx2Ba zQY!iPT{wsE0#Qb-PH0E;LPl`47#*`p>501i3#lPq>o|qmAN2JKK?fBx7FLTAbS+T_ zzvNSn;Dz8;kY@}|l~LxzukAg}Z?SdZLn(T17A_LSrw+Ea0&~J3;LHihN8NstgiUZ@Ksul>NsEzPs?};$H+nUHB8UY>IHIiJ7AitUv zHl#5Dj1N?}4(`P}Mp2MaetB^}zr{w%c|vN24xcHcq}|beE?^mVe#UWE$~0q*Ak>^|Z_mK7~Y#;@Ba? zbXvx$IqHdrTsrTkxxmQtDh1Pq@(FHr&lq!`CQ6Z?g?n6dIN-v_V5R0zp(L8{V7#IG z_Ucg~Ee3JDNR3^sr$60ZJ&-P6C8x{NUdj}5>{exRbHP)_hbQ*nTjsynrD&V2`+l*=FZ}gSTuK{~yvzx#~20w<+(z}ty0(nEQ_etc5x4W!LmbUW@`jPW-^k=D$dT`G>|JOwUP;I^tx80pd<#MofB- z>u~EwROPLsKE-0KoATjXo!<;_pxBYv?p(Sf2Uqg;yvGkSrw?odd1pVB#L0M0C zO4{wQ@CDOqO$S;6nx#ei5-y{`zoUbj ze*Y5C5!E2h>k1yaki`K>dO^4oH+T1{v}efRQ)TE>-hw7i`VNW|R1MsrGBwg$%SU{R zsps+pNraGL!=|S}7P-v+BiZYU^{jqSDHE~~mZV1;x@Rcn0x}BdULhe~PfRvm7%$5T&(<6JX5QwOw1D>_u(is+a^8fKWdi_IK3dq-(r`_!;Z_uK!w_vdBd2}j7IP^p0 zflq-h;bt>r;*S_Mq#*jY!uSu1CU8SDz(Fon-&VGiG=Gxqf0jgI2`C&$V+1On{PKh= zXlg(6h+(Hke5tr7#zi?`2y2M#fK78DDmo~NDhgOwN$x$kQ177}-nvE=6Y*8TLG0ti*WnAXyJ2`lJ!2Sba^|3G^$d z3lA6Z4JSgl*u&Dge;^a=>-;izH-9tOLYl@M*a8N(_y}b0?|h74Na&dNamx) zw4WF0`4>#J^4qxgbQ?kxdsy~_!$c-{7(q(Mi!-cYhC6w$|9RS(*&74WVi`H1zdCiI#p$w! 
z1^^)nMejKjPqd-Vux*x30q|;*H#tEOnlxQg&!q{HXdbx$+o_vi*54rP?(SZEWnH*!%jANssmHW<+jgYjy~&AJ7=H!wMwY^Pyry9YsWgpo4r9FlX~S9aw_F6JKv zps8R=j-i@zep2~He6gSm`1R#6n%Uq38ZcNfN^aJ9!rO`|r#P2(CUrWYA&{@_!f=r+ zbP8$bK4pAnEG4;D*E#!slSv^LBlu7lZd$d$lfwV<%?>_H=17?#BD3fvd;yQshVF?X z2!5J&hlL8}vM=HK8w;)U^3yueLj>=M8f&L<+{9$@TD_ReBla^BSFT*S2|_WCcG)shgBI7aEDTw_3Gh=1R zy*l05_b&vBi*4PybrNcY9s|(&VlZcsthgP{+ls)w5HN~d=2Sj!9eo^r?2 zWm|5LE_@p4LNCClFb(72ly7OxMJG%kUXVT22}!b}xOX;~BpfS2nC?FdFXW}SOikZ4c zFmQYXayr$Izp@Fp{JodTMlia|o+}b0BSdZ$`3du_WVw5hpqg-n$0ycTZ( z0T`oJ(KlkfCr>#f5P!eF6D|FGL)< zp=E>023#lW=6!*N@TZJ6eTjIzcqr! z;%EnMJs*Xmu~rlc4AS;+*&WYMt|Nb0H*t!t_%vmMnJZA6lhWs6H40eaq@ z1duT+H(M>8HdasMLqjng?M?8b`EO2Cxh^X!dvCsQ^kr@qdc}sL3cTLIaolYC@&d?i zaydFcNrFdC{W|f9@3}lXZY$tXDSFBBj6&oPsA2Ljfu>y9;FWIl;|ZB9zu$YN##Pop z4w?gPbQs^ceWXHmUK2eMhi9@tF^ftx$)0H*E{jwcos(@suNi4aWE3{J*}832!hU=N zQ4G8Q;U5!>aO;G#C?wGa+F3*h($KnnEk7sdBPvAl4Gai=3r5L*HVn~qx1s^16b`tF z1ZldT|K3RakE!R9gsae?E+w)Lk*S3g=`Vuy`mWH1jV1r_nIt%fn>x7Z_Aenm&09Ob z8$EdO2)KQeej5W>Xn<3NPf`V~ z2FibZ5_vrmX8sg}XIvyqhpYD>0zK7ZzIFNa?K23 zjEcKL0|VFy!bh;sZUeNjD?`uE4UUcvwUOJ=o9=Do8UUmEyODGAp(DE+CYjqPH(hQJ(2qZzrN01n>`I6e* zit{AWh(k&3*0?&a!s$P+vQzI7Mk?V!%R5T^<2{ZcG1$huB}?iy>SoMg-7-EcplbK^69#5;^M|mdK8Vd7iqa1gWIKCGO zQ8QhMmCU9^ya%?dnQR=euW1PR{b>#l$r|k616>6}xNgrEd(U*B!CNg%oD^QLh5KX! 
z)b#2R-;-T~g{-{23uX`|?g-wPDDDY=cchDhI*m3Y_ln4@iVS!MDJqb3HsX~?K!eox z7VLT{yrM$SF&rN9#w1Y%%aB~1TBL30FHi4HsJM^{x2o4G0eyo=ly7TM)r-HGPZf#j zfn}A(%gCC!TeG5_|Aim23F0aRzg@kLaTwe%PX;7_#?duR70AZ-7X~<1fjfyp;66`H zdn;;7)ldY~&RL6@i6hmxAnbAZ(o=0*u?FW=-A_8NNMc*13=ZZeocQ znhry8N-)Jj(!b^~sNl3ZhpqRnR0JT6D+ zsy6k-7sl9+d_6PSG@j)NpZG>S-JIZwV~}1YC3DWm@puwW;@Y+AX-?Nk_st<=L@ij5 zOa!5_k!%@*GSXDynPy>%0T1_il1PvoUAUsloqBJ8HG-{ohI$hM5mlwKkeFATQX6k} za9AhuGYHDt0yrYIx)D*NSgjq>+#^q7e9oARBllM@5Xi6`dqE;wvSj#?n%*3~#T$Ha zDS{H~qqYbgIrXFllR`BFN;pzd;K6cq6l%_z2x6u=fbl(K5#OBimeXc{nI>xcao7$; z2@5bV)p6@z079;=o2zK8RJUk!$NvPNpj7&3r=iztXOVF0qA)p}HUbGr#bH)fmF0Pb zXJ&Y#xnYSp*AMqsY9sEtfTV>~0kxa$#)|=9>sG~LJGGp+lD;5A$pbOLp*`>pxl6Zx z;0Qx=@yPYhe>2rsA$w?poR?{_L#*(h9{FBt=k1JAG7)ZmSM|rY=`uQ7s24jK_`DbOPkgP z9Y(nO?Q2~%Q>vfsP`*HPU%VlV2+758#A?dl-N@UjLDY3d*X|NO1|9tQ+>5X|YOgzS z4WS!8y7G`Vc_`@JDE62a@#Y1i#a~L!E|85xkp%2=;>kJ+CC);UdLo7w-n{SJ7wFBm zTNkSd+l=*+Squcr!WoSh84p_%uF!~i?EwTXgLX8z+T`qjwW0>za@F)&OhgFLOZoZA z$pnz07JiLj7f2UlNv7ecW+G({yGtN~s%ci^&l?#5_z|08Z-L5avoD&L0BI6JDL_q1}{Wg+_yHFR)Fy$hVl!H<9ytqp(qOY`ADGiwHl` z3bQdW0&tP-60;upk~Q7$iy+`|r=RHsa+C=NgJ2*?vVCFtcnE?3y-`21rC1M8C<%~8 z>(ML)(C1IGdANwudn;MJh>b)@J)cCY*oCVB*dv)Cxqc+_?LwAMAkbcza)BvkpnHVp zLA{CmEoN}47eA%8At5axkObLqlE0Bu3{!wOpoJH1I&+AVvoOG>-y35K+K@-9hIQ`5 zg$Syhuk73&q;4VN|LCT8h|3uhix7M(ghDj|Lah%UPsP%&ky-e~_n8N>K3sWnI-walhrSm}}18SRmJ3Ek+2c<}UJNF@}FveC7T z-$D>@=2@#4Mt)W00kEU#ju6rkJ&VZQ{lM_Te^p+Vl~0y{Z+*!2@Z@^21ck;J2w3is zx(5D$YKQ5VGI6PE__RwRzY03c&V0m8x^h+yUB$zm%EJz`Af;ifk7AFPd_6`ase}2J ztY-kLY5n5^=&E1=RNqDxQA3Zbafp0g?tm81w};3EGh$0bdbTv-9;=|no!p3L#rakG zF`5O}hKGUP?-r2BG;Y*mLiidNu`#7*Nf>n${XF;=<}bzk`i4nnbMkl;EFL|$-(k@- zoB>;`8F(jAhCExIWXEtNH^f1@vgacvdL(~r-M;4GM)}c0rPe@yIKmL|PLD$OjElsqkfmdy^Oe|X|0k~}QHdC-u50ewhMqahwT>z|6+K zrHe>T2gz`OK-(cHf{2_JtNH%nzA5bemAMRTZrfA7lC#u|k#ZUExi~A}dAs|tpoIit zL&T>^VCtw=QjgjTj^S5rLNi0w0+ie|XRG3sOL!67mumYA;4@LE}c| znE~vKm@3t9)fD@I>O-2~{9w(0Tyz6@hjMfJe5jr*{sqTd=i4CR;6hw<#!p zxB44WK}4~6_U9U;(wApY+RFv}RzeIH4F 
zo~vgHe^`i78~$wh1jB6CmKj-8iO=@aZAm{>ha00Y^87#(t#v@ z3ljcaN_Gn}EfMA*!3@%r)N+~o(NA~~z9-g^h>{JxNn-J!I@Elth8-XTEtGG<`64Sp zLJ^@;C+P~cZNA{a3Cj)R=9c5MeSR4XHE}!%YKFr^QmN6j0IV%X_H^UG5MmM0d77jC zw^%>m;mQDZhcU^kx>h^X-(00*Q-ZviJ?y4NzZC+OuWA^CNyvz%u)NwNam53<23K3j zqLhq4`He`MQb?8zs}W2t!N16PC)-c_Te=+0qZ7}U4MiHPn*Oh<6Y ztcE^wTDz0x%yk?mK2*QtMeN6`$4zWbcYFDyw=7CBR{r80gs3z4NZ16dfQe?mja8?;1#W@$pFG)7MtbB39;jM3h#6)mI zk}jVXl~w-$(F2h~ruCu8JP`V;PJ;KStPMq2E+xkqY>wp#&B!=LLO*oDP+QKK4rI^S z6VI)}SI<>Sn5%FVM^I_fwTEGvXUvd-C5eX#BLL}5cW*NItB1x@2y6|lZ~uA{t&)fX zyJQ1AWLG;G&Or_gUiDJs@ObU8!j&r|0?J3AFDDjX2Ul&1aEYWJW-H3ONynF(-DuST z++Gkl{Shx6LZo*7f^fT={FDW^WMWGnW)<^;|k|lcr^|o<$zErcxFL10&ORXn9>Lmf8Nvug&OEd68Bpgha zE}U})xJ0Ttq#u=db&?JcBh&r833fQGs5ZwVd1T;8QXuSu6^c{nO+j1kaA_&ZrGcY0 zni9n3lY2y}HX`39bVB`1q>)BAlTE3G0XGGuON&&lkdMaWIL3^a?J}#pREsZfitw`& z0p%B1%6W(cO7jK8l+6rzW~+s@BI4%|#@-*nMv(7Ds&~45mq%5m%008LbQT%O3)bC! z?JlBSHI!Jm<`%@%Bdd?(!-O~MawQ*wRY5R*ND3_dP67mRn$2#V?6p#muNqFu-{*UU z=SIe%!Zl^~V@f2hB@udqUC){*!Ozp!AsA(CV%__*%$8g3G*PQXhC57(7>GEH$kaT7 zfg_;20gd5Y5^s}VjuMyuP%)KJN_ND@6-Xk5BKZ%V@RX+U>w)k9v(70Y7AWsgVCCxUV)K=n@Q#Nk()WTC$-ot^xXJQq(}y zY)H1It52x%NWnAR#TD$x&vbCLW?}afOAje%S;{W2?O(6i( zfeif|t_FF3QdTJ@ci0Fh{bWO@cyhxmQrG#CKI$gKKojfR(-5QEkp%pR7uPDv!auMI z2Y@Zi^i3KYXB~>wCrzObp6j+#XM?`^#^t2Hyp(zGsAx^tltN=fxzfU94rozKj z*J%h;G!A(@)?kz`ttW8}LD{~%BPDy_I?>hd6p|kywlz>b9nJW3)DQk(?+KU=v2AW5<&HRM`hmAt_HMucZ-onvI}L4Ym! 
z`Smr)4ufF`=c^{bwMYu;E#Y8K@RIKNVigJ*`XQy+xa%BJq|8@U2O-IX?%EZK;}!(< z)rYsVf-+>ctxI(t}-Lu5Q>Y8+^a^JO6@2bI{3Ayj0`$NY&x z!dywCA=NJ+)ru`z3IrTLv&yB{&V;Ryjan_hyQHGp_6xw^A!U1*bnlDwL|)De?kT84 zc;?DRoYAwZl>j22!$5`3ZkKZzEbdGtR{Xu_N*UAm0uPPhp3yw9$Rx+ z`LW4L(C!jTg?ElUJ&i-aJb>x4lQ}&0UvDO|A~r@-n>LBSNmZP< zT2>%5ND}rFDdKpTh??2~pq0f0`2fP!fpS@t2CKOi3kXV+01Lel5OEN%PVg)j@u~f| z$i)Okg8j7hF5DdBOj!29U;_;iIuGGOqN8FMAVUW(Z2>7}k)u%9oBs9mPA#k?v95AR zcreX3Nux6WWLHxC1dm%AxIxK2Uwt%CYIhJhd?2J~S)43n#5hV5mqAz$2`S#&0L->MUj+(`$|+$@bOJ5GECj0#~df&UdNx2KG_deZPjyaGW13C9F& z*-pge5_>R2d>sKp?;onjI7Co(k{)BX9KMjP2JMCy;|I0KaR%Bd%!3w4BE2ZKxjB3a z&f9O^;5p0<%(Vl-C?b!`hTK+W2#7&Du3{Uk%*y_9#=}YiV$?muKcF-U2GxXIjN6obc~tz)wL6HhqFlcTN0xaR}csA9XKx+ zNrfx6`m{WKNEjA~d3;fSIZB{>++tH;DxFwbrCZr#>ydOwQ>{xMD4I19w2OEd^8Lt8 z;IS-BAX;9T`dRPfBud)>gatrKIG8rgY)MuuBe`c zBL2KhM_%Dgz8T@$jjpvIKqt_L43)gO1QJg$f+0Yz0YV%e!m(k6#UP9d63~-~BBs)q zK)wK5V^UE(L_s>e4VX$1Zh!~D@!*JxC(#29Y&YRUceD(tTq9ENKm<}k(0)?&BFr2N zP5nxkW@O^6Db^)w%!=ePj!60|b5pZfNcxd1J!JpZ7~B-b9X?wc8a3>4L_O$y2qVQd z3+!Vj!u=9NyPl!WSj{fe!A+0^iWUK@uH8aojjdhxd_wG9OY>xXKX-_>nMxAn&AbLsC8x zcbj@v{Um80GZq3mA~d=ETZSqE|Ihwy+3B4X25O3cRx8W__DFy$AhS1shU-0xfO*}l z7}Qi^_IXnc|Q8}pJC$nrxo3+Z*zg-%G1@)Wt~$( F69BYC;GzHk literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpupy_handle_func_perf_pickle.png b/doc/doxygen/chapters/images/starpupy_handle_func_perf_pickle.png new file mode 100644 index 0000000000000000000000000000000000000000..4b0f2406553c525cbc31939b547906bf3143201f GIT binary patch literal 116438 zcmeFZcUYCz);&s8G$P0`iiiqC1p#S_f`SyI0R@y^1SvrUr6W}~&5BVJl&aE1dY9fo zMWsoR-a!HBeFFjlzp*gMIp2NGeeS(~-;cx?+TQQG-nr%+bBr<9eIo*U#(m6O zqM?!f{WrDiK_eZGI%+e^C6jZn#&17J&8F#d&->#2{DPK%?zyxh_$ReK;iHP%Gx>@P zp#p7X3UAg>l(~K0v&d(8QSQcl&+lHPOFC#Kw6=JD>9@0$gIB?g}XDjo))OF?0va^r9fB(MBC~a`A%S~|D`o|B` z&@}y7e2+0+=Fv_=-St+|Pw9&fmgoLV)1P*pk8bnk#?Qs~n6_OfF zE~mAW=hD->R_`4gPe&%jvgm(sl5YYjZc?=HZbY8ykz$ zN{ec5-x0f$=D?cbgT8)#kv%0|^4@QYiz~AC{&=ky&c9OECH;$$k+Jf*usr^#T3e@H 
zymYA{?TX_3;-bENTTV_+d3m|Y@4w&q{{6dLm^RIg_=%YrKMwM~4RPw1VlMo6r4x5T zUNdcFV;dW|xpn*YsK<~06)fqJt9DW`uR}l<_nkYENkfwq-MvXrR8;iul7EBa!@G44 zURiXA%?vfjZ`ra%=-JtYFCKzbZTkfT6qJ>frzR(r14Yc!?S>mqwmtCj3h1!VBv+d8 zK|Z8{pGUT}uwWoGB0?o`D~(d_(~7M>uEh#)W80p?hgHu7im2=8=!6Xi@M*m~bm_y@ zsot{ILYKVfBIa=x9l4@kwRo)-sKyLz;^$YE=ZcbLS~w3oByHPufm+?3W2kEo_VVS+ zBc_cBsRE~hk2^GORTNh?D$!3fdV~j}qN>U%yJiiF2)7@&2 zv7cv;!Gi=1F$ztSn0prOgPQQuj57nD!b@ikU#Y4;#V8RGcIuv-_t%fl!VN3@jXU!l zcS^bDbCj86#3_bKlK*ebFx4EHOwfqOzderH+S)up(p%ga11NJ3GU%J#geMN}pddck*6eHH{&;qRh2Kt+=u9)Jq*e$PZEN{@XXDn(Dd%@f-D6}geA(Y^_2tg= zIB(3W&&GkmrcpeqQD@2-A4H#h_EI~;q^zy&0%B{Y1jY8rV&K{R1-OHuwyb0Xv|6+R ztIK>}fWoL7{u`$n?dk07TpuaV%&(JOV|=yQro*OMV&}m>Zq%g)1?|3j=g!qnFQvV! z*vEtq9FP;tpSZQ2kuBlHi(e(aPZU4!^z`Qtw(Qi|?<0$75wO*l9jc4=8Y^0HEty7A z(&xXBsrJVoFW$a=Yg;Wb7j)FjT6r^@IQP+`N0+|!cqvp|seB$;T6(6hua7CW>w;xL z-}T^=r%u`R2Rn!*R##Ni9j+D+FZbg`KKM8p!t_+sCTizV3qChDw}u2Q)qOG#m1Jbx zFS)#VoUZRWAG!0$)n6?wEm_5E&uGM}Un=ob#}?}wA5RuBZ@r48R4^Aj|LR1BNrQK2 zXz0T1XuN8)g6@RyL2N)=OheeI4f6{Naq96JtwnA{ndWU#{`?mX{QBz&2@195zD?4i zLzj!@p+kqr23=a1nQ6RWscWBylucQl?^h3(VQ_SEiq}drTKF1hS!L=_=FK@&AM1m~ zrH;F;T=?y`-xjEKiorvR*zCQyC@PhDvbw;gzlt)FQbK*VRWU{-QjU&+q2c&=e)z|a zXBk+<)NbCq8K;|<=`W~1)?8wdW!n78fGl}(a`I3^d~{3NmE##sD@$|n*gcfdjHYdD zY)J>yey6kH` zd$Z`U9v7mvTnQc2Us(wFS`{4M<+^xI-+59xCHWc0xnME%rNz0X zYN?fk5D6!fnis!`xh$k}@a)<1n?t8#^a6D-9PwlFZQ?U!w&jY4xg~yLp=)Kie_ON5 z;%F90$6bXk9_`~9O?s->h%O^5>OP<-+bUcIT@&uyh7EHE%o&uz(Wv^7J16SF`)UMTrW zh~tOYM+pgh1ykj`l(`Ov4AUk7gwnZinMbu>D+4pGPhMR5K3$zVoRn8z;N-A0+h*w< zf$eb>&vd7-iLd=gbC`f`?!bFyeZEhoha9Iap9?ys+?Hh-aPQtdR5~_^>5qq>OE_gV zChI4!-LUy;ZG^1nsMPm~Q|F!^krgbUde4rugu5+IhmyLOZuR+Xg^2^c^!(5zcRi<3 zVe;lfjfsJjxv|k>C2LR?n|i#Yk|dmF2Rx6D)pR&?=?*Mj$HqHw@ZhV`X}xT#u#+cG zq6%q9%r;-5P(PivL#>U)3X*DKQ81%{Uva^M@%CjeZ;Vks73w^7-+sI^QPyYw;L7s& z3U#uC{dh(mWyWM_d2w93fReFYKqrQF)Ap-xZqa(qFy~sxTT3nDTSJ`%c?&bc(V=e3 zTJrvUpAfD`OzOF#N2?0DeiS!rYZcq6UbmSxORwSnP|Lc{#e&1{?sJOS7$tKIHmA(l zO?2BJUPVXS=9E}8H+4TaNTc(Z&ad5jUc{>V#HH6ae)8-O7PAv#bNhZRIv0DP zr(WUY$ygN8Y9(Uh{ZxF~XW5 
zt!1Ry6?sP{QqJ$;$cS0-LHd1?lATC!GI*9@qi)z+GT5+B{QWfs78HWTSdMmH53ftr zNYrNO`S6H0^{b-f{5#~K2R-xT^<+PIvL4@E@yph&3|PXR(kJ^A7CW7%D?gwT(HEDx zjXKSa45qu{1{C_8l5RCEBP8tj76-z;un0(`yDOs31sc`IYgSKwUl>k4ir`#1mND~o zli&{Xw#ZquYPf1BZrS`+~ z-p5mvRgd+m8h&D%BICI`_Q}4}2l)BV8Kvd)9ycGzigQAgRVnIBnr6Q(SzCB<6eUt}%cJiqD=sTR^pnfA7J}oTp#m&o5|8;oQ#7o(y<# zwZAG@PFD6By{ywf%^u;aHQl!s=LYLeW6A4lBjxRo^h~j}hiW4|fFbERmi;eSC||xD z(PKA`9e$~_d(D-iDsR-Nu_LyF$Q{wfby3FnmYlr2s)a>TYFe6~pI_hAl8TCo*kIIi zX&@tjU^|mG;6pjT1KtrxRzXe02>wik;^OUnBbS)yrCsc+KiAYKm55$8G8#fAZhB=< zOszTP=2j5+)bMo?apTbW!6uLM?i?n80CZp7d@gJYtY0{CKu~Wn>=I$nD zlkeXq7rJjVZew9_)Dvv~Qq|Vcp{pLJCYmfMt1zywqZ5QA9)0TG=E0B84yU%5p-Md)2jf4D_<5n%BTB+wn4hY+2sHkf4@|y>84+#`r{7|AyH-;O;C1dQu$K^tkU!!oDb z>6P&m8*=STQU5CrS$6jQ>EWS@+ZnN6d_S7)ki2b|)8Gcz@q+K69!%_l8Qd*yeAziU zWe4e_4X;#hJfCKCZnP~+kyKcXB%RnT`(&;nV72q?6U8MYHknVi_n}WKb>x*NHJ#uc zPj;y!KVHr9rY6mfJO@F0b~m?`Uou?6ntnxd(vfSYnq|>J{d|u-&ZHqu%y!^$uHEqd zz)Fpmm##ZH8c%e;c`oHvIF{YBd8uD&Mby&sLmKV+^(Kq6quOXeG!o8-p}ZKE`|`AA zng`yxeVdbMMT}kiy3)$G5-BS3&QN549)aN*2F2^~aL$SZR&JpzEG(2?77}eOEiHYF zr}d?$C;06VS-PKp=Khqky?^BSCQqla)LS;(JF30~J9LufE2vd8E7QYx?&3Ck28nOo zw;y((8$qZChHyX@L?#Y?EU+K2aYek z5IXGk#GtplF6}vF` zaS;(5CDZBYM>sh+j+yri569~jP+~Qc^>&I{M+`M3CzFlHFSjEhBoW_OKo$@7$Nn3= zwFrQtAZ*%L@69Qn?mTbZ0eB$oo+LdvHRT;K{dU+!e{EhzlD^wg!?ncGjy%n1h2UDW zF$vFwP36jqwhK1iW|WKpM=_X~)v1E*k7w?AF>7vOLJ1vM1KPlIA>vwZ-n>}=LJ-Uj z_3bJa6ckj%GFyFQp^P~{zKixTXT&~_#M@?8v2D5M&Yu0~__>Wy>bupOOhJvSS7Y|> z-AgTDce{$njBK<(-NP5sul2~-4b_*WIHTY(kJ$0a&UHE^hd6#^DOg?@Zbkifr)NcrT_%du%5^Wzv-7qFnN$S|lOU=q}DO)D6~E z1p9}I=hROgw$^jBV z=CPC0yw|)crt|0YT)VX%7$_Q8SK3E-702g#eLBLN@0I&pRdE35OIhflY;&kLn)$Y-*BK`PkwuP)a zC^h@__4Qq*E04x0hr6RixhxV$I$h(BPY-ElnmtP@TBcM6iK@oN#(FMcqjF;H3noA8 zqVzsFO&v}y8oJA@mp#x^^6uS$`!0(bGy~MvYZ-AhS*7Mh3q!n1zwyR#CY2nh<3A%QH@ z>Z@2*cet-a;OHGbe}>fajTINQ(D8+GFi9;0H&DL4bh4v3-QyLblt(hty*AADv-^oA z2xMODZ@1U?SGqXxKRsx0OU!O4!tHwxd+umvTYa=5JGlqX8SDuPV0t~OnXqx~?<6yW z$5|2m>X3i_)2C0Q$(%u}5|2blLF+U#l2$F}onKJk(cyrNKrR!H!nM=+A?N4MpTnB; 
z-E`4|s)OvZC|a_YtzF9~{$kH5_p3<7B%LSajfq9Gy+=K1xJA2(TE-O`gG;1A?#4pBk4lN&{XsXXL6$Z)j%1-ir2X;-Q>(DP+nd{2j-DCKu^ z{RFb;j_Y9bxFDo1Mluv_w{S)+YJ{+x-&>=E-kQg@Bg}1 zTIQ?U`fV(t%7nHWYIMnaSiR)P9A{_@K&+gMro z+1W!Yj@M*$$#vw(cRBB~8@6{e&$$7pENk=i<0owPid$`u2AQfWYrVo-xyv8 z7ckT7qc{Z4&C|dqnTcu8mCrnK!D02XZ+KkJ*grV%Dd)kTI|MApr|&%yraz^n<-}TW zOjWh1%tV4ic52$=NlW{0Z{DyPia0#>QcOJQ^(3TFcXBf2!J|i-e{^$tT5Y)ZMbdJ8 z5)-}UCN*1H8wY>e&5F#^;6h>%BXMXQL@e4#2@D9`b3gepfhH)==&7UhoG0IdTMppn z<&~F}l_eo0?EG!QbnisMpL6?Gudr!vAV2D$D$_5u7dksXmseD%8+}!Xj7nPM9cX_M z6tva5ecO)alj`xaKDbJw=||vLMWpVnQ=0u*;-)p>Pp@6OmfGgQUbLLb!P&QRrZz;P zs)&&$L>OX0%&9N-u?IYesiSZ zCs?AylQsF7Y3ANeuhlEm;a#oS^PxOOmwUbV{GWbHHOnk6Fa4(F=%^+1ET3sxz08&^ z#aKNhC8Z0F*VpOQPUh#oWnvN4($j1D(CWZ(zb{u(p||vSa3dt-g!B>T?D{;1 zPs7Pi^!`?p1wuMP5~);vxpTVV_4UHQlobaP-j9KO40^rw#50th120Xle$!sP6)0_7 zg4J>9=|!ii($g!k*#7rUzxw!9A$Ke{0DRp3;?gH&y-^7X1#)Mdm7hG>Nu0YZ@vj{`F!^8k2jjoR&VlqWb1dFw?(ke&6r2=(rdiZK-+jiwAIS{Oi}- z4XxJGSV^8#d=bO~bb&@^aK|OqzkXk!q|ZHwrkf6mz+Ydn`bopo+mkIg{?9)@Pt6KY z7UI4?wi(vi(;ub6AX@PEqMi+-eeoTP{(%<}5#K{!9_;%|gI8K^Hj|Y*8x;5Wai>Dq z73DqAd3nGS!Eyy$(OC}JFTt{e%(2h1=OA|5sxzUrA)$ZE_eNNs6 zZn1^X2ah_Ec3Fx?et5JJ|6Di}1gedD60Oy_@MtcjDxF8768HWaAzpq;u==O9>tn7| zhj{j@MxFh{tnaLB^v1XBaxJ>E;9J?X2&&_poSc61I!OR-YAPz;Ur=$W8Jv3`ED~r>z`Pd-%3r#r- zcFuw9tz1EF`aJqBGk(DTtOhNhC@u>eiVqbrMH0tXBAgQg`EMy1LB7wl=6&Ke5H>L?hddWpi&DW^% zd0)CBIQS!m<=aXqvV=s364EoDL$@m=vs;`IY&OE|tQydaTC55u6r^~AVs`?RH+2+s z3x5O8$Fy~8_>CJhXoGcSubeA=9vB!8yuyoei3al%z!OmXT*Rxei>j)sxV3bn%AK;_9TueXCA9;{ z4wR50s-{4C=UefW@6#srF`pbOh)Mt`m{jAUqM~x-YR#|eiQ4Lgg@r@{Kph=VuVi=o zPB_TY>S{&ENDb(CS%*;^BX6$V5J#CAnwp+g1!|u8bedgsv@6Hv3Q3y?@jD2k`os%I zWov=;;@Bi8+#t=NY2>0pgSV3{aRsfgQ~dg?HEY%oikjuHkF$YR!?Mr3l=%jd-m)R3aliEDATfQtUeRF zDsQ~-LT?f04@CI2ug|zO-PqHf=xmBoOZ6#*uNKXV-5Kr{vXKbCVS2C99+1{Ks>{{s zq||cr$z399jewP- zC_adob7=|ERYQ@R+bR#-k`}$5VP`~Lw4%JX=WfxLArekN+8V7nHqo7~i{=IBi71or zndNmBasM2$)BV+&$QC{GkvovLAlO>$3D>pZ=)!?UtI`0wH zk5Dh}n_0C7B#Gig80o%>0MqIa34(^JcNNZojBUWRWn3!ROu7zm>9%)zT3T8FaT-z$ 
zD^~j(Tu~#q#c14%SVoG~=aWck#)$prhp%`-1tq;wECQEw8Z(tgy9V%jLtRBhmqMdM zUA0Y{k{aAr7WEU)r<{b6Dk}Jx_v3icN)oPPIMA{X+s7pWE3BB+VIR2seOfV5H&27C z*kqX@J?~ zGL`)KvMrMBeQd5p{XdS)x0%5mMA#UJ$$;k>CMv@Ab#vA zI|J^NZxNI}y)5YXTM!tK`!8JrnZ?=aw)}nGoulXV&9w>{_uSpZLdbvgRKf1Ix9fx> zJre$3?{ctwfPgUG@gelV;)7Gk!tCtqC8qw5AOCSi$aP_mD>}FE+m}tz_H<5)RUJ)W z|2a9|KfauxSCCu!9B|8+I^0w}>0J7Fk3xSd`7~klvpOm9b*XTk z?DBr-vN+2EzlwW6K)@v4<9K7Jm|ZzM6n0`=HsEzyCZ?wk*VfkVt^oUI^t-gQXp$*z z6(xoRecoXR91yuI*U{3lU|TAflfL&udyb7@P6rdO#tzs#KEOw?dw7#x5!s)uGgRsk z&_H9hBiLE>=zZ?>%s*r~{1CgSYS2~Ug28wLz5$5<>)3Z5V(^i0a9MXV@dOOA+8(`r zeWC~3HS|);-DQ5Bk2y`FcgISIdQx3o-JEXhqk0@^{P6nCyRYt$`83y-BR4(18qnS4 zz|0VQW_O(S{3vWcUf9%}9_yb%$yk*JT54;Wv&OgV6P=g(@a*tC!DbC@?R@gh^z?7> zN}@@e?_E0Sb$FltO2>T#=yMx0v%@Ew{r{cpkJI4Sx-c^=|K4`9Z7rT#qE?!WqQs0Z zkpj_I4?@Um#6Go!uS5Fx-^o_xG#%l)GItB|r6zbUERKL|YuXhru%^5|WxdT&ctGW) zr8Q_AW1$A1jTb1vy3HU8Ox{0W8fwjmhlrwSCMX~!CACUB^n)z$w1I_4%zoslc9zBH z`SG!_q~_-HM7nP=LVE2R9E>G>HfnE{1OyCVVPdo=M)D+f^xPuuyTKV14Xa88P^xxf81fL9$R+PXyxMK zc+v5y0&H*3K5*0;pc@3f?L_?X96VSvDSqUL8e+>DE)Z=EtAY{G_$(|eq@`6oj`Z>g zDcrSebgU)qG(pdx(^-di9l83VyZet(d@WHs!<$1|Mka1Zbmf&e6bJ$;NNvSi6Iq0q zNrK0k-Ucn{-F0`D^9EC=bJfM$cBJ_b5qeQ4KB4mAP9TCrqP{b=jODEOpLwrO(k$cd zV=tgBcUXH&AGI=xsxX?=k~|1 zyYQH--^^Bh?PE$REiIdxqAL=5zxKwaeZ)uxxyb~c0SGZZEoK6fu1<4u{k{CmeFbw< zZUP$4`SmZQ%Q7LSrx6VV;z@a3o$AbRlO|F{2Eb=zL_`eIIJ5o@4n;-9=zb^y%{AjX z78VvTOdD${C{#%|DgTQuxT7&=R2*zNk)a`~M`1Hx1!f`&HJj5&%E>mqk@l*@nTx2k zgH^}J>mg>dcc2O{;Ms2gTQ!N@V$b2>LOf%S8O1`uB@O_Xp!Nh-ezgs<(GLfsL9{j0>vBioyZ7!5BWH6; z_S-Pon{RUvynOjGx43w_@tMIjwA1~1a(SHU%`&a@ zY!?4&Zm8YxO4CuW_z-~Q2+;+>3}jscaMyHa(U+>UBS~#|OW9g%BDDgFU9=r;Orcud z`iFs|{9e9zd~KbZ?Yf}t6*4WO#(!}TpFcD59?K5Db*r_!c;CcZhV8{|$8|n^Qk9Fg zqRIXMJ4e%>3p3MeOTCX}MfW?6%ND!K>gM(e<~4_eaH=0n?#KQUJpH-_7{B2AR0UMO zPk@x_y@jZq%pHsB-WM-kBv=D7(dlEUAaT*E#DNU;jsT1V>MTo9gr^W(R#59Cm73o@ zxrvg3&NCJc7w-truR&S5*grF+`@9C8oM!t3Ihl$FzKw;ly~Ip}7yA%=t16&8rz|u; zcp!@YNG=2II0!}YQ<82T6-~B&{qAU5Qs-P5=qO3Si#yHQvz6hW%)_AUMDD{zF0ARW`Eb#ZANm8kH~=H@8)4L{rhi!o9ZE)r#7CRB 
zh8V+Kxbv{l!*I^MAa)VNn80;5X-C+jj-X~YBAQFD8iUpbId8vHFCzj0;q2mKSIls906m0;t!+BdhoFp!jY5I?zoGRD zV|t{JmOPs*fbid29EXVZ{=T%J6At^uZ@1yeX`uiZn;4_dQ3vqBuy3 z=haLiHMcs@DfvcpbhPBcz&;`?qQUsYo1Aa0$0&y@AijzH`(@B0YC>uz4n4mC9lFC7 z?Z+5)A5#KPr$IDYumy0!mX$<*v)uCh?0)hsf)QTGpUv~2D3Tfn(2WQfy3ZAIY+QJU zq;ob=q#1S`Jb}mmDONQ)h*IsRhKyYgMhyUEL zAok+WA(5Jg{ygE;tAjIxbr8G&)UG1jpy%b{IvdalNKC%limhY6E)adWCOT*sX9STW z)M3);akRPZzuu>8RZYWQtp_$&hxv#kHLy>%A3YpnM~MuiPIfl-1^GQRfWdH*s=QpP zG)f~5s~b0N{8A7V@CE(7s+`;dL`@hmeT1L((g5=*5clXuQ>PE}oWxwa4XBLgJzY1! zi0Il5Fp|i~woYga7jt#KKReDIYu>igwCBTtg_)){yD#s4nO|ts=*Ych(qIzMnkAJa z^T|`*f7u4G6g1dFbLDC_Ji-eceLsup^l?zY2pN zZzfeBQcbsPG3Ch0=G8EkhUY6aZRJ}$bH5lw1WkMUW!wFQg?3E4+m(%MHax3j?d#d< z_~ui_jj6coRcnsE#80a{GW+0oj)Y=}Z>D9n+itdymD^K-hYl(8Y9x^Gyquvu1B>SY zTsjs>Q7gb+I{4VNlOD{18YlpSdccMK0#HOer?4$l8qQo75Iiun^fS3C!%CBLwV%Rm zn6@0!>Fj*(7Gt=>vHW8@r-xy|zJc@>Z;ofZ<6Xb2$5DM-aFviaW;f@Ffz#$@x)XAO z_J~8$5xcnQRHyN!#mU~XT3;T`3=W;WfXzfLM&S7w>;T*MggBNfo^#bRslkE>?rH&{ zp*!$B&WfQj@Tex9J#;v~i}$kZnV4o=*Wt2mq#FI>j}B#~fMwv`*TZ%CwQ-7>l7+>C zv-=ro*Jr9+9r%NG1CchI94YGqDly8wG^19TIt)+S0@HQyqgs5G{SL!;NY_{dq2OYy*TbIQvDXEc? zGD$~o_W8W)0#zqtzG#lQ}cJuv_#v1TGRnKXbUR_Tww=Op*=#pm%8=HE4_T-9Oq8`nO zd1xTyk>Vpe?m^(a!xAp{Qd8~A4n{yv@s4m=Wb>``=Yyo>-8;y#U#h5eYVy8sS#RuC zE=lF{%?Ibd{VwYbgr?Il{S%F^#L2&vloQV|?V&NeB{JHw&a&u{ccp#3v5U+HMbDDu zWq#Pi@_QU93--asChx~-vVT6ZPJrgc#XVS#A0IlIn;5h%)Z9+%PT^oZc2LIN>3LPq z{3-i9Y46KI^V^+*UcdH_yl{-B`}v81jcO+#v0UPQ{Pw!zUHy6;GQx?E6W2~P8eL&0 z8Whj*#T#|j9)^iMDan?)!lttY^u-ZDL6>D!G#c4PTWpdqUX*+ss{b(Jdv(&^y+uF& z1{yU!3qxHr@*^z!1L>c{^W>b~60cODH>_a=)gLeHoW<8&q|N$zN;N-qoX2n%N< z$j43)^EgS7ajfHVg>x8)07=})IqJmUp-(REMtsS^Dc@%~@~6G38pb;2Os2nbFHM|E zoL*CH^Q zuV@8);n;gI`{Jb@rd_*|pFU;ekUeyW{g}V>@AQH_bdhL1t^ymasw&FZ&*cdr@mrSD z;eP?S3RYN?dd!>P*BJETggb^|0}WadJ`-&Wupe-MDoaKI@Go3{u+21rp-V=Ik$r!! 
zotZK!$WSfn-li!l?r9?`O?*6*qnD4SLMsNOAX#zu)QN;?g&K;rn-nrkw!JfLOYd zJs$&)DiW@r7@q+%WZ|$UOsgw;DHSRIN+TZ>Kn_yu3DXOTm{fSuT!n;$5S2BSo!Kp3 zJ7b7*+(|g;^lFsNtF(zHXr?bXR#r8lcZJX}0JW8?GqR zQLe3_vD!n$)Xob)3|{z?6Dqhh6slySyEMx+^<5&-g13yqyYSzoAc++cb*S1V=vO^E zZ()GNYQzD$#LQ@$rmL$f(LV~mPsvALN(K=NM#(=bD+?1t!jP0sGoFf_2)TtsUgyiB zx_dbCdr01XpR&pCI(HeAwXH%nFtYh_aP9|tnXDJ57VG_eX{q6r!70*V-W}7Q*y)Uh z;=%p<$|p|T$aNS`iijvGGI;i1^`DDQ#;%aW35!e&JYZ9ajDz0l);qE)rDw6tM8hk=x+Y4GK@UJI42YH~1W zvc5Lg)KYMAQ&kVPk7j^?F8LK$NRR+375XR0{sW&xG%Cn@!mvsF!l2GT0*$)}{&9q? z5Pay@&AU1kc8RPr?XLJkLYO%Srv;QMr=UO_Dl;E;S;oM!YN%=@+JteE z$mnR;+Fe!P1|(cPW@(Pyl_cr_G{34g2iQ*lp7d7*1aPcA9MOCJ0KSMrNUH+^u68=W z(ma43V5=GdZtLOf?SLB%$nDAOSVuge&aj%EO4BDv0s-Y@3?i8r>f1Xy1n2FDTw^8@ z2)Tv}4&c$wf(t+W^pmtjC@3akqXMH<*xufbsj4{>G&2kEM03g#C4yL2D={L0X%{kb zKuBdfxamtpir>CX$DJVJ4+sfO<nSvq?9dYFZ_ZP^k= zL?T8B>SIha&U`#veQZ_=`T}7GT`)8AzEA_=8=42TNx^1XXMDv3#5tSmHvZQHg*0EUu5tU9b%GH!@uNJuyJq;V>ASv&x_Vz?h74GWDycmC|AJpZ+j!U~G2h~;XC462|* zpyK~cx?kTUx(c`fVtA97D&0rv^V5WT!xHe_fy`?NEZC7yrSW}K7p-}K-`rzrD%H<_ z_BMxcf7J!wKqqbQ-p|Tfeh2K-uMYf5fa`dd(*C-xB@=>tL>&u(1g^c=@_yjWMjCE& z0L3jrhYvSEVUd`4y*9VU^LW*TqRVB!5JxaJ264EOb`p_#4$fLtVE-ga0WxuF>a73O zHjsy&A;CO6@xiY0^QSr}6o0cbXQhf3tR3d^y6uh9v`98YQLaxllwW0{CFdiE2M;ur z7T(j1VS6W;gUh`Rmkf$WUcvA^cOV%~hq9YEp$T0DL;uQ&`hKnK= zEh8C$1sM|!2U}U?=v&s#P3{{Q;J_I9EXl|!7D1Xy!_u0%ksWpjsmy z(ew8QkCSN*uW*b)qQgcj-^@czZ!yERQ+`q$x6?fgsc#T|qJ~Re5*lWS%qlt7hBH%wU2dun608@g^AG zShO~|Fj+#O$O{LqsXbNWPGj1WhC& zxo}~~TW__^d)=3!?PE(F&;bCFvi!3(4J1D||LBI%R=ent*(1?*v^S$Ua&l;642n55 z6ZC!@eW7#*%74shFE;6<56u~+USk-P1RlrS^1Tp zKk;c|b%0-nCj4d0F70c|%Eoy_=L2h1S6I8n2~{Fs6(CWCs!+l@h6 zZOk{Kbc0*2`HvBYLD;lc2L)pAa6JtaIb;7{Nge%aD>?Ag?*1u&1Dx*hs>+KgMAKD( zYqFyQpfDah^4>OjM#d)412b^IVsyX`*W&}X@Jr%0=yIMum-5>L%+OR=KZY@gpo!5$ zXbG6L?oB@jUO4O~{8KKGcup|DazoK3&EtKiHa$KN7ofUB$-UWu7HlySfm>mIzHbAS z&?AtS6Nts}LY4(#3dn3Eh$J~rkxa9emw0G$5WS64ifQHN-z@THwZDXlMqt=NiPUl! 
zT&*)Nhb`JN6ML?aseoiX31}510K~(4z_J2dD-_(t;3;cUsh`#C%uckrQXl^kRbXzE zJWZqj0$Jk6?6Q)~gXaNt2k8wBM^)v@YoAu;2#+1r4V%f z<*Qc)=ykj~(7ExaEM8ho)(9QX(#SGm*6ZqcU6(~IQ0K?N?vv^|xm5sq_1?c#M z>brn>3Aow@v-*CnqG-HQoEneYnR<`WK`j-4md1R?X}CWWFzp+MOfUph0#iZZU<6M( zIr{(prY4m3?0(ghu&x}uLDjBC>WJe3!6;T)1@qvxD&jc@WFqXlcFM)OKo3zcqik#G zNqJ?%3m=yJrx@jZpghlD6sd;rQgD`E1*t5(`B5H*fh zw4aBbK>_)RA$g9zd&Qu+>Ty}=@Vc%Vt}(HdVOX(U`t)geZ{o^Uk5!Qd;Hkrww(oSh zmI-c}8$<*=Lrn<7=;fuwcmNDy0+zSc^;I)yD(=%;2~a)tCSYSy@8oKYi7Ylc4hB^n z&5?-&pHG>pk&RPBobYc12fcp%Rp{ANbeybWSJGB4f8VPyt!w`M_>MVFMZj{83Eg0S z2-GIyokjPaVtL~2$J%2dNjLm82B@l-ly>gv!QU=@cz8`*5#v3`@A5i6d3pNxSJqN> zu+O0c2+%RG+R82lsJ+P#A+q`IpRNF(&!9~6f>ejRsrF=%;^+tqgFf}prGM)UC(`~y zZVdlYWL>)&s6K0<5e$#xs#Fvl$OP^^ZC&On~%#;yAYFy)aG zVW0Y8vWhEX2}WJW>;!Xz2E6DTeT>6=Ov@BYP`)tPlfhJ0G7Bf~Ob;*x zIy$e$4pI=AT_VGq+xeFZJ+myk1bEz@*1z1QG5kOEgmT#LyRL<(xc<=u$bikI=d=Hu z3e+Bd0o|c&##1LxlJV7e_?k!y0CQ5$#oN^|H4)a4SbA{Hv7}s52kR7?NC0K_e=0(y!-b@m0r?&@HVMW9yo+-cVxjzSn zvhmmQK=|ilQKJcLB-LBa8`^LTB9NDhD;h!)vWwiAGc_nqY8Z}&LR8+?ws@;#H`qG> z^~e%7r?X@@7U1m&yb{1BA3m)ej|#M?1$HFVC(eN&P9IRIQsdtM+f;y6;C~xELT_@D zv`2W6=kSn`_6TdclCSre^rsYPe5maQ5JT~r$%P3@!zm?fb?L@+Y8X8z33>o=L7{j) zWEd`-Rnt4n>LhFtF2wPqo?)VXnwuT?z_MsbgMcHD{n+d35D&;)IU)IQDWsn9>*i9u zO{Fft9AF0!GelfSkTPsB2xE*6gzV`ff4ru_LntV90g_0^kq@gH6o#hHocR;!E}S%1 zz+%Yeen)J+o&~S_aSO$wB~==4v8ogjK#%4@?An)ij^on6X+-&ohY5uAg#e}_^`|mk zLyBO6s}6;!qO$TF8N(&*-tOIeU)eaB!<_zc9xuzUQv4_Zhz>y8a)JWdZbbhMuy`C4z9ye+Y(t;nqPgP!=^TMEY zt>~~ftCxgCe4PELo9-(K_c&Ub?B_4JEY>PT;5-aqqG3`-E4+YEUcGu%Vd4axxwXJ) zPJNiQZ;<5+by02@o;oBIkmGzjZk=!8OXG(54;zcu8uFcFntR%_>Ma!ult_a|cdPKXX1bVKGVG0bzC1P*D2i7yT}$u~<` zQztRA{W#MsHu+p&oN-f9VQO351M{w~BH8;x+&kiCM{-X5M8jQuR>t_;<_$WHahEK@$;WyX;^CrG)gJFEM52_<*F+S@`lVqqG`bSDMMInHJHlKN`|BgFfi4dCKs&wR)H>vbr{^yb*+MD zqzb%vDJQ?E4A&hLzJl6XkqQgNfU@4m*RM5DkQ2JQH`CFXa4A+#M|X?P*|16#?I;}C zOq2cfQyqp6N!JJBW7?mQMd)|%XpcDx=D!9KR>a%K2S;)oCY?4$YR<*(UXh!no;~yQ zC8k`Zle``VC4hxw?hr^SghOl?6{Qh9;DO?Ucxd~G#Q@V{m$U-h>EiGt$fzfLhIYuL 
zgCu#QY^yXH>e_&X!S(B00K$y(;R+zrlIU0uaP{BeD7!rMB+KF4v&y3f#GP?|5)|-) z-WA3_{uy|wVs5`l=NS?b^D`M08Ip4kxiUxPqX*~z!WkYI^16YQ_EX7WCQUrTcELhI znap_wSq>4fBVM@w^s4=^ZnKy+T>?p+2SFGXJt9_|=hneA(7y0vj!kMCh39b>4TwJU$M zJVQ}Xs{MR1Ep4mc^W3rI7dDAt?$3Gm4qwp3M2>oV<8_EHn9AvOT#Uk8h4qvZ+>z_1 zt-y|MAqOda7^tUFIy5;hg}YG_G0Oxosi1!5jOFP8;~nm>Ty_8aW61NmChTzBM011pPmGsdkrT>K#EdKlPL+Zeea^&hJ*<|gsi_>YAaV_L$OBM16f8QO z2iBHb*e;RJ-!2uR6`JQ4&j{^IO*kSMgn90H*xY$~n?CZ{*56N5*xhQ-jX=#|I^7+qrK0bVSnpxHz zn7Bvr(x6%vj#N|L9JA&l{dcgrDv|04XHDpxC3m3ccPHF0nNV zNEB5;ErPOBGl~0PFfnP|jlaU3onH!w^-`zkG%xpFc64OY#cgo!`um(KnsVWPhPm2! zs7J`C*b>4lZA&MLC()Y*_OIVDDe2UUH<;Q{~pSrc|pZ|N#9vaslyqGiGYgS zZL4>6!}|Tf5jn+`vRJMwM9mwcqkjm*wif#hYN=jGh+jpn4jvuTGgwG*=2`bic(pS? znD|A7pBy8DLe~I47U90ox$qAEBG0;2e)R#|crb$T7diR1h>?+f80nH_VUa4o zJmqW9du9q6*IM;NO5Vq>Bt7yndfiABV4*4Jm$a0TJ~43LdKA8m=SPga32{m+n0R?| zSQ}V#g;2>v6jgQjxIv89_m=qx|M42Xol`$?dQ}jnQ9EMq6vgLXi7C=S7VrK!`5!u@uQ~d|7dm0l zT;&-+AhCe=nr4er4=p=?EWYWZ28!DD3JRoj=%NbGo)w+JT&ZcP*;ovE20X^nYC#!@ zBPsfjC*PNp91s!FyLpqpJkFWMO6#xiLyqEsh z?ndtWdkS#WQ1^$PdE!2cOb9XuMsVjeEvt~PJmrMSx{B_{sSUy$8BWZRqpbGI=1z`x zd4;S~F|U_=7EAHE4{Di?48#d45ZEy3On|Y;XgZF9S*Zn?!-+%W2$5~5dolK6_KzH{ z6hg?de{1Js#?Q{OL5`ehvdUoHwyhpf`o>UKfJW`=UrS=eg7dsI5R05-y6i6TX#h>0 zM=MJFl@*!C{#yMYsN{o#7gp$w!Q$DHke`3{he{r7Qon;Aw*e)&w4);#wlB<6(T8#A z$Ut-gI*26L2&P`*(?X+HavEM%o}Qr}L6XFule1lmFL1YnXkA@a*+H|ZiJ$#joYAiA zuP_^X0SOg?`GH~UpJ-0dLhTN?hKY;;44$i@0fqGFx@uX25-K1qE$wH1?XSqIGCD5s z!#*VQK|$6LW>d^-;*_+G#2@w{4xh5b)$6o6iPzkMmJ%+RA-Lrhz?QFqW))}Wd^oPp ze`xrzs@eV+9&n=#0`5vs)o9d{_4ZOs|M=CDoR5J$iNwV3Q>ONz0Tp9g_2nJz%8k-A z$Jqbg*k8^k%8F5JllJa)uV_1--)zoMtfbLcSoHPFNA{$~AA_%^e+M!0BYWsh&2VQN zSop5LGUn%jLF{k%Z2;&s?q^9^_x?!+sA+kBoO{R3{!gJDpFXMh4?x^qAZlZ+aK>5_ z(|gDq(Fkf-8Vd6BPnLfr&_QlX81@;&h*)xX*TkC*NKEl3vr^9U5gt(3Y1WJ!u!f;` zs(MnYEp(so$uVYTW(lMM_w;~2+eYLeoOz()v7MjVZc`l#tG}VHb*#Z(fqfte=Z}EBQ#Uiq zM;bZi9|{#3whiCFz@(2KLw&tdRrK@>Qd<<{%KgfE zuVylY=G$k7uaF+vy!q8{$jf3jUmubgvcK{&xb6caFT34Yji>Bc-}DVvej8|c`Q8hm 
zq%I?q4-`*QrQwbi>9PJkb@|a&PQClJKhX$0tPWJ(0()!Uwts$kTf=SD^6~dDviLSo z&aR*1;N~@L()Y@e_nsZyl?1URReSp0%NZC{lxi5P9TXZ=5QnG9KD?O&*<6gCXjbsgX|^c$H?zc|N8Z#KH^ z`adQ4Vgs(CY5OV{1UDqLUR99?{59Nbbs$vFH*&7-(Hkm2+hs=b-qg9r8AU6SdRMAO zR!mE^9msUsbK#N$zrMH&7=p{x{2%SnLk^}^RiD*6WxiLlRiF=qLlZOuawaG=170B^ z6_hs>SUGV(!ECBPwJZoXGT;xb8fQN6iil`{;#UVxQ=WTdHDHwPd~uKCf9zu;=QP)u z?CZ;GBr?sFY)AWTwdCj zZfCmBynTnYUbkaXLW0X(f#?mealQ7#g9w25Vo6xYPp}p9;%KdsDL6KV0`y&ZW_r;2 zDWK+%<44i%LVER>CWA(BAN!Jn8l%qbf^o7TWekWT4o=D*M_dwQgC{DELApUjq`L$JL~=kt8AAHp&*-l2JJ-3+x~?66?9IIM&J*|jtJ}0q67KEr{x6El zv(*rpP^}@~e8BJ7nj{#z835iqMMI7zXkR((W_!BFAJtnCKBKr%ZV++9z9f;t-N76S z)PHwi1T>`pQIi2LU|H>k*8#IIgX9oojss345h>>Z9f7b8B(>I<`R)JmCa^RdFWhJE z&;?IR4eUWs&w2{c8iQrf$&dL4&jIqor83*wD-y>WxKN{k8ZglMd|x?&wMON1LV{De zvf=Z^#qW&`Sb~v?)5A{vb zF&!WhR@Jj6pi~8Q>R(44FZ413Sp;ZV7(F-!&0y~#K`)RROF-$TLYXL4Bm`6|LU4bI z0`>yTMg{k88~!`K$?h~85MS(s#>S>*W)6W?$*W48Fg|%Y!P}eB?|Dq>yLbFh9#S?K5zzqG~n1AV>>i1^JxH+OfC zm+L_G1LQP?3|Uuy_+2&J_S9__jUgBno3e>*N&g0I+99Jyz57a`t1DHWc3`STmR+(g zANr8P*Of}>!R@rRwied^6X1A|uV({!D#RfHjwe7=IWy+|$0t*jNseoPZbPc2M(ZLI zle)QiG+YY+3+bDi4;r1-_$|waK+^+FM)ncK&PTPfq&eN50;8gS7y~US`V~-th9a$C z6AOE8mLR66Hy-i>(tEx^*9rb5@D<5|Ks=Zp%1U#n5g;qFTRr1TigVYcpe2TK|bxmlCf~vhvm6i&5eRO2G(S7=#R22QtcM}zI@Ea2WfC7y1C6FM% zIcHD*!dxLW4iQ(7B5DDaga}7n@Z`ha#HjsE zB#MNF70UTs0_9~WJ!k`~t%{5$CgXzWffx*iniABU*O8!gIm@F~0?sO35dtBp)FFAa zCfQc_=bk>^kb7sJC^=z5c_8+5;O)Ifpg(p~4=4tQ5r|JFDkhK}6}SOBCFOw85F8%1!Qfer0YNA7Nfm2wip1Tz{|yl4?YFJQ$ioV zaYjUf8S{0Z(4+kC^K7+savYr5q#eE|sDxniYqzE9FP#4jMLfrAy!rtfS_|m(5 z^OIA^i~Fl~Xsm}!T3SXJaN6)6#)TKkzKZYKl^cO)_eq7flIjA8RYWfEOD||8$aD|z z9gu-dEZElGaQf>H>QllE+CKQd(_AxkT3H~_d+oddrIr7~M>d2U8=8p;V=MtsF0-~c zcgje2UEFoo}gdjO23lcvF^b*zk({)SB zS=Gz10BDU9wUR#@uC3KFUwOMbP~sq_#7HA!X)I7>XIsT0omA#Z+V-TTVgUe|k8O`F zA3COw09Oi-5!t&1`Fv!&f&7bj|MlKPPQ~Dx+LOmiPLJb+gjk^QWno~D7;kRgltO5i z^!xXEke7qfI-oHlN5lPT%%=MfVS@g>YJOba<~1^si58HX?ZwXaO|8y1w}#*MNFFFm zPGxQw)X^@IKgZ9$QyMzbb?RNAFS^Z8ZWhEnrGfv`Sg(=NaU~`WEGVYGOI*oFR6qUt 
zb*Cp=Oh;q(UUJg?9EfJ4r&?SI*vvR%dIpfK;6SAp2sp7l`ngKa??u?RvETqqsk-3TOb(AN}B?bpv;x3P6DP z%@gESB7vQr$ty`UrRuiR%B5H#2T1~K#xZh5*yfn7zlH5dy((K()nTm50svys{Zi1J zu3ZCb(OQ#5Z}Txh-=j@Cdev1Q-+5!m=R5E?WThu0U?F<}5az;JHbAIRsgpA|ye)a2 ziyE8E6M)P?XGk(m&1vQR_2R<8uCT~b)zZ?mbXC(&Jsf{yY!x=9>4^DlNfO&Zo*Nuq z{U4vLi3ugXPBM)PzV*C5J|sWsLrro+i2sGlXWkV)Lhl)I?>Z(ByK&YBikR(GIqqrx^B@(uxJf#{jdpe}=c&_7&m(1Sj~h`%@@TE}~F5p|*PF#qTM1=gD4P@*fG z9D~nJbw=@Zu8vIYd>^^Hxe^@Q`Tmh+VxHwOfJrW2TzcFjBQMYHSW!RRWe5Rl6MzRS;= zPbNk(=lM5>LP#m`AdBC;5G^niDxr&u;8atFYY*H?PN{3U zVn05A0m3|rxT6P~fKGYv05?8*;`S#~Xkky|MmTFf$x3Vn(OP)!czCv#nuiz_%3ivp zGS#}a%M{*9r5m9EZ&$}`Zle-<1T}352+RWQ&UMgR4r)6AXkD(}0Bj-qB^6@G)qKY4Oz1|#lL%Z{_peUYpm)AF{ri#YvwkEkN}Rby~*RHH#eZ77CJr~ zO=}IVbkZbvL?^f+lK62+I@T#idBf7 zJa$9G)!wt7ii(Aj`<;I7hZ@o!aS~fSB{P_-x`1Zm* z=YTzd_=mTNE<)PiQwM^!F{i$9`1wa125kgEOu>nws%Y9tJR2p`dx`? zXQp=urTcTaU=ohQFc4q{Vc7{!jyJb%>p^Cj=3U^Wq;83`k!cHyaJxbXTSjlC<3t4PXir^P!_}*EHtHZqXqr0=YIkQe_SqZYRU&Ob4KEs6eu!W zT_a()tKYsY(v$i#3D!6;DRj=xBN;CH0V5l~erd`whmyvAH%2x9PBfHF(g)Q(eh^m- zq}(m2S&j8hqK`>9^54mBDMN8-s73#?hoCV&Qp4)JDv6G5))^sSH^G9)CLoZ*EeiIr z!tb&^F{7iM;|*(Ipl^jm!%0U^e-j{26~KK^sN(9=Rrnc{u3AyGVF|ms{&gjI7Zxv?tN}vq^0gcpy<)!DqxG1@dpMqb#s52JSR%wIw& zN=)N>y%)U^umK_`WXVRz1h`Wm|Dnr#3+ae%!FdS($i6PQXSwss@L>Rr&UC@|HVISV z3md<>Axlzmc$g1vS+@Tr!CNSEXn{vIGL5IT*UowzgSkC?0(yqM2M%P|j8wtcBJ@YU zo`i7ZKM8(|OPT3QOIgAmZchNLRM#FjRy#jS)RMY-^)SfvyrN?^HRc%3_g+MGZt8+}Y&YkM&Ky zyLH@ow*f0A0FM+79SVWdeW8Gp&MRwOk(* z7s!bMF>X09y#v0sw;x1Gf(`N2<59^8g)u+8`u-ekRIq^DhYK|rw2{i69 z1MQ{D+|K^lWJV2aoSV3VLc0k_9IB;;le48JyZ?r+yb!3YPI29nE zM(@>MX#HON4`VrOE~EAPWQH6>5I9sS#{9aeq8~ix;w}w-fk$T?k6`!E}4$;60&zPykT_DEteS3NSu&Sj4&@-v&5` z^zrTh>~w*)loGQ3Arc(KjcR~8*$tpjRb)p8XERE+11tIs1f9Oob^pJA!g76(eNFXOvhR;%*0ptP?A8I!Z4Yir?{3pMjS-DNYCu<+(Z)S_n17mf` zqZIiT6)wI)JO8T%-0M)8_$PV4a0T}7TygU~zL3Zq- zEoBByZO@H40TjbAQ0|;VZw^i&m|_$J85Ue+mH#h(3Sl(@GP?j0Cy!L%V0K`@iu`4Q z<=O{+Oc-^7@Yk&F0d#)ezWP7cBou9+`Ig0MrG=qv&=)bkCwJKp!+9nM~S<& zT_6s-Cm$0JGB8+-2;;pC7Y94=^ekYiben$VoBhiU%xB=2;{$ZKjvM5tD3u^uz^3YQ 
zT?}Lkz~Xb0`u(iApXandVq*!k0Oa$sK?ah<)u?JO)h9l~iLzm3AX@p#tBZpUfFvub zmUXjD0VDa-ik~=I}14vuXqE768RqeuVk>NMd6fiIu0L+PEZPp=E*)PTm|Kaf= z#G<3YWN2WXt2r45O!y$^;g-xE{vuEN7#p3TFDm;32uDiGqbdK!Ke|j8519-CO-&iv zk-dLmN>;vqzBJdkWBsBC*R&&M@EE^dfd7#Y8baslHhCbt(tS!ITlq%b9S8^v@i`ju z`t{>gkEvwi;{#Sk71w_Takj z(P}*Dm+av~hrq614!ZBjf=!V1fw!*wbsdl{DDf9+H8_4J+1%LSX)wlZ{|-a=Vtzq! z3K!4_=~chaLAF1_r4NA2Xw1()02V_{e>l=VE+QX;n(%f~ZEun4XwO)u zcx;gNfUqQcFF^UvQ7R!AEEiW+oV2a;`wPIV0e-8J?f|G-|KRG49W}hbV`z9xERaGH`JA)ldi4_u2?%&T20YbCDk%a_x z>+OpyEIZOLhpAyl%sCrBF5}tO{LoP$N(Ss#j`Qq2SCrtig9huJ)ST-&COQR%3> z8R?7sr3kNs;0WZ0%h2YV5;6n%BOiXc-Je1X0_OfdF!}MF{&Lcf)JD?ISbPJ2KjW#3 zK~ESG5^nC@ea&{4=9jSa9&JivyZ5I%{EB)s_LZel&PHgEPJJnSt)f%NnRT8slXd_5 z7wVU+uy=TPw3s4=swF5wjOzKdAnJHHXI8kcImWeBXf?7q1luaswuMN(b0SWe$WPtb zsRHh#EWOs0tD$5HpscE~ad+v^WY}1zio@jsz4C3wV)sDZ>>M>c4@rX5p+j`t8GSIn zfT`Y^c4S)SHU@LXO}(XJQWr<1G@?V0xIz{n4syJ}0&Aj%9+-Afrc-}$4slB;R)I;Bs`3!#AcOzCv$s+oVWPNURuxneNMZ&B z5zw@_?BTJWW)L(Zyih?xc1NDa%0=;IbYcQF!|c8_uac6IZu=tABq2;R?ypkayQw8E zk<ZK;g;U@QvDlu_ukK6hxL?W(*@2&u0rHve6NrDcXSKgRdAF z5J3`Y8R!4(SqIQR%U$u?YT)vH_19JAU&qnA`Jn3P_$;Qihm~_%jiQcQC^o+7$``-q z1x?npIpgD9f78S=`q#4gGsqenpAU=QVKO!uPWK8E2QNE={)%m5A}<+#iq`LYn`;)7q>R2RK`awW}2H0z4M)I znWi{PeD>C^fgz$>#~I*fG_O!Q&j=eKN1lFukhA8BntqkK;*_axM4P&HnZA!QM;h@& z^u&Y&UOote;r*wNlNjg~8t17ks}Od+a~}ch zOub=##<6M8dblF;H;g)$l%9Tvv+rCry{Y5!F09J^&D#tNv8hQ(r;PJogxpihsJI_O zpO$HrxJcSbYHoJ_n^t1Ub^|0!c{dzcG2Ye|BjUvO2a><&vRqXz+ps1^REK{1cAbXG zyEPyPHa_AH2q`te8y2Oq-uL2!UxWTeC2r)m(l@|BtS z${FmyKc^Yn69Z*!syS$c>Gd35;CHrQ+T$z#6Bu~&NlYI;LEhO78gfm3F+Gz#Xb`9W zb2;oa2M45PHx99KY-zv(N-tPC9~OF_p~kk!Ah*$^Nm;hhm@nBPtJiZ*+^@?*p)cW@ z!=Ph42GhO#qsD|dPzC~v6;Ii}4nf5bMgFdkXQ&RQitLhae^oET4oL@D1f;%)9+&E^ z0@-y~bTnIF-KiP4xG>%nkRDw#P&v5cxA5sjh~>QWt%075J}Fs+*i2Y%VJ`)*pEz-q zh6c!=x$J@hTpOUPESI}?VIH8|mC(+2kN2U_H&8m!K79Jn5C-?G1B?b)no+lX(f8b| z=U8~-0L7+~#P%=uxuFe}5ZuPnS}6>xky@U0_9qZN1^w8I!5{Lu1bG(t=*LCs76&j1 zI{%=T1$7q)B@*0bd3bo>DFcq*L`PaeGAo;N)99U7-@Z-Q3u>emELmcGUMFrav$A5< z)nC}0Kk=UOE=ICX~A;cZF5%1DHg 
z6NTLK!AV^WR+${=xht=qLb@UETU0);^73BT3pVdfsmZ+#hZ;TB&hCa$aiC_kXK|x- zd_qadd;9XkTa!y)-rfUqfxg=P3fEczMjy5+jCc0?D@mIAKw(Sy`4OPqTyIcnp6bo! zv&qj(>PWnC`Z5M{&li1BbxplwG`0sQ704n^rzZlji>&T%;&(QuE(M@yDDS@6@TS5Q z+lLONa9yNli(h29{_I&|C}Htb{~W}4;Qcat3)}{EvWR)wTc@Ib{?y9rFZp&DGcCL_ z7vlNzFmYrjJi9{0?fccV`R9s6VrPud^LfCdli49M`W40QgO3O2dLii{14VWyE1Oge z&o0lv6uzzxPYzR_>MEQh*$Kyjh{j%!T2@x}FGo~iN{aRLD8jYc^L!jHED_PCll}Cm z3v3kTa*xeX`l)AretCsvBVl3tAh0)>o7<#Y?(uHS@$Bs_8=g_(vTUFPr=FcuD&6jO zJzI2V!o?K}ln2W0MpMc_L>m?vNi`ERpbj!QShf>CU4W!BAs1tiCJr9a1d!sQ5M( z7*=a=$nGPoZRu+dN_iQ@Dv2ruHkAyp+SIgI#MV>s%RMJ)qcOtg`l35%@I%i}l*Q!b zaU zy1&pMLwKF2>%$MbPW5EQ3);Pgk+yQ3NMUF8U}lC;L$ZrXwuo(QgJTxl-WvRag2qP8 zf}*rjSU&TiY zvI}#NpAdHttZYAh(BNlCo9)QK_o6w55WM~v-Mg3sl0&)wxNT-8RRhu9gP|cYH8n18 zu+H-9yHZq9o1D*j`t;>(-@_Qml#bhL1DWjROVjH(Jx-W!mBbVu+A1UsAYGc*PQbHM z@Mz>i6@0faYboAsR!&YX5+of^_1rQxez5Xg3+B;*IO=?yWOGcQgQY^Gwy$AnV^gsV z!NBl9BP^ArrSr$y>V(uz%iWZYgX2oCQ<@grl_IyX3gd92NG|pbe_^}>(3y$^acEc- zN*lqZWAWlJb4Qq%6mp&F9DH8;y)W~-C#KpUO&}>MDWy**tFv<%9Zn@KYolv|{`6Yl z7t|GVi-By-or|!F3=B7(tiWkDZ6GU`JhH9P*b=5Fp$C8)M1bCw-iMJ~B_HH)l=gv_ zItQXL^m}@G)(1zn-=P1F8a)6d&&~XR5BH{+zwg|@D;9b3W3ZgNc?gYjUylM-W0uehy(E9eeWMFz*+1L3pj!v z*S~mJiycNSwWuu&YeJA$lvUAHzo&0~SntC#RBUau42roY_2(z=JDe1v6b=&v&WQ>p zz1oFopVI=v%XON)?HYhEf=2t82@%>33)NM=lcm9(C<17od`j~fJyLN|j^b)sNtwRbAGLZ2r$GJYo^rd8d( z+?h>SSPFj7`QdK!Rcj)tt;JP@ zA^-8*x7x`su@Fm5eBlLdE^mtK@cDG0{6#qpI~!g*N}Bqv$rbuJ+dq6IrqS=u<+avy z+xGxG25;jW=WWkz(c~ogML9RfZV_{j>wU1tCbeQyT3v1PA{GYmRLjboeHACxrVj5N z5^PBpMRujV>2H6SFVAhr^;;#1Z0f?QQ*@dS%`R75A3wFT?7kusao2h{o<}-Lq&Sfd z?sw_SaCZEJ7%G@C%HK{egYU<#Ubnc7;~ zhB?m6zE86pVNHeFoP8EF!Udt7g>VdCEY@+`z!B3@lAK4v3=EUyIIl)z=_E^RVauj2 z5If~sR-P}yW#n)XKA0Q)@QB((rH0UH$MwP#JhFL=zfGE z1GR=WUAhCDL13*tK7oU^(f;ZjnVD8yl0nwwkDWO7E9;=cj13uB^`{kHJ#M!^&oW;@pZF24Nud6oPEn zP*evRZj27pyW5Kq=oCU3eUF82!Y_co;WMg^ZRmse=RGvSOI7d`i*9U2xgYsT^{#(- zO5t}LgP}H`)8^?Yb;%va%bLbAR4)20g~Y#{usR3Ld7DnI~^`#({LKR%8cFmP> zd^Uj|_t`cUYP9+>y9R^Ybm>4Iuy}3Fbc|U^NiD0XhY8&^)#~4m`+Nlw(zsZ~#pMX+ 
z*p=jnAn1IUQgR0}5P166dNfsT`S4nX^LmuwyOkfzB|6*e=)l`{z4PaIP@$xx^ux_q zI>$<0yY|4(&kt0&Ssy-JheR4Pz>^`a;h>p&N*4==-+u!Y7p38wwtJNR>)fbtV=x9t z+<1aQDAuyTxG8J7v8NZhD+t$|;t&Ve+3e2m+kHqk%HXU?|3FF{LEl3?J)OW=CQ~-L zpIAXkN(2Ck5DtW<@m&iduYp|RQTh?LLfO0{VLk9f4d<>(9`RP8;@FE1%X84ZuT zy%zPm^>5mo&Wfm5asm99N7}J!Is9)b7Cq0>NKwdBNo#$Mg}ri^d{Dv2j%mm@Zr3u6 zO@^T~r6yb8L@tC}tq@56Li};Hnh_7BI4ry_Hg*^#SSeNZZv;5EQ?LMtKqcaM2s{|GK~*& znj_qL9t2%!l4W)nnHS?fCk{%1>m*wJFU>(ZF336y~4iRcU>mnT7rY(jkT~uEq zVZ*-r_dP=Bi9%3@^3Oyv2&GX!dM7O}Z-F6_jvH5G=jSII+7rc6ld5)aG+$P(Tt7E= z4+G#8V^N|PCMVf4u*2Ovyy-)@jkCweCMj90SK+Zc#Zd^HX1GHZ~kV2jY{{N!> z6*N|3l8;Xrp^fJzmZN$E{QOR{N$hwE#j;H}oslJc8}n^|iVpAr2;udE`mRA`N;Vbn zpC?b+(rrfW?iaNPK|jaiPxC{aG|nEG*t7<27MkD)Np%rnwuS zQ}kFg#+etyvHJ%>o<0Oi&I!H#2I>TO{ue*oGT@JoL6RO$UCl~WsumJIv-P|}dIprB zy9^D!s_L<^tzWrfDD>*0g(V=pFkW8u%8gu*R2 zesi434Z>TNd?hxo9XfOfO)TRDgagHX2ycAPzd@rf2C*p-eX~bPDtj!#dMy6aRXscl zqYEXc_i-Of3GCBhh?te5JmdBH+czcH(V=}h1CnxqtZXE8t;ifdKbmubKjKWr64W>V zd#T&r>NJhVW@uo6&7PR*vr*MW4K{wMgC34CNe;MH*dWQI9cG*!Um63nm>Dck;IH(Y z^@jsHJp6RGVH?cmg_w=dE@J4A^|6^;_&(Q(^{#@m7VHgFRFTk1KHN`Hp>*QJPw07p zoq-ly{mw7E|K!BdEA$MnMweyJ^S?Z#IFjd@zox4l#cL6YCnk5+2T!gaxDYg7TBpl| zCDJT;Kpo}~-0U(ty#8%E^$vXBF&VerN1sAK3YhmHiX&Z)v-BomeAx}TtcUEII9?rv zzB}P?di=QVrJUksu7=2%`(-CjNwZyky-^crZOM$P-YdBED2$i37(*O%rwUC zR^>7p&FIi3#)Nf4gAHJ!IOXZntRf;h$ji)Ut^1F0=Rl+8a4)%TPrDHSn)&1MP%jo* z4@IVDe(5zkAB~H#w->kkS*61b{srJhCMLiJ8u?|3L3nkZFTSafJ0zMZ-iovCf@Q$G z)iWRxTSb)IR`rNq90nykG1D_8!_VivlbG5Yx@m}|f3N|`&*|At=_S7mX+(cLprCQP zJ29aXHN?m<$9R;cEOpOo*ygxg_hPSy4s+UR>9VpiL7!e^DA>ojyoRKi!N6jttk4ic zMh%g&3Jwf@AH7%yP0-R_VHN~CNS0hZ0z43xQEAlO{rbi3 zTA55p%F8#1f({VaT zh(Kz8l?)pMUAN2AcHurpT;-uT0LC;iT2%4={ctP(Y<<8Ew5SEYO8AldxtbRXuU<9z z_S288dG8dK>1sR!)TN^2LSaU;x)X#I_)k=(aKfpd_Gjq6v`=bi5qX4O0m#oR;T5N% z!q`JEmXV?7E7x0<0}Jl9>zWDn1oWIKRUS6#ZE;i3$iuYL$(^33|1Q1@0hW-Y{QISx zVP}_4_|qk~;PQ_$s%0=@>wk0P=Hnd1=cT~`bY3FGVo`&cT$+x$MPv1)f_bv*w za(ut<%wyqDifg4%Asx5i?9&GR(`>qZN?kb@LO3e9QXC48Bx=JI)st9PI@3?02D}&& 
zqsPf$!so;EuS(I*(;vf4k}IZ7*9f`wP13;LshJ)^%_XUuGIY|t;b~$-`g}Z&x!OU0 zbs0Rp|0+f@{YJj+<-*`ib!~1rzXSaKC`fS=1B6+-@4uCU65uao(&HaNwDx=K}iDLg(Kxo7aHGQ}2D8a~{ENr+Ap$+PQW zHSc97c1X7*C8>ECjeV;vOr*^z{S@QO!__9yQ0ko1&(GE*Z@+%a{(^fZoD)2j3o2EZ z_E<8_MF(7_=SqG&5sfGT79X))5b(DlRt@t$5ZVk?F;qaR#uFEWsM&xP3<8LPfngSQ z4P}q2(?$a-DpVe4_8w8!m{++s+V$T>#&s%j07zNd$MuHjhkcGt;sP&B>PAV=H(hDw zE+j;r1?t&+>{eQtwyew*X=^BnBEO3Mya{ISuQ57Tf>Ejs$2El9z*ssMZ`K7SKybqj z7&oP%dGK)OWUfs@gDz=F<)aBS2cU;7H%+B#`N>!xAW|TJ@cF*-(=U*g)xXxO#Ft!3 zD1oZ)ZllThq4rDgf5i+4A4|zW-D>B^He;49ai@Z7jRh+(L_aYptE{^WKf=%vukDLg z74S2Ef*a+VfqBEp{Fda1UU8)l@UHRFn};61?SS!^ z7B2u^-~=8;(tU+p<<_U?CZ|Hw)eleve&v<6E7QH(h{#ZRHI+{6Oy7cL-k?%&3%{bW z@-vQre^VPb9>SKTD>SNV@M-L&IA;`|m6aV{wWZ;<(_gh(G& zR?o~x|1MN~a+A`{<>k?vpXr*Kg2z(6{^6k1&4Mi@kKg?Qj^qhvm7lgsUXk@f{FWQZ zY4m!&8VnVbh1&yPfZzPt*`q1f-`_buG)a;U_y+4hvId=O@^PYCPzDRbD0id{o1QkS zo`RvbXJ6M3pdq=x5$5YOfQ3z81kl>PyoK#m)lCy~q5u#x`mk!%G>ViN7+eAGXMhO% zI#7c09zFX*1n_st^U?jrQzF1z&=nYUzfnqHuslXXFlh_dcdhl>pk%XHP>MEbt)V)j zS#JH)fK-&G9K*u0T?{A35EX*mVBFAS;l~tL0s26TQ!z!=say;pUEUOL$@I*|iE2j( zZp$jAJ=LXYZr+6$2>9iID(-j>3LH8v=)jbDV zg+yshcibG@tp5wc03xtQ4k)FrsE#NFAzMk!6T& z!|DvUra+NsJkAPl^p3iEE&@*lQJcw`;_%`~qDpWuKQKm_+gl4TK6r1xl_4b+Ro`om zx{pb0JPs6yQW&}p&sx;|>IRtPqMMvYdWPrwyOx{)qGyVwA9A;u4Wu#S4nZ&@Cd>5d zk(z^!$0aX9OF7zUK|>9Q7g*?J1DW0=9xC|=9|lc z7{ZPK2yGQBCX%cUGC(50`%lXz_yfAZ*FFo*q$YyCl^><0LRr%zv9qRx9v$+J!y%gP z2F(T7F_(|jK*cpFOtRzDBwty&Uyj2u)RQxULx4Z$yqOkz_;KkY7|CB2IIMP#ChX3E zl8!IdHT!vfmKgeamGfCzEbJ_$bb72}p}J-iI6vl}n5$AK{vbS4Y!U8V%)>{BnS^x0 z0?#eCSf>GVgfGAfK7&$7Rn^tQF1U^dlX2+}&?9CM2sswAJ;Ky%YU+hbDvwW|$_Ex1 zpr0{gxXyZ_PS4mv6QRM1T)vwj(dPwZqxrGM#wO7vf;_=OZUy-5H4tJ<*0(annf05s zgd1$b<>%e{WU=U=4xrBH{swf6&*0eu!Uuqf)z^kY>Xot8P+2tAYVhhYOu&+gp52vjhQ;6;RkesP%Gj8sMfJZSC6AOel2Z(6?f|2-D}wCZob@yLHnB%5qinPGgR9k~@#! 
zxPylQpF!#3d+gcQ@B*sSucIQZ>}njsYBI_VwL{ zQLz%hVG#YT&|QXVF>^Eeu~SOj!^iOSiG6I8l-R-IzPnYMD#9-9)mt+?g{xd;GV3ii z&EqDrvX-r{C+=hQMsS8Q766LlIYCo)`gLL=iu3msJ_6Xd{XxiKM=L_$4!9|)hDRri2m{#z1V{_=ajMU;k zB}hzsKF4mFEMma$`e-GDPZ)kOD;)Wu9KPy!-o@3acAC zUM7-Az?#NozI;iM{@?&cQpA#Z zm!_sW!`JkT7Nf~Hv7=m;eNl9Rvw#E*Y~1iIVo&Rj?4juyAm2I|yyxkf9|{MK;jK{@ zo!AD7I}ex+OLJD?$JJb3Oo9jyHmqQB6cTdH>$mWA`aZIAcWz`S3Th-Zz|BjCp7_Q; z)3ExELFeYuk~wO+>g#{C?>z=jk|ENWpPQGbz%W{&@9;M~3sgZbnE3qtnv2APgC{+C zxSMOOBTk-d?jV5K9BA>a^}fzFuM2mmIhH{w;W~v~sA7wCw{9wy$h$d5W_zPZ5BAgP z!!>&d(b;6W43J|AM}R^v)>jN$2vNtUmHeUu$S4yUL8~O=FnH3Sdup>KWBSwWXMqzg zDr*oLsUSSsk4?OVt*tfrusCRQUdiP2-!B==J=jl!=6ArzL@@vkm%=~~Ia$BRUsmC_~nEPC^yGF79q zJ$ZKwLSG8X5{Uwh2G2C)<>9q00$}vN#GE*Ey90Wzr9v~E#a09KxIvSE-pzH({1%R4 z8DM{{i+74D-Fg@~K&O%4njYUQuMqY+QDnPfx83-3b6K%U(=Ah#*XSm62R-6jJ7RJ~ zqd{)Bj!o{JzaeM#{i6f35H8dU=j8c^({NJV_%}IBua!kybZG==AJi+xjcq&mM-n@8 zD(|V$Ryj7!adzj{d1{bi{?lo!hrQc}N#J4S$WkykeR`dVFHl!EdvaA%SJ&NZfH!{} z=T-)vSfKhtPUeEJntOTgX0}Rss6Vc)Ba&QAOb?^0`tzI$H%H-~-&ykWgU zQ!$BvSxy7N=#Ky}pnDiTc0e)7$H!-%f9Zl!8ol}Iylz;8O?cPZlOr`*^)lsfu2WJP zBZ`yiR@h&MEx3$82rfFIpe&T{Nh?6omH!p*+mCK(E z15pw^g6AB;MF)|dL?Ea-Kr@&*vb|Ca3HVY^6X8u+Lhj6DaC(N`K;f)1&|%p?_BX(3 zk7Z?b1L2NzhIC+lPyUqD%-||yQx=B!L?#Nx&Jm8$K^dO0tF6oXG(IH1uueNRBqM^L1C6Pi87GaCs#1LG%sl7F!mZ@PLH5 z>leFDD$`#~S3KkbFL1~i?N6V#%*E-<;oQT+j_y4K3=8HlNM#P52k8Nttr;LRdA52P zrr?Fv8S3l4c<};fJKT1g8I}^*lhpO}fH|=kavH>;(H}QxV1IB-+GnY1^*a-i2m~8? 
ztWMDv*5Yy*N=gaYMOI;9^@fHN%laS)%`g~uS_q~GC|naDfR^wqw5kdMi$XncEkzHW zmmI;jewrLOO2dl#^7$9ZWBui!rb_pg#kY=HS=$Yb-@p4bHNJTx+t8qrx|3x!U0AZJ z?+`qYk_pf`@Ji_QmjHzC2!pICX|su0~)t@q`@31VGU9k&U}d>kIX^*FgL_)r^N@*CL*-A>!fJS;3r&zi&kiG%(ElQ2O> z=|*<8;vo`rx&6kmwA54?LTFS}I*f7AtKOQkVZctsI0OJiTyfh+iw+=nP+Ag_8XIM) zd@g~wf|?S8DLfYy57J|A=x#tt_Xe9NoWJt@@U+Wl_ZgRCyM}yfFCKmbys|JNp0HWNbuc~}Va)*kfzwVBCFXFJ zj-RaNhbtU$nBdXivmcicgDxKg{#Nhm(E2n_iG2f3BQqE&6(!_g@@`N#RuSf`2~h{| z$Aa_a#h)wp(SEVi#>QBC!SJ+#ZuYyDg(eVos|GOQo01ZCshf9MaLcp>M)_$ z&LL+xC5a)Ia=S4keUX)G0qS>UdwWa8E;llXog*wgBRWuwTV=63zN+-o<3pVV<|+%r zlTX`V$*=K@jkOqoI#90fu1#wQOAjO4Ldgxc@!(IZ?%$pP6%)A{*s~v}qS2Do{Y0*6 z|Jc#bT4QEyh6VV$xVcacA^im09`in}VOpA3`7xFy%THQY_IdX%6-w#{wl4YJc#8p;l_5HgyRqLe;@}Hjws%$xgO1Ek_;nbul3H5^Ws{7;yO29aFTf<0sJ6W3UHC(VGLdMplJDh}m>D zI1uTPu`$6g`OO;(h|@#FD`)lQ;{HK34UKdlB_?xic-h*+@ZlEfSwnGt>hF8>TJtOU z1PGmT!x`u((A~`S^OsQ8>i5~da#>xIRt?*5OSyLdm|MQlx*W>|qi4pd(s8ITTt3r~>fK{RYf}L`(P!uFgASTcE4D9`+k1j%#r$+c7bb; zfCKzn!#U-=1yu|<5rdIvfoPkDWiwT~0>!n3 zw}1H*=akE|6q{QN?m5u>>K!3cC?YA^hTr#xHv8mx9&oA5CBAMR3?FW5i*Q=tUsxD0 zzA*O6w^<;*&T!bo=%v>7WH_zo6|ZM5qLamY=!uI>$t8lMGuKoIkv~^mnv=`QR;s#K zj7_3ORBTI|Nh?P+173t_WD(vd3HY+t)kTkB1o8D&riD>8Zy1OV4m1no-FsQQwGqAX zrKm+zq9%1q%6Xx3vQ+Bk%Q~33HBhPweiYqe)4=}u94|hmx@8v_D%QL;F%?~0)n_%Y zyVKfTUG17Vd79;|6Nlb_Wi=?z4jhYGJx0U5>RxknGU=V8*BkwuwRM-u7hE{ks)TpX z6LZD;6Mg$>?{Agy6-#X1-XK{`pM}DGD$cTscI=yl&H1vsDh5@7S-k@z3>g}cw2)D| zG0gvbz+q`<)g$$+4^;@*UbBHQr?Qmw!@FFOY+3Nz+JGVJ%`nKxEZcqTDCs37<>Ucv ze1Vn<9N}eOcav(aO57bN>$$&h?Cd{94Q&DvMidu#y(T+43b$s#$naoK%LwhG1JOC9 zHTR18b8D>ft+a3>#I6+n-m=}XQHeZ-q%B=~5?@HO!%%70{e_k9=A4dR#XVazXX?u& z2NwG2C{IT%P>!@sfD^)PnR{%0XmFF}?-Qdt+u7y3#n!9Vr28II9BVrPV0(s%5ouQRH+BkUG3bdd%Xgz5R0ySOTOb~TTw9MzjwG9_@UcVJ+s)Z*) zRhM93kmau*qeqM4MVzxIXiwb?&?1b*-#)v7!_Mbz8?bq$4Re z+lp}D35ri_ZWaZVSeYV$6jFn2U0{Gxw~kq84X>`T652EYa^;Y&Qk~q~!|KX*`2@nc zEWvH{LdvBZt=7e#2T6x9u|%KM#Yq|$}O|K{P3~FmnWbsr=AE~L>o^Xm@ z^C>HSF=R8drg=9EhUEPmqj8RbV5b^v)0>koA>5(w>!4h`c<0r+?n#Mn-!^Tcb&3sr 
zUAH(GZ>@hj4E<5m1&;nupIa)uJ>3bBy=qiea+K}f5@&iT%{y}V%|1P3Pd25!Pzec8 zODzH~Y(gVhhMaV2o}C)mxAjk!&4S_mwd6>8%VzIgCZ-3S>@5Jr!gid^=#dxZ4ZSJ4 z=;{76z}jtX)c1|AWL+(GDz?vdx6_a?>5P@({-fNd-X8io9EsMWdubv51YOdzQ(Gav9F{&)Cn~)vCb$_Wo-{0@L&hX1!KJSc#_eW-QF9Foq}BqQoiJ z?)zR;-@2rvxW8hQv%4CNkG`naSkb(6Nljknx)W7LAdq#hMPZOv8r69BI$w9F@Xm(r z&qL_6QDy782XPyw1kmS(35UGcCTZMN<< z+VR~f(v-`T)%i05c<~XOTJN&s)-ul@d?Y9;%NhHr!N11DmqsR*JKnTDsAL!~$~$ds zP0LX+HhPY5Nl6r%Sqg-ma-55v4wA4N<7WF>AMa(=aH)Uv{!a0^PPy}Yvrl?{G%&l@ z^nr!TdCU3^j{_Vr##HROZb7Bw56tPio>ta(mb6pFccS;)8)je#b z=|PDq0>wYK36p5Mo$Q;6Pl|dNsD+a8Fc9UA@5{?iqW@T>Xr*%}in+!Ktx12*nb9V_ zL)?v);L#9zqX2Nm`63SRyvy8aii+^L7CPso<#z#2bpnOPG+v@;s2{uC{o3$DZdr+n zD4xCnyGy#Atb)JFCG@JrEsk>)SDXr>Ft?veD~@oP?W)g*Xl`=H&$RPb!c-Sai{V$X zBq=E^Dm_nH{Alf5{-oVV0Ygfwf{Lvy#h`cM83n&-&O*l`jd_=m;&sF7FK_!-Zjh2U za39KUvA~9tyb$W_G`jf0T+8z6-BQ^_Rq_5n*_A2UskJ()!Fi|YN>W(+lE3Iu>hZ2D zrQ434E~yqvbau9Mwx4~LyJ5qhC^Jo6TiG0&`?n!6A=QOVQqdNtf6_2#%H67}+ zm}%*^e|O_-y7Z0%jW#p6&jSL?EOjnD?=a^|U#FuRw6bn?ME`U{Lw`|6G_&L9(a-VY z#l@PkRE28Mj-sbcC^0=Pee_ti0LpXBThi#zA6FRq~m4Oy7qot16}#-R$7; z0jH&Qmf2PhI~xAZ??MkBHdguflRUoN$Qi}CA$h+Qi{5-X+VL*qS*~u1(v>-ZOj$KJ zCdOZrpY<2)*UWv{`gBfUF=c4Pn2D)>K>51 zhcjM*i|LcZqLpBwu~}S>^tjU8_-E$i*`a|Yf@>*`QS1~MEW0WL; ztB7nYPshJ0rf&>3ReLEJ>B#KehW?b9fdgjklWeBaa7lSSjC)BvAetyw8$=t$ju`^R zK_tQuh&w5Uz-=2C)4%@l9X$fwHW=_HCR;oOQn1$>7Ja|uUH!N8VArr2jnk`T3aXJO zR+K!=zlyU9lgF|cqZ(V67M@>tp==bd`)W$c((f`UTcsgRzb?n~y}_t~NB2^ihEu2~ zgm&Mww-jEZp*EGuwA7LP0?T2%!$j|&<~DxDve$#(3%(SYYc)~$pFB|=;XeK$1NPRV z2)dKWzs8;Kma+gldqBB>ZeY7{g>m`@r_P(RjS^8$A`z9^k)Eq;)U8kQ<=N4-Fc#?d zjIQ`79Yb=qpxpCQ@I={AzJ1!g^h#}PTuVeN{b{p^#wGzUjE+HwNw~6dhrW%&7HKiN z<#|ojqE2W2y!>2#KKth*uC)Dwskx1$ZpZAAn3$#bjgt&P(a}aHuPpT*>$mg?vPxO( zdxV^H{0iy}&CMyZi@OmfDVF~tu^`sS6we;vopj!!R*x-RyLIWFo%Y;_lWksJ4$0Zo zTDYTlrEs~~lr%Fkpp}u4$ikjGz5LWG@U$t<(1G38mz`3RSob_GS|2;Ujgb^c*QTHl zB*#XrlK34u(c_{b!H>ne18W^6y2^_diwqwe^bIyrJnP|^TFm?N+9H@N-PEl-N&b}JusJtkAf zt8Er|JYL*S-tuIS)^WREw=!2mL^zl7t@KC(ZoG;ce0*jmK5S6^Y4I{Pz|+Yj`9R=a 
z!^T}~#)YPRG#bl4RaB?l(>VtBPN|6xmDq_JsmNxWE^hEy+{!MR$3v>n6{qbg7}^oi zk#3#a?_}rFTVs67VU2l51{T9Ro0+Vo6RBgT`O)80KU1+|mQn{$i2!!#D}=G{&WI$SW__-3|c zbutu4PCLAkG_GUo%<9u`4}X`)VinL$OU5J5HE+H~19jBfhz_$vNX8Md0eVBCi_g=u z#h#Szmwo@)GfJW`AHv%lF#?5}=|s3$4QO8tFH$UU0tGSs8^7}$>Oa&wdX^*^DL1^{ zxL8-W-;#tl)2L&z0!lW@M1|j%1=EGL^N9E47Mtc@tePm7sN)|gPGg{3#-x4jOKMZs z*8IW8dSUNA1?S=#g>!ksNzaoLfC=CZets54)soSL8tSR%A=OnfE=Rf?ESvTvJ19I4 zHnv`x>0frB$fbe=qso$al0X&bGp`Q&ZtE@vX3Z24bcXlaCvZ4s#f>^oxG(fEj2z@u zMtQA^=n-1mj{1BC2_xt79S3A16XgbgwH9lCS^DePDNe;zKmbA+iL7h_GJ5^T#3ea= zLl@_Ew)WS=V@8a%R{57#j6UIgou0iglWyMpYhJq5FCjJIa>FP6wg?H(iWYtd4W;H* zpChNa(l%9BH*>yD%(q&`n&i9UyclBJl?Qx;(Rg`jP{&WFB9!bq5697UXx+S<Y#1qBsN)``S6bqTXjTG)KUm0QCMfj-q!ziQG;%ZinfFOYTyV5U(C?K*ipUn z9W9O7ZASe;%UfOzIAxTvtf8RlSXi&FFEht8n6n}t*nM(o-m5WkTQ!M#Uia5D7md=w z*X<_xjqIXqR+ByMe#<|!C&9Ap`FwFc39t1!)~4rc`j;NgC?p?ZTXWUyK6B8e$cGV zCp^LwKYxG2iEpJGQ$~8ax`+*>$%L9Gh;4(GuffRIE#E{v55^IN`^fmY*v)=VT(fSY z0nv$o5eM1Z*bMD*Bg~+M0fCi71khSTM%XCxjq~pIiwIYTcb%1Mvw9%tV@Q4Lh``5v zvhA6{m(}K4f=9Qt?o0e~e(LCyi)lu%!}#ICpXH`i2Z2#*be&0IS`jHt3K+T4qBlJI zklEOcW8Azl*itRSCDi*=;;3WP*j_oQ0acBg7oIFHcJo5~M(g%sJWu~KX`M{R=$EW) zkI>jGF2%0_m*9h$fMiC-QgD|Nf*8}=t@4uuB}8+q>{9Yat(~>(J&MX17PD8r_^(@V z2>KLwdqp|%;W%|%NZpc*qu0yeQ7O-siTIhV;`8OTCZqO?ewLc%eT4Vt$tRmJ3^XDF4+gJ9V z82G%+Z?1gBXnKR|$-9T zRNERxxy(788z^12a&l)63M2;Kx^=hF%Z)8UTAIuF^-Qc9m{&^jI689rY)p42^|)NL=26N@9ApTif;N7$v-ls5*ue`9{c;(acE!B2rdN@_amTu{TsO*@?q$4^-*C>3OBPpMvd z?|i(ZNhqK~h+rUWpBW}yxGz@wJjK>IQCyqx#M`APsdEDbk-^+MRyy zW$eP@Ov{vi?4!;5Y!(cj+9&A=3nWih6lM?TWUz-Tf3G?yetbbQQ?QLIYpk3$B<(J* zf%p9A&h%WTeIn3~60ROgH7v`v#-RvJW6htzyCb5x`zllbRb`lHwS5e(jQ;wsF5aQIaT_KZ24U7Qfs}%8#BrQM_ci z$055O+`qJbi{;!-3yJyxWW#+Rpf$w&c&W(s+K`ekEHt zOh#TW;>^E%Ot;@cW(%lx&(A?rPHr<`@**H@%nB?B9e1oaJ z&7y4kP_%pXQ+erBf6b>>C7z8n+BNRIq8?^b{()DDt)E(%UoR;qSE9ZZUKA_QwOeyX zSH!u76w|SZ<4evX>;v<8;t>{UR<&Q}E{VHMwe9CUciTe3b#f9(%%@pQeUR2e#pLA4Q#CHEGd)@t!YhO%^aYwgpJa_yBxzV`R8mu-rT+b5iO+-^Ma5-pTmu5PV zG%_$!fTeNRp`hUH?!6j)-*S2bvyLmjyy8xCqSd3D{^U}XT+P6FBYEkrbBd--(JI{r 
zWiciGP8J5y z41J*f{X>0P6<<}EfnrEX-g(*Ws=CU{XeZu;k4F=m{S#hFi2MB%_mLG3^@)u=>)SG= zw6gg-$+&V{yyT3i4@&b~$x#SusQI3SS>bK4hU#?7mf*9KmT_L+g#l9I$XXpOZ-*NGUEz3>;8 zo(W!@w5g_-m3ex=_xjHW(djjrCOYGv^=?$$0O5FQ+X2WpAm0>|yuHN4H@JP_aiC+m zOhV4wXv`~Ri{SAMn5m!PlikgI=cMzDf zF);L77TSlmv6-Pa5%AqX-^*F8AczUWFGB=!=q;=(MaLaXYtdiSc^x;xD> zC6VcM7`o!JGq9Ey`?fQZ>7xjQ?ollgmKO5ysV*|eSYLFR*|~FnE&Bv%);<; z*N=sf>^*W?c8Pcw!qSl)Iaa+|k)!Lx(!{#bzw0LyPd7c?@^O~84O=fv5O!`cR&)^@ zrq3V6?sQVa`PHF(4~Ww&O~(uDAW>du>-x~KfFio7SubaH7U2bvr1kepzd6ffk3DP^ z94{{mt_%|FaGrcT{nd5(R|O4vSTnIA7m;!HEO~kM<<6%a`}c7ywVQl4VU!tW z)Hs;$>1A%?s2L6~E$#lMnoF!+DUs3ck++8=6_pJbZBLx6v4puw z{7zVaqWuMn@5)p9z-0Ce@EH17jb<;4<#ByJv*!okoVurdf?`>>v&}vi7Jil<*I~-2 zX+1XW?9^S<-dneg<=K`E1zlI}Efn>hH4ezDRa!pFlsGC@{=EMmz!~MoaR=NkPm0HMHBPu{O|2}|eV_B` zUF?p2rQEJ+@>u}kh9&3}`1|YmEL8}bld`kybF-||SB$f5Ek~C^4-NT9RO?*xqRG~x z8Eh!=h>L#r%Mo?h%a5UyR_s?F;~Hc#?LVXb;!S9A>RD8>vFW$g&h?MfH8ki2HEyCY zI2W|{^2Or-2#TfNYz*ktupenCId$vg27Te{R$1Jl*W?z*J97ILvsIVO?`MosnKw)G z@-JoOVYD^r=e~A~WZ)}M`&0+ZnqzBAUdyO;k#a3B+qsFXfxH}Dt<+DgN~K4Yfse%i z67)dRT%y@xO-R28kGeyK)hAt32`Nsi;=K0~XV%+?CkS$IZEF7h$+K?FzDEJ!@g1hM z;{y`G%^I4HEr%pOJu^=CcUn|q`}8SOe~G(D)iUia6#Xvbm#ijAm3H4A_D?YnO^tHor$+6b z89kfNT(7;JHQ%(%BcPn>iRzT)mVj%4(fpFJ#gRGtv7b+2w<#G-6JZIyCuuP7Ard79iR3g6O0qr9`&F<`#ZDBcI=`uHq)_!8H2g4_6L2Ud>cLgD zJ#t^8GsE>?>><&wC5Ba`kX6S>&7k6CyXYn=R-H#6oK|cL{VjOf zn(4WzpD0Y)N$lJG^rp?Uo|eif88^?-WH-046Q>+h4Gr5)a^8-lXJRr8MY|{Zk`3#M z>KnGatesp$%MgzB9%mcW)EyW!{Fm&>^q-z!L64|fru=(+}5d&4x zLV;n%z=Ms-7Kwa42tT`(%bkv&V&gva0W_6FKq~?908D@$A4AX+EhY`cuN(py$)MO& zBP|={@aS3&H_162YJf6x99?UPdL7id#Zw$OkO&UO%&e#8Q=QCD(oBu#PNP zo6-f0V@M3DW~N0)UnE!|kW(~jC7R=-Ht!^>mjbZ@(u_yJj`D74|MjSK*eD)I0S+-K zw38nY$p$zOaRdcX+NBq`A-Rk1CSEG5Dw6a4OVO(!1l@2PK1AFF7ie{MBPbj{bnh}5 z+1vlLopL+_Kc9ylZ9ynqqn5Hr^Y2wamGAf2Ccj|2Fyhqcb_K&SS6SyOtoTJ2S z#35J?B$m`TGW0u0o2iM66Dr0gZFr-|H;`Sd8vU%rMyr2oA|Ab7sy;`Ovm=O8hCBHZ zr&RCs<~#;U#p>~ZtDoox24r%OTs@6+;^d-~4>%AFV_nX2H*P%X96OrV1rS3<#(lp~ zhv4tGEv0HvyhQ)!&sFbW{33UNW@w(!RnAy6I`tD#GCggd7E7Pr?{x8O 
zx$Zl(#DqLNh*E2?m3+}aNnUON=L?M%xt~VCRifU7jvnH;b5oSydS#vWioYT?FaTZ| zT4H&_|4})m$iU#KV7IV?sQKr-?fv?*teEWo#$dscK~vTA`$S~B7||RbcICfy#jQ_4 zz2Ki$dQ#d&CN3dC2PNpysM7~dbqmpWbo5<&Z_&4&Or1>6vB-))&&z7V{*p5uS2wMR z#Ny6yG}2K~aX+-;E>%@L zJCf|bdv#HQ@%r%J=NGI)sp%i!q{Zfj#zC|Wg!xp<1mOfbC~vlXwa5;7seCGx=gFTs zS?%6Gx0b(+c${y%2%et5(y_`s*_M_~43`MbB9KXkXew>pZrreQ^%y=v{3TbMolFFNzJgF8JF zUK&25s^XQ(?Zx$_<+L5c^XL9N-@|@*S7h~-D1`s_ad8w@=D2*70T;+gOe(kFDgiAn zL0dF8=nXr?rNR#+kE)D_YS>U{=77SrvfNH znGj7Q!(f^b&1GJ@!v0(Y*?O!Rk-n8RHAJf#FO4+%JnGk|vbxFnBqfnoepOb%Tk2D# zD@l;m(ee2{OGw9SU^G_foxkw(X$JMp-^&;}SM2^BE&r9?bJca-;mhj(W#wz&82Q}* zFhDeBeDFW!+Or3Ep{qnEt*pGf^vvK{8H|NbqkGT}puS&1Cqe4ZdfK1H z--j80G^H&}l-ui2_Wi-Q?0%YXy(GZO>d~JkRL}Hu8cishgHNc9{4Qw*V}R&-5NqV> z{v~W?9TjUNrik|vGdQTDlDfp8l4U)9BQ8!O9aA69Y3wa(JO01t3i^Gnf13h8j6>hC z6xRN1(#U91QoFtIq6WvDX^u%>uY=;$$3E5MLVF6L@p!cz;P+{s5ifmLdTv zC`rb}QPGPPSPy^^CM#Ci9z1ZA&KJG^7vELP?jTxmh$H_TG4|IdL^Xu)x3ru#ZMTZS zCR}drc>OCI#~hj))l^j6btzjow$%S01cNf??z3lys;Wcf%=vzwjqD9Ne97+YXWgDI zF2RLbb!K7*3yVn1npP7N@$EnmaqpfcHJ@Py$)29fgUlMn+0QZX^G0{`{5gS@7(L(J z!zWIjl!LScK`SMM!lTGH2~q>4JcFX#N`T!aCuvuyQa}pX91w{k=E4iR>2pF$3rpfp zpH@!Veb8Ag%axg*cZLuQAJMQahW}ACU-2NCe~IHx4&Bg}^$lq8)DN@y_Be`so#48I zCl!o|&g02Gt2Doo3|VkuDo2>-I$8sN_{9zm?xnW>zGr53+#*%MX&IW}7KSl&=NLq8 zp{AcW1nsdftgnRFan`LK5eLMz}En_;4(R&OgT>U$5Jb^qjGyEU%mpH#@ciIkw5yj3Zsc5NfSZBZtBAU2(D?T6- z+oG5cxM6$6T707Yyl=jil}q;O6xLa{zo6!-Do%qEm&&RtI~lM-eVPqaRI6J5 z-W#z~nt^Zs|AL*QnyKeW^b1g1R-Ywa@)j#8sgfT_vAxD<%ZLKDnBJ@WmazM9aL1!Z z%0OM;cdAoO9h#Y?rHyFOsZ{O0?FUhf)#J_+bn$mJ{!eOO(;7wv7QeFH}Og`YjzJ(ZqyiQwat1G49?C z1k&&)!NyEQ%Qzzfmj3$)+ao6ZW%K_PUStC@?(`dInEHgcF zfF2$j^TbZDvQqyzPkwy&Y$`LM;JbPZc4AGrzzK=tJMAi|iukYZZ)MK0S9jN3f8WF8 zUy>n?0&oA0!t(LFe3XDXA!Ss)uKidXXi%$1!QsN&A z5lmZLsObL#y61ubKOJ9#e_q#ys=A1HKR+=ivAU$B8(3jS=(*d_e8R{md3(`NT(VFK z`7I_^Ruu(>hd!!Y$5vG^a37JzAd8tL#>NUm{X5n6HTrQ;QZ2=2Kihm~%{q4bSrf@1 z)=SV>Z-6R9_O-Wbp-F3mCg=-Zo+u45!XzWF%QIv=!B9E$>msUm0H{nx#vit#=KpD1 z?m`D(@lt7--cLR~DFB6jA7av05`+e3?YSP0;POoUx&V6OGwb-~LepMPijQ{WN`vki 
zfp!OfFv|#?43r5*Z9TAA?fh-tl&%8W3AJ3^4=hGw<)zf|+Fp*T#GP{9zFJ7Hul>vy zX5#AcMf-QhG?`Hgvv~msI^2iIF;5=yoH~5X<$6N&jS#FS*uK*Mq+;+Y|75T@J$-;@ z_Txip$|JweP+$1-#5Rk<0~#{iJe3E~W73tKkvqE;!Bg!0AzS2h8-+z)Yt;y|vEKP~lbr8Qw*2$^FUamFVjfztRu4U% z{dSw8a!6gy_lp%zTSETA9byNKG|9<;EqIJ@{=^_auYiOO3PQz#g657x1c082-$}Wm z_U|TjM-U*M!Pw(mcLEa9ZddkIo#(Z)^bDj`zkOu%l|T4vz=sL`S4eMDh{?!IzJlyzw${DO6qEb(Lc4dvsjz5{ud4%_ zrThdZr`&$`IBlsvTRPdDU4&^KcIRJodv74X^6A*vXirNUGjdCaw^p9%e5s60@XSRF z{v_v>^SYT~kq;i6QaSZB^$Fcy*R#p$8#>bt_0ig>Wv#YN9nXXI2iw6O4=O%Y^P+qOserN zWSf=8+qak0KbK?!DOM-O6pT8kiY|O*&DO>v?7X$+&+iYi{%I~J+oeOy!Ed5~R-!#? z_Q$9ati!h6+XQ#Ey1L-}aq&ObP4?w=gd!k^ytBptyF*S_k{>dIMvFLRC&W8>k~}#NsN^$vM)6?&GRryW{(3H3q#!; z6nJsYj^pTcYBe#h9pV=C;r?%v4j(i`?F@#(@8iJ{n3=oCyy7vLv{2SP?+@DqRC_h_ zjK*$;m7M(keZz3q;#LO6CyF<2+|IPTiOKdKZn?;&apAw`8oYMyoBH+o1N8dfK@V|}Lqlz}Qa7Jm)zRq?Zz%f@L*n<@3^x2xI=LG0m;7*5R#rBl zWawq*SfNv&p#~&gWG3#@qt{gG@ngE3$2dhR0i=HL;5xxBK4pDkFitDM{@tfV@b_-1ueyZ$gdDjqUUpG+K7Q z<>UVQgm&!(Y|rWh0r>$NxR|tIgLHFeU3F*-+Cy7;XYG;|G>aBz_`kZj`0;b0DREzWAko|Bp!d4s*ZqwwB)>q zfZK)GI|`1{^_)Kk2HNkVUIh+SeWH#|^;PjIdNE+6rrHY^$E#4#&cqZb?sMZtqnOR# zxkx4eYgCHyzeIl+=u=v)(3kz2+E%E>5ohvu-6K1QB-x;jUTtV%5JKr!PIYwwJ6|z+ z+M6C-(aCwFyjF2mQM(17=)c!Oz!=HGt_Xn*)9sX=V_vKTAOyE4fjPaP^Y@n`Gmj2! ziIht`AFnTT;|68l-vL7QYxPE&!w6{B@8j^U-BIL!zrnxl_hSFrDDY}lXYb#i3My+$ zNgsZ+k^B&JLWnNo1W_*<0wNR7|;wK13+4Mnb022u(5v&)yKOuLHm9_YrwMgVN>t z1D?%?n@{a`*K}SYX;@S;aEuUV-y?PIf*0THGy2Mr8EgW}X#EX9S6$kE+Mc%Jc zK2Ma3=ltoQs|#s~lg2dX!=z@$yS2z&h0t-z%un<8LwsxtKbhrP!qz$1XL@%d!~@WT zY6xa2$H>v6N7-+E6He7Ew>Oh!j1 z7Wm{}trYwAF)=eUCzOZ7M#qq60yi0%Kkl&FNSeP(Lw)vq8XJ|%?r)dxd_j1N{V)>e z+}zx%`BsTU6Rwy~+X#_PCVgPewYt2$XkIXg5O!QW>S@$Z2g2@8C?UItL7Wd_%s-}G z`PuywP;JaI>*7HTn%ugL+w0Lt%As4e*Yg0I2sA%hP*^Ijs+wfj^qh3r;lP0d?I2M@ ziw|$P+wxfG&3hO zomvtc=65{uaVe5vbZA@l;5O=FmW7K0!dinr9Un?)4NBqXypBbRvlN}6p0zr3Vf|#ACl-0$)@!9VK$MWSK^e`uOU0Gi^U5P?M)O$4HP_1TGJ0}9~ae6EN;y$s;9=! 
zZ5J+zzHiW2SR1h=RfxD5{MNCBi{<0-$XQFlk z9vOAf?~hiii%A|jl`i@M4Z}-K6#3z`)2dd1MZ3N`)%(nb4Gqv5g$-p#vSf90PCrCl zVfqHgsHL1JmMnFBTV^zKk2O*ZZc1%Z(cHOkanV#r%OI?F((Zwr)|Z3-)blBC*Gt_{ zFNRn1yXjhLVwB^s{NT1bkBi8j4Q?Y_BKr}=LdLkT_F6PcWP$r;5=>sScO`LK_E#8X zyYTEi9Q%L!W$pjA9+qLJ`S-Z)`QLs47F< zRaaftKd6oi)J%f*Z&14I z9zIMKnMy3-sPE5j*13}jl$B$o?k$DQqIHD#nH1rU(3{*e`jw0VNcr(Ib@-gOOxbw0tfft3A zi=b;lT3YL^a_V?5fn{Y#V-s^a+i0Kjo+!0%mSkU6)>-XDJ{L4x1ghLQ%{mj%g)Gn< zMRsHlvQ0mK+K9R7OSyA>veoAMd9w!TyxhB-OO#SZHLaJ4LQsui)bFqoSL5Z^L(y_l|#b z_sig;qN`}Wd`c&pgsF#;Yv)vy=iYXsF%Y0`pl|^Dym!K;x!8GyQ)EY0@ssb{1kV{7 zcYn;(_kt9#+SDs(wX_%7TZ=v*hfvlrGCBL%4a)+8N{|o|tS8)mbXsGjHU05-a|>qE ze?q7X;{%6@&Fyocy>a``e%OlkeuHh$uJW?jw^@xwpwHBexqJRO7QB?2wlhGZocIwq#dXI$o*_AWcJ3ZDE_ z|5?i7bp16LEv8%cY|g(n^U%$0t2Wd{%OIywuNk%W!oKRdhZt2Ke*!??PIG-NbsC4G zc3+i$8AG7D?9?x)9O<3XeEmx3+lG5_%n^GW$Rkf0nrab4j)Wn%OZIn^E%@B68#Vcl z9&MHkJtvc_lum9y3km^NVgfDl55Z3&_#Ib?lkhFPTep)pGVHvy(cCD`%dhV zA8nwxlXl#@=qx(gh)5oWKrXSb7q2Wy4}{yBC0yKi?_S-OH+wIyUh^i{;t!g!#Y3zJ zNRHuLejs##&tEf&KBCJ2aJPX?Px|)w*s8V?hU+emn|C7(y^4Su#z##~O%V#Sgt0_$ zvr|+u35NK!2iw+5+D||9KdEGds&N7^PH^fm$s8ez{&q9P=HS@{D#8%S|K@gb8q0+e zw*Q`s=iX@^cBDITLfjeioJ8<^0cXY%Jy&fmbKO2POa$h*7ntUpZ8C>b0p=F9ky zXZzvapNGhPu8?)Dqri~SSXfwOSq(J?lQh$nBmz&jh#d>t`>x{7lS}{p75+H#`t*)}&w~ik1*QKn zKzkQvyTIzs^JfDh+Yw{__scbBz_{XH{;4e_TZoJ_6Nf3GfuIelpa?FJ7!y62b^O~j z-UxcvQ;_X(Tyv&j^&bOD$8~?d3@<&ioz8|y;ot3sZ1ZXOSO0!X7~B7PCuEzA33U43 z;{gYeu=#tuJ4oc|{vQ9c?d|Vhk^TE|^^Y)q5&x{bAW%!_QiPc0GbNONEOstlm>!DJ zD7d|0h7kE8+qL!g6-DlO#zRVi-9_W@<3jNaq45N_@lo_ULMj9T7n^1%G0;~fiii!1 zW5i+Hm5^i5T*NX1K9<37bK*?akV$yJ`WkJ`GN5a^L<$xQ$XXgREG(bol(6InzfjL<1>8~f< zu&x@nV#p0*|7}FTc6f9EpC|{RkgZ#{VpI_fdXyQTo-}4NZD%KNl~_Qx*H#iAEBFYw zZ~S{l>INnhh#XB=P3jFo@+=3yHt#U+f7$0h|O zr4hofAo@IxhaLGk5w@lBJ zLHBM1HmrAzo}8?#59Z!9tXlCTtDFr<`NVGZ z&7H*vcGz%Ba$vV5v6N5teF*7V4bu>WFJj!+KUTLJio?4xn|kd|BswsOt&s!G22>8wt*y&$ksD?W&7>f08ZUg~n z2siAl!#J;rvj@}b3f1yXU6BNd8M!tEWPQVJzHPyoTRaZW}FkFt!?Lz8OE4owR8w zDW~`#o6(L_@PpJQ;$=$d!ph2e(7|c4x01MTY}G(-q!W8G{+}H=gmFNWQNMpLj8{~M 
zfe5|W$48I;TA8V=C z9J-E>P%5*Uge0Fu|f%mwt;pPg__p({Lt72m;^Z zTNuHFe`3O^COMe_Tz^tRf`L^;(L9f1pS@QzK2XEv>qGP`4@_eiBTmgqsI{ zbEs_$Alw5fZmc)fQ8M^+dC3lkUFm1%vbE3m};IyoLS+FqQ=d)8(>R6Lk%N>Qr$@(!NiUv z<}q^F4TQuJRQUcrw_zjeNX{O3iY)UKPino0J%VeYRBJLiRLs=^I31iN^YAB;vc^ zfJt?b!$|%Ofu_aO5&ScCqR^}`f-cJjVv#DI0bDkJcC;g&(6$oz_2WKJ?lUmZ)4QI8 zK&_B4GypAZt`1^DKKw{=>~%~nR5K_KB0O9hfdIgS0C@T{%rYMna`i9sFv~ zZ{NPD?o)$gaWfnrzKQpHh(S)gVFI`EHixV8=6zy9KqFRyr7)|(F#wXv1kDllh%B3a zKi2eL{}hrsG_mvGCj89uhZv!d(t66rs)nQOI4mE5`AX?5hj|Oarz5;_;`sy4nf$g# z^tTw&Ouu64NdWw)I8EyDWYaP6MLMu-w!Y82Ub2x;6h*R@@Q{t$(SgbY1P91}QZav? z7EzvqgC|ko*VmmhRdqyb>dI2_N<3U>!o|>Xn0SZ^+S=Ra`|t1MPk>6#RYEL_*a7=P z%v(c*CZSUn2whlEpo6=*AxfKyRU3Mo4TO6{g4$3L@NTJY%s_JzW*1y&(D%znPzck8 zc0?)@`xXTyCiovp8YG37DP3{BixW_4Cy-{V-YrNsLA3%a4lo|mH9MlCkD{hQ2KI;o z+)+rQ8Sh6Kk5?PP`@8mrOyl+^T`UQWsAq)?(N^3y$HDvQIydFwTzq|ef;(N&%Y|Bq z2@ljKVA?e#rxzvRiVL6D5bA}583K>Wz(R|7BD>5^l|A)+OMFR|ABI#qQ6Y=nCmn88 zmrh6Dhrkg+5fT;;5w1JCCZ`^wiv%ar7>^u!m{tiThJ@9I_=6d`GBrpo3K#fVp+S}a z72oUzZV_U%9#%)U74ZqXyispLC>mN0)VAzXQg2||6OI)NG2Ujrxy~4bb~Boe@iIUH z9q#)@ur@F7)xv`$1j{z?Wvu2+qy7y1UEYCOPAWk`K?J`Q+t=RVn+LNj`o!bn%{f>>XBBn=$~S@C?0IVz;?*{xQAZQg+NHE~s|$7W+9tn>Hy*ERU_v;VI= zk^dijTz0}$Xx=LzYTQhSAY;=#fpLpE_jHt{x!uW~vnzy%vDa$gSz;I6pVA~UD_xiC z6@3;=NKx$G?cplart=3tuSDqRVMHTRn2g{r;%V(z^ieXsw%rLE$Z&VDvv)keFye3}|GO_(dA8_)BzLjtr)l zO8@aj+dUGS>-}Y39WW4vNCJ_D=GbM|G2mFO5UoX{k|7?N>f6!V#J%h}CY7Q>v3qwE zktqVW)d#q&zhRvkMqYxdN8{aPmad{IyY+9aE~wi`o<;8a-_kGy^p@Tb!79R(6Y-#o zLIe`M242p;gs}ga-;6-nO)egV#?(iy#!M!dk7l_AJAnP&%mphRWfj{=l)opJU_3I3 zn|&a!`(b2eV<()UD)>YHW+5eWtP8r@b4=CTp8%785TQOq6dB!1|5?>&$0_be-9=%- zR=j$gRUV<;?gfl4gut>5yCG#m>}|$IDO~T*Y~QzcFQKxATCkOs?_IX)3M~ue6Lr`3 z%G|ujTdPWb?(jDg?w!Y_&T$w98>*c_WAD&G$^{>mJeR z)#{e3T)2AS+RF0B$I*8ZzO0!q(n_D=8AvIdYp>(E-|SRtx1XFmNuK$r>@G@|w}`uP z+fLGxDJHkn?tc8}&(XU>+|I3Y4Fv@S^i1DW`Jddfb?e${*-ZE^y>J<;XQqxFJ7ziY z?NU_$vjjMnmP-ytkNQ}@%FA1GbFx=VJn(d?Y%Z*U-^cMGCLbc+jQ12V`wMXaVNp@J zXyGj<6jLf5?WUtVck}QLD$ZCaJ3{Ow=)r^a8+RPEQW~g{vh7IjfHu8eltuM*|1R~DSULq=A{B_@19{`<%3+)bN= 
zE?4kvgFW!)py&E%(%*;A;;0kqbsD%fPA2@G+=cuh+6*}@pMT=C?9a}L5*nq_VmzUg zRt?dkQRHf`E>sG1+>_pY zAFeksp0nJTlX_yuo3_j@6^n|_&#_yq2CtQqU7}YSG|Ieu!Fs`JVNu;r##U88K;YuC zia_(3?H4azOi<1^nwFO4$E+3rYf|6tb?J?-FdCxazwAA&{>-Q)DdXB(2n?|(zqo^M zWD>3(L4{w5#{srRf0x1rY};>Py9TF?Vo>{?P;f)WX8)_aPGc+=2e`}*A(i>2q~tzg z3;`z;lE|(WI{|gS4R50Mhw`1s$iv~t|6~myIY>>6Sc8UWcqT3|0Pgu7lh&gg_avv~ z_qcz59Rfj;ZTwK9#z};sHR)3B26KnHf8EdBgA6)q3z9$j30L!d*8#pz0+b8 zr&H;(or==~3sCH+Z&;n_?IN#{wsdKG$CL62Zy*jMcKPz1rhLYGp!NIlEpPPQPq6B3_c*YwWK(Fa{1#0czM?#0O<(i8g-3TZoakU0>0xE8iaz zVG7p!6hfMN_U{+L>unlk(=OYNxA7^mLkx?q>;~#I+3Gr!E{jb0r|@Nh<`OE${kz&*|U?3)3p6P zJX%{Q%RGMa1Tia4cqHdP*)iX{fByiV9Xr&f&vA3Z`I7ztb6^5yO9uRRYyfqzwf%Ee z;1&^ImBN{mxN>Ft%AE5`IUG~`SEezuh+jX8x(xoGU)J6J{_%pKAX!&|wKP?a0>alD z@p@Ph#hM;&aWAK_KUFf90e5CEvnqSjt-5Dt@5{!CXP_b$fhz_gSBfm41Rn8YH@9d+ z(0CCP!kgHA#`h)Mg#mb=H-I@LM3G)2^z#eZ;h_bMLOYI4{_wc?m;qTqY|Mqp-4+fz}?;|e%3exzVXV12IcV6!H-IcyK zw6%AY#qpAJ9Wx*0tKgYJmU8%j7r?Niu|*PQ{+%Lw&1 zuMxo&3F9F(&P*T@S5D>uk4M+$38orloVY)05!7?Dau9Urw1F?x6rO9;Qe}gHX;UeT z7QeIzIF|?RWWX7=8s(xvZ$<;6y4r|8A1FinVTSI9{p-%0R)2+(yXz^av+1J5V9~R& zvwJ=&pkI3^#%AJM8D7N>G@DgaTt-Hi(v_UL>ct9*s7rtR_;Ci_JZtCYB!!bb$XQ9X zwI;~W^}v~|WABe}TBtm(@=~XL3RNZt?(C+f4r{<d}(YM%+2%c=EM7vW@jU4sukT z**=xMbNluyxl0&Wl3bsuG#p2Z;W%dHsATHl+9C0fl>;M>io>Y+vi&krs+5RF2;~=H z8tStfw|-As0ERwg0^zIYBe^m$B1D?hJFUCY_gH*~K4zp#OiauKhBWcGk4c4|LBz-d zyYiKCryhI%&`^_C1qDRrE$pgb>Ok>EgP zI=ZG@T3!Q6x#yH3>6$KIhYZK$QU>?Mc-2YuxV!au2+6Zjt>j&j?f&k%(AIALYWeD} zKSm7)-=#4M`?-tvH(ERWYJ-++$G!twmY1!aK95X~b_C)s<+69> zyM=x8b<2;J%&e?F-rl>26%#p{M)Che*METJ+=l<-c#DiQh>8X&np&it5mFRFp`@gt zy-U+*P)eatNm^D*doPu=OPbnKQ)y}aUhmTL`FxK5_kSP9!||Z|e&5%1Ue|S=uW??r z5L`Tidg159EH+Yb3&m*2Xa~{73mGQ~+?X4NNn#pp4?9kYAb&QUVf*VlcZ|;XrH0%} zN$=ty%+?drnytlBtN2b)b6s1BL5z5ozQY`<-G-CLl@I*mnS^2#l!K8=Ni zh5O3qp0mG62itP1tIrWQxDTuJH!N~_1%-mDDglk$WW$Eyni`>z^!nSk08tuLnNFow zRL+>={fRvoLn;$fAxMQwB*d)-Xk}z%m~t-v+jA_~B&;`$voC!$a9&KLFAK>3d@J5X zR!m~&L8o!5y&pbBcKgNY+nF#al}Nu)**}|Wn_oqG?88w)RVd$Jv-rw& 
zJI}=q42T}&5qz3C!2nh7ZrrvnUbkXra7c*A@MN)#SCRi<#`7jPs%0_FMTjb#N8%fXOB_SXE*JlN2FJBD{+QK88*a9EiX)F z0d9K6b^acxGZ8E?>%%Jk7h{D;8tP(t9WG(`>R9z={yRqI z$|ic_4W8Q1SxgE~UX>ea;}oLe8nky4z2%~JEJQr%;T%;}d)2WJiE$%_0pU%r4&A5P zr>v$tvik+?r^k2h=8m0hInlEe77_h*@vT{>WT^SvRD*6oqGoRJ6CQajR(^effm=fIDcT}br9(W!d4HGSK%B;z$?MrxmRPG_WG8*H6OLmMKMuw(1M z{uV+uIEWo9>U2Xn8Y7TA!Fa|l7B}pM83im@O<(APNhoN5IZ)vqRaQg^N%^6}rVRjY z->F1XhdNC&VNKyE=Pg?d2@cjrf&*M{znIty%v=KU_iL<_eN?$%SHK9pNYNd5JakkS zz4HIUoJka#SCjPpI3M6s6@!g9S;|?92ok#J%P4Qaf zF;|ln4BO+i3iPUwb7(TZSFk z78e&Wku7SfOFc~xXu+3?ipL5~v%Z1eR4fwDkoNxu&P&8Fu8@0)O)`&PuZ)1Z&%6|N z152C$8mdLc7R@`&EiL!*@p*$hR6))zB`s}$4+J@P1FB!C@jUV~uMR$(?5~PuQN@K! zD7R<94%mUNX-qa`zgfg8{Es1`K#`Gyq)qQNum7kT4OtQuqo6Bro(rh1ltLtmP>TJY zn!+x4fQn5f7<7Oq#@Q7I0xc`+>zLXUTWxH8_$y{*Wo6d@`Un~1XCEH6tKZGDtnEg8MjSxvFC_rZf6 zg#~v1RI%1O40P#%#5sqm;PauqLZ_bbJYSMwz*2jJ>Z&H$P&LD9@YuO?wD?v>ny0ZI zeuLbfz=_j2G*kw(Uk$Kv^IxCTj|$Iu78WL}h?-tgQ&VPn!4lGfWs;6xzh3nw5Jd=5 z+-JP1qft|giG?_tiHL`E`a+7)l{aL8ceo_PVD2@ z_$x2Br{c1yJ&y2$64Qz#2)$!?Aq8;0R+{TggbUr?lY+Q8=+}=JMKv`>Tw?PE$4irh zOy*d3$W93>F5;X(+82ohgOYgP&remut5bXmg9ZGu5M>SO_;olC`a#9L#jznSqV^A9 zO32){fA>N9t0@mQZNKq-*ZXIWe-;jZGc}V)rpWy*xW_wOTsJvxpHb9iEvZtzt0f0M zTmE7954_^Niy?5EpVXcM2dELUNauf#kIScyUq2tT_pmP))qj=5a&?Ipnh z;VzW_b|VXP-4V2n6%V(p=OL%`I!ZYfBDG^?ophI8*Oh{e64v*clf&dTa^a+`PdGUH zcO5&Wi@VD%<;3vj%OLoo46%aLE)Z5MQ@V(MWYxst9hsN6ax4t$uo}ya#lxd6pK_= zRZ|?@MtNH$3(8lO*%F<$-G&OaO>95vh(6&Sc8Z7M) zfVClvhyavuYb@TP9yr}sNb(SB-;({``NN#hzC3|cz7`bKe4ofQ+FakYqHXWBKO3WR zwiZ^G%BU@7)in!Te{jO!GqAM(xZ?Mlczxls^MsJrq zv*(AARRM|8h3&2y^XeP4sc+4BeQ@Tq)2)~1y55?T|49CBWJL5o5R&*U;`CX!pP5?q z3imHf-n^3do;D%yV{20T#&6^N%B0n%RlZW;528IA{VpE04`3@57~do`p?x zA1{b)SKXkx)j)|pF2m?e-~Ekg+pW74niLkCKW#9TldW`cPP7|8n&^Amn77%Mmi zVUwezvula+h>76wSRd(o1E6~&;LM8-;yn< zST^ByPvgEbiN<~9hQ`M|`kLP-8Ux2e8ZaC%4}&rKPlXQsZrzM;qsF(RGE~;IL5zSH zi?sJhNHm)WNkL*jv2ID|@>X4gyDhC(zApN+uPbwx^k+zkG z%y49@xQHN~AM+ak;P&OnpcsJ$o12@Xj_>;>F3h7j)J?v$lx~qBIRWr)DqW4-;I#H%-@JN~cz 
zwym8Z$HTYMl`?_6pf`(Fip;IzMXq9IX_;utIP=WQYctfK3AT)45)!&h9$Jh@B3lbR zC!%4zQjfhhbS$>U@Trq(uBnpJUuDtN=jjafvb%2fi$3sxeS&#+GDm-3#S?$B-bb9( zu>-o-EnXfE`Vx83LV0O_=^R(mJeic#u=FN6KlLyyq;;)>b%SG;Wp7V-ab5nTcJ(u# zgv3s-q5{+jekoPRfCt)hWuN3^jQ~7klC6alAEgxTBniG%c!=R6BjQyzT z9~@i`Se5(oCq~%5dH}iTAs>EODgW5Zs}Pwevb89~(8H#zxZJ!KoBiSE4%s^!P!1SW zzaa%ck1G>I3THUNFC_F=h2nbwtROuCcHllGO%xIqFh#&owm_B1p;Mq zduDR-CQvsfR?E>=@xN0;9)W=jsLqIl4Vgn^OFuwI6j|;-xmDt>OT*OgB4q?LniNn; zcMA3H_{ zUajv5a;kFtq@~9?^5(^YEa=rNtWn4yC7*A9LJZ#YJ3xFyvW#aDH_)^WVuF(3ot);|TrQ;eF9 z0Waga^nw2M>(@!Q>mR*)_s+`c8s82Gl|G(X<}Y2%W>G*D`?7`pP)0Vig4U<#{c*a6 z^xHkoV|DAMr>dzLUz{2LX)DP2Ju9kS@7#GQsW%#uvkToEJi+Zp-&r%yzbg(+U@drzy-;atc-)%Dkfo|W}$FBIC=Xt<}%;q0R z+=6@v!9SC@1qXp~=qBRM&J{p0#g|1gfPjlwByDIJ8RfyKL%haEQsTq1S5WW?j`0Mi zc^e|F%~|?uhy)|Vs3ZrlA_>xS_X=G_2j~4rqfeqJK#5OaCPdOn&wYWTZczZ3HnP?{ zBEd&qDt_k=7smc}^E9A}P$DNqOeH{)1Ca5vdiQq_KWNTB0m*HIA>LyQq$jH8f|soG z(B_RJm!aFX4QMCJXGi7F{;ysspqiKi%?Psz4S_kB=FIJr1+<>x6=u$xjtA(2E%a6YgH+SP+S1Aw` z1|`H@Ragp#7EmejT}vS_$)K$kOyXu8Bq+8xQ4|6I{f)I<7pceq6j*@BhY1n@f?zNQ zD$PPDBzwU|@@Zr|13bI1xM%PfKgjjmlyYM3pf7hXk!bT+Rj4RTbct z+7a8unTm#n!(|Qs*vC=L{>r26In}wcRE#8=2M!uGvwq&*^~?0HW|rV<@#touq$!yX zi{qjJb4BVq0l$@{_cFC zJZ7O~|G|Ubxh_O_`uY3AjHX3E$+P2yG<8$gutV+@6BGMs55y+0clL7Jbz_oZ0&c|~ z)uKyM4A=(`|P0!wK)O3;&#)d@egY|FAg zi4D>FflFIfW20I)XhVazCgR=lu<);Cq!D>bY^ZnJ(_OxYs0zhwM}tWjA!)BX7=@^1 zf=)-tK?Z<})lL2S56^!iTB4=LTGJD8<3m-nvpo3X}*Vly7GkZlcHq_ItmI#oT5te>HzSGVP%AWR}16?D62xe z(h%GkIjnXaq&K0hwQU_kwGME8io8Cwhc5Y@kaKdu~_3O}*A(9G&! 
z+|TX3SvB5M#ph&1@&t2q;(u?;_CCy{w~*RY&TlVWt;@8Dvo8=z1?w=lY zTfb#=VHVAmO;dS=w=OIurkZ}djOLY|H{g-`bS>3~SY{Z<&$tA(^0l-gK(+UmW15Xi z%oh2$kWYXJ#@FXS)hGncgsRLsSKnbE%)`^yw;$L2zD4w;I^|>vInJ69yd3bBlLTfC zVlN;u4O3AL!^V^8*4xQZkNHJK1Hf8U<2Hn(5g#9)gZkC3xN7#yJQWqym!_tdFrJU& zZt>qxsS@ZO-X_sI8-dlt(#P-S?mhu|v)P}4^N0^QXb*r0HOVZ!)O+2Dy;>Kb1Xcu= zhb&=&(#=?gSXdck-amp6$#e~f2djDYWf`@YzBu%W|$HG z&#`Y`+d+|({4JUVz$_C1Gg0YHvO>#ZJPf7DX+weYZz%wWrS%$~OSd3y40$!(+S0NM zal_2aT$`-1m}S(S69OU*=l{#tP;AKN*yb1O}=i#3su{4&MImiFiLBt1b>eq*6s$`uXVmE5D(3UXU0h07Qbc97&LI+#ohu zIXMC^MWChG{bt9$U6ItjYnlJF_BC81|HA5oP0v+U& zm?(k+B?6Q?@z6*Ev3{N}R>pP@4+oE?7ipIY58CcsqaMJDxA=75zA*4N$7EO!ND&1E zMZDc_ZG>0x?Ct?nFgeU}8k842hf;{#ne#fChzd!oZG6z_? z_RnL(S?kfZsjpsD^A?;$h-6}ALP_+%>Xw>mB+_SSA%soaqyUtrS@doKHC$@749%$} zs7HXH1ps+)MzY{=?uI;9zNPbvW{U&GA3JR zcLGGIi@A2V&1HEpNE`ltVi#_?nREVR=;Kd0n*p$?#r&}1jEu|3a2^f$6Qsf9_9A5P`-{XH?EjnxY?`V(9Y zG(Lu|MH;mtya3AHV6lN~U=w^AhK(v5*zOh9{;{zt$QX_xdPUCg$O3~Glax$EGMaRy z1n0(0_^WIe_PT!37I$6{11o$HOT6Uo6vhT3_a6LrCTJvGuZk6bfJ<2eWjPxe5meX< zY5-v(yu*v1c)yrM~B~CI7FZ zz=p3`cV2kPAMG(4(vXw0IEerRmfwpcG8!wGB+01BA0^2(bb*Ji4f_jjP3gl5#OA3x z6ikSmV4c`cD1{tmMtcFebAtN;q#IG8fYh0v5L&@!aub7sp`|@_swAi~7s_d<`Uu)s zSgL_oW0>>}8#i{MVj?JwGr?j)KV=Bv5o9+6-U?{rAg6|X{qVN|X|N~Zn3*i7im?k5j3$zxkkK@t;tBAR z6%-aWrJAx4N*(~&sZL8^1LJSk^Ar>m*r5tYY@Gq9+=VZ83wiNyTXqoY<)1%>uE$(G z6d<5k2^IpLR4P;zy~x^ArA)w;5GggPfMC>n$dO3#2}}p|X%Vw1Y92tTJBjEDLNk~I z!-fNd3dkx_$5H{YW$@z@)6t_xi9{bd*GbgWgr36#;&T!!)aR-Yy#Unqs7NAwB=Y!7^J!^a{|%X0W#UGv}Fko|o(0Y<}MiNeC|X zs?SuIygkf#d)m@&fPGaSdZF3eof~gVDeQZYpqA}Fw&*G+Hz5W9ZsG-)at!wK z6zc0Hyq9n^@K*BO;O6Ki2( zql=no_-7_Tt*q?Ly62)IA_O7caCCGW`2KzyD6M{2b;^FI2@}SPNoa^fKyVOTFH&Ui z*l%!>Ut!ftP_u)}qAABoa&gq`k*V%jDcW%#x3Cb#9nc++d8Q8@V*G#56~pPnA1=r2 zgxMk*UnuoVGqy^>+uyk$zd-*{PGm>rucakh`r&UlxU{r{DxpUS0zP#5` z_^=5Pe5?37G~^A#vf$tGiJGTcPVn3oVn??LgU2>MbvgzHkLc+AXgy(PXBW|8gVLIsMMCa5&n1(Wtr^eXap_m* zsnh&XD(hBg^LzCQ?iCsWt0AN;wqof}4-rcOlyDxj%EK%^t!_3W&F3?1j0qW?gNJ{( z#pL{pr)#-*PbDV@1k<{u&76u@S8qHWcvx7)x&@wC4&T`6rk#&~ofTLVhTnpJAsURCSG# 
zN$P0%$qh|S1$HxR5KpoT{UoiBE@1^CX;1seRAqnS=%Ngi(Rk4xq@=liEl7l&Z0okL ztLYSfpS3A4+}z3UQ&DiJSa7I`dZ)=g3B&KM*Z59xu(R8qI<4$SrvK|AqokT3CmeZq8pl$_O-H-eT4tbPAg$Sn6+ac8pZ-9ppidv`!?*X=Gkz`IkPLOJ#X$eH; zNXusW;$eRxx_zux<-7o7? z@RO{6%oD0Bmww6W^pf{qy@t*X^_>#7?s=wfj#F}7F0b_S^89)U7<13fSMMOB7aVMN zSqWReOxsQ}K24K-;(>g3yG!y5rC+)}aDZkW9rC;V2Y`s`}gl> zA7`NRgPeogpGI6lg6zq&XAfX}3kj#A^K%%%2$CQvXc6DK`qm4*u#Q*ssV5ez$0h1E zsr_ZpJQesrz;2-6La?&kk!YpyQ4d;Ll1POnNh3PC()Gm7xd$SBqOtllsUmxjGf)W7m6%z zfHIAeRIA@XDnF&0{$|OYn(QX-PnQLZL=)BVJ$2`rXYIIDyVx}pjJwFI43fMS*Udat zs!rHT;VY_V-S&bXS39hJ$Ty^1I12opJ<}OGCnwj#iv9Tq6&n8H#VsftRAi9&@~|s; zkTUsoZx=S3Y%+%#=uwZ>Z7N~BWU*1>fKvXeuL8@v)%F_?no?gC#=GQb!~PQw z8foSuxOj~B(M+2W0wpyaYQ()65KKIL{J5yFaBm>{f6jx_%(n%~HE*6W_-RFt(~K(^ zjASfs*+|Rr8@sJWskZYp8ZxrmL=RP5pal;o ze*iaz1==G^cqw$#JsV!6f3H#(rJzJHfn_K6aWjDc{`=!*joF!I%?H-&9tze8ZaU5~ zea>U?_2U<|Z^hHleAkSQF1>*em7Ks)4*_aHo7T#jCw}SSe)&9Roh3`<*`RW#l(^sP z-}FBD6%lbyuao1dL&^BPI@Ry;ZD-9Zx4Fqyjc?Z!rf_*n!gV!4I4vc;1XK#01EKxj zMgXg*UgS=m)J|*ym&1xQV{jS_C-JAuvd+OMet(zl4@Jd(bDn~4ZB6+rH`)Yi5331e z9F}y$)p{|D^QI!5;tS~7gv>Mj;=C1__6aLq+tc2_*~8yMqR`4*)T4qovIw z7;*q*BeV%?;~?L5i-?Tl*UCR9%euI{q|tpLB1ywa;kE9z1Y0D7p+UhuDrUR8Rdp3z zR-dkK*}GwoI~O-C&mo5-ys-MJr07@u{~ zFY~bF2OZePZEa@REOh=mWgbyV>x!n_eio}~_@0+y+r8xP726n~^Y6f8^iV^~OUJ}i z`tE#qZ8OjWLN^7Wc2>Adi7#6f{9%?na%;il=kQJUe!|T#hnYX8tM;C{(I)jmdU8_- z)e&~LILX=q1BGsj1+K`~)AysQjwatemnO(3PJPqWeC)^#Cd-*!!XGYc3LLvfsu`!w z9D;DNCVWA6aR5M@bT2j4Hp2#310Ed6RXD5U5_Kzthb@6}L8ik# zVvo`d0{pbaw0I11y%RRzd)zzt3Em{>63k>qFQlEdWH_M~Z z)cjWF`Hf#$RvK58M+EiEMSrbZ%3%s^xP2(v?~$kc-6#;?2jK2Mkuqa#ZCzAfe~1B_ z=-OhsQd1M}a5y5W;BV+t(hYV=jL|pq>p7pN)Guk<)M*Z6>1|~-cZp~}HXG!(E)B72?o-r;!+_Vr+5?)JfQ zu7F~r_O&mg5Sc3^x5AJf5j}edbr@k2K#D`Ko9M4-YC4uF5%;a-zafyufm_)lWkHb= z!%calL6!B3w$Qs@)_0@7E&ARy552HFc0x>@hhn7pXsBM8l>yh4t(Qv*8uB{!R2P4` zaXHCm#9KM)b26r(@SmAX&cr4;%hkCl~mn)<)#9aS+mr)8x+zMg~5wu!TD@w)Yk zuMhgEj|Cp6j@puz9@%2Dxjp~8k|op5ADazb6@EPG;e?!K-Cf-~|2?W_tA~;33yJOS z)Q9$8zCJc^UM-Dw2cIhtu_*~dvE|aepCYy!rpar$PjIg8#Aeuwj`i!@k@=`9yvKW8 
z+-SNuAF=}~s2%)EkPxjcq#la@34`(JX?C7(XDkk=k+h`VPfR>I^vgx$y^`2Z>N8On zS}T)ohQFh6l|64TWLf+9{Ez)fSNDj`_ECNG^!zGK$Up3#ecQ73wqJiXia7~XP}WDN zwpcEhnwl>y?RvERY?NZ;Pb=jYC-TCoekZBBo_zeqQcu5?F)J&t#_t_xiGkq0CtuAK z$7%R(+UO_qgmL>T58g>!$+-TD8jH}`>1(rt`(Y&!i_euH`Ph(IU3*iSMcp~7#ZB|w z>cUGQwJKoQf)1~O%p-fNWqr{nSJ{Lot zaCHv{haTmA#F1ALj&|0_okVxhmDf7ui?iH2ttQxohv}C{`}oSz#MII!nvE*MPvzH( zyZ!jlK4@jm8lvo?Xzf7`{ptM=m+~vNe0#o^r}j;LoGD#XxADTRSBWwPwG$7I1g1?0fTT_B<~(in9MDarJY+Q@cqj z&EK5I$M*e;Z-hSQ>)wYrXy9D@Rgw0wawfIU zU(Q&$^B?k5M&HmydTEuu8c4LwTHF+)ZD^ZxygSM)zCb}{JHglN83sV7xzI8fbdtY1qJY?5|x zURk^D&TY#42RDqrZ(eX3tFOkRPDuZsNA4zeYo*YAoJC%bcP(>Yn>XZfm=QnsRp=pg zrQwTL5*`OkJ|?8ZY=*QZjs9C;)er7ZIoBUhkmp=aZB@nF#Ue!hx3Z{9{(As2cyjg?QEg-Dp*PVG{@8RGm z>4I8|o9n;LcgO#()*B_6|3$bT+5SV@ch-Ay-p~AT&u`fL{-9N3@#K3&{js!H8Qd-M1#bGkeoHSVOdK{*r+A~` zniz6wH-_SWPbK|scT$JxBX(kZ*X&*%+7+DUSoH8Sb;(9~jc@uT&yx(dD!H*2A=XZL6NKko+u!>E~} zQ}KeDsO(*Sp%a2;Z$#(q9+*i4v?-P4v8P$zUT1vjZlG$)-{ik{=(bYL>Y5UzZo2K- zbjAK3nd7OoQ&aKZBY!b5s@ebIuL9Na{qr}bCM(V07TfJ;z|$$Kd85Ag*cT0(ND94y zJPWec!_{Gzw)SSuAD^E-m&~KaH)BW7&OW_uB^aV(50?@05If>&Z`7JlIZ6Yv!TVgdYsHa6=Y>Ws*ml))+FB$ z6uBEh*R=_!D`n88-p;-dqxhieaewNIF7?yaZ#PA6xWTmSG)`iiu{}T0aI^l>dU_t6 zF*D3Z4=b|6wI`9A2(AwuyQR{|G~N6tfxr07jcGO&w`1xNbPlt=^UHQ`=TEQ>YtSeL zoq8j;QSuZ~9!*C?ME5|arQQjWiZCPsuT&V2)m@wF(OvGO^V9in(;uF(OJv;t&c042 zkh@&s>iMp3C+@in7(kLq*4Th0IG=W49D2?T4|?x|ubulzSvo*QU5!*_3*l!mXKf#Pg~}@DrhGJ$0uA z^5xT3S7U5g?qeDB@I#*ZIt8j9^k!8=`wEfPHl(D483FF;0wBI>sU{)+qbRla*2=34 ziPzWS@zUKto~tp+C|3PKdZC3oI5wuT^Kc%6V(RsdUTaHAA)!8}=R6%_mZtZy4f#0| z?R>QmB0ajR>c*`rtpYr<3)dngoj>pZIjt=Dyv$g3EaQi?LdB^etwAOVnZkVS1lz9&28+lh35eOwPK+qW zfgJA!Z37mZu)Ptca6(=(Mr--Z*&eM6R$lfsZkyvB#rdx4$LnYb@2?b7X%z~nu;4cOR58a!ScLOO?(Qp^F%d5qNSxpfXfJ+6{!-mKrP}D|K`MR zz9Ikq^0OF?7YcF|92(|%B?hwO{{1!e()ap)Ds+ASC`s|)-hn70dtgC4&z%Hc)R<7rhgC>`Sj?bo@hV&{Vlt8a~2(2EN~dS^ZQhW1Ye|9S%KJ} zVbY(O$9um`Q(uvj8+{UcSaDT89w~clb)MM$>wx}MyaM`npOT;6BIB4tptV(|SkRHw z4c~0v>jlJYp?a{DbC~5bRwapsX~4ZBy~do%U~^{(Q!v=F%-$T-a$O+#00{T8%B@+V 
zwKYX|oe9}=+~Z#Tt2yWWN_n-Bx^G3KT6fE|9h)oA8+`SbS7Yv_DChJ!v3Gm_Y`B-g zc0BCZj1d*_T;eBAE0@+mZgm_EIT+3aumOcb@nL6gPx=H(Rp54Nxz5sXN%a8y=>zqZ zZ29Hi7h<1ruULPxg@S zD}&7g`i$1>Zu*I3CS-9hMTu% zWn13SM{p_s+m0UfK;y7O2&M`6O6}2mh)GySKE=15ZO>}V`pqAg_q^gRDw3byC)41y zN%q8jrR#F;AqBE|>c9GHImc(R3);{aV*1dx-%d+$F5dW*M!MFYW6mV_l(CiakH?hP zUlA>Gn1r+`AO$O~t9yzzFV$#O0sV`?7?G7)3jC{gQP*itXrjtT+6T*IQPZW=Sgh_I(j+jt1Zd zI!H{R)N!N@>Yu^>LGFAON(YjG)^rgtwcQZ%q+06yixCR}4b2?UeZ^UjMlUSJjof4u zG~B_baXTPInpuU;RVMCIQK`^&wyK*8AN3g*54gzlYsZtE|Bf`W_SK=;#4WCyM`K)B zXH3k9_p^V1gXpDNrVT$7f4hZ+x1zNJG(|9=D5NUu>vQ2xJ81iP@jh&BnxYZ|&9}=|Nko2444H`Yom>pbsCJ*A7&?Dmw8u?1Wz6HAtOaAXhEpgAd;R%Q+Kp~NMd2%@o)OGzUtn`nN+uL*71TC$;oU@X4i?{ z{04^W_P?YebC{HiCMCHG%~Lw7L>!9(sq#a)NSHrf^hni>o-*W5zTXjdJbrp)9cj*t z^Np2f@$`S~KL&kEDA?ob&$kqz(HEp5he5cze-N_+jJ48g9(}3p(5w-5T3QmA2erWL zJECuP*6-q7&-ky2{w%LVb`x1aRso=u9b#L9D%iX&ZXj!1dHJ^x^?bd0=4A1dxG$?4 zDS7#n*xAv+l;tS~Dy0Q)Aw~DQMP!U5>e;oO$``K=UcD-+VEbv!$HUCxi&ZA?+(R_# z`8))HIl+*EK;Fj4=vkplegkbDy_v-)UKjg6dnTu&!-2C;+0fF=>t-1V{0SXdmJ)Yt4j82w?;{Jh~+s`^RrBE42y*5GTIoJ%H zCm}T!UM6n_^&!~l0DkmS(VY$TG*;uc?&&fkiGO2lgH zb@o;f=vSa;>a&nro9}G_0*4z0q-?2E4jgu&|2Z@Df!;>3H|&wig!a@GD_KFN;tZ#uiY^3}og_bwmf82045#x+Rqdb%?4fF*l&;p&A!UAybn zICmUg^YKAs54t@Vk}{2XxL3JUqRKUGr^ z>RXi>e1DV;A0~}32#Cio%qV=fv5KDeB(09z3jeJlvVN^6 zP+{zw{Ma(*PQn8)3xQYNYl!->JI_J7E z_U+ko7h;26yEX2}!k5_Y6AV?Pa+msw{5j*TdDZyVpXCYZNER*|@SkOJJ{=UlU@89} zM)R0|SERMLHIFxoq0E8%&%-E0ti7Li-z-n*xN-Iufs>_Q!xhC;4wQTRH4+jM9KpK% zlV3vAPV~0=;r*+kH4?K&rS}BQQ=k0*Ya%AB=Fx4VjuzVxX?-a!?&|C`S9SVVThVi* z@|<@FU$?ouCE=#ojZe04?K&$2;$4N~-b6 z@rx_n7rHCwbM2l>hiqA!T8zQ|)TYm`E9`&9A_~&kS52hIwB}zCnB}vj4gcD-wZ0^z z3nW0m+GQ70A~+h;x1|`}tq7`^hZI;oDAhi3|K`OnsqK=-=BJswKmA%q05V+A$=caMmr`A&&EKs?ps6 zefcvK`}e=QR?-p1Bg!f}yM;!*_4Ib?&-d?06{u{7yhxOwLx zov!Zpd{XpLz!q$&$EC7fDVQX!cO?~{^IOT5iMYoea1SCidQTV`S6@1nf;WaP1JRze znH8kB$WA$+XKWL{OY?fK)17$N0`-DL$4yqZsZ6uiu}B>G-8Cr5uS!ia^qz%a_Rb58 zH>`XD`AE$R>l*HKPv6&X7H_`TiOX7t`bZ@sY)GatoXw6&tID)HOouK1_)diRQ$1VqCf* 
z0zi8g1|ScR3VS6cT4szuHkPWQBR=%^kn{{1^IH7te|KTG+zG5s-S<~rToOlqc~`Qz zJP36W)w8-|e>Cp(dZ)#uH}x;m#bP$xZ+E^ARAcId(@xqG97_#9dX1XtMNP71$YJj* zB}bLIi40Ie`z(Y~_K;|DU3^Cian={@gb3qAH9oWxsP?Jpm0G0xCwjQY!LvZWMZ!Me z&@erBQTFU$^scQTq#OSv$41xJjG0|!qK;O+pDCGSyqQH32P!QajjM5)=9$HTt;aQO z3Y*_$*~9AybF)b|K0jgqO0|MQf~Z{!VQ#a>l^kYfY0JDKL%vT|=dpDR!19cV<;nhL zHe<*1>PLEhykCkXRgyf9?$6D&ugrK7;1Jc3>HB(kz{JBG`|Aur{6`MWJ)k_=$|o;-oAt_+!6=6Ox63ycGx3_1oUQ%>&N9^rf{Ii{@kdY^Q4|{ zWwv;-Z*^H{TA#OW`7-@uZ?F>xuu+AT<^tDSg>w=S>P`9 zjNv?T6PHQ-+A6D?tTdS=9j&p?)A#9RriR_F5BeJ)(0p{fVUODU^gWVx2O$t`I5d1$ z0kD+;zw&uRrmXQ$%wUe>`j4T9-AP+gmsL`x`^$`)&TpVpTO^@;tc-9XzsD^Ui$HRjXzPxW1?U1y#g|F?o zqb~R=jW(fZFz232%GOtzQ9iHPsK(Ii!{2uQZNO$L;eZMMTS6g=%I7{Au759{zusVSL1Wt5p8ym7K(k>yMnk+2P;=d`8CCrE9f>i|12bMmL8=w zUU2RtdPuyKv^{|8Ua^6df#DSTUJ&|J<&}Z~Z#UQIw>RGEVSa6B_m%Xv=MOj-J_Uzw zqx;Tn@l1l|dY7-)t+|<;u&n;WUX(%4*T1={_&khAg8i~k|CJub8=%2b#`{Hx^WkKv zq+aWE+9Uh$VY~?opGI&&t$pA_@h+de+){G%;USl+(|he5L*IGz&Sb+VLc0Z`4ENeS zcgU7%TFQMCD9QIs?^+4DuhvMjrA%>h8u-o(Hx+`Zu6qqfF)bSLq1+s%d$k&w@;PR+ zf_xgA6d5=YZYR4vkkk8iRjImE?OR&ca=4Rk0s2;v7@s=YSbl0CTQ<>`)%*7<# zZFlp8b%QR6tdIa4V-SB#NWqngGjoTR8&I~kq@OV3qKy1P?e{M&=s9bUC?3ejr}0Ft zoK%(hbl;w(68(Vc^gb;|w)YPs9S)DX?Bojwl1#W-w>9Xp$2N*M&%EYj(pmfV?wXZn zZGH7`Vs&~^0WiAbu(^ch_af*QWI(iI{>tuBXnhBE575yE^x|e>}}IsW?~?NFt)5n#KJc_M%e)WWMl1 z472>(D;r<(|D{mTe5f^`!}jTLdHk6(50VW52_I0<8e{VjyL!RyYp091(iQnP4VQnd zm%dTiXEZ;ROfdO)&FmpMpoae`+qL?55C6)zuU~ZX3}(9TA{(026s#D)`01*F*>8XH zTN+9=;s&H4SB^R4*4a(#lmE=Po9j}1%MkliL|zSnt{$vgOQJbw5}L|)<>knhN8B>g zH?UbO=@_T;5)Z07ii0bfdT$c)1_U4(SkTpH02slcGk$&tpY^`i z?CM=dN!Udt>K>iQ%G=qTX7r|$MAK4KujR(-kI@}PgNO;f=et%*EpEmODv-wP5O6=S zS>>U(#aD}E=h4X*Ozo4bCqOx~&ycSiZ6Y{Y;WpzzxBf8n%N%U33YGQE&6UM9IY}yO zP)BNi!8){?+`)gZXtTe#dGki)XbK6sVwF!(5<+SI^7LC6^G{`!7)pJ0Ci$>#mJE6y zA}G|AiFJh517{QzzMUOGz0E9XQ~uvN(rLd-zFgiH9D3%91Z6CTWJh;a=>E9!@Q;NT zUL95{O1E^-uU@CHh2qqyM+YD;dQ+J8W0q1z*G4C~*>?BJjM+cIUf$2I7QcJM9I!IP z#0FL99IKXY!HEJV<@nm;zs$5)Cu-zc9n(;;6{yd;D&3X^23}A-^7AB1g+_oRrNVXS|ZLRDJpJYq^6M~*`XM)c*QVq 
zciJV2AI-2p6XT>)^p8OJ91cA*(Si(7-X!_V;n-%yRrZvwxCNhE?(b;Y}xDg z?s~H~d#()>XlMM8a&vu;xW-7lBcDISTPRIvI9I zd-KfBmrK2|t&kZ1clp1tLyo@&Mxii`0YOes91*5!WN`8;3iG%^Kl&vj_rnu!2I~Z&h*q+U_3B}Rb;)K%&6Lb6 zBP_ARyn5;!#R5VM5+?FzI|I# z>@|<%8EJLy4);i@jdU^Rk^VvTXs%iE;>w1Q4&4_LBvkDvwUmfGHq%xBn(j^{W{mE4 zb8~aE3s<{h_(}cx605>DH%qg_Lo#7lne6~0&4o$}@7V1&g4{QD8HdK*&D7i#uQtr` zVdYs{_Yg@o;eR50nR<2xRb-K-aF#G-?t3!o6>#G$Ke_wSf+o4k3w8lTol1O5 zWr4YmPui(kvCs`tw*aF}?E-U5Wr6&?>*sB~a}MJv3ggmW6M`2U7Z{L4RO zrQzu+8tR!o)jMW%ho#a<@^#G(}NMZVaN> z6f}R`R((??bHvVyt*RHYbLO+RtSU|?sBm+-d-i?WbWI#)=wz@?xBP{9%jv5Br>Z&K zR+#6SL;Y93C-q}j47bmFz3$JATDPn5l(qXL_t@{9G@sdAZ!ZRQ&Hpq^XCwkZekt<5 zt%>M=Ky>LuKLL7ZJli^T6ye*#ti;8G;3wo7Acd{^UDyv6rT;X$RVF zbX?-6Ek5F59`l>VvAngjm~SA9$Af1Y=N zE}3v-0uSEdRaAg$5=@(6hJOYWo#=FjK8*!n#K$cDVZs&zwBBqqXI~wen;8Gb+cEH> zscrr7*?X#~(xf+w^ZpE;i+q&+USa$A%wf}iryjjNYyxw|$Qn;8De1!VN050ir;dX> zcnXOx+F^lqS~Q*teDZ|o=}b7j2J1gIoNehxZ$#KdUI^dX1Ug&`{aN758He8tCKv;f zg@pw)d(9ba1VVQnJv=0Ly<44O8NW;E%^y*M&f7p5XZcvkRmLf=Xx*>+`?l?F{h=OZ zfpIZ|b0wzHcEg+kx-J%vE}v(+pxbQM)n>=g=Ddvrje00jALU(b**``kru$WD;myW(pv;*-F-5)$V9?VXIM- zcICL^_K-en!>DmF^8_tS-H-;Ai-ToyO`cHXb`NnY*QSH$9*Wgb2 z6#hMyc#&?V=w6Y1ea(yMmhtw9;vX#+?^wntF))lzd=neAztp^3c{b#>?m1H4TN`w_ zemu^7(IT@zDsJu;^#0d{{~u|80afMNwSmIRRupjy$d)t+X(gpiBn9b4 zq@GX$(y@rcO8s>3klM`PEMvFcgdT#%G1pAa@ zKd%8y5JU|TDw9YHCd|qThXQ>Vp#Qx>-?6ZHuJvvggP!}$3<6w3AOSK+W6uubt7$`H zmnI*IVbI*zzI^)v>J44md-wHLo=U82d0*=$J~T7AE95$ZHrvlP)53C-CoGl}=P1 z;jZ1B&6;>1UFB}n*ucQ+N6WkHf0jb0cOTlFsSHG}wuU=>_{3{*h4nY4HMjstKcJI{0Rd9^ z=s@~oz+vWi(&N{GftaZXLGog|V~cK#Mm^@gS6U}V**d36>x`r;l9?*E7ZKJRofV^C z%|}o_Z!}5lq~iWj`Whoke5Mxjnh?eQbgB6_&P7r;WE?=!<4Edh&TH4+@TtynY9nG% zGSQ+m?nuuXC;?mVY`g%gw*lReCJ;0EM(nF(h7^1x^wo?;MI5{dqVrWKUEMAm>#|EY zpT|ck?!l-?yE~T!E;7p2l?MKGz2V`VEIHpO!4v4ods)EWG0a!bV0nQAwKiUav=XAb zKQ|81L;qg8GHd64^<5h^!e)y{ed&HWrLNsErcF4GZ>{wj5%+g>nQaf+8z~82s?Vmqr>yToI#%sChxBFGw-J z2l>f?fvXcY4s%ccxqo~;`%8vL9zxRSy7Lzi`J4LC#_^lSj)59h%#-V1)VLOzG-uJ@ zGCHT`x=D?iPf~dir9Y8ABKs&W^SZKpqwMm;rBeEXKSw)c*kt%5dYKNHxy_X4O 
zS5z<%!4OnwHa0du(&{I`DBVS9CTJW=fL8Lg^PDxvEDPYTB>el9w|m>Q^w$@r(XWAa zWS53PYnAmx7&va`VCKwQf27}pUU)LrlQn+Wnz5e&m`={qf>yLpA%o0tv?Tui8@ziZJ%YOtJ}Q29eCVFxpAZO^yt zWmT%jC_u$?vo~inKcoQ~IGonrRe7}*=xSeB8I$ZPp`hezh$THR+GDi;C2kA;L1bN} z-RT#%*9WHyw2KV}bc>!nP6cj2@8Y2M#|w!^@4u$H=&veQ{`@aCt<%u26R@2t%TbJ!CEOimX)3eU=V;`yBejaVyeee*2 zv%P*DCyErCe!7SwnJ|#!4xYuIt;_tT9m_eIaK~$00#L{VvbfJBjI>^H;TH{>S&0<_-w(lH1P%v9y6Xezrp!JC4D8 zFtRiH`=5?ULN&vG2avE@e&{|03u6Sd!T&0a5P+H`9iT7qS8kr`%AJJVlbEW3Ivzy2 z=qwq9sDNeQ)jQkyuTu@c21H3pBHN@-tDs4EdUJVb?%YSpukJ@6!oKO|I>BV^P|*mj zVaPEihPc-y&;Yy%Q{qA3>MhLRfplTu>bN&Rj$jHPkjp+a11D5$dFBQzLNc`T8_dlH z_cY40)wwrz&{Oymb;0#tNo#7VethgSdSTgfJd_jrzJ8)n@B8WJ-FU6g*2YHRA4YF) z={MA1*e@IaGd9t=z$WbyI_*Dq14xXp2kCF#Y#E5{Bh^qNWN6<-eQ7;9GKO1X8`e#F z+8SgBveVStSydY-i_jBcwY8*74bz{iy)kFs#ii;0?JsL|` zTow33jPegLhz)~Sfazh~a7+|EP6Y0cP5s$`{e1E2t@3h*7vIiO4EaAAf|)=Cx)prJ zjp5cz(l3#o`Sfn2APn%(5P>5pA#oajgK>!ezs{RpW+E&$f1#(j7*e)Swp;a`KdSG7 zAt=gKXU@0OF0`%f~H@P0(%3fg@4uQXu6r}J9^T=~5B(!KjY z^}M0TxU6p_ly-=?sS+LpDyU~jNG3tkCjW5#1e{7p4ClU`)ilvpdih*c!}aptZv?Qp zFZ8)d9EH-mPV2GUBrCkSZ1VRPV?WGTgb+pC=UDlx+@A$q1cCip?K3!XT*K?YgN<1t z*Fdq-Q@q5P+{l0-g`suX7ZO4nC+^GL(x$AMH&6UvSxnH}o zTgpnhF?x3l#&8cN7c1`niUSkAGbuJ9=|fiQy8quLGY{>UggwGAMaXF&NR6mhAyxDL ziahyu=$#1`Ka+y|1Tke;QS~fqIpxW@K1b-Dx2nL*+I6JpH;tV;_im!`YxgF{u`hs` zYiXjNS-Eoqf-U~lh$@+^?`(~I(GiQC;^d1rQp;VgQ0&hfLq)i zv?d>&>&~_JmLbG3HmyJ=wi$V(VU0Ell-m^VoU!xm}ij$qdq z!+)=<*!QgV(c9ZeUr$qea^}G7`U^im0Q$oq$^Xq*p8z!B>}^~>j9k$h-=RF1|Mi>4 z1UuJ7YU;{n64SPuX0r?ZXLa3(-=4aU<(qhcGsA8ln5bifdZ}v&%O)OZ`v|hwndp{G zxaU89;O=rxe5-2gO_co<9**o{0oy;)p$&Ed6dWK=4Y@^BR7(meXoDM-_4$c;X6rXz z%vW{U#UUA*%3+%VJqZEUg63RA#NTyi=@Bv@taT0Ksf8Xe5MD*M-CP$4-2NG#n0qWI zhqh>9R&1b#ZqOntnR6?PB;3Yt{QO`*$Ut?i1rR}ttgoi4=`o7Zy+i@=%d#JCqp7KL zjZToP*r7S$Dd96cpid&~6GYk(xO1Bjf;)n~DG<=dp+{LJQZN`qb(#l;3EN!3Tepi6V!y@TShj9A5oiHaI|bobih%;(!5Om<`#b@zDDav59O&l-hf z;ncW92TaRCy7Dv{F_9L%EZqmbEZ~s|kf-^PF@ptzt_UCnI_y_iSiC_K^b@o)_ySs$ zZBTm=i1DA5^g+G|FkGrCcbH@Y%2&BRBIMQc(=kKmPTw@P3t@AdEhjJvBdjs=!uo(^ 
zi3me8d4Dm+&@2~igV;y^?GOLM5CIJzh_{YTY()}ggn^Bi07ZcUW1!u_aYDtMEnUlm zQJ|`7c}OB%nhm&y9Apv?{_qk=B%q`B{nsylC>`WJt0n@TJE&7?71?+o<0$}e2E>6n;ebXG zpt`fm&!%Yv;uxG>+{O*5+4DWxteTodHs_;xScoWj++E_W-LQ?qO?a2D>tg z*8x`YzxpQZfEW#Icw~GW=uF`pZVNm<=n49+1A+VQmYEr-Ihj0;FmI`pv9_N%E*tKi zi#{KH`ydOp>OM;6a=bmQYPjlC(DCuQyNd%xak+-lhgG*)MuG)`IE4bnSKNb?7Qw*) zng=16^>+uvtDj}h{{CG<*BKD;*I(eA4C}P8wIF@TA(tp4^=4~TuQ>|w<-mGrsJ=qj z1q*V7-HnjDzk+xnkh>2BG5oj1wIbAStwUxD4lYX(I0^@S7Q);i9Kf8?5-3}HKS{`>{fW4qf`4Va-(eOfEr#4`ek z8Tg0M1P25PMT0;iVpT~8T<|1lyvW1&BbK}1nu$b;hP-pO61_fSsWayMUgF3CSZ79W6lX=4e<@ zh7Q0-9ae*E+B${q%xo1Z<~>Vag;CtB@t>_ zi0roTdLJ6mQv;Z@S3u)Sw>gmVjsi6Tq~RWeMgc4#x`l;>Zb2R14#bfz&=G`Cj*hR* z8|_(-O92pkv%XdIHwwsOve@uu8k)v{{KZoJkem$l*1iIJi{Hy7^;Z-Xo-P}q8I9Hc z1~fafQ#~t6AQ1WL1Jp2;_Rj-k?Oe0TBG6b6Lk#$OHN2T*;MfP3x_^_Cm58lNpuJ*f5FL&*@{inxI>~3MWZ|9E8V&C4?E;n0Oyr z{BqlM|6j?y9Dzoe>jDCsiTa_SE!*wJaUl|%@R4IDf5WO7_*ICk0(!Ttp=0Qx`q{Fb zKe!r;UJ$J_s)eb1=05LVA0X~aw|Xtvyv%U-tbaGoV+3u+G-`W-Spgv79_>jXEnTBT ztij=YiP{N{*%j@(?VJcJdHQA~rjUUp_C1vm zQ@saTMp`jVF|jDfXfiC4HSe)&Yh~-RZNsxkY>k&JHrsDy_$Cqa7AU;E$*#o4!+xg$ z=7D=`?!MqZcdUw~>Sn|83+-q#&*oPl&N1885fqE-BF_1HFmkl6xmgazllefMq#a}+ zS+sj=Y94{6TP)ywnt4_L$Wp@A(|NV@P||3(1tNmRC=pF$xKKhSHZ(K|WXPMtL5Ik) zWImtBud%+KbxTl4NE4Pv9uzwYrnQBm>gl~ z*Ch@uwfS4eP^g%-_$2AG6jFf6^es(2$rYV^M2BJEl8X+3BG*18XaEkVrm!>xz#XQ7<3MKZ8L$5aUOg5}@RhZ!RP^eY1SSI1eDk>o$X2U8u^Xs!0=yaRz&;kIg zeE05akddNUgL;#YwrU#&0Q#x2^?_EyP;Hewohc9~Nq`+MAmR$T*xv8oGjH{%OrL@2 z-^Eu&`m>2M3^ckHy06doF_T}CI(902oU7)8|IuR!AI@zrk>fIg>3#6zNelr-QY`lF zx!5c%W|5_L>R#agr{jF$(=r1<&79O;{DDNU)*>z8R^=W|o$>Pz4Ezi{`&sD1RnpUo zrB~dAG2RAczrUR|?a52QKAM^W#Dk5}*bth3C1BE$&G0|)kB4L)_CK?reI>6^-uGfq zXt66=E}Xt>HDq+wjcr+nUbr+OAD90%D19U-zJ@L|%57k&Yvin_j$4_e5|*EHa)faG zHeI%nqkf=;{~727Qotk0hGicM%8AdRqyl3HvUs4pLRqFNFaHT3fA5a-xY~vWCGa6; zD>@9-H8rz%Y0h?_e``6>!TJ8qkNBk`VWN+ds;y9}r0jigxG0NWm7e^@cac>5QW%WUd}Hi=*@7-Ui}<0Zt@t(nNAa7>3A7>& zcGs@O|NLos&ZvpHCbco3*p7P^ovEw424dKF?q;RY3FKBwk3{oVQMC+QqP-5Hv2B#t zfVM%Hzk# 
zxLy0Zi?H}n3+pSb3Fse^fpg7o-^81=HsHhDwgQ!G$lt`%lREiQsopKopfVP*%}_zd za#pbI!$y%mi}v<~4I;TyAxit2#f z;wU45eNZ6isL)eVumci8AyKb#PFiWa%4iy`NpKkC)ih0Qi4INb4=tkbU`g%q1WDJr*4hu?9>sp18sNhk)HC|6wKd?^1#vVa@DrtFE z(JQq~M!lu#f^IsdgE}*3>@+iUpYDx=-T z*~~!8ivE=v9DBRG3c2wTR%wRty`I)gHvT>dFX%>5QISTGO#(CmU6&*xA?XAqsP_1{ z3g7iRI~U=8lS||2Q*_0cZ~Y<@n;6IN3hZy|l0M&C32ZCO<}nYerCeVKj&jbNo2~vK z_2R{N-(dBhBfZjO;QhTW>_uX@4U z(+d|A4ALg@#>T5KmvI74I3$jo zP<}t~dK;CH{ha*0dvA~lApk2&gAqc%bb)ZylJYYAms;?Cwnom9+Pdy!+pZ*&QL||~ zXD}yz_~$0OMI^a`ig&IGj_>kie5h{tk<#VIo1FqKJ&r?QzL9_H+d@kF8G@zaVNa?) zk#nK?vT|^3_Bz%t!cIzCW27dlrqj#K3EajqVU9I4_^xaPrJ%7{Gci%-+ILZ-Ka_Jm z;k)0&Ff&hxOG@Gw`V(brJ2{p$76;dJZJ;l~ELxYm=c%OZc_< z#KN2>*8{mZ)USP{)fO~klMW_gcH3@z7cyS7s=`e;Eo$hm(;`nMMWBVy+s8-I);13) zHgS_t&W}O!@fV;x6AQ6mRbq(JRi<&BGQ7qb;dyQq-4!+oWV9TzVNV5r`d(ICHPZ~r zyv^>Y7m#jjV-N=S>NgDOvitm5(4N8=DN2&Dwz=#UP0+ z(M%Xt9w~qpFR!MsNoDfRnl7>$6zxGC#m^8Y<-8_A#n)AE{ZXA*lK!LV%GaiS=`TyV zMO_*`ifuQroi5gUtDe^S%zy6>g~|8RDXN^XU?Z>r*JQ)#Gt~6(s zFFg{=G*b#X!6qQxOhin~-qQ)16Z3-=y3pQ|CT`F*Eq2sx@4yX|51(Mr40)-PGXYWV5u}Hg-B# zv(J-wZ8DG%L#6ai1qT;Gpe5^ycj<>_s0*0@zrT}KJyPc;L^ScPer0`G6 za&D~lXR{K+ zY<8VY`UPQ`{qeKv-#U6s&Nv}{f3Kiu$(JUvxOGsMQdA4f8L6%Pw9OHI!WKsjxJ${! 
z_>OQ|fnQ`bFvQ?)TU?z4wXuD}P|5pV;d3taHz22XwYpHHo^w4RR zJy-s@GQ_&lArjoi;k<%`M|?o zVdUmiywY-i!coc_W*6r@^B|+OTvHa1YiJHF-xS~aBR%cI*|7ZQ_;DqX-+9Dr9GNqd zgwwk_y>20eb0IJ7KR4S|jpQ>lFd?``EVi=_h7K?scsJBj2wmZPFTdm(%Px%-+c8e{<;w~>G#w9x zXSd9%@|27eQr+#jG$>Q86M4ihh%4-AI>=*svz#!mh5jk9*lo;Lcun#Ey-_e zR?|tg`78$m;cKC;tRU}rCdfM^QP|<})|#gC&mu#gGor_i$zpq|ePn-jmz9i-qV@E% z$@WqUvpl6d-rJlOk&EOakI4VDTtnBmC+ZSk8z}qJaXeTdknu5xQ*1&!oDq(MznqXw zX(!=SoM#=&WAvf+Qm6Cr&5m;73)PU35lNe?F-0B?#PZV8@)V62t-i0xlUc# zv>v-znWl|;&T+cv;PE$=%U!p)|9N*jm&4dJHKpeq5zBt5QKRY0E6BeFe*+#B{?9u5 z^P>b}Dw&@}mXUc5S>1yr1-4bh3Q3O>T`XD?@lhi9>wXf;lhPtiVWZcsnLfQxRD@5I zWgL@{xM_yg$WTxV<9St>@|#*vsjiu`-^`(!h*`LAPJdUHa1r@a2VaYqO-PflBLERz zbC@;i7)5fvpBGYKitHpK1EvMtcRSb87#AtTG~cC1B2Zw-yWNw|dv$WI+vq0DSQafX z_;Q)n_4V5Wb7IYY57}r#MQ>WFPA*Zl5!2 z{Xr%-;9BB1hfnyk8TYZ8Uol%X>E{R)2Xo=h+gAKsaKvnfzGWPWyM zMYqL-?-vXr);86FgYf)!*sl?G0*k!-6$Yl>`yl=sA3@E0r7|zYWy^%rM#ssd{eDvb zn@0u9LRzeXUYwLil)AbRDnDj7l;UF9C~>)*U-VD z_aw%@Cslw?HS`sKp9B`Y99?a5)=A5-C`&#`DamZHiHY!sqHY^^w(56+yNb!Zr4 zeBb-|secavvv2@9yNT zjE5S@HbD)ml9D&z+qd{A5r_=qdCeLqBf-3(krYvag5(_XM&)&v9J!GIL;lf{^l6m) z*}zD4iyi|Kfxy7VJ=_Uu;3%%xf@IK_< z5jHXHRS|8C!j>`5OAwSqyv|J&5g}4nczVn0)G{j}7Y~7=ii_7fC)eHalE>T;HuMJd~Y*K(iRDV+r zkD1E-VK_7g&-K0lKF^$Xvwiq$D5oC}qVK0H}9>uqVzDD=eb{M6w1 ztnwT&KVQDdo;Sb#_g7$HRxjG()8(*>A=A$QMmVn zZ;Bg%HBogEBJy~M=M)s{#auf%NKHe|@l{nNl)I$8T>f?sefNL8Z+uHEez_SNg45Fr z@RK9wJb!RLhwCfHwZsQ>jl4D}-I668az?hAV zO&drKxopoD5S=@h-nI!t+lWq|ZUN=+@x{f(kzYqq@&b~QFqvNAS)4x3P$!BxerHV+ z)N(Ih!x_d5+E*?5S*HEu@t4dmRcwCqNIYTjvM>nCN^|=PY=tv|75f9!4zO?SIx#WJ zGij`}1SgF`3Dd?hm3BbR0J2na%(remnVz0Lgum3h^dY(+{Q33k_hDp82aL9eff+qg zgUH~>VgT@h!^2aPqF^5M-MhRy;8K#y>+)cMouUg0;dJOLc=~$RvA_~PQk|%14w|JRpv1>c}Zsg1LOA~gD0BRripC1p%QgBvub{X*NFKtbD0E;cFy-=-IvR^ zO3yN0GFFfXbJamd0h}=H{|)K1QJmZvp{S1qVo^t_#C_pGi*jrwgq#e1bi;{}7+K zWnoVD-$sEXXbs0+j`1!Ne0XWlT9RS!_vH68(-r&R`a=0ExVA$$c>O+7VX2~om}msY zUh24(=jnTOA3Y}f`yduYWr+`wvgF5aR*GU>5o zv#4{d6;h3WW0g9VY3MsN;z3R(pGs~4(w+d%DW;f#iiEtwfMXIw7Vtl4hR6VLKG<;w 
zB)G^(NflS@AkqLbMB4gE6iVka*R0LnAD@(=of%fd7cfP3Ghn{empLbhg;U=S4Q5Ee z?f$|%GqbzITz)>u&rjWX861~W1ck17sz@k%;6mYj&?}>+`0MJ4Q_@ho&8pYFLh#5a z)WC-pQfq%?V--{c#{-Oz0&{#bsV<;mbXbZwl3*Uf*F291D*o`)$C5MMij=%{=|$Zs zfq+qo7|*rUQgd5Bz09AjY?+wItWshiInRJwFxM$in;X{Gi?!`GG_q)FQ(_iS2W<7v zGWV?Y`M<9K#IiNrd7*sUGIYQhN`wGSgd8L^LwM&SsJ_o!>?V=f8VPwkCKDTqTU_jY!?;e- ze6NsBt$$2p|Cj-(kraM1S{f$TIt}t! zall}A+p&LkM?^)D@F$c5f_KEsa#n=>w%L1S(svuJRXfu%(jIS!%Sd?0PIwaBj}G-u z&DbZ=u#oM@D&C=DT(b=U*}^7phO+seJkRrW;`OmabUMA=+|=LJYx8Es)gcVi{zkfCMCT!(JCUxUNLFMP+8yO64(^~vA0jNV#b5V(@+66firukM zVpdY(4vY4E3W~A2Z zhvXk1a;W4J?*O_|EG=V4Rf?o)M7&%{KEvm*m@l?4z#b`RsI$Hv8qUMKPpPV{>`}=L zUDMKm3JM0BbKKRYC2Y+Z0he^F-pEN3o#$&0zb+oeE9kt&qdPfvTg4(%U0x5h=7>TZJwDN7%Etd;l6|LZ<6ZPj_AH#or z%jmYUbgN&_c`eAO;maw2G^8lB$9A?t+Bs_#$oSU$gW|t1C8cGRbC*C)^z-(zFr@w2 zRRDx7e`IC~ZV<$(>X=w|q=jCd)Ay4ERAKbT?u%R-7S5EOgJ-@Y@*n|8#6tN|LmD0) zNKvS$-ep^+u*gicyJmfcq2D>{%x%sxCDV>{Z+^Cr)WnK{C(=>Ev6%IOpz%c3`S&wX zB2bGNJ{6dG^0>sYQ&Ld9Af}{$_)s!KLqNx2Tm0i53SSI#}>EN}g$trLIK(WTr!m7>W%othRu-J0;_X0F|g zfj6t$##ibRLSY90ML$|2-3JeN2b3QBs$a3)wg<4k;Z00+LPB@+{&F-v?xAwUBl%CG z*9l)2HRSPcRYQtZXBq>#GgFrVeuR*;n_x}izduN^8pLNA8;73fv1s!e5|VBExC_K_Krc6fgGkN$n|T!Br>^g!6){m33T0U zf8o5Y-71w|=(tQM-&X8Aq!jJTb^<;k#km}{eFL8!78Pzo^)C-JKI!RHtg4{Vnm;$U z7meIPIT*W2hfgkdCHbYyv8B9BHZ?QDh8TTQc&}i1aJiwQP-Z4<;~+oZD7#0l<+Xpj zy)a%RLaw{LR`~ARHT0`jP{W>XW6qs3ROhJB+S)SftT+X+XpVZ@avSsXs9yrkOg6@G zCMYP_(Dw`x*VjY~s+%!?%uIe%xA(>6tvDS#nKIysNUiB8N;rW1ZH`FX1Z?}-K*Lj* z{P~}M)hvhe_=8-x=JVt;92U9-W=uFMPMig+wynR2aga>)l}X>ClL zrwMS~`Ss2dyj)UrxJxyFD!ZE^>k?4g{GKmp)D-mN1M~K3>bs!oR0SWH`RAq+j3PgH z;^b3EQcOEN^`xd|EAi!3U6qhL(_kLg(zaq;w?nMQtI6ij%qH9X@d?4G&SIQM=?8C{ zbKDl1^&!E{C$3*_WW>z$Kce;j@iFb|Oh0-uKH4?s6gJ1LyCRAQu)9^P@8)qwt{mzTjRcsICy&Q9p>7!QROQR~y$Ve_mrr>;Is!s+)wcn9e z!F4n&spsU0>hgSIkY-CD+SGBZ-YYG=S3WoAUDePalk%m-_3>wM->-%ht_f8#KGoEd zTl8KF*Q3{^^RP=i*E6OIW#l+3S4 z&U|*2Qi$71%_?W5l-O!0j9V|Z&&YfFdL_#ZGgEi{k&Lgy#D`@&9=T}yQthWP%ic1Y zY2Eho@hTY{>HlQ+c%Q1&Y|S%R<7j*p8_K3?%a;|p6H6;*Ny6h(kSOx9B`+NpiJnea 
zlW)yGaVm75(QH}lAXjzQ_>L?f#NCksFG^mD>CKU$B+FT-t*?W7hMRDShrb-Wb28!q_xBts@bv&pLx{wQfKPURmS5k|L68U z_oT+$5wtEZ%a6~0q7V=iV$@SkD{B6yc%%ZySu86WX3iZp@-A=VAe{FB?8n1zJ=3vKxc@e=EUR`+V( zlh&8JCA^hUju`B>$J*Aj8wu3r@ua{_=gqzo?e0?;j%=Q4D`JFl;jD#XMs;)ond{~&P0A@2X%h)GV0 z&h>9E~&VB-CF{#Rb&1 z_2k`Rf7)BX9~^7Z+fx8c-wfOOi!>Hog;ib&F53+*auJ1J*6Co2pGApr?yh0GZP1PS;+`p# z>}G}{!hrX-CG^R~H z|5$elAsieyhVBB)Gjn6_&&dV(U*W+NEX`@DM;0STR}NdUpoj z$dA2}%@*xq6ym(IqGgjIddQm<` zai@vPH80}XWm`US5_g_*la`;HuJZYjZ6LXsVdFE{nLu<__M(;wZep>b*Kc*5|E^JO zhBQU%+pgxUl~vZ6Gg@X=2We@{-Uw2_VF6qD5Z@}h$73YZ=n=)x`8_CJIW4}1Hn4mL zPe!yhb+z9i`ixN3Hx`Hfjtm;QwA8p6Nii|z+iJzL9Ey(2%3A;2cE~Jiw^W~XlL~k^ zopYl<-E0&6CVqG(qC(ZhF5CQ3Lxjn%uv-&p>1@j)RAK{ru4X(zP4(P99|Y4nWTXR| zqHuJ*nUQC>AT-^NB_rY2#Bq&hD&ImUdJjBz9OOB8v#1-VVwv0p>)VyR?A{9vrsc}x z*KP!t*&FD^1_kO121W^+&_;^vZ|ExytM`5O>j6HSfkv4MFKe1FgIqNJo3W-uz)>WN zzr^_y%7%=RA3h2s6dG&Xj>z50z)z;@gd`Z&&rLoM5ATe0$@*3$tDN>0f;WvMd|bY_ z@_qFXcK;B=uugq_!}mV9x~wAF*UNRTY1<#!e@!i*TsoRuBd zU5d-v3iBw8`<6qW)}D^V1dbL$TDJ1_lyA*raCHM2*xHf^kMMC`39ErVTcO5Ab~mlY z@bCC6JL||I%-u+EfkL+M_V>+jCQrrGG7_- znb(^jSG}!bmT$(ixuvosqih_}1+0S1+{ zs<5z{2Hp}LpMX!x3b*5_=YIC?%w3BkKt>6Qw$fQ%x^*^AP*Za`Kw5@Si-xa~HJkx? zU5WhknV83yg*wU;M@i)*z?fcuZq4KNP;!GPbCS1={8Gn)U2g7KyNM4OahdV~S(YUk zHiJotva(lZ%+2~WU>NRPr?h3W7$t+kh| zGcnI`wKBF}{3WGey=o<|?C8ko-dFhRRoRQrwp)3UCYrc_H*exk63=Pc=iAN%jZHGu zTfO&gC7OQqVe@JT+Y>vL)V`dAlpd`T6(VBZ+O>CQ)|3hh*XCtY0Sw*z%BIUPKVYHJ z)N@o8OH2hkEt!8D8+m<)jglT4a;hBUAAp}-KsJ`)>I^OCVnP(Z;({uw8ro4I{ zDFl3ripVVAYMS%@7<_G%#UcAy>@%MXbtm<-wr8_1B{TL5F@dS|flLV*nm`qn;T?~? z@0;beO{^PdGsdB1s_-cQ2Hzsi)yQ`iLpw0|J|kK?`ApQ zbhRUF)Jwy(hIPU*`TWc$!Al}i3I(zAJk~L@HBUar#&ZL+qFRbUeV{kFi)qxL$Sucq z883EH-s7Q6C>t};*0;Sbo^A5&+3ak*wgU#L4=(#<%mu{fL$=mKEz8xtKLkPanim!+ zrc24+6hTMLhBLVyDP}bYvA+3<6Nwt!qB_M(;)+i&!G>EWjg6&y3#2P{o?V zTb3tcIChJj)u0DI~t%$5$?utC5gvT(51gY*3daxQzCEwcK=rVKd!x&gJ23 z)qdvha#^LWp=-SJ7n0pMGGxtK|K$5{u+Z4U8W}!~IkRS-p<~fp-=n%SJLtyW z9#T{^QKp$;WDe-(rTuf~c$5R+r~DB1`DPz0cFpj`>7~(t_`HNOGduWQXlMn}l-V_? 
zQ%rkBgc$vB0d!~R>sp4HYK?s!t7=atKH_$^jaa95Q%X}@o+10=8QHUa2Txd;P`V0U ziUPb;8#6bBmstY|Gn}}+ws`6784a7!&>3Mqg#d&1r?ZN%c#v^hy=I_=GD%q5iD6MeuuK%AYnP??!?c%l=}^c-6D#@}fhh{YTBC zKkE*C8hJPUpfs_{t*A)TT-zMV#ogEu{~jr8I%J*UvCvds2R!o6x@yTXW92!QZOPmU zGu7#tw1kH~P$Rsku+=wWX4&)YR~`?kuvAm;sLDfG72`{3pOmG%Ma+pw3j7X}6x0FOTd$3fsWu3E+(G)#e(r@+T+CHa-i_uNl#N z8q*ege|k5#J11AxSxGX4ze0Wv{UT>))X3xyorP|1K5vG~@^bxPQ)I~({d`MaWzT9u zqn>C#YUX|$AN?@(S7tqJ*>7^u6>U3Gv-fv4C@<=N?Z*L zx<^?vH|*Fip#fTa7kRKBQA5YUwiG9*RT`+<`i-n$Z7Uj7F6j>bsj9Z0Pov^&R0#miEG|B`8A2gqo$)D3m@_LNMvs_#XYh_TPhnPfW> zCC|nSkm#m!f0mh2A1+7}9BfTJo)DbT#I~0!C;8%4`oir4gauXUv#FLr&L(&&M&*B) z&mc?{z0S_2jaBwqxJ04qTB+%A^UGtu&l}A=tkN1dDIouv_V>)C)ivjB;5ssSzfMi?YLSba>`*p#(0tyUi2 zJ|nHB_P_oqfpP1siL#T{Hs73Ku^TT7qOSSpuLUaUhpD-Y#@hdL8I-&n_`bF$I@_WA zs|f)lhc9jBbk=U^WgQQom)S|Xq>LQ~yC@XS6Bcp*ySra+fB$!mRmxbVp=&8ADLuKye&W8=(*%Ov!C$wTL0wS_MkBXP{rXIP@!~Z) zIyxu^6%#@*@YjKq{Fdo3Z`KsVyOfJ-fO3t4_A*LJD9|1rb&%v`1wpd~l;(icJ}~`n zQId>`Dh*~HtS@kv|xUM z<{6~Y0F+o0XyA65iuI}!X5!?OgYx!6U1MWoRvsRu1G<>o>fn{bi&mx?!rxrE;ttSx z)|2ToL2w$RuJ`cKBZ~Ru)m3F+@|Hs_KuTL5^ne6yr;q^%F-g&`8r((=H~0bJfT`N@ zc^;mgIxSGxJ3M}0LUOg3Oc+d+4D1>*7@PD(?DlOn=e4;P-7b@j*rc$qu(&L_DB%=n zPQC%N>h`*xd9rj^hD0Twfi7HyJ9pem zyp9xtS&_eY&{h(J!AyS<u??&>oilhUKKcTH2Uh1fv+`nskqhFdV z?vALg|9*KX#3C{r?0N2TocEu-lEv~8mEP{SJXmkce0(ZMZ`8Uxs^A$1>LmKe&9Jkm zWz-kFe}DOHk06`>9>1Uef3b6IXCnpe@X$1t2)qSosA=o3I?~6&&#wwC;cZ}g`bEep z*^yQ%q}vDDX`tn|4XPS53s;X=zJSTx#Fx1qgz7k@aFon(mF@3sFVJ2b;)BY2Yc+*# zIyB4v0&{8F4y~1OAZQ1~e;E+Tv|G1E6%4PVDl15aX#OSJxu1pg$}~Ulf3$byQB9ut z8l>vfD&Bh$urAOAmukf#vJ<6RrUC*Q!AKzBib#Z_2BX9f?1)UONM$h~+A=H>zy#?S zP1r;oaX~9fLXa&`kR=pQ42vwuJa4q^+;h*IGxyxT?m5Fh@P+f``@VPiZQt`ezcPQk z*=Li7=YUQx=4qp`#Z|qzKO7y6h@peOl~G#iY!lY#Lx362{+dgOK+|W;DCUH?2;IHC zX~Dr)K*divfBr8Jl_jJOydOy`iJKuSp0m)l2aLTaG+03XYA)f}y9n)hno`R$HB}gN zD^OowpKK5G{eGlf_BHM2w>O9?3UJJBo}O|jpsqzbGWOi*5eq@;B{z|n%;zsFkgw4E zG`ID^?jk}sN|fR4HE2e?u%H8r=d~@}czX6$AP_*+$k-mHEN6g;jJ{u&FWP-2f`L|H 
z>99wy>D_F5dp)=mNi$F~HjtN$J$32<2$N_{W~oDaJ4ULq@h^fqX)Nb*QU=;PIg7zbAnw$8%M+#PTVQ*54V9F1~NxVa&k7oji8Sg=&GA)tZqa;a|ecIBpsPh z*Aw^6n`K{`b5YviY*zBLTw$qQ#)<%n>3?@mF`;?!%J)EQl!2?9f>+oO%#&m2{UuCmWPxg^p{oM7F z)w_Z~v5UlIT!Wywy<_8s4OcNW`Sp}8=shvYPrROf_P-)1w8ydVa1~^TnYe`Tt=$-N5 zKA~N6#758AMuJFr+Cr$H7oq*ujhas zV+IJX!XtrHM5%nay?w(j9?&m%f4?F!9&ZvDfUgrAmF2C;!=B$e`(eRG+>6lXMUD=ZVP`F-iig0xI@} zs8Z5;OcS5RjWK7K70t}<`hIt8K6gk;^GPz2n|c$WOd-xOP3Y_sP-g2YeuM<&bhq( zp0-6t4B%?W(Tdx%^ek^cYq}tt+5~Rpx95&SS@9}X)-~IM4b=;bjrUD~YT0bJv%>YC z+|?E<9n_JL{cCLp{z`-r!LM!$ctFI+zaJ$4+x6Q~Mb;az{F@J6gy=x7Z~yy$J#F7H z^pcbVl*d=@zy0;};#WU8F`^Q(z61&<-PZBM*fkFwi&bv`Z&Ao4BP+1ip2N=GH|I6) z(n4qg2OU?owqpS5!D$bON;rFVHU-KLQE*siMxJRnl0H$1`{MP?ddQ=Xg?&t)t_PD`_R*~8eKc!eQ!F#mgjL1 zm4(m$21Q&W@qxO^oY5ViDvBgO&w@Tu%`P6o@>cFB101v~Axbc?hhe-6HpUs|h(J_T zET>Qyz*@R*+C<2+td>ZH!y@aj#xiLnEdnMMmZ6iZSYxKZ%2%M&g9gxe$g8-1b1di`!$Qc@BbWUN!C#e*=B>hASg zKY%?65LQkL5+bdM+k-F~5RBCa9;aFnw{FH&*JO@LS-sTAfo4C*3th0_LhZU3^b>Md z2n$rjeInSwDIb;)(QhxD-%&|42*NOQ0_aZV?m>u?d)jbuzR4>31En$u=&m|=nU#4g zP7@y1!N;f3HXQ@mTgaO#px-!{2T_q;5RDahu1l?ot`@dZjx5fjf3WFhAyG5Jw#($A zOAHl#5oF*`Q}5e>>yFX%1=rjdP$oDQ7yOS3nsSX5LV;QDzFPw+>0Lacc))ITlc&u19O9*d(vR`RqTqFFEjf=y|a!zd$Bt(sDcOBUQtiQ`6i-9hC-z!; zha>`AZF^%A6Zb?k5fTxHO|n2mVA1Ya80TC}g2Q40=_O{mzoBR&s%9G>fn%6@fLj-r=vBo$bzGe%YH2P^T4SV*ZEACTaqX^g*oTGxWwi<0B@_AZ~ z7qG4rUcXqe77IJ~z6l$us=^&b8_)>QcjwNGyG-cDKxc7Vtbz=)o;NaR@;mZ@6D>{posbDcqt6&%=PvqlJfyG=o}=H(EU-Ui+Rb8(iP%5*tZV-|2O_+cFvSW3QIVz{Jz20+Jw6BoKK@WK-Bp<9Mr2z=~rJ*2B_QE!so$4X}87dO|3gu@1xj!&NXS zg=Bs~GEVrmQi2eHFmCbG%V(MUi$hV-etO}!XH(SW8zW^)#NjP;MIc8=62(FNIM99=2mx3&4$m&V9}pmqo( zJaPh26G;Jgpn*Nm46~hb@Tm%Ji1mYT)xmw9KdfUwI!=V@OBS^HbSel~R#nc?Xe#IK za=oK4iGnkWdq! 
zczqa|(`Mv@pB_1j2K3|Yapq($Lb0fdVU4R8uMWd(UWDQcv>Kjd&hmM9J-J^|7_{|8 zNYe;9_F<9Bd&LL!yWr5ON|pULl_G?ajIRKMm->pqjNbAuFe0qs27+h=uk00h~ zM|rM7R@nGRe}PQq16O@z`HAac$Sl1I*$5 z7+RDH(P7bN+>tzG+q-W{kQs&uH~8oTBp?ddyR_eZxG^~;B?sv=b^Vk&iEt(mjTcr9 zECfmg`OOy8xoRL|%7U_aTy-@d)iC370x~x->8dtVq+PrAS=n&Czq^}T4H|W=J@oSz zHjqB2?c5qnrdw`KYuj&3Zqn7%|Z&)s-1M z&`@;u!=6>qzrYE~bG^-5j(1;v^x(nrMhoByzd;2K@&G}F2&_IqR7%Pyh?BC=(b1{w z)*I-)hfqkrgpH3bTf~$@FR1Ww$I#Hwx0OkWiJ3%920h#1!&o&|(7PacV~}Quzot-U zS4dfl43|p4Lj;o3`1EvK92ysBZen2M$Vy4JjjE)uh_enGISOV&_i1aXpE*~(HP6Ok zP|{)eyt|tT&tM$*6*jnI^rVDLUQ>99CnCB8;xdFqugAw#aHqalR8b?pcTT;z!MN@W; zmDUi~Cw>!|XE};TRMul)&haK@<{UF;Ni&lKaxu9ti!KDr)@_@%NK!RL`rL;S)3&{P z_e$@F0}PBAP@w9F@@RmQ!PQJi&-5gESk_>_$)Iw_o|_v0;ypfJ};bAdqP9z?$)zyGwoRABV@6`Roe3@1IuNV1$JxfpTfAwzQ n->3bL*CqcRlK=NZ(yujW3M;NFADz%KVBK83z7TEL`SsrczFYae literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpupy_handle_perf.png b/doc/doxygen/chapters/images/starpupy_handle_perf.png new file mode 100644 index 0000000000000000000000000000000000000000..6b691ef5bd52852d5ed16556e79a0ed478ff7104 GIT binary patch literal 162085 zcmeFZXH=Eh)-`z4Qmc$*Km`K^P%@H4GDZYMGDud*AVGpiMlG=v5j`YPk_eJPBxe-? 
z$&xduB*{vY+;dxc%eP1O7+-(gzj|E9s9SnV&Uv1__gZVNIp^AUj zIS&Uf2RGYZBO4n_Ye7y>EpVvaUQ@Y#ap~*yGFpAPW3+*C<@+j>QY;&|d8Bz#Ys#Cga`-E_V_TJzU5+>< z$3@izP8tZ93iJwJGP$%NV8w^uj_wU*8ed#^-J>bHYwK3wUCtu(UgP2o9j3!o9dnoE z?34XU4zD|br}*X9tAwLXS-e^Ae^=w5 z75MK-`0q*h?@9P?6#O>|{u>4Vzm9^Ypb(c?Nm;Rll;z8pcMlK8Z(~Ia z+0TgmowEFekdD*WTTaf-G~2d4U%7T$l+(Fv4{V?~(&?m~Cp{{hVHHu8{f;CEkQD0%(*nC(EFyq;dT zC(H6*KX)W=a~cQbRj&SMN3MFhdC=9-j`;R$yTPI6WNNsE%n^r$SzDP$hmVc-RW(K{ zMk+l&OPgxic&@A1OLbD;=4_r9#ETX*bWTD`h=`HB&Kwq@%=aaprkte=g-TvCOGt`6U0;3)tkciRAcWFs6_O8$S`0*p1h(pTaWTI=2(EK6E;NW2D z%cbSrJw3joF2B6$)C#!e;tDP)0b*DZhuF$;`@P;p5}0qNUW<)F>rr3NRi# zXwsSIj8D%ioVrc#a(?xOUGIZM9Jg-Xd?|^Yg+=P=)2AlYVYggx9G0{9Jn`{iu&}Ve zPi?0B_V0H!vAZm;Z|gk&ZKs=zS!bTsojZ5dY}imqv*;b3pp`8}Rf#))u{!v{gV4wj z7j?YtV?I-r>u+5D{QwH~-i^_1S(DPqlYcQpAG^6qCA}|&{;1`vHa|bV=aoT1gDt71 zEiW$KY-?+~nSbFSga& z+nbT>uAR-U?c29!W@NNg|Br7Fzw(CsM2td&6g8a3u=0F+aIJQxRZq6cmR}#_!p29- z{&bTOng3Ei4R@ZKQX_vY-Msy{fcE+GkKVj_;}Z~Iud%ScK+eG->*dRrYa;egD3;R# zf`X+bB_$2||9+NT`H$dl%huG=@_qa`CMT~hJR%}mGt)}y?Af}}{G|kk$pN*?uHQc1 zw*ByG*>H2R#J+v|ZmFm$C{S~9bA2B@V&ddX)EEiDrXrTfrl#-SgOgNv+rz^H8EAZR z`aj=yH~Gfm{LWXow`eKLntYOzlikh&&4HgSQYHcu*mc85x(j{ z+3R+EvWzp|X#U&R-YSF#gCx#=Z`yQ+qby0IGvlrcgF3U5jXJ5i#T!Jvyjo$>k&_wr z>>16TJqa)M%S>h``t3%4i_5t7H7qbNP$|<&A4}MhvJH#bM;@cUKN_3#`BS0CFGR(K zbmU(qrR^M=a@Ve1OPJc(%AuKYA*rIiUU?_Kc^!_{M99*-gtR}0asan}U8F3%C%u?9 z4T}=fSk^f8;lroh-Fhp3`|Z!g6_gKqE^!BtOrLC6#Udi2b?)4`G>$+1vzknYy)W)? zkN2RsWwSFgOLla0V9Rp)F=xw{9`>@Fa^boIR+xN!jM+qenxk+RK-})WOHdwfT;Y z{`^Ub_@@1o-#_GW+kBQ=zgw_we&?TsrQIbp8CUEr=y^`l2~8#VuK3Sircm5Ic~bUL zwr*dsT57kD!P3mTP5nLo5t-Hz>ZvbVA1(YFvu?X3Wjw1ht-Wb>9F^nXn9tclxqIVg zJj&d>s<>NS=&>o*Lldg84NEN}&nTfKdJ1^c__BAk*ogW(XGatRtkk)&$w}qmmel%W zLs{pgg=r~i>15kGj|HrspFQYZSXNRZi8Ys-9%)at=*-h}wwm&|d$-iOElIbyRF6Aa zHbnH4s?(--(T=_40cjkC99yRcQ|(5zQ_WgcBBXpv8|SuVPWIPI9Q2laa>T6EE=1;N9pYQM@C?#dbDG4t{b@hAk(#zZ&w;=^l)`Yq)Ee_N~c&NmyN1%R0DMp?{;qgU? 
z^4g7P=8l!J-u3cI|Eyaku4`%e!eM$Oz9ZLhu-Py~`OaoWUe`sZTkh^AD33>?`&~|5 zi?KwMp4u&RwJtG#F=o|zx+v8|?Sb~}bYWp(b5r3F>95;qXc)aq!tr&@SzG_J6QNBP z|G4n$Ue`86d^mW2m%#o*hYo4@KPKPm~9psJ^M5$*W zIN(S^_hQ$`i1y`t(VwUH?&^ZWHr;1rBO;5{Qi_Yt@hmkLM!clszn@j+yX;l22kIPU zrB!SX_uNGP0W3IHN-ae~Lc;XRhu1OMxmj)P?aBZ+^$A)UD^_o)t9~ZIK9!oLH{x0B z>E+e<;q^_LKmXi|iclY|D1kasmE2qCv7MEdS7~Hq1jo33yh^7e6 z_VQb6w=+oUm-$sRE+Pl{l<0AX39(l%wR1F-rNo->&0yv14?izn1REhR1X@d`{ilFTPgCq$YL~>YISc*-*0s+4E)NqgOoF>?YHS5;-JbChDpf36uavZJu!%@bU)*aKi zj?;b&c!Nr(_0|c^@xlF}p?k-90<2}sY;_p=QIZ}XzIbDxF_ClQ#*F}POxTlTgNm%4 z$rlEdfdV*o0XaE2-6JCjS+;{FZ~pp2N>Vb@IA1wdC0Z@nVC>5mx&E4n`b=xXg_*vP z%&F}9)bqIxslhI@#vX~fh}tYbZarJqg2NL98^u1~ITZZB%gYP-nFfbw98~lhCCQ${2bIgLXCx$z;PW1O*v9g))OShBuWuMDoDCx}4 zzj338mB}c_r0$VPm&vjZ9Rym)?ONbBGFUX~BM255gGp3Wd!z6dx0ZB^#Pag9r*LGk zLMCk)NhWns4-=C%Z{A#&aYa`v*Fm}B34bF_WvYJJw#m^>9kd8^hcCDlE_xr7srmA; zpy$JfP&6d(Iy-d$XC1m9O4L0)aeH}?MB!Z;MoEOd3QlFu$B$QP7h;uTpChlCHpHp1 zsJ#r&x;kvqakYz0o!)ukAwD7;OZnKat2N&>KW|}5gIz02$#H5({=$U|311U+@)P{n zHCO}%)!8-Dly0tCuasre?-W#qoX^moz@|Fi`^17}|Ni5)gAMgvUSbKzCc~}ivh=R= zNiK^shNuT+1F3AEa~DUlM|rP(xe##nAn68{En9}YZZvBj+(ioav(uhFsKm%}%#4hu z5QNn5AR)Vf{>YGqo-%*sXQ$VTEKJm9S$@2McX(DGt7_VjWAE^_a66+uXW_1$J7aNB znT{T9y3|kz^wh3m#$Q@mst|cDC1%pY9y80iv3jn~O!t;9hq^=<1MMh^TXlVbvbz z$$Hfp(S}#oSMceVZbdsWkW{?K8v%X9tR>v8!(M__CGI_rs)@^JhtcaBD`%%#E$Uxf zJYoC!{V~#7P#L9DE*7jZ>&Q_9lC#XaFXk{|grV$EacWd{|x>6@86vlXZqCh zoHElaIyuC|#Kso`0s;m)aDQECsO&Sw|?AIA^uKW=Jt)=B>8*Z)j*t%0l`j0<$;s`!}sh=I=FgdCeqr@6; zc0-nxeh7!=`OvYR+wW?Vug6QTa&z=vMwx#4_U*|N)Q68AU8`M4(8)L7CF<0a;j%ER z6*H9ITgkVMc}|xr`AXO66V&Q#JB!#>$LSHXGu%ha+l;l z?b>EWy;thXsv(lV@Ju7!{IrzRUHew7%R0%^zzS@$fk&^rvAlZqNMKNq=lY#T9<|oi z)_UeS&pj@F!lipUuhJP^y!*|YH}{n=X{5z*8NJ`DJ2li?g!s7^7Z)cyrzRA#)2!*m z>%TocQ;o{X%Xy1DcN(7Io}C&tO=9QfuF-Tvd+Ax`&)G+3U38Duz9SNNnihE3ouR8K zNq3vA6gS$X@i^P^M_l~5OAT?FnKft5oOxj+Eh)*R?L4V`g34vycGSZx-8%7ESlBvM zRn-DC3M5r!immXtjL# z^5t-4h**eec}_z6u&{>v*ZetP55BJQ0G`ZQoco#wdKuZ-J8}9Brvq#3lzGg{#Xwv8 
zTqgOmw{^&pORl+L+~UIAPU=0B1a-|~WM?|S+H@z;MUK8)5rgtanyq6oLJ~kFp7~3Y zjVjX-2pYU)YNK}YHLv2LB08)uF9U6?O8iTQ@_da9%cuKFeqXnacX4)*)7tU)@f+1~ zYCL#dtL`(gw$j|s&jwsFuPQC&?r<1nKS7mG)XsI%@&M0L@<6XHTCw<(sg#O}-^}2W z2WOs9wILb_eA(;O^t|VSg^iQg_w9S#gbsdYJTTE7`KY{clIiy+&LOLRoKGQNUti<5 z#TLBxclGODZvJ%favM|Ebf*SwUuDJNnp$tG;Sisc#-?tcNlh$!vNG&Y44HC3fz&|zvb$%;5ki%hM!HiG7kBG$nKMK4zs=~Z zBwGI zoSYqqP3#^RFsT*=H`GI_gSSQEydyd;8?OZ~jc-9L_trJZKJ%khT~b+_)> z6ZiMu4N_;C!tT;Y^jAw9P)_VPz4EsS`8%6~GiL>aR!3R&a9Lg5@ZwlK#nSZxj!TAO zV}hjXqJ3YNX2#wH{#{Q!ce0B~Q4WDNo*mZG6bIzYdS<0+6tsZT`TATTi z_Uu?x9rY}m=!Qg{#7>tvncEw7D`;v4iq3vMz^<7g*P3pj3esW???{BSe~QzL)o$Ty za)12sM`?Mv4?{#`w7+b#zy0=G)WrgK{@r#crG!OFn8LoB09>t%`Fbu; z#8HLRpAgr@=P5? zh6F8E$$_@au8{_I!9IlB*f_xqLYN;hjgqN?#7?s9Q)trXODDQ?NOO9Xqg zm5$CNPnDmP^NNp2j`G^mjC~8W(Taxzj&?6P2hL6omK78n&>0Pxm;kGjDowSzCC?a* z#~?U)|Ni|cu1hX}qw1H_r^=5C2t=DUCd_{F;7mcyf5MUKxHxZ*(*F7S>OFD@U@J5J zfU_noDbZ7-ory?y10{z->cH0dfLbHuofj6By=v2s6LMiEEv;mh<;CllbPZX!t~}Yb zTSPgb&0}nQklkYRIgV)hu7W~FapjnM?6a4aeGolO`SX=CBOBXU9N=C{<)`1qW&87& zr61=vQ=f``G1Sh=XC!y6t4EPQ0kD(6G_nKlpWXxjCDB3n*oE0i@BDn%yyEXX+*R4C zp-v_+n|$d#zdZyoYLqIAyfEh6w$qnQO+mzA;_z*=PoF;V>K3ibU0R$IbeYfSaGutZ z2@>S%>{|Fzu+af%158}K@H9?{R08FK%fQAB8;;zz1HsabI0OzF1Pp#bC!9r7aF~Hv zXDPId>&mB-o_j>p`}_M>3aT6%nas5AV~_;mOKT}s|HdXZmQ^H>yb*9^;`xHLfiSBH z%0u#_Q4pvxPlTY zWBkY#0B@j`AQ8tSjcFPpJOWiJ+E$f4sG{lfSr5jA13^Sl*YM7<{@9UF%ZTSipW8A^ z)~wsbwB3KHqSA0jK$esJP3+LXr$WZkQYrcV8rdk7c>A%*VFrofY_aCwDIa>l5oIK< z+byVsO0A4Tm%u1#fu^+ib(YZNG%Y1k#(Q?!-^wq6J&u;*alW$LoS&pKFR{fa)M8Vl z4Z@thDJV#_9aM3isFqMit$qypI`ON;p%j(6`T2QXz_85h>_L>qTI6r)XdWok0W?_* z{U~PdYh}ga$9l@jDk@|Yrp5w=>_&K3kDB|0sBL}1XZk#V$FMhco1L8aOm=R(TGNWm4xB6|Y=L2H2A@Ha6@T(N^?jUD9xI;Vi4KpABCR zdg(gor8Z`}>97E0ijx`+G!U(18!4#yhg4E$4Im+3qN_>60$3cju_%^CwTvdY+D_6Dl?4;56Fir>8e`TocKxT zhqvw937Ds^r+0qXdS*;tvSEd-M%rb0O-;?~COKPx>K}v)8y-G;?py4dH&)vReu$ko?s0hG^ z7!k**&p>RMX&Tu&cApFyEdhyD{GX;3I%ft=%W5ONAg{AXJ{EIzNf=(eaSxFtz>_Hf z@oBckx8J;XZ{2ufMIhtbaRttk=1lnrizC~`pp;Vyoqd(d@~0>c|e25MYPZT 
z4S<>1t2WBO%+0N^G~Xu{Kb2JIY>z}Yi8Ek?qu8t8zr=w?A7HPt@!Hb2Vlf6uz>?HQ zd=mh7gk*v&h^984Lox@YUM57eJP5f)Z7Fh652CF5Dz95dMHHSp%vjNsMFf3uhUMrk|fM_J|eDeAIXnO7m6MHip% zO1_>%!tL9)p^>t4a&n5y4JB{eN6YNEDo;Dtf%)js^AIBmmA{piHu}pKOM#ygaU&Dwc(9$isCyy9a&ULVb zltE||qOpR1o_Ke_>odT1^5@1RT?xVon7j|Yj*7A$%^<+Q35&d$=Ym&_AwCaAsYssx0A!#Fl1cZHJoVCkGo?5dBSg@LI@MI-pAE~)1`b_ zW`~li0OJpf%yfGb0dsy~0SJXdE6+&{z=KJyJzXJ2i5YF!+1-LRWa}r9#G@c064eOl z7d*cxBLjmG>U@l1=o*R-M66728ra3htP&tTMsEYG@J3=WN(5$N)voiL-yMMw+u9Jo&8a z(&!R1pg-9qJSzg@&X(_l5E#wRzN*?>)AoHqb`$tS@}h*We#~!?Kmr+jEvUhe;A@}$ zAheaj)H*jjU+rBQ)(L>QSR@O=@}PmQPq*lFs{j2*KukaL@lm|%@gy=MLjdppx0Dal zfsJ0z$G$`%;Ivfj|o=Z7Qm$*Z8Xd_jB+;J;}Gk{Fo5NW0E|y^XM8MDDPD$NmpDS_jUh{<+Lf z4^fY8?1Xx>8S9oLRfMoD=Z~j_R{4p1fwTakh1c?fxIW4X_MT)%QIuRyIzB0d#gNzY zkB|>fd+i}FP-rBDH9)Psx^XDjVl>K;o z2V{j*>%MdKi8`Go^rmH^P7t993l7RRtf}cdVTK{8Dj66&U$t>h9GF>+A)T<3e~^WS zCZc3y6a^aX8A$6v@@c&RhP+PSEIk(Tmlhp~7Uj>ORpYuiPLH6fxa24PystH529l86~{L#!RE9x~jU7b?yc%=|Q0 znKOT%Y6WRG3Fcjjs_inPui@1;FE5XzW;3n*mQ|)`tx+hzQL)?G+Y>ZF zo*A4T0OxZ+o0hDTZEwjTAPgB4iHBgXaJ()JQhywW4|%?i9nM|3vLvtBwu|u){R40B z#8O{J$;HLo?vK^ta3kq-Wn}yoCFPrRN`sB^Az?4&cS>R5cNyHzjKA_OpTRxet3z7s z{n}r?b$PJ@vcL~ibA82XqA=jV6OE9>lycIi3fH+mUIjVg52=86#A*TXNkk-sFfX$F zXlB{iJ#smY-LJ1#^Vo?@V z3gDJ4ef_$mfpIzI^1C0#j}PW$0m}}nKbk0MP&W01BmTrLi#=0`rhg=lx#F1@#vDATco=@-&kJ+8`d) zdrED+Lv&>A?QcQMZ_Y5Gh+hF0^2;ZD;6P_yKQk`I%wc|?LrWbp`7u#Z*;uO{yY8;> zlwG?tFKg|mygK*iucjyp)BYa$lPW_K> z3jKS2#!x3BA}G~}=N_-5f_e)r@Q7x8tgEpHoOsg|ob+FQ-C|;8mGS3Th1sElIG+}YcgbR$_g z%oA#AIZJEDHz_AB#+Ujz%bzw0W&Koaz{xU#l>YAT9>3d72>t)p|*?KVd(oV*WS~nHk?YU*>yAyTvHcYE0b1EAQL@n`{-8(kH_6-H&D#!FxM(v807!)Zf*C=%LfUrLTysLc1<}* zU_2AP2!+YPQD~O8m7{B(%&b{jMv>b};zf=ETbnAXT#89O3(?_;XaHrz2WCo8Xahig z@=i`Ur|30SU4kNC52p!nM#8^`;GsumP8c27GMp z>uAGZN7DwmtAGsJJv4MVd$~Ltu=rURFBM-c@J=$VIdx@WcKwgVn(_6!s(s~(HO&?t zt;U4iR1fKu%28i7&dg0ee4_CpzH9|$?{>tV+oee8ylEV-V@`Q&lPujJu^=*kNWV&) z&t?8wwwB6uP`KX-Gw7%^Tedudj1dcIAEw*Fdx?pO@Q;{jts0X1<$L+PeSM#P`=)%? 
zGblmY-^H(cuTE#5QhbXc`Vh~v2PHK8f2W8CpI^R%a`)-l?QCM(4yCIXrND56RV9AV zQ;s$^DFo1m#Ye~mXoah-D;n3015m}laCrnJy{oSl^%yFIQsL=d2x}F(SLf%veyioQn(-1MC zf{#O$SLt0KG_?G))1+Gf3iCxZM5h-+Og;c5nMIy=?{a%R^zjLA)N$2zk`t(WsGEK;sC1rQVLB z-Ruv8I_WN5x-@|XoFG=BmO+~6868a|=GOF1mt?^yPw7&i;%TA`5W^t;8w$U=^Qctw z=pFQ(F|arS{i*`OfS+;H=ri2m?(*&9YLTfXUDCCpxgf~JbGMKRcvsdh*Fe+OC0B`> zE((IE_UUU3*oh;d1QPop+yrC`0BH#Qtume4ydaD~gN5-Ky0y~6!c!2?3Gspc(5Ek{ zprD{WUW4D4Q%4&b+?wBie^1;J=!v9eJFU0u-fbr8Qdy=Vo%kcjU(lD7xDLF%lb&L# zdTO_DlRPtX^_mTfnlg{yqHmsIQS6lj&*gT>3&zgS9a-0^$DZX1LCdG(H44Q+{qC(q z1_h|3*}XfSv|iBeVsOy%ilok*kpovp?775=0YbA54iCbN0LU8S5Nw;1eauTNO8%fb z;PBe#>gsyy_U*xk5_{yKJgu?WL&K`#!$5i)hmdGm>ccFE z!+6A^! zOhDBcSGq_s!;dg`PMd)vB!UJ`RXsFr<+$_m;0$UJd9mpEOws777>){E`SjNw=LvaV zX2sJ)Aw=PW5~m_EUd{vl>M`imCy8C0=p;!I0S$>97Ww|Y95Eb&d-jIU{V75)8nt1Y z^Ff)%-xD8}dJR}%pmLk9pHA&QYAH_GF>U)E8sf0A8|_es?*K|Usi@Yj21!4l`30HN zr>THUfX%hOJ}HV|-XDf^mW@r1vdUIDq;YJdxZU8Pd=T|{^vLK8H%#RgS)G9So;dnD z8{vGC_SJtP)Oa|2$g8jd4v_Vdr!iX*Z$1j=&%TnTj4RJFhdY-R@%tS|E}u_WfIGtz zra_GqNp2R^gv*l|)O$D-v@i$vnqk*qyzpgFe)-w@)7w7^nwy)6LFLHhrVHk>va-Me#KJW664t}8*B#-|$qO_sr{yu|whuuD(mN_BNJqT~x=^i-x#aJA_sWwS zxO9u!CKff4(fS;Ku_BC}k+JX^h{*MlMc8(>wD!u}`zSxiiR*#FbV2F#PC=3Z>%9l! z0Dr@mHtVXMS-J;UJ&4LXV1FrU%NL0v+=_gbjWIG_!BJRf6S&5+uBMM)rY%|;0nc84 z4^%<-K;uh&=@ZnCBO~TG?*(naz$d)MHE$p2odr1%CR^#eI5(LUlr%d%YH@}eJgCu? 
z-LMXuUUPKp9GhXfH!c9??Fu?MP2G6y+!nHyKvdQu1%Ne27-%7p6(K;3>WvkcT7Y z1T_gR3J+1#bsh#iBQa--Ok4@XBw z_nOW&Xjrt*Us@oOGwWc>EP$qTKJ9WqrU+SoQKyW3B}Ppzp4MkE(859W@@NP$Bv}ym z>}A=)ET3T9TA2zib;@D}3*Dy>n17YKiVlJO_>C^$tQMBdRj~f)|anoX2p@_J@ zst<{Xw14aI*I{e18c{vgJTNEx56*JSW@Tk9%W$&yzVgkIQ+M3JPPgmy>I3`3f;MfR z_I?-;U^TeR?Gneo&X$Z5uqw<=wa=JV&>45+3^f*cHtSlnealbdGaZ@Vk)Cu(@9De$ zUGK|$%>Tak1)?ci%dTC!CPrPCG{7b`qB|cSn!`I2@^MD+9E3Jvc|dt*#dH#qkUY-M z8cmC1B9nEBV^dRUM%EBUKt?{@%qV39kyd4nW!j=Z0BR85m9h`t3zg0TF3BtHbahAhm3 z(283y9yo9!#A)mf!5e52$~rqcb1I-k>NrpOf@>i*LX_@ys~iH?k+>Sya^{4hvOI3x zjm52&peL+1$>t!jtS2XN1jS+ng6(^0G3lX<5?OnXmdD(F9@XwRs8pzp#}h_Z5ZW2V z)OL2lcwwxJvv&&4h%@dNKeyX;5~HVib>zuAo6)5$uzUR+hht_o+`4<=>6-%j_lpbh zK94fAd|C5j-@iAikI$CsDHY*deLGBt{SS&;*xSvoz&61o0X|L9z`)?CCkna3nKMt< z8(xHvta`CIo9H25ipmlnENO=lS0thd?kAZ9aphx0vvW1isY=jPh|vXZk~M4AP#LTr z_25IQQe7eV)uowfuxsaJh!-PcXA>C(&KMJf{VcJ@e}2CYT3!D$FVPxs`>D=zSCKUm z#JjZNO+ZFy1pM@U@Zf@r*Yp23-Wh8nDB%KC`WMz(C4!M=kwg*KNLX-&ZJRtD(B>;! zexvuVOrp%O*3@vEa*g3epa#hrSbo+zmAAqJWu94{l*(uh(Tzj$?wvc#@4t=-H9+I` z1#g6^Ii8zU4Rb*h;i$!nIgHi|C@3nX0=*DPiWqCv)gLqT12jEgAlL!2)_XI0Ll~#1 zFISkkZp@Kt))69eX&S+>>%g{4z~B~H;fJid*d&3zQFN<8Hiw~Xv|If#8Q9k~dQq8K zsC#yPyvh`iqb18W7De48DHk1+NH$B(b7c z2#Eg3Vso;;l2U&_4E6Pm@!0Oqe9$0^Lr0r+^=f|NtTt%~ ze}4azPS7TbN~OlF;x(7}$H!s&@rKRUOH(2I=H(j}Cr3GZ&T94zvih<+8Wu;XjaSZg zCcyD#gteXpQ4X{8YItu6BqlCbC=!NT+4b>CQF5}1yY1F{2KK`tA>a|P zIX>yCNW50r{JYWfrzY2J3txr(A(Enp+JU7JFer95fp45P_SR(ri)e_NVuy3|DTgD| za!lSCi#bkRPB2Rs{Jv{_PUc5WL`>xO^+1-*0w@QF)a7&fDKY&@0DeGRvmUwX{^?UM z^ML~(GG&yW6)9o6&^&%gxl#!BayL4C%iC@39tJA8#*b3xRd+0W0s2=XRA7oD!ob4Kw|Gcz;B6Q8TmvNf6}<{Bj3>xn-pArXt- zo%+(h#0eV-UBDioT9QftE@Z36H|_XKtJv;B{j7xuF|OG) z;{V`0iik8y&nK^r{zni zM;_Czz}(qAIBr74(e^SjS2Yii*59xbXEzpJYC55-aR>---{*<y;f;(_!_iL2Aaza>BxF4=+!N9zPf8W4yS6i%0ksNnrxSXRy+a1p})Jt zTP3m4bF!OHsI!w@?Ipa7kdS)UEWtOam6ViZC--%!yKNAiTWl(7;P%ohRtTVD2$6bN zm@A|;JEMi{)9%K)xeaw@mB&tHUpWVZW8~uexGbr+urit@4Bwi(*Ar-}J(_wWXSH>3 zBHFKBp;wMP=nZ@Oo`LtvOzXVto~IEcd*Q+db}t>6MS_lAISyN=THbOGNDo%l){0=& 
zFgXDedbGyW4+X?>)Wb^$P{2~see)JrEBEgYe!!5K-3ZlV6s&W4&j;~!%VFkBT9{y` zxM?~$vx68C*yj($khL)lJv8zmr%u;3dwXMynIQiNVtV0e!=nR+y>VzxdRqJ*J&Ho_ zxy>ji$Kdy}EA!v3NM@ek1>Puq_wGeYAFvEHZuJ0Z4>K|{me*f9L|SP{`^pQC0t4f~ zqMe~a^bJnrvbgH9b{ktp4_`%Mpo0sJv}Wt4uCBFCi55L~4!vADvG34_NAv>A@^d=Y z-kQ8ja|jj_D_dKlqQjjOO}sRTZ>}a!e*l02Ba?Mp;9R9bD+-1Rak&j+M6_!9JeptP z6-GZ8_wCy^p#=868AyBdR%X)D(zQ2Mu4S9bu7=S7RqsKpNS~8c<($Eq*&Or?#B59r zhbZ11TNNMtla8JmefO~A?p+62dHE#_XqYaxK4xT=2InV#d5D+r5_6lulQX%O`lZgF z4@B}jh0do2QU4UYO#qq7orDQleSIDxwAkdG1wsci62Q!CRCRQe{}22(#F~Rta4Q$% zDxok!##Sja8`4PnJF##HOkLakBi3?da{*Y4ezeBg~gpY-OE1_VMP^ZoVL zZ#a!#{K}}k?*#IVYo9X%dAGTI`*M-A-?vm8G2Pws=~I~fcyB+Vf-fEA!SwClo3=Hk z9bUKVC=Qfw?V%8-FtSSO!TbPg(6y4Tb@r9}T%GtVAFJC@nXKFf=|2jAfy410sq8t# zz40_Mrh%e(O8W$GEpdV4SbPSR);kqAZFBLrAqL__L1~3m>HC~KCWgk-5Vem_T%Gxd zCqnV+M~eUMxm8;Omq@IDb#J$64%9*7GsNDf|NhNDU%x-du7kAvG#E*r%*+F7)plL5 z^|;WE?S%zZtOS1<&WYTfuNVtk-;Es zn={nYGs4?%GfQ{K`#}OXMNUje3pyS->o|Qx!NK9)u3Z@0or!|L;LCtarYiUT+RBxl zs-AR;W}562O4_mc-4wYCL{ggrFw87hh3UYXg8k7U)AWR`A@m?-k#c8x zpA0l5Jp*uvhYc&P)CzGwqj;Yq z9QK2KC$?_e<~N!|sB5vMc|}v?de#QKf=DUBh3M)VE!z_r6| znTlk3Bz1|5oI+VF#WfyeY4P zWT1X@7R2jVA-vWfR}Z9_x5uEfB2of&H5ud2VXpIC^yID*s8egEZ?wIE^?<|#!dHec zv0Hh10~GeQGJ#Szg%^OnSdDQ zlwdm{%R~qSV`#?4DG4+P6z<{Nc>4EvIsZvgWyZSZJcKQ|@0Z(mJEuEGizIv#3yx(4T_1(#{>S)%TlRS_IpLn^zD~-B3~s@pWJD5hipj5S z$&t7rWq=f3R3u^yd*dp;sTErT|1?_*k^T5PofN4P;8t40*tWhzAvsg3?ecRrX^e_? z7;2`<2B_V8=|eWeDCp;u`WPQteOn{(e)p%%p)ATp+w9{hl%^GEx8%+8KOt4)3wqx>Y?swWH1 zJlx0d8|9VxCdzMamoAOz8MhrROWS#nk0s{vNP|LfCb1cV6Rd7v{Lu<9 z9aK=Pg6|}O%!st49>5@%lK3lD=l~H^M*0+6dl+1{oOQprBaH(Fg*1dC8bj9q8;g#5 z?Y|bCgzTB07M%n!eKIp|e3^OWxc(TY7rxmEc*4wCd58yR*3?jbjsxJl}7% z_;F5ggX;xm=16>>2BtJ%_2tUUP*S+HX6ZqQ4rgYz*oDC{3MGT{e;R6Lr~v_SYqxJ> zK62G>^X4OPC&}oST~(eO=mf{1!X1DMclN_0L)>8h{@N?+{xIUNzt~}fh#2qN4#SLE zgjA}6Cta;_+U4!OOdoL-#4<{T1-AI#AEIQUNqm#@)JUTeXoW; zI^L6n$2s`Vq!cWnDYnf2`BCvC?}rb=@sLbMu0BWaR#IK9s9%=1*L6vBY|5MkvO4*? 
zSLT?Wb^lgf-HwQe!tTuWW&v?bvDhWyg?{{M+qmukHisBs&cd;EQcaDS z1s(yD`o}Ofor4)U-S!TJvUej~IiZh^+L^x$dsf_&nTy#!L>{KMesUpLp_J}!F`DiC z(lCAb_+3>oH|JBz9Lw+Nn|`UO#oCkwuKvM4AM-orl;E18hMP9t|NQQuzjADYCfZP{ zj|hBPKYv*sf?sHGer5(9h6q;GH_BlrnSEIswP1Sd>7V&sdVV?Of+6+>5BCIS8ex&0 zWV~8nqHjdItnI28`;sYl0Pf$=8|0QNKY8jD?xl!EWAC}^dLX!)2#Oa%z z#f~5Q=MEK?_d5zpAYTK~lS^?l{C^t0UR;lhTc?#Z;jv-2!!e<&-v*nKTybfIDsZ+p zuHb<8yz^bzpgj6KO4B}qLns-?VDN+QI0kw!v9ORiPVoAW-z^4f#t0(;N$MPrLHXY9 zpKheX*$$H%Ns85M)5I+w5RGpur4B49W?G4Ke^PK->fA}8bkn1Y6i>45ez!?LK%hBm zU0Wo%eFEctM7V~fA_-Q^Pmranal03;?J*lv__?5`*w}}ExTSc&rHsNW2G<@Zu4(WR z79QX#qsJER_{EH}Y&r?D)EO$w7baa&YrL|vg{o+Nd{|q-%a=94%~E(=C8v2#Dg%9p z;rHF{s@29FT}KjNblXa^T4Ot|$>l_`r=}qCjAlfKe z4{`^9EjmOGXPJZEH*VY*6E+8tL7XUVZWMBh2++l2QD=u>K9F|jag`EyFn=fpm=d+$ zE#{<GkgMv1p>#I*rW4f4rKgTFLhg4JH|qh3ExdjG))YT)OV|`DiVp0IbHw#uOm0 z6Y&==2Ij^qfJNlW8W35;kx9q}_#u>5 z=ym&R1Ut~95X<+#)7yI#!1xhW<@O$jGeBtyXV2dIK8b(^ z)|+8+xU~`Xm(kcKxf^-7xhK%;5ho663jr~t6GGKd0oLOeDJFdn&NjKUh1?^R>u6`~ zw)w{qkxM}j;mbM)@m?&|+R%Qt;6$K;qAcstYhff}f&`^8q;!ZxOj{SO5`z(GVq)z! 
z`r&rSP@pGPkQS8;sDk|P4G17s2uyG)BWP5>+>G(z`j;D!pva(g3|I&<+e~;R^jVMs zrB|$4_oB%aJSHf=41^u<;#%n9tiZos!ef8K&u^*Gr}&<33max&V=%>Z4My`q z7{~>G;OSLi-6p$8ItS8j0c7C|F4?YGTAIsWss&PcMg~REnqr_Ln-{b(u17cqbBq$G zLIz1Rm`boP;{v%D2yA?fM=?y&po1YjG+NyPiXHfw2@GH;!pZi%t;MV0CMIK0@#@)E zeD`Rlgo{gV&qw*{Csg$5cY%I)#AqhxDFHI zLDCFj>-9uCcalLF((b{W$tfu%E2|2M18*-(Y=khN`7)3Of^aKjumRCcINH;yg-5b9gcTmiy(+T14q#i=DV=3FAf}R>a7+F-qwzDeHql zaTv+$i`&zOF1HcN8<~Y5djV`t4TnvGT+D)YRx_Ayu^kx{t=~a1aDlT^Ha+xUCJA3j z>_`tEK19l-G7!5GGB8oT_%P@@o=Wnpb(KgY$_5z+n}yfBXJ|gZ^S!Xm=LaB)81-)p zf;%CmpY=Nq@2zS+7(YX%(&4rs*HV#-hC)I@7?>eKlNlJa<`Zb{;3Bw~IqQDwmN5ho zaxKvJJCVqJG8pyRKnA)q-vs?QA#Sby0RdrP1|Y@wedONe;2Vlp;IK6X2B)MT-Abmm#E~DN7RQ# zkO96hJ|YP|Cc;ECN*1|~1M?BQx0e{AF{(gZX7yc881JHnqm3;m&jiM%A7gc8$$8lo zs8bk(IYRz6A!l%72DudyJ6YV8hw&rg0-eRBcOT8XwJ@M|6qj3=oP9f6lg|Coc!c(Op;KVy&15^=WwY^W)YdIA)Js$8V*}O zaw=?6f%UW-v+YI`zzlV!6lF(X2u{U63+ibWLzyGFEmVq%7H_YFMDhOULwzksKMezp z{kX)8%m~{+bV0)JRX^9HPH)=x6TmV(^6}vz^9^W8A!J5rYO*jLwtPWa_;!)$B_0TA zbHvC`j4=z*xsE69>J2qD83TK(75KXNfczzTz!rQ>J7t)#R>f{mOJ9T{%0JOo)W-aX&SfP+mZ&nXyp={zMb z*lNApgP+5H5FZFF-}KYlJL)ekT!(*>o!L>+P%>R};2TmB8K5Fl(BEZbt|fuKru3Mv zr_%?z;NHeJU~2f|^VE*@EA2bG%io^gkv!kiqtaLj=#Q^}$@4u3e`}G*hRVnnAwNzi zC1d;lpf4zr&0EfCq7j03-xe1Z3mnZTZA?$zbL|7GFUJDtp|BbKD-_CwCA34!<7F?RU_t3IvHoAYG-3Cy%lQt@7aJ}NJM>H+ zwd+zRLJAyPnFK?SHG$cS3|B%kyJ&kPdnZz;_@+&p$X$-uf(U{_fn4B{d(5qW7gNwo z1&8m>?;3e7lo?X^kihRJBEdhq>#eJQXxCs+a2y_i1;9+_%F^RNM_5?*#SUUnK_WF2N#qo7@}$ls8dGe>jCNmZM~bVsyJe*-#M`OmtYcLEu8;086Z zb*wDcZ>R&aPZAdd9Fn*IR0l}y`SP(K6?UZKiG1KOi8P8FIQik`#%)mzZ>$bA#xfB_ z8+cizl@UIj_hB%Q4Upkt!mMLB%?M07H5{;N@_Df&2KN!CIoVFLbDLk*xJ-VvFF0)3xQ?}EkZ(JI8mq;Li3}?I+%uw(GT-Ae!QLS9S=b>a z?9k0@91sH(NW9&oq@)tR>!VG^ z!zAny!Gowh;)uc_>J^UijB2Ivl4I}7*RHOv*Eqwlvw$yp!9z9_53{X)67EFpAahX} zp1AJUv0&y7wya*0F13)^s_a9suhsXHGO~5vM}LkD1MiWvwG-Vkjyl)k_c>B8+yP6P z_O%oeduX%?3?hLc$v2E+eOz2z=FXc(EO?2N1ODYzDAx%{NGI{i}4oeB^V(O{wOOO_&y zf$G=?ob=g;w$i}QimIbXsx@a)4q%l_q??w>r+B0Lv>nA(sjQ+(H0gm{n67 
zAEIbU|8%w3)M>|mGnS?>vePIv0Xpp!j$)lpLgG`S0~%zBY0XH+_K_M8K+t5Hfyt=P z8g`U-)X%=6jKb!f=5awu!a6`3M}r)VB&twu3Z!w0TfIBA`b|*XIP5%5gnk8+Fm;bB z2exCxb1UP3h=>N!2y=D7?I0@w!4jMhxIBKiAP~=AQr*E0lvqY~5~EQ790@~f_gaP% z>jG3cvG`_^wUHik!bT!{)Hvjp4u2=Ye%#!>dv{g5)*g~Dkjo^A4RQ-%ID;#>tIjkU zL)uH{SFpQ@lLJ`8nnTMDgyYlN@onEGkaF-t;YlPgB;zA}4G12e{jR@&gIPOF(?~1m zz>9gyqCp9g5`nHGMEyq?R&b9@twA%3<+GlT-|DdT_KR}z#mtm5MiH=+M8Pw3lq3zLp z#R%ubcnom&b6yZQw)Y$$%82d|jSIJ8OkcJ6qeJTdForWjNPJ)!pJ zwJnfy5jS#}XQEPT&Cvg+qx~-TvAgvEoSt1if9v4Ep`yx^m%96UCb%Gz#s;m%5lal9 zfdVyThHu7pk9F0z$% zv#*rbw1(a%T_IRs+t1X?D=1X&>&*^>@|v_b4qyA(t4M5cp)euN%f$DQW;wG1DNi26 z$E&TE828$}+j7d4Z_C%GD_4o~b8~k_&XGFXMo~}PFO81X!&-beqL1N-K6!cahle9^ zxbY5e2q=ty4qF)I6DNi&gCR~z{>fuBWmxvc*>Tb8Du|wh&b()H{`>FfQ*Z8&1r9eH z2>x4Wv^XR&^)=k9wP8ycL#R>Q(GK8@#D)n?W#@F^M?d{KyjfUSy6uC4#>vkwbR@kq zY`UY-W*z(dxmMA;=Wr*J17FW2BXjJ{n-U)!3=9K~oQ{h+I&;+`iau@@hj#ix9;PpR zB(`-O=n5hTD)w$kHz|%0?&Dz+Nahm|6EL(mm>)N3z689~dhy{ea6v;)_StfqmUqS9 zBsCNF-*jB^=_cNzgH##&%AZ_35?Ru8hEJ&hX`X=IwEE1~r)Us7lp`x*Wn+s*A4q%1 z)Ob{aJfnaLeHR|_hDZEr&^7ygMew(K@PIe-%Ee^-w#Tnum+&b+c10~kw23gGlUEtC z%)NMV4_3u;zkWEyia?_KL=^~2Es^5@T;vC$sxtI667UIWja-s{kBgi^xJIT$&-d*q z=%!|A0_~_o+(M#k0nk~)>-}I0_gR}{sNHF#PK3)eeoxeouLW_? z%l@;W64?X6M$&fVe*^Ahr8!D}KHsP6vW9g6~0E<40@AZq>zspb1Af8F}5 z_UD7MLH$Cd(U-2!8nXxOO3fuj4LYr$Sh*p# zMJM>@mwPbWVzZG^aNOU&osIvBNbC%yy?al6Y$5B�=NZ%9D$go0_Bd;KA)pb z;J3*>|GakChgNdJZb_ohx?uffVx~jQR5UaUBY%jatGL^S7If)DIgU8Gr4V|I%aN@L zTeiH@lWTMo<2d?*Ab*5}z4SO)Bmg!EERKVnM6j!{hZ(FlOB&Rga{;Zi z1W<&-D^bElM@Judq!e9AYlPJe@gWA&6iFCw2+F*|LD6caH(K6J%pEnA-aOOuhz50! 
zJJR{1eZ}(CjXLEY(yq>*%gMu1Ut=<-toZ^IceXkA0l0@=Vxe3YQ`gl^K&(jD6H!p* zy!QP%v2Vder6z`8mwvkP)lH(<8gexov#ASW@i5xa_fpf*X(V?V{drm*2ZrvGpnwBb zBy2rn*;6P~qX6ZR3C`H;ab9(`{ZZe(0p@7s;Wqlq%WJoq@(Y+keW_@by z5K-M=uU8{`pX>0w(sY}^oWQH7ZyA3=i}E79UsrI6suP()Yi_@Xl*@K z*k*U)QJKhIj{H^|{kX`kOH?D5s;EQ+t260^nLdO<6A2g5L z%>3J2-Ap|kiL}9mOWCcY*5dh0Ra8}R$pT!sb#aFJ82J0b$FPcuk)omi0hj!Qn>Pje zMjrZ=rXR7k*5}!^>(Ji4YxuUcsYFB_^Edc-YCS&Wvy(FJS(Tu}kKf4v265~HF0Cye zkK%HHrJkckWoOfMJ8#zLbHv>rBh7=d6-aCvsGpwZf+DikJ$%?14{xh%Y*JD!Qrd{N z5Jx?9*@?qWvmqy##7^W-y*9asn-S;*ij9>61;7HHQtZh6NZ!0AL6>~EhlL+6m=5ex z%W%onwfa*>ufK44TDO3gpSF;F*B1+;)7SJx0(0_db=?-Mt3U0?qEeel_rk)}QFq%mi1UEx2&*9OP>8@@rF`J|!9; z62@W6Nor#Vo<}DJihx+0k2HymiIK%BOHEDX%5dl~!%zQRtK}PU zCi+248B*m@i?pWgP~!U1WOz5`?c1d4k3UoYdU-5Uhw#U@WLPM3UaO%Sl*^2-X{f5Y zTGp$gqGEKrs$G{&7VI(V#Z8rj*xgJvfMK1&55GS|);ZX4#Rq59)YZ=yebovrD)KjC zyDg^>9RBPX@9vT_wmHB1`d@Yn`mkBnt5{e#?K+ehOhfqUz=IIpFD{P@SS55ysA>9VnlH=MqH{BsGM_}e_wVnJWm=$FNYjn}>o1zGp9t^U+xW841z0EU#55^yYNB@#_Udss{PHK3 zj^vUd zTpqn$28&tPwy$MFG3%n=d>%gBfgR*uTy2;>8(5rr7BA3@2|xXG>#-fmC+fMJ^I7|@ za{3nZUlta+76!8!pAbpSj0}H%UXvp(>6OZ!%PGt@98(Jxh)9#80NPhBSz@_|(q~Ix z$?68Fu|kVx7}SgmC$3 z$>{7^Gsu;Yo}Kpi*0Sm!&zodb^;EU%FPMZyHH&*4^_4bGe1q<1DD@~@XG)iov$v9bzA zKQahgVtCKXdzrFjU-Q$tE+jiTwzjqJI<|XpW$38-dhX&+dpT#L;?B7ncRnGfy_&s! 
z4(XSIgEO%v0E(j)zvT_kE#$--o=e(F@@)2APlZH8>wcek9b zJI9Pu^SK;1x3{pZ%~hhmcHotbJzR&+lrbmq}Q!>6N!oZrmL6@33N5djNe1qN$F}FxtV5> zBeU=Fq5l1I(!s;jc2l#JuJ3N1=CE&B|L3P(Kq`jc*B zIkP<%n_Rl&9DRe~yaDm2JV}fE^5qoknl=0@Z@*^51EdmpwT(JJkawtXspQnAU3h-9 z`N9Ks_S{Igc~O>cp??C%cU@w@0ZqB#dhQ)Rep!(2!b0buupch|YJ0@5poZR^Jr|8ZnyXJ0O9z_n6_1H9|8Oj>Q!f(ZA2Xj%r1epZ%jZGzV zH%!GB6>$Tj)pVHDVP$FD7wlT1zre{)9=i*_`ZL=#+YX7Z2*<1SsYKFR};xaRZjeFs^`UFZ4u z?W7bGSc~3?9XjMPsdMP&`_Atly{&`i7nO$ayW~>~5RST=XYYZ-SVNP|PnYQsN}g|- zr_|?F=1r4lk@WKCKPm3cm770bly9%?dx<34bb9b<8Rnol?J)RFiZX9yh!c4i{WVWk zs2(@V5f-tM{L6nHQ9KKb!9q>%Q?;BNI|uCw#cAG)AHZM?$s6Sm*>J`@Scx=n`o$q6 z9_W`IV-i(Vv;i?W`QW2&=%j8kOO78#k|L0Q#dazpQ`@PZs?vCI(RsgVi|k;#T@R7l7XTeNHb&gI+S z&nrL@w}1n7X^GFjFa=+HyY^{A_gJ=`7)K2Ohe;_;3OQIQRV*}_J?qK>XX-FnQ{#^t zW${gsn>|Cjh9JT3@@L<5D)(HPJ?AlmDyi{PWG(!haG;`!$^=%<%IrdTc|dR)s*y*_ z0S^vpRR;CK$XpZ;J|^JW!t{7&7HmZuTwlE z_|(L*Q}er-Wgg)NR$RVLq3p^h&npZua&1y=9L8RmVw?hLli(`c>6$ZSW^3vJt6oU} zE!OCb;TIK67YrLR%btVloqqjzE2J?0Mo4t__+s1r9{lV}d3bq`DV}}w*GpEEnbKGx zT-dC!uC7|iOP5gESJ5LNlamz37>*%2K>}e=-^jJTu?Zp=K~}Q$=>1Uioa+JozH9y7 z;kKw0VyzAEmcGb+GbmiqU?>p+U-sbo(Szf4%u;@ZmdVduahbSqOWIe6(@(rFiGN#hRYME}L|w-_QF~dwRaU#`|swM^YB-+_>wv-HZ=u7c@1iEuz`$ zNHlGB&fmbj%|q3qhZVm}O|k9C@q&4jflG|Mf$_0`-ug%ImfU>WStL2Sc=OSx0eO_2HHJ9;JR&a3SY)?6qU&Am%eadayh^! 
z5S$brUx$1H^_nMsB^DG)QBB1Uhq0~>?x?jw_Dg$?H;KK&Wsh5$fi= zsY56j9z2MmJ;{Hle&$=9+>r}!#Wvwjna>bZAswn8Vt0HZ!!iP6O08ibL0gme)0vCF z&BQBAac?UN_qJ^XVzYAP7cULkKGfpAYv@wjV&ldRdZ}rTIp19JAi~FIi}2}pg+=e~ zvwro#SFkOj7guPBuk>&nH>|FyL7s_29?H*}Zipw!@L%EzHrO5Y*R|1F;yuI(nt%`G z0N2Xf)+~%)vs~ur9=^P(c_Y{Es%xZ4sXQwg`u0suU424cU46={H2rvGr1@=GMf~}a z?Ke#V&cq+0wC94LI=(IF!e2T&{2lrOo7(M~#vZ!w{l-QTNZ=DsxOi9I=GnJT8x>v| z+1PB`di3XBcLLGYIy}~DnQ>}8vXSZ6z@bPJJ~kcRkn_pca?i3d`xiE|MLv5LBPUnn zm+zT`%Bb(^8a}~Z_PxP}y}10%__p9NjxHg)&TR$m@Zea}l%7bS0<>S%5C+v#(a-t^ zv92bH{elk0*HFHAn*_ur=n91Nu0>D^!{tJpezNq$YHBQiq0-h-?x zI!Vu-4SCd&H|IE$C{HG%ATdkG@6*4VoxLBY;bRY#+4J{xRA#1N-nx$7wa6mWNT!)m zN>&m3{-3|Cl|C51g)BU~N2E#^9leSKzxPsJXJ@Cx5k}ypEO+drvY!;#kGuT?(pXL?UL2mCMACE2qCk z)8D>Lz=!(cNv5W9Kdn>o@wIlBc73k8c5PJAnM+o*Ar9trY1jL;`wZ}Voa{r%RaPqQ zQ*cC3e+E_4M7xZn>E#e2cLfzz$?dOG@yl%;^-y{jcIQr8OaFl|sn1*L&M6w54vJ~c zKab7u0KdZB`cGWP$vgsURS7zd$P*$4NsKe83WM!>GQ7ewGsTI0I2qS62m@wiSQ(9$ zPK}qcMKQ*NQQEbeFJS-Z-zyz0#RGx}K%hb^_ta93CS;6V;a4)^inGQUwmOD~MCWBZ6=jPW*P#cSMds;du1 zUdw$`pK-&~rd4t$PxudS7<3R&9| z^XP~L!1>EmYkw5D!x`t1b>8@MLVr4Q8@bB(g0f?O=Wp=!`=F^&a}L$@gEN0ROf4k4 z;qaS&;5g{Ea)}Q8998ro_ee~P%xiHH+RDz;mzDn1s3sK%?QR8Cnf2B_WB_Y}#*P|hi!8DwcjVo^LD#}>F2$kEJh3)H65n}} zlbqD1K!n6Q!o7K**qeSiJzwc2LQr${OPAj0HQ?#Yzro4Ru>-BRk{*AEjb96u;|LOE)zQxwhR<6|fAGNdLI>Ij9T1EdQt-hL_ z#~eq!V`F1sUc7=PHv4gQx1|roMqg3hPO4$#sAvEH|epB>f?*(>%+gP*T1;+&OfWgYZt{mzE#XAi+86ndltHjGaZ&=9H* zegA%s)$0}VKAsnE0#P_XqKv)Uuk!)irnXrM?*zFOoY6-4Z|tZTH+cA)Q~hWLcS zdoDUOR7#}bQ;r_ytp+xYZ(YF4*hx~3XXy|oH*T)F#tzTZss!oTbLRSjWfICa4W@HQ zCuX)tlYnIAIiX|cL;_I(Yd(B+(_p9E=MDdzoH$9WVd}nD(0?oYle6i$#>W62eM1}* zEH4SFs;HbH#GBKOFSAwf2HO_T<1*8?4F9~imEBxJz1j?WwCY1}5@IcQi_Li4MXlm~ z<&*deC$U2WH^V48qxdb1_c@7`mpwU*ECi(v567oR;L_L+WFF7RSc5osfc~H;UE7$P zC<}b1VURuCj4c0iC=Pr0?HBX$k7dQi`C~_^qi`we&ebq6@ty_)q?YYGS}9}*ifZ)4 z#^pq11cX*+hi3d&#s!oaA{Fr~=)af!Nv680#k*Ko!D2YIs4xQ);CSiF^eVmPp=V#a z(BzDalO&nM>K(^p)-_!`90)LmPm%MB{#q{Yc_a+sTL}xOZ_J-}*Tq|wmy+%^Oc+K) zZlG@@AnA(<_K^)V7S0`mgM$hFSw%Zt_2g!k|EZX?q2F!yF9Vmh7!W8Llh?9~_V5}R 
zP2j>lkQxRjNijJ|P62@Q3P%|fNeW)@*7A8cvr^2N}#E>*+mkQc2kT}>I~`diqsPN35{|x@euRg zKcNTdwaOzJ5EPf0U+(aa^32x9?Yt!=4!-_^UIqQ3re>1fRK3tght8`2+h7-p=9zxB zuV=Be(laS05HXNbU>~&?q$phlbA+oxrtr(5|HrdK0RWB@AeNFuQ_3CbLq+fRtCFYp zPE0{$ogv6%Ctdo9HEvDeXn9;7y(IsZ^-M!Gn2==(3i}WgH+i&es;*7)uG9*D^-7;# zu$ONy$H>f0;(Sb5#L+^=`m&QHOn(iif-j2vc7Qyb$6q+T9s?%4&b+rpV(D7Tr%Uoz z({7PJ8{I`p@)2sUL+!UdsPBeP)IQG0&{`*Qt_*a>dGtxd34UQbFQ*+$F2%SMK3occ z{QtP|*7)_UeZaMxyM}_MANKNXj6+=*`TtFWcS0pKmG&^4E6Wn#SNL#BPxU$y4+}LLavY;J)6>S#j$SqaOZ((K7ot7X~nY zhQ&d@y#EsoL ziGFBQ+?%=d03mWPniV{2NIA}KYhwT+cG8SCrKPoIY2T=g=f}< z2L*BRN=}`Kh*+O*OwLTo)!)eWA#?GP-O2wauNO{hnC;X~h-1`q3X-Y&TT+e+_O*i(KfsNW# zQ%?Vb*Z2{$e@<1JW+x7B9*j+N(Diy|+n>~OVF}W?WxoERGFn<^3&gnb*G|`+=5CyQ zSJ_g1du!iU&Fk$Fmf>Ep)W@ED@;CjzJ4ry&F#C7yf7REg)@KebL)i{SwXjuO1Bm*y z+$f)48M!~m@>#-`c6=elsfP^IbC;aFLhQ7*NV}Gyr;C$zWabl{Zv4s90=-H<3zIFE zELpy19@HcrI27U_{)fYqi+Ibusp^Xb>c(GQU&inl`e{N_SF;MdbH_qe`fdDi3Hn-^ zw+O}PEq<^oM6X5)U_wAclIUxZ*|C)j$^-!AVZDFmOQnK-p9URpe6TFqjwT{UrJc_Y z;w&=l`nb)<`;sT>yyFG*kCr6_=7WVBu1LWmk(l1Wx>8YMjehPyNumwY%>5pk!%jNz43w$8#xC0uDg2O>&uCM@fQ0UbjFBy< z1upRb$C#OJug%n-%mq8wN~>hGO3GS_H3D0Rl06A6gp64kY}m!Er> z1ngcCc8tjh%2rY}ktTeQWP}GvSSwyxRu)Rp({>I{hZoIZn+gU>++~H^9IrjI270y!bQc5IcmKm2Qv-I zbbl`JH0gd*Tpj3Bq-%-MB!@Y$e?r{gciL^JZFsDqQBgrbfd)v|nQy=G<*` z=HaIf^N+J)jSdI$#xhQNy!UVzpwG&+wzj^g{hJQayjY9fF(@8|-ZL$6AN*vk@x0<5 zJpwPh)QgMw_>;b36sL8?U{9uB6TT+W1Fio4u^AbsN(EwJO*=X|*f%DLeo$@kob@vt zo5oS{*X_#Wcav5};(<;m0!&^HMascdPu+#iERobOvZl1t}R@n?{ zx4@Ql3vRruu?EgVcE7`DWk@IXpPGe5Vo%Pno-W462Y;%CEo=hRdc#VVGxtg=URcBe z)2^nXPyn&su0l3FcfHNWO5h-#Tma@<1|{O@YKfZ3qx6Ncyq6q3WB|3<0nXchl7p-h zibeCwC?`jG)SK>$U3`q$i#IHE+cfh=eJyfFzSR_8mm+?DH_Lr=(R}N^Xfg z{i`4=HeBEAw3Jgqhb-SAoAmT}Nwy9AGdC@C3~)M`7yCh3)m!t%!qT!8GTW8UeAC*! 
zL_4b1dgi32Gucib0Z?u{18M{e5n8nkOXI3;sScQdZ^R2-6~*jPQyjo*_Ywc(P* zg#nMzLy!?^2(|Z{B42tkl*E{fzCOzXppbI6OY>4pfrag8O=&hYj`teK87%M4wxI zW{+szh}(z#{qJjv-Q3)W92@rco6;K+2-$@>HxPk1KTSzVy*6U$#op}IR9Xs#HgO02kga>pzP zGE98Z(6k|R_5%m?V!?ViEE*@ZF+%D+HFBsaesBiLakj;BaB^WC zS)`v&<`d)q>#G{Ud;kIHa%K~d;3TL|Vqqrc;q_*r1U&^H4=rcguc8fkufJYaWAiz- z^b7bw!#X}50=)68rMQbV%?5v6wtALw0EZ!b?dNy<9#KrAvjL_1;ko>G-lUry3d8cJC{{X&U0A3D^C*jPSt2(8h(^61W;J6y6@ZKN3n z*SwmHCPd2sd6?(s4ez-&mr*rbl~%1(B>fp1FK$DgNC59Q?%vBepvWU@fP|N z3#)@B_VSjAOTHKgH0KxIn7#5nx$hTtqWF28j9O2Hh-r!E6S}SR(S+(1K0bbJJw`NQ zKS`gP$GCWAJ}6QPY4(3YN(*TYM7`P4?&D#o^L(ifshRq}a!Ingj?$~_g`!MOFa9`* zeifgI%xEyZVC3%nxjEUL@u3$VWz-3Rq5@dTv;27C>(@(Kzv(^B&W`g5xT;3$qQ^C- z+_U!uE;-MAL0S3BX~a4U<}P3LZ*DZlL>lyQ;{Jy+YFZ=94u@?5LxbHA=L2R|B+xL= zg6K95czkVo@!~DnK0k-^rUYcgzuh-%u@cu7yj;C_TTt1$WO9?p>*Drbbx3iK3{n6T zp;V7Vu6<*Ho3cUkc_KgIaQ|yZQ(}HI1fBg?1?fPu)Jmia78sj*=%#@&ywd^C9i_D< zo+{Fw{$z@jj7`rqxZ6=52LueGwI?zt=q}csuTG-LjOu)Xo^j}0e8Y;T58bLcV~7={ zY`wa=JnN!(B}2m)NK?}u2xvR-M_u|Gsw+_4zNql`y`4r2bq%IjBFaGOEI5#%lQ@f1 zI!Tm&JRW;Y;z8(<*G*63WgKKb26(kinl$zhP6%nt@RtjfuK zciHT2dGYeSP*`ltNP1B*r+4Xa(BpD(q64tCz2pNy0XJ9p5u9NEI;CT9CO(m5T=DA| z1e55;k57e+VSJ(~nuT&W85@52rKL*)kw7H^lyg$PwN-aVRLah2V2X0U zb?(Ls_u}H>0Dh|cBXPJYi=2^_wG>-H&0%T`oFAZ9Y_Pzvfq}25*`!@+QyZv!+p~3T zva*(~BJr7Wm5HckS>2>lQM?h%f_O{+gFs%rC;X;23pj=P;Wv^jZX(9-?x`ZP^d`sJ z;>Cr!nT*8Pf=)6>B@t%T6MhCF{+xDQyNHoUuwH?X!bHn9$&L~R5#h7}a)#MVu8Tc5 zDRdE~~J~7P zT|n9YN51q{gnsCSHbE&iqhd=goj6+QW59OQHtU~o9-0Ip2B8e0+d2H^(w#r&qNf7I zt0itaMvK0$vHy1tbCc|jvZjkOj49-+6W!+<1SryB%A~Ci+oKZgOu|S+P|&>HxM}9< zrM!TSYuJvg0?KgE%}pB>yGeUl2tU3Eq8j&UtVfE2sGVZqtYn>;1B?@GDk)?<20d3; zH6vlCY?KAB=>W_${iA=Qo90Fa?|b#a;wv`w&)nmc7A;4C#ct@tc{)gLbJ;*%|rfIB%e< zMO$I#EGfS{kL>5nQS<7WM0MbZUXYn(L!HaFr(XxG2%7AgJRqSZuX>W(#9;2-EI;M+ ze3~nYBi&I(0ZSo~I1~2Q%T}fZ{&BIL`e?6~KX;Ixm6Xkyh1V!2rJyoNO^rjikL~M= zs=~5B$gcr6Phf6g;gOcX9gu1YBOT?E-BDZh@g_ZY4_!|n*(Z*qm(m!mu|y-kudyv5 z{S*F=_cP3QlQ_yq@+VZqx+>GmhdU~dZHo;*?FCU#i;aFnn8&1!YLrFQ&mV$jRnbjg 
zPOqY-d^yO+m#J|0a5^ulD%VQ-7I`8{?*Sl9Wfc`F_PusZG}z;y{(-+A-MjbFhrLnD zDrc6e)KuB-H%Z{oV`C?|3yGo*a~!CUh>tz36H}w#fLr?G>c78^fxCe*3`;G=IM(-k zy&y(hw=gLxv-dB?C%I~=jynudS@Q0Z`+`*5>fI(s4I1 zS|!&0LsM{h6dl2adowYmvTpH|Ny{olJ)TI^Uhu={{Ax9GWo35a0bec;wC7owOouTF z^Xlm*oR~Lx_3-8uq{V@ZCmODw#7EHxx+BD1jfQg08-FuCBL%MouMXoQ1g4b@ z6zfl_p!%|P^%_LG?Tl`&9M{xF7A z$}1fGgN5BT1!BrmZ`W;{{@tNdi(QJr!}}mBAUu56(0&|?kFvA*eZ4G?(|b876D#SZ z=EWb^Z~WcR9XT)qyl?EW5_*F{365p=HlJ8u4>5F%ov-Ovf`J?b9Sv%G7Ols}UHw65mP{s;Z(_lSY3Ov~A)#7c&Xk!sa zDoa;&MVh45wZX&7hiXAA2$qNWOn)Y2!Q*NC3waX=OXv&!2yt{slNgPb!Z;6p1l3AU znMwJA4Ot3d(i7MFIH$kqHeLId)ij}qe2mP+vckp6dri{vCgVyZ02~L1T|5=IV(!f5 z3{6kHcHEX$BN+`c3+yqkKsOuF;3DAOqCY-hIP?ufY`Dyzy+~-ya|f*6fnJHOFg3=d zLN|j#_o{Acsu_YKpDd(UM&RJ!8yM#Ez}9c~?lW)bBp1XEYZ#lm=lR#641HBq?@k(R zY~zaQk8-+>k`a8g1`&}~E}?gfyhBqD66n}?oyR7XX|K0((f@QIFZm$i2naG{IHL(G zu+=8(mk)ZJu{WBKF<4UwT+l9|V{JPi1{@N6EPUSoZeTE6$4)-g>Be%BFVg!K;G)2Q z->CMahn0-&f{LZ1^4#>bQkW&mjgNzPjLW{}Nt#U_moJEj;zYMvFGP;>A=Era75#-A zKrcfi{WSml`OIBwtctd&iM*+8l|t4aP)IAiD%VPrlnLTwO@J;mRZ^`g6_OV(p8mRx ziAdyxg~=eFV*t8sQSV<;gYo&{!~IrW$r0k6(T+I%Geue@>3?5#d4>f>cp0tD>x5}| z28eAW>gxLXNicLoH7CoW(pZxg*1g=Pqw}kZaBI#jeGWQ1s z9uy=GYAzYNu#au#fB(Yi^hg{bDaR6>SsYIrJP|OD4aC7BHX*^6Yj3b3F=C%;ni`(! 
zc>dM>?!{M=vtn~8?WIebVsTHeegkC9ymP%O|^6CH-Ikc=uizgpgR-FQBsViH?Z&9cWOY59CHDW2rkQJN;oNIqB8l z?PO+naGk4vCW|g&)0}f`HvtMZP%jgUkG7yoqkQy5f@R_mPhVVX)b(dGuAay=+o38Fe9(|YuAL}Y*8M+265$!9?!zRYYWu*>7dk78;ki-7|48JYq zxRS0(3@9pIV}d$>kKJ7D>gZzjJ(0&Df`l|<4_(cP;>dw6Munh~aT*N*n$X4QL-RC8 zz;2mcPm2c-RLn5ZX_s8x)HDQl|KRk-p)g-Xh{AL_}zjs3BMM8!KxGR;>qW zJdc@+zu?5s5!kO2wWxxReyo?*N;I6d6DMHasG!YQy0yBOJKXp6C}AHb<_w(xJ7Eue z&fU7jf&0Idq)r=9L7AFq7Po1KBT-^y?!7m&lmhK;tEe!(`+zGcm878pGAt=6$%H%k zpo`@AA6}&O#{_dk0QExP8Js}Gx1euqTvdmTF&ZOYybOP2;EmdWG)E5((ufAGtLrXk z`WR4)Mr_j4vg3B{+4Tb+QKC%iY;eTMojkIn`q6tRK^xZvgBZh+CNrK*lBkzj+z3^@CFzH&jE){A`C9@MrrUd!xE=Eol z8bx{_v_`z}z+-U!VlSc_!1|-XUEc?`X)9NHP3OD-D>u<4WR9fxg5~fa@&+*Qx6#ky z<}etiU)a{)@vKs@QbCj&uEfJayU#eEq=eX2j9OPFXkYe}80A3>w~oLW<+iq8Qz4|M z8sS3~s#N9a-p-R0dJD`1tPrXoGM+->KqYAfwydwuX@!m-FBfvQgSICYpVoleX`}-S z3t{co*Au@pTBSlI_mo7NnobIYocHA8d#HKoQmTr|R+L66tu5n6NK1tbNfOGA(FM7r zA}NBH2do$=h|%`xnv=Hh#vk-PRIKAxD{-7vO~aeUb}^npXx>bsjkRk#Qpx0U`CSLc zBXSf4+9m(9ekSNC%yjoj2%@SIG0H}2guZYLRy{EJ^DBwJ)>uu0=5z=S7o}TM_Cyw# zWpSIG+~A--LZvp*>~3pYnW0kv$<_3wL%YiYalGiks#eEX6rhxVMOfLpERAI5v z?U#wTCg_8~Osz~|UP(?#dNzb~t4z37CycludslY&l+@TLl@gfQ=A!`!^z;FAHAqOy zPG>JpH$v!fDFG)5&UX#5>_duei(1>7LJ=!`NZPoCUZ~7Obuz(&I5$rYrRVYPO8y$v zN4mO5TgEc)nRIBL%%S|PM2JQ|I{x2lU?I_L1=VNUrVvs|vDne;bb14vGN_CW1E-&B zegV~<57%#C_Q%4KZf(5_d8g9BgR#J0F^p%}1Q_1wlpjf#>G!Nm^rzr&%cI~+R~2!J z_`9O7<(>pXpvA%Q%D_WTzKx&X%*Jfa6Z!)ACj_m8+%=PVU)NeJ5DXD$VT+nB9p|ke zDGf2HAT~REe0(@KsXY?Jnwc=)uu>Sa51I)&7udZ>Y=4uJsA*eNKu5xWs|&US2*9FcWZnE~;CJOWJ$ zmeMQytjASG0S2)Bz!tM)Wu1^F$HUv{8p1e#mZ4tP z7+#5hZI6PMCQTEVgI#`Hru*`LLJPZw%4M9HT+(fxQ)g(D9ds3m)e5o?OX%CD2^3I- zGi%mYO|*} zEX%gXXnCm*tV4bw3kjnzD)B>y4_|ICJH+W~Fbz964Ux9S0$98_PSiTmH6U=}CX^7E zPK*jyL(8Ej)xyL2wcZb+c}BfJ-_US=V4(3*lK)y@dD?A|01U^WN7d9~h=1SMwWaH9 z?&>7+!Y+aM1u&Xvqo%8#cF+Z%H)04~C>&&n1!A&$#Wo9!#hJkHEQnKBS>K4v;AW#a zv%VtwAxpBW35^4+nXhIySVfbVVW`FQTV-t>ZUN(7fiT&$0w zEZClvBZZDPUP#5cxtD%2bDVa-F-p%Go9XhB8ZLW0r|c233@Yf-(dK22(U5Kw?OX!Q 
zRw+$bNT{9QNMMP!L{-M%RM5q_RgcL}oZ&MlNjU0|QP_DPao?Vxb?FXQ%p-#v*lB1l zPDT3v(PHG7*9<%$uDg@|*rXiaEOMfVTmAp`{F%N#q&1iiv_m}VZH)(Ko|mM-%SK&8 z0?mGHt<&f)BZDtHgn-vx2F1RR!}t}22fGjBF)J%8dqQs#Rhxl8OZaD^U?PT!ba!8< zl9DVkT33@FN^~SAhLd7qJZov)10tC1N66NB4l^pcTjyult@M)iYf!ABY49&70o7Gg zj^6YPuc|5pXA6UKC8&D--8yFKx`9a2h|m#?LFgeEOq!(+Epy2yjj-IZsPYpaUn4## zWXhNcY%%$GdCTEu&MixZQ#pM8{5rfH^|{i_e}q^&@qs4 z3KC7Hs;d*!b*XE-!qGBAx|LK^+|{Jt#-I(c`u|c#y^w1DaiqUZKIN=@kKVPwM$O@Q zlzG2+q|`-zPK(0)qT#FV@D!BuB2E*;s|0NELMusPQvqvvu9)0hJ^CC7%%ymj#oHL z$8FLv`;$;?pXX3ZNt^e}`D>o(IY3-B08}IT$rB~uizdV3d85@yRybMH_272|@BEK* zTBq-))bOvxqq)n}P6gGMCvRirq9-Km!W9<|cLOYyV>WhaXy=6{@ZG;PfxE7dx<+bT zD^u-6@cyX?#D&6`x`<9qty zSTJ8n=e6bMDN>?z=^7Hn(7QV_j{tfHrVN)L80DD88xGz^hXq8`7Phr*}JyLP%FYElWILR+G57IaXa`&i7H8-6VvJ ztLd;(aj~j~hDN;Mvt0(Sev_?0bgqXhmIDAVdq0zZaL?OX(}Bn7`eyc}q+d<>(j}vP z+JOYY3<#VDbE}~p)Q2-SU!nQ!%JVmb*M5UUhw9d-=u>)VF#5D#Ylbb1?F*&eT4wUE^0vs&97-=^@67cW?s zoS2_?KxQy*yl0P)VwBa-W{r&A$9HX5CmJmZcMeTCP%S!!8nPsr3><QIc7IsTaD~Gq-Rg zs*$$mt06=Rzj*NuKwzQS*~M_l$zEdEJEczIxMu zrKXu@eQ0Zy4HVT5o0vl>(hrBX~i%++EnH2Fo-F_@_;^q?1 zx2|=mLPAHhv{cL!27Z365UxFP_;5|GY7}t<>5_zl!1*U=@?C#b;2=5w-JK_E}MV-#kLcvoovJ*va|Ct%k7`Oem2jbOd`k^Ho|l_T$$lx z<+%6hXJ4NGDjhez=JxS1?rqwYLuo$aHj*I^>L$&TPH-zc-0tgh#Cz)?)$r9zH(n<+ z-4lW9?mc@@14RhtjjPac$W@oG%&;P5yDEj$HHPjXcaSCUX7Ug12WO50j)cV$*?&Yr zMj9w&kclmv=OJ_AiB!NoO-s)4E%TQ>acSV+A6K0EIdU*yEbnW}!~0qK=>`D+?>^1X zb6b~Sa&6tZgb6QV_}w5l(%L3Z4R$qu!Acpq-aGC2LGBqLnvmNtQl-yMX#GZ~ENN=3 zuI`P7Fb6c{P`w~~f_Kx@dD7Jg4}nD*UK}` z4FXgLJISA)hO21{B)?P2XmlwuvPukY44iGtocPs+t^CKFG$rGL%SMuM18bPfeNU^N zsAoaDAZ8QlmK*0R*&<8i2LI+)b8V%7nq8iFbWP^vR-2)=F@5Dtymqy`%*PGGgiV`z z=I8l&>D!JZPFP+_P-*&mam7d*qJ|qg_ugQ8@9J*9ZzZ(Au}+qxb(9NbbgY$>fT$3H z$cRo|jBsb1Motc!XH{S6yp)^yzPO>iS+ANJ;5fr0OU&yC9sm9F1LAqWy*0AeI`G$o zcfqUuM(7rp#~m<{Vioi3lyghJEWhRnn!|)y2O`#hRmVre2(6UN%Yn%}hBiOOroIJM zcyU)`T37hTC3NgPw%75=%7t2CrQF-(O-#H>&gfcbeHnWCc=u!FIu_3rZbdu8cE7)M z?o4s(Emz||MfUJ*VNc#6vpH+%_VETc<3H1kQ}C}Jy3JAVcIDetAAjeqknC5h=IX*e 
zef6-St^QnrFp$Ay<3~58J^U2+Ms!!@ymzgiAIa-{P~}kmq~JPHIU^$#Rd1F>$J57Z z^W?BkM`Bd7rp6agtZtG0ko)t???l<02{)*LD%E4e$`rnd_e#!~U7q4keE!&D)pT6i z=!YI}$TDJ4B4}6-?n7_%7Fn1ty29ThX2q7CDtNn|PT$jsqHE%y5TA2?%}S=!1Lo(c zcUUV%2UWP9n%zoiIPbqz(TBr*Qu_&tq)%7n1NO+o#(8tu`b5po)VzG1(yJ2Gl>c~OsWi-lqDB9KF4i1f zH8<&qPScpDmc6Z&$08I0!$+^0amohl$`?1c_%tYfIzTrsFPzJyEHtfHwCQ+j9u=0r zv}*>BfHqv;FNb-dODelQ@QXT4zB-cv9wT+Xvm5iLTLF>0`8!$a2G_iC(qJg2R+u+F zS0;XC5}ngQ>x+(hwaWat{4BSt;v&BZpm+PoZZ4QxTp+GP(2EIO`F<42jV${y-Czwi zkZXG83F@Cl+9VUsXzt|c{1%>DbGTn*k4;e3?ckzn2eeF;35SF+lO*zCRDupisF^$a2NKb z%egz`S=U*l=Y9}ye&JWS%P_v;Ypi-jOQDMFZK0U4u+?@N{$?)~Xu`ve1%(H8rB4JO z+|<$Q=&xs|P@k(Cm#bLW@^+c}W7&Z4QWoXEB#w&xA;{K7GNwb3-gLMoqRKUo>L&KjJb!ajcO*x35iuEx3{ z%VL;qITV&y^gieKsC99tKmx5asb@SWAX4}*Jv*aZcfVLZnr(X2rbF?y@gMZe5IuV` z&h{SEKP!Lm)u;5z-LIlfvrequ;ggtga|OjtnNIGB1NEA7hwM(^i?s$FNe3`jz)mpo zwj_f=S?Ik?(kd>du%X!od?|cxIv~3%<9vOJPwDlx%4gCM{a3{Gi;Cjse%=sYPWxt> zxcXf}uLI}4;0lu{Eu+mYVci|MHP+T+1)TC`HOE^M%^Sj9IQ<;H&x_mM`s4VPYY<|X z`6Riz>KfqzUHlr~K)boax232!Veff;N2+*2{4*w%sGw8!x{3#?+2?fai+gw?snFj| zwa}X;+);7S&e3uIhPXw~9dghjl%=SB=dYWP?!NoV@o4_O4^@q~`CB&xn^N0;6v(g9 zGxpsxY>>ne$!p&9f9N{zc&h*R|7$84)jK6KMP-+48HMbHV~J*|S2~R+7F2bOd!{4|rE?YC z0;xGNR;lW05t_d$5`FgX3^NYq@rXI+grV!)v=j$z5clNURO5TA)i{jRqW*X=^ z<1QH1#l(__KVFO9o1t#b;a|`8|uNNl5HRq z;X2DNnF7H%8oWzHs4+8!N0d0n%d29}p{oh`69DUK@*iJ@-ZO=6U0isNmj zbHH6s)RHJyJE3fQ6WGUJibXYJ6K@fdb^1~Vn<$=lQv#?e741YCnqsU?aB#ztag%OF z_n4MmcRNs0-1Pj&0WD_MZUVI$R_GgpgJK2FB9;cBIB9>F-CslY})_!>grVwiNOfWM&fQ(`{uKs2Qps zv`-ty2;CWmutpt&@k4QevFJ|dbIQpgYGR6;;=6*6yxj13t%aQ(Ll-Cx*VLrcW=9-S z@gApgCkyC;#dWkhM)!pZWo+~C*3kDO+y8ZbN*Jk1k$P!k z4frn8j~^S8f+qm6MJ%$qN6oS2=u4|Wv*sF9k6B`&=NMBwhi^)c z(1*;e78?V04c)y?y*+g!W%s52sOJ#7co-~|6Y2)vBcOA7g%Ulz{CsdUG`r@)hMQl8 zJ)xuw(XN?1UB}niUcpz`{Bv;<;q-tG%Iu;OGC%h=^CpsH$AQ`sbFY-kfPJ0k``H(w`?RCjimRzS`K zZ-}EXyWHCgq-YMcyeRaT-??=Uj?|1P?I7)qHo|X7dnTiMFf z&v$gpxilx|+t{dU#=vZJ+hfJ}pXEFPeHdgU2ck&(Lt@QpkXwo2)m2p);fP0G72qTu zp7Uk1P{?4{w65A%YUd^WSip~4{;@PTAMZFLagKt!{swurC*qsOz;?+bx+05P2pJFo 
z0vM5-H3W_9i3uZj>Rz0B2LOV`jVjv5nK`@;|_hOsgaRi zZCVDyC9zi0QDSP|*z)e4?y~Z{JazY`D(8!%5!&jwMDCaS6OuVSvXe=ur)i;kzv^#q zetm?Qfo6OO);R=MQeY3$K(IX>=&vpVSvaFxOa_74j(^w{omVW#?SE6Y&N3t=QwT76 z(!SH6JX460u6g2woT1ysA^>_iKrrx6ZS_h5)=Pvuoj9-r5Y*|8IvNWP8 zn`@VCnrn)+J=_|=)+b=NmO4OS)j%ac&`Ap@J72sz0I{{BWGNH^@hPeZa* zc{7qdI(~cF3*K@{8t7&2EXkR*H|=;k=Z+?93nuPh$(AKZ^p;v|<5ZFlzGE9#bQ{U% z{z3HPY)sz5O12gI)RSFtUXi^@m-t4@4As2He(Oi{8=1%Ve_-jp${bdfQG$M8^X6Pq z_W#~YW_@#LR$}28ll!y%Q?M^bsczl+1xlKq9W$~*0Db8Tpjg!) zMkS@ZJUIRN^E9w2xhy=%S48!Ngn{F+GCmD~30%Bc@*XScIlIiKqm*nS*5zos zXpF&$t2{J{v5YS4+4AqJ6u-TI&f3Mc_OK;oFWB0o*)^89puLiZCw#@GHZc7UEn{98 z=1zI}f*2dt^r42PtJ$gPiYw!ub(IE5!b*_3A?BU(ru*oPL=Xv`)%L9x@phP6@$o}6 zu1v&F9^Cs#YeQD^BzRtd=FcE5NjDb{9@Az(`CJ;y|NglpwqE=*WYr-~yJRd|P-{se z@ytvI16{z8!;^c8??gF7UZF(T29-JBl(PIoCIC1G1u%?eP?iu}L{a&oDAa=b*~bm5r!5`2CAwP zo(-jjl!Ez`HUL*)GC4Vfjhqc2&;C>6oMbeJU3j0PE*OJ?zeg39RY$VBGLp2UC|iJ} z#EPQJt=5hc!XSW?4I>RZDbiiOn(`#f@Z`7N_gWGyPI`4SLnwGd!jRtTPNXqPi z_8AdluPS*re|*1njoD)BOkzpNY6w&ztfK1arBl+3rz9_cU$oK}94h$+-q{M^UZeC9 z)8$h%LmYx$>FpeX_!dCVtS`xNLM_o&unNT>qZH?K>FEVGK4OYGl+~HX)(SCBE z!-mXK=G?4ca_m#``|%;{t(0y#A^-51rySn{bnh0z3jf}U13`1fXS9n_)4hs@c2zO z&sU;JX(PdV?#nu>E8bVGvF$R}m?en|p?J9PbC>IhU-1jFjjM9uL~iN%Q*t6y&)6x+ zTly5J*bY#*1-dq>aD9a0)pOiIc;j85kyQ#RD^@AUHIV!G>4RN6RRzI>05*4dV^;2c z96-H__Z#V?Siv5whuBj4bOYA47gQx6?UunZ`c@vWftR3H$%#%BnuDsnM%oCU=$W31`e#%oYgonco|{^#9~lEeGxWNQ^39;1oA z$E&k?hIVvTkC_^NUXoe7;GEq!(;L*Fu3DDc*Y~Yb>kLiteXu>o**RRcD0K^&PJ>>D z?ta`^$+O_43UE0A@tAK&a^_q9O>kgBd02A+@Ehcc$f}Kf@+$bPc);hT76z$?Ekq5SW3kEvzv1%(XBO0^WGcAa_0g)8gi!G zu~Sx;weo zi_6tc1h^Jv(H<$`x>D9AoOS4!Rib6E>14GgDDHKhd3gzn6lUx$?$mfg@fJ3X}zu0w`jy_kW-=e{)&-xsdR6-ot{D{qm@5u^#Sf^9d5=v z$JBIaz8&SLzEawRr)hw8jc+>|Y=a$M1+uIgy5n|j+h7q82=>g3ItTOvcPWtbCCMWhvHpb#t#l;{TS?e3FxEC977oDBq+KN*y zsJsg9)hl+8Bp+#if0V(s*RPGrJ~s{bSNqtdrPc=rreQXmp*~+S$_7DHYhz*x4seD| ztCcsDuA2kh+ z2TCuq*DUhN&I?iV2I?zQGl$+Q+T7k>HCl1se!S{8hHthfHv35w6U}CieR>jxBbp;- z1E4(oas6^=NUo-{{>dFMdY|tNO*=Ar`sGn|h3thCfk7B}N4fWucvTgM%%bX*-ee0Q 
zDrYaT2iJmX7!sP)T%MdOs;8qKl>jQ$cil@Hry*Rc>BNIQqyEr*ce8>;&a_-7!12w^;VD9lsg(mp5o;d5%HFlvkd&yUy?{2_~HEh5Wp!;Tj3Yl<8?s)Cy{>N7%5c*Mo^g(Yo7dh@C-iEx z%4_Yl{Xi#JLb>oNzNGKEc6=M}jQO5=o!3>A7yR=;lDY>TftDgP2yxznG@y77rd+mM zG)9An?Ys&yX3kK$j;Q$S8smlsuN5cd(4Nw{bww{gWe8;ZVr~wANZM+?pU_9E4_D@% zw`d$^9xSR47|XZtT7oR1D@BTm585~&C$eQ|nuxiZOvr$JfBPhqvyZDQQozS2rE!ZwAY|bipL2DgiNb zhQ?O_1kT@ZArnU-o8C!3Em`9TpE*&kYuHn>7Cak3n5o&_9&a?Pzna5cE^9uJtl#GA znHKU)boqNvpf7X`RP%|K0}5?u4JWR8tGmXlFh{tka>K^X~YCIr@q{ zlXynLP!iri;haEpXBNOuQ1t96gjEVY9cWlXfFee~_Yi7tkg$kZc(08D8ooAcbylbU z4^8t^=omP6o&)Lacsb8;GmT+hZ=B+hvZ5uB&y0mlZ}ht*MoF|veHfZ<#5)GA`l2tM zDJVb*JG{eb{{%s^s}t`yQ7aouW?K6CfnX1MY){g~2~>qywAaogZh00B=ovj`6LH{i zRzv;JPzAaX@Xn;|p_IAGCQsj9qhLT6jZ7Gx=q7(pPAV?& z<>iBiT3{P(822!%@Na1_Hz@I1j0LsSZV=zHr)iyq5hF=@p3HTQ^V4XmxZx5!$Ljr z7pb=we(YPWBeb-}E&jpV&$&)&8kO{0Z!cFkXJ^hN?xK7uWqAsel_i}VK4&5h%14p; z@c15alta65sL#*avW%MDgYP(v>>H8xk?Y-PtC+5mK}8HEy9QnDCU>Dc>S7bI+UJWj zp`rChR}9KINW=-MEiE-eJV>;H*s%^*S{^ehey4SJFrZm71!c2G!uzp3@s!vlHo5FE z2W`Ff@y389Xb7#$VTw?H5Lr$?lon7JFDF{HU24tsh%E>t;;g*9RJ;Ve7veqZZ*+OH z_D3aVg)S**LL&Zdo#p%b03GbZr=?|b8pHcZzLk{+8(4?EO@*RYgL)eQi;OFuyo#}^ zk1`E!CWC-|T3qF-H-4LFJyJp1B4X8|em-8UsxR!}%`h;ml0nbbgJ>_btNABX`sW9W zYs#eCbSU@QS_-yDE0d<=!+_p_c6}cr1M8L~CJ0ekAxG~fU zsM-l21MjK{4?K_AK-S<|!`3Gf#d-q^-Kgpj18{q8(a`|#f5LVu(o7e_7kl9vu{Z*D z+y*}7?$1zeqfGG1TqnJ`e!h&|Or7(yP{PK6b4iXvIih!i`+W(&z_5I9=XM7~Kw_OE ziLlSGDC-k6=7pyYlr=V+mG;T_GPY6Q|I3&*4rUMvtKq2R>hrd$QqQ;@wC z>`M#XM*iY5GyC$b2?AhR^!n+C`>N{2`jET3QL7M0lFt?y# zy3c~VC1vSS-Cx7eMQZY&YI@hcJ$q81c0r~Oy3AA;-Lv|!b+*~^&OiUammc<#Ii<`X z!D&G?iYIe0>r1>SIfY{20l5+$=aL%uR(((iL<7v82>JaB^@D z>WckpiEMareASP@8NOz*>|6&DnFxq5E$mVOZU!ba``TlCe{G;sJ1DDyynR-nnP3)4 zB6W$81*g0ww$YBOC{yiq6~#h1c_3U#= zR#eSxnWi5F#)Jc`DXl|L>N%>DV*iOV$N;Ig`kf(l9FRf+IV%&f+3JFIiXiJRp=?TS z>7VoGcPM26A8evt8;(%qBB0p_*0@OV2B==WF)5HKa-)H(R&ENSMgAc-vTrrGE0JQ< zQndKRSN(~j_UqSyiAV8<%r26MDb|~TY}s!gz{#Wy%LV7s=_On>S)#nWI8>{yZm5&P z&l07fRo!eQkTth9T{k9}%V&YMWzAxU)f%GK)W=!(Sx@S`AL|y^VNTo0VNb(lcPPug 
z*R2|^pXpIsy<+QUYpcQ_=OUAk&?(Zj|1}_4d(BVe{1=`)6a0+u{+HeLA)(+w|0vKy z>59Fuu^P9kU|*@&4-S#%xvGfBAp5+T3B0^Ep$!VsK1$BroO&N;8-J69W$Gexz4ZOO zzG`IV!}l^uve%3)?-hShoHQz$z=HLCm4^tSUSK(&TWrHea zJ)oy^9{K$F0sTzXg#{5@f7j_%l^m~U=dK(gL@eeh_MgMHa8&ciz z$4bP^@XAGMYECfvf`o7M;de>40>6WjeoC>wm#%ED@6T2pVl+0%LFi^lA)@pC06eqGw_laF)*A5R(0cq|S=oOCwIU2|Ej zW{|Ws)+NcSc@pOW*evqo4lcV__?XkL6-(422sNLA32+?YI%f;-T+r_f(W7=o3GvQf zPZcSmCdTq_R73Z23R9ZGC#-n~h2aFee%@5Z4 z2_%)TNl|_ngQD-_|3GN4ZDFc+?A7GfH0VMfS^sMV1u|e%&#yi}BM_qARTmLM>#5&6B zSHpR$RY&C|KdZ#ll8&Fkp!5x7BmzRjsIW%3o)Zv*d6)M!LdsQo->a-EVD|J$N)vSJQIK3@SzNW0vF5ol|+<`cJg(@7nhlXx5F&Pk*ElRb+H~ zb#t1@u=ci-1!?2p5}9`tRf=J}?OM2XV;PRg^3m;=mdZ?5``8a*dlD2DMPAWGt(Nyy zQx*!4lGlK(4*vY5qPb(AFi$O8^yw@uq*ObU+N|$HbSLk2z+Dt}UxsYyA+Z}AZC7K) zk$sc5Jor*2h~%MML5;yKb~Qr*P7vU)w3LQy;F}4>cm+)M=7$fptH&-2oSCdb4mtF$ zcFIQhTUcBT)njz1tl9NxXz<2qV|I4>^B%AF#3t;{T&c}K0|E4~!#h8ZI%J<@RIx9V7&n zr1!IfdL6idK%L?CQ_Yef1@4mfB0T$!omsPsi|Y^w&6B{A-fl;I!L?l*S0_Dv)0ajF zjuC3CGVY#gx$O8mZY$RaS|uM_%ws5$^t!_kntyJRtELf>qgrpou;ND43$EMAh#JmI z$~?*4yIwrx*w#F<5*|+?ugwGo>Eno>_5PPFJ!V@@=il^WY<%t+V^)`tix=tMBc5Z11Z1oT zoQBG9MBh#Nw7T@^IyzSp8-$QruWeG>V8&uaZwT~}z1ATxm5xV_gC4T?aL3QCNuKug z9~qtYTK+Xcf5HeYXPZ=0tQY8;%sXy3*e_$mxE5)2FT+Cm4J)rldtthSmUQpDq0Y|{ zOTQUJfg=ivaal78E7QOHlip|7)i}((LCQH#*x*Xh-L39SuJy4*=6oy6Wtq(6xJ>9| zp-bJuh+B?xX#J`(Za~bAy4qDgXRA_orYd9BtWho5M~#K{=jfVGVRLFebD+7*dQs_t zg_@$WGfteIE*LKhXC_nAN%ikoDKSfpaFoc1EErda$LHRq?njq?yBkh^^az=%hX^L| zc}C96!sI+|H|1PnPTpt#-n&z_wJ}?yHnEf2kD?`7`mMU96!pPl_1T_IA*vGVYW(h$av zM&vick1EWppNPXjh|n1W{9B*=AwAfY&r%mmwwJ8P%%>Db%>sR^<2$KFGg4aOn+KHHvFPenlaE8Yn>Xfm}VMI&*c~GKsh( zse3Cc`f;(smPSReWR@jFabLC7%}!$MmKNg5GI0cGk5;&{i0^L?as+*c6_vTCRr)0= z=P>HVYN$q5-VwOKCXfPamJ0-hIHVd@fB(qn4Ym>TCD}B@Ipx?YJbjn9*CKP>=F3^XbxAxh7pBeWTuxtn1w>8Ssp(HYDRQGrD1$p znh=l)0}|=KPUc7cYBBDu+C9s^4)0qn&Ch4K$j=|=R?oBP=+$(IRmvxpL_pU!%~r`P zbYeXBl(Ps6|BziLgvmZ@7?2Vdwc}?l^5_NmQ79K#f{pxJITUAXDXE#J(LL6H-Is3O z|H(oQUjTX9_NglfI#v?`%Zk9Bg^1tG!?8CzOMwy&7|CLGp6bWcALwS#O${6E>Dk$n 
zk`qZODq;yA>GIhv%Q9QhUZd5nB#=|x^w{=y=U3VE%%Oo?Hec6qx3Mr+de`yCW6@Kr z)a+A+Mo%^)bOSu&>-`FJ@WpCASiJ&v3*M2YH{@WWqz2zox}{G)U`(bDo)f7$a|?gM@923*Vrt zc)gDcN6kLor${}_xFc5kpevI-Fpu2Nktsq8d$l#9zZy1M7#f$)JwG`c z5KvQyyDLL_O5U!7!^XmkvKWu8<*pzf3Zzz9{(#M>bI1^z9#{_M?a8E9-&-$2qv0*~ z-b00WU*~~Zt$T?hxmIEh?NV(@hNCTO!F_3cA5n2WY8IA;0sK-TijQlIx*y}ZK0Yk; z(sFtDwJ+)3yZzkJ8Vz%EViQl52@ArgsBqUAnK}lbxo>#+57KDcfXKpdd@AV-&=}J* zFqlEI7#%Db#^o~PsgvD)2a#w4p>tn4T0=R4 z$DiIn8xt9wq#KW^Tyq*|a$AL=0hu_`%n91R!f&x-HS_c0c zM$f!J{?cSQoJWwmosjKElP#xGkC$DMcbD7!rje4gf~>iF@R=;SOCpeNeLu#a;y1g( zU=_)-#BfY(x23*QA3v`EdSvRi6>J@`++c5(%C&;I?2p+IBA;lniTcH%9NNZo;#Wh- zM$Q*QjMWo&U4xV;D2C9ZreI~`*t;^ z7rrm$=Vp{AQDq9Jwo*aL25w>o)Y5jsvQ?K<6#8-6EMnha(qA|Wupws9CEGDU^2=Xz zEZH2ex2Q0>!O72@_uec~dxpgxlp-4Ml3!M^%Vcl7zgQ6XD-n}woeldBQqWU0x@w(IDgP?=~}w-17e>Y&)KI3l8>uw1XOcvG&W;~P2Uys;7==e-_+ z26KHjX-Ub&g@|y9?u6ZjF7|}h2qAXBT3e+yQ!hDkZ32Bxqu4pz((+S5_l=AB!aNGn zerh>6zxIdUQwM6RY9&O;h-niER+%=@@tOaKsh-Kk-5~2#pPu1nb4aWalP&aBf3V;d zsoT`%NS-}*?_4zgVS`S^rMQP1;`S-&SUfXd#gfU#+)B|6>J>UiyrX{Q(xoQ)lSU&6 zPz$zOzH2RVpUET?+85MV4q3O3368bUN3O0e{2rK!Vk7z7Wmm;;t4ggTLbI**qeV&b zONj8W0_OU?P-C0;TfxQW&u7bYujYtn7pA2MZ4Wi~!J=mh^w-l|3p(o1zp(B;$T!N4 z^QLIt3Lef(qml|KL4T0?2`SrK^DBf91U1O(a1t)<29%^KW8h)>z zBMyn5b?6q*WD3}n+3!yw$s6L(Nf`WcIDy8~-24Y;%fr@}zkX3a=k0q6XO9{)%WVoo zkCa#lIbvu1Y?BPtH14W41nU-Eag)08*?hrjgRdp(Yb84^;}+rBUUqhrrKwsN0nLd4On z>?f}=i)Gct*$N!=K9Z?44107Y(mys@dISVz0ho+Wy>^P zxQ#Hn153MX=)TA?mb!5kDW_NQOG_<{-NX=A7*-hSSqm~ zE>&jC{w{RavOi|U@8m3S&|qE0dSE5VBznwyv}MXld$L$d!IP_M7EcRQ7dG!T6&HW6 zG233O52%7Q4Ck`#S<&}@+kt&LOsUO~XLu(60w}i*gH(M={oITCxL04ruS{z7G4NPH zMF;L_Cp$!5v33qSZ|1zeyG~-Sp>2c3%ul%!Wv)x4kQ_gLB%QfCdTEn^jkElxN{r^m zi)&lqQys=udqTf2hgJHi7yN{s&(l@txsc4vS2INp33kHFwL!yKf;;9mt;UP|V@mgz z2w3U|`-R5lpS(LRQ!B?7VNv5X>&~*&MF>gvuA=$a(C(VJ3*>jgOKDh!9^A%E<9Dae z3Zo#CATMpd?U~mS(B@Oq%exTmi+w}+Bj`aKFCXz1i@X>SQ zgquZ;v!OC44Z=zEbQu{wN=kcaZ~Xmn_;KSqBbPWhFium!4jq^?Ye28!7MBUs4AJar z$uP&oi+QMnA4ofWnk1H0X5EvSMI4j$TQ8))#%9n2&%a6bDkxNg)bx{U113NZwYkt; 
zo=4Cz=}1eA>$uD3t}sZIn*KoOARdOu>3p`==1XN|ed0ua!?@x`5vrct#z0!i8O3nq zV%HZ}q*lhE<+(ZejnibyHn&oiog$%HyDE0p%fAO=o-chXU?WKyJj(PQHKYzT5=CpX zNx^xFjmW>++u82^#F+JUiX|gT<{y!z?CeNmL12H99tN(=US?bx+H{|d`JrV^zg%Ui zwF<#64%3F57xA?%7^-jYzIDL)iBo}@qc32Fz~VEQ;-4e~%?e0OFm}IXx2U#4z!I?n ze&F~_U@Lq&MT_jYvO@Au@VXz+(j5IdqL8Ig+Wb~&nWLs@=bVE-G5~%~@wO&;J3C{^ zQ2{yCkt@or11th|$|W|Z@ctBDDsH$=WB04cjsamz{K)#6*4TXwg z^Rh*Vu!!pA7=+|hG5tw)zv&jQfeT`0>r@}cR$-+V|GBoed&zNhUy*KkBUZGV2Mos4 z);WpaWV=4{9R_HLqS79kIxmlqhFD5jV#r`i`X!|oK4s z>hWFPXJgtFiy#N*tG`xhJmMr>R~H%m?n9~|P$Xz!c*W3Eh1x+)8wF|ufr5@Udthv4 z3k%2KmgE5fTn1hR9#4ogLc2D4Y2Mid0?Z_uNNpJ_zoIP0zcbhp_Ur)a`l&?!2NLyT3IFtMk8ZA_t{_6~8 zVuJK6Yi9PTL;B;X36s)$vT0-=vza1WA7m_kdid$HO%G&yuI2Py^C?ta#m=NNi69yn zEj^8VnBcmC)WNR)OwMTv98DiFSP_Cb){cE?!|{$^#23LVgP#0A;ztZ%?zL_;KpoA0 zlavw7Qjd;Xq^E0P?{UM!Cej~&F3m8zf4{A?55;^RxwMV&;}P7Aj%|Vlw)I};R;`)E z$e}Pl;8;>1b}!U}t_(tKsaFV8f=>j$JXSQyQS9V)s&#(e`u;Zu)rtN2V%J#N_k^@7t_Nid|rHv(0>_i}e*9Z-Lg z$jYOQP@>Ndn*w}ds9m))lSQyFUUam^IEVrUEGFr$)?892S?Z+!tWExkZtA7ip zIm~zzQq=rV@c8itq=#I1%=cGP;hc?9BO{w;&Q9<*F0Q{=9kg5p;8_I-(lv}E$JLl1 z`nH^1JUUU9@QuTEspdP=-+6`de z3IY_0S3&=_%<%>aMg;KXR)IR&*o1u=7_;@=k1Jl*7r*+#lD^wQ-6U{nW$6|xCVI!c zw$&I&n#Z z>&?QmuAX@5of?Ep4vSmu1BI{=!RBmRxbJ#^Zi8}tWm0h; zMmOirRFA*>iLiUA+m&+qd)D#Ld?+Y~3G{^;{6Le+@yBp4u zMMD*(^YP=*jGtFo2De1G{M4zXF(Zkwlimi6g2av}8KzCMrb!N}Phyd`v;(^-87d4u zjD0pq(jq<>8R5RyvBCg?m<1k86(j)WmzS3^GR7BcrCjF&EgKvAp!XKUk>7IUULfj; zqG$3UD?y%%lj@XWeu+gxHAZ|Or20$v{@>3oft`-Qf@)c*Gmyu1Bv8fpCKl zR#t(B=Z9@sB{84oE`tJIHq`5oMM)K9KzoEXT_N*~EodpKS1z;yuiGYS+(UNP3G#M! 
zRz0Uyv;t^DTo-01lq(S`y0?U<33HJ!x6;&1sk|KCu@A*J)#H4VE(vGrMEVerAP ziyw0vS{B!KStFUs3Wzj^ep7nnS*qidX+|Z*^%oW8v5=oXUMLf2Ia*@jwdg4jW39+1 zxFI3QIuQ{mqvqxpLPPVI=h_c7N!Gept~{>~dU_pU)gF|mr-B?ALxe;L@Q$3Ia!3Vb zrX{Mq0G0=|f1JKNI|7tB+3Ojmm zusg)=%^TyQ^oI%c-XC*cytv0%{Ffl-U~#w%`Ctq#hYf<2YfQr#!RWh zSVLV*}Tfi*AWqFch|&Rsx=lDrUjA(D%g1cl)Y0;MpJ;;n5$IBu>jFJ zLaA>~1Gv!uz(|ezU_xkfOYF@-AN&*#AmYZVTqcBlu(K95)1GfgfHJlWX8afcN+}rl zKlP|H2RY*gpvL->mVJKjpC=NHOLV8%*)cY6G!L^jR_nQ}rcJH`1St`oKjcq&#WqH!p8QqU_i5 zmiVqG6~)FHQs0ODh);o5=;8kw~tRG=UX%!$PnY>iWAz;XT`<+!l2eK=45}$$vyR2@!k1iJ%n1BLNZu6qW;eF|R7XOi>0HAl5GEB>Z6k zNj)ns$A!#6)zzBcwVy8<8L1e7Pr0gT%BUMTo9CYu=%y(vRnQd5=GH*$74RdhIezISy}hOf=3hgVt`7Z+EW z2^(j8E}y$5Cuao+`da}_8G@h|9Hj5RGKB2^dElyYlliy=-)c`UEI>A1enI-vi_d)T zGf*sF^B)P9FLUctHS&UE!m5C$y);>|B&wvu-TQ+l&U!rVgM^+%{Q*XPS?K!L(ZkZ) zf!l|+cNwJXdPw_ijL4Dqw4vZ2ZL-=mnX3S($5WJ6^s1&cJmFl5` zyvpvTT2$o*-hCO%HS7&AsG(9m`=&R04jUeTngghgU!4T`@P(k%_dXD{liJAvOwIG> z&x<&t_%&1SN-8pj3H^O)c?&nZ;Q9=qdUpe={_^GaA}*oJNz=f^g#$c25G{t<*_!i& zUo(ycgi3w2p7DW;hg@@c>cdt%oJBw@4+G@6-yxTVK~ixRB~ziUt_}!dB#XHYQ8ah} zZO_7}$WT3jALI-v+_&&FnWOvjLb1T15wOStk&U5L8TZ24r*&N2E3AOZei~q-(rar0 zH!=*YgU+LcEkJiZ{`~oqFKhn@aOp&@qUAd~&VB(N9!Q%33cnNyJYWQ?!Y5bI2*h=* zPx+Ml0%bSUF^0S8K-HF8G5)aWpLxvVj2y@$hAKa-*<=G=z|aVsfH0RXEIh#sElPn| z$dfD}Gh+9rM4HOsC}kGzgM=~)M0n&hc@*U65H+qTM~zVU*irF^X7ibczG+=k$3Qbj z*An0RwisZ)(|AO2F5KKHTyNS@%l~7>Qwsl@aSsW~Y6LWhvcbB+=357TcLZug>pFj^ zyr(V!A;@f7pN9@;V0~iwLB!`?U7a7m7Ce7sLs13SoXwp{Eof|9aW(o2NqYAE9O!!k zfQ*m@`=>TReETJ$L8az;=yP5aj!y?}io$|2Y5*Ef0|y{7-q)hA#5yMnm`sW>Tc`AT=REI4J+ zq^RY2dU|7=97eb*rt-h)c}plpOoHOyLN3s1VC z3S@(eNklAc6(rW2HjSK%5SOPk#xs?peB_+l37#@@HS7oh8#9?x@k|ZINPa% zrUD}2Qn$zq#Yeh)Ss%DD_W)ASls~BQFD~2Ldxx^#0z8}X$B!R-kVpm}o^!9=51)^! 
z^|fOFaJA@Y*UgM}t7xx5z{@kxt_LPOdKB>D@5`SJJAAkxO{P~m*v9QBO8&pjUt!^- z?jJlu%}^}I@(PyC|85{7TMF=dD&mr*EqHkH_;@22*8jK$R3dMwuABw(VOs_e2s7r7 z(6jv6o}S=S;NAv1F&e{JVA96N`}Bc^k505UQLwmv_@Y$LPTiwA`$`9(*AAig&z~Q) zv$cIu&wBW;It*#ZYl+UPpnqm=7h) zj?Ol@JEEKY7Jsc7dwbghMKm373vjpG!*%zM^AL6RaEnB-q6TTQP;dlGK?DLmwErKG zAAsEVNf4z~kOOe8Q-!p0VA%j2k~p?X=fgvJ&S%$N0JFwNXa?3moy(t*1NN!jd#{SJ z8gW{GrfoZq(fiw+tkx9!uX9n|;}TtvtQCvl`H#O(9#b2>@0~1;&H`RK*uomc0GESKgADYi^Lm94<`gfEQanZRayX06Q(F7k=EAn2 zO01ZBvsHV0uYqZup^-D7-E{!(z!4Bmgp%R*7 zodml~z=(8dKz;9`6EgO27@Ydp)58XkltAs=;#z~uom(>^P+s1B6UyG1O`62nU0jtq zW(EX*kOt-lA8F29;uUb!CtUnXJEelZ0)vPXXy%MM!TnO(AH{I(1vT&TAI`pK@18XH zIc)z71j2s0VrG=p-w`}9c#B@ohMI0SEO^kg-pA@dait(4LX-FAhn%->`4&AE)M5eJ zHbds`7LCe*^WV|^%4Diq2Gpmw^9os&E-!0=4RDap#$6yPb?R~$_h<7bznmB^eA7!5 z9Hs~uylhhjB)C4)-uBiGfxD%m}~->|XH>`(wo9 z|5=+MH1DqYYc4LX8h{37N6LbYDG1Bxnw!IN^2qlK3hYN3+`#SN5K)s*>e8!!)vcp$ z{>R>^cx~4&1RW00xx#W!pe_hgGc_ISazhs1K_43FZ7{f}Kmv)sYBOY8vqFuVJFO^t z!3)eapepTB=qD#;mh0)ckd^umB?f~XEL$W31<64kwZyD2_7u&y4I3lyWq&l=PG zz4=@u)s;U#Cc*zt!?}1!)&$njoFV76ww?gPe@mklw8h94fJTt7HLy z*x;N;N>?@r(BR{&ph@5OJF6i?mWD_ zS|A?B%U44QJqQbf>ZKSyRY3n_^F{au=5B4}*0gi}F+NoCU#|dU>pZ}|k_UlJpeP;y zf)!;OOM!p-01{hRy$I#@%bzud^XBJ;j)ovIGou%`4Z9xq>UeHB0-;k~V9~qXkcCy2 z|91#0j{ZA@Z9#ipnZFD8Xh7gT5oEJ4a&VLa`mj-6YCvIvkX6Z%V<%tKACgQZ=Kt5I}A_&Ke$C!I&kF7U|)g zf=X}rF4n4;NRZRh9aIKT7lF1{v2BC;VrDlaS^Z-*0Pm>re|kr>j=n(#=++IJEv5gt z6;*mDi0!4{D>8xu#^IStfQJRz+U7t*<^2K{mvI|x8!3AZ%YYSbkf*H;AbNwB?LTDz z_t@B?-rEr;)nr);gibTSqgCx!Q+oWn%AzlJ;AbEHrhsea+OEc45PN)FPzLDHN*MVO zBO{bQI!`Ce`u(tfqwt+lBfTL9$&_nA?}KAp3-m4Jf&YzU-M#wv8JC}eS?w@D(S#MB zeR?h52RJ++{GR6kbh9Y(9N;ALFggW*>lA`_rT2;+2m>`4t{zOGNx6VYtvU<}t47x; zuKSmmZ%f-Mv0y$r|LBl51Qwui_K_(+J$MKp<_A;LU=;wNx?*i5rE>6222mjl0NgiiCjh|@P~c+$oUJ?%RT=+If4EVUIy)><1tsuoUCGdJCv=8w2}oTQ;8DXz}k{E|PwG>)-x#z5H{oW9#^{W32Sy zK&^s>_2@VVN6E-gL|-9I0aZXsp%$`DP0N%{5er!Ci3hvQ&s=~{L~bYn46+A!>42SO zNJfhjWlOs`*X9F)JS#vgBmr!~WH0{jY9Qt>L%HcIuK4&V1C!q&e+&=9f-ly=+rL3Z 
z85n7&wD~{JHNbo5lLOB_eU#s{ToF7BW-v=R1;n=~-X=w7U=|#k%TsMmI+u-Y53KkyA~lUet_s{|(3V8{TP8n~ib zu@`_BD&^N~qN~dTCYv48U}64v_VeeaJzceWz-Upn-6;0(zlMX#fg7a4RVGn@da*!> zFg;+&Slg=NvF4lIAHEGfKvSf!xLAWCbpGhkBS_`p8=vxX!_&LY21Nt19*Tx)KgbQF zI2xMN%D;Tc(X|NS?B)x(7>Nl4v z*!7k81><&r!&&1X2L|mh;F9COyaQu3l0sGF>UOIco7zt20%|0H?=HMKzUWz%f0EKJ zr%nKSkZr4(9*zJTtllL)eFK9C;7J=|XA}7(_e9#idNcSGP`(L1&wS;n&dL`71%eo^ z_Kpsew49)zR;+=!`E({RXtRYHNMqG+_3$}sBD=c>0XGk0VGo$t^&nCWskEY8z(Jd^ zEAXzjN`qZHBjQb>daNutC{Cl|zFyq?{63K6bEEiKzki1a)2i!C)l3KjKe5K8IxPIc z;j@AZ9KQBdEd@?Q$H9glV%R-+JNb(Z7+#c9hDRX#3V=+dczJ_|NW?Q$ZZo*8JvHaP z?Qk+z_qAyI(t4kUstH2b)SdW5@ut0(l!;CG+>fHV^33QT4h|tzF!YQ3f*lLUt$;v# zzhTs%&#ea3yw?yYicwVGJ;2iCOeVbqYg;~OuEI=^sn|WYESL=*aJHXR4TB@W-egjD@RfR2vMu0YC4A0Gv1 z$0a@x9<(eKw>^Ae2~Uv7Tj{?oqo$tBbc)KzaNQUzG$CKC99NiCb6KO zac#HwI=1t1FHnb`DWAYFT>iIPq^|#6J73Y#hbw@^->G<_7Qo1Hd2&S68=pL$1GaQ& zZLNgr(7$$*f>HqSgvo;WusU-=up24N&sWjPyEC##d7)Q2e`BGzwY^%$oM ziMgjDMP0k5N?xwR0*e{#=B9nit9K8WQlp8~ISW>k`kVU#x&+iAZ{m6*(Y$3Lj<<^0 zh}=UTXD(?>clKA~lj&^=%k0-Q!eT*E z-@YA;Ot!i!BU4NMz#f&Nrqy1m0jdPKSLM2(H_bd`{*`X@xB*5PXXhN?=Wj#ybh!jo zxZ1SZ^v16n*}`D(2S3{WHc?Ifa>3Snp%fH0L5#C@0kDdFwY!* z6+A5VI~HRxu^=LLuuHSIk-Qu2r)|s=)(mn03xs#xXGQw;*6r?ul9Ct%7LOhUqVLF! 
znECZ)xdfX#`@+(8@Wsi4-4gD78gMZ|tVKpAP9dCSV-JjsjWt|mKP-Io^y!;i4G9Rx zpFGLbP0~$$t3CTte4@Yq`?*yd`Y$Za-DRB!Y7o6IJM3({=umEd{}9R<))Tte(a}X$ zzh&*lS9PuL?rU$1)6AS>xZ`p(=HGSO^4~j3XlI&(2iOrQL&G)3ODLj&+mKF+tBHXb zR<+A!qlQLCu0`;2O6L9JTVA;*E?y)HcM_SPNZL93hsQ{J6V8#A?Y|{|Xbymnlx|2f z&#~!s=As`(DvMK^_s-5YZ<)egzMMuGwiJ0c<`%7eYI^>IUGob^xvGO%-tHMR6a({r zv5=iBtq>>!2L&h`jsr z=T5rMKOT0NzjcN;By~lM0QvBwC@M~16q*?Q;K0n_!D0K`bey`!UL1>V+@4ye#+i_& z5`H#J$8+qY!|zleUgL-RC9kFY{i>Tw*>23s!)aykrW_=ol}X zw36}D^EwV3?BQu=&d4dt%9_7 ze=RldZz+f8Z{0LB+DI!P#6G6LSxl?`N?7czn?>@t9`J;AEPWkx>>MIH)jbPvVo(TFK|nvtbsMkG>7Nb+a?< z-W#z4?Yhx&Z+wv!5nKC`o@sWi$r}l--V|==7s9=e(%}D}1nDbR!*M{Ql(Ku- z(}7d_WcpohzCM3bl)kH5ar@^wHC*gk#sG$fYvi5qQPKKuyFq_{H^*gziQx$Eht_w0 z441IS>G*|1PeSfk^iVGIEIqk$y9})q9iF9rGGEk z;rIjEV*ec4?%1|pFOA$LjXb8ZnxE!f0#{4k=lmDx_a#{}c$|S#TRhdza!wY{6AqMD zo0aEMU0dr^9wlrQ3sR#hsJuLd7-R7(XKc5xHX9cE-1a2iTVm-?!E8k#0zAP2*kZ|MXcE zIUSE5E7#GC4V|&FI&<4-?yc~dMtN@Yf2R4MnxEM|__prJ&`6IOi?4B1Rx<`qC-}0) zY0Yp*WATRHcXKH!)}WJ|6!pSJ8vqV!L|LkA70JdSb7N<>;#&Ni3xw!oW1@|`SUY19${xHq?`vp(;rZ)#M%IfDJ5Q_GeRj6q<0LTESFmc?Psv?E zLTB-CC8Z*YW&`$9ii!a&a3=s%b|T!M`M{QO`xboApdKvorShiM?vp?Bc4_#3vjsmF z|2fNUyKr*wL7zX|qZW+B4rZ($1;BgA>NNIk6IJ$cxEj1%x$>@}&4Z~N9Q&94RO5W! 
z^=a(zS({&LOGhmYlTeu;Nv2W3wwkKKX&J z{+ExqmP-MVP0tOpW;in~NrI)TSL=oNKXv^2LHj99^+9z^-FDyz?6^HK0wHg?#CWn3 z`w9wZ;~Wgk9R*-mO-4fOOn<9-;+#B$=fgh=%?3>T)Qh}LaUE{|4^4#-=FOYBmK%qRkH4aH$X6;WcJgFjos6y zDewdLP_mVQhI{+Gho%?+{w|=HhCdIy3uCA7a&ci}SFf+(I6?#w5NE6jX|n6q9mL?} z$xE>!flgFbf3D*onMn?Y2=~l-S67Rva@f{YHjjQUoxg2Lu+tpGp*cIRPf~FxoyJ9)e)!Qe|FOANoaCC~j9PM*TuS$mrpm}D zNmKWkywtF9@IxUVR5Cj@DEJwir1$5a*ZehTmn~^Mtg~^Sq|{|}xB<^h(GXGa(Vx0Tn&w+8hq5r=j6DjX=I|y;Jdc^h!-!+*R82Kjx)v@6sr{R^qKs^L!vIWMSA?+Fwu;Bnu5U z?n7cT9@&{?W`-$OoolM%YHblygqpg48JXV-g8j43=@>W#RmC8&lgsQWv2!F=SGD0S zlT^9>vEAt2ynA1&2SN=HVEy(Q0zWqt^)&DD($|Qy3-2_ua9Mxcgg*aJ$JXL8^Yyp0 zv*k`!Y!ek~|Jyvj5s$U@Mgl+j1v7F@Sqx01*PEtN3qLs z6wMxSO`X=P&sQDqrhHh%hcaIHck}|ZG7wD2a-7O%TjhZ)-TU(w-|?)zu<-+M<5|uR z%YIHUZ$4^u*YI>oyMS&zmAZ$#A8QQUsSyeetJ4P*U>WXx?R~t5P1l6aMgEWZ5pM)d zK`~Ion3Y8fZvH-UW=+nGPXq5}HUEVjawYhjQ+~eRfJop}bmVAMtw>NpR({WB_w0O& zb2{W7IDI~{kBqLp)4nv}m84*Gi>5$wjkBt8=6(A36&@YOkB>PN#N3;;^ONE{30WtY z>*|u4(pogDk3Fnu6_#RgQyzG(y~S}0V}Q*&M~4S>rY3eSePu0rwOfR8gan%&lNb8UsSbcr2QB~mBd_>5_8`S&fhG;}#yj>IZQ-m(yza(q3;e9v}3|GXJ zv-9&MIvze;SMAp^u9SLt(%D zZN<~A%&2ni<@T*%*b!9z?EClXMWPNFbc6IAK86{JXVc|1%|jf;ykwKkPzP(2AE1vM z5;f9*xiOHS!5^JgP&n_;WLet$OLd2d^2Lw3(O&6MD+{<|1dgsR&Tq^wq{W$p%)|V` zOH7I{WH9-k3b(1fL zDlOS0m0#MhwAme+jbCe-_u=L=CEtWgwAV0p^|MGI#f#rv&^J<{>o={NJKiNCDf#9& z_36_yxNvD+S~pvxW(9rKDyEoC-PM{2_NsPcN7I`cS*4jy-e*OHic3^vB&#&FDKHLE zUFjqj{|UP>y6D(X_QCE`%Zy&Co<3cK9QBr_wVN9|#6MSF3U;1xnz^V#D-!QCQ}GQV zL3Bj-qOMd_?D1ip8f`O&4w}7)1H|>ki%u8(0`1Mkv=pUpu3soK6=dWwWI_YlI1S-WnJ)b1PA`M}{nFIo9tSBX4SZQ4N5gYdbk`Q{ zo9sK-;xgL0A5NAE)RCkMLei|J`4@v;+?pI=eiQu^_IG;6&3W!@_@pBh;_x^7l_ZsbMtX(#nhuad3l&jZ}?B9`4SMzxvo=$}2OIALM zftdpd(+`}#Cl`pCl?g4Y6Se{z>oF3s(!F2m(>(Z~cBR=!d_ncOox6Ac63?r={5fKi zt@?!X@w2ppFO^!jGMYOP{ptGsJep<*+|^f{qvUFF zL9BeS(TRJF8Wmr)&u(Tet5E>LMHEkHd<13T z`5%Vh);p%bZQ7B4VC$6V92hwbmPOwUHZrWA|An95z5WxaL!%R-{pac_pWo>$ zxBlm^v#(&W{T)2dzn*eK{Qnc*ncX;DbqPTjoX9XobI#v_>w2P}fL4o=y#L<`vfpxv z-#_Me{kMK;{$+E29$}fo_y2tNNO0t-4HQbqDbext;%|RlWc%lzDEwMO*1cWbpD9A? 
z@iL)MOrerKQx~rxp*|<*n)^{MWlU6g{rWXjZ@~eZ*ijM2NF=MnJ&l8o zSKX%G<@MEO`<8|~?eWY%gpxPMZCBU4T>3XD@aUV7mRil7kl9Y-+hE9Nz$Zktuxk=f z7OZA2GCgW+=3CA$_&b#+IcnY4~FIG4NH$ z^l)@Y-0&LOo~5QoztVWrsULScK6U?-nVW?FhfhQPnI{|W`YiXY)39_q-jlp((xvjg zTXA>%fmdNR4J#Y&c(Rf^0$h^12a_Fzhsxt_1sx(WM70mkS z$DwZI^>u{d{wF>D%h?4fn+7MBS*gEvUJ(2@WVucs&62_z4S$ZCUX-hNdF)`vKgZ1G z+P>|Vj*K6m^Bv!w-bCZE?^$}cM{#fNvzuQBnEotfQHO}SQABBQ>GxyXrk;36;1=|f z+ik{VpXuBfv}BsUoF=EIcH73XJdXF=pVZt=ZM;+aaSx^XHGLk%bzj?jg!MFWbp0CF zzN7vNSX7cXQ?4Gu7aNl|&3!TdozE78Os&vEHOJ4K3A6g1;Zwc~Sr%64QE1lpEOk9c zzF2Dd`-}PS2yTIh@LwAh5&gn>mi^DS|3-NE=P$Eg%zlT3GgdwQ4AgK@_QU9L#bebG zaM?YBm7@IvQgN>`OhiJT?cLD9?g+4=ij94JaLRWHi)`s*J}C+Jm6(J{{qp4tQnxJG zP_Kn>uMl6Xl4%nMSdTH(j5w)o&+C))i+Sz{rD4w8b!bhrqobo`Mw%`MWnAv%&TNYR z^hv$+(cbOVx(jF|pjeF>adOw*NT&Wq7B8Zook%x5HNk-lDKgO+C{aN2b$!@_!c1V)NF3=^rR_Rr^qO!5hu+1^1x38SO7qR=J?4OPpN`Maw&@~~8Y zTR_8y*_1*ZPb~RSDtcfGEm0Ygsmw_KK(M-|s~;D>**xnsrp^IjiMx$LmVF^bb&=P^ zQ1^ycMijRsB0nceb236NP}ISQB#F~4QQfkT-aXLv%kp&fc-1Pj9$yLt=`KD#z9WhY zXp|GOhyB9k0-`ZBP!TVvf_x#OW$Nc;Qcod&Qgj`DZ}QgPj6LP^z}SW7qrzw)U0Q>L zVeG`u%ssEkyEu&ejI-X`+%bUZVO}-w?r$IRa+%!j_Hevz;;);yK~I z_BUBQaQ<~$>pS{#*FpUtp|RSPN5Em&glGWqG|nijZAo8N@e*C_)ecV7mnZ9$G7R-J z?nl1w1`n30>r;9X+NsAwyRaSKqv6}2ZgB-8KI6R(P7Cswjt_Q{+D8a;HotmLNfvOY zETymvW%%By(2K@#}AWD?pq6ZXfrkK?9wrtj2l(#YRG=Vm}Pvq#gS~aX=_I^Es2PK#>Mc=>mm+jX}>4W{ndyMn1F3D*3GgciqMM=IkY{bXp@MEua zSM|-?MtOe+i?$eBbyiAZ#B&!40Z6JZ+ zLIGGj7wvNKjRXYbX$M%A9k6e^yT41;{KrWQPrNQCHg67Ec{E<74ea{<#ig)?>>L&Zt9)ZW6O)nd|TeP1B=?4qd_40f8Q)NCq2V z$a`0<=thcZJZ^{-E}_VZ45{Wn|C)wO&4gTkr37Ws-_fECcSl-Ww1|o+QM*7vQ#1Z- zlAd96mL18SvY+s)W-nY4Y?>_PubtMYn^aFu9BQC*>u_qw#9-ssH~z*?YN~Bn@4BzD zIq2HY`Xnjb%B(=Gm@6&0;UdVHo*KPLRCiSqb;`!6ucSdQMCx^+iCbOYty|XxsMKnX zHzH307$F)$q22rUDFt4!xpH76q$hPMoO0fb7Pu3QF>oVFNx=wh(af z?I!$+BWg^4x!P1R^}Ky~v5nvC)62ZJRF28k0_M#kC^?Y9 z%yJKB)UFv%jidS?M%kZ{*|oL?D}g5v({C#xgc=LAiP3qA{yqfXyr;rCH z#mKAELqR4XX~!TWbZcpM|(l80J0 zNPm>#)uZySEvq{l#6-EW{kU4{rvD~BYTs;v-5%GXuQ2g#k4qG(HfzeTj#EA8I$YCW 
zislneVR*eU(SsEiKVTK@%UfhWJ=QTaRHwzUZ=cdW|F}}G72=JC2_p0S5L)b{epi4G~?QPvn8 zq#TJ)HQ-xHyCo7eVc#)^bvfq8;VRfeGAV5(SiOh>=euPlC3}v^l?LCI#6abBx+0eSJSXK zE^gGD>?@$Vl-WcI45<$uJbdyb7H1T~8gpgLkewbYcS$v#qBJlE8B%DDS~lEy<|!1N zzhY|r3f~^r?PrfS>!YQqJQ<|EWP^G|yY)gEx>7?vnBfSBBCA!+@DxxC){tx{?_axu zC3)k*jg*S6oXP&<(v`u7u5|@yi8__~3G^rdIn+GaSI}eA{mHUi@$A{hVv~ImZ(hGn zb_$`qI&7R>J_@PQLx2Cq=#&LC2lG=m5m$gxMit}7@W9Mz%4!Zal8kYgnTEcsny*~s8TOwnO7mSBbIwt=Vx8OkjN?CO&5SFb@Z*=xooMP!)-Pvj zg1|9j^vmrr>S_x&V>$oRJ&@+r65zqDk&&!%h4M-UtB8U))wCuXf3(k^)k)&4WI%5t zaPi9#XckkPK2Q$3&#ufvZ|`H&^Ex`+0n8gV?2z8g%}pyi0f%SoWZ6@rtW`rQ>z+K> zk`cPp(*j#YZ1nRYth@&NOo~rfDaF$f--KQ%7-hvq#XekELHS(gslj%CbE8*o%T2X- zT<;ml`7a38F_z2x0M(4>KnWj2Q!%WaIRCSQi}AqIE~Zcp#~m;|3M5B%6tQl^-mKfa zIfhi-;>23aE~wwcOc7$3AxvB`fZpF!jm0&w5(@=W{m6TlxF{f1&72UBT z0zNLonu&S+khM@x%WF)BgeP zOCc=~kJ)_SQt0Y6Ylct>+jVlvgzKA;WC!@WJd~LB#BLHvZ1-(l`I# zB+#N%i2a{0Uvk(ifTwYRMI!pHF4=?wrXxR_QrZE>F~sx~pI%ADialAub_4CZ*w`e2 zW@?BC8J1*U!e()U|1l8yv%&_)p?3W*(T@1y7j7B<aw*?wMtzvJwr8Ji07$_5C5vot^5B z9qM5hdMU@NBf#T_t4rPe+y~$8M%!;iJ-zT)rI?ik6+h~e)SM1hMVvHGGQE5RV(({A z-NJewygsWJpio?c=k+=_SfC-c$3BzLSXyBgXy3IfPAx57I=?pCVT43N3}TM9=Nj-- zS)+;;yz(J()}hIPa-xJLm3g`tVgp~FTAU;7SGzK<8Pnpap~AgcNpBkGI4^^fWN(v? zZEo5dQ3z)bd#$LVva#!uh1+9j;&U*d-Dbsm0ZComu<6mlgVC_%3)7boXD@&`&#y^vh(rD`)b%c zo*qhY!O-=b<+EBET)~N>Fc9Q}xP*pG9!Emf zQ+UHptgC5d|EeXFiILttnN7{?0<{XKPO+fDCk#5uEe9D4xpPL(W6WDFyw2}Ra7u8| z=-RnD6PBx=oF{s2-Msk$9TZTa(##%*3F{;W?vOdUDhuIz=4uP%>KGH&^Gn zxUlWm!NJac)M;Y422dk@YGP!+swE8XIy88MYyLZcAn%yH-mxMbAF~+n8q} zA%b{sme!2|%loVVrv{iG?ZVJgm}S%1PbH z+BIvQWo3zo4pvC;o{%Sw#)$X_)PkILbj(6@6^anQPVJhZpFJ86S)(vxaaq%8DaJ4u zhM3sc*s^|s#weIj!_mH0f)Fq}EnN2TY6Ht?Y?=d>?Z>Ey9>1!V7j>nU@t>HQXqlM> zG-Uz8#jO^Rc;PxR*2p{7Fs???*JgB<&%AJ`ud#`Ta}s!7*y{PhNP@6@%U zyQER9UX^!c(NNwZy2z|7b;1?FgeRlN3VHg6hSWR3P#}_z(Jv3|!u*-+VufXjG3Mqu zW8v7Wy3bWEY;?y>UhoSKbnGw6+S2? 
zjpeUHB~Y?ilh7(=F?F3nPfS}AwgOHw?EhK>wq_{afq*t2HM7%la-MR+J^VdQnaTtz z6LtynNJ)^~kmHp3){`*?;_oR$y0h&+PzSEs_zP7LWF=d#Z!kjh0tP{g;J04P36+&+ znm_~yAs*`Jg5Mp*?0J5*%Kw+5vh+9&Kbb zF@4IVZ!!eRk$yk`b782@scv*;a1TMDOiW_pDHJjpJ}8@$)QP|Q-MoOtfhrqQW8@cZ zxPI;0M<_1RQ&)eIjzg2ji>2madklW>a-1~+P*WM+{brw3^fNI-i=XW=B=+$FQ>f0} z$M%?7vLncmm#3-6#y=v!#&1{A59B%QaK(>c|E`RYt_Zt|L|_PwOwC(gUBA+Emr*#i zRtWO>{cUY+CvLwvAFrCSo5yK-ax9$HvNBZKx=SP?T7NvQ<#u7)-b|YwqoYz%{^%wWo0JWs;72XN+$+Xm+x;dV&&lvwUb<8QBDUI3 zSyR)@dAu_sC0xU%UKiJftM_OcPH>qrmZu)jezmx&duXZAK~xQ%kZzZt6J7hr%x}5{ zuL?9wQ7uI!E|bj!U}S)#iOz zRYgTzXCN=@+Px>LU^~OHHXsPQ0_HSF^k(y7Y?fD6Xn|O#aXCQd`0=67*XU9^a{zXZ zXgKtwTq*qU;lp@Mf-{@+YKI;@(KbiDDe`44E7%iV-1yK3ZP1tw^1@C6t zw!8LqNsT>(F5W^bb*(XQug=lz>}qgeUCC8)D^@0`a_!l3l0c$w17jqRT^gzA0XP<((*>8BP5g7J36-9M|c#B%U4hun$lQ#;?o{5oDGYwA_t(EI+ z50e2R{or}jxb06tM-H^6Ig2(`$IoT34*jT?_Y k)>&VEqgPQ5vUewC?FO8%aYeO z*9?{Ll@pTb1E@S!X|j11RZ*$b>St1PB&Tr2&{xaZ?!oX(TJ98dohNg#qWmTw6Z5Q) z*T|jfF(&{Hix!U(6MjR7yJJ;B)2!0OPuC>rm*-glivzr^+9G%gTlyAQm#XhJHa!ht zWX2fbR8SVA);$&o_86`V-KRPcd&gd$BIT}EG>2HD;2?o{v);ddk7SER3|C^k$$0L} z#6jultzI#9B1{A{5MG7M(YSNxj%s(wlLPXbHfUCqh7h}NTa@gF9T*;0;VCQ9Z=s8mU$??F3ns^x^jb-3YT*VNmXJ}~#(8xCFaYQPM zst|*K_aH;pu>bb)WP|v}I{7r2)rDacK=m(^#?%uoU=OVc`F)Ecor#*4xcGq|N#$l%xp1*JVPwTLE;m_cO!gNGppK_BKvgzZ?%;BE^ui%IF_qG`?6F^iO8 zh0QB-@X*+#0SV12`&Vq>Rsb-=aY2_XZthy}xdI?K+2dRyyE?w0@q~iDCOA*IgHu)@ zbCA3WJ=>U&YFx)n5(1=p+VepBBcTCwj00*y83?*gMITK|G5D|zi`IFEuk4hptZcv- zo+A0-+IS7MUiz%N+0C2%mHC1*few4Xo|CdJq02wj^#kAsS22mGAoDPk)85s1(puQk zfd1v)&dyHMGS%WyhL9@Q&HGFYD(z8_NTpqBF!y9%G5&e7gp!>l7dSYo(Bh~JxAUspYKgtdHO8 zB{m)je{O}xHk*mS9*+SGH7I*>09Au%hjB5x8%`+hLr-`_1ZoEBgkiUj5e#?qE<@OC zJ0M`xcoOd5C0Fu3g-n#93K@qLg^C&Ck68*^1;Yy^bw2iklQqXTqI(i+i z$qi)jQ4&gegyBd?Q(=d;VXr=r6N-xRP&qzi`FuRxF0!i=_1ng^!!e3MX+zz0bq>u? 
zxuB{iv>+(6%ajbxkT3Gv!UHm$1dgA>_rD^g!yZym09 za9NkJJOiJ8*%op&bYi2yge%876NhS#Bf;km2K9oltC*P{IT&EmU|7m9eHm1K##p=m zLo{1M>nU}lWltYHdX(j*FF5#fz31<)nsRtKMt~b(3<=GI)`VWzxFljLGDQ>(wW1@h|NB3N*i^hcY$^?Nj9@wjD9 ziVk@bv-JJG6q8}44ZIRzP)2;uZ`q{0GGULFd>C`Wc2c2X?5 ze}$C^)#O*5)6}1P1|diOum^DNoyxi655Y-?Y~s_~i6YtNZ|Hf)8WkEET9ySTk!*O^ zqX888v#M@kcjJU?r9D}FR|A1ON&XVejB|iY70qelwkG6GXRvmj#C&D$%pWgQ2atHd z#MX+hrc2tu==;_f)et)=UVgrYVKa3QzEvJ;K7=MzMxbsF(aoqK9tc2JYvoRuK{Q9R z$5L76FWK-N^`LwtUUPVytgs(M0+C_myw&ZYGUx za6A&qCc6TEGHhnb*cDx_hCzOM<$+DR8%dBR0G?{SX-t^FAdK;w!YYO*auM8FeTOw{ zTdz|mwLb^VOm^50)hG{% ztVD1jjZv=MT_tK@m;GxmN1`pyVO&iD-xOR8RYAB~(G)IE2o8}X>4+mGRoPw9F;^SR zts^6HS|vNm0@Td+IL3t_IFO4-l7oaB_+WY4Dxo9`M&8U^n!XJggsJt#ocS)dM!guzyltQWQM= zMVU@MsM~}%oICyNYUxVS3Q~U>@gT>pmx()Pms))1T=@t}I?o}%9|waL2LJ&q71OMX zL(e5mI))ogg)q}YUc+l93cafu5kn>QCR!y`Hh^ujQi(rR1>W9Wpq+aef_9($- zK7ICVkwnHmRMBU%NuyU|^twA3glSHo#u68nLqF26_UIjODX!CHtXM1AqV?6$y6idk zGX!Zw5r1j0yNFLH$#fIt#V51Nah>q=RfL$72EsD6+KiXKAP|AHE=9l#5~^U)Em;6W z0E#LgQFBuFi!4#BKnwqgS*?i=r{S3?;$Z=1(0v0!Uecfo*qoD|Mh*FPq*~?BRFSbo- ztz&~gl9kva#wh|+bYK13wx?z!Q!U3aJ@i@+``n{@i3XLkU%Ap;Cdqi%2hf{CWkE2@ zq#36Z0yrTAl7yl$ZOTwYBD@0a0yc{xE83&fje|XijTns(mo$xWs+v16Fq4Nhh!hHpZIGfU84ekb2^jV*T1vjW(c1^!V7SRhQGeNA*=*iq4Y2H z*kN{)h+{~~ZRobv`kHFNUH9#&f?)G&XHgp3Z*53GgJ44f3*wRh<9arF2ZOR~9WJs|*ql+wB>M(r2TiqwLqknO!C?nl>a$V?=v2VakXXL&{O! 
z&@d`ztbH>oqv*g^orzgl_30kLdL)7h#9UiO>*~{_A0num9nj&xyJ!;n4P924zu!zq zFB6+Enfm4BQjMW4YFsLb;)aHX)KJ0{kpqNB9#5h{A_23pNhJX$P?izWHw?TNZj_-_ zv1%i#Cz175LS9i$u+?pS&C=OzR-yS%dkh)rvq}>p z2(bTJ!wBiaLL?+$%*LlDdVv+y2@OaX6-eaD*@qT(4B4|ulVAgRubi2k2JeGFMGifIy*EaT zG*%<6;RwDWrw({v{r6V0`WLzoWLe%$)j~8mxPwbGO9j10l|Z$(m-3F|+IM?xrW(((jk zk)jiv1Trug-+a^t%U!0E-b`#l(;AW{i9&DAR`c`bzvjW<`W|AA$tR<<`C|PCg=nS zDUV18B9{}9#rb~|$11C4qJ4Gk%x`Spw~Tk>swd<-EN5W&>mxh^>r`qRIwJ}E4mPJmVfI9PQ> zMRb9lJcZ2=pFP}g%D1N7EO*L=z!?T%n^?3~#`BCg<-7I_Tx^OxG=2_0(|1&deWn=( zc_TxGm&&16p39HXKbcLQ&R%o1exWC$pMWvCaGobkp$+k<(I=eF`mE_sfEhIRSMz@G z;29970m#7+?p>i!XAVw{v>?U4rIje!(EmthzK(>&;CG?I+=9%1&L`5d|MIbo^egx; zAOHV%{=FRk|7!eSUX4=cM>cvPM+Ze;vXyDJniBtzMPTGopc7W289S zw08d0!HWDNtWaV-$uEig7yE*Ly*(=?*`w#etl<@ z^8459-Md%j*s%)e;ZmTmTKMnR_>e-3){?egkw{>JoN}8%VeZw9?jb;Qqy<-W^zONq zM03BVloSCoK)lN;ZIdbb8d0EY?RrK=E@@=0x5ob?k2hd~LfMs_XXUe=%P(|1W@_)1 z$j+dLK=Lh{v5EQCq4+Cl>`BiJCIUXvy`#k`jo-;W^C9l?&j+;p_?55T2-F>0vtzv1 zWg^b%*PF`RX*Nb{Zx@E_nSUPE4HjXW(TPLp?F`(P7mBMrCrT89lfbk8)UGC)mgT^pro1~J_#v&*19klrSlz|1d=;?_-wCuz~$dF!Y0 zI`s0l0?_B!wJS=xT&#9Ij<9>(z^_Mzd+YkMn>*Ay{N4;bF5ka(Yo#=6=SSTI!J%#M0VfN< zAH7B-vz<}$fC4ZSlC8$0!0^9#@9`EM60UK%mgBD_0AC%cjb_hq(X}wBF*)~{6^I~D^H{5aqhp+XP)vDBp-xsujkqZ`tZKz{d@!z8wD*`Kw{q6yx)_g z5!G^$vV}-b9MQ;jZ=Pr4L){VGUsAEvNp%N-cWA_43_Z-zKoPrb8%{}uL1SkLetWgc zS{tSSQHOm{aqIxy!~v*%)9OjvdxUz$=+1TH_U+qNYkyx%`;_9{B>VlPfd51s;QY?UEjtUn2iV(lDP^za6 zOvdbrRDO7-dwg`De6V=w%F&kGV(%-u#hw?oNeiVOSdz%POKo_+h91-a%da1neay?& zg!G(mRlJ7qZl%~`R~9ZIg3c)QbY6rq+6V$WQ%gw0CN3;t{;WOsG9Bau(M~=P3u^)3bsy3ce0c=a+DDLh?mcwv z>Ry}p+(9&*2^L@Q*Sg)W1?tobz&KEQN?b}4LDm!9xXi=dlGvEvk;`hZ;ZTkbMI7vn zeTxv7@H3X*9xC%XjSpJ2VZsV^1@}veE65k&CWjCNM4LPxKX%eki>fO|p2ZF)I7%UE zHR4tR$0^vbH%{$*jorIZz-TQPZC!z>z2 zTdm{N3i!(14vvp_naD{C)qY%Xit2fEM$5ow_)!4@J1P<@Nf+CW5>lEQUhT#b^+GiPz2ii-kadR~r?;WDkLMOd0nZ6#I=!m0O_ zmN?)m^8S7tkKmSB4+#c^s594Zb7$OuOqUnLCR6|iz@)DMC4Ils_;Hiv6oS7BoXW!? 
z^bpC3K|g_Ts?D{`IvPot(=ke1DDycEYOFrpa>tUt)>1wrp8SSX+&Q=yvNY-!->k#Q z^F+x5kMpE$v(tzMsexh1E)SYBpH!zymv}*KtpTyK7eCsXHBk0U<@tG3=~3YXyAyH!pv@D6V{)=n(bWKQS`4mwWtd(jK+gd3Lp?^8xif--8EB{Fh0% z=QUlH80xw&w_Ca}Qk?230TJ8+dRBJ!={6e(6KIHF;~WA?sh#yRn}zskSSD?~AA{BuWDPE52?mRls@p8 zHX)O_h(!>rN@WjfGN>#k14RzO-CdP_z#hiclB9pK0{A2O*V0uC-eDcn;KG-orfLB_ zOf#zipd=4i#LsX%E3sLI)3x- zsU;d#g|F11MbSo3M<;nP;gVTK@#aHpVm)=FPqVC*pSxJ-PU`ir4i@2 zEYVCWUH&0feRRSr_rA~`oEJOFV(^A886%@Ob=N=+EEVsAj78VJBp1e1g|e7U#+wG! zf4<4z&|Z@kD3qCU2v41_bk$;L$n+Zey}i9ZBN?>HH5e!LtUVXe22f~F4jOBvC65PD znKRvjQi0B4f*%&L2tqt}t;J7Lk8EZmTmNaO8LF#o*`SIT{VJ~dP zwa@Q43ep5liVDz-kI-G!$gDKyyKH7V^(p;+k!{wyt|@crLiPxX4=bZhlzfLKh_X$! z0hiwIf?pIX)hKc-%kOb0bEAEB z*pyA*L?*<)Uhv~*B2R5axKW7m&V19frlWA?H@&<%kd)XC52mKGP~9SbNvDv`0`y7# zh2aUOUtEwz|J|f=CoShOU6SSEWYovDbj0$hLrlfGV}~Uq9r7WEubXs~&gzO^zj+g; z=7X9xusA2++c%2ZTZK>Pj;Qy3N`K&QmT?n`41QBP7&E9ePlgASzu0EpoRtNY^j8Q9 zn)<=@Qt#aphxaf5Zp^SoQtd0!Ll*#iS279>x_UaWAQ611h{`sSK8>o!U^3AsXaig< z2wHkqP{F&^3g36B;2eBIeJY!f5RnOoIxvD-lmma8Wc}&M&ZGfemuW|&;-unKQ*5D< ztIRIEy^iB8(LU!9zZ#~Kg87ucIxU|fKAxe{`Db2D_1sRVZ3qz6V!tG z4ILq;Zs-Tvxx+#2#e0&())Z4N%2X*?P;z$Wgonz7sl#BouiE?;2zbHAY=`}z(6`a& z_?(FtoihH3f6(hp|Gh(~p0O;hQSu=dbt2U^I8UrvTU$Qu-K}_a5vQ9F&`&kxGsKcR zunu5+1tktC+Poc~s1Bz9&3%P-ivV71pM`zM6H+W7P0;-~4LR{wkdxW~n)O1{C-yfT zfiWjQ3cG*}=er$7!X`=tPRkUUrwX*s#2=a7Ac zF6asru$KVoA{uvyi3Q?RPPe^;(cFjsZQQ?+hm3aIMM`h4J%=rW2(lQ7E7>z=RwMSM zpxRJBs47pdL;a-xzE!T+yn73V0ffGgYecHDzDxtj1KHbM7|3Kv0h;QP9T7$y&w@t+ z#!9JPb}WZODx4z@sTa~zDuf=F8^$eEEoY^i2;|uO=;-m1ceYTL`3BbBUI5P0BCj*46E91(4 z_>o`&Y&RDI3O#`28Nr&LpiIvuo?8HkBLhiQx({K`kP`_w!v{v4JA3d9;7kElEDCZ) znBV4JOYh&_Oj46RuOmnz&8%;z?o`Td$H~2u6D{$H@u29PU!?6xh%6b6f4Ilp&S8@`84Ih_CLl6s%tHtS) zN~C<=3YEoBPG0C%sXtIh;y#M*d+#EvppskVFV1TvTr?WV$BOkbkDTnWwRyRYOO;<^ zLwd=DVSBw{RXs_iLLv3Kb8^oU2B_oHc9`s7i~onyUGx|yrp;i7e>7+N1D43+S;?mD zf?Hd$p7R_subXOndi|ucQ-|4x(kB5dC#Ev)*^2^htY%Sb()tagrSIRTF4QS>Uoq2O za>cTIm5cM6b@%QzN>W1$-YpcD7E*b*`}&G*+S(Rj3hhP;YRtQ;Q#_&5(TIB{eu!P>f70!g{Whw106B|cX06*Y 
z9({hrKN@5nJ{)-!;wb9nZfVIf-6s5otD@v1l6S?^)#j_KNyUuL%t$(G)zb}Gjz`q-95^uM+HCI8mKCabkdQNwg4Z|C zBp|#w0@z2wO}NkEWEO#x<63%EXM4LFj%jEw*lM>+C1MafBAI>Zh9DXO)ySI#f== zZ0#nvx5x0qK5uW{y&Hz~soUYL=zixR;kQ?@YcRIJgRVT0d+*-+Ftj3SZMwDBdlSfU zX3@ES0?xS}yDfaeq#=3vyR6`!D91dHyQzO~%L7zNNi3&z$8G3fGgF=fy=m#6?i?e7 z6(^idB<&U`M=hlH?5|I9iXVx$}tO<+nTc{Oy=QJIN?cX@gL_LeH?ZA*;14NZ^KD{cjni~G^%XocIU2%)vJS# z`}S?mi`Qs9>a>pIaIsJ8%{O|bSAVt9J%yCT>WB(uXXndGh((C(AGUH`=gyf}i`GFe zsr!huFG8Q*ha3_ga7gkutV0_pm~C)W1!AbKl?#`24ssgabcjCG`kGKR5Tqm?{6^jj zGzjz*8~VR~y?~SW4k!ABKr{01htkZNz#JXqi&e|Ay{Nh)0Y`9OQwAcsMAR|vGjD#P zoeR6Uknnq{rj0(Sj<8$k;;>I`jJ6i+poEnbKa^YNkhR3#1;)Y;y@0?7(b!`p_4I>L z<>q;=3p#HlAiCMXgbAt=KG|p(@wL?77Ycn}0!2*6AZE5jBZ|uGRFA``fZwT_yc1^W z3;cQs4>6t&h+Yq8j5rzXNWuQs7izzPou3KdkLCqs_~&#$o%$4G<%WzUE7tcR?Z2h6 zEo}XBT1FxMxNgQg6ltU*5zE`OV|7Ou1fysT@CZ=boh13H_9DrcH~OnBiN6_n>J{QR z?3LE5nh?ChitV6T2p{^O)B82TT!BT)Xl2u4AytW0d43hH_d?Xi5nw}BHn!`=@#*ex zr}`?nfFQ&j4ch@e+To@s29N}}bGsqfL!qER&j1_&aYm3Z9=>#`-x)($=61&Lo|L^U zy}1%WVqp0y;u%3Au2`S<{ewkbn!*k_H?yYC%?_;@-GpQ0%0i&uXD?m3wQX%4)sq7n zz>})aCDql{L)(6+-QU*9?8S@lBpk$nx@D;Vdc?S5v5+g4G%?{s8QTeE)xhle zB}Np#p%ChYG9WghPl*Hz3F#T&{9jLiZhv4qK_e*-_==w|9PF&3D=dC+2)f+wRS|Tc z44F|P-U@gWf`f+*cny#~2_kb)8E=rW_5Ze4Eyd^!VCTD!&vgz!{u!khvmR9-7r+Z( zPU;;1EJkXlUGYPXLj2G%p!0M<0mF*m1!R{2NW%Wy1S-Xw`7jdcYiv^4!eEp77X+U{UTXo`=}h`1T{vPxFx|M~1utAo zzbz(c1J`kfLEsiv0Zz>chpi-3`hh`J_$q`EMm`DoQ@gZsM95PD108~+Odce5B_G@= zp~uh&Ca`sCebfLi4kt;|lUxN6q9CR}3Ucb0tn398eh~VC`14}!Op{z4QiJdba>6W# zZl0h@jyNsivyrq&MEwvhsQ~ivfX*E7l5NoU2O|MSI8o&H`yruOy?L|$w_Q#+K6;qy zBt>K(#G2xn?8LPmqECdJhR`~)vMXof6EV`M1`-ZNl#o3=|K`>cfUC7!s@@Pm5EcyT z0a@&;D;CT0aCvv0>lB_zF;c{+ZaG*j(k;LM=EjFt=Jy$?Y|Idt1Urr|F3(W*E#2To)a^_<-2@7@9leK-!R&?OIQd3X6mo;L0PmM z5@Sfai>E7B<-q`&A>XG!=l3O#51j@ME#3ya?ir+4xBj_D)WKU@PVc;B7$hU_Q;kEnz_S*2f9LZrvpjZSbv(N6@Ogd>`lfNKcJA*D7 zx3d?8neCp@lpBU|T4d^>pNvFd0YlUrpk6PKworgOp}%@`ah^&D#?x}=rmc&%URj4{ z3}^2Ypd)J-h3tf*A$S7J89acGZ8zDY3Q5H6PZL+af{C)GS_8W`X-`I=$(bg)!jIfR 
zl-BC3ihuM8B>+#EK9+GEef@)IkH}&PEHOq~+@15)(5fr52%L*D^ZCn{E8s-JPV~|W z{?v;dX*$snpjUvn(=5>Llg*DrRFKXUYo-cPq}&1cy2Borq7IwYDB08q_>Ir9#_T7f z3Q)WfJbpk5FEPEKUs?;UX+By{ILUg@G`>UB2kgscFd0T~9RM978V>Q{HPlm1Z&pmzN6c@07&MyYATF z&}A8?<(ojIv1WG=(;>dYvX087zNuC8%t&pI{ANakoi--n389NO8j4anE2dq_H z>h~!f6aR3F&AC~t@27TqdSNhj`P;@x`FNUSZmc4J8C7G49Q?@cfe?9n9NNn16j zzX2|Q2IMU@L*tSFS4(h^v(CoW;q2e--@4@e50AIyR7e~_rhw;bEq+ta)ho>G51K(MUo6q%{OhtItPn$%`LbVa2BsxGKMI%^|Kq@{FKJxDovbCZ_VAjjVKH+nBpy1MEFtc!6W zTk+ILU53tbk*w@l)C!jX^V_1zeq|dyPByRb_#S<~LCy@|5i1 z_M@wtzCPmqc2YsTv;Kv{`%Q|rGgclBbFZ`XP^Y!GdV&%;|L?`;dI1F;vZl8Me#}s! zF`YI_QTgOEe=H!i3I^q_bcgwXc;bQR5{3>@Qs+_QXsh0!m7Ii`d5*IEJZz);z?6U$ zf}@;+kX@*i#F=iD7cp1zOVDZpHWu-0qvD~MuRG+S{Zw;vW<%Buc3o->mXbkudghWKN_wQeTHDkES z{?7RoeHjFWqK6Kb^%zaJYhx1<-O8X7t zXuC(TW|z!T{gi=0cFxPMuoSqscFNA%^HQ6yw-*14Q>p=Yc%RR%`TOZh>vHeyJM1>% zI7fZX5|h&kxqf^KRRjIQb|q2{g)%*Th=a>saGYo=$UtPnY%#ZGG*%4N@+we-5@ zNg1=*7WB0v9gW3xjS+D)X2CCG+{0^G@88Woc}z}cV(C!n?#<^+|FK}s@&)ERZtKDo z1k$E^G*i!9m|iMXm9sXQRu*?&?%x6Q*LA2C*?pU5UpesePF;eBl(trRf*`N8qxadF z`(1bjkU{wOXKY!xxWJpO^ZCq&R@~~NE7dzG zz)Iivv}Y?-b#-*4+WU7k?0dfe{j@B13dL~Z#gY$T${>!}fi)d0f)~`W=V|$0cWUgc1!D0frOxhfg@)hjg}T~-T;3tHFdZEpdaIs?$KZy^k1I>I zd%^2AxNq=>)S3bF!G5z_I~GUza!nWD1@)AZ=87E=Z(C;sVd;tXi6`%+NIPb$`{ZtK6he4U7y*^|r#ud+Ejc9Y*=ovD+pAi?&G`h+k1;J6Phq^nqW=7;B}5 z#=PfImqNlCh(3aO>k&MDX!fbhIilz1EI)--W`>AJA)KeTkR+JdhevFMCKf%bver~3 z;%m*6w?En6T8BNe2?mZEAdk#p3q#?s*on=>7>5R|B?oqPjg>t(s=7PZ>o@=MegQ#D zrdKcZ@?&{F;(8QdY02 ztIBUmk{Rj8}QCYwkc7U{hDE)Q;OUSfSidr%7ps+gK zQ3{oS6mWN<_JIFpxkGK9=RtAI0-}t7l!eRyhcHB036~K&!F|==dZ8#6$B@Qs%5O2Ne2E?C_*z!^1oaj(bOv(Lz*v&(Z!V z9i=Nap8y$8WCuj`0v3z#zl6j)YBP4^G~hbtLaM7F&^;7nn_!tKPc_SMT8LIH5zh#V z;XLcgz2JW!}H4g#5e24j{2`OClXs}*6-1Xce#=T;89w;81rA-CLD2a}q^mnju zMM(1S9}aB2dr(J5=Q>6-jMA4v7V3qd;|LT{c#D?h4fI_~)Zqs^igF;XCXazU0=&d= z^y8#Re#co@7%xN9V~7`xhVL2DI|P3G;#!}FhN2XPiC4OduWbaT22Y^UkFt0^Wb~Ex zyWxc{BY4BG>JH%Xo?h2}-9c*&NTPx~M545SPK8Xg71jN*Z<|rC9{Q59kkn@&<%F01 za~@<%Fe+`w*uoeU=S5^vA*@0ge+bH#aPjYve^X1ZctfCypx7ayq@;@9`jZY6-S*Y_ 
z60BbaZeR2i*1rgZx*|PDK`-=9Yb6($w!~Qed%K@S+T^!>QM8|Tpe$P}gy!MEyH{70 zca(n3w9-mH4apSfMc)F-R6!O?IygCW1_apX?Wl# zgC0fxufn#a#<#E(FLOf&D~s6)PRFh?6VOKdXcoMY(hu zV2(*g2n^E6U}sFhX5Zah8r#@lZF3i%MuI7MjrN&r36oh5Jn8de+nI0`P~a$NVbg4G zJpeX%C2Y_#@JFcbylp&x_3ChaxL~pq0hD13XNDaH-TdMHo+z@4 zApF=*lQlvp97m#0+nYy0Cx1D<;UbVEGF{NO5`_u@a!^(Lg4<#a)G`DUo<4V_P8x2@ z(+ztpX=+dx5J(9R?vX>CmN-TQk#J&7(8s`=v-;>9pvy#$X*(~2X#Rx)6u6Es;Oy^wMC}de}5IsCKwH=0<+%~!huVR4PuDi2}Fk^g?qeI~d6qcPq0ByhtI-NkcreMwvS1CMd?lHj-;sv^Q z&E6DMBhjT$c*wK1uSU03o>Bn}5F>2I>E+zh=RPj&=_S7m848;q;qsVH?9cFH1CdVv z7_;2Ql#~!iyBE`p84j3SDINPAyW|g0vzad3PWL00naw5wgjbkKP;U)BZbU>9Y1v}h zoJT9B!6BZjlschR*eCN1vliOex?OU#uDP3wAXWUKD^`awbuO6s$7G7{&t>Q4k+N#GGw0o`{Gs z7Iw-hP(UlHH4y4%O5wx{JNXbx)?;YG&g*71;c_4m{&0jm7-smu&yPzfr!W$^z=}iS z(^h%8|0+=JU7No4l&ZzW;ixX4MEVEe&V@Xm~ zRaFN2)9C^8gU!w`(yzxh?+fC5F90;=C!%^>4{FI5Z-pBu+ZHnolpm79yj*aUYIP#! zoADJ_x%L@`=2d&spTCOk>v+aS{5$4p9?Y_ZJp1>bl05cJ@|LBhhr~md7D>0J$j-v% z*bb}0duG11FGPH=yLCyP3FJN1^u`w>Jm}Ez#%ZQcS<9H8zXPXd6v(^$tRetfK_+Nz zf2@8s?sow*kX#_zwHBxnUJ{ip%Bfxb5tnFi|1^$NeF+KCas66U8>uV245N54(p;#c=aEy!UNfu@=n-Uz}5OiL(KJ+KvD zV|rnGfOO$IsX&ktx6LdDoCaj>p`V^8WD&hEsvRxV17*u3a2t5Fv>)%rZE|KN08v2( z=wi1#k+&mP{EYb86NV`VUVGlBgD$xR;Y?*S^o_MB+Bj>${1tslPEq}Yjl>;@KX?V8 ziyBM|vhtQgD|?hWux*CBsOtu7u%q?<{CEy>gKk^^_UW$2E<_3ls4or~ za|AVq+P&2^8^Z8&RJG#{h!-oz3iG%T510s2z?>0?5MXoEv|(r<8GG&|-A8QF{iprA z0ocDqvT*;&5j@IASaU$(1=kJd5fM5@?1npcts@6eZjFHgP!$B)`w{AfN>6M@=Q?#pG2~kQm ztwkZ@d&t(r*#@2(Nmi`TyXFdH{RL514-2j`@C1liWGRI88KzqPZv0oTr*!1-4b%w4 z84lJcR|}5~R6b!nY)IJF*V;WcAqvRC%-2 zUEE;ZZhp&R^xcJsov16n17~iASF;|e7DOTb;r7mRgwcUWdF>w8JEO8Fw-SmC*3@xI zntu;n@?=dSO!krO&LFc7{7R?qL>L6w?f%({tDcJr`8j%^H&AE2#1gfhH8%Q4{Tt~t z5GV8kk98p94Z5Kf!jyvx>8QrxhSd;kKW%b%*K(3!NJu4P<9;zV?)4NuzPBkq;l;cG zmHn9JgHpeBBDN`*xF!m7Qt8qcBDeQo$v3$ z-O@p&a8L98bZ@>(Q{{$_Y@H6pM^%ep`yyE}q?yHm2azjnXMndlfX0rjRv4+|v<|ro zbjeLe0?Z(1{sE;BS$;8Q4U{i5j>%2vf9h;kJoqF3VFNNE{{N%@xew!m>0D>D6tvdQ z!N_^S9zMXLA5d#Umxa%zPtVMe7c(C77=9k^;5 
zQPNtZzrMpq$5R6;>cPUc!xi^EcR5dx?qzjhA?#=cFRHUIj;7qy-VnM-H0_*$>86Mz zk#$ApyQq2R6!Kdh1el~A;ijr-t-flb6LMhuiL-O@8<;$0BBJ)h?)m}^(UeyNB5~f7`MnA3;LT!586HG${;H#0P$nu28k0H zvg{$r87zaJ85TJn?g?L@o%YfR3I!8nM7|Fy!}x8Eo)7t(DphyCC@ka{e(NP2Q%jm-Nu9O2)q2<^g&N~}1TwAeteF5wPfpZhYoK+nf69ZjXH+OW3YZcp$#M2V7 zbn*US%cxRK0kjJ3RdpGsB-|vcgf@mSjs|)R&(<{x3fcV~x14)mR~kGa0AeEI9kFQ> z&kHb3)T4Em4sN|%sd5D5oam>8ixv@?q0NzOPZu7&KCC-zqUR0))q8`zR;k(s5YM_M zE*D~V!2Uy|p6@}7czYgKtU~#NN}s(JHEe0w6;|&EECmgtISHnhuaYIL=%GFI4qr4X zhRfkusHJlSnFjB!O?v=sT4rj{BCuJKSnxm=2cTL#uq*b3z7JM?kfv z74L{O_}MeI_{&oZq9XWkONc`F_wfENHGRK%f`AQ9E3!M;qdiyO-Nk0F`d zxPaC&yS+W4a@6WM&n**y_AG(9t5S%_C~ur-GW=OOpxsdV3qtG$1T58IiM7EzvMo-t z`}z`V?Pc4#Vr(#>w!t;Xh1)F13Fpkv!vl8EBw@hsdv$3(>DFzFLqkKEpOdA4OTYf) z1`iH5T8RcSupZh&--($o#ACoWYywznFNRs~SN`FCiG}mm=4P zP)ms8CPmRoqD|bEMtCH2XW;~5!PYRUuXlAx-!H_4dP6|LSjK{S#0`;*0;c_7xPzfL zBfgg$wBwH(2|^2nw$4DMI4`_yaR8Y~X{TJ{p;XYDO?E|O^)tT>BwZ_mkT05fh1RG4HN~nu{5WWf%#B<_uNmcJ#>nX$AN&${tio7N* zNQO}qN+WZ@4!CDSSzzn9zFfWgl@uf){wQZv%jNxXzx70KT;>KB6OrB!(3{EY0d>C{ zbZi7kz^HoFoB1u%WhtPJy_xx$I40pdpo=pA=E|Y(fXExqhnU6!#81HdxcR`YT=<9y zSq{7`IJFgWTOt=n{t%K_qV0!KpsYX#N-OSn^_)P^%i{VA1Ps&c-HOl>;@#KXIM<)- zcy4y*_t4HVJ(P@f)>*Cv3|$B*tK6G~ikIoC_OAh8PW2BI)@8XpW}=V94S@f=9|xZm zxJE@(J(!}l*j+N~7xK8)B4hI_5r&XdL86aBS4Q9)JVKyn$9FjqmncfG_i-Dz<KkoCb#h8q*z6(-7+-lxOIe0YQ90MNVWk-GD?B zzCM7rh3NT>(YV70O7zAAjUwmn!p>?Q*y~hf}3ot_@AgJxYj?i0pt+9xi zMg%m#Nz#E?5qm0882~HWk2Q8{tj09}0Y<=jG198#E`GVab17UBn}}!}gHa!f{?AzY zP;L+>APUKrOIu6L(3_ASLhM{AijghAAg}?hfiQ!(w}_?1EMuShqK$JvO%ir`&9fr&nX2&srl8b8%AAsjFjC#N0}8~~41EgC2C5ZyX4aYBLe2xpEE z0U!}2fH29dN!CX=D=tVI+)4%_I*8OqW>4lyQUqhgAkZ&Xhl82N@GnF`6rfg%gK`i3 zMknBEAx<_FxK7~-91jfR*a^f&CUu3m^K|lBk<@q{Rma`kPXe)eCZo-Qj3n!E7a3fr zb=(k$MfTeybPkD-#3Osm20%e;<&&j*w!7f@LFKUoZI=*EzG4H=&VUCOrIOMSh4cyh z%ldtLHQbcx_7AkU2AiTB=I>2F!*3Jo@5g!Og8xuofik7WOKf-A=@jdNuOIJwGU{Pg zUVj{HGF*4Fk_PER6Z>KWzDl=@JN{05A!5JY6e*T0nNf#q864 zh=&md32N$HQWRi5+yt=(TEZ8OKSLmX0F-snaZ_ZlNQJ*dG`<3qHq8K{7vV(`O)gf6 
zPOR|GNSo^fYz9TY~qn9zsvDF34>uV>6yLRILa~|8efM70Azd*OCpepLR73o zPm+!ZrN}jo3jn@qJXUz+RfVxf<^(|=LHjND7<9nu%LkN-=-{uwm%G~}_bx6;kQg^` z#|M#A1Qhu+nzb%uydF@cGM^okz@R30GHeJ07sm8k0gc-d<690o7rBV7HHS5Jl7v=a zaVZb1!m6Q<`-k9M5z#M>nV!UohuvWivRF~5f*?rMQGRml_=9`V*YWnq^+B_DDLT3Y zohS@_T-ch_iM;7j-;vIBPtjgwW5*z4l{_YbtfJc`*^g+OoL>bw!=Vga;#PZXxt(uB=0LZb%%4m*b zWN;{MFvMg&#LNfqTM^GAB{KG>e+Da`U5T{Y5TS$CniRG^eE z+n(w%Wb0OB>7CVanl-X`P)#^QD{9J*OYkVK?NF*1Co51ahN|KkT-tc}HKwE-?q^Yjt@Ev;q3B;`r z37t4|jj9gplGni-lBnYm*!hBkLKg-PX(;@=ugThc$)T#c@|V_{fvTS)=(Tk*4nf}pYMEbQxyYt>^L2jTyWd#hw4*5;*QJ?PQlY4!m2YInVsH+6HNSD? zl6>Z?3FVUgX=~zN#~Ynei@iyuESNS2>n}HtL9YD7o>~|P`_$o`&?q|C)s#fimyYS_ z`fR_MUp|OrwDjE$-*tAS3)<(Xcp?J8!z2BwqQ)4H2A^(OvRuWOa?k(Tj#Hq&A=)s^ zP@o`M0M1qLhYP6Nx%Yut!2aTUcIIOCci>`q7EhGr4Bv<_;h3g@3(D9NYeNlcd&^lV z5Vv=}*M4`Cd!&;CqWhVP{1SVn`}ePPfLMv?l-__fO^O(Qw%@SS{871TG$nWSi=S0r zYPk5HBWw3L5VR}M6rvkA08EX*j#$6D5Df^Xi>ad>7UT}m-y9UyRtS?l`Yk&W1OeEH zJZRGQC4XMN7X$V@REh*B#nKak1#o`ZZw)>3`gt*Kh%6Ge)xD)dSF=5-XKDMl=^IY3 zD~)-0U@FFF=THl%v5iW&NXs}$o>Mvjml#sZAo@ZFinJD35~`bz1HU|gf`rU_5DU~# zTj_0-E24yPryZic?&$fAlz54)h9OXqCIg~A-ADUN3MYSISc6{BPe>z`DK|dZGB&Tbnx_!82V@ME{>?fL|y)&@_2TcZ7;O2y8aMtP5agbYi zVgSrT68tZAjQIso3!GUwy)&x3)JDeCvb#>dpocJShCod#%Xdoh42@Gagm&iF=$?Nc zj;h8ovnj6CT(H+tVEm6l@83EI~$!CZKWUEiOd> zA%oT?)zs|Bt(jPBR8!*_eu;ygt;^T^Ixt6>e{5&FjoU-+>|uMieG&3?Dn1SM3#cX` z47p^ePR0m|Pi$_0iqZ8gDsz;E68@+`pKR@*iDvWZgoJT`K}Wy?sTe^!LBhIna*$IO)p&A43VMo*jrzb; zzrru;RgKqzX!k|asJuXaGvuN!NjDz#S{%w4Jp)u6aN1qV+!#P{4^j2#Xyg5GC!q`? 
zaz=Ic?v1K}{|-jRhNI2;ZcU;>H8{sMO|lkcTz;oy8LF3l&xv z7p{_2V%YXm5Ss%1&bU&LdL0lzaB=R>%m032kS-KmEuYx*ytdUPX&rlK`-|hdt)a-G zt==y$JTuLa)-9aVaYEN|>l{ny#uyM>>0%YD!K9+hN1D(tp@ z#)k(IOt%ne>gxW-6JIg@MJk)~M^@2nSNi+?Izkz&FW(crXHoFjgSICB>r&+k9h)0f zHq<^Ul^Y9{ruL8t`0ofM|8zRfTfe!eM|NT+zC$HDqJanr% z#uyn=<1n%0Ul(j1m42^M>v+@IeLo*QF@MMWz>^pz&3whjwO(+es-HB~76dHJ%E1_1 zM~@z5H2k)KNb6Qc%vzeBQ!-7XX~@&QY~|3*&23M+c}|A}iDRb*u@iYSz30*|90IM_SOw} zxU=SD8YC48i#M0-ZxOBZT&(<9X6t=(|Hy&%sp@n-^I>D_+sc|!WsVDV17#rd`}6Ha z&$%8QRO9`3+|mU z@rT>8S6=Jmbk|>6Y-2UB!p2?be$H}()AhN7MT^Tb>V)f;#?Fh(!zkFKwO)`jn18;m z_HOKbAfu3&02hK7bIs)EmUS)U`8>+pcv@r4#h6@+HxG=zhb)<@{ys})y8M|SnyRdJ z*H{|O!x`F+$=`u4kimG^&|om9^Hr&OZvB*h9HzecDcxxw>%F`ArMBudXX@9-+ zrP-Icsy?$#>}uaf+(`}IFinGX& z-IM9YD93_S^6)O3(aAcAp(tvs{-aD) zT@$ss>qvwv^ViF>)4q7BCA+(J4UT@bt;rBvd@W3~jET{GkjTCM+&O`;JsB-Qu~FoBXf+09vbNRU`ofN0c0gtvC<*~uGXG#ERr zh^i6Qt0^42KJZiQ^Sr|)%PyFdGaCgHO_t1J(n6$V`AET|^AY+wqWXcm5_rre1n9uj zSE1Qn4xt5XH|qf^{sO*0VpD1fr2_zroYjU*t41Ei1W}K>3*tE0|L}ed&k5IW&+mvG z+izm*wOiS&us~tcj?`!}PaCBtN|?L7jk5+VyPKGHT6@1=MwAU2PbL^D&Kx96)A_BOPidHZ**#!9k1u zjA}91JO^0Q7;j!nIRCV&P;x7rPRl;Bw7BAX^RKwsEZvZ>P%;?L;K)uk3iODjG|!*i zxN`C2t&_8|Z2A1n8!a*g z;8GF!6gUJEm!Z_60^Z@;i*|xwJfxQ-963%DT3YUi%Kinam@*B6En8HjfBkv&H{TBR zm9+EwNGaq;&&AunsIZ4AmP&_;ca0xd2=^_W~5M zEhhFdI9oFxmj(z(yoKx)VyJ2OlVVyl!7DH_HiYe-yg60gh0>jU90g2`y9*jZI}%3D zwz&74mA237kna9|mbL~<=bC>P5EHOZ=uZm@a{^fulEI38pj-n-F}r_fpD0K1sR7#` z9#xqcm92F%B-d#ypv~0XY3DajD?ZH!OG~H%*A9_&q)-vlH}Do37fJQY#}0y(nv#bC z(!sFsHI$pE!q(neJ~<{BH+73ja~1nzx@fV+5nrnG-h|LsbZ6MD%t)HEJ(UAu`SjW} z59XX?oEiSa}ZmHjUwa6zaoNum*JSGc8!x<_ie%@iAq#h6}!h(7kIBX~~W>V#$e#1O_kMov^ zR1g1i)-wj{F8-TBl;-7qNyT@JlboKfnEXCn?VHs#B~8~(9(?Eg%zEUdr};nbTn}=y zqlQGn18FODLxkr~%&agXB*})BG#dv>%bBNN%C*QLbuW5TY~LwBiXJy&^#;&Gh&&R; z>(r>4;*tx%#0rpmYm43+Fz2!`0%Nv&0H+|9>*^u=dnz8<D8)+%Y_UStu%>FK|A{$0A@XatS0E1Zi2NSAOzHy z>SYECuc6Q;KpKUx7#I%WsU!w?KO9C7G7h{<=kkI z*t#&WE3ba_N_EZJLjTv(V)wj66A{9Rn6#qhNDjV*bPk|FHvmu(tyu9e`k+vvT?7c% z1?Vd@I@;s26N!;h?YOCcEeHZDQ%DcJ^nUt+Jvk6{5Z@mVIu(SJkbE^D 
zT@1*q$%>ApyYEs{uY%;{Afp*?(cM?@uGB4~e2ImAdXQb*oZWl67hmjO>%XHoV8J#0 zvhA;iYp{&Hddszh>3sn`J8X>N?{WzCMbts~dp~xLfHkHkFw3p*?nJ{`E?YI(A<-Iwr-`;e8lVX1K6yONC zzwp@o1ML~MGpQCbo8I3D)?X^SxRcb0kAMYD4kTS|o&1YpOw}hs+j7kE!s}wo8{)I4 zReP;+bU)r1dtA(xb#$t}dh%X|)s_&(ynOa`vH^(DRRgM=2yvFM++eOH1>oaz;G~5l zmqs=*S_?b^oV^~Lp}&2t(z;t7S#eXYwO8heoQ&F`PG3;EHrppFW{LjR^VcB1FzSbg zrO^;0XBv$0`w>Bpr^Vq-q@DmrNp6CX&44&$egqoQ(9j5Ryf_!oK|Bn7XkfK}_kOsm zpqZg3-_}+v*9?AqVC{xa7QA`U-<{Kws;1qJ*i)*qSY+X%1UJ7OxRq!bVa7jGb&Ixx zFn@zLItcwySvVoy2qB2WNH39`1p+)0AsHc|z~gLEYFO;QEA!+hS(&k7np6Q_) zbR??%AZJCZ^P`0Kv5JQ~E^8}ZVOX6V$c=A^8egeUw1B!)XTzTh5!ed!<-Lv^jiaZ3NCv?A^Ap-0SmT8r=q}2V4ZLvYX)0ILVYp z`cKCdWH*(s;{AiAkCU>32J2t!`y|(&-94p)E_lPHs6dYQ?e*;!q!+34T@Z_q6khTy z+f}5iYMDdZj>}2kM$YSKa$%z`*n~Qk!9mpZ07I|#{~~H42$dm#+M2dUMi^d>$!ErB z(8?dGqQ&z~El7NHbI3jWpJqG#;ZD)r(gF;RhHj6Z>04uF7B7tOPHV&FUALDTqYpuQ zb4e+NZLS`H`}L^Y=0m^@t#HEEcHCY(NPD3zvXcXxY6YQuq|&|t`$4yl_Kja`19w_e-+$D{K?l}y^wxT^Gd(Fn_Xae z$dNM#1-6M6*iDN&8r){QM{Rl8@`Y6G@uCTxeWM}tTGac{?u5L4!*hQhXN95c1(m~wgA5b0POaqmkEd9xx{;Q!CD-1 z9>r-i$M7lS@btA5VC-yf2v@qIMicrN6)W^*BqYde901T~lcREwXiTKJpmwcnUY!Wl zv(VPbFOY6joI3U0lcg|gimlF@Y4Fg#gqAt`gun8Q`-#R|&kR`lL(+2V7ON(agocoj0*?NsljF@QXL&q;yFnWdI8UnI{UGeF61P zxmBbxJ_zT_p7pEKV-^*DkL(sc)@1qe_8WiOlOLC;=DHMVh>g4`ByPQx$@pAV0E#Nc z5iHiWdg8zW8M-mPPxMa{5VmmHZokAat|mt{ib1RA)r6DU%YC#}Jjdi^vgJ;(p+p^v zBsWcjBBTOB3DU2l9XxrXPC=61$8vlup2CSZC+ZH>)7f=zT~HvU%KY$>Ck6pi)eB=q zM4C_W6at|dT0DL+;4NjdA6PU@?pYya!2aLgj5ATd&dO}O>TG@A^7G>X&d@!5C`l-pG9_gf^JTHBIDJTiN)^BMOIDsHMP2H zuC@LmwGozs36*PUNvlsQG>OD*6-{UG8{ePS(k$QH_})4lO7y@Favba0DuqtS4!mLWh|>dw{zV#NUK}p zV)ofq=U~QB@Mj=~-lA}DEW(z_4Wj4<_75UcLW^Yt`Th+f`i5WU0Lfx|5%uxYR-jWR zAFOJs^wwQExy75`HC^jD%d(%B{c91D(M%#}H(N5ZB3Is|?7R(UI1(aRv&Q0wPL5ue zZWR7KcN)=O-m6DwLK8lDJ`X#BSn}b9T2Z}Pn3r0IZgk>C%Y+mi7nDR#U6;>zLu=%^ zTD9AE&_i>1UvJir%mK$m7ng1Rcw5n9&B`O;$(;&2iGw@Zs~-RpSAb<24M|ou;jM|I0|4zLCXLIcn)Wnx>G>Q!+IS$fS?%nzUmd5ny*oV? 
zcK*xOmK`<@Jr^AIJ<{REP#ZVNJKaJw3+zqyq?ehv?I49m2cIdG=D`eeqPMuW$>Ou_ zU<2!3<%tCxvp?0H+qkQL*YglmE!5-VLmt&AlwZ0Gt3T+a_RF-I{BnbW!7k23ll|s} zy60I8Avxp|)<%aM5mkF6Gq7rpe(+Um%B?+WDZ!YB{AsszxG&N6LdW(N#I^xm8=}T1 zUxO+#4d>2PK7FE3$Fz%EN+Npmn^T&OjpsxRZ#kTqI&<#}-XCSFIM5Cs#c_|8U7P zxb^JDrup(2oG~6VepZyODarZCvyi!~dGr`_@F3ajPk@^4H|krWhZG>VAT`i=EY|mV6pi{5SMOy;YW{P%zLLFR&ePg0w~f!5Z^ZXT z=lxaI>6QfxaVnlzsn;JS9jtXXNbQ12hQy3Gk&A{;KQFPDRgQeQjn8akF{&vpV&9iN zLXwW*9HJRo5k8pcL<400EaqXGWMwf`i92|>Mm?*}{;=Z3r86|YKYD(G!ntv|>o(gH zCFy7WIribK1DU|l&qx&yMS#K@2(~kDqMRtgC!x!HTjqs-9t2gs(ylU$REJQ(6+|0P z-s})vRu-fwf)dS7E;G}Yd*f^ut|(WtY~!Z&7v?&AeQ}3Emg5-rDfkN^*besiws{LC zG8Fn1P#8h<(*;sD1&6ld>?Tgo_Zy%nT7&Q+6&&(~)0}30Ml=-(F~W93GG3qx!HPMX zST4{-6~lw1PrSad`p^H9$ne)PK|87oudyb-Ne?)|vmz@SKJq?ww{;UuJ?iu6Ufhnw zGCQU;ZQG@Np*T>{bwu^HalAZv9!luT0+L z{yd`nLtm_hB3>i+_6iFJsQ>?u8(IjCwO-?QLpK*Yg9{`4Mwx9>tS zdxq#)B~c>noNHVW9x0yjBP48xJIhQphpc;h34=)#G3cg=A_gZ>pz|kDVNjAZ0+MQm z?&l1x+&gM7WdL@1i(qAcXp)J>~du;%NDLVXAY*BOXtTgqQ z-ZwQ#&h&6Gtw#q9Z*U4Pozl`I_rKLo z!55$~p{bMPgUL}Hq>a>_SPe!tYksKns)KtYOXnoVt7g&9)OQ!}+b95B?Nw+q8PMd0k40vUAltVSEhXAVAW zIhmM*FXv&$4uwzNYL<(nTauyO18Tq-nD$1+KfU^bCCh~n>Zcv6ESh{{3P;0OvGifV z(0L9GF7tOkIhE-$UwEi_9|B;Vo9Qxw_@XkQS+kgw5dIR({bIOf!5(I~;o2i1Z$lE{ z*CsTgL^S)ZuaQv zyB5?NbowomW2_f+LHUuEvKM5ZVBGX4V8Kr$YcGWQ3W^pZ03%z|h(8RXPvT9|UIPkm z;){a9{QO&fRLrXUut(d&Lmi!DrUUNN%)REv zuJFEPzI`OiG*8bJ*(%ylW-=e+T2#Y8bU58YFu)EVZlm^Tfbyg+ZJxMFG-5bS`BEk4 z{vrMAVo%NJrOmPt$*04O6*8`VyXNFr{${?uhRO|H6iu`_5J&B@?wdyb_t+mvHC_io%W^Qz+MMd$0zSBoW8dhI7jk2KTaTp*32 z%iWYxEKkIjM2tLKMS?B%Lo7><70_0UoP3o0cVg)YC(G7;*elLDxY9XkYTj!7TF@->*K>ng?6wt_c~OKw??78H#z$hXlsOT9vnFmr(;LpFj z(zfaAYp|c49AjY%TRyFi)p(bt@HKVHNZQZ6F<&eFezwM-eP=Kj^At;)d9295V1 z9}rmj5|xjhASdDMqEQh;Zn#kuyXeGqjlB+>)meY;U9RTI4H=@w4>Ygk=ANEjqYXqB znMTMMU~IrFkWIwaSrkXkgdo8f8l$~CaOgz-Uno_X=C5Bi$OW3C8YbDq`EoUNU7~wM z@{e=oRUS&XG9-VDUzXF=fBPAbHUdP`j&!KsR}ys4xjFwyMLseonq!rE+S6>^npCfP zIn3;ecUo@4pFMmZVk?Yk#qpJgAjL4!NKq#^Pj7yIPi&-{51Cl#UHQ@0 
zGORYqJUpyZl#D&FfG7tcHv;FcjG&OkOO{yeIVK~#ANdDDI{)+hZInixQxf^mRbgsF z&dr)u=R+Gb>kZU=p7x0?m$}bzO;bH|x>#+@$=b7vuN%hc1mJLkJL5&c_-YKZpCfMk zAxdvK7nYu`t_pjS6aQau1NYsnqPvpWz1^cXOmA8q@bm5s;cGu$FL?XCDJsrPS;j;@ z-xN_j>c8iR)2$$?TFUXZ-Q<~rBqaqq$rKp*gCwr6Oy;m~=SsDoPPd5XHFp!DxW=f5 zB%Gf=_F<&N-DOHg&XMt^oIjUTk<2^~oDWP=S&&S$`45C- z8XHlbnji-vGmSkJqHhcsGJ|y};JPPBp0Ewmsj6WlmIs6~R#J&FOqzcI0^`$m z8{!0UeCh#Cqe+sckz$sbcjrO=IRz~vqK{PXpOe*4-1*6@v$wo%vv*tNvzZ3#KqT=K ztjzxlSi3l|cF+wTXq>_CuLUMX!pB%8fX+7m_ky6a6=}FW@aR^Jb_A*6)DK-*7hVYu zih;&dy$CoZv?}hyU;UEw4wjmS%BR#fTpU!oB>kG;W^efcU#>dz?z)clbmQ(ALZP~+j!I3PNF;8j`WDSGybPH22 zh!s?iS$K)|xB9?VR2MK`BSyl@dvWEnE6WYzc|C??q0bfM-K6>$-}ab}!8EylsIIpA zOi_5s`5>DeeOt9_!3Kf#UG;KiQ?8lisdcQ&aO!N&a%i|9h+^{{e#z()DS`xX^z?ji z$tXRekOS;k=Pz>5z2;xw(0xvMBRM$Z|0Kdx%|G4qg$OjqbN)gu{Qj!0N zTH9BFNUd5V{{^WbKZL9nx*;hFlTA9xO?)^Osar^p+Wuv*=aHezfoET9r2h^;{Hx>P z_kVH(*Hnvj-+3_gn1Fx=&!)gjVZ!!PfH2g_)8Rcu1NZDyF#j^$vSos zk#%}LJPbV-W66se|BLF1!DOP~)^MS7UJmj#?OR;CU!yZu^Nn-1!IqUi+TWS0cR7%D z2tUt!_j+jFjgX)v2q4}E=k9Q|Jrq-jeErTN{rdxTRk2GcRW){e*s<@Oq+9RyU8&Pz zysp+VLPgiky>0CC&%N3z=ek0PI68fNE2OnT#9I`H(3{tA6FsSEBb5Lk*`Rd`eoqTV zLF+|LJT07ZG)&?R-FAe{(bTjIGf;FiavSU6mMo zXl{+98V;y{tw^6JGyhsg zNzzUG6A-u?Y4C}|9@xc2!{aNnTorIEcN0v2H^63UFNm3VC7^c~O0PraXTmnOZx-J1 zdUXg{?cBSjYvYek%c|HE7cH`j04|vJ?bQAk*Xi^_UuHPD5A4A6FO2*RJzo76dPJ|a zw%q_KCQ%rPAp_E6pXwYmsgZcR;l}T83|={r7<>BMXr^nx{0+6=Lorq3_NdA167+r( z1l=zYc!(caIh$jY~jX@cwih?{j7m znQRN*5=vnFNcUXgF((IJ$D2*6p~9r_#*}PMH+NGtHL_dJDIit%`ajFo3?D^|_0yC4 zYS;a!oP`23tNj$zfpjGVS?q0s+KxrDijDH!&@hSa8yD1|5^4tuvB6MHOKf$(EWPe`eLA( z4ESqxwlA2&r)ivfIUq62s!SS3@+9PNO`p3_m8A31Xn*4nWpQbqYSZRW6tXEx|H%We zD|T$%=n#2MSpmvs>MH6x_nmv&D6@N|diAk$vx#5~M3B(LrmI@^KcLg_^g2rQw(%+C zPK#QNQ^oS7E+R9`HY#ixUvWgo$uldg_kKw!h%rVDcWQ#bpKRl#so9!i-mEbZ9J%rR zz@h57s>l7{`>{)Pl!`tnkBlhy?z}HH1tpPnZ;HdGAKQ`DAAFG*cp5kU=g~Nv6WQ>< zqVuhq;X_H<4+_fvP3IE69v$v?*pY|lg@Y-Q3v1&7kO~D#Dt*`h2?6^wnA zwy?s!%T|#yWjD~A{?>@F1=M-tngIk7dW`n5l3i=+wtuu0E9}tKn!zqDxk*cpvVLPc 
zr(p1sKpEcE8@I0y-f3=VSuB6thl|T3;tAr>g!uy5d5w&3@EA#JE!ZTuUh>1c`El{F z0~c%fb#)95HkBO6iEUA>D2ZNO`ncedb7_y6Mg0kK3O)M!A3lM%Lvngo*P3Mx?zer;lVoFH+jEe4FO30fd$sie#>#`|GstdP}`#v5o}zN)N{-ttPvwsqI|0mAFtaV+$b<;5w;`?>{*Pvsuo-xu7u ziODQG*%7(V>sZ#d+on4L&xL-vyPTQhd=+_FcW7--j5@wG_1C{BORL$^_Vr{A4ZoNt z!TDtm#k;JfZKS4~<$=T(t|k?0YinBslU3%@a}k(Vf42MgH>|UEtoK`K9k|jxk!jQCtucInIH0(eDcsz55Xo@c zC_Zqp9(gC@|H+Y&)jiR-)Apya(%p_TLSjL5* zDwc-hJ4<-*o0~2$(H8W|m>}^ZkaOHI}z`XzzZq zSng9l1bQJ9l!tW@}pHUYkSW3_EjuW z8hoQ{RFm_3!2XOzs@B5u~ z{yDSOcI~~LciwuQJFn}y4<2j=jYtCM3fzS$i14MY8wU zUH)N4WyZTp3Mz_0Gyi8~ z8__EqArXKOtB(irJti2G9MVByOCIpB2>@I8G2gMh7N)4eIo1~6z?@3@<7OQTYs0c6 z=>?_4#E;g|A#C-r+|moNc~&tF3*DT%PcV;1k&xpGPwkMaP(*o}eCMo<`P6xbf!{t8?3C1|Mb{0Cxd8cgEBpA^Cy*n~Y)mKi*L z{H*j^o;icd+8k-@J2pntI*S3W(I;k4J2-%87y$GT-en@t{|s0o!r4xpW>w*gZ) z9p-JlMP%b(QV%?#d-nNHK*;C`(A8RC7IZ$qSt^jRj>^HHp5W|c1c&DXHrr!V+{PGq z4A$w#wqGmbojXdCM8SM(t4godx9DCkUJg#>f?lIri#GKPZrq0X!i)8n`k~1e_Z{K6>rD zDl4W2s#G;adDaq>m2kyIOMO4|ThgP@;j03vQ8uREdu2Y1>E$i4 z-ePSK^921dWi`?IUApOkZnIM=OUR8#Jwl!CdqZ7zUMahq7LF}I6qL(3TO7g>lc){{OBQL#<90dkZl>>(yyeSWE?C>4u z9jKaQDA_MndV!2vh>6}Z^p)o-4+<&;)3C^-!@f-caS^95@-g`9W?)9t<{?_m8AuCe zY`mpKOWtSu0j*edajnfHkdhOW`};}1zLKhFG5MAe7BhLmh$nbPM-Vc@SDy-5Uw2M5 zRb8O?@(M{L8fk$^U z|Jewl(#A@sD>^5a5Fm5m&7R3lGf~g}J*k&=iM3G-R~qGt;DLw6$7jO)D=r=$Wq3i& zpx^+?v8O!|Rlv5<=HvhbILOG$ADt`I58{Ofa1*gCM~@D^i{3uBEn{tP`P#SL8(zz3 zi`JS$zukx9u(r0T&cEl{NJ5QDF{0>#ZqVL1oqvx!L@{eF91z(ASoD+@fr?V3jS*t5 zK|lapzX~h}P1we*pc)gy!>0$qAO{+F0?6*NX-;}kZGrex!7{7YM%ur=a766Xlp7KnCxOswF;H>F1>R^O zb`jzr!1}Aga7u01IMe(IB9`IosoopwqFVbXr$V9R_7Di@=A@{8&jOo><@qr1%Ol@4 z?Jg>p#ZWWilM}*^@de(xg;#hE0YsOM2yzq)b~%B5#}p4BO$jfV#BVvDJbHwQDIL(% zO2HsL%mw(Wk+B?*l}Dy+k9^Nf#+01+^yw2a5gfW}A+_^VTj!p-{pAa({oN3Wo8#l5 z8AfLQ7YrsHvmBh!{od|#JdTTk+-L>~EsIW_<)5_XpAMPNSyXx>huVqr)#i)|?3DYy zXZLQd)2HQt*rWrj2dEtg&W(N7giQWjMiP*6kkG@-W&`xNa`-U7v6hjX1bM19*p}Y& zPWt-M&~zmL9Sof5TY!vq6xk;O!9W{Cu>rlJ44O~&t{GrJ{GUF(rmXB=SyhEAr<@-y z!6!jUB@{fykt6uM)GB_&jn!(0jgkc;vW<5LwC|b)XqkhJJid+%mCoRg8wIXUABQNo 
zKnh-Q-_MI1)Qe)isJ$L75IJPR{=5qta~U*NW}z2|2}VYy091JN-f5(`e=3a(s^$Ix z0fOgbCrhF0K4@@^f#EIeT@AXkYAqo9`WEDwe}Q~WGxU?vz;UI4@Va#tcB8PDE<7d^CL(XEd$}OUImrp(Xu9|OQoq7U}kmHcsCs$ETNJ<)k zUnW6{f#?Gx8oM=Lz6AaBuG$C4G-a5DM(A-H<&WW=6Ik83y=<>p-@lLbD1{sY=4Y6C z^fX;db$2G`)Z*i%YlYQpuKFTL#^P_%@1La0wCF}Dw${w?az8jJ4{d|@uJsqF)7QRS z-1Pv+DlH%+6dE4R34!d`*u4_W43Ec-?X!d$Xocok>i(_`30Ce>ReLFX+ZWO#)49aU zg!@17oT`R8+^`#V#cu3fWxR~q6?O$)Gti_`k;e=T8T~T1d{T2W2U>LzvmJ(@jzUse z`6PHoaUlZ7nSYm6H9>%yMLsv`h9e!VYlFxRPl-jsF~G^|*L?mQ zct>LO@FH&6P@^!3YvZgwo6qy3N>ls2fgzl9NGV`9Y?h1(*^16;>UxpB#TRU7}gXB7cv3 z)$6r4~CAi+Rv?YuY>%@aSrG>C~HY>YtEXCVrTm%DK| zmKg;aWAvv^Sv!kUE&~3r35w)${B>HyM!>39fSQpL;%Y6TSCo_eSR{m#NVc1AL9%Z| zgHQjkdK%8FK*{Hj8#L1cvucox>>$wF{R$4TaqmvHGiP8>f*1}|ZrorP(U?GM+SDGz z|05q;9m8097C~kS_WCFwNpi$_mpjz*V;}9s4mV&EWS&^NO+5Wz))3j$i$`NklV%rx zl6u!Ue<$CC-7Y{`i96(81>fJh69iq8V1T49nM}63$W&8PlT#v`ReAh0a7bBjv!N>&Z=rHy#~rrs@1@_5lf zv$PkN9^vC82_GI2e*diBUx^&rXN^24?dyH&7no6x9zALW^>;8y7*MZkIjU=T(>{Ou z1TI0bOyp*Df?0gf{A9${7s2uFYHx9!d9yG-0rCm|XWS0Abda@Er_4S_|n z;zTLCY(;&be*tOg9ldaQd3od=ASVJ?3>Ne!=@}R-oSzwSi;C(12M32b}DJ^}Dfh4)E5J2+8=uDOs}ntyLA z%FXjAYRR|!Fn)%OaQ^nR-`)+gVKwrT<8_&hlIkPPif@f@(GvMB|9QF=Fr#Av+uI*C z={%cq@`rN`eJYxcJkv<%v<_i<3Dpf9eA3unxo;^`cp0u)jbECRr0@I>2hiD^= zym4+zCSP(OvX40B+2XK9F%~v*sJ~Kni&}T3fcqSQ)H8=Kt;rEaHjVVZ-FnEVsqB?W zv7fU;VPPgoHpDQm4IHe?*39x%tlNfX^P0_o68XCsq1f?Yrf%KS56Dq{PRP=k2V$Qk za9tpqH3%BL9?s!cbqV#?)~{YGf(+%j;py=nt4~8#-?1Ipml$LW{a=cXI!qY;N^)h~ zC!lvrz}Q2>qWLO=)O84GDyzJMsn%aGkJjv?_yK|<>aQscP{jNSu3Qk@1hBg_8Ab0k zfs)`XFu9RR+cL1*9l@+13?-0lXBDGmjiaUX^ov_|uW_n}Pn0ws+R>Hx z$h`MBo#vSJ0ZS$>^k`d*`TQ7GjD}L>3OIw>znFqV@4UPiD92_%U+E2KVVQ%Xmm9>B z&`J+rX#+_^BvHxG%l$NDvHili{08or*U+2)^K|w0SiA&>=VIb)hb8AP=Z{B8>3XCW zV#=3HB-ipyo1$YCn2 z;(`79n*b0p-2Z38+dhDkbzQXQM!PR4tIP6>So2A*W}`>fR&(=iJ#iS#SgP_43W6kP z>Cq!sri-UvFmOQTc$Ip6fG&51imq-th>b8DFQj?c+n=%zI$=3A@K-}d2{CV!1D_Qi6jA@_?Z(?TF`KNyRDi4;-GE~_iXU;xH7W6*9 zL(c4ZdF7R>%D1bJMS6~ep5$0e*bsL80F^nvQ^E3=yLAxPiYlZ=a#Y~f>mnJPHHI`l 
z-suR!=kamgLv<%1Np~rgv4K303mCP^n=x8;3Dedat|477|JRPtALSVD&|aO! zO)X7mrW^AsWaaM%uCxkYVv_u!@Ue+6h#e z+jt<2L4?opKvxlNT?yK9IuPA}jGVP|(e~n|hsC`-jRA+-W!<3#p`>}gy!ql(*FQ$X z4JIAqyJ#cN-GI2GnkDAiY2+BKW5|9Lfq@vt`#ZYbr(ks^i)0}ZACM7FHgnB@z=+i4~@68 zQZw`9-@WO(jsyZeHII>!*IkfGgwf8C2<8ahcH4wq=@I_Gt*ujG`u#iUxN&4(?<5h^ zX2|y9BpdF&!h}S_eGj@L%12RuzYKSp;B#~3R-^^9IK@qIwakzQr2x9xDFV4YSRHrrXjk9G_4+m_Oq}`QghIQ}G-arnV za)bJBU&P{`?Lv2<##V53#o9%X7qdJjM7P?RqW~m_$14IQ>6Ok9NHW?ed9TEnpI%%V zCY5`dSeyb&-It5R`~|3zgUHWPK5`r(v3xHW%>;6D@U*g_0>w&@>J(4OLy}|uVQ0kS z4Yj!7iDHb~tOhwq5kEC59R!_~l2gK5&Uz;)j|YWH_y>+#RlavG9ZFzfad8=tl{W1>x%E?q9Ecl1DSxm@)$yVVUI$3W~*r4fWG+)I(cSyEg65CWEDS5_(GCveR9ZYVwK;qx&r#Q$KuGNyB$=79 zu}rP0nu;*_0ZK4#L8BYMJvo)$umVXRl&0hX^mTxiw&0I3%~rhi`edaT>m|s;!CyXJ zH*E}m$#lMZ&6?P)5@C2ooXJ{{TR=_!o&55+SjZ(QCzX*NT=5V?ZmGQ)tegU*P1h$x z$$2Fpfw6$0x50SO<5Y)#vY<0E#=jlP^ts#F)eY7P#!U4-*oAe-gF;Vazh`H`g3_-> za)2xpT}9p;8qU8L6~)et1jy6ZDczF3RwOYRftsoo)BqfzQ}F$d|EY-h=OZ+2l!@yW z5*ujJ>JsuQRK?H*UBR8!jk_*fsN<^AOj43v=X&CY!Ida8ODaQZteLD_RpH656sR}STi+s@k?24#GIwQ%<1aVV0-wBU@A)iJ!f(d14WY0@p56(@4HGm-(( zoD!~D^_qT0Mx9`=_=Z*y-cbP_Hu6M41jVMv_#qi2($DY&31wxd@2AWNvLfe1J_BK4 zKnP`rfI-+fGc*bU+xlqqaUA>j_n+a%+`oKDdei&#F!s)!iPbt4^B$?(dtc9RpOJ_1 zA^E{R{Byy~scz)*eR}U-4>$Ra+l}n6WpExN1gHy@%``|NWvXtDrBJ8=BF3MCL!rOF z-zj1{mCVe)`G{%_lHu`QL7d$)bVs+N?)nv@Td!LZADukV{)B(Np?e(qHr^YSIli#m z2P0x^`(9nbUcdfzg_}nSUaODcmPy{ha`f1-Bmk!v=^EaD0#zyk2>grOYrnQ*$BsSU z94|601CZ@r3T4~DCHI}*M+?0hi-+EpZOV=#O3ppz4*|J?q4BH6fFLh~+jyJuAP7Mw z$~zw~b-d^Ao`;~O{hc-cVdS}aFd)GLG-`CA6Pgb*+|JbIAj6WtS7$UiGK;QiR2zK! 
z^oa-3itpdQJ4H}lG8}g_BHsE_%i1rtkR1Dq`1LiUq5$sV;iKohG8eE6LZ3Ls=O~c{ zdDdL0G>ct&KC~%hF-Lx+7B{Nd;L!7}+np7t%17+(LrH+z< zUoff#nmyGuu$`tD|2)J4DG4m+o#x)B{1_-(K)wNPEcO0v^XWTui)ltU zd3x5d(ss?AwMVD+Xg1?msVlsVt?Y0*>Mny#n6j|n=MfM9KD6nP;A^*KEyjZep zAO2eg;f>;RqqAF9#nt4FQi2x`)S96=(~=Q0I}m~;dST_{KFo!L5V$q#_J3?R{5N9P zPW}5LfRb(Ao_*5|cnBge41A8JBNAKEBIu9YGH|7M!lYQZ^lBQi z@kVfxTG-BAgq6vNQ|>Ajq<`KYL0%r}<7p(rdiQr6@*l{jr#c5OyMB&&y?k9?zX<8U zNjP5MpIu&v=*SCFUVh+=07F#P-+Nf+nLM8zNrOmHDc=GONe?77P+JTtp=2Bsf&53*o zK8u-S_d;IU`>sO{cYrF$Z6UnJrOIK&va5Z92gAXOiI&iI>)%8EhYC6p_^)pW5O&fq z<0Pi0sBvKsod@)}R9msxu`r8>{w+`*VYTL?F2; zsYMb5 z<;QcG5QEM0`ylQW`cFxq4+YU+xdv3)=U`x>QZT^W6OryEG+L2d3lVd|0ci+&S2;zu zHqQ>#vv8B(`;YcQZa-`s9&at69v2MY0qf!~&8^y>eW^)w>XG+uH^!5qMUpy=jW+XY zMG1;wU!*~ThGj6%jsTxZ0WMFlgHvQL)U)^O-K!kDmxd_?dab`e7rqrHJl!?O=m9aP z|5N315_eSAFTJi`*rZb86qV>HSbELkJNCfA5Sb0b&6-K4!^AzyFDCE2i3nO~WfD^# zgRvh{+O-#YWI}c_6t@_kNeRyTc(!c0y(qAdZ{EBK2)OE(sQ~9O8!ou}RUTw`rd&LG zpt-660t=y7P0gc+58wRbQKSYNW$kx)W5OdRfc_nm>9w{Tb<8y}(X{)YD|9F(q}cZ^ zcjSV&r~Yyq5g%bJ>iWrh!@zCo68FXSi5kiyosdUtKA6Q8AO|({IMMuG0defNFdNNm z*yekV!lg?dEfiw_i!Tb|_v8e-%iabB(d>qe@tmyinp$2z>DDg3qV{t3%UPrW2Vp{h z)EOveYo&#EZ^q4C{1s3a4OLfB`CHVg!@-HG(=xN@YPg9x$T@YCy5=Gq3>p{;pji#L zqq6$eE${EMZ>)o(Z06<3$@flHsW!&sT0ufj8KKCYz@~__zcyBgKelQoL!?@My)%v@ z?A`VOErjO?)+{aS$T}r%^6uX*9xMROj|^IHB+z~ozEOcflRzLN^`wv~KxATZg6Ga% z1EGvIn4)H?n`-n$e7EiYDjdkU-Z53?)fsSF>MYbaa459cQHXhcc`iht{CdrdSaZ#- z+GFOkcS9^o$1VuX+7wtj=_1=k(iXujAul={nxC&;ou(`|$S(knx&|ww?kmW*L&+WU z;*_*L3^^z$!cJCgSD-5tp{UyHWx3P)`7v~ADrsqWc<*{(ov-8 z*0%!yf2l^&3L3*};1b=?S1xIl@KP?{#ndgjJ0>=h!=A^8;X!N+PzKv^ zTcI9`-eF=f{|Oo$^y7z0-(-vper<)oAc6;kMvr?VSk|VBT*pp60k!2NGbG_5VCqZ3 z{ku;BU@>BTf@8Dj`?>L^Ubu<#VN}fokXBn@SekPNP|MYP1_S?Vh~rbkab9Dj>5{V2 zIov9H^H-anSLqfF3a>%N2Hr!Uyxn1Ov`u}aL;EfEV^4=wj#+@i7Y@$jdZ8&?+}vMo zbHIkIP^7FXsD^m7RME;(uqz-lhv+!8z${}CbJCR$A3iumY~SanC?QgtF}!f6$?(cb zJoKu2JTw7L7Gg1@3A*5~6-bhkklJ0A$E(HFg~ki0#_?=6+~y>m-{(^bwLZsRBasHc 
zGt#FHe*7$r?hc$L^hu}8QVNQ31*@~4*Gt0by<HHBO?%emedLLf=d>&R>2zoDG(VAxw08W?CF+5T0rzTRI* ztO*pHOF+?rI|$G!b92Pm*Na$?@Xy%I0jC99Hwf_*Fd>7Bo&5@oTyFvLObfs@IoR1# zpztp$yPY)(Xkgp3ZYz@G-R-KpG&CNsDF?>hlUROnLL>WGf0C-{cc_YTa|2=$906&r z4Ra~hk+Q^GPB4R&1`?Yn4rITIVe}@&L<1;uDLfe(@CZH)@>c|)03aw8?~Rq-Ihe<0 z0KF;_gfixugFO$7ATGjZ9uEH{aEP;5{)mVZc3$3{rT7<-u) zudU67Xcc6VXPr0Ju(_sAx=1zByL!uXd$hoW7X$zeRYAa1ylQfU@xh3r`1sF1zhM6B zH7IsL6!$he`y8O}TC0w_A1~iRbykcXJw6YEbZ)d~q3J5U1b!SsHAJyx7;=|wlshbD zI}C=A9?>d`b+hl;*NH>QQa31jPWa6}j}{mdkpd1n>GMv#BJQp*$U~ok*;Wcy&NLX#t`d zsp%uYZ2%gE-;wu1%6|a*WrwgIM!ul25N!bL6*@u%9eNVTydiO_DHUXS9#5iLN84p8yF)def-JNod37*0Qt90iF2^SYU{^ zg0w0OY!!;(omJb$olLQrjQ@%dTWc*Z%QI8|081jf7lI?-wOG2FvRME+K(H+aHc=5U z+eEdRXqhEawH}`)rFt4UWy$s#7df_f%wiBJ&;I#yq1%&Z^ca~HcQ%L4gQFb6c900}SQh$ljWZq!)S zmk#+2=*mFg@)Ow`plW3daJA;1jkQZTVDLu1zVU7+(h(-dlAJ4&M){nc*;b$GpmmRe)aLv1_*puEnX_sUbY63W9WZ1 z0s7bxS-x6{3$OQFpnTnjny2GKR(7HXWNOe60gB%SklB)&jWtGEL%wAQ@%J}S&xe_Q zzTxuXvLD{LC#c8Epo3^U(x&b!m&WlRRFb1U#2gkiW{ z49{jl?bFDsZkr{{hk=N>BfnOq#Ce$5m5=GjP407QIQ@#fHIpBTQu5x8BlJDf{O7;! 
zMk&7J!zmIexSlo{ox;Z^Xh;k;)(d94orgHw0FI)Xf$N+Yp*;I3DI|v0FZU<{VP^lG z{lrpa^pqpCK4BTn+%Tbh36BS|8R1IQwLU|*38a~q z-hjs(?~Ey^e06Eait-+Oh?KrPNLxURivMR0`Zuou)#U!_%;wd&Jn^S#gLCHrCyE`KyvhXuuTIm^AEG*+C~fA`WZqleH424jXe9tdXKn<&V~4_|*!i#ca^=o1 zjn7_s$yjc9af|A>4KoY6-07$n-03O&3GR|)R zBFKC0+__%6+8ihuG+IIjwixtBt=?`$s%BxLk;4|@CK40kyAV4!NsRgH)g9livRSP8 z=fCW~C%wqCj0g?95q|t=m*v;j11pow)hfZ{J~2m&?~;q+U%zBwA!~w^=>w6)TUeFV z*bTj0YYdKJdrwaeY^TTtFrWoMUPcJM0A|=B;9P}`$$%^XcDH~t2n!9xLs-ds{=5dX z`Ev?x2>#b1u8-{XGX3JQ3+7*r(B;%Nh(1dm8(VE_EaR)sX-!jo1r2ct+X{zn%Wi!M z&yFq``%f*`%8IEL#4meA7+E9*leC{b!}`&1{&hXRFFtMIk@8>*w|R#!d9DpATB`47 z-&IpOPiSO8ah*a;?kDfK9W1RtyGJne&fSg?h?a2~Om75GSFp;0EV>v_aYzxk(mOjK z*;kYj9aJ!0ta6rgihSt`X$TbMfDBaq5-LkEb8d6fD$+5cgGN~fZHKBDkA``>7^%N` z#4_EUtLQePigc1%wCEOBrY@&qj+gLK7IuYm(+)uIKy-wth{E#p3F`=|})c8Syi9K3;gL0mAdEhHE}JKHs9*%q!A8+hS@e~(#ranhXL zjaO7dd-e_yy(UaFYQe<=`k09((`~Mn+3#ZkcC-a@L`-_c+mzbh-^3CdWDBUozLD zpMF+S+XQq;rSZvLlk}4EE}ZNN>`m1y8uj(VsK!KdOBDB+ykIcp>REKj^}+@VJHEAi z0$YJ>G>@K<9 zFx_K^mRi&GrZx!Hl;4s2h=9N*+XHu|Xhs`@L&)>DXcGiKZAwSIa_WqC<_*REsg2~0&HrqmbL7L)JupqC zXKku#^DXNKwRO>uW&^y_ri=qH)dsG}DnMx|BgN(v@713yZZJnP9|ZVXvy6Ku>^x}r z1H12z>zn_)YVSE?ijeh+hVeQXb*0Y*!@*JDdUI5qpX}PFa>90FeYI_-N5-Y;@8co5 zE-qjDD@xl&TZPU#Atxvd7kg6a|C86zO-T0;uce;p{_Y_N*H}ebQsc^k2HMOf|fcov{_%JbaN0%l%b*3bJq|4!Av|EjJGV8K;kB|La8> z8?OwW<+5JQfcn-A+23N}92rJd#RK&Tf+E(2<$XpL79Uug2DRV#QcZ%7Vet^pI_^}G z;utHtUeoE%2mYxH*ev9+vr*MQn4*u%6%p`}te(m05yu2YqdLO*>sdX2-;zNdfzR3P zSdRr3j1YtkcOjV!W`}?Q`mWr)k1Ltp>Zx)92i4 zSESEYuX=L`Y1Vw^S~Jo6#@{Haz%Ob4$#7_Zexq>bF3j;JUTdQl3==Eo!wX8+n+u30D5#qDJf;E^ z2}B7kWhD;Xj+t5FS)>-0<7YDr&5IkXzTX-G^c0yUKF&%Xn}4_edp7R=Wm33y7?0@z z8UYu*cBx^Le2b5Qjqc*mH6OeFRYdq67qma=j}7&(DDKF{sl8m9`8*5L$MlApn%-vs zgvp_rIq5tX+&}bJDUnPKH#uU#N9^g}p`Tvnm-q)mJ4Hjol;_KrkdMws4GRPooJ>vE zt*R>hCwYFAdaf@)CuzbpJ${J#CcFAx*RJ2!%UHGgsI%BsABmOr#yPtDke+kG0A?GdWwt06)5+k0em%6` z%yS)jTuuF-+T!-*HzyOIme^fFz?IYy?4jTO@Y4IN@|rT4`>$P%0eaZ^y5y+RL=xWwFB$&et!3-$l2hmXuCMKM>W3gGqq2ZwxY`S4LB~B)wnlT`Zg%RsA)Y+gO(< 
z!lMk$0jQusP}B1=fSK~>;}MvsPfiN-DEoE_9j(r(LwEkDd?>qdFOx;;`V{`o-4yX> zIt2eB&rYkMJJ#0%m(ZTI;X`-K@810=a9N(mJAy|2$u7QH=@m!qHK)gSYTOdmj}3eQ z3vnoGI94s8!CS9uu0Ez-udq(VzAFg-U>6};r2ae}O_%C8TDgmUSj{zJ#Xho{j;}VK z`Bki;Yu(hWrAANFtAbLq*o2=X->Djg4tq7i8tXMBrLPHda5ovQ0EtIm=!S{xLb4tg zU9l4PAt<6gz=OM%%{Thk*KKlvzV<>^(TrmRo>@{wE;r?zv4*C;e*Z$h$Jnq}b$j7g zm0)4-a&m)HX0*tR-_E!;%hn5X@e(FI2OF;S%dX$-5mF3@t`3a0Shs?avm0JuaC;RX ztIs6%jHQ{X&h*5}8jtk$I();Zi{biTzph&u{)Z}8eR%GTA|b)+!a`V^y6n-Fyo95F z-B^#a0}I+Us+|^nL;vR|5+Onv=;b}thYt_g&EE^T{_LgbWGArYm*?kt7NW&|A)(#L zM*9&hD0}xZHa6>ez1r*Y3|E3wJ3oNVh0w;|hZyeABmyX$<|9WxjDvuWj~U|o>MEZc z6oDB;FdW{1m6a6+7Q8_6N9e|N;23;LnP}`E=Iij2UqAOZ76Sur=(u;+K3dU&nfOCJ z<d z4AawV*&50DxT}BC4;R1hIq{5KdN$>G43xw^N!gX}4R-i$GUU3SM$}A+LHKj;TvGO^ z0zWO){qEcRmP(umeyMWz?)BB?x)|Xv4v!Deh{hSko_&aOOz5E>c*jSV!fUlZi|;{M zWo81S50@H5N>*AK76J=n+>1-S@7NaHZoSU4(6(;pk}h*tp!@5Zhg>!ub<;fD&o_Lc z?k0MA@BLU!9i9HR=f!I?jYD;&PEOYQEoSf1-T6{;f%$8+NUe95qfxk%kz292)15m5 z-u0PhmRjcGH2TAaVwRwtTbbD@> znnYC(AKEQ(`GKp=aZ&L$aof}0ubPU|@sk38%(?t9Dk>>FFtBTQwL!ApJ~-rd55{qH zXd`{NjPITiI^sz^FW+C{wl_X3vz<|q6PTnwsb?E+UsK}sf+4Ba=zf-VjA)W%=hru; z5>9*@gqh_aHqy6@q2)~Q&IyuY3vTt%rcBioP@ahOy`7$kiBE_+P~kycbh<~reDg^e ztsk{eZ`=hTRTpW_q&_p7fzJf50hpe$q--PD_7 zGQ|54@-K{FRJudY7=^EfnmR>OYuBcC43+8BhI2LtF`!`fVZ}Cp+d*j;hJQ6d&OFA~ zn2i5l2)NV4ocK`Zng-Fcl^8~O?nV>yZ}%M))9f!lPl)3u{w2ANIYveuE5>-cfPpf(3PvvZV-)TO?H^<45FeU zNPdZryjyumA;U&*XwJc8Dznft^nGpkot=lZ(}sMfyIi}!pKM7@um1aWacEe}i}b?5 zC;Yq_fq|=Wu~=-K8Q;04(xHY%r~8TJIi)&B`w#I;>}n)XDI`cv8gJB$w_J~enBFq; ze6_#*S+kT(u7>{8>In@j`w!7HP}3XeST{{{jgrYlRc&S_VN7=q9;~J2Rk)&L*>;Ti zbC8O3>)WVmY|Hf6y<9ch>>?ZlR1Sm8(iy>y3anzN?(W?awfyK|T~6s_^^ccx zu1DKMveGp*SfwS4EsrJRZ)ZuG+zAy>iWE~>{9XN#<+i*$9v~Pi6-$#TGdbC=gfc1T zp4`SEr$$~CuwD&unVuQAIsF$WSj$$dzt*z2R5%RG-LW$HUS8T8hSNB^T5?^f6SFSwnCKPu=VF*}8Hg`d66rt=ROj6azbBQV|eFy*Y@D+^5uTDss z4=US#y8&3h+(wmsQJigxH8f7Hvj|OP3-JGoPJz&Q(hr4v51+us zStK!iC0&Uty)+;rz4NfqE);sma`4AT&e?^*h@mu-WVNBu$?<}iEKkHqc^uqMF%H8p zr>2@P-nnk)UZ??P))d?WAQ#dWnuRHe2&hX``Ahmi}NI$*HR1{5m*v{v9DXhE--^g5ND}%C=eW 
z^Y^mxN3{O50#~mnHA-pD`xu4s^t9fr)%5rs+0TD7zEo;Xs(8f9aE_^XI-hI&HgBV+ z+fVmT5leQbYYbkx#=k{UfC4GMLCh!)f(1=iOmNVjgt{%X1C`;_imJi z>(VsZ^^2rh&9Z)Abr?icN}uAYrVkI?=YM&!T-#4_qFQ@pcEC11zGrWkA7mCiSVh;x zWg=s_ku1ukBEfqT!U>LQI6<)cz8}@Pa&A zRS&-tgR(hkdU)zF1?^Un#(?kN_5phNscw$sa09$p zZz$lJ9$w@{mIO+UQ0YClg9Q>E=sO?84ITOiGPuH78M-4!=mD(_gl7ff>h$yZbsMGX zU(X2^V876P}ysHb7KB?%cs!m;SyA*n6P4$RGv` zlt6f(iCCw4be9~f#~QvgZKQyWUGv)t`{!N88>Mc$bWaIMt?7q~34eBn*1}>cq@*R2 z0G^*PB-foxH4sDS|3Fl;XnS8GPkB^*R5WRL9V1k!Jkq75Q7n27C|+PR^2|7a;%^Rs ziXh&Q0HdYm|MI1do15D^W>kXvgm@X~2mV)i5QJNZNE&5&uP)vn?`2T2U6Kv`F|S5yYDzs1g<8MU%t^PrGOGz%IY7q&Jt4YP%jQpqmXN+G z3bjgyIF*5cfp(nyGnk@{T(F2uk0G$Q1hYN*Djw()y~+@E4^h-2j9nQ49gA6qRLY?! zAKlIpT?)O#phO4IH;2X%<$-`xNQ<0uGr)ia{H#xHx7$@_^q`f~<059g*8{Y(ulMI&A?{OM!BQ63B&x zjE1E11)_#Pv>$Fb(A}7L*$-)7qDkcDWKbai;SUiEqNm{4hSU8eitK>G`#*Uch+C&h z14RK4N^0r_xVlY`$Yip z?{lFz9G~$$*NhivPtc}+Vm$+`oKfhC%eqU7h-gDwGxOSJ(ue}6lo+C~9}8*`y*`q&UOpMn99Y~hm5N%;v>g&Guh z)x4XNF~kjl)>O7`uM%Vd4n_cKefz$RDfbOhLsx?H;O^ujB!?9h6kPxOL^FbFTC9A$ z4#ii%&BRy-gB%D#D#d=BI6%2nITJ{beQE*Ct`iK+`s>JR2bkNxS3w87q4M>P$LoNO zI<(YTkX0lN_zL0&q0Or_cLX5T&?COZoZ2gUAx1gU0UvyBLIo z!>=2<&k%qiTsIz?F`YV;iZ=Am4S-$clwKqiVS@hso5U8N1Vut%v;tdBdM*@{fKE<3 zw{U?zQ9uMD^8q_7aI3xSlKlUJ*u%`PpIj=NB;e5C;MY@Awk5S&%i}nDTkIk7gpf_` zF`?MWSu*FZ<0rYPPw_$hzooovz_#6h@{BhqShzi)0Ee0c@;vLu>gwZ>3C=yRiKh7v zQd74ghzA4)3mPn~01bo+%`h3_jMl~9Kark70Y$?E?yj&h@v`Z~UI5@k5T|URp9e5w zyipkiceaVKTYpL8gIYa8^g$j5crT;-_f78_qxFlmme<|wrOyFYl(c&T6x818x0W*? 
z_wwo-r7jL{$qP#I$U!L7XI>uVj{LO6K{WalFC~9heu3beZ|}cB`Fq$N#w+Br2;l+P zq&T-!fO>E8v4)%{?rqhUQ3}v!s>u51|FSA#)~27&cG;UQ_RfzJ_8i2IW`=mJwIZ#> z&|A00GB=w}ukIpBX{dNaJ_MSW=rrA|KU8lQrOjm#il^xE2;dJ>+- z=0}e3-o2m*AQJ!^a$CT`TOsghA261^Kyo;=`NXH3{^hHsXaTLQ%_zieHgKd3lDsd_ z($)h|u7s_ee$GFiKtvAt82)=zsD!{+KF|aMWRR^(&+TeF2upyvO8L@N2|Ik>e40)a zI{5W#Xe<{*Y;xlIbiib7UK&bW@98;9X6i76kQqV`re?p>8kn|{_}^VyA8`hMyFN~NVV9zDL~u=X}}f$*B0LY zF#a!wBUfZ|UlxKNLwRxhUvx52v+J+&;T%4?sg+4@07qEo^f}iQS1@&-X0WSVtoypCqBbcCpaNxcE z^EYYp96Wn=`fQyI74ba}4bBv#FZGE=fUyTOrb=QW@Myz4_Zufu`tJ5rn_vF_dU|OT zJp?7TtMrF@^}<0kf@&3fp;ZrGYYOY!q#!jD*W5VolP6J*baV(z|M70$DGDk;cN-A5 zbxmA4u-&X<0hLFE(htuwRkcngT6H2gA4Z!{z)cq~TQrYvntRlQ!oQ?`p3!gU1dm(t z-X%_G=#-1~@_qaSN-^Af_8limUtZi?h=s=xG6pY*IrxJtij&iOAZN*M!5OcS>BS6f z%prCIW>KC)s>829suj=(2-nUM0FJuvg6G(I?xzV+$g1?oKv-4?%=r(d#;1m73+4)X zMgB&Dk_O7kfYukd$Sead{E9-Mv%0&S22IX6&X@;o%?<~OQ? zA^Q0jG<(YtxTpD~QFA|VW`OGOFWkP3ZS!V)kd3kVnSyZ{fF1xSYsU4PLvxchmENhT zMWfIosq~a~Q=*j$WL&mvzjk0hm)i!Vz-4#mH*T}xi0ae(vpFmL2uYwSiBWtxkU+FC zo|s#nQ#~$s_2K6EQVjnrGM@kd5f9aem^OrhpLbZdgz$6m7HD#Ttvg85W)L)G{I6^( zJgx=XO=c9zMgLz9XX~}R39|910OCrUaZFS1YXy*WaaaPy+X8e!508RE)Ep?LZZ4Nm z%5w3pK=6W~3b1wOr7du5S5Z+S5mE+!UIh1_Nc{9-e3Np3LOFq7*!rr`z^|`eO}!xh zs;7r-%Ovq;MK&3~x%$zj(Y9>!E9;MdP`t>YzollQj!_FwQVF{HH`j>I0k-)Q z6{=v)fqxGIZUAi&ngp@ZBvpdf8op!_SlT%SS2w@5DDcMy^qqlL?eZLnRyGnCDhG^h zs%eBWjMcX{)*wh-Y#xKE`2uZNS+l-L5VWoGBT)2}5HJ;&68I|sxJOI0)`8*g1g~tl zw0X?b{Xc8#)gb`)eSB_z8QGu@9*|5T(6lmKIL9zxFd*tZCI7NGk}2I>Xc!Cpd-RFw z01V9S*$ZT#?mFTu7LsR`zAQyrZmF(@EqLaC7(uyHpgRNHg}v+GrDK^=^@O0niKwKQ zl)0I7+WhC*L>YegZ+>CPRfcj4Vfx#Y`~LotbUyR?=&p+1nlP+eG1Vf@QG0@(dHgqt zvIw{iZ{>GekBf5TA44_^V|E@8K?M;o%>cQFI0{zk10poInP^JVC}9r@QiX$b(~K9J4By)b z0$a`U%huzpd9WRF^!=P4uVtAx$h|7z(avW{`seX#yjO?A!t-b6tCqu$R{h#sW~d&& z|Fn%BtwC%>H^CU#0J=WiNE%wQY7jdOC)n6*+R}=$|M@VDcto_7Rq9ygJKt6DrRg<0 z%$E$GqMILW*MQQ6-Nk$STeNl`MDKF*B1T`VyRgOkY%q8eyc6iy=q-9E;iOy zDt=RT^Gg-If4(op$9B0Rn-zkvf?oXCJ=ic7ZED3MJcwXndlqn9&oxcFrEFDT-!N_) 
zKp#c`tP(fk8Rrek^z{oG9;0^cB47kXaBEjz3g0AR1VFuXJafB@0gToycT)J&1ns>Z zB|8=o$pt~4@2FL4&Er+yL+2xa^r)TIf!pl;pisttf)uWOwZTr44J`8*rNEagVzn_M zLaana8)0T3CPh?MS#3W9IS#<uO zx2+q}^4hkQ*T~zEm{L#>N0#?kB-P>|H6@a1O}#De#dbBa_9Zi>Kz(Z|*oyq;u8oVc za^|M|%^$`m*NJbEu2{;;TbWVM<&)~#y}FmSfqF$j0ZrpK%(5A#qfoK6|ILtARc!^> z9y)rGqeT`gIo1)8cYOzOMc4!TKeNoQtT}U^ReYjNtE^djmcqf@P1EaBK#7&W zBu!y9fzW*z9)=wIVVSrO#gBK>#m}_Nb6EX9?R{rZRoS-nQM7Fpq`Q??L_lp!pwfbX zWLqxkQHc@+2_gbY&NjvdqonqtyJ6~3Q zc~}V#;;28!g?#x^D<$WhQQ@^n+vYUs*5{=+=d(-&r0Av{h8)^DVUoA1E4*P+K&@z% z`SlRECw|*)$5(+H26V1Vi)-UDjbu(0f)gC)Z3%eGu3d^~T&NDg_}W<=9Sn@F<)sh% zZer&fjExQ&=5#!rpdj_S%I{OXH=2Ucb1NIp7Z@iOI*cvxqOdNm+7`UtmGG_>x^)$) z26Ft?qtA6?JD>e}ezg8+?btg2iY>8L26Ez|UteH12a>`P-nT1F{O;MFp1vwczplzc zwi@Xs_%3U*hQGAwq%?>duB5E{E~N7D-u?*`&eS(iqd6&c5wbZTJ#=ko`l->Zb zn#-0w+^^RxZaDw;(cLfmv4G_-XUH4yHSK)YCN;S#=EXd_dvW?fe(PZrazIQ~H*h#r zz{oOHbO&6~-qV5bdsdc~w{s39D<@i}swQ(|%5nW{L#mlqdCedUjK~6YZ`d{rNpEdX zOO=a!Jjy>h-IfIoQGe6<+V++4CoIg&=5Mi-dur5qriJyRc9f+)u-JX}d>|B?kkHPVMS9<{P+vl|Cljfa)vcw&?YMAwbXu>& zD@u4}(c_c;ZrzMe{kl%C_t0o?ZKK5}zx7GN`SX|>7PSk}vl$HO?)dQFY@lXsF&^c)~=Nm=yFzAM7>?lWJoZ z=O)2@;i7Gs$Rwqc8$AQLiXyHWtpzPrp7;Gj1O=KLisv7E?TM!p2}M~K^opOqz>bX? zE0Y_(i0T0-QurkgR#_**CEQ#TvBQYsj!p9%Qqm+Led)Jsvww7&rBM6p#)@Yo;XwP# zi?Xj*7xnZ|uZ>+Ac;&q?87Es^$U&%e95N0@oj^Oc_373kO&lN%kvP4|b36B@(z|Gd zu1mA}p$W?3fTjEb^nxqFAtW(hbrTb4=gI*Y4fvcr_r|P#zm>yynWDV#()JHt&q5QY z)!x=P_I5;gxqiKeo#0xXjWeW-asE$9dt5Cj9Xe!w`e{M;^pU87gO%2EPRlOW`p#`$ zZQNMpYzlS`a6gjBG+#AAq12iYBW>Wb$mdyS850wH$*|L0yK|uaDZ1dB1V_j9tIw4u zB07=}jFY8mJL`B9ouhh#>$$crru!0q>O~=Ni9R(_eX4S(je}d`Rm-@N59TsKMFZ-i z!-e%#PZ*--9dyh4Ff`nJq>lA-^Yeu!)zknqbnFm9UzF5G!K*uO-3&58%l+=%h)5?# zjbvp9QUmo0?=kXILDLh=Wz~f)L{*Jl7_yknE^xFFjgbDb1~Z$#7SSlhe{S7LuW;J! 
z_I7t&3xe9s)88|pE1xf14)v8|4e>g8(UhgjG>(BM52CNtMJHdZei%pZiBqKN#!0xc6n+;<7G@gOLqkWGE%?wew4j`|kHg<3AeN zd{TO`%P~CLZs=F`?64Y1Nw;`jvsbCV-I!qaJ-cDITENp%*6%d3&VdLx{UA4Yw$vs1 z$X@f2;YXHs&RH6nw7VXE+)eytPZK2QIWJ5!DmCpr4nNm@qMhA|Fzw1q9uoL1*%oIi8s z-KrC}OK(!nvTYNi_gKn5G>C{yo0#hMPi@lvxlCN8l5dqKXo4gb>k9%?b53j*tYWlL zYuAa-S&WT4%ddYLEWIEi+{Mr7bZ)y?^G&WZ`H?<9JR4`Y>=9NX5K`%a?Yy8HM@ z9(reP&xcJWC|kKK_Bupzi1@L6uTj)4wyvcmcG9>hC6|4*?qy3U-I4ewV(sHCmJJd- zd68Dx(9}SuetF64Wc8b;Kx+VIqg2z^_5P0$&{j`&>^@v`lL2i8tI{IGWpcRr6rtEa zXth*Nt1^wNw`8}ZmpgYJIxAxF2jkO=#YGM^v@mZJ?(5<3|i^7Ao_0&d+{3}`>BMCf4N{PMoO zwzqfRtWi^qb$_DrSH(jJC#TJk}4c=7D5@&Y_ zmKSK|-yPxW-}qWoESbk6U9(tX`FXd7fYF)MB6a(CUa@v3FQ(2h@zT(LU$}w{gx-`} z8^3=wEK&z<(kvU>+yf@&=3PY#+39IG=9z|i%uFJ24_3G{RM1R4$T3}>AaKJpMCZy1bj3%J-f)K&~d1vp{$!6VKW`P0v5s`3hr2kvU z_}6zKzWb`5J@Z*F1BmN)oj`5VyscTLCV3Vtu}QSTIBg!g^n;T-;$nim)O*9vv`XwL zTa(4dZzWYjLMv|N=#%%O1x=y{Q)!%Io_F~krM!-HwArkfKWAtq%3pW$Wth}=#fw>J z%3YrV`uVn9&n^m0cBaG+8BYca+gZ8>fFB66QJU+RLrggAOV)hin#|L%*I61<(Yc;d zFezlhw~7V_H>Q&wS-jL>Z6RiLa>GXc4(9ORBn^%g9sTJi%TyZFCFYHndf&Z^omS7R za+2V$D@a8*e)gh?{~UCp&?-VJ^K zPI$)%Yf9lk?#i*uJ%t7f$zM$Z#oZ0V>+lx5JH?*x*-Qc=Z{KfJ{#(D2Qeb$3@CwYV%ff;-|LM8D>b=MI21iky3s^nUk>ELB zZ8!tE_nQg{N=E`~o<_I*_M0BFj9(^?oJs&|0^gx(jlM$Hkpf6mbh!y(c6~n?P3@=& zJ0f+U(>WcIC_zm%br>=ScmL*QVT(W3(5Hv_Bg?AJO>&$)TUjjQ&mB907A1<9Rjf6g zg+;H^*C7rRfJ-w^jA@1WqWksq^CX;AYqRteV(vOD``K<_5(qRaHaX!gedjLUD$Z>I zdS2&icHIa(RO#Ys*i^%GGUbBK33=F4-5H1e;9iGP=YEfQbUDutx*qO-IPGxT=#cUE zO=;h+_nxB*J$S*|$TLKGK5NVsU&voT8>FkYCDrtwZyW{+md4Aanb~-9M+z^f{bAJk z`^d@wI#A8g4+P8oYI=X)rxviD!C&xb&Q*o+QZV^+J`9_5)R}Cz=x8}qqdDQR+B45} zF2kXQfkEb~9*twO`^HUzI^z9~x7N;U2!};?yh!}`@qPXk^i}0ukvcT`ZfnSSK0x_-$3cN>m;bnqhBwsBFelqLC2D(4Jx3l~ko5un zdX2^1tDCH8Je5XlKgehOs?~X1%ORV`aDRi(4;~3QU;ucNQ8VIj3uMhVqak7xo}fx| z%Rf4)l8mrMU_R;9U@zkub(ImPlC&xb$+`IVqt_>H{xM|F`;%7Uu?VRzaY+gN8>ck^ z!C!>3{2Y}tT7)KMTjpICf*!Blsi3gey&)@k;A)V!4(U1@WGNhm9QD4kbw4Jm3Q3uI z+zc91MjLaJXK1!%3@I6y4z~}uyx>v2W%2r_O(;{h7UqBK{p~l!Qjgq@+B0o>&&1y4 
z-CZ-0pfp}XQ>A`C;iuh6tDGLz-)5%Gn3`)C$3l179otYs#upp=m-0ue;K`ey21}N@P~k& z%gFq+fUnge2A1L?-e+>BqVw@tq^K6J>M%cMXG9y9M9x@AvOHZGfh8YY(nQkC63B%Ni0tvAXMqjv}DrWlpcczO%ygo|KlJPDX10V!OJ( z+ckW?u|J||!f^&mGS z94yRM7yfixHS-7rnCQWg!w4~m)Am~yqEG1>NXHKsnyC(_Kb}8;M6>1amkyK0k&b*8 z4i2rp`QP7p(I4$|kXnpLc%e!Ez*NqJ9=K9A2`iR&hoSGpdO;{h~H2YgJCbCr3o>ujip+n5JUL)so6oOFG3J4>6#2M;6r z(*Bf!f*4!vrIdG-<@eG|RYP<=^wdP46bJF@D0g>vz9xo}Y*b;~wZZ=;k9M6i8ekN9 z)4M$WC9<0^)s7g3V#O@8@!FgZE+46#^2Kq6=1q(Cr4~1IJx0G=^ek?peau7mq(e6E zsEcKL{IW^F4Su`CsR2Qp*`dmRBu#HuF-p=z>~G zE1!0EB(9(>|AX*v{{i0!i(2FCbku!5vBTQKRBc=ZHy=L2&UNl8onZoyi`4JUL;nR~m+`okttrX3sG`WL>=2^(tu1Gy=)E|? za9qv&@88<<2>!gp1dQ=V^{cjl_qrJrqnst=_bl2e@;~oDj{Vo4{_9(RtNnl3xidSz zQi}d-BX{2*IsV`O{hz*5@CO-22dRT5w3;w!>Imhgp~LIlx6Z%!;!8z+M$n_SX*a|6wRBW5Bx=60>wTN+)q8CtdStk zJj+|CM5rb5p^>huM{$d%&uurgla~ zrU8?oDLFaE?r%Hthd=XL_jUH4g2r2oQlElkB}|EddzGAP)79o@kI>d%A1lL#=EX{X zW(N1$_V0tnD|w6K!JfKZ*fKgnH8llYX&(t=8z|D+#~)I1yxHB>En-HVXf~mBvz>)` zmHRq}_dymkar#GSzBgf`K1KiIReUH}-Ki$z8|=(DlR9XgI(~=j+v;%9?2?7|OY5QD zSLd&)^%UIXglPmcFsi`IU1xi@g7{o@MZHhIdP7G5$~;|t1G*kug-dgTj_>d6`wZ@s zPnc-2srW$YW{nWhAL=Sbvgz#;T~czq5K6I*-PhM?%wMA9gzxiWUAJk6JYM+}sGbwf z1|B9`0{d8;5GwiY^I{`Hir{^z&$TiLNBc7Rb4e50QS84CsB4=#Xi(ZqH<#bTdtM8v zPwZL8=@tWTVo7Jl|wTA~-{4_Q#g80Fac zJ24X|j;>aNQ*k^zKeR=l{@Pa)^%TtkkJ3LynOI!Tm)2J#L<5^_8S%k zpLGhYV~uF9`;6!R^yoH+A{ZYDOGw_}qgCb%ay$6R`Ot*nAedyrsCotEBRw@@cMO|S4pFf`8%(l0VqsnquystGoGBWW zb#Cjz#htRS?+J&2s^k_5NB14wmtg*lB0`%qvB`Tx(1yqslK(#WfFl_kgk|B(%(G@8N9_}+>v7MhDo;+j*A|!I&(ZU@M38=MMaSS`|GKC!&u8FBni8?T- zWPqd~{g-G;yfcxupvccAU3gAt$3aK!aGm#w&r$R4<5VCNlVY0NDZEOZx-UUlVM81o zJPRDnu6L3payg%zPD0Zx!)4w!yZ-wUC%>tnaoig>#DO7Mh(`<{0}nW#YFNSZa6F&E z1MHOHN{J}%I!f`AcJN|_`k{jNzBz`~$4u5K>9fp6Sag@HB8x`Io7)E0bhE#Hxz!Q3#ei6&PU% zscph5`aP&J9)MrU?ywD=->{}z8IN+cJ?FVAmeF}|&)&Uf&}#U&U12YUmrUGtTmEl^ z6`=s7o@f1ZMWl}~XCe>);X}ee-e8KART*VSDMI(#jV;-?$#2C*(H>yMXSIR~I{-7h5|NOpi{3Tl+{x?P1 z!^hs0ZWni4al@|BYm_W|h4qhNHoN~*V{+(rLF11_t5@0kPwe35uF?&^>H7(L<$z()_}s%0%VFy;Q{Df3`TV37J;WD>KoZ 
zZZtjA0@B#Wdj#Ywa6{uDeC5Hq!!Xja50^7WP4Li;JOM8raSxeJbW>P28JLbnY;WsQ zCZU|-OO!0^7Y27X9-74y_&>+dLOPnmIjD-*xB^gQ3ca2@hTS6R!HHV}ZI!dg|7GRIsD`y~)>6h=z(M@D3enQZd8al|Gb<&hA|(kHFj65> zn(%UB7~a*)YPYPs$005f7iNjBbtC$nCZ?l|e}4cqp}&?IPw4D3^sQqAS;(GvvN8Vk zuRanb-f#c>cH+f<`?vqMZ~ZCwj{NW6dOPq%)SC5wC2Ysn|Maa}26*I8Tz|jR>2EzG z_@grq|JMt-|3uhK5i(?4eY@MNa7rb)Da>_l0HdLmm~)b7Xq{>L)$#L3Ka70C#xQA~ z!QoG$g4KR9hS%k(Y?E2>;&cl}a1veJ5dVQRE8xKSSZBpOD(P6q9oT_H1#7#IX&5eX zE>v(>x0psg_j&HJV=QbD6qTEyqm@6L}Hxd%Af!IkyXG@NyKhs z;c5H)a_9s8497p#@8hDn&ecV__T-W)NyvOz+C7hxk`5xjsTS@J^M#KTvKC(O3_fOY znZs>iT<*|*)vX}rs{&bf7)TVaM7m7RmJIgOA|tt45Pl`J_SJ-k-WP1$kGZjh=+{YU z@J!emhlpAx2X09N+dWv&M15|0c>YdT=!00;U~+4R^M}&JkZCa?Z=z6+hktpQe*Z)^ z()-U}yk5a9`f4wJQpL}gFB8X)EJDv$M)2B4#RLto6FMMewZ!aaK3&%sG-j17Kt0$c)I_u4s92ln9cy zT=qPqsG#6Y)&=sa4v4JWn@)V30$r;*r;~6B9kr+14F(RKiOP(3_ zTMr_hoHJEOmtJF)A~DVt1E<+?#aE^l6E;MxTwp}I%_70ygl*1Yc|6e!9CWUqpK!(? z=zJO<4;jL0{;O}I+JPN#UKpL%bRI7wt1DDITP>m~>9(|B{JiYv0W7D_mGSpw4s!!> zlC>!w2<6UtFtYj_%4*(CQgWO^qs1y~jQ5ZX+Xq}_ai13P<)a!pI(6F8MvLZ&4Lrch zOCyh`>m?jp(=~Ml2e@MGR*i>9WyC?@>HtNrTJUn7ix3mWkDgk%+}^}ao|;Q|v|us~ zwE1z5AMa)twXH&=uDC_orw0kmYj}i6+YlyrYI1%bhl#5VD;`GMx@0CH=}n*;Np#OK1!`1C{h(2^UA7E0{)Xknr_4 z(!|G!fG zlH=a2Hq%6#6wkwX`>B?H{&=c>Kn+@~Ib@SzueTVb7Oq?oGrAS524UWKz`n@k1%u=d z@HBqdMZKVRtig@bddZyB72X5v?#g_hB0M7?LfbNFUr; z=wyJ=@)meiOTY>B*8N;w74xV4rW^Q5~GF16NSWaO}_*^S3UJ9V}i<3J2HB;0(%QGYzHS`oZB04;O_3N z8s@(>2mql=v0?&tQB^cQ5{f|9AW+k`f;=&fQ|~?g8shE)hPN2L{!f`ym$TbxjEXuHUo_2)v4B}R?qhx#i~FSJW=u4 ziuC5=x*F{*(iV7#pQ(4fe>s&x55Z>_9a|VH^#o&mL9;=m%Uv#M*r9}0+h(2HSP8^d z30Yl>BX%}<%gDy=68(AHIS-(drG#k~9+fTH^Q}#|AacsY`)XbEfg1}0a&Fu{TX5X0 z#*uBRW~N@uPB{i(3~x}VX#BpRE_d;hKn+W3=Jcq`sFQM2F@18WMs!rE@Cn^D4!fZh zP%_tnEcmha!3Gge4H4;-dm@0ij0ocZSwWL_5K(x-inCA+3+iBeSg3bbF?_cL9y&>| zZ>*G=*JsPVF!K|cNBjrt_l6oP4yWf@57b_MAU?g{XA8D)vY@_HWD63&t-ZayW%{8e zDq>jfhB=i~EzNdBivlmVNGqHR=8HM47-s+ePUbY(wa$&rD+4`Ie;Q{tsIZFK8Y6oM 
z;jcONVRNA>xFxMX(X6sq^!6lMZ~POlGd^N?awlcVcN@cO#?7kBoI1k_<2O~NGGhR0}@Pr=b5n`cI_6*!_-`?sVxTu%)I8hayiTJ{$GY7$5MAVg@PHqo0oRH@Ovr-2!6b-mZjaB|gw{=> zL(fDymfNjp)p<69mh}p!^x#|*4SDgjfFnX-tTcdCqD6D-;L$FRFx%E^R3FKu@dpF# zxmLLa`*WXHd7K-!+EVf|LR1v#u0AR(ps0yFHmU=bTbK|F&7sz;e# zlE6L^Z5z(|kT4Fhbp=9&T1ZJ78ohw=4O^ahuzY%A#Jk)dXVDkiP_Ir#s2bAG-pSdW zd96XnG_K&R)u1fUrf#=e47qori=Ys~upL%aN2oseBY8$QXvX$WdHQ8lM-xNg&4ng}$?FhROQ z^?_#2mD}vL@r_NO93Y`K;NBF0qI|B2!KZ!NVM>%u5ZVaCkfp&UYFJk6Q69!G z)?o9H5EQkZ3@zH^L?v%Of~P8Gx0zDs`L?ba)Oz@BhnkI%N0Y$Mh<&n**VUr%)EkwR zY}Rb7h~91`Co4Wxe~(>M5VvuGgws%T;iBH3_hRKof#m4Z4Z<7Uf#27X7B6?wlaSNYNr@d~b?mrD`!h*SocQItY$eKf_P{K#1P{G(k z&1$~viLeyGA>nZbN_NKQpjsLiv-oHEIr^w}-1w;KteO~rcB9Xkm^ULUm5w*s?lfA+ zX}EMIwQyP;-ztsay!_tCLGRVFPLfskRK#z6^dxjzE(w8ukgeT z?^PDs+a%0tbX+s)Q8dc-$;s?Eoy4BlVG*cP{c_PHQ~CZ85?DdMCe42Qy?U`#W=EW5 zxP#jd_EJbde31$DdY|H`z*PGr{B7g4_JE6~!9B~XGw`x0kv3o7c08iw6|~~e#Q+32 zi<|T1OkQN;r6p&N7A_(IOnTddut>d}#M)HDoAu_&U;{!B2Tw@dY9wN5B-D{sxPlU-^1IYCkg9a{v172p9u;s^&o&L` z;>_{XL@%JLC^g|0V-whTZIJlL;`%6-g>X?vV7K7R_Yya+$@x6wHSKb*!Q>)$8yIPN znFZWpUel-HkDBH>!N6jP+;7ipx1FkRlk`^2jLzI8=rP6azgqx>34xbQ^m(l|yTUXN z1jo9C0>lmvxK8<%^ z=?@<+PCQGnu$4aWcHb^Q9kbPDuD;0C_8LSu=!f^IFGw{uN|R#2iyNBYD}>(p9MG;(Nu28Q{&I-i^qya{RON?+J%u5wRC4@uCbSubA^da%7MtEX{x=X{h^Q>aYG&-4Ii`w-S)`&-in7HiM z=#Jrs(=^6{W@=>vE#F67!bKZDI>EI_zTMt6ikH@W$;pIf4qx8~(@4YS@ng4b-i2-H zln0F8=z9~F6yU*J71!dz1F$2M2QMNp)N<(c7ojP0A|78so7hZq9vK=uKf@;CV~wq)U?Ts{*!v*uR+Q4G7N0557S=OY0R zzus%V2S=MuB|n0j;>0ZKCX%k7^x6@O*mAFf5aU8-s>>l{M>eKNsz^T3kfM!D%SUPW z0%<=QQ|m_om>@m2L(j|gvFeq?RZek8BZ+L|T7lG_Kn!4H<{yJj#n`c2uzl9|r+HFx zc2lS1RWUIu^<&AcuK}6>7|V za5vRR#U}}*RM^DC%Ewi)eCv>8!fW;+!jq&vUw)6(ml#E?JHg=rU04F~(#Mx8ksz>u zA*7(nO@JB?t>du26h7Jod&*V8xja+8-391PLaWKxlO-AaZ6I7!@AGnw#3@mAPNno4pYcGIH5P2|wqB*vO*&garzuQvW$z@oX zh-gg?z(n(r9ZhrwDwVz7~v*$32B;d`WfuE6y){H}nA`-1GT zcQ@xGPm7P_!v$*KAT+ch^Bk%|qs|Ro=C7U#^lAt0K89J^SAk3<3}QEXfnFsvE&*{M zNj%jDu$%1-EAsckQ}yQwXl?*cajo{s;<~%UbKt3IoJ(W3!z^DPAP<$f^msqXi~A5R 
zC!b#gVseV$yt9-2Zrzy#t0UEmgyYS`uwq;50bWR|D>URsAWjHYikGHRCjnq6Wn#>X z`xLv46553#Zfp|y$k3&ux#9s~{`nB*eknwBA@EJm+$q#U99z-iuvtHlHyUi{1nN$W zIj~C+6;Pa5^fuCpL@u)fNsU_C6)HY2O5HBcdA2ujavqhZU%C=4%ipw~cubUx_)*j| zG?MW_>G$!ceATutQ@n5Lk{M{{FguoH1)VFPvv)*+)RMDU04ywX`oXp%yWtRDBftwJ z5ErQ$0>JdcgZBsNdQ}jdLN2)x>qnp`?OL#p4IzqlK$DLZG&=OLy=IZYg=pG!WHwrW zqUb8<(;^Z6>j8*SE-!6t(7DYX0F4BG5||MePJkhJyehOvSOR3%$4w53N6b8#dHcZC zPim7e7jYX_aJXPqw#6lveD+1y??%brx(54T%FE<$E)TH=aY2JNqY$3jg#w`*u*el8 zheXmB%?kbDsCWo)Mi4%nPnR233p3@CvJ<9nJ~it;NS+D-;t~ggL=aZu)d-x1>s_u9 zr?&&&{RL}UK_WC_Pe|xT?m=WYBpc~w3?P(XaVHSW4#cS2nVs_~#(Q+YtFeOqr)p*g z>aoJYR@kHhLfNY_rpix)=B5Uip2!E*XN-Yeo|p0X=XWx(?K*v zoC7)V^z$8c1rn%A(l*P9d1KGeTP(`iCvWkB(hVpm-0cLAo*E)7< zd00KQRDj$aqHz(9hNRdhh8!>Wi0(<$y04S%Q4EvzDPvXBDDr-q(pDy zLLz>^3;hUjWsuEUE<^1p#3+S$Dt}(*cN>Wvv_wIpewLMcxsh#-t>NeCe87;k@Mi!Z zVru9K)ktwZ5MZ4|U45w35M3|B+xbY6379;0da}5Qgbu=1eZ+~w+^Je(Bio zFZNIpJjhO>GSY(xvl@YDI)L9^G{n)ARW<$LR(7IxUPFREI7Js@o2FRAA-RC0tkBm$ zutf(nMOva+0&aeU3iNsgP7(hsfx{y*cFEoXd6h^RRl~afeuOi_8g04 z^yI;^RKh}fBbpjm)|=s_lH7#2c_6LT03z$(^p5cwLm6Xb?eJQ^UnYBg&zB;gJ1rVg zU3|JP?^OUy@JCHb5{*2SNatGK=()TnMiWP0AIpst3{i@@RA7CS7jVO&XQi76=DIy^ z%hIiI?H*frBcHQUVyHQUA&JQce QlTV>a%l;$f#HHW=2V0a8_5c6? 
literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starpupy_handle_perf_pickle.png b/doc/doxygen/chapters/images/starpupy_handle_perf_pickle.png new file mode 100644 index 0000000000000000000000000000000000000000..99424e6054cb4d3e29dfb67a52c40e5840be15ff GIT binary patch literal 117441 zcmeGEbySw?+CB#>x);c6)g0v?R0MHQC-%tzF}%%ZEB?Zx2@hyD_6YYWpUGhoAcg}D>%r;8{(=~R8-q^$p2SGi9{Mvt)ilmxNuhS zR?twJowZ_jeeqZabNI~a{jb(~(>>qg{)n2L;p1&;`L)|~*;ifo=M$$q4cCc()kbN^ zy8rzsSc?1ZI-2d9+1I{jzqmh@`T6?j-o?(BckVFXr(AM#5;3U=@K*>)sVb+xmtrtl zth?K7KThJ$uN$(`(_8=j1^!G}Cn59SFSkAoXZ!D04@IkY{^zUP=MVO8{Lfb(UcH!H z`=77AeGzw9{hzOH7%~gt`u_Yn$x8|O_kUAS?c4LeABVf~zc=&ulK9`H^7}sg@4j2P z5C2aewlpwx=SHmWD<#e9_9v%4V`pZuZx&&$>x8R;hn}{PtcSo!ycW5^`6r zybKD8dRXu@Iw*GymARtG@e|}c|7n-EO^Ota-@o|!o}T?oZ)Gr_Q8n{a-`9g3Q#Efd z*Ie@ydVBWX`e^gEEONl^{{HA~A`7w<8ag_Cg(my1&)Xk7ctCqA=gE`JENpBot^Ynz zsvGvqLU&@H`};?mwWLRDX4}^5NlQx`HzWkRER2as$m5!)=121fKfgFAdHHf(sSjse z(zW8%#X-mQ+!q!W;`ncv zL`o~Qq?^@6$sH70oUEayrap9|{MfakzH)z_`W9{1MWwjj|DM;ItI{%IyKP3>HQTc7 zxU196&CPFGS!KT-!vgW~_qXSJwTfz=w}*#EP;l@t1HowcB^&d6a&5x78#mtJZ*3NS zSq63&dormMy3-~a4ZVE1yYQcX*xA@3zYHJe;j#JlVO4iek5x z-6T8q-^V%=eWrTnd+LOQ1hPKorUq$gX<16*vNZBtau)E2IJ!9^GIH>r$2%$EU%to+i2b=+-dl0sSBJlS zYrMR)Sl_#0>podi)40`Z)+nVJD?Q!AC%I21_#Ga zPFmw}mekawG}3({`D?hzFY{%*1Ex&204T z!<_&Dy99@sk$5jQ8RORXmZxopUhd&D4p)j*=3r*-3V5dOJ)LepsG+Hu*blH;>^*bO zFD~xzzgOXHW)Gd+Zn^iUZbauyryEoTy+}++>Hj)4BUk^tn|NDxpl9cg@TJ9wb(_K;{bi-f5kKCVWfPl|lQYENmiOz& zrp)*6X|gIl-rJy*>tM4uH<)OQ-B_~r&r13riX&UTp0zRLSZv<;!RB1`PIkT5Z{E-{ zFqoO~%N`y#(cZg#KcA7jU~#r|sUNq+=)agH?%cWaAfc+f{O~LBeJaw@&koah1O*wM zd>$GX7gxqwuTxrHe%F)ClxCK_|mNVARS{6xrZL_;ZFO(`TKWZ>(YhdXxc;F{la-)Y8&ILn@hRZAmqH^S$7q zN!RBW)PMaYD=z*>zak(aHI;YE9$s-_Vc{6LH*U%a8b)nwY;0opHf&R#8mR03{{2;R znu$r#6Z#XK!V5nO$W7mCk@E^gV%@rR9H=X*s;c#7?l;@$x+i|&aa_^vIHVL~_|K#5 zl2TGB300%mT;$h{S7{~@yR@D=OdYqNO!fp;hfDOB4Xe!;Z3w%CYFL`35NSF(+&57r z(hz8o7wykuFyOj8ADTBez*(25o&Uv|RXQk2E7!pYO=X}u!UL5q@}GY$NL1fhv%wm@ 
zg!9lLNdeoTFVFX%pZv6mo947uzRTid$MRAGt4@Z)lrhR1S(fWYW{UP&MClZ`%}q6E zPyO)V9_S5q^<|D%&yXG)8=J#lxX&GD=j4>)&R#E`JaC&19>Rhk?1T`~vZXh;5$T1!8B!Zgu zkYVFHow>1MrnFmQy2GuR3flyK-ac(J5Wc)P>PiVeqMmN}^V`QgCrn@JUb|M8Zl-=Y zfHzt^vGCVeC!g)m6|;^!&F)ekg^oPusiBlAV?4;YrTNh`*JT&1(#ovGPY*U>Tb;Zd zBB*L*mHpPzp#0^lXobjur~K_-urkq8#n9^79G=;SxGvcX&31XY zdw3`SC`^63x2++JB#3Cg3>_m&^g8iD6wRd#0 zQLnU7DNTQfW4PKzo$s~)FwJ{?mWYGM#u8Jq}A-K>u<|<2ivtDJ8ADJUpxkto6?ZYenWq&LK|@_o0jx@pm^?+oM zyWK2Db7%IT1DEqN;?P*adnix5y=eenui>Ol^d2>++*eRlRh6}fqv)<(yLLD35zpp= zqkxh6C`FxUhO^I>XM02U=H=yy0uJR3q86F8y37xsC_};47o2IeT358t9~rvm?Af#R zy;eQt$C&Apl9KB69&Tkg4*2r)pxCp+no*dg;;53|V-LIRHXE)k@qSrAg zr@>`DRS8ofpK!_TmIXr)F`%Pk;WraZo^@C2Q!;2F7=@8X7}C z2GiL?i_v2DF3&aOo2JiVgKR+&9CRHWpU?5R*f8}68AN=+Cm(akvDbcHt5|R)tM;hd z=3`F|DU~Ol%FWsQMo+5X{ft(@tseS<-GYr8!nfY(@YbE_-Luz=0G@DZ*4O>D4?emhY3f8wcN>Fe0bwBN@AAFh;Y4El4@IPy|35NC8 zL=L_UG?&LZuZ@t}h0b%iyOeezG-mylJy)0K4@ zFNjgk7}1;Yba$7xwoc7k7?%?I_3a+PW6kMiu~^;R+0OIR!?cWy=FV*bHUrnq=!1Gc zHK?0Xymb{{DJPpf$6iSx^*jFRwS)VNFkh zU%Yq~7WTJ>-xkJkai#HMJ@WG&Ge9}p1wZ1py-Lt`dAg2%F4?6-dC?) 
z&EbsWs<`b)hkJjmcq=Kwn|5)fa`9|F5AbvP#<$xj8RO3JE4AjsQ#N6(fRl0q?V8kE zgz^`vL?XJ&Dq4!3%8R-wzHzHfiL6bFweAh31h(K-SojJ5Iw$*EP<&hbPBv*nw3_jsKS{BqZYwS;ixt17J;H}O zs{rCd>iQ9l_p;q(ehs)2*rM*Sj{Vhk&zLg zx&9a+RX3goKw%Se4Rc9LOSX8g+RIJXDg{Og%f>7BCkBtjsixF|cm`T>JsXxR%H1)m zdEQfaE=pHdcM{FT1YH=6<9UV2%g}}P*ah8656iJ1g?oe?Qr}7k$Dm}C78i@HP-oMo zn@yGzMei9K;;-jzJMXqR$-j(W28$=&(_`*Wr+CKADK*yG7FElAJ;wQBJet?w+0*!ZKKtz$B*W zW4PTJ=54j{>iieQ#A2m`Pm`5Y+jYprO$pU6{z}p({U*RR?(7b;jJCIUtbJZG!e7xS zrar9MM)T78$fMZUSY{43wqpI+!o;&c>MqNRc4>>fX&osBo&c1SrJRXT0H>7lQ~Zgw zZ#;!#)Xg(50xTw=6xO8}RGrJmUDYY_lw8m^I(IeY>T^_;6&XOV&`M${~-2MQ#KdrwQm>!;$n&*D}%kc~Q>?lSRf^ zv)Nz2*2uUxD|o`F`a-tds501OLf@~IJ@l>Uoo#%oo`-k9A+vTqcl>|mJSz!VXtin3BsD9`W{}m@ z)saQ^%E?Hm4dnjAZtbWm;HNWQ^~o zST`AG<~h#~EY5T^;Lp_WpX&23D!w|m&i>XdL#&+`jm%_l$?o3X$n23UsW+letH-;` zY$huBO>i{A3T^UZlwwQk>Xa<<7t(XrGNX~@xy0vQ!L1{`WDwME+J*?vZ`UyBC$HJw(y&QO=AE%Z-1pwxZwH{FSaz7MF&2^FK3{b)IHjMuojgvcdTe{)=ax&n#0tW-Q~3zd^`Xph>E23r6rljQ#;b-LgY?qdn2>q~b(6r+? zaNyi%{&F7O5w&nUCffd!TlSu)#ZH?Xh)o>uRX6`iV7p$a_km#nV7x86kDiecSxf~t zi3d`|&8}-Movc^4s6+AOy;iiD&$Ln2q(1H|@Tdpxv=6!Ce2&wm75+S8Q-cjEz?pn@ zBdwg0OWg(_Zdha9Aue-9*yh@?%JJ3J5mHJi`V|L;wQFa_jJMDqzkr9Xh!#WyK^A7_ zMnz}3i*gAqubmk8@*0L=Z7QQX)G*k!zAAdU*97tn^`=eP!fe0Q5Xq-Ckr6CX;djr!K2@_w<^Xp|xRiAjU`K^&X#LUK4R#heQe<$M% zpNUnG3O+3nt1Q7Fpw)6^>(%kD5+X06E4nrbc5Voc8Aks%I} z;Dc%Dm-FPjBdR*5Y&DmETa2=+H=l3LZ3*(TDx~gz$P^y5uisa_-2POyt&du|cj;nH z=*@Xsj^ouqUk8o%wtmm~lxMNtoQCKmSvi>k9;Y8h-m?eV#J%SMMF!Hk?65f1fJboD z@u5;2I-U(wiQI`wfdF13N^ZeNB}o|>6(^@0E&Fa-+}tiV@ms!yRR>opA^+=Nw~Tr& z8GU~Kskc5M?PB`;l4M8%q#tdTJ(e|cPR0Tse)_M?X3v9`B48Y2#kpGxp`xL|ssa(=}G%SS7EK%;LBn z)0(17PFm82QM@h-TPFBeZtpnbs2=8hsD3(qC+o}QLmZ!fjg^mVp^Df&Y_?*uPFw2? 
zHm%&w|PV%9@k4;7OK~&N` z)i%B<*|uBKim~bwgw5lYtInMLKWyXS7CRU7%F#TpIc#(3=c8VM_KzzU=UMpqqs`Qfnt7w8cJ;pvq;3~Ea1KbY>&Lp= zpNJ+B_UqTD7PHxY;IW{aS2=px~Wf4kcMeV;zDzSZ~D?aP%1F< zJGa2SQV+fThMFTa$TmDMcpME-HZ+rDvOi|*1fnG@u3 z-S!^}B0QjNdS0mDIiTA@YfvD+RA#398&Ul`=Nl5_J?JPYDQ)LA+}0I~V3REh7Bq6y z)&OKBh{YuN@QD-3P&=zcmUCl=V$z`x3t>=mjs-&d<}7GzenK#NUlT?nE@5^ zc9`tjFC!y^(xV{d$CY+!8l)f=o>f^z#bq!Vho#wGpWt9KO>HQwkn7v@Lq(tNA=tfs zic=LJ~`<@e=ar-tE5kjHAsU_)N%$x7cT_3BGsC?nV z{hZufD3ol4+CLTYK=z*QK6Va^46aGFRh3XAI$?9FTdpj8Cg`21H@hVRyCL>RI5|0a zhlIFDWH;ozhvpdl`Lj5vl+nk#>t^%VX4Bk-0GuF8pD=G_mjGh>`t9RAqJIN)MFGhM z9J?l(H`db|;ipxaL7fIr;xJaYYod*fgF|9*Zc1TxyqgG@5HO5E4^Ff!fCGX@#S01v z)xFa>zX-*l-e%**jRQ52o`BuVB)6n-V%5eA=coOIuAXVEyLZe_L2sJaBU}9;l7>bKWu@V=A)n^E{D$+uPfTZVXU8 z*=&*+4PiM(_SIchnGn;I4zT*EzBisRpbg_wQ+1klZG^pQOuT=%jj29&rkxW9ot>MD z0fj&9vha(gvob_Tov7g1+1Wi+p^2c)de^TvK7rj6@sQDhyI}e)KYk`sX=do~! z6o7pc2@DiNv~ip|#2=gSyUS4oj6rr*f&Ya4RGVdE0+p~$-}JY5zw1{AFF^p z_2#Iz#QlgOf^wE{epL$$yXT{WD^;AmrMf0UuPmG`Lukf(HQ6jZqV1EBVRxq`62=!1I0-5sJ}Y?ukeK2JwVkJw2rY)_ z^%bWFIYYy6uNKVCHZ1KqB3t=#{l||VwcrZpI=nu0_4LF5 z70;pPra8`-1C>iJ&$uo}vPk*T7%hu2+FyX^dSbKy1T_XaiSd`Pv$G=;K!L_HBW-G# zHzRPY$#%z44%{aFK!QczhQDO&gm!oc<(e$KYgtP1ayScI|u1Yl}cd&`f-uF^ME#U#rg8_xKx3f}su& znFxC)6}(<*$IaPU(0;OW8+&P8MF$UNtY?3>ZaeA~P5p_;O+iC0p zxCPe^4`G8O(Pz9{_ND12;;|*xc7TX$9z<(roQjl; z(3JPw%xEI;YrTH3`Mj(9mtK37gW z(c8Z;z}cTv94_fIM9)XncxP3Z;%Td%C;LmF`IWN_V&|F+wdb~+betaIV>0--y5M>F z{Ezx@iJhH*sQl1lU%!6+1Z-_D^cGRy*S(Y{klE-VbP~;#c-ZrJQteQ-zq4GE zJYMvS!NfTvAi!AsIPq+DyXh2x`U?II)AFEG6EVe_Df&;pqXX^}c52KL9-W#; z(~0lQdjEbms@JrAZb*x8Po93L*CyFlfw_TsG&y{r<1&Q*&9fF?~U-tHrziw@U zra%XD^rL7So0*uHI`fwo6K$Hhm((et9*Hew&Wu&~NeaQS)qux<#6u_l>m=oI5tRvcLSB`u*APNGZo@8nYHJ zwkj1{^Tw-=+!7x@%Fm8p+#4&+5dHkenZ1NN0~`{zFo^cofU8fvUb18HXTdf?B*1sk zVN_E;a^29O^@i^Wqt*tSl}NckYU!pNW$RLsGK}jaqobqe@bkzE)Nu;8r|mwbd!Ibd zc-0h9Y`PlfnKl(eT0h)%^SoL8!3p?^gPFmntVBs50wz@aU~q6SEj@j7YisN3;vpD+ z8X6j@FqMfYJqH1>8)he-4(%RC5G@af06$s~1`;YEWgONGLKx8iIkfi6#f*-ZP)8o2 
z7P3O!cU~APc7Ud_sdecwTgVrjNFaViN^^=PCupM!RYxu5ii}Kla}Xf25RM? zz%9XbGr_t!eY5Kk(iuK4URX-#nWFwyOrU)en`%~hVhMI#y_Ln+*AHy$uU-uUEq88e z2*YB6H@32UUZkX?1Ywo=c58B@amgAR$K1Vp*R<%A<8<5-8cQrAKNpuY(mQf+mE#7% z&p81J3kwUQRa5jS;czf~^7}*%dkRgzL~VNu3nmH)BBFOOy8O}!tTJ)oa!NdR?i{lO z=zSHxumm8)dz6e=?1=${JctP9;E)CPTI1XMUCS944lS%yM1(vD83lVcdlrZrhXv4b zI6EYie+AE8JLRCLmMO26bWSZ>IV(!;D~sX3)>0AqZHsOdWB+TXGTPa1YbI>xLX_4g~gJSPZDLW5DubIK%gGl#-kLu?_~insEv_1Ky3Dt3r~wO|9rKZsUU ze{Y*`6nw!exsEf+nKvISOZ*%khk$v}ehVG9Rvi2S6a2JsQ!)XelL%C_r0V1bp{A4o zBol0kWBK^_L}1b8e;5`(R0~VO2qK(Xwr%`K-n==)T!I(gnOl7~3HGxt%YLTix@c4J z^4R&BNEsF2Panhf$I?7tu4ho}45#F~=}FWpSI zqElJW-^JYK5Ifmwg#?xS)v)VH;+B^3%S+XF*U4YFpSdiO-?sktanEqJ8~36ViZ7j^ zD=hR0VRHI=3*+H5GZU`tYRRhCrGX9DMIEWECrQA7C|eQ|5)O0yk(*i@z?u-hxG2&n z0*7Q01+nbtW|N!Ou~SJ_1HLqi1Uv^Ke%OR|bzNsTdEq=A4Y zEF|`A+dR+BaA@b{tlzpXieL)B*DugJuX#-#E=MJ?h6Nkry1YawzupJTN94zIGbYvH zkA;^PtdLeZ(h)w64?m1mZq}4ssAd!7?H!Ku)1308qW0*9Aa@7~JrrWwwxXwx9(_T+ zEFyntu1sw>6L>Kg5HC5=^ZL@iuFg&NlbQo9-$3D*L#}Casss`Gv#k3MNMI4?TMtmP z9nWjEQTML8)|O>MRz$(WZQ3(4yU5=Cc4fy7gr~}xdt;R3bzjraFxw0;KV>{(Gh`^t zEq>v`*G=20!s7mh`}DVk(;Fl?|CZ|nm__4(0EBIqR5je5mZezcE@PeAQw?1 zaGqX}V^L+;3=DduozVT5K=9t5v!Mp{*g z*o{Zh%k~`u0tBWJpq1`ILYYXdlLJ85%*~-4U1jjRm(E~`WG)= z;G05@A3vUI+LX-c&<3S78g%Ir`m^$xGqr9|<_K&_6XGl3KqOj(U7*$QJJsXO=(fMG zNV3c?@Nwl4_3&1WiTRvY=Oef@%iSwI@+essRs^ZKV5;Y#gUj)UsAyiJ=#%DyHo)vZ zu?xFa1+vOvYsLbQ1R0QOd05g)c~166)Ka6wi(?g|DavtIUWK|YsS*Vo!GiU8Jl`@C>%*iWK?!Y>%iodP6TwTOp+m2Zg4pjcRj0xo9afGL z0N_MPs)cd^04s%^Wd5R(MWjk-CJ49h&8ju)qmU3Lq#5>T_sED=p9`cI=efzRNZQtp z9!hCjm&?ZF^6M3hHiAx2?zP7EZ2ljl@hVy+$XzxqW*O0NSNtwFXJus>Lw*I-yF)h8 zDFFep04@sGqZZVG6A;A#jgutV@XMxY@sGy_1vCn!Dn>g$584C=k%)9Bi1ABwcC;^& zip^S_ZsGRdJL&N5%3%mK56B0QbQg9?HEE0ZdDY&+oMuau7!!C)eb~>6*|x*hHJADO zVLozHFQJ}h;dXQl4{L@uOd31={J23m|{vQJ6w%JJb%P1@ex zF^44va;j7sOm6#!o2>EzCgw*YhD=jvv7hw$3}Tfjv9Y-aFRFt29~KeGcgp{x^h^5p z^%(*Os4xFT`w$^g*5J^ViCPY$dD{s~L-u?+D;lI@s!>IVxB{7`54obR*f)f+IzCj6 zlMWG#F7((nh(}J$>lG6~QnDI_Fb$30B<2>^kI*Z^)-`l`qXV*WH 
z9YN0%UKz|1Fi&-+1Qpx+ERTWYPPS&Q!H(sH&auvl6ZGth08+fjAChhFrqf3ZBb0&! zqYzm!S$LPoS`LaV&&uRY#C|%)i%=!$*w`DJ%q)i4|MS5CYc`ax?H%u~cH2^&X{E0_ z5#m^GT#1dlZJVRmMk>8+!)ABnW8T@WX=twh`AM>BK^CJ{5qp0t{9#?!^(93XZsY}6 zRWU*j&+2>zdEcbLm&p6nG*70}**8T&;{KZwK(3OC>dePyj6TW|t_Ccqx7?z*pdAFAn=30H_^hRx`dA?@qKCm8o+0}VX z*|IX^h|P#ozc6CS7ZAt2cl`JdAAfr7Zw@LAvYDA#;X5$EI%_MU+6Bc z(K^fA3uU^gNxm%$2&|_yC|*q>Az}L29;zF;EA}SKXx~@==JX@UA6mY?;TdLljW8T& z!E%|oLDH2;$XT?Gf-E}!jU(rt=a?XL;Nr!LK8<7U6S>GG2B0UqD9yKvhjNp1xP8st z(~tGi(NS)#Jm+kCkEeWosDO%@$wV#x;X#RvY=c{;&GJi91F^zeu^e*V?fcEm7-;}*eqQTivc zPn>YmK_O}(8o0E1f{SuAxVi{8q3sZ15^^M|((oO+fBblj83T=(&L26bq1sb-Yq$+LVd!xR(vPxrFRK_<)xY)g}TlzNegP; zJ;A-`&MXore)0gqH2?b7-^k-Zw3gvij{9u4oYQa>5X$}0qvshJ86?B{Ym-a!OH2xE zKPdVcqYa1{uH-LneEV=NmY0OaN#GX(#|olDo(pi;5auqWJSLw+y8wIB^V*UB^Ja$4 zZv`aki_^Cu&8%>jwxY7Kugy@7`$v*Uhl_sG&MqSXV;KshvobO!u!Y<@2MHp(D>qA? zFjWMpCTkUBRImAj-@m^WDNHdE>4q{vfYn70x~8Fdq*|f@u}F|gR`xkTWvQvDPM#?? zgUUYK+FFPT8|Mx3@bD0`7dMRL-J!hImo_0zU85qSvXi?OHP74p&v-_ea817 zL=b!V!0AzkE9s{GPz#R0kZ41c7F@gmS$RdcH6#dv-dkVgf8Ei!*Y3!!>DjDzhj>Q? z$&5gnS&K4a=7cbkahO1>mwYC^nCm%!!5Ck!US+V9RdjOOkTprty)aTfk)yJPeAjHfxF^2)RJvyL1t@6JO0tx zNz25Ph7=ssR6yh3WEtlFY!Ve(v6_dMsAf*{?wt>tgh?T(-T8S(pVF9oq zWsx`gwqj|1X0!ntE&-_M6YE#IW81cEBVoEdgWXOrsYqT(%b}meB#r_x@YF?7QG(*< zkl|Swb0C@MDV}|}Az12tWC{y%8eFg_#P3RvK9PDh1~M51{zwQBuy3YQ=PfW8B8ZW> z0*IS+plO>fJ=+CHAMzD@I$ok4PpgU9r>3R`Q6mc@fK4b-KIEW`LJlNdL~0OW0~JB1 z6hbisk=Ka@radxYlk`6r4z3s8A@D_5xWjk}8|~h`CeCeSVNt!m;$mi^pb(Lm@9Qvm zw71J?RLH8ba#Qfh{cw3%FKpOCnVaIt@GGBpSu77^FpyYH)wprkOimxc)M&)<9lBnK z)k4jcs&~b4Nd$%H@@a-&&kR7)vJ_AEtp%PpP4t06Fb*s%s1tG=JZgomU| zheEhnO0u*Xr+^pbL}ROzeysW~zt?I1OisofG%%o@2iZ({! 
zKvjI9Vq@iVo$5%k7YX)O+;3XZ0NXvb^iszD*( z=B+Q=xOuZT6iMh|rn>QS73eKL);nB2?`-KoXS;=- zN>=j4XAem)jQ}<2C&9VXhG~AxPX4ql}pYN-qgMrwjRn6tb98U zdq%%!PYi<7a{0?k*_a>_Fl*0Ig;Z0E;Ue6&m~%TnhB}QuCWL4aiZuo&m27R(A>^B- zZ``)6ivN2ipoG1nBPRlfINm@@Mq=87pHsd_Z(}s4LoNCG<7fL$y+#586z6kI&808= z$eCeJWRQ*tq8tOhNdO}4zy5WV;aZvR5f}sh!f@=)wXBoU|B;J27NeP+PIeMXnHa-q z>-Rt2V^+qEE6BvLBOR<$7(|Qqt1$Qv>?C zR<-Of2PWoCJIq;y9&Yssp!rFwNHhzlrQ)?z-V7LibiynLvIAyBbF}ft+zw95u4KWc zK}uBA4cjfQ3V)Yq=qgin8UR>1+>AS6QqXp2jZ)mnE2~zSv1r;y7|5j=)rgX1L?Rnd z0?9}hK^)+FKHlEm7Cr`Y@87>~6wJ7C_yOx8S5H@$wS;=jCUtkMl9LqEqO4m%B>n-g zRts#HreCq=vj6dmNbVka_v2W;RaxS?!8$TiVqJD5!?5qmD+f3mHpL8feJCSjly#zQ z2_rO$Bpqt`;{wqaK=RdcZY3chZJZM9=ckf4p8*H~DKi8Ro0g95?al7W<&DTCC?Vm1 zc~&z@g8f9`-0ZC8*UVfDCllG3sCzK2%CMBtpHtIoY;0}oadXn_C-jNCLn5kLx4y4~ z?0J#o7MR^o`6B2YIoY|>`|>*%k$?e zX3wso(%UN`^EVT6t@7}0V2GIdKD>Y5uqxzr+H;?9*hWTZESOVUsZSVkX;!?cX-I70 zla@b}iid`VRJ+{EJQxI~@2pUp-@E zW1}Q3T}jKtg8(N)tIW*>?^q4-ZOkz8Fwm_^X&UEQy1S7Xyp;_023o?+;&NQb87Kl_ zF)6(2Qul%M6QXnBk!fI}<|`tc2{9G1p`oET|5PAN63(NPQx_Ar>MO0Zh8#xJCG=Yi zIF=h6QEREUZEM)XopV)F`AbgN?AZ9Y{L@XYG4^-Jb`lU!$Lx;r&5uSR;M|F$T04JULzsa}<*wH~3ko zOVP4Xg+(EZS7-x2_zHO??7^m80}x{x! z(AG6Nnh3R~q^W6^(uqX87VZ=UbD5X-)(RrHCJXk!iwql?f57!-hg09g+E?c{2#tR}7;xGqhUAfvvQfd7 zB?(@PfJdyJiN~EF{egI@xCaUd29YEF|7+OlULj|eE#i%eK zU{Rz#l;B6F-Wt;lq^=VL_fRl2jM{@CEn>RD7mB%(^cr&_&@;oql9fne2Dl&rIIOY! 
zslzKWBMBl%OdTxauy5bKm8I9`%Kup>jTlfOMh_625i!;BW_$L=15D1e!!9dJ%df}^q#9H)l1M+uW(*k6 z^_0sfZDizo`}S8*m|F>ABdaQZaXJvo&d=gvIMfa;$6-F=T>x>`KygUZ zEu=QlUBn=lA^^;Rg9rJr)ua+U1j*V8aT=EZ&BCQ$n;Ugq=2TH>c%66uBBdkEDGdOV zlsKp}dBmlG0ENA&M6QxLmvJTH7NI~Qr8!}ze4oZ0?#~_#pC9G-3P{Z7TKsu0lS`l_ zN9MB6^stx1RPyCptEpu79WI&**thk@Rk#JjOTH{04&{K1qqV!tn@Q+9mP4di8G1kz z(?jSjOwZZb*?zoQ0?jr}6(RgnFhS4YCg%5CKPOpgYt}-CaHN?^8{@7BF$}2_(*hNj zjK@!J-SFiaUZ??cqYo?~!;YQb``_`8@f7E58hQ@17TdfX6Mb?)P+|3LB11zJ zbF-R~>+6)%-hHd~O&e~}KJ9{+2w1)4z5Z9c$sLMp<4nber*|RHrXZ_M289PFa_XO9 zgeyiZEd~TC0zx*46q2hVAmH}|`R8YH+D!EHW2NQ1u5bN97!9nrAz!|_Y1OG6`0N|Z zDELoL$uJtmvZ7=p#keCc7jHcvK{L!Rjt0c|{|<_$IcHBc8R);Z zD&AV6du*q_jK~q2eqGiJ%SP7W!z~rzz1?c{lBw|*OG-XJYJe_EOLyY_sNkKw`y^z% zRmmU=VhBh|@rU+c_7hVM8dD=}>TEJ0h`@DP4>cYmOBq8B^5Wv+Nkt?(^77?NCv`(D zZZXeF6waMyzJ-NT8Nyq)F(nSvvC*`QSuTRukvBc;!HgMRgK#9HmAG1(HG;1`r2T?K zHifJw2MfzZ{x<8CA#b6%zBkibSB4*C*FxNej5j|cm6X(zT+HYtC=?OE7ttH)C+g|D z8dHOT6E;1+Yq6e-bQtgFiV9`PQaTpq!wqAdo2c7P@Yxt_ zytpomdF7yg+?JLtG;SlcRI(?=I$X)guY3H|$99;KoI;{`dhzNcTRl52A9$iV&_dFz zMY6qp`UwrYhNx)HGsfU(NYv|Wu=?&DIhZL`}u-{I8fVZ-g(&$OcK-EAv zh14X#qC+41sHh_Dz_LH29A`Lslsp6PpF6*yq$-g_?mc+l#+pQZDkv_i^YLJM z|5bG}>x0#j5jm%Xs)9}()Yk5B%0N0tNnc-oqu6??cWmHLhXhG|X#8CAqUgyBEmzGw zN6!SDQn+d7mu}&A;9!JVOE|0b)CCT_C?mQiGG9TX^?YgRY2|OW!onGcj*l1bcrsKZ z=RF#@N(|BaZy(^7b&ZUe|Gp$Ds;g{~GE&a-lJJdc-bx%4lce*=d}e=2ENzKjRO-hujnV+V`bB7uHVo`aR0uZq(sx9k^|Zu>Pl$?6V%jMVh~9(lpWqi7ISbVH!wt@554U)i0!umSKvPXF(<*MB33}VyAfr%+icp>Zd;1|NIyLm)f~^7_kxuFpIMtm0h-?* z=zZKxr?PE!VWL`SMjX%eQq@C>?|&BYJc~qCiotTKvQ>z1uAJKoj%}XJu!j@x-W_=n zs?B-oRHnu>hKOHLZ)1|NVY_`G1zFTafxoEybajn>A77(yNVWf~ScIpD@6zaUxSzIf z5Jp#MOJ93?55wz{fAONFt6NFCq$Du@apmOXN!7z#<1-d-!om`;Ru9Qk?U~-1S>8FX z@4ZgZ;nV&gVP`ecc1ao-YJxb%&JQ8G1MnFK+%bS5k`%i~zd!g)((Jqi$05n)Ty?da zD+6sU?b~0)Nno`)%q6%oTKF*+7H;{HX|(|Xh=iNnt7-Q+CB~?PhTpzDeMqIUIOO|k z_Ipv27ou(P5-^hL#y~j9M8Ygdc=zro<`meE9b3^|$Q!$m*L%t&qLphA<~lr9w)gi5 zh(orLuL^;~JUP`M-&r*M>@U5iI$g$t^=^_f%ef!SoSe*+u6KE|?50gv+rj_lFzlWP zHHepdH@=+#$i9rGQ(+ef$rLkzdDTdElX*8IqAG%# 
zb_lD*JudD73u}U$?hllm1bOB9l5-g|SuNN9IQh1u zjJL5$zz6F|yvGbyJ6>VcJSX<&6>0qGkaiyLH1k{R73pQ_aN^l0L*p42S0gE@W)k01 zUM|RLV8FC)ZK^`N8V_0VBO^YWT03fZ&It=&mOQFU#lBlY=HwGtNXXS+A@U+K$&t@h zxh}ndPi_pD4HlhYb^rHm{NS_q_&;$&-r=XL!a*a^vioZyK8tgB|2U<==BC4b#4K{h z4rTUZmJyXfO@}$eLPDl!c8ot?y+)vHrMyvBC1+>39TIfP(a_91xyqX{HRC}6#JqF_ z`-R#ts7zk(Qis@Y?rcvWk;uqkDX|aQ_iL%7{#h>Ud&iqw%$p88-N&J)FInIwac{l< z;Q8|sPCo|p3_|7Pt535q$0@E?gwJ)!3Gh&gh*Bk94`jlJ4S)T1PSI>Hy}V)M*s|#(~DogbPi* zX|MdZ1%G|MUM@W2Uk2utmybU&wRaF@Kl(1^?%k}KGP{0>UOx5I~Ye0+w6@NW$En^fa?^d)wAfvCJ zvy~wr>f5)G6B1QJLo_Tb@uYp>(`7}AI&R(^mp${LEk>^M2|pLc@~<_Kd#nFbg$C}^=cXo^Q>P4=Ge0+EWFAtyC42U4aQAo1l-z=g92Xtc zz1Kmnly?_7_xqyw-^Lv_`to;wr)R{=-Kcc#p8O5Lf*j6c5AYHS#k`!%!@S;f8G2B^ zA5IP?nt8vleD~j?vx{cmj+LU5dqgq~(gJz$$Hn#~3<45y<(TM-t&ZSKZYnPs&bAp0 zFZ77e$}R8S#mXmOn<``aPNd+o+|6+rG`qL=?{`Z|&0nTOkWRVB`_c_6*-{Aa(Ma?1 zAf15v&qM}{$J=1R@eo;AAMYJTbMr2J^eragwzP7Fsf;$M6Pd?ig1vBET zvNC1IP8H7mI{x^BLqOmx7uQF0hSxuT?&|KYJA;`rN;ujEFuclze^z6Kic>T+Io7PF zdK}IsnMvMG06&><571%~1Gr1&Z*qvy}onw%6`R+X804IhF+Zv2He3e`c;odR)%Yqqg$9yl{92T0OcKGkHQ=7%wLe#7)DXuIFGlU{5}TNx<}|N>j0N6cFq6T zM1aoay=Yx57NfeGw%d9K2fq#vPg`a_t-{VZ_`cLGxlffLiv_3od;4oTN{=RY z_0zv{r`^EL-p4pQ*Nz#1JckXpDMhaKcjU=?Heq!G?fvJ)i+b(P_oMmsbiO|s?dodu zadnDb{B`wiqute&N|k)_uPsgJI#1%y3Fp962DL`4yW|HIvK&=a$;h5?45GbKYz|e9)S}l z7L{|UMErk3wYrY@nr)d8C<~#rkbWMbk&=PTd7$cn=tQCa1ZjtstvhD8*qCjYBlU@xwg?{P%$OKQ1_saMZ;yG} zBZF5;V`8hx|N8N;*rHAK@4v5>m#Y-HL=ZvN&g@y=mCZpLklAV=Rt>hiDg&JnkmCYZ zI1m_o3>y$7Kq8~&To2yTz0xwWE77N#?(0;HuuGpN-gS4KzZoyWeZS)i%__mw!eS2?n=}p&UD7p+!^y8W2Hh92X!-g_ zHqqFxr*2>MmEr0GDvxv=8a3wKtP+nO_pz}4{=iW3G`YyFN@o<9hgzq04z@rYwi%F> z-Gckj3*ZDf)sT+0bs|hQIVmaK$a}#GC2*?wG#vf@U+eUqJ$N8lUba0bY)SOl33Aa% z4FSdW3k>W5F(nu+ybgCvKxq3E$!h{maFP0$VFAtIyKr4Jt7<5pb!et)vCBRFlGm96 zJjXINE|iqCe+WIb=>FT*xZyNt@=$`StL6zXT%S8P$Er6~chiv;Ln)vmomRa4-!%0h^kdDgNf% zCh5Nwh5r-(XsE9Du(O6oRab88&WrsQ;CH@6mL&rryq|uzZRM9Ph3Od&ib*rG5#-0+?B2pRkWg_ z0Vb|S`fYG>O3DU0g<4pMMR+fTp2^jf7a6^#<|rBYOPht0K}QW%4h6pV2~7hyJZxy27uG|DUzTH?nlt)am^tSW-jXg 
zKAhira*U0pr{kqxz3NzNlWuQUwEa|Mn;5yCtt1KcVi?H z5`IwNh60SO)}1X_SHa0(REd1Gu`!y}m7f78UKaR42i5S1Oe6)-se=#;Q2FDk{qzlD zTR_l1=;LMqT7~dgWe{vlPE8pt|M>lVY+ns&o7JD=>E9W4Q4F9ows-hUbrb(;k4@RX-Nfuj%awPA;W8D`=fsce|I}>*e}tThNKWG>DfTHui;?fjdx!Xu zv#~DgQ2F^e5ZlSg#6-5UGn^~rMEKs4m7ijXQgm~R86V$eyKV3aPHB1=kxLAJ_w4%& zEkGmshd_$6Unbz|a7ZsW-B_+%u2DiHZWPEUlLT1&9NG$rp8=^B^bc78`tAZ0m^Y^m z{HqcG^w2fg2Ppi0oW?X67*Kgtw^i~u!AR4c&Ek1fUF2E_>X*Z_@+ell5%}V(nsB2U z2HD$MMU$1%!px^zTajVuV9%H3)=vwRqP6v|*p>;H`IF$pYHmz><>3+ay)o@QZ~N@* zb$uOYJMnHjhjMq8{CEELb41L->2OtFKq|=wl9>gGEET{PPmYfp6pr-vZuvDx0|yG; zwTu1qdqu z4oQ2X?_aDPOtjf8Nc<{jNoSAwJ}|*#BH67-03{?U-42L`yg48@=kptBI^n5yl(-8q z)Bk46l#lHm3d2Z&`u$LhlZ@gQD~pVtKm3UQ9pOon^=iHBfyx1XH~Ztq zzckpx41`qyQ$&CQ(EO-}gJTNO@*_i`|2=ToA|azNB)X8Z{EUR}5BT}~8UTK5WK_pr zckIkv!0zn~zRWiwT3+TC)%s5XVg`yLM~_P-ixJ$8BCGEvbzXeAn z3qrgC3Shy_UVGyOSwi4R@TwV<2T{W1^Lyd)g-Z0j#_uz$rI3s;Bwsf!`hf zljpNnam{E7EE8qX#~U2WPzv&VLFu3a5*3LNFCc*_K|HqM`a9ZO zkqQF1k)Kevo#MQJcg$rumi7hrEW!j$rXi*vRyaCnuqfCsU%!upjfY3qmsD2S*49N( zc#rwk&srfngM1h(nxHjY9tF`{J_RWDY2b@&{T&<-V5a(bAyw^$v^|bi!yl^D|S)h94ngzl> z7x;NuF#i3bS;K`w>ZSfd#nGCMnf4Bj(mrAC#t^tlI2niA-CbSOXfTo`VZtb~hK3wg zHv_xy{I#j8Q?wtWwbWlt{3xIx`kM?H_kok=Kq2zF>=w{aU#R<7Z&03#5(fh`qCNz_ zA~aEGdU#ime+I_8&q-L$3n3ICLpoNS^7soSWDC1?Z0e%Dj)pDu_f{IwzP{_*6CLj^ z!ojxrKK~wTy2N6=y+dJW*k~q7iT!zh?g zenCeMybpL~XJ;#+IZ!JvpGC97kEFEpUG4}<3ZtlZSdTiPHs)fgSX;y5gvarjXT+MsV~F*@ zxAhkiLk7Miu#ksFB}L3P6J*am{sX)%w`bogg0qKzw2`0t-^=~7;3k8_?vgOjRMxQ| z3n8p(SqHmq-eY5dB&w>a6t3D(Mp^E`!Oah+cGsT>3c9k~Oz=TQ(Qq}LSdq%DOs%ep|3G!p51(9xaU-FcgQ@|1bKM|Xh7!}y z4$0|VkfO>9w7Rf_>V4B=b2RLYppT3+&KjtaLY#05mKjpy8J_NlU8iyhdg&_1QQ)W^ zPk^zu){EiB)*DRR4hyJOVNg));1_bXjFXGq6)!KJo?P}0XH}&Fq>BcC=D`XM4)YU} zKmP@|^)G;#3vtQ4IY(|8mX?xY3V|W~+W)c+pM$~0(jD!Ji5c}|!eUV;P8i#R=o-|DR-m^WkSRGYHqWj0>dIOTgjiR)3}E0CYlrsY7pRNkxkD0TGcD z3-X@4)&Guet9=RGfDI7V{kLu(mjxp2nX!k1u7dJ$p+m!1;iZqd9t!5YazLrtJJ%H0 z`S@5Z+dThoLbJuduKoRGf4JNCq3ufoSRIlSX+bIXOjH!VdXT#yUX*^CD3pO-L}BjV 
ztamuu5kX-yM%cR_PJkI8aY6qXLQPjy6Y=j8C*Y@)mO^r)l}Yeu`;ompI*JbikP;ic8J?Er(Y@pLXEMn~;N)0-`ic(ZU=`NB;5;DK;=)>b)SmN47rd`h$| zD}R&ewsE%pA~#T7q(CCOLB0#(K~_gFnURs{X$D(Rd4X&I37zg|shH=2V43#z_8yw~ z-`4~y;a0I;M>eh1)6gLG_EvW;2ZZFR+LxHo?d%N-J|kXhq-_He{%O7zMWs>=+R94b zERUTy25Jf-B2wVpq^bckltv@AT2NB89B6t|{zkoD-fn6Q^V*s6V3-Exz&KT~(WufH zhfIvjN$$H!aSTFUK`Lu1%?wx0rbYEz}pC`flZrA3D6z5ny)R4NAJnckOT zEifStPY{gl&lz|`_mp$BKWb}3mQ#%+9G!|Uw$)wTdV=ZUk3xra#N+gzKAnj8|LRi8 z3ho{q4$9aQ7=$J!AZRs1g>5#{1YD!3vRHeDecAChXGh8e2Hj@|E?3n<5s(G~2=no8 zRfkmUV(9bWJe%|XqCPE+Oo|l#g?KL(mO%&=2W8q8;1ZYj{vacIc?yt%P;P*25E=t| z@ajjuj}R>GeKqtQMPxeT(1{(S+cQc8AwTSYHmUW`%AkbWLtM;8{3^vKHA!^rT9#tMfmf- zR#N=mq7N5=u*M)+R9gwPwEkaQD^f)eSa)0V73%?quwhH|Ub%YC@tZZ$Y z>UFQw4>xa_r=(c+mRJRY?v#->o)JPqyCQkS_Csd0A=Q7B0noNBRrn^V&z_mMy7PzC zk1J0aYIEkF12@}pv;qZU*E{|RJ*gHoDeTm|3E8!QS8ll1puq--|7NnYYD_+D7!RF(@E?f_H|BjYF=8Mlgly{+%TE{>ZE@o+|D4h=b$SEwoj9QVezHvQ#GXYff? z%}td5ew%vf9N`LFK?(_Uw@6M!Po7@he+JwG2<9|QgNGFP$sI1n_3qRj#pX@!6K((w zySg5#SsKPD4>rDyd1*NbE`e`iq*O0JK;#LbE`9leJ&g|^ia51!#Oo^XLJ(ZpAp)u( z=`qOXR5&jlhvHKB*N@!#efzcfueSDfxCDLqia|7lO{_ktjHKPtkRAdl ze*1<2HZ3hCrpuc^Sb)4KMmU+PwYB%HHc`+MfQW%bpv)Kgggz!)BBGC6$h}2!cM#~_ zWyICD=UAjd^jZH3)+*<@s?SotA*0|`&U@kbh+8m~6kD-DGOvUL%G#Px0Vku~T-V*K zhYA9PQ4S%z?UB$%0`n4ifI3zdz{>Na;GjNj2)t1-7IY^8;gl5ANO2_@9-6P2gulbToEy5)Y)viahVM z4LW9Weo#>*OVs5dZ(65H{m$1aYimRUb{J`qe|9$rAq1SQW1(Fcy%khVs6liO+ zB6J=6>f7+E|Bwql^)F~Q4e9$xU;{m(0A$spI5he_&->xKm92DhjdJK?Mc6#|a>|d| z#0Hmat`QD;aumZkOMrxJ%_i(CAcxP#PvT}~!bYkNBbhJ{EBr1&HFM_B&yGbPcQFNv zH3Nk$62JwLQwG4vTal=7L?-9z<~AVsb)N43!Q4a7URAYfsQ+(?Nr8XX{qqjL65yeB z-6P!KS<%Qq!?E14SSsBZh{~&p@*W&G&n6dwZXLB5^_q2d&DVVqUy8?k(~82Td}9^y z45w!i+_{SCSQUK-!I@K|?6;cgW=*`FZ@*JY&q(g*JdbN0Y&6 zLH(CASby7-35&@=xN<=sAFNUuy@^SNs{>C)Kh|HlH1~Oa#2$zC3(<^$y!ge4%Z%DJ zdmS zmki5YEQ5RZo0>Y7Yo&#qp9;H3wS=L(k7AE%1O>qu-NDnQ-Jj2uEr6HTJ#C^;V)q!R zd}t~h=el~qr@;nIa^8={YGCo6JqI!VMFg#T3XwriMK5bBgB}e%7v*#-nI7(pB!>na8XcMx_c)km83dNek0V| 
z+qbf?n8jYYRMV58bflu%V^?0L{0!!DI9qO#1yO=qVT{$4_b#%-n(--&f2f75(@ow!IfzTT;oi4N~03r1nJ>^oW=y6m^UQ~~KAvi_sH(OF-d4;i3X z-QJ|p0gm#q*lH0NEL3xFko@^KzCkLszp4>vS$gDK+K#OgiOP%Z7nx8#AG~6J8{AuT ziunN6t`yt;6kqHw1dlN2hQX261a{`uU>$E6b`s=?ebBBGp{|`T?zSQ^I~9&3dKi(j zNuf~2Xop!vvlfabgJ4j*16xhWANI8ldpvkm6scJ0qnf(2uyV0T*vucZ^^7uQ1^+Ar zAhNX7258*#Fdu&vBeQsZ8SBk4>(j)eSqTZSG3|UIP&?0zj~PW)k)O{%qObuHP?@%6Ae7scaD$QQN zP3Rg;T0QdM4s~QG>9NsGz_zsmgu+zGI*f-?B~x3l#uOfgdhqs36=S8tGeOvoAajs+ zeSWyaUH@q7_``{$Vl5(l3ls7KX~RX2hP3&fzjIuQiR@b#(s0|ef&ky9VTj7}g3so+ z-y2w1PzG%RJhyr*xAe;k3PV7WrSav90x)urzMBJxr2&wAL=UdR+BU4_e^1fOtgM-z z7XhHLUyb(ojT47zstJ)z*K>1&&=pJT9ctWGA}b_i{8b4Jxdzr1O-SUj?$mTWdBW8e zuFb;(f0|0VmhS>Uw;vr}>d+#&x52jnSkF{8i+JsmJh?br_G3p7-V(o4FxZ&GG`k~O z6O3$|V`O|we~f;Py>-za?8Qd|)QLC84O;Vr>u6K&^y-0o3c`Nly|^#O!I(Q`T=%x) zEdPl99&I!T;Wrb-B;bMJ2QF5ln_)#jW@sIX0R)$4K<%u9i%vc;PIxQ=o3Jf7|^ zbcef;`1!M3rjkkE$y1fh-5cCfpWt3Iz%n|jhJ=ScNG=Sx8@?F3LK-8Ov>ogIonx3u za*H<>Fn2B#p3b4?Q2$%i{ctx+qki`1EnX|ujd>GB0dO>As;}%)@ylyitGFk|#5e#y zAMH(;-p}La32LgxR>7<}ONiD}gV9q^*u4)}N1~S=2rPEz0^R7&%|jTmS#KYo_+_n} zmZOwcO&CvHW&e2lHqJ;*YwE>jc9BJRk)dyY$7Zh{_UJ%fN|v|xXgwsdk4jL{d{DJw z&w7xxHHmqyD#o>0Gi%_ykr4ySUAs4cFG#(Ap9Od>E0j>co*+WjeQLTF8K0`_05>Xw z^?A%2h*9HrsxVL-T2sxjwU!RB6apAgxSLIZ7mwL~+llf|p7Tv11W#Mr_JMjS;|)T4 zvavnSa*0D4l>5+VHVt-Tw!v?Y2M^@p!&Gy)ZD?o|6^_D>zETvssw4xa$zjhW8vnJm zm|Xy9IH+HdJfi@!PEB-bt7HYjV-gB9%<92UK_a4~wvkw0NtI!5sw12X8pXJGl3xGp z&6_8J|2(-wj1539;O(u=jruObt-%;_Yq@uA$Yg0~YG^qY*A!bmDmYE#2?xl!E1ce;^f@|} zs>t#S*tFp>5Af_gk8N8W+b^f}{f;a3%SU=G^hsiJY5kN%Mz0-)g))k|wBtkRsa;bX zn^`*W{LzNCGQ8Pj>e^%a*qQIpHU^T^@R~^<pdSrO0K(pq>_v8Tv_4b5Qg`|U zY=9<4$v^%8k}Y51-$25pm1v&J&(x>4Gl3WZun;hIT6I6WZ!<s8^nod3)CnBw) z-naXPi#eiu&edc8BIW(>2s!5M}Oo-)|ySuC*?OwV9!Lf;fU}&D% zf3WcKL`PPhvrQHL!9K%_={B3VWt$1xl3qV0w1GiEDB3>rew=xC4@G*#t^+S$7z#Z; zP6}$b=Ok3yr{GdhmpH)h`{z}3+`DV=pjs9#Ku%Z}tmrsiXBTCWJA06Dd_iXPIkfos z0vsZvC||_1tgaFs6TX~nMWYFe;E!rg#{WY&%XE)PP%;30hGbQYU>qVFjcd#eEWQ5J zo65IPgt~bA!~3SNmg2_cnvIQUWxZO|-Mf)UC45i|O6QBq5r%>7WZv6#w!ZC8BqZJ- 
zqhCYA6J9>PIoMa#0i4y1_c2> z5T!ET6FFg?t7ns2dALhhJVP2vDyB^-rYp3iJ^QH+mqO6@MX|6XzUQDz-k_`C{GlGJ zYm@A}P^P!Hd9auuf@3!}G<4y=rEi*bY`$~h`u@-*glynlBhqoIC@l|oomp)SQi@Ofu?`tz9pK5!&!tz3F)9G zZ9jw$$91!6m~*zN7#NUkBCXS3ub#TyHrQWsskOkY7is1oCo5uf>gwu}3*Ul`Oo)Ks z2Mh=*%F3X<_A1AhB?7$bHuj2GSz9~%{bi;($e=)CBz#1XJ6y9CyKD+RZsb1*{UUFG z6riz>(~ePw7Bn$kLj@88odyC8Mfc2b*BoN-g{|gs@=xzn7Ge-xHyFGt*5qcZ)^J9S z#5Y`pD6=}v1C&&IoL1dpSy?qRtKFl$hjg^}Q~dm)h)zwiD;pTZ+33i~gxT9ubX35L z_y8{K!`zCwOd5{7k`h&bkGdfBCnx$_65eM9+= zR^9N6moDKR6C&<*yl3KL;*$&tdwu;NCE>FNm9T15R}0CGpC2y6h)Oo-(u7^fmGqVY z#_QL4$jLF8QBhpVx;5gQy__gTaq&2Gh+l_>RBU_FNQ?@1VQWx#ChnajUN~8e|&q{7DpCq>j|Z! zQr4yAvBTDZ8G^*p7zYB*8DXVle|ts;xpl#sx(3c8wSXcj$%q!1vLLyk#gaKgU42o`iAJEv-lRI^lIM*0CWEQRuQNQs4)8HpPd6YXI?%)k&oQt~?^&_TsEb_~p zvXu2~tW640bWbAA;(6LEt*Zsyitah4KQG?Ixzc2tg@)RG2;+9ib!-)U)w{K&8b|G- zaf2Xs;GG8j*l}9e&}|+qiF{(x+Aayk3WDb9|`8M3Y(ss zG=6Pxr_@nXyO2o_(%BFYt-QDqfJ@pNN&&=msFc_ztOP84} z_BQp3Q6EeARRJm`5`t*g+YP{}AbX-nXho~KSHV@lA`d4a#*WSR(Z^TW}#DR?IfXw zLd2hx@;M_9EI5KUjG!9rOU+nc|7M?I_OIdQP0zPKyIW()fQ~Ua$h9%;OO1$NXJC*G zrZ+NaQ+e`s=M1TFwj6wvBMow;!AGjhKk_0O;^Un|(>~s7JfU6~3 zkCXRa#Dt|3gxf#W^SHIxcWZHgf&41JO+4Cwk-PCju$)-+hCnn5HSBs?JjmLpu4R_J z13CvxtVk(dJ&J^+QW>syeZ72UqJAZu;TW`mvjVWQ4ks$Jjga5r<>C^!$Bn-8%`NlZu)$nH@mp?dNDLR}IX zopureLJB|L6+A&z@%muwrKryyeufNQ|8Yx*9|4!J5=0syODDl|#fgFCsL24SNudLU z=9onQ+C}zjcXV_hF5~L$aepX!C1DT*21*ojDCxU19So-1X4A}_yhZfi-{ z1r-gBEe_^8mCqo*h=rg3R}IIOSwDUu(j;oanD5?G45ovB^y#RVMc5vmA8)AStg48< z_xzn;4`J*JVWPweV6UexBP_8lx{cFk9ly=8qg#w>@}n!_MHW8!Dr;3f#A5TB7Dw{F zeM?4Oq%)*d$_ff7uPwo{7EMk2lhu>YX8;R;nQid$=I}MFzdzD0ZlTEcb<#x)?aD#P)xZ zMe$i_h6i2$5H>t$JyI3vk5G5Ef6iH2G(E$RFVwufrJ{mz-+Zd)WfmAxZWF&gdIVOy zD8uK_d^$Pd_l%!M7(Vamk-EY6hiJo8cyy=U5$nOC&6n6lhc@0(6EG2f-Xr=lP%uTY zIC5b7dE*puD?~kbla$p|bK1RncWG`?Q=Vjf=*lA-#<8y%Okx7-8v^3N>NBY`;*l|945q?vvY}_!s&ONp%-DqON{{qUE)6ezr>q zZ*=NRL`b8dsEwPYAT6#Uu;hqv!{Me?p4#rNg@f&4h~&giVv09K`jLqbJV^3V01WDg zdml-r1)I<9p_;pPBAdx5ayHSCds|=dI(qbz)^-Z#?hFu3c~K4sR_?qa^O? 
zj+`zw34*Xk;{v-Quflzj8#p^=V$yrdbbP~mYcGW2V&s~pxVN%$Fy`hd5!cpS4Gp(i zJ-_}j`+)~E2@bGkcw*HJTUNFi5+VCq_Lp-JtTI875nI1=c{)7!WUIJ$wV>uu3xlc7 zC@ApGs@m6at#&HVq+WHKKN-qMLR0l1SyJrE+^6>Vje4&(pMy6j6STC5lfw6Aw3HOi z-&6&&zEs&^k%vl)HD(;UA4d>v7ok~u-RJmoou77YW0)grR!!^xgJ-O7h@urApTO&> z_Ae3&QX9k8oGXjKd2FpS#tm2p4P-g?mcy%)Fz@MevU&kRR@xzJAl)!f@Ch(^r5*t8 z!kSK-uh*`-btKx;6&3C_%U0K1lDzZvqJ>Fd6MpeK#a!?G81GK3^rOAH#a!#Qy($eo zHxBWO{=>mKMi2yOHNOJuA*G!eY1Thb#A`&yTN-!`_;3Q zqWj@e(!!4vjC9MTYr^QeT}v_&esWGVjBxl`L9!I1bJfucpXvz5jjx^U60f*gedVY` zc5Y>`8M#&)Uq0f{!inNw7Utpv-$zrEaZ*NZs|+YJp0>BY3gWj=ou3$XO;grnwHkO% zlbrOm>LtqfYe89GjQrD&moDQU8>?W-7c2^`&3EkK0KxaMvf*pr3=Z#>*;E_JEt*xl zJbcp;*TbROZ4AsrWxL%Z6^8 z6lj@=H=e+xVg}9Kkdt^F^xzV{@MHZz z(#xa(jOOmM0Ky15Av?e6#CcO!97p_8CmDh4$N8Xz&Y6ibMg3T|mD6srLJmVudsgqa z@Q2L=_?~m@;V74)wy2@_SyevV>2ynZ%B4iN_=91+|DNOE!70Zht3Ag#hA-8nJ^Lo{ zjPZ9LWQ`7`q0-1^eqH-g<9(_mX->R4;IZdal&vj$;`UKzh>lxHSAWd&p4n-mLoU+?U>Y!RRLe4}5q#IiW1+qqQPw$LtvR#eVBhh>v6%6t|@kEqoeZs6^s zYMS`)F2)*n2KNtZgsOjKK0;4E3z!SNSvfZuT5j1l_Jo06(yWbU?NAd^3-CF&qcxw> z><<~-T`tyO?+}a6_%S}3X8Kc>Txh!a3QEW3N9SyTW|czk46|$xWlj&hY{#oO$Nj2u zA+4ZFZVxTr`189`tI7OSbJGJpS07Hd7Tc41Gt}KtRyd5SDHg-NXtV9^cW(Kpbyp!$ ziqF*IEYV2+usjIpVZ5iqkik)Cs~J4`0)_%D6LaQmyh976pct+Ykkbeo7hyo z($5Y8xW`jr9LDFRD~A`n5^{3|U-bu<2^0Gq>^aVtOEL*R1ube)8d3BCRxC1F@-}P20 zWH-6QV!K^x6Hz8cQS-tpugWS>E4fv*nlx*+ifJXF z!l#{Q%Yw7HVolQL#nYR~)@kP64RVNVta0qN`y^v(|IXs9b_hkIaeG9>7CY6>gEvlB z{L0lnm{>X)AkcDu#kxqvulK)jQRQKw2F$Jw&*Zu zQn$Ke5z{$8B=@96HMnE zJCnAO5gVe_>&j=4uLC=Tnbd-S%orerNXc)vQ{+uQS% zTv^Z2(X8rTE{8(9nh`n$Bkzr!SJO_Hlt1uIZM~~iykjxjzv|sXgfWddw_a>tEKhJ= zJZv-Pdey6-m^hUX_w(@0)AS9@4`sa{vy>pe`Yd-TF+^*8NNg{wr=!~0~;gMaG=l{-9&cbs8! 
z8#`!cv)doPieT7hYoekEH4R#F)EsPF{Fi+;HWZzTcTRjW=WSVT%9T}8#OcLeps{3B zAii*iHky#5y7oFY%A8Tg`@mwhWH4pfqwxcq(7;e?Rh29Ft8uxHPetCXTd==LSLd;` z5@B}_Gm3c?r`5ypzT$`4oVfhAcBRG{|K5%}^Lfxh)gmp_HY~(J5D$o?(m>qN5PO{k zyPPzUWxB`+2mxDy&_bmjx?MDP?VMV@(OH~tnb2WiK^}jJAuDA@AZsDeRFCWiM_QPO z%|G^Y850Zr+Bs-yIjCN_vc;PD43a!gu9~aoJ}P+XBl*p|Z~L(ZJ4^}t_|j6&h{X~c zo23=Qc@y`IJj^NHwX#dH0!|+d3~;&4tE-x1!9UjNS`)me`rq`NdaLi%CcG^?R^fh_ zA!}n9tdu{1z2zo5J-6qk63H5-qB|u+o1-8W_4CFlHX+R%LikkgT+`KY22|ZfPKiaz zTeC`M(=z&5w8Na!h>@MWP_(<`&StrM@!ndjJ&Pu``A4G=sq{=UYnov%{l2+0tjQcx;#nYhRn#q0 zH>OhDhmk}yQ;`2 zpJ{h1yDYV8%0h5{fqKfTM+Wy^IC^{*vdS;gn+ElNC0~&y)<$iyQ!9&jX0LN|?%nJu z^2>D8US<749GtluSEg+4ymt1I8MD_?w#4_)wA~yvmK|oJAVDr)n}4N z7OP^NlW8_T<^;XFQu;6BuA2P%+pO;s;f8qZk9$3_wQ*r??)^&~POSY?=w3GRWm~hw zrB_1S?s?p28l}*R#E)4F)9bdxIz|oM=U?}yn$;}SMl0t##nWeKzTZ{6%&QrF1^cA@ z3Z1lORA|@;W8!sI)vO&+89CLgEh8zv$amRUJnnyGaSY{6(HhU}HDXxJUQ|$0;^Nvb zU5>tSuE-;s*(ocv<5_h}(txIaOu#VhP91CB_~o3MU9G}WRqV#fFem@DQ^ZkcOH2AIC){e+CZlGj!&1M%P9iwW zQ$&Sd-c2v0ttFT~AU-UiYS}sFZ?Y67?}gqe+Kb)2^ZC)|)|cW-nNcc2GGHB1cYWS{ zhB{FZJ0BA*Ip5=3sMRYGNvd@;SI=a@Pe6mwh z9f8z@>CidzwYR$s0Uh6ijSU*_;Jhu{+X<&@GtNKBQ)6Z0X}nD()CBbwphsLKg{H2oc~t8bZWuZ{50YstdJ@eym#C-^M*HV^r(0U^z5REh zj&c|C)p4~baq;<#se-&(YcabQoBhT-J3d^QhcuAr?mVM4rr<6X1dKXI88S^Yx)srTR)u%9c?WO3gtrvgm5)(XIvR5fN z2u6aYt~EO=V>`}_c@nyhYNlJyJPLR+Nt$Dsf6cG>hF`$l=RupSd;`gSGf(QD z@HSgpDQDxc>RHmQenREuH;lrSoaFpPU8r-K+UKuO8!GtdhhMx*S4WekS*pXkbgjE0 zNoC$IO#hD%1rm-*JjeC7F(VN#2m7*hT}hBfc#C5#mpj zglZ!Hb`P7+tlg@-nGMu8K7IJ-TH2)!16;)d#>QM~eJ47q$4Sz$=*m?SW==W#Z*Jr! zTbcQ$c~#xTe!)t*{Y45B_ny{gs#KYG?RuMr;p_sX@+yGup9YNNym73_9 zCrW`qsuHqAtaC{!3O90`qQ{E{yH^4{{p&wUyzS>r5sLrE(qTz82%VfsKAoX}PE*v? 
zP)@KD(mrjUE=SW(x}V0^mCjuFL?fvyedbhK(cG{lnckpl7;3v2T;l}O2{+|Nz<*=6TQXfDz3s^7LwjJ=Y43f# zrr^A^fr@(Mr8{9PJ}wr!3K+k_=;g*T^}4N@cRaZ1{3rgC)wg`>#@%&m)|Xkmyx!28 z1u)JEi6n~6YXDONYkpWm(0th}Y^a^>i8ZPR}BDkJ4;bLPSdOLGi5 z**%P;J^s)^{j<7tq*n5IVYt%2H-&tqY5!MDwbiMqdcHZ9Q#}) z{UK?8cfvvQ$6Nk#_wk8qUNO{NT*J+Wztp<-_a31X-=K@Nug=}BPCqfgm37~qDvqwP z?4ofJ){K?Is`e!H-+f8Xuk&@uus=773_5+9AiMZrpt5ObHFY9bGKyaY9Awea0O=Wp zhXb8#$ml^TjFT~?@_6J83-?;M@Z4`ncv`HbD0!>-IB0z$mvJSDBQ{&sg&I}QYbYR< ztC7ZHj*p2{~$3et#tCwD=65EFV%Mbk!QT`%*M91 zsv@SI`Ca&qY^e4rde@Y1c*SN-7NbW+23rGN5q}3KS!OobS+tPI1so>poUO#uD)nV8 z`sIlHs`K-)@#XFIZu*c_(BTn#(rN2Dxlv8nqEoG){_CAKs9ao{7QJUi_=E?kxa_v9 zdCDo?)#)FK4Bi+w`Sz!S&~}}9r^lxj@#-2T8x-F`kkwqny&qj8yPG{ZdZUMdW#yXPWTtjJ_VoN^Clc`ZPyuue=b zV)ryXD=Vb#=btXuL^5l2|Is`gubv9{f#R;b*&G#`lYLoor&fVrGZ(N}5z)@9nCcfw zzOAj2A`@Aw%4Z|5rHQ{$Af=$dos?wsI8yJ|h3Fk^KCSsFRjolQQruOm3{_OH9i(ou*d7(s!jCuStE_%1j)2=F^#Urc^}^9uf7?E5FKlb?2)jO|za5dr&i) zR?FFEtG+IkS{X5Cc)^Mp%Xn60j!B>#HGFNZ%;uYkNf+pzXA9^hBdao&Gz)+y@zEad z1FZQ6`LMy@?1r+(ybTP^)Y0h;eSaJ*&36_R3H{1bx=JsW_(th4M_oQAY1pZtOow$h zRZe0rdEh|UC_|mbqR3f*v{%j|aA@<7ob337_}+bSmwM5J_V&T$51C2U!P`GHE| zdI9I3*BZOs#MS1mcQ+-z=r&JoFwZ?4nk$QBRZNxAtfIZGU(o!iD2ClFdqwj=L+rw~ zrgom#?3sg=2T{!qVq2d>J*;uP)n`2RX}pFhd4sPcQJ2-ev(M`73F z=+A=`%ityH!+-{+MRcc?%ImtUZGpUg^V}~er|rw*x~(eoG*}fYq>NP78mkl93g3t{E3Z7`%(Wu@&d))ws6Bl zh3e&EU26%re3-bgp3U^&fkb_K$wmvEVbftP_Yms>_V&LG!_7l)W_4$lR;A4;QPcj? zmK>~U&tR&+t*6#^hVCkdPCaPuG?j9#An(3sQ-+^`TJ&-G%nXb^H+sHAPQnr=~W+Y+d*23MBW3JdpuYff)X_zv5r@5EbsajFxR`h*J;pVLOOli(SO?}CO>z@+z4E^hmNamMt zk`he^-HZ}8!`JSFHN*t_30anLHF1=lIQQ`=ZnH; zh#mG(4bJV(<5PML4~%0o@@$qmlG49hI;p5qY`LOo%em>V>YMRvRTJxeg?I-T@g1m-jas%B&u>6#y?lb zn0`?7bFWgQY+x<7PDzSUz}wYqMy>Z~p-<93lfSPZG)sT90&(m8bN4<9I^e9C6_xN? 
zQFZOwOeujO%3m0ZqDpu!%!1JlyvMu!Z7Juu?j2bZyUg|vPpX$Wg2@4YE6-*)P8oK6Lrl z)uL0=VKcuP@#<$xYejqB3q7PY8-2woG{GgPg_6#*Dh_h83LdZFt~K2m31N~`8n3^~ zl9?*L=@=c#+Ak%I@f?h$R49Ik4$Z{xmU|^el6ZHfoZ%5QS$)tSzOOpfk~wwoclq2E zuB@tENL)2@Q!2Lq&YEZ6b{%WevDAK}Me2EoS_ZcbYi`H$>YVFncA+)D6BTnMuXN^> zN-`?bpUDx^(!IQ)yRDh#ZCbLeS(&7~Wm%){61Y>jD#vT{HBfZ+c|hglZiy^&w3qE) z3m$(BlDD~whxCY2f0Co1^2#l+-qBqf^CGSm8`Ltk<6~a)C%gB}sN|1#?FwiLFzp`w zNRcY^aM4am^0rOX@Q#{OXt~kA5$1W=L*ZGS`yejebg^%|V|XyHE0!*acsS|&y8Rk2JFE2W2-Y|$C1a}o)sB38=8xLZ(7CB+*|>_G{J`EUAxYKiLVIWd zol1eUZhWtb+LT4}T;s`x!_f@>xrlJ<6$bpKSCzt)#W|zL{mSc0q_cDxGyj|~-buM( zwDKa9+b-e^75@`qPOqn(s-hj6%Qhajx}_==Q_Y^?ODU6BN6V!;@^^<^a^u7+iy|jO;ywPxT<6z9kSR6Y` z*i3k6Tw9Ocp|OmN5Z>N_MS2800g!zZR}Hg{XL0}Olq>uA?bLt9R~k!41AcOpM!ffZ zAO{6(;sfCybuX{MFbj^$ZXmc050sD~xIR4$Id_VfkI9@)q`fnE^Afvm9t1)~%FN6Z zK6MGNUGN_Knd@Dqu1Uj$-3Vr& z?6HbUX>5W!e@I9$5OXfPJ|7iAzr2s?2eQfQhV;aMb2*kJo{R1p|B*sH<5SbASh=3eDK2DgQIUh*zL_=f@i|{$hTu&35MN4xA_s)0cd2O&A5cq4`gS_`AY<+!c zR%i4s7j5gWNss>IuKhN&_vP-u`a}PfcdK8?b%iFBWKIEBHExZnnrv_lI%tX%>xpbvgUQ^)7tQ=cAwNakkFZ z_O8CS_L;1*w7Ni=_UO@W)0@hB2C0$jILc9H+S=u&Ah{o@K@ecI@x8HeX2(S2)r9b1 zs#ac9I!~>~GgsP(ra>#p+@l$pgJ(hihq$+Zs1O)C}8+^a-oO{k4_l*A-|H~NP=c~Z8pJzR5 z%{Av-zu@s5ygbf1=E>sEqh0w}xzlh=Fs_wdqWMM0QHl?58EBeCkMf6S{mCv<{qtCb zf;`Cn*JymhQR^>1c4W?U&P;rwdo>|TGm$pAdQ|@B@)CAA)6bwU%}+c-xpRGWXnUxs zv*8129m!RvHwk5W{fAUyW33A^+U$F1JK{|CsNR%Rxo~iHz%!coZq$iwJeA^=p4!@& zL;CW3B%b`$S-Z*X^u;2+L51*`9~PsU?e@htT}@oRFlaK|?i)YtHXmbzw|UR;qsY`z zCo^KhgL~+`ymxSjo~fXKWZmhwLV~0G&oq1C+&dxVHQQ;}adqJl$u~5X=~|TXN(HRV z&s%=w=wenjizuk*eDKMl>NEQ;$9Fm}GYuRhpNHwoV%odja8E>yi zt3r_W8h+Dmmtl}dORwQ*;&}V_t%h(G{`E(d3ml`7Ci$wrh6*LHZ(alS#!6GJ=L?wn-zXJ5;3AFxW_FRH>ExG5V_?j0*mgS^E{Mxv9hOVE#_{I2T`>?;9(~OqM zKz~@}X#wD^n7r*$tsk(@lVE05ZVG$lJ%39|^a(DD+Dd{rl%?9UCYahP6_in&B7U3a ziehricmCrp%i0QzA%ggp#|2Juz&qJ(N}G1LhX3o>oc1g05mvtkmaR{FqH}S$WGh@s zAgKu|LcvM&YPiPb$W0DiUtYk&VD`F|rT$xkM%fEG7D}-&IQHN`)-HcYe zoM~vxGmvrS^gF6z-*+d2a8;_Jug&H=U&-1!@g^wq>DI4OUXIM@Ecq2Q;V;^#^xi@@ 
z=JQftMzy<9|Amg>lE#+tl7XhU+_(6v>1MXieytrgWE*hp5$K8~qvKcDD0F!)LLJGL zyr)8~Iu*s$a(5WVsgE*tt8zB2Qv?^}1$rVo#p+deGa| zPdjyJc;0W!K5=Gy%2aS$gE+nMS|B|)(*j%55;FbeCY6!W;*_zS*}DiMy^W2Xr<7)- zoBU>cr^oMhx|M0T*l897uEIG8w`=+Qr}HfqHM{x?uGbjNto6tgec|tPA!`*Ld3$Cd zytwJ9#iKG)gkhYUvy<0c&reAa2pWFF&#d;6SN&!F`8MTpwd*wXmrECA<0-gZU!Cx~ zrnwkx|E{_x+@U=ni*5YBv zy%e}p$D6+?CfbXqpk)574SirnIpb3&Hm{CA{V}Qi+;%wAiD6CG6tX=#o!A`M+%(ht z_?0_(SBnXJ4N1-;1XB^{($1Trt|EB#5l4TKm zozwouC+%%{vX2od!`TWhmWkeE>g5R#c4i}2SPpmYD-Ly0Mx(Dgls;WzpvfBZINi-Q{cS z2kBf2T3Q<(Y6E+;cWOGOBcJO% zAWlgK&W4)yBgc^eq{qXheD!B5A?23mV>3S-epOCuq~%L7MYb^DyCkYs%FwXVYSipE z+TD|>mgIACl7eid-f?GiXWuOey#e1+*Ykl^qpDLXvqyCzi?R3W#e-6Zg~S`}_30<@Z`#3aI4z2fb^K0%HXS(nUnI2iS!MP4blRp|}w!Qn8KXMMJC zd=C!L@+l*W5Aa)6@}|6Rdwmf}(E)rd+vnO3N|0^c*l+cW-;0D+!<-2Nq_Y;w=i=h+ z$B%uMHJ#42y!tticttG-a4f%z)6&y5E`mr-Z?4>z1LuzO=EiabeY7roX?MlA)RU_N`4y z`kE~8P~@H0PT=s>M^byxh41p$GeX9%jh0>Y**D8c2_7s#y%03Xx>VTf8*Wpm0`cf-C2Xi_IwI;pMsMQpPyFH z{c#x|hZqE*t7(>2kqROoKmXi~xDt6n5SS^!>!}6EkT&3l^6wxTiy;vj$H{Ewix(;E zUhsQPZ;IkZ56^NZmHrXzE?J@v2wazgB|`0Eaq7~ybC74gS)6uG?&ILMvOHn<;l=H; zlb0|UBIh(utl*3Nb{L^7+nN4!P?RI6OfNW$*p^fsBDwis-c%1IE2(sab{EXhF|#>K z!Zp7S`8G907Z_H$ksYTKY%DObwSCLU+4u(A-hMnO>Ruo`MnKgY7M$nH+WqxFAH!hQ z(IGM=h$f%`dKF-!_t;>QAd&hKN$;g5O!D-nN zgtT#}1E2|jb#o6A<5(ziaJCJu@4bVKS1YJf4W`q}Q&%Q!VU|Zz`&3<5i0F&Bt%Ot) zh^>^ZM-PvHUvCRaZxQ4C^5x5Kxef3x%k^60d98QJ;IPI=po4^c4~NUr-mxx^YiV(< zok*7hn!)Y^g+!qkD~Up3!1~{Rzov`&ppOMSH&3o{tJ-&n!ns@CgKW{K#0ySGBWeeA18ah2y+k8Xzk?+O1<;^xmUm&3!jT)U0;=B zTUt7S0WZ$Xvbyy6*6727bmh`JcB>~97FNBdz6=l4w5mV0Z(iol9BHOVZz72mHTyj* zdF=>ytgnD{dfnp_;U_P!n?5WQ2LCT-O0=k&wsei6D3W1878hp@+XkhkO|s!yx(TiH z_Qy_NmsL>_YBm5KTh+mkeT9n{<7qX*R# z6xNh1EGj0}Ffe>>08tOMB z*4cf^6$o`C6IyUY3h10LA-I5r>wQlWRoU&^BR)b((r+Oh9kUd;QhTS_o9ZKA_ca83 z?~DF{w(c>pdp=FE|CjT1lnFis<9r06-6Q`DGXhs483`}mc)GjxLH4ujSoCd7w+`YB zR><1h%NaFN=uonEO-*Exnsuz+6*hcetnl7ahRV8h1t^hVF zx`3bx`U1xa<9uv6i!^0WMtf((+w)B*rCObkVZ_C$2P`y&llmw5lzmsC_#tODTe;PDByhThXr zq@j+!$8n%)uiYV`OYG&pAC!Sa@?Vj?X 
zWehXFx%FbGpYTOED9Af~I~|%^?&60H#ryfTX5BA<%UIK6sp-9{PfbgsiAntojU_0E zFYf}b4TPl{kRt~oWjR*wcJ&_41BsK1(FiAcUUXd~8Dcpz^R?b+w3dk&G}Vx(Eb=s_ zb)4h?5(r=KAi4O4qmEI8)5b&G{#Yv2imJX$6dz;fBOb|sE=Gs_@4G=#f5V%jQ z4wutKJxG2W{TvSp*2*~#afQO@_b~I@fGD~+sgg1|S(MY0+nr`tw3t%*$3!cpb*i5+ zl0L1Ce&v>(x+;5Bg{q_T{KTl4FSpdyhrF-;|i z2-VinF|H@L8nO;ri7=RX4Gz9FN8OXqg)#h7-vYz%^HT*C(zx2z^n4a}mpY!v2$u+- ztKaEWSo1Z#Ndq--?;zSZO-rV(l!4ux1ITdd6!a{B@qj*1=<6CAJGj2^9i9G3x+aY| zuU8((_+1OX?E>AkOc|SnQ(EGU5V!N}fB7y{Y)BgV{p1U-SGWa=MldaE^wG8kK2{*g z710dv^|WhNKZtN`kdKZ!VurVO#r0&&xxq0gh0Gus->>TO-qx&Tfg7ONCc||3O)U8Yp`}Y zIBKYmZU&<>#%R6(sW?3lGS0b%o~f#~ugXlmi8+0iz<>hG4z$C^$1gLpmWonQjRH4b z_RAiW^ql$LRK|AhBf<%r^jyJ#ERy!ZQpJV_zH2)-5Q<*CSuW72KVi~aC<6+gx|@sC zEvpuCsW<28K(46>IMZrr*&wE%MqZOSYX^_v^?M6d!O|l>;ufgS63IVfIei>KZx_ zBZ!>%S+x;Y>!$>^2kmtY&Wd=H`k8e>R9RUBvajsj#+&e(p06Au-S6Jwg4-Vo0X5?M z3$gM*(VYNHJ?~SO#h?%OFtHBtY(ZDWW}#ha0)&Ola@VU*YPCJtMTYzX4ZxxinK|4Q zT(d^`5ODs0*Euqh3}WXEyCbBIbk6XA9hhO!7Cs=|0Io-x@Q5+sRiV#K-ZKcm;b&4j?9@*%2 z=|u(ru}o6ZyVtb9v}okS+FVe@fkNNbG5^-FdH&3l6wvw?-r4~&BF~_Qm_Gvqd-kBN zoZO5Ipg7}9PQaPo=gJRlW1Wwn{K)*2J+&2w7+$ejk2qOb`X&uD0EYC0r zf>HtOwDUrL8h7hKSkTzi3@$$V_12IN4Ta&-w1*lS3# zbOcfpd^GXEM-)im_zIOUuj%wQ*4Lnx2TrMU>Imrn%34@heDZOG&riz9Y2lgbE#i7u zgMQKN>17**o;K!O`l_Ev#SVo1WHYUhUgxR#G@LkTPNR+ZlbF57RUsB~16>?(R>4C( z7Mw)EYP`IC1s1{8%7{Umb_Jzf1mLCy^_e369;}v;*+(}|Q^Id!@?y(fLbqyi=8IZD zJ?B(OpY?o6^>x}s(a5RNChr@X2&4$kfw%xTzt7XYa1PgN7+(EgEE}vb9)8lzMca1! 
z9MFJubafZMT5J#DF^hmoYIH)IWJ+^0zStpse7v0Z(gr|eS`#MPnzH1)y;rd+0sh>Q zPxlN&q2gKWEki*`UKIx~S~gJyU~cl^5ePy#j9i5(?Jb{u4yx{f&?kn68WH{XY16`J zr3dIS+@yawXfR(WREOzNpsFdC`;jLS@)17<=WfF?V04ddUi3^G@T{~DXbV`&&R@W< zIRU&znc3JRLASdVmK+$d8`C(?HzSuqa(!E>==P+i5+KKi!p_{ozj7cWWy z&6f_un)6H}MYG-(7o;P$qO5Et%}MwWp<&1xW7|_1Q9OcN6&qwU@R;M;fTBv5nq`q~ z{ezd_A+ry17s=UKdrzOcLDdfqepQ%yS1SjT_j;I_wgpkCo9RIwP_F#NqQ?TyjAgwjt}kYXs{I31XZz-KD?^ z0MFwjAX?uLTxs?tf$PA5-6A*^rJin0za09y^Tr&2VAXACuhKP9zWjKHo};2swO3fMl5Q=v|K+qfRvuUWDf;e)#wFf=q@cDMz|7pQ zV886R*$|6QQ}h6Sd)v=JJ0=F*7g%4`6QSedS5mTm(vA%G0xLNUTRTWe=blT75B(r+ zcX-H?C8Ozbr+O?TzN2qBbv&^KV86Q3VCE8w7_oTpd8X6<^QgQDCDkt^pbJ;-k_#)I zzv=BS{-VBBo$RmB!Af%{FS`wCEmV=7X&u2=R+S*Zxj)EX_vxyy1yVc0%(Ea#22h+B zFaSn2a->&~l|6?;)OgeKW!<$&ARS@DZMzN7KJlJPw}3F&4{EINvXeI#KOU=~xUPhk zm%gM1RQ}6X6z}AKK-t@zoFB}W7r={(h9#7Z-Dw-TO(D9P;D{G@fSZoDzO%%Cmkz1I z!iA#m2X$ods~kKyu(Oxp?0ruk0SBGL`svTVqu~2Q!M}yTRcL_279WbmY1jCG&ZtQ6 zdD3`QhBZEbH=60WKexVnWhI?v+V8cwe2|Y$ft~?0R5ZHLYQmfY@tRw&(8`Y#)I$9S zh&I?--CRIUWwrr@_?$=8;U6T{tdM2VVrtIZj)B538=GB_Zk<t$EL2l)yC8986 z%#+Te^wSkgbV#K(XsUM=Os3^F$^vs3DzWdv3}YexsVgCbuUCkJoDW{QsI6llBEYLG zzkvxVhbAa0@|0{Hec-gyh}fj&kLLu(72&7Mg8yFEV*36m_kcX6n5mf#qEw3Y6)WQ` zU`DJ!AQ0|*vT{?B6w&w6q3vDi4K_JJH~or9vB=n%UMf((p)L>HY&ps%u)x%akCTA1 z{`ayI1QUzUakoZBt=Ma`@y69lIYjmO>*<#pl#6j zMCL^$`(@YdrhacMUVil52xUDXeo+MVQmhY>`Xnf8#nX#hf27jW|MdDSxYx&EQP@M# z+I9WtZ(XzxVs=6}KXob}-uih3MVB<7$|@)+H4VDl)jPXS3jF|47^VH7$dicD`_nw8 zkgomsF+Nv+Q3*UogU?UcZ+38q_U?l+k_`@8PjYkSw2Raou{iRYNAYoCNyEcOq5S|` zp~ML~_sMfZjOa}5P8F*lxONTaRcuE5>qW&@Y{ zB-lq#{QyNJB>4+-ROpE?PtasN3j)mt4;~yOk5&2{$|D7V?bjNq6(#gjJ;NE)hCXz9 z<|gP84#dhpTN;Y&&aC@(sBb~fK+6`&cBOX^EkTUKUbMKtG;pk-Vl5q-uKRA^p6x(! zom}TX6sdnqwS8)qO;R%qC&oWVUz%Jz3wJ3K5|RtS>dAL7gnN)}%M=I&5fV9(c(t)9 z06ZxAWA9;Yh<~ct51#isOt47?U%D|nJDc8dQLCI!g5+yN+=pb7GS1;-H8dVWJzGCW z$p%XYe+>Eyrc3=#KMt`5K9m6sBQab^Mp#%0$ix6<-D5#n9o7VD=4+3jc!+Tz2`c#e zA476hgCv(5p%;@~x$@A~gM{!x@JMKZ@`Hzz>}asX-lly1oQf`m$NR`xTN|2rkQ{!A z!l;zx1Cfuvn?vHwT3pI2Cu5u)-oD?tMm$ZeJNy? 
zR0?6zp+t`yU2s?c2XZ3B|3r26Ahayl)C;8{tK5|A5jsibu0$YcCFjf?g|n-ex;iOH zzZz!pHO;~e4a=eCx&}jA9(116*txYGA3^-(0tXOA;NXZ%hQ5FQJ`aikn$W0g2T<1W z=_d5_J8OrM6%^X*PT(594A07o-RZKq$8POEHWHvUION7duM08A;JAxaYWGH5JhVO!Uq8X zprOPi2in?dY+gP-fjv{?a<}07LK{D3+h~X>i$iW2?Qe07L$DX^(a7lQ4{O`1Bq1c+ zj6n?rOw(EPYQQa9K}mzYq;FcSqp2y1ZjC7F>Fc}1rdIA`fAu}uJ<089Y!t3r6VpwK zQ>NHU_`NcWNfY)4dR8;YFiir%^PT0u8+1{R@_WT84WGkW?<*0|-!bzH=u;wGR>sUd z(BG+Z-WI2rh?zrAGU4OL{Wb-un;O0jM>lQA_MRt2aT&k?aH#O}Cg^<2F!Y0o9_D_a zD88m7g*2;B;*Hk75GBZEq6M-e<4di|S3_g*e3Kv^(MDRhED4(-kF=}y4m$H(_$am~%i5dMY>(h^MI@58n;a*_?akhdc3o#M?qo>xQN zeun?l_v~xxGBr9_#gb^{i%rfj^@BXh?|$L%LNOEtL(D9WW$o)wr#k&oG{VDqO2Xz) z303H9wm>o6DWr*)!_2}G0D)uL9Ni&~O-(N_usnLIX9kq%>{$5MSaGt})(NLh*OMge zl9Z#N(K7dlxx_l{tndl_wj>V+R$KIN9w$)l9ulfXkUA*%naQXHLNKzK~eZZ;k)a^0v$ z9ieO2?3m9F@!@`aja)CTV`F!b1yvU@&XPJwiO*yWwWOWk(6+xH^{@A)O3z=`FQTC2 z3WQ}i1SjH(`l?+P7nAaB!#O#1Ax&&kGby6*II#APWCf586fkQCH|OA*>LrbKF(5wut^84-_w}KTgP@Jx9op}_ zs(+|;wtfHg?L?SljhR?NaojhOxZ7r=bx~Qi5(c(l;C^ADIfgrr$IIVa%eW=l76VX=A$D=;{wR48#wk8IFnC zv8k_ltMs0l(UsXCed8`eE@oy8Z|@O&DL&29>va0>-xkoA)r7Qd*m}AddI{iVbSggK zZV>hw^-#};V-jSIfkQScRgc2a1u@o}=Q?gLj!76bJUtra?X8fK-ghRmGu_{x3XYvX zrziA46+JFTb?9Ni=!LUqrBGx1?EM(DKkHiQ_vGI;ITDWqeF}er>aEx34I{!Y8euoIQ*xTheiExQ@V{)uD?_q@6W-D^YxW2w7L*@%V=X)quBeQ=r>- zk?33m3A~&6UYnQ-q^QKlv%(95)sBv6E;wncwv(_x4W){)b7CaD}a5_D{dv#tL9rr?m~OvTtNNw z6=Ug}H*Y2`ji8<7eROPx-@2-hd$E&mqT<5B0CjLaZE!iXmN#Hi-OXy$6xDU=@;GQ! zB`)0|^@I;4Z>ixNx=9idj5FvulAAYw&}6_R$ep4>4RUDQco#&&da1htH|~zI+w-+s zmWa1EHHCe%F7k9a!MUrm3i%@-Z=*~!Bg(ynr7b? 
zyuFXZ_XFE5Bw7Vwle2#EaK3WTIsj`IJx5`oj2Q0PNadgV=x;>gHt6@_yKMtla3{RV zKB2!*Zxd$B&{R-@Pfl-Yoau&RiPE`nCLs(8o4e%RO1Ur z6dEITW&g3iy4rX90yU5GHq71vY5JMZ*_R3kVul@^u2$bHLJ!0)=ZeDF`~z*#x}ju6 zlc^Oq9dfE1p(^)2mop(<{!Mwys#>;jBLtU?^&kc1M^a51E%{1;J!Mj>i)@{4+eTS< z9fT6=NtL!R5`f)fZASKc@V^zNgIaLTp$rMrK~uQjc}YXVX|{S>K?N*O!Xt}sq2X)Q zAP#1by$wUOgKY>l6l&b#n&$bMOQn{)OVs{t#xB++;!ZuP>Div%G|6xn=Kt2AI44(> z_K|=rBx^dk*RZ>S{9nDKYC1pK*LK__OQVH0P3I&54=2bDz8^@53FaIHPO8#(Q*92U z3vZ+cQh+@W)NKb+ULjdSy9j%$rlDCP(g=h>-)`y(jaq%E+{;HL+4xsO19_Y$#`i;0 zC$aACCeWxGW6ZWUH)=4huJ)E!@70I%;r?^U z*JMEy^akPM{7J7W4n_ERhhg=#wMv84#gaVdRSo>_$D8qzcnTjO^x_q0A3?rK=HOV; z$6nHx@vn04QgQjjw-cv9$Z~k`tG6O#+S{uxK_sai=|=GkT1m-pdSBEx0yhH6`#(Y> z4eA4z4UJisrt}COmyd)e6R+pIc4a^t&bn}YGo1M&6k*$K zaHIf>X;Og*B*@9h`7pN&x{)o0_Bh;2PA)EChe;^#4Ejs$%MrZd1@}9jFVvrH@#h|- z=~ewq$<78TM@`60oaE$WbEkf-aUM`A(ScKQ=U*WqN=6@EXt6NpN;hfi*(xNdJ!qjv zjaTS%@4`p8(f#+0;IpOgwqcs;pGZEqvd`A(2Pjk>3(SrpxR)^?||bO-D9Ief~C< zzu64j;`CwE%SXYDvK7qF0|EmRz*^G)<^mE}^AT9lM=cKVKvo$OlXP(5KH5^a;<%I# zO<=~aSY5wBTrQMepseNOeQMzaR4IOKVcbSV8vg<9_QjB(ajD1p!-NeerO{a*$<5A| zz&EEH@S+!MZl+?x2R=PU=lW~_n{X|@?m_mh<67d;l%ly@e{3#q+DkNbLfgCo4kOp3 zprO3Pua$N0G1g$KtIS?Bd;~1hrI7Hh0!WjVmhzRsxfD8yxHA;vsD=>|(?U;j?hptQYV_=DHXIYP^7+@4z!-*E z#-M;g?!otA#h3Mg6R|O}H~R^!q*sS22AIN1Iz(F)_xpJz4F`F8h3xY8hD>AGnl6|A z(3eUYpyO9@(Noj3wHvtS7fi!(K$uo4Kt1C~k$QDXI-9if8V^iS1vCdi$|+Mpe*Rf$ zzlaK}4CjG@(tchlk=Wj8XMCLZ>CC|UpAF^8+fNNJf;Bjy>-4u68534eqil(kqg?JsE3;WHanT8s{l^LF`-?CD`M zmYOl5bnl&J#>tvYF#s$}jaI7RadHRGjAcL))aQH=yqCF+{jeted^18hE$!1q0Hey& zZw_Jcs!jpK4Y_A*(W|ty2{@J=eT>xwc*v5@dOw|7LE}=xQpaUdx{5g?3x&!G<;Qtw zrN<^OGkk=!g*x=imr#62KU6rHJZOYc%ke;%N_Mzwmo$t{vtKRgCO`j`a zX{+swV66a*j8j7>ehg;yRQZo^+_PPwm1IBoR%AIZ`QHrJpL`WD{+_@!vL`gj7+P+n z{kvfcMGHRh_*#%VY z7WWR+IoR_pfZ7}R=g!)(Uiw5Un54U*^KuRcr}kdV%mQyZ)fm01nQMRcXc*V);5%os zm!=$VtS*){J$R|0XcU&FSOQ{@1Ax@3onkj7nAIzWpvd_9=WPJw;oA*e4`=?ltxK#Oo2xS0aEqVZs5f z(icF43REIbftS?f|6$&tAub`L*Ek~R$xK8k07`IX;&^~&F|(WcXL>1Hy{g+e@GHSS 
z^|GdBrZ@!HD{5hYn3j`i9s+rA22f;!+(!gD%HO}cukhJJ(5chMMnVU^1eF%}W7~^-YA?#^2PP zsi#H9l{_l&`lr=GvVCU3qEMNMf2Z1cTzBE@DU)vN@QckNjuT+38 zH=HP#(q;VqJ?rTy2I$&A<100fi)lI^^jmy$^%$z0`(d9OuNwff08Y8$!>_ycoY$69 zO>bsqc2LLR)2zjI}9jh;C}<+KRh1n}?5>ymsyc zJ&^x}D(F4U-OMpDvbd*&NzELSn|XsmscHH3I~ZlIt9CjRg~W}qhQg^+;DyU;wABx? zVa?-msDrUy)h*wPHgtYi?JD|g85JL@gukt6mJS^X&}U`{Wm^|omf7=t{B1&qHGX)u z4Yqg*8JT2&bEh}vE?YjN4*qO!Rn#2A>ifCF_O5*yVT!f~WUJ-na{&Jhz3y{CMvhwzf*8vvIyWRMb$e+|TK;I(FI!P@F~!4Lk+Ai zjg&Z-ptSn#==6?#%^kQaTr#@(#9ZjU=3ax$D;`qshXuaU*48Nps?rlGv_)T5XOz+H z$W@Nf$^P&OplWZH$HzC4i&uWR)rgB<7Y9Vka=(Y9pNfcoWbMG5Phi)>Y-NYx4XG?K zv4~$bu6$64-)CX@>a13N<#Dl^>2kMmPyU4@4gJJHC*_aSp*RtZ%=?0 zd-5NlWyTBiBiLoBMlrUdc>qg<-tU@QP2J~G#VYr;c_BPkkt9oZ2A1$=$}ePDxVmPv z4%AvZ)#%~Sfk4>B%(kW!YN28``P&-~zQm;g{ry1O>LDd=}Q6Dr;QU9qY&y>uX7QZ3^8^^iJ&0jUbCFN<;Y`*6dv5egNSTt$8 zU2ea?%P#EKhYXvmqDk&=D9MeVX4th-T#ldIXG0Ach?afBc7qT~!;2wQllJo?R{U+x zzi&v{9SPX;E2xka^?A(9!s;6tF45x=ZW6wTtLR#v;fR;pte;PdZemTvdfrXZS4e7H zOg1&vdBs6CKa*^Ug2sW9X^eEG39;`v`}UzD9QdO4gkKk;J2d@J%B;HCZ^RCEc5J)w zn->(Y6V|PsKeo9ON{FqrhTj@_cSoJnIm2MmAy;q{h|0_}JV?56r8>iqdq##YyXG$} zOJ&*U&~asaI}-4?G4>+;4q>LYmmeF6wQ+HRSmw=K!>tJ%*)VJBN`4N5O?}EG_7Ptv(ocRFUg3lr56 z+wFejrO4*fVE7-$F*elYMN`6-xra75j9M&b@SyC;1$aitBCj!L)wUJS_Ot^b6Y z`-R9Ba2oxS(5>@;z2*QC4mi7nB}jxEnU?n`0&wsx6Uwq&{-#Zht*~H%bheORUY5w`%3;oTm-{wEu%1bie zgNY*it0KJap9KhwVawyB*0(CZm@MozlZ$CtAJEw+>1%~y>olfG@ju~kTaEHf)X<6W zw$9OE*0vXiy&Nu{+4`*)=4%Tu)+b@CiE^9ud6?UT_5=M9(g`>EDODJ?D5>n8v2s!` z9{rQG&t>Q({qrSc_j=1y4gJ&q@yWRV9}HxrVAUlo4qyoPeYO?@=1}e1X)c~VyWQka z6h0P!{9YB$ZB1;tB>MNATs;O*Ru-`6?7fdUd!OUZ#13Z!iHqQO1XRlf*hvlhpplSg z*q*{3)EBUJJy$BL$2Ly`hTff=CMEp39WxUX6H)oRg55^Onh@jZ5G2m@1pWX_T-8j|Lc{z8bt4_E8-g8-Zousd)|N1tGJ0r z=A=VK7$oQbW*#3m)X~$cH`EyxrhZEz6F_55*!{q4NuzGA&BP9MrNtRoou;QJZ6TPi zecxg9e}4yxCs&03k9QEy-t_*c2XD)=(9X9!W}2BZ10l{&`tnOgEt1AqGDVYyh_!Rc zWF8Ua^Q!YfThF=LG9cIsmDmjpVv%zaS$JoVCfWGSFH^!A#M1IP)p-%>wRm=EisFRK zQrfoX5j`HoO_!ZV7GA^^SMp<{T%(|(b^h0s?qYN)+x?8pX4@r@YdmVREs7Q+nMCIH 
zvKSMbL`Ix}0g3vO(=(-7XdXRj>j=oy_nxG0p`VbZaARoORe4JTBhj{N-Y)MyOj`KO zeA4?a((3B#Tah1& z;KuToLGpcjy_)P48c-4XhVkY-|%lI_n=YEn*1yJ;=}#1|l| z6rIH8ScGkUcAff+aUnUzNl<8G3$1}^(m*%je(<#a>(__im$Fu?FSzsTHmm{i2&>{G%~XJa9EV+y0IDwK6q6$rxMHt7fIKAeb(0%?jcGEP>Y7Wg%%C~^79W1gDB590i4wluEQ(z89Oio z6itGNz2YYP3D{bBK$r@*{}F;l{J z)ydp|@NhWCiBSUXeZv(L`Qa1+j&Cmm)i_K% zv1#8T58d~H1&~0GIl-zN^_=XaAsp@^^a_FuPxX~_7D%=h8t;*UYrY`?j8x%6*O$Mz zj`LJ3dxM4W0L$oZYmuah_o`x&RiW_@byp4K4tgGb2@$CZ?=6AR-Q73Vnn&7#u zSf6k=+z`+sUzU1@W+?%i{Sl2*yml|l^pEi7`@AnaZX&y_QMk6);|&M!$mdwZ!J+t# zzyCv4&C)`~s@)FiX6h$1D%Kr_V78Cn4Ho+JJcXT`o12m`;!(`(XRa^v@<~E>p3fEk z?01}@I;n1cL+aVS`QnT=Zz+N055d>063o^ldHNpm0SU0|$i+jC+RADRz{SE^w@5&#%Mu?z2a z-ArH{-)?>Z-I}s;(osT_v*fy~pN6x`65u5ygKf(5>j$EwipEn?-_vYAbOo7qK&Q2e zX7?dm;PgBx)ii^pyz&eJ9AocU%zcZ-Yoo+4tHynHltN!EXfVHB+}0llMqu7EpFb~r zB~%g!2kC)9{{9!ZxVR)_ww3w*6VM^#yScU<+=}Rx;KU`Q3?7d95VjC>$SeeQY|rB? z)|1&aU^x|Kx4C6(?HoUSi?^W*psfRt9YUY}=dIz7u-rvSyUE{M5FFoqFNPh%FuwiJ z&K;PWldv?nu!IoZFA-BcKVXO8fZ(kA;ll??7?jKFFcaJ_-aB*apA8J%%x}>?v_GLM z^6H|bT0;!cT(-V$dU1g3*x#>P|3Bwbe?M+#mT?y+Q2Czyxr$i5E`V!xpQl`1n(0Ii zI!2utuW9$7dwdVI1p0G+o05i209blXgO#FUy%vzVp@zP6`?)xK(XCJ(Gx9MG@cLLl z0HuoG)fb)W-}X}=)Nr@4AXH>3tS2-8|8A0R_7=7&8-(p!TUjVKfkO*xAcYtXzJGp} zpALP_-aVW7Ws~TKj~~YoI1T1{6^)p3QQ!2HdUbXs%5!f33B_&xt@1VP5G<1>;OfIR z9i~|C0cjfoD=#c8lI7fvA{uWhBhvBNZU1Wo2aEAmlW#`kgmc2h0c!;5Iei zYxN~LEsd3%5n*;P_it|28!IoUxO8E@${|)tr3tdPC@n2jtVd{cV6Uu3XuD?!Ld~s>|dm9`zeLMYb@wo z16z0u;K9Z=lQ8nAzYDS6Wk`cy7*M|2H-i=tIOC8uxywc!dEFl$WFu5+`?b-q*x$b& zAnlz)evRKM&fdHEHczj)aO&LB zR8L}XFlD-O_ItBeXmgn#YYhOrcU&A6RH#*8fHts%seW59woX@-zalAVJKdWM#udtt z7K@3Cx58eV3a1#W{HA{BxxnShHgv>~K@#yt5N&uMXQV#J=xf$}-tnZ^Zez28o`8@L zK%q&XI0SU^&c^WTz_b+@xB{o7O18H7Xm7IBCY@W}5#D+WdqW!%2LAluL5JDJ{L|lI z@}=@SE)r4jhX19(K06LK3Y0myh2_!62~ok7H=wEA^W!RFAd&F9aLY~EHMFUShZG+l z59iXwE90qZ^kVd55W_ogjNx%G{>$F6P(mQ$a}Awq+6HjhPVqbBHS$idLb_DYZ3-Yf zUa-oZN09aL)XYGvNnf!7fOn2$19<}iMGiRdQjjJ_aW!5&e^0zUCx_E{ZK0EQg&4b*A^7{UD=<;G z^%|)wdh+GLm!9V62&I|3l;Aey~VgzqTLxY^OAW{>7hG0qV 
z_^v0`a7?aRWEqd;s$GKAWfkwg9tL|Xl95Aj+1_4t{`KEO;4JuyaK^+ExEKcD5`XHN zWnAWjVo-fNu+v0<-R|$hH+Kiuvg@RJ6!Jkt4+8sNgu^(KGoTrO77e}j(MZ>F>=?M{ zEd-Q7a2p5?TA%!yEsV%sz<#?72uBU;=G$ls9x&N+fbyG>r2WR)5@>wfLGO_f=P+A+ z5-9H^+9B+_bqx*eNV)|~(THnH0~C*pt1kJ6hfmq90XuYlwuUY>D@*$M^XKR_a2s{7 zR`fvMVM6TA9l(`50bj3$FRtryC?8Dg=+Iir&COL%Rwfo4Km-0lezRE+<-3ejM$kDR z^MwmUODb3iW&x8Vn>&f%Z;qz9UH%af8D(&K**ybrh=m5x+qcJvvA&DdZ^FaVXPjpo z9pEivahYX{Z((l1{vYFJ;qUKHIwprQ1HA`W&uCu+JGb=wU!5%+jEtf?b|26u7=s@} z@qCN;Wy|3^ut6^uFPJc2gy4bvye@u(K%)<2955(M-Ttxg-n;D@j#C7bW02y1-^lJc zK*~=5o3ah+ASo?U00A-&1Q4RQ0L-1_=;*WeLL`T5uGkDXW^LQCo9H;O8iC{@*A))TlE5-U=Q@hY9^67~72idXPmz0^ z|F($MI3zQqtqRHRcVuX!&d<+Z=_~tKQ&Z5@Igo=Qg^rHS%CE?cG+4Fjk*@1SSOW75 z@B`X=%qqZ2*OnAIU;)R#&f~_xu9|m8%XuN7e5q_^tj}(s9ZJD&nqPB}JGA~;{^YT%`c9e091N;!MI9fz`D3IT&%!EGSg? zo@-EAAMtZwW4)7S;Op0<_EaSn9-huk3WAeUDXsIoOc@+LPcaN)#}E+5o2Nf1w~<#I zrXmp4I)SEBa!k#@Ao+?(H(36b$|XiPq*z*6tuc@>g6|pVI;gs?|B3{LeL-z7hqg5s zd`zSl^tV7T9?+d^VNfeDMFnsY3LE7a)}az_xxu#7xEZnx=h~VCrfb)_%yOteH3;mncEPL-R0KCIDVNPrhSz>Bbb z4jpSFz6mVVOtbQ35hxW`fbiOLjDQsQFSB@V$Mx4m32yDl69@>f3lQ2S;C1PRa$qxyew>0T47hiRRdYa_#DSTcQA->SRY=xD-aLSVf2`rz zU`QB;cZ|GONofrvA00a=_`ccoAe%#|Kv0<3;S&TPuDJ&QCZp!9z zn7@CUDHSQ_S@|9U%3z3!i+o052g)%ERnl@+K(s>06~CGy)EnlI0qe(iPv4>c#H@rv z4g+A9!z-Y>0KTf7)A@AH^KTG(9~hu=P{xgiW3B@LXo`0-s|vb>%#v@pnR*BF>MtEY zeug{)PPy!5^DVDwCy;|MtR719S2F0zFXXq{Q#-52tPMLeG*t8S-=GLB5c1Qyqz~%C z4#*FZVRKLT!pKK&HQ9jX7jW3l@;Ab&Lufb;zHAaUbr32`bjd^h6<~Q+S6eFs{bmwPxDU;z4EL_9OI$N z|2vAi^8{djV&6U8vl>AGib~+7Iqlu_&&aPF2RIo9fOu$C0xx(i!xeH70_?hoK?2$z z&4`!|o1W23e_0vzCJ-8KR%qh{RPAo%4nHbdl!gt)Zr)y?UNB4sdzbha}Hu z)PhS5STw)N4FRRNa>UYLVZ6O~$mhCKeVSq_a>|B1M*b;d9~2TAUT%E=kT2l405M4*6RV=xe~1%K2@dg& zhFO6#?&wZJI*EpDmdnS)1d0q>zZBI&DR$6 zD9bc6p_CgcI6HB)V{!UjQP+zn?ySy8@sONV1_11cti-?R7s(F->$NQ5Rn5 ziz?y$0|Ji!c(FOQZspK75~6GGzi0-0Iz#}PoHYKpw$sf8oEzj3^$C&;;`rtww0;2) zVhVlQ-M69=6r%u>h>nL2k%M9je)&9!Ru2~i@awHScHa?2`GBV81lC|5Jc~n| zau~Opwx#BgGw*IXmv4UBRqJENab73=0C~0p=EzF$U)<*7c;WY2ON62#*mI+=X;P>_ 
zOMukMcMrU%l|dvDZ40`f(TN<^pwhT$Z*O0A^zVaX){en}Xhly!=7Kiem3x(L<{oEo zP`De}!J8b9zjH3AX6Q}zgE#GB9y`_x`=-f{PF#Y$9%{O0$Zj3JnmkI^^5-`u`|Q*M zMmB0@mn@#_XfwZO%2mxT+iIzs(LxV6DEWABeUR>LWu)eB`o z@SUZ1NA=^%wbh}O$JG31n9Z;LbP%8&wTQIJ{B_M^^-efXktG7c`2kQ8w4NlC52nVm|mD z4reSM^@3H+gZ@N7D~v-sA-{+A`0;pXGf5R$j>wifEkAJ4ZT|R*D%NtOjtOEvvvO7x ztnFCn$yFQs(&BC5shBm2^EmYy)1aJy8&Y!^4Y!+zdW;2rJVmpt7<3_w?WUoDl_VW~ zvkp)11@kVM>?C{d5`R{gF*;d;Q%`z3J3EDRRRvKW>Sk!gfNGT#6J+LwEpaa!`;IS4 zL2@G?AONYpT(WFDw?RvssDPQgyuZNuZ|&n3q=i5LLJ{tSpxf})v9ScUs`g|#QBzY> zRITvw@gXIwk`+|%LA5*%qe!mh2=J)(Of4{<2Ge@TDCd4Oq=FSPNTfrFq!hwqtv|UD~r!#;Mj83R*N2gliy&#W59kbe=9yR1M?d0ri1L;|?^AXEF zhT&hoA=Up-S^@dFJWPw@Cr+Hm?Q0%Ik~mO2KtPE*D~uHsG#C=eHT`%X*Hp=z<_iW> zcMf;!hWw{P$$Codc9W_cH$>6R3{I&oe;(9bW7NNV=U*e<5)WdRB=$u06qu3t5#`RSd2j>aS1sPN zAM=)abMAIq)QugxcJJ;^r#NsxK8Rg|+E#J8n;;JRXeVl639kPZhUp*!6LM-X1R9k& zx(z9C9}}n_hJIjSqO-BdMcGk_m3oDX#(7bnwmgGY>!06l zjMVyx!dB7_2TN?J{1qomkMtmRdSz=UM)h~l-R&CAP>KzN-&IvWWRmz%PcG_MVL=R zPJOW4A*&Cvo%|XNGBab4pG${cF_F*JmE5&!7fKz*zh=i~XjQP+KFis)|5y`E8Z#;{~qSo*L)IeKR7(2RRD`IQ5tFqPqy7R)79 zQQEc!)!jHm-aZ#Q*M8E}!(a_&G?N|e#wJymgm)*)(kI_0%T0&B`$>H^8JE{n+;^r{ z86z9VqLiVGPlNyO&7FenrUS+#4OkGpDpV(QRZ8p_C-Zf^bs9pnP5VmJ0v`r8PjsZ= z{ug2I0TpGoZH<WJKnQ);t#i9K- zs3if#&oRoOTjdI|R|JKHg|UZpM7j-JdLeEGhyQ|`i2w~u0|MJ|Yfz!+RhY*BLbs_+cjh26E7ZkMl?J~QyJjFYHS(NCeeMm8{ z7B#fpxa%K~_tD0YvggzLn=C%}H}y71MGEHiM_jJDA}M=${rl!-!|Ca1?>l!~pD$X& z5fZQI^n2$tZU=W!&vsM}=9ixwde3;GvgIzDG>PyUIONr7W*T4wkFkorol?`ngh-=4 z)3R_ac#&hsH#{2VUhEaWeiTqBc%OOhld~S4GrIm0(#TYj& zU@MG}xi6r!`Se~5$m+iu%@gGDW%tN@z&QJ{tl-9o0#Kg1Tp=q`!|ztySQAf0ToI8VR$tyF20I$B!-rA(w*rj2C|J z+UM9;zBMXh&%NiVE{<^qvZ>`I%Egh=Un?RMu;T)absW0SR4|=<{?C50$m942Q@fl; zIxCMFx{t-AU#NX_hgD2UroFM-0`Bsm6Fi!DvRbjKY>~1qC%S8rL$kB9Q4($K7r{YL z`Iy;1%I>}ZX;FT3oI{{G}Oa=#TWU%uQ`_cEc;rQ_z~=-|iUkI&zi_s9bm z$aa@qR#(EjN}a$O{B|-7epJ?KSaRL>tl-j$hmQSC4z^=Q_yJ=}r@Y+fl7wgXawh+U z3+YX{+*bY4-J?DJ0L6JzCsJzZzj|Zbr|8Q!Y>h6guP^Ooo!CmD?L45y`SL?R>sM)^ z{IU0oAyDtMRBlyYN*ROZ|9{lbw@hUqz0n3g%XYb+JYsG8wVnZneb9{Rl 
zETBV6&`wT2m0{anKEbYyXR|@zq&lj|k89*7A0QCDWzBlet?>jy3e^hx8vFO-^tjJV zIaW{1ydE3)`XlL|~Nn7%&fP1z>`$sxH9W*WpsRqPdzjp0IJh^w-9ta5ptw@~v zsNnAIUhKc)#vaRtOU>W8D!;M-`vL?Mma;mJy88KO#%n72`}<3|{7M8wZMDX>j&>;k zburPK*Y2*4x=3q!atv&bN1WSs_ti*hL5M3vivwQ2rIQcq!h{2xG7VbrFo2BNv0Fg3sR>m`5lxh`bI0&vzYAS8$;kw8W1Y$O zgO$j<=I~yGuER%;#5<345fl!9wF97ggOJ|uhf&Gc0fYr5+1I|mNuSiVI73{Wd5ty% zC6R{MA0O5tOLbn3mR5_Bv2k>)L6M^QKw$w?wxY^P4Inu5doCiRZI@-z*=jS7eLMbsqS)iQl0A#NmDvmPshFm`g zc379?$(I7DqWRB14}SGEp5N$|Ah%AKE)q0?C`9{(XA#SChkEk=!9C~#( z247+k6if5pcQ>}0y#Qv+A2yL@L=Ib?HDsnr*>}$Pgj@c+?#w*Z?RRKcn{4gnxRmtlG8Z z8^ldYH{IPTNh9YLVHhd&=3=%jh`XwhQf6TIu@mT|=z`Jl{Yqm(nVyFsiZEONnh$W~ z%m!NiWs(}b+V$!24Zy4?uU{*mQc}f{{DdvdzH8E!?lCjH8FS$NTD|ot>RK~+@W>GmaOS#-svv{WF7krAtACF2qj?TkGpZx^;^c))d$peVgU(L?I`QNZ^-6JAVV)axvTsGC6l$l$N-`BjtEXzG9 z%$M-Zs-JGl%v7w5Rc_2yKI|RlmZ0ru9!H_=L;AQjI64}dpP#>d$Bw{0#f1w?k2Sxr z@33yqi#CkE*EoTj!_W+Wey@B|x(|&rJf(9uxuFi>@C!%$R_F zLeg!VLvLMNTe!;Y=vm>GVl7+};69-ezI(rJ&dK7ib1S2F7odB;1%UIPQSIF zi^8?=dd3pkQ!ciqJr1$4JK~;JGE-02dx zMpcYbZMY?`XB^g5h~4S8ajp5h^P9>OtEy<|A*d2*e*L*c!%5wj#R8Qvwyt+FGFbh5 zl0_D9t$+B3@}*EUL%1z!x2xRWC3uR*xM;h5FW=a>lA4^;%=g?}jk1+`lmNx89&^LR z$%nB{_WPTeDiyw;U*K4{=;3(v{qfpr$Q_&I4aP?eG5yeMNfc#Z_p<+F6@@F;yr^$( z)Zd8mnBvYI^3iH=v(p~{5j7wH@t96 z?yL)Abq8v>}qxvT~)>3i@so0E9$}Kk+R5Y{jSSj5L>w zOq7+M%kJ5l>i0J({;FcygY4D%!T)>S1oTH>BY~oB#nnK>&>$<3XCieAZ!zyg&v}|- zzd>yE#~5W6vKiVgBAydRFL#k`j8~0I2HiE6EF6Wh27rvdnys|=ws2|IZI-g1RlH(B zf|qwvOt%j&rfmY`CrMb^vF}()`@pXdKJ7hwIXF_w3){=Hf_7u#bGii*qJLN+;=Tsz zlE@QkHeLzHNt#sZO{9{*VW+;>fBwC)oQdf;*p=^%6}9qned83a+^4(DM$9{-J-#(3 zyN>;cTT1y>Kvo;DU`jN`+R>5V)6^2`2?9Dn!QI!t4q(V(#TH`B3y&Hrzm3Y*uV0IU zcmwf6{+F!Q35^ehY+E!_G~2vEHPB9?UcQV*P`lx$OcE}`ew+6MO^=t@xt8Cvjg8yR zEl#6Q>pO-QGhPj~_l7FA;>HVYug^<4Tp!%Lc{8jR z<?W` z0{}(~t3bR+HwusDFGbQ0_Vx8mF)n!mK7yZ%i>qyN?b@{@f7|@{@UW2GZnuq`upvQkQrjf@rfPYB^=5pQTZW3V})Pwo7f+THCpeYxAt$6!M{Y9v-A=y$0 z#oPq@bv5N?lXwiIMBpi^Pur8pJ1}_k#T%h2)JEpWjjMe#?K%VMkTj96N*WvW0RYqi z6B^}kM@1-Mp9FMLo*uZi9J2bU(sWRt9WQGPDnYF!k$<6=X@lm6sK!C$E-frE*{($; 
zCCcFA0=TA7#}g8id}E}^cpyb7j_yS)8#h8TlOhx;S!bAQ`X|^AIo7J8ctZ}Qp$4kL zmI2)B0>h?`O%$|KjeV9eDAdn$)ghPzHCyY~uMd%j$%FNE#T}Dsq+Ni#82RWSAr(!_ z?`O8>fL7EX+-F_ULQHD{K=oU-;B;m$DZ`Rw+bLQ`pOICP9zOiX)u!W!zE>>Ch40c zXnh?QDU3Y_)F?%dfX}j=N8<6|cN8u52emG;|KxBMri3@yqCL|BgQ69Jc{Pq~+qR7y zRs4-n`Wgr)$vHp0fAu#f13<)sYRvjQ!@PVF8`HclEe>Id{9Pwt>Sst;j|sQ<@5`fW zB(?&j`1$!A-R%6Orbau%vXOy*MaOjRa|jh!g;Vz@48df=FUG(~z!4aMI)PpZUw$Ut z7mx>tRSWxu2u-MmFWvY$GBEHF&+jR2;Q4GhMJNocxbZF1Z`Vk5-!V(h{Dr^N?-8?!eJGJ9rPQ@d#+AE^OG_OodiWIdOp2=Ku@Ffzv^R2;=!5mv-oR zE?o$E0V~Nv${zyi5R|p@-ARF`Nbod@1-rIVR^k^IDa^rK{kjz6+e9}ahz-Qo=wch6 ztc4D<;$1TNVS-xFAdSygTfv7!w+k>mv8&c_`0Sbo;;p0b(kXRe|U}f}c#l>&XH%y1$UOYGvhEMpBaB z+wD7IHt^HiqeGQKLqY`n|5_b1J$)*CZ^h!sJ@t`Oxw$G7rH@RUhhI?WRPYEdxODJ; zGfnG3)gKt)M5Gmp=PJU`LvTR5Thb!&J(vc3MN7?FD2Q;exysQ8=DI63mN7G z$k>I87741URfHK$O-?2*-@w;ktE%`JJt#)(hS?5f0cWSsKt#w=Y@c%oj|>P(-nE6z z&CTSqCQ1mG*rcUDQ~p8)6J8tO&$sT^etBIB1Ax z(5(t(5D`-w&p*|O^%;R{=)H_30`JKCEmm)3`f8G+wQ!*KRWu_rB_Iv0&Ny5nGt1hd z9Vd-T2Z!ro)DVnE&rwyJ(>97x3hwy@-Bm3S7X7+s()L|J+(qMv`H9#ay2xoM2$-8v zr$8TuAl-uoj(|83%&okUADt^EBuFG~KkWThuLcXutsIgDS@krQa|)+D3RdGvyIT)ZI8G4i!`Q9w*@zy5%GT?`Zl8cL%9NJ<`Z?!x9v{&;@##^Ubb)!b z!1-SnO0v4U7tt3Ex;3w)Z#?$*68gFuYm~L(WhMukU5zX;I?qzf_3(iz!xBC+0D$Ibrg`WJn5q@!RgMtPJhzvA-Mt4|a~$;?eLe6B$1%U(h~34-r7$NQT1N*08A7~n=`V1j z2};3yWLFQ}hwP#SorM@IakOB9Ej}Y_W0~NOUnM!Pz5g;5s85uon$E;Yf^kRLE0%Ri$&{tWV?c~cQy>X}x zTZ}zb4#5G%@(;J>tJp}pAc~0iBG4w_Tk&K2J9Ixv4kc*}@{2P%T6)TD@sZ&#?s-_T zW$V^N48D&CfEB3+N$_l|q|L5<`%WUU<7!tSy_1=??ddLkPzvLL_K3VO0>Sq@zFrp; zfyVY~|1a_hfGbJ?&>7pG%PC6Pw%sAZ8*!RqJA&=6LcK`#>b?q>(R^;&hqBH)U zWVCu&t4o(&A=#YG5E7BMn%z_ly`#-BOw6VC?`K#kThuEJwJX1v&e%xNTm5jSpHk5O zCwH91A8~Wki8&q%7B1(OMbuC*0npHC@uxD9pid{_JH`P|G0E!)fHzBXxsY1>ZPipC zcnT?;xr{!%WR2cYyIt!i^c*n23_z*8eZ@@yFK*oh+O`$*|n?dBO|? 
zDN)tYQl^S3!MJC?5CqF)z(b3%j z;kgRNUvYJIx#Mtnb8rQu?YmT@^ zUqgU{qFrxE?L7Bss9mnqiMjaG2&8vzt*7eyQ%%ZfD0}Ro#3E&O<+a-kr)Ih6_lyH} zCj#A`gFp&_;W_#AkTDvDhHm%U^Srlr>8+DcWT2CPxC81fO72UhHh%YVonxCX{k?WA zqd1tSry;FqrOL$g1TEPB#ZP(CY6?3WdAtzA27F2|59}_J48~P8g!gTfO>QTrh}jy^ zTbrOEA^j}+~%%w_wSDpK0l-+hFPBljX^>hP!cDuznx>IeVA6Q zsvi7a*jpZmKF0|;4?C7qsNulvVYm(4W3m&3P6i{Hg)8UmS78nuIL5nT!*_gRV`p6h zAfn8NA%K?`~U%>ADHon|-+VO!iBbeFFIJI61fR zw~4nK&BFd>6+NpCWV`_YlI$D=hv4jNmz=^M<&mfG{wMJ7aP|breMAlqOB=`m; zs^8gykC4WnBCHIuYlAx$)JZu+)TvE;KtFoe%~jaEMp-_>SR4GH%rM7-x7sC;2!ppB z{A&X*jq6i+H`WMWsuPh{q%jI?H3Q(Se`1>2wq^loi-2iJ8(;i+_a+qqLb-xYhZL`& z$bezOPHtRSp1o!3p#Cg1CZw+mFUDsCK<26T*@7sBB(z9oTD1Q40llrr@gy=}*NOBS02Iwox>VpQIx)!iNk1)~*{Wx#W7R#DO z%emQgRq-NLsNGn@@fk10X!BUu%-`B^qkGkmaIT<>5`n#+0#ze1PRUl}pMLXKq@0^B z>)M`p}+BNM{msy_iOSzBG*4>*$xy#M0djYo09W6{)f z4wNa$MTg`~^fO_%Ha|YPhFpV0yb_4--x?j!X|0>*nu+5X4_ibRN)V3+qq&D} zts_U@eQHE|$~uSik8=5&%i6 z@%ZP;^>Y?ANs-v>9GG)Ak|$qyLknyGG5rA%k-k?fiAS?v!zal;$Hu1qA#!(E?ohe3 zGbC*Z+qPqPuVbhH!*`#)0yo3GbIG&UEl?EeGSs?09pqD2*eq2gF56SbZPCb=JI;4` zr)Sj-Fioc?r>3;vc?xqH2nY`iof3^Br6dY=k%y+h#F|-lX4u2RO-?JZVV{93NPuch zjy&K_=!13_+^Ik=Tp}hH;(5VWC61B$B^SxvJCRcM!{p*spgDB_=Dp;bT6@8{_hk((W|e#C@nK{0D|a4%2yH04NKub6upgsFAK(!q z)drIsf~un%IKZLNmWix2xDpMZ%`B^P5h%Y%fWkM|Ci01#n=?j$%KuwIswPq(0Upr+ zdxC`x~8ff#L3xo{sM85=$&_P$qkcnt7&8umM_e3z0MYr>4>8kKe zFrJY+9Do}L5O)4qv~vXbz6-F#=N`+@>56Rh+Y#~xyZZ#$R*5G?(2B;nztF|mIUWa? zRKi!9DsK$JD${}U6OYb$?9`{QtMEfwT4U!k$FsZRoypMv@?AUB{~8BW6B^1Pr)LBV zc%}(%KL8RpUgK!|$>X}IM>dNd8gJ(?d%u#GDbI%Dx&DobY}XHKO2DCW=bnO)vqwRdU>Qlcgfl=>ilYPXEk0g(-&hlBcKZ4i`bq>D1%ka-a+yb6(Nm*?f4Q^^U+_ymH{rZYLu!h8tYD`Jv`8sod?0Pkloh7n9?Fcm|t~8$sCIwRkfqwBRzxe zezhR4;p{z`ZvZQ1mP1*?n&qP1x7YidSNTcV2W;N!V1CPb!Ah&!X`ZTToU681%@1Iq z$(4vcZ0oEXwUoYC(BM8*EwF&|8I(}HO?T@ketQP{XJu5nAtn_eS&n-nqXKl+P&n}t8tjeSiXd3Q96H)aP&|J)OpV(s;=!>->dnsd;1r3J z;#P?gP>JM=l-`L`>^76jnzzTzSuCx$r4b? 
zS1hmPUO)-BY^;;zK9z;iB?0LA?c~~7$?vtjP6!m0CkeK-qP)r*A>vc^Lo?zykN=sK$<>VN@ z2Wf>qAohKH2Y-c3PL{5t{8M2f!n@vY`?c%Wi=o}3Qt(F^^~MeRAEu9}(%=P@`@E2F z?JmuBz^*`$Myph~oc{7v!Qm2t<0FAxHK7;wm}=js;@gI;oaCdb_WByWn<`9~zOyN_wNcU9BM?*q{>wpdXQd+*DZqEOlXr%F&4g0dDfp-xXTVBg!ouT=R2 zO@us|+0x_(_5WW#c<9FJdr9Rwsq|AB^v};?UMs|zs6Y|*q#x+`Gj8Se4(~fbCz66- zR@h@1^s_A(EQ<4J$j^312YZU&_*&B3+&hVpW|fTwv7?Q zOH=U7G)#_4JA11tCo|}>RqX=K!!6N9f8MO`j1D8+g7NkiZ$uj3YbCh4nJ=-=MO2YORbGrTEL0TsRdPLR3t4wkWC`riQ^S{%NXL2l-Y44&%Dg$E-WuD)Or?gXQ@9VQs zT8{tkgVCNrFDct>Rf1nLA21ucjM% z1UvKwV2v6Y?&}IsIB)IFnVU9@i#uYxyc(D!uU;*)`798pEo?Sw$*acBeCbygx08!> zvMNR4=l|B}RnX}-bY6{~n!8&Z+ueuS0-_C5LKai3Mdy`7d=NU-r>*;rcYymi@AP-~ z8CgXP%BCyW{r600^o}v9h`pgOJv~ti#gp;j2wlG3ud8P7!CIfq+^g6HV3_jZ*La_4 zzlajR@t=^$`OaG0u)RB3dH;S8m^_q7v7_tJ7ZbeL@nN6s&Kdh&RdtB#bj=4MGo;`v&d zd2>?(pqX^9LVp)bC^8>G*OL0k=Zp0Z~GJk7?~1;;`wi@ zf27WRQZlc1fxsR;`xueM9cE?A1O7D0fmk&?FWEvN9BAVAffT61&!D0DgP zWF@PdZ4wuD+4pqJJ}_^jL{2!u;%PD7vmB-ngd=jD0#VO~GpA;&1Tz?QB^0BV&i!U8 zliZ|wmi7MS%lt?NTST$e0-_M_2E39&0a*GSxQ?{8fw~F>!%Qd5%cvnqUxg;#hZs>$ zpFWMJcRt`Vtc(1Ris!O|m}2zY40MY1b?EXoCz1rlCX|id&t^P6iYI?6xFpH!Wv^Mu z8G#L}F*OfwOh6VF!Fa@BVLv`>rj>Z|I6i|m_?^I*ypE4U_#{6E2B;zJ^9ds0pt_%C zj5m|iP+M@iWiTVw(Lqb0>)mJ>b;UARYP}5)%zx8oj}BI>WTnfx&{K>f_1F4k4Z4Xj zX59^&u)D`5CJ4z}8#BKpZkhBObhhm0-F{%aPdYW}$SSjDIa_{>Bk?Cc-P^E`qQ@E; zrLg>+;T&H>;gpD@y|cUYzO^$p#-ctTnVnPc>x2n?N#}k2`S+u|9wBan>-UVsO9GIK zbu)bA2ZHE7TLaT%C<=Uq1L@Yh8CK1ovBeX9HtC?(&tNGGg%QSShofT1-ciT2cDQQIYpHrhT2>+Ii0pa2z|C&Zh z1(Z4T56TfhSEMaW;T$=0=@k7OaM5wJJPen@IS z`I*VeT5fq!EO)Swe)!*W)h&g$>G^4sq;fv3W9?74rhY%Ro61_x%G+8YT>Z?&s-?46 zaBS=ou7sz`?7>GjWm)jHl_~(&A_)Gor?HSR8(?qr5F3n=Gu7SqLhHCRzbBAs4hV z%hpTIJu}av8`o`L#UUhNWqF_?e>wmuYvH08F;lu}l)p(?fPuS?jpN?J#Y^seOf^<+ z>s(CXl0TsK>JF*5NVNzZn(*dCG?u#f%?rI%c!t$k_UBDQ=RftUL_a_2gVld6jc85O zp*80*tnw9WQh6)SlM=e{1=T4<)uYtIG}v$3ePXgcH#g?=8tjy-Sf)P`DcLzMlTKK`=RJw;&q2Y3N+S^3?2mze-HaG?icwP$uZ00g5mSFV&kf0;kI9hd+HC^ng0!9s!~z5Mn)=dP$7?OT0p z!K*!aR!4}vik3!PW|*t+q@l49{!9>@_S2J2WWYv|`aD+f`|0Hs76FvX-8t4CgL0wn 
zWz4k|1k+zg|6TFDS8&yqkf7j&6u(yt$!rRz99U0OsMm;Vf~cCnLVBg?H*YQ@%|#$_ za4ICwZ*%*A_1znc4WiS2dF6JwkCNs4N_@Cs$ z?eAc$jOJ0j{}dD*IX?(|+Q^9{_$i`(4z2?GXWz*<46ve_v}bU4!+aoQ*SxzK8~t7G z)Sp;O;a$|ccZVS7uvf`VZ*K;elMyHa-Qc+}@Q?;S68sgd;0hM!d(FMa08FdF*di#a zyPEWWj)wlVCPXP%w7w+BpYMd-nac_`Mptwd3G%uOcJ1MB?zvRsx%8z?45|-t`*_XF z6xKF}QA=A6YT%VCzd3LANBHkKYwa+9k|>@>;a{qHd;`qclM>qsc2n)e`xO7jZLmd~ zz3OJBKKl=HzJ8|@!kQ~4%CK{)yTDU9IJBdF|7P(EVdBQ#Q(3Fo`WpJHOIA9q;MDpb zqGs9al0Mw%Na~$DSChz$j3ReWant(r-x>YQ(K7ZF>Itv;%}Hq~0NIQ(RmuTr$H76u zGtldXvT_KOnTLv+*ua2x^)K4=BqK~@KEySP8s9&~@5x0@rtNUYaY9QXA`xj;pd_Ho z#MVLQ*8f^n?EXA+>Q_{_Li${4YJnpZv+~YLlc#k>fn^{HRqAb1O~574)cu%bPo)R( zVW`wgN`ShQDGP5qGS5%W1oca|p zJl4*Tp9xb%cKy?D z*S}$!2B#+~f4bHI(%|Z}Y8GhSn!F=Rx4?R9| z?{=2bhPZ=EQ!OVPctvSh?)t^|07|WBr`2w4w9Onn?XsemAiv_gLQXh<^V3N8A!mR%pO^jH?6LiMU_eI)AGT^lkFtSL;!3kufg+5iwMU?$3%~SG<_+3^1`cPGyNZCS``L+3M%&O25S%1H;LI zgJp96{%dlxUedPILGkA=2ZJ&SO$K=&2vWQ`{6eG&0_;#vI()Vbidug@RW}Ts=d}Zw zMmoRun0wx^`{pHX79egBZa0=vm3U9wpX97#|oarP9)B8 z^szOkmYJl&*T`V{?@g1f3@dRVRaTIXVo8gVKuKw}|{hQCa13EJz--a6JPCkjh>^_o(TV6&tBT zZTB8ptXOaDAfd4E+BL<%oyx7nimNtLkNPSZdRXwpuu1Suzt#{v-TwSE!pXW)6P=LO zp6#Aq12AJj@z%tE7}L-IfddB*wsv;L0Tq0B^X5%$>)da6%``n#bM{Duyl`1G$LO*L z3Mqe@X!=)i&y1IF4OhlSj7b_gD|XeW#h(gY@#WSmG>eR~W(D7#g36$$XMSUPo;|H& zvR-}fUa?i%oz~Ls`2B76+d4I<^j(%^GMA`5Jblrim$E9R*30Gf-i(0qqRg? 
zBUTCYn{m-M@KSUUFT5pu1C^*pf4~6<3BP1xxA4^qd$#=H8)$dsXM&oXjyg}-$0;(S zq@vbkYfD}F*wCC+EwDx0Z~JySvZ7ff-LkQxV{wakX(v;tKMFh2lEL9Ra2XoF6z+U~ zFEzV{3Uz<4voQ_&2=@Hl?HEIM|0I2}p-UV29s1L5DWtfJn_>RV?+mN@5Z95G%LAmK zk&^u?iZ-p;e&DsUj3ptFmo3{}xkKt_Pby%X_=S_;UK%d`O6>eLIW_uwl;2R!scV7f zzQ3IdjP3dwbV!pCTudE*_(0D?mDw3ce3|kV;lm>}=tf~^8{V_GbJ||36Vh2XJQ})D zFn|#os+;C#;ClS$p#PPtF{|0=I6X$UR&4OBT#NTTH0Qqjh)Pil_pln5hJCV}z{H6A zn)4WCpkwt`zl{RBN28tIXTIdTyL?3;3jA>KqSCKAIj_XecvZbj2;#5FotXGd4d&nJ za*xuo%ocB41>aq{1t<;x@MG`I1K5oc=OPY$Rj#w5M(q3C$gdXuKYX@|>~Fur3Mk*% z(NlTU{ENDcc;iXxP1ev%M=L2A{l#+-Vli!(JJZ~Grdv+@y0;tDbDZ_*Kn@&^Xg3~^ zOfN5({TlkkqkUlA&6|FCGfgW`Pki?c2n|KH&;0xL`il)8qzgR)>z($U!C~Art`aRjMEVPz%{y6p1+iXcw}UCLVdsV zS%@j9<`4a&L;Rpg_yj5iO-Yfg1tF4>3+U^}dqWH|POarqB>keJ^Knf3Vo%s`(N5Sh z2`Z>DRMyW{x+i+CH`hhEEFxl}yR-{*X##1 zPMHO~8DZ&fd6TwQZxLK_M*is4H+w`pw}mXF#*Q=)6D7-dT`h}em(ioKMU=E~LBMQIl><;DI`{OIfSdo9kS61Wq|kKfAm8x=x|y`` zpURcJwHd;fcLt~*@-kz#{HXAAP=pPg4OAHZ1r3EH%n!9NacVtA=Gg&(N6KwH4Gfe~ z#xz_x&Jaa0yuf0oR*3lTk}b0zpVGd9NhIUdC6{Ja_dQ&PHj5kpZIWUdiUAQnM}kvK zRHAhK{{qrpk?7bj&C=9a$w)IZnF8S|VIKB8Rb>m!;cW|H1xh<_t+zrN5y7<=Qgq0h zg)%-c0l|Vo%plT_=ZDD9Z5Av5D|^vubVzKB@xdRHMgk%t#Ov$T9NMt%jL-*@ zvSSm^^Yg)r2W`7rm6vNKlB8E#tF_Qm11#V>Slb>!p~QdQoW9W5*hqTCc52QFBNWf7 zBuA?_CtF*O5p%N-f1{SFd;+h7i8$QNDs98l6O4K*<)|oCLDo2x9*6I;E~P2P%AHlb z=vhGP&avVsvDhK=#h&XuKQi++!82m469NzT6M)5Cj(rUx^`y^D#_8uSxA9@s#QCc% z;u6f*GaeFhv^wyzpeQ}KF6UaX_sDzJ6cXoT)W1t zCJWjYBFEg*Q`nj}UANM73tF#CB+rMi)`fgzzSUJBPtezEh0cN{VqO-mk0OSzeE3)MY=>ay zkpLN)zO*e%mnySKk+M=w&V-F+8-0ofCgQ+=!VxVgSswxC7mKYY&+C=p#? 
z`p<(j>xbY@Fov9w;{~|EmCC?X+8$FEi?1(%MQ*)_;Otj6&6}vrm?X>qohrH}H|p zUrX*yG&PAgZeu6iL1)^!^s+RSq7BUJPlU@G#-C6jlF2p-eKUgFASMd0we^7bj}rTi z(^gj2I51vMss7tlpU$-uQWUuzzI1%J>UGy;H2kk@$XsZCNxOw($Da)LXOLsd-<+#B zEc25N-jE)hnmm_tby29$y%XPn*yX4FltO|q*#})y*t@mW3l!qdrV5nUIILB0WlYUP ze*wwX=$45YT*pbn2f0}QNLlJ}4j1$PO9L34NS?Fvd1{(`dG7M&{g*zLJqE8h-6o!O zb~U6PEe@^tp`xFKJ;ao}L*$J%-6n*)_si)kia$S|0hDas10B>3KpD;eorjMV2y3FXPgS&aTNZ)1pcZ^O|xy`x9{m z)JN3q(6VoDB%Ngz^(Io1n55NJtHG!FfjpJ)BOSNa_hNEC4Tc@kwSmDgeC1RBLu?A0 zpW7!eNwEltm%-5%=70E9nqNaB+PG-Xb{wU9QsAOMrWEU=`$6bndd`we)U`Zd@?x}Q)3u9{!5~A=R(DAzbwQgdk`S5uc9UCnM9R0 z<;JYzo)z|l2tVwn3yX~sI5aKv^Al_Y-|GjWijzOD?s!tfj#+@^`tri0@!f5cv1Sk2 z=fuTP)a~G}t}c!EYF?iva?jQLmJZ|wCs!H zD-=EZ;Q^U=gyT-~9Wm9wdCmuqRk_goD4xUhFWxk86_qIS85-2R`t(Cx@KpE+IOT*j zaq4I3X(o3&yYzDF=v{2mz!X1Qu~HPy1W(=F*84JYd7a@Zj>bwTo;%ST~WThQPl_K^ktI6@AL(nD`c%?%pJ}78m z{$E%4rQPa|t&=wPhT9YL5YKPdUJ3AP1Y}&Iz&#Nv0uhFIsidr1JV6_#q0XOO*RHDL z!2j&OUFD~S1BuHMENTe`Qh})+?fH&j0q_HVz@|h0MvYI0yojp;nUV(&5d3hz)eGmb z)!g2^4#9FwB;dfFLJy_WE>f%BC_DX0$iLji?4U82O6lkI!02+jJLmr0ItlG4j zwBwVO3mCTHW!-^c4SP(YkzeCcB9Atp5h_2-4eK}C2#kJ(HF`T{_lttL{Z!VEek&=d z`j>$}Tg4jh=TGsxH0O>{^Ds4>4b$pE20?k_#c3;Rl7-p!{nOSlIciwhtAl34RLN{5 z%Ikxm^BwqLI=yWpkrK$ZDsCm+!OvF48#X0?X@bP&&ZQZ;%{I&L6R&J3JG7vxA{(G>^ zy*{16hyh=Bkt0w#V`?3BcZn$5eVl1Qy>o4S7aEbePi61fwv$5NPO30a*J;ECKlc0g zQ~13}1MKRp`yK!rlS$mFRqcd~0F8vv1`onOB*u{GdZbrzaU=}=gjocT)W+=P{_L5X zYj=pd{vNoIG*^poa!cD6i`S~$J7W{u*BEnoQ4DG8WYwb)29J?x%Oh!3-K8oD^K9Db ztM{U9o8{uDYl|u{@JppQu+YpH@-s}>dhf~9-C3J2#xdCAzmOT^XovfVzh5_!SspE) z43}(cSwxMc8~Qoy(R;r*(g=n_U5wv>fs#YakTmt+@L(hxxpNuAp>LAKE8hBG_F=zn zWs~A^8@vVrqv+vZEB(etvOQ`{dXDjv#%0B(rnc+ie4Gkd*_Szg4by<&nT!lwkB?8< z#+^RM%a)qEK*9V7;ocKZz6hHjo@n%6MV>61BHlJ|LrMeF|LK%-DcbW#s(pgE4IS(Q zdS#j9Tz&-s5`G!<5q(BD%F&^~>#LO0Y`&VqViLZ0@2Ga-ZWiUs3khRZWGL+r)Z~8Z zQ5?YAN|L3(=R<^cCQff0SICq97qv5w%gu83n^LqlY~X&TYg|zN>{JXR2IUaF#z5?@ zMz%_t*w(MDW~Y-2uCn#CemSai&oh9gMBb z9Mw9cTwiHE^2!7O0&It@f&vxQ-BBNEv+EA~exd3FQ{t*{8d*fSUog-AES(>Kp@>;# 
zkKthC0m&9A?=gM3{DsqyN%ie%-v=kjXGGab4Im^xu}{XTMU$yTk0uB}?(z}2{ftFe+`0MYvsvHW2nVd+#RMos*GG%4xB-f6HWqO6vihi|^H_Y7+LebH?AoY}aY z*aeGJdKLA?J2BFp<2$ZmX{Tuo=~hbaXVm z?nSk254V#wIudt65b#Mi=XcYX6#ogkVr7cnhIs)+Im@Qio3!--gMb?>(|`zPhvq8G zIe7X<2D!gYl%|{4;1wzDK;6I>HYR20bh!BStBaS`k47YD1bA-K z{m5M|JiY2|S{P}zI6NTDJXqW+%4|!Xd!wDzua0da-8+#QXU3%9nOH%{a){>Zk!s01 z!&FWZ6fus%6EB@TUsi{1*vtpY?=BmEq?{nwoPrAT>idZ92YR-huZRo`5en9Q8DP`C zr#mON-z#VZ3tDf#aCl5*>9EV}($5o!QM2+`Ib^?(vhF4+XVCYh4V>6^ zR$vOm^*=%-?L}p5%*F3SvB$6k)yUHOk=)8s*`lw4JUv}*Bu0w1mQQUv2u)ZdGW<46 zgnf*XqJaTJIKM$!yS*_dm$i5!#g7vtAzl5>i#hEwpa90e$43;!OhL;K!%pr9@?(ei3>sFR&UVjbO-etjXzOoLw|+D;EPM!` z5&a1p*x4;JERfMv&tT zT9{u%L-=8?qEsUn#}!kilh;IdCnwkmzgn9G%)w%+BEGA@opPFLD%Nwug8D_1pHT(F3V z=|!}y?QiS2Lrh}rZO=|UNwuvL@AQ?Y>%OEzcTx(b!M05( z;BSIu$VCkpD0UcwM~O&Brp1t6Zs01?)bZbg4d`w>W!}`#V3@1BS@f-VLvJJh;UnRe z<=Elh-bL#2=mkI4%ssetq7XGjdHH6E((k9=^vRlh_}ezHG$-=Sb?(D2DU>hUpo(n5 z1SXQx6v?%6Bo|_C=u?=_$z-NLxH`!nsvL{363iMCn2y(5uMD0(nD?Lt8~gs-8=s_V zi0a>m!ofN{!~X-{(0%Gy2kWUGuBNp@REmP=i#hudWz?I!>DQd6$l5`2_FXw|y<^v@ zT5DwmmnHhyUX4DCTkU(cLmdMlzcrOF`RkRyy!1Z%(-gmj`!?%{r?VIarCaU2>zJI! z*ILGZmtAlbyKC&2&l4$`Ab3f^%RW(+@O5*gKRR5tsaWqH3WdeLymsEpMcMt>tFOtz z@>7{Zv@4nowjZYV=ZBitgrMEcdBOtrxm2~b((?FCLW0{1?)Xm@tY}*}(ZvrOhn!@4 zJ1RAB#Y#%c_DxM2XL~{BBM_UUjn~&XuFv`+6KndfzQ3|SabV8Ju(!uFuk1_?uw2p8 zZ{2?x4s| zH|-HPEeUj-c0M%CJ(ti>ZFg7;96tPAX~)8L=Y6QDDa z{aUgYn%#34Zegnh&5-3FVZ>`%*3}@jfQz3z9KV0Dv`km8ZrPE9Cg03Av5OiC=nNmC_nHL>b+{ zZdrFNe?qk`uQ8EdO9`AMY_+!Ah{i(OMuG-$onRs@X3|ArfG&ST8KA)27U|=6_}+MZ zl}T-KG+0{Jbcce)aT+^$1$xTe?}^n5U!IV%ITXG}1x|z{!;I$i^P$6a=iB52-%va? 
zy@_gzR+JcwTYYLa{6#o!>zlbc-A{_kPPM|TZ|U1tP8x{Q;ZoIi{O|LJR%N{e1xOqG z_N2b@c*$;yHwxizh*61%wFAS$nlH}0oA<_3UN3T*b_!I6JaIi~YRt6o&F5czg0L&~ zMN97#)aOIp*jjFp&+(wyWAFaQMB|m6Z`4$UHByH(Ng=EOg)UV!1{cW^6UyL{lgD<&UqORB&cDpO1 z8Dufs19UV+tmE#xe>>zfW%Yl8mH{p`gVv_z^z#RCn~~mu>|S_KRpSLNqG~3s#%MSC z6N}$&b2!pUeSe2>ltYnUTS?YM3BJSHlzShUUmaC=ox8s^h}W@CJMK(dPmY5Mlpu7@ zn14Q29Msul`$=>qh5DF~83ev!{2&=lP#m#085DY|veLS|ygW2D-5r}(1pI#PD}H*W zB;asyxR!37Uc7Gg53K-JCvj)isnPmlFP3FLu$d*-na$ZHw@(e;7;g4poU&4E%u+-* zyGK&C@Y5rA1LDmo6xw>?A@~q3kw82I0C=BkW4KmapVJ*UTanjUBag3QJ@Z6&vQIj0 z{~}P;O*wb=crU)0_W)`eoXoi{+!1BR7S_iHV*_YDe~_B6xTWAfITgY42Zzz@ZuHy! z*7C`U)q6@ndnahCu$-%i;z=#kJ*%v1-jsVyt-^S5HkO zD}IPQ=1;g`&I}8F{hVFIT>vXxNr;J|5GRGwM_A#{vV#=z`?tI(ld*dV84 zo_to&6*CKf2k%!mm=Dz>@BCg`@5*wq{dTCbwzE$NG<5Dy?;yd5TGJ8p)YAL4PJm}E zSIARD$7XlWy4mloz_;GTUG?Z0M{_u|&iRW&0mw@~yrQyJ(sJuvr<+GlhM6sj+IL*a zT7gf8wpscF^8e2PYnj3G!-rn15PGM8i(79txdk6?O^yJH)S5MGc52Nnw4P2+PG^lFhE1ZYLgIY5eNA%2>T=_fV^1?L zKGsZ*sw_TiI~9NUut4& z4Di}Ua(D(fwGI4XHE-`%lKY>5%y zy6g>?M$E-3+IB`$RU*ReLxl-4EsGLt%oBWRX2j#>Hf}bt?ZThmd=j^tVXkBh)cSaI z=Me`axvq&c?UV5`D8D21&paBK9keDNnK1j^Wjww1e3x9u7G{vMtL(nnXUu5bJe=i- zDJ1D@WVk1`maff`(O7%*+3O`Mlsu-4uRm{0d6%}(W8iXCYHW~j^pN=y3g@bMb2H~Q zGO7rBo7C-m>JL20tUrLpP7MDfb3FiS+IrS8YyGb)uZ@2e612#jadiHR2uNXH&$+pl zX1$VAXqWw+4tW-F1+!jX9;4h`L&~lP z#8N}ZSP%%ryba#e4EH&b$ztS!A#_^p%a)n_(k+L44yeq!Qd~ywdW)7hrJ5?E80m@E zlN8hKgv2%H4J!$8EWay$;pbTh&b>AIx#l4*zrqxPKsr-mci>5Pz{O7n3c$_M0UA&p z&6465za1iT{Bfc$IylY#GR|xJ9LwEVdt3H1+m%Q1PYwBMCWeXsiAwp=)^H1>1Ky$ZbdWWY~9tH(d?7ij8054QW%WwXL89RDf8x? 
zOnw0v_iE}io`!OfNgDH7ZCU|gB=(!$-3$v(gRXzUp~H%ps;hV=R4@?U1(j=CQVzX;g9y!d=vu*?Hsm~)`)j6&9+xHu43V&kVib`mQCw22uV1AC%f-` ztX4np?*EbUfC3yLO(4uoC}7-yN@LlOG=MGt6NfS1g&x!DJ^nJ=7)aTVA*G>)z1n|Lg?bqf6*F^%((a$@ zo;^ePE<3T7Y?3U*`8`)D?1j7SBNxm#L!+tnUmumfOL_M5B zm|SS3lq$S@DahLZf}{0WDe@86>^0ed*0ulZI;jf1(e_|(hvPM)^-AvBYdI!207aGI zGBG9%4V0ZTe63$98;eL%i$8HnAlR6!lnX+9ZJ{#J;MrRFfts~F0B}@9fCDoMh~KZ3 zgHQ$(wVq=yI1rK(!YQKsSO4uV@4Q>Pv-0hx&@(_bV{2;8KPmU~%aMJ&ox<(eLwpuu z`jUogqp}g)dQfzM3SDoWP2VH0gaU%YQ2Wm^$h073xYzGIuFUwb+EtYsTrh0>zH^P1 z9-f|v!5*3!ARUbWoE+%Jka=QA0rdToBZ!R%5Dv5Rn){FCog8#lYzMWujQUQP-MIJc zJYgf4{|uc6EA8@zXhbpCNG95C`rtP+J@*w}16G~vi&CqvphO14O(*Xelq%SE`NMwX zFC2&n!NHB51GZpfcoXzw{Yvk`)FNai0T9qcz%^V4V1gPEk0J2kVrr16dXL~`43p016BZWU8 zC_%N0zacW&FF6eP5`_Qg+%lBYvZ<51S_zIT^==?7u!$**ivmI?2MU-kvYA%&vp%y#*xIjASVs7!wRGQBgKB7HMy_=||9Ah%HcnQ7b{KzJH37YEIt$0%x6xvPb1wpAqpx02qpW3_*~kJgA6t?RLRpXM7hLD7`>ELT17vVt+P7 zLqHzg+XXPI1PTiR{5PNjxn82YznnD#zBV>VtfV$J?Th5sdi98#qe@Wjs#yXnllj5U zD)fHGFRpm`rhvBt7YLUKNyIz@XuT1eg0Z2YYafnlT4YptFpJWHfoWeJ+YHnb9N={j zwqF-CEZ9(pQwtdei9|Qpp10<5FaqLhoaO#ShjAS~%tm-v^!>XPxv8)o3qtVLNP+KG z+Z)&fqkB5XW879Ugsnp;Z|wb&gOCSWd?1*BwKy;2USJ??&jl4YO(`VV18~qB!h?WW ztr=Zi>Zoe#`DLIQivJPsXePmx{(FEMx3;w8)tE()wpKwV0@ly-uS-#9g?R5j{R^8O zXH#1{AEPi)U>X(8CvtNtQw83$EQe_P;&#pi-YDWm{|jdO=Xr>_~QCc87R)YclDrFkcmVh(Da_&Rs=DvG`Q`)5}_Ftee~UQt$uB3q*IrTyn; z6Jd#(>x;W&-(MhNKJ)E;rcvZ8Dhbz5hm%0&0$gUPtqf4wTD=H7qUm)uO^d+-!U?Wx(1bv5;hM|g1;G;%G zUo$FMcgEY#eWZ-A`P(DY9BLIHrBjo#yyB81c*k>weFhT9#vmNE%qy@}wSO$u!N(c} zow70sI_2JXQ)Ev70L}w|TtN%SvAr64->m>?LRp0pl%Tcm+tsFI z{XTeQ;LPPgj-LKPGd*bREg7T$Z_=c}mqbaszsH4@CrWZ+W>`BCI4s+*Vis&C=e)-# zt@ZxOf=2STo6V%M&HIBhMD`q89ihJE+(K!?8ve%qgB1QpkqiXL{3l_A2q2LRU<6PE z8&aQyN~<22{1T0skgrdXqqOGw^80sar&^JMSQLRhg;a?l3&2OB?VgAX2)ngd3D2 zHnAOmwi~!ZL5cW1I+AYa0l6lc4e8|qE@&S%VEKX{#D3t_L|PI67fUqRuYO-VS?O&6 zF&cDlz;JxlUepd@14BLkk2F)|m8{FxYiA3Zn>v@iJk^nW^GRW*g+-+cpm@ZN1jNiW zMVP$zDY|2T7E1X3NFxV2Nih#W>Ie8!#ZkY#I?s*ynLkb(1reyvb|gUgPN4DYNIkEJ 
z0~aLW(dUH}p_kHCvY+66^jZW($7IL&h-ysM*_~bF^<6aX4l%+V^qb4gm)`_n2Yk+GeTmT7O4j3h8wGk=A{J@f* z_u}w|(shk{8DmgQAR{6ho-m**0BfQ-RE+c2{ZMwt4p>Z|K|;s}IHn7Lp(`tC+h1T8 zQBBEHg4gwDyWT&G0tKDPtr|&wGttXYzN@c+hzvei&;$464<(`BACXez+U+-+ya!lG z2{&afZ3VzsBiputAgUG8DFslO*o5Ac%~d^Pi`0YAti?W!wo??%G=I z1WGKp4-#}oB{1y|ks+Qr6lolqwa7GkKuaPxbx>rlgGmB0L;?qiz{hI}UhaN`h>Dau zVDAOOJNT#a*Urg(+`s-a136lV>hGUan@Mhs55n7+`KuG1OY^f~+4r)>rU-Gkm>3@} z=Dz`x@5J|VqOHTCpi{=_!xuWNOZ znHbh0BR1gYD_{Wz$KF-$g^upqBEt!Y;nk1LSl*#1uIuEMpW*5xf8$Ct_QIfDwbgf zulXL)tcBb@Ib>dAKE{l2DNOzNFz~ zg}?=Y&jXRqBXz10&I|-_4_=#TfL+4q+y2YF`}vcC(ITOgZRXtpK=&vIuD5bqtr0o3 zly&LOUG2rMPe0DG$PCHTY#NpTi=TvRWeMNKZ(mXlxxd)Syiwh>o>^$htCRP@vBV1H zFckm7*l*9ob4ltT1mj^>xCmm8O14fbE{MRA`lLrjlCH&t@dT#Cy z;6b=|;N>kb_1&@z+8&06`~Q^nD5$&<;%yUb>{CPtFcuk5@*WgMQe=30@}GXUJF9Uk zC?B(yx&7;FXTP|`bELGleH+3Q(byr7Am7r19Y`1;>YT}z1_(tF=y?}P7NNtBplhQ* zN)(F$U%yJ})QSJ2$Y&hLm53`9!3ZKCOMvAAaHaO|sa-%RcYz{@rD}iXslOfL4PEls z@omhPrzfD~somg9JesUvao~WJV@Kt$EfS-)M0^(pm>av^LcY$qx)A*Tw;W={g0+Rk zarbY~TF(tvtxC;F+*Z4Sz5TDY=aX098>m_rLIj?eiUzC2D6E7Su+YIp0YeqKPV`f| zph|)?_t5@O7=$rNy}ADeQt&ku1ARke;OmnkfUSVKyJoT^ak$FDkdJ=@tfBw!EKHo7 z)|}k64oc<&hmgk6Y)?2He>9i&HTobnU}B7(gy1XeFXTHOKXJm(8vKB8o)0r63>`0-#CJUMA{7=Yug8&GKXnOXj2@_|yT3K@Qh$oBwL*r%l? 
zg@bCJ4ZQn^lMVj!h*CZvBmkh2>)+T0MK(t6y?+Kh4Y+d~c632p2#3r}Nhb&&GS+6s zWklT_x7o1@vi`!}9>YUG^oB@l_niFp-JnQp)zuMi0E%}Q(?9|@>cinfo~b)qEg_*7 zbZWVO|Cu8s5OCZe+;#d@k9YnDuu!DAfv`z!VcZ)0Wfn5o4^9I-7zGg0G$c|Ixfc81 zDfrm&h{;Z|tsX*BW(4}1ER4tl=|518yw?705*E0 zffsuO#zblw7Tp3qN68`#+5pcyW^VDM(2{S+bd}6fa+k=)s>H1rko+LcmT)V?p&f16 zRHFu__XzLn&~=7bbk_NsKc$3qr)cAVs3Q-rkrl(1;7?xV)`_dGYs*E{jN>=1NTjyM zbzeZQ;~{=}&By_SdF|ZjQr8?x?x{|(v?-GLWO@&>y8qNupvEyC2(|w>7v~{SPA5}n zb--Ln5bwcNSzvZ+*A{DtJvo`AO^9D$2L*-IPqaW9H@IvLf$>)J|IdjS`}=}@wS;vC zmd^ysARI==FWTV^71$aHN${%A$2e0`hVO2xNqF^@t|}rrBk0KI8zp>q%+;>itb7(2 zr#ubc?x!pQs%QpEI&_z)PM(YcmgXOr(npuMXs~ur-tZ%T{-G=76+d)Vjg5dt+}y6# zJw4O7kJFi`#ov#%E$yv46>Hq$UKqmZyZgJ_x5g6HUjP!3cu_O?G!FACa6hp?^g?bd zMP(>h)H0RBt=g{{=##1;yM}HINbX;3LwOAxCmYDoh#u`!V-V$WPC0uD2wF0`9vnZ0 z%akM@5$)Te5M?ml#E8peFpot{l|k~WF%8y7FsX|z;UjdczNP)ga&i4MGas*+kT=c%0su=cgD!bOSRq<~%n=+jtBzsAx0F9Xf^+b#mkR zR1+2m%MfW>=4gU^WHVVN4JGyPnE#8VQt;j9 zu7WfLBIZW9*`Cj?m}uTP2)ZtYRI}Yj_pooZuz-X2N-rT?4$+^xg)q==s+Wa$!1SS| z=>K{B{=#!VP&R|Y-Mxrvdwmv>xL(=5b;c`lvAw2g5qsj4XyXS$+~uub!{U0SbzV3q zF&Wl=&0~==1pRtz{Q9n3uLKs+!ooxQ9P)=0mBASvPNbHJZGP9At5P|T_VjZ*xYpe; zHtmhB*3Im0vYv~H2A+=wj>drW*JmScZD$i^npk~+xl6eAIF{Otua6you(z`437;DF zQLuN3d23XA3LN%86;5ab1Ie4iLsfs>nB(^s7XzLx$7+> zv4!J%W9vv~vJ_yuG8;=Y*@Y4vU>#Pwf&jQH&G!D$@{q0xC^UuT?Y|%YRbl=yoQK_$ zobCy=CMZAw3rw0Vp@>@(iUJ@6Nlr$Y@h4s+vsOL431a#@MkzzQ`upEMjVb_rrdB(3 z|Cb?bx-IALr!XFV8W`n_`qUFD7Zk6deCS}tQoYdS81NZ^D?_`k z+M-0tV@Kh;<7hM-SLF^uGQURs8r)NSwlj8jRL-?NEP>C+q5IDb5TvB!K8uxWVn(^{ zw}L@Ml98zbNmK@}5tQ?B>GB=Z+3E61Og9w8(N|jAWL=Q%$jqmKk}Ka{)h`Tlk}5Aj zl^}}Ou9=TdkHEG7mx$>8?b)jZ;Uv-&|FgG+S=dsum=w31$yl7!!g@A`2!>KXFGd?`=#Tz)fGkkQ?y2|S7@2}fS%q5 zD-IIgo~+QGx*D6cr^kVf&jV>6KyTrEFEA4urvY6hC{Pa~>K|%pfd~hA-p|O=N$HA4 z4xZ~L|8wW)+{IR!=xV+!4vX8&XoA;;)7`Oz@0xo2_2rRqutznAL`h0fN|}K0IS~FT6qf%W3S-_T+y!m`PP)LCV>eVERE2)B9cSQuXW#WvmPe;WK0p3~5O-Ihcv)T9OGKxZRpEvn)Rc1sS`SgUs+2^>n#vdc4$t8K;Ek2*eS zcbUjIPM@(kB1GTx?Y-6ZxA*RRfg^QEhFr+1J&F*C{y*~H?thKg{z+fc{Q(kVLk;E| 
z;F?s|G`4g~NBrJd=vPU$cf>{tq|XeuC!4fln@(tLZ;o0GJwBva;RuGaE8p0@x|&B@ z2IlXK?zIj~zRyK;2+suvA~AxYpFQX;bEFZ_9Od^|aU2`Znq2uVUp z;5m3TOUBqN--Eh9_q(YXPNY+fK4JEydLIk&%jmZms>Vp7=&8-_m( zR5alWfyQd`>eLHfPIU4rO!t9KmO4}^&OVUdOaunt=L$jV=t+Xe5TkJA*?;cN4Q;A= zY(teOD`l{AK-|6HytG)k@6O^dN2j7WD{DKo-^XJ!U6-H+2>d-zUVXNLxmD1DS-IKd zDChPJI+GL^b^f>p-vvlbKP*9+kKeVK*GE_kX3{xoEJy(5vHUFMvRjtb+VhMedGYm) zdaxQ6Tti zdI)rjZ|8#|1`4Xj&Edtm*OXC++%C}h?cX$;zNe>NdqLN#sstKZJHBAFBiTCY6*fz? z+rx*3*ut#Y;S0T3JQ%#kj^?L3nc4jI0IeJ+PrLEOr8U&B zK|Ob3=OZ)2CdjNV#DQF-pkrFXaX2iRH^QnttgkR;c)0yJt|LC~^jb;h{M3l|*S~IO z-hai;{>s4HoeBKXhspa}r;6B=`zR{lc)|ii6Nio}9-(4WbAX@r4%nFg9idBZ*jSXT z&w<9@uN$N^ZcDqX+qQR7Bk#Bl%Ql~McE8YcgytwM=B~x>)~lL{=P_N*t``EG&6;1Q zFn(KnemvoLc`7UIk+zZIa@TaF!QZ>rRU%4l68LPe8|&VZi{pXd{b#G<;M`2=6pxCE z+I2)?ObV7|olB$dpDxZdd&J9^DA6MtbC`TL;)T;Q4F~_ZmZ0=j#`!W~K7#!=QORmS zV$%vOz?2>w{mNiI*X{bLFgT#um5_-iZ_-t|9?j(^x9}iM=0SKcr~k3)OwNazQEqCg z?DF#Rt|L+n?Ygw(sn{gT`D675$sM=77$NEujg7!1IAVd27k2CQ3TAL00TU}{aQJ8ZErUPU>BcD`6^Ste^F#=@;OPn z^~jUAu$K6!O0J{#{PU0t;pGl!zJ}jpwYB%PxY$2Y+7atkB~ zQd?_;M}}X50z7)4s$|g()cJR?&95O?d>EKgFxQ*gF@`TDwv)CR_TtQ47+Vvn-QCT+ zJ{TwQ_Vm0kw9Ca~UC^YbD*Y2(m!`^>mx-x2tT1W%s`UG#SFIw<@5n3NZ&8dfxpMM6E9hz1*->rLD8DTw zk};epF%{Br64%6MXKyk#8v>tkSw&T(-N=vFyG2FYf=AL*A|qp;e(1hd3JHL)G4?|v?4HWc#ci-& z+2a5B_vA$Q?c@HwUH;=3E=|L8&9w1%ye&2=zdrT7cE_zmRaI0cag$_yknklPM@ZV~ z9~tx(J(6x0Kl|eTJtFxqdeF8a;kz9`F;^cr4zhuj?vd05)W(wU)UdQ@U*s_+2`{2-LU7yD?NcHseZP-C6N<5bDA@flG-PpP$7`B_FIW+u5o0d*f94?mq{i=8< zN$Y%pzWK{Vc!a)NO~&-JmIpf`)S`X&yw18ZyC+uF-{dpbv?-jQU6<%LVGQiSvUx#rlSQe8}oo0Ylp1SlH(5phM4f^{TqJ zcWpz$p;BYCZxQu7>t9RcBD8$NC3mju)h8gdD7QBpL3Wx2#CGC)KZx9ul3Jkn&0#x_et;XFI{%w!#5D zs%8)u8EzhZNv{2Ld48G~eZo+6j$(K9MEQ ziC0O=riGt6o%-*d`|hZr5dG|Poxu^h74ZvFh82e07*0-S$D#Bf;fJXPs#3j=9(|J) zdVTLf_V)HBG~>;7SxG(>uHOSDt7SbqjD-2E(Y`0UduZy#liqh9cTZ zwxDL_%G82}#+CLH8C)vJO?H67D*Q<{filwEaOe$FzCjWysywG}xT3S}RySs#roYPJ z_A?M<3H2ljG@iFTb?#zQv*@me?!+f9beM&_8#eMy&nF(-%!3X(D;wsD+4lLXQxYYf zOC2Ygq?XD08|Br-Ih@{TiKqG1v+FSB6)N<7`9eC0C#%Lo0$Gp_t85KCXbRaw2Pb|= 
zcA0;n97>WspIIip@L=})bndLO6FNuMqC#n6LQ^AL+t|LZply+KrB>3j=foxMxXqbl zulCcW1zP;z|2fHE@udC5**RUiz-Ogzi};Ryrg$A4nR8$@3$-d1J7nbXwwmSXAvTmA zf#qU{6M50(&>*?|Cs*ovd2M;l_~{zAK+If*kz+W_N|ac>J+;dQir)S&|=c*p> zBwF@5-91jN+kgL>J&6m`Fd@j~VV`5qkxX18ZLgLP6AlsAFS;p<4&s8LdU_te0RTLh zkhVTIH&Rio2_5N@6X#izFwxt-A>x_$qa=~~XEr?&)SWyQ8L=uG)O%CH{32~BxtjOS z*Tps#r5R`A9_i>r<-D?gElXUlCKAak-IusppM`$;Vzp4s#?fC#=yogUr`;(wdo*;4 z^CN5Bwv9Du)m4V%MLMMt_u$Hp(o<@YnA@GrjC~Bor?9bkJQJ<>*ziNR(#XD%y2vsm z&4N4=H&n;WabEH36%Im;EijjU4bJSj-JeI%|3E1S&(lpzynm>|RaIGjc4ruk$UO>7 ztZKiP5U>T+t&*lOeA+HZO7#F>j1-UA!w6)ueyU`ztc`ragXV~naB)BDPW94z=joWW zogz{m%1IRo8ywT(J8GN7W8=DBeb>&Lqf+v&c6r=cL9QE0t&+3la!xDw#0xf4su3!n zAFA?&^l?&o>BHwP^$6}qR17r$^4@fk49oK|aV@3kYu!tqulg~{cl6r>Qo6FrBl0br z)Pi7!y{+e*bT<8ml~WHMM|1HwVKe$DrR=nIP)CmN4#{f;CgkI>f2QiUQnyevM($Q(g%Qt{V=Ahma_t+73%}o)mA6Mu(9y&?zj6@elrd{R zUbM2#lz}xWE0I+tjupl#jRZDxiPvDzW))(N5%pG8h3vc6kZ4;H%6q5vCEDZq-aW!Q z42ICD-s&S>WS>fliCq2Jwm#}dpe^3@aV!}Ow4zGQ5Kt#)bc$S_wwWDT`z04(`0Js4 z-4K7QR`yy$zn_;pjlz-q$31i@E;DPCio{3pxM$ofF_CtTP8r=^t5M~f?_)}5S?0>_ za?A*+M6%_lG$`*(HWZv5u&Z}xXvmj3n5XRoP=&_-{RdNmyvx8LpVMScATS2#%}EmdyD1cKJq>7{1pH;mNHp-4|Q}6QNzfBw=wX1QpRf4#8GWMIj}h zou1ytyv?mn5U2r77&xJsf<3)+g$Z}CV`Iu*XU`}nw6(UcVlY3RL&+E+H;`lOHy0P@ zHk!LxuIQQ0)DAoTdut7;(>VO!5zPsue-X4Cy4^}2o+*wF)C{$Yro->5qaRcCz9Sih z(o<@NQs3T(ER-V79rX|;I;AuRS^O)t_G!+Vch;wzLMwaVA{_bHc z8|_2@CyLa|vE)y*$Q}5=)A3`anLb}=o8Qn~)@8mSf5an3wP7(wlGoU!`tGntF`wgA zW3D)rN+1*-aX_ zR#ySYVMK0`K{PUG0#ts(>Ua89vr`t{G*q7A^qTMNovYUzJndJGFL+7CDb_FbagD~F zj;?x?|I+)z@t;3i6|@c;Zw@Ib;Zl4B+crlN_*wfkN51NBo-W-O6rpHuj|8^P;Ut;e z2V9~puOl35H6+GQAUUDr@XoT|?=N{$I(i#3my_J$tGrh`2-H6zC1Uneh!<*6u4bUq z-Sl_edAI5Ib#TM<^T4@&8D&WudM3mx_`n2W)`Np8k8sy1#-clH@sQmrh|Jr&0T|iUEG0)nQ0(QGn?(uouS5SgJ4uiiBcz(te ziZiEwR!i@dFsHD+d!(SVvQ|KVA>Y`NYciS>YaB@L@cF1Z_u{CCy!b;|&s7zz<>^m8 z4Bi9AjywxdCUwkLaVLM`7Y+#0E31eJdH|{7 z+Jl*=e!Cd~g(rH~y4~@4@@76OFV;TTC9RKS5x1ri6bapG?$SL(98pCq?qHQ&Vbty~ zd7-2?76lQrEbAq8Z@J`;uVM4z**>?VWsw{eZK;`sk9NO*O6<$eln{_HtLyjWMlt*f 
zO~Cp6CS-IKu(B$L%^VY!4-vNOpUczLa+~|6aymBBVz9)gru2T$HhHxZV2EN7ZzZ5H zn{*?gjatTj(QoXK@% zH&_4bgH+GT$}?51%v{^~UGq6No5#uh&!3?f2D&I6`SG6d-nUwVQ=R(ieI%Toi>JjW z@sdQ$$a5)49JTq4XW7^72gm2j8FmBs%SHq_^3?m9SeY+r#+tRs@1^?UA;cI6z%oN}v5J}uQ)aBH==!WK zUrvd%M}uO6)Md49f^=RuN7HACi@#)6>%Nl>;%c^@6U)?>9GEW~WmXa9I$Mhcg`Um5 ze~O`(&o=O~`Nu`OrSlCdb&a-_sdK=Z-=6iYjslYeY2UXMH~$G`wpymSI=EmJ(*9_* z{k@s)q;eCx7K%KX0MX6Jk>tmwCZEF!;n7hrfYR(@YGpFTsPEDh9KTuW-nL44VEg$? z0YMdoEH}ig^`%%U?k{v3RPT*C$GdAi_QjZ-AstPQ5MInI}IqtIg0y zvlGv@qg&!S_PfOrb5d5byUw%VF;A-R^tW;80sQ5y@^ADM;^J2nIzNS({&sr8HnYo7 zW4rDsBr*1DIrHjOuGZD@37!t^g3EauW6KP)b}GIM?Ce$!6=LMdn=RC%lx{UJ5Il~a zh#9Er#*=FDxp<@cEMq$pIT{$!M8c&kO5*Nc=4$nx*VB&0amFYq=1F+=a3!d4wTsCO z?(VKftaY-m_Tqxi-$cKo&>Gh9f^l&pF)C2}0uk`eR_7Jh$ad{#HE!Zrtga@a4lX;M zf`v6{2l!8pdBwaA2P>t%IHnv!3cvQzo znb0%ZO$t!4@o{iC6vg$<@j1w-CMn%;Nonn_kuVXO=a#BybQI$_Yj-rks63ViS4_00 zXK~`94Lm7?mI1-bWZdA}o7H>1Ggu{Mk0RTswI30H?Tszk;j5IDn24-iRTA$J%1h0n zC>A(Mm0qeo!=O<~9ADVpRGRBoN|_||>g80}q1_@+u?Q@XyMNMF?_{RWGB`zGJnpHm zznQBILbZP3kt%2rQL5@Sx;`o8dQ!fzPe)IzfZK%-Z5@@z$FW}T=^8{=|1eFnkQsN} zceJxg2#@8Ch2@Z0%l&4+W z1+Ij;=V@tXy!{}FB4F;=V4OAqPwizC}om_ zNF8fi|E%~fUb8##y+!%?reZgRnU1j;Uv~D9A1x?s2DOmabmOdIP3*bV;-H{}zoi-* z=Nc|B8f})hE3cNB$L@v9i=WPs>V5Bi>xo54Rp{55(`Uz-pWKzdu_yUSG@uP>@Bft3 zdSB&UNNL2r8ojy5dy;_-m#NsWTeej_+bvK2K5?>oukK6r=5HtAiR7ZPQ7l7&&l1T? 
z$hC7t0RHI-l`3oH4j)@>ow)mALRy1mN{xDcg+~nYcf~JhW-7%xl+6apYq{=HXA+lo z_LzG}EXjEdiB*QyW|p@u&HbdAS!Nd*ecXUu3t=Of)G0jH_-<9wEA=Vf<1JyxU4KvP z=X0J3dD?hCeJ*UDU9GL>Ji0gd@;v$^k90n*d96V5kG{$YESTQp@@wnUWXmwF_*+NCx|j@maF0zd@}>5n?e` zl=tZQQu)mAZ-xb5Bb%JKi`*i;C&r^voC1|G#)Qs7aXH%<3$0i#8oq=N7A3bo(Z%SO zXn(xYN;lNR^JUrU1u85boyhg(p5@#H4kPVX_DT0FgI`7SvfZK;4Yn8UHQ?f!HSOr#SbrYN6wKrte>0_-*#laSfeiR(;;9qqmCN019`lumaw}J1_e>b*9mM- z4XVa{t?&)&)Zn^{52O?wG!9S$SD)iQRNS<-w>*x( zBqLO@q7AWWNBGiIOp_l;ow{(1B*oo!qHg9`1RJNs7!8WZV#b(t2Z7O+u=25eOOxH(>%xD3nV?NRh*JUeq{#hF(=$J_d@h1K=_J~tKnctSBDddyo; z$|UHBY0d$qMfHu>!NUJot=~=^spWL5nMpi(B(&ogmt8cOoX8#Ac`k z_d9$Ad$CeO&uUDnW2rB)xa^p#W^ESZVCa?~q3;VCAM}q?$9%pxZFD}4ck;7^K4=A? zX!h=2MMcZ_&iXlCN2}Jm~BuYKbPR+6Lle?aJnEI zpI54oX0EBPf9OEEiD-hh&crrV^0iw`Iib^kli)(&RD=LrD zFw1=K&7wqVTKEkS?|*WzT4RyI_$<~?#(N(+-al1&Irx0dwg2H2C8J2ql4xUs^T0yp z8P@?p=#~#O?3s%Hy%QDX7T|l~`W*v}9M7e0g5&pK4!U3(Mms%Y?W!@JfS0+~^D40y znqn4V1G5M>5#Z|c#q2KX7o@P_odA#nsV*09G9d0+dQ)oo5(S> zBA7U72%D_2|?$r^zb{O(2e>_r268kWpO3pA>4T&tN z(M)3xAdfkv%rtNEccs(&WJWAXs9?o|65=@XO&tQwemg!@u`It9ppU0v{`*bXY3n9! 
z{n&vKXM1rzvHEguo{t6UO1KX^QB*p(5864`254+6#QQg>rmFGA<~p=g`8<01I#pZl zmT}1MTMm{~B~hI1x_EV?oDW=zw1yh^YeO0(d@}g1mGsHckQNsMr{0SuxR|vJD}2lb zUcHD*^A*+QYW&qVxm1_W=+H!`eh52EYk+skeW|OV!$GU-s;P138C4srwxiuIHjf9{ zoEtA*I2YW%r~GiGTkSP7Q3M1of2G!WdDYL0HYK#7CIUmKo#wY!=xM!IBR;$gxjNpP zf|;pspUaPYq{!TBl48%2+PR5)(7X7UtKhl1i)%6gwX15q`KfEhd5%`ER1e@0T)e&g zURaFx7eyah48FO=Nnq!ia8E{5h|J#$ZY0c)QzX5WAkQJRvPF5w3sO41>zYo$vzeAB zrh|sVx$mdC>S`h{RO?B(G-wrXWNVsx-t3=Vs#eLGMH{dgG@im+TO^-K5?2Z8tiNS6 zI`9S^V^-)+@Y6iRFLJ|w5Frl$w#!uRUS6#3RBQSP^!jw)(3dum(HgV!3Ff{~NIFWpjniF&oUB&L+?Rpyek{p^{7 z&$dLv<)hT1g*vXKJ-IP%k~>9YDWA|&%H-G89J+C$Al7hmgG1Kh*^9Gf^Q4C+zdi(b z&drLsz9(I_d-ADqbU|}j&5K4;7f-~TXDD4(Sy0|8lc1hS4=YZ8`SO&;)K#zX_ZB)j z9;Byv^*zHwO+}&2T=^!4D#?L1>+-nKAeMNVou}08p~Z$qR+(FO6z<+_^ZVA* zQ(nDWH2ES+w%AR5)<)B+e=|q?tWm|J9n$1H39@WR9eT|JZ-<<+WgM6NyDzt zD>n>=&LmI#@S(dg7X^6QH9OZ&lCnH0%edjqc#Az4z{0DpuTlTuWoRtsy?FrbBxmi< zw;w_-XnqQpG*~g=QeItAey%>cq;T)vF!zf$HCLx%nOo2IsJwj9H`{)0$9$V*?Lo>> z4DtEFoLb{P)vaQ;Rxw(7cJ@re403T@Iy%PyMJnAMrer zhL+|6T*rg^p`8{xDKY{FI2iaE7$R9d)OmUga8 z+zIT)QPI~*-0C`*GZh0qS&79Mjd@B+Oos6%upYT>prkSyor|&f1X+6VY{e&TgJTpN5kc=N{HI$QV~0 zy+!hM{rkL)o(o0>HE*k9oTx*pQL3p(keysNX|bC+iYlg$%2raSKQlj2Oi!1CPyOU$ zqR-={5ss%)vf$_HOM7{4A}4sh_^J`Kf=Wsp`$qCC0zPFrlkcwHXXz0~&AA%?<}-M6 zL+LxP*{rpQ)sN8Y;g+pBjf%%{M4!wrS&8>P(6_J!VjXuMmhtV59YJYk5?$+(i4pNR z!g1!5`A^SSGJWarR)tL|k>wP3R%NThHixj*PN|$%WZsx>W0cs&iMp+Yi#{18!y`*W z>!I;&Im&t~v`R5s*HFYIUk(%JfGkDDV5;Lcbol!`x%s3{ zrp=m{YXaxae=omnV5O>=j;+bJUAAm|o_$>miRKnnl}w=4MmmJp5hS zphnI;oood!98Vgh=Itjmq&qK_VB)EUs;hMsmrD-KW!MuhP?0He{Ez zjY&y4+UHT$;oF`@xR8}3Or%DMsoZ>cC6 znKBuNFj$(>x5N^FT+u*|;_PM-ZTA)n3dmVW>n!^g<*H-q8zo#p7Rq)zre*4=n9bt* zbvW+=uexW)+uwLRwxFt+_kv5uBbUpk@F^MVjRo4o_8e@+k1pf&-^*xl$txRd`0b{T zM`b`ozNO)3<=QRBj`}mX^m;xeyVmt7?6e=(u+671e|7tig=OC6ImJR%sngt$aja8O z*{Rgx*Vp|rN2`ic<7|1h1j6O$a@3fiJe(auZew%`sy(itqcfS|WSW1ibi=ZRJmh58 ztiG;qPRt4wzcf^ZCgHR=mI}~y(p;_Mm4yX5f_vMYx}+*AJoEJ#haGa@`(mZpo|;^) zvtAFDOnVJB<^|@4ji+3bv5#Ui*w7;q0$&o-7r)32mT{Ub<*9a|assH9&g3o(btwIl 
zo7^ra+9&RmI%oXk(ESbW-%l%WdS(c18=u{!SCnOU>w62CYfO*9te!BR>gOk9H3h@2 z&x;cmKIZGS==Hk*nGUBW9WK4oqkTk2TY-|&kWmE1F;Sm}p|qZRHocT|KTVNKuhO(f zxGpspx;!W>7j409Cai^+9Iw#_%aYspH+V<(dzG!i#xm2_9f+T${Q9O_ooHMhrtsi7 zEMB$?IlO-9^4GBfDEahbhb~`cyg)1|$Kz-Hd;5NpzXx?>H?LPsB?ktp4PWQlrlN{3 zr=g3-xRpOr(@{%Vd!mvyn3X)3KQfqZv7q77F(E>Za??H3Q@(qmHS$hNZT(Pf1pcug^~%A6mhiRlW8c3tl{67EgM)~a_?Q5@hTm!_Uc{A_1rjrByq#`KrqK!HY5Cm%4jBa3mG6nQv9yQ5EU3I)7`tG~9;ZB%*++pOlMzFS0%>u*$h7V)Btgd|b>J{f|LSEK zY9Z^mpi?Ekc3O5!DJh+GaUP8m78-=Q^F$ZUso#t^@~y)xMVT9}aq{+srFR)5-;``i z-oxH~mF>q86=mfsqV{;&Iz{}rea2xFG5$5-+T?r*4n$~`Hf4SEd~0x06WB7Kg@>Zz z7yhoTU!`QJkx!K%(b+q}j?s2t9^!83Tc$OC(jB2H_+rS!9kof~n3%Fl?X!7I?3M9L zk2Ldf+5K=F$LOejJCu2_DkDqU@57GNjl2Spm`=$u?I{xmJau%`?84riDhItZ1qbbT zQ7)CInY6R+=-~Ec^9t^%WPaQx+gwkPqS3?jN8ty_^b~|DCdM}1-8BxI<`d|u`ip~h zbit@>NR=m=RVD^Gy8D%!l&YqTxjRq-=Bj|m_i$m-s_B!+dvoivXaZJhIC*vD;DmQA z$_-`birr21n?10Ta(DWMfrTE?NmYyZdyWOoi7Ot)RK-B!b>jwj~f)&6g9mFUm@ z#Uf7pCb3keqd2%%$7WNBEtQ>_)hd(2S`_KoIIK9i4$5>V{65puHc`eolVVaQT|Uy^ zT;|VbZxZg}h$5mxX+OKYLA#fUu3B^&3&uS=#PIjm_l#4=fW_R_^J+~TOS0M^A) zO5iSj$AmN2#X_|G_1AP?tnYJ$-n7}1`Pi-B553bQu2d2T8;-w-|4b}hvP_V8mMc>h zv)NE5xzAqkg}Hwh-&Q7e&1uD^v)##2`)C7R3Rcogk+wb@X3*oAzp z^|&+GbR&lpM=dC;MzJwiYhx)TQ7#1kc#~4G-Cfm(ZGBW;b~$tpZkszXoqT7XKQ$=UTcUfl z*J(=zH?b$Y2her$`lq{__veW4)eBauEkr7l3QZaN?Hx+7n2?N`7&g1cGgI9PF+miL zyWS-|_K_oNN9A|2B@nlie=4%*Qh!LUVO^Mg4X`;u9(q_hR9tZTLZ7(aC-P?1u5EJX zFZ_p(+BSe7+b$Jz>t4yy`wW)@VHTy0H1Y! zgoq(gMA3IgV+O|WZQ<1v#6;fO+J2%@HZ}{c{m>O()^I$=o5*33Y`-+dYna+QYced? 
zns@3^!cPlQaA^D2Zui=2w>;k)k2`F9cj)TIZVOk|4N;qja>GTMR4TOQ`7-6{8szIv zf!);idbnU}oJ``eGrl{j>I>s@9wj_jgP!UE!{C%$yQ@)A6N!5QIM3%Be~}}HKH&y5 z2$(}DGAh0fwHKH%eMh!kN5W>!hZ=_R67Qfq7ehLVU(Z{JGA3VLpTBs(_~J=UMg>Y4 zxK|XZ8c4UkbvzyyfS2hpPQ8VzFhP5_E+m@zvhZqG2mY{b>+3TQZ96|qqmwe1WLBrZ zD_yhT`{cR-R@e_!JXxb#D13jjLqcQZ!_?r{`;*2G(_I2%(-egoz&&~AXyl_7O`8m; z1=%=6xj*jBJHEj=brgM2(hD76+H1*B^uO9W�)E}VN|6MGBC8ZR?eik;+1)cU zXZP$myI-b0D3_vc-TQ`ry6^jYyx2~1CsammH2lp=X8x=8r7LOgo(BDz%a-g4-OGv) zcyi~r_?g1Kr_CMb`~y#WwWq``AB;R)JfPpT*ri14$dRfW)p4TKWFdz6MCJy&*grqz zd9W{-u-AsOb{0I`G(VXwcK`i%N#lbt!;Z)LOk)=6hGld2>iedyYrEFY7P6eLe8Ns| za3FMgQq)0{PVd7y$F#w*>bB1A@e>Z&;WoBal5AD~iOygz7pz7*I2h5|HJI>%9T@gB z);vu1YIdDR-QL5sZ_{@#IQdvU?ibRY>!PtLQSj%?j_2JE9fn$5vU8$zl}6kc;swYu;U4k)5WTDW^$hOk*ebWLdMGa?==9vpSyy>#^g*O~Ud7ZWHue2tZS29R zGD3Y`ym+Ucu!cq&V}sk&Mm-)3K;5k6dXh;rRX~#+;avoeNKfYm|6-J)S46Q22IY{5HSj z?Va+dO~s1M-vz(?HTl3pY&62uvwiTU&x2F^qd;_eK)}3`($bzku5P$=`SPHrTkHIE z0c%$6PtH~^!fr_eYk%Unykq#TC=JbVhp7Aoa`aoR@zY$#;G3NeE1orU8dg{Lth$q@ zc27^aufW%UHaxI*_vzvD%E3zwCTLPOBAbiti^6<3jk^p<`pTxzjfXtdSlcFT(+jj zDO6dYFTE^!ba^+$TtUF0NKvh&?ny;v(qld6GOrh`kBTY=lvs zNoGjsn%!HLB;^)7I3Ll>t*=T+SnsN+>*``Of8V18{?3o;SxW@PCe=v8C_WprbzeXp zK(*;5&KJBwyNWO97&NT9pwP2%q(pedx&bws(7loP8x7r=Te?n6JZ#v%i(izb`TMu? 
z&L*)1iuO>+h5p!v7;hf-KWcv3crK^vUMF{Q4MR~_O0qI&p-q@m?r17m#z7ff)_J?~ z`*UL)9L==tWNbL&Psv)S?nvIFP|PKtwUG79b1%Qpue(giFP7EJZ7P_3cY(s(BKO&& zU9zw6!G_F`kmd1laXc)xtlhm-?fpOp?8u!pnc9Y?p%dmIsitH*9eF(lUi@QB@$?Wo z{MVxa#kU1UMTLZy9TfMqB!j#_hV5=`eP2`8z_ohQ>}69ZvRGs~`Um8ZQzd*S58-J> zEpR`RO=}+cYMc4_ZF3BDZ}u8$dF?tUPPXiCf@+oP0prqzUP<(}ZQHJ2yB3=ZsIREy zNq?>)^jGTuHB4-5v;=BLHyCP>&@37lvUvr34f#Y}nnnG~v_%1n$z}bV{)G>y$pm%b zyQI84v)FxsRS+&uxxA$y@pT{Y1+{?G^P^@awLo=xDDL(C`v@;Q-+pl7GK0@sucNE4 zf1}Jc#jPb+eO$Yzo?QI|T#y(xb z_?Q{5FP79XF)<;c1TOg>RyBEbMg;b|fkwpmR3f@kOf32C-JeX1jd_s9tyddqG#X&N zeEHJY)YNov0CR1qt6B>@mP3}7mV;62NbBPjy<28>MfOe%hxGHK90AQ}X*s#W`}SP` z^#qLZAt$fE4}KezO1wU3GM3GyrQ5NrOq*WH zW&k+A%(jAD&rcgRu(+$gzU$|4Q5UBK*F+jS?ZMS>xUYJPfhvMzq85t6#oI#^1@(1b>txl;BhxUVN99qyBz?zPh zw{PDzq;%VwOy;;(k&3^*%Zj)^<32wKXmF;286)g4d#?Zs##>(m7DI` z<=@uc&YC{+@7MAeON!T4DYbaV{#8V(Zj$->j!Hpdvek$ZMVUtYLySg}?!HrHeEq)}0{t`ThG1d+(*B7s>5Qu=o`Hq=590m^TSdH$Ng(gjR&wN zb{XX)$SGHff_=$9|7Z`@euA}7kIK`Y(){*LQN^l0-p?%mfpZ-(a#>SFJYoO87F7&S zI4os7cF9e-fB#@1W5`fjaJ$4^*4-*x)cW=7yPG(Mh=U_s>x{?luaWBGq;kmdPoOmkVd8q?!;OR@CGCl79@dhqb3gVsx_xv>f( z22^|Bd0pAKhsX6((bC*SRTBfQ{(SQaQS$+H{`q^tarFU36^xM>IeDw-hpU_`gMz9= zd5g+Ker7LX#SHcDrrJkpdZ`5@$({U8h_@PCc+E$1yD5U!s4QPrY(0PSd=X_ef8nEO zISPONBmb`l(Xp{rBASn&&{q?%Ps*SGJT1OeV+0 z?IA=u$g4~>t#~B&d2mnxV6ymo_qy}igQ~9?sCNx)67b1ztltvh(cqvu{Kkf$unLD8 z9H(8Dw)<)>kBEcN>$^I=W=g-QbWQt*BVn zQbuX0fjLfl_w;ue6_q1fw{BI1;?q#2mc$t6h#&Q%8s+26eD}rZGH(|M427%GlOYkl z@p8#s5SeP<+5(=}jqZXTr#nPadmw@1Qe|WI%c=;psu$8x_7PnV(t`uDZk86D7NRm2a>F(ER_Bj4o zL|JexbU3h(IkH(rCBLKP#fujWnRW&i@@wr)ybUP(9ETT9`}z%Esu1xjUnwf;2%$7{ zSnN`67%sa@WTXsabTY;=+v1bb+v`N&Obsygw|6Q< zOXDuJtI(G8q1wYef0c^-IOaAlJT`57-ZCfiSXWA)088Lk^AtI>X_li0#eR24uPX#w zxOAu-=3?g?r=tbKX&6#iZ=r@cZL9Vi1HP|ZeZooDTxc{>Zv07vXS2fq%u^wBy@!E} zZfI|7>jk#ksmykMTDokJsLR7o`z9ROg>@me^VB<2iVm8a(=^83U+aS?)o+dpD?dt8 zedN3$(*cxfQJzA$P*#2iKQP;|zCkXpH6WbpJ2TP3rJ~mq9pqYNWXB>~nUp>qw~2W( z8k$V4cqvNp=6~qtSBQ!dgBoINY#e8xKDGxW!Vw*&0M#L|=p@Lhpl(|=6tOsD>R9q|^b)JBsPz1}&v-&f&>k3vOKl&80g9?dMR_#S!F$CSSp 
zP|zd@WJ^aXA`ToyhbQNR3dXxr06SEs%8P!yHTx|UVyvUUnyR7&6=*d-M|gjJQSc$u zLjKq@!LNUW+p2{cLN?@?V2?H{D3}m%pj^1{=zhnkKzXmv$F0zu41mvTIG(Nt&TZg% zRVzW|k3Uj8V?`v%VH;eEr#zqvS1VDr(EB-eK&WU@MYC76C^1I?#NATNY2aawBS3@q zTo=7RO1-H?pE7AXIr@o28zca3BKMJ;yxCW2zMvpt+>GMel+hA|dI=ChEXPEj3>pyd zn~{jNiLMgyqXOfn59BjOZWMQjSSUlokSOzoS!!Qw``Ir}VEgzt#L|-^n1IR60EPkH zT?ty1V|q$*aut4{KM|4|%{ZaSy(>rg2E&^t?6-?b*@@9Ucl7vU4#hhi{R7Bm+L)(@ zM~P8Xa- zdeJxyGKG&4%E=-}%B2672AH=8l6cd@xy1LV}EfLTz7xmblj* zxNP>=2q;$c#|uSyMHBr-u&Xz&T#0556ds0a^_^^&xN-AlVsjz02i`X=zZ{TygrEHG z6n6JB5d+w+>p?sEm01u?4UyzDr9VEqs3+!k@zcy+ME)<2W!NJC5a)8B+Bb#7$`rJ_ z6%rC`+^xy2Ten=e@O`pJ9_;Ci@Ql108#`2w1+X1n>O(a~1ONy(DXAmj{n7t`YM10j zNy$Xm4pqeQ*^r_PXX%H_>mcUOeaK)r*A7eZB` zMLT)NLuWX#rp=qzUVaKsz`xxxxlc$)NO%AK8bk){K5j14+wyl1kxh{8rn#}q&BI9~ zO}?R{0RI|+u38Hz;vOdns3FJ2Nn$U%G^`giwGxnD#Y57m#%a6v=*c0#@ZqLR&pMNY z%kM8xEhOVX;3s5LBTb%B;>_NRRpp$~7gAC7SaeJbxVX>gQKE1O3!hHnF6iv)Dm9)w z3jqd`&QsH)r5fEN*@T^UhxxFFESsu3?+ONEdf~8nZnSYoBy0Fc#OasPVmr^=FZEcl zYE?I8c_oBp_#eWkG(>Hxop;=f&CGZ?Rt!eL7Wg(6JWK)Ny^T7sGqs@m#gKbMPH|n5 z!~;m7u(iQ8>+}cR^XJa-o(RhsW2d8}L&vLI3~)J3knLs;h(Z$+fdz3krHTi@QAM~> zH$K#Jj6y~2_VS&vaK+ZQm0Uw5t=ud3hQxjNGpxRM0uOmpZVgsl0}hY& z`TYykj+-{}$NyMHyd#MNOPwLzU~QzIZ(sS-hxhM!flAF+BxS7%&`%~LMkhX(6o)Wt zYeB0UBL7dk@!(h?f%|-B;>Ka?B)}l;;jJ4oYk>==UhSkghz zK}NmBMEl}|?-X;WJ01dRXJ%J%Qx?d4DM<N~-0fo@W{jEqdm+lkc>9kX~-i)b@& z_13!`EpVINMBHMaWx%6Yj3)TrRQEHVG!3N6d-7X?iL zY}l2YlAi2f0}cQQc?esP`<%qJ7{K9B+!a65Dv26emdi@Ah6R@G{knSZBi$Y}$-H^=hb43%36U;m$nutxWg}!19n_A4^ z-i(Y)m+%?gMI=E%+rMiQtd{dwMGorEMop*xlx4g-FG|~daO3v4URYIHfL1A1WomR? 
zl1cxVC#U6iD=h5rK)V&QFPj06M8<=XnaPi9*QUWGIAXAq{3epoEe-S11Ka1$?I+s4 zPYAe*BIq$Ucl}7m6iy9u@jO|EU{LBEmi7+)oSN^H`Sv;t%=PQn!+Tz< zbgtQ}rRDe{WaD`2*`;EO-$^<^w7y`d&Vr8j>_P4)`8clNc5M#CWE}nsl`WX)=g<++ z%WT|OVq>O^X%e`0hbaQ^S7gz9Oj(~_NQG~=3o3QCdwnMfp^zpl1JeGZg%dj&{6v?cF_w{JkW257n8;!EDA5?S!3OPiDD#by{vS z_Jw&ey(clQ^&@xYziv!UM|p@`6imB}ynI%3A0|4j;TnEp;gXN(x)-T3txOR<_2nOb zRQzCf=g@!;85rwUe<_A&`W1{cGkX?m+ztu3OOh^2{ncHT| zwwk|ZvL-wTJM~9Jr-L*a?JPGPo)z3)6jmDIp&F8-KmRt-Oa{yvyeKLg4dAPkunV0| zD(EhpK1C}yHg#62kjN$;hFY3u&>`|Af~Lk_^mrc{nws82uga*YS%q0M95la)ggr)` zxfZ#98a}nayH)yDzETuKqTV_wMybEXq$7TAY^T|5_1Aaq;l`N{a6N~LeWtEQMX~o@ zUNy#15l^$?n3|@TkW#(x^q_>VQmJE7Qc^*8a*Fr(YN!!cuk~zP51UwBI$K!v{mYqI zlmxjz3T0QI?ic>g+|mD0-tn(*n{5I7^Z%6i{i_51w;hl@vvhEc%HWQq>Ns*LoxKP4 Kr2qDpfPVvEUp3$W literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starvz_visu.png b/doc/doxygen/chapters/images/starvz_visu.png new file mode 100644 index 0000000000000000000000000000000000000000..bd4b6b62559c6ec196d5b46e3ab4aea542d1a7c1 GIT binary patch literal 118157 zcmce;2RzsR-Z!qTNu*RpRFn}F*&`H{RW=!^?46OVQbHL?LRQ&AvUfs8$=)k_Q}%X0 zzkcVs?{i(}oa;XK^?yA6-^V%U*U``Sv)=F5>$zSZcUfui{dAtEB$e@#N{77@|5 zVIrdKuZXweCyCv1LHNgRGYORkL_}nkg#T{o(xH7wM0A4an%HIehwsKaZEwpfZ0!1d zk)@b{e$?ehIlW0Z-HE$eF1sK54t~A$p2Fb1eyqOdQG=+*lmoZ2zI=#M)^j~{##Q@z z67ev17W?@bieU({w2GyHCZv z?Kl}CB0H{-&;y%)BBCL2=Og?{nAW9)n(&8k65-y>zpL4G!h9FuC&Sx(vbGWaFuO&f zV+-LA%S5Lp3I8N=BqFmR{L@8{=>Na3nYH63s)=6Y5z#7UF4AHF`Ot z`}DBd*a4cbkOpnT0Iq`Y(x!0TD(R58(k8E}sv2!WobkcQ|F?_D2`DTq^z*Cto8n?+ zeOaV+JMZD23Y*R&XKA8CdGD=JuBlgQu?sE_f0*>zWk<5wK_tv7(b1#7FqT}nbLURM zhrh#Pwrv?E8@PCM$lCewhuuWm-TA(B=Gw@~$sIp_+@6=z<=jm{KXa1>M(!XYVV^9~ zu==JZ2EPhBUXLzmdf!<~;u@v&^z@$95iYZ$?Zdi0Sx>BgFRiMi>Zquywzalag$Whr z<=yT)%peui-`Dp+`Lo@e!Oka_9d+0#`D`Xv$5X2%rKI-n+t<<2p{=cbXyZgwOpHtj zuT@R><#XrG-Mo48R-AlF-1xYWv$OL7w~Ja|T>kWxnYMn8N=$r7+S}Kc>ab)X@;oFY zq_9v(VW}(Me(#ZUR04KsadC0Exho6Z&dHM*v(IrpODik7Iy$OqYGY$#smdRfG;E_j ze}1JJZ?`aiMM_F4_>!@Or6tFuOBw0u>YwjbtgS2z+~DKujfjZg<>g(N?36Pym>sU2 
zpPz3SY4HEhJ*T!TCw>@{s|L8aPDOG|6>?WXeC(}5j5 z*`2MU{TtO`xU0dz!50H185v_17axk0H)}di^!8rBC0+AlP|q^EoXTqVb-6n`VzSm@ zdob1yFjSK_B%cat+j#>DK}zWt+Wx<`?g>(i%+ zDydg<+v}tzJF>~CsQB+!zHVu0IeJm&6eFYCvuE>`vYi7jxw;ltXR5_tQ2tq8nd0N) z!#QWt(a|-~Q;73(aL6lr*YMIxIY3f>i+31y?_6HeT>Wo?(#Kp-Om>89*t*l*A1IK zs^nOX%ulqdsj9wVRT~@|%e7yaRMK>`D)pe?GVR*8XODrt{!JUt(7bgU6FoByh1={~ z!l$%l&VA)Sf4&L$sF80s^fRR4?Dfv$>wn5 zU^lC0Vlvs1a(7~4LWQL%>PA2;(Vo3~J3Bgpf`i#^XZ;!(;lcUK#YlG+IJRRQk<$BB zP*GFwBB61>)r*u*cIF1KX=z_xF%Wjx_LvH5VPU+Xe?9%kjmBf;X^Nka3n_jzVonzE{@MfUBbi3(1`CWpC3 zrP9*UlUHBVEDjrvDG7%R32m%C(9$|VCr0o4BTQ&xxb_1U@ci7Im5t4jJx_P+*m3F7 zC0Czk&vvr0CF99Z963Tl#r>%>Q1bOpFY~KKSr2?vGGT)1xzs#-d~(jN#B@11Ick|Er(Gi>Bj54aBxPjG z`^gCU?qWCT$ik9QDCi{-T0KGL9$Q%N>N7Yvh_8r=i@TQXDeg@Z!fmdNW!TWIBSQS7 z=;2iU!o3Tv6@G1!GP&+vUOz`069zQ#XzyK6DRc;uQc?;)ZN?TC?Em$v=!t@YLIY2D z@AKo7ls`iF7RFmsIXO8S>cX6s^+y}xa65-9YnywMzf@JdtZds&XJ=>EVC%HH_@}b6 zGA}Rh^o=*N?#Jj#8X9IlcndD0ln=kqib!3?7sq`JAr;yGF^K0uw1cOEqvLM6{0+;| zvY}^GAs<>>cgXcgZwco@2UWLPnKu@Z$hDU?Em+c#mG!aavoJTmxyozNFLCi=Dk?Ma zQxTE!5_fWn0K(JO#SiQ1>QE(7(7TI^i@$yq5uDBqK@|#=oG(41$$m&Oh{K(gUfFE?s)#+*mu3DP(r?WpS&a^E z+*e)6v|-05+T+B;k8fr1Pv4(EXhz;b_VeRDbfWUK=0ZY+5u$GEsC}MC*rcyt zKgO;lY-Ywj(nffi;l(o@H8ry4Ki*O`e&Al`}R#nXpMt|W7ds!-H|9fRh;kY%VVWl?6Y%onh$%reVhFK{g1F| z$Q$K(-^#aNdDGU`mTNQRb&S)X;uRJbi|_R)P3N__m8rr)=Z$p=X2n>+UaY~Qq9W9y zkGS{J(oRbQ0aB8Z63v?rYvSk3=g*&mf`V#4h|3Mx<`&g7G^gAx6U=Bmm(=Ap+xI}! 
z>u2rw_~SSgZ=#wTv+Wyx2V`EPrly93baHh&oGdg{xP3cMzdqVJHD2INN=l0Cnb*}S zco@!U$N4LVX1r+xt~c?nB1Eh_3XJ$-Mr(8g<-^jiN9$%&d<==v-DnHyyH zi%6)cwc`YTXtCp->lzyinD>@AuFO}sS>x#oX*&EV#dgfk%j-|bTp0??a~&LA6BZKW zdd@4Jnz}c7XUJCJsYiU1H3~ps*lqiE?+)ZPr{^^7e6(%XTghk#6Y`+d)zzONd|Zyp zvr98W-MKcVXxdk=UiH`@@Ba2pr|9tx?4#85^ePmE@85%koCW6?ZNI-^y%BJZP@Bxm zJ}I*ZIxK!lO`Vx4+>juNGM7czsq5t>f9gqpw0kozO`D7Y=(ym4?js3s^7Av-&Ye z9TRd|eYXENDrZlB|19pzJsq9T${z{MySNAI$JOVoT(q)^$^>9g$@7C^vya7E)1pI$mjtYk7=-ndHrjw!0MUwduILckoBxgUJpad;1kU632y3o5{&Z zv`!ixi%8X%4$~EkhRsR&=srL@xwbPp02cd?Upk{myl2lcR-I{g!P;E2W@bhPfRV!b zMsBy$%0xqh{PE+#=pbP{4+cAJy4R|m*F{=B5)97a#Ih((tXghyE;2wWkBC(jTkx z9v(apY>w^BAReL=vHQ!Ho#}>0(#zZ1SFm7l`9f>6at;psZp9Uqm3Cu^nXyq5Kkp5Y zH6|<^M-7r)(2LK~+A7@r)V5>ApC+(OeGI#KeSO80nVGp%WapCTt34@V1 z+YbFvvjyub-aq;dIEoPdH&RBOeTy(FCDXQiFJKK{-mDVr!pL4O>qKkqhH)(fiB(>*wX8MQ4Vpo&S^`L5H#9^(h-0 z5WSyvK=gk2MM|c6x#b$b;)ciAH(XSBmNBBBf5!>DBbhy6AA#wCCq`T#`~lqW|B*=% ze(8V6E`^h0wh>wU`tk_HX4di$;hSoTm06kqL;2J^O7e%?#pn-va^C&+BJ{u}#kE7% zx4CffqJq3|Zx?e*Yb&6uK##P5AfGKT*n)|?;it(zb1|>?>RwsSWWK|v!j##4C*s?1 zX0&->+kR!Wmo{12jYN(K;@Sg2u0BadkCmC$W9EEU6OOj0t!s$p@>V{5W_NWyDPvZK zXuyKWV&E4ab2DYthwwc6ktwxwgSrlVgS7(Qz2Re>RxK>l22UGGWk(jGddEkXWkyo+ zZ4_f!sY8n`bL0vJnKgf>Cj|feVX{oee@A`yM`mZsSdzBE!8@(@bB((-OLS z!frIO@*mx{>wp;Oxq2Og{~3l!xX zDLxYZ(qo3rMY-=PKs@S+we>un5&={skAP&Fn#wqu=;@s~a|UGS(4MEqFBye*hltUu zchwvva+}%WEX$(P5pSsBd75Y zSPtAsJ=gk*m{|E<0e=2NV`&)~SH;BohlciU-zqLH9@MFx_b??gQmhO7bpuuNT~N?5 z8k(f2{RDZ&B}S<*q%5SYZ=k<_`_7$AI{mJspuy(?f&&7SG&Q?hb0irqo;cw`iVG1b zEW<*=t1{keb}sZJ&)CQt+BvLhlMRSzjCDdlEo1FP2?MPz3O zl7cA{m5Piqr%NPZjnL6uPAO?@Yy?r#5hr}tZ_Cjz_gr}<|B8Da9~9;LW|kZT!t%#M61_-)52T4(0UM_WzJo65Qd5VZC|Jt9KK6Tl-rC&! 
znv9IhwQDz7gg`W;rGJ88j11L7Q~|rv*Vp%`&$$s*hrOJW!)s$>6MwHQO@D4~?#Q{@ z;RPOI?-P`g0Yi@+KW=!(o^beIBj056L_fBDx>F2r&&13emrO%Lv-MGUQj(Y5PD`NU zs;a8W%qV+VD=Vve_wHc_<6otsqN3dQO!x$;yBN!1W^Cf1g!52Tn3zUW?sg<=78aQI zU3C}h?d|>AV}>ROik=W3k6+3#Y881IrEhH9i7x?)(f#rEj<)tI6q>p^fYGWRGx@Tk z)YQh>+Mg{3uCto(xf~V%{`&Y)vNN59l{L=o4o=y{3%55mHWpCzCsXVnv(XRY-dA5x zUUoDwHXfK+($LVD4pe(9sdMGZ6(#LP-WxZ*qw7XSM^8V@Lw$qNX3qT%D#XrRyTBb| z8yj4Alh03f2ETiETVDPYDKSbwi;*?3UszaJFHK99c^~lPIn9EOmKJV6GhlV zETvlg%CBGP`T1{KQHzQe-~aw5?g%~Mc@XV4cwruP$+!&{CAj!&n_yVso#LG$<$Kxk zgo%M!!kpJ_<>biM$^RuBRI4bl?PT^)?9hcWZ(24|Vq!X4 z$Jv`)S9W*ybiEw4K5fMK{lIs+;WHaUqyc4$V@A6^hp=$AC*1Jt=Dx)BVa1EM>PygR zsj8Q(9;cse+aE-mVX3%L$g(>kpNpgG%CY1Y$KegH*H4tp_uQm?eS})mIbOZuqr^s% z*cI{2N$X|Z=N_GPQu3}5rQ})6PiIfc8&1{o_o^>h8Rj3W?Q*}#Dcn8ko3)TuPZt#2 z;lEd2DSqx_(yDu{9lLtu)!v^vscf1BR0j{5Pqc9Y=b`G~9N52q|KY=j!L361Z7mkI z$MFP8z6lNe*+(GqSjLGTKW5!F(b5Wch(0pfBmduYy$VJj-}XG)2To2-*RNmC4UhqT z@G8~j;pMd+`bM~!U*yE{xnjJicrKhh8>fB;<(rMFQ<^v~A)!6TDnaf zR0pI}8(bc+ZcR;%wWpnKlQglrrziQIrxVe3{8pp#Mn>aF>bdmv^!-n<6Fsc(B`4|W zr)Oq%Y<-kdktuPC7a9&$M-Gh;VZFFeb(Vl}u<-KQjkly!y4z2c^!>50umIO+ld74V znhNvt^V{roraC%JJ%6=3;WLNcynbCHLpU3P9)?EPShKmHUdNu7*8%p2;6?AN*YfF3 z=MkR_@Q@3EKfolI*szsnrl-j~6$DO*2uL3&G0?!n^kJDhzx2JE#E#gYS$ht4_TLK&jIOT( z0um%T)YR0V^itdauO22qU^E#DfjK&>lq#%UT`6Zmhd~FJF)jc0;dYN)4qTC;!>)O#%7h32Il?glTWF3Yg^l` z(-S(OBZ`oUfv4nHZie0=C;VnOHLe4cJ$0qGh8MjGef2<$SI?h^;E4p`fN?UOpf*a= z$oos2+PCw`dZMGq^X$!!w)>+;ThG~h=N=7$DC%W zxKOv20BFGPV%CUqPw?~eU$}6Aj;;rf@W6os`*w<+Abn0i`03iEdzL3Cj~#RRU9{^s zm&xsXyL3;_y>7+&1_n$lEFa_JEzHc6KWSZFFy34hL{Bg0>p%Aj<}%I6&PHudR82R4 ztPXAr5|0iu1rajcY0WN0*#S?%+FN~|+gVZ7l->LFsDH+O{Rs z1veWyU0m*)sHpo>&OI>Igr|H10*+8n_`iA6q_Tr^GRql)Z(?F1+K5%|p_yV5A@niA z;_~tWSDfM`$v8edGV&A++||`mv8c0i4RW_-(C1H|h)GCdh zk9Jm(Y^bFsMQPlUGCZZ>c3$tkT3Xb%csmLI`!;DGSvxZ)D&uz!s|k<^1s2!|Q3QCE z(HyMuQhli~7sZv#98l;te{4;HcRZ(fCa?G_O?ISV0$-yuwOz)i)L34YDV0vUs+(hZ z_xV&b^3~cW-$yQ++J!}bqh5dv_9h@8Gd=w*8;pm8d!9nDtNZ(_e30FN&W=iR*82@= 
zsiCp4m9=$~fsyJ7zlys0dRu-C6&01ccR%?1Qv`em4uwS(%_K__!<6(DinCEhOpGq{ zcE5^Ku0S{?Wo5}~nbNj{WmUmkLqkI`k&(k=W8@+zJIN&_Vjw5Kind(}o|u~2==Efa zpR;2i+-{cxalTomEuZLkcx)Tv6(IdJe|@@lH{FH6kC4P`B1F-KeIp`z7{eUujLi++ zCncFc1KRq?rMQQihvz)y3xFj^=%|X=hLH1RHR{0%y1GJ_34*7rtnfhhCXs^7w$PNU zx$(P46hNZFk5Mz*;ByZn-Cws<-tFaG=Nc-+xv?hv!2kF>mfB%;KSl`xm?BGG)urT0= zVN-c{i&9cm%X4FlQbA(GD%gYf?qT~fLa4j4R7h~gu_ixso$-QGxclkX-@nZm85y^2 z+qUz`d4q;nf&iCe1=XZ$G@6vNc(X@s-L|cLvqx1ASfHznj*aoD1ytPIxqUmSNP}I+ zo9ZDxUfx?Od<4ygk)7R#py+b6ZXSqe$7nu}KdJVeJ1K_E=Lk|FKF2xtyd!7`=s1QK zbIz6j_yOVze@{W<(BZ>_0|TcICj?PEvSafXqmO6>B|Q&=#KPj1Nn~7{yGhOF_lC%x zH1EcO+cY0g+Xm!ByKv(|Y`Q9b{+twWx4&{_M{t!QRrxvH%}O6Sxz}PK@E=8Gz%suP&xvFLK0fNsl1ouZUOjBKbVsNm@ z-`2v5Mu1>q@bd7a=kC{(mw)Z!6RzAlI%;5SoM`?Rwd25*4ViHhN>zQmT87b4g;&AB zx%Vnwp~{kqfZEwicC@mdkC$Qcue^6*d4_$OK>lP1E;9hw<;$00;|9%vGf^KoVly|Y zuxpoFx?yvMn!%kre_%l~y%`>f6VTedc}6=TW$^G}Rzbl!RAa2#LOiNC zd020`y49iZJ1sR|k6vJB*Gf@F^Q1m!Y-9wUc`rp7i!t`ST}O zq$4zX|5R(jMG;lSa#2S-Ly5Mzg~jLT_t4+S zMcVcYC_Osn0y@%_beq8s_5T2^;JfQJ^>ua7LNkH>Kuk%FUWmL=i#9<-w8}O|W+PIL zro3tOiavWT_I{FBLM<;pU;XxNlrgzmw+?;xrs8?!=XcS(S8Vy>x0d#HfTWnHsI$^z z=lrjelaY;%j&^i*#wCLYQXSA;UtMZz-&|!b8``oDUfK*}zlN~({&|Qya-2g?7P~(% zFfecR=@E?@3m0`+Jlvy4RTLF38MUy(rP=v}j+9u8{-9^6hr4@(iO103;MFTv1gu}I z;MBQHI?nh?+`fIAn@So^=$$)v6cqe%dte!ZNdkB%CEeE7)s0z=R*;dYh6x_4$Ir{# zR#I{gf{A+fJ(tPfzk7<|S6MZGMhpD(Y1j*y8KUK=zh`%L-2Qg0S26$cfPu!OfoPsqVMkBz7^SmOTpAsRO3TK z;`_~#lad%*Ax+?up=PuAyCCRh=0k}^2R0zkBUf7SfSUczucGc z@TQ_e6f0~)vj80pjO8U2&u7|>XS*u%k2g+Sf5!2;;cc5?jXZjw57UiOvJAf{i$1!<{7#SPB`fZ)$+SDA$hB`d1fB0j%-VswAL3N%8ai6uPpVq6&b=~7FZ*dV z7Fh2!ZJVINZ+XDnd(2FfqpLmN*-zg@)kZT;;)s(?&|HiE2g#u$m;Ew+lJXV_Rd_zj z;O`Ogy)3>Y_1n?2WFQBmCezs?l+58J`m6CE^ z;gKD;f;k0ZD>|tH0s<)Ofszb#r*LZp1)Vx`tQxjo@VI2!1tT!*)vNaeZ!sI&uBX`T z`}gm03%7u0B;RrWaW^_cxP7w^4Dxua{tL@E3?a%GX2A(kGj1w$j-5|#VtKxP{W>Hh z9~J~ic%>mPA76X>#)9+%QBhG;ByH{YvS&Iz-PyJ_xo6KF!L`}?q9?oTZEcSpJxb91 z(fGWdKmYyvcg^%OLY??hFaBRS33+NBU<109wVXgGuNBj!OV-Uv>IV-VoE9M=At~hr 
zP{aR$PP+Axu&rI$PbwIuaP~Ff-Jz%Nri11RoC%GB{jZqRFz>%2Dchv`t;bs;8h>+? z{R@7U%)Kq2p26=ZDd~wrdv096uE#KWD=sZEvZT7&n_yu_M@NIZR99D{MS_KT25*)P z86F%FhDoIm)I%%fn1xf5*wT^=FmJLeJ&||8b}G?UsIHPFue))IEU5F^saLQgTiV)~ z4nwk2RZ*ccBrIZ=Bsp}xLTBfN(g-UG3JRl3YUU?$-yy+)kB3#M_W3zB^2WwP=L01* zH8n@^1d1VyIpQI3U7Z>xJwDk1a@l6sKs*2`DD5&^Sb?p@8Q)K4Z zw}a-;)eH>`VynUJVKe6Bv_WHPZB=>FrIf7UxUudiA|WjN1;xCLRlsTWp`~S-wtGTC z0(k6B>c5_&=*yCP5?HJRmL~3|;|Pv6G!s|7Zzn7ddI-;b?raFZD55OM%9>60S3nv= zj7jrvg}lE(x+6F!2#5<{-c{s|p<%*@4<`e@s~0$APd|wlDE}vXuEH%cGuSrT6XH5C z9jqr>*wQ*WgD;L=gsjzn1Ab@O%i}T1LTArjQ&T5A7-8oqjb5ka*^w)rhZPhRD{q)| z2zO52Q=d>!z3=u!-|*=Wp6ffQ;H3hw%p}MDiR+b52~f4qB7Fc1F zirtV10RA34Q2V4+1lWfsF%Lx-Xxc_Ie02JMLN8rO(TyA(*T@;A%VVA+4D|iSkA~)E zT7=ZBFZ~5Mw81>HEe1T2k|v?O`yYY-YP_+&hOOJSS90&y;$oC4G)U=C{*hn5zG$&; z-}*@66qY0*+|um@WWI~m;gfkEArF+Bm1V~L4%-d8CqAADKxfC^!?0K-CDGdsA3vT8 zBM%SZpa_-^eBkX!9hY|W=8H^?qF}J zCpZBj9q{oH0YdGujE;Qa<#p-&d6jI7`$`#y!dLkJPA+9-VaYj3gRSiZIe-mxmz6a! zbq@qXz(82UNCnykT?v#_gcZ?a#<4H8{a+JLCH1m;!bdKjbW=vqlfC;Q*VFSmfm!_e zvSr{3lmP7Jp!}pwxIsj7<0ao@f@0$JYu8i?9BdU78b9Cv4)0rpI1ZshPfyPmFB%#f z)zb|WGD~-N(mVYmnYfiVLW*CL8IFNrlNRp$W$D{HoXsSqgV{Ureo~+a)PDAl^ z73Td0zB}7nSo94{j3VSxugH}BP%d1$vzIP%lbPCpnZ|a;79D69=Z3hM+?9CRRqxM> zTa%A1)2@Vs%up*TT05+r9r&u^yMF9y>y+M@O=9P_+|Tc)qk=7m4#wC@MaLPqIAyN; z$EC#6+`8X-$FY=RcRuZrs;cp(2OkUT7Wc>$ItG;`1#cZ@z|CCv{rkCx2Z$*_7)Mh; zu0mwZ1}zaq4YI`i@881(!kafORA)5bMZYSD6Os_R3ZOg$3s)a>HjC^IR<#V-RQmZ# z7Xn!X1mIWU0dZcsgz{{j-SOp%@RGeooSPhl(wz%|Mwi;_>o0;1Roo*JfrrCHkrG21@$(~pQgr6rIRtSkfBqyep)ZfN4RE3*0E5R7 z3OM3uNCAmNm=!PO6dQk8+i#>2yd zO*6rux3I7>bz;($YvLrswA778E!c8jhj5aT&KElzS1V0F3R*)vFqL4{x$eCS9ZCc=Hc7Vf@|b zt{bO)B^o9c5b6W)fE%M2I)N>UesR~^mfOF$r(9F3p`*hA)%wQ96yOYK6w(s==t^LN z0!!iMA)x9+cC%jgpE-zfeTxc8a@O!B;XJsxxg}7FTLL-5mMwkN)}~5MPM*$7crq?A zFOQ{GLT0nWO|Z4SsX5h=Eqebqa)t#DhNQ6&@L{{wQL3FHaSGNl#~7f0Q$vGn*zse> z79E_dtkOX|RFhv5mOZrCfbVlCQ8yF+v#03v7x6rF%am7w;D-Ry2#ES7msBj zI}7=u8EQ(4?yY}jH`0VUmaVV@k3-2;5QLP0@CiN;OyWhX+g@O1z9G2YUQz-P%B`|l+>2^SjK0Cg+q>GISqZ$)zgyjIebrWkcrEkf 
z&22SU55VFS-;oB;*B=3!0#6FF9Sm&Nu$M9)!TkfcUtL?XpW%xq949lAoJfS%W}uU_0aH)`gw_21OM=XJxJG z>{MTWtPm$}H(V2e@F!ONK@m8cpo83{=_p5_@EW!Q6ca|9*T&yT+2A@r&32bPmW1W- z_l*shZ;TXbN(#uGUA+nrgVfNEqM}DdTEyG8`bv~&r$A=fY!nrnEzxeAash^^X-C#g zdf%*nXw0aZmW!4nklLWU(oj&$wQzs<@Byhh#iZMDf8K1hO!zxRHVO;OrUYf9hHYSA z06iFjsFmX7M(PD5fn@#o2Zx4epsH-FLZ24w-x%B@Zd_-D}w9U+l zj7vPt<$V4i2jjwV#Ugz#@W5VXUR)6B4pk!5Eg6ovos*Oo$7kK(~2r)_bD$sQCovtBC&D z%?<;RK?8?e;m0Tw9T~ZbkQS0_s3q%J5C8N|J}1kmcv+Z468Cw$V7^uQ4FWVantcth zK;9Qi=YGgN5FDivo9JWkF`kd_=wA-u=q2UmTWoQYi_XJ0(G4$&schzHdMEX=%dEy^ zKFVwt7Wcl*%uz31-5CC1HB9t2CLT5j9-wiF*1*Xwm8HA2+4J-6VWJBW*-K^3VYkjX zFivm=IdA7152#tOvUym3sL-0S_-?S{R}KeVT`cj^4aP%z(vSNcuDJJkcGi-{TS-x| zsij5l{{7Mc3vf~x^l^I{isL*I*aIb@(T4YV|U_D{lmB%9_ zBy{@p6;K(L!)~swX~*sEK}vMqSfA_=BkYxjOyI_+$##cT2QZFJH%F6+Ao+D(DLM*xB1J{2aLt(gMbk8%Kiy4+=n0=-$tt6Wd0h)@z5LYdwGde09~~ zk?^nKVTn{eUkNc{Tlftq3P)Luw+NeB&abv!>$vvq>Ni&(&wz9HbndT1BQEAC7JV#2 z^{s9#AD8Zu1DJrk9Z;sn>1zI&eO9__2=T+;pBik*aovp4z??`64)5&l76MZTrzj}s z`tjpqu~ncXlIJe6XIRdjy~$;+o7$@Wf+{>+Klu^tWHzFr6DhAi$w4?B{zslZeTsJF zUK&qB^!`~q`0TO(fHH%hzLu65qT~pb!+3)jstV(yvwX|42d&rY@1^DHn5D&=nw<T#ca!p(D=HpV@N@E}_kCWZm4|T@ z;2rr2v~Ho*-=c(6p8@m%z&wxB(4c1Rw+stF+YDh`^HifeA9tQheL@fx3@j!NF>Ba-xK+)UTN(EDE?!5FKavDBQ*HwV3sQL|Kn!@$=_8 zjG?>Q+8L(ZNsHF;e={(Xh5^+2`^5@Ry3pA0aEY@V&dtdRU0VCWT$C*P^l0GGdFy$B z)!&#VBYu`x&Cx~?%h!E-tJGS$7?@C0F%w?u3a=|bOj`W-)^~?^!Y!X zS4u|4+j2TQUX1<@J7q{`Tg=s#%(T^NW=1@zcB%5aT3W)4`&_#ENP1{9O*$<@fAJgi z4`VY#>PBNhebw3=iW?RLkDlcb6cp4>fkC5UTMMkWn{L+4U(S1o;6^b+g+c!S%xZ3I z%!kP!PU|R^&awcWOeO-Qnd3QacylFxVD@n0)3b4BJ-am=e$_l0PJS=$eQbig>CQ@X zyPOkGTYW9za@5x5y>c@%7l0`s^x`~`a-9;3U5riE!9^tw-V> zY5^1tNefZ0XV0GD9`Ii(A0E~tC-(yuv%Y_0OHMT&{`bnte$6ASsvKysFdSx(KjcR~ zLpRJ3gG6G)$TUkpCITyrHGtskIP7nXl-+Ce`QVGXUJflYz^)fBTtMpksw2VLgSXsx zJ($zyKU>Dl&CS)AaRHZ6x*3ysdJDOH+VRs zRUcNLZm*c$3@*8n4t!0pF-)X!r&aUQIWln_P{d7MCB9u+_o=!#qfXEtOF7rbnYHeu zyiMs`g!nv_o-na~ys6;&*q_uPr{>z)-S5a}-<3URkPWQNbuP%|aIpVjZIGDr#_pQc z%0`sLjFE2uGutioBojW4YPY0Ab^I0f-jjUWato?CV?X#iajwTTDJ$^p4-qabI?yK{ 
zJtuH2RaLinUeMC6KB9M+v`vYOx60s8K-8bPm+L&D8}}xA3q~uGKQIIOOBhQ{oX9Vl zu59{d4P9gzj@e*!STcX+LzxIZ>PSfj%-AKRre6GP?Gn@RNVI#RjhxS}fwD=qDW{tL z=c>~)CXm_!vyr?}D-;a7m_K*RbVJt_Qu;ovC(2?uD{okeQ4NXv5L*;am@!pJo;>g>jk@;=2k#ofIE#9Eq!#oyV=9MlVQiMo#I!n zl$Mn(7|G*A)eo-TD*xb2W86M7kX(5G9iO#%WT%8kIpTb{_Aa|I*3#BSJD+2BJNo+i zaON0HBye#^G*+6DZDcsPLwnk#=Kj6NGA8SAiphD?m4?arSBarzLx@})i3tO$m&$*& zJKULah+l=hK+nKHr1!vDMHy|9dQNW$d5m=4M%ONYgiGw~ zi-WkaXlMvMupiX(hNYz?i}J=#EAzGGIrME@hOM35{IencMv0DryEBk-EkWcD zjLn9Y2wc4AS8)%Qpk#YfS@~^1Kmc;X8mGjiKC$^q=vg!RRUi$Qefu0IOQ^P~DI4vh zZ8fI*hW2foK0Bw_Q$;8G?AB?(=fc(L?<0P?Cf(VcsVUeq1cX%;R-@hIyv(O+(0m3Hpj{ByD211H}>6&&UQPw0eU3s<_ z`S{dQA~Q0YN=jT2qRxK=anCc=F!xoQT(oq^g>*N1;QJa(IERRTiHvHn6aJ4Zfc4WH z$+al`IC)xAn3rI)L&nw>M z;^oa@{>z*#31n9i&so4F9Cjox4JL=<}9Gy?aK{E+(= zupEX8hDb#D&xrSO2(s+D)5SOXO{rgdF zZsN#qBHRtAK}JrFJULDW^#R+nZlG=)u41hXlZ zP1Xt;63i1!yCXDp<_#^61!|Elw)L#;i2?> zo5BGk4gue0s>_iuNO^!7M^IB#od0&*)RH?O@BROVi3aBFThfN%f*;EpZLO>O-1+M~ zR)H+t-`g=S%s0?Qu6j|m_PppriABWqF!A=Fk``km-fhNPd^f|2(0j0lzx9}*s2roB zk`%acz5Bljt~*Q~gFy>*j0&R?csPW~z4O{72yeAZo!C~xMj-Uig+6Cy0$WavDC4Y_ zSvkpU4!FswsGRk`?jNJ_y5b(3t9&!|OiW5hY<_epKt8;w`L4#CMPLz%Qlr(r{rlUi z+78rg@MGp3DFPS)7&yRt6EyGw2t?U-(-~c|hZQ&N?vG;#=VCn}&4Qtg1617M{L(^n zbchu_S|9Tu3ZFto@f;IV5GKG(3)g^_2O)nzjmBmMGB#=beER(Pl$dQ$(qw}%-Z-a9 zL-9E^RY7Uk1Sa!;^_pVO_Y#u+=H@Wbun+LpqZ!knp2#fQw!lG2BsOM zUJs}iR{P`Fc2upR*D329cJ<-XbRHYnL03hhMofqB{AlIa>Bfy2;{8=pACB@=If`tY zuoe$iUpVE=d93E4QmmE#aHWiJDs^m=xl%%&XFyqK{`0f9*6nM1Nb*u+ISMNE|Gcm5 zVX!=G<~CqyH!{vK+$!Yj9o0$emEja`5OO!f-=<~j{co=W=BZi~ZmSxZ^3_>qvC@@% zdA-z99>$KRNkZa{tq&4lf*?24Ch#hxxsV@oCEfYtAO*!hPfsnvh!S|2L`=-g!#oTi zqai`qI2VA?h%?UWhvv-UdzmB|{Hk>9?d;$(gF1*vV39(HM3}Fiyu94_KeV&~^wyC5 zuL3#r>p*96?(`AaeFH&?i;<4wq9}KkLQFNWw<+`mh75(}A|B1G#Yj6_=cwuZTK&!S z#&1Jv%zpQr#+Njms$)o_L&oapxILOLf_#0yMKC~i(M3C(64iEW-2&^>!a|#! 
z5+2I6YuDh4B7{K8Z5Eyvjgjcpf#|<8rhJ^fB3Q<6+?Hnj6#EuK+mWx1yR3`3MTinZ zAP}I2W9ibg?>ueq;E#!sGx9gtpe&gES6__O-@4@A42pcIt@V}GgfkDKtC9N^H$5mY zFds9+>g@+yG#JAIOm&vKotKeEJUVjppa{*{jEwhA#3gDJ!kiWZ-OYDhJTp&bz&L4v zCswabrTVY(BLA4@m+XLR=nqvQACt)Fqe1CJHGD^p9~E~_xw3AH{Il5n)O(t}2nvLM z4>dO%9U+y6^&t18Fh(l)>V6GVuETkv;BNJb_tal+npb=dOaBRhZ6^NpnsOx>+Ofq^ ze-KXNG~5#2yu8++B2uo&>3r=J*^Rw7PF@rpeMG?2!;0}_3kZvmi=a@%m4qzEt4>|( zU}j=!G0Nx_w#Hx@;X^UzQL1gg?K5h?)ZgDvy!|oEdt`kfs+pK5ve=Gyh!W*r*B|Lj zKi>4FgO$#;MY(;vCA+w-E;{B3K+W(5#JtUyP?S#{g^@LLO)PT%-_j)a>p@7azA3x>S5(D|8u2E?i%Z@f+znMOJ|z6>=Ym zQL$-XzI}_v1VnM%-G}@dc+U;qbI|XeYXC7K(r=dnwDKoKvU%xjWbT*|QIOoCZ?f*Z z`R1#E^G*gf#E_ScGUy?vwmBTlo{?H#mrD zuM_x!bULhMV>l>Oy`_{#2Zwe{=oWIhb}qoPRJ)T9-Nh$Z07HcPRoJE+jsE)e+lS49 zq=x#W>&c{38nwolQ<^sVTAd?x2rufYah{vn#m)-v8N%DFAv7v@?zVV=k?}mLCrSHE zHIh~N7&`-Cb#>hyAzQA+j^2q-S6DTs$6ogI;V(XF782e|fd9-aEZCTv>qC#OtYl(i z!%Jr{m{L1uWMEKPRTXmK{&lu@`@a;^4+e`a2Qw)0f9!*z?)}Vdb&KS_s1ESgVJ?C~~vp2MzpykEQ+|GIlCKM_n<_yON0Pm>ZOgk^UU zgE=nZw=mp=_gFYMIM`FaIMcyrJ=$l^Pwg*XF2c^xKyp2Y z!@2Jz*F*lUc!dPQ3wp+$e5FQI@Jz_z$!AHHT`6c=(A{XG7{3UcVcrYEUIu237XlJ6 z4h5EZPBm>9Ow}LoM%RstcmjX%hMRM|HBtlU2s; z7aRmPMwWaNHtguzU;inR`?hOp-Lko>dVaEfg@XDu(`du1!u4?7Uojm;0g+Z6zfGlM zDp|RuUpl6q$iEpQxleE`bouOXTXR8`V9DG1ao-YE!T#Hw{Pg$e$xg;+eL~#x(EqqbHpo zKeKTnR&p}(JE+tF19PpCIT;1~ZD3%nFJ3RPA2YoLj8+Y?H%S`fFW{{uVmvlOKMeXp zePV`wl6R5|EIMG*>?A&!v^f;x>H~6~d^`IB1H&Muhb8gum-E_UaQ*PQD!hh9w<@U7 z(AN6km7DNNrNJXuSO8Hmk%ve}Pft(I#zMF$V`KNz?*mvqyLRk&>gM(!`?g3q-ogQ_ z`5P~)kbcV^QMw4EAR{G(ca3a!E2eWrzLZMP0l~zI0Sjo4z#U({xY=>ax}5uE-tKgd z5<-NJo1y+$!qRfbRc;`N$H$k81SLD=3(OhdHE2*=M4lrH1^fe*mXIjc_@2rfd20l& zIOy@2!=4Zf7^UCEDkcCsA4b4hQ!}}{zPg%i)fp@gyW458#|_cpqgGjXK@e2riziRs z1#7}|b9`cAXNGY*CXVc}e8rMOk(d77f#Y>V&7GA{@FU%3T8Q5b&t-Y=oBK?T4-a%XgIu04h&P3rMd4bT#S zK-GmvydviGX*{h=IP{bi9on?~EY5lHzyE2{;gFiCAAm%)=%kS6O& z)!DLq)|73^dA+-+zoEY@jCe1!4!iFZ0 zLXTG^vC!Q^c!8jA>v3RWkQ8%g0~WTXQTlm6HHbtZ9+_^UieY8kK>2ttMW671Or?az zXSB6AZB$kebucGJS3Qm(q>=b|cmnatfk3`5#nRl8i^*%<9;bWBuBBZ4U`~<6!^b0J 
zR4u&|j&cd*&8%eA^iiA!o@^WNGo-)6R8*^oO%kG&Rb1V8NgW}D1+q1D+TbHz8Gy_u zGjl?d6e0rnP>(0viu)NRB(uP=`W!T z%_}BOh=jlsIov5me^XNOh0{HZlIQHcgaZ>18DE&(A_@2Pc8(=x!9iI%I#zL4U=-y! zu3(aO!5+}L-%n;<`qkBXgY-jJY8Psc(Ie`DWSmo#7CWjv#Pbc**ke{TQw1yVpG?sB z@M0r`ykP2VtPTmmtXRXFy`ERCk2zwRn^MRrZ{2G-@o&e^xz`XzLQYC**7t3{$9tNL z>_mPV@9H5)jhU@Xeia}>iw*^DZo8=UQ>G=4K*e00t(UVpR}P!BQE zlC%-M1L4$NyT+!DQ&V?_h!VuEFTz?S@fVD5ojF9J)+qhJ#)b{=>3A^m7IO}Ow)gMd zqvSs1cqOmuxI&`==aV9>3xUw;R1-5w8;;r4$42RIo@Lv;+dW+^Em`{Y3=y(#{rp75 zF+egtI5^*=W-1_mqmoo4S}HjDRvgB*)VMZbOz+s3;V;?Q?X$4G4x4+thaL#Z^w(<^ z=prK`E*Eagzs7cfaE3%R)K?Ep9=Za|u!uLHk-3Y_8o0ZX%E{Hgdi9F%>MNwEq6D~LZOjk9=bCzccVZXDL4qU19 z+cyThU_vR^Fuje|dDXW_>jg$5bfq`rx;xpQC2gN-fLU5#J<*0V72%CUv$rNWd?jX~ zA3`>AMnWY~NoZV@%>5p81vGk0Fp5O@6lvYl)dj%9U_3+tkOvV7G+K20m*d`Z0n8{&P6KMOH>;#OHG|Y@iljBoK&V z{S`|DS3f^LoCxp=ZSta`&ieRN>2S|h4UtlqxLv=#zmU+L%aA0Wa}&3StU;QX<5DgF z{fe4V^mXOejXSMg@Pt|%9^|i>)wP2{1X@sq-7;WhG-_Z@`-zHAnOY~@rr%|2rZ96} zz<$Bt?Xzn~_sqW&~Uk@xlw$o^a$&f!1uZX|xfCZNqgV6f2t zhUpsf`^mFUT0ZUxs{0kXzf4+AAvC+Q+BiP2a$vBpeyOzW!I|Zd4`dES>s#I>3q)NV zzD2RSVT9`Vv*^&jKSYB+m7|Tj0PFz~}kRhwi9T%|Jc@hW*ZX%2pC`k{Bi!b3OMoK4!i23>X9n-!g+Q-!YsZ=)7P8L++Em1AUfe zU6fYN5S$ep930>UglPt)%+UJk=(IVXNw}pums(P{mqq`Rtygb;mHNLW>u;{$5d=p=OR#z#=>A-tluD)+ zfBeizQO1a?CyD3y%BH-G7pt{PBL{=giFJfX*Y+1*f9R^8qXYU0EOG1Ckk+aXeF6Oz z1d&6#-{20QTF<)%sQ?TB(XK(pW>--;Z@i%LYuxEgGlmwd707s0CC0{ z?n!YE$t_PER6GSw#o8($*kte->mYKhP(tY2F<8ssC)*5u2ar90@*)S9pHNm9j}#gd zTRLXQ!dY-1**NT7KKy{aStYrZF-QE z#l?imfvr0liD^9)9 z)JHu%`OD%lOn3n)&wTlU)}lL|3$`l+y+kj+gKmyFLm;8TCnMLW00?Kb>Ixo}<=As@ zTsf5|t%rsiKZ^%tSlXkBxhcu{12d0YN6YMEiyO-C)ec`rBE1B740B0TcU8*oAOhLW z7J8P4U`D_ZfR^?lYc^+YYHEE?Ph7x_uQ=ZTr2`u3@}oF_6LL$=pv_Bf^vHtsF|fW z;#7#%<|O~Qr}x0>MEr%rmEvn6dEUKy6df>s{9x=6l#(zY*E$)pf@hG$fTh+PAq*$; z#?6~B9j2iOL5RRil9ztr&tLeh*}{1m;pZs7GVyIlwlGw8 zRjIqX|G@0U89gS-+UaQ^9>)+SmoJ^&_?@Xkh#I44zaR~guR{?D?-GD`(6yq`@Ii?R zLf*)ZeTptzEqBUpNUTr{+*+db{mak#;@hH>M09CU{kPOP&+y(~!uA*MyYGn9&d7Zi z9MPPceN1}G$=JN4N#Sj5FJFe8B#-Iikz($D8Fqv(S6j7Ocu!H=o_Y6*6vn4*OU;j5 
z6nBJP7OGBlbWx3>bnNbXAD#MP-Y1H+y7sc~1W#YvgYmh`*J~DRp2T0AUyc19J(c6K zd3rjka-|w1DVg^Ns~31~c>K)EGb5Fexm7uy5*_^w38JL`D~ca|U0uilySg9+s7zIcc@x+{f3I6AZ$5FjUs+dca= z;0Qr5`VQIndkX(A+HcRYW2S=X zwv=i3#`B-59eU2qR*(VDF)D4S=L?K8wd{dv!LG_zv_k#AHPzSttEtWeE>P=gRxjIfz%)^S<2Pg) zgcbtu5zV*xmT@<C#eDpLz9SV4_3ueUmV|aXv zbDy!=w<+WQp|f=^R7V>2yF z!W(hKi+m~mh_0rs&v4ltqlH?HnJxUr#3L$oab|3ikY{Y~VjL$^lR6uMWtiJj7A0JjDCUC*9&1s<<$C z6q+E6Oknc@+NOlGG$0VdCWV88@OZ7gW8sJH4=o}gxfp-Bef2jIL+tI_n>TN!EV>6K z4ax;*>VC$1_BoWplu0Yc<3;fHP)cDCLoiB)h}Kt|mwpt~A5KnKjI`EoloTWb4T6^o zRTULqpyPu#z4ZLk1q_C}9GNk<4*(N}Fn}>C0AJw2bT*@`;D2mur5bRSFuGO$Txn}% z2;8uu=i$muf7azYP|FM%II_7YPCg-op~a5dq4j1YS- zDd~Y+;JGvn7^vVMdt9bRO^BF*q4==(5fWcbAd}|iX~lkUNf_4BAudr45K(+GN|Rqw z3~m>{{O9QJT^~{-f8c$o?b(f@U;v;IYTEE{Rn+uKN?S3Mad?EYOW5IT;QO2q@FULm z_e-*~Ti~WAEi`SIbVq5jO!xtV<_@2+rY2i-M9F%y#3dyO6&w$%K@qORZjbjg-NO^7 zf&n$WPfSdP-t9ZK?Apl8;$BH``oEEQ_%;rANY1HbmOwTC=1oo;Pd_AwUt=SWDE^oyxB-S6$AtwsB0to_EO8-i(kUC}2S zW(AMvdnr{1j*Up>ttb1sUzKBfIB?oRpFdT4ChF|`qX(|xqKjs7bW1PSo{v6B;9FIU zzc794` z>P@;Rn)DsJ71zG<9@ryy>bL()EB$=hoMVR#V~_f7#T#?`rx$uymHN)~om^l1Su@R= zK5*K7m37~;ndEZpm-Eq)qvoqFmf=H2qUCd^{SGZ1TIR`ZDfrpcX(L=9mvi*u)Q|9; zBd?Cuu_Sk`KX+L;?o!)7K2@MC_AqL#B4o|}g23!QrBGYG5o^WInP11Q{ZfD2w#z6` zcX1&*@X&3&I#1)M69dbc!y|)(BlcYzzK<*4r@BT9j5(F0SL}q8T1M=s5fP zdU_l6qsBVtS^t+80xk~tQjEC2BIE~)nd48D!Y4Ig;T zo?#%Uo2OS^SyI?(2Wi=9&2BKY{f;SY+*5*IK6RsyKY+#@`2bnv@?@NTup*l>Bx`o4 zC-C33zki_ZAbPkpz*yB}@f#Uc-V2cM%$&%cn@Yf29XoxAd~)RMmy^AKdyq+;KmB7b z^77IDmJT*Pv|}gWTX;|WJhveqCF$67+(lhX38Y!-$c++#L`qhpQd~@o8O%zQbSOuL zSvPtq5+f0dT(upD&q$w0t2a?kqe_2%f8%{qw`J_`3QGr?46B*lOiX!6Obs7gVUu5M z*O`vTW8;^|g>qfnU%!@siy^S7u<-C{^6*AqKpI#?LJPda(Q#0E-^N$GvMIzbAYhG% z0T^;XYb^D`q8MgFm?cP9yd*&ECHK&OzL5EcXCU_C^l=>I8QesL%{7R*3T50J;YL=w*N9edj|#F<{wM~mf*w;Hn9`Dwl0G?BXu@w+?UAFH<_*Vl6#o6yV*f`q zDt@{A{@hnkDiAxbzKsvs4^cBLYJUU8Ty*MzD-SS7?{8xY#Ria$L9D7&ZsZT6Mo11I z>f)!Az?p^C0zcU)zRit|lc;oYVCyK+jh574^U6sZ91TjFCT-^EL%lrW&2TXRJ!8VP zN*Jv}-feH6!?xYSb|dXyiS+bnB>tk#`+NE+4kK71;9Pxrn_Ppk3&vcKU(T_5wW9?IpaO 
zg@nEXQbm_morR-=oN~GZWBLgXXGFdZow(DMxl%H8DE_VD=F9>mX!Q}%>r=R_ZblB-UZB*BwpnC4is0gTXnhP ziY=emSXyFh&;8g~csQr+I9C^7TA&B~AMz0cLqn`gF^#VSJOabgq9TFVKYfGa^YcX- zobE6VKBnjO>mRg4=kvuO0xc%GOJeG2R`(1xjIi4xk2%Ew?ZZw zz!%QR+PbG21=B)t6H6(`l$XnPdi(fv`g~!>eQ927fWQHl&OL|KkR}1}9Uay&G|cI= zGul7!_H86F;*vfV{b26pK=wW64ald=2#11U!9$U>fZy8h$aOASdQ#%al&iKL~ zNCbvkBWSdAb(P6?k#Ai*?9bzX;i-wqe&*tG!U4J#Y9P>?Q6O(;gT8e6U`G`ivb)_{c?T zr*flV->v0F7-vbAEc z`}uPP@gI3b&vK26V`$QKxPdAs-oKx(!(C-ksKJTi_93T=c+)>sHX4shzFPnOY)fj3 zbRS1t5VVPeco}1lfWLrP&`ECEw8^6wHfy>n$>?!_fDuhOx_v1LC7csv zI34aDz)n-I!S;M*QJkfgkUkpGGKt#}6q|gF{1MVvqaLwBkS2$9#t?(P*HZPFi$4_CO8;Rb-|p zCh^F~js+{EQDa*~7dezMpXZSi!SoEt~-705r0gg;7c#1bR5>ztM+D z_1cC=vjw-=2yDP*+nBvB!c?`^*OQDHoslPx%#+V>=2An@h0KE10H91*TQWV_wa>F^*RYxr?9;057v^vDd-0}>z|(d z-(Key6%o@KayG~-=_x||lXuFL5MELDHvL-T7Fb(KzN11jQD7e_; zyEr?4`tA)%fPeB1nUZfis90`+kc6a&>;=57whiBZRs(XR<0K1{sx*1#^ zuyf$SG(GP(KT-=;19myUFA;d$dOX zQdC%2mqU4V7GOXuk~vei0MrLdFOuGPx!%G22p*9&si_AfB;NP-K2e<~N^AM{^Glwa zI~oF->sD2ZAJyuGX^3-Eh#9ppJR^(hnah?tH}?JSx9y>k0^2BILEw2aiBRf=#aM?k@a3okqUmu1A&|%bqno)=UAxoem zhI6_aIU0-vxQHlgg0ENnxN;LbOv-O#M%w;2SVN?P`xJkh)B1(k7 z)fqDnD}aKi@jxBIbBBV^)7`z=?IWpw<8n@R3I!F_FwPf{gzapYKf{?1OxitcR@ksW z5LKi}Qxkg~rP(G%*Vn{{?_ezx!V64;)WqAZ07-!qg8&iXJPLg8ODH2uG;pQ?ZmMKT z;Tr(oIo|hU!(FkH4){NlwN}D1A>qTz9k6u;AQ-PR$Cjb1`U09&qyR#vlm`TVuH;fEbNX0 z;Y)<>10j)|jnDaLd0E-l1IHsySbo)<+KcCqnUZLJzSXC?UZ2a&$$8V>&MIv68Y*$n zHq&|>;1ISz2%|APNQ=J&6Ww03kJdQPu)sh|!F)^7>NU1wf; z1#@h2Hb_Qcl!#&(tr+UzRAc050D}mmz2EO)J>prJ?=SI{lM)h6pDZ()h29Ix4SoKZ zl7rp{CU}mHN(DU~9kYb1eBcszs>{A|Y!kRhi^{$kkzM6p#N&|~KNcqK(Fe>Ov@`;B zgJ%@P3y94`@6i4k!6v%tBeAXv>?Bl0xZ62`FJq0B2|xZbI_vQse|0uRMiR&*e&f=m zONc*fm`EWsJpa6~oehr)fh7Q$M3_Ligk~GS-jpZDb~ZE#mF|SmVIT?k`pjcNQR4LDNa_qU!#9V#59} zxDL$%-DExL#4rq7pu^Z|h)Lg5jAdw*0Uygk1ddAYN%6+O+uK`^GwjREeiC0*TpG9( zqn$E4LNtsWrvH_2hVE~;O8-w8^S^%Q|G(Tu#CDRXJDATuQzugGG7p5%7?D@>HK%fZ zQLx!kfMRB}(#;awLjPpqkt10oh=DhicC)gMo+C4H3*=NwmJ$;dF8Nm#bPTBTZaTUi z1+vzEEn+4kouXTU0O~K4_~2w6ppp4A5ev2;q%Iz@Cj3!1Z^Bj;zLv2l?0`T)^XVad 
zDVvV?tkf3Lak?d_O;{vc>qkb`pd|)bn$Ybc0!<(O5A&?iWsRJ>;>>NQ`Rwof@?%|SXvE^$sAl$9u1Ib4L4vRA zxreyIRL-3Fx#8B@Oci{ed&i|)wl?mu97=w?7jNECN9JcSFhNm&X z&RX6DO55aQkL3~jd)p(=Wv3XdzA4xtSw8NYsguV26W+cRt)iPU0wObYvvb!@A2fKBcF1?v&$1Y3Ry;Zwfe@L>Dl{|EE`2c ztV{VM4+jAmUp zT%5U;+r_%jwU3fC{<%fzdl~u`$LNL^&n-WB-zpOmnh)wGo>|h+tGbo%V&B3IdNBy9 zW@2dxW}aBZmDyES;0)@~KRvd3UVz{M;zxdC8a693vuXd~nUAU4SjqS_?7DHqb9DFi z_L=M%l{5A}V$9uX*F&r9@l!@wlR~ASBR5Is;fa($?b~(Vmrs+jsgVXXSPZ(JIr(0a zZ?8Iq8_O%3@fUJM3oxog(1z)2H`0G|c2!asU@!TIFe$hlw#U&nMkw zWo2vn2co_iII)DEA#Z>CQl|d-g|;bs4--{Qhk{?1bl*^?PgtD}C`dO<)6FYqoLw($ z*ABMhRLRhb%hR1KSvCt-CW;lo?eFe1{49HHEaK}By`bTkV*enSAT`O?i9M!({WvPp zXCNe^oy4}Z-HOOLD0)DZLTA67@VzAf6}Sk(pJqFoMe%Pmrf7wcP=}rTw+il~OPd4lyL!^6#q3Jm_PdN?eS)t6Uzh9FP71SSf`Qm9)71_v=)-AhmZY_Usq zF*sptYz!!JAn;2VYN8%NQ!WnjifloSbU~D$fND*j|8cV=svhMsTF>n2ukTm8Cw^Qb zEjLNi>*1Q@E(oQ;1G-D4Q14AoE`{N0a$-qrRRuX)u;4w)7uF>vyiQpXR0&_dirt-m zBVK-9Nbns2u3w>1WY7s;p&=uwGFd|=AFjHXU0prU(!wJna~Vo9J|iI6e6eq!Lb=Ot zs?wf59pLB({Occvi{CF%)}#A+tmXDvjpXt+=frgzZ`a50?EuMXB?TR>Nr!^aFJD`l z4!}8Z=Xhe|lZ*Z@949>bc5N1EA-h8m+UtYjHWCLNkyox?HK>$xB0cqCBvCP=IiFkg z?u4yg{79_cskjo~Op`)Q&&NB06e`u8lvcI58n3BUXrA?p_*6nVWEO$98jI<+|1hH2 zNyxjI{GNj4fNgu{>&J^Fu5OO1uWP^OqL-(|6XvWpVXG5QFMg`AWZBnx!A#^+xspQ8 z!VC{y?Q~WJXK$5{efyntH{e&{o~`E;nNBjLRBel&IUlxT0Uz@1(exwNb@Vj1n0JOq zd&F`j?HQMmcKKag2qI7#&FwF%smCdWy?A3S+B{O{15N_cPCj)cji;?`PkLlyM0IyMwNRrQ_y_M8CJldZN{>y}3D2*g7mW z_8po+^eLWhIXXmxfC(;2B&>AK@%nN34%K_fIa%d`9xK(x3hwZWGdxVRVb_+MTN8z= zjLvB4knyMqGX!f;9*wnc6G@twl5M;2Tad?(n&;Tv1b_usj&&AD&hcFa6I3flR7gm( z4#5*_waegbf`iQ(@Y@_plD^n&Q@fAGPC9-bcU-F2c3H=$&D7rKoWCc60=whu+joa^ zIdku}kpFSK>Z8aRr>EUN^i6wh_Q4OAJ&Fv$kMF6m#9F?v@i7>Xn-=qpTsgH@HD8FB z_xQJij=;tB2D9U)rfknxu;ih42Q<@uh_r)m|KXOtw>^Dtb>$3-ZfEP$jy~dU&gJKE z%q`FqJ)LANT&^PiE|guVpeMIH?66qQ-MyK*^cPem+pK7KB6A9ECM4(dTz085oXhxa zL+|gVj?WWJ1!uRfyPGpkr9iL6>dEE9k$ND-7KjN6O?M8F_}-&y;&^FoccVpPHPtAu z%2YSe(3Cwl?{zM>fJ1(vcC#*HL}6l|)6IZRM%K{51nZ4RFLc-|0he_pZC52F;(UK&_J3)Ss=JmTz2-8y4oNv3{|= 
z`jolp&;D^a{>&+&@m#|?bgk5EdH&6~?PPd&6Nft3OI2tM^m_#jU-Hke$&;5?oo=YVQ z4&{vD)?!?ff8Nk-^U7^u)S>8lOHCUqsP=yU5fL779%(thKCZVxCkLzUmHJRiTp&Kg z?&n0VaBws_anLfs3cCQ{96uf%Z{IqDX;D=B%cw$l+m!x2%Fu1sZG~=p7JJSAa=QKW z0(U`{!lAuADv_hAE2j7LM|85g9i|TRrCB*xJNxT8`}a)l?)*HxcRL$yuEFNF@0yRj zwtp-)3j4FS*BU-rc>3Ve?ZVv-ZH{m8$8&Fnh14;pJeKB6cU~6(MgY65AKsO&~6n&yWa11eNv>(QStpPRX$Z}IvQq%1$KJ&qRXqgMiGymDC!u;Xxvdg$q@Y9x;WoE zi-rD4QPj@lxx+72?%;#uXPV1pk@MgfiRI)h?WAM78pO*l4+|`M)@k@^=V+?Nl`J;j z=CPI(=LvJ4$xZB0SzV(|qh<8W%umzmef&3`L6JOGK|<~5=*t)VT>~v$Yg};5-^r5Y zG#4aCRIpRVC-%+(qCupZ#{>oRUNmc1X}^b=xfXOre4R_rMC$8bSr>21(i0i>x2azL zBeS*Ygsz5utfuMZ>@1mVJ*@%G0z2nBd2x(iiW)iT7(GvGU%)G@a_oIAQ!otzi=)Nd zd539xd+Fw0>#%n;tkSmp;XSq+eA3$X$``A+MFeUWZ!4yyich*rvP}?E%x?|UAf}cP zqu;YMV=*=_#>p>iQ#5`q5*RgF$iI3_nkyr>%&1c%FPcZpVKwPokI{vY{4~X0S7cg` z=gf+`5myp;FY>pywfY7pmqtZ=}buK!I^#LRbm zBsC96Ygonk;lp!>8B&SEv0OCd9y~Q$DKDKEXQHRJ(;cHT$`>v1QZJZLQ#i~P++-n{G^pmrBK+>cge%jtj6VnRPeRf2 zEm|<$73R6#aLYN7VQ*T4J`11;EPOJmHpx?;7rN(s_X%F@l>1c4?RDO09}4kuXnsq# z=2rJ4-7ur;%bC0ScA|fN>f$vzx|&R*NFMa&fYaf3!HxTz`N-Y&Nj3URG5Coe-Lv!* zfmFs0ClZ(sh(iOQTF&$fLx-miiCRDN*azF^LJ6OwE+@`>6U)f%+!b;n+0f9JU2fLP zO+oW`xmHH!nF6-pE-j^;PEN7J?ywhlopo7#ah~RAP@{RtSG87CzWk;gPi=f|F$&*s zY5ug>a~dI)aw|R(QUR<52i|r8V93TwP7Kzt_y8&o0@?u48ccFuzOy7#d@}UX9i0a| zI)g@Z?7I6}E*6C^%$+{e7|~^vnXizk^GG*wefXD(rjoL~j!{Iurr{9hncbfQ16K6$ z&7VIW|2D)snIa1EiUC4^3>&zKJUYKol+#PA1QOYKX+87ex#w_q9aHhbAeR3Q~wv?I>?Rsd?LI6U`m9UHN1CV zN1dUU2ZEw71SJ6Dsj$juuzz!XL(B8-FWcKY&w5ApDt8-Q2+|6u(z#=BNv`5qZdq}m z;<=*EH6QbHs*h|Uat*U`IjDl8TfcO@UlGSQptsk)u_#`QkKayB-QLntgP|_<5pt4MvLHbxpHFFp4T`4M zr<<5`etczgQ&C=XIka?f=1@6Bl~zVp5~rdLW4TVoUGBsZ$Cq_?Z(Pbw&YlnSEPu@W zaqQTz^?O%Gvb{TuRr!kq-r_MDGdEZIa1Qqt_z5<#Mg`9T*Vmvu@#V}T`#k7U2UCFM z(gjFW;l6-k79}mwUf?&bst> z#l&9gQWTsETwd$@yWEUd?2^lFMZy6E&veq`{uc_7?NqGdjXn&c-Q&+60MnH>n||mD zIonGt7AqXQh0+?U^@a=3rUT_|Xvnx;I%5qm#28z8ii(Q((>K^DuzHn%h!w~fX#Eu< zzK(#ep(|fhM^{qdu z%=e@)9laXKDf(rxdHmLwuCGSur5~#4nPo(CbqC4{si@jpJ7nf-o+hu-H}=Ur5KHsI zN)YA6z9h#65x-Xd-#o|IB0m`r 
zw92#nrA;`2*e4(t0KE>pl+Zf7O9FO>u@fOhBv5N?YfbJY^SMM4MamK``@GZfT84hO zH%?X>=ZcE`+uG_dU**!oE{F9?3p|+}kLYllv9;7_|7=Tk{Iox%=To=rhs@no)!Ee9`B&Io-pm-=moVbXY85)0) zpDdq=6|&HTCM6NR*OPjy{St6bZphz;m*a`+U)a&spHO}-a3~%4i=QW5M6*r#iHvkk zbk(O~RoqI`r)-pC3gu#-i+*-BZ7cAfHGTEI=1+gk@_VV$1aA{sJZ#;!prZu)>T9hG z%R>Zm_=XWPeGuiAvE}z)=o)KTY3ae>8-(z4bW#tfZ*5qcmAc1b%G6W=tCJSbxVuQ? zuF zp;3O$za{vTb&vLiTT^95l-uhn*TsL16YcHgPU;z`%k>G}pKv4qNfmO=ono__O-($5RL8SZ7b42tDKe zA)WrAJA8EO_mb4f+6?wTOPHi_d~WAw8z|k!*1OquL1DiR`txt8l=+hfV2YjqZODk< zJj7M6xc6X?_YWL*pcV^v29xzx&toeS0H2=+QLmWF9U__HzIe2~{cU6W+sTA9%jr|7 z5niqupB{fYR2_CAPD|CEt9DH|BKL7x*5kBK7H&`YUt3o zySrSa%jCg9x(Ahk;WQR6EF=@D+$+iuj9I>lz8rqh`>MP3PNRRv5nPX``&Y~NPdw7h zAVx#0ukkpLQ88kXx+7PL%Hw$UV6*-Ei)z zo5waIqWuAG`*huQ{@IpviC<(_+RuH649XbRjLc9yDgV6CfIU?yqBwPGPm5}|k{R#K z6R`%nYdAjH|FvJtDQ@Hlz4xB(D+Rq1ZOq7^W@JF^*ZHW3_&t+jW1M88?Pg%G4Yj_V z8;P&&H`Le8%i&*r$|vzlZO28%pzc|hK%xs$taXz*MTL$Vigu)pq}W(XD=S(iCe`eB zy5|0Vy_-pV1-A$v>gZzY==yMQRFj-7K}%DBt|{K;$dB-rt)AA`2L3KPJTthGQ8(Z6 zkv#sj_3o;CmG0ls@v0ma@8Tv#{4T3>}jF@_!)OD2CuFMiiQ z6gJuQKD{hEF2ug)>G-QeZp{n>Hr>Ewx|7~PYJc<7zZojCDSFW;l3JAr#m%)^MHDZP z7cc!Pd3@FQ2l|mD^D``Ez89E7*@6juEHQ&Ff0Pi~C4UhTGiNI+LP}GJ)k>+QnSI|_ z?tY%^(R=pj!(@-Ho@(%y(w4fLC$dVWPptCIZsBDn9h6upos}veymzIHphv){0=^`D zCZuP)h5+whrC~qebTIhR{7AuI^Cl8`fzS5NAMMwcdG0Gywey6t#D5;;Yilkjy3IM^ zK>dvUrlKDEBfeeT=UUGykZYc@lD1rM`!$ln%Orj9;@OLi7rj5DKwC6P+`I%T+#><( z;CULvgQur9Ig?GK`?+-Yb7Qf&XaR?(`iaci_YzwLa%jaUCWRLr@}HXIye=w|y63B3 zF1hU>(>{hpA?vfA)AP;_vkuOUSxgNi0H&w*m3d2Gz9s+Javx*eX!hue|MISItq_^g zLiUVMugtN@;oE9yCb=nwkvv~G(rxLK+quJT&{8(oS%tB-INx2lBF}Tub`O2%fk@zJ zwGU0l0uWTdm4y8oS6p+2O(ll4{0<-+jm=WNzVnR!i9erH_RsC|=u1)P|T@x;-isWl1xx4gdXx0;=>Hphmanp{bo zeWSDC^{b$z=Ee)5rkvW@YJaQq3Co$?7iV%=zW-QY%Dw&P5c(Y%%^)?lbwB1vj!H## z`fomx)IO3RH=A?ZvO1L|k@#3~w)XmaDKaCejyHmPpnUU?Hrh()$xf1=xhH52js&LK z!|Sp+xR0c(?vB~krr_zp+YTe`sanI4<=iAfW7(aPA)-~o<%&id)_coF%>Z!_O$H0RF zBYtD!XGUZm4WcK%d4piNn<$m%qF&}0d3#Qt3TJx+mztl}x%ZrVxMi^(#gitUnP8Bd@vcqDmY^14Hg!&{t=w1 
z;Z31}`r4}c+AaNaK{xM6<#0;Pyb1ZcSDC6Ocj@Ukc~N{A>)p??yzR%i$QuM1Z+?AQ zMqcK&c=WSx!o}qh3Cb{q_@lV{d^huL_JS`u%6dFPf01UlkW#`I#O2e?$jDyPVSJK@ zKo)5z>+)9rsXf6ox49?ccbqe64g7xF%!J)ZgOg5)Wq(s6g^%wAWnVxvQHbY?9${Cb zw%Ky@wWZjPT(_ULH{X(mv6G!@c)je=qrZT3DwBWz^A`p7&OCynCVO5!>*;*gynlQE z7!)5GsmcvKCo955Txc`nx6eFO$^1f9B%#P&wRl?jd2HC`o=>#5oKB^c?0a?Y(|)ZfS7sOE6{7->cu>T_vxc8kY+3?ofQPgB3IukR@-7s5$;{B^XVr?5HM zpr6|8yV0KRTplAtn&CV6Ak@z9oi3@VQOXdU0eO+EO?>rTZ1r8{cLztgwo(1PmH5M5 zbG60c+-8BOBe{E<9}1TbtI5 zI=#wjIIahC+}!f3ewLO8AGEUyS!`9B#n~FDk9p&6Rg%b|Li?lZRbsr@WP;~Y+eu}~ zpK+5fq;eD$UjC4H(MXYCz>!C5-56)RE9pG>LCbqw!ySDu0!qTo0-w}|b0uw++q{Lu z_i8Wc93^Q`n+=}1*H*&Zbmu~F2Ia{R#7c8AOLHc-cI(F(@X-(Psz-6DsTOj72xrPv zsrn*YL|iEM{h!0PR;YHl#6LV3EEp_!OX(?lbCUI*568~mucemxOF=T_W5iS$d+H%^ zcY$37&-g+3J#YUsg*&f9h!rH8u06nt6(M= zy}N0fu)UF(KtW=~%pXtEIQo~7P_-n;-hoQ01wZ}g* zH^l*hX4?osVB%T6qPmBE@du z+q?u4ucwJD-j5_%L`=|RO4cH6iTqHcZSP&V98S(hMe6z7YK7dpnyn?Ys3+;fEEFOP zqF)I7kzLrXcq3TQatmXU%~#tuRAugq)^7fWPeg^)+L2R)kqW-tQL#2RX9H?fa;?G= zA22m{is2KH)SWX@eqS?lFB1KH&d??&^_im1DCaJoo?h-<=LG2bR2!?PbZswUTPV3X z?QWY>S;Cw;JF1JXSGAQLTWxyp<6(0gK|i7QsvqD6jOYP@!{kx%b!#gxC#N0o{eXY~ zW)0#irDouuR!S;aS0Xv+<0Sa+s4jfrdGL+GGR*8*KL3R~Di2kD?tM1yerP21j_7NS z$@Eucj3fO+N|X(hf+ct$TI-`Nofz$=MQ?rVdT@bk>C+|>sch4+XXu16+PeCCu(>%* zF%3JL&sDm905lH1n^CcaGFdl5JjoO#2PJB|%Z8Vp@z!iR)^I~9#^kQHevDq5YCiWf zv%A`CxqEpggfkVz%4H%y?lw8c=Jbf>s8z4t5ZBw~HAzKsk#r5deTV0=?)tapZ|0rd z!pHnRRLxtHmLyfSjC=?*&lS*^G&t`7nGb}Hlb3!68~m_K!E(ev8h=O)k7W@5nDumt zQ&GB4>(R@IKdu$>KRwb+67||v4vDZ8@+JxFJ{1>MXPf?)i9cF(+Xm42h;w#cl(O?Q zZDCARc1VOkIFr?2Q`0rot4rx99?qb603=VI25hb1T&+=@9Tq@9qz~sQNYPe-%+nC` z;VCKx+8}wJ>+(Egx?v_c6WeEQ=c&vsxI1qI?a3mlQU&sp79ld(&xEs|{m=?%j?5-M zenX92Tj;iA#D}I&{7bAKTb|5+y^AXDI{iZB#i}7#EMUF7RPu;boDMfg zb=P8{k9M{?w$GyEfQTS;gNK?kvWL5(|wWF-o}<|L2L}? 
zx5yWDjLXaoY@4H{$iy0h0!Q%JF_ME7`k+>ux{i?%{4A=Wq5@&&BDTt%O@V_q)(AO6 zJxb12W%3N551I+UJuspH;h`@YU$jzE z3pEeNeWWnCnRv5ZXp3vRIrAEtMLBzPlcDeQZ!onwy zAKR7WLRt?pFbBuOh@P)X*kcK03Bn1JgzEuCI}q>);P~cEY{mt*6?!|Ydo_LP{T*Tm z8bxec0jo;V?-%c`-U?ejk|d3_{r~0ypn{=EkImuFa@wo?tNYlh?oEjo^LMOfPd`mC zGUe7fWmcCNs!I36-TZjfW440CrIT+(f@!ulaB|${YP-=O_<=FujOXlLr_aiVVx((? zNUpriM3n@Q5Bf-6)9OW(G|#(J!kgSKzqx$~-Us0RF>4%WR#j7jlRSI+rC^$yN|Rp9L8NQyLi8VMMi=G^jJBJb6?LPr&l#)~(K0Kms4&B6!}W$)P&}{U`1JJ6kKQLvyaH|E z-u?SB5bnW_-^2C7pelbnv=F#Aa*yl|3c)H}#WXCbVHCBaIX$~?Jz#YiVsH%TvHh^Z z=R)jR45LUAkF&+_PQ9Y{`1p)>aG!&A|LeGBRJq}QPWOB2W=4qhSfm-S>DT_HYl=}~ zizjEw3yzR1k8`t@lzw1($o$^EwvKDgtzyDV79WSAF4(#_IompI((Wfo%@*A@hZ&Eb zqc3)-63TZD;tN;_SN%Nrsb*`4Vm}5Lu`dC89SoL{k-^OkGIUa#o|$T^@0*X`;Q>GR zR8Q75UL_Mgz_Lk-oqvN=aI_>S-zjd2hRqxeo5@mnzKdU*v&+#tyENq(XFFeE(JmhA zGtRC0;lVu?=9exsCf`skUVr`i3Wbc4_84S!Q*U!m z#Qy=G6!j;u>5S@2TLhzQgxUN<{WGE0ve0-x~NK(%(*_$PwJh zNo%{OMWI(cCvUni#goc+SFlm)Q{K$!7fwr|8`u z*sH#7RC!wL+Zt&09c0p^x_`^?pwo&{CH1BAi7`S%wAnk8`Qa1dIdrer%nhupCcu3J zpO{E^lx*Ga-#w#y^*TpMJlIh)K!i`fFx8l(O=xE^9))Gw6fV%=(6l|bA|kkG!FY`D{e<-r z!O_sO`}O7?f#sd}>9kI}YJ1|CI;2Rgnl+f>(~k1z-f`EIJ=^i^q0G9EwWhDB;pyYa z^8C*`<*19^xNbA!9Sh%<^Y^x#dkWirG@fBG*WXJB%r8Zgt1^l~ zD7{f;`0odu;0}y!3(e|SU{wcIr+6R4Vc04;CQ+C;FDxn95Ai1S_sNbZpsvidBCzB2 zV0A!s3)92#mTdz3li9RKdv&(YFj;j4o@lisZ*t?}uQN{ctPtICoFPt4I139gTFC|^ zsEa**Szo-saF~PgQ^r<#TO{Ps`&rCQ-(d$|?FqeBAT2Ww`-NOtvg>&Pe>Pv$ejw!g79^=KQq$!!hzY%{u`(2M)!V z=?xhij8>tYdN4ZT7W|MxIpJ}kn!!?j?zf88CM7mf+LB$3^{-yH_T3j5$(d&;Bva#~ zmUlcawQq<3@TaFwXLKTmS1;U{g&oxeNW}{B==n_~?7KLEqYZcw1?;r6ZfkI2Ut8p@ zTNCc608 z|JueQl&_&X#eSE?Je|dSqiCv^Ucsg*ez%XEVSO!w{Utm+Ej>ND zN4EW9`KO}kcQ}-OjWj^GZo)ml!RCTth8nltYg!fSi+_YD&2K0jieb=66l<_5D=@Bf zOwCI;q;`6{?-TkhUdd<3;1Pjs24Hu3=pMjtE#XZt85>k!5&ENeJ@~bLB%P)?bMKGaF7%ne zL4B@C^_cnuA3UUJQKiNkubUb=6xy8BI1NMzc#p79s$_z}a&cwG*C83Tf;0A%mfTb< zSGO&zhu8Z+O9RZXL4)JBjagg^{#TUhGijW_|jj-4i{!Lmwwc1X>DdM325! 
z77kIodT`O)pi*>na{AX-A`w-7b?WV5UfVsr(!a2S$UQS1Lj!9*f%4o5XM39f zX0rqFZ`>LknLa*J;D4rf?^1!_Yzx_E%GXxMDSAT9Ked<4x=g;0vYu}kub)Ym7tdB6 zTLX^!ua=8sAXJgPW&<1B!@N?seg8=gB&O4GjW_?M4-GV1C|tmER937f)^#FR;}l(( z)OLZ4&?UcLsHQ^ADY<*vUJeWo4K!W*=xj6F^h6}YkW0a9bCe~@Z{SzL*+M4}t#&E_>o5m%dJYX9~InKmc9?k-0i*4pZR`osn0 z6Q2FUUHPBm8fDCDO>&Oalg9JV^;jsB8+1F2OSibPco=>KugKFIJ6GG>(!jp+0$P+s?urH*i&q2$LioTn8!mu3TWP zfAV1R*o{*;aW~v&cqkZiwH=#T?tGf)T?i4rsW^KfPG$ez*Dtwr8}X`%b+KST)%t|; zA}hV^(;%-anYEI`?{83)S_bXxR2+lLOC11defF3G!FP&ATz`T64hq38HW&*x+mt?l ztrC(P{ocK>u6dS`N2rTPI^0_ALO0!w#7kQvkoLEiMck?AR)GMmm>;M0s=XspRc^qjK>}eH) zSI!dM?OUozPj)HXsmX3Od}^{=WG9_)>(vf<>NOI#Q(s6(Nv%SE6JC+Te65fj{MvVg z!9)56>2^@iv5Q|B0_2p{)V5Mnmt%|$_>w;jbN8+yf-guh%408^?EdvwU;dnWN}H0O z$n6%h&1X~F@7>=tv48UoTS;ZVz4P*J>uQy?=bJyOOW~$zL9@NRlcXzT0 zzgN(t9QN%%c8Iys^Y-?HIw=(WGy^az1CiaN)-kZbIKq@D*)S9aaHA2Ne8_c^k4TD# zwE}5JQp+}|xKp#zTPVK{9B=vF^0jUJiKe&S3A13$j2F#PYwfp}TswUJ^Bm)z*;$&j z8WYN}BTwkUg<8#CP#WyCN=-RZJ@aq}J|9EhGL$(+W!e%mnD(Je3=Ji1frZ21qZ7ae zFto;eTAsM*t5ZSB?2A?(@heNi$!~6*n}{+$VnD0yI{~}P)Eb*|7w&T%&3%)b*R}ke zUxreaVUvmG<_`^Rwt}6#A79nPymk{TzN%AxjRT(vy<>2-;mS20AoocUqkZ#oz!wl? 
zP_faZvyne9{w1mA2!G6wBtv=h9Sz@ucKegEOK0nr4##V4A8FGPdQ)A^y?bD@-@(uC zwtJu0Tq&nTxyhtPu-oa*gq)%=5J`#f^@&#wryX$GNx2TgRi|hJUU*@&Y&<~p4){s% zR5sE98w`7xlhte=eeW%K_g$uwGLJMVnRs_TSIRqZGVf4h-j~i3a!)!rlI4_fd{^tt z8XLa#@g=?DlT$Ev5%YRp?)b1Md2MTbRgeFPl&UND%5SE==tb@W<1vZ-@Oe6XID~91 znw}PU@7}G~gCfZU6AbyHwl>wsmAp;WfJ-7lUO3-#lNzg2_jkCl>}8qMU zbu%>mfu~7mk%N*%h&VrZzyKXG$bMo5ou=GlyB1XE&SOG|G7&>)m9i5+XR)2L)Ri9j zcBrn+iEGNrrwQhNJD2&Yr!@1Y$FI3}?D3D!vWB_T z&ENd`wqh^z3RvvE{E=+;?Oy5&FC1U+8c6rqb34Svg)NQlHO{oX{O0E7$1p(z zvN?M}da?cL`|qBd=@huN zy~R%6rt@|SA64?XeS0%~laJVO)uu>&?5lcJ6JT?kElfOT$3q8PKZ8HP34E^-pKG)AQVhL}1H3<=D4hlAVMqzNig~>WWm6fET7i)KPUK&-v`<{eG*GR- z)FL166ge`l`0V@xgsS#+^#v-09o%u?XA7!;z5{PFX@zcY=V!m8gMS&UA)l;i*rMxF z#5X^Efs~|?PQQU_t4r1|Z5psI0R+Y1nhiHexxXNxf&6d6Q-vlm@abdB~btZoD5sv%T@hS{PVor`rP{T13u^- zbgD3x+(G-esVMq`t0y>rub7j=OE=_y0-oIcJ^?q`n;(-L?_JUoMQVth_-fO-z)Ta z)ede^dnECKq_8*q)EL!a0j51$?vy<%DvK&w+q$N2N4xj$gPMZk6rbM*#;EdX(}jva zDFPA^R(g$+Fya1A$y^?#tN|@j;xS@kb-?t7Ay4|@oo)TRZ-1tds05Oa^r-L{)n^}T z$XfFW44$_~Yu{B=1}CFxjA{dr*tZ z`CE6;JyhMd)^9RBE+b#T9dr zmqZC6X-v>G4mj!e!Kn+meq&TC!`^38_|;EE{+|1Tg^s%Y%+^19Rzj+*c-G>SUYy*c zD-zK=-&p|4d8_5qs2sp%ZhEe(_TI{!@-JKlA_isxbeV~4H@cS^#^PC>I(Gq_!^rYK zz_}7Yng|Kb&w6rm$#ZwoX|G6@R`w=V_D632=q^h_Z}*k|cw2PKx9Hg)7SE!Jjy_z@HO)%Z8G6-_0QV%F~LIWMT_(H&fpIQ8@r@G^bac)jaDyG6>i z?rWu)w8y=YRUWA)7_lg77`~+ISdEJLZgrj=*I316zByPf|KX9ZQS@fd;*%#O3{`X` z>{*!bp|XkU<`x!&`HCES2JMgeiAd;{goh7JOnkiIMn3tP_VSd@xOTo6%j4AE%+z9g zIi!xb!nyG*#z&@MGX6uN>{IWCN@RG$S@p^_@Abc^)QP^qVvNPUKg@c2CR>!Pg|&Tc z;ko8rXGdZ)a{u7aEZ=Rv*0=0DR28_lZx)WpiN}mRN#+=fnuOC~US3r-HJwHd5@k?O zkXTwg`NEgq+&@JPn z4XoylAj;go(JFw}%*Kku$6t-V9u&4FdpCaFUk=vT&+YtNQ{8jF)Vz(nx4wh?jmKxX zA_qlttA9ns(JE3ZioV)5Bc{7v=JmDit&Z_mj(EyO3O<_0LZ;rFQsfDfrsxOH7k>IJ zkCK-zMU&+|ssaIF<1Y7V89^BdqwLU#P2>A7j5Q1fZCN^?^-5vA2Dg*porC6^Zz%CH zSxj^oM)hu7H&tq4Y3Wb0wYS$)+ed$hho_AWQ`(RO_f(Tec}snXeA^xxxbnAu@Vh~p zE&RkY!_;e@qsx{YK7X$GQb}hP6P6mF^3+rX35Y+?vzdbgEFXbh7+!~31Jjw0f6A4G zI5{-cOS%o^Ar=8gKj^|4=^P;cOp`3JO^`q2WMu^g2JWSYLjq74Jpmy@#Mjl8nw)7!yVnd~5XyetcJt!Zi;$1{ 
zt$|Iu`38oTppg{4DCt`}B(!UHe=}wd^MKJqE(p`b{c%gr9_nBQ5}21Fr5A{KIY+lq zSjRvRp|8xn5hg6i+egPsudcgL4-Gdk5QbNHU1hoYf|>j~3JG#cf%v@##TxREGZGAndT#Q5 zBYsf)cZ8uB{e$wiVhcsn+P_1@!;P%JC1Efj^!NGxKfd>0A?ts9L-o?qHPVrMRIMg9 zqIV`93qGWKcXH3@W5E``y<$8$I!hxzw=mx?o++;o&LA#T7W3 zNXEv-ImNbK0}^Vgsxc@DljBOF>l(SX`9n-DS-QEhkdXYj#G8NcoF2< z~{0@hWV%48;@paNxwVHBe%bEOofC_Mri^m_d(*BH_FD?^%TH0U|@eVi=RtEtQa$ z@m##e&cm$+Y+B-U6%*h6S16e7}o1MHy%-*MXdnXgClEk7lGGh5mUSZJu z^M%WRqM^G7saANRC}I{KDpl{ZH(iMvJN%%Qd9*%gmjIk367*`YT~W!y_kkZXPpp=q?6(r9abEG_aWDjpZbSN-`uLSY`kXyHO zi|wN_GRAtIxs(;(mA;P4M7Kz@h>JD=lC3T?gRcQh<1V^4-GmoIN>x?f+Z1>@=xQp8 zd;h#-Z~v7L(T7Zd)*v=4g1qFk^ztgFk%BJb_+!B)zr@4= z+$53={zBa9Sohg7v4KA!WFqms*Qc7|F<}HW9suPHbg)#Fls?TZG0!CGPilM@oA`w- z$9$`5pg#7f09tTTQVB{h6o?aXjE5Xm_*)2I{D}lwE2=hVS2E)T5h&zgsbZiL8bW6> zG{JAjC_D`5Mcy8@tY;Y9JPNN6TH2OJB)=w*EHI2&VwelDHnjRMEwO#`>zH{>hy7tn zkDoqs9`e@~lB1Nst-^gr5e%Y_D3p2bFpI+@)2-0B>;`g_J1i32_4R}$A9^9s9c&nG zYZ{Y=M9$=(lPKgByj^xc0tWY|uC9&{Qt(rU+-`sz_n|{tjZl&vff4qVK)caMVjVnw z4ABsDCyAoo12^%~CDF(_i-Hf>j}T?d)Qa}hn~UmNHp@_CFO}tvVE5Yq!!%sgd=ai! 
zvz*poA1(Cz;Y<<;)oC#?sC+$6N|KRqabbRxWv4T-RjR?^@?1->Nf$oe}$ znHJB<%{`7uPPEgcynmK}u^CQ_TG*=o!NBAs(Wu0VLx=dve7g<=Xut$)Xuq$V!XkbV zd`nGk5&JH6EiEusA8Jt#;SS2mIyg8Gb}?V(pqK>b_o5FDy}cO6qX(M_I&o-o!>TwuU+;91tPS?%2SGhwP_rYs)Re#m+v6UJ;RQJ~7{vzW^CGc)CCr zhuYud&_bXzD`8NTmp6^oa%(%3-k_|C^@eT~F&{@%Q`y<@#uB#8h!jc)$dS75u`fXG z!uLNRWS)><92m8_GVOx|lW;Lb*?Uq|Rr11T`1IDG#Sl=$D3%^jY*bb#_EXSvA_eiW z7pXQLK91zhJY)Djo`U=<3V@I5noorg#vyE`3I!r4(xBPT48GI0KMY{_ps!yMWjq(v zKB(8TY}6f7`TiTtx~Yd|xC}Nw7?0y*wY%PbLJ|Cn> z@D0!GD4{Eml2=ej(*9OQoZz4d!=hMg0Gc2ec>OULc=%QWd=79_9S)F_Np2R5OdU{Z z8`mA*1%1@C(nvicBQFSDAw*#xqAg|H_ZaG>VqzY+y%67J(}23g9y+=+Mn;*8qgTmUd22*skKE*vsS5cHOl!#y$u)dAQ?OL6*nJ3M;q9~>z|XqjRI zOD8u2v4mIn?p?cTa~x?aZuPaS zZtTf)89&9~f0_%DuVQ|rUn?9I8YZ z%joGv*>(zb9(SZ~FSd)+zG=(^br?D}j^_K!;ul%{pnBC>+)|nY5p^g+a&mA$iggi^ zk7)cDJv|=>yGJ~XkNeU_1hRyzoG09E@>axJ6-g!6@5rmDO$-j+4GZ%f`i>>gUVDem z>}#J`FwC7!uO7cI*Rre&jj#sn1?&&VZTLgtAErQ()@lV%ufyl9Tfg25AJ#Bzlk1O( zZ36#Qo3B4bKd*0T$sxnV$|^14{Nm~#f8RN|UI-XASa5N3(^9UhZZxD!ymgBbnGXC8 z{F=~iM6dDIs*FMFVME@hZ~3%6Jw4~QxBt;^YQy-xx?|5ti5kMHX&?`t6cENqPvU(DaCWi;OfAc@FbQA1U?{<`D1A*N3S3i>D6oMiScm`wRq?} zGBPutN)+_GBd8-g$X39yn9^o-uE|}_AX-q}i!yw3exQE;n6vqVFVb)F+`e*Yey?2v12|EP1$g*h85AizwcA|{pB;y!7h%2 zMFHWF2qjNQlAtYs)s0i`=4;c$5%JMe4rLc@JiptVuZ!t>9!V@rZW{y5#eF17Y;#1U zOY@h*5gZY#;gKThJP2dY_Ox~FS1h-!7T8Dz8dRS2oa~H}y!L|G^&!sGe(xU+pI-Q4 z5=thKRK>>~AqJ#e%O6G{$iUDn82_0JAt20b5%OSXc0bgSnzv2Wz5ClOR}hC%`YfkRF-_oH{SVf2VV{(P;*bhEie-1Fzh218)Yh5y&l z>BJ2}mY|-Yi6;CP@QiUFJYG(woNOK}I+<0aCTq}_`PT0N8Xb_}Nxxex@1}-a9u^RX z4jw$SwP#bcluWhMIqv929+2JW8%-O^3WYp=689yM|6ww`{`_vT>5OklHnuP&!c-FyE$yw0rIR}wQfwsY419RzX`!7hAaI5~ zj1-7Hn>q6iqK&)x&b`2syd_T?Ac&a#C~yy_mj3gLHg4Z-tOIX*upzNU+JBRq2qKh; zNJ*b2;;;v$3~&~{gmzwz96NT;mM9enS86q8FV)*N=w+Kk7Hfp8;zV*HDg<jdEcjZf@JI$0krKg>$D^;OkPTydf1JdbDEH+6TAM^-sa~2jZCH z(^iPG@UdLQan8-njj>a>kYl-sK&1)09O*SJo!#>T!onAE(~!xT^Dlutlj(|jp5yv@ zA=WS@Vy+J^RS=nj#@+ibbA(_Icv$lfUPeRe9ibHZji)hShC|Vu{{u=U2-~i91bh)o zYV{P@eB%PD4p3+vdP-(6Y>fa{n4f<_S=nRxTZTpkkq5W$QIn$#hKg0yz)vV-L2Vzx 
zM47I=3xWPEx;*!PFI7hpY`KvCG&MD${))JNA9ECqO--Q;+mC>qfiH^#E$z`G7^bDB zrNJ`fbxRBXv15ih34MPr+@_Dby_my8UJ4%F^W=*> z!k)yO_72!&nwg!^(%OLtfBJMjx(^~g(=Auxi!agJb*1FA4Izu#wr)z=3Io+p^V>1# zh~^2%MONTa0!Pub_;Yp^LM!@UwZP~I)D2L?yx}2o@jdGii+A^M9e``#29jm`^A713zNx9P#EA(I2*#|&=uu)Iq;GkPy`rnf;V+*)IpNK5 z2*!)KjiYy{kx;>XVHNzcTwp{^fjWjYa(()S5d zuCOZuSyCS<@tq-5VL08Zw}e;=cEeK$)Nvx~W`B~z-Y=_gm=;nUFF@!JWj#Y*<5eXYY6g9fu$o&cf32}hsGXF_=}9>>UXmx!B#iw^(HnT@P6a6Qco49_ zTDRsU5*oY7Zjeu&gYvq_(#lecNzZrWaIz%`@s!NCoXe1iczS$fd{zDcg4b3m$GLvu?n%FD@p zo9?fqlT2T1YUjO?S1`2!fR@;81db!8gwXpAdtZ5R1v@To$+_J3y!(MGac+C~LjDB} zS^m}`29))J`CmZTf1pH{M2l`dOK)xAD2`q4%DW_zF!osRU5ya|MZK-BZ!@teUyx5U z8Z_;r1@hEM4{nZ>o|4?81X z;fO+=isBHggd4dzx-s+q6=%33ApCzTIG9<`S{hkG+uf=> zF@|Dv1h9vg0Gy1Vj{MZ53S|k7GIWT`;&zk(ED}FCYF=u4$vwGm+o|rZVoBR!Z4Y;f zLc$0eLS~uz+XLiKAU;E!fNBxOddeP~I&?5CPho%@Xa`{o3gA^M02>}@=Q23zdn7#C zZn)W{^fjHLaCP}JPhfNjh!*Ge6m>4A-JR_l$F?y~_0hDf*mbG;`}+eqlcC(t!6B@- zfBR;)@5f6I=`%F?GDG2A0Ri^+u&qK84z8dhPNZn~sY7D{FHB#ktB2mWu|A9qUKsF70hs~< z2&hr8#B}-%tNJe4*)+txr$jn}pvnOz!iMVuu)n~%L0Uww^LyP;CV~geF$NOn2{kn+ z-|wgYcyV5qQG7j{D?%<3@0h44EICIU#c~#ujJ#(~ee*xR=3&frq+$Yailt!^rmCGm zLW!Rvn&4pfp}znb2E1?t!3l2_6**kBw#fh+dLZh|ezPhHv-BDn$6$s6lnC+>H*ao~ zxz+du#9MxJl^LnAuMUw#|@xW+EGLRcZ*nlMi zP_vP_B?yID6?}jQQX-dM696el_`|{&LaAV@R#-MfLsWP<>9P_VFI|gZrr#r$2NWPkERB= zC!)mK>j!}C)^=ZM0`Gxf5hga+{NTL1XZLP^+lgt{E79c*7b-la6?NHxS=0IZD- z{qe(xh6E{i%6TD?$2y;MJRS0tgQD~(foc=tafAn%2bia)^z?SgScC6}JPnV|kas5= zmPjg~CT|E6qcej9OGQaZniA^E?+^nsG-3aGt!e<&EWZ+>B{dz)BC%Wv)CpJvN#-9| zi0MDEkmxo}DJdUtF9>bsd@d5@FQg$DE#U}%2DcL!+hZ?5?{%*qydU^YUaiB(iD9eY z_8TNvy?qC)21)1u5=_*$ZQI7F8mn9CItC{V!LPX-)Q&6OQyhZRbk|0N zEC^CR7|v8xkb$`~bO~M;;WAL>A;u)$aweo#)F}otx*vECVyN#r|xcpB*-PGRE8ACgtXl zVEK~7TtKIUsCHc*~fy$54OG@Df3_2XO)DAi#w|QdIOipj8}J7_?b}rsZkz zT<>V{VH^bQ?d=daxsj>+7xFaesH!%J%^?623D?wTdKQr3ciK(Btw3#JvNN-@?eE@= zh`2$r*HKwM6(20fx4oZ3m&H{5YSn>p8ei^wm`Xd#qzdRl6y8PEZEL*Bv z{UJ54XVF(Zd_gO1Q}fw&F2*(a58~oZC912R^pq_ZY)AuWI)LkeGqV698)`=!!CxUl 
zO1))EvPK4@cDddvOOHE)Kdesh$}@bX!5&7yFc8`u1JzOz%^^6xwxwnzjHt3HWNlox zE&B`j>}$wIlI4yX}EljVAS9V(V> zrnV1KTw3@VwLHIopfJ9Fbb&vmvUi>I;zqNc<2TLt7?3M|2i&8XcpYsBn`IPmhp1`Nj$>o0BY z2*B&|lz6JU2uKE&69Ea^NHTyq9BC#TXCprBE&ouHl*A5`i{-@)Y{=H`KX}k(`n3W; z8bbD-W0xK`sCAAJ_vGv3Bs`?xPV~qlUW=9VwdYyut5@)f+txir-HwtuiDXe$q1P6L3i)>(m8#4 zU}y*u^0O#^K{y217YFkXe*W2OPoq2-dL-%npYRfa+mrVY4V0|0!`X+<#v7aQc-=&Pf8+IRlu)1~pWCB`x&MIn;Q zfUnPCH}gjkP2!owMV*GEtR?>T4IaHjeSqrMbptv*j~+cj(nx6OqZf?2fM^wsf3!ya zhg1()9n8cJ@Ax-V>lrm|{ZM&l0w8`I9B%nebhu zM*x6>cL|Ijm+OW9Ye;PwhTyPHFr|mDI5(oam&F6x?a_0k;LX@+9qdJ>VXUs+gm1cG zE6Y!GBCyay>xMzp@y1A3wZ6lmzkh2KI$h5$SRixW%5tm=i5MCsFvUTCGL$-T&cOB-jwFOSH;iDG`d5FY zb!*RAWIB9%N|b<)`VG^`BzjK4=*CX>mO-LW!nB&wSNbuI4fGur)b6DECML2y zpDOz?`V)nBYhHPxaZ&qWvF9qsMgC|yjV#KkZ5Xly#$iZa*@cltm1NdS2_yjbu}f6 ze0(+KH^Fo2l~ZFJ>ihVD!JXpjuWNc1zqv0>YYB*k{F0z>|NoE5^XKZ^YTRCIH$!k~Bh0?cI7e17BAn}xsGCXFk(~K=1Z%Zn?f0m~spNp#FPO;U)&1rGT zUB_#VzrT8eTRr8B{TXSq$IAPzUC5v%1s$Lb_4~YH*7)`_HR+;^v|sW=l_MZ5!e$&-EO}^4|R3N!6iu-NcinaxGexyCaO8R{ew*WO3;WS&J zAtq=krkH{ihQ5A{O-PVZ-?(oo`4#N(u!^VLq4GvS;nWZ0ejg`i%;?qXm0xA8PHCr_ zFX}s6`h77f7H};|Q#$5s^X1)oSH>Li*CbE<3289^!O0<#wl94x<3!oaOA_YTgcm+a zI(=RrzDKYt1|K?3sg_oza^ws9?;LGJ^XTwgs=;>2VKUa5mD@#j6bHcb3 z#BrjQGBPq!{47Th=gtK&)diHvBz6^8g=liaa|kCf)rREU+$j{IAhH8YPk`olMH++` zBO`Yf^`b_DmvKbI{_wX*(2x6w1uGQbz{SytfoMP`ONcixq<)X;2F`vFYGi6}O+tQb zNm^POj5VZw4s30dSd6X6fAnY{njUDEu(2KQXJBBk?!r%S0-AmD1T&T%o}R6-Q#bc` zgA*0)T?)hmtbQp#3&O%3%ADjyFx2@VXNZ?RP@X+ZolCI(%_cS!Ojjf;K^e)=q;cg5 ze;ah`7ONk;LxO(=p#=n}#bYGOD*YxPnc2BHumz@(EXi9DONQdlJ!ZnrQkQ4H)F1JEWv$Yirfcj~_kf$F=tC*~1jGdMpLN2q1n`b^@GE#Cu%z z4*HP7505370y(jj#p8PL3NUN8b#rz;(jGqI3K`k)$w_qfqD=_HlTBl9Re#@|)-IKvBK;iIGy%gR(`ZblEA49&zW&k)x&T2zhA2B% zc1?aAl?;c}C!&my{eY;;D0x1;T%bAiZ!Q4E5b-uZBV!uQpFcn4o~N7w1p-URYqGFt z2A&??wdL5iCRx8wtT{$}8$utxs^;p|tH>i6ieul19d}ycCNAxPpfH`g%&oFAPplrq z!3!7oyZ6%v;`Wu9RBiAYJkL1q;8D3V(^imi!*$rH2w+eY6v-PhCMAoglKe;*E2vU{ zIs{%-_wNqvXWY@7b&kA(uP_#9S*%;*tDghgE)r77B8zk#6?A)e;*92a=GyPgZQW-F9u041_OXo z1)hQH2`WijTieeUmY@wGy8DhaE9I6g0@+5b 
zy}e#YzJWbOC<_6Ih9w!^4{L-ZK?rb%*53HI2WDU8#O-8FDIa*|5O_QWyg0MZke;cMIO znAbCmZc@R~>?)Z>zfs|fbU{2doSjCKL-a{+x#U*Aq)(qQ89b3J>_4mUWblWRZhy-P zjimbCq9dkK#vHF64~Dd;Gma@*cN8ej?MU16%q?vcIz3jE@2%sXv?oNJ{;}nK=aviuLqo=_o?{BsAKVEzI**qfMCL9ks+*%7BO1%?nAdDQ+Bo{@6O1{L**iMI zLo_{VhD-{7Ky0nlR1TV(W+kCUa7!az)%ox|EGc;~e!@qAJ`i5Ds+91d6bvgNG}%#( z`X7ojisN~eBn2rq3W^|EGXZCvtakYQHhyXSN?7am^{G0zySggvyNeo6K9C-Ug8CA| z`MR-p@0xbKn)`I>Y0UZg0OBv7Ga9(1gf|}hhS%a z^>}!tv%iz8URQ-Y?pyInh6LLq^IAi9Xi55l>uYlx)1gXXzY)M3}G#x)BWH9 z;1q2*8o(A7)Aw-X?dho>nn8MgbDEs4QE*X>tIE1UQwPMlXwuz#_X=T71x!$tl|=Y> zqQnLsL8!@~0D@t?cx-d@spRI*!@~iu2ho27T*ez&1_%Qig?^&n4P5&=kcq|&W1$=r z4ZF89XZA+c!CUvmfYZ7)$I;Q~Uq6L=3R1(_r{|3{HMhsHg7LYVnHexYkLYpYCNMC8 zd;oZJZf^w;_eV4z);cFrlTEB6lh_Eoq1s9OyP&VYN zej3_DOYTTA2s(?H1^+;kxx|n+qKHoBJx~L*(~&EYWMFj&UuP`?1MzAnas+y4Y{_$? z63ub&C^9DF<5_->;u~|}6oSZ1_MT6oPH6+GF-en!rjJ$&Oh3GD+7@$grh323oA(i0 z6fV_xb54ppsHThDF#chhHCeqf?cB@Ldsj{8wf@RG7rE-<|G1jSH`9FN)wDtZ`>w&Q zbt|GTiR|{BbgGYdPk)VX()iPrUyL1%1_gajSA^qLkF|=tP4uU(Xi}y;TGnKNyg7|d zu~#hY?}XKI-xR9)Y%ofvrY<@;aUDEJ%xl2i_QWKhoKj?OF)<%QuQ5aB3BpCv2uy;4 zV89~0NhTcLz8T4jlH0?M5~hED)Gi)kEx=8|!HA#?R+)?v!k4!8eHD%}w+Z^2RS-+} zL0+dmOpZDJ?}K7508hl12o4E}RN9B5l-2JT?0Ec0ajJ`GO%Uv&ydX(_kflL_tpH09 zwo^kxbP)Jxwg&2p(7FgEa>j*knAP}|z?pCb4Qzpf2Pe=d61FSFvZ+90!otD=$1`}# z`}{Ea2^M!#blUI4D`HoFNhN*)lsbYQ>GX+$%^b2vZ6ZyWUWL4lUnnpY_XkOZDjpuC zDjWit3|M`4p}mY_EHw0?BM>sduQ(v#0yI<(& z=qM5j(3D|oN9<|kFTX$i9zGo+RRjhLw~JUj8}tGRtD-ZLI3}P|f|!j47E3(qA-2DB z=gc_<78X!t4svtfQy!MC<&UZdAxq}g0CVI9oRyf=%gBx+K`eLg_RX8v+Kkbh$4l0v zZ{;o;U2b2yjwCCDDG6@Yd5W2OL@x|AWKQQo?h%5)txr8tV96!O7~N_k$OvF0`L#?N zD=HAz>*NV{28!NlqYTZ|loZ0_9RDbkDVTI17QkHDNfgxDv9daHy!c1Ja1ldpXuCcv zagWc>FcLpn=h4^6Ux~`^{2(;y@7=wNf7w$J@Pq4-b{0HF5*pJ!RH6l4ULHq;S&Z$X z?m+ipc7DD{)E{j+9~@~YeH4tV5>RMCwot%+K(gK~-!1H!UYJ(X%IQSrR zBNdo&c7vv6)^&k>V%=#y3v%+oP*AVb1DKj%-{6=6TIq={D^~3CM}{$C&`F!R@d6?g zAT;Ha7EEYaMw-`8EuhQ>6VB#Ww`d75QeS`B>*0J%43B-l;ymp+pgx4@?e>p`(g8}lQrl7*DB6wU?J8!ll z5yo5!Cc{L<#TDh{$37Mm6rcpGgrDN=BN)5}A!})A2>{`;!2z41Zg9@Hw%^A1&FQUp 
zYwUDbAem2|As@JWoX$RzMVm?St%sP%8A>R}q zhPG4JYPf?H;Aq83ABeLYwPMpvy_n1ed^4~ovdZU_rU~V|oGo~$;wj2D3#>T^#vV-Y2PMor` z7}6}Bx&`Rt1jEOc>?cgYMVM z;yj*!u^8Nv)#bcZe*LzY_u|tk36c`X8u#xX1~Kr-ML)CtIDc`6+r_Q71@<1BeiD~h zC30yn;H0c4}Z`I#Hs42tFzJvGBGmZ@q>tz(71E+jb#JH`SSF8Z_O`= z#$vi9Q5}OPo7jLTK(Yez$Xh8X`51jr%Hm#Q?icI`zfhvnAmIV(;OVS(2;F33VgO`i zIaPAPQzMFdc>g~A)~#E|EoBuz>`PTk@Sf}znpkhSe)A1eGE@v7P7r?i7=rLP2Bl2z1Cs0qLj$+==m zJQu!SxCMkN3P(8V!-a-kfTWz^Z!aB-*D_`&n#W>M(y6uM72$(rL`5!NLaAv?Ceqxcx{R-eEa6$&4uVk z%`N@J%wvki(WZPvM0WOTDB)L0Fib|?9DiwW6Sp0wAJC=CU@HP&F)Py`7uP{l0u|2v z`DTjnih-{Z#@rzLBqno9H4WA&3&Jqf|!VOujsI8zPA` zG$;u;XwsfI1S`n85J(nOFY!>SYVcIcN=x|}sJc&+6ZCJ5LDgQEJHZ@<9-1-)y#a~D zHW?%jfzaiL>(*P9Q>RiYGtJit3JE!j?ykD=)dMRZFOEA}S?!n~(Nv3Wg*;?alh00@ zF_2@>Rs>rX8SgR-br|QLgQAK`oytJ0Oia&D-sN}HAx_gxY}oLjZ_V+!%TTm|Or}C| z6z~;fBNeFAz^d4K<`%vBe_I;q6@RxvDvuZ0ldb>fh=_vcYg1<&A|_iJuZ#4aFI`VZ z9EmT!o17g~r3CVI;^fJ_evs$FOJhCV0Sr=lz#uw%S6oUV0V);bCzf3^CO{=x+nAT< zVE&C>Wv*U9&65uz4&80Y$+0*!H3^m`VfZ)&5v$J^p7|eIp&$Q!^qM27Mso#SIjz;E zjt=bv=d(DPw)%XRwXqrFI`Lm&PRskp6e6tr(Rq;f3k9KFEn094>??@aXPcDAA!8YT zr&GRY>|>n#u)2fxi;QQeVs2tEO zPIo+T_^@%I^=XXdxw|hSpLwJ)#wTaZ<}3YBI}3=*@)U~my?)@-H|@5?dLsK?Bwxg_ zV{AM6k)d-(-43Iy zcC>rAx8J^c6*`Y0wFOU~O1X_|IyxS1hY7(VZZ00b%Se6x`vc_FA!=E}xJayiR48f^ z^KDRp0hqxCrve9uevgS9V7i#98rafo3X&cgx3HE)R652I^75P#plm}-wmr3n56W(A zO56lcDKNvhytG7?W5`9lfP26af}YJON}x?W7`+2a96MV+Cp05H-P+!sn5UZwCR0GE zn5%kzc$}@cqQ^=?RGz?wY zXFq>77u%VJ+-U@UMs00K=r+-DIEn7|l_t^CTk4jL0C(m(bO`G)Q#%WzT!;a#@d-#e zaB*1aiLvA{_G3-|tPw9_+Yod`YwL9V9FQ}e%PaeEJ zoNw_(^k-xJRjan?wyGJLjaf{-Pa8&Koj+HN+HqA{P?qoEjXo3PZ#CrWA~r+CW{#@T zgfU?xQ0B&QqT<{CU1oxBy{xf=fy2`$ALTaU3%-6z!z|(YU*cw}v7%w)(mBo|$Nc^y zTq^bMB<9@q?ll8vU-OS$vj6N*h;+|Ot>-CrESy=>c3a13f<7p~UNz-GH5FH+tC%_<0EiDvgAk<;DN>_NE$d!>< zEvF2UZ?8(S0gcArwZ_CB%Z!o|5|9z!r4^lO#GmpE|CjYP5S{wf1pQHc^}BCI1KCpQ?bh^E6|pe=Z)NIEU4`|`}qWDYZwZz zC^H2hJUG~9TNF^0k!3H$82oIG1=E{IJ8_u%&-aoq%KuH!=UEUx@g4J?&nnwZtW=I) zRR3oe0J3`(J`$UE!};z2p4sjk4A7GQ}aMO 
z9Pt;5|9I+jy%8nW@=`RkJ}2zOb!ejFJjP&&wl~t4p^GRl=$(y6Z4CUB0Sczlt;sT{2yc% zk&(M|s)N6}?eeINcWZx{nnDOm(d7PN*Bdc1=xc zZtfidvobHyNmS29(hcNHBzvP2PEEvj#)}6J^LDz%ci9Ub-4@K)By}#pcM6nIxZm(LrV0Ul+11(k^33aF) z2$dNS%2yDGk{q8w>j~F`!x~8!dND;+On3i3c05=r&JcHu;vrwUICXP#PtR8nOE9C$ zz)%7LDPj&mN&kktVBw2Tadckq-xIf<_EWySQzL*(IL^|`oUxS2)p&EKcKs7oWcl~EG} zPBWOyAmaKG)r4br;Q~{OUG@(wG_BIZ=_v&S54qQ<3#R*p~l8zeA#FAkpsoG0jAh)hKd8rajv4N6YgE! zOj~j*kM!hqQ!;$A;bHx}^n*klm!hRionW*9= zHdBxKOyQ9K*5HMONxn7YiYj&LosGia)R!+hXU@2wZ&cxKijn%SqoaQUS=m=J@Thfc z^XARfrp~gm$Yw6Mxed=P$t0>9KfhZl+WserBmC`v6S{4FGHl^*_xZu|%F^5%qR$en ztS_v8V$bVnYhy3Ni9M@*95d=nMgpG(2WKgIo#~0Sp>PO=6ukcqiii*>F<#(qM~k^} zvwg-m4sn2=IDshdSU%tJ__YnX<@u1D%+P!{e8KC=6$UB=V9^+qCiqX&{Zbb1wtJ~T zA;imf0b`J$g=S~-pccAR+j<-o^r&koP7kTsH+z>As`Q8tr{J+v7{qdf9{6)~z!7lJ z))@jm2NY;>a?+2q;d&TkYZw?r?Va`6e+QZf51#)q&V@8~p zFW%Lch(?fR!G}gtjiE#|j)rlG_#7Z)o2aRAZ0@=8=S#jD*1P@{hsgb%{}hMN+#$(e z#z(3*i2Amg$C?kXOqX+FU>LV!orivzG000{1X4?LGsYMoK|p^<-&{Lx_cO!3*46@#`EejfAG*5%(Rw2VC8P~1 zeWU=5xzh!{M)KglarW4y*2&b?gx`%Lwle{qUS2DRRC4Ui#CQy)H#K0mJAo=Z?rMEsbs{)=31lftBKf+;L4y&r6ro51|6% z8T#_Ys_n_8nL$9RplMcBOQ2#eozy}MA8ng|;Lbw;~7oIo;0&#tS>`t{Xpc~(+-`*>_@Y(*Iw29#_7bp>xmL-3mI(X`FDD_k5r zKq0`7<4XXBk7U%FMiJvja(8#%GDz-n@|o9&OhNx$W(I=+#JC}d8VH4t81;Cx(L0ip zt0v}*skZ4u*bNB$qlXV)pr6Lgt=u0U7k7v@;OEN9m`>2l-+Td}LrwkXfB~=^y&#vU z&SOve3!>1b+l0;P3{jsmjr0U1>Ak?utg&oGURYB_a&Wsj<)Ai&rxo@y!7r~EQ{mXc#kC4}k=YHHZ@)Z8p@6k#$BO4nvToBGlr4 zq#GE1@g-9tV*ymp1QIqaEqY`*dq%$d z{|nT#F^cs-AV(r3EF^?gU+0leK8K)y+)zG_%pDk@y_-j?*EIf+@p^5JN`|89(UXgj`Me*5#T8S<6Xh12IQR>;u^^T=9sQ^ z#Putqi8oOJrzhRU{Cx`l_mb^d2^m`Cr*Z7{hbgi)Z-^A{}I&)*LrSQL%OmNiYMfdz< zarK=(`%*?I}#w@5+Z|6X(O7eH&fBK@lS zm@`uE&&DW5+lLRAAD9)y{*w=PVWrpPf2lTY?liTx%Lk)<*AI^%U`jv$yilrCLKguR z=~SM?366E(e`sqyOLWZxg};$iT$*NwBxvd5F>de@8XV-;@TCs;iz8Ba{;)hHfo=Y? 
zsRz4-L@6_zacrN#((+kuO~9F<_B!<5Jy*mC0>yE9sq$BO8WQ*Q$cGj?a*=lKIbQ@H zi{uqiK|xnkd3))Diyr@bPRacSCCZt(x#TAoWiV#j;OIRUgUGF=qr=28g{sYey#pni z)odf_Owj`QI8))6!YRj;FPiTfyn-_@&@_^y7WRuCe=_SJaB&)%MhJl8 ziGb9+e~N~`z4A4`Hv3=9y5T@}Gcz*`CSVE#h5e;tEZ1K`)CC-&21~~ePj37Yg@OXM z3ZyL{ZUzAk*mj_JM#_Lv;TX$d%mj@?Efl&3pn5u7!3<v$ZV8&H8>spB z?|c4_o?N>(fQF1lEx{Yy>(4L?vE@;g=N?r#TQRFo3cB5vlrOBkM;F$%|C8d@ZCJ;S83~}yne~G>w6r< zl`+eWz+s8hR@A&+)jW# z8+HGhC%wknBDRO^U(uxV3dbWl!8}FjjWQo(5~5>qotj9~LH}^;`9dt)t2_Dqvcb#? zlE6vO4>=nl3Xq$52WZF}KU&#OFmfdhTkUHBjB>`7qq%chWHRp%>>fVDBRPA?Gk~ zxa4$xC=nDvpC{KCcjK{ zYI0njnh~U=VrU?4`OQ&nE{h~^(4CXj6F^^8ke6Qsv;F8!1&lmFOP3CfD6G9m#X)pE z($Dg2N59t=ECQ>c%XCuzSYT<-HWg|fOiE0Y-Y7#Xl;ndKJ?6n(HG0(h|Y%`U-j_=HdU@&H8WnWp;C9db-l<2CsKP6)UI88Z#N|b~95`J25Q`dqfrr zJTqaNuQG9X|N4cS1r3lX6CGnu5<4V(^l^vi}zX;!ZO3+x=SnpU7 zgkZ6QIR*w%d3pJYJ*JvyjRno0mB zb0>#U3sQy}=qzoPDGE;5@r6R0VavE!@Xz@9dj68GJ^lqv?tR-BD;=*N4PmUJp46NY zEtSl3TcX_Lq_~)W_THiNO*e%0ye2r?MniNLu_^&!L3CLC)v^f9^VcB`vacH;NVG(e zhb=@TW&|B-*f8PT>jtbSbar6{fVvn{U-~EDGFE80H-oF=U!#v5(YMhD`>%u!q6a~A zf;h)Z(PV<0BvA0m-nSa_rOMAkwUCCMiR=WgYe48%PoPS3c&&m?6 z9t43189dg=>5k;V$N()^IEY(*+3<(WDsoZy>d?RdG5!LEcBW|P-+zp@Q;(NP1r+&6 zmnmJPtyy8r0MZBA5sEQ82h<=?T6EU2D~~(@PLq6)kXL1E%n2mUU5JKFO?vfzc5eA@ zTLtGE+{eN3_(uXEMA8}RX1zCJWuv4c!j4A-1gL@ms-eL=m7JB8m6?f;um(%&+g~f# ze4Tf_V^;&)9~{xYaO^*EMDX&W96pBfL$nI#1h{~ClF3(7)Bvb}1P+R2qRN)L6g6(w7VL9 zD{3hE>UMiLqK$Up0`1~TzgNV9#_Lb5agXP(kG}u2)ZjPy2n!N)D=?CE4n!`!6gB({ zVWGGP4zdL}LN+$b_8}lbUT@FVw(Gv;{a$x+^L`?JBnO_LD?ft{C0%@#4EHw`X< z5tkb~B0$wL+;mDA-p{7gl$3`}kx{@=9V|S+{F{$2@Ugv$h$uz@a-&rs{t8jV-VO`T zqg=QH$A?}tDInws3NrchPl3AsChTFm59nJ|lwsSp!JEM0=4)M+eBWOE5PG~z`lxq=Hludv!h-IlViZ*LO}%}7R~h?J$kgHyc`yv z0AZVIYrU(!Vu}nZI{=ascKMJEOOX>2hS7Y3b(#=nS_9P5Ndda+0gmsBNrZb@=6VcX{F*H5)5)`CYd(0$T0{bjB5H4D2%iGFCsK_bbRJ(d20aU_em zx}JwMwC+~Rr70TIUny%A&ui>#JFk^=N1xv${LY;Bv#@-7Lz^RUK6Q>lbE56D_k8s2 zR4hQD$Vf4LJ!I8OiXh1p%}B(W#VvM)-t;WwW{SQCZZ<(*`)Y z3U>MOFQ>xu`x}?9topaLi>UIidctk!ckP1y&qg+&pkgp|l$WPryU%s>WYg^x@CMWZ 
zx|tR&N3+@99h?5&kqSYcZL~ou4(8Shy}m6)cm3j^v55(G&oen<#K=arE|=v+u;$@D zm9T3ibidJo0wBs6#6J*Lkykb|-53Cx4Vv18m~t#+v|xb=-oO8sv+mWaFSSF_0zs|^ zv?)djQb_rmxo`kNtC(<|Fck{gf|mcUZ$a1LYXX)`$KOiN0PB$#4mgk@zVMESeHpd_ zn@NS$MN49_9u2JpK7O9dZw*V;wZnVZ**CDcWyuN-;}ip|4EipCrBndL4|SgYjHUoE z4-E&pmG!l(uhmOT(nLF6dKa|w@E(~gdcnHtzloeMSY$=ylv}A1QD66hIFavfLD#*w z3?f6rj8^1Nbu=p@Ij&OZ^Y|UuzuykeVED26GXTTrrO=?DX1?B$8@a4H3L6Zxu=z{}e+*rt-Cq5>h;TZ*#{&Dbb|V`)o#>50 zT<8Rf9;y*gx6pYCxix2m&|+-=-Vp4wf6s!CJa!j@67h^`UmT-{D~0{|HRBiW-bqQx zAluPC_g_R+3V%arASVJ$7DI;wsVsJBfR=+18gfbmAr6%NlTuPk(^*BB_hvJz6L}6D zTq_Y~=3J^ffYnIsVVR87Zl0cJ(Z&K#ZlKmsS2s<&*o`Q{QSK5a4-WdO%1V?d8`v6) z9R9P2^8eH3lXwnoc&@bx&JZP`4>~E3Xic`5uUo$!kp#r9hfhjisqrt#ARnIWJ&WZD zX$O!aR)>Kwcv_Cz<)hsnF<*s7>5j#gzpU3pvDK_(7P>)<9-;wNg7GC_3?B58o!VTG zpHDR2s_Wd!R&Y10XGN+p8c330-i>JsCqlD>q@DFTM_pr?yxiVi9Vlv@0TeYhRzJ$i z#l`NNsJ6lYO1{mgtcPMqS2sdJ6y@h584hfVNLCyh5vA=j0g}7H>;xg)*%oLstrYbU z(}w80P$0le8R-50?q(S$RN*K^2Bz%}w_#9XU<_j!=p+jKgU0_HsXxa6^oJob?&dT6Q|(0z6{ZnSi0|EhB@uQ8D7=Po&dHtE#7_R!&feo3(T}%)vJ_dW zLYEDf3D}5}*H!-_{+dFcU)KxiFj!FrMR~YnGqYjT00~>0W-b3H#d<$%sLeAqFzhOW zFw7=#XQJmrQ0l{lc!KVVyr@ax?A~{dAt8;x{A0yCk07Eujx7=C3;6 zDI?&WVM^(_esrciVM$>}#=r}oOV@Qpye30$#+?&CFQ#H&MKji4)jM;;=)LlZ*%v1a z1ZI2<9u_psr#6e;QCm78uIV;j^SbM(&N|8ivr;Bq_g{C_RSpTm&<(6SHqp9lv{cTbaw}Zn1iZ*ER=iC7q<7aXD zg_wQngcUFZ}cpUDF@FK0QWa|2sDYCTA0g{lACOfSSRSv5Zv?kFiC=FR;RzW`VOcYmo1!VE0O*-Czf$lgOu0DgK$wnYSDT3%Li@(tvywPF+0Q#YF9 zFTp&4AXUpoh96%Egpi$PADs$aUU=P83@scu5_k^0mvbiz+mB3vhzy%8%*k=KEiEbe zYyJATCS!W{i`IP5CoM9fCI(Y-0~@j-*zXFqH?7z-H0=wqdzT!a2N^6l zfv0LAF# ze^@#!J>=OATTlErLE9LgScpgvkvLG>W;?9Zp%cHH0v93hYinzZ!Ft-{L4vd_6>r6) zDFiE&^S&JXW|Ms!WQISA;wZ)^&}gvghzoe}cXtu9*(oSWta`PsTv-OO5zrqp3hP4! 
zlOIoR7AIu@3k=wjiUuQaVh|``4XuYxEa9~RjX0eBWJvPll`_@syQ17bDN<8Z#B6M= z7+Aoj@65I+PS#5;zjo7~hd6oO*j?k&MicH3n3MBm%-5Y@COrpQTp5TFV1I+O|FDuE zA)sC&4hEuOtkzqjlk&;b2B3VAqcWGWwY6YSb#s8faIBcFHB^Vvk^0f@i0#SR5k*Bs z+BwWp70(xrlqe-QuiU1uhoiPO8{0J$_ikQTTP$oGVq)!lyDq2btYrgUu76Z&=f85G z2%Nbe{twmax_uPBfA}MK=sWNrq!w)x_NZgGv9X<~x+% z@o%0dv%fWnwupI@uI#&a->%t>>=>55KtYI%V;uTaDA(k{;nx4Xa@O!?D3u|V8#p@v zY!SkIVg@RHOV|{R0BNL#(Do*m98LZ|ay_GtfO{fC*sT~FW1~XX1_N9KJNR}R4!&Q> zV;0sI-!O9bK_8|tIr%4)h?g@>8Jkkk-XeTqFpCGqz;5CIYtd>^jaTD;vW@M)|D`DY zp7*98C54#%g6=9Z#KWf8YZ6ui7~A8AF?={IP`pCI_wx1Qze{9;8v7MM{+Z&13t;~J zHYWlo2i`6M&01D(RVAe_(3>DdhC>P(k}MKVcG!(`<_Mjj+Tt?_l|7*cQvXF=w(7U( z1D|29xv`fcq%`#B#4OMSBT$oIQCQK;qYI6$dW%c7Cdikt-h>AN%)%Pl+C(`yzb&WO zcJ*9P`o8N;*XNK<`X`U2SKoguIk5Xa`(J}EZ)=ePA3WIq;I__Ns=F^+4sy+&Tx&{x zd7Dkih;5QXa`h@jwpTNS&iymx_w~*Q1(~*{zMq+B@0UpHfAQQtZRFr+O^E$DD)$x6 zGzIC}Q450|-@v0m5Nj|L0O=BzA#kb23DY`tf*_!P!`#@Y1ZNe@=P5fR^*eWwHrm~k znUT?vW`J>Q3UYD}N$iYp=Yx_mGYilL%zW^JEg(JoG>Th6=jKg8AJw+m^n8!wbi+k6 zhEEi{GkpYn9PQ{`8nb3@1s6=We0KhLZ+TfVfO9;@(O{o*-o;>>4jSH$?FDVt%k1Kp z11tJRPASe<%`s06D}LXadv5ihqb2q7bHU-j3h{go@0N)#JC3S%@KOJ0>UKH3rfo*p zN^76>W;Mr!f_-&w(o{9XYwa&@esR_yY0iyNg52rTdVYr9htqO%hYNqpj8wvWZqw-7 zEa?v0$4dbZILD6#w|I2gdZjRX*)!1`cXwFda?3(ub}W3kTCu+=d}eW8^3xRN6yIVW zr14{NsF`B*pA0T&4wLCOd^LJ3X};3vL%*7Cr>xs?&!PFPhuh&!JrqKoJf)UaKd`Jd zqrvJ_{hj5?%wB`;QF3~F5F*P2hZ4qYF7{Nkvn(#-S5wz&SBCWhCY z1~DU!u_Nws31ilq6-OJI$Sy&Xn}mM$#Lv(yy_t1f=CwMWWx|x>@BUYi%9)QkYt}GX zdx4x6tf3R*58zgi|KLiP<&$1Rb(iSnP`oLo_}ziH%_#XhSG~IK@$dPK&;G1l3jP1h zuWWhtvz@03I>+M&4~}4wqcTB@6mlpGrYMJEJ7*On&qU^)H(DraSocz$=mbmymp}vz zIKN*EsvER8!rL&m1-r8a#4Zqfi$?7A4u3}AhSv>->!I>+%`E^1P~`N>5=aN!dH@I` z6J-9yAZT!9)l<4~!vyCW3)p!j#=(~&L8V0`!`GLd7DO;6i3@+8j^X2t-?F)hfkm1HQ(Edf14NK1 zThM>Tvqu>XYs;Kb>XUBN#~daHU9W8 z-bIM1=sI-WcyZPmb5=8^Z0`T1ER$3veAta|zj5I&F4X1k+KaXZ&I>x}9^0&rr7|3i~{ z5~c340I@Cy&dXig^brRRDEN5$$ci#@!QLh23GixW*P)%2_b@>*W{4Gbb^O!~-Foxb zUJV5$;&6G!8AVD*8K7PaI-)pcpO~1K+>tzw0efxA>guv2EUuL=Uf~Gev(^xbC+|6R 
zQMQFRNhQm*152DSA_rb<5F|MugEX-YmT(RvXCD{@SV?*c=nhA62R~0$XQ6W*wt|yH z*PBZq^uw96Bf|s`o$9a;1F!n4gikUgfS`biQ0<-Hzm?r*KOY|^!QIijG`urtcGeC) zUl?25z3cA)Sj>n&%WyT$9R|an5bi*|mvb!(8hBhg@ygGhZFp2~D1duD z1U`j;;p00OpZfVZBV@@&j-ws>O&toT(*6l|8s;cbISx9^OCfRk!fe=sEbYyDSw#h? zMfDV94oHq+M%#$&{_XMDWAiI9c87kd-$k$s^HEvAb_zp9j`Hz6)_w-S0xuUYJ0JN% z`wkCMs_Excm?mI;CPr#CHZ&lugU}v0*k9|`86UB%flfHbegZ11BS$8HUn25BxW--_Y?Sh?!x>iC~44sg444o1_a{w!R}3nk4Li$bzcb3xw3^*)t`T> z#`t{qj-UfWa39h8K`g;Q+jx~Ya3mp@KIS}Um7*5U(hQO#4iWSjkWXpApdZf`rhd-| z8*6@DWBD82IQPB0zGh-IgR(A&9kpuBx*NRhV$TWJA1@BvR87r6PEKhqKXo-VbX@fD zK^Pzo!ons>%0M7zuMHw%3(2H#&e1Myr_qxl zfqENtd}ilCR___qAcnhSIi4VgVw6yK{yQSCihAC+0svA-^MBoDd&ClSVY~+B9dP1J z#&u>=u7FA+J2^(Y=n#BZS!+iXDR%Q2^J0LLxwrg5-qzvUF{3o!^ z_eei9teTK=XAh`CM3*N)z;pvGS?i;9<|yQmOw#W%tcwLufdi6kh{%i$9<^zGYXUNL zh?j^eH}v?GQ3_29;Q|~V_{kLOgG~{l8aU%9VjBGWT{lJM=PwjN;zbJqjfU9*CHj#F z0ke|#npL#42;6|a9_P~Mh|`-dx~~~l!K4m+1X^=L%(sJnJ8XQQ_}x0DIP!Tw(#J$7tx_|)Ht1)uUt?EHtn z|E;a1_;+9N|G}^LJ_qvvo(Jkfh6o0_J66iWM?giuc{GZdAU1J2&F@tHza~R<`;p#6 zZ6YGMhyce(-D+sX*Y9$w!_!1afpHedG2i<8O&sCm0d(jZpnObxz`EDFc5PrJ=*0^$ zjC(*&8D|_y0)0I_qdb7w{nbG&2z+oeIdN~@BFgVvQNlOkON9^kAD{z+Zx$vUQEx-D zgYPOLYwkVNk(GQ8JG*IK=wX?_3a7|<-UdQ)6k)z0%)~PFXXCS`sB4CWou9PXp-j`a zy2R`!oZvgLjS(mM`ud3c3&9meF4q~qby`nEzh<^CLr(}j3SPD3ZuGprr~?LaK{W-n15`PL0dq$OJWJg2 zbj0D@gN<)(zdmvaaTxatK1^B%!fy*0?U%(~bL};N5Uh zuQg~g^_^l)df~F)fonXpDW;K9=2z9!?X~-AEBYrG-8>#WYOcBSInzu-LE#Zz9yTLV zU@hk<j+C9<`y>13up=o3umuj*ZDDj~Tbza~1pm1$w5lP2f&V!+x>~7jxj>Cff zdhlC=T2xt6a{=xRCYzbJZWRDkS~rxt*q}$c=Bm zrG6588HK#zuV3{raKX8vugS?bAMRdNsjzs0qSi(8J7J33ZaM|kha*dpkuxC|fIbPyG(ChOs2-g0k}%F| zZ$Om?X2G;%MUcXeD}|kxAwi1v9Cqso2SK15v9wDFk_VZFniQ=P=Os>+rH$05d&LCD zY@LUwfGXe$P-QSkjLk{LL9}y5P-rxpj$Q%15!_Bl~n<-g+pvhf`&o&Gnf)l z7jJNVSI+7v*Y@oBb9)2}44s53js1bAC)K7+&C^<-LC9FZp^0i5B{(b0%4V0Chfip~Q8%1BN|)3+>yI4-+;?v$_ug53z)^Pd>^aU8`H z&USGB1;8_a8Cxw_bL1$6V5V)`GH`D5YGnTWwE(OXpjKyBSJB&I$l?2NxDi@Wd!Iv*Y8b3KUs)yBTy5gm%Oec@}~{D|wUcce8dkJsjyO3{DWsCb8w4;o#_CN+r(Fn*~s6;IPH1 
z7N3|Xbg5+JDztW6T<7uEgSKTW6B8FE;&`D3cz)C(9JLQDbrcjfZQR&1HgY@?e~A&A zf`Uz3SL{D!QP+^qQ-~q(B{qFI41}u5Op#9&>)^b8?fUtphxn4 zWD6V|q&79=`ReMiqH>Xn8P%c0Wl(&h@^I9(n9znN$_XMPrnf2$*o#7ErDeVi@qub%Mt5jgYXvoG;L;lhFfX7ZD+ z_h3yZKZB^RF5QFFt`XrHS;JdziHgojK&G7;yOjO{rcDH7(9?RC2O$SQ6Bv>;*hzU{ zys4rs!$=rVT=PCmn8q-J__lFKv@-O&=e{cW@p>ZJr7P?%Sf!Ru?3iwC_mYB??2X@% z+vj=_QA9~o6AF9Y6;+3oV@}QwRB5?h=)~eDmv{A@jZ56w>aA)UOO<7R71`4Jtl(8* z3+I)iQou{V1Y-0^teN4}L#`l>ZWw2Q{i5dIG+kC+4o)&suuQM?3$xURKGokN^ZfN} zA8}sD3CH@~qfn2$KnFYtWLCsF{?JN2u z$s=qQ!jJIhFfM9vAR-n;Aob1P0|)+naxI#hKQylUTv(1}rDPGH@NhgGa45lPy6Xz% zE5E4UUj5W3dOIyrN-UX;Rc>-|zfXTle8FvPnRd}c#hk`QHsW6;b=fRNvw7CCv!~LY z`4db8s68CZ_kN=y8wOrE#Gb2Y*V5Rt@~3S0)BS>RJ|;>es{_aHk(bVRAM#TT5?QIc zq8aFBHW`o9o2gQc(A9@O?rD}flM){*qoOzTlKAVFTKMbe`{6w7Oe6JE8eoE-v1 zb`t-~kb?Nrc-7794q*m6PRiv-Nt4sM}kN~*WBe#~BM7qbDo$Wx1s#uVc7BnuvA+gefc`}i_AwjF(7?{u+zrKHDiZ+fNw z`9j6pPKMC-hC8eyjZK$G@VH57BRNJ|&AUUA-Iq59Y5=s(nohLn-g@}$9Sr+zQt zml1%s6tq}22_Is0*kyUS^R-@dbYJz~E0Gk% zdd*d;IoD_9SLkf(9O6cG*=j;$y+n#~!{5H8K3Vk8c{=!3p9h&u$!aC-w8jOvd3#;V zR`o9P9{u{|SXOy=v*&t>k+HWQ|K#Y7Gk@<(GNLuT8W=+5rP8ZwUT&3Z)@^@!L=FN48(k)O$S8L$ftAt~|13H@Gs0!@h+J_-NALIk6WOk5aXlQF#c)TSFx)!Y!WCn{4zeGpV*{p*xj$g? 
z6Ruqd9{km$FAx15{R(x`1 zW-|?qI6wevYsbXw@VaPnv}_EY^=|wr1@Ukqz;O}9<281IJK*YJN;?6$sBS za9mj(^yFCZx*lwPB}991FVdJE-T%tg}gB zoJPW~-$Cj^rqMlOR}VZxPPEahCLA}*-O6ZMF~1n7d-yB{hK4Ed&Cbrw zZuTsT$2d2j+Zd}hgl-+`cqG{H<3jB(V67=#muxE6X~v$b{w&5!sZ-<9VC|Q)#wxmO zTh<=S{XT*mBFBr}$Wdz#vDMzBKg{mcQc_As;Q^8?=%R=rXKGs~!=0scAR zih)%_cnz*992ZzF8n*gg2+pJWAPQWBRtT?PYQM z6!i86t;C#xxg|o@n{;+Bezi+Dh zuQ6`hD}HNn^kajI!r1pyB;N%o%ebnuUllK&HdyVf>74CVwU#l;i`Wk$*-?igkfcI{ zPgs@Sf?=v=>B-@(fdF%MGIUozwRu8KyXYJ8_|hdQQWKY}Thk7nV-4oN+K%017F&0b zV6VP;8u%<5zURce7IYNB3)0B7GP1VLK+%b}qvbHT7p`-l+Y)1#e`?E72^}v87QN0Q zeyBapJ;3XpX2R1rWzrWN?KJ)FFY9KMO$2t8vwlpF(JeIJ+W$dp)+Mxrh5vUD6?}NM z9b{vOyaEnfjLOc<#fazYh+o^cZ^xm5F&gj$ohD!mv@cO#fh!DU2f*%3#k#uKcLk27 zEJv6NYZ}y zUL4k|M8_qE=`hNZb7Es9gv=N#6YjVj+Atd;&@as3K=(!CsG1=+Ps-_$&fVb!Mgck7 z_jw4?UfNiCXzk`CLAA*G8_b%hTM`p*-nb8i?#nVJ|X9)=7 z9wD5ur*9CU?8x0a?lJ&JFZEGoiJpxwx1aeC$JO>BaCpA0I z4t|XYb*~ms=EEh{`7p}1hdPcN>u^WKiQC>lNhxCY^Ch8)b4HI1y9Gd^nVp@TiR(s| z_nqn7`%<5f4dZR={tCU?u&$WzA-TMNd$~2U zN(+Z7g1`sn0@$a#J6X~XS!N75nKK3a#mx6%&+WEsB`Hn<5{HCrLPGe z#ry|G8>@1M_oY3#Rbbs?bBc}jl)-CKr-I>!`b`}XM$^5{p+&qyuZQQp{G;polqG4QG$en@#x^mR;w1}_fmpmkN9`HK(vrDn5gqC%DoqCTZD`}}Dyq~sgVS5LIs ztWVkNU0(2M$ExDIda~50M5dRx(Raq7p5tY4hOZ>6+YOMZde`MxM4=DREwa+~xBNW7;H#{4+J1u5yK5S(^FmlEu)KN4<*T zYV$)(1px5|(bMngQ+WPV!wnr?=7f3Oh7HfKJuEE7kPezChpoCPT$WdU-gfyyNJq%V zosF{v?L1~4(P}CMLkorRv5IDSkIS>RlD|#uXm9HDDi>II$9+-8!_cDf(T~`}eY)2f z?`#Ur^m^W}JMl)9Od0vGmXDM^+?l5;YU5qBRqt0-70Vno;lCnB%9J{j(D|YANt~;#M}^}4MCCgnE~9x5nhK~3 zy=Z=XIZHxtF#39mb0fveVf#y>t)9Fd1F~-~6IE5+YMf!G*z1|E=lcW&If85iUQ!=a zN8p84R8~4*(7_D@E&+LudPtFQMx-lV6A2zioZ!|thm zSux`Y`4v}ACyVf2Qt3B?pF2WstB#6tXXWNE*U-3dTsF?JWA;}H9OcOD%{-pxRPyuc zZJT@Ld`mVpB#v*)7o=z?y2F~D)*c3189+M9p6^0PR6tn(?3nDj`rf17f}6`f&KC8W z>oJHsyN(=BJh;7iJ!jF!zO1GN=?`&6w0e|-h3T{2x|`*vhfk5%S(lr%_Lz93(2@1H zB0Jj@J)+|ZxM}S#wN2~@k&A2;<;;qeOH^^*ar4IA>&a%pht?%%t6Ki}IB>bWmXo?Z zL6Kgy92M>DD5K|30WyxGM{p(e+8NKCG$K!pGhUD_V=L(%$X$};OiO(=x@%XVqxre# 
zQn{jn{^YyCV%Gf|{J38=ah>vsS8TJ4du6k(y}ZyvGL{Z0EmyZWl=8;tVaPwjPxtxu#Z5|b3H=d?849d2ZJ^g%OItl~lYq0%*fGS^F$>WDIvS0^K9hIy~)t0sm$ zs#n^Nf3NT!-yZApv86O9D0)w9|J}(A-sZkeCv!jNE&Uv5_bN7+x-mc^wRqml>D;Wl z>96Ibw;msY+E02q+P;+5+k5##o^!(fc?S!j7RAW;=exRu)^@DwI(*sYG|= zbsrYY??z7LI-9gLQ!QnKNm#b8>o7-` zmV+bjU+=0T^pR$H-AH0h^cON&C*M50INXepep8vuf|1`d7&4trgR_~HxwoQvOG&OD zIXpgE>)HA$GyU12trNfQwJ-3jjXGdRRBOEN$R%L+eg~G1mf*D=`p`KzNi5^aNnZvB zLBIKiCdJ}hTnJ-9UHyIgzkV<+zdQSOPde{*r!e)Cq)CNki>xZo!*Q}(jjacEeZMxe zp40xc-oC|PhhL-VRbI!cNL`P&d*!;R?H2QKj@u}@$!20txKuFX&z%y*^be<1rvK*ePY5jjWHp^y*NZgkMFR0n5@@Q*>}p(y1onUAgnVFR!%7J)Gq{2W;TE z-(JoKv=lG*qPm2%&)=`(N-@DkL;2R~PSB{ln$lK_Q4Gqlq|iR7t+BhAf9@PRm%sA& zM?JLOBbU9*_Xg30I}EFd_E(%+SGI8M7Mp5QeexbgVe)R1q6Rm z6!p3M-CS=Y?OIWXkgLyt|O-C^%7I2Q&@^rtc9J#$1l3B4J6IeI8ABS-rQ^Cw}>KkFmvg zO6x;WaJ#WP? zOH?@6*d>ayHCi7{{)iGaaEsfwM>Q_qRY&-&26x!NMDla*?0NbNRyLIn*DMEIxtua5 zRZ=LhxYn4-D&nYWjLNo3t}7zH=HK-d9XvkKLbdN!@b+IFh3RoFueId4s`rHNTbLIZ z?xxtB<~6oYjood>cgL)IT?Y@Tvo5Oo#LIq&vv*9Lxn&{Z>udb?JDVUi|J*u;0tKG~T<_8@y4w6( zbG0vGQ*-wjZ<(1`OUqJtbFn+vgkfT23$3vDZE`!0DcR86n~n79W3(TWby)f|$%*2~ zwxQlrq}8lRlIp)yEA`mtepJ>Ue{zV|WA5TiRvv{TgR$`{Zq!Q;P%lB!1W&17P^w{Y z1$*GJoBFR6f)Out4GgkC#$Z=Z(K&_W{TpXe?hEoz6{&Rf+XC%vc^8du757k(sL_aDXLfG?GLWb(*uF>sWLRWh znK3%7Bdc18(J2PbZV(S+515Y>7hPvA$r0UqFeCozl@nIYn~@(n+7mPP8C+QAe|)&TZneyz zLmc3_WUmd<`YY(xPzbd7JS-`ncAnDsSnB4UdDxtn)?e>j|Eo<_ z6HS>(1ESL{DYc779TKy=?!CXehQjayP3_kNJrImq35$JD`jUC7(uY`Fuez~L3_hoG zIUEsg_ojyC;C7+QKVDcnn%(bZT;=Ris5;fI&e8KNa?*0>gYL6;3taqHj*7pjOaEc4 ze6e{~!u+;goz7ue7niav#dK{=HGV;{omDwEbstUlbjO5V5oZmu)hJCR3GBPUrKC^wCR+8JpqE3@kQ)QohIt!N>H9L64cNN7Or;Imq{S~cY ziCObrK|dIzNL(tu7ccv3KDlBd>8D70<;8n57rq-0-2GTWdkUJa|7dzA$3A4LSH9&U zds3oeB^o=SqSF)g59jwj3{Uk9I5jFH^y=AyXzSXLAEn!O9Us4+R8^k|(YfZ@7)6~As5v(~L%U{aU7<94F33$58r!sbjMNGTXL7I`zEik`iWX)fU+88Y3}n#SILTM z>0pMV*Q8q{4D|EuhX-febJvUYbd_se%dx2tV1H0mTbCUFplN4~jME+m)f_2VQde21 z)5M1^CVxSp_2QvHWtV+zo~R|Th*{gr+_@+Ei?-wVgV!1LMX%dDcnVp5?JBr@eP)+% zX;Ofj9*xq4{s&3r+Z&%Ib_KAWqmB{y5$Ngj*(>{ka%4S6K*zH5tNDb!$nGAE)3sAs 
z*3DVFB&jx7PV!lA$Y68;#V4JrZ0`s`xxusuXsR%`Qw^ignp_CYHW&)FBc?xers{h5 zNF-!@9D*ifI@&R)ei~a6Yt*ND9>-AgOR;uIbFhWix>aWj%`f(_t|AG%OZ!eg!};N9 zMck;0ztVwi%sQJbOk%83GwU6NBBd9nbHtOR7caz-=Kku^B5m#cVQ<|WTb_5H?t+rd zVcIb}C-xzo?_yjwCuF|N6jFl`XWPMz!BG~W7iR)LOtjpIhU z1=W+%gPCd@PX+Us<(S!0EuMWLZSZ=*a^hV`BlmI6kb1MM?PTx>V)YOCNnZ>@|L|xM ztIHz|_@L|TSz={r$ekQ9?iZ;{n?D-5TX!adp-Cx6n>_fn@wrBCW9hTOcNs%pw$xYN z>RZ}bR99Z+LoQ@pY8)9mWJ(o!-4s?6*8*J(ZQT_}PZa}ed5x$k>a>s|5o9k)*ItCs zWrz?7(zzMo_zRi}7#d)Esk2ET1UOE~t+O3UHCgbiKHi=3A&Asi-sDMk5Vd9P{uW`Y z=AydG++V>-JL*ovM!Wo7tifsf?PlFhv$~XYbY<4h+-v?=-8o!Be|_Pw%?am=w%3d! zQ)_l=)+Bk0w^es~ksXfOeAL>yYb2(hJy*+bb7F1fHp9>+vo}|>4r+S3N*>L=$PmFf zVtehFvYuBFTiCr%4t+l!9C3NyScwRl<7uC@oU8h2vO&ASJ$wFrQ*1$usvPq+3dy*` z>=uwtr>3WC=Bw4ZzDFUDmz}MdYZdc#^_sNT^4tNgkBofQn9GZuoiR^Zu$a`;61P3o zM&?kFqNUwvPqS5?X=r45yqY#*Q?IC>3zPH*y*r7Tm91wh7BmDyq=h{r*=~* zR#K7&zWR!8>f%qAnjbwA;ILH4Ic{?|Gg4}=Fdr**TQHq4GTf6ki?xk+#7Kv^9;gN{ z8=U?a`PJZIAwxcmlk=3FjdBdRcGT2tlqaPok!Mf%#Y^u=Uv|eVp500zIe(SZ8v+Vc zUte+m_Z)}vzA>0rjC2;X0Wq?_Xw=C6ixFism0Py0(b6iM0E(_wiY3CQk1##+DQ({B z!7rY8RGhYeHr8b^Ab-d*anrXO-nZi`53>*T52bSo;_&}6&@m=$Q4?ga zq-Y}WRhE(VGnqrHc~kb0q+P1aI0}@h3;jYE6miFi&N=iyr ztzGx+k;PLry80mJT_!%o;6(UI-;CtD7*lW8cGPlh%lD7TKT62gxu2$PKW22+BEEkv z6l3|yy~X(B+HPmfdCl7m^ywDdlVF)HX(}t0z5Q1P#h;M>66?&` zv!u(NxjTI6phWJ>Tlxzr@ejx&N?V`rJ`YBN9&QK3N^pq5XKWcp*_kF4HUK6NaYOQN z`c5g_(yJ{wb?C3nCU#bPo>oS-Z__(_=O>zY84PKFKa^}^ZF8g?683**%Nch%UXa-J zV`R;7Mv)Gw-7iMEW?t9d6x9%&yg4Ny==^z?xA-}d|LU;Lf_2v8fCbqLZjr3eeRzmxReX}E4wWoEeLKH@LEz7=%TJ7^8 zbF4l=zx}{Odh?Yd?qnsVsfwcGs>Y4=WGCLgxd3z_kIB;ZHWO*q3cTEg3vZR$%a)l2>dcALy9S+b| zrDA3u>Zywso>v>)y{fn&`A1HBy2``N6fdWUI?&#p=nA1}f>zt~1j=Br10fiIVZhV! 
zKC|}hl+)f(Pv5RHd?+kc+`n$qt-aU2L}=a7ta5F)GS~g?b)?Wv z*OxsR+m+3t=yC)K@*k76Hn7_0mpw4k{YG1#({9CX>96Mbi>Cd_S(2h3dtR&@Zsg{z z(b9!BRA8Z1^7BGH*QnP0;``)BmnAs%qddhO>7UT8LnDc6>PnlgCY-PM#?sO3Sl-$x z+We?vUO6FLIw3(la!2|^pO&aZwDPwMepV#M*Wb)GlPM`)dNF2EG@v|yaV6$dzy%Ec ztc-31PKp-_RGBCnR48e0lR2Dv=GYokrgyzIW9)D61=9IS`*ZD~T?u)f??;>3#Vn^w z_jTP$>-yv`9`ETYXd5<`6LQjdME-v0NGfI#SeSPB&yKd znA$e>i)59iw(OPsEiUV#fw}HzLHHeqZSP|r8FbDqHm@D&IVW5bD==4OYjcBP!hBa% z{`<@3=Xl;wEjyg=Rihg}#W>* zm+CngWe4*hQOg;Tv!-XOn~YYo7Tc+P&M5QAF}BUU*v8;Wog^4~B8e&|%VU2*KAD@j zc8kZa)yzJxWYnAP{}S7rLopxd=(Oj0c@nF@=cte*ft!yOwqAJ}>1_8pe8A?Eh2+V& zP8-R~!S7`0&=1oy(NAH{c ztEELFQt5|$UaYfS1V@(qU5DuE@BAEZYFx<)DwX5`#m-7YNNF?;+lyn zr}|$tGIj6HOy73t=MMehmj~oMX{ftpFQ<^|FP#Bz<>g5erQSrv7({P)-%vk-_b76-{oG5|$>+AUKRO zcBUT;Yn|rV)Agz^|DFfAhtcBli@ic9LdcxITpMSC->O`*7<=!hYLM`8=`aOvbk^DG zlC-N2g9JQt@2i zq22J87%Q!EY~ZcZ%dv_f75U1EkA;qhG+*qZ%u^4a&zi`refUaEB|~&?Vt40eD(jd_ z)fe5DM!|}3wi(s(VZYSm*gs>IXQ5*I>~VP?fBsiz<0Q^$FP|*In^{|;1q07cEmJE} zo%}%ba?<$>pWVyuRKAelso9HXu(N^>um(rS}Q0Z;;s zHJh_K={M83`}sU09CwBRF{EBu5(Ol~;n@9@2BGAG*!FZTh^+ zI`J#)jU6DJtSls?@T}+@8dtww;Wt`CsS5ugasPd+$?Tdq>B@uhx)SpxFkt)@$b&|+ z0G$a!BNahF!peUJ=Rjbysbus7B(m$0yBUx=0E}|gqA@5iP?`!W&=3?$9#@?gieC&t zZ;meOJ=yLX_QQx`F2#FuBsLMptEW}{c|sc|aypEMCeMsAS~G;o#KBxysJDh8O0JSV%dnRHR@omLZ!%OHw_kRqCUmcI`&Ixw5 zw$07Uj4Y9N{f{3jGm5V}(ia-dtpWha#}@Y^c!E%FaevlEy_UK%5RdSmiM_2BD^ZmA z=kEQMuCM1cDj(BcLz!OhV(Wn&Gc_2{>zpRiLqe;5etv+k2P$o`3*do}OMI7wCi3E) zZ;DK|YH{#RF+&e9n;rTlvaGP?7#9Afwg^ARSFWN`p}R$m42~t7tWDXr!QH08-Ckhq zOM5k1mQa(+dQLfJX}o>3@bTBFGrg7(JN=JC!YDlN1iK=|PhDni7PT!U zLb)76LKkA{Sg%^VY$98FEj(|U83K~_nMufh$+)`$(ZHL)VCq-;nTo}lM(fCIf=v%) zN3R4?QdpmNY7dGkYpezTSzSeujcn#cUhKFqm*wFLU6ZW4xJUhY{Ex;I72*S)6;@Ps z?+3Vjb&zZm68r-8&rAiAP8wk*Qu+~ZyOdRzuGDnWZ&|{4Y6`jg{%$dTzi>&BiQk=R zO??W5N+@m*hDT5Tp*is)tTNz1j7nb&mi$|()8+d2rbLUV=`VIGZz`to$ONQknv}+>wWUeJF>A4Atjk>J`O!c{0s1lkV8);(tt5;cw%B zPtF@QMgbx=l(7JjMZ#}g1?PN@RrL7?c;A5#4J82M;q5V)RVW4w*06qK94j!NbP7Tf 
zLgSbw8_M2M06^6H-FY-!*BQO|p$Hl3tGNDRM)+TOaRovSTv5UGx$A8{7X&u17Oa-Wm10QgeNv+88^0pXP{bG@h->*eyjX5LTICeQQHwEdJa;QX>& zg$ucl?L9_#x>Z`l`PN$R@Oz)7xV6=zdGk4tGgeQ}DfeAWFk=A5#5u`KpGFhY+G&HW ziSpzl?FZLIUn4D?x+s>CpT%ktY^7XpqC2A(EO~J*_A9ZqoT}P2(CQG^paiIT^}PT(P-CXp za$bfGZt33uu&=!EsIYe(7EqNpUh=r>Rq$xabJ}Iv;@TWIHwF*_Sa#&_{{XG!A&tTH zkv-l_EkCRlSMTK&U9vXZnyRE8^KXL8de@~Ll@+(H!C!va+Wu{7{_4=v>{I83L;xmXsAc&tN7k++3^aZsVZMY=fUZ{U)*Q?|vdM%PeytH*h5 zNot3K)KC;FCWaca+eGba@VfTxs+5i7Dxsj%vzY#|nu@%lYOKdLUS2WGX2b7aoK5S6g?Ro8vGdfE`nC zn8&)Bfvr)&hV?vK5u zE$yA42Th{sR?4%i$88%N?UX<-kpI?FNRi={O@61bX9BM_q5e`-@DdrfUoh2O>(_XO zoJZGEjHgGq7VKFwXQIrB?8d9r^1CsJ0a=R5>eVCaf z-;R@wk8oMGj_T13@<&G{D#l()9YodsK!;G5QjraqJ0Fsfbc=_N?|rU@d4+#a-mfno z*=xJvx%la-+g-1`sQf7m`=)O*S_c;IT3?5`VMkXoVn#fI`@=UGyJ=@l&8bDNuZ{QD z5{IZ&{gvnw+Za2ArG?%CmFGo!Eyt8}Ar^&UZlCqjh1fbMZ_=vxU|uxn<>D$lXX#8A zG!BVsHm?sn+AG`@rokk-p+F6qYsC(G3%#g`a_6MO1$0E_YBSXsemlhGlhkzx68Z)7 zoadML%Nbk!{c-5DItb$osptHKk~L<&r{pt3qHz^%%?w*${^w+v-PWu7?k4 zdLonqlqpKM2-3*}_eUPy-`jYEx_>t95XKvMb>s(qRgGw|vlRFtOU3G7s<*Q6dQS@` zstT3@X7GXxHFL>WvhXe2SzdTjNHe+YIZ;7WZ(svrHSaKf+0XLfiS>O+~r zx4$R2pp_S`CTF7Zy18xLv#l*!dLPtv#ea6k;oV?U&pJfaPvsze}ZoymTuju3K(IhP99j#HQAQ z@ZYVsh`?+~8U~6mA~`!Do@fqEwANN zF{G1{;i!-c$8thEa{ld|>p;FoC=#^Hf|{>rgIij;X7ip96mX-bSO2NVBXwW*b6_2; zARydcs(l<@D{)W`&Rv#mGn}UptcCvhC^V>r2E zJi8m(K1S4S#?i##L6XIE1ZU?drc1EGbWx-X!wBVc47=fDD3Y0y4&qAX-%%iJS87u! 
zGjr1N37#bqB8}4Nqkg%=__4+`mE2i`(5^hYWsTRCmUYIob^ksvMa99=N>_#PYnYaF zc^14|9CXxOuFqC{OR`EI;R!-S!7Re1Q{FM(Efv-hbJY?S$XMl)&NxP9ijQZD79vhQi>ci0l%*UCKA>=Wq^ z@1bY;xw^MbUvff68Y-JjSysPCVfXn3Y|AHXK#I~Eyj6bd?kyZhHT{{C#aOSH1-BW9 zoPRvI+g$Hf9z4tIDt#W0kyY2joYED%UOSmhYuqIbf;k6&Q|MY6=Mnf-tZXt0V?Ypy z=Rd$wL^U z`!SEcQkd*r)>%taK{xX}M`i;K&bTqw zcgUFqtJ0aTPUjNPh%e7`|4lnLat1hfjV)yy1HX*R@RI*)9Gn&)I|Z4lAo_|FWbOcz z|G#N&Kz0qh6fj_P8cXwuh>gt=u!jOh1B85KxNju^#%g)`ntqd(Zc#+UofL{myDFDu zRR7QT7-F@HCG~S3CEjoWCt0xJ0Da9 zpz#Ae>&%}a-7LDgqptZXa_+l-IdHW>ABR)tRXzPJc{dXe_*SmNZWbDgN|=nG^x!1U zarWM=gkUYD`WOji@jv4*e_LhvT6u3*%=!@f3jK!Dp$=jJ5t4=Ab-kiuN}?HB3mx7p$Z1X(6G&JY#FSkVq06;aB`eQHo({mq5- zyTDHrF_lK_J&!K8I(6Jf`O;M(UoBXURWs;^)Pt$UY`hO$`P5@i|8}WBt*H*z%`4p( z7)T6kb87MU`y_0Ot}7!6^4F;~uaR>J-t$}%-hfCW5E}6x3JHcvUI8J;f5R_;X|)2F zrNPW(2t?rmDZR&vA7VNPTnAwYH{o1M?+C+zlwCq!+pDVlt<#L^vfmX`gmM>n31OU$ zTU}T_7dw5=o3!&tzDTY-Xyojo;56$A%w|N(Q&=^n2BJ6pZ}y)Apg?-PW!P| zg_`H;@QSB~!csgW3-o)rIo$=?Puq3nGE`okgVuzFunnEuJA z9B-ITG^Mgx+IQb{zA|MZ;Zx*5e^D(c-12A9P6FOQGFuE4*^t$f+Ow*d! 
zP)N}$+vYw3hC=Xtt(z+Z@>@S$Q5&hY7Rs$vyZHJ>8hM-@mx>ZpV**R`~*4V$ECU4tkcL}s<~ z*9!C_Wo&!kNbBe7o%80_^(#T(2|gn#a(2W8j(f#Xe)}~mTdE=@p_ZeriG0=y6qv3aSFBE9@qM@> z%&a*?$jr>TQ#9nA&bNqOEO?rYRxQT6f4sN{b!ITWi{1(*gz)qOTR*25UeUJojE25z zT!D6(4o?QAv;Mw_nhkn#iTN0=ieO;cZa|y#D+r7Pog7&_+x=#3IX9kWh4TS(ZN7vv zPDP`^X?ptlSi7mpnuKUgly4x`9Zd}qlzpgjWS_UBSm=xSLOn@@7D@U9CXvmBME|~> ze-a%xrqpeL3D%0Ix;U=V78L#`^r0}WYokHHnkPWyjbcwrHkj}9U+|fJ>q6@(NHBsB z(1|tm^oH$IQ?z5wVn8qe1OiSD!0Z`oLls7*1v%;MnV)gMfsU`e`}5`pKlDg%v--(C zuGl-}apej=GU5t(wv)zSWTgdEaWXqv|*7wnz2 ztoLkh`QDvrjzHwA7S=AS=*z+Jarncb?z}d^>Ve!|+uRK$1Cnr5 zZ9Ja8>D`+sWT$5;CfU&&zrc0gqR*Bj(!i4y;km5^f((F72E;XjFH{6LsHRb>fWlo; zes%_A8g=AtY&w5Qh<8v!;PHgTKRDnR`{Zb5WAK`;RH}hSGJ_e%@dw^PEbSEu?5R?&G?usqGcmct0Ppp4?yE#4yn| z?2TEgwq0!fi4l!#l~F_f3Wbt{+G@S=o`G`PWK!Mk8~Mw`rp2HS`J+`5EG4@m2G>`+&^+>oCW^KAvWGWY0$i&+)26&Y%MeXh>a9e_S5 zX{=i}+N(8QJOKTcw>o34@0)x>$n{cDW+Pw5VTrHqe*avet;ud-Doa&^%VSZ9*6Jox zt6AN`#50R+m@PMB4qjMK^wKtA1lOWC!}3$CwC{=9-YW?9WH%=lE2@4j2dJ>>KHva>l@0TAt5*at@~zbk<1suujj1G*TWaw z?_D#ST5?#P|BPGuBYpgUQHz|G;Rx5SE+eVhSY24GU$@L#XuXx|1~G*Vc7c7` z4mJc)S8D5 zi*3u~`^&2q$0mJ&8k4bM7EjtNcSE~D@EM1zw{RfvdCIqghANM< z<9m$`X*B#-N&TweD1sXAg>d*fR+I-h%+e+^i-q)jVaTE-;u8||GyRh2+3Ok zof~MV4#pgUy!t4~7wvt0NW!9`*l+mHM!RzRq{l&V4=m!Jm(M_ddsRQig~<39)&6qV zlELEzoEgm;JzJhcYgK}!G|bm1hqn#QP-k&(j=8r+^+7 zlrO6^wxxj|)Tu9V2N1Wf${Fvy=8)nIb0{j3s+@+&C@Wa?^GJ6kw+#^MOi$T4$BED^ zFO077^qTj+peD9`l!eGm#oBZQ@0aSua~}U9h3anfkA##4L+@68Kc#-64|TefDt$W> zo%S$Sa9f*|GhOi6P227u5>ElC*??>c!xzurETYu*9g$_&-SRfS7@?1}kgnmVcZSQu zkzck|okA`Dp6s2_`M*L$D#yU^f75_;TM& zP-szC^ZO%&Mj0BcHk8cv4IoA&uLwZx!n7<%0}kI#`U@H$%|Li=ynsgKWp|FrMR&Yb zd4V&{g9#}?4J>}D-)}p|4wapE-S;n)>9I~RWnCN%P)0`PDwD0U7s*(U!pG`rd0Ukk zTZD{s>in6&vdfEVER=8z91(=RoLq!FyeRg)X zRVi2bmNTGqN~IKFjOg}Ga3{kv^BzWDqqWuZY;P#}kbFNe679bFLUyWbIUYlWx9uRHK-3v|!_0dIDZ-Sy-Ms8J4Oqgcs1j3R3F;{zIU>rDsk0~w-tHW-^U24X3i&)l zn$9xO!kll7>qTgi2@8sKn^E7leecOl)A^OEAFxsMfZeOthXZ!zCEz%ZX;;b{w_oi@(i=ttd@HuPT+L++cZRkG$p7YQX0dbUa3TneqK$kIbltZQ4YddbIOXN2A6fCC)9C{TbGc)lbahR@ 
z93iycB3ibKAP}3(*HLPHFGTavFmr(8h5BNswk(o`fyj&I5i>3!`A-v` z;744OAdeT^bwABY*(_ir#iNUDVukQ|R%^I zV~fk~)Ys5w&?iT&rK`GVt?Bt8{s7+&_SyU4XgGP{g&roSG0j)6r=6DjR-d!pOY zB{zs+^Xo80PnXkkz#{sitl%Lu$e*CBM)lGznj^5Yncu>uBaLNN0>*ks-g|52jxc+; z!%=Grx+@zd>$5E2)V5Tbo%o3#*jHBesd9fG1N*5u{9T`Gdk{9R59cj&26M9n0MZgP z^d|#J>7Yb6=0b|+>>DAT4*-m(REjxR@82*&f^2hD*ejlyW~Dk0Z1b1-_{b|Kqx#;{ zR8X{WF?1^3A;G0Cgz%np?XCT9{SeHZV!>=;FmWq0GZPHV{54w$qTj&yJOvOS2O=gj z!1y&&(>#zFUu71l!~CDyFhf(O!cc}dr#do4gk*0Ays?){_4V;)cme1F>Z!XF8mc?< zGQE@qhsMHNh~hqM=6z)eBe2{6hBGjf9Gsd$-shIY$=P|#>KHgqpO4egk|fN($1sxT zY@}rT+pk&S89H+(|BlbVZUFx=Vx3+E$I@~e9_`j>@2r-F9j{o%)LKg}Pt=bvVd1XG z<;R#y#6lzeeNfZ+u#n4*h92hCmOiYaMTx-*{eH__Dkmu++i6lZkt{!=fxnqzhKA@s zNg0LZxNX|(>F!4E&=NG#*_e23dU z=vdM%8}jB%(L$cZt95pOt&8ic#?kHS5ez#VLY?z&S$+wi*ksNQ^A>(iq=XZ(uYF#4 z{Ji*(oMzO1QoBZxXQFhJUi!jgA4)HWUi~cp1!u32?V}&LD*Y_1KCVdMN zEkV1hIjt2_PbfPKl?JZqMErb^5f|m$-Uo^Hf#9B_Pt0%e=cMibvjM4FB$|HvJ7P%A8DZm9IF1{K)OX`=b6JZYhtI7>ug zNqoPA$BQ|Ut}WYp47iuRt?|BtsAE+E`3Nr>A~thXHiQSzKA?g z;j{MKh=lHG*@^5?bFH&3M7%Nq8c7(rFjA2=Bu0;ALlr0V6GlVm!Q#8rTH+ zvXaL$qPygPSDa3|_}n7;3Fk-N%)dR1lZG>$l%p)aD$N(9zO{>3kyMxA!6TTFz^J;8 zj+3WBSWQNKFq@p_z#*C#mHy!#g;@ZLqhU6rlu9$&U%o+;cyn)?O64cVu>5-I=-`R1 zBULuo{pZ+^pi+a;{C79q_dW!CRfVzCH($-^z>y{LWb zN3*0h;j!Yqeg_De9-n~r@4T)A=3On`Kv2!!U!z%N8B0?Z{2@%PDwI*vDQJM_fGJnA zGHDl^lcSV!AG=5%cTN}qEb-x_IEZXElefqzR-wBm*ve5uk<)8~PMf^ByLuva*bQc0Gn%vyQ*z1gPPy#|fc~x31@)*!a-_I!4c8VugjL)Wp)tYk03VWRa|kw}6)g+~2leFp`&b?|Pu`2zXf3$81-Ln%N}ZpSb=zKD^*2$(kz|vi=?0$KBsJbR5Dm!l(FC zK0+Mzelg55&WQB|X8555#Vjb8DiM+O0B;xN>Gt9{x)jb zan%1^=z0BMI0*g0*t}%#9P!k1$}H9M;ly^6shU#e(iy7N5*-V}Wj~#R`RPz@wAD{* zsBQpg;9l+TzdpLX__b{*rb#2fnq5qdptcXi+mS71Nk+<(A?bL=K)I zyWUt1lx_5wW)@Awh&P0?{vp(DfSUA-kY20UD!gu7$*jWHJod#KHo6G5E~Y2lAFz9R z>7=~tF;X&odS8oN-7Y7l5U-yNmOJc;eeGx?H{p69@>jQKQ@=Ry-vQ#~&nN8gZFtSk zejTX`)V~WJ@iErOVt#r=a;`l|Y07-n(yOU(S5-)p(w|U-6Sm~ctC2*kMtbJ@$62Xc zAgqmM`2mhL>1^MeK`^7S8PAAxyy!>J^OgLBf^BW8)y(HLCMw~G?!JpR!))-rGLyM# zF0lHXxPhG)jpDI1ialu}S=H!TlyxkmszsK-6urmb7~}Q#*|w7yqYEd<;I@6Nwwjx6 
z{;sSt+gYV`L6HSJL@M(?b%G=wnQMc`4_h}b-ai8Ve(cjpsDK16c4~CB979k0{^gcI zT={Gix2D;&pUojNsyBks!^QQ<{p{ouiqIkzKF`rSN)MamfHe~w@nYW}$G@gg3MBSr z+LLN8P99E@HrjCws%%$P+6Fxg1|Hg~Q2#|#%T(0He?|O8tONRkx^BjDAsO5*m_a1M z@u`>l%o#G3-T&&0i)^{IR4^3vLr{V3JlVNRwIdkf=W+7Tro6y}pCe=++2uK>mAaqb zy*lyBP>3_VPF9OmPuk-vPpcx4nlM-Ai5PDgm0}7IF;Byj7$n^%Icuwqq5X}s60+T5 z+#W0+mWL@r8k%;cwFW~9yWE}E>?NN65|4~RBOg=7h2<-1JJ{^#xOuK?<1Kx=3JcfxkRy@d$=pb-6=*zX-i#VTt9#hGs zV6~r{z?v0pscf-ZG+gBos!{RdnBePPv@J;S`Dopct;*WRW##jS8YmkGgl%+9f<@1x zNg!B`(qdXBx!=96(ezRVVagOqJM5W{`&*M5QD58;2FV=B?5&lYFGrf+Ln)P5u~ReG z|8jZGlX*|6CiFUf43>zGaSz1e`oU#Z>GX-5YVfu3J21Dq(Q`O{58U8VU0$0u_4tUv zI%G@U*UvOHxyMM8t2t-EMbSiMIIG^JMmG(=oJ-=fxe?FfC)%3s;;KLI@L7b!6**v> z*CNEqTXBNL#c*?=Ln4sReNn(pbsHh|0K1mgUIWYHU#(j(l)h_fjd=TCGB+zWX zl$D>*yhEij>h|5VGhrK!TPfNc+bCm2V5Qi6!2QEZ2Ro8|93#iK)><$;a`0T%pp$NV z>$C%yP(E*G+qlp5F`hP}L&<%vH5YR{ei~A_95jnucoDN_AD-iG%-DOpGOK7Iz&xs3 zGmQU2R|X4VxWPzO%hdVj(ue(NE#D=+{1*l-BaIh@b}+~0q()Yd<^B)+co<6InKaL* zbfGqVq3ta7CMML7TW-TLRUj{#R-8{97bRxuulEw}VK99U#@h_Gy)>}(o(m{idx>|P z&U%;^_@tGu`1a6obNg#>@v*@7ZYPgSO|EC8B{*rPn$OK5!x|iA%5=hCgj~Z5E(v&K zzBfD{{goD917~EW?#XC6NLgkB&Gh?}k&d(7{MJDZs0B^au~`TE3;J*n4pk}+&24U~ z)u-c?jY|^e4Th3WRQO)8G-y3F%Ecy|lxqF?XTVBZ9Y0BLD1N_|zUKB}A(=Zb*w82C za%EgsAMjm(oH2@ztE8~*5kcoPqa|VJhcYo`Miy^6%5uC|OIuw7>u9BR3!T0ca6sI;@o+05&PcvW>3>ErG8Tz&mxC8r4cyYC z+Bo=bzpACEk%IXO^~o#d+L~^|9p`}7F-=zq$(5EmPoqW%OSE>R!Q5CWis~P@Kuc}M zaKz{>Ev~oVxA%~vqkUP7{$ynN#=YG&AhNl5Q6T{>SF`c$dl#QSxznCEmXj8~3w&m- zHhuC-(Pp2zXP_P)9P&x!`dziq+*NDhHgJw^BnmIT@b?4o9xYxCxJ#_MS@&W>*0Aj@ngbHY>NbR>pMhLRSMdP}LqNWc!&d*;@Z@Pup zutTPgiY**xZB2!#2FfwS+p=ciC5YQfzBcMx&dygS{4~~IUcpX_BfRZ&eT}QL@giL;Bk~OOO0A|e zeL~PU+^5>ij?YVya6dc9JEeA4-}UgWEXVS)P)vR?Cj-UKey3iwQdB=8?dAIZ+w+c}9aCf=f| zJ6GAZWHhcdnA_+9JrUvoorD|C(kwIv(SAijlq{YrfGl5 zlp}F1EYW9hzJ)lP!?eGX9f%ea-wrK}m>&xx{WS$Edo@n~jmZzD=L zPmba~BxPKAKiB{j4KEZIhycZPBc$wD_yigz`pKu)-C!VK)~$yEB@N~0vS5GHX?>(w z4OjgSvni$upzW=M71SYWPZrLBcTvSKA^wY&&_Y#424vePE5YZdh zzjgxltU7seVtJ!o+r&jUCq+bHn9s>ZF{flqV^hpRiPgYp;o*Lm7r8afd*pr{H%j3W 
zkiMp7&xEW`!V4-H@}KV94kkIUEyH>Ld+m%JHcK4BPzaEHVb~{z${0Q-yOqvmvRDzzqvGBhttKDG>?Oc7((gb zlinnYXlt2gnMmnuyr*5dbHfVuAu? zng{w`y~^9ztDm7%`pP_d{ow(LI4x5ZiN#p^$o9pczb06l6VqiCUavIm*G=P;wd9Nz z@7MmGEk@z;xOtPol5xv_d6JTi2V`qDsjfSDgD;@plgrd`HW#L${y2wvH*R%iSx#x^ zYU27i#1Sar@a^vgfrxZyHiRjTHoQ3i#&h?GtBbLThYzX!>6=V5xs%9bkjTy5!z=6_ zR$mduH!Pox#6MJ&qPHhc{EZY>fPXj3^;NgK!Q%VUbk8lS1CCKh(snMztKT)S!|_`H zgJ3ss^Fur_*)!CGe8JC|LFh3Mt*cMMGE9;1xgC%y2tSqPvnEKS^Ru$pK#S2vZiGLQ zUe>y_J@bu>x8wW0&q8e47f;FcMNc#e`67-K;#_1oTXEeOe0rUq2p}0E) zbpBWURIeToLT?iW%%mbcioeq4u7{nWBE0LM?9H=z|JUbThy?vKhiT?r5z)Dwq(x=6 z>KllN9UJ1uWmoGghxJFiDyumGxw15+N?|iOQeMDBC^wYN^~sgi_HE2paXY=`(G;}1Kz)7*(=l~Gg>B}(eK7n-s0DP& z%bt@*`($iUM29$xFUe!5lO$#1m-gq?`8M5-jGU1M-myPW3MRYp`GzGszy#Z0110uV zGrP-bGTi{@kQXp2AXjXxBh?Lv8F^}z#P)YIxjbL5!U?&`=Y@>Z|L+ZW{8o|vXWG-r zQ(`7PX72a!#9u@oJ?E1I!?s7tno239A_PZAjV1jS=>1exF|MhfSxnPa(kl_{Ut4#;2z_gZUOL;6t?f~oYC;iM)V{WPi&gKqYqfl6 zlN>(SRa+C;{j5res@gdy3T*Lqn2-7uX3f*bk}n=mI!c?o z`Q3Qz5qf*6qCGwGm{KwVJ${rD$a1!Cy=rvtf7qFmuGyU(rrKBI7f-qT8 zXfjxoZ!U5JDo~8 z1Y%N{n*!bB%da*wvWSJLZs)~Dc?$%bYv+9f#5u^?K>#(|PH5u<>|Z;OYI^RY-|<

Uh%URi1 zt+YzL0JQJkG7B6TOM*?GRZJ;-QPjHHM!XA>UXKj=25Tr-gEobZf$emmd8NRXg&2`s zxS+f^hPSsb#v~EeAW?!msjQXX;i&t!|EFxh@yQ^QnT1Aa2s!HDWX)`zExMDN)!?HC zLOS!1wQ6G4xbPyf^MAjS;^t>ZXz?7Rx{gQZB+~`m#38b{&_$6o5_F6&pnR^^VIe`i_~LI^EiF-7^@vPx`-#pmjAewbOc*4gyLIL~Pu}lW#+-^68#u%>UH{z%_FAJEUzb zHR^q(DN49Px4f$bQ)#Tj=k;udua<+~d{So%U(0MGil>z^7qRZ?lk$|69X5VbW^|4! z4t|M+5@@@ChG{VrF3k-H)A+#f#5E$D=idN^ihi2MO_vS=bbwTX;*TC?F_?WY!#DK$@LQI(_LRL~?b=`m{cB0FTNc_v? zChWcXua?>O722-FN;Gk3 z3|rS;{n1Kjsxzg9XnN+oMwcjZhaL2RK^K6+VuiiIoW523(?OvMt@csPv6n!UF zxMmhd%G|NSE;rXxBy}#eZTgbU{J$JUj|@@x1jr5+1NcD>hE{6e3sM7D%T`!JDFnpx{e$i ztSK6&46E#}yxmvSRP~UX{f_AUqFt1{%ga4WN{kN*&=l#W!Z?n^Pf$`p7q&E4*=$*t zJwv+RuCKqR>^|1I;r`2JUAC)IO3=9G%`URF30kmh{_l5lxcx(taY~530C=e%gEKCb zulayz5Urmm!g#92=M&}d(pQ=h=t-8ZOx5DB8W@!y<^ zn;)Jl9$Qugm>3W#_Qk;?OI_c5It~w=PfxkK-*hZVr#57V^`{gk$jB$AaR0^rfDnFx zdbd3z&XPK#Szukt1GfV7E`S>>`X*b^SkEJDK(Gb=5e6`CTF#rqBl+sMQFJf< zLo6OmHzuRbw-C@n?9Y!aMc;DKJ*U?j%k5=}!tX+J(sE6!tmpi`ppyX3IoS3O6wd|^ zYa-S|L8qRO>?u-0NT>Ez*;j*Sj3$Q>cGS+60vX9H9cL015xO&YHY=rye<#cf{?nkHcwrix4aPNE~>fdFIovrj4mz;7E})sdDNE9}0+ zztgMV*OfY`O~j%vFHh3_o3!W0SjpISdE`ESvLU|B1BE6RrE-*8s_PffLN&M(9Fy#2 zjSe#2`J2pa2M!%rI^G3>JqjHRw7mol0fjRpxRqiD4_7N4X|+{$QL~>m;AR0I7Q^9; zRH3aZ_UT882pmucED_;t=er=3Gk%?@9lO$`rnHj^4qgNbU!s(|`M5Ri8X%Qv&2eYibxU zo;prNLP|Mp?(x8GXg+gl!S#*NVV&h7Zs+^F4n43OzVLm7PtdC?>SRVCtKs3R=q_;m z*;{<^>0a##_?#e#LZeuTpT#=x?d?gPa^xAt_}AN1;rpQ}PBF=V!s4-UxE$L3@7S9z zrUvP}4nK>i3yS!XZx)u4@jEuDWCbN zGS7qHy)E=9p3cj9;sHqAA074_fF-c6lRAfrDT{;guvYx zd4USH(A(H2nd+2`Y}r4j4~fpt&5F9Hn&~2-kbroG#|dZj9+b@sy2o%wtpn{^;M0yTmf*M3N}7X z#%O;k%91Hb+Jv`ORg9`M@`Lfk`#9Lxe-Cwv>-3WY} zbaa20z$-46qQ|LH>0uJDlgIhI^S198Pq3M@9@$UWH?Im2$x^hzKiew?`g<77`V`g8 zWRV6Ms^7gG=e2C9ICp`qHTeYEj>_ImUvb;*M%n}Zu;xU?@=boin-h|n6KXJr*AmIi zdNVji*~)KQX?rslN^5=ismgpVFLD5_WO)~0I;?tce2l4K6lILT_Py%fJuIgmto;ib zJDvSSr8^9#E*6(@9UfnG%a;wH-5lsf3A^2%$zcY z+Svwgi`LXL?*Ff?^Ny$bfB*kMQOK6eBuTuHm27dcO7||3`w(RUZ zvdP|ikKc9ryg$G1?R&d@&tJ-M&hzzrUeD{gp5uDl4Iecbogw*ParrM$%ZBDU`9T^-ugGJ|--W7* 
zlPyf@@Y#qmpQ48wJ4v2MHp^~{P1WL|=KBLR}rfbR$l!!ow z9Z!$LocA|m%Bqvz3;ID{F-NTUNTsHa$R5$Kc9wQaZps{N!|4=*hs#;>lk}&b=g!?N z)bFlcH2;pba>z65NH>XTayo!2MeEngqeU<89BXmb6TIHCK0&vX1o$el#?x(D$oCc7OO`~I*uF~mE!krR3~Q0ByS2zA=kZY@G@RC}t0q4^%&WEh5S#}4 zAksIrIRkPvTM!ANC3bHPyd0Yw%^2UBhppH2sUa(lN?gi@zshi>jR+v}P{sMJnXEs? zLQ3!TBb+ccKL0-IX%Ix73AU78NT3h1(2X4;RjO#sguSU-I3}|8E{iBS+>r6qLDHvJ zMaKA5DP~nNGmC;1mckVlxOa0ytnMw` zaam2uvfxl@R(1)Csi}idELVm@Og~3} zyQHj^ko!X1z!=dGJ*SuL47ttCcX~oig0otQ!>NZF11LDF&n{ra9VK>pbO=F_HCMHy zoFAwnXuE-l%_~sYQtideam96q1M_gYH?xTeL(+oIL@?%I8=L28g@X3Uf>mh1!&2RB zN}I&-@4SMS#iSybp_KK~{dP97h6kFA19ADo{G*}_-&4fyL<$q}dlxy~qqPj4Yb>W4 zBl^I6`^vY?%Ch2rMqfkK<(^Eh2LkdsR_u_at2=odi*Ns(Gv{q{aRmmtxjm@HNh8Xp zub-Y}{J3HM?s$#=-Ws85R(@Aj_I=bmEcvWnFB>?tQb|Fm>{ zTN9PUOD)}*b}c1dT&N#PIwli674L;%IywD9el>d)QmA7lNVmZ~)!s`%*d%Ned(UlLS-g1{u7OW8cccv}XfFkf$l{)b{m!s?Ecj1GU+cktoM{=+Ud|3?E%gdYjm#^`XsL{-?{$3Sd zYy5?!ejbaoC1qTPqcSHu)QUf_N7*n zl?7u#m7Toxr<%9@v0Sg{&(kmLyOKCt9IHOM4P`@xhLYRfL0~TdPZK(xKrpQ_hWZ1X zOrh@Dff_U4cXBa)WYoQyPb=xav3=ftPn2P2HAphQsF-X+pS)ubb;oYa9xF9)g>cHY z$LLd%Al%IIQ90l3)?ObzB1Z&C7rp4594SkQA@9HqH2uAvjoMb_ud;O`^>PWXEg7}d z{llP+u{fqJ@jW2#U!S=&6sWmBIJIaxG_N(UT^Nq|hA$y53)iJ4OaAPzKhIX06eEz9 z#;>gkVlce@QpiX1KHM>P2+x-J!`zj0f*nG=M(oDCZWf!-G1rqG&~f-;-d)5hJ6Dv) zkfiNfrC6g1v)um#gpXSjScr!5qfduTA3ip0ziHUEP*Vhs-d4rX7aC0cn*MQfuVP6S zbr~acVfR6$s?7t;`*zof>#K!d*ZMnq;&qo6*+-t>gnnA29ef7+MlnP<>Il}CvhCb{Hz}ZZJ4A8*79Wm-ju)?~Zx^iFOUIacz}P^Eupo(tf@Es|rD1mLg{I z;PvJpkJ6FcdhvzJ(Q3ckP3kpVW2;~9LeJ*bC_UYJ1Jw4|{*#|TzzO+zr7H>Y`W>iL z{Gv+U_IL))`!FFsQ(HKc$_e2>9m9ak$>a@pR-2Y5Zwz6C?XKZ5$DZ-Gv)o841^@Qa& zJsnZyI6V5QuZEuIjb`UQ$DKb#eEw@d;~e`Dzpj537{59}!`o8JKF3_DDM?x0t-+ z@)V1@Dpk=v^2>C%a?d4H1AVk7W5-7SZuO$Q3?JDA4aGrY#9YU&HjNPqaXo?JJ3iD# z>q^QUg{9yl9@Hq@idqeEEjX){nf0GAI98t#iElKVKH5JS9gc|wW(OS|*AbP?qH(g$ zBti_)x_b?=9(tp~qV{EI^V7@U?s9bKbKYd@M8LiX#;L;vVX&hIzgc|%Tqf>GS3CI6 zDFk#1N&@^vcViWZVXOuo(y0w3>&Sfzor(z`b)DZzrwW#oJ6qK1;Cte&7?AO#j)Q#)Yf1pN1 zwLhQso2pCwdg8sdj_hoggVjzr-#b{-#q%P4h=8gNLnuq|PKAvLj4!Gb82UFPE!oUj 
ziGwh1{4;O$yCwhg7?)jrfK)J-%UP!j`TsZI9yn(TeZ6-ld3L{maUHb>WvG&+PLs4E zSx>bm^;HwTbB=KTdi)h)>;Cmd%(b`fHu#CTO zL~!+x8*4e8}mp!=p-3V}9;Z zPx{=z%y~lSMgD8)sJsFdb5-*R5?$Io)7FmE2E`Uu<|`=}Da;geowl*wvA;KNR+uu@ zD$duDdd2wsMbdpD8#WmjY^GEo2eu4mm*_8{xD+~<0^IE%;V}LDWIY3gAFlgt( ze*8c!tw1hq7XCa=O)5N|(b%mvybFtgfy;HwF?qg3jiEKST2JHPL-VGuWave;%3YgV z3{gg^p&p|JPOp&4i5FCUb+qsW$6sGuA4E1RQ1TX)<+4;cc}d<@u3=i9 zr9TtDGUlV1$6?Agc;4FZE9RC#5Xr{uYdb2w1Ft(#+V=`pL+=Q(8hNfG+lcOIs48n` zv;X0}Pqq%9-OZP~O>lhgA?nR=WS~j|7c&1E?eE_$7b`pE{v2&Dosfnm0{Wk;3Ln(1 zqFF5r-X9WP6^`|dcn7v0zIk||z+j%}+uCt!pLX>r@*j}pw2N&i4;fDShJl8b@5;F) zgw+i1omATE*w+#LNWShf63TLZgSviNGS6b><>WmM{MT{x9(jd!w3`}y=B+`CPc<=~ zIGx3Be|3@S*|>1G!9NgDRx`C=Ic{*&ip<$^ep1ofR9;NsDWZw`{ZXT_T|^Uto7dv_>a zt=}=M?@jD~YjIG#<#21C9clU4-Y5CxQtpRTbbm(k{?eu!y*bOsPraXSvQVCJM7p0m#<1%ToYLAg}?k*InWa}f0;VmG6X%7&-uV0x62}6PV z8Pqs)R}@Wef0g=OE())$wGnb(Mbd%{w^8b*JCuRVEH6_L5q*e`K9~-;3EW940d|08oj)-( z6lxgj?NwTmlCZiM;#P zK4>w$F30$BHr#uja6=GVIcaj?X7MEfK`D_sRv<8U{IWM0W%>&D<@TDG6lN94eBt z1l2j|VI-1OSnzOhg@lEj&4Y=YU0vJewo_6Vu&@DI)!j92mXT)>NgVdH$9PfWxPrMW z+E|p&KTu2cZN{$eZZBq@Xum`I_e{k;U7=!!xewNTy@)qhTm zTlb?0JkLETLbDU~SZ$0eI4hsqY|^R-g5pY~Y+6XiZF`-Z+}xmyl$5%@oi1d1Ypbk` zjG}06R#xlSn2x9&^zAO++JTPxJC8;voO$vOpWmxz+m0+zfu_EZ8t&fv*0T3nR=Sl3 zH7-s=ErRa&H*adt)f1^VY*dDA{mBi+CnnC9OH(MwQm46d!o<<5#whTnPFK*7i;_49 z8M?aqlebJ_4fvIzH2f(O+{W!U#&NE|!^?-1*o|F`T*Ta<1T5VRfvEHMx0)u#3e+a* zRM#-+E&`G+3rV(F*u8z~yEAr*eO2yb1?-!Y6VU)Apb6bEd{+wbGD5Vs*bU5DC7v*8Qu4f{PXVuXJ?pR#yhp{lc-2;e*{Q%9$Fs5;?3Qe`H2$lfzD7ik_If!Uw2Y8E zrV5i|XbKz*&{e;RL21U1D?|B(tBF4801vr@=?x^d={M*%c!^-b?=M}xJTNqrDCqd! 
zLQO@*(AXIJ(xsT#Sjq=z{wHD)>wc521tu1je1cG!)o2M=p}qYMU_WvF>VPHnclyG+ z@DxfdLd57bKu-axu>rl;JvIhhW#A`IPkBYAqd!Ru{hn|I=67%3a=k+HED_x}FW#Yp z7eb7~OK@FS+`_^_Lc%8*?ZOZUA2U#1I=)1E2BN0b>*paQq2yj&s&VeR*)Z5nuHaH>6sOP+?JJCX?pwzUP+Y?IE4k3q#xa-T=jd zZZACB-vEk8q7a)VK>trSp8h*mPafMTF&rEm^fTjNx>MHQAM1DW7SmtUNaFY-(hZy< zeA+9!MnZIiH%*gp2}k zU!x`4C2)3jzIyd4XgdQ}b@v%0`p>7=m0|O)T$CzBTmv9bIvyNe46uI$|0 zJMi8=&7sXo#hb|AzkhRba)LQcPUCo-`SJ>^V&{ky)jZ&ArWqAFB;`6)dtk3$C5D5He^j(0qXpV~o(F?$pwgYYb zN-Xwx_s9qpDd|fKik+-F7+x2ydb~00RN2zQ(2|l$;@z z&?itvzefGQ*VmUu;FWZhC(x6j<)yEdNQP8cB4G|gOYX6-7zlPTzq7-UefpFv2nk%% z={QiHnV6VZT3YJp0PYID%fm=nhm(CPQBhH9YU;e^x;lf|UttI9<1aO%{?nrhN=hFy zGnW7tSjH6Ed@|)j2>So+R_uX1Sh*RQnfKsHad8WZi|1S*1OP3}Fku&RIYIAMz?+rlWIxx|Eo8@AHP3ivb+ zyZic-tHLW526}sYA4|SKnR}{hYG$gijBdoBN-rZ>Sy%+Vka-Dsi)}7v=c7VlRs)tE zhUd|vTEUG<33*vIZEbBvI@6Sbf`Z)Kj;5x00LB8z5@wQwKfd$cfO@)oV&{GQUi>1z zQQV}gEZM5Uf`X4H6tUP{KN}hhw`W_BD$v6pJ|uYrb}e}Bbnwx3UhEC&1^pTyuL^dP zfUapVDAGd+a zZ!#tnxI zunclyC!vYe3S1u?}Ir`kIn5 z1EvH5fNJvOdU}(yv$G&xAj|lXT?uWCGyV>sf~HXwu6>aD_3PIaqd7MWitI5u&!Ok!I^~g1DumiVQ%hP zcd`Vg_s_PrVCZsTu=e*MA?}+~7(?xCZJ@Xp$74pq7l|2*t@TAI6XcWTr=~XdM{Pv+ zeLJ~YO@Z%zj7RFiC?(YlJ}dp}*9W}3xBOV8i(B^bK zd@-66yvy{>G%|LMN@O>j~&A z1Ox=&`PJ>Gg3o#Yegq);FBnR-f%?! 
z;Po8wcai^p<`PM^IEY2Z!93kIQ?F5R^i;vGe^r8blaQ)9IiN&QdNbt+!z^jaB`7`tL`%Tm~ zMa9K;n3$G##Sw@uMB5`s9ez^KOVFy_OXcH#s z*47pjO80eEZQP1j% ziWA7p+}zz^!O%NBus&XKFyXoZQ9TgJ0H|b83tpI?7qOBa4@$1AbQ{Q0g$IT8+^-L} z;2uY7&BfV?*83R5h{i#eCJ6NQ4g>%Zus+ioE{dL^#K}Teqq6Jrr|0C2`nk9DLxU`+=^9M|F6(va|CMGy$?n*QuWvXANlJ#N6B-7}9kL3PC|Z zZv60s1QCnD9Jx;q%>L+NAiCMmPrTXhake)!*A^We6_p_Dy5AWuh=G9tkTsx46A}=x zv#|*SpaO7FF#TnbVD5hsl)zu>*S>)vgzTJ`hi7(S;YnI=e)X|zrV9J>%0uvOE`ifG zFAk+tRaI3~RE&&_tgWrh%$7il7Ku!B+m^Ag$R3;kiyQx&pUQXC*S~~u!}RtFZ13o} z*#A*O92OZl0BJq^GYA|sqkuqMAS}8&gpGhf3`-73-@u^xxVRV~IHWG3U#4X`)kbaC zg&S=5mQ@+F3Jil3m}gz)+ha+osVh)(t;fg5>s5DvX&->L^#m<3F+}LWf(;RLEW`HR z9(fRQEgn)+CWc7aM&7Q^{1Qmu9ItLF!5mwYa-gjbB)%PJy40IyW@fhbV;IdJFD-qz zzJx#sQ%3n*LU^Sc8yKWa7y@t_BIEf;iy})TaGt4&9ZG1I;B#NmVi7ksH;cL*Zk`_P zJbLsZI^z`)fjEuAS*+W=}T`}e{420MMI2Q5bsV%3lurS)UX3tKB$>U*e3wWNN z`r%P1DJw(U>>)dX*15r&f%NV(A&*n1fRvkvr1VGVbYB2KqdGb|xOjN5p3sNRAJ%UQ zz1g*fnXQxwdl%4Q7myrVj}$E>UYw@Cz`{Z-QZ z`?Dd8u^P`w@SA{CHqy)sX#R5<8=GCP`g0(xlpUZa#rTrJA0XThfyj3mvlhH3Flk!l z@QpP0n{Rr)3ku52%#0OwEy=^Oef27$ZSLsk2#heS#ZbpYMy3TUHs}|m;X|;c@ro1| zoKH6nAK#C|LnrI2(s+1y^78WF&HzbYQc?oeSz1=Mbju&W?{R?-tjA1^jCjranH?P+ zDJY^L?8t;OJ(iSQz6Jhk{jX)YkWf;_Mn>{_p1V0Z9*q95A*6X|J5lM(&(CjDwHM`q z8N+SzOj5E5f_|fc9q3*{jMdklf<^(L3ZmB@DHh@<_sfxhWAu)T7>K$-2nz`f)hn@# z^~lrE)3X7qUR{0Xy@BXX{p(@=*Xrx10NSPCK^(d*EIi)QVzx8iQC`ss&iPry!{X)(eaXom%_#mfpXSn%4# rMB59zTrmFbX`=nni}-h1-`wSe(zU7Cvl{R^4L&yacI`fvUpuxC|1 literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/starvz_visu_r.png b/doc/doxygen/chapters/images/starvz_visu_r.png new file mode 100644 index 0000000000000000000000000000000000000000..3f5c968a7fdaa0332ae34d785370cf2c1f54c050 GIT binary patch literal 57891 zcmd43byStx-ZqT7ZIMuv5KsiAQ7MsBm!wLAw4fj*9U^5SAs|W#Qc9|a1p(;} zK{}*cc(0}VJkR@nWl_MyN*-3Er%=zlL!tg_oQ)3;^GlCJy-y01zY2bo7@EdnLoxQ7?Ud1(6LtmHmX!?s!&*1F2Q!=oO)-9R25!BXcUHU?Hf;5a~Z8>JXf}h2amQVEy{nng2%gMxBND{rps+{VA>&YcwHB@+r0c?c3e&Zg=W`@?bfst|w(S zH&i3)yvWDLH_@EZ+LETF5v=<5);&qw#kB4u<%CBUzDK!lBuz<>gvAcMd}^x|-e)YJ 
z|k}cz-@0ozmmHP$#HRmS%%g59gJ?vm;q9s!#r27Ue(PsD3=%i z)s%g!E-^8&p`k&2ao@MI(p~32Ni$Qfh!ZZPl^J9fUL?7~5+H50VNB|BowB|$K|U=l zP1teDWuh%JEH_-xQb15Jiucz3r5wFdx8qWN>#O4#KHlEUlrnek7G`8vt}RX$Xv)b| zW0xkzy8I zXX)tZPM%DVd2vGY?tEg5=$$)v_A!cSmoRg3KGmn5I{oR?cx#5g;jgW!i4eH0*_y6%|_QPHKJy}jQu6U+0h2i_MKzj*%q$dM!S^Yf{x zsV6RnPOtqewar~9eIJ(i{=Lh}>>$Q@ijU9E&hC|v?GWb5FxQ@bCOY~kgMUbPI7hE4 zK33R%tSsW{6`|ED?uV`NU$lSynrhiwqNu2-{o_??DrbzsNMGM|L&L$rLHorCz488L zkr!s7dE*_gHpJZXRUXz23kwV57a}|b%-j0L#!Su4C%%7IyzO`1C(d3Mn*<-iX1LUN~)z$dJ)*z*gicWYpt&jpt|ei6ciK`7Z<0hsyf+KV7oZcJ{$f6 z-#69uvT{HsuHnGRt9P88>8Pnco8tD-9XZmKu2Zz?tX3ltqlyh8JN;{^myG&UAGVEh z!d22;yUscEYXRm5)U3%j$Wr`Y+; zn>St~wGqOutMfwE{beKdZ-#5aD^GoGN>V<|t)1AFnfSTbd1>p`tseVmvaAOxPT!~~ zD=*)^eS4BHtE$sdx1;#4`9?o_VYm5uDQ<4=BFCTm_wDMD7~;=2oD z_=tGUYNkXdVbT59Sm>={pr;{ujF>x-S1 zPaZw$dhgWO+gqBYw9Rcza;?@^@<`2Hu~S-f7cYKV>T&*AywtOI*RDjhh2@zWnb$ar zqH8nr@=m_S1meD24i~^gyAf0r6rKeIQ86$?K7W43tcBB}v#LsV=gub?C99?%?rn*W zkGHU}&~usDzO+4QBT`jJ`NizlfwSD_!XCBYlv-w(i7PxB}#fu$0rRyKJPk(>HSXfZ- zMW8FsGD_m;S=ZH^hYz<=aOo80Ncl5h`l^Gu=BB5mWn_-Mj=A{c+_`g<^{9W?AE-9@ z@@<3FA@ULD?6LiSzMs2Vt+#&c`&sgEkon_o&v&b*}a%1gX zO-+HG-rnBNpFfxMXShP>F0`lPHTqf;F6h2CDQH2Ej}&P~@r)35)Lf=J&rwIbhlOIN zFsTcY?~|*TDZLzSTg0D!=?W9r7V)WixVqYc+~>L0zEf3SKyxD zmH7Jg>ztgN_wU~iam&ieUX2nHy-~4ms@I4KJNFZr_)U#>LzLIK$o(q^HZFb7FDS_5 zObK$#9vvAuwa<2@|5AJ4L##A*9v*+;owyBK?|-PP`?^Cux8>@qv(}Y<^j(fZ0n!A5KaOCW%p1Sf)YG+{MjPTXZ&X_JmWYaq z>dZuLq5Vs}D<)~Gr&x~AoGygz=PcVBkdl(J-iWdj{_-U@cU?zEm}h>eQB{6XQFVIW z%as5=Y-n2HpBru%bWRtJ1v~l}mP>zeH zi-pQ~_Qo5H)}2Pmiz!F8mRzr&m9*sA;Fg)4b22P9xN+kmp*c9rX{LWWMSDj_)#26U zW%7C)XqFZhC8z>qV$1i|P7DnW^56VSp6=`CclFw}wl6PE@@VIuGORkd-Fb&GIrG#v zm9AInD{Gd4mo_3rk|)_8r>9$}s#4{Ro952GxM*Ntz{JFar8kO?!D0||U3HoLa$wBXe9NvgRa>H~l6+`03wznD*)*jOI$$1>^6v-}B!wlX)& z#lzb7cyyJX6iTM{$e$J5e4;^D2Mf~ZyiUipTyst%x(2#<=~KWtA8HEp&lF* zWS!^2mfoRsii|#v2@td2r8^^t}+0O-4S@4k`reNMHpvG!AZb3NN-m^OJML_J%?ePbPKO{dWAB_IZN zPn^=I2PvJ{>c|_^g2#^@HMy7B+uLv3y0v6=ECm}pKRCdi$<67DMC>kjOPJE)G 
zzox%`>bjAnQk6zG*P>-fN`J1Yq2bKAbDE1%Ur^|+Ioj)c1vjcUZGmKhA|rLV9EYE4 z;^@Sdc3T+?$#SQ_yr7mQTz>iS%C#o#3>PPUIU}R^qN25-8u#x0ih-H@Qnm5XQ8t^h zhYudGb8sAIV#1OmYgeKJHmkaOn7p2dOxKsKr!omlD?eVyQ+vETYyJA+!;7VEzZ|Bz z*tGhyb8_sOlT{BLI`nV{>GRvVljAQ_34hn*-a8aU*Yxc7);sQy+zm*vF+bXP@yUK` z`Rwd$V92f8w$*(7x?>;pQSx056K%)V47UYH=h%;{-IwV5{{1`whf2wvnSSiHf)1>Z zkrCr<+qO-4JiB|>V(R>ZLw{2_8nfS*cnjw7;ll^~*Lu8VvOW7r-Eeh_fa0@|kXFEY zJ^&IR=fR;Ntl;(!A0#&Xm+8;hg^D=MVm;8))7QNc!cfj&ZBIr0z~Zfa$(sOBv1Q8^ zrht)f%MxpAYa!fu3j4{9h1^zMt!$$jAX?{zu^^nN(|zaZ=)wV`rg}>^BOH6ai(TCO zxw&D9YK_zQ^y$;Yg7)m#MO-QNtvu>5&auQqY!wcc662bmK|C2oGt<)}*xC(oa#)0~ zA|i08=EcS`b8w_!fWYdl?qy!apPwCj_wF5V$7EaPK;PdF4Z8(aM>xiHBnT%?oWNLL zoRoL4vN~%!blKF@)XM6Vpq|sO)ddg{J4eST5vQ#C_x}z!E(I! z*It*oD@?MNFTb}RZwcc!%OB+i&ZrI1a~mBVt}NE~XB3YN2*@<5p|$vUSrmMZHa-6# z<@<#eEuc>zt*5(c$;7i2hccx$_>gv7b?F{looj zQLc*`Zf+acs9DY+{u~?@q((6cM3jmnd#swLl=(G-=w0nnH`n=*`d;_VU%^~j`}XZC z*;qB>T26EwtWMGRYwLL>r3N5u9^g5!n<%%{FO`+UfW7f{&$HfrwCRlQ(WodVbYf*9WFOcIN_f6OI zcO5Uavtu`Eay{Nf(vgaWEx25NAc8{@D^C99nZdTq8-THwu=8r8#CgK4%ep7OVXNa( zyoFV-Mv4dl=A^f@wJldNx`TQdnVOEpUkU$OQ*(fl@@DP=@YC7LDZkkG({g@s8M<-T z{Z>23h4ho3s@3}?oyiU3GkNasUx!=KO#eA0cRJ~e?31K33WqNTDTW+czQ0nZLW2$M zzr}Y-YL)d#%`Y=9`pZH0)pn_Oo##Dr)=HxL81yKQL%;5`1xR!2sW_68gzZVG`mMVd zlUxqkf7D*V%t&c zoy4DfBa!a9Z1<9JF>E97KbIN$g zYV7`>acgEIbD6a#$eX4aOmxhNYqx!!Y1;Nj$fG)ZSZ+LjLh~?#|1AT9*RSu8m|p1b zxx1hj>>^~NVj(%HcX~V||3^{SyYBP<8f1& z0#&cN=&caiUfC+8aiP@DQ>Jc1t?M7uxv3&vE=28KVJVlfNtzOO?RP^-4Oibp0IAi-`&oTI) zJh+F(!%Mf+rm{n4Y(H_;MaO+OvZOYKqK(?B>Uhw8c46U4U19v6b)X4!n$}bu!c$XI z(@cLU;*LlpFziM5yR^h1`5d#}$h>3|g*4kGU#KEDK`mFa+-Ee1nJL!?L z3akfbS6rCRInDl9oa{WzqdPG=s*$!?C$`4P&%f|F;CRg={UngEd#RPXp}Vtua>?6tGCT?48P;nCC8&~Tb)JKcUF zw*~jb0c`WQ-IwX6ao`St^DQ7U%~kSREw>?evrWMRuy>wko?5vY{s>;T%e_!8V(Jy zGJ6kkp1nKIN)Z$o$a(E;v|I=`00oFilp^JfRyuD0(5yjOAnR;4n0^;Ut4^$5#XX(ox=#(>E}wl?)mylED;K8N)LBY10Qy&+ zm$97Z7adL>8#kr0C zes$gO&`(WE!$p7_9qfJ6@2E94I;y%Re6QbH066wsd1WP!W{xqW4k%KcnKyP)pd?`{ zrQ?q0wm^0{&C6?xZHn>)l&b@KBQ>w7sR>#d27W*hnsus1HY5m$xPmr6r0q$Gr@BP8 
z@vVMt@?Fr@XJ=Cb~X-8o=aq4V-_t!_dfxoy!7WcdsRe zMR#1)8%X-c&`?XE-DQ+$xF(ie`7zhkyPD@UE#DmUDc|>L`i=a$Us>j{a8BUqYUjqB^{De4*fkDSGBRMGAk+L1AG>XJ<%A$Sory=tA~Cdt7IKe9_LvsH1?nu@c(b+cDp_ zYG2j2wnm-Oe_*n1lo%TPP z(|%VF(%VQ;+2^GaK*g>s##{86QV6&C@@Y>=h88SZ8v2 z-;2d4eO(ljjGn$;*{G1#hJ?rGb1CUd{9GGXd3=2UUy%mscS{-(tHhLBaqblmXOQ_J1Xxjue=0%y+bAthCHJc@r# z!*LLNjnm@W2^XH{2!=( zf1Po;-GZklwTE5u$%?0uk+l6ubbr`@aq%)EcdB;bz09|LPq>sEIWz958WKnHANfMQ z5Y_LuM9$}3uHWdkZ=M}~bY{O_4UIqhKb)ebQdWjDIs3Wc%b|Ue{Lk|9bPH&j-OWDK zw#T>FOpCv|sq+XQ@h1!5#`kR@sAlZ$mdRJQX_t~~zql=dpf%G6NbZdP>_K{$UxikN zS9#r4g?IU`F5lGon&dK;UR4g7!-L&{i3ejELUd+$Vq$t3145ZmV}7h5bb<^uCeNblGW!cEB8!g zvxolP9C$3#om-a`<>Rw&EIIdC*7H3x+2?vraP9mvU*FhOC-q*?hK(FNJT};^?#Nm7 z=npH&k-fCkdTH{=cCzdoPV36#e`Aajr!0EQsmkI7b`GA-^}%@F;pcL4a`{+w`zd6S zO$!oqss7sf_p^XST zP;y*C2YO^#PMm;5uHLOkPH`pT+`@36&mRsK9e1(6N0#c~!M3&>&vv##yj*mWauylm zeg`CAb`w8?gh(b;Qr^B*V5UqG{v9OnO&IhNuFit`ixc^0$(n_Ng2J_H*UZh$H8ftP z{td0lWs?irl#g%zTg*kcnA+jMDXXiix;SWzHX(hyg!5ceHlGmXhH|FVogX*_6hoAb zJ91{`=RpwD)QWIUa_bac)6#+yu?9$CPX*DU0cQ#hDj0J)If0(K@9&(p8&U9zec2N7 z7l!)HpyO5k;X{W!o;-nI$ty12+umN_zPVwLxr~7T&_f3ev>fg4XIk%s_$4bRhl4{s zO|v=wmzt_7bZahNUS*nx#`_Ho4dGB%*VVzjZikPBlXmddtEfLv+|SQXIsUSjuWtnJ zt*bhZA*SV7_QC^QLUn}c`ymJ@d~AF?#mtyEeVP};Lmqbh50ETrhY18CluKAYVGQos zu-B?({K49OjuC8r6_nQN&wS4@y;5un&!I?N9eI>5x)lxoKENL$~uto0b-tks`rMXV!iOs_q_Ci*|YwUi03nwc?`ceIs#ndU5f@ zYy6VyT#cYTKL;o0*Sfk8o}|=NzB6Z(T%=?nL0ljRJ4}pp=4B9=G_r1fNz=*`5fRDn zfbfeP3W5Nl;^OWj-(q04LM^d%bj;8&HZlqh4gGwZAFCZc*ko_1drPVYRyZ7XxQri~ ziTm!JWMeHsmYEVjxTvUzqiq936fQ&;j@f=#P$%Uh;rA8f=d+5wgT#0Y%6myHz71M8 zY_a|$^z@@Y*-m%mI;`-BfSchuuvaqg-#?G73RH-f8CT%#=dj3Lycif5xS_94K~ImV z8Xg*QfTr&5zG=kxhwFd)FZiig^zY!O%AxDkl+@JJ3=Cqpjf!t1p>!tzX2R_#cA7Ji zmiAO&&B@H%y=M=tu!A`~IHCYrFU1I#GB!50i8IhX|K^wHke)sCvF|}ZAU0Q3SNr<< zHlLLwuGcFG5ne?SN8e^;;m0Yt@8DkW@-|jgc^5TYCIEOUCV>-L^P1j-ap5!6Zm$E1 z`~1ZVm>~m@tdL1D?|kQ5Iq=^A7waetW?|6ZY_q$IrW;ZCv> zufbq7vW?0sDr#W_{rDVUN=H2S?+rE9mPL5N#I}Nr{rWW%FK@P~Dqu9K0G3T4%hbYv zkx}d2H_6vHji@DO1fYYxoj3+F6pN6S_Mo>^*TKbt@3&;hL 
zLcV?bt+7$rOJ9%M-r=_z^tUWAvImxm_<`;j)!R2c4oGvstp*LQBqQBJI_NOA`YD^Lg3^S(qYg@eEj`!gJAWA5txvzU=%RFCLwW8;;6BLI#MQ3 zLd>+ayKv#zSy|9-ank`vL7VvIm1bUF^4alT>_c1I9T>1s^G&_Zvj6NqmZ`rWRgnI5L6zkT!OL0Z~m#M5y6 zWMIE!Uh9g!vh3o9-KTPpj4$xV?4_AZ%kkvqy&Imbe}F!|Mb=^GdcQUUII^l!)8t4On_BUQTxi63;&t{FKAeQ z{!F5ICRnb^KG%ui>_3>y#bC-?Sg+eUk^EmjcwIQ0Z&qD*$Yf6MK(t!{jl6DqMQyue zfRXq7n?8o1$DKFQgwyy#HKjD=PjV>?>{qKvJ1Bn5sp9TC7AC6l=s>r?xDSh7hLOJP zchwFx`xeKCRTlH748Iu&l8UvKO8c<*h398?qK25TzIT5>Q{lK~-JIs$=GlPs)b8e` zztiQYcsNpmM#J}vu^pUa&T<<&M{|%Tl|OmNJ(nt`q3+d=h&+c{0d+=^VX?q{42Nkc zMeDe3cMeC@kj{FOdhKDP(kM)1?J;`H@jIs)y%KA{WZ=}{qDuQm<~A0QD8%Ls(RYx^mQiza&e0Hxy}TY$v4y$rvZdzaqvHUnLgJSPRCxA>hQ?b)ch$Six# z7qPdZPqM3joTa$C<1T@@m&(7=?1rh>%@5B^&hyT1U%k>x)R1$G_Bq-ntJPVz4rQJf zB{0=D^|3`1IeP16H-J)eoknhRsxwa>DD+$HS>*$~xn;yrblfiz3WC-7O^j&$@U%vv zPO4c_gWbXq)GZbo%ATeg^CiMEzD_;cE^AwM;|Q4D+xH$KmX_DR2LQ*-^uz%e3rI3C zdFVTJyy`NYYl~)p5(kk-!SNu00Msa=6RE=fhaE+KVKxX(klXiW>a5DH|DLI1X_0+b z5sYl!vv=<$CYDq=B7jvOlVndWh~1Z>bGR>e-FbI)*2$}{q~+wCm#6iCQxTKI!K%QD zi0Kbn?E)L{DfjL}Uxw!pY~8W{2B2G5YNgz@#B2?J2c`M2_I4tb$kIxi{ zr4W-78qi80tZb~$$wRrZw6uh{7kT1qZbz}Rqn+J6a4&4+w@!0Ii0HMLXX}63AtfaR zv6Bg&Q3IIypjqka7zdjfkuS2UAU!U6&)XlR7szyS)| zLK(OE(u;r;jmIbif&UVsU4d^PM3XBbD`ae(SRxi|LKHHnUU=rNwT4I+W#n6SzMBLS zfqTv%?2wu|UG%f06S+-r7bv(l9|LcOSSll(LO~G_78Vv1)ci(@Y`f!OIyx-{g||1q zP?C|6F@@oVKv;$TkK+MSS0LYspnSJq=c==Cl21TH!nH!4#v-n-|85E_p{=8na{CP6 z-|hn^$Q2zN7BS|F7cTVN8Sq8?XZPcePy_Ye- zwnlQAF6E+JT$6CZFT^N_9Y4v0`|Rkr6c`wY07v1w!MPQH&lAM(A4f?g&>5UtD-B_a znL8rHSeu51hA)i-FXC&@;CAC`;gLp#hMu+RlhA0nO86s6XK-xp-@6y)^5*(no!95~ zcA;a(j-5U|0}zWhK$zP@M%LQA__G89i*|UbBAcpGFG=HNsaX_jD%&LU4G2<0cd(E}tADB`I=kQ(l zQ-DtJ8f#Z$ri>7>^)O57o3gXF&oggVN}Gl;f9y3fjb9FfeW|Lbd=5dddHd39Ce#o= zAD_+w+XT3(h-QF(Azx*DEjVcUnp0zb93-^sLb65tAZXp*gAy_h4tOLtJtO37Kp#l z(tm<3i_GKT#DwoUjE0W@5q5?@zxUtHf5F z<(=^<6p~hCWUZ(Xbwx)<2N-TgKfAXx2o$(ux^6KF$`ZDFg?+;-1ahrWfnpTeO5Hbs zabfRQAn!U<6OJ`=AqKoVrzI^bJA3cmy=L42GK7C8FL$-IT`M2JQ0}RDe~%X&AA6r1 
zV+3rgbCmp=h6dJ3v!)i5sFyEIkaZpz`5dI8wEg{Bp}a2C|Nc;nDhPD1i^VaB4`?4*}+NWhTl!kDF!B zGB7Zpvh*{M(eklE5ZHc{G# z9PvyXYGC~@MY$*yYOoc&yXfd>7P;W2pxivx-1u&M776W>0NF&URCe&-HH2nhz7Qig zxTh<#Ao1FTc1cM3cxO;ICMlnUnvJ!*V%Uwu6i~O}i}!I#CKoO|fC?Kc^CD1i7Zsp3 zf{luf2i*gtnZ0|T5x+r!^!OnPic#Z9ZAw1@GGv3#va_3F?osq1P2ZwE0k;SGNL3X( zGc#evZGK^4xGstjcHW^wLrAS#TIwNthj~U=33nVxrvuOg{Gmn=<#OCj({Q4Zhp>3n zk*G`8 zy5@kj2TUFjp##hx5^~rvX&GZ61pZ;^w>yc9=R=t&6O+N`=olP4b2BrFH>j~cao>BW zRb6+x;M?!*=(D(5PUYwupOS)9HeT1%x5>-bAvDg>5R{v14=U5RW^xQ{?ybA+ME{`A zg$oxta!lR?Q)6*0*ps%n7XDrE;ZS%rhSDT_*&GL3TGelGiv-tA(1U~i?;oUbCcln^C?n24a6rw0&JUhDwjcN_dI7o&{7uphnAncv9=+t| z6oj2iG8w+CJ~!%~Zx`13W#Y$&~-7$eL&HH)@@KLJC|o>Hq4lvZgw{Z*(M4cz-j(G{kEO z^QH2VQTL~o?7p9P=%4GcVxo96Id{`jx>~)6h!vNgTw`9fp<8-3JeEGUO0wXwP4@Vq z=2v~(GI9a0t}-*t1Z?~Im{I0vbk&}BeKhJR1L86X{^NRA_;1?V{T{+MQ)vy8MV+Z* z1AVbA3KB^K)tlF^r9E|>c|hgm>6w;n;O?&C`XYp9V?pJBQ9F_k#WuW=Djb=dC3@Ez zk|=bj+Qc%yW(V*Ip_NP zE|2DnlP_BT3I9kuJgvl*sU(?KQdXQ5u2tW*#BpxH|^pkq3JO1^9 zq)^YcyBYarJOA9>=)bAGLOriVE}op~5WkRENp`qI+;-#gcc0-d<@@)$Lrl(0`$kFBna}LJ?+u<){8zX^2kR z2pjNIHI&doRyh$_h%vtho}ZhVRAecG4V&qA5kG^O$iW)w6&;VVKPf8?GSGO(qHQsJ zYTv`(egN&0qW-H38#c)f=($N&y>^Y2(yQqYo#B}pYEpZaZ?>}Q|Kn^4v)yJcrpnyy#LnP{&z5m2amv1alf{0N1UV;MEAY zCkWGG34_oiYpfA7t|v}#Yvy>a`KtbR_(kCV!Z)a~AP-ik1f>Z7rLf;ZNP3;*#SUjA zdNBN?B$FC;+zyK4lApPKiB9#u;w4DolzCEgiAEp=fK*QpbW5NhDPP*=(2?fv#9o-$ zA6`g?VAAAdWs{qU8TB1IcA%LkF<~e4P(-VsyCc59k1j4y4fxZbN-%=pz^JZkAsr#X z+o$^7yD0d2Y}XOTN%^kaIm(2qaQN_HMC)wXQhj{njxRDC zIs0}3P{fDrNssOQ~J^tKsP(PWy>Bt_|-Nz3dpr~%F%~6YV@KYe>OR5Lh zR+k=uz0S|u5qmdVT3QgBLtw?Xa}8aHd-v=~*DH-gAFHC`4E85~dX&+?j~{k$_2G52 z>bbjuBq4YVep&P{-5-quHMOe6nGl{+as|RNH5{7I@bGF=Q4tYzsi6;%H7Z~0_p91*7SSdSF>b2e z&^5@&LU?G-T1A_S9*I%7Hr{a-Ym2`czeNo5h%%t_4b2gI`XQDXv+Js|WOAB(rlnLM z!#UG}b7rz83E9QNUIfrt{uCUA5(IB~j$!U+t^b=tFg`M}K2ynvqmH=$Tpng+ond+| zKS-SWxa)E6vkdHaZmF7XwRzUzZkyb4VhTrwgS|bXk!1+z|ELDsx5CPUEW}n~a2fpQ zPkQ@Dh<%7`9CZDlnsqd16-7q&pp3In?rxP}3P4N&1Q`tqCjGY&>->`iP!+nU#E#s` z>}>FvJbCO9w~NF=oi!3DqVBjW`md*i%lkgPd!T(Gd^4UL-Mmy(xv-NEAx4Sk-S+}M 
zSyYszp&=C| zpk?HfCp#(PVq-55jo9=In#=p^oHIhtCh~my*_D~SUp|4t8F1d1DHzV%4Zk9R>qPZZ zqFa#DzQ%>0OI%5f6gz%shFumQjgRU$#qr-3$u~_-y(rI9T9S>Ym3C(;#oKqi z^8Ss|G>>yzx3X3IN}|1*`YnTJMpk!ZQ2a+wv3YQ)N}Q^TFfN%SQL^eVIYq`_92ZQ_ zvW4WAG9^&m>?Ob1YaDz}1sQ$f4&gUg*+Gb%qf|EJqdTt1YTdd&&ES~=hwyM%bzWy1 ze>z82%scP-3_jV3_eSbhM&Aeb&=$o!=sKs#+NKmIMeqvas@1IP)C?;A6UrK~w_q>zn-2)Ui+Jjve zPim=EQPU;~pD|`uV+L+cVX?lIdfQIYFHM&{NS2rPj_6(9V6|lbt6xr461x*L4+LCu zun(5&Zo5_2L`%+hrQ6GSl5b_{-Jge1aD4Q>6uZ>=bX!Fh7lr=2r$z@J8)1Z5=Np zB+7Z>$ZYd9HM_=x(tbyenpFaoH4G$;kHq(8;*tR!!O2isSM}eukN#7+~^LA z_|_{ULXpW*Zf>$0GkQ9YnL%DtL1O#%RG*zGTE}@PRWm71hvdIV^yXS^txL_KqG~8z zs{Jt-U^aR2Sx@2LVrdL3dKvG_OME)=h(Snv)kIr{m*}z9`f^@<&<>I0{3&>`jxc9Df70d3s{fv+p1MDR)n+$G(;}4vH}gL~N@UH;XBv>H*2^!Z z{o|3=9HknO+rC=1&f%t{TiwoBYc#7nboFlu;x{+|1BX0E!ZbVFXI$PaG{*?%Y7 zD2nohyPLbF!Oas{lbl(TmoutT%heW{I%^H&I(L-wytD52P#9O=Wz39Xb-rKs6RaA9 zA)wJ$iqa#Dif3dXj0cn$G0LfpLsq8biAVspV%(N82#qqEKut6tzwJ!M0QZKsuBx9q1}a)%XjoWd17 zemE3Pyhg_zc|ANU;z>9xlD05;g$<1-d!=f4PeD zNBxfS&u;rGNTbls#6Z8_*|rz(CPH&7I!Cm#c`Irg+!hix6F#NfisQt%RrR$h5ANlv zNMpa4Hh}rsmj(C^p&oz`ID+_`iqzC2XKwCNSJl;Z1H=cFPB|g1Py*KyJthgUvC}JK z?=xM|@`BLYRIO|0@a0VjUwy2G@rn`~8XDtF>eGUi#j4Cxq@e>FQAqv9#@^gmU%`-( zonm>7a0~@Sbr$;cOn`h5bxHpCmm$*GQZTIUW;>v(A3c*p;cqg>NTE1u3f`OyyIjS! zvz>jzrs1CDNeInPeSN#Z@zD$DIRZ|JzydRoz#*Q^49C8zq>%PzTWXHaS+S(~3enC`6I&xDoGJe8|EST-@H$a*eSG#4~ zjznCzSg@fJ*U-UR)EAxvC!`Q_H3M- zSa&UrW>oGUuudjFd2U7(JNx^v{bD595V9SQS5jdQB*ZDH+)TN2fxA=9rq@v8qhxhq zYOYIKP|dq>i!TPQSD*ht6AjS=!vhD{l;b%yvcN^-V`9#$97Ft}?+T0*bbVsY4cHW@ zM1c6==~W==!IS8a5j%SHUgbcp_5$!dWN7(Im2r09--xn}cNNTmMkp&Og{5`pnuVZt z0O~;*2YHIUt)#4cfSerZXrNm3!IP0-HN)bi+7-xK!hB&@jGweoVb=-i2d#wZuwC~b zi7zqbRNR%^uGR0(^F;J~^7&m(RiUm)uIsZrKj|IOOAS>^ShxtIPvkqwlAM~_m2Z6m z%?=Q;H94w7j_*J zsy}Q0mw(C>>ocPR(_j2fA7W;mw;Dsa1Ybckb76hOwt0IjCl~@kvY^oYG(sd#j|eq% zcZ&)M6%=ZkKH)v&Gx+l-83`Gq>{RNBZ8FcHDrAC^@$s2oYBN1gx+ll+r!Io?V66!7 z%nYlcooZvG{y2kw9^4nSU_e=s>nwv$%*{OoBNBWBl#zmh0{tsD?Vi5;N`7Hi(u1ol z?vrt37gOrZIGX4$tV!C%mg(Fd5CNw{^tRBX@dTbRbL^rALbl^$V*)AJhSi7SJX-H! 
z#%^U^ga5*%UK4uyQ)j1^L8Tpq>9Yf3df%^_AV$H;%Gw5J4^x&l@@`B?QE>|itRv!s zRdD}L$VaY&c^hr!6URIhZW($x9yC1XN-&Qe98NLB9-+|uwPOc5+HYZr=H}*t)WOG8 zsg61Jh}^W)v*K>rn&bWGElGS92Zzgl%(xKiyrMca?(B}$Og|i z@v`STY0bmVp7r)E`OnYX2E=INZLS+y4E|E|(G#XzqUdS{*V+Xul&9E>6?L9udOV|a zKlLezqvX)_&Y_9r)2EB?Yhg3vp$vGA43Y7|ZZ`>kg^tK%aWwQG*UKPe16eT#$_3UN zSoWPI`eTott=nZ;Qfy^Nij(8xe_!lSRL<#k&&zpyW`#te;apXp{T^neN#myn8T_$+ z;X4neMg<0{=x)p=1z-z8g9;7~#^ZUQ5qvIfgjGPnZJp z#d8JFuMQm*&+W*Y3?NQ#hUW>t#b10%a!+n%_53W0P*fuf2(XXNRQWv#6UhD4MWfPp zR(x~awz)h;8*zu2-5%ieAox0_FOTk0R&K=OuP#;AwdYyt5nFB^?=@7%0ph+opLE9y z{@Vp%r9wn|h|ep)^HR|Ih9(&+8yhr<$fGL-!928Pp>@f!yATBajPbW9>uIKiAd@oIV|6 zLAOmJJK&b)cO39&p@7!uHqM&_ntYJ)+YXW@u_gZ0wac^~otI zsQyzwejJ2BOhZ$E0tu-M8x)z}GcPZDg?sK#$jq#-tlUY0+`xs47jGCC9OBUxF>PYa zWne*pcX(J~)>Ku>)zn}yF|ZhUa*BD2L{_BOhq7XOUB7@{Z{ev)+oM+>`y&P72&on| z4i6(C3gl3*D6zjNDVf|psUDceQ+Py0i;>tcJP{Ebe29vQAHKR`KQ~=7uU7xVWEZih zqq}H*C9lXGq@Xxf@(xwy=+O^YiNr>G0F;?6B+#H|W@jmRPRd|ozs2gaJJb(?`#t(>bLW&EU!If{c8XFpYt0WX&Si+@t@2)pKf5)&Kk+XfUe{#$yu#t6bTL&}^3hY+qV?%jU|;N`z)1cR~W}AB66T7Cs1H z(d)-65D2EGp@D$xMK;cTa1RmrAZc=E`Z~!-YesME|KjXDz`1VU@ZpbAW+g2YQAUY0 zNLEG?$qv~KnMst9U6E8IyO5bZGP9+W%E%TOWhFvn&-eUz{_p=i{>M9x_kA75^E;mB zm%iW6xbN$}uJbz2^AZMaD(uAzA3^Z#jf{`_q5i-b)$TRW+_KxPTVLJ;uL`#sO9z=)3&dN>Qg=fmT_#f68VCP>vcY0)BD` zTR630IMvu_i);dUW_@F$dQvhNI^W?L6N~9|NZwb(ESgKFWG7a?V5{| z^EN2tzLzg(YPzn>|GFG?=&uAVhiAR>c`->}v$S6)H*@6OZe~^7&B1)WEO|#xyoaG4 zN4>C?!^u&4j_t&y-opwIjKX^@QrWujkz5=eq|Yp80|WiW{hZ{FtghF8vN*Yy38=KV z+{~X1LLxG$eZv{mMF-?BX5S45Jrf0rf4|q?U1rB)s`GbYyQIzR14c6FMjO^iS9g{sCm)oQG%C%Z<~Rc{ z%AH#>9smAyJ7ZXy^m^8G{S%fA=gfPO5-$$~A~A|c)7N^C(Kya|*UZ zw)XfVL5jdS+aosH1#I?;wU+oL27Tm@sC{`Yi1*b&eEPVA%z^sPg=ZRn`?CJA7bB>r z>E|Mqm|v-wmB?}AoE8sAf&?sXy4#Gm4Vmq^fEgAy`mkI`5S^a4BSaikQFs-~j8a*7>*1 zrw;lTAo%xzO@p=&kWN-1uX-Fd#kYS$O2NqDn}4-xGnm-HAO8E$ogCYTOqS0!4n??~ zZf1S#bd%qT!b2w_b4;pZ_gz9%eXqj@IobK4xTkpV@87+Ph>XPgr@r?(FAs!w3v_I% z@A%>BI6V6+oPy4zruKH6q!Sa08rR=bnt#twPr5`TR3zQ!bY?nH_N|}Og$@7jN2~2S 
z7}k;h_M`d!c~4dOzjp-AG6ng5<(U&7w$EImrU(_KbQBVvADJoA4d`%6OWfkIowwP9 z>g@7Wy9k=F?JM`LSXdOnUXcJgyzZ{Y2PcVL6ToDVCcdf}BClqud%u^c&gwepzLRAG z_RhZwy7=kr#`N*5l}bFPrzl2#@BYVpR541vGoM-a^rrL}IlqT@)li8~ef;>Q)#YmH zuQTfJ*{L(Ud|Thse|&7}vdzw~xwDKx2D^3Bc)^?3$m%9SZ;KiPrP5%0`oMe_I`+HD-|GKzwFj_U>~KK{-;>=yD#IdjoWmKHvPu9_J}xf zwsDB-Ny{rJ)qS5l31VeaPrqXmm6YpBXL_9L#(}K*^w5CJ$#lQMS{4_F6uQqc8%Ez` zEIG^`$=V&C&E<)u+t@bONKdc5Kuoq-5fr$csK7vJ07fc~jt+@%6cd$lQ_6 zDWiz7$7drW6F)r}TAmn-QZn3mn)*`5*N7uy`%L@Rt!z8x5#v>sHMo4&?a&5B2k34Q zWjEf>x>T_)g%iGT=n>$IC19QwgabW=Qx1=Zx-`-A`ESl*&%tB<{y}-`iP|P_mjx!m zqb_?@0{6%yG6rI``4U{Uw zcYOWFBpv9%NmJ&}@HI1>Md7Z;r6bb62Iii4=5N03+?PtlvHjtKS^b|y1u?k?Jxtj@ zeaEKzAOAA8+C(t)20nkCYgT&QYxdUXN0TAu2Lhfr{kZGn+NoE7^BmC%=R(-|L8OGp zo=UOUVpPqpT|?fQ0T-T>!{^p+#alkWeUy=b^1VY)F7K0=vdLUoDvyS}LITVp${AJ? zr4$~YKAX>cJJgWI_V~=}?l}X$k5`U*mr97gsFu3(4)mS-aev)1jtD^oM}pFKEslehYr!@*ppv>@LX7+!YTe(kr|#M zg(2A{iiSNAgw))y_qr*2aW*NcqP~o8H~n!&AqfGm_)vH)IOpSd6CifRCbne_a>-%i z`m6uV8mYF z-M(GV=Uqqcd)2e-HPsxCqw4oWJRFl~VST(YM&u+#lt4*TDq;v#K-Kz<>mLqwz3A4? 
zUMS$<Cj=t7<=Ne*chIgZdOLMzaJ{fH(j6~5M=#}O9bvn_YMt*!;$G!$W2L;xh1?rE zCuilgc3%Ey@H6KM!Old#K)o93<)Gp9Dq#FsLz;8bpJqutk@;MC|L4m5O_Fo|^}+-p z{e?ff*Q;_tmS=rY+epUud#N;pRMq!U>sXPKG#N%a2aOvd^eG+GKmW3hSZ}PkK}T&f zK`DK;?xn!|IWKb0eKjG38IM5Pe}@32w!pbmmDL}o zV_STj?>tYs_9D2px>90brw$)iao*6%VV~l^_Svuhau5t55{BGkcSP{i8i$*xoVTVc zcYf#=)KwV?^>J2yz%HaPxv@j^oO0)_Y|4W^1VKMIQYrDP($>~kyuu(kB^-V&Rq~InaFvf-Xf&TpLAmvqeIH0 zE`{q_bu!nyF5ECoX$+K>%`A~#Y?;idQD--+2{^a@_jJ3+wfN16c8ga|r84H}t6lss z5WLQV!<%RHT~*&-qmuXG=HLEiUt{w=o44QXHGlA68N;xR-W!1l;q+dc9ic|FL?o|B zV_7fthIm^QgN8EsNxSOE-SdqL&s$Gr{>uG%Rx@Ib;Xc>%uL`291igrhc_Kw(QC3>* z>6T23coRWRugo5mn2mx(m9_e>HEib47;TD1D;$@?YbZgAqk?cU9SKg7fccT~*SkW;~_DZlUDk&OTLp z{o|F>QaTVVyDF6yYvJlKY?@bVGWJ90#Ok)_yT`LWo$Cb5&|zYOR8l7C z{-vND#})Vft4W(n_*QJNF5}u>oZ}`oWku_yS?7OYHau4x#?INgkIyASv zqmUYJ-H?7Zz`a@P;LGQB>EoO~Y^xrm9k0KG>RBn1vCH_>nVsYpI&X1r0(AOx9Iyiq9R!XiPk-C_ff55 z8yIJym=!sBvgU=*2(Y~rIviI>&=1L7J2=w5zRX7WWub4;)dsW9F;@zY){Dq_{Y{>| zNu?)19Pfpba96H*V0!u}WbJ@g;4m8A(hehc_4lV!rhDffkNmn36cU_bwnydujYx}c z0pmLvC>uT`RWm%@;-uRfr~r&sZOC|uj`Q>H@g^IS8gtgWWzXJ5)0O7SblqjIjg{&> zMLHEves~;EMfIVwl$G?Lf(#U@nxZf}wJt;YBD;Y6l#=}GtpUs{r>hQc(Tgb}aO|5? zBkSKWKufPw5aNo0P3-Mx_gNmj;ZFleSJl~VdSFaF+$&}E2Q38T4Dd?W7B!bYny!<> z4cgq$XUT+(DjHXr@GLrrhKo<%d>^RFtE^-dN|fqFSs0aicysedK#uuHal!BeljRke z^r-EZyNcz2^5$L}=aD~+%}s_~cXPM6G>u8c-47CpQLSp0TOY`w6c`yPVOGyh_BQIl zqgwbVy2CE}EHy{G8cRgUgne-d^$V%%C*?h*V#LO_Z}#2>i948AQBhw9Y{B-21PVQ) zZnIJ<%ZW4;Zf502hJsbIq{Z-o{BkdXfrg&4PG@in#;az0j@NU7j!8XxdMdMa+ z{qU=o8=ve_30^e(d|z$l&p?2`K!;eI%&_Ev)VWjXWks9T1^OD!if-)6O_d2bL42C? 
z-f8jsMEDb$&)m(euM~|^c?Krs{}vwW;AGj|JwWg7nsH9Q`fyNd!Jm;B`p6aE{WLNk zr_)cX{jio(Q230zwYs_*@ck~ni}e5&xdWj)cmvc2@+8td5&(To&3U`5{B;TeXgXn) zfbnN@`t*H(r!X#n%lgplU9@K)KYM8X?n)N;jZgvuX}rj{lc=n!8gI|}g=P&a)YmyP zu&{~>2w>d3yW05J9dSeGTyqZZ1^PPL8$*iL(dPUGYVGL<^zG<=0Im4Y1|3NznjAtx zLNGLi{^&74H-es3bPBal3@^#Y#djwpkY@UYV0lJG^#GefhXpNUl1Cd6@x(_gKz_ug zPH*J(qwz!6jL@&@*7g2q6~x1NE$_WkO4{i|PL`}P2UNerD40Fp{-x6@Tb*1tl^R;jo}`JL& zl%sCgLD_x|DHZy~HJ?9c!9UmDo|`&0I{F9Pyg+vPUUDY~ZGof&bhdnJtDm!5SX#m* z3qU6-Zr}s^SI3~7PzZp7{GZnJ@FZ0i0hSS9#DIzb@=(*#K6>~t4Ems!RP8gCp#6%& z<{ngCcoL&)Nk@4P7%U+LZ!ieSOG_n)+r!N?L~Lv<@X49+o^oJA*y?cX=EaY=xITDv z2Hp<@^#>}sr0SVoc`dkColDi&apY-ph-gyMb^4k8%kGBVi^>0J2A|KaBkot1Hyt|1 z)w=8UiFD>^^O2bi#KyLj_)v)r#NB7+wVzZdq=ZMaTkqdCpH8j%x??zhl96L2p=F+O zvc`G6hvAnEt#N!l(+UY%n)UBmM-}&#Ha~8*a*J*{#KV_ByJ_3z#uP5oz&%wQzJ7ZT z93|%MTx}I^ju=@KTD-dwpiD*Fe|&aGGk|AX|ICn{WPe=#V>ncaaz&Z$G+9p^2@9_H zDL`la(&W@3|BK}fb&-9?a%DH-mZFCpw22C-AD|gIs~YZ(QohDeDJm zxD42S+XZiQU*@(tr2aM9qO((LJh!RbrY3O z{ZtI7t5;OIBvEty!{9hG&K;26MI*}~Q4L82*BM?|WkEsP2R0;7qTqofl4L@HJC{Uu zd0}<&4G<9M39?>j<@DMW=#Sw)pc{oh6?$L&ovh)t8$G}P6cZ6S_?x;z9!i(IylGf9 zV-Uuk-MgW7+P(J^{2;edP5?@H5f&zn4lFvPSQG#%(a1)>g+rmSqM`y04;DPIS%)(N z2B?%cjI)K;;t(oHh}#`Qzeux8^H1&??ov1<_*?jwJbU**J*VKDg;6hmwUhO4`hktn#K#e%B~LypUYpPz zyuRHtG3VSQuPxnAnONSw<@BFYuIIYe%MUpQ0_-<&7_VeYGmhUD@?YLhCBShW&1s}E=BM+MAS-8izvm{SMz+6%ez|pQop#W&HYy;u4sv zV`F2GVzsoWJ5yy}qTO zglzR~QQ(QmNl6bE-oWGHY1I?vCgg_0XPHb*f1!B~njfqV#6(5qWM$Lz3)OG2-35A& z#!uUJ`A#&H0NC}D!gGhvcE4>Yp*a-V`N$l$ng$vcyr zL~~1v&B;CR5&_Kus5_kU1oZOL&|n5<7_4xB(U`G7OHY4ZLE*Wexw@6rXdsXExZV>I z6!SFJ_;Di%gn2{5ZlGnlb(NT=5Yy423kLP8k2A_p#3IUb+8=#`Ps+~^$>M)nRqQyi zPg7$>BD?+iJ$Ce8;OL(mfOi1KHt6p9D{{?d@Pj}MCzWg`ZLXX-v!TU`m4N80Yc+H0QK~-N? 
z`*jBg2aypTF0SCva+m0o{CrZ(fG$2XqqMF&F6XrC9~S21GzNkFw&*g{%OLr{EwBTj zc#e92Y=lF=GWURfT6UxP4DQ?mtO1i^@ZWk)X(V(gaQ-DKM~ zhV@`1LJE)}yg1Bwx{cHc45e@F?O0=w6QDQ!six*SL@m%);1LmUX(M`L5-n(MFrwsAnAt!Kc{76lp%|N844Lj+F)Zf+B-!PpET za_h?^?J(vVgQ*7-rc}V}38uJ7^<7<&!>icY3IWaEHg{t6pI1@I%*Z$a20}hB;so@f zfJ9>}4Njz(=2uc_!VW>2q4CD3hQuGN`ARyZ!Y_PxVObnhVdBlRg`)b9_M=e8J9m7z z19jmvg0?_qn|gVcV0iJP-k(1wKo(H@&xW==g@$?Us;)k3dEO5u0-S-;va%*nyMQ}l zhv@L&!N!ITe1!S+*3ASu#J?sSLEW$}9EM7t`0?itpQ#2U6NJL-2ZaEHfj`})Vv)-3 zp#Ou^lpJyFTbYMGoUD zXl9W+CoAh&%G&{3QhJb?*^^D@%N^bHJDaNVx+13nYJ{ra|TfoC4?{M_S*z=0zgq^39MooH)oqh(=od8S{rwn<1tk^aQx#vosx()0ef|0sLM7Nb02SU! zO$i>t%4D?wI1!pRjn6T#v6aC)3JqL^J;Ph3Xh6Q56c|o1wj=-X#dm{1e(Xii(!96ajexQ zu(n*p$;Nx&00x-R(a`+Hr~=(9Ntu~XeSN3@u?V@o%nNGh){fzc`m|f)5gmFur z_0Q`wd~p}-M$yQXp|;y&tkK3&N$@!Y(g;uC9s~Gu7RaM>} zuH>E3$0ztO&{E$eJgp{!7!(+rRUR7a{qgJA#j!oVqD;1UeYUTC+n(ue+6}|)#WJ#q zcTM(F-n0KV6+t~eEPmbLnPt3I*>f2V4h9Apnw;|TV(m6&FX8xYoJ4)}{D$EHyGg4} zWu4aFmtNH$v7NRNICKaZqYmmZn7H7SOu35Ny|~cERb!vF?@9KRzPR1rO^iHM>rEEd z5iD=A_#N690}Yb8iT)}_!}$4_#^Dq}$xk6n!LLS-UjH)p!)E^j->|(!QfKwfc4ZMI z??s|Rozw)TV;lP@2whjZ5t-O+{fYJm`KwV@z1&E1Lt{}QvpX{Fxdw95`SR7}uT4!k zv9Z127=;O0YT;xIFYzRwUFpmDa_xGa(u8Sw1Q}}W>XY?y%_S9B*)>{tf#!Wp*`bE^B)dgM_ogT5 zqD^~h2#=`L{Zdx$rf~N-|~N(z~|mLFI(Emk45( z_kLQCzEPFx_vqTw1E!YcE|TWJX$@b5Y+5in-H z2=}-&0S%CpEXT`0f}{9>V!vmHu13ym8rPn*Yho3QCqEA zHZ-(HacJzmf}Yst=}nvT&|8h(G{v6g{P@l35z8&Q9qR9!LW+jd(dmgz{aoGEE+)hC z*~pffJEvta;Gs#+mMKx-QomzmimDj0^R;YUE%SU$rADq&J<;}FwaBn6^5)So;4dsI zJoBA$=A$xe#M|iOglf+r!<{n@T<41wGOXja_MNWh<1=!1G`i-WOEHF)iqAC7@?dhn zgORS{Sx|Cdlm}`aX6pc`82c$EVM(e_Dd)0dfqu&|@O=(FD|0q; z;7l-|n~ZHCyr8q)-uUZN2M)HU|5c;#=d8HW@Ml4xtG~w~TG~8zayLCMD0Npxe*M)`>Z~1<_BcUl1Hm0A zg4tUbn+W6%MDev?n*R^7GP_aU`NAkj_wsCxQ2PMx2ZWDgBKf?;!Gn;>O6P)+vWe-) zThbxBtqp`RQ)RM5t9Rai^5h(HT_gt1fJ`(sgUia?0J{JgD=jHudZIO>B{kIbS#%ye za4+k+9libivRyhz-;ma|~3_)qPQ3?he`>GAD{3ASauc zyf`&O?PT=%TxW7xS8HoXQ4X?2>_!mEH18p3*Heq_ov5gaT5V_ z0E{wplsDT^Jrq5ld+9md?m|C?@Ih; 
zTVL3Y#p);&-Z2jic+J11CzCub~PiYNUsRUaEDLk$kB?4Zc_L5=oJ;A|lyij9)L4LJ>!|lt0 zao)lL_o~SL)z4`|n%XZ*%lqEn&!#r@=Q$EY%&vMd+M;%KXIdrb4$ik)!lEY_{b{4v-`DigZ57&{)Iz7 zGjSv&etju-<_yJKd>ZN+v+mE|BX}<2u~M#Q}+@S(}HhcK?iUa-uXUQ)X_mB%`v>0!A>=-z`+ch zy*Eu7hf;XEUu})kfB%K2rS=?#EB}1|%TCUBUAxfq)KsT|_>|c1az%M^)fRtqzuB{@ zwQQZXT7auAH2x5P+!aRY>+s~O_6jdQ^bu3vb-q(9l zpHts`dDG$&vT}X;X=+(m;mrk472Umz*zTvyN42`L z5JH9@f|<2Ccidz!F^cu6gfc@Vb{wiuiXSk50hBQ8x7kjgmU9f&m4G^di-xLEZJV9h zhk6K57)w~*dJlLcfXSAq9P!9U3>TVrG2gL0VBoPzglI!o_TpWmEr*Tr_zoZTx@RqPyA(Ftr6SW2j60yMDqCr5;PQezihrK?U z5Cg3KDb!wYTz&$uh2OAb4h3BTEo_Q#SJuYHxu`J^dnj?!i5=jMqR_#NU!DBxIxsJV z;|T>j5YFjCKm#eI!9d4wRgCMj#^FYGeDke#6S8a8$tVd^2QIC_OC1L{EEJ%jVG0lt z%!d_lF>7nTR|GQu|3PcXkq31zCB)zmqH=1U5*nJHplGjPQk(!gyVU!@W(cb-3~6j| z2CerX$Lb>4O-ZpJg+FZK44aG>f#Jd~8nsSj?$n@moqo+^F2@4p(0}zCnK4P8@R3ot?b{~W4zy9)_{}kmk zKkHi_H#>I9X-;KY&P$YU!(<{{7}e>}s6`La|KizOjI;kM2Ky8E)Z$P16l>9_uc5ab zI297&<1yFt@s<`y0mV7{DSHLluiK@WE6&Jtbuwf%zg0 zt4KeRHOWaTSTVU%(*!oOWw!CZi9zm5IPGRNsyv%y#Zq5S#)E`FGff3ZhE;$T70RMuS`Hc1V)fvl_GeMqX64=hfVH4^ngf5OG`U+TXg!{ zeboaP2^o*cH5lERqx7Gx-~SntWwtQZCzH2-cs-53UY(?BayDOnb*O#5y+Lw$`1dK! zdF940U>@eyNk4tSqH81AJE? 
zT?4L&om#TE2|+xb<0I8mvB~e_a*@8orFr3M_iqJv7mjP|=`GsGOHC^6x$;xu_9>p( z$L(~IERdG>Eq&Ar)1h>onE;mUDK_{U$T zJcAU+#|JV~pUS*Vn>K+k4;U3?_yq-0DDvhkansK3Rkl45-C#xzz=C54WD$F{F8DXriKlnwS(rfuN1KKXK4X1*e?eG~1p1 zSPc*y_Vd%HTL?rm3=jBNSqbW-Bc7C%9eD!#m%Nm8SEeW)QzCIuC=b9RrTF+s3!+2dBnLhLP{48L~ z35kh{dqlbaSD<&>{{(skS<-Rd5|@A=m9~VH-VuPnG{MFHa=jQ(S{d;ji0XbKI#D3^5sQrfy zGyak+vgzAMoW#(m_6z89{AeK__R9Ys;BZ@OE7^5cQ+wvPVK}6G?!Y6mSdx)nU(3?( zHrCr-Xyod4>{R~UCV~JqMzw`4ey|^%PQRk{l&S}8NT3!B1)MP`<4kM9q&H1Upj8UU zmYl{(T$ev%tXjCOOc!?Bv(wbQgmvjZCrt-Uo0g!Z;U;Lu8LKQY7pP*iGVt zk}UAN%fX3-mKKl!-d!8o5J+T^8-))ai8IIc#|)qr+jkc>0Z*jc8R@l_BHM{gXEVf>E$t7drV+$9Wwg+K;LKc=sB72Y}v-%?%O&FD7)xM|LbpB%y9TO@7d%vU(B ztic?Za&YwWJ4(1OH+&Pqy^@(YdW*-7ABP+biz5^N!1Ggn!`TE%7+R5XMl1oK??qS5 zk<0Hmq;;O=MZ%aAUWA=0^72xb=>W6mFS<4mp6f36oN{g1wM$)u#n;5*3bzv^qouF~ z`vYbL77o&J-@9u6^pK5$L@6M38@DyIw%#~lQT!1pmAdYd z5sp?^O@9rS&`32zfdZhQ*ynD0iGYyV6c-i<>z8q=E0snTyGu%X+AcQ5``z^K*uG;2 zRLYs{fH{pM-C?2N%{F&E59H4b6?c`~Hzm3AsxPW;K-xXJ$rqc|Ct50k3l)YP0Ii{!iI7vdbW+rJGJ zItSidWSfu?ecWANPY|&_$M#A%#XtB_dBDE_UC)znSVGK&90@*O(Ho^8AbALw{ozA& zOJF>pr4u2~wa_jylXJ_ZN{(+$s&e8Gg-3|jo4uN0Z4=$PBxtQp*3qoR`8@&!k}gT( zCs(uRO!-UPS5FaP4##_#CgSdw-Y;4&doJPYl})2ow@yymJ3Jn9+v^wqGP`Lcb`f*~ zD7rzsN)>w>ikE_%lk`GH(1g7g2A@Id6~_J$SUAB>eFe@wU^?(VUIWXMe3x=XcvmZy zC$3f*tuA)7&mH$u_JhH~s|GW%H>;CPgGoJZ647JCvw)*!xdt54r<%$xmfbKvqxs=u zi`s*Y4*=Vu^uZ67hN^>yz`-MXIEl22S^Gpp?jWJZ*$3q`IwANMH0iJrFw)rfkqk7{ zXy7XfZ?n!YEOhGVH{aT0<#=Jvtm57V^ub=|=F&&9zHpZ`>T2N7WF8S78~!{J4DhJp z{ffepUe{0g=x;U2#E-EtWeL5tj-Mb}_|g~FBA|tahQ?{%PAT8tX{o7-!O#KQ-qE*{ zdvKwqCjNVa1mGbO-jN`|6i2Fry_0X*0P|8QYM|U}nz|2?+y*yX5av zn|mS?0LK8C6qpokot=BO&CUj_X1MnMrpq1yu^`>*-ZMEA^AK81Pfs&yPs}gjJW>sO zwD#9asgN$<$PYsw?)h^X0>|yeQl~sLx{LZ`c#nRlwY#M&hn51WCzw1nv-%lZD|Dh> zK$&sC=?*Gpgv>W-THsC@8No*DRsWg>$+JTrI$1Q-armK!vAnbta@ar=y8$saS?qAU zOw=a(Egkt7k7|MHptk6-rQaLQF$`(BenM|5;Qdc@&3%M$wFAroYYh+i_hgxjq>AhX z0(GSDsQ#b;r4*hF^yU}(_t?@Br zZ>t}689|DjpOK;5wUtBDStNQMP#4HXY2B*Ej@7IVEWQb^UY+5m+PD#7Y9GVcnK2zX 
z^PB+Ne~pK^OtRl(-#lSic)41X@q4JL-nC~;_n*Ao&TDNa&)@g#mq9>|?ug7f1*(gU zKIVp1mO2_&uFby>{8!)qM1Ljn9DT7>hl_*Ne6qQhR{0KE1Fr6Y{q;q{sxj^kUSE19 zCX>1!hZTgl3vjxB2J(9?g*?}7Nqi}4ckW{cDjnb&E_G|E)i_mgnZ3R1Xh{0S7cXDB zjh$br8)AJXx%@-q)2*hb;VrBz!_kO%*TAfSvCB$mc=+SB^VzeThf=zL93XhoWyea( zsMworhq;o(_BGoa3|T`P0DU2CW^5xF7|b01E#y5czJospfBqbd8Iw}2dL`BC2EqWq zwDle+RZgE?PcX5ttdmkw+N2PG6!>HcJ-L?{ES`QGUrhgPT;k{x!j`JTny2!1Dn#vC zaaDeu+-p^W@B?!qE*fkR5?=QF47 zB%e^!3xCxWP;~V|zaQB>LkcqpmMB9d))!bMfni}oV62#%V+i+XU*A9U+}(k2r##+~ zEb}w)tdLz^v+eXh+xNSy?<5p^TI$b4_omwc@D#d}-_p>Um6adUG?bmXcFPAF=pBT| z|4!eRD^#L+!E(ZxBX?Hq8VOTQ{XQMnfQdbdsc2`TaUZXB$JrU13|tNDZ+p(s?$Lpf z5IdofD<&p#%Q+VJKw$Eo-Rt_sC9RlPiE(T5;XuD4b{lj z;ky0Mu;bge&N5fM=P?|bUutU&Cc8qBcsbbFX*%%L;usj}dn(!5)UvyG;%?DZX*_CG zs>$+emeaSVn_0as&~}=_9nT#;mj}eg5d$zhTZ%o}5KG6xUbFcyQlTy2qLA<=33|8z z^N(^klPtzShAcZi<(axtrw;7fWUL`3K7Urw zyu9yPF^XDv0FYmfiuuf+eRgN>;=?%NrSTbL4TODYs4L&^z)OCrPL|@;%!kxpV|JF1lw&O6u~_z%1?VL?HdLKYVnQ*GGrdw5uV2Oi)H zrMWNvZ;8u`uooHjei(v5V|Y?=_wH-3#Hfa0LLRCjavUELZy?u=v}bU%p%xLt$T;w_ ztZ*WL6cw8of-y)^rLU0Kz)J;ImKdw`z1)2jCQ~Gn-l8StKj&Aaun%jrh9p)sKATK1 zvryxqzK5l16TmrwbT2H2qZCd*c>0>@@@nbm6k-xrM8qDzXQrm6_t;ShqwItMO&$ze zV8H&+qNt$Y+qV*zdj!A>x@^`eR9=Jor2teXN{kTXA&2Z4A?sormisE0P@6t^yssqGwB@v-wkFy z)E07PEP%a4rEo;2?MzHz0RhK=`=YP{bec33O+Ix`PWBeow_;FONxH9y->*0cC=0zZ z$3oDOu3zVS{Na(Wucp0YItJ+6LE)Qbdg2-&1XwR+pB0L6Q^yqE^zUK{0f;oiJ>}&X z`7)wge^U~k7}(p$Q}p!p|NQ#33w}zvxg#c1vSWdFSUQ2kX7A)ywO>R-O34Df>QL?TFogS-N- z9e_U?qh-!<`ik%z|=lvaLiG#U(x?UhR6V`F7hRBrLK z1s~wz@?w98GYBsi?P8F6&_#X&iy}y6!FP@sUIM;=nUcR7;;5#NiHQ|s1a3_j2^kPp77* z1K_^_Cj>lmG*AxB!1xNpHh9r2qT66=1#&HdR;40l6tRD~A*BC5Em~Y$U2&~gK!Azi z{90dl%M?B`K&UlZ;1LZ1(rQ$x*i*~li>p+pI5<516njv|ma1=+rI4nud2jF~K~w*` z-WzqBRQ`QhYW|N(?cQruPg}P1+Nyoo$6L_+Xa_~RY)sO{?a$cR7hev{R{a(b7M42y zO{V{!+q}gslfjxh%Ev0a`yxaDXm5*1#NuMwv*&zV1DXpM@q|$xU?5@&Q2cFPn3564 zoVRlYEz625fDvlX=>Un=~i;dhrTOWM?-@glZ{6PDjw1!kaxi!yb2E>*j=HKgJw7q z=3BRp3)vjN$Y8W8@WN0*T$cN z(I2(CWNPhokOPn>+Em!Ln=KIp;l-Y0-g?2O{>$+$I4oboyI5H+mVu#x7k8OAB-YI_ 
z#qWU?w%sTTRgD3G!~Hn=H8-!ZNdxlUKKuptZYaWhq zgq=Crvooa4vP2`Vv9%T6H7#YlA|lTju9%x2G|_IRoSLcLHR)J_AcHzVb#+c^9^?Z@ z@otPdDPnRkifJ&@KdjxM_V8Wbgqh%r+&J=khXgnkJ83x34O3QBv|O|KDZ}B7-U^^} zNqr``WVOTK3*k!U@r9RS4u>&4xwF8gH~;&LKrMz zyfwp<;cO7TXX9vsZ+gBvcNI>*Wiu@*%fIBKroR}=I13)8%qM&pN};*%SRC)AiTg%Q zAzy)t20*eg=$F^7fq;)M!XfQ^=MEgm<(tv`Yynw~X~zyt=%%8gGT~7OR_MTh5O|su zG-JR{dnX)m0}`&h65}~vPZhh(e~b{h)tX@-2DXd2_dg>r-+Z1tanZTB^zT_yqaW#q z8NGLFZ~-bXMj6~@a%E)4_H-kzF;9k#_tD3~PnXVm_(K*HoU|!wY-)}mFnQ87lIXBw@1$2 z7AYE9+g$4(B~>m%)y3ucx9zYve0YpxDP7Gv;(s@BEB&(8VhJ)29e&*Xpf;< z``AMx0d)J%E63>2o41aAuUn}J`z7u0@}0o-_Oj;*!eNC!4;|bnM-7<>D5e-KJenrm zX03cJ>mtIERQK8}`+6!)D-K53 zpo6)PnVH$v)N~w-?llXNI&2~ccp4NraBaA1N*`yy)8Ife(aT9mNnuJaT*eSVLA^)b z);}-+6My_^tMlEp-={Zb)_D%S~T3<~1zhAzM zUj~Zn9~wG4J#A$)jWp-bfdhC?(ZANMJ@6sBlF4K5`kv*VFl^B?ubSN4%RL6eKolRx zjvPVj*vtq5Jwy;7&`&>-j;GLQnhD0-%YPW>`Tl2cXx3GMb6oo-e8LM5jX?82E{E)7 zCJ(NPxD!Ml36`~~A;gkWbD+kC&(+nL`h}Cg{(x%Ce|29uo!B(5xo5K}g0?uDHR$W5 zjQgkjLYxl7ftzJ`Ib?ekk2oroZ&;pYki-$&J%i0b?!uIYqDSo6(tVeiLo^zU=)8 z*?^`$&)#;7{JkGCbXHZsb+Ph~PNRUlsc*6UPFAIDt646ChtmxqstjikF4hQ*bNUCk8kettMf@IX#;*6`mcF>zT_%>cK1S59a- z>cq*&TbUa)tqQbV@4LNRA?K>tiO8IdtPw^bLqlee7y-G5^y8l+DU*ag2V=k;ot$tW zASO+I$BX+`SsB0U%Gz^hy2&Zu3Ccs#E}T-~C#)D#e39Gw`R(LuFRc_nPM(>h1bam!Z;VqtNT{I{vE|q{Qf-F6dTM5#nPRN`EWT{JIQMg!D55BK@`W2(p zJF=*Y-B(v|plqrU7&=b>ij^FYE9K5nksXZak8JhI5hm!2faM|@0PDt@1mKIku!V-E z5P9nO23iTSZT#I~e9T)%jH9xm;wR`b_$!U%#3qE(XULfC|1d%Qg<=bGNEA$(Nii{5 zI0jz7K0*@B<|evn7@fMoHyIlk;2J$3Dg-JHWKQO5PObG%<^2mJ9sf!p#cOMOCb7W`;0JguJ9e=0&BL>tOp;Aa+v2jMjl}SBA$fEE zhbjU`5xxx_O4q5OXR3WL7smWN8BI;HUx*&UXj)W9$FB>4;aDAnf~@ofU14EiwEHlt z3rY|fdH9&2OmV9v-@O0B0-xfNk|pFfcf`A22w8fnAKvVp<1}pnYfM1+FSopQMt>2iq-4>jHyIXUDxle?kQEm{`9{6=tjQ77c)=c`*GqeYhqEwk^l<(N&0?_T-P z2T=oQh8*QG`58$YOgFg?-SRHbM=FkS_w`^?y00!fq#Qxbg^~gb3|U{zmoE?iZd>YX zYHWm%)ExROS!e3jjT!Qaa8=k5(%# z25Bvpr8CezWK(19|AuXlz(F`5NemprUhqz%$U(JAHHuW(2k%_PXH7 zg8Jv{yNM$niE70qCT8XdOECDsN_zMHJqc3H%{c;o5{>M>ED1Op&?4Z-^YZeLg`K&1 z^L2T-UnA 
zd!TXqin-z#5B_Fc7GVFWQ!kxLA?XHr>?O!kIEK+~Mm3D==R=DFF3q2p_gbQY2<_1+ z<2qO^es(_+*o@bPT9;PY_}6j72O*&yS=K1-)zukW)R*?76f-e4#(Yl51QeyEf1>@% zw{rx&n(Y=dP@f?b(}Xm)wu0RF*gd)o+(`5o7`AP@g`kMXO5K8z5UGTUiVCEzsFS*3 zh%P4P47J`qQOpwoo#_Qq@sJQLOt%Gi=j*Ek@_1sRsr5^8JfMfXIJ12FK?(l?zj>pc2BF(CmJmwG1JY&fjYPJtmaV&MK2X|#C6K`B5?f`Q@- z-Yhh9va=a2U;)XD;{M_>M1b|{*H3I|7RHu!!>lJX%JDJ*A#RmHZmwr&2-Tny+(>cd zhDEnOzkmN8hYI#N&h+N%j$0vP#Y#69M5xjQ=@MZ~LnH9V)LM$f5HsQ{13Ao!1- zxf}C^%Fpo&2uPxpges>X@%_lY|5IrV*4TWiCT+TS(+22EF>o>*at|El$pN6B;@rU; z43I;uL1A19S<|yxhWsd#+4bWU=#8E)+Evi)!;g8-lOFE1s{3W#LJZ`m28Fg>PNV(( zK@ja6mnP4u+fB|QnuB;zrU0QJNVKszstFVu81e!p@i;p=nwgtVD|;FUVn0SiL~IWA z16v1^Y~o(-%L~3Vp3`?` zjRVkWBU1_p7BCNq4r&0wLXq8}&BepB zO$Orb4}Tu%{+z&<$P9?HVfdHXCw~q=tWH$J!^4!jRl{R{`ODM!$^lAg^jpjDE4g@W zFt;e`q)jY1xWSRkH~?W_O1WVJHsWVIpiiFw^B)ut$jr@cMut#T<;{TChg;jbXU`Jal3*L* za0W~bbysIoQ=romHTS;J(Pc1U18TO1uGVj^(Q7zR^w{0~jG zdc(JGbL14ZN<#(Z!lsLz`$oyhJH!H=n$IG5TsOK)bim++!63RjUxB7Wn2YT&996x` z{$a%>M`!ZKq4jNd_o>5&l_Ow=JO$YB%$uYCTX|<5Rpb8t{p}zjNs*!`G7~DHs8o~^ z8B&H6qGSj~iUvoe0cA`w7o`vq5haNd35Ak62@MX3(xiG`cbxC=`wqXgp1+>8p8dyJ z9JTko@8R>guJ?4Qt9O-(a@5an+VTxykxPZ+Fg9L!_OZW7A#4Dp1KT8V=!`a8550j> zq_VtxB(vjE%9S%z#f~MfDG6NB)lyux&%#O>{Uv24N5}SESefD+m#x*it?BnJQXZkc zq9`He5fv~_+r!6?w@?y)rnG=5qEH8OMw~swho`srmfQZclA7t!!cmCYhbG2uCcJ$k zZ8eWE&BAf|>Wddul_L>;3B(^!F>oV@uePes6n?{diITedihDB)8!)MYo!xL;6>UTx2>$|Z8Y zSkCU;nL76+4vMqCpZz zBF^s4x?7<3>#s`t!yYOA#LXmZb4^BV{Q3Vyjqxd#R)#+(#O9o+P6UoO`0H<(k9yXv z)1BDM?&ZfqwNugtrp@1dZ%3vrmtR=vDre-Q}IuhNzr-Lbi>uZpMKr|?Y;S$X-%%aTthdX7KWTR&-h-uSH8F-KM} z@^PmnKe^;;+_J0Bcyn) zw8L(^+w*hX7E|k|ocQZoK56zy{dHpWyzZyrv|57Hc1Y!o`a3KA_9Hm!#?PvED&B&R93mX}e65a_Q;FNo?x5_im)_@A1^2}xH+3xQ_N z&Ol$^YHHRvI4Jqdv$RA6x_-jZqKR#@k2~ks*~dKE>ghb35D<7{Xbe^ebn%S6ShIHR z$wd~3s=2g*=K)0O1T3p_y-M1E&rT~}8eZ{Izo%yR_FBPgusFF5-K`Y23x&rMA|1;2r9hvkQMqUU8(d*6TG zzHKd=$B~Lu4AXkmc?!W$KYO#sZ0rrbf9uY+=;?`4a#B;LTrvaf2419i^z>{&tUB~V ziD!LDCk9~vRF_v)O6x;1A;DS?F@aLCE~j&?EV(1HK^Dv_vA2H!UHz|JT0qg6G=2Cv 
z7D7_Qky8mGx(O3r6&IgK-EyK6(Z`|;>5Gq4K39EGsAEGV(uiUub=Og;w`eL>uMVe< zpF8(?e)5dL&XDLnArv4%Zi#p&btu%(%+iX918^Q!x1Z_m-UtelFHCKENS_BC)V-A#ex!5MOh*G0ydl0i<}u@XzQ$5c=(q>?@$A6t&J2;-o1h9 z&MS&u-)lxvFk|rkBP&PQvAe>)oVFZSQoY4TN;7=ahZ0&wab=L0-+iG1x!(kudwjX4 zUs}mF8h{`~yH_1%Bg%iNHQUk>zb7nxP(VNfW1cpDE_D0wNS=EFoB$DPV5_9Cg4alf z-FVwllx>9$nV^rJM>DeR(Xyj}SXPYnjghV~XAbX-cwtBZC^d5A=+R}Zp;a{P`scO* z7_p%o?o_D`T8iKT!jqOo0MP5S|66xdr!A~YMa#*It3 za%Eo|C;=NhfBwql%PH>V#%@Jbk@Mn3@}`e4o1sX6@@DDl|0Xo(3(Bp)KYkwsUI|Ml z^G(lV@UUSOw06Rd3f*IFE~RV-90M)Ns9_RSgjo5jX#}>wy+ZlXe39$GGnt#KGND<( zHcwW~t~>ki-n_6n^$jdiu!J*b76$0Gv4sCj6Aq{j9y(;mb>tDy?`sjf(;g^D4uQM6 z*>P_9v>622pYrM?rJJ4XQkJc#Z;XKj>^xf9)wPc9o%;v$WZ|B&Nk~fS zF!ccy51bK_;u|;I92|yvJ>C+?+w-yw$mzIc6&LB$%I|a8$9?7}Mu}hRQ83l;6vRIM zx9FFw798|s84W&E&sBLt=rvF&sC-CPD2Kd?qC$;`lX7Ip~J35%t598508`0n*>(HyIon;J?ny{{w24~1YHEw0CtmY z70(8xeg(&7z|XF}<7fWpwVa1rW863x(vG24tx@q>jd4*S2m~y2U-oiFTCORz(`vxg^05;_D zM6YBW;OckBWHrfcBRmD2DtZr~haBx8FRek^a5+tAz5l%BL#U1C28F7TKlf3kKcw~T z-tKE+ANct3WB&>3g& zXTQaDS!87;B;=^6si~^21MHxP$D72l|LLp#v7L=aZQi|VnN=DE)WfPZ=jT?$T(}@e zF;l%1cWG^?cymFhJ#X%ucIMB-x2l`SnQDtuNs1R^yA&dET+TqHfBZP&=5Dw}UUteR z=%?B$u>AcPJ3))nU&(GW61#mfXmRObv-|uJs4aZ3gk9pqUcw(vwx&HkC;UOACK?~r);4E&r#RLB zRqn=RK8pFPJ{FZ`&Ul+0vbpWYMIPHN=M-*k(NOj|wk6D?w6ge&f~2yKqQ{c#S3YN> zMyG!%>ip|FOHAr_YW~Nv_0ij!Q(mpP``%>G&}!uk{S%jz9qN2gjzZb0Xl`^%(l+tx zG?@)b2Bsd>T^r(j9kqiZ(>zVuD&`{jONO&+bM1$$a+3DwVBODn+2_{@1}-_gVrGT&EV6A+#O~&>jYTKiA8iPCu5HYN`hQ;GvdXCDIXN#fXbYT|cgt_i{3zLc zY~YJoavakGdu7&D84F{EMc$qr0-8h?vuBS5)n8@wbQp`W*#6W{b+h^jJ5;&)uRh66 zB<&jVJ8K(M&`*v>$<$6~1JPL630MY%nNV2mgpdY#d7=X{oI;Kqsr;UD3+K*{A53q{ zDJU>Wm~L!Lj*+RE+1lb(>O)YC0{bvZ!^|g7dJkW-ZXF9{J+7Y6QHx|5k4A1&L|MT{ z=FRk!kx45?BCepmau-2Ud=N#HZ=Y7C6M=W<0O|uU^YhVn|0lUK$Op_7Wj2a_H+Ofc z4k>*mDN&MAzAto<<}3bjTjh1YR)iNMRbT-OO20zz==$}x@%f0>*}`Q^G%!4tR=1iR z(I7s9NRW+Sht}HbfR#w2V7cwkmF6}jr4>@suY3Q`<>i9CL(t@5PWl-2((+$;n=ceK zEWJ$XYeaU;=3#l>N;xr$5H5;A;!Lk#aRQIz{iOA2g#QPdGeF;%l&UFHQi)KYDB|=O 
z9;|O@7&d(P_l5xBDbE;_zeMuu|3zv1bP7-h6e>%;@55Fk^a3ungF!ycz+W5}087%w zbsj_iD&K!Jd|RWbJ!ul*PS;MJR8mvBf}e-xD~9^eh9w=;gt`bRj|asy^$^`yzu?|I zdMsVJ@t7{zLE|h00JaC}R-#y?e)1WagF-md1Q0 zk}aQAxKR4D;$q|R@#z4YO?^j(rkKGsujRsKQodE~Xrj$Oa~WVE<*)m@{f)~2>;vs2 z_O|H31Ec6bSSydlTt0E;=M<-N2??7}u_F1Y-x<Ve52m9DKEv+L`@X%0G8F=0m(G{b{rRHno!gW^0%c3}TnNM)=697Tvcd%gG z+g6sAM$OAOUXf_ozLI;lA(?Y>LUi2>Le8CIa^O#baU9#S1PxOTgX5vdPTm9|}8Q9Po!I=-@r((&O#leS(I=T zHO~!C=4AN#`E@z8OHq41f_bC6Ptq7TVW#m8FyF5>o2Lvu9npb=&0Xdj0a{fS@3Ng9F*}Z8vkz1!jde7OMt7 zkU1JUU?E4jd9!Bik!cYOX=QnulXbktzs~8|cW_y_jBpkJ=Gc|;n1SN5_6kLOis$W} zJ9j20x3CD}GtlOWgEmlXhRuwcbxf-km9uXqgp?p$>c$K;(w$`xf zvNAI9h_-2n@K(1%cMtI48sHKTm=)Sd-bpZLK6x5>SkN@-QV|=42#**nzpYAOl3}5c78Vkc>gYPAyFOO>04@&odX{t;(O+P1qpJy1RCbRNJpi7VVIQ)FTv4GYPBJ z>MQ?ZDy&jhy!l6-BE7;6v;Zm+5y=_N1taH^cpMQ?N^IjjiHOl`db3-Va&$M`SS-17 zV9R6);j^kH%O4+ZAYNTt+l?*`%^_VJO#kj_Sx=rYcaza-bg#Wr`u}%2;#PCaC&i5f z449Z`!H2U}A?lnnXCGm7U=m608*gVZ%8s4_aC*np_7zs!w%t1D_GDzeP(Xk24Ec1= z+Qz08b23dLi#yUU1<3~)84fI4aCT4((wksRYcZ7&SH#NmuB>dtuwjsq2-lhVX8q_R z*opekq2+LljK6gvBX*o)E|b8>Lb>U(^7cBc6f*Z`nOj&`CvS60Stz~pL23Gf2Tu7{ z-k0r&`X!mseHN^C?c&lgax6}~j*Yu?{W>GJWuiu~5RnYNVIiCA)ifOddV(#7%hI-4eItTEB*i@e4nkU3)cE%tJmnO`4@Aru0{L^ABaB@TElZ z-anycJ+T}}*Fl0h5$=MA5Fw_H&bqAIJ^e0^t!=C~N|D?1B5v#RODgY6(?6JY2)~^@ zDt7C0r5Ym(8(*6(5zJU4IJRpQ{`_Mhkli^ zmz;~-v>we3;rG>ACeA4v@v3u*e9G=WD!OXQOfu}z?DpvG$KMV1G)5Y_3}{r8RoJb6 zxwA!r)AZ&A`@W7HXV`E#p>={`%C`YJ_def}s@4pY%N)=m?pdweK7scBi(QmKz zP#BeaJt6+m35{#}EE5JKPZ^LbsUJ01L!zj`T1Knmsl92zCmQ#@YKgk&?{qcCNa?Np z?44C|Q6Vn%Q(WtHTm563Ui#KP zuHeY)SECeK`uqJBapRiro4czb$9?T1Zjs^j*xdGN?$xg`tA1~un-b^mv-vcQ(vIr&Px zO}6_aZ)i3``nhrASJ*WepAnziZXz|&Ur)q;>A3zs_z}5hlA_47$>&zhvb9~jcrob8 zs8}71xdBphistKV%UYkh^631TO&%39v{C}IBj+ZKzqPi@<9Vkw)BLK|EOqWBw=vSiPXOSog*HE3T50FI}&DB47o2A86W7B2l!jZ_Ku-siqZyP>qx} zfFH+946R-h5D+&{s9{7Q%0lZ&nuA;a(Ik0=_FQgmY1xW<0cFk}FXv@i3kIYc3qRPX z`-nGHwMo8(=x~)09wPAaQPCOuH@7j5&5F`>?P9K6p>DM)n2il)N2+!wrG#~FQh}>L@o#) zQG86GkM1;?9tqI?@63_0@dP1@=+kSo?7rWbpD_zkfFc2rH021}-)O5?8m81v{bz)0 
zKBgIK-axTI7fm6mhF+d-=z5%BFF^f6IuV(!^Di!5WoxVSL4K3H&9DN&uX14#l{ed6 z4ZSPF0e&U1;?OJaw1#-42U3qbEd-u*phzJINqviGW8K$PISFKX^0Y>Z(3VsPxw5h4d zCTa6p=y9~ZSjB|oWm6NAd!0{eH+}B}hyLo?+Hm{}9H#P5VuXevFeQsGVzf)Hq_k9> z{1f1BZevL57kPQNw>4Y|&K9oMAI*ajVo#wZ-%7QChLbWnYqTJ|g_}lqhni32Qx3>+ zUf#i!1r`>)C^|~+cI2MG8USXy>XL0LO%S3vl{Zs=iqK*fGPa&Fo;Xodq?Pb&_)_oPzyG#U z*pu}_r8aY0&rr^!q!pou#RjV}JarWo2;hKJW?JS+x;W}r5_ z91m5ulrI$i_`>q@-G~&T6sOHE9X&szWc_g+4@Z1AL+XmJrxz65#6|Dmd++zw@qGrN`!_y+Da*0aCGom?eT!3@u zHZi!?z+=A}T5~hA3mymm*3r7Z(?iBM(C5IxgVm;-A9VOaJBxeH$;qZoplV=oFf`P8 zg2E|bf`g1cH*u-nlGdhz^#hYvR9nQ(| zph2apUuT)Yi&H!&6V3rg;}i1X!%BVs9N(pm`(vc1{O}NxGYkwmnlk@^hyP`Q%%8(> zMf3WUMWjxLs$%;0cTStH1^qMQa$9P&aP_oWsW;&{{wS}Vh^pPqcG*CyUUbW(T!-CnN$PTt1H-}E}q z6nlOD2Cso=K9FU7#u`#hHl7i#fshib(g^D37Ey`SM3wd@enoYy{-DlRs8T_ zu+;#UcF9iAKyc8txd$RYBh4p|YhlW5;e#`}RE&Hw{-8{bly5irk+lcz(fW2?0Oh@jcAkV&g_NWuGJa zc&28EyV1RoX>8lKU$Eu8c^Y_@w2rC19m@OjnfCUuMJ0smbGz%ld z_qsZph7zrFLQf%5GT`XQ!vk>x))MYVdZ{0OC#?3EY=->#$yc$J@aJevh+TjeP6A3@ znGOE@4h$nJOhbLW`Jj~67DgMmZrmun=DwZUkQs;9vr*tpGctOB@t4iEoO>pB(iYjE zgM0Uul$TF%$T{hHe4Z-~cm!-O(}F)Yj?>~Hof%cvJfoLp_?vV)v-aDtKh6)yPJ<6nadO( z?>uMDGnRd{p>Rk0_wB<#CS;Xl3mfCPoV?s01jm`()@gY<`r8ic&dRI1>h5-)CAH=| zv`}+0ecg}Ya2k$v^>;q<@8=$myKJi7;=Wpa#z#6E?Ecx^M%CJs9h%+rHQ2%7>@4xN zPRiRgWZ_$E`~OXF=LyvoOW51k;lpEqRxzqGsSJ5cR@Q20ME%|Y>JVybYSAz0*V)y% zcu005OrqV?5MZq9AHX@fd9#tgCN3y)a+op0^`BU3LMb28V#Mfxc?}Pf>bj%x-;1lAW(5Kh&KD0m=PQ}iD zKIdiKhP!&xFs4v(&Zh6Zx+d~)IowhO=Qof~=PjIWOmd-~=VqPZ*D-cevv+WsK# zJ@Iq1J{!&2wHJP_Frss-)~xCIMij6$Wj!hWn4V4Bh#FDIU=)Zw5)vm`G(rZW=WhDy zj~5+!{jT7YcPcIlec;>f4B;n4jrxS3Zoy4IU0Y#`6N+LXl{Y(E6>D3;^;XfM)|R>yU{@aUOH?+Jeg6nds)1AB^}TuuwG|IE<#c0x;+T`5Z#@L7 z<=AH@ExC#;w-V~*7WLc3JR%TNvmeJVEIcZ(OaGQ4gN+)%qRqEq)x)V+rQT=vv^Ra8 zu&60^z4auzzXRI^f~_KZ#(D{F`o}T(KJyQ@fh8gB4i6U)B;FVdf;5jCHyVJdfsbI% zpzs&o*mwrZ`qdr3<$vLqrrX0@39%-?y&Ta?e+K+E;65L{`|9X zak|1vAHR~syO=I%67CNH!0-iGo15#2j~ze0uDY5LEFWGji<}ZFWW8Bh)}d(bIZ_A@ 
z!-)Fz-E=p%8z@6!V?QEcfB&u}Ii{$@|8!MBf@zl$1FyR)<5LVJUv5~!X#ENpr*^`tLqvRiU<5%MYOJE{Asd`mznuLd*_F<3dC+&jBN(Ze<(w z{E*ll4)I?po$;EQCgaDHmodtQ0|#`Di-<5dNF4J*E32Npdw<(9>FdQ)zS}!3W6DL~ zzJ1TYD9@Rp0b?CR20bJf57Cyj2g^&voGF@2twcgEyuLV^(BgI4$Zr$wB~eU#jU zC+DM@g1ResxzgU=tG;9>7=Ets)pzdfq^yGL+C}FybgqknWElbo3i=-tEqtV4p7S1DH&Hck4FJX%&?5#srxWv`Awt5jy4ZDw$m?R|={J?b9EXIn-F7^XBUqgw?3NE)Y zXKtGuOF#Y@*BAttlN+#C%=%<>bRvK;66vP;dT|S$M4l|Tqg4IMBb@pD{ylud=XSbD znw;d$w3KBgr(~tD_39gR(UaPrc~ooD@Yv#6QK}#&$;Xv zD<^TEK3yrdg+9P1guOOU&IthFW?MPSzHY0WFx{e^|8mYckD;=UFoHHBSZ6?0AK{)F z?5H!t(XkTA5b*z>dRP5?4Y`%B?xFZE-j}0l7outy|l0G~}wO zF#!#4Jx`qwZ_q4fDxofQ0@E`?xp+>lFHQ5CzJKtB2}j?(vvU+5D<|hx(LzJaFAe1!gH&8uU+ z=*<8F7jB!?!&eXS$;iy~sCjCHNhc#KD}0ihTI!e`{nUt?K_7%134M^Gg98jd>N4^f zBuSC1vAS!#LsbxmvamR46o8C_gjBd_^#}_G>M3XM#&-?tZM$^Srg6-TyCHj09lw$g zZbtF{hdCCYBpOm4$VZPKPnBr9H^*I=i=6OM$ili(zWC`*(th&Bfk&e62-Lia4J-SDbp!|89($9&f|BIJY%y*iZfgw)I)pE_QmBXGprHq2_@p+jfMIGL=`X<#?^ z`D@nv$%mpzB&}oU;e@$beYwutfkUveeGrP&!9nQ{Gcwk@xF~2igt)Zd;h7C!gMyOF znmf`?UwmhpKuluo1P#(F8rOCSkSY;RwDrp$?Go}|yx2^5^x*xgR;-xHjbB#Cz>f(N z63?ICRj`dDc0|7{spl3A8}saBrwum>U%j#-`GC96_0`Gv9#e}+yf60c-299wV}eUl zMg~xd2Dq`xt2KOD1d6_P-5tfAlJ<$bfu@@Eg^ic}Z|2bn)$A(V|Cr(r-C8Z|U56RK zfjtY$xz)%QY;i(jdKGI8Bi2rSK7VJ-j^k>CeQ-N-iUB^}5ZOc6JAWb6F?YAD+^U=v zFh1ia-%az*D=cJ}%VdfBTk*K7 zalC`KYa{fGkkv(tzVSvO`aLAguL=sjuAcgHEgLtF(Rfrt6W6>NgEtJpa@V|Nho{sD zk%Wi^fEVC^*%XnGpa**=?65Cyv77~sP8J5=g@*RF*hhyaE&Xh1DZL#e;|Uiv7T2~4 zAVc)%eXm6tknJmHw%H+ugWHrw--!5u|P!~CxD z#@@-A3}q&`Jar#ra=+jx))VE;iq4@(bf|Z>UuomEn^k*C&Tn-QCdwfA^7obBdRIyHb z#6?Pyi-j*Xn={ndd0uCl^6^;RVrgP-(oCpziW#=Fc0u2b3KTuF`16V7%H9s%6N zqm73ZZ4DZEab@(|Tv`l4k?bQVU2(UXJK^_4WlxU)EiTUh7Lv`$CKx!~pmL@J`!2W6 zHE>x(K^huI)mA(N(JDy4F2?673r7}!yKpnhm5&s|z548y?cZM)Pz>4jE&--{pG8Zu zL#&%S)5q|l5vo?d73OWxf(44Yo|ANSi(kD;yn3|;xgWyPiKs}6@0nX!IjmT*3I7r2 zAS*Zbz_bW%+Si0fjKp77)Jx=D>2kmHqUXYVQgOGHXY*nXGS4h{uWa5lIr{3eQt;2H7^e_oO6&~keRW_|-hicf-qbvrQ$%upr#@$j8{jSUUOI(14} z9FvsP^vyCsosBgh0~HTP^D)VSk?4pm%o4CydyQ|)hhz#!799pZub*)kxEw-oxC|? 
z@VDw}z|~3po`kAhs9&6PdU#N}bWr-+>UyO7D$-br10Iy3H|9#>{<1OA74__c=uvNz z*0F5IgrdU2N7>m&W}HpwD`avubXlVrPQF0D;4*B~TzS^$DzWz7tq=ORiu81VK>{*B zTtcgXkmK_eB*gM0`F6IKt)M}iZyZ%SEu{UtP1p~#(r&wu3lRG9W3Rm!EfU&`?u?&} z&8UH3)eFxnSQTl(5A-&0zJU_zP;r%)rzcZMXKRx_X{L27`PMB360)~$ohw#VRh3q9 z$sgZ;47YP~03Mu-PT`MbyZ9a$rL=q6asa(en`%*vgMQz?@Ar6RmVqB#GJAHode{s~ zxOLYTU3fu>&sq}vU_aRV7sZ}@nd4g~9jE_qY4J>Yb9}~N%)+R?S$o{KZv7^6=hJ42 zHI}jX{B5wWjg6HXZ`>{U!Vj}Bd8&!-^dHf%xU!Q-uN}j}B|7|e%G|-uPDNM!?3{DN zE8x?RO>R$iityx%0!3UXk!a{|f0jz}U&<5g>F94(t(ty&0d50g<_?-UiB9I2b9$yK z?KM`B?z}VYROLvNBV6p5ne9YgyFvv6t@nUCTn1o7ap6b#tKL6<0AK4L{_#uq-1$NJ Vxfcee@P9?qr_MCIsBioG{{e)U&=mjx literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/tasks_size_overhead.png b/doc/doxygen/chapters/images/tasks_size_overhead.png new file mode 100644 index 0000000000000000000000000000000000000000..71495892538f34b94c5fd6a79a481040a9f55f97 GIT binary patch literal 11755 zcmYki1z42N6FC0f(cK{3q9CF4Q70{u(j_GzAR$QCi-1U@h;&OgNY`P|EuDwb-EqYI z?(=>Azu)tJp0{>pc6N4lcXnpqiPY9qAt7WS1OR~KnX2Lo0Dyu3072qoVIWYlM^l)B zhPJw%5*m%hKyq?&{(B(+07N4IG_b#qfCIg~z*C16G=dEP;Rp~dWC=zC02~C+AUYU5 zQNYWKsR~DqV(Kw4FbE3^YiMZL+1UjL2dAc{*45P^5QxFS!L_wDOcRGso+RuiRLra& z>K`J}=>5g_=!UMU{eAQ_fN2eQg?R>ZL~{s48~(2xO;7KJ5d{E-B8GQ>|3pLs!((Xb zM0BwBAqb+C02JuH_0{iiJ*K_Usp$jJ+1Wx4$c8Qe_TTS+0sbuJEGB5f^_>JkY5=6B z2H}_s1T7JuB^vxsemFV@b6KKu+|URF8q*&%VyH3e5Pg`_Fw}+a8bVYDpy4@awvLXD z?Cfk-R#rJVxz5hc<>h4y3kx92nUKj_$hC7KmcSrjHHDS1X?*DzYj)l5un0qt!-Hx?L7xOtXWD5tcik(p_Qg8S8 z#htJcT6qUI2oP=i`6bNn;t*5>UDtS8X;1&jd~sn}e?YQhIZ-95gZB~0Gb!OY6@-3t zACCuEe*hiraOB=HsVgO7rD zmNd%MvO*o!&$7IaNVcJlW;ot4jf{*6xwo@__I!D~-*CNDk%A=bj3xS`eKc8UCFJ*tq=n^rsa=jJnnn4sd!*7I<|V zEW@bI;-5AG0xy-E0ma-fa=4x@J6K%kNW{i?p>G`49&plBN6U^8fzVQGkU%Eugi-AN zv8BSVM*im?=AuoYmu%E<$=sfA*4AF8k!p3ynI`-Yrt71Ug987L5UUz&PZCjVw+}A3 z+Ka<3ZCzqW&k0ia_NF>;<+G3|?II$6M(d2#5z|?$P{s^;FWvp2@*6#BlH|z3qS(Iu zTlc0NwtB$ih-)cmM=f&y)F*D$mossgG-fmWCR1SuXp3e-pl7K37L+N%Qh*Nk`jwiT 
z2A%7;*P^+2TDV`Bg(F?=oq{|%{(z^-sl-s0oAqGs_Q7*9QyLt3h?gi`e<*o9a*SCwor9(e&*2?$%jL=>d_lg68 zPgllNTEG16Z5bVyIr|C~rz}-nbyi-tt(P6baKXgQ*ELyHipd0AV^}PzKTg`^2FaMP z;3nu7#v_K5B_NrsuE?V|So_bRZFKq7QrMuo!Nfx$YM%A}_u)E+l_JE%aAnqtC^87! zHZkScL3B%dGc*g+XHyAPMsiPRKkr^?(PmZ>>PlmGG^R}c%l7cLe+Q4k~gAdOSafkfc9XaT}!R{zelV3S4N3jXQNpAFGNtxFxkubc5g^d z+OQybnD+06y^6TksX4#Hfc+P)U!4zw@bco^fy98-XBr#!1z&5KzlrS)bpEv0*y%FWEGN zO89qP$!Q|3AQ?sj@i=VKG6QM@@_X`+P|SL#^Yr?MD>6A9ki;T;V+Z5nz~0Cp^j;}R z)A*%^OkPNleJTc+`5?g;=Akm{#EMfTdPlt&%pvpq!CMQV?M4*wspcEbBdumx9OM(I zH-NkyPNY}#`)U@&;-+FfcF?09NPBlc42;_ge{B9J0v_QR2#(x6O?oCxZbK@&lzlNk zEc@!xKLAMe!E1oJMQ!ABfSsR-WoH7DT?eqD4llR>*>VD_`%XqPbkIxTSp`?v*_EO* zyfA$WVvfELG^81bvyNKuzOrwB7lCtVb=q>u!7u75hkty|r?lUC01Ih4c`qiQXkdfuMslI3jBVe*S}lx`cLSC+NyUZU(?>;PO2Rj;^qIFRjiPr0-q2XI2b$D z((CbErH&}CKb&>|^kxlt`JZcug`%2IXAYRIJN9;y>-^w_S;gPW30SG7HMRe4mCS>B z7di#DC4F|^%?ML0fUfJ35I;eXIPRCI8x$$hpHh*WYc}Q3a|!-cIiZWVhE5HmAuJlHOkP= zJLlxmYi<%GC1$*GkXl8yv0MMy$ae;qN}JSZK=@-XZ#H`XM~ z>F*eKFm8-Vr6BKYAyn@4((rfz==djyZ@A>>kOZ30gtLLg!zrE*$ZvTu8wl!i$%y9R z3J+@rBV3l>fJ{T=3FI6{;J`8oACy0O@G0fQTk zQu?1+l^cbp2Ol&uKlr(Wgz7=++tX|1`r=IL{~txe9hJ{(}x*LZtqVYqrZCpX9eIH9x zJs&Y2emkJEhb2)Xe_?+ASW1&DXOJkPycrrQzcAIW5g4b$Dqn=By{06$A%^7iGo^$! 
zy?Ank7?f2lyocjTBufT%mm9rPC(TEaC?%4un@9wl;1yG0eMBWKouGdr7cbLS(3p)F z2Q!D&c}XlKL-3KmPq%V-hQE3G0kQNHLNr#KU0NB>hyXWz<~AN6*0{#+J>ZgS1!hYm zPH93WPEZvAmi1XQJ5R|G{ggJHxp+aZmRIU^fysp5u?vMu8KXIaO^^AeW^8E8jQ3{_ z=~C~66v~no`UOXH|OKMNx4=MBRf7U zgf-O3yiW3c0w5sfG8{#70ltd7ir$t{>2!6S%Eh^JT?R^hZ_C-f4f3VdSt~&lVrSx*!V`x(W<3K$<&R-^B*$xZ>u5^iF9cr~x-Sd|#JJ78FcvBMIEn-}( zj+3`c^3Lo3;-B4ep^`BA{96)EC40Y85i=`k$Eqg!Akn11~p7(*-$xbVL#a18hBcEYJYP^D>!BaX)8}t>{P=?u1#{G^jq-3cHuF$ ze(mT@!z~6tw$Hi63gZlFz^p>xDC>9kQVQP9 z%4m9-6VN*cUd&J%Z}bc$wj9*P?ohqY+yA{b%MFga-9|lTz_$1(=Y81tpXNyb2gXB| zgRo|Da2V;DzG=A40oZLmx$(7e#dM52XU|s#sN$r(ZLx73QVRuMYCb{*rrWsg9k!B# zm%kiK8f;uEF$=z@;P1(Me|KLXbF*=5CcO+ZR&PuG^XQ%ew-XzMV-ceQ5b7o&@z>|9 zg5t90MmTs+v`m#*LzH|t99;^FB7YL%A zla13m9rzhP^_yaj9i&Y51I8VtA;^sz?70i`hPa>tFZ`Te1q@(0Z9LZH*6JH%_o1+H~7FD|NZ2fEk436@G^yxq^gRw^X{ZzWEE!Kpkq0&I3YA|fwy zvYGRafiI5Glpz%@RL{RLmGv^~rfP@Gop(RYnn^YlJ7ZsDvx^>qKQ8Dc` zdGskNXe~g&{|%G_+|Xw`dKY|I4>=U54}be*R8~b+x*j-u=E?eO>jaAe!E`lZy7;Fz zqUeLZn?+h)1YpfH?RkwfVqu8Ckco6f|*a16Dc`%)KVX_aR zcU6%u#zF-{#y{S`lAt-FT+QiM@oxSA9CMYQZ-CfCYdHOyZ;T`2=xz(cna5i7Ur*&G zN+L+5^1RD+6+53fcGJ+o zpA7S9+oF70_MIJ^p=ei~{uq=bhD|P&NB5BYHpn#DLBW{-Q&aF0-EB~bNwTA`Goj_G zRX9ydw%*IrXaHq`uKqFEJ;FnA*D`i+;qM3)Czbz8#%E@A6NZVqJmW>)Q=KLu>|~JECtt%->$mAkyW@Y)U{Q*20hLsr9jSfGBYL%z|l&9IoZ7bC@_TD#NKG zwAC63nPbchr^89GV;wcfnLfDq9du92mw8P0B4z9SHKd3qw`_Fo!fv9A4r?^Sb5l^{ z9Mk>&p^tWPBqq)FTJG%vpyqbnZs1XO5Cz;Km3fS8t|R)LqSnd9G|s4M;N8E-A|{bj z&|wXSJ#4VvjqbT}O1=ewW$qExrVYv6oo)viM4d13Qv?d@wO2q0+qdrok8m* zGN&QHT@9ki?7YMl%2iW% zD{XV=sPJ|_wlic<7h8xuz-))`>4kZkvrH@9`UD`rK@Ks?9k~`oZw$d!aapEJu}5j4 zP4b9x1$@g8as9NC4X{WP-|ml{vZQ zS|HO68}dCI5YT2j(u==+FF+0f$E^>j3S^7ixRJx(oROwdAsfQA=IYq;1&^x)@kV@Q>c zyOQmeynQ9uK?-(I5;G&!r1{f#=^-)rkikz<8!oeEt(s(Kkpzl>_;}u?;Xq{>1zZ?lcdM~SJ{J~nu*GQ20KWPiCx0SswkoOM39(BrOwbS5AT)yIC8iV zMk1H(lX0G*01CJ&rZAk*TJS$YLP*R*iSq^oDHI>`*~jc}N}MCEH26$klMQ+gV~j=p zqBm=rmHy+-E-8K|Q03p8QK6;I1~pRY+Vp-LvOk5N!;US)#e-5JOX0(=j*Hp6n7^%f z2mVh7s=TP!#Y0n$*WC?u1dwtL9#n-h3q&LgNW(Z)^`5Rq|5YCSBECoChj_vc?yM?z 
zrc1!FN2#H&`(uMzHC{gjNZ7$b=-2@*Ob`=H@H#Wy@^bULKHvN>MS-wINnDt_P+=g% z(@TxcL=7)(ve(ESl~?}}>vrn*N2=(OH8D`M&d=B28drJr;v+fe>qrBqU&-3@y3jB9 z+vT1*T3p){p6U0*0s^0-f}BX&f7g#<(L#GIWRTW31?zL_L=ZDB&-t~6EGQDb>rIc0Sz)Ib`&i?$Q?;{qY7D8Mw<*e%?;+d3Xo8aX!rb zf-Cf0?LLV3`jC$RU>|8pd~o@=5fYD$j7HyLxSTox<}HMPNE1YQAN{u%H)&A0gokWOVx%LCW6*P+?quNz`rfd{b#5F>vb%A4#!l zBLoFPx^9ywjY28b(Jk8K^7>YIXWWpR`W>n{N`w*X^ZriSHnNBrTfpG2!r`>1%`$TV zqw@WZ?<<1J6cf~;V|}&e%<$q25^1)w;F7pM6#U&wcegN9k_p7JTS)EA!&hrWfpc=8 z_zzKLs|T8jomKU^S(OtX0H{T4mMaCgvQ`0kV;k^EHuv!2;y37RJh$2}w)dUQ12PCu zK^~3D@!i-esB82?_UpfkbIrRm!4GY4Q!p!ecYbLui8Jom%}+w$=^JrF`R=G<{FbB}@&dz1k?{zh3g@M-gyt0`FuLC%%rj2JN! z;t9}RfeCIgo{xoH)`zp$eq*h+`bO{0DU07;L^g>eIn$Z{0^vX?Y9qeA{td*=7mn4t z%XRet%r%wC8IsT0!uf<^gb+IljtLq*qq!jorC0wP58X75ccFojn8X2v-1*B5o>jEe zIP-j|-)5gHJUi|P0qc>aIDp@xBVp!+vbxgtQE zssKb&e+-H*e%)oG^L8-mEvKKQ)L|nk7W%*hb_J6IH84k7xp_~YIZFAcK7g{#IL+vw zFAUeQMo5tYAC?;Q%T4ds8;6-L|0&snmJ!}Q2BOE0iKh6cSb&9}qw@{@mG5sWtj&*c zs3we9s;g;yUI4bIt+o0QIGGS7fgMt@2lQr)Vi4c6Uc6P9UcWd0ZBiREw<2xqk@FJrOAM)jWo&^V5 z#hD#jn=mSf0)aV;SEC;je9W9PF+%hXW56<#ebhS6mmcHKYUAg~t(6clL7Q^P;WT%S zcNvSEAR{5z_A6J>RQ zNY*nBFnzlPD7_X?I(=%#stWa20mLUgA>Z+W(W%VC>r1!X#c3@fp!;sDGHct^nK>J@ zsX~A_hrz#&A$WhE0^#gw3n(3SLBhP^0hh8J=z~x)Vbr0wRvUM5X$uQ@9+-^&u_f>; zFcfh4wgY*M-5Id?Ge)-UmJ>8}dK`j>t1W#PN)i}$f|$FLk) zHKhhjTWRe6uyf@`hhK%0C2cKwc+nD!Q2e*#2y$a{rr$}?_9 zry5@4c;Q-R66NUNg2jg)g1o=#2uDa5*g6s*Lr-Nyg(*yK&u#SoRlYh+HyyBrrW_jN zJ>lHuw;-HCw@fOQFWQOz{cP0J=tcQNUhygElt0~%*++%L(yjzZ5l>tx|6K0-+7X^~ATK^xDK_Y_0Fg6h0)XX=Bg)b!A31QnhE)l;EZ8>;7mP|G4p zEf4W`BlV_yQJ#M#A~K6z9~_3C%eN?f<#it|_N`A$SD}R{fdKXrqi6R%Q0|;jB7_nh zAjchL@;Rn5GG|;;IeF4DUqU!j(nMl$Pw;J%P%0qrOnkzBnNCo(-+58C&J5hcmv`sw zlODSgySwvQ5B#K2gk4%H`ts3ds0dCe-#7)51ILgKr_V_Y0N&q;wXu9BN8E#9Ut&Do zuYgfJ8Lp-H`J&yT5Sk;h=&_`;)VOixt4x&_Y%V1F4lE!=r)y|33Hhp_uy_P*&(XQ; znooDT4?;sK4{bDmP&KeiEUX5My`Dynqp zX}E@K>b&rtBvN<#!L$SobfLX= zZH8SvCBW{2b`M!|S5#(i5`R~3aV9K>o+-lIa@5F2aQQd(J{_F)Zo941N`fc^UW_GH zl@XPh@>JdaACu%-mS@C21g@1SXd|7Cx9ZK@$6$AFSRqF{<>z?r-$DG|x_$TEgZqD> 
zSkJZaa`t`gx_Ss8)cvI9f6M67LT)OKJZG}sJ1f*aB3)tz1@A5j3Z*S)NPhYfyvd`k zBG1+l(i+44GA0-=d-oQQ^j2LS!9FH{4GTjqd9KpYjql00s-Mk5l>mTP;=f#gdJang zF1KJHs>@<-Kb_x5R1n%^*q7Fvfw+Yf+c9Q0dxVm4xfQ`EKxx!1)mQ{o02_Gyr82d% zfOsDlA&y25xuQV7tWOA%`s3BwGIMFRT1G&B{oxg<2#UnJl?|MCaORmFgJg`^boF@4 zQdqcc{+q+5Wsw?Kvo&M@h4-slAD3LLU}6lf!#} z*oa33H=!sqDA5=%)RY?`xB0$Z5LalWY+>7prlZ;BQ%kucwXib|?7+R`!rAh%D~{cl zQEd-kEu&R9_$QsqwZm1X>$P=Hb6BkMG@gJRAfZQLipbrfBuoALaeTqPO z`y*)D99D0>#pC10T;h*?>PA4EoVV6&@vmWn1%UkYxS&ZBL-Zc8NLpia*%z7Z+-ZH@f&<%|-W^=afRvJ&+tx8bIrHm^@MsYuGt$sd z&%E-d;*Ttb`YDZAP@gW#eEmHu7+=nphVaUl-DB>t-Y zqLy`S)?0=foZlI~-o_0K}!-!iI_At}{a z3mOZKOH*z%yFo*8sC1IZDMg=V6BC%fnlRA$eNp9Ik1)?Z_d(QrtyA600g_@sgw>t%P~2ObsT3ua`N_|kql@YOCbMlvNZK@gEWbwjj5LE zrRMT33*2|UfLQjPW9>K%TC5b*F(J2pn4ud@MRs#E35Cn&TNtm>6=)?Cm0EFCC*`?_ z^zjEQ!-|)&df$^V-|~*wLpLY8wBOV^57zS0u<(*kaBHpRos>7Jt6x%RV*z++XV&!> zzpP3UC$3b8f8W=){@Q;JYyHUuwDPNWF{y%}ciFGVcO708R|B!G5q}9BTX2En+{ULb zrsx3^%H`RJEwsER$^IKNMZLds(0%sssG`f4FT#C2LrNhF3rkfvmZSa8U8#^XW-dvh zjf{wi5y(VW1S}6JnZy&;9_+v*RS&n&N54>zGureZwu;$NEjO- z?`~nr(l@7W-4wKE49s=MC#>o|r@l^_kutO&Tq4Ay3sXrg!*pT=|_*e z(ZQr<5WB(6H#~XXtQ$rVilPT=KMI6AJmM4ysh7!J3L(j9WjIVj%;QYV`+hVn+U>UM z=Ak=D?i95=TQkP5T+ixJ-|^!)4IUw`Hbd!(H}h32`y%L^uKV)5iFq{)@Y-66bRL|B zhlJJKgN|eKSEKkY+uIqMjGK0m+i`ND@eeFT&(aCA5oSBxny`QOq%51w%5R+BF>&{) zIzRsBaVj;4Zae9MsGFCE8MwSuVMdhP%mv~sx@iC*PwcSnHQ-hbJD#qpviwrz$H}G} z42`dff#E`4_7vLe{K^HZw(&#CcFfEy-&O|moAT!t%8>?g5Q#^dU>Ou8e~S3+3?Y_% ze7h!cxBes;8)UuXkc+UGq$rJNeY?u~)>`(jN$@V!NuLT^eal>Wd)g&an;EOQ*oJ=9 z`kS>knMR}?emvt1U#m&WNd=1pQEcT+yw61;OxY*d*MG!ZvTTCrWnDFwRLF~EBP`Yk zMp>_*!S(T71tv_^@Xqy0flhA&s_f|E*eQ)_o+Glz`nzp z7G9QzelpX?cH{Vt+EeLiGJSEky8y5-igzLEVZd1_kT zd^7XA*SqoC+97NE)~`K`@q0{uU|3a~eU0Rw>Mttf0LculmttT>z~xF%NcNpp>Mx4* zDwjR3KBuMaeND>rg8q8~ij|YU*txyZjO}_&0zCg@uRD4G5g|#=qZ?I3zE3x^Wq-`3 z?XfAuO&GNgY=~jB6Wz}6J+%(zrdpHv>Eq;}&11r+{R70t(UqE2A*W)qb4ysm1@2@@ z6E4P8#}*gtwq*l!o2-v1o>f$KHX8iNUapy3G|c(@rRrGwc5O;!b8=@X0SzA#zeav}rF;Qtjec|cnob_?Z+@5+hVl{Db{@hL{`b|_*lR7^a6yE(*PybCS8v3_K 
zbvv~!p3Jf2b8}EbcgzHADN7qDGxAr(^6WoP(VEMji_A~kjCU?uq7syHYIX_3GWyMR zAQT*zG&n}|qXo{#e6UMj9Lqc9;;xsYIhzvV$Wuwx?fBlhpY@(`Io_84i|N%}d&e}+ zKv5?fEkRXgrxB^(3Y%Ed61&QO0`=Sa^|7tZ1ks`z%vjZVea$SD_AY+IV!X!bg4j6K zarM`k;&aiqKEnZGoz3IZ9gF`y1^=0ylPi26_iKeRcQQcFO=NjsH)>+ zT%#Efo6hhoUo^7WGuYCWnqAhTqpoOi9w>d<`zyZ9nqZ^s#qT+DueiPHH-7{4g?Sv2 zL+{BYq?{s~8Y^vH92)jyjFsr__!u8K8u{x9%on# zw%M1}q-m`$>2_ou^K4y6Es0w9%Nuk_>^AfWE2t8u_AZmg)$_KjDuYp9l$$n~>U(Gz z?^rXi{{OR2|6eXDukIsPfr2f~Z5}X3+x$Lw`>}2llXFN59)Dn`E;ag=y)#n87W-Mz z<>VwNrW&V@vJQJ0^cca$7zey#YDCd6T0)^@S)sbPd=P77=9?_fssSVchR=mCi q*T?5Gkuo;_Pv!sLIBb`r88~eK literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png b/doc/doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png new file mode 100644 index 0000000000000000000000000000000000000000..0f299e22adab92091283efebff78bbb4805d6f3e GIT binary patch literal 101489 zcmc%wWmH!0*EN7{LK^9oluqexkdRL4l9EQcJET#%yFt1`Qd+vZySvWD-~SooJs;1f zGk^i?8}5C@yw;j?uC+rI)=>9hRc7hV2So+6|##%S)2`sLk`Y?d5NyceW5DA^yg{ z2i*;onmQKxBk^!ChoD!e!!zPL&n_LHR;HTKfU<7)rj^mVayYaY^T(}*h8Fuf8b1dh z^U!#DNLpTFEoV>$CF-TqRQ>ZW?NyeQ&uhEhu7{sS+Uf6OHx7n%ys!{}cQp3|Z(o95 zzMXtm;qtqjE&OAdFK@5nrj|$&n^ZRrPn0ltiAgTeLn-E^eEJgiNSG&KQcxjgZ9hOM zZ0R7wbK(7#o#g`?w4Ui%@4=@i(vL6Rqo(3|t($JZ>^Feqq0MbA4sbYz6uKW%m$z!H z_~`-g9G|j6C%w28#vvoa`9SUp!B9i0X2Lu8qJ4mh@4)5)a8@Bagi&XKTM`&@NQgg> zjry?EfVe&}7hJIrgBc*3iIobIw#wH4S?<5E%7_aS<%=rgmmmba?E41J?-wdwwlGUH zdhXjjp^vBpRYC7aIR6kJ_=#khQNs`T*k`3Fps66J3Og!bTp~KWZGv11fXNE}%f$xu zgzOoBmIysz@a8krbnx5zUPUzyV@2;5xb`zizX)RWtQ;`)s_G8x>7b(cs zz+mXZNFs^MI}_G?+;6xOL5<;f-7@;yzdn~Dsli$VUUdW+OjN#^l|Ga@q&P%%LVqBV z=%!fagidSS2`Jy5+rmd7# z<^gXPKp21^Lbrx*5PljYNnVUP9ID&>dELqI%^Btyj9u!juNZ~-kA_@SB^Yz2J+3{L zJ<>hoB$;WFg;@Xoz0KB5%5&9o<#TuD=@8Ph<<=B96zr+D zezN}L{i$Ks(h$U@&L`th;!=CdeWrNE4WAs46%Z_zEmkR3-7OnxP8(k=?3npc;i9Oo zP`wC)Eg)ZdWBj%ztp;YEeV&|9)O5`ClFJS4$@Cu3A(x6{i@!`5iZ6|CA))y~f`kkc z71Kd&kZKbCK|q>D(k_=Q?^?1}8d+-i)1cIXw1hN{biEYHC-0vI16+T)2M-hWe!7x{ 
z#ZJXv#SX{vlex<}sYNoH()1C!0!7^;4>TI|xbGPM$_x%s)<rL567CE zedaF3o=&9HnE1%5TCIE0hW#wt&q zc=5ezP7^tZr+n+M;|3zpQCTMW3gwfywq&ng&t6XgPoprDFz_(oS|Jw&F$TE?Ed_ZK zm=l~u6h|CaP*yl;w^}*Q?9cS_^v3i`gcB$Jm06T^O1Vt*U}0y)N~}vBNi0eVOrA=N zsM@oXcVd1B;+|g|@kqIIy+Fav$KJy(V0f;YtPQWXtV`AH(!^-YHy93x`>vX(nilsr z*_`mJ=cJ}tjwObPvdzVi+7w4;-I&w&oSx8))XkPE)?XO2wF|@F7#g%15{}T^sUAcg zAYhCAGeqOMlqInG%=)k;Vt#~9m=p|-n3<~YJdNb=y&=k&>BeYGY_zo7bEIgzZ%}I# z_%`8$(cswJbEi09>dt?|aXE5ze35l$ zd1-#_x?{1Uwf-Z|ElDnWK(5m?{u#JT~1o~`-dsAEB#{0Ra!F{|MBSp$GnLI!JcQN z;cy+ijil+Msic$Y$Lf(UpT1BT-`I>V*O*SIL>i6Arzob}rMTP2eRJN7v{|z(T0pQu zUYW0!b<}#U>pzlPkUu0j3|TqTN*C5OUtUOq;BCIZIsmpXH)$m zRwQOFx7Q7E14U>&RMlBk56el^qG7GA>mFiDy%GJZvn;zrsmxw)%TQHj%C=U=Q7gh@ zI6e8U3oSS}a+b)vOtp}q_{oLsde@()pefQV;{Mzy(I}}eo%5FS@~Qi|s7SG{vhZx; zEO(<5W0}e=Whgm0c`N0U=4yO+e9{k6s*`8Smv8DN))qPhIc2+;Tyb{$Jxp2c;i*pi zOW%S#^RG(p=aQW?-3?$TD~m)Y@g`^v5%%%YE**fS36$Tjl0Q``@TjrOMZsY6vPpIq$|Hf*~c^*Z()s6TLdCRl%7;_{wY-)_`8 z)Jm(DwQ;^oxvYONIoEpd*%Rs`tmf5m9zJnGDaUK!`oik4mHyOvaN~MogHl0A^zMwn z^#eW&tq1Cv{=GqG*Gv=(F$3SSKoAj~2ibG(W6|Tp{YtHu4Ms8oQ}{yEjo^jrwTqwN z*?ClrAU4%S#*d5yc^X-ESpwON47=3E2g9YX{sbB(923OnpSRW9aoZ);-gD1WObD2= zZP+ck_szEvLn#|J5i8e2t|R=O3Afe;ZKth64;aL=#PaD(<>|Nn5=Y>^+rSx5{G$c9J+FyTpuj?*#z zH|QFP_NWF7MOw5_C!+qh8?1mOj6QTUgg!AE-cl>^fAd0`Ac7)Y*PZOW7z2=i^Pf~9 z)I>sjKA5Cp_-K0&n2>+opl%)$zRvV@vUMb+0nIyqS$RLIK=i|47~L!i0H`A2_3n)L zv9?iYBAbqGqW|~4h?njq;jEi~nuwgnT*z32zIW7r|GN-+1?N1P^1t_+Hxxo}mgK;z zMkSj#=JtQAs1{RU{oKI%za>xfUA40be5b()DS6O6D5EnwaszAwW`h=A-YD!GZ!r6u zLjPSomO?_W0W6~hx@1PPgD{26oo>Wn;5~@;_;0J1nomue-Oqw9@tj~FX2}A$RtPZ& zY&!{+&S~@$e$I=A1JM)#lD0Q*QD8y}72weJj$M&pLa4Enyd=lCE5glj8nfdR;bPX0 z8|Ff=?O0Jx_OEM8E&mB5pJ#=z^`rT3?Ryo-iK&sd5%l^Of zw8M;raX@_id*6@1)p2^azVA0wHxyFnm)UD{dx5ul z1O~bb`h~2A7XHfF`5%USVG(bh10fh{_ZQqF{{sS^{x92;9XL{@01#}h|C=H|B2~d# z+Yo9#gWJGfFIXd;OrW<=z4GK`v!`;^VF0J%^xOErqpCO>5CN*cz8|MtWU z9$h5$K@)~l?2^H!J`i=Z!JV1bvH`?%4V$mI*>^n*JuoyW>Xm_Vm zP#@Hv=oj`sFWZo_^v(=Nmu{idP#q?c!|^{IvC$Qe8^f)3*o)4)7(%A@Tmcpc%sL#Y zdW@r_bBllIv_poLXDNss8V)PQ7ll(@eS=Chsm* 
zG*6UKE|CH=@%b9RCzB^3)+)v7&a-J(bnOp2e@k) z#W58og|LJaid-)i@Z6s*g{HZ@hM z(_?8iu!Qj;lhpMvZb9qdOV}aT$-DU?QUVjuAH8V6si3ChfMQK2UMlCTE>8H0H*VAJ z{Y&Tp>4D7BTqJPc4EV7KNT1|zgwHEejP2Siif`^JnBYUCURqdgBK?thpDnPaHGzy@ zW+%Fz8*Z$WNahrfq6g?rP-BGQ?Bwc`zL4s|(YP(g)Gtj{QoLPu{=v6mMjG&m17BnJ zIz;|Cod4r0#9fhE`hN#cGlYSf75D(c9p3BZvHeBP+iWSa*?`aO65&MfBR8#jH z@hcBXPW^2zD~Le%f`%E3r|#2WbzJ=U_(sG-aQvhVFZWEJS;{K^jlSyysgd>EgypBB zk6?mnr}n$Bk7isW>3+XZz2snMsr79iV;E~*S`4dXf|Z62KDwJ%Vi}BHiNgMtSPbT) z0DDgT12bD_>NvE`F-#8g0)B9CaJM$7+r2;r-uBZD>_PxcQG zN$0G|77onjlPaUh4op%BHKOXYj?;~%g|Be2U-`xmNglKiqcKGf*Y}$%6-%F=^(I7T zUQ2$m^+YZna(}nHxlERb+nz3|@_DqbD<%eOr>+(zL`xsqW{ryoj-<$hN##ZIuZFg{o1VOl?QNc8`o(5RoPcIDhG`6m?}sTk{UTiW z3|PhhjeXj7;_lwQTWDb`=e*qmV^poXEE{>HwU5JC3~UiXq*lN$k1F313gWFZlS-b} z(dnFjap0JwE|VToND4$14A5A@8jmUFA;>tur+%|O>iyP*l!~_dQPgQQ(#`DSfj+-| zY*6@EQA@w*mh5x(ITxU!*zqf?&c9d_AsgSI2tn9CFUEslXZdlE`T<6!@{REsur63J zB+E1Avf7hY)8-+q{RHVu>|efay*8qjq$>ia2;GqvnQt$)K=B(KJMTnZZ8f3J*34!W z3cJC+WrS1t-fKieB0~aLl$19zsOyQYZfBmv>pkrIBTGgz+V7PBw4ZTb{;2QuwG@TY zF6Bp*rmgZz{A7E4+`v4!OMxg2LQMyf-qW3oTcLM-wYu-C8!`7_y4~J)KOp4%iGdZ1 zr=hGaI}6YhS#xrdfLlTUM5Maw%WKogt@H1HR7j3B4TYvywW+zN=|yX9>A z_BWbk)kppPg-M?v4abpHJUf;iRxp2;lm>h4u{EHNOG}FqJ zUi?@Ekbel{v>ux8*YqbA$@^il1j|{mB2>-hwlIQ&prFkxSzCC>2-$uSX83pf)qC)r>@{v zc4}etP@3b)3`bbL4L`d;5lJ)x?S6m zu41eget+)0pCwA=9I9v$eU_i98obQtU!BS2|(j?KikI|Vp=XuBn^;qmDLJ_TG zGOAwyYD|?p@329t5H`G|ga$ByGPMhH%-U4!%4L90lb^a-% z!!@p_@9ttQpRgKBbHib@YOUp4X}(l?U&g{Of74CfcK`j!$U(V!>K9wY4X#u4WU>Wm zHxj@!JEJ!miqQYgf;sN+FQfbOD&6yFdCtpSq{vptxaB;OltpDu7VPd#e^J#G1(8zi z56lN+`}KVO{%C2;{QkjVv7ay`2_e~pbanrs>5{5N)r+Xa@8s>{?&scbC^8J$p}1zGna>z-7B?yi#%f{)>Iq^oqLGU2o2B%`wbzNvf}^zuO@s^U!4@NaNF%6K0-` zr#pLJys*sp)i&PC$-gPKm6+P6c+arV8*f6)9tn6%cG_<;WC-fn89yl>v@&cBKQ&Go zd1tO5I!ngm@beU!k7l6LWFi9}DC64c?awHLQOYylurp~kT0}7Q zR~PU@fV>jBJb|T)*M2D|QfX)uKt<$iAq1l!MXOKC!r zIgQKKoroYFRRu%sDVxZ~2V7(a7(dmIXLd4nK>qa+AE|)pPknIc+@9@?4q?WyFh~+h;Y^HuVXC@33xc<1iTFT)o zXep<(BKIKyDtw}80<2;(3VF6{=FYa8x4en#O1I_RY*Ow-0C=W*Q+|X@)elTmwB+9s 
zuV(!4r=m%mi8FmighCqVz6jlu@@3a)K;WIzfWtI+0 zHl>-#+xGFjp6XuJ?tKRgFb@l%pD;^rdh%n@z2Q7ZD1UJ62~J4i$b2h5e6I6d^|$j% zocH5o`xI;u~ie>u9F~S5vdr4PYDSsvec)eVwFICjEUQm zgzUY`#ft7kM5?hOWSm7cg8_3RYejZ>FF-%2IZDx)gcoBTK|s#upEyT9rVt6i-9jN_ zf%vm4e9{EdAy#v2J^xi;cm9pAspkWN>k<9IB+nM&+q{jhwDZ5`dO}gjWaXfp4s^mJ zyLrQ$UQsBB2F{a7t-zFa{i$U|;6-1%cl}}L8(-$iIHv2#r#TZz=YH0n{mY?Qzp7o6 zu|#(}g+(2SWG6kgAtTGO-xkwSs$2J2@A;|Pvy_TfFgO;%07Lt1k=Il<=7T9Lcsfu{ z#c+CSk!B)enI=vEquZY%!eSUP~>sS{|q|;)%C-<(W8S=j&6QhG;jz3#GvZGlbd+QnFym8Y=TsHrlsqS}{ zyjWE*3?R(79x}@-X`MWtq}20O#(G?`$1AI$)Z#c41MEW2^U0ja!CFq7<>hVO;&HT_ z_0M%x{Lkx~Dyys*fVOtMwW0a7p0cCRu=?`N_SOU6y|?!Vz1wbuu)BaCcRvA1#^L-g zfUp>@aQM18w6yj!ofRR(URWF4yp*4XrM)irfGS+}eMN zX^&1pEmpBqj_Iq73>fJ}9$6*Y7oMVDd^6}!TD>*(+zp&zJF@Trhx7e- z9TJ$57BBa3fqF&7m;ow1JN3(iM;aEZIzXjfh&>9I+w@9B{EpZPKkKtN6iSi_Nz0$hLUnb2xXml2L#%T7aQgSxttQSm#^p{gYAoBH zG7vjvCL|2pkf3>zkOcf?)Nslsb?m*QkS3aJq%k_XMYp`kZ8<^;J)LK!JnesNgQHNgvNS8dZ_!{Qyix+uNu~rBgOnK})a)_&yx07-t@&b< zNib*yK!JzA^09btecvc1C4d}OJs~~^4fOuv8jG3YS^n1@Kma%l3_M8pH@!kf;vg-E%sFWbCQOT6y&}b?+pS7pw(x}s&gggKd zmehpr|9DtEiCRGlsrgSAQ2gu3_uT(;E-E&c-gmw~@ubnZTcUL>Ikfq7Ab!M^^Q@+v zNtx^7`ghpowiq(jDtGL-Y^CZ~?%Gb70nx@jAhsL?eBNq5&->NUt>MvICsKlP2 zm+&8TKmZ}^U{S}ummdjk?X6_Q&j8eoKvePoICK$mt_!O{$ZzR{7ed*`+D2@*GHTh_@N6V54ceak%lXOkTY5Y_w{RQ8@v?U>J50DOvA#oZy*GB1YOl|< z;Zy4L9Ns4RoY^Hrg_hQo(dI5x*sB1~m&ht5=QpMhZ~ic8t95-=|9szZWy5rDdHx)s zZd25BlXn=KFQkfTESUO6U-A1%GP!CiK~dx;Zp%Z70# zrs`$CZ|(CW8OK{`Yk6lDno4&yuchj^x5*Rtw$SG>u)btR*HDi#Gz4MGYEI%fXUnm9 z%5PG!UqU^F?bTu&;*JHqw0kFX`KvGpOYcc%;pg^)miD5hWTgIctkcWuw!^QGm|$#id^SjUph>;n`1H}4xKn8rtaxXS?etWr>UrcV8GJV9z+zN> z!*+C`ZGU$@i&7tNqHJK!kyj37tHk;oAkD^Po?{s9N90L@+h^O|I=WtLL#&j$2=rG} zsD$)G+p38t$(2sndH#A>VE!5nyN8%&dGi4(#Ju9|SNZpxF}uIynngF4;zHnFBT++m zj`NaT$k;kdCN^5N6e%aWF$|^a_Fd-m0K7 z1YWxHqKJI{xzfxt4old6wmgbm^mav&j<4LNZcGy}57LsCV2FCMEoqslArxl{Tx3hW<=q` z28V_c{FNC{NR~zGg4acsZF~Qc|8n~fJcL8M-|tz%Q@2hFRHk{%jXrmA@=v*+baeOy z^8V8|GB}`D1_z9RqevOs2WfEZt2Y&4lVH4b<(%^}9pWz>n1G|+=6xief8ZBuEyOJV 
zs~>sVdfTQDRk_fr(pD|68A%9gLoedYLE*%Fzg~Ku7MR=a&jzlVqC^@Zv)QBz>W|%0 zvgS@^v_K9jm&_d4p+OCN`<$8xn+XYkTnwEp zyC_SW_4mwDU+A6y*ZP08tqJk}FGI0aRX70jm7t?58P@;eFh$*6Zf@zeKkaE|1Rr~r zJkCv^8lva2brl$jOef-()n-_hJzf5|c*=GX3aKTb4W2w=!|zZ8#&YNv>WR%5BOw$T zs;?JxxFtCugrcwhj{V|_p|qJ+0SOcE0SXspb!^PvlR{=K$EmEQM4mffivY7F zcrBH0AKI6F9jl!5{08wxAVgbY32rZ}a8iyPg4%w1D=y?`mEyoG8ELAw)KPD|Q1Oa2 zPr{M2Q+cbn`E?SdN}8{{H5k{z2mgc?1cHDEy)8FC#*G^d<>}HT`w3U|0HTJen~MaG z>3jmx^Wd&;2P$?^)>8U1jIltDeW)0B+-J4VY6DG%z9%xi5f{&}XNHYqmfyW*<{k4Xqw zKnJA*1u{>Uwp|B#dLEHJ+chJorZ1&`VsP%dlb+WJ;#K#4l<<@ui5tkF`?+$#K`K(1 z^7bhctRk+Eqkt9B^&u=aGz!rshIphRJ3oFJ5y@o^vhl%<^-${Na(E?L@-YGV&?*c? zzFfKDB9WTnmHDK9OaIX{dna!Wx`r*~U*#_0b4!(s9)C|)&MotcR;2Eh`_(cm(X-_w z!OQcWWY_T^M9UAMd}lsp=i#;9rLMM;p>`(9FUUfd)B%7{0bn^4h97UMd8$`$KHHuCqb@Bl^ndxoUlGn_(>`-+OyDKxAM}vatNpan zth-XfF5)tZq#PF~o2Tb+s`<=oanTkUmLhn+U)90wUB%{I`K{h56uHA5E(Spo&4Hb* z;-vgxjT8tBMq4GEuA+GWK8t~=uN+Dti-6Omyz3 zaH#EW+pD#7-5QP~4u-CIdDZ{r3RbYfz~=);6=!%EeM~Kpag!q91&7;Of*^D|gRIPW zX2Fw1n3v*Y9_=_|!nELZz?`;nw;cao;P-lsTj(qvU+!^^4gIFw+_6tj7?>8~&GbhZ*c?D}`q9NzaBw(uJ!()6o-U+`oe)t2w+zk1K+ ztUh#cdt2m;yz!~pIS-N=KQcEZ$SeqDs+g}O$Q-F&<~+g$MhqKnjXC=`u8QW$X4?9E zumMBpSWjN|nHyC1U?l22;I~NzTBZk{$Z|(E$Cr z&P)D(1(dP%W)9Kws``hmpKlyk$hC&>*60KN{9s_~H$$5>0hmV)8Fgm83U>A-%{lhQ zJg<{8$1UyWs>{iS&^i)FTJoV}4D60eieEERD(kj5#LUQZ)p=Q2miNJ##vVJXUogUk!9ZrZJX#kAaLQwG)hy2n|KgVpF za?Jb}ilYy6(@HFhO^s=>k+yNo%Ra;LH|GIe%%7*45hC2TW7c}n3qIFq z=rym4zDoNW3SuwYo*@g}74-)xs{z5~$K3liDkYGRj6BMDd_B`{sP+o0VC)EaAJOd1 z`;l`ASY?Ei@iW5-_{=w7MWF>XEb3M8ayl_Qe7EsfxcCV$H?Ygz{d@4O!g1C_1PmO# z@WZ+A25IS)&=M1{9OSoY2k8G-LB3q);Dq`BH@0q9t(6b^OaA#=x@a7Ze+&HtbCRlV zfi4{LvvwU==ydlRx1>c1!-z@4wSm^JvC;kw4j)MdWJt8Lzj#`QfkCi{2Uc;K2Iy?$ z{cc+I(!CC5Fl1q`djm@Qq4b9syxrE6zfSl~2-xkH?T73@v6JEe3Q)HYNKWWbcWY zVM68+hTn~N-{uo6GPfVkN&LSx?*|<5=})nNmgc6B5zBUh-hblI zCL2~k5-dhXu7jqNglLubHbltd!4wm2P@=?QvyAssZUx;6|C8ucFbdLG&@lXXfNR7> z;$#EtFf2HFW#LP@fWr8k?Or=<<&LNA!HpDE0V-z;oT&S#mq8k^;+o$&N2JF+v_bEX zUDt;3U?@i1*hL5J1&>=9N;hytLhCH=V~4DfD$%tzXihbc94*$&+TL`IZ<;1Hs%dIk 
z=Ur~}>W}5ugHF2<(GL5CcXW3%5iqT6*S!&To%j`bo(qTp&F`P?dIMl~Pus3O<*t=Z zvG;w!LahB7g@u2PYdE=*t<*KoHGMwF3rJ&v^b7lxErd>|21fs?wNml(+*g}3Q5K@5yW=eYpHFk(C>g3w9D z@=-g*#3)@|X>-jQh+_MefZ(?|X>(pwJO7Fop1!dckBqW!PJ4gPugj+++}l|IEhg{2 zUp>{Dq~7rZNZzEI_Bjlw%TFGmFK*o{9angoHK>HLIcvw%FvIRPRg}k&DL+X zE6SF$&nM$&RBVu|539aTZnXZN5t|KiXBe$(_9abE=O!s1*)E9E=~H+fip^#DYHb&HHUDkGu=_l~@Am$8(2v!^$`_ zWhpKiR`U}8dIrLMZ$SLY4mz;ih-?#qXEk@P$XcUrp@l$Nqbm8^e1e>;g$lamC}Tn{ z2?>~?^R6}{o&_6&{Qnrg(<%0449Ju#<$P1d`^&NKT8C(1FmZmDY=+1q3p}I~qjt<^ zY;MfaZu^j<&h-TWdPFMjmZFYNOo7;J-max~Ct^Spj9E~~#6IbdYYuC7vjFRHDBt;L z1KrIDAX^Yf6zp+-Hq`3+QJX(-vp>DY2<1Z#>4k?)#quq8I1wPBm-{L7e1r@I3X1n} zV&mhM-*>+#2DoL`+M2@P&@U!Iz^ezprtg}X1Q&b=nzKnHlJh9%=RsU;?(;|EdxCOR z;ivoO`=gt9j(8WjvvhBIIIMJRm6y>>t;g>EZKL4UKj2Qli0ndyL!fS)Fz4*{_A<4g zEI@8YiEMp=*mQEL%A5NV-Q$@N1Q!8(o!4Z9t`ReHy5WQ6p26y~l z7$*lspHZZKNdS_KlolV)4WKPOhy~u~&lNkfrMqQU%J6gNjg4pe{nGTLU+(g^f>oW0 z$k63b=F`H+yiO}Gv+ywL9d?(MCcJUn%A!ykpE|nyT;49d_+n?i(KcyRQ&&B~E<6B# zaM4NF!TZ!0z9HoMKk)<=8lD)_T4#x73N`U)VVA$3NeT1=|;*$c2#KeSn_ESgs0wh1~hL?rQpm`q9;871#_7v3QilQ24>QU?Zi1J-IyK&9 zh-{5vp#dBYY>{xj#{v!E?rS{9ug9qi3HP-~6YKLJBv zi4PiGu%{u&_(z|YE*pcBohr~e>ukaCgdyyIC7W_Xeew#o-tW|(B)-ivQFc5It>MI( zYX7`pP@;>V|1I-s_ZYHWP!}6}W}K4(XBQU(2#AAXCl2n$|f_ZD!lp6JJd(dprBWUJ3G(gaW+e|fhGc67EziITIM zpo(BNe&B#ql6Bwq+XtCdF z$@fTS#MW3;nIuBsPHictla43*^X)vj##{#!NOTFZDegb1zMo`Qht_&qSW)P8xQ$jg zJ6=e{%>dWOq($_Q#a)+)bdkb=)LeN^ltWUxj}sMEAXw12zLnpb@%|+g1;xR*^Jq@L zMV_FCZQs54nFlQr(KqOAkR4lV1~_;WGQ?A)8a5lSrFoSdFPbL{QfVW|Q~T%uI~$1Y{vKMg&|w;W&DsN9WZ9!{}&b4rC(%ETUXQ<3gF`;Jp!8Aw7SA`bN z*C(CFL}WGI2QB5c-o*!;$%!FMG|V0%@yfE0#^=7SEW~1T3TKvOKq@3<%r`9;!-#c@ z`TEyU05}CZ_yK7au<4BO$-0{!p@HM3pz97cpF%*(4TY>78dZZ+I?XN$juWcG+VS@9 z4HW0?L*e7qRQ&fK6P+JcAx_;iWC*^f1z*eJwv5V}DO>SRw~<9`5CBk>m2z9jX(sI{5a~Jry<0;jqcf?5y79p9DUbVNSO(^t8$Tr&MkW#?h`FIx}`HgUUHclJ|9FvR3zI?1AWbkJ!M-w77>RD z2&67z)PE;Tdw8dre$bV)BOu;1-H)MzRZWO1g6*<# z6%CVzA@kERZu3UTN4L4$?WZJT!|5yRxS0s=wsU-COJO`vG7RzR5niQM9Bqa@H*K}k zTP0rq)l7y_vQcql*=G~MAh;}k 
z^7LUx@eJJ)OWmQ4K(Nu56lxrv@=a`_d=MsH;?9R&@_;`McAn*t3?iRs=yF<)%*c{W zz1RMt*>8Immu&Eo)F4)9Xu3LxJ3F>~0vT=p(pKy1gponrS_IlQ-^rGP1@FX8iJwo3 z=YowqIwiKIKQ2Eg)s2a*ymV#Y>-~+j?l_Ayj2I%#*(Hj3C6Kg>5#*dm)~ifp-NkGGL(lS!3T6|H~PuFj$ZyjgM) z;S-_*tqXWZxO2{Tzd!AM;g$q4>TNm#V7fq{w<5ToP-) zSv5sb#A*6<=C}fO4zzyaw|whE*R?LD2s#b|o?kn+i5+JI1Qe}G5PuO`QV-{!UT+6g zBYyZGQ_fihYo zB^>Xca=5eMIMx~5KX_pHutOVFF6m@tgPU)v(C7|1#66)#f#W__=71(kNtVS&LN)it z8^_6j{@;M%j4ANuwc66ZBOeqsd{}QosCKQB7%L#~7wOq|U=4|6pTiG=yfrjgJvY`( zR&3*mn-Tn!B2APC_R8=v#py8s?*oo-bCqJM1E;h?F<;S7%M5wRV4cSWfT)*+cNaLP_URUm1UZ5B9hX>&_2kG8p6TrSh&q zcqP1^XOD+L=)LPSBM07jYghQ(;9A78eJ7@ z2)e#%je;PpqzM2YA+ZNL@R~U3Xr4uurrPyxuhUuJd`XUeojB5-NExwD2uy_Z27T08 z+K|n*tEiyO#1`q^6bqydEy&;Dto0KEQN=U!oc zL7{FqqtSk%+NZ_`sSg9VqzT$k?(uD_6KdH7cl`>q=Cfpf?&2M3EPmstE)1KW@%JRK znUK)w_~-lIXVKE{^83AxMI_k={1;oHoT!(ij})I7{rpMd`L|3Y4ODT5xCK%~8VYF*&<#>c<=oCjZVBm;D6+)pH3=k@i{ns6`uwGjj750aFO9(|IX83>kt+q6U^Y49J)9hisV$< zWKXUNdQ9v`POB!%76IQklP_eLgkBpuKEkcV^RhtEtS>WGJ#Esj)N0eBMbp!?PM?)B zkp$Q_j<}fq^Jn~F2c*Zr25GBh167YhMBfYVA>8wgKm*PN0k}UN@l~;rptS^HOvP`g z5^;a@uI#P*fj4d1vHKb?zFCG3Dn4){5@H5OK!_0}w?CMePB=5S7gW@hDDpygbiMjx zZ<>$5aKK6kwSZoyLyxz}0O$MTIxqN>pa9hwAw*r?TUr<;)g&acViK@+Cmk2q#6Pey zBF8uq%ll7MnuEh*5s~Jb;(ZNcKOn#d$l4?VqP#Vlug%hG?I*zw?K;Rm>{@a|mn5To z*!TFuYKkj-6V9O;pLhT+WYXtYu=5jMG7l^Z=DIBofzt8RX}-z;2f1uVk`)!ZPiFuY z1BznZt2~Xj8C`Z7q9HsasHU=|yQe(oRXt5Gz1l(oe-&CTf{RfQXy|B^LR((?KigL~ z4ct4Ro(>!yr0W@kxEXX_%_UfCA2Ba_*Iixc)T&wiq0abAnw($pIC_(z%{;Ai$Uay1 z(tVoT{A_&Qu%Bfxt%rEravZAGo&~l;4r8AqJyxu&c+0e0JEPp!jD0BDpZ!h`qzf&C z7Kp(e@5mYC!&a6=?u6e(vt?@S-gLI|@;W=ABl_^0HAlL6Gcxys`RPNs0?Rb32dz(< zpo*u0OJAoS?~WJeHXi-e_;KUtORKsyye!SI*Y5@`!sqL}WFJbDxRIIhOQW~}6Xeti zcqQ{yc&%!2!D82gF<=4yq>GK0zhy6{N+W{61_rEU0VbT=uyH0UjVMCtAK5(4{CBP- zKue2dx|W%g)D{`=&a|OSXL*9m`&q|&32Zl57(VGX@a7<<@kZ>nNDLw9Gf`Q*I)eUj zsfDQDz+WpztMnn#q3qV9D)70t4efs0C6v$R);zZnppA}AC6hO& z0=AiA<|UgeC;#F7Tuw59kg@fNFHrO7SagCYG9nGL*;@TFpbfFYfG4xHMSv_BNC%j=^y`QHc^4*IB= z1@Xq+D_T~@?KGReAIX4o)X8N_L8@<$d3+eyuk5;hp4Q`+Rlhm9mdKC|D?)Z{)#NuH 
zx|&PjkW*eT2`pqY$WoQ}5)kI4{H4q4IBj=yiUFhps7G1nwD32n)R9xIf>bwR^9Oxq zR`5Owl2{u`&)B!^L*O!E03Zt`0pOrc2TP2cWZJj%b2m~!`pE_FOIm&+UT;sL-~In# z>aC-y`o3`QeFy<*kd|(glI|1`BqXI%K#=Z6y1TnmQo2PtrJEzop}XU5zrXjr<)({WPm0h#f(Sct6<^lU}RuWGJ!rIOCHCN zBdz!S`L?-bbe8$DM`z<*fkkDR9!cftL{#-bS<@fm9=|lx_>_qY=MAV3w{YzLz-|0Y z@MrZE^CIu34X+2*S;g=)8Ql%aA2Wt_M#R8o9eXFmes2Deu#^{|@R8Ky!eec9a4eHC zM$(!RyHE_j3OKuyHEBXE5XU(@Pw-L_e3DGq`vc?rCyYf;3|B?c=ehPvYBj4^3KmGo zGCGfEoX*dzWhqrEWxWi`st(UpmnYh$rxAjJjo#mnEZs|)MplrzAXEofaJQsr8&K>b zY1nl)Y3A-SV*nfWCz=A>D5@Kd2?0_mk1%uI&&nBY4}n-H82#NJB^hH7T;4=?zTMl{ zx-8;t8)L_w->PXxq21o6Y#(PoM6s`NP7K94$EENh+iq_02F)Yi$6iId)!<5)K0l~0 zK-WQE$@mIcG7^QGew#kr^u;tZi~MoqNJEX)#P#X26gf@6B4aKhjA^*EEpGeJ7)|fz z3{#1!Qv;J?;$_bjldGf-zN0jHNQ3!%uaH{w)}EhW^_)0I=BjYGIyMiSD+$(>1jk?Tz2; zzyq95ylHzECFN!f@jc}33`HW24hY=s%UFae33|G$L@z3>q`bsag_Epz0^=H-dB|Rq zm!N!uoA9CR(PI5n!8BSl4I<$@#i#99GD$;D;yLUf&-+Vz%jKO+eq)ozvqvnBV{Y=t zuiQ1AURdw2sE+(IUjfbb0lL_wVRLQyJ#t?%@@+K~C`=bxD{0#C*A4`TGKrSbC374u zZxaQ09%tV5f_qLy0>}bs&BWwv>3&2=d-a2~*}S07C%W)-sabJ0nudHLK{mhulG42{ zIRuaR1^ZtqW{;S<3sy~^k-DT7Da`PU9UV*+s1s!ofvlfqyI?cx`g?HNOQ5w%l9vU6 zn4j>IY! zoI^|Ag}jp#Hx}U}T-@Ucuf;+)miQS9rW2hvMJO6k*z+)Lz35Hnu}u^ND+gIolb;DB zbbPg7yE06k&b|{b^6Y}@J9ykFM?}UnJ_MGGgss!pcmC4pSFe{;EVU9#1(sdKJyxTk zu*wYfW<%pq&m{#PQJoi%$B`nxTOPINqhWqEQStcdX%P!;=iYSWY!@_32e2U|1LoDj z8R6LU7QDgMFsL{5XBdepneHc)W+?#x`TKtBhAZ-SZ8-3;jDG*}?x(RY?4S3J&)&%V zoiC?M_X6672hjf*fkibw2qILpup}VltJx*(6`onh00JY7YjMzBfTMwVWLJw++f3RN zB1$Lv+Qo`@TNKOJ`h4qrFu^cIcv$k-Ez)C;dq-JaIRaEa6?@$WR{Gn6UJJNJBS#m2 zBhlxwzkddDmWC>c?^;*x_kB7VhLpwna? 
zPe|(d@Rww|bpKX=R4rt>I2URO*JGGiK0_}PX80D=(o%j-C(gm6V)TLoydHLY#sL!` z6(ckkrIuG?h+?r*3iCr9ys~PC(03SvQ6jQ8!q`K3=})dKSp>Zx!qZeRXo@f`%(6)2%_Lg$u?1swUyj0%zYr z6{4-<$MVUr`AS`aVT)Z^Y*ze_EGB5FeA1)@J+sJbUJ--HwHN2>haI!Ea*&Mqzu6tz zNYVS<;27>KM!pYN3gzj)D`JMLcCfj8BPT5|Qhc`i5*|BC_30`ig>+SyJH^{?ZCT?N z+6z$nwpp|!Q$zSLo&8I!PP!qnYyY7B9qkTp0}mKgwYF~IUufRcybU%cA8r2aF*w@3 z5sANC*5V~ty;6v)n^!TjI72QI$|HE z>K7EB3>gEpgd9yc6%@$K^i286vbOmW+JsZR&%Y;-zXWZe-HC^8^JOFu$taKuM5{Qr z$G78|U(!<*6I(I$htWH3u`q)^bMb#4If1q1=bKCO)~&0&72rc%V{*V(aXD1eekZ2k z)^`naF~fps4D2FmKXhlTZ&?=Rvih?egA8Bbf+NMkZ$8S;A0oCI*cBgdXC7ysE*aVN zEaM=hx06zd%v~y5#*Yt9|p$gD> zjJuolwS4CU0&Ce6<12X0Vl3O~;X|-2HSFp#4O1Wx5WoW`yoWDa+P@n6=$_sG#ZE3! zBiT)NTzGw^bFP$+jhvi`E+F5O!Ft3=Ts#zX3DS|k!RiQo03_2;teJAdfQ?w{~< z<2{!chMkYA+Sa4y;(b(1{pEe+)s-LEEwJ~%RnT<6Xj;I&8Ko;#k1BF0e9DpI_n}XA zjJmffUN zFqMI8Q_?dd%it{E8=2P=;*L|wrboRNmmSeK!T=@_qp+1P3Gfd=MTRC1d?oZiLzJyf zHCQ15@2Ij>%Y~mUuLk~!%sE8O8W`=z9_7`$QJhB|O?~cKJipC8JsvAo2|k*1d6<+v z?e0EiMc%VkKAP?Ng`pnZ|2n^u*bn~Z%1h5wuno5!kr@+Ch0 zmH`nP0PIcL)p}M;9j+;jQ=T9w3gbur7f}FB?h%#)C?#PP1DmoAbYMB~1dfls0KQbK zb1F3utrHmEkrbLF)uy z7iDhWXY;v%Pnc*z*qAvC;72RrOjmw0oo`b`58dY95B0^)E1M-5T{_&MS~75_Njsv7 z^%7cV(GFJbUJn@Vlq+1A%ye!}pqrAzn)nS=VvE0Z?7!k$A44N6N>E6F~Dwgj*!&Icm9E%QS2Z zMLOJ3c5@8G#=v&oe*5vSiAo=$>QUDJB3dfNG(V&>z8Ng+vj@$UoN-+huxAZ}Hn3UM zkQ4wP=GQgc!QS?Foz2IYo(j(srOCoedgr~lfcyyxQ0RN(naBIEp)}0WiHKhH8o7>) z)Y;I>S-koS6vb_Z*7igll&@$?5Z?CJ?I_w^vlC0&+V4>WQ2!_KP5H?sN*SPPD2knp46awNnC3iLYLnb!>TCQiG>)&^ z@}E%u6aw7KK6hafkcP=OB&uRuJ6zJ42zj;08i#6QDz=mIdb^NUwO`rmwuuqYb~x0dedd}vuffs@Rfb`|Up*9ofLzJZHzm!BQx?U1~_({Nldvfy@5<#W(*#~#POi5A}Ny&^m?GY~su^`L$^ zr&-7G@%oqge^UgQNJQzT>>(&5dQqz4ZqwZFBQPVbI<9zv>2YJoe-aX#4w3e<8Y2; zL3_uge~q9wgDA3q?`7%Bg;XS%pn{{F$QTjTXUJ>YwF&aPNz=ONF0&NwwUB!tAS41D zG@XAuk4nn^?MzKUD+;4@qX_3WznHdsO_Dz+aR#pk$srS6JMlMFQ2#<$r2qf5K|!<- zc38H?_Ub1qmPhmFn=|)ZjLJ?BqbC72O#!wSD|d~?ksi|CYPZ~gD*Oq=R@fJGG7Q~( zIxN&I>J)43Yoihwus^{Ew12PnbnCwyx_YzpG90%^(%=i&k4y3TZl_rV_w%ZKCKBb_Wh6p-d6&X0fdwIQNa?E-FJ;{Nny4!#K$0K# 
zK{5eSdWuW&-e`TxXGM$pah|X}%XbH*NgP;p4xdeknwSITv7G<3EoM4^h6}qy5G<*L zm*rL$^fG7|FNuEe=D(Y{_i?peeDIay9DZ|6@IuL$9r%5)-MzM9q4%-#yRUR_Ef?NN zi&Vo54TysTQH*0NX214PqPK989KeT*1eFnNK;hSib(qMig?GN55YwNP`*AjZQc$o! z219^w4o!JGad;&5=*k>54nsa>c!k&W*u{9(U||LqDax8abq79Ok-`yfe`^LjAw7vv z6C}Se(xjUm{!x9E6hG1kFulDBnO{TmVR_M=%$=^Ab=t%(Nz**HEYnk268TQJ!TH#( zC3LWg$AWBq7Ehxm@oc&k(#GEiK;c4xh?HoiTKOHi`qA464}o7}jg`Tw*^fU23MXbT zJ45v9?6H0^Az(CmyhYgaIQ?_|{>~pU1ZE1Rq`ZAc5o!hhTA{l33oQ;WH3;8SQ464B zGZH}FV`OTzu7ph?+D`=L1i|(QdL%f#q&P= z`NmDyu~mJm`8nZA=0qua;ynRZhmd!Z%!t3}(Og-w{Z2Tw2LR3<<0z3d+a|s--~`2k zx};r%mQ#YT#7qsI4f1zH;BUK8X>qkyJE%v5)RRjD{3To5`flQHm}2b@E(gex321{a z;_WN<_h_VK=`Qs+FOQCq{`-Jfq(A4_Xj)t$FJi=oR2sz^uA@kR7NQLVRCeD!hoHl5CuH9Hq#6NyS8y zr@9cf5n`XmjvM>V`={V)W++D^fs zVWvJSK3zDSsmfaKbB0V!`F$o_&aUi%`~1hO;xZYHj$z}4@6?N_P|;GteD&)Pyg+1i zstk)`N(9g4$E*vw%!+Rjtt|D&!9RL@O%)cT>SyQaQlWBH)Oc15g4Q&4S!g@X86YYbvEqtb5aLyb z)vZPnNw$*kK;uiP>#Aw)Vnwc?vo8Kjc$FuZv$(8ZUtL%#2|PoFk9trQa^xSYi!~&q2U)WI z0rdnlnHe3~F(%7B5o^9h09?AU6d2$EgzpYKonw{aZfr8^xXyla8Vy=h^%^dNH1 zYv(Ai$bf$h&cbgj{h$@K89E^RE6|<;eUh^Op?DIz4-|O+P4s}i*P9b;zsWpy_O!e7 z`+CNIf%>39I>qTJ|7$U}RqfBg#jE=Bxy0b2L{sVw>&t;Z@M+D`W-BLpApDC`ICf7j zzp<(*d!aYKkbZ$9j$vCe4FIHjcH4Y?q}7jsFf8y6q$A6G+B4BqZS)I4D6n7^(v)jQ zW_H>*giWA^MqzdNOz^F(BLDs3GL{O?lRkVLAm$7&GStsnJ>mL7t6LIs{VzkO|Eu<* zU%KK8;v|?iVe4-D!TUbLZA3_U7WkdVtF1cT#PuF~HX4`o@2Z5u=MM*2NE^y{^@^c? 
z?aY9l^o6huV=!-882P+>Hwtgi@p0AE z)zu;uif$(%DCJp&Q|8BpDIpeEc!AC}trFxOVWA#>1xc)a;F|Cp2J z%*dvqWa`t%#$2}j;o$VMs&WyT&!Ez<@MKP|8jbGf3cYFg?#%z7JCfg*bFGL`W0=bS z8ZHx$5zSeQX1TIoxkPagX8B~Q&7B1IBigIb3dw|WLDuK;X$zj;Voj)&Ba#VlgX3A@ zN&IJ}F*v3xnsoNGkZjaC{Y+n9eu&OqpW8b$4qwL6q8les$9AUb%@Fs!3}U;zn~>-G zo-$HHVdcZ(e(?P!DjqO1;>Cz`&%3JRA5SYQtv{JRA?TbFg6E{vf-`iMu>L)CM=*O6 zrjnKI2EAOH%W5R^{ITBlMyo+Q8h%|EIEm8nMg*!ffp1BhG~i(vuK z2etC|$XFQp@8+}^xc2_?pH!P6+d8JI!{4Nn!FHgv(4%5ibWr~!b=8?p8G0!$11Eyn zuCLO8_Zu@!zR>uB2Yvt!Yu7}E+E*D)J@~^Dn0J+u)dh52tXb(NsVbhjPG1BhKm8oE zA?0WNTPyvbdgoz3oHf?EB*1Bvdf6I>u#YIBaFIrGsc>oUBRc)D%*Wx{>QYg0zB0>7 z{_-w-YhiDm)yKtZ61@|oFuit1GRHDXa*zA*6ISS-OA~wpWb@&=>SvJ{bO1xF?rtxG zIJjU8SNlRj`>kGZsL9*-x9&Pk@S=w9BLG%F4=lbea{asH5Zf%kRDvH13c2As$hl>7 z8IJ`!FS)8m)iq zL~P$aeS&G7hLX4-E8o^%I9=k4P2Gh zAcTm54ZSMkGdW;-SCy*gb^$voi|D-&-Ug?F=4xf}hOa@v0KoR^yojn6H6tvd|NP38 z^lcstCs(Ya+` z*SusJMP3P5q?EtE8n1iQX=^;&hDLviyh8-~w7sKH!g1dhB7O-3PI_lhi`2-j>cv`q zqO6SAGCU*sdY~&d-lfA_61pgN``DsvW%>S*4ZL9AN5$}_bfw7lw!F{GBWCVdm(cbS zSGeklR+mWR3CdNI8+d64d^9HLOg#Uc^8QWLYxiRj>HQ*Z9<6p=jBiuRIywsvg@aJmFNAenjGk7f0D?y^pXtl2RaT7K1a3g z@X4_8wpJUz5dI%!+lK~+2#(ae=$of(sq1(;|UEq zuCqz!sYFQPx6ZHH#Pk}*GbZe7|6ibJ3iqx|+p#4EN&Rf?W>~7_hT{DpNz2{Rwb$=- zDcuuELzNF|ITpKrUUW8Vj97TfAATU7aUKPkukKog7uzj9NzvwI>ges~%+OAO?-Hqrp+2m2T8!&0DOJA74}%UL(E za6K`W3>O!UQz9vPQhp}l6+O2liSYfHw(48&0@!iC#9NPcAcc%nIipRju?!BNi(-1# zE2lo25JQio^KBCp)3Wm2kU)M~A?vc$G%Awd1y`{c5|;N8vg+*c3~|FE4P2|p8I0Jl zA$R#RekAeHc{CY%>BiX<@P+X)BzNol2RSItle$CiufLJmrB^nbl$X;-cOE=u-ZSSd zjMM$l6SjD!BX{x*%Mtgl*dF-$l2}NRg!`NB%1Xn^A}v?(47F_Wsyz)Z;uq#ZlJ+*s z7U5P0GwrRmG)+i)mGw0tGJcX5@=*rAj3GFVr_4jEbChKEo5tfx4f&U7=e$8h5vqRw z5@oa7U4G~;l%em_!SISk`~)|ny{>B`wSM(3gfNuBZ`kx5n8XTpPp3c(I1fbuKCVUK`aT(>nYu}jQ;#qQBMQMhnqn#U!)Ufn z(;pXHL8+*3XRqI_`N!@}((RY#%KeiQsD%XCO7h%xPs)rvRw=|wY>oLX>U)n(KoI!b z#IDH6`)P`Qh}_=@-XtrEQUG++=^-0Jv1iaqYvRb5jh^3zFzD%JQ3c4v&uJo45QxV7 zK50<6n24c*^P(+%8N;GOwdGbg^lU~5vb>-{75E4Oa;5^m&wy8}XLokTCj-ux+};qke^ESG`h7w 
zC?);8B()oUGH_$%ubhL~17OA>B5C6Xg%?VBKQ#2rhyfw&_X6KKWx#;f8(+t>L0s{R z&=OxyGYUoIPgGDYhFDf5P-x63078m?6mXBjgY(Mma~vkU^4ziuJBe-BlatQ`Yj9tN z4(Ft$>=5#Js12N}vj|KA)$`f5`8{W`9|LlMI}a<6%K*gXwteH*2`w#gJ1@($DxkA#uoQxw8YZ~caV9JN=n$g$Fr zz9#j9`v#<^F{{VIc1voj^we6?^D5wG&io$!SL-<2M|zMHjsMn=CBr0mY0v+KRxGrw zU$)~suJ_F_8le1>?CqJby2|}o-dCPwcfF}CW1k;)1{1C;ABB9e0Cu|3_xc%1iW*#D z?yhl)t3f5z>3~6s(|At>S$PzE7Z!B!7=YFsEoxACzWghxJ*>!uj=u6qXun6;NoNs-XZ!m+!c0M5ZUP#TvoeQW(vL7#^pUj-Mb8?98V8*_6BBir$y~#zkZBMoF?& zyn?9&0u0Pu=Loo{A`8xB`8@K(B|~i+1?UHxK)p}^{{@E-sU4`KFVKGgvn1esKU^vF zOVk=VqGAljZJIR8eMo-9yUn8^nne8Ig1Und&v%%&lDbhNv!-pyty@b&VM^jFj^zdUONGqN@yEH2S4k-M$Ui=HiV+oCHq*r>ec`uu#?hx= z3J4^x{oY2ZXEsWGu~LsO)bt)24lK}szZ)P`?xRJsUAs;pO1Y4-x9}jZq?*629>>$< zq?6!}<@>qqNC&HhQldh|WhQb`9CUOwKK0lY$18uyU8+FIm#o9=dT407I$Hs=9uerv z-i=V?fqU&7Bq%=ZRR4g&SZs3KX_i^8BZF=6d8O9g@>18QR^3f;CX#p3pj>}fQ{PLx zy!OkX#YNfto>L(@_tG&?B6{&y!rm~5ZTNoG?$0l(-C*Njb+y})eT86MP8QC^jU`;L z-_(iBlH=vP$ArH6_ACjG!}WE-|L|x!DiHAxG^NBfguX z)|Qzp^tx9(2Q1dI1(Y|`iUWFin#g?;?hxqaM z?-f*`$Yv_PTNgPNsW}55n<%j|@$5M`r;{b0v*#Lzwu01H#>@TBW~v zv|zFY2EPaN$P@W2)0g%95oY<_)dA_NDGs~!q*I!85l{|(zf z2?MoK*L`q**3;j>!RYVz#8qE;rahoW(KrBlBq_Zak# zruWutHhoCK+V7~q>P8A^4cSEpPSvR9mq4jgRAcP55-G^LZ2W)+jMImJ1YaWELc|rifxNK& zBfXGp-KpBHUkJTq38#!yfJ95osbR0kRPKF272U2E<+n)s8RKN661jf36f{aI1W9V& z0f{o0hnYdR_LT~V&6g7{3Yas zgsP>f!fEJo#3N0-F2Ov)x%wf2p{nB~+TG#+BC0V|g z{7hh>v0(8rVID>+G8R78OzDU7&F#HBS5VYUS!I=i$$}1iqVXqz-$oR*5@$}*+)L@a%U!>X(!{cd^*E@aZe4)_c(f%G>arx|5^T}B> zOQhflq47(0;>^ApV0iMynKS7p;m>1&ckj(>Di}gruV@5_kSGH}L#QZqv9^X3&GH`2 z9otq~K(KvT_FqoaH^{rB{XABoD8GqDuAc<267 z^@l#%-(nF&j{|-DPY%bo3n3FfYLu>i%h9`zdMTPc%Ibcpm?v0su}cAmnpVg)gyL^g z7gSGpNcYNoPMTOuYa4H%aTJ|*^g`$4M+8o zm@2O5mSC)c)@pOi=k?yyQkpKMS(VKkyjYjTWvaO@<^%3q14Y?+>4W=s$L-e3v96aZ zRiwPOQ0MlmP5ZB?dnjEdK$ABt=9w|Qy(4_W8P%7XG@_I@`1f=wl%o=5jO{UGwR(ux z(yu^rJ(9Q4eS_9kk4v)DIxc8dqe@r4oMr7L;UU_jFg3S*d7 zX1_f3(@E{v+;f_p9*6938m3TX&=pKnet->6)lYTL<=NA|FTe-w_6!$UQpj`h=5}EF z&<96!fhGEjwlpV!BAhQpbo>_~6MsG&Q1WOZ6Z}YfMeuIK*Z&y2rSAEVDb0a~q*u)} 
zCQL1#ag;TqH_q*;sr+%Hm5bw9z~(Te1O9u`_=WBnitm3E1j$1ddKx^;lT4GZ?zY^Z zvxWy_0>g&MYSG5JZl;5++c>Y7*n)H3qI`%1Guj62H5c8o&9Ifq$>A(^-T_V|%MTd= zD+|-9)xl^49v{9AaJKmL&EIY@pcT{lRpQPvzgiTC#nX8qp0YO>UJwBdYHdqrYSwH)drEoU?kVDuwq~+V7<1uP) zTiN{L>&{MZ$0ozMZt41@t~Zv?XD9g@P>U#L1_^Ln{){jJFxNr|2i8~AsQxIHm$8o+ z6ci7O5l=YxtBT^d>7F%m;M6vaiSA25ru)Fwaj=Q*Rv+tAME;-T-loaO8H5ip{~?*S zZ?TCm2agJnoeL)wQLsn%<^HiY`zVf8z_-q~7HEMH9|9;=N)d@$=|Z+NMy@UjGh^MOw%Lm)>34#66~X?i&;jA5S$~ zfnUGzf_!f?ovQ%bn|G!_J=!z(Y~al2$)U_N#dM`R?ZqJXH7OI%iynS}?21-Q#ln{qgec)Gpe-fOL2E z;sM7P>sXV-7o`eR*ZhZTS#OTs`vs8D4T@F4 zHx&aR6nQ38?}jk%&r;3AB(X7!M|Y`F6zx@#3>EMLD~k2W?5-aOO>@AweC%IUBKP9b z8^nsi2tSI~m;0>lt(Vj;5BolG5CjU{-X%ZR{3=S_C9N@wyKY;v=vuG1g4Py~sUE^l zT=bL@u~xmoWRCc8fBzX%`*KY6k$@VpbeX`eaY68pv>!fL&)#7qIiR})!OR?6`*p9x zrCrXxF#6Zyd?f$-V}m7NfEqkc>oYLW;W6(6r~BB2t`W6T)L5-gN5@mX=7)=T=ZG)mKo1+hK900$3buA8 z+dzq+$xpbwUt}rDVTg4a`RyDopHhWIzjB}U&QcUN4v8`Oje&ZfwQNE%iO2LC;>|B3fA(Q9#*~kXw%86+P?@b*F8z+MeiB3u;RF_)rCb3{R8g1Y;7!~& zzXyK2!R-AodehiwkqkXK)*v|2`ucdJ`$hAubvCKHD|zAl=vcFo&IFl)b=noHWLMHJFlT_T!zT zIfByEDxu-emL>i13!laFrt^PSu@u;v(c{`jMHwy%$k>($8u>1BCFUPm_lC?KC!ckg z@P`~BG+&r7WjH!IxMFgInsaP|lBpH(Q-oX4?{9Ijp z6<*d_y;5?1iTx`{2(NtanI%Ze8OaLQn;tHCydTa<;!^w-nm_GG6=>av2TmT|R+!Dy zY=b9BMHUtB{p{Qj@lLh4U0k>EYOK;qIv(vcUH;zY*t9(jX#!h#Fi@ z5P%7VvNX1p5Z%lxb&b=rd-%F2jK-bdjn|+*Z`d*K%h= zXtq?|Q366hC%pSj;>Y*Vpg^1A$aK6V8qK9pM%7))fmvQZ_+$g90mM3=6CqyxO#QT! 
zM5ewIB|c8OBAt3owqU(Fgo4UV*I`tqa>*I4hnHCU6tNqE6_CKc4s*#1m zmy>?5$PHryfO%QJ39Z*?k`8dW@b6{&$aC^oKWwAsjeit0qN^G72o)yvXn$z2A6_-GDErEsHBIfHvG2Eq<=x#-P~C*X9+X&9 z*wK~m@BYyW4C79DZa8XtGI@Kbu8TnBWBP?GpUeVXJe~9kq-!McUpw*wK?>NGBR;ow z?b}Ixe#N7AvxKT&h+99r*PuQ!%Q;@|4ZDn7ET0npRQw#(m$dJd9Kc$2-`Y?L`#DZt z(!%^#U_AX>GsH)qUAA$@zxk#~nocG_5t3nU6M&X5=&IsKWuNilYY5Nxg)C2*+0Jwk0oRA`FeYB~1A}TxG+GCD(3mc6tku2G1 z@`12*L^U^7U_2PkzK`cA-3~tv`rN}qOui<4e!6Z{l02guGL`j4b)RQma@O|CQk*Zr3y;=g8h0gvE>#sShH9w=g$71 zB5^At7WABvXPPWQA&z_@`NUs#jRhxx%&j5Yc*UJr8wWZf^vlDoEiB15Hw`h;E_D~n zkb91oD2jQgj+$eHePsh<(OSQNNmM5pGhjgARDTon&UA()JKS-}!rA-A^QulnFEY~u z_$%INc0{P|X1fzS>|gYPIm0RN)+N|tI;-P*ry8boVV?-B#KMs_*1h&6Q*B6*mwV!S zREqhP`>78J(BiSlImyJ1%$@tm_b;aWP~t>gwEpI@sxlibOM}x70gn;a=I)4}@f7s^ z;61qzOPYRqW8G3zZ4hRC%N0x#RCGkp4<(}?p{Gko-L~|()ZufyY8dT^?||+Y3Rv@w ztIrvfu--L%7V_$&eBP0uc-u{t#$-0vx!#PxdJC)_t!A#~RL#iSC<0gY&F`2m>cd5f zFej$4U4$qS1HT!&VUMwV`PXM%V1pQ|%O$86jW2lqIId(K)^#(saCI}j*sN_SP3!q) zb+=X=DqMPXFTdfD)J2+nli~Bpb;B%4O518mXKBu-sd5{3A3gaeC$fcvxUzTrXf<(c zGI84%JEq58KUWAcKes)^7J+&pYtMvDw1{==Rr>q21IB=hMjquO)+@Vi_TLv);H(Cm zVgFD#uFlwBPyDj2LB0}2J5_b+bBJl9IW`r2*OAxE`Auf!#3%Zk^dUnoVrX|b6(q~_K(roFd5zJ#2f|u&`mUW zE&%TjLYHLdts9^W8cAmGPNNzSTfkFiOq7xa4)#GCBN#Gf@y6A(wzY`^kii8lcL=LE z=jcEjBYP*7XxKxu;R~$x2|lj)5CA4olpzWNkp%+uTE zhKr83H?dP}?gbyE$u##A^O%h!m3wAh%Pgp)w(VAR9gnw_hAn-XvZui6v=a08h2{J6 z;+IrNvzaN-F=09+E9>v*C%0?h z8iZ;Lo|F(Pr}-h3^k$B_2x@P;7BKQKg;*%|U+7U{=oYso_m0<%YWfQiuT_T<{VJ zH3+NT@FqFddRVm167vz4A=JE^F!KKt@BnXHo!B4&SJ3rA+QD*P-0w}xrG#EZ6DI7BO@Z$HUc;(@BpSJZ z$5WH<$8nEz#AycuOI6q(gl-n{aEmy#itZ8m6M8Z6LPYlh7z_r67rC#e<#0G&HI^dq z#(%c()p@hRQk#nU;YTBevy5LELf0)|UDWfC58hs9e=?&(rueo@j2-Z8Hgz-#U;C>m zQjl}zrvx;*a4JMKMZ?*4tt!)opWp!RYi*y2MM-@T`jOk(1e0;jIVfR)CUqemTjJ9Q zgK@vJi}mvQ16wn97M>7{eFBq_th=VQ=RKMtV>$x~j;KKT-4Q$)$}AKF^lHI!7IYS?c2AAB zDqK(KjuY?0{CppSjoQyyYL{)dLW}%kvxu{U&b{)8r}rYo^DRyQit7b+h_4iC_zPbQ z;7?)qo5z>Q)KkxIqsdO^dLF!D#(ctJLUVc^RDn@*k*|M%zaBC>3!@1sHgWl9G|wdT@~X+==+8yo!jG2)wqf~2MQf42VQz>L$S%Zr>#fpCgO^$Dpbjr 
z&JUVF%qD)J(ZO@*&&Et5Ur8_wE7X>1RRtocXV>d<+;TqjX{YyFb8R&ZbHQ@a?=A2c z6kd_f0?IZgX)`})F{|h%Y~u`2fI23gL;vJfOsa#H;vu8AdQaLlI4N7~sZV>0bNKgB z*YIpppA3epm?1fZJ)Z5T`2IG)D?tQu2=Pw1%rTWYpL3TcWXXOrRrnhQ4Qg;FNJWrCeeBA(Oc7BFh9Kcp}OV-7iVU zo4ZSHqdGwPM8gXPYMx!xOkPQimb-+x})<48XADo;{ zjjzK7kIv(vdS6X5UPWY{7{kB15^tPxL~u2gLShZGyO02tedJEPsH5bZ z-VUX@^S?D745uEwOFB5OcX>=^1K5)k$RxH{D(tcILPR)TGo_WTM7Cu+PPO{TweHIl44#xH8l$PF!%^3;I9-e!7b6BHsZ` zA$974W8YMq^YaW^{zla*>~jjWcNVNf{4XXm^Qf*muHVwMoFD8q**7~kVfpFYwu*>J z{{I0YJN8lcHJHk^wk9OQ`IL`;X?Smz61U)#3sVVOchG<Xe^(K7l!?|cZDE{D6> ziAt%KK{PGGJ<x>`!ai5{TiXhDD0) zQHvM&W3Aaz3DZBX-AXHH!I4)~-(&)8{izhOy`zfWd;G)C=)>dOZVg|LBlq8B2_5BOLj;q~Q7^*^Ci26-u?0X4BMSnxKb6irA9@5 z&x47~_j-i;>p+}y)ZRt4*xHhD|5u+Hp=3Y+yebeP1VkeAyz0MJZ2p_LReg7Ck(awJ!#RD!=Erm zOtHONrO^ug?~|p(%Ga$qVSaBp)SL@sI} z$`I-rMF`m}K=)a_AMdy6pU>qE;kjI*W26n!N&^r&3soU+L$ol_V9)$EJ}{{wEt zE!3%++a_^4z%pZ08!W`6KaiwT_M{813eV~B8n44U50=u_s?qUZ#~&vOK&)OWXujTC z!YMpkrUr{NaNF5HGTI<-4RPZvfO2g)U{Q*QkTG4pBhY9h?D`4rY5f^i{y68O6z+|- z-H##&mI`Kz&|i$Bbjw~hYq7^!dY%Nd)@@{6lrq`J4UP$9qPd@2nWR>OaMpN$5JX9K zUv}RnIEqMJ<@z%(lo#d%;Iin?0En6+-4}fxKHCg`EwohHLjI3!qsY^_?HnRd=6f=& zqg*~*%^z`&(@>#!1n`C~mlAO`_2?_O+QnXK*}rW%C75DOjf?j%a9P2e5l8<*@8On+@}#aWb2LgY5}K{hQZbIeoLAA6xsI$pSaxJb3S>$=K$(umG{&| z@O~Vv&afC~XUrv)#P;7n>N0W8<1=RzKr8zOsYJ&&&(rC$atah3GdK}A(FwKh`h?jc zZigEE>He?Q@}%WBA73J4BT$e?J_lhnG8zq$@l9r-luibppBK-ZY52YcgaiC9yRBE< z0ZXxj&uY3}|A(b(4A1jZcG>vWBYHYi)&BnIb*lAJ>Q?@~ zO($MZGxnLj{b+djWy?QjM>b23vdc1jaLvpLMje78(3dgaBl2#&ipV;2%IWiXc_Quy zl0v4@URb@Cq5f$^v!C_3!)8VD0J9=m+A~)?qu=r8D(P=3SZrb@9F%Vdyb_W6wE2to z((DMoK2z1KBlLc$fr(>K(!O$m=!$nv0w)Fr&Jt&XlBDcN_Nxtnw_-t%==*C`*P?&( zS~Qgjxja{am9re@Hs2i_MN8P@(+IrwGCbwH?RMm_GH+C#Ch^%snxIC%72l`jdg? z#5@a) zJ2xlr!8pKfOzfn2ho^x}pPlkl0UHRvO};9)*ywf#(Shy!o#T}N>am)3=a^fFUf;Sv z6kxv)b_8u~N-@%fjG1q*{qZavQJzqY!kAdvn@dbL+X&$q=Os>SySzx zrjpKBY0jD&eqzs}@i7z7RHVTKuh>c0xw#F&N=qsDTd&vAnLQ%=m#@)KnG0UIgT}$u zNS3PcuWBbi(gTt}@qK?aE+I*z+`Bxyyc@0BeBIshX+b-9X zJmc`nnCkHyK$nfszdMt5o&bHm5RbY`oYF*rW;@(-M~|H<`p_U_Fz@KkvpOV#=d%0? 
zT-$D8#Y?A6ZLPj~h(g`|?x-rN6fdT-qr;|p`9$wGFEpTp>{X-vx}TtG$YXT?P^l`U z3>VrMcC(bsC`*kMF!tMCN;GcJWHVVyV>Hp}to~loJv`nlxcjm^OG?+gs}4H+vzZRqvUk;}6Ov zj_}v(E35!l{QUL)Nh6+kF%_j5qj7~~>J%TRs9NW3sx=8K?Blf?48?PpSteM=R@{y; zEgP8Hh>J3tlqLwB@R(d!&ouVJRTJdHS3%jr*Iit%Y3v(9dP(E3$Lr~5*p%C{T*n_K ziwu%_yDiPenxWbj1nmfb;!#9K-5kZ*+(&{;MrX%gO7*w~#zn08q+Hm8ZOpx{0@Sii z_FQEqa$$`)bVvtBuf4K=hMPRu75fKnU21*5FR(NC*|YVjqixj9m>Rnm&BI|xyFe|R z9uzvVsJ$!92BY$aMW-$U1PeYxUW2ZYuaVmbnFF&!Fp@htckWn<`01iOzu7Yu?IxKe zHuK<3Nx;gHSiLfAfw&SyffhRdfYi;Jw}2nKmjfgUb}lA6I@dg9Xb~qX_b}(hwR2om z&f+O$m$Z9E3#PQNU3dv-$eZH8sF<&@SJ!4fY+yfZs@4`{I)Sp9|4Q<}d%r@u3!3TI zo>$ZV4MS46Gf=zGf4QLJBuvMtB=B%%^r*?)VQ-PxfywT8IMR!|ApIFPkkM64rrvFn zI~UN(zRt;0kA88_-Uv@(z2%6p+MN)}FTTAfr?|e%MT+06)~Fvq_f=o}oB`A#d{520 zN(E6=)~XVwUsp!GJEJbFBNFixiZt}ODH@g4EE+E!1|W6F6F@n;o5f{cjli?-OR{4~ zraOog#OHc%d8{k+3Z?>cimLRJQPrA{>uO%Xi!t^eB9N&XrubT*yW-E}dVdj}cDl0p%8iA}L$vmBlhMOFMYQAJ$(Z|POg z>#EDjgdLa;Qus%B6P@Zg&(G#NLd<*$zt|ISeuj+$F*Ps0j<E{ni^o7qWsk#87o(zM>Yu)wSA&IAy zvN0wx^VJM~)a z>H4&>EBQ|phmu0!6#(nm!%v}OpT7a(Mj@+#1@CnCoYDAxk#vIN*F3TGC=kjyn)Jo# z9Q5rW$rh|OGxOQ1$*~3=aT$Np z_KyJE%YvFb5|OCC8S02>;m11(NmF2FyT?9uzfJ%oA?BmRCT!f#GG+N&2p~S!J1(h`Ef_B;+2MsKd}z#@(AQ)dd`uESfTJrsl|h7-<$Ydk z{uHOgE~NBB4T0=esmRXH`yto>hHdF&@hH~#&HDG0;0c7blBYp)n^Kwpofk%}OipE$ zK2(sOYqJ1%q%Ne@O(|%pk-bzfq)W?Qel(F2;&Dv7C=-x&x5|ekk36gvf%E=477~^X zCGsR6iLtq}my!q`Ljb!h2kHQ$1sEMe+*vc7x_;6g21!nvHyy>HAoz}f6rj2M%@;*; zbOHlj3Ygu&A%0p53bBPp1d1DW(Gh1n(XkOU*k7N74ui6qu! 
zS7l`u!`G51C> z6Cp8X0(FIxE9V1nN#AVLILqq}FCw6(#VEt!W-E#6(^w8f+>2Gv$(CTcpbxD~ljq1$ zGpESV+0j*}&F{5zEOX=~OR1v^o9xKD+hGzyS}TY=(q7YfEh{^9i=vbv{$8N(N~Y(v zUY?fp8fI%kjrTY1%9P1CnO#g_v@jKu#kEejUf0oXVdTgry6jc7+v`V9slY{T3=SPx zixcVmkIj$eJ>aSF^MJS4ivlF*4Sd_M+udd&w?05}Ru(Z_g;-n=oFK9vS4Y6F#ia~h_Bn=%z#b#eI8#L1=Vyc) z$Q;0XU*ZH5Bl*Rz|5h|L7};#i7=#dV8;T(II>kQZsBocPUVA3LIYw=+{Klrn`d(eI z?+Em1l&ly-QKkj?=(=K)MncEY24ioxvA5_*LV_BA={n&FBtCI-!6-mWz+utng3^Ca z*5JcLK+q)Y!^ab@FY4+zPhIw&0T5w2m)FdbLgVzJuq0(Mcen@Nj-!!erCAWjlr!Wd z{tYZW$-2vQ8BSda+s!6;5tw@lQ@r6@^u8Si2x!ndfp4O4vGSb+a%{tKbtJRn?cCO$ z4Y)Ehqz2%#fk9KxYW@-sq;i$|E*ir!fTyejNIk^t5y~(4`Wmb%*+X$rg-|uVZu3oc zmA^Tq1ep%v5?WbtiTpPgbVcca=Q16Bs9xSe=B|%P;YAt~_&gf_1Uu3Oo;Zw<+x4x!U?*K)#L4{ zdK(wwUG(2~7#kEL#COC!zvhVqrFp9%{xBr`Lw}H-%1_r*gQ^MT8xJtn!NrToWrC5> z8XAJh(du+{IX~M8_L+*YG*A&*e0NOQCAXmm_t_Lp)V2ExoMO6(R5A7ZzJqDox)}qi zI6MioqbB2t*zZ?xDjc(A2X@hR^nwk{6QJRTesZsk;lv8^Sc`Bt5|lrLxC}UPzcUs5 z_WNt3(#47yFwxNisI)Tcriu&!@>4~N4~!%pb_xgKC3G!BA0^(uCMezkPMYW`Zw?uM z$_3V?+TRj2vO55KvXQ^BnRup`i<&?YXB9eLGbB54RV*i!Aoq7Y7Woa=d3U>V= zJEP(`XK*Y5!G?fhhQ{D9v@4$3#VEY~i^F*36l%;ZEIXJ5*>NSNT(pfW?s53tOorb8{ju#JTQc{=F|PqeI_{yA zj8}`c$hPWDqwxXCQw=CLzurH#0s3y{-Cc+m=h!O36>CV7)~gSI!gTGf@sqg`wSI4< z!{fi$mN^z}VO~G0IqQe7lQbc^HML8SJsY)Xni~Dl|Nfqu*mYDvMOkzM)iVVVB=p3A1nf-vMp1Q_erMEoffC5$dZCMaY-;>2m?$u?Qa1(jU5z&-K< zM2yjUm6fE%rfU3-1>yr8WB#?(WbNFSgLgGz-Y;jVw;6OmGCL=W#4=!rIi7p^iKsjq zKVz@MQCma2RLugsu{mkp=I5mVRB=ql5FD9_o}Xm7t#Z(7Esc~y^{A6p+cI-9txYm1 zYS(xemWqr0Yd*fSAwcaDQW$)6Cn?fvfOP`0AXf{BMp zarl9*(k3C^BeV*G6`Q;F5+8AIBNTVJuc!P~@MG?r?%Cu)dv!Mvj>h6uPFfM_r33C_ zallNkuryIM*_>gfIlCZG=K<~{p-ZG&NU7Q&IivC35P^bp_q?d!wG}}k%e3#o?ceCL zI-;Z=2$E{I6DYB(eLaDsLXg~#e*}nz&H~Awern*~Z76bhU`>bNjkp6_|006W z*G>-2L#=f|wIOJnTaY^tE33%+)F&5lubUJgQ*QTq-3WT^BnOtPV4ozdCqD%8I?Ug4 zJ7dEPm~#Z-93drNHsw(@uG(uWi zz%)+M>afi5rs_Rb`YrhcE*^NyYoY6m7Hmbs><6#Iz=VH6Ee?v_xSQv8oO35Es!zx~ zC{4aR_ z<`zZ^Q*B}sFtxZUxA&FRCH3~YksoGP6MKc_Q=ISoiOC>oXmS!TIOsk^420Ie0%%>R zFAV;=^ycui&4Yn~YZ{eRf=cEVbkvP0ts1?wxJtSO1+AhO+ImW)z(W-+N;r~Hlk}UH 
zy{ycQ^bKCtfVh|!Z-J-Y34W(b-z%rBo6hM9T6W*49bRs~a{@3M4a=5z<*_F5Td+2f zjwEajzkv1HIk~M^6c2H2()ltii-2#e8I}g_l+`p-pp9`!ssrISxnU)Y)wO_+$_8dx ze#Wfu`iR!aMW1w-Pi@58eLVK2p~74A=&A>Sbq#|Pi=$Gj5Cc4M9q0GzAH102kIQpA z8-x0$fN-r#7MMY1YH|cQ#wwSPwh8;U zE=%wbNp)eWTu^^OQGuN{wV-pQYKwDHxLlNfcrGaJxfxs(;?j*z&cv$OLg`R=R4U^Z zf%Vh77Co!!cc55Izeh%ePAnx$aLBWk_&~f@DQ?|gbPdNW$3kNWu_!9NA~oMcoysUp zJqY{`ZUE?@7fd*{2EhEr; zexRq*g~?>@wyXimy|AlwEN!L|0>7d!eY9(s_GIyM#xaN2RQ~`wP!;DxkM*hU+fy<> zG{*xE0lpy0xTz@!TU|l)>p7j7;gtZk>doN(biqON*Y5C<5r%k`FJ;ML6-sFShha&W zzbI&f8jnrPm_El9EmJ?qO^oCqITobFcg@;E)&)Fgqrk*no9Q%zPPt8PvrFIFN{tj# z6ib8|GvS-z3ZYplE8k;51j3H!<#UJ3c;0O8*%jp(Y?O06_GBDn2jRpG

FRf0u_4 zqAyv)pxv^Ki#8~*qAyBevItkTymDpxh1jwN*5n$0c)w@8PsH?fI|G8;EvehwYrk3 zeMG%5mx6LE8o!M-yrVpyy*XP>L!Z{A8`(PR!(!8GTi57SqE$u@`2`5S)pl~h!Mhvy zjpXj*-FM|`Mj?Y8(%oQMZQtYI@R6=EDTRhYTf4s#wm7=ET)%ShSgIZHCbz)v!=4#3PniiMjM;8Kf_MsVTq&X8Q1i30t7WME>QdQ6n$IGj~i=J7#q z=d_K?DuNF34xxk5h72NlaQOA+xk&E7g!N2#O3h?#>Ux{w3q-43568YV&j%9uu^2H8 z`7v?a5u$Wxn@}+`KF<`P58sHi__tKz%my^A3bLDgYXq~e$~H}y_IiIXVEP;?6IJJx zJqO{~`1pfh#ig8Eld`M4b5D}yX- zQUCt_7Exn>6keZEaLRzX$%myb*hQqkLpi`-V0PUidCe>D&(cUIt3-{Jw%9Q+u?Q!{ zjc`E~o3al^7wtO`ect5SZXrK+1SV~MlKpeelz|Cu|7tMW+KuUqn_smyFeM_Px#?)+ zF~Tac^b1k83E_sjzg*GnJfvmB?xzK7vnUkN>^vUE5y$?axtUeG_`k)(%g_?TcP_OH zuU_(psi3zi=xoP6tsiM^&+ z7$PS477O~Ybt%xuY7i2_k-gy))(aTxIv3m3bQ5*;^ppvNR**P5_%=PNwY6K;>TEJ< zE@&zQY2pTiA1az-{IWGivBna;yZgch7WCq+ghz69A_$F35^w#tiMPZ ziCs=A@97zd7ztC*e6L0^#<0Zb7V+y!LP~%lK#<4TIZj9WMoB8_XGR`#nBHV40=vhB z`-TgZnK52fqFw9loM5(l_VSKAdIc7+vvr_JxNh+kW*Dn}_`bu{n&X3;)Ax2=Q~&PQ zdHzeN{I;92_29Q2L#OrA24kmZ7sg2c0*pnTK&M4AMe&tAUZ*e8FRzMI7azmOr*N`_ zh_+0=uu=+av|n^rA#op`+)+A|8cIrc4EL!~%Eh`+bmOjFI&9deQd_Ypzo1{S_MA=Eaz(r1~TSAAZ7-#bw&b9Cs|b zzTbaH0)g=zN@U?nPramumEDh^W@Ny%=@Ba`g-%^(QbJ->QYV)nHd-zmA zkc6W6uya{|S2T6Z%ud^SEps3Xh?ms^{HTxE+2LWN%(6;F8*$?}(gj04yo3YhNvWPf zgEE+t(3B%k->#j(cfN?I{l#FdcZ%PD1x_=bdM%#32}I;-I_V#=%D~{Kp(=`*)TF(;WRoEg z39Zht(*^g!cN#ZY4{DZDer$&w<+^M`5&1B`FVy(u%$(8h729DHYJV~Y8!0Y3Er@Z9 zNm>w6KxM606z3O=PUq|~FRgZLM;*{6Ue>m)Abm|Iwi7#9GMhzdWZn?cm=n_ur2IpV zifNNtG%B}+y6Z25pE9@1Al(7Rzh~^4$lMBD2DagzBv;PgmHA7!qZ68djWM?AM~5UV zGZ=e4XXC=5R5)s}O)4@n%mwnro%lhX%b&;YEeJQs90OG}%(n-{2DH*#sS{iO7gCrk z{G*#_+Oaja+IF6=WmFXRITT%nBsry>>op^fgQnY+Q~ZT55?}~b-mY%e@+T1}eqT81 z_yq2%@W6W(P={lBYjX{oN3OruRAF_1VCu-aRk{(A0$ZW?$#+tP_o)hnx)FC7`pI|#Ul)Rhe9*R>5*3ka|}EfD>#@|Zr+r+4eX$w{Iy#wVg0bEvd8-JpVoh#R|l_bIqXX`JmvJEZxT{ z*)luVA{IrY{}z4gYJ(E0q^^j?waoH;)Mqpi&4HH3(|0&;Xt9u)Xfu!tG2@WoSx(qo zCMW4(RV9%vZJ@fMjp_hF#V)hRxhNV*Fm>WwDs&OlYYBfgeBPG4DFwkgX0^O_43D`_ zqq$=_1hDW%%<%85si3ys$IFJe**s4LnxDD_IKNJGKK8sbT$+Wb`X<#5p1t&w)nL3C zT26;s%8b*VBI)xJ+nVSpUR|egQ;^A$7|+lD{n%)lWGA+;zhLx3bBkuXJADMU#m}m 
z(8wC+2qj`2H`hXB52Gc{W1b1CF4)`W5RRnEy%+$jJ0pCxcTMQ69g}U;&+WR;&~zY+ z$@2YydC<@ddA0MQ=ol4C=}q4G;TVj6_(@>fee`#6x~;>by|Mo6%nAQ}2}Js8VdCXV z-JqrndJ_xyNn$v7$HB+Td(q1FzTQ#{^v}?*-%{bSLYJPNfr%5%B>C8%D%A!Iy`y%H zfa`zcx%y>uTj`=A2mTt0SbSpSJ&^(3u5|$$A`H$ZPiV3(RQ;AUh$HLhn}V45SX-}1 z()0^jSZ51e;|VYXbzE4ggJ&5jPz3fb>Yw_g>$;__&%jBr5diURy8K$*-EjTmak5t> zs8dl-l@Dq}A8-i|J7z(y7qM3aXB9Ewr)sl>PL&nql`IBgYK!8-5Mr>uWn*Z|qQc|i ziGwjjJ{e|XrCQm=M8{U$i@TlTD_;s@qDNZILCY9^-8J-O4GbNa)Wa%6YAjdPKMOgZ~7L@=fr@qXi`QW`3FaR)sHLo43sb;TkvdVCo}OBXU7 zBj@Jbd1<0aG#LR`uL=-)NCOUS8T=;Yhr-yu5Dizhzqque0JFqo=w$Q6!@ETs+zQhw zA>-j|RkvD_Xo^;D9Y*ahzke*g3_F4qS+(}RvvcDCh%pH(U4 zz-jE;c))xL#n_cm7eR2p?UH}B2MX{#|828PdL6Kz7!gYjc<`5k_Hc-U>=bb9Tag`z zUd$YfUOWP0hjdptJAP5gE^7b04?#kDnBKd4cr9@yWS;oGmj8u_zKRs+E--WW8|7gS zG9#F=uje_p)*>^uoXN`lyd%JwM65c=(OgKkzh@Pxq3OFxg=peE&GQG6hv3xgVvF6a zf5yX=d)3X=A%%p`&6sLf3sBC~So17ySl?h~@va%=hWv8Ex9#?8@z;8ClLv~nF0{i3 z6j8FT{li5Jsda{l8>JQ^0tqmmEf~)vMvrN!{L~C-Y}$t+jxcnBB5}{Fyn9nk5plV6 zljO>VkT7ds4F=5o#4RCcDFo(j7g1d6sDSXZXmJwo^QNSZ%qoYehPZz$0P%r0JopQSE*zbh_=Qj0MlxZi%Hl1>0 z_~*?Wd9TEgHI{Jo<@d00#pmv^l61Ta$ZzG4aMXBG%w%2+Dmn03P}g$33dn6}X;8a{ z!S0mDVvC)7tHzAq6B_YW8VA3z5L(UIs-;(jQJ&`jwnPxPG8D%mjT^vfUdYOeq(g}0 zAgDlGJdvyto1ij9c^hTQSFWT{eexh%7tSS3<5ZN?0P&%O42%FUwypRjx28%dtvE)0 z(noSdfNQ)uQJakonCA6<65qoxLvr?I>&=@kx~m%<5B*1xPqou??8h9%pQe3R#to+E zkaTYHA!wI8X1;0mztGHNy!we}G3}Efz2HU+B9)v^#ml0SD}vx20>a@P?-%Y1J8$k` zL~ZnAXf_Gw3`E&%>k@l?s?RS_yW!)-6Ci#r&{z4zze zIXB|m?_;_Ib(^kw6}^f~ReZGKFi?+Z@ppM#^J8>&!g?8d?pjr$sj#rXVEcw$1(%+0 z6?!rBF z`1dlB>*={A*Z2B*em{pX=E@J9-9$=x>1Yp(C#KaNyY~LwsT-<0q%W&1)5YfaB#rrj z2+gz0CE*Fa{|A5mvTg$eUS{Xoyl_^H_u?qTJpk1dTXZ8B5dHW^?36rR`wwUJ1~Hh) zUTVs`zNbk0ersLfigyKg4|1&7PyiAB?qwcoQ4(DUN`o>g_g?;QFv?)Zl-Y zP@0?qo91V4_qhME=|O5JPWDCw^KdoWhj%~-0$)*>EG^6UFn^zcP>s#D3dJ}uiAC$z zPt#vOQc6}p1n6s0=KF4@qT$9}`}$PI+x~LDr-93_LgZ>+ zDvz1kj$k3oC0)`0Mn~w~+3&iQr~Yp5=rFTwA$aG^OtwytNA)mH9}hot zXbC@zkCLlJNJin79wkmf0V9bRCdO@U!3C`L^f!tdOaPOCjD{Fz7;(;pNcgi@^U@Za 
zl6Xp^n5hjQJBe*Iv6<1=F3h&H(3$*0H6x=pHZ=_jWDHzA&JW+WiOOR6zFw@xD`%G* zV5$d+BTL7og}llaJx*X@9$0G38K_|P|3d87YU&c)gZ3T=R!TnPshzqVRyi4ToT)XI z_4_K!F{jfA>UyK`J#37Qe{wP9hSRm3fSV8h zi{ugs{A*`$Wr55d+ph==Zb7fJ+=L}^25=}d4Uf&!;fXH zdxvv*^yvyuUv8mNS%yo<+Y3T{_}QkJw#cb>eS&ylFKKmz%*e!lOB-0B1#UVkmjxqe z@7;-h58B#n-e&yO7XbKRd7tQFyx%33Hg5yzJ(h#Ls{m^l7i-5GWd;^aQTAd@c-lQK zwAtW6jV3tl5MY7Y+O*sZ0W*v?_2oy9C)dCmS3pWH_VW$GTeh+s&9uFLyvpGx632kQ zo*qb%Tynzve21&Ic8%J!-`8cv5Xi5dt-=~x6pM0C^vPOW-mE5@ui0M!{DkxhZ^r=n z_{DUi@}@jz`suw2FD#3rzE&IRJYe>=8?HFX{hJoBl@lw*Lbqkn=yTQH+pl;wFKNz) zpFt%@CJF4O^0kZdpu_U9(l6rSCPi#$@~pEQv4935s?nUl!KQV&Gty6@jHL6qjcYjz z0v&(X_qBJXI`gadRG-NWbuoUq^ozSH9x@Z7(1Yy|hN__rlIeAIkE4_quXr?HrX}|Nm?qE9nQx>_F44=>W2l#nMjj`HG8+tb z@$x+7SJJCkGcUXlEe=+FfDk7n(GI8Wt_q&Io)xq_@HOl$cwu?_E)5DEDEjHK?-*TK zB8MCNu4KfQn;tcJu8oo7ae}|LKS&t>Bg*||;CqtWPzt*A!F0F`hMH#<|3&}zz51Ny z>gqJv_v(1(Y*D(H8^E~kfRsWF(1Wyy4px8188j)XPFz}~ScrcftYJq=&qN0T>8o7_ zAj_7x#hl;AYH{4)a(i?i++ldF`IDe_dFrcZxUas4n6>lB8mbjp8SOI}nl>RXdo3VU zwo|K!q2tiRT_3kt=nE6&2_NpB&Uu81H#jf!vSXZ> z%}`X=Cwc5UdTvOduvT9CoxB($k-4tNGN-OR4~Jwe4yUa2PQ`@~pasIyX;Jtw;n8)# zcvSO=n{do~OY>-vkC_EHJSW0;Gih*&EiIXHV?gYY@sMXroZi51hahPu{!XccV4R&6 zuOH+&d+3PqxwM3~1ZfQ$xTBO)d1J!kdD@@Qd5*FFxp5ezHOSAGvI;er%=?l^-1!4d zS8p{9pMBzm7D@cK#Sn2+lJ1_?!qF2H(D=P1*v)oN zm6c$IafNNt2=M*>eY=E;7es z(T^Ael5Wc`=IVW#NQNJp`pjC->axE{on(T^JWcM6g-x})b-yE=jdiraWzKQjx1$#& zzIodNs7nKVy&*J-MTsAdS$%*51n9BqOhoA(gM(quDZzA9MRogWh@>YHkw@S5)zcMR z;>(~KnV7ZeGu_Wf6031y*A&Xy2J(ObEk+-78E&ekIQ4ay+S9dN=k zwK>dm+C#L_B)(>^H=x16tRqPMGr+wWAEWP`)0@*@i($nq)vDa~3vVw0re$3$E4(iXO zdLqGz-tG4NN*ovRwrW3}?H!qe5~Kd@pdleDz0s4cqS|Zqszs+?03I7e4k0Kht%nd1 zO_UThahe*i9`H{DlTq960x~P_5cagR1swSigy%Rb82nB%mQ1ioWXTRQX!dA%1dsna zf3CyUCg-_`j)G8cC!O=127z)Jzxq<@4>vRI3;CKSa{B!0GRC-*is}=_knre8VwOnU z@JdvnQuBk)--t=^=ejNIRJss`J3gKfLa=A5Tft8geXU}qL4 z%B2$P!N+$8XuKVpKil+bQ+tj4rJnQQ3Ac>9*$=FfB(Io(O2=;`a9287$!7U^u~cP0 z!<-l8+jMWrWiI-^&#VJUwdDQ&CvbpszG9d5@?*2lFXnbEQkIH*D-+DbfUSg!>hg!8 zT*qN_&5yDx;E<{R4kci#4({w5YuFI|i(R-pv|(#l29lU27e${6PgUgC%9&f 
zWmSEuVw`H!Vv}QRk)fP$Iu+-#-`Uan|BJ;8ps+^g-3(*jg^Q(^53fIlc9Omxe?~98 zc73mVW;1Kr-L+E7N^aEwbd&WE)?i`B0$i$-Or>%(C0Xm$D=T5#zFsm?x2 z2>5;L;`9=m-nVOF-}{2=y&q8&VI3)^a|BuYBoeiR%DPo?yH2sLUOS4 z*@u=f23!(JUat(TY7zzhv<=MUr{U|_0G#TjEzmy=%uY_Kos+*|oxU^@fQ@NAxveEY zky6Gk4TzMX8iqkrG0*OyY_0mTtUtRQNEqBj(_w9X8!JfoCL^wAqOLVvm{5*=SV^R? zl*SIk+NkfDtz|m(LzQ2ro_||2T=t1iZ#v_Rsn8=+SftZpMQ6k8Th0-FqEnh;mJdvs z!nZ&x#2aoZs`|yv_|H>fa-sg`i7w#{{Cf^eD!MmM^~!{P@|1c@FTWwgl_|g?*S9H$)RYa-<%7qEk1_YS zYc!SP^_cFVqe^c4F^Lr@NUukGLv*GEuqeW~=T1(^C_Hc^|LVdFf1dJi0N`sy?|($HCEZ6#|DoA zdSDg!{~*D?#LL`5gq4=@h$-dbgg04r-u-Un&#O%y7D}`-_=;Tav#^E+w*2^y=Me#@ z^Dpu{H~gdjP|{TR15SE|q{vza_s&sQ4%{ojU3W&NZK96aE(qSbbb_wQpt{CUI)0olcHt=}rrfNqWe z_N_l%>k30RGx;I;R8k5d`_I(D0PucCtzr-e<*+2?#}M}|r4K)Zd?`U zxPHUzLw$Ary1H?po83*EE5hUb{~crPU@Kvd%-iXo;lzRZbl-h_s_a8Rut>slk@<)c z^M1Vm_3*Z#JT9Zw*a4Q?Ttr}GKWA{TOn6o-tH$16@4Ezr`PEKsN9v{K2e<$XA=6Wh zS)PG=dha>0M(kvWUS(IWXy0FZyFwttMDQ~@=um>tGHyG`EQG9>7wI2+L&9I#y@Gi* z)I;&mXc3J+fES#6KDtGx*+jHA-1kr1^Uetp*M31Fl-~V`2|$2_m%8<@stzn2W9^QM zV^(cGBS#C;n)-kH(SzE$-2LqW(y2+UwZ0tU#6NxRjhEOUa6ASiVsL)!tj)Fc-+34R zziIK{%IY_(-BY#*Z!f2(j!QdQ0eu>(X+s_#`M-L^M+MA-ZK-~b72%C0tFibUI0ybaaUqz0DtpqChb5#IkX1}z9F5b$r&e#NiyM6u zfwP(}zd{M%WR5tP3EDBVhKxUWN1fb2{7%c?M0cY(Sn!`FBpI;A5S|{`ZnpXyXBOwZ z=}K5D0?TY5v%34N39KL9kFLk}ha|ba&gEJ|1+mrdQ0i05KEc0@8UtB9?``=9c9O$G z;u^lRf-g@a@p$M%l2qciG1y8CvE#C(uCXTvQK64{XS6>fzXB9U``gk=^D1CiEOu;m zhsCUW8;XCI+Jg0UtAN2gjti(59F~aq5%|;bf0|fSC`XIAmVW#TN}}cu1N@MxAMkfS4Dpi>4On0JCcMUXR-bQ;aT&sW*z0>9x1i>V4QCV)WZqNOS-yZ| zI+>Hy2IqiJj+;BqILF{KcNB?22fEjAj(vXfB2)14GRqb4Yk%xQRJ6lcV02TIx#*5; z?f)>V+vI8QhQw62LoOn~{X&w7=3ocE=KViA51mnt=*air>{&qM<*1-efw?Pi-pLac z`jVN>0DyfNOS1O14h916pqKfC-^fTj_Lm<%s&x-H8+MY4J&(be^PYrU?yEt_DGa#$ zUoHl|)@@($lXHmaN%=kWCM;$%&=6?eNgQcg2^`-x9H=}KRwfAY6P*6z0RRQ00nPxp z$^V$Aab+q19ku?9a<>MXnCK9R?^s!u?VsVLJ6zDG?R2i&q=zR8Y zot!H0=bvCnF%nBfJC_bSa|U{7|{NHxfV<>F=Eo;Upw^lw9rny@5?#zZsn5amTl zX3ay*E@j~z5sgP+=&|)3N@d^zzYGlaI+X|-afj|67f(5nUT=^pB1-{BOQ2ljIlo0t 
z=wYXI@sTsSg}U9ASkYeMs;2HDznkwXdcXWEw(J1WtefEKA8!Fwxwe%X#_0LO%CI|w zFq@7Sl{`RAliQ~!MvFT%_+=w=e+s6CU$7KbBTn7a|`*tE$!4J*A zI9Gq{JjPRP&c>#Pb--6$5dGUTm>@VbnA9Vr`{ZlqoNxpx_*9eCd_XOlm&KmDk(8B< zR#-%yQ>wOnIs4@@) z$#=xyf}k)!A!vl=)mX6xl7w;CmD&D|a6adx2jUi&n&i>&PECtC95+%<=l@P?4eQhV z#tHa4q={5GILaU9T0V;HhEI9all(~Tuu@=p?VG}2M6#{u_V8NJ2+1f|wV!JIH47jg z)MeklLQ+g1LaIqUqNLe(PJljIs<4H#>Oo=c?hHLjBjHugLkJ_Dc@E_<-;50<}SvA7!nzrTOt-DW7zO@DPGd z90ayP`deJB-rBfqTlSb#Ey>b<@>(|2KBF)o!e4ZQfXiHxTE=Oc@i%TB)>ve_68cGP z5FGHw{2(-C*&#HM;e4R<)RPXkU?2URzQB5P26r=HlVPU+Xwa6A4nC`(Kv%lJ75?B* z?d1gQOf?qkdMm^H0o4uNLQyjQeuLO%xd3gw9cNLcDc0gMtqf7~M>P;dXHt}3B^+%h z(jm;M)|}pwJ1RV5-Wtzc6k_Qn6(Y0j9UjU*_&%=tru^JZk2M|QXGI{-%z)UjXV4z% z!r3KcT#22d;h;xhp=ICR<3y~0fg&n^_0U35%1O97!QJJG zN&KUWAss&7vkq^{=EzKUsLq6ht2PoXLE!s-N26@If7jY6T=1TT zs_H@=#;qc7xPl$(KW9p1g>1<`Ui!q!9p^lPDm-I>%{0 z_fs^%YKpb`tdUsV5Cfs=4hgT5fZ@H=PLt8i|E*-+} z1mbh7N=5u(Ot8Gv%a~v-V<_<_OwznU(f&8!Ym#R@?r;^bJvFt&zxkqS_{&mVRSib^ z^a3B1tc5(#IPYLGL4cO8ZWf2Zh=o6paYZ24T$mfK9y>o(8ve>9zSSQg#)_33V; zyHmQmyHh%)Q#x)!Ktj5^JEcofy1N^tyPJ1-zQ6Y$FU3n?=00c6-fMl<9$Eg<<)NHy z4OF3GplUIYTQ0;YPyWNBgoLs}1*o}u^3|9a2>IUcud|hXhf*_i-H!P`fI&e|z5*Y1 zO9aCG01JjP4f0AaAmzwJ-a?E>?hRYz1~HLi(K=6N7?_ zKl2t;x9yepJ4Or7n2Q%0qDsB}B!O9-93E_kEe5AjN zid1}|BvuA43pfvUkqto4IKs=XKmV?8I7<8Lr*}++{U!)60n2`ow}H;1IQviUwuY#v zepMDU;bsB4@zL2XOO+T9(DMF0WUYAI^+_u$l{vGkPbvW5=&Pp18sAD=*W1;e1xw28 zGrz;+wfvOS;5rg<`jQkNF;OeWQ`gf%RNb@|u%RK|b z>Cfr@$bt|xprpzl_}G8aWm=8L{^7l@7{4NmYmmCf1if>sy#XAg+NCRK_ShJ0#2{vr z@Bc|z4dTB!_}s4CY`?f0_AUv-csJ&}U(@6mT=S#P>^TEVt)dC_7v(7o3=Nk0PE_7U zOPbS>wrttuWJjoz zZJ4+>k#ey+=c}CE#QK~T%|Zo11<{!oLcTSKOYw7N?OD~^JD@I~qglS&HVJ*YvDR&V zTPP!jFGiH2iDf)%db+Xf6#mD2PVk$EhrL-k%{F(mi?rud+m$7>z8Iu)r?!ZsNqW0N zH_;FK9rT+~Rv}c?2b$i&Tz$h8J|U}9#5 zWiYD-_ex2P1JNV1r6RqiH1j?))yO|p*B(;nCL}TgcfzM=#vsE;Kb$Xq4Q0bXl#J>I z>4C}cd@hQ$Q#VJYF2y(=PxczcIA3+9BzLMBR~U0;xP*?K3Z+-vBz^U>F)O&1|It%pjr_{{^ z-wd+>*#`2?_0fQp=DcvVb(D|}{qMA-cxdxB6N_Lt9Ue7YcfHaR#(P-}=ywTJt$uj; 
zgihk$mY9jcQhH2n_Q>*}&kK?93sAW0xKK?$Bzd5L2X-_dMvYCxL!Ie)u-2kC0(Xvs z{2XXFSS??jFw8pEobUGkrcgWSZ2B5*coiRSurqS|kJwlEYlPH&8ZNDvipVk9NZ{gs1c~lo%K<-%gW2k>hZK6VtXk`2NSnv83cE=%_%!v>Y!hwo zT6HYDZ(g~W2W3MOQK9%+3>pAdOs&y=kaBym2VGsfE!R|z3o7iJVSqy!UXrb>)H*od zeqWIHpTq+w35z&Nb-gVq)h6?9%9o0RIDw>Q;Wa zzEMNl52(z`n{o1%n(UZI)qmKo*EYJSk-oXPVcX)sAF*A8r%i}mm>Fy+GD?OpOk~+4 z4F|)xga7LB=!=Uk!u}1ooC1h0Mmd-B6y%kMJuwHHPD?A3bbj?xxt}@tvbL(}KJCdY zAwf?NjKzJcYsJ6E+-`&p5LHoQNgpa}7op#8daUphe4r8`-)Xk{NvSqRp?=#%+hxEB zDN+>zCmcEcXkj!i1j_O0f!$NJzL=N#?P^=@a*Rq{6M3#mukUQYG_nBY)}YihyWkfe zOiz`BwXbdDrJJP6A|b&wD)nq+uvjYWP9~vswU*K|9X>~>Go7KkY#qO4AvW1D@Z0{yyELj?HCQ>N zDZ7OI+|kYr)H;Ac1~%)$WM-ZNF@XHA;0^-*K*3a(eB17={$(TNu%z?(#?jG`V|cB& z@}llpmGa+r+cy`tHE|r!#dI!le<*5Y^?{_AhTA>5>T{uaBwFR&8=b$(sXuBqxvjZg z+?Hnh7AL%=HR#tS;vh6^!&_skK-@i8iC&zB!On}a;)$cBP3L}LH**I64pXk0NOZ9L z7$l-1nCLihu$z*B%D*7-{JdB!T8Q8PFS^HU>Bz9Q;5d;?73MeGT$yefp;Qsf^1&ZV z&AE`=;PWeD>~n7*XmiNs<6RTG9BuhD?2vth0Ni?l80T|{r)=<9CkWn9_SBeRYRzERqoSpIAxZ}P3l-9(Yk=O^5@*~i8Loqk>OUKv>R_kLs( zmQI~nPXVFctOL@F%XPxTI1O_kqPY%{1iS6QoR?uEe2@NHf{Ge9ULNlE|k0;Zfk8#_yT_;e``YaAZD^wLHZ@I~! 
zo45={IO~Fh!$&nq@)>=g`}0yKQGrgGMNl?f8C#R8a7{21Jx&wKMr=v0g{+vafel-h z2jgExVDsse{w_{XUDm10{-`h7P2+)cL}I%v{;^$V**CY(P@x z-}UhKA!nts<|f%O?aT{D=h*h4PKw<2D7P{?egCd0V)3VZ$8Y~|^1o#wWOC}y<&Xag zjm}y{k{|DZo2LV`UqTqw1Jqa51>x#I?@}>_P6Phjc;+sPJNy1#blA6!!eIT~PrEcN zfJv=GVE1-(1J?H^ISMGlx?PM+N=)q+%xO}?vAr;vL<06Ex53c9QMXRu=*>xY1k2Af z&ebOQXHiQeIn%{N&@(%B$bh-Hl59rsL_H6I!K1-bKMP))j}Xx zFC%|$`o?|U>W#eT4|R3sXxcnVN1Zu}m4bCa-rE#@c5->BaK6Cnb|vMXSx&b4twT?^ zsxU!ApXB>tcPyLJLG^p@3M{p)fSVmFgih(lY_b=rv zzayh(!|V-ZV#djzd>%5YePu{jo!Pq)(ccOXLZJEqsO|&ilAm$N?IRf7D(nCZ zfXw?bWjgwE0Rtx7c8*^L0>6rQRzYfy8;%n`k6D|JkAG%hF)K#R%E`^j@A{~pI+aMK ztkJ=_L?BSVKGiYgefS<9Hgivc&Mg0o_i3r?8OZ-D1Gr% z?i!yhU{EHjtHW{bVE$^>iOVtfsf`I(7J!3we3ew>Wom~7^o}~(Q=x!{V%%O0n6~5w zLQ(=A>>fyukyu7qQrqH-%#F5P*0;kld5kjLDld%ZOn#)PEx{3)Y21Ep`6+>V2r)w-{Gv~!};cengAX}JHp zTE3!pNphjLI~UI!7mQ6)=F84+h3#93=+jDZryitku;ZjIMOBOttE!= zt-EX}$N9&2h3zEHg2?rHg+Cj=>e~P;XwSRC)vi7s??Z6e$GVx<-vilXzRL>5@e zm0~A0+GojSiDOCFL1yz&O3LOHp@RcAZ3dr_A34pw8-dAV%M-kjMMem^r z9bx^r>wv=9I1s`Y;8VoaCIT1~%coz+kpvf2Bqli=R`MVYp+;NY1g#h;ITP76|NDX* zL*ERBEAHvel?Bg0VZ;-x;*|K{l1LmiKaVt|L0% z4C5z^WxOQ*~U15uV4`DscZ>J&bg8cnN ztBbBaw|&=l9Z76|r?S3=a7#bi=HVtXByNw{XM8AI9(Mw596E~x&B25!SZ?WdIN;C+T(h)C`lo?BQ|YDK)Yaw6t)*4& zB5xZ+Spa4rj&%@5EcOo^PZkY|*UU8i)V=JV^fneyuxk(lA$p$_d;P$TWO40g7M=^^ zyA;u!id6tX4#TelVuJ)`Hv3|bOKaKDF5?}F-(K+|pG}f{(HP(_0k^nI zBWnb;g>&H{YgA$;w&FJKlp%@L{oXt8=;8DQd1npC@nHMZ0)^}q&{odE#k8Cxu;^lr zNUeTJ-Ro4*HjPA_R{jz`h{2Vs@5lzV7EEua0anQ=`(4W#uP-M+10ED?qfWaWm~Hb; z4evQejc|l@5rL?+*yGCxL;RHfs!Q+hJQ~rjxXukeX$vn+&7x8|(m6Rw8sY*G7k{3n zpSTyDB=raD1JJ zmw{X-O$F_E7J>kp`Klb0eip_+>+~y$wrq;GMQJ8Wc)OqQXS>r=hspmfLwIz&>A!Tg ze^`N6bW#EgD_-XB6v(73e5y=*z>4z(Z5Q4<_+>AW97*&hPu z@%OhM0l|k-gREvY5MvI^*w3burWr7QlzQ1tPPs=ZeVKFp!$70#g&5vo-}>rhi=QC< zA(T?eTeOg48B7=zOYw45rMtn5Q1!JA0()OEEDke~%t%IqBj|+z{ay#|Rx`pf|C=^O zAamfmP5f^`q>4W_iYI#z_0UMMEE+ZUc#C-qHY-{fCsSN^vVy0LcfLd!*XMD6- z;sV4a?-mdfaM}Sd$Pwp3c&#Q7Xc*&v1eKOXnf{xvI4U>;TsIb=$pFrS4}+AHb>S!D z&9{ev8uWR$Dv&3Bv^ourYV!@{k)_1e#Lmg9+AaIR 
zD?~m8p-8gu_fqKqT?*I+1iS+uH!5L5VXIhsknOC<_Tl-Tt(UE(QLDS<=8?bt<6fW% zo#edKYB?Uinxvb-Pn-#K1M(w%>sroE5NBNCT$>a4Wn5>N`fhpt`LWB?)OKD)UD{G%%Q%QULewm)4A5^uh-&+Y(+eWdAiT z`2M8eoIx%sIh(`VLs5jmKR(I`xbI^lH03G%?`E3=drz+8ChEKBwF((o)g)4&Fj`?yRKLSS*B}54{UsW)$tfD+yF) zA_++;N^gjdYX81bGxPvm9Sc|5Y~Q+T`~^?rkfEBXor?BwU9#TehtQq1<)_-bmH|W@ zwNxyg4zWcDtDaLm$=NED825`^DD-I7Xz3__+yBTuDA5vd>yC4t%gu}~KGy-N?3Fk5 ztAgFooQ=kBgZ)^WS$HHm*=xK{GxZg}J1Of~H~{141I`A-W-y?b1t`*y@>9q!Zm0fs zivVCMey^1D`{I)BcipM#gs)dSn4fjnGUW|)^`flPXBa`yYhr=2qt@T;8f)Plt9(dU zttl*?F3PY~W&ZYN7sT1MG-)2vZ{%kvd`KVcs3@Amjlvm~#O<)<05GY8Q$g;>?di?u z?n5;(g@%!ywg1skv)r{B@|b!9xuY~XG{wgA8~CtAQ)A9t_j zZpX&|~TuXb_`l>X0_lFmV0tiX~`a+gGJ_hi9+@l;Iplt>UKa1_sW_F}CR#-g>noy0N$keZaBP`>UK?NmnqPmtv#2(FF>3&WwaY=_PRR9Yi0~ah+Ys zaF4EOFdE!8Zfd$#A@mkfT9%-~pf}O0+Jh*7cnfVr*dXu{@drRd)(*h#?^KaF2oc3# ziJ(90e4Ypl;tKg$zRB|d+UNHXMsquc=6I0Y|2v?MY5!srG0;2&WNkmnp7PVzkA48) zT#o*+y5j<8m9l%uj^Pz@Oveq+^fSFg$p}5Vjy40BdB`MHg|FB`1CKTAQu@&+NgHI( zF$Xr_ad?Lf?zWXnWg@W_166gj^0!;m<>O()46?sE-_Lm( zCIKUNr$+ZUD*_u@(Iw%~_-p`F>OI3gfG;BUpM$hD+xzK9&O^|t_lJQAERehR=7Hik!R*U3V z?(8I=^zW{R%)WbYDKJ~`;yZ(|OMi4cof;#OO#QKP+xOl5E)*w^iLWP2=e;+XqhC@V zwO;RaGGOBxzoG$8K|F3_ejK|A>ini<+4ep@oq2{WEO-wHhm50$YyR#VFP~6!cvs*% z5)wjM*>?dSyDRR8iWwUGg=_Zn2e9-bSb1R>?j2)OD>XY|c&GsaAf~Na1;_?Qm0^74 z|F1#o!)5;P0?l>70saes*gokT6y-OSKi;b+S-jUUd7Aj%*9OBX^fM7Ut~Z4flB$ml zxmO+$^1z{HQoAz4AZRiKo@`B|Np&C4hRzb4qUHZn2p{C9hK0qo+How_m6zOT@vt@K=frg z>>K0@)B2hi>=Pe}EF`tD4zYuQ$0>Q_EcYFR8yz%c~#&(^rZ@**?V#7=}1PwYCyJ z*yy(|Mytn5mxZ%~E<#fj4eKn#g1ba!VXh2h5c!hbPza%<)!ohGAsFjz}7Jq6D?i;_TW2*9Qlr#hDedH<^Tc8L%^IwF)jy^PGtB%`hOtKv$bSb0{DPQ zdE8@B_$Y&wx%9A0dGY4&-Js|FHzd#Ozv^G_;4dt^rNKUT^c#$3+q=&&b<2G^hjxeH zF8S+I_p*XG&6sQ{7N$Uv@6b$m%SO1Ve=HFF3wtO5aR6n0`qo}%nTT#P8Hn=0NP)XF zo_#ROZ#)dc3KM$kwSZMS;TH>S5i@P?1DH4-BVx{Kmy(b?}v)1_>zXH?=+S75z9G6 zkksiFESV+g;;*=R+%z25_QQ%!I^gBT(|Ed4h#bn%vky4w~h zn&Ny5f3zYB-=~kI(m}S%z0o!BSNi5n&)UGw0YWo05<2L24JtT5fQ6xPSWRo&pHBkY zRFen(gPsd#)%-uIz+RgRSjH&p9pl*z+>lqcphC%o81~-~hO(@hZ1d(`)oA34q+=FV 
zSI2Q`N?2l*SYqaKbQzU#*ka~jX=)>RVqy9a`G;$@gTZ8KdJtG=UA+z8VEa9!4BbB<(Q67wo*&`D3=6(-HOp7t@6x`Tg1Y@-nX(Wf;0&s1sioqfEu9QT|SypZ)U@)`HX<( z)>a_5zU4&ZCOJQ7E=7|aVC_3g=uZ)A8IDbdc7Nsc?cq}jd+%`{X;*G$I7 zHACu(r~{bVllXeGpClI6%JtV5Tdpl(Hrhymn6dbw(MQ2&od)2VAQdmaInGA2d24-3 zPP{6;3jQv*8k1YoTIvyKrnoBg(S4ud15d=t6`=qiQt&Y!35bJy=ryh_>=e(Rg6HZy zo}N82+e-R#jybz$58$}uuxnK@H=m-WQE!pxibTCF3y3EeiW0L$u55b+D&AD$9?H5I z>l^w%HIm@BICNCyfYWt*zZx*n%Vn1})4+PF+zXE|mQAl(Q*E*~_5U8X4M97!Fz6uL z6dmVSnxH&rIt)yI;R+@MDfE6%jXPDi7GzOMbTSY9}H18{7}p6*^g#PFj}EQl3s*j_&%V>0TWDF*LxHtd9d<-o|3pPW}~< zbB=Xt<&BVN*wI&E9??=-udyT%NW4SfDaLk1me^yWe1P7(!OCl^C(r8~4Ay986e|m8 zlXu-s46ZDFC{ire)?t1Fp??|)2?-jW!`eIHQgX=yZ;fEByP&WhN9N!2OHwjZ%o!-! z_mn{fog=m90zR8P>c>sbR41?0Hbb%bm6O6TGDmUPv+dk~ZduYt@TV=q@ZwtM94O)e zKL7X2I9RznCG@WQ%xeJb5k#gDeML+f(twCxm7EnPRW|un=!XMA`%Rz3Ia!T~Q@Z$8 zTyU|4iIa@8`qBu2%nxM_A}9AREyHb>x{CLu{ua`!(|CVj5VyRZ3UgCMAhU`&sEDrD zQDV0uQ2q2yy^AFofy@CWTLLRo*IL(*c~i6l%@0XrZ#CMbb~lh}wwbDixnkxCzk>8Sr4rdWn9 zL~)g_6lebPR>XQ#ii%hc#Ejxma+G;rl)K zMN2Cqm5@ZI4u@`c@l@`Z3m{28GWYgIAH3xL1_&umZzca3$Xd|z52ltqqe`bdQ8H7z zT_RBNdNEOLx`c{;fxKN43(E-Dgsg!~2v97wYBs^M;)!hu91I|HpFsmvZ6ajC_`>;` z80PrNydxik#CJ0`;Qu}fbl{sJc))+4?jvhK^dt$J%83enYwdpQHtdFFTzocX6*CFsnat@l4 z0S=Ikj2EDeQ{E7AWo3x~f7j1bi1#0{_W4E(j9*th$44mY*8A3(+2BvFs^c_wPj=#D zPSyYsNi`of@v$fXLw+pHMfdR5U3JIPhHA07n@HD=2Y1O^fl_}jyF$?lK}4#bL$m&t z`9sHg-5pxYHlVC`s0aB{oLICS-YxuGIIK&9vc;5e^3zRVD?>eYuoA6cV$tP5C$4hj zz`26&9n-d}n#|)mtp&CTV~W{bcLk!0K&$+S=-bbacM=o}Yoxu(GQ;;(oq6w}!u5lwjYlwKT2`e4OCfHDRV!u_|nQCC{ehTxO;r1&tI1?=G< zsbQ<=6N2p7>JAvGQ^r`Z0PYynrUcL6Z2EndAen<&etBH1P28@8G1wo4i}7<(^=$P5 z@PM}s#{iScgdg|Xul3EG(O+%z8pD0sQ$~UjK|MtU#Ye|^>wF>|9D240>!M&E&HMjC zztpV75k15Gj4!UV{PLKzw*zwt?p~$QD=GKetE#f=O@AaZr_*XMy%Sn*XDO&pGb)aG z*!jqnWpmY3<7#CwcEzbQeQ;hq=(ggD$V{JG8389r8SLx_%rM==q@I=973igz4h7W-t9ie7()KIPR$AfCZ z6w`e|b=o4PC1wQY`b;xiT@;=5R1yrD|HEFh5Wh0yO6 zV`AE`9-QV=nOhc@Pl+o+u~MosD!7znJh3d6UN}M3B?j1kJAM_8KQ4V+E@&S0ME#<3 zfqz^d|L~dhmfg^QLR@guNSvlT-@@v+$b;{}#A?Yh14B(Z?Jo!M`lKD`BuYY4yw(`a 
zkAnHolQwtJ!f8!n?H1Zv;s`=I30AdW%zgy2LPLFy)4Ru9s z?g(dmVpAhH5kjKbsVuUB@v65qldu!~GG+&TVkE`Bgjvn<<|u&r@pK9-D=SXN38YnU zv4lti9Ma{$`*yzxrA9g{Zb_0q#_Tv`IT=qpUsH2fEz%18sPQ4I6+Xe=qp_G-#Xm%| z*>~v>^JwQV@9LzLy;FnvsPM`$e_4WbltO>8b<4n?8!L8JV81cqJ@R^?wzhsr7=A6UMhgb$LxvWY$p|Z1f`+@sV{r{P0@wUzW zH~?N(ReFB$2AmMZsxxJQK^sJ$;M?Zx}|KQ0NZOsh*orLG?v*|~zU1-y6Iix8I97y}|C^D9(Q z13lP3ODFKykYHUYe!oaa!kcDH+aXVhAg4*F3cy>4zp}Qj<*z4ogBg=?M6bY}Y@*UW zDt$s`#@p?80c%l{^9qkb;ef>IC&Hhqaelk{E{J-=ULbkqtf;>>UMs}l|4wmYNfFUP zQuThLj~{In>55#+m;oq2U>nvwom?daOs-;@RnN<-i6Flf^54Ks`m;7^)hcf84v4>H3M4FQ3*{o%~R7UQ737BesqMn)oMN?4>CSxpI#aI{mi? z!*0Z7(u>1VC?jTi`gaa`S?)#G>OB~T&^+XM&?;4{Ql99ZDgV~=Al0kY$2@c;(moM| zC7fSuBm}#7Jj^ofx#*S&1S)OotTU%bqw4&oX0gLXv^y+4`B74C42V;K1H?85s#8DuOqCZR2#FlpnuOg=O($%LhQJHRkeT zq0VoUdC*^wi0xoR2U^Fw{q}ls!7%>vv8|nH&7heWLHLGvqXx)+GhNx#@m_}vkXPy3 zEEwn5n^74${iGo@H)kB@Kqf(EgT}VzYIvnE*{ucw35lT{{4TgIOr%LGi1L9m$H)bFO#McSLub|61Tqb|h4epw^AG@nRd%APXigi?lT=c6^Bh)9 z=B4$auxO~9j%gmH6;^7NT*7FS4^Awn;YdY<)YRyZ@`XMN{b0BwG<9(!vXqq80Gs8K zqRQ;QZ^aB`$Tej6Qgb9-F$fc`U@X=hWD4Bs&*dz2@N!1E|HHT?O*MXEBEtHYv6wiWkLm1)FV~6Rq}Vr@gA!3WsU!Tm zbH*A#c=+WpmfUEw)!T`-TIID>Z;u@>L3+ze{;#yZO2M%FzoM zs|sF$rOr*j8BM4+>g)jLQaIor`1GLbr$<6mJL!%v$A+mMJy|S>g81YhR`tJ4(;OfX ze(ngy6`Od-1xKnfmi%#;aGOs_s9`+HJwj@B61M|pWRPf42Gl2Ov~1Qg!A zrBZ`rgzzOt$Gebe;_aRkbCY>gVC`@DyWXlOwXvVEJ;$N;x^X%2nZW#_uOcZ4)@#ccfs~# zw2ux?`a6Ye_Y%}TyWZ>;g_o6nUPdBvXlwu5naNP@?CJ7h=Sl{@oT>d*&=g6CJwUvB zQEh~uaiVH-EMy$1B1!z`=4;XYJFAA$gs>{FLf#bg1I{nJ7F{ll5kEa6c~Q%nKVFNe zFd_T-=3VD2uH5LH#t$%9Rj8SH7pWZ2yI$brY|&P# zO!*tgnfIfCn=Q&nF{<51bG$hj>40UxCBPX^ZJ`Al@vVD_B$2y+Zm{$OsA@<~R}lgKZp}A-6zZUNi(ue0qAw`J5>y&hbSi?3@2?ujaI_!;o?h$8hD2ZU ziu}J_NNGs0-;l@o1ynN;L!j_n>#6qPU55!1B>WE$|CgwIV@+T|pqZC;^ zf!P3mlL%THS(?;0MswnqOGableIoA2q4~ndt}S?m{Am_VkrVSpW{WU!p_QE;gD^}+ zL*jk28<}>nLJi+eVfq~(J8&RUam$T>XA4m-LNT7xYRKrazJ7h3W*Ydg@u=`=Y`hB- zuYdGQB{|$IykWJ#W4SfQ??u7BMb3CRCa4(_O;HzW$Uo3zuAn~(AK5>AHtD7u9GzWl z!MM##jo4ujO%T1kWGA%f4RoG>fIX6eu>bW^wY1j8QMZ}iozi=nY%4& 
zwU!%LATGX}ROIL`e<71=MC4r*v)P}4pl3r-X*-q0*vpx4sEOiw{vTnbsB{c(8vc$F7%sP*sfgyX zbmu6IRXel^qC~+`>M$~5VHGOD=DZO+|E4Z_2_WufR z3TOlG@jxFf+?EvR|6-K8w`B3E6=zllfN3IX~Rl7`wU@cJ7Cw%h<)Yx0YqXnl4RHty$LGaJL! zEElX!<%!=)6f=0cYi=Lw;+4MU!eHR!=65Mildku;5{5!h(GFJXcNmKABvRln*8IXB zi-IlAvr&o^8(xjXM}Z0ClsUQZzdcynT>ynn{>ix@IDWL03qJ12=iBrKjl&Q?MTsLT z81}utO^P7vPKXa{RhKv?x$i!`q&XynwfRLsxC}i1<MdQ7Wb2V znX<0bj3`yoM&~=r$D;tt2j%j-kjW4K?pCvL-k+fYgPF>WE`eUP_9%ubsTTJ5Z}lw= z%CpaQHS<2jKL>g_q5i}#2&BO9(CZQL^Lh{I27wda9-G&6TK(u5DhF_Sal#1Kxj$8Z zl(NwvS;Y%@9-B3EksZuhYS)==PXlM4N&>zng7Q>w*2O!%E6Dg{ggl6s3 zuzNnMkI(#&s^=_z)5^7b^NYP3&T~;kKXjfE3g>}*0Bd29A>jUnOtsn?^8-(I*Yi6* z6p0vmhrkwvI?lk!R}MMqZ?d@!gALVm-8WxE-|2_tMf|!&tnn`D&zQCaw-U}V@k@?Q zUy!5D-wBQRj&u!z@uAWa_@l0cp{V59Ft(#WBkag3pDpd|CS)nRbku>7a1ucbz!mq% z2%Ify_{}c>L!R9cLR+}IBU)-04*pDFXp+_g_84H=nA_v^eRrtYd40Vf3gR@m@9Hd1 z)`4X}n!J%}>1@fgzO0Q5i~4wSOeXofa-5`mBI5hqQG8|!yi4NlZv0eJg^l=Y5wvP8 zGy8TNZM-sZKy;zmZzp>C$U+H~R`Wy)&#`I);-vY=HIBD~(DF5LH@0x0Gv@%5ewnseZ zjf#jy5utiCgnz-p0&yi<9=t|KBY%2>udJO~w1h~RVd;LvV6pfm;Qjbmn&QbB;e?OA zFY@_~Q6re!s!yl+v*_@g`wS@q3!|vnE$ab8&~t4`0Sra6oR3E)zlxS`9{t;y7&4ll zS?yR9c~|4ifS3|T^)yh9jm|NZrUjDY|7HHl!IZmzjJ5u*Px3CMrlO8!%O!dSE_m=n z;^YrxvaeYB*-zjQwlRkCcrBk|$ zAwG^ar&-zojAT9i1c;0QX)QoqmkA_C*x9NA`fxc5^mn4R=TBc-e-C=R{2P888Y$-O z)dvoE5V=7n9?@`vr@u@3{z!$6k>;OxOuj{b51_6Rpo(EsGmTv2YQYWONFoSJ+LeS2K8z}qbC+#u6t|8^H-E9p}< zwUrD9+V7BU7@|en$x1g*Knun7oMLy0QMDP^lbZ~1rbLtD<`%-g=Q(RUAvgFy%4K8P zWvtQm@+rzaX8LUtVAKeF>Uk!(=uEsCAbnK;Ka)JH?kW(KGL;BNUKW=DRjVXBQv|XW zzjBc;^3t-UlNAx7ndi9YHY}rpHACqbD1S;@v5G&KYi|$6lwCMo$R{tHt!<89gY*44 z5}0Z^t4u97NN6qD#!;5`J^gc8Pv=`G91dT*QU;(QYdW zXBg`)W1_w}(b0-8I;S26TmkMHFBDzkS2SSijTiQ4WLll$jkf!6JQ!tL-)U%JWO;&q z__wDKiS|#gwR&rnG^k!B?<(pk7?m+BVGVg5{)n-n7EHytFeNzXYt2FjBBC65sj9yy zuNS_PC5RgoNI8g1nXM6j?AVCikd~ic*=K?GFNiiS!8nsVzC``sSXq25<+k&zpJ{Dd z*4~6jCD|&FNW!fp$`=oVf6JV@qf5LJjK`TKf`PY6>M zEGbjdt{$Ey ziJHEKhn?tt5JT=ZYn6)76cJETFmx_-0-wA64f7{G{w)cj*oN%9gQTgyf_1`$kg1e$VHoP zV+}EZ&m=+dBVY3@*1w4%U3{)sMRX 
zALH|GiBFWAD(&?H+G=$F;*I0P81AEiG2ah+VF97N`wK$+g_=l_Q%S`tb`8}=iju!jZeKtjCX3Jv+`U$^^Xw=6A$6;4_^JSWyhzl6p* zM{H<2i)M7AsK18BN|+2ceKIA!9Z?i*p#k(tWA|vcJy)D51@Eaw$9cPb6CE-q?jy=i zgqq?o7v@CYeOs|Vr3#0&2Mmm|=|r+@pXE`fC7I_Qxqo3&O|tIl1OMYv3C4HSGj^4~ zP2&=BF)-8+gh+9v<~ubis&C?@cnJKs*+*KOh-=*|T^=!do{IYifq+ITb+22xBj1(- z3^}SUJznW{*Z5wIhm(gwb?m34&=)wS zq*|oiCuuJq?o@T6sWJoYNsMbAEyakT#|}I%o2p55+v>DQvzsY~+j5Xd&#m%TfA%e$ z2X=x%sDiJ#mI#<7wT_$C+Q&;UiBj1ofr}Rp2YQLFcm^T@|C-1`x320>;VF{zpf%bJ zmMD3kXvyg9H*r7H5GGnv)Ckw&JVIRvbea61s#z|zwK>{v>VLsRoBqhSB13b_b-IsX z{8gytQ52JI9%H94<|?8~wPdIL>XFlz)LjWMF%%F$?7$FFVX9TZUQTHy&ZN%SAiPbP z#;R2zBhA{T_|-Mc$EWX7Y;FAM76=2?H6Z~5HN-$Whd{mQ?uh}ss_@V0yN=R2O=vek z_K(*%46HsvOq}t`Agtfh^|r5dKy}GTUvVYRHePdkIVo=V=F);+6D)5=!SM6QSKDu| z$^d7nIlHMwO7WTr_CQK*jBw-SpPqTgBfUTTO=c$ajZfxh`2dB$(%2!8lJo&Wv(-Vg7GtVw3gnw53k2m3l_pM3yx2 zBrUP*aRz}#b+qiaQpTKysyyV`Mg2iNd=01fCJ&Ux?SzLnZDLN0v_+l)US(yepi#A4 zAFqKN5g-Ui1j*C@5ngv?o1m+iFNdRgP}wlihs{ul6^80M%w?IA-q21qO=w=$tC zz&2h57RFDdAL~y`Y2R4|vXUpU1S(GuaxT**wER;gMVfad5)lClJp7IJ(gE(|YnaT7 z!M4l*S{^XIE&$?$mSVMkH@~50jG!#jg4OIG{0{Q$9&Q}lVdK`;kQ1cRF^AdXL64Bm zf0b+r>#maAfk%9xBqT?K!_DO!k`%I{gr=sG)gnRUi;cw1{U1*e7>)ogA|ko@WYz}U zf@*%4uFg(Ex$C;#rSRxY-S`|<(3HW_ifb(ilEXE z@8`#ULHZ}M+IuZ@D`+m4+9i$&*)i;hO8dYT88R?qiW}JEZ2cva@PV1k267Y+df-6v z$$qO#{ZvA0MbIolSFT*?2R#CB=W?k>lxct6tIbMqoPrS-^#Z6#Ynu(Pv}VvqvCL;V z0u!CHK~vLrznEa2ZIjI`!lYE~5dYJk7hizaxQrz2$`VNA&z$5O(cuLoe0vlHLa|HS8u)vP<-7Grkn^TX!V>pn zcw$%(RYwy8ahbz4gqNY=ES*!rQWL&f(PVMV85v5HlG#XB2#{l}w|Y^ypXW)CexuP6 zh6KpZoH$}a0;*7EKY*SV?%>$BfU0??H7JbCQKXhXLb!ZcTt7q0TAHR}?RS5osBlZ? 
zTmMJo{>NO^)k4==#_*k1*wkD!g8TBZC}QU|QFL+j*_S7Kj=tcbuzx1Au9F!xNq$Lk z)1W(UQoP+>Y(>VmPXDhx!!MwL2!xUysq^;#^_-CZ)fHTz05y;0qsm>2?1GGpDJv0R z`uux zhJu0^d?*o715IWgBdk=6SgV7u{ZN7D(KMdv$m3}>)O+IosA#yT9f%P#o9B6`cUu$% zv=L>%=bcKw)>z~bDC7M$iYITU$6BRYr=1*vYxe(7$j?%I&nUZO+Met}Jrq`ADh!_n zK+s4IUrd7LX&y@*i^NqdUu^TUwm3SFf_mKU^tdc1!(*-@*TdQdW&OXg`rTws>us*O z86PICL+iC84M=o9tPf{fbht&}e9o1L!tH(VnB#c;43|75`~d7AB&V zW%px?gbOw5Td0Ot87o5ktodCepd=W%u@T^V#gj9QMl{ce2%H8RYE-}aBU!DRkBx%_ zPbhTuWP_w_c0sodC+Z{cwJe6?)BGa$XV!G2Igdbg^z2(rRmkBN14bzwAgcVoh zd**wKFPeHve;b}OiwFJzXet$m!OxBS`sJlyTQ57#-y2;795&l(RTuBcKCg~4(aH6S z;#zp#9|f~@J%^~<3z?60Z(Co$1HXZj1eG4Od%n;m7YNGZ1?vx)Ds$pV)D%$iwrTo& zM;of8-e-~C!h;~g$cRm50yE3D2t(~9a(yxKBqq&2{|8EiTro>vqjm0eR7+=x$SA{&y;~E-YGHC-n9&C-h$jZGRijVXJ=Z;D7z+Pz3z? zwzIB*|8B07k%n5E0ANWVzu?p7y43^$@FrmkjiLo|cE`lCurM(za`aDXB4k=roboPp zDzT4|PIh!JHTkmZRnE;Mya3m6+AH!Wj#~EBOuN>D3O1@=;i}vSR9yW4K!F5AY85bA zxV0`b&cK|Z!i9Pt40cW-SqNANzmgeKlxYi-_<=hMlm(2+Z$Yane()n0 z8#1#hRVP6wM~jm%So$ESbp@#9{?<G#NSwFNXe;Yxp4YYgUN7F>Hg{>#P#rh$ ziuU!b+#4WLO{RHgU=fdY*gFBW+3jYZ#@N!h2pdHw;LyLPG`=Ywn%L^HLHqJwGP=jS zS~W(OQqS|_Zf^{yJ~l#{FMuYN56Nz*vD&f7QOdD_6QJlVS*5svjQ_nyVI&+%zH$1e zU?Ik_gqPeVli)|zkwW+!$0c>7i zkMagKYZ;#0pa(hfF3LYlyISz=PthL7IlFVKrtPbYjf03pk8$mJ1d4K?YeWa>5(&Xv zBjHzRyv(&I&gl>t5{4Q)Isjm-10GInXNL09>cS!P1>~jU9E7UT__-zvGd%Dgs3ny^ zsPq3sI@~}K1Km4m*@$rfFv64gGRvFvs}ga%o-^{>#oT{Lbf;=kgUMLOpYCr;1mwDR zW7nrkYhq*+z1e)EsnlZ?wev#qBVV$zkiIPc*q z8DD(^1ZLFvFsK>mD|f%maBhGGM0!%jAh)W;X*Hhy^lyQV>LLolubaIr=}TL$;I4IX zsP4f>zdYa(8fV}8yOY}Dv5t2N>fk*K>O#+V>=@2529nIw=*~7;U*5|y1+tYNhWQxC z!0AxU?(5$kniPK6P3Atcean>Zw2k$v4EV|;GqCWJ26~xVGr!C)zPj-y+yTjQ{iDM% zDfv$1{h5UOQyRrv=mBv?u2E2y9Zl_C<^e)5;K$I2iXRYl<;gVi$YQ@sXpfD|ZNdY| z$R5F2cScD~_!F%PUQs6?)@vPHcXcik49mANX{cO8FZVZEW!7k~@FTk)@bJe7J?7YH z`B7_Frr?@>k2jr@yq?CzY3otHaK9+~^R7?$iY#h!{bwd^11v6$CJ=s5OR~N=Zg_O3 ztVhdr#1t$ORp9mh1`m@ovmdJqL6)T=LX<-)6|c+OglZz>nIw-*eh+WA@}7gRLK8Zu z(tV4iHEU$G?0m|qcGW3Bf#Qup>GXDZ$YpF&VD@$hho;lg4e`k3^}8bK4+iiiZ`=Sq 
zH@jJOG?T_EG2`;EmtJwcvk3?88bnEU;!~7+!MCrLIu(-+dc`KTP~Bcs#WvWtte-!G zZC+b=P#=-t((dN_#nq#f9c0sv9n1#fe71|97Q1*_Z=nSW#XFi$`zOc^T4=it*3ovF zi0)q3UG~eS5>ouur^+_hC^Od}9&hEA>4B2>GW0j87{_Z#Da!wzdDRRwjUvm@A@h6(H34ACpJqv?{{fhDC&-Mt0);rgB=dh`{B9@ zJG&=n^Z{`LnNK!!`89APbdp7!1ncKgu@-@jO-~Bj?>5tT$u=jPPw`HQ zg4zhj=5dC9B+k~y; zMPx_oC2iXc`mUJn8i+$8i{bi>+pS!0o5D^c9#i5RT% zdRoY@Ok{%X#x`vD0!0L&g2BhVW#Px_Ud)dWGRBiI#8tiw{@H~mVJi2Ui)F0Gok-Wd zwBLNR$!mIMP~)vbmL_HTNRo))vJsQw=lwII0piKDFlunxMu4;0&>1z^XTbeo&Q)Q< z(0`p{tQb-n_P&cs`)%kTJ&IE)qQtMB7V)qbjoA|4B6hH19B3(gIZm7$1#c&F~` zfm;;7uA+z<>F|5!Q&t`Z_+&JH_>sMougO`l_s0{`Q(;%MlWLoH`%KFlF;okJ!(ZD=+?aVFW(wDF%lCn5PB!X(L&m zJ2iy;{T*sy1+_4A&(eFRtl0w1(YXe@E&;(pAjV#>12>T)sH%(su`6EJR6m%v|jkcxVBP~%%MpqE$4eHt*C$!+H(Ux71&~xiTo6v zW4Jf8SOjP4f8yf@oN@8bOL*2UyhHic9K=;(SZXSd`m`l!SWoYRHI8Sh-bnw!j;=MC zKME&i8(zm__yfp0NlKE-5w4fMyz>RD%;0_-?4he+Zr{#sZgu?XPaVyD7fxbD&~+KH zD~LT_yKdyV&9$Nh`&S%VqbXz$F#}5>KB!R`L!Z1eriP0of{RLNyd;4k@DzL{Q zWW2|Y!0)l}vuE5IWNGbz9PYu4XkC$ad5sWtI&K?K6FGG$m8NNy^7d(2+RUnz4H+f5jI3Z}c;4>K{V z+h2UU3j92QmhyizqG8x<{NqJVjYX%|kG~zO_1M*yelZ(PHqT$pk#x#zo~_bwsIbyz z=P6tGcd%wUx(9z?wTL2Cg&16^?qg%jKU)h(#VB|c8NO>bG*`iX2ft1f<(I$3`miqa zw*9Yc;Q=ZTR?E_%XtEcTa*q8oS>qR91yfvLK+yzH-ar3_#0#Phosigh88Vuz{8#6Z zB8C_ZuNG95G`)0$=1_lm6U?rbH_b7dG9{z{je4KH3Ld?N5|N z$wr0>?1q+{LDDU9RpyQ!KRAlJFUx&_8r$^-B~J^wcZ$T{ng!nq$*s3^bapRL9?3;* zP#+luI<^v$`JL)d3(Kjb=hRbvm^|AmOon^hyoyTeDqSp%oRAKCpYnTs<+gum!|bE` znbG#iWR<`G)TVF575rtK$w}Fp-s0XAop2MX;6`>F4yHvmmFP+9RWMx85}#_0Ftxs! 
zyqkVyuOfR^-NW?muBa*}>cgx}O7{-x+4l&RDO4bVG?LbZ%*X4uL)#>}eSAKKZu9Z# zMe4@>yI6wl*WPR)Oy8AE8wTko!cR$@PjNv6%r5=MLJyjRuE+0zQ7)Y2=DT+ZX>ZQfzW$vNh8gAP=aX~(R8|@UpN3pk<|1r zS?sCekDmk-xD#=XI!V}{*zN0i6pmAfi5s8jSXR_rV9AQsFlE$hHi zFsE-rI+QnPSse2L-^O4pXnx!`K_=8h;DLEl+=#AXSqCTzUD4DiY^^}B4EwE$u@Cu{ zQHf7^dzT1yP$!_*@>B2kRFZs%Io^U18_|9VLD+|(Liw4wwu*Yf2b@SU%H!TAWR->5 zT-W!B>T>N;-74`{8WiQwL+6FWD^}`&m!2sx!AY!iBYP#T{?6?D#_;yTB$W%N`-^uP zL0}5E<5}qI?2UfOVxTb^a&ldX1sW8rRH5y2V3G{{sedzYa~3mGGQXU?OB{21dgaKhIGSnSx{qYUhPuuC=&Fg+xcJ3cp@=0K+){o>I3iy6FW znV`1m8ZYuz~I;F+|yTNx233|^9;p@Q({GswC&T;X` zUu4e_GKyn?I2a99?<_*c63r6<<;IVk4vo&buV$f=)RiGx!$BcFFmcHu4Z_hD&TcHgGuQzIGew*6jFBY+PgZokC!MaoXZR8V;I_#eJFcQ51u9LPDDmy!^Y} z(f%KAZafMbk6xwkZ?;E|i+~D;-RAx4E8hAQu-dp41Z-kjZaTDB9@C<^IR)bd{`BOq zTPr>B-UYlG{cPbU%CO|K$(B{S+r!2@AAKv$$K0Armp@tj8sK)b#INg_3bzU?9==>k zuE1(nBtun_Wvo&2HYJEO&k&^r?|z;V{@31t)yKK@ha;4Bj+Is3S^Fju0?4IqcIcoH z%?h#wEQwk?6eE<_c+`)n1aBoIXVfd3WC+n+EU$PH6l@blh^62*Dv<+En0}!@d*JFy zt=uT5g8Bqxzu0@q^5M%9nRG$XbM@wO2-rb1q!d$94Hm|DxKR`qucNQ6kx zgmdF+P~}4e*3RuIzA^tD{K|%+AK0?N(DK{PVAc{Ga)8iiG|!U z>ehcP=hefJ?v8@yz(geRXelTnI;js7T`$0O_$og7oJFHBJ|tewoc({=&PZE-F|tLL{4f4GjKfx3R0k=xDr zdEk$jo0Hq+fBBMNU=Vn-_W-&4nFoOmJnbL0U+@AYpMlwGfx}(D>W)m`zYb?BA?1_E zH?K_OEOMch(M88EKHeed+sMVEoJPc7ap{&;kU+3P`E_mq2o`O6Bqu=<`+bp#!Z?MZ zBqJh=awy)AcQB~5bmXGWL|ARlf+1Hm98D$cWNW0T{UScRWh7ctf296=A&;$vMEq9= z$oG*0i$1D>*WHw$dy)IRE_JE&puY9!J$_xb#1tIMo%)91@7!`?@wjX}bFlsOSgh7| zWZ0FhQ-FrO7#S*{H?5Q^>Cbl(>7=pHQM+6vC%cKR@Nou(trFQ){y9!>e6TAX};P@vL@g6`Si=fjQV4p15ReTdrY_ z%&UoggSv0$pleDe)Yrt3BlkS`pPl&MkWa{b_HtEG_7kfxdvcW$?7xjc35)rcoVfRr z6{SF?;Rm1?knej6nNr*a=%5KL0q8$(dgw%cAHaCkj=7`Dlyp?6#*BYahB=Y}Yhf7efFhg!1iT?d-AZbdkh%;5( zE;uJ?nsM={TONx5&ntBcN$9%w3fpj5JO5JCoXtucfeuqEC%l~f#l;hb+G%|00>$oc z6&^uiONbUH>GuX;`3|C6wBm!yXw7%xJY7X$WQt7T6*0tkdJ?AV7lH^|^?wL|f08@q zRoLkU+us893q2CQu<;7{zuYZ&`*pjsxt;k!Sf&lfbayWlqHvVmOWT`CTyi`3qDv4^ zq(Vf3R`U>P_mH@&A$}un`hqnkp6@^dDzN-DUL*Zr0`jI|jePGOPq@x41cm=MMP?Ir ztEjkDb9#k%xB8-63s`*4fA#Hub?LtZ-{4Cj`es_CO*=wzwdka^D4NzC)TTy-X14(? 
z5tM&qJNoh77fnxl3?)*z6;7}Qdcd`6^6|m7Qzae?tZ$yPS7mkc9F=;#jbTL3sr17m zE?G5E*&8zgv-|LMLV%L$B+KjlW$rx;7au$3(h$k5EA;V~%#Y{YkXNp)Tj!ipwEWXE z=)ac+H!)*Rdlb`u$TC&BWw( z4hOLx$K)>Row2c*Bg3~E;J_y95>UprXwpDk#VUJG4YI4;E@qu!Rf_Ej`1n7}=aQJOPrq z(uXjLN1925vtjo*3%0Z}D#x%xT#bw>B022JT9^XaBLt>V#mz>KHj{VVOcnHP|y6+fa zm8V-X^{f!%plD0AJZDSL`f`kogodTEg4PVt=C@dX|1OVg#s>J~>!cSF;U+?B z$oAZ~1>r<|6A+p$Ml=JAgyuVeaYt$$X~bnV#~Veg25x)_q@D)o8U>N$c2!lokI>}L zV&^uhuV@i5OMn0FNC5r&Z$N|T$B@df+D-E>O>ez|dou;y@rxE=aX%J3REnv3AmRWK zTf$A2-ebJ`7y5&n-JJP(iq6w!icb3dl-f;*uby;(a(Nd~?sIoxP#3f@nGHEo%7Dh1 zAgW~n$+R?v6Fw$gLfr>}o?Vz1a#}IU zwyHT)@3@Uvtw7j6LJ!^g$_3C2;xpFVilKSLeQwjcN%qIpao4tiPwHfo8C1{**IK-@#`Kj>Hxm3{dHCpzuTvRe#GCiNd_umqbA;q~G z-^{F;&i890Mt(xAQj8Z{%47BmWtq7C6b+Rw=D~_Dg~`dVZD}BvLPdPJBr|l~isF|P zE+dR>E$k6;v>YBt7}MR$ob2{%ve2zJllVOCdb(gvo3y~!s&;9W{_xOV`Kw^@s|^=^ z2k*cgSZt@&V{$KBJE~L1E$%`*(nVkVweIcWv)$O!uzi#Fd5tv!n#UhY58V^S&_7PZ zyUe(}6KtG;Xn_W(g8;1m{wwor9E)_x>x7cz)Uccnp+IxzB4* zZzltX$NK8vjfwM^>_FSq+H@OcWbhj68ArbwZ?8=h5vnuSco|A(`S8}sS`@tVv`SnX zlH0n#`uVAk@Yieau;{#~ruZx(@7vHfmO74|(^o(L`D&2*#@io*y4}=XD2-$)RU|;k zwEp5#=^-N*51B!(>R$obx&>)C#5J38EFk)fJZ-nuqYe2Q?}KV9x_QKFx?Yb!`wt9v z5Mr97Ph7Yxp%hqsT%EQFdD0I|vEH_^dm4}d=3xCsOz9xC_?i^!Rf;SqW;%+x)2Z{p zyfYJ$xKyapCHvowv+%Omggw@1JS&0##|QJA4&KmQuG?7Ndot6%KoBk^6& z-yu}NaD{{$>8)AR%U|4A6_Ybvyly%pMI>ENp7x#^m#dHTKG%RsPR6ghrFGJU`J*R- zZ%3u$$}V@ayefu13;)h68=f8lc;VWIyu)OX5Zr>3v`*bed$tqIG6*DCsNwhN?fd{+;*dikK(FC* z0RvbTN zz;b!O4h>t3z$ClGhr!30UYl}bP#(#O{Lhg|?ctx~q{y8Yaaw zYnfUU9O{XF?sO@9Kgw9;4N-(GQ|33DbtMw+2v2@QrJyN*5Wzk0uq5gG1$&MXQTCr) zI+uFC?{n<-q25iq?GAFeq3R!nn|fnx?BUh$io;`` zpA>H1{K3d^`DF_mh;|;i!F6<9$^>Wn6Tg-hQ9W9m$X-hhFSm-a_=TQyyZ-8xaTKsQ z08&THFbR3XxB8Hs3F!tX;uPPZ<$QokNNdcH+X5eX!j0UP#&#DDKFrr?$-!ekdSWjC zy~LElc*yf@s|{Crg4q=>NK8YJAq1}4zO2I;QU;;#jF^1<=zRgZPRZ^_c=b+3_u%?3 zlbQ_;cD#}-*Sg_3=EfG}5UfaM)H#&XB^RC0%Rb!qG z8pt#r-}QW`&rV-Nw1oF9bS75ZXs<-$MVGW-rqOAsk4s|~^Opyn1ZuurrxAj=4xFWu za)s>0sh{%Rm+SK}dYvY3c6zN_)IY2}>J;FN3S1^z%oz-2u2nZAHnD9lnQgpPZ_hqb 
zrujZ>PG<#o)r9%3Re#3ESk0zw+&{!f-0Up8GBImr5x)?e>CTlG>QAa;Z-4WDSh)9x zFYxZhgA!N0fk7HZ6{Gwc=yyFiz+e3nCbwLofz#Pgtt3-98+r=XrcW&{$(N-2snsn+TVd4tO z;wKp-d;)?vpAFu6S}~Xe6K6#X%^w3*K~;9gO=Gv5NlQc0z<#xHbEK(%hpK#%Fb!jT zTEtm(B(DzQ0D;~{l-Etwi&m&ADZ;?#QPa;jtrxdFQY$xS1YZXKa^-|op6vB5N%EE; zpPR*ab%xRbp+R#It|tcN^*+Grdv8KSr1T`x5%pf9T@Wb`H-TlBVcko%rA^3GKrYFX zwn4QFy(!Z%j5_Ht5PX{Fdr#bR$(}9ZZO(Ne4@sUevb<|8jCO`$DV==5Ue3-Y6!hU< zo~$1KzlD#l^G&qVUDm%`0#iGM8vEoFbGm~w;pI$DgizLi*(J3OWt&?V$+cC}-cV5#`e)Cr!lfR^2B3M~GkD{Wi^(S2Jt z-Cb~0b3LoC!M)rl1Mhe^N|t_)jFRXs%2i3R^p74>&1^-q{+W23*fTESh1$ZqQZKWM=%AEx_Q%q>D*48zr zGm1L6fN%};FgDoxub-wAHPmg~WjsSeCr2yDFaEPGA>|X8E-f}{+d%o}txLu1&I;A7 zYUqYR4O7s#z{(Ia1s}U6r5!d}8ZH!uINPKwpS&~MdZaL3fQ`&7h>b!Ccv`e(uql$H z0^t0$OBbTEhl%2^3pnZ!J+gD`glBIa!pEyZ!#~kbcch}g=2cbiXTJK%c8QO43cX}Q z-PMzTTwwWN05S5QNDTj0k&j@x+q^np|G*U zeuBs)aiDZ12$XOJ%#w41^32WGKoVr7D2^JAY^5aNks)HDkQBDULo3TXo%5cbrDcU} z%ImNwPB&fxlPbuCuXlPAdM9h%lbxI&t^aQSRdw8T%UpiUAl~{F+AGe`G9KrG94Q*F zy1p=5)eqmZ#vn2nmfK7wy0V%+vzJ!6Q<<^gN6M_9fd-4a9AH}{#1=Pp$8L(@Pf4Ue zO8)1}B^61JbV=qXm*gBCivaw-C=+V~*rE`m0JH3O^;Ao^^f(dbIhEgcQKv#&NQ3G$ z_w-ECUI$=?<4hW}CsUQY#9yYHf7LSO*+gnX-^}s5(kHiNeAy?g5tmz~2CL`8ZAg0=C!Ztm z*LbPmAga0GhkW(yL{gR>k$J8TS-A|^uw7uII-kc1_`NGQak~_m4OGG&1SrFo1Gh`@ z+j5G0`uDex(>w>4=T}14LAy0G=0##V;u-Rj2K0F@guFg@1sV`pye+8?Vt^x!JuIX- zPr{wk?^jJmcr88o)-sXRsat-CTc#tJJ)qQUsWTA7Uj*RxD@N(Z}rp+ zge`CupXs|~y*1P&TBvwmm;U`Y8lHA5$7V;hr_;G3XAt0rtkIH@VT66woi19`KjPDvDb+o8(M>kAd9BnQE$eZf!d;Zxzhv-mzK1ZoP`S-}y5qb$6mXbu$#or* zX>lLRj5@h#yWJswdKt-$BTN@4FPPw(UJJMzec=I>{JmvESXoiVm{li>P*eAMN^?XBf<#WNvtbK7qh{h4V*jrTd+k*)qA z6qcD3|lupB)R3`AT$W7Yczm7Jq)G$z}&8fvfOzo4Un$c-wWzH(Lt`V9zL4b&R^EpKO zD$BA-T3XxL1zgfwSb!&ZI6vt2M>t93xq=^sq(ExdVQJUZBTx7J?Z;ytvW!LMKy9 zAQ>=IXsyoCK$dhmt39OHc@m1?>r^em@5{_pZ=azC1f6gBIZK~oH*J%gMPKc6GD!sv znv}Tk@PFz}R#q72Ib;c2||NgPHbW4BGQ5|a;?iuQUSG_ zE^kf{;E1vkbp2I9597#kgBm2O})lSt?fBkP~viDLj*tR)j$mAf_P472W3dR^{F50?;1FCzEhG{ zn1t%_tBzn 
z%g*fvD45bbMoA9r($HlO##nY`Q|ieKF@>r7zNLitpayC7$355k#+2PJd#PBsz-U+0gnk{L=4k^MT)<_24MfnBHzwq3qz8y7tUcaaQAFgTD5}$jpG?&vvS;{GDR|XyY z$t@ES9czpJCRWLawVe83i4$i4w{TM_SyTR0n7x-ccmz4>Q*S3R8~a3`+wDT`L}nOn zf2f^}&GoQ_KoBi?R*|0-b`vb`TlkkCPxzB}shE%DKZz9qTuUXn%pXrF8GrH#{Ejd;GX{$jsg?ogBZDzc)$e z1ZmEL`43=!J$HZm#qT@(#_*rHStdVx4?UW;9{^EPBaVhfsMFOSCE}S!1nzWto9f}A;`Qo_k%5% z86m{aP*upO@dYY!+G6hSK4?Yo`jcb9-weH-<*89v3wABoE7#JOh%GG&UI+u%P-UAJ z~ zW0&^}n)v5SRyn(`8w4p{o}G1zP7CCIeosq6lVPriH zHmQ1`+@?!ZLZWj~&QlYnD$7qK`||Zkr#dNYFT3MrV3B{-vXs~cY%i!3ZOlXHIO#6< zk1k3!$b}o@FW=Sof2FKz4v<%n$-by=638C#-Ih@Zo1!4eD1wh%18a{5MrXn-N3Ai$ zda$i@8-wQEMuABJ442*Ylh02qqljh4Jacb<_RZEQBvF!;!ZuF*lHyqhv9XpQtRdgi zd=LIf2mG1yFhvMuN?9&CFs;WxxbM~1&@?2J3RFn_&& zh4+q#Q9#&e7^=?m`k;Ns))9VJH#)VKM>u3z(@Kr4G)jUP0SS8T(}8+UO2sUS{@sme zSzf+_RrgnkBMw*-zv?~AZM5TS%y)#(E5YBhumlT{NF@!IriWG>6>-_0+hiV){G<(T zW%xW!LEeeF^1j<<%j@*ul%^xRKW3) zqHg9?1c-6rrx3!-IDG9ylIBs1Gln77x0-7L4}5+Ef+2Wm9DnPHRz{UwQa~LfFXGMp zm7FUTitzWv*&NW(-!-L<5wd`mojy2Dn9p$2aX1<+Gnj z18R%t!zLjd^%R>6Dzxx>j-c`z4HwJ_39VOL*mtoa{a;Vp-VTrNaH0Dsn52K}3ODJ! 
z^J0z=#Z#_~>8ZpLvo9Cq3;3}i_n@GB?FkS46d&>uk!;m!#S_GZX*A>w3msf_2Vvi@ zFK@FmH7?&@dNX=Q_{s9kH@%&EDs&29_b_GsbM79up!Y4HmBcO-cKu(KZ&8HZJ8Jq^@^kM?`{?SMJt;;7c3p| z-Oie|h(N{~bVK2_ROSfbMC`~Q=W=Iz2n-X&q-BlU^LS^6YZN(i0C_Y1)G;RFW%BEj z5W2yl`E>IR#ak9AJV#{N6i8-2m1nXKb0(YGwQirEkWhe#&oXp)mB|h7B?n7PIwNe# zaU>*2F_c@*$4vz(o|Z>?s2%9B-=*zhfw(Y@MD&*zf4F(R>+h9>=|!h&V_$rTe}6mv z%db~<+2(n~{cc+~4K+5_qQ+_81u0&^&7%iy7_7bM>}I8)b}Ht8@=$I!;ZQwec4^m9 zFCsaa=7?7!?YX4)vQg>|+a_0TGm}K*Jm);_fku&)Eldd7O0+?vPpn*l(?JwzS{Pv; zHcF+TZtx(kS3RIlE~tRb)fqbZ0t!1+on(?xJynEL5S?|{-dEoIvwxks-~ir}b0SAx z=DUyWwLC8tNgU1q z7<10JzKM{S4z30RJq<-bAMi~TD-fvK33VE%qV6$VGSku@HHmO7k)y_>Tg!+u6 zU;{q7#c~MOf!2;!PJ*=%c=SETc~QkSd|MGbFwyM${cI&AH(O1iF-Xp(-=W(u=m zF$md=D!2ZqCVTVZ|&LS>GV=a7Z}pRn?o=Cjo?FV&}%&yW*ZGnrNEVCtcQQ zrbkx6yqdoJP3XolR4}Fajs+CnFnE~a3HQ?U+K0XDsgj=Szl>er$M1LZmN}VU0sDHR z*S$t;6*0-Hc>8XDqye6+l0sD)j5=Pu@}nZho#n3(D0^F=9Uf!_+tkG8C8mqTAkwr- zQ`rA6r&RsHJHSb`PPFsSRv!RWK0$G9hI`(aJ%gVxrOY^6+u37yVwaTHWf1>4P}h(2eIY z{_IGn7+)Teqj5!qfB_MwA8OzkDTeya!3q~>3W2bZUmI64fn;8dsaaQsgLW^Zsd@m~ zp){_u)*bGf%4tbwk8dHzK+cH=>mKYQV|BlditcvmP@i@yRn^Sl(b~wugG#4tQM0RM zAthDp&<Z|B$hy}0m3 zDm58XC+}s#;}pIJMVWr>?xt~lo|L~Ac$H0XE5xI+DwEGh#n_jjEvD(}Cs$ElV zyan2cwE6BBX^8%QUBZO)+Rv)i|3>>l9v8O>BIt&f~Lhww2Wy&snb}D47#f0 z8G*|>ew}v3{590sep7xkchxmpP}tJ;iUimBPg_E9>=ZRM{mW&;!^XPBLNzC0ZrFN- z?7`eFkVqkKhOd_GF#6P-ms8ye+L1Zn^Jwio{$HfJ3w`z2jNx8luPsaZ=|cp1a#E;h z@D~XmcwPsC1@$rTDSUKHG7VcD>n@V34Wm^M1pj^lWgl6XUT<#ygilCFfPqy23&a~E z%Nlpaw-|ai#R=y<8-S|R*kM_O6M$gpbjFfjVz{zL%3?trGYE58uHqS;DQIwf>qBgJ zQ+o*SkJw$rB9gEEH7}JL+V+&6`-&;}ZM+3B2wqg}7|^;p`a6wCH)2E6bi2L6#}lf# zFaPpp!@@XN<$S2>a~(&@ysPeZb+N%&WSps&H0}BzemApXve4BjRTpifr5^xV97CVh zK&XICBmC^Br&}-H-&6o*G)h2AJ-_lkJ31f3UckaI4!^kIVtAlJ;w6Jh!4lY}T6tAJ zTjk^QLDgVyhmNgu+=k2GKS2W^g8Wb2PyZ-%x>X_DKh@NTUnu=-@Zk!|4~;a2j>I(x zK#(cSdU8xC6J}hK@9geBAvdGumqwtb*f?v7Lz48r%W&4*m($VJPm(7LT(TjAGNx*h zp)mI(#`)^Q(@}#+f(Vt_2Zr^3pwriAMo`3!b)n!oQwDOh>}7I+U=sQWv4#iZ_p=OH zzAb8Q^tD#lvyapH^FMW9)unpeT{I9hJ|ywgeOr)zXu~X@omb*Y1MAVM=3;Mr%Q8-8 
zD|(aAnEZ>q{RkP4V-Wsmi%$ zu=Vlm2%+w)07iV*C59f?0ifiai*K}su?FYLCda~JY?<;1$I{myJze7@5P8#%W2UB0 zyr%4|JytY!I^uMx-3l^Sb`&@GsVIT>YL?9?E!b|bc4 ztTR?fbY;;HzPabSuD2U#mUK3X!^s?lO@(~G$Q{M0NBA<6d7)R;RR;61q0RpNh*6h1 zBxUP}`7@35gg-hKeO31M#h%{pOQUG{fmma^A`*!4sWhlJww{2mwqbq+W`5>nuIh$fpnyAkIYQ@EDuIK;M5vDe=Su$PQ$B7}l z@YWxula9?&_lweDbO9iN*0cr zShnG4xo5e#^#)b76IK(pl=~25rJT2nyza+E8oXHP?62$|t0C8X@SJbLn$>fiZ=jvz zXvM-9ypx;(X?!SBKQcPf%JOm@F)B%@%PXc(V?sFS(sKRL+|rFTJ(^<(E@{+I_xJSF z7xe#J!|*&Z;M~UW9Bz!hmCaB$b4+i4Hz;%#Fb@|Q2E?xgMfWU+k)h6G6hd`jqp=~- z(qYtv8ocqK14!dxaKZmJ!qxQ{Lrl?jI;)rPj0}3f9M;LJYm&oYfFDT0CXU0|bU>wM zwT*geM0Jl=oXA*%~-d6-hLt&i?9^QoD#$MMRW7hB3 zVUR(wtX$H`AZwa?We7SQyWz?f|JkqpIjc$N*>IOA{&)JsJ&&#N@nR78k28}m>Qkm? z?gDmC#82H-ZWh*W-KL&0XPTfFs(OIC701d4{%iu)Ch$@4(1J=3)l)3TiUbG-&%lw8 z8I|V1dC___yv!p1f;I~WYe3D&T-e#{YIsOwUXC zdeWsx?!{N=7OX+D`{9!^WP1Z)Y3PvQTOG+=>bc~r6=MZI<3^cee@s%d(O|)7?HFO4 zKC}nP&Cx>J=8>L8bp0aOTO-&UzIhU%|G+!-NA!5pHKO=rc>z{0T=ng9eiI=GpyheG zXd@1&;U=DYv7QXTM>ygJWW#Qd#{p)pTZ~=$TXXTWR!c!*SS(4tQwZP0mpZb(AlIQL z!cOh~s{5*_xPorY10+cB;7%Y&g1ZGLNN{%x&^V2IBf*^ncXtoLH4q5F3GTsNg45WX z{P(U|>psjp%-cNl>aNvQr>b^U?Qh%JwRt|axJl{(Q9ZN6>8KeCG7givF#1=@FxW66 zxV=B`cWew^_NG>2^+WF7wr+>5Al@ai#p&L8UT_+RDV!4CW}@fWnfR70E%;qH*>=%% z3_IDbes#9Jn*g+SELBMpH=5o87X5NZ@rKmz5_A(W0D)Hqms82(`V|;!%_0$*cOQhV zW1sFT_`QBapP@DMbI!jMBv}nrJbP#4ycRLsQy6x_nB%Botdu@HQ~T=%JY?m=`I`p` z9d}SY((9|cev+(d)iHv)oj;#36D=MF&ru&ty1Yy3?CJ{$2Pt4aU85Te*zff3Jd<8) zYY>Q>=I9V@lSW^0F+O_Ry7b9MlWQ zAM0g7)tonJvSg9X7@_Ak=7Pc`cdNJ2`1hFPCNH#Ap7iW#)hI-ev8S{_WUFHJh&806 z*mPD%hy2IS_|QsBAIqwft-O5R`en%RM`m_9_>*(Yu6k-86n4g@7G{u*h^nfao13kW z5<3W{<$9?vW)O}JaoFB(&^k-bZ;~|IGa>wfHycG5QmD{3$L+Y1XV$-2XA#0WrXW9R6jtid{$DFn$A>GfwUE#hRU%q2zd6{QIFF4T?i~`qB z+BVzu(YeTWQ3rBh8{d3h_n2{->EfrXfcSHbk70+Eapr)kZTYgctK@FT9%KVaBo>Xq zD|+~}J2ebF@7f5=hCcISQa=RrV)jT_ZV@T2xKXK zG1gz&O$zkWe?`g9JlTI;U$=Z6Tb&&STC*T)%?O^CUR=d89Cm|-{RDgoH|^hzj{`7r z#+skLOG)!?cY%kJ?={lqss+Z+*H8y`o69xs_*aR*qZ^p3W5T?kqbN#_NF6Z-4I#8qqijq-lS3Q5+$sDw7P2yoOQ&1pVzKS_m-ea`vGvRuPim 
zgnq%_-KkQvg6o_(Zys7!Cz<#To*&^3-x;$mxc!=8Sne;T8x9QyH7d}EIE{mznb`#5N(N5@e-5d9?_p!MxQ!~ zN`~*^1SqH8cK(HmYWIO*^YN``c883YF&#Mv8_3da(mp2+ONQ=vSUSjNJEGekcU-ar z49W}Lv#G|H3znpg#8Fw__{*@a`pQ{I{pQH%+jpvNx%rMmk}S&-t#zjQ(_)msRBGVU zM~om6(H(ss%&Q*4kAY*8{dWbJV$a{~068WmI05!Dc0|`s>N?!r&ORsEC%v0r%>mPA zhe&HLfi8T#*5H#mGbY3V}9G^~udFUEV=! zi)-a7F3$k^yWzVbi0kR*V_TL-9E%#O!2C?xH9sm0s6;M6L8|J?a|kmq)iy?u{i0ue zWwd8h_fG2SJ`J@ZbpmB@zIpM?4hdOHR&EU(*Vd}Q)dX+rMIpk#s~-O@%-eHCrUD~W z5{5`!kyZMK|4QNiw8NEoWSC!6Eb^@!GrWD?N-at#xrFi+;a z9F`$b0wJ%z!qgm$D+i9_brav^pbDqr)-js)qIcKQahbjbZp)BvqK^ z?>kLV<77R-`wR2?Yir=1SJ>_Bm@#{=xmtqeofb`l;g$CVXNHs5JSZnlzICXI_^jNa z^4hKQ{I>b2KjSukMa>JkMOQt2La5Wa^6k}Otks5%1b@m}_1C(u?M;{Ku;t&ZCl*iN zQtGlw*>oPqJFL_drt-ewdqIQK*dU-vvbpG;M5Hsi$kllU4FZAci^y_%V8gPI+_6rb zx-*?_JcZRp6D@^kvxC%X?X{Qut~qD15U9M2JLNsBS`wQO;VcZ+6|UiOJ^5pzFi{-%BILOtSKJB zE4zm$_tZU=U8?J5MkMZWcPsm0hg34dbY}B!IXP=ybk$tt4K7MH1+OenKYcJ@y4jlJ z;VG!deqk1h&-gTCmXe*W=I--5xmKU<{EX`7okaE0Z}?oqqo%z?Z+*eYSlX`+Tt2reYwC6{pU!?brI;IxZbCj)fo0~ zPye_;>{d8+wCJTicZ9w0C*58Ah+%o;%w7AI-z(=~uu3vZ9&0;viQgKO2QjZstYDcT zn^}vJ4j=dutWTC>_O_Zd-n&eQD(iTQUVJ?C;jQ=heGw_AiDD;dbZ|I-HES_ZleTY_ z>(X+cGe;0rQ)a|}l-iIpo$6n;Tpe|PXBlMkmDOML>=%1|L>Zc<|9P=@BP?rEnD|_2 z>OOllSjqm+C};E43vg(VkP37M}&)JwC=;~?+%Xj*tnW1 zJ;#o^xN&o2fqGy0kE^4{M{wUBkjH%Cs07_2)SvDpcL&`3tD8?E=_Z(HhR=BG&irPJ z8lXD}bKG2Y-6wY51hX+bajWiLmlev#P1rF^pBH|nHTZAUeww4*Y2iyAw!cg$ihK#r zdu5UK{#OM8h7s9ccITdx18aQL9nRSeI->26{MK+1(J=+)W>PVRARPx#A!H5s4sIzK zmMFifEIETzmCJw>guRV$8M=*tia->EiSk3-2n48$JsQr0C_?ndZB|Q)xwIcoGTciM zvO-30h`PNYB1dKs;k(*^ZJL+#RU%Wlmcf&Rl$2FQKn5=|xbUa@3RAKio*Y-g{E8SE zpiwDRON69T{WJAjNm<+NjpP;3(<4Kd94mWD@t!}$o7JULQ!yAjbpjiRU0@v+GNlB-O4zfh*=YGh~m?%{g3I#{hw@+!sPg^BnFWVT;p`F#K~ z61bpcr|Q57qE41>>IDJ9xtPu(*l1)dUjwK&#OL5=SbsdWNNyx!NLsz+4)9NK_BABo zg5#P_v)=92nHsEMnFuI}q^0Z%o6||)bGg4Xr3Q9zZPWYSybo zSJYnubpR^X7}xIGgI6-canxhmj_&?#9KP@@qhP6@j^CAu zW~(0;hFll5TnES$WUwpT{`{1@3JikPhR)6FPRyS_VBxSL7a)sKGRaUnEaGvVtxL$4 zXHomKc{L8(oy@mTb@L9f;A<8CGyo?gBiY48wmFgl`L{N6>Ey=%E{w*TReaSrSo 
z|B)8p**yI?M;aN!(U?G)9h0-Ho9vgT9=XE7u=d9S$X86a>ihJ?L0@7k^4oB8!)WhJ z6lvxb<=&ff5%>XMyY)-MoZdgUyc+YGErF<+(Uy(gfDh5fj#hwa&tejftypsjE)Six z%s!EvTJ;qKJrg$A!-#}VEKO68-ym)pT3J5 zakgf>lZ+M4l%faqW%j$)Wbc|97A~>dy_(6+xf`)oEDV^5$%E?z*WVAb7wY<*9T_V^W*HGa8 zTt?&E)hW&k*8o0{vgUE}#C~6g4su_~kp!XYO0Tjzk32lSc8WBCc5!||3a`T2OkFgY zFr-YY#Fp-dJ+h`G%jo!DYH-<}&PE>m$=rL|Ij6(DbxKp}!?qgD?@a)uO)8JxNz6kX_*@7E*KLA9soGz-4(DEYbdl;8hmJZstaCbOHJ~}CxWJG zFSYTZ-W|&``yvciJoKn(5kY@3E3}g2W^B{hB}5frVY=eSo~1uA9jhWDZ#OOwX(`rK zDMXIIl>6q!T3dMyvnaA|S7zcPApM25klWq&SxbxbNqU0PY>{{GMk|+BbVXJsmOH*9 z{uxY*cz%#~apL{1o+8CRY5k!^7{3>W74COA8E7+}J(%xdA**fK4~s0A@xwfGPdYCu zpU_m614KEw>i*C&p2)+fN)=ZOWP&LjUX1zFPJg~YhHLH1vJlgfqzZA>Q4o{A3(^d==0$zH%j2eN^V|xU{~oS zel|KIQJHJ0)%o{s3n|gKOV1B0aK_6P_yGaJjRv=+f1@NcP z!53LxC9tx6Eeyq%r@$TE5-QeOc_ihcoYo?Dra2aM8W%jJ0T+v1SAf9I>gdafPn+{c zF`b@hl*Uly`KyYpd)&XBX1qXY9&t1CL`J{nyF`p0JxHd?&{vvJ=|92JD#Rk7dBeo7yh3Byx*@U~u?O^H69BlOv?ye5ql*R?N6ccR0- zd*M&+%3VD>>VD$k{n}Y(;1U@%syWB=9&uKn+KlhD4IMB?zm6$Y|E{>U)@ZiS8xSGn?jds&u=zg++D92JNfh*T0lqcL;aYdPxtj^L}6$} z^5^3gv5qToom>dMb~ESSwrqN~<2VEN4NSeB??k^L|5{xr&ZqWP~u1L50?pM<<^_)nX8p+PPm04IylPzN4ys#z8bD)zELwTNOJi z+C%o}>#X;zpLdXyrwZg<8c7rDML5rhzlpRfJIE)P7>VdFxig+)?VOdDd>CDI2)g4* zt8-Mp`6ldRr+sH=g&$hJ@Mfzir#AkC7Gi(^Wb|_C&8lhG0t+1^|F(N4ZuDnrUjzCg z1s$BGyrepom@>cn`aGT*dL%K2xn@Hq8sqzV@!8$OWxqp_pRzT)Z!Tpk9*)Z2@Xp6K zY1*t%CjZjo;9Dyh?fq(xQ_j#dI2;I|MMv$fD>033A|e1v=&VP>WtoB$E7(zMEOyoG8_NQ#c2L`Jji!Qw%h z8pnubkVn-p@v}cazNrw3@DtJOD?VioNGE=SufhUB&7!iqFfESy3K% zJ_X^Y#t;SSo~G5X;vAwQp=osBZP5>5qUlckDn(9&=I^F^#0|8Y@J2m;l%t?!)<^^Q z9J3#*7GM3+phxpNmWwJ;^KycqWpvpiu|zv$$U?is+_Bq-Sr4(tQ8XouE*;B+BS2ud zJs|uWSB!d^z$15WIp0!EM{}e0ER{rO>+T2N)Vb5M7Draerv+^Ga=0}=PMG=Z8}j7Q zpA)-7Dv}(a6d#3j_3G5yfS9udv1$?8oAc<7a^=sHO!Rac5=C-P5y)(Gb?D=vv1kO5 zPO|~Ffo55*4+sj{mIG9(Tl>;2>2+vgE_i8JCe23DNt0pcR4gJL3g5|Gqj)D-xoC2k z5sDqKp3srHbJJ%x@-Px-WP@o6Osk`qFuQFc!23~ko|!~!_e)3#48Jo-(yx5Vf+Ujl z=qUsbrL8^t6XYx~hREW7tILfM>9m@R`UViO5)LUz51us9L!Qj~U%g8J2YZDH-v8gG!7@4xRJ&1Sa~p(o97>}Si~tfkAKxY-`d9*u2J9&N)dPI3%} 
zj4?hlp|In~51QCUYroKM>J{1U*SVD<1~&D5v@qRR{lZAhrZjUAo+a92WXy1lxyxYy zt{2D7*iN0vW~Q^$5w&WNCD~v@vL49&oBkB4vYWK8OY?1JU5eGmAG#qp3!DX#femd< z1|MPp0C7mJQ}WKtm!S=jA7LwbP^4=PMO;hC83B+z^=fV9LmE6v{0l5(f4zKLm8=c)To&2dOE@Bjvg^-7-#ZUj12s*seN#!(R=R zkJ0LCgovojpc9!HZSAd`q0^o*UBe3j)4i>0=eRFX6$>|h&Vh9(VpeH%MYC7&b$(3G zaSgURE8U+5){z8P1C9fN)UTvRu`BX)_=||8T%5#h$COP0te0y(5OZHVzqU<@)Q9Fv z#D9{eXsa#&s|-R3?hkN2P@480P+D!REMkUMlMbu(AVe)xJ4tHY*XCaVS1 z_UZ~YZ#TR^R0=%h3UI}iz}}o^?F9C%g$O+jL2Wg1>I%+<3m1B`;Ip*A_xV6+)3l|) zL)(H;TdSN?xV?7%+zv_4n)6xmqStlii=D4J5W5MCCdnCuzeZofDmXjwS38kDcdlov zK!i6U#{b5<0bak1`2LJ1b!k1J5aFT0vsQ@A`FL#T?S{) z7jx{ijr!v4TJNbQhdhgF-HgK?Ei&ox29ruaW5ys5{L(Qv5M3DG$Z;-t=6UPA$CaI^ zO&FJ6#gm9mRv0)i+g4;9rL(Q9@C4=80d*+Ph`0ppFmfga(R~bstT*m%@3N~iucYRR z_S-MN#}DP)(HvH5OKwGmW33ow4;c<=>G<;azv`(NZYCCMDrk(dV|`IG`T4n#HqkX9 z5$=mrRaBF4CwMIkR4iK%SFN4ya7K!u@7pqz3^8JJzao=ubz-E! z2@y9^rv*m1|4dmV;2DnaXHan`K`;(H#b$>D)m{zN3Nd`UJgyS@*wXE7w1=}}OU1Dv z7*YEHqVRm&8uC$`5+6xFgN~}$ABr%bPhQWt2m+|oq0$wF?|E^#OydE)MKyhodPs7$ zC*S&B6T-ZQ%xy4P#JHlUlVos-QH+tm7>cbge@b4xhtKqQdfy^Rb96ugpCq?V&5zlzXQBw-jP8c3*&;E8xPqTks|193??rfxFFsdqGyw7uSCQI_TR=U7(PzLMSEWi zK|!6s>s1F|>|{lk|0)Qwx7XB1L+K7#r7pITZ`XmS; z38?3(9DH;^TwqO7q(3ZR=p)nn{_j6PHN?J;R_`Q5xtqp~em5yiE+nLt&dolJAFUg_ z^O1vH-m7lHF95G$%bjQ0Pms8)aU8)uO+U?EzmUrAHl$#Hd>GV6HCWnUfVUq3m+fKL zRoK<;k^yM6ZZ|s%X*lW1ZH*Mxp=xk4k_5 zwxlt3FNoS2j~~~$UTdH-2_rJQ6}(dVKYRsxASl@54GaNkUeIv}MfFl(TPs9}|(Lf|ALFhyGi2XIosegHRK* zGdLOMu`)1wn7wc1`*3*RsD{yL`~^_{7CdL@V8Gslkrmb%1m+$K=+lZiF@{}j-aU~& z(qA0YNa_AV)@<*fpHPPYWFGvVI0T220HL-w$%fx3!V3o6*v9Oh6r=#wN=s118_{yRU{55ouF)_QBcQlV(Kl?>ye3eb z)gxMVw8}L6cf#b)rZXsGk#!JG$vV7#%F`4=Rf4Ccg_=RohxYnjrf}tn_O9*)TVW z3fkw;aTW~bS0?7mHt^Pb48Y>w9vwF){ZVv5M+g*fMtzw?ZT=i6MZ@gPaF%BI5yoJ) z;Nx+?N8JWs3{s{gX5_p-M=0Do3Q6uiv2T7dRZ4nAv$bH~H&7{QSy__hp8HmlqKzbOfoaR?8Chld-Tv-j{4fR1F1Xg-kYuKO`Hns%BRbioll6S>xrw(Qp&}m{ zPH7iO^)IWg=^f)I|05woQk&eB4{2Y%$ATLLX{*&KjC2emt1e?mjE;Ds)$#renoMby z{%PQ5YfLH*6Il&>tZj(z&r^Z(iJuc9%@LR?n5HC!_fi5M3VE0?l1DHh9858TeXZ`>h7!oTeG1F^E 
zkDLH{jtv-)@NYgK$3QVzLF}%|)GADTPAnWz%Rm=IF;AOtCU%j%pi`r1ai-HU!S$@0 zlm}C^boC+ku>)Bw{A~(ouK9(~BT0Q0=oQ7fY)yS=CY^K79I39PMZ+crUb;1{N)+pS z7KJ`MTfX&3kL&R46cVJK8NNa$u+|X=I^TzNsBtL|-EAjaKu1pdQvj@XCjf@(*J$W> zR-HlBSp`}^Do7)=0Uh-c85%0oF{@+SXNlH(W0aq7xPeQp;a1m@vQTN_0FA6_gsmc9 zhQy9Hqcf#v)te&L-|V!=KXWUNEdDzq^^2!S8@#!baTLw%H8a^_t|d0qY1^XU90f@N zc7@&ON{@dqHYj{cD0)ZzZqi25tztZNeOb%LzJaBE!L;6Mr9snF^A6j#i;im3XiU39 zK38*ey)h@`C&APQ!j>i7-|oHfsv%LT{t}vLkmj4L)FO7mVT}Ci>5D={RQ1cm;+eWO zK*P%IHQ1wHiOuw^WC}bQ)F>bJ^jQr%cq$f8Bn4hclZ}(Pnd?2$7b2BDXXjOE)3kV} z_o%qos6~HiF%u7O*Gz4|ZHP-96oC>9iPR+1Z#D7fHU83Qz;sYMlU_JH<)`uOi$<-& z9-;iqPaws_;~|m2TXxCA{>{Ea!6-{1pwX~|wrFMBGEUZ45Cjm-0vPdBM0UrOX#v(; zSTp$XjGbp~*L*Of1WNSWTgs168ZpMgt@}YCL2AJ=_C>}8eGC((eK@0g)>ci zgNl%MWz~+NX*ut${~d6C__WYZH)$WNfRbqj8KRjTxF8J|Co1t9uE&B7$*(f6A=ZKz z_$do1mD+{|vs~i|0ftp2r%V|=4nsHmXA|iuW;0Vc(Ith#+ANc%t#DR;q(}Bpq5X&X z5kV1@(H_SdR3=w@y=LUHl&&aiQNN%M+P!$BS)DJUjSW8;@pXQ(uMv$z9b!u+i1lOh znbRCGIsS9&iJ=h92kad!2|a=lQcJoI!_e(1llBoAAcZLFZzlykS5J8~q#X)&X{5megU_7F!SDfvP=|oh!MLe54-yo-{U|m=n66d>Mnj>xfh4kx>cQ|Sa`xTG@RS*-xM50cu zp9CASa@YI~z9?lm2Pa&m&p$tIWxolmnL!TUM;Aho^7#E#*QseT*2}+G7=?JZx=-_Z z9V^@aXv-+YpJ#b?Z%B&g^x6X4Xz*eSn3~VhX{33a^EYhh&>a7j1z$i>Z7ulZDad)B7H#^28e}oH2Gra=$8}%BStb9Tg+@@;p zU2>a6FZc@En$jsJ-75x{J`VWzHeyC|5(i*zs38}}*|K`rh)&8=jn>tO>gaquxr4QG zk7%DmDih=zrr7}N{rw!{r!3HqKNi|q(Us1c6rZe9Lv#ucMsQx%_GTb8_LGO3hP#i8I;$WE*_3N#+G5- z{AlKx@gQ&J{$NaL!%t?C3UFRM^*2vFDbM^e_$B7#N65mf@Dj*<%X5TDkM!KKpQ>Lo zG(1$iY#cq+D|W4o92oa4%^+Tp1Ga$9PM)pCt{>)Mk7l2X=PRNI`fO%cger^=hHPnZOT)q7rT9;U9xuY9ldusXT=c}f`; zF!w@4#@eHlk%}x;l)tsDy)<}S3iGVV#TXTH{#A#HekBYj5J#hTE(8UA-$cHY5(4Jh zvAgy~pMFX$VE}P8KiU*;-w6mmcKsgbt*IR2cO~d5wZCB zIMaiC2!hY^}OHp4m9(Khgs9;Xvo0|$&Tomj|L zx$+R(Eh7h0&|=ue?{BQl8uDR(q4l*RgD&9o{P=9@VQC*{P!E%wzygR}^h*|7=ic^Y`!rmP?j2v$SZY)MN<_{{j|XJSbXZ@{S^QI4CgH05!Jl>* zzsXhSWY{#%bF}_5_F^*NQy+n>^w1<%4qZ)~`F0oOt4|u=JQm@BbYq%iL)Fj6vBazb zPC|ij({0`oT%-C|7#^8*YmYAi$bbR=&SIH^(|Go%5>hbkTgolCcBJh5(NybZ#R%#KYF@}y1MwREnhRe@Yv2fy`E8baa8noMGhEqU$32C 
zwUm7pus`{rno5yOhLT&N+GW3v>jXBr^CiN{W)^;M%pmnhrI`8swo5*1GH>_QArdo;bPZ_52PzqjT>Wb=U+#xoOmW3i{w$rUL znp{KpzPgp%l){G2kQxG?oW7X&O|j;BR`?DKdt^&{j)bx=C7lp=zuXRWAXZ$9g7&w& zh~JlW@}9LI{JFVKw(_}M;XP2>{_P~swxr1)I8@mY*@~s%aCctYXQYB>NMR8)jxFpZkWPE3@|-_uNgy6SjNflU6CKw(Y0A{$a%dDSFI7`a(K!b5cVQ%?(l% z6Q$hRbhTm|1HF|`MLy&K?bK^lP6CKU`r13|h(&TlN~{5w;%CK0KA!R1ePx_3pLRJE z71hOru?%@xEzw|0*S4;%iW?K^Ui>H-FlfB~sF(y;VkHS6U9jXUr^~Ix5~D zm}2?sdIqt2k|*6u__VE|XmIp!D{r0RD>*~hrx8P2S}VVzn0@g{r-tY-FPxRQt5*eGVffnnsEY-#h z_K)@8lue{{$yJ(4Hloj0Gr1uEkBfogh-kWr0#Ev}OIp+R%w!Aa+^EwnU)l2GXY0Xk zMMuc}1ct{^X^H1iqXtbn_Eu?kRBFa-ywB!HV3zk_NL_-%gT?GgmWTLqlq*N#`BKOP zD^9A|5M%_oHn@cKC7XVrm}7tDP~aj}#6J%-3&9Qon45r`pDa|@Gf<2g2lW|Z;2-}O z(Zn^ZDUy?gmsq*vN!&6SfkglQ`1k)K(QPmIiuHfj6_?+Pb45t_?}lRIA8|w5{&xWp z=l`Kl`?!K;^{dpfCk`nd-K2vbz4Mkj-xTN`J|O#yXW>Y=dqITKjqpAUEE2SeD26i< z3|5VlmCm}U2)}^wv3-Z*=N-62jeDG%>~@{!?sD=4M2}wz{K?$SNb7S!#!i&aB<^+x ziuSny1G)S$4T7<0`#fYdCOg^ocgytCN$ub}038q2v6 zn5$@pIURmHy|B6kj{_>REF!oAW!4z>xCh4MgYj;w*Z!4n!cKhoqDI!Cz+_Xiz$FHC zz9dK^L)V-bq*_ujL9UwHj>);-ukvqINP*`!Mv32{0B)Z+hvN*pODjBW{1O^mzW5G8 z>2Ra4+m=S1PtNG@tV{)i>^BUYV{$30tN;+|vQZr)v1%EyAVY;1Uva{xuo+zKPa9r} zA4#UrdR*^P46PHmsavF{EY)L6I3|a4@=H!FpVsFwBd}Wc zC%BVSNFj0c{ja!r2a{&P`)<4#A4EB0!&%phv&vsx zz~IcG`PTv}7`^iYu*hFjmpwbI5@$tH~Bx|G4yp=w&(>vrh9hRJv9N(eD-s|fm(X6!re<@aK{cNZKE&t@R*WRor9muXyB zgKt;%hV%LU4csq%*L~#f_GIt=_J`~>pLC-5xheZX7#Su--oa|-r3&N^0xFCT&sxP* zFe6;YC%2Wc%zp>$UO%`qBSwtjdg^jnVs<31^~>iMPr`D*Xh&yA#c*03W#p4lri1&L z|3|bf>|n;AdMV3qB@!G9NFy|;hEK?)a6bJRLb!;~naOcbd50%a?#cje>m#O$p~Y}C zdEQAq1JO1a<P*$EXrkR3H`|N@Ky}{$d|{ORK@V5}3ShZbgWT!sdID%&g9qGB{zYh^?Wr%tOQGLOn${|3p`2 zJ?B8+FZ*GMS<9-~dE^HQHMdk!F(dHe#S0OvOFqx!^gY$xR^0=Bl>M5qhR(j||1j3a zE}1Ocs10d{YkZI+3qRSy!q}Id|2I4SL>R!YDg9qeT#VWZ0kzlg|EGc;yA*eH`1aGv z9)HY5G48}in!#83nX5v=;Pl47{1=#fvkOH?eehw(e}kh+9zYIQ17P^szey_ofcr4~ z2YUR8e*-@dQhSFF{7-&>L=u;mB>o3jj-fc8A>rGc^^(_KZv!;AHWI@_5eCe*+b1zx zhyiZMX?d5&vlm@;o}Yg4zB=Y->=#IXd2GGwl?pdT;o3ot7pQ6;Ih(@I+BGL-K?m{A 
z$~{&0-8ycZ=a0!?lx|-Dqfd-k^QlI-KE@fL#{@Xfz=6EMM{*JjkPIE0M7%d*OF^YGI-c9dshX=o9uHwW zumjp`vA9zw5Q9HWTJi+ALg)cQpb~{$KkHIheH*D+7Sja_zd#rz@~@!5gKzU&+wI9t z(r!fHZ@B?!K%X}PO?q``nk6ci;sdPK*9f0+O^RDzagr3w-Y3Qpu8jn6Vb2!%^K&J@ zb#vKED)x+_vIpstF+w`o>G^nBP`_-rVS9NT36O4~DGri4{l|&0GtPfTm3)@#H{+hU zc5b@AGkFP48(Y)4uqX21f`)d9DH@*~GV9#_9 z31ob-mJ9Qoc^#{8N7@b10%xnKoQCpmxXin*pR|yFoB9v8={8Sj9Cz2j6a<3{d%JTc z5aJp0IHTve5g;h~mml!j$Nl0Kcg)vIidK)%>Nw&%*073g%bYohdTzI21U z4ecNRKvgiXZ)f?0&CMJ^L7%K0{8UhuGJx6teD;y%O!jC&wVgt<;4r?%~kh{GbVT#})h!^PY`F+q}aL zvr#FhViWJkaaD&kDTiGGSh%XJ_(*&Nmp|ET_6F(lqjHHGDpXIpvMPK=Uw6rC1!rbNY9-x8cuFot-R29C-;6&LK0TvpJx&@&*Qfr}1}{2#n>F7xewdn^2mk<`>w6tnb5mCfK{ICy@C)E%=j32v7hvIF*Wln4 zWakj%;b&s!6l7-?_m8Oi9}VoG=GG9e|8oO&8V*db0oA`A99;xut<@}CB%p2%=GG3D z5>QWoi>;Q3d=tm0{E>?Ex_bTer|9-s4aBSNE P>j84_m87aAOg{ZDC(#=c literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/tasks_size_overhead_py_futur.png b/doc/doxygen/chapters/images/tasks_size_overhead_py_futur.png new file mode 100644 index 0000000000000000000000000000000000000000..7d2c282cdcc32c7db0c47c31a4cc161482ae697d GIT binary patch literal 16345 zcmb`uc|4TS`!IZFjEq!D@ zt_GGKzho>Dpix*fnXxM^8UP9n$Y?S|_B}&i9}cB-H^Om?7A;!0Zr#3p`_7&{iy+9O zN00LI@>nd^*RNl>TrQkq)WjriI4-yC)qw@fzNbxl_Ifj&GeYXxvNa2o!P=tk=r zMmzp{k6yUY7oq|{U1a?rkfSIA_1|Rk?#mFnQ5sFQ15DE{_CL5X`sa2T!>cGlWpc7J zjYg7XfZ=Py@u0aYtt-xsV((={n+Is~=FuqdFOBBPqPfawa{MW>H2Bw5mgXyCv1D*P zGFBZUWmGnrR#^8z_MwjT=BkX6CL@)XmxB~Phy4COsbKW?HgNGBSDWDaVCXhnv$F55 zBYptP`7Hm3NmxBc3jh+>z1{L~P%^JeVdsgrn}38Y36>p;x~xJVQRsK;1KH~X zj2#Z~ISXxEGa!`z?N7B;fd~g`?Cb|^LE5Yqrz!ixG;r%s3{jZ^GO{XFaU@#PL+l$Y zz${kmzDT`x%W|_-!n%VI12>YN)-22lfBN{rhk28;@jL2wEzH)u7@_a>9G52lw90C7 zTfH6k_QaT7!H+~RVe_93DG^5{n^;Plm!5g)QoS%c`K~(c-@sWdD*u3E?c@XFpI*^% zdwFr^9#A9iV{+K&_bmJRUCNaIj=(AmKJ7Tf@uX-DOLQ^JB?S+L$6YY$G;naf0l<(p zJjK3QL{BnH-t4$WmOQLPbHy@PRvr5^)soTQx3a-Cl_j)Kk$XyWvNwKunR|Jm1c$uMF8MSA5~`>Buqmj}1()d#J z@rRe+EU+}Kr$@U*c@^GlF!YXoVZ#>SuBgIl@uE#H2a|r>fDONipz4`Dtu}h;tHN`y 
z{uic{tuq)HcZwpORY1YKq+*V8SY|-0!ixqLM=z!&MW+wD=f{HX2;mvy4n<4>yLtIo z!Hq_jomfz6I)Bc8ko_On&(xrGuunORd%sq~lTz$3MJ4Lj%BLRfm7v}YhJI7L8R7WTOHNC?gFB@iLN&Z-u{TIg17t&^2 zUjka6<4AN2*4NM%oWItWWImtIx4YK17fL*Bp zcluvLh0Ba}so1^lSiAv$+h5UM!Ve4RZFHy#>yuSpbs>vs z9o8V@@`izoMMAw#GgMfjg7V2M-;H$0B$O{#Cutv6vpl6)S=g(`#!apQkISt*#j)Q? zX)eRxunKSZ!>YkIpAtP4~r^KAHJGl-S+kkHJtpc=Rea<)F3KTO2hstzSN%D%jdP!F zQuNxk{q?W3Z&oI4#QYtc54A+c10?l`i}#RY8z6n6)nPb#9VZhR2j*_7Rq7;xEa{vyiZ7lPJ_^b~8wmrwW`nKRti zsr%?zDMVGipK5sXG!tv=qF9tAov6C{O0-}rjnu*S=|UGFFPZwWN4i&uvnQV%rcsVe z+YsM-*N}ZqMQv7o*ZH^=gxzba3tefO!697)o?XUyEQ;lj8~N%T-KJ%`3d${9D-Sb&RDI3@OK z8!oLO8x?N@SbTMf)VVXeu(mIm1Tfc6Z5F?Ek(d}Px?quv$Gl7xn^RTe?2RdcNM*jl%&yDC#L)qbYzpzboj3+V-_u?Dm&=Yxa+*C%{F{SRQ_5t-U zs+?U|Z@tA{_NXXj34RIfMPL|vMz@BCQuP^6WFK468!6SpGaaL6`a&i1c~72nQaDR6 z2qIN8G1Ugfq?Sf>#RlG3{s(q2&}RIA!u zDbmr+D@Piv0QRpNIev$TQ9KdGIpNW;%$*LFWkm`$weTCh9Exe(*l3jmaJK^e1WW&3 z`rF%=tx}qc6 zeYei!j-2mTF=@NxTec86#LwF3n`)x%Az2ooPMs!-sQo_`v`MwZ17JO?p@lJ3Ovtuk9T` zCH3PCvZ7bl=dhj(f)piJ=le9u6%8J}{0-o$rWd1q}kaoMLg4VzkB!Irbs(!_aZ1xEqbMWef9} zYxdtz@U_?A`9L=Iy}M7mI!$s2if{Do=v05|a!7OdVl$8@5VDVSCb<7&SjX>!g1}=k z?{TBgtYIC1Y0GBt=xb#YxJ&lvImMk-+4ko`Ch~nF3ytn%7CW{AhQ0rq2=SP_GFZG9mvsAgtgxfCW$8<9fsZp-sV7$U}T ziDcA`JYk*+88P6vi$roxu=JzY0(y714H?Xa(9!hwgKIE&*6)jA@pDP^MbHIftTeAu087Dgx@DW;EmMv z-FzIX|Hv=QR(o>MRxJ&nb)tjww?9*NMz`Q_QM%GygtBrYZ=|<9c3zWh#!*c9tKub_ zK#stXd132z_1)wIPjLQj0EN)nAT67o_2qf$Ewb{-kgXKH>gdQ7aqy(_!CPc3fpxAK zeayFIURXm>w;|6*bS8qjqQ-AD{w8)O{%Ufp5(Q>S_Zdm&Wq!mgm^z2t-%Ey`&)@Hk zY^s=V)G{Gwu^*Fy)_|)17joP9a*#K2LX^4OrKw-8AN|C^50Q?(Nb;{kb%_CEt#C`4 z>UebS7h)B;J7idS7kU0Vu&EWZY(vPK+h) z2M&6UR%uw=;v6=(x@jc+KH0F4`8q;luP!DCCAlczlCzVTAA%a1+f^Nxs_fbe?jbZQ z-WOsbf8>(FmGxJLM;6ea7llr|m-?H(Ps3Bv48}9)mEh`kbf%$f9tIyTcg#Ac zEs~_ZW+mP5JE_)&8e%r+eQTBc^Ssm2O6%0TL|MI{@1pMksOndF7F_L3r%r|ZP!4)3 zolpHY(GKq-{!>1tT#W=l%X~E zB}+jV7MeiLw+#A=hME60nd^BZv-FMzIq{$zYBi@&y0v_ww*99GbPO)9D_|zYykc{b zYvk%x#)}5nobdM89q1AU^GGETgFl@EX)rY~zk9c+@9HHF5(qO`*dYgr&Oh=Gn`RGj 
z&O{=pHIUQgbbr3oUOMC>Oh4-RZw9MzvCAFdkO7+0k=k;yR*s?wirv_qEnajB<9uQ9*FG<=oBm=O zQQ#fe@r?(v>6jhr4X2tjP~wHG;Rt(ut@l?Ym_iD(+R!_U5eD;oRe~9o5Z7S;zvxFP zmydUg0l4ezd}h*`l`;W=Msp7l313J`Zvfn~ocUai0**v_STR&J_2q!b(H&w*b4@%; z{u1Gv-ea=hx@$cNjb zsnFRb)nrREB*DKgAD$iTBwi+a^!4Ht%oNkZ9 z_EPOXs`~{qS*&t6Hm~=k;tGICb>YzkO-c{otglzf)=X!uT6JRE0Sxx1bao{>u$IjCxe0JMr$3wn;^2JE9-q1vGs=Sx0R0{fI(?AX zV`hAx-Z7JhZ)48ttnc3K%*+Sxozc6>WaWQrDWR!L&npx*+V*B+p0O9@(x+gc5ON z^3pKdNMezMJ1yv43kQ7vjuv)|9_!g5rI5bWbW0>?)#*n9(7HPZ^R%rkC+E!opq zb^zUM39$WHn|LF)>>&zIAn4!uK9aWvvIj2&!XTsH94MT-_R1=WxA;Z?KH9)X4ad;U zKUG9-;Q)8Vrn|`H6z5%c6>r!6twg|GK_h;!;SnQ_P_NT=;FJH)j^k!0Ae z%B7x$KQ^1hZ(c|I~F9^Dassu&xJ^oB&+3az`^cfDv?1bLNg3lz^$ z13A2M2+w^3&qp6F`Hdik?2a^30QSTYIWOVi@TO(d_f>0=zS&iCmT$_?@LSDV|)8m zRX=@%GWytoR-AgLgG0AR51^OfmZiW*av+o7Xqmj`l_2`wM%X~`pn(fwi@$qmSU!{6(?W>IR$%J9RZ zfU$8XD!h@&))!N3=@<$9xD;TlBZN}vr61HSK5#r9c8mEHL++R!85bQ3<-LarBl;Sg zZ9#)nIv+`$RH0?^jK<`4&eX1<4=wz~sAi6Mz{x70?P^%_Q+RPwoy4Vn#8bN8wj`u# zjs-qIK2!JNNTY{a(Z}?Vw2IWR%xhE8(CtoH=!UQxHbtjqHUz-xEFtK3cT6mF zT56b(VBN~O(yrf}iAQfCZPg)-g#Ek`ecAq6a_pyTaO;cq`5*-nv3{9k9cpTaW ze&iRDb@|NC)Pt}~NtwWLjSF^t0`fC5Vl`}dS0)H_l9>EJlBy1%8Ib&j*_tqzaS>&h zqYOxpL~V!;j5`NJoBYcY-bz%AHy(#l?f5-J?DduTt06m<)zVwdoTo-(DV~x$c5Jwf zHqZ*T9V=i)bCFMa2^|5f&(v&4{~s@;&M7E6r-W}SQ%jTBXkzeA9X^O=COYSZwHl%f z_iXi~r8vjC&`fZXnGbhA#$TL6m7e+OxfL?ga%c?=X7T8&Wcj%(WzV78cnF3_U&yCK zj;g9N*PC@(1FRb~#>q&BLr1=~R>^CY*L|M38wEjsHc{F?YPBroNH>xnZOS? 
z(i<;^v}?8Og~Y}J6FH%!)qU^~G;V@(Y|dI)evE@x>LJzg`gs9$J@Z*u`ImrmZ0aF& z4N;dQ)6i*sSbp4YraFz*-iV&(`!J>R?a(6;$JVae1+Xjrt;ajvbCOSu3ftpK2K*pTK141wx$m_+;do>@2)p}M zDqWm_eBn|%e?Pi!^k?O3=zgUXOQkbCk^3)8r)QlzsrpGBgTEBeL6tf*y3r{)v~ z|8Sm#Bc=VRXowUsu3i_eb&lIRUGRALsQ9h*i7NZU_6aCrjU_z$4#`qY)`_Ox+e6;q za%RI@OP=pM`<9DT9BElr43B=h!YC_)nsY>QcRiMnkpp9is0t(H7pW_F-~92E_#6-N zC2s0dsnmT_({ahYlVgl~68z=a1TIX96gZ+wey7U9ju7HPpL~w&&%!1bwQueI6x%b(0E*#)8=^bli z&$!g|o9nOZH7!|>i8|RbvQhR}m@PT>o5RP94KQDbOn)>&yNo`c2bonev%>4x7ejIn;UGM>spsjP>GgR8Rdscx{_^_+{Y0a~MIG0d_{i zQOHpQ&(kwLP1-+>vhvmaApiOd?c|&Rp_-z^y`oLj6ZD%KXSm&qhJ4KJe;tzc-2{|Q zC5`)tXteNSP3r0GUAykB5P%1G$~-pxP8U-`z&fA*uH9(&(%xG$ghYWt6oket$(XT0nZ4Nf|%F{tgjQg z*k6$TdBUp#^_ojtd9A-jNX?*1m!B1Bn$I|SOB|>N4q#7-*(uY4n^ZL6sAS(sRH{sw z$ATUe(W1AX`K^Z8(@dW`l0*Tz!NEKqtx$WRjsto(G<`+WxH_%6YrmL1eK>pi^HIsR zvCQa+Iuf|cDEYZcR^kX_YbI9^R=Hz3cZcW>fmYoy(8;KOi15OuZ<>fL$K2-qD%}Qj zPZj)XGU_e%aYkn|U+}DE-I`#xUJu=dC5W zJr?K~=d&0SH8fPfOy&8Hu))M5gUZ_1Hr%)P{OIg~kAw*@vD3x8+8$^hqsQrip5WS}0S5i)?Rfz(j3<`1khxtR?4e@enXR zZmpZDUCV9-yiH|q99i@I#HEMO%hlu;AzQuT6J>9m9B4VKGmG0-ch@wchPBKF7g3*? 
z%xPScma3`YG($lI(}txZIsVMhT5^*S9_nI~LS|SE`4-c45z)NtJJ$T?>TcR)!Kqth zdLHq4>ZQ=L8}XFu2Xpp`GAH;AoF2_v%vr~8>g!XUS6<~d6S&0wL`5v&T8yKGb3XH^ zM5HfUiC(k>+YJ50<{<+)a7+0Y&ZvK$bP3b*Z0=!>Ok`#9V|$EN8-$119TMks^_9Uc zCRSkE3M;XA>^OQr!mZNHex0J^#&GNo%M%0^zrCNiOnUKi1%VXzRN7L*ob(@AvPGwY z`l4vD#_!_qs9BIjlMy`C_}pH|B*yZLDya2%3O=uFqPD7cNU1+@)6aRz@1Vs(xWo#o z0Mg*VMjoBXkDXWzy=6;ifBA>ttO*~=6$Ox@KXRlkm7HLEbf2j29Yk% z{PHpWDL4T8a8E2D;a(1&K|>}ae)2iz-#lZyi<;|l1q6(3>f@ZOO%hp8&%Xt>^E+CM zj)FqXt^Nnm4eONBuWx^)u)Swsx@BMJEX>pqs7q?2OV5|6)-|hbGhAUAS19PDw(Prh zwF3^QmfBlQ zwr*LhvwbPJ`&z&yx1@^4RG1AX$MVAYbaB^~MLO3{Eg{`2D4MVdZqJEj!^U30-#CQ_ zA4v8Eq5X*sjCEK0FNmtX8=2uqP3v7m`OFqUIaK8v-`_`KBOLuU^V^`P@{}~WoZb&Q zn+(D&_Y6R{R&85ZhWa%2XlDoyYp2 z9`p~5(vNC}5wycF0TBBGwQv#d6-6$AL%%&hd`^jk6D19GpYG~FQ{7ZkT)hwgZA*DA zz#9&FC-P8k$osU*W+ta^gY7fn_>wi!m9p;m$qIrKel~Kh0|q-c$wd_=3qH3pm{oa1 ze`ii`>n2wsCaSFxnv}Pj#0Odv=f?3>VFqb?AqO3ZrWz`d#*`rDJ!bC6WbQz;a$Av> zjtSh>>`EA!S%_>zO6E9{E_9~D{m4<|AgG#CDteg1+In2hc-Yf z?GtHH{-GY#L;@}9RNxpVoj)QfvYUq`tUc#Op2sD^AVclLGbwL)Z6PyJuv_vAreJ<~ z@J9N&J7aCc=aO&Fx}gKqI^;c`O01UQNLc8HOErfRkR>NJ#f-c})jUu+kcEQ*AINzf0dJv%mjCicMA)=qS|KJ>W0EP3`U_?q|=6$lq} zP@|>mm$oWwZ->24e?yNxK=v^hmE3NaN6_}?jeHr3(yZ#AWjDooVhNr>9I7Ul z%E*h5g!{dImTgvaL$?lQsQL70ro8D6YEZu9P=|97g?_TAH5}VBTi2JfB-O zEp&-T*pj@Ty~e_Cri)0%7v|IE(=pG4+Ov2*NZ$)-nG=d}DS8F^LhR>mDvc!hOkT*4C_dhzA=))9p3>yu3aOWHcx@kc!c5t0?jvPp@Q#Pv85<- z+2&(P)ywcl>jpJLQ(;&v4i#BTK9Kj{2)E)5BrHG&*H?eiLL?tYgw$ggNO^x8sq-aN9v!1%JJb@3CG7uRcXgQEC$43& z^N62`A9cX5kCO64=HJ15;m;$IrXu3vH<948U#%dpp6snsyY&F7vl2^)h3D%7eENX; z=oN^Qjxm;C>(8Ty_J4sC~+Y?0?+MI;ihK( z!ob{4=NWgzUgpD5B65lMtU(WqQrJeJS?s0;sPIw3TufB31H@(6T8nmx8Ak)3*U;^u zOY`!DBwZ2?9sKjgb)jEZQ_szPGiI`prHzRi{Lxpw#xTTHix!VW#e&ag1|fk|Tl8i1 z>Ty$6Gx3z;m%gL=>EnfH7ul(7g~#(10i0=n!O_gWEyR;VwH=#}-Gbhj|-~;53QVGdv|Zs1sF3 zc4=UutQy#y%BnX&4^LU_!RAc%kZZ`N!@=irn4fL&lp|+&WoAE~GGyj~Gz2zIEtuau z2NSgw9+W3TUZ&-E|GWG`bI-Gb- zTS=*C8#p=mZOD2ZiiPAq+3P#T5e-=%lWr)loKdtLoXoX2E(t`Zs9>YZj2T5c!O1V* zQ5SQel^QMX8u)xoPOBZYEk-ekPcijsw8j=yP1)TL!VPf!&r2!yV`jA~ChE%tu~%MX 
zWo;3l;wiUh@kR<@k}n*};EY;B5_bz2 zv~wt1A4^Fxk_N`f#1D<37Q~SGs<4t~xt2oXcl2l?@|*Zu7pOSMK8`e_!#;*=>R{RW z*O7`iE1;3{nu?_E4;()$$Ib-%=5SXJlPVR8^68kLYh=~rsGUF~+g!-g3}wnXW{&FyFFO2RMUT*Lm@Ft3^q`~z!-ePen> zES5TWqcGlwFk%0*p7LBI6ZanGw@@7goa>1Pjut%ZC{&^UUXMKH-WXx?M_j~OrZSiG z-+?m^Kr*~Cx=Qtz9qwZOsNfp~N&A*(S2KImXo^{_BVq@CE>$c(DY-F%P_=KMrKBY| z*UgV7#pO~yJ-bxZqc4^Dav*TeoCndspz@;Nl#|4f(H^(~-S&r*yw^o{4`>pj>7k2n@Hkhs zjXa+XebkfV%$pVabvIXEkI}=qDi=cDlsy6GI_K<59JLLakld6?igm#_17>{L5}TfB z$8Dh{r+yP`0=DRaJO(^~y4oK;NJm9q$YJC*i)MGXM?Isf4*b=?JS^VngCshZm*j17 z{Im!!+Z{mIkyK>bG{q zZt`MbQH{CYsVM&QduWU@>s$+45GdJ`EV)17H@1BbVrv_gpLK51jgUaYxxV(y%pSJ;g@&))4m_fF+YbJ5_W)-sWE8wrdbj^+6T4vq~~g-cei7MppmoKMK@sUF29xudaWUQw0lPEhdRV7T3x@rMmWI&)jYEP0DIit>3rF7ZDjuUrI zFAMM`(2lelpuxvc=Ooz@a;fy(O3I#vyTG5*rBgmr+h`J-ziB)9_r6fD#!Ih=2Tuz= zS;!7& zzHAqH%lvf;$}EF(nzBhD%$T2(`-*eF$;X9-4ksm57}czR*0*S`9_%%1{dBQU%|20J z3Uq`PN5_TX(t==0|DiOfu(j7{-8jZ*o9!(Et!YgMbwdHOp@s<$8>QU`s%I?&HwM`p z-cZ%2GVPGNQ{g@z-~{u$%cdG2u!j)XHT-=D>~xZV!4#j6K)1(8s*?sHwJ<9ka|@Wm zXUX3~VPJu;6F$ddR{nrR3iTbFF1ZnlObt9gphnx)Qr^LNB5%0MV6^LkGoH}0LB@NC zyq}1y##7wiCm=5%GKADjMu#p?^mKze)qIw`I39_BqvCL6ZwR;n0`?ORo@^a7k0LL- z2DVpqq9rS3KSWhF$gZfw@NXv$+gIBDa^j${lqh-V%7y3&E3|Wm{SGk0xRA-~yHm?N z1MSnwu)LSqFF^>b>^UmVX$UpFdP{pO(xm8C_AQOJ93ebwO<)Tg8^}GX8897|pbN^b zFdsVx4zp8ZD~$HklFwS$5o|H07xIE7vm*qKn+;*ca;ek~+p1j9p;_G5@euJ7v%7dN zC0gULQ&uOOW-R?APon#9L&u^Q$(t_VDQb_Iw$`GRvJy#e2018HsF~%&b1_8Sa!~fSt2gCh+$et{@gJ7P295+tg!JynAzC_)PBdU z2C|6JQ(eTyrQlFT0kf<42NS#+P~xgGg5U?+FYFLBDlxS9!Tt`*eQkmYP8)&ooLEGXizzr`c7&$@bbLWv3D0;q%m<9JwjHO;EEth!h4}?^KVbd5K6VCT}D01H) z09zc@Xs|YKB0Bexa;SphO4WxEa zG`RiY1oU|^@W}HQ3^4Mi=w*GDqCVf_lnZ!PH*`0i`3pR*A$9}I0(eB{JyLW83YeT~rjV?~QzOvaOE55f_aUD*0#SOS zHX^{_wu6Y1c~g*_*L1dU&F&cVmAh};Nz7s zP640JR>0tktid#_2o>iMGwJX+kQR(f3v}R(v|G5`k|-oE1tHBHoU-=ValS1)iuH{J z6JFoY!obd(ObbypnY97inBy<@f(~(jrJ3`TQ zJGjyeW5{Bkc!c7Kr}Vi<>93{sO7P%D>GMutCe()RYu%UgQDl}mwy~J`;g{UMjRWv` zqZmy|fFvQ{Fn>Gg6JA-;_Sj~A19SmjsnO~SEAMs%a^^nMMm(VLJ&*p&t 
zRud!ZzOnJr_Sx5U!AKID8X`zvxMa)0OQJw+E0@PD`(Bx4uinjirbc7Ha1Q1mcKWXh z8fC@zH2mq4T`UTEtWX^1_2AbosN*Gf|o288LqU}Ag>h7-Dw7Js4 z(Tf?Xxb7uN(XweieJy$DfTYV@5=&L;-SFlb#Iwg!58J$U9gB^YLqyou3D?1R|XYunr zZrQccrp`A$%XGzgRm+}T1!4TC;Q{&*|dpTS)hy=*7oVmcHr`j!Bom4 zRw@-ok(d8PvMa{Lvh3fn;lGep6PH&yzzAyBXfCI@Z?BB>@fHVp_gOWn!YkoR6$#_+ z?TO6JBZ^h8mcLIh;?XhlsWy>$!ChbtxCOT?tc%cqq#5|Q2JeCHf$Nx5g# zYE^7_B3N(wP-;x>{_`%;;0|e-?&z=hFCT2cd0rvYoJ-6>GYXP5_OsljHsbwy%i1WSJhTDEqZ50 zQ)b=BlG;OEJIOc730G3)hhFFU(3Osyk2J=ZHnXWjF7_0HHGCP}FeqFJ<4Lz?ZdsJBN z0F__VXYSoZ-ihM@V2;n%Erm>2{Zv*cv+h;U_MLL^ejO0MIPt)8BOse0;L)8$twf{e zL|s2W+@GtY(rEpgbWCBpFz|%n(>pQmwR6(=0fkk^7hgX@tge6N`8M6N$5R-ZpTX(d zCURr64tJ@%v+yKU_A0DxSlWJho4pdFX;LV4;EV(adUuOjf4~aV5h^$?9NP6&Y8tRv zx#ueVXGo~XF@&7duG(Dq<37>Hc7&?f$^Y&26*V>73Tftpex*mgW$H@$-jbddmtNcm;Nd#FCgHzTI2?20 zP;CZef~J6eu;!1ip$QlqgZ`RN5lq)Pe*K00V7NGgySQQb3|FakgVYJi2td{_?SJN+)a(7z{Tk2bD@z_q&JrI#hWcP`Y#hV+sq>{K$pJ)RF3X(b&;8HRq$C z{O6z5)P;pxI1ASi34Oog*6^YXR|ByM#|P%G+>tdkwFLlz*yQgy<}eJxuC}oYuq|w*$(aRT%4(3*4PqrFt+Oh@;Q#E6Al*-m@8)a52+f@8b%4i#2hCZCyq5Yuh!rMsyg&21i zNoyh!Q~t|CQcTn$+z2&J3_h-@Mk}~5MokYHdLkE=cP1_+?|&1={beQ{HcbC0nUDO?U>3uQY)N8L$-mP8^0L_lWF+N zGrHS{s@}BNEL4b_QkZ%740x;|gHDi+XrtUytEe^xF^LxLKaQ;($B>vH|FP8dj{+qE z#sE%v48x=($=Z zw{G~fFKjuq6XTS0mh8ZORmy>9eRJa1qyk9+bFe)&kMHvmg{8H2?YQn*WAWRcpRYD4 ziIn2??}PPULtoXB{Uiru7{%J9U|$|_aNkDN0jhYZjCS#i8VzQKUr5Hc>VvFW66g~M zFN&1X-1!Ql1;wM|m*q>bTZrrS%A2Ae@uo;H#&fl4>IKra_&+xiOKA(vorW15ROlrthLM_+B0C#s0jox}zaH`E!@EfaGt?D);ccn!C@hC`iBzEIhhd7m5l1T3 z+g7B++Uo+!9TF`}S3x3z;OBA<7X+@)F*4euhk~K|>NGmq3hQ*QT>NL+J2oX#1xIP1Vy|g}f%d`o`(hA+z9)iP%VEQ{Yq3ve7aSeRX8F*N{|xir@ZGbR&M(Jcs{g<1xbYxf4)Ooz<}DS22iPEZ z^>-yKaZEct9}1Pz>&G7d{nju^=i-BkB6H#juy$#}_`dn$Bjno$S?*%BTT_(|Nw&$n zO;YAn>+ib_uy>EhM5mMagOVapB=gyM*yn12sgHlJ*phSamk-k7BmYTzw6dLh2m_FDz>I z%cPgk`^$S$v-D2IFV#s;5R4d|eEAV;JZT9Jd)s332fOFH@T69xf6lFRh zz-@T2r{=(N_p_E??J_^D7k!(vG+{J6WA@syyV(Cun*aIC88X@#FIoNobT1mTn>$gt z61Z{?0xWHNOXEj}@t9wNOJ%)WUVm6`YpZ$Z<<5an5Osbpb1kUS$`RWac>4E=6T>!k zF|Tw^%fHOu>jw-+I2u=aBWrwRSC|oOKf%i9T8`e}Ll 
O?j8HLzuxA0?f(JQ0)4Ci literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/tasks_size_overhead_py_handle.png b/doc/doxygen/chapters/images/tasks_size_overhead_py_handle.png new file mode 100644 index 0000000000000000000000000000000000000000..e876766580cebd661c2a1e2d41a21774ffec9849 GIT binary patch literal 15509 zcmbumc|6on)IWY_jEpEtN*e|(+R#cVGnS$i*(x+jrLv3`JM)=VN=2o#$dZZ>%39(x zl`TXi>%@pGVT>iSn3>Of@AN#c=lMS0@9+2f=jWx&=gz(7o_p>&=f2N5_jApDpUte9 z3ugiVX6>}K-VcC0{3ypH%E3P#_$9jmz^$|2+9?L`ugA$W9!z%jmEud>mPF->y}DKJ1?LXG&+BJ;m9_h@k-(8X61={ADmanG8=UL&iTvngV}4r6~bYCQ}O6 zBV~RmP8ySrr4)VnApP)#nHM3Yq)18S<>eshoJ+x<{$w!r+Y+38)2FTKd4J>vT(e?; z?ZI;Z%&w6A!`)jodl3L6u+!SYDI}58sdzDMrN)jUKNE*ZFONyx31Gb$0BW!(DxR~KGROs!q80%sr_SZjYNZoSiH{iUE!Mti|&)w5SudOsti z{Oe;i%d)|~??ya`m-RWGVNoMvM4bU@w*$O^%ODUxF*L4G!Z8CstfJsmDVa~@N#NNs z7vRzx>KLUzuu{}}-V9jJc`L8|-G9rq?!UL!XX2v`gTQRfduJS-*IaclQ(6}Rj)$xS z)%i8be{9ATNaf2{G4BlnRXaScAuVK;rC>EMCs4hAV(nG^Uh_L`hs&tp}Ber1Ea>%Pv9PS|03_o4di7 zzKfDrQ}wiTna{#=5O@ z8ToE60j@%G1^BX_(x}ZervR0mOF-=X4<~!oftXmXTxF`8a;EH2vO(OHui$B~y(y^F z12~NsxU;LFfL;?R|Ix7O5LlRXIFmia_BuF2j#P92R}w}D;42Qu5ys+daR>Y&&TrG{ z_7N+AuB}>(oN7G!jtsgTvO%Jj(B(_loZkXp<6KRl0yFo)+!P1wRnp>L{Op8$(kBJ` zs~hA3tNh3IorBYGfBD_WZzK8_Fz-wiSZ-31Cgb6doGT+ zvn>I>FLU_2T-;Sn11FEHgfZaM|5(?V)rw<&uVC*EQ&I++1%1>eJk-4Fj>W40o3;|l z@lOYvzd>;iRD{Z}*e0Ro$WkcXozsQrufvxBA@1o*iB=9``QG+DR0_$f6DmCs4A6gq z!%gp#I_o7V9|?*LTt{e7AaDB#E`oe6JDuc529nbY(KC1Ia1UQbs z5dW3lk}TVlu?O#x3elBq+ROobK!#vt{zMv{aJ~2&Hesi(S+~g`bLMpFmKyv7#-&hqyOkqm6WX9KNQ>9(&2DL@vx^4&qD=FFi!pEK604 z9|{?tbnRQK|_;qaL(v3B$&;0PXBMW9>d}4{o_; zwtGCrKT%d-rp;!t&ool!;Z81|$!94HuDj!z{s}ksU>SC+gZ(QluzxF}odq-tlZA<~ z`KoCOs#knOW<@br@6bz--EXoVhd=(Zx;^$2FLjR8Z}vsd6WEo7l~#92&0;ab;=2~*kxK{r%Y@Ny3-!e_6-Y%JYRS+u zM30(E>B(1T&4H3|@)Bh&?qGj4tAg6Gc5h$uZKOr6o@esc6F#dwIf>}k$3VU6`KG|3 z)lV3;`Hc8CZ{_ieOKv+GQ+T&n>_7RY7gWihdNR4ghH#fo;XM+cOOiyde>#XeaMK2< zZ@dw>q(hHXOu`nu|G#cO6vB%`W&FM&kr za1eQzF}31oS3cWs0ya;i)WDXnGB}}4ozIiw3F}d5Q0m}^2{_S`hi;1(`e^?+~8mu 
zFIfEhwt6%msP2jUikTn6yRv8NoVw~$;#I`6_2c(P7R28c$9zK80>c)Zrv)aFHPsQp7BZTHKGHS){R=tW zuktz>?u-KJNWF>USe1zwfu*YYygT&uUsv5)#g5UvMI z8s%(HZF`mwyVY=Bjxq!*73>zsDExCv5J{r_NuuN>e1OLJudnv$`glaR> znmn$A+5o^`-5~yXm2D)QR=HNi_F6Nh2eQ&qp(_ik;_+BEQZ^xgpR8xWxV30yOi9tbKzOSl$`T~SM(8d)7s$V*Lo5~{U{5je>UelDJq|F`mQ z1bFvE$zs(^O1Exr&%o3~<7>%YW9&D<^DcdQIs~|z6r#PeG&|TDb&$joB92JxPL=H4 zfF)%C>Sg0DRl|Qpw(}7!>8AtGwV`F-z)f_fKOPfnFjrcaJsF7Nkk^bB-$VXZ0n;O&Y)z z?$dBX=S1%4Q#(uX(md7a;C%$Vs%>R9ob43C0vZn3%K^E7Eh?nt1j3!QG~|_;N*C{a zj?aqmgof8l5iM&nae}k` zk%N?9qXnoVwsR-b2|qS?jQC7Tbvqk7yjohZhkhd|iE!rd3@Z%x>n4XA(0q#=#WVSu zcTpGMZul3WAFAfoHz}oN-JAPuxQ0y1o^}zf$`w1xL=%uNnYAG^53ag4q&E#4dM`FR ziEO`*T!;SgQ8CIZ3;p*wrJutcaL0E3hXlW29eizV(V~E3scH<$AOAx1PA~T}5SMAa zIBTj-ppb$)sNedM!~Py4Hp%fD334Rm&co;X*x~sy>Q=MX+^?QznU#+dly6=JiV|FM z@4$@=Q5w-^(9~7OhPSmnl9)n=E*G=>c_bq|mcHsVg_#>19xZ9+nOvH%+osK^+NoCW zUG^kYXN&y})B@o+i5}aCpD$$Wc49ER3!fmoCv1sW$uv9T8G$4MIP|n1p?in07&Cjg zTbO=Fk+O=|dK+PoIYS9lkn~7Wvm8r;0Mt~JdLtI-4Z5H=a3uy(-h@%wgdM~m_D{nO zh{gO<(l4VwX^33x>UDTkgD>YtxasRlixVg8pb*YXUm|xzBFXpKW1ooY451pOY_Jk# zy&DtNs|Vytz8J|`R(pw99;ZIh)<|8FA9!JK*iJep_w;0AmwX8eyQ;&Q13S&1%9S`r za=cp*GY&1OFIf*1l8t`F=z;{8nBu!RyA4{S}cv#pHjhN|_+yjGWq-07S?>gC(dF95=u zjgKV<%Euk%FzR^45B^D{Ji9%%kYASF5+t3gfe=V>YleRq#fTkyMQc0IS4;27*j*Rc zh6QmeGe+1gAA4c}Vd8unHk-TGr%c!Hlx%eexA-E>6Ria$|0rG!VzIeZuTgoK9DaMF z5)RoO3nWrV>k}a2xwH!!_pv^A5lexZ`fq9P6Mc~3P1ChEp`F*%e&!X=VqWZpv#dK> zj1C3xigSjc-a-5%d{Btyi-*Z=)sY&>49MxVrhJq@g4BABiOl+_-(Ua5Ulgr z{O>O%H`lJ#g+1Q1iwjc!&e>^4-6A{kRRdjKfByl8uwJSCU!eWgq?wtWC`T z7rL<-y;GNXCYHkGrkFB>mdt%vq$}M0Pq*2k*%7bgjxA7Dy;X$&>W@Uls6)nmU*KLxKUsAVqYWEzvjy?{uYk#R`Gjs?f=lu1_^c&k4F+39| zVM6dU#5=ZkBl{hFG`r7^Qy&QG-V26h;N%2NeB=wTSt*@qD^pm-N9cI<=!nsHupA`( zBB>ayii9Mrp&4_{NZ>+2J=8bfNUwOL@0rYhRQSH<_7$&pS9HbZI*DZ#qx+bXdmBM{ zt^6#_`*diD``Z%#Z2a}++mN(kDRgPb%awg^#Z8@8c=>A|!bbTZQR!A*+PV%Z zEw{ZgnOC^^8f?Yo^IA*1A5G8S;+Y&S?HIBXu@Wf+(pX6uuUKmD?A$3iDohlA%^aJC zR+|+31LFx@TRB%6U7cmtxpgE1I?6w-my_8O+~9+Tc-4^m%~(#dFtd@&?$aSsx(`D7 z@1fi+Ji_-W5CrdNR#^vNxS)r24Bo9-w=8oOMvS4HUL 
zdg*Jvkt%$)rDCu8*<-I44wGYzhG>qPp@3}`8)0c;CCHVwF?lNHOzTKRV?3e@ePcx7 z@Q6Ym73OJF-O9=YG^|_)n}`*-SpocQP9gfqMmiAL(p3~c7q1#Ue2tedL4#XZ@pq4C z*l+h+=v$&kc)`Wu$>8o>Ug2{w-21JvHN;GC6(Pl&kXIx{90QG7Z}T@SEPOA=Z+9r% zVACQN+cRG$9H$4UslnLh%DM@?`o;P50Xk=fR`38R&GRboS#WQu>i zyeQrTKKMZpU?Lmw79I}}5y4273t4MifFly1hu)8EgW;6r7HCt$@rWHPhpD*B{2ZjgaD{=++afgS@c!_QeHVxnW)?i} z*L;LRX2Z~;yEzD((s|!V7qst5gBwsZ#pBA0JVfL|}e$vQog%%vP zR!bzhwpL^z98iEy_IB0SreFNE>c;k>;p3~|7?pay#P1U~0g))6(CH_e5J&-!Lx)!R zXp;JrW3>blkXw)iP(w8^?211uP&^p z{PaPrvyQy&UE$o-P)`GWINahUWb~D|DRcLS(#Ic^aYXl`4h)-U>>Y{HmVR6X$7T8; z_c~=}{Fu?j$4==ix(I`*oCUEGPR@aR9p}f$VQ=yyLDO$4fU|z)jnJCK;~Pb5-@EYb z!rRn+VPoZK5zB~v3jYrp$F)5jiL)tx43 z=9^sbgH8MW3Q<%%tcf~#2wS0T?1o2lVty+YBy?@=yb@K3O!JoIL3?S)FzKLran5y~ z&>3Hjhkf$eVGMWG11XFZmzK#dhM`OFB2I4Lpqoc4p9OWm6z*#C38?pj$jI;w%r=w4Xb#;tE5#?ot+{Z@pkAo zXY*K8B&z&R=^`c!paKRnBu})1CIbzmN1@v5!&$k|Qez;Kvk%C;qa^B2lq3l2j4(HZ zD({ic3+~<9NRu7hvG94D8$9+uxC(8=+n_lH!BY{dMZ@i?j}u6Fe^O+Wxy}aCBxv_; zK`bh@PiLIJ1h3kb*M&{5kw&B5&=tMA$ty1X>wuW;>Wg^8 z<9^H$Q$rNtbkCc4t=v@FbW8p(sDZ1qi%|(YgA<`<)E^cNr;GKm{m}R}Wl5YNpApLG z?;D|CIRKqWJvw@4!pMw3@;u~&L>kg{u{$u^MjqzSg8z7P^2^8F1<;eFYt+SG8yaG@D2B$XSKF;IR6sFLRJ<6LX!N zofkaT1G=^6`@Ppor+KY*!eH+JVvVDdGp~q9v6V%mOP%0jtZ1fTad!3Ia*~^=b=ZBIm^C|76QUmFm z0khPT7j(at(#v2J{A+D%9l6I2;8pv=iiNEkn035X8)2}|$>>YK`t=ZM5&Y#ek{*@V zm7y)1uWRY0PPmfk^Jtk?^+_4I(lqjCvn9vP|XL^vFN9oguU)gA(ArdOh{ToI+6TMI=b zm*bak&q!U3VQ@Wi^b##y^!EhRu+P@u>5?5HmSB%Cc=D@Bj~(>j@(;_#-R{zz6Bztj zxOjN_RX%tk6gCwwohJu^gagL|LI}9%{q-Hx=25(_T--#iC!Fhb1bgxY&*wDL4es#r z#kgp_>;lven}swNLKUje2M!nS@mvF!pK+Af4d3G>2+I6?2i1}1 zdf;|hp9tIMlRRAXx6}y>U8I#{4fKMcPN9P5vnaLbMKwl^n+Mv`!N$A`a)y;;>ZF2K zJ-DbA4HdabCb+)PjyLOq+poi5R>YhBR+UjfJxrwh&T@(26-%DIndI)lpJrQCBHgaNJt9Io&)wa3VcK$67_Nk6s66Cf%+xaBlr=YLC3r-6%B zZih*j5mpHQGv`$DB`Zrv-Ma-_SxfdMf!j~gDv^Pr9pb(lz}o*lhbH>`150AfA5VbC zVdz6GQPv2Le+){#?-sCjX<@Om`jT5yb>Sg;_RTkJcJO}eNY&a*__OcXLR|Fn)g9D) zD89Vk^PHQnRVRfn>Au;F8P=kXkM*#b7C=ukNR*W=?oD((upQ{Bh&bGbYt#_=Go1>9 
z*w;C3@)7Mmh~0gpJnHJ@%`S{N$*|?2%*_PAszeP;m#|_r;TGp^Ado6peu$lKlweik z%Xw{+E;yBr30jt9+f*n^}?w>_7f9A>9UA`Q7 zEIVe}O&8z^Z#vlRBT+?T>tEkA(?6{s-wG3FdFk1a(kb_Zm;d zK-_sG+Y5Qst}+09@h67K_KT}!8WHkFthCP9wqo7<=r=78ko(V`5tFVTu<$FHv zI=D>awq}L*ropx|N?U$>CC!Ijuo;|zcx3EKAVW1J=Zjc$52?@@J+eUgyK7df5xCi| z(xYOc#;`qXCNi6m@(oKe<*dlN709r4_dw&rOn$*U!2(V$t7(Na@6t}yXMQlf-n7{b z`tdfn`=?++>jfsFKSGAwg_>ti)}y_RsWn0mfozUN1*V$9)*Sz z<&`aG{briSr`4j%aCQ)8+FW(dQ=aj1VF!D3GIrV}aO%1E5|u#uek@7&x2ccn;d%=W zQ?Myu+TLl$iejW4BUznLfm9ko9G5J~*wg{>FVzhcE$v>b6?C#@CNBE#0eBjW*d!yf z6>Wu4>xq;l=|v6VRwHZ?eysapg-*kNsp|duO2ki;)#33qkaLHz<>Oe~-0GPDJ?FBR z``K8BH+IfVY$=RtIxyo>{0|6L>E?;kDQ97u_!l0ps`zK0IoLr>6EEsu_byjQO0ENIy_^QRDpCUNaN$*`2!DfnyrX!o z3fh`)b^ty3NuDdrg2y8zmBM4h)t7ge?hu8a;9U}Y+PnJFhsk;#zhWe6_BO*~v7n-< z1wu-)wFT%o!s|iL@OlRiR_H{Kz!&tiLX=V z6G%yUi4xP3$do{HEE-hE(K%cv%-Mz(^6WD?#0Kb^A*NofVFS>336DZ zFkN4H_HkXHb}#`+za^R9Z+?C(HPw_rdb(wsNOz6ka9X+oUK5)67P3g!|BsSzxgst) z`UcPS?HBqWF>PY_cxc#$Hc0(Zp`*?*7i`T;W^3kh1>ziB^Z{5rs;@_S-yb~dj z-32H+-xPn3KS89vaH|0Y|s^N-6yl@GcA9=KOw>DSGR+Si1h}(dbZ2D-FseyhakRnb^IA+c3ToW8or;1l>#!Jf?<^~B9C}PRcSa8Ge zRjw&6T14m2CUaj&hMtIHHf7Gr=NpInF7Dg)$7dhtDWZz9u8Mus10<<-35Zj~3fsZQ z6wqUsC6Q#lE1tI33O=@J+1Bs3>rF0#ioGr()~m?&mAK^9QX*RLY=T1qXX@Ipebwd7 zQ!a`bUa@|b(@PC*(xA1Nz2LcOJ(=!)IT2fB;=D70N~HWW_Cbmp#e93Q8hz*P?#IyL zx1>or$>aIAwq)b}B)lI$A=Jou(?Pu_lj`TOzroi)#r9DS?fu%RHqP>zPi%N1wU!j2 zuj|Mk?H<{n&>-83Fx80=r-&0xkH=ueyDwgnbyVdaIw4d*};VE^g|gJ{eoJ;|FaW zj%acV0;)fAg|2bY>=SrZ!_(>Tj?Do5GkT{&v||n{bg^WLCOA$4G3tLF?*%=!oUI}?hFbV3l)A6-H^oW$~bH0eA+@Mm&g&D26?vvx>VwxmiqL*Y_C9hLny5;t<&#;^iCeZ}nQ6RM_OOHa z7ZJKtn5T$^9zsFk!oH{-;NzwOG+DxHZOw8Je{gu91G7B$UrU@lL~d97key^xeW2Er zEpf)d#gY@%e^*}tmw&Zlbh%>mna+L+IC>O{FikSGA9E1vsu4&#_HlBfT6HCHuqrH< zC3)pC*=QxS8IOurWeh@KU)X_l(dT|wIi(A}ojr-r5)ci<79uF0@yQ@jICrTn`B^l$ zyjujzF8Y$VW%#^fxfrb#8`T!Qpq0%Wk8)CDEDcxb;JL!`L2)fyqoO|8%`<@~RxS!E zb-wQa_kHWpvi8_TaD`DnU~iv~+$PM?QCM7$=CdLsA@g-rHE;*=reUCSm__}ra#od5 znK|t0CXF;l=qxH;6*fH2q~-?Cp|h9a$_i1f30a>jLu@`@^s3xZXi|szWovejvWe4UIj<~z69#oKQOvC 
zCs+GP+x+n;80@!;3elK$l~?aHw!U7|qjFFetoPOy6+MuA?WLMP0#=}Z@8jz7LX)<9 z9SF^}Oty=4$pp`q{_65IE#^M0PyliIcw?^^SeluzEA_o5uu38_eVfD9ijH*V-mHs+ zG`55O%&x~0(|lg6z^a_c+}aFXYv<8{ z(ZW4Q4tpzTd`B(yyDG54h|H1I`y!B25Q|wU5}7|2uffiB5XYm|sWKk6CrbikD~!Jg z>T8dVv#&s?Z?&1oOkAxgJ;*;DwO17}P{cjBeTmn4M5#ij))Mr4`68}xA{mN(t_#kM z`yfX5p~Pyfz=@A15u+~~Me7H8ISZ!#o)4b*?&wiRPO+yhwIvQ!1)GuXn= zz7_fS1UcJmiZ|yPS>qPa;8u+&;G#dOF}`ye=!-cLeX0E>yuE3endr>e?*u)m{RX^! zL8vJFjqLJ5?GCUD*0o_BH%YwC@WMJZ#@Xg@PVR1@LOdkpgG#oAtQ%J_z}sj4iDb9T zlR99yeMHK`zc9Eof(!Pffa}K=i-N+SROEILQYeL?BW}T$&J0+HAPJc5ohaaNn`QBZ zu_~kI=L1QKSZ=j;TIL~%MpEZP3DI;lMGtX+^&YkPFpsDckWieFnE|Z6OP)iVZ(@(( zf(6$zug^2h;Ctpvo{QxgCN-Sz>48hri+qbvk==5PvGD>vpvQZ1{v@{*Mo~Cd;l7Oa zqMB{&M=~q{YmsNtyAZ>11z*nvHfj!{A8jk9X)@vwhAY8y2;SMs zq-{%UDY42^g+hQkF0<6KZ;o$zJ7i0N&d#-t`K+au_@$wPOd+fy)DrWGL zb_f+p^g+|HB6Qc22NI&$YLXGb^A4=>_qCan@!=J!ZVkNgb}vz>>G=9So$fif2T3po z5*s8Nn7AD*^DdM$!<;L}TMc1`5a)!QCUUwnRgUpC81^^sg!impTda%zypE)qX3M$PPY!7An1TEyi_`^uR%VA+QQ z;$S_gW$e(SU{5T(h)+~v3aIXjE}wBF&jU2>M@7g;CSacN>KOyuuWO+M&h4^7DT4E$0=kE zY|N(}F;S*z`s`_|*#XRlEJcs^2$zp*MriMItL&g>Ygs40c2Hj%Na(%`W7Y6XShZWJ zuvu&vmGv{Iw$?0n$-pW@c+?`XpWfbJI~ONNJjdZq3KOV;K!VsLl=e;T^^6DostJ%k-$K+a zU#D?~=Nt5#{Oh5Wh?$|0xes@WEs-#i{eX)yk*30uH%In&8 z92fsoW!RaLMWX#GzjfLZ;_WLRHR&7ObYiNg;^wp4dBw@%n~NEEdeE7NX!g0#m&S{D zm%xo28iI~;xN#BV6R~5fwkImG;*mrmv+XL+^-Bl)$%Ii=#*UGRRlBqQbR0u6iOdq1 zqI|XdvVX*vb0teej@LbVBCDIkfAm0F*b{$5PUU1aAd)X9blWY!&3BZRCqPg+78-lx zt>szr{Ti2}WM6{k*1e+A=_;MDVZ{%8tZXN_BH?@NV%+?jWdgfxRIHGEZCAwy&zimK z7TAKSbzkW99qd<8QAW~^WQ)xTetPRq9V6l9+psbE6BfJMOj>{U28gvK$tf}oMk;$HS3DG8mCky)F(Y`)5fy z!$O#;_F~Tk3pftO0I*3#|K|6w9+VbnhM6sfIVsILD#o3DN|Y zNNW*yMP?k#skO;))2XM&A#V*XqEc*-G7fJWl!U&x16DqDDs|3QtNKy|L-ccorw5-ZSH`oo%e9@x!Hg4KnUgyd#&!uFJ|r5>f}6pB z>oS6|m5121)eAp#Pht8RM8!-6{MSn_#3GTFtayD|IM6p`OJ=Iz$I5eHbwHdoHF1Ft zvj9^D7@!fEta_enp-3{LPN6I8;i$dsca^bg;QeQR=wllX1+Fkfy1$emou7>%iB{LIe{B|KAnciiS>ce<3dz&Q}R1%lTJ`DSLcJmWlM0lpc!jWLn!Zv zOrIhC4v|mk+rJM5au@Z0(Wwr0BW&0<(%=(Er<^2o&-D}eEEct}Gc;r0-GzWaRT~#_ 
z*iTDTIx%}IEy=;;GHV+aSKiyt}!)Dg;gLP~y-M6w23 z%vF7VjH$(?YA zvoP-xDGDxdmKWZ!x)=y0GOaFjP*0dM;0+#yAy-FCNP}?$2}kHIf452+QH)mT#1ZfT zb`a-G?ZBQG!U;)ggznj;5-orC0(7~_E-~&i2^7OyV26dln`B?>61w5`WCg)=w6MCu zC~h7;z{(9`vO_hzcE8I2Cvmpi?*7W6P&u1q@!NnUgQkqBZ45!30Y| zvyM;!UQC<4wGg$MvcYibs*JKQKc1DF%9)kY=}6?a4(VcUMrLMhTYL%d?Vtw!z_$2a zeW^mxfVrA>mijyf%m&IzW8g^bY$m@ug!k#VTatnTX+yHZQX?S6Fmn!CB$>(+s4!Mz zPj~g0Gf}&AAB6UW4zZMaXv|6WM}Nuuo{%w>pOCJbQ5{cPU~*i@c|Sh>0vG?V4YIK3Frux z`7S35U4bSBJDG>h=ww6mojsD8_d4uq!WI$=tq-Nh<%U` zc+ts=Trd^a38ljD)zD;EiNb|#5OCJmoWcph?9$CoDxnvqqRY}9G2G@AtH5ykLn*8R zd=07am+5Zkij_cA32*CURipYnDh4}u;!YN#Wku6XRr*EAd&D^5isfK{L)#sU>@edP zjVt`lcmNXTN!ypxH{k0d-Z3^adc_1DNGA9;dj6P8m?FF2fb6T(eO*BcdP=LAg#tV!W-6}o*A zCH63wG{f0KHtMzlBwxo7JIJT?>GbK?U(A*ZXvYlaq1pXVV)42I!2J}$5m8UC0oTYW z8p0W#WhW8WR`W)grPto}0}x2@B*Tt66*~5D?s8)rsp5h2gNw!$G?q5Pt0QvVq&l!Q zQF0($9X2ih1hzhsc>dC19H!$!+OQ54?nOxEdb|hNm#ue;$vr1*?bC^&GL9af3-@FA5caZMtS@EkKd!-;is1EzaLyGg7>jGb zvlYp3=VjHDNd7u0yfN`x$3DjwpXVp5rhJ-+{j8D_@{rJdmiMVVe5q9L#!N~F^nj;$ zRxk_+xW65gpNNqbM8d#<;k zk8z+zZzG`Q${6K}e?cPmFVE<9!dBFhf$L{wCRBBNNR$evLbS6g-BJ2d(fs)}Rd`JoDZI;CmXKE|GbGk~!)OXXoNfnDBy6 ziKHdKxid{hG7kLE@7)ABYk^I z%V?Qs6vY68m^PVG1|-A-ryyuLV06wLMlHOf$5OCg{NE(uF^2(xt^o*#dlI3h!SI`a zeZ{xuFu0>CAVhx|V8buA-c@IC$BlqMZz&LMV_+sqDV|61aoZRm;_e*AZynG`fbYEt z;Dw6+`<1l+@mAIUpB|M8|KIQA{d;duc_VSN)xGum@%I1T+xySA1BYlC_uf8NN5O~B z3s(H+JAFS`_N#mx<>|POcmM01z?ZqHF1(^z5#E~Q{p|ml)c^6Kp%q*=QFRHxr7VUQ z@XtE`)?pkKo_>M*-(GoST&_7dua_J0fLL+EsX^1Zo{!9b&iKaD*BHO@YF=^(%YRxr zuH~Tcnkcw7v~xjv3RgN-raAW5SyKLD-PO$Z(i7JNv0TN4r`PXUD>v7wmXGe=e-uVQ z2x?!BZ z3dI?8E15-ZQ<_cL*{iY6#e$7&wwkNSc&U=V9qd}rAM&U@-R76|S3b4D{<*y8P};~} zsvQ?Ss8_ys+k&*kiVI#9aM9@0nM%wb3hiDCZp0m#effq<2Mrc^t{LbzT=sV;}b&%M*`1HlD z`SKt!aV7=CWh^|@HWYlk#QX`jt=xRunfFNfhnoXm{1q70H~*N-=ngp^RGN9>)hIW} z?CT4bcjIIBY!LfczR3u1SP56ULL6TGG-r?227-{EN5Cy^M*jHL0kC-Y_CucFWk4pl z7X{qIRl(NJ%9*EqbW*k~V3^@*;fsTx1@Z?qksb1;$zPt3zXi?3Q*g0dR(DX#fBK literal 0 HcmV?d00001 diff --git 
a/doc/doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png b/doc/doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b06c2ce5b37c6818fe706eb4dd622d5db369df GIT binary patch literal 96256 zcmc$_WmFtp*9BO(1!x?CyIYVD2=4B#3BiK9yA#}<;1=AS;1=B7U4lDI^W^>3{GMO4 zXclzQRNXrF*xqMXsDhk0GU9tg0059BB}9|}0P++7AiCgT!AC07WZnY+0+pGtu!5PP zApl5(CM1|B#A)L6pIv@F2oyjU3RVz~p`-MP2WinvmKF)@XDEnTgfv3ZhlQ|Mti((@E@Pov5@MnM=YPQ8v6x6g?!^^lO4q@yQlSY0Dc}D z&5lUPYi{HY%b-TRcA2PaRO+mMSku_p_jWy&9_?Uwh}}B+qw9r*2oTUd;G?_-z1p9J zsc`vS%@++=6v*4DxTz(R#HQ3wA`m4FUtv;+_fm;?DgS*V zefpOTI_1@^C;=H6-Uo7D0G0+)^(TUZFB%I}Vkb5ifU^$aAqbiW?nq!MARz`In+)J+ z08s;CF8C4wMpHocCssOa#`@<*$a4RsbtYWcC|{6_Uy=aysxJ(@UnK}HNAN>5dLGJw zfEWnBI*5Se;{ZOQpHQ|b4Z@I*U3P{7nhK(-prZoD6_Nu=GvrDDYqr$En*Ya8}-#J$e@zk1-y18fE{QM4SBk3;zB#5j_ARbLO zAi@R>l>#?Z2zo#*4@4_|7^9Yle;|BHD)bHe-lyxQDGikrj!~$c*nRe0k;=513Tp|X zBjk+lq3DWmM6O8DnCS;CqB8$_fy;5UAU6AjWFC#3Y z$s{ra#%u++_PA3)P2qSwG6uVq8o%DD!I=kMcLsf#s)Cu9I+i@9JO(+TKN5-e&~Aj= zL^c}epfjPWN1FHP_8@N**dV#swsNmRp7};~T6ZO_I3)Ktd3wPGkr`NBoo*ZuaZU*ePM%|Ayp?+C$5@Tkhfc&SUQ_?BSBa(surclpSN1bSMhrietf=cqL}qhtnsHk#q^1X6DN<$EXz8jT_t;b;9$c_u1_6JE=~zdok@-&p?f8P2Zy38%2!-q5Y|?bd3xN{sosr9bwJjoOV#Cur`}k3x?Sa3%g( z!U^5V;#mEr{n+9$aiLSjg~Ow!ChB|7qq(19h_dE-Fq)E^ENl-PDVrV|)tdP1r<^bv z9b39jnNH!B`)5lkWVMzKAU?Yg!F|qjss60Y>wVqj;_9aA&U?#wHF|w|nSF0@Wp?Ab zXTGPs`8Y2>#oS%dpS4N6xwg$uY(T8=p5)y$>N2_uj|3+TXAwOv{xR_~?&pTlFDt(t z<4CDUVMxi3G>>ZL)|N?HNWXLkM#M*a!5<6A5rz^<6UOQO+;!N!+vU2!u|dX{^fzjwTkFZyT9YZ2NW)S2G{v<0GkW(IB}7Z)wZ$4) zN9~vT!4tV9`D2pfkhOE|OhG-f)ujvwo|a3ji-tYp*+D~u&#L(DHbu4 zJLrM9g(5T>sqU(7fcr?)s%fR8=N@81vlU(0RhCn#RA#5Y^Ho)5#->i!Q9HupPiE?U zH(GFTYj`GK+O1gl`n{h-h=EV@2U7{>S3+U%NipUkvV)R>X!e~^~S}I|NJ7V zmLHq?GAk}CNuE}gLl$2)E6X;$>GA8zkHI8bW*lRrm-xGy-GtrJ8t;Xd8D>OG*>>z! 
zy@!^&h>^4{>xi|R5!X>(&!ju6FYSNZMjkPU=ZPQqU_H{c2DEyZ)I261rk@wCFXxpu zCnx!Z7^2(b>}6HJjT4EXgpw=(xKROsPXGWsfDifX1Arq702~?s0CzF~;Mk-$D}Dz5 z0rso3xCrq2_Vu%^Fb;eK!A3&U9{e-$zi(Tym%reD2P8!VRa_R2Gn_SajBffnOy~$< z;Soq3m1Mq_^x$ge4E|K~BMdR(D3Z2l!Dgv^(OJ>e#lf7l2xZbRD2274|6L7o|Cn2l zVj3G-Od;Ggel*hB!GGHJ0yvII$*Uj?wVuLo=82#_JZWdwS|A)g79XRW#Av8n+9{@HVDbj#HYvy17 zw@&?c_aQuJJD`}q=LtKU?Lem(Kn1`6P8czRYmdMGx9E0U6tr6@<_c;k7DyUZJ(3&; z-~gx&1T~3#0~8>BLJQ<|I0yV^gP zOtJu)lk=eeGZ=t-p@t_v5aS4?lXEWeEdbM(pH%3KYXwf0vjd}B(ituu?L^LDoVQNqa zS>muLFH1xkch>IfT=0@m4}stXJqzIbfbVb9!Pg!Hi$V&mZvhAYi8$Cg78af_>K(yT z18*hRHa?S(Z=c`Twl(>;isUayNVlZWX!#h5qUsf_|JL4(ihU}KM9ytK^XL}w&tQ1Q zl{Uni@T6e3+dgyr-$I{}s0&f-!Jh2{;j<+zZQKvbd$!1%l;8@l-Gih*Y7fUi#KoTJ*8hcRK5#EKtk`VlSPUF%1% z<^P_!C4SRQbU`Bqk>E#!)R+~jDDVE=0*+xAr$*2;0ZvW z;kC?jer)*{VFyt2bZj1jY7BHj@JMJoLg)vB(IQ;HHc*5qlt)91E~J94$Vs6{#2mna zL@Kqi%LyeBlY2M_*m+vCEY%s!O{$UmT%AS|Z)2`98r$(Y{1+Av@32q1F`2*ON0*HI zgvdlc9b(_FWwOv_#XTPmM(3QD_qgBbSo5CLE8iU(_~S?YXRWEiN*jJzkkEX{XY4G= z;9Hsoi0IZMpc;tJxr7Cxtsvi>5?>@hAN^?Q9oe8*#`||AOzhJ_=CBCE1W?ZS63GHa z77U3xe9Xy>UUi4F-+w!^>);!wEUz2@!j{r&w-!@%AFf7Yk zE0Khrp*Jz)T_lhv1+X{G>qS1U8)fTHrXhd$w^42WWgiMzh2YuH{seyuq!ldGUpc;j z0ZK_!Qx=eEFNjbF&qfiF|9ut;8i7>KGjR$;_CU~SE)-Ju)RqXbsGnc*jwB)aSG@j_ z;-L6{H;|l-h{g-Q{+_g_>1Cczd^yF6^oM;-@ZA5L55kA{|6`UOsSF7FfUFX~kkO~q z_I~WYJ7gQLw7G?n$|Wa0!w;p=wF*!n@s>X=={Ebg8{wQb6~MIrU5@zDgm#DQIRHUG z)VZhea{(liY{7qvI7h}ZN-d(P>3qJftDUJ#tbhMGT>h3l^MH?G84g}^+KPeKATFb`k65c|Od1}(t_cPXyUEsbCNw9~9 zl+&j0^3P%`0(m>CkUpij`qJ9=BnQ42e$FcKDJYWe-yYppgz>OXp?)9>Fi{8|{7?eo z0zffBA<(2E6nXp(WBPC%77iS`qWb&f%=z+E~L)m?> z*g-BK&?=F^%Kf5g1z@QD%eUjYv+s%QqeLnqWxX4}H;>$TY^nRwH-+FY%JY^1U4p|t z#Oj9JRQO4VJ=@jG{jaCy6N&4V7;!CWsovPUkv z-XsPfK`5jhR0#vnk*-M6+ou5|XE&*N7{cCEwciD}pkjHB-ROp7PWvs59^G08-rUtc zW!kQkl+w5^zvm&3Y*qQi?MMnL@5q7D=l-@;!vb32!pQSJHa5Br&$a{r2!OBG$drXpv`z9w!xe>NTod99hF_X-1a8)QwcSfcM9EG0uC<7i zv(yn$6ORe>mHAT9Hx;V&QF~7C+0RY&TS1(UX=DT0XFdg<&H^XmhgCPSYek~&lB!kg zw#P%iQ!qYa8f@@|pb0gKSs;a)lFLmHQ!S~-UqsS^O@v;aV8u^w9u=JRe3tq7L?s?U 
zBGVcBzk<8f*W8ml2{wwL{>jrzUlKEStL;sc_x;qNb9u*Sw`C_TR&;?u+-dLE=tIN_ z(E0rG;mxyk3jWLOdDq{(;J*DruTrvznn6YU*US_CSIhRDQg)-&lCOu=J?vYA;7ayG z%7qDTfpJ;HU5I$WsQ!Vp2L~wx=ZAF|gEl3kO!e|J@`!acXW3geY;txCIT^=E{{Tjl#u*val&D15V(1ySZ8N_bzDD;|B5$Z=0tnz#Bx zzhr^A6z0ypJel@Mw~szUFTpBbYER_{C}(M~*z@7>KyXmp>fKwTnl3lci&wpFx1uer zjeKD~2&mB_$HyNXT&)L^8eW;mD8}7z)JaWQIoImCo7mq(GPCbBSR+YfmZ`#gE+y6F z2xx>Ai4qP8jJ~+4WMVZ=nAE)n?cfJj_h%e6X7SPJ$|roMx4B0FPR@DP@)@_MnR?4tAcn85RMXBv^b>cnEB^3P-53_;P~@1wblnwn z&!{@yuc>-(+8`Wa4p0b$hzXz{1JPPmj0)VpPDQ4_d7r&11d*S%ZL4q>{wt?fa4PX; zm}ahgmm}a*I0#gGJzaI^6Cv=~E$PbZU;Nt87(e6FB94RkX{Y;DW+%~GZhk_zCp0C9b^?*3iF5kv!D{Hw%q9QhZPy+rtPak_ zusK^rQiD1mA>^fAbs9HFzr_6a*7orF{Mx=JBTo5WFA;=#0A7*;*>a{v?eh24EOdgr zj_nJ4ue>h*j!}9|MBlZ10}g<8@`d|TAFuS~A?Vzj6mqmf0mP2! z!qww(8J@kCg4?8+=lxuiaUGzkBb1$APdIl;r=e`#IMTGB8l2E^Io0sU=mboJj3_BO z?D9d>R056RY{D0IP_fRf)h$c0dtXYyPV<(r(M22SW1JWIGf_yKm2A7B%ri{(^u<~- z=K^)ub}HSzZ%zayePllx>v%Ek-vI20US3ES?RE=%hRspUjR~CuFpR06FFV@kc0#A^ z%u#?}(&2+I`}{vvSF>C8M|ukQ2}*rmLZrCFGn%Sh%PSn8uWqB|sg@@P)Kc_5Fp!;! 
z#6#UjDZ&fB^#st_HRxd<pM4nSW;$*9^UR5q*S;^0}dA@cbQufPUC^4<3buwmv z*q@P`+tR{%Z}yAH-NM^7#kBXk?PbL$#NrI*qo|CC?xYe$hOgM6iHGI0g*dVn37=qs zTKjeDoAZRj%Cp85r-d`EIxww$PF(a`Ti+^)VX@Vr+qooxD79x=(I?=HjF!`*2_Y0fNmR zKJUJB;b+#wOUKF?w2_#_l>Wt}V?;;E`-6E7t%c6}vR7Pv)u(kaNbcIv-EGEZBQca;FV2y`=8SkQtFqX0pD1Q2>zP-3DCSmj%NghB$N%pg*@BXLI%>niBqp7chznzq9 zgxd#dw3O{Fm4!d|78f48)kN1E#XGYMKs`8-23l1VLa|gxhqJo75T0(m#g{Qnm1$Gm zw4M9@jC;kDH=0e6Ved!tGgtqx4@ft4mK`4YM>kl2)2aCMv@OJxfA_TF7dg9DP(ft7 zaqZ}QksmjDzzw#^7WE9KeGDvP8mWkTg4?2I8K#c5s!YMVlU93j0=Pb?Mm34Kmu-$2 za?4DHfn%p)E<=G|$>khHR0>qd z?oz3sxE{5Z2yb5>onu0+}ufDIVSdiR&Gf$upT>~@Z!0-O5LxCXy8`L&4m{Wdxg|3 z*{H2U-YW79R$)A9)e+f;_V23Yl;7$K=xVy5LSZp5BY(+$=pzBb#hwS>1-j5LyT=7F zk%fzT8Jij^WD4C-XY7-=7D8cjF&Pf!7ym;8PcaP!7#Y|wT5>?r%)1L$etGgvML)%6 zV^r;pz{(XB(Cb>OkTeZ@=7d_Y+cQ z`tG`F*Y6mGz1sBBQmFj?19R4F{TD?z7;ZkKM%oW;o?YF0JnhYw%vU$sFRQN*6hs+v z$j$|wQ4g+M2+qzEB_X?~E_3eWEK5M^iu?{q%gz!pw7pZ{&U7Ic$8v8AKWeHh`Du3&C{c%RM-DR8rj?^l!gbSr`c5GeLWrr^|GwZ$}tatlOp1m!DUE1vY?G=91)5hFHX59Qpd&MR==5k{Mw#BVzygq#GMyiH^4UOpVMiThqupgFR zqk_zT|A@=lO}=Rrw6{I^E$J4E4uS$B<~OC8Dq4y^t7cA1`?FNiGyC9HY^Z7bu-A>l zs2=HKS2c;Io!b>AdEN4R)Is7koE~4AKBF%_@diAoJ_sU?#n!6e{FR~$%E^wZ&pOd- z`VZWDL__`rx2wt`TC0+CsNnKAh1p>}+$VZ{odi=z?0%%Nk)aYF3hue0MZJxF(n(A& zL=j=x@bSsun?pHHcXSDyEl)A)^{}3D;}#>VHSk)qd4L#WYy+FWmM_7 z4m^6S1yB#erD4T?`X8Dwo&gpH?!uvn1Vk{SAta%AO(4;Jd(X7BNvdj4B_P$RB4Qjx z|MSXZPh~+p6Q=XH-phzz+2j63w~tU>=;S1QXm2}6O}A|6`i{z_(d;jMEN{>$#7{U8 zz9g@2U?c#GQGLe*VWZPJ%y&Zo1eu2!r?_$NA#HfS+WxRw5CSx}2-iHTTF=i_F~IKL zBiYP4+)AQPv_4_(*ql%;SyFQ@6; z1Z1ZmAx!ibHN&)l|sfKBxTU-ASz5AP;tr(awN+x+qoKE`c z6){@rln-2@Jev^b*dgsco!#vvf8KhN5rJ4Ot3POFt_xr~0rKa!uH2>u-n*6_Be-$| zE!N$-UAQyXs`(GFhJ4LZc}!r~X9*Eq({TbPV)|7{i!fqC&W;GTm~ib};H3I5zgTMx zqz`Ko>rsO`J8z<+_6p;CPKU=i-pSe8FAT=lBGrD`C)w?fIFOyGhcV_Pa!j=wbe%h4 ztcs^thKHz(o)Yj+%hz?y{?eYO)M)MS_-z|*-o$6wLpFaKp`zc@B9KxckV?R#(FSPj zNPb2n9T5t-zu>yy9368QZs3RCg|>+`l?MsEL$q$`k_;^pVi&q;rH@E@qiOoaaKUXf z3>o8?hv5Va^Gx8!jv898wSg=vlm+{-AHCQBrQ!4+HwJ3cP=R!FRUOvmyV*9j> 
zIr|x@Ga%Cg!8h=QZ(KO#3z{u)&{a=^H+uDRZ=3Wn?Vkff?J(1iLg**{hiEAchpa1Q zm;cqtLU2KO58#SuWsd!v1;bj^225hF>&#%3W7JaAyYClKXk8OoXrK6^>3MlK! zr3(7*^H7kj#gC2%O&`T%Q`f}4i#hth(-onTwkZ0oV~p3v;r>m z;pvAU$BQhiWv>Ue-(NTWL_xeu@;OYli1-xAJqbcAQ$qq$lhH`?<~)_1Z4MA(T*<<( zZqIpFz~~&_wm48vK!lQOqQTnfI%oI{DkH`|U#|t3sjDs6{{2KsEk1Kc0Y!w?a;zNv zxLJ)PRSr0ZFuZ*4gm%!qi0OS}fY<dAvb_u`8Nb)8?c>H$h^9nVNk?rf6A(X1AqPrGAI?{2`u9IMP{$qTMw z+7Zoz4_S}ueoGpMr}Gd6lX0f{Nw{%sOsoZyCmi{skI~jsBjCt?#{>?DAaHUOb4YX# zhXd~g$G!TTmRD>rnV$EkR{*nU)mYf*`N{%>u}x>!e+&+8L6!nT z%66xF3h}D)*`0@T_aYo!U)|BMb0PGv(d^BUR|e4<+Ihv93*iA>!ZjxX_+**jSEDI? z8)Nc!CO+e6+rL&m48H7?`Uw%=3vtNv&$jQJA@Q`!1nFG1k?pUhFY$cTOr5`T-;!Gi zY!PB_RwO^RR3_yhxq{)hB_gY|`8QBG5XWY0Fm?<8C;u^#_lt2Ryc1TQOYeSeoA&>X z0h#Lkw4#~aFDPtVF2^#2VtQ20FusZaAb}@HC6tsM?dOEJ*KxPL`VRINhg-wiD)dSW zAGV#Isja5(RrL8epemsL$=ohRtoCK8?Zs{7cYc2weN*eM9xIhkoID)y@&Bee2QY6) z|MO9%hP1Zq8~Y24&+?3Yk5c$^1hT|a7g@6>^#K)D!igR9FTclhn@Z?EM$Wbu!fuEd zk3@h~Kw4MKUrdRuQLT9Y=?1j3$B@ly?eRkqvqfo0Q9B8Dt4d3@*7FvPS=t>&DF(4Z zU&tW7BZ}Gn1Hu1Yo)Ffh2pi0(y(e`v>%R*;)4}gJ{A~)H6RR_~Fw0<7l7HZ0J*yWu z@vWL~y*&2$J14f(FfE_mEl$O``W&F?JOhrsw_+$M#NeO|IzVB$tjx6)Xowr#701dG7Z zmykaokQ+73Kb7s$xW2P-bIJve>IO_I*YszbMIiq?Q8Jk%9sL}2zU5>;rU{*dK>x%jYQE^=<3^wgdEaFK7= zLL<&AF3x-L{d5`1SQD8{9hsZb`=|hTZspFxM>P2dJ!}0fqghokvG%2FS?R{%I`(_w z-*nt{@Wz#-QJ)f>&skrRO|C29KjY7YuY1Gs<{-_89F#8IKC@6=g};ZlWfG7B@g=V) z$y_^0ci{`7dS;V9_bV^*H$eg=VS|-q>wSaKg`vgIo%d3|%CpwS>gk0?j9!p-eO3@|sBGbIx5D^$;&)EG9`)R6cmui2~ z9XP5sp_Ae_(~rS_CbmibLqt4T4r~6!bIasx3`rLqij)d!0D40Tp7m>9(W9wK|J;?> zbruyA%`?Uck9cGOwS2wzx!|!>43&HmGEgOOQ|}%5>NPoJcsge#A6nQ4?quX$AXoH5 z3q^_`{&p}0SsuKu4gC!ad?e|K^Av@Tp+`D#X_!-QdbS|Xv5<(d#^B~jq7tsIOxEqg z9{LE~ma1Gol}xho4g7xh9~A=aO!hYa4I+4({*dV9?YJIywt*vMYxG6`K+`Nslxpyig*4 z)sa+lJLM0kmh`=Y4H*{;FkEd1rZKX+*NzBI!9!&bxp*y>Z90l1369fNZW#ZH>NgYN zFh7%QWexf*Hy#u9EA~(EZ_F4hQUN%?X{z`T=!O_TX0D}Rit$%!&Gjvj)VrsR1@V5F z61vrxIDaJZ7fl8Y2g?zttTyUnIIr(3&7KYe8A;P2oM_ph*^Bk+e45iYsKaYeVc5Vx z#~)ki>S|w&$H>l-%@~+30!l0gd}AT2#1IB+;P$xXGh@-|`s5h|G^D`#CX(1Jp-yvu 
zcjZe7=cU`cX=K48{@Wynz(K6cGTLL0FR+H;riNtUYb25#sYtPf^I%tK%mt=(f>CoS zM#e|rl*2++Pw!Szl@-tJ0gqTj4NJL!{DrRTGer2=ph+Rvdwgqra|$Oj!f5vbgzX z`tMYDIA3;Tl(d3SNGgkIU3;{=PIbEoHywg7T0DV=>qXZo;0acsSgSUT1WUE3NHGd2 z?7EVQtOwqg1|Z2m+3!N{e#Z~aWmeUs>fXNFip<~vRWMks-46o>I&uE)*Ep0_95_ll znD}`#Zh<-w1+|6t&36kmAw|$i+j?mCQljaZ1_Emf=49?4l7K#810iCoHNQAUkvE_N z`Z{4}z5}KKrsDbwBc2|QI}+9|zgB@KX^!M1uIsM?qmtVe)u-kwM0NT1aX7|s%v#)9 zA6?lIzWasMLo3-V_XVNmVi!4kw->l=TSk|+Ad%>rSHI$(qky}g7zsH3e#Qz&`!`h_bL*NF2Vu)7RfO*DOuC+3<2t}`oao%&4%qg(KW;*WE zO5ct^a}|padTA*MFJqCz+Nyw}stgvmjTJ)eO9PU>s=Kw*zUdNzP=CLA93L3FuQO?qRV#YF3XG*Tu-^WPcgI>&PY5GIWTy`sHLeHHUFJL4?d#km6CP z$Md|Di`(=o#X(iI`NxsK6<2@;e5ak!R0VO?w9qG)1M)z%E}waP!(g4^8+9hwUp=603m%VcfoFN@=b2y>il;ug%(p=t z@sa7N_s6Uv8~>w4o7aUZ5{vg&nSSS`T{!5-$LuY*-104n^>kn+`~2p+Y_c+ORY%>f z--PMHN4@EuvI*T*gVyJz-hu*t#>|1hA0Bk0CE^gQ{H@x*jdI^X8kpBHtFRW^IH00~y?keBN8; zO@`Q+>1lkdpf3yw+>q?=ebicf&wrTorTF(Ffjb06IG?%jgMUfs-t2ojahTGsGv+Ej zvvdN7SzX&~WfinTv_J0?_;zQ5TH0Ubc?cKqJmjwfd{U+^qavS;!FcS-39F!Fq-|K&z11D*@ry@Y4HAC!$OklSz@=z}d#+A%i>9%9FX6k#Yf zj^0b%YyVeuxGs=WcUC&bp1(KpvUMLef9MD*bqyJs2vc;&ckB4CWkMH@)rqsz!(EP-XP z94C=L$1q5=TO2@e{noae=g?m+0^4p9PA9YJOh+HG*@~x@xc_6&6>QHhQpuOpqNc3d zu`f@XTDjQ3Q*t!G@8x_c$HYOokI3}Ce3!i67j^XACvS&$|B>Oho85w?JP5x1Sb zdw^-rL)hs-Xa&1dzJWXHTe=AX>rB9W!yd+(Wk=?1vrQ56MBx0d5=GTub4QH+ z86h0PkedfJlNQ*+{Yu<^D{7je)q?kglbgRB2_~7BF63-(oTL5Bser^_OC)c&7S-2Z z$sOk@wW#QyU8jyvn0T|NZI^b-njRL|fTl4%JbA)cFmJil#>@~a?df0XQ4_+TC%AN(j z9JZJoS-NL24l!_v5Y6SrtHJ{NCSw6dW5BwD9Vp>|h~b|o36E2UV>4;3!}Iu_N0+|u zSAJPgmKB1lAtdM>$HgT0aFop<8Abvjltq3Y7Eg=@pY1PiGUzAz*hdYXQ>_T!t( zPHSWM^t9N#8;QKtx7ZM`jum&nIuo*NoIr}+Imyz$;ZSBek>B$Rg{g`!l$Rx(I3Tp} z8sKhi8w)KIM*5$E^Bukera~g7=iFv>?%1-VXv*DD8GQs0TlVA1QXWe_83N!5E4#K< zc{S?+XDWWa>H5bJlnG;8ViqWX52tHEW2MTQNQ-}!0H1Ts3pC$SauY-jd57n-E(@XF z`Xu!iLlJB#2G!ngk`_;%7fp<>;lqVWE&eglF zNa?(peg2!o6;y!Bu%Vh@f@{Vh%1L8)?#t6+Sj8TYyvD8M2?B~}*k0g7#+q3+nej@qFouhyy^xr9Zvsva&DvKVj871hBRV5U8)%o&qxj zUTD$we?1g=8Mh`Ci6fZ^uU9FTVwEf3tHk4>{$x;K6T&Isgg#L5l`FI{t<7ukej}~H 
zB@qsIQ6ju`4W|41V0oBAZq-@W@rbJTECpZewg|48Y*B0krgh(*)AB|oT zy1Z&oQ4FTcp`Fj$!96hv2N1qy{`uxkxgmk~bJs8+M6?PY)-AB z2q{`24TR+R1Ok#E{z^u_E!s|V`y^EVW9#K;X5t^}klWf0)sqxdE{JVCU3Q+{@c9mX zVXoauZ@0Y=LuZQ)ZNqdcYZvv~pR)_umd|7cwVp`4%Z=Y1YHPkb=NanHZgh0hL0csk zWqjON2#NhgUD3E5QSx=DsrY5Jm06sib%H29-q>pO`L~@nF|b=348|cM*y}J{RVbev z&r={JG87<+C6(SLeZzMI4xC>Pzgtxp8VR12#fWr)&>!Ba#5(oP@wgEsu(~i+-L(1I z=XEhR0Y~N)VUi0Nyznw(8&M_6+ zQ^8w5YbedwP>?R2)Xuv%wLh?T>U9<+`5K`=`I&RK*j)wNdAG4r zv9pMom@GkA+A;(LwXf6Z_j@Y29j>qbp!-tuq*czzmn|9)QDmDaX1V>sEW)!u*dTQ0 z_9}TExQ|HEDf{qlY7OiMU`25v^m-%BEg~9iSNo)D?eL|`4<{S}^hGg=l{Jrvm8yIK z-8aVPA+F>rx_LY?Q*=i~S3?o(Ea*D`$QhhW*!rl{@HVkL1!k-n4m|?l&_kKT%?uUi z`@)xOC511p?1!t+f71b{X6s>&b9RG=Y1hcUf^)F7L~swz47<|xY(5QSMGKOPbBXYUK2)yM%1(rtFsrhfyPA_UqDXhMu^ zC?@y3KgN3$GMX`<@;s+2+)&hY` zMs4|WXvJC+4F-}q?6w8Y7R>E@n*6u33K7|^iY`OASnQ6VKb94${AK*)`^QmG>lRJf z8><`uh%WMbNf@n(+as(p4`h29wwuw4-Hb1sx4;!T))zfh))bu%5p#O+sT4stZKEJm znCMu=8q&~v28oHgdn-fm)S=62bZ(_8A=-R#XlVp8T1Mouh4ul0fO#W%YUNyaJPqSi z)DbR8?P}9La{wi3$$;_ulZm^oY=8DVY}lkePMl^T*|i@h z^pYxFcnMuWI^0YyHgNX=oYRZw3a;o=Uu{m=pPQu9Ef4V=$Mtc;>S^Y#L+Wp0@=*6L zpDFyH1j?4ozmtLvA7VlCd*(!>;o7KQ-f8_p)`0(#RHPM zveR)@)fH`%Czl7&0p8ed1~oB(RzlAGk2zqR%DeyNlLSB<&~GcN0}ed?7#80Bbi@RI zV>#`N*Ar%&VkiNtZp?@cE7mDUfWs-xSpVo2Is35a?+WAEtRAi{A$uM&bfu*5Vs7H> zAAP5gn~|!>Kg7Hm_6K6XEkf4`@NyadrCWWBIV%QvfKcAIs~;>7rT^PWKXJpXpYAp_ z`cVf*+1vEMg_I??!dVIa7&U5%Wnv934KEcOj(m;OL39(VVDWeWMV@^ih>yN)06WDh z;yMVdi|!?UlL4pqDJT}82{cO#U%nqGrlZ<=PqNbN{Ck7AFWph{`brqZapIDvyeIN* z0{Np{Io*r%=uxuQJUNMt06YVs`TfeMBlU=a*Sr(eo?aP`x=O{}tPxExBw zXDSu#&` z-cw;>>jWJg(S}J;_8%bu7cfmy;ZOFW%zbbDdbm7zNCe~aJI-5oWB|e{~9v1J2wsX`=pB=u24f$)Us}>I3xki;t*#fwMWJ$>HTm^-Ql^V8HBeEoA7d5D_XOf5k9z+(K-3yD?}m%FA|e_MS`g6 z<#a#!_&CoR&QoaAvy$Ikgr|hpYxP(OM`|o_J++d%G{|(OO)?V@9Ly?q&nvor`|X$b zx6*JlB|J&6d+f4P)h&OQ2-Vcp{OUQGcQ==C>(##dSJeKDzuDqSlfFFD=FeKY7Me9a zaY;%Ol|mN$zU`FLH4bbkot3ewg2v-WG*ZctcBBYx1enEO%)1DVj$*x$5>cdjE(vs z=A0f8<9{SdL-&}S6i|g1HQSO9Jyh~9YX9oC{%KBU&1UV<*R&N@!0cFx^`OP>$d(Yq 
z?z}i%r`X_8Rb%^LX?T(y`Sa%`Q3-$T73W+ph*T@2eN7U~IG)-l(c062Z|+nmDT8N6 z!iGB!J-Dels|8X3!RPmMjmCl6Vc3N|Qk&`VrQJElqFdwEjoous(V;uOF@L zI{f^Drleg}rzqHvT0q$f;ahM3;CTRG5PiZz6w{JH1r)vQoJQ00!nD2Di^a;H#=i?e zZ0!0g;JMMk2ipytkLD`Z%6^;H;4vx?5xICb;btfjO*frQ7Ew^1yQp=>20 zSZt*vI$EUKdz$f512u%Pxy6ZF_nSpZ_GE4Zeg#?p4zPVDo#g!;S}5Z!{PpKc>Z3Uv zH~A@ouIL}|V;%rJDOFQz;ycO4!bXw8mYS6@h*qiqm!Y-(pA7?w4#LivoLAo%z|hCu zO+Yl|T%kPM%#}!bT@6cGKIRC&uiwM0aohOMM04Up^vUDT15;u&7XFL=*)rd_qTgtR zC}9htRs0d+*Y%#Mnm?rdh^&y*#8n_ekZp+~8uK_kw>=({f=C=Fddo2rRC`s>T9ndN zLVulh#&UxK?+>D%db&Jag#TY+#CwsVaOehud^z@$ZqI|6I)o_vm(z04*`UT-I9v%} zAJGC*-_X3#WrL|l%U8Wd^X1Pv$c&bL-`EY`^$4ei(64$TDIXJ;;6&a>7lbx<(qDa7 zvgSv3xqfMYRAWq^y{Y2VzpYUf>oTxRAmer7Ds8;TA(_f2>C4i*F#humV2oq?g^Y|*Eb zTeZSxWz2vBB9K{cAe){muV?FNL94E>vFr~kHG6~353+U7xzBI`jJpfkE;iMCBxJetgO@f_^khGhd{8IEh+H3 z^tQF_UW%8j@MtWf{wl({ESG%?zu}{&n!DOQrLWc`9zmsfnUncI$jypuuD>;YQozX{ zazg$fA;coeR)U9qzf$$5;d+$L4P=NT>QC=*KJo;#_bTELU>ChVYbPO7x(-H2hPMRX zdq!;ZOl|l|JARgD0G`z890rhX+mDwQgpUEIE_cG~Y@M&MZ}_eG8(1m!39J-@)NIur z(rfYt>*I+z7E#Vj+dQ6z<(Z#;Y;fC^L=}sVrDAfKKY_frs{fF zUncsRch*M4nc8BWMMaZe%lidj8Pi9?zvx=QXxm>l6B?7o;RcSsu6+cD1RFti&Ikj* z38oLPA2d{D4$OVFsnCpgic)U!zq_^Z3fa4iwGZp@DU17>=!tT^_kZB_*EQ5va5j2W zGsr|)@PT?|zus$*r1@1MDBz>~2fkmf+D z{PB^3AqBw%9T*0H^fGWo*ikzzyvr;#5S``WN$Eo2ZQQR&smmaRKMS2sstiFEd$la| z5e)|TkrmrZX-$4!=FPohoyqFw zaLr-AdQQ*(hp4j*sHzLM^`b#SLb|)8r8}fMr9o0!y4fP#64D{vNOvgR-QC^Ycln)j z@BPWYwKsFmHQzCwG3LcCftSXM^s)Oe&0}!lR$$a{+R}OdRy(Y~JHkRV!LyPF!VN&6 zDc{p{9~X*yr8&!1|9BRPzVb)ZGvQ2ax`%_liuoV%gQX5|Iu%mlh#2}^%CNbi;E0=- z5YuuBpEw&YDYeWD$D0HS)OtIPlzh+6VDKzwu>sm6g2HGLO1>-e=rjqYi zCSlb$odinpphA+rjLlvC;2`;&#$EB8zr;0uUp;!TTF3@J^5Ue|G4hX^sT#*=IXL|5$MG?uD0A~fUB_`s_2x7x!V-2O0RLtrH6o4OtbAI)4 zl=b@$a?-D-#=9dn(wwV|A#}L02{gzeMHRgi-_wYs#k||9-iGyNhBz+qrJMgV;Mm~r ztM?`1PpE&?4d!dM67iEMOKP{YL?clM?+4U|Jmty)2^UG219bdf{>_YE6rVmUKfRY& zYZ|USZOxWgx#ryXBNKRy3{0srt+WgiR7qP*^#m)vNxIzrQI~n6=|-f-O>cC<4jiCs z1mqW=Plgl`3m+xnRCPF;P1@h$6CsSm>}I|??vcus!?W^b=W|Iuw^LfI1q;8B*WFmg 
zq-U4eM+@vXm1go%5*4>a^A@XrI(FXBH8cr0JLQZth8UAAjej3W{})!H7LnD{fb}DX z&+oMK`+?Gb;rIl*+rP;IGDfcNZ6xAR_i1`ld)sReG*dDnj@{^ThJgLsv<{C^d7(fnLX9yO=q@Wlxg2AgmgGY+mxI%mDBld z)~+&%LCe8$qsVndFEWbNjHs}78Uwq)Es84PW-E*hP=6lhp4jiI2faA>Va^ES7PhUx zW+Ks%<3H{kL5U*OBAx?lQBcd#?r(eP{EGLl-k1Id9 z3B)9pjpfdc4^F3KzQKCah1{gi-<;$LTf1ld!d@t*+D*7wcJ1U0E*z@8dH9-IjE)E` zfliyj9^n?0*Zcf|l2Akd!>r%eDK6Znuq1$_yL#_K3bmow1rN zEZ0g*dBF5-mWTM^E`(!|(I3h?ZS3lbvAwu)EjyLqWd7DpIp|M$*xjm~aRa=F3^BA= z0xF8c20aEE%w17<1-X8fKd%qjIO%AbB237h6HtLfo-S59OtwnLl1<<#ZGCBq( zzmdka#9UfYi`COl)#(<;yrcb4d(c0}d`p=lFOzbNtSWj1A7FX7Gt&$685;8T(eg3y zDA@wfoHYm04Gh~2Jhy^8RKwN}!ZrrF`_1xp^Cy;>U##L{kHf>q)S@?iI>B~;ythvGKezVq#r$nhc z53AmzXm~Z!H<#WK~{V7g

haoHA-9Bvl|q8&-90Lm9sYHnbZz@9+@ex(9b{BWXYSTVe%ePt z&k2${%oHMhJqXGQj(iaaQYxIKe=y(>sN(h?7mr1^&lo-~E*=Np9+!Vf4Qi<<*XCmQT!0?j12=d6 zM|u}(I+DqQ-0JZgX-ZmIQ*jKxe|?d{x948y-p;*u!{iYM;EvN|C#$#0%NQ{9bpZ>>Yj9AN4 zmFstj`I`D~*1;{2Ws5!9e1L8c<6cJSm@i)CCrlCVq(j?}f5%$#@Gy2Jp{8_b5y#$N zd$q(n=Wz@b`p)-#KPj;d1AX@9vh{iDi(9Sg(o2P@iwJvdb3#+opmyccHWsF6+1rZlJw!b{^6$OvRWkPyLp{|BX$yI~>~r*1iY-Pbw4u68ZV3Yg@zySyAUw@U*Df(`ZN@U{ zeqCA}g+^y_Qv)qNe4M_}0(1KfAFY!BQbVsN4cnK_ZQ>7H>^>^)7nA%c?KU)pI5YfF zC)((q9-S;Ui3$VAej0nzf9i*gTXSKiR{gPL|5yr`djw9K}IsGyY+Pjs|zFC&0iQ1}& zBPAut0;y%_k55hW)}HJ$_PtVJg{fp3Z7PZ@@b|T1KQgC|g#d8beoqI}K~R8G=ykt) zL&z=o$4G!5*D-N@IsN5^uwGFzmQOl0gXNL^D3ZRx)0gMNsJobeuc9eq3JjL!B0zf` zQ~87Vmc^xbSLN+VQEQz^ywBq&AbN#+BLqWa8-!5@_y+G!MM=jDr;WVD0!)l^xFsGR z-8l+yUvN%r67b>spaq-5*`~hVH!V*U7oh%ra`XmA{xnGO(o)h#--+UtQ7}fwVfj@_ zK;aeiS{TJMqgn*_vV2atk#7z7U20C^%z`rO59kZC6>2_=q8a3ZOwn^L&9FAQWtkgE z?Q{`^l;K3n&X`Bpt1_{QQqj2O!fLY1WVu+pZZx-YHUvUR^lFmy?Ry}<(?SQf30@E{ z@&C%d;KwRstI*BO`0Z=Tdx8a`zd}}qk2jcdgF7F? zQTl_Ym6a&gIG`dLa_;L5@sC&a+vOxk!W@Xwu-19--tN;=lL=}Nzf)4j-*E4;L58I_8V?aN& ztsqeFjDHkHAx8(-5c$|@Zz^xA(f?-6mi@!nxuHK%Ph5Cwzz;sy3@;Q@axkUNDhaDQ zm9xn#F9~f+$9WDN=~OzsPC=^q7E*k<55#>AmgsiBy+*m%v{`<+H1xgmSXStvOesPZ zMw0fr@Dsgl^^Rh=wu~8|#|`@h#1M!XWd%4=a)0@p6leTD-6-M#AR~HGR!Hv~7{*@V zgnbIW0Iog`mL&j=<@C^wI2%z+yYMgjv3&daiXB7ZF3 zRIu2qz4SkM7-nDG$dsISOGC=qcpDTw3pLjWVrZK6v-3dV-{+bVV(^SJx6;RISNPNl z{2N(~o1|hGu9iVFS?8*_0|f9mNB}VXEk5SkTS1o4&IAiN8yxZCrGGe4L0jix5u#d$ zraxE5Eg)|#?w%jRz$D3Twl(b-blW@0B>G~RSw@WY=%V}=^KxSkb<-a2@zw(rb5o>C zzMS8_u%kFp9DO~d{yx8{C_w|Di>cr2Jgd#w3FJ=1!1tMvdoAo0wex&fF@mU9UAzI9 z00zm^+OHpiK|1)O2HW=0ls->^tf0rs$xqvbYBy^)I2{`KDrQuNj1tfFMnO|NUX?k` z7Z$Rz+WD^yMbpOCtN1T{x4j?EExs3`K{G-6{LDQsstHL7t&|h(foSH6UKlzNi`Hwb6}Qw*SbE3JP*^4Z%#b%^oCeUjVKDm_ZtYxL zeBfh}t>HtLql?50>XwxB5E4MOU^dVG+!^Hr6-EF6iwz`n478x&CgX3StNjXPnsUB$ zZIrwa-LBLOO1<1k+jbpula#;Oocgid?smH)rCJXC)I83u9GqH(=i;$8CxQWUmHj4Z zi{YcT%p+>Z2v2lSXaT*pNqZHM97D=MW$c}Z6zm`czrucQ8)H?D>*_Qh=FxIED?{C} zl^WEiha3N3z#mu_ z=noDaIO;%UWxcY`*812OL*%JQ~ 
zXWajO75`Hcu`_j?E_CXq)Ur~ynP!p@M~UNIKz>XRx4sht=J;ylDD5=@zku36M|qtB zH@w;$K3mlCDwF+#U~$-&&6c{?o)(;kE|WOB^7}~5W1GF`XEHHDjHvque^V_}ynNi> z8sDACZ?_8H~;(KL5v zd5q^AGLwZfeg_3PPu%Nmx3p?$H~!o^YXMyd7o+;_#v2L(uBFHx_$*sa~V9>TVRoJ|Xo*4ddvZWR!>S1}zo5dD};FGN7w4bvxU^VE88 zdu=94|G_+$^PpE{g#Y!adur@{ld<+e3Rl_18NH}8zoxC`^!>yKrAj~UTepSjVDhun zfE~_TCMyqS%S`h+cp#rgGo@?T!#ppX;a@8F+`1Cn5EcJ*77$U$2C4ghRW%%@_nK5X zE5K6WqxzZwCzIA}f+3dM5CIClbD+7?WSzq%g85j`_EtolMTo1aqV6qyXks83(*QX~ z9I$x;N_j)Bg0o@RZ+=(Li?~J@b{;{aqRK*8cf{IMT zYk#yiCtE>YKKB_WCIPd$X-uP~0DSU2tMvbfQhQ$u z7XwzInwO!oty6x9$8VCsW#Xs`ahljh;mUzhwV^w|3B0CvRXDPl@5CO{IR;+fi+VD~ ze?^D#yjiPzJa96fAziq_1ok#}HxlnQ-M>t3Nv^}dfthrJqW-_rs=_~pzLRFk%TI_diZtVx6W#MTDGjSHmRUt0*G_C z&9hu|O&}}A;T&^VAr6wqwLN8^<8nl%-|0J?RzNcIsU&U0F5Y@&Jl7s%aYIGbiP5*h!})qdODo{i6?4!8cy zh(W(ig*{#|WmW%2p#hovpK{YbYCzF$4;uqRZ#zD< z&CQBCk}Ry&ZT{3c#2t-Pj2ohl0x^AVC9w9dyl#A6MEj71KM?jPhTie96xN7a&+?985d0#peJK6C{q0Zo zui}2~TQJgU1%!M(GN=ZvJ+`v-I&ClxvGqd#*@xywaeLZ6ll}*VVg#^QZF88lfS-zo zKTivNd>1gl5);@yt#$vTD*$wo?dkDZ782OIBX-O~Cm{o{-cUwzM+S>n1s~WtSqs1{ zyCw`E9(pZI?AEtTnCdVt`$GIxkQe%SL z0(LO8yKgz@a^&xuKZto+*~PS{LT!aTQI-47h==Tn0<(zZBiFq*^9&p)7-GM*7Ey8b zP&%)${?{syEmdOpaT@??5$7p5R6-c{E&)50eO=?mY@7A8kv?Yet@&;J+K-)c zkT08L5`~gwW%YGBodw|!GZq@8`a)$l^#EipL5$&2*G}+7T|G}@`ptNWWBm{yif<+< z?T0G~;QYwkN+csNi^uM>?!&p4F|WUM1Z;96sx)47NGJLKr|~WV;os2|(yg_z59-QJ zAQ%NFA|p1%RIf4hv`LJpg7H{kvPh)T#Q1ua`a1K<+;C#V3D+Gl7D)-pKBUF`H(JBU zoSc8@^S}48{n|{OwR<<3fr~%m+Vc}aEqoNx?|#0N>6lT<-!KIWoMYU61pQ?bXds)! 
zCBPgWowrFJpie2AW7#x=(hnQ?j9LCugV6NbTYBgfS_&PU9Z-@cVJe{zrcY%baD=kS zMObaZQS5yq?W&d6Spi3sKpOvBBR>%Z#C`ucY6l=n$WHZRoAbP;IB5ckio1{k6=8aE z2ac1p*s2~b+7fS_tX5J4T>d<5O&(nzQSWy-)6F!>oI2LV zHh{tZb#V^=q57$z`ClD`>910WdM1OFc6?R4X}%ws@y9o97_ocxp-;K-b~Y2UI`V*{ zgs*C1`XGJIks+Xu8uIwjSM}H6p{z=nvq$EWw&iE#KbrrUS@;1U$6nW>fBDo-aKwpS zu3@70dor=&SbvY-RI{$`ZFg99`Ji@3r8JT+mqy_ua&CJ;|Q3op4olRX*-@C zBZ4iV95}6|Ht^DY9vIDc_ICXf)X{txmXnd{5x8?dY*P7WwRz|YdVo$29{rT5i75`* zh#KQBS;Pp}H@9#qc31qSX=#6{O9?O%!Irb6( z^?=Ha-T)nAfN4=>!=f$T>;DGRS{(i{VkBK&0#qg9u0GA=M@4mJEPC3NW1*=9wRWz) z+8pF3X*IxO#xz)#qJ<>!oI} zS8EQ<`~1O4WX}G%wLQXO3$bQGaA{dF_Dxnv0W~$?3q_X9J}{x5vuL*keFv}V{9*v) zQ3&_ToW=LFWq=SEMEJ={k| zX~SfKnS?WKf4d%8@y_@bGWC(XT;xI;6G5$xHVrLxIY0Azrch)4i&n*mjbD8y()39~ z0)>}-qsBQerL)bP#mkCe4~L_vPN;tYU55Pt9S4LCMogkJ{7g^?jTJN~R0rs)*S9c- z%HIK3aAY@43u;@I_24I#C0~YJf;MU_pyM%%Pls6(7@#A}lepC{9t0q85`2g%3>R_l zFCroRi~d`L2%Ki|&%m0C^td0u+OLGbpkEhf6{w+u|qr?mf)8t%! z)yb<+jgc^ghLjI{Y#Oruxm)96QVOTKnfK>Gx=h4?(u)^A=p4*vB+mf(IVmy-nwGdL z)^5JFUiF9%FM5OA{MNdW7Pfv@Ueo>^KDLofF>?ol<{4#liX*3=R=P?vpF}p(-RYA^Q zhAs~2b8NT;3=2>$UPvSh1bc{^L2K(*L{(&b<4ZKU7(G+*kcB_`EtK-$BQCTN=rZ89 z-d%D-JL3fqQ`k2)bFTtq;p_u#2oC7!kpxcLn;fLCzJ?S6>2Bi* zF^cHg66y>A(qHj$-^$eVH2by?UZy16hthTH9}N;~Gm@oC55*`k{0p`@C*yH+^bSx} zj7mtHf4|%DH%{^eN$g3eq>nlo6_*es2-8Aa^Tnz2H?H@rhiNxTewN;LL7O(uA=1+? 
zbvr*B7nExu?uQCqT>>U+*(O>wI1+yFA0T_9>m!|%aRemIG+b3WAE9z&-UdrX_c#=C zg1C>|@tD;Dl#!TUm@CB;%-$Irl|$k3hdv5!cG6`SV@7-7q6O3+II1J(~(T?^h^pB0YT@6`m{&WhfJUIUVDp{y6@2tiGntOE+)>D zk^E!du&e|zdICDF)VA5z4CFN&2v_LS_H1*jAySrVMtuPLfQ?NBFDLkQzpV;P3!%^z zbp&#;RGOBi)&Icn3%A5FonTNL3{l}yXk}DGPi!_y*RRXcJbQmc9L!`P%O)c4G77W7 z6}h;?AKyPV0X5$)l>bT{!1}@+jsud6qRNt=FB_}7Jlq!P6)!#aKg{FJy@T1VZhVeW zZ!Lu;sr5b81#giHxNg2!9A5e%oOKbq%qkHso*!@57SFYlst(`X!&acCdA-FT5;OAC z+6?G$0R^BSg)FhCPraSu?}%chhu~tw!F@IZQ&fM44J=iZ$yMare;&Hl5)F}BfAlnl zAIC&AhDAqO_lEA6EML^f9&tpHq$GQzhow!QGekujPeA@LpAl*(JA(at@Uxue3`4l_ zt&c2%0|zV-fCENfyqflZyR1lygtTnCH4Xhh0oUz}()V1f(crg?sJYgM1R_*k;R}O5 z)82W!?o`|6-=awKN^9TIM^^*;idCwJY^IAtZ+9GHF3!uL9K@!Ux1WgsZ85})i~IlA zuA7S`5{8~Vwe146@0Pq~j28ZQ=qo_Odm&^tD2yvob@eV|q6-`&wYT_=Cp?C~yRSY* z|1Ll-y?y^saTAg3a!&j+R40qU0XDpZ~9}%Y#N$HTNEf8B4g!0e{kD zoNku?nxzTOBUsR=weI2T49UbETRho#KrDaGsBe;<4*5>bzt(lELdmieTju=#0$|d zXt`(vZ~#1nEE19AJ|)kt-ZnERNChqNGCs!~{>?Vo_MSDGfO};~p)z9S04cw3%Gf3$ z<^03v0NvO~^z8T$V83hkKt)Xv<%BL8c#djO0!u*H`r&`8)V(`T7w$|m2MdajRX=N{ z{tKRWP+$Cu9ziKLS~g+_t?GHF4&&uVqNO$Ecueel#NsRDl^5KgW!fKs{jofw@@$-5 z?X%9~gSf-_@{oC{v#PxR(L@juKugI*mtcX8F@DYj5yCwwfT>dE7AdwbpA)ZY#;weo zB|EbR{6q_~G|Ioee#`yVG(pBiHyb{ETKYX_6Vq7}n*pwCGsae$!v3BxUd6Tk5wC$%m3H1tu(zyMs4zYnYEtfjbA#C{jFHR@#i>%2E;M4o zufuMN@VMKxmB#RfYJZ}a3*MSe3%scO(EVQNWSu<_!=C7C{cjP@W1;3b;3ew%`6L zydQq`+JSJ7PigW*aiq8W(UhmqEeA9l1m2$R0(dY+x%Cf+|9q)z6Lg9mqhx$H4gCjGqFgh-9v({MYY4~L3>Q!~u5cx`?I!TMbCnqV}=G{MQusf_Mfp3N# zW9K@t?ZXVd3v&hjB%$mhy-EOH0KwZ3`|)|C8MP|wkGPzt2%Ilb4Y&0UqfkZ{KXd68 zvE|EuMhGqBU(xkcv$`lWsh9H8>uY>EXC2LYcXFX11GfZQS70)Qyly~Fs^z9DkCSUs zF0o?UDv&RG<7M~cf2l~E>bmR#&O0@Je)b*Y_P=?V~Wi#;W~!MFU#M-IvQ;S1;2rmg%{%n zyOX8ms%IK6nGLcQEqK~YKVfqA;2)zmdg1J=@=J%Z4QPw}P z7!tCPF2}zFf2|Kw7m=mYC?%BT70(e!bTmJ!fcLXTfkDH~1FXzHZto@|7n;%hyd7nw^;+;2iJu0>) zAmY8gDM=xfQLWsj%|2k@8~611+v5%e`Bu${ELP1Y6gRV0+$+o`w9ns}{mgWDUuI%E zrt@QsE?IgE{O8nxNL6M&rLKK|VT<9zGV?PP@-lcy#(S@0T=&6(7<=_9_;B1z<0Lf@ zfT#xEF{CTDvhDn=3+EUp3#$A1Ah@|=>r`_x_jp=S)qNejcoV?Mdr0D=QvbLzI&~?2 
zvxgPkvnX7}V&`!Z47RJG9>2Rw9fh%<|FHhE$El>f-H4m-yGX=a(;pTA;dz@~x3d)i z4sD2Q;Ba%5pZEn$%=53VNXcp=T2vH2*?x$uv`hCU)F<*;)Wq%*DLsa4n}P`TtV@{| z?Uf+Z3C>zf^m8EpeohdX>_GHN2{B1eR%l`Cey6#VR;rvK=gCLbn#vPN?ET>eD-N_e zY>2XR#s1`1sqeIM5qezg_>SGU4eG%#4m1#Z?`5LCUi~6Dza`njGRFwHQ2G4Ct9l1+ zm_Blf@3{Q3hOf}bWWwhDyM+GlrIF$qAyWatA&yXB*w9f)V#la+n+7%}d--{Nk?T+Q z=E0Rv5(|-{%Q-S(QL)G~lz|~4 z&V+HFav9mI!_NLv#ghSB9~-Lboa!83`2r)&Ib==dv8Po2o|01ltaFTXx0n9$+_G)$ zLyX{&so7XMy~j`p9&|}_2N+9Q+(%I%h%kj{=mV7whSjl!R{M&}&K^TZzMhG+@Py!D zVoM=p&}HAr^wrlJmx43~+Dly18WsLQqhSy!@;oNHqMXFC+k_-qDD3xuYoxpMj~&dn zr?(&W9sAa37O)iabfVvzLe(S2dOtz1lrS?4)XuD-!x973?H#fiI#DRWw^py;^4SQ# zmkw7Z*^1g=884`HR)IC+o3X3%nb&{yN#AKG*uHGoY`R>olz7RwxbCrwCviPEA+lA7 z*F=VMpVS56-zfgz8#7^GMF2G4cj0~tVF8Tbpua!Z3J0a2_FyLl$aXXTir>8c*wUvf zWN2rVJH_ahK8Z+%*g*^U!jKIvJ7!b)hqY{fsq)$WC_;347P=Xl_L(K*;G=5F|*?(5|IM$XO3^jBfb z7_xnhqgyhQ)VLt(B{jP(PbJ%U%%6`na?zxZJzDxmWb?-|p|SKQ52T77qmw2ZrtU&B zFoCN5g;#!Z#6lzl*Np9$AmD?@Md@ua zUPAY!w})-HzA>`?dr9;AFOZ4ksSw2qoR>tKyvdpLo#B!m>j;l;61HMie4OxSDf>FHy1YyBJg{55dBg_ zoe`{{+w<00^Idu$_ADMH!nmP(Kl<}fNRj05AYnA2*W!=3zI);^BZ(XJ=F-;wC$L!f ziW>|vKG3W!(3?L`x0l`L`epP{Ls@ZOGjKzLC0`AomdwYu;ku@j4w^eHuUbm|B}DI*xk z;i4wRr<_RvPk-g$-o***xKEP3Rl-t4j^9#qhUXiO!BW#-(X#4a9J_xfvM-y;uO5|1 zf@rsA8u`b0S;Yw`Y~ z_iG4@$+UF;B#x$N^V_Vmzs(IqbkxA<5*aXkyeQYgp0oq<5I(5&o{Z_mm#%0m*9jl| zwsP#+O7XE>t>aXcKB_HlZ5lq9=eNcXaLwn*^@l~DNEQ;e3B0X2#Bam8% z-xx#x{+fOAU7=x6#U^jz%~*2Z7wDLioYHJBbfquoxl{?z9maqgbc1e2b(hvQhR4V? 
zFtaAXABvGh2zCAo{BDW{eFpUWxC7h?I!MX2GQZ9%qv%)7p|uLaYKW-i@*6_j!$1VN+8&eBw-6*5df$3Pwm28ZoZZkWMY(5e#xICsQ4<<=H0&DG<)ZV&=>GMc{ky$61oy@ zz8%FZr>H0h#Y>=1A0WMFJVE`Bk4Dsfm*G9ERHj?+r}6sJYvW5d2*AJszlpgNmp?34 zZQ5MN+SejkSPB>UnGB|_g`d`|Nj6B3>&m3oW zNck^|CVrxH%Y=TlR`$G>Tf>(tkOk0xa2HHSHE9 zgcz_z*#@LG!a!=HQGwicbXeExG0B4B#Y;YCCUAP8%(cLQOx@PuVET`-;<{y7wHCht zkzc(cAxZ9dm?n?R@sw4X)GyUqE?zMdJ`Jz%p9{L7=uEI;560ZSZ~hBo^7rq*F^QVHb5N>S?X^R{hKxik(!-=}eY6+@#!Ptc!#fD;M3vSz!?~ z#7BW6a-VSX3=#=&oHVgHJ{To6XgGv_Gxj(rE<_j5{Jx3Y+ZE;`3FNE8zWRYNS~sBq zR5mvm^>L%7m`ZU|-*AM{1=SL@99k>ln!NT_<{DGtY>s-F_O$+Ljjjt0a&Eu?iAIG9 z9-JTY?YF(O17r^P-!t{-f8`~RVn{0KYgdr}yx4$TOS{-VuwkEqiKRaL7fpIKIseql zaZfb#iN3g0DQYy2>k<#kLE87xblrS{qILO`5nJ)z~|%5dJ(m9)<56 zjsS~i5qSpmn2yL$thHzAN8i|wQA30MS=q{4wEATdhuejX6PA6@MeH)s67}J`DNJlb zF}8+=W-J4L#>|gMW7E;rq0fMd| zn*g`c<)tQm6*3MsEPYvx_HCD0b==jZ)AMP&Db^rQLG;R{aSaHY$}FkHBQB~e8uy4q z4%R`2CWA<$@;%s-!s>vH09Pfboq6Lbb*VgB3p0e|oI|0B{C-W6+X*|;yB%d5MT=Y; zQ1Z~@ET9mNn$-vhfld?g-n)eoi5Ev?FgqK+d+V{1STdO_UQ9Ddi|UWJ<7`4kz^ZYF z_gQ?^<&j7kUao6dyw#$l_|;K)wg6h@rC zNaubS#YZ;c;A5g8$=Sb%6oV6C0VswB@$$T#sA&V(#go@bzG}sa2&y0zHPeL`sP)3{ z^DvG_#KqXk?8l_gZ8ssFU;;nvI_1ca&hH%!L<}RZxv8Nk5COz16o{x=RHShbN0r|) z)x<^10{e`5bVPleLA?_HX^wEAo6Y@rQRbDwxiE9)PT{f(X?G^pmBZqU4^}iE?YhHb zx`I|DvT9%$&!f5N$736H(P^kPe$)@&&TXl=5**$iM-LKWEC3%K2qhXE>efM{UU(%>(GtB?UN$A&&mO^)xka-Tb;sL+s;>j;`NV~CGWH1?S6VUbBFjPI z+)=IA-C5|3i{N}yHlm&yOYZ|V)cJb|Sz_WQ;aj1iR{InJMXwOG0{XGi z5dWObEOAR`f@Ke>k%Ax`{{cfY5+(e^T(x;|U$LzP#4#<$>^+A1@|^P+hJm>;{ADgs zc%wFpPZppm_Jv=|1Xf_s$uHts1c{Y?Fma=<1b`c&LFK8l27ZWflEL4<&sQBy(&AE>s%*l8duFkNo}zAXt&B_%Rj zh~-aVEr1>`CqgZZ81D~;OAtQ#gI>d+!NC@Z?|UygnnE7m+)J9b6}P4~iC{Eo zg>U_a0D2#)iSR5bpbX(`kE6ym0AhJKcnnR~qC2z|j@Jn^R6Q3tAe#$jpc6Qp)Z|-` zzP2{7-mYwXoUA%O*LCX57+p@deC#!tO?u%AwUX4coqM?Xh3l_Cx#&9GqK)h1{8hp# zMl;Voda5hK>$lG?8g?2R#IsLw%4dCIsBbn-Hb;-%9JE-7;m-`yh>|N{9MC6f-Qgfk zrWp2dh9$5uWEY8xTYJy*fa;p7V9YUJSV5ce{->~!7e;ewYU;N3075u`GW{Rz$ zy{iNIY&&HQ65i#n=Y%uq5B-0zB^Lh=d=$BH=1C24D!>`SK7a;w1w|I+jxlUp 
z)u2^gJG{99&SsMP7`)=UVD<+vLU5*Vs->AN%^zP<;7uc(jk1cG8V1~AZPvS%&4z62rl($YM?OkH>f}A}M%a8liLC^S*p41OLRcrGV|68f zJl2{1g}O)@LPt+%G0LP97%`Frvx1ke(KS1%zC}R_}72Y>SQ8QUDx_udadr!AlZe39j zHjAkI=+*xC#3PG?Yq(9Euy^YQ-~(T|bE{%93gdSNMJ?2FWZebZ!#n}u#5k2m8xP~8 zKXUDoOZr_NxnA>9+FcbC*#s2DMDM4if+)p8WVQ7&*LV~CgdxFuztJfOnXzH@F!h)U z*j+_#AGiKjNVy`RgkXYj+rDf|;-^fB-<0ZZ+H1qN#0GZ4t{?J6x0U(nKVhBs-tFZH z8}i(=BZ{t&o@fLbIeMX&{bJPcEc1CKk#!tFEq>u@DSBCD(0HZFz+CqMq?+*R$}^)k zWp?Eg7Y@w(`ZKHYXb?JtSe%lfbQo#lU1??QR4H!3pn|!>(WIsr_#1j$k`O+|aGUx&BZBzM1je(Gb~8|q z_nUFjNp-_^V*|%DR>p&N$NEOSZt7qLea89R^jzCzQ`X{+Mo~e!26#sYax*u}^Y*uW z)Wp?NY>fxP6cqBCAbi7Gp4Cj;Cet?%Uh;!JNsHb;B7UcGZZS-By&oGff4NrX>1i7^&sYWvj6jbyGB2v6;0(JlJ?@DyL!7p1yP7{pR{;%pA_8eqyP&t) zErs9v-g!`K6wGRV3qUUr3a>y##}+B44=8q^Ht&#Ba4`zd!ED`;PSU!mRjqC@5) zyKU+jv`-^(MFz1BK;O-{Fm0Xp^+R^^psglL7g$mnAPL-5Hu{bDPaAmw!wA^xiE9ci zTG(o6kO5r^?nM~7y>x|0(sv{_59-$ z3Ui&_pZRgVmcB%lrBCVk2qIbz{GPSig2D@Kp3-HS2;B^w`+)Sfkx0S=^a48#W}H;D z!HW>#;lVF>8aOFb6`#osaiw6#P-zz+_?I_Pj09sojbVS>i(sbmWmkii4gWj<|~q&G?Om-{?JGfN0IR`D`M#o8dX%R+RiE@lM&PoKUj`u zN71wv3f>40V?Njac{{bIA!;(ZcZUaFuTfSh6@+I#f^>8#f&oqmEi2Ov<(Nv}HlcN7 zxZohN=+t5%j!Fg&^A>i%86aW{UZQMCKvn~Ujn4}%z#3IPRZ0Q`5Ob((wAe!Kzhz4~ z0_~v0-q|Skz7s{}$u?pC6ka||&^#9X@5aVgx9+*lFW|_Xqy`qKAp`$mFTTjv}%G>Da$St<_uJPgP;(D$5# zxnxnIThw{|P6-Y7O9>azl8rB(>|`6Q9_#vRlG5N=U&2q)h2eS z;0-;cEN9d!W`>W9ULStErQNr0zAV<)k~~dT!=1pK7`XEecekp1vPl+TK&dIQk!y}LtVzFth=1?ZKx`@Ia1 zWR75BEmka?`0jTgjBOyyhzS#$#=wnq5n#PZVjeM)vHS)_E|)HlYfV{o#G z&@~|-Skl1+LY04NfBiV(XGth&?dtsQkl0HIqC1-a$}(MN651#?nBGgSjSlj8-n&juRsu|_NoOpnqMvie4pxxLXt9V#JlnT`#* z$t@qh$C5vfEU`FqN%!{-z&WxS#kMy+HfZAy{!QoA{{&a`8AY$#3*5&(fH$;}qe7+j z41Iuo638|({UlOj`SkC0=d>1MUu;A})#w!CR=#W%C9|^p5;g zffLFl44ar=g~^*%I(e-Cb@1w|mX=HyGPxpxt=Z=U??ytXq1)+leG+R}W%7v&ntQKh z6b@xjjPz_~aDJG5u-5A0arNVF+8iXV2-#=snRrL!`XR#d>(s)jPWE^)%$0BDU-b1) zb9<=ctuHN93vuKcHs@4BjAZ)3^8XgowhLD7p$7F9W`wF3y(91mYH4r7o?GmVX3|z= z9`iO~LV>Tj^beovL5nwJ`frD0ph*XYB|uwXjuk(Lv84cvY~VeRQ_*_Ntz88 
zb%Vj48-h4s0474rcT^CL_d7CPh@}LzyXyRCN}(Z$mAEJBLMNoAMUZBwG&5T|7HHq) zf|i#pZPwn>GR`FQ5pmmCqOHgMSWkChB}Dj(+9Bn6z_WK+ z0Hnn}o~F9FZ5(c}3S25tVEpsYW0SK}A;NfkxDy6i4n@6EJjTd_DijrV)d*InK1DecvrZ74)IQ5H1zslkB?b5FIzzBfJlVoH~asXjN zG(ZVHgF8M)ai6#O;T-dr;Pjm#3HP)@P3G|ThRCm9>XO6oqp~ReB5dGkia%k5j)H2b zFeZ}hGHX`_#r(3$(8DoW@{!5i*>Q7uk&6B^r$YKM5Lc1gg3EgNZjszMw(q&McjMk` zZtaE&ciuv~0@hLyNJ~CM2(%Z!AOxfITI^wn>^>5bT)92|d2thjNLKT@s zIbzd`qKZCJClIU$@;=E$4am-{FP4|`VPpz?QOCXMtRV^ls{AB`1(X?}**hcCkupEh z+#uU<>i+sT!nE0gSBepZp)zV=0*Hv$Z1$$7a5(OW5Gi?)FN9ghq^9Jyh*TdwiY3V*pHwjY3ePhifUhep6Byb*e)-ayjJffg^}u8tdv5DKXKSo7LQM~7eJ$e0zTz$L4pWD`~hD@a^G$*dM1Nd6Xrlb+WwLu ze_jmth!DaC-bWwA=JL~x>@s!)P(&dU>q1ijYXed9b$||95*V!cd~<*qa7ZvPWE}B9 zVY)qB&ayY8RVt#vg04cYk}SEOjT$M61)Hesfpsbe7i5n>05@YN+yr2SuQmFOPk6W< zC-qLbu|@7D;(QXWAS?s z!jy#wMEyGtqV4I6ItUNC2R3TV`C{K-Hgss7~$ zT6<6dGqU9fN1^~RLGLJpMCIONAyp2XogVgAjxI+C4Tg-*U%Pa2canf^CW^0YcWC;7 zG`qQlTvNjlJk7l_EtHT{+EQBKUv$~cFE|X!ZTC%ZH1CQQjU${#l5&YHg@9?kBh+~G zXl0)vRBVd>@%0i!0@bzW+FrHQ4^ha9!mX2lr$~*s13k)bGu}PCDP2}nvoJ0DpjMmHc0s_n#xbCOhA&satye~%^4_%WsR1`g$eODRMYND=YC?rl zB@lPyz56xL(qFsvr=fPJr#ERpHwXR<*R9#!=$GR%^ZC!&1o-WAnn%<7&>%Q=5k!#= z3GhUaDpU{*2|M1WR#AVi^C8rYoyvco%lQr*2X__0^}C{02DhMl!rTz%M%_LLxRl?|h-%y^AF;{K{1mV2EX))l^$|6q z#VK4oj#DtUL7UMyM_c)mLjCe#QfV>V4%sO$ChW(O6s$btdgcGA+I7gn`ylT>{n6oJM zt~Q=4r8y~I?B0@zX7d)CXNP`4lfll`LB+qkePu`yu(uZRyO@GB`fU5-8xEom2N1c2 zF|dA*^*mkx87Yb`=!$PXxloI_YX3^@Ic&AnF2EF}elrE@`2#&GHj$K7a3MLD6d^C+ z<8t16H=_d`Oumct_g83S$QEQvH-tFf^(c9ER)mZ#voXIu&n|4$%e-EcpkeD)&igw3 zowkXkSNv@L@kN=TZ(+HG`o|$QB6lk#Wl%okfPH<2eBUD%I8_QzS?eBEV2t{sCJgy` zun_ryxse7Dt!|uh^13x;l4XYha-J%^Hk0CJ#OTOkG*+>!TdA)#Jl_ zy_x3IQ#9cBR&ITrt}$uS6_g0`9YT-N8-RD`PPGLPOeoU`6{}#Rg@6 z!1O_g?_CMHpxidtVUXGC(yZZtK=aMO3$)2^FJqwqN{)Nr$q9|X)=otjs{m)<&5M}k zr&ENZPn`X5nw=Viusxo*b4HXMx1r3@i_zNH8O>(`k(>Pdx(XYJTMX1^nFudB`_(*L zVpfP1j3l?*QyCENJ5_5p=_S&pC9srK2Y3Aq4lqxrhK$yqYcj0 ztpovl9XHWe)F7a(ODuQTFpRuF(vHmjF~^2kiUufC-L4^(6oCDRkO~?dbAv66vUPbN zU^*YkFI0HJAaQZZx7fqb=IO|Bpl3fU6Z~XvR_^a5ypUWWbQH;#X9w4Iwx-$ngh(ZB 
zG)poPaFK`BuX>5#0b${;;RJZ3ZvOI*N^qHivVu1Vm`3?(v(Kb1Un#ttN^TN3?Iuax z`|%>~H%fo5h>%*tkt6XV>wS1u#*R2zYxuznlOrij}Jv*LcbP|8s9WHsyO-2-F3c)dKJeSh>&sbPbMmzdlBfi)=hs=q=qBY8(Xavx}&$%hdfH?XO5 zQ9&D?5_r{^3}7hQ7nSKH(q6<%;w;$}fA z;)OajzauT&?}89yuf_V6t~M0v=^pMvL>(xcHHE!mG|DGpP>76_X3Gd5m^L9nXAt5A zBbS84sl!BRYt<{yyFf!CT4;Hg%-*R_hKSOfxnydxbO2vwo6{bv6mQC~zG9`YrTR>i z#RegNa7q1^h{D0L#Ue~CazK4)-97m4Vd@u#l}#Yxdln9uUD}R9tm<$CDO^#}>;Z)c zov(-UeXW*h-nDc+Z{pirEcLXndO5D$`B|IJ~jFT z>7xa-*!D5mAU!8p(k+Lo#7P5VkT%3p1HFsj*vhuY46pkQ#;qpTRFfo;v)oU7T@1IHubPYO79&X zBe-~xZ4K7^Of%479;ZUd#q>b8;q^7L_6ar*jGFjMDNig5Fs z4(|_RJ?mHwPatNrd~G6( z`MAPzivlVihs_G0hIK!3z>r*fNC1JD#)$C+-dWyml4gD3iT9Yj_jBs0RgQeQ`WN*} zm~#I;;TcxAzawj*27#=hTIr!&Uu?P>`QVqzF?$oryg!|lf_;wstWwnS(w$UQF8n8C zGNOT2z3yvrKblqZ7De!}+v$(xIUEm(T9qb9F!4yB`mbMR*(LH`9q0EkESWem5-@=Y z-QFw6DXC09L8!4428;>&<~N0S z484{gC(&=5`8-(N;@U*?aUP~_4N~Y-teF0HCgVCt)%h`35XQL4Y@7=g2_O$-$;dDhwoJc z%H3dOzXBHRfa|dt9E(2c)XJUW!=u4BGSbHq5ryjOQ#ujv9=)|I{eAZDNgfEiLK-*xyKL%>FQ)o!vN?y`V z6M*Ky-RFiqf!Mk)MU}UwH^12MRe#<0{!s_)mNjo1=&yI<9${4W4u!baO*qJSkmQBk z&OygaJONps*vB^Nm65h~numlkouYf^jpuzxcVe3nKLdrN!;(m*8chX8^k&j=nT;yR zUAcn67ofjjpydDBayI!;V;2ZsXUP~=3JPOJF|?GU?HYIvuKi5cnUeOJ;`DLKFZW^v6ct`)yRLJ@%ZiAr^yU6Dc&`Y7KAlyr~)ul`v}1DFVr zes{POnM&xTGKMw0#D~9nC1GZHTo@5fixLQ3ECe+%A5MQ8_V&uI^3j8>z|q??(bz)0 zRp-HLCr(F3it>5dZTPVV4Lf3CP4gzSo#I!&CHn>*fQ>W2*DywLJTM2T8bsc5Hkiu* zyBIV^rmVMj03q&#^XlEpgfu0G6l9YSOGv+8YP{)o$+r2ixMF~$Y6(IiqI8@v=!L-0 z;RyzI@~mftH|JS97$dUYZY!Sd%_4ej`~(>mh)a;p5%@K0R3l7x?cQ1A|nJ*(cY!R*j%9x77{&W~=uJk2&o$R`K-{Jw4TQ-JjP*MR|*ZdT7KM3ZRujCI1XMW^a?gM-6R$lHeHB zItf8>UHTKp_yhMihKuxwy4mqE?!TfI z_J{%$Ja!u!ZJW}q2KQg67z$M%g`_P1(oQW?HWIs5;xQt2fk}oVGPgsal7$ zdu*n!V=uM2kz5pwRjqnEZ+=T~Li}T#fNe2okBpxbA4d>$Gqj4n zZdNgxKd^Q*Od=!d9CExY48kadwZ|AfVm{_RR!*3~xU^UDm-YE!`HMrcE{3?U|EYek z#X{%d2exI86-<-X_~|2@uPt?7Lu<)3EJ_DVIayow2v*l-QpnZX2~PG1=rfY&jZ2q( zSrbi7rcvpe6aU(MIZsUddwJHewoSSfQj=rG5K`$5H`U4&umb#Qltmc9I^);lSUAC; zJKqFOeFzF*r63aT{FqZ2U41FRVZ@%~5eYk_4(f4+QNrlUw2L!WHRa{=od&nMlTdf_ 
zXEhDR4_>T|a(6;ySXm{L2?MFljK(HLpJD?ojU7~}4qK zlXD?k6Ed9%oISU4G;z~1SsBf5e=Z>FulAU&kN?D+ed>S8Q~AeO7V@;(V)0sBAxrCB zP4=|T<;?rS?gQ)`!l*)a$c?wXFA3@b5n*JZKtk0AEQxsFa;Z-pWH#8LJn()# zSgEul7*cR5M$-`;QEx<2FE?WRyZv*(QM~!T9r9-Hc_9lxCHat?-7idIVo{9^DMBe2 zQ+x@*3{7Mos@6iozTOkTd~mMVB4A5Z?%4Pf}O|hp<%opT65{w z&r_5T_UT-I(|(sJ!n3OwcOuzS^RFey{LF3&I#!hL6WahH>HAYyEeYzx{=lg-tJk4gN? zGG1ys_s_`qdo;(w`$NX7(PLLIia^EZC=UBho}jp>u}n^*Fq>Gp#pSbkzQyW@H{mF4 zadh-8_I5;qhRn>>_dL@H^AG3tOOMd(7jMSLzPBB84p)JJ)>fmNbZyc<-aB(SKScxu zFE5Vg_6w{{o;xh12^+tgXlAga`(YTBgLTDxcFl z47Ia>?SW^wlYxS?2FG{2sBYlw0H&_zrD8=obHfwApcO~t+TP6TPBhW*tK};LC-8iI z{d0Z;?^pdhYo#dG&quI0NIoAu|6ZIW~EnaSCt`6WewXfcqCl*bokwm~Xv$KtTv-+}nv_ zJmO6)PE#1Ub6s*>W<4i}c^VSA9wav));G#ZE}d+h=jsfs2NISN25<8;6FN6;_!nxm z!YB3a%Ts6y(NMY^e61^at(V`>^m>Cl%WE2U{^n3&B1}?}cEgCmxmE2#EZ%n*n@H3T z&a}t|(KA0fJ1@F}xaw~kT^%)E8QyOH5-OKzQ&BRib9&ySe;bb+#j)Qbr}et)SMYAj zu1$2$V~oCFTps9Rwr1ii`MRy$UPA*aehHS=vxla&@+vw=1fP&`ejr{I7}{u9ZYRfa z!AZFr*e>LR;E#3eFFyTm>Fob4T{`;5Zq9%Ywg~zxFg8Sznv^5;)1L=YTgjVDac}c= zY4{=t2IyynA&SE)6NsC#X)oq~1}0qW5JAsL9Cnv}g)#I#*nPL@+*U>_hZk65=m~ zeGDGyrHL#!e>vu>r-{b-k$DqL)0M_^{}26yE8)&{!oGgWCKRsat-cT>nimP5s(iIa z^rY(3AF9jG=%CAqteXQ!K6bX;@ga?AL^{|Dq=B!>bu)qv*}0XMjYPVz5AdUQ-{cXO zNaDyS@_R$yUpc!e5BpdV%Hrp+0Sr~)`Y1T&rC+F`a05RrWte`VT%pShR~@?eP-qwEk4 zUlsyTn|J)oDaEW5+JZ0H1_dMrkh)IQ7XU{vaY!|MyA98-CP-hI=Nvp2y;dv4-m@aW zj-(i6V9TI+=peK^$?&7zpU2f%+`V7e)psCmPqKo_K7+Y*QRSjP5`n)ol>V0hU9a_uLg7#wMWbfJC1#Fe zPo`ESo{_zg*)KBPMb^{ffe{6AAx( zn+c=NZ(7730lvdd+?~sRAR`GfFrZxbp0!v@eh!=Is{vf}B|J(T3Xgo`{#jAuR3ek{MFd!h{AjOU8 zuZ;r5Cf!Tdcz1{EAg=xyIyf8 zXh&k1(eR_dH?2TR=?Rn*4kXTZF3AC0U>Mh4D;6*>P!-x&^blbo@E|oSID|E(PwU--cOwn!feZPEL z%aMs+ExuGWJyu_Tnft&xc65JG?VABYL?c0_UJLu^tpi{<@ILZ0MlBJ8s3)BL%~~~n z|GS&SLmsN)IRlsNavwmn2ly)vA}QT${LIR(TglMaq&Gb>=hd}IRl-k24Z>P$ZS5dF zKEbXpU5$<1dpLDHD%M@5x-?;6j=j@zc)qzG(S_9Zd`z9uyVvz{b77X}MqLe@05OZE8x@*QJvlwOFvZ26Kk56N(JM$s7UCwcOluH1HGw z3AhH}Qabtma(Ay{Jgsr&4Q==Ea9rLEX~#@`7`*Yg;Or=qaK2__ba5j{YGsr@#dHX6 zPIAw 
zNlJ}m%5I^OY8rsi_%V}?0?z>^&tXQVs6E0jt`l~xGl>b{@_FT_kY`wjgm2&1*>uEW zM_EKNT1dz9xqZYtyrVFNeOBd%V9KRiy7C5W!Q0a8oOgCN@#(wSWgIwIZOCbpT0Vzu zJHFh~J1O_4Rx65+o*=#5ANa)>!wr#V1TlVC3FTjgV;H+bSG?2g_hW15O&>p>9Te&AsFj=A* z@-wiH+z`BRQ8H8y`;=LPHbSm-dpp1tx3pYQgQffcT$4tKSKmu)M1&yP@mr7w^fPC5 zPcuoW2TIhGC^6H{Q!>KUIun=q(Uaz8EX6q1c<+;JK>=e82(e99!Di#-d}#z4wVQ zC%6G&yRkz%6NmaQ@d*SkiSg_-=(ZXjmwV*mJux^s}zIHg(t&!gtHN&T1ADlGp-M z2MEjnuLIn(C-NS|MBovm0ViWeN1PSwqor__=WhV@yW!oACvKG{yscD}x4F&NX<5A_ z=-@iL{lr$2lysUYr@cHSOq@2_NtfdL^v7*BulI@er((?PZAR4AMOxCvRkJpkkcH@- zq#g<4T%=)d9r#+RY+=KLL+Jxvp8{8gIcnC-ZhE zpfl%6X-9?yk6q_TDC+|e{0UX6QQ%nW8MJKRk5T>$QW5b`peFol_m zw|Wkww$@e;QGa{3y;bm{3#&U<)G-StE79wvhyyHhU#^M)ZkozaU-Ok#YWsE3Qlr&_ z%=7W`%{!PqQhjluW5UOr$qRM}#icFCE;TKwl;Ec@E*$}YJu`a{9Q`j1x{Xg@WQZ8R z*@RO`xr}!g(x9m;|2c8}Xmk(??7_5z%APq$QR9u|rH!z%m~QFI0Fjc>0s*N+c(st7 zUlwpBU_Yjum0fTnG{|C{Q%tsUEh+iOnD_R}w{l5_wF$#R%G(bzr%~20inkjqh(a0G zDKt|er`kk`!BOJnLc;#dY2lY%I6A!Gds=~naYVFuD;2Xjv-0sTC(tq1NiXO5T<^Iv zwjZ75GCKkOt>p|LhH!Vcs%n;SuLBjN@j@b=!SQ=+D?=do2EKXa(%F7iM_P1Gi2L29$)?#d^`ZfCU%J-{s*ghnLB)K$}4g5JayiVyNDv?nsZ8a5pYLoW4 z0O})rP{M==KcwX0IS11};)@du;5}S)2%)>d!HJ7hegJv18p;FK>Y?MKXWM6aWGE;L zr%pMDbJo!fVk+(XprENK@rbn~tUeEp~A;RPI_=^1DgNnV)Jl@-9(N)0$lwPaz_2|NGR0H< zGlII${Zvt-dN|axO1`=yrYG|xqJ?qi#O4bGd+R;Dlx#XL*9(kol4}9J4<_yRG?1}& zw4hVIlamY2*Rv*AN+VBHB_7&3WPQLRYOwEyqfT1sA6LPbJ~R;mKb6P23k(b=A$~>N zdS2t@c#0F_#dB4y82kNm{Mc51O{|&4y|v|K#54A>#zLy!LQk|>#M4VF(twU?{FWOu z5`DY$T!c;m_Vi`uyuDJPD)Hcur}*xRsCfNNtZFq+vVkk+mlmr;+9F}L`tFu5HE`@~ zk$Lgg;+ga%?XWNs;xk;(BIsB(CM#`qackMm#-LlodAwl~MT77^Y4gNrwX`3R`_pP5 z#c|4wxD22SC0yXiH{cshL=8X1)fgjxPm&z2=J_CgPduT*FQidNNl=JcIg5M+sn2e` z6n!TNW;6WbGWW;GeHW2k?<}8KKgVZOK*d|yi?ZVsBdw+$mL2PkG{69G9sB~@v*Qr9ki-}j<2$|Id zUfe&qwXu*Cy=*0;iauxxRajE?J2DHTLm>(p72AOyeeXT8W7s> zo5T+TwUL3^DUyj_p)oP@bu?$nGKJ<^@rP9#Dcr+VdlsQ<(Koj*!r@nR6;_&imNZod zb3_3rsR2Ky--@^?#v$fi4}=}&sQh4O@%xYS=mhx_V;oZsys))CJTg>L-_QX;H#3K} zZ0^xjz|o-ZH;A#X*W1Q$H!UwSQMwD-caZ1=xkfCR5dlP~EOQ6uZ+MAtE@s8S7 
z$rke^H=D?Bi;J75sKBnibBOW{>v{EO7qI4gUUui<++SSV-w7C^I{;uy(ee>Cs^sRQ z{0zU%$JXrxf*&W?9e)jP`g_s0@Q)oUi>r}t#WtVh}kwoz*`BOxyK4@~qgsyv8> ze#e5D$S#_R_4W zk6l(6&p3X&eY6r|IuCb@VIzQZafz6B)27!&yU4f!qZ!lj6MW_PF`arownq~_5)$#7 z1YS9Iz#vtOX8Dp^-2Uv6n|>C45#8{uhs+X(E`dFqd@B)Nr1$X0(}JWy+G`28&|MIgldUe*5OM@s=v?B%mvXpM{lNWeA*g zCM##>7_E{Mj*k>3)E)_?>84n{r;s|Flfw0D_B>T4Z`VzIC5zbp9q~Fed_rE70H#QTM4H_j^&ai?H<$7Xdgi|N zgz9{wzIm8?U1IYb#Wc&2eYoAa^Je`PQ~PBu2+p2c!aesmgyO}{x^=Wwp^NRSmb1OW z(!q+%pGqwSmSFX+ISy0u{R=|uUu2Y$4LRIP)r6RCm6Rbq6c95z3k2BbaRI-5*_Z~2 zq?tS2C2GY(sWoMh6dZp<^Y`|eu7-wgy5M1$&Apr=EgL;9@fREXe z9|s^Q2HAGHbZ#q8$KM7*V!Tkm?I5_TSg4+#sXwC(b$&hX)M*piLz448X(PDU zb6QZDyG!lNJDN34f0)@!Pv;*Yw364nt<3{+9HuBICla?8vtxAsktQUlg>s{sDOAR# zslQ_a>7f7Q)HGcInGMk(KtGVV&Fq9`unT_Y)_4iFj3yD-jMP3>5*kHoQ*Hc%n}iOC z(JhU)fu^k6`S-gD?(-+B_F2`?+vELD$p1>$IW4}Yu*+~(ise30O#fI|At0s5dUX2t z*G?7iGsi%@z*APncKP3JEA4|yzJJ^~O+2<>$%hP?;?!d&_k^z=n5*Ke=79sbd|AkMP;nHt5GE2j_` z@SV5|bS)etfT^lHKHe*6IiHK8jUoUPkW(l3$LTgUm*$fIi1~d{zrLrgr-vXAn z@ThM;f(QU7mT&QT_k{rl9H49TLwO}2$Jf-}P7FxAcX$4m%;4~OH$`P_BAfu90>VV5 z7+muZgmqqODS&NvbN`rN=-E9~@z@MM?C_HxAgk*V8)*osVKWC@V}*(%7``Cx_;B91 z3>dsy!W@=r(VCe4>@~|WGLaJ;Ym+}Nh}UnwmzRK;hR28|Reu`QQLy*Y3o$-~7fg?g zR;eeL=3|Aw;C8lt&(Gzm`$+^{Q5(ROOQ}B6T+ytidn>7|hi!x@VdG^1g4553sCQfc z3k5o`yiIN@AthF1>8~tbpGzpwGfwD^UO>*2y7p%E`Uhi|e)QxQd#O`THc&J1ZNj@P z?#$*=Rmoi(r=^kx0PIKFck@Y$H{7%i4 zgATiMFcA^BGbo<=^~NRKNdA{K*J%<`BsL1Ek{RjkTDqG)EDf11nKsmO!oYT`VwZY1 zx~Ux$xzf|e{nF$q+b*5-8XEx}c^+rgEZ!sNIPnI0Ec|+gDH8L-h=sNg)~Gso(*f?f z*>l0T>k6UmdUmvRRA(UQp8wJCrpHmz{Jcr1LY-svcF;Ljkw!kd?3~Oke z$a}g{xf7d-x<=XuM**D@SY`79mmj&@(N$TR|CyGh;qf6@`cu;aJJ~NM?r-un%?VG| z8L2De--^;-b!h$H=Gp0X#!WQqjV8zbZu*xychp{Ri$i?fa9}~=PwXf zP?1N>53NzLtC6r=^)M8UU)H7&RDwZ#BuOAU zmLa#(84cMhd6q;HO=_6ti1AkGj$a#O{9K{0QeCa0RiCr6n~`QjSP}Q}1I#S#vCFay z7^R9>azy;A$bM=_w)kY!z`%22F>xT>p`lvS zfwbi@1F9FByirQC3=Zb9Yj%qY8?G3XyE5I~6I@z3rH-u0uBZO)VC02Zw$I`vU#V{~ z{Wz=Ny|0nPyE6sI)e#|FCQj*?rX1g`)I|hT!EFc(`K3(VOnFHS9H9piommZXP8(D( 
zK>ez_0hvkdnVY8%Ene${)~u72thS@|u|H+aQ=>j6WOp-&^<8&yBb4h{N-Sn+Fnsac zcI#_*0mN;~t)~|JBkD<^B*z5$VZlLiSYK4rf7|J%W;-7B7T&oPqpe<;W=TBBS_u9# zBk?Xl6pc4>XU+cMnnG`0pS?|N3E^pf8fW7xpANwbFDLa)mV;_g;;Yp9AK+Rb-nihH z%}0N4M&>NtDQePYSRbr`b6i->toSZ`Z`%sDgxC3F$;uXIG7n{m5q=f;AJR&*bDJZT zD;0pvK+qS_(sMQrcD4f|X)fWr(Wo9KUWlL95K|1)v$|l6FOiX$9vnX-oNVwWHiChS z9{lJBY2w43H1Ys-hP+gqERX05F5;dZap5&ze+)De?sf|qjTg0&lJ=(#!7JP9Ewjr) zM8-mf<{ueQ7yViAX7CIOYd8)W=y6Kp2q^HC{5?~HDxYi1!ZC6Gk1kLcR(*`cnZE(m zSA1G~2=$BSV#1n_46Cl%ovt&+5&J?dLfMOUo>e1DOXx>*E%0LQTZe=}xuI|EeyIpV z%(wwntRG0}UYqL*-@X8GPNc1L>~bM4zgz$nh9=B8J6cs>9eF?uG3M(wR3^_~lNg;5%0P7={;jQiVB9_&Wm1N!c@W>uB9VE_Rgt^Bx_8x;Q&t zAJi_$5y*Tv;(T%H6y96Gp!_l?>KzXqC*+@gM{ZCjoHi5RZQTm&)iRQ~ubqqJ#>m53 z51&g5xnDjR zOS#3?AXa!6f`)O5u&j;L2mCB7r*`loq#Ep25gC7 z=2&FXA7Rm7sd@hO19on6MZh@y4y``*p;1AZ;GEp^YT{`R=qK~9eQJKHt(d2Ij^ORh z{Vu!wsO?EvV~$MkJf`4O{Lx|d?#s#IWb$3AI{oQxAxUdWPKZlT8kK`^jbxKOtbF=O z5)^l?k$fMDbggWgf$6h7^s~@Wk&fojI_ys!CZ(=5LnK52aCkJ`j%KrmBVF^SN8Gn? zpK1q~RhW+wUb@5L$wMHM>|j^2J?U0;qGjr(b7-*WzM+QcTVPs8Mfh#@H5;}OyLhC0 zDSG_hFNb51{#*XQ*+Nb80F#Q37tt-tZ2A5LKY38^Sa{OGVj5c%#o7H-cQs@9Z15T2 zI482cv#0B}+wEPuY1}oxskS~3AD;16p8uI~-v$2~zU5Y~*_%XuLPuX=Msv3BNd&|^ zzf+&qzPXkzuVyp{T73t|d{=w1jK|Mpkc zAXGaH3q1LFF|wv2X(%rxH)K#LCfVR5J72-BOq6NLIO$3+JOJ7Cr1PN$FT@JH##kUH zbFXt4Yv0&(^dac!xWlZQ2fKx2yPugO+Ol*YQ~yeSDTsNEurBzgH_6fmmxhD9&xP#M zxcWaDAd5T_A+~*M2a-N~nq20{Qe^AR)Y+yItQ?fo@E59%{ zm-X`@d1VEyT%li@Nt}Q3(LuMGZJNXbw?o+F0?`bkezPmriog+xDo<_vw+6LfXZm&K znn4IobHrCLI1im7<#Y)L?xLB!$WUrGh!Mb}c>PVQZCRv0F*)uI=eGi?8;*jQ&3#Cy zcW|aIxIJ5b8R@|Nh3U}d4Rm4!OYjHFL}z#ZUFWiyNEUauTg^Z0^NfARU zgTP2_x@yf_X`<$bR>Jt}-ta22)97>0>R+ly+T$VzC3*>!55hK&4(lDA!M}mqOy>i5 zGn9-l2{`5t?*3M+>#>R`=AmtA@>tFoW{ebWyRA9DKDZDneY#;v{rU$D_T+;UMxD_q z7Uo&Yo$#Ddfuwb;+CHp=^Rivr*})z-hM~#k+IO-gkS77Lo*y;tEESh%Y;ENqq`kfA?tQ&-SNX+PV7F~pB-P8gA>P+$ws!fAYP}{5>&v5XL zYOvO9Z|#6AgII^1K0Ku;*38cQcq@ia^-{3`C6Xeg8qNGq+IQoL)4~b-O1h?)VpZ#~MI98ozVZboct(4w-hZ0hO;*@FVCQw1l`O zbS+vf3Ba-?(5-$RG4p=f4=edk2hhd;Tka+mPD42_u8u23FsNEV;m;xzC6w(4(i4N+ 
zIQnZ9#UVjrsITTL;cKFkZNP;ij&`_>3ZQL@Z@*gTBvv~%8)&I@AF4mQmb74>DB&b6 zvOnBeX!D^J81kXT0zU!RiUN6MxocxO>319cbbu$s&yv3q!{y(O ze-W($=*lYdq4f*h;T7;9+4O;gM0BezVtzg+_{7xF?;-#^FoJ^pn?$roE&V&J39i}8 z6n7_kF(GCWfZJf!*hUtcO%xIU9P;YM>EWPF=;P;c$(MO{hrBo@svV`=?O9>9DD@cZ za(WfbTk%|c{r}1Jb|(w0_L9m>Zuw=Fhk@UmulXj})_(NsY?MKE5KncsNGlO#Nn+a#^_; zHREw`*^D4LX{6fZ5$HE!leduVfEt*$I_!}@AkfV7wVxoqp#x{|jT5ldlsYOmZ-hW*eMhHD_ z9&0;PIADjB>{0$eP6OF)6nK|jJQ)(aMUCnR)`?0)Oj#!e8%!F=ALKNCqY$KBh?PAQ zTi^YC($L$sgtY;AP-id0VyKX0+OOKlpdajtc#b;?f4hn)(}$(Z{8d zka6X$dYYKM@dwZ5U7_?tXQh%k`cv|%01~?QT`Ky}K;7W<4G@Q2|ojz-cr5^u}FqfIr zcSHw5_TNQ4A8h)71Dz~z%h_!&^or13G>8cXFe@8@G|V*dnb>a>x#he#=H4XMct$~T zdfWBP?yMI6ZQc*fZDCwTezkH6_HJ7RXP@`Hu)J>5B~m3QR`b<;?QC1fJ69pvY;F<52w0f~}67L)1bThH-Z z=@9I4+5nQ1=n1I~EC0+CXO!F_8#lTi^Q`(6DHR`Wgh~&3ZvIpXP5vjc%;a8*jpQ-2 z_FD~pW{kqERfMerfa!{_e=HI~rs;Ls=YH+?EeCrd4L@DaxX*I)u^NO8WE?Dv_dl&L zE3*RPN+6L(pVOO5`OtWAZ3+AVgoAWY`#Y^geDPFIsa_Dd)`8!(OJRJ|+pr476l zQXWEx0xf{lu3#Sj?XHJkh;D@>MCGLt>xfyO%_iDsO^RKQ-x)rfGimB7iF~!OY&re? 
z?x%`3JTML2(hT29pXqGs%+IEs7P4=kGvPDB5wF7P#whYG&wAS0Ty{Kd9NaZNT?2Ux z1!VcD#5j}*@-ejoM8NJg9sWf5Mp57#T!Nt}O40BxF9TLfYvuNjWr?~2*i)D0uc47c zEDK6Y!7~Pkb&=d}GrmraA-E;HAlIl}DrQ}xy%x_U&er?eGdT=J0=1-2AJL*z$+E>V zYU7O9qmI>)uhkAgu@KA-dAuxmt~B6@)>c7llJ zn}XaP97H4$Wsec;Aw7VZSU6GYCtE=m!543Ga+*VHBhb@Io{2m8s1LXm-sPDTAXB^) za|KhwHhUe5|VZ)V@QJ`DT1O!lHIy^gDMKvS*Ubln*vHq>%vT|MxsX={Gan{kJD z9K@+p)WY>ze*=%hy9jsLWU|TUmmNF++v~TrObP5RJNtK{^}gpTF49)Hu|QCnWXlE0A#)By8SLuQ_|y!U5JMeunYMQQLG-&YP?gR@C!QI`RAOV8A1q<%(1b26LcXxMpcfL)o`~B+q zcZ%ZF*)_dqrhD~Tt4o$VVU?Rh?Q$=T)Bnz13{sqh;x7gnf5tl^`EYv1jvW4NjPl}c8H!v=btv?<6x+E z(;c~M^{!OS#-hGGIY%TxBv1uQc3xVpf2^9!PcdiV4x+-YyT0K0<)(E!tnl7f=uO}W zYgq5f2V16B>(9p#9fzLojs;)oOR{HD_IMiH; zd}UQJtNM#{gqPzx&VAjUmYHLAq`+`IB-4f{#!g^eWVHiO8Y(;jpdg*w#Z$kM?q+s)^(hKSw@*G-vC&esL;k!p~6SVLYq3ux-|PSTjfC zuNyIO4He@ATN|UfT=FwX)dV1K8NN2(O)xEmLH{DZsmXRA?0W_O)dCY*3G2A%*ux;n5kUZ;x0 zWUs1$;|0i0vWL2aF>{+?*a6vnP1 zN4=S~z4}kB0tiEhJ3g)^Trw3>PtLEUmmxT7Gy@HLQgxUq6Ue(LAl+=t&Js+5L@`z5yuU7KmOg8 zxMX9u-0jCnvApH(y$+URO+y&Lz}reo-akDwE@~<4H=Ol98IOgy3MxO_z%v_^B9N3i z^xPrLklBn@1--mo=d(YzrvzG=vlH-vRgr)Drl#YmuBRQRDiF;6cK%?yfgwP)jal?u zi{VVHXvF>@*ZFrf5`AoPe(H@oA71qb`G{bc>?tdJGJvlLlQ0@PPhof`LdK)f-~j>f zyY+so;Sf1+y+0AR1u#ihN~ylfo_k>AQe@Y40EP z2ie*<>$vEv)A(Zu+^1D1@N_G-aC%vnhqI9m3<6!P=Ld>GlJOvH1smX^KMZQiBb87& zNKok=I(=2wjP5mh8&7)NQk>c@kE)_OU@T}8bA&8R0K*9ysr2&F&{GE>ct0K`cr@Jd zyMIRj5N+6x8;B6>Z20%V$PgWecN1TGd!TP@G%4~8Xei8)f4+sB%kw&k81iq9R;N?5-Lb}Y*j_+^HvA;e^0T}>UwvWHW69!b`))ZD@y{> z;}r0aR|P;A|G^;^!I;%RL|>!@8?$s_;@Wy5bHqj9?|=wbT%P?dVr7|nIKY^2UJ)!g zKLi@Vxxg7HS#4kzm8}l^XlD=@kx+Q-RZiTkaI*)IFH~agJMQG%tApO>k8^NB0`q-K z`U7(TlBXK>c|dTc%qe4VGhJT|02yik6b^8$>IcG;FD@Lnu#Y30=3hJBL2eucSU<>1 z5WeKOfC+4MZtQS`Vv5V@h;6G7bR~1w?Kl{$!@99J)qcH00A5WG!=j_pMg|Gt?O@}y z@Y{$AObR=2f_(fc0A|vTM~uYDD)TO3$;Jr0*hbFSkLSqm+q>I95tIxr|M%@yA_XEhX4Rr zJ)Up9W#9wXA{P zR-@gZ_$UYr7}{L*$D}&Ct5OY$psGWMiL`f8zZ-+=M zDj%lyO+*}F3is=boUS=^_*-I4bA}+Jv6I`b9JBJ=wGq;K)>eFXLsTnnjIfe!zt{2l zmIjU;`rdyUj@@cxAs%14O;zx4(oK*qrE@1^xpGb9`QERkutqByKTm=P8957jyUyrY 
zlZSxux#JwDz}&se{Dw@Pa4(R)7DPg6{5mADgAD2}Bynb8+7&Jx3a`4u zW?(-hsalt^xH>IM^lQjPQJF6nqJxH2lA6A*B`e2!-u0r2BG_>KE&O+x z+_){La4Vlne<>%Z1^AbsT}&HDjaXz-Jgf5|ANhRa6-e6UD~q))lGYyf!kr1QZtQ&| zgiKQ7(rj0eZX+Ngv89_NoTjqLgTeGYbQ{+CGV->MEdhS=!F~@C8CI*hOokz4Fj0 zioC(meGIh475?Y)Ero_=KX|eheUw7@H$0|@Bq%j-saOT{W4qp`_fn7MPh`27wB^OL zlXEbMxc^9$ZvCPThbHaz&8Ygcx)r9Bz*WGCd?(>a3@|$=%cS_CN+kz!5W|L@I|3xPLBn{S3Aib72PRgws#TwbtC@)gd?X} z(%bQghG*?*n^(k>a4rK{_=g&n52-9<{NdHw#q|$@U37<}Y#ITeS5j`x;)n&svP9i;@ zv5VigZqU`g@T)}Xb*%O=%q6r#$hq_hwXk&C587^3V?$C8a{>-JYwuX!{k8Y$Z8sx~ zep3X|UE5ET+Tp~+I?xTuPjF?KP3Uc8MQl-K^CIvWC?RuzBpA5wO<=E0HazEdE<4NC zUUE%f1X(>my|?dV9wAqjmcnzB6UQQMqJq$lRW)ax-5RbNE>OWIo{Y@i*oF+ z<+aJMXd-nVS3UmKcKbv%2vvibM7a3y_cRk`VaX+3rc+nfVEmcY^jV@u z{m%eeuau+t0F6j}IJh!A8|L%a#WqofP@9M5)tm66Yts%2O0eXRB}nG7R@0J^*Eu6i zwHSx=5NQKu&|4junqyg-`wO*BuKQ&BGnazZKdn#i;2px~A+cIB#Czz3cnt_~&gFy9 z1~k|lGMo{o0B8R^16aA|z9otP6)2x)gfuEA&!0RV9e-!@%`)sU`0|J88dDn@3Ow)u z)^f?487jk1AV;x!D@Em1s|Itrc<9x|ko#9JsI|)z#6QZ{mD$M?&~awzsr}V3j|Uo#-V_qA$ zYM&zi{|h}wQO%nYr?c=Xea=*D)_#9XZtDh6C(J(B^q)F3UDdOPOPS9^?xujYzxm2^ zjE5@Z+VY_9s%Cqw+uchynX+Qg~$6nW6DCOxKpP3pKk`jg=c6Z3A50Yh>pK?6yUb$q8NW>&xQi$K7HnVV; z5XOia!Qz5=ZNpv^(WH|x+AZ=GO+>;%+&5`Mw8`VO1YMNy{nO^1o`ETIRHOdMc~WIF z6M-8?l6nX(-92ojdBE#fOGhnn1H&n9677hihDC_53mllG!6@Q7Rduxrn=n7jbQv46 znZZ673;h?++kzD|MPizVD>;a_?;G@##E!{_zXuwUoCt3kwO-HlXPR|jS`b@;lPQAj z+xHILlnhC-gUG0+swg?x`c`;&M=myn2eso5+xltXV2gP%oKX`$!Skyh?WgQX80c_N zU3szhwE6&*%qh<7y3X1H-=A_G%yjrS|1leHY!KaV9AtyaPJql&6-NJ~+6Ky40G}Q{ zD4EI|`QSZ0q`!xD5?EK?9L-|_daV%04>av;9rf`LXPh8pcp;nRq}!XcgW`W0p<3c^ zlRO}ODv#JDw<()KvqvNb%WPSw@F3qaZ{ojn zK;?ahFLwR(%kAE1qaGQ0jzE5HK(Q;_&Hnw$%T7MNk;41Z5#4e1j^1nic-_T>9LCT0 z=vTLRaGWyp%9e?8pG2{hsAVWL28SbovvyzcauDfy&_OB=Tvwn$$UbF>InCV7qAv&L z(KT(|(dp8(!9KrwAc^+-r&x!%vHniN6& z1xVgFNOx-7)e9vY!y0O?FfomNl8_reF@Jrbi#qKAy>|xGSa@g%HdgC_%I!S=q5A5H z_x`(&9~x#0AKv}Ri1DAj()6rRPydth01XOy3zxv2`gqY5HDVSw%8pEy($(TyFe zJT(MnQYWFoVc+LU<13X6laI6|sRwsNA{Y1ulI(HJ@BT0rr(hXsP3s@Ws_N@cOg%+# 
z%IPi4*DkiB5R+!B7hV%lf5NXmSS6P{Zag{lWk&Q=Cxaj|sOqddchP%zAwJz}h(|N5 zYUOptl5>^WQUzVK8gTiJLTsk1-eu8O`W(VISu(|0u^0#Amew-$T|=&6hfo0mu{-(K z{YtXKgvyvNQ8TIIX^)8Q%PG)B!B!gFuTf zY@~HPw*=yLr0ua4=mcrRc#Cu%UDb3M*d-yt=i?RRzirBFQI#eX0!l-!R$O{z{rVG* z7)wV+jWfyB3(-4`59>k?;`oqjroUNfNl1Tkw~Ze;@CWNYTLw>bvv`=A_dj0_S)ZEFLZHc6 zznfIs6OoRjyYvrkJ+jgV)0D642asja^yEc2auV1#Ne64Pu2VSL=sj9>E1<@JbtAW|`HE`(sk?gA_mV zm`n#aB|+Q;(BzUZUOQEQ2u2A zx!+-sN6|}=R84@8TMePbh1$fJm12UzE^iHc*yR}^!7ypV|j zP?dNt>M6r;giF*7d$d6o&l^6A#&npp3+q1s8}r1lqZV zvqIyT`=xNRdmGat!l#f$NV{OFMPtmViLn`q#7Oa&e|cYn9yOknKkJ|TK?BzX-}VZ* zarV=mF1M24>4EVhPr#w4qUPDu5N&OQN`;g8}h-mm2SYNPS>m($P?+qNyC zq=d`Q*nJ!PA(Xee(K7+MAyv}2fy%jrOtD86M>xIdte1=$nB#)eCQSka}On$>OMC}O|e4(oU zj^1Vs@A>m{|B8&lkOxD!Y|b%*;dhn!80ZyPE@alZI>zb`S?RwNbvXm*8^`s1c6mab zwF*({A)*)@gy95MQ=VfXj?^f)-FuH&IQ@!08C0NL?77X9Y2;Rpb`j9l4&05kwV zpSRcM%k_sY!ajh{8h?$W_ZQl#5!s5Yv<$T0%k^xYlI?%09o>6Jm)ARIowbPjX5c8J ztaZ*&$~h1&ZDSosQki}GX%P_pFw;!B377FVBwp8@V`uHyMwjb!rq9nskcaN&RDcn9 z#1=uif2OQdz;qY2R9SyfX)>i;oKXA$!4}W{v`I=xOf}Kp z)xDlX`^W!mArf3F!-Zb~Iu+)jbVmby+~Q%Y+Ztj7@=RbbxlQ;m5stRx4fW=AK!|n- z9c)>q?ZRYFHFF@|AO0aY;i8=tP`mm#NZve?@xiJ#ZHs}V@&2r4G=d0ev zCXFU6k^WP8yiij8U6rc3?HC8%(MoFSL)G5b^5?G*u{49f7%Wc6g<&Ipam9#Gs!O)k z(LJM2;C!oE`f|8Fn(uPP@m1VE5M@v#At`@ZQiQgJhrH_K6EUgPl!L3>^+19nV-GZ^ zaeRoGJMAWq2y#Dr9|LT}+?Vh}SE&*2Pc2svP743;9RK8|k`v~Xo3nCtp9AUc_=xbA ziR-oYnf&H^1)kpE*x_~loo2yJQ0ImjT*#P1IqhK9#?oxOLgL2-esH4j#9i;lK;Q}T z7T=@Q1PQ@kv*kt{n`=?+3#!b2_*TOcX)msjidgXF!p^R4`kO`5-)j8<2n(LP`aIQ4 z8W(&tZ^o)5z*M%#{;!?Ib20h(JBqiINVIMb7u}IytP=!p?Pn{j+*Wj~FBVS_nPUzt z=)z$aG2;f;n^O+lf1dP~)yd{FnD31TC}B)%Mt-neASP!EVj5Vnr_N@g^EI2)z2G1? 
z%yw`q3@Pd_`q$gwGn-kr&%8eVk&~>glw<_0O}T)2edn#H$a9kq87Ot739E1w&hu=w zPjSy)bt3Y`a0bsEXANxM%tjJYhtJPI4}{B&jYuxz#z^!I{;?|g?gTyxOuy)BMHgrZ zbmbijok+l4eJ7I-T*c@1sMkzodUB{cd!|YGps{-E}*U3MOZWt{T%uG&;~aoXPu zS}l~r&nx@$21-H7lOSgotPSy(jU9VI&SMe_fO;eIx7n~GjV5Uu{4KW)+ImJdX#UiR z5XI^rb!BXtCOcxMkMOHM6LdGBI%@A(-Zu?S(=1fKB_1X!CxU_)cWaG*6Y0Lr`@Vbn z-0t@DS+2&m@?c(+*PZjd&=qsN`q>%^tp6AVVCa$qymt&jrm@!oHq4eedaR#6kUvkt zVH3o^9 zyb+M`1eQNghgY+>?MiHlC)`9(G6j;e*>Nih#|ZaUD-dHEU~46rC{G$(_IF)lm6sMA z9&z!8WD-8h{zN^aNeYM#NDZ$dUPCs@-@r!`cg#$)=%!*oY(NsuGLv6A$(M(i`?_tU zyCKVH8(o3Bp{-Z!r?N^YI4!48!5AC9Ok+hQz>UKTj-4CpZob68DJTm)IUuTe-`jFi||{BQqewj6#w=6jUhz8;=R-;`_)37@YBtcE}Vb0yBvs+b7vK&DG3 z%qXYYvNy%4qoqA(yi9KYfvb$Ow%Y;=XHP2$P}5lWSZ{q;X8dRGrf0029Nyj*Fm z!-lnQ%?K=eju#n$LD~Y?lEzyVjOiJqU%Cx<5_i%w?HA2V;yn{#PAJhb2?C=f0dF*b z(Dll2VLU_EIYw!)%Rfl)29IfUUf5)ho3J@c5ts#7M-eH^7(5thGIpNKa3U%6Lnb}r`QdRgcxQUsC` zJ(_bqH$nJhS40UgM^Ok-Jwe<0@6p{YbZ$aerV$&-!4WD=Gf{wwaIOW@LOAFyGC8#+ z7u|sMl1|u5mjC;hx+Pu5P0hWH;S}X94Doo`!fg2mks^|Wx$>1`mFcE z&I3?_6G6BGHUKNvOUf&Z2X=c}FP4G5+W7EtU)F>Ej0$2s3Yfml7z!|a_F$;QhRLwSWl6xZ9^eb6|_bU9?#Ekr?x{cBDyY{^_yR!;0H#v+m z?f0pKbV9u}An+mP)YZXQu*@Jlu)4YMsY(o_nt8K0-daU90@j#Oo@X-9Vcx@S-K*t7 zPOT~SFM1=txMr>iGw~385C6L7;r6HPeb`iH03Am3blWzye3aEYxo`wcN$ZvVgl3oA z_y9&|Jub+Ut%9ZOSgmeyZDOv6`|P0DL^F{&{-;@BpqvNH=wW#Nfk8i4Ooq=vL-BRq z%~j6ZB7Ktw^Je%A+xt(R`r5lYobscak@cX6a!4Sx2iO>2;xFg>5m#gYcOih=^0D4r zEyt$VZ?zE{-Xbq&5J%v2ma-4AV{awKYt?;PqPLGp02c9?;rb z$g6{`k;1D9d zAW6rVRNBmVeC&V?ndsEHLo9sZJ%%YrrpM?%W4@;!)Vwplb z?;AGBmGLdukhJG=YVYZDdnB_`4l4gn;zGQyBK<(b!2PzN_glQ1{I&K-8=XMl3WB}^ z8>wxSWBXoa8)P*L2fS{>tpJB)1mWF%4|SF=Di5~FV1%lWILJZ9?ZhVS*^^PasNGyW zm4d_%MMv0Rq|rGA@b1O0q7sWNbF2>9;k~VK$i*J+n@`B*B?sbwWZGi*gXOT+bSB=G z)DUwc$kKolk>bDluB*H4M1EH2lDcPtwrS&dx+O+0DD2pP#llY@VCF@e`d|rh7B>0S zWo8l*w+1nA0&_M1ycly-o~0ts_!g=lG%2dN!oHZX0|Zpm15a@S@(CL zY;P4C8GGw1etL<7%onkf3DjB=$V%~Ccp3|{E}Aa=rt|}#FCq%_iVQ6aY7wAbfVlAc zRkIvPpQl1fNUZrZ!~MsaZBo{VEasywXQKNE7j4P{_f=Y(KSC=_VL|gq8+4R z)ZAevW(nKEVKE@O@kLQ{Aq~ry-&dd 
z=-LA4?+O|fKpy)7$BW$w^z1K5sVrQ#k6vf!m;6f15xz!2{7yI8KtbYS#9D~B!`t${ zF{CzYg6Js$uN^sG3UX-sw5Kw6$>+)_;8*cHjcc{)BnM;C)qUn2oUh{(jgN+0*IOt) zg~b<-w53gKwJd1o*vDIQt5WELii+2}wGX)bu=Xp|*g0xC?FjV`3ayZAis5R02j@i- z&@xrsAI!QBhV;{pUys2&Z+@xve1A1w6+YG3Zit9QOFu}BrgKVYR8(Zt@`^gO-%Mn~ z{Hu(uFy3VKbXCn(wh=^-V|`sGn=$F1N$@#O%P9Z z^wzHcpb<}TRw&-wNF>MnZOvUXeQJnH30l=kaOF1X zr$7$Y;9_a=fKnE49Sb$u%8}3iJv?FMWZkWLz7%suBBU&LWgnKV>XutkurzNd4LEdg zrKoE>ezPXC=HN?~bgrL)x}u}zWe1J(yL2jGAH|OqG<8NF^5Ao=36c&7NZm5${LiWo z!8f6P=2!1i$NmRXfY#UPW-15{*Tl>5?RWwP)Pr>F>0CGBK$u8oqJ` z+r7Qv3U(}CbB}CyI3KBia2mLD`W^OW6mZ1*yTB}e9eh5wxu<`1wJZ%IPI+H~N;Q$V zJ#<0=eOs5|E6cMzP@+4YVpo=JJ{VU2FOK09_EPuyc&{ViI~>uQbN%9iD=e_`L98PR zepFBG;YQcclgCNz;bj-+T!1BF;1P8N;Bj(jR`y{412B-1cx<@=$J!4_czjf_pcqZ* z)t&-I`L=XzyfMxJUfPDVLY?Kx(1N~Xxlj&W9n6l4!J$RbB$tX241eO#A+6g1KH(ax{v+mCH&6;;HC|;1 zk_{Yv`;gmd3Vgo$TO5IPxzeo&3@s48QOhG^!JPIbT?iL&BGX!Q2OMkVZ{@ekPgNL++y{IQyutSI-E_d)2AJGh!c zM=1&)**B1{O*btJir<&O_9?k$xzZBB_}KqS(%ept;vgIJvpk-{d~brB^E;Vo`C4ck z@6Zij7WDRM0E@)Z^n8{A2&PgASBxN9{EbRe+W~1uBlM8c7_XOJ{o~K~L!H#bGjVLv z2tiePNaeKXn&%M;5m0)`xLYSm{}^@P8B{NqJR1yXLBRC~Z2(mYOxswIpGr{w(f?sY zXFAWqfa5ci)+9xs6O6FFzcDcJ{s(_mNI?Nt%Q@}wp%CQe)cA$lT^WBH%5r;aG+pjF zTU*Q%>FAI2t{rN-2!3YwVUfm{WE-_V&BB4KjJf=#BBJx`oGrB;G;-Pbv+LZKQ%bpEp5Gyli8n-k;l98$iA-ZIPN9DrzLmiHW zjwXalBrhQz*pdoUYACf>3%6TXvL3D z_63KMe}4W?pmdp?pVrIUuGFi;7Uc+%(2nf1aq(BC_)oO(EKl0-LD$D=IM%K{zp;uF z>9GC1cOG^j{S4)q!cm5u{h04Gq1#b8ZeDy5BsD0yP3rS>+Bg^w2pxvN3YG6b^>e@& zlpblRI!#^peEfx&+NT$#m?LyG;60Vt`AoQqg3m~^`V+cGKa%?rdPk%s1XRfv!1WeT7{T-<)~+cn)zQt)d1n<4P>`_JWFx zZ(~Dg8SgIv7H>7&;TAm5Yp-^Y`Ga<2@M`8vrwP~(UO)eH7eF!ikYQz~xBUf>B0#(@ zCIZx%sJd-wZgpkE7viHo0&GfPCH7Q+@rN2prv^$2xWMt>P^b7?OdT}9qoI86KC|6M z>}OCt03EbSQjfbg4AnaWGl*5Pb8xKd$WI(m66n3hy>ur|Vt?IRaV&vq5EImrEN4Jh z@M#4S4-K0y0mg|esFB=g;%wT@3(?tu?o1F{{hJoQs<@@zO~r?9!7{{O=%d&)XepU5 zBj#USJFn6LY4KqY#djl@9OFplIe1 zl*`bgYZ8Rb5o{c0lje$OGGH0qXr-b`{_V$pO1aR_Ic9?wcC_I9J0k7r_RVlNfLT?9~ z7mm^AMC04>nVAGWSta&?MNgyav-T>N##r0B-uB$&$h#`yrMg)Sz7S95A#zHjE+3~J8Iw}3!WFQ 
z+Jb5Ey&o8?HF6>{@Q3r4hoL6Cebgp3=8n~l)Jf|~IF{~aa7jf(7}-r)^zL=o1Y5|F z+9Q-A$nWX#UmwifK@w5?V(G!RvZJQd`sBt}J%bHnLH*T7do3Ve@g%}*U;Q%+dRGX5 z6ndRH%molTBxJuLIw;k0;aT)`%6~8-iMK*PkB21KbPld9d74*V%JruozgsW}*NLY@v_ZR73F z%}pKB9WqqL&X)=gtw=sSm$y2ERU^!_-d$xr;djtdwtqtW3cn3Klf(FV0faQ;g$UNW z3rd}ZG9{BBG2+k@)mvYh-BT9AteQN>>&v1gadxXzz}6@VdW<>Q%5q&0G<6kqb?Fl! z5(YMEG326uJnA(Zx`nqXe`e|PrZ@$PcX|r19<39oVclm2-8^1%7)PN_=J+T_GGIqL z1KU@a;)>N%^y6)$_*86I;vl@b6#*I2J145SjWx+w5jdZ>ep!%1i#}Y-XC5KtRnJ3+ zl(38iGvXes6p`#>s72}{Sk2#g*J_!ZqEA>h4AB)1(Fb8r7Auy3NA-ExCR9wq+bD{Z zF0YYOoAAwqH(c63`iH2Ud#QH?$Kur!Exd5u>l|D{Bpw1$4^8ZkVd$I6*ohbRYWbeE z{{YNkcjxe^pz%`R?f{qrVn#`+0lnA}D+p<+zdqaVc2* zPKO97cJoMkysku`Cttf>_X^zCA4#pexQ$_dqjOOnhtv)bn_pzQVG_a5g;{_ zgKtaY9-8X)wH0NVXNB1B)*DziR zJoH(pn^cpYT_F*(!65XY()2o%Ni}qzUDmJ699np#X(n)N?$V0v5^yHf@S*JswZ<*m zdyqA&wJ<%)QJQGw;)ZN)#CQT)7OyhQnXiS~+?#N}aeRChGxQoaSo@&3*%rgR)4<1O zUU~E%B^IZ|5^ zSfphHKT@e+NIU9d=cYa@F-?IBAl)#fn?xhq1F8{p)#n0OiK0Kze#$($QSF!GK=JQ( zOIM0(?Jhi;jDau6OXlM>dIJTYc`)ajf5l5C<$q&E@ zX_HD7H+$L|(XPUQ^`}5_33)9LuNO*#W%Le=)J8L{w}cCff$*f;BsVgu7B)tT@IJ9x zM!CuZ{+au(86>lZON!hO7mf%P-N5voXpo;?rer`wZ4o0IxuaL#%!4$4d5t0p8-_tXnF7S8`CF@OTquP4-jT`bxr32$7|xaf+cXY2s_LsV`wtW{>`- zOj9)AaJm)#dPy2w!!n!kgBNBo18t7zI@iyJ_vQh}^Fm1DOSdq$2(^Uwcg?1mwu$kd z{NnG~M%3|}87A(#6TzplP0HsNaO_i zP8vQoJ7*wRv#a^uPULZf9wqFd(?eHpO-E9|(r7sPLAQ;P1tcIC;FtQSBhWpV{{X_R zYJl4y4L#;0dCb@y>*4GDcI7eaGqp(b7!YG-GT{W_e9B`s}9SAxReB@zbX z29}{UYicnWX5V>5=V9%3Cp8+R$B)Ld)&3HM8Ir{h@E!|m2@&V!64$x2YO))sNiCkC zC5mU@ZL-FYHP?;o(4r*GmA3t)@PZDV)%55bDYtolOO$om>IiTTD<=0h@^0qhE^2&)01KqLXkuVlTpn1c>#7U(o^A?p)regmR6sjQo&-=QoU*wHm*rl)8;( zC!BS6cd|?GKJ@lwC4vfjN=V@m4Ap06Lhd9p&R)eYu|NqhUs$T~x&8=ojGyE488 zEU*8HKh-AdDwVhJciylz$bJEtDkk?qKE<(1T%8oksQYq3 zA|6R5`V_i8iAFt2lltnXc%B2kk2wt~l51=i85FHp#q1xZ)I?2foF8yQ*2A@QSa0T< z$ny{zYP4MBas3I@ zpEBZ8fLm?cWkOTfQt);r911u4da*9ryU$y#(O}=nhWgY*ugbUKt$l5CJ z4=i>fARQ%hrpw+%SOH+TDXp?RseH09e#>RWZnsv&MJ0lbfM*2ASB0?!oJb6EIGan; z%d-0C{>F6o$H;eG0cgSeHN!*TB{1hJ-^(8)lD>QfhV6L4ge;&$4roei7o?I0B=Y8% 
zO;9C|A_Zy$ofN;nYnoDXB0KQm4mgL?16C2}uV>^@RQ-+gm4srkejyzWawt;dwtpJ* zy^k%$FN`!SeZckU2d2Y>h=JtfcTlR~*9x~}T$HOxU79+XXtC*V?*h;yBN>sbpj98{ z7*t;CNoJ0mg2oYa*@bF-^-n^k3wJkyDnSjQ#+rn%XK_9h0e`R6xDq~hK%$?z7xHeY zybdA8#p+hg3LKczw8mE3sAi5w zA2ke3mirluq1R-Xna!7n=xLSpth|@|@pm#V6yRiR}lhcv3a)Wx_L14~u#=^uT@Kv8<|{@4`@6z}l#z#=(%CE+Dc51nQw7 z=qat^#Q!7}h-o_(%)Qr=Oas3;DTZWha}EnEw}JUu`i0K?;Ul|7mUuhg3Co@fTnjN0 zH`&z-cNz~`;YS|0!(IU1Q;0c7BTZj>;vQPhPOQ7P-(2K&4|)5f+ZcsgKBHr1<5swC>Hy`tI0PCy| z?#BHcV;|3*)di$MnHzv5O!a}p@edZ{IG>$Naiv=nA$~jNqbT0}AvTj$$@(&muTn#;qkBuS zB6ob#U(P~M#GDiLJw5i>Zlp|+USU&_~*birxkJr0l=FdXP zq);C1nfNqV$Jyhz+aeu8{Dc951==;pE<}C0f$u>tcypvyY(qcVKq}np_?4g!$VoIM z$O2`(`H=_QYlZSqz6kVHxM2)utx!aP8Im%pvq&R|*K5RLjl*2DqqwNwc=Y>gi{StI-m(DFO!VMxfkCyPN1pkrF1oWf1wQQ8-??ktv4NAxrwf ztK1SIJ_DpTYJdg{kZv=6FOCJ+tslC;sq}#q&8MrmcH=0e?U2^BjGd!ZD3@AQSRm#W zZoiy?Po{J%=j%=R{sj4CgCVHfLgEIIo&*wGx(?5WG!U9NV4X?{4@W8f9fu9T3ckkP$wJ4ygpN}x0d z;n{?ZsOQR@Un+&hQXW0^me3=+v_mDnKJU#Wx&_V6cDaVrNbmygV^g6`>I1nqMa-9l zqVs0p4@r~aj0e3n1cv9(MhGB#Y-B#`D z7n!Pl9SZ+*=w7q7IXRvcuYRGGo-!R#p~2;GRJKWCrT zkj=XfU)#Xds%>Ig`;C7_Xw8zF3tA{l=qx021F6Xt1W$qm=*NnKNByLgMRxOpEylDE-Q)$kq^yH;BJ%zY8Wo+N=%mF7)5C-k zSMm-1I;dooE!%JBR0#0x<)oBB+v0gTTux#-(x&@;rj4!33qqHvU(pXI^arkeU0A@N zV5w^&8ypBqMM7t)uF5FO;3b#5;&>s<8Q)UH*kjl+y$*BG<1rsOY)_9Ve2}QLhA}u< zhj(L1D`lLeG{OG8i_88r*TWY+t52lkJ4x3lV!!zFZt8A$`QE-=Iog_eE(n3_xcq#r z@X3Qz>w#tivkCKk2mO^2@q_GEd}gje?MxF5g%q3b$y-cg(G~Q~ga;f)$cgiFF`tsSA$*VUgFT*OG#oWt)+*+VxTNG) z5dP9e?y+XJEQYQu8uZWB0e($RTz*qc0qL(y1JlVEY+{)?WYUon>dY~GM6=II$SU%< z5>L}#?MnHWB_0ywI7Q`&qTssr4RPKh#Y$QKys-MWm43b#W~j%~P8qK!bQNBt`UTRM zK48}&e>|2wET*aD76`2Nhn4qsYwhU<`;XHY!QyAg_9tffCjn+Us1E|~KV~xCO;)2qN=8gQsIAEj`j9*9o>urfs$X@yRnx{Hs)E%A+hPA58FL!X>auy~; z8zas#QITa{svZvR-@gr*i6J(5g+CDGLaCKGoS3~0#bfY?GuFEkvHjHQ_`Gl8e*DQi z?MILlnsrL31c*MxDHw9>z$hP~fID>0UoXKG=|CGIMd%kA1yH=8%)|v4zM6_;2KMqn z2rQpl*`mlK`JqVUZl-ydo*8JKFXG9e{kE^%#I+P8LMzbJSd%UcDpO_4p-KtG7GkzJ zE?KY9j?zu4>PQQAWQ-K?-)CA;QN5GU83Q^?C&z{LCDZs;@H9OeqA8us<)klSXL-?-Qz8biS_K 
z5u9x@&5s5QkAvvFnB;Dn8S=m&YdkIjhCp?)IsWYTRG4Hso95<59Um$<0Mv_LD?E(J zJzX|s2fj`50R0foCemoUdD}BJ<>iT3G*bYG{lM4rou-EtSJ@y7-fL4H zkOVTwH7<|zD{m!Zjd)P>*TBLikMiesp z12d&)LOK#Lg5{wdM$ivU;1X1pEvDd>t>k5b9dljvtt_mL&?Nc{jHPddC|HSJt4QiW zFvNy^f6yYK1>SbdtGssBpb~8!$IqGBzzpcxVxk-49q!x>fRlj+pM54-3F(QyXQwfC zffgC z_uXqDnf<$bimRrzjIDRb%(0Wm#fu;SFso3}=&;NWPRjm2o~|k?j;`4bPJ%;lcL?t8 z9^4@iBv^0@?h>5f?(PuW2X}XOcXzw}{SWtPWOdK!b*4^L?UG&cK<6uaJCIqFG)stG zu%+h8dWT7P&))^Af^PNO6Phw|RNCb4AT;JUm8NlwWgf;!4}hQvkOz35Z)YnAt|VPo z?}0k9Wd;76;8y2t+c`nj9bH!ml1DA)in zhAdF6VCH686<6<-1HY{B(g&(X#)HkcGH^>grD=C7CRPvqDg4q-w*m&3gZ0^Nk@g6n zTBB~$yR^%`P@IhlU615h&*9NQtLAU*MyXld-!+uR5QiEZ@J+J%`~{VzJM_Goj9}%I zy>e{je2Nd6phXkerh2TZN8|k)$xnG<)7x|GhX%xYv&V^E$RzPhWL+-`ieE8{DiLA! zL>^Hw1SK&*2FYP7=C!iI5Y1LlWl5nkk08gX5ebo5gXJNLR_+Pn@0|;P5=?=ceud#B zK*YNWU8l8) zs!zwvM#N_?PDL9ddke2bsG_G&L_XXd80HLuZT1uty+{h_PrKG`RhS0IPmP=|(@L#i!w}P9u1GRAZ#rGc<3OJ?ZD|Y?;;;T(ZAR=#n8yu}X5X z);3L0d$6IeQBS^|cO4{BsmDnEh>hpYkcQ(;It-{9;*6%DP+wDK6)Wif@;=K(AZX&E zcoIKKaMRAAnYs6BC>qlhDGuo*TQQQLUWxecDDw%6wC|{x zVdgIQithu#o9n-Oq9&;XDgfw6ksl-zX`{-XAgeZ!@#qjDMEdJ^jF#KZR|Ia~nt#>@ zyW77yUW|&J7XJ~xeR&1Em9!K5l#O~-X;BJz`RF?53xYN$O^_1S2lV=G$K|7l5dKU4 z(~R!Vqp)cg4}g@Nu}Kp-6IkeqeRBm{F*c)CS6CE@g=e%c!aaMT`m-uPE-)@;%_fzG z!!-9%0r4MUw`$LEiZ9veP^Z$SQ8z)?ZL?B|pX_UB_9|h)85xgT*C*n_GIBAUV({jr z^Jh`?&?9$#UW8*q?E-6pM-BUAUNb+}Gd-Ti%zy5MARo+-W3A(83zitrtsrjq2oFH~ z0HpkCplLJ|9_8I|zEeaVAO5mDPzwgg(3iuSlCVS4bDLtK_BbnTa|YJ2UA|A8NAi6I z5k+^jUc47w|1j78!+7~$O8mbxv$((C7C7mfg^OI# zC-|T3EM>J65g30gZ;WTLx>|6)hZ}izxAeIWuHG19JlDV|aE(a;0TcO%ND>*^%HSjOyo5j4 z$dNG_E!n=lYUHH1b*k{uUYmJG#y$dG>U4{urIN-5B(Q4^;Ts3p+Y3w*cD%7{zM7&4 zq2i-cu=xbxs2gI#zl_xAshRBP$*rN~SB>f#g0lxn8~@7;S{`mf*!mAj_Qk-3=PMHu z{#Q!=i*EaU$pd?QZ1k9&kK_l^Rqhm=(vjl!&RY8_#F2tSxbMaFX@;Ma_d3&_)I|j< zJLkp*5bg&8SgiD2W3QDteDgqnS=zBVdG<){UG<(HR|bFZ_!+}ySv6pQlEwy}Re&K> z)$*P4EwWuy+9@hO3SY|Pb$0>C658rYD@SnyrJa@`yd;9LMr-<}@4<8*0)aC^;nA8B zJ4>zWc`IPXtd5%Fd-5<8Mu53O%vxWnCSKov6P+U5*R2zW&Vde?2`FsG7pv+U>I|** 
zW%Yr6P?yKNOE^Y|4Mk#>pC;TjXE5vO!8V?!{#1u{%^~YCSjvj$sbFMnts91W9e+vg@|nYH2X*X zzt$*giPd)EOz*5R+l^ph6MnDLD{tMpD~$2ws{J%To*D{nNj+?Nmej`_)_|%dp=Fm4 zK+c$*Hb@H#s5p)m#;O-SRPX77@m|oscz<3KDx$JsAfDs8&eEfO#HYS_@*CugR+4&FNqu-j%r4-7wL;2!V;_LRQ+={mC*ZPx?s{U!_u% z?DJK6`t79vr`M2}gdDfQ!DcC?AW@)_I}I(dt+w=PWIPmFFDVZOrW-YTNiYfiFXRm> z9(Yd)M@=wk8uGuS`WxMVN%pBEj8kwGXz-hK$%d(`)c&MLR zD|_3R5CWOkN4p1BQuhm2RsEQ+=ssk_S% zF2AXFz5@6bo#<;!F55%3W!nu|cnXK40_Qga3qs5kb+WNC`nMAGA;ib&<>J$&g+E0X z&$gc9en4b0B2e?|vnAw60lbkhxVbt2=!48teU z`DBZs`4r`Hur;)v=DhzfTm|YPq{W71;Ps%kW@_3G$Zv&E2CJhn@Km~flEqzrgXSWA z5?|mD1;|yNDki}aiF~2%3Ynt2Z3IK`W$_)BpX_PrdimufYO z!I4BL=|(F8DR|E4XXum*;aZ{5Wsj|u0x{+pud&* z2A9IE^T>Ip_{D?vvH_`?i|hhtI&VW;&G8d6=jkFuuF>&OO`jJ|F9X5IS%@qh&~YEk zB!3BvUZnc2yMku^+Jn_u{Zs{yM%3|Ul>bUm92H{EkwY4;c@xlh!P3G$f$nxIL;B1g1!syq*-rG?zee1&BXb84Fc0wfxowD~ zreu47i4sAe7W*-Q7x*Zs!q`uF-uOGw_B;;1e#cbit2|iHnYA;zdFKB9E7UiE%Zjmd zV`*eti4e?0RuLy}bkQ#)1^N#j=7B9%d*P%zxF$)B9N7o=B{AzB0+L>SLQ9WoV`67i z@ww@18x3Waok=NT$iFL^_qQA2!dO0tEHqC8?bNI-!&`x(Lev5O|G5D^1Ns8*sX>}t zO9?WkF2jKf9}fNT%J|`S9Gt-pZU4$sFPzWE`(bUvA5X9o)m100N%YUX{QC{4b-NW! 
z4)m`#a~7nFNXs`KeV(o4i4d)W{$LlsNP4^QMZh= zd$k*3$fvnsPNEdg`YfQ?G$Ts<1vii{e_bHn-CNX#_>qe(@{atnhUP7$C z50hdI)hf)e%+u}h#TZqXPQ~o2QQ|k<&yT(+VhEvD`E8gc)g1rW$~T6VKO@Gv&T9CK zSYo7Ce7c1Ypwfs*+J{PkzlVIQcWPiPm(zq^de&w9$DR%yG)7E%QC%W8L*RRzKwCrV zJ967T24m?^x}w_ZcyBWMO1c|!l>M~{ao0M>iBM2`_ri{V{|0v5 z2J^)@e2N95?cZ0`t2re9NC6_$W@h7H!jW^&4abiUeC(};44l8E|G8q^SiS9ahiw%l z0D{WwX@D+O#Bx!_{(3<|@+ zpFoyZ<_fS(hhU?MS1{wHfV|1uk$i{wmlroi(x|sC5=Rv{CAUrOd@I~P=Lu=yk3a2F z_>C+tLY8jWALM(yo0s#dSjf6+Fq5g<=s6HPd*YaGwgEYWW3fvUciT$Oe zOL_Hd^pq*lfyCO3Q4fz}Xm1;NX4P}G6h@&i_29p>x!bI3kpeSr+vL7o*njS3oN!8!yLrO;nRt^KYcmyl4*cV zX*{&mWF;yHw<_|@;eP0J`@Eab&VKyaTkB{6l-c(3LlD13c0A4dBy#3G<^yRA9lK|E zMPHWtIk#L9)}9vfGqvfeqQu)cwd!_@*ZNS#_br6k-)kFM-iBGd5M>|>s5t?zE-|sz zeiJG(RP-a{Vc|A>7hF_Cbm?N0=ay!P*BoD~!}QH>%NN~0s#co+8_0a$Vu`>Lf<&tq zV9sLLXl)rU=0#&w5tAB-gC|!;7*cp|^=zbdjX2^J4ii3BZC9cY-uFjhEm5Z`hW@X7 zqDB`-r!a+6abHLUMly1WHc%n`m^1dozj@KQL8kQyNkv0$aeG3oKl<~RpVj|Z6IOCe zKp!E(tEQyf#4=*s=164+LekopF%+dJZ_xDgjfD=fsK%sE?EZaDbi>om_JP%U0bw)V z)nhki5z1(2&Q)>~VF*p#g_YW!Q6W~%V-V|;KAA?fb|VmaFvEa|CK*D#$tg<6%$~v@ zi(^M-966uly$ARjqt{I^FkQBsY@Y}jj*556pd`p87o#y_;QWMnd(>$Ol~|+!*|6ZUlcBeFrpD{!=%!upEijkT)iOemE$?ul0d68_a{O? 
zxxYy?yzHu!Q(dAy_W|^~vjNv7@oai7_y^2`K-jg6>$>E2e9^A417fWBCcZ%iQB~oO zb0FwD-kR6wbtQifep~^nO$}|O0Ln7U?Q`y;2C>l}?18QNZmEnM!Lu6)PWC=qEc(WejSMj-}8vd#1RR z(vdsb{gx9)2NRtQ)!u3kWem||7q@$i7>&b(4swTrHa6H;c$eqe120}L7n(`CN|n#5SN+U*H03 zI#aIBF&Ne?2LNv#iQS2G5^5%`%9jkOUwEfaod9XHW69AD2Li_wzE@hs2}&atBMV^~ z50!Hu0FD=~(@5Y2{t_Q%C*yVn zShBYx-l_~w1VCyCJb96>5<8I zVLY$t2c*VOnx&1#*OXR>tc*R4BpHd2hY)C!uG^!l>Jb}Hh)qo3v8}N1c{(Ye6FM2l zfJXj#KGY=0J?Ma_u-6o4^r#zObP5#U?bfrq$)?ixN8{)PAP8}R-a==jWjFUXd=sQH zP8kov#|c8UtdH0xEeG82IcZ9%neb~Y3MF7CgVk75;aRh3rQ3>mjA=zrZMWz)9R#G9 zA922Yg|@I^=R25~M`+FId;%c-*UeA(Yfg-!qvFxXtAC9BD(O~T4mjWbD)zHW9r4VB z?|L!t(2IuP$2cSoW|>E;7_~Y6&T`4ixAXE_O1$m^0svIkX8VVoP#au7Ry>vMHfp7{ z5gouZO;tiF?|oDT-3~EAH(bRqjcaWtIQDI5Se+P36K4hR^S+Cu)B>-nGfxBt6k~$w zSI~R$|1z>g1^#O0tS@oEUzM(>PCz9BhaBU;-KPCPXu;;^e7b!>pnU*5H%Gq%GCe?* z4fyTD+wZS`!zVhnld2{Q8nBeG?mWIuH!r<;Ia9y+!A+;-aa90)6c44)Hs-E>o(INCrBcR%hHQqvG zYi}D1Ru8vCq0ao9%d{s_T9Xeu>;|F?e`8kisT@DD`*gUJcI-@ z**A9XHB!mCG=*Njw8B%MCtnmc%=bh&1v(2>FBEl;ELLOQF4Y@e1XYGQK4M^TejcV- zdBj>p$#A`8(jmvl_^?8J6D1t$=j|B6D@dApTwq#~&WawAI*(v+*ss1&aPO;iWxBsH zpFhNR$ldxwRYu7=j3LwPV8#P$UMB9qd}STG3JNI@6x#0@hurgL@Slc)*Iz-T#Uj%f zmQs`4n}}yg_GRu^hYU<&+ia_y15UBgFE*(PK>! 
zur}i0M2-Ix4Z%nfjlPX?gq^$)S;mmJH+7`8}D?hHv8at&Oe(orDCRmo&ykvkgUtS9)QJV|t4eEf={MiWahl zWr<iOifuyr8{Y_Dt@Ux@e1m#**9G2 zW|FgKZPBK2WOHSu_soaAtb6HEcN_h;%4&1_wGE!ynI}>OwY^CF3mq4Or*Im&c(@nl zpF!Sw+7@Jwpk!|&J?=%G+td*pEVqEVWQLriW6Ndhlph)ys^b;@<23R|*jNuDCViN> z@U#=Ny<@h#`o*DXH5Q-2#OH`K*Jkdh+i7t@C={$a86QHODZj>n_$I&R6hwgspO4WV}DQVF6!nF zaJQAaks5I|nD+G1rwOinBa(+L44LYZ>j?ki+L??mf@?hb{R2#M0={BtSZ-%YZ|xaj zf2s`>3O00*Y&EMJ(#k_wf>BFetOF^5k*Yu^sN2;zHI7^&&YR-i{{w}(+j`eWefmW% zil=6;?LY^eCYNbUU0!OKScc5W6nskPD(itZ`rDmvO>xI|yRR(Xhp;f!wzJR3N5%u2 z;71n++RE{%f|=a`f<1!EOm!E&BIy*d#g{5UHX@&>`{5{wcw_lo4edQ+e=jtD>E#m` zXMT86zo<5();J1Qu=2di(XPT%VR)-0(xfMG{_8f~2DSdhUDA5$=6M|wN#jT|YVi0& znO@3AgO!vJ=PmDP!GQuvh@|h?kqth_G#OE~h5j`-I%x+$^IomRPL13_X=ByjKEr%^QNYO|Pdh~OWyh!a`eHNr+zjVcfP>rTwP)ZUJ7 zXCKcg&3^Mf@Cn~A7RN{4O(&7XRAwVmP__cO0Iyi-g`ndFypDTo&S02K=JJt4xbZ_z zy+MC)_s88~e3MPqKr6cC{cMunJhgo%^{?haNpxH$SbmH1$fQ47*aq(kV}wA8>ysA zbJGOM{z1^#*?}xQE>OhT*}}4a=iU|zdF(RS+t++xkmqdIu|)mJHh4j5{x1>-D~uo> zVC<#L=AHkKNVIPY{oy~la?FyF0AR9JIpbk}^1%j)2>Z2&3E_z+(&+hmiaVN=ikR1o zEhBxIsm5jpvC>m0U=t7hclLy58wQ?}&S#g!M7vMU zhZJvO=1LyE2MiMni7UO?0uqBkTNQFOeGngg&WsSFoy}~!D zT2Hv-6~c}b$9*2cDqZd)$@L(m{q4a9)`~X+Eqc%VHfYqVy$g`gpD0d?-+G0zRc$cb z7H71UEp6Yp5Q2Xb&58SS5_#T$NAuIOOnObuf|Nv!5*(K2u=T#H zz@yQn(?GT#RFrkH6><&N5ZyWXlx9|2wl!JQKTZm6;| zDy#m!yc3h`5GvNY8yW3QmtP7EP{0;tACrKa#9mEI5kAwvN!` z%TZ|*OSs9CEN&)>wiF=Zya_4p$uuzQ2HjejH?G zxu0Di!?yq6B-G{mNUx*Qq@H=A7Eh~1!C3DP3*&&xd?ZFF344004f~0u*Ux&{`7eG- zY+PE<1<^EMSQ#Y-o^0t1h3FgG{-&>$oZOe87a({buto;1RGu%ImEF*(`oT3g-@Is3 zJ@p?&3UU}Z<$0CoM&I6wP(P-U05Dd@rMI?w}XV zpja2uSLvnvX58}+?3cAAPYjntH>GO{?+N^=V7Yqg-ur{9BNo(6V1gqU_W8%5M{FpZSq;63-*GLY z<6Z}+&zAiI{~Au7$5-l_(h;V#N?CNo&s*yO@$I7~q65;ibiri?h03NXcPJ&8NBfot zuOIj0eEg-XFFPA-43TQFpZuViY2?je@r$!zC35}4W$0-{DB#OFOcrR0%fqa@@H91` z=|$`)MWCGmYD8eZAjk%Wz>|r)e?4*<7U&QoN);ZC^W+q z77oXdJ>Uye2m~LOtZ6`jIX;afW$%>SUl?$Cli`>Y- zE)N4aFOQxTy2Gu|A9wyxr9J~eww5~v zNdI)k;*yPAn}f)Py0<|@X752SF8Me$NZWPLl}7eKm6ODItFvDD6dY;ri!*wvpB{Jb zyK2#Ukifcmk}3SD7s4S>U^+Vg%ShzHf#D5aDXxG 
zi3Wn7_aaV|<>~C@dw%B6jS8pd2_qHajY0{>=OLY>`SNj?Z;u&xV~HbM<2m?eNqrxS zNc@e_9_Qt?D~?*BZUX!kNrfq_JKtw31EQ*-Wrn=8^=ke3CUiv!s{xZpI+m6r!q%@C zHWA>_et89TDcNoZR7tuj*8E#?6AZ z`o2$+s0r>9LdTt+0wE4{CJxh!QmRD8P>96B_m?kUSMb!MEWTHnyUtbUk`lL?F3kIB ztJAutm`Wh~^1FIuUwoYJV=f7Qu1l4gN9kV}~3PB-u z8Jyerk}g8mZfLVCLo7v`BD2T!iDxT8{z)173F(O1nUB~2?>hut3p2Ww@H-gC(CW&H z6(7Vo*dYO(+=@XnDvp)ciU==UobnoBAWm}ydtxk{4dM_amxGW@h6Hhb}R9O~=Is-TbX6#UG(EN~|gmizjOB;pN=-#;N8TyP zoR%OWBJ47db6(lTk`{l9(`m)r!!#@Uo@2p1yg`JN$a(xd1YJr?Rv@-pVeS{oZs|

tIVP(6s{XQi}o||b==DeT9;Jcu>T#_6Oo~=tHscaZ-nvX>**g9 zQFz@G#_gETegQDbc{1>O`b2isa7F~vf7K11x4?{cXy_`xCYM=3?!i{yLx3wi9Fi@f z9SvFxzo0}1{u^s^g8LMlQKa7|I6b%^%udFU#K=CEuF0<7%Y8|Ljp3AN0-kh`5CSSH z^A+6pulVx|b93|^m||jb36{$qro}w18Q+^#^{X~YY6v2W-W=BK&70@q2!+LHpK68^ z$?xmw$erq7@W=8Ay9>T)9W)B!W2C^qbUfdkifs2UAt`HXPsBy)7UAWI7}dwni#tw_ zTyQG3BWzERqoi-2DE?v~#>GjT$LeN_{i+7eeX;mdfPXf<4LCmZ<@+GGiy;Y9-G&I~ zji}o6_xFR^ygHS`Df>|CbBka-JUpLa0J*t?gE4oOPBzo8%ZV1aS-2Vtt2d=)b*E#7 zur}*7KfZGN%QfPL_*MTwLtNUqm)>CLSnZ~dF8$?ly6ox}d-R~j3Z)HR9?kwN+mAIJ z_H%}9wn%u4h-Lg6%{JHX6lAZP*%=tbi21PL0&+rqHmURm@78T+qtOo4V8AT|1m6kI zCbQn=#L2;dA|Tv*?}3`NBOOYEy|z{xARZtVoj;T`rirKZ4B_u`$Ry3408IjIyB~$X z4ri3&_vtrc>yASm@Z&M+`@Ji_;oxEt{r>%l84Vidd2CkzyeNz$iV7O|X@6iZzNi`R zGeW+sp9d1UWt4*V<$KRMvA^J;t`ntyKC)5NSOk@Pcb~nuvA@4LWi#!Mj3MiVq#9}% z9u|LJEqiP{=G;{g7OnX#{yvN4adB{%TorZ!>*puPr~{3;Uh6FxUd^DB@MdFbsJ>!Vy`}&j_^Ha`&%% zpTonxl+L19fg$gs-H=QL|R-me$mmbP4AaR;YRmtrg9u`ZB80K zyT#j>ku_2*lm`4VnRon?SN`DRlR>=&{3XQiCHF>K=nws840*0>WV)v2tFv@kx%@9t zETt|VjhWP@RL+`pVJ|PQrOQH@Fb7_DUqN6k8m4IhXZT-Iml}x&(#DD&0 z*@hJBdO2Y=`=s>(xDMouxFo5%O3&||{?LbNczf}5fYJJW`=eX=Lfk$y;uq78g;B42 zATN^}jRfkU)(ESCvVcSFVcLDA10L$lF&&xSq#cb!I2QEE^YoEA^XCCI=M4c7_ec8> zBM5Pqszs}BF-)v}4`{{c+5!MkZ14Xx5`f$PWmem!)$ zG%*rtO$lWW&6h2h!1LclusJ-&ZUo}@x2PS~-IlwLE~-}tB${;6X>hJisU=&SFd2hZ ztLuDl=rVG&rFmf9Ks$=rP7_x&7C6*m;^Z{#x)?!j2Q^vyYo1^UWn zP^H4&3PpxwN6zwJ9@kKNdzK@#)S~^tRLsU63CdVAf;+}PGNsxi*HvRIs*chpB=wQ} zD+xts3GF6noa^g*3h$C(XsK9E>cp&pS?&m`5-GU7vs0#;+i_ay!C+nWrTEO^I<}b0 z$yfL=%eWW8;DqT`Mu5C&&+J4~C=*XOo))N5v2g&G=~Hkr7isjo`5VQ$X%}u0tIS!>9`~*gOQF%Y{T2jO0xB)$_$el8htJ zDrp(<-o4JAQ1&;k(uL)jDkE-byPd$6EWL?x0cvN6YTObBUmu3#q}%&~-^^F-?CZF> zcT#)Wq*XHzxThyXvaTOzH58-$(%|TZ8cOv(Ewzjr)JiF>64&#z`h(9Z&AA{By`W9PRbwfTH`Zt z%-t%yx6R_To+A?rT6*E{Jjn#hljSJOn%~yhSPs)&}(XUsubwNVqTTn6)7(obXQpfbr{t7!>_Z~td9(W3D>(PW3pHxgjw1#8C~Ll# z?(;)>togZB9metU`u6S^OK3!JvV3LIUcPYdL$(!)haSGn@&Lz^Y8Ht zAQA8sr@)GmV1{sUmT5E`xk;*&sbu~&P&M+*D#bkEeK}eqUi+HEs3!Mr*_{zrqA%1O@cGKkLa5O!5p~~*9?d0MQL~s9AwvV1MjBl47(kXwv 
z&%_(2&Dv_a$NhF}^yl|@I9fQ7AhMv4E=gl%|I}4B>oU=bC*(ubI^!{|xb^n$15t4B z@;hLlky1vyhtf1^Sg^-A1;&>iIFCZ}UJG;$swBZk?d>8%3qxMQNRvauRJ1wlI=tnI zIJkUBz)!xNsia3{O8sj8K_+eE(m;JYk@n)Qzf#+j2A^lw_A9++qvcZVXWzxe#p>$n z`g%41Ha7O|NHQlI+bFFfS=Zhe7-344^gsV4X3!s@VDsbW{Ls0vw(N_`YT&$ai93p2 zq4#+PG~H#V_Bo6j_`iMvRFvTDbIhND%?vg=E{~53NieQFTCG9jnRcgXbYFM~#?o>f zOz5=WTwHc*!H+F0S0GWVJuH>=@<_RIRK2d+mD#KyV2jkHuC(3W!lgsq9Q)q(6^FUM zr0z$(+VX9fJWSI7`TkjSk(Q17yZ5V!LrL>L)PD$tl_7Z%zqz&BgbYHRNOU{GELHkn zT%Bc59MKl7hr!()g1c*QclQt^XmEEAuECw)E+M$PyNBTJ?i%EE?!B+-)#C?M@S|s1 z&hB&8`qn<8D~&v!7@tEzs3T&BQkY7=5HsoQI#s*4*0?gHFhWSWb&_M@k_lca#D1zf zZ}us#w%OlzeN`U9D;ZTTKKyET;p6)D(jXx8Fq!F$c&{uDqaPX#Q~>;cbK4ig!Z9R5 z?*8F;8Kso3a+J*;=(sFmpKx7|bIlZ+o4fzuczFS$dVD~Y0XklI80ytC7iB^_JA-EL zOu^+Xv&Qre!KaYyB`$0P|RhFv}STvjTY0nlDbfl)y&952y4x>(w+Qp z!M$Wt@Bx?Vhcl4TgeHSXrz@#r=A)y%mAcJES1B>i&uM~$iPQmobkSzyFCJgu9fS}= zvAnJfsiIiPi8+aExD$)TA-Xvsuz}ySl5DVJ$sxRcUB%o9lznep{WZ8LW^S!mM+c03iX+19(7q_V}85@zf`8 zqUWPN`@-llx=@sX={&TE%V(~@AatVdyQZe5{yqKy5aSbD+h9~w4RUV<^vI}kqn^O{ z*3Wq!MV4tW?EW&OrC~3p090~DF)S8MP%6sJOIm8I&X-bL&M!+LD1&u2>ZIn#92JUY z2+4;RU_h{F>%ua~)=))E1Y)fk{D&5VSb8f z(l2A+rZ29V z6}VK|%e1e(V~mOejN*#WN?g;W+PQEvPo~i1c?zNge1QG4s0Feke48x2DZA8^nBjNbFyCx~IVVkxwGf0NSm!{P z$ZwRe@-dvjM?MsnmgBNLZ%DPUujOyi8-4GIm*xs(v!j5Bcyj;|3U=q@Q(-ZAF2t9OpiZIF)6HW9W##5JYv+e6<6u@~r2dLea0p@^ zJhO9Oxj8xe-`~RIk~hzgadBs-;kt-96~myqV59ZEVHnaIq8x(t{cZeVLP6fiqo>fJ zr7Q}N#)e=+1UxW<6MK9c`RsIx1mTWr4Pj>T(y6>h+%-35M(8U zpa`r5LgAh|-fqoJzBWqy@G2`o}>WhG)$ zUL}*D1)A>(4iipdvGk*&%FUQ1`Xvk;C?8~Ehz2s`uSVyA2Jyh#Ks8pdAQ@^Y`9Y!S zRu`nMB-4o$%Ta?j{8>qn;9#lgv%H`LfJz}MG6xQ+hbabMiC}C(y3j`!EHj!@DbiO% zqk%wnkP}V_?VxpVLN9$QB0ztMAh8xzzkn4A^|HLU-SO6p-u*CBQo<}_2d~5UM)$i3 zcu@9+h~6}TCx=wH6iygE9xe>EZw|A;+GLK}=S}aCN>-M~5lqb2-2)+?LdyI!go20w zwO9vhXLoticYhWhPWzIusQq$-s?#LZ;-ua5D-Jm~Qtl0+{}IV%IXDt~rk?KpOx(p~ zON^YN#vJwKb?fxI?Mhs-qz?I3s`v~i@$*qoVx9Ca8_Z1(Qskx|@Y-NYkiSbedKw}l zxD)&gjl&Hi22m$~y?@@U)?}rer7%B5g9eiZ24VOkh{(k#2z5h)!Xb!73ez=%{o&)j zFUCaPj@Vb=;VCO)p8CRf#e&P@ 
z8YPRg)C&Sk6w6A*h-o8sb_tC11F0xHM&AgYlhol63NlVg?Tl~t<7Fdr`#7x)lYrI< zgG|VRg@AIt#s|1-0xQH?E^ahcsDTLQt37FK9#zK9<88?kD1Vpe4m6%J`1ZF`SBu8| z@249EJy03XRHs44H7y?Km^9>+h23H%{)*69O^t(**oP~)!p0Q88xE>BdDSre5k(}} zLU?!#*Z5qHf!n!|XZFI+bRe%E`9d}eI|zvxz{HE91rajmB;aqHt5hsEn@pe3#;DfJ z3r|hTtTN1D$bN~jX{}=NK@Z~S;ZAK+Xd3Ad&Id}*83E5iEB-|t@8AuK{sxgX9)_We zrrv<#VR?b1X16D$?}ZtS|Ckjb9C{<2=!XFnO)S)PJ*FR{j}WzDZw&G9d(^-@HZq}K zsdanbW0sSfb>0LXs`_eM*!x>hDaJIC4CEvk0}6xx%8BDG?P%&kMrc_X9v{|F?7>6{ zt%pahk+?=Bg1I>!!FDy>Gm5AeUtrMHo8b9c-&iUdLx*@l!AzI%{l$>*d-tG}HNoIO zo`JAl3mC)-wG8I&_GQg3(9z_yfkQ|iOZ(c*Pf_>O7>V@kL@E30)PF9XO2;sy@}sKH+xPpkE9O*0XlG|6 zk{BUhpxA;8-Un7zp-)e1ESQ!k?bsyx2q$}Je}eT2s&^kpR*oIol~zZhxg5u{^{PqciV3u)a>aLP6Nu68UIRS*e;{Jt~m zqtR_pvSITCUt`Z4vK(@WxnjC(iLz`=ZNMO#MqD}&34O2BMgQ=N$NQe~OJ{$Xx~aTM zFN>(6^q@vAKN^gT|AUA<$RDi~broat?VjeG{nieOCR z9vW!-#VaJC?#X)2iraFP^~k?Axu!p{#Z1Ni<*6HM92|prr|0#gj4R2yEbw?Ga6Q>~ zs{S5bV};%6H?Tdx4$DB2LI-me(_-RCPn4!@9=%44ehEI9Do~@6%=@k3a{ir|3W!Ub z>pwp2IaQBO0F!c|Z-c8X1g^5a6DkQ?oi{;+a!i~xsbnM1fu*r-8ZSG@j=tjkIEgB&>9-34iKjxbwgXD zCrS1h<^HmVvb~9I0{--LUalDX&TZ4G$Sygh5AeH|G%slDosxZ_E!1C|tiw=ASid58 z2ITn+h8=I{YGbp&(rlvlRYK|hEs$w>vRV{7?RB~ z-e)$xcIa2_vQ96BlM)7SNzZ%6DIk_9{u21E480hP|9sv&H z#d?RloZKi?A6Y)6gE!Nm4m*uXtDs?`A_cE@MV|IKl)I=GZy4vx&S`1g$0#}z#R=l$3=QrDBm(z`nF$Qi)%5pI^%AAm=a!b1y#$|KI2dF#MpjPt-m{gL$g$};VlgntchGbjg{Ov^vd| z+r2dF=`P{n4`(H}4Ds=|HomEr#Ju>Zs7$ZMQyjTpJP!^j=N;)02Z2esutlU4#v8C1 z%+A&)ZmxBzJ?jz^6Mq6ac6E*D-}_l(8ClVTNsl`ODWD6ZAHuia)CWoBT{(;r41B<9 zK&`vo)8n4;U_!)b>cdx9>yr$wwf6R9Vqrf)?#5K@pLWq)uomD`Ihn=F>%C*GN|;9L zRC-r9P;=|{AhNdhFp9eROdX9xKDh;t8zUIf*=l?9P_-pUa_%;=#rf}iDcj?05n@e^ z{tb_|+#$Hq^4Y=XrB^!EjMIv<^-~q2=akCbG2onU)%N%2+8bb5IvwPSBG4oc_3e#0 z!7}jkdmm5dVG?qtx&6J5Xl{1%Bx0nx^^f87rDnD}y*nv5`IE)#RWD;#6pHM)kHo`U zP-A3-CzM&Q*Q>k}dtIXe^YA(%R-YflHzGMog9g_X=YCOb7QOATE^wQ|B!G?(Qs8yx z)#Pz?mX?Ntq3FKs0ueJX)YRovlwNX>%3@&JqNK#*EX781G=ONcTv!(RaAC3`G_cj` zYO$h~F+M(iI@juY9E#kuy?tzbilGV3+&?hS=Ukkg`l;UM?ejzikBbZ&x7Q6d3h}H) 
zU!6IiJZ(EneRzD9O9Z+YG5PvrzCRsoENV~SLj5T|VyLNU{CnR}rqBOb=zV;HWF;x3 zorqs*zYEik;9C-)Yyc+!Z!$#Rn)LF*>i&z94kxPZqovx}YRkmP$Us`;j416YHqM_8 zzgIZV3#GMI?P?L#?W!&`5`$*ajKRg4NaiA?ELJ%_6?h;_(du@_-DJlktH?BnE7JAz zr>3YV@$qj}g)sXy0!(UJIfbkY>-$zdDDZcZDjp(kq_sBrL^?XUvydS9@Z`isE~twJuilf2vsYMxSmxZlqGUIr|5ruQ~c!-H6eM zRyp|F777039~BvN-CM2eF~-WZdeo5Tio0;Soipl<_5?+WXw8;h0Zmx71*-MiEc5cB zInTl4${g_)>jzVUKe*H_n*BP+F)SS%BD6#?;0VM4;}R$ZQG>NJbjTE{;z8|JAT%p?3N@}$y4uM=R3p{7xfL@87a)#} ze8;af;oulgFIX0W>O~dbH<{V3s(%M$hBGw6DEU-VOB7gj(-o@ArENw|yn+}no;FLk#I9~fwAsjG{GN2(Ya5+casev18=cG7R_kOs6 ztHYVd+nEAreb2{g^0l_6_9%<1Bsz}mo%H272S>ftRU%jvMx0P&^-O-J!d!;rg8@`x zdV`0Dg)}zAqIlAJ2hV%CbJ3keyyf6=!t_x3I72m0KXh~U0yHbTlfz2tj$?z zs`>^`&AV43phCxfeSfx6yQ7qqTfrc|(F&cXa+dt5YJY*1jBIN@nc)oKLoWmQ7tq1Z zkiS1@$?;;{kX7R_H$5~jFK=WjXQ8NJ>1;i%xQ|5W<^D`JyA@#pfbn4dn=y(;kGk3n zV5}?%8jLO)`SH=dad`ZS~mc1JnpPkM+wy69_oUW$QJ$bP_*H?wFK$fJ8s>FUjBX!^x zgqsIh6purNoPy85F|+}JRUf4PMZEScge8{Og3srb(v@4WUy<|kmagtGot+RMjp)jS z#+PP9Bhckl!7<{lz63}Bi*QCNI5cJ9l;R6?D2Pr3c2mSCq{SZ~FV)l%A|hn73Fhxj z=nf8&aN(dn*VIslXr{v7f+ov&H6p0u2(YOzBD7~!eqGbzsVTm^$`(++@Bo?Ie{4J( zFNe37tp|+_J9;vQ-Jz`uIO8m^>5_^1sV5_L=(B1u49ziiXPbe?GNFw77c+nT5j#QX9OQIgGSsN z2{EzlCv8cCfDDnJ8GYbVnK^?lze?>Ia?rrGvx!~UNOu#ufmUB}+voi19NndpT(I~A37S%w}nF;1}3dEsxJZ0#Qs6Jr} z?ezCoC+D@aEWb&n(Z?r{dw+j>ZL1udnqe~(Fhtdu|Jkl-W@cj(Q;}E~T0@GQEY6XZ zA@?2|6Vph?NS#7~R`6C0vXLMk*cayIrjr4NkvtlSJd9$+2L+AP_mVl!yCAJZ?$G`s z>TlmZwkT=L>CDwr@!qN&9DSPj1v@YRt7^+y}O^4U0Q)U;#`MzoJ z3eyPr85`%zj@VYfG^&&?4(TW>|8P$fp+%b)!eQzuj&R@%`h^}!8`9U&0k1h0&?nVm z#gl?55`$wjf~HL zQ!1=*XJ`>YHkPD0^vkl;5P}=`u5G)`f7N&%YK@PlIVh)rEOmg(lFQBf_@HAsq22mV zB|1%No9^+^=T;VB-tED0gFgXD(wG3+r9(PoO(*n>1VRh%rcbB+pzPWy&gen*Km-eDYBMPTUx;y>IKI>RfBs zdXMl!&w#doW4zfUz6DD01mg5V$mT=JJK)6?*tItPhw!Sl8uWd6Qj}&EseYZ+Rl>VDt{#K_p%E!uNxa1 z1zG$#L01oLwoHjAXw+Qz;&;lpOu%9c#ovMP-NguK)vn{Yy%{JtYWGYPwb3!0 z1J96RCDiSFP3xnryt5TA>JqF|iuQ5~kBKp`CWB$DO}_g8tzT+bhybbitw;3VhmV}B zQ2yPq*;7tZc+O@~2YcV%)eD{f;2j?x7Fw^y?ty)nBV~oEwV9cjJ*}W}7pZijk(0-C 
zb#$bxw21XL^W-|!e!uUU#b0be=IZgz=EDnP%?tgB;^h3)5Z7qoGxNu~V zqOQFF1&%HWZoR>p1*Hd32_|7tzkKD(7`=(cW+y4hD=OQrzMarqVCGaryN{xXyEgB7{Iq5Fd1wmfEP_}?8}=>V+zY@gYs;fo8L1&o zG1d~#*RLdq&6I@A2AwY{6DD7W-p2@ z%EM#OScg(M_t+B!hOiW1&9XZ?gny5ynQsiu>weG7!oOBx_tba>b5f$!2On?f&m%i% z?nEnA9PQ{RZT&QLxNRuQ;oAZ*`@AHUl479`MWfW^bDWeIXb7PbT(p)}UW>8a0SE zv@-DGF}np>?Ch?GOFbS{geu!!H+v3}VUOzjv8n4kAI{6UZm(2*qnnzH$B2vID$Zb# zn!C-c_4}e)TWw)cl89v}BMFvl0FDbs@i7kwwQ8XZmD+ibKpPRbDKSa(*RJEuW9CH+ zEA(%Fa5%11eEkh?uuekpciS1G7NbU^Y_LInrjq|ls3@aozxVhta-iHXiq0h^tsH55 z;h^98-%lSgn;e%D1U$;#8<{zT2d|zSNzC1r{&*~H8r|z{c=8_C@x;sA7p}vA;fR6-1y@5a+&87 zL*V06&)OFApW3T+_BCxkVE>%8dgz;0NciQ7KUw3YYj6Zb_R`iljO}rWlE-TUo(0%m zSG#rF8&cBq;Kfx z?=D$vW!;W_tvu_R2bIjsl%jWjRPiiT7}rRS1esv7$Z3601e!oiqg3iL%<}5q0gbog z>&#v&=Pyoi{fGPd=H}Ghd~RuXbtgFOW58zN6|*V^23y z3ApaflL?PzIr9>(0li=~?bn`dRR0L$jH+_CsW#bBUo%U6Mb>Q~w@tq+0QNaRdA-fc zuY;|Tv@H2~L4^g#=$wxK)u%^M=N&I5qec4$5kmq2DisBYeNaAjTrWjVo)^j{tIo~> zPa|Sfx8dfQZgU$XT7<-1W)_RzlkuF9ATvhurcHMqD!>YtPm5u>^;)|_2k zp+WI*!0`H#ABBtPWK2%6wMptc%tI6$(gRSvIa3eLZk?F;WyxgXY}3=D$Bz}CxVgDO zap~~V()|M?O2WS7Zk(9iVW^S%?%$))aj5l!@kmHE8lCpM+1PaO*~eiBv~>*-kvQ4e z!TygPNenbm{&K1Sme6MB50qm{7&^dX!-9r{bQwD$OO>g1DdPlsed5?}2=$LEFm#!5 z-+0XPxhC*x11-5#d9>gEl4Ni(92)EDhQLyz85S`Plf@4GX!Uu*W11k;wK1Hwdvr(- zf;K$S3JKC|aS{cvad-FMznJ7{XiRR*EN53&+c)Yh>!QEc=Wk(|?4aN5Q}(A zT^vXgG4?8NhW*vP?yyLD_rvZ=X8)J7|GLaMpt82r=RvyCP%T?$mi_uXp@!QcXJS{Q zjk~srV?Q1dn3x@t&VgM*qb!x@+*d+pYg7Fdaed6osO|2oVxxHC8bjGnczg>=qe|%O zo6nM?9uW?cT4r{hXuIGZ`&jB&6ABTprJ6>E7wqobebks1^T$fjpa2T#(HL>@8oR9R z`dUHwV3I-9hnijam;dLh4NM?k54dnf8};oHQ{F86nmAo_Y>!v1F;9ITGk)FMb-CSF zY+Q}c5cpgJS)WB5YK5lKWT($*NiL5Vg2yh=*?oD@;SG7bkYjJCa+^AvLGuy2zyWc-scKYF$ zYP7$TiDR{)&R6QnNRt8+_F-+Ze|I)ZvjVGPR>-gaaORrh+r`EOr?VM9M_n*^WD-ao z8L9F4QP1mHgRR%~me%c51xu$*>x`MtbFo=y^@5nI3KqAm8WI(d^4J?Il0^w~exA&Noyds( z$jy9gc9-7mV6wANsYBf67}Phi=2zMEd^)>tWTfD zGm3ch>R@!6%3Ia?RwTKxQhPvMP++JIM`S=3OpVK&H zRqeV#kAJPpjn0inCQqOPAzT+6Hl1 zyJhn9B>eUKd%s68l*tKyU6cVV(?kEOYA^!nrlFX~zJVy9_c9cx-JjF-66MyLgOAk> 
zS+=IP3KOI2tcF3q?8F%=tOdQZk>S!B9rDP@pC*1BPK(xz)piKpU$A6^Iv?a4_30tl zYn;0L>9k((CL=4GD}K`X%*4v-Q6weBS{*r5VQu;3wODN(5I_NJjKN_(-O)y#A6#J_ z-xmHaP5~~mL?Q_8izHTpjsD=vz{6vDd)TU_HFkHqUJ;KDVw|6=F!_qj>-RE|o4X#A zbYHes2P+;*OC9q2XT(61>=@Mxr<3u==D0=VqAYgdyYKyWq!u|zSih@co9wZ9gJ4bi zVD`4!Pft$)sXAw)?tuMZF4gFdjCrpNQZ`lo03-f@*Dj$`HZ9-BYOXyxvf%SU8N` zr%R&{-N1$lzrJ+(_)TT!HrY?m!k?u|)=+B2x~%`fi3sHRlESr&LbUkP)7f|L(bSmj z?HN%%mHvzG{k_MFwymiKcnh|h{68&7jEzl$D?ZKS2J}WObr34&7(du~UV@2~k9qD#^bi2Gf6tUHIku*X?X&C~SLKO;pFJSiR|@tY?spWZY)W z6{O7?PYfL)gg5(`yS0VK1_rVyRx=AjA&3KxfO*EdHHr;P8Gdjn&!v4tb-yrP;cgLu zgbZR{Yjr*PgDj$FyxNiEZLP3S$Vzu;%3p2!xqhCZ{Y^^JqjwJRODL;^3R);qeANRK zNL~HqFAIQw3giC=HW7obuIcIUwwMvCS-Nag$)>f|8PLS0KhP@`CeyU zy^mixkL=C@PV_-%>bVQWA4pY z_P)PgVL(WDaqUJU@(Z_}6o`grYo7LA(d_0IBUGjs?H> z(i;l~C?8O@LueMTCy}!hc2sZy#Rq}zOkV9KCANvEDF7f}Y(jm02J(8E75RF++B&^E zkn1@jHjF~wioxJxVK0rGsrTb>lQlBKf-$;g7_n}X4*i*f`Qhaa{F#MAYnoC_y$ow) zYv)ltwsIMOR*Yml$fDnlasREnN>(-9N?WT>l76G~FS|$SC?ga~SVL#<@@EY`!td?u zTc+89+VO^RkyCd2C+zRPJbyqo&+1kU>$g}VwWXPyEi=!uu&SiMLdd7&)Z7jb2kN}JR4-Uh)g2|cO z>J;)2a_BHmx-#~QwIiWU(QU?5o}_(mym`*tzT<@!0h*B?(e!DKUEdA-F0Qy{vd+~85YsHOl^4GW5cnc*V{j+&bJB-w^GsVP9y*#(xd{^5e zuo-~23uOYi#jxS!OU46k zt zn;&Njnv8=Y3vkc0xeI@#K*F56b%|$lgeFBRbm;I}eU0?tA zuju^=z;`1e=9*W*MG*6G_fQ_$=L9ZN#Np>F$C;KT7B z$+48fb>hZG5N$350cS3O&%najts4ZCekt>>xU(9?fO3BXY!%VxNL-Oi9D9SUdH3fw zAB`CVEu181;BqBF0hdd}+>pB(+Yk~qKuTot{)&B}{Pv7!BDQ5$)Bbta_b+!&g(!`m z9=|JE=t~LqNM0D9a+j4W3+*Jj0nJCt>G`n>jj6~k3BYT|^NQtk4P*zyam4F?N8E{b zB5>c1(?iu{GQ7qRjaKo6d3Nd^4J3u;os6V|?ej9yW(gmFgYPL(1}s-g%cZ+OHJrQP z|B!dP!E(t-+m>`h#MEi6OL}=`j@zz=n^DJ|ac7{%5cH=|gmrXB;kSy#?zlAz?I!}h zWF$PCqtAigZHn+8TAvSv@m^uYDSJzl;SlA07)O;bh8E}L6l!Eg9t{-ce$g60OioVE z$Ll8~2!)$dAx@vS87y^heh{Vim=-XGBB1I@6a7wp8Eq%;ISa+0bPxEjJy$oBow@8{}GO$_JL z#+B;BlnZ^yotGIO0bA8K{eQR)uSbjt2jAXUf&n>AY%o-LDre645V_1!>^E~`>v(2# zu_R;&=t$%g5IIQikhKS-f_bjES%(!8W~>Zpuoj-p&b1}BAV%0L-i~$5macLq%3VsOdOf98hNqyClwLN%$Ti{kKQRSQZmxm zU0q^2oLZmKo|>=T#C-7rzVOkAcd3jpLH^Z!EXr2R(4_V!C0tTqZP~Osq>muO5u**1 
z0Y@O}sl^jSTfpEkRxcWIMAf{BM6ls;L}f^R7@Unf_WN1V&Av&$2|iUHC4tC<%nTqD zSs)@g`B07R0a^shgM>*%>RQ9V<`5q@xu-EoL76am0V^U0L?rTAmtox~o1c|BGjY4Y zn+TgwHjo71oJfm!?e1oz)kjTb6X{01Gb>k8#;m=$+V}OKip-Yfqu+p9+qFZhYiYO1 zlSDGd`oQ$;2|$K2a8snH0-^&HQ*D&RE*>d1q8H`A|IjFWHEc-O%m$@@ zSEjhM-N<+K-p}rJSS=jRaiQ*Y7w<@tuxV3p21D03m$u%TGKCFJHykA{i#dFRQr0o_YgkISs#o;V4&y_|o4^gNEv5n<%Gr@FniB zQp>t=a@o5fLh1H$VD%uVrxnViyfRBPwq=2X zmH|bAp>-%By31@cUrD34Ov-iV!dxuMV*1`Dhb8bK zA#>6aMibqiVWPk?vH+IY7g?MJxIJQXoEFS|a-qTV#0K;4O z&xY(K`%fnLnvTXp_~$7@WGtz;se=&e!_8EXQsOYq62iqKl2DnD(Aj;HaInj9hoR<> z%mgm+0Lbfyu?i9jWiH(`23(aXoC#GD6IEogO5?a*PKXM%>YD2n*y0y`SAv>za#Q+( zNL^+f@ov&@=l8Q`_G^0_%-Mh*By|yNB}f88ltFtcVvJBR84sCj-*()#WX0*u(n4H<=gRA z@#~M;!naK-JUxq=qTAWe%yos<*c3V9Y4D{66+9^f6|(aNmwP{M|Q6d5pz0mlA4pde&!LK{nA3%;x`2)LT)PAX?6l~Ao9CEyf7acGV4r%@fI z(h2qoHUEkKD7O&jpho}er0E6`?@b~V{Z5m?7p{Jy zEB+Ff+B@BRrjel3=Y@0jx`zoaAaw8_GBf+_ADOAZE9t`+h_HK8Q|tX{@J~rnSHz04)U1&JP8vQM!Dn!>KVCy*hWhJJE4Xb%_hJ}kY%;xuo)Qng8l7ZMgg!8R1lHL zsPgE@s31V0{K_xvi6@mTvc;B|jhm7%V}Xj&B`?dL{MFD@>-C@_ReP0@2#^qv;4KL1 zdB>R49%Y3leu(md@H1HjsY>&^=(v>^SA@Tg4qfjTzF=X( z+Ek_U4ww7$Rh2_3O1Hk8nQ9NE!oJk!Akc#W@VpW;hawf6= zvn7LtF>W*t!GLi^&H57x)~yB%n-VlZ>!Ri;#yI%n<9~xHFTn0a9DfU$SE^t?pt`v^ zI0lr^`1mG^neIvQhL`U51=2rx;FC4o?8#AvM*xW|3`}*CbYrrwfmJ@C&^g>}O-ey! 
zbU%L@nn0SZe<0>(@pyo8BG2vYpO=WNG$wNVt2@s{H%k#rb^6el93i9ye zs!@4(=9x{r{E!-pkEZXWGu2=deu71NFuss}Hd%KfN6#lm$BZhJC@d1C4HN zas~n)RcZ7Un*O5MkfL1Ggfm52BwEr2A^BRIJlD*0bB3bbGicKgm^X7^;^VWj2L?dU zYxqYYTFnE(5B*0xpiLh}P?8Ex9xXfMHmWpv&)Lf30f}CF_~WA(LY+3CD6F*=NTa6x z5btavO^_;(ha)FPTfyQwDagPvkr$)vZLsEXfqQ?8+TGPFA>iM1JC~dyLIKuARbPPc znp7<_W+f8heorOU>h4%zZQa#H7mo+pN4w!BpUE}@BNru8I(-0$&Qm6FIRw47&(%j8 zZN?tI@uI5~{iV+ycs0JLprzozG@sg@CR^=977|`4%jlk1Mf`kwJ6#Cx^&s_wdvl+- zhw;AD7)W~f9S#*b>DB$#$P|k13v;~qP>UF4Zfy~c!!%uitJ5&LgQ6vUcU;PB>l-2C z1FT_1M2Lu+NdCI3w(b6<_v&jKE{KeX0752Vr~v*IE)jyo%_Ui(bvOsBn6c|#20ySF z_Fz)xWAa-5A$dbCJqP({NbPv2{H)?=P_)74B+>8Z24VmR7R7K+&gWTU*V@%(ef-GJ zP6OLl-J7I&eZU$;du_Uz$fmx`$c)vW1DscY=_s9iuYUZknUc$SU-oTb+^Y7#P)# z6x??=L-n&E7ziJS9Z@W z(E+r)P!^Ayv8ySgYmha#qkoK)j}~6GCOkQYKBK!o%ZbHQ<7x~S(2WJXuUktpRPw;Z zN(257(^PDS@=*8;j2wUq@u*$oU-5~}?Wf$uaX{NqXyab8GtoBtr;GSC7qTDEP01s5 z8PCZZUk}aMvcFp%s+E}IuBCYCYz*%Hrc=lWz6>r$zHw5`6I3-2m%M?_udd0$m1dY^uGuyzYYs*N#X3(&KQJr=>>}$=5hh zL4lKkulKGVHY^rm+C3tXuWCk-03lE44~e1_%wWvGx(~z+guo?&I5vKRc%{?5x5Q?SchMj_#74AJog`<1Ns(YbO@3m$)Q9?k(d$8>jv zu4jc%I<|MIWsyKu8e4d~!bXElYs&l3s_iEBCS}J-wM&SAQ)F+{!7Vg?Ms3cv4nvxh z3c9BU0r@8O&wN#EUzm}-!FW?K)K%W&8^TqNlN5Gz*AlR?9-0TPNazP8H>qmGV_}gw zX|M5TWZ+=*pQHvNSvjFnqi$U&X~sC)eFD1Iacq80faa5gSMQQ6M?s?ll-B*PFm>SCNXE~=sJ95-Scse(O_OgwNkMrionx^bU)fn&m5sB%)Aw+(W)}gW zO5dx^bl93^#GM}nRDc!#F!tT5KCvgx@o-U9NKtWA&C7D#_DxOA;f06WR!rplY;}Cx z;S`VH`U^Sq$*lbU&euCLLA z=tP-KR5?P?8C>%DKWsFvo+EQ%y%UfmYj6ywtG|cHlCSd4w_C+JU0$GO^%@UKNo^0K z+pL!6QyDaa+^=UD0AiBs|M#{oEu`EntM8Mt?avdvj5vFQT%6@SvD zRo^>!Vr#$og|pg4VluR!Q7-TmL93F!I8-^OcSCrgemejXC{RyBTEK2Q_dw4AU`NYu zrS5lcM^;idpA@cVLG(&hYTT86l48Y_^T z+|4<~#=*J2H&b)*Pz%+}HQImo2y|M{a@Et*yZ+UO9RbwH{YlNA>IJkm>)+!kqRO`N z#1B_KUGc|_^q(%J3F++I{ff-Ufa9-^+?>wSW4B)u16uku6!HR3F~fgeRo_0KqXTZ7 zMDQtaLdWx-q4$A-fo?>+PH+1YJS2%&-v|^4j1fR-oD31RHA1ykMRGwaO&5!ug7Pi$ zM{_jwmWkHGW_tk2uVacV>?iU1EF%*~?8LKJ1Nr)w0T&()Zqe`du&c@5e0do`UvIXu zlB&2-LkIkkTj$~A)1SW&T;*ECf6COyaG0EUb*l^t^!X(}!Afpzirz9me=(TK&Qqlb 
zK^)%My4(`@H8@wI?4B*)dA2Sn+TsMJDM7Nf4k)7H7s|%PEV$W2i28a^K)6Nw z2cqrwC3L#ynwrx%w5+Kd!slm{i{%EpwbiM+`HI6o$Yo`bZO&%|FE5clw`K|s=cL(G zN0tc=ruv4YuiT1`Di(g7@j35bMd6a8#@a{^F{%^(AHL2pzOLr|`a#nejg1CPV_S`l z#{~bQ~Hvpc6j?-s&Bfo#$h6{FH zZ*Qdg%58xXD&Q#k^txE1zlw}&Fx}4X^3M0@ExOhl%+dCK5fv{^J)g^2qjSC@p^yWF ziAS97?jGZ_W8S38K5ujtSf<7QP_M3x`?EU@TrB6JA|TvcE{u1|-~gD9Gn=N^!_ic6 z?bgOfdD_BZThk0XbbCi}=V2O(>d3s7qk;JGF z8bXuX<8WJ&nVWkZ1P=g;B`HAcm~i&HAs&-CcRa4%9|8}nyAwn!e&hj)ys5O?eA?meXE+kn3?U|n2yS4XFQx>p`$0ryayw;?Rk04 zE-&xB6_Ne7sZ!Gnc+~L2bvmTcJH-_HxQ)FT!6M+);;;}}Z!5IeJMpGF*7xq5PWnEd z!S?igS0OBIL#JDwP=M{_#lzhOhaZEVJSxWPChsf$HC|S?bqcUZ@_CgUi{Rr~vN)QY zFH{*D8)K<@zod+YJ2(}xvPR|^E>^ec2vMR+QJ^Dd^UQbJYIpinl!xf|WZYeDcB

BZ`ER*l-~1EQj~2uJsNv|*)yto? z0AlSOc$(HExx)%M)HDdl!BZ`G}z7#)-*sr^s4|mCK(I)2Ri&pay9mb&%;?)7MFd5=1Nv7-VZEKbNp;#_; z8vll4@hvqP%&M#7vy}cuMMZ&qeY19lBR?8Lf;H#MzU1+lG&>kqUrt|cBC0D;7wPI? zsw3l)3MOV{o&BsKJ_RT5SOYLTpDL}~0%Ut+~#IA9d^@**Rhsg zn}k*iMLB|u%^`D%-}bHN!Sm#0YsW`hdD9l^I)t_sWvy1rEKlhQxdO{7H8HUykyT{3 zL&)a(n@*%H|%Pu1|>cmRNFibAC2y}htdXFlPa z!F4^F%2=P3!#`b_PDhV!_;k}3tkV-?K51e}jH=x%ss3x4ZN0?rLji*duHBWsJ7ga` zNXR{7b+yIyN@M0b2ru^S%O7DtR3JU6QZ(y*@a;p-vez&8kZ2DI@s&C&o&sRB5rpQ{`v9(1U zc83fb%sEe&mFaS&YdW#nNg)m=uoz75ti+b)pYNF6YuWIyu;v}7^3pKANwaY4P8E-? z3i$9K;ZyLIP}?jQgw?tnq7d_uUV?;yAXSJhu>7Ofk5pu3uYVa4&W2=9#ZWY%Ys(4i z${j1|SMF5z>m}#$m=g9Ql&@%VlB^%biYSFlCUB0}JVacsXB?r4+}`d|``Q|!w#zeS zTSDR9h9AHS!K4(5@?DJuWZd+#b~YSin{<`c?kdZ_Rx(xF*dEgivq!MQ6q&UpR5aUG1ZP&B2EL#8I8zPD_VC4t6v$4!R1n4Yi{~#7(mE0Wu8$C+} zvz)Ohk?c}^_6}l9931EIf8fSy3E@bZ<~?-ZII5!b54@rhn&v{oy1i@6`i2Jc8r{rJ z$6{Bj>wm9mEf>9o5~iWV#BsdhU%$iX1+&Ccwt)GATM1H33U>xQq@Ag8D2@`%5<5J+ zpw^OwMk8Q6IFux3z;M!rYt_Fh5G3~{Lo3YQ0pXc5gcvpjtL{ucSVPO-lHb4(qMEr~ zK2WGKo@Mbq&x8KR!9WRxc4DXhS@=eVvBPW~-@&o1qc|t0_~j;%+-_{`_LlOM6wso- zo7+D(e$`RDJt!lM^zXL1JqYUdF*L}l^88sUpZ)T%DeU1k3RjH%^`T3}@ht6Yy=f}L zjYXN0dX+SASg~ntW|MtAt*MK|{_YF+9ae}&qaRqJkKf<+bwx3-8KN?ITSUK{J7M&= zUte)MnZRNukA{GGFe|sG@^2Ul;++nM=(UOl7cg7ww~st-O<)OQyxEY2$Z{I%L!@)9 z2iDr)Bby$KGke2wxt&%37PJUC`M)#Gtd6W%4hr;;z6W1=t=d3>2nUJ``VD7_>J_P(Z^(00oq&u&p*<#% zNm1JX^KuI@e}-n?y4H<>NzzjJY`*cZN$C4~#1l`A9@al-UCsXgnLbTSvRYka%3fhK z9c^rltrj{R*$>t2UhbzBQ;RP*#<5`Zpb5mq#MVDv)9qGNGV$Bned}u7-(TC#YPC6^ z#<TjxBTILG+r4Sv?B`->QphY^Iru^sm z%vyGNxZI3&b`^iR(VA0SYyQtyijIL{It-R8gFv30x!Mb-vo-p<8FYBo=5+o3UfVcV z5c0d*2UV{%jz0IX*rvZieT1N*ma5EAH$NU_hgFqseD{zp1-saPB)J&bB`!{x>zxqE zhg}N{ODe)Z&)#yS!sHPQr#_*;Ra!QxffLUC^eS|uPP@|H7dvdY^Sca5?)Kx=C0M9B zk?=ER5~F!M-1iW9rBtOPoj%kVQ4ihx5OfFt5G_dH!3Cs`$Hb-{toQhBI(b#CJD;8gv3uXkDwX_z_PEa*NdyLXfqhQ5!K{a`K15QKfM67~ zFrEM8J&&)~T{%@NgChogyh#EHskJy|zff7BUIRXnlmn{&ApTFe1;G@b{Bu5BdtjNQk6*XX)N_bb8}UC}&;RCU!lBJkDJp=wT5k(@{rs{_K}Bw_D0nXlxierGvSKN0e;mdeyPK0_niHAbA#iqY 
z7bIlgSBcYEq1lqdBpHD=im_iPBEl+`O-G#H7w8lx)I!UY}V$#N>e5r{@TQ*#8miEA9fHB-_;FNbW@=Y7CmBa{t({v z6!)g-9tFgV|6csQ=J6Y3rj`6r{IB`I?jxi7?;xho^WwSAsX$M6Xf6eOS@#~Y{+g0^ zAs^i@9_vnW;;|Id%VY$|)L>Xnwx)6Up(&X#JMGyaHQsM|a3#VJQ z8g3+1)JXz&}%KiO(7*Cv&}LZ4z0YW3Wlyvm5bHU6YbzD#(yR03f>9f4!L7wW`S#3s4JT~6t5_A4X=b*_Fu6cm{F zVYr_O@k$x=IV}tN;$fqU#9isk_WY~S*o2bMGBzi&N1mQTGx`B3hn^ET_NXwRzXu)O z&F8=oFN2Pk-V$`uvw}4d3JhK_VNCNPmnA`tXr9~GcH;RGPoOJ*nXF%Sk|OjZRhA|K z0mgt%Pf!0@d|CW@br~BGeWf|w`9PQD6RMnZr0WmaROV=!92D`zNhTpT2%!ZB=*1=7 zOy#Da%dbV{#lmUD)akOfB=Ycp*7CWz_kB1K+eZP{|8J-__PrE)ft9ntMOqEzKf|eW zU}@6?)e_lWCtRYpH(lS4cOR`{>0mTeh!6UdI@NE~w}HKX%^C*5f2UT~On=)P+%VjC z1%OGEMzHVi3|QOz1s2vt!$YZULuT%j9Jv>WHwHsS*>;9WwQFRO;D%{V+sJOiHTa+6N`OL%g3x z^sl_VvoTA_$N#8Ql){ao zGYl!Vpskc^WO#r*wss@ZXR&;O8anZEg4?f_h1>ELmE4=BgZ+y`i42!zY zMS8y6^6=`>Fe1@^88G=VP5gCpH}ebrTkWxlF9Tb$;65bvLbXLk37%Gkb3;dgnnx>v zumUeNsMgdX3&N#>OIs8;M>S7B5i z)3&k#X}o1>6Ijjt3D8XQA(?z*SI1;cr>Wp&;JoZe-_%u`OO*t~%B@`n>4k<^bo_Ao zazpm$K0e6=U+&naU*%(INZ^Hz8ARi}0)?ZnQc}`is zSN<1u5$`zkcgusZoaIgrWz%Ml+Y%?UNs8Huhm#52(FehyBy7t^nd>g_6Iq9KKX)h1<9kxzX_ zF!kl_JlT)vumNUA*_&?4bT?J!Wr3;0fWU%Ie0qGZs(>R{1WU0h*~(J@OG_R=k8}y2 zudVBcN;vKDIL|_LGD*%~*0&#&@}|1@Q3~o0|GcO#|9H{Gq&0*agrG{SGnW1+9CteoI1+*_=xWD!N5 zyfJ@BLb8*8RdFg?W19nF;~6y_!~mDPcyRk@_HUK$=xQ7Tce&yckTcy~hs}KrE-eo7 zf&xjthHcB+yq8~4LGL!M&1Sm;nC%j4)SH_SnuV+fBUsLDp7B`w*9F$^zvuIgTqS|A3G5dQ&{=ztgNL>4lH3lWXiU`f0>m0n&%aw4%|}V zA$)^l0B*C_*!Z~H`I7X(_QUQO(r4(OQ+j;%@CnVJk}@>9b2rS z*3}wD@Ags`d$R+1or4sAc)`ob%F_9#&8ZNbSsd=XzYV#Z?9m(gI#J*5=T~kPBmOrS z@+K5P!m5%#h(SHU6WX}_E>Go7hFOCnJbpNEB$6gAAl@2T= z%jUtOU6(vTS$Mk5IiKefTyM>nUK|;Ak+0ICnSCVSOp2rqXkc7tbix&+g5~?~(g{!N$p#sgTF&YuT=E3~7wytV9$XQ)jSm@#1Il+IO_G_J| zq{WDpnG%Nc7kHLXz8gQPJ%|lCSa1K}V94hg6Wo8!)93hdZ`kEI!n)aAzED?L6`%W! 
z0$XMuAPOsrCs}V)rcW2kJmrYf;j`J+`T6BmN8Ak_PUbs{sMdYGxpg)$D!8e@!wG$P zFddm7;z=XsEs1EU!Wb~xP5idntcFXV+fh>QQKQYNpZUOIs-j&Dq`A1fy|1@xo^4d^ z!sbkJb~+Ts&-lCsBZ8EpqY21+Zb3~}<+4o|>&hAS(MR*6R>LSnGHG!JukSuhR`YwB zcbn+r8Sx5q<9{znteWT(~ z9CGA#5O&c5naaqes1osbGm-`NYA_i>{d#gFLXcc<+cmS&`q7r_aug^1VX-KmXqwy} z^m0G)?#Pwpkdw1U-|jp#%I8$#GoC@2bg_#{Jf{8m{GvWMSZwoPd^r^5SW!4|y(84+ zxK9~Gz9N&hsLP;SBFk!SK2$8jlQdklK1BiYjt^V!c`&IE@a=ne)opDkTK-5%al7yoRR#+uKQh_OguVof}_R!I&F#PX5MCSd#1 z@~a(wfHq!(-STFC6z3^#c^PD}3_*lhB-Qum4p3n95(f+V`!Ib>QXQQ{9P8~U#wMD} z)foi@^aL!dKs0I0LV{i;Qhig383lXnZ8}=hQb4`U&Lm)Abs;zO^b7}lr>v~|f#PD* z&(EN+-R7RBYodgGzU-c#gx3%D{Bc^M6nW{V3}_2h*m(Vmc@{dIs709MtON|SCyXOnV)8# zkRgniy?sojI5N8|d!cUgMO+;FR!mmda>+#Kl4AxJYb2y2_;fp5A*TuE83QJV!O8U$ zvBTPYhQE&o!g)NB#w4P(_Rp(sc=r!WJ>6FtzmtOX*4q^?Vkjwu9P}n9CN6r2`P~~_ zwoB4V=tEG}w@*iZw*S*9z z)fRMxIxTd7LBI+s0C#k;)A@M#G0m1;`e6+fK)SmS{fDgooA>LLR@c;Y8+v+ru9z^C zbgJ4@tbjY3&pzzd1EB!l@#FR0?y%D%M(P(2j7i537$$~5s?(m!9H3!&S1d#F=~J!g zCYrYZpdDJQxk*i>3utkJL=Oqh_x4C$AKM?bn3qn`>Go*298Db|;Cv6eAdqv#4IK;E$n7rH72{+Zt!Xw#u?zrYc*lwjQ!~km8Lz0Z!o{aj-17cP@mqi*;w0o z_wm~wQgi2`QT@q1$>)Ktix@^0MysC3mN>IGJemq_$a))&;XmJsm}yA*<^A z_wnrh(`w6fQMk^VDHD+7OM~@~tihE&DL=#3_WHd`mOfFE!ezDD2N0(%^%ox9OgCD7 zyx-z;=Hz)VO=Kw@U)XT;^&y~4{hJh707TZp%>6Uy7;}8Aa#>|j{=8)hx!>h7(jJbL z_vYyq+MF0}4|D}xqpDuv1#B~>KiwV*_Jx0s)=hy(ZZ^z@!lO1m(KTE369 z9wX=;dej43y)-Z~J{HLT&c5IB!a!JB;tJ?+7-NcMFSj*3$CIzwu0~Mjs#x7@`ssd` zE;cYV`7Ez z^y-m3e3JOB&Y=(#ldBI&z$TqSr}V`9aRVa~O5^6CHkm6}sKq|o4BTCLX?;aYlg-%k zy;#%r^P5Lm3_<29IH2^tv$Ey1P^1VLZOS*=|f;q7)9v}i&aOcDc8pECFa1e>UE7ViJ3 zCs(zfHl|Y;_35;mGB(SmS@u)PM1eGA6d>b~1uo_Cy!hQ&&$e>Y%a77B@?Jw7P~PQALmlau-N(G+&OB}LVXb7aAuSE_u2c9;Hg zt;LMW^47C$LqdsAH)S83=FxqS7|ygs1Q7OeYo>a? 
z#Qg8*hd@E&k78rFoRZ`(Bw|Wz;gTYM50_h=XphummhzNX>0vO!gG7g)Z__xCPd3n= z?*}i|Pb#>~C$lrpPBRa75~k|Nb=T2;aqd~z>3pUViH3qgV$?ZWDH=(@W0jqE$p&}` zRhcy4`fQ>c!O$j`Z3;p@T(A4Gdf+>(^AuO%%FTwx5Z!7vMd%lb8x6-e-yXCPu}rfO z3m!bZI5FrGZ*4Q1?}(DS=pGKh|2r*R!E$j0G(y3My)v^`SAW`^KndJVbLyl6M%OWK zFR3=meQr)NXLIm?ep!zqQPTdS+foSP@cAyj%TtbsKcmnWp4Uy-M}2M{>){e-0!52X ze72<3N#aRw5Tr^&KrTd$S-swDJD$U@d2um9!~^JG*FaGkQQ9qIZ*RyK8`(V0M@xqN z8YfsO48bie(l9m)Nb1#lhGV6W0wi}QfXv-PO#zkHt-#N3|5Q^`v-@W;6G8Z)DA3_p_HL$Th$ce`AANtqRb4Uhf8b!6hbcpMY*;hqeJh z?VS^WrKP2lS|yj`6HAZn>c1GG?URg`&`-GvFrT%aUkJK9b~#O@V;OW+dPFGDqll63 zbf@FVosP6WMej}3A+Y(T`YfZddIjRPB*0HhNN4X9*k)j^CpKn62_Ah{HDyIAS+qC*T%Gf$@6an$e@mxjvjv)HKJ z_KzX@0In{W=aat$KOl)&pfYSNe6Nq_C`+HlY~fH@o`(fk!}I9GEIhk4R5k>14%mBe|p{QW-R;}oR_P0 z#^V|6PR^Ne{*ci2*9D*=O;(*w$rubP)ag37_JAXepxnL(>8NPrg_;lFiXFv0xk+CPfQOl&&Qt+cocSwiROtquB$GZ|yxbft*vGHG^pd$;Q zz+quRh2kI=6bLLDfa}C&_v)j2T(Djat^Fi46wef`ZDV zrIP{s1@~`$oQPD$=_2@qL{&;;0;AJ~OZr)nT?Z}=7BKs}j$tGOovkfPP2F z7jj376b6P?r&4N~yr|Q~X`oK9G5B-X)>_U!C=~~ib&_pA$Y6G3-vrWeMRIX?xE)=_ zP8T|TI;qygSgbh}(Am|GE?L*iD7jqs6FmsKDy_As%T;L!-~uvO7;u2se@flR7_iZ2 zu=iJKZ>lS7)zvICYAqM9jX~}o%38+&VxL-&Uh^SyvDTwcbxiRSo2y}Y% zoM90CyMV+7We+c?s|%vF_34flFl}0tEF=uLQYG9Gr5L$kz=K*tzuRC>at@R%(o3U@ z?{eX4{;qbd&(1mW7OcrSPY=83& zR4c<)TOz9sAj`C{TI}|@q}&csfF|YR_2a#qW=ckI)q~|Fwk)}FMWG>z3?CQpk(0TF z*6M7(a&`fXm#<_m)KuelC}`C~CJ3GOAaF9X508;?vgveOme7J|NqKkEzK7RrTUMv@ z(aXbh2{MlhINyNXHiCdN?6<1M&3xT}(a8KC0!IC=y2g6SSA%Dj}L;f5!e>w_VH8(ENE^rdpYsZIqsKARv5bHQLB`K4}0FNN7Xe`1pVZ^XU#w zV7m)a<)*0AC3v{lhl9{zJ3F&KO#!!kfyfw|E-X_+rq?!_Bz_WKDko6kbc1NUiLKr%PCSqC}(hRAa;Uh9B0{geAuxtj1|BiIwdrW~`wHD4IU`7EqI)Txrp zN5SYTAl&Y7Ez`mCdN_dd1T?r8YXLKv*74;(VID3;_`_NygYHhZ_b3{8Qj3Zi5sJDp zmFYfX>lT-`D-$-2s}2kq9m=!WBB-lp31k0QjL_Oa*TrsT`w zxqI%sKGFFeAQVl0Qb3>+&fs+SRP<616EoZu2f$4qZwhz^KPUdi15m+L@|I8RHv+I^ zXmAX=C=t<-um(m}IcOvzW)W8s_D|sXB}j+W7CSa&)-bZy>!V}CJ1aKZi2o`OtwPS$ z2nHZOmymF-h|v*<{YnlHC5ZXK{pr~M#dJEKh5#BjeP48`3*AAe*zofHZG#89Mw@VW zjJUWwnjRm5mS4tm)GB5E`0QIdP>6`QxI%&u%&}iG;NFC?NP=s2)`;pgfP}K{ZkXKC 
zS>R$lizj$Ili3!Zg=H_8E&7kpr_a9&6pIn)>80+M2tW9$ zFGM8a^t#8$(;D;1u<84P;01S3Ibe&Ww-@LEvkpM1K_5X376!hHa%sxq9i9=ImBWyG zp>FKN0UqVogt+YJ0a5Qq|2Q-c+%lcC45KGUB!ZF#H|eJvfb*kVYBWnV9|ZS_%Q;NI zd&zBj4qjmcPfO$F?wpB&Vf|||Y5>IbH5~F_gZbkXSu9FbWvwMfyW2MsVKMSc)Oc%7 zyHQL!sO{L@r(2xF(ut)eW7_nH9%NKh;F&z#);W+nR;l?utC;(ZEubOZvD`X;xez~` zR>pWVo3kb-V$QsE9#KwU2Wip41DC`q4qzi;GDRr(d zuq(RRr@_f?7gj)x#LoYvFPl|%gYcOUG_}ZnAT?N{&fsfnUEVW%{x%KuY*@!+0O9R} z=#$hWgoIS<%l{d5sMX;lBuZPty_p~9$X@VKadV>Dw}M8mO;t$Hh65-^eZeqZ#AFt& zlO8D?PQNgn-{eINiH^R&4FdBHRHJhtaj<%~sM3bO@qFxGps&*doERwC9P zCHx5j9~<*#xnpWkqEtRjfM3DdIa{fk5Qxx=#L;L*0z_RCBi}*;bNGQ!R~q&AaC9|L z&}i!Q7Gw&egwP-8QLSYt(W<%87mo?##5xSSsF>o1wmPRRmOO@xQ*7cI}=@@ zzcxB4okSl4E!IK%U@Twmm|l!`AkM7TCVj1|wMWoy?XOp+NJt$sKBqW9kC}$1l(v}f z34mN%+d80{92(wzataEKzijdSWCTYFpNX?uQ zrItKQ;AwOm9GlIUCTjo^#-JxUB_|pViEjXb8Ruj^wSq5~k*7)vtS$P@i27YTD8!;x zG!t39gORNcBI~Zau@V3>2QW}gWFy{}f@n|X`$rNtyNj{}F`KuJ69zt&>dK3}+8tt? z&GbTETmZ2@L_B-0iTAhmZlN77s{?6U4~%Aywy$b!eNXq}*khXMZl;=f7brwi&Ggq3 zd9*j|TzS-NR*H+QitiUdNFF#Z;LWGAlo^*oc&;ZPhj^t$;^iJ9E-46=W;Q+p z!N&&|FW*F}wKY#RorE2?z8)|zI6B#kcSKA~wDkyB&jWUjG*(zCDL6zCcdkco6B8hI zNr&Og#=sX63)_EWsHIpkcHH59;|~M}hoBoun-1_y1$=6s*i;u`j^VlRMbqmI3t$;= z#KrCH0Uzwyxr^D1iiWH#lRfEYlu!O)aNds)S>gqdeRN6;kbDTNs|DU3woT@`{WwN< zh>3&Bx`R;|k(1N6G-6CW3!?5^EUdkU{Y$vw;?&BYv1pEb6%={V$<|E`w;p;Hyg$?` zj*|xUm8n~>fjy!4=TA^q?tlk3F>&o@T%I)o(@CXL)lVyF^mM8|lHSU6*1r_5g=v|E zdR>H6YJ%{2m*}A-bw@P(P2UHn z`+w8^TMEIPEWpf?!2;Sh0rw&+vRAl&z81+9yJxYOQ7fl-8@#e02yls@V4(xuv5mbI z)ql7Br3%Ffi3OJ6$bxF-yViuWpRx;x;Y%U5Y2U}ZMG8qGU4-ZuCWUbS*&dx8;g+;! 
zF%t$q4h)%dfDo>3QZJo1;Y!wYA-BY+RoB}ztip&o4~(Tz)aOU65)+H2VL5 z6U|3c_{?1O&i~h?59X~(ZCrOz=6&klo zJcWKHeAK%yGWm1RYll0Fgt7eiL6EkPTN-Hd4R~##FzBfPBEA21Tvak4PVHu8crkfl zP7wzJ{^r2y5f+91(MLjuIBX$XtcxIINWG#Za|rW$J5$1JvR>6<@%nWB9Seg9xdbP$ zeTaGcCQ-U>nX$gX_Fpt>!|ZMCFqE>5HF6zpzER^oxk{Kin{=2Vg|G!R6coZXWD{K2 zfV0+TQ%k6f53hw$EofkMudoj!A5ku znqK)7s-PW;dQwxQ*Xp-^R=SE21rhv1 z-zJXk&fF(f)bE^<(;3uB;PaNFCk}69^X!KNb&{dj{5hQb3l{d|K|=1`2frE5SB6u` z8CX8WZf=~TPW1)T7{}}~Do%%hY_!iPbRz$4Y_0gW5v9}T>4$g^e%Sp*2Eu7R5dkOd zZNHC~X?fwr{t`U*FX z#PVHj{~bdsOm6PW-#(yYBKTnArwq=*iIy4ysKIaBmL}Z4Rf#-*pdCRY=j)$|zCMuh zU~gYaH197ib-|<~^pW__QU}S3sOl%-g`xdQQwb(kcu(MPz-n4njSBAtss3wERJm6d zf?9%vhmjPUb$`W;CFx%!fQB`Cc21&wxLiw|a$oxpp0tt@E8X5h^mOPjGCw3nGUOV- zFd#&@_JK1Gkl2tKge)-r@(%ymEBafBO^_gT1Wf7mV=X?G*&oxd8;lak!5)iatknev zx?=HgrwfJ~d?r_ryh2clY#NxN5U$P6Tr*q}iWz2P@EgN|2e@p!1gp(J=#p#>&;T*i z7^6$@g(}xv^31)rRWJ>&J>%&r(yepC$3Px|H=z%aqAAjj-&t*xKl#hGvndh(@ zyRQt@Kmu~J)NO&+;@P>k1-8zQ?(>MkWZpgwJD*TU#H+OYuB+lC>!{=I?d0ol)7;W? zPUIc_MWhC4=BbnVK*|;Yi>^mtDeMkcZR?eKHkZ9)x$FgXIy2PvuRpyQ(}!fz0k+x{ zup(%>tp`Y-v$BB$w%G(=UtDU(ny&!jlY)Q4;6srj?t`bJwRdh20q1X>vC>J01`rm0 zbptd$!(xV>ZULjS#HXPtMK%i{jPQNurS_GFzX-+DFig&Ex(z|KAgu~*+MjWT>dz9| zoMRDkKt2q&q(;^wGH0N74F<6M_a@~=JVCf)eGS{&VZWD*vIWT{q~`hg6hn@`aM72t*jW*b~=#&_DwKlui& zE6P~5_Sf`u$lC{sXB2A@QO#Ri{vL4N>b0~5Y4r7Pw|n`<+-)k=T7D6166wx5Z(8j$ z(E`5iatFGAo+(>(^h-Dk5QUBQtX6CFI}F1BFJA>U=$y_-5Ej*FODIhAhAvW!^E76X zvCb6nd@IDMl-Y{|pCgGSoho)ymiNhWC_J_yHfVzDdRme+JT?{w!AV@%-eoYFV5u2d zr@Aw!gxMPV_4U2}*zXFf9IBBfwx!Pfi`WB5^NgYs3KBKy*+9^r32R1;iQ4t+`|X}> z?7Y~={SJ;)Lq+Cj{dKR5-KpE>n7F5}ug8@dZJwhGM4PFxgv>VJy>Js>xIx6s6!!)+ z%`Z$ffdgKWez3a;!Kd5LE=YJ#T6JuuW+6VIR|4>a&~QO$5f&JQG7loLdP{)+$w^9Z zmGgf76A%f(U#HSn)a}!;f1x+DvC*d8@@2jK&Sb>fK?xJc*sUSvaUB4z`*M7|x3>Jd zeLiBsKT>|*{sOdYWdpKjo}w>JjIP-7zsL|m>_5A2l5)pZ!VFAwX(T-8Z7-SH$}6l_ zjoD>;R=?H9gKeZmud=qgtkr8YyCX7(w={n}8ELj9%Ix#<;xyZ7^msqfKv+nfw3i3! 
zR;bqK)igb%WP(+Z?*;;)M^`v)HS&$3sfa?eKeFDtC>q(UKN7;D*Vky;IBqw5)%B>Q zR@t0Hy8u-BybYhp!NT07TJvp1oVBT#F1pKz)4Yehf#$GTG>w|G?ULQr^q~}^eC7E} zy;N}oJH8vP>vuj8=Axse`tKJm>tOY&MXy(|MttL+Ht_9Aq20ytdbQYo#Jp99o2KiY zWx#`iE4(~Ia`IxrW>|)o{F{Am|M+<)XikE3wtx}D9O#&txt6Feh<$!XRiRRcMC zXxnzz?r>UA)fLvrX}vq$w|bAj5;dECo^L|?WraRCf+O>wKM0$J3?2;L#Q4>G-rMt| z9WwmmhyP9m_G8}bp%3-CJ1TlfrGWPDMc{qnUq?xoP z;qi5m$(2*k=SStKAKP>3HM}t)s38&TRJ-_dz*!KILvD9rvubQf(398=eY$t@@uH>` z^XSv`N_|H8L5eB%^uz$tuc@vt^6hH(?T(CCx3q!!@i`F@R>6A%GK4s5h}7AKYy|~B zXi39kW#CglK$%K zH6tUVJ;tTg-5b-T)zJoT0nfXiw$ktEQ*8hBoH&ZPJg<-ohB=Dd2W=4LvBT`C+>s2v z%#`${i2x-={YGix`@vb;ovSQ7guU}kz~|e}bSt;3-a&ezQ!awzhP2NmL_;;XYx@W^&bPFlvv)z?)QOyDW)AU`!Qx+p`f# z(YHkJt2YrrCwS0WIWfckE2Wb7+{Sd^K=UEDi-*SEHoz(Qu*nHe#Yn~7S-3sC>NYI+ zCBk0fmVmh@hlC_+)Ay$BICL_`E+>?~l;g&tRooUZH!;w8dU|GZR9ESB;rjzBL)(>B zHHBE*2=oYMv+6h#@^kj6th`?VK^;;h5Po^BfaeUUBd@* zHU^3025MJy29b80DA`_((0>tT|7jit_c1u)T}b&Z}}vSxgB%BY2?dav$}UV=Vw(aUE* zkSnn!MvOT+HM-_#cD3c?ad2pi@HDRQ^LPcf^TvF~gJ7rM+xs1RM#|o@2xav~Ae)EE z)7igJvBa8){0&>NZ_+gOcNb-z-KE3yHKs7opOf=z$7w2Q^3ttzx^^dXfM{Kyo)bL_ z6e&6t4>U@z?rT46ct1WaCB#@$#V(jxL=;%iU&#CNUv}26J`Oy>^AcM!#-@-TfKoR1 zaJCa%ez&zfRTF8CkB^U(LTsZ79lsJKtvz*^0nKCgRzBT=ptTlZlg=e@k$NunDYn=W zue2d4eFmK=cnj$1_6Dv}Qc-~;-(OJKt^Us9;rSi?RQvBlHr0gRO+iMTM6A`aB;^jf z43!6iBut7B+)B%}m#s85ZSwb!M1<;VPII&(4E;dchgAl)B$)kWnI2 zC5w@$5LayLc#l}^hRv|t-EL0PZGYOXK6M?x3`ebdD#&;=5;Chdy;AkSrOJ}NMp9?@ z5G?)8?y)wQr&0C1(7AGluxGQ_eu?Z4@hK-VlfRRF{i2zbwRMQY?r4JQkux$dQw&O7 ztWm!myfa7p-vQDuI-crJ`|7_j?7@qMqyG+&_rDGQJ3xM*nn1pz=23@Ix>n=5SS`ez zBwMqcU+c>b;ytN4Wj1vnKKvLCv9qfR5m(9VHE@h%Qi~q(3grz&wOVb}Dik0~2{a9M zF}pmsB%0}x@>!(v5wY@?ry<7 zI3ZYY32$-Ez1?5mey{3P@t3N#myS8+kQ@Vkt;!cVg)Qz}KNE*)I?fF*g^x9YN@0Jr zJ@!9d=$@!Oxs~a!RzDBArP$+O+tegY+KyilEmo@@ti2v(<#vCfA1L`Co}p-?Vd$a2 z+=Iq>GkK`7<>}jVIqFBjy|pAo%qTsn!Pqm#-2VXs_KilD&(H4#w-(D%@$3v(0D-~> zWes~aQM!1njJRG`K?lC-Fg9~ySR_NVv^?(TC>dJukiMc#N(k`1 z{guE8O8Ugw=qwlHD#+$-#K8XbK7f_A|A`{cD&sFy{b#5#E!$?kyQix!Yy7Ad&HejF(%Q#Y2@w-9+Z!#b4U}j->p=`L-(NXoGlP1xXfD59*)z 
zt3*P*F-~i?Cy%O_v3 z%Ew!Ve&xvf>&U#KNO2iWi?5M7O~h#r_`wP=|M-G$eG}<(%nNY{If_0(F9y z;#PYvf;2M8nmka7U7z1**^7LAGP>;|G|Tzoixu$N+9Wo+DtV4py7-f%U26@3)OGc# zSz*>m@L)A#S$Z-mwV}Msq z208}4x%5{y>V`WHB6qo+Xy+S?KRiGlj&7@E<#^n}b0QG!Gb6-{Sv8Ez3~IqOq_K?l zO0%-5q7~$`5nvN0O{IJc&;xF&nS8N5+A^SHqLH@@H|1cQEezZ zIxGzf-fW^6qzoSZvfH?kk;XXpOsZ;Q-L25?^_+2Z<5f3LSs>Nx7a4v6#;7i0qPk2g z`9P`!B!}ub6#c2 zJrAnWiDRL;@&v^xCxs^jrhIOQFNevHJsDxU*JSg|$WT%b3Y)28BDRj$2~}!f>4LMt z2aUFVLzFMN#EQ>PFEcH6TEoOp+m|Oc!OfOn)zecW^HIEMrPSdvU&r0C08e$HUTuRK z*|ER-#aw43mq-)J%cxk8X318Cu-GFJAsNu*?h-N7`+ed@-918W(nQcK;8Va26iYmK zWZWTA4zaWS9~jk1a07r!1t-J76cLA4#vD|Yp{!3;~~?B<5tkQK>4GzlCxPKK7$#c4AFQ$YUX)?3swd2yT}T_42u-~>&eOvWC*~; z5w6W&Eyb4V|FSr2!cr!RkiO8fw6j#-{kGxMko~wlcEf22Yht91jZXGpXnYpsesOs~ z@6SW?&oyqG2?4x8oH`gf`rpH^_!2THh0AXP~*g4{cPBkokCq{Ov(A>Agm z>ag!g!GaUa1dqjNKq6cn-^54T>iqX}zj&Whb1B{0&QCpU{QfDhq3G&SD})$`w6I=x zs7`Vjkwsyw0@GJnOnxsg;BBH7e;HpeR>FT{-s19sHu#$=?XP)uU;bq?+z4fSP!&=z zp{@xcUy$_bEDQRc_gFunEJ93JF>e}2S%GIKgDRymT6RLgT$Es{pe8msr#e@$?ctiS z*ZXjP<2JOe{XM+El7smRNre{aPJjx@84PNP*c#5pPbxA|uYJCyo~av_ZJQg^nl@ znFCp&RkA!uKnXH*z=aA8FVh!n2%{q2P_LT$$oD;R-vn~0n^1ibEO=eR0hn1vi9Xb6 z&;1N#0<;>bTxlbICN(kroB9Ax7`c27+%f4) zQ+Xhl27U&0H1o4&qidsO`gD4ytWqo#5SPLGNkw9rR;^@ID=SqnrEKJ5(A(tYNkSfB z*b;MJiD^hk!^lsB`X{QcHlhzHCeEaSwVXo?7F-{{#hr_lkSILnX;JvFDvVAKX+^c5V&PrLuuywd84H{WGs20yG@5OVpeJ zb`Fp1k@AM`TJ=umQMt%HG@?ch;V{GaJY(azzH;%l%RV%+x`ashb0xyBSJ!&@&d!MR@G^?AC1M<4AvlAV{LkcE{u)vcydx7L$% z#)#~Y1)Ok*_}@?iJTfQ&ZzNHXnZdy%0ad04q~uOkLB7kN@9S%gw;M-hVt+k>`ofMc z>`Y9=A`%k&)%++TX9{qB93KnO{IYpfT7Ex9~v8hoHyH=+9sLgQx`vXA0oN0uLf+*cjCD|t#5=f ze!n`t%B0o`(?B8-I&7%&K7Lqa^x+cpxfM7bWlLPPUF84P*4}6zTWtUG@d>#qvGVOg z&zkq={o=KFkkO;DzRlVQN-YVYqvIjl^Iq7pDE+1!$5z#pHUP$ztMZie#qUc%mnFn0 z8bH`-o}kkUE|r3^CIRo~>jrb2FvRkkZNCyYI4Jkb_<+Yh(fqD7{1%hDPzXz9CZ#oo zvB~rrw-%Ek(fB>?&|-VeZwP{UTm=hG*HD5r#`T@jeW;=08*o|5;TPgOpdmY)#5{iZ ztydo>XZaDnX{jfMk^t+lE8Cwztz+Zuh;*!U0(@kfgG(NR{XXMQu_)7y!jBs_1D4X< zGXpuaJ(_D#WgpfM%WQdUgAP^Q$>z@?LiemytL@oI&o3;iE 
zz5_OksNF+sIxJ#*b$+V*>*;oRD&zO*kFk0sl3xR1>Dyk+Xpzo`h@7rv8h;Lv&Bv?0 zyI+_#S}hNSkD5{)d6#+r2C2dp z;~({$0Zz-}t_REv8|nihlL(yA0+sh`M3+}GvY4Pi$Jei}a7akE_rjisuNps~{d3$4 zgjVr{+&J1j7K$p2*4muAfLM|2pMO}tFnqbtOa>b72H{xzj{11MVMc;mUmRja26&{N zl%Hct*M8m1WtgRp>jqJ&0=(t-z6A=H^f#O9*?O9Yg2n0DSbUD!lZMUiTe>uS{Mnx? z*U6b<vpDT_IoehFoTW|~o*CdEzdeThp*Hc7hp>8r~)O)x< z$DL$93eZ5x=Fr%bq@scJWS5Z0;72on)uu`>jL9GgDLXrxS}7;_vLV^Y5MB8p|3$IwPZ{X_Fy11G$2zQFbf_| z33w&2m8-oq-r^~fO>cn%xz|(pqxz{relf&?vrX2>P?f45HhNG>w+ZtPm09$Emq&25 zJz%GVI??-fySI=RGV&lB5rc1Iokh@CMA`-CiV*+OmJj71Sg7!oOOdg->T($>n`;bx zkSfJnPdeeTf}S2t&hb#>MUZwwfKzIuQ!{^hBBWBY`*cu!bRT=dV^s9@AcZyhhv9f* z@;fuf<+1(Zymz#89euFI^97F!1qT|3CJnkf2a$FzAH_fRd0_mG-gmbOw&PIkm|zNP z)Pj6>CJ?zz{1FjSgrrj{`nDnUTyVJ`HY&TAfKuW;TN&S- zMx*_PF7Z#%p)et>qF-d8+&XxkVX$hK?!L-M9Yb4joD<9v(wrre?vU{t;0_A2N**yN zJyrnzBr|fQsVyvkO#MXaLVe!Bq1{77>! zb74>WTnh(c7ArrUe$;filu!pr(AL=_cn^Pv1MUT_8l#^Q>a{tw>rfG5A@I1I@Rrl0 zeE0<1K${_`u4Nc7a4#`4EjgC!2ls5O7Z~j-u~ILGb*xYer+uq+tH#rFE+nQ~!0vDc#0>}4* z`EWGchzW1IbCf*MK>(>oFx#p- zX5ZG0+v(S@cFfB6ysq&JC}o<Pk{8~qw5(0 zqy}Tx%Hv#qU8$tXR|h^oWXb)#AWzQ<!kHORd4d2`E7Brg)%@A_>jMiD{=Sjjfy z=8^8(C6DB|zf9i}0R@z6kfRsT#T28YCn+S9CPpAOguXJl2<#f_4=OWnLwklO?_LU} zr|pQ-{5Ke-io}6O;6y*{p}Gj+KYc>%vdyy5{|n*O0`)zgNr;>T#G`o?-lrbd&?i zC&hGJs-r`4Cd#L}(x(1Wcljz&ny|JXHmwWr&C1K^{K_mXkqR=gTu5p*KE$}DdZ!jlv#h;NG&A>A0}J+KWV0`fhy@|q8j&7qX!gaK}8f4bI?#V zit*AZ%GFP(bQwSffRI$|uqip9sIDwuX8Kiy=-vqT%il&w&u3x%h(n>B7)fHTZ6t3$ ztA{Is>!DZ-%g6<3gWD?p5>Lv8vPlu|@f`t!H6aVfGtwpMH#76jvN@-!Y{USG2!HkW zyah$gF?^cJU}5o>_^hwOA`+igA#(=Vd$i!VEk^2}xPVJQFh!K%Orzz6pD9t%|JR0^ zFwC>(+e{tz*iZY^P%2o4sK&`#PJ;$K>HD7I-@Q&D#19pJ(zG8G(~AP`2ut0_=K5-KD5WXKAeC)qd$$b zeIp_jQC=1P&yss-8o#ZM*R;eqiuzCwyhcPCMY}?iP%@w}+wAUzj}u2;m_v{Mh3s=? 
zv66Vn<8ob8qG}PoX=5yblweS+rnJuZ6z+dNS&)i-H&(^+6kd4HRYEc7EU?vrgfSgM zPs=D!BH3@OO3oF;j&zgG0&64|Dwnoo<4H-9)u7x;=$)=vSutg8J50B^(_Wo3YH0kB z)J#-A@>HKN+VK2n7~^vW;=A1`Mmqo5Odo0ioNg{R5q z2LqM*?Q72qj={G|`ok&l?QR}#Al>=r$->KrN#odJt@e5jc7%Ba;}cp?nkE+IHL%AN-Txc)_H`-(Q!PgpEk~gs+6PdK-y9+vYH8E9JJABY$(`c4uKU^J(nA=7B<$a z_8Ig}d{SedP(|Mx{%)qHtv7HCmX&2>oopLN_obtUTClGif$M>`q96pWJZ?+ki?kDD z9<%7q9Cv#8CUG(S%0BB`ANCY%1N2~;w@p6S(N;o0{&gZ`jC)EH?6 z6T?D6No=Mp*e|}uEVO#dPDc6ql}7%4EkVK_o6y>@=C*zvd1y)+)k=KB*=#uHVAFbI z7(C2YR>*@m&odPV-PGG2uj;Q`02VR}MFxWm{;!Qe5cpBUgP#)Ma!RZ18l&(OYA7ey zNGDSWmV5XCQUBmKnp~F&Rl1?O-({+j&530g4%cVN zBuiDguPfn@Vzl*m-2RR2E^Gd*rx zSYqDCEk0~0eV7U`yF5fE^~YgoY={=15lFM%R*ClS%%4%vVg7<$BEL#%*p$4hs` z;d=IE0yUAL|ALwpNs6%UmK0Tt27E@jQWFNHz}F`@v_cLS5rF!?maiV!f$F2>Y$VE9 z+&u0ExlUQNRko3j!&1jT;3_o%UyguplxR?r3M?2Y6*Qx+(JI@fJ`gNmwS>6)nHh0A zNv`OX>QZCe-vmA?LL7ynZ2$VhCP3yh#wVE$d#-6K0b z*Wh%v@Sf235E|V6?j0$S&%`4@5&OpS`?O_F)6F6- z>@YWkMmyM~ub#9*+6Bbp6?ytYrjp8bDbLRj_6Ip$XXoA!LY5GcyL-E8KbyCMu3y?i zCTvs~;)3%}=63#a1AARjzzL8TmXq15!}AW3uBWsl$GCOYiONbkS{-szwwc76i@Xi3yZcn9FOl%Rn+S11%iunQ{Lr(3lZ! 
z=!%~Gr3)I-x~aEaLjJ~uIa)z!R9)irTJ*}^#eg(zSbJ}>6jLzO{qXCE;(TJG!Ah{2 zeT_#n60Xm`{`vU#c}rm8@vsQh|F~3r_E^z61aBh~JwQ%vf@Uop3)S|(ZTubs|GtvNk2ac508aekK*@!h zZxqt+>FFP{+79Ll*I8MSLfZX!w6G%~Oig3Idyl1YR@Nsj+#wMRM*J2?Vv1gGRq1T0 zxC@30xjVVMHb=&)lf!xwjFsorYk&8LP^z9R|5|?oGHaKj@Sbyod$2Kp$fQPgWrQ*~ z4wxs1#)SpP!ND_R27^r zz$Qp^?u^i>3m}m&KaN~ONkQcLGZ@92P$TRCGrk6X)0umMs@VUE%KktXs z8v3xY%UcmH5#nZN`_R+T^{sb$ELP;p4R|0R=(pvmngom8oM#W5%CnQB0dXOkDFS*O zuA|+(%Jn+SHV+BGYD%goo*Hig*7MP$qi+ojRUCI_5DH_H-!X~*TJ#4mHQPu^(ZvWS zGkp8CZ0yK9V`0|2pjPaPox?wh4PrF9L@!Vd@AN&2O%hQqb>h>+Fv3+PVSyk9FYyp}HviVQ&P1*d^2>(q9J(gZ<(kd+Om^ts>1w@eReJh|=K*EsmaBzQ zF>Ui|ouFlS2x*_!wgIp(^6EF+C=M`->~P@CymqQpduVQE<)=G?shyP4@-P%R?RFX* z96b82ZhUDw<=I~-U1=z`T6lx{DPrc;$3i?GwDSgIdM$&_k=Pm|d6Z}latYuO0qKJ= zU>=-{hzYHSV6u?NfX#z$?YS^^;7LK~{u;dNci^AE#C8Z+^2=^rqV;yZ;XLu%ts;V& zJwJ95qcbIitD5%cue3Jd$1)(iEuabm&r_(3La9aXI3|apUyN9LLI$0s`9a-94j0n- zd4rjRghWq?qI*b4GfreAsU+5pzTMf6^G(BUZ(8OGZ8Y1ZG8+cH(BtDkemR-m{*a1D zy|W`6t)cgg*meE>a^!}Rbm%h8yuf1q35JRag5YY)pdUAV*p4HbqK~CJkbb#b$Iy*cvv2-G3D2um&GIxe#w&nL) zczA>`X*j17kI+`7c-X_kSC6^b{4y=29BkM+)ktw5x|&?jAg4yf)VcU-k3dBLf}uU@ z)=DFnVtXs~1q@Ce;@QL`w8b&EO5l;lJc6#o?K2zDWuP90-ivdxU^EHc40*C=G)S?X z1Ib9L1Q(FU2*9;Vf_#$iVqJ8A-4120qGq^)W2r{N9ET}5;KlWJzb`1v_d)px#zzOt z+`kwNO``k*4U)j`stq~uPFjLS~>uqAeNy8yf}iZ`aGx40T1r5l;;Dt$_ndeots|v-9CWC5OpC_oRAfDwBJ=#_%2!-i=%Zqx=WZbufX$ zJpKoXpukw%gO38ja>mBS`-&VL-E1t+g5t(j-C7%4iNfiAm3oAKeI3&pzmH^F~2qco6R>WO!+FW|EUOg^r;7hI(Fd`zZ;WJ&SOdap+ zdSW;b=b)sH<0Kxoj&l$lcKsX|r!4bdhN8>`?y?$1pfNAF8bb&}Zhi&Z&U{oM`Reqigl=1V>h|TAx2CsY z4|Lu4yCkyOFFHReTTYnxwDRTBcAg&poC_P@>CU?Dq_cZW%XXuK|F0z9(l9%ElrGF-z!1O-_2IbWWP9|+fFHBkPX&} zoLPZ^;#IUC4xR9)J(lWSm-k^fQpOm3%vt0uZVys+Pjr0y`tCuOyW(3&Lf$;;`L(ti zjS;dJOl!4u9Ko3oKwnI|w{O7kzK)2ah{Z=gJY47^@azDeM{ZVd-12g7q1=fBaFL;i z(}ewH8+x|2z&+5R@ujU;E&0u8q6`iAJFVRmvhp3nSJ1VWXxx$%^t8fh5J$x-XEsIBPEhe{mF2fg~g)v5rxH(JR9!e|_zTHKLnEqA64v0pF2WFg7;U zs89#Arg_Cod&(^%bOrjW;vbcv(1-*~LBbf(JGnGn7$@(_zt;N-EheE7479u%pczjAp@q!amlJxd)5Y 
zZpZKqHmNpM**~Ger2gk8lNX;jI@8!A`1_(ooKEXD|2(xaod^d5iLq)+y0u$p4J?2u zn$Br>+NedC7G3C#c%A);7BJ>}@LsJ1!h`3|#F*1$!R$AZuUG9%i&;*APIulVDDwbV z%m0Pp_esfiM@aA9si=2^X*e=vG~Pu8`VL#%q+SO~OFyx=u|B9j;|lgnQ?3mcOgp;V zYd+`tcXKdhyTxN7-u{C{#Jthm)|Oy^K|k(5O@x4yI8A?N9k`X%N}>cXFbX~UeyDe? zu@!msjbcW*jUGeC5XsD1_Q#63GJKfNR`Z%#br!-qB>Ij`sKXU(SBU;u)NTu|2c7Q{ zuGxtP^mJx#qbqEmNlR#Q+l{5sX<`~hLOl_4f7VLHr1@uLZ8`;68k}#GeR14atNkBy zq_~rcS|yytAw3F_$yon0A-@HQllb4j!or4_H~J*DkV-*^Fi{)p-^BT4g*>a4=H^(% zo6{llS1|~vB?hlnR14$da)7Sn)wc$8CT(F*XV7~sog$c4LsNl;ACLz|Go*Jf-A1P>`4|)ru#}>=XU=kmOyf0%6bO zBDc1(c#6GNL(Eq4rXnX7QiVf$ zm+f5F!G<4f#l?Tg`BFi$N#*@bD^MavX|28<)eD7@Fksiw7*^ zwd+8rQT_b?z;}FDm&6edi7o7Ef}u~L}H zIUCC{hih5-{5ViVpwGsDTr|R6@D*CyEs=O%N{1%)%I&0}q8!%U%F1fR@8@8&z5(Bh zzP%Yo|Hw#hFX^(~C}k3;TP7FUl&kE63<(Sc@s*{-EUkyIQXigG`PXIKH0+IuSJx&^ z7|DQwypa%^#_(GN{u*T3J_{-MgDoA(ei^Fu7y-gy{rBO*Ht#>+qC@_=`|pZtIaJ(m zpjd5e>-BlNss6Eof=tc-_u=6lAm)EOwxSVdR>OR{S^W!Q)GuQ@~GC4r4z+++@pGN^#7yy!=2p9;P_yZ);ym)wIOiA6J11Smw`UEWQq>a2c z6Q~g8fi+`#BNf)cA|Yz}q7fBY10osy@>x9c8lOgB@8?ApuKnbC973vdx7&LBx0W`3 z8y@(o&G$GQ>E4r}!0dX3=eqYR9Sv}guY!S05^3bs>fn1k%&p#YA6MI5%>8I1bp7^6 zN#*5tQ6^*{J{=zZ;dVTvOU?H7(O7q?)`O89Kl$In`p>`X*Vigi8U{@mbSdQKul6pSH&&9lx>ly)7UyQ*o3-4 zH4UL6NN9VB5?t#!Z9Z*{ZW}t!JF!bknWFHoNF+1lOEZoehQ0rHBZo5ZRKNxsqWo`= zx>*q3rH~F@?0|-vr{7b4a>{2nmOv(4H8{z_Rm181l^+Gg$~?CJWD|7sX4;&+{}3Qhd}=AJciG=J!pU&pBVd30IZ?} z&d1k3A)ibnq>o3xwlGn7oX*4q} z)_nLSl zj$?GJJME-o544nyo)@F;55sa3Im+XCC@`=F{gmj%Dy120+!PiNCj>2fZzJiK^Wa=) zkCuOw10p?-QF7*Bv+m-`!5AT@+S*1F*cx~9A8wnoa$cV=dfXjFgP>m-7@Dq9h!LXL z*$GsdPv;|XG~ls8BqYUwP)3hUk>a|UI=Cha2Cg*)DnO9kjz6HwTcwa@(ZRwQvF#6b z6BXSvFpy5ml(r@_=3yD^H}TBo%d9m}qt)h!cHT>Z$5vG7*82F7rHK>rzt=DQN+AND z-Zg>(U;ksFBJ%1a^7DL?)h3C~>yq^8CP3NY`h0)i^=UeL&T=NET&FR*`Zu4?YtjYZ z4g~+r4d438O@(qk5E>dlM%KK!v0ppKQKAr#o7-@mzcr!%%u!(#6zOPe4a*I~NJ+?x zF6dR!P^o1)i7T_eWTH{2*>!Vc9ZMnoq*b%gH*7(l`TCka%G_(g)tMbVb&1ajO(^UO zUG0GLpHN2Ywb5Ri$4p$v%DfW6j=21WMu!NI{D&E)tHJ6kn9{Q9C<-jR?5?>g*U zvZ*FtsX3AAc)T>VwIw9oDl3c2hiN%YgFwu2qd>&f;s|wW&+tiNjy=P+8YnuCt~I>6 
z%opgi#t`xy41iFUz_pueFpt!#EhsFU$rchk7>!5tZgCnM@G)Qe`f1zuw$B&Vn61dC7b@T~k?dUL{Wijl`%mm&nCM`~ zHJv3|soCbG_385kvCR@-+QVgoM10f?E&epw`=p}zZAm@>YC$3wUp`T)p|V8P^S2%+ zo`#FqxsX!s@WWMjxH7hP<<|bo!xTb>Gdg?yFSOsw-7NG@`b^;9K(g1T?xa-x_Q2D{ zdK)aDzI_Sk93l|#6q%UYuzJe+us8^3ns6l%%t7Goev?pA?=vt%P0jDg8>VbE<`(?8(b)N=BqW)?;&JN8nKHw%KhIf>3t!kSg$fw`?s+3wbrE zZ|JYqt3VY26%@{uqTqA(4@KskD0uryGb0r*Yw!3WCzV2-s(Sc(%yEE&14#+c$m&{k z=$&7-+^n9}v6VZBK!7|}Rzg%Aq7qO=#tUHxz(e-e`v0?&zwC^kziH)3YJn2zft7T# zB)XqD_<3Xf)r%gd#t2edc;G4r_c%np@2d~gY^UA2J$I!#^k+}OWEnuJhRp!XpH{2r z+>RAJGAKkXR%?spzuO1}AE4(<2~-7w$Yrf;BIHw9EEHn;JKcY#HhVR%k}vsv-&1Ri zP<1rjcN(jW><(0tK=OliN% zZxmb>?|LPt|5i)UX47l4X)`eB1ePeW`r+CQkUEbAgOzK8*MW{E&gU20$Y$T4X7fJ-}4LUvD)Lr1%SCz!S z+Uv_Q!5nCEK~m=6=saR#=y)!TcR{$nDG9UYu8jzq7RjBOC_H_&{i(Hu;y5$lZXq{m zHPhwz##O*GI6;K+61EHi7b5o0X~@VxFx=(c)=c~o)B+TPpV?~AyGLRLwg(-EsChrM z4{SMf#$eDPO$v3lMtQb^B5A>w1E_<&6>x|Xfp2I&FviE8cIz=|*WTt4*AQlV6s;Yl{5@Y88JD#WP`RNzQ26FzMGkXg1zG^YiE%kA8M)aEO zN*fxW2>>``*S(*C!2fx2C@8##LRll=eh;R>{5dw53k?6_t_p*GqAsFmUp5~Qi6q{>tv6f+Cl zY+o+?*lG=&@pv#50Nr0PT-`ZWSCjZ076U6Axf6L1a&u9JdI@$})auj}us;N14+p~9 zT;Y~$_g2}9N{{BCVqvtjIBY|??DL$qZ{G>)83<k$@6@ug zW=Wp)lMANCG{s0I9!dON%VoA){0-#EMV}jJ0#-J*_YJx-bSEWL!oSR)(9PbzZ;_=9 zA5>IA&V=kO#1^!(NC*vG*AyHwKTHQVcLb6tg8drBTa%|MH{GFUBN4gtwWJOk~3*eI9;>+HKM3$37S68@4M+jc?$mV7E5LpBO zoEkr3z`#Lf{_XHB?5N59^W8L@C;}$Ik`Ijk+JJmXa4yvBe}VOGqMknQ@Uo^}e)$%< zl+WClU3m%-7g3y=6S=lHA0z>i=`{*U@lWy3M}Z~)k;e>_Kt<8->_H;pdk%`-~;&q?{bMjhl=?Dg~e4*P9)PZBlh}E?W_!HZLo)2RUwClqlZj81p{;^?39%#o# zL%#qhwG0W?No#BLA%R5WYC8h|t>|O=5}A&u>D%LQHcTBk(ibk@>Mw@hu^8mQJQ#rI zxpwD2hnVZtxdITrs(%4#ikO^=J>p=k1zu83;bkK!7C}ZGNShsmZ-f_Uo|GF101%cw@c}t82EvF1wpu3rAFMYOBi8B}a$`q%`2pE(@@70EYl?BtjmNRA7S_!JWm4 zDtmR3KuZ6y!Z7edr8vb1_k>T|I(Y+wn7FbYNhyS(b`0Rc5#2_REl`x+G7qz@;Ch9AbP)X7KVXjc zj`pDdBf=EzJON1LWr}q#W8Wq_?OW=cle?HF*SVkQM1;RA7QI}qB!RtOg=TFbUHcze zwkuwU+#4gCz>3L0b$Vir4-h1i$GVnmi*h(+36|_+B8V8!o-9~olJNCH49T^=BIOD^ zx4pNpO^<?Rn=;bYk-!b&$k^J3Yf9};MW=VAT2g^tNj5zGck-bt 
z!8Kx89T>h8kF}EM+EBDXnE@#xUqufx2tIpaEn=x3z=3BFe(|h6?dRT7N3uHkP=Ux=C-0Dn(U!Sq z1^>|8MC)fNA-}@dIn-4(!3pJ{2rd)j-}N!(NqpiScg3x9c`>TuUb9}>sEN{1m+ND1 ztwUUlm<%9(Fn)ieD5!!anq_@s<~ke}n8|?Ge-8`$Y06Ta4#B^hAn-y7vqBt|eC570THv$qP&Pt`GT?|@1=BL8w zCXlS`$D>LM#KuMw01?0>LOT;UMMWYE(1^6+2w9u{2{B0loDP~IU7jotOj&Fr#Mp1o zUxJV*sEc}_hI;!wXIoXfKl;%|v1BE9V9HU#A`e&V0nv0MLZkwOw6vFTJCh7)8U0&- zm8ZyR%&X{6`SqhJZLQ0W4&yq5ytBixza^;S5Yq{3pcJqx1=h^~f;YV$NTnb0Ojsi^ zV_YwkldeUSC3c^_PxYlh&}~h9Zaz6PtZ{G&ZGxkt&-@#nBTf3kv$p!39R#-@&(Y{x z!XqUDPx@>xMhM;Z(?? z$7E`%0dls7b}Ln*>;^~(LOiTM3h5ez9s#j-qW!@?y4MQetsmHjPGAs&H{J;A-$nm^ zMO@ld)BwwKJ>NNTW!$zhDh7}#$?i1i^ng9 z$vHo|JXB=R2D%eCmf_u;;9JVs1{~Fby9`7|un{b4L5ijW&`mZWye_kOU>>+|JvhHz zt4Q;xS@3AU6rf`MayT!6L-?cnqp=XTJ;)!4oivN4c>BwA^5hjc*CbN3@$@6F{f3$>D-}lhNV8nx!q&gcl zwo{P`93&VWYIRi96ecLKn~yX`CLswlWj2{o0mA#k`pTDAnyBvKAzdl2B!yw*Gio)v zyKg+aHC~)n)S~d|zG2mkOeFmE8 zOVo5OgELLUnanijw|}cOv#v!k7PA6_QD)PSb7ept66nok=r_{3(ia6h^K7U=rNMy= zQ_W;wwR5Vm-)GTz5j?YF8rrS?XDd^AA>(9-uw9$o;?ko!x>W?r^#QHi-_(MiWODLh zZ7i>Rz<*=gUm-A)6lFH`m1slBWfiAQvLNAg?Tv8Qfhsg(2jefbD1G?Ei$F?X(38lA zqBbbN0}XAllTcUp@3?AWwff;4!pB?4a{b;{J$jJYwW3qqF9N>-y}Hrk{GLmamIu-O zk4@B{T0`nSU}5kpqkp%g&qfg)o#grYMi54qsyNQIe?YC3 zDY@{R`utF=IKt{MQl?>Dy|UD4zk5Mx|0JHgASEL@k(I)QA(gy6*v>wY86Je-Xr<@S zjLn8tYe`RTOjpK;FWg?^2Hcx)5aY@BCW~Fpgp%ntZ+yjtY;C18QiQvY=Yf?`{g?X- z44Mx$21Wtamk1*RtWF8J1;<}y(zKK?^zGAg)rL?#Jq%tP*cKc3Bgsa3rOi>|z{>P`1+QP!cW;C7F z{Zb5keDALem;DE!T5!fM30w{wXQz2;Gc7BHKbB_mhfB?slac^S%&e@#6|C6U>z=n4 z1G7+>^j}SBto9@gHY&!})-|pd4_~~twzkx&bY8k(J4Rl5c>Yv7vDz5M2e0w9hnr3H ztgq{;OpGG|LQ^?HM4X?1l4R@oj)5hxir!rxn8AIn`j1r_4nn|YmrbNq08ggRlf*Fg zm#5V3^nMwSr-)xV?bB5eQN46Zfx&neXI zD&5?O%LEDT-q{Xe&J@~{l5#g%usW`CARvhH^bJG;8JAp6KrY*L9IckVWq7kA0`1mt zB4=gU_fOhQpgm;}6%~~fyA6-amQZ26vtl~i@98u7S~67^F$X3R(khdX^k|%3DcSR> zhKh$K`|sc3A29K`h&a?cawvNzkc~$Gsr2G!TLa##m!9`@l#}z53l8`TZ$Q? 
zR=1%9w@U<&M`oG$O+~Yz&HbgJEn{rpYR7C|TE%gZp|8)3I!gpxPA-u+xt$j{*drrp zu&}VyT4mCg%*_?pZB0DgyC>z9)SjP#A*P|tnx?U}PJ#Q4=|uL($fr-h3UjH7QsiP4 z;q>&ML#u9AuGbEZjv4&O6cp}k++VV?IB5=G1GB@l2Y<7^2|RLt#CpmBN7Hz&aA_nD zG(-pzRp(z}VPVS#^P3;5O%Xu|2Kn`-F3`fWMRL2%j%!#ni{9c}y}@Gxv80p=LRm5r z-4Qs;E=~5NS-eVjj?XOU-+|N@{;Ii-w=yUMrXo3*kUZ-U8rA!11HCAGOJ0}LrJIAj zAae4{hmcef2Sa6aOq$6CVLiRTprM)X-vMd*ulBz=CkyGHzF;=^`pzFt>DSet#f*(V zUvwg`G`1q+I-QK;yXPii$Yz2kQ%`hwhtGA+)1qt<2ABV1Z-j_Z;kV8zhGo+NF=>Ac zrKF^oWMySPoT4_`=pjQ9+rr4f3Bjk*oZuAO!g_i$Y`BM#ibj3 z4{k7;kiUVD_1pF=mHE_a4mU@iUK4}U(ew*4!CxuKhI210{(UD)uzx;Sa*qwb+dKtEX=TKL(%Tc za*e!eLzOCmV8qz3J3BiopK#*rUO7g`!s&7oReHkEbOxg$6>KV^a0}oKQ?jD&PKK1E zAEEw-2Z0{~942lvIEOz8R+-Q6eZrSqsCdbHOHb*;=a{saB&uN|pV;pIR-{28ySc(H zGgdH-M{$QqADop%vt)X@NY8AN9jVFqyF83#E7Un)UIf<9}~}1X~_WW8k4WBe=lq@9SKuuX&&5q*58NxhA`c2@$FV z6wv=M_m6g`Dc{yQ`ET!xhZr6A`z&qGE1+<82I<#2c&VwSvL3F7wV=m7BN3F}4)Ixx zXFz630zi=2dWmVyFx{{d86=xAsVXEY@I9??a4b7p84_9RDW1dky_nNc+1(kA)zTmG zC*5DIaLv`V!JSKl5@mSWKX|{PQDFmpLoFASk94ndO6~3KC$_f_pYNPGQIgYFizZMD zzHOdY-nRK|>L)fQn!vr>#=dz!oUf^&p(z*VNWsz4#HF$Q3m@cJsmVKjTt8a@WO5P( zK#)Y%{ESj?0^n9+V>p&}ye{S}a5QZ%D;rI~8)xcyD=!MBnEe%e>ma5v%9$aUzo5g5 zJA;mL0PQUq(m(DzV>vG$C)v_u!*Q98${ldf?|Ha_Wo8Xk1UGGKX#SPdwQxayu_$pb$;iHOt%F z7@GilXl*NP?g3=(V)H2?;;me?1kL^CJ2FC;a#;V@8>vWpsB;g(Z3q3HjsX?>H4q zR8($k?2=@5>T$5-FbZR{+nZ*hYMJl$FvgK9D#%$Nlk0(yU-fPJed7Z@>2i)*Mbmok z;y9DrY*GFIe0nTBTTQQ4G5PXL>skcO=bL*Ub;8-((0h@2+HcCGmyqAZmM0W4>NOWS zKQzN6Ba;cZr^?#G%C%LAUwb4cN=~x8LRn}O+>0lI3rKN?xY{}lt{*A{5L~4RtA9K^ z+2V|P;0F_O9%{vnK?+DIVtqPldG52WF(}DG^If}I#^+o3uKT7rw2V@`>uUJpNVRp+ zl+m5<9eC{gvjd}Kfrr^nUr7WmXHRp~^>wbmU+h3LtMnv)3FPWLC=g%*)8E2i^BlMlx%H1u2*YOds>I%td+enM|X?1$?_^8L^MvtWh{qs@Z z&>~ZP{YwmaF3d7S6L<4a!(vo)4}>I*VBl;@Sv8gO{O)uWIK z385<3IAA3GkI@`FTEQKyz|3Fifu;pB3-tsfBcmHSIdS;{j4wf9VPTS3$4}cEgu?VC zfIvO?poU`!UMCt*?|(Xj93Lm-_enTWKjy>Q-oCcU(WSG55)l)dk#xO(Vzg&I_l3Bj zJ9g^dbUoxn!0HjW5b-1wxtN}mJ>Esf#8j2Ryb)uhXg>HQ@Ca88ud?EM@aGy2(@)jt z;fHhsl*6HVFygoKlR?{>uR5n?(i9YvA|a^yk1(M&DDAD>}G1z+^Nnv1p2VEm)t 
zdx_`}K>O`oDbNgb({Sgvw{0WL%qr$aPZt9zDcE84&S3Z+r(!$_KBWQ>hNzIA19d?x z$Lb~~Ly{0)cLlW(5kJ2w_en(j`t|u6?eopfIjM48_X)Zk9vBf7!W@mig1FQCs;rew z)^8__T!J;qPEIluCy}l76($upQ2*{!T)B3YZc_*SfUis$?*0DWs@H_q{OYlfhl`dY zlBfc@=#f2jDAOMIZ8uI&lF9XAHgsWy&yq-jJ~L_1XAMn~h#$3QK&uDaJh_FM{rBO~ zB=Gwx30tx_Je}#VFFSmYP(d`Db?O>M{qT+g-YpG6@z4*+YjkpcxI}1c`4T6?8g)&5 z?H*iKJAF(pyuKSFkg|d)f@V@usZ?$NTl^+i{ctwP{iV&{nkZVg2I9vCJhRp+>2VI!;Si`tWI+HKWJ^St-pUO^%hhdl3y z1MnrRu$DcAU7eHHh%&@E7Xl?EV_&HR%5LTFiBoYA4v*N-Fh(qt`po{BX%-e1-zJKy z6!zHFj`zGePOCCA@wr=a)_{2N8Ybw6s2*`h5GHif3zVx`_i7#pZe}_HL&}3wb-~Gv z9WJnKvLH%j&~d-rb;__LaA=zPGNiw>4nR*WgRA+MW^9(|cJz~Q+kMYSX~Yo?WyVNKUmSWcHO5h zG$!_CF^3pV7`%?cI-&p_8c-wl4sma1KI|>BC7Q!T-tFI`pLS0W(6R~}rd&~TlGfIs z_Y>upq;C8$NyV}>o2TU)xSa3HpL~KA9868eK7*H{c_BgvP$!abw{GGK@0k>D;5?eM zevN3f#mC1WOgT1%!!U(*+$p|4Dg}g@1_s@skx(+0&)!*Ihhwdo!1O54X|k1ve?_ya zee*gl!kzn{oBrB8d1|A`F_Y!BW3?iBqMeTqw`DlCK)v0>=7`l%s`43q#OvMvi`A#hUeDoLgd}BW=0R>tMvs z7#1y3@5bJ~K=BqgxtMNZ-91|UX|MlGr! zLal<$u;a68d9frlehawGTxq8FJz`}gSJLHrJNfbZ10l~}t4hFs|g7 zlM8b%uG2EWkwdi zh7rJDN>uuIp>XoB2np~?+=>xje`Mg_>;=Xbv#I%~vJo>yvBoy7;pN(h* z1E%D@4>n7B-rnBXv80jnv{gSCrxL|wG>f@wb_@-jj10`? 
zUs-SC%!%$nowzw@@=(dzw*#c*{|{h51o$gh*ck7Rt8lIG)EWegE!Koae@qd-Z>)QV zu*TlngFbA){Xx0#E;U5(#pWoy)|Lh-4q+8@?E``2q8f}uQx^Ce`& z#qZxp8NsB5RH!;994DH&QvutkC7SrPC7dMIn=1`I_u87;5+JS9LZ1Q)OSz)d+R@t^ z2wB$5WRYlY@}2uu_VKP1C7I~0SR%U7p>;5EK$R{zQ&a|xWxc^Sme18$TP@Gp^MeU6 z@f+N8aHZE$&6gvbLu96( zDnAcUNnyI2|YSN@zmv3_g^q*W}e^&)gh{zd?>o(C?6ak5*lyY;gk= z0>fC;Dy4rcUfqBMH{#F6{Ju1nd~VfxD-Dr4+UUgWHu>%esi}BTgjKkO1{-`CYSOkU zMJ{Kl-p?uSo|bsze9UB@>j-$kVG!+{UY{iQ!_YYFg=)(40vL@BX~=#3#wRW|`+Hn= zOLb!cQR2Q97N#%=&cy*6ur@Z=f@okkt?hC*SYoB8a8!zh|J7-Y*uKH}fAf*QS%+G(xaLEmL_aqzYfje@kKQ z^|5~ZF?Y0o^Ee^kZ3;m)r5%fjA(2U!Embs@(a-s2x5%*CS*1Qd)jeI1Dr0@DoIq2e z9`#G;V0>6C(7V-@pV8N6$oJh~+}OUO_T`~sbq(u}HMiDMM}p*3Yomp}oUrid#4SUB zzrVa}&&?GnAeQ2@&S0=w9M)|wUtOC)G);sXoh$3t{G-mU?^l|aq8@R+mIyJNCyojU zIr${@j%Z91F$@haZ z@{6m_k>W;ofXdeUyLzi?J(m%5sq&B&5)`=AYOx1_>j`FytJ=MzL#fiidX3S?>ALmN zmOl(KlkC~u2??(UOBOG;+s%H*u1)hkmtaxO}0siKCfu5TGk8qmbZrA zGaZa-ygxN>a59gT&AfbdA1LAVws@N0ub8iBt+Uk&nH6@qJ44tDzzyzhaW*HvAqMb- zdXhI9?)8r3Az5NTH(Lj4LeLODfFKbZYrq`2OZqsGY&q zf)2_Wn(xuFGoQ*t5i)pEx0I`oo}LVBN;&P**ay1$`dE`UgjK_!hTGZIGmjbC3QZXjq)bFGC)5TR10n2Cc??a6MjAd4?bg9$8Cx?k_; zG=39};347BiGOHgSvW{Itmo{M`zjZ$S}{5!lZ}T1M^CRpXbn;++diH<9Tlfd1jy>s z#awtG8~5eDAk_f|B~0XCJbT0ac=l~!V<+gdcV^QMR5c)wY-+iy&U{9dRuj9(6X9Y6 z>-Nx}T1}Z)CT+71PUVH>rqA$?MWP9;r>W`bYM8^G;-I9oG>`N1ZDpP51!G*oL6tjK zix4%lzx=6sxkxl7Z5q3!NsP@xZtmd}>76#O*#f|vPdDZ5$|-IPJYQ{Jrj&fy5GBw0 zc&ZG+w6U?lQuO6@;C{CivM%olURK|2jByp0^!^AQ>|Lx$;b7DWFq#X=>6TNy>bEMg zG;EcdUTNUELmZBlDjt|^5MBwMoC5od!V2sSbGXXRHGeDXGjux4!IQ-Y zAtExkJEXZH`?kEH+LN%<_utsHJw_pQMxheN8@zc)XM`40s_18Oad@(E%Lp|0i2yUi zGBp)qa`F^TGzb_Winsh0G*s>{-JSudn*1 zA*_}$85v7X`HH9Ok+`f3`T}XtsyF*hcEm5AXh6ZifQ89$%{LH`cf82J!DYC@HpiXV zMV+r4h~#G{PTM81(IqJhu#?4#P#qoSxayhQJG&AV`my|SV%CSvW>YROQfFHGL7ocD zk_`ss2MKo+hWh7Iz60k1wjTI7Aj{3T5-`U8nA*_6T@#!yz-Z~W=;XqL zIg+z85XkjkdUh%K$VwwN2g=T1o}(!cM*2Nc-=xrFB@hx42Kbn#O97Xcw=jG*cqZeo zcQ#5%S5F*nC}3bIoJy=rs^YL755ZOREiR%b^LF-2^!g!!AbsS(wadCObgU3yjCzWF zf1xImLRpj-;0KEM0)wP8HKdpG9*&w$!o156R7$jdddTwbG(}BMT7K2tXr4#Mic5H? 
zn=79vV36nKR6LX@bpS6W5u?4m2Sq`Fhz+n~D*8g;4Yq^#XQ0VCu9kw#zfad+A18j0 zefEP;uij)92iJclV8~UdIeLNiub{c zs4w}6r(mh}n>wK`Hp-ywY~oW!HlLvW<$>a9F*gOIFL1&fl7<1IDFJUsQY~kZMvBuifzb zh+Mwc2zh46!PrlA{;Qs@I>OTmM6j3U7Z^?UjZh%;6{Mxnb7lw^TwIJg*_lqqjIm>Ln#C2 zA)mM8O{{eA!v%56rz{ZRL~k3@=$Q{MJcv>e5~!|Qv$+@V=3;vz93k75=@dV~0EaE;FT|WV`}ZYndrpAl@#*J6AAVGp zkt;r*{qbRCWn$-}Tyr^9h1$w@TW=++`Dbsz7j|~07T$02S5R&(7Q0v2A&H(*_=|XE z_xghCG@)x7@y}WayCdKy?-EPXDYRu8JwQJe1MaI(9 zThK2ydM7R#S*my4cp?ZQA1(lubw)-)H}O5jnXoPxtc`k+!6p7(kX{wRd^w<{#b%4Z zRo|@|c|_-_mf&poRil-Cbh8`@y*f9bUH6fA5%pvY#nZK!ojWt{lmW&8q3(IT^H+yj z65dTd`g18s6PFZ(mhBv)i8WvT4A&7;rt2g2^Pv z|JbG;x0ZzBe3LG*&UX_|F;(Wa#T0=fcnDXws2GGI!tmX?{xvhCfGwSgS#5N1K16FfA&6J2Zg#T5tYvs%OI>5;Mq}M}1gUChUA&{1qZ*x1|5}4UJ$t|Sq1wH6iN9!@0 z3^YCbT=V(GxSOY|f^GUn+Z>lNDgC6;cKtB_U?|{tHUmWnU=z z1+TWIdop(rg&h@~Mzfur(Q0$h>=qxMmp3IfS0rr1O)N4dW2h;T)p%6s04{uBwp_nw z!KeULCf(EGmqu#UjPC@TXTYub@}=|n*U(HT9?*86r+Xbyiv~{l3kv_tJi5SZh%>b6 z+R-j0Vxjc?2PT2fdz)b?;0Lro75`SE{?|4)55m$?UCY!UKKx;6>6b>lMhW8YAJsG? zGyPL#F*TD1#%EcoGhCxp*5$KqJm~aD%v}v6J^mM68#a1FqQWMS!7$(R2c_nj*%|4j zOwoq0n0zu>WrvTi?w&kaB&G=OM+u9*?(55i95J&61@{jxCWJo7%}*$f4xi8bzTIAG z?b+_%eyql?!vv0zN85&#<8hw}(zrO1tQTk9Grj|UE>3s|y5u^yEibV_3lJWD8Lciy z_DF$io5Z@}{tDch?E$oyP6Z9xoOIhN-MQ||BZ5a*bht=n6`oIp#-oJI;!xoh1=NNt z#)pg0gBF+IcJ=eUl{w`MzEIyXN=*ow zP1xdgd}@KZ>N4)45U=Qlx`d58ri>AjfG1C@ib2OLIhek}XXj+zF>-&2h!O8&Nk)3I zWXse?)|ti>q#Rg)6*s0d{7DEGlb@3UB|ZCaWq3Mlu&#f`J?ci6$$w4~r_{w3@NOh7 zM5OSES5iDntg7L&khpFlW#1YWj+l`~pcttrybiusv;xgxQFeF?E5`0%Zoz~gItUFE zLW~CkmWfVAn9GJq9Jp(4+>r?zpz9lT4&}odke^=Qh8aP2jc!RSV1-&~Z>}*4MWxNz z)X(%i=6fwqSQr8a;&y!3mfyt4!pvVU|38pTctl7{1OTLG1%n&9D2^0`r%L-4vP)n3 zuC9MRvgnIA+Cw(271xI?uF<7DxTrDEkwETekTV!SiG!Fk62!o1X-iZ~^ZD~7E$S6? 
z432T(#JO!fj6}OWtUh6&&LI`?knO)eN*bK2*GtB4b~LJv^%8isw4_SYT~Bd(a1mjR z6Z-Y^r7A+0Q(SSHz2P&O{oOaf+pOKw?G8R}dy-XgtRUVEt)Zb<7r(O7Y!$n((D(ZA ztG*F&9b^d9I>{bC_B4**HI{`@tdMEnAsp#XD*D@5WVHoL~dCN*$pBvFn=PC~09a zujlb|J_H>)3zeSTuU3=h`!{Grj0i_$6g!i|cwCU4p3+%}K*aCbbPL>-RSJmX)0A?G zyiA-|$hi@Q095_cm(&lh{HBw~-6kX{dI*9InFYst))KuY#hp$= z5rPj94w0{-JI^+`L=`tM=pw?wOQOB_86|9YgzGh!ztLAmt=ra>#%i`f(Do-lP`e$T zf}&jaEuE>AP$JXdU#G@GjyY6x*a$`(hz_n)NJK<5=fPvtBPeG(-$rtKJC#p<&g1Iq z_@N9p)H_{5US@!;TpB2uxVaN>XjZNh-XD(o?l@`@3mX6pgm0E>!DOz!TE@YuLc@0D zgHU5IA(Fqv?20)NhNF9_(Picp`Q%?3Y9QjJ6#DmX>34yEu^jf%tC;93n z?D$fys;7LuKE2B1c2#Fn@f?{%sWSn-g>q#%JdY3FTC4E4u?m_H0YhXRP`9{^xGj2| zSNO7eAc~?Lt;SCv8mnxsY-gP}Kdpk5iD`PmwmM$vs7s*nq5fR%L|L=P_*`u=bsjKq z))Uiz1y--;ollZ*netx%4Z}>n;{j3{t5OgiPiT#iu5+``Dw&{;i-)(K8<0>|FUQ&2 zHJ8}#bkrI?GM_|S222YDufU>1fHthD#xT)%>+F*;DUH@|VrKAO2vYEL#QDH?@z_@y z?xL#53gLDe>K~nuist(o%G`nIT;Z5obJYE{{$NOWegy?yXTyn7Sy^{#RY-3aoj+_; z*B4JF1#!{Q*(_BTGP&7H6xZv-Sh>8E(O=&j<4)Se<0hosAbVV#(86~emtMmdR=5Ag z=;SJUk!_@Q>c>V_^j3Y0=~aiqhoIw4O{K6()#GBra&tdzS7@=5%bwz*-_m;Kg=lJS zFV+;(Y7J!YdMctS|Dgmdf%?@Ok?3`Q&@V7;qUFzTg1bRVI3Fqk_cd+0b$KNM`v4t| z9i7naiC8&{O~D7~0GIXDlI8kd4~^Q`H+Ybq@IYr|YHF%EXd{ifge~Uu6W&a|O{Szu zX6q+C6uQ{AwEcyV^wxTtV|}#>mv|)q)(fg$TN`Ano#pCUox1_+mrOS-dgRxur+4&h zn|gdpb z$DS5dR7JOY=26JrdH|s+B*nlk(@nIY2Dz}l{=H&H#4h2M&Nd+jSHpH&S%lWu_@b0zS>@T)mLaIDHMiobU?QCtb*A!ThbUU1HW?M+ui>P+^ zb*5a8!sAY&^t>F-%%r;K2z=Ch*VcS{u~^fauGeQm4qRcb6AaMcqc;El+L{nw(x_|E|@&mHOP`IB4SdPFt6a6;<&xwH1AzT_?C9_`wGOB420H zuBJXbinqM+NRxYNyh+h-?>ZaI=4oauYSkCY-dm2Jj{904l5^x%cW_r`di^ni_2(3; zYqsbsTSn94_1=sXUR2??aV6~FRBlrf-h-Cm7>{x1gYncMhTHqkRFsN})rLV9&im4@ zUL+iL`UeG4oo`+yDc zc<7^cBXe>z_hP#+_k*q_laA-|^2JXSqFmF75UF(opo#-<28jnX(5?`{m*NVfsic5?b>4_TS`9=~1@?>B5r99zZmZ9l(?;Y-%Kk1~DpzByK_$BXj( zi;!J^cDYKM6Xhv!xsWp3V@5CgzPI|}$6-W7U5>l4cc|0}kYBw&gmpN46W8nWnu%op zR@uRQ)uSWp(F))HR#~(7nGYCE#!^v2n9n_um=eYTYfDQDwHl4RQ#o|kRL$}*hLn@@ z5f(F*0AQc`aPe!;Vs7$xkQXDzV`<5el&LA3nu}sH)9Px+`QlZbQBr&RF}`pO_RGC7 
zYd3vN5-1s>PjCR@R*>IQydybp2^lwH$*EX0oR)y%Sy^!`^qogydQzS7M@E*tw?v#F zc)4jwFVtP)6(QNL_LEuVN~VZ3f|M@}W2yC4=?g!v;DRxp<+M4f&8gho+`UDA1Y7n= zVj$Dy5fWad*B#mX4N=eK(D_-yv`Hg@oB4zYs@|f0FowxcRj^&q^D;unmcVx$+Vov~U%*TNt0$`pBe+;0zsct2b#My^%+@6qM}nv z>tNp}B66z5Sz3NF%=i5W1hQThAQJH60cm$4#lk+PW_JI(53gdcea36+8)tkq10i|YxEM1L>JQGbV4Z~Q`qOt_GHn!xylub-S^&_{QV zTL)*OGkd2RbtKC>Dt4G`L+E1c;qKL$9^X58)%Oc=;t+>+CSp1~e?F6Uhd)?$GWL~A zyYI&=Ar*7A$!LTF{&!P8h}3*4Dq0OiYN^3=9OGLKwceo47vQ=v67Nntz2Xq^Q6`<8 zDYP|tT`ksRo~?-OKd-+(QEWY4js3zPK>qaQTaqZrYJ2*VldRAzyccL8Mmw8)f*4Aa zZZtu+TP&@fe+c~YRpzLFDJo4mg*WVK3Xq|Y=kzPx=)m8V`P?t|?BZ%RR=iHyv z&>Lg%p=~a;cpXPO2`+lj!ji3>SIWnskqtg`*s zQijd1W|-wV_MZj8?XmiJjKEAFP8m_GL+1&x{%t}@2b*ky`)F*0Yd8zx*#LOC<)2Ug#?FK7dG6aZP z4fld0?{RT}ymyo!z>EYEbT7maI)MKb3aP#ZK8HGaT!9!xQGAK|I|idnLW*t^M1bM- zoGPnkM{CUwv-XNJY8?^6J!Lh2KQtn6Tu`?q-|Zr*Ci1lRA{)U_5{hQCS{qA&A!Uxw z4-=`p&^~sFx$q?w^aOcG9Us4@>`zSG+-IDT!K1K<$PuvR*yW7v0iy7D7DgB>h{BC4 za8!K&T~>@l8uhwjn>c7|j5LO-8aT z78*>dQ00f3YuQfyy*#?(($^Pwii0D7uJ6DVDRCCBY$_=n8vqIQ`xxv7`cL2R~3 zjWs~q!)Dz-TcWd26+K?@Mx$%RVz(*cz1KI9&R#SWpqd=J3h4nN5LW!uqHLZo@Y^&f zKW4bvT5N4u+-I)#>o0T##5$h~g7NH-R&EeG`$G_d_4!(>Qkg27=W>+%NwPy!NAW32=4`(%E#YIE~;bEnuBEA%9;$OSR*2Voa>VZu7;DnL&;^A@@vMTo)>)pwIx`}o^Rri=6 z$fU`4TTxfl7rkF-^ghq6LGDDOo*U*1UL z{xxW1qf-p@`1&-GxS5_)Nt~}gxiHvZJ^AFGd^m*WbQ;FX>U|k zXUfcXaIr30rkeEf;L7E3D|j-0hMucP)z)UW_4C_$smijaAy^Y!4YM0)TwpeX+@Bmu z^WDs7iuUW-_J$6@N|n~xG3#mB!pR};*6PWrQ$2uqVm?eS)T?wZ7_s;$Ot_(*d^<~$ z93OtbcKDpUg~$!hVbSZZdtRoDQz{C~Rz(Bi>@qC6t~GoGg=E@!ci+<16j}AC0rVwv}|>0|QsRxFBFuvEI>Nn!R(l z-V&6?3M`OjWbT1|1#ozSRfsfstK-3V3I+9^uzw3*lJ*}kKiiFO4L0ri<9Jy3nSXp~ zbld!jCBlq-}-X1aJ&@XATXhO8`dX=^es)mJJR z4|=o@RzxMgd+kq1-a|*3D>lj&l#Z_j)59kJWg7kc+9BGaMQXupWORbkMw}zs>j7-z zOx4oMxSJGQjSRXkI5;^u*=#lLOZt@b+;k{~N3XLKS_&gT5KMHnDI~{odr8S$wa1)6 zx(8#9wSkMo3i@7C`cxRJ;x`D5myV5wKN8%Nvyk2S0%#Lz zxYqb^O1S>{id0*B^P~(n9)Hu0!}(<1dT~;wtainmm^J*uqCG7UOOJwl^SzDyN( zRYvV{_R|3m=F{iyVGvjfI??Cp-&qFsH+#vEcDWhd+P^IYBRgZQio^#1}&CEAzigac2DSX9_nZshS^xDiE4#wlt{UK=4mG_~%$NHp;(c?PO 
zQG<#8FAsD!u8%E6gXOKjuf+)gM-1xPGkkH0z$<~5km&9Ss;=oCO&t~s!I2X0y50i> zUGe0^R1z5HM`21u`Y~5BCBi5!kc3?t$6=!IFs#-z5J+fucShs=Uq zuliYlso3HCHBxH3nVqNa(`XmmrkM+WjD7Tz`~&^OO2*>T|=3Cdf zQ!8g<2H|~D5z9B%OS^uhk%;>?ad#typ*p$XPM4s}uFP6T)|0Hwdtya)2OJ{;m@jv8 zKMmd{c%v(J;>HMcnz!ALj~OcIC2;fD(vq0vxdGN6ovYm=*G30epmZ#e^{K%Yi{3s| z03E7fRE|ws)!Gv7N2p|Lo-M1_!`EAyIvo5z5)uaA!#f!r+G1I~P(}02R)$*Oet$oT z4FVe3%Sn0UKA^3z8QLaoM|){jK3fK^YIn2n^-xp0wk(XmZ}&011v+uhe-6=UP`6p9 zm%)jXdw`6PSmk-fB}a@HSr zh4$}nZMK9|r7s8TJTht{iJzPTcH-91=j?SBa^jZFG{*(bUVhW_Q$<)=+uze55%k?2 znR6+FfeNiY+4IwjZ^CJ|Az1rvy-?3c;_GcF#fv{(13PWWSR5Kne0cx2d4vsC#>i)o zzxi}@*t+|^0$U5(@^82=Z!ZnbNmRCM@HEefQ{1&z0HSjnvjeW-2r^c+o_!U@>-&Y zb(`5q&+$!yMB-ORi><`O)5R+m>xe`ekKkFD%hmfq$NLb3gxZMFN@6OL#o3^GEam;o zuUN48v+t$<%69hW7+#A&%+C@dF<=IdMlo4KdaFEf`e;~E!_Y6D@|!*hmqH>!L{T_a zxgH}q9FrC(S>2AnM%N@<^{E&f|1Ez&zYxG$n2Q6H?mpi}%)w~01-tsuYPXY)89QV7j$$G2 z@PR?(u{1_c_i&9~AF{A!p0cOb=s#3xGgMuxMfK@ZwxpUePew3tl}VZ8wnxYO4URh= za10I>OGXR3%m1BP^`2s%tm-NhRC8u<5o-5{@lm|2JK- zv9WWVu=7%oxhE+>Km;1ZYJ3@aarlXci2r~n#_2l{@ z@$R@ob>2C7{q$mM7%Q)Mv~_mLRp0aVIco2Zi%BD`V{__`lK7vCc&I4hMNBWypbuTK z95UC~p|`N2-z0WE6nrmhM!U6FuP?Mqt9@6LT%r`LL_oV6^2mc_n7j}?4nk-krW}9T z-!6;JUruITr5XjafAwWy8Pze}5W^tF!CurI)5{?qfp!xL?hS{|4APjJ-(xX#*dOJ0 znCWhFIzxIvZ)>@&QG!!rf-cn9m0hlQA`Mb%}!@+ zx&|;Q4h(lm5}iNsn + + + + + + + + + + +
+Original code + +StarPU code +
+\snippet vector_scal_c_align.c Original main code To be included. You should update doxygen if you see this text. + +\snippet vector_scal_starpu.c To be included. You should update doxygen if you see this text. +
+ +\section BuildingandRunning Building and Running + +We will use the StarPU docker image. + +\verbatim +$ docker run -it registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest +\endverbatim + +If your machine has GPU devices, you can use the following command to enable the GPU devices within the docker image. + +\verbatim +$ docker run -it --gpus all registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest +\endverbatim + +From your docker image, you can then call the following commands. + +\verbatim +$ cd tutorial/files +$ make vector_scal_task_insert +$ ./vector_scal_task_insert +\endverbatim + +You can set the environment variable \ref STARPU_WORKER_STATS to \c 1 when +running your application to see the number of tasks executed by each +device. + +\verbatim +$ STARPU_WORKER_STATS=1 ./vector_scal_task_insert +\endverbatim + +If your machine has GPU devices, you can force the execution on the GPU devices by setting the number of CPU workers to 0. + +\verbatim +# to force the implementation on a GPU device, by default, it will enable CUDA +$ STARPU_WORKER_STATS=1 STARPU_NCPU=0 ./vector_scal_task_insert + +# to force the implementation on an OpenCL device +$ STARPU_WORKER_STATS=1 STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal_task_insert +\endverbatim + + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/basic_examples.doxy b/doc/doxygen/chapters/starpu_basics/basic_examples.doxy new file mode 100644 index 0000000..37bb2cb --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/basic_examples.doxy @@ -0,0 +1,494 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page BasicExamples Basic Examples + +\section HelloWorldUsingStarPUAPI Hello World + +This section shows how to implement a simple program that submits a task +to StarPU. The full source code for this example is available in the file examples/basic_examples/hello_world.c + +\subsection RequiredHeaders Required Headers + +The header \c starpu.h should be included in any code using StarPU. + +\code{.c} +#include <starpu.h> +\endcode + +\subsection DefiningACodelet Defining A Codelet + +A codelet is a structure that represents a computational kernel. Such a codelet +may contain an implementation of the same kernel on different architectures +(e.g. CUDA, x86, ...). For compatibility, make sure that the whole +structure is properly initialized to zero, either by using the +function starpu_codelet_init(), or by letting the +compiler implicitly do it as exemplified below. + +The field starpu_codelet::nbuffers specifies the number of data buffers that are +manipulated by the codelet. Here, the codelet does not access or modify any data +that is controlled by our data management library. + +We create a codelet which may only be executed on CPUs. When a CPU +core executes a codelet, it will call the function +cpu_func, which \em must have the following prototype: + +\code{.c} +void cpu_func(void *buffers[], void *cl_arg); +\endcode + +In this example, we can ignore the first argument of this function which gives a +description of the input and output buffers (e.g. the size and the location of +the matrices) since there is none. We also ignore the second argument, +which is a pointer to optional arguments for the codelet.
+ +\code{.c} +void cpu_func(void *buffers[], void *cl_arg) +{ + printf("Hello world\n"); +} + +struct starpu_codelet cl = +{ + .cpu_funcs = { cpu_func }, + .nbuffers = 0 +}; +\endcode + +\subsection SubmittingATask Submitting A Task + +Before submitting any tasks to StarPU, starpu_init() must be called, or starpu_initialize() must be called by giving application arguments. The +NULL argument specifies that we use the default configuration. +Tasks can then be submitted until the termination of StarPU -- done by a +call to starpu_shutdown(). + +In the example below, a task structure is allocated by a call to +starpu_task_create(). This function allocates and fills the +task structure with its default settings, it does not +submit the task to StarPU. + +The field starpu_task::cl is a pointer to the codelet which the task will +execute: in other words, the codelet structure describes which computational +kernel should be offloaded on the different architectures, and the task +structure is a wrapper containing a codelet and the piece of data on which the +codelet should operate. + +If the field starpu_task::synchronous is non-zero, task submission +will be synchronous: the function starpu_task_submit() will not return +until the task has been executed. Note that the function starpu_shutdown() +does not guarantee that asynchronous tasks have been executed before +it returns, starpu_task_wait_for_all() can be used to this effect, or +data can be unregistered (starpu_data_unregister()), which will +implicitly wait for all the tasks scheduled to work on it, unless +explicitly disabled thanks to +starpu_data_set_default_sequential_consistency_flag() or +starpu_data_set_sequential_consistency_flag(). + +\code{.c} +int main(int argc, char **argv) +{ + /* initialize StarPU */ + starpu_init(NULL); + + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; /* Pointer to the codelet defined above */ + + /* starpu_task_submit will be a blocking call. 
If unset, + starpu_task_wait() needs to be called after submitting the task. */ + task->synchronous = 1; + + /* submit the task to StarPU */ + starpu_task_submit(task); + + /* terminate StarPU */ + starpu_shutdown(); + + return 0; +} +\endcode + +\subsection ExecutionOfHelloWorld Execution Of Hello World + +\verbatim +$ make hello_world +cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) +$ ./hello_world +Hello world +\endverbatim + +\subsection PassingArgumentsToTheCodelet Passing Arguments To The Codelet + +The optional field starpu_task::cl_arg is a pointer to a buffer +(of size starpu_task::cl_arg_size) with some parameters for the kernel +described by the codelet. For instance, if a codelet implements a +computational kernel that multiplies its input vector by a constant, +the constant could be specified by means of this buffer, instead +of registering it as a StarPU data. It must however be noted that +StarPU avoids making copies whenever possible and rather passes the +pointer as such, so the buffer which is pointed to must be kept allocated +until the task terminates, and if several tasks are submitted with +various parameters, each of them must be given a pointer to its +own buffer. + +\code{.c} +struct params +{ + int i; + float f; +}; + +void cpu_func(void *buffers[], void *cl_arg) +{ + struct params *params = cl_arg; + + printf("Hello world (params = {%i, %f} )\n", params->i, params->f); +} +\endcode + +As said before, the field starpu_codelet::nbuffers specifies the +number of data buffers which are manipulated by the codelet. It does +not count the argument --- the parameter cl_arg of the function +cpu_func --- since it is not managed by our data management +library, but just contains trivial parameters. + +// TODO rewrite so that it is a little clearer ?
+ +Be aware that the cl_arg pointer passed to the codelet may be a pointer to a +\em copy of the actual buffer, and not the pointer given by the programmer: +if the codelet modifies this buffer, there is no guarantee that the initial +buffer will be modified as well: this for instance implies that the buffer +cannot be used as a synchronization medium. If synchronization is needed, data +has to be registered to StarPU, see \ref VectorScalingUsingStarPUAPI. + +\code{.c} +int main(int argc, char **argv) +{ + /* initialize StarPU */ + starpu_init(NULL); + + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; /* Pointer to the codelet defined above */ + + struct params params = { 1, 2.0f }; + task->cl_arg = &params; + task->cl_arg_size = sizeof(params); + + /* starpu_task_submit will be a blocking call */ + task->synchronous = 1; + + /* submit the task to StarPU */ + starpu_task_submit(task); + + /* terminate StarPU */ + starpu_shutdown(); + + return 0; +} +\endcode + +\verbatim +$ make hello_world +cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) +$ ./hello_world +Hello world (params = {1, 2.000000} ) +\endverbatim + +\subsection DefiningACallback Defining A Callback + +Once a task has been executed, an optional callback function +starpu_task::callback_func is called when defined. +While the computational kernel could be offloaded on various architectures, the +callback function is always executed on a CPU. The pointer +starpu_task::callback_arg is passed as an argument to the callback +function.
The prototype of a callback function must be: + +\code{.c} +void callback_function(void *); +\endcode + +\code{.c} +void callback_func(void *callback_arg) +{ + printf("Callback function (arg %x)\n", (unsigned)(uintptr_t)callback_arg); +} + +int main(int argc, char **argv) +{ + /* initialize StarPU */ + starpu_init(NULL); + + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; /* Pointer to the codelet defined above */ + + task->callback_func = callback_func; + task->callback_arg = (void *)(uintptr_t)0x42; + + /* starpu_task_submit will be a blocking call */ + task->synchronous = 1; + + /* submit the task to StarPU */ + starpu_task_submit(task); + + /* terminate StarPU */ + starpu_shutdown(); + + return 0; +} +\endcode + +\verbatim +$ make hello_world +cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) +$ ./hello_world +Hello world +Callback function (arg 42) +\endverbatim + +\subsection WhereToExecuteACodelet Where To Execute A Codelet + +\code{.c} +struct starpu_codelet cl = +{ + .where = STARPU_CPU, + .cpu_funcs = { cpu_func }, + .nbuffers = 0 +}; +\endcode + +We create a codelet which may only be executed on the CPUs. The +optional field starpu_codelet::where is a bitmask which defines where +the codelet may be executed. Here, the value ::STARPU_CPU means that +only CPUs can execute this codelet. When the optional field +starpu_codelet::where is unset, its value is automatically set based +on the availability of the different fields XXX_funcs. + +\section VectorScalingUsingStarPUAPI Vector Scaling + +The previous example has shown how to submit tasks. In this section, +we show how StarPU tasks can manipulate data. + +The full source code for +this example is given in \ref FullSourceCodeVectorScal. + +\subsection SourceCodeOfVectorScaling Source Code of Vector Scaling + +Programmers can describe the data layout of their application so that StarPU is +responsible for enforcing data coherency and availability across the machine.
+Instead of handling complex (and non-portable) mechanisms to perform data +movements, programmers only declare which piece of data is accessed and/or +modified by a task, and StarPU makes sure that when a computational kernel +starts somewhere (e.g. on a GPU), its data are available locally. + +Before submitting those tasks, programmers first need to declare the +different pieces of data to StarPU using the functions +starpu_*_data_register. To ease the development of applications +for StarPU, it is possible to describe multiple types of data layout. +A type of data layout is called an interface. There are +different predefined interfaces available in StarPU, here we will +consider the vector interface. + +The following lines show how to declare an array of NX elements of type +float using the vector interface: + +\code{.c} +float vector[NX]; + +starpu_data_handle_t vector_handle; +starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); +\endcode + +The first argument, called the data handle, is an opaque pointer which +designates the array within StarPU. This is also the structure which is used to +describe which data is used by a task. The second argument is the node number +where the data originally resides. Here it is ::STARPU_MAIN_RAM since the array vector is in +the main memory. Then comes the pointer vector where the data can be found in main memory, +the number of elements in the vector and the size of each element. +The following shows how to construct a StarPU task that will manipulate the +vector and a constant factor. 
+ +\code{.c} +float factor = 3.14; +struct starpu_task *task = starpu_task_create(); + +task->cl = &cl; /* Pointer to the codelet defined below */ +task->handles[0] = vector_handle; /* First parameter of the codelet */ +task->cl_arg = &factor; +task->cl_arg_size = sizeof(factor); +task->synchronous = 1; + +starpu_task_submit(task); +\endcode + +Since the factor is a mere constant float value parameter, +it does not need a preliminary registration, and +can just be passed through the pointer starpu_task::cl_arg like in the previous +example. The vector parameter is described by its handle. +starpu_task::handles should be set with the handles of the data, the +access modes for the data are defined in the field +starpu_codelet::modes (::STARPU_R for read-only, ::STARPU_W for +write-only and ::STARPU_RW for read and write access). + +The definition of the codelet can be written as follows: + +\code{.c} +void scal_cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + float *factor = cl_arg; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* CPU copy of the vector pointer */ + float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); + + for (i = 0; i < n; i++) + val[i] *= *factor; +} + +struct starpu_codelet cl = +{ + .cpu_funcs = { scal_cpu_func }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +\endcode + +The first argument is an array that gives +a description of all the buffers passed in the array starpu_task::handles. The +size of this array is given by the field starpu_codelet::nbuffers. For +the sake of genericity, this array contains pointers to the different +interfaces describing each buffer. In the case of the vector +interface, the location of the vector (resp. its length) is +accessible in the starpu_vector_interface::ptr (resp. +starpu_vector_interface::nx) of this interface. 
Since the vector is +accessed in a read-write fashion, any modification will automatically +affect future accesses to this vector made by other tasks. + +The second argument of the function scal_cpu_func contains a +pointer to the parameters of the codelet (given in +starpu_task::cl_arg), so that we read the constant factor from this +pointer. + +\subsection ExecutionOfVectorScaling Execution of Vector Scaling + +\verbatim +$ make vector_scal +cc $(pkg-config --cflags starpu-1.4) vector_scal.c -o vector_scal $(pkg-config --libs starpu-1.4) +$ ./vector_scal +0.000000 3.000000 6.000000 9.000000 12.000000 +\endverbatim + +\section VectorScalingOnAnHybridCPUGPUMachine Vector Scaling on an Hybrid CPU/GPU Machine + +Contrary to the previous examples, the task submitted in this example may not +only be executed by the CPUs, but also by a CUDA device. + +\subsection DefinitionOfTheCUDAKernel Definition of the CUDA Kernel + +The CUDA implementation can be written as follows. It needs to be compiled with +a CUDA compiler such as nvcc, the NVIDIA CUDA compiler driver. It must be noted +that the vector pointer returned by ::STARPU_VECTOR_GET_PTR is here a +pointer in GPU memory, so that it can be passed as such to the +kernel call vector_mult_cuda. + +\snippet basics_vector_scal_cuda.c To be included. You should update doxygen if you see this text. + +\subsection DefinitionOfTheOpenCLKernel Definition of the OpenCL Kernel + +The OpenCL implementation can be written as follows. StarPU provides +tools to compile a OpenCL kernel stored in a file. + +\code{.c} +__kernel void vector_mult_opencl(int nx, __global float* val, float factor) +{ + const int i = get_global_id(0); + if (i < nx) + { + val[i] *= factor; + } +} +\endcode + +Contrary to CUDA and CPU, ::STARPU_VECTOR_GET_DEV_HANDLE has to be used, +which returns a cl_mem (which is not a device pointer, but an OpenCL +handle), which can be passed as such to the OpenCL kernel. 
The difference is +important when using partitioning, see \ref PartitioningData. + +\snippet basics_vector_scal_opencl.c To be included. You should update doxygen if you see this text. + +\subsection DefinitionOfTheMainCode Definition of the Main Code + +The CPU implementation is the same as in the previous section. + +Here is the source of the main application. You can notice that the fields +starpu_codelet::cuda_funcs and starpu_codelet::opencl_funcs are set to +define the pointers to the CUDA and OpenCL implementations of the +task. + +\snippet basics_vector_scal_c.c To be included. You should update doxygen if you see this text. + +\subsection ExecutionOfHybridVectorScaling Execution of Hybrid Vector Scaling + +The Makefile given at the beginning of the section must be extended to +give the rules to compile the CUDA source code. Note that the source +file of the OpenCL kernel does not need to be compiled now, it will +be compiled at runtime when calling the function +starpu_opencl_load_opencl_from_file(). 
+ +\verbatim +CFLAGS += $(shell pkg-config --cflags starpu-1.4) +LDLIBS += $(shell pkg-config --libs starpu-1.4) +CC = gcc + +vector_scal: vector_scal.o vector_scal_cpu.o vector_scal_cuda.o vector_scal_opencl.o + +%.o: %.cu + nvcc $(CFLAGS) $< -c -o $@ + +clean: + rm -f vector_scal *.o +\endverbatim + +\verbatim +$ make +\endverbatim + +and to execute it, with the default configuration: + +\verbatim +$ ./vector_scal +0.000000 3.000000 6.000000 9.000000 12.000000 +\endverbatim + +or for example, by disabling CPU devices: + +\verbatim +$ STARPU_NCPU=0 ./vector_scal +0.000000 3.000000 6.000000 9.000000 12.000000 +\endverbatim + +or by disabling CUDA devices (which may make it possible to use OpenCL, +see \ref EnablingOpenCL): + +\verbatim +$ STARPU_NCUDA=0 ./vector_scal +0.000000 3.000000 6.000000 9.000000 12.000000 +\endverbatim + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/basics_intro.doxy b/doc/doxygen/chapters/starpu_basics/basics_intro.doxy new file mode 100644 index 0000000..356acb8 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/basics_intro.doxy @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroBasics, --------- StarPU Basics ---------} + +\webforeword + +This part presents the basic knowledge of StarPU.
It should be read to understand how StarPU works and how to execute a basic StarPU application. + +
    +
  • +Chapter \ref StarPUApplications shows how to create and run your +own StarPU applications. +
  • +
  • +Chapter \ref BasicExamples shows how to implement simple programs that +submit tasks to StarPU. +
  • +
  • +Chapter \ref FullSourceCodeVectorScal gives the full source code for +a vector scaling application. +
  • +
+ +The next chapters cover the most important and core concepts in +StarPU: +
    +
  • +Chapter \ref TasksInStarPU explains the basic information on tasks +management. +
  • +
  • +Chapter \ref DataManagement shows how to manage the data layout of +your application data by using the different data interfaces provided +by StarPU. +
  • +
  • +Chapter \ref Scheduling explains the scheduling policies provided by +StarPU. +
  • +
+ +Some examples applications are provided from the StarPU sources for +you to try. Chapter \ref ExamplesInStarPUSources lists these +applications. + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c new file mode 100644 index 0000000..9050dfd --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c @@ -0,0 +1,126 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +/* + * This example demonstrates how to use StarPU to scale an array by a factor. + * It shows how to manipulate data with StarPU's data management library. 
+ * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) + * 2- how to describe which data are accessed by a task (task->handles[0]) + * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) + */ +#include <starpu.h> + +#define NX 2048 + +extern void scal_cpu_func(void *buffers[], void *_args); +extern void scal_sse_func(void *buffers[], void *_args); +extern void scal_cuda_func(void *buffers[], void *_args); +extern void scal_opencl_func(void *buffers[], void *_args); + +static struct starpu_codelet cl = +{ + .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, + /* CPU implementation of the codelet */ + .cpu_funcs = { scal_cpu_func, scal_sse_func }, + .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func" }, +#ifdef STARPU_USE_CUDA + /* CUDA implementation of the codelet */ + .cuda_funcs = { scal_cuda_func }, +#endif +#ifdef STARPU_USE_OPENCL + /* OpenCL implementation of the codelet */ + .opencl_funcs = { scal_opencl_func }, +#endif + .nbuffers = 1, + .modes = { STARPU_RW } +}; + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program programs; +#endif + +int main(int argc, char **argv) +{ + /* We consider a vector of float that is initialized just as any of C + * data */ + float vector[NX]; + unsigned i; + for (i = 0; i < NX; i++) + vector[i] = 1.0f; + + fprintf(stderr, "BEFORE: First element was %f\n", vector[0]); + + /* Initialize StarPU with default configuration */ + starpu_init(NULL); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_load_opencl_from_file("examples/basic_examples/vector_scal_opencl_kernel.cl", &programs, NULL); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it.
+ * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element. + */ + starpu_data_handle_t vector_handle; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); + + float factor = 3.14; + + /* create a synchronous task: any call to starpu_task_submit will block + * until it is terminated */ + struct starpu_task *task = starpu_task_create(); + task->synchronous = 1; + + task->cl = &cl; + + /* the codelet manipulates one buffer in RW mode */ + task->handles[0] = vector_handle; + + /* an argument is passed to the codelet, beware that this is a + * READ-ONLY buffer and that the codelet may be given a pointer to a + * COPY of the argument */ + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + /* execute the task on any eligible computational resource */ + starpu_task_submit(task); + + /* StarPU does not need to manipulate the array anymore so we can stop + * monitoring it */ + starpu_data_unregister(vector_handle); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_unload_opencl(&programs); +#endif + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + fprintf(stderr, "AFTER First element is %f\n", vector[0]); + + return 0; +} +//! [To be included. You should update doxygen if you see this text.]
diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c new file mode 100644 index 0000000..a4cd267 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] + +#include <starpu.h> +#include <xmmintrin.h> + +/* This kernel takes a buffer and scales it by a constant factor */ +void scal_cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + float *factor = cl_arg; + + /* + * The "buffers" array matches the task->handles array: for instance + * task->handles[0] is a handle that corresponds to a data with + * vector "interface", so that the first entry of the array in the + * codelet is a pointer to a structure describing such a vector (ie. + * struct starpu_vector_interface *). Here, we therefore manipulate + * the buffers[0] element as a vector: nx gives the number of elements + * in the array, ptr gives the location of the array (that was possibly + * migrated/replicated), and elemsize gives the size of each element.
+ */ + struct starpu_vector_interface *vector = buffers[0]; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(vector); + + /* get a pointer to the local copy of the vector: note that we have to + * cast it in (float *) since a vector could contain any type of + * elements so that the .ptr field is actually a uintptr_t */ + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + + /* scale the vector */ + for (i = 0; i < n; i++) + val[i] *= *factor; +} + +void scal_sse_func(void *buffers[], void *cl_arg) +{ + float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); + unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + unsigned int n_iterations = n/4; + + __m128 *VECTOR = (__m128*) vector; /* NOTE(review): assumes the vector is 16-byte aligned -- confirm */ + __m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); + float factor = *(float *) cl_arg; + FACTOR = _mm_set1_ps(factor); + + unsigned int i; + for (i = 0; i < n_iterations; i++) + VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); + + unsigned int remainder = n%4; + if (remainder != 0) + { + unsigned int start = 4 * n_iterations; + for (i = start; i < start+remainder; ++i) + { + vector[i] = factor * vector[i]; + } + } +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c new file mode 100644 index 0000000..6b2b482 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +//! [To be included. You should update doxygen if you see this text.] +#include <starpu.h> + +static __global__ void vector_mult_cuda(unsigned n, float *val, float factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + if (i < n) + val[i] *= factor; +} + +extern "C" void scal_cuda_func(void *buffers[], void *_args) +{ + float *factor = (float *)_args; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the vector pointer */ + float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; + + vector_mult_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(n, val, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c new file mode 100644 index 0000000..d383ba8 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +#include <starpu.h> + +extern struct starpu_opencl_program programs; + +void scal_opencl_func(void *buffers[], void *_args) +{ + float *factor = _args; + int id, devid, err; /* OpenCL specific code */ + cl_kernel kernel; /* OpenCL specific code */ + cl_command_queue queue; /* OpenCL specific code */ + cl_event event; /* OpenCL specific code */ + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* OpenCL copy of the vector pointer */ + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + + { /* OpenCL specific code */ + id = starpu_worker_get_id(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &programs, + "vector_mult_opencl", /* Name of the kernel */ + devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(n), &n); + err |= clSetKernelArg(kernel, 1, sizeof(val), &val); + err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + } + + { /* OpenCL specific code */ + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) local=global; + else global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + + { /* OpenCL specific code */ + clFinish(queue); +
starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); + } +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl new file mode 100644 index 0000000..9a0a745 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +__kernel void vector_mult_opencl(int nx, __global float* val, float factor) +{ + const int i = get_global_id(0); + if (i < nx) + { + val[i] *= factor; + } +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_basics/data_management.doxy b/doc/doxygen/chapters/starpu_basics/data_management.doxy new file mode 100644 index 0000000..fdbeb85 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/data_management.doxy @@ -0,0 +1,753 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page DataManagement Data Management + +TODO: intro which mentions consistency among other things + +\section DataInterface Data Interface + +StarPU provides several data interfaces for programmers to describe +the data layout of their application. There are predefined interfaces +already available in StarPU. Users can define new data interfaces as +explained in \ref DefiningANewDataInterface. All functions provided by +StarPU are documented in \ref API_Data_Interfaces. You will find a +short list below. + +\subsection VariableDataInterface Variable Data Interface + +A variable is a given-size byte element, typically a scalar or a pointer to an application-specific structure. + +Here is an example of how to register a variable data to StarPU by using +starpu_variable_data_register(). A full code example for the variable data +interface is available in the file examples/basic_examples/variable.c. + +\code{.c} +float var = 42.0; +starpu_data_handle_t var_handle; +starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); +\endcode + +Here is an example of how to register an application-specific data to StarPU, +the idea is to register the variable that contains the pointer to the +application-specific data. 
This will not provide support for GPUs and MPI, but +can be an easy start before defining your own data interface to describe the +application-specific structure (see \ref DefiningANewDataInterface). + +\code{.c} +struct mystructure *A = ...; +starpu_data_handle_t var_handle; +starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&A, sizeof(A)); +\endcode + +\subsection VectorDataInterface Vector Data Interface + +A vector is a fixed number of elements of a given size. Here is an +example of how to register a vector data to StarPU by using +starpu_vector_data_register(). A full code example for the vector data interface is available in the file examples/filters/fvector.c. + +\code{.c} +float vector[NX]; +starpu_data_handle_t vector_handle; +starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); +\endcode + +Vectors can be partitioned into pieces by using +starpu_vector_filter_block(). They can also be partitioned with some overlapping +by using starpu_vector_filter_block_shadow(). An +example is in the file examples/filters/shadow.c. + +By default, StarPU uses the same size for each piece. If different +sizes are desired, starpu_vector_filter_list() or +starpu_vector_filter_list_long() can be used instead. + +To just divide in two pieces, starpu_vector_filter_divide_in_2() can +be used. + +In addition, contiguous variables can be picked from a vector by using +starpu_vector_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_vector_filter_pick_variable_child_ops(). An example is in the file examples/filters/fvector_pick_variable.c. + +\subsection MatrixDataInterface Matrix Data Interface + +To register 2-D matrices with a potential padding, one can use the +matrix data interface. Here is an example of how to register a matrix +data to StarPU by using starpu_matrix_data_register(). 
+ +A full code example for the matrix data interface is available in the file +examples/filters/fmatrix.c. + +\code{.c} +float *matrix; +starpu_data_handle_t matrix_handle; +matrix = (float*)malloc(width * height * sizeof(float)); +starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); +\endcode + +2D matrices can be partitioned into 2D matrices along the x dimension by +using starpu_matrix_filter_block(), and along the y dimension by using +starpu_matrix_filter_vertical_block(). + +They can also be partitioned with some overlapping by using +starpu_matrix_filter_block_shadow() and +starpu_matrix_filter_vertical_block_shadow(). An +example is in the file examples/filters/shadow2d.c. + +In addition, contiguous vectors can be picked from a matrix along the +Y dimension by using starpu_matrix_filter_pick_vector_y() with +starpu_data_filter::get_child_ops set to +starpu_matrix_filter_pick_vector_child_ops(). An example +is in the file examples/filters/fmatrix_pick_vector.c. + +Variable can be also picked from a matrix by using +starpu_matrix_filter_pick_variable() with +starpu_data_filter::get_child_ops set to +starpu_matrix_filter_pick_variable_child_ops(). An +example is in the file +examples/filters/fmatrix_pick_variable.c. + +\subsection BlockDataInterface Block Data Interface + +To register 3-D matrices with potential paddings on Y and Z dimensions, +one can use the block data interface. Here is an example of how to +register a block data to StarPU by using starpu_block_data_register(). A full code example for the block data interface is available in the file examples/filters/fblock.c.
+ +\code{.c} +float *block; +starpu_data_handle_t block_handle; +block = (float*)malloc(nx*ny*nz*sizeof(float)); +starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float)); +\endcode + +3D matrices can be partitioned along the x dimension by +using starpu_block_filter_block(), or along the y dimension +by using starpu_block_filter_vertical_block(), or along the +z dimension by using starpu_block_filter_depth_block(). They +can also be partitioned with some overlapping by using +starpu_block_filter_block_shadow(), starpu_block_filter_vertical_block_shadow(), +or starpu_block_filter_depth_block_shadow(). An +example is in the file examples/filters/shadow3d.c. + +In addition, contiguous matrices +can be picked from a block along the Z dimension or the Y dimension +by using starpu_block_filter_pick_matrix_z() or +starpu_block_filter_pick_matrix_y() with +starpu_data_filter::get_child_ops set to +starpu_block_filter_pick_matrix_child_ops(). An example +is in the file examples/filters/fblock_pick_matrix.c. + +Variable can be also picked from a block by using +starpu_block_filter_pick_variable() with +starpu_data_filter::get_child_ops set to +starpu_block_filter_pick_variable_child_ops(). An example +is in the file examples/filters/fblock_pick_variable.c. + +\subsection TensorDataInterface Tensor Data Interface + +To register 4-D matrices with potential paddings on Y, Z, and T dimensions, +one can use the tensor data interface. Here is an example of how to +register a tensor data to StarPU by using starpu_tensor_data_register(). A full code example for the tensor data interface is available in the file examples/filters/ftensor.c. 
+ +\code{.c} +float *block; +starpu_data_handle_t block_handle; +block = (float*)malloc(nx*ny*nz*nt*sizeof(float)); +starpu_tensor_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float)); +\endcode + +4D matrices can be partitioned along the x dimension by +using starpu_tensor_filter_block(), or along the y dimension +by using starpu_tensor_filter_vertical_block(), or along the +z dimension by using starpu_tensor_filter_depth_block(), or +along the t dimension by using starpu_tensor_filter_time_block(). + +They can also be partitioned with some overlapping by using +starpu_tensor_filter_block_shadow(), +starpu_tensor_filter_vertical_block_shadow(), +starpu_tensor_filter_depth_block_shadow(), or +starpu_tensor_filter_time_block_shadow(). An example is in the file +examples/filters/shadow4d.c. + +In addition, contiguous blocks can be picked from a block along the T dimension, +Z dimension or the Y dimension by using starpu_tensor_filter_pick_block_t(), +starpu_tensor_filter_pick_block_z(), or +starpu_tensor_filter_pick_block_y(), and +starpu_data_filter::get_child_ops set to +starpu_tensor_filter_pick_block_child_ops(). An example +is in the file examples/filters/ftensor_pick_block.c. + +Variable can be also picked from a tensor by using +starpu_tensor_filter_pick_variable() with +starpu_data_filter::get_child_ops set to +starpu_tensor_filter_pick_variable_child_ops(). An +example is in the file +examples/filters/ftensor_pick_variable.c. + +\subsection NdimDataInterface Ndim Data Interface + +To register N-dim matrices, one can use the Ndim data interface. +Here is an example of how to register a 5-dim data to StarPU by using starpu_ndim_data_register(). A full code example for the ndim data interface is available in the file examples/filters/fndim.c. 
+ +\code{.c} +float *arr5d; +starpu_data_handle_t arr5d_handle; +starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(float)); + +unsigned nn[5] = {NX, NY, NZ, NT, NG}; +unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; + +starpu_ndim_data_register(&arr5d_handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(float)); +\endcode + +N-dim matrices can be partitioned along the given dimension +by using starpu_ndim_filter_block(). They can also be partitioned +with some overlapping by using starpu_ndim_filter_block_shadow(). An +example is in the file examples/filters/shadownd.c. + +Taking into account existing data interfaces, there are several +specialized functions which can partition a 0-dim array, 1-dim array, +2-dim array, 3-dim array or 4-dim array into +
    +
  • variables by using starpu_ndim_filter_to_variable() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_variable_child_ops() (see file examples/filters/fndim_to_variable.c)
  • , +
  • vectors by using starpu_ndim_filter_to_vector() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_vector_child_ops() (see file examples/filters/fndim_to_vector.c)
  • , +
  • matrices by using starpu_ndim_filter_to_matrix() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_matrix_child_ops() (see file examples/filters/fndim_to_matrix.c)
  • , +
  • blocks by using starpu_ndim_filter_to_block() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_block_child_ops() (see file examples/filters/fndim_to_block.c)
  • , +
  • or tensors by using starpu_ndim_filter_to_tensor() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_tensor_child_ops() (see file examples/filters/fndim_to_tensor.c)
  • . +
+ +In addition, contiguous (n-1)dim arrays can be picked from a ndim array +along the given dimension by using starpu_ndim_filter_pick_ndim(). An +example is in the file examples/filters/fndim_pick_ndim.c. + +In specific cases which consider existing data interfaces, contiguous +variables, vectors, matrices, blocks, or tensors can be picked along the +given dimension from a +
    +
  • 1-dim array by using starpu_ndim_filter_1d_pick_variable() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_variable_child_ops() (see file examples/filters/fndim_1d_pick_variable.c), +
  • 2-dim array by using starpu_ndim_filter_2d_pick_vector() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_vector_child_ops() (see file examples/filters/fndim_2d_pick_vector.c), +
  • 3-dim array by using starpu_ndim_filter_3d_pick_matrix() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_matrix_child_ops() (see file examples/filters/fndim_3d_pick_matrix.c), +
  • 4-dim array by using starpu_ndim_filter_4d_pick_block() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_block_child_ops() (see file examples/filters/fndim_4d_pick_block.c), +
  • or 5-dim array by using starpu_ndim_filter_5d_pick_tensor() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_tensor_child_ops() (see file examples/filters/fndim_5d_pick_tensor.c). +
+ +Variables can also be picked from an ndim array by using +starpu_ndim_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_variable_child_ops(). An example is in the file examples/filters/fndim_pick_variable.c. + +\subsection BCSRDataInterface BCSR Data Interface + +BCSR (Blocked Compressed Sparse Row Representation) sparse matrix data +can be registered to StarPU using the bcsr data interface. Here is an +example on how to do so by using starpu_bcsr_data_register(). + +\code{.c} +/* + * We use the following matrix: + * + * +----------------+ + * | 0 1 0 0 | + * | 2 3 0 0 | + * | 4 5 8 9 | + * | 6 7 10 11 | + * +----------------+ + * + * nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] + * colind = [0, 0, 1] + * rowptr = [0, 1, 3] + * r = c = 2 + */ + +/* Size of the blocks */ +int R = 2; +int C = 2; + +int NROWS = 2; +int NNZ_BLOCKS = 3; /* out of 4 */ +int NZVAL_SIZE = (R*C*NNZ_BLOCKS); + +int nzval[NZVAL_SIZE] = +{ + 0, 1, 2, 3, /* First block */ + 4, 5, 6, 7, /* Second block */ + 8, 9, 10, 11 /* Third block */ +}; +uint32_t colind[NNZ_BLOCKS] = +{ + 0, /* block-column index for first block in nzval */ + 0, /* block-column index for second block in nzval */ + 1 /* block-column index for third block in nzval */ +}; +uint32_t rowptr[NROWS+1] = +{ + 0, /* block-index in nzval of the first block of the first row. */ + 1, /* block-index in nzval of the first block of the second row. */ + NNZ_BLOCKS /* number of blocks, to allow an easier element's access for the kernels */ +}; + +starpu_data_handle_t bcsr_handle; +starpu_bcsr_data_register(&bcsr_handle, + STARPU_MAIN_RAM, + NNZ_BLOCKS, + NROWS, + (uintptr_t) nzval, + colind, + rowptr, + 0, /* firstentry */ + R, + C, + sizeof(nzval[0])); +\endcode + +An example on how to deal with such matrices is in the file examples/spmv/dw_block_spmv.c. 
+ +BCSR data handles can be partitioned into their dense matrix blocks by using +starpu_bcsr_filter_canonical_block(), or split into other BCSR data handles by +using starpu_bcsr_filter_vertical_block() (but only splitting along the leading dimension is +supported, i.e. along adjacent nnz blocks). starpu_data_filter::get_child_ops needs to be set to starpu_bcsr_filter_canonical_block_child_ops() and starpu_data_filter::get_nchildren set to starpu_bcsr_filter_canonical_block_get_nchildren(). An example is available in tests/datawizard/bcsr.c. + +\subsection CSRDataInterface CSR Data Interface + +TODO + +To register a Compressed Sparse Row Representation (CSR) sparse matrix, one can use the +CSR data interface. A full code example for the CSR data interface is available in the file mpi/tests/datatypes.c to show how to register a CSR matrix data to StarPU by using starpu_csr_data_register(). + +CSR data handles can be partitioned into vertical CSR matrices by using +starpu_csr_filter_vertical_block(). An example is available in the file examples/spmv/spmv.c. + +\subsection COODataInterface COO Data Interface + +To register 2-D matrices given in the coordinate format (COO), one can use the +COO data interface. A full code example for the COO data interface is available in the file tests/datawizard/interfaces/coo/coo_interface.c to show how to register a COO matrix data to StarPU by using starpu_coo_data_register(). 
+ +\section PartitioningData Partitioning Data + +An existing piece of data can be partitioned in sub parts to be used by different tasks, for instance: + +\code{.c} +#define NX 1048576 +#define PARTS 16 +int vector[NX]; +starpu_data_handle_t handle; + +/* Declare data to StarPU */ +starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); + +/* Partition the vector in PARTS sub-vectors */ +struct starpu_data_filter f = +{ + .filter_func = starpu_vector_filter_block, + .nchildren = PARTS +}; +starpu_data_partition(handle, &f); +\endcode + +The handle of a sub-data block of a composite data block can be retrieved by calling starpu_data_get_child(). Or the task submission first retrieves the number of sub-data blocks in a composite data block by calling starpu_data_get_nb_children() and then uses the function starpu_data_get_sub_data() or starpu_data_vget_sub_data() to retrieve the sub-handles to be passed as task parameters. + +\code{.c} +/* Submit a task on each sub-vector */ +for (i = 0; i < starpu_data_get_nb_children(handle); i++) +{ + /* Get subdata number i (there is only 1 dimension) */ + starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i); + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = sub_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + starpu_task_submit(task); +} +\endcode + +Partitioning can be applied several times by using starpu_data_map_filters() or starpu_data_vmap_filters() or starpu_data_map_filters_parray() or starpu_data_map_filters_array(), see +examples/basic_examples/mult.c and examples/filters/. + +Wherever the whole piece of data is already available, the partitioning will +be done in-place, i.e. without allocating new buffers but just using pointers +inside the existing copy. This is particularly important to be aware of when +using OpenCL, where the kernel parameters are not pointers, but \c cl_mem handles. 
The +kernel thus needs to be also passed the offset within the OpenCL buffer: + +\code{.c} +void opencl_func(void *buffers[], void *cl_arg) +{ + cl_mem vector = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); + + ... + clSetKernelArg(kernel, 0, sizeof(vector), &vector); + clSetKernelArg(kernel, 1, sizeof(offset), &offset); + ... +} +\endcode + +And the kernel has to shift from the pointer passed by the OpenCL driver: + +\code{.c} +__kernel void opencl_kernel(__global int *vector, unsigned offset) +{ + vector = (__global void *)vector + offset; + ... +} +\endcode + +When the sub-data is not of the same type as the original data, the field +starpu_data_filter::get_child_ops needs to be set appropriately for StarPU +to know which type should be used. + +starpu_data_unpartition() should be called in the end to collect back the sub-pieces of data into the original piece of data. + +StarPU provides various interfaces and filters for matrices, vectors, etc., +but applications can also write their own data interfaces and filters, see +examples/interface and examples/filters/custom_mf for an example, +and see \ref DefiningANewDataInterface and \ref DefiningANewDataFilter +for documentation. + +\section AsynchronousPartitioning Asynchronous Partitioning + +The partitioning functions described in the previous section are synchronous: +starpu_data_partition() and starpu_data_unpartition() both wait for all the tasks +currently working on the data. This can be a bottleneck for the application. + +An asynchronous API also exists, it works only on handles with sequential +consistency. The principle is to first plan the partitioning, which returns +data handles of the partition, which are not functional yet. When submitting +tasks, one can mix using the handles of the partition or the whole data. One +can even partition recursively and mix using handles at different levels of the +recursion. 
Of course, StarPU will have to introduce coherency synchronization. + +examples/filters/fmultiple_submit_implicit.c is a complete example using this technique. +One can also look at examples/filters/fmultiple_submit_readonly.c which contains the +explicit coherency synchronizations which are automatically introduced by StarPU +for examples/filters/fmultiple_submit_implicit.c. + +In short, we first register a matrix and plan the partitioning: + +\code{.c} +starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); +struct starpu_data_filter f_vert = +{ + .filter_func = starpu_matrix_filter_block, + .nchildren = PARTS +}; +starpu_data_partition_plan(handle, &f_vert, vert_handle); +\endcode + +starpu_data_partition_plan() returns the handles for the partition in vert_handle. + +One can then submit tasks working on the main \c handle, and tasks working on the sub handles +vert_handle. Between using the main handle and the handles vert_handle, StarPU will automatically call starpu_data_partition_submit() and +starpu_data_unpartition_submit(). Alternatively, one can call starpu_data_partition_submit_sequential_consistency() and starpu_data_unpartition_submit_sequential_consistency() to specify the coherency to be used for the main handle, or call starpu_data_unpartition_submit_sequential_consistency_cb() to specify a callback function for the unpartitioning task. One can also call starpu_data_partition_readonly_submit() and starpu_data_unpartition_readonly_submit() which do not guarantee coherency if the application attempts to write to the main handle or any of its sub-handles while a task is still running. 
However, in the read-only case we can also call starpu_data_partition_readonly_submit_sequential_consistency() to specify the coherency to be used for the main handle, or call starpu_data_partition_readwrite_upgrade_submit() to upgrade the partitioning of a data handle from read-only to read-write mode for a specific sub-handle. 
If users want to specify that the data won't be touched in write mode anymore and use multiple partition of the data at the same time, they can call starpu_data_partition_readonly_downgrade_submit(). + +After the task has completed using the data partition, starpu_data_partition_clean() or starpu_data_partition_clean_node() is used to clean up a data partition on the local node or on a specific node. + +All this code is asynchronous, just submitting which tasks, partitioning and +unpartitioning will be done at runtime. + +Planning several partitioning of the same data is also possible, StarPU will +unpartition and repartition as needed when mixing accesses of different +partitions. If data access is done in read-only mode, StarPU will allow the +different partitioning to coexist. As soon as a data is accessed in read-write +mode, StarPU will automatically unpartition everything and activate only the +partitioning leading to the data being written to. + +For instance, for a stencil application, one can split a subdomain into +its interior and halos, and then just submit a task updating the whole +subdomain, then submit MPI sends/receives to update the halos, then submit +again a task updating the whole subdomain, etc. and StarPU will automatically +partition/unpartition each time. + +\section DataCommute Commute Data Access + +By default, the implicit dependencies computed from data access use the +sequential semantic. Notably, write accesses are always serialized in the order +of submission. In some applicative cases, the write contributions can actually +be performed in any order without affecting the eventual result. In this case, +it is useful to drop the strictly sequential semantic, to improve parallelism +by allowing StarPU to reorder the write accesses. This can be done by using +the data access flag ::STARPU_COMMUTE. Accesses without this flag will however +properly be serialized against accesses with this flag. 
For instance: + +\code{.c} + starpu_task_insert(&cl1, STARPU_R, h, STARPU_RW, handle, 0); + starpu_task_insert(&cl2, STARPU_R, handle1, STARPU_RW|STARPU_COMMUTE, handle, 0); + starpu_task_insert(&cl2, STARPU_R, handle2, STARPU_RW|STARPU_COMMUTE, handle, 0); + starpu_task_insert(&cl3, STARPU_R, g, STARPU_RW, handle, 0); +\endcode + +The two tasks running cl2 will be able to commute: depending on whether the +value of handle1 or handle2 becomes available first, the corresponding task +running cl2 will start first. The task running cl1 will however always be run +before them, and the task running cl3 will always be run after them. + +tests/datawizard/commute2.c is a complete example using the data access flag. + +If a lot of tasks use the commute access on the same set of data and a lot of +them are ready at the same time, it may become interesting to use an arbiter, +see \ref ConcurrentDataAccess. + +\section DataReduction Data Reduction + +In various cases, some piece of data is used to accumulate intermediate +results. For instance, the dot product of a vector, maximum/minimum finding, +the histogram of a picture, etc. When these results are produced along the +whole machine, it would not be efficient to accumulate them in only one place, +incurring data transmission each time and access concurrency. + +StarPU provides a mode ::STARPU_REDUX, which makes it possible to optimize +this case: it will allocate one buffer on each worker (lazily), and let tasks accumulate +intermediate results there. When the data is eventually accessed in the normal +mode ::STARPU_R, StarPU will collect the intermediate results in just one +buffer. + +The function starpu_data_set_reduction_methods() must be called to specify how to initialize these +buffers, and how to assemble partial results. The function starpu_data_set_reduction_methods_with_args() can also be used to +pass arguments to the reduction and init tasks. 
+ +For instance, examples/cg/cg.c uses that to optimize its dot product: it first defines +the codelets for initialization and reduction: + +\code{.c} +struct starpu_codelet bzero_variable_cl = +{ + .cpu_funcs = { bzero_variable_cpu }, + .cpu_funcs_name = { "bzero_variable_cpu" }, + .cuda_funcs = { bzero_variable_cuda }, + .nbuffers = 1, +}; + +static void accumulate_variable_cpu(void *descr[], void *cl_arg) +{ + double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); + double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); + *v_dst = *v_dst + *v_src; +} + +static void accumulate_variable_cuda(void *descr[], void *cl_arg) +{ + double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); + double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); + cublasaxpy(1, (double)1.0, v_src, 1, v_dst, 1); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +struct starpu_codelet accumulate_variable_cl = +{ + .cpu_funcs = { accumulate_variable_cpu }, + .cpu_funcs_name = { "accumulate_variable_cpu" }, + .cuda_funcs = { accumulate_variable_cuda }, + .nbuffers = 2, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, +}; +\endcode + +and attaches them as reduction methods for its handle dtq: + +\code{.c} +starpu_variable_data_register(&dtq_handle, -1, NULL, sizeof(type)); +starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl); +\endcode + +and dtq_handle can now be used with the mode ::STARPU_REDUX for the +dot products with partitioned vectors: + +\code{.c} +for (b = 0; b < nblocks; b++) + starpu_task_insert(&dot_kernel_cl, + STARPU_REDUX, dtq_handle, + STARPU_R, starpu_data_get_sub_data(v1, 1, b), + STARPU_R, starpu_data_get_sub_data(v2, 1, b), + 0); +\endcode + +Each dot_kernel_cl task will essentially compute dtq += v1.v2, +i.e. accumulate into the StarPU-provided buffer the partial dot product result. + +During registration, we have here provided NULL, i.e. 
there is +no initial value to be taken into account during reduction. StarPU +will thus only take into account the contributions from the tasks +dot_kernel_cl. For a given worker, the first of these tasks that runs on +that worker will be given a buffer initialized with the initialization codelet. +The others running on that worker will be given the same buffer, each task +accumulating its contribution on top of the previous ones. +Also, StarPU will not allocate any memory for +dtq_handle before the tasks dot_kernel_cl are ready to run. + +If another dot product series has to be performed, one could unregister +dtq_handle, and re-register it. But one can also call +starpu_data_deinitialize_submit() or even starpu_data_invalidate_submit() with +the parameter dtq_handle, +which will clear all data from the handle, thus resetting it back to +the initial status register(NULL). + +The example examples/cg/cg.c also uses reduction for the blocked gemv kernel, +leading to yet more relaxed dependencies and more parallelism. + +::STARPU_REDUX can also be passed to starpu_mpi_task_insert() in the MPI +case. This will however not produce any MPI communication, but just pass +::STARPU_REDUX to the underlying starpu_task_insert(). starpu_mpi_redux_data() +posts tasks which will reduce the partial results among MPI nodes into the MPI +node which owns the data. The function can be called by users to benefit from +fine-tuning such as priority setting. If users do not call this function, +StarPU wraps up reduction patterns automatically. 
The following example +shows a hypothetical application which collects partial results +into data res, then uses it for other computation, before looping again +with a new reduction where the wrap-up of the reduction pattern is explicit: + +\code{.c} +for (i = 0; i < 100; i++) +{ + starpu_mpi_task_insert(MPI_COMM_WORLD, &init_res, STARPU_W, res, 0); + starpu_mpi_task_insert(MPI_COMM_WORLD, &work, STARPU_RW, A, STARPU_R, B, STARPU_REDUX, res, 0); + starpu_mpi_redux_data(MPI_COMM_WORLD, res); + starpu_mpi_task_insert(MPI_COMM_WORLD, &work2, STARPU_RW, B, STARPU_R, res, 0); +} +\endcode + +starpu_mpi_redux_data() is called automatically in various cases, including +when a task reading the reduced handle is inserted through starpu_mpi_task_insert(). +The previous example could avoid calling starpu_mpi_redux_data(). Default priority (0) +is used. The reduction tree arity is decided based on the size of the data to reduce: a +flat tree is used with a small data (default to less than 1024 bytes), a binary tree +otherwise. If the environment variable \ref STARPU_MPI_REDUX_ARITY_THRESHOLD is set, the +threshold between the size of a small data and a bigger data is modified. If the value is +set to be negative, flat trees will always be used. If the value is set to 0, binary +trees are used. Otherwise, the size of the data is compared to the size in the environment +variable. Remaining distributed-memory reduction patterns are wrapped-up at the end of an +application when calling starpu_mpi_wait_for_all(). + +More details about MPI reduction are shown in Section \ref MPIMpiRedux, +and some examples for MPI data reduction are available in mpi/examples/mpi_redux/. + +\section ConcurrentDataAccess Concurrent Data Accesses + +When several tasks are ready and will work on several data, StarPU is faced with +the classical Dining Philosophers problem, and has to determine the order in +which it will run the tasks. 
+ +Data accesses usually use sequential ordering, so data accesses are usually +already serialized, and thus by default, StarPU uses the Dijkstra solution which +scales very well in terms of overhead: tasks will just acquire data one by one +by data handle pointer value order. + +When sequential ordering is disabled or the flag ::STARPU_COMMUTE is used, there +may be a lot of concurrent accesses to the same data, and the Dijkstra solution +gets only poor parallelism, typically in some pathological cases which do happen +in various applications, for instance: + +\code{.c} +for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + task[i][j] = starpu_task_build(&cl, STARPU_RW|STARPU_COMMUTE, A[i], STARPU_RW|STARPU_COMMUTE, B[j], 0); +\endcode + +It creates a series of tasks that are completely parallel in terms of +task dependencies thanks to commutation, but StarPU still has to prevent two +tasks from operating on the same data. The Dijkstra solution here leads to a +worst-case: the \c task[0][j] tasks will wait for each other since +they all access the same \c A[0]. And \c task[1][0] will wait for +\c task[0][0] because they both access the same \c B[0], \c task[1][1] will wait +for \c task[0][1] because of \c B[1], etc. In the end, no parallelism is achieved: + +\image html arbiter.png +\image latex arbiter.png "" width=0.7\textwidth + +In this case, one can use a data access arbiter ::starpu_arbiter_t, which +implements the classical centralized solution for the Dining Philosophers +problem. One can call starpu_arbiter_create() to create a data access arbiter, and starpu_data_assign_arbiter() to make accesses to a handle managed by the arbiter. Once the application no longer needs the arbiter, one can call starpu_arbiter_destroy() to destroy the arbiter after all data assigned to the arbiter have been unregistered. Using an arbiter is more expensive in terms of overhead since it is centralized, +but it opportunistically gets a lot of parallelism. 
The centralization can also +be avoided by using several arbiters, thus separating sets of data for which +arbitration will be done. If a task accesses data from different arbiters, it +will acquire them arbiter by arbiter, in arbiter pointer value order. + +See the tests/datawizard/test_arbiter.cpp example. + +Arbiters however do not support the flag ::STARPU_REDUX yet. + +\section TemporaryBuffers Temporary Buffers + +There are two kinds of temporary buffers: temporary data which just pass results +from a task to another, and scratch data which are needed only internally by +tasks. + +\subsection TemporaryData Temporary Data + +Data can be produced by a task, and consumed by another task, without +being used by other parts of the application. In such a case, +registration can be done without prior allocation, by using +the special memory node number -1, and passing a NULL pointer. StarPU will +actually allocate memory only when the task creating the content gets scheduled, +and destroy it on unregistration. + +As the application will not use the data, it can be tedious for the +application to have to unregister it. The unregistration can be +done lazily by using the function starpu_data_unregister_submit(), +which will record that no other tasks accessing the handle will be submitted, so +that it can be freed as soon as the last task accessing it is completed. + +The following code exemplifies both points: it registers the temporary +data, submits three tasks accessing it, and records the data for automatic +unregistration. 
+ +\code{.c} +starpu_vector_data_register(&handle, -1, NULL, n, sizeof(float)); +starpu_task_insert(&produce_data, STARPU_W, handle, 0); +starpu_task_insert(&compute_data, STARPU_RW, handle, 0); +starpu_task_insert(&summarize_data, STARPU_R, handle, STARPU_W, result_handle, 0); +starpu_data_unregister_submit(handle); +\endcode + +The application may also want the temporary data to be initialized +on the fly before being used by the task. 
This can be done by using +starpu_data_set_reduction_methods() to set an initialization codelet (no redux +codelet is needed). + +\subsection ScratchData Scratch Data + +Some kernels sometimes need temporary data to complete the computations, like a +workspace. The application could allocate it at the start of the codelet +function, and free it at the end, but this would be costly. It could also +allocate one buffer per worker (similarly to \ref HowToInitializeAComputationLibraryOnceForEachWorker), +but this would +make them systematic and permanent. A more optimized way is to use +the data access mode ::STARPU_SCRATCH, as exemplified below, which +provides per-worker buffers without content consistency. The buffer is +registered only once, using memory node -1, i.e. the application didn't allocate +memory for it, and StarPU will allocate it on demand at task execution. + +\code{.c} +starpu_variable_data_register(&workspace, -1, NULL, sizeof(float)); +for (i = 0; i < N; i++) + starpu_task_insert(&compute, STARPU_R, input[i], STARPU_SCRATCH, workspace, STARPU_W, output[i], 0); +\endcode + +StarPU will make sure that the buffer is allocated before executing the task, +and make this allocation per-worker: for CPU workers, notably, each worker has +its own buffer. This means that each task submitted above will actually have its +own workspace, which will actually be the same for all tasks running one after +the other on the same worker. Also, if for instance memory becomes scarce, +StarPU will notice that it can free such buffers easily, since the content does +not matter. + +The example examples/pi uses scratches for some temporary buffer. + +It may be useful to additionally use the ::STARPU_NOFOOTPRINT flag, when this +buffer may have various sizes depending e.g. on specific CUDA versions or +devices, to make it simpler to use performance models for simulated +execution. 
See for instance examples/cholesky/cholesky_kernels.c + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/examples_sources.doxy b/doc/doxygen/chapters/starpu_basics/examples_sources.doxy new file mode 100644 index 0000000..73715a4 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/examples_sources.doxy @@ -0,0 +1,83 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page ExamplesInStarPUSources Examples in StarPU Sources + +We have already seen some examples in Chapter \ref BasicExamples. +A tutorial is also installed in the directory share/doc/starpu/tutorial/. + +Many examples are also available in the StarPU sources in the directory +examples/. Simple examples include: + +
+
incrementer/
+
Trivial incrementation test.
+ +
basic_examples/
+
+ Simple documented Hello world and vector/scalar product (as + shown in \ref BasicExamples), matrix + product examples (as shown in \ref PerformanceModelExample), an example using the blocked matrix data + interface, an example using the variable data interface, and an example + using different formats on CPUs and GPUs. +
+ +
matvecmult/
+
+ OpenCL example from NVidia, adapted to StarPU. +
+ +
axpy/
+
+ AXPY CUBLAS operation adapted to StarPU. +
+ +
native_fortran/
+
+ Example of using StarPU's native Fortran support. +
+ +
fortran90/
+
+ Example of Fortran 90 bindings, using C marshalling wrappers. +
+ +
fortran/
+
+ Example of Fortran 77 bindings, using C marshalling wrappers. +
+
+ +More advanced examples include: + +
+
filters/
+
+ Examples using filters, as shown in \ref PartitioningData. +
+ +
lu/
+
+ LU matrix factorization, see for instance xlu_implicit.c +
+ +
cholesky/
+
+ Cholesky matrix factorization, see for instance cholesky_implicit.c. +
+
+ +*/ diff --git a/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy b/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy new file mode 100644 index 0000000..26d00d9 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page FullSourceCodeVectorScal Full source code for the ’Scaling a Vector’ example + +\section MainApplication Main Application + +\snippet basics_vector_scal_c.c To be included. You should update doxygen if you see this text. + +\section CPUKernel CPU Kernel + +\snippet basics_vector_scal_cpu.c To be included. You should update doxygen if you see this text. + +\section CUDAKernel CUDA Kernel + +\snippet basics_vector_scal_cuda.c To be included. You should update doxygen if you see this text. + +\section OpenCLKernel OpenCL Kernel + +\subsection InvokingtheKernel Invoking the Kernel + +\snippet basics_vector_scal_opencl.c To be included. You should update doxygen if you see this text. + +\subsection SourceoftheKernel Source of the Kernel + +\snippet basics_vector_scal_opencl_codelet.cl To be included. You should update doxygen if you see this text. 
+ +*/ + diff --git a/doc/doxygen/chapters/starpu_basics/scheduling.doxy b/doc/doxygen/chapters/starpu_basics/scheduling.doxy new file mode 100644 index 0000000..f8a9a94 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/scheduling.doxy @@ -0,0 +1,204 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page Scheduling Scheduling + +\section TaskSchedulingPolicy Task Scheduling Policies + +The basics of the scheduling policy are the following: + +
    +
  • The scheduler gets to schedule tasks (push operation) when they become +ready to be executed, i.e. they are not waiting for some tags, data dependencies +or task dependencies.
  • +
  • Workers pull tasks (pop operation) one by one from the scheduler. +
+ +This means scheduling policies usually contain at least one queue of tasks to +store them between the time when they become available, and the time when a +worker gets to grab them. + +By default, StarPU uses the work-stealing scheduler \b lws. This is +because it provides correct load balance and locality even if the application codelets do +not have performance models. Other non-modelling scheduling policies can be +selected among the list below, thanks to the environment variable \ref +STARPU_SCHED. For instance, export STARPU_SCHED=dmda . Use help to +get the list of available schedulers. + +The function starpu_sched_get_predefined_policies() returns a NULL-terminated array of all predefined scheduling policies that are available in StarPU. +Functions starpu_sched_get_sched_policy_in_ctx() and starpu_sched_get_sched_policy() return the scheduling policy of a task within a specific context or a default context, respectively. + +\subsection NonPerformanceModelingPolicies Non Performance Modelling Policies + +- The eager scheduler uses a central task queue, from which all workers draw tasks +to work on concurrently. This however does not permit to prefetch data since the scheduling +decision is taken late. If a task has a non-0 priority, it is put at the front of the queue. + +- The random scheduler uses a queue per worker, and distributes tasks randomly according to assumed worker +overall performance. + +- The ws (work stealing) scheduler uses a queue per worker, and schedules +a task on the worker which released it by +default. When a worker becomes idle, it steals a task from the most loaded +worker. + +- The lws (locality work stealing) scheduler uses a queue per worker, and schedules +a task on the worker which released it by +default. When a worker becomes idle, it steals a task from neighbor workers. It +also takes priorities into account. + +- The prio scheduler also uses a central task queue, but sorts tasks by +priority specified by the application. 
+ +- The heteroprio scheduler uses different priorities for the different processing units. +This scheduler must be configured to work correctly and to expect high-performance +as described in the corresponding section. + +\subsection DMTaskSchedulingPolicy Performance Model-Based Task Scheduling Policies + +If (and only if) your codelets have performance models (\ref +PerformanceModelExample), you should change the scheduler thanks to the +environment variable \ref STARPU_SCHED, to select one of the policies below, in +order to take advantage of StarPU's performance modelling. For instance, +export STARPU_SCHED=dmdas . Use help to get the list of available +schedulers. + +Note: Depending on the performance model type chosen, some preliminary +calibration runs may be needed for the model to converge. If the calibration +has not been done, or is insufficient yet, or if no performance model is +specified for a codelet, every task built from this codelet will be scheduled +using an eager fallback policy. + +Troubleshooting: Configuring and recompiling StarPU using the \c configure option +\ref enable-verbose "--enable-verbose" displays some statistics at the end of +execution about the percentage of tasks which have been scheduled by a DM* +family policy using performance model hints. A low or zero percentage may be +the sign that performance models are not converging or that codelets do not +have performance models enabled. + +- The dm (deque model) scheduler takes task execution performance models into account to +perform a HEFT-similar scheduling strategy: it schedules tasks where their +termination time will be minimal. The difference with HEFT is that dm +schedules tasks as soon as they become available, and thus in the order they +become available, without taking priorities into account. + +- The dmda (deque model data aware) scheduler is similar to \b dm, but it also takes data transfer time +into account. 
+ +- The dmdap (deque model data aware prio) scheduler is similar to \b dmda, +except that it sorts tasks by priority order, which brings it even closer +to HEFT by respecting priorities after having made the scheduling decision (but +it still schedules tasks in the order they become available). + +- The dmdar (deque model data aware ready) scheduler is similar to \b dmda, +but it also privileges tasks whose data buffers are already available +on the target device. + +- The dmdas scheduler combines \b dmdap and \b dmdar: it sorts tasks by priority order, +but for a given priority it will privilege tasks whose data buffers are already +available on the target device. + +- The dmdasd (deque model data aware sorted decision) scheduler is similar +to dmdas, except that when scheduling a task, it takes into account its priority +when computing the minimum completion time, since this task may get executed +before others, and thus the latter should be ignored. + +- The heft (heterogeneous earliest finish time) scheduler is a deprecated +alias for dmda. + +- The pheft (parallel HEFT) scheduler is similar to \b dmda, it also supports +parallel tasks (still experimental). It should not be used when several contexts using +it are being executed simultaneously. + +- The peager (parallel eager) scheduler is similar to eager, it also +supports parallel tasks (still experimental). It should not be used when several +contexts using it are being executed simultaneously. + +- The darts scheduler aims at privileging data reuse, by focusing on +scheduling tasks whose data is already available, and on scheduling the data +transfers which will allow the most tasks to be run. See +
+the DARTS research paper for details. + +Overall, the recommended scheduler would be the dmdas scheduler, because +it both takes into account task priorities, and it privileges tasks whose +data buffers are already on the target devices, thus being able to cope with +out-of-core situations. If the data set is really much larger than the target +device memory, it might be useful to try dmdar to ignore task priorities +and focus on data reuse. It would also be useful to try the darts +scheduler which specifically aims at prioritizing data reuse. + +\subsection ExistingModularizedSchedulers Modularized Schedulers + +StarPU provides a powerful way to implement schedulers, as documented in \ref +DefiningANewModularSchedulingPolicy. It is currently shipped with the following +pre-defined Modularized Schedulers : + +- modular-eager, modular-eager-prefetching are eager-based Schedulers (without and with prefetching), they are +naive schedulers, which try to map a task on the first available resource +they find. The prefetching variant queues several tasks in advance to be able to +do data prefetching. This may however degrade load balancing a bit. + +- modular-prio, modular-prio-prefetching, modular-eager-prio are prio-based Schedulers (without / with prefetching), +similar to Eager-Based Schedulers. They can handle tasks which have a defined +priority and schedule them accordingly. +The modular-eager-prio variant integrates the eager and priority queue in a +single component. This allows it to do a better job at pushing tasks. + +- modular-random, modular-random-prio, modular-random-prefetching, modular-random-prio-prefetching are random-based Schedulers (without/with prefetching) : +They randomly select a resource on which to map each task. + +- modular-ws implements Work Stealing: +Maps tasks to workers in round-robin, but allows workers to steal work from other workers. 
+ +- modular-heft, modular-heft2, and modular-heft-prio are +HEFT Schedulers : \n +Maps tasks to workers using a heuristic very close to +Heterogeneous Earliest Finish Time. +It needs that every task submitted to StarPU have a +defined performance model (\ref PerformanceModelCalibration) +to work efficiently, but can handle tasks without a performance +model. modular-heft just takes tasks by order. modular-heft2 takes +at most 5 tasks of the same priority and checks which one fits best. +modular-heft-prio is similar to modular-heft, but only decides the memory +node, not the exact worker, just pushing tasks to one central queue per memory +node. By default, they sort tasks by priorities and privilege, running first +a task which has most of its data already available on the target. These can +however be changed with \ref STARPU_SCHED_SORTED_ABOVE, \ref +STARPU_SCHED_SORTED_BELOW, and \ref STARPU_SCHED_READY . + +- modular-heteroprio is a Heteroprio Scheduler: \n +Maps tasks to worker similarly to HEFT, but first attribute accelerated tasks to +GPUs, then not-so-accelerated tasks to CPUs. + +\section TaskDistributionVsDataTransfer Task Distribution Vs Data Transfer + +Distributing tasks to balance the load induces data transfer penalty. StarPU +thus needs to find a balance between both. The target function that the +scheduler \b dmda of StarPU +tries to minimize is alpha * T_execution + beta * T_data_transfer, where +T_execution is the estimated execution time of the codelet (usually +accurate), and T_data_transfer is the estimated data transfer time. The +latter is estimated based on bus calibration before execution start, +i.e. with an idle machine, thus without contention. You can force bus +re-calibration by running the tool starpu_calibrate_bus. 
The +beta parameter defaults to 1, but it can be worth trying to tweak it +by using export STARPU_SCHED_BETA=2 (\ref STARPU_SCHED_BETA) for instance, since during +real application execution, contention makes transfer times bigger. +This is of course imprecise, but in practice, a rough estimation +already gives the good results that a precise estimation would give. + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy b/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy new file mode 100644 index 0000000..d9601c1 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy @@ -0,0 +1,456 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page StarPUApplications StarPU Applications, setting up Your Own Code + +\section SettingFlagsForCompilingLinkingAndRunningApplications Setting Flags for Compiling, Linking and Running Applications + +StarPU provides a pkg-config executable to facilitate the +retrieval of necessary compiler and linker flags. This is useful when +compiling and linking an application with StarPU, as certain flags or +libraries (such as \c CUDA or \c libspe2) may be required. + +If StarPU is not installed in a standard location, the path of StarPU's +library must be specified in the environment variable +PKG_CONFIG_PATH to allow pkg-config to find it. 
For +example, if StarPU is installed in +$STARPU_PATH, you can set the variable \c PKG_CONFIG_PATH like +this: + +\verbatim +$ export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$STARPU_PATH/lib/pkgconfig +\endverbatim + +The flags required to compile or link against StarPU are then +accessible with the following commands: + +\verbatim +$ pkg-config --cflags starpu-1.4 # options for the compiler +$ pkg-config --libs starpu-1.4 # options for the linker +\endverbatim + +Please note that it is still possible to use the API provided in +StarPU version 1.0 by calling pkg-config with the +starpu-1.0 package. +Similar packages are provided for starpumpi-1.0 and +starpufft-1.0. +For the API provided in StarPU version 0.9, you can use +pkg-config with the libstarpu package. +Similar packages are provided for libstarpumpi and libstarpufft. + +Make sure that pkg-config --libs starpu-1.4 produces valid output +before going further. To achieve this, make sure that your \c +PKG_CONFIG_PATH is correctly set to the location where \c +starpu-1.4.pc was installed during the make install process. + +Furthermore, if you intend to link your application statically, +remember to include the --static option during the linking +process. + +Additionally, for runtime execution, it is necessary to set the +\c LD_LIBRARY_PATH environment variable. This ensures that dynamic +libraries are located and loaded correctly during runtime. + +\verbatim +$ export LD_LIBRARY_PATH=$STARPU_PATH/lib:$LD_LIBRARY_PATH +\endverbatim + +And finally you should set the \c PATH variable to get access to +various StarPU tools: + +\verbatim +$ export PATH=$PATH:$STARPU_PATH/bin +\endverbatim + +Run the following command to ensure that StarPU is executing properly +and successfully detecting your hardware. If any issues arise, examine the +output of \c lstopo from the \c hwloc project and report any problems +either to the hwloc project or to us. 
+ +\verbatim +$ starpu_machine_display +\endverbatim + +A tool is provided to help set all the environment variables +needed by StarPU. Once StarPU is installed in a specific directory, +calling the script bin/starpu_env will set in your current +environment the variables STARPU_PATH, LD_LIBRARY_PATH, +PKG_CONFIG_PATH, PATH and MANPATH. + +\verbatim +$ source $STARPU_PATH/bin/starpu_env +\endverbatim + +\section IntegratingStarPUInABuildSystem Integrating StarPU in a Build System + +\subsection StarPUInMake Integrating StarPU in a Make Build System + +When using a Makefile, the following lines can be added to set the +options for the compiler and the linker: + +\verbatim +CFLAGS += $$(pkg-config --cflags starpu-1.4) +LDLIBS += $$(pkg-config --libs starpu-1.4) +\endverbatim + +If you have a \c test-starpu.c file containing for instance: + +\code{.c} +#include <starpu.h> +#include <stdio.h> +int main(void) +{ + int ret; + ret = starpu_init(NULL); + if (ret != 0) + { + return 1; + } + printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); + printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER)); + printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER)); + starpu_shutdown(); + + return 0; +} +\endcode + +You can build it with make test-starpu and run it with ./test-starpu + +\subsection StarPUInCMake Integrating StarPU in a CMake Build System + +This section shows a minimal example integrating StarPU in an existing application's CMake build system. 
+ +Let's assume we want to build an executable from the following source code using CMake: +\code{.c} +#include <starpu.h> +#include <stdio.h> +int main(void) +{ + int ret; + ret = starpu_init(NULL); + if (ret != 0) + { + return 1; + } + printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); + printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER)); + printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER)); + starpu_shutdown(); + + return 0; +} +\endcode + +The \c CMakeLists.txt file below uses the Pkg-Config support from CMake to +autodetect the StarPU installation and library dependencies (such as +libhwloc) provided that the PKG_CONFIG_PATH variable is set, and +is sufficient to build a statically-linked executable. + +The CMake code uses the IMPORTED_TARGET option of pkg_check_modules to define a CMake target that can be used to compile and link StarPU codes: + +\code{File CMakeLists.txt} +cmake_minimum_required (VERSION 3.2) +project (hello_starpu) + +find_package(PkgConfig) +pkg_check_modules(STARPU REQUIRED IMPORTED_TARGET starpu-1.4) + +add_executable(hello_starpu hello_starpu.c PkgConfig::STARPU) +\endcode + +One can also use the following alternative. + +\code{File CMakeLists.txt} +cmake_minimum_required (VERSION 3.2) +project (hello_starpu) + +find_package(PkgConfig) +pkg_check_modules(STARPU REQUIRED starpu-1.4) +if (STARPU_FOUND) + include_directories (${STARPU_INCLUDE_DIRS}) + link_directories (${STARPU_STATIC_LIBRARY_DIRS}) + link_libraries (${STARPU_STATIC_LIBRARIES}) +else (STARPU_FOUND) + message(FATAL_ERROR "StarPU not found") +endif() + +add_executable(hello_starpu hello_starpu.c) +\endcode + +The following \c CMakeLists.txt implements a more complex +strategy, still relying on Pkg-Config, but also taking into account additional +flags. While more complete, this approach makes CMake's build types (Debug, +Release, ...) 
unavailable because of the direct assignment to the variable +CMAKE_C_FLAGS. 
If both the full flags support and the build types +support are needed, the \c CMakeLists.txt below may be altered to work with +CMAKE_C_FLAGS_RELEASE, CMAKE_C_FLAGS_DEBUG, and others as needed. +This example has been successfully tested with CMake 3.2, though it may work +with earlier CMake 3.x versions. + +\code{File CMakeLists.txt} +cmake_minimum_required (VERSION 3.2) +project (hello_starpu) + +find_package(PkgConfig) +pkg_check_modules(STARPU REQUIRED starpu-1.4) + +# This section must appear before 'add_executable' +if (STARPU_FOUND) + # CFLAGS other than -I + foreach(CFLAG ${STARPU_CFLAGS_OTHER}) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CFLAG}") + endforeach() + + # Static LDFLAGS other than -L + foreach(LDFLAG ${STARPU_STATIC_LDFLAGS_OTHER}) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LDFLAG}") + endforeach() + + # -L directories + link_directories(${STARPU_STATIC_LIBRARY_DIRS}) +else (STARPU_FOUND) + message(FATAL_ERROR "StarPU not found") +endif() + +add_executable(hello_starpu hello_starpu.c) + +# This section must appear after 'add_executable' +if (STARPU_FOUND) + # -I directories + target_include_directories(hello_starpu PRIVATE ${STARPU_INCLUDE_DIRS}) + + # Static -l libs + target_link_libraries(hello_starpu PRIVATE ${STARPU_STATIC_LIBRARIES}) +endif() +\endcode + +\section RunningABasicStarPUApplication Running a Basic StarPU Application + +Basic examples using StarPU are built in the directory +examples/basic_examples/ (and installed in +$STARPU_PATH/lib/starpu/examples/). You can for example run the example +vector_scal. + +\verbatim +$ ./examples/basic_examples/vector_scal +BEFORE: First element was 1.000000 +AFTER: First element is 3.140000 +\endverbatim + +When StarPU is used for the first time, the directory +$STARPU_HOME/.starpu/ is created, performance models will be stored in +this directory (\ref STARPU_HOME). + +Please note that buses are benchmarked when StarPU is launched for the +first time. 
This may take a few minutes, or less if libhwloc is +installed. This step is done only once per user and per machine. + +\section RunningABasicStarPUApplicationOnMicrosoft Running a Basic StarPU Application on Microsoft Visual C + +Batch files are provided to run StarPU applications under Microsoft +Visual C. They are installed in $STARPU_PATH/bin/msvc. + +To execute a StarPU application, you first need to set the environment +variable \ref STARPU_PATH. + +\verbatim +c:\....> cd c:\cygwin\home\ci\starpu\ +c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ +c:\....> cd bin\msvc +c:\....> starpu_open.bat starpu_simple.c +\endverbatim + +The batch script will run Microsoft Visual C with a basic project file +to run the given application. + +The batch script starpu_clean.bat can be used to delete all +compilation generated files. + +The batch script starpu_exec.bat can be used to compile and execute a +StarPU application from the command prompt. + +\verbatim +c:\....> cd c:\cygwin\home\ci\starpu\ +c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ +c:\....> cd bin\msvc +c:\....> starpu_exec.bat ..\..\..\..\examples\basic_examples\hello_world.c +\endverbatim + +\verbatim +MSVC StarPU Execution +... +/out:hello_world.exe +... +Hello world (params = {1, 2.00000}) +Callback function got argument 0000042 +c:\....> +\endverbatim + +\section KernelThreadsStartedByStarPU Kernel Threads Started by StarPU + +StarPU automatically binds one thread per CPU core. It does not use +SMT/hyperthreading because kernels are usually already optimized for using a +full core, and using hyperthreading would make kernel calibration rather random. + +Since driving GPUs is a CPU-consuming task, StarPU dedicates one core +per GPU. + +While StarPU tasks are executing, the application is not supposed to do +computations in the threads it starts itself, tasks should be used instead. 
+ +If the application needs to reserve some cores for its own computations, it +can do so with the field starpu_conf::reserve_ncpus, get the core IDs with +starpu_get_next_bindid(), and bind to them with starpu_bind_thread_on(). + +Another option is for the application to pause StarPU by calling +starpu_pause(), then to perform its own computations, and then to +resume StarPU by calling starpu_resume() so that StarPU can execute +tasks. + +If a computation library used by the application actually creates its +own thread, it may be useful to call starpu_bind_thread_on_worker() +before e.g. initializing the library, so that the library records which +binding it is supposed to use. And then call starpu_bind_thread_on_main() +again, or starpu_bind_thread_on_cpu() if a core was reserved with +starpu_get_next_bindid(). + +In case that computation library wants to bind threads itself, and uses +physical numbering instead of logical numbering (as defined by hwloc), +starpu_cpu_os_index() can be used to convert from StarPU cpuid to OS cpu index. + +\section EnablingOpenCL Enabling OpenCL + +When both CUDA and OpenCL drivers are enabled, StarPU will launch an +OpenCL worker for NVIDIA GPUs only if CUDA is not already running on them. +This design choice was necessary as OpenCL and CUDA can not run at the +same time on the same NVIDIA GPU, as there is currently no interoperability +between them. + +To enable OpenCL, you need either to disable CUDA when configuring StarPU: + +\verbatim +$ ./configure --disable-cuda +\endverbatim + +or when running applications: + +\verbatim +$ STARPU_NCUDA=0 ./application +\endverbatim + +OpenCL will automatically be started on any device not yet used by +CUDA. 
On a machine with 4 GPUs, it is therefore possible to +enable CUDA on 2 devices, and OpenCL on the other 2 devices by calling: + +\verbatim +$ STARPU_NCUDA=2 ./application +\endverbatim + +\section Storing_Performance_Model_Files Storing Performance Model Files + +StarPU stores performance model files for bus benchmarking and codelet +profiles in different directories. + +By default, all files are stored in $STARPU_HOME/.starpu/sampling. + +If the environment variable \ref STARPU_HOME is not defined, its +default value is $HOME on Unix environments, and +$USERPROFILE on Windows environments. + +Environment variables \ref STARPU_PERF_MODEL_DIR and \ref +STARPU_PERF_MODEL_PATH can also be used to specify other directories +in which to store performance files (\ref SimulatedBenchmarks). + +The configure option \ref with-perf-model-dir "--with-perf-model-dir" +can also be used to define a performance model directory. + +When looking for performance files either for bus benchmarking or for +codelet performances, StarPU +
    +
  • +first looks in the directory specified by the environment variable +\ref STARPU_PERF_MODEL_DIR +
  • +
  • +then looks in the directory specified by the configure option \ref +with-perf-model-dir "--with-perf-model-dir"
    +or in $STARPU_HOME/.starpu/sampling if the option is not set +
  • +
  • +then looks in the directories specified by the environment +variable \ref STARPU_PERF_MODEL_PATH +
  • +
  • +and finally looks in $prefix/share/starpu/perfmodels/sampling +
  • +
+ +If the files are not present and must be created, they will be created +in the first defined directory from the list above. + +\verbatim +rm -rf $PWD/xxx && STARPU_PERF_MODEL_DIR=$PWD/xxx ./application +\endverbatim + +will use performance model files from the directory +$STARPU_HOME/.starpu/sampling if they are available, otherwise will +create these files in $STARPU_PERF_MODEL_DIR. + +To know the list of directories StarPU will search for performances +files, one can use the tool starpu_perfmodel_display + +\verbatim +$ starpu_perfmodel_display -d +directory: +directory: +\endverbatim + +\verbatim +$ STARPU_PERF_MODEL_DIR=/tmp/xxx starpu_perfmodel_display -d +directory: +directory: +directory: +\endverbatim + +When using the variable \ref STARPU_PERF_MODEL_DIR, the directory will +be created if it does not exist when dumping new performance model +files. + +When using the variable \ref STARPU_PERF_MODEL_PATH, only existing +directories will be taken into account. + +\verbatim +$ mkdir /tmp/yyy && STARPU_PERF_MODEL_DIR=/tmp/xxx STARPU_PERF_MODEL_PATH=/tmp/zzz:/tmp/yyy starpu_perfmodel_display -d +[starpu][adrets][_perf_model_add_dir] Warning: directory as set by variable STARPU_PERF_MODEL_PATH does not exist +directory: +directory: +directory: +directory: +\endverbatim + +Once your application has created the performance files in a given +directory, it is thus possible to move these files in another location +and keep using them. + +\verbatim +./application +# files are created in $HOME/.starpu/sampling +mv $HOME/.starpu/sampling /usr/local/starpu/sampling +STARPU_PERF_MODEL_DIR=/usr/local/starpu/sampling ./application +\endverbatim + +*/ diff --git a/doc/doxygen/chapters/starpu_basics/tasks.doxy b/doc/doxygen/chapters/starpu_basics/tasks.doxy new file mode 100644 index 0000000..4fbc030 --- /dev/null +++ b/doc/doxygen/chapters/starpu_basics/tasks.doxy @@ -0,0 +1,412 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page TasksInStarPU Tasks In StarPU + +\section TaskGranularity Task Granularity + +Similar to other runtimes, StarPU introduces some overhead in managing +tasks. This overhead, while not always negligible, is mitigated by its +intelligent scheduling and data management capabilities. The typical +order of magnitude for this overhead is a few microseconds, which is +notably smaller than the inherent CUDA overhead. To ensure that this +overhead remains insignificant, the work assigned to a task should be +substantial enough. + +The length of tasks should ideally be relatively large to effectively +counterbalance this overhead. It is advised to consider the offline +performance feedback, which provides insights into task lengths. +Monitoring task lengths becomes crucial if you're encountering +suboptimal performance. + +To gauge the scalability potential based on task size, you can run the +tests/microbenchs/tasks_size_overhead.sh script. It provides a +visual representation of the speedup achievable with independent tasks +of very small sizes. + +This benchmark is installed in $STARPU_PATH/lib/starpu/examples/. +It gives a glimpse into how long a task should be (in µs) for StarPU overhead +to be low enough to keep efficiency. 
The script generates a plot +illustrating the speedup trends for tasks of different sizes, +correlated with the number of CPUs in use. + +For example, in the figure below, for 128 µs tasks (the red line), +StarPU overhead is low enough to guarantee a good speedup if the +number of CPUs is not more than 36. But with the same number of CPUs, +64 µs tasks (the black line) cannot have a correct speedup. The number +of CPUs must be decreased to about 17 in order to keep efficiency. + +\image html tasks_size_overhead.png +\image latex tasks_size_overhead.png "" width=\textwidth + +To determine the task size your application is using, it is possible +to use starpu_fxt_data_trace as explained in \ref DataTrace. + +The selection of a scheduler in StarPU also plays a significant role. +Different schedulers have varying impacts on the overall execution. +For example, the \c dmda scheduler may require additional time to make +decisions, while the \c eager scheduler tends to be more immediate in +its decisions. + +To assess the impact of scheduler choice on your target machine, you +can once again utilize the \c tasks_size_overhead.sh script. This +script provides valuable insights into how different schedulers affect +performance in conjunction with task sizes. + +\section TaskSubmission Task Submission + +To enable StarPU to perform online optimizations effectively, it is +recommended to submit tasks asynchronously whenever possible. The goal +is to maximize the level of asynchronous submission, allowing StarPU +to have more flexibility in optimizing the scheduling process. +Ideally, all tasks should be submitted asynchronously, and the use of +functions like starpu_task_wait_for_all() or starpu_data_unregister() +should be limited to waiting for task completion. + +StarPU will then be able to rework the whole schedule, overlap +computation with communication, manage accelerator local memory usage, etc. 
+A simple example is in the file examples/basic_examples/variable.c + +\section TaskPriorities Task Priorities + +StarPU's default behavior considers tasks in the order they +are submitted by the application. However, in scenarios where the +application programmer possesses knowledge about certain tasks that +should take priority due to their impact on performance (such as tasks +whose output is crucial for subsequent tasks), the +starpu_task::priority field can be utilized to convey this information +to StarPU's scheduling process. + +An example is provided in the application +examples/heat/dw_factolu_tag.c. + +\section SettingManyDataHandlesForATask Setting Many Data Handles For a Task + +The maximum number of data that a task can manage is fixed by the macro +\ref STARPU_NMAXBUFS. This macro has a default value which can be +customized through the \c configure option \ref enable-maxbuffers +"--enable-maxbuffers". + +However, if you have specific cases where you need tasks to manage +more data than the maximum allowed, you can use the field +starpu_task::dyn_handles when defining a task, along with the field +starpu_codelet::dyn_modes when defining the corresponding codelet. + +This dynamic handle mechanism enables tasks to handle additional data +beyond the usual limit imposed by \ref STARPU_NMAXBUFS. + +\code{.c} +enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] = +{ + STARPU_R, STARPU_R, ... 
+}; + +struct starpu_codelet dummy_big_cl = +{ + .cuda_funcs = { dummy_big_kernel }, + .opencl_funcs = { dummy_big_kernel }, + .cpu_funcs = { dummy_big_kernel }, + .cpu_funcs_name = { "dummy_big_kernel" }, + .nbuffers = STARPU_NMAXBUFS+1, + .dyn_modes = modes +}; + +task = starpu_task_create(); +task->cl = &dummy_big_cl; +task->dyn_handles = malloc(task->cl->nbuffers * sizeof(starpu_data_handle_t)); +for(i=0 ; i<task->cl->nbuffers ; i++) +{ + task->dyn_handles[i] = handle; +} +starpu_task_submit(task); +\endcode + +\code{.c} +starpu_data_handle_t *handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t)); +for(i=0 ; i<dummy_big_cl.nbuffers ; i++) +{ + handles[i] = handle; +} +starpu_task_insert(&dummy_big_cl, + STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers), + STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers, + 0); +\endcode + +The whole code for this complex data interface is available in the +file examples/basic_examples/dynamic_handles.c. + +\section SettingVariableDataHandlesForATask Setting a Variable Number Of Data Handles For a Task + +Normally, the number of data handles given to a task is set with +starpu_codelet::nbuffers. This field can however be set to +\ref STARPU_VARIABLE_NBUFFERS, in which case starpu_task::nbuffers +must be set, and starpu_task::modes (or starpu_task::dyn_modes, +see \ref SettingManyDataHandlesForATask) should be used to specify the modes for +the handles. Examples in examples/basic_examples/dynamic_handles.c show how to implement it. + +\section InsertTaskUtility Insert Task Utility + +StarPU provides the wrapper function starpu_task_insert() to ease +the creation and submission of tasks. 
+ +Here is the implementation of a codelet: + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + int ifactor; + float ffactor; + + starpu_codelet_unpack_args(_args, &ifactor, &ffactor); + *x0 = *x0 * ifactor; + *x1 = *x1 * ffactor; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = { func_cpu }, + .cpu_funcs_name = { "func_cpu" }, + .nbuffers = 2, + .modes = { STARPU_RW, STARPU_RW } +}; +\endcode + +And the call to starpu_task_insert(): + +\code{.c} +starpu_task_insert(&mycodelet, + STARPU_VALUE, &ifactor, sizeof(ifactor), + STARPU_VALUE, &ffactor, sizeof(ffactor), + STARPU_RW, data_handles[0], + STARPU_RW, data_handles[1], + 0); +\endcode + +The call to starpu_task_insert() is equivalent to the following +code: + +\code{.c} +struct starpu_task *task = starpu_task_create(); +task->cl = &mycodelet; +task->handles[0] = data_handles[0]; +task->handles[1] = data_handles[1]; +char *arg_buffer; +size_t arg_buffer_size; +starpu_codelet_pack_args(&arg_buffer, &arg_buffer_size, + STARPU_VALUE, &ifactor, sizeof(ifactor), + STARPU_VALUE, &ffactor, sizeof(ffactor), + 0); +task->cl_arg = arg_buffer; +task->cl_arg_size = arg_buffer_size; +int ret = starpu_task_submit(task); +\endcode + +In the example file tests/main/insert_task_value.c, we use these two ways to create and submit tasks. 
+ +Instead of calling starpu_codelet_pack_args(), one can also call starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each data, then starpu_codelet_pack_arg_fini() as follows: + +\code{.c} +struct starpu_task *task = starpu_task_create(); +task->cl = &mycodelet; +task->handles[0] = data_handles[0]; +task->handles[1] = data_handles[1]; + +struct starpu_codelet_pack_arg_data state; +starpu_codelet_pack_arg_init(&state); +starpu_codelet_pack_arg(&state, &ifactor, sizeof(ifactor)); +starpu_codelet_pack_arg(&state, &ffactor, sizeof(ffactor)); +starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + +int ret = starpu_task_submit(task); +\endcode + +A full code example is in file tests/main/pack.c. + +Here is a similar call using ::STARPU_DATA_ARRAY. + +\code{.c} +starpu_task_insert(&mycodelet, + STARPU_DATA_ARRAY, data_handles, 2, + STARPU_VALUE, &ifactor, sizeof(ifactor), + STARPU_VALUE, &ffactor, sizeof(ffactor), + 0); +\endcode + +If some part of the task insertion depends on the value of some computation, +the macro ::STARPU_DATA_ACQUIRE_CB can be very convenient. For +instance, assuming that the index variable i was registered as handle +i_handle: + +\code{.c} +/* Compute which portion we will work on, e.g. pivot */ +starpu_task_insert(&which_index, STARPU_W, i_handle, 0); + +/* And submit the corresponding task */ +STARPU_DATA_ACQUIRE_CB(i_handle, STARPU_R, + starpu_task_insert(&work, STARPU_RW, A_handle[i], 0)); +\endcode + +The macro ::STARPU_DATA_ACQUIRE_CB submits an asynchronous request for +acquiring data i for the main application, and will execute the code +given as the third parameter when it is acquired. In other words, as soon as the +value of i computed by the codelet which_index can be read, the +portion of code passed as the third parameter of ::STARPU_DATA_ACQUIRE_CB will +be executed, and is allowed to read from i to use it e.g. as an +index. 
Note that this macro is only available when compiling StarPU with +the compiler gcc. In the example file tests/datawizard/acquire_cb_insert.c, this macro is used. + +StarPU also provides a utility function starpu_codelet_unpack_args() to retrieve the ::STARPU_VALUE arguments passed to the task. There are several ways of calling starpu_codelet_unpack_args(). The full code examples are available in the file tests/main/insert_task_value.c. + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int ifactor; + float ffactor; + + starpu_codelet_unpack_args(_args, &ifactor, &ffactor); +} +\endcode + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int ifactor; + float ffactor; + + starpu_codelet_unpack_args(_args, &ifactor, 0); + starpu_codelet_unpack_args(_args, &ifactor, &ffactor); +} +\endcode + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int ifactor; + float ffactor; + char buffer[100]; + + starpu_codelet_unpack_args_and_copyleft(_args, buffer, 100, &ifactor, 0); + starpu_codelet_unpack_args(buffer, &ffactor); +} +\endcode + +Instead of calling starpu_codelet_unpack_args(), one can also call starpu_codelet_unpack_arg_init(), then starpu_codelet_unpack_arg() or starpu_codelet_dup_arg() or starpu_codelet_pick_arg() for each data, then starpu_codelet_unpack_arg_fini() as follows: + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int ifactor; + float ffactor; + + size_t size = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); + struct starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, size); + starpu_codelet_unpack_arg(&state, (void**)&ifactor, sizeof(ifactor)); + starpu_codelet_unpack_arg(&state, (void**)&ffactor, sizeof(ffactor)); + starpu_codelet_unpack_arg_fini(&state); +} +\endcode + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int *ifactor; + float *ffactor; + size_t size; + + size_t psize = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); + struct 
starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, psize); + starpu_codelet_dup_arg(&state, (void**)&ifactor, &size); + assert(size == sizeof(*ifactor)); + starpu_codelet_dup_arg(&state, (void**)&ffactor, &size); + assert(size == sizeof(*ffactor)); + starpu_codelet_unpack_arg_fini(&state); +} +\endcode + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + int *ifactor; + float *ffactor; + size_t size; + + size_t psize = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); + struct starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, psize); + starpu_codelet_pick_arg(&state, (void**)&ifactor, &size); + assert(size == sizeof(*ifactor)); + starpu_codelet_pick_arg(&state, (void**)&ffactor, &size); + assert(size == sizeof(*ffactor)); + starpu_codelet_unpack_arg_fini(&state); +} +\endcode + +During unpacking one can also call starpu_codelet_unpack_discard_arg() to skip saving the argument in pointer. + +A full code example is in file tests/main/pack.c. + +\section OtherTaskUtility Other Task Utility Functions + +Here a list of other functions to help with task management. + +
    +
  • +The function starpu_task_dup() creates a duplicate of an existing task. The new task is identical to the original task in terms of its parameters, dependencies, and execution characteristics. +
  • +
  • +The function starpu_task_set() is used to set the parameters of a task before it is executed, while starpu_task_build() is used to create a task with the specified parameters. +
  • +
+ +StarPU provides several functions to help insert data into a task. +The function starpu_task_insert_data_make_room() is used to allocate +memory space for a data structure that is required for inserting data +into a task. This function is called before inserting any data handles +into a task, and ensures that enough memory is available for the data +to be stored. +Once memory is allocated, the data handle can be inserted into the +task using the following functions + +
    +
  • +starpu_task_insert_data_process_arg() processes a scalar +argument of a task and inserts it into the task's data structure. This +function also performs any necessary data allocation and transfer +operations. +
  • +
  • +starpu_task_insert_data_process_array_arg() processes an array +argument of a task and inserts it into the task's data structure. This +function handles the allocation and transfer of the array data, as +well as setting up the appropriate metadata to describe the array. +
  • +
  • +starpu_task_insert_data_process_mode_array_arg() processes a mode +array argument of a task and inserts it into the task's data +structure. This function handles the allocation and transfer of the +mode array data, as well as setting up the appropriate metadata to +describe the mode array. Additionally, this function also computes the +necessary sizes and strides for the data associated with the mode +array argument. +
  • +
+ +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy b/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy new file mode 100644 index 0000000..354ef07 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy @@ -0,0 +1,947 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page AdvancedDataManagement Advanced Data Management + +\section VariableSizeDataInterface Data Interface with Variable Size + +Besides the data interfaces already available in StarPU, mentioned in \ref DataInterface, +tasks are actually allowed to change the size of data interfaces. + +The simplest case is just changing the amount of data actually used within the +allocated buffer. This is for instance implemented for the matrix interface: one +can set the new NX/NY values with STARPU_MATRIX_SET_NX(), STARPU_MATRIX_SET_NY(), and STARPU_MATRIX_SET_LD() +at the end of the task implementation. Data transfers achieved by StarPU will +then use these values instead of the whole allocated size. The values of course +need to be set within the original allocation. 
To reserve room for increasing +the NX/NY values, one can use starpu_matrix_data_register_allocsize() instead of +starpu_matrix_data_register(), to specify the allocation size to be used instead +of the default NX*NY*ELEMSIZE. It is also available for a vector by using starpu_vector_data_register_allocsize() +to specify the allocation size to be used instead of the default NX*ELEMSIZE. To support this, the data interface +has to implement the functions starpu_data_interface_ops::alloc_footprint, +starpu_data_interface_ops::alloc_compare, and +starpu_data_interface_ops::reuse_data_on_node for proper StarPU allocation +management. It might be useful to implement +starpu_data_interface_ops::cache_data_on_node, otherwise StarPU will just call \c memcpy(). + +A more involved case is changing the amount of allocated data. +The task implementation can just reallocate the buffer during its execution, and +set the proper new values in the interface structure, e.g. nx, ny, ld, etc. so +that the StarPU core knows the new data layout. The structure starpu_data_interface_ops +however then needs to have the field starpu_data_interface_ops::dontcache +set to 1, to prevent StarPU from trying to perform any cached allocation, +since the allocated size will vary. An example is available in +tests/datawizard/variable_size.c. The example uses its own data +interface to contain some simulation information for data growth, but the +principle can be applied for any data interface. + +The principle is to use starpu_malloc_on_node_flags() to make the new +allocation, and use starpu_free_on_node_flags() to release any previous +allocation. 
The flags have to be precisely like in the example: + +\code{.c} +unsigned workerid = starpu_worker_get_id_check(); +unsigned dst_node = starpu_worker_get_memory_node(workerid); +interface->ptr = starpu_malloc_on_node_flags(dst_node, size + increase, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); +starpu_free_on_node_flags(dst_node, old, size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); +interface->size += increase; +\endcode + +so that the allocated area has the expected properties and the allocation is properly accounted for. + +Depending on the interface (vector, CSR, etc.) you may have to fix several +fields of the data interface: e.g. both nx and allocsize for +vectors, and store the pointer both in ptr and dev_handle. + +Some interfaces make a distinction between the actual number of elements +stored in the data and the actually allocated buffer. For instance, the vector +interface uses the nx field for the former, and the allocsize for +the latter. This allows for lazy reallocation to avoid reallocating the buffer +every time to exactly match the actual number of elements. Computations and data +transfers will use the field nx, while allocation functions will use the field +allocsize. One just has to make sure that allocsize is always +bigger or equal to nx. + +Important note: one can not change the size of a partitioned data. + +\section DataManagementAllocation Data Management Allocation + +When the application allocates data, whenever possible it should use +the function starpu_malloc(), which will ask CUDA or OpenCL to make +the allocation itself and pin the corresponding allocated memory (a basic example is in examples/basic_examples/block.c), or to use the function +starpu_memory_pin() to pin memory allocated by other ways, such as local arrays (a basic example is in examples/basic_examples/vector_scal.c). This +is needed to permit asynchronous data transfer, i.e. 
permit data +transfer to overlap with computations. Otherwise, the trace will show +that the state DriverCopyAsync takes a lot of time, this is +because CUDA or OpenCL then reverts to synchronous transfers. Before shutting down StarPU, the application should deallocate any memory that has previously been allocated with starpu_malloc(), by calling either starpu_free() or starpu_free_noflag() which is more recommended. If the application has pinned memory using starpu_memory_pin(), it should unpin the memory using starpu_memory_unpin() before freeing the memory. + +If an application requires a specific alignment constraint for memory allocations made with starpu_malloc(), it can use the starpu_malloc_set_align() function to set the alignment requirement. + +The application can provide its own allocation function by calling +starpu_malloc_set_hooks(). StarPU will then use them for all data handle +allocations in the main memory. An example is in examples/basic_examples/hooks.c. + +StarPU provides several functions to monitor the memory usage and availability on the system. The application can use the starpu_memory_get_used() function to monitor its own memory usage on a node, and the starpu_memory_get_total_all_nodes() function to monitor the amount of total memory on all memory nodes, and the starpu_memory_get_available_all_nodes() function to monitor the amount of available memory on all memory nodes. Additionally, the starpu_memory_get_used_all_nodes() function can be used to monitor the amount of used memory on all memory nodes. + +By default, StarPU leaves replicates of data wherever they were used, in case they +will be re-used by other tasks, thus saving the data transfer time. When some +task modifies some data, all the other replicates are invalidated, and only the +processing unit which ran this task will have a valid replicate of the data. 
If the application knows +that this data will not be re-used by further tasks, it should advise StarPU to +immediately replicate it to a desired list of memory nodes (given through a +bitmask). This can be understood like the write-through mode of CPU caches. + +\code{.c} +starpu_data_set_wt_mask(img_handle, 1<<0); +\endcode + +will for instance request to always automatically transfer a replicate into the +main memory (node 0), as bit 0 of the write-through bitmask is being set. An example is available in examples/pi/pi.c. + +\code{.c} +starpu_data_set_wt_mask(img_handle, ~0U); +\endcode + +will request to always automatically broadcast the updated data to all memory +nodes. An example is available in tests/datawizard/wt_broadcast.c. + +Setting the write-through mask to ~0U can also be useful to make sure all +memory nodes always have a copy of the data, so that it is never evicted when +memory gets scarce. + +Implicit data dependency computation can become expensive if a lot +of tasks access the same piece of data. If no dependency is required +on some piece of data (e.g. because it is only accessed in read-only +mode, or because write accesses are actually commutative), use the +function starpu_data_set_sequential_consistency_flag() to disable +implicit dependencies on this data. + +In the same vein, accumulation of results in the same data can become a +bottleneck. The use of the mode ::STARPU_REDUX permits to optimize such +accumulation (see \ref DataReduction). To a lesser extent, the use of +the flag ::STARPU_COMMUTE keeps the bottleneck (see \ref DataCommute), but at least permits +the accumulation to happen in any order. + +Applications often need a data just for temporary results. 
In such a case, +registration can be made without an initial value, for instance this produces a vector data: + +\code{.c} +starpu_vector_data_register(&handle, -1, 0, n, sizeof(float)); +\endcode + +StarPU will then allocate the actual buffer only when it is actually needed, +e.g. directly on the GPU without allocating in main memory. + +In the same vein, once the temporary results are not useful anymore, the +data should be thrown away. If the handle is not to be reused, it can be +unregistered: + +\code{.c} +starpu_data_unregister_submit(handle); +\endcode + +actual unregistration will be done after all tasks working on the handle +terminate. + +One can also unregister the data handle by calling: + +\code{.c} +starpu_data_unregister_no_coherency(handle); +\endcode + +Different from starpu_data_unregister(), a valid copy of the data is not +put back into the home node in the buffer that was initially registered. + +If the handle is to be reused, instead of unregistering it, it can simply be deinitialized: + +\code{.c} +starpu_data_deinitialize(handle); +\endcode + +So that the value will be ignored and not written back to main memory. + +Or instead it can even be invalidated (the buffers containing the current value +will then be freed, and reallocated only when another task writes some value to +the handle): + +\code{.c} +starpu_data_invalidate(handle); +\endcode + +if the data transfer is asynchronous, one can use the submit versions: + +\code{.c} +starpu_data_deinitialize_submit(handle); +\endcode + +or + +\code{.c} +starpu_data_invalidate_submit(handle); +\endcode + +A basic example is available in the files tests/datawizard/data_deinitialize.c and tests/datawizard/data_invalidation.c. + +\section DataAccess Data Access + +To access registered data outside tasks we can call the function starpu_data_acquire(). The access mode can be read-only mode ::STARPU_R, write-only mode ::STARPU_W, and read-write mode ::STARPU_RW. 
We will get an up-to-date copy of handle in memory located where the data was originally registered. The application can also call starpu_data_acquire_try() instead of starpu_data_acquire() to acquire the data, but if previously-submitted tasks have not completed when we ask to acquire the data, the program will crash. starpu_data_release() must be called once the application no longer needs to access the piece of data. Alternatively, starpu_data_release_to() can be called to partly release the piece of data acquired. +We can also access registered data from a given memory node by calling the function starpu_data_acquire_on_node(), or calling starpu_data_acquire_on_node_try() if all previously-submitted tasks have completed. Correspondingly, starpu_data_release_on_node() must be called once the application no longer needs to access the piece of data and the node parameter must be exactly the same as the corresponding starpu_data_acquire_on_node() call. Alternatively, starpu_data_release_to_on_node() can be called to partly release the piece of data acquired. + +The application may access the requested data asynchronously, during the execution of a callback, by calling starpu_data_acquire_cb(), or by calling starpu_data_acquire_cb_sequential_consistency() with the possibility of enabling or disabling data dependencies. The callback function must call starpu_data_release() once the application no longer needs to access the piece of data. Alternatively, starpu_data_release_to() can be called to partly release the piece of data acquired. +The application can also access registered data from a given memory node instead of main memory by calling the function starpu_data_acquire_on_node_cb(), or by calling starpu_data_acquire_on_node_cb_sequential_consistency() with the possibility of enabling or disabling data dependencies. starpu_data_release_on_node() must be called once the application no longer needs to access the piece of data. Alternatively, starpu_data_release_to_on_node() can be called to partly release the piece of data acquired. 
+ +\section DataPrefetch Data Prefetch + +The scheduling policies heft, dmda and pheft +perform data prefetch (see \ref STARPU_PREFETCH): +as soon as a scheduling decision is taken for a task, requests are issued to +transfer its required data to the target processing unit, if needed, so that +when the processing unit actually starts the task, its data will hopefully be +already available, and it will not have to wait for the transfer to finish. + +The application may want to perform some manual prefetching, for several reasons +such as excluding initial data transfers from performance measurements, or +setting up an initial statically-computed data distribution on the machine +before submitting tasks, which will thus guide StarPU toward an initial task +distribution (since StarPU will try to avoid further transfers). + +This can be achieved by giving the function starpu_data_prefetch_on_node() the +handle and the desired target memory node. An example is available in the file tests/microbenchs/prefetch_data_on_node.c. The variant +starpu_data_idle_prefetch_on_node() can be used to issue the transfer +only when the bus is idle. One can also call starpu_data_request_allocation() for the allocation of a piece of data on the specified memory node. We can know whether the allocation is done on the specified memory node by using starpu_data_test_if_allocated_on_node(). We can also know whether the map is done on the specified memory node by using starpu_data_test_if_mapped_on_node(). + +If we want to request, with a higher priority, that data be replicated to a given node as soon as possible, so that it is available there for tasks, we can call starpu_data_fetch_on_node(). We can call starpu_data_prefetch_on_node_prio() to have a higher priority than starpu_data_prefetch_on_node(), and call starpu_data_idle_prefetch_on_node_prio() to have a bit higher priority than starpu_data_idle_prefetch_on_node(). 
+ +Conversely, one can advise StarPU that some data will not be useful in the +close future by calling starpu_data_wont_use(). StarPU will then write its value +back to its home node, and evict it from GPUs when room is needed. An example is available in the file tests/datawizard/partition_wontuse.c. One can also advise StarPU to evict data from the memory node directly by calling starpu_data_evict_from_node(), but it may fail if e.g. some tasks are still working on the memory node. To avoid failure one can call starpu_data_can_evict() to check whether data can be evicted from the memory node. Anyway it is more recommended to use starpu_data_wont_use(). + +One can query the status of handle on the specified memory node by calling starpu_data_query_status2() or starpu_data_query_status(). One can call starpu_memchunk_tidy() to tidy the available memory on the specified memory node periodically. + +\section ManualPartitioning Manual Partitioning + +Except the partitioning functions described in \ref PartitioningData and \ref AsynchronousPartitioning, +one can also handle partitioning by hand, by registering several views on the +same piece of data. The idea is then to manage the coherency of the various +views through the common buffer in the main memory. +examples/filters/fmultiple_manual.c is a complete example using this technique. + +In short, we first register the same matrix several times: + +\code{.c} +starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); + +for (i = 0; i < PARTS; i++) + starpu_matrix_data_register(&vert_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[0][i*(NX/PARTS)], NX, NX/PARTS, NY, sizeof(matrix[0][0])); +\endcode + +Since StarPU is not aware that the two handles are actually pointing to the same +data, we have a danger of inadvertently submitting tasks to both views, which +will bring a mess since StarPU will not guarantee any coherency between the two +views. 
To make sure we don't do this, we invalidate the view that we will not +use: + +\code{.c} +for (i = 0; i < PARTS; i++) + starpu_data_invalidate(vert_handle[i]); +\endcode + +Then we can safely work on handle. + +When we want to switch to the vertical slice view, all we need to do is bring +coherency between them by running an empty task on the home node of the data: + +\code{.c} +struct starpu_codelet cl_switch = +{ + .where = STARPU_NOWHERE, + .nbuffers = 3, + .specific_nodes = 1, + .nodes = { STARPU_MAIN_RAM, STARPU_MAIN_RAM, STARPU_MAIN_RAM }, +}; + +ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, + STARPU_W, vert_handle[0], + STARPU_W, vert_handle[1], + 0); +\endcode + +The execution of the task switch will get back the matrix data into the +main memory, and thus the vertical slices will get the updated value there. + +Again, we prefer to make sure that we don't accidentally access the matrix through the whole-matrix handle: + +\code{.c} +starpu_data_invalidate_submit(handle); +\endcode + +Note: when enabling a set of handles in this way, the set must not have any +overlapping, i.e. the handles of the set must not have any part of data in +common, otherwise StarPU will not properly handle concurrent accesses between +them. + +And now we can start using vertical slices, etc. + +\section DataHandlesHelpers Data handles helpers + +Functions starpu_data_set_user_data() and starpu_data_get_user_data() are used to associate user-defined data with a specific data handle. One can set or retrieve the field \c user_data of the data handle by calling these two functions respectively. Similarly, functions starpu_data_set_sched_data() and starpu_data_get_sched_data() are used to associate scheduling-related data with a specific data handle. One can set or retrieve the field \c sched_data of the data handle by calling these two functions respectively. One can set a name for a data handle by calling starpu_data_set_name(). 
+ +One can call starpu_data_register_same() to register a new piece of data into a data handle with the same interface as the specified data handle. If necessary, one can register a void interface by using starpu_void_data_register(). There is no data really associated to this interface, but it may be used as a synchronization mechanism. + +One can call starpu_data_cpy() or starpu_data_cpy_priority() to copy data from one memory location to another memory location, but the latter one allows the application to specify a priority value for the copy operation. The higher the priority value, the sooner the copy operation will be scheduled and executed. One can also call the starpu_data_dup_ro() function for duplicating, but this function only creates a new read-only data block that is an exact copy of the original data block. The new data block can be used independently of the original data block for read-only access. + +starpu_data_pack_node() and starpu_data_pack() are functions that are used to pack a data item into a binary buffer on a node or on local memory node. starpu_data_peek_node() and starpu_data_peek() are functions that allow you to read, into the replicate of the handle on a given node or on the local node, the data located at the given pointer. starpu_data_unpack_node() and starpu_data_unpack() are functions that are used to unpack a data item from a binary buffer on a node or on local memory node. + +StarPU provides several functions for querying the size and memory allocation of variable size data items, such as: starpu_data_get_size() is a function that returns the size of a data associated with handle in bytes. This is the size of the actual data stored in memory. starpu_data_get_alloc_size() is a function that returns the amount of memory that has been allocated for a data associated with handle in anticipation. This may be larger than the actual size of the data item, due to alignment requirements or other implementation details. 
starpu_data_get_max_size() is a function that returns the maximum size of a handle data that can be allocated by StarPU. + +One can call starpu_data_get_home_node() to retrieve the identifier of the node on which the data handle is originally stored. One can call starpu_data_print() to print basic information about the data handle and the node to the specified file. + +\section DataPointers Handles data buffer pointers + +A simple understanding of StarPU handles is that it's a collection of buffers on +each memory node of the machine, which contain the same data. The picture is +however made more complex with the OpenCL support and with partitioning. + +When partitioning a handle, the data buffers of the subhandles will indeed +be inside the data buffers of the main handle (to save transferring data +back and forth between the main handle and the subhandles). But in OpenCL, +a cl_mem is not a pointer, but an opaque value on which pointer +arithmetic can not be used. That is why data interfaces contain three fields: +dev_handle, offset, and ptr. +
    +
  • The field dev_handle is what the allocation function +returned, and one can not do arithmetic on it. +
  • +
  • The field offset is the offset inside the allocated area, +most often it will be 0 because data start at the beginning of the +allocated area, but when the handle is partitioned, the subhandles +will have varying offset values, for each subpiece. +
  • +
  • The field ptr, in the non-OpenCL case, i.e. when pointer +arithmetic can be used on dev_handle, is just the sum of +dev_handle and offset, provided for convenience. +
  • +
+ +This means that: +
    +
  • computation kernels can use ptr in non-OpenCL implementations.
  • +
  • computation kernels have to use dev_handle and offset in the OpenCL implementation.
  • +
  • allocation methods of data interfaces have to store the value returned by starpu_malloc_on_node() in dev_handle and ptr, and set offset to 0.
  • +
  • partitioning filters have to copy over dev_handle without modifying it, set in the child different values of offset, and set ptr accordingly as the sum of dev_handle and offset.
  • +
+ +We can call starpu_data_handle_to_pointer() to get ptr associated with the data handle, or call starpu_data_get_local_ptr() to get the local pointer associated with the data handle. + +Examples in the directory examples/interface/complex_dev_handle/ show how to generate and implement an interface supporting OpenCL. + +To better notice the difference between simple ptr and +dev_handle + offset, one +can compare examples/interface/complex_interface.c vs +examples/interface/complex_dev_handle/complex_dev_handle_interface.c and +examples/interface/complex_filters.c vs +examples/interface/complex_dev_handle/complex_dev_handle_filters.c. + +\section DefiningANewDataFilter Defining A New Data Filter + +StarPU provides a series of predefined filters in \ref API_Data_Partition, but +additional filters can be defined by the application. The principle is that the +filter function just fills the memory location of the i-th subpart of a data. +Examples are provided in src/datawizard/interfaces/*_filters.c, +check \ref starpu_data_filter::filter_func for further details. +The helper function starpu_filter_nparts_compute_chunk_size_and_offset() can be used to +compute the division of pieces of data. + +\section DefiningANewDataInterface Defining A New Data Interface + +This section proposes an example how to define your own interface, when the +StarPU-provided interface do not fit your needs. Here we take a simple example of +an array of complex numbers represented by two arrays of double values. The full source code is in examples/interface/complex_interface.c and examples/interface/complex_interface.h + +Let's thus define a new data interface to manage arrays of complex numbers: + +\code{.c} +/* interface for complex numbers */ +struct starpu_complex_interface +{ + double *real; + double *imaginary; + int nx; +}; +\endcode + +That structure stores enough to describe one buffer of such kind of +data. 
It is used for the buffer stored in the main memory, another instance +is used for the buffer stored in a GPU, etc. A data handle is thus a +collection of such structures, to describe each buffer on each memory node. + +Note: one should not make pointers that point into such structures, because +StarPU needs to be able to copy over the content of it to various places, for +instance to efficiently migrate a data buffer from one data handle to another +data handle, so the actual address of the structure may vary. + +\subsection DefiningANewDataInterface_registration Data registration + +Registering such a data to StarPU is easily done using the function +starpu_data_register(). The last +parameter of the function, interface_complex_ops, will be +described below. + +\code{.c} +void starpu_complex_data_register(starpu_data_handle_t *handleptr, + unsigned home_node, double *real, double *imaginary, int nx) +{ + struct starpu_complex_interface complex = + { + .real = real, + .imaginary = imaginary, + .nx = nx + }; + + starpu_data_register(handleptr, home_node, &complex, &interface_complex_ops); +} +\endcode + +The struct starpu_complex_interface complex is here used just to store the +parameters provided by users to starpu_complex_data_register. 
+starpu_data_register() will first allocate the handle, and +then pass the structure starpu_complex_interface to the method +starpu_data_interface_ops::register_data_handle, which records them +within the data handle (it is called once per node by starpu_data_register()): + +\code{.c} +static void complex_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + local_interface->nx = complex_interface->nx; + if (node == home_node) + { + local_interface->real = complex_interface->real; + local_interface->imaginary = complex_interface->imaginary; + } + else + { + local_interface->real = NULL; + local_interface->imaginary = NULL; + } + } +} +\endcode + +If the application provided a home node, the corresponding pointers will be +recorded for that node. Others have no buffer allocated yet. +Possibly the interface needs some dynamic allocation (e.g. to store an array of +dimensions that can have variable size). The corresponding deallocation will then be +done in starpu_data_interface_ops::unregister_data_handle. + +Different operations need to be defined for a data interface through +the type starpu_data_interface_ops. We only define here the basic +operations needed to run simple applications. The source code for the +different functions can be found in the file +examples/interface/complex_interface.c, the details of the hooks to be +provided are documented in \ref starpu_data_interface_ops . 
+ +\code{.c} +static struct starpu_data_interface_ops interface_complex_ops = +{ + .register_data_handle = complex_register_data_handle, + .allocate_data_on_node = complex_allocate_data_on_node, + .copy_methods = &complex_copy_methods, + .get_size = complex_get_size, + .footprint = complex_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_complex_interface), +}; +\endcode + +The field starpu_data_interface_ops::interfaceid should be defined to +::STARPU_UNKNOWN_INTERFACE_ID when defining the interface, its value +will be updated the first time a data is registered through the new +data interface. + +Convenience functions can be defined to access the different fields of the +complex interface from a StarPU data handle after a call to starpu_data_acquire(): + +\code{.c} +double *starpu_complex_get_real(starpu_data_handle_t handle) +{ + struct starpu_complex_interface *complex_interface = + (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return complex_interface->real; +} + +double *starpu_complex_get_imaginary(starpu_data_handle_t handle); +int starpu_complex_get_nx(starpu_data_handle_t handle); +\endcode + +Similar functions need to be defined to access the different fields of the +complex interface from a void * pointer to be used within codelet +implementations. + +\snippet complex.c To be included. You should update doxygen if you see this text. + +Complex data interfaces can then be registered to StarPU. + +\code{.c} +double real = 45.0; +double imaginary = 12.0; +starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1); +starpu_task_insert(&cl_display, STARPU_R, handle1, 0); +\endcode + +and used by codelets. 
+ +\code{.c} +void display_complex_codelet(void *descr[], void *_args) +{ + int nx = STARPU_COMPLEX_GET_NX(descr[0]); + double *real = STARPU_COMPLEX_GET_REAL(descr[0]); + double *imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); + int i; + + for(i=0 ; i<nx ; i++) + { + fprintf(stderr, "Complex[%d] = %3.2f + %3.2f i\n", i, real[i], imaginary[i]); + } +} +\endcode + +The whole code for this complex data interface is available in the +directory examples/interface/. + +\subsection DefiningANewDataInterface_footprint Data footprint + +We need to pass a custom footprint function to the method starpu_data_interface_ops::footprint which computes data size footprint. StarPU provides several functions to compute different types of values: starpu_hash_crc32c_be_n() is used to compute the CRC of a byte buffer, starpu_hash_crc32c_be_ptr() is used to compute the CRC of a pointer value, starpu_hash_crc32c_be() is used to compute the CRC of a 32bit number, starpu_hash_crc32c_string() is used to compute the CRC of a string. + +\subsection DefiningANewDataInterface_allocation Data allocation + +To be able to run tasks on GPUs etc. StarPU needs to know how to allocate a +buffer for the interface. In our example, two allocations are needed in the +allocation method \c complex_allocate_data_on_node(): one for the real part and one +for the imaginary part. 
+ +\code{.c} +static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + + double *addr_real = NULL; + double *addr_imaginary = NULL; + starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); + + addr_real = (double*) starpu_malloc_on_node(node, requested_memory); + if (!addr_real) + goto fail_real; + addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory); + if (!addr_imaginary) + goto fail_imaginary; + + /* update the data properly in consequence */ + complex_interface->real = addr_real; + complex_interface->imaginary = addr_imaginary; + + return 2*requested_memory; + +fail_imaginary: + starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory); +fail_real: + return -ENOMEM; +} +\endcode + +Here we try to allocate the two parts. If either of them fails, we return +\c -ENOMEM. If they succeed, we can record the obtained pointers and return the +amount of allocated memory (for memory usage accounting). + +Conversely, \c complex_free_data_on_node() frees the two parts: + +\code{.c} +static void complex_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); + + starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory); + starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory); +} +\endcode + +We can call starpu_opencl_allocate_memory() to allocate memory on an OpenCL device. + +We have not done anything particular for GPUs or the like: it is +starpu_malloc_on_node() which knows how to actually make the allocation, and +returns the resulting pointer, be it in main memory, in GPU memory, etc. 
+ +\subsection DefiningANewDataInterface_copy Data copy + +Now that StarPU knows how to allocate/free a buffer, it needs to be able to +copy over data into/from it. Defining a method \c copy_any_to_any() allows StarPU to +perform direct transfers between main memory and GPU memory. + +\code{.c} +static int copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + void *async_data) +{ + struct starpu_complex_interface *src_complex = src_interface; + struct starpu_complex_interface *dst_complex = dst_interface; + int ret = 0; + + + if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node, + (uintptr_t) dst_complex->real, 0, dst_node, + src_complex->nx*sizeof(src_complex->real[0]), + async_data)) + ret = -EAGAIN; + if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node, + (uintptr_t) dst_complex->imaginary, 0, dst_node, + src_complex->nx*sizeof(src_complex->imaginary[0]), + async_data)) + ret = -EAGAIN; + return ret; +} +\endcode + +We here again have no idea what is main memory or GPU memory, or even if the +copy is synchronous or asynchronous: we just call starpu_interface_copy() +according to the interface, passing it the pointers, and checking whether it +returned \c -EAGAIN, which means the copy is asynchronous, and StarPU will +appropriately wait for it thanks to the pointer \c async_data. This copy method is also available for 2D matrices starpu_interface_copy2d(), 3D matrices starpu_interface_copy3d(), 4D matrices starpu_interface_copy4d() and N-dim matrices starpu_interface_copynd(). + +starpu_interface_copy() will also manage copies between other devices such as CUDA devices, OpenCL devices, etc. But if necessary, we may manage these copies by ourselves as well. +StarPU provides three functions starpu_cuda_copy_async_sync(), starpu_cuda_copy2d_async_sync() and starpu_cuda_copy3d_async_sync() that enable copying of 1D, 2D or 3D data between main memory and CUDA device memories. 
They first try to copy the data asynchronously; if that fails or if \c stream is \c NULL, they copy the data synchronously. +StarPU also provides several functions that are used to transfer data between RAM and OpenCL devices. starpu_opencl_copy_ram_to_opencl() copies data from RAM to an OpenCL device. starpu_opencl_copy_opencl_to_ram() copies data from an OpenCL device to RAM. starpu_opencl_copy_opencl_to_opencl() copies data between two OpenCL devices. starpu_opencl_copy_async_sync() copies data between two devices. If \c event is \c NULL, the copy is synchronous; otherwise, one should check whether \c ret is set to \c -EAGAIN, which means the copy is asynchronous. + +This copy method is referenced in a structure \ref starpu_data_copy_methods + +\code{.c} +static const struct starpu_data_copy_methods complex_copy_methods = +{ + .any_to_any = copy_any_to_any +}; +\endcode + +which was referenced in the structure \ref starpu_data_interface_ops above. + +Other fields of \ref starpu_data_copy_methods allow providing optimized +variants, notably for the case of 2D or 3D matrix tiles with non-trivial ld. + +We can call starpu_interface_data_copy() to record the copy in offline execution traces. + +When an asynchronous implementation of the data transfer is implemented, we can call starpu_interface_start_driver_copy_async() and starpu_interface_end_driver_copy_async() to initiate and complete asynchronous data transfers between main memory and GPU memory. + +\subsection DefiningANewDataInterface_pack Data pack/peek/unpack + +The copy methods allow for RAM/GPU transfers, but are not enough for e.g. +transferring over MPI. That requires defining the pack/peek/unpack methods. 
The +principle is that the method starpu_data_interface_ops::pack_data concatenates +the buffer data into a newly-allocated contiguous bytes array, conversely +starpu_data_interface_ops::peek_data extracts from a bytes array into the +buffer data, and starpu_data_interface_ops::unpack_data does the same as +starpu_data_interface_ops::peek_data but also frees the bytes array. + +\code{.c} +static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = complex_get_size(handle); + if (ptr != NULL) + { + char *data; + data = (void*) starpu_malloc_on_node_flags(node, *count, 0); + *ptr = data; + memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double)); + memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); + } + + return 0; +} +\endcode + +\c complex_pack_data() first computes the size to be allocated, then allocates it, +and copies over into it the content of the two real and imaginary arrays. 
+ +\code{.c} +static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + char *data = ptr; + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double)); + memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double)); + memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); + + return 0; +} +\endcode + +\c complex_peek_data() simply uses \c memcpy() to copy over from the bytes array into the data buffer. + +\code{.c} +static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + complex_peek_data(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} +\endcode + +And \c complex_unpack_data() just calls \c complex_peek_data() and releases the bytes array. + +\subsection DefiningANewDataInterface_pointers Pointers inside the data interface + +In the example described above, the two pointers stored in the data interface +are data buffers, which may point into main memory, GPU memory, etc. One may also +want to store pointers to meta-data for the interface, for instance the list of +dimensions sizes for the n-dimension matrix interface, but such pointers are to +be handled completely differently. More examples are provided in src/datawizard/interfaces/*_interface.c + +More precisely, there are two types of pointers: + +
    +
  • Data pointers, which point to the actual data in RAM/GPU/etc. memory. They +may be NULL when the data is not allocated (yet). StarPU will automatically +call starpu_data_interface_ops::allocate_data_on_node to allocate the data +pointers whenever needed, and call starpu_data_interface_ops::free_data_on_node +when memory gets scarce. For instance, for the n-dimension matrix interface +the pointers to the actual data (\c ptr, \c dev_handle, \c offset) are data +pointers. + +
  • Meta-data pointers, which always point to RAM memory. They are usually +always allocated so that they can always be used. For instance, for the +n-dimension matrix interface the array of dimension sizes and the array of ld +are meta-data pointers. These are typically allocated at data registration time +in starpu_data_interface_ops::register_data_handle, and released at data +unregistration time in starpu_data_interface_ops::unregister_data_handle. +
+ +This means that: + +
    +
  • The starpu_data_interface_ops::register_data_handle method has to allocate +the meta-data pointers. If users provided a buffer for the initial value +of the handle, starpu_data_interface_ops::register_data_handle sets the data +pointers of the home_node interface to that buffer. +
  • The interface can additionally provide a \c ptr_register helper to set the +data pointer of a given node. Such a helper can be implemented by calling starpu_data_ptr_register(). +
  • The starpu_data_interface_ops::unregister_data_handle method has to +deallocate the meta-data pointers. +
  • The starpu_data_interface_ops::allocate_data_on_node method has to allocate +the data pointers on the given node. +
  • The starpu_data_interface_ops::free_data_on_node method has to deallocate +the data pointers on the given node. +
  • The optional starpu_data_interface_ops::cache_data_on_node transfers the data +pointers from a source interface to a cached interface. If undefined, a mere +memcpy is used instead. This can notably take +the opportunity to clear pointers in the source interface. This also needs to +copy the properties that starpu_data_interface_ops::compare (or +starpu_data_interface_ops::alloc_compare if defined) needs for comparing +interfaces for caching compatibility. +
  • The starpu_data_interface_ops::reuse_data_on_node transfers the data +pointers from a cached interface to the destination interface. If undefined, a +mere memcpy is used instead. +
  • The starpu_data_interface_ops::map_data has to map the data pointers on the +given node. The helper function starpu_interface_map() can be used to implement this method. +
  • The starpu_data_interface_ops::unmap_data has to unmap the data pointers on +the given node. The helper function starpu_interface_unmap() can be used to implement this method. +
  • The starpu_data_interface_ops::update_map has to update the data pointers on +the given node. The helper function starpu_interface_update_map() can be used to implement this method. +
  • The filtering functions have to allocate the meta-data pointers for the +child interface, and when the parent interface has data pointers, it has to set +the child data pointers to point into the parent data buffers. +
+ +Put another way: + +
    +
  • starpu_data_register() initializes the handle structure and calls +starpu_data_interface_ops::register_data_handle. +
  • Then StarPU may call starpu_data_interface_ops::allocate_data_on_node and +starpu_data_interface_ops::free_data_on_node as it sees fit when it needs +the data allocated on some node or not. +
  • Eventually, starpu_data_unregister() releases the handle buffers for all +nodes (except the home node given to starpu_data_register() ), which either +means calling starpu_data_interface_ops::free_data_on_node (if allocation +cache is disabled), or putting them into the allocation cache. It then calls +starpu_data_interface_ops::unregister_data_handle, and releases the handle +structure. +
+ +Note: for compressed matrices such as CSR, BCSR, COO, the \c colind and \c +rowptr arrays are not meta-data pointers, but data pointers like \c nzval, +because they need to be available in GPU memory for the GPU kernels. + +Note: when the interface does not contain meta-data pointers, +starpu_data_interface_ops::reuse_data_on_node does not need to +be implemented, StarPU will just use a memcpy. Otherwise, either +starpu_data_interface_ops::reuse_data_on_node must be used to transfer only the +data pointers and not the meta-data pointers, or the allocation cache should be +disabled by setting starpu_data_interface_ops::dontcache to 1. + +Note: It should be noted that because of the allocation cache, +starpu_data_interface_ops::free_data_on_node may be called on an interface which +is not attached to a handle anymore. This means that the meta-data pointers +will have been deallocated by starpu_data_interface_ops::unregister_data_handle, +and cannot be used by starpu_data_interface_ops::free_data_on_node to e.g. +compute the size to be deallocated. For instance, the n-dimension matrix +interface uses an additional scalar allocsize field to store the allocation +size, thus still available even when the interface is in the allocation cache. + +Note: if starpu_data_interface_ops::unregister_data_handle is +implemented and checks that pointers are NULL, +starpu_data_interface_ops::cache_data_on_node needs to be implemented to clear +the pointers when caching the allocation. + +\subsection DefiningANewDataInterface_helpers Helpers + +We can get the unique identifier of the interface associated with the data handle by calling starpu_data_get_interface_id(), and get the next available identifier for a newly created data interface by calling starpu_data_interface_get_next_id(). 
+ + +\section TheMultiformatInterface The Multiformat Interface + +It may be interesting to represent the same piece of data using two different +data structures: one only used on CPUs, and one only used on GPUs. +This can be done by using the multiformat interface. StarPU +will be able to convert data from one data structure to the other when needed. +Note that the scheduler dmda is the only one optimized for this +interface. Users must provide StarPU with conversion codelets: + +\snippet multiformat.c To be included. You should update doxygen if you see this text. + +Kernels can be written almost as for any other interface. Note that +::STARPU_MULTIFORMAT_GET_CPU_PTR shall only be used for CPU kernels. CUDA kernels +must use ::STARPU_MULTIFORMAT_GET_CUDA_PTR, and OpenCL kernels must use +::STARPU_MULTIFORMAT_GET_OPENCL_PTR. ::STARPU_MULTIFORMAT_GET_NX may +be used in any kind of kernel. + +\code{.c} +static void +multiformat_scal_cpu_func(void *buffers[], void *args) +{ + struct point *aos; + unsigned int n; + + aos = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + ... +} + +extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args) +{ + unsigned int n; + struct struct_of_arrays *soa; + + soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); + n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + + ... +} +\endcode + +A full example may be found in examples/basic_examples/multiformat.c. + +\section SpecifyingATargetNode Specifying A Target Node For Task Data + +When executing a task on GPU, for instance, StarPU would normally copy all the +needed data for the tasks to the embedded memory of the GPU. It may however +happen that the task kernel would rather have some of the data kept in the +main memory instead of copied in the GPU, a pivoting vector for instance. 
+This can be achieved by setting the flag starpu_codelet::specific_nodes to +1, and then fill the array starpu_codelet::nodes (or starpu_codelet::dyn_nodes when +starpu_codelet::nbuffers is greater than \ref STARPU_NMAXBUFS) with the node numbers +where data should be copied to, or ::STARPU_SPECIFIC_NODE_LOCAL to let +StarPU copy it to the memory node where the task will be executed. + +The function starpu_task_get_current_data_node() can be used to retrieve the memory node associated with the current task being executed. + +::STARPU_SPECIFIC_NODE_CPU can also be used to request data to be +put in CPU-accessible memory (and let StarPU choose the NUMA node). +::STARPU_SPECIFIC_NODE_FAST and ::STARPU_SPECIFIC_NODE_SLOW can also be +used + +For instance, +with the following codelet: + +\code{.c} +struct starpu_codelet cl = +{ + .cuda_funcs = { kernel }, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; +\endcode + +the first data of the task will be kept in the CPU memory, while the second +data will be copied to the CUDA GPU as usual. A working example is available in +tests/datawizard/specific_node.c + +With the following codelet: + +\code{.c} +struct starpu_codelet cl = +{ + .cuda_funcs = { kernel }, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_SLOW}, +}; +\endcode + +The first data will be copied into fast (but probably size-limited) local memory, +while the second data will be left in slow (but large) memory. This makes sense +when the kernel does not make so many accesses to the second data, and thus data +being remote e.g. over a PCI bus is not a performance problem, and avoids +filling the fast local memory with data which does not need the performance. 
+ +In cases where the kernel is fine with some data being either local or in the +main memory, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU can be used. StarPU will then +be free to leave the data in the main memory and let the kernel access it from +accelerators, or to move it to the accelerator before starting the kernel, for +instance: + +\code{.c} +struct starpu_codelet cl = +{ + .cuda_funcs = { kernel }, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, +}; +\endcode + +An example for specifying target node is available in tests/datawizard/specific_node.c. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy b/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy new file mode 100644 index 0000000..60ce1a8 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy @@ -0,0 +1,314 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page AdvancedScheduling Advanced Scheduling + +\section Energy-basedScheduling Energy-based Scheduling + +Note: by default, StarPU does not let CPU workers sleep, to let them react to +task release as quickly as possible. 
For idle time to really let CPU cores save +energy, one needs to use the \c configure option \ref enable-blocking-drivers +"--enable-blocking-drivers". + +If the application can provide some energy consumption performance model (through +the field starpu_codelet::energy_model), StarPU will +take it into account when distributing tasks. The target function that +the scheduler \b dmda minimizes becomes alpha * T_execution + +beta * T_data_transfer + gamma * Consumption , where Consumption +is the estimated task consumption in Joules. To tune this parameter, use +export STARPU_SCHED_GAMMA=3000 (\ref STARPU_SCHED_GAMMA) for instance, to express that each Joule +(i.e. kW during 1000us) is worth 3000us execution time penalty. Setting +alpha and beta to zero permits to only take into account energy consumption. + +This is however not sufficient to correctly optimize energy: the scheduler would +simply tend to run all computations on the most energy-conservative processing +unit. To account for the consumption of the whole machine (including idle +processing units), the idle power of the machine should be given by setting +export STARPU_IDLE_POWER=200 (\ref STARPU_IDLE_POWER) for 200W, for instance. This value can often +be obtained from the machine power supplier, e.g. by running + +\verbatim +ipmitool -I lanplus -H mymachine-ipmi -U myuser -P mypasswd sdr type Current +\endverbatim + +The energy actually consumed by the total execution can be displayed by setting +export STARPU_PROFILING=1 STARPU_WORKER_STATS=1 (\ref STARPU_PROFILING and \ref STARPU_WORKER_STATS). + +For OpenCL devices, on-line task consumption measurement is currently supported through the OpenCL extension +CL_PROFILING_POWER_CONSUMED, implemented in the MoviSim simulator. + + +For CUDA devices, on-line task consumption measurement is supported on V100 +cards and beyond. This however only works for quite long tasks, since the +measurement granularity is about 10ms. 
+ +Applications can however provide explicit measurements by feeding the energy +performance model by hand. +Fine-grain measurement is often not feasible with the feedback provided by +the hardware, so users can for instance run a given task a thousand times, +measure the global consumption for that series of tasks, divide it by a +thousand, repeat for varying kinds of tasks and task sizes, and eventually feed +StarPU with these manual measurements. For CUDA devices starting with V100, +the starpu_energy_start() and starpu_energy_stop() +helpers, described in \ref MeasuringEnergyandPower below, make it easy. + +For older models, one can use nvidia-smi -q -d POWER to get the current +consumption in Watt. Multiplying this value by the average duration of a +single task gives the consumption of the task in Joules, which can be given to +starpu_perfmodel_update_history() (exemplified in \ref PerformanceModelExample +with the performance model energy_model). + +Another way to provide the energy performance is to define a +perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or +::STARPU_PER_WORKER , and set the field starpu_perfmodel::arch_cost_function or +starpu_perfmodel::worker_cost_function to a function which shall return +the estimated consumption of the task in Joules. Such a function can for instance +use starpu_task_expected_length() on the task (in µs), multiplied by the +typical power consumption of the device, e.g. in W, and divided by 1000000. to +get Joules. An example is in the file tests/perfmodels/regression_based_energy.c. + +There are other functions in StarPU that are used to measure the energy consumed by the system during execution. The starpu_energy_use() function declares an amount of energy consumed by the task, while the starpu_energy_used() function returns the total energy consumed since the start of measurement. 
+ +\subsection MeasuringEnergyandPower Measuring energy and power with StarPU + +We have extended the performance model of StarPU to measure energy and power values of CPUs. These values are measured using the existing Performance API (PAPI) analysis library. PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. + + +- To measure energy consumption of CPUs, we use the RAPL events, which are available on CPU architecture: +RAPL_ENERGY_PKG that represents the whole CPU socket power consumption, +and RAPL_ENERGY_DRAM that represents the RAM power consumption. + + + +PAPI provides a generic, portable interface for the hardware performance counters available on all modern CPUs and some other components of interest that are scattered across the chip +and system. + + +In order to use the right rapl events for energy measurement, user should check the rapl events available on the machine, using this command: + +\verbatim +$ papi_native_avail +\endverbatim + +Depending on the system configuration, users may have to run this as root to get the performance counter values. + +Since the measurement is for all the CPUs and the memory, the approach taken +here is to run a series of tasks on all of them and to take the overall measurement. + +- The example below illustrates the energy and power measurements, using the functions starpu_energy_start() and starpu_energy_stop(). + +In this example, we launch several tasks of the same type in parallel. 
To perform the energy requirement measurement of a program, we call starpu_energy_start(), which initializes energy measurement counters and starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi) to stop counting and update the performance model. This ends up yielding the average energy requirement of a single task. The example below illustrates this for a given task type. + +\code{.c} + unsigned N = starpu_cpu_worker_get_count() * 40; + + starpu_energy_start(-1, STARPU_CPU_WORKER); + + for (i = 0; i < N; i++) + starpu_task_insert(&cl, STARPU_EXECUTE_WHERE, STARPU_CPU, STARPU_R, arg1, STARPU_RW, arg2, 0); + + starpu_task_t *specimen = starpu_task_build(&cl, STARPU_R, arg1, STARPU_RW, arg2, 0); + starpu_energy_stop(&codelet.energy_model, specimen, 0, N, -1, STARPU_CPU_WORKER); + + . . . +\endcode + +The example starts 40 times more tasks of the same type than there are CPU execution units. Once the tasks are distributed over all CPUs, the latter are all executing the same type of tasks (with the same data size and parameters); each CPU will in the end execute 40 tasks. A specimen task is then constructed and passed to starpu_energy_stop(), which will fold into the performance model the energy requirement measurement for that type and size of task. + +For the energy and power measurements, depending on the system configuration, users may have to run applications as root to use PAPI library. + +The function starpu_energy_stop() uses PAPI_stop() to stop counting and store the values into the array. We calculate both energy in Joules and power consumption in Watt. We call the function starpu_perfmodel_update_history() in the performance model to provide explicit measurements. + +- In the CUDA case, nvml provides per-GPU energy measurement. 
We can thus calibrate the performance models per GPU: + +\code{.c} + unsigned N = 40; + + for (i = 0; i < starpu_cuda_worker_get_count(); i++) { + int workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, i); + + starpu_energy_start(workerid, STARPU_CUDA_WORKER); + + for (j = 0; j < N; j++) + starpu_task_insert(&cl, STARPU_EXECUTE_ON_WORKER, workerid, STARPU_R, arg1, STARPU_RW, arg2, 0); + + starpu_task_t *specimen = starpu_task_build(&cl, STARPU_R, arg1, STARPU_RW, arg2, 0); + starpu_energy_stop(&codelet.energy_model, specimen, 0, N, workerid, STARPU_CUDA_WORKER); + + } +\endcode + +- A complete example is available in tests/perfmodels/regression_based_memset.c + +\section StaticScheduling Static Scheduling + +In some cases, one may want to force some scheduling, for instance force a given +set of tasks to GPU0, another set to GPU1, etc. while letting some other tasks +be scheduled on any other device. This can indeed be useful to guide StarPU into +some work distribution, while still letting some degree of dynamism. For +instance, to force execution of a task on CUDA0: + +\code{.c} +task->execute_on_a_specific_worker = 1; +task->workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); +\endcode + +An example is in the file tests/errorcheck/invalid_tasks.c. + +or equivalently + +\code{.c} +starpu_task_insert(&cl, ..., STARPU_EXECUTE_ON_WORKER, starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0), ...); +\endcode + +One can also specify a set of worker(s) which are allowed to take the task, as an +array of bits, for instance to allow workers 2 and 42: + +\code{.c} +task->workerids = calloc(2,sizeof(uint32_t)); +task->workerids[2/32] |= (1 << (2%32)); +task->workerids[42/32] |= (1 << (42%32)); +task->workerids_len = 2; +\endcode + +One can also specify the order in which tasks must be executed by setting the field +starpu_task::workerorder. An example is available in the file tests/main/execute_schedule.c. 
If this field is set to a non-zero value, it +provides the per-worker consecutive order in which tasks will be executed, +starting from 1. For a given such task, the worker will thus not execute +it before all the tasks with smaller order value have been executed, notably +in case those tasks are not available yet due to some dependencies. This +eventually gives total control of task scheduling, and StarPU will only serve as +a "self-timed" task runtime. Of course, the provided order has to be runnable, +i.e. a task should not depend on another task bound to the same worker +with a bigger order. + +Note however that using scheduling contexts while statically scheduling tasks on workers +could be tricky. Be careful to schedule the tasks exactly on the workers of the corresponding +contexts, otherwise the workers' corresponding scheduling structures may not be allocated or +the execution of the application may deadlock. Moreover, the hypervisor should not be used when +statically scheduling tasks. + +\section configuringHeteroprio Configuring Heteroprio + +Within Heteroprio, one priority per processing unit type is assigned to each task, such that a task has several +priorities. Each worker pops the task that has the highest priority for the hardware type it uses, which +could be CPU or CUDA for example. Therefore, the priorities have to be used to manage the critical path, +but also to promote the consumption of tasks by the more appropriate workers. + +The tasks are stored inside buckets, where each bucket corresponds to a priority set. Then each +worker uses an indirect access array to know the order in which it should access the buckets. Moreover, +all the tasks inside a bucket must be compatible with all the processing units that may access it (at least). + +These priorities are now automatically assigned by Heteroprio in auto calibration mode using heuristics. 
+If you want to set these priorities manually, you can change \ref STARPU_HETEROPRIO_USE_AUTO_CALIBRATION +and follow the example below. + +In this example code, we have 5 types of tasks. +CPU workers can compute all of them, but CUDA workers can only execute +tasks of types 0 and 1, and are expected to go 20 and 30 times +faster than the CPU, respectively. +\code{.c} +#include <starpu_heteroprio.h> + + // Before calling starpu_init +struct starpu_conf conf; +starpu_conf_init(&conf); + // Inform StarPU to use Heteroprio +conf.sched_policy_name = "heteroprio"; + // Inform StarPU about the function that will init the priorities in Heteroprio + // where init_heteroprio is a function to implement +conf.sched_policy_callback = &init_heteroprio; + // Do other things with conf if needed, then init StarPU +starpu_init(&conf); +\endcode + +\code{.c} +void init_heteroprio(unsigned sched_ctx) { + // CPU uses 5 buckets and visits them in the natural order + starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_CPU_WORKER, 5); + // It uses direct mapping idx => idx + for(unsigned idx = 0; idx < 5; ++idx){ + starpu_heteroprio_set_mapping(sched_ctx, STARPU_CPU_WORKER, idx, idx); + // If there is no CUDA worker we must tell that CPU is faster + starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CPU_WORKER, idx); + } + + if(starpu_cuda_worker_get_count()){ + // CUDA is enabled and uses 2 buckets + starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_CUDA_WORKER, 2); + // CUDA will first look at bucket 1 + starpu_heteroprio_set_mapping(sched_ctx, STARPU_CUDA_WORKER, 0, 1); + // CUDA will then look at bucket 2 + starpu_heteroprio_set_mapping(sched_ctx, STARPU_CUDA_WORKER, 1, 2); + + // For bucket 1 CUDA is the fastest + starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CUDA_WORKER, 1); + // And CPU is 30 times slower + starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 1, 30.0f); + + // For bucket 0 CUDA is the fastest + starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CUDA_WORKER, 0); + // 
And CPU is 20 times slower + starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 0, 20.0f); + } +} +\endcode + +Then, when a task is inserted, the priority of the task will be used to +select in which bucket it has to be stored. +So, in the given example, the priority of a task will be between 0 and 4 inclusive. +However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and +tasks of priorities 2-4 must provide CPU kernels (at least). The full source code of this example is available in the file examples/scheduler/heteroprio_test.c + +\subsection LAHeteroprio Using locality aware Heteroprio + +Heteroprio supports a mode where locality is evaluated to guide the distribution +of the tasks (see https://peerj.com/articles/cs-190.pdf). +Currently, this mode is available using the dedicated function or an environment variable +\ref STARPU_HETEROPRIO_USE_LA, and can be configured using environment variables. +\code{.c} +void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality); +\endcode + +In this mode, multiple strategies are available to determine which memory node's workers are the most qualified for executing a specific task. This strategy can be set with \ref STARPU_LAHETEROPRIO_PUSH +and available strategies are: +- WORKER: the worker which pushed the task is preferred for the execution. +- LcS: the node with the shortest data transfer time (estimated by StarPU) is the most qualified +- LS_SDH: the node with the smallest data amount to be transferred will be preferred. +- LS_SDH2: similar to LS_SDH, but data in write access is counted in a quadratic manner to give them more importance. +- LS_SDHB: similar to LS_SDH, but data in write access is balanced with a coefficient (its value is set to 1000) and +for the same amount of data, the one with fewer pieces of data to be transferred will be preferred. 
+- LC_SMWB: similar to LS_SDH, but the amount of data in write access gets multiplied by a coefficient which gets closer to 2 +as the amount of data in read access gets larger than the data in write access. +- AUTO: strategy by default, this one selects the best strategy and changes it at runtime to improve performance + +Other environment variables to configure LaHeteroprio are documented in \ref ConfiguringLaHeteroprio + +\subsection AutoHeteroprio Using Heteroprio in auto-calibration mode + +In this mode, Heteroprio saves data about each program execution, in order to improve future ones. +By default, these files are stored in the folder used by perfmodel, but this can be changed using the +\ref STARPU_HETEROPRIO_DATA_DIR environment variable. You can also specify the data filename directly using +\ref STARPU_HETEROPRIO_DATA_FILE. + +Additionally, to assign priorities to tasks, Heteroprio needs a way to detect that some tasks are similar. +By default, Heteroprio looks for tasks with the same perfmodel, or with the same codelet's name if no perfmodel was assigned. +This behavior can be changed to only consider the codelet's name by setting +\ref STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY to 1 + +Other environment variables to configure AutoHeteroprio are documented in \ref ConfiguringAutoHeteroprio + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy b/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy new file mode 100644 index 0000000..5e711af --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy @@ -0,0 +1,475 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page AdvancedTasksInStarPU Advanced Tasks In StarPU + +\section TaskDependencies Task Dependencies + +\subsection SequentialConsistency Sequential Consistency + +By default, task dependencies are inferred from data dependency (sequential +coherency) by StarPU. The application can however disable sequential coherency +for some data, and dependencies can be specifically expressed. + +Setting (or unsetting) sequential consistency can be done at the data +level by calling starpu_data_set_sequential_consistency_flag() for a +specific data (an example is in the file examples/dependency/task_end_dep.c) or starpu_data_set_default_sequential_consistency_flag() +for all data (an example is in the file tests/main/subgraph_repeat.c). + +The sequential consistency mode can also be gotten by calling starpu_data_get_sequential_consistency_flag() for a specific data or get the default sequential consistency flag by calling starpu_data_get_default_sequential_consistency_flag(). + +Setting (or unsetting) sequential consistency can also be done at task +level by setting the field starpu_task::sequential_consistency to \c 0 (an example is in the file tests/main/deploop.c). + +Sequential consistency can also be set (or unset) for each handle of a +specific task, this is done by using the field +starpu_task::handles_sequential_consistency. 
When set, its value +should be an array with the number of elements being the number of +handles for the task, each element of the array being the sequential +consistency for the \c i-th handle of the task. The field can easily be +set when calling starpu_task_insert() with the flag +::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY + +\code{.c} +char *seq_consistency = malloc(cl.nbuffers * sizeof(char)); +seq_consistency[0] = 1; +seq_consistency[1] = 1; +seq_consistency[2] = 0; +ret = starpu_task_insert(&cl, + STARPU_RW, handleA, STARPU_RW, handleB, STARPU_RW, handleC, + STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, seq_consistency, + 0); +free(seq_consistency); +\endcode + +A full code example is available in the file examples/dependency/sequential_consistency.c. + +The internal algorithm used by StarPU to set up implicit dependency is +as follows: +\code{.c} +if (sequential_consistency(task) == 1) + for(i=0 ; i<STARPU_TASK_GET_NBUFFERS(task) ; i++) + if (sequential_consistency(i-th data, task) == 1) + if (sequential_consistency(i-th data) == 1) + create_implicit_dependency(...) +\endcode + +\subsection TasksAndTagsDependencies Tasks And Tags Dependencies + +One can explicitly set dependencies between tasks using +starpu_task_declare_deps() or starpu_task_declare_deps_array(). Dependencies between tasks can be +expressed through tags associated to a tag with the field +starpu_task::tag_id and using the function starpu_tag_declare_deps() +or starpu_tag_declare_deps_array(). The example tests/main/tag_task_data_deps.c shows how to set dependencies between tasks with different functions. + +The termination of a task can be delayed through the function +starpu_task_end_dep_add() which specifies the number of calls to the function +starpu_task_end_dep_release() needed to trigger the task termination. One can +also use starpu_task_declare_end_deps() or starpu_task_declare_end_deps_array() +to delay the termination of a task until the termination of other tasks. A simple example is available in the file tests/main/task_end_dep.c. + +starpu_tag_notify_from_apps() can be used to explicitly unlock a specific tag, but if it is called several times on the same tag, notification will be done only on first call. However, one can call starpu_tag_restart() to clear the already notified status of a tag which is not associated with a task, and then calling starpu_tag_notify_from_apps() again will notify the successors. 
Alternatively, starpu_tag_notify_restart_from_apps() can be used to atomically call both starpu_tag_notify_from_apps() and starpu_tag_restart() on a specific tag. + +To get the task associated to a specific tag, one can call starpu_tag_get_task(). Once the corresponding task has been executed and when there is no other tag that depend on this tag anymore, one can call starpu_tag_remove() to release the resources associated to the specific tag. One can use starpu_tag_clear() to clear all the tags (but it requires that no starpu_tag_wait_array() call is currently pending). + +\section WaitingForTasks Waiting For Tasks + +StarPU provides several advanced functions to wait for termination of tasks. +One can wait for some explicit tasks, or for some tag attached to some tasks, or +for some data results. + +starpu_task_wait_array() is a function that waits for an array of tasks +to complete their execution. starpu_task_wait_for_all_in_ctx() is a +function that waits for all tasks in a specific context to complete their +execution. starpu_task_wait_for_n_submitted_in_ctx() is a function that +waits for a specified number of tasks to be submitted to a specific context. +starpu_task_wait_for_no_ready() is a function that waits for all tasks to +become unready, which means that they are either completed or blocked on a +data dependency. In order to successfully call these functions to wait for +termination of tasks, starpu_task::detach should be set to 0 before task +submission. + +The function starpu_task_nready() returns the number of tasks that are ready to execute, which means that all their data dependencies are satisfied and they are waiting to be scheduled, while the function starpu_task_nsubmitted() returns the number of tasks that have been submitted and not completed yet. + +The function starpu_task_finished() can be used to determine whether a specific task has completed its execution. 
+ +starpu_tag_wait() and starpu_tag_wait_array() are two blocking functions that can be used to wait for tasks with specific tags to complete their execution. The former one waits for a specified task to complete while the latter one waits for a group of tasks to complete. + +When using e.g. starpu_task_insert(), it may be more convenient to wait for the +result of a task rather than waiting for a given task explicitly. That +can be done thanks to starpu_data_acquire() or starpu_data_acquire_cb() +that wait for the result to be available in the home node of the data. That +will thus wait for all the tasks that lead to that result. One can also +use starpu_data_acquire_on_node() and give it ::STARPU_ACQUIRE_NO_NODE to +tell to just wait for tasks to complete, but not wait for the data to be +available in the home node. One can also use starpu_data_acquire_try() or +starpu_data_acquire_on_node_try() to just test for the termination. + +If a task is created by using starpu_task_create() or starpu_task_insert(), +the field starpu_task::destroy is set to 1 by default, which means that the +task structure will be automatically freed after termination. On the other +hand, if the task is initialized by using starpu_task_init(), the field +starpu_task::destroy is set to 0 by default, which means that the task structure +will not be freed until starpu_task_destroy() is called explicitly. Otherwise, +we can manually set starpu_task::destroy to 1 before submission or call +starpu_task_set_destroy() after submission to activate the automatic freeing of +the task structure. + +\section UsingMultipleImplementationsOfACodelet Using Multiple Implementations Of A Codelet + +One may want to write multiple implementations of a codelet for a single type of +device and let StarPU choose which one to run. As an example, we will show how +to use SSE to scale a vector. 
The codelet can be written as follows: + +\code{.c} +#include <xmmintrin.h> + +void scal_sse_func(void *buffers[], void *cl_arg) +{ + float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); + unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + unsigned int n_iterations = n/4; + if (n % 4 != 0) + n_iterations++; + + __m128 *VECTOR = (__m128*) vector; + __m128 factor __attribute__((aligned(16))); + factor = _mm_set1_ps(*(float *) cl_arg); + + unsigned int i; + for (i = 0; i < n_iterations; i++) + VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]); +} +\endcode + +\code{.c} +struct starpu_codelet cl = +{ + .cpu_funcs = { scal_cpu_func, scal_sse_func }, + .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func" }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +\endcode + +The full code of this example is available in the file examples/basic_examples/vector_scal.c. + +Schedulers which are multi-implementation aware (only dmda and +pheft for now) will use the performance models of all the +provided implementations, and pick the one which seems to be the fastest. + +\section EnablingImplementationAccordingToCapabilities Enabling Implementation According To Capabilities + +Some implementations may not run on some devices. For instance, some CUDA +devices do not support double floating point precision, and thus the kernel +execution would just fail; or the device may not have enough shared memory for +the implementation being used. The field starpu_codelet::can_execute +makes it possible to express this. 
For instance: + +\code{.c} +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + const struct cudaDeviceProp *props; + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + return 1; + /* Cuda device */ + props = starpu_cuda_get_device_properties(workerid); + if (props->major >= 2 || props->minor >= 3) + /* At least compute capability 1.3, supports doubles */ + return 1; + /* Old card, does not support doubles */ + return 0; +} + +struct starpu_codelet cl = +{ + .can_execute = can_execute, + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .cuda_funcs = { gpu_func }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +\endcode + +A full example is available in the file examples/reductions/dot_product.c. + +This can be essential e.g. when running on a machine which mixes various models +of CUDA devices, to take benefit from the new models without crashing on old models. + +Note: the function starpu_codelet::can_execute is called by the +scheduler each time it tries to match a task with a worker, and should +thus be very fast. The function starpu_cuda_get_device_properties() +provides quick access to CUDA properties of CUDA devices to achieve +such efficiency. + +Another example is to compile CUDA code for various compute capabilities, +resulting in two CUDA functions, e.g. scal_gpu_13 for compute capability +1.3, and scal_gpu_20 for compute capability 2.0. Both functions can be +provided to StarPU by using starpu_codelet::cuda_funcs, and +starpu_codelet::can_execute can then be used to rule out the +scal_gpu_20 variant on a CUDA device which will not be able to execute it: + +\code{.c} +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + const struct cudaDeviceProp *props; + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + return 1; + /* Cuda device */ + if (nimpl == 0) + /* Trying to execute the 1.3 capability variant, we assume it is ok in all cases. 
*/ + return 1; + /* Trying to execute the 2.0 capability variant, check that the card can do it. */ + props = starpu_cuda_get_device_properties(workerid); + if (props->major >= 2) + /* At least compute capability 2.0, can run it */ + return 1; + /* Old card, does not support 2.0, will not be able to execute the 2.0 variant. */ + return 0; +} + +struct starpu_codelet cl = +{ + .can_execute = can_execute, + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .cuda_funcs = { scal_gpu_13, scal_gpu_20 }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +\endcode + +Another example is having specialized implementations for some given common +sizes, for instance here we have a specialized implementation for 1024x1024 +matrices: + +\code{.c} +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + const struct cudaDeviceProp *props; + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + return 1; + /* Cuda device */ + switch (nimpl) + { + case 0: + /* Trying to execute the generic capability variant. */ + return 1; + case 1: + { + /* Trying to execute the size == 1024 specific variant. */ + struct starpu_matrix_interface *interface = starpu_data_get_interface_on_node(task->handles[0]); + return STARPU_MATRIX_GET_NX(interface) == 1024 && STARPU_MATRIX_GET_NY(interface) == 1024; + } + } +} + +struct starpu_codelet cl = +{ + .can_execute = can_execute, + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .cuda_funcs = { potrf_gpu_generic, potrf_gpu_1024 }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +\endcode + +Note that the most generic variant should be provided first, as some schedulers are +not able to try the different variants. + +\section GettingTaskChildren Getting Task Children + +It may be interesting to get the list of tasks which depend on a given task, +notably when using implicit dependencies, since this list is computed by StarPU. 
+starpu_task_get_task_succs() or starpu_task_get_task_scheduled_succs() provides it. For instance: + +\code{.c} +struct starpu_task *tasks[4]; +ret = starpu_task_get_task_succs(task, sizeof(tasks)/sizeof(*tasks), tasks); +\endcode + +And the full example of getting task children is available in the file tests/main/get_children_tasks.c + +\section ParallelTasks Parallel Tasks + +StarPU can leverage existing parallel computation libraries by the means of +parallel tasks. A parallel task is a task which is run by a set of CPUs +(called a parallel or combined worker) at the same time, by using an existing +parallel CPU implementation of the computation to be achieved. This can also be +useful to improve the load balance between slow CPUs and fast GPUs: since CPUs +work collectively on a single task, the completion time of tasks on CPUs become +comparable to the completion time on GPUs, thus relieving from granularity +discrepancy concerns. hwloc support needs to be enabled to get +good performance, otherwise StarPU will not know how to better group +cores. + +Two modes of execution exist to accommodate with existing usages. + +\subsection Fork-modeParallelTasks Fork-mode Parallel Tasks + +In the Fork mode, StarPU will call the codelet function on one +of the CPUs of the combined worker. The codelet function can use +starpu_combined_worker_get_size() to get the number of threads it is +allowed to start to achieve the computation. The CPU binding mask for the whole +set of CPUs is already enforced, so that threads created by the function will +inherit the mask, and thus execute where StarPU expected, the OS being in charge +of choosing how to schedule threads on the corresponding CPUs. The application +can also choose to bind threads by hand, using e.g. sched_getaffinity to know +the CPU binding mask that StarPU chose. + +For instance, using OpenMP (full source is available in +examples/openmp/vector_scal.c): + +\snippet forkmode.c To be included. 
You should update doxygen if you see this text. + +Other examples include for instance calling a BLAS parallel CPU implementation +(see examples/mult/xgemm.c). + +\subsection SPMD-modeParallelTasks SPMD-mode Parallel Tasks + +In the SPMD mode, StarPU will call the codelet function on +each CPU of the combined worker. The codelet function can use +starpu_combined_worker_get_size() to get the total number of CPUs +involved in the combined worker, and thus the number of calls that are made in +parallel to the function, and starpu_combined_worker_get_rank() to get +the rank of the current CPU within the combined worker. For instance: + +\code{.c} +static void func(void *buffers[], void *_args) +{ + unsigned i; + float *factor = _args; + struct starpu_vector_interface *vector = buffers[0]; + unsigned n = STARPU_VECTOR_GET_NX(vector); + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + + /* Compute slice to compute */ + unsigned m = starpu_combined_worker_get_size(); + unsigned j = starpu_combined_worker_get_rank(); + unsigned slice = (n+m-1)/m; + + for (i = j * slice; i < (j+1) * slice && i < n; i++) + val[i] *= *factor; +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .type = STARPU_SPMD, + .max_parallelism = INT_MAX, + .cpu_funcs = { func }, + .cpu_funcs_name = { "func" }, + .nbuffers = 1, +}; +\endcode + +A full example is available in examples/spmd/vector_scal_spmd.c. + +Of course, this trivial example will not really benefit from parallel task +execution, and was only meant to be simple to understand. The benefit comes +when the computation to be done is such that threads have to e.g. exchange +intermediate results, or write to the data in a complex but safe way in the same +buffer. + +\subsection ParallelTasksPerformance Parallel Tasks Performance + +To benefit from parallel tasks, a parallel-task-aware StarPU scheduler has to +be used. 
When exposed to codelets with a flag ::STARPU_FORKJOIN or +::STARPU_SPMD, the schedulers pheft (parallel-heft) and peager +(parallel eager) will indeed also try to execute tasks with +several CPUs. It will automatically try the various available combined +worker sizes (making several measurements for each worker size) and +thus be able to avoid choosing a large combined worker if the codelet +does not actually scale so much. Examples using parallel-task-aware StarPU scheduler are available in tests/parallel_tasks/parallel_kernels.c and tests/parallel_tasks/parallel_kernels_spmd.c. + +This is however for now only proof of concept, and has not really been optimized yet. + +\subsection CombinedWorkers Combined Workers + +By default, StarPU creates combined workers according to the architecture +structure as detected by hwloc. It means that for each object of the hwloc +topology (NUMA node, socket, cache, ...) a combined worker will be created. If +some nodes of the hierarchy have a big arity (e.g. many cores in a socket +without a hierarchy of shared caches), StarPU will create combined workers of +intermediate sizes. The variable \ref STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER +permits to tune the maximum arity between levels of combined workers. + +The combined workers actually produced can be seen in the output of the +tool starpu_machine_display (the environment variable +\ref STARPU_SCHED has to be set to a combined worker-aware scheduler such +as pheft or peager). + +\subsection ConcurrentParallelTasks Concurrent Parallel Tasks + +Unfortunately, many environments and libraries do not support concurrent +calls. + +For instance, most OpenMP implementations (including the main ones) do not +support concurrent pragma omp parallel statements without nesting them in +another pragma omp parallel statement, but StarPU does not yet support +creating its CPU workers by using such pragma. 
+ +Other parallel libraries are also not safe when being invoked concurrently +from different threads, due to the use of global variables in their sequential +sections, for instance. + +The solution is then to use only one combined worker at a time. This can be +done by setting the field starpu_conf::single_combined_worker to 1, or +setting the environment variable \ref STARPU_SINGLE_COMBINED_WORKER +to 1. StarPU will then run only one parallel task at a time (but other +CPU and GPU tasks are not affected and can be run concurrently). The parallel +task scheduler will however still try varying combined worker +sizes to look for the most efficient ones. A full example is available in examples/spmd/vector_scal_spmd.c. + +\section SynchronizationTasks Synchronization Tasks + +For the application convenience, it may be useful to define tasks which do not +actually make any computation, but carry for instance dependencies between other +tasks or tags, or to be submitted in callbacks, etc. + +The obvious way is of course to make kernel functions empty, but such a task will +thus have to wait for a worker to become ready, transfer data, etc. + +A much lighter way to define a synchronization task is to set its field starpu_task::cl +to NULL. The task will thus be a mere synchronization point, +without any data access or execution content: as soon as its dependencies become +available, it will terminate, call the callbacks, and release dependencies. + +An intermediate solution is to define a codelet with its field +starpu_codelet::where set to \ref STARPU_NOWHERE, for instance: + +\code{.c} +struct starpu_codelet cl = +{ + .where = STARPU_NOWHERE, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +task = starpu_task_create(); +task->cl = &cl; +task->handles[0] = handle; +starpu_task_submit(task); +\endcode + +will create a task which simply waits for the value of handle to be +available for read. This task can then be depended on, etc. 
A full example is available in examples/filters/fmultiple_manual.c. + +StarPU provides starpu_task_create_sync() to create a new synchronization task, the same as the previous example but without submitting the task. The function starpu_create_sync_task() is also used to create a new synchronization task and submit it, which is a task that waits for specific tags and calls the specified callback function when the task is finished. The function starpu_create_callback_task() can create and submit a synchronization task, which is a task that completes immediately and calls the specified callback function right after. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/bubble.doxy b/doc/doxygen/chapters/starpu_extensions/bubble.doxy new file mode 100644 index 0000000..54cd67d --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/bubble.doxy @@ -0,0 +1,196 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page HierarchicalDAGS Hierarchical DAGS + +The STF model has the intrinsic limitation of supporting static task +graphs only, which leads to potential submission overhead and to a +static task graph which is not necessarily adapted for execution on +heterogeneous systems. + +To address these problems, we have extended the STF model to enable +tasks subgraphs at runtime. 
We refer to these tasks as +hierarchical tasks. This approach allows for a more dynamic +task graph. This allows to dynamically adapt the granularity to meet +the optimal size of the targeted computing resource. + +Hierarchical tasks are tasks that can transform themselves into +a new task-graph dynamically at runtime. Programmers submit a coarse +version of the DAG, called the bubbles graph, which represents the +general shape of the application tasks graph. The execution of this +bubble graph will generate and submit the computing tasks of the +application. It is up to application programmers to decide how to +build the bubble graph (i.e. how to structure the computation tasks +graph to create some groups of tasks). Dependencies between bubbles +are automatically deduced from dependencies between their computing +tasks. + +//Figure of bubble task graph and computing task graph that maps with it + +\section BubblesExamples An Example + +In order to understand the hierarchical tasks model, an example of +"bubblification" is showed here. We start from a simple example, +multiplying the elements of a vector. + +\subsection BubblesInitialVersion Initial Version + +A computation is done several times on a vector split in smaller +vectors. For each step and each sub-vector, a task is generated to +perform the computation. + +\code{.c} +void func_cpu(void *descr[], void *_args) +{ + (void) _args; + int x; + int nx = STARPU_VECTOR_GET_NX(descr[0]); + TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + + for(x=0 ; xbubble/tests/vector/vector.c. + +To define a hierarchical task, one needs to define the fields +starpu_codelet::bubble_func and starpu_codelet::bubble_gen_dag_func. + +The field starpu_codelet::bubble_func is a pointer function which will +be executed by StarPU to decide at runtime if the task must be +transformed into a bubble. 
If the function returns a non-zero value, +the function starpu_codelet::bubble_gen_dag_func will be executed to +create the new graph of tasks. + +The pointer functions can also be defined when calling +starpu_task_insert() by using the arguments ::STARPU_BUBBLE_FUNC and +::STARPU_BUBBLE_GEN_DAG_FUNC. Both these functions can be passed +parameters through the arguments ::STARPU_BUBBLE_FUNC_ARG and +::STARPU_BUBBLE_GEN_DAG_FUNC_ARG + +When executed, the function starpu_codelet::bubble_func will be given +as parameter the task being checked, and the value specified with +::STARPU_BUBBLE_FUNC_ARG. + +When executed, the function starpu_codelet::bubble_gen_dag_func will be +given as parameter the task being turned into a hierarchical task and +the value specified with ::STARPU_BUBBLE_GEN_DAG_FUNC_ARG. + +An example involving these functions is in bubble/tests/basic/brec.c. And more examples are available in bubble/tests/basic/*.c. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/code/complex.c b/doc/doxygen/chapters/starpu_extensions/code/complex.c new file mode 100644 index 0000000..221dbc8 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/complex.c @@ -0,0 +1,21 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] 
+#define STARPU_COMPLEX_GET_REAL(interface) (((struct starpu_complex_interface *)(interface))->real) +#define STARPU_COMPLEX_GET_IMAGINARY(interface) (((struct starpu_complex_interface *)(interface))->imaginary) +#define STARPU_COMPLEX_GET_NX(interface) (((struct starpu_complex_interface *)(interface))->nx) +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/code/disk_compute.c b/doc/doxygen/chapters/starpu_extensions/code/disk_compute.c new file mode 100644 index 0000000..eea46f4 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/disk_compute.c @@ -0,0 +1,177 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +//! [To be included. You should update doxygen if you see this text.] 
+/* Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + */ + +#include +#include +#include +#include +#include +#include + +#define NX (1024) + +int main(int argc, char **argv) +{ + /* Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) goto enodev; + + /* Initialize path and name */ + char pid_str[16]; + int pid = getpid(); + snprintf(pid_str, sizeof(pid_str), "%d", pid); + + const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; + const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; + + char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); + strcpy(path_file_start, base); + strcat(path_file_start, "/"); + strcat(path_file_start, name_file_start); + + char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); + strcpy(path_file_end, base); + strcat(path_file_end, "/"); + strcat(path_file_end, name_file_end); + + /* register a disk */ + int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) base, 1024*1024*1); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + unsigned dd = (unsigned) new_dd; + + printf("TEST DISK MEMORY \n"); + + /* Imagine, you want to compute data */ + int *A; + int *C; + + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + unsigned int j; + /* you register them in a vector */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* you create a file to store the vector ON the disk */ + FILE * f = fopen(path_file_start, "wb+"); + if (f == NULL) + goto enoent2; + + /* store it in the file */ + fwrite(A, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + + /* create a file to store result */ + f = fopen(path_file_end, "wb+"); + if (f == NULL) + goto enoent2; + + /* replace all data by 0 */ + fwrite(C, sizeof(int), NX, f); + + /* close the file */ + 
fclose(f); + + /* And now, you want to use your data in StarPU */ + /* Open the file ON the disk */ + void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); + void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); + + starpu_data_handle_t vector_handleA, vector_handleC; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); + + /* and do what you want with it, here we copy it into an other vector */ + starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); + + starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); + + /* free them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleC); + + /* close them in StarPU */ + starpu_disk_close(dd, data, NX*sizeof(int)); + starpu_disk_close(dd, data_result, NX*sizeof(int)); + + /* check results */ + f = fopen(path_file_end, "rb+"); + if (f == NULL) + goto enoent; + /* take data */ + fread(C, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + printf("Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + printf("TEST SUCCESS\n"); + else + printf("TEST FAIL\n"); + return (try ? EXIT_SUCCESS : EXIT_FAILURE); + +enodev: + return 77; +enoent2: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +enoent: + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + starpu_shutdown(); + return 77; +} +//! [To be included. 
You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/code/disk_copy.c b/doc/doxygen/chapters/starpu_extensions/code/disk_copy.c new file mode 100644 index 0000000..2a40c34 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/disk_copy.c @@ -0,0 +1,121 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] + +/* Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + */ + +#include +#include +#include +#include + +/* size of one vector */ +#define NX (30*1000000/sizeof(double)) +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + + +int main(int argc, char **argv) +{ + double *A, *F; + + /* limit main ram to force to push in disk */ + setenv("STARPU_LIMIT_CPU_MEM", "160", 1); + + /* Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) goto enodev; + + /* register a disk */ + int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp/", 1024*1024*200); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* initialization with bad values */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + F[j] = -j; + } + + starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double)); + + /* copy vector A->B, B->C... 
*/ + starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL); + starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL); + starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL); + starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL); + starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL); + + /* StarPU does not need to manipulate the array anymore so we can stop + * monitoring it */ + + /* free them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleB); + starpu_data_unregister(vector_handleC); + starpu_data_unregister(vector_handleD); + starpu_data_unregister(vector_handleE); + starpu_data_unregister(vector_handleF); + + /* check if computation is correct */ + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != F[j]) + { + printf("Fail A %f != F %f \n", A[j], F[j]); + try = 0; + } + + /* free last vectors */ + starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT); + starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return (try ? EXIT_SUCCESS : EXIT_FAILURE); + +enodev: + return 77; +enoent: + return 77; +} + +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/code/forkmode.c b/doc/doxygen/chapters/starpu_extensions/code/forkmode.c new file mode 100644 index 0000000..785b2af --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/forkmode.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +void scal_cpu_func(void *buffers[], void *_args) +{ + unsigned i; + float *factor = _args; + struct starpu_vector_interface *vector = buffers[0]; + unsigned n = STARPU_VECTOR_GET_NX(vector); + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + +#pragma omp parallel for num_threads(starpu_combined_worker_get_size()) + for (i = 0; i < n; i++) + val[i] *= *factor; +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .where = STARPU_CPU, + .type = STARPU_FORKJOIN, + .max_parallelism = INT_MAX, + .cpu_funcs = {scal_cpu_func}, + .cpu_funcs_name = {"scal_cpu_func"}, + .nbuffers = 1, +}; +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/code/multiformat.c b/doc/doxygen/chapters/starpu_extensions/code/multiformat.c new file mode 100644 index 0000000..f365086 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/multiformat.c @@ -0,0 +1,63 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +#define NX 1024 +struct point array_of_structs[NX]; +starpu_data_handle_t handle; + +/* + * The conversion of a piece of data is itself a task, though it is created, + * submitted and destroyed by StarPU internals and not by the user. Therefore, + * we have to define two codelets. + * Note that for now the conversion from the CPU format to the GPU format has to + * be executed on the GPU, and the conversion from the GPU to the CPU has to be + * executed on the CPU. + */ +#ifdef STARPU_USE_OPENCL +void cpu_to_opencl_opencl_func(void *buffers[], void *args); +struct starpu_codelet cpu_to_opencl_cl = +{ + .where = STARPU_OPENCL, + .opencl_funcs = { cpu_to_opencl_opencl_func }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; + +void opencl_to_cpu_func(void *buffers[], void *args); +struct starpu_codelet opencl_to_cpu_cl = +{ + .where = STARPU_CPU, + .cpu_funcs = { opencl_to_cpu_func }, + .cpu_funcs_name = { "opencl_to_cpu_func" }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; +#endif + +struct starpu_multiformat_data_interface_ops format_ops = +{ +#ifdef STARPU_USE_OPENCL + .opencl_elemsize = 2 * sizeof(float), + .cpu_to_opencl_cl = &cpu_to_opencl_cl, + .opencl_to_cpu_cl = &opencl_to_cpu_cl, +#endif + .cpu_elemsize = 2 * sizeof(float), + ... 
+}; + +starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops); +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/code/simgrid.c b/doc/doxygen/chapters/starpu_extensions/code/simgrid.c new file mode 100644 index 0000000..3f3ff07 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/code/simgrid.c @@ -0,0 +1,31 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +static struct starpu_codelet cl_potrf = +{ + .cpu_funcs = {chol_cpu_codelet_update_potrf}, + .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_potrf}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &chol_model_potrf +}; +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy b/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy new file mode 100644 index 0000000..a91dcc8 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy @@ -0,0 +1,25 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page CUDASupport CUDA Support + +StarPU sets the current CUDA device by calling starpu_cuda_set_device() which takes an integer argument representing the device number, and sets the current device to the specified device number. By setting the current device, applications can select which CUDA device to use for their computations, enabling efficient management of multiple CUDA devices in a system. + +We can call starpu_cuda_get_nvmldev() to get identifier of the NVML device associated with a given CUDA device. + +Three macros STARPU_CUDA_REPORT_ERROR(), STARPU_CUBLAS_REPORT_ERROR(), and STARPU_CUSPARSE_REPORT_ERROR() are useful for debugging and troubleshooting, as they provide detailed information about the error that occur during CUDA or CUBLAS execution. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy b/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy new file mode 100644 index 0000000..06f4d3b --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy @@ -0,0 +1,128 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page DebuggingTools Debugging Tools + +StarPU provides several tools to help debugging applications. Execution traces +can be generated and displayed graphically, see \ref GeneratingTracesWithFxT. + +\section DebuggingInGeneral TroubleShooting In General + +Generally-speaking, if you have troubles, pass \ref enable-debug "--enable-debug" to +configure to enable some checks which impact performance, but will +catch common issues, possibly earlier than the actual problem you are observing, +which may just be a consequence of a bug that happened earlier. Also, make sure +not to have the \ref enable-fast "--enable-fast" \c configure option, which drops very useful +catchup assertions. If your program is valgrind-safe, you can use it, see \ref +UsingOtherDebugger. + +Depending on your toolchain, it might happen that you get +undefined reference to `__stack_chk_guard' errors. In that case, use the +--disable-fstack-protector-all option to avoid the issue. + +Then, if your program crashes with an assertion error, a segfault, etc. you can send us the result of + +\verbatim +thread apply all bt +\endverbatim + +run in gdb at the point of the crash. 
+ +In case your program just hangs, but it may also be useful in case of a crash +too, it helps to source gdbinit as described in the next section to be +able to run and send us the output of the following commands: + +\verbatim +starpu-workers +starpu-tasks +starpu-print-requests +starpu-print-prequests +starpu-print-frrequests +starpu-print-irrequests +\endverbatim + +To give us an idea of what is happening within StarPU. If the outputs are not too long, you can even run + +\verbatim +starpu-all-tasks +starpu-print-all-tasks +starpu-print-datas-summary +starpu-print-datas +\endverbatim + +\section UsingGdb Using The Gdb Debugger + +Some gdb helpers are provided to show the whole StarPU state: + +\verbatim +(gdb) source tools/gdbinit +(gdb) help starpu +\endverbatim + +For instance, +
    +
  • one can print all tasks with starpu-print-all-tasks,
  • +
  • print all data with starpu-print-datas,
  • +
  • print all pending data transfers with starpu-print-prequests, starpu-print-requests, starpu-print-frequests, starpu-print-irequests,
  • +
  • print pending MPI requests with starpu-mpi-print-detached-requests
  • +
+ +Some functions can only work if \ref enable-debug "--enable-debug" +was passed to configure +(because they impact performance) + +\section UsingOtherDebugger Using Other Debugging Tools + +Valgrind can be used on StarPU: valgrind.h just needs to be found at configure +time, to tell valgrind about some known false positives and disable host memory +pinning. Other known false positives can be suppressed by giving the suppression +files in tools/valgrind/*.suppr to valgrind's --suppressions option. + +The environment variable \ref STARPU_DISABLE_KERNELS can also be set to 1 to make +StarPU does everything (schedule tasks, transfer memory, etc.) except actually +calling the application-provided kernel functions, i.e. the computation will not +happen. This permits to quickly check that the task scheme is working properly. + +\section WatchdogSupport Watchdog Support + +starpu_task_watchdog_set_hook() is used to set a callback function "watchdog hook" that will be called when there is no task completed during an expected time. The purpose of the watchdog hook is to allow the application to get the state for debugging. + +\section UsingTheTemanejoTaskDebugger Using The Temanejo Task Debugger + +StarPU can connect to Temanejo >= 1.0rc2 (see +http://www.hlrs.de/temanejo), to permit +nice visual task debugging. To do so, build Temanejo's libayudame.so, +install Ayudame.h to e.g. /usr/local/include, apply the +tools/patch-ayudame to it to fix C build, re-configure, make +sure that it found it, rebuild StarPU. Run the Temanejo GUI, give it the path +to your application, any options you want to pass it, the path to libayudame.so. + +It permits to visualize the task graph, add breakpoints, continue execution +task-by-task, and run gdb on a given task, etc. + +\image html temanejo.png +\image latex temanejo.png "" width=\textwidth + +Make sure to specify at least the same number of CPUs in the dialog box as your +machine has, otherwise an error will happen during execution. 
Future versions +of Temanejo should be able to tell StarPU the number of CPUs to use. + +Tag numbers have to be below 4000000000000000000ULL to be usable for +Temanejo (to distinguish them from tasks). + + + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy b/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy new file mode 100644 index 0000000..252e67c --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy @@ -0,0 +1,114 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroExtensions, --------- StarPU Extensions ---------} + +\webforeword + +This part explains the advanced concepts of StarPU. It is intended for users whose applications need more than basic task submission. + +You can learn more knowledge about some important and core concepts in StarPU: +
    +
  • +After reading Chapter \ref TasksInStarPU, you can get more +information about how to manage tasks in StarPU in Chapter \ref +AdvancedTasksInStarPU. +
  • +
  • +After reading Chapter \ref DataManagement, you can know more about +how to manage the data layout of your applications in Chapter \ref +AdvancedDataManagement. +
  • +
  • +After reading Chapter \ref Scheduling, you can get some advanced +scheduling policies in StarPU in Chapters \ref AdvancedScheduling, +\ref SchedulingContexts and \ref SchedulingContextHypervisor. +
  • +
  • +Chapter \ref HowToDefineANewSchedulingPolicy explains how to +define a StarPU task scheduling policy either in a basic +monolithic way, or in a modular way. +
  • +
+ +Other chapters cover some further usages of StarPU. +
    +
  • +Chapters \ref CUDASupport and \ref OpenCLSupport show how to use +GPU devices with CUDA or OpenCL. Chapter \ref MaxFPGASupport +explains how StarPU supports Field Programmable Gate Array (FPGA) +applications exploiting DFE configurations. +
  • +
  • +If you need to store more data than what the main memory (RAM) can +store, Chapter \ref OutOfCore presents how to add a new memory +node on a disk and how to use it. +
  • +
  • +Chapter \ref MPISupport shows how to integrate MPI processes in +StarPU. +
  • +
  • +Chapter \ref TCPIPSupport shows a TCP/IP master-slave mechanism +which can execute applications across many remote cores without +thinking about data distribution. +
  • +
  • +Chapter \ref Transactions shows how to cancel a sequence of +already submitted tasks based on a just-in-time decision. +
  • +
  • +Chapter \ref FaultTolerance explains how StarPU provides support +for failure of tasks or even failure of complete nodes. +
  • +
  • +Chapter \ref FFTSupport explains how StarPU provides a library +similar to both fftw and cufft, but adding +support for both CPUs and GPUs. +
  • +
  • +Chapter \ref SOCLOpenclExtensions explains how OpenCL applications +can transparently be run using StarPU, by giving unified access +to every available OpenCL device. +
  • +
  • +We propose a hierarchical tasks model in Chapter \ref +HierarchicalDAGS to enable tasks subgraphs at runtime for a more +dynamic task graph. +
  • +
  • +You can find how to partition a machine into parallel workers in +Chapter \ref ParallelWorker. +
  • +
  • +Chapter \ref InteroperabilitySupport shows how StarPU can coexist +with other parallel software elements without resulting in +computing core oversubscription or undersubscription. +
  • +
  • +Chapter \ref SimGridSupport shows you how to simulate execution on +an arbitrary platform. +
  • +
  • +Tools to help debugging applications are presented in Chapter \ref +DebuggingTools. +
  • +
+ +And finally, chapter \ref Helpers gives a list of StarPU utility +functions. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy b/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy new file mode 100644 index 0000000..f7f7794 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page FaultTolerance Fault Tolerance + +\section FaultTolerance_Introduction Introduction + +Due to e.g. hardware error, some tasks may fail, or even complete nodes may +fail. For now, StarPU provides some support for failure of tasks. + +\section TaskRetry Retrying tasks + +In case a task implementation notices that it failed to compute properly, it can +call starpu_task_failed() to notify StarPU of the failure. + +tests/fault-tolerance/retry.c is an example of coping with such failure: +the principle is that when submitting the task, one sets its prologue callback +to starpu_task_ft_prologue(). That prologue will turn the task into a meta +task, which will manage the repeated submission of try-tasks to perform the +computation until one of the computations succeeds. One can create a try-task for the meta task +by using starpu_task_ft_create_retry(). 
+ +By default, try-tasks will be just retried until one of them succeeds (i.e. the +task implementation does not call starpu_task_failed()). One can change the +behavior by passing a check_failsafe function as prologue parameter, +which will be called at the end of the try-task attempt. It can look at +starpu_task_get_current()->failed to determine whether the try-task +succeeded, in which case it can call starpu_task_ft_success() on the meta-task to +notify success, or if it failed, in which case it can call +starpu_task_ft_create_retry() to create another try-task, and submit it +with starpu_task_submit_nodeps(). + +This can however only work if the task input is not modified, and is thus not +supported for tasks with data access mode ::STARPU_RW. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/fft_support.doxy b/doc/doxygen/chapters/starpu_extensions/fft_support.doxy new file mode 100644 index 0000000..1a7c619 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/fft_support.doxy @@ -0,0 +1,82 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page FFTSupport FFT Support + +StarPU provides libstarpufft, a library whose design is very similar to +both fftw and cufft, the difference being that it takes advantage of both CPUs +and GPUs. 
It should however be noted that GPUs do not have the same precision as +CPUs, so the results may be different by a negligible amount. + +Different precisions are available, namely \c float, \c double and long +double precisions, with the following \c fftw naming conventions: + +
    +
  • +double precision structures and functions are named e.g. starpufft_execute() +
  • +
  • +float precision structures and functions are named e.g. starpufftf_execute() +
  • +
  • +long double precision structures and functions are named e.g. starpufftl_execute() +
  • +
+ +The documentation below is given with names for double precision, replace +starpufft_ with starpufftf_ or starpufftl_ as appropriate. + +Only complex numbers are supported at the moment. + +The application has to call starpu_init() before calling starpufft functions. + +Either main memory pointers or data handles can be provided. + +
    +
  • +To provide main memory pointers, use starpufft_start() or +starpufft_execute(). Only one FFT can be performed at a time, because +StarPU will have to register the data on the fly. In the starpufft_start() +case, starpufft_cleanup() needs to be called to unregister the data. +
  • +
  • +To provide data handles (which is preferable), +use starpufft_start_handle() (preferred) or +starpufft_execute_handle(). Several FFT tasks can be submitted +for a given plan, which makes it possible e.g. to start a series of FFTs with just one +plan. starpufft_start_handle() is preferable since it does not wait for +the task completion, and thus allows enqueuing a series of tasks. +
  • +
+ +All functions are defined in \ref API_FFT_Support. + +Some examples illustrating the usage of the FFT API are available in +the directory starpufft/tests. + +\section FFTCompilation Compilation + +The flags required to compile or link against the FFT library are accessible +with the following commands: + +\verbatim +$ pkg-config --cflags starpufft-1.4 # options for the compiler +$ pkg-config --libs starpufft-1.4 # options for the linker +\endverbatim + +Also pass the option --static if the application is to be linked statically. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/helpers.doxy b/doc/doxygen/chapters/starpu_extensions/helpers.doxy new file mode 100644 index 0000000..9fdd980 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/helpers.doxy @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page Helpers Helpers + +StarPU provides several utility functions to help programmers: + +
    +
  • +starpu_conf_noworker() sets configuration fields so that no +worker is enabled, i.e. it sets starpu_conf::ncpus to 0, +starpu_conf::ncuda to 0, etc. +
  • +
  • +starpu_is_initialized() returns a value indicating whether StarPU is +already initialized; starpu_wait_initialized() only returns when the +initialization is finished. +
  • +
  • +starpu_topology_print() prints the current topology of the system, and +is therefore useful for debugging purposes or for understanding the +underlying architecture of the system. +
  • +
  • +starpu_get_version() returns the version of StarPU used when running +the application. +
  • +
  • +starpu_sleep() and starpu_usleep() allow the application to pause the +execution of the current thread for a specified amount of time. +starpu_sleep() pauses the thread for a specified number of seconds and +starpu_usleep() for a specified number of microseconds. +
  • +
+ +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/interoperability.doxy b/doc/doxygen/chapters/starpu_extensions/interoperability.doxy new file mode 100644 index 0000000..43f0075 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/interoperability.doxy @@ -0,0 +1,120 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page InteroperabilitySupport Interoperability Support + +In situations where multiple parallel software elements have to coexist within +the same application, uncoordinated accesses to computing units may lead such +parallel software elements to collide and interfere. The purpose of the +Interoperability routines of StarPU, implemented along the definition of the +Resource Management APIs of Project H2020 INTERTWinE, is to enable StarPU to +coexist with other parallel software elements without resulting in computing +core oversubscription or undersubscription. These routines allow the +programmer to dynamically control the computing resources allocated to StarPU, +to add or remove processor cores and/or accelerator devices from the pool of +resources used by StarPU's workers to execute tasks. They also allow multiple +libraries and applicative codes using StarPU simultaneously to select distinct +sets of resources independently. 
Internally, the Interoperability Support is +built on top of Scheduling Contexts (see \ref SchedulingContexts). + +\section ResourceManagement StarPU Resource Management + +The \c starpurm module is a library built on top of the \c starpu library. It +exposes a series of routines prefixed with \c starpurm_ defining the resource +management API. + +All functions are defined in \ref API_Interop_Support. + +\subsection Build Linking a program with the starpurm module + +The \c starpurm module must be linked explicitly with the applicative executable +using it. Example Makefiles in the starpurm/dev/ subdirectories show how +to do so. If the \c pkg-config command is available and the \c PKG_CONFIG_PATH +environment variable is properly positioned, the proper settings may be obtained +with the following \c Makefile snippet: + +\code{Makefile} +CFLAGS += $(shell pkg-config --cflags starpurm-1.4) +LDFLAGS+= $(shell pkg-config --libs-only-L starpurm-1.4) +LDLIBS += $(shell pkg-config --libs-only-l starpurm-1.4) +\endcode + + +\subsection InitExit Initialization and Shutdown + +The \c starpurm module is initialized with a call to starpurm_initialize() +and must be finalized with a call to starpurm_shutdown(). The basic example is available in starpurm/tests/01_init_exit.c. The \c starpurm +module supports CPU cores as well as devices. An integer ID is assigned to each +supported device type. The ID assigned to a given device type can be queried +with the starpurm_get_device_type_id() routine, which currently expects one +of the following strings as argument and returns the corresponding ID: +
    +
  • "cpu"
  • +
  • "opencl"
  • +
  • "cuda"
  • +
+The \c cpu pseudo device type is defined for convenience and designates CPU +cores. The number of units of each type available for computation can be +obtained with a call to starpurm_get_nb_devices_by_type(). + +Each CPU core unit available for computation is designated by its rank among the +StarPU CPU worker threads and by its own CPUSET bit. Each non-CPU device unit +can be designated both by its rank number in the type, and by the CPUSET bit +corresponding to its StarPU device worker thread. The CPUSET of a computing unit +or its associated worker can be obtained from its type ID and rank with +starpurm_get_device_worker_cpuset(), which returns the corresponding HWLOC CPUSET. + +An example is available in starpurm/tests/02_list_units.c. + +\subsection DefCTX Default Context + +The \c starpurm module assumes a default, global context, manipulated through a +series of routines that assign and withdraw computing units from the main +StarPU context. Assigning CPU cores can be done with +starpurm_assign_cpu_to_starpu() and starpurm_assign_cpu_mask_to_starpu(), and +assigning device units can be done with starpurm_assign_device_to_starpu() +and starpurm_assign_device_mask_to_starpu(). Conversely, withdrawing CPU +cores can be done with starpurm_withdraw_cpu_from_starpu() and starpurm_withdraw_cpu_mask_from_starpu(), +and withdrawing device units can be done with +starpurm_withdraw_device_from_starpu() and starpurm_withdraw_device_mask_from_starpu(). +These routines should typically be used to control resource usage for the main +applicative code. An example is available in starpurm/examples/block_test/block_test.c. + +\subsection TmpCTXS Temporary Contexts + +Besides the default, global context, \c starpurm can create temporary contexts +and launch the computation of kernels confined to these temporary contexts. +The routine starpurm_spawn_kernel_on_cpus() can be used to do so: it +allocates a temporary context and spawns a kernel within this context. 
The +temporary context is subsequently freed upon completion of the kernel. The +temporary context is set as the default context for the kernel throughout its +lifespan. This routine should typically be used to control resource usage for a +parallel kernel, handled by an external library built on StarPU. Internally, it +relies on the use of starpu_sched_ctx_set_context() to set the temporary +context as the default context for the parallel kernel, and then restore the main +context upon completion. Note: the maximum number of temporary contexts +allocated concurrently at any time should not exceed +::STARPU_NMAX_SCHED_CTXS-2, otherwise, the call to +starpurm_spawn_kernel_on_cpus() may block until a temporary context becomes +available. The routine starpurm_spawn_kernel_on_cpus() returns upon the +completion of the parallel kernel. An example is available in starpurm/examples/spawn.c. +An asynchronous variant is available with the +routine starpurm_spawn_kernel_on_cpus_callback(). This variant returns +immediately, however it accepts a callback function, which is subsequently +called to notify the calling code about the completion of the parallel kernel. +An example is available in starpurm/examples/async_spawn.c. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy b/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy new file mode 100644 index 0000000..b0f4113 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy @@ -0,0 +1,332 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page MaxFPGASupport Maxeler FPGA Support + +\section Introduction Introduction + +Maxeler provides hardware and software solutions for accelerating +computing applications on dataflow engines (DFEs). DFEs are in-house +designed accelerators that encapsulate reconfigurable high-end FPGAs +at their core and are equipped with large amounts of DDR memory. + +We extend the StarPU task programming library that initially targets +heterogeneous architectures to support Field Programmable Gate Array +(FPGA). + +To create StarPU/FPGA applications exploiting DFE +configurations, MaxCompiler allows an application to be split into +three parts: + +- Kernel, which implements the computational components of the + application in hardware. +- Manager configuration, which connects Kernels to the CPU, + engine RAM, other Kernels and other DFEs via MaxRing. +- CPU application, which interacts with the DFEs to read and + write data to the Kernels and engine RAM. + +The Simple Live CPU interface (SLiC) is Maxeler’s application +programming interface for seamless CPU-DFE integration. SLiC allows +CPU applications to configure and load a number of DFEs as well as to +subsequently schedule and run actions on those DFEs using simple +function calls. In StarPU/FPGA applications, we use Dynamic SLiC +Interface to exchange data streams between the CPU (Main Memory) +and DFE (Local Memory). 
+ +\section PortingApplicationsToMaxFPGA Porting Applications to Maxeler FPGA + +The way to port an application to FPGA is to set the field +starpu_codelet::max_fpga_funcs, to provide StarPU with the function +for FPGA implementation, so for instance: + +\verbatim +struct starpu_codelet cl = +{ + .max_fpga_funcs = {myfunc}, + .nbuffers = 1, +} +\endverbatim + +A basic example is available in the file tests/maxfpga/max_fpga_basic_static.c. + +\subsection MaxFPGAExample StarPU/Maxeler FPGA Application + +To give you an idea of the interface that we used to exchange data +between host (CPU) and FPGA (DFE), here is an example, +based on one of the examples of Maxeler +(https://trac.version.fz-juelich.de/reconfigurable/wiki/Public). + +StreamFMAKernel.maxj represents the Java kernel code; it +implements a very simple kernel (c=a+b), and Test.c starts it +from the fpga_add function; it first sets streaming up from the +CPU pointers, triggers execution and waits for the result. The API to +interact with DFEs is called SLiC which then also involves the +MaxelerOS runtime. + +- StreamFMAKernel.maxj: the DFE part is described in the MaxJ + programming language, which is a Java-based metaprogramming approach. + +\code{.java} +package tests; + +import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; + +class StreamFMAKernel extends Kernel +{ + + private static final DFEType type = dfeInt(32); + + protected StreamFMAKernel(KernelParameters parameters) + { + super(parameters); + + DFEVar a = io.input("a", type); + DFEVar b = io.input("b", type); + DFEVar c; + + c = a+b; + + io.output("output", c, type); + } +} +\endcode + +- StreamFMAManager.maxj: is also described in the MaxJ + programming language and orchestrates data movement between the host + and the DFE. 
+ +\code{.java} +package tests; + +import com.maxeler.maxcompiler.v2.build.EngineParameters; +import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; +import com.maxeler.platform.max5.manager.Max5LimaManager; + +class StreamFMAManager extends Max5LimaManager +{ + private static final String kernel_name = "StreamFMAKernel"; + + public StreamFMAManager(EngineParameters arg0) + { + super(arg0); + KernelBlock kernel = addKernel(new StreamFMAKernel(makeKernelParameters(kernel_name))); + kernel.getInput("a") <== addStreamFromCPU("a"); + kernel.getInput("b") <== addStreamFromCPU("b"); + addStreamToCPU("output") <== kernel.getOutput("output"); + } + + public static void main(String[] args) + { + StreamFMAManager manager = new StreamFMAManager(new EngineParameters(args)); + manager.build(); + } +} +\endcode + +Once StreamFMAKernel.maxj and StreamFMAManager.maxj are +written, there are other steps to do: + +- Building the JAVA program: (for Kernel and Manager (.maxj)) +\verbatim +$ maxjc -1.7 -cp $MAXCLASSPATH streamfma/ +\endverbatim + +- Running the Java program to generate a DFE implementation (a .max + file) that can be called from a StarPU/FPGA application and slic + headers (.h) for simulation: + +\verbatim +$ java -XX:+UseSerialGC -Xmx2048m -cp $MAXCLASSPATH:. streamfma.StreamFMAManager DFEModel=MAIA maxFileName=StreamFMA target=DFE_SIM +\endverbatim + +- Build the slic object file (simulation): + +\verbatim +$ sliccompile StreamFMA.max +\endverbatim + +- Test.c : + +to interface StarPU task-based runtime system with Maxeler's DFE +devices, we use the advanced dynamic interface of SLiC in +non_blocking mode. + +Test code must include MaxSLiCInterface.h and MaxFile.h. +The .max file contains the bitstream. The StarPU/FPGA application can +be written in C, C++, etc. Some examples are available in the directory tests/maxfpga. 
+ +\code{.c} +#include "StreamFMA.h" +#include "MaxSLiCInterface.h" + +void fpga_add(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int *a = (int*) STARPU_VECTOR_GET_PTR(buffers[0]); + int *b = (int*) STARPU_VECTOR_GET_PTR(buffers[1]); + int *c = (int*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + /* actions to run on an engine */ + max_actions_t *act = max_actions_init(maxfile, NULL); + + /* set the number of ticks for a kernel */ + max_set_ticks (act, "StreamFMAKernel", size); + + /* send input streams */ + max_queue_input(act, "a", a, size *sizeof(a[0])); + max_queue_input(act, "b", b, size*sizeof(b[0])); + + /* store output stream */ + max_queue_output(act,"output", c, size*sizeof(c[0])); + + /* run actions on the engine */ + printf("**** Run actions in non blocking mode **** \n"); + + /* run actions in non_blocking mode */ + max_run_t *run0= max_run_nonblock(engine, act); + + printf("*** wait for the actions on DFE to complete *** \n"); + max_wait(run0); + } + + static struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .max_fpga_funcs = {fpga_add}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W} + }; + +int main(int argc, char **argv) +{ + ... + + /* Implementation of a maxfile */ + max_file_t *maxfile = StreamFMA_init(); + + /* Implementation of an engine */ + max_engine_t *engine = max_load(maxfile, "*"); + + starpu_init(NULL); + + ... Task submission etc. ... + + starpu_shutdown(); + + /* deallocate the set of actions */ + max_actions_free(act); + + /* unload and deallocate an engine obtained by way of max_load */ + max_unload(engine); + + return 0; +} +\endcode + +To write the StarPU/FPGA application: first, the programmer must +describe the codelet using StarPU’s C API. This codelet provides both +a CPU implementation and an FPGA one. 
It also specifies that the task +has two inputs and one output through the starpu_codelet::nbuffers and +starpu_codelet::modes attributes. + +fpga_add is the name of the FPGA implementation and is +mainly divided into five steps: + +- Init actions to be run on DFE. +- Add data to an input stream for an action. +- Add data storage space for an output stream. +- Run actions on DFE in non_blocking mode; a non-blocking call + returns immediately, allowing the calling code to do more CPU work + in parallel while the actions are run. +- Wait for the actions to complete. + +In the main function, there are four important steps: + +- Implement a maxfile. +- Load a DFE. +- Free actions. +- Unload and deallocate the DFE. + +The rest of the application (data registration, task submission, etc.) +is as usual with StarPU. + +The design load can also be delegated to StarPU by specifying an array +of load specifications in starpu_conf::max_fpga_load, and using +starpu_max_fpga_get_local_engine() to access the loaded max engines. + +Complete examples are available in tests/maxfpga/*.c + +\subsection MaxFPGADataTransfers Data Transfers in StarPU/Maxeler FPGA Applications + +The communication between the host and the DFE is done through the +advanced dynamic interface to exchange data between the main +memory and the local memory of the DFE. + +For the moment, we use \ref STARPU_MAIN_RAM to send and store data +to/from DFE's local memory. However, we aim to use a multiplexer to +choose which memory node we will use to read/write data. So, users +can tell that the computational kernel will take data from the main +memory or DFE's local memory, for example. + +In StarPU applications, when \ref starpu_codelet::specific_nodes is +set to 1, this specifies the memory nodes where each piece of data should be +sent to for task execution. 
+ +\subsection MaxFPGAConfiguration Maxeler FPGA Configuration + +To configure StarPU with Maxeler FPGA accelerators, make sure that the +slic-config is available from your PATH environment variable. + +\subsection MaxFPGALaunchingprograms Launching Programs: Simulation + +Maxeler provides a simple tutorial to use MaxCompiler +(https://trac.version.fz-juelich.de/reconfigurable/wiki/Public). +Running the Java program to generate maxfile and slic headers +(hardware) on Maxeler's DFE device, takes a VERY long time, approx. 2 +hours even for this very small example. That's why we use the +simulation. + +- To start the simulation on Maxeler's DFE device: +\verbatim +$ maxcompilersim -c LIMA -n StreamFMA restart +\endverbatim + +- To run the binary (simulation) +\verbatim +$ export LD_LIBRARY_PATH=$MAXELEROSDIR/lib:$LD_LIBRARY_PATH +$ export SLIC_CONF="use_simulation=StreamFMA" +\endverbatim + +- To force tasks to be scheduled on the FPGA, one can disable the use of CPU +cores by setting the \ref STARPU_NCPU environment variable to 0. +\verbatim +$ STARPU_NCPU=0 ./StreamFMA +\endverbatim + +- To stop the simulation +\verbatim +$ maxcompilersim -c LIMA -n StreamFMA stop +\endverbatim + + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy b/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy new file mode 100644 index 0000000..d07101d --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy @@ -0,0 +1,1342 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page MPISupport MPI Support + +The integration of MPI transfers within task parallelism is done in a +very natural way by the means of asynchronous interactions between the +application and StarPU. This is implemented in a separate libstarpumpi library +which basically provides "StarPU" equivalents of MPI_* functions, where +void * buffers are replaced with ::starpu_data_handle_t, and all +GPU-RAM-NIC transfers are handled efficiently by StarPU-MPI. Users have to +use the usual mpirun command of the MPI implementation to start StarPU on +the different MPI nodes. + +An MPI Insert Task function provides an even more seamless transition to a +distributed application, by automatically issuing all required data transfers +according to the task graph and an application-provided distribution. + +Some example source code is available in the directory mpi/. + +\section MPIBuild Building with MPI support + +If a mpicc compiler is already in your PATH, StarPU will automatically +enable MPI support in the build. If mpicc is not in PATH, you +can specify its location by passing --with-mpicc=/where/there/is/mpicc to +./configure. + +It can be useful to enable MPI tests during make check by passing +--enable-mpi-check to ./configure. And similarly to +mpicc, if mpiexec is not in PATH, you can specify its location by passing +--with-mpiexec=/where/there/is/mpiexec to ./configure, but this is +not needed if it is next to mpicc: configure will look there in addition to PATH. + +Similarly, Fortran examples use mpif90, which can be specified manually +with --with-mpifort if it can't be found automatically. + +If users want to run several MPI processes per machine (e.g. 
one per +NUMA node), \ref STARPU_WORKERS_GETBIND needs to be left to its +default value 1 to make StarPU take into account the binding set by +the MPI launcher (otherwise each StarPU instance would try to bind on +all cores of the machine...) + +However, depending on the architecture of your machine, one may end up +with StarPU-MPI nodes not having any CPU workers. If a node only gets +1 CPU, it will be bound to the MPI thread, and none will be left to +start a CPU worker. + +One can check that with the following commands. + +\verbatim +$ mpirun -np 2 starpu_machine_display --worker CPU --count --notopology +1 CPU worker +1 CPU worker +$ mpirun -np 4 starpu_machine_display --worker CPU --count --notopology +4 CPU workers +4 CPU workers +4 CPU workers +4 CPU workers +$ mpirun --bind-to socket -np 2 starpu_machine_display --worker CPU --count --notopology +4 CPU workers +4 CPU workers +$ STARPU_WORKERS_GETBIND=0 mpirun -np 4 starpu_machine_display --worker CPU --count --notopology +4 CPU workers +4 CPU workers +4 CPU workers +4 CPU workers +$ STARPU_WORKERS_GETBIND=0 mpirun -np 2 starpu_machine_display --worker CPU --count --notopology +4 CPU workers +4 CPU workers +\endverbatim + +or with \c hwloc + +\verbatim +mpirun --bind-to socket -np 2 hwloc-ls --restrict binding --no-io +mpirun -np 2 hwloc-ls --restrict binding --no-io +\endverbatim + +\section ExampleDocumentation Example Used In This Documentation + +The example below will be used as the base for this documentation. It +initializes a token on node 0, and the token is passed from node to node, +incremented by one on each step. The code is not using StarPU yet. 
+ +\code{.c} +for (loop = 0; loop < nloops; loop++) +{ + int tag = loop*size + rank; + + if (loop == 0 && rank == 0) + { + token = 0; + fprintf(stdout, "Start with token value %d\n", token); + } + else + { + MPI_Recv(&token, 1, MPI_INT, (rank+size-1)%size, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + token++; + + if (loop == last_loop && rank == last_rank) + { + fprintf(stdout, "Finished: token value %d\n", token); + } + else + { + MPI_Send(&token, 1, MPI_INT, (rank+1)%size, tag+1, MPI_COMM_WORLD); + } +} +\endcode + +\section NotUsingMPISupport About Not Using The MPI Support + +Although StarPU provides MPI support, the application programmer may want to +keep their MPI communications as they are for a start, and only delegate task +execution to StarPU. This is possible by just using starpu_data_acquire(), for +instance: + +\code{.c} +for (loop = 0; loop < nloops; loop++) +{ + int tag = loop*size + rank; + + /* Acquire the data to be able to write to it */ + starpu_data_acquire(token_handle, STARPU_W); + if (loop == 0 && rank == 0) + { + token = 0; + fprintf(stdout, "Start with token value %d\n", token); + } + else + { + MPI_Recv(&token, 1, MPI_INT, (rank+size-1)%size, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + starpu_data_release(token_handle); + + /* Task delegation to StarPU to increment the token. The execution might + * be performed on a CPU, a GPU, etc. */ + increment_token(); + + /* Acquire the updated data to be able to read from it */ + starpu_data_acquire(token_handle, STARPU_R); + if (loop == last_loop && rank == last_rank) + { + fprintf(stdout, "Finished: token value %d\n", token); + } + else + { + MPI_Send(&token, 1, MPI_INT, (rank+1)%size, tag+1, MPI_COMM_WORLD); + } + starpu_data_release(token_handle); +} +\endcode + +In that case, libstarpumpi is not needed. One can also use MPI_Isend() and +MPI_Irecv(), by calling starpu_data_release() after MPI_Wait() or MPI_Test() +have notified completion. 
+ +It is however better to use libstarpumpi, to save the application from having to +synchronize with starpu_data_acquire(), and instead just submit all tasks and +communications asynchronously, and wait for the overall completion. + +\section SimpleExample Simple Example + +The flags required to compile or link against the MPI layer are +accessible with the following commands: + +\verbatim +$ pkg-config --cflags starpumpi-1.4 # options for the compiler +$ pkg-config --libs starpumpi-1.4 # options for the linker +\endverbatim + +\code{.c} +void increment_token(void) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_cl; + task->handles[0] = token_handle; + + starpu_task_submit(task); +} + +int main(int argc, char **argv) +{ + int rank, size; + + starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned)); + + unsigned nloops = NITER; + unsigned loop; + + unsigned last_loop = nloops - 1; + unsigned last_rank = size - 1; + + for (loop = 0; loop < nloops; loop++) + { + int tag = loop*size + rank; + + if (loop == 0 && rank == 0) + { + starpu_data_acquire(token_handle, STARPU_W); + token = 0; + fprintf(stdout, "Start with token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); + } + + increment_token(); + + if (loop == last_loop && rank == last_rank) + { + starpu_data_acquire(token_handle, STARPU_R); + fprintf(stdout, "Finished: token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); + } + } + + starpu_task_wait_for_all(); + + starpu_mpi_shutdown(); + + if (rank == last_rank) + { + fprintf(stderr, "[%d] token = %d 
== %d * %d ?\n", rank, token, nloops, size); + STARPU_ASSERT(token == nloops*size); + } +} +\endcode + +We have here replaced MPI_Recv() and MPI_Send() with starpu_mpi_irecv_detached() +and starpu_mpi_isend_detached(), which just submit the communication to be +performed. The implicit sequential consistency dependencies provide +synchronization between MPI reception and emission and the corresponding tasks. +The only remaining synchronization with starpu_data_acquire() is at +the beginning and the end. + +The full source code is available in the file mpi/tests/ring.c. + +\section MPIInitialization How to Initialize StarPU-MPI + +As seen in the previous example, one has to call starpu_mpi_init_conf() to +initialize StarPU-MPI. The third parameter of the function indicates +if MPI should be initialized by StarPU, or if the application did it +itself. If the application initializes MPI itself, it must call +MPI_Init_thread() with MPI_THREAD_SERIALIZED or +MPI_THREAD_MULTIPLE, since StarPU-MPI uses a separate thread to +perform the communications. MPI_THREAD_MULTIPLE is necessary if +the application also performs some MPI communications, or if +STARPU_MPI_THREAD_MULTIPLE_SEND is set to non-zero. + +\section PointToPointCommunication Point To Point Communication + +The standard point to point communications of MPI have been +implemented. The semantics are similar to those of MPI, but adapted to +the DSM provided by StarPU. An MPI request will only be submitted when +the data is available in the main memory of the node submitting the +request. + +There are two types of asynchronous communications: the classic +asynchronous communications and the detached communications. The +classic asynchronous communications (starpu_mpi_isend() and +starpu_mpi_irecv()) need to be followed by a call to +starpu_mpi_wait() or to starpu_mpi_test() to wait for or to +test the completion of the communication, as shown in the example mpi/tests/async_ring.c.
Waiting for or testing the completion of detached communications is not possible: this is done +internally by StarPU-MPI, and on completion, the resources are +automatically released. This mechanism is similar to the pthread +detach state attribute, which determines whether a thread will be +created in a joinable or a detached state. + +For send communications, data is acquired with the mode ::STARPU_R. +When using the \c configure option +\ref enable-mpi-pedantic-isend "--enable-mpi-pedantic-isend", the mode +::STARPU_RW is used to make sure there is no more than 1 concurrent +\c MPI_Isend() call accessing a data +and StarPU does not read from it from tasks during the communication. + +Internally, all communications are divided into 2 messages: a first +message is used to exchange an envelope describing the data (i.e. its +tag and its size), and the data itself is sent in a second message. All +MPI communications submitted by StarPU use a unique tag, which has a +default value. This value can be accessed with the function +starpu_mpi_get_communication_tag() and changed with the function +starpu_mpi_set_communication_tag(). The matching of tags with +corresponding requests is done within StarPU-MPI. + +For any userland communication, the call of the corresponding function +(e.g. starpu_mpi_isend()) will result in the creation of a StarPU-MPI +request, the function starpu_data_acquire_cb() is then called to +asynchronously request StarPU to fetch the data in main memory; when +the data is ready and the corresponding buffer has already been +received by MPI, it will be copied in the memory of the data, +otherwise the request is stored in the early requests list. Sending +requests are stored in the ready requests list. + +While requests need to be processed, the StarPU-MPI progression thread +does the following: + +
<ol> +
<li> it polls the ready requests list. For all the ready +requests, the appropriate function is called to post the corresponding +MPI call. For example, an initial call to starpu_mpi_isend() will +result in a call to MPI_Isend(). If the request is marked as +detached, the request will then be added to the detached requests +list. +
</li> +
<li> it posts an MPI_Irecv() to retrieve a data envelope. +
</li> +
<li> it polls the detached requests list. For all the detached +requests, it tests the completion of the MPI request by calling +MPI_Test(). On completion, the data handle is released, and if a +callback was defined, it is called. +
</li> +
<li> finally, it checks if a data envelope has been received. If so, +if the data envelope matches a request in the early requests list (i.e. +the request has already been posted by the application), the +corresponding MPI call is posted (similarly to the first step above). + +If the data envelope does not match any application request, a +temporary handle is created to receive the data, a StarPU-MPI request +is created and added into the ready requests list, and thus will be +processed in the first step of the next loop. +
</li> +</ol>
+ +To prevent putting too much pressure on the MPI library, only a limited number +of requests are emitted concurrently. This behavior can be tuned with the +environment variable \ref STARPU_MPI_NDETACHED_SEND. In the same fashion, the +progression thread will poll for termination of existing requests after +submitting a defined number of requests. This behavior can be tuned with the +environment variable \ref STARPU_MPI_NREADY_PROCESS. + +The function starpu_mpi_issend() allows to perform a synchronous-mode, +non-blocking send of a data. It can also be specified when using +starpu_mpi_task_insert() with the parameter ::STARPU_SSEND. + +\ref MPIPtpCommunication gives the list of all the +point to point communications defined in StarPU-MPI. + +\section ExchangingUserDefinedDataInterface Exchanging User Defined Data Interface + +New data interfaces defined as explained in \ref DefiningANewDataInterface +can also be used within StarPU-MPI and +exchanged between nodes. Two functions needs to be defined through the +type starpu_data_interface_ops. The function +starpu_data_interface_ops::pack_data takes a handle and returns a +contiguous memory buffer allocated with + +\code{.c} +starpu_malloc_flags(ptr, size, 0) +\endcode + +along with its size, where data to be conveyed +to another node should be copied. 
+ +\code{.c} +static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); + + *count = complex_get_size(handle); + *ptr = starpu_malloc_on_node_flags(node, *count, 0); + memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double)); + memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); + + return 0; +} +\endcode + +The inverse operation is +implemented in the function starpu_data_interface_ops::unpack_data which +takes a contiguous memory buffer and recreates the data handle. + +\code{.c} +static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); + + memcpy(complex_interface->real, ptr, complex_interface->nx*sizeof(double)); + memcpy(complex_interface->imaginary, ptr+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} +\endcode + +And the starpu_data_interface_ops::peek_data operation does +the same, but without freeing the buffer. 
Of course, one can +implement starpu_data_interface_ops::unpack_data as merely calling +starpu_data_interface_ops::peek_data and do the free: + +\code{.c} +static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + STARPU_ASSERT(count == complex_get_size(handle)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); + + memcpy(complex_interface->real, ptr, complex_interface->nx*sizeof(double)); + memcpy(complex_interface->imaginary, ptr+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); + + return 0; +} +\endcode + +\code{.c} +static struct starpu_data_interface_ops interface_complex_ops = +{ + ... + .pack_data = complex_pack_data, + .peek_data = complex_peek_data, + .unpack_data = complex_unpack_data +}; +\endcode + + + +Instead of defining pack and unpack operations, users may want to +attach an MPI type to their user-defined data interface. The function +starpu_mpi_interface_datatype_register() allows doing so. This function takes 3 +parameters: the interface ID for which the MPI datatype is going to be defined, +a pointer to a function that will create the MPI datatype, and a pointer to a function +that will free the MPI datatype. If for some data an MPI datatype cannot be +built (e.g. complex data structure), the creation function can return -1, and +StarPU-MPI will then fall back to using pack/unpack. + +The functions to create and free the MPI datatype are defined and registered as +follows.
+ +\code{.c} +int starpu_complex_interface_datatype_allocate(starpu_data_handle_t handle, MPI_Datatype *mpi_datatype) +{ + int ret; + + int blocklengths[2]; + MPI_Aint displacements[2]; + MPI_Datatype types[2] = {MPI_DOUBLE, MPI_DOUBLE}; + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + MPI_Get_address(complex_interface, displacements); + MPI_Get_address(&complex_interface->imaginary, displacements+1); + displacements[1] -= displacements[0]; + displacements[0] = 0; + + blocklengths[0] = complex_interface->nx; + blocklengths[1] = complex_interface->nx; + + ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_create_struct failed"); + + ret = MPI_Type_commit(mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + return 0; +} + +void starpu_complex_interface_datatype_free(MPI_Datatype *mpi_datatype) +{ + MPI_Type_free(mpi_datatype); +} + +static struct starpu_data_interface_ops interface_complex_ops = +{ + ... +}; + +interface_complex_ops.interfaceid = starpu_data_interface_get_next_id(); + +starpu_mpi_interface_datatype_register(interface_complex_ops.interfaceid, starpu_complex_interface_datatype_allocate, starpu_complex_interface_datatype_free); + +starpu_data_handle_t handle; +starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); +... +\endcode + +An example is provided in the file mpi/examples/user_datatype/my_interface.c. + +It is also possible to use starpu_mpi_datatype_register() to register the +functions through a handle rather than the interface ID, but note that in that +case it is important to make sure no communication is going to occur before the +function starpu_mpi_datatype_register() is called.
This would otherwise produce +an undefined result as the data may be received before the function is called, +and so the MPI datatype would not be known by the StarPU-MPI communication +engine, and the data would be processed with the pack and unpack operations. One +would thus need to synchronize all nodes: + +\code{.c} +starpu_data_handle_t handle; +starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); +starpu_mpi_datatype_register(handle, starpu_complex_interface_datatype_allocate, starpu_complex_interface_datatype_free); + +starpu_mpi_barrier(MPI_COMM_WORLD); +\endcode + +\section MPIInsertTaskUtility MPI Insert Task Utility + +To save the programmer from having to specify all communications, StarPU +provides an "MPI Insert Task Utility". The principle is that the application +decides a distribution of the data over the MPI nodes by allocating it and +notifying StarPU of this decision, i.e. telling StarPU which MPI node "owns" +which data. It also decides, for each handle, an MPI tag which will be used to +exchange the content of the handle. All MPI nodes then process the whole task +graph, and StarPU automatically determines which node actually executes which +task, and triggers the required MPI transfers. + +The list of functions is described in \ref MPIInsertTask. + +Here is a stencil example showing how to use starpu_mpi_task_insert(). One +first needs to define a distribution function which specifies the +locality of the data. Note that the data needs to be registered to MPI +by calling starpu_mpi_data_register(). This function allows setting +the distribution information and the MPI tag which should be used when +communicating the data. It also allows automatically clearing the MPI +communication cache when unregistering the data. A basic example is in the file mpi/tests/insert_task.c.
+ +\code{.c} +/* Returns the MPI node number where data is */ +int my_distrib(int x, int y, int nb_nodes) +{ + /* Block distrib */ + return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; + + // /* Other examples useful for other kinds of computations */ + // /* / distrib */ + // return (x+y) % nb_nodes; + + // /* Block cyclic distrib */ + // unsigned side = sqrt(nb_nodes); + // return x % side + (y % side) * side; +} +\endcode + +Now the data can be registered within StarPU. Data which are not +owned but will be needed for computations can be registered through +the lazy allocation mechanism, i.e. with a home_node set to -1. +StarPU will automatically allocate the memory when it is used for the +first time. + +One can note an optimization here (the else if test): we only register +data which will be needed by the tasks that we will execute. + +\code{.c} +unsigned matrix[X][Y]; +starpu_data_handle_t data_handles[X][Y]; + +for(x = 0; x < X; x++) +{ + for (y = 0; y < Y; y++) + { + int mpi_rank = my_distrib(x, y, size); + if (mpi_rank == my_rank) + /* Owning data */ + starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned)); + else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) + || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) + /* I don't own this index, but will need it for my computations */ + starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); + else + /* I know it's useless to allocate anything for this */ + data_handles[x][y] = NULL; + if (data_handles[x][y]) + { + starpu_mpi_data_register(data_handles[x][y], x*Y+y, mpi_rank); + } + } +} +\endcode + +Now starpu_mpi_task_insert() can be called for the different +steps of the application. + +\code{.c} +for(loop=0 ; loop<niter; loop++) + for (x = 1; x < X-1; x++) + for (y = 1; y < Y-1; y++) + starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, + STARPU_RW, data_handles[x][y], + STARPU_R, data_handles[x-1][y], + STARPU_R, data_handles[x+1][y], + STARPU_R, data_handles[x][y-1], + STARPU_R, data_handles[x][y+1], + 0); +starpu_task_wait_for_all(); +\endcode + +The full source code is available in the file mpi/examples/stencil/stencil5.c. + +I.e.
all MPI nodes process the whole task graph, but as mentioned above, for +each task, only the MPI node which owns the data being written to (here, +data_handles[x][y]) will actually run the task. The other MPI nodes will +automatically send the required data. + +To tune the placement of tasks among MPI nodes, one can use +::STARPU_EXECUTE_ON_NODE or ::STARPU_EXECUTE_ON_DATA to specify an explicit +node (an example can be found in mpi/tests/insert_task_node_choice.c), or the node of a given data (e.g. one of the parameters), or use +starpu_mpi_node_selection_register_policy() and ::STARPU_NODE_SELECTION_POLICY +to provide a dynamic policy (an example can be found in mpi/tests/policy_register.c). The default policy is to execute the task on the +node which owns a data that require write access; if the task requires several +data handles with write access, the node executing the task is selected in +order to minimize the amount of data to transfer between nodes. + +A function starpu_mpi_task_build() is also provided with the aim to +only construct the task structure. All MPI nodes need to call the +function, which posts the required send/recv on the various nodes as needed. +Only the node which is to execute the task will then return a +valid task structure, others will return NULL. This node must submit the task. +All nodes then need to call the function starpu_mpi_task_post_build() -- with the same +list of arguments as starpu_mpi_task_build() -- to post all the +necessary data communications meant to happen after the task execution. + +\code{.c} +struct starpu_task *task; +task = starpu_mpi_task_build(MPI_COMM_WORLD, &cl, + STARPU_RW, data_handles[0], + STARPU_R, data_handles[1], + 0); +if (task) starpu_task_submit(task); +starpu_mpi_task_post_build(MPI_COMM_WORLD, &cl, + STARPU_RW, data_handles[0], + STARPU_R, data_handles[1], + 0); +\endcode + +A full source code using these functions is available in the file mpi/tests/insert_task_compute.c. 
+ +It is also possible to create and submit the task outside of StarPU-MPI functions +and call the functions +starpu_mpi_task_exchange_data_before_execution() and +starpu_mpi_task_exchange_data_after_execution() to exchange data as +required by the data ownership's nodes. + +\code{.c} +struct starpu_mpi_task_exchange_params params; +struct starpu_data_descr descrs[2]; +struct starpu_task *task; + +task = starpu_task_create(); +task->cl = &mycodelet; +task->handles[0] = data_handles[0]; +task->handles[1] = data_handles[1]; + +starpu_mpi_task_exchange_data_before_execution(MPI_COMM_WORLD, task, descrs, &params); + +if (params.do_execute) starpu_task_submit(task); + +starpu_mpi_task_exchange_data_after_execution(MPI_COMM_WORLD, descrs, 2, params); +\endcode + +A full source code using these functions is available in the file mpi/tests/mpi_task_submit.c. + +If many data handles must be registered with unique tag ids, or if +multiple applications are concurrently submitting tasks to StarPU, it +is then difficult to keep the uniqueness of the tags for each piece of +data. +StarPU provides a tag management system to allocate/free a unique +range of tags when registering the data to prevent conflict from one +application to another. The previous code then becomes: + +\code{.c} +unsigned matrix[X][Y]; +starpu_data_handle_t data_handles[X][Y]; +int64_t mintag = starpu_mpi_tags_allocate(X*Y); + +for(x = 0; x < X; x++) +{ + for (y = 0; y < Y; y++) + { + ... + if (data_handles[x][y]) + { + starpu_mpi_data_register(data_handles[x][y], mintag + y*X+x, mpi_rank); + } + } +} +\endcode + +Then, when all these pieces of data have been unregistered, you may +free the range of tags by calling: +\code{.c} +starpu_mpi_tags_free(mintag); +\endcode + +where mintag was the value returned by starpu_mpi_tags_allocate().
+ +Note that both these functions should be called by all nodes involved +in the computations in the exact same order and with the same +parameters to keep the tags synchronized between all nodes. + +Also note that StarPU will not check if a tag given to +starpu_mpi_data_register() has been previously registered; this +functionality only aims to prevent different parts of an application +from using the same data tags. + +\section MPITaskUtility Other MPI Utility Functions + +Similarly to the function starpu_data_cpy(), the function +starpu_mpi_data_cpy() can be used to transfer a data between 2 nodes. +It behaves as starpu_data_cpy() if both data are owned by the same +node, otherwise a transfer is initiated between the nodes. A priority +and a callback function can be defined. + +\code{.c} +... +starpu_mpi_data_register(src_handle, 12, 0); // Data is owned by node0 +starpu_mpi_data_register(dst_handle, 42, 1); // Data is owned by node1 +... +// Send data from node0 to node1 +starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 0, callback, NULL); +\endcode + +\section MPIInsertPruning Pruning MPI Task Insertion + +Making all MPI nodes process the whole graph can be a concern with a growing +number of nodes. To avoid this, the +application can prune the task for loops according to the data distribution, +to only submit tasks on nodes which have to care about them (either to +execute them, or to send the required data). + +A way to do some of this quite easily can be to just add an if like this: + +\code{.c} + for(loop=0 ; loop<niter; loop++) + for (x = 1; x < X-1; x++) + for (y = 1; y < Y-1; y++) + if (my_distrib(x,y,size) == my_rank + || my_distrib(x-1,y,size) == my_rank + || my_distrib(x+1,y,size) == my_rank + || my_distrib(x,y-1,size) == my_rank + || my_distrib(x,y+1,size) == my_rank) + starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, + STARPU_RW, data_handles[x][y], + STARPU_R, data_handles[x-1][y], + STARPU_R, data_handles[x+1][y], + STARPU_R, data_handles[x][y-1], + STARPU_R, data_handles[x][y+1], + 0); + + starpu_task_wait_for_all(); +\endcode + +This drops the cost of function call argument passing and parsing. + +A full example is available in the file examples/stencil/implicit-stencil-tasks.c. + +If the my_distrib function can be inlined by the compiler, the latter can +improve the test. + +If the size can be made a compile-time constant, the compiler can +considerably improve the test further. + +If the distribution function is not too complex and the compiler is very good, +the latter can even optimize the for loops, thus dramatically reducing +the cost of task submission.
+ +To estimate quickly how long task submission takes, and notably how much pruning +saves, a quick and easy way is to measure the submission time of just one of the +MPI nodes. This can be achieved by running the application on just one MPI node +with the following environment variables: + +\code{.sh} +export STARPU_DISABLE_KERNELS=1 +export STARPU_MPI_FAKE_RANK=2 +export STARPU_MPI_FAKE_SIZE=1024 +\endcode + +Here we have disabled the kernel function call to skip the actual computation +time and only keep submission time, and we have asked StarPU to fake running on +MPI node 2 out of 1024 nodes. + +\section MPITemporaryData Temporary Data + +To be able to use starpu_mpi_task_insert(), one has to call +starpu_mpi_data_register(), so that StarPU-MPI can know what it needs to do for +each data. Parameters of starpu_mpi_data_register() are normally the same on all +nodes for a given data, so that all nodes agree on which node owns the data, and +which tag is used to transfer its value. + +It can however be useful to register e.g. some temporary data on just one node, +without having to register a dumb handle on all nodes, while only one node will +actually need to know about it. In this case, nodes which will not need the data +can just pass \c NULL to starpu_mpi_task_insert(): + +\code{.c} +starpu_data_handle_t data0 = NULL; +if (rank == 0) +{ + starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); + starpu_mpi_data_register(data0, 0, rank); +} +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data0, 0); /* Executes on node 0 */ +\endcode + +Here, nodes whose rank is not \c 0 will simply not take care of the data, and consider it to be on another node. 
+ +This can be mixed in various ways; for instance, here node \c 1 determines that it does +not have to care about \c data0, but knows that it should send the value of its +\c data1 to node \c 0, which owns \c data and thus will need the value of \c data1 to execute the task: + +\code{.c} +starpu_data_handle_t data0 = NULL, data1, data; +if (rank == 0) +{ + starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); + starpu_mpi_data_register(data0, -1, rank); + starpu_variable_data_register(&data1, -1, 0, sizeof(val1)); + starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t) &val, sizeof(val)); +} +else if (rank == 1) +{ + starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t) &val1, sizeof(val1)); + starpu_variable_data_register(&data, -1, 0, sizeof(val)); +} +starpu_mpi_data_register(data, 42, 0); +starpu_mpi_data_register(data1, 43, 1); +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data, STARPU_R, data0, STARPU_R, data1, 0); /* Executes on node 0 */ +\endcode + +The full source code is available in the file mpi/tests/temporary.c. + +\section MPIPerNodeData Per-node Data + +Beyond temporary data on just one node, one may want per-node data, +to e.g.
replicate some computation because that is less expensive than +communicating the value over MPI: + +\code{.c} +starpu_data_handle_t pernode, data0, data1; +starpu_variable_data_register(&pernode, -1, 0, sizeof(val)); +starpu_mpi_data_register(pernode, -1, STARPU_MPI_PER_NODE); + +/* Normal data: one on node0, one on node1 */ +if (rank == 0) +{ + starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); + starpu_variable_data_register(&data1, -1, 0, sizeof(val1)); +} +else if (rank == 1) +{ + starpu_variable_data_register(&data0, -1, 0, sizeof(val0)); + starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t) &val1, sizeof(val1)); +} +starpu_mpi_data_register(data0, 42, 0); +starpu_mpi_data_register(data1, 43, 1); + +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, pernode, 0); /* Will be replicated on all nodes */ + +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl2, STARPU_RW, data0, STARPU_R, pernode, 0); /* Will execute on node 0, using its own pernode*/ +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl2, STARPU_RW, data1, STARPU_R, pernode, 0); /* Will execute on node 1, using its own pernode*/ +\endcode + +One can turn a normal data into per-node data, by first broadcasting it to all nodes: + +\code{.c} +starpu_data_handle_t data; +starpu_variable_data_register(&data, -1, 0, sizeof(val)); +starpu_mpi_data_register(data, 42, 0); + +/* Compute some value */ +starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data, 0); /* Node 0 computes it */ + +/* Get it on all nodes */ +starpu_mpi_get_data_on_all_nodes_detached(MPI_COMM_WORLD, data); +/* And turn it per-node */ +starpu_mpi_data_set_rank(data, STARPU_MPI_PER_NODE); +\endcode + +The data can then be used just like per-node above. + +The full source code is available in the file mpi/tests/temporary.c. + +\section MPIMpiRedux Inter-node reduction + +One might want to leverage a reduction pattern across several nodes.
+Using ::STARPU_REDUX (see \ref DataReduction), one can obtain such patterns where each +core on contributing nodes spawns its own copy to work with. In the case that the +required reductions are too numerous and expensive, the access mode ::STARPU_MPI_REDUX +tells StarPU to spawn only one contribution per contributing node. + +The setup and use of ::STARPU_MPI_REDUX is similar to ::STARPU_REDUX : the initialization and +reduction codelets should be declared through starpu_data_set_reduction_methods() in the +same fashion as ::STARPU_REDUX. Example mpi/examples/mpi_redux/mpi_redux.c shows how to use the ::STARPU_MPI_REDUX mode +and compares it with the standard ::STARPU_REDUX. The function starpu_mpi_redux_data() is automatically called +either when a task reading the reduced handle is inserted through the MPI layer of StarPU through +starpu_mpi_task_insert() or when users wait for all communications and tasks to be executed +through starpu_mpi_wait_for_all(). The function can be called by users to fine-tune arguments +such as the priority of the reduction tasks. +Tasks contributing to the inter-node reduction should be registered as +accessing the contribution through ::STARPU_RW|::STARPU_COMMUTE mode, as for the +::STARPU_REDUX mode, as in the following example. + +\code{.c} +static struct starpu_codelet contrib_cl = +{ + .cpu_funcs = {cpu_contrib}, /* cpu implementation(s) of the routine */ + .nbuffers = 1, /* number of data handles referenced by this routine */ + .modes = {STARPU_RW | STARPU_COMMUTE}, /* access modes for the contribution */ + .name = "contribution" +}; +\endcode + +When inserting these tasks, the access mode handed out to the StarPU-MPI layer +should be \c STARPU_MPI_REDUX. If a task uses a \c data owned by node 0 and is executed +on the node 1, it can be inserted as in the following example.
+ +\code{.c} +starpu_mpi_task_insert(MPI_COMM_WORLD, &contrib_cl, STARPU_MPI_REDUX, data, STARPU_EXECUTE_ON_NODE, 1, 0); /* Node 1 computes it */ +\endcode + +Note that if the specified node is set to \c -1, the option is ignored. + +More examples are available at \c mpi/examples/mpi_redux/mpi_redux.c and \c mpi/examples/mpi_redux/mpi_redux_tree.c. + +\section MPIPriorities Priorities + +All send functions have a _prio variant which takes an additional +priority parameter, which allows making StarPU-MPI change the order of MPI +requests before submitting them to MPI. The default priority is \c 0. + +When using the starpu_mpi_task_insert() helper, ::STARPU_PRIORITY defines both the +task priority and the MPI requests priority. An example is available in the file mpi/examples/benchs/recv_wait_finalize_bench.c. + +To test how much MPI priorities have a good effect on performance, you can +set the environment variable \ref STARPU_MPI_PRIORITIES to \c 0 to disable the use of +priorities in StarPU-MPI. + +\section MPICache MPI Cache Support + +StarPU-MPI automatically optimizes duplicate data transmissions: if an MPI +node \c B needs a piece of data \c D from MPI node \c A for several tasks, only one +transmission of \c D will take place from \c A to \c B, and the value of \c D will be kept +on \c B as long as no task modifies \c D. + +If a task modifies \c D, \c B will wait for all tasks which need the previous value of +\c D, before invalidating the value of \c D. As a consequence, it releases the memory +occupied by \c D. Whenever a task running on \c B needs the new value of \c D, allocation +will take place again to receive it. + +Since tasks can be submitted dynamically, StarPU-MPI can not know whether the +current value of data \c D will again be used by a newly-submitted task before +being modified by another newly-submitted task, so until a task is submitted to +modify the current value, it can not decide by itself whether to flush the cache +or not.
The application can however explicitly tell StarPU-MPI to flush the +cache by calling starpu_mpi_cache_flush() or starpu_mpi_cache_flush_all_data(), +for instance in case the data will not be used at all anymore (see for instance +the cholesky example in mpi/examples/matrix_decomposition), or at least not in +the near future. If a newly-submitted task actually needs the value again, +another transmission of \c D will be initiated from \c A to \c B. A mere +starpu_mpi_cache_flush_all_data() can for instance be added at the end of the whole +algorithm, to express that no data will be reused after this (or at least that +it is not interesting to keep them in cache). It may however be interesting to +add fine-grain starpu_mpi_cache_flush() calls during the algorithm; the effect +for the data deallocation will be the same, but it will additionally release some +pressure from the StarPU-MPI cache hash table during task submission. + +One can determine whether a piece of data is cached with +starpu_mpi_cached_receive() and starpu_mpi_cached_send(). An example is available in the file mpi/examples/cache/cache.c. + +Functions starpu_mpi_cached_receive_set() and +starpu_mpi_cached_send_set() are automatically called by +starpu_mpi_task_insert() but can also be called directly by the +application. Functions starpu_mpi_cached_send_clear() and +starpu_mpi_cached_receive_clear() must be called to clear data from +the cache. They are also automatically called when using +starpu_mpi_task_insert(). + +The whole caching behavior can be disabled thanks to the \ref STARPU_MPI_CACHE +environment variable. The variable \ref STARPU_MPI_CACHE_STATS can be set to 1 +to enable the runtime to display messages when data are added or removed +from the cache holding the received data. + +\section MPIMigration MPI Data Migration + +The application can dynamically change its mind about the data distribution, to +balance the load over MPI nodes, for instance.
This can be done very simply by +requesting an explicit move and then changing the registered rank. For instance, +we here switch to a new distribution function my_distrib2: we first +register any data which wasn't registered already and will be needed, then +migrate the data, and register the new location. + +\code{.c} + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + int mpi_rank = my_distrib2(x, y, size); + if (!data_handles[x][y] && (mpi_rank == my_rank + || my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) + || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))) + /* Register newly-needed data */ + starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); + if (data_handles[x][y]) + { + /* Migrate the data */ + starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank); + } + } + } +\endcode + +The full example is available in the file mpi/examples/stencil/stencil5.c. +From then on, further task submissions will use the new data distribution, +which will thus change both MPI communications and task assignments. + +Very importantly, since all nodes have to agree on which node owns which data +to determine MPI communications and task assignments the same way, all +nodes have to perform the same data migration, and at the same point among task +submissions. It thus does not require a strict synchronization, just a clear +separation of task submissions before and after the data redistribution. + +Before data unregistration, it has to be migrated back to its original home +node (the value, at least), since that is where the user-provided buffer +resides. Otherwise, the unregistration will complain that it does not have the +latest value on the original home node. 
+ +\code{.c} + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + if (data_handles[x][y]) + { + int mpi_rank = my_distrib(x, y, size); + /* Get back data to original place where the user-provided buffer is. */ + starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank); + /* And unregister it */ + starpu_data_unregister(data_handles[x][y]); + } + } + } +\endcode + +\section MPICollective MPI Collective Operations + +The functions are described in \ref MPICollectiveOperations. + +\code{.c} +if (rank == root) +{ + /* Allocate the vector */ + vector = malloc(nblocks * sizeof(float *)); + for(x=0 ; xmpi/tests/mpi_scatter_gather.c. + +With NewMadeleine (see \ref Nmad), broadcasts can automatically be detected and +be optimized by using routing trees. This behavior can be controlled with the +environment variable \ref STARPU_MPI_COOP_SENDS. See the corresponding +[paper](https://hal.inria.fr/hal-02872765) for more information. + +Other collective operations would be easy to define, just ask starpu-devel for +them! + +\section MPIDriver Make StarPU-MPI Progression Thread Execute Tasks + +The default behavior of StarPU-MPI is to spawn an MPI thread to take care only +of MPI communications in an active fashion (i.e. the StarPU-MPI thread sleeps +only when there are no active request submitted by the application), with the +goal of being as reactive as possible to communications. Knowing that, users +usually leave one free core for the MPI thread when starting a distributed +execution with StarPU-MPI. However, this could result in a loss of performance +for applications that does not require an extreme reactivity to MPI +communications. 
+ +The starpu_mpi_init_conf() routine allows users to give the +starpu_conf configuration structure of StarPU (usually given to the +starpu_init() routine) to StarPU-MPI, so that StarPU-MPI reserves for its own +use one of the CPU drivers of the current computing node, or one of the CPU +cores, and then calls starpu_init() internally. + +This allows the MPI communication thread to call a StarPU CPU driver to run +tasks when there are no active requests to take care of, and thus recover the +computational power of the "lost" core. Since there is a trade-off between +executing tasks and polling MPI requests, which is how much the application +wants to lose in reactivity to MPI communications to get back the computing +power of the core dedicated to the StarPU-MPI thread, there are two environment +variables to pilot the behavior of the MPI thread so that users can tune +this trade-off depending on the behavior of the application. + +The \ref STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable sets how many times +the MPI progression thread goes through the MPI_Test() loop on each active communication request +(and thus tries to make communications progress by going into the MPI layer) +before executing tasks. The default value for this environment variable is 0, +which means that the support for interleaving task execution and communication +polling is deactivated, thus returning the MPI progression thread to its +original behavior. + +The \ref STARPU_MPI_DRIVER_TASK_FREQUENCY environment variable sets how many tasks +are executed by the MPI communication thread before checking all active +requests again. While this environment variable allows a better use of the core +dedicated to StarPU-MPI for computations, it also decreases the reactivity of +the MPI communication thread as much. 
+ +\section MPIDebug Debugging MPI + +Communication trace will be enabled when the environment variable +\ref STARPU_MPI_COMM is set to \c 1, and StarPU has been configured with the +option \ref enable-verbose "--enable-verbose". + +Statistics will be enabled for the communication cache when the +environment variable \ref STARPU_MPI_CACHE_STATS is set to \c 1. It +prints messages on the standard output when data are added or removed +from the received communication cache. + +When the environment variable \ref STARPU_MPI_STATS is set to \c 1, +StarPU will display at the end of the execution for each node the +volume and the bandwidth of data sent to all the other nodes. +Communication statistics can also be enabled and disabled from the +application by calling the functions starpu_mpi_comm_stats_enable() +and starpu_mpi_comm_stats_disable(). If communication statistics have +been enabled, calling the function starpu_mpi_comm_stats_retrieve() +will give the amount of communications between the calling node and all +the other nodes. Communication statistics will also be automatically +displayed at the end of the execution, as exemplified below. 
+ +\verbatim +[starpu_comm_stats][3] TOTAL: 476.000000 B 0.000454 MB 0.000098 B/s 0.000000 MB/s +[starpu_comm_stats][3:0] 248.000000 B 0.000237 MB 0.000051 B/s 0.000000 MB/s +[starpu_comm_stats][3:2] 50.000000 B 0.000217 MB 0.000047 B/s 0.000000 MB/s + +[starpu_comm_stats][2] TOTAL: 288.000000 B 0.000275 MB 0.000059 B/s 0.000000 MB/s +[starpu_comm_stats][2:1] 70.000000 B 0.000103 MB 0.000022 B/s 0.000000 MB/s +[starpu_comm_stats][2:3] 288.000000 B 0.000172 MB 0.000037 B/s 0.000000 MB/s + +[starpu_comm_stats][1] TOTAL: 188.000000 B 0.000179 MB 0.000038 B/s 0.000000 MB/s +[starpu_comm_stats][1:0] 80.000000 B 0.000114 MB 0.000025 B/s 0.000000 MB/s +[starpu_comm_stats][1:2] 188.000000 B 0.000065 MB 0.000014 B/s 0.000000 MB/s + +[starpu_comm_stats][0] TOTAL: 376.000000 B 0.000359 MB 0.000077 B/s 0.000000 MB/s +[starpu_comm_stats][0:1] 376.000000 B 0.000141 MB 0.000030 B/s 0.000000 MB/s +[starpu_comm_stats][0:3] 10.000000 B 0.000217 MB 0.000047 B/s 0.000000 MB/s +\endverbatim + +These statistics can be plotted as heatmaps using the StarPU tool +starpu_mpi_comm_matrix.py; this will produce 2 PDF files: one +plot for the bandwidth, and one plot for the data volume. + +\image latex trace_bw_heatmap.png "Bandwidth Heatmap" width=0.5\textwidth +\image html trace_bw_heatmap.png "Bandwidth Heatmap" + +\image latex trace_volume_heatmap.png "Data Volume Heatmap" width=0.5\textwidth +\image html trace_volume_heatmap.png "Data Volume Heatmap" + +\section MPIExamples More MPI examples + +MPI examples are available in the StarPU source code in mpi/examples: + +
    +
  • +comm shows how to use communicators with StarPU-MPI +
  • +
  • +complex is a simple example using a user-defined data interface over +MPI (complex numbers), +
  • +
  • +stencil5 is a simple stencil example using starpu_mpi_task_insert(), +
  • +
  • +matrix_decomposition is a cholesky decomposition example using +starpu_mpi_task_insert(). The non-distributed version can check for + +
  • +mpi_lu is an LU decomposition example, provided in three versions: +plu_example uses explicit MPI data transfers, plu_implicit_example +uses implicit MPI data transfers, plu_outofcore_example uses implicit MPI +data transfers and supports data matrices which do not fit in memory (out-of-core). +
  • +
+ + +\section Nmad Using the NewMadeleine communication library + +NewMadeleine (see https://pm2.gitlabpages.inria.fr/newmadeleine/, part of the PM2 +project) is an optimizing communication library for high-performance networks. +NewMadeleine provides its own interface, but also an MPI interface (called +MadMPI). Thus, there are two possibilities to use NewMadeleine with StarPU: + +
    +
  • +using the NewMadeleine's native interface. StarPU supports this interface from +its release 1.3.0, by enabling the \c configure option \ref enable-nmad +"--enable-nmad". In this case, StarPU relies directly on NewMadeleine to make +communications progress and NewMadeleine has to be built with the profile +pukabi+madmpi.conf. +
  • +
  • +using the NewMadeleine's MPI interface (MadMPI). StarPU will use the standard +MPI API and NewMadeleine will handle the calls to the MPI API. In this case, +StarPU makes communications progress and thus communication progress has to be +disabled in NewMadeleine by compiling it with the profile +pukabi+madmpi-mini.conf. +
  • +
+ +To build NewMadeleine, download the latest version from the website (or, +better, use the Git version to use the most recent version), then: + +\code{.sh} +cd pm2/scripts +./pm2-build-packages ./ --prefix= +\endcode + +With Guix, the NewMadeleine's native interface can be used by setting the +parameter \c \-\-with-input=openmpi=nmad and MadMPI can be used with \c +\-\-with-input=openmpi=nmad-mini. + +Whatever implementation (NewMadeleine or MadMPI) is used by StarPU, the public +MPI interface of StarPU (described in \ref API_MPI_Support) is the same. + + + +\section MPIMasterSlave MPI Master Slave Support + +StarPU provides another way to execute applications across many +nodes. The Master Slave support allows using remote cores without +thinking about data distribution. This support can be activated with +the \c configure option \ref enable-mpi-master-slave +"--enable-mpi-master-slave". However, you should not activate both MPI +support and MPI Master-Slave support. + +The existing kernels for CPU devices can be used as such. They only have to be +exposed through the name of the function in the \ref starpu_codelet::cpu_funcs_name field. +Functions have to be globally-visible (i.e. not static) for StarPU to +be able to look them up, and -rdynamic must be passed to gcc (or +-export-dynamic to ld) so that symbols of the main program are visible. + +By default, one core is dedicated on the master node to manage the +entire set of slaves. If the implementation of MPI you are using has +good multi-threading support, you can set the \ref +STARPU_MPI_MS_MULTIPLE_THREAD environment variable to 1 to dedicate one core per +slave. + +Choosing the number of cores on each slave device is done by setting +the environment variable \ref STARPU_NMPIMSTHREADS "STARPU_NMPIMSTHREADS=\<number\>" +with \<number\> being the requested number of cores. By default, +all the slave's cores are used. 
+ +Setting the number of slave nodes is done by changing the -np +parameter when executing the application with mpirun or mpiexec. + +The master node is by default the node with the MPI rank equal to 0. +To select another node, use the environment variable \ref +STARPU_MPI_MASTER_NODE "STARPU_MPI_MASTER_NODE=\<number\>" with +\<number\> being the requested MPI rank node. + +A simple example tests/main/insert_task.c can be used to test the MPI master slave support. + +\section MPICheckpoint MPI Checkpoint Support + +StarPU provides an experimental checkpoint mechanism. It is for now only a proof +of concept to see what the checkpointing cost is, since the restart part has not +been integrated yet. + +To enable checkpointing, you should use +the \c configure option \ref enable-mpi-ft "--enable-mpi-ft". The +application in the directory \c mpi/examples/matrix_decomposition +shows how to enable checkpoints. The API documentation is available in +\ref API_MPI_FT_Support. + +Statistics can also be enabled with the \c configure option \ref +enable-mpi-ft-stats "--enable-mpi-ft-stats". + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy b/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy new file mode 100644 index 0000000..3d5d8c7 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy @@ -0,0 +1,28 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page OpenCLSupport OpenCL Support + +StarPU provides several functions for managing OpenCL programs and kernels. starpu_opencl_load_program_source() and starpu_opencl_load_program_source_malloc() load the OpenCL program source from a file, but the latter one also allocates a buffer for the program source. +starpu_opencl_compile_opencl_from_file() and starpu_opencl_compile_opencl_from_string() are used to compile an OpenCL kernel from a source file or a string respectively. starpu_opencl_load_binary_opencl() is used to load a binary OpenCL kernel. An example is available in examples/binary/binary.c. + +starpu_opencl_load_opencl_from_file() and starpu_opencl_load_opencl_from_string() are used to compile an OpenCL source code from a file or a string respectively. starpu_opencl_unload_opencl() is used to unload an OpenCL compiled program or kernel from memory. starpu_opencl_load_opencl() is used to create an OpenCL kernel for the specified device. starpu_opencl_release_kernel() is used to release the specified OpenCL kernel. An example illustrating the usage of OpenCL support is available in examples/basic_examples/vector_scal_opencl.c. + +For managing OpenCL contexts, devices, and command queues, there are several functions: starpu_opencl_get_context(), starpu_opencl_get_device() and starpu_opencl_get_queue() are used to retrieve the OpenCL context, device and command queue associated with a given device number respectively. starpu_opencl_get_current_context() and starpu_opencl_get_current_queue() are used to retrieve the OpenCL context or command queue of the current worker that is being used by the calling thread. We can call starpu_opencl_set_kernel_args() to set the arguments for an OpenCL kernel. Examples are available in examples/filters/custom_mf/. + +Two functions are useful for debugging and error reporting in OpenCL applications. 
starpu_opencl_error_string() takes an OpenCL error code as an argument and returns a string containing a description of the error. starpu_opencl_display_error() takes an OpenCL error code as an argument and prints the corresponding error message to the standard error stream. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy b/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy new file mode 100644 index 0000000..c746328 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy @@ -0,0 +1,240 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page OutOfCore Out Of Core + +\section OutOfCore_Introduction Introduction + +When using StarPU, one may need to store more data than what the main memory +(RAM) can store. This part describes the method to add a new memory node on a +disk and to use it. + +Similarly to what happens with GPUs (it's actually exactly the same code), when +available main memory becomes scarce, StarPU will evict unused data to the disk, +thus leaving room for new allocations. Whenever some evicted data is needed +again for a task, StarPU will automatically fetch it back from the disk. 
+ +The principle is that one first registers a disk memory node with a set of functions to manipulate +data by calling starpu_disk_register(), and then registers a disk location, seen by StarPU as a +void*, which can be for instance a Unix path for the \c stdio, \c unistd or +\c unistd_o_direct backends, or a leveldb database for the \c leveldb backend, an HDF5 +file path for the \c HDF5 backend, etc. The \c disk backend opens this place with the +plug() method. + +StarPU can then start using it to allocate room and store data there with the +disk write method, without user intervention. + +Users can also use starpu_disk_open() to explicitly open an object within the +disk, e.g. a file name in the \c stdio or \c unistd cases, or a database key in the +\c leveldb case, and then use starpu_*_register functions to turn it into a StarPU +data handle. StarPU will then use this file as an external source of data, and +automatically read and write data as appropriate. In the end use starpu_disk_close() to close an existing object. + +In any case, users also need to set \ref STARPU_LIMIT_CPU_MEM to the amount of +data that StarPU will be allowed to afford. By default, StarPU will use the +machine memory size, but part of it is taken by the kernel, the system, +daemons, and the application's own allocated data, whose size can not be +predicted. That is why users need to specify what StarPU can afford. + +Some Out-of-core tests are worth giving a read, see tests/disk/*.c + +\section UseANewDiskMemory Use a new disk memory + +To use a disk memory node, you have to register it with this function: + +\code{.c} +int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp/", 1024*1024*200); +\endcode + +Here, we use the \c unistd backend to perform the read/write operations, i.e. +\c read/\c write. This structure must have a path where to store files, as well as +the maximum size the software can afford to store on the disk. 
+ +Don't forget to check if the result is correct! + +This can also be achieved by just setting environment variables \ref STARPU_DISK_SWAP, \ref STARPU_DISK_SWAP_BACKEND and \ref STARPU_DISK_SWAP_SIZE : + +\verbatim +export STARPU_DISK_SWAP=/tmp +export STARPU_DISK_SWAP_BACKEND=unistd +export STARPU_DISK_SWAP_SIZE=200 +\endverbatim + +The backend can be set to \c stdio (some caching is done by \c libc and the kernel), \c unistd (only +caching in the kernel), \c unistd_o_direct (no caching), \c leveldb, or \c hdf5. + +It is important to understand that when the backend is not set to \c +unistd_o_direct, some caching will occur at the kernel level (the page cache), +which will also consume memory... \ref STARPU_LIMIT_CPU_MEM might need to be set +to less than half of the machine memory just to leave room for the kernel's +page cache, otherwise the kernel will struggle to get memory. Using \c +unistd_o_direct avoids this caching, thus allowing to set \ref STARPU_LIMIT_CPU_MEM +to the machine memory size (minus some memory for normal kernel operations, +system daemons, and application data). + +When the register call is made, StarPU will benchmark the disk. This can +take some time. + +Warning: the size thus has to be at least \ref STARPU_DISK_SIZE_MIN bytes ! + +StarPU will then automatically try to evict unused data to this new disk. One +can also use the standard StarPU memory node API to prefetch data etc., see the +\ref API_Standard_Memory_Library and the \ref API_Data_Interfaces. + +The disk is unregistered during the execution of starpu_shutdown(). + +\section OOCDataRegistration Data Registration + +StarPU will only be able to achieve Out-Of-Core eviction if it controls memory +allocation. 
For instance, if the application does the following: + +\code{.c} +p = malloc(1024*1024*sizeof(float)); +fill_with_data(p); +starpu_matrix_data_register(&h, STARPU_MAIN_RAM, (uintptr_t) p, 1024, 1024, 1024, sizeof(float)); +\endcode + +StarPU will not be able to release the corresponding memory since it's the +application which allocated it, and StarPU can not know how, and thus how to +release it. One thus has to use the following instead: + +\code{.c} +starpu_matrix_data_register(&h, -1, (uintptr_t) NULL, 1024, 1024, 1024, sizeof(float)); +starpu_task_insert(cl_fill_with_data, STARPU_W, h, 0); +\endcode + +Which makes StarPU automatically do the allocation when the task running +cl_fill_with_data gets executed. And then if it needs to, it will be able to +release it after having pushed the data to the disk. Since no initial buffer is +provided to starpu_matrix_data_register(), the handle does not have any initial +value right after this call, and thus the very first task using the handle needs +to use the ::STARPU_W mode like above, ::STARPU_R or ::STARPU_RW would not make +sense. + +By default, StarPU will try to push any data handle to the disk. +To specify whether a given handle should be pushed to the disk, +starpu_data_set_ooc_flag() should be used. To get to know whether a given handle should be pushed to the disk, starpu_data_get_ooc_flag() should be used. + +\section OOCWontUse Using Wont Use + +By default, StarPU uses a Least-Recently-Used (LRU) algorithm to determine +which data should be evicted to the disk. This algorithm can be hinted +by telling which data will not be used in the coming future thanks to +starpu_data_wont_use(), for instance: + +\code{.c} +starpu_task_insert(&cl_work, STARPU_RW, h, 0); +starpu_data_wont_use(h); +\endcode + +StarPU will mark the data as "inactive" and tend to evict to the disk that data +rather than others. + +\section ExampleDiskCopy Examples: disk_copy + +\snippet disk_copy.c To be included. 
You should update doxygen if you see this text. + +The full code is provided in the file tests/disk/disk_copy.c + +\section ExampleDiskCompute Examples: disk_compute + +\snippet disk_compute.c To be included. You should update doxygen if you see this text. + +The full code is provided in the file tests/disk/disk_compute.c + +\section Performances + +Scheduling heuristics for Out-of-core are still relatively experimental. The +tricky part is that you usually have to find a compromise between privileging +locality (which avoids back and forth with the disk) and privileging the +critical path, i.e. taking into account priorities to avoid lack of parallelism +at the end of the task graph. + +It is notably better to avoid defining different priorities to tasks with low +priority, since that will make the scheduler want to schedule them by levels of +priority, at the expense of locality. + +The scheduling algorithms worth trying are thus dmdar and +lws, which privilege data locality over priorities. There will be +work on this area in the coming future. + +\section FeedBackFigures Feedback Figures + +Beyond pure performance feedback, some figures are interesting to have a look at. + +Using export STARPU_BUS_STATS=1 (\ref STARPU_BUS_STATS and \ref STARPU_BUS_STATS_FILE +to define a filename in which to display statistics, by default the +standard error stream is used) gives an overview of the data +transfers which were needed. The values can also be obtained at runtime +by using starpu_bus_get_profiling_info(). An example can be read in +src/profiling/profiling_helpers.c. 
+ +\verbatim +#--------------------- +Data transfer speed for /tmp/sthibault-disk-DJzhAj (node 1): +0 -> 1: 99 MB/s +1 -> 0: 99 MB/s +0 -> 1: 23858 µs +1 -> 0: 23858 µs + +#--------------------- +TEST DISK MEMORY + +#--------------------- +Data transfer stats: + Disk 0 -> NUMA 0 0.0000 GB 0.0000 MB/s (transfers : 0 - avg -nan MB) + NUMA 0 -> Disk 0 0.0625 GB 63.6816 MB/s (transfers : 2 - avg 32.0000 MB) +Total transfers: 0.0625 GB +#--------------------- +\endverbatim + +Using export STARPU_ENABLE_STATS=1 gives information for each memory node +on data miss/hit and allocation miss/hit. + +\verbatim +#--------------------- +MSI cache stats : +memory node NUMA 0 + hit : 32 (66.67 %) + miss : 16 (33.33 %) +memory node Disk 0 + hit : 0 (0.00 %) + miss : 0 (0.00 %) +#--------------------- + +#--------------------- +Allocation cache stats: +memory node NUMA 0 + total alloc : 16 + cached alloc: 0 (0.00 %) +memory node Disk 0 + total alloc : 8 + cached alloc: 0 (0.00 %) +#--------------------- +\endverbatim + +\section DiskFunctions Disk functions + +There are various ways to operate a disk memory node, described by the structure +starpu_disk_ops. For instance, the variable #starpu_disk_unistd_ops +uses read/write functions. + +All structures are in \ref API_Out_Of_Core. + +Examples are provided in src/core/disk_ops/disk_*.c + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy b/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy new file mode 100644 index 0000000..582201e --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy @@ -0,0 +1,265 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page ParallelWorker Creating Parallel Workers On A Machine + +\section ParallelWorkerGeneralIdeas General Ideas + +Parallel workers are a concept introduced in this +paper where they are called clusters. + +The granularity problem is tackled by using resource aggregation: +instead of dynamically splitting tasks, resources are aggregated +to process coarse grain tasks in a parallel fashion. This is built on +top of scheduling contexts to be able to handle any type of parallel +tasks. + +This comes from a basic idea, making use of two levels of parallelism +in a DAG. +We keep the DAG parallelism, but consider on top of it that a task can +contain internal parallelism. A good example is if each task in the DAG +is OpenMP enabled. + +The particularity of such tasks is that we will combine the power of two +runtime systems: StarPU will manage the DAG parallelism and another +runtime (e.g. OpenMP) will manage the internal parallelism. The challenge +is in creating an interface between the two runtime systems so that StarPU +can regroup cores inside a machine (creating what we call a parallel worker) on +top of which the parallel tasks (e.g. OpenMP tasks) will be run in a +contained fashion. + +The aim of the parallel worker API is to facilitate this process automatically. 
+For this purpose, we depend on the \c hwloc tool to detect the +machine configuration and then partition it into usable parallel workers. + +An example of code running on parallel workers is available in +examples/sched_ctx/parallel_workers.c. + +Let's first look at how to create a parallel worker. + +To enable parallel workers in StarPU, one needs to set the configure option +\ref enable-parallel-worker "--enable-parallel-worker". + +\section CreatingParallelWorkers Creating Parallel Workers + +Partitioning a machine into parallel workers with the parallel worker API is fairly +straightforward. The simplest way is to state under which machine +topology level we wish to regroup all resources. This level is a \c hwloc +object, of the type hwloc_obj_type_t. More information can be found in the +hwloc +documentation. + +Once a parallel worker is created, the full machine is represented with an opaque +structure starpu_parallel_worker_config. This can be printed to show the +current machine state. + +\code{.c} +struct starpu_parallel_worker_config *parallel_workers; +parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, 0); +starpu_parallel_worker_print(parallel_workers); + +/* submit some tasks with OpenMP computations */ + +starpu_parallel_worker_shutdown(parallel_workers); +/* we are back to the default StarPU state */ +\endcode + +The following graphic is an example of what a particular machine can +look like once parallel workers are created. The main difference is that we have fewer +worker queues and tasks which will be executed on several resources at +once. The execution of these tasks will be left to the internal runtime +system, represented with a dashed box around the resources. + +\image latex runtime-par.png "StarPU using parallel tasks" width=0.5\textwidth +\image html runtime-par.png "StarPU using parallel tasks" + +Creating parallel workers as shown in the example above will create workers able to +execute OpenMP code by default. 
The parallel worker creation function +starpu_parallel_worker_init() takes optional parameters after the \c hwloc +object (always terminated by the value \c 0) which allow parametrizing the +parallel workers creation. These parameters can help to create parallel workers of a +type different from OpenMP, or create a more precise partition of the +machine. + +This is explained in Section \ref CreatingCustomParallelWorkers. + +Before starpu_shutdown(), we call starpu_parallel_worker_shutdown() to delete the parallel worker configuration. + +\section ExampleOfConstrainingOpenMP Example Of Constraining OpenMP + +Parallel workers require being able to constrain the runtime managing the internal +task parallelism (internal runtime) to the resources set by StarPU. The +purpose of this is to express how StarPU must communicate with the internal +runtime to achieve the required cooperation. In the case of OpenMP, StarPU +will provide an awake thread from the parallel worker to execute this liaison. It +will then provide on demand the process ids of the other resources supposed +to be in the region. Finally, thanks to an OpenMP region, we can create the +required number of threads and bind each of them on the correct region. +These will then be reused each time we encounter a \#pragma omp +parallel in the following computations of our program. + +The following graphic is an example of what an OpenMP-type parallel worker looks +like and how it is represented in StarPU. We can see that one StarPU (black) +thread is awake, and we need to create on the other resources the OpenMP +threads (in pink). 
+ +\image latex parallel_worker2.png "StarPU with an OpenMP parallel worker" width=0.3\textwidth +\image html parallel_worker2.png "StarPU with an OpenMP parallel worker" + +Finally, the following code shows how to force OpenMP to cooperate with StarPU +and create the aforementioned OpenMP threads constrained in the parallel worker's +resources set: +\code{.c} +void starpu_parallel_worker_openmp_prologue(void * sched_ctx_id) +{ + int sched_ctx = *(int*)sched_ctx_id; + int *cpuids = NULL; + int ncpuids = 0; + int workerid = starpu_worker_get_id(); + + //we can target only CPU workers + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + { + //grab all the ids inside the parallel worker + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + //set the number of threads + omp_set_num_threads(ncpuids); +#pragma omp parallel + { + //bind each thread to its respective resource + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); + } + free(cpuids); + } + return; +} +\endcode + +This function is the default function used when calling starpu_parallel_worker_init() without any extra parameter. + +Parallel workers are based on several tools and models already available within +StarPU contexts, and merely extend contexts. More on contexts can be +read in Section \ref SchedulingContexts. + +A similar example is available in the file examples/sched_ctx/parallel_code.c. + +\section CreatingCustomParallelWorkers Creating Custom Parallel Workers + +Parallel workers can be created either with the predefined types provided +within StarPU, or with user-defined functions to bind another runtime +inside StarPU. + +The predefined parallel worker types provided by StarPU are +::STARPU_PARALLEL_WORKER_OPENMP, ::STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL and +::STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL. 
+ +If StarPU is compiled with the \c MKL library, +::STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL uses MKL functions to set the +number of threads, which is more reliable when using an OpenMP +implementation different from the Intel one. Otherwise, it will behave +as ::STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL. + +The parallel worker type is set when calling the function +starpu_parallel_worker_init() with the parameter ::STARPU_PARALLEL_WORKER_TYPE as +in the example below, which is creating a \c MKL parallel worker. + +\code{.c} +struct starpu_parallel_worker_config *parallel_workers; +parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, + STARPU_PARALLEL_WORKER_TYPE, STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL, + 0); +\endcode + +Using the default type ::STARPU_PARALLEL_WORKER_OPENMP is similar to calling +starpu_parallel_worker_init() without any extra parameter. + +An example is available in examples/parallel_workers/parallel_workers.c. + +Users can also define their own function. + +\code{.c} +void foo_func(void* foo_arg); + +int foo_arg = 0; +struct starpu_parallel_worker_config *parallel_workers; +parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, + STARPU_PARALLEL_WORKER_CREATE_FUNC, &foo_func, + STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, &foo_arg, + 0); +\endcode + +An example is available in examples/parallel_workers/parallel_workers_func.c. + +Parameters that can be given to starpu_parallel_worker_init() are +::STARPU_PARALLEL_WORKER_MIN_NB, +::STARPU_PARALLEL_WORKER_MAX_NB, ::STARPU_PARALLEL_WORKER_NB, +::STARPU_PARALLEL_WORKER_POLICY_NAME, ::STARPU_PARALLEL_WORKER_POLICY_STRUCT, +::STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS, ::STARPU_PARALLEL_WORKER_PREFERE_MIN, +::STARPU_PARALLEL_WORKER_CREATE_FUNC, ::STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, +::STARPU_PARALLEL_WORKER_TYPE, ::STARPU_PARALLEL_WORKER_AWAKE_WORKERS, +::STARPU_PARALLEL_WORKER_PARTITION_ONE, ::STARPU_PARALLEL_WORKER_NEW and +::STARPU_PARALLEL_WORKER_NCORES. 
+ + +\section ParallelWorkersWithSchedulingContextsAPI Parallel Workers With Scheduling + +As previously mentioned, the parallel worker API is implemented +on top of \ref SchedulingContexts. Its main addition is to ease the +creation of a machine CPU partition with no overlapping by using +\c hwloc, whereas scheduling contexts can use any number of any type +of resources. + +It is therefore possible, but not recommended, to create parallel workers +using the scheduling contexts API. This can be useful mostly in the +most complex machine configurations, where users have to dimension +precisely parallel workers by hand using their own algorithm. + +\code{.c} +/* the list of resources the context will manage */ +int workerids[3] = {1, 3, 10}; + +/* indicate the list of workers assigned to it, the number of workers, +the name of the context and the scheduling policy to be used within +the context */ +int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", 0); + +/* let StarPU know that the following tasks will be submitted to this context */ +starpu_sched_ctx_set_task_context(id); + +task->prologue_callback_pop_func=&runtime_interface_function_here; + +/* submit the task to StarPU */ +starpu_task_submit(task); +\endcode + +As this example illustrates, creating a context without scheduling +policy will create a parallel worker. The interface function between StarPU +and the other runtime must be specified through the field +starpu_task::prologue_callback_pop_func. Such a function can be +similar to the OpenMP thread team creation one (see above). An example is available in examples/sched_ctx/parallel_tasks_reuse_handle.c. + +Note that the OpenMP mode is the default mode both for parallel workers and +contexts. The result of a parallel worker creation is a woken-up master worker +and sleeping "slaves" which allow the master to run tasks on their +resources. 
+ +To create a parallel worker with woken-up workers, the flag +::STARPU_SCHED_CTX_AWAKE_WORKERS must be set when using the scheduling +context API function starpu_sched_ctx_create(), or the flag +::STARPU_PARALLEL_WORKER_AWAKE_WORKERS must be set when using the parallel worker API +function starpu_parallel_worker_init(). + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy b/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy new file mode 100644 index 0000000..42c715c --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy @@ -0,0 +1,235 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page SchedulingContextHypervisor Scheduling Context Hypervisor + +\section WhatIsTheHypervisor What Is The Hypervisor + +StarPU proposes a platform to construct Scheduling Contexts, to +delete and modify them dynamically. A parallel kernel can thus +be isolated into a scheduling context and interferences between +several parallel kernels are avoided. If users know exactly how +many workers each scheduling context needs, they can assign them to the +contexts at their creation time or modify them during the execution of +the program.
+ +The Scheduling Context Hypervisor Plugin is available for users +who do not dispose of a regular parallelism, who cannot know in +advance the exact size of the context and need to resize the contexts +according to the behavior of the parallel kernels. + +The Hypervisor receives information from StarPU concerning the +execution of the tasks, the efficiency of the resources, etc. and it +decides accordingly when and how the contexts can be resized. Basic +strategies of resizing scheduling contexts already exist, but a +platform for implementing additional custom ones is available. + +Several examples of hypervisor are provided in sc_hypervisor/examples/*.c + +\section StartTheHypervisor Start the Hypervisor + +The Hypervisor must be initialized once at the beginning of the +application. At this point, a resizing policy should be indicated. This +strategy depends on the information the application is able to provide +to the hypervisor, as well as on the accuracy needed for the resizing +procedure. For example, the application may be able to provide an +estimation of the workload of the contexts. In this situation, the +hypervisor may decide what resources the contexts need. However, if no +information is provided, the hypervisor evaluates the behavior of the +resources and of the application and makes a guess about the future. +The hypervisor resizes only the registered contexts. The basic example is available in the file sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c. + +\section InterrogateTheRuntime Interrogate The Runtime + +The runtime provides the hypervisor with information concerning the +behavior of the resources and the application. This is done by using +the performance_counters which represent callbacks indicating +when the resources are idle or not efficient, when the application +submits tasks or when it becomes too slow. 
+ +\section TriggerTheHypervisor Trigger the Hypervisor + +The resizing is triggered either when the application requires it +(sc_hypervisor_resize_ctxs()) or +when the initial distribution of resources alters the performance of +the application (the application is too slow or the resources are idle +for too long). An example is available in the file sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c. + +If the environment +variable \ref SC_HYPERVISOR_TRIGGER_RESIZE is set to speed, +the monitored speed of the contexts is compared to a theoretical value +computed with a linear program, and the resizing is triggered +whenever the two values do not correspond. Otherwise, if the environment +variable is set to idle, the hypervisor triggers the resizing algorithm +whenever the workers are idle for a period longer than the threshold +indicated by the programmer. When this +happens, different resizing strategies are applied that target minimizing +the total execution time of the application, the instant speed or the idle +time of the resources. + +\section ResizingStrategies Resizing Strategies + +The plugin proposes several strategies for resizing the scheduling context. + +The Application driven strategy uses users' input concerning the moment when they want to resize the contexts. +Thus, users tag the task that should trigger the resizing +process. One can set directly the field starpu_task::hypervisor_tag or +use the macro ::STARPU_HYPERVISOR_TAG in the function +starpu_task_insert(). + +\code{.c} +task.hypervisor_tag = 2; +\endcode + +or + +\code{.c} +starpu_task_insert(&codelet, + ..., + STARPU_HYPERVISOR_TAG, 2, + 0); +\endcode + +Then users have to indicate that when a task with the specified tag is executed, the contexts should resize. + +\code{.c} +sc_hypervisor_resize(sched_ctx, 2); +\endcode + +Users can use the same tag to change the resizing configuration of the contexts if they consider it necessary.
+ +\code{.c} +sc_hypervisor_ctl(sched_ctx, + SC_HYPERVISOR_MIN_WORKERS, 6, + SC_HYPERVISOR_MAX_WORKERS, 12, + SC_HYPERVISOR_TIME_TO_APPLY, 2, + NULL); +\endcode + + +The Idleness based strategy moves workers unused in a certain context to another one needing them. +(see \ref API_SC_Hypervisor_usage) + +\code{.c} +int workerids[3] = {1, 3, 10}; +int workerids2[9] = {0, 2, 4, 5, 6, 7, 8, 9, 11}; +sc_hypervisor_ctl(sched_ctx_id, + SC_HYPERVISOR_MAX_IDLE, workerids, 3, 10000.0, + SC_HYPERVISOR_MAX_IDLE, workerids2, 9, 50000.0, + NULL); +\endcode + +The Gflops/s rate based strategy resizes the scheduling contexts such that they all finish at the same time. +The speed of each of them is computed and once one of them is significantly slower, the resizing process is triggered. +In order to do these computations, users have to input the total number of instructions needed to be executed by the +parallel kernels and the number of instructions to be executed by each +task. + +The number of flops to be executed by a context is passed as + a parameter when the context is registered to the hypervisor, +\code{.c} +sc_hypervisor_register_ctx(sched_ctx_id, flops); +\endcode + +and the number + to be executed by each task is passed when the task is submitted. + The corresponding field is starpu_task::flops and the corresponding + macro in the function starpu_task_insert() is ::STARPU_FLOPS + (Caution: take care to pass a double, not an integer, + otherwise parameter passing will be bogus). When the task is executed, + the resizing process is triggered. + +\code{.c} +task.flops = 100; +\endcode + +or + +\code{.c} +starpu_task_insert(&codelet, + ..., + STARPU_FLOPS, (double) 100, + 0); +\endcode + +The Feft strategy uses a linear program to predict the best distribution of resources +such that the application finishes in a minimum amount of time. As for the Gflops/s rate +strategy, the programmers have to indicate the total number of flops to be executed +when registering the context.
This number of flops may be updated dynamically during the execution +of the application whenever this information is not very accurate from the beginning. +The function sc_hypervisor_update_diff_total_flops() is called in order to add or to remove +a difference to the flops left to be executed. +Tasks are also provided with the number of flops corresponding to each one of them. During the +execution of the application, the hypervisor monitors the consumed flops and recomputes +the time left and the number of resources to use. The speed of each type of resource +is (re)evaluated and inserted in the linear program in order to better adapt to the +needs of the application. + +The Teft strategy uses a linear program too, that considers all the types of tasks +and the number of each of them, and it tries to allocate resources such that the application +finishes in a minimum amount of time. A previous calibration of StarPU would be useful +in order to have good predictions of the execution time of each type of task. + +The types of tasks may be determined directly by the hypervisor when they are submitted. +However, there are applications that do not expose all the graph of tasks from the beginning. +In this case, in order to let the hypervisor know about all the tasks, the function +sc_hypervisor_set_type_of_task() will just inform the hypervisor about future tasks +without submitting them right away. + +The Ispeed strategy divides the execution of the application into several frames. +For each frame, the hypervisor computes the speed of the contexts and tries making them +run at the same speed. The strategy requires less contribution from users, as +the hypervisor requires only the size of the frame in terms of flops.
+ +\code{.c} +int workerids[3] = {1, 3, 10}; +int workerids2[9] = {0, 2, 4, 5, 6, 7, 8, 9, 11}; +sc_hypervisor_ctl(sched_ctx_id, + SC_HYPERVISOR_ISPEED_W_SAMPLE, workerids, 3, 2000000000.0, + SC_HYPERVISOR_ISPEED_W_SAMPLE, workerids2, 9, 200000000000.0, + SC_HYPERVISOR_ISPEED_CTX_SAMPLE, 60000000000.0, + NULL); +\endcode + +The Throughput strategy focuses on maximizing the throughput of the resources +and resizes the contexts such that the machine is running at its maximum efficiency +(maximum instant speed of the workers). + +\section DefiningANewHypervisorPolicy Defining A New Hypervisor Policy + +While Scheduling Context Hypervisor Plugin comes with a variety of +resizing policies (see \ref ResizingStrategies), it may sometimes be +desirable to implement custom policies to address specific problems. +The API described below allows users to write their own resizing policy. + +Here is an example of how to define a new policy + +\code{.c} +struct sc_hypervisor_policy dummy_policy = +{ + .handle_poped_task = dummy_handle_poped_task, + .handle_pushed_task = dummy_handle_pushed_task, + .handle_idle_cycle = dummy_handle_idle_cycle, + .handle_idle_end = dummy_handle_idle_end, + .handle_post_exec_hook = dummy_handle_post_exec_hook, + .custom = 1, + .name = "dummy" +}; +\endcode + +Examples are provided in sc_hypervisor/src/hypervisor_policies/*_policy.c + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy b/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy new file mode 100644 index 0000000..71e5067 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy @@ -0,0 +1,250 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page SchedulingContexts Scheduling Contexts + +TODO: improve! + +\section ContextGeneralIdeas General Ideas + +Scheduling contexts represent abstract sets of workers that allow the +programmers to control the distribution of computational resources +(i.e. CPUs and GPUs) to concurrent kernels. The main goal is +to minimize interferences between the execution of multiple parallel +kernels, by partitioning the underlying pool of workers using +contexts. Scheduling contexts additionally allow a user to make use of +a different scheduling policy depending on the target resource set. + + +\section CreatingAContext Creating A Context + +By default, the application submits tasks to an initial context, which +has at its disposal all the computation resources available to StarPU (all +the workers). If the application programmer plans to launch several +kernels simultaneously, by default these kernels will be +executed within this initial context, using a single scheduler +policy (see \ref TaskSchedulingPolicy). Meanwhile, if the application +programmer is aware of the demands of these kernels and of the +specificity of the machine used to execute them, the workers can be +divided between several contexts.
These scheduling contexts will +isolate the execution of each kernel, and they will permit the use of a +scheduling policy proper to each one of them. + +Scheduling Contexts may be created in two ways: either the programmer +indicates the set of workers corresponding to each context (provided +they know the identifiers of the workers running within StarPU), or the +programmer does not provide any worker list and leaves the Hypervisor +to assign workers to each context according to their needs (\ref +SchedulingContextHypervisor). + +Both cases require a call to the function +starpu_sched_ctx_create(), which requires as input the worker +list (the exact list or a NULL pointer), the number of workers +(or -1 to designate all workers on the platform) and a list of +optional parameters such as the scheduling policy, terminated by a +0. The scheduling policy can be a character string corresponding +to the name of a StarPU predefined policy or the pointer to a custom +policy. The function returns an identifier of the context created, +which you will use to indicate the context you want to submit the +tasks to. A basic example is available in the file examples/sched_ctx/sched_ctx.c. + +\code{.c} +/* the list of resources the context will manage */ +int workerids[3] = {1, 3, 10}; + +/* indicate the list of workers assigned to it, the number of workers, +the name of the context and the scheduling policy to be used within +the context */ +unsigned id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + +/* let StarPU know that the following tasks will be submitted to this context */ +starpu_sched_ctx_set_context(&id_ctx); + +/* submit the task to StarPU */ +starpu_task_submit(task); +\endcode + +Note: Parallel greedy and parallel heft scheduling policies do not support the existence of several disjoint contexts on the machine.
+Combined workers are constructed depending on the entire topology of the machine, not only the one belonging to a context. + +\subsection CreatingAContextWithTheDefaultBehavior Creating A Context With The Default Behavior + +If no scheduling policy is specified when creating the context, +it will be used as another type of resource: a parallel worker. A +parallel worker is a context without a scheduler (possibly delegated to +another runtime). For more information, see \ref ParallelWorker. It +is therefore mandatory to stipulate a scheduler to use the +contexts in this traditional way. + +To create a context with the default scheduler, that is either +controlled through the environment variable STARPU_SCHED or the +StarPU default scheduler, one can explicitly use the option STARPU_SCHED_CTX_POLICY_NAME, "" as in the following example: + +\code{.c} +/* the list of resources the context will manage */ +int workerids[3] = {1, 3, 10}; + +/* indicate the list of workers assigned to it, the number of workers, +and use the default scheduling policy. */ +int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", STARPU_SCHED_CTX_POLICY_NAME, "", 0); + +/* .... */ +\endcode + +A full example is available in the file examples/sched_ctx/two_cpu_contexts.c. + +\section CreatingAGPUContext Creating A Context To Partition a GPU + +The contexts can also be used to group a set of SMs of an NVIDIA GPU in order to isolate +the parallel kernels and allow them to co-execute on a specified partition of the GPU. + +Each context will be mapped to a stream and users can indicate the number of SMs. +The context can be added to a larger context already grouping CPU cores. +This larger context can use a scheduling policy that assigns tasks to both CPUs and contexts (partitions of the GPU) +based on performance models adjusted to the number of SMs. + +The GPU implementation of the task has to be modified accordingly and receive as a parameter the number of SMs.
+ +\code{.c} +/* get the available streams (suppose we have nstreams = 2 by specifying them with STARPU_NWORKER_PER_CUDA=2) */ +int nstreams = starpu_worker_get_stream_workerids(gpu_devid, stream_workerids, STARPU_CUDA_WORKER); + +int sched_ctxs[nstreams]; +sched_ctxs[0] = starpu_sched_ctx_create(&stream_workerids[0], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, 6, 0); +sched_ctxs[1] = starpu_sched_ctx_create(&stream_workerids[1], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, 7, 0); + +int ncpus = 4; +int workers[ncpus+nstreams]; +workers[ncpus+0] = stream_workerids[0]; +workers[ncpus+1] = stream_workerids[1]; + +big_sched_ctx = starpu_sched_ctx_create(workers, ncpus+nstreams, "ctx1", STARPU_SCHED_CTX_SUB_CTXS, sched_ctxs, nstreams, STARPU_SCHED_CTX_POLICY_NAME, "dmdas", 0); + +starpu_task_submit_to_ctx(task, big_sched_ctx); + +\endcode + +A full example is available in the file examples/sched_ctx/gpu_partition.c. + +\section ModifyingAContext Modifying A Context + +A scheduling context can be modified dynamically. The application may +change its requirements during the execution, and the programmer can +add additional workers to a context or remove those no longer needed. In +the following example, we have two scheduling contexts +sched_ctx1 and sched_ctx2. After executing a part of the +tasks, some of the workers of sched_ctx1 will be moved to +context sched_ctx2. + +\code{.c} +/* the list of resources that context 1 will give away */ +int workerids[3] = {1, 3, 10}; + +/* add the workers to context 2 */ +starpu_sched_ctx_add_workers(workerids, 3, sched_ctx2); + +/* remove the workers from context 1 */ +starpu_sched_ctx_remove_workers(workerids, 3, sched_ctx1); +\endcode + +An example is available in the file examples/sched_ctx/sched_ctx_remove.c. + +\section SubmittingTasksToAContext Submitting Tasks To A Context +The application may submit tasks to several contexts, either +simultaneously or sequentially.
If several threads of submission +are used, the function starpu_sched_ctx_set_context() may be called just +before starpu_task_submit(). Thus, StarPU considers that +the current thread will submit tasks to the corresponding context. An example is available in the file examples/sched_ctx/gpu_partition.c. + +When the application cannot assign a thread of submission to each +context, the id of the context must be indicated by using the +function starpu_task_submit_to_ctx() or the field \ref STARPU_SCHED_CTX +for starpu_task_insert(). An example is available in the file examples/sched_ctx/sched_ctx.c. + +\section DeletingAContext Deleting A Context + +When a context is no longer needed, it must be deleted. The application +can indicate which context should keep the resources of a deleted one. +All the tasks of the context should be executed before doing this. +Thus, the programmer may use either a barrier and then delete the context +directly, or just indicate +that other tasks will not be submitted later on to the context (such that when +the last task is executed its workers will be moved to the inheritor) +and delete the context at the end of the execution (when a barrier will +be used eventually). + +\code{.c} +/* when the context 2 is deleted context 1 inherits its resources */ +starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + +/* submit tasks to context 2 */ +for (i = 0; i < ntasks; i++) + starpu_task_submit_to_ctx(task[i],sched_ctx2); + +/* indicate that context 2 finished submitting and that */ +/* as soon as the last task of context 2 finished executing */ +/* its workers can be moved to the inheritor context */ +starpu_sched_ctx_finished_submit(sched_ctx2); + +/* wait for the tasks of both contexts to finish */ +starpu_task_wait_for_all(); + +/* delete context 2 */ +starpu_sched_ctx_delete(sched_ctx2); + +/* delete context 1 */ +starpu_sched_ctx_delete(sched_ctx1); +\endcode + +A full example is available in the file examples/sched_ctx/sched_ctx.c.
+ +\section EmptyingAContext Emptying A Context + +A context may have no resources at the beginning or at a certain +moment of the execution. Tasks can still be submitted to these contexts; +they will be executed as soon as the contexts have resources. A list +of tasks pending to be executed is kept and will be submitted when +workers are added to the contexts. + +\code{.c} +/* create an empty context */ +unsigned sched_ctx_id = starpu_sched_ctx_create(NULL, 0, "ctx", 0); + +/* submit a task to this context */ +starpu_sched_ctx_set_context(&sched_ctx_id); +ret = starpu_task_insert(&codelet, 0); +STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + +/* add CPU workers to the context */ +int procs[STARPU_NMAXWORKERS]; +int nprocs = starpu_cpu_worker_get_count(); +starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); +starpu_sched_ctx_add_workers(procs, nprocs, sched_ctx_id); + +/* and wait for the task termination */ +starpu_task_wait_for_all(); +\endcode + +The full example is available in the file examples/sched_ctx/sched_ctx_empty.c. + +However, if resources are never allocated to the context, the +application will not terminate. If these tasks have low priority, the +application can inform StarPU to not submit them by calling the +function starpu_sched_ctx_stop_task_submission(). + + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy b/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy new file mode 100644 index 0000000..9102440 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy @@ -0,0 +1,704 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page HowToDefineANewSchedulingPolicy How To Define A New Scheduling Policy + +\section NewSchedulingPolicy_Introduction Introduction + +StarPU provides two ways of defining a scheduling policy, a basic monolithic +way, and a modular way. + +The basic monolithic way is directly connected with the core of StarPU, which +means that the policy then has to handle all performance details, such as data +prefetching, task performance model calibration, worker locking, etc. +examples/scheduler/dummy_sched.c is a trivial example which does not +handle this, and thus e.g. does not achieve any data prefetching or smart +scheduling. + +The modular way allows implementing just one component, and +reuse existing components to cope with all these details. +examples/scheduler/dummy_modular_sched.c is a trivial example very +similar to dummy_sched.c, but implemented as a component, which allows +assembling it with other components, and notably get data prefetching support for +free, and task performance model calibration is properly performed, which allows +to easily extend it into taking task duration into account, etc. 
+ +\section SchedulingHelpers Helper functions for defining a scheduling policy (Basic or modular) + +Make sure to have a look at the \ref API_Scheduling_Policy section, which +provides a complete list of the functions available for writing advanced schedulers. + +This includes getting an estimation for a task computation completion with +starpu_task_expected_length(), for a speedup factor relative to CPU speed with +starpu_worker_get_relative_speedup(), for the expected data transfer time in micro-seconds +with starpu_task_expected_data_transfer_time(), starpu_task_expected_data_transfer_time_for(), or starpu_data_expected_transfer_time(), +for the expected conversion time in micro-seconds with starpu_task_expected_conversion_time(), +for the required energy with +starpu_task_expected_energy() or starpu_task_worker_expected_energy(), etc. Per-worker variants are also available with +starpu_task_worker_expected_length(), etc. +The average over workers is also available with +starpu_task_expected_length_average() and starpu_task_expected_energy_average(). +Other useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(), +starpu_transfer_predict(), ... +The successors of a task can be obtained with starpu_task_get_task_succs(). +One can also directly test the presence of a data handle with starpu_data_is_on_node(). +Prefetches can be triggered by calling either starpu_prefetch_task_input_for(), +starpu_idle_prefetch_task_input_for(), starpu_prefetch_task_input_for_prio(), or +starpu_idle_prefetch_task_input_for_prio(). And prefetching data on a specified node can use either +starpu_prefetch_task_input_on_node(), starpu_prefetch_task_input_on_node_prio(), +starpu_idle_prefetch_task_input_on_node(), or starpu_idle_prefetch_task_input_on_node_prio(). +The _prio versions allow specifying a priority for the transfer (instead of taking the task priority by +default). These prefetches are only processed when there are no fetch data +requests (i.e. 
a task is waiting for it) to process. The _idle versions +queue the transfers on the idle prefetch queue, which is only processed when +there are no non-idle prefetches to process. +starpu_get_prefetch_flag() is a convenient helper for checking the value of the +\ref STARPU_PREFETCH environment variable. +When a scheduler does such prefetching, it should set the prefetches +field of the starpu_sched_policy to 1, to prevent the core from +triggering its own prefetching. + +For applications that need to prefetch data or to perform other pre-execution setup before a task is executed, it is useful to call the function starpu_task_notify_ready_soon_register() which registers a callback function when a task is about to become ready for execution. starpu_worker_set_going_to_sleep_callback() and starpu_worker_set_waking_up_callback() allow to register an external resource manager callback function that will be notified about workers going to sleep or waking up, when StarPU is compiled with support for blocking drivers and worker callbacks. + +Schedulers should call starpu_task_set_implementation() or starpu_task_get_implementation() to specify or to retrieve the codelet implementation to be executed when executing a specific task. + +One can determine if a worker type is capable of executing a specific task by calling the function starpu_worker_type_can_execute_task(). The function starpu_sched_find_all_worker_combinations() must be used to identify all viable worker combinations that can execute a parallel task. +starpu_combined_worker_get_count() and starpu_worker_is_combined_worker() can be used to determine the number of different combined workers and whether a particular worker is a combined worker respectively. starpu_combined_worker_get_id() allows to get the identifier of the current combined worker. 
+starpu_combined_worker_assign_workerid() allows users to register a new combined worker and get its identifier; it then needs to be given to a worker collection with starpu_worker_collection::add. starpu_combined_worker_get_description() returns the description of a combined worker. +Additionally, the function starpu_worker_is_blocked_in_parallel() is utilized to determine if a worker is currently blocked in a parallel task, whereas starpu_worker_is_slave_somewhere() can be called to determine if a worker is presently functioning as a slave for another worker. +StarPU also provides two functions for initializing and preparing the execution of parallel tasks: starpu_parallel_task_barrier_init() and starpu_parallel_task_barrier_init_n(). + +Usual functions can be used on tasks, for instance one can use the following to +get the data size for a task. + +\code{.c} +size = 0; +write = 0; +if (task->cl) + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + { + starpu_data_handle_t data = STARPU_TASK_GET_HANDLE(task, i); + size_t datasize = starpu_data_get_size(data); + size += datasize; + if (STARPU_TASK_GET_MODE(task, i) & STARPU_W) + write += datasize; + } +\endcode + +Task queues can be implemented with the starpu_task_list functions. The function starpu_task_list_init() is used to initialize an empty list structure. Once the list is initialized, new tasks can be added to it using the starpu_task_list_push_front() and starpu_task_list_push_back() to add a task to the front or back of the list respectively. starpu_task_list_front() and starpu_task_list_back() can be used to get the first or last task in the list without removing it. starpu_task_list_begin() and starpu_task_list_end() can be used to get the task iterators from the beginning of the list and check whether it is the end of the list respectively. starpu_task_list_next() can be used to get the next task in the list, which is not erase-safe.
starpu_task_list_empty() can be used to check whether the list is empty. +To remove tasks from the queue, the function starpu_task_list_erase() is used to remove a specific task from the list. starpu_task_list_pop_front() and starpu_task_list_pop_back() can be used to remove the first or last task from the list. +Finally, the function starpu_task_list_ismember() is used to check whether a given task is contained in the list. The function starpu_task_list_move() is used to move list from one head to another. + +Access to the \c hwloc topology is available with starpu_worker_get_hwloc_obj(). + +\section DefiningANewBasicSchedulingPolicy Defining A New Basic Scheduling Policy + +A full example showing how to define a new scheduling policy is available in +the StarPU sources in examples/scheduler/dummy_sched.c. + +The scheduler has to provide methods: + +\code{.c} +static struct starpu_sched_policy dummy_sched_policy = +{ + .init_sched = init_dummy_sched, + .deinit_sched = deinit_dummy_sched, + .add_workers = dummy_sched_add_workers, + .remove_workers = dummy_sched_remove_workers, + .push_task = push_task_dummy, + .pop_task = pop_task_dummy, + .policy_name = "dummy", + .policy_description = "dummy scheduling strategy" +}; +\endcode + +The idea is that when a task becomes ready for execution, the +starpu_sched_policy::push_task method is called to give the ready task to the +scheduler. Then call starpu_push_task_end() to notify that the specified task has been pushed. +When a worker is idle, the starpu_sched_policy::pop_task method is +called to get a task from the scheduler. It is up to the +scheduler to implement what is between. A simple eager scheduler is for instance +to make starpu_sched_policy::push_task push the task to a global list, and make +starpu_sched_policy::pop_task pop from this list. 
A scheduler can also use +starpu_push_local_task() to directly push tasks to a per-worker queue, and then +StarPU does not even need to implement starpu_sched_policy::pop_task. +If there are no ready tasks within the scheduler, it can just return \c NULL, and +the worker will sleep. + +starpu_sched_policy::add_workers and starpu_sched_policy::remove_workers are used to add or remove workers to or from a scheduling policy, so that the number of workers in a policy can be dynamically adjusted. After adding or removing workers from a scheduling policy, the worker task lists should be updated to ensure that the workers are assigned tasks appropriately. By calling starpu_sched_ctx_worker_shares_tasks_lists(), you can specify whether a worker may pop tasks from the task list of other workers or if there is a central list with tasks for all the workers. + +The \ref starpu_sched_policy section provides the exact rules that govern the +methods of the policy. + +One can enumerate the workers with this iterator: + +\code{.c} +struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); +struct starpu_sched_ctx_iterator it; + +workers->init_iterator(workers, &it); +while(workers->has_next(workers, &it)) +{ + unsigned worker = workers->get_next(workers, &it); + ... +} +\endcode + +To provide synchronization between workers, a per-worker lock exists to protect +the data structures of a given worker. It is acquired around scheduler methods, +so that the scheduler does not need any additional mutex to protect its per-worker data. + +In case the scheduler wants to access another scheduler's data, it should use +starpu_worker_lock() and starpu_worker_unlock(), or use starpu_worker_trylock() which will not block if the lock is not immediately available, +or use starpu_worker_lock_self() and starpu_worker_unlock_self() to acquire and to release a lock on the worker associated with the current thread. 
+ +Calling \code{.c}starpu_worker_lock(B)\endcode from a worker \c A will however make +worker \c A wait for worker \c B to complete its scheduling method. That may be +a problem if that method takes a long time, because it is e.g. computing a +heuristic or waiting for another mutex, or even cause deadlocks if worker \c B is +calling \code{.c}starpu_worker_lock(A)\endcode at the same time. In such a case, worker \c B must +call starpu_worker_relax_on() and starpu_worker_relax_off() around the section +which potentially blocks (and does not actually need protection). While a worker +is in relaxed mode, i.e. between a pair of starpu_worker_relax_on() and +starpu_worker_relax_off() calls, its state can be altered by other threads: for +instance, worker \c A can push tasks for worker \c B. In consequence, worker \c B +must re-assess its state after \code{.c}starpu_worker_relax_off(B)\endcode, such as taking +possible new tasks pushed to its queue into account. One can call starpu_worker_get_relax_state() to query the relaxation state of a worker. + +When the starpu_sched_policy::push_task method has pushed a task for another +worker, one has to call starpu_wake_worker_relax(), starpu_wake_worker_relax_light(), starpu_wake_worker_no_relax() or starpu_wake_worker_locked() so that the worker wakes up +and picks it. If the task was pushed on a shared queue, one may want to only +wake one idle worker. An example doing this is available in +src/sched_policies/eager_central_policy.c. When the scheduling policy makes a scheduling decision for a task, it should call starpu_sched_task_break(). + +Schedulers can set the minimum or maximum task priority level supported by the scheduling +policy by calling starpu_sched_set_min_priority() or starpu_sched_set_max_priority(), and then applications can call +starpu_sched_get_min_priority() or starpu_sched_get_max_priority() to retrieve the minimum or maximum priority value.
The file src/sched_policies/heteroprio.c shows how to use these functions. + +When scheduling a task, it is important to check whether the specified worker can execute the codelet before assigning the task to that worker. This is done using the starpu_worker_can_execute_task() function, or starpu_combined_worker_can_execute_task() which is compatible with combined workers, or starpu_worker_can_execute_task_impl() which also returns the list of implementation numbers that can be used by the worker to execute the task, or starpu_worker_can_execute_task_first_impl() which also returns the first implementation number that can be used. + +A pointer to one data structure specific to the scheduler can be set with +starpu_sched_ctx_set_policy_data() and fetched with +starpu_sched_ctx_get_policy_data(). Per-worker data structures can then be +stored in it by allocating a \ref STARPU_NMAXWORKERS -sized array of structures indexed +by workers. + +A variety of examples of +advanced schedulers can be read in src/sched_policies, for +instance random_policy.c, eager_central_policy.c, +work_stealing_policy.c. Code protected by +if (_starpu_get_nsched_ctxs() > 1) can be ignored, this is for scheduling +contexts, which is an experimental feature. + +\section DefiningANewModularSchedulingPolicy Defining A New Modular Scheduling Policy + +StarPU's Modularized Schedulers are made of individual Scheduling Components +modularly assembled as a Scheduling Tree. Each Scheduling Component has a +unique purpose, such as prioritizing tasks or mapping tasks over resources. +A typical Scheduling Tree is shown below. + +
+                                 |
+             starpu_push_task    |
+                                 |
+                                 v
+                           Fifo_Component
+                                |  ^
+                        Push    |  |    Can_Push
+                                v  |
+                          Eager_Component
+                                |  ^
+                                |  |
+                                v  |
+              --------><-------------------><---------
+              |  ^                                |  ^
+      Push    |  |    Can_Push            Push    |  |    Can_Push
+              v  |                                v  |
+         Fifo_Component                       Fifo_Component
+              |  ^                                |  ^
+      Pull    |  |    Can_Pull            Pull    |  |    Can_Pull
+              v  |                                v  |
+        Worker_Component                     Worker_Component
+                  |                             |
+starpu_pop_task   |                             |
+                  v                             v
+
+ +When a task is pushed by StarPU in a Modularized Scheduler, the task moves from +one Scheduling Component to another, following the hierarchy of the +Scheduling Tree, and is stored in one of the Scheduling Components of the +strategy. +When a worker wants to pop a task from the Modularized Scheduler, the +corresponding Worker Component of the Scheduling Tree tries to pull a task from +its parents, following the hierarchy, and gives it to the worker if it succeeded +in getting one. + +\subsection Interface + +Each Scheduling Component must follow the following pre-defined Interface +to be able to interact with other Scheduling Components. + +
    +
  • push_task (child_component, Task) \n +The calling Scheduling Component transfers a task to its +Child Component. When the Push function returns, the task no longer +belongs to the calling Component. The Modularized Schedulers' +model relies on this function to perform prefetching. +See starpu_sched_component::push_task for more details +
  • + +
  • pull_task (parent_component, caller_component) -> Task \n +The calling Scheduling Component requests a task from +its Parent Component. When the Pull function ends, the returned +task belongs to the calling Component. +See starpu_sched_component::pull_task for more details +
  • + +
  • can_push (caller_component, parent_component) \n +The calling Scheduling Component notifies its Parent Component that +it is ready to accept new tasks. +See starpu_sched_component::can_push for more details +
  • + +
  • can_pull (caller_component, child_component) \n +The calling Scheduling Component notifies its Child Component +that it is ready to give new tasks. +See starpu_sched_component::can_pull for more details +
  • +
+ +The components also provide the following useful methods: + +
    +
  • +starpu_sched_component::estimated_load provides an estimated load of the component +
  • +
  • +starpu_sched_component::estimated_end provides an estimated date of +availability of workers behind the component, after processing tasks +in the component and below. This is computed only if the estimated +field of the tasks have been set before passing it to the component. +
  • +
+ +\subsection BuildAModularizedScheduler Building a Modularized Scheduler + +\subsubsection PreImplementedComponents Pre-implemented Components + +StarPU is currently shipped with the following four Scheduling Components : + +
    +
  • Storage Components : Fifo, Prio \n +Components which store tasks. They can also prioritize them if +they have a defined priority. It is possible to define a threshold +for those Components following two criteria : the number of tasks +stored in the Component, or the sum of the expected length of all +tasks stored in the Component. When a push operation tries to queue a +task beyond the threshold, the push fails. When some task leaves the +queue (and thus possibly more tasks can fit), this component calls +can_push from ancestors. +
  • + +
  • Resource-Mapping Components : Mct, Heft, Eager, Random, Work-Stealing \n +"Core" of the Scheduling Strategy, those Components are the +ones who make scheduling choices between their children components. +
  • + +
  • Worker Components : Worker \n +Each Worker Component models a concrete worker, and copes with the +technical tricks of interacting with the StarPU core. Modular schedulers +thus usually have them at the bottom of their component tree. +
  • + +
  • Special-Purpose Components : Perfmodel_Select, Best_Implementation \n +Components dedicated to original purposes. The Perfmodel_Select +Component decides which Resource-Mapping Component should be used to +schedule a task: a component that assumes tasks with a calibrated +performance model; a component for not-yet-calibrated tasks, that will +distribute them to get measurements done as quickly as possible; and a +component that takes the tasks without performance models.\n +The Best_Implementation Component chooses which +implementation of a task should be used on the chosen resource. +
  • +
+ +\subsubsection ProgressionAndValidationRules Progression And Validation Rules + +Some rules must be followed to ensure the correctness of a Modularized +Scheduler : + +
    +
  • At least one Storage Component without threshold is needed in a +Modularized Scheduler, to store incoming tasks from StarPU. It can for +instance be a global component at the top of the tree, or one component +per worker at the bottom of the tree, or intermediate assemblies. The +important point is that the starpu_sched_component::push_task call at the top can not +fail, so there has to be a storage component without threshold between +the top of the tree and the first storage component with threshold, or +the workers themselves. +
  • + +
  • At least one Resource-Mapping Component is needed in a Modularized +Scheduler. Resource-Mapping Components are the only ones which can make +scheduling choices, and so the only ones which can have several children. +
  • +
+ +\subsubsection ModularizedSchedulerLocking Locking in modularized schedulers + +Most often, components do not need to take locks. This allows e.g. the push +operation to be called in parallel when tasks get released in parallel from +different workers which have completed different ancestor tasks. + +When a component has internal information which needs to be kept coherent, the +component can define its own lock to take it as it sees fit, e.g. to protect a +task queue. This may however limit scalability of the scheduler. Conversely, +since push and pull operations will be called concurrently from different +workers, the component might prefer to use a central mutex to serialize all +scheduling decisions to avoid pathological cases (all push calls decide to put +their task on the same target) + +\subsubsection ImplementAModularizedScheduler Implementing a Modularized Scheduler + +The following code shows how to implement a Tree-Eager-Prefetching Scheduler. + +\code{.c} +static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) +{ + /* The eager component will decide for each task which worker will run it, + * and we want fifos both above and below the component */ + starpu_sched_component_initialize_simple_scheduler( + starpu_sched_component_eager_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFOS_BELOW, + sched_ctx_id); +} + +/* Initializing the starpu_sched_policy struct associated to the Modularized + * Scheduler : only the init_sched and deinit_sched needs to be defined to + * implement a Modularized Scheduler */ +struct starpu_sched_policy _starpu_sched_tree_eager_prefetching_policy = +{ + .init_sched = initialize_eager_prefetching_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = 
starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "tree-eager-prefetching", + .policy_description = "eager with prefetching tree policy" +}; +\endcode + +starpu_sched_component_initialize_simple_scheduler() is a helper function which +makes it very trivial to assemble a modular scheduler around a scheduling +decision component as seen above (here, a dumb eager decision component). Most +often, a modular scheduler can be implemented that way. + +A modular scheduler can also be constructed hierarchically with +starpu_sched_component_composed_recipe_create(). + +To retrieve the current scheduling tree of a task, starpu_sched_tree_get() can be called. + +That modular scheduler can also be built by hand in the following way: + +\code{.c} +#define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 +#define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 + +static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) +{ + unsigned ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT; + double exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT; + + [...] + + starpu_sched_ctx_create_worker_collection + (sched_ctx_id, STARPU_WORKER_LIST); + + /* Create the Scheduling Tree */ + struct starpu_sched_tree * t = starpu_sched_tree_create(sched_ctx_id); + + /* The Root Component is a Flow-control Fifo Component */ + t->root = starpu_sched_component_fifo_create(NULL); + + /* The Resource-mapping Component of the strategy is an Eager Component + */ + struct starpu_sched_component *eager_component = starpu_sched_component_eager_create(NULL); + + /* Create links between Components : the Eager Component is the child + * of the Root Component */ + starpu_sched_component_connect(t->root, eager_component); + + /* A task threshold is set for the Flow-control Components which will + * be connected to Worker Components. 
By doing so, this Modularized + * Scheduler will be able to perform some prefetching on the resources + */ + struct starpu_sched_component_fifo_data fifo_data = + { + .ntasks_threshold = ntasks_threshold, + .exp_len_threshold = exp_len_threshold, + }; + + unsigned i; + for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) + { + /* Each Worker Component has a Flow-control Fifo Component as + * father */ + struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(i); + struct starpu_sched_component * fifo_component = starpu_sched_component_fifo_create(&fifo_data); + starpu_sched_component_connect(fifo_component, worker_component); + + /* Each Flow-control Fifo Component associated to a Worker + * Component is linked to the Eager Component as one of its + * children */ + starpu_sched_component_connect(eager_component, fifo_component); + } + + starpu_sched_tree_update_workers(t); + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); +} + +/* Properly destroy the Scheduling Tree and all its Components */ +static void deinitialize_eager_prefetching_center_policy(unsigned sched_ctx_id) +{ + struct starpu_sched_tree * tree = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + starpu_sched_tree_destroy(tree); + starpu_sched_ctx_delete_worker_collection(sched_ctx_id); +} + +/* Initializing the starpu_sched_policy struct associated to the Modularized + * Scheduler : only the init_sched and deinit_sched needs to be defined to + * implement a Modularized Scheduler */ +struct starpu_sched_policy _starpu_sched_tree_eager_prefetching_policy = +{ + .init_sched = initialize_eager_prefetching_center_policy, + .deinit_sched = deinitialize_eager_prefetching_center_policy, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = 
starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "tree-eager-prefetching", + .policy_description = "eager with prefetching tree policy" +}; +\endcode + +Instead of calling starpu_sched_tree_update_workers(), one can call starpu_sched_tree_update_workers_in_ctx() to update the set of workers that are available to execute tasks in a given scheduling tree within a specific StarPU context. + +Other modular scheduler examples can be seen in src/sched_policies/modular_*.c + +For instance, \c modular-heft-prio needs performance models, decides +memory nodes, uses prioritized fifos above and below, and decides the best +implementation. + +If unsure on the result of the modular scheduler construction, you can run a +simple application with FxT enabled (see \ref GeneratingTracesWithFxT), and open +the generated file \c trace.html in a web-browser. + +\subsection ModularizedSchedulersAndParallelTasks Management of parallel task + +At the moment, parallel tasks can be managed in modularized schedulers through +combined workers: instead of connecting a scheduling component to a worker +component, one can connect it to a combined worker component (i.e. a worker +component created with a combined worker id). That component will handle +creating task aliases for parallel execution and push them to the different +workers components. + +\subsection WriteASchedulingComponent Writing a Scheduling Component + +\subsubsection GenericSchedulingComponent Generic Scheduling Component + +Each Scheduling Component is instantiated from a Generic Scheduling Component, +which implements a generic version of the Interface. The generic implementation +of Pull, Can_Pull and Can_Push functions are recursive calls to their parents +(respectively to their children). However, as a Generic Scheduling Component do +not know how many children it will have when it will be instantiated, it does +not implement the Push function. 
+ +\subsubsection InstantiationRedefineInterface Instantiation : Redefining the Interface + +A Scheduling Component must implement all the functions of the Interface. It is +therefore necessary to implement a Push function to instantiate a Scheduling Component. +The implemented Push function is the "fingerprint" of a Scheduling Component. +Depending on which functionalities or properties programmers want to give +to the Scheduling Component they are implementing, it is possible to reimplement +all the functions of the Interface. For example, a Flow-control Component +reimplements the Pull and the Can_Push functions of the Interface, allowing +to catch the generic recursive calls of these functions. The Pull function of +a Flow-control Component can, for example, pop a task from the local storage +queue of the Component, and give it to the calling Component which asks for it. + +\subsubsection DetailedProgressionAndValidationRules Detailed Progression and Validation Rules + +
    +
  • A Reservoir is a Scheduling Component which redefines a Push and a Pull +function, in order to store tasks into it. A Reservoir delimits Scheduling +Areas in the Scheduling Tree. +
  • + +
  • A Pump is the engine source of the Scheduler : it pushes/pulls tasks +to/from one Scheduling Component to another. Native Pumps of a Scheduling +Tree are located at the root of the Tree (incoming Push calls from StarPU), +and at the leaves of the Tree (Pop calls coming from StarPU Workers). +Pre-implemented Scheduling Components currently shipped with Pumps are +Flow-Control Components and the Resource-Mapping Component Heft, within +their defined Can_Push functions. +
  • + +
  • A correct Scheduling Tree requires a Pump per Scheduling Area and per +Execution Flow. +
  • +
+ + +The Tree-Eager-Prefetching Scheduler shown in Section +\ref ImplementAModularizedScheduler follows the previous assumptions : + +
+                                  starpu_push_task
+                                       Pump
+                                         |
+ Area 1                                  |
+                                         |
+                                         v
+            -----------------------Fifo_Component-----------------------------
+                                       Pump
+                                        |  ^
+                                Push    |  |    Can_Push
+                                        v  |
+ Area 2                           Eager_Component
+                                        |  ^
+                                        |  |
+                                        v  |
+                      --------><-------------------><---------
+                      |  ^                                |  ^
+              Push    |  |    Can_Push            Push    |  |    Can_Push
+                      v  |                                v  |
+            -----Fifo_Component-----------------------Fifo_Component----------
+                      |  ^                                |  ^
+              Pull    |  |    Can_Pull            Pull    |  |    Can_Pull
+ Area 3               v  |                                v  |
+                     Pump                               Pump
+                Worker_Component                     Worker_Component
+
+ +\section UsingaNewSchedulingPolicy Using a New Scheduling Policy + +There are two ways to use a new scheduling policy. + +
    +
  • +If the code is directly available from your application, you can set +the field starpu_conf::sched_policy with a pointer to your newly defined +scheduling policy. + +\code{.c} +starpu_conf_init(&conf); +conf.sched_policy = &dummy_sched_policy; +ret = starpu_init(&conf); +\endcode +
  • + +
  • +You can also load the new policy dynamically using the environment +variable \ref STARPU_SCHED_LIB. An example is given in +examples/scheduler/libdummy_sched.c and +examples/scheduler/libdummy_sched.sh. + +The variable \ref STARPU_SCHED_LIB needs to give the location of a \c +.so file which needs to define a function struct +starpu_sched_policy *starpu_get_sched_lib_policy(const char *name): + +\code{.c} +struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name) +{ + if (!strcmp(name, "dummy")) + return &dummy_sched_policy; + return NULL; +} +\endcode + +To use it, you need to define both variables \ref STARPU_SCHED_LIB and +\ref STARPU_SCHED. + +\code{.sh} +STARPU_SCHED_LIB=libdummy_sched.so STARPU_SCHED=dummy yourapplication +\endcode + +If the library defines a function struct +starpu_sched_policy **starpu_get_sched_lib_policies(), the +policies defined by the library can be displayed using the \c help +functionality. + +\code{.sh} +STARPU_SCHED_LIB=libdummy_sched.so STARPU_SCHED=help yourapplication +\endcode + +
  • +
+ +\section GraphScheduling Graph-based Scheduling + +For performance reasons, most of the schedulers shipped with StarPU use simple +list-scheduling heuristics, assuming that the application has already set +priorities. This is why they do their scheduling between when tasks become +available for execution and when a worker becomes idle, without looking at the +task graph. + +Other heuristics can however look at the task graph. Recording the task graph +is expensive, so it is not available by default, the scheduling heuristic has +to set \c _starpu_graph_record to \c 1 from the initialization function, to make it +available. Then the _starpu_graph* functions can be used. + +src/sched_policies/graph_test_policy.c is an example of simple greedy +policy which automatically computes priorities by bottom-up rank. + +The idea is that while the application submits tasks, they are only pushed +to a bag of tasks. When the application is finished with submitting tasks, +it calls starpu_do_schedule() (or starpu_task_wait_for_all(), which calls +starpu_do_schedule()), and the starpu_sched_policy::do_schedule method of the +scheduler is called. This method calls \c _starpu_graph_compute_depths() to compute +the bottom-up ranks, and then uses these ranks to set priorities over tasks. + +It then has two priority queues, one for CPUs, and one for GPUs, and uses a dumb +heuristic based on the duration of the task over CPUs and GPUs to decide between +the two queues. CPU workers can then pop from the CPU priority queue, and GPU +workers from the GPU priority queue. + +\section DebuggingScheduling Debugging Scheduling + +All the \ref OnlinePerformanceTools and \ref OfflinePerformanceTools can +be used to get information about how well the execution proceeded, and thus the +overall quality of the execution. 
+ +Precise debugging can also be performed by using the +\ref STARPU_TASK_BREAK_ON_PUSH, \ref STARPU_TASK_BREAK_ON_SCHED, +\ref STARPU_TASK_BREAK_ON_POP, and \ref STARPU_TASK_BREAK_ON_EXEC environment variables. +By setting the job_id of a task +in these environment variables, StarPU will raise SIGTRAP when the task is being +scheduled, pushed, or popped by the scheduler. This means that when one notices +that a task is being scheduled in a seemingly odd way, one can just re-execute +the application in a debugger, with some of those variables set, and the +execution will stop exactly at the scheduling points of this task, thus allowing +to inspect the scheduler state, etc. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/simgrid.doxy b/doc/doxygen/chapters/starpu_extensions/simgrid.doxy new file mode 100644 index 0000000..7a58e15 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/simgrid.doxy @@ -0,0 +1,278 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2022 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * NOTE: XXX: also update simgrid versions in 101_building.doxy !! + */ + +/*! \page SimGridSupport SimGrid Support + +StarPU can use SimGrid in order to simulate execution on an arbitrary +platform. 
+ +The principle is to first run the application natively on the platform that one +wants to later simulate, and let StarPU record performance models. One +then recompiles StarPU and the application in simgrid mode, where everything +is executed the same, except the execution of the codelet function, and the +data transfers, which are replaced by virtual sleeps based on the performance +models. This thus allows to use the performance model for tasks and data +transfers, while executing natively all the rest (the task scheduler and the +application, notably). + +This was tested with SimGrid from 3.11 to 3.16, and 3.18 to +3.36. SimGrid version 3.25 needs to be configured with -Denable_msg=ON . +Other versions may have compatibility issues. 3.17 notably does not build at +all. MPI simulation does not work with version 3.22. + +If you have installed SimGrid by hand, make sure to set \c PKG_CONFIG_PATH to +the path where \c simgrid.pc was installed: + +\verbatim +$ export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/where/simgrid/installed/lib/pkgconfig/simgrid.pc +\endverbatim + +\section Preparing Preparing Your Application For Simulation + +There are a few technical details which need to be handled for an application to +be simulated through SimGrid. + +If the application uses gettimeofday() to make its +performance measurements, the real time will be used, which will be bogus. To +get the simulated time, it has to use starpu_timing_now() which returns the +virtual timestamp in us. A basic example is available in tests/main/empty_task.c. + +For some technical reason, the application's .c file which contains \c main() has +to be recompiled with \c starpu_simgrid_wrap.h, which in the SimGrid case will # define main() +into starpu_main(), and it is \c libstarpu which will provide the real \c main() and +will call the application's \c main().
Including \c starpu.h will already +include \c starpu_simgrid_wrap.h, so usually you would not need to include +\c starpu_simgrid_wrap.h explicitly, but if for some reason including the whole +\c starpu.h header is not possible, you can include \c starpu_simgrid_wrap.h +explicitly. + +To be able to test with crazy data sizes, one may want to only allocate +application data if the macro \c STARPU_SIMGRID is not defined. Passing a NULL pointer to +\c starpu_data_register functions is fine, data will never be read/written to by +StarPU in SimGrid mode anyway. + +To be able to run the application with e.g. CUDA simulation on a system which +does not have CUDA installed, one can fill the starpu_codelet::cuda_funcs with \c (void*)1, to +express that there is a CUDA implementation, even if one does not actually +provide it. StarPU will not actually run it in SimGrid mode anyway by default +(unless the ::STARPU_CODELET_SIMGRID_EXECUTE or ::STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT +flags are set in the codelet) + +\snippet simgrid.c To be included. You should update doxygen if you see this text. + +The full example is available in examples/cholesky/cholesky_kernels.c. + +\section Calibration Calibration + +The idea is to first compile StarPU normally, and run the application, +to automatically benchmark the bus and the codelets. + +\verbatim +$ ./configure && make +$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult +[starpu][_starpu_load_history_based_model] Warning: model matvecmult + is not calibrated, forcing calibration for this run. Use the + STARPU_CALIBRATE environment variable to control this. +$ ... +$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult +TEST PASSED +\endverbatim + +Note that we force to use the scheduler dmda to generate +performance models for the application. The application may need to be +run several times before the model is calibrated. 
+ +\section Simulation Simulation + +Then, recompile StarPU, passing \ref enable-simgrid "--enable-simgrid" +to configure. Make sure to keep all the other configure options +the same, and notably options such as --enable-maxcudadev. + +\verbatim +$ ./configure --enable-simgrid +\endverbatim + +To specify the location of SimGrid, you can either set the environment +variables \c SIMGRID_CFLAGS and \c SIMGRID_LIBS, or use the \c configure +options \ref with-simgrid-dir "--with-simgrid-dir", +\ref with-simgrid-include-dir "--with-simgrid-include-dir" and +\ref with-simgrid-lib-dir "--with-simgrid-lib-dir", for example + +\verbatim +$ ./configure --with-simgrid-dir=/opt/local/simgrid +\endverbatim + +You can then re-run the application. + +\verbatim +$ make +$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult +TEST FAILED !!! +\endverbatim + +It is normal that the test fails: since the computation is not actually done +(that is the whole point of SimGrid), the result is wrong, of course. + +If the performance model is not calibrated enough, the following error +message will be displayed + +\verbatim +$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult +[starpu][_starpu_load_history_based_model] Warning: model matvecmult + is not calibrated, forcing calibration for this run. Use the + STARPU_CALIBRATE environment variable to control this. +[starpu][_starpu_simgrid_execute_job][assert failure] Codelet + matvecmult does not have a perfmodel, or is not calibrated enough +\endverbatim + +The number of devices can be chosen as usual with \ref STARPU_NCPU, +\ref STARPU_NCUDA, and \ref STARPU_NOPENCL, and the amount of GPU memory +with \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM, +\ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM. + +\section SimulationOnAnotherMachine Simulation On Another Machine + +The SimGrid support even permits to perform simulations on another machine, your +desktop, typically. 
To achieve this, one still needs to perform the Calibration +step on the actual machine to be simulated, then copy the resulting performance models to your desktop +machine (the $STARPU_HOME/.starpu directory). One can then perform the +Simulation step on the desktop machine, by setting the environment +variable \ref STARPU_HOSTNAME to the name of the actual machine, to +make StarPU use the performance models of the simulated machine even +on the desktop machine. To use multiple performance models in different ranks, +in case of smpi executions in a heterogeneous platform, it is possible to use the +option -hostfile-platform in starpu_smpirun, that will define +\ref STARPU_MPI_HOSTNAMES with the hostnames of your hostfile. + +If the desktop machine does not have CUDA or OpenCL, StarPU is still able to +use SimGrid to simulate execution with CUDA/OpenCL devices, but the application +source code will probably disable the CUDA and OpenCL codelets in that +case. Since during SimGrid execution, the functions of the codelet are actually +not called by default, one can use dummy functions such as the one shown in \ref Preparing to +still permit CUDA or OpenCL execution. + +\section SimulationExamples Simulation Examples + +StarPU ships a few performance models for a couple of systems: \c attila, +\c mirage, \c idgraf, and \c sirocco. See Section \ref SimulatedBenchmarks for the details. + +\section FakeSimulations Simulations On Fake Machines + +It is possible to build fake machines which do not exist, by modifying the +platform file in $STARPU_HOME/.starpu/sampling/bus/machine.platform.xml +by hand: one can add more CPUs, add GPUs (but the performance model file has to +be extended as well), change the available GPU memory size, PCI memory bandwidth, etc. 
+ +\section TweakingSimulation Tweaking Simulation + +The simulation can be tweaked, to be able to tune it between a very accurate +simulation and a very simple simulation (which is thus close to scheduling +theory results), see the \ref STARPU_SIMGRID_TRANSFER_COST, \ref STARPU_SIMGRID_CUDA_MALLOC_COST, +\ref STARPU_SIMGRID_CUDA_QUEUE_COST, \ref STARPU_SIMGRID_TASK_SUBMIT_COST, \ref STARPU_SIMGRID_TASK_PUSH_COST, +\ref STARPU_SIMGRID_FETCHING_INPUT_COST and \ref STARPU_SIMGRID_SCHED_COST environment variables. + +\section SimulationMPIApplications MPI Applications + +StarPU-MPI applications can also be run in SimGrid mode. smpi currently requires +that StarPU be built statically only, so --disable-shared needs to be +passed to ./configure. + +The application needs to be compiled with \c smpicc, and run using the +starpu_smpirun script, for instance: + +\verbatim +$ STARPU_SCHED=dmda starpu_smpirun -platform cluster.xml -hostfile hostfile ./mpi/tests/pingpong +\endverbatim + +Where \c cluster.xml is a SimGrid-MPI platform description, and \c hostfile the +list of MPI nodes to be used. Examples of such files are available in \c tools/perfmodels. In homogeneous MPI clusters, for each MPI node, it +will just replicate the architecture referred to by +\ref STARPU_HOSTNAME. To use multiple performance models in different ranks, +in case of a heterogeneous platform, it is possible to use the +option -hostfile-platform in starpu_smpirun, that will define +\ref STARPU_MPI_HOSTNAMES with the hostnames of your hostfile. + +To use FxT traces, libfxt itself also needs to be built statically, and +with dynamic linking flags, i.e. with + +\verbatim +CFLAGS=-fPIC ./configure --enable-static +\endverbatim + +\section SimulationDebuggingApplications Debugging Applications + +By default, SimGrid uses its own implementation of threads, which prevents \c gdb +from being able to inspect stacks of all threads. 
To be able to fully debug an +application running with SimGrid, pass the --cfg=contexts/factory:thread +option to the application, to make SimGrid use system threads, which \c gdb will be +able to manipulate as usual. + +It is also worth noting SimGrid 3.21's new parameter +--cfg=simix/breakpoint which allows putting a breakpoint at a precise +(deterministic!) timing of the execution. If for instance in an execution +trace we see that something odd is happening at time 19000ms, we can use +--cfg=simix/breakpoint:19.000 and \c SIGTRAP will be raised at that point, +which will thus interrupt execution within \c gdb, allowing to inspect e.g. +scheduler state, etc. + +\section SimulationMemoryUsage Memory Usage + +Since kernels are not actually run and data transfers are not actually +performed, the data memory does not actually need to be allocated. This allows +for instance to simulate the execution of applications processing very big data +on a small laptop. + +The application can for instance pass 1 (or whatever bogus pointer) +to StarPU data registration functions, instead of allocating data. This will +however require the application to take care of not trying to access the data, +and will not work in MPI mode, which performs transfers. + +Another way is to pass the \ref STARPU_MALLOC_SIMULATION_FOLDED flag to the +starpu_malloc_flags() function. +An example is available in examples/mult/xgemm.c . +This will make it allocate a memory area which +one can read/write, but optimized so that this does not actually consume +memory. Of course, the values read from such area will be bogus, but this allows +the application to keep e.g. data load, store, initialization as it is, and also +work in MPI mode. A more aggressive alternative is to pass also the +\ref STARPU_MALLOC_SIMULATION_UNIQUE flag (alongside +\ref STARPU_MALLOC_SIMULATION_FOLDED) to the starpu_malloc_flags() function. +An example is available in examples/cholesky/cholesky_tag.c . 
+This will make StarPU reuse the pointers for allocations of the same size +without calling the folded allocation again, thus decreasing some pressure on +memory management. + +Note however that notably Linux kernels refuse obvious memory overcommitting by +default, so a single allocation can typically not be bigger than the amount of +physical memory, see https://www.kernel.org/doc/Documentation/vm/overcommit-accounting +This prevents for instance from allocating a single huge matrix. Allocating a +huge matrix in several tiles is not a problem, however. sysctl +vm.overcommit_memory=1 can also be used to allow such overcommit. + +Note however that this folding is done by remapping the same file several times, +and Linux kernels will also refuse to create too many memory areas. sysctl +vm.max_map_count can be used to check and change the default (65535). By +default, StarPU uses a 1MiB file, so it hopefully fits in the CPU cache. However, +this limits the amount of such folded memory to a bit below 64GiB. The +\ref STARPU_MALLOC_SIMULATION_FOLD environment variable can be used to increase the +size of the file. + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy b/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy new file mode 100644 index 0000000..ffac998 --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy @@ -0,0 +1,90 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page SOCLOpenclExtensions SOCL OpenCL Extensions + +SOCL is an OpenCL implementation based on StarPU. It gives unified access to +every available OpenCL device: applications can now share entities such as +Events, Contexts or Command Queues between several OpenCL implementations. + +In addition, command queues that are created without specifying a device provide +automatic scheduling of the submitted commands on OpenCL devices contained in +the context to which the command queue is attached. + +Setting the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE flag on a command +queue also allows StarPU to reorder kernels queued on the queue, otherwise they +would be serialized, and several command queues would be necessary to see kernels +dispatched to the various OpenCL devices. + +Note: this is still an area under development and subject to change. + +When compiling StarPU, SOCL will be enabled if a valid OpenCL +implementation is found on your system. To be able to run the SOCL +test suite, the environment variable \ref SOCL_OCL_LIB_OPENCL needs to +be defined to the location of the file libOpenCL.so of the OCL +ICD implementation. You should for example add the following line in +your file .bashrc + +\verbatim +export SOCL_OCL_LIB_OPENCL=/usr/lib/x86_64-linux-gnu/libOpenCL.so +\endverbatim + +You can then run the test suite in the directory socl/examples. + +\verbatim +$ make check +... +PASS: basic/basic +PASS: testmap/testmap +PASS: clinfo/clinfo +PASS: matmul/matmul +PASS: mansched/mansched +================== +All 5 tests passed +================== +\endverbatim + +The environment variable \ref OCL_ICD_VENDORS has to point to the directory +where the socl.icd ICD file is installed. 
When compiling StarPU, the files +are in the directory socl/vendors. With an installed version of +StarPU, the files are installed in the directory +$prefix/share/starpu/opencl/vendors. + +To run the tests by hand, you have to call, for example, + +\verbatim +$ LD_PRELOAD=$SOCL_OCL_LIB_OPENCL OCL_ICD_VENDORS=socl/vendors/ socl/examples/clinfo/clinfo +Number of platforms: 2 + Platform Profile: FULL_PROFILE + Platform Version: OpenCL 1.1 CUDA 4.2.1 + Platform Name: NVIDIA CUDA + Platform Vendor: NVIDIA Corporation + Platform Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll + + Platform Profile: FULL_PROFILE + Platform Version: OpenCL 1.0 SOCL Edition (0.1.0) + Platform Name: SOCL Platform + Platform Vendor: Inria + Platform Extensions: cl_khr_icd +.... +$ +\endverbatim + +To enable the use of CPU cores via OpenCL, one can set the \ref STARPU_OPENCL_ON_CPUS +environment variable to 1 and \ref STARPU_NCPUS to 0 (to avoid using CPUs both via +the OpenCL driver and the normal CPU driver). + +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy b/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy new file mode 100644 index 0000000..e34b89e --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy @@ -0,0 +1,53 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page TCPIPSupport TCP/IP Support + +\section TCPIPMasterSlave TCP/IP Master Slave Support + +StarPU provides a transparent way to execute applications across many +nodes. The Master Slave support permits to use remote cores without +thinking about data distribution. This support can be activated with +the \c configure option \ref enable-tcpip-master-slave +"--enable-tcpip-master-slave". + +The existing kernels for CPU devices can be used as such. They only have to be +exposed through the name of the function in the \ref starpu_codelet::cpu_funcs_name field. +Functions have to be globally-visible (i.e. not static) for StarPU to +be able to look them up, and -rdynamic must be passed to gcc (or +-export-dynamic to ld) so that symbols of the main program are visible. + +By default, one core is dedicated on the master node to manage the +entire set of slaves. + +Choosing the number of cores on each slave device is done by setting +the environment variable \ref STARPU_NTCPIPMSTHREADS "STARPU_NTCPIPMSTHREADS=\<number\>" +with \<number\> being the requested number of cores. By default, +all the slave's cores are used. + +The master should be given the number of slaves that are expected to be run with +the \ref STARPU_TCPIP_MS_SLAVES environment variable. + +The slaves should then be started, and their number also should be given with +the \ref STARPU_TCPIP_MS_SLAVES environment variable. They should additionally be +given the IP address of the master with the \ref STARPU_TCPIP_MS_MASTER environment +variable. + +For simple local checks, one can use the starpu_tcpipexec tool, which just +starts the application several times. Setting the number of slave nodes is done +by changing the -np parameter. 
+ +*/ diff --git a/doc/doxygen/chapters/starpu_extensions/transactions.doxy b/doc/doxygen/chapters/starpu_extensions/transactions.doxy new file mode 100644 index 0000000..e9cac8b --- /dev/null +++ b/doc/doxygen/chapters/starpu_extensions/transactions.doxy @@ -0,0 +1,81 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page Transactions Transactions + +\section TransactionsIdeas General Ideas + +StarPU's transactions enable the cancellation of a sequence of already +submitted tasks based on a just-in-time decision. +The purpose of this mechanism is typically for iterative applications to submit +tasks for the next iteration ahead of time while leaving some iteration loop +criterion (e.g. convergence) to be evaluated just before the first task of the +next iteration is about to be scheduled. Such a sequence of collectively +cancelable tasks is called a transaction \e epoch. + +\section TransactionsUsage Usage + +Some examples illustrating the usage of StarPU's transactions are available in +the directory examples/transactions. + +\subsection TransactionsEpochCancel Epoch Cancellation + +If the start criterion of an epoch evaluates to \c False, all the tasks for that next +epoch are canceled. 
Thus, StarPU's transactions let applications avoid the use +of synchronization barriers commonly found between the task submission +sequences of subsequent iterations, and avoid breaking the flow of dependencies +in the process. Moreover, while the kernel functions of canceled transaction +tasks are not executed, their dependencies are still honored in the proper +order. + +\subsection TransactionsCodelets Transactions Enabled Codelets + +Codelets for tasks being part of a transaction should set their \c nbuffers +field to \ref STARPU_VARIABLE_NBUFFERS. + +\subsection TransactionsCreation Transaction Creation + +A struct starpu_transaction opaque object is created using the \ref starpu_transaction_open() +function, specifying a transaction start criterion callback and some user +argument to be passed to that callback upon the first call. The start criterion callback should +return \c True (e.g. \c !0 ) if the next transaction epoch should proceed, or \c +False (e.g. \c 0 ) if the tasks belonging to that next epoch should be canceled. +\c starpu_transaction_open() submits an internal task to mark the beginning of the +transaction. If submitting that internal task fails with ENODEV, \c starpu_transaction_open() +will return \c NULL. + +\subsection TransactionsTasks Transaction Tasks +Tasks governed by the same transaction object should be passed that transaction object either through +the \c .transaction field of \ref starpu_task structures, or using the \ref STARPU_TRANSACTION argument +of \ref starpu_task_insert(). + +\subsection TransactionsEpochNext Epoch Transition +The transition from one transaction epoch to the next is expressed using the +\ref starpu_transaction_next_epoch function to which the \c +starpu_transaction object and a user argument are passed. Upon a call to that +function, the start criterion callback is evaluated on the user argument to +decide whether the next epoch should proceed or be canceled. 
+ +\subsection TransactionsClosing Transaction Closing +The last epoch should be ended through a call to \ref starpu_transaction_close(). + +\section TransactionsLimitations Known limitations + +Support for transactions is experimental. +
+StarPU's transactions are currently not compatible with StarPU-MPI distributed sessions. + +*/ diff --git a/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy b/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy new file mode 100644 index 0000000..011b299 --- /dev/null +++ b/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy @@ -0,0 +1,578 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page CheckListWhenPerformanceAreNotThere Check List When Performance Are Not There + +TODO: improve! + +To achieve good +performance, we give below a list of features which should be checked. + +For a start, you can use \ref OfflinePerformanceTools to get a Gantt chart which +will show roughly where time is spent, and focus correspondingly. + +\section CheckTaskSize Check Task Size + +Make sure that your tasks are not too small, as the StarPU runtime +overhead may not be negligible. As explained in \ref TaskSizeOverhead, +you can run the script \c tasks_size_overhead.sh to get an +idea of the scalability of tasks depending on their duration (in µs), on your +own system. + +Typically, 10µs-ish tasks are definitely too small, the CUDA overhead itself is +much bigger than this. 
+ +1ms-ish tasks may be a good start, but will not necessarily scale to many dozens +of cores, so it's better to try to get 10ms-ish tasks. + +It may be useful to dedicate a whole core to the main thread, so it can spend +its time on submitting tasks, by setting the \ref STARPU_MAIN_THREAD_BIND +environment variable to 1. + +Task durations can easily be observed when performance models are defined (see +\ref PerformanceModelExample) by using the tools starpu_perfmodel_plot or +starpu_perfmodel_display (see \ref PerformanceOfCodelets). + +When using parallel tasks, the problem is even worse since StarPU has to +synchronize the task execution. + +\section ConfigurationImprovePerformance Configuration Which May Improve Performance + +If you do not plan to use support for GPUs or out-of-core, i.e. not use StarPU's ability to manage data coherency between several memory nodes, the \c configure option \ref enable-maxnodes "--enable-maxnodes=1" allows to considerably reduce StarPU's memory management overhead. + +The \c configure option \ref enable-fast "--enable-fast" disables all +assertions. This makes StarPU more performant for tiny tasks by +disabling all sanity checks. Only use this for measurements and production, not for development, since this will drop all basic checks. + +\section DataRelatedFeaturesToImprovePerformance Data Related Features Which May Improve Performance + +As can be seen in \ref StatesInGantt, if the application has a lot of different +kinds of sizes of data, StarPU will end up freeing/reallocating data on GPU to +accommodate the different sizes. It can be very effective to round the +allocated size up a bit by e.g. 10% (e.g. 11MB for all data sizes between 10MB +and 11MB) so that StarPU will be able to reuse buffers of the same size for +data with similar but not exactly the same size. 
This can be registered by using +starpu_matrix_data_register_allocsize(), starpu_vector_data_register_allocsize() +so that StarPU records both the rounded-up data size, and the actual size used for +computation. + +link to \ref DataManagement + +link to \ref DataPrefetch + +\section TaskRelatedFeaturesToImprovePerformance Task Related Features Which May Improve Performance + +link to \ref TaskGranularity + +link to \ref TaskSubmission + +link to \ref TaskPriorities + +\section SchedulingRelatedFeaturesToImprovePerformance Scheduling Related Features Which May Improve Performance + +link to \ref TaskSchedulingPolicy + +link to \ref TaskDistributionVsDataTransfer + +link to \ref Energy-basedScheduling + +link to \ref StaticScheduling + +\section CUDA-specificOptimizations CUDA-specific Optimizations + +For proper overlapping of asynchronous GPU data transfers, data has to be pinned +by CUDA. Data allocated with starpu_malloc() is always properly pinned. If the +application registers to StarPU some data which has not been allocated with +starpu_malloc(), starpu_memory_pin() should be called to pin the data +memory. Otherwise, the "Asynchronous copy submission" parts of the execution +traces (see \ref StatesInGantt) will show the synchronous inefficiency. + +Note that CUDA pinning/unpinning takes a long time, so for e.g. temporary +data, it is much more efficient to use a StarPU temporary data (see \ref +TemporaryData), that StarPU can reuse and thus avoid the pin/unpin cost. + +Due to CUDA limitations, StarPU will have a hard time overlapping its own +communications and the codelet computations if the application does not use a +dedicated CUDA stream for its computations instead of the default stream, +which synchronizes all operations of the GPU. The function +starpu_cuda_get_local_stream() returns a stream which can be used by all CUDA codelet +operations to avoid this issue. 
For instance: + +\code{.c} +func <<<grid,block,0,starpu_cuda_get_local_stream()>>> (foo, bar); +cudaError_t status = cudaGetLastError(); +if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +cudaStreamSynchronize(starpu_cuda_get_local_stream()); +\endcode + +as well as the use of \c cudaMemcpyAsync(), etc.: for each CUDA operation, one needs +to use a version that takes a stream parameter. + +If the kernel uses its own non-default stream, one can synchronize this stream +with the StarPU-provided stream this way: + +\code{.c} +cudaEvent_t event; +call_kernel_with_its_own_stream(); +cudaEventCreateWithFlags(&event, cudaEventDisableTiming); +cudaEventRecord(event, get_kernel_stream()); +cudaStreamWaitEvent(starpu_cuda_get_local_stream(), event, 0); +cudaEventDestroy(event); +\endcode + +This code makes the StarPU-provided stream wait for a new event, which will be +triggered by the completion of the kernel. + +Unfortunately, some CUDA libraries do not have stream variants of +kernels. This will seriously lower the potential for overlapping. +If some CUDA calls are made without specifying this local stream, +synchronization needs to be explicit with cudaDeviceSynchronize() around these +calls, to make sure that they get properly synchronized with the calls using +the local stream. Notably, \c cudaMemcpy() and \c cudaMemset() are actually +asynchronous and need such explicit synchronization! Use \c cudaMemcpyAsync() and +\c cudaMemsetAsync() instead. + +Calling starpu_cublas_init() will ensure that StarPU properly calls the +CUBLAS library functions, and starpu_cublas_shutdown() will synchronously deinitialize the CUBLAS library on every CUDA device. Some libraries like Magma may however change the current stream of CUBLAS v1, one then has to call starpu_cublas_set_stream() at +the beginning of the codelet to make sure that CUBLAS is really using the proper +stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS +kernels with the proper configuration. 
+ +Similarly, calling starpu_cublasLt_init() makes StarPU create CUBLASLT handles +on each CUDA device, starpu_cublasLt_get_local_handle() can then be used to +queue CUBLASLT kernels with the proper configuration. starpu_cublasLt_shutdown() will synchronously deinitialize the CUBLASLT library on every CUDA device. +Since CUBLASLT handles are not bundled with CUDA streams, users should call +starpu_cuda_get_local_stream() to get a CUDA stream before calling a +CUBLASLT API. + +Similarly, calling starpu_cusparse_init() makes StarPU create CUSPARSE handles +on each CUDA device, starpu_cusparse_get_local_handle() can then be used to +queue CUSPARSE kernels with the proper configuration. starpu_cusparse_shutdown() will synchronously deinitialize the CUSPARSE library on every CUDA device. + +Similarly, calling starpu_cusolver_init() makes StarPU create CUSOLVER handles +on each CUDA device, starpu_cusolverDn_get_local_handle(), starpu_cusolverSp_get_local_handle(), +starpu_cusolverRf_get_local_handle(), can then be used to +queue CUSOLVER kernels with the proper configuration. +starpu_cusolver_shutdown() can be used to clear these handles. +It is useful to use a ::STARPU_SCRATCH buffer +whose size was set to the amount returned by \c cusolver*Spotrf_bufferSize . An example can be seen +in \c examples/cholesky. + +If the kernel can be made to only use this local stream or other self-allocated +streams, i.e. the whole kernel submission can be made asynchronous, then +one should enable asynchronous execution of the kernel. This means setting +the flag ::STARPU_CUDA_ASYNC in the corresponding field starpu_codelet::cuda_flags, and dropping the +cudaStreamSynchronize() call at the end of the cuda_func function, so that it +returns immediately after having queued the kernel to the local stream. That way, StarPU will be +able to submit and complete data transfers while kernels are executing, instead of only at each +kernel submission. 
The kernel just has to make sure that StarPU can use the +local stream to synchronize with the kernel startup and completion. + +Using the flag ::STARPU_CUDA_ASYNC also permits to enable concurrent kernel +execution, on cards which support it (Kepler and later, notably). This is +enabled by setting the environment variable \ref STARPU_NWORKER_PER_CUDA to the +number of kernels to be executed concurrently. This is useful when kernels are +small and do not feed the whole GPU with threads to run. + +Concerning memory allocation, you should really not use \c cudaMalloc()/ \c cudaFree() +within the kernel, since \c cudaFree() introduces way too many synchronizations +within CUDA itself. You should instead add a parameter to the codelet with the +::STARPU_SCRATCH mode access. You can then pass to the task a handle registered +with the desired size but with the \c NULL pointer, the handle can even be +shared between tasks, StarPU will allocate per-task data on the fly before task +execution, and reuse the allocated data between tasks. + +See examples/pi/pi_redux.c for an example of use. + +\section OpenCL-specificOptimizations OpenCL-specific Optimizations + +If the kernel can be made to only use the StarPU-provided command queue or other self-allocated +queues, i.e. the whole kernel submission can be made asynchronous, then +one should enable asynchronous execution of the kernel. This means setting +the flag ::STARPU_OPENCL_ASYNC in the corresponding field starpu_codelet::opencl_flags and dropping the +clFinish() and starpu_opencl_collect_stats() calls at the end of the kernel, so +that it returns immediately after having queued the kernel to the provided queue. +That way, StarPU will be able to submit and complete data transfers while kernels are executing, instead of +only at each kernel submission. The kernel just has to make sure +that StarPU can use the command queue it has provided to synchronize with the +kernel startup and completion. 
+ +\section DetectionStuckConditions Detecting Stuck Conditions + +It may happen that StarPU does not make progress for a long +period of time. It may be due to contention inside StarPU, but it may also be an external problem, such as a stuck MPI or CUDA driver. + +export STARPU_WATCHDOG_TIMEOUT=10000 (\ref STARPU_WATCHDOG_TIMEOUT) + +allows making StarPU print an error message whenever StarPU does not terminate +any task for 10ms, but lets the application continue normally. In addition to that, + +export STARPU_WATCHDOG_CRASH=1 (\ref STARPU_WATCHDOG_CRASH) + +raises SIGABRT in this condition, thus allowing to catch the +situation in \c gdb. + +It can also be useful to type handle SIGABRT nopass in gdb to be able to let +the process continue, after inspecting the state of the process. + +\section HowToLimitMemoryPerNode How to Limit Memory Used By StarPU And Cache Buffer Allocations + +By default, StarPU makes sure to use at most 90% of the memory of GPU devices, +moving data in and out of the device as appropriate, as well as using +prefetch and write-back optimizations. + +The environment variables \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM, +\ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM +can be used to control how much (in MiB) of the GPU device memory +should be used at most by StarPU (the default value is to use 90% of the +available memory). + +By default, the usage of the main memory is not limited, as the +default mechanisms do not provide means to evict main memory when it +gets too tight. This also means that by default, StarPU will not cache buffer +allocations in main memory, since it does not know how much of the +system memory it can afford. + +The environment variable \ref STARPU_LIMIT_CPU_MEM can be used to +specify how much (in MiB) of the main memory should be used at most by +StarPU for buffer allocations. 
This way, StarPU will be able to +cache buffer allocations (which can be a real benefit if a lot of buffers are +involved, or if allocation fragmentation can become a problem), and when using +\ref OutOfCore, StarPU will know when it should evict data out to the disk. + +It should be noted that by default only buffer allocations automatically +done by StarPU are accounted here, i.e. allocations performed through +starpu_malloc_on_node() which are used by the data interfaces +(matrix, vector, etc.). This does not include allocations performed by +the application through e.g. malloc(). It does not include allocations +performed through starpu_malloc() either, only allocations +performed explicitly with the flag \ref STARPU_MALLOC_COUNT, i.e. by calling + +\code{.c} +starpu_malloc_flags(STARPU_MALLOC_COUNT) +\endcode + +are taken into account. And starpu_free_flags() can be called to free the memory that was previously allocated with starpu_malloc_flags(). If the +application wants to make StarPU aware of its own allocations, so that StarPU +knows precisely how much data is allocated, and thus when to evict allocation +caches or data out to the disk, starpu_memory_allocate() can be used to +specify an amount of memory to be accounted for. starpu_memory_deallocate() +can be used to account freed memory back. Those can for instance be used by data +interfaces with dynamic data buffers: instead of using starpu_malloc_on_node(), +they would dynamically allocate data with \c malloc()/\c realloc(), and notify StarPU of +the delta by calling starpu_memory_allocate() and starpu_memory_deallocate(). +By default, the memory management system uses a set of default flags for each node when allocating memory. starpu_malloc_on_node_set_default_flags() can be used to modify these default flags on a specific node. + +starpu_memory_get_total() and starpu_memory_get_available() +can be used to get an estimation of how much memory is available. 
+starpu_memory_wait_available() can also be used to block until an +amount of memory becomes available, but it may be preferable to call + +\code{.c} +starpu_memory_allocate(STARPU_MEMORY_WAIT) +\endcode + +to reserve this amount immediately. + +\section HowToReduceTheMemoryFootprintOfInternalDataStructures How To Reduce The Memory Footprint Of Internal Data Structures + +It is possible to reduce the memory footprint of the task and data internal +structures of StarPU by describing the shape of your machine and/or your +application when calling \c configure. + +To reduce the memory footprint of the data internal structures of StarPU, one +can set the \c configure parameters +\ref enable-maxcpus "--enable-maxcpus", +\ref enable-maxnumanodes "--enable-maxnumanodes", +\ref enable-maxcudadev "--enable-maxcudadev", +\ref enable-maxopencldev "--enable-maxopencldev" and +\ref enable-maxnodes "--enable-maxnodes" +to give StarPU +the architecture of the machine it will run on, thus tuning the size of the +structures to the machine. + +To reduce the memory footprint of the task internal structures of StarPU, one +can set the \c configure parameter \ref enable-maxbuffers "--enable-maxbuffers" to +give StarPU the maximum number of buffers that a task can use during an +execution. For example, in the Cholesky factorization (dense linear algebra +application), the GEMM task uses up to 3 buffers, so it is possible to set the +maximum number of task buffers to 3 to run a Cholesky factorization on StarPU. + +The size of the various structures of StarPU can be printed by +tests/microbenchs/display_structures_size. + +It is also often useless to submit \b all the tasks at the same time. +Task submission can be blocked when a reasonable given number of +tasks have been submitted, by setting the environment variables \ref +STARPU_LIMIT_MIN_SUBMITTED_TASKS and \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS. 
+ +\code{.sh} +export STARPU_LIMIT_MAX_SUBMITTED_TASKS=10000 +export STARPU_LIMIT_MIN_SUBMITTED_TASKS=9000 +\endcode + +will make StarPU block submission when 10000 tasks are submitted, and unblock +submission when only 9000 tasks are still submitted, i.e. 1000 tasks have +completed among the 10000 which were submitted when submission was blocked. Of +course this may reduce parallelism if the threshold is set too low. The precise +balance depends on the application task graph. + +These values can also be specified with the functions +starpu_set_limit_min_submitted_tasks() and +starpu_set_limit_max_submitted_tasks(). + +An idea of how much memory is used for tasks and data handles can be obtained by +setting the environment variable \ref STARPU_MAX_MEMORY_USE to 1. + +\section HowtoReuseMemory How To Reuse Memory + +When your application needs to allocate more data than the available amount of +memory usable by StarPU (given by starpu_memory_get_available()), the +allocation cache system can reuse data buffers used by previously executed +tasks. For this system to work with MPI tasks, you need to submit tasks progressively instead +of as soon as possible, because in the case of MPI receives, the allocation cache check for reusing data +buffers will be done at submission time, not at execution time. + +There are two options to control the task submission flow. The first one is by +controlling the number of submitted tasks during the whole execution. This can +be done either by setting the environment variables +\ref STARPU_LIMIT_MAX_SUBMITTED_TASKS and \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS to +tell StarPU when to stop submitting tasks and when to wake up and submit tasks +again, or by explicitly calling starpu_task_wait_for_n_submitted() in +your application code for finer-grained control (for example, between two +iterations of a submission loop). + +The second option is to control the memory size of the allocation cache.
This +can be done in the application by using jointly +starpu_memory_get_available() and starpu_memory_wait_available() to submit +tasks only when there is enough memory space to allocate the data needed by the +task, i.e. when enough data are available for reuse in the allocation cache. + +\section WorkingSetDoesntFit When the Working Set Does Not Fit + +If the working set does not fit in memory, we are in an out-of-core situation, +and StarPU will have to move data back-and-forth between the limited memory and +larger memory. This happens both between GPUs and CPUs, and between CPUs and the +disk (see \ref OutOfCore). + +This is a very difficult problem in general. By default, StarPU uses a +Least-Recently-Used data replacement policy, which in various cases will perform +just fine. In other cases, where indeed more data is needed than memory space, +StarPU will have to decide an optimized tasks order to reduce data transfers. +This is achieved notably by the `dmdar` scheduler, which prioritises tasks whose +data is already in the GPU, and by the `dmdas` scheduler, which uses tasks +priorities first, and then prioritises tasks whose data is already in the GPU. +It is also useful to try the `darts` scheduler, which was exactly meant for +dealing with such out-of-core situations, see + +the DARTS research paper for details. + +It can be also useful to try to tune the \ref STARPU_TARGET_CLEAN_BUFFERS and +\ref STARPU_MINIMUM_CLEAN_BUFFERS environment variables. + +\section PerformanceModelCalibration Performance Model Calibration + +Most schedulers are based on an estimation of codelet duration on each kind +of processing unit. For this to be possible, the application programmer needs +to configure a performance model for the codelets of the application (see +\ref PerformanceModelExample for instance). History-based performance models +use on-line calibration. 
When using a scheduler which requires such performance +model, StarPU will automatically calibrate codelets +which have never been calibrated yet, and save the result in +$STARPU_HOME/.starpu/sampling/codelets. +The models are indexed by machine name. They can then be displayed various ways, +see \ref PerformanceOfCodelets . + +By default, StarPU stores separate performance models according to the hostname +of the system. To avoid having to calibrate performance models for each node +of a homogeneous cluster for instance, the model can be shared by using +export STARPU_HOSTNAME=some_global_name (\ref STARPU_HOSTNAME), where +some_global_name is the name of the cluster for instance, which thus +overrides the hostname of the system. + +By default, StarPU stores separate performance models for each GPU. To avoid +having to calibrate performance models for each GPU of a homogeneous set of GPU +devices for instance, the model can be shared by using the environment +variables \ref STARPU_PERF_MODEL_HOMOGENEOUS_CUDA, \ref +STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL and \ref +STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS depending on your GPU device +type. + +\code{.shell} +export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1 +export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1 +export STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS=1 +\endcode + +To force continuing calibration, +use export STARPU_CALIBRATE=1 (\ref STARPU_CALIBRATE). This may be necessary if your application +has not-so-stable performance. It may also be useful to use +STARPU_SCHED=eager to get tasks distributed over the various workers. +StarPU will force calibration (and thus ignore +the current result) until 10 (_STARPU_CALIBRATION_MINIMUM) measurements have been +made on each architecture, to avoid bad scheduling decisions just because the +first measurements were not so good. 
+ +Note that StarPU will not record the very first measurement for a given codelet +and a given size, because it would most often be hit by computation library +loading or initialization. StarPU will also throw measurements away if, +after computing an average execution time, it notices that most +subsequent tasks have an execution time largely outside the computed average +("Too big deviation for model..." warning messages). By looking at the details +of the message and their reported measurements, it can highlight that your +computation library really has non-stable measurements, which is probably an +indication of an issue in the computation library, or the execution environment +(e.g. rogue daemons). + +Details on the current performance model status +can be obtained with the tool starpu_perfmodel_display: the +option -l lists the available performance models, and the +option -s allows choosing the performance model to be +displayed. The result looks like: + +\verbatim +$ starpu_perfmodel_display -s starpu_slu_lu_model_getrf +performance model for cpu_impl_0 +# hash size flops mean dev n +914f3bef 1048576 0.000000e+00 2.503577e+04 1.982465e+02 8 +3e921964 65536 0.000000e+00 5.527003e+02 1.848114e+01 7 +e5a07e31 4096 0.000000e+00 1.717457e+01 5.190038e+00 14 +... +\endverbatim + +It shows that for the LU 11 kernel with a 1MiB matrix, the average +execution time on CPUs was about 25ms, with a 0.2ms standard deviation, over +8 samples. It is a good idea to check this before doing actual performance +measurements.
+ +A graph (both in postscript and png format) can be drawn by using the tool starpu_perfmodel_plot: + +\verbatim +$ starpu_perfmodel_plot -s starpu_slu_lu_model_getrf +4096 16384 65536 262144 1048576 4194304 +$ gnuplot starpu_starpu_slu_lu_model_getrf.gp +$ gv starpu_starpu_slu_lu_model_getrf.eps +\endverbatim + +\image html starpu_starpu_slu_lu_model_11.png +\image latex starpu_starpu_slu_lu_model_11.png "" width=\textwidth + +If a kernel source code was modified (e.g. performance improvement), the +calibration information is stale and should be dropped, to re-calibrate from +start. This can be done by using export STARPU_CALIBRATE=2 (\ref STARPU_CALIBRATE). + +Note: history-based performance models get calibrated +only if a performance-model-based scheduler is chosen. + +The history-based performance models can also be explicitly filled by the +application without execution, if e.g. the application already has a series of +measurements. This can be done by using starpu_perfmodel_update_history(), +for instance: + +\code{.c} +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "my_perfmodel", +}; + +struct starpu_codelet cl = +{ + .cuda_funcs = { cuda_func1, cuda_func2 }, + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &perf_model +}; + +void feed(void) +{ + struct my_measure *measure; + struct starpu_task task; + starpu_task_init(&task); + + task.cl = &cl; + + for (measure = &measures[0]; measure < measures[last]; measure++) + { + starpu_data_handle_t handle; + starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float)); + task.handles[0] = handle; + starpu_perfmodel_update_history(&perf_model, &task, STARPU_CUDA_DEFAULT + measure->cudadev, 0, measure->implementation, measure->time); + starpu_task_clean(&task); + starpu_data_unregister(handle); + } +} +\endcode + +Measurement has to be provided in milliseconds for the completion time models, +and in Joules for the energy consumption models. 
+ +\section Profiling Profiling + +A quick view of how many tasks each worker has executed can be obtained by setting +export STARPU_WORKER_STATS=1 (\ref STARPU_WORKER_STATS). This is a convenient way to check that +execution did happen on accelerators, without penalizing performance with +the profiling overhead. The environment variable \ref STARPU_WORKER_STATS_FILE can be defined +to specify a filename in which to display statistics, by default +statistics are printed on the standard error stream. + +A quick view of how much data transfers have been issued can be obtained by setting +export STARPU_BUS_STATS=1 (\ref STARPU_BUS_STATS). The +environment variable \ref +STARPU_BUS_STATS_FILE can be defined to specify a filename in which to +display statistics, by default statistics are printed on the standard error stream. + +More detailed profiling information can be enabled by using export STARPU_PROFILING=1 (\ref STARPU_PROFILING) +or by +calling starpu_profiling_status_set() from the source code. +Statistics on the execution can then be obtained by using export +STARPU_BUS_STATS=1 and export STARPU_WORKER_STATS=1 . + More details on performance feedback are provided in the next chapter. + +\section OverheadProfiling Overhead Profiling + +\ref OfflinePerformanceTools can already provide an idea of to what extent and +which part of StarPU brings an overhead on the execution time. To get a more precise +analysis of which parts of StarPU bring the most overhead, gprof can be used. + +First, recompile and reinstall StarPU with gprof support: + +\code +../configure --enable-perf-debug --disable-shared --disable-build-tests --disable-build-examples +\endcode + +Make sure not to leave a dynamic version of StarPU in the target path: remove +any remaining libstarpu-*.so + +Then relink your application with the static StarPU library, make sure that +running ldd on your application does not mention any \c libstarpu +(i.e. it's really statically-linked). 
+ +\code +gcc test.c -o test $(pkg-config --cflags starpu-1.4) $(pkg-config --libs starpu-1.4) +\endcode + +Now you can run your application, this will create a file +gmon.out in the current directory, it can be processed by +running gprof on your application: + +\code +gprof ./test +\endcode + +This will dump an analysis of the time spent in StarPU functions. + +*/ diff --git a/doc/doxygen/chapters/starpu_faq/faq.doxy b/doc/doxygen/chapters/starpu_faq/faq.doxy new file mode 100644 index 0000000..0f7ff07 --- /dev/null +++ b/doc/doxygen/chapters/starpu_faq/faq.doxy @@ -0,0 +1,518 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page FrequentlyAskedQuestions Frequently Asked Questions + +\section HowToInitializeAComputationLibraryOnceForEachWorker How To Initialize A Computation Library Once For Each Worker? + +Some libraries need to be initialized once for each concurrent instance that +may run on the machine. For instance, a C++ computation class which is not +thread-safe by itself, but for which several instantiated objects of that class +can be used concurrently. This can be used in StarPU by initializing one such +object per worker. For instance, the libstarpufft example does the following to +be able to use FFTW on CPUs. 
+ +Some global array stores the instantiated objects: + +\code{.c} +fftw_plan plan_cpu[STARPU_NMAXWORKERS]; +\endcode + +At initialization time of libstarpu, the objects are initialized: + +\code{.c} +int workerid; +for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) +{ + switch (starpu_worker_get_type(workerid)) + { + case STARPU_CPU_WORKER: + plan_cpu[workerid] = fftw_plan(...); + break; + } +} +\endcode + +And in the codelet body, they are used: + +\code{.c} +static void fft(void *descr[], void *_args) +{ + int workerid = starpu_worker_get_id(); + fftw_plan plan = plan_cpu[workerid]; + ... + + fftw_execute(plan, ...); +} +\endcode + +We call starpu_worker_get_id() to retrieve the worker ID associated with the currently executing task, or call starpu_worker_get_id_check() with the error checking. + +This however is not sufficient for FFT on CUDA: initialization has +to be done from the workers themselves. This can be done thanks to +starpu_execute_on_each_worker() or starpu_execute_on_each_worker_ex() with a specified task name, or starpu_execute_on_specific_workers() with specified workers. For instance, libstarpufft does the following. + +\code{.c} +static void fft_plan_gpu(void *args) +{ + plan plan = args; + int n2 = plan->n2[0]; + int workerid = starpu_worker_get_id(); + + cufftPlan1d(&plan->plans[workerid].plan_cuda, n, _CUFFT_C2C, 1); + cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); +} +void starpufft_plan(void) +{ + starpu_execute_on_each_worker(fft_plan_gpu, plan, STARPU_CUDA); +} +\endcode + +\section HardwareTopology Hardware Topology + +\subsection InteroperabilityHWLOC Interoperability hwloc +If hwloc is used, we can call starpu_get_hwloc_topology() to get the hwloc topology used by StarPU, and call starpu_get_pu_os_index() to get the OS index of a PU. We can call starpu_worker_get_hwloc_cpuset() to retrieve the hwloc CPU set associated with a worker. 
+ +\subsection TopologyMemory Memory + +There are various functions that we can use to retrieve information of memory node, such as to get the name of a memory node we call starpu_memory_node_get_name() and to get the kind of a memory node we call starpu_node_get_kind(). To retrieve the device ID associated with a memory node we call starpu_memory_node_get_devid(). We can call starpu_worker_get_local_memory_node() to retrieve the local memory node associated with the current worker. We can also specify a worker and call starpu_worker_get_memory_node() to retrieve the associated memory node. To get the type of memory node associated with a kind of worker we call starpu_worker_get_memory_node_kind(). If we want to know the total number of memory nodes in the system we can call starpu_memory_nodes_get_count(), and we can also retrieve the total number of memory nodes in the system that match a specific memory node kind by calling starpu_memory_nodes_get_count_by_kind(). We can call starpu_memory_node_get_ids_by_type() to get the identifiers of memory nodes in the system that match a specific memory node type. +To obtain a bitmap representing logical indexes of NUMA nodes we can call starpu_get_memory_location_bitmap(). + +\subsection TopologyWorkers Workers + +StarPU provides a range of functions for querying and managing the worker configurations on a given system. One such function is starpu_worker_get_count(), which returns the total number of workers in the system. 
In addition to this, there are also specific functions to obtain the number of workers associated with various processing units controlled by StarPU: to retrieve the number of CPUs we can call starpu_cpu_worker_get_count(), to retrieve the number of CUDA devices we can call starpu_cuda_worker_get_count(), to retrieve the number of HIP devices we can call starpu_hip_worker_get_count(), to retrieve the number of OpenCL devices we can call starpu_opencl_worker_get_count(), to retrieve the number of MPI Master Slave workers we can call starpu_mpi_ms_worker_get_count(), and to retrieve the number of TCPIP Master Slave workers we can call starpu_tcpip_ms_worker_get_count(). + +There are various functions that we can use to retrieve information of the worker. We call starpu_worker_get_name() to get the name of the worker, we call starpu_worker_get_devid() to get the device ID of the worker or call starpu_worker_get_devids() to retrieve the list of device IDs that are associated with a worker, and call starpu_worker_get_devnum() to get number of the device controlled by the worker which begin from 0. We call starpu_worker_get_subworkerid() to get the ID of sub-worker for the device. We call starpu_worker_get_sched_ctx_list() to retrieve a list of scheduling contexts that a worker is associated with. We call starpu_worker_get_stream_workerids() to retrieve the list of worker IDs that share the same stream as a given worker. + +To retrieve the total number of NUMA nodes in the system we call starpu_memory_nodes_get_numa_count(). To get the device identifier associated with a specific NUMA node and to get the NUMA node identifier associated with a specific device we can call starpu_memory_nodes_numa_id_to_devid() and starpu_memory_nodes_numa_devid_to_id() respectively. + +We can also print out information about the workers currently registered with StarPU. 
starpu_worker_display_all() prints out information of all workers, starpu_worker_display_names() prints out information of all the workers of the given type, starpu_worker_display_count() prints out the number of workers of the given type. + +StarPU provides various functions associated to the type of processing unit, such as starpu_worker_get_type(), which returns the type of processing unit associated to the worker, e.g. CPU or CUDA. We can call starpu_worker_get_type_as_string() to retrieve a string representation of the type of a worker or call starpu_worker_get_type_from_string() to retrieve a worker type enumeration value from a string representation of a worker type or call starpu_worker_get_type_as_env_var() to retrieve a string representation of the type of a worker that can be used as an environment variable. Another function, starpu_worker_get_count_by_type(), returns the number of workers of a specific type. starpu_worker_get_ids_by_type() returns a list of worker IDs for a specific type, and starpu_worker_get_by_type() returns the ID of the specific worker that has the specific type, starpu_worker_get_by_devid() returns the ID of the worker that has the specific type and device ID. To get the type of worker associated with a kind of memory node we call starpu_memory_node_get_worker_archtype(). +To check if type of processing unit matches one of StarPU's defined worker architectures we can call starpu_worker_archtype_is_valid(), while in order to convert an architecture mask to a worker architecture we can call starpu_arch_mask_to_worker_archtype(). + +To retrieve the binding ID of the worker associated with the currently executing task we can call starpu_worker_get_bindid(), it is useful for applications that require information about the binding of a particular task to a specific processor. We can call starpu_bindid_get_workerids() to retrieve the list of worker IDs that are bound to a given binding ID. 
+ +We can call starpu_workers_get_tree() to get information about the tree facilities provided by StarPU. + +\subsection TopologyBus Bus + +StarPU provides several functions to declare or retrieve information about the buses in a machine. The function starpu_bus_get_count() can be used to get the total number of buses available. To obtain the identifier of the bus between a source and destination point, the function starpu_bus_get_id() can be called. The source and destination points of a bus can be obtained by calling the functions starpu_bus_get_src() and starpu_bus_get_dst() respectively. Furthermore, users can use the function starpu_bus_set_direct() to declare that there is a direct link between a GPU and memory to the driver. The direct link can significantly reduce data transfer latency and improve overall performance. Moreover, users can use the function starpu_bus_get_direct() to retrieve information about whether a direct link has been established between a GPU and memory using the starpu_bus_set_direct() function. starpu_bus_set_ngpus() and starpu_bus_get_ngpus() functions can be used to declare and retrieve the number of GPUs of this bus that users need. + + +\section UsingTheDriverAPI Using The Driver API + +\ref API_Running_Drivers + +\code{.c} +int ret; +struct starpu_driver d = +{ + .type = STARPU_CUDA_WORKER, + .id.cuda_id = 0 +}; +ret = starpu_driver_init(&d); +if (ret != 0) + error(); +while (some_condition) +{ + ret = starpu_driver_run_once(&d); + if (ret != 0) + error(); +} +ret = starpu_driver_deinit(&d); +if (ret != 0) + error(); +\endcode + +same as: + +\code{.c} +int ret; +struct starpu_driver d = +{ + .type = STARPU_CUDA_WORKER, + .id.cuda_id = 0 +}; + +ret = starpu_driver_run(&d); +if (ret != 0) + error(); + +\endcode + +The function starpu_driver_run() initializes the given driver, and runs it until starpu_drivers_request_termination() is called. + +To add a new kind of device to the structure starpu_driver, one needs to: +
    +
  1. Add a member to the union starpu_driver::id +
  2. Modify the internal function _starpu_launch_drivers() to +make sure the driver is not always launched. +
  3. Modify the function starpu_driver_run() so that it can handle +another kind of architecture. The function starpu_driver_run() is equivalent to calling starpu_driver_init(), then calling starpu_driver_run_once() in a loop, and finally calling starpu_driver_deinit(). +
  4. Write the new function _starpu_run_foobar() in the +corresponding driver. +
+ +\section On-GPURendering On-GPU Rendering + +Graphical-oriented applications need to draw the result of their computations, +typically on the very GPU where these happened. Technologies such as OpenGL/CUDA +interoperability permit to let CUDA directly work on the OpenGL buffers, making +them thus immediately ready for drawing, by mapping OpenGL buffer, textures or +renderbuffer objects into CUDA. CUDA however imposes some technical +constraints: peer memcpy has to be disabled, and the thread that runs OpenGL has +to be the one that runs CUDA computations for that GPU. + +To achieve this with StarPU, pass the option +\ref disable-cuda-memcpy-peer "--disable-cuda-memcpy-peer" +to configure (TODO: make it dynamic), OpenGL/GLUT has to be initialized +first, and the interoperability mode has to +be enabled by using the field +starpu_conf::cuda_opengl_interoperability, and the driver loop has to +be run by the application, by using the field +starpu_conf::not_launched_drivers to prevent StarPU from running it in +a separate thread, and by using starpu_driver_run() to run the loop. +The examples gl_interop and gl_interop_idle show how it +articulates in a simple case, where rendering is done in task +callbacks. The former uses glutMainLoopEvent to make GLUT +progress from the StarPU driver loop, while the latter uses +glutIdleFunc to make StarPU progress from the GLUT main loop. 
+ +Then, to use an OpenGL buffer as a CUDA data, StarPU simply needs to be given +the CUDA pointer at registration, for instance: + +\code{.c} +/* Get the CUDA worker id */ +for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER) + break; + +/* Build a CUDA pointer pointing at the OpenGL buffer */ +cudaGraphicsResourceGetMappedPointer((void**)&output, &num_bytes, resource); + +/* And register it to StarPU */ +starpu_vector_data_register(&handle, starpu_worker_get_memory_node(workerid), output, num_bytes / sizeof(float4), sizeof(float4)); + +/* The handle can now be used as usual */ +starpu_task_insert(&cl, STARPU_RW, handle, 0); + +/* ... */ + +/* This gets back data into the OpenGL buffer */ +starpu_data_unregister(handle); +\endcode + +and display it e.g. in the callback function. + +\section UsingStarPUWithMKL Using StarPU With MKL 11 (Intel Composer XE 2013) + +Some users had issues with MKL 11 and StarPU (versions 1.1rc1 and +1.0.5) on Linux with MKL, using 1 thread for MKL and doing all the +parallelism using StarPU (no multithreaded tasks), setting the +environment variable MKL_NUM_THREADS to 1, and using the threaded MKL library, +with iomp5. + +Using this configuration, StarPU only uses 1 core, no matter the value of +\ref STARPU_NCPU. The problem is actually a thread pinning issue with MKL. + +The solution is to set the environment variable KMP_AFFINITY to disabled +(http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/optaps/common/optaps_openmp_thread_affinity.htm). + +\section ThreadBindingOnNetBSD Thread Binding on NetBSD + +When using StarPU on a NetBSD machine, if the topology +discovery library hwloc is used, thread binding will fail. 
To +prevent the problem, you should at least use the version 1.7 of +hwloc, and also issue the following call: + +\verbatim +$ sysctl -w security.models.extensions.user_set_cpu_affinity=1 +\endverbatim + +Or add the following line in the file /etc/sysctl.conf + +\verbatim +security.models.extensions.user_set_cpu_affinity=1 +\endverbatim + + +\section StarPUEatsCPUs StarPU permanently eats 100% of all CPUs + +Yes, this is on purpose. + +By default, StarPU uses active polling on task queues to minimize wake-up +latency for better overall performance. We can call starpu_is_paused() to check whether the task processing by workers has been paused or not. + +If eating CPU time is a problem (e.g. application running on a desktop), +pass option \ref enable-blocking-drivers "--enable-blocking-drivers" to +configure. This will add some overhead when putting CPU workers to +sleep or waking them, but avoid eating 100% CPU permanently. + +\section PauseResume Interleaving StarPU and non-StarPU code + +If your application only partially uses StarPU, and you do not want to +call starpu_init() / starpu_shutdown() at the beginning/end +of each section, StarPU workers will poll for work between the +sections. To avoid this behavior, you can "pause" StarPU with the +starpu_pause() function. This will prevent the StarPU workers from +accepting new work (tasks that are already in progress will not be +frozen), and stop them from polling for more work. + +Note that this does not prevent you from submitting new tasks, but +they won't execute until starpu_resume() is called. Also note +that StarPU must not be paused when you call starpu_shutdown(), and +that this function pair works in a push/pull manner, i.e. you need to +match the number of calls to these functions to clear their effect. 
+ + +One way to use these functions could be: +\code{.c} +starpu_init(NULL); +starpu_worker_wait_for_initialisation(); // Wait for the worker to complete its initialization process +starpu_pause(); // To submit all the tasks without a single one executing +submit_some_tasks(); +starpu_resume(); // The tasks start executing + + +starpu_task_wait_for_all(); +starpu_pause(); // Stop the workers from polling + +// Non-StarPU code + +starpu_resume(); +// ... +starpu_shutdown(); +\endcode + +\section GPUEatingCores When running with CUDA or OpenCL devices, I am seeing fewer CPU cores + +Yes, this is on purpose. + +Since GPU devices are way faster than CPUs, StarPU needs to react quickly when +a task is finished, to feed the GPU with another task (StarPU actually submits +a couple of tasks in advance to pipeline this, but filling the pipeline +still has to be happening often enough), and thus it has to dedicate threads for +this, and this is a very CPU-consuming duty. StarPU thus dedicates one CPU core +for driving each GPU by default. + +Such dedication is also useful when a codelet is hybrid, i.e. while kernels are +running on the GPU, the codelet can run some computation, which will thus be run by +the CPU core instead of driving the GPU. + +One can choose to dedicate only one thread for all the CUDA devices by setting +the \ref STARPU_CUDA_THREAD_PER_DEV environment variable to \c 1. The application +however should use ::STARPU_CUDA_ASYNC on its CUDA codelets (asynchronous +execution), otherwise the execution of a synchronous CUDA codelet will +monopolize the thread, and other CUDA devices will thus starve while it is +executing. 
+ +\section CUDADrivers StarPU does not see my CUDA device + +First, make sure that CUDA is properly running outside StarPU: build and +run the following program with \c -lcudart : + +\code{.c} +#include +#include +#include + +int main(void) +{ + int n, i, version; + cudaError_t err; + + err = cudaGetDeviceCount(&n); + if (err) + { + fprintf(stderr,"cuda error %d\n", err); + exit(1); + } + cudaDriverGetVersion(&version); + printf("driver version %d\n", version); + cudaRuntimeGetVersion(&version); + printf("runtime version %d\n", version); + printf("\n"); + + for (i = 0; i < n; i++) + { + struct cudaDeviceProp props; + printf("CUDA%d\n", i); + err = cudaGetDeviceProperties(&props, i); + if (err) + { + fprintf(stderr,"cudaGetDeviceProperties cuda error %d\n", err); + continue; + } + printf("%s\n", props.name); + printf("%0.3f GB\n", (float) props.totalGlobalMem / (1<<30)); + printf("%u MP\n", props.multiProcessorCount); + printf("\n"); + + err = cudaSetDevice(i); + if (err) + { + fprintf(stderr,"cudaSetDevice(%d) cuda error %d\n", i, err); + continue; + } + + err = cudaFree(0); + if (err) + { + fprintf(stderr,"cudaFree(0) on %d cuda error %d\n", i, err); + continue; + } + } + return 0; +} +\endcode + +If that program does not find your device, the problem is not at the StarPU +level, but with the CUDA drivers, check the documentation of your CUDA +setup. This program is available in the source directory of +StarPU in tools/gpus/check_cuda.c, along with another CUDA +program tools/gpus/cuda_list.cu. 
+ +\section HIPDrivers StarPU does not see my HIP device + +First, make sure that HIP is properly running outside StarPU: build and +run the following program with \c hipcc : + +\code{.c} +#include +#include +#include + +int main(void) +{ + int i, cnt; + hipError_t hipres; + hipres = hipGetDeviceCount(&cnt); + if (hipres) + { + fprintf(stderr,"hip error: <%s>\n", hipGetErrorString(hipres)); + exit(1); + } + printf("number of hip devices: %d\n", cnt); + for (i = 0; i < cnt; i++) + { + struct hipDeviceProp_t props; + printf("HIP%d\n", i); + hipres = hipGetDeviceProperties(&props, i); + if (hipres) + { + fprintf(stderr,"hip error: <%s>\n", hipGetErrorString(hipres)); + continue; + } + printf("%s\n", props.name); + printf("%0.3f GB\n", (float) props.totalGlobalMem / (1<<30)); + printf("%u MP\n", props.multiProcessorCount); + printf("\n"); + } + return 0; +} +\endcode + +If that program does not find your device, the problem is not at the StarPU +level, but with the HIP drivers, check the documentation of your HIP +setup. This program is available in the source directory of +StarPU in tools/gpus/check_hip.c. 
+ +\section OpenCLDrivers StarPU does not see my OpenCL device + +First, make sure that OpenCL is properly running outside StarPU: build and +run the following program with \c -lOpenCL : + +\code{.c} +#include +#include +#include + +int main(void) +{ + cl_device_id did[16]; + cl_int err; + cl_platform_id pid, pids[16]; + cl_uint nbplat, nb; + char buf[128]; + size_t size; + int i, j; + + err = clGetPlatformIDs(sizeof(pids)/sizeof(pids[0]), pids, &nbplat); + assert(err == CL_SUCCESS); + printf("%u platforms\n", nbplat); + for (j = 0; j < nbplat; j++) + { + pid = pids[j]; + printf(" platform %d\n", j); + err = clGetPlatformInfo(pid, CL_PLATFORM_VERSION, sizeof(buf)-1, buf, &size); + assert(err == CL_SUCCESS); + buf[size] = 0; + printf(" platform version %s\n", buf); + + err = clGetDeviceIDs(pid, CL_DEVICE_TYPE_ALL, sizeof(did)/sizeof(did[0]), did, &nb); + if (err == CL_DEVICE_NOT_FOUND) + nb = 0; + else + assert(err == CL_SUCCESS); + printf("%d devices\n", nb); + for (i = 0; i < nb; i++) + { + err = clGetDeviceInfo(did[i], CL_DEVICE_VERSION, sizeof(buf)-1, buf, &size); + buf[size] = 0; + printf(" device %d version %s\n", i, buf); + } + } + + return 0; +} +\endcode + +If that program does not find your device, the problem is not at the StarPU +level, but with the OpenCL drivers, check the documentation of your OpenCL +implementation. This program is available in the source directory of +StarPU in tools/gpus/check_opencl.c. + +\section CUDACopyError There seems to be errors when copying to and from CUDA devices + +You should first try to disable asynchronous copies between CUDA and +CPU workers. You can either do that with the configuration parameter +\ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy" +or with the environment variable \ref +STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY. + +If your application keeps failing, you will find in the source +directory of StarPU, a directory named tools/gpus with various +programs. 
cuda_copy.cu is testing the direct or indirect copy +between CUDA devices. + +You can also try to just disable the direct gpu-gpu transfers (known +to fail under some hardware/cuda combinations) by setting the \ref +STARPU_ENABLE_CUDA_GPU_GPU_DIRECT environment variable to 0. + +\section IncorrectPerformanceModelFile I keep getting an "Incorrect performance model file" error + +The performance model file, used by StarPU to record the performance of +codelets, seems to have been corrupted. Perhaps a previous run of StarPU stopped +abruptly, and thus could not save it properly. You can have a look at the file +if you can fix it, but the simplest way is to just remove the file and run +again; StarPU will just have to re-perform calibration for the corresponding codelet. + +*/ diff --git a/doc/doxygen/chapters/starpu_faq/faq_intro.doxy b/doc/doxygen/chapters/starpu_faq/faq_intro.doxy new file mode 100644 index 0000000..dde106e --- /dev/null +++ b/doc/doxygen/chapters/starpu_faq/faq_intro.doxy @@ -0,0 +1,31 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroFAQ, --------- StarPU FAQs ---------} + +\webforeword + +This part explains how to better tune your application to achieve good performance, and also how to fix some difficulties you may encounter while implementing your applications. + +
    +
  • We give a list of features in Chapter \ref CheckListWhenPerformanceAreNotThere which should be checked to improve performances of your applications. +
  • There are some frequently asked questions in Chapter \ref FrequentlyAskedQuestions that may help you to solve your problems. +
+ +If you have problems that cannot be solved, please contact us. + + +*/ diff --git a/doc/doxygen/chapters/starpu_installation/building.doxy b/doc/doxygen/chapters/starpu_installation/building.doxy new file mode 100644 index 0000000..a11487f --- /dev/null +++ b/doc/doxygen/chapters/starpu_installation/building.doxy @@ -0,0 +1,267 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page BuildingAndInstallingStarPU Building and Installing StarPU + +Depending on the level of customization required for the library +installation, we offer several solutions. + +
    +
  1. Basic Installation or Evaluation: If you are looking to + simply try out the library, assess its performance on simple cases, + run examples, or use the latest stable version, we recommend the + following options: +
      +
    • + For Linux Debian or Ubuntu distributions, consider using the latest + StarPU Debian package (see \ref InstallingABinaryPackage). +
    • +
    • + For macOS, you can opt for Brew and follow the steps in \ref + InstallingASourcePackage. +
    • +
    • + Using an already installed module on a cluster, as explained in + \ref UsingModule +
    • +
    +
  2. +
  3. Customization for Specific Needs: If you intend to use + StarPU but require modifications, such as switching to another version + (git branch), changing the default MPI, utilizing a preferred + compiler, or altering source code, consider these options: +
      +
    • + Guix or Spack can be useful, as these package managers allow dynamic + changes during source-based builds. + Refer to \ref InstallingASourcePackage for details. +
    • +
    • + Alternatively, you can directly build from the source using the native + build system of the library (Makefile, GNU autotools). Instructions + can be found in \ref InstallingFromSource. +
    • +
    +
  4. +
  5. + Experiment Reproducibility: If your focus is on experiment + reproducibility, we recommend using Guix. Refer to \ref + InstallingASourcePackage for guidance. +
  6. +
+ +Whichever solution you choose, you can utilize the tool +bin/starpu_config to view all the configuration parameters used +during StarPU installation. + +Please refer to the provided documentation for specific installation +steps and details for each solution. + +\section InstallingABinaryPackage Installing a Binary Package + +One of the StarPU developers being a Debian Developer, the packages +are well integrated and very up-to-date. To see which packages are +available, simply type: + +\verbatim +$ apt-cache search starpu +\endverbatim + +To install what you need, type for example: + +\verbatim +$ sudo apt-get install libstarpu-dev +\endverbatim + +\section InstallingASourcePackage Installing a Source Package + +StarPU is available from different package managers. +
    +
  • +Guix https://gitlab.inria.fr/guix-hpc/guix-hpc +
  • +
  • +Spack https://github.com/spack/spack/ +
  • +
  • +Brew https://gitlab.inria.fr/solverstack/brew-repo +
  • +
+ +Documentation on how to install StarPU with these package managers is +directly available from the links specified above. We give below a +brief overview of the spack installation. + +\subsection SpackInstallation Installing the Spack Package + +Here is a quick guide to install StarPU with spack. + +\verbatim +$ git clone git@github.com:spack/spack.git +$ source ./spack/share/spack/setup-env.sh # if you use bash or zsh +$ spack install starpu +\endverbatim + +By default, the latest release will be installed, one can choose to +install a specific release or even the master version. + +\verbatim +$ spack install starpu@master +$ spack install starpu@1.3.5 +\endverbatim + +We strongly advise reading the detailed reference manual at +https://spack.readthedocs.io/en/latest/getting_started.html + +\subsection UsingModule Using a Module + +On some clusters, StarPU is provided as a module, for example on the +Jean Zay cluster. The information is available at +http://www.idris.fr/jean-zay/cpu/jean-zay-cpu-starpu.html + +\section InstallingFromSource Building from Source + +StarPU can be built and installed by the standard means of the GNU +autotools. The following chapter is intended to briefly remind how these tools +can be used to install StarPU. + +\subsection OptionalDependencies Optional Dependencies + +The hwloc (http://www.open-mpi.org/software/hwloc) topology +discovery library is not mandatory to use StarPU, but strongly +recommended. It allows for topology aware scheduling, which improves +performance. hwloc is available in major free operating system +distributions, and for most operating systems. Make sure to not only install +a hwloc or libhwloc package, but also hwloc-devel or +libhwloc-dev to have \c hwloc headers etc. + +If libhwloc is installed in a standard +location, no option is required, it will be detected automatically, +otherwise \ref with-hwloc "--with-hwloc=" should be used to specify its +location. 
+ +If libhwloc is not available on your system, the option +\ref without-hwloc "--without-hwloc" should be explicitly given when calling the +script configure. + + +\subsection GettingSources Getting Sources + +StarPU's sources can be obtained from the download page of +the StarPU website (https://starpu.gitlabpages.inria.fr/files/). + +All releases and the development tree of StarPU are freely available +on StarPU SCM server under the LGPL license. Some releases are available +under the BSD license. + +The latest release can be downloaded from the StarPU download page (https://starpu.gitlabpages.inria.fr/files/). + +The latest nightly snapshot can be downloaded from the StarPU website (https://starpu.gitlabpages.inria.fr/files/testing/). + +And finally, the current development version is also accessible via git. +It should only be used if you need the very latest changes (i.e. less +than a day old!). + +\verbatim +$ git clone git@gitlab.inria.fr:starpu/starpu.git +\endverbatim + +\subsection ConfiguringStarPU Configuring StarPU + +Running autogen.sh is not necessary when using the tarball +releases of StarPU. However, when using the source code from the git +repository, you first need to generate the script configure and the +different Makefiles. This requires the availability of autoconf and +automake >= 2.60. + +\verbatim +$ ./autogen.sh +\endverbatim + +You then need to configure StarPU. Details about options that are +useful to give to configure are given in \ref CompilationConfiguration. + +\verbatim +$ ./configure +\endverbatim + +If configure does not detect some software or produces errors, please +make sure to post the contents of the file config.log when +reporting the issue. + +By default, the files produced during the compilation are placed in +the source directory. As the compilation generates a lot of files, it +is advised to put them all in a separate directory. 
It is then +easier to clean up, and this allows to compile several configurations +out of the same source tree. To do so, simply enter the directory +where you want the compilation to produce its files, and invoke the +script configure located in the StarPU source directory. + +\verbatim +$ mkdir build +$ cd build +$ ../configure +\endverbatim + +By default, StarPU will be installed in /usr/local/bin, +/usr/local/lib, etc. You can specify an installation prefix +other than /usr/local using the option --prefix, for +instance: + +\verbatim +$ ../configure --prefix=$HOME/starpu +\endverbatim + +\subsection BuildingStarPU Building StarPU + +\verbatim +$ make +\endverbatim + +Once everything is built, you may want to test the result. An +extensive set of regression tests is provided with StarPU. Running the +tests is done by calling make check (by setting the variable \c STARPU_MICROBENCHS_DISABLED to disable benchmarks) + +These tests are run every night and the result from the main profile +is publicly available +(https://starpu.gitlabpages.inria.fr/files/testing/master/). + +\verbatim +$ STARPU_MICROBENCHS_DISABLED=1 make check +\endverbatim + +\subsection InstallingStarPU Installing StarPU + +In order to install StarPU at the location which was specified during +configuration: + +\verbatim +$ make install +\endverbatim + +If you have let StarPU install in /usr/local/, you additionally need to run + +\verbatim +$ sudo ldconfig +\endverbatim + +so the libraries can be found by the system. + +Libtool interface versioning information are included in +libraries names (libstarpu-1.4.so, libstarpumpi-1.4.so and +libstarpufft-1.4.so). 
+ +*/ diff --git a/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy b/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy new file mode 100644 index 0000000..277b6dc --- /dev/null +++ b/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page ConfigurationAndInitialization Configuration and Initialization + +This section explains the relationship between configure options, +compilation options and environment variables used by StarPU. + +
    +
  1. +Configure options are used during the installation +process to enable or disable specific features and libraries. +These options are set using flags like \ref enable-maxcpus +"--enable-maxcpus", which can be used to set the maximum number of +CPUs that can be used by StarPU. +
  2. + +
  3. +Compilation options are used to set specific parameters during the +compilation process, such as the optimization level, architecture +type, and debugging options. +
  4. + +
  5. +Environment variables are used to set runtime parameters and +control the behavior of the StarPU library. +For example, the \ref STARPU_NCPUS environment variable can be used to +specify the number of CPUs to use at runtime, overriding the value set +during compilation or installation. +
  6. +
+ +Options can also be set with the different fields of the +::starpu_conf parameter given to starpu_init(), such as +starpu_conf::ncpus, which is used to specify the number of CPUs that +StarPU should use for computations. + +*/ diff --git a/doc/doxygen/chapters/starpu_installation/configure_options.doxy b/doc/doxygen/chapters/starpu_installation/configure_options.doxy new file mode 100644 index 0000000..2f47c72 --- /dev/null +++ b/doc/doxygen/chapters/starpu_installation/configure_options.doxy @@ -0,0 +1,865 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page CompilationConfiguration Compilation Configuration + +The behavior of the StarPU library and tools may be tuned thanks to +the following configure options. + +\section CommonConfiguration Common Configuration + +
+ +
--enable-debug
+
+\anchor enable-debug +\addindex __configure__--enable-debug +Enable debugging messages. +
+ +
--enable-spinlock-check
+
+\anchor enable-spinlock-check +\addindex __configure__--enable-spinlock-check +Enable checking that spinlocks are taken and released properly. +
+ +
--enable-fast
+
+\anchor enable-fast +\addindex __configure__--enable-fast +Disable assertion checks, which saves computation time. +
+ +
--enable-verbose
+
+\anchor enable-verbose +\addindex __configure__--enable-verbose +Increase the verbosity of the debugging messages. This can be disabled +at runtime by setting the environment variable \ref STARPU_SILENT to +any value. --enable-verbose=extra increases the verbosity even more. + +\verbatim +$ STARPU_SILENT=1 ./vector_scal +\endverbatim +
+ +
--enable-coverage
+
+\anchor enable-coverage +\addindex __configure__--enable-coverage +Enable flags for the coverage tool gcov. +
+ +
--enable-quick-check
+
+\anchor enable-quick-check +\addindex __configure__--enable-quick-check +Specify that tests and examples should be run on a smaller data set, i.e. +allowing a faster execution time. +
+ +
--enable-long-check
+
+\anchor enable-long-check +\addindex __configure__--enable-long-check +Enable some exhaustive checks which take a really long time. +
+ +
--enable-new-check
+
+\anchor enable-new-check +\addindex __configure__--enable-new-check +Enable new testcases which are known to fail. +
+ +
--with-hwloc
+
+\anchor with-hwloc +\addindex __configure__--with-hwloc +Specify hwloc should be used by StarPU. hwloc should be found by the +means of the tool pkg-config. +
+ +
--with-hwloc=prefix
+
+\anchor with-hwloc-prefix +\addindex __configure__--with-hwloc-prefix +Specify that hwloc should be used by StarPU. hwloc should be found in the +directory specified by prefix. +
+ +
--without-hwloc
+
+\anchor without-hwloc +\addindex __configure__--without-hwloc +Specify hwloc should not be used by StarPU. +
+ +
--disable-build-doc
+
+\anchor disable-build-doc +\addindex __configure__--disable-build-doc +Disable the creation of the documentation. This should be done on a +machine which does not have the tools doxygen and latex +(plus the packages latex-xcolor and +texlive-latex-extra). +
+ +
--enable-build-doc-pdf
+
+\anchor enable-build-doc-pdf +\addindex __configure__--enable-build-doc-pdf +By default, only the HTML documentation is generated. Use this option +to also enable the generation of the PDF documentation. This should be +done on a machine which does have the tools doxygen and latex +(plus the packages latex-xcolor and +texlive-latex-extra). +
+ +
--enable-icc
+
+\anchor enable-icc +\addindex __configure__--enable-icc +Enable the compilation of specific ICC examples. +StarPU itself will not be compiled with ICC unless specified with CC=icc +
+ +
--disable-icc
+
+\anchor disable-icc +\addindex __configure__--disable-icc +Disable the usage of the ICC compiler. Otherwise, when an ICC compiler +is found, some specific ICC examples are compiled as explained above. +
+ +
--with-check-flags
+
+\anchor with-check-flags +\addindex __configure__--with-check-flags +Specify flags which will be given to C, CXX and Fortran compilers when valid +
+ +
+ +Additionally, the script configure recognizes many variables, which +can be listed by typing ./configure --help. For example, +./configure NVCCFLAGS="-arch sm_20" adds a flag for the compilation of +CUDA kernels, and NVCC_CC=gcc-5 allows changing the C++ compiler +used by nvcc. + + +\section ConfiguringWorkers Configuring Workers + +
+ +
--enable-data-locality-enforce
+
+\anchor enable-data-locality-enforce +\addindex __configure__--enable-data-locality-enforce +Enable data locality enforcement when picking up a worker to execute a task. This mechanism is by default disabled. +
+ +
--enable-blocking-drivers
+
+\anchor enable-blocking-drivers +\addindex __configure__--enable-blocking-drivers +By default, StarPU keeps CPU workers awake permanently, for better +reactivity. This option makes StarPU put CPU workers to real sleep when there +are not enough tasks to compute. +
+ +
--enable-worker-callbacks
+
+\anchor enable-worker-callbacks +\addindex __configure__--enable-worker-callbacks +If blocking drivers are enabled, enable callbacks to notify an external resource manager +about workers going to sleep and waking up. +
+ +
--enable-maxcpus=count
+
+\anchor enable-maxcpus +\addindex __configure__--enable-maxcpus +Use at most count CPU cores. This information is then +available as the macro ::STARPU_MAXCPUS. + +The default value is auto. It allows StarPU to automatically +detect the number of CPUs on the build machine. This should not be +used if the running host has a larger number of CPUs than the +build machine. +
+ +
--enable-maxnumanodes=count
+
+\anchor enable-maxnumanodes +\addindex __configure__--enable-maxnumanodes +Use at most count NUMA nodes. This information is then +available as the macro ::STARPU_MAXNUMANODES. + +The default value is auto. It allows StarPU to automatically +detect the number of NUMA nodes on the build machine. This should not be +used if the running host has a larger number of NUMA nodes than the +build machine. +
+ +
--disable-cpu
+
+\anchor disable-cpu +\addindex __configure__--disable-cpu +Disable the use of CPUs of the machine. Only GPUs etc. will be used. +
+ +
--enable-maxcudadev=count
+
+\anchor enable-maxcudadev +\addindex __configure__--enable-maxcudadev +Use at most count CUDA devices. This information is then +available as the macro ::STARPU_MAXCUDADEVS. +
+ +
--disable-cuda
+
+\anchor disable-cuda +\addindex __configure__--disable-cuda +Disable the use of CUDA, even if a valid CUDA installation was detected. +
+ +
--with-cuda-dir=prefix
+
+\anchor with-cuda-dir +\addindex __configure__--with-cuda-dir +Search for CUDA under prefix, which should notably contain the file +include/cuda.h. +
+ +
--with-cuda-include-dir=dir
+
+\anchor with-cuda-include-dir +\addindex __configure__--with-cuda-include-dir +Search for CUDA headers under dir, which should +notably contain the file cuda.h. This defaults to +/include appended to the value given to +\ref with-cuda-dir "--with-cuda-dir". +
+ +
--with-cuda-lib-dir=dir
+
+\anchor with-cuda-lib-dir +\addindex __configure__--with-cuda-lib-dir +Search for CUDA libraries under dir, which should notably contain +the CUDA shared libraries---e.g., libcuda.so. This defaults to +/lib appended to the value given to +\ref with-cuda-dir "--with-cuda-dir". +
+ +
--disable-cuda-memcpy-peer
+
+\anchor disable-cuda-memcpy-peer +\addindex __configure__--disable-cuda-memcpy-peer +Explicitly disable peer transfers when using CUDA 4.0. +
+ +
--enable-maxopencldev=count
+
+\anchor enable-maxopencldev +\addindex __configure__--enable-maxopencldev +Use at most count OpenCL devices. This information is then +available as the macro ::STARPU_MAXOPENCLDEVS. +
+ +
--disable-opencl
+
+\anchor disable-opencl +\addindex __configure__--disable-opencl +Disable the use of OpenCL, even if the SDK is detected. +
+ +
--with-opencl-dir=prefix
+
+\anchor with-opencl-dir +\addindex __configure__--with-opencl-dir +Search for an OpenCL implementation under prefix, which should +notably contain include/CL/cl.h (or include/OpenCL/cl.h +on Mac OS). +
+ +
--with-opencl-include-dir=dir
+
+\anchor with-opencl-include-dir +\addindex __configure__--with-opencl-include-dir +Search for OpenCL headers under dir, which should notably contain +CL/cl.h (or OpenCL/cl.h on Mac OS). This defaults to +/include appended to the value given to +\ref with-opencl-dir "--with-opencl-dir". +
+ +
--with-opencl-lib-dir=dir
+
+\anchor with-opencl-lib-dir +\addindex __configure__--with-opencl-lib-dir +Search for an OpenCL library under dir, which should notably +contain the OpenCL shared libraries---e.g. libOpenCL.so. This defaults to +/lib appended to the value given to +\ref with-opencl-dir "--with-opencl-dir". +
+ +
--enable-opencl-simulator
+
+\anchor enable-opencl-simulator +\addindex __configure__--enable-opencl-simulator +Enable considering the provided OpenCL implementation as a simulator, i.e. use +the kernel duration returned by OpenCL profiling information as wallclock time +instead of the actual measured real time. This requires the SimGrid support. +
+ +
--enable-maximplementations=count
+
+\anchor enable-maximplementations +\addindex __configure__--enable-maximplementations +Allow for at most count codelet implementations for the same +target device. This information is then available as the +macro ::STARPU_MAXIMPLEMENTATIONS. +
+ +
--enable-max-sched-ctxs=count
+
+\anchor enable-max-sched-ctxs +\addindex __configure__--enable-max-sched-ctxs +Allow for at most count scheduling contexts. +This information is then available as the macro +::STARPU_NMAX_SCHED_CTXS. +
+ +
--disable-asynchronous-copy
+
+\anchor disable-asynchronous-copy +\addindex __configure__--disable-asynchronous-copy +Disable asynchronous copies between CPU and GPU devices. +The AMD implementation of OpenCL is known to +fail when copying data asynchronously. When using this implementation, +it is therefore necessary to disable asynchronous data transfers. +
+ +
--disable-asynchronous-cuda-copy
+
+\anchor disable-asynchronous-cuda-copy +\addindex __configure__--disable-asynchronous-cuda-copy +Disable asynchronous copies between CPU and CUDA devices. +
+ +
--disable-asynchronous-opencl-copy
+
+\anchor disable-asynchronous-opencl-copy +\addindex __configure__--disable-asynchronous-opencl-copy +Disable asynchronous copies between CPU and OpenCL devices. +The AMD implementation of OpenCL is known to +fail when copying data asynchronously. When using this implementation, +it is therefore necessary to disable asynchronous data transfers. +
+ +
--disable-asynchronous-hip-copy
+
+\anchor disable-asynchronous-hip-copy +\addindex __configure__--disable-asynchronous-hip-copy +Disable asynchronous copies between CPU and HIP devices. +
+ +
--disable-asynchronous-mpi-master-slave-copy
+
+\anchor disable-asynchronous-mpi-master-slave-copy +\addindex __configure__--disable-asynchronous-mpi-master-slave-copy +Disable asynchronous copies between CPU and MPI Slave devices. +
+ +
--disable-asynchronous-tcpip-master-slave-copy
+
+\anchor disable-asynchronous-tcpip-master-slave-copy +\addindex __configure__--disable-asynchronous-tcpip-master-slave-copy +Disable asynchronous copies between CPU and TCP/IP Slave devices. +
+ +
--disable-asynchronous-fpga-copy
+
+\anchor disable-asynchronous-fpga-copy +\addindex __configure__--disable-asynchronous-fpga-copy +Disable asynchronous copies between CPU and Maxeler FPGA devices. +
+ +
--enable-maxnodes=count
+
+\anchor enable-maxnodes +\addindex __configure__--enable-maxnodes +Use at most count memory nodes. This information is then available as +the macro ::STARPU_MAXNODES. Reducing it considerably reduces the memory +used by StarPU data structures. +
+ +
--with-max-fpga=dir
+
+\anchor with-max-fpga +\addindex __configure__--with-max-fpga +Enable the Maxeler FPGA driver support, and optionally specify the location of +the Maxeler FPGA library. +
+ +
--disable-asynchronous-max-fpga-copy
+
+\anchor disable-asynchronous-max-fpga-copy +\addindex __configure__--disable-asynchronous-max-fpga-copy +Disable asynchronous copies between CPU and Maxeler FPGA devices. +
+ +
+ +\section ExtensionConfiguration Extension Configuration + +
+ +
--enable-starpupy
+
+\anchor enable-starpupy +\addindex __configure__--enable-starpupy +Enable the StarPU Python Interface (\ref PythonInterface) +
+ +
--enable-python-multi-interpreter
+
+\anchor enable-python-multi-interpreter +\addindex __configure__--enable-python-multi-interpreter +Enable the use of multiple interpreters in the StarPU Python Interface (\ref MultipleInterpreters) +
--disable-mpi
+
+\anchor disable-mpi +\addindex __configure__--disable-mpi +Disable the build of libstarpumpi. By default, it is enabled when MPI is found. +
+ +
--enable-mpi
+
+\anchor enable-mpi +\addindex __configure__--enable-mpi +Enable the build of libstarpumpi. This is necessary when using Simgrid+MPI. +
+ +
--with-mpicc=path
+
+\anchor with-mpicc +\addindex __configure__--with-mpicc +Use the compiler mpicc at path, for StarPU-MPI. +(\ref MPISupport). +
+ +
--enable-mpi-pedantic-isend
+
+\anchor enable-mpi-pedantic-isend +\addindex __configure__--enable-mpi-pedantic-isend +Before performing any MPI communication, StarPU-MPI waits for the data +to be available in the main memory of the node submitting the request. +For send communications, data is acquired with the mode ::STARPU_R. +When enabling the pedantic mode, data are instead acquired with the +::STARPU_RW which thus ensures that there is not more than 1 +concurrent \c MPI_Isend calls accessing the data +and StarPU does not read from it from tasks during the communication. +
+ +
--enable-mpi-master-slave
+
+\anchor enable-mpi-master-slave +\addindex __configure__--enable-mpi-master-slave +Enable the MPI Master-Slave support. By default, it is disabled. +
+ +
--enable-mpi-verbose
+
+\anchor enable-mpi-verbose +\addindex __configure__--enable-mpi-verbose +Increase the verbosity of the MPI debugging messages. This can be disabled +at runtime by setting the environment variable \ref STARPU_SILENT to +any value. --enable-mpi-verbose=extra increases the verbosity even more. + +\verbatim +$ STARPU_SILENT=1 mpirun -np 2 ./insert_task +\endverbatim +
+ +
--enable-mpi-ft
+
+\anchor enable-mpi-ft +\addindex __configure__--enable-mpi-ft +Enable the MPI checkpoint mechanism. See \ref API_MPI_FT_Support +
+ +
--enable-mpi-ft-stats
+
+\anchor enable-mpi-ft-stats +\addindex __configure__--enable-mpi-ft-stats +Enable the statistics for the MPI checkpoint mechanism. See \ref API_MPI_FT_Support +
+ +
--enable-tcpip-master-slave
+
+\anchor enable-tcpip-master-slave +\addindex __configure__--enable-tcpip-master-slave +Enable the TCP/IP Master-Slave support (\ref TCPIPSupport). By default, it is disabled. +
+ +
--enable-nmad
+
+\anchor enable-nmad +\addindex __configure__--enable-nmad +Enable the NewMadeleine implementation for StarPU-MPI. See \ref Nmad for more details. +
+ +
--disable-fortran
+
+\anchor disable-fortran +\addindex __configure__--disable-fortran +Disable the fortran extension. By default, it is enabled when a +fortran compiler is found. +
+ +
--disable-socl
+
+\anchor disable-socl +\addindex __configure__--disable-socl +Disable the SOCL extension (\ref SOCLOpenclExtensions). By +default, it is enabled when an OpenCL implementation is found. +
+ +
--enable-openmp
+
+\anchor enable-openmp +\addindex __configure__--enable-openmp +Enable OpenMP Support (\ref OpenMPRuntimeSupport) +
+ +
--enable-openmp-llvm
+
+\anchor enable-openmp-llvm +\addindex __configure__--enable-openmp-llvm +Enable LLVM OpenMP Support (\ref OMPLLVM) +
+ +
--enable-bubble
+
+\anchor enable-bubble +\addindex __configure__--enable-bubble +Enable Hierarchical dags support (\ref HierarchicalDAGS) + +
--enable-parallel-worker
+
+\anchor enable-parallel-worker +\addindex __configure__--enable-parallel-worker +Enable parallel worker support (\ref ParallelWorker) +
+ +
--enable-eclipse-plugin
+
+\anchor enable-eclipse-plugin +\addindex __configure__--enable-eclipse-plugin +Enable the StarPU Eclipse Plugin. See \ref EclipsePlugin to know how to install Eclipse. +
+ +
+ +\section AdvancedConfiguration Advanced Configuration + +
+ +
--enable-perf-debug
+
+\anchor enable-perf-debug +\addindex __configure__--enable-perf-debug +Enable performance debugging through gprof. +
+ +
--enable-model-debug
+
+\anchor enable-model-debug +\addindex __configure__--enable-model-debug +Enable performance model debugging. +
+ +
--enable-fxt-lock
+
+\anchor enable-fxt-lock +\addindex __configure__--enable-fxt-lock +Enable additional trace events which describe the behaviour of locks. This is however +extremely heavy and should only be enabled when debugging the internals of StarPU. +
+ +
--enable-maxbuffers
+
+\anchor enable-maxbuffers +\addindex __configure__--enable-maxbuffers +Define the maximum number of buffers that tasks will be able to take +as parameters, then available as the macro ::STARPU_NMAXBUFS. +
+ +
--enable-fxt-max-files=count
+
+\anchor enable-fxt-max-files +\addindex __configure__--enable-fxt-max-files +Use at most count mpi nodes fxt files for generating traces. This information is then available as +the macro ::STARPU_FXT_MAX_FILES. This information is used by FxT tools when considering multi node traces. +Default value is 64. +
+ +
--enable-allocation-cache
+
+\anchor enable-allocation-cache +\addindex __configure__--enable-allocation-cache +Enable the use of a data allocation cache to avoid the cost of data allocation with +CUDA. Still experimental. +
+ +
--enable-opengl-render
+
+\anchor enable-opengl-render +\addindex __configure__--enable-opengl-render +Enable the use of OpenGL for the rendering of some examples. +// TODO: rather default to enabled when detected +
+ +
--enable-blas-lib=prefix
+
+\anchor enable-blas-lib +\addindex __configure__--enable-blas-lib +Specify the blas library to be used by some of the examples. Libraries available : +- \c none [default] : no BLAS library is used +- \c atlas: use ATLAS library +- \c goto: use GotoBLAS library +- \c openblas: use OpenBLAS library +- \c mkl: use MKL library (you may need to set specific \c CFLAGS and \c LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags) +
+ +
--enable-leveldb
+
+\anchor enable-leveldb +\addindex __configure__--enable-leveldb +Enable linking with LevelDB if available +
+ +
--enable-hdf5
+
+\anchor enable-hdf5 +\addindex __configure__--enable-hdf5 +Enable building HDF5 support. +
+ +
--with-hdf5-include-dir=path
+
+\anchor with-hdf5-include-dir +\addindex __configure__--with-hdf5-include-dir +Specify the directory where the header file \c hdf5.h is stored. +
+ +
--with-hdf5-lib-dir=path
+
+\anchor with-hdf5-lib-dir +\addindex __configure__--with-hdf5-lib-dir +Specify the directory where the library \c hdf5 is stored. +
+ +
--disable-starpufft
+
+\anchor disable-starpufft +\addindex __configure__--disable-starpufft +Disable the build of libstarpufft, even if fftw or cuFFT is available. +
+ +
--enable-starpufft-examples
+
+\anchor enable-starpufft-examples +\addindex __configure__--enable-starpufft-examples +Enable the compilation and the execution of the libstarpufft examples. +By default, they are neither compiled nor checked. +
+ +
--with-fxt=prefix
+
+\anchor with-fxt +\addindex __configure__--with-fxt +Search for FxT under prefix. +FxT (http://savannah.nongnu.org/projects/fkt) is used to generate +traces of scheduling events, which can then be rendered using ViTE +(\ref Off-linePerformanceFeedback). prefix should +notably contain include/fxt/fxt.h. +
+ +
--with-perf-model-dir=dir
+
+\anchor with-perf-model-dir +\addindex __configure__--with-perf-model-dir +Store performance models under dir, instead of the current user's +home. +
+ +
--with-goto-dir=prefix
+
+\anchor with-goto-dir +\addindex __configure__--with-goto-dir +Search for GotoBLAS under prefix, which should notably contain +libgoto.so or libgoto2.so. +
+ +
--with-atlas-dir=prefix
+
+\anchor with-atlas-dir +\addindex __configure__--with-atlas-dir +Search for ATLAS under prefix, which should notably contain +include/cblas.h. +
+ +
--with-mkl-cflags=cflags
+
+\anchor with-mkl-cflags +\addindex __configure__--with-mkl-cflags +Use cflags to compile code that uses the MKL library. +
+ +
--with-mkl-ldflags=ldflags
+
+\anchor with-mkl-ldflags +\addindex __configure__--with-mkl-ldflags +Use ldflags when linking code that uses the MKL library. Note +that the MKL website +(http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/) +provides a script to determine the linking flags. +
+ +
--disable-glpk
+
+\anchor disable-glpk +\addindex __configure__--disable-glpk +Disable the use of \c libglpk for computing area bounds. +
+ +
--disable-build-tests
+
+\anchor disable-build-tests +\addindex __configure__--disable-build-tests +Disable the build of tests. +
+ +
--disable-build-examples
+
+\anchor disable-build-examples +\addindex __configure__--disable-build-examples +Disable the build of examples. +
+ +
--enable-sc-hypervisor
+
+\anchor enable-sc-hypervisor +\addindex __configure__--enable-sc-hypervisor +Enable the Scheduling Context Hypervisor plugin (\ref SchedulingContextHypervisor). +By default, it is disabled. +
+ +
--enable-memory-stats
+
+\anchor enable-memory-stats +\addindex __configure__--enable-memory-stats +Enable memory statistics (\ref MemoryFeedback). +
+ +
--enable-simgrid
+
+\anchor enable-simgrid +\addindex __configure__--enable-simgrid +Enable simulation of execution in SimGrid, to allow easy experimentation with +various numbers of cores and GPUs, or amount of memory, etc. Experimental. + +The path to SimGrid can be specified through the SIMGRID_CFLAGS and +SIMGRID_LIBS environment variables, for instance: + +\verbatim +export SIMGRID_CFLAGS="-I/usr/local/simgrid/include" +export SIMGRID_LIBS="-L/usr/local/simgrid/lib -lsimgrid" +\endverbatim + +
+ +
--with-simgrid-dir
+
+\anchor with-simgrid-dir +\addindex __configure__--with-simgrid-dir +Similar to the option \ref enable-simgrid "--enable-simgrid" but also +allows to specify the location to the SimGrid library. +
+ +
--with-simgrid-include-dir
+
+\anchor with-simgrid-include-dir +\addindex __configure__--with-simgrid-include-dir +Similar to the option \ref enable-simgrid "--enable-simgrid" but also +allows to specify the location to the SimGrid include directory. +
+ +
--with-simgrid-lib-dir
+
+\anchor with-simgrid-lib-dir +\addindex __configure__--with-simgrid-lib-dir +Similar to the option \ref enable-simgrid "--enable-simgrid" but also +allows to specify the location to the SimGrid lib directory. +
+ +
--with-smpirun=path
+
+\anchor with-smpirun +\addindex __configure__--with-smpirun +Use the smpirun at path +
+ +
--enable-simgrid-mc
+
+\anchor enable-simgrid-mc +\addindex __configure__--enable-simgrid-mc +Enable the Model Checker in simulation of execution in SimGrid, to allow +exploring various execution paths. +
+ +
--enable-calibration-heuristic
+
+\anchor enable-calibration-heuristic +\addindex __configure__--enable-calibration-heuristic +Allow to set the maximum authorized percentage of deviation +for the history-based calibrator of StarPU. A correct value +of this parameter must be in [0..100]. The default value of +this parameter is 10. Experimental. +
+ +
--enable-mlr
+
+\anchor enable-mlr +\addindex __configure__--enable-mlr +Allow to enable multiple linear regression models (see \ref PerformanceModelExample) +
+ +
--enable-mlr-system-blas
+
+\anchor enable-mlr-system-blas +\addindex __configure__--enable-mlr-system-blas +Allow to make multiple linear regression models use the system-provided BLAS for dgels +(see \ref PerformanceModelExample) +
+ +
+ +*/ diff --git a/doc/doxygen/chapters/starpu_installation/environment_variables.doxy b/doc/doxygen/chapters/starpu_installation/environment_variables.doxy new file mode 100644 index 0000000..556620d --- /dev/null +++ b/doc/doxygen/chapters/starpu_installation/environment_variables.doxy @@ -0,0 +1,2072 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page ExecutionConfigurationThroughEnvironmentVariables Execution Configuration Through Environment Variables + +The StarPU library and tools's behavior can be tuned using the following environment variables. To access these variables, you can use the provided functions. +
    +
  • +starpu_getenv() retrieves the value of an environment variable. +
  • +
  • +starpu_get_env_string_var_default() retrieves the value of an +environment variable as a string. If the variable is not set, you can +provide a default value. +
  • +
  • +starpu_get_env_size_default() retrieves the value of an environment +variable as a size in bytes, or a default value if the environment +variable is not set. +
  • +
+ +These functions allow to fine-tune the behavior of StarPU +according to your preferences and requirements by leveraging +environment variables. + +\section EnvConfiguringWorkers Configuring Workers + +\subsection Basic General Configuration +
+
STARPU_WORKERS_NOBIND
+
+\anchor STARPU_WORKERS_NOBIND +\addindex __env__STARPU_WORKERS_NOBIND +Setting it to non-zero will prevent StarPU from binding its threads to +CPUs. This is for instance useful when running the test suite in parallel. +
+ +
STARPU_WORKERS_GETBIND
+
+\anchor STARPU_WORKERS_GETBIND +\addindex __env__STARPU_WORKERS_GETBIND +By default StarPU uses the OS-provided CPU binding to determine +how many and which CPU cores it should use. This is notably useful when running +several StarPU-MPI processes on the same host, to let the MPI launcher set the +CPUs to be used. Default value is 1. + +If that binding is erroneous (e.g. because the job scheduler binds to just one +core of the allocated cores), you can set \ref STARPU_WORKERS_GETBIND to 0 to make +StarPU use all cores of the machine. +
+ +
STARPU_WORKERS_CPUID
+
+\anchor STARPU_WORKERS_CPUID +\addindex __env__STARPU_WORKERS_CPUID +Passing an array of integers in \ref STARPU_WORKERS_CPUID +specifies on which logical CPU the different workers should be +bound. For instance, if STARPU_WORKERS_CPUID="0 1 4 5", the first +worker will be bound to logical CPU #0, the second CPU worker will be bound to +logical CPU #1 and so on. Note that the logical ordering of the CPUs is either +determined by the OS, or provided by the library hwloc in case it is +available. +Ranges can be provided: for instance, STARPU_WORKERS_CPUID="1-3 +5" will bind the first three workers on logical CPUs #1, #2, and #3, and the +fourth worker on logical CPU #5. Unbound ranges can also be provided: +STARPU_WORKERS_CPUID="1-" will bind the workers starting from logical +CPU #1 up to last CPU. + +Note that the first workers correspond to the CUDA workers, then come the +OpenCL workers, and finally the CPU workers. For example, if +we have STARPU_NCUDA=1, STARPU_NOPENCL=1, STARPU_NCPU=2 +and STARPU_WORKERS_CPUID="0 2 1 3", the CUDA device will be controlled +by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and +the logical CPUs #1 and #3 will be used by the CPU workers. + +If the number of workers is larger than the array given in +\ref STARPU_WORKERS_CPUID, the workers are bound to the logical CPUs in a +round-robin fashion: if STARPU_WORKERS_CPUID="0 1", the first +and the third (resp. second and fourth) workers will be put on CPU #0 +(resp. CPU #1). + +This variable is ignored if the field +starpu_conf::use_explicit_workers_bindid passed to starpu_init() is +set. + +Setting \ref STARPU_WORKERS_CPUID or \ref STARPU_WORKERS_COREID overrides the binding +provided by the job scheduler, as described for \ref STARPU_WORKERS_GETBIND. +
+ +
STARPU_WORKERS_COREID
+
+\anchor STARPU_WORKERS_COREID +\addindex __env__STARPU_WORKERS_COREID +Same as \ref STARPU_WORKERS_CPUID, but bind the workers to cores instead of PUs +(hyperthreads). +
+ +
STARPU_NTHREADS_PER_CORE
+
+\anchor STARPU_NTHREADS_PER_CORE +\addindex __env__STARPU_NTHREADS_PER_CORE +Specify how many threads StarPU should run on each core. The +default is 1 because kernels are usually already optimized for using a full +core. Setting this to e.g. 2 instead allows exploiting hyperthreading. +
+ +
STARPU_MAIN_THREAD_BIND
+
+\anchor STARPU_MAIN_THREAD_BIND +\addindex __env__STARPU_MAIN_THREAD_BIND +Tell StarPU to bind the thread that calls starpu_initialize() to +a reserved CPU, subtracted from the CPU workers. +
+ +
STARPU_MAIN_THREAD_CPUID
+
+\anchor STARPU_MAIN_THREAD_CPUID +\addindex __env__STARPU_MAIN_THREAD_CPUID +Tell StarPU to bind the thread that calls starpu_initialize() to +the given CPU ID (using logical numbering). +
+ +
STARPU_MAIN_THREAD_COREID
+
+\anchor STARPU_MAIN_THREAD_COREID +\addindex __env__STARPU_MAIN_THREAD_COREID +Same as \ref STARPU_MAIN_THREAD_CPUID, but bind the thread that calls +starpu_initialize() to the given core (using logical numbering), instead of the PU (hyperthread). +
+ +
STARPU_WORKER_TREE
+
+\anchor STARPU_WORKER_TREE +\addindex __env__STARPU_WORKER_TREE +Define to 1 to enable the tree iterator in schedulers. +
+ +
STARPU_SINGLE_COMBINED_WORKER
+
+\anchor STARPU_SINGLE_COMBINED_WORKER +\addindex __env__STARPU_SINGLE_COMBINED_WORKER +Tell StarPU to create several workers which won't be able to work +concurrently. It will by default create combined workers, whose size goes from 1 +to the total number of CPU workers in the system. \ref STARPU_MIN_WORKERSIZE +and \ref STARPU_MAX_WORKERSIZE can be used to change this default. +
+ +
STARPU_MIN_WORKERSIZE
+
+\anchor STARPU_MIN_WORKERSIZE +\addindex __env__STARPU_MIN_WORKERSIZE +Specify the minimum size of the combined workers. Default value is 2. +
+ +
STARPU_MAX_WORKERSIZE
+
+\anchor STARPU_MAX_WORKERSIZE +\addindex __env__STARPU_MAX_WORKERSIZE +Specify the maximum size of the combined workers. Default value is the +number of CPU workers in the system. +
+ +
STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER
+
+\anchor STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER +\addindex __env__STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER +Specify how many elements are allowed between combined workers +created from \c hwloc information. For instance, in the case of sockets with 6 +cores without shared L2 caches, if \ref STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER is +set to 6, no combined worker will be synthesized beyond one for the socket +and one per core. If it is set to 3, 3 intermediate combined workers will be +synthesized, to divide the socket cores into 3 chunks of 2 cores. If it is set to +2, 2 intermediate combined workers will be synthesized, to divide the socket +cores into 2 chunks of 3 cores, and then 3 additional combined workers will be +synthesized, to divide the former synthesized workers into a bunch of 2 cores, +and the remaining core (for which no combined worker is synthesized since there +is already a normal worker for it). + +Default value is 2, thus makes StarPU tend to build binary trees of combined +workers. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_COPY +Disable asynchronous copies between CPU and GPU devices. +The AMD implementation of OpenCL is known to +fail when copying data asynchronously. When using this implementation, +it is therefore necessary to disable asynchronous data transfers. One can call starpu_asynchronous_copy_disabled() to check whether asynchronous data transfers between CPU and accelerators are disabled. + +See also \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY and \ref +STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. +
+ +
STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK
+
+\anchor STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK +\addindex __env__STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK +Set to 1 to make task transfer time estimations artificially include +the time that will be needed to write back data to the main memory. +
+ +
STARPU_DISABLE_PINNING
+
+\anchor STARPU_DISABLE_PINNING +\addindex __env__STARPU_DISABLE_PINNING +Disable (1) or Enable (0) pinning host memory allocated through starpu_malloc(), starpu_memory_pin() +and friends. Default value is Enable. +This permits to test the performance effect of memory pinning. +
+ +
STARPU_BACKOFF_MIN
+
+\anchor STARPU_BACKOFF_MIN +\addindex __env__STARPU_BACKOFF_MIN +Set minimum exponential backoff of number of cycles to pause when spinning. Default value is 1. +
+ +
STARPU_BACKOFF_MAX
+
+\anchor STARPU_BACKOFF_MAX +\addindex __env__STARPU_BACKOFF_MAX +Set maximum exponential backoff of number of cycles to pause when spinning. Default value is 32. +
+ +
STARPU_SINK
+
+\anchor STARPU_SINK +\addindex __env__STARPU_SINK +Defined internally by StarPU when running in master slave mode. +
+ +
STARPU_ENABLE_MAP
+
+\anchor STARPU_ENABLE_MAP +\addindex __env__STARPU_ENABLE_MAP +Disable (0) or Enable (1) support for memory mapping between memory nodes. The +default is Disabled. One can call starpu_map_enabled() to check whether memory mapping support between memory nodes is enabled. +
+ +
STARPU_DATA_LOCALITY_ENFORCE
+
+\anchor STARPU_DATA_LOCALITY_ENFORCE +\addindex __env__STARPU_DATA_LOCALITY_ENFORCE +Enable (1) or Disable (0) data locality enforcement when picking up a worker to execute a task. Default value is Disable. +
+ +
+ +\subsection cpuWorkers CPU Workers +
+
STARPU_NCPU
+
+\anchor STARPU_NCPU +\addindex __env__STARPU_NCPU +Specify the number of CPU workers (thus not including workers +dedicated to control accelerators). Note that by default, StarPU will +not allocate more CPU workers than there are physical CPUs, and that +some CPUs are used to control the accelerators. +
+ +
STARPU_RESERVE_NCPU
+
+\anchor STARPU_RESERVE_NCPU +\addindex __env__STARPU_RESERVE_NCPU +Specify the number of CPU cores that should not be used by StarPU, so the +application can use starpu_get_next_bindid() and starpu_bind_thread_on() to bind +its own threads. + +This option is ignored if \ref STARPU_NCPU or starpu_conf::ncpus is set. +
+ +
STARPU_NCPUS
+
+\anchor STARPU_NCPUS +\addindex __env__STARPU_NCPUS +Deprecated. You should use \ref STARPU_NCPU. +
+ +
+ +\subsection cudaWorkers CUDA Workers +
+
STARPU_NCUDA
+
+\anchor STARPU_NCUDA +\addindex __env__STARPU_NCUDA +Specify the number of CUDA devices that StarPU can use. If +\ref STARPU_NCUDA is lower than the number of physical devices, it is +possible to select which GPU devices should be used by the means of the +environment variable \ref STARPU_WORKERS_CUDAID. By default, StarPU will +create as many CUDA workers as there are GPU devices. +
+ +
STARPU_NWORKER_PER_CUDA
+
+\anchor STARPU_NWORKER_PER_CUDA +\addindex __env__STARPU_NWORKER_PER_CUDA +Specify the number of workers per CUDA device, and thus the number of kernels +which will be concurrently running on the devices, i.e. the number of CUDA +streams. Default value is 1. + +For parallelism to be really achieved, one also needs to make CUDA codelets +asynchronous (it is recommended for single-worker performance too anyway, +see ::STARPU_CUDA_ASYNC in \ref CUDA-specificOptimizations), or to set \ref +STARPU_CUDA_THREAD_PER_WORKER to 1. +
+ +
STARPU_CUDA_THREAD_PER_WORKER
+
+\anchor STARPU_CUDA_THREAD_PER_WORKER +\addindex __env__STARPU_CUDA_THREAD_PER_WORKER +Specify whether the cuda driver should use one thread per stream (1) or to use +a single thread to drive all the streams of the device or all devices (0), and +\ref STARPU_CUDA_THREAD_PER_DEV determines whether it is one thread per device or one +thread for all devices. Default value is 0. Setting it to 1 is contradictory +with setting \ref STARPU_CUDA_THREAD_PER_DEV. +
+ +
STARPU_CUDA_THREAD_PER_DEV
+
+\anchor STARPU_CUDA_THREAD_PER_DEV +\addindex __env__STARPU_CUDA_THREAD_PER_DEV +Specify whether the cuda driver should use one thread per device (1) or to use a +single thread to drive all the devices (0). Default value is 1. It does not +make sense to set this variable if \ref STARPU_CUDA_THREAD_PER_WORKER is set to 1 +(since \ref STARPU_CUDA_THREAD_PER_DEV is then meaningless). +
+ +
STARPU_CUDA_PIPELINE
+
+\anchor STARPU_CUDA_PIPELINE +\addindex __env__STARPU_CUDA_PIPELINE +Specify how many asynchronous tasks are submitted in advance on CUDA +devices. This for instance permits to overlap task management with the execution +of previous tasks, but it also allows concurrent execution on Fermi cards, which +otherwise bring spurious synchronizations. Default value is 2. Setting the value to 0 forces a synchronous +execution of all tasks. +
+ +
STARPU_WORKERS_CUDAID
+
+\anchor STARPU_WORKERS_CUDAID +\addindex __env__STARPU_WORKERS_CUDAID +Select which CUDA devices should be used to run CUDA workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). +On a machine equipped with 4 GPUs, setting STARPU_WORKERS_CUDAID="1 3" and +STARPU_NCUDA=2 specifies that 2 CUDA workers should be created, and that +they should use CUDA devices #1 and #3 (the logical ordering of the devices is +the one reported by CUDA). + +This variable is ignored if the field +starpu_conf::use_explicit_workers_cuda_gpuid passed to starpu_init() +is set. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY +Disable asynchronous copies between CPU and CUDA devices. One can call starpu_asynchronous_cuda_copy_disabled() to check whether asynchronous data transfers between CPU and CUDA accelerators are disabled. + +See also \ref STARPU_DISABLE_ASYNCHRONOUS_COPY and \ref +STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. +
+ +
STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
+
+\anchor STARPU_ENABLE_CUDA_GPU_GPU_DIRECT +\addindex __env__STARPU_ENABLE_CUDA_GPU_GPU_DIRECT +Enable (1) or Disable (0) direct CUDA transfers from GPU to GPU, without copying +through RAM. Default value is Enable. +This permits to test the performance effect of GPU-Direct. +
+ +
STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES
+
+\anchor STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES +\addindex __env__STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES +Specify if CUDA workers should do only fast allocations +when running the datawizard progress of +other memory nodes. This will pass the internal value +_STARPU_DATAWIZARD_ONLY_FAST_ALLOC to allocation methods. +Default value is 0, allowing CUDA workers to do slow allocations. + +This can also be specified with starpu_conf::cuda_only_fast_alloc_other_memnodes. +
+ +
+ +\subsection openclWorkers OpenCL Workers +
+
STARPU_NOPENCL
+
+\anchor STARPU_NOPENCL +\addindex __env__STARPU_NOPENCL +Specify the number of OpenCL devices that StarPU can use. If +\ref STARPU_NOPENCL is lower than the number of physical devices, it is +possible to select which GPU devices should be used by the means of the +environment variable \ref STARPU_WORKERS_OPENCLID. By default, StarPU will +create as many OpenCL workers as there are GPU devices. + +Note that by default StarPU will launch CUDA workers on GPU devices. +You need to disable CUDA to allow the creation of OpenCL workers. +
+ +
STARPU_WORKERS_OPENCLID
+
+\anchor STARPU_WORKERS_OPENCLID +\addindex __env__STARPU_WORKERS_OPENCLID +Select which GPU devices should be used to run OpenCL workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). +On a machine equipped with 4 GPUs, setting STARPU_WORKERS_OPENCLID="1 3" and +STARPU_NOPENCL=2 specifies that 2 OpenCL workers should be +created, and that they should use GPU devices #1 and #3. + +This variable is ignored if the field +starpu_conf::use_explicit_workers_opencl_gpuid passed to starpu_init() +is set. +
+ +
STARPU_OPENCL_PIPELINE
+
+\anchor STARPU_OPENCL_PIPELINE +\addindex __env__STARPU_OPENCL_PIPELINE +Specify how many asynchronous tasks are submitted in advance on OpenCL +devices. This for instance permits to overlap task management with the execution +of previous tasks, but it also allows concurrent execution on Fermi cards, which +otherwise bring spurious synchronizations. Default value is 2. Setting the value to 0 forces a synchronous +execution of all tasks. +
+ +
STARPU_OPENCL_ON_CPUS
+
+\anchor STARPU_OPENCL_ON_CPUS +\addindex __env__STARPU_OPENCL_ON_CPUS +Specify that OpenCL workers can also be run on CPU devices. By default, the OpenCL driver only enables GPU devices. +
+ +
STARPU_OPENCL_ONLY_ON_CPUS
+
+\anchor STARPU_OPENCL_ONLY_ON_CPUS +\addindex __env__STARPU_OPENCL_ONLY_ON_CPUS +Specify that OpenCL workers can ONLY be run on CPU devices. By default, the OpenCL driver enables GPU devices. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY +Disable asynchronous copies between CPU and OpenCL devices. +The AMD implementation of OpenCL is known to +fail when copying data asynchronously. When using this implementation, +it is therefore necessary to disable asynchronous data transfers. One can call starpu_asynchronous_opencl_copy_disabled() to check whether asynchronous data transfers between CPU and OpenCL accelerators are disabled. + +See also \ref STARPU_DISABLE_ASYNCHRONOUS_COPY and \ref +STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY. +
+
+ +\subsection maxfpgaWorkers Maxeler FPGA Workers +
+
STARPU_NMAX_FPGA
+
+\anchor STARPU_NMAX_FPGA +\addindex __env__STARPU_NMAX_FPGA +Specify the number of Maxeler FPGA devices that StarPU can use. If +\ref STARPU_NMAX_FPGA is lower than the number of physical devices, it is +possible to select which Maxeler FPGA devices should be used by the means of the +environment variable \ref STARPU_WORKERS_MAX_FPGAID. By default, StarPU will +create as many Maxeler FPGA workers as there are Maxeler FPGA devices. +
+ +
STARPU_WORKERS_MAX_FPGAID
+
+\anchor STARPU_WORKERS_MAX_FPGAID +\addindex __env__STARPU_WORKERS_MAX_FPGAID +Select which Maxeler FPGA devices should be used to run Maxeler FPGA workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). +On a machine equipped with 4 Maxeler FPGAs, setting STARPU_WORKERS_MAX_FPGAID="1 3" and +STARPU_NMAX_FPGA=2 specifies that 2 Maxeler FPGA workers should be created, and that +they should use Maxeler FPGA devices #1 and #3 (the logical ordering of the devices is +the one reported by the Maxeler stack). + +
STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY +Disable asynchronous copies between CPU and Maxeler FPGA devices. One can call starpu_asynchronous_max_fpga_copy_disabled() to check whether asynchronous data transfers between CPU and Maxeler FPGA devices are disabled. +
+ +\subsection mpimsWorkers MPI Master Slave Workers +
+
STARPU_NMPI_MS
+
+\anchor STARPU_NMPI_MS +\addindex __env__STARPU_NMPI_MS +Specify the number of MPI master slave devices that StarPU can use. +
+ +
STARPU_NMPIMSTHREADS
+
+\anchor STARPU_NMPIMSTHREADS +\addindex __env__STARPU_NMPIMSTHREADS +Specify the number of threads to use on the MPI Slave devices. +
+ +
STARPU_MPI_MS_MULTIPLE_THREAD
+
+\anchor STARPU_MPI_MS_MULTIPLE_THREAD +\addindex __env__STARPU_MPI_MS_MULTIPLE_THREAD +Specify whether the master should use one thread per slave, or one thread to +drive all slaves. Default value is 0. +
+ +
STARPU_MPI_MASTER_NODE
+
+\anchor STARPU_MPI_MASTER_NODE +\addindex __env__STARPU_MPI_MASTER_NODE +Specify the rank of the MPI process which will be the master. Default value is 0. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY +Disable asynchronous copies between CPU and MPI Slave devices. One can call starpu_asynchronous_mpi_ms_copy_disabled() to check whether asynchronous data transfers between CPU and MPI Slave devices are disabled. +
+ +
+ +\subsection tcpipmsWorkers TCP/IP Master Slave Workers + +
+ +
STARPU_NTCPIP_MS
+
+\anchor STARPU_NTCPIP_MS +\addindex __env__STARPU_NTCPIP_MS +Specify the number of TCP/IP master slave devices that StarPU can use. +
+ +
STARPU_TCPIP_MS_SLAVES
+
+\anchor STARPU_TCPIP_MS_SLAVES +\addindex __env__STARPU_TCPIP_MS_SLAVES +Specify the number of TCP/IP master slave processes that are expected to be +run. This should be provided both to the master and to the slaves. +
+ +
STARPU_TCPIP_MS_MASTER
+
+\anchor STARPU_TCPIP_MS_MASTER +\addindex __env__STARPU_TCPIP_MS_MASTER +Specify (for slaves) the IP address of the master so they can connect to +it. They will then automatically connect to each other. +
+ +
STARPU_TCPIP_MS_PORT
+
+\anchor STARPU_TCPIP_MS_PORT +\addindex __env__STARPU_TCPIP_MS_PORT +Specify the port of the master, for connections between slaves and the master. +Default value is 1234. +
+ +
STARPU_NTCPIPMSTHREADS
+
+\anchor STARPU_NTCPIPMSTHREADS +\addindex __env__STARPU_NTCPIPMSTHREADS +Specify the number of threads to use on the TCP/IP Slave devices. +
+ +
STARPU_TCPIP_MS_MULTIPLE_THREAD
+
+\anchor STARPU_TCPIP_MS_MULTIPLE_THREAD +\addindex __env__STARPU_TCPIP_MS_MULTIPLE_THREAD +Specify whether the master should use one thread per slave, or one thread to +drive all slaves. Default value is 0. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY +Disable asynchronous copies between CPU and TCP/IP Slave devices. One can call starpu_asynchronous_tcpip_ms_copy_disabled() to check whether asynchronous data transfers between CPU and TCP/IP Slave devices are disabled. +
+ +
+ +\subsection hipWorkers HIP Workers + +
+
STARPU_NHIP
+
+\anchor STARPU_NHIP +\addindex __env__STARPU_NHIP +Specify the number of HIP devices that StarPU can use. If +\ref STARPU_NHIP is lower than the number of physical devices, it is +possible to select which HIP devices should be used by the means of the +environment variable \ref STARPU_WORKERS_HIPID. By default, StarPU will +create as many HIP workers as there are HIP devices. +
+ +
STARPU_WORKERS_HIPID
+
+\anchor STARPU_WORKERS_HIPID +\addindex __env__STARPU_WORKERS_HIPID +Select which HIP devices should be used to run HIP workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). +On a machine equipped with 4 HIP devices, setting STARPU_WORKERS_HIPID="1 3" and +STARPU_NHIP=2 specifies that 2 HIP workers should be created, and that +they should use HIP devices #1 and #3. + +This variable is ignored if the field +starpu_conf::use_explicit_workers_hip_gpuid passed to starpu_init() +is set. +
+ +
STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY
+
+\anchor STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY +\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY +Disable asynchronous copies between CPU and HIP devices. One can call starpu_asynchronous_hip_copy_disabled() to check whether asynchronous data transfers between CPU and HIP accelerators are disabled. +
+ +
+ + +\subsection mpiConf MPI Configuration +
+ +
STARPU_MPI_THREAD_CPUID
+
+\anchor STARPU_MPI_THREAD_CPUID +\addindex __env__STARPU_MPI_THREAD_CPUID +Tell StarPU to bind its MPI thread to the given CPU id, +subtracted from the CPU workers (unless \ref STARPU_NCPU is defined). + +Default value is -1, it will let StarPU allocate a CPU. +
+ +
STARPU_MPI_THREAD_COREID
+
+\anchor STARPU_MPI_THREAD_COREID +\addindex __env__STARPU_MPI_THREAD_COREID +Same as \ref STARPU_MPI_THREAD_CPUID, but bind the MPI thread to the given core +ID, instead of the PU (hyperthread). +
+ +
STARPU_MPI_THREAD_MULTIPLE_SEND
+
+\anchor STARPU_MPI_THREAD_MULTIPLE_SEND +\addindex __env__STARPU_MPI_THREAD_MULTIPLE_SEND +Setting it to non-zero makes StarPU emit MPI send requests from all threads, not +just the MPI thread. + +This can improve performance, but depends on the MPI implementation to be really +thread-multiple-safe. +
+ +
STARPU_MPI_NOBIND
+
+\anchor STARPU_MPI_NOBIND +\addindex __env__STARPU_MPI_NOBIND +Setting it to non-zero will prevent StarPU from binding the MPI thread to +a separate core. This is for instance useful when running the testsuite on a single system. +
+ +
STARPU_MPI_GPUDIRECT
+
+\anchor STARPU_MPI_GPUDIRECT +\addindex __env__STARPU_MPI_GPUDIRECT +Enable (1) or disable (0) MPI GPUDirect support. Default value (-1) is to enable if available. +If \ref STARPU_MPI_GPUDIRECT is explicitly set to 1, StarPU-MPI will warn if MPI does not provide the GPUDirect support. +
+ +
STARPU_MPI_PSM2
+
+\anchor STARPU_MPI_PSM2 +\addindex __env__STARPU_MPI_PSM2 +This variable allows to supersede PSM2 detection when asking for MPI GPUDirect support. +This is helpful when using old Intel compilers, for which PSM2 detection is always true. +The default (1) is to enable it. If PSM2 is detected whereas it should not be, this variable +can be set to 0. +
+ +
STARPU_MPI_REDUX_ARITY_THRESHOLD
+
+\anchor STARPU_MPI_REDUX_ARITY_THRESHOLD +\addindex __env__STARPU_MPI_REDUX_ARITY_THRESHOLD +The arity of the automatically-detected reduction trees follows the following +rule: when the data to be reduced is of small size a flat tree is unrolled +i.e. all the contributing nodes send their contribution to the root of +the reduction. When the data to be reduced is of big size, a binary tree is used instead. +The default threshold between flat and binary tree is 1024 bytes. By setting the environment +variable with a negative value, all the automatically detected reduction trees will use flat trees. +If this value is set to 0, then binary trees will always be selected. Otherwise, +the setup value replaces the default 1024. +
+ +
+ +\section ConfiguringTheSchedulingEngine Configuring The Scheduling Engine + +
+ +
STARPU_SCHED
+
+\anchor STARPU_SCHED +\addindex __env__STARPU_SCHED +Select the scheduling policy from those proposed by StarPU: +random, work stealing, greedy, with performance models, etc. + +Use STARPU_SCHED=help to get the list of available schedulers. +
+ +
STARPU_SCHED_LIB
+
+\anchor STARPU_SCHED_LIB +\addindex __env__STARPU_SCHED_LIB +Specify the location of a dynamic library to choose a +user-defined scheduling policy. See \ref UsingaNewSchedulingPolicy for +more information. +
+ +
STARPU_MIN_PRIO
+
+\anchor STARPU_MIN_PRIO_env +\addindex __env__STARPU_MIN_PRIO +Set the minimum priority used by priorities-aware schedulers. +The flag can also be set through the field starpu_conf::global_sched_ctx_min_priority. +
+ +
STARPU_MAX_PRIO
+
+\anchor STARPU_MAX_PRIO_env +\addindex __env__STARPU_MAX_PRIO +Set the maximum priority used by priorities-aware schedulers. +The flag can also be set through the field starpu_conf::global_sched_ctx_max_priority. +
+ +
STARPU_CALIBRATE
+
+\anchor STARPU_CALIBRATE +\addindex __env__STARPU_CALIBRATE +Set to 1 to calibrate the performance models during the execution. +Set to 2 to drop the previous values and restart the calibration from scratch. +Set to 0 to disable calibration, this is the default behaviour. + +Note: this currently only applies to dm and dmda scheduling policies. +
+ +
STARPU_CALIBRATE_MINIMUM
+
+\anchor STARPU_CALIBRATE_MINIMUM +\addindex __env__STARPU_CALIBRATE_MINIMUM +Define the minimum number of calibration measurements that will be made +before considering that the performance model is calibrated. Default value is 10. +
+ +
STARPU_BUS_CALIBRATE
+
+\anchor STARPU_BUS_CALIBRATE +\addindex __env__STARPU_BUS_CALIBRATE +Set to 1 to recalibrate the bus during initialization. +
+ +
STARPU_PREFETCH
+
+\anchor STARPU_PREFETCH +\addindex __env__STARPU_PREFETCH +Enable (1) or disable (0) data prefetching. Default value is Enable. + +If prefetching is enabled, when a task is scheduled to be +executed e.g. on a GPU, StarPU will request an asynchronous transfer in +advance, so that data is already present on the GPU when the task starts. As a +result, computation and data transfers are overlapped. +
+ +
STARPU_SCHED_ALPHA
+
+\anchor STARPU_SCHED_ALPHA +\addindex __env__STARPU_SCHED_ALPHA +To estimate the cost of a task StarPU takes into account the estimated +computation time (obtained thanks to performance models). The alpha factor is +the coefficient to be applied to it before adding it to the communication part. +
+ +
STARPU_SCHED_BETA
+
+\anchor STARPU_SCHED_BETA +\addindex __env__STARPU_SCHED_BETA +To estimate the cost of a task StarPU takes into account the estimated +data transfer time (obtained thanks to performance models). The beta factor is +the coefficient to be applied to it before adding it to the computation part. +
+ +
STARPU_SCHED_GAMMA
+
+\anchor STARPU_SCHED_GAMMA +\addindex __env__STARPU_SCHED_GAMMA +Define the execution time penalty of a joule (\ref Energy-basedScheduling). +
+ +
STARPU_SCHED_READY
+
+\anchor STARPU_SCHED_READY +\addindex __env__STARPU_SCHED_READY +For a modular scheduler with sorted queues below the decision component, workers +pick up a task which has most of its data already available. Setting this to 0 +disables this. +
+ +
STARPU_SCHED_SORTED_ABOVE
+
+\anchor STARPU_SCHED_SORTED_ABOVE +\addindex __env__STARPU_SCHED_SORTED_ABOVE +For a modular scheduler with queues above the decision component, they are +usually sorted by priority. Setting this to 0 disables this. +
+ +
STARPU_SCHED_SORTED_BELOW
+
+\anchor STARPU_SCHED_SORTED_BELOW +\addindex __env__STARPU_SCHED_SORTED_BELOW +For a modular scheduler with queues below the decision component, they are +usually sorted by priority. Setting this to 0 disables this. +
+ +
STARPU_IDLE_POWER
+
+\anchor STARPU_IDLE_POWER +\addindex __env__STARPU_IDLE_POWER +Define the idle power of the machine (\ref Energy-basedScheduling). +
+ +
STARPU_PROFILING
+
+\anchor STARPU_PROFILING +\addindex __env__STARPU_PROFILING +Enable on-line performance monitoring (\ref EnablingOn-linePerformanceMonitoring). +
+ +
STARPU_CODELET_PROFILING
+
+\anchor STARPU_CODELET_PROFILING +\addindex __env__STARPU_CODELET_PROFILING +Enable on-line performance monitoring of codelets (\ref Per-codeletFeedback). +(enabled by default) +
+ +
STARPU_ENERGY_PROFILING
+
+\anchor STARPU_ENERGY_PROFILING +\addindex __env__STARPU_ENERGY_PROFILING +Enable on-line energy monitoring of tasks (\ref Per-codeletFeedback). +(disabled by default) +
+ +
STARPU_PROF_PAPI_EVENTS
+
+\anchor STARPU_PROF_PAPI_EVENTS +\addindex __env__STARPU_PROF_PAPI_EVENTS +Specify which PAPI events should be recorded in the trace (\ref PapiCounters). +
+ +
+ +\section ConfiguringHeteroprio Configuring The Heteroprio Scheduler + +\subsection ConfiguringLaHeteroprio Configuring LAHeteroprio +
+ +
STARPU_HETEROPRIO_USE_LA
+
+\anchor STARPU_HETEROPRIO_USE_LA +\addindex __env__STARPU_HETEROPRIO_USE_LA +Enable the locality aware mode of Heteroprio which guides the distribution of tasks to workers +in order to reduce the data transfers between memory nodes. +
+ +
STARPU_LAHETEROPRIO_PUSH
+
+\anchor STARPU_LAHETEROPRIO_PUSH +\addindex __env__STARPU_LAHETEROPRIO_PUSH +Choose between the different push strategies for locality aware Heteroprio: +\c WORKER, \c LcS, \c LS_SDH, \c LS_SDH2, \c LS_SDHB, \c LC_SMWB, \c AUTO (by default: AUTO). These are detailed in +\ref LAHeteroprio +
+ +
STARPU_LAHETEROPRIO_S_[ARCH]
+
+\anchor STARPU_LAHETEROPRIO_S_[ARCH] +\addindex __env__STARPU_LAHETEROPRIO_S_arch +Specify the number of memory nodes contained in an affinity group. An affinity +group will be composed of the closest memory nodes to a worker of a given architecture, +and this worker will look for tasks available inside these memory nodes, before +considering stealing tasks outside this group. +ARCH can be \c CPU, \c CUDA, \c OPENCL, \c SCC, \c MPI_MS, etc. +
+ +
STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]
+
+\anchor STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH] +\addindex __env__STARPU_LAHETEROPRIO_PRIO_STEP_arch +Specify the number of buckets in the local memory node in which a worker will look for +available tasks, before this worker starts looking for tasks in other memory nodes' buckets. +ARCH indicates that this number is specific to a given arch which can be: +\c CPU, \c CUDA, \c OPENCL, \c SCC, \c MPI_MS, etc. +
+ +
+ +\subsection ConfiguringAutoHeteroprio Configuring AutoHeteroprio +
+ +
STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
+
+\anchor STARPU_HETEROPRIO_USE_AUTO_CALIBRATION +\addindex __env__STARPU_HETEROPRIO_USE_AUTO_CALIBRATION +Enable the auto calibration mode of Heteroprio which assigns priorities to tasks automatically. +
+ +
STARPU_HETEROPRIO_DATA_DIR
+
+\anchor STARPU_HETEROPRIO_DATA_DIR +\addindex __env__STARPU_HETEROPRIO_DATA_DIR +Specify the path of the directory where Heteroprio stores data about program executions. +By default, these are stored in the same directory used by perfmodel. +
+ +
STARPU_HETEROPRIO_DATA_FILE
+
+\anchor STARPU_HETEROPRIO_DATA_FILE +\addindex __env__STARPU_HETEROPRIO_DATA_FILE +Specify the filename where Heteroprio will save data about the current program's execution. +
+ +
STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
+
+\anchor STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY +\addindex __env__STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY +Choose how Heteroprio groups similar tasks. It can be 0 to group +the tasks with the same perfmodel or the same codelet's name if no perfmodel was assigned. +Or, it could be 1 to group the tasks only by codelet's name. +
+ +
STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
+
+\anchor STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE +\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE +Enable the printing of priorities' data every time they get updated. +
+ +
STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
+
+\anchor STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING +\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING +Enable the printing of priorities' order for each architecture every time there's a reordering. +
+ +
STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
+
+\anchor STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY +\addindex __env__STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY +Specify the heuristic which will be used to assign priorities automatically. +It should be an integer between 0 and 27. +
+ +
STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
+
+\anchor STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL +\addindex __env__STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL +Specify the period (in number of tasks pushed), between priorities reordering operations. +
+ +
STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
+
+\anchor STARPU_AUTOHETEROPRIO_FREEZE_GATHERING +\addindex __env__STARPU_AUTOHETEROPRIO_FREEZE_GATHERING +Disable data gathering from task executions. +
+ +
+ +\section Extensions Extensions + +
+ +
SOCL_OCL_LIB_OPENCL
+
+\anchor SOCL_OCL_LIB_OPENCL +\addindex __env__SOCL_OCL_LIB_OPENCL +Set the location of the file libOpenCL.so of the OCL ICD implementation. +The SOCL test suite is only run when \ref SOCL_OCL_LIB_OPENCL is defined. +
+ +
OCL_ICD_VENDORS
+
+\anchor OCL_ICD_VENDORS +\addindex __env__OCL_ICD_VENDORS +Set the directory where ICD files are installed. This is useful when using SOCL +with OpenCL ICD (https://forge.imag.fr/projects/ocl-icd/). Default +directory is /etc/OpenCL/vendors. StarPU installs ICD +files in the directory $prefix/share/starpu/opencl/vendors. +
+ +
STARPU_COMM_STATS
+
+\anchor STARPU_COMM_STATS +\addindex __env__STARPU_COMM_STATS +Deprecated. You should use \ref STARPU_MPI_STATS. +
+ +
STARPU_MPI_STATS
+
+\anchor STARPU_MPI_STATS +\addindex __env__STARPU_MPI_STATS +Enable (!= 0) or Disable (0) communication statistics for starpumpi (\ref MPIDebug). Default value is Disable. +
+ +
STARPU_MPI_CACHE
+
+\anchor STARPU_MPI_CACHE +\addindex __env__STARPU_MPI_CACHE +Disable (0) or Enable (!= 0) communication cache for starpumpi (\ref MPISupport). Default value is Enable. +
+ +
STARPU_MPI_COMM
+
+\anchor STARPU_MPI_COMM +\addindex __env__STARPU_MPI_COMM +Enable (1) communication trace for starpumpi (\ref MPISupport). This also requires StarPU to +have been configured with the option \ref enable-verbose "--enable-verbose". +
+ +
STARPU_MPI_CACHE_STATS
+
+\anchor STARPU_MPI_CACHE_STATS +\addindex __env__STARPU_MPI_CACHE_STATS +Enable (1) statistics for the communication cache (\ref MPISupport). +Messages are printed on the standard output when data are added or removed from the received +communication cache. +
+ +
STARPU_MPI_PRIORITIES
+
+\anchor STARPU_MPI_PRIORITIES +\addindex __env__STARPU_MPI_PRIORITIES +Disable (0) the use of priorities to order MPI communications (\ref MPISupport). +
+ +
STARPU_MPI_NDETACHED_SEND
+
+\anchor STARPU_MPI_NDETACHED_SEND +\addindex __env__STARPU_MPI_NDETACHED_SEND +Set the number of send requests that StarPU-MPI will emit concurrently. +Default value is 10. Setting it to 0 removes the limit of concurrent send +requests. +
+ +
STARPU_MPI_NREADY_PROCESS
+
+\anchor STARPU_MPI_NREADY_PROCESS +\addindex __env__STARPU_MPI_NREADY_PROCESS +Set the number of requests that StarPU-MPI will submit to MPI before +polling for termination of existing requests. Default value is 10. Setting it to +0 removes the limit: all requests to submit to MPI will be submitted before +polling for termination of existing ones. +
+ +
STARPU_MPI_FAKE_SIZE
+
+\anchor STARPU_MPI_FAKE_SIZE +\addindex __env__STARPU_MPI_FAKE_SIZE +Setting to a number makes StarPU believe that there are as many MPI nodes, even +if it was run on only one MPI node. This allows e.g. to simulate the execution +of one of the nodes of a big cluster without actually running the rest. +Of course, it does not provide computation results and timing. +
+ +
STARPU_MPI_FAKE_RANK
+
+\anchor STARPU_MPI_FAKE_RANK +\addindex __env__STARPU_MPI_FAKE_RANK +Setting to a number makes StarPU believe that it runs the given MPI node, even +if it was run on only one MPI node. This allows e.g. to simulate the execution +of one of the nodes of a big cluster without actually running the rest. +Of course, it does not provide computation results and timing. +
+ +
STARPU_MPI_COOP_SENDS
+
+\anchor STARPU_MPI_COOP_SENDS +\addindex __env__STARPU_MPI_COOP_SENDS +Disable (0) dynamic collective operations: grouping same requests to +different nodes until the data becomes available and then use a broadcast tree +to execute requests.
+For now, it is only supported with the NewMadeleine library (see \ref Nmad). +
+ +
STARPU_MPI_RECV_WAIT_FINALIZE
+
+\anchor STARPU_MPI_RECV_WAIT_FINALIZE +\addindex __env__STARPU_MPI_RECV_WAIT_FINALIZE +Disable (1) releasing the write acquire of receiving handles when +data is received but the communication library still needs the data. Set to 0 +by default to unlock as soon as possible tasks which only require a read access +on the handle; write access will become possible for tasks when the +communication library will not need the data anymore.
+For now, it is only supported with the NewMadeleine library (see \ref Nmad). +
+ +
STARPU_MPI_TRACE_SYNC_CLOCKS
+
+\anchor STARPU_MPI_TRACE_SYNC_CLOCKS +\addindex __env__STARPU_MPI_TRACE_SYNC_CLOCKS +When \c mpi_sync_clocks is available, this library will be used to have more +precise clock synchronization in traces coming from different nodes. However, +the clock synchronization process can take some time (several seconds) and can +be disabled by setting this variable to \c 0. In that case, a less precise but +faster synchronization will be used. See \ref TraceMpi for more details. +
+ +
STARPU_MPI_DRIVER_CALL_FREQUENCY
+
+\anchor STARPU_MPI_DRIVER_CALL_FREQUENCY +\addindex __env__STARPU_MPI_DRIVER_CALL_FREQUENCY +When set to a positive value, activates the interleaving of the execution of +tasks with the progression of MPI communications (\ref MPISupport). The +starpu_mpi_init_conf() function must have been called by the application +for that environment variable to be used. When set to 0, the MPI progression +thread does not use at all the driver given by users, and only focuses on +making MPI communications progress. +
+ +
STARPU_MPI_DRIVER_TASK_FREQUENCY
+
+\anchor STARPU_MPI_DRIVER_TASK_FREQUENCY +\addindex __env__STARPU_MPI_DRIVER_TASK_FREQUENCY +When set to a positive value, this allows the mechanism interleaving the execution of +tasks with the progression of MPI communications to execute several tasks before +checking communication requests again (\ref MPISupport). The +starpu_mpi_init_conf() function must have been called by the application +for that environment variable to be used, and the +\ref STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable set to a positive value. +
+ +
STARPU_MPI_MEM_THROTTLE
+
+\anchor STARPU_MPI_MEM_THROTTLE +\addindex __env__STARPU_MPI_MEM_THROTTLE +When set to a positive value, this makes the starpu_mpi_*recv* functions +block when the memory allocation required for network reception overflows the +available main memory (as typically set by \ref STARPU_LIMIT_CPU_MEM) +
+ +
STARPU_MPI_EARLYDATA_ALLOCATE
+
+\anchor STARPU_MPI_EARLYDATA_ALLOCATE +\addindex __env__STARPU_MPI_EARLYDATA_ALLOCATE +When set to 1, the MPI Driver will immediately allocate the data for early +requests instead of issuing a data request and blocking. Default value is 0, +issuing a data request. Because it is an early request and we do not know its +real priority, the data request will assume \ref STARPU_DEFAULT_PRIO. In cases +where there are many data requests with priorities greater than +\ref STARPU_DEFAULT_PRIO the MPI driver could be blocked for long periods. +
+ +
STARPU_SIMGRID
+
+\anchor STARPU_SIMGRID +\addindex __env__STARPU_SIMGRID +When set to 1 (default value is 0), this makes StarPU check that it was really +built with simulation support. This is convenient in scripts to avoid using a +native version, that would try to update performance models... +
+ +
STARPU_SIMGRID_TRANSFER_COST
+
+\anchor STARPU_SIMGRID_TRANSFER_COST +\addindex __env__STARPU_SIMGRID_TRANSFER_COST +When set to 1 (which is the default value), data transfers (over PCI bus, typically) are taken into account +in SimGrid mode. +
+ +
STARPU_SIMGRID_CUDA_MALLOC_COST
+
+\anchor STARPU_SIMGRID_CUDA_MALLOC_COST +\addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST +When set to 1 (which is the default value), CUDA malloc costs are taken into account +in SimGrid mode. +
+ +
STARPU_SIMGRID_CUDA_QUEUE_COST
+
+\anchor STARPU_SIMGRID_CUDA_QUEUE_COST +\addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST +When set to 1 (which is the default value), CUDA task and transfer queueing costs are +taken into account in SimGrid mode. +
+ +
STARPU_PCI_FLAT
+
+\anchor STARPU_PCI_FLAT +\addindex __env__STARPU_PCI_FLAT +When unset or set to 0, the platform file created for SimGrid will +contain PCI bandwidths and routes. +
+ +
STARPU_SIMGRID_CUDA_QUEUE_COST
+
+\anchor STARPU_SIMGRID_CUDA_QUEUE_COST +\addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST +When unset or set to 1, simulate within SimGrid the GPU transfer queueing. +
+ +
STARPU_MALLOC_SIMULATION_FOLD
+
+\anchor STARPU_MALLOC_SIMULATION_FOLD +\addindex __env__STARPU_MALLOC_SIMULATION_FOLD +Define the size of the file used for folding virtual allocation, in +MiB. Default value is 1, thus allowing 64GiB virtual memory when Linux's +sysctl vm.max_map_count value is the default 65535. +
+ +
STARPU_SIMGRID_TASK_SUBMIT_COST
+
+\anchor STARPU_SIMGRID_TASK_SUBMIT_COST +\addindex __env__STARPU_SIMGRID_TASK_SUBMIT_COST +When set to 1 (which is the default value), task submission costs are taken into +account in SimGrid mode. This provides more accurate SimGrid predictions, +especially for the beginning of the execution. +
+ +
STARPU_SIMGRID_TASK_PUSH_COST
+
+\anchor STARPU_SIMGRID_TASK_PUSH_COST +\addindex __env__STARPU_SIMGRID_TASK_PUSH_COST +When set to 1 (which is the default value), task push costs are taken into +account in SimGrid mode. This provides more accurate SimGrid predictions, +especially with large dependency arities. +
+ +
STARPU_SIMGRID_FETCHING_INPUT_COST
+
+\anchor STARPU_SIMGRID_FETCHING_INPUT_COST +\addindex __env__STARPU_SIMGRID_FETCHING_INPUT_COST +When set to 1 (which is the default value), fetching input costs are taken into +account in SimGrid mode. This provides more accurate SimGrid predictions, +especially regarding data transfers. +
+ +
STARPU_SIMGRID_SCHED_COST
+
+\anchor STARPU_SIMGRID_SCHED_COST +\addindex __env__STARPU_SIMGRID_SCHED_COST +When set to 1 (0 is the default value), scheduling costs are taken into +account in SimGrid mode. This provides more accurate SimGrid predictions, +and allows studying scheduling overhead of the runtime system. However, +it also makes simulation non-deterministic. +
+ +
STARPUPY_MULTI_INTERPRETER
+
+\anchor STARPUPY_MULTI_INTERPRETER +\addindex __env__STARPUPY_MULTI_INTERPRETER +Enable (1) or disable (0) multi interpreters in the StarPU Python interface (\ref MultipleInterpreters). Default value is Disable. +
+ +
STARPUPY_OWN_GIL
+
+\anchor STARPUPY_OWN_GIL +\addindex __env__STARPUPY_OWN_GIL +Enable (1) or disable (0) using per-interpreter GIL (\ref +PythonParallelism). Default value is Disable for now, until python is fully +ready for this. +
+ +
+ +\section MiscellaneousAndDebug Miscellaneous And Debug + +
+ +
STARPU_HOME
+
+\anchor STARPU_HOME +\addindex __env__STARPU_HOME +Specify the main directory in which StarPU stores its +configuration files. Default value is $HOME on Unix environments, +and $USERPROFILE on Windows environments. +
+ +
STARPU_PATH
+
+\anchor STARPU_PATH +\addindex __env__STARPU_PATH +Only used on Windows environments. +Specify the main directory in which StarPU is installed +(\ref RunningABasicStarPUApplicationOnMicrosoft) +
+ +
STARPU_PERF_MODEL_DIR
+
+\anchor STARPU_PERF_MODEL_DIR +\addindex __env__STARPU_PERF_MODEL_DIR +Specify the main directory in which StarPU stores its +performance model files. Default value is $STARPU_HOME/.starpu/sampling. +See \ref Storing_Performance_Model_Files for more details. +
+ +
STARPU_PERF_MODEL_PATH
+
+\anchor STARPU_PERF_MODEL_PATH +\addindex __env__STARPU_PERF_MODEL_PATH +Specify a list of directories separated with ':' in which StarPU stores its +performance model files. +See \ref Storing_Performance_Model_Files for more details. +
+ +
STARPU_PERF_MODEL_HOMOGENEOUS_CPU
+
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CPU +\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CPU +When set to 0, StarPU will assume that CPU devices do not have the same +performance, and thus use different performance models for them, thus making +kernel calibration much longer, since measurements have to be made for each CPU +core. +
+ +
STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
+
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA +\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CUDA +When set to 1, StarPU will assume that all CUDA devices have the same +performance, and thus share performance models for them, thus allowing kernel +calibration to be much faster, since measurements only have to be made once for all +CUDA GPUs. +
+ +
STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
+
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL +\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL +When set to 1, StarPU will assume that all OpenCL devices have the same +performance, and thus share performance models for them, thus allowing kernel +calibration to be much faster, since measurements only have to be made once for all +OpenCL GPUs. +
+ +
STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS
+
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS +\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS +When set to 1, StarPU will assume that all MPI Slave devices have the same +performance, and thus share performance models for them, thus allowing kernel +calibration to be much faster, since measurements only have to be made once for all +MPI Slaves. +
+ +
STARPU_HOSTNAME
+
+\anchor STARPU_HOSTNAME +\addindex __env__STARPU_HOSTNAME +When set, force the hostname to be used when managing performance model +files. Models are indexed by machine name. When running for example on +a homogeneous cluster, it is possible to share the models between +machines by setting export STARPU_HOSTNAME=some_global_name. +
+ +
STARPU_MPI_HOSTNAMES
+
+\anchor STARPU_MPI_HOSTNAMES +\addindex __env__STARPU_MPI_HOSTNAMES +Similar to \ref STARPU_HOSTNAME but to define multiple nodes on a +heterogeneous cluster. The variable is a list of hostnames that will be assigned +to each StarPU-MPI rank considering their position and the value of +starpu_mpi_world_rank() on each rank. When running, for example, on a +heterogeneous cluster, it is possible to set individual models for each machine +by setting export STARPU_MPI_HOSTNAMES="name0 name1 name2". Where rank 0 +will receive \c name0, rank1 will receive \c name1, and so on. +This variable has precedence over \ref STARPU_HOSTNAME. +
+ +
STARPU_OPENCL_PROGRAM_DIR
+
+\anchor STARPU_OPENCL_PROGRAM_DIR +\addindex __env__STARPU_OPENCL_PROGRAM_DIR +Specify the directory where the OpenCL codelet source files are +located. The function starpu_opencl_load_program_source() looks +for the codelet in the current directory, in the directory specified +by the environment variable \ref STARPU_OPENCL_PROGRAM_DIR, in the +directory share/starpu/opencl of the installation directory of +StarPU, and finally in the source directory of StarPU. +
+ +
STARPU_SILENT
+
+\anchor STARPU_SILENT +\addindex __env__STARPU_SILENT +Disable verbose mode at runtime when StarPU +has been configured with the option \ref enable-verbose "--enable-verbose". Also +disable the display of StarPU information and warning messages. +
+ +
STARPU_MPI_DEBUG_LEVEL_MIN
+
+\anchor STARPU_MPI_DEBUG_LEVEL_MIN +\addindex __env__STARPU_MPI_DEBUG_LEVEL_MIN +Set the minimum level of debug when StarPU +has been configured with the option \ref enable-mpi-verbose "--enable-mpi-verbose". +
+ +
STARPU_MPI_DEBUG_LEVEL_MAX
+
+\anchor STARPU_MPI_DEBUG_LEVEL_MAX +\addindex __env__STARPU_MPI_DEBUG_LEVEL_MAX +Set the maximum level of debug when StarPU +has been configured with the option \ref enable-mpi-verbose "--enable-mpi-verbose". +
+ +
STARPU_LOGFILENAME
+
+\anchor STARPU_LOGFILENAME +\addindex __env__STARPU_LOGFILENAME +Specify the file in which the debugging output should be saved. +
+ +
STARPU_FXT_PREFIX
+
+\anchor STARPU_FXT_PREFIX +\addindex __env__STARPU_FXT_PREFIX +Specify in which directory to save the generated trace if FxT is enabled. +
+ +
STARPU_FXT_SUFFIX
+
+\anchor STARPU_FXT_SUFFIX +\addindex __env__STARPU_FXT_SUFFIX +Specify in which file to save the generated trace if FxT is enabled. +
+ +
STARPU_FXT_TRACE
+
+\anchor STARPU_FXT_TRACE +\addindex __env__STARPU_FXT_TRACE +Enable (1) or disable (0) the FxT trace generation in \c /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). Default value is Disable. +
+ +
STARPU_FXT_EVENTS
+
+\anchor STARPU_FXT_EVENTS +\addindex __env__STARPU_FXT_EVENTS +Specify which events will be recorded in traces. By default, all events (but +VERBOSE_EXTRA ones) are recorded. One can set this variable to a comma- +or pipe-separated list of the following categories, to record only events +belonging to the selected categories: +- USER +- TASK +- TASK_VERBOSE +- TASK_VERBOSE_EXTRA +- DATA +- DATA_VERBOSE +- WORKER +- WORKER_VERBOSE +- DSM +- DSM_VERBOSE +- SCHED +- SCHED_VERBOSE +- LOCK +- LOCK_VERBOSE +- EVENT +- EVENT_VERBOSE +- MPI +- MPI_VERBOSE +- MPI_VERBOSE_EXTRA +- HYP +- HYP_VERBOSE +
+ +The choice of which categories have to be recorded is a tradeoff between +required information for offline analysis and acceptable overhead introduced +by tracing. For instance, to inspect with ViTE which tasks workers execute, one +has to at least select the TASK category. + +Events in VERBOSE_EXTRA are very costly to record and can have an +important impact on application performance. This is why they are disabled by +default, and one has to explicitly select their categories using this variable +to record them. + +
STARPU_LIMIT_CUDA_devid_MEM
+
+\anchor STARPU_LIMIT_CUDA_devid_MEM +\addindex __env__STARPU_LIMIT_CUDA_devid_MEM +Specify the maximum number of megabytes that should be +available to the application on the CUDA device with the identifier +devid. This variable is intended to be used for experimental +purposes as it emulates devices that have a limited amount of memory. +When defined, the variable overwrites the value of the variable +\ref STARPU_LIMIT_CUDA_MEM. +
+ +
STARPU_LIMIT_CUDA_MEM
+
+\anchor STARPU_LIMIT_CUDA_MEM +\addindex __env__STARPU_LIMIT_CUDA_MEM +Specify the maximum number of megabytes that should be +available to the application on each CUDA device. This variable is +intended to be used for experimental purposes as it emulates devices +that have a limited amount of memory. +
+ +
STARPU_LIMIT_OPENCL_devid_MEM
+
+\anchor STARPU_LIMIT_OPENCL_devid_MEM +\addindex __env__STARPU_LIMIT_OPENCL_devid_MEM +Specify the maximum number of megabytes that should be +available to the application on the OpenCL device with the identifier +devid. This variable is intended to be used for experimental +purposes as it emulates devices that have a limited amount of memory. +When defined, the variable overwrites the value of the variable +\ref STARPU_LIMIT_OPENCL_MEM. +
+ +
STARPU_LIMIT_OPENCL_MEM
+
+\anchor STARPU_LIMIT_OPENCL_MEM +\addindex __env__STARPU_LIMIT_OPENCL_MEM +Specify the maximum number of megabytes that should be +available to the application on each OpenCL device. This variable is +intended to be used for experimental purposes as it emulates devices +that have a limited amount of memory. +
+ +
STARPU_LIMIT_HIP_devid_MEM
+
+\anchor STARPU_LIMIT_HIP_devid_MEM +\addindex __env__STARPU_LIMIT_HIP_devid_MEM +Specify the maximum number of megabytes that should be +available to the application on the HIP device with the identifier +devid. This variable is intended to be used for experimental +purposes as it emulates devices that have a limited amount of memory. +When defined, the variable overwrites the value of the variable +\ref STARPU_LIMIT_HIP_MEM. +
+ +
STARPU_LIMIT_HIP_MEM
+
+\anchor STARPU_LIMIT_HIP_MEM +\addindex __env__STARPU_LIMIT_HIP_MEM +Specify the maximum number of megabytes that should be +available to the application on each HIP device. This variable is +intended to be used for experimental purposes as it emulates devices +that have a limited amount of memory. +
+ +
STARPU_LIMIT_CPU_MEM
+
+\anchor STARPU_LIMIT_CPU_MEM +\addindex __env__STARPU_LIMIT_CPU_MEM +Specify the maximum number of megabytes that should be +available to the application in the main CPU memory. Setting it enables allocation +cache in main memory. Setting it to zero lets StarPU overflow memory. + +Note: for now not all StarPU allocations get throttled by this +parameter. Notably MPI receptions are not throttled unless \ref +STARPU_MPI_MEM_THROTTLE is set to 1. +
+ +
STARPU_LIMIT_CPU_NUMA_devid_MEM
+
+\anchor STARPU_LIMIT_CPU_NUMA_devid_MEM +\addindex __env__STARPU_LIMIT_CPU_NUMA_devid_MEM +Specify the maximum number of megabytes that should be available to the +application on the NUMA node with the OS identifier devid. Setting it +overrides the value of \ref STARPU_LIMIT_CPU_MEM. +
+ +
STARPU_LIMIT_CPU_NUMA_MEM
+
+\anchor STARPU_LIMIT_CPU_NUMA_MEM +\addindex __env__STARPU_LIMIT_CPU_NUMA_MEM +Specify the maximum number of megabytes that should be available to the +application on each NUMA node. This is the same as specifying that same amount +with \ref STARPU_LIMIT_CPU_NUMA_devid_MEM for each NUMA node number. The total +memory available to StarPU will thus be this amount multiplied by the number of +NUMA nodes used by StarPU. Any \ref STARPU_LIMIT_CPU_NUMA_devid_MEM additionally +specified will take over \ref STARPU_LIMIT_CPU_NUMA_MEM. +
+ +
STARPU_LIMIT_BANDWIDTH
+
+\anchor STARPU_LIMIT_BANDWIDTH +\addindex __env__STARPU_LIMIT_BANDWIDTH +Specify the maximum available PCI bandwidth of the system in MB/s. This can only +be effective with simgrid simulation. This allows to easily override the +bandwidths stored in the platform file generated from measurements on the native +system. This can thus be used to accelerate or slow down the system bandwidth. +
+ +
STARPU_SUBALLOCATOR
+
+\anchor STARPU_SUBALLOCATOR +\addindex __env__STARPU_SUBALLOCATOR +Enable (1) or disable (0) the StarPU suballocator. Default value is to +enable it to amortize the cost of GPU and pinned RAM allocations for small +allocations: StarPU allocates large chunks of memory at a time, and suballocates +the small buffers within them. +
+ +
STARPU_MINIMUM_AVAILABLE_MEM
+
+\anchor STARPU_MINIMUM_AVAILABLE_MEM +\addindex __env__STARPU_MINIMUM_AVAILABLE_MEM +Specify the minimum percentage of memory that should be available in GPUs, i.e. +not used at all by StarPU (or in main memory, when using out of core), below +which an eviction pass is performed. Default value is 0%. +
+ +
STARPU_TARGET_AVAILABLE_MEM
+
+\anchor STARPU_TARGET_AVAILABLE_MEM +\addindex __env__STARPU_TARGET_AVAILABLE_MEM +Specify the target percentage of memory that should be available in GPUs, i.e. +not used at all by StarPU (or in main memory, when using out of core), when +performing a periodic eviction pass. Default value is 0%. +
+ +
STARPU_MINIMUM_CLEAN_BUFFERS
+
+\anchor STARPU_MINIMUM_CLEAN_BUFFERS +\addindex __env__STARPU_MINIMUM_CLEAN_BUFFERS +Specify the minimum percentage of number of buffers that should be clean in GPUs +(or in main memory, when using out of core), i.e. used by StarPU, but for which +a copy is available in memory (or on disk, when using out of core), below which +asynchronous writebacks will be issued. This is thus the amount of buffers +that will be reusable immediately without having to wait for transfers. +The default value is 5%. +
+ +
STARPU_TARGET_CLEAN_BUFFERS
+
+\anchor STARPU_TARGET_CLEAN_BUFFERS +\addindex __env__STARPU_TARGET_CLEAN_BUFFERS +Specify the target percentage of number of buffers that should be clean in +GPUs (or in main memory, when using out of core), i.e. used by StarPU, but for +which a copy is available in memory (or on disk, when using out of core), when +performing an asynchronous writeback pass. The default value is 10%. +
+ +
STARPU_DISK_SWAP
+
+\anchor STARPU_DISK_SWAP +\addindex __env__STARPU_DISK_SWAP +Specify a path where StarPU can push data when the main memory is getting +full. +
+ +
STARPU_DISK_SWAP_BACKEND
+
+\anchor STARPU_DISK_SWAP_BACKEND +\addindex __env__STARPU_DISK_SWAP_BACKEND +Specify the backend to be used by StarPU to push data when the main +memory is getting full. Default value is \c unistd (i.e. using read/write functions), +other values are \c stdio (i.e. using fread/fwrite), \c unistd_o_direct (i.e. using +read/write with O_DIRECT), \c leveldb (i.e. using a leveldb database), and \c hdf5 +(i.e. using HDF5 library). +
+ +
STARPU_DISK_SWAP_SIZE
+
+\anchor STARPU_DISK_SWAP_SIZE +\addindex __env__STARPU_DISK_SWAP_SIZE +Specify the maximum size in MiB to be used by StarPU to push data when the main +memory is getting full. Default value is unlimited. +
+ +
STARPU_LIMIT_MAX_SUBMITTED_TASKS
+
+\anchor STARPU_LIMIT_MAX_SUBMITTED_TASKS +\addindex __env__STARPU_LIMIT_MAX_SUBMITTED_TASKS +Allow users to control the task submission flow by specifying +to StarPU a maximum number of submitted tasks allowed at a given time, i.e. when +this limit is reached task submission becomes blocking until enough tasks have +completed, specified by \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS. +Setting it enables allocation cache buffer reuse in main memory. +See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures. +
+ +
STARPU_LIMIT_MIN_SUBMITTED_TASKS
+
+\anchor STARPU_LIMIT_MIN_SUBMITTED_TASKS +\addindex __env__STARPU_LIMIT_MIN_SUBMITTED_TASKS +Allow users to control the task submission flow by specifying +to StarPU a submitted task threshold to wait before unblocking task submission. This +variable has to be used in conjunction with \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS +which puts the task submission thread to +sleep. Setting it enables allocation cache buffer reuse in main memory. +See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures. +
+ +
STARPU_TRACE_BUFFER_SIZE
+
+\anchor STARPU_TRACE_BUFFER_SIZE +\addindex __env__STARPU_TRACE_BUFFER_SIZE +Set the buffer size for recording trace events in MiB. Setting it to a big +size allows to avoid pauses in the trace while it is recorded on the disk. This +however also consumes memory, of course. Default value is 64. +
+ +
STARPU_GENERATE_TRACE
+
+\anchor STARPU_GENERATE_TRACE +\addindex __env__STARPU_GENERATE_TRACE +When set to 1, indicate that StarPU should automatically +generate a Paje trace when starpu_shutdown() is called. +
+ +
STARPU_GENERATE_TRACE_OPTIONS
+
+\anchor STARPU_GENERATE_TRACE_OPTIONS +\addindex __env__STARPU_GENERATE_TRACE_OPTIONS +When the variable \ref STARPU_GENERATE_TRACE is set to 1 to +generate a Paje trace, this variable can be set to specify options (see +starpu_fxt_tool --help). +
+ +
STARPU_ENABLE_STATS
+
+\anchor STARPU_ENABLE_STATS +\addindex __env__STARPU_ENABLE_STATS +Enable gathering various data statistics (\ref DataStatistics). +
+ +
STARPU_MEMORY_STATS
+
+\anchor STARPU_MEMORY_STATS +\addindex __env__STARPU_MEMORY_STATS +When set to 0, disable the display of memory statistics on data which +have not been unregistered at the end of the execution (\ref MemoryFeedback). +
+ +
STARPU_MAX_MEMORY_USE
+
+\anchor STARPU_MAX_MEMORY_USE +\addindex __env__STARPU_MAX_MEMORY_USE +When set to 1, display at the end of the execution the maximum memory used by +StarPU for internal data structures during execution. +
+ +
STARPU_BUS_STATS
+
+\anchor STARPU_BUS_STATS +\addindex __env__STARPU_BUS_STATS +Enable the display of data transfers statistics when calling +starpu_shutdown() (\ref Profiling). By default, statistics are printed +on the standard error stream, use the environment variable \ref +STARPU_BUS_STATS_FILE to define another filename. +
+ +
STARPU_BUS_STATS_FILE
+
+\anchor STARPU_BUS_STATS_FILE +\addindex __env__STARPU_BUS_STATS_FILE +Define the name of the file where to display data transfers +statistics, see \ref STARPU_BUS_STATS. +
+ +
STARPU_WORKER_STATS
+
+\anchor STARPU_WORKER_STATS +\addindex __env__STARPU_WORKER_STATS +Enable the display of workers statistics when calling +starpu_shutdown() (\ref Profiling). When combined with the +environment variable \ref STARPU_PROFILING, it displays the energy +consumption (\ref Energy-basedScheduling). By default, statistics are +printed on the standard error stream, use the environment variable +\ref STARPU_WORKER_STATS_FILE to define another filename. +
+ +
STARPU_WORKER_STATS_FILE
+
+\anchor STARPU_WORKER_STATS_FILE +\addindex __env__STARPU_WORKER_STATS_FILE +Define the name of the file where to display workers statistics, see +\ref STARPU_WORKER_STATS. +
+ +
STARPU_STATS
+
+\anchor STARPU_STATS +\addindex __env__STARPU_STATS +When set to 0, data statistics will not be displayed at the +end of the execution of an application (\ref DataStatistics). +
+ +
STARPU_WATCHDOG_TIMEOUT
+
+\anchor STARPU_WATCHDOG_TIMEOUT +\addindex __env__STARPU_WATCHDOG_TIMEOUT +When set to a value other than 0, allows to make StarPU print an error +message whenever StarPU does not terminate any task for the given time (in µs), +but lets the application continue normally. Should +be used in combination with \ref STARPU_WATCHDOG_CRASH +(see \ref DetectionStuckConditions). +
+ +
STARPU_WATCHDOG_CRASH
+
+\anchor STARPU_WATCHDOG_CRASH +\addindex __env__STARPU_WATCHDOG_CRASH +When set to a value other than 0, trigger a crash when the watch +dog is reached, thus allowing to catch the situation in gdb, etc +(see \ref DetectionStuckConditions) +
+ +
STARPU_WATCHDOG_DELAY
+
+\anchor STARPU_WATCHDOG_DELAY +\addindex __env__STARPU_WATCHDOG_DELAY +Delay the activation of the watchdog by the given time (in µs). This can +be convenient for letting the application initialize data etc. before starting +to look for idle time. +
+ +
STARPU_TASK_PROGRESS
+
+\anchor STARPU_TASK_PROGRESS +\addindex __env__STARPU_TASK_PROGRESS +Print the progression of tasks. This is convenient to determine whether a +program is making progress in task execution, or is just stuck. +
+ +
STARPU_TASK_BREAK_ON_PUSH
+
+\anchor STARPU_TASK_BREAK_ON_PUSH +\addindex __env__STARPU_TASK_BREAK_ON_PUSH +When this variable contains a job id, StarPU will raise \c SIGTRAP when the task +with that job id is being pushed to the scheduler, which will be nicely caught by debuggers +(see \ref DebuggingScheduling) +
+ +
STARPU_TASK_BREAK_ON_SCHED
+
+\anchor STARPU_TASK_BREAK_ON_SCHED +\addindex __env__STARPU_TASK_BREAK_ON_SCHED +When this variable contains a job id, StarPU will raise \c SIGTRAP when the task +with that job id is being scheduled by the scheduler (at a scheduler-specific +point), which will be nicely caught by debuggers. +This only works for schedulers which have such a scheduling point defined +(see \ref DebuggingScheduling) +
+ +
STARPU_TASK_BREAK_ON_POP
+
+\anchor STARPU_TASK_BREAK_ON_POP +\addindex __env__STARPU_TASK_BREAK_ON_POP +When this variable contains a job id, StarPU will raise \c SIGTRAP when the task +with that job id is being popped from the scheduler, which will be nicely caught by debuggers +(see \ref DebuggingScheduling) +
+ +
STARPU_TASK_BREAK_ON_EXEC
+
+\anchor STARPU_TASK_BREAK_ON_EXEC +\addindex __env__STARPU_TASK_BREAK_ON_EXEC +When this variable contains a job id, StarPU will raise \c SIGTRAP when the task +with that job id is being executed, which will be nicely caught by debuggers +(see \ref DebuggingScheduling) +
+ +
STARPU_DISABLE_KERNELS
+
+\anchor STARPU_DISABLE_KERNELS +\addindex __env__STARPU_DISABLE_KERNELS +When set to a value other than 1, it disables actually calling the kernel +functions, thus allowing to quickly check that the task scheme is working +properly, without performing the actual application-provided computation. +
+ +
STARPU_HISTORY_MAX_ERROR
+
+\anchor STARPU_HISTORY_MAX_ERROR +\addindex __env__STARPU_HISTORY_MAX_ERROR +History-based performance models will drop measurements which are really far +from the measured average. This specifies the allowed variation. Default value is +50 (%), i.e. the measurement is allowed to be x1.5 faster or /1.5 slower than the +average. +
+ +
STARPU_RAND_SEED
+
+\anchor STARPU_RAND_SEED +\addindex __env__STARPU_RAND_SEED +The random scheduler and some examples use random numbers for their own +working. Depending on the examples, the seed is by default just always 0 or +the current time() (unless SimGrid mode is enabled, in which case it is always +0). \ref STARPU_RAND_SEED allows to set the seed to a specific value. +
+ +
STARPU_GLOBAL_ARBITER
+
+\anchor STARPU_GLOBAL_ARBITER +\addindex __env__STARPU_GLOBAL_ARBITER +When set to a positive value, StarPU will create an arbiter, which +implements an advanced but centralized management of concurrent data +accesses (see \ref ConcurrentDataAccess). +
+ +
STARPU_USE_NUMA
+
+\anchor STARPU_USE_NUMA +\addindex __env__STARPU_USE_NUMA +When defined to 1, NUMA nodes are taken into account by StarPU, i.e. StarPU will +expose one StarPU memory node per NUMA node, and will thus schedule tasks +according to data locality, migrate data when appropriate, etc. + +::STARPU_MAIN_RAM is then associated to the NUMA node associated to the +first CPU worker if it exists, the NUMA node associated to the first GPU discovered otherwise. +If StarPU doesn't find any NUMA node after these steps, ::STARPU_MAIN_RAM is the first NUMA node +discovered by StarPU. + +Applications should thus rather pass a \c NULL pointer and a -1 memory node to +starpu_data_*_register functions, so that StarPU can manage memory as it +wishes. + +If the application wants to control memory allocation on NUMA nodes for +some data, it can use starpu_malloc_on_node and pass the memory node to the +starpu_data_*_register functions to tell StarPU where the allocation was +made. starpu_memory_nodes_get_count_by_kind() and +starpu_memory_node_get_ids_by_type() can be used to get the memory nodes numbers +of the CPU memory nodes. + +starpu_memory_nodes_numa_id_to_devid() and starpu_memory_nodes_numa_devid_to_id() +are also available to convert between OS NUMA id and StarPU memory node number. + +If this variable is unset, or set to 0, CPU memory is considered as only one +memory node (::STARPU_MAIN_RAM) and it will be up to the OS to manage migration +etc. and the StarPU scheduler will not know about it. +
+ +
STARPU_IDLE_FILE
+
+\anchor STARPU_IDLE_FILE +\addindex __env__STARPU_IDLE_FILE +When defined, a file named after its contents will be created at the +end of the execution. This file will contain the sum of the idle times +of all the workers. +
+ +
STARPU_HWLOC_INPUT
+
+\anchor STARPU_HWLOC_INPUT +\addindex __env__STARPU_HWLOC_INPUT +When defined to the path of an XML file, \c hwloc will use this file +as input instead of detecting the current platform topology, which can +save significant initialization time. + +To produce this XML file, use lstopo file.xml +
+ +
STARPU_CATCH_SIGNALS
+
+\anchor STARPU_CATCH_SIGNALS +\addindex __env__STARPU_CATCH_SIGNALS +By default, StarPU catches signals \c SIGINT, \c SIGSEGV and \c SIGTRAP to +perform final actions such as dumping FxT trace files even though the +application has crashed. Setting this variable to a value other than 1 +will disable this behaviour. This should be done on JVM systems which +may use these signals for their own needs. +The flag can also be set through the field starpu_conf::catch_signals. +
+ +
STARPU_DISPLAY_BINDINGS
+
+\anchor STARPU_DISPLAY_BINDINGS +\addindex __env__STARPU_DISPLAY_BINDINGS +Display the binding of all processes and threads running on the machine. +Setting it to 1 displays the binding masks. Setting it to 2 displays the topology. +If MPI is enabled, display the binding of each node.
+Users can manually display the binding by calling starpu_display_bindings(). +
+
+ +\section ConfiguringTheHypervisor Configuring The Hypervisor + +
+ +
SC_HYPERVISOR_POLICY
+
+\anchor SC_HYPERVISOR_POLICY +\addindex __env__SC_HYPERVISOR_POLICY +Choose between the different resizing policies proposed by StarPU for the hypervisor: +\c idle, \c app_driven, \c feft_lp, \c teft_lp, \c ispeed_lp, \c throughput_lp etc. + +Use SC_HYPERVISOR_POLICY=help to get the list of available policies for the hypervisor +
+ +
SC_HYPERVISOR_TRIGGER_RESIZE
+
+\anchor SC_HYPERVISOR_TRIGGER_RESIZE +\addindex __env__SC_HYPERVISOR_TRIGGER_RESIZE +Choose how the hypervisor should be triggered: speed if the resizing algorithm should +be called whenever the speed of the context does not correspond to an optimal precomputed value, +idle if the resizing algorithm should be called whenever the workers are idle for a period +longer than the value indicated when configuring the hypervisor. +
+ +
SC_HYPERVISOR_START_RESIZE
+
+\anchor SC_HYPERVISOR_START_RESIZE +\addindex __env__SC_HYPERVISOR_START_RESIZE +Indicate the moment when the resizing should be available. The value corresponds to the percentage +of the total time of execution of the application. Default value is the resizing frame. +
+ +
SC_HYPERVISOR_MAX_SPEED_GAP
+
+\anchor SC_HYPERVISOR_MAX_SPEED_GAP +\addindex __env__SC_HYPERVISOR_MAX_SPEED_GAP +Indicate the ratio of speed difference between contexts that should trigger the hypervisor. +This situation may occur only when a theoretical speed could not be computed and the hypervisor +has no value to compare the speed to. Otherwise the resizing of a context is not influenced by the +speed of the other contexts, but only by the value that a context should have. +
+ +
SC_HYPERVISOR_STOP_PRINT
+
+\anchor SC_HYPERVISOR_STOP_PRINT +\addindex __env__SC_HYPERVISOR_STOP_PRINT +By default the values of the speed of the workers are printed during the execution +of the application. If the value 1 is given to this environment variable this printing +is not done. +
+ +
SC_HYPERVISOR_LAZY_RESIZE
+
+\anchor SC_HYPERVISOR_LAZY_RESIZE +\addindex __env__SC_HYPERVISOR_LAZY_RESIZE +By default the hypervisor resizes the contexts in a lazy way, that is workers are firstly added to a new context +before removing them from the previous one. Once these workers are clearly taken into account +into the new context (a task was popped there) we remove them from the previous one. However if the application +would like that the change in the distribution of workers should change right away this variable should be set to 0. +
+ +
SC_HYPERVISOR_SAMPLE_CRITERIA
+
+\anchor SC_HYPERVISOR_SAMPLE_CRITERIA +\addindex __env__SC_HYPERVISOR_SAMPLE_CRITERIA +By default the hypervisor uses a sample of flops when computing the speed of the contexts and of the workers. +If this variable is set to time the hypervisor uses a sample of time (10% of an approximation of the total +execution time of the application) +
+ +
+ +*/ diff --git a/doc/doxygen/chapters/starpu_installation/installation_intro.doxy b/doc/doxygen/chapters/starpu_installation/installation_intro.doxy new file mode 100644 index 0000000..38e276a --- /dev/null +++ b/doc/doxygen/chapters/starpu_installation/installation_intro.doxy @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroInstallation, --------- StarPU Installation ---------} + +\webforeword + +This part shows a basic usage of StarPU and how to execute the provided examples or your own applications. + +
    +
  • Chapter \ref BuildingAndInstallingStarPU shows how to build and install StarPU. +
  • Chapter \ref CompilationConfiguration shows how to tune StarPU building process through configuration options. +
  • Chapter \ref ExecutionConfigurationThroughEnvironmentVariables lists environment variables that can be used to tune StarPU when executing an application. +
+ +Finally, Chapter \ref ConfigurationAndInitialization shows a brief +overview of how to configure and tune StarPU. + +*/ diff --git a/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy b/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy new file mode 100644 index 0000000..1c633c7 --- /dev/null +++ b/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy @@ -0,0 +1,97 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page DocOrganization Documentation Organization + +The documentation chapters include + +
    +
  • \ref IntroInstallation +
      +
    • \ref BuildingAndInstallingStarPU +
    • \ref ExecutionConfigurationThroughEnvironmentVariables +
    • \ref CompilationConfiguration +
    +
  • \ref IntroBasics +
      +
    • \ref StarPUApplications +
    • \ref BasicExamples +
    • \ref FullSourceCodeVectorScal +
    • \ref TasksInStarPU +
    • \ref DataManagement +
    • \ref Scheduling +
    • \ref ExamplesInStarPUSources +
    +
  • \ref IntroApplications +
      +
    • \ref StencilApplication +
    +
  • \ref IntroPerformances +
      +
    • \ref BenchmarkingStarPU +
    • \ref OnlinePerformanceTools +
    • \ref OfflinePerformanceTools +
    +
  • \ref IntroFAQ +
      +
    • \ref CheckListWhenPerformanceAreNotThere +
    • \ref FrequentlyAskedQuestions +
    +
  • \ref IntroLanguage +
      +
    • \ref NativeFortranSupport +
    • \ref StarPUJavaInterface +
    • \ref PythonInterface +
    • \ref OpenMPRuntimeSupport +
    +
  • \ref IntroExtensions +
      +
    • \ref ConfigurationAndInitialization +
    • \ref AdvancedTasksInStarPU +
    • \ref AdvancedDataManagement +
    • \ref AdvancedScheduling +
    • \ref SchedulingContexts +
    • \ref SchedulingContextHypervisor +
    • \ref HowToDefineANewSchedulingPolicy +
    • \ref CUDASupport +
    • \ref OpenCLSupport +
    • \ref MaxFPGASupport +
    • \ref OutOfCore +
    • \ref MPISupport +
    • \ref TCPIPSupport +
    • \ref Transactions +
    • \ref FaultTolerance +
    • \ref FFTSupport +
    • \ref SOCLOpenclExtensions +
    • \ref HierarchicalDAGS +
    • \ref ParallelWorker +
    • \ref InteroperabilitySupport +
    • \ref SimGridSupport +
    • \ref DebuggingTools +
    • \ref Helpers +
    +
  • Appendices +
      +
    • \ref GNUFreeDocumentationLicense +
    • \ref ModuleDocumentation +
    • \ref FileDocumentation +
    • \ref deprecated +
    +
+ +Make sure to have had a look at those too! + +*/ diff --git a/doc/doxygen/chapters/starpu_introduction/glossary.doxy b/doc/doxygen/chapters/starpu_introduction/glossary.doxy new file mode 100644 index 0000000..ac501aa --- /dev/null +++ b/doc/doxygen/chapters/starpu_introduction/glossary.doxy @@ -0,0 +1,65 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page Glossary Glossary + +A \b codelet stores pointers to different implementations of the same +theoretical function. + +A memory node can be either the main RAM, GPU-embedded memory or disk memory. + +A \b bus represents a connection between memory nodes. + +A data handle keeps track of multiple copies of the same data (\b registered by the +application) across various memory nodes. The data management library ensures coherency among these copies. + +The \b home memory node of a data handle is the memory node where the data +was originally registered (typically the main memory node). + +A \b task represents a scheduled execution of a codelet on specific data handles. + +A \b tag is a rendez-vous point. Tasks generally have their own tag and can +depend on other tags. The value of a tag is chosen by the application. + +A \b worker executes tasks. Typically, there is one worker per CPU +computation core and one per accelerator (with a dedicated whole CPU +core).
+ +A \b driver oversees a given type of worker. Currently, there are CPU, +CUDA, and OpenCL drivers. + +A performance model is a (dynamic or static) model of the performance of a +given codelet. Codelets can have performance models for execution time +as well as energy consumption. + +A data \b interface describes the layout of the data: for a vector, it +includes a pointer for the start, the number of elements and the size +of elements ; for a matrix, it involves a pointer for the start, the number of elements per row, the offset between rows, +and the size of each element ; etc. Codelet functions receive interfaces for the local memory node copies of data handles assigned to the scheduled task, to access their data. + +Data \b partitioning means dividing the data of a specific data handle +(referred to as the \b father) into several \b children data handles, +each representing distinct segments of the original data. + +A \b filter is the function responsible for deriving child data handles from a father +data handle, thus defining how the partitioning should be done (e.g. +horizontal, vertical, etc.) + +\b Acquiring a data handle can be done from the main application, +allowing secure access to the data of a data handle from its home node +without needing to unregister it. + +*/ diff --git a/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy b/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy new file mode 100644 index 0000000..609025f --- /dev/null +++ b/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy @@ -0,0 +1,168 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \mainpage Introduction + +\foreword +\webforeword + + +// we need to keep 2 blank lines above +\section Motivation Motivation + +// This is a comment and it will be removed before the file is processed by doxygen +// complex machines with heterogeneous cores/devices + +The use of specialized hardware, such as accelerators or coprocessors offers an +interesting approach to overcoming the physical limits encountered by processor +architects. As a result, many machines are now equipped with one or several +accelerators (e.g. a GPU), in addition to the usual processor(s). While significant efforts have been devoted to offloading computation onto such accelerators, very +little attention has been paid to portability concerns on the one hand, and to the +possibility of having heterogeneous accelerators and processors interact on the other hand. + +StarPU is a runtime system that provides support for heterogeneous multicore +architectures. It not only offers a unified view of the computational resources +(i.e. CPUs and accelerators simultaneously) but also takes care of +efficiently mapping and executing tasks onto a heterogeneous machine while +transparently handling low-level issues such as data transfers in a portable +manner.
+ +// this leads to a complicated distributed memory design +// which is not (easily) manageable by hand +// added value/benefits of StarPU +// - portability +// - scheduling, perf. portability + +\section StarPUInANutshell StarPU in a Nutshell + +StarPU is a software tool designed to enable programmers to harness the +computational capabilities of both CPUs and GPUs, all while sparing them +the need to meticulously adapt their programs for specific target machines +and processing units. + +At the heart of StarPU lies its runtime support library, which takes +charge of scheduling tasks supplied by applications on heterogeneous +CPU/GPU systems. Furthermore, StarPU provides programming language +support through an OpenCL front-end (\ref SOCLOpenclExtensions). + +StarPU's runtime mechanism and programming language extensions are +built around a task-based programming model. In this model, +applications submit computational tasks, with CPU and/or GPU +implementations. StarPU effectively schedules these tasks and manages +the associated data transfers across available CPUs and GPUs. +The data that a task operates on are automatically exchanged between +accelerators and the main memory, thereby sparing programmers the +intricacies of scheduling and the technical details tied to these +transfers. + +StarPU excels in its adeptness at efficiently scheduling tasks using +established algorithms from the literature (\ref TaskSchedulingPolicy). +In addition, it provides the flexibility for scheduling +experts, such as compiler or computational library developers, to +implement custom scheduling policies in a manner that is easily +portable (\ref HowToDefineANewSchedulingPolicy). + +The remainder of this section describes the main concepts used in StarPU. + +A video, lasting 26 minutes, accessible on the StarPU website +(https://starpu.gitlabpages.inria.fr/) presents these concepts.
+ +Additionally, a series of tutorials can be found at +https://starpu.gitlabpages.inria.fr/tutorials/ + +One of the tutorials is available within a docker image +https://starpu.gitlabpages.inria.fr/tutorials/docker/ + +// explain the notion of codelet and task (i.e. g(A, B) + +\subsection CodeletAndTasks Codelet and Tasks + +One of StarPU's key data structures is the \b codelet. A codelet defines a +computational kernel that can potentially be implemented across +various architectures, including CPUs, CUDA devices, or OpenCL devices. + +// TODO insert illustration f: f_spu, f_cpu, ... + +Another pivotal data structure is the \b task. Executing a StarPU task +involves applying a codelet to a data set, utilizing one of the +architectures on which the codelet is implemented. Therefore, a task +describes the codelet that it +uses, the data accessed, and how they are +accessed during the computation (read and/or write). +StarPU tasks are asynchronous, meaning that submitting a task to +StarPU is a non-blocking +operation. The task structure can also specify a \b callback function, +which is called once StarPU successfully completes the task. +Additionally, it contains optional +fields that the application may use to provide hints to the scheduler, +such as priority levels. + +By default, task dependencies are inferred from data dependency (sequential +coherency) within StarPU. However, the application has the ability to +disable sequential coherency for specific data, and dependencies can +also be specifically defined. +A task can be uniquely identified by a 64-bit number, chosen by the +application, referred to as a \b tag. +Task dependencies can be enforced through callback functions, by +submitting other tasks, or by specifying dependencies +between tags (which can correspond to tasks that have yet to be +submitted). + +// TODO insert illustration f(Ar, Brw, Cr) + .. 
+// DSM + +\subsection StarPUDataManagementLibrary StarPU Data Management Library + +As StarPU dynamically schedules tasks at runtime, the need for data +transfers is automatically managed in a ``just-in-time'' manner between +different processing units. +This automated approach alleviates the burden on application +programmers to explicitly handle data transfers. +Furthermore, to minimize needless transfers, StarPU retains data at the +location of its last use, even if modifications were made there. +Additionally, StarPU allows multiple instances of the same data to +coexist across various processing units simultaneously, as long as the +data remains unaltered. + +\section ApplicationTaskification Application Taskification + +We briefly explain here the concept of "taskifying" an application. + +Before transitioning to StarPU, you must transform your application as follows: +
    +
  • Refactor functions into "pure" functions that exclusively utilize data from their parameters. +
  • +
  • Create a central main function responsible for calling these pure functions. +
  • +
+ +Once this restructuring is complete, integrating StarPU or any similar +task-based library becomes straightforward. You merely replace +function calls with task submissions, leveraging the library's +capabilities. + +Chapter \ref StencilApplication shows how to easily convert an +existing application to use StarPU. + +\section ResearchPapers Research Papers + +Research papers about StarPU can be found at +https://starpu.gitlabpages.inria.fr/publications/. + +A good overview is available in the research report at +http://hal.archives-ouvertes.fr/inria-00467677. + +*/ diff --git a/doc/doxygen/chapters/starpu_languages/code/java_spark.java b/doc/doxygen/chapters/starpu_languages/code/java_spark.java new file mode 100644 index 0000000..00ca5ba --- /dev/null +++ b/doc/doxygen/chapters/starpu_languages/code/java_spark.java @@ -0,0 +1,61 @@ +// StarPU --- Runtime system for heterogeneous multicore architectures. +// +// Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +// +// StarPU is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation; either version 2.1 of the License, or (at +// your option) any later version. +// +// StarPU is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See the GNU Lesser General Public License in COPYING.LGPL for more details. +// +//! [To be included. You should update doxygen if you see this text.] 
+package fr.labri.hpccloud.starpu.examples; + +import fr.labri.hpccloud.starpu.StarPU; +import fr.labri.hpccloud.starpu.data.DataPairSet; +import fr.labri.hpccloud.starpu.data.DataSet; +import fr.labri.hpccloud.starpu.data.Tuple2; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Arrays; +import java.util.regex.Pattern; + +public class WordCount +{ + static InputStream openFile(String filename) throws Exception + { + return WordCount.class.getResourceAsStream(filename); + } + + private static final Pattern SPACE = Pattern.compile(" "); + + public static void main(String[] args ) throws Exception + { + InputStream input = new FileInputStream(args[0]); + StarPU.init(); + compute(input); + input.close(); + StarPU.shutdown(); + } + + private static void compute(InputStream input) throws Exception + { + DataSet lines = DataSet.readFile (input, s->s).splitByBlocks(10); + DataSet words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()).splitByBlocks(10); + DataPairSet ones = (DataPairSet)words.mapToPair(w-> new Tuple2<>(w,1)); + DataPairSet counts = ones.reduceByKey((c1,c2)-> c1 + c2); + + for(Tuple2 p : counts.collect()) + { + System.out.println("("+p._1()+","+p._2()+")"); + } + } +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_languages/code/java_starpu.java b/doc/doxygen/chapters/starpu_languages/code/java_starpu.java new file mode 100644 index 0000000..ebe9d4b --- /dev/null +++ b/doc/doxygen/chapters/starpu_languages/code/java_starpu.java @@ -0,0 +1,89 @@ +// StarPU --- Runtime system for heterogeneous multicore architectures. 
+// +// Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +// +// StarPU is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation; either version 2.1 of the License, or (at +// your option) any later version. +// +// StarPU is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See the GNU Lesser General Public License in COPYING.LGPL for more details. +// +//! [To be included. You should update doxygen if you see this text.] +package fr.labri.hpccloud.starpu.examples; + +import fr.labri.hpccloud.starpu.Codelet; +import fr.labri.hpccloud.starpu.StarPU; +import fr.labri.hpccloud.starpu.data.DataHandle; +import fr.labri.hpccloud.starpu.data.IntegerVariableHandle; +import fr.labri.hpccloud.starpu.data.VectorHandle; + +import java.util.Random; + +import static fr.labri.hpccloud.starpu.data.DataHandle.AccessMode.*; + +public class VectorScal +{ + public static final int NX = 10; + public static final Float factor = 3.14f; + + static final Codelet scal = new Codelet() + { + @Override + public void run(DataHandle[] buffers) + { + VectorHandle array = (VectorHandle)buffers[0]; + int n = array.getSize(); + System.out.println(String.format("scaling array %s with %d elements", array, n)); + for (int i = 0; i < n; i++) + { + array.setValueAt(i, factor * array.getValueAt(i)); + } + } + + @Override + public DataHandle.AccessMode[] getAccessModes() + { + return new DataHandle.AccessMode[] + { + STARPU_RW + }; + } + }; + + public static void main(String[] args) throws Exception + { + int nx = (args.length == 0) ? 
NX : Integer.valueOf(args[0]); + compute(nx); + } + + public static void compute(int nx) throws Exception + { + StarPU.init(); + System.out.println(String.format("VECTOR[#nx=%d]", nx)); + VectorHandle arrayHandle = VectorHandle.register(nx); + System.out.println(String.format("scaling array %s", arrayHandle)); + + for(int i=0 ; i +
  • You can learn to natively access most of StarPU functionalities from Fortran 2008+ codes with some explanations and examples in Chapter \ref NativeFortranSupport. +
  • You can find out how to execute Java applications with some important StarPU APIs in Chapter \ref StarPUJavaInterface. +
  • Python interface supports most of the main StarPU functionalities, and new functions especially adapted to Python have been added as well. There are detailed explanations and examples in Chapter \ref PythonInterface. +
  • You can learn how to execute OpenMP tasks with some specific functions in Chapter \ref OpenMPRuntimeSupport. + + + +*/ diff --git a/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy b/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy new file mode 100644 index 0000000..44afff4 --- /dev/null +++ b/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy @@ -0,0 +1,255 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page NativeFortranSupport The StarPU Native Fortran Support + +StarPU provides the necessary routines and support to natively access +most of its functionalities from Fortran 2008+ codes. + +All symbols (functions, constants) are defined in fstarpu_mod.f90. +Every symbol of the Native Fortran support API is prefixed by +fstarpu_. + +Note: Mixing uses of fstarpu_ and starpu_ +symbols in the same Fortran code has unspecified behavior. +See \ref NFAPIMIX for a discussion about valid and unspecified +combinations. 
+ +\section NFImplementation Implementation Details and Specificities + +\subsection NFPrerequisites Prerequisites + +The Native Fortran support relies on Fortran 2008 specific constructs, +as well as on the support for interoperability of assumed-shape arrays +introduced as part of Fortran's Technical Specification ISO/IEC TS 29113:2012, +for which no equivalent are available in previous versions of the +standard. It has currently been tested successfully with GNU GFortran 4.9, +GFortran 5.x, GFortran 6.x and the Intel Fortran Compiler >= 2016. It is known +not to work with GNU GFortran < 4.9, Intel Fortran Compiler < 2016. + +See Section \ref NFOldFortran for information on how to write StarPU +Fortran code with older compilers. + +\subsection NFConfiguration Configuration + +The Native Fortran API is enabled and its companion +fstarpu_mod.f90 Fortran module source file is installed +by default when a Fortran compiler is found, unless the detected Fortran +compiler is known not to support the requirements for the Native Fortran +API. The support can be disabled through the \c configure option \ref +disable-fortran "--disable-fortran". Conditional compiled source codes +may check for the availability of the Native Fortran Support by testing +whether the preprocessor macro STARPU_HAVE_FC is defined or not. + +\subsection NFExamples Examples + +Several examples using the Native Fortran API are provided in +StarPU's examples/native_fortran/ examples directory, to showcase +the Fortran flavor of various basic and more advanced StarPU features. + +\subsection NFAppCompile Compiling a Native Fortran Application + +The Fortran module fstarpu_mod.f90 installed in StarPU's +include/ directory provides all the necessary API definitions. It +must be compiled with the same compiler (same vendor, same version) as +the application itself, and the resulting fstarpu_mod.o object +file must be linked with the application executable. 
+ +Each example provided in StarPU's examples/native_fortran/ +examples directory comes with its own dedicated Makefile for out-of-tree +build. Such example Makefiles may be used as starting points for +building application codes with StarPU. + +\section NFIdioms Fortran Translation for Common StarPU API Idioms + +All these examples assume that the standard Fortran module iso_c_binding +is in use. + +- Specifying a NULL pointer +\code{.f90} + type(c_ptr) :: my_ptr ! variable to store the pointer + ! [...] + my_ptr = C_NULL_PTR ! assign standard constant for NULL ptr +\endcode +- Obtaining a pointer to some object: +\code{.f90} + real(8), dimension(:), allocatable, target :: va + type(c_ptr) :: p_va ! variable to store a pointer to array va + ! [...] + p_va = c_loc(va) +\endcode +- Obtaining a pointer to some subroutine: +\code{.f90} + ! pointed routine definition + recursive subroutine myfunc () bind(C) + ! [...] + type(c_funptr) :: p_fun ! variable to store the routine pointer + ! [...] + p_fun = c_funloc(my_func) +\endcode +- Obtaining the size of some object: +\code{.f90} + real(8) :: a + integer(c_size_t) :: sz_a ! variable to store the size of a + ! [...] + sz_a = c_sizeof(a) +\endcode +- Obtaining the length of an array dimension: +\code{.f90} + real(8), dimension(:,:), allocatable, target :: vb + integer(c_int) :: ln_vb_1 ! variable to store the length of vb's dimension 1 + integer(c_int) :: ln_vb_2 ! variable to store the length of vb's dimension 2 + ! [...] + ln_vb_1 = 1+ubound(vb,1)-lbound(vb,1) ! get length of dimension 1 of vb + ln_vb_2 = 1+ubound(vb,2)-lbound(vb,2) ! get length of dimension 2 of vb +\endcode +- Specifying a string constant: +\code{.f90} + type(c_ptr) :: my_cl ! a StarPU codelet + ! [...] + + ! set the name of a codelet to string 'my_codele't: + call fstarpu_codelet_set_name(my_cl, C_CHAR_"my_codelet"//C_NULL_CHAR) + + ! note: using the C_CHAR_ prefix and the //C_NULL_CHAR concatenation at the end ensures + ! 
that the string constant is properly '\0' terminated, and compatible with StarPU's + ! internal C routines + ! + ! note: plain Fortran string constants are not '\0' terminated, and as such, must not be + ! passed to StarPU routines. +\endcode + +- Combining multiple flag constants with a bitwise 'or': +\code{.f90} + type(c_ptr) :: my_cl ! a pointer for the codelet structure + ! [...] + + ! add a managed buffer to a codelet, specifying both the Read/Write access mode and the Locality hint + call fstarpu_codelet_add_buffer(my_cl, FSTARPU_RW.ior.FSTARPU_LOCALITY) +\endcode + +A basic example is available in examples/native_fortran/nf_vector_scal.f90. + +\section NFInitExit Uses, Initialization and Shutdown + +The snippet below show an example of minimal StarPU code using the +Native Fortran support. The program should use the standard +module iso_c_binding as well as StarPU's fstarpu_mod. The +StarPU runtime engine is initialized with a call to function +fstarpu_init, which returns an integer status of 0 if successful +or non-0 otherwise. Eventually, a call to fstarpu_shutdown ends +the runtime engine and frees all internal StarPU data structures. + +\snippet nf_initexit.f90 To be included. You should update doxygen if you see this text. + +\section NFInsertTask Fortran Flavor of StarPU's Variadic Insert_task + +Fortran does not have a construction similar to C variadic functions, on which +starpu_task_insert() relies at the time of this writing. However, Fortran's variable +length arrays of c_ptr elements enable to emulate much of the +convenience of C's variadic functions. This is the approach retained for +implementing fstarpu_task_insert. + +The general syntax for using fstarpu_task_insert is as follows: +\code{.f90} + call fstarpu_task_insert((/ & + [, , ]* & + [, , ]* & + , C_NULL_PTR /)) +\endcode + +There is thus a unique array argument (/ ... /) passed to +fstarpu_task_insert which itself contains the task settings. 
+Each element of the array must be of type type(c_ptr). +The last element of the array must be C_NULL_PTR. + +Example extracted from nf_vector.f90: +\code{.f90} + call fstarpu_task_insert((/ cl_vec, & ! codelet + FSTARPU_R, dh_va, & ! a first data handle + FSTARPU_RW.ior.FSTARPU_LOCALITY, dh_vb, & ! a second data handle + C_NULL_PTR /)) ! no more args +\endcode + +The full example is available in examples/native_fortran/nf_vector.f90. + +\section NFStructs Functions and Subroutines Expecting Data Structures Arguments + +Several StarPU structures that are expected to be passed to the C API, +are replaced by function/subroutine wrapper sets to allocate, set fields +and free such structure. This strategy has been preferred over defining +native Fortran equivalent of such structures using Fortran's derived +types, to avoid potential layout mismatch between C and Fortran StarPU +data structures. Examples of such data structures wrappers include +fstarpu_conf_allocate and alike, fstarpu_codelet_allocate +and alike, fstarpu_data_filter_allocate and alike. + +Here is an example of allocating, filling and deallocating a codelet +structure: +\code{.f90} + ! a pointer for the codelet structure + type(c_ptr) :: cl_vec + ! [...] + ! allocate an empty codelet structure + cl_vec = fstarpu_codelet_allocate() + ! add a CPU implementation function to the codelet + call fstarpu_codelet_add_cpu_func(cl_vec, C_FUNLOC(cl_cpu_func_vec)) + ! add a CUDA implementation function to the codelet + call fstarpu_codelet_add_cuda_func(cl_vec, C_FUNLOC(cl_cuda_func_vec)) + ! set the codelet name + call fstarpu_codelet_set_name(cl_vec, C_CHAR_"my_vec_codelet"//C_NULL_CHAR) + ! add a Read-only mode data buffer to the codelet + call fstarpu_codelet_add_buffer(cl_vec, FSTARPU_R) + ! add a Read-Write mode data buffer to the codelet + call fstarpu_codelet_add_buffer(cl_vec, FSTARPU_RW.ior.FSTARPU_LOCALITY) + ! [...] + ! 
free codelet structure + call fstarpu_codelet_free(cl_vec) +\endcode + +The full example is available in examples/native_fortran/nf_vector.f90. + +\section NFNotes Additional Notes about the Native Fortran Support +\subsection NFOldFortran Using StarPU with Older Fortran Compilers + +When using older compilers, Fortran applications may still interoperate +with StarPU using C marshalling functions as examplified in StarPU's +examples/fortran/ and examples/fortran90/ example +directories, though the process will be less convenient. + +Basically, the main FORTRAN code calls some C wrapper functions to +submit tasks to StarPU. Then, when StarPU starts a task, another C +wrapper function calls the FORTRAN routine for the task. + +Note that this marshalled FORTRAN support remains available even +when specifying \c configure option \ref disable-fortran "--disable-fortran" +(which only disables StarPU's native Fortran layer). + +\subsection NFAPIMIX Valid API Mixes and Language Mixes + +Mixing uses of +fstarpu_ and starpu_ symbols in the same +Fortran code has unspecified behavior. Using fstarpu_ +symbols in C code has unspecified behavior. + +For multi-language applications using both C and Fortran source files: + +- C source files must use starpu_ symbols exclusively +- Fortran sources must uniformly use either fstarpu_ symbols +exclusively, or starpu_ symbols exclusively. Every other +combination has unspecified behavior. + +*/ diff --git a/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy b/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy new file mode 100644 index 0000000..3d38fd0 --- /dev/null +++ b/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy @@ -0,0 +1,554 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page OpenMPRuntimeSupport The StarPU OpenMP Runtime Support (SORS) + +StarPU provides the necessary routines and support to implement an OpenMP +(http://www.openmp.org/) runtime compliant with the +revision 3.1 of the language specification, and compliant with the +task-related data dependency functionalities introduced in the revision +4.0 of the language. This StarPU OpenMP Runtime Support (SORS) has been +designed to be targeted by OpenMP compilers such as the Klang-OMP +compiler. Most supported OpenMP directives can both be implemented +inline or as outlined functions. + +All functions are defined in \ref API_OpenMP_Runtime_Support. + +Several examples supporting OpenMP API are provided in +StarPU's tests/openmp/ directory. + +\section OMPImplementation Implementation Details and Specificities + +\subsection OMPMainThread Main Thread + +When using SORS, the main thread gets involved in executing OpenMP tasks +just like every other thread, in order to be compliant with the +specification execution model. This contrasts with StarPU's usual +execution model, where the main thread submits tasks but does not take +part in executing them. 
+ +\subsection OMPTaskSemantics Extended Task Semantics + +The semantics of tasks generated by SORS are extended with respect +to regular StarPU tasks in that SORS' tasks may block and be preempted +by SORS call, whereas regular StarPU tasks cannot. SORS tasks may +coexist with regular StarPU tasks. However, only the tasks created using +SORS API functions inherit from extended semantics. + +\section OMPConfiguration Configuration + +SORS can be compiled into libstarpu through +the \c configure option \ref enable-openmp "--enable-openmp". +Conditional compiled source codes may check for the +availability of the OpenMP Runtime Support by testing whether the C +preprocessor macro STARPU_OPENMP is defined or not. + +\section OMPInitExit Initialization and Shutdown + +SORS needs to be executed/terminated by the +starpu_omp_init() / starpu_omp_shutdown() instead of +starpu_init() / starpu_shutdown(). This requirement is necessary to make +sure that the main thread gets the proper execution environment to run +OpenMP tasks. These calls will usually be performed by a compiler +runtime. Thus, they can be executed from a constructor/destructor such +as this: + +\code{.c} +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} +\endcode + +Basic examples are available in the files tests/openmp/init_exit_01.c and tests/openmp/init_exit_02.c. + +\sa starpu_omp_init() +\sa starpu_omp_shutdown() + +\section OMPSharing Parallel Regions and Worksharing + +SORS provides functions to create OpenMP parallel regions, as well as +mapping work on participating workers. The current implementation does +not provide nested active parallel regions: Parallel regions may be +created recursively, however only the first level parallel region may +have more than one worker. 
From an internal point-of-view, SORS' +parallel regions are implemented as a set of implicit, extended semantics +StarPU tasks, following the execution model of the OpenMP specification. +Thus, SORS' parallel region tasks may block and be preempted, by +SORS calls, enabling constructs such as barriers. + +\subsection OMPParallel Parallel Regions + +Parallel regions can be created with the function +starpu_omp_parallel_region() which accepts a set of attributes as +parameter. The execution of the calling task is suspended until the +parallel region completes. The field starpu_omp_parallel_region_attr::cl +is a regular StarPU codelet. However, only CPU codelets are +supported for parallel regions. +Here is an example of use: + +\code{.c} +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + pthread_t tid = pthread_self(); + int worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); +} + +void f(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +\endcode + +A basic example is available in the file tests/openmp/parallel_01.c. + +\sa struct starpu_omp_parallel_region_attr +\sa starpu_omp_parallel_region() + +\subsection OMPFor Parallel For + +OpenMP for loops are provided by the starpu_omp_for() group of +functions. Variants are available for inline or outlined +implementations. SORS supports static, dynamic, and +guided loop scheduling clauses. The auto scheduling clause +is implemented as static. The runtime scheduling clause +honors the scheduling mode selected through the environment variable +\c OMP_SCHEDULE or the starpu_omp_set_schedule() function. For loops with +the ordered clause are also supported. 
An implicit barrier can be +enforced or skipped at the end of the worksharing construct, according +to the value of the nowait parameter. + +The canonical family of starpu_omp_for() functions provide each instance +with the first iteration number and the number of iterations (possibly +zero) to perform. The alternate family of starpu_omp_for_alt() functions +provide each instance with the (possibly empty) range of iterations to +perform, including the first and excluding the last. An example is available in the file tests/openmp/parallel_for_01.c. + +The family of starpu_omp_ordered() functions enable to implement +OpenMP's ordered construct, a region with a parallel for loop that is +guaranteed to be executed in the sequential order of the loop +iterations. An example is available in the file tests/openmp/parallel_for_ordered_01.c. + +\code{.c} +void for_g(unsigned long long i, unsigned long long nb_i, void *arg) +{ + (void) arg; + for (; nb_i > 0; i++, nb_i--) + { + array[i] = 1; + } +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + starpu_omp_for(for_g, NULL, NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0); +} +\endcode + +\sa starpu_omp_for() +\sa starpu_omp_for_inline_first() +\sa starpu_omp_for_inline_next() +\sa starpu_omp_for_alt() +\sa starpu_omp_for_inline_first_alt() +\sa starpu_omp_for_inline_next_alt() +\sa starpu_omp_ordered() +\sa starpu_omp_ordered_inline_begin() +\sa starpu_omp_ordered_inline_end() + + +\subsection OMPSections Sections +OpenMP sections worksharing constructs are supported using the +set of starpu_omp_sections() variants. The general principle is either +to provide an array of per-section functions or a single function that +will redirect the execution to the suitable per-section functions. An +implicit barrier can be enforced or skipped at the end of the +worksharing construct, according to the value of the nowait +parameter. 
+ +\code{.c} +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + + section_funcs[0] = f; + section_funcs[1] = g; + section_funcs[2] = h; + section_funcs[3] = i; + + section_args[0] = arg_f; + section_args[1] = arg_g; + section_args[2] = arg_h; + section_args[3] = arg_i; + + starpu_omp_sections(4, section_f, section_args, 0); +} +\endcode + +An example is available in the file tests/openmp/parallel_sections_01.c. + +\sa starpu_omp_sections() +\sa starpu_omp_sections_combined() + +\subsection OMPSingle Single +OpenMP single worksharing constructs are supported using the set +of starpu_omp_single() variants. An +implicit barrier can be enforced or skipped at the end of the +worksharing construct, according to the value of the nowait +parameter. An example is available in the file tests/openmp/parallel_single_nowait_01.c. + +\code{.c} +void single_f(void *arg) +{ + (void) arg; + pthread_t tid = pthread_self(); + int worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + starpu_omp_single(single_f, NULL, 0); +} +\endcode + +SORS also provides dedicated support for single sections +with copyprivate clauses through the +starpu_omp_single_copyprivate() function variants. The OpenMP +master directive is supported as well, using the +starpu_omp_master() function variants. An example is available in the file tests/openmp/parallel_single_copyprivate_01.c. + +\sa starpu_omp_master() +\sa starpu_omp_master_inline() +\sa starpu_omp_single() +\sa starpu_omp_single_inline() +\sa starpu_omp_single_copyprivate() +\sa starpu_omp_single_copyprivate_inline_begin() +\sa starpu_omp_single_copyprivate_inline_end() + +\section OMPTask Tasks + +SORS implements the necessary support of OpenMP 3.1 and OpenMP 4.0's +so-called explicit tasks, together with OpenMP 4.0's data dependency +management. 
+ +\subsection OMPTaskExplicit Explicit Tasks +Explicit OpenMP tasks are created with SORS using the +starpu_omp_task_region() function. The implementation supports +if, final, untied and mergeable clauses +as defined in the OpenMP specification. Unless specified otherwise by +the appropriate clause(s), the created task may be executed by any +participating worker of the current parallel region. + +The current SORS implementation requires explicit tasks to be created +within the context of an active parallel region. In particular, an +explicit task cannot be created by the main thread outside a parallel +region. Explicit OpenMP tasks created using starpu_omp_task_region() are +implemented as StarPU tasks with extended semantics, and may as such be +blocked and preempted by SORS routines. + +The current SORS implementation supports recursive explicit tasks +creation, to ensure compliance with the OpenMP specification. However, +it should be noted that StarPU is not designed nor optimized for +efficiently scheduling of recursive task applications. + +The code below shows how to create 4 explicit tasks within a parallel +region. + +\code{.c} +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + pthread tid = pthread_self(); + int worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + struct starpu_omp_task_region_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); +} +\endcode + +An example is available in the file tests/openmp/parallel_01.c. 
+ +\sa struct starpu_omp_task_region_attr +\sa starpu_omp_task_region() + +\subsection OMPDataDependencies Data Dependencies +SORS implements inter-tasks data dependencies as specified in OpenMP +4.0. Data dependencies are expressed using regular StarPU data handles +(\ref starpu_data_handle_t) plugged into the task's attr.cl +codelet. The family of starpu_vector_data_register() -like functions, +the starpu_omp_handle_register() and starpu_omp_handle_unregister() functions, +and the starpu_omp_data_lookup() function may be used to register a memory area and +to retrieve the current data handle associated with a pointer +respectively. The testcase ./tests/openmp/task_02.c gives a +detailed example of using OpenMP 4.0 tasks dependencies with SORS +implementation. + +Note: the OpenMP 4.0 specification only supports data dependencies +between sibling tasks, that are tasks created by the same implicit or +explicit parent task. The current SORS implementation also only supports data +dependencies between sibling tasks. Consequently, the behavior is +unspecified if dependencies are expressed between tasks that have not +been created by the same parent task. + +\subsection OMPTaskSyncs TaskWait and TaskGroup +SORS implements both the taskwait and taskgroup OpenMP +task synchronization constructs specified in OpenMP 4.0, with the +starpu_omp_taskwait() and starpu_omp_taskgroup() functions, respectively. 
+ +An example of starpu_omp_taskwait() use, creating two explicit tasks and +waiting for their completion: + +\code{.c} +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + printf("Hello, World!\n"); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + struct starpu_omp_task_region_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_taskwait(); +\endcode + +An example is available in the file tests/openmp/taskwait_01.c. + +An example of starpu_omp_taskgroup() use, creating a task group of two explicit tasks: + +\code{.c} +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + printf("Hello, World!\n"); +} + +void taskgroup_f(void *arg) +{ + (void)arg; + struct starpu_omp_task_region_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + starpu_omp_taskgroup(taskgroup_f, (void *)NULL); +} +\endcode + +An example is available in the file tests/openmp/taskgroup_01.c. + +\sa starpu_omp_task_region() +\sa starpu_omp_taskwait() +\sa starpu_omp_taskgroup() +\sa starpu_omp_taskgroup_inline_begin() +\sa starpu_omp_taskgroup_inline_end() + +\section OMPSynchronization Synchronization Support + +SORS implements objects and method to build common OpenMP +synchronization constructs. 
+ +\subsection OMPSimpleLock Simple Locks + +SORS Simple Locks are opaque starpu_omp_lock_t objects enabling multiple +tasks to synchronize with each other, following the Simple Lock +constructs defined by the OpenMP specification. In accordance with such +specification, simple locks may not be acquired multiple times by the +same task, without being released in-between; otherwise, deadlocks may +result. Codes requiring the possibility to lock multiple times +recursively should use Nestable Locks (\ref OMPNestableLock). Codes NOT +requiring the possibility to lock multiple times recursively should use +Simple Locks as they incur less processing overhead than Nestable Locks. +An example is available in the file tests/openmp/parallel_simple_lock_01.c. + +\sa starpu_omp_lock_t +\sa starpu_omp_init_lock() +\sa starpu_omp_destroy_lock() +\sa starpu_omp_set_lock() +\sa starpu_omp_unset_lock() +\sa starpu_omp_test_lock() + +\subsection OMPNestableLock Nestable Locks + +SORS Nestable Locks are opaque starpu_omp_nest_lock_t objects enabling +multiple tasks to synchronize with each other, following the Nestable +Lock constructs defined by the OpenMP specification. In accordance with +such specification, nestable locks may be acquired multiple times +recursively by the same task without deadlocking. Nested locking and +unlocking operations must be well parenthesized at any time, otherwise +deadlock and/or undefined behavior may occur. Codes requiring the +possibility to lock multiple times recursively should use Nestable +Locks. Codes NOT requiring the possibility to lock multiple times +recursively should use Simple Locks (\ref OMPSimpleLock) instead, as they +incur less processing overhead than Nestable Locks. +An example is available in the file tests/openmp/parallel_nested_lock_01.c.
+ +\sa starpu_omp_nest_lock_t +\sa starpu_omp_init_nest_lock() +\sa starpu_omp_destroy_nest_lock() +\sa starpu_omp_set_nest_lock() +\sa starpu_omp_unset_nest_lock() +\sa starpu_omp_test_nest_lock() + +\subsection OMPCritical Critical Sections + +SORS implements support for OpenMP critical sections through the +family of \ref starpu_omp_critical functions. Critical sections may optionally +be named. There is a single, common anonymous critical section. Mutual +exclusion only occurs within the scope of a single critical section, either +a named one or the anonymous one. Corresponding examples are available in the files tests/openmp/parallel_critical_01.c and tests/openmp/parallel_critical_inline_01.c. + +\sa starpu_omp_critical() +\sa starpu_omp_critical_inline_begin() +\sa starpu_omp_critical_inline_end() + +\subsection OMPBarrier Barriers + +SORS provides the starpu_omp_barrier() function to implement +barriers over parallel region teams. In accordance with the OpenMP +specification, the starpu_omp_barrier() function waits for every +implicit task of the parallel region to reach the barrier and every +explicit task launched by the parallel region to complete, before +returning. +An example is available in the file tests/openmp/parallel_barrier_01.c. + +\sa starpu_omp_barrier() + +\section OMPLLVM Example: An OpenMP LLVM Support + +SORS has been used to implement an OpenMP LLVM Support. This allows OpenMP +applications to run seamlessly on top of StarPU. + +To enable this support, one just needs to call \c configure with the +option \ref enable-openmp-llvm "--enable-openmp-llvm". + +After installation, the directory +lib/starpu/examples/starpu_openmp_llvm contains an OpenMP +application, its source code and the executable compiled with the +StarPU OpenMP LLVM support, as well as a README file explaining how to +use the support for your own application.
+ +One just needs to compile an OpenMP application with clang and +to execute it with the StarPU OpenMP LLVM support library file instead of +the default libomp.so. + +\section OMPStandard OpenMP Standard Functions in StarPU + +StarPU provides several functions which are very similar to their OpenMP counterparts but are adapted to the StarPU runtime system. These functions are: + +\li starpu_omp_set_num_threads() +\li starpu_omp_get_num_threads() +\li starpu_omp_get_thread_num() +\li starpu_omp_get_max_threads() +\li starpu_omp_get_num_procs() which is used to get the number of available StarPU CPU workers. +\li starpu_omp_in_parallel() +\li starpu_omp_set_dynamic() +\li starpu_omp_get_dynamic() +\li starpu_omp_set_nested() +\li starpu_omp_get_nested() +\li starpu_omp_get_cancellation() +\li starpu_omp_set_schedule() +\li starpu_omp_get_schedule() +\li starpu_omp_get_thread_limit() +\li starpu_omp_set_max_active_levels() +\li starpu_omp_get_max_active_levels() +\li starpu_omp_get_level() +\li starpu_omp_get_ancestor_thread_num() +\li starpu_omp_get_team_size() +\li starpu_omp_get_active_level() +\li starpu_omp_in_final() +\li starpu_omp_get_proc_bind() +\li starpu_omp_get_num_places() +\li starpu_omp_get_place_num_procs() +\li starpu_omp_get_place_proc_ids() +\li starpu_omp_get_place_num() +\li starpu_omp_get_partition_num_places() +\li starpu_omp_get_partition_place_nums() +\li starpu_omp_set_default_device() +\li starpu_omp_get_default_device() +\li starpu_omp_get_num_devices() +\li starpu_omp_get_num_teams() +\li starpu_omp_get_team_num() +\li starpu_omp_is_initial_device() +\li starpu_omp_get_initial_device() +\li starpu_omp_get_max_task_priority() +\li starpu_omp_get_wtime() +\li starpu_omp_get_wtick() + +*/ diff --git a/doc/doxygen/chapters/starpu_languages/python.doxy b/doc/doxygen/chapters/starpu_languages/python.doxy new file mode 100644 index 0000000..5bd808e --- /dev/null +++ b/doc/doxygen/chapters/starpu_languages/python.doxy @@ -0,0 +1,1428 @@ +/* StarPU
--- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page PythonInterface Python Interface + +This chapter presents the StarPU Python Interface. It provides for +those used to the Python language a more concise and easy-to-use +StarPU interface. + +This interface supports most of the main StarPU functionalities. While +not all features of the C API are replicated in the Python Interface, +additional functions tailored for Python's ease of use have been +incorporated. + +Several examples using the Python API are provided in the directory starpupy/examples/. + +\section Installation Installation of the Python Interface + +Calling \c configure will enable by default the StarPU Python +Interface. You can also specify the option \ref enable-starpupy +"--enable-starpupy" which will fail if some requirements are missing. +For now, the only requirement is the availability of the \c python3 +interpreter. + +The python modules \c joblib and \c cloudpickle are mandatory to run +parallel codes. + +The python module \c numpy is recommended, but not mandatory. 
+ +\verbatim +$ pip3 install joblib +$ pip3 install cloudpickle +$ pip3 install numpy +$ ../configure --enable-starpupy --enable-blocking-drivers --prefix=$HOME/usr/starpu +$ make +$ make install +\endverbatim + +You can then go to the directory in which StarPU is installed, and test the provided Python examples. + +\verbatim +$ cd $HOME/usr/starpu +$ . ./bin/starpu_env +Setting StarPU environment for ... +$ cd lib/starpu/python +$ python3 starpu_py.py +Example 1: +Hello, world! +... +$ +\endverbatim + +\section PythonParallelism Python Parallelism + +Python interpreters share the Global Interpreter Lock (GIL), which requires +that at any time, one and only one thread has the right to execute a task. With +Python versions up to 3.11, if the application is a pure Python script, even with +multi-interpreters, the program cannot be executed in parallel. The shared GIL +makes the multiple interpreters execution of Python actually serial rather than +parallel, and the execution of a Python program is essentially single-threaded. + +For the pure Python script with python versions up to 3.11, the only way +to achieve parallelism is to use the master-slave mechanism (Section \ref +StarpupyMasterSlave). Parallelism may be implemented with multi-interpreters +in the future Python version. Details can be found in Section \ref +MultipleInterpreters. Otherwise parallelism can be achieved when external +C applications are called or external APIs e.g. BLAS API is used for Numpy +objects. + +Starting from python version 3.12, multiple interpreters can use a separate GIL, +to allow parallelism of pure python code. This can be enabled by setting \ref +STARPUPY_OWN_GIL to 1. Some corner cases are however not supported yet in python +3.12, notably the usage of futures. + +\section ImplementingStarPUInPython Using StarPU in Python + +The StarPU module should be imported in any Python code wanting to use the StarPU Python interface.
+ +\code{.py} +import starpu +\endcode + +Before using any StarPU functionality, it is necessary to call \c starpu.init(). The function \c starpu.shutdown() should be called after all StarPU functions have been called. + +\code{.py} +import starpu + +starpu.init() +# ... +starpu.shutdown() +\endcode + + +\subsection SubmittingTasks Submitting Tasks + +One of the fundamental aspects of StarPU is the task submission. The +Python Interface greatly simplifies this process, allowing for direct +calls to the submission function without any extra complexities. + +The Python function used for task submission follows the format: +task_submit(options)(func, *args, **kwargs). In this structure: + +
      +
    • +\c func represents any Python function. +
    • +
    • +\c args and \c kwargs denote the function's arguments. +
    • +
    + +You can also provide the function as a string. + +By submitting tasks through this function, you enable StarPU to +perform optimizations for your program's execution. It's recommended +to submit all tasks to ensure StarPU's efficient scheduling of the +underlying tasks. It's important to note that submitted tasks do not +execute immediately, and you can retrieve the return value only after +the task execution. + +The first set of parentheses allows to specify various options. +Keep in mind that each option has a default value, and even if you're +not providing any options, the parentheses should be retained. The +options are as follows: + +
      +
    • +\c name (string, default: \c None) : Set the name of the task. This can be useful for debugging purposes. +
    • +
    • +\c synchronous (unsigned, default: 0) : If this flag is set, +\c task_submit() only returns when the task has been +executed (or if no worker is able to process the task). Otherwise, \c +task_submit() returns immediately. +
    • +
    • +\c priority (int, default: 0) : Set the level of priority for +the task. This is an integer value whose value must be greater than +the return value of the function \c starpu.sched_get_min_priority() +(for the least important tasks), and lower or equal to the return +value of the function \c starpu.sched_get_max_priority() (for the most +important tasks). Default priority is defined as 0 in order to allow +static task initialization. Scheduling strategies that take priorities +into account can use this parameter to take better scheduling +decisions, but the scheduling policy may also ignore it. +
    • +
    • +\c color (unsigned, default: \c None) : Set the color of the +task to be used in \c dag.dot. +
    • +
    • +\c flops (double, default: \c None) : Set the number of +floating-point operations that the task will have to perform. This is +useful for easily getting GFlops/s curves from the function \c +starpu.perfmodel_plot, and for the hypervisor load balancing. +
    • + +
    • +\c perfmodel (string, default: \c None) : Set the name of the +performance model. This name will be used as the filename where the +performance model information will be saved. After the task is +executed, one can call the function \c starpu.perfmodel_plot() by +giving the symbol of perfmodel to view its performance curve. +
    • +
    + +\subsection ReturningFutureObject Returning Future Object + +In order to realize asynchronous frameworks, the task_submit() +function returns a Future object. This is an extended use of StarPU +provided by the Python interface. A Future represents an eventual +result of an asynchronous operation. It is an awaitable object, +Coroutines can await on Future objects until they either have a result +or an exception set, or until they are canceled. Some basic examples +are available in the script starpupy/examples/starpu_py.py. + +This feature needs the \c asyncio module to be imported. + +\code{.py} +import starpu +import asyncio + +starpu.init() + +def add(a, b): + return a+b + +async def main(): + fut = starpu.task_submit()(add, 1, 2) + res = await fut + print("The result of function is", res) + +asyncio.run(main()) + +starpu.shutdown() +\endcode + +Execution: + +\verbatim +The result of function is 3 +\endverbatim + +When using at least the version 3.8 of python, one can also use the parameter -m asyncio which allows to directly use await instead of asyncio.run(). + +\verbatim +$ python3 -m asyncio +>>> import asyncio +\endverbatim + +\code{.py} +import starpu + +starpu.init() + +def add(a, b): + print("The result is ready!") + return a+b + +fut = starpu.task_submit()(add, 1, 2) +\endcode + +\verbatim +The result is ready! +\endverbatim + +\code{.py} +res = await fut +res +\endcode + +\verbatim +3 +\endverbatim + +You can also use the decorator \c starpu.delayed to wrap a function. The function can then directly be submitted to StarPU and will automatically create a Future object. +\code{.py} +@starpu.delayed +def add_deco(a, b): + print("The result is ready!") + return a+b + +fut = add_deco(1, 2) +\endcode + +\verbatim +The result is ready! 
+\endverbatim + +\code{.py} +res = await fut +res +\endcode + +\verbatim +3 +\endverbatim + +To specify options when using the decorator, just do as follows: + +\code{.py} +@starpu.delayed(name="add", color=2, perfmodel="add_deco") +def add_deco(a, b): + print("The result is ready!") + return a+b + +fut = add_deco(1, 2) +\endcode + +\verbatim +The result is ready! +\endverbatim + +\code{.py} +res = await fut +res +\endcode + +\verbatim +3 +\endverbatim + +A Future object can also be used for the next step calculation even +before being ready. The calculation will be postponed until the Future +has a result. + +In this example, after submitting the first task, a Future object +fut1 is created, and it is used as an argument of a second +task. The second task is submitted even without having the return +value of the first task. + +\code{.py} +import asyncio +import starpu +import time + +starpu.init() + +def add(a, b): + time.sleep(10) + print("The first result is ready!") + return a+b + +def sub(x, a): + print("The second result is ready!") + return x-a + +fut1 = starpu.task_submit()(add, 1, 2) +fut2 = starpu.task_submit()(sub, fut1, 1) +\endcode + +\verbatim +The first result is ready! +The second result is ready! +\endverbatim + +\code{.py} +res = await fut2 +res +\endcode + +\verbatim +2 +\endverbatim + +\subsection SubmitPythonBuffer Submit Python Objects Supporting The Buffer Protocol + +The Python buffer protocol is a framework in which Python objects can +expose raw byte arrays to other Python objects. This can be extremely +useful to efficiently store and manipulate large arrays of data. The +StarPU Python Interface allows users to use such objects as task +parameters. 
+ +\code{.py} +import asyncio +import starpu +import time +import numpy as np + +starpu.init() + +def add(a,b): + c = np.zeros(np.size(a)) + for i in range(np.size(a)): + c[i] = a[i] + b[i] + return c +a = np.array([1, 2, 3]) +b = np.array([4, 5, 6]) +fut = starpu.task_submit()(add, a, b) +res = await fut +res +\endcode + +\verbatim +array([5., 7., 9.]) +\endverbatim + +StarPU uses a specific data interface to handle Python objects +supporting buffer protocol, such python objects are then managed by +the StarPU data management library which allows minimizing data +transfers between accelerators, and avoids copying the object each +time. + +We show the performances below of the \c numpy addition +(numpy.add running the script test_perf.sh) with +different array sizes (10, 20, ..., 100, 200, ..., 1000, 2000, ..., +10000, 20000, ..., 100000, 200000, ..., 1000000, 2000000, ..., +10000000, ..., 50000000). We compare two cases: +
      +
    1. Using StarPU,
    2. +
    3. Without using StarPU tasks, but directly calling the numpy.add function.
    4. +
    + +The first plot compares the task submission time when using StarPU and the program execution time without using StarPU. We can see that there is an obvious optimization using StarPU when the test array size is large. The task has not finished its execution yet as shown in second figure, the time can be used to perform other operations. + +\image html starpupy_perf.png width=85% +\image latex starpupy_perf.png "" width=\textwidth + +We can also define our own function to do the \c numpy operation, e.g. the element addition: + +\code{.py} +def add(a, b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] +\endcode + +We will compare operation performances with the same two cases, but based on our custom function add(a, b). + +We can see that the custom function is not as efficient as the \c numpy function overall. The optimization for large arrays is the same when using StarPU. + +\image html starpupy_func_perf.png width=85% +\image latex starpupy_func_perf.png "" width=\textwidth + +\subsubsection AnnotationAccess Access Mode Annotation + +StarPU defines different access modes for a data, it can be readable (access mode is \c R), writable (access mode is \c W), or both readable and writable (access mode is \c RW). The default access mode is \c R. + +For the Python interface, these modes can be defined as shown below. + +
      +
    1. +Using the decorator starpu.access(arg="R/W/RW") to wrap the function. + +\code{.py} +a = np.array([1, 2, 3, 4, 5, 6]) +e = np.array([0, 0, 0, 0, 0, 0, 0]) +@starpu.access(a="R", b="W") +def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] +fut = starpu.task_submit()(assign, a, e) +starpu.acquire(e) +\endcode + +\verbatim +array([1, 2, 3, 4, 5, 6, 0]) +\endverbatim + +\code{.py} +starpu.release(e) +\endcode +
    2. + +
    3. +Using the decorator starpu.delayed(options, arg="R/W/RW"). + +\code{.py} +@starpu.delayed(a="R", b="W") +def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] +fut = assign(a, e) +starpu.acquire(e) +\endcode + +\verbatim +array([1, 2, 3, 4, 5, 6, 0]) +\endverbatim + +\code{.py} +starpu.release(e) +\endcode +
    4. + +
    5. +Using the method starpu.set_access(func, arg="R/W/RW") that will create a new function. + +\code{.py} +def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] +assign_access=starpu.set_access(assign, a="R", b="W") +fut = starpu.task_submit()(assign_access, a, e) +starpu.acquire(e) +\endcode + +\verbatim +array([1, 2, 3, 4, 5, 6, 0]) +\endverbatim + +\code{.py} +starpu.release(e) +\endcode +
    6. +
    + +\subsubsection MethodsAcquireRelease Methods + +Once the access mode of one argument is set to at least \c W, it may be modified during the task execution. We should pay attention that before the task is finished, we cannot get the up-to-date value of this argument by simply using \c print function. For example: + +\code{.py} +import asyncio +import starpu +import time +import numpy as np + +starpu.init() + +a = np.array([1, 2, 3, 4, 5, 6]) +e = np.array([0, 0, 0, 0, 0, 0, 0]) +@starpu.access(a="R", b="W") +def assign(a,b): + time.sleep(10) + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] +fut = starpu.task_submit()(assign, a, e) +print(e) # before the task is finished +\endcode + +\verbatim +[0 0 0 0 0 0 0] +\endverbatim + +We \c print argument \c e right after submitting the task, but since the task is not finished yet, we can only get its unchanged value. +If we want to get its up-to-date value, we need extra functions. + +In order to access data registered to StarPU outside tasks, we provide an acquire and release mechanism. + +
      +
    • The starpu.acquire(data, mode) method should be called to +access registered data outside tasks (Refer to the C API starpu_data_acquire()). StarPU will ensure that the application will get an up-to-date copy of handle in main memory located where the data was originally registered, and that all concurrent accesses (e.g. from tasks) will be consistent with the access mode specified with the given mode (\c R the default mode, \c W or \c RW). +
    • + +
    • The starpu.release(data) method must be called once the +application no longer needs to access the piece of data (Refer to the +C API starpu_data_release()). +
    • + +
    • The starpu.unregister(data) method must be called to +unregister the Python object from StarPU. (Refer to the C API starpu_data_unregister()). This method waits for all calculations to be finished before unregistering data. +
    • +
    + +With \c acquire, even we ask to access the argument right after submitting the task, the up-to-date value will be printed once the task is finished. + +\code{.py} +starpu.acquire(e) # before the task is finished +\endcode + +\verbatim +array([1, 2, 3, 4, 5, 6, 0]) +\endverbatim + +In order to complete the addition operation example, execution steps are: + +\code{.py} +import asyncio +import starpu +import time +import numpy as np + +starpu.init() + +@starpu.access(a="RW", b="R") +def add(a,b): + time.sleep(10) + for i in range(np.size(a)): + a[i] = a[i] + b[i] + +a = np.array([1, 2, 3]) +b = np.array([4, 5, 6]) +starpu.acquire(a, mode="R") +\endcode + +\verbatim +array([1, 2, 3]) +\endverbatim + +\code{.py} +starpu.release(a) +fut = starpu.task_submit()(add, a, b) +starpu.acquire(b, mode="R") +\endcode + +\verbatim +array([4, 5, 6]) +\endverbatim + +\code{.py} +starpu.acquire(a, mode="R") # before the task is finished +\endcode + +\verbatim +array([5, 7, 9]) +\endverbatim + +\code{.py} +starpu.release(a) +starpu.release(b) +starpu.unregister(a) +starpu.unregister(b) +\endcode + +The result of \c b is printed directly right after calling \c acquire, but the up-to-date value of \c a is printed after the task is finished. Here we need to pay attention that if we want to modify an argument during the task execution and get its up-to-date value for the future operation, we should set the access mode of this argument to at least \c W, otherwise this argument object is not synchronous, and the next task which needs this object will not wait its up-to-date value to execute. + +If we call \c acquire but not \c release before the task submission, the task will not start to execute until the object is released. 
+ +An example is shown below: + +\code{.py} +import asyncio +import starpu +import numpy as np +import time + +starpu.init() + +@starpu.access(a="RW") +def add(a,b): + print("This is the addition function") + time.sleep(10) + for i in range(np.size(a)): + a[i] = a[i] + b[i] +a = np.array([1, 2, 3]) +b = np.array([4, 5, 6]) +starpu.acquire(a, mode="R") +\endcode + +\verbatim +array([1, 2, 3]) +\endverbatim + +\code{.py} +fut = starpu.task_submit()(add, a, b) +starpu.release(a) +\endcode + +\verbatim +This is the addition function # The task will not start until "a" is released +\endverbatim + +\code{.py} +starpu.acquire(a, mode="R") # Before the task is finished +\endcode + +\verbatim +array([5, 7, 9]) # After the task is finished +\endverbatim + +\code{.py} +starpu.release(a) +starpu.unregister(a) +starpu.unregister(b) +\endcode + +\section StarPUPYInterface StarPU Data Interface for Python Objects + +StarPU uses data handles to manage a piece of data. A data handle keeps track of replicates of the same data (registered by the application) over various memory nodes. The data management library manages to keep them coherent. That also allows minimizing the data transfers, and avoids copying the object each time. Data handles are managed through specific data interfaces. Some examples applying this specific interface are available in script starpupy/examples/starpu_py_handle.py. + +\subsection PythonObject Interface for Ordinary Python Objects + +A specific data interface has been defined to manage Python objects, such as constant (integer, float...), string, list, etc. This interface is defined with the class Handle. When submitting a task, instead of specifying a function and its arguments, we specify a function and the handles of its arguments. + +In addition to returning a Future object, it is also possible to return a StarPU handle object when submitting a function. 
To do so, you need to set the starpu.task_submit option \c ret_handle to \c True, its default value is \c False. + +\code{.py} +import starpu +from starpu import Handle + +starpu.init() + +def add(x, y): + return x + y + +x = Handle(2) +y = Handle(3) +res = starpu.task_submit(ret_handle=True)(add, x, y) +\endcode + +We then need to call the method get() to get the latest version of this Python Object. + +\code{.py} +res.get() +\endcode + +\verbatim +5 +\endverbatim + +When not setting the parameter \c ret_handle, the return object is a Future. + +\code{.py} +res_fut = starpu.task_submit()(add, x, y) +await res_fut +\endcode + +If the Python object is immutable (such as int, float, str, tuple...), registering the same object several times is authorised. That means you can do this: +\code{.py} +x = Handle(2) +x1 = Handle(2) +\endcode +x and x1 are two different Handle objects. + +\subsection PythonBuffer Interface for Python Objects Supporting Buffer Protocol + +This StarPU data interface can also be used to manage Python objects supporting buffer protocol, i.e \c numpy array, bytes, bytearray, array.array and memoryview object. + +\code{.py} +import numpy as np +import starpu +from starpu import Handle + +starpu.init() + +def add(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + return a + +a = np.array([1, 2, 3]) +b = np.array([2, 4, 6]) +a_h = Handle(a) +b_h = Handle(b) +res = starpu.task_submit(ret_handle=True)(add, a_h, b_h) +res.get() +\endcode + +\verbatim +array([3, 6, 9]) +\endverbatim + +Different from immutable Python object, all Python objects supporting buffer protocol are mutable, and registering the same object one more time is not authorized. If you do this: + +\code{.py} +a = np.array([1, 2, 3]) +a_h = Handle(a) +a1_h = Handle(a) +\endcode + +You will get an error message: + +\verbatim +starpupy.error: Should not register the same mutable python object once more. 
+\endverbatim + +You may refer to Section \ref SubmitPythonBuffer, and realize that the StarPU Python interface uses data handles to manage Python objects supporting buffer protocol by default. These objects are usually relatively large, such as a big NumPy matrix. We want to avoid multiple copies and transfers of this data over various memory nodes, so we set the default \c starpu.task_submit() option \c arg_handle to \c True for users to allow their applications to get the most optimization. To deactivate the use of this data interface, you need to set the option \c arg_handle to \c False. + +Since we use data handles by default, registration is implemented in the step of task submission. Therefore, you should be careful not to register again the same object after the task submission, like this: + +\code{.py} +a = np.array([1, 2, 3]) +b = np.array([2, 4, 6]) +res = starpu.task_submit(ret_handle=True)(add, a, b) +a_h = Handle(a) +\endcode + +You will get the error message: + +\verbatim +starpupy.error: Should not register the same mutable python object once more. +\endverbatim + +As for performance, we extend the comparison shown in Section \ref SubmitPythonBuffer by adding one case to the two previous ones. We still test the \c numpy addition (numpy.add running the script test_handle_perf.sh) with different array sizes (10, 20, ..., 100, 200, ..., 1000, 2000, ..., 10000, 20000, ..., 100000, 200000, ..., 1000000, 2000000, ..., 10000000, ..., 50000000). The three cases are: +
      +
    1. Using StarPU and returning future object,
    2. +
    3. Using StarPU and returning handle object,
    4. +
    5. Without using StarPU tasks, but directly calling the numpy.add function.
    6. +
    + +The first plot compares the task submission time when using StarPU either returning a Future or a handle object and the program execution time without using StarPU. We can see that there is an obvious optimization using StarPU, either returning a Future or a handle object when the test array size is large. The task has not finished its execution yet as shown in second figure, the time can be used to perform other operations. When array size is not very large, returning a handle has a better execution performance than returning a Future. + +\image html starpupy_handle_perf.png width=85% +\image latex starpupy_handle_perf.png "" width=\textwidth + +We can also define our own function to do the \c numpy operation, e.g. the element addition: + +\code{.py} +def add(a, b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] +\endcode + +We will compare operation performances with the same three cases but based on our custom function add(a, b). + +We can see that the custom function is not as efficient as the \c numpy function overall. The optimisation for large arrays is the same when using StarPU. + +\image html starpupy_handle_func_perf.png width=85% +\image latex starpupy_handle_func_perf.png "" width=\textwidth + +\subsubsection Methods Methods + +As in Section \ref MethodsAcquireRelease, the \c Handle class defines methods to provide an acquire and release mechanism. + +
      +
    • +The method Handle::acquire(mode) should be called before +accessing the object outside tasks (Refer to the C API starpu_data_acquire()). The access mode can be \c "R", \c "W", \c "RW", the default value is "R". We will get an up-to-date copy of Python object by calling this method. +
    • + +
    • +The method Handle::release() must be called once the +application no longer needs to access the registered data (Refer to +the C API starpu_data_release()). +
    • + +
    • +The method Handle::unregister() to unregister the Python object +handle from StarPU (Refer to the C API starpu_data_unregister()). This method will wait for all calculations to be finished before unregistering data. +
    • +
    + +The previous example can be coded as follows: + +\code{.py} +import numpy as np +import starpu +from starpu import Handle + +starpu.init() + +@starpu.access(a="RW", b="R") +def add(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + +a = np.array([1, 2, 3]) +b = np.array([2, 4, 6]) +a_h = Handle(a) +b_h = Handle(b) +a_h.acquire(mode = "R") +\endcode + +\code{.py} +array([1, 2, 3]) +\endcode + +\code{.py} +a_h.release() +starpu.task_submit(ret_handle=True)(add, a_h, b_h) +a_h.acquire(mode = "R") # we get the up-to-date value +\endcode + +\verbatim +array([3, 6, 9]) +\endverbatim + +\code{.py} +a_h.release() +a_h.unregister() +\endcode + +\subsection EmptyNumpy Interface for Empty Numpy Array + +We can register an empty \c numpy array by calling HandleNumpy(size, type). The default value for type is float64. + +You will find below an example which defines the function \c assign taking two arrays as parameters, the second one being an empty array which will be assigned the values of the first array. + +\code{.py} +import numpy as np +import starpu +from starpu import Handle +from starpu import HandleNumpy + +starpu.init() + +@starpu.access(b="W") +def assign(a,b): + for i in range(min(np.size(a,0), np.size(b,0))): + for j in range(min(np.size(a,1), np.size(b,1))): + b[i][j] = a[i][j] + return b + +a = np.array([[1, 2, 3], [4, 5, 6]]) +a_h = Handle(a) +e_h = HandleNumpy((5,10), a.dtype) +res = starpu.task_submit(ret_handle=True)(assign, a_h, e_h) +e_h.acquire() +\endcode + +\verbatim +array([[1, 2, 3, 0, 0, 0, 0, 0, 0, 0], + [4, 5, 6, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) +\endverbatim + +\code{.py} +e_h.release() +\endcode + +\subsection HandlePartition Array Partitioning + +A n-dim \c numpy array can be split into several sub-arrays by calling +the method Handle::partition(nchildren, dim, chunks_list) +(Refer to the C API starpu_data_partition_plan()). +
      +
    • +nchildren is the number of sub-handles, +
    • +
    • +dim is the dimension along which we want to partition; it can be 0 for the vertical dimension, 1 for the horizontal dimension, 2 for the depth dimension, 3 for the time dimension, etc. +
    • +
    • +chunks_list is a list containing the size of each segment. The total length of segments in this list must be equal to the length of the selected dimension. +
    • +
    + +The method will return a sub-handle list, each of the sub-handles can be used when submitting a task with task_submit(). This allows to process an array in parallel, once the execution of each sub-handle is finished, the result will be directly reflected in the original n-dim array. + +When the sub-handles are no longer needed, the method +Handle::unpartition(handle_list, nchildren) should be called to +clear the partition and unregister all the sub-handles +(Refer to the C API starpu_data_partition_clean()). +
      +
    • +handle_list is the sub-handle list which was previously returned by the method Handle::partition(), +
    • +
    • +nchildren is the number of sub-handles. +
    • +
    + +Here is an example to use these methods. + +\code{.py} +import numpy as np +import starpu +from starpu import Handle + +starpu.init() + +@starpu.access(a="RW", b="R") +def add(a,b): + np.add(a,b,out=a) + +n, m = 20, 10 +arr = np.arange(n*m).reshape(n, m) +arr_h = Handle(arr) +arr_h.acquire(mode='RW') +\endcode + +\verbatim + [[ 0 1 2 3 4 5 6 7 8 9] + [ 10 11 12 13 14 15 16 17 18 19] + [ 20 21 22 23 24 25 26 27 28 29] + [ 30 31 32 33 34 35 36 37 38 39] + [ 40 41 42 43 44 45 46 47 48 49] + [ 50 51 52 53 54 55 56 57 58 59] + [ 60 61 62 63 64 65 66 67 68 69] + [ 70 71 72 73 74 75 76 77 78 79] + [ 80 81 82 83 84 85 86 87 88 89] + [ 90 91 92 93 94 95 96 97 98 99] + [100 101 102 103 104 105 106 107 108 109] + [110 111 112 113 114 115 116 117 118 119] + [120 121 122 123 124 125 126 127 128 129] + [130 131 132 133 134 135 136 137 138 139] + [140 141 142 143 144 145 146 147 148 149] + [150 151 152 153 154 155 156 157 158 159] + [160 161 162 163 164 165 166 167 168 169] + [170 171 172 173 174 175 176 177 178 179] + [180 181 182 183 184 185 186 187 188 189] + [190 191 192 193 194 195 196 197 198 199]] +\endverbatim + +\code{.py} +arr_h.release() +split_num = 3 +arr_h_list = arr_h.partition(split_num, 1, [3,2,5]) # split into 3 sub-handles, and partition along the horizontal dimension +for i in range(split_num): + res=starpu.task_submit(ret_handle=True)(add, arr_h_list[i], arr_h_list[i]) +arr_h.acquire(mode='RW') +\endcode + +\verbatim +[[ 0 2 4 12 16 40 48 56 64 72] + [ 80 88 96 104 112 120 128 136 144 152] + [160 168 176 184 192 200 208 216 224 232] + [240 248 256 264 272 280 288 296 304 312] + [320 328 336 172 176 180 184 188 192 196] + [200 204 208 212 216 220 224 228 232 236] + [120 122 124 126 128 130 132 134 136 138] + [140 142 144 146 148 150 152 154 156 158] + [160 162 164 166 168 170 172 174 176 178] + [180 182 184 186 188 190 192 194 196 198] + [200 202 204 206 208 105 106 107 108 109] + [110 111 112 113 114 115 116 117 118 119] + [120 121 122 123 124 125 126 
127 128 129] + [130 131 132 133 134 135 136 137 138 139] + [140 141 142 143 144 145 146 147 148 149] + [150 151 152 153 154 155 156 157 158 159] + [160 161 162 163 164 165 166 167 168 169] + [170 171 172 173 174 175 176 177 178 179] + [180 181 182 183 184 185 186 187 188 189] + [190 191 192 193 194 195 196 197 198 199]] +\endverbatim + +\code{.py} +arr_h.release() +arr_h.unpartition(arr_h_list, split_num) +arr_h.unregister() +\endcode + +The method Handle::get_partition_size(handle_list) can be used to get the array size of each sub-array. + +\code{.py} +arr_h_list = arr_h.partition(split_num, 1, [3,2,5]) +arr_h.get_partition_size(arr_h_list) +\endcode + +\verbatim +[60, 40, 100] +\endverbatim + +The full script is available in starpupy/examples/starpu_py_partition.py. + +\section Benchmark Benchmark + +This benchmark gives a glimpse into how long a task should be (in µs) for the StarPU Python interface overhead to be low enough to keep efficiency. Running starpupy/benchmark/tasks_size_overhead.sh generates a plot of the speedup of tasks of various sizes, depending on the number of CPUs being used. + +In the first figure, the return value is a handle object. +In the second figure, the return value is a future object. +In the third figure, the return value is \c None. + +For example, in the figure of returning handle object, for a 571 µs task (the green line), StarPU overhead is low enough +to guarantee a good speedup if the number of CPUs is not more than 12. But with the same number of CPUs, +a 314 µs task (the blue line) cannot have a correct speedup. We need to decrease the number of CPUs +to about 8 if we want to keep efficiency. 
+ +\image html tasks_size_overhead_py_handle.png "(1) Returning handle object" width=50% +\image latex tasks_size_overhead_py_handle.png "" width=\textwidth + +\image html tasks_size_overhead_py_futur.png "(2) Returning future object" width=50% +\image latex tasks_size_overhead_py_futur.png "" width=\textwidth + +\image html tasks_size_overhead_py_none.png "(3) Returning None" width=50% +\image latex tasks_size_overhead_py_none.png "" width=\textwidth + +\section ImitatingJoblibLibrary Running Python Functions as Pipeline Jobs (Imitating Joblib Library) + +The StarPU Python interface also provides parallel computing for loops using multiprocessing, similarly to the Joblib Library that can simply turn out Python code into parallel computing code and thus increase the computing speed. + +\subsection JobLibraryExamples Examples + +
      + +
    • +The most basic usage is to parallelize a simple iteration. + +\code{.py} +from math import log10 +[log10(10 ** i) for i in range(10)] +\endcode + +\verbatim +[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] +\endverbatim + +In order to spread it over several CPUs, you need to import the \c starpu.joblib module, and use its \c Parallel class: + +\code{.py} +import starpu.joblib +from math import log10 + +starpu.init() +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(log10)(10**i)for i in range(10)) +\endcode + +\verbatim +[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] +\endverbatim + +It is also possible to first create an object of the \c Parallel class, and then call \c starpu.joblib.delayed to execute the generator expression. + +\code{.py} +import starpu.joblib +from math import log10 +starpu.init() +parallel=starpu.joblib.Parallel(n_jobs=2) +parallel(starpu.joblib.delayed(log10)(10**i)for i in range(10)) +\endcode + +\verbatim +[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] +\endverbatim +
    • + +
    • +Instead of a generator expression, a list of functions can also be submitted as a task through the \c Parallel class. + +\code{.py} +import starpu.joblib + +starpu.init() + +#generate a list to store functions +g_func=[] + +#function no input no output print hello world +def hello(): + print ("Example 1: Hello, world!") +g_func.append(starpu.joblib.delayed(hello)()) + +#function has 2 int inputs and 1 int output +def multi(a, b): + res_multi = a*b + print("Example 2: The result of ",a,"*",b,"is",res_multi) + return res_multi +g_func.append(starpu.joblib.delayed(multi)(2, 3)) + +#function has 4 float inputs and 1 float output +def add(a, b, c, d): + res_add = a+b+c+d + print("Example 3: The result of ",a,"+",b,"+",c,"+",d,"is",res_add) + return res_add +g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9)) + +#function has 2 int inputs 1 float input and 1 float output 1 int output +def sub(a, b, c): + res_sub1 = a-b-c + res_sub2 = a-b + print ("Example 4: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2) + return res_sub1, res_sub2 +g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9)) + +#input is iterable function list +starpu.joblib.Parallel(n_jobs=2)(g_func) +\endcode + +Execution: + +\verbatim +Example 3: The result of 1.2 + 2.5 + 3.6 + 4.9 is 12.200000000000001 +Example 1: Hello, world! +Example 4: The result of 6 - 2 - 5.9 is -1.9000000000000004 and the result of 6 - 2 is 4 +Example 2: The result of 2 * 3 is 6 +[None, 6, 12.200000000000001, (-1.9000000000000004, 4)] +\endverbatim +
    • + +
    • +The function can also take array parameters. + +\code{.py} +import starpu.joblib +import numpy as np + +starpu.init() + +def multi_array(a, b): + for i in range(len(a)): + a[i] = a[i]*b[i] + +A = np.arange(10) +B = np.arange(10, 20, 1) +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)((i for i in A), (j for j in B))) +A +\endcode + +Here the array \c A has not been modified. + +\verbatim +array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +\endverbatim + +If we pass \c A directly as an argument, its value is updated: + +\code{.py} +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)(A, B)) +A +\endcode + +\verbatim +array([ 0, 11, 24, 39, 56, 75, 96, 119, 144, 171]) +\endverbatim + +In the next call, the value of \c A is also updated. + +\code{.py} +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)(b=(j for j in B), a=A)) +A +\endcode + +\verbatim +array([ 0, 121, 288, 507, 784, 1125, 1536, 2023, 2592, 3249]) +\endverbatim + +The three ways of writing the call shown above are equivalent, and their execution times are very close. However, when a \c numpy array is passed directly, its value is updated, which does not happen when generators are provided. When a \c numpy array is used, it is handled by StarPU through a data interface. +
    • + +
    • +Here is an example mixing scalar objects and \c numpy arrays or generator expressions. + +\code{.py} +import starpu.joblib +import numpy as np + +starpu.init() + +def scal(a, t): + for i in range(len(t)): + t[i] = t[i]*a +A = np.arange(10) + +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(scal)(2, (i for i in A))) + +starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(scal)(2,A)) +\endcode + +Again, the value of \c A is modified by the 2nd call. + +\code{.py} +A +\endcode + +\verbatim +array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18]) +\endverbatim +
    • + +
    + +The full script is available in starpupy/examples/starpu_py_parallel.py. + +\subsection ParallelParameters Parallel Parameters + +The \c starpu.joblib.Parallel class accepts the following parameters: + +
      +
    • \c mode (string, default: \c "normal") + +A string with the value "normal" or "future". With the "normal" mode, you can call \c starpu.joblib.Parallel directly without using the \c asyncio module, and you will get the result when the task is executed. With the "future" mode, when calling \c starpu.joblib.Parallel, you will get a Future object as a return value. By setting the parameter end_msg, the given message will be displayed when the result is ready, then you can call \c await to get the result. The \c asyncio module should be imported in this case. + +\code{.py} +import starpu +import asyncio +from math import log10 +starpu.init() +fut = starpu.joblib.Parallel(mode="future", n_jobs=3, end_msg="The result is ready!")(starpu.joblib.delayed(log10)(10**i)for i in range(10)) +The result is ready! <_GatheringFuture finished result=[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]> +await fut +\endcode + +\verbatim +[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] +\endverbatim +
    • + +
    • \c end_msg (string, default: \c None) + +A message that will be displayed when the task is executed and the result is ready. When the parameter is unset, no message will be displayed when the result is ready. In any case, you need to use \c await to get the result. +
    • + +
    • \c n_jobs (int, default: \c None) + +The maximum number of concurrently running jobs. If -1 all CPUs are used. If 1 is given, no parallel computing code is used at all, which is useful for debugging. For \c n_jobs below -1, (\c n_cpus + 1 + \c n_jobs) are used. Thus, for \c n_jobs = -2, all CPUs but one are used. \c None is a marker for ‘unset’ that will be interpreted as \c n_jobs=1 (sequential execution). +\c n_cpus is the number of CPUs detected by StarPU on the running device. +
    • + +
    • \c perfmodel (string, default: \c None) + +Set the name of the performance model. This name will be used as the filename where the performance model information will be saved. After the task is executed, one can call the function \c starpu.perfmodel_plot() with the name of the performance model to view its performance curve. +
    • +
    + +\subsection JobLibPerformances Performances + +
      +
    • +We compare the performances of the two methods for passing arguments to the \c starpu.joblib.delayed function. The first method defines a function that contains only scalar calculations, and then we pass a generator expression as an argument. The second method defines a function that contains array calculations, and then we pass either \c numpy arrays or generators as arguments. The second method takes less time. + +\code{.py} +import starpu.joblib +import numpy as np +import time + +starpu.init() + +N=1000000 + +def multi(a,b): + res_multi = a*b + return res_multi + +print("--First method") +A = np.arange(N) +B = np.arange(N, 2*N, 1) +start_exec1 = time.time() +start_cpu1 = time.process_time() +starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi)(i,j) for i,j in zip(A,B)) +end_exec1 = time.time() +end_cpu1 = time.process_time() +print("the program execution time is", end_exec1-start_exec1) +print("the cpu execution time is", end_cpu1-start_cpu1) + +def multi_array(a, b): + for i in range(len(a)): + a[i] = a[i]*b[i] + return a + +print("--Second method with Numpy arrays") +A = np.arange(N) +B = np.arange(N, 2*N, 1) +start_exec2 = time.time() +start_cpu2 = time.process_time() +starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi_array)(A, B)) +end_exec2 = time.time() +end_cpu2 = time.process_time() +print("the program execution time is", end_exec2-start_exec2) +print("the cpu execution time is", end_cpu2-start_cpu2) + +print("--Second method with generators") +A = np.arange(N) +B = np.arange(N, 2*N, 1) +start_exec3 = time.time() +start_cpu3 = time.process_time() +starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi_array)((i for i in A), (j for j in B))) +end_exec3 = time.time() +end_cpu3 = time.process_time() +print("the program execution time is", end_exec3-start_exec3) +print("the cpu execution time is", end_cpu3-start_cpu3) +\endcode + +Execution: + +\verbatim +--First method +the program execution time is 3.000865936279297 
+the cpu execution time is 5.17138062 +--Second method with Numpy arrays +the program execution time is 0.7571873664855957 +the cpu execution time is 0.9166007309999991 +--Second method with generators +the program execution time is 0.7259719371795654 +the cpu execution time is 1.1182918959999988 +\endverbatim + +
    • +
    • +Performance can also be shown with the performance model. Here is an example with the function \c log10. + +\code{.py} +from math import log10 +for x in [10, 100, 1000, 10000, 100000, 1000000]: + for X in range(x, x*10, x): + starpu.joblib.Parallel(n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X)) + +starpu.perfmodel_plot(perfmodel="log_list") +\endcode + +\image html starpu_log_list.png +\image latex starpu_log_list.png "" width=\textwidth + +If we use a \c numpy array as parameter, the calculation can handle larger sizes, as shown below. + +\code{.py} +from math import log10 +def log10_arr(t): + for i in range(len(t)): + t[i] = log10(t[i]) + return t +for x in [10, 100, 1000, 10000, 100000, 1000000, 10000000]: + for X in range(x, x*10, x): + A = np.arange(1,X+1,1) + starpu.joblib.Parallel(n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A)) + +starpu.perfmodel_plot(perfmodel="log_arr") +\endcode + +\image html starpu_log_arr.png +\image latex starpu_log_arr.png "" width=\textwidth +
    • +
    + +\section MultipleInterpreters Multiple Interpreters + +It is possible to use multiple interpreters when running Python applications. To do so, you need to set the variable \ref STARPUPY_MULTI_INTERPRETER when running a StarPU Python application. + +Python interpreters share the Global Interpreter Lock (GIL), which requires that at any time, one and only one thread has the right to execute a task. In other words, the GIL makes the execution of multiple Python interpreters actually serial rather than parallel, and the execution of a Python program is essentially single-threaded. Therefore, if the application is a pure Python script, even with multiple interpreters, the program cannot be executed in parallel, unless an external C application is called. + +Fortunately, there is now a quite positive development. Python developers are working on no longer sharing the GIL between interpreters (https://peps.python.org/pep-0684/), or even on making the GIL optional so that Python code can be run without the GIL (https://peps.python.org/pep-0703/), which will facilitate true parallelism in upcoming Python versions. + +In order to transfer data between interpreters, the module \c cloudpickle is used to serialize Python objects into a contiguous byte array. This mechanism increases the overhead of the StarPU Python interface, as shown in the following plots, to be compared to the plots given in \ref Benchmark. + +In the first figure, the return value is a handle object. +In the second figure, the return value is a future object. +In the third figure, the return value is \c None. 
+ +\image html tasks_size_overhead_py_handle_pickle.png "(1) Returning handle object" width=50% +\image latex tasks_size_overhead_py_handle_pickle.png "" width=\textwidth + +\image html tasks_size_overhead_py_fut_pickle.png "(2) Returning future object" width=50% +\image latex tasks_size_overhead_py_fut_pickle.png "" width=\textwidth + +\image html tasks_size_overhead_py_noret_pickle.png "(3) Returning None" width=50% +\image latex tasks_size_overhead_py_noret_pickle.png "" width=\textwidth + +In order to reflect this influence more intuitively, we make a performance comparison. + +By default, StarPU uses the virtually shared memory manager for Python objects supporting the buffer protocol, which allows to minimize data transfers. But in the multi-interpreter case, if we do not use the virtually shared memory manager, data transfer can be realized only with the help of cloudpickle. + +We will show the operation performances below (running test_handle_perf_pickle.sh). The operation that we test is \c numpy addition (numpy.add), and the array size is 10, 20, ..., 100, 200, ..., 1000, 2000, ..., 10000, 20000, ..., 100000, 200000, ..., 1000000, 2000000, ..., 10000000, ..., 50000000. We compared three cases: first, using the virtually shared memory manager, second, without using the virtually shared memory manager, third, without using StarPU task submission, but directly calling the numpy.add function. + +In the first figure, we compare the submission time when using StarPU and the execution time without using StarPU. We can see that there is still an obvious optimization using the StarPU virtually shared memory manager when the test array size is large. However, when only using cloudpickle, the StarPU Python interface cannot provide an effective optimization. And in the second figure, we can see that the same operation will take more time to finish the program execution when only using cloudpickle. 
+ +\image html starpupy_handle_perf_pickle.png width=85% +\image latex starpupy_handle_perf_pickle.png "" width=\textwidth + +We can also define our own function to do the \c numpy operation, e.g. the element addition: + +\code{.py} +def add(a, b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] +\endcode + +We will compare operation performances of the same three cases, but based on the custom function add(a, b). + +We can see that the custom function takes more time than \c numpy function overall. Although the same operation still takes more time to submit the task when only using cloudpickle than with virtually shared memory manager, there is still a better optimization. The operation takes less time than only calling a custom function even when the array is not very large. + +\image html starpupy_handle_func_perf_pickle.png width=85% +\image latex starpupy_handle_func_perf_pickle.png "" width=\textwidth + +\section StarpupyMasterSlave Master Slave Support + +StarPU Python interface provides MPI master slave support as well. Please refer to \ref MPIMasterSlave for the specific usage. + +When you write your Python script, make sure to import all required functions before the \c starpu module. Functions imported after the \c starpu module can only be submitted using their name as a string when calling \c task_submit(), this will decrease the submission efficiency. + +(TODO) + +\section StarPUPYSimgrid StarPUPY and Simgrid + +In simgrid mode, the Python interpreter will not be aware of simgrid and will +thus not notify it when some thread is blocked waiting for something to happen +in another thread. This notably means that the `asyncio` mode and waiting for a +`future` will not work, and one thus has to use StarPUPY-provided functions to +wait for completion, such as `starpupy.task_wait_for_all()` or `data.acquire`. 
+ +Also, we have not yet implemented not calling the actual call of the task +function, so the execution time will be longer than in real execution, since not +only it executes computations, but also sequentially, and adds the simulation +overhead. + +*/ diff --git a/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy b/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy new file mode 100644 index 0000000..1564327 --- /dev/null +++ b/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy @@ -0,0 +1,83 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page BenchmarkingStarPU Benchmarking StarPU + +Some interesting benchmarks are installed among examples in +$STARPU_PATH/lib/starpu/examples/. Make sure to try various +schedulers, for instance STARPU_SCHED=dmda. + +\section TaskSizeOverhead Task Size Overhead + +This benchmark gives a glimpse into how long a task should be (in µs) for StarPU overhead +to be low enough to keep efficiency. Running +tasks_size_overhead.sh generates a plot +of the speedup of tasks of various sizes, depending on the number of CPUs being +used. 
+ +\image html tasks_size_overhead.png +\image latex tasks_size_overhead.png "" width=\textwidth + +\section DataTransferLatency Data Transfer Latency + +local_pingpong performs a ping-pong between the first two CUDA nodes, and +prints the measured latency. + +\section MatrixMatrixMultiplication Matrix-Matrix Multiplication + +sgemm and dgemm perform a blocked matrix-matrix +multiplication using BLAS and cuBLAS. They output the obtained GFlops. + +\section CholeskyFactorization Cholesky Factorization + +cholesky_* perform a Cholesky factorization (single precision). They use different dependency primitives. + +\section LUFactorization LU Factorization + +lu_* perform an LU factorization. They use different dependency primitives. + +\section SimulatedBenchmarks Simulated Benchmarks + +It can also be convenient to try simulated benchmarks, if you want to give a try +at CPU-GPU scheduling without actually having a GPU at hand. This can be done by +using the SimGrid version of StarPU: first install the SimGrid simulator from +https://simgrid.org/ (we tested with SimGrid from 3.11 to 3.16, and +3.18 to 3.30. SimGrid versions 3.25 and above need to be configured with \c -Denable_msg=ON. +Other versions may have compatibility issues, 3.17 notably does +not build at all. MPI simulation does not work with version 3.22). +Then configure StarPU with \ref enable-simgrid +"--enable-simgrid" and rebuild and install it, and then you can simulate the performance for a +few virtualized systems shipped along StarPU: attila, mirage, idgraf, and sirocco. + +For instance: + +\verbatim +$ export STARPU_PERF_MODEL_DIR=$STARPU_PATH/share/starpu/perfmodels/sampling +$ export STARPU_HOSTNAME=attila +$ $STARPU_PATH/lib/starpu/examples/cholesky_implicit -size $((960*20)) -nblocks 20 +\endverbatim + +Will show the performance of the cholesky factorization with the attila +system. It will be interesting to try with different matrix sizes and +schedulers. 
+ +Performance models are available for cholesky_*, lu_*, *gemm, with block sizes +320, 640, or 960 (plus 1440 for sirocco), and for stencil with block size 128x128x128, 192x192x192, and +256x256x256. + +Read Chapter \ref SimGridSupport for more information on the SimGrid support. + +*/ diff --git a/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy b/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy new file mode 100644 index 0000000..ab076bd --- /dev/null +++ b/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy @@ -0,0 +1,1332 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page OfflinePerformanceTools Offline Performance Tools + +To get an idea of what is happening, a lot of performance feedback is available, +detailed in this chapter. The various information should be checked for. + +
      +
    • +What does the Gantt diagram look like? (see \ref CreatingAGanttDiagram) +
        +
      • If it's mostly green (tasks running in the initial context) or context specific + color prevailing, then the machine is properly + utilized, and perhaps the codelets are just slow. Check their performance, see + \ref PerformanceOfCodelets. +
      • +
      • If it's mostly purple (FetchingInput), tasks keep waiting for data + transfers, do you perhaps have far more communication than computation? Did + you properly use CUDA streams to make sure communication can be + overlapped? Did you use data-locality aware schedulers to avoid transfers as + much as possible? +
      • +
      • If it's mostly red (Blocked), tasks keep waiting for dependencies, + do you have enough parallelism? It might be a good idea to check what the DAG + looks like (see \ref CreatingADAGWithGraphviz). +
      • +
      • If only some workers are completely red (Blocked), for some reason the + scheduler didn't assign tasks to them. Perhaps the performance model is bogus, + check it (see \ref PerformanceOfCodelets). Do all your codelets have a + performance model? When some of them don't, the scheduler switches to a + greedy algorithm which thus performs badly. +
      • +
      +
    • +
    + +You can also use the Temanejo task debugger (see \ref UsingTheTemanejoTaskDebugger) to +visualize the task graph more easily. + +\section GeneratingTracesWithFxT Generating Traces With FxT + +StarPU can use the FxT library (see +https://savannah.nongnu.org/projects/fkt/) to generate traces +with a limited runtime overhead. + +You can get a tarball from http://download.savannah.gnu.org/releases/fkt/?C=M + +Compiling and installing the FxT library in the $FXTDIR path is +done following the standard procedure: + +\verbatim +$ ./configure --prefix=$FXTDIR +$ make +$ make install +\endverbatim + +In order to have StarPU generate traces, StarPU needs to be configured again +after installing FxT, and the configuration output should show: + +\verbatim +FxT trace enabled: yes +\endverbatim + +If configure does not find FxT automatically, it can be specified by hand with +the option \ref with-fxt "--with-fxt" : + +\verbatim +$ ./configure --with-fxt=$FXTDIR +\endverbatim + +Or you can simply point the PKG_CONFIG_PATH environment variable to +$FXTDIR/lib/pkgconfig + +When \ref STARPU_FXT_TRACE is set to 1, a trace is generated when StarPU is terminated by calling +starpu_shutdown(). The trace is a binary file whose name has the form +prof_file_XXX_YYY where XXX is the username, and +YYY is the MPI id of the process that used StarPU (or 0 when running a sequential program). +One can change +the name of the file by setting the environment variable \ref +STARPU_FXT_SUFFIX, its contents will be used instead of prof_file_XXX. +This file is saved in the +/tmp/ directory by default, or in the directory specified by +the environment variable \ref STARPU_FXT_PREFIX. + +The additional \c configure option \ref enable-fxt-lock "--enable-fxt-lock" can +be used to generate trace events which describe the locks' behavior during +the execution. It is however very heavy and should not be used unless debugging +StarPU's internal locking. 
+ +When the FxT trace file prof_file_something has been generated, +it is possible to generate different trace formats by calling: + +\verbatim +$ starpu_fxt_tool -i /tmp/prof_file_something +\endverbatim + +Or alternatively, setting the environment variable \ref STARPU_GENERATE_TRACE +to 1 before application execution will make StarPU +automatically generate all traces at application shutdown. Note that +if the environment variable \ref STARPU_FXT_PREFIX is set, files will +be generated in the given directory. + +One can also set the environment variable \ref +STARPU_GENERATE_TRACE_OPTIONS to specify options, see +starpu_fxt_tool --help, for example: + +\verbatim +$ export STARPU_GENERATE_TRACE=1 +$ export STARPU_GENERATE_TRACE_OPTIONS="-no-acquire" +\endverbatim + +When running an MPI application, \ref STARPU_GENERATE_TRACE will not +work as expected (each node will try to generate trace files, thus +mixing outputs...), you have to collect the trace files from the MPI +nodes, and specify them all on the command starpu_fxt_tool, for +instance: + +\verbatim +$ starpu_fxt_tool -i /tmp/prof_file_something* +\endverbatim + +By default, the generated trace contains all information. To reduce +the trace size, various -no-foo options can be passed to +starpu_fxt_tool, see starpu_fxt_tool --help . + +\subsection CreatingAGanttDiagram Creating a Gantt Diagram + +One of the generated files is a trace in the Paje format. The file, +located in the current directory, is named paje.trace. It can +be viewed with ViTE (https://solverstack.gitlabpages.inria.fr/vite/) a trace +visualizing open-source tool. 
To open the file paje.trace with +ViTE, use the following command: + +\verbatim +$ vite paje.trace +\endverbatim + +Once the file is opened in ViTE interface, we will see the figure as shown below: + +\image html vite_open.png width=60% +\image latex vite_open.png "" width=\textwidth + +We can then click the "No arrows" button in task bar of ViTE interface, to better observe the Gantt diagram that illustrates the start and end dates of the different tasks or activities of a program. + +\image html vite_no_arrow.png width=60% +\image latex vite_no_arrow.png "" width=\textwidth + +In the Gantt diagram, the bar types such as devices (CPU or GPU) are displayed +on the left side. Each task is represented by a horizontal rectangle that spans +the duration of the task. The rectangles are arranged along a timeline axis, +which is shown at the top of the Gantt diagram and represents the overall +duration of the program in milliseconds. The position of the bar along the +timeline shows when the task begins and ends. We can see some long red bars at +the beginning and end of the entire timeline, which represent that the unit is +idle. There are no tasks at these moments, and workers are waiting or in a +sleeping state. + +\subsubsection ZoomingInGantt Zooming in Gantt Diagram + +Then as shown in the following figure, press and hold the left mouse button to +select the area you want to zoom in on. Release the button to view the selected +area, and we can repeat the zoom action multiple times. + +\image html vite_zoom.png width=60% +\image latex vite_zoom.png "" width=\textwidth + +This zoom result is: + +\image html vite_zoom_result.png width=60% +\image latex vite_zoom_result.png "" width=\textwidth + +Right-clicking anywhere on the Gantt diagram restores the previous zoom view. + +One can press and hold the left mouse button inside the top blue bar to select +horizontally, which will horizontally zoom in on all Gantt diagrams within the +selected time range. 
+ +\image html vite_zoom_horiz.png width=60% +\image latex vite_zoom_horiz.png "" width=\textwidth + +This zoom result is: + +\image html vite_zoom_horiz_result.png width=60% +\image latex vite_zoom_horiz_result.png "" width=\textwidth + +\subsubsection ColorsInGantt Colors in Gantt Diagram + +After zooming in, we can observe numerous blocks of varying colors, each block representing a task. Blocks of diverse colors signify different types of tasks. When we double-click on any block, a pop-up window will show related status about that task, such as its type and which worker (CPU/GPU) it belongs to, etc. + +\image html vite_task_state.png width=60% +\image latex vite_task_state.png "" width=\textwidth + +The state information displayed in the pop-up window can be: + +\li Value: refers to a type of task, which can be assigned as a +task name (instead of the default \c unknown) by filling the optional +starpu_codelet::name, or assigning it a performance model. The name can also be +set with the field starpu_task::name or by using \ref STARPU_NAME when calling +starpu_task_insert() +\li Container: refers to a specific worker where the computation was performed, could be CPU or CUDA +\li Type: indicates the type of this block, most often "Worker State" +\li Date: represents a range of dates during which the computation was performed +\li Duration: represents the duration of the computation +\li Footprint: provides the data footprint of the task (used as indexing base for performance models) +\li GFlop: represents the number of Gflop performed during the computation, as set in starpu_task::flops. 
+\li Iteration: refers to the iteration number of the computation, as set by starpu_iteration_push() at the beginning of submission loops and starpu_iteration_pop() at the end of submission loops +\li JobId: represents a unique identifier for the specific task, as returned by starpu_task_get_job_id() +\li NumaNodes: refers to the NUMA node where the data is stored, the environment variable \ref STARPU_FXT_EVENTS needs to contain \c TASK_VERBOSE_EXTRA, otherwise it will be -1 +\li Params: represents parameters or input/output types and sizes, possibly indicating the dimensions of the matrices +\li Size: represents the size of the data being operated on in bytes +\li Subiteration: represents a sub-iteration number if the computation was part of a larger iteration or loop, as set by starpu_iteration_push() +\li SubmitOrder: represents the order in which the task was submitted by +the application +\li Tag: represents a unique identifier for the task, which can be +set either through starpu_task::tag_id or by using \ref STARPU_TAG or \ref +STARPU_TAG_ONLY when calling starpu_task_insert() +\li X: represents an X-coordinate index of the first data written by the task, which was set by starpu_data_set_coordinates() or +starpu_data_set_coordinates_array() function. We can also get the coordinates of the data with starpu_data_get_coordinates_array() function +\li Y: represents an Y-coordinate index of the first data written by the task, which was set by starpu_data_set_coordinates() or +starpu_data_set_coordinates_array() function. We can also get the coordinates of the data with starpu_data_get_coordinates_array() function +\li Color: represents the color RGB value associated with the task. Tasks +are by default shown in green. To use a different color for every type of +task, we can specify the option -c to starpu_fxt_tool or in \ref +STARPU_GENERATE_TRACE_OPTIONS. 
Tasks can also be given a specific color by +setting the field starpu_codelet::color or starpu_task::color. When we call +starpu_task_insert(), we can use ::STARPU_TASK_COLOR to set the color. Colors +are expressed with the following format \c 0xRRGGBB (e.g. \c 0xFF0000 for +red). See basic_examples/task_insert_color for examples on how to assign +colors + +In the figure shown, the colors are as follows: +\li Dark green represents GEMM +\li Light green represents SYRK +\li Blue represents TRSM +\li Red indicates that the unit is idle, there are no tasks at the moment, it is currently waiting or in a sleeping state +\li Magenta represents FetchingInput + +To modify the colors in the ViTE interface, select "Preferences" then "Settings" in +the options bar, and then choose the "States" tab in the newly opened window +to select different colors for different operations, as shown in the figure +below. One has to click the reload button at the top left to reload the trace +with the new colors. + +\image html vite_preferences.png width=60% +\image latex vite_preferences.png "" width=\textwidth + +\subsubsection CurvesInGantt Curves in Gantt Diagram + +We can see that there is a curve below task blocks, which represents the +corresponding GFlop/s. Double-clicking near the curve will display +the current GFlop/s information in a pop-up window (as shown in the +figure). If we only click on the curve, a vertical red line shows up, and we can +read on it the GFlop/s values of all the curves at the same time. + +\image html vite_gflops.png width=60% +\image latex vite_gflops.png "" width=\textwidth + +For GPUs, there are three additional curves above the task blocks that can be double-clicked to open a pop-up window to view information.
Let's zoom in on the three curves during the entire execution process as illustrated in the figure: + +\image html vite_gpu_memory_select.png width=60% +\image latex vite_gpu_memory_select.png "" width=\textwidth + +As shown in the figure below, the top curve represents the amount of GPU-managed +memory in MBytes, while the bottom two curves represent the data transfer +between tasks on the CPU and GPU, and between tasks on different GPUs. They +respectively indicate the incoming and outgoing data transfer bandwidth. +By looking at the memory curve, we can observe that the memory usage kept +increasing at first, but due to the reutilization of the allocations by StarPU, +the curve gradually became stable later on. + +\image html vite_gpu_memory.png width=60% +\image latex vite_gpu_memory.png "" width=\textwidth + +\subsubsection StatesInGantt States in Gantt Diagram + +Above these three curves, we can see some blocks which represent driver +copy (see the top of the figure below), i.e. a memory copy. The light green +blocks represent the actual copies, the dark green blocks represent +asynchronous copy submissions, and the burgundy blocks represent allocating and +freeing. Double-clicking on a block allows us to view relevant information in +the pop-up window. + +Here, a couple of issues may show up: + +\li If the "Allocating/Freeing" parts take a long time, it means that StarPU does +not manage to re-use data buffers allocated in the GPU. If you have e.g. a lot +of tiles with different sizes, it may be useful to approximate the allocation +size, by using e.g. starpu_matrix_data_register_allocsize() with the proper nx / +ld / ny, but an allocation size that is rounded up, so that buffers with that +same rounded size can be shared. 
+ +\li If the "Asynchronous copy submission" parts take a long time, it means that +the CPU buffers are not pinned: you need to make sure to use starpu_malloc(), or +starpu_memory_pin() (see \ref CUDA-specificOptimizations) so that the CPU +buffers are pinned and the GPU driver can efficiently process transfers +asynchronously (in the "Actual copy" part) rather than synchronously (in the +"Asynchronous copy submission" part). + +\image html vite_gpu_curves.png width=60% +\image latex vite_gpu_curves.png "" width=\textwidth + +Below the GPU task blocks and GFlops curve (see the bottom of the figure +above), we can see some other blocks that represent the CPU waiting for the +GPU to complete the task. During this time, the CPU can perform various actions, which are +represented by blocks of different colors, such as: + +\li Dark green represents progressing, it keeps polling for task or data transfer completion +\li Brown-yellow represents scheduling +\li Burgundy represents submitting a task +\li Lake blue represents executing, it is executing the application codelet function. Here it is very short because the codelet just submits a kernel asynchronously. +\li Dark blue represents callback +\li Chestnut represents overhead. This state is not supposed to be long, as it represents everything that we did not classify as an operation that is supposed to be long like the operations mentioned above. If you find situations where some overhead is long, this is a bug worth reporting so we can fix it. + +and we can always double-click on the block to view relevant information in the pop-up window. + +\subsubsection TransfersInGantt Transfers in Gantt Diagram + +We can horizontally zoom in on a section of the Gantt diagram, and deselect the "No arrows" option.
This will allow us to see a complete process of data transfer, as shown in the following figure: + +\image html vite_trans_arrow.png width=60% +\image latex vite_trans_arrow.png "" width=\textwidth + +In the above figure, we can see a long segment of magenta color in CUDA2_0 task +blocks. At the same time, we can see that there are numerous transfers between +other workers during this time period. This indicates that CUDA2_0 is waiting +for the completion of the data transfers needed by the task it wants to execute. + +\subsubsection SchedulerInGantt Scheduler in Gantt Diagram + +At the top of the entire Gantt diagram, there are three curves that represent +the information of the scheduler. Let's zoom in on the three curves during the +entire execution process as illustrated in the figure below: + +\image html vite_top_curve_select.png width=60% +\image latex vite_top_curve_select.png "" width=\textwidth + +As shown in the figure below, from top to bottom, they respectively indicate the number of submitted uncompleted tasks, the number of ready tasks, and the total GFlop/s for this moment. By double-clicking on the curves, we can view relevant information in the pop-up window. + +\image html vite_top_curve.png width=60% +\image latex vite_top_curve.png "" width=\textwidth + +\subsubsection MainThreadInGantt Main Thread in Gantt Diagram + +At the very bottom of the entire Gantt diagram, we will see a red bar, which represents the main thread waiting for tasks. In front of the red bar (see the figure below), there are some dark red bars, which represent the main thread submitting tasks. + +\image html vite_bottom_curve.png width=60% +\image latex vite_bottom_curve.png "" width=\textwidth + +Below these red bars, we can see some white vertical lines with small circles on top, which represent events. The default events can be either task push or task pop or task wait for all. 
The application can inject its own events at any desired moment with the function starpu_fxt_trace_user_event() or starpu_fxt_trace_user_event_string(). Similarly, double-clicking on the white bars allows you to see relevant information in the pop-up window. + +\subsubsection StatisticsInGantt Statistics in Gantt Diagram + +To get statistics on the time spent in runtime overhead, we can use the +statistics plugin of ViTE. In the Preferences menu, select Plugins. In "States Type", +select "Worker State". Then click on "Reload" to update the histogram. The red +"Idle" percentages are due to lack of parallelism, the "FetchingInput" +percentages are due to waiting for data transfers. The brown "Overhead" +and "Scheduling" percentages are due to the overhead of the runtime and of the +scheduler. + +\image html vite_worker_state.png width=60% +\image latex vite_worker_state.png "" width=\textwidth + +\subsection CreatingADAGWithGraphviz Creating a DAG With Graphviz + +Another generated trace file is a task graph described using the DOT +language. The file, created in the current directory, is named +dag.dot. +It is possible to get a graphical output of the graph by using the +graphviz library: + + +\verbatim +$ dot -Tpdf dag.dot -o output.pdf +\endverbatim + +\subsection TraceTaskDetails Getting Task Details + +Another generated trace file gives details on the executed tasks. The +file, created in the current directory, is named tasks.rec. This file +is in the \c recutils format, i.e. Field: value lines, and empty lines are used to +separate each task. This can be used as a convenient input for various ad-hoc +analysis tools. By default, it only contains information about the actual +execution.
Performance models can be obtained by running +starpu_tasks_rec_complete on it: + +\verbatim +$ starpu_tasks_rec_complete tasks.rec tasks2.rec +\endverbatim + +which will add EstimatedTime lines which contain the performance +model-estimated time (in µs) for each worker starting from 0. Since it needs +the performance models, it needs to be run the same way as the application +execution, or at least with STARPU_HOSTNAME set to the hostname of the +machine used for execution, to get the performance models of that machine. + +Another possibility is to obtain the performance models as an auxiliary perfmodel.rec file, by using the starpu_perfmodel_recdump utility: + +\verbatim +$ starpu_perfmodel_recdump tasks.rec -o perfmodel.rec +\endverbatim + +One can also simply call starpu_task_get_name() to get the name of a task. + +\subsection TraceSchedTaskDetails Getting Scheduling Task Details + +The file, sched_tasks.rec, created in the current directory, +in the \c recutils format, gives information about task +scheduling, and lists the push and pop actions of the scheduler. For +each action, it gives the timestamp, the job priority and the job id. +Each action is separated from the next one by empty lines. The job id associated with the task can be retrieved by calling starpu_task_get_job_id(). + +\subsection MonitoringActivity Monitoring Activity + +Another generated trace file is an activity trace. The file, created +in the current directory, is named activity.data. A profile of +the application showing the activity of StarPU during the execution of +the program can be generated: + +\verbatim +$ starpu_workers_activity activity.data +\endverbatim + +This will create a file named activity.eps in the current directory. +This picture is composed of two parts. +The first part shows the activity of the different workers. The green sections +indicate which proportion of the time was spent executing kernels on the +processing unit.
The red sections indicate the proportion of time spent in +StarPU: a significant overhead may indicate that the granularity may be too +low, and that bigger tasks may be appropriate to use the processing unit more +efficiently. The black sections indicate that the processing unit was blocked +because there was no task to process: this may indicate a lack of parallelism, +which may be alleviated by creating more tasks when it is possible. + +The second part of the picture activity.eps is a graph showing the +evolution of the number of tasks available in the system during the execution. +Ready tasks are shown in black, and tasks that are submitted but not +schedulable yet are shown in grey. + +\subsection Animation Getting Modular Scheduler Animation + +When using modular schedulers (i.e. schedulers which use a modular architecture, +and whose names start with "modular-"), the call to +starpu_fxt_tool will also produce a trace.html file +which can be viewed in a javascript-enabled web browser. It shows the +flow of tasks between the components of the modular scheduler. + +\subsection TimeBetweenSendRecvDataUse Analyzing Time Between MPI Data Transfer and Use by Tasks + +starpu_fxt_tool produces a file called comms.rec which describes all +MPI communications. The script starpu_send_recv_data_use.py uses this file +and tasks.rec in order to produce two graphs: the first one shows durations +between the reception of data and their usage by a task and the second one plots the +same graph but with elapsed time between send and usage of the data by the sender. + +\image html trace_recv_use.png +\image latex trace_recv_use.png "" width=\textwidth + +\image html trace_send_use.png +\image latex trace_send_use.png "" width=\textwidth + + +\subsection NumberEvents Number of events in trace files + +When launched with the option -number-events, starpu_fxt_tool will +produce a file named number_events.data. This file contains the number of +events for each event type.
Events are represented with their key. To convert +event keys to event names, you can use the starpu_fxt_number_events_to_names.py +script: + +\verbatim +$ starpu_fxt_number_events_to_names.py number_events.data +\endverbatim + +The number of recorded events (and thus the performance overhead introduced by +tracing) can be reduced by setting which categories of events to record with +the environment variable \ref STARPU_FXT_EVENTS. + + +\subsection LimitingScopeTrace Limiting The Scope Of The Trace + +For computing statistics, it is useful to limit the trace to a given portion of +the time of the whole execution. This can be achieved by calling + +\code{.c} +starpu_fxt_autostart_profiling(0) +\endcode + +before calling starpu_init(), to +prevent tracing from starting immediately. Then + +\code{.c} +starpu_fxt_start_profiling(); +\endcode + +and + +\code{.c} +starpu_fxt_stop_profiling(); +\endcode + +can be used around the portion of code to be traced. This will show up as marks +in the trace, and states of workers will only show up for that portion. + +\section PerformanceOfCodelets Performance Of Codelets + +After calibrating performance models of codelets (see \ref +PerformanceModelExample and \ref PerformanceModelCalibration), +they can be examined by using the tool starpu_perfmodel_display: + +\verbatim +$ starpu_perfmodel_display -l +file: +file: +file: +file: +file: +\endverbatim + +Here, the codelets of the example lu are available. 
We can examine the +performance of the kernel 22 (in micro-seconds), which is history-based: + +\verbatim +$ starpu_perfmodel_display -s starpu_slu_lu_model_gemm +performance model for cpu +# hash size mean dev n +57618ab0 19660800 2.851069e+05 1.829369e+04 109 +performance model for cuda_0 +# hash size mean dev n +57618ab0 19660800 1.164144e+04 1.556094e+01 315 +performance model for cuda_1 +# hash size mean dev n +57618ab0 19660800 1.164271e+04 1.330628e+01 360 +performance model for cuda_2 +# hash size mean dev n +57618ab0 19660800 1.166730e+04 3.390395e+02 456 +\endverbatim + +We can see that for the given size, over a sample of a few hundreds of +execution, the GPUs are about 20 times faster than the CPUs (numbers are in +us). The standard deviation is extremely low for the GPUs, and less than 10% for +CPUs. + +This tool can also be used for regression-based performance models. It will then +display the regression formula, and in the case of non-linear regression, the +same performance log as for history-based performance models: + +\verbatim +$ starpu_perfmodel_display -s non_linear_memset_regression_based +performance model for cpu_impl_0 + Regression : #sample = 1400 + Linear: y = alpha size ^ beta + alpha = 1.335973e-03 + beta = 8.024020e-01 + Non-Linear: y = a size ^b + c + a = 5.429195e-04 + b = 8.654899e-01 + c = 9.009313e-01 +# hash size mean stddev n +a3d3725e 4096 4.763200e+00 7.650928e-01 100 +870a30aa 8192 1.827970e+00 2.037181e-01 100 +48e988e9 16384 2.652800e+00 1.876459e-01 100 +961e65d2 32768 4.255530e+00 3.518025e-01 100 +... +\endverbatim + +The same can also be achieved by using StarPU's library API, see +\ref API_Performance_Model and notably the function +starpu_perfmodel_load_symbol(). The source code of the tool +starpu_perfmodel_display can be a useful example. 
+ +An XML output can also be printed by using the -x option: +\verbatim +$ tools/starpu_perfmodel_display -x -s non_linear_memset_regression_based + + + + + + + + + + + + + + + + + + +\endverbatim + +The tool starpu_perfmodel_plot can be used to draw performance +models. It writes a .gp file in the current directory, to be +run with the tool gnuplot, which generates the corresponding +curve both in postscript and png format. + +\verbatim +$ tools/starpu_perfmodel_plot -s non_linear_memset_regression_based +$ gnuplot starpu_non_linear_memset_regression_based.gp +$ gv starpu_non_linear_memset_regression_based.eps +$ geeqie starpu_non_linear_memset_regression_based.png +\endverbatim + +\image html starpu_non_linear_memset_regression_based.png +\image latex starpu_non_linear_memset_regression_based.png "" width=\textwidth + +When the field starpu_task::flops is set (or \ref STARPU_FLOPS is passed to +starpu_task_insert()), starpu_perfmodel_plot can directly draw a GFlops/s +curve, by simply adding the -f option: + +\verbatim +$ starpu_perfmodel_plot -f -s chol_model_potrf +\endverbatim + +This will however disable displaying the regression model, for which we can not +compute GFlops/s. + +\image html starpu_chol_model_11_type.png +\image latex starpu_chol_model_11_type.png "" width=\textwidth + +When the FxT trace file prof_file_something has been generated, it is possible to +get a profiling of each codelet by calling: + +\verbatim +$ starpu_fxt_tool -i /tmp/prof_file_something +$ starpu_codelet_profile distrib.data codelet_name +\endverbatim + +This will create profiling data files, and a distrib.data.gp file in the current +directory, which draws the distribution of codelet time over the application +execution, according to data input size. 
+ +\image html distrib_data.png +\image latex distrib_data.png "" width=\textwidth + +This is also available in the tool starpu_perfmodel_plot, by passing it +the fxt trace: + +\verbatim +$ starpu_perfmodel_plot -s non_linear_memset_regression_based -i /tmp/prof_file_foo_0 +\endverbatim + +It will produce a .gp file which contains both the performance model +curves, and the profiling measurements. + +\image html starpu_non_linear_memset_regression_based_2.png +\image latex starpu_non_linear_memset_regression_based_2.png "" width=\textwidth + +If you have the statistical tool R installed, you can additionally use + +\verbatim +$ starpu_codelet_histo_profile distrib.data +\endverbatim + +Which will create one .pdf file per codelet and per input size, showing a +histogram of the codelet execution time distribution. + +\image html distrib_data_histo.png +\image latex distrib_data_histo.png "" width=\textwidth + +\section EnergyOfCodelets Energy Of Codelets + +A performance model of the energy of codelets can also be recorded thanks to +the starpu_codelet::energy_model field of the starpu_codelet structure. StarPU usually cannot +record this automatically, since the energy measurement probes are usually not +fine-grain enough. It is however possible to measure it by writing a program +that submits batches of tasks, let StarPU measure the energy requirement of +the batch, and compute an average, see \ref MeasuringEnergyandPower . + +The energy performance model can then be displayed in Joules with +starpu_perfmodel_display just like the time performance model. 
The +starpu_perfmodel_plot needs an extra -e option to display the +proper unit in the graph: + +\verbatim +$ tools/starpu_perfmodel_plot -e -s non_linear_memset_regression_based_energy +$ gnuplot starpu_non_linear_memset_regression_based_energy.gp +$ gv starpu_non_linear_memset_regression_based_energy.eps +\endverbatim + +\image html starpu_non_linear_memset_regression_based_energy.png +\image latex starpu_non_linear_memset_regression_based_energy.png "" width=\textwidth + +The -f option can also be used to display the performance in terms of GFlops/s/W, i.e. the efficiency: + +\verbatim +$ tools/starpu_perfmodel_plot -f -e -s non_linear_memset_regression_based_energy +$ gnuplot starpu_gflops_non_linear_memset_regression_based_energy.gp +$ gv starpu_gflops_non_linear_memset_regression_based_energy.eps +\endverbatim + +\image html starpu_gflops_non_linear_memset_regression_based_energy.png +\image latex starpu_gflops_non_linear_memset_regression_based_energy.png "" width=\textwidth + +We clearly see here that it is much more energy-efficient to stay in the L3 cache. + +One can combine the two time and energy performance models to draw Watts: + +\verbatim +$ tools/starpu_perfmodel_plot -se non_linear_memset_regression_based non_linear_memset_regression_based_energy +$ gnuplot starpu_power_non_linear_memset_regression_based.gp +$ gv starpu_power_non_linear_memset_regression_based.eps +\endverbatim + +\image html starpu_power_non_linear_memset_regression_based.png +\image latex starpu_power_non_linear_memset_regression_based.png "" width=\textwidth + +\section DataTrace Data trace and tasks length + +It is possible to get statistics about tasks length and data size by using : +\verbatim +$ starpu_fxt_data_trace filename [codelet1 codelet2 ... codeletn] +\endverbatim +Where filename is the FxT trace file and codeletX the names of the codelets you +want to profile (if no names are specified, starpu_fxt_data_trace will profile them all). 
+This will create a file, data_trace.gp which +can be executed to get a .eps image of these results. On the image, each point represents a +task, and each color corresponds to a codelet. + +\image html data_trace.png +\image latex data_trace.png "" width=\textwidth + +\section TraceStatistics Trace Statistics + +More than just codelet performance, it is interesting to get statistics over all +kinds of StarPU states (allocations, data transfers, etc.). This is particularly +useful to check what may have gone wrong in the accuracy of the SimGrid +simulation. + +This requires the R statistical tool, with the plyr, +ggplot2 and data.table packages. If your system +distribution does not have packages for these, one can fetch them from +CRAN: + +\verbatim +$ R +> install.packages("plyr") +> install.packages("ggplot2") +> install.packages("data.table") +> install.packages("knitr") +\endverbatim + +The pj_dump tool from pajeng is also needed (see +https://github.com/schnorr/pajeng) + +One can then get textual or .csv statistics over the trace states: + +\verbatim +$ starpu_paje_state_stats -v native.trace simgrid.trace +"Value" "Events_native.csv" "Duration_native.csv" "Events_simgrid.csv" "Duration_simgrid.csv" +"Callback" 220 0.075978 220 0 +"chol_model_potrf" 10 565.176 10 572.8695 +"chol_model_trsm" 45 9184.828 45 9170.719 +"chol_model_gemm" 165 64712.07 165 64299.203 +$ starpu_paje_state_stats native.trace simgrid.trace +\endverbatim + +Another way to get statistics of StarPU states (without installing R and +pj_dump) is to use the starpu_trace_state_stats.py script, which parses the +generated trace.rec file instead of the paje.trace file. The output is similar +to the previous script, but it doesn't need any dependencies.
+ +The different prefixes used in trace.rec are: + +\verbatim +E: Event type +N: Event name +C: Event category +W: Worker ID +T: Thread ID +S: Start time +\endverbatim + +Here's an example of how to use it: + +\verbatim +$ starpu_trace_state_stats.py trace.rec | column -t -s "," +"Name" "Count" "Type" "Duration" +"Callback" 220 Runtime 0.075978 +"chol_model_potrf" 10 Task 565.176 +"chol_model_trsm" 45 Task 9184.828 +"chol_model_gemm" 165 Task 64712.07 +\endverbatim + +starpu_trace_state_stats.py can also be used to compute the different +efficiencies. Refer to its usage description for some examples. + +And one can plot histograms of execution times, of several states, for instance: +\verbatim +$ starpu_paje_draw_histogram -n chol_model_potrf,chol_model_trsm,chol_model_gemm native.trace simgrid.trace +\endverbatim + +and see the resulting pdf file: + +\image html paje_draw_histogram.png +\image latex paje_draw_histogram.png "" width=\textwidth + +A quick statistical report can be generated by using: + +\verbatim +$ starpu_paje_summary native.trace simgrid.trace +\endverbatim + +it includes gantt charts, execution summaries, as well as state duration charts +and time distribution histograms. + +Other external Paje analysis tools can be used on these traces, one just needs +to sort the traces by timestamp order (which is not guaranteed, to make recording +more efficient): + +\verbatim +$ starpu_paje_sort paje.trace +\endverbatim + +\section PapiCounters PAPI counters + +Performance counter values can be obtained from the PAPI framework if +./configure detected libpapi. + +In Debian, the libpapi-dev package provides the required +files. Additionally, the papi-tools package contains a set of useful tools, for example +papi_avail to see which counters are available. + +To be able to use PAPI counters, one may need to reduce the level of the kernel +parameter kernel.perf_event_paranoid to 2 or below.
See +https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html for the +security impact of this parameter. + +Then one has to set the \ref STARPU_PROFILING environment variable to 1 and +specify which events to record with the \ref STARPU_PROF_PAPI_EVENTS +environment variable. For instance: + +\verbatim +export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC" +\endverbatim + +The comma can also be used to separate events to monitor. + +In the current simple implementation, only CPU tasks have their events measured +and require CPUs that support the PAPI events. It is important to note that not +all events are available on all systems, and general PAPI recommendations +should be followed. + +The counter values can be accessed using the profiling interface: +\code{.c} +task->profiling_info->papi_values +\endcode +Also, it can be accessed and/or saved with tracing when using \ref STARPU_FXT_TRACE. With the use of starpu_fxt_tool +the file papi.rec is generated containing the following triple: + +\verbatim +Task Id +Event Id +Value +\endverbatim + +External tools like rec2csv can be used to convert this rec file to a csv file, where each +line represents a value for an event for a task. + +\section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time + +StarPU can record a trace of what tasks are needed to complete the +application, and then, by using a linear system, provide a theoretical lower +bound of the execution time (i.e. with an ideal scheduling). + +The computed bound is not really correct when not taking into account +dependencies, but for an application which have enough parallelism, it is very +near to the bound computed with dependencies enabled (which takes a huge lot +more time to compute), and thus provides a good-enough estimation of the ideal +execution time. + +Then there is an example to show how to use this. 
+ +For kernels with history-based performance models (and provided that +they are completely calibrated), StarPU can very easily provide a +theoretical lower bound for the execution time of a whole set of +tasks. See for instance examples/lu/lu_example.c: before +submitting tasks, call the function starpu_bound_start(), and after +complete execution, call starpu_bound_stop(). +starpu_bound_print_lp() or starpu_bound_print_mps() can then be used +to output a Linear Programming problem corresponding to the schedule +of your tasks. Or starpu_bound_print_dot() can be used to print a task dependency graph in the DOT format. Run it through lp_solve or any other linear +programming solver, and that will give you a lower bound for the total +execution time of your tasks. If StarPU was compiled with the library +glpk installed, starpu_bound_compute() can be used to solve it +immediately and get the optimized minimum, in ms. Its parameter +integer allows deciding whether integer resolution should be +computed and returned. Besides to solve it immediately and get the optimized minimum starpu_bound_print() can also print the statistics of actual execution and theoretical upper bound. + +The deps parameter tells StarPU whether to take tasks, implicit +data, and tag dependencies into account. Tags released in a callback +or similar are not taken into account, only tags associated with a task are. +It must be understood that the linear programming +problem size is quadratic with the number of tasks and thus the time to solve it +will be very long, it could be minutes for just a few dozen tasks. You should +probably use lp_solve -timeout 1 test.pl -wmps test.mps to convert the +problem to MPS format and then use a better solver, glpsol might be +better than lp_solve for instance (the --pcost option may be +useful), but sometimes doesn't manage to converge. cbc might look +slower, but it is parallel. For lp_solve, be sure to try at least all the +-B options. 
For instance, we often just use lp_solve -cc -B1 -Bb +-Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi , and the -gr option can +also be quite useful. The resulting schedule can be observed by using +the tool starpu_lp2paje, which converts it into the Paje +format. + +Data transfer time can only be taken into account when deps is set. Only +data transfers inferred from implicit data dependencies between tasks are taken +into account. Other data transfers are assumed to be completely overlapped. + +Setting deps to 0 will only take into account the actual computations +on processing units. However, it still properly takes into account the varying +performances of kernels and processing units, which is quite more accurate than +just comparing StarPU performances with the fastest of the kernels being used. + +The prio parameter tells StarPU whether to simulate taking into account +the priorities as the StarPU scheduler would, i.e. schedule prioritized +tasks before less prioritized tasks, to check to which extend this results +to a less optimal solution. This increases even more computation time. + +\section starvz Trace visualization with StarVZ + +Creating views with StarVZ (see: https://github.com/schnorr/starvz) is +made up of two steps. The initial stage consists of a pre-processing +of the traces generated by the application, while the second one +consists of the analysis itself and is carried out with R packages' +aid. StarVZ is available at CRAN +(https://cran.r-project.org/package=starvz) and depends on \c pj_dump +(from \c pajeng) and \c rec2csv (from \c recutils). 
+ +To download and install StarVZ, it is necessary to have \c R, +\c pajeng, and \c recutils: + +\verbatim +# For pj_dump and rec2csv +apt install -y pajeng recutils + +# For R +apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev +\endverbatim + +To install the StarVZ, the following command can be used: + +\verbatim +echo "install.packages('starvz', repos = 'https://cloud.r-project.org')" | R --vanilla +\endverbatim + +To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE +and build StarPU with FxT. Then, StarVZ can be used on a folder with +StarPU FxT traces to produce a default view: + +\verbatim +export PATH=$(Rscript -e 'cat(system.file("tools/", package = "starvz"), sep="\n")'):$PATH + +starvz /foo/path-to-fxt-files +\endverbatim + +An example of default view: + +\image html starvz_visu.png +\image latex starvz_visu.png "" width=\textwidth + +One can also use existing trace files (\c paje.trace, \c tasks.rec, +\c data.rec, \c papi.rec and \c dag.dot) skipping the StarVZ internal call to +starpu_fxt_tool with: +\verbatim +starvz --use-paje-trace /foo/path-to-trace-files +\endverbatim + +Alternatively, each StarVZ step can be executed separately. Step 1 can +be used on a folder with: +\verbatim +starvz -1 /foo/path-to-fxt-files +\endverbatim + +Then the second step can be +executed directly in R. StarVZ enables a set of different plots that +can be configured on a .yaml file. A default file is provided +(default.yaml); also, the options can be changed directly in +R. 
+ +\verbatim +library(starvz) +library(dplyr) + +dtrace <- starvz_read("./", selective = FALSE) + +# show idleness ratio +dtrace$config$st$idleness = TRUE + +# show ABE bound +dtrace$config$st$abe$active = TRUE + +# find the last task with dplyr +dtrace$config$st$tasks$list = dtrace$Application %>% filter(End == max(End)) %>% .$JobId +# show last task dependencies +dtrace$config$st$tasks$active = TRUE +dtrace$config$st$tasks$levels = 50 + +plot <- starvz_plot(dtrace) +\endverbatim + +An example of visualization follows: + +\image html starvz_visu_r.png +\image latex starvz_visu_r.png "" width=\textwidth + +\section EclipsePlugin StarPU Eclipse Plugin + +The StarPU Eclipse Plugin provides the ability to generate the +different traces directly from the Eclipse IDE. + +\subsection EclipseInstallation Eclipse Installation + +Download the Eclipse installer from +https://www.eclipse.org/downloads/packages/installer. When you run the +installer, click on Eclipse IDE for Java Developers to start +the installation process. + +\image html eclipse_installer.png +\image latex eclipse_installer.png "" width=10cm + +To be able to develop C/C++ applications, you need to install the CDT +plugin. To do so, go to the Help dropdown menu at the top of the +Eclipse window, choose Install New Software .... In the new +window, enter the URL +http://download.eclipse.org/tools/cdt/releases/9.10 +into the box Work with and press the return key. + +\image html eclipse_install_cdt.png +\image latex eclipse_install_cdt.png "" width=10cm + +You need then to select CDT Main Features, then click the +button Next twice, accept the terms of the license, and click +the button Finish. Eclipse will ask you to restart. + +To be able to compile the plugin, you need to install the plugin +development environment (PDE). To do so, go to the menu +Help, choose Eclipse Marketplace.... In the new +window, enter PDE into the box Find and press the +return key. 
+ +\image html eclipse_install_pde.png +\image latex eclipse_install_pde.png "" width=10cm + +You can then click on the button Install of the Eclipse +PDE latest. You may need to confirm the installation, then accept +the terms of the license, and finally restart the Eclipse IDE. + +The installation is now done. + +\subsection PluginInstallation StarPU Eclipse Plugin Compilation and Installation + +StarPU can now be compiled and installed with its Eclipse plugin. To +do so, you first need to configure StarPU with the option \ref +enable-eclipse-plugin "--enable-eclipse-plugin". The Eclipse IDE +executable \c eclipse must be in your \c PATH. + +\verbatim +export PATH=$HOME/usr/local/eclipse/java-2021-03/eclipse:$PATH +mkdir build +cd build +../configure --prefix=$HOME/usr/local/starpu --enable-eclipse-plugin +make +make install +\endverbatim + +The StarPU Eclipse plugin is installed in the directory \c dropins. + +\verbatim +$ ls $HOME/usr/local/eclipse/java-2021-03/eclipse/dropins +StarPU_1.0.0.202105272056.jar +\endverbatim + +In the next section, we will show you how to use the plugin. + +\subsection PluginInstruction StarPU Eclipse Plugin Instruction + +Once StarPU has been configured and installed with its Eclipse plugin, you first +need to set up your environment for StarPU. + +\verbatim +cd $HOME/usr/local/starpu +source ./bin/starpu_env +\endverbatim + +To generate traces from the application, it is necessary to set \ref +STARPU_FXT_TRACE to 1. + +\verbatim +export STARPU_FXT_TRACE=1 +\endverbatim + +The eclipse workspace together with an example is available in \c +lib/starpu/eclipse-plugin. + +\verbatim +cd ./lib/starpu/eclipse-plugin +eclipse -data workspace +\endverbatim + +You can then open the file \c hello/hello.c, and build the application +by pressing \c Ctrl-B. + +\image html eclipse_hello_build.png +\image latex eclipse_hello_build.png "" width=\textwidth + +The application can now be executed. 
+ +\image html eclipse_hello_run.png +\image latex eclipse_hello_run.png "" width=\textwidth + +After executing the C/C++ StarPU application, one can use the StarPU +plugin to generate and visualise the task graph of the application. +The StarPU plugin eclipse is either available through the icons in the +upper toolbar, or from the dropdown menu \c StarPU. + +\image html eclipse_hello_plugin.png +\image latex eclipse_hello_plugin.png "" width=\textwidth + +To start, one first need to run the StarPU FxT tool, either through +the \c FxT icon of the toolbar, or from the menu \c StarPU / StarPU +FxT Tool. This will call the tool \c starpu_fxt_tool to generate +traces for your application execution. + +A message dialog box is displayed to confirm the generation of the +different traces. + +\image html eclipse_hello_fxt.png +\image latex eclipse_hello_fxt.png "" width=\textwidth + +One of the generated files is a Paje trace which can be viewed with +ViTE, a trace explorer. To open and visualise the file \c paje.trace with +ViTE, one can select the second command of the StarPU menu, which is +named Generate Paje Trace, or click on the second icon named +Trace in the toolbar. + +\image html eclipse_hello_paje_trace.png +\image latex eclipse_hello_paje_trace.png "" width=\textwidth + +\image html eclipse_hello_vite.png +\image latex eclipse_hello_vite.png "" width=\textwidth + +Another generated trace file is a task graph described using the DOT +language. It is possible to get a graphical output of the graph by +calling the graphviz library. To do this, one can click on the +third command of StarPU menu. A task graph of the application in +the \c png format is then generated. + +\image html eclipse_hello_graph.png +\image latex eclipse_hello_graph.png "" width=\textwidth + +In StarPU eclipse plugin, one can display the graph task directly from +eclipse, or through a web browser. 
To do this, there is another +command named Generate SVG graph in the StarPU menu or HGraph +in the toolbar of eclipse. + +From the HTML file, you can see the graph task, and by clicking on a +task name, it will open the C file in which the task submission was +called (if you have an editor which understands the syntax \c +href="file.c#123"). + +\image html eclipse_hello_svg_graph.png +\image latex eclipse_hello_svg_graph.png "" width=\textwidth + +\image html eclipse_hello_hgraph.png +\image latex eclipse_hello_hgraph.png "" width=\textwidth + +\section MemoryFeedback Memory Feedback + +It is possible to enable memory statistics. To do so, you need to pass +the option \ref enable-memory-stats "--enable-memory-stats" when running configure. It is then +possible to call the function starpu_data_display_memory_stats() to +display statistics about the current data handles registered within StarPU. + +Moreover, statistics will be displayed at the end of the execution on +data handles which have not been cleared out. This can be disabled by +setting the environment variable \ref STARPU_MEMORY_STATS to 0. + +For example, by adding a call to the function +starpu_data_display_memory_stats() in the fblock example before +unpartitioning the data, one will get something +similar to: + +\verbatim +$ STARPU_MEMORY_STATS=1 ./examples/filters/fblock +... 
+#--------------------- +Memory stats : +#------- +Data on Node #2 +#----- +Data : 0x5562074e8670 +Size : 144 + +#-- +Data access stats +/!\ Work Underway +Node #0 + Direct access : 0 + Loaded (Owner) : 0 + Loaded (Shared) : 0 + Invalidated (was Owner) : 1 + +Node #2 + Direct access : 0 + Loaded (Owner) : 1 + Loaded (Shared) : 0 + Invalidated (was Owner) : 0 + +#------- +Data on Node #3 +#----- +Data : 0x5562074e9338 +Size : 96 + +#-- +Data access stats +/!\ Work Underway +Node #0 + Direct access : 0 + Loaded (Owner) : 0 + Loaded (Shared) : 0 + Invalidated (was Owner) : 1 + +Node #3 + Direct access : 0 + Loaded (Owner) : 1 + Loaded (Shared) : 0 + Invalidated (was Owner) : 0 + + +#--------------------- +... +\endverbatim + +\section DataStatistics Data Statistics + +Different data statistics can be displayed at the end of the execution +of the application. To enable them, you need to define the environment +variable \ref STARPU_ENABLE_STATS. When calling +starpu_shutdown() various statistics will be displayed, +execution, MSI cache statistics, allocation cache statistics, and data +transfer statistics. The display can be disabled by setting the +environment variable \ref STARPU_STATS to 0. If the environment variable +\ref STARPU_BUS_STATS is defined, you can call starpu_profiling_bus_helper_display_summary() +to display statistics about the bus. If the environment variable +\ref STARPU_WORKER_STATS is defined, you can call starpu_profiling_worker_helper_display_summary() +to display statistics about the workers. You can also call starpu_display_stats() which call both starpu_profiling_bus_helper_display_summary() and starpu_profiling_worker_helper_display_summary() at the same time. + +\verbatim +$ ./examples/cholesky/cholesky_tag +Computation took (in ms) +518.16 +Synthetic GFlops : 44.21 +#--------------------- +MSI cache stats : +TOTAL MSI stats hit 1622 (66.23 %) miss 827 (33.77 %) +... 
+\endverbatim + +\verbatim +$ STARPU_STATS=0 ./examples/cholesky/cholesky_tag +Computation took (in ms) +518.16 +Synthetic GFlop/s : 44.21 +\endverbatim + +// TODO: data transfer stats are similar to the ones displayed when +// setting STARPU_BUS_STATS + + + +\section TraceMpi Tracing MPI applications + +When an MPI execution is traced, especially if the execution is on several +nodes, clock synchronization issues can appear. One may notice them mainly on +communications (they are received before they are sent, for instance). + +Each processor can call the function starpu_profiling_set_id() to set +the ID used for the profiling trace filename. This function can be useful +when executing an MPI program on several nodes, as it enables each processor +to set a unique ID that helps to differentiate its trace file from the files +generated by other processors. By doing this, it becomes easier to analyze and compare +the profiling results of each processor separately, which is particularly helpful for large-scale parallel applications. + +By default, StarPU does two MPI barriers with all MPI processes: one at the +beginning of the application execution and one at the end. Then, \c +starpu_fxt_tool considers all processes leave the barriers at the exact same +time, which makes two points for time synchronization between MPI processes. + +However, a simple MPI barrier can be not precise enough, because the assumption +_all processes leave the barriers at the exact same time_ is in reality false. +To have a more precise barrier, one may use the [mpi_sync_clocks +library](https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks) +(automatically provided when StarPU is built with NewMadeleine, but it can also +be used with other MPI libraries). It provides a *synchronized* barrier, which +aims at actually releasing all processes at the exact same time. 
+Unfortunately, the gained precision costs some time (several seconds per +barrier), that is why one can disable this precise synchronization with the +environment variable \ref STARPU_MPI_TRACE_SYNC_CLOCKS set to \c 0, and use the +faster MPI barrier instead. + +\section VerboseTraces Verbose Traces + +Traces can also be inspected by hand by using the tool fxt_print, for instance: + +\verbatim +$ fxt_print -o -f /tmp/prof_file_something +\endverbatim + +Timings are in nanoseconds (while timings as seen in ViTE are in milliseconds). + +*/ diff --git a/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy b/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy new file mode 100644 index 0000000..5521457 --- /dev/null +++ b/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy @@ -0,0 +1,877 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! 
\page OnlinePerformanceTools Online Performance Tools + +\section On-linePerformanceFeedback On-line Performance Feedback + +Some examples which apply online performance monitoring are in the directory tests/perfmodels/ + +\subsection EnablingOn-linePerformanceMonitoring Enabling On-line Performance Monitoring + +In order to enable online performance monitoring, the application can +call starpu_profiling_status_set() with the parameter +::STARPU_PROFILING_ENABLE. It is possible to detect whether monitoring +is already enabled or not by calling starpu_profiling_status_get(). +Enabling monitoring also reinitialize all previously collected +feedback. The environment variable \ref STARPU_PROFILING can also be +set to 1 to achieve the same effect. The function +starpu_profiling_init() can also be called during the execution to +reinitialize performance counters and to start the profiling if the +environment variable \ref STARPU_PROFILING is set to 1. + +Likewise, performance monitoring is stopped by calling +starpu_profiling_status_set() with the parameter +::STARPU_PROFILING_DISABLE. Note that this does not reset the +performance counters so that the application may consult them later +on. + +More details about the performance monitoring API are available in \ref API_Profiling. + +\subsection Per-taskFeedback Per-task Feedback + +If profiling is enabled, a pointer to a structure +starpu_profiling_task_info is put in the field +starpu_task::profiling_info when a task terminates. This structure is +automatically destroyed when the task structure is destroyed, either +automatically or by calling starpu_task_destroy(). + +The structure starpu_profiling_task_info indicates the date when the +task was submitted (starpu_profiling_task_info::submit_time), started +(starpu_profiling_task_info::start_time), and terminated +(starpu_profiling_task_info::end_time), relative to the initialization +of StarPU with starpu_init(). 
User can call starpu_timing_timespec_delay_us() +to calculate the time elapsed between start time and end time in microseconds. +It also specifies the identifier of the worker +that has executed the task (starpu_profiling_task_info::workerid). +These dates are stored as timespec structures which users may convert +into micro-seconds using the helper function +starpu_timing_timespec_to_us(). User can call starpu_worker_get_current_task_exp_end() to get the date when the current task is expected to be finished. + +When \ref ::STARPU_ENERGY_PROFILING is enabled, +starpu_profiling_task_info::energy_consumed, provides the amount of Joules used +by the task. + +It is worth noting that the application may directly access this structure from +the callback executed at the end of the task. The structure starpu_task +associated to the callback currently being executed is indeed accessible with +the function starpu_task_get_current(). + +\subsection Per-codeletFeedback Per-codelet Feedback + +The field starpu_codelet::per_worker_stats is +an array of counters. Unless the \ref STARPU_CODELET_PROFILING environment +variable was set to 0, the i-th entry of the array is incremented every time a +task implementing the codelet is executed on the i-th worker. +This array is not reinitialized when profiling is enabled or disabled. +The function starpu_codelet_display_stats() can be used to display the execution statistics of a specific codelet. + +\subsection Per-workerFeedback Per-worker Feedback + +The second argument returned by the function +starpu_profiling_worker_get_info() is a structure +starpu_profiling_worker_info that gives statistics about the specified +worker. This structure specifies: + +- In starpu_profiling_worker_info::start_time, when StarPU started collecting +profiling information for that worker. + +- In starpu_profiling_worker_info::total_time, the duration of the profiling +measurement interval. 
+ +- In starpu_profiling_worker_info::executed_tasks, the number of tasks that +were executed while profiling was enabled. + +It also specifies how much time was spent in various states (executing a task, +executing a callback, waiting for a data transfer to complete, etc.). Since +these can happen at the same time (waiting for a data transfer while executing +the previous tasks, and scheduling the next task), we provide two views. Firstly, +the "all" view: + +- In starpu_profiling_worker_info::all_executing_time, the time spent executing +kernels, thus real useful work. + +- In starpu_profiling_worker_info::all_callback_time, the time spent executing +application callbacks. + +- In starpu_profiling_worker_info::all_waiting_time, the time spent waiting for data +transfers. + +- In starpu_profiling_worker_info::all_sleeping_time, the time spent during which +there was no task to be executed, i.e. lack of parallelism. + +- In starpu_profiling_worker_info::all_scheduling_time, the time spent scheduling +tasks. + +But these times overlap, notably with GPUs the schedulers runs while tasks +are getting executed. Another view is the "split" view, which eliminates the +overlapping, by considering for instance that it does not matter what is +happening while tasks are getting executed, that should be accounted for +"executing" time, and e.g. only the scheduling periods that happen while no task +is getting executed should be accounted in "scheduling" time. More precisely: + +- In starpu_profiling_worker_info::executing_time, the time spent executing +kernels, normally equal to starpu_profiling_worker_info::all_executing_time. + +- In starpu_profiling_worker_info::callback_time, the time spent executing +application callbacks while not executing a task. + +- In starpu_profiling_worker_info::waiting_time, the time spent waiting for data +transfers while not executing a task or a callback. 
+ +- In starpu_profiling_worker_info::sleeping_time, the time spent during which +there was no task to be executed and not executing a task or a callback or +waiting for a data transfer, i.e. real lack of parallelism. + +- In starpu_profiling_worker_info::scheduling_time, the time spent scheduling +tasks while not executing a task or a callback or waiting for a data transfer to +finish, and there are tasks to be scheduled. + +This thus provides a split of the starpu_profiling_worker_info::total_time into +various states. The difference between starpu_profiling_worker_info::total_time +and the sum of this split is the remaining uncategorized overhead of the runtime. + +Calling starpu_profiling_worker_get_info() resets the profiling +information associated to a worker. + +To easily display all this information, the environment variable +\ref STARPU_WORKER_STATS can be set to 1 (in addition to setting +\ref STARPU_PROFILING to 1). A summary will then be displayed at +program termination. To display the summary in a file instead of the +standard error stream, use the environment variable \ref STARPU_WORKER_STATS_FILE. 
+ +\verbatim +Worker stats: +CUDA 0.0 (Tesla M2075 4.7 GiB 03:00.0) + 133 task(s) + time split: total 3212.86 ms = executing: 1588.56 ms + callback: 2.95 ms + waiting: 5.34 ms + sleeping: 1613.67 ms + scheduling: 0.01 ms + overhead 2.33 ms + all time: executing: 1588.56 ms callback: 2.95 ms waiting: 22.83 ms sleeping: 1725.93 ms scheduling: 1726.88 ms + 286.388333 GFlop/s + +CPU 0 + 10 task(s) + time split: total 3212.89 ms = executing: 2117.19 ms + callback: 0.23 ms + waiting: 0.01 ms + sleeping: 1095.06 ms + scheduling: 0.02 ms + overhead 0.37 ms + all time: executing: 2117.19 ms callback: 0.23 ms waiting: 0.01 ms sleeping: 1095.06 ms scheduling: 283.86 ms + 22.029695 GFlop/s + +CPU 1 + 10 task(s) + time split: total 3212.92 ms = executing: 2116.18 ms + callback: 0.17 ms + waiting: 0.01 ms + sleeping: 1096.10 ms + scheduling: 0.02 ms + overhead 0.44 ms + all time: executing: 2116.18 ms callback: 0.17 ms waiting: 0.01 ms sleeping: 1096.10 ms scheduling: 284.40 ms + 22.029487 GFlop/s + +CPU 2 + 10 task(s) + time split: total 3212.94 ms = executing: 2116.08 ms + callback: 0.18 ms + waiting: 0.01 ms + sleeping: 1096.21 ms + scheduling: 0.02 ms + overhead 0.44 ms + all time: executing: 2116.08 ms callback: 0.18 ms waiting: 0.01 ms sleeping: 1096.21 ms scheduling: 283.75 ms + 22.029343 GFlop/s + + +Global time split: total 12851.60 ms = executing: 7938.01 ms (61.77%) + callback: 3.53 ms (0.03%) + waiting: 5.36 ms (0.04%) + sleeping: 4901.05 ms (38.14%) + scheduling: 0.06 ms (0.00%) + overhead 3.59 ms (0.03%) +\endverbatim + +The number of GFlops/s is available because the starpu_task::flops field of the +tasks were filled (or \ref STARPU_FLOPS used in starpu_task_insert()). + +When an FxT trace is generated (see \ref GeneratingTracesWithFxT), it is also +possible to use the tool starpu_workers_activity (see +\ref MonitoringActivity) to generate a graphic showing the evolution of +these values during the time, for the different workers. 
+ +\subsection Bus-relatedFeedback Bus-related Feedback + +// how to enable/disable performance monitoring +// what kind of information do we get ? + +The bus speed measured by StarPU can be displayed by using the tool +starpu_machine_display, for instance: + +\verbatim +StarPU has found: + 3 CUDA devices + CUDA 0 (Tesla C2050 02:00.0) + CUDA 1 (Tesla C2050 03:00.0) + CUDA 2 (Tesla C2050 84:00.0) +from to RAM to CUDA 0 to CUDA 1 to CUDA 2 +RAM 0.000000 5176.530428 5176.492994 5191.710722 +CUDA 0 4523.732446 0.000000 2414.074751 2417.379201 +CUDA 1 4523.718152 2414.078822 0.000000 2417.375119 +CUDA 2 4534.229519 2417.069025 2417.060863 0.000000 +\endverbatim + +Statistics about the data transfers which were performed and temporal average +of bandwidth usage can be obtained by setting the environment variable +\ref STARPU_BUS_STATS to 1; a summary will then be displayed at +program termination. To display the summary in a file instead of the +standard error stream, use the environment variable \ref STARPU_BUS_STATS_FILE. 
+ +\verbatim +Data transfer stats: + RAM 0 -> CUDA 0 319.92 MB 213.10 MB/s (transfers : 91 - avg 3.52 MB) + CUDA 0 -> RAM 0 214.45 MB 142.85 MB/s (transfers : 61 - avg 3.52 MB) + RAM 0 -> CUDA 1 302.34 MB 201.39 MB/s (transfers : 86 - avg 3.52 MB) + CUDA 1 -> RAM 0 133.59 MB 88.99 MB/s (transfers : 38 - avg 3.52 MB) + CUDA 0 -> CUDA 1 144.14 MB 96.01 MB/s (transfers : 41 - avg 3.52 MB) + CUDA 1 -> CUDA 0 130.08 MB 86.64 MB/s (transfers : 37 - avg 3.52 MB) + RAM 0 -> CUDA 2 312.89 MB 208.42 MB/s (transfers : 89 - avg 3.52 MB) + CUDA 2 -> RAM 0 133.59 MB 88.99 MB/s (transfers : 38 - avg 3.52 MB) + CUDA 0 -> CUDA 2 151.17 MB 100.69 MB/s (transfers : 43 - avg 3.52 MB) + CUDA 2 -> CUDA 0 105.47 MB 70.25 MB/s (transfers : 30 - avg 3.52 MB) + CUDA 1 -> CUDA 2 175.78 MB 117.09 MB/s (transfers : 50 - avg 3.52 MB) + CUDA 2 -> CUDA 1 203.91 MB 135.82 MB/s (transfers : 58 - avg 3.52 MB) +Total transfers: 2.27 GB +\endverbatim + +\subsection MPI-relatedFeedback MPI-related Feedback + +Statistics about the data transfers which were performed over MPI can be +obtained by setting the environment variable \ref STARPU_MPI_STATS to 1; +a summary will then be displayed at program termination: + +\verbatim +[starpu_comm_stats][1] TOTAL: 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s +[starpu_comm_stats][1:0] 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s + +[starpu_comm_stats][0] TOTAL: 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s +[starpu_comm_stats][0:1] 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s +\endverbatim + +These statistics can be plotted as heatmaps using StarPU tool starpu_mpi_comm_matrix.py (see \ref MPIDebug). + +\section TaskAndWorkerProfiling Task And Worker Profiling + +A full example showing how to use the profiling API is available in +the StarPU sources in the directory examples/profiling/. 
+ +\code{.c} +struct starpu_task *task = starpu_task_create(); +task->cl = &cl; +task->synchronous = 1; +/* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. */ +task->destroy = 0; + +/* Submit and wait for completion (since synchronous was set to 1) */ +starpu_task_submit(task); + +/* The task is finished, get profiling information */ +struct starpu_profiling_task_info *info = task->profiling_info; + +/* How much time did it take before the task started ? */ +double delay += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time); + +/* How long was the task execution ? */ +double length += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + +/* We no longer need the task structure */ +starpu_task_destroy(task); +\endcode + +\code{.c} +/* Display the occupancy of all workers during the test */ +int worker; +for (worker = 0; worker < starpu_worker_get_count(); worker++) +{ + struct starpu_profiling_worker_info worker_info; + int ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + double total_time = starpu_timing_timespec_to_us(&worker_info.total_time); + double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time); + double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time); + double overhead_time = total_time - executing_time - sleeping_time; + + float executing_ratio = 100.0*executing_time/total_time; + float sleeping_ratio = 100.0*sleeping_time/total_time; + float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio; + + char workername[128]; + starpu_worker_get_name(worker, workername, 128); + fprintf(stderr, "Worker %s:\n", workername); + fprintf(stderr, "\ttotal time: %.2lf ms\n", total_time*1e-3); + fprintf(stderr, "\texec time: %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio); + fprintf(stderr, "\tblocked time: %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, 
sleeping_ratio); + fprintf(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3, overhead_ratio); +} +\endcode + +\section PerformanceModelExample Performance Model Example + +To achieve good scheduling, StarPU scheduling policies need to be able to +estimate in advance the duration of a task. This is done by giving to codelets +a performance model, by defining a structure starpu_perfmodel and +providing its address in the field starpu_codelet::model. The fields +starpu_perfmodel::symbol and starpu_perfmodel::type are mandatory, to +give a name to the model, and the type of the model, since there are +several kinds of performance models. Then starpu_task_get_model_name() can be called to retrieve the name of the performance model associated with a task. For compatibility, make sure to +initialize the whole structure to zero, either by using explicit +memset(), or by letting the compiler implicitly do it as exemplified +below. + +
      +
    • +Measured at runtime (model type ::STARPU_HISTORY_BASED). This assumes that for a +given set of data input/output sizes, the performance will always be about the +same. This is very true for regular kernels on GPUs for instance (<0.1% error), +and just a bit less true on CPUs (~=1% error). This also assumes that there are +few different sets of data input/output sizes. StarPU will then keep record of +the average time of previous executions on the various processing units, and use +it as an estimation. History is done per task size, by using a hash of the input +and output sizes as an index. +It will also save it in $STARPU_HOME/.starpu/sampling/codelets +for further executions, and can be observed by using the tool +starpu_perfmodel_display, or drawn by using +the tool starpu_perfmodel_plot (\ref PerformanceModelCalibration). The +models are indexed by machine name. To +share the models between machines (e.g. for a homogeneous cluster), use +export STARPU_HOSTNAME=some_global_name. Measurements are only done +when using a task scheduler which makes use of it, such as +dmda. Measurements can also be provided explicitly by the application, by +using the function starpu_perfmodel_update_history(). An example is in the file tests/perfmodels/feed.c. + +The following is a small code example. + +If e.g. the code is recompiled with other compilation options, or several +variants of the code are used, the symbol string should be changed to reflect +that, in order to recalibrate a new model from zero. The symbol string can even +be constructed dynamically at execution time, as long as this is done before +submitting any task using it. 
+ +\code{.c} +static struct starpu_perfmodel mult_perf_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "mult_perf_model" +}; + +struct starpu_codelet cl = +{ + .cpu_funcs = { cpu_mult }, + .cpu_funcs_name = { "cpu_mult" }, + .nbuffers = 3, + .modes = { STARPU_R, STARPU_R, STARPU_W }, + /* for the scheduling policy to be able to use performance models */ + .model = &mult_perf_model +}; +\endcode + +
    • +
    • +Measured at runtime and refined by regression (model types +::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED). This +still assumes performance regularity, but works +with various data input sizes, by applying regression over observed +execution times. ::STARPU_REGRESSION_BASED uses an a*n^b regression +form, ::STARPU_NL_REGRESSION_BASED uses an a*n^b+c (more precise than +::STARPU_REGRESSION_BASED, but costs a lot more to compute). + +For instance, +tests/perfmodels/regression_based.c uses a regression-based performance +model for the function \c memset(). + +Of course, the application has to issue +tasks with varying size so that the regression can be computed. StarPU will not +trust the regression unless there is at least 10% difference between the minimum +and maximum observed input size. It can be useful to set the +environment variable \ref STARPU_CALIBRATE to 1 and run the application +on varying input sizes with \ref STARPU_SCHED set to dmda scheduler, +to feed the performance model for a variety of +inputs. The application can also provide the measurements explicitly by +using the function starpu_perfmodel_update_history(). The tools +starpu_perfmodel_display and starpu_perfmodel_plot can +be used to observe how much the performance model is calibrated +(\ref PerformanceModelCalibration); when their output looks good, +\ref STARPU_CALIBRATE can be reset to 0 to let +StarPU use the resulting performance model without recording new measures, and +\ref STARPU_SCHED can be set to dmda to benefit from the performance models. If +the data input sizes vary a lot, it is really important to set +\ref STARPU_CALIBRATE to 0, otherwise StarPU will continue adding the +measures, and result in a very big performance model, which will take a +lot of time to load and save. + +For non-linear regression, since computing it +is quite expensive, it is only done at termination of the application. 
This +means that the first execution of the application will use only history-based +performance model to perform scheduling, without using regression. +
    • + +
    • + +Another type of model is ::STARPU_MULTIPLE_REGRESSION_BASED, which +is based on multiple linear regression. In this model, users +define both the relevant parameters and the equation for computing the +task duration. + + +\f[ +T_{kernel} = a + b(M^{\alpha_1} * N^{\beta_1} * K^{\gamma_1}) + c(M^{\alpha_2} * N^{\beta_2} * K^{\gamma_2}) + ... +\f] + + +\f$M, N, K\f$ are the parameters of the task, added at the task +creation. These need to be extracted by the cl_perf_func +function, which should be defined by users. \f$\alpha, \beta, +\gamma\f$ are the exponents defined by users in +model->combinations table. Finally, coefficients \f$a, b, c\f$ +are computed automatically by the StarPU at the end of the execution, using least +squares method of the dgels_ LAPACK function. + +examples/mlr/mlr.c example provides more details on +the usage of ::STARPU_MULTIPLE_REGRESSION_BASED models. The \ref enable-mlr +"--enable-mlr" configure option needs to be set to calibrate the model. + +Coefficients computation is done at the end of the execution, and the +results are stored in standard codelet perfmodel files. Additional +files containing the duration of tasks together with the value of each +parameter are stored in .starpu/sampling/codelets/tmp/ +directory. These files are reused when \ref STARPU_CALIBRATE +environment variable is set to 1, to recompute coefficients +based on the current, but also on the previous +executions. By default, StarPU uses a lightweight dgels implementation, but the +\ref enable-mlr-system-blas "--enable-mlr-system-blas" configure option can be +used to make StarPU use a system-provided dgels BLAS. + +Additionally, when multiple linear regression models are not enabled through +\ref enable-mlr "--enable-mlr" or when the +model->combinations are not defined, StarPU will still write +output files into .starpu/sampling/codelets/tmp/ to allow +performing an analysis. 
This analysis typically aims at finding the +most appropriate equation for the codelet and +tools/starpu_mlr_analysis script provides an example of how to +perform such study. + +
    • + +
    • +Provided as an estimation from the application itself (model type +::STARPU_COMMON and field starpu_perfmodel::cost_function), +see for instance +examples/common/blas_model.h and examples/common/blas_model.c. +
    • + +
    • +Provided explicitly by the application (model type ::STARPU_PER_ARCH): +either field starpu_perfmodel::arch_cost_function, or +the fields .per_arch[arch][nimpl].cost_function have to be +filled with pointers to functions which return the expected duration +of the task in micro-seconds, one per architecture, see for instance +tests/datawizard/locality.c +
    • + +
    • +Provided explicitly by the application (model type ::STARPU_PER_WORKER) +similarly with the starpu_perfmodel::worker_cost_function field. +
    • +
    + +For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and +::STARPU_NL_REGRESSION_BASED, the dimensions of task data (both input +and output) are used as an index by default. ::STARPU_HISTORY_BASED uses a CRC +hash of the dimensions as an index to distinguish histories, and +::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED use the total +size as an index for the regression. (Data marked with ::STARPU_NOFOOTPRINT are +not taken into account). + +The starpu_perfmodel::size_base and starpu_perfmodel::footprint fields however +permit the application to override that, when for instance some of the data +do not matter for task cost (e.g. mere reference table), or when using sparse +structures (in which case it is the number of non-zeros which matter), or when +there is some hidden parameter such as the number of iterations, or when the +application actually has a very good idea of the complexity of the algorithm, +and just not the speed of the processor, etc. The example in the directory +examples/pi uses this to include the number of iterations in the base +size. starpu_perfmodel::size_base should be used when the variance of the actual +performance is known (i.e. bigger return value is longer execution +time), and thus particularly useful for ::STARPU_REGRESSION_BASED or +::STARPU_NL_REGRESSION_BASED. starpu_perfmodel::footprint can be used when the +variance of the actual performance is unknown (irregular performance behavior, +etc.), and thus only useful for ::STARPU_HISTORY_BASED. +starpu_task_data_footprint() can be used as a base and combined with other +parameters through starpu_hash_crc32c_be() for instance. + +StarPU will automatically determine when the performance model is calibrated, +or rather, it will assume the performance model is calibrated until the +application submits a task for which the performance can not be predicted. 
For +::STARPU_HISTORY_BASED, StarPU will require 10 (STARPU_CALIBRATE_MINIMUM) +measurements for a given size before estimating that an average can be taken as +estimation for further executions with the same size. For +::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED, StarPU will require +10 (STARPU_CALIBRATE_MINIMUM) measurements, and that the minimum measured +data size is smaller than 90% of the maximum measured data size (i.e. the +measurement interval is large enough for a regression to have a meaning). +Calibration can also be forced by setting the \ref STARPU_CALIBRATE environment +variable to 1, or even reset by setting it to 2. + +How to use schedulers which can benefit from such performance model is explained +in \ref TaskSchedulingPolicy. + +The same can be done for task energy consumption estimation, by setting +the field starpu_codelet::energy_model the same way as the field +starpu_codelet::model. Note: for now, the application has to give to +the energy consumption performance model a name which is different from +the execution time performance model. + +The application can request time estimations from the StarPU performance +models by filling a task structure as usual without actually submitting +it. The data handles can be created by calling any of the functions +starpu_*_data_register with a NULL pointer and -1 +node and the desired data sizes, and need to be unregistered as usual. +The functions starpu_task_expected_length() and +starpu_task_expected_energy() can then be called to get an estimation +of the task cost on a given arch. starpu_task_footprint() can also be +used to get the footprint used for indexing history-based performance +models. starpu_task_destroy() needs to be called to destroy the dummy +task afterwards. See tests/perfmodels/regression_based.c for an example. 
+ +The application can also request an on-the-fly XML report of the performance +model, by calling starpu_perfmodel_dump_xml() to print the report to a +FILE*. + +\section PerformanceMonitoringCounters Performance Monitoring Counters + +This section presents the StarPU performance monitoring framework. It summarizes the objectives of the framework. It then introduces the entities involved in the framework. It presents the API of the framework, as well as some implementation details. It exposes the typical sequence of operations to plug an external tool to monitor a performance counter of StarPU. + +\subsection PerfMonCountObjectives Objectives + +The objectives of this framework are to let external tools interface with StarPU to collect various performance metrics at runtime, in a generic, safe, extensible way. For that, it enables such tools to discover the available performance metrics in a particular StarPU build, as well as the type of each performance counter value. It lets these tools build sets of performance counters to monitor, and then register listener callbacks to collect the measurement samples of these sets of performance counters at runtime. + +\subsection PerfMonCountEntities Entities + +The performance monitoring framework is built on a series of concepts and items, organized consistently. The corresponding C language objects should be considered opaque by external tools, and should only be manipulated through proper function calls and accessors. + +\subsubsection PerfMonCountCounter Performance Counter + +The performance counter entity is the fundamental object of the framework, representing one piece of performance metrics, such as for instance the total number of tasks submitted so far, that is exported by StarPU and can be collected through the framework at runtime. A performance counter has a type and belongs to a scope. A performance counter is designated by a unique name and unique ID integer. 
+We can start or stop collecting performance counter values by using starpu_perf_counter_collection_start() and starpu_perf_counter_collection_stop(). + +\subsubsection PerfMonCountCounterType Performance Counter Type + +A performance counter has a type. A type is designated by a unique name and unique ID number. Currently, supported types include: + +Type Name|Type Definition +---------|-------------------------------------- +"int32" |32-bit signed integers +"int64" |64-bit signed integers +"float" |32-bit single-precision floating point +"double" |64-bit double-precision floating point + +\subsubsection PerfMonCountCounterScope Performance Counter Scope + +A performance counter belongs to a scope. The scope of a counter defines the context considered for computing the corresponding performance counter. A scope is designated with a unique name and unique ID number. Currently, defined scopes include: + +Scope Name |Scope Definition +-------------|---------------------------------------------- +"global" |Counter is global to the StarPU instance +"per_worker" |Counter is within the scope of a thread worker +"per_codelet"|Counter is within the scope of a task codelet + +\subsubsection PerfMonCountCounterSet Performance Counter Set + +A performance counter set is a subset of the performance counters belonging to the same scope. Each counter of the scope can be in the enabled or disabled state in a performance counter set. A performance counter set enables a performance monitoring tool to indicate the set of counters to be collected for a particular listener callback. + +\subsubsection PerfMonCountCounterSample Performance Counter Sample + +A performance counter sample corresponds to one sample of collected measurement values of a performance counter set. Only the values corresponding to enabled counters in the sample's counter set should be observed by the listener callback. Whether the sample contains valid values for counters disabled in the set is unspecified. 
+ +\subsubsection PerfMonCountCounterListener Performance Counter Listener + +A performance counter listener is a callback function registered by some external tool to monitor a set of performance counters in a particular scope. It is called each time a new performance counter sample is ready to be observed. The sample object should not be accessed outside the callback. + +\subsubsection PerfMonCountCounterAPI Application Programming Interface + +The API of the performance monitoring framework is defined in the \ref starpu_perf_monitoring.h public header file of StarPU. This header file is automatically included with \ref starpu.h. An example of use of the routines is given in \ref PerfMonCountCounterSequence. + +\subsection PerfMonCountCounterImplementation Implementation Details + +\subsubsection PerfMonCountCounterImplRegistration Performance Counter Registration + +Each module of StarPU can export performance counters. In order to do so, modules that need to export some counters define a registration function that is called at StarPU initialization time. This function is responsible for calling the "_starpu_perf_counter_register()" function once for each counter it exports, to let the framework know about the list of counters managed by the module. It also registers performance sample updater callbacks for the module, one for each scope for which it exports counters. + +\subsubsection PerfMonCountCounterImplUpdaters Performance Sample Updaters + +The updater callback for a module and scope combination is internally called every time a sample for a set of performance counters must be updated. Thus, the updater callback is responsible for filling the sample's selected counters with the counter values found at the time of the call. 
+Global updaters are currently called at task submission time, as well as any blocking tasks management function of the StarPU API, such as starpu_task_wait_for_all(), which waits for the completion of all tasks submitted up to this point. +Per-worker updaters are currently called at the level of StarPU's drivers, that is, the modules in charge of task execution of hardware-specific worker threads. The actual calls occur in-between the execution of tasks. +Per-codelet updaters are currently called both at task submission time, and at the level of StarPU's drivers together with the per-worker updaters. + +A performance sample object is locked during the sample collection. The locking prevents the following issues: + +
      +
    • The listener of sample being changed during sample collection; +
    • The set of counters enabled for a sample being changed; +
    • Conflicting concurrent updates; +
    • Updates while the sample is being read by the listener. +
    + +The location of the updaters' calls is chosen to minimize the sequentialization effect of the locking, in order to limit the level of interference of the monitoring process. For Global updaters, the calls are performed only on the application thread(s) in charge of submitting tasks. Since, in most cases, only a single application thread submits tasks, the sequentialization effect is moderate. Per-worker updates are local to their worker, thus here again the sample lock is uncontended, unless the external monitoring tool frequently changes the set of enabled counters in the sample. + +\subsubsection PerfMonCountCounterImplOperations Counter operations + +In practice, the sample updaters only take snapshots of the actual performance counters. The performance counters themselves are updated with ad-hoc procedures depending on each counter. Such procedures typically involve atomic operations. While operations such as atomic increments or decrements on integer values are readily available, this is not the case for more complex operations such as min/max for computing peak value counters (for instance in the global and per-codelet counters for peak number of submitted tasks and peak number of ready tasks waiting for execution), and this is also not the case for computations on floating point data (used for instance in computing cumulated execution time of tasks, either per worker or per codelet). The performance monitoring framework therefore supplies such missing routines, for the internal use of StarPU. + +\subsubsection PerfMonCountCounterImplRuntime Runtime checks + +The performance monitoring framework features a comprehensive set of runtime checks to verify that both StarPU and some external tool do not access a performance counter with the wrong typed routines, to quickly detect situations of mismatch that can result from the evolution of multiple pieces of software at distinct paces. 
Moreover, no StarPU data structure is accessed directly, either by the external code making use of the performance monitoring framework. The use of the C enum constants is optional; referring to values through constant strings is available when more robustness is desired. These runtime checks enable the framework to be extensible. Moreover, while the framework's counters currently are permanently compiled in, they could be made optional at compile time, for instance to suppress any overhead once the analysis and optimization process has been completed by the programmer. Thanks to the runtime discovery of available counters, the applicative code, or an intermediate layer such as skeleton layer acting on its behalf, would then be able to adapt to performance analysis builds versus optimized builds. + +\subsection PerfMonCountCounterExported Exported Counters + +\subsubsection PerfMonCountCounterExportedGlobal Global Scope + +Counter Name |Counter Definition +---------------------------------|-------------------------------------------------------------------------------------- +\c starpu.task.g_total_submitted |Total number of tasks submitted +\c starpu.task.g_peak_submitted |Maximum number of tasks submitted, waiting for dependencies resolution at any time +\c starpu.task.g_peak_ready |Maximum number of tasks ready for execution, waiting for an execution slot at any time + +\subsubsection PerfMonCountCounterExportedPerWorker Per-worker Scope + +Counter Name |Counter Definition +--------------------------------------|------------------------------------------------------------ +\c starpu.task.w_total_executed |Total number of tasks executed on a given worker +\c starpu.task.w_cumul_execution_time |Cumulated execution time of tasks executed on a given worker + + +\subsubsection PerfMonCountCounterExportedPerCodelet Per-Codelet Scope + +Counter Name |Counter Definition 
+--------------------------------------|----------------------------------------------------------------------------------------------------- +\c starpu.task.c_total_submitted |Total number of submitted tasks for a given codelet +\c starpu.task.c_peak_submitted |Maximum number of submitted tasks for a given codelet waiting for dependencies resolution at any time +\c starpu.task.c_peak_ready |Maximum number of ready tasks for a given codelet waiting for an execution slot at any time +\c starpu.task.c_total_executed |Total number of executed tasks for a given codelet +\c starpu.task.c_cumul_execution_time |Cumulated execution time of tasks for a given codelet + +\subsection PerfMonCountCounterSequence Sequence of operations + +This section presents a typical sequence of operations to interface an external tool with some StarPU performance counters. In this example, the counters monitored are the per-worker total number of executed tasks (\c starpu.task.w_total_executed) and the tasks' cumulated execution time (\c starpu.task.w_cumul_execution_time). + +Step 0: Initialize StarPU + +StarPU must first be initialized, by a call to starpu_init(), for performance counters to become available, since each module of StarPU registers the performance counters it exports during that initialization phase. + +\code{.c} +int ret = starpu_init(NULL); +\endcode + + +Step 1: Allocate a counter set + +A counter set has to be allocated on the per-worker scope. The per-worker scope id can be obtained by name, or with the pre-defined enum value ::starpu_perf_counter_scope_per_worker. + +\code{.c} +enum starpu_perf_counter_scope w_scope = starpu_perf_counter_scope_per_worker; +struct starpu_perf_counter_set *w_set = starpu_perf_counter_set_alloc(w_scope); +\endcode + + +Step 2: Get the counter IDs +Each performance counter has a unique ID used to refer to it in subsequent calls to the performance monitoring framework. 
+ +\code{.c} +int id_w_total_executed = starpu_perf_counter_name_to_id(w_scope, + "starpu.task.w_total_executed"); + +int id_w_cumul_execution_time = starpu_perf_counter_name_to_id(w_scope, + "starpu.task.w_cumul_execution_time"); +\endcode + + +Step 3: Enable the counters in the counter set + +This step indicates which counters will be collected into performance monitoring samples for the listeners referring to this counter set. + +\code{.c} +starpu_perf_counter_set_enable_id(w_set, id_w_total_executed); +starpu_perf_counter_set_enable_id(w_set, id_w_cumul_execution_time); +\endcode + + +Step 4: Write a listener callback + +This callback will be triggered when a sample becomes available. Upon execution, it reads the values for the two counters from the sample and displays these values, for the sake of the example. + +\code{.c} +void w_listener_cb(struct starpu_perf_counter_listener *listener, + struct starpu_perf_counter_sample *sample, + void *context) +{ + int32_t w_total_executed = + starpu_perf_counter_sample_get_int32_value(sample, id_w_total_executed); + + double w_cumul_execution_time = + starpu_perf_counter_sample_get_double_value(sample, id_w_cumul_execution_time); + + printf("worker[%d]: w_total_executed = %d, w_cumul_execution_time = %lf\n", + starpu_worker_get_id(), + w_total_executed, + w_cumul_execution_time); +} +\endcode + + +Step 5: Initialize the listener + +This step allocates the listener structure and prepares it to listen to the selected set of per-worker counters. However, it is not actually active until Step 6, once it is attached to one or more worker. + +\code{.c} +struct starpu_perf_counter_listener * w_listener = + starpu_perf_counter_listener_init(w_set, w_listener_cb, NULL); +\endcode + + +Step 6: Set the listener on all workers +This step actually makes the listener active, in this case on every StarPU worker thread. 
+ +\code{.c} +starpu_perf_counter_set_all_per_worker_listeners(w_listener); +\endcode + + +After this step, any task assigned to a worker will be counted in that worker's selected performance counters, and reported to the listener. + + +\section PerfKnobs Performance Steering Knobs + +This section presents the StarPU performance steering framework. It summarizes the objectives of the framework. It introduces the entities involved in the framework, and then details the API, implementation and sequence of operations. + +\subsection PerfKnobsObjectives Objectives + +The objectives of this framework are to let external tools interface with StarPU, observe, and act at runtime on actionable performance steering knobs exported by StarPU, in a generic, safe, extensible way. It defines an API to let such external tools discover the available performance steering knobs in a particular StarPU revision or build, as well as the type of each knob. + +\subsection PerfKnobsEntities Entities + +\subsubsection PerfKnobsEntitiesKnob Performance Steering Knob + +The performance steering knob entity designates one runtime-actionable knob exported by StarPU. It may represent some setting, or some constant used within StarPU for a given purpose. The value of the knob is typed, it can be obtained or modified with the appropriate getter/setter routine. The knob belongs to a scope. A performance steering knob is designated with a unique name and unique ID number. + +\subsubsection PerfKnobsEntitiesKnobType Knob Type + +A performance steering knob has a type. A type is designated by a unique name and unique ID number. 
Currently, supported types include: + +Type Name |Type Definition +----------|-------------------------------------- +"int32" |32-bit signed integers +"int64" |64-bit signed integers +"float" |32-bit single precision floating point +"double" |64-bit double precision floating point + +On/Off knobs are defined as "int32" type, with value 0 for Off and value !0 for On, unless otherwise specified. + +\subsubsection PerfKnobsEntitiesKnobScope Knob Scope + +A performance steering knob belongs to a scope. The scope of a knob defines the context considered for computing the corresponding knob. A scope is designated with a unique name and unique ID number. Currently, defined scopes include: + +Scope Name |Scope Definition +----------------|-------------------------------------------------------- +"global" |Knob is global to the StarPU instance +"per_worker" |Knob is within the scope of a thread worker +"per_scheduler" |Knob is within the scope of a scheduling policy instance + +\subsubsection PerfKnobsEntitiesKnobGroup Knob Group + +The notion of Performance Steering Knob Group is currently internal to StarPU. It defines a series of knobs that are handled by the same couple of setter/getter functions internally. A knob group belongs to a knob scope. + +\subsection PerfKnobsAPI Application Programming Interface + +The API is defined in the \ref starpu_perf_steering.h public header file of StarPU. This header file is automatically included with \ref starpu.h. + +\subsection PerfKnobsImpl Implementation Details + +While the APIs of the monitoring and the steering frameworks share a similar design philosophy, the internals are significantly different. Since the effect of the steering knobs varies widely, there is no global locking scheme in place shared for all knobs. Instead, each knob gets its own procedures to get the value of a setting, or change it. To prevent code duplication, some related knobs may share getter/setter routines as knob groups. 
+ +The steering framework does not involve callback routines. Knob get operations proceed immediately, except for the possible delay in getting access to the knob value. Knob set operations also proceed immediately, not counting the exclusive access time, though their action result may be observed with some latency, depending on the knob and on the current workload. For instance, acting on a per-worker \c starpu.worker.w_enable_worker_knob to disable a worker thread may be observed only after the corresponding worker's assigned task queue becomes empty, since its actual effect is to prevent additional tasks from being queued to the worker, and not to migrate already queued tasks to another worker. Such design choices aim at providing a compromise between offering some steering capabilities and keeping the cost of supporting such steering capabilities to an acceptable level. + +The framework is designed to be easily extensible. At StarPU initialization time, the framework calls initialization functions of StarPU modules to initialize the set of knobs they export. Knob get/set accessors can be shared among multiple knobs in a knob group. Thus, exporting a new knob is basically a matter of declaring it at initialization time, by specifying its name and value type, and either adding its handling to an existing getter/setter pair of accessors in a knob group, or creating a new group. Like the performance monitoring framework, the performance steering framework is currently permanently enabled, but could be made optional at compile-time to separate testing builds from production builds. 
+ +\subsection PerfKnobsExported Exported Steering Knobs + +\subsubsection PerfKnobsExportedGlobal Global Scope + +Knob Name |Knob Definition +--------------------------------------------|---------------------------------------------------- +\c starpu.global.g_calibrate_knob |Enable/disable the calibration of performance models +\c starpu.global.g_enable_catch_signal_knob |Enable/disable the catching of UNIX signals + + +\subsubsection PerfKnobsExportedPerWorker Per-worker Scope + +Knob Name |Knob Definition +--------------------------------------|------------------------------------------------------------ +\c starpu.worker.w_bind_to_pu_knob |Change the processing unit to which a worker thread is bound +\c starpu.worker.w_enable_worker_knob |Disable/re-enable a worker thread to be selected for task execution + + +\subsubsection PerfKnobsExportedPerScheduler Per-Scheduler Scope + +Knob Name |Knob Definition +---------------------------------------|---------------- +\c starpu.task.s_max_priority_cap_knob |Set a capping maximum priority value for subsequently submitted tasks +\c starpu.task.s_min_priority_cap_knob |Set a capping minimum priority value for subsequently submitted tasks +\c starpu.dmda.s_alpha_knob |Scaling factor for the Alpha constant for Deque Model schedulers to alter the weight of the estimated task execution time +\c starpu.dmda.s_beta_knob |Scaling factor for the Beta constant for Deque Model schedulers to alter the weight of the estimated data transfer time for the task's input(s) +\c starpu.dmda.s_gamma_knob |Scaling factor for the Gamma constant for Deque Model schedulers to alter the weight of the estimated power consumption of the task +\c starpu.dmda.s_idle_power_knob |Scaling factor for the baseline Idle power consumption estimation of the corresponding processing unit + + +\subsection PerfKnobsSequence Sequence of operations + +This section presents an example of a sequence of operations representing a typical use of the performance 
steering knobs exported by StarPU. In this example, a worker thread is temporarily barred from executing tasks. For that, the corresponding \c starpu.worker.w_enable_worker_knob of the worker, initially set to 1 (= enabled), is changed to 0 (= disabled). + +Step 0: Initialize StarPU + +StarPU must first be initialized, by a call to starpu_init(). Performance steering knobs only become available after this step, since each module of StarPU registers the knobs it exports during that initialization phase. + +\code{.c} +int ret = starpu_init(NULL); +\endcode + +Step 1: Get the knob ID + +Each performance steering knob has a unique ID used to refer to it in subsequent calls to the performance steering framework. The knob belongs to the "per_worker" scope. + +\code{.c} +int w_scope = starpu_perf_knob_scope_name_to_id("per_worker"); +int w_enable_id = starpu_perf_knob_name_to_id(w_scope, "starpu.worker.w_enable_worker_knob"); +\endcode + + +Step 2: Get the knob current value + +This knob is an On/Off knob. Its value type is therefore a 32-bit integer, with value 0 for Off and value !0 for On. The getter functions for per-worker knobs expect the knob ID as first argument, and the worker ID as second argument. Here the getter call obtains the value of worker 5. + +\code{.c} +int32_t val = starpu_perf_knob_get_per_worker_int32_value(w_enable_id, 5); +\endcode + +Step 3: Set the knob current value + +The setter functions for per-worker knobs expect the knob ID as first argument, the worker ID as second argument, and the new value as third argument. Here, the value for worker 5 is set to 0 to temporarily bar the worker thread from accepting new tasks for execution. + +\code{.c} +starpu_perf_knob_set_per_worker_int32_value(w_enable_id, 5, 0); +\endcode + +Subsequently, setting the value of the knob back to 1 enables the corresponding worker to accept new tasks for execution again. 
+ +\code{.c} +starpu_perf_knob_set_per_worker_int32_value(w_enable_id, 5, 1); +\endcode + + + + + +*/ diff --git a/doc/doxygen/chapters/starpu_performances/performances_intro.doxy b/doc/doxygen/chapters/starpu_performances/performances_intro.doxy new file mode 100644 index 0000000..80167cb --- /dev/null +++ b/doc/doxygen/chapters/starpu_performances/performances_intro.doxy @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroPerformances, --------- StarPU Performances ---------} + +\webforeword + +This part shows how to measure application performances. + +
      +
    • Chapter \ref BenchmarkingStarPU introduces some interesting benchmarks which can be found in StarPU sources. +
    • Chapter \ref OnlinePerformanceTools gives information on online performance monitoring tools to help you analyze your program. +
    • Chapter \ref OfflinePerformanceTools gives information on offline performance tools such as a FxT library to trace execution data and tasks and a StarPU Eclipse Plugin to visualize data traces directly from the Eclipse IDE. +
    + +*/ diff --git a/doc/doxygen/chapters/version.html b/doc/doxygen/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen/chapters/version.sty b/doc/doxygen/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen/doxygen-config-include.cfg.in b/doc/doxygen/doxygen-config-include.cfg.in new file mode 100644 index 0000000..e2e5ebc --- /dev/null +++ b/doc/doxygen/doxygen-config-include.cfg.in @@ -0,0 +1,85 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT += @top_builddir@/doc/doxygen/starpu_config.h \ + @top_srcdir@/include/starpu.h \ + @top_srcdir@/include/starpu_task.h \ + @top_srcdir@/include/starpu_data.h \ + @top_srcdir@/include/starpu_data_interfaces.h \ + @top_srcdir@/include/starpu_data_filters.h \ + @top_srcdir@/include/starpu_task_dep.h \ + @top_srcdir@/include/starpu_task_list.h \ + @top_srcdir@/include/starpu_task_util.h \ + @top_srcdir@/include/starpu_cuda.h \ + @top_srcdir@/include/starpu_cublasLt.h \ + @top_srcdir@/include/starpu_cusparse.h \ + @top_srcdir@/include/starpu_cublas.h \ + @top_srcdir@/include/starpu_cublas_v2.h \ + @top_srcdir@/include/starpu_cusolver.h \ + @top_srcdir@/include/starpu_opencl.h \ + @top_srcdir@/include/starpu_hip.h \ + @top_srcdir@/include/starpu_max_fpga.h \ + @top_srcdir@/include/starpu_worker.h \ + @top_srcdir@/include/starpu_perfmodel.h \ + @top_srcdir@/include/starpu_openmp.h \ + @top_srcdir@/include/starpu_sched_component.h \ + @top_srcdir@/include/starpu_sched_ctx.h \ + @top_srcdir@/include/starpu_sched_ctx_hypervisor.h \ + @top_srcdir@/include/starpu_scheduler.h \ + @top_srcdir@/include/schedulers/starpu_heteroprio.h \ + @top_srcdir@/include/schedulers/starpu_scheduler_toolbox.h \ + @top_srcdir@/mpi/include/starpu_mpi.h \ + @top_srcdir@/mpi/include/starpu_mpi_ft.h \ + @top_srcdir@/mpi/include/starpu_mpi_lb.h \ + @top_srcdir@/mpi/include/fstarpu_mpi_mod.f90 \ + @top_srcdir@/doc/doxygen/chapters/api/bubble_support.doxy \ + @top_srcdir@/include/starpu_bitmap.h \ + @top_srcdir@/include/starpu_bound.h \ + @top_srcdir@/include/starpu_deprecated_api.h \ + @top_srcdir@/include/starpu_disk.h \ + @top_srcdir@/include/starpu_driver.h \ + @top_srcdir@/include/starpu_expert.h \ + @top_srcdir@/include/starpu_fxt.h \ + @top_srcdir@/include/starpu_hash.h \ + @top_srcdir@/include/starpu_util.h \ + @top_srcdir@/include/starpu_helper.h \ + @top_srcdir@/include/starpu_parallel_worker.h \ + @top_srcdir@/include/starpu_perf_monitoring.h \ + 
@top_srcdir@/include/starpu_perf_steering.h \ + @top_srcdir@/include/starpu_profiling.h \ + @top_srcdir@/include/starpu_profiling_tool.h \ + @top_srcdir@/include/starpu_rand.h \ + @top_srcdir@/include/starpu_simgrid_wrap.h \ + @top_srcdir@/include/starpu_sink.h \ + @top_srcdir@/include/starpu_stdlib.h \ + @top_srcdir@/include/starpu_task_bundle.h \ + @top_srcdir@/doc/doxygen/chapters/api/threads.doxy \ + @top_srcdir@/include/starpu_thread.h \ + @top_srcdir@/include/starpu_thread_util.h \ + @top_srcdir@/include/starpu_tree.h \ + @top_srcdir@/doc/doxygen/chapters/api/fortran_support.doxy \ + @top_srcdir@/include/fstarpu_mod.f90 \ + @top_srcdir@/include/starpu_mod.f90 \ + @top_srcdir@/starpufft/include/starpufft.h \ + @top_srcdir@/sc_hypervisor/include/sc_hypervisor_config.h \ + @top_srcdir@/sc_hypervisor/include/sc_hypervisor_policy.h \ + @top_srcdir@/sc_hypervisor/include/sc_hypervisor_lp.h \ + @top_srcdir@/sc_hypervisor/include/sc_hypervisor.h \ + @top_srcdir@/sc_hypervisor/include/sc_hypervisor_monitoring.h \ + @top_srcdir@/starpurm/include/starpurm.h \ + @top_srcdir@/doc/doxygen/chapters/api/fft_support.doxy + diff --git a/doc/doxygen/doxygen-config.cfg.in b/doc/doxygen/doxygen-config.cfg.in new file mode 100644 index 0000000..8e6ce6f --- /dev/null +++ b/doc/doxygen/doxygen-config.cfg.in @@ -0,0 +1,93 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_introduction/glossary.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/installation_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/building.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/configure_options.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/environment_variables.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/basics_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/basic_examples.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/tasks.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/data_management.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/scheduling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/examples_sources.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/applications_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/stencil.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/performances_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy \ + 
@top_srcdir@/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/languages_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/java.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/python.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/transactions.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fft_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ 
+ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/helpers.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/code \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/code \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/code \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/code + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +PROJECT_NAME = "StarPU Handbook" +ALIASES += "intropage{2} = \page \1 \2" +ALIASES += "foreword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html" +ALIASES += "webforeword = " diff --git a/doc/doxygen/doxygen_filter.sh.in b/doc/doxygen/doxygen_filter.sh.in new file mode 100755 index 0000000..d61235d --- /dev/null +++ b/doc/doxygen/doxygen_filter.sh.in @@ -0,0 +1,23 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +if [ "$(basename $1)" == "starpufft.h" ] ; then + gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep -i starpufft +else + # the macro STARPU_DEPRECATED needs to be removed as it is not properly processed by doxygen + # lines starting with // in the doxygen input files are considered as comments to be removed + sed -e 's/STARPU_DEPRECATED//' $1 | sed -e 's/^\/\/.*//' | sed -e 's/STARPU_TASK_LIST_INLINE//' | sed -e 's/STARPU_WARN_UNUSED_RESULT//' +fi diff --git a/doc/doxygen/refman.tex b/doc/doxygen/refman.tex new file mode 100644 index 0000000..1900544 --- /dev/null +++ b/doc/doxygen/refman.tex @@ -0,0 +1,449 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. 
+% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. +% +\newcommand\starputitle{StarPU Handbook} +\setcounter{tocdepth}{1} +\input{./title.tex} + +\chapter{Introduction} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Documentation Organization} +\label{DocumentationOrganization} +\hypertarget{DocumentationOrganization}{} +\input{DocOrganization} + +\chapter{Glossary} +\label{Glossary} +\hypertarget{Glossary}{} +\input{Glossary} + +\part{StarPU Installation} + +\chapter{Organization} +\label{IntroInstallation} +\hypertarget{IntroInstallation}{} +\input{IntroInstallation} + +\chapter{Building and Installing StarPU} +\label{BuildingAndInstallingStarPU} +\hypertarget{BuildingAndInstallingStarPU}{} +\input{BuildingAndInstallingStarPU} + +\chapter{Compilation Configuration} +\label{CompilationConfiguration} +\hypertarget{CompilationConfiguration}{} +\input{CompilationConfiguration} + +\chapter{Execution Configuration Through Environment Variables} +\label{ExecutionConfigurationThroughEnvironmentVariables} +\hypertarget{ExecutionConfigurationThroughEnvironmentVariables}{} +\input{ExecutionConfigurationThroughEnvironmentVariables} + +\chapter{Configuration and initialization} +\label{ConfigurationAndInitialization} +\hypertarget{ConfigurationAndInitialization}{} +\input{ConfigurationAndInitialization} + +\part{StarPU Basics} + +\chapter{Organization} 
+\label{IntroBasics} +\hypertarget{IntroBasics}{} +\input{IntroBasics} + +\chapter{StarPU Applications} +\label{StarPUApplications} +\hypertarget{StarPUApplications}{} +\input{StarPUApplications} + +\chapter{Basic Examples} +\label{BasicExamples} +\hypertarget{BasicExamples}{} +\input{BasicExamples} + +\chapter{Full Source Code for the ’Scaling a Vector’ Example} +\label{FullSourceCodeVectorScal} +\hypertarget{FullSourceCodeVectorScal}{} +\input{FullSourceCodeVectorScal} + +\chapter{Tasks In StarPU} +\label{TasksInStarPU} +\hypertarget{TasksInStarPU}{} +\input{TasksInStarPU} + +\chapter{Data Management} +\label{DataManagement} +\hypertarget{DataManagement}{} +\input{DataManagement} + +\chapter{Scheduling} +\label{Scheduling} +\hypertarget{Scheduling}{} +\input{Scheduling} + +\chapter{Examples in StarPU Sources} +\label{ExamplesInStarPUSources} +\hypertarget{ExamplesInStarPUSources}{} +\input{ExamplesInStarPUSources} + +\part{StarPU Applications} +\label{PartStarPUApplications} + +\chapter{Organization} +\label{IntroApplications} +\hypertarget{IntroApplications}{} +\input{IntroApplications} + +\chapter{A Vector Scaling Application} +\label{VectorScalingApplication} +\hypertarget{VectorApplication}{} +\input{VectorApplication} + +\chapter{A Stencil Application} +\label{StencilApplication} +\hypertarget{StencilApplication}{} +\input{StencilApplication} + +\part{StarPU Performances} + +\chapter{Organization} +\label{IntroPerformances} +\hypertarget{IntroPerformances}{} +\input{IntroPerformances} + +\chapter{Benchmarking StarPU} +\label{BenchmarkingStarPU} +\hypertarget{BenchmarkingStarPU}{} +\input{BenchmarkingStarPU} + +\chapter{Online Performance Tools} +\label{OnlinePerformanceTools} +\hypertarget{OnlinePerformanceTools}{} +\input{OnlinePerformanceTools} + +\chapter{Offline Performance Tools} +\label{OfflinePerformanceTools} +\hypertarget{OfflinePerformanceTools}{} +\input{OfflinePerformanceTools} + +\part{StarPU FAQ} + +\chapter{Organization} +\label{IntroFAQ} 
+\hypertarget{IntroFAQ}{} +\input{IntroFAQ} + +\chapter{Check List When Performance Are Not There} +\label{CheckListWhenPerformanceAreNotThere} +\hypertarget{CheckListWhenPerformanceAreNotThere}{} +\input{CheckListWhenPerformanceAreNotThere} + +\chapter{Frequently Asked Questions} +\label{FrequentlyAskedQuestions} +\hypertarget{FrequentlyAskedQuestions}{} +\input{FrequentlyAskedQuestions} + +\part{StarPU Language Bindings} + +\chapter{Organization} +\label{IntroLanguage} +\hypertarget{IntroLanguage}{} +\input{IntroLanguage} + +\chapter{Native Fortran Support} +\label{NativeFortranSupport} +\hypertarget{NativeFortranSupport}{} +\input{NativeFortranSupport} + +\chapter{StarPU Java Interface} +\label{StarPUJavaInterface} +\hypertarget{StarPUJavaInterface}{} +\input{StarPUJavaInterface} + +\chapter{Python Interface} +\label{PythonInterface} +\hypertarget{PythonInterface}{} +\input{PythonInterface} + +\chapter{The StarPU OpenMP Runtime Support (SORS)} +\label{OpenMPRuntimeSupport} +\hypertarget{OpenMPRuntimeSupport}{} +\input{OpenMPRuntimeSupport} + +\part{StarPU Extensions} + +\chapter{Organization} +\label{IntroExtensions} +\hypertarget{IntroExtensions}{} +\input{IntroExtensions} + +\chapter{Advanced Tasks In StarPU} +\label{AdvancedTasksInStarPU} +\hypertarget{AdvancedTasksInStarPU}{} +\input{AdvancedTasksInStarPU} + +\chapter{Advanced Data Management} +\label{AdvancedDataManagement} +\hypertarget{AdvancedDataManagement}{} +\input{AdvancedDataManagement} + +\chapter{Advanced Scheduling} +\label{AdvancedScheduling} +\hypertarget{AdvancedScheduling}{} +\input{AdvancedScheduling} + +\chapter{Scheduling Contexts} +\label{SchedulingContexts} +\hypertarget{SchedulingContexts}{} +\input{SchedulingContexts} + +\chapter{Scheduling Context Hypervisor} +\label{SchedulingContextHypervisor} +\hypertarget{SchedulingContextHypervisor}{} +\input{SchedulingContextHypervisor} + +\chapter{How To Define a New Scheduling Policy} +\label{HowToDefineANewSchedulingPolicy} 
+\hypertarget{HowToDefineANewSchedulingPolicy}{} +\input{HowToDefineANewSchedulingPolicy} + +\chapter{CUDA Support} +\label{CUDASupport} +\hypertarget{CUDASupport}{} +\input{CUDASupport} + +\chapter{OpenCL Support} +\label{OpenCLSupport} +\hypertarget{OpenCLSupport}{} +\input{OpenCLSupport} + +\chapter{Maxeler FPGA Support} +\label{MaxFPGASupport} +\hypertarget{MaxFPGASupport}{} +\input{MaxFPGASupport} + +\chapter{Out Of Core} +\label{OutOfCore} +\hypertarget{OutOfCore}{} +\input{OutOfCore} + +\chapter{MPI Support} +\label{MPISupport} +\hypertarget{MPISupport}{} +\input{MPISupport} + +\chapter{TCP/IP Support} +\label{TCPIPSupport} +\hypertarget{TCPIPSupport}{} +\input{TCPIPSupport} + +\chapter{Transactions} +\label{Transactions} +\hypertarget{Transactions}{} +\input{Transactions} + +\chapter{Fault Tolerance} +\label{FaultTolerance} +\hypertarget{FaultTolerance}{} +\input{FaultTolerance} + +\chapter{FFT Support} +\label{FFTSupport} +\hypertarget{FFTSupport}{} +\input{FFTSupport} + +\chapter{SOCL OpenCL Extensions} +\label{SOCLOpenclExtensions} +\hypertarget{SOCLOpenclExtensions}{} +\input{SOCLOpenclExtensions} + +\chapter{Hierarchical DAGS} +\label{HierarchicalDAGS} +\hypertarget{HierarchicalDAGS}{} +\input{HierarchicalDAGS} + +\chapter{Parallel Workers} +\label{ParallelWorker} +\hypertarget{ParallelWorker}{} +\input{ParallelWorker} + +\chapter{Interoperability Support} +\label{InteropSupport} +\hypertarget{InteropSupport}{} +\input{InteroperabilitySupport} + +\chapter{SimGrid Support} +\label{SimGridSupport} +\hypertarget{SimGridSupport}{} +\input{SimGridSupport} + +\chapter{Helpers} +\label{Helpers} +\hypertarget{Helpers}{} +\input{Helpers} + +\chapter{Debugging Tools} +\label{DebuggingTools} +\hypertarget{DebuggingTools}{} +\input{DebuggingTools} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +\chapter{Module Index} 
+\input{modules} + +\chapter{Module Documentation a.k.a StarPU's API} +\label{ModuleDocumentation} +\hypertarget{ModuleDocumentation}{} + +\input{group__API__Bitmap} +\input{group__API__Bubble} +\input{group__API__Codelet__And__Tasks} +\input{group__API__CUDA__Extensions} +\input{group__API__Data__Interfaces} +\input{group__API__Data__Management} +\input{group__API__Data__Partition} +\input{group__API__Expert__Mode} +\input{group__API__Explicit__Dependencies} +\input{group__API__FFT__Support} +\input{group__API__Fortran} +\input{group__API__FxT__Support} +\input{group__API__HeteroPrio} +\input{group__API__HIP__Extensions} +\input{group__API__Initialization__and__Termination} +\input{group__API__Insert__Task} +\input{group__API__Interop__Support} +\input{group__API__Max__FPGA__Extensions} +\input{group__API__Miscellaneous__Helpers} +\input{group__API__Modularized__Scheduler} +\input{group__API__MPI__FT__Support} +\input{group__API__MPI__Support} +\input{group__API__OpenCL__Extensions} +\input{group__API__OpenMP__Runtime__Support} +\input{group__API__Out__Of__Core} +\input{group__API__Parallel__Tasks} +\input{group__API__Parallel__Worker} +\input{group__API__Perf__Monitoring} +\input{group__API__Performance__Model} +\input{group__API__Perf__Steering} +\input{group__API__Profiling} +\input{group__API__Profiling__Tool} +\input{group__API__Random__Functions} +\input{group__API__Running__Drivers} +\input{group__API__Scheduler__Toolbox} +\input{group__API__Scheduling__Contexts} +\input{group__API__Scheduling__Policy} +\input{group__API__SC__Hypervisor__LP} +\input{group__API__SC__Hypervisor} +\input{group__API__SC__Hypervisor__usage} +\input{group__API__Sink} +\input{group__API__Standard__Memory__Library} +\input{group__API__Task__Bundles} +\input{group__API__Task__Lists} +\input{group__API__Theoretical__Lower__Bound__on__Execution__Time} +\input{group__API__Threads} +\input{group__API__Toolbox} +\input{group__API__Transactions} +\input{group__API__Tree} 
+\input{group__API__Versioning} +\input{group__API__Workers} + +\chapter{File Index} +\input{files} + +\chapter{File Documentation} +\label{FileDocumentation} +\hypertarget{FileDocumentation}{} + +\input{starpu_8h} +\input{starpu__bitmap_8h} +\input{starpu__bound_8h} +\input{starpu__config_8h} +\input{starpu__cublas_8h} +\input{starpu__cublas__v2_8h} +\input{starpu__cublasLt_8h} +\input{starpu__cusparse_8h} +\input{starpu__cuda_8h} +\input{starpu__data_8h} +\input{starpu__data__filters_8h} +\input{starpu__data__interfaces_8h} +\input{starpu__deprecated__api_8h} +\input{starpu__disk_8h} +\input{starpu__driver_8h} +\input{starpu__expert_8h} +\input{starpu__fxt_8h} +\input{starpu__hash_8h} +\input{starpu__helper_8h} +\input{starpu__heteroprio_8h} +\input{starpu__hip_8h} +\input{starpu__scheduler__toolbox_8h} +\input{starpu__max__fpga_8h} +\input{starpu__mod_8f90} +\input{starpu__mpi_8h} +\input{starpu__mpi__ft_8h} +\input{starpu__mpi__lb_8h} +\input{starpu__opencl_8h} +\input{starpu__openmp_8h} +\input{starpu__parallel__worker_8h} +\input{starpu__perf__monitoring_8h} +\input{starpu__perf__steering_8h} +\input{starpu__perfmodel_8h} +\input{starpu__profiling_8h} +\input{starpu__profiling__tool_8h} +\input{starpu__rand_8h} +\input{starpu__sched__component_8h} +\input{starpu__sched__ctx_8h} +\input{starpu__sched__ctx__hypervisor_8h} +\input{starpu__scheduler_8h} +\input{starpu__simgrid__wrap_8h} +\input{starpu__sink_8h} +\input{starpu__stdlib_8h} +\input{starpu__task_8h} +\input{starpu__task__bundle_8h} +\input{starpu__task__dep_8h} +\input{starpu__task__list_8h} +\input{starpu__task__util_8h} +\input{starpu__thread_8h} +\input{starpu__thread__util_8h} +\input{starpu__tree_8h} +\input{starpu__util_8h} +\input{starpu__worker_8h} +\input{starpufft_8h} +\input{sc__hypervisor_8h} +\input{sc__hypervisor__config_8h} +\input{sc__hypervisor__lp_8h} +\input{sc__hypervisor__monitoring_8h} +\input{sc__hypervisor__policy_8h} +\input{starpurm_8h} + +\chapter{Deprecated List} 
+\label{deprecated} +\hypertarget{deprecated}{} +\input{deprecated} + +%\part{Index} +\addcontentsline{toc}{chapter}{Index} +\printindex + +\end{document} diff --git a/doc/doxygen_dev/Makefile.am b/doc/doxygen_dev/Makefile.am new file mode 100644 index 0000000..6c81f76 --- /dev/null +++ b/doc/doxygen_dev/Makefile.am @@ -0,0 +1,127 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +DOX_DIR = $(top_builddir)/doc/doxygen_dev +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_dev +DOX_HTML_DIR = html_dev +DOX_LATEX_DIR = latex +DOX_PDF = starpu_dev.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + chapters/000_introduction.doxy \ + chapters/010_core.doxy + +images = + +if STARPU_BUILD_DOC +config.h: $(top_srcdir)/src/common/config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + @$(SED) -i '1s/^/\/\*\* \@file \*\/\n/' $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + config.h \ + chapters/version.sty \ + chapters/version.html \ + $(top_srcdir)/src/datawizard/data_request.h \ + $(top_srcdir)/src/datawizard/coherency.h \ + $(top_srcdir)/src/datawizard/sort_data_handles.h \ + $(top_srcdir)/src/datawizard/memalloc.h \ + $(top_srcdir)/src/datawizard/copy_driver.h \ + $(top_srcdir)/src/datawizard/filters.h \ + $(top_srcdir)/src/datawizard/datastats.h \ + $(top_srcdir)/src/datawizard/write_back.h \ + $(top_srcdir)/src/datawizard/interfaces/data_interface.h \ + $(top_srcdir)/src/datawizard/memory_manager.h \ + $(top_srcdir)/src/datawizard/node_ops.h \ + $(top_srcdir)/src/datawizard/memstats.h \ + $(top_srcdir)/src/datawizard/datawizard.h \ + $(top_srcdir)/src/datawizard/memory_nodes.h \ + $(top_srcdir)/src/datawizard/footprint.h \ + $(top_srcdir)/src/datawizard/malloc.h \ + $(top_srcdir)/src/drivers/cpu/driver_cpu.h \ + $(top_srcdir)/src/drivers/cuda/driver_cuda.h \ + $(top_srcdir)/src/drivers/opencl/driver_opencl_utils.h \ + $(top_srcdir)/src/drivers/opencl/driver_opencl.h \ + $(top_srcdir)/src/drivers/disk/driver_disk.h \ + $(top_srcdir)/src/drivers/mpi/driver_mpi_common.h \ + $(top_srcdir)/src/drivers/mpi/driver_mpi_sink.h \ + $(top_srcdir)/src/drivers/mpi/driver_mpi_source.h \ + $(top_srcdir)/src/drivers/mp_common/sink_common.h \ + $(top_srcdir)/src/drivers/mp_common/mp_common.h \ + $(top_srcdir)/src/drivers/mp_common/source_common.h \ + 
$(top_srcdir)/src/drivers/driver_common/driver_common.h \ + $(top_srcdir)/src/parallel_worker/starpu_parallel_worker_create.h \ + $(top_srcdir)/src/profiling/profiling.h \ + $(top_srcdir)/src/profiling/bound.h \ + $(top_srcdir)/src/util/starpu_data_cpy.h \ + $(top_srcdir)/src/util/openmp_runtime_support.h \ + $(top_srcdir)/src/util/starpu_task_insert_utils.h \ + $(top_srcdir)/src/common/graph.h \ + $(top_srcdir)/src/common/fxt.h \ + $(top_srcdir)/src/common/starpu_spinlock.h \ + $(top_srcdir)/src/common/rbtree_i.h \ + $(top_srcdir)/src/common/rbtree.h \ + $(top_srcdir)/src/common/timing.h \ + $(top_srcdir)/src/common/rwlock.h \ + $(top_srcdir)/src/common/barrier.h \ + $(top_srcdir)/src/common/prio_list.h \ + $(top_srcdir)/src/common/barrier_counter.h \ + $(top_srcdir)/src/common/uthash.h \ + $(top_srcdir)/src/common/knobs.h \ + $(top_srcdir)/src/common/utils.h \ + $(top_srcdir)/src/common/thread.h \ + $(top_srcdir)/src/common/list.h \ + $(top_srcdir)/src/debug/starpu_debug_helpers.h \ + $(top_srcdir)/src/debug/traces/starpu_fxt.h \ + $(top_srcdir)/src/sched_policies/fifo_queues.h \ + $(top_srcdir)/src/sched_policies/helper_mct.h \ + $(top_srcdir)/src/sched_policies/sched_component.h \ + $(top_srcdir)/src/sched_policies/prio_deque.h \ + $(top_srcdir)/src/core/jobs.h \ + $(top_srcdir)/src/core/disk_ops/unistd/disk_unistd_global.h \ + $(top_srcdir)/src/core/dependencies/tags.h \ + $(top_srcdir)/src/core/dependencies/data_concurrency.h \ + $(top_srcdir)/src/core/dependencies/implicit_data_deps.h \ + $(top_srcdir)/src/core/dependencies/cg.h \ + $(top_srcdir)/src/core/devices.h \ + $(top_srcdir)/src/core/idle_hook.h \ + $(top_srcdir)/src/core/sched_ctx_list.h \ + $(top_srcdir)/src/core/perfmodel/multiple_regression.h \ + $(top_srcdir)/src/core/perfmodel/perfmodel.h \ + $(top_srcdir)/src/core/perfmodel/regression.h \ + $(top_srcdir)/src/core/debug.h \ + $(top_srcdir)/src/core/sched_ctx.h \ + $(top_srcdir)/src/core/simgrid.h \ + $(top_srcdir)/src/core/task_bundle.h \ + 
$(top_srcdir)/src/core/topology.h \ + $(top_srcdir)/src/core/combined_workers.h \ + $(top_srcdir)/src/core/detect_combined_workers.h \ + $(top_srcdir)/src/core/task.h \ + $(top_srcdir)/src/core/disk.h \ + $(top_srcdir)/src/core/sched_policy.h \ + $(top_srcdir)/src/core/errorcheck.h \ + $(top_srcdir)/src/core/progress_hook.h \ + $(top_srcdir)/src/core/drivers.h \ + $(top_srcdir)/src/core/workers.h +endif + diff --git a/doc/doxygen_dev/Makefile.in b/doc/doxygen_dev/Makefile.in new file mode 100644 index 0000000..551a94f --- /dev/null +++ b/doc/doxygen_dev/Makefile.in @@ -0,0 +1,971 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_dev +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg doxygen_filter.sh \ + doxygen-config-include.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES 
= +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config-include.cfg.in \ + $(srcdir)/doxygen-config.cfg.in $(srcdir)/doxygen_filter.sh.in \ + $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ 
+FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ 
+LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ 
+SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ 
+STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = 
@mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_dev +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_dev +DOX_HTML_DIR = html_dev +DOX_LATEX_DIR = latex +DOX_PDF = starpu_dev.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + chapters/000_introduction.doxy \ + chapters/010_core.doxy + +images = +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/data_request.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/coherency.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/sort_data_handles.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memalloc.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/copy_driver.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/filters.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/datastats.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/write_back.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/interfaces/data_interface.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memory_manager.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/node_ops.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memstats.h \ 
+@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/datawizard.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memory_nodes.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/footprint.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/malloc.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/cpu/driver_cpu.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/cuda/driver_cuda.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/opencl/driver_opencl_utils.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/opencl/driver_opencl.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/disk/driver_disk.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_common.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_sink.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_source.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/sink_common.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/mp_common.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/source_common.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/driver_common/driver_common.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/parallel_worker/starpu_parallel_worker_create.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/profiling/profiling.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/profiling/bound.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/starpu_data_cpy.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/openmp_runtime_support.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/starpu_task_insert_utils.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/graph.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/fxt.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/starpu_spinlock.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rbtree_i.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rbtree.h \ +@STARPU_BUILD_DOC_TRUE@ 
$(top_srcdir)/src/common/timing.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rwlock.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/barrier.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/prio_list.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/barrier_counter.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/uthash.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/knobs.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/utils.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/thread.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/list.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/debug/starpu_debug_helpers.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/debug/traces/starpu_fxt.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/fifo_queues.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/helper_mct.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/sched_component.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/prio_deque.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/jobs.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/disk_ops/unistd/disk_unistd_global.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/tags.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/data_concurrency.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/implicit_data_deps.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/cg.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/devices.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/idle_hook.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/sched_ctx_list.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/multiple_regression.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/perfmodel.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/regression.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/debug.h \ +@STARPU_BUILD_DOC_TRUE@ 
$(top_srcdir)/src/core/sched_ctx.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/simgrid.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/task_bundle.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/topology.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/combined_workers.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/detect_combined_workers.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/task.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/disk.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/sched_policy.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/errorcheck.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/progress_hook.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/drivers.h \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/workers.h + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_dev/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_dev/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +doxygen_filter.sh: $(top_builddir)/config.status $(srcdir)/doxygen_filter.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +doxygen-config-include.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config-include.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; 
$(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + 
+@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." 
>> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ 
$(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! 
< refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@config.h: $(top_srcdir)/src/common/config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i '1s/^/\/\*\* \@file \*\/\n/' $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/doxygen_dev/chapters/000_introduction.doxy b/doc/doxygen_dev/chapters/000_introduction.doxy new file mode 100644 index 0000000..c48c9b1 --- /dev/null +++ b/doc/doxygen_dev/chapters/000_introduction.doxy @@ -0,0 +1,28 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \mainpage Introduction + +\htmlonly +

    Foreword

    +\endhtmlonly +\htmlinclude version.html +\htmlinclude foreword.html + +\section Motivation Motivation + + +*/ diff --git a/doc/doxygen_dev/chapters/010_core.doxy b/doc/doxygen_dev/chapters/010_core.doxy new file mode 100644 index 0000000..ec9896c --- /dev/null +++ b/doc/doxygen_dev/chapters/010_core.doxy @@ -0,0 +1,393 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page StarPUCore StarPU Core + +\section CoreEntities StarPU Core Entities + +TODO + +\subsection CoreEntitiesOverview Overview + +Execution entities: +- worker: A worker (see \ref CoreEntitiesWorkers, \ref + CoreEntitiesWorkersAndContexts) entity is a CPU thread created by StarPU to manage + one computing unit. The computing unit can be a local CPU core, an accelerator + or GPU device, or --- on the master side when running in master-slave + distributed mode --- a remote slave computing node. It is responsible for + querying scheduling policies for tasks to execute. + +- sched_context: A scheduling context (see \ref CoreEntitiesContexts, \ref + CoreEntitiesWorkersAndContexts) is a logical set of workers governed by an + instance of a scheduling policy. It defines the computing units to which the + scheduling policy instance may assign work entities. 
+ +- driver: A driver is the set of hardware-dependent routines used by a + worker to initialize its associated computing unit, execute work entities on + it, and finalize the computing unit usage at the end of the session. + +Work entities: +- task: A task is a high level work request submitted to StarPU by the + application, or internally by StarPU itself. + +- job: A job is a low level view of a work request. It is not exposed to + the application. A job structure may be shared among several task structures + in the case of a parallel task. + +Data entities: +- data handle: A data handle is a high-level, application opaque object designating a + piece of data currently registered to the StarPU data management layer. + Internally, it is a \ref _starpu_data_state structure. + +- data replicate: A data replicate is a low-level object designating one + copy of a piece of data registered to StarPU as a data handle, residing in one + memory node managed by StarPU. It is not exposed to the application. + +\subsection CoreEntitiesWorkers Workers + +A worker is a CPU thread created by StarPU. Its role is to manage one computing +unit. This computing unit can be a local CPU core, in which case, the worker +thread manages the actual CPU core to which it is assigned; or it can be a +computing device such as a GPU or an accelerator (or even a remote computing +node when StarPU is running in distributed master-slave mode). When a worker +manages a computing device, the CPU core to which the worker's thread is +assigned is by default exclusively dedicated to the device management work and does not +participate in computation. + +\subsubsection CoreEntitiesWorkersStates States + +Scheduling operations related state + +While a worker is conducting a scheduling operation, e.g. the worker is in the +process of selecting a new task to execute, flag state_sched_op_pending is set +to \c !0, otherwise it is set to \c 0. 
+ +While state_sched_op_pending is !0, the following operations on that +worker (the list is exhaustive) are restricted in the stated way: + +- adding the worker to a context is not allowed; +- removing the worker from a context is not allowed; +- adding the worker to a parallel task team is not allowed; +- removing the worker from a parallel task team is not allowed; +- querying state information about the worker is only allowed while + state_relax_refcnt > 0; + - in particular, querying whether the worker is blocked on a parallel team entry is only + allowed while state_relax_refcnt > 0. + +Entering and leaving the state_sched_op_pending state is done through calls to +\ref _starpu_worker_enter_sched_op() and \ref _starpu_worker_leave_sched_op() +respectively (see these functions in use in functions \ref _starpu_get_worker_task() and +\ref _starpu_get_multi_worker_task()). These calls ensure that any pending +conflicting operation deferred while the worker was in the +state_sched_op_pending state is performed in an orderly manner. + +
    +Scheduling contexts related states + +Flag \c state_changing_ctx_notice is set to \c !0 when a thread is about to +add the worker to a scheduling context or remove it from a scheduling context, and is +currently waiting for a safe window to do so, until the targeted worker is not in a +scheduling operation or parallel task operation anymore. When set to \c !0, this flag also +prevents the targeted worker from attempting a fresh scheduling operation or parallel +task operation, to avoid starvation. However, a scheduling operation +that was already in progress before the notice is allowed to complete. + +Flag \c state_changing_ctx_waiting is set to \c !0 when a scheduling context worker +addition or removal involving the targeted worker is about to occur and the +worker is currently performing a scheduling operation to tell the targeted +worker that the initiator thread is waiting for the scheduling operation to +complete and should be woken up upon completion. + +
    +Relaxed synchronization related states + +Any StarPU worker may participate in scheduling operations, and in this process, +may be forced to observe state information from other workers. +A StarPU worker thread may therefore be observed by any thread, even +other StarPU workers. Since workers may observe each other in any order, it is +not possible to rely exclusively on the \c sched_mutex of each worker to protect the +observation of worker state flags by other workers, because +worker A observing worker B would involve locking workers in (A B) sequence, +while worker B observing worker A would involve locking workers in (B A) +sequence, leading to lock inversion deadlocks. + +Consequently, a thread must never hold more than one worker's sched_mutex at any time. +Instead, workers implement a relaxed locking scheme based on the \c state_relax_refcnt +counter, itself protected by the worker's sched_mutex. When state_relax_refcnt +> 0, the targeted worker state flags may be observed, otherwise the thread attempting +the observation must repeatedly wait on the targeted worker's \c sched_cond +condition until state_relax_refcnt > 0. + +The relaxed mode, while on, can actually be seen as a transactional consistency +model, where concurrent accesses are authorized and potential conflicts are +resolved after the fact. When the relaxed mode is off, the consistency model +becomes a mutual exclusion model, where the sched_mutex of the worker must be +held in order to access or change the worker state. + +
    +Parallel tasks related states + +When a worker is scheduled to participate in the execution of a parallel task, +it must wait for the whole team of workers participating in the execution of +this task to be ready. While the worker waits for its teammates, it is not +available to run other tasks or perform other operations. Such a waiting +operation can therefore not start while conflicting operations such as +scheduling operations and scheduling context resizing involving the worker are +on-going. Conversely these operations and others may query whether the worker is +blocked on a parallel task entry with \ref starpu_worker_is_blocked_in_parallel(). + +The \ref starpu_worker_is_blocked_in_parallel() function is allowed to proceed while +and only while state_relax_refcnt > 0. Due to the relaxed worker locking scheme, +the \c state_blocked_in_parallel flag of the targeted worker may change after it +has been observed by an observer thread. In consequence, flag +\c state_blocked_in_parallel_observed of the targeted worker is set to \c 1 by the +observer immediately after the observation to "taint" the targeted worker. The +targeted worker will clear the \c state_blocked_in_parallel_observed flag tainting +and defer the processing of parallel task related requests until a full +scheduling operation shot completes without the +\c state_blocked_in_parallel_observed flag being tainted again. The purpose of this +tainting flag is to prevent parallel task operations from being started immediately +after the observation of a transient scheduling state. + +Worker's management of parallel tasks is +governed by the following set of state flags and counters: + +- \c state_blocked_in_parallel: set to \c !0 while the worker is currently blocked on a parallel + task; + +- \c state_blocked_in_parallel_observed: set to \c !0 to taint the worker when a + thread has observed the state_blocked_in_parallel flag of this worker while + its \c state_relax_refcnt state counter was \c >0. 
Any pending request to add or + remove the worker from a parallel task team will be deferred until a whole + scheduling operation shot completes without being tainted again. + +- \c state_block_in_parallel_req: set to \c !0 when a thread is waiting on a request + for the worker to be added to a parallel task team. Must be protected by the + worker's \c sched_mutex. + +- \c state_block_in_parallel_ack: set to \c !0 by the worker when acknowledging a + request for being added to a parallel task team. Must be protected by the + worker's \c sched_mutex. + + +- \c state_unblock_in_parallel_req: set to \c !0 when a thread is waiting on a request + for the worker to be removed from a parallel task team. Must be protected by the + worker's \c sched_mutex. + + +- \c state_unblock_in_parallel_ack: set to \c !0 by the worker when acknowledging a + request for being removed from a parallel task team. Must be protected by the + worker's \c sched_mutex. + + +- \c block_in_parallel_ref_count: counts the number of consecutive pending requests + to enter parallel task teams. Only the first of a train of requests for + entering parallel task teams triggers the transition of the + \c state_block_in_parallel_req flag from \c 0 to \c 1. Only the last of a train of + requests to leave a parallel task team triggers the transition of flag + \c state_unblock_in_parallel_req from \c 0 to \c 1. Must be protected by the + worker's \c sched_mutex. + + +\subsubsection CoreEntitiesWorkersOperations Operations + +Entry point + +All the operations of a worker are handled in an iterative fashion, either by +the application code on a thread launched by the application, or automatically +by StarPU on a device-dependent CPU thread launched by StarPU. Whether a +worker's operation cycle is managed automatically or +not is controlled per session by the field \c not_launched_drivers of the \c +starpu_conf struct, and is decided in \c _starpu_launch_drivers function. 
+ +When managed automatically, cycles of operations for a worker are handled by the corresponding +driver specific _starpu_DRV_worker() function, where \c DRV is a driver name such as +cpu (\c _starpu_cpu_worker) or cuda (\c _starpu_cuda_worker), for instance. +Otherwise, the application must supply a thread which will repeatedly call \ref +starpu_driver_run_once() for the corresponding worker. + +In both cases, control is then transferred to +\c _starpu_cpu_driver_run_once (or the corresponding driver specific function). +The cycle of operations typically includes, at least, the following operations: + +- task scheduling +- parallel task team build-up +- task input processing +- data transfer processing +- task execution + +When the worker cycles are handled by StarPU automatically, the iterative +operation processing ends when the \c running field of \c _starpu_config +becomes false. This field should not be read directly, instead it should be read +through the \ref _starpu_machine_is_running() function. + +
    +Task scheduling + +If the worker does not yet have a queued task, it calls +_starpu_get_worker_task() to try and obtain a task. This may involve scheduling +operations such as stealing a queued but not yet executed task from another +worker. The operation may not necessarily succeed if no tasks are ready and/or +suitable to run on the worker's computing unit. + +
    +Parallel task team build-up + +If the worker has a task ready to run and the corresponding job has a size +\c >1, then the task is a parallel job and the worker must synchronize with the +other workers participating in the parallel execution of the job to assign a +unique rank to each worker. The synchronization is done through the job's \c +sync_mutex mutex. + +
    +Task input processing + +Before the task can be executed, its input data must be made available on a +memory node reachable by the worker's computing unit. To do so, the worker calls +\ref _starpu_fetch_task_input(). + +
    +Data transfer processing + +The worker makes pending data transfers (involving memory node(s) that it is +driving) progress, with a call to \ref __starpu_datawizard_progress(). + +
    +Task execution + +Once the worker has a pending task assigned and the input data for that task are +available in the memory node reachable by the worker's computing unit, the +worker calls \c _starpu_cpu_driver_execute_task (or the corresponding driver +specific function) to proceed to the execution of the task. + + +\subsection CoreEntitiesContexts Scheduling Contexts + +A scheduling context is a logical set of workers governed by an instance of a +scheduling policy. Tasks submitted to a given scheduling context are confined to +the computing units governed by the workers belonging to this scheduling context +at the time they get scheduled. + +A scheduling context is identified by an unsigned integer identifier between \c +0 and STARPU_NMAX_SCHED_CTXS - 1. The \c STARPU_NMAX_SCHED_CTXS +identifier value is reserved to indicate an unallocated, invalid or deleted +scheduling context. + +Accesses to the scheduling context structure are governed by a +multiple-readers/single-writer lock (\c rwlock field). Changes to the structure +contents, additions or removals of workers, statistics updates, all must be done +with proper exclusive write access. + +\subsection CoreEntitiesWorkersAndContexts Workers and Scheduling Contexts + +A worker can be assigned to one or more scheduling contexts. It +exclusively receives tasks submitted to the scheduling context(s) it is +currently assigned to at the time such tasks are scheduled. A worker may add itself +to or remove itself from a scheduling context. + +
    +Locking and synchronization rules between workers and scheduling contexts + +A thread currently holding a worker sched_mutex must not attempt to acquire a +scheduling context rwlock, neither for writing nor for reading. Such an attempt +constitutes a lock inversion and may result in a deadlock. + +A worker currently in a scheduling operation must enter the relaxed state before +attempting to acquire a scheduling context rwlock, either for reading or for +writing. + +When the set of workers assigned to a scheduling context is about to be +modified, all the workers in the union between the workers belonging to the +scheduling context before the change and the workers expected to belong to the +scheduling context after the change must be notified using the +\c notify_workers_about_changing_ctx_pending function prior to the update. After +the update, all the workers in that same union must be notified for the update +completion with a call to \c notify_workers_about_changing_ctx_done. + +The function \c notify_workers_about_changing_ctx_pending places every +worker passed in argument in a state compatible with changing the scheduling +context assignment of that worker, possibly blocking until that worker leaves +incompatible states such as a pending scheduling operation. If the caller of +\c notify_workers_about_changing_ctx_pending() is itself a worker included in the set +of workers passed in argument, it does not notify itself, with the assumption +that the worker is already calling \c notify_workers_about_changing_ctx_pending() +from a state compatible with a scheduling context assignment update. +Once a worker has been notified about a scheduling context change pending, it +cannot proceed with incompatible operations such as a scheduling operation until +it receives a notification that the context update operation is complete. + +\subsection CoreEntitiesDrivers Drivers + +Each driver defines a set of routines depending on some specific hardware. 
These +routines include hardware discovery/initialization, task execution, device +memory management and data transfers. + +While most hardware dependent routines are in source files located in the \c +/src/drivers subdirectory of the StarPU tree, some can be found elsewhere in the +tree such as \c src/datawizard/malloc.c for memory allocation routines or the +subdirectories of \c src/datawizard/interfaces/ for data transfer routines. + +The driver ABI defined in the \ref _starpu_driver_ops structure includes the +following operations: + +- \c .init: initialize a driver instance for the calling worker + managing a hardware computing unit compatible with + this driver. + +- \c .run_once: perform a single driver progress cycle for the calling worker + (see \ref CoreEntitiesWorkersOperations). + +- \c .deinit: deinitialize the driver instance for the calling worker. + +- \c .run: executes the following sequence automatically: call \c .init, + repeatedly call \c .run_once until the function \ref + _starpu_machine_is_running() returns false, call \c .deinit. + +The source code common to all drivers is shared in +src/drivers/driver_common/driver_common.[ch]. This file includes +services such as grabbing a new task to execute on a worker, managing statistics +accounting on job startup and completion and updating the worker status. + +\subsubsection CoreEntitiesDriversMP Master/Slave Drivers + +A subset of the drivers corresponds to drivers managing computing units in +master/slave mode, that is, drivers involving a local master instance managing +one or more remote slave instances on the targeted device(s). This includes +devices such as discrete manycore accelerators (e.g. Intel's Knights Corner +board), or pseudo devices such as a cluster of cpu nodes driven +through StarPU's MPI master/slave mode. A driver instance on the master side +is named the \b source, while a driver instance on the slave side is named +the \b sink. 
+ +A significant part of the work realized on the source and sink sides of +master/slave drivers is identical among all master/slave drivers, due to the +similarities in the software pattern. Therefore, many routines are shared among +all these drivers in the \c src/drivers/mp_common subdirectory. In particular, a +set of default commands to be used between sources and sinks is defined, +assuming the availability of some communication channel between them (see enum +\ref _starpu_mp_command) + +TODO + +\subsection CoreEntitiesTasksJobs Tasks and Jobs + +TODO + +\subsection CoreEntitiesData Data + +TODO + +*/ diff --git a/doc/doxygen_dev/chapters/version.html b/doc/doxygen_dev/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_dev/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_dev/chapters/version.sty b/doc/doxygen_dev/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_dev/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_dev/doxygen-config-include.cfg.in b/doc/doxygen_dev/doxygen-config-include.cfg.in new file mode 100644 index 0000000..07c1ee2 --- /dev/null +++ b/doc/doxygen_dev/doxygen-config-include.cfg.in @@ -0,0 +1,17 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# +# do not delete that file even if it's empty diff --git a/doc/doxygen_dev/doxygen-config.cfg.in b/doc/doxygen_dev/doxygen-config.cfg.in new file mode 100644 index 0000000..fd25efb --- /dev/null +++ b/doc/doxygen_dev/doxygen-config.cfg.in @@ -0,0 +1,142 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen_dev/chapters/000_introduction.doxy \ + @top_srcdir@/doc/doxygen_dev/chapters/010_core.doxy \ + @top_builddir@/doc/doxygen_dev/config.h \ + @top_srcdir@/include/starpu_driver.h \ + @top_srcdir@/include/starpu_worker.h \ + @top_builddir@/doc/doxygen/starpu_config.h \ + @top_srcdir@/src/datawizard/data_request.h \ + @top_srcdir@/src/datawizard/coherency.h \ + @top_srcdir@/src/datawizard/sort_data_handles.h \ + @top_srcdir@/src/datawizard/memalloc.h \ + @top_srcdir@/src/datawizard/copy_driver.h \ + @top_srcdir@/src/datawizard/filters.h \ + @top_srcdir@/src/datawizard/datastats.h \ + @top_srcdir@/src/datawizard/write_back.h \ + @top_srcdir@/src/datawizard/interfaces/data_interface.h \ + @top_srcdir@/src/datawizard/memory_manager.h \ + @top_srcdir@/src/datawizard/node_ops.h \ + @top_srcdir@/src/datawizard/memstats.h \ + @top_srcdir@/src/datawizard/datawizard.h \ + @top_srcdir@/src/datawizard/memory_nodes.h \ + @top_srcdir@/src/datawizard/footprint.h \ + @top_srcdir@/src/datawizard/malloc.h \ + @top_srcdir@/src/drivers/cpu/driver_cpu.h \ + @top_srcdir@/src/drivers/cuda/driver_cuda.h \ + @top_srcdir@/src/drivers/opencl/driver_opencl_utils.h \ + @top_srcdir@/src/drivers/opencl/driver_opencl.h \ + @top_srcdir@/src/drivers/disk/driver_disk.h \ + @top_srcdir@/src/drivers/mpi/driver_mpi_common.h \ + @top_srcdir@/src/drivers/mpi/driver_mpi_sink.h \ + @top_srcdir@/src/drivers/mpi/driver_mpi_source.h \ + @top_srcdir@/src/drivers/mp_common/sink_common.h \ + @top_srcdir@/src/drivers/mp_common/mp_common.h \ + @top_srcdir@/src/drivers/mp_common/source_common.h \ + @top_srcdir@/src/drivers/driver_common/driver_common.h \ + @top_srcdir@/src/parallel_worker/starpu_parallel_worker_create.h \ + @top_srcdir@/src/profiling/profiling.h \ + @top_srcdir@/src/profiling/bound.h \ + @top_srcdir@/src/util/starpu_data_cpy.h \ + @top_srcdir@/src/util/openmp_runtime_support.h \ + @top_srcdir@/src/util/starpu_task_insert_utils.h \ + @top_srcdir@/src/common/graph.h \ 
+ @top_srcdir@/src/common/fxt.h \ + @top_srcdir@/src/common/starpu_spinlock.h \ + @top_srcdir@/src/common/rbtree_i.h \ + @top_srcdir@/src/common/rbtree.h \ + @top_srcdir@/src/common/timing.h \ + @top_srcdir@/src/common/rwlock.h \ + @top_srcdir@/src/common/barrier.h \ + @top_srcdir@/src/common/prio_list.h \ + @top_srcdir@/src/common/barrier_counter.h \ + @top_srcdir@/src/common/uthash.h \ + @top_srcdir@/src/common/knobs.h \ + @top_srcdir@/src/common/utils.h \ + @top_srcdir@/src/common/thread.h \ + @top_srcdir@/src/common/list.h \ + @top_srcdir@/src/debug/starpu_debug_helpers.h \ + @top_srcdir@/src/debug/traces/starpu_fxt.h \ + @top_srcdir@/src/sched_policies/fifo_queues.h \ + @top_srcdir@/src/sched_policies/helper_mct.h \ + @top_srcdir@/src/sched_policies/sched_component.h \ + @top_srcdir@/src/sched_policies/prio_deque.h \ + @top_srcdir@/src/core/jobs.h \ + @top_srcdir@/src/core/disk_ops/unistd/disk_unistd_global.h \ + @top_srcdir@/src/core/dependencies/tags.h \ + @top_srcdir@/src/core/dependencies/data_concurrency.h \ + @top_srcdir@/src/core/dependencies/implicit_data_deps.h \ + @top_srcdir@/src/core/dependencies/cg.h \ + @top_srcdir@/src/core/idle_hook.h \ + @top_srcdir@/src/core/sched_ctx_list.h \ + @top_srcdir@/src/core/perfmodel/multiple_regression.h \ + @top_srcdir@/src/core/perfmodel/perfmodel.h \ + @top_srcdir@/src/core/perfmodel/regression.h \ + @top_srcdir@/src/core/debug.h \ + @top_srcdir@/src/core/sched_ctx.h \ + @top_srcdir@/src/core/simgrid.h \ + @top_srcdir@/src/core/task_bundle.h \ + @top_srcdir@/src/core/topology.h \ + @top_srcdir@/src/core/combined_workers.h \ + @top_srcdir@/src/core/detect_combined_workers.h \ + @top_srcdir@/src/core/task.h \ + @top_srcdir@/src/core/disk.h \ + @top_srcdir@/src/core/sched_policy.h \ + @top_srcdir@/src/core/errorcheck.h \ + @top_srcdir@/src/core/progress_hook.h \ + @top_srcdir@/src/core/drivers.h \ + @top_srcdir@/src/core/workers.h \ + @top_srcdir@/mpi/src/starpu_mpi_init.h \ + 
@top_srcdir@/mpi/src/starpu_mpi_datatype.h \ + @top_srcdir@/mpi/src/starpu_mpi_task_insert.h \ + @top_srcdir@/mpi/src/starpu_mpi_select_node.h \ + @top_srcdir@/mpi/src/starpu_mpi_fxt.h \ + @top_srcdir@/mpi/src/starpu_mpi_cache.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_mpi_backend.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_driver.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_early_data.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_comm.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_tag.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_mpi.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_early_request.h \ + @top_srcdir@/mpi/src/mpi/starpu_mpi_sync_data.h \ + @top_srcdir@/mpi/src/load_balancer/policy/load_data_interface.h \ + @top_srcdir@/mpi/src/load_balancer/policy/load_balancer_policy.h \ + @top_srcdir@/mpi/src/load_balancer/policy/data_movements_interface.h \ + @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h \ + @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad_backend.h \ + @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad.h \ + @top_srcdir@/mpi/src/starpu_mpi_stats.h \ + @top_srcdir@/mpi/src/starpu_mpi_private.h \ + @top_srcdir@/mpi/src/starpu_mpi_cache_stats.h \ + @top_srcdir@/starpurm/src/starpurm_private.h + + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen_dev \ + @top_srcdir@/doc/doxygen/chapters + +INPUT_FILTER = @top_builddir@/doc/doxygen_dev/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen_dev/refman.tex + +#IMAGE_PATH = @top_srcdir@/doc/doxygen_dev/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +PROJECT_NAME = "StarPU Internal Handbook" +HTML_OUTPUT = html_dev diff --git a/doc/doxygen_dev/doxygen_filter.sh.in b/doc/doxygen_dev/doxygen_filter.sh.in new file mode 100755 index 0000000..aeb0056 --- /dev/null +++ b/doc/doxygen_dev/doxygen_filter.sh.in @@ -0,0 +1,26 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+#
+# Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+if [ "$(basename "$1")" == "starpufft.h" ] ; then # $1: path of the file to filter; quoted so paths with spaces survive
+	gcc -E "$1" -I "@top_srcdir@/include/" -I "@top_builddir@/include/" | grep -i starpufft
+else
+	# the macro STARPU_DEPRECATED needs to be removed as it is not properly processed by doxygen
+	# lines starting with // in the doxygen input files are considered as comments to be removed
+	# replace LIST_TYPE foo by struct foo so that doxygen processes it correctly
+	# we assume lines only containing ); are ending of the definition of such a struct
+	sed -e 's/STARPU_DEPRECATED//' -e 's/^\/\/.*//' -e 's/LIST_TYPE(\(.*\),/struct \1 {/' -e 's/^);/};/' "$1"
+fi
+
diff --git a/doc/doxygen_dev/refman.tex b/doc/doxygen_dev/refman.tex
new file mode 100644
index 0000000..dcb314f
--- /dev/null
+++ b/doc/doxygen_dev/refman.tex
@@ -0,0 +1,151 @@
+% StarPU --- Runtime system for heterogeneous multicore architectures.
+%
+% Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+%
+% StarPU is free software; you can redistribute it and/or modify
+% it under the terms of the GNU Lesser General Public License as published by
+% the Free Software Foundation; either version 2.1 of the License, or (at
+% your option) any later version.
+% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. +% +\newcommand\starputitle{StarPU Internal Handbook} +\setcounter{tocdepth}{1} +\input{./title.tex} + +\chapter{Introduction} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Star\+PU Core} +\label{StarPUCore} +\hypertarget{StarPUCore}{} +\input{StarPUCore} + +\chapter{Module Index} +\input{modules} + +\chapter{Module Documentation} +\label{ModuleDocumentation} +\hypertarget{ModuleDocumentation}{} + +\input{group__workers} + +\chapter{File Index} +\input{files} + +\chapter{StarPU File Documentation} +\input{barrier_8h} +\input{barrier__counter_8h} +\input{bound_8h} +\input{cg_8h} +\input{coherency_8h} +\input{combined__workers_8h} +\input{config_8h} +\input{copy__driver_8h} +\input{data__concurrency_8h} +\input{data__interface_8h} +\input{data__request_8h} +\input{datastats_8h} +\input{datawizard_8h} +\input{debug_8h} +\input{detect__combined__workers_8h} +\input{disk_8h} +\input{disk__unistd__global_8h} +\input{driver__common_8h} +\input{driver__cpu_8h} +\input{driver__cuda_8h} +\input{driver__disk_8h} +\input{driver__mpi__common_8h} +\input{driver__mpi__sink_8h} +\input{driver__mpi__source_8h} +\input{driver__opencl_8h} +\input{driver__opencl__utils_8h} +\input{drivers_8h} +\input{errorcheck_8h} +\input{fifo__queues_8h} +\input{filters_8h} +\input{footprint_8h} +\input{fxt_8h} +\input{graph_8h} +\input{helper__mct_8h} +\input{idle__hook_8h} +\input{implicit__data__deps_8h} +\input{jobs_8h} +\input{knobs_8h} +\input{malloc_8h} +\input{memalloc_8h} +\input{memory__manager_8h} +\input{memory__nodes_8h} +\input{memstats_8h} +\input{mp__common_8h} +\input{multiple__regression_8h} +\input{node__ops_8h} +\input{openmp__runtime__support_8h} +\input{perfmodel_8h} +\input{prio__deque_8h} 
+\input{prio__list_8h} +\input{profiling_8h} +\input{progress__hook_8h} +\input{rbtree_8h} +\input{rbtree__i_8h} +\input{regression_8h} +\input{rwlock_8h} +\input{sched__component_8h} +\input{sched__ctx_8h} +\input{sched__ctx__list_8h} +\input{sched__policy_8h} +\input{simgrid_8h} +\input{sink__common_8h} +\input{sort__data__handles_8h} +\input{source__common_8h} +\input{starpu__parallel__worker__create_8h} +\input{starpu__data__cpy_8h} +\input{starpu__debug__helpers_8h} +\input{starpu__fxt_8h} +\input{starpu__spinlock_8h} +\input{starpu__task__insert__utils_8h} +\input{tags_8h} +\input{task_8h} +\input{task__bundle_8h} +\input{thread_8h} +\input{timing_8h} +\input{topology_8h} +\input{utils_8h} +\input{uthash_8h} +\input{write__back_8h} + +\chapter{StarPU MPI File Documentation} +\input{starpu__mpi__cache_8h} +\input{starpu__mpi__driver_8h} +\input{starpu__mpi__init_8h} +\input{starpu__mpi__nmad__backend_8h} +\input{starpu__mpi__stats_8h} +\input{starpu__mpi__cache__stats_8h} +\input{starpu__mpi__early__data_8h} +\input{starpu__mpi__mpi_8h} +\input{starpu__mpi__nmad__unknown__datatype_8h} +\input{starpu__mpi__sync__data_8h} +\input{starpu__mpi__comm_8h} +\input{starpu__mpi__early__request_8h} +\input{starpu__mpi__mpi__backend_8h} +\input{starpu__mpi__private_8h} +\input{starpu__mpi__tag_8h} +\input{starpu__mpi__datatype_8h} +\input{starpu__mpi__fxt_8h} +\input{starpu__mpi__nmad_8h} +\input{starpu__mpi__select__node_8h} +\input{starpu__mpi__task__insert_8h} +\input{load__balancer__policy_8h} +\input{load__data__interface_8h} +\input{data__movements__interface_8h} + +\chapter{StarPU Resource Manager File Documentation} +\input{starpurm__private_8h} + +\end{document} diff --git a/doc/doxygen_web_applications/Makefile.am b/doc/doxygen_web_applications/Makefile.am new file mode 100644 index 0000000..9d90a18 --- /dev/null +++ b/doc/doxygen_web_applications/Makefile.am @@ -0,0 +1,56 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_applications +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_applications +DOX_HTML_DIR = html_web_applications +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_applications.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_applications/applications_intro.doxy \ + ../doxygen/chapters/starpu_applications/vector_scaling.doxy \ + ../doxygen/chapters/starpu_applications/code/vector_scal_c.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_c_align.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_cpu.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_starpu.c \ + ../doxygen/chapters/starpu_applications/stencil.doxy \ + ../doxygen/chapters/starpu_applications/code/stencil5.c \ + ../doxygen/chapters/starpu_applications/code/stencil5_starpu.c \ + ../doxygen/chapters/starpu_applications/code/stencil5_starpu_mpi.c \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html 
+endif + diff --git a/doc/doxygen_web_applications/Makefile.in b/doc/doxygen_web_applications/Makefile.in new file mode 100644 index 0000000..6c6cb92 --- /dev/null +++ b/doc/doxygen_web_applications/Makefile.in @@ -0,0 +1,893 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_applications +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = 
@CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = 
@PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_applications +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_applications +DOX_HTML_DIR = html_web_applications +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_applications.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_applications/applications_intro.doxy \ + ../doxygen/chapters/starpu_applications/vector_scaling.doxy \ + ../doxygen/chapters/starpu_applications/code/vector_scal_c.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_c_align.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_cpu.c \ + ../doxygen/chapters/starpu_applications/code/vector_scal_starpu.c \ + ../doxygen/chapters/starpu_applications/stencil.doxy \ + 
../doxygen/chapters/starpu_applications/code/stencil5.c \ + ../doxygen/chapters/starpu_applications/code/stencil5_starpu.c \ + ../doxygen/chapters/starpu_applications/code/stencil5_starpu_mpi.c \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_applications/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_applications/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but doing so prevents the interactive menu from opening when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_applications/chapters/version.html b/doc/doxygen_web_applications/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_applications/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_applications/chapters/version.sty b/doc/doxygen_web_applications/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_applications/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_applications/doxygen-config.cfg.in b/doc/doxygen_web_applications/doxygen-config.cfg.in new file mode 100644 index 0000000..42d204e --- /dev/null +++ b/doc/doxygen_web_applications/doxygen-config.cfg.in @@ -0,0 +1,44 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_applications/applications_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/stencil.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters \ + @top_srcdir@/doc/doxygen/chapters/starpu_applications/code + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +HTML_OUTPUT = html_web_applications + +@INCLUDE_PATH = ../../doc/doxygen/ + +PROJECT_NAME = "StarPU Handbook - StarPU Applications" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_applications/refman.tex b/doc/doxygen_web_applications/refman.tex new file mode 100644 index 0000000..a3b749b --- /dev/null +++ b/doc/doxygen_web_applications/refman.tex @@ -0,0 +1,47 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Applications} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{A Vector Scaling Application} +\label{VectorScalingApplication} +\hypertarget{VectorApplication}{} +\input{VectorApplication} + +\chapter{A Stencil Application} +\label{StencilApplication} +\hypertarget{StencilApplication}{} +\input{StencilApplication} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_basics/Makefile.am b/doc/doxygen_web_basics/Makefile.am new file mode 100644 index 0000000..646eb86 --- /dev/null +++ b/doc/doxygen_web_basics/Makefile.am @@ -0,0 +1,105 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_basics +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_basics +DOX_HTML_DIR = html_web_basics +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_basics.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_basics/basics_intro.doxy \ + ../doxygen/chapters/starpu_basics/starpu_applications.doxy \ + ../doxygen/chapters/starpu_basics/basic_examples.doxy \ + ../doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ + ../doxygen/chapters/starpu_basics/tasks.doxy \ + ../doxygen/chapters/starpu_basics/data_management.doxy \ + ../doxygen/chapters/starpu_basics/scheduling.doxy \ + ../doxygen/chapters/starpu_basics/examples_sources.doxy \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + 
../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + 
../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_basics/Makefile.in b/doc/doxygen_web_basics/Makefile.in new file mode 100644 index 0000000..5b1e319 --- /dev/null +++ b/doc/doxygen_web_basics/Makefile.in @@ -0,0 +1,943 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_basics +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo 
= \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ 
+GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ 
+LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ 
+STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = 
@STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = 
@mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_basics +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_basics +DOX_HTML_DIR = html_web_basics +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_basics.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_basics/basics_intro.doxy \ + ../doxygen/chapters/starpu_basics/starpu_applications.doxy \ + ../doxygen/chapters/starpu_basics/basic_examples.doxy \ + ../doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ + ../doxygen/chapters/starpu_basics/tasks.doxy \ + ../doxygen/chapters/starpu_basics/data_management.doxy \ + ../doxygen/chapters/starpu_basics/scheduling.doxy \ + ../doxygen/chapters/starpu_basics/examples_sources.doxy \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c \ + ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + 
../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + 
../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_basics/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_basics/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_basics/chapters/version.html b/doc/doxygen_web_basics/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_basics/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_basics/chapters/version.sty b/doc/doxygen_web_basics/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_basics/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_basics/doxygen-config.cfg.in b/doc/doxygen_web_basics/doxygen-config.cfg.in new file mode 100644 index 0000000..5051369 --- /dev/null +++ b/doc/doxygen_web_basics/doxygen-config.cfg.in @@ -0,0 +1,49 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_basics/basics_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/basic_examples.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/tasks.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/data_management.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/scheduling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/examples_sources.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters \ + @top_srcdir@/doc/doxygen/chapters/starpu_basics/code + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +HTML_OUTPUT = html_web_basics + +@INCLUDE_PATH = ../../doc/doxygen/ + +PROJECT_NAME = "StarPU Handbook - StarPU Basics" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_basics/refman.tex b/doc/doxygen_web_basics/refman.tex new file mode 100644 index 0000000..8e293d5 --- /dev/null +++ b/doc/doxygen_web_basics/refman.tex @@ -0,0 +1,72 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Basics} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{StarPU Applications} +\label{StarPUApplications} +\hypertarget{StarPUApplications}{} +\input{StarPUApplications} + +\chapter{Basic Examples} +\label{BasicExamples} +\hypertarget{BasicExamples}{} +\input{BasicExamples} + +\chapter{Full Source Code for the `Scaling a Vector' Example} +\label{FullSourceCodeVectorScal} +\hypertarget{FullSourceCodeVectorScal}{} +\input{FullSourceCodeVectorScal} + +\chapter{Tasks In StarPU} +\label{TasksInStarPU} +\hypertarget{TasksInStarPU}{} +\input{TasksInStarPU} + +\chapter{Data Management} +\label{DataManagement} +\hypertarget{DataManagement}{} +\input{DataManagement} + +\chapter{Scheduling} +\label{Scheduling} +\hypertarget{Scheduling}{} +\input{Scheduling} + +\chapter{Examples in StarPU Sources} +\label{ExamplesInStarPUSources} +\hypertarget{ExamplesInStarPUSources}{} +\input{ExamplesInStarPUSources} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_extensions/Makefile.am b/doc/doxygen_web_extensions/Makefile.am new file mode 100644 index 0000000..d63fb0b --- /dev/null +++ b/doc/doxygen_web_extensions/Makefile.am @@ -0,0 +1,121 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_extensions +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_extensions +DOX_HTML_DIR = html_web_extensions +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_extensions.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_extensions/extensions_intro.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ + ../doxygen/chapters/starpu_extensions/helpers.doxy \ + ../doxygen/chapters/starpu_extensions/debugging_tools.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ + ../doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ + ../doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + ../doxygen/chapters/starpu_extensions/cuda_support.doxy \ + ../doxygen/chapters/starpu_extensions/opencl_support.doxy \ + ../doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ + ../doxygen/chapters/starpu_extensions/out_of_core.doxy \ + ../doxygen/chapters/starpu_extensions/mpi_support.doxy \ + ../doxygen/chapters/starpu_extensions/tcpip_support.doxy \ + ../doxygen/chapters/starpu_extensions/transactions.doxy \ + ../doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ + ../doxygen/chapters/starpu_extensions/fft_support.doxy \ + ../doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ + ../doxygen/chapters/starpu_extensions/bubble.doxy \ + ../doxygen/chapters/starpu_extensions/parallel_worker.doxy \ + ../doxygen/chapters/starpu_extensions/interoperability.doxy \ + 
../doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ + ../doxygen/chapters/starpu_extensions/simgrid.doxy \ + ../doxygen/chapters/starpu_extensions/code/complex.c \ + ../doxygen/chapters/starpu_extensions/code/disk_compute.c \ + ../doxygen/chapters/starpu_extensions/code/disk_copy.c \ + ../doxygen/chapters/starpu_extensions/code/forkmode.c \ + ../doxygen/chapters/starpu_extensions/code/multiformat.c \ + ../doxygen/chapters/starpu_extensions/code/simgrid.c \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + 
../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_extensions/Makefile.in b/doc/doxygen_web_extensions/Makefile.in new file mode 100644 index 0000000..45f5d07 --- /dev/null +++ b/doc/doxygen_web_extensions/Makefile.in @@ -0,0 +1,959 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_extensions +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; 
\ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ 
+CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = 
@LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = 
@PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ 
+STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = 
@build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_extensions +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_extensions +DOX_HTML_DIR = html_web_extensions +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_extensions.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_extensions/extensions_intro.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ + ../doxygen/chapters/starpu_extensions/helpers.doxy \ + ../doxygen/chapters/starpu_extensions/debugging_tools.doxy \ + ../doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ + ../doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ + 
../doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + ../doxygen/chapters/starpu_extensions/cuda_support.doxy \ + ../doxygen/chapters/starpu_extensions/opencl_support.doxy \ + ../doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ + ../doxygen/chapters/starpu_extensions/out_of_core.doxy \ + ../doxygen/chapters/starpu_extensions/mpi_support.doxy \ + ../doxygen/chapters/starpu_extensions/tcpip_support.doxy \ + ../doxygen/chapters/starpu_extensions/transactions.doxy \ + ../doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ + ../doxygen/chapters/starpu_extensions/fft_support.doxy \ + ../doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ + ../doxygen/chapters/starpu_extensions/bubble.doxy \ + ../doxygen/chapters/starpu_extensions/parallel_worker.doxy \ + ../doxygen/chapters/starpu_extensions/interoperability.doxy \ + ../doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ + ../doxygen/chapters/starpu_extensions/simgrid.doxy \ + ../doxygen/chapters/starpu_extensions/code/complex.c \ + ../doxygen/chapters/starpu_extensions/code/disk_compute.c \ + ../doxygen/chapters/starpu_extensions/code/disk_copy.c \ + ../doxygen/chapters/starpu_extensions/code/forkmode.c \ + ../doxygen/chapters/starpu_extensions/code/multiformat.c \ + ../doxygen/chapters/starpu_extensions/code/simgrid.c \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + 
../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + 
../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_extensions/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_extensions/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_extensions/chapters/version.html b/doc/doxygen_web_extensions/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_extensions/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_extensions/chapters/version.sty b/doc/doxygen_web_extensions/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_extensions/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_extensions/doxygen-config.cfg.in b/doc/doxygen_web_extensions/doxygen-config.cfg.in new file mode 100644 index 0000000..29092ed --- /dev/null +++ b/doc/doxygen_web_extensions/doxygen-config.cfg.in @@ -0,0 +1,65 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/transactions.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fft_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/helpers.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + 
@top_srcdir@/doc/doxygen/chapters \ + @top_srcdir@/doc/doxygen/chapters/starpu_extensions/code + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +HTML_OUTPUT = html_web_extensions + +@INCLUDE_PATH = ../../doc/doxygen/ + +PROJECT_NAME = "StarPU Handbook - StarPU Extensions" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " + diff --git a/doc/doxygen_web_extensions/refman.tex b/doc/doxygen_web_extensions/refman.tex new file mode 100644 index 0000000..f7a8316 --- /dev/null +++ b/doc/doxygen_web_extensions/refman.tex @@ -0,0 +1,147 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Extensions} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Advanced Tasks In StarPU} +\label{AdvancedTasksInStarPU} +\hypertarget{AdvancedTasksInStarPU}{} +\input{AdvancedTasksInStarPU} + +\chapter{Advanced Data Management} +\label{AdvancedDataManagement} +\hypertarget{AdvancedDataManagement}{} +\input{AdvancedDataManagement} + +\chapter{Advanced Scheduling} +\label{AdvancedScheduling} +\hypertarget{AdvancedScheduling}{} +\input{AdvancedScheduling} + +\chapter{Scheduling Contexts} +\label{SchedulingContexts} +\hypertarget{SchedulingContexts}{} +\input{SchedulingContexts} + +\chapter{Scheduling Context Hypervisor} +\label{SchedulingContextHypervisor} +\hypertarget{SchedulingContextHypervisor}{} +\input{SchedulingContextHypervisor} + +\chapter{How To Define a New Scheduling Policy} +\label{HowToDefineANewSchedulingPolicy} +\hypertarget{HowToDefineANewSchedulingPolicy}{} +\input{HowToDefineANewSchedulingPolicy} + +\chapter{CUDA Support} +\label{CUDASupport} +\hypertarget{CUDASupport}{} +\input{CUDASupport} + +\chapter{OpenCL Support} +\label{OpenCLSupport} +\hypertarget{OpenCLSupport}{} +\input{OpenCLSupport} + +\chapter{Maxeler FPGA Support} +\label{MaxFPGASupport} +\hypertarget{MaxFPGASupport}{} +\input{MaxFPGASupport} + +\chapter{Out Of Core} +\label{OutOfCore} +\hypertarget{OutOfCore}{} +\input{OutOfCore} + +\chapter{MPI Support} +\label{MPISupport} +\hypertarget{MPISupport}{} +\input{MPISupport} + +\chapter{TCP/IP Support} +\label{TCPIPSupport} +\hypertarget{TCPIPSupport}{} +\input{TCPIPSupport} + +\chapter{Transactions} +\label{Transactions} +\hypertarget{Transactions}{} +\input{Transactions} + +\chapter{Fault Tolerance} +\label{FaultTolerance} +\hypertarget{FaultTolerance}{} +\input{FaultTolerance} + +\chapter{FFT Support} +\label{FFTSupport} +\hypertarget{FFTSupport}{} +\input{FFTSupport} + +\chapter{SOCL OpenCL 
Extensions} +\label{SOCLOpenclExtensions} +\hypertarget{SOCLOpenclExtensions}{} +\input{SOCLOpenclExtensions} + +\chapter{Hierarchical DAGS} +\label{HierarchicalDAGS} +\hypertarget{HierarchicalDAGS}{} +\input{HierarchicalDAGS} + +\chapter{Parallel Workers} +\label{ParallelWorker} +\hypertarget{ParallelWorker}{} +\input{ParallelWorker} + +\chapter{Interoperability Support} +\label{InteropSupport} +\hypertarget{InteropSupport}{} +\input{InteroperabilitySupport} + +\chapter{SimGrid Support} +\label{SimGridSupport} +\hypertarget{SimGridSupport}{} +\input{SimGridSupport} + +\chapter{Debugging Tools} +\label{DebuggingTools} +\hypertarget{DebuggingTools}{} +\input{DebuggingTools} + +\chapter{Helpers} +\label{Helpers} +\hypertarget{Helpers}{} +\input{Helpers} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_faq/Makefile.am b/doc/doxygen_web_faq/Makefile.am new file mode 100644 index 0000000..4494215 --- /dev/null +++ b/doc/doxygen_web_faq/Makefile.am @@ -0,0 +1,95 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_faq +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_faq +DOX_HTML_DIR = html_web_faq +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_faq.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_faq/faq_intro.doxy \ + ../doxygen/chapters/starpu_faq/check_list_performance.doxy \ + ../doxygen/chapters/starpu_faq/faq.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + 
../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_faq/Makefile.in b/doc/doxygen_web_faq/Makefile.in new file mode 100644 index 0000000..3add6dc --- /dev/null +++ b/doc/doxygen_web_faq/Makefile.in @@ -0,0 +1,933 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_faq +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) 
f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS 
= @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = 
@LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = 
@PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ 
+STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = 
@build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_faq +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_faq +DOX_HTML_DIR = html_web_faq +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_faq.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_faq/faq_intro.doxy \ + ../doxygen/chapters/starpu_faq/check_list_performance.doxy \ + ../doxygen/chapters/starpu_faq/faq.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + 
../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + 
../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_faq/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_faq/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_faq/chapters/version.html b/doc/doxygen_web_faq/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_faq/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_faq/chapters/version.sty b/doc/doxygen_web_faq/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_faq/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_faq/doxygen-config.cfg.in b/doc/doxygen_web_faq/doxygen-config.cfg.in new file mode 100644 index 0000000..ad4d2aa --- /dev/null +++ b/doc/doxygen_web_faq/doxygen-config.cfg.in @@ -0,0 +1,43 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +@INCLUDE_PATH = ../../doc/doxygen/ + +HTML_OUTPUT = html_web_faq + +PROJECT_NAME = "StarPU Handbook - StarPU FAQs" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_faq/refman.tex b/doc/doxygen_web_faq/refman.tex new file mode 100644 index 0000000..19dac10 --- /dev/null +++ b/doc/doxygen_web_faq/refman.tex @@ -0,0 +1,47 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU FAQs} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Check List When Performance Are Not There} +\label{CheckListWhenPerformanceAreNotThere} +\hypertarget{CheckListWhenPerformanceAreNotThere}{} +\input{CheckListWhenPerformanceAreNotThere} + +\chapter{Frequently Asked Questions} +\label{FrequentlyAskedQuestions} +\hypertarget{FrequentlyAskedQuestions}{} +\input{FrequentlyAskedQuestions} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_installation/Makefile.am b/doc/doxygen_web_installation/Makefile.am new file mode 100644 index 0000000..7d764ac --- /dev/null +++ b/doc/doxygen_web_installation/Makefile.am @@ -0,0 +1,97 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_installation +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_installation +DOX_HTML_DIR = html_web_installation +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_installation.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_installation/installation_intro.doxy \ + ../doxygen/chapters/starpu_installation/environment_variables.doxy \ + ../doxygen/chapters/starpu_installation/building.doxy \ + ../doxygen/chapters/starpu_installation/configure_options.doxy \ + ../doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + 
../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_installation/Makefile.in b/doc/doxygen_web_installation/Makefile.in new file mode 100644 index 0000000..767b5f3 --- /dev/null +++ b/doc/doxygen_web_installation/Makefile.in 
@@ -0,0 +1,935 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_installation +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = 
@CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = 
@PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_installation +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_installation +DOX_HTML_DIR = html_web_installation +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_installation.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_installation/installation_intro.doxy \ + ../doxygen/chapters/starpu_installation/environment_variables.doxy \ + ../doxygen/chapters/starpu_installation/building.doxy \ + ../doxygen/chapters/starpu_installation/configure_options.doxy \ + ../doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + 
../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + 
../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_installation/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_installation/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_installation/chapters/version.html b/doc/doxygen_web_installation/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_installation/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_installation/chapters/version.sty b/doc/doxygen_web_installation/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_installation/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_installation/doxygen-config.cfg.in b/doc/doxygen_web_installation/doxygen-config.cfg.in new file mode 100644 index 0000000..b71997d --- /dev/null +++ b/doc/doxygen_web_installation/doxygen-config.cfg.in @@ -0,0 +1,45 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_installation/installation_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/building.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/configure_options.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/environment_variables.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +@INCLUDE_PATH = ../../doc/doxygen/ + +HTML_OUTPUT = html_web_installation + +PROJECT_NAME = "StarPU Handbook - StarPU Installation" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_installation/refman.tex b/doc/doxygen_web_installation/refman.tex new file mode 100644 index 0000000..2d9d43f --- /dev/null +++ b/doc/doxygen_web_installation/refman.tex @@ -0,0 +1,57 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Installation} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Building and Installing StarPU} +\label{BuildingAndInstallingStarPU} +\hypertarget{BuildingAndInstallingStarPU}{} +\input{BuildingAndInstallingStarPU} + +\chapter{Compilation Configuration} +\label{CompilationConfiguration} +\hypertarget{CompilationConfiguration}{} +\input{CompilationConfiguration} + +\chapter{Execution Configuration Through Environment Variables} +\label{ExecutionConfigurationThroughEnvironmentVariables} +\hypertarget{ExecutionConfigurationThroughEnvironmentVariables}{} +\input{ExecutionConfigurationThroughEnvironmentVariables} + +\chapter{Configuration and initialization} +\label{ConfigurationAndInitialization} +\hypertarget{ConfigurationAndInitialization}{} +\input{ConfigurationAndInitialization} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_introduction/Makefile.am b/doc/doxygen_web_introduction/Makefile.am new file mode 100644 index 0000000..1f96c3b --- /dev/null +++ b/doc/doxygen_web_introduction/Makefile.am @@ -0,0 +1,94 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_introduction +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_introduction +DOX_HTML_DIR = html_web_introduction +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_introduction.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_introduction/introduction_intro.doxy \ + ../doxygen/chapters/starpu_introduction/glossary.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + 
../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_introduction/Makefile.in b/doc/doxygen_web_introduction/Makefile.in new file mode 100644 index 0000000..079950d --- 
/dev/null +++ b/doc/doxygen_web_introduction/Makefile.in @@ -0,0 +1,932 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_introduction +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = 
@CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = 
@PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_introduction +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_introduction +DOX_HTML_DIR = html_web_introduction +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_introduction.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_introduction/introduction_intro.doxy \ + ../doxygen/chapters/starpu_introduction/glossary.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + 
../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + 
../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_introduction/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_introduction/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_introduction/chapters/version.html b/doc/doxygen_web_introduction/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_introduction/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_introduction/chapters/version.sty b/doc/doxygen_web_introduction/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_introduction/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_introduction/doxygen-config.cfg.in b/doc/doxygen_web_introduction/doxygen-config.cfg.in new file mode 100644 index 0000000..7ae6aaa --- /dev/null +++ b/doc/doxygen_web_introduction/doxygen-config.cfg.in @@ -0,0 +1,42 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_introduction/glossary.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +@INCLUDE_PATH = ../../doc/doxygen/ + +HTML_OUTPUT = html_web_introduction + +PROJECT_NAME = "StarPU Handbook - StarPU Introduction" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "foreword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html" +ALIASES += "webforeword = \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" diff --git a/doc/doxygen_web_introduction/refman.tex b/doc/doxygen_web_introduction/refman.tex new file mode 100644 index 0000000..f67f8e4 --- /dev/null +++ b/doc/doxygen_web_introduction/refman.tex @@ -0,0 +1,42 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. +% +\newcommand\starputitle{StarPU Handbook - StarPU Introduction} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Glossary} +\label{Glossary} +\hypertarget{Glossary}{} +\input{Glossary} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_languages/Makefile.am b/doc/doxygen_web_languages/Makefile.am new file mode 100644 index 0000000..b3b9289 --- /dev/null +++ b/doc/doxygen_web_languages/Makefile.am @@ -0,0 +1,100 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_languages +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_languages +DOX_HTML_DIR = html_web_languages +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_languages.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_languages/languages_intro.doxy \ + ../doxygen/chapters/starpu_languages/native_fortran_support.doxy \ + ../doxygen/chapters/starpu_languages/java.doxy \ + ../doxygen/chapters/starpu_languages/python.doxy \ + ../doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ + ../doxygen/chapters/starpu_languages/code/nf_initexit.f90 \ + ../doxygen/chapters/starpu_languages/code/java_starpu.java \ + ../doxygen/chapters/starpu_languages/code/java_spark.java \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + 
../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + 
../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_languages/Makefile.in b/doc/doxygen_web_languages/Makefile.in new file mode 100644 index 0000000..363e500 --- /dev/null +++ b/doc/doxygen_web_languages/Makefile.in @@ -0,0 +1,938 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) 
+NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_languages +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = 
$(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ 
+GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ 
+LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ 
+STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = 
@STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = 
@mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_languages +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_languages +DOX_HTML_DIR = html_web_languages +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_languages.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_languages/languages_intro.doxy \ + ../doxygen/chapters/starpu_languages/native_fortran_support.doxy \ + ../doxygen/chapters/starpu_languages/java.doxy \ + ../doxygen/chapters/starpu_languages/python.doxy \ + ../doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ + ../doxygen/chapters/starpu_languages/code/nf_initexit.f90 \ + ../doxygen/chapters/starpu_languages/code/java_starpu.java \ + ../doxygen/chapters/starpu_languages/code/java_spark.java \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + 
../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + 
../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_languages/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_languages/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_languages/chapters/version.html b/doc/doxygen_web_languages/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_languages/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_languages/chapters/version.sty b/doc/doxygen_web_languages/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_languages/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_languages/doxygen-config.cfg.in b/doc/doxygen_web_languages/doxygen-config.cfg.in new file mode 100644 index 0000000..e168081 --- /dev/null +++ b/doc/doxygen_web_languages/doxygen-config.cfg.in @@ -0,0 +1,46 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_languages/languages_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/java.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/python.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters \ + @top_srcdir@/doc/doxygen/chapters/starpu_languages/code + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +@INCLUDE_PATH = ../../doc/doxygen/ + +HTML_OUTPUT = html_web_languages + +PROJECT_NAME = "StarPU Handbook - StarPU Language Bindings" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_languages/refman.tex b/doc/doxygen_web_languages/refman.tex new file mode 100644 index 0000000..11a9c69 --- /dev/null +++ b/doc/doxygen_web_languages/refman.tex @@ -0,0 +1,57 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Language Bindings} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Native Fortran Support} +\label{NativeFortranSupport} +\hypertarget{NativeFortranSupport}{} +\input{NativeFortranSupport} + +\chapter{StarPU Java Interface} +\label{StarPUJavaInterface} +\hypertarget{StarPUJavaInterface}{} +\input{StarPUJavaInterface} + +\chapter{Python Interface} +\label{PythonInterface} +\hypertarget{PythonInterface}{} +\input{PythonInterface} + +\chapter{The StarPU OpenMP Runtime Support (SORS)} +\label{OpenMPRuntimeSupport} +\hypertarget{OpenMPRuntimeSupport}{} +\input{OpenMPRuntimeSupport} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/doxygen_web_performances/Makefile.am b/doc/doxygen_web_performances/Makefile.am new file mode 100644 index 0000000..82d3233 --- /dev/null +++ b/doc/doxygen_web_performances/Makefile.am @@ -0,0 +1,96 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +DOX_DIR = $(top_builddir)/doc/doxygen_web_performances +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg + +DOX_MAIN_DIR = doxygen_web_performances +DOX_HTML_DIR = html_web_performances +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_performances.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +include $(top_srcdir)/doc/doxy.mk + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_performances/performances_intro.doxy \ + ../doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ + ../doxygen/chapters/starpu_performances/online_performance_tools.doxy \ + ../doxygen/../doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + ../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + 
../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +if STARPU_BUILD_DOC +starpu_config.h: $(top_srcdir)/include/starpu_config.h.in + @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +dox_inputs = $(DOX_CONFIG) \ + $(chapters) \ + starpu_config.h \ + chapters/version.sty \ + chapters/version.html +endif + diff --git a/doc/doxygen_web_performances/Makefile.in b/doc/doxygen_web_performances/Makefile.in new file mode 100644 index 0000000..bfb51af --- /dev/null +++ b/doc/doxygen_web_performances/Makefile.in @@ -0,0 +1,934 @@ +# Makefile.in generated by automake 
1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) 
+@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_TRUE@am__append_5 = \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html + +subdir = doc/doxygen_web_performances +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = doxygen-config.cfg +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = 
@CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = 
@PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +DOX_DIR = $(top_builddir)/doc/doxygen_web_performances +DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg +DOX_MAIN_DIR = doxygen_web_performances +DOX_HTML_DIR = html_web_performances +DOX_LATEX_DIR = latex +DOX_PDF = starpu_web_performances.pdf +DOX_TAG = starpu.tag +DOX_STARPU_CONFIG = starpu_config.h + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DOXYGEN = doxygen +PDFLATEX = pdflatex +MAKEINDEX = makeindex +txtdir = $(docdir)/manual +EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) refman.tex $(chapters) \ + $(images) +@STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ -r \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ +@STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) + +chapters = \ + ../doxygen/chapters/foreword.doxy \ + ../doxygen/chapters/starpu_performances/performances_intro.doxy \ + ../doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ + ../doxygen/chapters/starpu_performances/online_performance_tools.doxy \ + ../doxygen/../doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ + ../doxygen/chapters/files.doxy \ + ../doxygen/chapters/fdl_1_3.doxy + +images = \ + ../doxygen/chapters/images/arbiter.png \ + ../doxygen/chapters/images/data_trace.png \ + 
../doxygen/chapters/images/distrib_data.png \ + ../doxygen/chapters/images/distrib_data_histo.png \ + ../doxygen/chapters/images/paje_draw_histogram.png \ + ../doxygen/chapters/images/parallel_worker2.png \ + ../doxygen/chapters/images/runtime-par.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ + ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ + ../doxygen/chapters/images/starpu_chol_model_11_type.png \ + ../doxygen/chapters/images/tasks_size_overhead.png \ + ../doxygen/chapters/images/temanejo.png \ + ../doxygen/chapters/images/eclipse_installer.png \ + ../doxygen/chapters/images/eclipse_install_cdt.png \ + ../doxygen/chapters/images/eclipse_hello_build.png \ + ../doxygen/chapters/images/eclipse_hello_run.png \ + ../doxygen/chapters/images/eclipse_hello_fxt.png \ + ../doxygen/chapters/images/eclipse_hello_graph.png \ + ../doxygen/chapters/images/eclipse_hello_vite.png \ + ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ + ../doxygen/chapters/images/eclipse_hello_plugin.png \ + ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ + ../doxygen/chapters/images/eclipse_hello_hgraph.png \ + ../doxygen/chapters/images/eclipse_install_pde.png \ + ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_log_arr.png \ + ../doxygen/chapters/images/starpu_log_list.png \ + ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ + ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ + ../doxygen/chapters/images/starvz_visu.png \ + ../doxygen/chapters/images/starvz_visu_r.png \ + ../doxygen/chapters/images/trace_bw_heatmap.png \ + ../doxygen/chapters/images/trace_recv_use.png \ + ../doxygen/chapters/images/trace_send_use.png \ + ../doxygen/chapters/images/trace_volume_heatmap.png \ + 
../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ + ../doxygen/chapters/images/starpupy_handle_func_perf.png \ + ../doxygen/chapters/images/starpupy_handle_perf.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ + ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png + +@STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ +@STARPU_BUILD_DOC_TRUE@ $(chapters) \ +@STARPU_BUILD_DOC_TRUE@ starpu_config.h \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ +@STARPU_BUILD_DOC_TRUE@ chapters/version.html + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_performances/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/doxygen_web_performances/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/doc/doxy.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +.MAKE: install-am install-exec-am install-strip uninstall-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip install-txtDATA installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-hook uninstall-txtDATA + +.PRECIOUS: Makefile + + +@STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) +@STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_BUILD_DOC_TRUE@uninstall-hook: +@STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . 
-type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: +@STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) +@STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f 
$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ +@STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ +@STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html +@STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ +@STARPU_BUILD_DOC_TRUE@ fi +@STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ +@STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ done + +@STARPU_BUILD_DOC_TRUE@doxy: +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) + +@STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) +@STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) +@STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation 
<\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi +@STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but by doing so, it avoids opening the interactive menu when browsing files +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi +@STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi +@STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) + +@STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) +@STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) +@STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi +@STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex +@STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ +@STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ +@STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 
's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ +@STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ +@STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ +@STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ +@STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ +@STARPU_BUILD_DOC_TRUE@ else \ +@STARPU_BUILD_DOC_TRUE@ break ; \ +@STARPU_BUILD_DOC_TRUE@ fi; \ +@STARPU_BUILD_DOC_TRUE@ done +@STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) + +@STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in +@STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/doc/doxygen_web_performances/chapters/version.html b/doc/doxygen_web_performances/chapters/version.html new file mode 100644 index 0000000..f504c7a --- /dev/null +++ b/doc/doxygen_web_performances/chapters/version.html @@ -0,0 +1,2 @@ +This manual documents the version 1.4.10 of StarPU. +Its contents was last updated on 2025-12-03. diff --git a/doc/doxygen_web_performances/chapters/version.sty b/doc/doxygen_web_performances/chapters/version.sty new file mode 100644 index 0000000..7527bba --- /dev/null +++ b/doc/doxygen_web_performances/chapters/version.sty @@ -0,0 +1,2 @@ +\newcommand{\STARPUUPDATED}{2025-12-03} +\newcommand{\STARPUVERSION}{1.4.10} diff --git a/doc/doxygen_web_performances/doxygen-config.cfg.in b/doc/doxygen_web_performances/doxygen-config.cfg.in new file mode 100644 index 0000000..b3c2c11 --- /dev/null +++ b/doc/doxygen_web_performances/doxygen-config.cfg.in @@ -0,0 +1,44 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# Copyright (C) 2011-2011 Télécom Sud Paris +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_performances/performances_intro.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy \ + @top_srcdir@/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ + @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ + @top_srcdir@/doc/doxygen/chapters/files.doxy \ + @top_srcdir@/doc/doxygen/chapters/api + +EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ + @top_srcdir@/doc/doxygen/chapters + +INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh + +#LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex + +IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images + +GENERATE_LATEX = @DOC_GENERATE_LATEX@ + +@INCLUDE_PATH = ../../doc/doxygen/ + +HTML_OUTPUT = html_web_performances + +PROJECT_NAME = "StarPU Handbook - StarPU Performances" +ALIASES += "intropage{2} = \mainpage" +ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" +ALIASES += "foreword = " diff --git a/doc/doxygen_web_performances/refman.tex b/doc/doxygen_web_performances/refman.tex new file mode 100644 index 0000000..de13dcc --- /dev/null +++ b/doc/doxygen_web_performances/refman.tex @@ -0,0 +1,52 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% Copyright (C) 2013-2013 Simon Archipoff +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+% +\newcommand\starputitle{StarPU Handbook - StarPU Performances} +\setcounter{tocdepth}{2} +\input{./title.tex} + +\chapter{Organization} +\label{index} +\hypertarget{index}{} +\input{index} + +\chapter{Benchmarking StarPU} +\label{BenchmarkingStarPU} +\hypertarget{BenchmarkingStarPU}{} +\input{BenchmarkingStarPU} + +\chapter{Online Performance Tools} +\label{OnlinePerformanceTools} +\hypertarget{OnlinePerformanceTools}{} +\input{OnlinePerformanceTools} + +\chapter{Offline Performance Tools} +\label{OfflinePerformanceTools} +\hypertarget{OfflinePerformanceTools}{} +\input{OfflinePerformanceTools} + +\part{Appendix} + +\chapter{The GNU Free Documentation License} +\label{GNUFreeDocumentationLicense} +\hypertarget{GNUFreeDocumentationLicense}{} +\input{GNUFreeDocumentationLicense} + +%\part{Index} +%#\addcontentsline{toc}{chapter}{Index} +%\printindex + +\end{document} diff --git a/doc/extractHeadline.sh b/doc/extractHeadline.sh new file mode 100755 index 0000000..dce77e7 --- /dev/null +++ b/doc/extractHeadline.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +root=$(dirname $0) + +( + echo "** Full documentation" + echo " - [[./starpu.pdf][PDF]] - [[./html/][HTML]]" + echo "** Parts of the documentation" + for doc in doxygen_web_introduction doxygen_web_installation doxygen_web_basics doxygen_web_applications doxygen_web_performances doxygen_web_faq doxygen_web_languages doxygen_web_extensions + do + x=$(echo $doc | sed 's/.*_web_//') + + if test -f $root/doxygen/chapters/starpu_$x/${x}_intro.doxy + then + headline=$(grep -A2 webforeword $root/doxygen/chapters/starpu_$x/${x}_intro.doxy | tail -1) + echo "- $x" + if test -n "$headline" + then + echo " - $headline" + fi + echo " - [[./starpu_web_$x.pdf][PDF]] - [[./html_web_$x/][HTML]]" + fi + done + echo "** Developers documentation" + echo " - [[./starpu_dev.pdf][PDF]] - [[./html_dev/][HTML]]" +) > ./README.org diff --git a/doc/fixLinks.sh b/doc/fixLinks.sh new file mode 100755 index 0000000..ee61fc7 --- /dev/null +++ b/doc/fixLinks.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +root=$(dirname $0) +root_src=$root +root_build=$1 + +files=$(find $root_build -name "*html") +if test "$files" == "" +then + # there is no html files to process + exit +fi + +for d in $root_src/doxygen/chapters/starpu_* +do + for f in $(find $d -name "*.doxy") + do + #echo $f + part=$(basename $(dirname $f)) + link=$(grep -F "\page" $f | awk '{print $3}') + if test -z "$link" + then + continue + fi + + x1=$(echo $part | sed 's/starpu/doxygen_web/') + x2=$(echo $part | sed 's/starpu/html_web/') + title=$(grep -F "\page" $f | sed 's;..! .page '$link';;') + #echo $part + #echo $link + #echo $f + #echo $title + + # we replace the link with the correct link in the installation directory, it will not work in the build directory + # there we would have to use ../../$x1/$x2/${link}.html + for ff in $(grep -lrs "Chapter $link" $(find $root_build -name "*html")) + do + script=$(mktemp) + echo "sed -i 's;Chapter "$link";Chapter "$title";' $ff" > $script + . $script ; rm -f $script # do not leave one temporary sed script behind per processed file + done + done +done diff --git a/doc/sectionNumbering.py b/doc/sectionNumbering.py new file mode 100755 index 0000000..d19cf9d --- /dev/null +++ b/doc/sectionNumbering.py @@ -0,0 +1,63 @@ +#!/usr/bin/python3 +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +import os +import operator +import sys + +files = {} + +with open(sys.argv[1]+"/doxygen-config.cfg", "r", encoding="utf-8") as fin: + for line in fin.readlines(): + if ".doxy" in line and not "foreword.doxy" in line: + for x in line.split(" "): + xx = x.strip() + if ".doxy" in xx: + with open(xx, "r", encoding="utf-8") as fin: + for line in fin.readlines(): + if "\page" in line: + line = line.replace("/*! \page ", "").strip() + files[xx] = line[0:line.index(" ")]+".html" + +htmlfiles = ["index.html"] +htmlfiles.extend(files.values()) + +htmldir=sys.argv[2]+"/" + +chapter=0 +for x in htmlfiles: + chapter+=1 + section=0 + with open(htmldir+x, "r", encoding="utf-8") as fin: + with open(htmldir+x+".count.html", "w", encoding="utf-8") as fout: + for line in fin.readlines(): + if not "Foreword" in line: + if "
    " in line: + line = line.replace("
    ", "
    "+str(chapter)+". ") + if "

    " in line: + section += 1 + line = line.replace("

    ", "

    " + str(chapter) + "." + str(section)) + subsection = 0 + if "

    " in line: + subsection += 1 + line = line.replace("

    ", "

    " + str(chapter) + "." + str(section) + "." + str(subsection)) + subsubsection = 0 + if "

    " in line: + subsubsection += 1 + line = line.replace("

    ", "

    " + str(chapter) + "." + str(section) + "." + str(subsection) + "." + str(subsubsection)) + fout.write(line) + os.rename(htmldir+x+".count.html", htmldir+x) diff --git a/doc/title.tex b/doc/title.tex new file mode 100644 index 0000000..f4fe246 --- /dev/null +++ b/doc/title.tex @@ -0,0 +1,54 @@ +% StarPU --- Runtime system for heterogeneous multicore architectures. +% +% Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +% +% StarPU is free software; you can redistribute it and/or modify +% it under the terms of the GNU Lesser General Public License as published by +% the Free Software Foundation; either version 2.1 of the License, or (at +% your option) any later version. +% +% StarPU is distributed in the hope that it will be useful, but +% WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +% +% See the GNU Lesser General Public License in COPYING.LGPL for more details. +% +\input{./version.sty} +\setlength{\parskip}{0pt} +\begin{titlepage} +\vspace*{4cm} +{\Huge \textbf{\starputitle}}\\ +\rule{\textwidth}{1.5mm} +\begin{flushright} +{\Large for StarPU \STARPUVERSION} +\end{flushright} +\rule{\textwidth}{1mm} +~\\ +\vspace*{15cm} +\begin{flushright} +Generated by Doxygen. +\end{flushright} +\end{titlepage} + +\begin{figure}[p] +This manual documents the usage of StarPU version \STARPUVERSION. Its contents +was last updated on \STARPUUPDATED.\\ + +Copyright © 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + +\medskip + +\begin{quote} +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A +copy of the license is included in the section entitled “GNU Free +Documentation License”. 
+\end{quote} +\end{figure} + +\pagenumbering{roman} +\tableofcontents +\pagenumbering{arabic} +\hypersetup{pageanchor=true,citecolor=blue} diff --git a/doc/tutorial/Makefile b/doc/tutorial/Makefile new file mode 100644 index 0000000..ce9cfe8 --- /dev/null +++ b/doc/tutorial/Makefile @@ -0,0 +1,46 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +CFLAGS += $$(pkg-config --define-prefix --cflags starpu-1.4) +LDLIBS += $$(pkg-config --define-prefix --libs starpu-1.4) + +HAS_CUDA = $(shell starpu_config | grep 'STARPU_USE_CUDA 1') +NVCC ?= nvcc -std=c++11 +HAS_OPENCL = $(shell starpu_config | grep 'STARPU_USE_OPENCL 1') + +%.o: %.cu + $(NVCC) $(CFLAGS) $< -c + +TARGETS = hello_world vector_scal + +all: $(TARGETS) + +VECTOR_SCAL_PREREQUISITES = vector_scal.o vector_scal_cpu.o +ifneq ($(strip $(HAS_CUDA)),) +VECTOR_SCAL_PREREQUISITES += vector_scal_cuda.o +VECTOR_SCAL_COMPILER = $(NVCC) +else +VECTOR_SCAL_COMPILER = $(CC) +endif +ifneq ($(strip $(HAS_OPENCL)),) +VECTOR_SCAL_PREREQUISITES += vector_scal_opencl.o +LDLIBS += -lOpenCL +endif + +vector_scal: $(VECTOR_SCAL_PREREQUISITES) + $(VECTOR_SCAL_COMPILER) $^ $(LDLIBS) -o $@ $(LDFLAGS) + +clean: + rm -f $(TARGETS) *.o diff --git a/doc/tutorial/README b/doc/tutorial/README new file mode 100644 index 0000000..5e36302 --- /dev/null +++ b/doc/tutorial/README @@ -0,0 +1,35 @@ +# StarPU --- Runtime 
system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +Instructions on how to compile and run StarPU examples +------------------------------------------------------ + +% export STARPU_DIR= +% export PATH=$PATH:$STARPU_DIR/bin +% export PKG_CONFIG_PATH=$STARPU_DIR/lib/pkgconfig:$PKG_CONFIG_PATH +% export LD_LIBRARY_PATH=$STARPU_DIR/lib:$LD_LIBRARY_PATH + +% starpu_machine_display + +% make hello_world +% ./hello_world + +% make vector_scal +% ./vector_scal + +% STARPU_NCPU=0 ./vector_scal +% STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal +% STARPU_NCPU=0 STARPU_NOPENCL=0 ./vector_scal +% STARPU_NOPENCL=0 STARPU_NCUDA=0 ./vector_scal diff --git a/doc/tutorial/hello_world.c b/doc/tutorial/hello_world.c new file mode 100644 index 0000000..894d71e --- /dev/null +++ b/doc/tutorial/hello_world.c @@ -0,0 +1,73 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +struct params /* argument block handed to the codelet through task->cl_arg */ +{ + int i; + float f; +}; + +void cpu_func(void *buffers[], void *cl_arg) /* CPU kernel: prints the struct params received in cl_arg; buffers is not used */ +{ + struct params *params = cl_arg; + + printf("Hello world (params = {%i, %f})\n", params->i, params->f); +} + +struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_func}, + .nbuffers = 0 /* this codelet declares no data buffers */ +}; + +void callback_func(void *callback_arg) /* task callback: prints the opaque pointer set in task->callback_arg */ +{ + printf("Callback function (arg %p)\n", callback_arg); +} + +int main(int argc, char **argv) +{ + int ret; + + /* initialize StarPU */ + ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; /* Pointer to the codelet defined above */ + + struct params params = { 1, 2.0f }; + task->cl_arg = &params; + task->cl_arg_size = sizeof(params); + + task->callback_func = callback_func; + task->callback_arg = (void*) (uintptr_t) 0x42; + + /* starpu_task_submit will be a blocking call */ + task->synchronous = 1; + + /* submit the task to StarPU */ + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* terminate StarPU */ + starpu_shutdown(); + + return 0; +} diff --git a/doc/tutorial/hello_world_msvc.c b/doc/tutorial/hello_world_msvc.c new file mode 100644 index 0000000..66b0503 --- /dev/null +++ b/doc/tutorial/hello_world_msvc.c @@ -0,0 +1,75 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +struct params /* argument block handed to the codelet through task->cl_arg */ +{ + int i; + float f; +}; + +void cpu_func(void *buffers[], void *cl_arg) /* CPU kernel: prints the struct params received in cl_arg; buffers is not used */ +{ + struct params *params = cl_arg; + + printf("Hello world (params = {%i, %f})\n", params->i, params->f); +} + +void callback_func(void *callback_arg) /* task callback: prints the opaque pointer set in task->callback_arg */ +{ + printf("Callback function (arg %p)\n", callback_arg); +} + +int main(int argc, char **argv) +{ + int ret; + struct starpu_codelet cl; + struct starpu_task *task; + struct params params; + + starpu_codelet_init(&cl); /* the codelet is filled in field by field instead of with a designated initializer */ + cl.cpu_funcs[0] = cpu_func; + cl.nbuffers = 0; + + /* initialize StarPU */ + ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + task = starpu_task_create(); + + task->cl = &cl; /* Pointer to the codelet defined above */ + + params.i = 1; + params.f = 2.0f; + task->cl_arg = &params; + task->cl_arg_size = sizeof(params); + + task->callback_func = callback_func; + task->callback_arg = (void*) (uintptr_t) 0x42; + + /* starpu_task_submit will be a blocking call */ + task->synchronous = 1; + + /* submit the task to StarPU */ + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* terminate StarPU */ + starpu_shutdown(); + + return 0; +} diff --git a/doc/tutorial/vector_scal.c b/doc/tutorial/vector_scal.c new file mode 100644 index 0000000..4448f68 --- /dev/null +++ b/doc/tutorial/vector_scal.c @@ -0,0 +1,117 @@ +/* StarPU
--- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example demonstrates how to use StarPU to scale an array by a factor. + * It shows how to manipulate data with StarPU's data management library. + * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) + * 2- how to submit a task to StarPU + * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) + */ +#include <starpu.h> + +#define NX 2048 + +extern void vector_scal_cpu(void *buffers[], void *_args); +extern void vector_scal_cuda(void *buffers[], void *_args); +extern void vector_scal_opencl(void *buffers[], void *_args); + +static struct starpu_codelet cl = +{ + /* CPU implementation of the codelet */ + .cpu_funcs = {vector_scal_cpu}, +#ifdef STARPU_USE_CUDA + /* CUDA implementation of the codelet */ + .cuda_funcs = {vector_scal_cuda}, +#endif +#ifdef STARPU_USE_OPENCL + /* OpenCL implementation of the codelet */ + .opencl_funcs = {vector_scal_opencl}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program programs; +#endif + +int main(int argc, char **argv) +{ + /* We consider a vector of float that is initialized just as any of C + * data */ + float vector[NX]; + unsigned i; + for (i = 0; i < NX; i++) + vector[i] = 1.0f; + + fprintf(stderr, "BEFORE : First element was %f\n", vector[0]); + + /*
Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it. + * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element. + */ + starpu_data_handle_t vector_handle; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, + NX, sizeof(vector[0])); + + float factor = 3.14f; + + ret = starpu_task_insert(&cl, + /* an argument is passed to the codelet, beware that this is a + * READ-ONLY buffer and that the codelet may be given a pointer to a + * COPY of the argument */ + STARPU_VALUE, &factor, sizeof(factor), + /* the codelet manipulates one buffer in RW mode */ + STARPU_RW, vector_handle, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Wait for tasks completion */ + starpu_task_wait_for_all(); + + /* StarPU does not need to manipulate the array anymore so we can stop + * monitoring it */ + starpu_data_unregister(vector_handle); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_unload_opencl(&programs); +#endif + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + fprintf(stderr, "AFTER First element is %f\n", vector[0]); + + return 0; +} diff
--git a/doc/tutorial/vector_scal_cpu.c b/doc/tutorial/vector_scal_cpu.c new file mode 100644 index 0000000..aaae304 --- /dev/null +++ b/doc/tutorial/vector_scal_cpu.c @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +/* This kernel takes a buffer and scales it by a constant factor */ +void vector_scal_cpu(void *buffers[], void *cl_arg) +{ + unsigned i; + float factor; + + /* + * The "buffers" array matches the task->handles array: for instance + * task->handles[0] is a handle that corresponds to a data with + * vector "interface", so that the first entry of the array in the + * codelet is a pointer to a structure describing such a vector (ie. + * struct starpu_vector_interface *). Here, we therefore manipulate + * the buffers[0] element as a vector: nx gives the number of elements + * in the array, ptr gives the location of the array (that was possibly + * migrated/replicated), and elemsize gives the size of each element.
+ */ + struct starpu_vector_interface *vector = buffers[0]; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(vector); + + /* get a pointer to the local copy of the vector : note that we have to + * cast it in (float *) since a vector could contain any type of + * elements so that the .ptr field is actually a uintptr_t */ + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + + /* scale the vector */ + starpu_codelet_unpack_args(cl_arg, &factor); + for (i = 0; i < n; i++) + val[i] *= factor; +} diff --git a/doc/tutorial/vector_scal_cuda.cu b/doc/tutorial/vector_scal_cuda.cu new file mode 100644 index 0000000..0edc0e3 --- /dev/null +++ b/doc/tutorial/vector_scal_cuda.cu @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include <starpu.h> + +static __global__ void vector_mult_cuda(float *val, unsigned int n, float factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + if (i < n) + val[i] *= factor; +} + +extern "C" void vector_scal_cuda(void *buffers[], void *cl_arg) +{ + float factor; + starpu_codelet_unpack_args(cl_arg, &factor); + + /* length of the vector */ + unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the vector pointer */ + float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; + + vector_mult_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(val, n, factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + diff --git a/doc/tutorial/vector_scal_opencl.c b/doc/tutorial/vector_scal_opencl.c new file mode 100644 index 0000000..bdea803 --- /dev/null +++ b/doc/tutorial/vector_scal_opencl.c @@ -0,0 +1,64 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include <starpu.h> + +extern struct starpu_opencl_program programs; + +void vector_scal_opencl(void *buffers[], void *cl_arg) +{ + float factor; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + starpu_codelet_unpack_args(cl_arg, &factor); + + /* length of the vector */ + unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + /* OpenCL copy of the vector pointer */ + cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + + id = starpu_worker_get_id(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &programs, "vector_mult_opencl", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(n), &n); + err |= clSetKernelArg(kernel, 1, sizeof(val), &val); + err |= clSetKernelArg(kernel, 2, sizeof(factor), &factor); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) local=global; /* never request a work-group larger than the vector */ + else global = (global + local-1) / local * local; /* round the global size up to a multiple of the work-group size */ + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/doc/tutorial/vector_scal_opencl_kernel.cl b/doc/tutorial/vector_scal_opencl_kernel.cl new file mode 100644 index 0000000..099cf62 --- /dev/null +++ b/doc/tutorial/vector_scal_opencl_kernel.cl @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor) +{ + const int i = get_global_id(0); + if (i < nx) + { + val[i] *= factor; + } +} diff --git a/eclipse-plugin/.classpath b/eclipse-plugin/.classpath new file mode 100644 index 0000000..2ef338f --- /dev/null +++ b/eclipse-plugin/.classpath @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/eclipse-plugin/.project b/eclipse-plugin/.project new file mode 100644 index 0000000..34631bd --- /dev/null +++ b/eclipse-plugin/.project @@ -0,0 +1,28 @@ + + + plugin + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + + diff --git a/eclipse-plugin/.settings/org.eclipse.jdt.core.prefs b/eclipse-plugin/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..c9545f0 --- /dev/null +++ b/eclipse-plugin/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=11 +org.eclipse.jdt.core.compiler.compliance=11 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=warning +org.eclipse.jdt.core.compiler.release=enabled +org.eclipse.jdt.core.compiler.source=11 diff --git a/eclipse-plugin/META-INF/MANIFEST.MF b/eclipse-plugin/META-INF/MANIFEST.MF new file mode 100644 index 0000000..2e7ef2a --- /dev/null +++ b/eclipse-plugin/META-INF/MANIFEST.MF @@ -0,0 +1,12 @@ +Manifest-Version: 1.0 +Bundle-ManifestVersion: 2 +Bundle-Name: StarPU +Bundle-SymbolicName: StarPU;singleton:=true +Bundle-Version: 1.0.0.qualifier +Require-Bundle: org.eclipse.ui, + org.eclipse.e4.ui.model.workbench, + org.eclipse.equinox.registry, + org.eclipse.e4.core.di.annotations +Automatic-Module-Name: StarPU +Bundle-RequiredExecutionEnvironment: JavaSE-11 +Bundle-ClassPath: . diff --git a/eclipse-plugin/Makefile.am b/eclipse-plugin/Makefile.am new file mode 100644 index 0000000..5b911fc --- /dev/null +++ b/eclipse-plugin/Makefile.am @@ -0,0 +1,35 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-notests.mk + +EXTRA_DIST = \ + tools/cproject.sh \ + tools/install_workspace.sh \ + build.properties \ + build.xml \ + .classpath \ + plugin.xml \ + .project \ + META-INF/MANIFEST.MF \ + icons/fxt.png \ + icons/svg.png \ + icons/taskGraph.png \ + icons/vite.png \ + .settings/org.eclipse.jdt.core.prefs + +SUBDIRS = src +SUBDIRS += examples + diff --git a/eclipse-plugin/Makefile.in b/eclipse-plugin/Makefile.in new file mode 100644 index 0000000..4101914 --- /dev/null +++ b/eclipse-plugin/Makefile.in @@ -0,0 +1,948 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes 
+am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = eclipse-plugin +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) 
+am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ 
+DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = 
@LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ 
+PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ 
+STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = 
@builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +EXTRA_DIST = \ + tools/cproject.sh \ + tools/install_workspace.sh \ + build.properties \ + build.xml \ + .classpath \ + plugin.xml \ + .project \ + META-INF/MANIFEST.MF \ + icons/fxt.png \ + icons/svg.png \ + icons/taskGraph.png \ + icons/vite.png \ + .settings/org.eclipse.jdt.core.prefs + +SUBDIRS = src examples +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign eclipse-plugin/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/eclipse-plugin/build.properties b/eclipse-plugin/build.properties new file mode 100644 index 0000000..e38f040 --- /dev/null +++ b/eclipse-plugin/build.properties @@ -0,0 +1,8 @@ +source.. = src/ +output.. 
= build/bin/ +bin.includes = plugin.xml,\ + META-INF/,\ + .,\ + icons/,\ + .classpath,\ +src.includes = .classpath diff --git a/eclipse-plugin/build.xml b/eclipse-plugin/build.xml new file mode 100644 index 0000000..957e8fd --- /dev/null +++ b/eclipse-plugin/build.xml @@ -0,0 +1,343 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/eclipse-plugin/examples/Makefile.am b/eclipse-plugin/examples/Makefile.am new file mode 100644 index 0000000..48956b5 --- /dev/null +++ b/eclipse-plugin/examples/Makefile.am @@ -0,0 +1,35 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+include $(top_srcdir)/make/starpu-notests.mk
+
+EXTRA_DIST = \
+	./hello/hello.c \
+	./hello/.settings/language.settings.xml \
+	./hello/.project
+
+ECLIPSE_DIR = $(shell dirname @ECLIPSE@)
+ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace
+
+txtdir = $(libdir)/starpu/eclipse-plugin/examples/hello
+txt_DATA = hello/hello.c \
+	hello/.cproject \
+	hello/.project
+
+script=$(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh
+# Runs after txt_DATA is installed: stage the CDT language settings in hello/.settings (honouring DESTDIR), then import the project headlessly.
+install-data-hook:
+	$(MKDIR_P) $(DESTDIR)$(txtdir)/.settings && $(INSTALL_DATA) $(abs_top_srcdir)/eclipse-plugin/examples/hello/.settings/language.settings.xml $(DESTDIR)$(txtdir)/.settings/language.settings.xml
+	$(ECLIPSE_DIR)/eclipse -noSplash -data $(DESTDIR)$(txtdir)/../../workspace -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(txtdir)
diff --git a/eclipse-plugin/examples/Makefile.in b/eclipse-plugin/examples/Makefile.in
new file mode 100644
index 0000000..aaea2fe
--- /dev/null
+++ b/eclipse-plugin/examples/Makefile.in
@@ -0,0 +1,826 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = eclipse-plugin/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + 
for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(txtdir)" +DATA = $(txt_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = 
@DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ 
+LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ 
+POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ 
+STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = 
@epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +EXTRA_DIST = \ + ./hello/hello.c \ + ./hello/.settings/language.settings.xml \ + ./hello/.project + +ECLIPSE_DIR = $(shell dirname @ECLIPSE@) +ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace +txtdir = $(libdir)/starpu/eclipse-plugin/examples/hello +txt_DATA = hello/hello.c \ + hello/.cproject \ + hello/.project + +script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh +all: all-am + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign eclipse-plugin/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-txtDATA: $(txt_DATA) + @$(NORMAL_INSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ + done + +uninstall-txtDATA: + @$(NORMAL_UNINSTALL) + @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + 
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(txtdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-txtDATA + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-data-hook +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-txtDATA + +.MAKE: install-am install-data-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am \ + install-data-hook install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip install-txtDATA \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-txtDATA + +.PRECIOUS: Makefile + 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null +install-data-hook: + $(INSTALL_DATA) $(abs_top_srcdir)/eclipse-plugin/examples/hello/.settings/language.settings.xml $(txtdir).settings + $(ECLIPSE_DIR)/eclipse -noSplash -data $(DESTDIR)$(txtdir)/../../workspace -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(txtdir) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/eclipse-plugin/examples/hello/.cproject.in b/eclipse-plugin/examples/hello/.cproject.in new file mode 100644 index 0000000..fc4fce4 --- /dev/null +++ b/eclipse-plugin/examples/hello/.cproject.in @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/eclipse-plugin/examples/hello/.project b/eclipse-plugin/examples/hello/.project new file mode 100644 index 0000000..4c1107a --- /dev/null +++ b/eclipse-plugin/examples/hello/.project @@ -0,0 +1,26 @@ + + + hello + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + diff --git a/eclipse-plugin/examples/hello/.settings/language.settings.xml b/eclipse-plugin/examples/hello/.settings/language.settings.xml new file mode 100644 index 0000000..0ce55ed --- /dev/null +++ b/eclipse-plugin/examples/hello/.settings/language.settings.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/eclipse-plugin/examples/hello/hello.c b/eclipse-plugin/examples/hello/hello.c new file mode 100644 index 0000000..001d406 --- /dev/null +++ b/eclipse-plugin/examples/hello/hello.c @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +void display_cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + int nx, i; + struct starpu_vector_interface *vector; + int *val; + + vector = (struct starpu_vector_interface *) buffers[0]; + nx = STARPU_VECTOR_GET_NX(vector); + val = (int *)STARPU_VECTOR_GET_PTR(vector); + + for (i = 0; i < nx; i++) + fprintf(stdout, "V[%d] = %d\n", i, val[i]); +} + +void scal_cpu_func(void *buffers[], void *cl_arg) +{ + int factor, nx, i; + struct starpu_vector_interface *vector; + int *val; + + vector = (struct starpu_vector_interface *) buffers[0]; + nx = STARPU_VECTOR_GET_NX(vector); + val = (int *)STARPU_VECTOR_GET_PTR(vector); + starpu_codelet_unpack_args(cl_arg, &factor); + + for (i = 0; i < nx; i++) + val[i] *= factor; +} + +void hello_cpu_func(void *buffers[], void *cl_arg) +{ + (void)buffers; + int answer; + + starpu_codelet_unpack_args(cl_arg, &answer); + fprintf(stdout, "Hello world, the answer is %d\n", answer); +} + +struct starpu_codelet hello_codelet = +{ + .cpu_funcs = {hello_cpu_func}, + .cpu_funcs_name = {"hello_cpu_func"}, + .nbuffers = 0, + .name = "hello" +}; + +struct starpu_codelet scal_codelet = +{ + .cpu_funcs = {scal_cpu_func}, + .cpu_funcs_name = {"scal_cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "scal" +}; + +struct starpu_codelet display_codelet = +{ + .cpu_funcs = 
{display_cpu_func}, + .cpu_funcs_name = {"display_cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_R}, + .name = "display" +}; + +#define NX 5 + +int main(void) +{ + int answer = 42; + int ret; + int vector[NX]; + unsigned i; + starpu_data_handle_t vector_handle; + + setenv("STARPU_FXT_TRACE", "1", 1); + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + for (i = 0; i < NX; i++) + vector[i] = i+1; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); + + ret = starpu_task_insert(&hello_codelet, + STARPU_VALUE, &answer, sizeof(answer), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&scal_codelet, + STARPU_RW, vector_handle, + STARPU_VALUE, &answer, sizeof(answer), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&display_codelet, + STARPU_R, vector_handle, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_data_unregister(vector_handle); + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/eclipse-plugin/icons/fxt.png b/eclipse-plugin/icons/fxt.png new file mode 100644 index 0000000000000000000000000000000000000000..8d587bbe2320bf789de5964cf9759dc330be7814 GIT binary patch literal 1364 zcmZ`%2~d+q6y8c14+M2uC;>}jEsTn>0UOZJL87F9N|a;pC|V!{6oqj7Lb28cMFpfl z2Pg&s=@cu7=Bbkqz8~_-(e(yq`!3r2x=eY=^~KeLOulJQRTA9ssa%0WiTU z)-(XehyX}q03crg;G@(ke%O})EEw3mGm<(#KW}~;6B82+4GooVqrSfW>C>l!gM;Pe zF9( zmvH9nS=PxDr_YxeP>bmPowRhMAU`j9Pegds{&V>S$1*Z_U4yXT^U;2{50~yu`cAFU zNM1ZEuc*}OOc(}BT7jXc;8trrm#0-u@9M=b z3gs%z-%o~_hv-L<6McevKb&Os507eRb(ldLM)4;2?NI7a{oIUNI}3@QxvbcN(?_a*FD+ z)?*_@te8dhnm04?PWCbBxhRTSEY{EJzTKXMq`KqXju)^|vjx*@cKH)MH-*5mX3W^i zsowkrG4cT8PR{^LfAT<(%1nPWHlBGn+K&?QN59B|nL0bTw4eb0z@69HZy8LO#bQ<} zt*x!Oxw)sOXJllg 
zwzd{dfz4)%L?Srr($Z3>C>Vkgg05IBE-5LgtE=Pl`M#wCc9+j`Gihc}ymwo<8;NnhJZ3%ag4}P7;hEiAsPS6uXb@#iL0{X9Uw)YavpbLH3j$7@r*ih4NlU`+I^OE^BdB_}8Ld0lS= zm%}C}le-GptqDVHKc;`zsWa!D=vVi(AY)obPh3>v!Pd-YCVN-k(A|54sTmvM8>6}xT8UJ{1Njpr+xxH+TDiQ9C%0{}kzsyt{kstW$TJ3`%y#K$5G zGCiFE4e*9PP9S;{yh)K>USuy{vX>8mNFo!7@`%!hZxNE2@rM(#|DRCp;K_spoV7y) oGXcqpO=p0_6lOAmkO7-9QZf!P5tU7+KTHB4R9fKmtud$m1v|F|lK=n! literal 0 HcmV?d00001 diff --git a/eclipse-plugin/icons/svg.png b/eclipse-plugin/icons/svg.png new file mode 100644 index 0000000000000000000000000000000000000000..ee2a85ea6d383832af45e8595123bb66957bffc9 GIT binary patch literal 1751 zcmV;|1}OQ7P)X1^@s6N;AwK00004XF*Lt006O% z3;baP0000WV@Og>004R>004l5008;`004mK004C`008P>0026e000+ooVrmw00006 zVoOIv0RI600RN!9r;`8x00(qQO+^Rg0}u!XG~^i%RR915wn;=mR9M5+mwSv<)g8w_ z=iJ9Uc4ua1XUAoC1$K~Kb^&2mQmdtbG)vOxVy!VT8rsIDQcX$iTN9H~+oaaSf0`gH zy0tCPCL-2HY?H2~inK#O<1QAa144bk0=u(2J3GvK?%aF&$H=l9q_&zc_50U7=boGU zIltfccOF71rT7#{pWU!+n`AOcp-`wb7XWUzn@}i(u3yBQc}gS_3=IuYE|>9mJhf&8 zpja%TX&UR+t)s22t=g%SQq$AZYG7bMjf{*a+qTvJi)`Cgu~UOVt6%X_Y)lsdz=zVS7NUO&NC@4knP_iW^r zFRtOp;bDrFgHjF`HcsU@RBRi7a;Zc+{(d*g<-@IOczr=`xN$Wd3$J3=;5LSjk5WpH z@uxq(M&J6|X%7k>{>e{ye|#Dt6v1#CYgb>--k0|9*XW!7Il>oGIdA@A?%Q-fp?VD~ zae@QKUI8I!3^jpmk;!BMC}dJR`s43&=wyn)-|Xc2js~O66a3|GZ_qaXYLbaDDh_1R6YO|$fCEQQ^TP)o#IM_U{EaB9!06dAn!?TG(kX0U z_BJ7G+g7`F?V`WGA5)i{&*lN})cNq3lBrZ0Aq1Lk5DtgYq@Y|Xl1wJ4IEwmuKYo8b znv~=+X{OI-K?s74A-rxEm2!#X)D(uxja4dQx;^att2gMu<3vPUWaOtTq$TxH(3~2^s&AbLLjA-csw2^CMK>3@W)XynZz&*Ow+8c z61}~>M59r1xm>N;E>W>qWZ%AhtX#Pg0CC~gynp|G;_*1Wy}kJT{#tVakj-W}eE2Z! 
z?d|mS^&y0~2&|Oi)TvX%VlfJZLe0;}N-5lKH{IRcL?V$Ho*Cdzi~bFf#Nj}HwrcMH z001R)MObuXVRU6WV{&C-bY%cCFfuVMFf%PNHdHV;IxsdmH8L$QG&(Ray^QV=0000b zbVXQnWMOn=I&E)cX=ZrqZYHX|FS(3{zmm!yiJTc6D#x_)H9LN38(&mzTxm2#F zi82|hNt0U~={QH}$URnCa?M$PoJ>T~}-}`;<_x|_gc(^$#fVIH@04O+NF`i=7 z6Q?HVpt#0`V$8)L6>fvG0e}Y#xm|*^xQB#bJ#heV#Q*?O(*a;xOr=0aapjzImg2#|U>8RWu>T{fyYDi^j10{QXD>4+p(UqsL=SrgExw=agt76)_74?D z$EXrjGpopnMbqq;9WIZ|{Y~0KU0n3)42vTS3$^oJX-hYqwU*A$f369IDx+$ZTcJ0C z@@DGNKiMrE^1@>4?n@cNB8^$bMN?zWMjq<-MeMHe?!n;&#+Y1^Yvi*sSA#D@x5~|h z-zvUTDtkH$BqhE+m#!Mg>Ys(`C^Gy!p;8D0g5TFiK;DYQ)pNO(Q&avQ6HhlZd>8)w z4Q!;}-Q8`WqX-6r<1BRSOlw)5W@css6{nL1j^kTie73gGDN<8YW7pPVmN_PMEKjXa ze>~oiPPZ~CjviFilJ)TsjEp!=CQMCD0Ukga5Aut3Dxc4fhay4t=df;W^^J|uC4)gW zM!CqA(1TR%OhonTOZuoh5JV|>MmvYMP*$70NboOn_e%Hx^6}|e@2|pbKJ2|GC~s&C z9(vihb^p%%hOt7M*XVZ3`c}O{Q_8;H6dJwtXa!l9%q2`Rb{ugAi4=KM*6|3-ar$2+ zw@FB&xN1>y2feFUxUm*~`MU(5Zh9r1Iiw*giauqIBIK50Jez*)plS$8D|qi((*~l+uWh(z za2T@HysfmOs3YYfAARwHU2r+mN*DU!M9)ngE> zdlI#YayDzlt)N7SLD2LiDTWP}@S5`Eqv$5Y*?E#{Qh32)ZrBTojL`P)nBx+zxoJnG z%vC2qL?u=3wt+%EDdrtV`t!ySjFy!IK@#62=Toq`t9`}Lnq0+T2@>U>^=4xhm(~oF zz%`;B*^Fl(=;)Jaoz~|0H#rVPC$8l#8Xms=yIV?6X5eqOjK$48?FZN2-{yG-1Ozm# zpcNDpEIT%3E9vuo|I{?oXtdGQ314V#=?Z|pBWk#7tRx~ix!u}-<+QOmCN?m3S4 z)Y()D)&-|OeTG@1wqr3JUnrlXO`Tuxe3+zvZy&$V2LX9TWSNz?D{sV z*W=2}f>@>X0^QSzSlqY+Db)eXv^O_N^QN^%E(l{EFKhsrStS`oFHnFS!4_+NhBocX?Kkwl0Kc%FFpg_oE@?AFj zCYNjVL33ou7Q}9A6ATUMF56EQ9DvJ9woWDo4v$o1zlkwBq5ksg%Av8=ffNDvQOK&V zbEo)C>C^0eX`uuf5gkGyiUB~vkq8(Z2}7d15ePKG0*y3>!BJ>9997Mi{f~e|4vh@E z{Qm;X-qm)oKNhyo@YLLhg#wsoc*$8To$?YFz%d%N@AyW@BCkb%CL zJ_JDqKHi=ufZYbhCOsIK>Ebm#zP!1n+I)e+&U5FNm+LcLG;LeSOi5QI&EAO+aM zPC-yS0)l3OAqYhR`ndd-GZa?Hb8T8bHV&2s!^;wEXpGgGjMlq$H!kv$KNFq@Ff*>DvcJX6pNCZSS+u1 zFb|iYrw6}RTGPX2-?)WaQE0#$J_nGfl`^4l7#!6VXXvjrtZe0K)$)f$WCAgZ!xNM~ zAh!z!2D&-<`S}EVI<=ZES7{c+?~3l{67CX9sZ~oWvi2v<1UxRARPa{3@UIhQXJ1E) zxrxyc|G?m%gD7>(p04NZTz*C}+SGCfe{l3BI@HO{hr{EON#u^URtMXiDfmnmhh5i+ zMU~9XvHlLrjm9_cQ5167tVB$?pA{8{jtcu}!$u1_ljVxAIg3eEDiqBXd4>jGpN~QR 
z5_p<;pZfO~GWGR8e&M5sP2Ug-t0ixScUzldQga(0<`{3ZU_5>9P-g$^Ec^!|kqD2+x3siOPfwSX zm4Wr0Psq&7urKY_A?VyLs)E4My*}|p`5fY z#>Lv2f-P&r+qvE5bi&r^c-bUGW}m{Q{7y!>AzD7(co>wW(A7QGJ*(KvyFY7;~b8VfF{p*mlc&P!wmv3JcEm^mv;LI!6 z)@{>-)ISy9sK*Tv5cZN6*ia((ewl(-y{ly zgvglD)kaNoBhRUCM-$h}UvIUvAzHexI)1FsHEH34$=-PdUvznHYovREYDo_Nab7A~ z`PL7Xq*17K&1K3tcW*Ojbvzp{h=2JAq{9Z|6@Uo~!GxniFNOmHIf4|}BOL7=k^T-2 zCO-xfv9?1O8uW8IyUUWxhr25j7*rD1A*zs2q$CDVJ?SU42L3OVxz + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/eclipse-plugin/src/Makefile.am b/eclipse-plugin/src/Makefile.am new file mode 100644 index 0000000..83954b7 --- /dev/null +++ b/eclipse-plugin/src/Makefile.am @@ -0,0 +1,54 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-notests.mk + +EXTRA_DIST = \ + deploy/build.xml \ + deploy/javaCompilerArgs \ + starpu/handlers/SvgHandler.java \ + starpu/handlers/TraceVizHandler.java \ + starpu/handlers/TaskGraphHandler.java \ + starpu/handlers/TraceGenHandler.java \ + starpu/handlers/TraceUtils.java + +ECLIPSE_DIR = $(shell dirname @ECLIPSE@) +ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace + +all: package + cp $(abs_top_srcdir)/eclipse-plugin/build.xml $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml + rm -f build/plugin/plugins/StarPU_*jar + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/src/deploy/build.xml + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + cp build/plugin/plugins/*.jar $(ECLIPSE_DIR)/dropins/ + mv $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml $(abs_top_srcdir)/eclipse-plugin/build.xml + +package: + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(abs_top_srcdir)/eclipse-plugin + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/build.xml + +txtdir = $(libdir)/starpu/eclipse-plugin/workspace +script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh + +install-data-hook: + (cd $(abs_top_builddir)/eclipse-plugin/workspace && $(PROG_FIND) . -type f -exec $(script) {} $(DESTDIR)$(txtdir) $(INSTALL_DATA) \;) + +clean-local: + rm -rf build + +distclean-local: clean-local + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + rm -rf $(ECLIPSE_WORKSPACE) diff --git a/eclipse-plugin/src/Makefile.in b/eclipse-plugin/src/Makefile.in new file mode 100644 index 0000000..51d1795 --- /dev/null +++ b/eclipse-plugin/src/Makefile.in @@ -0,0 +1,792 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = eclipse-plugin/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk 
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = 
@HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = 
@MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = 
@STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ 
+STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ 
+target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +EXTRA_DIST = \ + deploy/build.xml \ + deploy/javaCompilerArgs \ + starpu/handlers/SvgHandler.java \ + starpu/handlers/TraceVizHandler.java \ + starpu/handlers/TaskGraphHandler.java \ + starpu/handlers/TraceGenHandler.java \ + starpu/handlers/TraceUtils.java + +ECLIPSE_DIR = $(shell dirname @ECLIPSE@) +ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace +txtdir = $(libdir)/starpu/eclipse-plugin/workspace +script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh +all: all-am + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign eclipse-plugin/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-local mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-local + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-data-hook +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-data-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + clean-local cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distclean-local distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-data-hook install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) 
{}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +all: package + cp $(abs_top_srcdir)/eclipse-plugin/build.xml $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml + rm -f build/plugin/plugins/StarPU_*jar + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/src/deploy/build.xml + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + cp build/plugin/plugins/*.jar $(ECLIPSE_DIR)/dropins/ + mv $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml $(abs_top_srcdir)/eclipse-plugin/build.xml + +package: + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(abs_top_srcdir)/eclipse-plugin + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/build.xml + +install-data-hook: + (cd $(abs_top_builddir)/eclipse-plugin/workspace && $(PROG_FIND) . -type f -exec $(script) {} $(DESTDIR)$(txtdir) $(INSTALL_DATA) \;) + +clean-local: + rm -rf build + +distclean-local: clean-local + rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar + rm -rf $(ECLIPSE_WORKSPACE) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/eclipse-plugin/src/deploy/build.xml b/eclipse-plugin/src/deploy/build.xml new file mode 100644 index 0000000..9d81903 --- /dev/null +++ b/eclipse-plugin/src/deploy/build.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eclipse-plugin/src/deploy/javaCompilerArgs b/eclipse-plugin/src/deploy/javaCompilerArgs new file mode 100644 index 0000000..cb57c77 --- /dev/null +++ b/eclipse-plugin/src/deploy/javaCompilerArgs @@ -0,0 +1,79 @@ +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ui_3.119.0.v20210111-1350.jar[~org/eclipse/ui/internal/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.runtime_3.20.100.v20210111-0815.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi_3.16.200.v20210226-1447.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.compatibility.state_1.2.300.v20210212-1137.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.common_3.14.100.v20210212-1143.jar[+org/eclipse/core/runtime/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.jobs_3.10.1100.v20210111-0815.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.registry_3.10.100.v20210212-1143.jar[~org/eclipse/core/internal/adapter/*:~org/eclipse/core/internal/registry/*:~org/eclipse/core/internal/registry/osgi/*:~org/eclipse/core/internal/registry/spi/*:+org/eclipse/core/runtime/*:+org/eclipse/core/runtime/dynamichelpers/*:+org/eclipse/core/runtime/spi/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.preferences_3.8.200.v20210212-1143.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.contenttype_3.7.900.v20210111-0918.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.app_1.5.100.v20210212-1143.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.services_3.10.0.v20210212-1137.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.util_3.6.0.v20210212-1137.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/javax.servlet_3.1.0.v201410161800.jar[?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt.browser.chromium.gtk.linux.x86_64_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt.gtk.linux.x86_64_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface_3.22.100.v20210126-0831.jar[+org/eclipse/jface/*:+org/eclipse/jface/action/*:+org/eclipse/jface/action/images/*:+org/eclipse/jface/bindings/*:+org/eclipse/jface/bindings/keys/*:+org/eclipse/jface/bindings/keys/formatting/*:+org/eclipse/jface/commands/*:+org/eclipse/jface/contexts/*:+org/eclipse/jface/dialogs/*:+org/eclipse/jface/dialogs/images/*:+org/eclipse/jface/fieldassist/*:+org/eclipse/jface/fieldassist/images/*:+org/eclipse/jface/images/*:~org/eclipse/jface/internal/*:~org/eclipse/jface/internal/provisional/action/*:+org/eclipse/jface/layout/*:+org/eclipse/jface/menus/*:+org/eclipse/jface/operation/*:+org/eclipse/jface/preference/*:+org/eclipse/jface/preference/images/*:+org/eclipse/jface/resource/*:+org/eclipse/jface/util/*:+org/eclipse/jface/viewers/*:+org/eclipse/jface/viewers/deferred/*:+org/eclipse/jface/widgets/*:+org/eclipse/jface/window/*:+org/eclipse/jface/wizard/*:+org/eclipse/jface/wizard/images/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.commands_3.9.800.v20201021-1339.jar[+org/eclipse/core/commands/*:+org/eclipse/core/commands/common/*:+org/eclipse/core/commands/contexts/*:~org/eclipse/core/commands/internal/util/*:+org/eclipse/core/commands/operations/*:+org/eclipse/core/commands/util/*:~org/eclipse/core/internal/commands/operations/*:~org/eclipse/core/internal/commands/util/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.bidi_1.3.100.v20210212-1143.jar[?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ui.workbench_3.122.100.v20210215-1525.jar[~org/eclipse/e4/ui/workbench/addons/perspectiveswitcher/*:+org/eclipse/ui/*:+org/eclipse/ui/about/*:+org/eclipse/ui/actions/*:+org/eclipse/ui/activities/*:+org/eclipse/ui/application/*:+org/eclipse/ui/branding/*:+org/eclipse/ui/browser/*:+org/eclipse/ui/commands/*:+org/eclipse/ui/contexts/*:+org/eclipse/ui/databinding/*:+org/eclipse/ui/databinding/typed/*:+org/eclipse/ui/dialogs/*:+org/eclipse/ui/dnd/*:+org/eclipse/ui/fieldassist/*:+org/eclipse/ui/handlers/*:+org/eclipse/ui/help/*:~org/eclipse/ui/internal/*:~org/eclipse/ui/internal/about/*:~org/eclipse/ui/internal/actions/*:~org/eclipse/ui/internal/activities/*:~org/eclipse/ui/internal/activities/ws/*:~org/eclipse/ui/internal/application/*:~org/eclipse/ui/internal/browser/*:~org/eclipse/ui/internal/commands/*:~org/eclipse/ui/internal/contexts/*:~org/eclipse/ui/internal/decorators/*:~org/eclipse/ui/internal/dialogs/*:~org/eclipse/ui/internal/dialogs/cpd/*:~org/eclipse/ui/internal/e4/compatibility/*:~org/eclipse/ui/internal/e4/migration/*:~org/eclipse/ui/internal/editorsupport/*:~org/eclipse/ui/internal/expressions/*:~org/eclipse/ui/internal/handlers/*:~org/eclipse/ui/internal/help/*:~org/eclipse/ui/internal/intro/*:~org/eclipse/ui/internal/keys/*:~org/eclipse/ui/internal/keys/model/*:~org/eclipse/ui/internal/layout/*:~org/eclipse/ui/internal/menus/*:~org/eclipse/ui/internal/misc/*:~org/eclipse/ui/internal/model/*:~org/eclipse/ui/internal/operations/*:~org/eclipse/ui/internal/part/*:~org/eclipse/ui/internal/preferences/*:~org/eclipse/ui/internal/progress/*:~org/eclipse/ui/internal/provisional/application/*:~org/eclipse/ui/internal/quickaccess/*:~org/eclipse/ui/internal/quickaccess/providers/*:~org/eclipse/ui/internal/registry/*:~org/eclipse/ui/internal/services/*:~org/eclipse/ui/internal/splash/*:~org/eclipse/ui/internal/statushandlers/*:~org/eclipse/ui/internal/testing/*:~org/eclipse/ui/internal/themes/*:~org/eclipse/ui/internal/t
weaklets/*:~org/eclipse/ui/internal/util/*:~org/eclipse/ui/internal/wizards/*:~org/eclipse/ui/internal/wizards/preferences/*:+org/eclipse/ui/intro/*:+org/eclipse/ui/keys/*:+org/eclipse/ui/menus/*:+org/eclipse/ui/model/*:+org/eclipse/ui/operations/*:+org/eclipse/ui/part/*:+org/eclipse/ui/plugin/*:+org/eclipse/ui/preferences/*:+org/eclipse/ui/progress/*:+org/eclipse/ui/quickaccess/*:+org/eclipse/ui/services/*:+org/eclipse/ui/splash/*:+org/eclipse/ui/statushandlers/*:+org/eclipse/ui/swt/*:+org/eclipse/ui/themes/*:+org/eclipse/ui/views/*:+org/eclipse/ui/wizards/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/com.ibm.icu_67.1.0.v20200706-1749.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/javax.annotation_1.3.5.v20200909-1856.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/javax.inject_1.0.0.v20091030.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.commands_0.13.0.v20201119-1132.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.expressions_3.7.100.v20210203-1000.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.contexts_1.8.400.v20191217-1710.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di_1.7.700.v20210128-2123.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.annotations_1.6.600.v20191216-2352.jar[+org/eclipse/e4/core/di/annotations/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.services_2.2.600.v20210110-1654.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench_1.12.100.v20210122-1731.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.jxpath_1.3.0.v200911051830.jar[?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.model.workbench_2.1.1000.v20210111-0958.jar[~org/eclipse/e4/ui/model/*:+org/eclipse/e4/ui/model/application/*:+org/eclipse/e4/ui/model/application/commands/*:~org/eclipse/e4/ui/model/application/commands/impl/*:~org/eclipse/e4/ui/model/application/commands/util/*:+org/eclipse/e4/ui/model/application/descriptor/basic/*:~org/eclipse/e4/ui/model/application/descriptor/basic/impl/*:~org/eclipse/e4/ui/model/application/descriptor/basic/util/*:~org/eclipse/e4/ui/model/application/impl/*:+org/eclipse/e4/ui/model/application/ui/*:+org/eclipse/e4/ui/model/application/ui/advanced/*:~org/eclipse/e4/ui/model/application/ui/advanced/impl/*:~org/eclipse/e4/ui/model/application/ui/advanced/util/*:+org/eclipse/e4/ui/model/application/ui/basic/*:~org/eclipse/e4/ui/model/application/ui/basic/impl/*:~org/eclipse/e4/ui/model/application/ui/basic/util/*:~org/eclipse/e4/ui/model/application/ui/impl/*:+org/eclipse/e4/ui/model/application/ui/menu/*:~org/eclipse/e4/ui/model/application/ui/menu/impl/*:~org/eclipse/e4/ui/model/application/ui/menu/util/*:~org/eclipse/e4/ui/model/application/ui/util/*:~org/eclipse/e4/ui/model/application/util/*:+org/eclipse/e4/ui/model/fragment/*:~org/eclipse/e4/ui/model/fragment/impl/*:~org/eclipse/e4/ui/model/fragment/util/*:~org/eclipse/e4/ui/model/internal/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore_2.23.0.v20200630-0516.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.common_2.22.0.v20210114-1734.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.resources_3.14.0.v20210215-0934.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ant.core_3.5.800.v20200608-1251.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.variables_3.4.800.v20200120-1101.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.filesystem_1.7.700.v20200110-1734.jar[?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.filesystem.linux.x86_64_1.2.300.v20180828-0158.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.emf.xpath_0.2.800.v20200609-0849.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.services_1.5.0.v20210115-1333.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.di_1.3.0.v20210222-1018.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.extensions.supplier_0.15.800.v20210110-1654.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.extensions_0.16.0.v20200507-0938.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore.change_2.14.0.v20190528-0725.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore.xmi_2.16.0.v20190528-0725.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jdt.annotation_2.2.600.v20200408-1511.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.renderers.swt_0.15.0.v20201125-0918.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.swt_0.16.0.v20201230-1610.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.dialogs_1.2.100.v20201109-2317.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding_1.10.100.v20200926-1123.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding.observable_1.10.0.v20200730-0848.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding.property_1.8.100.v20200619-0651.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface.databinding_1.12.200.v20210111-0911.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.core_0.13.0.v20201015-0653.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.css.sac_1.3.1.v200903091627.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.css_1.13.0.v20200622-2037.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.i18n_1.13.0.v20200622-2037.jar[?**/*] 
+#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.xmlgraphics_2.4.0.v20200622-2037.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.io_2.6.0.v20190123-2029.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.logging_1.2.0.v20180409-1502.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/javax.xml_1.3.4.v201005080400.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.events_3.0.0.draft20060413_v201105210656.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.svg_1.1.0.v201011041433.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.smil_1.0.1.v200903091627.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.util_1.13.0.v20200622-2037.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.constants_1.13.0.v20200622-2037.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.swt_0.14.100.v20201217-1340.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.bindings_0.13.0.v20201119-1132.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench3_0.15.500.v20201021-1339.jar[+org/eclipse/ui/testing/*:+org/eclipse/ui/testing/dumps/*:?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.swt.theme_0.13.0.v20201026-1147.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.swt.gtk_1.1.100.v20210108-1832.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.widgets_1.2.800.v20201021-1339.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.urischeme_1.1.300.v20210113-1544.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/com.sun.jna_4.5.1.v20190425-1842.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/com.sun.jna.platform_4.5.1.v20190425-1842.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface.notifications_0.3.0.v20210218-1820.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.help_3.8.800.v20200525-0755.jar[?**/*] +#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.addons.swt_1.4.100.v20201221-2332.jar[?**/*] diff --git 
a/eclipse-plugin/src/starpu/handlers/SvgHandler.java b/eclipse-plugin/src/starpu/handlers/SvgHandler.java
new file mode 100644
index 0000000..9e1a5c0
--- /dev/null
+++ b/eclipse-plugin/src/starpu/handlers/SvgHandler.java
@@ -0,0 +1,82 @@
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+package starpu.handlers;
+
+import java.awt.EventQueue;
+import java.io.File;
+import java.io.PrintWriter;
+import java.util.regex.Pattern;
+
+import org.eclipse.core.commands.AbstractHandler;
+import org.eclipse.core.commands.ExecutionEvent;
+import org.eclipse.core.commands.ExecutionException;
+import org.eclipse.core.runtime.IPath;
+import org.eclipse.ui.IEditorInput;
+import org.eclipse.ui.IPathEditorInput;
+import org.eclipse.ui.handlers.HandlerUtil;
+
+/**
+ * Command handler that renders the task graph of the current trace directory
+ * with Graphviz and opens the result in Firefox.
+ *
+ * The work is queued on the AWT event queue. It expects dag.dot in
+ * <user.dir>/<TraceUtils.getRandomDirectoryName()>, runs "dot" twice to produce
+ * output.map (cmapx) and output.svg, and, when the active editor is backed by
+ * a file-system path, writes output.html containing the image map with its
+ * href targets re-rooted at the directory of the edited file. Any failure is
+ * reported through TraceUtils.displayMessage().
+ */
+public class SvgHandler extends AbstractHandler {
+
+    /**
+     * Schedules the rendering described above.
+     *
+     * @param event the command event; only used to find the active editor input
+     * @return always null
+     */
+    @Override
+    public Object execute(ExecutionEvent event) throws ExecutionException {
+        // Looked up while the command is being handled, not later on the AWT
+        // thread. getActiveEditorInput() yields null when no editor is active,
+        // which the instanceof test below treats as "no path".
+        final IEditorInput input = HandlerUtil.getActiveEditorInput(event);
+
+        EventQueue.invokeLater(() -> {
+            try {
+                String workDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName();
+                String inputfilename = workDir + "/dag.dot";
+                File f = new File(inputfilename);
+                if (!f.isFile())
+                    throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?");
+
+                String[] cmd1 = { "dot", "-Tcmapx", inputfilename, "-o", workDir + "/output.map"};
+                TraceUtils.runCommand(cmd1);
+
+                String[] cmd2 = { "dot", "-Tsvg", inputfilename, "-o", workDir + "/output.svg" };
+                TraceUtils.runCommand(cmd2);
+
+                if (!(input instanceof IPathEditorInput)) {
+                    System.out.println("There is no path");
+                }
+                else
+                {
+                    String map = TraceUtils.readFileToString(workDir + "/output.map");
+                    // Matches href=" plus any leading directory components of the target.
+                    Pattern p = Pattern.compile("href=\"([^#\"/]+/)*");
+                    IPath ipath = ((IPathEditorInput) input).getPath().makeAbsolute().removeLastSegments(1);
+                    String path = ipath.toString();
+                    // quoteReplacement: '$' and '\' in the path must be inserted
+                    // literally instead of being read as group references.
+                    String replaceBy = java.util.regex.Matcher.quoteReplacement("href=\"" + path + "/");
+                    map = p.matcher(map).replaceAll(replaceBy);
+
+                    // try-with-resources: the file is closed even if a write fails.
+                    // NOTE(review): the header and footer literals carry no markup
+                    // here -- confirm against upstream that nothing was lost.
+                    try (PrintWriter pw = new PrintWriter(workDir + "/output.html")) {
+                        pw.println("\n" + "\n");
+                        pw.println(map);
+                        pw.println("");
+                    }
+                }
+
+                String[] cmd8 = { "firefox", workDir + "/output.html" };
+                TraceUtils.runCommand(cmd8);
+            } catch (Exception e) {
+                TraceUtils.displayMessage("Error: " + e.toString());
+                e.printStackTrace();
+            }
+
+        });
+
+        return null;
+    }
+
+}
diff --git a/eclipse-plugin/src/starpu/handlers/TaskGraphHandler.java b/eclipse-plugin/src/starpu/handlers/TaskGraphHandler.java
new file mode 100644
index 0000000..4d0496c
--- /dev/null
+++ b/eclipse-plugin/src/starpu/handlers/TaskGraphHandler.java
@@ -0,0 +1,68 @@
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+package starpu.handlers;
+
+import java.awt.EventQueue;
+import java.awt.Image;
+import java.io.File;
+
+import javax.imageio.ImageIO;
+import javax.swing.ImageIcon;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+
+import org.eclipse.core.commands.AbstractHandler;
+import org.eclipse.core.commands.ExecutionEvent;
+import org.eclipse.core.commands.ExecutionException;
+
+/**
+ * Command handler that renders the task graph as a PNG and shows it in a
+ * Swing window.
+ *
+ * The work is queued on the AWT event queue. It expects dag.dot in
+ * <user.dir>/<TraceUtils.getRandomDirectoryName()>, runs "dot -Tpng" to produce
+ * output.png and "starpu_tasks_rec_complete" on tasks.rec in the same
+ * directory, then displays output.png. Any failure is reported through
+ * TraceUtils.displayMessage().
+ */
+public class TaskGraphHandler extends AbstractHandler {
+
+    /**
+     * Schedules the rendering described above.
+     *
+     * @param event the command event (not consulted)
+     * @return always null
+     */
+    @Override
+    public Object execute(ExecutionEvent event) throws ExecutionException {
+        EventQueue.invokeLater(() -> {
+            try {
+                String workDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName();
+                String inputfilename = workDir + "/dag.dot";
+                File f = new File(inputfilename);
+                if (!f.isFile())
+                    throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?");
+
+                String pngfilename = workDir + "/" + "output.png";
+                String[] cmd2 = { "dot", "-Tpng", inputfilename, "-o", pngfilename };
+                TraceUtils.runCommand(cmd2);
+                String[] cmd3 = { "starpu_tasks_rec_complete", workDir + "/" + "tasks.rec" };
+                TraceUtils.runCommand(cmd3);
+
+                // ImageIO.read() returns null when no reader understands the file;
+                // report that explicitly rather than failing inside ImageIcon.
+                Image i = ImageIO.read(new File(pngfilename));
+                if (i == null)
+                    throw new Exception("File <" + pngfilename + "> could not be read as an image");
+
+                // The frame is only created once the image is known to be usable,
+                // and it releases its native resources when the user closes it.
+                JFrame frame = new JFrame("StarPU application: Task Graph.png");
+                frame.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
+                frame.add(new JLabel(new ImageIcon(i)));
+                frame.pack();
+                frame.setVisible(true);
+            } catch (Exception e) {
+                TraceUtils.displayMessage("Error: " + e.toString());
+                e.printStackTrace();
+            }
+
+        });
+
+        return null;
+    }
+
+}
diff --git a/eclipse-plugin/src/starpu/handlers/TraceGenHandler.java b/eclipse-plugin/src/starpu/handlers/TraceGenHandler.java
new file mode 100644
index 0000000..2a36409
--- /dev/null
+++ b/eclipse-plugin/src/starpu/handlers/TraceGenHandler.java
@@ -0,0 +1,70 @@
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+package starpu.handlers;
+
+import java.awt.EventQueue;
+import java.io.File;
+
+import org.eclipse.core.commands.AbstractHandler;
+import org.eclipse.core.commands.ExecutionEvent;
+import org.eclipse.core.commands.ExecutionException;
+import org.eclipse.jface.dialogs.MessageDialog;
+import org.eclipse.ui.IWorkbenchWindow;
+import org.eclipse.ui.handlers.HandlerUtil;
+
+/**
+ * Command handler that runs starpu_fxt_tool on the FxT trace of the last run.
+ *
+ * The trace file is <prefix>/<suffix>, where prefix is $STARPU_FXT_PREFIX
+ * (or "/tmp" when unset) and suffix is $STARPU_FXT_SUFFIX (or
+ * "prof_file_<$USER>_0" when unset). The tool is asked to write into the
+ * directory named by TraceUtils.getRandomDirectoryName().
+ */
+public class TraceGenHandler extends AbstractHandler {
+    /**
+     * Shows an information dialog, then queues the conversion on the AWT
+     * event queue. Failures are reported through TraceUtils.displayMessage().
+     *
+     * @param event the command event, used to find the workbench window
+     * @return always null
+     */
+    @Override
+    public Object execute(ExecutionEvent event) throws ExecutionException {
+        IWorkbenchWindow activeWindow = HandlerUtil.getActiveWorkbenchWindowChecked(event);
+        MessageDialog.openInformation(activeWindow.getShell(), "StarPU FxT Tool",
+                "Running Starpu FxT Tool: generation of different trace formats");
+        EventQueue.invokeLater(() -> {
+            try {
+                // Directory holding the trace.
+                String prefix = System.getenv("STARPU_FXT_PREFIX");
+                if (prefix == null) {
+                    System.out.println("STARPU_FXT_PREFIX does not have a value");
+                    prefix = "/tmp";
+                } else {
+                    System.out.println("STARPU_FXT_PREFIX=" + prefix);
+                }
+
+                // File name of the trace; USER is only consulted for the fallback.
+                String suffix = System.getenv("STARPU_FXT_SUFFIX");
+                if (suffix == null) {
+                    System.out.println("STARPU_FXT_SUFFIX does not have a value");
+                    suffix = "prof_file_" + System.getenv("USER") + "_0";
+                } else {
+                    System.out.println("STARPU_FXT_SUFFIX=" + suffix);
+                }
+
+                String inputfilename = prefix + "/" + suffix;
+                if (!new File(inputfilename).isFile())
+                    throw new Exception("File <" + inputfilename + "> does not exist. Have you run your application?");
+
+                TraceUtils.runCommand(new String[] {"starpu_fxt_tool", "-i", inputfilename, "-d", TraceUtils.getRandomDirectoryName(), "-c", "-no-acquire"});
+            } catch (Exception failure) {
+                TraceUtils.displayMessage("Error: " + failure);
+                failure.printStackTrace();
+            }
+
+        });
+
+        return null;
+    }
+
+}
diff --git a/eclipse-plugin/src/starpu/handlers/TraceUtils.java b/eclipse-plugin/src/starpu/handlers/TraceUtils.java
new file mode 100644
index 0000000..14fd1a6
--- /dev/null
+++ b/eclipse-plugin/src/starpu/handlers/TraceUtils.java
@@ -0,0 +1,95 @@
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+package starpu.handlers;
+
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.Random;
+
+import javax.swing.BoxLayout;
+import javax.swing.JButton;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+
+/**
+ * Helpers shared by the StarPU command handlers: running external tools,
+ * naming the trace directory, showing a message window and reading a file.
+ */
+public class TraceUtils {
+
+    // Numeric part of the trace directory name, drawn once when the class is loaded.
+    private static int x = 1000 + new Random().nextInt(9999);
+
+    /**
+     * Runs an external command and copies everything it prints to System.out.
+     *
+     * Standard error is merged into standard output so that both are drained;
+     * a child writing a lot to an unread stderr pipe could otherwise block.
+     * The method returns once the output has ended and the process has exited.
+     * A non-zero exit status is logged but not treated as an error.
+     *
+     * @param command the program followed by its arguments
+     * @throws Exception if the process cannot be started, its output cannot be
+     *                   read, or the wait is interrupted
+     */
+    public static void runCommand(String[] command) throws Exception
+    {
+        System.out.println("Running command " + Arrays.toString(command));
+        ProcessBuilder builder = new ProcessBuilder(command);
+        builder.redirectErrorStream(true);
+        Process p = builder.start();
+
+        // try-with-resources: the pipe is closed even if reading fails.
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                System.out.println(line);
+            }
+        }
+
+        int status = p.waitFor();
+        if (status != 0) {
+            System.out.println("Command " + command[0] + " exited with status " + status);
+        }
+    }
+
+    /**
+     * @return "traces_" followed by the number drawn at class-load time; the
+     *         same name is returned by every call
+     */
+    public static String getRandomDirectoryName()
+    {
+        return "traces_" + x;
+    }
+
+    /**
+     * Opens a small window showing the message above an OK button.
+     *
+     * @param message the text to display
+     */
+    public static void displayMessage(String message)
+    {
+        final JFrame f = new JFrame("StarPU Message");
+
+        JLabel l = new JLabel(message);
+        JButton b19 = new JButton("OK");
+
+        b19.addActionListener(new ActionListener()
+        {
+            public void actionPerformed(ActionEvent evt)
+            {
+                // dispose() closes the window and releases its native resources;
+                // merely hiding it would keep them allocated.
+                f.dispose();
+            }
+        });
+
+        JPanel p = new JPanel();
+        p.setLayout(new BoxLayout(p, BoxLayout.Y_AXIS));
+        p.add(l);
+        p.add(b19);
+
+        f.add(p);
+        f.pack();
+        f.setVisible(true);
+    }
+
+    /**
+     * Reads a whole text file using the platform default charset.
+     *
+     * @param filename path of the file to read
+     * @return the exact contents of the file
+     * @throws IOException if the file cannot be opened or read
+     */
+    public static String readFileToString(String filename) throws IOException
+    {
+        StringBuilder stringBuilder = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
+            char[] buffer = new char[4096];
+            int count;
+            // read() may fill only part of the buffer: append just the
+            // characters actually read, never the unused tail.
+            while ((count = reader.read(buffer)) != -1) {
+                stringBuilder.append(buffer, 0, count);
+            }
+        }
+
+        return stringBuilder.toString();
+    }
+
+
+}
diff --git a/eclipse-plugin/src/starpu/handlers/TraceVizHandler.java
b/eclipse-plugin/src/starpu/handlers/TraceVizHandler.java
new file mode 100644
index 0000000..38445fa
--- /dev/null
+++ b/eclipse-plugin/src/starpu/handlers/TraceVizHandler.java
@@ -0,0 +1,49 @@
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+package starpu.handlers;
+
+import java.awt.EventQueue;
+import java.io.File;
+
+import org.eclipse.core.commands.AbstractHandler;
+import org.eclipse.core.commands.ExecutionEvent;
+import org.eclipse.core.commands.ExecutionException;
+
+/**
+ * Command handler that opens paje.trace from the current trace directory
+ * (<user.dir>/<TraceUtils.getRandomDirectoryName()>) with the "vite" program.
+ */
+public class TraceVizHandler extends AbstractHandler {
+
+    /**
+     * Queues the launch on the AWT event queue; failures are reported through
+     * TraceUtils.displayMessage().
+     *
+     * @param event the command event (not consulted)
+     * @return always null
+     */
+    @Override
+    public Object execute(ExecutionEvent event) throws ExecutionException {
+        EventQueue.invokeLater(() -> {
+            try {
+                String traceDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName();
+                String inputfilename = traceDir + "/paje.trace";
+
+                if (!new File(inputfilename).isFile())
+                    throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?");
+
+                TraceUtils.runCommand(new String[] { "vite", inputfilename });
+            } catch (Exception failure) {
+                TraceUtils.displayMessage("Error: " + failure);
+                failure.printStackTrace();
+            }
+        });
+
+        return null;
+    }
+
+}
diff --git a/eclipse-plugin/tools/cproject.sh b/eclipse-plugin/tools/cproject.sh
new file mode 100755
index 0000000..9d3c039
--- /dev/null
+++ b/eclipse-plugin/tools/cproject.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Usage: cproject.sh module|option NAME...
+# Prints, on a single line, one copy of the template selected by the first
+# argument for every NAME, with @STARPU_LIB@ replaced by that NAME.
+
+type=$1
+shift
+
+# Pick the template; any other kind is rejected with exit status 1.
+case "$type" in
+    module)
+        input='@STARPU_LIB@="true"'
+        ;;
+    option)
+        input=''
+        ;;
+    *)
+        echo Unknown type $type
+        exit 1
+        ;;
+esac
+
+# One template per name; tr then turns the newlines (octal 012) into spaces.
+for x in $*
+do
+    echo $input | sed -e 's/@STARPU_LIB@/'$x'/'
+done | tr '\012' ' '
diff --git a/eclipse-plugin/tools/install_workspace.sh b/eclipse-plugin/tools/install_workspace.sh
new file mode 100755
index 0000000..498dcbe
--- /dev/null
+++ b/eclipse-plugin/tools/install_workspace.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Usage: install_workspace.sh SRC DST COMMAND [ARG...]
+# Runs "COMMAND [ARG...] SRC DST/<dirname of SRC>", creating that destination
+# directory first when it does not exist. Every action is echoed before it
+# is run, and the exit status is that of COMMAND.
+
+src=$1
+dst=$2
+shift
+shift
+
+# Computed once and quoted everywhere, so paths containing spaces or glob
+# characters are passed through intact.
+destdir="$dst/$(dirname "$src")"
+
+if test ! -d "$destdir"
+then
+    echo mkdir -p "$destdir"
+    mkdir -p "$destdir"
+fi
+# $* stays unquoted as before, so a command passed as one string is still
+# split into words -- presumably some callers rely on that; verify before
+# switching to "$@".
+echo $* "$src" "$destdir"
+$* "$src" "$destdir"
diff --git a/examples/Makefile.am b/examples/Makefile.am
new file mode 100644
index 0000000..a0fc373
--- /dev/null
+++ b/examples/Makefile.am
@@ -0,0 +1,1579 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2016-2016 Uppsala University
+# Copyright (C) 2011-2011 Télécom Sud Paris
+# Copyright (C) 2017-2017 Erwan Leria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk +SUFFIXES = .hip +AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS += $(MAGMA_CFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS += $(MAGMA_CFLAGS) $(APP_FFLAGS) +AM_FCFLAGS += $(MAGMA_CFLAGS) $(APP_FCFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) +LIBS += $(MAGMA_LIBS) + +SUBDIRS = stencil + +BUILT_SOURCES = + +if STARPU_USE_OPENCL +nobase_STARPU_OPENCL_DATA_DATA = +endif + +EXTRA_DIST = \ + README.txt \ + axpy/axpy.h \ + axpy/axpy_opencl_kernel.cl \ + basic_examples/vector_scal_opencl_kernel.cl \ + basic_examples/multiformat_types.h \ + basic_examples/multiformat_opencl_kernel.cl \ + basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ + common/blas_model.c \ + spmd/vector_scal_spmd.c \ + spmv/spmv_cuda.cu \ + spmv/spmv_opencl.cl \ + spmv/matrix_market/examples/fidapm05.mtx \ + mult/xgemm.c \ + mult/xgemm_layout.c \ + mult/xgemm.h \ + mult/sgemm.sh \ + lu/xlu.c \ + lu/xlu_pivot.c \ + lu/xlu_implicit.c \ + lu/xlu_implicit_pivot.c \ + lu/xlu_kernels.c \ + lu/lu_example.c \ + incrementer/incrementer_kernels_opencl_kernel.cl \ + basic_examples/variable_kernels_opencl_kernel.cl \ + matvecmult/matvecmult_kernel.cl \ + basic_examples/block_opencl_kernel.cl \ + filters/fblock_opencl_kernel.cl \ + filters/custom_mf/conversion_opencl.cl \ + filters/custom_mf/custom_opencl.cl \ + filters/custom_mf/custom_types.h \ + interface/complex_kernels.cl \ + interface/complex_dev_handle/complex_dev_handle_kernels.cl \ + reductions/dot_product.h \ + reductions/dot_product_opencl_kernels.cl \ + scheduler/libdummy_sched.sh \ + scheduler/schedulers.sh \ + scheduler/schedulers_context.sh \ + fortran/Makefile \ + 
sched_ctx/axpy_partition_gpu.h \ + sched_ctx/axpy_partition_gpu.cu \ + heat/heat.sh \ + cholesky/libmy_dmda.h \ + cholesky/cholesky.sh \ + cholesky/cholesky_julia.sh \ + cholesky/cholesky_compiled.c \ + lu/lu.sh \ + subgraphs/main.h \ + native_fortran/Makefile_nf_dynbuf.mk \ + native_fortran/Makefile_nf_example.mk \ + native_fortran/Makefile_nf_matrix.mk \ + native_fortran/Makefile_nf_partition.mk \ + native_fortran/Makefile_nf_sched_ctx.mk \ + native_fortran/Makefile_nf_varbuf.mk \ + native_fortran/Makefile_nf_vector.mk \ + cpp/Makefile_add_vectors_cpp11.mk \ + cpp/Makefile_add_vectors.mk \ + fortran90/Makefile.mk \ + profiling_tool/prof.sh + + +CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90 *.csv *.md *.Rmd *.pdf *.html + +clean-local: + -rm -rf mult/sgemm.traces lu/lu.traces + +pkglib_LTLIBRARIES = + +if STARPU_HAVE_ICC +.icc.o: + $(V_icc) $(ICC) $(ICC_ARGS) -x c $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $< -c -o $@ +endif + +examplebindir = $(libdir)/starpu/examples/ + +examplebin_PROGRAMS = + +noinst_HEADERS = \ + axpy/axpy.h \ + cg/cg.h \ + cg/cg_kernels.c \ + heat/lu_kernels_model.h \ + heat/dw_sparse_cg.h \ + heat/heat.h \ + heat/dw_factolu.h \ + lu/xlu.h \ + lu/xlu_kernels.h \ + lu/lu-float.h \ + lu/lu-double.h \ + lu/complex_float.h \ + lu/complex_double.h \ + lu/blas_complex.h \ + cholesky/cholesky.h \ + sched_ctx_utils/sched_ctx_utils.h \ + common/blas_model.h \ + common/blas.h \ + mult/simple.h \ + mult/double.h \ + fortran/starpu_fortran.h \ + ppm_downscaler/ppm_downscaler.h \ + ppm_downscaler/yuv_downscaler.h \ + spmv/matrix_market/mmio.h \ + spmv/matrix_market/mm_to_bcsr.h \ + spmv/spmv.h \ + spmv/dw_block_spmv.h \ + basic_examples/multiformat_types.h \ + 
filters/custom_mf/custom_interface.h \ + filters/custom_mf/custom_types.h \ + interface/complex_interface.h \ + interface/complex_codelet.h \ + interface/complex_dev_handle/complex_dev_handle_interface.h \ + interface/complex_dev_handle/complex_dev_handle_codelet.h \ + pi/pi.h \ + pi/SobolQRNG/sobol.h \ + pi/SobolQRNG/sobol_gold.h \ + pi/SobolQRNG/sobol_gpu.h \ + pi/SobolQRNG/sobol_primitives.h \ + reductions/dot_product.h \ + basic_examples/vector_scal_cpu_template.h \ + sched_ctx/axpy_partition_gpu.h + +##################################### +# What to install and what to check # +##################################### + +examplebin_PROGRAMS += $(STARPU_EXAMPLES) + +TESTS = $(SHELL_TESTS) $(STARPU_EXAMPLES) + +SHELL_TESTS = + +SHELL_TESTS += scheduler/schedulers.sh +SHELL_TESTS += scheduler/schedulers_context.sh +if !STARPU_NO_BLAS_LIB +if STARPU_USE_FXT +SHELL_TESTS += mult/sgemm.sh +endif +endif + +check_PROGRAMS = $(STARPU_EXAMPLES) + +# STARPU_EXAMPLES list all applications which have to be compiled and checked +# Applications which should only be compiled are added directly in examplebin_PROGRAMS +# see for instance mandelbrot/mandelbrot + +STARPU_EXAMPLES = + +STARPU_EXAMPLES += \ + sched_ctx/prio \ + scheduler/dummy_sched \ + scheduler/dummy_modular_sched \ + worker_collections/worker_list_example \ + api/bcsr_data_interface \ + api/block_data_interface \ + api/coo_data_interface \ + api/csr_data_interface \ + api/matrix_data_interface \ + api/multiformat_data_interface \ + api/tensor_data_interface \ + api/variable_data_interface \ + api/vector_data_interface \ + api/void_data_interface + +if !STARPU_SIMGRID +STARPU_EXAMPLES += \ + basic_examples/hello_world \ + basic_examples/hooks \ + basic_examples/topology \ + basic_examples/vector_scal \ + basic_examples/mult \ + basic_examples/block \ + basic_examples/variable \ + basic_examples/multiformat \ + basic_examples/dynamic_handles \ + basic_examples/task_insert_color \ + basic_examples/ndim \ + mlr/mlr \ + 
cpp/incrementer_cpp \ + cpp/add_vectors \ + cpp/add_vectors_interface \ + filters/alloc \ + filters/fread \ + filters/fvector \ + filters/fvector_pick_variable \ + filters/ftensor \ + filters/ftensor_pick_block \ + filters/ftensor_pick_variable \ + filters/fblock \ + filters/fblock_pick_matrix \ + filters/fblock_pick_variable \ + filters/fmatrix \ + filters/fmatrix_pick_vector \ + filters/fmatrix_pick_variable \ + filters/fndim \ + filters/fndim_pick_ndim \ + filters/fndim_5d_pick_tensor \ + filters/fndim_4d_pick_block \ + filters/fndim_3d_pick_matrix \ + filters/fndim_2d_pick_vector \ + filters/fndim_1d_pick_variable \ + filters/fndim_pick_variable \ + filters/fndim_to_tensor \ + filters/fndim_to_block \ + filters/fndim_to_matrix \ + filters/fndim_to_vector \ + filters/fndim_to_variable \ + filters/fmultiple_manual \ + filters/fmultiple_submit \ + filters/fmultiple_submit_readonly \ + filters/fmultiple_submit_readonly_downgrade \ + filters/fmultiple_submit_implicit \ + filters/frecursive \ + filters/shadow \ + filters/shadow2d \ + filters/shadow3d \ + filters/shadow4d \ + filters/shadownd \ + tag_example/tag_example \ + tag_example/tag_example2 \ + tag_example/tag_example3 \ + tag_example/tag_example4 \ + tag_example/tag_restartable \ + transactions/trs_inc \ + spmd/vector_scal_spmd \ + spmv/spmv \ + callback/callback \ + callback/prologue \ + incrementer/incrementer \ + binary/binary \ + interface/complex \ + interface/complex_dev_handle/complex_dev_handle \ + matvecmult/matvecmult \ + profiling/profiling \ + perf_monitoring/perf_counters_01 \ + perf_monitoring/perf_counters_02 \ + perf_steering/perf_knobs_01 \ + perf_steering/perf_knobs_02 \ + perf_steering/perf_knobs_03 \ + scheduler/heteroprio_test \ + sched_ctx/sched_ctx \ + sched_ctx/sched_ctx_empty \ + sched_ctx/sched_ctx_remove \ + sched_ctx/sched_ctx_delete \ + sched_ctx/two_cpu_contexts \ + sched_ctx/dummy_sched_with_ctx \ + worker_collections/worker_tree_example \ + reductions/dot_product \ + 
reductions/minmax_reduction \ + dependency/task_end_dep \ + dependency/task_end_dep_add \ + dependency/sequential_consistency \ + subgraphs/manual \ + subgraphs/partition \ + subgraphs/plan +endif + +if !STARPU_SIMGRID +SHELL_TESTS += \ + profiling_tool/prof.sh + +pkglib_LTLIBRARIES += \ + profiling_tool/libprofiling_tool.la +profiling_tool_libprofiling_tool_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version +endif + +if !STARPU_SIMGRID +STARPU_EXAMPLES += \ + scheduler/dummy_sched + +SHELL_TESTS += \ + scheduler/libdummy_sched.sh +pkglib_LTLIBRARIES += \ + scheduler/libdummy_sched.la +scheduler_libdummy_sched_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version + +if STARPU_HAVE_CXX11 +STARPU_EXAMPLES += \ + cpp/add_vectors_cpp11 +endif + +if STARPU_HAVE_F77 +if STARPU_HAVE_F77_H +STARPU_EXAMPLES += \ + fortran/hello +endif + +STARPU_EXAMPLES += \ + basic_examples/vector_scal_fortran +endif + +if STARPU_HAVE_FC +if !STARPU_SANITIZE +STARPU_EXAMPLES += \ + fortran90/f90_example \ + native_fortran/nf_vector \ + native_fortran/nf_matrix \ + native_fortran/nf_example \ + native_fortran/nf_dynbuf \ + native_fortran/nf_varbuf \ + native_fortran/nf_sched_ctx \ + native_fortran/nf_partition +endif +endif +endif + +if STARPU_USE_CUDA +STARPU_EXAMPLES += \ + mult/sgemm \ + mult/dgemm + +examplebin_PROGRAMS += \ + mult/sgemm_layout \ + mult/dgemm_layout +endif + +if STARPU_USE_HIPBLAS +STARPU_EXAMPLES += \ + mult/sgemm \ + mult/dgemm +endif + +if !STARPU_NO_BLAS_LIB +STARPU_EXAMPLES += \ + mult/sgemm \ + mult/dgemm \ + lu/lu_example_float \ + lu/lu_example_double \ + lu/lu_implicit_example_float \ + lu/lu_implicit_example_double \ + cholesky/cholesky_tag \ + cholesky/cholesky_tile_tag \ + cholesky/cholesky_implicit \ + cholesky/cholesky_compil + +examplebin_PROGRAMS += \ + mult/sgemm_layout \ + mult/dgemm_layout + +if !STARPU_SIMGRID +STARPU_EXAMPLES += \ + axpy/axpy \ + cholesky/cholesky_grain_tag \ + heat/heat \ + cg/cg \ + pipeline/pipeline \ + 
transactions/trs_sgemm + + +SHELL_TESTS += \ + heat/heat.sh \ + lu/lu.sh +endif + +if STARPU_SIMGRID +if !STARPU_QUICK_CHECK +SHELL_TESTS += \ + cholesky/cholesky.sh +endif +endif +endif + +if !STARPU_SIMGRID + +if STARPU_MKL_BLAS_LIB +STARPU_EXAMPLES += \ + lu/lu_example_complex_float \ + lu/lu_example_complex_double \ + lu/lu_implicit_example_complex_float \ + lu/lu_implicit_example_complex_double +endif + +if STARPU_HAVE_CBLAS_H +if STARPU_HAVE_CBLAS_SGEMV +STARPU_EXAMPLES += \ + spmv/dw_block_spmv +endif +endif + +if !STARPU_SIMGRID +if STARPU_HAVE_F77 +if STARPU_HAVE_F77_H +STARPU_EXAMPLES += \ + fortran/hello +endif +STARPU_EXAMPLES += \ + basic_examples/vector_scal_fortran +endif +endif + +if STARPU_HAVE_OPENMP +STARPU_EXAMPLES += \ + openmp/vector_scal_omp \ + sched_ctx/sched_ctx_without_sched_policy\ + sched_ctx/nested_sched_ctxs \ + sched_ctx/sched_ctx_without_sched_policy_awake\ + sched_ctx/parallel_tasks_reuse_handle \ + sched_ctx/parallel_code + +if STARPU_HAVE_HWLOC +if STARPU_HWLOC_HAVE_TOPOLOGY_DUP +STARPU_EXAMPLES += \ + parallel_workers/parallel_workers \ + parallel_workers/parallel_workers_func \ + parallel_workers/parallel_workers_oldapi + +parallel_workers_parallel_workers_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +parallel_workers_parallel_workers_func_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +parallel_workers_parallel_workers_oldapi_CFLAGS = \ + $(AM_CFLAGS) -fopenmp +endif +endif +endif + +endif !STARPU_SIMGRID + +if STARPU_USE_CUDA +STARPU_EXAMPLES += \ + sched_ctx/gpu_partition +sched_ctx_gpu_partition_SOURCES = \ + sched_ctx/gpu_partition.c \ + sched_ctx/axpy_partition_gpu.cu +endif + +################## +# Basic examples # +################## + +basic_examples_vector_scal_SOURCES = \ + basic_examples/vector_scal.c \ + basic_examples/vector_scal_cpu.c + +basic_examples_mult_SOURCES = \ + basic_examples/mult.c + +if STARPU_HAVE_ICC +if STARPU_CROSS_COMPILING +basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +else +basic_examples_vector_scal_SOURCES += \ + basic_examples/vector_scal_cpu_icc.icc +basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(ICC) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +endif +else +basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +endif + +if STARPU_USE_CUDA +basic_examples_vector_scal_SOURCES += \ + basic_examples/vector_scal_cuda.cu +basic_examples_mult_SOURCES += \ + basic_examples/mult_cuda.cu +endif + +if STARPU_USE_HIP +basic_examples_vector_scal_SOURCES += \ + basic_examples/vector_scal_hip.hip +basic_examples_mult_SOURCES += \ + basic_examples/mult_hip.hip +endif + +if STARPU_USE_OPENCL +basic_examples_vector_scal_SOURCES += \ + basic_examples/vector_scal_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + basic_examples/vector_scal_opencl_kernel.cl +endif + +if STARPU_HAVE_F77 +basic_examples_vector_scal_fortran_SOURCES = \ + basic_examples/vector_scal_fortran.F \ + basic_examples/vector_scal_c.c \ + basic_examples/vector_scal_cpu.c + +if STARPU_USE_CUDA +basic_examples_vector_scal_fortran_SOURCES += \ + basic_examples/vector_scal_cuda.cu +basic_examples_vector_scal_fortran_LDADD = \ + $(STARPU_CUDA_FORTRAN_LDFLAGS) +endif + +if STARPU_HAVE_F77_H +fortran_hello_SOURCES = \ + fortran/hello_c.c \ + fortran/hello.F \ + fortran/starpu_fortran.h +endif +endif + +if STARPU_HAVE_FC +fortran90_f90_example_SOURCES = \ + fortran90/mod_types.f90 \ + fortran90/starpu_mod.f90 \ + fortran90/mod_interface.f90 \ + fortran90/mod_compute.f90 \ + fortran90/marshalling.c \ + fortran90/f90_example.f90 + +native_fortran_nf_vector_SOURCES = \ + native_fortran/nf_codelets.f90 \ + 
native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_vector.f90 + +native_fortran_nf_matrix_SOURCES = \ + native_fortran/nf_codelets.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_matrix.f90 + +native_fortran_nf_example_SOURCES = \ + native_fortran/nf_types.f90 \ + native_fortran/nf_compute.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_example.f90 + +native_fortran_nf_dynbuf_SOURCES = \ + native_fortran/nf_dynbuf_cl.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_dynbuf.f90 + +native_fortran_nf_varbuf_SOURCES = \ + native_fortran/nf_varbuf_cl.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_varbuf.f90 + +native_fortran_nf_sched_ctx_SOURCES = \ + native_fortran/nf_sched_ctx_cl.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_sched_ctx.f90 + +native_fortran_nf_partition_SOURCES = \ + native_fortran/nf_partition_cl.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_partition.f90 +endif + +####################### +# Multiformat example # +####################### +basic_examples_multiformat_SOURCES = \ + basic_examples/multiformat.c \ + basic_examples/multiformat_conversion_codelets.c + +if STARPU_USE_CUDA +basic_examples_multiformat_SOURCES += \ + basic_examples/multiformat_cuda.cu \ + basic_examples/multiformat_conversion_codelets_cuda.cu +endif + +if STARPU_USE_OPENCL +basic_examples_multiformat_SOURCES += \ + basic_examples/multiformat_opencl.c \ + basic_examples/multiformat_conversion_codelets_opencl.c + +nobase_STARPU_OPENCL_DATA_DATA += \ + basic_examples/multiformat_opencl_kernel.cl \ + basic_examples/multiformat_conversion_codelets_opencl_kernel.cl +endif + +################# +# block example # +################# + +basic_examples_block_SOURCES = \ + basic_examples/block.c \ + basic_examples/block_cpu.c + +if STARPU_USE_CUDA +basic_examples_block_SOURCES += \ + basic_examples/block_cuda.cu +endif +if STARPU_USE_HIP +basic_examples_block_SOURCES += \ + basic_examples/block_hip.hip +endif 
+ +if STARPU_USE_OPENCL +basic_examples_block_SOURCES += \ + basic_examples/block_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + basic_examples/block_opencl_kernel.cl +endif + +#################### +# Variable example # +#################### + +basic_examples_variable_SOURCES = \ + basic_examples/variable.c \ + basic_examples/variable_kernels_cpu.c + +if STARPU_USE_CUDA +basic_examples_variable_SOURCES += \ + basic_examples/variable_kernels.cu +endif +if STARPU_USE_OPENCL +basic_examples_variable_SOURCES += \ + basic_examples/variable_kernels_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + basic_examples/variable_kernels_opencl_kernel.cl +endif + +########### +# Filters # +########### +filters_fvector_SOURCES = \ + filters/fvector.c \ + filters/fvector_cpu.c +if STARPU_USE_CUDA +filters_fvector_SOURCES += \ + filters/fvector_cuda.cu +endif +if STARPU_USE_HIP +filters_fvector_SOURCES += \ + filters/fvector_hip.hip +endif + +filters_fmatrix_SOURCES = \ + filters/fmatrix.c \ + filters/fmatrix_print.c \ + filters/fmatrix_cpu.c +if STARPU_USE_CUDA +filters_fmatrix_SOURCES += \ + filters/fmatrix_cuda.cu +endif +if STARPU_USE_HIP +filters_fmatrix_SOURCES += \ + filters/fmatrix_hip.hip +endif + +filters_fblock_SOURCES = \ + filters/fblock.c \ + filters/fblock_print.c \ + filters/fblock_cpu.c + +if STARPU_USE_CUDA +filters_fblock_SOURCES += \ + filters/fblock_cuda.cu +endif +if STARPU_USE_HIP +filters_fblock_SOURCES += \ + filters/fblock_hip.hip +endif +if STARPU_USE_OPENCL +filters_fblock_SOURCES += \ + filters/fblock_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + filters/fblock_opencl_kernel.cl +endif + +filters_ftensor_SOURCES = \ + filters/ftensor.c \ + filters/ftensor_print.c \ + filters/ftensor_cpu.c +if STARPU_USE_CUDA +filters_ftensor_SOURCES += \ + filters/ftensor_cuda.cu +endif +if STARPU_USE_HIP +filters_ftensor_SOURCES += \ + filters/ftensor_hip.hip +endif + +filters_fndim_SOURCES = \ + filters/fndim.c \ + filters/ftensor_print.c \ + filters/f4d_cpu.c +if 
STARPU_USE_CUDA +filters_fndim_SOURCES += \ + filters/f4d_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_SOURCES += \ + filters/f4d_hip.hip +endif + +filters_fmatrix_pick_vector_SOURCES = \ + filters/fmatrix_pick_vector.c \ + filters/fmatrix_print.c \ + filters/fvector_cpu.c +if STARPU_USE_CUDA +filters_fmatrix_pick_vector_SOURCES += \ + filters/fvector_cuda.cu +endif +if STARPU_USE_HIP +filters_fmatrix_pick_vector_SOURCES += \ + filters/fvector_hip.hip +endif + +filters_fmatrix_pick_variable_SOURCES = \ + filters/fmatrix_pick_variable.c \ + filters/fmatrix_print.c +if STARPU_USE_CUDA +filters_fmatrix_pick_variable_SOURCES += \ + filters/fvariable_cuda.cu +endif + +filters_fblock_pick_matrix_SOURCES = \ + filters/fblock_pick_matrix.c \ + filters/fblock_print.c \ + filters/fmatrix_print.c \ + filters/fmatrix_cpu.c +if STARPU_USE_CUDA +filters_fblock_pick_matrix_SOURCES += \ + filters/fmatrix_cuda.cu +endif +if STARPU_USE_HIP +filters_fblock_pick_matrix_SOURCES += \ + filters/fmatrix_hip.hip +endif + +filters_fblock_pick_variable_SOURCES = \ + filters/fblock_pick_variable.c \ + filters/fblock_print.c +if STARPU_USE_CUDA +filters_fblock_pick_variable_SOURCES += \ + filters/fvariable_cuda.cu +endif + +filters_ftensor_pick_block_SOURCES = \ + filters/ftensor_pick_block.c \ + filters/ftensor_print.c \ + filters/fblock_print.c \ + filters/fblock_cpu.c +if STARPU_USE_CUDA +filters_ftensor_pick_block_SOURCES += \ + filters/fblock_cuda.cu +endif +if STARPU_USE_HIP +filters_ftensor_pick_block_SOURCES += \ + filters/fblock_hip.hip +endif + +filters_ftensor_pick_variable_SOURCES = \ + filters/ftensor_pick_variable.c \ + filters/ftensor_print.c +if STARPU_USE_CUDA +filters_ftensor_pick_variable_SOURCES += \ + filters/fvariable_cuda.cu +endif + +filters_fndim_pick_ndim_SOURCES = \ + filters/fndim_pick_ndim.c \ + filters/ftensor_print.c \ + filters/fblock_print.c \ + filters/f3d_cpu.c +if STARPU_USE_CUDA +filters_fndim_pick_ndim_SOURCES += \ + filters/f3d_cuda.cu +endif +if 
STARPU_USE_HIP +filters_fndim_pick_ndim_SOURCES += \ + filters/f3d_hip.hip +endif + +filters_fndim_5d_pick_tensor_SOURCES = \ + filters/fndim_5d_pick_tensor.c \ + filters/f5d_print.c \ + filters/ftensor_print.c \ + filters/ftensor_cpu.c +if STARPU_USE_CUDA +filters_fndim_5d_pick_tensor_SOURCES += \ + filters/ftensor_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_5d_pick_tensor_SOURCES += \ + filters/ftensor_hip.hip +endif + +filters_fndim_4d_pick_block_SOURCES = \ + filters/fndim_4d_pick_block.c \ + filters/ftensor_print.c \ + filters/fblock_print.c \ + filters/fblock_cpu.c +if STARPU_USE_CUDA +filters_fndim_4d_pick_block_SOURCES += \ + filters/fblock_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_4d_pick_block_SOURCES += \ + filters/fblock_hip.hip +endif + +filters_fndim_3d_pick_matrix_SOURCES = \ + filters/fndim_3d_pick_matrix.c \ + filters/fblock_print.c \ + filters/fmatrix_print.c \ + filters/fmatrix_cpu.c +if STARPU_USE_CUDA +filters_fndim_3d_pick_matrix_SOURCES += \ + filters/fmatrix_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_3d_pick_matrix_SOURCES += \ + filters/fmatrix_hip.hip +endif + +filters_fndim_2d_pick_vector_SOURCES = \ + filters/fndim_2d_pick_vector.c \ + filters/fmatrix_print.c \ + filters/fvector_cpu.c +if STARPU_USE_CUDA +filters_fndim_2d_pick_vector_SOURCES += \ + filters/fvector_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_2d_pick_vector_SOURCES += \ + filters/fvector_hip.hip +endif + +filters_fndim_pick_variable_SOURCES = \ + filters/fndim_pick_variable.c \ + filters/f5d_print.c + +filters_fndim_to_tensor_SOURCES = \ + filters/fndim_to_tensor.c \ + filters/ftensor_print.c \ + filters/ftensor_cpu.c +if STARPU_USE_CUDA +filters_fndim_to_tensor_SOURCES += \ + filters/ftensor_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_to_tensor_SOURCES += \ + filters/ftensor_hip.hip +endif + +filters_fndim_to_block_SOURCES = \ + filters/fndim_to_block.c \ + filters/fblock_print.c \ + filters/fblock_cpu.c +if STARPU_USE_CUDA 
+filters_fndim_to_block_SOURCES += \ + filters/fblock_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_to_block_SOURCES += \ + filters/fblock_hip.hip +endif + +filters_fndim_to_matrix_SOURCES = \ + filters/fndim_to_matrix.c \ + filters/fmatrix_print.c \ + filters/fmatrix_cpu.c +if STARPU_USE_CUDA +filters_fndim_to_matrix_SOURCES += \ + filters/fmatrix_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_to_matrix_SOURCES += \ + filters/fmatrix_hip.hip +endif + +filters_fndim_to_vector_SOURCES = \ + filters/fndim_to_vector.c \ + filters/fvector_cpu.c +if STARPU_USE_CUDA +filters_fndim_to_vector_SOURCES += \ + filters/fvector_cuda.cu +endif +if STARPU_USE_HIP +filters_fndim_to_vector_SOURCES += \ + filters/fvector_hip.hip +endif + +filters_fmultiple_manual_SOURCES = \ + filters/fmultiple_manual.c + +if STARPU_USE_CUDA +filters_fmultiple_manual_SOURCES += \ + filters/fmultiple_cuda.cu +endif +if STARPU_USE_HIP +filters_fmultiple_manual_SOURCES += \ + filters/fmultiple_hip.hip +endif + +filters_fmultiple_submit_SOURCES = \ + filters/fmultiple_submit.c + +if STARPU_USE_CUDA +filters_fmultiple_submit_SOURCES += \ + filters/fmultiple_cuda.cu +endif +if STARPU_USE_HIP +filters_fmultiple_submit_SOURCES += \ + filters/fmultiple_hip.hip +endif + +filters_fmultiple_submit_readonly_SOURCES = \ + filters/fmultiple_submit_readonly.c + +filters_fmultiple_submit_readonly_downgrade_SOURCES = \ + filters/fmultiple_submit_readonly_downgrade.c + +if STARPU_USE_CUDA +filters_fmultiple_submit_readonly_SOURCES += \ + filters/fmultiple_cuda.cu +filters_fmultiple_submit_readonly_downgrade_SOURCES += \ + filters/fmultiple_cuda.cu +endif +if STARPU_USE_HIP +filters_fmultiple_submit_readonly_SOURCES += \ + filters/fmultiple_hip.hip +filters_fmultiple_submit_readonly_downgrade_SOURCES += \ + filters/fmultiple_hip.hip +endif + +filters_fmultiple_submit_implicit_SOURCES = \ + filters/fmultiple_submit_implicit.c + +if STARPU_USE_CUDA +filters_fmultiple_submit_implicit_SOURCES += \ + 
filters/fmultiple_cuda.cu +endif +if STARPU_USE_HIP +filters_fmultiple_submit_implicit_SOURCES += \ + filters/fmultiple_hip.hip +endif + +############################# +# Custom multiformat filter # +############################# + +#TODO: see why the application is failing +#lt-custom_mf_filter: .../src/datawizard/malloc.c:784: starpu_free_on_node: Assertion `chunk != _starpu_chunk_list_end(chunks[dst_node])' failed. + +examplebin_PROGRAMS += \ + filters/custom_mf/custom_mf_filter + +filters_custom_mf_custom_mf_filter_SOURCES=\ + filters/custom_mf/custom_mf_filter.c \ + filters/custom_mf/custom_interface.c \ + filters/custom_mf/custom_conversion_codelets.c + +if STARPU_USE_CUDA +filters_custom_mf_custom_mf_filter_SOURCES += \ + filters/custom_mf/conversion.cu \ + filters/custom_mf/cuda.cu +endif + +if STARPU_USE_OPENCL +filters_custom_mf_custom_mf_filter_SOURCES += \ + filters/custom_mf/conversion_opencl.c \ + filters/custom_mf/custom_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + filters/custom_mf/conversion_opencl.cl \ + filters/custom_mf/custom_opencl.cl +endif + +################ +# AXPY example # +################ + +if !STARPU_NO_BLAS_LIB +axpy_axpy_SOURCES = \ + axpy/axpy.c \ + common/blas.c +if STARPU_USE_OPENCL +axpy_axpy_SOURCES += \ + axpy/axpy_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + axpy/axpy_opencl_kernel.cl +endif + +axpy_axpy_LDADD = \ + $(STARPU_BLAS_LDFLAGS) +endif + +################ +# Mult example # +################ + +mult_sgemm_SOURCES = \ + mult/sgemm.c + +mult_sgemm_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_sgemm_layout_SOURCES = \ + mult/sgemm_layout.c + +mult_sgemm_layout_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_dgemm_SOURCES = \ + mult/dgemm.c + +mult_dgemm_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_dgemm_layout_SOURCES = \ + mult/dgemm_layout.c + +mult_dgemm_layout_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +if !STARPU_NO_BLAS_LIB +mult_sgemm_SOURCES += \ + common/blas.c +mult_dgemm_SOURCES += \ + common/blas.c 
+mult_sgemm_layout_SOURCES += \ + common/blas.c +mult_dgemm_layout_SOURCES += \ + common/blas.c +endif + +##################### +# Trs_sgemm example # +##################### + +if !STARPU_NO_BLAS_LIB + +transactions_trs_sgemm_SOURCES = \ + transactions/trs_sgemm.c \ + common/blas.c + +transactions_trs_sgemm_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +endif + +#################### +# Cholesky example # +#################### + +if !STARPU_NO_BLAS_LIB + +pkglib_LTLIBRARIES += \ + cholesky/libmy_dmda.la +cholesky_libmy_dmda_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version + +SHELL_TESTS += \ + cholesky/cholesky_julia.sh + +cholesky_cholesky_tag_SOURCES = \ + cholesky/cholesky_tag.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + common/blas.c + +cholesky_cholesky_tag_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +cholesky_cholesky_tile_tag_SOURCES = \ + cholesky/cholesky_tile_tag.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + common/blas.c + +cholesky_cholesky_tile_tag_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +cholesky_cholesky_grain_tag_SOURCES = \ + cholesky/cholesky_grain_tag.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + common/blas.c + +cholesky_cholesky_grain_tag_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +cholesky_cholesky_implicit_SOURCES = \ + cholesky/cholesky_implicit.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + sched_ctx_utils/sched_ctx_utils.c \ + common/blas.c + +cholesky_cholesky_implicit_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +cholesky_cholesky_compil_SOURCES = \ + cholesky/cholesky_compil.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + sched_ctx_utils/sched_ctx_utils.c \ + common/blas.c + +cholesky_cholesky_compil_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +endif + +############## +# LU example # +############## + +if !STARPU_NO_BLAS_LIB + +lu_lu_example_float_SOURCES = \ + lu/lu_example_float.c \ + lu/slu.c \ + lu/slu_pivot.c \ + lu/slu_kernels.c \ + common/blas.c + 
+lu_lu_example_float_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_example_double_SOURCES = \ + lu/lu_example_double.c \ + lu/dlu.c \ + lu/dlu_pivot.c \ + lu/dlu_kernels.c \ + common/blas.c + +lu_lu_example_double_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_implicit_example_float_SOURCES = \ + lu/lu_example_float.c \ + lu/slu_implicit.c \ + lu/slu_implicit_pivot.c \ + lu/slu_kernels.c \ + common/blas.c + +lu_lu_implicit_example_float_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_implicit_example_double_SOURCES = \ + lu/lu_example_double.c \ + lu/dlu_implicit.c \ + lu/dlu_implicit_pivot.c \ + lu/dlu_kernels.c \ + common/blas.c + +lu_lu_implicit_example_double_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +if STARPU_MKL_BLAS_LIB +lu_lu_example_complex_float_SOURCES = \ + lu/lu_example_complex_float.c \ + lu/clu.c \ + lu/clu_pivot.c \ + lu/clu_kernels.c \ + lu/blas_complex.c \ + common/blas.c + +lu_lu_example_complex_float_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_implicit_example_complex_float_SOURCES = \ + lu/lu_example_complex_float.c \ + lu/clu_implicit.c \ + lu/clu_implicit_pivot.c \ + lu/clu_kernels.c \ + lu/blas_complex.c \ + common/blas.c + +lu_lu_implicit_example_complex_float_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_example_complex_double_SOURCES = \ + lu/lu_example_complex_double.c \ + lu/zlu.c \ + lu/zlu_pivot.c \ + lu/zlu_kernels.c \ + lu/blas_complex.c \ + common/blas.c + +lu_lu_example_complex_double_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +lu_lu_implicit_example_complex_double_SOURCES = \ + lu/lu_example_complex_double.c \ + lu/zlu_implicit.c \ + lu/zlu_implicit_pivot.c \ + lu/zlu_kernels.c \ + lu/blas_complex.c \ + common/blas.c + +lu_lu_implicit_example_complex_double_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +endif +endif + + +################ +# Heat example # +################ + +if !STARPU_NO_BLAS_LIB + +heat_heat_SOURCES = \ + heat/heat.c \ + heat/dw_factolu.c \ + heat/dw_factolu_tag.c \ + heat/dw_factolu_grain.c \ + heat/dw_sparse_cg.c \ + heat/heat_display.c 
\ + heat/lu_kernels_model.c \ + heat/dw_sparse_cg_kernels.c \ + heat/dw_factolu_kernels.c \ + common/blas.c + +heat_heat_LDADD = \ + $(STARPU_OPENGL_RENDER_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) + +endif + +############## +# CG example # +############## + +if !STARPU_NO_BLAS_LIB + +cg_cg_SOURCES = \ + cg/cg.c \ + common/blas.c + +cg_cg_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +endif + +################ +# SPMD example # +################ + +spmd_vector_scal_spmd_SOURCES = \ + spmd/vector_scal_spmd.c + +################ +# SpMV example # +################ + +spmv_spmv_SOURCES = \ + spmv/spmv.c \ + spmv/spmv_kernels.c + +if STARPU_USE_CUDA +spmv_spmv_SOURCES += \ + spmv/spmv_cuda.cu +endif + +spmv_dw_block_spmv_SOURCES = \ + spmv/dw_block_spmv.c \ + spmv/dw_block_spmv_kernels.c \ + spmv/matrix_market/mm_to_bcsr.c \ + spmv/matrix_market/mmio.c + +spmv_dw_block_spmv_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +########################### +# C++ Incrementer example # +########################### + +cpp_incrementer_cpp_SOURCES = \ + cpp/incrementer_cpp.cpp + +if STARPU_USE_CUDA +cpp_incrementer_cpp_SOURCES += \ + incrementer/incrementer_kernels.cu +endif +if STARPU_USE_OPENCL +cpp_incrementer_cpp_SOURCES += \ + incrementer/incrementer_kernels_opencl.c +endif + +########################### +# C++ Add vectors example # +########################### + +cpp_add_vectors_SOURCES = \ + cpp/add_vectors.cpp + +cpp_add_vectors_interface_SOURCES = \ + cpp/add_vectors_interface.cpp + +if STARPU_HAVE_CXX11 +cpp_add_vectors_cpp11_SOURCES = \ + cpp/add_vectors_cpp11.cpp +endif + +####################### +# Incrementer example # +####################### + +incrementer_incrementer_SOURCES = \ + incrementer/incrementer.c +if STARPU_USE_CUDA +incrementer_incrementer_SOURCES += \ + incrementer/incrementer_kernels.cu +endif +if STARPU_USE_OPENCL +incrementer_incrementer_SOURCES += \ + incrementer/incrementer_kernels_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + 
incrementer/incrementer_kernels_opencl_kernel.cl +endif + +################## +# Binary example # +################## + +binary_binary_SOURCES = \ + binary/binary.c +if STARPU_USE_OPENCL +binary_binary_SOURCES += \ + incrementer/incrementer_kernels_opencl.c +endif + +##################### +# interface example # +##################### + +interface_complex_SOURCES = \ + interface/complex.c \ + interface/complex_interface.c \ + interface/complex_filters.c +if STARPU_USE_CUDA +interface_complex_SOURCES += \ + interface/complex_kernels.cu +endif + +if STARPU_USE_OPENCL +interface_complex_SOURCES +=\ + interface/complex_kernels_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + interface/complex_kernels.cl +endif + +interface_complex_dev_handle_complex_dev_handle_SOURCES = \ + interface/complex_dev_handle/complex_dev_handle.c \ + interface/complex_dev_handle/complex_dev_handle_interface.c \ + interface/complex_dev_handle/complex_dev_handle_filters.c +if STARPU_USE_CUDA +interface_complex_dev_handle_complex_dev_handle_SOURCES += \ + interface/complex_dev_handle/complex_dev_handle_kernels.cu +endif + +if STARPU_USE_OPENCL +interface_complex_dev_handle_complex_dev_handle_SOURCES +=\ + interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + interface/complex_dev_handle/complex_dev_handle_kernels.cl +endif + +###################### +# matVecMult example # +###################### + +if STARPU_USE_OPENCL +nobase_STARPU_OPENCL_DATA_DATA += \ + matvecmult/matvecmult_kernel.cl +endif + +####################### +# dot_product example # +####################### + +reductions_dot_product_SOURCES = \ + reductions/dot_product.c +if STARPU_USE_CUDA +reductions_dot_product_SOURCES += \ + reductions/dot_product_kernels.cu +endif +if STARPU_USE_OPENCL +nobase_STARPU_OPENCL_DATA_DATA += \ + reductions/dot_product_opencl_kernels.cl +endif + +################## +# Mandelbrot Set # +################## + +examplebin_PROGRAMS += \ + 
mandelbrot/mandelbrot + +mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) +if STARPU_HAVE_X11 +mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) +mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) +endif + +#################### +# Image downscaler # +#################### + +examplebin_PROGRAMS += \ + ppm_downscaler/ppm_downscaler \ + ppm_downscaler/yuv_downscaler + +###### +# Pi # +###### + +if !STARPU_HAVE_WINDOWS + +examplebin_PROGRAMS += \ + pi/pi \ + pi/pi_redux + +pi_pi_SOURCES = \ + pi/pi.c \ + pi/SobolQRNG/sobol_gold.c \ + pi/SobolQRNG/sobol_primitives.c + +if STARPU_USE_CUDA +pi_pi_SOURCES += \ + pi/pi_kernel.cu \ + pi/SobolQRNG/sobol_gpu.cu +endif + +pi_pi_redux_SOURCES = \ + pi/pi_redux.c + +if STARPU_USE_CUDA +pi_pi_redux_SOURCES += \ + pi/pi_redux_kernel.cu +pi_pi_redux_LDADD = \ + $(STARPU_CURAND_LDFLAGS) +endif +endif + +########################### +# OpenGL interoperability # +########################### + +if STARPU_HAVE_OPENGL +examplebin_PROGRAMS += \ + gl_interop/gl_interop \ + gl_interop/gl_interop_idle + +gl_interop_gl_interop_LDADD = \ + $(STARPU_OPENGL_RENDER_LDFLAGS) + +gl_interop_gl_interop_idle_LDADD = \ + $(STARPU_OPENGL_RENDER_LDFLAGS) +endif + +#################### +# pipeline example # +#################### + +if !STARPU_NO_BLAS_LIB +pipeline_pipeline_SOURCES = \ + pipeline/pipeline.c \ + common/blas.c + +pipeline_pipeline_LDADD = \ + $(STARPU_BLAS_LDFLAGS) +endif + +################## +# openmp example # +################## + +if STARPU_HAVE_OPENMP +openmp_vector_scal_omp_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +sched_ctx_parallel_code_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +sched_ctx_sched_ctx_without_sched_policy_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +sched_ctx_nested_sched_ctxs_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +sched_ctx_parallel_tasks_reuse_handle_CFLAGS = \ + $(AM_CFLAGS) -fopenmp + +endif + +###################### +# subgraphs examples # +###################### + +subgraphs_manual_SOURCES = \ + 
subgraphs/manual.c \ + subgraphs/codelets.c + +subgraphs_partition_SOURCES = \ + subgraphs/partition.c \ + subgraphs/codelets.c + +subgraphs_plan_SOURCES = \ + subgraphs/plan.c \ + subgraphs/codelets.c + + +# - link over source file to build our own object +fortran90/starpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ +native_fortran/fstarpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ + +if STARPU_HAVE_FC +# Fortran90 example + +# - express the creation of .mod along .o +starpu_mod.mod: fortran90/starpu_mod.o +mod_types.mod: fortran90/mod_types.o +mod_compute.mod: fortran90/mod_compute.o +mod_interface.mod: fortran90/mod_interface.o + +# - list explicit dependences to control proper module files dependencies +fortran90/mod_compute.o: mod_types.mod mod_interface.mod starpu_mod.mod +fortran90/f90_example.o: mod_types.mod mod_interface.mod mod_compute.mod starpu_mod.mod + + +# Native Fortran example + +# - express the creation of .mod along .o +fstarpu_mod.mod: native_fortran/fstarpu_mod.o +nf_codelets.mod: native_fortran/nf_codelets.o +nf_compute.mod: native_fortran/nf_compute.o +nf_dynbuf_cl.mod: native_fortran/nf_dynbuf_cl.o +nf_partition_cl.mod: native_fortran/nf_partition_cl.o +nf_sched_ctx_cl.mod: native_fortran/nf_sched_ctx_cl.o +nf_types.mod: native_fortran/nf_types.o +nf_varbuf_cl.mod: native_fortran/nf_varbuf_cl.o + +# - list explicit dependences to control proper module files dependencies +native_fortran/nf_codelets.o: fstarpu_mod.mod +native_fortran/nf_compute.o: nf_types.mod fstarpu_mod.mod +native_fortran/nf_dynbuf_cl.o: fstarpu_mod.mod +native_fortran/nf_dynbuf.o: nf_dynbuf_cl.mod fstarpu_mod.mod +native_fortran/nf_example.o: nf_types.mod nf_compute.mod fstarpu_mod.mod +native_fortran/nf_matrix.o: nf_codelets.mod fstarpu_mod.mod +native_fortran/nf_partition_cl.o: fstarpu_mod.mod +native_fortran/nf_partition.o: nf_partition_cl.mod fstarpu_mod.mod 
+native_fortran/nf_sched_ctx_cl.o: fstarpu_mod.mod +native_fortran/nf_sched_ctx.o: nf_sched_ctx_cl.mod fstarpu_mod.mod +native_fortran/nf_varbuf_cl.o: fstarpu_mod.mod +native_fortran/nf_varbuf.o: nf_varbuf_cl.mod fstarpu_mod.mod +native_fortran/nf_vector.o: nf_codelets.mod fstarpu_mod.mod + +endif diff --git a/examples/Makefile.in b/examples/Makefile.in new file mode 100644 index 0000000..9fb3d69 --- /dev/null +++ b/examples/Makefile.in @@ -0,0 +1,8263 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_20) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +examplebin_PROGRAMS = $(am__EXEEXT_15) $(am__EXEEXT_16) \ + $(am__EXEEXT_17) filters/custom_mf/custom_mf_filter$(EXEEXT) \ + mandelbrot/mandelbrot$(EXEEXT) \ + ppm_downscaler/ppm_downscaler$(EXEEXT) \ + ppm_downscaler/yuv_downscaler$(EXEEXT) $(am__EXEEXT_18) \ + $(am__EXEEXT_19) +TESTS = $(SHELL_TESTS) $(am__EXEEXT_15) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_FXT_TRUE@am__append_8 = mult/sgemm.sh +check_PROGRAMS = $(am__EXEEXT_15) +@STARPU_SIMGRID_FALSE@am__append_9 = basic_examples/hello_world \ +@STARPU_SIMGRID_FALSE@ basic_examples/hooks \ +@STARPU_SIMGRID_FALSE@ basic_examples/topology \ +@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal \ +@STARPU_SIMGRID_FALSE@ basic_examples/mult basic_examples/block \ +@STARPU_SIMGRID_FALSE@ basic_examples/variable \ +@STARPU_SIMGRID_FALSE@ basic_examples/multiformat \ +@STARPU_SIMGRID_FALSE@ basic_examples/dynamic_handles \ +@STARPU_SIMGRID_FALSE@ basic_examples/task_insert_color \ +@STARPU_SIMGRID_FALSE@ basic_examples/ndim mlr/mlr \ +@STARPU_SIMGRID_FALSE@ cpp/incrementer_cpp cpp/add_vectors \ +@STARPU_SIMGRID_FALSE@ cpp/add_vectors_interface filters/alloc \ +@STARPU_SIMGRID_FALSE@ filters/fread filters/fvector \ +@STARPU_SIMGRID_FALSE@ filters/fvector_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/ftensor \ 
+@STARPU_SIMGRID_FALSE@ filters/ftensor_pick_block \ +@STARPU_SIMGRID_FALSE@ filters/ftensor_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/fblock \ +@STARPU_SIMGRID_FALSE@ filters/fblock_pick_matrix \ +@STARPU_SIMGRID_FALSE@ filters/fblock_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_vector \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/fndim filters/fndim_pick_ndim \ +@STARPU_SIMGRID_FALSE@ filters/fndim_5d_pick_tensor \ +@STARPU_SIMGRID_FALSE@ filters/fndim_4d_pick_block \ +@STARPU_SIMGRID_FALSE@ filters/fndim_3d_pick_matrix \ +@STARPU_SIMGRID_FALSE@ filters/fndim_2d_pick_vector \ +@STARPU_SIMGRID_FALSE@ filters/fndim_1d_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/fndim_pick_variable \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_tensor \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_block \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_matrix \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_vector \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_variable \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_manual \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly_downgrade \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_implicit \ +@STARPU_SIMGRID_FALSE@ filters/frecursive filters/shadow \ +@STARPU_SIMGRID_FALSE@ filters/shadow2d filters/shadow3d \ +@STARPU_SIMGRID_FALSE@ filters/shadow4d filters/shadownd \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example2 \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example3 \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example4 \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_restartable \ +@STARPU_SIMGRID_FALSE@ transactions/trs_inc \ +@STARPU_SIMGRID_FALSE@ spmd/vector_scal_spmd spmv/spmv \ +@STARPU_SIMGRID_FALSE@ callback/callback callback/prologue \ +@STARPU_SIMGRID_FALSE@ incrementer/incrementer 
binary/binary \ +@STARPU_SIMGRID_FALSE@ interface/complex \ +@STARPU_SIMGRID_FALSE@ interface/complex_dev_handle/complex_dev_handle \ +@STARPU_SIMGRID_FALSE@ matvecmult/matvecmult \ +@STARPU_SIMGRID_FALSE@ profiling/profiling \ +@STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_01 \ +@STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_02 \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_01 \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_02 \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_03 \ +@STARPU_SIMGRID_FALSE@ scheduler/heteroprio_test \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_empty \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_remove \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_delete \ +@STARPU_SIMGRID_FALSE@ sched_ctx/two_cpu_contexts \ +@STARPU_SIMGRID_FALSE@ sched_ctx/dummy_sched_with_ctx \ +@STARPU_SIMGRID_FALSE@ worker_collections/worker_tree_example \ +@STARPU_SIMGRID_FALSE@ reductions/dot_product \ +@STARPU_SIMGRID_FALSE@ reductions/minmax_reduction \ +@STARPU_SIMGRID_FALSE@ dependency/task_end_dep \ +@STARPU_SIMGRID_FALSE@ dependency/task_end_dep_add \ +@STARPU_SIMGRID_FALSE@ dependency/sequential_consistency \ +@STARPU_SIMGRID_FALSE@ subgraphs/manual subgraphs/partition \ +@STARPU_SIMGRID_FALSE@ subgraphs/plan scheduler/dummy_sched +@STARPU_SIMGRID_FALSE@am__append_10 = profiling_tool/prof.sh \ +@STARPU_SIMGRID_FALSE@ scheduler/libdummy_sched.sh +@STARPU_SIMGRID_FALSE@am__append_11 = \ +@STARPU_SIMGRID_FALSE@ profiling_tool/libprofiling_tool.la \ +@STARPU_SIMGRID_FALSE@ scheduler/libdummy_sched.la +@STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@am__append_12 = \ +@STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@ cpp/add_vectors_cpp11 + +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_13 = \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ fortran/hello + +@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_14 = \ 
+@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal_fortran + +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_15 = \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ fortran90/f90_example \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_vector \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_matrix \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_example \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_dynbuf \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_varbuf \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_sched_ctx \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_partition + +@STARPU_USE_CUDA_TRUE@am__append_16 = \ +@STARPU_USE_CUDA_TRUE@ mult/sgemm \ +@STARPU_USE_CUDA_TRUE@ mult/dgemm + +@STARPU_USE_CUDA_TRUE@am__append_17 = \ +@STARPU_USE_CUDA_TRUE@ mult/sgemm_layout \ +@STARPU_USE_CUDA_TRUE@ mult/dgemm_layout + +@STARPU_USE_HIPBLAS_TRUE@am__append_18 = \ +@STARPU_USE_HIPBLAS_TRUE@ mult/sgemm \ +@STARPU_USE_HIPBLAS_TRUE@ mult/dgemm + +@STARPU_NO_BLAS_LIB_FALSE@am__append_19 = \ +@STARPU_NO_BLAS_LIB_FALSE@ mult/sgemm \ +@STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil + +@STARPU_NO_BLAS_LIB_FALSE@am__append_20 = \ +@STARPU_NO_BLAS_LIB_FALSE@ mult/sgemm_layout \ +@STARPU_NO_BLAS_LIB_FALSE@ 
mult/dgemm_layout + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_21 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ axpy/axpy \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cholesky/cholesky_grain_tag \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ pipeline/pipeline \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ transactions/trs_sgemm + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_22 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat.sh \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ lu/lu.sh + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_QUICK_CHECK_FALSE@@STARPU_SIMGRID_TRUE@am__append_23 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_QUICK_CHECK_FALSE@@STARPU_SIMGRID_TRUE@ cholesky/cholesky.sh + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@am__append_24 = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_float \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_double \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_float \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_double + +@STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@am__append_25 = \ +@STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@ spmv/dw_block_spmv + +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_26 = \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ fortran/hello + +@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_27 = \ +@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal_fortran + +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@am__append_28 = \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ openmp/vector_scal_omp \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ 
sched_ctx/sched_ctx_without_sched_policy\ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/nested_sched_ctxs \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy_awake\ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_tasks_reuse_handle \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_code + +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@am__append_29 = \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_func \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_oldapi + +@STARPU_USE_CUDA_TRUE@am__append_30 = \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition + +@STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@am__append_31 = \ +@STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@ basic_examples/vector_scal_cpu_icc.icc + +@STARPU_USE_CUDA_TRUE@am__append_32 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/vector_scal_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_33 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/mult_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_34 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/vector_scal_hip.hip + +@STARPU_USE_HIP_TRUE@am__append_35 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/mult_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_36 = \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/vector_scal_opencl.c + +@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@am__append_37 = \ +@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@ basic_examples/vector_scal_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_38 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/multiformat_cuda.cu \ +@STARPU_USE_CUDA_TRUE@ 
basic_examples/multiformat_conversion_codelets_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_39 = \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_opencl.c \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_40 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/block_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_41 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/block_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_42 = \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_43 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/variable_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_44 = \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/variable_kernels_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_45 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_46 = \ +@STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_47 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_48 = \ +@STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_49 = \ +@STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_50 = \ +@STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_51 = \ +@STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_52 = \ +@STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_53 = \ +@STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_54 = \ +@STARPU_USE_CUDA_TRUE@ filters/f4d_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_55 = \ +@STARPU_USE_HIP_TRUE@ filters/f4d_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_56 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_57 = \ +@STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_58 = \ +@STARPU_USE_CUDA_TRUE@ 
filters/fvariable_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_59 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_60 = \ +@STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_61 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_62 = \ +@STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_63 = \ +@STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_64 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_65 = \ +@STARPU_USE_CUDA_TRUE@ filters/f3d_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_66 = \ +@STARPU_USE_HIP_TRUE@ filters/f3d_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_67 = \ +@STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_68 = \ +@STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_69 = \ +@STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_70 = \ +@STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_71 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_72 = \ +@STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_73 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_74 = \ +@STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_75 = \ +@STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_76 = \ +@STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_77 = \ +@STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_78 = \ +@STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_79 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_80 = \ +@STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip + 
+@STARPU_USE_CUDA_TRUE@am__append_81 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_82 = \ +@STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_83 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_84 = \ +@STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_85 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_86 = \ +@STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_87 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_88 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_89 = \ +@STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip + +@STARPU_USE_HIP_TRUE@am__append_90 = \ +@STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_91 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_92 = \ +@STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip + +@STARPU_USE_CUDA_TRUE@am__append_93 = \ +@STARPU_USE_CUDA_TRUE@ filters/custom_mf/conversion.cu \ +@STARPU_USE_CUDA_TRUE@ filters/custom_mf/cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_94 = \ +@STARPU_USE_OPENCL_TRUE@ filters/custom_mf/conversion_opencl.c \ +@STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.c + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__append_95 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@ axpy/axpy_opencl.c + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__append_96 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@ axpy/axpy_opencl_kernel.cl + +@STARPU_NO_BLAS_LIB_FALSE@am__append_97 = \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@am__append_98 = \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@am__append_99 = \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + 
+@STARPU_NO_BLAS_LIB_FALSE@am__append_100 = \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + + +#################### +# Cholesky example # +#################### +@STARPU_NO_BLAS_LIB_FALSE@am__append_101 = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/libmy_dmda.la + +@STARPU_NO_BLAS_LIB_FALSE@am__append_102 = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_julia.sh + +@STARPU_USE_CUDA_TRUE@am__append_103 = \ +@STARPU_USE_CUDA_TRUE@ spmv/spmv_cuda.cu + +@STARPU_USE_CUDA_TRUE@am__append_104 = \ +@STARPU_USE_CUDA_TRUE@ incrementer/incrementer_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_105 = \ +@STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_106 = \ +@STARPU_USE_CUDA_TRUE@ incrementer/incrementer_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_107 = \ +@STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c + +@STARPU_USE_OPENCL_TRUE@am__append_108 = \ +@STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_109 = \ +@STARPU_USE_CUDA_TRUE@ interface/complex_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_110 = \ +@STARPU_USE_OPENCL_TRUE@ interface/complex_kernels_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_111 = \ +@STARPU_USE_CUDA_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_112 = \ +@STARPU_USE_OPENCL_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_113 = \ +@STARPU_USE_CUDA_TRUE@ reductions/dot_product_kernels.cu + +@STARPU_HAVE_X11_TRUE@am__append_114 = $(X_CFLAGS) + +###### +# Pi # +###### +@STARPU_HAVE_WINDOWS_FALSE@am__append_115 = \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/pi \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux + +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__append_116 = \ +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/pi_kernel.cu \ +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/SobolQRNG/sobol_gpu.cu + 
+@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__append_117 = \ +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/pi_redux_kernel.cu + + +########################### +# OpenGL interoperability # +########################### +@STARPU_HAVE_OPENGL_TRUE@am__append_118 = \ +@STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop \ +@STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop_idle + +subdir = examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_SIMGRID_FALSE@am__EXEEXT_1 = \ +@STARPU_SIMGRID_FALSE@ basic_examples/hello_world$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/hooks$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/topology$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/mult$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/block$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/multiformat$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/dynamic_handles$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/task_insert_color$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ basic_examples/ndim$(EXEEXT) \ 
+@STARPU_SIMGRID_FALSE@ mlr/mlr$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ cpp/incrementer_cpp$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ cpp/add_vectors$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ cpp/add_vectors_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/alloc$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fread$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fvector$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fvector_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/ftensor$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/ftensor_pick_block$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/ftensor_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fblock$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fblock_pick_matrix$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fblock_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_vector$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_pick_ndim$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_5d_pick_tensor$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_4d_pick_block$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_3d_pick_matrix$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_2d_pick_vector$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_1d_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_pick_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_tensor$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_block$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_matrix$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_vector$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fndim_to_variable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_manual$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ 
filters/fmultiple_submit_readonly_downgrade$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_implicit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/frecursive$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/shadow$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/shadow2d$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/shadow3d$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/shadow4d$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/shadownd$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example2$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example3$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_example4$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ tag_example/tag_restartable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ transactions/trs_inc$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ spmd/vector_scal_spmd$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ spmv/spmv$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ callback/callback$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ callback/prologue$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ incrementer/incrementer$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ binary/binary$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ interface/complex$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ interface/complex_dev_handle/complex_dev_handle$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ matvecmult/matvecmult$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ profiling/profiling$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_01$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_02$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_01$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_02$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_03$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ scheduler/heteroprio_test$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_empty$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_remove$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_delete$(EXEEXT) \ 
+@STARPU_SIMGRID_FALSE@ sched_ctx/two_cpu_contexts$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/dummy_sched_with_ctx$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ worker_collections/worker_tree_example$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ reductions/dot_product$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ reductions/minmax_reduction$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ dependency/task_end_dep$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ dependency/task_end_dep_add$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ dependency/sequential_consistency$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ subgraphs/manual$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ subgraphs/partition$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ subgraphs/plan$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ scheduler/dummy_sched$(EXEEXT) +@STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_2 = cpp/add_vectors_cpp11$(EXEEXT) +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_3 = fortran/hello$(EXEEXT) +@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_4 = basic_examples/vector_scal_fortran$(EXEEXT) +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = fortran90/f90_example$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_vector$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_matrix$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_example$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_dynbuf$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_varbuf$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_sched_ctx$(EXEEXT) \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_partition$(EXEEXT) +@STARPU_USE_CUDA_TRUE@am__EXEEXT_6 = mult/sgemm$(EXEEXT) \ +@STARPU_USE_CUDA_TRUE@ mult/dgemm$(EXEEXT) 
+@STARPU_USE_HIPBLAS_TRUE@am__EXEEXT_7 = mult/sgemm$(EXEEXT) \ +@STARPU_USE_HIPBLAS_TRUE@ mult/dgemm$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_8 = mult/sgemm$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_9 = axpy/axpy$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cholesky/cholesky_grain_tag$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ pipeline/pipeline$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ transactions/trs_sgemm$(EXEEXT) +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_10 = lu/lu_example_complex_float$(EXEEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_double$(EXEEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_float$(EXEEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_double$(EXEEXT) +@STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_11 = spmv/dw_block_spmv$(EXEEXT) +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_12 = openmp/vector_scal_omp$(EXEEXT) \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ 
sched_ctx/nested_sched_ctxs$(EXEEXT) \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) \ +@STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_code$(EXEEXT) +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_13 = parallel_workers/parallel_workers$(EXEEXT) \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_func$(EXEEXT) \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_oldapi$(EXEEXT) +@STARPU_USE_CUDA_TRUE@am__EXEEXT_14 = \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition$(EXEEXT) +am__EXEEXT_15 = sched_ctx/prio$(EXEEXT) scheduler/dummy_sched$(EXEEXT) \ + scheduler/dummy_modular_sched$(EXEEXT) \ + worker_collections/worker_list_example$(EXEEXT) \ + api/bcsr_data_interface$(EXEEXT) \ + api/block_data_interface$(EXEEXT) \ + api/coo_data_interface$(EXEEXT) \ + api/csr_data_interface$(EXEEXT) \ + api/matrix_data_interface$(EXEEXT) \ + api/multiformat_data_interface$(EXEEXT) \ + api/tensor_data_interface$(EXEEXT) \ + api/variable_data_interface$(EXEEXT) \ + api/vector_data_interface$(EXEEXT) \ + api/void_data_interface$(EXEEXT) $(am__EXEEXT_1) \ + $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_5) $(am__EXEEXT_6) $(am__EXEEXT_7) \ + $(am__EXEEXT_8) $(am__EXEEXT_9) $(am__EXEEXT_10) \ + $(am__EXEEXT_11) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_12) $(am__EXEEXT_13) $(am__EXEEXT_14) +@STARPU_USE_CUDA_TRUE@am__EXEEXT_16 = mult/sgemm_layout$(EXEEXT) \ +@STARPU_USE_CUDA_TRUE@ mult/dgemm_layout$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_17 = mult/sgemm_layout$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm_layout$(EXEEXT) 
+@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_18 = pi/pi$(EXEEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux$(EXEEXT) +@STARPU_HAVE_OPENGL_TRUE@am__EXEEXT_19 = \ +@STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop$(EXEEXT) \ +@STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop_idle$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" \ + "$(DESTDIR)$(pkglibdir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_20 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(pkglib_LTLIBRARIES) +cholesky_libmy_dmda_la_LIBADD = +cholesky_libmy_dmda_la_SOURCES = cholesky/libmy_dmda.c +am__dirstamp = $(am__leading_dot)dirstamp +cholesky_libmy_dmda_la_OBJECTS = cholesky/libmy_dmda.lo +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +cholesky_libmy_dmda_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(cholesky_libmy_dmda_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_libmy_dmda_la_rpath = -rpath \ +@STARPU_NO_BLAS_LIB_FALSE@ $(pkglibdir) +profiling_tool_libprofiling_tool_la_LIBADD = +profiling_tool_libprofiling_tool_la_SOURCES = \ + profiling_tool/libprofiling_tool.c +profiling_tool_libprofiling_tool_la_OBJECTS = \ + profiling_tool/libprofiling_tool.lo +profiling_tool_libprofiling_tool_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(profiling_tool_libprofiling_tool_la_LDFLAGS) $(LDFLAGS) -o \ + $@ +@STARPU_SIMGRID_FALSE@am_profiling_tool_libprofiling_tool_la_rpath = \ +@STARPU_SIMGRID_FALSE@ -rpath $(pkglibdir) +scheduler_libdummy_sched_la_LIBADD = +scheduler_libdummy_sched_la_SOURCES = scheduler/libdummy_sched.c +scheduler_libdummy_sched_la_OBJECTS = scheduler/libdummy_sched.lo +scheduler_libdummy_sched_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(scheduler_libdummy_sched_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@STARPU_SIMGRID_FALSE@am_scheduler_libdummy_sched_la_rpath = -rpath \ +@STARPU_SIMGRID_FALSE@ $(pkglibdir) +api_bcsr_data_interface_SOURCES = api/bcsr_data_interface.c +api_bcsr_data_interface_OBJECTS = api/bcsr_data_interface.$(OBJEXT) +api_bcsr_data_interface_LDADD = $(LDADD) 
+api_block_data_interface_SOURCES = api/block_data_interface.c +api_block_data_interface_OBJECTS = api/block_data_interface.$(OBJEXT) +api_block_data_interface_LDADD = $(LDADD) +api_coo_data_interface_SOURCES = api/coo_data_interface.c +api_coo_data_interface_OBJECTS = api/coo_data_interface.$(OBJEXT) +api_coo_data_interface_LDADD = $(LDADD) +api_csr_data_interface_SOURCES = api/csr_data_interface.c +api_csr_data_interface_OBJECTS = api/csr_data_interface.$(OBJEXT) +api_csr_data_interface_LDADD = $(LDADD) +api_matrix_data_interface_SOURCES = api/matrix_data_interface.c +api_matrix_data_interface_OBJECTS = \ + api/matrix_data_interface.$(OBJEXT) +api_matrix_data_interface_LDADD = $(LDADD) +api_multiformat_data_interface_SOURCES = \ + api/multiformat_data_interface.c +api_multiformat_data_interface_OBJECTS = \ + api/multiformat_data_interface.$(OBJEXT) +api_multiformat_data_interface_LDADD = $(LDADD) +api_tensor_data_interface_SOURCES = api/tensor_data_interface.c +api_tensor_data_interface_OBJECTS = \ + api/tensor_data_interface.$(OBJEXT) +api_tensor_data_interface_LDADD = $(LDADD) +api_variable_data_interface_SOURCES = api/variable_data_interface.c +api_variable_data_interface_OBJECTS = \ + api/variable_data_interface.$(OBJEXT) +api_variable_data_interface_LDADD = $(LDADD) +api_vector_data_interface_SOURCES = api/vector_data_interface.c +api_vector_data_interface_OBJECTS = \ + api/vector_data_interface.$(OBJEXT) +api_vector_data_interface_LDADD = $(LDADD) +api_void_data_interface_SOURCES = api/void_data_interface.c +api_void_data_interface_OBJECTS = api/void_data_interface.$(OBJEXT) +api_void_data_interface_LDADD = $(LDADD) +am__axpy_axpy_SOURCES_DIST = axpy/axpy.c common/blas.c \ + axpy/axpy_opencl.c +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__objects_1 = axpy/axpy_opencl.$(OBJEXT) +@STARPU_NO_BLAS_LIB_FALSE@am_axpy_axpy_OBJECTS = axpy/axpy.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__objects_1) 
+axpy_axpy_OBJECTS = $(am_axpy_axpy_OBJECTS) +am__DEPENDENCIES_1 = +@STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__basic_examples_block_SOURCES_DIST = basic_examples/block.c \ + basic_examples/block_cpu.c basic_examples/block_cuda.cu \ + basic_examples/block_hip.hip basic_examples/block_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_2 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/block_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_3 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/block_hip.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_4 = \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl.$(OBJEXT) +am_basic_examples_block_OBJECTS = basic_examples/block.$(OBJEXT) \ + basic_examples/block_cpu.$(OBJEXT) $(am__objects_2) \ + $(am__objects_3) $(am__objects_4) +basic_examples_block_OBJECTS = $(am_basic_examples_block_OBJECTS) +basic_examples_block_LDADD = $(LDADD) +basic_examples_dynamic_handles_SOURCES = \ + basic_examples/dynamic_handles.c +basic_examples_dynamic_handles_OBJECTS = \ + basic_examples/dynamic_handles.$(OBJEXT) +basic_examples_dynamic_handles_LDADD = $(LDADD) +basic_examples_hello_world_SOURCES = basic_examples/hello_world.c +basic_examples_hello_world_OBJECTS = \ + basic_examples/hello_world.$(OBJEXT) +basic_examples_hello_world_LDADD = $(LDADD) +basic_examples_hooks_SOURCES = basic_examples/hooks.c +basic_examples_hooks_OBJECTS = basic_examples/hooks.$(OBJEXT) +basic_examples_hooks_LDADD = $(LDADD) +am__basic_examples_mult_SOURCES_DIST = basic_examples/mult.c \ + basic_examples/mult_cuda.cu basic_examples/mult_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_5 = \ +@STARPU_USE_CUDA_TRUE@ basic_examples/mult_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_6 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/mult_hip.$(OBJEXT) +am_basic_examples_mult_OBJECTS = basic_examples/mult.$(OBJEXT) \ + $(am__objects_5) $(am__objects_6) +basic_examples_mult_OBJECTS = $(am_basic_examples_mult_OBJECTS) 
+basic_examples_mult_LDADD = $(LDADD) +am__basic_examples_multiformat_SOURCES_DIST = \ + basic_examples/multiformat.c \ + basic_examples/multiformat_conversion_codelets.c \ + basic_examples/multiformat_cuda.cu \ + basic_examples/multiformat_conversion_codelets_cuda.cu \ + basic_examples/multiformat_opencl.c \ + basic_examples/multiformat_conversion_codelets_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_7 = basic_examples/multiformat_cuda.$(OBJEXT) \ +@STARPU_USE_CUDA_TRUE@ basic_examples/multiformat_conversion_codelets_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_8 = basic_examples/multiformat_opencl.$(OBJEXT) \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl.$(OBJEXT) +am_basic_examples_multiformat_OBJECTS = \ + basic_examples/multiformat.$(OBJEXT) \ + basic_examples/multiformat_conversion_codelets.$(OBJEXT) \ + $(am__objects_7) $(am__objects_8) +basic_examples_multiformat_OBJECTS = \ + $(am_basic_examples_multiformat_OBJECTS) +basic_examples_multiformat_LDADD = $(LDADD) +basic_examples_ndim_SOURCES = basic_examples/ndim.c +basic_examples_ndim_OBJECTS = basic_examples/ndim.$(OBJEXT) +basic_examples_ndim_LDADD = $(LDADD) +basic_examples_task_insert_color_SOURCES = \ + basic_examples/task_insert_color.c +basic_examples_task_insert_color_OBJECTS = \ + basic_examples/task_insert_color.$(OBJEXT) +basic_examples_task_insert_color_LDADD = $(LDADD) +basic_examples_topology_SOURCES = basic_examples/topology.c +basic_examples_topology_OBJECTS = basic_examples/topology.$(OBJEXT) +basic_examples_topology_LDADD = $(LDADD) +am__basic_examples_variable_SOURCES_DIST = basic_examples/variable.c \ + basic_examples/variable_kernels_cpu.c \ + basic_examples/variable_kernels.cu \ + basic_examples/variable_kernels_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_9 = basic_examples/variable_kernels.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_10 = basic_examples/variable_kernels_opencl.$(OBJEXT) +am_basic_examples_variable_OBJECTS = \ + 
basic_examples/variable.$(OBJEXT) \ + basic_examples/variable_kernels_cpu.$(OBJEXT) $(am__objects_9) \ + $(am__objects_10) +basic_examples_variable_OBJECTS = \ + $(am_basic_examples_variable_OBJECTS) +basic_examples_variable_LDADD = $(LDADD) +am__basic_examples_vector_scal_SOURCES_DIST = \ + basic_examples/vector_scal.c basic_examples/vector_scal_cpu.c \ + basic_examples/vector_scal_cpu_icc.icc \ + basic_examples/vector_scal_cuda.cu \ + basic_examples/vector_scal_hip.hip \ + basic_examples/vector_scal_opencl.c +@STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@am__objects_11 = basic_examples/vector_scal_cpu_icc.$(OBJEXT) +@STARPU_USE_CUDA_TRUE@am__objects_12 = basic_examples/vector_scal_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_13 = \ +@STARPU_USE_HIP_TRUE@ basic_examples/vector_scal_hip.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_14 = basic_examples/vector_scal_opencl.$(OBJEXT) +am_basic_examples_vector_scal_OBJECTS = \ + basic_examples/vector_scal.$(OBJEXT) \ + basic_examples/vector_scal_cpu.$(OBJEXT) $(am__objects_11) \ + $(am__objects_12) $(am__objects_13) $(am__objects_14) +basic_examples_vector_scal_OBJECTS = \ + $(am_basic_examples_vector_scal_OBJECTS) +basic_examples_vector_scal_LDADD = $(LDADD) +am__basic_examples_vector_scal_fortran_SOURCES_DIST = \ + basic_examples/vector_scal_fortran.F \ + basic_examples/vector_scal_c.c \ + basic_examples/vector_scal_cpu.c \ + basic_examples/vector_scal_cuda.cu +@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@am__objects_15 = basic_examples/vector_scal_cuda.$(OBJEXT) +@STARPU_HAVE_F77_TRUE@am_basic_examples_vector_scal_fortran_OBJECTS = basic_examples/vector_scal_fortran.$(OBJEXT) \ +@STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_c.$(OBJEXT) \ +@STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_cpu.$(OBJEXT) \ +@STARPU_HAVE_F77_TRUE@ $(am__objects_15) +basic_examples_vector_scal_fortran_OBJECTS = \ + $(am_basic_examples_vector_scal_fortran_OBJECTS) 
+@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@basic_examples_vector_scal_fortran_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__binary_binary_SOURCES_DIST = binary/binary.c \ + incrementer/incrementer_kernels_opencl.c +@STARPU_USE_OPENCL_TRUE@am__objects_16 = incrementer/incrementer_kernels_opencl.$(OBJEXT) +am_binary_binary_OBJECTS = binary/binary.$(OBJEXT) $(am__objects_16) +binary_binary_OBJECTS = $(am_binary_binary_OBJECTS) +binary_binary_LDADD = $(LDADD) +callback_callback_SOURCES = callback/callback.c +callback_callback_OBJECTS = callback/callback.$(OBJEXT) +callback_callback_LDADD = $(LDADD) +callback_prologue_SOURCES = callback/prologue.c +callback_prologue_OBJECTS = callback/prologue.$(OBJEXT) +callback_prologue_LDADD = $(LDADD) +am__cg_cg_SOURCES_DIST = cg/cg.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cg_cg_OBJECTS = cg/cg.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cg_cg_OBJECTS = $(am_cg_cg_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cg_cg_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__cholesky_cholesky_compil_SOURCES_DIST = \ + cholesky/cholesky_compil.c cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ + common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_compil_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cholesky_cholesky_compil_OBJECTS = \ + $(am_cholesky_cholesky_compil_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__cholesky_cholesky_grain_tag_SOURCES_DIST = \ + cholesky/cholesky_grain_tag.c cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c common/blas.c 
+@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_grain_tag_OBJECTS = cholesky/cholesky_grain_tag.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cholesky_cholesky_grain_tag_OBJECTS = \ + $(am_cholesky_cholesky_grain_tag_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__cholesky_cholesky_implicit_SOURCES_DIST = \ + cholesky/cholesky_implicit.c cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ + common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_implicit_OBJECTS = cholesky/cholesky_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cholesky_cholesky_implicit_OBJECTS = \ + $(am_cholesky_cholesky_implicit_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__cholesky_cholesky_tag_SOURCES_DIST = cholesky/cholesky_tag.c \ + cholesky/cholesky_models.c cholesky/cholesky_kernels.c \ + common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_tag_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cholesky_cholesky_tag_OBJECTS = $(am_cholesky_cholesky_tag_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__cholesky_cholesky_tile_tag_SOURCES_DIST = \ + cholesky/cholesky_tile_tag.c cholesky/cholesky_models.c \ + 
cholesky/cholesky_kernels.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_tile_tag_OBJECTS = cholesky/cholesky_tile_tag.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +cholesky_cholesky_tile_tag_OBJECTS = \ + $(am_cholesky_cholesky_tile_tag_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am_cpp_add_vectors_OBJECTS = cpp/add_vectors.$(OBJEXT) +cpp_add_vectors_OBJECTS = $(am_cpp_add_vectors_OBJECTS) +cpp_add_vectors_LDADD = $(LDADD) +am__cpp_add_vectors_cpp11_SOURCES_DIST = cpp/add_vectors_cpp11.cpp +@STARPU_HAVE_CXX11_TRUE@am_cpp_add_vectors_cpp11_OBJECTS = \ +@STARPU_HAVE_CXX11_TRUE@ cpp/add_vectors_cpp11.$(OBJEXT) +cpp_add_vectors_cpp11_OBJECTS = $(am_cpp_add_vectors_cpp11_OBJECTS) +cpp_add_vectors_cpp11_LDADD = $(LDADD) +am_cpp_add_vectors_interface_OBJECTS = \ + cpp/add_vectors_interface.$(OBJEXT) +cpp_add_vectors_interface_OBJECTS = \ + $(am_cpp_add_vectors_interface_OBJECTS) +cpp_add_vectors_interface_LDADD = $(LDADD) +am__cpp_incrementer_cpp_SOURCES_DIST = cpp/incrementer_cpp.cpp \ + incrementer/incrementer_kernels.cu \ + incrementer/incrementer_kernels_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_17 = incrementer/incrementer_kernels.$(OBJEXT) +am_cpp_incrementer_cpp_OBJECTS = cpp/incrementer_cpp.$(OBJEXT) \ + $(am__objects_17) $(am__objects_16) +cpp_incrementer_cpp_OBJECTS = $(am_cpp_incrementer_cpp_OBJECTS) +cpp_incrementer_cpp_LDADD = $(LDADD) +dependency_sequential_consistency_SOURCES = \ + dependency/sequential_consistency.c +dependency_sequential_consistency_OBJECTS = \ + dependency/sequential_consistency.$(OBJEXT) +dependency_sequential_consistency_LDADD = $(LDADD) +dependency_task_end_dep_SOURCES = dependency/task_end_dep.c +dependency_task_end_dep_OBJECTS = dependency/task_end_dep.$(OBJEXT) 
+dependency_task_end_dep_LDADD = $(LDADD) +dependency_task_end_dep_add_SOURCES = dependency/task_end_dep_add.c +dependency_task_end_dep_add_OBJECTS = \ + dependency/task_end_dep_add.$(OBJEXT) +dependency_task_end_dep_add_LDADD = $(LDADD) +filters_alloc_SOURCES = filters/alloc.c +filters_alloc_OBJECTS = filters/alloc.$(OBJEXT) +filters_alloc_LDADD = $(LDADD) +am__filters_custom_mf_custom_mf_filter_SOURCES_DIST = \ + filters/custom_mf/custom_mf_filter.c \ + filters/custom_mf/custom_interface.c \ + filters/custom_mf/custom_conversion_codelets.c \ + filters/custom_mf/conversion.cu filters/custom_mf/cuda.cu \ + filters/custom_mf/conversion_opencl.c \ + filters/custom_mf/custom_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_18 = \ +@STARPU_USE_CUDA_TRUE@ filters/custom_mf/conversion.$(OBJEXT) \ +@STARPU_USE_CUDA_TRUE@ filters/custom_mf/cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_19 = filters/custom_mf/conversion_opencl.$(OBJEXT) \ +@STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.$(OBJEXT) +am_filters_custom_mf_custom_mf_filter_OBJECTS = \ + filters/custom_mf/custom_mf_filter.$(OBJEXT) \ + filters/custom_mf/custom_interface.$(OBJEXT) \ + filters/custom_mf/custom_conversion_codelets.$(OBJEXT) \ + $(am__objects_18) $(am__objects_19) +filters_custom_mf_custom_mf_filter_OBJECTS = \ + $(am_filters_custom_mf_custom_mf_filter_OBJECTS) +filters_custom_mf_custom_mf_filter_LDADD = $(LDADD) +am__filters_fblock_SOURCES_DIST = filters/fblock.c \ + filters/fblock_print.c filters/fblock_cpu.c \ + filters/fblock_cuda.cu filters/fblock_hip.hip \ + filters/fblock_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_20 = filters/fblock_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_21 = filters/fblock_hip.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_22 = \ +@STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl.$(OBJEXT) +am_filters_fblock_OBJECTS = filters/fblock.$(OBJEXT) \ + filters/fblock_print.$(OBJEXT) filters/fblock_cpu.$(OBJEXT) \ + $(am__objects_20) $(am__objects_21) 
$(am__objects_22) +filters_fblock_OBJECTS = $(am_filters_fblock_OBJECTS) +filters_fblock_LDADD = $(LDADD) +am__filters_fblock_pick_matrix_SOURCES_DIST = \ + filters/fblock_pick_matrix.c filters/fblock_print.c \ + filters/fmatrix_print.c filters/fmatrix_cpu.c \ + filters/fmatrix_cuda.cu filters/fmatrix_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_23 = filters/fmatrix_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_24 = filters/fmatrix_hip.$(OBJEXT) +am_filters_fblock_pick_matrix_OBJECTS = \ + filters/fblock_pick_matrix.$(OBJEXT) \ + filters/fblock_print.$(OBJEXT) filters/fmatrix_print.$(OBJEXT) \ + filters/fmatrix_cpu.$(OBJEXT) $(am__objects_23) \ + $(am__objects_24) +filters_fblock_pick_matrix_OBJECTS = \ + $(am_filters_fblock_pick_matrix_OBJECTS) +filters_fblock_pick_matrix_LDADD = $(LDADD) +am__filters_fblock_pick_variable_SOURCES_DIST = \ + filters/fblock_pick_variable.c filters/fblock_print.c \ + filters/fvariable_cuda.cu +@STARPU_USE_CUDA_TRUE@am__objects_25 = \ +@STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.$(OBJEXT) +am_filters_fblock_pick_variable_OBJECTS = \ + filters/fblock_pick_variable.$(OBJEXT) \ + filters/fblock_print.$(OBJEXT) $(am__objects_25) +filters_fblock_pick_variable_OBJECTS = \ + $(am_filters_fblock_pick_variable_OBJECTS) +filters_fblock_pick_variable_LDADD = $(LDADD) +am__filters_fmatrix_SOURCES_DIST = filters/fmatrix.c \ + filters/fmatrix_print.c filters/fmatrix_cpu.c \ + filters/fmatrix_cuda.cu filters/fmatrix_hip.hip +am_filters_fmatrix_OBJECTS = filters/fmatrix.$(OBJEXT) \ + filters/fmatrix_print.$(OBJEXT) filters/fmatrix_cpu.$(OBJEXT) \ + $(am__objects_23) $(am__objects_24) +filters_fmatrix_OBJECTS = $(am_filters_fmatrix_OBJECTS) +filters_fmatrix_LDADD = $(LDADD) +am__filters_fmatrix_pick_variable_SOURCES_DIST = \ + filters/fmatrix_pick_variable.c filters/fmatrix_print.c \ + filters/fvariable_cuda.cu +am_filters_fmatrix_pick_variable_OBJECTS = \ + filters/fmatrix_pick_variable.$(OBJEXT) \ + filters/fmatrix_print.$(OBJEXT) 
$(am__objects_25) +filters_fmatrix_pick_variable_OBJECTS = \ + $(am_filters_fmatrix_pick_variable_OBJECTS) +filters_fmatrix_pick_variable_LDADD = $(LDADD) +am__filters_fmatrix_pick_vector_SOURCES_DIST = \ + filters/fmatrix_pick_vector.c filters/fmatrix_print.c \ + filters/fvector_cpu.c filters/fvector_cuda.cu \ + filters/fvector_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_26 = filters/fvector_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_27 = filters/fvector_hip.$(OBJEXT) +am_filters_fmatrix_pick_vector_OBJECTS = \ + filters/fmatrix_pick_vector.$(OBJEXT) \ + filters/fmatrix_print.$(OBJEXT) filters/fvector_cpu.$(OBJEXT) \ + $(am__objects_26) $(am__objects_27) +filters_fmatrix_pick_vector_OBJECTS = \ + $(am_filters_fmatrix_pick_vector_OBJECTS) +filters_fmatrix_pick_vector_LDADD = $(LDADD) +am__filters_fmultiple_manual_SOURCES_DIST = \ + filters/fmultiple_manual.c filters/fmultiple_cuda.cu \ + filters/fmultiple_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_28 = \ +@STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_29 = filters/fmultiple_hip.$(OBJEXT) +am_filters_fmultiple_manual_OBJECTS = \ + filters/fmultiple_manual.$(OBJEXT) $(am__objects_28) \ + $(am__objects_29) +filters_fmultiple_manual_OBJECTS = \ + $(am_filters_fmultiple_manual_OBJECTS) +filters_fmultiple_manual_LDADD = $(LDADD) +am__filters_fmultiple_submit_SOURCES_DIST = \ + filters/fmultiple_submit.c filters/fmultiple_cuda.cu \ + filters/fmultiple_hip.hip +am_filters_fmultiple_submit_OBJECTS = \ + filters/fmultiple_submit.$(OBJEXT) $(am__objects_28) \ + $(am__objects_29) +filters_fmultiple_submit_OBJECTS = \ + $(am_filters_fmultiple_submit_OBJECTS) +filters_fmultiple_submit_LDADD = $(LDADD) +am__filters_fmultiple_submit_implicit_SOURCES_DIST = \ + filters/fmultiple_submit_implicit.c filters/fmultiple_cuda.cu \ + filters/fmultiple_hip.hip +am_filters_fmultiple_submit_implicit_OBJECTS = \ + filters/fmultiple_submit_implicit.$(OBJEXT) $(am__objects_28) \ + $(am__objects_29) 
+filters_fmultiple_submit_implicit_OBJECTS = \ + $(am_filters_fmultiple_submit_implicit_OBJECTS) +filters_fmultiple_submit_implicit_LDADD = $(LDADD) +am__filters_fmultiple_submit_readonly_SOURCES_DIST = \ + filters/fmultiple_submit_readonly.c filters/fmultiple_cuda.cu \ + filters/fmultiple_hip.hip +am_filters_fmultiple_submit_readonly_OBJECTS = \ + filters/fmultiple_submit_readonly.$(OBJEXT) $(am__objects_28) \ + $(am__objects_29) +filters_fmultiple_submit_readonly_OBJECTS = \ + $(am_filters_fmultiple_submit_readonly_OBJECTS) +filters_fmultiple_submit_readonly_LDADD = $(LDADD) +am__filters_fmultiple_submit_readonly_downgrade_SOURCES_DIST = \ + filters/fmultiple_submit_readonly_downgrade.c \ + filters/fmultiple_cuda.cu filters/fmultiple_hip.hip +am_filters_fmultiple_submit_readonly_downgrade_OBJECTS = \ + filters/fmultiple_submit_readonly_downgrade.$(OBJEXT) \ + $(am__objects_28) $(am__objects_29) +filters_fmultiple_submit_readonly_downgrade_OBJECTS = \ + $(am_filters_fmultiple_submit_readonly_downgrade_OBJECTS) +filters_fmultiple_submit_readonly_downgrade_LDADD = $(LDADD) +am__filters_fndim_SOURCES_DIST = filters/fndim.c \ + filters/ftensor_print.c filters/f4d_cpu.c filters/f4d_cuda.cu \ + filters/f4d_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_30 = filters/f4d_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_31 = filters/f4d_hip.$(OBJEXT) +am_filters_fndim_OBJECTS = filters/fndim.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/f4d_cpu.$(OBJEXT) \ + $(am__objects_30) $(am__objects_31) +filters_fndim_OBJECTS = $(am_filters_fndim_OBJECTS) +filters_fndim_LDADD = $(LDADD) +filters_fndim_1d_pick_variable_SOURCES = \ + filters/fndim_1d_pick_variable.c +filters_fndim_1d_pick_variable_OBJECTS = \ + filters/fndim_1d_pick_variable.$(OBJEXT) +filters_fndim_1d_pick_variable_LDADD = $(LDADD) +am__filters_fndim_2d_pick_vector_SOURCES_DIST = \ + filters/fndim_2d_pick_vector.c filters/fmatrix_print.c \ + filters/fvector_cpu.c filters/fvector_cuda.cu \ + 
filters/fvector_hip.hip +am_filters_fndim_2d_pick_vector_OBJECTS = \ + filters/fndim_2d_pick_vector.$(OBJEXT) \ + filters/fmatrix_print.$(OBJEXT) filters/fvector_cpu.$(OBJEXT) \ + $(am__objects_26) $(am__objects_27) +filters_fndim_2d_pick_vector_OBJECTS = \ + $(am_filters_fndim_2d_pick_vector_OBJECTS) +filters_fndim_2d_pick_vector_LDADD = $(LDADD) +am__filters_fndim_3d_pick_matrix_SOURCES_DIST = \ + filters/fndim_3d_pick_matrix.c filters/fblock_print.c \ + filters/fmatrix_print.c filters/fmatrix_cpu.c \ + filters/fmatrix_cuda.cu filters/fmatrix_hip.hip +am_filters_fndim_3d_pick_matrix_OBJECTS = \ + filters/fndim_3d_pick_matrix.$(OBJEXT) \ + filters/fblock_print.$(OBJEXT) filters/fmatrix_print.$(OBJEXT) \ + filters/fmatrix_cpu.$(OBJEXT) $(am__objects_23) \ + $(am__objects_24) +filters_fndim_3d_pick_matrix_OBJECTS = \ + $(am_filters_fndim_3d_pick_matrix_OBJECTS) +filters_fndim_3d_pick_matrix_LDADD = $(LDADD) +am__filters_fndim_4d_pick_block_SOURCES_DIST = \ + filters/fndim_4d_pick_block.c filters/ftensor_print.c \ + filters/fblock_print.c filters/fblock_cpu.c \ + filters/fblock_cuda.cu filters/fblock_hip.hip +am_filters_fndim_4d_pick_block_OBJECTS = \ + filters/fndim_4d_pick_block.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ + filters/fblock_cpu.$(OBJEXT) $(am__objects_20) \ + $(am__objects_21) +filters_fndim_4d_pick_block_OBJECTS = \ + $(am_filters_fndim_4d_pick_block_OBJECTS) +filters_fndim_4d_pick_block_LDADD = $(LDADD) +am__filters_fndim_5d_pick_tensor_SOURCES_DIST = \ + filters/fndim_5d_pick_tensor.c filters/f5d_print.c \ + filters/ftensor_print.c filters/ftensor_cpu.c \ + filters/ftensor_cuda.cu filters/ftensor_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_32 = filters/ftensor_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_33 = filters/ftensor_hip.$(OBJEXT) +am_filters_fndim_5d_pick_tensor_OBJECTS = \ + filters/fndim_5d_pick_tensor.$(OBJEXT) \ + filters/f5d_print.$(OBJEXT) filters/ftensor_print.$(OBJEXT) \ + 
filters/ftensor_cpu.$(OBJEXT) $(am__objects_32) \ + $(am__objects_33) +filters_fndim_5d_pick_tensor_OBJECTS = \ + $(am_filters_fndim_5d_pick_tensor_OBJECTS) +filters_fndim_5d_pick_tensor_LDADD = $(LDADD) +am__filters_fndim_pick_ndim_SOURCES_DIST = filters/fndim_pick_ndim.c \ + filters/ftensor_print.c filters/fblock_print.c \ + filters/f3d_cpu.c filters/f3d_cuda.cu filters/f3d_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_34 = filters/f3d_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_35 = filters/f3d_hip.$(OBJEXT) +am_filters_fndim_pick_ndim_OBJECTS = \ + filters/fndim_pick_ndim.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ + filters/f3d_cpu.$(OBJEXT) $(am__objects_34) $(am__objects_35) +filters_fndim_pick_ndim_OBJECTS = \ + $(am_filters_fndim_pick_ndim_OBJECTS) +filters_fndim_pick_ndim_LDADD = $(LDADD) +am_filters_fndim_pick_variable_OBJECTS = \ + filters/fndim_pick_variable.$(OBJEXT) \ + filters/f5d_print.$(OBJEXT) +filters_fndim_pick_variable_OBJECTS = \ + $(am_filters_fndim_pick_variable_OBJECTS) +filters_fndim_pick_variable_LDADD = $(LDADD) +am__filters_fndim_to_block_SOURCES_DIST = filters/fndim_to_block.c \ + filters/fblock_print.c filters/fblock_cpu.c \ + filters/fblock_cuda.cu filters/fblock_hip.hip +am_filters_fndim_to_block_OBJECTS = filters/fndim_to_block.$(OBJEXT) \ + filters/fblock_print.$(OBJEXT) filters/fblock_cpu.$(OBJEXT) \ + $(am__objects_20) $(am__objects_21) +filters_fndim_to_block_OBJECTS = $(am_filters_fndim_to_block_OBJECTS) +filters_fndim_to_block_LDADD = $(LDADD) +am__filters_fndim_to_matrix_SOURCES_DIST = filters/fndim_to_matrix.c \ + filters/fmatrix_print.c filters/fmatrix_cpu.c \ + filters/fmatrix_cuda.cu filters/fmatrix_hip.hip +am_filters_fndim_to_matrix_OBJECTS = \ + filters/fndim_to_matrix.$(OBJEXT) \ + filters/fmatrix_print.$(OBJEXT) filters/fmatrix_cpu.$(OBJEXT) \ + $(am__objects_23) $(am__objects_24) +filters_fndim_to_matrix_OBJECTS = \ + $(am_filters_fndim_to_matrix_OBJECTS) 
+filters_fndim_to_matrix_LDADD = $(LDADD) +am__filters_fndim_to_tensor_SOURCES_DIST = filters/fndim_to_tensor.c \ + filters/ftensor_print.c filters/ftensor_cpu.c \ + filters/ftensor_cuda.cu filters/ftensor_hip.hip +am_filters_fndim_to_tensor_OBJECTS = \ + filters/fndim_to_tensor.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/ftensor_cpu.$(OBJEXT) \ + $(am__objects_32) $(am__objects_33) +filters_fndim_to_tensor_OBJECTS = \ + $(am_filters_fndim_to_tensor_OBJECTS) +filters_fndim_to_tensor_LDADD = $(LDADD) +filters_fndim_to_variable_SOURCES = filters/fndim_to_variable.c +filters_fndim_to_variable_OBJECTS = \ + filters/fndim_to_variable.$(OBJEXT) +filters_fndim_to_variable_LDADD = $(LDADD) +am__filters_fndim_to_vector_SOURCES_DIST = filters/fndim_to_vector.c \ + filters/fvector_cpu.c filters/fvector_cuda.cu \ + filters/fvector_hip.hip +am_filters_fndim_to_vector_OBJECTS = \ + filters/fndim_to_vector.$(OBJEXT) \ + filters/fvector_cpu.$(OBJEXT) $(am__objects_26) \ + $(am__objects_27) +filters_fndim_to_vector_OBJECTS = \ + $(am_filters_fndim_to_vector_OBJECTS) +filters_fndim_to_vector_LDADD = $(LDADD) +filters_fread_SOURCES = filters/fread.c +filters_fread_OBJECTS = filters/fread.$(OBJEXT) +filters_fread_LDADD = $(LDADD) +filters_frecursive_SOURCES = filters/frecursive.c +filters_frecursive_OBJECTS = filters/frecursive.$(OBJEXT) +filters_frecursive_LDADD = $(LDADD) +am__filters_ftensor_SOURCES_DIST = filters/ftensor.c \ + filters/ftensor_print.c filters/ftensor_cpu.c \ + filters/ftensor_cuda.cu filters/ftensor_hip.hip +am_filters_ftensor_OBJECTS = filters/ftensor.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/ftensor_cpu.$(OBJEXT) \ + $(am__objects_32) $(am__objects_33) +filters_ftensor_OBJECTS = $(am_filters_ftensor_OBJECTS) +filters_ftensor_LDADD = $(LDADD) +am__filters_ftensor_pick_block_SOURCES_DIST = \ + filters/ftensor_pick_block.c filters/ftensor_print.c \ + filters/fblock_print.c filters/fblock_cpu.c \ + filters/fblock_cuda.cu filters/fblock_hip.hip 
+am_filters_ftensor_pick_block_OBJECTS = \ + filters/ftensor_pick_block.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ + filters/fblock_cpu.$(OBJEXT) $(am__objects_20) \ + $(am__objects_21) +filters_ftensor_pick_block_OBJECTS = \ + $(am_filters_ftensor_pick_block_OBJECTS) +filters_ftensor_pick_block_LDADD = $(LDADD) +am__filters_ftensor_pick_variable_SOURCES_DIST = \ + filters/ftensor_pick_variable.c filters/ftensor_print.c \ + filters/fvariable_cuda.cu +am_filters_ftensor_pick_variable_OBJECTS = \ + filters/ftensor_pick_variable.$(OBJEXT) \ + filters/ftensor_print.$(OBJEXT) $(am__objects_25) +filters_ftensor_pick_variable_OBJECTS = \ + $(am_filters_ftensor_pick_variable_OBJECTS) +filters_ftensor_pick_variable_LDADD = $(LDADD) +am__filters_fvector_SOURCES_DIST = filters/fvector.c \ + filters/fvector_cpu.c filters/fvector_cuda.cu \ + filters/fvector_hip.hip +am_filters_fvector_OBJECTS = filters/fvector.$(OBJEXT) \ + filters/fvector_cpu.$(OBJEXT) $(am__objects_26) \ + $(am__objects_27) +filters_fvector_OBJECTS = $(am_filters_fvector_OBJECTS) +filters_fvector_LDADD = $(LDADD) +filters_fvector_pick_variable_SOURCES = \ + filters/fvector_pick_variable.c +filters_fvector_pick_variable_OBJECTS = \ + filters/fvector_pick_variable.$(OBJEXT) +filters_fvector_pick_variable_LDADD = $(LDADD) +filters_shadow_SOURCES = filters/shadow.c +filters_shadow_OBJECTS = filters/shadow.$(OBJEXT) +filters_shadow_LDADD = $(LDADD) +filters_shadow2d_SOURCES = filters/shadow2d.c +filters_shadow2d_OBJECTS = filters/shadow2d.$(OBJEXT) +filters_shadow2d_LDADD = $(LDADD) +filters_shadow3d_SOURCES = filters/shadow3d.c +filters_shadow3d_OBJECTS = filters/shadow3d.$(OBJEXT) +filters_shadow3d_LDADD = $(LDADD) +filters_shadow4d_SOURCES = filters/shadow4d.c +filters_shadow4d_OBJECTS = filters/shadow4d.$(OBJEXT) +filters_shadow4d_LDADD = $(LDADD) +filters_shadownd_SOURCES = filters/shadownd.c +filters_shadownd_OBJECTS = filters/shadownd.$(OBJEXT) +filters_shadownd_LDADD = 
$(LDADD) +am__fortran_hello_SOURCES_DIST = fortran/hello_c.c fortran/hello.F \ + fortran/starpu_fortran.h +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@am_fortran_hello_OBJECTS = fortran/hello_c.$(OBJEXT) \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello.$(OBJEXT) +fortran_hello_OBJECTS = $(am_fortran_hello_OBJECTS) +fortran_hello_LDADD = $(LDADD) +am__fortran90_f90_example_SOURCES_DIST = fortran90/mod_types.f90 \ + fortran90/starpu_mod.f90 fortran90/mod_interface.f90 \ + fortran90/mod_compute.f90 fortran90/marshalling.c \ + fortran90/f90_example.f90 +@STARPU_HAVE_FC_TRUE@am_fortran90_f90_example_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_types.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_interface.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_compute.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ fortran90/marshalling.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ fortran90/f90_example.$(OBJEXT) +fortran90_f90_example_OBJECTS = $(am_fortran90_f90_example_OBJECTS) +fortran90_f90_example_LDADD = $(LDADD) +gl_interop_gl_interop_SOURCES = gl_interop/gl_interop.c +gl_interop_gl_interop_OBJECTS = gl_interop/gl_interop.$(OBJEXT) +@STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_DEPENDENCIES = \ +@STARPU_HAVE_OPENGL_TRUE@ $(am__DEPENDENCIES_1) +gl_interop_gl_interop_idle_SOURCES = gl_interop/gl_interop_idle.c +gl_interop_gl_interop_idle_OBJECTS = \ + gl_interop/gl_interop_idle.$(OBJEXT) +@STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_idle_DEPENDENCIES = \ +@STARPU_HAVE_OPENGL_TRUE@ $(am__DEPENDENCIES_1) +am__heat_heat_SOURCES_DIST = heat/heat.c heat/dw_factolu.c \ + heat/dw_factolu_tag.c heat/dw_factolu_grain.c \ + heat/dw_sparse_cg.c heat/heat_display.c \ + heat/lu_kernels_model.c heat/dw_sparse_cg_kernels.c \ + heat/dw_factolu_kernels.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_heat_heat_OBJECTS = heat/heat.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ 
heat/dw_factolu_tag.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_grain.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/heat_display.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/lu_kernels_model.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +heat_heat_OBJECTS = $(am_heat_heat_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@heat_heat_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__incrementer_incrementer_SOURCES_DIST = incrementer/incrementer.c \ + incrementer/incrementer_kernels.cu \ + incrementer/incrementer_kernels_opencl.c +am_incrementer_incrementer_OBJECTS = \ + incrementer/incrementer.$(OBJEXT) $(am__objects_17) \ + $(am__objects_16) +incrementer_incrementer_OBJECTS = \ + $(am_incrementer_incrementer_OBJECTS) +incrementer_incrementer_LDADD = $(LDADD) +am__interface_complex_SOURCES_DIST = interface/complex.c \ + interface/complex_interface.c interface/complex_filters.c \ + interface/complex_kernels.cu \ + interface/complex_kernels_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_36 = \ +@STARPU_USE_CUDA_TRUE@ interface/complex_kernels.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_37 = interface/complex_kernels_opencl.$(OBJEXT) +am_interface_complex_OBJECTS = interface/complex.$(OBJEXT) \ + interface/complex_interface.$(OBJEXT) \ + interface/complex_filters.$(OBJEXT) $(am__objects_36) \ + $(am__objects_37) +interface_complex_OBJECTS = $(am_interface_complex_OBJECTS) +interface_complex_LDADD = $(LDADD) +am__interface_complex_dev_handle_complex_dev_handle_SOURCES_DIST = \ + interface/complex_dev_handle/complex_dev_handle.c \ + interface/complex_dev_handle/complex_dev_handle_interface.c \ + interface/complex_dev_handle/complex_dev_handle_filters.c \ + 
interface/complex_dev_handle/complex_dev_handle_kernels.cu \ + interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_38 = interface/complex_dev_handle/complex_dev_handle_kernels.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_39 = interface/complex_dev_handle/complex_dev_handle_kernels_opencl.$(OBJEXT) +am_interface_complex_dev_handle_complex_dev_handle_OBJECTS = \ + interface/complex_dev_handle/complex_dev_handle.$(OBJEXT) \ + interface/complex_dev_handle/complex_dev_handle_interface.$(OBJEXT) \ + interface/complex_dev_handle/complex_dev_handle_filters.$(OBJEXT) \ + $(am__objects_38) $(am__objects_39) +interface_complex_dev_handle_complex_dev_handle_OBJECTS = \ + $(am_interface_complex_dev_handle_complex_dev_handle_OBJECTS) +interface_complex_dev_handle_complex_dev_handle_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +am__lu_lu_example_complex_double_SOURCES_DIST = \ + lu/lu_example_complex_double.c lu/zlu.c lu/zlu_pivot.c \ + lu/zlu_kernels.c lu/blas_complex.c common/blas.c +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_complex_double_OBJECTS = lu/lu_example_complex_double.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_pivot.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_example_complex_double_OBJECTS = \ + $(am_lu_lu_example_complex_double_OBJECTS) +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__lu_lu_example_complex_float_SOURCES_DIST = \ + lu/lu_example_complex_float.c lu/clu.c lu/clu_pivot.c \ + lu/clu_kernels.c lu/blas_complex.c common/blas.c 
+@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_complex_float_OBJECTS = lu/lu_example_complex_float.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_pivot.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_example_complex_float_OBJECTS = \ + $(am_lu_lu_example_complex_float_OBJECTS) +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__lu_lu_example_double_SOURCES_DIST = lu/lu_example_double.c \ + lu/dlu.c lu/dlu_pivot.c lu/dlu_kernels.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_double_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_pivot.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_example_double_OBJECTS = $(am_lu_lu_example_double_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__lu_lu_example_float_SOURCES_DIST = lu/lu_example_float.c lu/slu.c \ + lu/slu_pivot.c lu/slu_kernels.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_float_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_pivot.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_example_float_OBJECTS = $(am_lu_lu_example_float_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) 
+am__lu_lu_implicit_example_complex_double_SOURCES_DIST = \ + lu/lu_example_complex_double.c lu/zlu_implicit.c \ + lu/zlu_implicit_pivot.c lu/zlu_kernels.c lu/blas_complex.c \ + common/blas.c +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_complex_double_OBJECTS = lu/lu_example_complex_double.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit_pivot.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_implicit_example_complex_double_OBJECTS = \ + $(am_lu_lu_implicit_example_complex_double_OBJECTS) +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__lu_lu_implicit_example_complex_float_SOURCES_DIST = \ + lu/lu_example_complex_float.c lu/clu_implicit.c \ + lu/clu_implicit_pivot.c lu/clu_kernels.c lu/blas_complex.c \ + common/blas.c +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_complex_float_OBJECTS = lu/lu_example_complex_float.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit_pivot.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_implicit_example_complex_float_OBJECTS = \ + $(am_lu_lu_implicit_example_complex_float_OBJECTS) +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__lu_lu_implicit_example_double_SOURCES_DIST 
= \ + lu/lu_example_double.c lu/dlu_implicit.c \ + lu/dlu_implicit_pivot.c lu/dlu_kernels.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_double_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit_pivot.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_implicit_example_double_OBJECTS = \ + $(am_lu_lu_implicit_example_double_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__lu_lu_implicit_example_float_SOURCES_DIST = lu/lu_example_float.c \ + lu/slu_implicit.c lu/slu_implicit_pivot.c lu/slu_kernels.c \ + common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_float_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit_pivot.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +lu_lu_implicit_example_float_OBJECTS = \ + $(am_lu_lu_implicit_example_float_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c +mandelbrot_mandelbrot_OBJECTS = \ + mandelbrot/mandelbrot-mandelbrot.$(OBJEXT) +@STARPU_HAVE_X11_TRUE@mandelbrot_mandelbrot_DEPENDENCIES = \ +@STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) \ +@STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) \ +@STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) +matvecmult_matvecmult_SOURCES = matvecmult/matvecmult.c +matvecmult_matvecmult_OBJECTS = matvecmult/matvecmult.$(OBJEXT) +matvecmult_matvecmult_LDADD = $(LDADD) +mlr_mlr_SOURCES = mlr/mlr.c +mlr_mlr_OBJECTS = mlr/mlr.$(OBJEXT) +mlr_mlr_LDADD = $(LDADD) 
+am__mult_dgemm_SOURCES_DIST = mult/dgemm.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am__objects_40 = common/blas.$(OBJEXT) +am_mult_dgemm_OBJECTS = mult/dgemm.$(OBJEXT) $(am__objects_40) +mult_dgemm_OBJECTS = $(am_mult_dgemm_OBJECTS) +mult_dgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__mult_dgemm_layout_SOURCES_DIST = mult/dgemm_layout.c common/blas.c +am_mult_dgemm_layout_OBJECTS = mult/dgemm_layout.$(OBJEXT) \ + $(am__objects_40) +mult_dgemm_layout_OBJECTS = $(am_mult_dgemm_layout_OBJECTS) +mult_dgemm_layout_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__mult_sgemm_SOURCES_DIST = mult/sgemm.c common/blas.c +am_mult_sgemm_OBJECTS = mult/sgemm.$(OBJEXT) $(am__objects_40) +mult_sgemm_OBJECTS = $(am_mult_sgemm_OBJECTS) +mult_sgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__mult_sgemm_layout_SOURCES_DIST = mult/sgemm_layout.c common/blas.c +am_mult_sgemm_layout_OBJECTS = mult/sgemm_layout.$(OBJEXT) \ + $(am__objects_40) +mult_sgemm_layout_OBJECTS = $(am_mult_sgemm_layout_OBJECTS) +mult_sgemm_layout_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__native_fortran_nf_dynbuf_SOURCES_DIST = \ + native_fortran/nf_dynbuf_cl.f90 native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_dynbuf.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_dynbuf_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf_cl.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf.$(OBJEXT) +native_fortran_nf_dynbuf_OBJECTS = \ + $(am_native_fortran_nf_dynbuf_OBJECTS) +native_fortran_nf_dynbuf_LDADD = $(LDADD) +am__native_fortran_nf_example_SOURCES_DIST = \ + native_fortran/nf_types.f90 native_fortran/nf_compute.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_example.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_example_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_types.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_compute.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ 
native_fortran/nf_example.$(OBJEXT) +native_fortran_nf_example_OBJECTS = \ + $(am_native_fortran_nf_example_OBJECTS) +native_fortran_nf_example_LDADD = $(LDADD) +am__native_fortran_nf_matrix_SOURCES_DIST = \ + native_fortran/nf_codelets.f90 native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_matrix.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_matrix_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_matrix.$(OBJEXT) +native_fortran_nf_matrix_OBJECTS = \ + $(am_native_fortran_nf_matrix_OBJECTS) +native_fortran_nf_matrix_LDADD = $(LDADD) +am__native_fortran_nf_partition_SOURCES_DIST = \ + native_fortran/nf_partition_cl.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_partition.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_partition_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition_cl.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition.$(OBJEXT) +native_fortran_nf_partition_OBJECTS = \ + $(am_native_fortran_nf_partition_OBJECTS) +native_fortran_nf_partition_LDADD = $(LDADD) +am__native_fortran_nf_sched_ctx_SOURCES_DIST = \ + native_fortran/nf_sched_ctx_cl.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_sched_ctx.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_sched_ctx_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx_cl.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx.$(OBJEXT) +native_fortran_nf_sched_ctx_OBJECTS = \ + $(am_native_fortran_nf_sched_ctx_OBJECTS) +native_fortran_nf_sched_ctx_LDADD = $(LDADD) +am__native_fortran_nf_varbuf_SOURCES_DIST = \ + native_fortran/nf_varbuf_cl.f90 native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_varbuf.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_varbuf_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ 
native_fortran/nf_varbuf_cl.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf.$(OBJEXT) +native_fortran_nf_varbuf_OBJECTS = \ + $(am_native_fortran_nf_varbuf_OBJECTS) +native_fortran_nf_varbuf_LDADD = $(LDADD) +am__native_fortran_nf_vector_SOURCES_DIST = \ + native_fortran/nf_codelets.f90 native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_vector.f90 +@STARPU_HAVE_FC_TRUE@am_native_fortran_nf_vector_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_vector.$(OBJEXT) +native_fortran_nf_vector_OBJECTS = \ + $(am_native_fortran_nf_vector_OBJECTS) +native_fortran_nf_vector_LDADD = $(LDADD) +openmp_vector_scal_omp_SOURCES = openmp/vector_scal_omp.c +openmp_vector_scal_omp_OBJECTS = \ + openmp/vector_scal_omp-vector_scal_omp.$(OBJEXT) +openmp_vector_scal_omp_LDADD = $(LDADD) +openmp_vector_scal_omp_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +parallel_workers_parallel_workers_SOURCES = \ + parallel_workers/parallel_workers.c +parallel_workers_parallel_workers_OBJECTS = \ + parallel_workers/parallel_workers-parallel_workers.$(OBJEXT) +parallel_workers_parallel_workers_LDADD = $(LDADD) +parallel_workers_parallel_workers_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +parallel_workers_parallel_workers_func_SOURCES = \ + parallel_workers/parallel_workers_func.c +parallel_workers_parallel_workers_func_OBJECTS = parallel_workers/parallel_workers_func-parallel_workers_func.$(OBJEXT) +parallel_workers_parallel_workers_func_LDADD = $(LDADD) +parallel_workers_parallel_workers_func_LINK = $(LIBTOOL) $(AM_V_lt) \ + 
--tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(parallel_workers_parallel_workers_func_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +parallel_workers_parallel_workers_oldapi_SOURCES = \ + parallel_workers/parallel_workers_oldapi.c +parallel_workers_parallel_workers_oldapi_OBJECTS = parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.$(OBJEXT) +parallel_workers_parallel_workers_oldapi_LDADD = $(LDADD) +parallel_workers_parallel_workers_oldapi_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(parallel_workers_parallel_workers_oldapi_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +perf_monitoring_perf_counters_01_SOURCES = \ + perf_monitoring/perf_counters_01.c +perf_monitoring_perf_counters_01_OBJECTS = \ + perf_monitoring/perf_counters_01.$(OBJEXT) +perf_monitoring_perf_counters_01_LDADD = $(LDADD) +perf_monitoring_perf_counters_02_SOURCES = \ + perf_monitoring/perf_counters_02.c +perf_monitoring_perf_counters_02_OBJECTS = \ + perf_monitoring/perf_counters_02.$(OBJEXT) +perf_monitoring_perf_counters_02_LDADD = $(LDADD) +perf_steering_perf_knobs_01_SOURCES = perf_steering/perf_knobs_01.c +perf_steering_perf_knobs_01_OBJECTS = \ + perf_steering/perf_knobs_01.$(OBJEXT) +perf_steering_perf_knobs_01_LDADD = $(LDADD) +perf_steering_perf_knobs_02_SOURCES = perf_steering/perf_knobs_02.c +perf_steering_perf_knobs_02_OBJECTS = \ + perf_steering/perf_knobs_02.$(OBJEXT) +perf_steering_perf_knobs_02_LDADD = $(LDADD) +perf_steering_perf_knobs_03_SOURCES = perf_steering/perf_knobs_03.c +perf_steering_perf_knobs_03_OBJECTS = \ + perf_steering/perf_knobs_03.$(OBJEXT) +perf_steering_perf_knobs_03_LDADD = $(LDADD) +am__pi_pi_SOURCES_DIST = pi/pi.c pi/SobolQRNG/sobol_gold.c \ + pi/SobolQRNG/sobol_primitives.c pi/pi_kernel.cu \ + pi/SobolQRNG/sobol_gpu.cu +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_41 = pi/pi_kernel.$(OBJEXT) \ 
+@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/SobolQRNG/sobol_gpu.$(OBJEXT) +@STARPU_HAVE_WINDOWS_FALSE@am_pi_pi_OBJECTS = pi/pi.$(OBJEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_gold.$(OBJEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_primitives.$(OBJEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ $(am__objects_41) +pi_pi_OBJECTS = $(am_pi_pi_OBJECTS) +pi_pi_LDADD = $(LDADD) +am__pi_pi_redux_SOURCES_DIST = pi/pi_redux.c pi/pi_redux_kernel.cu +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_42 = pi/pi_redux_kernel.$(OBJEXT) +@STARPU_HAVE_WINDOWS_FALSE@am_pi_pi_redux_OBJECTS = \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux.$(OBJEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ $(am__objects_42) +pi_pi_redux_OBJECTS = $(am_pi_pi_redux_OBJECTS) +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@pi_pi_redux_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__pipeline_pipeline_SOURCES_DIST = pipeline/pipeline.c common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_pipeline_pipeline_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ pipeline/pipeline.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +pipeline_pipeline_OBJECTS = $(am_pipeline_pipeline_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +ppm_downscaler_ppm_downscaler_SOURCES = \ + ppm_downscaler/ppm_downscaler.c +ppm_downscaler_ppm_downscaler_OBJECTS = \ + ppm_downscaler/ppm_downscaler.$(OBJEXT) +ppm_downscaler_ppm_downscaler_LDADD = $(LDADD) +ppm_downscaler_yuv_downscaler_SOURCES = \ + ppm_downscaler/yuv_downscaler.c +ppm_downscaler_yuv_downscaler_OBJECTS = \ + ppm_downscaler/yuv_downscaler.$(OBJEXT) +ppm_downscaler_yuv_downscaler_LDADD = $(LDADD) +profiling_profiling_SOURCES = profiling/profiling.c +profiling_profiling_OBJECTS = profiling/profiling.$(OBJEXT) +profiling_profiling_LDADD = $(LDADD) +am__reductions_dot_product_SOURCES_DIST = reductions/dot_product.c \ + reductions/dot_product_kernels.cu +@STARPU_USE_CUDA_TRUE@am__objects_43 = 
reductions/dot_product_kernels.$(OBJEXT) +am_reductions_dot_product_OBJECTS = reductions/dot_product.$(OBJEXT) \ + $(am__objects_43) +reductions_dot_product_OBJECTS = $(am_reductions_dot_product_OBJECTS) +reductions_dot_product_LDADD = $(LDADD) +reductions_minmax_reduction_SOURCES = reductions/minmax_reduction.c +reductions_minmax_reduction_OBJECTS = \ + reductions/minmax_reduction.$(OBJEXT) +reductions_minmax_reduction_LDADD = $(LDADD) +sched_ctx_dummy_sched_with_ctx_SOURCES = \ + sched_ctx/dummy_sched_with_ctx.c +sched_ctx_dummy_sched_with_ctx_OBJECTS = \ + sched_ctx/dummy_sched_with_ctx.$(OBJEXT) +sched_ctx_dummy_sched_with_ctx_LDADD = $(LDADD) +am__sched_ctx_gpu_partition_SOURCES_DIST = sched_ctx/gpu_partition.c \ + sched_ctx/axpy_partition_gpu.cu +@STARPU_USE_CUDA_TRUE@am_sched_ctx_gpu_partition_OBJECTS = \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition.$(OBJEXT) \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/axpy_partition_gpu.$(OBJEXT) +sched_ctx_gpu_partition_OBJECTS = \ + $(am_sched_ctx_gpu_partition_OBJECTS) +sched_ctx_gpu_partition_LDADD = $(LDADD) +sched_ctx_nested_sched_ctxs_SOURCES = sched_ctx/nested_sched_ctxs.c +sched_ctx_nested_sched_ctxs_OBJECTS = \ + sched_ctx/nested_sched_ctxs-nested_sched_ctxs.$(OBJEXT) +sched_ctx_nested_sched_ctxs_LDADD = $(LDADD) +sched_ctx_nested_sched_ctxs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +sched_ctx_parallel_code_SOURCES = sched_ctx/parallel_code.c +sched_ctx_parallel_code_OBJECTS = \ + sched_ctx/parallel_code-parallel_code.$(OBJEXT) +sched_ctx_parallel_code_LDADD = $(LDADD) +sched_ctx_parallel_code_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +sched_ctx_parallel_tasks_reuse_handle_SOURCES = \ + sched_ctx/parallel_tasks_reuse_handle.c 
+sched_ctx_parallel_tasks_reuse_handle_OBJECTS = sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.$(OBJEXT) +sched_ctx_parallel_tasks_reuse_handle_LDADD = $(LDADD) +sched_ctx_parallel_tasks_reuse_handle_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +sched_ctx_prio_SOURCES = sched_ctx/prio.c +sched_ctx_prio_OBJECTS = sched_ctx/prio.$(OBJEXT) +sched_ctx_prio_LDADD = $(LDADD) +sched_ctx_sched_ctx_SOURCES = sched_ctx/sched_ctx.c +sched_ctx_sched_ctx_OBJECTS = sched_ctx/sched_ctx.$(OBJEXT) +sched_ctx_sched_ctx_LDADD = $(LDADD) +sched_ctx_sched_ctx_delete_SOURCES = sched_ctx/sched_ctx_delete.c +sched_ctx_sched_ctx_delete_OBJECTS = \ + sched_ctx/sched_ctx_delete.$(OBJEXT) +sched_ctx_sched_ctx_delete_LDADD = $(LDADD) +sched_ctx_sched_ctx_empty_SOURCES = sched_ctx/sched_ctx_empty.c +sched_ctx_sched_ctx_empty_OBJECTS = \ + sched_ctx/sched_ctx_empty.$(OBJEXT) +sched_ctx_sched_ctx_empty_LDADD = $(LDADD) +sched_ctx_sched_ctx_remove_SOURCES = sched_ctx/sched_ctx_remove.c +sched_ctx_sched_ctx_remove_OBJECTS = \ + sched_ctx/sched_ctx_remove.$(OBJEXT) +sched_ctx_sched_ctx_remove_LDADD = $(LDADD) +sched_ctx_sched_ctx_without_sched_policy_SOURCES = \ + sched_ctx/sched_ctx_without_sched_policy.c +sched_ctx_sched_ctx_without_sched_policy_OBJECTS = sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.$(OBJEXT) +sched_ctx_sched_ctx_without_sched_policy_LDADD = $(LDADD) +sched_ctx_sched_ctx_without_sched_policy_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +sched_ctx_sched_ctx_without_sched_policy_awake_SOURCES = \ + sched_ctx/sched_ctx_without_sched_policy_awake.c +sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS = \ + 
sched_ctx/sched_ctx_without_sched_policy_awake.$(OBJEXT) +sched_ctx_sched_ctx_without_sched_policy_awake_LDADD = $(LDADD) +sched_ctx_two_cpu_contexts_SOURCES = sched_ctx/two_cpu_contexts.c +sched_ctx_two_cpu_contexts_OBJECTS = \ + sched_ctx/two_cpu_contexts.$(OBJEXT) +sched_ctx_two_cpu_contexts_LDADD = $(LDADD) +scheduler_dummy_modular_sched_SOURCES = \ + scheduler/dummy_modular_sched.c +scheduler_dummy_modular_sched_OBJECTS = \ + scheduler/dummy_modular_sched.$(OBJEXT) +scheduler_dummy_modular_sched_LDADD = $(LDADD) +scheduler_dummy_sched_SOURCES = scheduler/dummy_sched.c +scheduler_dummy_sched_OBJECTS = scheduler/dummy_sched.$(OBJEXT) +scheduler_dummy_sched_LDADD = $(LDADD) +scheduler_heteroprio_test_SOURCES = scheduler/heteroprio_test.c +scheduler_heteroprio_test_OBJECTS = \ + scheduler/heteroprio_test.$(OBJEXT) +scheduler_heteroprio_test_LDADD = $(LDADD) +am_spmd_vector_scal_spmd_OBJECTS = spmd/vector_scal_spmd.$(OBJEXT) +spmd_vector_scal_spmd_OBJECTS = $(am_spmd_vector_scal_spmd_OBJECTS) +spmd_vector_scal_spmd_LDADD = $(LDADD) +am_spmv_dw_block_spmv_OBJECTS = spmv/dw_block_spmv.$(OBJEXT) \ + spmv/dw_block_spmv_kernels.$(OBJEXT) \ + spmv/matrix_market/mm_to_bcsr.$(OBJEXT) \ + spmv/matrix_market/mmio.$(OBJEXT) +spmv_dw_block_spmv_OBJECTS = $(am_spmv_dw_block_spmv_OBJECTS) +spmv_dw_block_spmv_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__spmv_spmv_SOURCES_DIST = spmv/spmv.c spmv/spmv_kernels.c \ + spmv/spmv_cuda.cu +@STARPU_USE_CUDA_TRUE@am__objects_44 = spmv/spmv_cuda.$(OBJEXT) +am_spmv_spmv_OBJECTS = spmv/spmv.$(OBJEXT) spmv/spmv_kernels.$(OBJEXT) \ + $(am__objects_44) +spmv_spmv_OBJECTS = $(am_spmv_spmv_OBJECTS) +spmv_spmv_LDADD = $(LDADD) +am_subgraphs_manual_OBJECTS = subgraphs/manual.$(OBJEXT) \ + subgraphs/codelets.$(OBJEXT) +subgraphs_manual_OBJECTS = $(am_subgraphs_manual_OBJECTS) +subgraphs_manual_LDADD = $(LDADD) +am_subgraphs_partition_OBJECTS = subgraphs/partition.$(OBJEXT) \ + subgraphs/codelets.$(OBJEXT) +subgraphs_partition_OBJECTS = 
$(am_subgraphs_partition_OBJECTS) +subgraphs_partition_LDADD = $(LDADD) +am_subgraphs_plan_OBJECTS = subgraphs/plan.$(OBJEXT) \ + subgraphs/codelets.$(OBJEXT) +subgraphs_plan_OBJECTS = $(am_subgraphs_plan_OBJECTS) +subgraphs_plan_LDADD = $(LDADD) +tag_example_tag_example_SOURCES = tag_example/tag_example.c +tag_example_tag_example_OBJECTS = tag_example/tag_example.$(OBJEXT) +tag_example_tag_example_LDADD = $(LDADD) +tag_example_tag_example2_SOURCES = tag_example/tag_example2.c +tag_example_tag_example2_OBJECTS = tag_example/tag_example2.$(OBJEXT) +tag_example_tag_example2_LDADD = $(LDADD) +tag_example_tag_example3_SOURCES = tag_example/tag_example3.c +tag_example_tag_example3_OBJECTS = tag_example/tag_example3.$(OBJEXT) +tag_example_tag_example3_LDADD = $(LDADD) +tag_example_tag_example4_SOURCES = tag_example/tag_example4.c +tag_example_tag_example4_OBJECTS = tag_example/tag_example4.$(OBJEXT) +tag_example_tag_example4_LDADD = $(LDADD) +tag_example_tag_restartable_SOURCES = tag_example/tag_restartable.c +tag_example_tag_restartable_OBJECTS = \ + tag_example/tag_restartable.$(OBJEXT) +tag_example_tag_restartable_LDADD = $(LDADD) +transactions_trs_inc_SOURCES = transactions/trs_inc.c +transactions_trs_inc_OBJECTS = transactions/trs_inc.$(OBJEXT) +transactions_trs_inc_LDADD = $(LDADD) +am__transactions_trs_sgemm_SOURCES_DIST = transactions/trs_sgemm.c \ + common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_transactions_trs_sgemm_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ transactions/trs_sgemm.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) +transactions_trs_sgemm_OBJECTS = $(am_transactions_trs_sgemm_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +worker_collections_worker_list_example_SOURCES = \ + worker_collections/worker_list_example.c +worker_collections_worker_list_example_OBJECTS = \ + worker_collections/worker_list_example.$(OBJEXT) +worker_collections_worker_list_example_LDADD 
= $(LDADD) +worker_collections_worker_tree_example_SOURCES = \ + worker_collections/worker_tree_example.c +worker_collections_worker_tree_example_OBJECTS = \ + worker_collections/worker_tree_example.$(OBJEXT) +worker_collections_worker_tree_example_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + api/$(DEPDIR)/bcsr_data_interface.Po \ + api/$(DEPDIR)/block_data_interface.Po \ + api/$(DEPDIR)/coo_data_interface.Po \ + api/$(DEPDIR)/csr_data_interface.Po \ + api/$(DEPDIR)/matrix_data_interface.Po \ + api/$(DEPDIR)/multiformat_data_interface.Po \ + api/$(DEPDIR)/tensor_data_interface.Po \ + api/$(DEPDIR)/variable_data_interface.Po \ + api/$(DEPDIR)/vector_data_interface.Po \ + api/$(DEPDIR)/void_data_interface.Po axpy/$(DEPDIR)/axpy.Po \ + axpy/$(DEPDIR)/axpy_opencl.Po \ + basic_examples/$(DEPDIR)/block.Po \ + basic_examples/$(DEPDIR)/block_cpu.Po \ + basic_examples/$(DEPDIR)/block_opencl.Po \ + basic_examples/$(DEPDIR)/dynamic_handles.Po \ + basic_examples/$(DEPDIR)/hello_world.Po \ + basic_examples/$(DEPDIR)/hooks.Po \ + basic_examples/$(DEPDIR)/mult.Po \ + basic_examples/$(DEPDIR)/multiformat.Po \ + basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po \ + basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po \ + basic_examples/$(DEPDIR)/multiformat_opencl.Po \ + basic_examples/$(DEPDIR)/ndim.Po \ + basic_examples/$(DEPDIR)/task_insert_color.Po \ + basic_examples/$(DEPDIR)/topology.Po \ + 
basic_examples/$(DEPDIR)/variable.Po \ + basic_examples/$(DEPDIR)/variable_kernels_cpu.Po \ + basic_examples/$(DEPDIR)/variable_kernels_opencl.Po \ + basic_examples/$(DEPDIR)/vector_scal.Po \ + basic_examples/$(DEPDIR)/vector_scal_c.Po \ + basic_examples/$(DEPDIR)/vector_scal_cpu.Po \ + basic_examples/$(DEPDIR)/vector_scal_opencl.Po \ + binary/$(DEPDIR)/binary.Po callback/$(DEPDIR)/callback.Po \ + callback/$(DEPDIR)/prologue.Po cg/$(DEPDIR)/cg.Po \ + cholesky/$(DEPDIR)/cholesky_compil.Po \ + cholesky/$(DEPDIR)/cholesky_grain_tag.Po \ + cholesky/$(DEPDIR)/cholesky_implicit.Po \ + cholesky/$(DEPDIR)/cholesky_kernels.Po \ + cholesky/$(DEPDIR)/cholesky_models.Po \ + cholesky/$(DEPDIR)/cholesky_tag.Po \ + cholesky/$(DEPDIR)/cholesky_tile_tag.Po \ + cholesky/$(DEPDIR)/libmy_dmda.Plo common/$(DEPDIR)/blas.Po \ + cpp/$(DEPDIR)/add_vectors.Po \ + cpp/$(DEPDIR)/add_vectors_cpp11.Po \ + cpp/$(DEPDIR)/add_vectors_interface.Po \ + cpp/$(DEPDIR)/incrementer_cpp.Po \ + dependency/$(DEPDIR)/sequential_consistency.Po \ + dependency/$(DEPDIR)/task_end_dep.Po \ + dependency/$(DEPDIR)/task_end_dep_add.Po \ + filters/$(DEPDIR)/alloc.Po filters/$(DEPDIR)/f3d_cpu.Po \ + filters/$(DEPDIR)/f4d_cpu.Po filters/$(DEPDIR)/f5d_print.Po \ + filters/$(DEPDIR)/fblock.Po filters/$(DEPDIR)/fblock_cpu.Po \ + filters/$(DEPDIR)/fblock_opencl.Po \ + filters/$(DEPDIR)/fblock_pick_matrix.Po \ + filters/$(DEPDIR)/fblock_pick_variable.Po \ + filters/$(DEPDIR)/fblock_print.Po filters/$(DEPDIR)/fmatrix.Po \ + filters/$(DEPDIR)/fmatrix_cpu.Po \ + filters/$(DEPDIR)/fmatrix_pick_variable.Po \ + filters/$(DEPDIR)/fmatrix_pick_vector.Po \ + filters/$(DEPDIR)/fmatrix_print.Po \ + filters/$(DEPDIR)/fmultiple_manual.Po \ + filters/$(DEPDIR)/fmultiple_submit.Po \ + filters/$(DEPDIR)/fmultiple_submit_implicit.Po \ + filters/$(DEPDIR)/fmultiple_submit_readonly.Po \ + filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po \ + filters/$(DEPDIR)/fndim.Po \ + filters/$(DEPDIR)/fndim_1d_pick_variable.Po \ + 
filters/$(DEPDIR)/fndim_2d_pick_vector.Po \ + filters/$(DEPDIR)/fndim_3d_pick_matrix.Po \ + filters/$(DEPDIR)/fndim_4d_pick_block.Po \ + filters/$(DEPDIR)/fndim_5d_pick_tensor.Po \ + filters/$(DEPDIR)/fndim_pick_ndim.Po \ + filters/$(DEPDIR)/fndim_pick_variable.Po \ + filters/$(DEPDIR)/fndim_to_block.Po \ + filters/$(DEPDIR)/fndim_to_matrix.Po \ + filters/$(DEPDIR)/fndim_to_tensor.Po \ + filters/$(DEPDIR)/fndim_to_variable.Po \ + filters/$(DEPDIR)/fndim_to_vector.Po \ + filters/$(DEPDIR)/fread.Po filters/$(DEPDIR)/frecursive.Po \ + filters/$(DEPDIR)/ftensor.Po filters/$(DEPDIR)/ftensor_cpu.Po \ + filters/$(DEPDIR)/ftensor_pick_block.Po \ + filters/$(DEPDIR)/ftensor_pick_variable.Po \ + filters/$(DEPDIR)/ftensor_print.Po \ + filters/$(DEPDIR)/fvector.Po filters/$(DEPDIR)/fvector_cpu.Po \ + filters/$(DEPDIR)/fvector_pick_variable.Po \ + filters/$(DEPDIR)/shadow.Po filters/$(DEPDIR)/shadow2d.Po \ + filters/$(DEPDIR)/shadow3d.Po filters/$(DEPDIR)/shadow4d.Po \ + filters/$(DEPDIR)/shadownd.Po \ + filters/custom_mf/$(DEPDIR)/conversion_opencl.Po \ + filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po \ + filters/custom_mf/$(DEPDIR)/custom_interface.Po \ + filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po \ + filters/custom_mf/$(DEPDIR)/custom_opencl.Po \ + fortran/$(DEPDIR)/hello_c.Po \ + fortran90/$(DEPDIR)/marshalling.Po \ + gl_interop/$(DEPDIR)/gl_interop.Po \ + gl_interop/$(DEPDIR)/gl_interop_idle.Po \ + heat/$(DEPDIR)/dw_factolu.Po \ + heat/$(DEPDIR)/dw_factolu_grain.Po \ + heat/$(DEPDIR)/dw_factolu_kernels.Po \ + heat/$(DEPDIR)/dw_factolu_tag.Po \ + heat/$(DEPDIR)/dw_sparse_cg.Po \ + heat/$(DEPDIR)/dw_sparse_cg_kernels.Po heat/$(DEPDIR)/heat.Po \ + heat/$(DEPDIR)/heat_display.Po \ + heat/$(DEPDIR)/lu_kernels_model.Po \ + incrementer/$(DEPDIR)/incrementer.Po \ + incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po \ + interface/$(DEPDIR)/complex.Po \ + interface/$(DEPDIR)/complex_filters.Po \ + interface/$(DEPDIR)/complex_interface.Po \ + 
interface/$(DEPDIR)/complex_kernels_opencl.Po \ + interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po \ + interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po \ + interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po \ + interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po \ + lu/$(DEPDIR)/blas_complex.Po lu/$(DEPDIR)/clu.Po \ + lu/$(DEPDIR)/clu_implicit.Po \ + lu/$(DEPDIR)/clu_implicit_pivot.Po lu/$(DEPDIR)/clu_kernels.Po \ + lu/$(DEPDIR)/clu_pivot.Po lu/$(DEPDIR)/dlu.Po \ + lu/$(DEPDIR)/dlu_implicit.Po \ + lu/$(DEPDIR)/dlu_implicit_pivot.Po lu/$(DEPDIR)/dlu_kernels.Po \ + lu/$(DEPDIR)/dlu_pivot.Po \ + lu/$(DEPDIR)/lu_example_complex_double.Po \ + lu/$(DEPDIR)/lu_example_complex_float.Po \ + lu/$(DEPDIR)/lu_example_double.Po \ + lu/$(DEPDIR)/lu_example_float.Po lu/$(DEPDIR)/slu.Po \ + lu/$(DEPDIR)/slu_implicit.Po \ + lu/$(DEPDIR)/slu_implicit_pivot.Po lu/$(DEPDIR)/slu_kernels.Po \ + lu/$(DEPDIR)/slu_pivot.Po lu/$(DEPDIR)/zlu.Po \ + lu/$(DEPDIR)/zlu_implicit.Po \ + lu/$(DEPDIR)/zlu_implicit_pivot.Po lu/$(DEPDIR)/zlu_kernels.Po \ + lu/$(DEPDIR)/zlu_pivot.Po \ + mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po \ + matvecmult/$(DEPDIR)/matvecmult.Po mlr/$(DEPDIR)/mlr.Po \ + mult/$(DEPDIR)/dgemm.Po mult/$(DEPDIR)/dgemm_layout.Po \ + mult/$(DEPDIR)/sgemm.Po mult/$(DEPDIR)/sgemm_layout.Po \ + openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po \ + parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po \ + parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po \ + parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po \ + perf_monitoring/$(DEPDIR)/perf_counters_01.Po \ + perf_monitoring/$(DEPDIR)/perf_counters_02.Po \ + perf_steering/$(DEPDIR)/perf_knobs_01.Po \ + perf_steering/$(DEPDIR)/perf_knobs_02.Po \ + perf_steering/$(DEPDIR)/perf_knobs_03.Po pi/$(DEPDIR)/pi.Po \ + pi/$(DEPDIR)/pi_redux.Po pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po \ + 
pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po \ + pipeline/$(DEPDIR)/pipeline.Po \ + ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po \ + ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po \ + profiling/$(DEPDIR)/profiling.Po \ + profiling_tool/$(DEPDIR)/libprofiling_tool.Plo \ + reductions/$(DEPDIR)/dot_product.Po \ + reductions/$(DEPDIR)/minmax_reduction.Po \ + sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po \ + sched_ctx/$(DEPDIR)/gpu_partition.Po \ + sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po \ + sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po \ + sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po \ + sched_ctx/$(DEPDIR)/prio.Po sched_ctx/$(DEPDIR)/sched_ctx.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_delete.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_empty.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_remove.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po \ + sched_ctx/$(DEPDIR)/two_cpu_contexts.Po \ + sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po \ + scheduler/$(DEPDIR)/dummy_modular_sched.Po \ + scheduler/$(DEPDIR)/dummy_sched.Po \ + scheduler/$(DEPDIR)/heteroprio_test.Po \ + scheduler/$(DEPDIR)/libdummy_sched.Plo \ + spmd/$(DEPDIR)/vector_scal_spmd.Po \ + spmv/$(DEPDIR)/dw_block_spmv.Po \ + spmv/$(DEPDIR)/dw_block_spmv_kernels.Po spmv/$(DEPDIR)/spmv.Po \ + spmv/$(DEPDIR)/spmv_kernels.Po \ + spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po \ + spmv/matrix_market/$(DEPDIR)/mmio.Po \ + subgraphs/$(DEPDIR)/codelets.Po subgraphs/$(DEPDIR)/manual.Po \ + subgraphs/$(DEPDIR)/partition.Po subgraphs/$(DEPDIR)/plan.Po \ + tag_example/$(DEPDIR)/tag_example.Po \ + tag_example/$(DEPDIR)/tag_example2.Po \ + tag_example/$(DEPDIR)/tag_example3.Po \ + tag_example/$(DEPDIR)/tag_example4.Po \ + tag_example/$(DEPDIR)/tag_restartable.Po \ + transactions/$(DEPDIR)/trs_inc.Po \ + transactions/$(DEPDIR)/trs_sgemm.Po \ + worker_collections/$(DEPDIR)/worker_list_example.Po \ + 
worker_collections/$(DEPDIR)/worker_tree_example.Po +am__mv = mv -f +PPF77COMPILE = $(F77) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FFLAGS) $(FFLAGS) +LTPPF77COMPILE = $(LIBTOOL) $(AM_V_lt) --tag=F77 $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(F77) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_FFLAGS) $(FFLAGS) +AM_V_PPF77 = $(am__v_PPF77_@AM_V@) +am__v_PPF77_ = $(am__v_PPF77_@AM_DEFAULT_V@) +am__v_PPF77_0 = @echo " PPF77 " $@; +am__v_PPF77_1 = +F77LD = $(F77) +F77LINK = $(LIBTOOL) $(AM_V_lt) --tag=F77 $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(F77LD) $(AM_FFLAGS) $(FFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_F77LD = $(am__v_F77LD_@AM_V@) +am__v_F77LD_ = $(am__v_F77LD_@AM_DEFAULT_V@) +am__v_F77LD_0 = @echo " F77LD " $@; +am__v_F77LD_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " 
CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) +LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) +AM_V_FC = $(am__v_FC_@AM_V@) +am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) +am__v_FC_0 = @echo " FC " $@; +am__v_FC_1 = +FCLD = $(FC) +FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_FCLD = $(am__v_FCLD_@AM_V@) +am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) +am__v_FCLD_0 = @echo " FCLD " $@; +am__v_FCLD_1 = +SOURCES = cholesky/libmy_dmda.c profiling_tool/libprofiling_tool.c \ + scheduler/libdummy_sched.c api/bcsr_data_interface.c \ + api/block_data_interface.c api/coo_data_interface.c \ + api/csr_data_interface.c api/matrix_data_interface.c \ + api/multiformat_data_interface.c api/tensor_data_interface.c \ + api/variable_data_interface.c api/vector_data_interface.c \ + api/void_data_interface.c $(axpy_axpy_SOURCES) \ + $(basic_examples_block_SOURCES) \ + basic_examples/dynamic_handles.c basic_examples/hello_world.c \ + basic_examples/hooks.c $(basic_examples_mult_SOURCES) \ + $(basic_examples_multiformat_SOURCES) basic_examples/ndim.c \ + basic_examples/task_insert_color.c basic_examples/topology.c \ + $(basic_examples_variable_SOURCES) \ + $(basic_examples_vector_scal_SOURCES) \ + $(basic_examples_vector_scal_fortran_SOURCES) \ + $(binary_binary_SOURCES) callback/callback.c \ + callback/prologue.c $(cg_cg_SOURCES) \ + $(cholesky_cholesky_compil_SOURCES) \ + $(cholesky_cholesky_grain_tag_SOURCES) \ + $(cholesky_cholesky_implicit_SOURCES) \ + 
$(cholesky_cholesky_tag_SOURCES) \ + $(cholesky_cholesky_tile_tag_SOURCES) \ + $(cpp_add_vectors_SOURCES) $(cpp_add_vectors_cpp11_SOURCES) \ + $(cpp_add_vectors_interface_SOURCES) \ + $(cpp_incrementer_cpp_SOURCES) \ + dependency/sequential_consistency.c dependency/task_end_dep.c \ + dependency/task_end_dep_add.c filters/alloc.c \ + $(filters_custom_mf_custom_mf_filter_SOURCES) \ + $(filters_fblock_SOURCES) \ + $(filters_fblock_pick_matrix_SOURCES) \ + $(filters_fblock_pick_variable_SOURCES) \ + $(filters_fmatrix_SOURCES) \ + $(filters_fmatrix_pick_variable_SOURCES) \ + $(filters_fmatrix_pick_vector_SOURCES) \ + $(filters_fmultiple_manual_SOURCES) \ + $(filters_fmultiple_submit_SOURCES) \ + $(filters_fmultiple_submit_implicit_SOURCES) \ + $(filters_fmultiple_submit_readonly_SOURCES) \ + $(filters_fmultiple_submit_readonly_downgrade_SOURCES) \ + $(filters_fndim_SOURCES) filters/fndim_1d_pick_variable.c \ + $(filters_fndim_2d_pick_vector_SOURCES) \ + $(filters_fndim_3d_pick_matrix_SOURCES) \ + $(filters_fndim_4d_pick_block_SOURCES) \ + $(filters_fndim_5d_pick_tensor_SOURCES) \ + $(filters_fndim_pick_ndim_SOURCES) \ + $(filters_fndim_pick_variable_SOURCES) \ + $(filters_fndim_to_block_SOURCES) \ + $(filters_fndim_to_matrix_SOURCES) \ + $(filters_fndim_to_tensor_SOURCES) filters/fndim_to_variable.c \ + $(filters_fndim_to_vector_SOURCES) filters/fread.c \ + filters/frecursive.c $(filters_ftensor_SOURCES) \ + $(filters_ftensor_pick_block_SOURCES) \ + $(filters_ftensor_pick_variable_SOURCES) \ + $(filters_fvector_SOURCES) filters/fvector_pick_variable.c \ + filters/shadow.c filters/shadow2d.c filters/shadow3d.c \ + filters/shadow4d.c filters/shadownd.c $(fortran_hello_SOURCES) \ + $(fortran90_f90_example_SOURCES) gl_interop/gl_interop.c \ + gl_interop/gl_interop_idle.c $(heat_heat_SOURCES) \ + $(incrementer_incrementer_SOURCES) \ + $(interface_complex_SOURCES) \ + $(interface_complex_dev_handle_complex_dev_handle_SOURCES) \ + loader.c 
$(lu_lu_example_complex_double_SOURCES) \ + $(lu_lu_example_complex_float_SOURCES) \ + $(lu_lu_example_double_SOURCES) $(lu_lu_example_float_SOURCES) \ + $(lu_lu_implicit_example_complex_double_SOURCES) \ + $(lu_lu_implicit_example_complex_float_SOURCES) \ + $(lu_lu_implicit_example_double_SOURCES) \ + $(lu_lu_implicit_example_float_SOURCES) \ + mandelbrot/mandelbrot.c matvecmult/matvecmult.c mlr/mlr.c \ + $(mult_dgemm_SOURCES) $(mult_dgemm_layout_SOURCES) \ + $(mult_sgemm_SOURCES) $(mult_sgemm_layout_SOURCES) \ + $(native_fortran_nf_dynbuf_SOURCES) \ + $(native_fortran_nf_example_SOURCES) \ + $(native_fortran_nf_matrix_SOURCES) \ + $(native_fortran_nf_partition_SOURCES) \ + $(native_fortran_nf_sched_ctx_SOURCES) \ + $(native_fortran_nf_varbuf_SOURCES) \ + $(native_fortran_nf_vector_SOURCES) openmp/vector_scal_omp.c \ + parallel_workers/parallel_workers.c \ + parallel_workers/parallel_workers_func.c \ + parallel_workers/parallel_workers_oldapi.c \ + perf_monitoring/perf_counters_01.c \ + perf_monitoring/perf_counters_02.c \ + perf_steering/perf_knobs_01.c perf_steering/perf_knobs_02.c \ + perf_steering/perf_knobs_03.c $(pi_pi_SOURCES) \ + $(pi_pi_redux_SOURCES) $(pipeline_pipeline_SOURCES) \ + ppm_downscaler/ppm_downscaler.c \ + ppm_downscaler/yuv_downscaler.c profiling/profiling.c \ + $(reductions_dot_product_SOURCES) \ + reductions/minmax_reduction.c sched_ctx/dummy_sched_with_ctx.c \ + $(sched_ctx_gpu_partition_SOURCES) \ + sched_ctx/nested_sched_ctxs.c sched_ctx/parallel_code.c \ + sched_ctx/parallel_tasks_reuse_handle.c sched_ctx/prio.c \ + sched_ctx/sched_ctx.c sched_ctx/sched_ctx_delete.c \ + sched_ctx/sched_ctx_empty.c sched_ctx/sched_ctx_remove.c \ + sched_ctx/sched_ctx_without_sched_policy.c \ + sched_ctx/sched_ctx_without_sched_policy_awake.c \ + sched_ctx/two_cpu_contexts.c scheduler/dummy_modular_sched.c \ + scheduler/dummy_sched.c scheduler/heteroprio_test.c \ + $(spmd_vector_scal_spmd_SOURCES) $(spmv_dw_block_spmv_SOURCES) \ + $(spmv_spmv_SOURCES) 
$(subgraphs_manual_SOURCES) \ + $(subgraphs_partition_SOURCES) $(subgraphs_plan_SOURCES) \ + tag_example/tag_example.c tag_example/tag_example2.c \ + tag_example/tag_example3.c tag_example/tag_example4.c \ + tag_example/tag_restartable.c transactions/trs_inc.c \ + $(transactions_trs_sgemm_SOURCES) \ + worker_collections/worker_list_example.c \ + worker_collections/worker_tree_example.c +DIST_SOURCES = cholesky/libmy_dmda.c \ + profiling_tool/libprofiling_tool.c scheduler/libdummy_sched.c \ + api/bcsr_data_interface.c api/block_data_interface.c \ + api/coo_data_interface.c api/csr_data_interface.c \ + api/matrix_data_interface.c api/multiformat_data_interface.c \ + api/tensor_data_interface.c api/variable_data_interface.c \ + api/vector_data_interface.c api/void_data_interface.c \ + $(am__axpy_axpy_SOURCES_DIST) \ + $(am__basic_examples_block_SOURCES_DIST) \ + basic_examples/dynamic_handles.c basic_examples/hello_world.c \ + basic_examples/hooks.c $(am__basic_examples_mult_SOURCES_DIST) \ + $(am__basic_examples_multiformat_SOURCES_DIST) \ + basic_examples/ndim.c basic_examples/task_insert_color.c \ + basic_examples/topology.c \ + $(am__basic_examples_variable_SOURCES_DIST) \ + $(am__basic_examples_vector_scal_SOURCES_DIST) \ + $(am__basic_examples_vector_scal_fortran_SOURCES_DIST) \ + $(am__binary_binary_SOURCES_DIST) callback/callback.c \ + callback/prologue.c $(am__cg_cg_SOURCES_DIST) \ + $(am__cholesky_cholesky_compil_SOURCES_DIST) \ + $(am__cholesky_cholesky_grain_tag_SOURCES_DIST) \ + $(am__cholesky_cholesky_implicit_SOURCES_DIST) \ + $(am__cholesky_cholesky_tag_SOURCES_DIST) \ + $(am__cholesky_cholesky_tile_tag_SOURCES_DIST) \ + $(cpp_add_vectors_SOURCES) \ + $(am__cpp_add_vectors_cpp11_SOURCES_DIST) \ + $(cpp_add_vectors_interface_SOURCES) \ + $(am__cpp_incrementer_cpp_SOURCES_DIST) \ + dependency/sequential_consistency.c dependency/task_end_dep.c \ + dependency/task_end_dep_add.c filters/alloc.c \ + $(am__filters_custom_mf_custom_mf_filter_SOURCES_DIST) \ + 
$(am__filters_fblock_SOURCES_DIST) \ + $(am__filters_fblock_pick_matrix_SOURCES_DIST) \ + $(am__filters_fblock_pick_variable_SOURCES_DIST) \ + $(am__filters_fmatrix_SOURCES_DIST) \ + $(am__filters_fmatrix_pick_variable_SOURCES_DIST) \ + $(am__filters_fmatrix_pick_vector_SOURCES_DIST) \ + $(am__filters_fmultiple_manual_SOURCES_DIST) \ + $(am__filters_fmultiple_submit_SOURCES_DIST) \ + $(am__filters_fmultiple_submit_implicit_SOURCES_DIST) \ + $(am__filters_fmultiple_submit_readonly_SOURCES_DIST) \ + $(am__filters_fmultiple_submit_readonly_downgrade_SOURCES_DIST) \ + $(am__filters_fndim_SOURCES_DIST) \ + filters/fndim_1d_pick_variable.c \ + $(am__filters_fndim_2d_pick_vector_SOURCES_DIST) \ + $(am__filters_fndim_3d_pick_matrix_SOURCES_DIST) \ + $(am__filters_fndim_4d_pick_block_SOURCES_DIST) \ + $(am__filters_fndim_5d_pick_tensor_SOURCES_DIST) \ + $(am__filters_fndim_pick_ndim_SOURCES_DIST) \ + $(filters_fndim_pick_variable_SOURCES) \ + $(am__filters_fndim_to_block_SOURCES_DIST) \ + $(am__filters_fndim_to_matrix_SOURCES_DIST) \ + $(am__filters_fndim_to_tensor_SOURCES_DIST) \ + filters/fndim_to_variable.c \ + $(am__filters_fndim_to_vector_SOURCES_DIST) filters/fread.c \ + filters/frecursive.c $(am__filters_ftensor_SOURCES_DIST) \ + $(am__filters_ftensor_pick_block_SOURCES_DIST) \ + $(am__filters_ftensor_pick_variable_SOURCES_DIST) \ + $(am__filters_fvector_SOURCES_DIST) \ + filters/fvector_pick_variable.c filters/shadow.c \ + filters/shadow2d.c filters/shadow3d.c filters/shadow4d.c \ + filters/shadownd.c $(am__fortran_hello_SOURCES_DIST) \ + $(am__fortran90_f90_example_SOURCES_DIST) \ + gl_interop/gl_interop.c gl_interop/gl_interop_idle.c \ + $(am__heat_heat_SOURCES_DIST) \ + $(am__incrementer_incrementer_SOURCES_DIST) \ + $(am__interface_complex_SOURCES_DIST) \ + $(am__interface_complex_dev_handle_complex_dev_handle_SOURCES_DIST) \ + loader.c $(am__lu_lu_example_complex_double_SOURCES_DIST) \ + $(am__lu_lu_example_complex_float_SOURCES_DIST) \ + 
$(am__lu_lu_example_double_SOURCES_DIST) \ + $(am__lu_lu_example_float_SOURCES_DIST) \ + $(am__lu_lu_implicit_example_complex_double_SOURCES_DIST) \ + $(am__lu_lu_implicit_example_complex_float_SOURCES_DIST) \ + $(am__lu_lu_implicit_example_double_SOURCES_DIST) \ + $(am__lu_lu_implicit_example_float_SOURCES_DIST) \ + mandelbrot/mandelbrot.c matvecmult/matvecmult.c mlr/mlr.c \ + $(am__mult_dgemm_SOURCES_DIST) \ + $(am__mult_dgemm_layout_SOURCES_DIST) \ + $(am__mult_sgemm_SOURCES_DIST) \ + $(am__mult_sgemm_layout_SOURCES_DIST) \ + $(am__native_fortran_nf_dynbuf_SOURCES_DIST) \ + $(am__native_fortran_nf_example_SOURCES_DIST) \ + $(am__native_fortran_nf_matrix_SOURCES_DIST) \ + $(am__native_fortran_nf_partition_SOURCES_DIST) \ + $(am__native_fortran_nf_sched_ctx_SOURCES_DIST) \ + $(am__native_fortran_nf_varbuf_SOURCES_DIST) \ + $(am__native_fortran_nf_vector_SOURCES_DIST) \ + openmp/vector_scal_omp.c parallel_workers/parallel_workers.c \ + parallel_workers/parallel_workers_func.c \ + parallel_workers/parallel_workers_oldapi.c \ + perf_monitoring/perf_counters_01.c \ + perf_monitoring/perf_counters_02.c \ + perf_steering/perf_knobs_01.c perf_steering/perf_knobs_02.c \ + perf_steering/perf_knobs_03.c $(am__pi_pi_SOURCES_DIST) \ + $(am__pi_pi_redux_SOURCES_DIST) \ + $(am__pipeline_pipeline_SOURCES_DIST) \ + ppm_downscaler/ppm_downscaler.c \ + ppm_downscaler/yuv_downscaler.c profiling/profiling.c \ + $(am__reductions_dot_product_SOURCES_DIST) \ + reductions/minmax_reduction.c sched_ctx/dummy_sched_with_ctx.c \ + $(am__sched_ctx_gpu_partition_SOURCES_DIST) \ + sched_ctx/nested_sched_ctxs.c sched_ctx/parallel_code.c \ + sched_ctx/parallel_tasks_reuse_handle.c sched_ctx/prio.c \ + sched_ctx/sched_ctx.c sched_ctx/sched_ctx_delete.c \ + sched_ctx/sched_ctx_empty.c sched_ctx/sched_ctx_remove.c \ + sched_ctx/sched_ctx_without_sched_policy.c \ + sched_ctx/sched_ctx_without_sched_policy_awake.c \ + sched_ctx/two_cpu_contexts.c scheduler/dummy_modular_sched.c \ + 
scheduler/dummy_sched.c scheduler/heteroprio_test.c \ + $(spmd_vector_scal_spmd_SOURCES) $(spmv_dw_block_spmv_SOURCES) \ + $(am__spmv_spmv_SOURCES_DIST) $(subgraphs_manual_SOURCES) \ + $(subgraphs_partition_SOURCES) $(subgraphs_plan_SOURCES) \ + tag_example/tag_example.c tag_example/tag_example2.c \ + tag_example/tag_example3.c tag_example/tag_example4.c \ + tag_example/tag_restartable.c transactions/trs_inc.c \ + $(am__transactions_trs_sgemm_SOURCES_DIST) \ + worker_collections/worker_list_example.c \ + worker_collections/worker_tree_example.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(nobase_STARPU_OPENCL_DATA_DATA) +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ 
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ 
+INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) \ + $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) $(MAGMA_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ 
+MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ 
+STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = 
@STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir 
= @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(MAGMA_CFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(MAGMA_CFLAGS) $(APP_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(MAGMA_CFLAGS) $(APP_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = 
STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2016-2016 Uppsala University +# Copyright (C) 2011-2011 Télécom Sud Paris +# Copyright (C) 2017-2017 Erwan Leria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +SUFFIXES = .hip +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +SUBDIRS = stencil +BUILT_SOURCES = + +###################### +# matVecMult example # +###################### +@STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = basic_examples/vector_scal_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ basic_examples/variable_kernels_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ filters/custom_mf/conversion_opencl.cl \ +@STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.cl \ +@STARPU_USE_OPENCL_TRUE@ $(am__append_96) \ +@STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ interface/complex_kernels.cl \ +@STARPU_USE_OPENCL_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels.cl \ +@STARPU_USE_OPENCL_TRUE@ matvecmult/matvecmult_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ 
reductions/dot_product_opencl_kernels.cl +EXTRA_DIST = \ + README.txt \ + axpy/axpy.h \ + axpy/axpy_opencl_kernel.cl \ + basic_examples/vector_scal_opencl_kernel.cl \ + basic_examples/multiformat_types.h \ + basic_examples/multiformat_opencl_kernel.cl \ + basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ + common/blas_model.c \ + spmd/vector_scal_spmd.c \ + spmv/spmv_cuda.cu \ + spmv/spmv_opencl.cl \ + spmv/matrix_market/examples/fidapm05.mtx \ + mult/xgemm.c \ + mult/xgemm_layout.c \ + mult/xgemm.h \ + mult/sgemm.sh \ + lu/xlu.c \ + lu/xlu_pivot.c \ + lu/xlu_implicit.c \ + lu/xlu_implicit_pivot.c \ + lu/xlu_kernels.c \ + lu/lu_example.c \ + incrementer/incrementer_kernels_opencl_kernel.cl \ + basic_examples/variable_kernels_opencl_kernel.cl \ + matvecmult/matvecmult_kernel.cl \ + basic_examples/block_opencl_kernel.cl \ + filters/fblock_opencl_kernel.cl \ + filters/custom_mf/conversion_opencl.cl \ + filters/custom_mf/custom_opencl.cl \ + filters/custom_mf/custom_types.h \ + interface/complex_kernels.cl \ + interface/complex_dev_handle/complex_dev_handle_kernels.cl \ + reductions/dot_product.h \ + reductions/dot_product_opencl_kernels.cl \ + scheduler/libdummy_sched.sh \ + scheduler/schedulers.sh \ + scheduler/schedulers_context.sh \ + fortran/Makefile \ + sched_ctx/axpy_partition_gpu.h \ + sched_ctx/axpy_partition_gpu.cu \ + heat/heat.sh \ + cholesky/libmy_dmda.h \ + cholesky/cholesky.sh \ + cholesky/cholesky_julia.sh \ + cholesky/cholesky_compiled.c \ + lu/lu.sh \ + subgraphs/main.h \ + native_fortran/Makefile_nf_dynbuf.mk \ + native_fortran/Makefile_nf_example.mk \ + native_fortran/Makefile_nf_matrix.mk \ + native_fortran/Makefile_nf_partition.mk \ + native_fortran/Makefile_nf_sched_ctx.mk \ + native_fortran/Makefile_nf_varbuf.mk \ + native_fortran/Makefile_nf_vector.mk \ + cpp/Makefile_add_vectors_cpp11.mk \ + cpp/Makefile_add_vectors.mk \ + fortran90/Makefile.mk \ + profiling_tool/prof.sh + +CLEANFILES = *.gcno *.gcda *.linkinfo *.mod 
starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90 *.csv *.md *.Rmd *.pdf *.html +pkglib_LTLIBRARIES = $(am__append_11) $(am__append_101) +examplebindir = $(libdir)/starpu/examples/ +noinst_HEADERS = \ + axpy/axpy.h \ + cg/cg.h \ + cg/cg_kernels.c \ + heat/lu_kernels_model.h \ + heat/dw_sparse_cg.h \ + heat/heat.h \ + heat/dw_factolu.h \ + lu/xlu.h \ + lu/xlu_kernels.h \ + lu/lu-float.h \ + lu/lu-double.h \ + lu/complex_float.h \ + lu/complex_double.h \ + lu/blas_complex.h \ + cholesky/cholesky.h \ + sched_ctx_utils/sched_ctx_utils.h \ + common/blas_model.h \ + common/blas.h \ + mult/simple.h \ + mult/double.h \ + fortran/starpu_fortran.h \ + ppm_downscaler/ppm_downscaler.h \ + ppm_downscaler/yuv_downscaler.h \ + spmv/matrix_market/mmio.h \ + spmv/matrix_market/mm_to_bcsr.h \ + spmv/spmv.h \ + spmv/dw_block_spmv.h \ + basic_examples/multiformat_types.h \ + filters/custom_mf/custom_interface.h \ + filters/custom_mf/custom_types.h \ + interface/complex_interface.h \ + interface/complex_codelet.h \ + interface/complex_dev_handle/complex_dev_handle_interface.h \ + interface/complex_dev_handle/complex_dev_handle_codelet.h \ + pi/pi.h \ + pi/SobolQRNG/sobol.h \ + pi/SobolQRNG/sobol_gold.h \ + pi/SobolQRNG/sobol_gpu.h \ + pi/SobolQRNG/sobol_primitives.h \ + reductions/dot_product.h \ + basic_examples/vector_scal_cpu_template.h \ + sched_ctx/axpy_partition_gpu.h + +SHELL_TESTS = scheduler/schedulers.sh scheduler/schedulers_context.sh \ + $(am__append_8) $(am__append_10) $(am__append_22) \ + $(am__append_23) $(am__append_102) + +# STARPU_EXAMPLES list all applications which have to be compiled and checked +# Applications which should only be compiled are added directly in examplebin_PROGRAMS +# see for instance mandelbrot/mandelbrot +STARPU_EXAMPLES = sched_ctx/prio scheduler/dummy_sched \ + 
scheduler/dummy_modular_sched \ + worker_collections/worker_list_example api/bcsr_data_interface \ + api/block_data_interface api/coo_data_interface \ + api/csr_data_interface api/matrix_data_interface \ + api/multiformat_data_interface api/tensor_data_interface \ + api/variable_data_interface api/vector_data_interface \ + api/void_data_interface $(am__append_9) $(am__append_12) \ + $(am__append_13) $(am__append_14) $(am__append_15) \ + $(am__append_16) $(am__append_18) $(am__append_19) \ + $(am__append_21) $(am__append_24) $(am__append_25) \ + $(am__append_26) $(am__append_27) $(am__append_28) \ + $(am__append_29) $(am__append_30) +@STARPU_SIMGRID_FALSE@profiling_tool_libprofiling_tool_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version +@STARPU_SIMGRID_FALSE@scheduler_libdummy_sched_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_CFLAGS = \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_func_CFLAGS = \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_oldapi_CFLAGS = \ +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp + +@STARPU_USE_CUDA_TRUE@sched_ctx_gpu_partition_SOURCES = \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition.c \ +@STARPU_USE_CUDA_TRUE@ sched_ctx/axpy_partition_gpu.cu + + +################## +# Basic examples # +################## 
+basic_examples_vector_scal_SOURCES = basic_examples/vector_scal.c \ + basic_examples/vector_scal_cpu.c $(am__append_31) \ + $(am__append_32) $(am__append_34) $(am__append_36) +basic_examples_mult_SOURCES = basic_examples/mult.c $(am__append_33) \ + $(am__append_35) +@STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(ICC) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +@STARPU_CROSS_COMPILING_TRUE@@STARPU_HAVE_ICC_TRUE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +@STARPU_HAVE_ICC_FALSE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +@STARPU_HAVE_F77_TRUE@basic_examples_vector_scal_fortran_SOURCES = \ +@STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_fortran.F \ +@STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_c.c \ +@STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_cpu.c \ +@STARPU_HAVE_F77_TRUE@ $(am__append_37) +@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@basic_examples_vector_scal_fortran_LDADD = \ +@STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@ $(STARPU_CUDA_FORTRAN_LDFLAGS) + +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@fortran_hello_SOURCES = \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello_c.c \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello.F \ +@STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/starpu_fortran.h + +@STARPU_HAVE_FC_TRUE@fortran90_f90_example_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_types.f90 \ +@STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_interface.f90 \ +@STARPU_HAVE_FC_TRUE@ fortran90/mod_compute.f90 \ 
+@STARPU_HAVE_FC_TRUE@ fortran90/marshalling.c \ +@STARPU_HAVE_FC_TRUE@ fortran90/f90_example.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_vector_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_vector.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_matrix_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_matrix.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_example_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_types.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_compute.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_example.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_dynbuf_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf_cl.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_varbuf_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf_cl.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_sched_ctx_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx_cl.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx.f90 + +@STARPU_HAVE_FC_TRUE@native_fortran_nf_partition_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition_cl.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition.f90 + + +####################### +# Multiformat example # +####################### +basic_examples_multiformat_SOURCES = basic_examples/multiformat.c \ + basic_examples/multiformat_conversion_codelets.c \ + $(am__append_38) $(am__append_39) + 
+################# +# block example # +################# +basic_examples_block_SOURCES = basic_examples/block.c \ + basic_examples/block_cpu.c $(am__append_40) $(am__append_41) \ + $(am__append_42) + +#################### +# Variable example # +#################### +basic_examples_variable_SOURCES = basic_examples/variable.c \ + basic_examples/variable_kernels_cpu.c $(am__append_43) \ + $(am__append_44) + +########### +# Filters # +########### +filters_fvector_SOURCES = filters/fvector.c filters/fvector_cpu.c \ + $(am__append_45) $(am__append_46) +filters_fmatrix_SOURCES = filters/fmatrix.c filters/fmatrix_print.c \ + filters/fmatrix_cpu.c $(am__append_47) $(am__append_48) +filters_fblock_SOURCES = filters/fblock.c filters/fblock_print.c \ + filters/fblock_cpu.c $(am__append_49) $(am__append_50) \ + $(am__append_51) +filters_ftensor_SOURCES = filters/ftensor.c filters/ftensor_print.c \ + filters/ftensor_cpu.c $(am__append_52) $(am__append_53) +filters_fndim_SOURCES = filters/fndim.c filters/ftensor_print.c \ + filters/f4d_cpu.c $(am__append_54) $(am__append_55) +filters_fmatrix_pick_vector_SOURCES = filters/fmatrix_pick_vector.c \ + filters/fmatrix_print.c filters/fvector_cpu.c $(am__append_56) \ + $(am__append_57) +filters_fmatrix_pick_variable_SOURCES = \ + filters/fmatrix_pick_variable.c filters/fmatrix_print.c \ + $(am__append_58) +filters_fblock_pick_matrix_SOURCES = filters/fblock_pick_matrix.c \ + filters/fblock_print.c filters/fmatrix_print.c \ + filters/fmatrix_cpu.c $(am__append_59) $(am__append_60) +filters_fblock_pick_variable_SOURCES = filters/fblock_pick_variable.c \ + filters/fblock_print.c $(am__append_61) +filters_ftensor_pick_block_SOURCES = filters/ftensor_pick_block.c \ + filters/ftensor_print.c filters/fblock_print.c \ + filters/fblock_cpu.c $(am__append_62) $(am__append_63) +filters_ftensor_pick_variable_SOURCES = \ + filters/ftensor_pick_variable.c filters/ftensor_print.c \ + $(am__append_64) +filters_fndim_pick_ndim_SOURCES = 
filters/fndim_pick_ndim.c \ + filters/ftensor_print.c filters/fblock_print.c \ + filters/f3d_cpu.c $(am__append_65) $(am__append_66) +filters_fndim_5d_pick_tensor_SOURCES = filters/fndim_5d_pick_tensor.c \ + filters/f5d_print.c filters/ftensor_print.c \ + filters/ftensor_cpu.c $(am__append_67) $(am__append_68) +filters_fndim_4d_pick_block_SOURCES = filters/fndim_4d_pick_block.c \ + filters/ftensor_print.c filters/fblock_print.c \ + filters/fblock_cpu.c $(am__append_69) $(am__append_70) +filters_fndim_3d_pick_matrix_SOURCES = filters/fndim_3d_pick_matrix.c \ + filters/fblock_print.c filters/fmatrix_print.c \ + filters/fmatrix_cpu.c $(am__append_71) $(am__append_72) +filters_fndim_2d_pick_vector_SOURCES = filters/fndim_2d_pick_vector.c \ + filters/fmatrix_print.c filters/fvector_cpu.c $(am__append_73) \ + $(am__append_74) +filters_fndim_pick_variable_SOURCES = \ + filters/fndim_pick_variable.c \ + filters/f5d_print.c + +filters_fndim_to_tensor_SOURCES = filters/fndim_to_tensor.c \ + filters/ftensor_print.c filters/ftensor_cpu.c $(am__append_75) \ + $(am__append_76) +filters_fndim_to_block_SOURCES = filters/fndim_to_block.c \ + filters/fblock_print.c filters/fblock_cpu.c $(am__append_77) \ + $(am__append_78) +filters_fndim_to_matrix_SOURCES = filters/fndim_to_matrix.c \ + filters/fmatrix_print.c filters/fmatrix_cpu.c $(am__append_79) \ + $(am__append_80) +filters_fndim_to_vector_SOURCES = filters/fndim_to_vector.c \ + filters/fvector_cpu.c $(am__append_81) $(am__append_82) +filters_fmultiple_manual_SOURCES = filters/fmultiple_manual.c \ + $(am__append_83) $(am__append_84) +filters_fmultiple_submit_SOURCES = filters/fmultiple_submit.c \ + $(am__append_85) $(am__append_86) +filters_fmultiple_submit_readonly_SOURCES = \ + filters/fmultiple_submit_readonly.c $(am__append_87) \ + $(am__append_89) +filters_fmultiple_submit_readonly_downgrade_SOURCES = \ + filters/fmultiple_submit_readonly_downgrade.c $(am__append_88) \ + $(am__append_90) 
+filters_fmultiple_submit_implicit_SOURCES = \ + filters/fmultiple_submit_implicit.c $(am__append_91) \ + $(am__append_92) +filters_custom_mf_custom_mf_filter_SOURCES = \ + filters/custom_mf/custom_mf_filter.c \ + filters/custom_mf/custom_interface.c \ + filters/custom_mf/custom_conversion_codelets.c \ + $(am__append_93) $(am__append_94) + +################ +# AXPY example # +################ +@STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_SOURCES = axpy/axpy.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c $(am__append_95) +@STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +################ +# Mult example # +################ +mult_sgemm_SOURCES = mult/sgemm.c $(am__append_97) +mult_sgemm_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_sgemm_layout_SOURCES = mult/sgemm_layout.c $(am__append_99) +mult_sgemm_layout_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_dgemm_SOURCES = mult/dgemm.c $(am__append_98) +mult_dgemm_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + +mult_dgemm_layout_SOURCES = mult/dgemm_layout.c $(am__append_100) +mult_dgemm_layout_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + + +##################### +# Trs_sgemm example # +##################### +@STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ transactions/trs_sgemm.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_libmy_dmda_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + 
+@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_grain_tag.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +############## +# LU example # +############## +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_pivot.c \ 
+@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_pivot.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit_pivot.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit_pivot.c \ +@STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_SOURCES = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_float.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_pivot.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + 
+@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_LDADD = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_SOURCES = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_float.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit_pivot.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_LDADD = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_SOURCES = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_double.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_pivot.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_LDADD = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_SOURCES = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_double.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit_pivot.c \ 
+@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_LDADD = \ +@STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +################ +# Heat example # +################ +@STARPU_NO_BLAS_LIB_FALSE@heat_heat_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/heat.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_tag.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_grain.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/heat_display.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/lu_kernels_model.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@heat_heat_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_OPENGL_RENDER_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +############## +# CG example # +############## +@STARPU_NO_BLAS_LIB_FALSE@cg_cg_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cg/cg.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cg_cg_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +################ +# SPMD example # +################ +spmd_vector_scal_spmd_SOURCES = \ + spmd/vector_scal_spmd.c + + +################ +# SpMV example # +################ +spmv_spmv_SOURCES = spmv/spmv.c spmv/spmv_kernels.c $(am__append_103) +spmv_dw_block_spmv_SOURCES = \ + spmv/dw_block_spmv.c \ + spmv/dw_block_spmv_kernels.c \ + spmv/matrix_market/mm_to_bcsr.c \ + spmv/matrix_market/mmio.c + +spmv_dw_block_spmv_LDADD = \ + $(STARPU_BLAS_LDFLAGS) + + +########################### +# C++ Incrementer example # +########################### 
+cpp_incrementer_cpp_SOURCES = cpp/incrementer_cpp.cpp \ + $(am__append_104) $(am__append_105) + +########################### +# C++ Add vectors example # +########################### +cpp_add_vectors_SOURCES = \ + cpp/add_vectors.cpp + +cpp_add_vectors_interface_SOURCES = \ + cpp/add_vectors_interface.cpp + +@STARPU_HAVE_CXX11_TRUE@cpp_add_vectors_cpp11_SOURCES = \ +@STARPU_HAVE_CXX11_TRUE@ cpp/add_vectors_cpp11.cpp + + +####################### +# Incrementer example # +####################### +incrementer_incrementer_SOURCES = incrementer/incrementer.c \ + $(am__append_106) $(am__append_107) + +################## +# Binary example # +################## +binary_binary_SOURCES = binary/binary.c $(am__append_108) + +##################### +# interface example # +##################### +interface_complex_SOURCES = interface/complex.c \ + interface/complex_interface.c interface/complex_filters.c \ + $(am__append_109) $(am__append_110) +interface_complex_dev_handle_complex_dev_handle_SOURCES = \ + interface/complex_dev_handle/complex_dev_handle.c \ + interface/complex_dev_handle/complex_dev_handle_interface.c \ + interface/complex_dev_handle/complex_dev_handle_filters.c \ + $(am__append_111) $(am__append_112) + +####################### +# dot_product example # +####################### +reductions_dot_product_SOURCES = reductions/dot_product.c \ + $(am__append_113) +mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) $(am__append_114) +@STARPU_HAVE_X11_TRUE@mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) +@STARPU_HAVE_WINDOWS_FALSE@pi_pi_SOURCES = pi/pi.c \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_gold.c \ +@STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_primitives.c \ +@STARPU_HAVE_WINDOWS_FALSE@ $(am__append_116) +@STARPU_HAVE_WINDOWS_FALSE@pi_pi_redux_SOURCES = pi/pi_redux.c \ +@STARPU_HAVE_WINDOWS_FALSE@ $(am__append_117) +@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@pi_pi_redux_LDADD = \ 
+@STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ $(STARPU_CURAND_LDFLAGS) + +@STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_LDADD = \ +@STARPU_HAVE_OPENGL_TRUE@ $(STARPU_OPENGL_RENDER_LDFLAGS) + +@STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_idle_LDADD = \ +@STARPU_HAVE_OPENGL_TRUE@ $(STARPU_OPENGL_RENDER_LDFLAGS) + + +#################### +# pipeline example # +#################### +@STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ pipeline/pipeline.c \ +@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + + +################## +# openmp example # +################## +@STARPU_HAVE_OPENMP_TRUE@openmp_vector_scal_omp_CFLAGS = \ +@STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_OPENMP_TRUE@sched_ctx_parallel_code_CFLAGS = \ +@STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_OPENMP_TRUE@sched_ctx_sched_ctx_without_sched_policy_CFLAGS = \ +@STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_OPENMP_TRUE@sched_ctx_nested_sched_ctxs_CFLAGS = \ +@STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp + +@STARPU_HAVE_OPENMP_TRUE@sched_ctx_parallel_tasks_reuse_handle_CFLAGS = \ +@STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp + + +###################### +# subgraphs examples # +###################### +subgraphs_manual_SOURCES = \ + subgraphs/manual.c \ + subgraphs/codelets.c + +subgraphs_partition_SOURCES = \ + subgraphs/partition.c \ + subgraphs/codelets.c + +subgraphs_plan_SOURCES = \ + subgraphs/plan.c \ + subgraphs/codelets.c + +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .hip .F .c .cpp .cu .cubin .f .f90 .icc .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case 
'$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 
'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + list2=; 
for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ + } + +uninstall-pkglibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ + done + +clean-pkglibLTLIBRARIES: + -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) + @list='$(pkglib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +cholesky/$(am__dirstamp): + @$(MKDIR_P) cholesky + @: > cholesky/$(am__dirstamp) +cholesky/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cholesky/$(DEPDIR) + @: > cholesky/$(DEPDIR)/$(am__dirstamp) +cholesky/libmy_dmda.lo: cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) + +cholesky/libmy_dmda.la: $(cholesky_libmy_dmda_la_OBJECTS) $(cholesky_libmy_dmda_la_DEPENDENCIES) $(EXTRA_cholesky_libmy_dmda_la_DEPENDENCIES) cholesky/$(am__dirstamp) + $(AM_V_CCLD)$(cholesky_libmy_dmda_la_LINK) $(am_cholesky_libmy_dmda_la_rpath) $(cholesky_libmy_dmda_la_OBJECTS) $(cholesky_libmy_dmda_la_LIBADD) $(LIBS) +profiling_tool/$(am__dirstamp): + @$(MKDIR_P) profiling_tool + @: > profiling_tool/$(am__dirstamp) +profiling_tool/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) 
profiling_tool/$(DEPDIR) + @: > profiling_tool/$(DEPDIR)/$(am__dirstamp) +profiling_tool/libprofiling_tool.lo: profiling_tool/$(am__dirstamp) \ + profiling_tool/$(DEPDIR)/$(am__dirstamp) + +profiling_tool/libprofiling_tool.la: $(profiling_tool_libprofiling_tool_la_OBJECTS) $(profiling_tool_libprofiling_tool_la_DEPENDENCIES) $(EXTRA_profiling_tool_libprofiling_tool_la_DEPENDENCIES) profiling_tool/$(am__dirstamp) + $(AM_V_CCLD)$(profiling_tool_libprofiling_tool_la_LINK) $(am_profiling_tool_libprofiling_tool_la_rpath) $(profiling_tool_libprofiling_tool_la_OBJECTS) $(profiling_tool_libprofiling_tool_la_LIBADD) $(LIBS) +scheduler/$(am__dirstamp): + @$(MKDIR_P) scheduler + @: > scheduler/$(am__dirstamp) +scheduler/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) scheduler/$(DEPDIR) + @: > scheduler/$(DEPDIR)/$(am__dirstamp) +scheduler/libdummy_sched.lo: scheduler/$(am__dirstamp) \ + scheduler/$(DEPDIR)/$(am__dirstamp) + +scheduler/libdummy_sched.la: $(scheduler_libdummy_sched_la_OBJECTS) $(scheduler_libdummy_sched_la_DEPENDENCIES) $(EXTRA_scheduler_libdummy_sched_la_DEPENDENCIES) scheduler/$(am__dirstamp) + $(AM_V_CCLD)$(scheduler_libdummy_sched_la_LINK) $(am_scheduler_libdummy_sched_la_rpath) $(scheduler_libdummy_sched_la_OBJECTS) $(scheduler_libdummy_sched_la_LIBADD) $(LIBS) +api/$(am__dirstamp): + @$(MKDIR_P) api + @: > api/$(am__dirstamp) +api/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) api/$(DEPDIR) + @: > api/$(DEPDIR)/$(am__dirstamp) +api/bcsr_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/bcsr_data_interface$(EXEEXT): $(api_bcsr_data_interface_OBJECTS) $(api_bcsr_data_interface_DEPENDENCIES) $(EXTRA_api_bcsr_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/bcsr_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_bcsr_data_interface_OBJECTS) $(api_bcsr_data_interface_LDADD) $(LIBS) +api/block_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/block_data_interface$(EXEEXT): 
$(api_block_data_interface_OBJECTS) $(api_block_data_interface_DEPENDENCIES) $(EXTRA_api_block_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/block_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_block_data_interface_OBJECTS) $(api_block_data_interface_LDADD) $(LIBS) +api/coo_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/coo_data_interface$(EXEEXT): $(api_coo_data_interface_OBJECTS) $(api_coo_data_interface_DEPENDENCIES) $(EXTRA_api_coo_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/coo_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_coo_data_interface_OBJECTS) $(api_coo_data_interface_LDADD) $(LIBS) +api/csr_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/csr_data_interface$(EXEEXT): $(api_csr_data_interface_OBJECTS) $(api_csr_data_interface_DEPENDENCIES) $(EXTRA_api_csr_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/csr_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_csr_data_interface_OBJECTS) $(api_csr_data_interface_LDADD) $(LIBS) +api/matrix_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/matrix_data_interface$(EXEEXT): $(api_matrix_data_interface_OBJECTS) $(api_matrix_data_interface_DEPENDENCIES) $(EXTRA_api_matrix_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/matrix_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_matrix_data_interface_OBJECTS) $(api_matrix_data_interface_LDADD) $(LIBS) +api/multiformat_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/multiformat_data_interface$(EXEEXT): $(api_multiformat_data_interface_OBJECTS) $(api_multiformat_data_interface_DEPENDENCIES) $(EXTRA_api_multiformat_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/multiformat_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_multiformat_data_interface_OBJECTS) $(api_multiformat_data_interface_LDADD) $(LIBS) 
+api/tensor_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/tensor_data_interface$(EXEEXT): $(api_tensor_data_interface_OBJECTS) $(api_tensor_data_interface_DEPENDENCIES) $(EXTRA_api_tensor_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/tensor_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_tensor_data_interface_OBJECTS) $(api_tensor_data_interface_LDADD) $(LIBS) +api/variable_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/variable_data_interface$(EXEEXT): $(api_variable_data_interface_OBJECTS) $(api_variable_data_interface_DEPENDENCIES) $(EXTRA_api_variable_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/variable_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_variable_data_interface_OBJECTS) $(api_variable_data_interface_LDADD) $(LIBS) +api/vector_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/vector_data_interface$(EXEEXT): $(api_vector_data_interface_OBJECTS) $(api_vector_data_interface_DEPENDENCIES) $(EXTRA_api_vector_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/vector_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_vector_data_interface_OBJECTS) $(api_vector_data_interface_LDADD) $(LIBS) +api/void_data_interface.$(OBJEXT): api/$(am__dirstamp) \ + api/$(DEPDIR)/$(am__dirstamp) + +api/void_data_interface$(EXEEXT): $(api_void_data_interface_OBJECTS) $(api_void_data_interface_DEPENDENCIES) $(EXTRA_api_void_data_interface_DEPENDENCIES) api/$(am__dirstamp) + @rm -f api/void_data_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(api_void_data_interface_OBJECTS) $(api_void_data_interface_LDADD) $(LIBS) +axpy/$(am__dirstamp): + @$(MKDIR_P) axpy + @: > axpy/$(am__dirstamp) +axpy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) axpy/$(DEPDIR) + @: > axpy/$(DEPDIR)/$(am__dirstamp) +axpy/axpy.$(OBJEXT): axpy/$(am__dirstamp) \ + axpy/$(DEPDIR)/$(am__dirstamp) +common/$(am__dirstamp): + @$(MKDIR_P) common + @: > 
common/$(am__dirstamp) +common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) common/$(DEPDIR) + @: > common/$(DEPDIR)/$(am__dirstamp) +common/blas.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +axpy/axpy_opencl.$(OBJEXT): axpy/$(am__dirstamp) \ + axpy/$(DEPDIR)/$(am__dirstamp) + +axpy/axpy$(EXEEXT): $(axpy_axpy_OBJECTS) $(axpy_axpy_DEPENDENCIES) $(EXTRA_axpy_axpy_DEPENDENCIES) axpy/$(am__dirstamp) + @rm -f axpy/axpy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(axpy_axpy_OBJECTS) $(axpy_axpy_LDADD) $(LIBS) +basic_examples/$(am__dirstamp): + @$(MKDIR_P) basic_examples + @: > basic_examples/$(am__dirstamp) +basic_examples/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) basic_examples/$(DEPDIR) + @: > basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/block.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/block_cpu.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/block_cuda.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/block_hip.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/block_opencl.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/block$(EXEEXT): $(basic_examples_block_OBJECTS) $(basic_examples_block_DEPENDENCIES) $(EXTRA_basic_examples_block_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/block$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_block_OBJECTS) $(basic_examples_block_LDADD) $(LIBS) +basic_examples/dynamic_handles.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/dynamic_handles$(EXEEXT): $(basic_examples_dynamic_handles_OBJECTS) $(basic_examples_dynamic_handles_DEPENDENCIES) $(EXTRA_basic_examples_dynamic_handles_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f 
basic_examples/dynamic_handles$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_dynamic_handles_OBJECTS) $(basic_examples_dynamic_handles_LDADD) $(LIBS) +basic_examples/hello_world.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/hello_world$(EXEEXT): $(basic_examples_hello_world_OBJECTS) $(basic_examples_hello_world_DEPENDENCIES) $(EXTRA_basic_examples_hello_world_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/hello_world$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_hello_world_OBJECTS) $(basic_examples_hello_world_LDADD) $(LIBS) +basic_examples/hooks.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/hooks$(EXEEXT): $(basic_examples_hooks_OBJECTS) $(basic_examples_hooks_DEPENDENCIES) $(EXTRA_basic_examples_hooks_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/hooks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_hooks_OBJECTS) $(basic_examples_hooks_LDADD) $(LIBS) +basic_examples/mult.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/mult_cuda.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/mult_hip.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/mult$(EXEEXT): $(basic_examples_mult_OBJECTS) $(basic_examples_mult_DEPENDENCIES) $(EXTRA_basic_examples_mult_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/mult$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_mult_OBJECTS) $(basic_examples_mult_LDADD) $(LIBS) +basic_examples/multiformat.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/multiformat_conversion_codelets.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/multiformat_cuda.$(OBJEXT): \ + 
basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/multiformat_conversion_codelets_cuda.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/multiformat_opencl.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/multiformat_conversion_codelets_opencl.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/multiformat$(EXEEXT): $(basic_examples_multiformat_OBJECTS) $(basic_examples_multiformat_DEPENDENCIES) $(EXTRA_basic_examples_multiformat_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/multiformat$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_multiformat_OBJECTS) $(basic_examples_multiformat_LDADD) $(LIBS) +basic_examples/ndim.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/ndim$(EXEEXT): $(basic_examples_ndim_OBJECTS) $(basic_examples_ndim_DEPENDENCIES) $(EXTRA_basic_examples_ndim_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/ndim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_ndim_OBJECTS) $(basic_examples_ndim_LDADD) $(LIBS) +basic_examples/task_insert_color.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/task_insert_color$(EXEEXT): $(basic_examples_task_insert_color_OBJECTS) $(basic_examples_task_insert_color_DEPENDENCIES) $(EXTRA_basic_examples_task_insert_color_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/task_insert_color$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_task_insert_color_OBJECTS) $(basic_examples_task_insert_color_LDADD) $(LIBS) +basic_examples/topology.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/topology$(EXEEXT): $(basic_examples_topology_OBJECTS) $(basic_examples_topology_DEPENDENCIES) 
$(EXTRA_basic_examples_topology_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/topology$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_topology_OBJECTS) $(basic_examples_topology_LDADD) $(LIBS) +basic_examples/variable.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/variable_kernels_cpu.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/variable_kernels.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/variable_kernels_opencl.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/variable$(EXEEXT): $(basic_examples_variable_OBJECTS) $(basic_examples_variable_DEPENDENCIES) $(EXTRA_basic_examples_variable_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_examples_variable_OBJECTS) $(basic_examples_variable_LDADD) $(LIBS) +basic_examples/vector_scal.$(OBJEXT): basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_cpu.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_cpu_icc.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_cuda.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_hip.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_opencl.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/vector_scal$(EXEEXT): $(basic_examples_vector_scal_OBJECTS) $(basic_examples_vector_scal_DEPENDENCIES) $(EXTRA_basic_examples_vector_scal_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f 
basic_examples/vector_scal$(EXEEXT) + $(AM_V_GEN)$(basic_examples_vector_scal_LINK) $(basic_examples_vector_scal_OBJECTS) $(basic_examples_vector_scal_LDADD) $(LIBS) +basic_examples/vector_scal_fortran.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) +basic_examples/vector_scal_c.$(OBJEXT): \ + basic_examples/$(am__dirstamp) \ + basic_examples/$(DEPDIR)/$(am__dirstamp) + +basic_examples/vector_scal_fortran$(EXEEXT): $(basic_examples_vector_scal_fortran_OBJECTS) $(basic_examples_vector_scal_fortran_DEPENDENCIES) $(EXTRA_basic_examples_vector_scal_fortran_DEPENDENCIES) basic_examples/$(am__dirstamp) + @rm -f basic_examples/vector_scal_fortran$(EXEEXT) + $(AM_V_F77LD)$(F77LINK) $(basic_examples_vector_scal_fortran_OBJECTS) $(basic_examples_vector_scal_fortran_LDADD) $(LIBS) +binary/$(am__dirstamp): + @$(MKDIR_P) binary + @: > binary/$(am__dirstamp) +binary/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) binary/$(DEPDIR) + @: > binary/$(DEPDIR)/$(am__dirstamp) +binary/binary.$(OBJEXT): binary/$(am__dirstamp) \ + binary/$(DEPDIR)/$(am__dirstamp) +incrementer/$(am__dirstamp): + @$(MKDIR_P) incrementer + @: > incrementer/$(am__dirstamp) +incrementer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) incrementer/$(DEPDIR) + @: > incrementer/$(DEPDIR)/$(am__dirstamp) +incrementer/incrementer_kernels_opencl.$(OBJEXT): \ + incrementer/$(am__dirstamp) \ + incrementer/$(DEPDIR)/$(am__dirstamp) + +binary/binary$(EXEEXT): $(binary_binary_OBJECTS) $(binary_binary_DEPENDENCIES) $(EXTRA_binary_binary_DEPENDENCIES) binary/$(am__dirstamp) + @rm -f binary/binary$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(binary_binary_OBJECTS) $(binary_binary_LDADD) $(LIBS) +callback/$(am__dirstamp): + @$(MKDIR_P) callback + @: > callback/$(am__dirstamp) +callback/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) callback/$(DEPDIR) + @: > callback/$(DEPDIR)/$(am__dirstamp) +callback/callback.$(OBJEXT): callback/$(am__dirstamp) \ + callback/$(DEPDIR)/$(am__dirstamp) + +callback/callback$(EXEEXT): 
$(callback_callback_OBJECTS) $(callback_callback_DEPENDENCIES) $(EXTRA_callback_callback_DEPENDENCIES) callback/$(am__dirstamp) + @rm -f callback/callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(callback_callback_OBJECTS) $(callback_callback_LDADD) $(LIBS) +callback/prologue.$(OBJEXT): callback/$(am__dirstamp) \ + callback/$(DEPDIR)/$(am__dirstamp) + +callback/prologue$(EXEEXT): $(callback_prologue_OBJECTS) $(callback_prologue_DEPENDENCIES) $(EXTRA_callback_prologue_DEPENDENCIES) callback/$(am__dirstamp) + @rm -f callback/prologue$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(callback_prologue_OBJECTS) $(callback_prologue_LDADD) $(LIBS) +cg/$(am__dirstamp): + @$(MKDIR_P) cg + @: > cg/$(am__dirstamp) +cg/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cg/$(DEPDIR) + @: > cg/$(DEPDIR)/$(am__dirstamp) +cg/cg.$(OBJEXT): cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp) + +cg/cg$(EXEEXT): $(cg_cg_OBJECTS) $(cg_cg_DEPENDENCIES) $(EXTRA_cg_cg_DEPENDENCIES) cg/$(am__dirstamp) + @rm -f cg/cg$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cg_cg_OBJECTS) $(cg_cg_LDADD) $(LIBS) +cholesky/cholesky_compil.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +cholesky/cholesky_models.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +cholesky/cholesky_kernels.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +sched_ctx_utils/$(am__dirstamp): + @$(MKDIR_P) sched_ctx_utils + @: > sched_ctx_utils/$(am__dirstamp) +sched_ctx_utils/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_ctx_utils/$(DEPDIR) + @: > sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) +sched_ctx_utils/sched_ctx_utils.$(OBJEXT): \ + sched_ctx_utils/$(am__dirstamp) \ + sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_compil$(EXEEXT): $(cholesky_cholesky_compil_OBJECTS) $(cholesky_cholesky_compil_DEPENDENCIES) $(EXTRA_cholesky_cholesky_compil_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_compil$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_compil_OBJECTS) 
$(cholesky_cholesky_compil_LDADD) $(LIBS) +cholesky/cholesky_grain_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_grain_tag$(EXEEXT): $(cholesky_cholesky_grain_tag_OBJECTS) $(cholesky_cholesky_grain_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_grain_tag_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_grain_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_grain_tag_OBJECTS) $(cholesky_cholesky_grain_tag_LDADD) $(LIBS) +cholesky/cholesky_implicit.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_implicit$(EXEEXT): $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_DEPENDENCIES) $(EXTRA_cholesky_cholesky_implicit_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_LDADD) $(LIBS) +cholesky/cholesky_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_tag$(EXEEXT): $(cholesky_cholesky_tag_OBJECTS) $(cholesky_cholesky_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_tag_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_tag_OBJECTS) $(cholesky_cholesky_tag_LDADD) $(LIBS) +cholesky/cholesky_tile_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_tile_tag$(EXEEXT): $(cholesky_cholesky_tile_tag_OBJECTS) $(cholesky_cholesky_tile_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_tile_tag_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_tile_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_tile_tag_OBJECTS) $(cholesky_cholesky_tile_tag_LDADD) $(LIBS) +cpp/$(am__dirstamp): + @$(MKDIR_P) cpp + @: > cpp/$(am__dirstamp) +cpp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cpp/$(DEPDIR) + @: > cpp/$(DEPDIR)/$(am__dirstamp) +cpp/add_vectors.$(OBJEXT): 
cpp/$(am__dirstamp) \ + cpp/$(DEPDIR)/$(am__dirstamp) + +cpp/add_vectors$(EXEEXT): $(cpp_add_vectors_OBJECTS) $(cpp_add_vectors_DEPENDENCIES) $(EXTRA_cpp_add_vectors_DEPENDENCIES) cpp/$(am__dirstamp) + @rm -f cpp/add_vectors$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_OBJECTS) $(cpp_add_vectors_LDADD) $(LIBS) +cpp/add_vectors_cpp11.$(OBJEXT): cpp/$(am__dirstamp) \ + cpp/$(DEPDIR)/$(am__dirstamp) + +cpp/add_vectors_cpp11$(EXEEXT): $(cpp_add_vectors_cpp11_OBJECTS) $(cpp_add_vectors_cpp11_DEPENDENCIES) $(EXTRA_cpp_add_vectors_cpp11_DEPENDENCIES) cpp/$(am__dirstamp) + @rm -f cpp/add_vectors_cpp11$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_cpp11_OBJECTS) $(cpp_add_vectors_cpp11_LDADD) $(LIBS) +cpp/add_vectors_interface.$(OBJEXT): cpp/$(am__dirstamp) \ + cpp/$(DEPDIR)/$(am__dirstamp) + +cpp/add_vectors_interface$(EXEEXT): $(cpp_add_vectors_interface_OBJECTS) $(cpp_add_vectors_interface_DEPENDENCIES) $(EXTRA_cpp_add_vectors_interface_DEPENDENCIES) cpp/$(am__dirstamp) + @rm -f cpp/add_vectors_interface$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_interface_OBJECTS) $(cpp_add_vectors_interface_LDADD) $(LIBS) +cpp/incrementer_cpp.$(OBJEXT): cpp/$(am__dirstamp) \ + cpp/$(DEPDIR)/$(am__dirstamp) +incrementer/incrementer_kernels.$(OBJEXT): \ + incrementer/$(am__dirstamp) \ + incrementer/$(DEPDIR)/$(am__dirstamp) + +cpp/incrementer_cpp$(EXEEXT): $(cpp_incrementer_cpp_OBJECTS) $(cpp_incrementer_cpp_DEPENDENCIES) $(EXTRA_cpp_incrementer_cpp_DEPENDENCIES) cpp/$(am__dirstamp) + @rm -f cpp/incrementer_cpp$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(cpp_incrementer_cpp_OBJECTS) $(cpp_incrementer_cpp_LDADD) $(LIBS) +dependency/$(am__dirstamp): + @$(MKDIR_P) dependency + @: > dependency/$(am__dirstamp) +dependency/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) dependency/$(DEPDIR) + @: > dependency/$(DEPDIR)/$(am__dirstamp) +dependency/sequential_consistency.$(OBJEXT): \ + dependency/$(am__dirstamp) \ + dependency/$(DEPDIR)/$(am__dirstamp) + 
+dependency/sequential_consistency$(EXEEXT): $(dependency_sequential_consistency_OBJECTS) $(dependency_sequential_consistency_DEPENDENCIES) $(EXTRA_dependency_sequential_consistency_DEPENDENCIES) dependency/$(am__dirstamp) + @rm -f dependency/sequential_consistency$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(dependency_sequential_consistency_OBJECTS) $(dependency_sequential_consistency_LDADD) $(LIBS) +dependency/task_end_dep.$(OBJEXT): dependency/$(am__dirstamp) \ + dependency/$(DEPDIR)/$(am__dirstamp) + +dependency/task_end_dep$(EXEEXT): $(dependency_task_end_dep_OBJECTS) $(dependency_task_end_dep_DEPENDENCIES) $(EXTRA_dependency_task_end_dep_DEPENDENCIES) dependency/$(am__dirstamp) + @rm -f dependency/task_end_dep$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(dependency_task_end_dep_OBJECTS) $(dependency_task_end_dep_LDADD) $(LIBS) +dependency/task_end_dep_add.$(OBJEXT): dependency/$(am__dirstamp) \ + dependency/$(DEPDIR)/$(am__dirstamp) + +dependency/task_end_dep_add$(EXEEXT): $(dependency_task_end_dep_add_OBJECTS) $(dependency_task_end_dep_add_DEPENDENCIES) $(EXTRA_dependency_task_end_dep_add_DEPENDENCIES) dependency/$(am__dirstamp) + @rm -f dependency/task_end_dep_add$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(dependency_task_end_dep_add_OBJECTS) $(dependency_task_end_dep_add_LDADD) $(LIBS) +filters/$(am__dirstamp): + @$(MKDIR_P) filters + @: > filters/$(am__dirstamp) +filters/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) filters/$(DEPDIR) + @: > filters/$(DEPDIR)/$(am__dirstamp) +filters/alloc.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/alloc$(EXEEXT): $(filters_alloc_OBJECTS) $(filters_alloc_DEPENDENCIES) $(EXTRA_filters_alloc_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/alloc$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_alloc_OBJECTS) $(filters_alloc_LDADD) $(LIBS) +filters/custom_mf/$(am__dirstamp): + @$(MKDIR_P) filters/custom_mf + @: > filters/custom_mf/$(am__dirstamp) +filters/custom_mf/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) 
filters/custom_mf/$(DEPDIR) + @: > filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/custom_mf_filter.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/custom_interface.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/custom_conversion_codelets.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/conversion.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/cuda.$(OBJEXT): filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/conversion_opencl.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) +filters/custom_mf/custom_opencl.$(OBJEXT): \ + filters/custom_mf/$(am__dirstamp) \ + filters/custom_mf/$(DEPDIR)/$(am__dirstamp) + +filters/custom_mf/custom_mf_filter$(EXEEXT): $(filters_custom_mf_custom_mf_filter_OBJECTS) $(filters_custom_mf_custom_mf_filter_DEPENDENCIES) $(EXTRA_filters_custom_mf_custom_mf_filter_DEPENDENCIES) filters/custom_mf/$(am__dirstamp) + @rm -f filters/custom_mf/custom_mf_filter$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_custom_mf_custom_mf_filter_OBJECTS) $(filters_custom_mf_custom_mf_filter_LDADD) $(LIBS) +filters/fblock.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fblock_print.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fblock_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fblock_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fblock_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fblock_opencl.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fblock$(EXEEXT): 
$(filters_fblock_OBJECTS) $(filters_fblock_DEPENDENCIES) $(EXTRA_filters_fblock_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fblock$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fblock_OBJECTS) $(filters_fblock_LDADD) $(LIBS) +filters/fblock_pick_matrix.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmatrix_print.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmatrix_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmatrix_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmatrix_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fblock_pick_matrix$(EXEEXT): $(filters_fblock_pick_matrix_OBJECTS) $(filters_fblock_pick_matrix_DEPENDENCIES) $(EXTRA_filters_fblock_pick_matrix_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fblock_pick_matrix$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fblock_pick_matrix_OBJECTS) $(filters_fblock_pick_matrix_LDADD) $(LIBS) +filters/fblock_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fvariable_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fblock_pick_variable$(EXEEXT): $(filters_fblock_pick_variable_OBJECTS) $(filters_fblock_pick_variable_DEPENDENCIES) $(EXTRA_filters_fblock_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fblock_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fblock_pick_variable_OBJECTS) $(filters_fblock_pick_variable_LDADD) $(LIBS) +filters/fmatrix.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmatrix$(EXEEXT): $(filters_fmatrix_OBJECTS) $(filters_fmatrix_DEPENDENCIES) $(EXTRA_filters_fmatrix_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmatrix$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmatrix_OBJECTS) $(filters_fmatrix_LDADD) $(LIBS) 
+filters/fmatrix_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmatrix_pick_variable$(EXEEXT): $(filters_fmatrix_pick_variable_OBJECTS) $(filters_fmatrix_pick_variable_DEPENDENCIES) $(EXTRA_filters_fmatrix_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmatrix_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmatrix_pick_variable_OBJECTS) $(filters_fmatrix_pick_variable_LDADD) $(LIBS) +filters/fmatrix_pick_vector.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fvector_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fvector_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fvector_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmatrix_pick_vector$(EXEEXT): $(filters_fmatrix_pick_vector_OBJECTS) $(filters_fmatrix_pick_vector_DEPENDENCIES) $(EXTRA_filters_fmatrix_pick_vector_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmatrix_pick_vector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmatrix_pick_vector_OBJECTS) $(filters_fmatrix_pick_vector_LDADD) $(LIBS) +filters/fmultiple_manual.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmultiple_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/fmultiple_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmultiple_manual$(EXEEXT): $(filters_fmultiple_manual_OBJECTS) $(filters_fmultiple_manual_DEPENDENCIES) $(EXTRA_filters_fmultiple_manual_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmultiple_manual$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmultiple_manual_OBJECTS) $(filters_fmultiple_manual_LDADD) $(LIBS) +filters/fmultiple_submit.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmultiple_submit$(EXEEXT): $(filters_fmultiple_submit_OBJECTS) 
$(filters_fmultiple_submit_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmultiple_submit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_OBJECTS) $(filters_fmultiple_submit_LDADD) $(LIBS) +filters/fmultiple_submit_implicit.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmultiple_submit_implicit$(EXEEXT): $(filters_fmultiple_submit_implicit_OBJECTS) $(filters_fmultiple_submit_implicit_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_implicit_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmultiple_submit_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_implicit_OBJECTS) $(filters_fmultiple_submit_implicit_LDADD) $(LIBS) +filters/fmultiple_submit_readonly.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmultiple_submit_readonly$(EXEEXT): $(filters_fmultiple_submit_readonly_OBJECTS) $(filters_fmultiple_submit_readonly_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_readonly_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmultiple_submit_readonly$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_readonly_OBJECTS) $(filters_fmultiple_submit_readonly_LDADD) $(LIBS) +filters/fmultiple_submit_readonly_downgrade.$(OBJEXT): \ + filters/$(am__dirstamp) filters/$(DEPDIR)/$(am__dirstamp) + +filters/fmultiple_submit_readonly_downgrade$(EXEEXT): $(filters_fmultiple_submit_readonly_downgrade_OBJECTS) $(filters_fmultiple_submit_readonly_downgrade_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_readonly_downgrade_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fmultiple_submit_readonly_downgrade$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_readonly_downgrade_OBJECTS) $(filters_fmultiple_submit_readonly_downgrade_LDADD) $(LIBS) +filters/fndim.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/ftensor_print.$(OBJEXT): filters/$(am__dirstamp) \ + 
filters/$(DEPDIR)/$(am__dirstamp) +filters/f4d_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f4d_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f4d_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim$(EXEEXT): $(filters_fndim_OBJECTS) $(filters_fndim_DEPENDENCIES) $(EXTRA_filters_fndim_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_OBJECTS) $(filters_fndim_LDADD) $(LIBS) +filters/fndim_1d_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_1d_pick_variable$(EXEEXT): $(filters_fndim_1d_pick_variable_OBJECTS) $(filters_fndim_1d_pick_variable_DEPENDENCIES) $(EXTRA_filters_fndim_1d_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_1d_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_1d_pick_variable_OBJECTS) $(filters_fndim_1d_pick_variable_LDADD) $(LIBS) +filters/fndim_2d_pick_vector.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_2d_pick_vector$(EXEEXT): $(filters_fndim_2d_pick_vector_OBJECTS) $(filters_fndim_2d_pick_vector_DEPENDENCIES) $(EXTRA_filters_fndim_2d_pick_vector_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_2d_pick_vector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_2d_pick_vector_OBJECTS) $(filters_fndim_2d_pick_vector_LDADD) $(LIBS) +filters/fndim_3d_pick_matrix.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_3d_pick_matrix$(EXEEXT): $(filters_fndim_3d_pick_matrix_OBJECTS) $(filters_fndim_3d_pick_matrix_DEPENDENCIES) $(EXTRA_filters_fndim_3d_pick_matrix_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_3d_pick_matrix$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_3d_pick_matrix_OBJECTS) $(filters_fndim_3d_pick_matrix_LDADD) $(LIBS) +filters/fndim_4d_pick_block.$(OBJEXT): 
filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_4d_pick_block$(EXEEXT): $(filters_fndim_4d_pick_block_OBJECTS) $(filters_fndim_4d_pick_block_DEPENDENCIES) $(EXTRA_filters_fndim_4d_pick_block_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_4d_pick_block$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_4d_pick_block_OBJECTS) $(filters_fndim_4d_pick_block_LDADD) $(LIBS) +filters/fndim_5d_pick_tensor.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f5d_print.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/ftensor_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/ftensor_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/ftensor_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_5d_pick_tensor$(EXEEXT): $(filters_fndim_5d_pick_tensor_OBJECTS) $(filters_fndim_5d_pick_tensor_DEPENDENCIES) $(EXTRA_filters_fndim_5d_pick_tensor_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_5d_pick_tensor$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_5d_pick_tensor_OBJECTS) $(filters_fndim_5d_pick_tensor_LDADD) $(LIBS) +filters/fndim_pick_ndim.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f3d_cpu.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f3d_cuda.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) +filters/f3d_hip.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_pick_ndim$(EXEEXT): $(filters_fndim_pick_ndim_OBJECTS) $(filters_fndim_pick_ndim_DEPENDENCIES) $(EXTRA_filters_fndim_pick_ndim_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_pick_ndim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_pick_ndim_OBJECTS) $(filters_fndim_pick_ndim_LDADD) $(LIBS) +filters/fndim_pick_variable.$(OBJEXT): 
filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_pick_variable$(EXEEXT): $(filters_fndim_pick_variable_OBJECTS) $(filters_fndim_pick_variable_DEPENDENCIES) $(EXTRA_filters_fndim_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_pick_variable_OBJECTS) $(filters_fndim_pick_variable_LDADD) $(LIBS) +filters/fndim_to_block.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_to_block$(EXEEXT): $(filters_fndim_to_block_OBJECTS) $(filters_fndim_to_block_DEPENDENCIES) $(EXTRA_filters_fndim_to_block_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_to_block$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_to_block_OBJECTS) $(filters_fndim_to_block_LDADD) $(LIBS) +filters/fndim_to_matrix.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_to_matrix$(EXEEXT): $(filters_fndim_to_matrix_OBJECTS) $(filters_fndim_to_matrix_DEPENDENCIES) $(EXTRA_filters_fndim_to_matrix_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_to_matrix$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_to_matrix_OBJECTS) $(filters_fndim_to_matrix_LDADD) $(LIBS) +filters/fndim_to_tensor.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_to_tensor$(EXEEXT): $(filters_fndim_to_tensor_OBJECTS) $(filters_fndim_to_tensor_DEPENDENCIES) $(EXTRA_filters_fndim_to_tensor_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_to_tensor$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_to_tensor_OBJECTS) $(filters_fndim_to_tensor_LDADD) $(LIBS) +filters/fndim_to_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_to_variable$(EXEEXT): $(filters_fndim_to_variable_OBJECTS) $(filters_fndim_to_variable_DEPENDENCIES) $(EXTRA_filters_fndim_to_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_to_variable$(EXEEXT) + 
$(AM_V_CCLD)$(LINK) $(filters_fndim_to_variable_OBJECTS) $(filters_fndim_to_variable_LDADD) $(LIBS) +filters/fndim_to_vector.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fndim_to_vector$(EXEEXT): $(filters_fndim_to_vector_OBJECTS) $(filters_fndim_to_vector_DEPENDENCIES) $(EXTRA_filters_fndim_to_vector_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fndim_to_vector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fndim_to_vector_OBJECTS) $(filters_fndim_to_vector_LDADD) $(LIBS) +filters/fread.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fread$(EXEEXT): $(filters_fread_OBJECTS) $(filters_fread_DEPENDENCIES) $(EXTRA_filters_fread_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fread$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fread_OBJECTS) $(filters_fread_LDADD) $(LIBS) +filters/frecursive.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/frecursive$(EXEEXT): $(filters_frecursive_OBJECTS) $(filters_frecursive_DEPENDENCIES) $(EXTRA_filters_frecursive_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/frecursive$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_frecursive_OBJECTS) $(filters_frecursive_LDADD) $(LIBS) +filters/ftensor.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/ftensor$(EXEEXT): $(filters_ftensor_OBJECTS) $(filters_ftensor_DEPENDENCIES) $(EXTRA_filters_ftensor_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/ftensor$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_ftensor_OBJECTS) $(filters_ftensor_LDADD) $(LIBS) +filters/ftensor_pick_block.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/ftensor_pick_block$(EXEEXT): $(filters_ftensor_pick_block_OBJECTS) $(filters_ftensor_pick_block_DEPENDENCIES) $(EXTRA_filters_ftensor_pick_block_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/ftensor_pick_block$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_ftensor_pick_block_OBJECTS) 
$(filters_ftensor_pick_block_LDADD) $(LIBS) +filters/ftensor_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/ftensor_pick_variable$(EXEEXT): $(filters_ftensor_pick_variable_OBJECTS) $(filters_ftensor_pick_variable_DEPENDENCIES) $(EXTRA_filters_ftensor_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/ftensor_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_ftensor_pick_variable_OBJECTS) $(filters_ftensor_pick_variable_LDADD) $(LIBS) +filters/fvector.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fvector$(EXEEXT): $(filters_fvector_OBJECTS) $(filters_fvector_DEPENDENCIES) $(EXTRA_filters_fvector_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fvector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fvector_OBJECTS) $(filters_fvector_LDADD) $(LIBS) +filters/fvector_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/fvector_pick_variable$(EXEEXT): $(filters_fvector_pick_variable_OBJECTS) $(filters_fvector_pick_variable_DEPENDENCIES) $(EXTRA_filters_fvector_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/fvector_pick_variable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_fvector_pick_variable_OBJECTS) $(filters_fvector_pick_variable_LDADD) $(LIBS) +filters/shadow.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/shadow$(EXEEXT): $(filters_shadow_OBJECTS) $(filters_shadow_DEPENDENCIES) $(EXTRA_filters_shadow_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/shadow$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_shadow_OBJECTS) $(filters_shadow_LDADD) $(LIBS) +filters/shadow2d.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/shadow2d$(EXEEXT): $(filters_shadow2d_OBJECTS) $(filters_shadow2d_DEPENDENCIES) $(EXTRA_filters_shadow2d_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/shadow2d$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(filters_shadow2d_OBJECTS) $(filters_shadow2d_LDADD) $(LIBS) +filters/shadow3d.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/shadow3d$(EXEEXT): $(filters_shadow3d_OBJECTS) $(filters_shadow3d_DEPENDENCIES) $(EXTRA_filters_shadow3d_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/shadow3d$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_shadow3d_OBJECTS) $(filters_shadow3d_LDADD) $(LIBS) +filters/shadow4d.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/shadow4d$(EXEEXT): $(filters_shadow4d_OBJECTS) $(filters_shadow4d_DEPENDENCIES) $(EXTRA_filters_shadow4d_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/shadow4d$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_shadow4d_OBJECTS) $(filters_shadow4d_LDADD) $(LIBS) +filters/shadownd.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/shadownd$(EXEEXT): $(filters_shadownd_OBJECTS) $(filters_shadownd_DEPENDENCIES) $(EXTRA_filters_shadownd_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/shadownd$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_shadownd_OBJECTS) $(filters_shadownd_LDADD) $(LIBS) +fortran/$(am__dirstamp): + @$(MKDIR_P) fortran + @: > fortran/$(am__dirstamp) +fortran/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fortran/$(DEPDIR) + @: > fortran/$(DEPDIR)/$(am__dirstamp) +fortran/hello_c.$(OBJEXT): fortran/$(am__dirstamp) \ + fortran/$(DEPDIR)/$(am__dirstamp) +fortran/hello.$(OBJEXT): fortran/$(am__dirstamp) \ + fortran/$(DEPDIR)/$(am__dirstamp) + +fortran/hello$(EXEEXT): $(fortran_hello_OBJECTS) $(fortran_hello_DEPENDENCIES) $(EXTRA_fortran_hello_DEPENDENCIES) fortran/$(am__dirstamp) + @rm -f fortran/hello$(EXEEXT) + $(AM_V_F77LD)$(F77LINK) $(fortran_hello_OBJECTS) $(fortran_hello_LDADD) $(LIBS) +fortran90/$(am__dirstamp): + @$(MKDIR_P) fortran90 + @: > fortran90/$(am__dirstamp) +fortran90/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fortran90/$(DEPDIR) + @: > fortran90/$(DEPDIR)/$(am__dirstamp) 
+fortran90/mod_types.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/starpu_mod.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/mod_interface.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/mod_compute.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/marshalling.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/f90_example.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) + +fortran90/f90_example$(EXEEXT): $(fortran90_f90_example_OBJECTS) $(fortran90_f90_example_DEPENDENCIES) $(EXTRA_fortran90_f90_example_DEPENDENCIES) fortran90/$(am__dirstamp) + @rm -f fortran90/f90_example$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(fortran90_f90_example_OBJECTS) $(fortran90_f90_example_LDADD) $(LIBS) +gl_interop/$(am__dirstamp): + @$(MKDIR_P) gl_interop + @: > gl_interop/$(am__dirstamp) +gl_interop/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) gl_interop/$(DEPDIR) + @: > gl_interop/$(DEPDIR)/$(am__dirstamp) +gl_interop/gl_interop.$(OBJEXT): gl_interop/$(am__dirstamp) \ + gl_interop/$(DEPDIR)/$(am__dirstamp) + +gl_interop/gl_interop$(EXEEXT): $(gl_interop_gl_interop_OBJECTS) $(gl_interop_gl_interop_DEPENDENCIES) $(EXTRA_gl_interop_gl_interop_DEPENDENCIES) gl_interop/$(am__dirstamp) + @rm -f gl_interop/gl_interop$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(gl_interop_gl_interop_OBJECTS) $(gl_interop_gl_interop_LDADD) $(LIBS) +gl_interop/gl_interop_idle.$(OBJEXT): gl_interop/$(am__dirstamp) \ + gl_interop/$(DEPDIR)/$(am__dirstamp) + +gl_interop/gl_interop_idle$(EXEEXT): $(gl_interop_gl_interop_idle_OBJECTS) $(gl_interop_gl_interop_idle_DEPENDENCIES) $(EXTRA_gl_interop_gl_interop_idle_DEPENDENCIES) gl_interop/$(am__dirstamp) + @rm -f gl_interop/gl_interop_idle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(gl_interop_gl_interop_idle_OBJECTS) $(gl_interop_gl_interop_idle_LDADD) $(LIBS) 
+heat/$(am__dirstamp): + @$(MKDIR_P) heat + @: > heat/$(am__dirstamp) +heat/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) heat/$(DEPDIR) + @: > heat/$(DEPDIR)/$(am__dirstamp) +heat/heat.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_factolu.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_factolu_tag.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_factolu_grain.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_sparse_cg.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/heat_display.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/lu_kernels_model.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_sparse_cg_kernels.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) +heat/dw_factolu_kernels.$(OBJEXT): heat/$(am__dirstamp) \ + heat/$(DEPDIR)/$(am__dirstamp) + +heat/heat$(EXEEXT): $(heat_heat_OBJECTS) $(heat_heat_DEPENDENCIES) $(EXTRA_heat_heat_DEPENDENCIES) heat/$(am__dirstamp) + @rm -f heat/heat$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(heat_heat_OBJECTS) $(heat_heat_LDADD) $(LIBS) +incrementer/incrementer.$(OBJEXT): incrementer/$(am__dirstamp) \ + incrementer/$(DEPDIR)/$(am__dirstamp) + +incrementer/incrementer$(EXEEXT): $(incrementer_incrementer_OBJECTS) $(incrementer_incrementer_DEPENDENCIES) $(EXTRA_incrementer_incrementer_DEPENDENCIES) incrementer/$(am__dirstamp) + @rm -f incrementer/incrementer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(incrementer_incrementer_OBJECTS) $(incrementer_incrementer_LDADD) $(LIBS) +interface/$(am__dirstamp): + @$(MKDIR_P) interface + @: > interface/$(am__dirstamp) +interface/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) interface/$(DEPDIR) + @: > interface/$(DEPDIR)/$(am__dirstamp) +interface/complex.$(OBJEXT): interface/$(am__dirstamp) \ + interface/$(DEPDIR)/$(am__dirstamp) +interface/complex_interface.$(OBJEXT): interface/$(am__dirstamp) \ + 
interface/$(DEPDIR)/$(am__dirstamp) +interface/complex_filters.$(OBJEXT): interface/$(am__dirstamp) \ + interface/$(DEPDIR)/$(am__dirstamp) +interface/complex_kernels.$(OBJEXT): interface/$(am__dirstamp) \ + interface/$(DEPDIR)/$(am__dirstamp) +interface/complex_kernels_opencl.$(OBJEXT): interface/$(am__dirstamp) \ + interface/$(DEPDIR)/$(am__dirstamp) + +interface/complex$(EXEEXT): $(interface_complex_OBJECTS) $(interface_complex_DEPENDENCIES) $(EXTRA_interface_complex_DEPENDENCIES) interface/$(am__dirstamp) + @rm -f interface/complex$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(interface_complex_OBJECTS) $(interface_complex_LDADD) $(LIBS) +interface/complex_dev_handle/$(am__dirstamp): + @$(MKDIR_P) interface/complex_dev_handle + @: > interface/complex_dev_handle/$(am__dirstamp) +interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) interface/complex_dev_handle/$(DEPDIR) + @: > interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) +interface/complex_dev_handle/complex_dev_handle.$(OBJEXT): \ + interface/complex_dev_handle/$(am__dirstamp) \ + interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) +interface/complex_dev_handle/complex_dev_handle_interface.$(OBJEXT): \ + interface/complex_dev_handle/$(am__dirstamp) \ + interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) +interface/complex_dev_handle/complex_dev_handle_filters.$(OBJEXT): \ + interface/complex_dev_handle/$(am__dirstamp) \ + interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) +interface/complex_dev_handle/complex_dev_handle_kernels.$(OBJEXT): \ + interface/complex_dev_handle/$(am__dirstamp) \ + interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) +interface/complex_dev_handle/complex_dev_handle_kernels_opencl.$(OBJEXT): \ + interface/complex_dev_handle/$(am__dirstamp) \ + interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) + +interface/complex_dev_handle/complex_dev_handle$(EXEEXT): $(interface_complex_dev_handle_complex_dev_handle_OBJECTS) 
$(interface_complex_dev_handle_complex_dev_handle_DEPENDENCIES) $(EXTRA_interface_complex_dev_handle_complex_dev_handle_DEPENDENCIES) interface/complex_dev_handle/$(am__dirstamp) + @rm -f interface/complex_dev_handle/complex_dev_handle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(interface_complex_dev_handle_complex_dev_handle_OBJECTS) $(interface_complex_dev_handle_complex_dev_handle_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) +lu/$(am__dirstamp): + @$(MKDIR_P) lu + @: > lu/$(am__dirstamp) +lu/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) lu/$(DEPDIR) + @: > lu/$(DEPDIR)/$(am__dirstamp) +lu/lu_example_complex_double.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/zlu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) +lu/zlu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/zlu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/blas_complex.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_example_complex_double$(EXEEXT): $(lu_lu_example_complex_double_OBJECTS) $(lu_lu_example_complex_double_DEPENDENCIES) $(EXTRA_lu_lu_example_complex_double_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_example_complex_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_example_complex_double_OBJECTS) $(lu_lu_example_complex_double_LDADD) $(LIBS) +lu/lu_example_complex_float.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/clu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) +lu/clu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/clu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_example_complex_float$(EXEEXT): $(lu_lu_example_complex_float_OBJECTS) $(lu_lu_example_complex_float_DEPENDENCIES) $(EXTRA_lu_lu_example_complex_float_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f 
lu/lu_example_complex_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_example_complex_float_OBJECTS) $(lu_lu_example_complex_float_LDADD) $(LIBS) +lu/lu_example_double.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/dlu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) +lu/dlu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/dlu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_example_double$(EXEEXT): $(lu_lu_example_double_OBJECTS) $(lu_lu_example_double_DEPENDENCIES) $(EXTRA_lu_lu_example_double_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_example_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_example_double_OBJECTS) $(lu_lu_example_double_LDADD) $(LIBS) +lu/lu_example_float.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/slu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) +lu/slu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/slu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_example_float$(EXEEXT): $(lu_lu_example_float_OBJECTS) $(lu_lu_example_float_DEPENDENCIES) $(EXTRA_lu_lu_example_float_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_example_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_example_float_OBJECTS) $(lu_lu_example_float_LDADD) $(LIBS) +lu/zlu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/zlu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_implicit_example_complex_double$(EXEEXT): $(lu_lu_implicit_example_complex_double_OBJECTS) $(lu_lu_implicit_example_complex_double_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_complex_double_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_implicit_example_complex_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_complex_double_OBJECTS) $(lu_lu_implicit_example_complex_double_LDADD) $(LIBS) +lu/clu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ + 
lu/$(DEPDIR)/$(am__dirstamp) +lu/clu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_implicit_example_complex_float$(EXEEXT): $(lu_lu_implicit_example_complex_float_OBJECTS) $(lu_lu_implicit_example_complex_float_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_complex_float_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_implicit_example_complex_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_complex_float_OBJECTS) $(lu_lu_implicit_example_complex_float_LDADD) $(LIBS) +lu/dlu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/dlu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_implicit_example_double$(EXEEXT): $(lu_lu_implicit_example_double_OBJECTS) $(lu_lu_implicit_example_double_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_double_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_implicit_example_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_double_OBJECTS) $(lu_lu_implicit_example_double_LDADD) $(LIBS) +lu/slu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) +lu/slu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ + lu/$(DEPDIR)/$(am__dirstamp) + +lu/lu_implicit_example_float$(EXEEXT): $(lu_lu_implicit_example_float_OBJECTS) $(lu_lu_implicit_example_float_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_float_DEPENDENCIES) lu/$(am__dirstamp) + @rm -f lu/lu_implicit_example_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_float_OBJECTS) $(lu_lu_implicit_example_float_LDADD) $(LIBS) +mandelbrot/$(am__dirstamp): + @$(MKDIR_P) mandelbrot + @: > mandelbrot/$(am__dirstamp) +mandelbrot/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mandelbrot/$(DEPDIR) + @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) +mandelbrot/mandelbrot-mandelbrot.$(OBJEXT): \ + mandelbrot/$(am__dirstamp) \ + mandelbrot/$(DEPDIR)/$(am__dirstamp) + +mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) 
$(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) + @rm -f mandelbrot/mandelbrot$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) +matvecmult/$(am__dirstamp): + @$(MKDIR_P) matvecmult + @: > matvecmult/$(am__dirstamp) +matvecmult/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matvecmult/$(DEPDIR) + @: > matvecmult/$(DEPDIR)/$(am__dirstamp) +matvecmult/matvecmult.$(OBJEXT): matvecmult/$(am__dirstamp) \ + matvecmult/$(DEPDIR)/$(am__dirstamp) + +matvecmult/matvecmult$(EXEEXT): $(matvecmult_matvecmult_OBJECTS) $(matvecmult_matvecmult_DEPENDENCIES) $(EXTRA_matvecmult_matvecmult_DEPENDENCIES) matvecmult/$(am__dirstamp) + @rm -f matvecmult/matvecmult$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matvecmult_matvecmult_OBJECTS) $(matvecmult_matvecmult_LDADD) $(LIBS) +mlr/$(am__dirstamp): + @$(MKDIR_P) mlr + @: > mlr/$(am__dirstamp) +mlr/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mlr/$(DEPDIR) + @: > mlr/$(DEPDIR)/$(am__dirstamp) +mlr/mlr.$(OBJEXT): mlr/$(am__dirstamp) mlr/$(DEPDIR)/$(am__dirstamp) + +mlr/mlr$(EXEEXT): $(mlr_mlr_OBJECTS) $(mlr_mlr_DEPENDENCIES) $(EXTRA_mlr_mlr_DEPENDENCIES) mlr/$(am__dirstamp) + @rm -f mlr/mlr$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mlr_mlr_OBJECTS) $(mlr_mlr_LDADD) $(LIBS) +mult/$(am__dirstamp): + @$(MKDIR_P) mult + @: > mult/$(am__dirstamp) +mult/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mult/$(DEPDIR) + @: > mult/$(DEPDIR)/$(am__dirstamp) +mult/dgemm.$(OBJEXT): mult/$(am__dirstamp) \ + mult/$(DEPDIR)/$(am__dirstamp) + +mult/dgemm$(EXEEXT): $(mult_dgemm_OBJECTS) $(mult_dgemm_DEPENDENCIES) $(EXTRA_mult_dgemm_DEPENDENCIES) mult/$(am__dirstamp) + @rm -f mult/dgemm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mult_dgemm_OBJECTS) $(mult_dgemm_LDADD) $(LIBS) +mult/dgemm_layout.$(OBJEXT): mult/$(am__dirstamp) \ + mult/$(DEPDIR)/$(am__dirstamp) + +mult/dgemm_layout$(EXEEXT): $(mult_dgemm_layout_OBJECTS) $(mult_dgemm_layout_DEPENDENCIES) $(EXTRA_mult_dgemm_layout_DEPENDENCIES) mult/$(am__dirstamp) + @rm -f 
mult/dgemm_layout$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mult_dgemm_layout_OBJECTS) $(mult_dgemm_layout_LDADD) $(LIBS) +mult/sgemm.$(OBJEXT): mult/$(am__dirstamp) \ + mult/$(DEPDIR)/$(am__dirstamp) + +mult/sgemm$(EXEEXT): $(mult_sgemm_OBJECTS) $(mult_sgemm_DEPENDENCIES) $(EXTRA_mult_sgemm_DEPENDENCIES) mult/$(am__dirstamp) + @rm -f mult/sgemm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mult_sgemm_OBJECTS) $(mult_sgemm_LDADD) $(LIBS) +mult/sgemm_layout.$(OBJEXT): mult/$(am__dirstamp) \ + mult/$(DEPDIR)/$(am__dirstamp) + +mult/sgemm_layout$(EXEEXT): $(mult_sgemm_layout_OBJECTS) $(mult_sgemm_layout_DEPENDENCIES) $(EXTRA_mult_sgemm_layout_DEPENDENCIES) mult/$(am__dirstamp) + @rm -f mult/sgemm_layout$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mult_sgemm_layout_OBJECTS) $(mult_sgemm_layout_LDADD) $(LIBS) +native_fortran/$(am__dirstamp): + @$(MKDIR_P) native_fortran + @: > native_fortran/$(am__dirstamp) +native_fortran/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) native_fortran/$(DEPDIR) + @: > native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_dynbuf_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/fstarpu_mod.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_dynbuf.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_dynbuf$(EXEEXT): $(native_fortran_nf_dynbuf_OBJECTS) $(native_fortran_nf_dynbuf_DEPENDENCIES) $(EXTRA_native_fortran_nf_dynbuf_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_dynbuf$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_dynbuf_OBJECTS) $(native_fortran_nf_dynbuf_LDADD) $(LIBS) +native_fortran/nf_types.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_compute.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_example.$(OBJEXT): native_fortran/$(am__dirstamp) \ + 
native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_example$(EXEEXT): $(native_fortran_nf_example_OBJECTS) $(native_fortran_nf_example_DEPENDENCIES) $(EXTRA_native_fortran_nf_example_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_example$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_example_OBJECTS) $(native_fortran_nf_example_LDADD) $(LIBS) +native_fortran/nf_codelets.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_matrix.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_matrix$(EXEEXT): $(native_fortran_nf_matrix_OBJECTS) $(native_fortran_nf_matrix_DEPENDENCIES) $(EXTRA_native_fortran_nf_matrix_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_matrix$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_matrix_OBJECTS) $(native_fortran_nf_matrix_LDADD) $(LIBS) +native_fortran/nf_partition_cl.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_partition.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_partition$(EXEEXT): $(native_fortran_nf_partition_OBJECTS) $(native_fortran_nf_partition_DEPENDENCIES) $(EXTRA_native_fortran_nf_partition_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_partition$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_partition_OBJECTS) $(native_fortran_nf_partition_LDADD) $(LIBS) +native_fortran/nf_sched_ctx_cl.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_sched_ctx.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_sched_ctx$(EXEEXT): $(native_fortran_nf_sched_ctx_OBJECTS) $(native_fortran_nf_sched_ctx_DEPENDENCIES) $(EXTRA_native_fortran_nf_sched_ctx_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm 
-f native_fortran/nf_sched_ctx$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_sched_ctx_OBJECTS) $(native_fortran_nf_sched_ctx_LDADD) $(LIBS) +native_fortran/nf_varbuf_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_varbuf.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_varbuf$(EXEEXT): $(native_fortran_nf_varbuf_OBJECTS) $(native_fortran_nf_varbuf_DEPENDENCIES) $(EXTRA_native_fortran_nf_varbuf_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_varbuf$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_varbuf_OBJECTS) $(native_fortran_nf_varbuf_LDADD) $(LIBS) +native_fortran/nf_vector.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_vector$(EXEEXT): $(native_fortran_nf_vector_OBJECTS) $(native_fortran_nf_vector_DEPENDENCIES) $(EXTRA_native_fortran_nf_vector_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_vector$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_vector_OBJECTS) $(native_fortran_nf_vector_LDADD) $(LIBS) +openmp/$(am__dirstamp): + @$(MKDIR_P) openmp + @: > openmp/$(am__dirstamp) +openmp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) openmp/$(DEPDIR) + @: > openmp/$(DEPDIR)/$(am__dirstamp) +openmp/vector_scal_omp-vector_scal_omp.$(OBJEXT): \ + openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/vector_scal_omp$(EXEEXT): $(openmp_vector_scal_omp_OBJECTS) $(openmp_vector_scal_omp_DEPENDENCIES) $(EXTRA_openmp_vector_scal_omp_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/vector_scal_omp$(EXEEXT) + $(AM_V_CCLD)$(openmp_vector_scal_omp_LINK) $(openmp_vector_scal_omp_OBJECTS) $(openmp_vector_scal_omp_LDADD) $(LIBS) +parallel_workers/$(am__dirstamp): + @$(MKDIR_P) parallel_workers + @: > parallel_workers/$(am__dirstamp) +parallel_workers/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) parallel_workers/$(DEPDIR) + @: > 
parallel_workers/$(DEPDIR)/$(am__dirstamp) +parallel_workers/parallel_workers-parallel_workers.$(OBJEXT): \ + parallel_workers/$(am__dirstamp) \ + parallel_workers/$(DEPDIR)/$(am__dirstamp) + +parallel_workers/parallel_workers$(EXEEXT): $(parallel_workers_parallel_workers_OBJECTS) $(parallel_workers_parallel_workers_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_DEPENDENCIES) parallel_workers/$(am__dirstamp) + @rm -f parallel_workers/parallel_workers$(EXEEXT) + $(AM_V_CCLD)$(parallel_workers_parallel_workers_LINK) $(parallel_workers_parallel_workers_OBJECTS) $(parallel_workers_parallel_workers_LDADD) $(LIBS) +parallel_workers/parallel_workers_func-parallel_workers_func.$(OBJEXT): \ + parallel_workers/$(am__dirstamp) \ + parallel_workers/$(DEPDIR)/$(am__dirstamp) + +parallel_workers/parallel_workers_func$(EXEEXT): $(parallel_workers_parallel_workers_func_OBJECTS) $(parallel_workers_parallel_workers_func_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_func_DEPENDENCIES) parallel_workers/$(am__dirstamp) + @rm -f parallel_workers/parallel_workers_func$(EXEEXT) + $(AM_V_CCLD)$(parallel_workers_parallel_workers_func_LINK) $(parallel_workers_parallel_workers_func_OBJECTS) $(parallel_workers_parallel_workers_func_LDADD) $(LIBS) +parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.$(OBJEXT): \ + parallel_workers/$(am__dirstamp) \ + parallel_workers/$(DEPDIR)/$(am__dirstamp) + +parallel_workers/parallel_workers_oldapi$(EXEEXT): $(parallel_workers_parallel_workers_oldapi_OBJECTS) $(parallel_workers_parallel_workers_oldapi_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_oldapi_DEPENDENCIES) parallel_workers/$(am__dirstamp) + @rm -f parallel_workers/parallel_workers_oldapi$(EXEEXT) + $(AM_V_CCLD)$(parallel_workers_parallel_workers_oldapi_LINK) $(parallel_workers_parallel_workers_oldapi_OBJECTS) $(parallel_workers_parallel_workers_oldapi_LDADD) $(LIBS) +perf_monitoring/$(am__dirstamp): + @$(MKDIR_P) perf_monitoring + @: > 
perf_monitoring/$(am__dirstamp) +perf_monitoring/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) perf_monitoring/$(DEPDIR) + @: > perf_monitoring/$(DEPDIR)/$(am__dirstamp) +perf_monitoring/perf_counters_01.$(OBJEXT): \ + perf_monitoring/$(am__dirstamp) \ + perf_monitoring/$(DEPDIR)/$(am__dirstamp) + +perf_monitoring/perf_counters_01$(EXEEXT): $(perf_monitoring_perf_counters_01_OBJECTS) $(perf_monitoring_perf_counters_01_DEPENDENCIES) $(EXTRA_perf_monitoring_perf_counters_01_DEPENDENCIES) perf_monitoring/$(am__dirstamp) + @rm -f perf_monitoring/perf_counters_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perf_monitoring_perf_counters_01_OBJECTS) $(perf_monitoring_perf_counters_01_LDADD) $(LIBS) +perf_monitoring/perf_counters_02.$(OBJEXT): \ + perf_monitoring/$(am__dirstamp) \ + perf_monitoring/$(DEPDIR)/$(am__dirstamp) + +perf_monitoring/perf_counters_02$(EXEEXT): $(perf_monitoring_perf_counters_02_OBJECTS) $(perf_monitoring_perf_counters_02_DEPENDENCIES) $(EXTRA_perf_monitoring_perf_counters_02_DEPENDENCIES) perf_monitoring/$(am__dirstamp) + @rm -f perf_monitoring/perf_counters_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perf_monitoring_perf_counters_02_OBJECTS) $(perf_monitoring_perf_counters_02_LDADD) $(LIBS) +perf_steering/$(am__dirstamp): + @$(MKDIR_P) perf_steering + @: > perf_steering/$(am__dirstamp) +perf_steering/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) perf_steering/$(DEPDIR) + @: > perf_steering/$(DEPDIR)/$(am__dirstamp) +perf_steering/perf_knobs_01.$(OBJEXT): perf_steering/$(am__dirstamp) \ + perf_steering/$(DEPDIR)/$(am__dirstamp) + +perf_steering/perf_knobs_01$(EXEEXT): $(perf_steering_perf_knobs_01_OBJECTS) $(perf_steering_perf_knobs_01_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_01_DEPENDENCIES) perf_steering/$(am__dirstamp) + @rm -f perf_steering/perf_knobs_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_01_OBJECTS) $(perf_steering_perf_knobs_01_LDADD) $(LIBS) +perf_steering/perf_knobs_02.$(OBJEXT): perf_steering/$(am__dirstamp) \ + 
perf_steering/$(DEPDIR)/$(am__dirstamp) + +perf_steering/perf_knobs_02$(EXEEXT): $(perf_steering_perf_knobs_02_OBJECTS) $(perf_steering_perf_knobs_02_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_02_DEPENDENCIES) perf_steering/$(am__dirstamp) + @rm -f perf_steering/perf_knobs_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_02_OBJECTS) $(perf_steering_perf_knobs_02_LDADD) $(LIBS) +perf_steering/perf_knobs_03.$(OBJEXT): perf_steering/$(am__dirstamp) \ + perf_steering/$(DEPDIR)/$(am__dirstamp) + +perf_steering/perf_knobs_03$(EXEEXT): $(perf_steering_perf_knobs_03_OBJECTS) $(perf_steering_perf_knobs_03_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_03_DEPENDENCIES) perf_steering/$(am__dirstamp) + @rm -f perf_steering/perf_knobs_03$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_03_OBJECTS) $(perf_steering_perf_knobs_03_LDADD) $(LIBS) +pi/$(am__dirstamp): + @$(MKDIR_P) pi + @: > pi/$(am__dirstamp) +pi/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pi/$(DEPDIR) + @: > pi/$(DEPDIR)/$(am__dirstamp) +pi/pi.$(OBJEXT): pi/$(am__dirstamp) pi/$(DEPDIR)/$(am__dirstamp) +pi/SobolQRNG/$(am__dirstamp): + @$(MKDIR_P) pi/SobolQRNG + @: > pi/SobolQRNG/$(am__dirstamp) +pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pi/SobolQRNG/$(DEPDIR) + @: > pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) +pi/SobolQRNG/sobol_gold.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ + pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) +pi/SobolQRNG/sobol_primitives.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ + pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) +pi/pi_kernel.$(OBJEXT): pi/$(am__dirstamp) \ + pi/$(DEPDIR)/$(am__dirstamp) +pi/SobolQRNG/sobol_gpu.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ + pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) + +pi/pi$(EXEEXT): $(pi_pi_OBJECTS) $(pi_pi_DEPENDENCIES) $(EXTRA_pi_pi_DEPENDENCIES) pi/$(am__dirstamp) + @rm -f pi/pi$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pi_pi_OBJECTS) $(pi_pi_LDADD) $(LIBS) +pi/pi_redux.$(OBJEXT): pi/$(am__dirstamp) pi/$(DEPDIR)/$(am__dirstamp) 
+pi/pi_redux_kernel.$(OBJEXT): pi/$(am__dirstamp) \ + pi/$(DEPDIR)/$(am__dirstamp) + +pi/pi_redux$(EXEEXT): $(pi_pi_redux_OBJECTS) $(pi_pi_redux_DEPENDENCIES) $(EXTRA_pi_pi_redux_DEPENDENCIES) pi/$(am__dirstamp) + @rm -f pi/pi_redux$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pi_pi_redux_OBJECTS) $(pi_pi_redux_LDADD) $(LIBS) +pipeline/$(am__dirstamp): + @$(MKDIR_P) pipeline + @: > pipeline/$(am__dirstamp) +pipeline/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pipeline/$(DEPDIR) + @: > pipeline/$(DEPDIR)/$(am__dirstamp) +pipeline/pipeline.$(OBJEXT): pipeline/$(am__dirstamp) \ + pipeline/$(DEPDIR)/$(am__dirstamp) + +pipeline/pipeline$(EXEEXT): $(pipeline_pipeline_OBJECTS) $(pipeline_pipeline_DEPENDENCIES) $(EXTRA_pipeline_pipeline_DEPENDENCIES) pipeline/$(am__dirstamp) + @rm -f pipeline/pipeline$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pipeline_pipeline_OBJECTS) $(pipeline_pipeline_LDADD) $(LIBS) +ppm_downscaler/$(am__dirstamp): + @$(MKDIR_P) ppm_downscaler + @: > ppm_downscaler/$(am__dirstamp) +ppm_downscaler/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ppm_downscaler/$(DEPDIR) + @: > ppm_downscaler/$(DEPDIR)/$(am__dirstamp) +ppm_downscaler/ppm_downscaler.$(OBJEXT): \ + ppm_downscaler/$(am__dirstamp) \ + ppm_downscaler/$(DEPDIR)/$(am__dirstamp) + +ppm_downscaler/ppm_downscaler$(EXEEXT): $(ppm_downscaler_ppm_downscaler_OBJECTS) $(ppm_downscaler_ppm_downscaler_DEPENDENCIES) $(EXTRA_ppm_downscaler_ppm_downscaler_DEPENDENCIES) ppm_downscaler/$(am__dirstamp) + @rm -f ppm_downscaler/ppm_downscaler$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ppm_downscaler_ppm_downscaler_OBJECTS) $(ppm_downscaler_ppm_downscaler_LDADD) $(LIBS) +ppm_downscaler/yuv_downscaler.$(OBJEXT): \ + ppm_downscaler/$(am__dirstamp) \ + ppm_downscaler/$(DEPDIR)/$(am__dirstamp) + +ppm_downscaler/yuv_downscaler$(EXEEXT): $(ppm_downscaler_yuv_downscaler_OBJECTS) $(ppm_downscaler_yuv_downscaler_DEPENDENCIES) $(EXTRA_ppm_downscaler_yuv_downscaler_DEPENDENCIES) ppm_downscaler/$(am__dirstamp) + @rm -f ppm_downscaler/yuv_downscaler$(EXEEXT) + 
$(AM_V_CCLD)$(LINK) $(ppm_downscaler_yuv_downscaler_OBJECTS) $(ppm_downscaler_yuv_downscaler_LDADD) $(LIBS) +profiling/$(am__dirstamp): + @$(MKDIR_P) profiling + @: > profiling/$(am__dirstamp) +profiling/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) profiling/$(DEPDIR) + @: > profiling/$(DEPDIR)/$(am__dirstamp) +profiling/profiling.$(OBJEXT): profiling/$(am__dirstamp) \ + profiling/$(DEPDIR)/$(am__dirstamp) + +profiling/profiling$(EXEEXT): $(profiling_profiling_OBJECTS) $(profiling_profiling_DEPENDENCIES) $(EXTRA_profiling_profiling_DEPENDENCIES) profiling/$(am__dirstamp) + @rm -f profiling/profiling$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(profiling_profiling_OBJECTS) $(profiling_profiling_LDADD) $(LIBS) +reductions/$(am__dirstamp): + @$(MKDIR_P) reductions + @: > reductions/$(am__dirstamp) +reductions/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) reductions/$(DEPDIR) + @: > reductions/$(DEPDIR)/$(am__dirstamp) +reductions/dot_product.$(OBJEXT): reductions/$(am__dirstamp) \ + reductions/$(DEPDIR)/$(am__dirstamp) +reductions/dot_product_kernels.$(OBJEXT): reductions/$(am__dirstamp) \ + reductions/$(DEPDIR)/$(am__dirstamp) + +reductions/dot_product$(EXEEXT): $(reductions_dot_product_OBJECTS) $(reductions_dot_product_DEPENDENCIES) $(EXTRA_reductions_dot_product_DEPENDENCIES) reductions/$(am__dirstamp) + @rm -f reductions/dot_product$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(reductions_dot_product_OBJECTS) $(reductions_dot_product_LDADD) $(LIBS) +reductions/minmax_reduction.$(OBJEXT): reductions/$(am__dirstamp) \ + reductions/$(DEPDIR)/$(am__dirstamp) + +reductions/minmax_reduction$(EXEEXT): $(reductions_minmax_reduction_OBJECTS) $(reductions_minmax_reduction_DEPENDENCIES) $(EXTRA_reductions_minmax_reduction_DEPENDENCIES) reductions/$(am__dirstamp) + @rm -f reductions/minmax_reduction$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(reductions_minmax_reduction_OBJECTS) $(reductions_minmax_reduction_LDADD) $(LIBS) +sched_ctx/$(am__dirstamp): + @$(MKDIR_P) sched_ctx + @: > sched_ctx/$(am__dirstamp) 
+sched_ctx/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_ctx/$(DEPDIR) + @: > sched_ctx/$(DEPDIR)/$(am__dirstamp) +sched_ctx/dummy_sched_with_ctx.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/dummy_sched_with_ctx$(EXEEXT): $(sched_ctx_dummy_sched_with_ctx_OBJECTS) $(sched_ctx_dummy_sched_with_ctx_DEPENDENCIES) $(EXTRA_sched_ctx_dummy_sched_with_ctx_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/dummy_sched_with_ctx$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_dummy_sched_with_ctx_OBJECTS) $(sched_ctx_dummy_sched_with_ctx_LDADD) $(LIBS) +sched_ctx/gpu_partition.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) +sched_ctx/axpy_partition_gpu.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/gpu_partition$(EXEEXT): $(sched_ctx_gpu_partition_OBJECTS) $(sched_ctx_gpu_partition_DEPENDENCIES) $(EXTRA_sched_ctx_gpu_partition_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/gpu_partition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_gpu_partition_OBJECTS) $(sched_ctx_gpu_partition_LDADD) $(LIBS) +sched_ctx/nested_sched_ctxs-nested_sched_ctxs.$(OBJEXT): \ + sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/nested_sched_ctxs$(EXEEXT): $(sched_ctx_nested_sched_ctxs_OBJECTS) $(sched_ctx_nested_sched_ctxs_DEPENDENCIES) $(EXTRA_sched_ctx_nested_sched_ctxs_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/nested_sched_ctxs$(EXEEXT) + $(AM_V_CCLD)$(sched_ctx_nested_sched_ctxs_LINK) $(sched_ctx_nested_sched_ctxs_OBJECTS) $(sched_ctx_nested_sched_ctxs_LDADD) $(LIBS) +sched_ctx/parallel_code-parallel_code.$(OBJEXT): \ + sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/parallel_code$(EXEEXT): $(sched_ctx_parallel_code_OBJECTS) $(sched_ctx_parallel_code_DEPENDENCIES) $(EXTRA_sched_ctx_parallel_code_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/parallel_code$(EXEEXT) + 
$(AM_V_CCLD)$(sched_ctx_parallel_code_LINK) $(sched_ctx_parallel_code_OBJECTS) $(sched_ctx_parallel_code_LDADD) $(LIBS) +sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.$(OBJEXT): \ + sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/parallel_tasks_reuse_handle$(EXEEXT): $(sched_ctx_parallel_tasks_reuse_handle_OBJECTS) $(sched_ctx_parallel_tasks_reuse_handle_DEPENDENCIES) $(EXTRA_sched_ctx_parallel_tasks_reuse_handle_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) + $(AM_V_CCLD)$(sched_ctx_parallel_tasks_reuse_handle_LINK) $(sched_ctx_parallel_tasks_reuse_handle_OBJECTS) $(sched_ctx_parallel_tasks_reuse_handle_LDADD) $(LIBS) +sched_ctx/prio.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/prio$(EXEEXT): $(sched_ctx_prio_OBJECTS) $(sched_ctx_prio_DEPENDENCIES) $(EXTRA_sched_ctx_prio_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/prio$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_prio_OBJECTS) $(sched_ctx_prio_LDADD) $(LIBS) +sched_ctx/sched_ctx.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx$(EXEEXT): $(sched_ctx_sched_ctx_OBJECTS) $(sched_ctx_sched_ctx_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_OBJECTS) $(sched_ctx_sched_ctx_LDADD) $(LIBS) +sched_ctx/sched_ctx_delete.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_delete$(EXEEXT): $(sched_ctx_sched_ctx_delete_OBJECTS) $(sched_ctx_sched_ctx_delete_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_delete_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_delete$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_delete_OBJECTS) $(sched_ctx_sched_ctx_delete_LDADD) $(LIBS) +sched_ctx/sched_ctx_empty.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + 
sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_empty$(EXEEXT): $(sched_ctx_sched_ctx_empty_OBJECTS) $(sched_ctx_sched_ctx_empty_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_empty_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_empty$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_empty_OBJECTS) $(sched_ctx_sched_ctx_empty_LDADD) $(LIBS) +sched_ctx/sched_ctx_remove.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_remove$(EXEEXT): $(sched_ctx_sched_ctx_remove_OBJECTS) $(sched_ctx_sched_ctx_remove_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_remove_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_remove$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_remove_OBJECTS) $(sched_ctx_sched_ctx_remove_LDADD) $(LIBS) +sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.$(OBJEXT): \ + sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_without_sched_policy$(EXEEXT): $(sched_ctx_sched_ctx_without_sched_policy_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_without_sched_policy_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) + $(AM_V_CCLD)$(sched_ctx_sched_ctx_without_sched_policy_LINK) $(sched_ctx_sched_ctx_without_sched_policy_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_LDADD) $(LIBS) +sched_ctx/sched_ctx_without_sched_policy_awake.$(OBJEXT): \ + sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT): $(sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_awake_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_without_sched_policy_awake_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_awake_LDADD) $(LIBS) +sched_ctx/two_cpu_contexts.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/two_cpu_contexts$(EXEEXT): $(sched_ctx_two_cpu_contexts_OBJECTS) $(sched_ctx_two_cpu_contexts_DEPENDENCIES) $(EXTRA_sched_ctx_two_cpu_contexts_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/two_cpu_contexts$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_two_cpu_contexts_OBJECTS) $(sched_ctx_two_cpu_contexts_LDADD) $(LIBS) +scheduler/dummy_modular_sched.$(OBJEXT): scheduler/$(am__dirstamp) \ + scheduler/$(DEPDIR)/$(am__dirstamp) + +scheduler/dummy_modular_sched$(EXEEXT): $(scheduler_dummy_modular_sched_OBJECTS) $(scheduler_dummy_modular_sched_DEPENDENCIES) $(EXTRA_scheduler_dummy_modular_sched_DEPENDENCIES) scheduler/$(am__dirstamp) + @rm -f scheduler/dummy_modular_sched$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(scheduler_dummy_modular_sched_OBJECTS) $(scheduler_dummy_modular_sched_LDADD) $(LIBS) +scheduler/dummy_sched.$(OBJEXT): scheduler/$(am__dirstamp) \ + scheduler/$(DEPDIR)/$(am__dirstamp) + +scheduler/dummy_sched$(EXEEXT): $(scheduler_dummy_sched_OBJECTS) $(scheduler_dummy_sched_DEPENDENCIES) $(EXTRA_scheduler_dummy_sched_DEPENDENCIES) scheduler/$(am__dirstamp) + @rm -f scheduler/dummy_sched$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(scheduler_dummy_sched_OBJECTS) $(scheduler_dummy_sched_LDADD) $(LIBS) +scheduler/heteroprio_test.$(OBJEXT): scheduler/$(am__dirstamp) \ + scheduler/$(DEPDIR)/$(am__dirstamp) + +scheduler/heteroprio_test$(EXEEXT): $(scheduler_heteroprio_test_OBJECTS) $(scheduler_heteroprio_test_DEPENDENCIES) $(EXTRA_scheduler_heteroprio_test_DEPENDENCIES) scheduler/$(am__dirstamp) + @rm -f scheduler/heteroprio_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(scheduler_heteroprio_test_OBJECTS) $(scheduler_heteroprio_test_LDADD) $(LIBS) +spmd/$(am__dirstamp): + @$(MKDIR_P) spmd + @: > spmd/$(am__dirstamp) 
+spmd/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) spmd/$(DEPDIR) + @: > spmd/$(DEPDIR)/$(am__dirstamp) +spmd/vector_scal_spmd.$(OBJEXT): spmd/$(am__dirstamp) \ + spmd/$(DEPDIR)/$(am__dirstamp) + +spmd/vector_scal_spmd$(EXEEXT): $(spmd_vector_scal_spmd_OBJECTS) $(spmd_vector_scal_spmd_DEPENDENCIES) $(EXTRA_spmd_vector_scal_spmd_DEPENDENCIES) spmd/$(am__dirstamp) + @rm -f spmd/vector_scal_spmd$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(spmd_vector_scal_spmd_OBJECTS) $(spmd_vector_scal_spmd_LDADD) $(LIBS) +spmv/$(am__dirstamp): + @$(MKDIR_P) spmv + @: > spmv/$(am__dirstamp) +spmv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) spmv/$(DEPDIR) + @: > spmv/$(DEPDIR)/$(am__dirstamp) +spmv/dw_block_spmv.$(OBJEXT): spmv/$(am__dirstamp) \ + spmv/$(DEPDIR)/$(am__dirstamp) +spmv/dw_block_spmv_kernels.$(OBJEXT): spmv/$(am__dirstamp) \ + spmv/$(DEPDIR)/$(am__dirstamp) +spmv/matrix_market/$(am__dirstamp): + @$(MKDIR_P) spmv/matrix_market + @: > spmv/matrix_market/$(am__dirstamp) +spmv/matrix_market/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) spmv/matrix_market/$(DEPDIR) + @: > spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) +spmv/matrix_market/mm_to_bcsr.$(OBJEXT): \ + spmv/matrix_market/$(am__dirstamp) \ + spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) +spmv/matrix_market/mmio.$(OBJEXT): spmv/matrix_market/$(am__dirstamp) \ + spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) + +spmv/dw_block_spmv$(EXEEXT): $(spmv_dw_block_spmv_OBJECTS) $(spmv_dw_block_spmv_DEPENDENCIES) $(EXTRA_spmv_dw_block_spmv_DEPENDENCIES) spmv/$(am__dirstamp) + @rm -f spmv/dw_block_spmv$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(spmv_dw_block_spmv_OBJECTS) $(spmv_dw_block_spmv_LDADD) $(LIBS) +spmv/spmv.$(OBJEXT): spmv/$(am__dirstamp) \ + spmv/$(DEPDIR)/$(am__dirstamp) +spmv/spmv_kernels.$(OBJEXT): spmv/$(am__dirstamp) \ + spmv/$(DEPDIR)/$(am__dirstamp) +spmv/spmv_cuda.$(OBJEXT): spmv/$(am__dirstamp) \ + spmv/$(DEPDIR)/$(am__dirstamp) + +spmv/spmv$(EXEEXT): $(spmv_spmv_OBJECTS) $(spmv_spmv_DEPENDENCIES) $(EXTRA_spmv_spmv_DEPENDENCIES) 
spmv/$(am__dirstamp) + @rm -f spmv/spmv$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(spmv_spmv_OBJECTS) $(spmv_spmv_LDADD) $(LIBS) +subgraphs/$(am__dirstamp): + @$(MKDIR_P) subgraphs + @: > subgraphs/$(am__dirstamp) +subgraphs/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) subgraphs/$(DEPDIR) + @: > subgraphs/$(DEPDIR)/$(am__dirstamp) +subgraphs/manual.$(OBJEXT): subgraphs/$(am__dirstamp) \ + subgraphs/$(DEPDIR)/$(am__dirstamp) +subgraphs/codelets.$(OBJEXT): subgraphs/$(am__dirstamp) \ + subgraphs/$(DEPDIR)/$(am__dirstamp) + +subgraphs/manual$(EXEEXT): $(subgraphs_manual_OBJECTS) $(subgraphs_manual_DEPENDENCIES) $(EXTRA_subgraphs_manual_DEPENDENCIES) subgraphs/$(am__dirstamp) + @rm -f subgraphs/manual$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(subgraphs_manual_OBJECTS) $(subgraphs_manual_LDADD) $(LIBS) +subgraphs/partition.$(OBJEXT): subgraphs/$(am__dirstamp) \ + subgraphs/$(DEPDIR)/$(am__dirstamp) + +subgraphs/partition$(EXEEXT): $(subgraphs_partition_OBJECTS) $(subgraphs_partition_DEPENDENCIES) $(EXTRA_subgraphs_partition_DEPENDENCIES) subgraphs/$(am__dirstamp) + @rm -f subgraphs/partition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(subgraphs_partition_OBJECTS) $(subgraphs_partition_LDADD) $(LIBS) +subgraphs/plan.$(OBJEXT): subgraphs/$(am__dirstamp) \ + subgraphs/$(DEPDIR)/$(am__dirstamp) + +subgraphs/plan$(EXEEXT): $(subgraphs_plan_OBJECTS) $(subgraphs_plan_DEPENDENCIES) $(EXTRA_subgraphs_plan_DEPENDENCIES) subgraphs/$(am__dirstamp) + @rm -f subgraphs/plan$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(subgraphs_plan_OBJECTS) $(subgraphs_plan_LDADD) $(LIBS) +tag_example/$(am__dirstamp): + @$(MKDIR_P) tag_example + @: > tag_example/$(am__dirstamp) +tag_example/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) tag_example/$(DEPDIR) + @: > tag_example/$(DEPDIR)/$(am__dirstamp) +tag_example/tag_example.$(OBJEXT): tag_example/$(am__dirstamp) \ + tag_example/$(DEPDIR)/$(am__dirstamp) + +tag_example/tag_example$(EXEEXT): $(tag_example_tag_example_OBJECTS) $(tag_example_tag_example_DEPENDENCIES) 
$(EXTRA_tag_example_tag_example_DEPENDENCIES) tag_example/$(am__dirstamp) + @rm -f tag_example/tag_example$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tag_example_tag_example_OBJECTS) $(tag_example_tag_example_LDADD) $(LIBS) +tag_example/tag_example2.$(OBJEXT): tag_example/$(am__dirstamp) \ + tag_example/$(DEPDIR)/$(am__dirstamp) + +tag_example/tag_example2$(EXEEXT): $(tag_example_tag_example2_OBJECTS) $(tag_example_tag_example2_DEPENDENCIES) $(EXTRA_tag_example_tag_example2_DEPENDENCIES) tag_example/$(am__dirstamp) + @rm -f tag_example/tag_example2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tag_example_tag_example2_OBJECTS) $(tag_example_tag_example2_LDADD) $(LIBS) +tag_example/tag_example3.$(OBJEXT): tag_example/$(am__dirstamp) \ + tag_example/$(DEPDIR)/$(am__dirstamp) + +tag_example/tag_example3$(EXEEXT): $(tag_example_tag_example3_OBJECTS) $(tag_example_tag_example3_DEPENDENCIES) $(EXTRA_tag_example_tag_example3_DEPENDENCIES) tag_example/$(am__dirstamp) + @rm -f tag_example/tag_example3$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tag_example_tag_example3_OBJECTS) $(tag_example_tag_example3_LDADD) $(LIBS) +tag_example/tag_example4.$(OBJEXT): tag_example/$(am__dirstamp) \ + tag_example/$(DEPDIR)/$(am__dirstamp) + +tag_example/tag_example4$(EXEEXT): $(tag_example_tag_example4_OBJECTS) $(tag_example_tag_example4_DEPENDENCIES) $(EXTRA_tag_example_tag_example4_DEPENDENCIES) tag_example/$(am__dirstamp) + @rm -f tag_example/tag_example4$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tag_example_tag_example4_OBJECTS) $(tag_example_tag_example4_LDADD) $(LIBS) +tag_example/tag_restartable.$(OBJEXT): tag_example/$(am__dirstamp) \ + tag_example/$(DEPDIR)/$(am__dirstamp) + +tag_example/tag_restartable$(EXEEXT): $(tag_example_tag_restartable_OBJECTS) $(tag_example_tag_restartable_DEPENDENCIES) $(EXTRA_tag_example_tag_restartable_DEPENDENCIES) tag_example/$(am__dirstamp) + @rm -f tag_example/tag_restartable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tag_example_tag_restartable_OBJECTS) $(tag_example_tag_restartable_LDADD) $(LIBS) 
+transactions/$(am__dirstamp): + @$(MKDIR_P) transactions + @: > transactions/$(am__dirstamp) +transactions/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) transactions/$(DEPDIR) + @: > transactions/$(DEPDIR)/$(am__dirstamp) +transactions/trs_inc.$(OBJEXT): transactions/$(am__dirstamp) \ + transactions/$(DEPDIR)/$(am__dirstamp) + +transactions/trs_inc$(EXEEXT): $(transactions_trs_inc_OBJECTS) $(transactions_trs_inc_DEPENDENCIES) $(EXTRA_transactions_trs_inc_DEPENDENCIES) transactions/$(am__dirstamp) + @rm -f transactions/trs_inc$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(transactions_trs_inc_OBJECTS) $(transactions_trs_inc_LDADD) $(LIBS) +transactions/trs_sgemm.$(OBJEXT): transactions/$(am__dirstamp) \ + transactions/$(DEPDIR)/$(am__dirstamp) + +transactions/trs_sgemm$(EXEEXT): $(transactions_trs_sgemm_OBJECTS) $(transactions_trs_sgemm_DEPENDENCIES) $(EXTRA_transactions_trs_sgemm_DEPENDENCIES) transactions/$(am__dirstamp) + @rm -f transactions/trs_sgemm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(transactions_trs_sgemm_OBJECTS) $(transactions_trs_sgemm_LDADD) $(LIBS) +worker_collections/$(am__dirstamp): + @$(MKDIR_P) worker_collections + @: > worker_collections/$(am__dirstamp) +worker_collections/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) worker_collections/$(DEPDIR) + @: > worker_collections/$(DEPDIR)/$(am__dirstamp) +worker_collections/worker_list_example.$(OBJEXT): \ + worker_collections/$(am__dirstamp) \ + worker_collections/$(DEPDIR)/$(am__dirstamp) + +worker_collections/worker_list_example$(EXEEXT): $(worker_collections_worker_list_example_OBJECTS) $(worker_collections_worker_list_example_DEPENDENCIES) $(EXTRA_worker_collections_worker_list_example_DEPENDENCIES) worker_collections/$(am__dirstamp) + @rm -f worker_collections/worker_list_example$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(worker_collections_worker_list_example_OBJECTS) $(worker_collections_worker_list_example_LDADD) $(LIBS) +worker_collections/worker_tree_example.$(OBJEXT): \ + worker_collections/$(am__dirstamp) \ + 
worker_collections/$(DEPDIR)/$(am__dirstamp) + +worker_collections/worker_tree_example$(EXEEXT): $(worker_collections_worker_tree_example_OBJECTS) $(worker_collections_worker_tree_example_DEPENDENCIES) $(EXTRA_worker_collections_worker_tree_example_DEPENDENCIES) worker_collections/$(am__dirstamp) + @rm -f worker_collections/worker_tree_example$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(worker_collections_worker_tree_example_OBJECTS) $(worker_collections_worker_tree_example_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f api/*.$(OBJEXT) + -rm -f axpy/*.$(OBJEXT) + -rm -f basic_examples/*.$(OBJEXT) + -rm -f binary/*.$(OBJEXT) + -rm -f callback/*.$(OBJEXT) + -rm -f cg/*.$(OBJEXT) + -rm -f cholesky/*.$(OBJEXT) + -rm -f cholesky/*.lo + -rm -f common/*.$(OBJEXT) + -rm -f cpp/*.$(OBJEXT) + -rm -f dependency/*.$(OBJEXT) + -rm -f filters/*.$(OBJEXT) + -rm -f filters/custom_mf/*.$(OBJEXT) + -rm -f fortran/*.$(OBJEXT) + -rm -f fortran90/*.$(OBJEXT) + -rm -f gl_interop/*.$(OBJEXT) + -rm -f heat/*.$(OBJEXT) + -rm -f incrementer/*.$(OBJEXT) + -rm -f interface/*.$(OBJEXT) + -rm -f interface/complex_dev_handle/*.$(OBJEXT) + -rm -f lu/*.$(OBJEXT) + -rm -f mandelbrot/*.$(OBJEXT) + -rm -f matvecmult/*.$(OBJEXT) + -rm -f mlr/*.$(OBJEXT) + -rm -f mult/*.$(OBJEXT) + -rm -f native_fortran/*.$(OBJEXT) + -rm -f openmp/*.$(OBJEXT) + -rm -f parallel_workers/*.$(OBJEXT) + -rm -f perf_monitoring/*.$(OBJEXT) + -rm -f perf_steering/*.$(OBJEXT) + -rm -f pi/*.$(OBJEXT) + -rm -f pi/SobolQRNG/*.$(OBJEXT) + -rm -f pipeline/*.$(OBJEXT) + -rm -f ppm_downscaler/*.$(OBJEXT) + -rm -f profiling/*.$(OBJEXT) + -rm -f profiling_tool/*.$(OBJEXT) + -rm -f profiling_tool/*.lo + -rm -f reductions/*.$(OBJEXT) + -rm -f sched_ctx/*.$(OBJEXT) + -rm -f sched_ctx_utils/*.$(OBJEXT) + -rm -f scheduler/*.$(OBJEXT) + -rm -f scheduler/*.lo + -rm -f spmd/*.$(OBJEXT) + -rm -f spmv/*.$(OBJEXT) + -rm -f spmv/matrix_market/*.$(OBJEXT) + -rm -f subgraphs/*.$(OBJEXT) + -rm -f tag_example/*.$(OBJEXT) + -rm -f 
transactions/*.$(OBJEXT) + -rm -f worker_collections/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/bcsr_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/block_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/coo_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/csr_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/matrix_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/multiformat_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/tensor_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/variable_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/vector_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/void_data_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@axpy/$(DEPDIR)/axpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@axpy/$(DEPDIR)/axpy_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/dynamic_handles.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@basic_examples/$(DEPDIR)/hello_world.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/hooks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/mult.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/ndim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/task_insert_color.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/topology.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable_kernels_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable_kernels_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_c.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@binary/$(DEPDIR)/binary.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/prologue.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cg/$(DEPDIR)/cg.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_compil.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_grain_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_models.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_tile_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/libmy_dmda.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors_cpp11.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/incrementer_cpp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/sequential_consistency.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/task_end_dep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/task_end_dep_add.Po@am__quote@ 
# am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/alloc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f3d_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f4d_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f5d_print.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_pick_matrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_print.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_pick_vector.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_print.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_manual.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@filters/$(DEPDIR)/fmultiple_submit_readonly.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_1d_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_2d_pick_vector.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_3d_pick_matrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_4d_pick_block.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_5d_pick_tensor.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_pick_ndim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_block.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_matrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_tensor.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_vector.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fread.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/frecursive.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@filters/$(DEPDIR)/ftensor_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_pick_block.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_print.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector_cpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector_pick_variable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow2d.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow3d.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow4d.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadownd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/conversion_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@fortran/$(DEPDIR)/hello_c.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@fortran90/$(DEPDIR)/marshalling.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@gl_interop/$(DEPDIR)/gl_interop.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@gl_interop/$(DEPDIR)/gl_interop_idle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_grain.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_sparse_cg.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_sparse_cg_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/heat.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/heat_display.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/lu_kernels_model.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@incrementer/$(DEPDIR)/incrementer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_filters.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_kernels_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/blas_complex.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_implicit_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_implicit_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_complex_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_complex_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_implicit_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_implicit_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_pivot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matvecmult/$(DEPDIR)/matvecmult.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mlr/$(DEPDIR)/mlr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/dgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/dgemm_layout.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/sgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/sgemm_layout.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perf_monitoring/$(DEPDIR)/perf_counters_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perf_monitoring/$(DEPDIR)/perf_counters_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_03.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pi/$(DEPDIR)/pi.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pi/$(DEPDIR)/pi_redux.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pipeline/$(DEPDIR)/pipeline.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling_tool/$(DEPDIR)/libprofiling_tool.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@reductions/$(DEPDIR)/dot_product.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@reductions/$(DEPDIR)/minmax_reduction.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/gpu_partition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/prio.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_delete.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_empty.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_remove.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/two_cpu_contexts.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/dummy_modular_sched.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/dummy_sched.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/heteroprio_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/libdummy_sched.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@spmd/$(DEPDIR)/vector_scal_spmd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/dw_block_spmv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/dw_block_spmv_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/spmv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/spmv_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@spmv/matrix_market/$(DEPDIR)/mmio.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/codelets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/manual.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/partition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/plan.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example3.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example4.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_restartable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@transactions/$(DEPDIR)/trs_inc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@transactions/$(DEPDIR)/trs_sgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@worker_collections/$(DEPDIR)/worker_list_example.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@worker_collections/$(DEPDIR)/worker_tree_example.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.F.o: + $(AM_V_PPF77)$(PPF77COMPILE) -c -o $@ $< + +.F.obj: + $(AM_V_PPF77)$(PPF77COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.F.lo: + $(AM_V_PPF77)$(LTPPF77COMPILE) -c -o $@ $< +.F.f: + $(F77COMPILE) -F $< + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mandelbrot/mandelbrot-mandelbrot.o: mandelbrot/mandelbrot.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT 
mandelbrot/mandelbrot-mandelbrot.o -MD -MP -MF mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo -c -o mandelbrot/mandelbrot-mandelbrot.o `test -f 'mandelbrot/mandelbrot.c' || echo '$(srcdir)/'`mandelbrot/mandelbrot.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mandelbrot/mandelbrot.c' object='mandelbrot/mandelbrot-mandelbrot.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mandelbrot/mandelbrot-mandelbrot.o `test -f 'mandelbrot/mandelbrot.c' || echo '$(srcdir)/'`mandelbrot/mandelbrot.c + +mandelbrot/mandelbrot-mandelbrot.obj: mandelbrot/mandelbrot.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mandelbrot/mandelbrot-mandelbrot.obj -MD -MP -MF mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo -c -o mandelbrot/mandelbrot-mandelbrot.obj `if test -f 'mandelbrot/mandelbrot.c'; then $(CYGPATH_W) 'mandelbrot/mandelbrot.c'; else $(CYGPATH_W) '$(srcdir)/mandelbrot/mandelbrot.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mandelbrot/mandelbrot.c' object='mandelbrot/mandelbrot-mandelbrot.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mandelbrot/mandelbrot-mandelbrot.obj `if test -f 'mandelbrot/mandelbrot.c'; then 
$(CYGPATH_W) 'mandelbrot/mandelbrot.c'; else $(CYGPATH_W) '$(srcdir)/mandelbrot/mandelbrot.c'; fi` + +openmp/vector_scal_omp-vector_scal_omp.o: openmp/vector_scal_omp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -MT openmp/vector_scal_omp-vector_scal_omp.o -MD -MP -MF openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo -c -o openmp/vector_scal_omp-vector_scal_omp.o `test -f 'openmp/vector_scal_omp.c' || echo '$(srcdir)/'`openmp/vector_scal_omp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp/vector_scal_omp.c' object='openmp/vector_scal_omp-vector_scal_omp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -c -o openmp/vector_scal_omp-vector_scal_omp.o `test -f 'openmp/vector_scal_omp.c' || echo '$(srcdir)/'`openmp/vector_scal_omp.c + +openmp/vector_scal_omp-vector_scal_omp.obj: openmp/vector_scal_omp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -MT openmp/vector_scal_omp-vector_scal_omp.obj -MD -MP -MF openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo -c -o openmp/vector_scal_omp-vector_scal_omp.obj `if test -f 'openmp/vector_scal_omp.c'; then $(CYGPATH_W) 'openmp/vector_scal_omp.c'; else $(CYGPATH_W) '$(srcdir)/openmp/vector_scal_omp.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp/vector_scal_omp.c' 
object='openmp/vector_scal_omp-vector_scal_omp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -c -o openmp/vector_scal_omp-vector_scal_omp.obj `if test -f 'openmp/vector_scal_omp.c'; then $(CYGPATH_W) 'openmp/vector_scal_omp.c'; else $(CYGPATH_W) '$(srcdir)/openmp/vector_scal_omp.c'; fi` + +parallel_workers/parallel_workers-parallel_workers.o: parallel_workers/parallel_workers.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers-parallel_workers.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo -c -o parallel_workers/parallel_workers-parallel_workers.o `test -f 'parallel_workers/parallel_workers.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers.c' object='parallel_workers/parallel_workers-parallel_workers.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers-parallel_workers.o `test -f 'parallel_workers/parallel_workers.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers.c + +parallel_workers/parallel_workers-parallel_workers.obj: parallel_workers/parallel_workers.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers-parallel_workers.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo -c -o parallel_workers/parallel_workers-parallel_workers.obj `if test -f 'parallel_workers/parallel_workers.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers.c' object='parallel_workers/parallel_workers-parallel_workers.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers-parallel_workers.obj `if test -f 'parallel_workers/parallel_workers.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers.c'; fi` + +parallel_workers/parallel_workers_func-parallel_workers_func.o: parallel_workers/parallel_workers_func.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_func-parallel_workers_func.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo -c -o parallel_workers/parallel_workers_func-parallel_workers_func.o `test -f 'parallel_workers/parallel_workers_func.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_func.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_func.c' object='parallel_workers/parallel_workers_func-parallel_workers_func.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_func-parallel_workers_func.o `test -f 'parallel_workers/parallel_workers_func.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_func.c + +parallel_workers/parallel_workers_func-parallel_workers_func.obj: parallel_workers/parallel_workers_func.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_func-parallel_workers_func.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo -c -o parallel_workers/parallel_workers_func-parallel_workers_func.obj `if test -f 'parallel_workers/parallel_workers_func.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_func.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_func.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_func.c' object='parallel_workers/parallel_workers_func-parallel_workers_func.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_func-parallel_workers_func.obj `if test -f 'parallel_workers/parallel_workers_func.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_func.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_func.c'; fi` + +parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o: parallel_workers/parallel_workers_oldapi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o `test -f 'parallel_workers/parallel_workers_oldapi.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_oldapi.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_oldapi.c' object='parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o `test -f 'parallel_workers/parallel_workers_oldapi.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_oldapi.c + +parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj: 
parallel_workers/parallel_workers_oldapi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj `if test -f 'parallel_workers/parallel_workers_oldapi.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_oldapi.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_oldapi.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_oldapi.c' object='parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj `if test -f 'parallel_workers/parallel_workers_oldapi.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_oldapi.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_oldapi.c'; fi` + +sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o: sched_ctx/nested_sched_ctxs.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -MT sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o -MD -MP -MF sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo -c -o 
sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o `test -f 'sched_ctx/nested_sched_ctxs.c' || echo '$(srcdir)/'`sched_ctx/nested_sched_ctxs.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/nested_sched_ctxs.c' object='sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o `test -f 'sched_ctx/nested_sched_ctxs.c' || echo '$(srcdir)/'`sched_ctx/nested_sched_ctxs.c + +sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj: sched_ctx/nested_sched_ctxs.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -MT sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj -MD -MP -MF sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj `if test -f 'sched_ctx/nested_sched_ctxs.c'; then $(CYGPATH_W) 'sched_ctx/nested_sched_ctxs.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/nested_sched_ctxs.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/nested_sched_ctxs.c' object='sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj `if test -f 'sched_ctx/nested_sched_ctxs.c'; then $(CYGPATH_W) 'sched_ctx/nested_sched_ctxs.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/nested_sched_ctxs.c'; fi` + +sched_ctx/parallel_code-parallel_code.o: sched_ctx/parallel_code.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_code-parallel_code.o -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo -c -o sched_ctx/parallel_code-parallel_code.o `test -f 'sched_ctx/parallel_code.c' || echo '$(srcdir)/'`sched_ctx/parallel_code.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_code.c' object='sched_ctx/parallel_code-parallel_code.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_code-parallel_code.o `test -f 'sched_ctx/parallel_code.c' || echo '$(srcdir)/'`sched_ctx/parallel_code.c + +sched_ctx/parallel_code-parallel_code.obj: sched_ctx/parallel_code.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_code-parallel_code.obj -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo -c -o sched_ctx/parallel_code-parallel_code.obj `if test -f 'sched_ctx/parallel_code.c'; then $(CYGPATH_W) 'sched_ctx/parallel_code.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_code.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_code.c' object='sched_ctx/parallel_code-parallel_code.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_code-parallel_code.obj `if test -f 'sched_ctx/parallel_code.c'; then $(CYGPATH_W) 'sched_ctx/parallel_code.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_code.c'; fi` + +sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o: sched_ctx/parallel_tasks_reuse_handle.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o `test -f 'sched_ctx/parallel_tasks_reuse_handle.c' || echo '$(srcdir)/'`sched_ctx/parallel_tasks_reuse_handle.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_tasks_reuse_handle.c' object='sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) 
$(CFLAGS) -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o `test -f 'sched_ctx/parallel_tasks_reuse_handle.c' || echo '$(srcdir)/'`sched_ctx/parallel_tasks_reuse_handle.c + +sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj: sched_ctx/parallel_tasks_reuse_handle.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj `if test -f 'sched_ctx/parallel_tasks_reuse_handle.c'; then $(CYGPATH_W) 'sched_ctx/parallel_tasks_reuse_handle.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_tasks_reuse_handle.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_tasks_reuse_handle.c' object='sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj `if test -f 'sched_ctx/parallel_tasks_reuse_handle.c'; then $(CYGPATH_W) 'sched_ctx/parallel_tasks_reuse_handle.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_tasks_reuse_handle.c'; fi` + +sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o: sched_ctx/sched_ctx_without_sched_policy.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -MT sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o -MD -MP -MF sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o `test -f 'sched_ctx/sched_ctx_without_sched_policy.c' || echo '$(srcdir)/'`sched_ctx/sched_ctx_without_sched_policy.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/sched_ctx_without_sched_policy.c' object='sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o `test -f 'sched_ctx/sched_ctx_without_sched_policy.c' || echo '$(srcdir)/'`sched_ctx/sched_ctx_without_sched_policy.c + +sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj: sched_ctx/sched_ctx_without_sched_policy.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -MT sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj -MD -MP -MF sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj `if test -f 
'sched_ctx/sched_ctx_without_sched_policy.c'; then $(CYGPATH_W) 'sched_ctx/sched_ctx_without_sched_policy.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/sched_ctx_without_sched_policy.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/sched_ctx_without_sched_policy.c' object='sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj `if test -f 'sched_ctx/sched_ctx_without_sched_policy.c'; then $(CYGPATH_W) 'sched_ctx/sched_ctx_without_sched_policy.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/sched_ctx_without_sched_policy.c'; fi` + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +.f90.o: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< + +.f90.obj: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.f90.lo: + $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf api/.libs api/_libs + -rm -rf axpy/.libs axpy/_libs + -rm -rf basic_examples/.libs basic_examples/_libs + -rm -rf binary/.libs binary/_libs + -rm -rf callback/.libs callback/_libs + -rm -rf cg/.libs cg/_libs + -rm -rf cholesky/.libs cholesky/_libs + -rm -rf cpp/.libs cpp/_libs + -rm -rf dependency/.libs dependency/_libs + -rm -rf filters/.libs filters/_libs + -rm -rf filters/custom_mf/.libs filters/custom_mf/_libs + -rm -rf fortran/.libs fortran/_libs + -rm -rf fortran90/.libs fortran90/_libs + -rm -rf gl_interop/.libs gl_interop/_libs + -rm -rf heat/.libs heat/_libs + -rm -rf incrementer/.libs incrementer/_libs + -rm -rf interface/.libs interface/_libs + -rm -rf interface/complex_dev_handle/.libs interface/complex_dev_handle/_libs + -rm -rf lu/.libs lu/_libs + -rm -rf mandelbrot/.libs mandelbrot/_libs + -rm -rf matvecmult/.libs matvecmult/_libs + -rm -rf mlr/.libs mlr/_libs + -rm -rf mult/.libs mult/_libs + -rm -rf native_fortran/.libs native_fortran/_libs 
+ -rm -rf openmp/.libs openmp/_libs + -rm -rf parallel_workers/.libs parallel_workers/_libs + -rm -rf perf_monitoring/.libs perf_monitoring/_libs + -rm -rf perf_steering/.libs perf_steering/_libs + -rm -rf pi/.libs pi/_libs + -rm -rf pipeline/.libs pipeline/_libs + -rm -rf ppm_downscaler/.libs ppm_downscaler/_libs + -rm -rf profiling/.libs profiling/_libs + -rm -rf profiling_tool/.libs profiling_tool/_libs + -rm -rf reductions/.libs reductions/_libs + -rm -rf sched_ctx/.libs sched_ctx/_libs + -rm -rf scheduler/.libs scheduler/_libs + -rm -rf spmd/.libs spmd/_libs + -rm -rf spmv/.libs spmv/_libs + -rm -rf subgraphs/.libs subgraphs/_libs + -rm -rf tag_example/.libs tag_example/_libs + -rm -rf transactions/.libs transactions/_libs + -rm -rf worker_collections/.libs worker_collections/_libs +install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) + @$(NORMAL_INSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. 
|| { \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ + echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_STARPU_OPENCL_DATADATA: + @$(NORMAL_UNINSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +scheduler/schedulers.sh.log: scheduler/schedulers.sh + @p='scheduler/schedulers.sh'; \ + b='scheduler/schedulers.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +scheduler/schedulers_context.sh.log: scheduler/schedulers_context.sh + @p='scheduler/schedulers_context.sh'; \ + b='scheduler/schedulers_context.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mult/sgemm.sh.log: mult/sgemm.sh + @p='mult/sgemm.sh'; \ + b='mult/sgemm.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +profiling_tool/prof.sh.log: profiling_tool/prof.sh + @p='profiling_tool/prof.sh'; \ + b='profiling_tool/prof.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +scheduler/libdummy_sched.sh.log: scheduler/libdummy_sched.sh + @p='scheduler/libdummy_sched.sh'; \ + b='scheduler/libdummy_sched.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +heat/heat.sh.log: heat/heat.sh + @p='heat/heat.sh'; \ + b='heat/heat.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu.sh.log: lu/lu.sh + @p='lu/lu.sh'; \ + b='lu/lu.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky.sh.log: cholesky/cholesky.sh + @p='cholesky/cholesky.sh'; \ + b='cholesky/cholesky.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_julia.sh.log: cholesky/cholesky_julia.sh + @p='cholesky/cholesky_julia.sh'; \ + b='cholesky/cholesky_julia.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/prio.log: sched_ctx/prio$(EXEEXT) + @p='sched_ctx/prio$(EXEEXT)'; \ + b='sched_ctx/prio'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- 
$(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +scheduler/dummy_sched.log: scheduler/dummy_sched$(EXEEXT) + @p='scheduler/dummy_sched$(EXEEXT)'; \ + b='scheduler/dummy_sched'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +scheduler/dummy_modular_sched.log: scheduler/dummy_modular_sched$(EXEEXT) + @p='scheduler/dummy_modular_sched$(EXEEXT)'; \ + b='scheduler/dummy_modular_sched'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +worker_collections/worker_list_example.log: worker_collections/worker_list_example$(EXEEXT) + @p='worker_collections/worker_list_example$(EXEEXT)'; \ + b='worker_collections/worker_list_example'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/bcsr_data_interface.log: api/bcsr_data_interface$(EXEEXT) + @p='api/bcsr_data_interface$(EXEEXT)'; \ + b='api/bcsr_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/block_data_interface.log: api/block_data_interface$(EXEEXT) + @p='api/block_data_interface$(EXEEXT)'; \ + b='api/block_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/coo_data_interface.log: api/coo_data_interface$(EXEEXT) + 
@p='api/coo_data_interface$(EXEEXT)'; \ + b='api/coo_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/csr_data_interface.log: api/csr_data_interface$(EXEEXT) + @p='api/csr_data_interface$(EXEEXT)'; \ + b='api/csr_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/matrix_data_interface.log: api/matrix_data_interface$(EXEEXT) + @p='api/matrix_data_interface$(EXEEXT)'; \ + b='api/matrix_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/multiformat_data_interface.log: api/multiformat_data_interface$(EXEEXT) + @p='api/multiformat_data_interface$(EXEEXT)'; \ + b='api/multiformat_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/tensor_data_interface.log: api/tensor_data_interface$(EXEEXT) + @p='api/tensor_data_interface$(EXEEXT)'; \ + b='api/tensor_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/variable_data_interface.log: api/variable_data_interface$(EXEEXT) + @p='api/variable_data_interface$(EXEEXT)'; \ + b='api/variable_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file 
$$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/vector_data_interface.log: api/vector_data_interface$(EXEEXT) + @p='api/vector_data_interface$(EXEEXT)'; \ + b='api/vector_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +api/void_data_interface.log: api/void_data_interface$(EXEEXT) + @p='api/void_data_interface$(EXEEXT)'; \ + b='api/void_data_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/hello_world.log: basic_examples/hello_world$(EXEEXT) + @p='basic_examples/hello_world$(EXEEXT)'; \ + b='basic_examples/hello_world'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/hooks.log: basic_examples/hooks$(EXEEXT) + @p='basic_examples/hooks$(EXEEXT)'; \ + b='basic_examples/hooks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/topology.log: basic_examples/topology$(EXEEXT) + @p='basic_examples/topology$(EXEEXT)'; \ + b='basic_examples/topology'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/vector_scal.log: 
basic_examples/vector_scal$(EXEEXT) + @p='basic_examples/vector_scal$(EXEEXT)'; \ + b='basic_examples/vector_scal'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/mult.log: basic_examples/mult$(EXEEXT) + @p='basic_examples/mult$(EXEEXT)'; \ + b='basic_examples/mult'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/block.log: basic_examples/block$(EXEEXT) + @p='basic_examples/block$(EXEEXT)'; \ + b='basic_examples/block'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/variable.log: basic_examples/variable$(EXEEXT) + @p='basic_examples/variable$(EXEEXT)'; \ + b='basic_examples/variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/multiformat.log: basic_examples/multiformat$(EXEEXT) + @p='basic_examples/multiformat$(EXEEXT)'; \ + b='basic_examples/multiformat'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/dynamic_handles.log: basic_examples/dynamic_handles$(EXEEXT) + @p='basic_examples/dynamic_handles$(EXEEXT)'; \ + b='basic_examples/dynamic_handles'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file 
$$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/task_insert_color.log: basic_examples/task_insert_color$(EXEEXT) + @p='basic_examples/task_insert_color$(EXEEXT)'; \ + b='basic_examples/task_insert_color'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/ndim.log: basic_examples/ndim$(EXEEXT) + @p='basic_examples/ndim$(EXEEXT)'; \ + b='basic_examples/ndim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mlr/mlr.log: mlr/mlr$(EXEEXT) + @p='mlr/mlr$(EXEEXT)'; \ + b='mlr/mlr'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cpp/incrementer_cpp.log: cpp/incrementer_cpp$(EXEEXT) + @p='cpp/incrementer_cpp$(EXEEXT)'; \ + b='cpp/incrementer_cpp'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cpp/add_vectors.log: cpp/add_vectors$(EXEEXT) + @p='cpp/add_vectors$(EXEEXT)'; \ + b='cpp/add_vectors'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cpp/add_vectors_interface.log: cpp/add_vectors_interface$(EXEEXT) + @p='cpp/add_vectors_interface$(EXEEXT)'; \ + 
b='cpp/add_vectors_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/alloc.log: filters/alloc$(EXEEXT) + @p='filters/alloc$(EXEEXT)'; \ + b='filters/alloc'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fread.log: filters/fread$(EXEEXT) + @p='filters/fread$(EXEEXT)'; \ + b='filters/fread'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fvector.log: filters/fvector$(EXEEXT) + @p='filters/fvector$(EXEEXT)'; \ + b='filters/fvector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fvector_pick_variable.log: filters/fvector_pick_variable$(EXEEXT) + @p='filters/fvector_pick_variable$(EXEEXT)'; \ + b='filters/fvector_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/ftensor.log: filters/ftensor$(EXEEXT) + @p='filters/ftensor$(EXEEXT)'; \ + b='filters/ftensor'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/ftensor_pick_block.log: 
filters/ftensor_pick_block$(EXEEXT) + @p='filters/ftensor_pick_block$(EXEEXT)'; \ + b='filters/ftensor_pick_block'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/ftensor_pick_variable.log: filters/ftensor_pick_variable$(EXEEXT) + @p='filters/ftensor_pick_variable$(EXEEXT)'; \ + b='filters/ftensor_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fblock.log: filters/fblock$(EXEEXT) + @p='filters/fblock$(EXEEXT)'; \ + b='filters/fblock'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fblock_pick_matrix.log: filters/fblock_pick_matrix$(EXEEXT) + @p='filters/fblock_pick_matrix$(EXEEXT)'; \ + b='filters/fblock_pick_matrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fblock_pick_variable.log: filters/fblock_pick_variable$(EXEEXT) + @p='filters/fblock_pick_variable$(EXEEXT)'; \ + b='filters/fblock_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmatrix.log: filters/fmatrix$(EXEEXT) + @p='filters/fmatrix$(EXEEXT)'; \ + b='filters/fmatrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs 
\ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmatrix_pick_vector.log: filters/fmatrix_pick_vector$(EXEEXT) + @p='filters/fmatrix_pick_vector$(EXEEXT)'; \ + b='filters/fmatrix_pick_vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmatrix_pick_variable.log: filters/fmatrix_pick_variable$(EXEEXT) + @p='filters/fmatrix_pick_variable$(EXEEXT)'; \ + b='filters/fmatrix_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim.log: filters/fndim$(EXEEXT) + @p='filters/fndim$(EXEEXT)'; \ + b='filters/fndim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_pick_ndim.log: filters/fndim_pick_ndim$(EXEEXT) + @p='filters/fndim_pick_ndim$(EXEEXT)'; \ + b='filters/fndim_pick_ndim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_5d_pick_tensor.log: filters/fndim_5d_pick_tensor$(EXEEXT) + @p='filters/fndim_5d_pick_tensor$(EXEEXT)'; \ + b='filters/fndim_5d_pick_tensor'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_4d_pick_block.log: 
filters/fndim_4d_pick_block$(EXEEXT) + @p='filters/fndim_4d_pick_block$(EXEEXT)'; \ + b='filters/fndim_4d_pick_block'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_3d_pick_matrix.log: filters/fndim_3d_pick_matrix$(EXEEXT) + @p='filters/fndim_3d_pick_matrix$(EXEEXT)'; \ + b='filters/fndim_3d_pick_matrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_2d_pick_vector.log: filters/fndim_2d_pick_vector$(EXEEXT) + @p='filters/fndim_2d_pick_vector$(EXEEXT)'; \ + b='filters/fndim_2d_pick_vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_1d_pick_variable.log: filters/fndim_1d_pick_variable$(EXEEXT) + @p='filters/fndim_1d_pick_variable$(EXEEXT)'; \ + b='filters/fndim_1d_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_pick_variable.log: filters/fndim_pick_variable$(EXEEXT) + @p='filters/fndim_pick_variable$(EXEEXT)'; \ + b='filters/fndim_pick_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_to_tensor.log: filters/fndim_to_tensor$(EXEEXT) + @p='filters/fndim_to_tensor$(EXEEXT)'; \ + 
b='filters/fndim_to_tensor'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_to_block.log: filters/fndim_to_block$(EXEEXT) + @p='filters/fndim_to_block$(EXEEXT)'; \ + b='filters/fndim_to_block'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_to_matrix.log: filters/fndim_to_matrix$(EXEEXT) + @p='filters/fndim_to_matrix$(EXEEXT)'; \ + b='filters/fndim_to_matrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_to_vector.log: filters/fndim_to_vector$(EXEEXT) + @p='filters/fndim_to_vector$(EXEEXT)'; \ + b='filters/fndim_to_vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fndim_to_variable.log: filters/fndim_to_variable$(EXEEXT) + @p='filters/fndim_to_variable$(EXEEXT)'; \ + b='filters/fndim_to_variable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmultiple_manual.log: filters/fmultiple_manual$(EXEEXT) + @p='filters/fmultiple_manual$(EXEEXT)'; \ + b='filters/fmultiple_manual'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmultiple_submit.log: filters/fmultiple_submit$(EXEEXT) + @p='filters/fmultiple_submit$(EXEEXT)'; \ + b='filters/fmultiple_submit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmultiple_submit_readonly.log: filters/fmultiple_submit_readonly$(EXEEXT) + @p='filters/fmultiple_submit_readonly$(EXEEXT)'; \ + b='filters/fmultiple_submit_readonly'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmultiple_submit_readonly_downgrade.log: filters/fmultiple_submit_readonly_downgrade$(EXEEXT) + @p='filters/fmultiple_submit_readonly_downgrade$(EXEEXT)'; \ + b='filters/fmultiple_submit_readonly_downgrade'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/fmultiple_submit_implicit.log: filters/fmultiple_submit_implicit$(EXEEXT) + @p='filters/fmultiple_submit_implicit$(EXEEXT)'; \ + b='filters/fmultiple_submit_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/frecursive.log: filters/frecursive$(EXEEXT) + @p='filters/frecursive$(EXEEXT)'; \ + b='filters/frecursive'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +filters/shadow.log: filters/shadow$(EXEEXT) + @p='filters/shadow$(EXEEXT)'; \ + b='filters/shadow'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/shadow2d.log: filters/shadow2d$(EXEEXT) + @p='filters/shadow2d$(EXEEXT)'; \ + b='filters/shadow2d'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/shadow3d.log: filters/shadow3d$(EXEEXT) + @p='filters/shadow3d$(EXEEXT)'; \ + b='filters/shadow3d'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/shadow4d.log: filters/shadow4d$(EXEEXT) + @p='filters/shadow4d$(EXEEXT)'; \ + b='filters/shadow4d'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/shadownd.log: filters/shadownd$(EXEEXT) + @p='filters/shadownd$(EXEEXT)'; \ + b='filters/shadownd'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tag_example/tag_example.log: tag_example/tag_example$(EXEEXT) + @p='tag_example/tag_example$(EXEEXT)'; \ + b='tag_example/tag_example'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) 
\ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tag_example/tag_example2.log: tag_example/tag_example2$(EXEEXT) + @p='tag_example/tag_example2$(EXEEXT)'; \ + b='tag_example/tag_example2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tag_example/tag_example3.log: tag_example/tag_example3$(EXEEXT) + @p='tag_example/tag_example3$(EXEEXT)'; \ + b='tag_example/tag_example3'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tag_example/tag_example4.log: tag_example/tag_example4$(EXEEXT) + @p='tag_example/tag_example4$(EXEEXT)'; \ + b='tag_example/tag_example4'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tag_example/tag_restartable.log: tag_example/tag_restartable$(EXEEXT) + @p='tag_example/tag_restartable$(EXEEXT)'; \ + b='tag_example/tag_restartable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +transactions/trs_inc.log: transactions/trs_inc$(EXEEXT) + @p='transactions/trs_inc$(EXEEXT)'; \ + b='transactions/trs_inc'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +spmd/vector_scal_spmd.log: spmd/vector_scal_spmd$(EXEEXT) + @p='spmd/vector_scal_spmd$(EXEEXT)'; \ + b='spmd/vector_scal_spmd'; \ + 
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +spmv/spmv.log: spmv/spmv$(EXEEXT) + @p='spmv/spmv$(EXEEXT)'; \ + b='spmv/spmv'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +callback/callback.log: callback/callback$(EXEEXT) + @p='callback/callback$(EXEEXT)'; \ + b='callback/callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +callback/prologue.log: callback/prologue$(EXEEXT) + @p='callback/prologue$(EXEEXT)'; \ + b='callback/prologue'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +incrementer/incrementer.log: incrementer/incrementer$(EXEEXT) + @p='incrementer/incrementer$(EXEEXT)'; \ + b='incrementer/incrementer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +binary/binary.log: binary/binary$(EXEEXT) + @p='binary/binary$(EXEEXT)'; \ + b='binary/binary'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +interface/complex.log: interface/complex$(EXEEXT) + @p='interface/complex$(EXEEXT)'; \ + b='interface/complex'; \ + 
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +interface/complex_dev_handle/complex_dev_handle.log: interface/complex_dev_handle/complex_dev_handle$(EXEEXT) + @p='interface/complex_dev_handle/complex_dev_handle$(EXEEXT)'; \ + b='interface/complex_dev_handle/complex_dev_handle'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matvecmult/matvecmult.log: matvecmult/matvecmult$(EXEEXT) + @p='matvecmult/matvecmult$(EXEEXT)'; \ + b='matvecmult/matvecmult'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +profiling/profiling.log: profiling/profiling$(EXEEXT) + @p='profiling/profiling$(EXEEXT)'; \ + b='profiling/profiling'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perf_monitoring/perf_counters_01.log: perf_monitoring/perf_counters_01$(EXEEXT) + @p='perf_monitoring/perf_counters_01$(EXEEXT)'; \ + b='perf_monitoring/perf_counters_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perf_monitoring/perf_counters_02.log: perf_monitoring/perf_counters_02$(EXEEXT) + @p='perf_monitoring/perf_counters_02$(EXEEXT)'; \ + b='perf_monitoring/perf_counters_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + 
--log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perf_steering/perf_knobs_01.log: perf_steering/perf_knobs_01$(EXEEXT) + @p='perf_steering/perf_knobs_01$(EXEEXT)'; \ + b='perf_steering/perf_knobs_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perf_steering/perf_knobs_02.log: perf_steering/perf_knobs_02$(EXEEXT) + @p='perf_steering/perf_knobs_02$(EXEEXT)'; \ + b='perf_steering/perf_knobs_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perf_steering/perf_knobs_03.log: perf_steering/perf_knobs_03$(EXEEXT) + @p='perf_steering/perf_knobs_03$(EXEEXT)'; \ + b='perf_steering/perf_knobs_03'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +scheduler/heteroprio_test.log: scheduler/heteroprio_test$(EXEEXT) + @p='scheduler/heteroprio_test$(EXEEXT)'; \ + b='scheduler/heteroprio_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx.log: sched_ctx/sched_ctx$(EXEEXT) + @p='sched_ctx/sched_ctx$(EXEEXT)'; \ + b='sched_ctx/sched_ctx'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_empty.log: sched_ctx/sched_ctx_empty$(EXEEXT) + @p='sched_ctx/sched_ctx_empty$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_empty'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_remove.log: sched_ctx/sched_ctx_remove$(EXEEXT) + @p='sched_ctx/sched_ctx_remove$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_remove'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_delete.log: sched_ctx/sched_ctx_delete$(EXEEXT) + @p='sched_ctx/sched_ctx_delete$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_delete'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/two_cpu_contexts.log: sched_ctx/two_cpu_contexts$(EXEEXT) + @p='sched_ctx/two_cpu_contexts$(EXEEXT)'; \ + b='sched_ctx/two_cpu_contexts'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/dummy_sched_with_ctx.log: sched_ctx/dummy_sched_with_ctx$(EXEEXT) + @p='sched_ctx/dummy_sched_with_ctx$(EXEEXT)'; \ + b='sched_ctx/dummy_sched_with_ctx'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +worker_collections/worker_tree_example.log: worker_collections/worker_tree_example$(EXEEXT) + 
@p='worker_collections/worker_tree_example$(EXEEXT)'; \ + b='worker_collections/worker_tree_example'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +reductions/dot_product.log: reductions/dot_product$(EXEEXT) + @p='reductions/dot_product$(EXEEXT)'; \ + b='reductions/dot_product'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +reductions/minmax_reduction.log: reductions/minmax_reduction$(EXEEXT) + @p='reductions/minmax_reduction$(EXEEXT)'; \ + b='reductions/minmax_reduction'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/task_end_dep.log: dependency/task_end_dep$(EXEEXT) + @p='dependency/task_end_dep$(EXEEXT)'; \ + b='dependency/task_end_dep'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/task_end_dep_add.log: dependency/task_end_dep_add$(EXEEXT) + @p='dependency/task_end_dep_add$(EXEEXT)'; \ + b='dependency/task_end_dep_add'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/sequential_consistency.log: dependency/sequential_consistency$(EXEEXT) + @p='dependency/sequential_consistency$(EXEEXT)'; \ + b='dependency/sequential_consistency'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +subgraphs/manual.log: subgraphs/manual$(EXEEXT) + @p='subgraphs/manual$(EXEEXT)'; \ + b='subgraphs/manual'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +subgraphs/partition.log: subgraphs/partition$(EXEEXT) + @p='subgraphs/partition$(EXEEXT)'; \ + b='subgraphs/partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +subgraphs/plan.log: subgraphs/plan$(EXEEXT) + @p='subgraphs/plan$(EXEEXT)'; \ + b='subgraphs/plan'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cpp/add_vectors_cpp11.log: cpp/add_vectors_cpp11$(EXEEXT) + @p='cpp/add_vectors_cpp11$(EXEEXT)'; \ + b='cpp/add_vectors_cpp11'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +fortran/hello.log: fortran/hello$(EXEEXT) + @p='fortran/hello$(EXEEXT)'; \ + b='fortran/hello'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basic_examples/vector_scal_fortran.log: basic_examples/vector_scal_fortran$(EXEEXT) + 
@p='basic_examples/vector_scal_fortran$(EXEEXT)'; \ + b='basic_examples/vector_scal_fortran'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +fortran90/f90_example.log: fortran90/f90_example$(EXEEXT) + @p='fortran90/f90_example$(EXEEXT)'; \ + b='fortran90/f90_example'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_vector.log: native_fortran/nf_vector$(EXEEXT) + @p='native_fortran/nf_vector$(EXEEXT)'; \ + b='native_fortran/nf_vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_matrix.log: native_fortran/nf_matrix$(EXEEXT) + @p='native_fortran/nf_matrix$(EXEEXT)'; \ + b='native_fortran/nf_matrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_example.log: native_fortran/nf_example$(EXEEXT) + @p='native_fortran/nf_example$(EXEEXT)'; \ + b='native_fortran/nf_example'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_dynbuf.log: native_fortran/nf_dynbuf$(EXEEXT) + @p='native_fortran/nf_dynbuf$(EXEEXT)'; \ + b='native_fortran/nf_dynbuf'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file 
$$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_varbuf.log: native_fortran/nf_varbuf$(EXEEXT) + @p='native_fortran/nf_varbuf$(EXEEXT)'; \ + b='native_fortran/nf_varbuf'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_sched_ctx.log: native_fortran/nf_sched_ctx$(EXEEXT) + @p='native_fortran/nf_sched_ctx$(EXEEXT)'; \ + b='native_fortran/nf_sched_ctx'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_partition.log: native_fortran/nf_partition$(EXEEXT) + @p='native_fortran/nf_partition$(EXEEXT)'; \ + b='native_fortran/nf_partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mult/sgemm.log: mult/sgemm$(EXEEXT) + @p='mult/sgemm$(EXEEXT)'; \ + b='mult/sgemm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mult/dgemm.log: mult/dgemm$(EXEEXT) + @p='mult/dgemm$(EXEEXT)'; \ + b='mult/dgemm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_example_float.log: lu/lu_example_float$(EXEEXT) + @p='lu/lu_example_float$(EXEEXT)'; \ + b='lu/lu_example_float'; \ + 
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_example_double.log: lu/lu_example_double$(EXEEXT) + @p='lu/lu_example_double$(EXEEXT)'; \ + b='lu/lu_example_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_implicit_example_float.log: lu/lu_implicit_example_float$(EXEEXT) + @p='lu/lu_implicit_example_float$(EXEEXT)'; \ + b='lu/lu_implicit_example_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_implicit_example_double.log: lu/lu_implicit_example_double$(EXEEXT) + @p='lu/lu_implicit_example_double$(EXEEXT)'; \ + b='lu/lu_implicit_example_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_tag.log: cholesky/cholesky_tag$(EXEEXT) + @p='cholesky/cholesky_tag$(EXEEXT)'; \ + b='cholesky/cholesky_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_tile_tag.log: cholesky/cholesky_tile_tag$(EXEEXT) + @p='cholesky/cholesky_tile_tag$(EXEEXT)'; \ + b='cholesky/cholesky_tile_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_implicit.log: cholesky/cholesky_implicit$(EXEEXT) + @p='cholesky/cholesky_implicit$(EXEEXT)'; \ + b='cholesky/cholesky_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_compil.log: cholesky/cholesky_compil$(EXEEXT) + @p='cholesky/cholesky_compil$(EXEEXT)'; \ + b='cholesky/cholesky_compil'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +axpy/axpy.log: axpy/axpy$(EXEEXT) + @p='axpy/axpy$(EXEEXT)'; \ + b='axpy/axpy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky_grain_tag.log: cholesky/cholesky_grain_tag$(EXEEXT) + @p='cholesky/cholesky_grain_tag$(EXEEXT)'; \ + b='cholesky/cholesky_grain_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +heat/heat.log: heat/heat$(EXEEXT) + @p='heat/heat$(EXEEXT)'; \ + b='heat/heat'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cg/cg.log: cg/cg$(EXEEXT) + @p='cg/cg$(EXEEXT)'; \ + b='cg/cg'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +pipeline/pipeline.log: pipeline/pipeline$(EXEEXT) + @p='pipeline/pipeline$(EXEEXT)'; \ + b='pipeline/pipeline'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +transactions/trs_sgemm.log: transactions/trs_sgemm$(EXEEXT) + @p='transactions/trs_sgemm$(EXEEXT)'; \ + b='transactions/trs_sgemm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_example_complex_float.log: lu/lu_example_complex_float$(EXEEXT) + @p='lu/lu_example_complex_float$(EXEEXT)'; \ + b='lu/lu_example_complex_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_example_complex_double.log: lu/lu_example_complex_double$(EXEEXT) + @p='lu/lu_example_complex_double$(EXEEXT)'; \ + b='lu/lu_example_complex_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lu/lu_implicit_example_complex_float.log: lu/lu_implicit_example_complex_float$(EXEEXT) + @p='lu/lu_implicit_example_complex_float$(EXEEXT)'; \ + b='lu/lu_implicit_example_complex_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+lu/lu_implicit_example_complex_double.log: lu/lu_implicit_example_complex_double$(EXEEXT) + @p='lu/lu_implicit_example_complex_double$(EXEEXT)'; \ + b='lu/lu_implicit_example_complex_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +spmv/dw_block_spmv.log: spmv/dw_block_spmv$(EXEEXT) + @p='spmv/dw_block_spmv$(EXEEXT)'; \ + b='spmv/dw_block_spmv'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/vector_scal_omp.log: openmp/vector_scal_omp$(EXEEXT) + @p='openmp/vector_scal_omp$(EXEEXT)'; \ + b='openmp/vector_scal_omp'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_without_sched_policy.log: sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) + @p='sched_ctx/sched_ctx_without_sched_policy$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_without_sched_policy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/nested_sched_ctxs.log: sched_ctx/nested_sched_ctxs$(EXEEXT) + @p='sched_ctx/nested_sched_ctxs$(EXEEXT)'; \ + b='sched_ctx/nested_sched_ctxs'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_without_sched_policy_awake.log: 
sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) + @p='sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_without_sched_policy_awake'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/parallel_tasks_reuse_handle.log: sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) + @p='sched_ctx/parallel_tasks_reuse_handle$(EXEEXT)'; \ + b='sched_ctx/parallel_tasks_reuse_handle'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/parallel_code.log: sched_ctx/parallel_code$(EXEEXT) + @p='sched_ctx/parallel_code$(EXEEXT)'; \ + b='sched_ctx/parallel_code'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_workers/parallel_workers.log: parallel_workers/parallel_workers$(EXEEXT) + @p='parallel_workers/parallel_workers$(EXEEXT)'; \ + b='parallel_workers/parallel_workers'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_workers/parallel_workers_func.log: parallel_workers/parallel_workers_func$(EXEEXT) + @p='parallel_workers/parallel_workers_func$(EXEEXT)'; \ + b='parallel_workers/parallel_workers_func'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+parallel_workers/parallel_workers_oldapi.log: parallel_workers/parallel_workers_oldapi$(EXEEXT) + @p='parallel_workers/parallel_workers_oldapi$(EXEEXT)'; \ + b='parallel_workers/parallel_workers_oldapi'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/gpu_partition.log: sched_ctx/gpu_partition$(EXEEXT) + @p='sched_ctx/gpu_partition$(EXEEXT)'; \ + b='sched_ctx/gpu_partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' 
| \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-recursive +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(DATA) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(pkglibdir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done 
+install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-recursive +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f api/$(DEPDIR)/$(am__dirstamp) + -rm -f api/$(am__dirstamp) + -rm -f axpy/$(DEPDIR)/$(am__dirstamp) + -rm -f axpy/$(am__dirstamp) + -rm -f basic_examples/$(DEPDIR)/$(am__dirstamp) + -rm -f basic_examples/$(am__dirstamp) + -rm -f binary/$(DEPDIR)/$(am__dirstamp) + -rm -f binary/$(am__dirstamp) + -rm -f callback/$(DEPDIR)/$(am__dirstamp) + -rm -f callback/$(am__dirstamp) + -rm -f cg/$(DEPDIR)/$(am__dirstamp) + -rm -f cg/$(am__dirstamp) + -rm -f cholesky/$(DEPDIR)/$(am__dirstamp) + -rm -f cholesky/$(am__dirstamp) + -rm -f common/$(DEPDIR)/$(am__dirstamp) + -rm -f common/$(am__dirstamp) + -rm -f cpp/$(DEPDIR)/$(am__dirstamp) + -rm -f cpp/$(am__dirstamp) + -rm -f dependency/$(DEPDIR)/$(am__dirstamp) + -rm -f dependency/$(am__dirstamp) + -rm -f filters/$(DEPDIR)/$(am__dirstamp) + -rm -f filters/$(am__dirstamp) + -rm -f filters/custom_mf/$(DEPDIR)/$(am__dirstamp) + -rm -f filters/custom_mf/$(am__dirstamp) + -rm -f fortran/$(DEPDIR)/$(am__dirstamp) + -rm -f fortran/$(am__dirstamp) + -rm -f fortran90/$(DEPDIR)/$(am__dirstamp) + -rm -f fortran90/$(am__dirstamp) + -rm -f gl_interop/$(DEPDIR)/$(am__dirstamp) + -rm -f gl_interop/$(am__dirstamp) + -rm -f heat/$(DEPDIR)/$(am__dirstamp) + -rm -f heat/$(am__dirstamp) + -rm -f incrementer/$(DEPDIR)/$(am__dirstamp) + -rm -f incrementer/$(am__dirstamp) + -rm -f interface/$(DEPDIR)/$(am__dirstamp) + -rm -f interface/$(am__dirstamp) + -rm -f interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) + -rm -f interface/complex_dev_handle/$(am__dirstamp) + -rm -f lu/$(DEPDIR)/$(am__dirstamp) + -rm -f lu/$(am__dirstamp) + -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) + -rm -f mandelbrot/$(am__dirstamp) + -rm -f matvecmult/$(DEPDIR)/$(am__dirstamp) + -rm -f matvecmult/$(am__dirstamp) + -rm -f mlr/$(DEPDIR)/$(am__dirstamp) + -rm -f mlr/$(am__dirstamp) + -rm -f mult/$(DEPDIR)/$(am__dirstamp) + -rm -f mult/$(am__dirstamp) + -rm -f 
native_fortran/$(DEPDIR)/$(am__dirstamp) + -rm -f native_fortran/$(am__dirstamp) + -rm -f openmp/$(DEPDIR)/$(am__dirstamp) + -rm -f openmp/$(am__dirstamp) + -rm -f parallel_workers/$(DEPDIR)/$(am__dirstamp) + -rm -f parallel_workers/$(am__dirstamp) + -rm -f perf_monitoring/$(DEPDIR)/$(am__dirstamp) + -rm -f perf_monitoring/$(am__dirstamp) + -rm -f perf_steering/$(DEPDIR)/$(am__dirstamp) + -rm -f perf_steering/$(am__dirstamp) + -rm -f pi/$(DEPDIR)/$(am__dirstamp) + -rm -f pi/$(am__dirstamp) + -rm -f pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) + -rm -f pi/SobolQRNG/$(am__dirstamp) + -rm -f pipeline/$(DEPDIR)/$(am__dirstamp) + -rm -f pipeline/$(am__dirstamp) + -rm -f ppm_downscaler/$(DEPDIR)/$(am__dirstamp) + -rm -f ppm_downscaler/$(am__dirstamp) + -rm -f profiling/$(DEPDIR)/$(am__dirstamp) + -rm -f profiling/$(am__dirstamp) + -rm -f profiling_tool/$(DEPDIR)/$(am__dirstamp) + -rm -f profiling_tool/$(am__dirstamp) + -rm -f reductions/$(DEPDIR)/$(am__dirstamp) + -rm -f reductions/$(am__dirstamp) + -rm -f sched_ctx/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_ctx/$(am__dirstamp) + -rm -f sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_ctx_utils/$(am__dirstamp) + -rm -f scheduler/$(DEPDIR)/$(am__dirstamp) + -rm -f scheduler/$(am__dirstamp) + -rm -f spmd/$(DEPDIR)/$(am__dirstamp) + -rm -f spmd/$(am__dirstamp) + -rm -f spmv/$(DEPDIR)/$(am__dirstamp) + -rm -f spmv/$(am__dirstamp) + -rm -f spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) + -rm -f spmv/matrix_market/$(am__dirstamp) + -rm -f subgraphs/$(DEPDIR)/$(am__dirstamp) + -rm -f subgraphs/$(am__dirstamp) + -rm -f tag_example/$(DEPDIR)/$(am__dirstamp) + -rm -f tag_example/$(am__dirstamp) + -rm -f transactions/$(DEPDIR)/$(am__dirstamp) + -rm -f transactions/$(am__dirstamp) + -rm -f worker_collections/$(DEPDIR)/$(am__dirstamp) + -rm -f worker_collections/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-recursive + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS \ + clean-pkglibLTLIBRARIES mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f api/$(DEPDIR)/bcsr_data_interface.Po + -rm -f api/$(DEPDIR)/block_data_interface.Po + -rm -f api/$(DEPDIR)/coo_data_interface.Po + -rm -f api/$(DEPDIR)/csr_data_interface.Po + -rm -f api/$(DEPDIR)/matrix_data_interface.Po + -rm -f api/$(DEPDIR)/multiformat_data_interface.Po + -rm -f api/$(DEPDIR)/tensor_data_interface.Po + -rm -f api/$(DEPDIR)/variable_data_interface.Po + -rm -f api/$(DEPDIR)/vector_data_interface.Po + -rm -f api/$(DEPDIR)/void_data_interface.Po + -rm -f axpy/$(DEPDIR)/axpy.Po + -rm -f axpy/$(DEPDIR)/axpy_opencl.Po + -rm -f basic_examples/$(DEPDIR)/block.Po + -rm -f basic_examples/$(DEPDIR)/block_cpu.Po + -rm -f basic_examples/$(DEPDIR)/block_opencl.Po + -rm -f basic_examples/$(DEPDIR)/dynamic_handles.Po + -rm -f basic_examples/$(DEPDIR)/hello_world.Po + -rm -f basic_examples/$(DEPDIR)/hooks.Po + -rm -f basic_examples/$(DEPDIR)/mult.Po + -rm -f basic_examples/$(DEPDIR)/multiformat.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_opencl.Po + -rm -f basic_examples/$(DEPDIR)/ndim.Po + -rm -f basic_examples/$(DEPDIR)/task_insert_color.Po + -rm -f basic_examples/$(DEPDIR)/topology.Po + -rm -f basic_examples/$(DEPDIR)/variable.Po + -rm -f basic_examples/$(DEPDIR)/variable_kernels_cpu.Po + -rm -f basic_examples/$(DEPDIR)/variable_kernels_opencl.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_c.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_cpu.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_opencl.Po + -rm -f binary/$(DEPDIR)/binary.Po + 
-rm -f callback/$(DEPDIR)/callback.Po + -rm -f callback/$(DEPDIR)/prologue.Po + -rm -f cg/$(DEPDIR)/cg.Po + -rm -f cholesky/$(DEPDIR)/cholesky_compil.Po + -rm -f cholesky/$(DEPDIR)/cholesky_grain_tag.Po + -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po + -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po + -rm -f cholesky/$(DEPDIR)/cholesky_models.Po + -rm -f cholesky/$(DEPDIR)/cholesky_tag.Po + -rm -f cholesky/$(DEPDIR)/cholesky_tile_tag.Po + -rm -f cholesky/$(DEPDIR)/libmy_dmda.Plo + -rm -f common/$(DEPDIR)/blas.Po + -rm -f cpp/$(DEPDIR)/add_vectors.Po + -rm -f cpp/$(DEPDIR)/add_vectors_cpp11.Po + -rm -f cpp/$(DEPDIR)/add_vectors_interface.Po + -rm -f cpp/$(DEPDIR)/incrementer_cpp.Po + -rm -f dependency/$(DEPDIR)/sequential_consistency.Po + -rm -f dependency/$(DEPDIR)/task_end_dep.Po + -rm -f dependency/$(DEPDIR)/task_end_dep_add.Po + -rm -f filters/$(DEPDIR)/alloc.Po + -rm -f filters/$(DEPDIR)/f3d_cpu.Po + -rm -f filters/$(DEPDIR)/f4d_cpu.Po + -rm -f filters/$(DEPDIR)/f5d_print.Po + -rm -f filters/$(DEPDIR)/fblock.Po + -rm -f filters/$(DEPDIR)/fblock_cpu.Po + -rm -f filters/$(DEPDIR)/fblock_opencl.Po + -rm -f filters/$(DEPDIR)/fblock_pick_matrix.Po + -rm -f filters/$(DEPDIR)/fblock_pick_variable.Po + -rm -f filters/$(DEPDIR)/fblock_print.Po + -rm -f filters/$(DEPDIR)/fmatrix.Po + -rm -f filters/$(DEPDIR)/fmatrix_cpu.Po + -rm -f filters/$(DEPDIR)/fmatrix_pick_variable.Po + -rm -f filters/$(DEPDIR)/fmatrix_pick_vector.Po + -rm -f filters/$(DEPDIR)/fmatrix_print.Po + -rm -f filters/$(DEPDIR)/fmultiple_manual.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_implicit.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po + -rm -f filters/$(DEPDIR)/fndim.Po + -rm -f filters/$(DEPDIR)/fndim_1d_pick_variable.Po + -rm -f filters/$(DEPDIR)/fndim_2d_pick_vector.Po + -rm -f filters/$(DEPDIR)/fndim_3d_pick_matrix.Po + -rm -f filters/$(DEPDIR)/fndim_4d_pick_block.Po + 
-rm -f filters/$(DEPDIR)/fndim_5d_pick_tensor.Po + -rm -f filters/$(DEPDIR)/fndim_pick_ndim.Po + -rm -f filters/$(DEPDIR)/fndim_pick_variable.Po + -rm -f filters/$(DEPDIR)/fndim_to_block.Po + -rm -f filters/$(DEPDIR)/fndim_to_matrix.Po + -rm -f filters/$(DEPDIR)/fndim_to_tensor.Po + -rm -f filters/$(DEPDIR)/fndim_to_variable.Po + -rm -f filters/$(DEPDIR)/fndim_to_vector.Po + -rm -f filters/$(DEPDIR)/fread.Po + -rm -f filters/$(DEPDIR)/frecursive.Po + -rm -f filters/$(DEPDIR)/ftensor.Po + -rm -f filters/$(DEPDIR)/ftensor_cpu.Po + -rm -f filters/$(DEPDIR)/ftensor_pick_block.Po + -rm -f filters/$(DEPDIR)/ftensor_pick_variable.Po + -rm -f filters/$(DEPDIR)/ftensor_print.Po + -rm -f filters/$(DEPDIR)/fvector.Po + -rm -f filters/$(DEPDIR)/fvector_cpu.Po + -rm -f filters/$(DEPDIR)/fvector_pick_variable.Po + -rm -f filters/$(DEPDIR)/shadow.Po + -rm -f filters/$(DEPDIR)/shadow2d.Po + -rm -f filters/$(DEPDIR)/shadow3d.Po + -rm -f filters/$(DEPDIR)/shadow4d.Po + -rm -f filters/$(DEPDIR)/shadownd.Po + -rm -f filters/custom_mf/$(DEPDIR)/conversion_opencl.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_interface.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_opencl.Po + -rm -f fortran/$(DEPDIR)/hello_c.Po + -rm -f fortran90/$(DEPDIR)/marshalling.Po + -rm -f gl_interop/$(DEPDIR)/gl_interop.Po + -rm -f gl_interop/$(DEPDIR)/gl_interop_idle.Po + -rm -f heat/$(DEPDIR)/dw_factolu.Po + -rm -f heat/$(DEPDIR)/dw_factolu_grain.Po + -rm -f heat/$(DEPDIR)/dw_factolu_kernels.Po + -rm -f heat/$(DEPDIR)/dw_factolu_tag.Po + -rm -f heat/$(DEPDIR)/dw_sparse_cg.Po + -rm -f heat/$(DEPDIR)/dw_sparse_cg_kernels.Po + -rm -f heat/$(DEPDIR)/heat.Po + -rm -f heat/$(DEPDIR)/heat_display.Po + -rm -f heat/$(DEPDIR)/lu_kernels_model.Po + -rm -f incrementer/$(DEPDIR)/incrementer.Po + -rm -f incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po + -rm -f interface/$(DEPDIR)/complex.Po + -rm 
-f interface/$(DEPDIR)/complex_filters.Po + -rm -f interface/$(DEPDIR)/complex_interface.Po + -rm -f interface/$(DEPDIR)/complex_kernels_opencl.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po + -rm -f lu/$(DEPDIR)/blas_complex.Po + -rm -f lu/$(DEPDIR)/clu.Po + -rm -f lu/$(DEPDIR)/clu_implicit.Po + -rm -f lu/$(DEPDIR)/clu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/clu_kernels.Po + -rm -f lu/$(DEPDIR)/clu_pivot.Po + -rm -f lu/$(DEPDIR)/dlu.Po + -rm -f lu/$(DEPDIR)/dlu_implicit.Po + -rm -f lu/$(DEPDIR)/dlu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/dlu_kernels.Po + -rm -f lu/$(DEPDIR)/dlu_pivot.Po + -rm -f lu/$(DEPDIR)/lu_example_complex_double.Po + -rm -f lu/$(DEPDIR)/lu_example_complex_float.Po + -rm -f lu/$(DEPDIR)/lu_example_double.Po + -rm -f lu/$(DEPDIR)/lu_example_float.Po + -rm -f lu/$(DEPDIR)/slu.Po + -rm -f lu/$(DEPDIR)/slu_implicit.Po + -rm -f lu/$(DEPDIR)/slu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/slu_kernels.Po + -rm -f lu/$(DEPDIR)/slu_pivot.Po + -rm -f lu/$(DEPDIR)/zlu.Po + -rm -f lu/$(DEPDIR)/zlu_implicit.Po + -rm -f lu/$(DEPDIR)/zlu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/zlu_kernels.Po + -rm -f lu/$(DEPDIR)/zlu_pivot.Po + -rm -f mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po + -rm -f matvecmult/$(DEPDIR)/matvecmult.Po + -rm -f mlr/$(DEPDIR)/mlr.Po + -rm -f mult/$(DEPDIR)/dgemm.Po + -rm -f mult/$(DEPDIR)/dgemm_layout.Po + -rm -f mult/$(DEPDIR)/sgemm.Po + -rm -f mult/$(DEPDIR)/sgemm_layout.Po + -rm -f openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po + 
-rm -f perf_monitoring/$(DEPDIR)/perf_counters_01.Po + -rm -f perf_monitoring/$(DEPDIR)/perf_counters_02.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_01.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_02.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_03.Po + -rm -f pi/$(DEPDIR)/pi.Po + -rm -f pi/$(DEPDIR)/pi_redux.Po + -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po + -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po + -rm -f pipeline/$(DEPDIR)/pipeline.Po + -rm -f ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po + -rm -f ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po + -rm -f profiling/$(DEPDIR)/profiling.Po + -rm -f profiling_tool/$(DEPDIR)/libprofiling_tool.Plo + -rm -f reductions/$(DEPDIR)/dot_product.Po + -rm -f reductions/$(DEPDIR)/minmax_reduction.Po + -rm -f sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po + -rm -f sched_ctx/$(DEPDIR)/gpu_partition.Po + -rm -f sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po + -rm -f sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po + -rm -f sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po + -rm -f sched_ctx/$(DEPDIR)/prio.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_delete.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_empty.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_remove.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po + -rm -f sched_ctx/$(DEPDIR)/two_cpu_contexts.Po + -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po + -rm -f scheduler/$(DEPDIR)/dummy_modular_sched.Po + -rm -f scheduler/$(DEPDIR)/dummy_sched.Po + -rm -f scheduler/$(DEPDIR)/heteroprio_test.Po + -rm -f scheduler/$(DEPDIR)/libdummy_sched.Plo + -rm -f spmd/$(DEPDIR)/vector_scal_spmd.Po + -rm -f spmv/$(DEPDIR)/dw_block_spmv.Po + -rm -f spmv/$(DEPDIR)/dw_block_spmv_kernels.Po + -rm -f spmv/$(DEPDIR)/spmv.Po + -rm -f spmv/$(DEPDIR)/spmv_kernels.Po + -rm -f 
spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po + -rm -f spmv/matrix_market/$(DEPDIR)/mmio.Po + -rm -f subgraphs/$(DEPDIR)/codelets.Po + -rm -f subgraphs/$(DEPDIR)/manual.Po + -rm -f subgraphs/$(DEPDIR)/partition.Po + -rm -f subgraphs/$(DEPDIR)/plan.Po + -rm -f tag_example/$(DEPDIR)/tag_example.Po + -rm -f tag_example/$(DEPDIR)/tag_example2.Po + -rm -f tag_example/$(DEPDIR)/tag_example3.Po + -rm -f tag_example/$(DEPDIR)/tag_example4.Po + -rm -f tag_example/$(DEPDIR)/tag_restartable.Po + -rm -f transactions/$(DEPDIR)/trs_inc.Po + -rm -f transactions/$(DEPDIR)/trs_sgemm.Po + -rm -f worker_collections/$(DEPDIR)/worker_list_example.Po + -rm -f worker_collections/$(DEPDIR)/worker_tree_example.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-examplebinPROGRAMS \ + install-nobase_STARPU_OPENCL_DATADATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-pkglibLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f api/$(DEPDIR)/bcsr_data_interface.Po + -rm -f api/$(DEPDIR)/block_data_interface.Po + -rm -f api/$(DEPDIR)/coo_data_interface.Po + -rm -f api/$(DEPDIR)/csr_data_interface.Po + -rm -f api/$(DEPDIR)/matrix_data_interface.Po + -rm -f api/$(DEPDIR)/multiformat_data_interface.Po + -rm -f api/$(DEPDIR)/tensor_data_interface.Po + -rm -f api/$(DEPDIR)/variable_data_interface.Po + -rm -f api/$(DEPDIR)/vector_data_interface.Po + -rm -f api/$(DEPDIR)/void_data_interface.Po + -rm -f axpy/$(DEPDIR)/axpy.Po + -rm -f axpy/$(DEPDIR)/axpy_opencl.Po + -rm -f 
basic_examples/$(DEPDIR)/block.Po + -rm -f basic_examples/$(DEPDIR)/block_cpu.Po + -rm -f basic_examples/$(DEPDIR)/block_opencl.Po + -rm -f basic_examples/$(DEPDIR)/dynamic_handles.Po + -rm -f basic_examples/$(DEPDIR)/hello_world.Po + -rm -f basic_examples/$(DEPDIR)/hooks.Po + -rm -f basic_examples/$(DEPDIR)/mult.Po + -rm -f basic_examples/$(DEPDIR)/multiformat.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po + -rm -f basic_examples/$(DEPDIR)/multiformat_opencl.Po + -rm -f basic_examples/$(DEPDIR)/ndim.Po + -rm -f basic_examples/$(DEPDIR)/task_insert_color.Po + -rm -f basic_examples/$(DEPDIR)/topology.Po + -rm -f basic_examples/$(DEPDIR)/variable.Po + -rm -f basic_examples/$(DEPDIR)/variable_kernels_cpu.Po + -rm -f basic_examples/$(DEPDIR)/variable_kernels_opencl.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_c.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_cpu.Po + -rm -f basic_examples/$(DEPDIR)/vector_scal_opencl.Po + -rm -f binary/$(DEPDIR)/binary.Po + -rm -f callback/$(DEPDIR)/callback.Po + -rm -f callback/$(DEPDIR)/prologue.Po + -rm -f cg/$(DEPDIR)/cg.Po + -rm -f cholesky/$(DEPDIR)/cholesky_compil.Po + -rm -f cholesky/$(DEPDIR)/cholesky_grain_tag.Po + -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po + -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po + -rm -f cholesky/$(DEPDIR)/cholesky_models.Po + -rm -f cholesky/$(DEPDIR)/cholesky_tag.Po + -rm -f cholesky/$(DEPDIR)/cholesky_tile_tag.Po + -rm -f cholesky/$(DEPDIR)/libmy_dmda.Plo + -rm -f common/$(DEPDIR)/blas.Po + -rm -f cpp/$(DEPDIR)/add_vectors.Po + -rm -f cpp/$(DEPDIR)/add_vectors_cpp11.Po + -rm -f cpp/$(DEPDIR)/add_vectors_interface.Po + -rm -f cpp/$(DEPDIR)/incrementer_cpp.Po + -rm -f dependency/$(DEPDIR)/sequential_consistency.Po + -rm -f dependency/$(DEPDIR)/task_end_dep.Po + -rm -f dependency/$(DEPDIR)/task_end_dep_add.Po + -rm -f filters/$(DEPDIR)/alloc.Po 
+ -rm -f filters/$(DEPDIR)/f3d_cpu.Po + -rm -f filters/$(DEPDIR)/f4d_cpu.Po + -rm -f filters/$(DEPDIR)/f5d_print.Po + -rm -f filters/$(DEPDIR)/fblock.Po + -rm -f filters/$(DEPDIR)/fblock_cpu.Po + -rm -f filters/$(DEPDIR)/fblock_opencl.Po + -rm -f filters/$(DEPDIR)/fblock_pick_matrix.Po + -rm -f filters/$(DEPDIR)/fblock_pick_variable.Po + -rm -f filters/$(DEPDIR)/fblock_print.Po + -rm -f filters/$(DEPDIR)/fmatrix.Po + -rm -f filters/$(DEPDIR)/fmatrix_cpu.Po + -rm -f filters/$(DEPDIR)/fmatrix_pick_variable.Po + -rm -f filters/$(DEPDIR)/fmatrix_pick_vector.Po + -rm -f filters/$(DEPDIR)/fmatrix_print.Po + -rm -f filters/$(DEPDIR)/fmultiple_manual.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_implicit.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly.Po + -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po + -rm -f filters/$(DEPDIR)/fndim.Po + -rm -f filters/$(DEPDIR)/fndim_1d_pick_variable.Po + -rm -f filters/$(DEPDIR)/fndim_2d_pick_vector.Po + -rm -f filters/$(DEPDIR)/fndim_3d_pick_matrix.Po + -rm -f filters/$(DEPDIR)/fndim_4d_pick_block.Po + -rm -f filters/$(DEPDIR)/fndim_5d_pick_tensor.Po + -rm -f filters/$(DEPDIR)/fndim_pick_ndim.Po + -rm -f filters/$(DEPDIR)/fndim_pick_variable.Po + -rm -f filters/$(DEPDIR)/fndim_to_block.Po + -rm -f filters/$(DEPDIR)/fndim_to_matrix.Po + -rm -f filters/$(DEPDIR)/fndim_to_tensor.Po + -rm -f filters/$(DEPDIR)/fndim_to_variable.Po + -rm -f filters/$(DEPDIR)/fndim_to_vector.Po + -rm -f filters/$(DEPDIR)/fread.Po + -rm -f filters/$(DEPDIR)/frecursive.Po + -rm -f filters/$(DEPDIR)/ftensor.Po + -rm -f filters/$(DEPDIR)/ftensor_cpu.Po + -rm -f filters/$(DEPDIR)/ftensor_pick_block.Po + -rm -f filters/$(DEPDIR)/ftensor_pick_variable.Po + -rm -f filters/$(DEPDIR)/ftensor_print.Po + -rm -f filters/$(DEPDIR)/fvector.Po + -rm -f filters/$(DEPDIR)/fvector_cpu.Po + -rm -f filters/$(DEPDIR)/fvector_pick_variable.Po + -rm -f filters/$(DEPDIR)/shadow.Po + -rm -f 
filters/$(DEPDIR)/shadow2d.Po + -rm -f filters/$(DEPDIR)/shadow3d.Po + -rm -f filters/$(DEPDIR)/shadow4d.Po + -rm -f filters/$(DEPDIR)/shadownd.Po + -rm -f filters/custom_mf/$(DEPDIR)/conversion_opencl.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_interface.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po + -rm -f filters/custom_mf/$(DEPDIR)/custom_opencl.Po + -rm -f fortran/$(DEPDIR)/hello_c.Po + -rm -f fortran90/$(DEPDIR)/marshalling.Po + -rm -f gl_interop/$(DEPDIR)/gl_interop.Po + -rm -f gl_interop/$(DEPDIR)/gl_interop_idle.Po + -rm -f heat/$(DEPDIR)/dw_factolu.Po + -rm -f heat/$(DEPDIR)/dw_factolu_grain.Po + -rm -f heat/$(DEPDIR)/dw_factolu_kernels.Po + -rm -f heat/$(DEPDIR)/dw_factolu_tag.Po + -rm -f heat/$(DEPDIR)/dw_sparse_cg.Po + -rm -f heat/$(DEPDIR)/dw_sparse_cg_kernels.Po + -rm -f heat/$(DEPDIR)/heat.Po + -rm -f heat/$(DEPDIR)/heat_display.Po + -rm -f heat/$(DEPDIR)/lu_kernels_model.Po + -rm -f incrementer/$(DEPDIR)/incrementer.Po + -rm -f incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po + -rm -f interface/$(DEPDIR)/complex.Po + -rm -f interface/$(DEPDIR)/complex_filters.Po + -rm -f interface/$(DEPDIR)/complex_interface.Po + -rm -f interface/$(DEPDIR)/complex_kernels_opencl.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po + -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po + -rm -f lu/$(DEPDIR)/blas_complex.Po + -rm -f lu/$(DEPDIR)/clu.Po + -rm -f lu/$(DEPDIR)/clu_implicit.Po + -rm -f lu/$(DEPDIR)/clu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/clu_kernels.Po + -rm -f lu/$(DEPDIR)/clu_pivot.Po + -rm -f lu/$(DEPDIR)/dlu.Po + -rm -f lu/$(DEPDIR)/dlu_implicit.Po + -rm -f lu/$(DEPDIR)/dlu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/dlu_kernels.Po + -rm -f 
lu/$(DEPDIR)/dlu_pivot.Po + -rm -f lu/$(DEPDIR)/lu_example_complex_double.Po + -rm -f lu/$(DEPDIR)/lu_example_complex_float.Po + -rm -f lu/$(DEPDIR)/lu_example_double.Po + -rm -f lu/$(DEPDIR)/lu_example_float.Po + -rm -f lu/$(DEPDIR)/slu.Po + -rm -f lu/$(DEPDIR)/slu_implicit.Po + -rm -f lu/$(DEPDIR)/slu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/slu_kernels.Po + -rm -f lu/$(DEPDIR)/slu_pivot.Po + -rm -f lu/$(DEPDIR)/zlu.Po + -rm -f lu/$(DEPDIR)/zlu_implicit.Po + -rm -f lu/$(DEPDIR)/zlu_implicit_pivot.Po + -rm -f lu/$(DEPDIR)/zlu_kernels.Po + -rm -f lu/$(DEPDIR)/zlu_pivot.Po + -rm -f mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po + -rm -f matvecmult/$(DEPDIR)/matvecmult.Po + -rm -f mlr/$(DEPDIR)/mlr.Po + -rm -f mult/$(DEPDIR)/dgemm.Po + -rm -f mult/$(DEPDIR)/dgemm_layout.Po + -rm -f mult/$(DEPDIR)/sgemm.Po + -rm -f mult/$(DEPDIR)/sgemm_layout.Po + -rm -f openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po + -rm -f parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po + -rm -f perf_monitoring/$(DEPDIR)/perf_counters_01.Po + -rm -f perf_monitoring/$(DEPDIR)/perf_counters_02.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_01.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_02.Po + -rm -f perf_steering/$(DEPDIR)/perf_knobs_03.Po + -rm -f pi/$(DEPDIR)/pi.Po + -rm -f pi/$(DEPDIR)/pi_redux.Po + -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po + -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po + -rm -f pipeline/$(DEPDIR)/pipeline.Po + -rm -f ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po + -rm -f ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po + -rm -f profiling/$(DEPDIR)/profiling.Po + -rm -f profiling_tool/$(DEPDIR)/libprofiling_tool.Plo + -rm -f reductions/$(DEPDIR)/dot_product.Po + -rm -f reductions/$(DEPDIR)/minmax_reduction.Po + -rm -f sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po + -rm -f 
sched_ctx/$(DEPDIR)/gpu_partition.Po + -rm -f sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po + -rm -f sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po + -rm -f sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po + -rm -f sched_ctx/$(DEPDIR)/prio.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_delete.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_empty.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_remove.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po + -rm -f sched_ctx/$(DEPDIR)/two_cpu_contexts.Po + -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po + -rm -f scheduler/$(DEPDIR)/dummy_modular_sched.Po + -rm -f scheduler/$(DEPDIR)/dummy_sched.Po + -rm -f scheduler/$(DEPDIR)/heteroprio_test.Po + -rm -f scheduler/$(DEPDIR)/libdummy_sched.Plo + -rm -f spmd/$(DEPDIR)/vector_scal_spmd.Po + -rm -f spmv/$(DEPDIR)/dw_block_spmv.Po + -rm -f spmv/$(DEPDIR)/dw_block_spmv_kernels.Po + -rm -f spmv/$(DEPDIR)/spmv.Po + -rm -f spmv/$(DEPDIR)/spmv_kernels.Po + -rm -f spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po + -rm -f spmv/matrix_market/$(DEPDIR)/mmio.Po + -rm -f subgraphs/$(DEPDIR)/codelets.Po + -rm -f subgraphs/$(DEPDIR)/manual.Po + -rm -f subgraphs/$(DEPDIR)/partition.Po + -rm -f subgraphs/$(DEPDIR)/plan.Po + -rm -f tag_example/$(DEPDIR)/tag_example.Po + -rm -f tag_example/$(DEPDIR)/tag_example2.Po + -rm -f tag_example/$(DEPDIR)/tag_example3.Po + -rm -f tag_example/$(DEPDIR)/tag_example4.Po + -rm -f tag_example/$(DEPDIR)/tag_restartable.Po + -rm -f transactions/$(DEPDIR)/trs_inc.Po + -rm -f transactions/$(DEPDIR)/trs_sgemm.Po + -rm -f worker_collections/$(DEPDIR)/worker_list_example.Po + -rm -f worker_collections/$(DEPDIR)/worker_tree_example.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: 
mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS \ + uninstall-nobase_STARPU_OPENCL_DATADATA \ + uninstall-pkglibLTLIBRARIES + +.MAKE: $(am__recursive_targets) all check check-am install install-am \ + install-exec install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS \ + clean-pkglibLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-examplebinPROGRAMS install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man \ + install-nobase_STARPU_OPENCL_DATADATA install-pdf \ + install-pdf-am install-pkglibLTLIBRARIES install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + recheck tags tags-am uninstall uninstall-am \ + uninstall-examplebinPROGRAMS \ + uninstall-nobase_STARPU_OPENCL_DATADATA \ + uninstall-pkglibLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + 
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. 
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +clean-local: + -rm -rf mult/sgemm.traces lu/lu.traces + +@STARPU_HAVE_ICC_TRUE@.icc.o: +@STARPU_HAVE_ICC_TRUE@ $(V_icc) $(ICC) $(ICC_ARGS) -x c $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $< -c -o $@ + +# - link over source file to build our own object +fortran90/starpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ +native_fortran/fstarpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ + +# Fortran90 example + +# - express the creation of .mod along .o +@STARPU_HAVE_FC_TRUE@starpu_mod.mod: fortran90/starpu_mod.o +@STARPU_HAVE_FC_TRUE@mod_types.mod: fortran90/mod_types.o +@STARPU_HAVE_FC_TRUE@mod_compute.mod: fortran90/mod_compute.o +@STARPU_HAVE_FC_TRUE@mod_interface.mod: fortran90/mod_interface.o + +# - list explicit dependences to control proper module files dependencies +@STARPU_HAVE_FC_TRUE@fortran90/mod_compute.o: mod_types.mod mod_interface.mod starpu_mod.mod +@STARPU_HAVE_FC_TRUE@fortran90/f90_example.o: mod_types.mod mod_interface.mod mod_compute.mod starpu_mod.mod + +# Native Fortran example + +# - express the creation of .mod along .o +@STARPU_HAVE_FC_TRUE@fstarpu_mod.mod: native_fortran/fstarpu_mod.o +@STARPU_HAVE_FC_TRUE@nf_codelets.mod: native_fortran/nf_codelets.o +@STARPU_HAVE_FC_TRUE@nf_compute.mod: native_fortran/nf_compute.o +@STARPU_HAVE_FC_TRUE@nf_dynbuf_cl.mod: native_fortran/nf_dynbuf_cl.o +@STARPU_HAVE_FC_TRUE@nf_partition_cl.mod: native_fortran/nf_partition_cl.o 
+@STARPU_HAVE_FC_TRUE@nf_sched_ctx_cl.mod: native_fortran/nf_sched_ctx_cl.o +@STARPU_HAVE_FC_TRUE@nf_types.mod: native_fortran/nf_types.o +@STARPU_HAVE_FC_TRUE@nf_varbuf_cl.mod: native_fortran/nf_varbuf_cl.o + +# - list explicit dependences to control proper module files dependencies +@STARPU_HAVE_FC_TRUE@native_fortran/nf_codelets.o: fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_compute.o: nf_types.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_dynbuf_cl.o: fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_dynbuf.o: nf_dynbuf_cl.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_example.o: nf_types.mod nf_compute.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_matrix.o: nf_codelets.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_partition_cl.o: fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_partition.o: nf_partition_cl.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_sched_ctx_cl.o: fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_sched_ctx.o: nf_sched_ctx_cl.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_varbuf_cl.o: fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_varbuf.o: nf_varbuf_cl.mod fstarpu_mod.mod +@STARPU_HAVE_FC_TRUE@native_fortran/nf_vector.o: nf_codelets.mod fstarpu_mod.mod + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/examples/README.txt b/examples/README.txt new file mode 100644 index 0000000..1b461e3 --- /dev/null +++ b/examples/README.txt @@ -0,0 +1,121 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +audio + This applies a simple band filter over audio files + +axpy + This computes the AXPY BLAS over a big vector + +basic_examples + This contains very trivial examples: hello world, scaling a vector, etc. + +binary + This shows how to store and load compiled OpenCL kernels on and from the + file system + +callback + This shows how to use task callbacks + +cg + This computes a Conjugate Gradient + +cholesky + This computes a Cholesky factorization + +common + This holds common code for BLAS kernels + +cpp + This shows how to use StarPU from C++ + +filters + This contains several partitioning examples + +fortran90 + This shows how to use StarPU from Fortran90 + +gl_interop + This shows how interoperation can be done between StarPU CUDA + computations and OpenGL rendering + +heat + This uses a finite element method to compute heat propagation thanks to + an LU factorization or a conjugate gradient + +incrementer + This just increments a variable + +interface + This shows how to implement a user-defined data type, here simply + complex floats + +lu + This computes an LU factorization + +mandelbrot + This computes and outputs the mandelbrot set + +matvecmult + This computes a matrix-vector multiplication + +mult + This computes a matrix-matrix multiplication + +openmp + This shows how to use an OpenMP code inside a StarPU parallel task + +pi + This computes Pi 
thanks to random numbers + +pipeline + This shows how to submit a pipeline to StarPU with limited buffer + use, and without submitting all the tasks at once + +ppm_downscaler + This downscales PPM pictures + +profiling + This exemplifies how to get profiling information on executed tasks + +reductions + This exemplifies how to use value reductions + +sched_ctx + This exemplifies how to use scheduling contexts + +sched_ctx_utils + This is just common code for scheduling contexts + +scheduler + This exemplifies how to implement a user-defined scheduler + +spmd + This shows how to define a parallel task + +spmv + This computes a sparse matrix-vector multiplication + +stencil + This computes a dumb 3D stencil with 1D subdomain decomposition + +tag_example + This exemplifies how to use tags for dependencies + +top + This exemplifies how to enrich StarPU-top with information + +worker_collections + This exemplifies how to use worker collections diff --git a/examples/api/bcsr_data_interface.c b/examples/api/bcsr_data_interface.c new file mode 100644 index 0000000..eec0eca --- /dev/null +++ b/examples/api/bcsr_data_interface.c @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the BCSR data +// interface only uses StarPU's public API + +#define starpu_interface_bcsr_ops my_starpu_interface_bcsr_ops +#define starpu_bcsr_data_register my_starpu_bcsr_data_register +#define starpu_bcsr_get_nnz my_starpu_bcsr_get_nnz +#define starpu_bcsr_get_nrow my_starpu_bcsr_get_nrow +#define starpu_bcsr_get_firstentry my_starpu_bcsr_get_firstentry +#define starpu_bcsr_get_r my_starpu_bcsr_get_r +#define starpu_bcsr_get_c my_starpu_bcsr_get_c +#define starpu_bcsr_get_elemsize my_starpu_bcsr_get_elemsize +#define starpu_bcsr_get_local_nzval my_starpu_bcsr_get_local_nzval +#define starpu_bcsr_get_local_colind my_starpu_bcsr_get_local_colind +#define starpu_bcsr_get_local_rowptr my_starpu_bcsr_get_local_rowptr +#include "../../src/datawizard/interfaces/bcsr_interface.c" + +int main() +{ + return 0; +} diff --git a/examples/api/block_data_interface.c b/examples/api/block_data_interface.c new file mode 100644 index 0000000..099cf02 --- /dev/null +++ b/examples/api/block_data_interface.c @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the block data +// interface only uses StarPU's public API + +#define starpu_interface_block_ops my_starpu_interface_block_ops +#define starpu_block_data_register my_starpu_block_data_register +#define starpu_block_ptr_register my_starpu_block_ptr_register +#define starpu_block_get_nx my_starpu_block_get_nx +#define starpu_block_get_ny my_starpu_block_get_ny +#define starpu_block_get_nz my_starpu_block_get_nz +#define starpu_block_get_local_ldy my_starpu_block_get_local_ldy +#define starpu_block_get_local_ldz my_starpu_block_get_local_ldz +#define starpu_block_get_local_ptr my_starpu_block_get_local_ptr +#define starpu_block_get_elemsize my_starpu_block_get_elemsize +#include "../../src/datawizard/interfaces/block_interface.c" + +int main() +{ + return 0; +} diff --git a/examples/api/coo_data_interface.c b/examples/api/coo_data_interface.c new file mode 100644 index 0000000..ea39c80 --- /dev/null +++ b/examples/api/coo_data_interface.c @@ -0,0 +1,27 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the COO data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_coo_ops my_starpu_interface_coo_ops +#define starpu_coo_data_register my_starpu_coo_data_register +#include "../../src/datawizard/interfaces/coo_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/csr_data_interface.c b/examples/api/csr_data_interface.c new file mode 100644 index 0000000..eed9d25 --- /dev/null +++ b/examples/api/csr_data_interface.c @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the CSR data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_csr_ops my_starpu_interface_csr_ops +#define starpu_csr_data_register my_starpu_csr_data_register +#define starpu_csr_get_nnz my_starpu_csr_get_nnz +#define starpu_csr_get_nrow my_starpu_csr_get_nrow +#define starpu_csr_get_firstentry my_starpu_csr_get_firstentry +#define starpu_csr_get_elemsize my_starpu_csr_get_elemsize +#define starpu_csr_get_local_nzval my_starpu_csr_get_local_nzval +#define starpu_csr_get_local_colind my_starpu_csr_get_local_colind +#define starpu_csr_get_local_rowptr my_starpu_csr_get_local_rowptr +#include "../../src/datawizard/interfaces/csr_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/matrix_data_interface.c b/examples/api/matrix_data_interface.c new file mode 100644 index 0000000..df98574 --- /dev/null +++ b/examples/api/matrix_data_interface.c @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the matrix data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_matrix_ops my_starpu_interface_matrix_ops +#define starpu_matrix_data_register my_starpu_matrix_data_register +#define starpu_matrix_data_register_allocsize my_starpu_matrix_data_register_allocsize +#define starpu_matrix_ptr_register my_starpu_matrix_data_ptr_register +#define starpu_matrix_get_nx my_starpu_matrix_get_nx +#define starpu_matrix_get_ny my_starpu_matrix_get_ny +#define starpu_matrix_get_local_ld my_starpu_matrix_get_local_ld +#define starpu_matrix_get_local_ptr my_starpu_matrix_get_local_ptr +#define starpu_matrix_get_elemsize my_starpu_matrix_get_elemsize +#define starpu_matrix_get_allocsize my_starpu_matrix_get_allocsize +#include "../../src/datawizard/interfaces/matrix_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/multiformat_data_interface.c b/examples/api/multiformat_data_interface.c new file mode 100644 index 0000000..78bd821 --- /dev/null +++ b/examples/api/multiformat_data_interface.c @@ -0,0 +1,27 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the multiformat data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_multiformat_ops my_starpu_interface_multiformat_ops +#define starpu_multiformat_data_register my_starpu_multiformat_data_register +#include "../../src/datawizard/interfaces/multiformat_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/tensor_data_interface.c b/examples/api/tensor_data_interface.c new file mode 100644 index 0000000..7d5b204 --- /dev/null +++ b/examples/api/tensor_data_interface.c @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the tensor data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_tensor_ops my_starpu_interface_tensor_ops +#define starpu_tensor_data_register my_starpu_tensor_data_register +#define starpu_tensor_ptr_register my_starpu_tensor_data_ptr_register +#define starpu_tensor_get_nx my_starpu_tensor_get_nx +#define starpu_tensor_get_ny my_starpu_tensor_get_ny +#define starpu_tensor_get_nz my_starpu_tensor_get_nz +#define starpu_tensor_get_nt my_starpu_tensor_get_nt +#define starpu_tensor_get_local_ldy my_starpu_tensor_get_local_ldy +#define starpu_tensor_get_local_ldz my_starpu_tensor_get_local_ldz +#define starpu_tensor_get_local_ldt my_starpu_tensor_get_local_ldt +#define starpu_tensor_get_local_ptr my_starpu_tensor_get_local_ptr +#define starpu_tensor_get_elemsize my_starpu_tensor_get_elemsize +#include "../../src/datawizard/interfaces/tensor_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/variable_data_interface.c b/examples/api/variable_data_interface.c new file mode 100644 index 0000000..e98ab72 --- /dev/null +++ b/examples/api/variable_data_interface.c @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the variable data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_variable_ops my_starpu_interface_variable_ops +#define starpu_variable_data_register my_starpu_variable_data_register +#define starpu_variable_ptr_register my_starpu_variable_ptr_register +#define starpu_variable_get_local_ptr my_starpu_variable_get_local_ptr +#define starpu_variable_get_elemsize my_starpu_variable_get_elemsize +#include "../../src/datawizard/interfaces/variable_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/vector_data_interface.c b/examples/api/vector_data_interface.c new file mode 100644 index 0000000..1b13a00 --- /dev/null +++ b/examples/api/vector_data_interface.c @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the vector data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_vector_ops my_starpu_interface_vector_ops +#define starpu_vector_data_register my_starpu_vector_data_register +#define starpu_vector_data_register_allocsize my_starpu_vector_data_register_allocsize +#define starpu_vector_ptr_register my_starpu_vector_data_ptr_register +#define starpu_vector_get_nx my_starpu_vector_get_nx +#define starpu_vector_get_local_ptr my_starpu_vector_get_local_ptr +#define starpu_vector_get_elemsize my_starpu_vector_get_elemsize +#define starpu_vector_get_allocsize my_starpu_vector_get_allocsize +#include "../../src/datawizard/interfaces/vector_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/api/void_data_interface.c b/examples/api/void_data_interface.c new file mode 100644 index 0000000..a949162 --- /dev/null +++ b/examples/api/void_data_interface.c @@ -0,0 +1,27 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// This program checks that the implementation of the void data +// interface only uses StarPU's public API (the starpu_* names #defined below are renamed to my_starpu_* before the interface source is #included) + +#define starpu_interface_void_ops my_starpu_interface_void_ops +#define starpu_void_data_register my_starpu_void_data_register +#include "../../src/datawizard/interfaces/void_interface.c" + +int main(void) +{ + return 0; +} diff --git a/examples/axpy/axpy.c b/examples/axpy/axpy.c new file mode 100644 index 0000000..67233eb --- /dev/null +++ b/examples/axpy/axpy.c @@ -0,0 +1,245 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This creates two dumb vectors, splits them into chunks, and for each pair of + * chunks, runs axpy on them. + */ + +#include +#include +#include +#include +#include + +#include + +#ifdef STARPU_USE_CUDA +#include +#endif + +#include "axpy.h" + +#define AXPY STARPU_SAXPY +#define CUBLASAXPY cublasSaxpy + +#define N (16*1024*1024) + +#define NBLOCKS 8 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define EPSILON 1e-6 + +TYPE *_vec_x, *_vec_y; +TYPE _alpha = 3.41; + +/* descriptors for StarPU */ +starpu_data_handle_t _handle_y, _handle_x; + +void axpy_cpu(void *descr[], void *arg) /* CPU implementation: calls AXPY on the n-element vectors descr[0] and descr[1], with alpha read from arg */ +{ + TYPE alpha = *((TYPE *)arg); + + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + AXPY((int)n, alpha, block_x, 1, block_y, 1); +} + +#ifdef STARPU_USE_CUDA +void axpy_gpu(void *descr[], void *arg) /* CUDA implementation: same arguments, passed to CUBLASAXPY with the handle returned by starpu_cublas_get_local_handle() */ +{ + TYPE alpha = *((TYPE *)arg); + + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + cublasStatus_t status = CUBLASAXPY(starpu_cublas_get_local_handle(), (int)n, &alpha, block_x, 1, block_y, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +#ifdef STARPU_USE_OPENCL +extern void axpy_opencl(void *buffers[], void *args); +#endif + +static struct starpu_perfmodel axpy_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "axpy" +}; + +static struct starpu_codelet axpy_cl = +{ + .cpu_funcs = {axpy_cpu}, + .cpu_funcs_name = {"axpy_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {axpy_gpu}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {axpy_opencl}, +#elif defined(STARPU_SIMGRID) + .opencl_funcs = {(void*)1}, +#endif + .opencl_flags = {STARPU_OPENCL_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .name = "axpy", + .model = &axpy_model +}; + +static int +check(void) /* returns EXIT_SUCCESS when every _vec_y[i] is within a relative EPSILON of _alpha * _vec_x[i] + 4.0, EXIT_FAILURE at the first mismatch */ +{ + int i; + for (i = 0; i < N; i++) + { + TYPE expected_value = _alpha * _vec_x[i] + 4.0; /* 4.0 is the value main() stores in every _vec_y element before the tasks run */ + if (fabs(_vec_y[i] - expected_value) > expected_value * EPSILON) + { + FPRINTF(stderr,"at %d, %f*%f+%f=%f, expected %f\n", i, _alpha, _vec_x[i], 4.0, 
_vec_y[i], expected_value); + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +int main(void) /* fills two N-element vectors, partitions them into NBLOCKS chunks, submits one axpy task per pair of chunks, then checks the result */ +{ + int ret, exit_value = 0; + + /* Initialize StarPU */ + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + starpu_cublas_init(); + + /* This is equivalent to + _vec_x = malloc(N*sizeof(TYPE)); + _vec_y = malloc(N*sizeof(TYPE)); + */ + starpu_malloc((void **)&_vec_x, N*sizeof(TYPE)); + assert(_vec_x); + + starpu_malloc((void **)&_vec_y, N*sizeof(TYPE)); + assert(_vec_y); + + unsigned i; + for (i = 0; i < N; i++) + { + _vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */ + _vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */ + } + + FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]); + FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]); + + /* Declare the data to StarPU */ + starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE)); + starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE)); + + /* Divide both vectors into NBLOCKS blocks */ + struct starpu_data_filter block_filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NBLOCKS + }; + + starpu_data_partition(_handle_x, &block_filter); + starpu_data_partition(_handle_y, &block_filter); + + double start; + double end; + + start = starpu_timing_now(); + + unsigned b; + for (b = 0; b < NBLOCKS; b++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &axpy_cl; + + task->cl_arg = &_alpha; + task->cl_arg_size = sizeof(_alpha); + + task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b); + task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b); + + 
task->tag_id = b; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + exit_value = 77; + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + +enodev: + starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM); + starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM); + starpu_data_unregister(_handle_x); + starpu_data_unregister(_handle_y); + + end = starpu_timing_now(); + double timing = end - start; + + FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing); + + FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha); + + if (exit_value != 77) + exit_value = check(); /* the result is only verified when no submission failed with -ENODEV */ + + starpu_free_noflag((void *)_vec_x, N*sizeof(TYPE)); + starpu_free_noflag((void *)_vec_y, N*sizeof(TYPE)); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + /* Stop StarPU */ + starpu_shutdown(); + + return exit_value; +} diff --git a/examples/axpy/axpy.h b/examples/axpy/axpy.h new file mode 100644 index 0000000..19c0661 --- /dev/null +++ b/examples/axpy/axpy.h @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef AXPY_H__ +#define AXPY_H__ + +#define TYPE float + +#endif /* AXPY_H__ */ + diff --git a/examples/axpy/axpy_opencl.c b/examples/axpy/axpy_opencl.c new file mode 100644 index 0000000..70e6476 --- /dev/null +++ b/examples/axpy/axpy_opencl.c @@ -0,0 +1,75 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* OpenCL codelet for axpy: binds the x/y device buffers, their offsets, n and alpha as kernel arguments, then enqueues the "_axpy_opencl" kernel */ + +#include +#include "axpy.h" + +extern struct starpu_opencl_program opencl_program; + +void axpy_opencl(void *buffers[], void *_args) +{ + TYPE *alpha = _args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned x_offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); + cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); + unsigned y_offset = STARPU_VECTOR_GET_OFFSET(buffers[1]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_axpy_opencl", devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(x), &x); + err|= clSetKernelArg(kernel, 1, sizeof(x_offset), &x_offset); + err|= clSetKernelArg(kernel, 2, sizeof(y), &y); + err|= clSetKernelArg(kernel, 3, sizeof(y_offset), 
&y_offset); + err|= clSetKernelArg(kernel, 4, sizeof(n), &n); + err|= clSetKernelArg(kernel, 5, sizeof(*alpha), alpha); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; /* passed as the last argument of clGetKernelWorkGroupInfo and not read afterwards */ + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) + local=global; + else + global = (global + local-1) / local * local; /* round global up to a multiple of local */ + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/axpy/axpy_opencl_kernel.cl b/examples/axpy/axpy_opencl_kernel.cl new file mode 100644 index 0000000..ebe31f7 --- /dev/null +++ b/examples/axpy/axpy_opencl_kernel.cl @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* OpenCL kernel implementing axpy: y[i] = alpha * x[i] + y[i] for i < nx, after x_offset and y_offset have been applied to x and y as byte offsets */ + +#include "axpy.h" + +__kernel void _axpy_opencl(__global TYPE *x, + unsigned x_offset, + __global TYPE *y, + unsigned y_offset, + unsigned nx, + TYPE alpha) +{ + const int i = get_global_id(0); + x = (__global TYPE *)((__global char *) x + x_offset); /* byte arithmetic through a char pointer, then cast back to the element type */ + y = (__global TYPE *)((__global char *) y + y_offset); + if (i < nx) + y[i] = alpha * x[i] + y[i]; +} diff --git a/examples/basic_examples/block.c b/examples/basic_examples/block.c new file mode 100644 index 0000000..b48efdf --- /dev/null +++ b/examples/basic_examples/block.c @@ -0,0 +1,146 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void cpu_codelet(void *descr[], void *_args); +#ifdef STARPU_USE_CUDA +extern void cuda_codelet(void *descr[], void *_args); +#endif +#ifdef STARPU_USE_HIP +extern void hip_codelet(void *descr[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void opencl_codelet(void *descr[], void *_args); +struct starpu_opencl_program opencl_code; +#endif + +typedef void (*device_func)(void **, void *); + +int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny, int pnz, float multiplier) +{ + struct starpu_codelet cl; + starpu_data_handle_t block_handle; + int i; + + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float)); + + starpu_codelet_init(&cl); + cl.where = where; + cl.cuda_funcs[0] = func; + cl.hip_funcs[0] = func; + cl.cpu_funcs[0] = func; + cl.opencl_funcs[0] = func; + cl.nbuffers = 1; + cl.modes[0] = STARPU_RW, + cl.model = NULL; + cl.name = "block_scale"; + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->callback_func = NULL; + task->handles[0] = block_handle; + task->cl_arg = &multiplier; + task->cl_arg_size = sizeof(multiplier); + + int ret = starpu_task_submit(task); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + task->destroy = 0; + starpu_task_destroy(task); + return 1; + } + + starpu_task_wait_for_all(); + + /* update the array in RAM */ + starpu_data_unregister(block_handle); + + for(i=0 ; i + +void cpu_codelet(void *descr[], void *_args) +{ + float *block = (float *)STARPU_BLOCK_GET_PTR(descr[0]); + int nx = (int)STARPU_BLOCK_GET_NX(descr[0]); + int ny = (int)STARPU_BLOCK_GET_NY(descr[0]); + int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]); + float *multiplier = (float *)_args; + int i, j, k; + 
+ for(k=0; k + +static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) +{ + int i, j, k; + for(k=0; k>>(block, nx, ny, nz, ldy, ldz, *multiplier); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} diff --git a/examples/basic_examples/block_hip.hip b/examples/basic_examples/block_hip.hip new file mode 100644 index 0000000..3dd6121 --- /dev/null +++ b/examples/basic_examples/block_hip.hip @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +static __global__ void hip_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) +{ + int i, j, k; + for(k=0; k + +#define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr) \ +do \ +{ \ + int check_err; \ + check_err = clSetKernelArg(kernel, n, size, ptr); \ + if (check_err != CL_SUCCESS) \ + STARPU_OPENCL_REPORT_ERROR(check_err); \ +} while (0) + +extern struct starpu_opencl_program opencl_code; + +void opencl_codelet(void *descr[], void *_args) +{ + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + int id, devid, err; + cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]); + int nx = (int)STARPU_BLOCK_GET_NX(descr[0]); + int ny = (int)STARPU_BLOCK_GET_NY(descr[0]); + int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]); + int ldy = (int)STARPU_BLOCK_GET_LDY(descr[0]); + int ldz = (int) STARPU_BLOCK_GET_LDZ(descr[0]); + float *multiplier = (float *)_args; + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "block", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + CHECK_CL_SET_KERNEL_ARG(kernel, 0, sizeof(block), &block); + CHECK_CL_SET_KERNEL_ARG(kernel, 1, sizeof(nx), &nx); + CHECK_CL_SET_KERNEL_ARG(kernel, 2, sizeof(ny), &ny); + CHECK_CL_SET_KERNEL_ARG(kernel, 3, sizeof(nz), &nz); + CHECK_CL_SET_KERNEL_ARG(kernel, 4, sizeof(ldy), &ldy); + CHECK_CL_SET_KERNEL_ARG(kernel, 5, sizeof(ldz), &ldz); + CHECK_CL_SET_KERNEL_ARG(kernel, 6, sizeof(*multiplier), multiplier); + + { + size_t global=nx*ny*nz; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); +} + diff --git a/examples/basic_examples/block_opencl_kernel.cl b/examples/basic_examples/block_opencl_kernel.cl new file 
mode 100644 index 0000000..0f3c852 --- /dev/null +++ b/examples/basic_examples/block_opencl_kernel.cl @@ -0,0 +1,22 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +__kernel void block(__global float *b, int nx, int ny, int nz, int ldy, int ldz, float multiplier) /* each work-item i below the bound multiplies b[i] by multiplier */ +{ + const int i = get_global_id(0); + if (i < (nz*ldz)+(ny*ldy)+nx) /* NOTE(review): the bound sums nz*ldz, ny*ldy and nx, while opencl_codelet enqueues nx*ny*nz work-items -- confirm this is the intended limit */ + b[i] = b[i] * multiplier; +} diff --git a/examples/basic_examples/dynamic_handles.c b/examples/basic_examples/dynamic_handles.c new file mode 100644 index 0000000..2100e65 --- /dev/null +++ b/examples/basic_examples/dynamic_handles.c @@ -0,0 +1,203 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void dummy_small_kernel(void *descr[], void *cl_arg) +{ + int nb_data; + int i; + + starpu_codelet_unpack_args(cl_arg, &nb_data); + assert(nb_data == 1); + FPRINTF(stderr, "Number of data: %d\n", nb_data); + + for(i=0 ; isynchronous = 1; + task->cl = &dummy_small_cl; + starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, + STARPU_VALUE, &(task->cl->nbuffers), sizeof(task->cl->nbuffers), + 0); + task->dyn_handles = malloc(sizeof(*task->dyn_handles)); + task->dyn_handles[0] = handle; + task->cl_arg_free = 1; + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + + /* This tests a large constant number of arguments with starpu_task_submit */ + task2 = starpu_task_create(); + task2->synchronous = 1; + task2->cl = &dummy_big_cl; + task2->cl_arg_free = 1; + starpu_codelet_pack_args(&task2->cl_arg, &task2->cl_arg_size, + STARPU_VALUE, &(task2->cl->nbuffers), sizeof(task2->cl->nbuffers), + 0); + task2->dyn_handles = malloc(task2->cl->nbuffers * sizeof(*(task2->dyn_handles))); + task2->dyn_modes = malloc(task2->cl->nbuffers * sizeof(*(task2->dyn_modes))); + for(i=0 ; icl->nbuffers ; i++) + { + task2->dyn_handles[i] = handle; + task2->dyn_modes[i] = STARPU_RW; + } + ret = starpu_task_submit(task2); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + + /* This tests a large variable number of arguments with starpu_task_submit */ + task3 = starpu_task_create(); + task3->synchronous = 1; + task3->cl = &dummy_variable_cl; + task3->cl_arg_free = 1; + starpu_codelet_pack_args(&task3->cl_arg, &task3->cl_arg_size, + STARPU_VALUE, &(dummy_big_cl.nbuffers), sizeof(dummy_big_cl.nbuffers), + 0); + task3->dyn_handles = malloc(dummy_big_cl.nbuffers * sizeof(*(task3->dyn_handles))); + task3->dyn_modes = malloc(dummy_big_cl.nbuffers * sizeof(*(task3->dyn_modes))); + task3->nbuffers 
= dummy_big_cl.nbuffers; + for(i=0 ; idyn_handles[i] = handle; + task3->dyn_modes[i] = STARPU_RW; + } + ret = starpu_task_submit(task3); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + + /* This tests a small number of arguments with starpu_task_insert */ + ret = starpu_task_insert(&dummy_small_cl, + STARPU_VALUE, &(dummy_small_cl.nbuffers), sizeof(dummy_small_cl.nbuffers), + STARPU_RW, handle, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + + /* This tests a large constant number of arguments with starpu_task_insert */ + descrs = malloc(dummy_big_cl.nbuffers * sizeof(struct starpu_data_descr)); + for(i=0 ; icl_arg) + * - how to declare a callback function that is called once the task has been + * executed + * - how to specify if starpu_task_submit is a blocking or non-blocking + * operation (task->synchronous) + */ + +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +/* When the task is done, task->callback_func(task->callback_arg) is called. Any + * callback function must have the prototype void (*)(void *). + * NB: Callback are NOT allowed to perform potentially blocking operations */ +void callback_func(void *callback_arg) +{ + FPRINTF(stdout, "Callback function got argument %p\n", callback_arg); +} + +/* Every implementation of a codelet must have this prototype, the first + * argument (buffers) describes the buffers/streams that are managed by the + * DSM; the second arguments references read-only data that is passed as an + * argument of the codelet (task->cl_arg). 
Here, "buffers" is unused as there + * are no data input/output managed by the DSM (cl.nbuffers = 0) */ +struct params +{ + int i; + float f; +}; + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)buffers; + struct params *params = (struct params *) cl_arg; + + FPRINTF(stdout, "Hello world (params = {%i, %f})\n", params->i, params->f); +} + +int main(void) +{ + struct starpu_codelet cl; + struct starpu_task *task; + struct params params = {1, 2.0f}; + int ret; + + /* initialize StarPU : passing a NULL argument means that we use + * default configuration for the scheduling policies and the number of + * processors/accelerators */ + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* create a new task that is non-blocking by default : the task is not + * submitted to the scheduler until the starpu_task_submit function is + * called */ + task = starpu_task_create(); + + starpu_codelet_init(&cl); + /* this codelet may only be executed on a CPU, and its cpu + * implementation is function "cpu_func" */ + cl.cpu_funcs[0] = cpu_func; + cl.cpu_funcs_name[0] = "cpu_func"; + /* the codelet does not manipulate any data that is managed + * by our DSM */ + cl.nbuffers = 0; + cl.name="hello"; + + /* the task uses codelet "cl" */ + task->cl = &cl; + + /* It is possible to pass buffers that are not managed by the DSM to the + * kernels: the second argument of the "cpu_func" function is a pointer to a + * buffer that contains information for the codelet (cl_arg stands for + * codelet argument). In the case of accelerators, it is possible that + * the codelet is given a pointer to a copy of that buffer: this buffer + * is read-only so that any modification is not passed to other copies + * of the buffer. For this reason, a buffer passed as a codelet + * argument (cl_arg) is NOT a valid synchronization medium! 
*/ + task->cl_arg = ¶ms; + task->cl_arg_size = sizeof(params); + + /* once the task has been executed, callback_func(0x42) + * will be called on a CPU */ + task->callback_func = callback_func; + task->callback_arg = (void*) (uintptr_t) 0x42; + + /* starpu_task_submit will be a blocking call */ + task->synchronous = 1; + + /* submit the task to StarPU */ + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* terminate StarPU: statistics and other debug outputs are not + * guaranteed to be generated unless this function is called. Once it + * is called, it is not possible to submit tasks anymore, and the user + * is responsible for making sure all tasks have already been executed: + * calling starpu_shutdown() before the termination of all the tasks + * results in an undefined behaviour */ + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/basic_examples/hooks.c b/examples/basic_examples/hooks.c new file mode 100644 index 0000000..4ce4b46 --- /dev/null +++ b/examples/basic_examples/hooks.c @@ -0,0 +1,62 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#define NX 21 + +static int check_malloc = 0; +static int check_free = 0; + +int malloc_hook(unsigned dst_node, void **A, size_t dim, int flags) +{ + int ret = 0; + + *A = malloc(dim); + + if (!*A) + ret = -ENOMEM; + + check_malloc++; + + return ret; +} + +int free_hook(unsigned dst_node, void *A, size_t dim, int flags) +{ + free(A); + check_free++; + + return 0; +} + +int main(void) +{ + int* vector; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc_set_hooks(malloc_hook, free_hook); + starpu_malloc((void **)&vector, NX*sizeof(int)); + starpu_free_noflag(vector, NX*sizeof(int)); + + STARPU_ASSERT(check_malloc == 1 && check_free == 1); + + starpu_shutdown(); +} diff --git a/examples/basic_examples/mult.c b/examples/basic_examples/mult.c new file mode 100644 index 0000000..6cee03e --- /dev/null +++ b/examples/basic_examples/mult.c @@ -0,0 +1,467 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example shows a simple implementation of a blocked matrix + * multiplication. Note that this is NOT intended to be an efficient + * implementation of sgemm! 
In this example, we show: + * - how to declare dense matrices (starpu_matrix_data_register) + * - how to manipulate matrices within codelets (eg. descr[0].blas.ld) + * - how to use filters to partition the matrices into blocks + * (starpu_data_partition and starpu_data_map_filters) + * - how to unpartition data (starpu_data_unpartition) and how to stop + * monitoring data (starpu_data_unregister) + * - how to manipulate subsets of data (starpu_data_get_sub_data) + * - how to construct an autocalibrated performance model (starpu_perfmodel) + * - how to submit asynchronous tasks + */ + +#include +#include +#include +#include + +#include + +static float *A, *B, *C, *Cref; +static starpu_data_handle_t A_handle, B_handle, C_handle; + +static unsigned nslicesx = 4; +static unsigned nslicesy = 4; +#ifdef STARPU_QUICK_CHECK +static unsigned xdim = 512; +static unsigned ydim = 512; +static unsigned zdim = 256; +#else +static unsigned xdim = 1024; +static unsigned ydim = 1024; +static unsigned zdim = 512; +#endif + +extern void hip_mult(void *descr[], void *arg); +extern void cuda_mult(void *descr[], void *arg); + +/* + * That program should compute C = A * B + * + * A of size (z,y) + * B of size (x,z) + * C of size (x,y) + * + * |---------------| + * z | B | + * |---------------| + * z x + * |----| |---------------| + * | | | | + * | | | | + * | A | y | C | + * | | | | + * | | | | + * |----| |---------------| + * + * Note: we use FORTRAN ordering. + */ + +/* + * The codelet is passed 3 matrices, the "descr" union-type field gives a + * description of the layout of those 3 matrices in the local memory (ie. RAM + * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have + * registered data with the "matrix" data interface, we use the matrix macros. 
+ */ + +void cpu_mult(void *descr[], void *arg) +{ + (void)arg; + float *subA, *subB, *subC; + uint32_t nxC, nyC, nyA; + uint32_t ldA, ldB, ldC; + + /* ptr gives a pointer to the first element of the local copy */ + subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + /* + * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN + * ordering: + * - nx is the number of consecutive elements (thus the number of rows + * in FORTRAN order) + * - ny is the number of series that are separated by ld elements (thus + * the number of columns in FORTRAN order) + * - ld stands for leading dimension + * + * NB: in case some filters were used, the leading dimension is not + * guaranteed to be the same in main memory (on the original matrix) + * and on the accelerator! */ + nxC = STARPU_MATRIX_GET_NX(descr[2]); + nyC = STARPU_MATRIX_GET_NY(descr[2]); + nyA = STARPU_MATRIX_GET_NY(descr[0]); + + ldA = STARPU_MATRIX_GET_LD(descr[0]); + ldB = STARPU_MATRIX_GET_LD(descr[1]); + ldC = STARPU_MATRIX_GET_LD(descr[2]); + + /* we use a FORTRAN-ordering! 
*/ + unsigned i,j,k; + for (i = 0; i < nyC; i++) /* iterate over columns of C */ + { + for (j = 0; j < nxC; j++) /* iterate over rows of C */ + { + float sum = 0.0; + + for (k = 0; k < nyA; k++) + { + sum += subA[j+k*ldA]*subB[k+i*ldB]; + } + + subC[j + i*ldC] = sum; + } + } +} + +static void init_problem_data(void) +{ + unsigned i,j; + + /* we initialize matrices A, B and C in the usual way */ + + starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + + Cref = (float *) malloc(xdim*ydim*sizeof(float)); + assert(A); + assert(B); + assert(C); + assert(Cref); + + /* fill the A and B matrices */ + starpu_srand48(2009); + for (j=0; j < ydim; j++) + { + for (i=0; i < zdim; i++) + { + A[j+i*ydim] = (float)(starpu_drand48()); + } + } + + for (j=0; j < zdim; j++) + { + for (i=0; i < xdim; i++) + { + B[j+i*zdim] = (float)(starpu_drand48()); + } + } + + for (j=0; j < ydim; j++) + { + for (i=0; i < xdim; i++) + { + C[j+i*ydim] = (float)(0); + Cref[j+i*ydim] = (float)(0); + } + } +} + +static void partition_mult_data(void) +{ + /* note that we assume a FORTRAN ordering here! */ + + /* The BLAS data interface is described by 4 parameters: + * - the location of the first element of the matrix to monitor (3rd + * argument) + * - the number of elements between columns, aka leading dimension + * (4th arg) + * - the number of (contiguous) elements per column, ie. contiguous + * elements (5th arg) + * - the number of columns (6th arg) + * The first argument is a pointer to the data_handle that will be + * associated to the matrix, and the second argument gives the memory + * node in which resides the matrix: 0 means that the 3rd argument is + * an address in main memory.
+ */ + starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, + ydim, ydim, zdim, sizeof(float)); + starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, + zdim, zdim, xdim, sizeof(float)); + starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, + ydim, ydim, xdim, sizeof(float)); + + /* A filter is a method to partition a data into disjoint chunks, it is + * described by the means of the "struct starpu_data_filter" structure that + * contains a function that is applied on a data handle to partition it + * into smaller chunks, and an argument that is passed to the function + * (eg. the number of blocks to create here). + */ + + /* StarPU supplies some basic filters such as the partition of a matrix + * into blocks, note that we are using a FORTRAN ordering so that the + * name of the filters are a bit misleading */ + struct starpu_data_filter vert = + { + .filter_func = starpu_matrix_filter_vertical_block, + .nchildren = nslicesx + }; + + struct starpu_data_filter horiz = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nslicesy + }; + +/* + * Illustration with nslicex = 4 and nslicey = 2, it is possible to access + * sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle, + * the number of filters to apply, and the indexes for each filters, for + * instance: + * + * A' handle is starpu_data_get_sub_data(A_handle, 1, 1); + * B' handle is starpu_data_get_sub_data(B_handle, 1, 2); + * C' handle is starpu_data_get_sub_data(C_handle, 2, 2, 1); + * + * Note that here we applied 2 filters recursively onto C. + * + * "starpu_data_get_sub_data(C_handle, 1, 3)" would return a handle to the 4th column + * of blocked matrix C for example. 
+ * + * |---|---|---|---| + * | | | B'| | B + * |---|---|---|---| + * 0 1 2 3 + * |----| |---|---|---|---| + * | | | | | | | + * | | 0 | | | | | + * |----| |---|---|---|---| + * | A' | | | | C'| | + * | | | | | | | + * |----| |---|---|---|---| + * A C + * + * IMPORTANT: applying filters is equivalent to partitioning a piece of + * data in a hierarchical manner, so that memory consistency is enforced + * for each of the elements independently. The tasks should therefore NOT + * access inner nodes (eg. one column of C or the whole C) but only the + * leafs of the tree (ie. blocks here). Manipulating inner nodes is only + * possible by disapplying the filters (using starpu_data_unpartition), to + * enforce memory consistency. + */ + + starpu_data_partition(B_handle, &vert); + starpu_data_partition(A_handle, &horiz); + + /* starpu_data_map_filters is a variable-arity function, the first argument + * is the handle of the data to partition, the second argument is the + * number of filters to apply recursively. Filters are applied in the + * same order as the arguments. + * This would be equivalent to starpu_data_partition(C_handle, &vert) and + * then applying horiz on each sub-data (ie. 
each column of C) + */ + starpu_data_map_filters(C_handle, 2, &vert, &horiz); +} + +static struct starpu_perfmodel mult_perf_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "mult_perf_model" +}; + +static struct starpu_codelet cl = +{ + /* CPU implementation of the codelet */ + .cpu_funcs = {cpu_mult}, + .cpu_funcs_name = {"cpu_mult"}, +#ifdef STARPU_USE_HIP + /* HIP implementation of the codelet */ + .hip_funcs = {hip_mult}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_CUDA + /* CUDA implementation of the codelet */ + .cuda_funcs = {cuda_mult}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .where = STARPU_CUDA, +#endif + /* the codelet manipulates 3 buffers that are managed by the DSM */ + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + /* in case the scheduling policy may use performance models */ + .model = &mult_perf_model +}; + +static int launch_tasks(void) +{ + int ret; + /* partition the work into slices */ + unsigned taskx, tasky; + + for (taskx = 0; taskx < nslicesx; taskx++) + { + for (tasky = 0; tasky < nslicesy; tasky++) + { + /* C[taskx, tasky] = A[tasky] B[taskx] */ + + /* by default, starpu_task_create() returns an + * asynchronous task (ie. task->synchronous = 0) */ + struct starpu_task *task = starpu_task_create(); + + /* this task implements codelet "cl" */ + task->cl = &cl; + + /* + * |---|---|---|---| + * | | * | | | B + * |---|---|---|---| + * X + * |----| |---|---|---|---| + * |****| Y | |***| | | + * |****| | |***| | | + * |----| |---|---|---|---| + * | | | | | | | + * | | | | | | | + * |----| |---|---|---|---| + * A C + */ + + /* there was a single filter applied to matrices A + * (respectively B) so we grab the handle to the chunk + * identified by "tasky" (respectively "taskx). 
The "1" + * tells StarPU that there is a single argument to the + * variable-arity function starpu_data_get_sub_data */ + task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky); + task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx); + + /* 2 filters were applied on matrix C, so we give + * starpu_data_get_sub_data 2 arguments. The order of the arguments + * must match the order in which the filters were + * applied. + * NB: starpu_data_get_sub_data(C_handle, 1, k) would have returned + * a handle to the column number k of matrix C. + * NB2: starpu_data_get_sub_data(C_handle, 2, taskx, tasky) is + * equivalent to + * starpu_data_get_sub_data(starpu_data_get_sub_data(C_handle, 1, taskx), 1, tasky)*/ + task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky); + + /* this is not a blocking call since task->synchronous = 0 */ + ret = starpu_task_submit(task); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + return 0; +} + +void check_result(float* C_gpu, float* C_ref, uint32_t ldC) +{ + unsigned i,j; + for (i = 0; i < ydim; i++) + { + for (j = 0; j < xdim; j++) + { + if(C_gpu[j + i*ldC]-C_ref[j + i*ldC] > 1e-6*C_ref[j + i*ldC]) + { + printf("| Cref[%u,%u]=%f - Cgpu[%u,%u]=%f | Error in the computation of C: the difference between the two is bigger than 1e-6 * the reference" + , i, j, C_ref[j + i*ldC], i, j, C_gpu[j + i*ldC]); + exit(1); + } + } + } + printf("SUCCESSFUL COMPUTATION\n"); +} + +int main(void) +{ + int ret; + + /* start the runtime */ + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* initialize matrices A, B and C and register them to StarPU */ + init_problem_data(); + + /* partition matrices into blocks that can be manipulated by the + * codelets */ + partition_mult_data(); + + /* submit all tasks in an asynchronous fashion */ + ret = launch_tasks(); + if (ret == -ENODEV) goto enodev; + + /* cpu compution 
to check */ + /* ============================================= */ + + uint32_t ldA = ydim; + uint32_t ldB = zdim; + uint32_t ldC = ydim; + + unsigned i,j,k; + for (i = 0; i < ydim; i++) + { + for (j = 0; j < xdim; j++) + { + float sum = 0.0; + + for (k = 0; k < zdim; k++) + { + sum += A[j+k*ldA]*B[k+i*ldB]; + } + + Cref[j + i*ldC] = sum; + } + } + + /* ============================================= */ + /* wait for termination */ + starpu_task_wait_for_all(); + + /* remove the filters applied by the means of starpu_data_map_filters; now + * it's not possible to manipulate a subset of C using starpu_data_get_sub_data until + * starpu_data_map_filters is called again on C_handle. + * The second argument is the memory node where the different subsets + * should be reassembled, 0 = main memory (RAM) */ + starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); + + /* stop monitoring matrix C : after this, it is not possible to pass C + * (or any subset of C) as a codelet input/output. This also implements + * a barrier so that the piece of data is put back into main memory in + * case it was only available on a GPU for instance. 
*/ + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); + starpu_data_unregister(C_handle); + + /* Comment to remove printing of results */ + check_result(C, Cref, ldC); + + starpu_free_flags(A, zdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_free_flags(B, xdim*zdim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_free_flags(C, xdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + + free(Cref); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/basic_examples/mult_cuda.cu b/examples/basic_examples/mult_cuda.cu new file mode 100644 index 0000000..76b9b55 --- /dev/null +++ b/examples/basic_examples/mult_cuda.cu @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example shows a simple implementation of a blocked matrix + * multiplication. Note that this is NOT intended to be an efficient + * implementation of sgemm! In this example, we show: + * - how to declare dense matrices (starpu_matrix_data_register) + * - how to manipulate matrices within codelets (eg. 
descr[0].blas.ld) + * - how to use filters to partition the matrices into blocks + * (starpu_data_partition and starpu_data_map_filters) + * - how to unpartition data (starpu_data_unpartition) and how to stop + * monitoring data (starpu_data_unregister) + * - how to manipulate subsets of data (starpu_data_get_sub_data) + * - how to construct an autocalibrated performance model (starpu_perfmodel) + * - how to submit asynchronous tasks + */ + +#include +#include +#include +#include +#include + +#define THREADS_PER_BLOCK 256 + +/* + * That program should compute C = A * B + * + * A of size (z,y) + * B of size (x,z) + * C of size (x,y) + + |---------------| + z | B | + |---------------| + z x + |----| |---------------| + | | | | + | | | | + | A | y | C | + | | | | + | | | | + |----| |---------------| + + * Note: we use FORTRAN ordering. + */ + +/* + * The codelet is passed 3 matrices, the "descr" union-type field gives a + * description of the layout of those 3 matrices in the local memory (ie. RAM + * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have + * registered data with the "matrix" data interface, we use the matrix macros. 
+ */ +static __global__ void cuda_mult_kernel(uint32_t nxC, uint32_t nyC, uint32_t nyA, + uint32_t ldA, uint32_t ldB, uint32_t ldC, + float * subA, float * subB, float * subC ) +{ + uint32_t id, i, j, k; + float sum; + id = blockIdx.x * blockDim.x + threadIdx.x; + i = id % nxC; + j = id / nxC; + if (j >= nyC) + { + return; + } + sum = 0.; + for (k = 0 ; k < nyA ; k++) + { + sum += subA[i + k*ldA] * subB[k + j*ldB]; + } + subC[i + j*ldC] = sum; +} + +extern "C" void cuda_mult(void *descr[], void *arg) +{ + (void)arg; + float *d_subA, *d_subB, *d_subC; + uint32_t nxC, nyC, nyA; + uint32_t ldA, ldB, ldC; + uint32_t nblocks; + + /* ptr gives a pointer to the first element of the local copy */ + d_subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + d_subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + d_subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + /* + * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN + * ordering: + * - nx is the number of consecutive elements (thus the number of rows + * in FORTRAN order) + * - ny is the number of series that are separated by ld elements (thus + * the number of columns in FORTRAN order) + * - ld stands for leading dimension + * + * NB: in case some filters were used, the leading dimension is not + * guaranteed to be the same in main memory (on the original matrix) + * and on the accelerator! 
*/ + + nxC = STARPU_MATRIX_GET_NX(descr[2]); + nyC = STARPU_MATRIX_GET_NY(descr[2]); + nyA = STARPU_MATRIX_GET_NY(descr[0]); + + ldA = STARPU_MATRIX_GET_LD(descr[0]); + ldB = STARPU_MATRIX_GET_LD(descr[1]); + ldC = STARPU_MATRIX_GET_LD(descr[2]); + + nblocks = (nxC * nyC + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK; + cuda_mult_kernel + <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream() + >>> (nxC, nyC, nyA, ldA, ldB, ldC, d_subA, d_subB, d_subC); + + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/basic_examples/mult_hip.hip b/examples/basic_examples/mult_hip.hip new file mode 100644 index 0000000..5ed1880 --- /dev/null +++ b/examples/basic_examples/mult_hip.hip @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example shows a simple implementation of a blocked matrix + * multiplication. Note that this is NOT intended to be an efficient + * implementation of sgemm! In this example, we show: + * - how to declare dense matrices (starpu_matrix_data_register) + * - how to manipulate matrices within codelets (eg. 
descr[0].blas.ld) + * - how to use filters to partition the matrices into blocks + * (starpu_data_partition and starpu_data_map_filters) + * - how to unpartition data (starpu_data_unpartition) and how to stop + * monitoring data (starpu_data_unregister) + * - how to manipulate subsets of data (starpu_data_get_sub_data) + * - how to construct an autocalibrated performance model (starpu_perfmodel) + * - how to submit asynchronous tasks + */ + +#include +#include +#include +#include +#include + +#define THREADS_PER_BLOCK 256 + +/* + * That program should compute C = A * B + * + * A of size (z,y) + * B of size (x,z) + * C of size (x,y) + + |---------------| + z | B | + |---------------| + z x + |----| |---------------| + | | | | + | | | | + | A | y | C | + | | | | + | | | | + |----| |---------------| + + * Note: we use FORTRAN ordering. + */ + +/* + * The codelet is passed 3 matrices, the "descr" union-type field gives a + * description of the layout of those 3 matrices in the local memory (ie. RAM + * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have + * registered data with the "matrix" data interface, we use the matrix macros. 
+ */ +static __global__ void hip_mult_kernel(uint32_t nxC, uint32_t nyC, uint32_t nyA, + uint32_t ldA, uint32_t ldB, uint32_t ldC, + float * subA, float * subB, float * subC ) +{ + uint32_t id, i, j, k; + float sum; + id = blockIdx.x * blockDim.x + threadIdx.x; + i = id % nxC; + j = id / nxC; + if (j >= nyC) + { + return; + } + sum = 0.; + for (k = 0 ; k < nyA ; k++) + { + sum += subA[i + k*ldA] * subB[k + j*ldB]; + } + subC[i + j*ldC] = sum; +} + +extern "C" void hip_mult(void *descr[], void *arg) +{ + (void)arg; + float *subA, *subB, *subC; + uint32_t nxC, nyC, nyA; + uint32_t ldA, ldB, ldC; + uint32_t nblocks; + + /* ptr gives a pointer to the first element of the local copy */ + subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + /* + * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN + * ordering: + * - nx is the number of consecutive elements (thus the number of rows + * in FORTRAN order) + * - ny is the number of series that are separated by ld elements (thus + * the number of columns in FORTRAN order) + * - ld stands for leading dimension + * + * NB: in case some filters were used, the leading dimension is not + * guaranteed to be the same in main memory (on the original matrix) + * and on the accelerator! 
*/ + + nxC = STARPU_MATRIX_GET_NX(descr[2]); + nyC = STARPU_MATRIX_GET_NY(descr[2]); + nyA = STARPU_MATRIX_GET_NY(descr[0]); + + ldA = STARPU_MATRIX_GET_LD(descr[0]); + ldB = STARPU_MATRIX_GET_LD(descr[1]); + ldC = STARPU_MATRIX_GET_LD(descr[2]); + + nblocks = (nxC * nyC + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK; + + hipLaunchKernelGGL(hip_mult_kernel, nblocks, THREADS_PER_BLOCK, 0, starpu_hip_get_local_stream(), + nxC, nyC, nyA, + ldA, ldB, ldC, + subA, subB, subC ); + + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); +} diff --git a/examples/basic_examples/multiformat.c b/examples/basic_examples/multiformat.c new file mode 100644 index 0000000..6cb23d2 --- /dev/null +++ b/examples/basic_examples/multiformat.c @@ -0,0 +1,341 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "multiformat_types.h" + +static int ncpu = 0; +#ifdef STARPU_USE_CUDA +static int ncuda = 0; +#endif +#ifdef STARPU_USE_OPENCL +static int nopencl = 0; +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static struct point array_of_structs[N_ELEMENTS]; +static starpu_data_handle_t array_of_structs_handle; + +void multiformat_scal_cpu_func(void *buffers[], void *args) +{ + struct point *aos; + unsigned int n, i; + + (void)args; + + aos = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + + for (i = 0; i < n; i++) + { + aos[i].x *= aos[i].y; + } +} + +#ifdef STARPU_USE_CUDA +extern struct starpu_codelet cpu_to_cuda_cl; +extern struct starpu_codelet cuda_to_cpu_cl; +#endif + +#ifdef STARPU_USE_OPENCL +extern struct starpu_codelet cpu_to_opencl_cl; +extern struct starpu_codelet opencl_to_cpu_cl; +#endif + +static struct starpu_multiformat_data_interface_ops format_ops = +{ +#ifdef STARPU_USE_CUDA + .cuda_elemsize = 2* sizeof(float), + .cpu_to_cuda_cl = &cpu_to_cuda_cl, + .cuda_to_cpu_cl = &cuda_to_cpu_cl, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_elemsize = 2 * sizeof(float), + .cpu_to_opencl_cl = &cpu_to_opencl_cl, + .opencl_to_cpu_cl = &opencl_to_cpu_cl, +#endif + .cpu_elemsize = sizeof(struct point), + +}; + +#ifdef STARPU_USE_CUDA +extern void multiformat_scal_cuda_func(void *buffers[], void *arg); +#endif +#ifdef STARPU_USE_OPENCL +extern void multiformat_scal_opencl_func(void *buffers[], void *arg); +#endif + +#ifdef STARPU_USE_CPU +static struct starpu_codelet cpu_cl = +{ + .cpu_funcs = {multiformat_scal_cpu_func}, + .cpu_funcs_name = {"multiformat_scal_cpu_func"}, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "codelet_real" +}; +#endif /* !STARPU_USE_CPU */ + +#ifdef STARPU_USE_CUDA +static struct starpu_codelet cuda_cl = +{ + .cuda_funcs = { multiformat_scal_cuda_func }, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "cuda_codelet" +}; +#endif /* !STARPU_USE_CUDA */ + +#ifdef STARPU_USE_OPENCL +static struct starpu_codelet opencl_cl = +{ + .opencl_funcs = { multiformat_scal_opencl_func }, + .nbuffers 
= 1, + .modes = { STARPU_RW }, + .name = "opencl_codelet" +}; +#endif /* !STARPU_USE_OPENCL */ + +/* + * Main functions + */ +static void +init_problem_data(void) +{ + int i; + for (i = 0; i < N_ELEMENTS; i++) + { + array_of_structs[i].x = 1.0 + i; + array_of_structs[i].y = 42.0; + } +} + +static void +register_data(void) +{ + starpu_multiformat_data_register(&array_of_structs_handle, + STARPU_MAIN_RAM, + &array_of_structs, + N_ELEMENTS, + &format_ops); +} + +static int +create_and_submit_task(unsigned int dev) +{ + struct starpu_task *task = starpu_task_create(); + switch (dev) + { +#ifdef STARPU_USE_CPU + case STARPU_CPU: + task->cl = &cpu_cl; + break; +#endif +#ifdef STARPU_USE_CUDA + case STARPU_CUDA: + task->cl = &cuda_cl; + break; +#endif +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL: + task->cl = &opencl_cl; + break; +#endif + default: + assert(0); + } + task->synchronous = 1; + task->handles[0] = array_of_structs_handle; + task->cl_arg = NULL; + task->cl_arg_size = 0; + return starpu_task_submit(task); +} + +static void +create_and_submit_tasks(void) +{ +#ifdef STARPU_USE_CUDA + if (ncuda > 0) + { + int err; + err = create_and_submit_task(STARPU_CUDA); + if (err != 0) + { + FPRINTF(stderr, "Cuda : %s\n", strerror(-err)); + return; + } + } +#endif + +#ifdef STARPU_USE_CPU + if (ncpu > 0) + { + int err; + err = create_and_submit_task(STARPU_CPU); + if (err != 0) + { + FPRINTF(stderr, "CPU : %s\n", strerror(-err)); + return; + } + } +#endif + +#ifdef STARPU_USE_OPENCL + if (nopencl > 0) + { + int err; + err = create_and_submit_task(STARPU_OPENCL); + if (err != 0) + { + FPRINTF(stderr, "OpenCL : %s\n", strerror(-err)); + return; + } + } +#endif /* !STARPU_USE_OPENCL */ +} + +static void +unregister_data(void) +{ + starpu_data_unregister(array_of_structs_handle); +} + +static void +print_it(void) +{ + int i; + for (i = 0; i < N_ELEMENTS; i++) + { + FPRINTF(stderr, "(%.2f %.2f) ", + array_of_structs[i].x, + array_of_structs[i].y); + } + FPRINTF(stderr, "\n"); +} 
+ +static int +check_it(void) +{ + int i; + for (i = 0; i < N_ELEMENTS; i++) + { + float expected_value = i + 1.0; +#ifdef STARPU_USE_CUDA + if (ncuda > 0) + expected_value *= array_of_structs[i].y; +#endif +#ifdef STARPU_USE_OPENCL + if (nopencl > 0) + expected_value *= array_of_structs[i].y; +#endif + expected_value *= array_of_structs[i].y; + if (array_of_structs[i].x != expected_value) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +struct starpu_opencl_program opencl_conversion_program; +#endif + +static int +gpus_available(void) +{ +#ifdef STARPU_USE_CUDA + if (ncuda > 0) + return 1; +#endif +#ifdef STARPU_USE_OPENCL + if (nopencl > 0) + return 1; +#endif + + return 0; +} + +int +main(void) +{ +#ifdef STARPU_USE_CPU + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + + /* this example doesn't support Master-Slave */ + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; +#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) + conf.ncuda = 0; +#endif + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ncpu = starpu_cpu_worker_get_count(); +#ifdef STARPU_USE_CUDA + ncuda = starpu_cuda_worker_get_count(); +#endif +#ifdef STARPU_USE_OPENCL + nopencl = starpu_opencl_worker_get_count(); +#endif + + if (ncpu == 0 || !gpus_available()) + { + starpu_shutdown(); + return 77; + } + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_opencl_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl", + &opencl_conversion_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + init_problem_data(); + + print_it(); + + register_data(); + + 
create_and_submit_tasks(); + + unregister_data(); /* unregister before print_it()/check_it() read array_of_structs again */ + + print_it(); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); + ret = starpu_opencl_unload_opencl(&opencl_conversion_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + + + return check_it(); /* the exit status is the result of check_it() */ +#else + /* Without the CPU, there is no point in using the multiformat + * interface, so this test is pointless. */ + return 77; +#endif +} diff --git a/examples/basic_examples/multiformat_conversion_codelets.c b/examples/basic_examples/multiformat_conversion_codelets.c new file mode 100644 index 0000000..d56e229 --- /dev/null +++ b/examples/basic_examples/multiformat_conversion_codelets.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "multiformat_types.h" + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_USE_CUDA +void cuda_to_cpu(void *buffers[], void *arg) +{ /* copy every element from the struct_of_arrays buffer (CUDA-side pointer) into the struct point array (CPU-side pointer) */ + (void)arg; + struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); + struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + int i; + for (i = 0; i < n; i++) + { + dst[i].x = src->x[i]; + dst[i].y = src->y[i]; + } +} + +extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); + +struct starpu_codelet cpu_to_cuda_cl = +{ + .cuda_funcs = {cpu_to_cuda_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet_cpu_to_cuda" +}; + +struct starpu_codelet cuda_to_cpu_cl = +{ + .cpu_funcs = {cuda_to_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet_cuda_to_cpu" /* was misspelled "cude" */ +}; +#endif + +#ifdef STARPU_USE_OPENCL +void opencl_to_cpu(void *buffers[], void *arg) +{ /* same element-wise copy as cuda_to_cpu, reading from the OpenCL-side pointer */ + (void)arg; + FPRINTF(stderr, "User Entering %s\n", __starpu_func__); + struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); + struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + int i; + for (i = 0; i < n; i++) + { + dst[i].x = src->x[i]; + dst[i].y = src->y[i]; + } +} + +extern void cpu_to_opencl_opencl_func(void *buffers[], void *args); +struct starpu_codelet cpu_to_opencl_cl = +{ + .opencl_funcs = {cpu_to_opencl_opencl_func}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, + .nbuffers = 1, + .modes = {STARPU_RW}, +}; + +struct starpu_codelet opencl_to_cpu_cl = +{ + .cpu_funcs = {opencl_to_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, +}; +#endif diff --git a/examples/basic_examples/multiformat_conversion_codelets_cuda.cu b/examples/basic_examples/multiformat_conversion_codelets_cuda.cu new file mode 100644 index 0000000..486d74c --- /dev/null +++ b/examples/basic_examples/multiformat_conversion_codelets_cuda.cu @@ -0,0 +1,49 @@ +/* 
StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "multiformat_types.h" + +static __global__ void cpu_to_cuda_cuda(struct point *src, + struct struct_of_arrays *dst, unsigned n) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; /* one thread per element */ + + if (i < n) /* the grid is rounded up, so surplus threads must do nothing */ + { + dst->x[i] = src[i].x; + dst->y[i] = src[i].y; + } + +} + +extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args) +{ + struct point *src; + struct struct_of_arrays *dst; + + src = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); + + int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; /* ceil(n / threads_per_block) */ + + cpu_to_cuda_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(src, dst, n); /* no stream synchronization here: cpu_to_cuda_cl declares STARPU_CUDA_ASYNC */ + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/basic_examples/multiformat_conversion_codelets_opencl.c b/examples/basic_examples/multiformat_conversion_codelets_opencl.c new file mode 100644 index 0000000..989240c --- /dev/null +++ b/examples/basic_examples/multiformat_conversion_codelets_opencl.c @@ -0,0 +1,94 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +extern struct starpu_opencl_program opencl_conversion_program; + +void cpu_to_opencl_opencl_func(void *buffers[], void *args) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); + cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_conversion_program, + "cpu_to_opencl_opencl", + devid); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* kernel arguments: 0 = src, 1 = dst, 2 = element count */ err = clSetKernelArg(kernel, 0, sizeof(src), &src); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 1, sizeof(dst), &dst); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 2, sizeof(n), &n); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo(kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = 
global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + NULL); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl b/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl new file mode 100644 index 0000000..a9eb599 --- /dev/null +++ b/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "multiformat_types.h" + +__kernel void cpu_to_opencl_opencl(__global struct point *src, + __global struct struct_of_arrays *dst, + unsigned int n) +{ + const unsigned int i = get_global_id(0); + if (i < n) + { + dst->x[i] = src[i].x; + dst->y[i] = src[i].y; + } +} diff --git a/examples/basic_examples/multiformat_cuda.cu b/examples/basic_examples/multiformat_cuda.cu new file mode 100644 index 0000000..a8c199e --- /dev/null +++ b/examples/basic_examples/multiformat_cuda.cu @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "multiformat_types.h" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; /* one thread per element */ + + if (i < n) /* the grid is rounded up, so surplus threads must do nothing */ + soa->x[i] *= soa->y[i]; +} + +extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args) +{ + (void) _args; + + FPRINTF(stderr, "Running the cuda kernel (%s)\n", __starpu_func__); + unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + struct struct_of_arrays *soa; + + soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; /* ceil(n / threads_per_block) */ + multiformat_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(soa, n); /* launched on the stream that is synchronized below */ + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} diff --git a/examples/basic_examples/multiformat_opencl.c b/examples/basic_examples/multiformat_opencl.c new file mode 100644 index 0000000..ffe9a35 --- /dev/null +++ b/examples/basic_examples/multiformat_opencl.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +extern struct starpu_opencl_program opencl_program; + +void multiformat_scal_opencl_func(void *buffers[], void *args) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); + cl_mem val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "multiformat_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* kernel arguments: 0 = data buffer, 1 = element count */ err = clSetKernelArg(kernel, 0, sizeof(val), &val); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 1, sizeof(n), &n); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo(kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = global; + else + global = (global + local-1) / local * local; /* round the global size up to a multiple of the work-group size */ + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + 
STARPU_OPENCL_REPORT_ERROR(err); + } + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/basic_examples/multiformat_opencl_kernel.cl b/examples/basic_examples/multiformat_opencl_kernel.cl new file mode 100644 index 0000000..9afd439 --- /dev/null +++ b/examples/basic_examples/multiformat_opencl_kernel.cl @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "multiformat_types.h" + +__kernel void multiformat_opencl(__global struct struct_of_arrays *soa, int nx) +{ + const int i = get_global_id(0); + if (i < nx) + soa->x[i] *= soa->y[i]; +} diff --git a/examples/basic_examples/multiformat_types.h b/examples/basic_examples/multiformat_types.h new file mode 100644 index 0000000..6a96958 --- /dev/null +++ b/examples/basic_examples/multiformat_types.h @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef MULTIFORMAT_TYPES_H +#define MULTIFORMAT_TYPES_H + +#define N_ELEMENTS 10 + +struct struct_of_arrays +{ + float x[N_ELEMENTS]; + float y[N_ELEMENTS]; +}; +struct point +{ + float x, y; +}; + +#endif diff --git a/examples/basic_examples/ndim.c b/examples/basic_examples/ndim.c new file mode 100644 index 0000000..192b994 --- /dev/null +++ b/examples/basic_examples/ndim.c @@ -0,0 +1,117 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu.h> + +#define NX 2 +#define NY 3 +#define NZ 2 +#define NT 2 + +void arr4d_cpu_func(void *buffers[], void *args) +{ /* multiplies every element of the 4-dimensional array by 10, in place */ + (void)args; + int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); + int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); + unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; + int nt = nn[3]; + unsigned ldy = ldn[1]; + unsigned ldz = ldn[2]; + unsigned ldt = ldn[3]; + int i, j, k, l; + for (l = 0; l < nt; l++) + { + for (k = 0; k < nz; k++) + { + for (j = 0; j < ny; j++) + { + for (i = 0; i < nx; i++) + { + arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] *= 10; /* linear offset built from the ldn[1..3] strides */ + } + } + } + } +} + +struct starpu_codelet arr4d_cl = +{ + .cpu_funcs = {arr4d_cpu_func}, + .cpu_funcs_name = {"arr4d_cpu_func"}, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "arr4d_cl" +}; + +int main(void) +{ + int ret; + int arr4d[NX*NY*NZ*NT]; + int val = 0; + int i, j, k, l; + starpu_data_handle_t arr4d_handle; + unsigned nn[4] = {NX, NY, NZ, NT}; + unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; /* contiguous layout, x varies fastest */ + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + for (l = 0; l < NT; l++) + for (k = 0; k < NZ; k++) + for (j = 0; j < NY; j++) + for (i = 0; i < NX; i++) + arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = val++; + + starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(arr4d[0])); + + ret = starpu_task_insert(&arr4d_cl, + STARPU_RW, arr4d_handle, + 0); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unregister(arr4d_handle); + for (l = 0; l < NT; l++) + { + fprintf(stderr, "------\n"); + for (k = 0; k < NZ; k++) + { + for (j = 0; j < NY; j++) + { + fprintf(stderr, "|\t"); + for (i = 0; i < NX; i++) + fprintf(stderr, "%d\t", arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i]); + fprintf(stderr, " |"); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "------\n"); + } + starpu_shutdown(); + + 
return 0; + +enodev: + starpu_data_unregister(arr4d_handle); /* reached only after starpu_init() and data registration succeeded */ + starpu_shutdown(); + return 77; +} diff --git a/examples/basic_examples/task_insert_color.c b/examples/basic_examples/task_insert_color.c new file mode 100644 index 0000000..7e06ad2 --- /dev/null +++ b/examples/basic_examples/task_insert_color.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void func(void *descr[], void *_args) +{ /* doubles the int variable held by the first buffer */ + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + (void)_args; + + *x *= 2; +} + +struct starpu_codelet mycodelet = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1 +}; + +struct starpu_codelet mycodelet_color = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1, + .color = 0x0000FF, +}; + +int main(void) +{ + int value=42; + starpu_data_handle_t handle; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + // In the trace file, the following task should be green (executed on CPU) + ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask_green", + 0); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + starpu_data_unregister(handle); starpu_shutdown(); /* starpu_init() succeeded, so shut down before bailing out */ + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + // In the trace file, the following task will be red as specified by STARPU_TASK_COLOR + ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask_red", + STARPU_TASK_COLOR, 0xFF0000, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + // In the trace file, the following task will be blue as specified by the field color of mycodelet_color + ret = starpu_task_insert(&mycodelet_color, STARPU_RW, handle, STARPU_NAME, "mytask_blue", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; + +enodev: + return 77; +} diff --git a/examples/basic_examples/topology.c b/examples/basic_examples/topology.c new file mode 100644 index 0000000..0724791 --- /dev/null +++ b/examples/basic_examples/topology.c @@ -0,0 +1,34 @@ +/* 
StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <starpu.h> + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(void) +{ /* prints the CPU worker names and the machine topology to stdout */ + int ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_worker_display_names(stdout, STARPU_CPU_WORKER); + starpu_topology_print(stdout); + + starpu_shutdown(); + return 0; +} diff --git a/examples/basic_examples/variable.c b/examples/basic_examples/variable.c new file mode 100644 index 0000000..7ad46dd --- /dev/null +++ b/examples/basic_examples/variable.c @@ -0,0 +1,110 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 500; +#elif !defined(STARPU_LONG_CHECK) +static unsigned niter = 5000; +#else +static unsigned niter = 50000; +#endif + +extern void cpu_codelet(void *descr[], void *_args); + +#ifdef STARPU_USE_CUDA +extern void cuda_codelet(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +extern void opencl_codelet(void *descr[], void *_args); +struct starpu_opencl_program opencl_program; +#endif + +int main(int argc, char **argv) +{ + unsigned i; + float foo; + starpu_data_handle_t float_array_handle; + struct starpu_codelet cl; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (argc == 2) niter = atoi(argv[1]); /* optional first argument overrides the iteration count */ + foo = 0.0f; + + starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */, + (uintptr_t)&foo, sizeof(float)); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/variable_kernels_opencl_kernel.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + starpu_codelet_init(&cl); + cl.cpu_funcs[0] = cpu_codelet; + cl.cpu_funcs_name[0] = "cpu_codelet"; +#ifdef STARPU_USE_CUDA + cl.cuda_funcs[0] = cuda_codelet; +#endif +#ifdef STARPU_USE_OPENCL + cl.opencl_funcs[0] = opencl_codelet; +#endif + cl.nbuffers = 1; + cl.modes[0] = STARPU_RW; + cl.model = NULL; + cl.name = "variable_inc"; + + for (i = 0; i < niter; i++) /* one task per iteration, all on the same handle */ + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + + task->callback_func = NULL; + + task->handles[0] = float_array_handle; + + ret = starpu_task_submit(task); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this 
task\n"); + starpu_data_unregister(float_array_handle); starpu_shutdown(); /* starpu_init() succeeded, so shut down before bailing out */ + goto enodev; + } + } + + starpu_task_wait_for_all(); + + /* update the array in RAM */ + starpu_data_unregister(float_array_handle); + + FPRINTF(stderr, "variable -> %f\n", foo); + FPRINTF(stderr, "result is %scorrect\n", foo==niter?"":"IN"); + + starpu_shutdown(); + + return (foo == niter) ? EXIT_SUCCESS:EXIT_FAILURE; + +enodev: + return 77; +} diff --git a/examples/basic_examples/variable_kernels.cu b/examples/basic_examples/variable_kernels.cu new file mode 100644 index 0000000..fd7665a --- /dev/null +++ b/examples/basic_examples/variable_kernels.cu @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu.h> + +static __global__ void cuda_variable(float * tab) +{ + *tab += 1.0f; + return; +} + +extern "C" void cuda_codelet(void *descr[], void *_args) +{ /* increments the float variable by one with a single-thread kernel */ + float *val = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); + + cuda_variable<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} diff --git a/examples/basic_examples/variable_kernels_cpu.c b/examples/basic_examples/variable_kernels_cpu.c new file mode 100644 index 0000000..acbc597 --- /dev/null +++ b/examples/basic_examples/variable_kernels_cpu.c @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> + +void cpu_codelet(void *descr[], void *_args) +{ /* CPU counterpart of the increment: adds one to the float variable */ + (void)_args; + float *val = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *val += 1.0f; +} + diff --git a/examples/basic_examples/variable_kernels_opencl.c b/examples/basic_examples/variable_kernels_opencl.c new file mode 100644 index 0000000..451ebff --- /dev/null +++ b/examples/basic_examples/variable_kernels_opencl.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+extern struct starpu_opencl_program opencl_program;
+void opencl_codelet(void *descr[], void *_args)
+{
+	cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+	int id, devid, err;
+
+	(void)_args;
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "variable", devid);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err) STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1;
+		size_t local=1;
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	if ((err = clFinish(queue)) != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); /* do not collect stats from a failed run */
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}
diff --git a/examples/basic_examples/variable_kernels_opencl_kernel.cl b/examples/basic_examples/variable_kernels_opencl_kernel.cl
new file mode 100644
index 0000000..dd182db
--- /dev/null
+++ b/examples/basic_examples/variable_kernels_opencl_kernel.cl
@@ -0,0 +1,23 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +__kernel void variable(__global float* input) +{ + const int i = get_global_id(0); + if (i == 0) + input[i] = input[i] + 1.0f; +} + diff --git a/examples/basic_examples/vector_scal.c b/examples/basic_examples/vector_scal.c new file mode 100644 index 0000000..1d90871 --- /dev/null +++ b/examples/basic_examples/vector_scal.c @@ -0,0 +1,203 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example demonstrates how to use StarPU to scale an array by a factor. + * It shows how to manipulate data with StarPU's data management library. 
+ * 1- how to declare a piece of data to StarPU (starpu_vector_data_register)
+ * 2- how to describe which data are accessed by a task (task->handles[0])
+ * 3- how a kernel can manipulate the data (buffers[0].vector.ptr)
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#define NX 204800
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+extern void scal_cpu_func(void *buffers[], void *_args);
+extern void scal_cpu_func_icc(void *buffers[], void *_args);
+extern void scal_sse_func(void *buffers[], void *_args);
+extern void scal_sse_func_icc(void *buffers[], void *_args);
+extern void scal_cuda_func(void *buffers[], void *_args);
+extern void scal_hip_func(void *buffers[], void *_args);
+extern void scal_opencl_func(void *buffers[], void *_args);
+
+static struct starpu_perfmodel vector_scal_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scal"
+};
+
+static struct starpu_perfmodel vector_scal_energy_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scal_energy"
+};
+
+static struct starpu_codelet cl =
+{
+	/* CPU implementation of the codelet */
+	.cpu_funcs =
+	{
+		scal_cpu_func
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
+		, scal_cpu_func_icc
+#endif
+#ifdef __SSE__
+		, scal_sse_func
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
+		, scal_sse_func_icc
+#endif
+#endif
+	},
+	.cpu_funcs_name =
+	{
+		"scal_cpu_func",
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
+		"scal_cpu_func_icc",
+#endif
+#ifdef __SSE__
+		"scal_sse_func",
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
+		"scal_sse_func_icc"
+#endif
+#endif
+	},
+
+#ifdef STARPU_USE_CUDA
+	/* CUDA implementation of the codelet */
+	.cuda_funcs = {scal_cuda_func},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+#ifdef STARPU_USE_OPENCL
+	/* OpenCL implementation of the codelet */
+	.opencl_funcs = {scal_opencl_func},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
+#endif
+#ifdef STARPU_USE_HIP
+	/* HIP implementation of the codelet */
+	.hip_funcs = {scal_hip_func},
+	.hip_flags = {STARPU_HIP_ASYNC},
+#endif
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.model = &vector_scal_model,
+	.energy_model = &vector_scal_energy_model
+};
+
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
+static int approximately_equal(float a, float b)
+{
+#ifdef STARPU_HAVE_NEARBYINTF
+	int ai = (int) nearbyintf(a * 1000.0);
+	int bi = (int) nearbyintf(b * 1000.0);
+#elif defined(STARPU_HAVE_RINTF)
+	int ai = (int) rintf(a * 1000.0);
+	int bi = (int) rintf(b * 1000.0);
+#else
+#error "Please define either nearbyintf or rintf."
+#endif
+	return ai == bi;
+}
+
+int main(void)
+{
+	/* We consider a vector of float that is initialized just as any of C data */
+	float vector[NX];
+	unsigned i;
+	for (i = 0; i < NX; i++)
+		vector[i] = (i+1.0f);
+
+	/* Initialize StarPU with default configuration */
+	int ret = starpu_init(NULL);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* any other init failure must not be ignored */
+	FPRINTF(stderr, "[BEFORE] 1-th element : %3.2f\n", vector[1]);
+	FPRINTF(stderr, "[BEFORE] (NX-1)th element: %3.2f\n", vector[NX-1]);
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/vector_scal_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
+	/* Tell StarPU to associate the "vector" vector with the "vector_handle"
+	 * identifier. When a task needs to access a piece of data, it should
+	 * refer to the handle that is associated to it.
+	 * In the case of the "vector" data interface:
+	 * - the first argument of the registration method is a pointer to the
+	 * handle that should describe the data
+	 * - the second argument is the memory node where the data (ie. "vector")
+	 * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
+	 * opposed to an address on a GPU for instance.
+	 * - the third argument is the address of the vector in RAM
+	 * - the fourth argument is the number of elements in the vector
+	 * - the fifth argument is the size of each element.
+	 */
+	starpu_data_handle_t vector_handle;
+	starpu_memory_pin(vector, sizeof(vector));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 3.14;
+
+	/* create a synchronous task: any call to starpu_task_submit will block
+	 * until it is terminated */
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = 1;
+
+	task->cl = &cl;
+
+	/* the codelet manipulates one buffer in RW mode */
+	task->handles[0] = vector_handle;
+
+	/* an argument is passed to the codelet, beware that this is a
+	 * READ-ONLY buffer and that the codelet may be given a pointer to a
+	 * COPY of the argument */
+	task->cl_arg = &factor;
+	task->cl_arg_size = sizeof(factor);
+
+	/* execute the task on any eligible computational resource */
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* StarPU does not need to manipulate the array anymore so we can stop
+	 * monitoring it */
+	starpu_data_unregister(vector_handle);
+	starpu_memory_unpin(vector, sizeof(vector));
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+#endif
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	ret = approximately_equal(vector[1], (1+1.0f) * factor) && approximately_equal(vector[NX-1], (NX-1+1.0f) * factor);
+	FPRINTF(stderr, "[AFTER] 1-th element : %3.2f (should be %3.2f)\n", vector[1], (1+1.0f) * factor);
+	FPRINTF(stderr, "[AFTER] (NX-1)-th element: %3.2f (should be %3.2f)\n", vector[NX-1], (NX-1+1.0f) * factor);
+	FPRINTF(stderr, "[AFTER] Computation is%s correct\n", ret?"":" NOT");
+	return (ret ? EXIT_SUCCESS : EXIT_FAILURE);
+
+enodev:
+	return 77;
+}
diff --git a/examples/basic_examples/vector_scal_c.c b/examples/basic_examples/vector_scal_c.c
new file mode 100644
index 0000000..d52a694
--- /dev/null
+++ b/examples/basic_examples/vector_scal_c.c
@@ -0,0 +1,111 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example demonstrates how to use StarPU to scale an array by a factor.
+ * It shows how to manipulate data with StarPU's data management library.
+ * 1- how to declare a piece of data to StarPU (starpu_vector_data_register)
+ * 2- how to describe which data are accessed by a task (task->handles[0])
+ * 3- how a kernel can manipulate the data (buffers[0].vector.ptr)
+ *
+ * This is a variant of vector_scal.c which shows it can be integrated with fortran.
+ */
+
+#include <starpu.h>
+#include <stdio.h>
+
+
+extern void scal_cpu_func(void *buffers[], void *_args);
+extern void scal_cuda_func(void *buffers[], void *_args);
+
+static struct starpu_perfmodel vector_scal_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scal_model"
+};
+
+static struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW },
+	/* CPU implementation of the codelet */
+	.cpu_funcs = {scal_cpu_func},
+	.cpu_funcs_name = {"scal_cpu_func"},
+#ifdef STARPU_USE_CUDA
+	/* CUDA implementation of the codelet */
+	.cuda_funcs = {scal_cuda_func},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+	.nbuffers = 1,
+	.model = &vector_scal_model
+};
+
+int compute_(int *F_NX, float *vector)
+{
+	int NX = *F_NX;
+	int ret;
+
+	/* Initialize StarPU with default configuration */
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Tell StarPU to associate the "vector" vector with the "vector_handle"
+	 * identifier. When a task needs to access a piece of data, it should
+	 * refer to the handle that is associated to it.
+	 * In the case of the "vector" data interface:
+	 * - the first argument of the registration method is a pointer to the
+	 * handle that should describe the data
+	 * - the second argument is the memory node where the data (ie. "vector")
+	 * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
+	 * opposed to an address on a GPU for instance.
+	 * - the third argument is the address of the vector in RAM
+	 * - the fourth argument is the number of elements in the vector
+	 * - the fifth argument is the size of each element.
+	 */
+	starpu_data_handle_t vector_handle;
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 3.14;
+
+	/* create a synchronous task: any call to starpu_task_submit will block
+	 * until it is terminated */
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = 1;
+
+	task->cl = &cl;
+
+	/* the codelet manipulates one buffer in RW mode */
+	task->handles[0] = vector_handle;
+
+	/* an argument is passed to the codelet, beware that this is a
+	 * READ-ONLY buffer and that the codelet may be given a pointer to a
+	 * COPY of the argument */
+	task->cl_arg = &factor;
+	task->cl_arg_size = sizeof(factor);
+
+	/* execute the task on any eligible computational resource */
+	ret = starpu_task_submit(task);
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* StarPU does not need to manipulate the array anymore so we can stop
+	 * monitoring it */
+	starpu_data_unregister(vector_handle);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	return ret;
+}
diff --git a/examples/basic_examples/vector_scal_cpu.c b/examples/basic_examples/vector_scal_cpu.c
new file mode 100644
index 0000000..cc12fe6
--- /dev/null
+++ b/examples/basic_examples/vector_scal_cpu.c
@@ -0,0 +1,25 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example complements vector_scal.c: here we implement a CPU version. + */ + +#include "vector_scal_cpu_template.h" + +VECTOR_SCAL_CPU_FUNC(scal_cpu_func) +VECTOR_SCAL_SSE_FUNC(scal_sse_func) + diff --git a/examples/basic_examples/vector_scal_cpu_icc.icc b/examples/basic_examples/vector_scal_cpu_icc.icc new file mode 100644 index 0000000..dbc31fe --- /dev/null +++ b/examples/basic_examples/vector_scal_cpu_icc.icc @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example complements vector_scal.c: here we implement a CPU version, + * meant to be compiled by icc. + */ + +#include "vector_scal_cpu_template.h" + +VECTOR_SCAL_CPU_FUNC(scal_cpu_func_icc) +VECTOR_SCAL_SSE_FUNC(scal_sse_func_icc) + diff --git a/examples/basic_examples/vector_scal_cpu_template.h b/examples/basic_examples/vector_scal_cpu_template.h new file mode 100644 index 0000000..931ebed --- /dev/null +++ b/examples/basic_examples/vector_scal_cpu_template.h @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example complements vector_scal.c: here we implement a CPU version.
+ */
+
+#ifndef __VECTOR_SCAL_CPU_TEMPLATE_H__
+#define __VECTOR_SCAL_CPU_TEMPLATE_H__
+
+#include <starpu.h>
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+/* This kernel takes a buffer and scales it by a constant factor */
+#define VECTOR_SCAL_CPU_FUNC(func_name) \
+void func_name(void *buffers[], void *cl_arg) \
+{ \
+	unsigned i; \
+	float *factor = (float *) cl_arg; \
+ \
+	/* \
+	 * The "buffers" array matches the task->handles array: for instance \
+	 * task->handles[0] is a handle that corresponds to a data with \
+	 * vector "interface", so that the first entry of the array in the \
+	 * codelet is a pointer to a structure describing such a vector (ie. \
+	 * struct starpu_vector_interface *). Here, we therefore manipulate \
+	 * the buffers[0] element as a vector: nx gives the number of elements \
+	 * in the array, ptr gives the location of the array (that was possibly\
+	 * migrated/replicated), and elemsize gives the size of each element. \
+	 */ \
+ \
+	struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; \
+ \
+	/* length of the vector */ \
+	unsigned n = STARPU_VECTOR_GET_NX(vector); \
+ \
+	/* get a pointer to the local copy of the vector : note that we have to\
+	 * cast it in (float *) since a vector could contain any type of \
+	 * elements so that the .ptr field is actually a uintptr_t */ \
+	float *val = (float *)STARPU_VECTOR_GET_PTR(vector); \
+ \
+	/* scale the vector */ \
+	for (i = 0; i < n; i++) \
+		val[i] *= *factor; \
+}
+
+#ifdef __SSE__
+#define VECTOR_SCAL_SSE_FUNC(func_name) \
+void func_name(void *buffers[], void *cl_arg) \
+{ \
+	float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); \
+	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); \
+	unsigned int n_iterations = n/4; \
+ \
+	__m128 *VECTOR = (__m128*) vector; \
+	__m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); \
+	float factor = *(float *) cl_arg; \
+	FACTOR = _mm_set1_ps(factor); \
+ \
+	unsigned int i; \
+	for (i = 0; i < n_iterations; i++) \
+		VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); \
+ \
+	unsigned int remainder = n%4; \
+	if (remainder != 0) \
+	{ \
+		unsigned int start = 4 * n_iterations; \
+		for (i = start; i < start+remainder; ++i) \
+		{ \
+			vector[i] = factor * vector[i]; \
+		} \
+	} \
+}
+#else /* !__SSE__ */
+#define VECTOR_SCAL_SSE_FUNC(func_name)
+#endif /* !__SSE__ */
+
+#endif /* !__VECTOR_SCAL_CPU_TEMPLATE_H__ */
diff --git a/examples/basic_examples/vector_scal_cuda.cu b/examples/basic_examples/vector_scal_cuda.cu
new file mode 100644
index 0000000..73e2ac3
--- /dev/null
+++ b/examples/basic_examples/vector_scal_cuda.cu
@@ -0,0 +1,46 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example complements vector_scal.c: here we implement a CUDA version.
+ */
+
+#include <starpu.h>
+
+static __global__ void vector_mult_cuda(unsigned n, float *val,
+					float factor)
+{
+	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i < n)
+		val[i] *= factor;
+}
+
+extern "C" void scal_cuda_func(void *buffers[], void *_args)
+{
+	float *factor = (float *)_args;
+
+	/* length of the vector */
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	/* local copy of the vector pointer */
+	float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+	vector_mult_cuda<<<nblocks,threads_per_block, 0, starpu_cuda_get_local_stream()>>>(n, val, *factor);
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
+}
diff --git a/examples/basic_examples/vector_scal_fortran.F b/examples/basic_examples/vector_scal_fortran.F
new file mode 100644
index 0000000..3f86f4a
--- /dev/null
+++ b/examples/basic_examples/vector_scal_fortran.F
@@ -0,0 +1,34 @@
+C StarPU --- Runtime system for heterogeneous multicore architectures.
+C +C Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +C +C StarPU is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at +C your option) any later version. +C +C StarPU is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of +C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +C +C See the GNU Lesser General Public License in COPYING.LGPL for more details. +C + + PROGRAM VECTOR_SCAL + INTEGER,PARAMETER :: F_NX=2048 + REAL,DIMENSION(F_NX) :: VECTOR + + INTEGER :: I + DO I=1,F_NX,1 + VECTOR(I)=1.0 + ENDDO + + WRITE (*,*) ' BEFORE : First element was ', VECTOR(1) + WRITE (*,*) ' BEFORE : Last element was ', VECTOR(F_NX) + + CALL COMPUTE(F_NX, VECTOR) + + WRITE (*,*) ' AFTER : First element is ', VECTOR(1) + WRITE (*,*) ' AFTER : Last element is ', VECTOR(F_NX) + + END PROGRAM diff --git a/examples/basic_examples/vector_scal_hip.hip b/examples/basic_examples/vector_scal_hip.hip new file mode 100644 index 0000000..06d2dc1 --- /dev/null +++ b/examples/basic_examples/vector_scal_hip.hip @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+/*
+ * This example complements vector_scal.c: here we implement a HIP version.
+ */
+
+
+#include <starpu.h>
+
+static __global__ void vector_mult_hip(unsigned n, float *val, float factor)
+{
+	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i < n)
+		val[i] *= factor;
+}
+
+extern "C" void scal_hip_func(void *buffers[], void *_args)
+{
+	float *factor = (float *)_args;
+	/* length of the vector */
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	/* local copy of the vector pointer */
+	float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+	hipLaunchKernelGGL(vector_mult_hip, nblocks, threads_per_block, 0, starpu_hip_get_local_stream(), n, val, *factor);
+	hipError_t status = hipGetLastError();
+	if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status);
+}
diff --git a/examples/basic_examples/vector_scal_opencl.c b/examples/basic_examples/vector_scal_opencl.c
new file mode 100644
index 0000000..235f575
--- /dev/null
+++ b/examples/basic_examples/vector_scal_opencl.c
@@ -0,0 +1,66 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example complements vector_scal.c: here we implement an OpenCL version.
+ */
+
+#include <starpu.h>
+
+extern struct starpu_opencl_program opencl_program;
+
+void scal_opencl_func(void *buffers[], void *_args)
+{
+	float *factor = _args;
+	int id, devid;
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+
+	/* length of the vector */
+	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
+	/* OpenCL copy of the vector pointer */
+	cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "vector_mult_opencl", devid);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(n), &n);
+	if (err == CL_SUCCESS) err = clSetKernelArg(kernel, 1, sizeof(val), &val); /* keep the first failing code instead of OR-ing codes together */
+	if (err == CL_SUCCESS) err = clSetKernelArg(kernel, 2, sizeof(*factor), factor);
+	if (err) STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+		size_t s;
+		cl_device_id device;
+
+		starpu_opencl_get_device(devid, &device);
+
+		err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+		if (local > global) local=global;
+		else global = (global + local-1) / local * local;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	}
+	starpu_opencl_release_kernel(kernel);
+}
diff --git a/examples/basic_examples/vector_scal_opencl_kernel.cl b/examples/basic_examples/vector_scal_opencl_kernel.cl
new file mode 100644
index 0000000..099cf62
--- /dev/null
+++ b/examples/basic_examples/vector_scal_opencl_kernel.cl
@@ -0,0 +1,24 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor)
+{
+	const size_t i = get_global_id(0); /* unsigned index: no signed/unsigned comparison against nx */
+	if (i < nx)
+	{
+		val[i] *= factor;
+	}
+}
diff --git a/examples/binary/binary.c b/examples/binary/binary.c
new file mode 100644
index 0000000..600fb3d
--- /dev/null
+++ b/examples/binary/binary.c
@@ -0,0 +1,154 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This shows how to load OpenCL programs, either from a .cl file, or from a
+ * string containing the program itself.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+#ifdef STARPU_USE_OPENCL
+extern void opencl_codelet(void *descr[], void *_args);
+struct starpu_opencl_program opencl_program;
+#endif
+
+struct starpu_codelet cl =
+{
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_codelet},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
+#endif
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+int compute(char *file_name, int load_as_file, int with_malloc)
+{
+	float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f};
+	starpu_data_handle_t float_array_handle;
+	unsigned i;
+	int ret = 0;
+#ifdef STARPU_QUICK_CHECK
+	unsigned niter = 50;
+#else
+	unsigned niter = 500;
+#endif
+
+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float));
+
+#ifdef STARPU_USE_OPENCL
+	if (load_as_file)
+	{
+		ret = starpu_opencl_compile_opencl_from_file(file_name, NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file");
+		ret = starpu_opencl_load_binary_opencl(file_name, &opencl_program);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl");
+	}
+	else if (with_malloc)
+	{
+		char *located_file_name;
+		char *located_dir_name;
+		char *opencl_program_source;
+		starpu_opencl_load_program_source_malloc(file_name, &located_file_name, &located_dir_name, &opencl_program_source);
+		ret = starpu_opencl_compile_opencl_from_string(opencl_program_source, "incrementer", NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_string");
+		ret = starpu_opencl_load_binary_opencl("incrementer", &opencl_program);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl");
+		free(located_file_name);
+		free(located_dir_name);
+		free(opencl_program_source);
+	}
+	else
+	{
+		char located_file_name[1024];
+		char located_dir_name[1024];
+		char opencl_program_source[16384];
+		starpu_opencl_load_program_source(file_name, located_file_name, located_dir_name, opencl_program_source);
+		ret = starpu_opencl_compile_opencl_from_string(opencl_program_source, "incrementer", NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_string");
+		ret = starpu_opencl_load_binary_opencl("incrementer", &opencl_program);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl");
+	}
+#endif
+
+	for (i = 0; i < niter; i++)
+	{
+		ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, STARPU_TAG_ONLY, (starpu_tag_t) i, 0);
+		if (STARPU_UNLIKELY(ret == -ENODEV))
+		{
+			FPRINTF(stderr, "No worker may execute this task\n");
+			exit(77); /* report "skipped" like main() does, not success */
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+	/* update the array in RAM */
+	starpu_data_unregister(float_array_handle);
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+#endif
+
+	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]);
+
+	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3])
+	{
+		FPRINTF(stderr, "Incorrect result\n");
+		ret = 1;
+	}
+	return ret;
+}
+
+int main(void)
+{
+	int ret = 0;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.ncuda = 0;
+
+	ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		FPRINTF(stderr, "This application requires an OpenCL worker.\n");
+		return 77;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	if (starpu_opencl_worker_get_count() == 0)
+	{
+		FPRINTF(stderr, "This application requires an OpenCL worker.\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 1, -1);
+	if (ret == 0)
+		ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 0, 0);
+	else
+		FPRINTF(stderr, "Error when calling compute %d\n", ret);
+	if (ret == 0)
+		ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 0, 1);
+	else
+		FPRINTF(stderr, "Error when calling compute %d\n", ret);
+
+	starpu_shutdown();
+	return ret;
+}
diff --git a/examples/callback/callback.c b/examples/callback/callback.c
new file mode 100644
index 0000000..43453bc
--- /dev/null
+++ b/examples/callback/callback.c
@@ -0,0 +1,93 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This is an example of using a callback. We submit a task, whose callback
+ * submits another task (without any callback).
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+starpu_data_handle_t handle;
+
+void cpu_codelet(void *descr[], void *_args)
+{
+	(void)_args;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	*val += 1;
+}
+
+struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW },
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 1,
+	.name = "callback"
+};
+
+void callback_func(void *callback_arg)
+{
+	int ret;
+
+	(void)callback_arg;
+
+	struct starpu_task *task = starpu_task_create();
+	task->cl = &cl;
+	task->handles[0] = handle;
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+int main(void)
+{
+	int v=40;
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
+
+	struct starpu_task *task = starpu_task_create();
+	task->cl = &cl;
+	task->callback_func = callback_func;
+	task->callback_arg = NULL;
+	task->handles[0] = handle;
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_task_wait_for_all();
+	starpu_data_unregister(handle);
+
+	FPRINTF(stderr, "v -> %d\n", v);
+
+	starpu_shutdown();
+
+	return (v == 42) ? 0 : 1;
+
+enodev:
+	starpu_shutdown();
+	return 77;
+}
diff --git a/examples/callback/prologue.c b/examples/callback/prologue.c
new file mode 100644
index 0000000..dcfdaa4
--- /dev/null
+++ b/examples/callback/prologue.c
@@ -0,0 +1,107 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is an example of using a prologue callback. We submit a task, whose + * prologue callback (i.e. before task gets scheduled) prints a value, and + * whose pop_prologue callback (i.e. after task gets scheduled, but before task + * execution) prints another value. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +starpu_data_handle_t handle; + +void cpu_codelet(void *descr[], void *_args) +{ + (void)_args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *val += 1; + printf("task executing \n"); +} + +struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, + .nbuffers = 1, + .name = "callback" +}; + +void prologue_callback_func(void *callback_arg) +{ + double *x = (double*)callback_arg; + printf("x = %lf\n", *x); + STARPU_ASSERT(*x == -999.0); +} + +void pop_prologue_callback_func(void *args) +{ + unsigned val = (uintptr_t) args; + printf("pop_prologue_callback val %u \n", val); + STARPU_ASSERT(val == 5); +} + + +int main(void) +{ + int v=40; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); + double x = -999.0; 
+ + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->prologue_callback_func = prologue_callback_func; + task->prologue_callback_arg = &x; + + task->prologue_callback_pop_func = pop_prologue_callback_func; + task->prologue_callback_pop_arg = (void*) 5; + + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl, + STARPU_RW, handle, + STARPU_PROLOGUE_CALLBACK, prologue_callback_func, + STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &x, + STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func, + STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, 5, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + +enodev: + starpu_data_unregister(handle); + FPRINTF(stderr, "v -> %d\n", v); + starpu_shutdown(); + return (ret == -ENODEV) ? 77 : 0; +} diff --git a/examples/cg/cg.c b/examples/cg/cg.c new file mode 100644 index 0000000..1d24897 --- /dev/null +++ b/examples/cg/cg.c @@ -0,0 +1,354 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + + +/* + * Conjugate Gradient + * + * Input: + * - matrix A + * - vector b + * - vector x (starting value) + * - int i_max, error tolerance eps < 1. 
+ * Output: + * - vector x + * + * Pseudo code: + * + * i <- 0 + * r <- b - Ax + * d <- r + * delta_new <- dot(r,r) + * delta_0 <- delta_new + * + * while (i < i_max && delta_new > eps^2 delta_0) + * { + * q <- Ad + * alpha <- delta_new/dot(d, q) + * x <- x + alpha d + * + * If (i is divisible by 50) + * r <- b - Ax + * else + * r <- r - alpha q + * + * delta_old <- delta_new + * delta_new <- dot(r,r) + * beta <- delta_new/delta_old + * d <- r + beta d + * i <- i + 1 + * } + * + * The dot() operations make use of reductions to optimize parallelism. + * + */ + +#include "cg.h" + +static int copy_handle(starpu_data_handle_t dst, starpu_data_handle_t src, unsigned nblocks); + +#define HANDLE_TYPE_VECTOR starpu_data_handle_t +#define HANDLE_TYPE_MATRIX starpu_data_handle_t +#define TASK_INSERT(cl, ...) starpu_task_insert(cl, ##__VA_ARGS__) +#define GET_VECTOR_BLOCK(v, i) starpu_data_get_sub_data(v, 1, i) +#define GET_MATRIX_BLOCK(m, i, j) starpu_data_get_sub_data(m, 2, i, j) +#define BARRIER() +#define GET_DATA_HANDLE(handle) +#define FPRINTF_SERVER FPRINTF + +#include "cg_kernels.c" + +static TYPE *A, *b, *x; +static TYPE *r, *d, *q; + +static int copy_handle(starpu_data_handle_t dst, starpu_data_handle_t src, unsigned nb) +{ + unsigned block; + int ret = 0; /* status of the last starpu_data_cpy; a non-zero value stops the loop and is returned */ + for (block = 0; block < nb && ret == 0; block++) + ret = starpu_data_cpy(starpu_data_get_sub_data(dst, 1, block), starpu_data_get_sub_data(src, 1, block), 1, NULL, NULL); + return ret; +} + +/* + * Generate Input data + */ +static void generate_random_problem(void) +{ + int i, j; + + starpu_malloc((void **)&A, n*n*sizeof(TYPE)); + starpu_malloc((void **)&b, n*sizeof(TYPE)); + starpu_malloc((void **)&x, n*sizeof(TYPE)); + assert(A && b && x); + + for (j = 0; j < n; j++) + { + b[j] = (TYPE)1.0; + x[j] = (TYPE)0.0; + + /* We take the Hilbert matrix, which is ill-conditioned but positive definite: H(i,j) = 1/(1+i+j) */ + for (i = 0; i < n; i++) + { + A[n*j + i] = (TYPE)(1.0/(1.0+i+j)); + } + } + + /* Internal vectors */ + starpu_malloc((void **)&r, 
n*sizeof(TYPE)); + starpu_malloc((void **)&d, n*sizeof(TYPE)); + starpu_malloc((void **)&q, n*sizeof(TYPE)); + assert(r && d && q); + + memset(r, 0, n*sizeof(TYPE)); + memset(d, 0, n*sizeof(TYPE)); + memset(q, 0, n*sizeof(TYPE)); +} + +static void free_data(void) +{ + starpu_free_noflag(A, n*n*sizeof(TYPE)); + starpu_free_noflag(b, n*sizeof(TYPE)); + starpu_free_noflag(x, n*sizeof(TYPE)); + starpu_free_noflag(r, n*sizeof(TYPE)); + starpu_free_noflag(d, n*sizeof(TYPE)); + starpu_free_noflag(q, n*sizeof(TYPE)); +} + +static void register_data(void) +{ + starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, n, n, n, sizeof(TYPE)); + starpu_vector_data_register(&b_handle, STARPU_MAIN_RAM, (uintptr_t)b, n, sizeof(TYPE)); + starpu_vector_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)x, n, sizeof(TYPE)); + + starpu_vector_data_register(&r_handle, STARPU_MAIN_RAM, (uintptr_t)r, n, sizeof(TYPE)); + starpu_vector_data_register(&d_handle, STARPU_MAIN_RAM, (uintptr_t)d, n, sizeof(TYPE)); + starpu_vector_data_register(&q_handle, STARPU_MAIN_RAM, (uintptr_t)q, n, sizeof(TYPE)); + + starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE)); + starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE)); + + if (use_reduction) + { + starpu_data_set_reduction_methods(q_handle, &accumulate_vector_cl, &bzero_vector_cl); + starpu_data_set_reduction_methods(r_handle, &accumulate_vector_cl, &bzero_vector_cl); + + starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl); + starpu_data_set_reduction_methods(rtr_handle, &accumulate_variable_cl, &bzero_variable_cl); + } +} + +static void unregister_data(void) +{ + starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(b_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(x_handle, STARPU_MAIN_RAM); + + starpu_data_unpartition(r_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(d_handle, 
STARPU_MAIN_RAM); + starpu_data_unpartition(q_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(A_handle); + starpu_data_unregister(b_handle); + starpu_data_unregister(x_handle); + + starpu_data_unregister(r_handle); + starpu_data_unregister(d_handle); + starpu_data_unregister(q_handle); + + starpu_data_unregister(dtq_handle); + starpu_data_unregister(rtr_handle); +} + +/* + * Data partitioning filters + */ + +struct starpu_data_filter vector_filter; +struct starpu_data_filter matrix_filter_1; +struct starpu_data_filter matrix_filter_2; + +static void partition_data(void) +{ + assert(n % nblocks == 0); + + /* + * Partition the A matrix + */ + + /* Partition into contiguous parts */ + matrix_filter_1.filter_func = starpu_matrix_filter_block; + matrix_filter_1.nchildren = nblocks; + /* Partition into non-contiguous parts */ + matrix_filter_2.filter_func = starpu_matrix_filter_vertical_block; + matrix_filter_2.nchildren = nblocks; + + /* A is in FORTRAN ordering, starpu_data_get_sub_data(A_handle, 2, i, + * j) designates the block in column i and row j. 
*/ + starpu_data_map_filters(A_handle, 2, &matrix_filter_1, &matrix_filter_2); + + /* + * Partition the vectors + */ + + vector_filter.filter_func = starpu_vector_filter_block; + vector_filter.nchildren = nblocks; + + starpu_data_partition(b_handle, &vector_filter); + starpu_data_partition(x_handle, &vector_filter); + starpu_data_partition(r_handle, &vector_filter); + starpu_data_partition(d_handle, &vector_filter); + starpu_data_partition(q_handle, &vector_filter); +} + +/* + * Debug + */ + +#if 0 +static void display_vector(starpu_data_handle_t handle, TYPE *ptr) +{ + unsigned block_size = n / nblocks; + + unsigned b, ind; + for (b = 0; b < nblocks; b++) + { + starpu_data_acquire(starpu_data_get_sub_data(handle, 1, b), STARPU_R); + for (ind = 0; ind < block_size; ind++) + { + FPRINTF(stderr, "%2.2e ", ptr[b*block_size + ind]); + } + FPRINTF(stderr, "| "); + starpu_data_release(starpu_data_get_sub_data(handle, 1, b)); + } + FPRINTF(stderr, "\n"); +} + +static void display_matrix(void) +{ + unsigned i, j; + for (i = 0; i < n; i++) + { + for (j = 0; j < n; j++) + { + FPRINTF(stderr, "%2.2e ", A[j*n + i]); + } + FPRINTF(stderr, "\n"); + } +} +#endif + +static void display_x_result(void) +{ + unsigned j, i; + starpu_data_handle_t sub; + + FPRINTF(stderr, "Computed X vector:\n"); + + unsigned block_size = n / nblocks; + + for (j = 0; j < nblocks; j++) + { + sub = starpu_data_get_sub_data(x_handle, 1, j); + starpu_data_acquire(sub, STARPU_R); + for (i = 0; i < block_size; i++) + { + FPRINTF(stderr, "% 02.2e\n", x[j*block_size + i]); + } + starpu_data_release(sub); + } +} + + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) + { + FPRINTF_SERVER(stderr, "usage: %s [-h] [-nblocks #blocks] [-display-result] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]); + exit(-1); + } + } + + parse_common_args(argc, argv); +} + + +int 
main(int argc, char **argv) +{ + int ret; + double start, end; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return 77; + + parse_args(argc, argv); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) + { + starpu_shutdown(); + return 77; + } + + starpu_cublas_init(); + + FPRINTF(stderr, "************** PARAMETERS ***************\n"); + FPRINTF(stderr, "Problem size (-n): %lld\n", n); + FPRINTF(stderr, "Maximum number of iterations (-maxiter): %d\n", i_max); + FPRINTF(stderr, "Number of blocks (-nblocks): %u\n", nblocks); + FPRINTF(stderr, "Reduction (-no-reduction): %s\n", use_reduction ? "enabled" : "disabled"); + + start = starpu_timing_now(); + generate_random_problem(); + register_data(); + partition_data(); + end = starpu_timing_now(); + + FPRINTF(stderr, "Problem initialization timing : %2.2f seconds\n", (end-start)/1e6); + + ret = cg(); + if (ret == -ENODEV) + { + ret = 77; + goto enodev; + } + + starpu_task_wait_for_all(); + + if (display_result) + { + display_x_result(); + } + +enodev: + unregister_data(); + free_data(); + starpu_cublas_shutdown(); + starpu_shutdown(); + return ret; +} diff --git a/examples/cg/cg.h b/examples/cg/cg.h new file mode 100644 index 0000000..940e37e --- /dev/null +++ b/examples/cg/cg.h @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_EXAMPLE_CG_H__ +#define __STARPU_EXAMPLE_CG_H__ + +#include +#include +#include + +#ifdef STARPU_USE_CUDA +#include +#endif + +#define DOUBLE + +#ifdef DOUBLE +#define TYPE double +#define GEMV STARPU_DGEMV +#define DOT STARPU_DDOT +#define AXPY STARPU_DAXPY +#define SCAL STARPU_DSCAL +#define cublasdot cublasDdot +#define cublasscal cublasDscal +#define cublasaxpy cublasDaxpy +#define cublasgemv cublasDgemv +#define cublasscal cublasDscal +#else +#define TYPE float +#define GEMV STARPU_SGEMV +#define DOT STARPU_SDOT +#define AXPY STARPU_SAXPY +#define SCAL STARPU_SSCAL +#define cublasdot cublasSdot +#define cublasscal cublasSscal +#define cublasaxpy cublasSaxpy +#define cublasgemv cublasSgemv +#define cublasscal cublasSscal +#endif + +#endif /* __STARPU_EXAMPLE_CG_H__ */ diff --git a/examples/cg/cg_kernels.c b/examples/cg/cg_kernels.c new file mode 100644 index 0000000..6907984 --- /dev/null +++ b/examples/cg/cg_kernels.c @@ -0,0 +1,876 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * Standard BLAS kernels used by CG + */ + +#include "cg.h" +#include +#include + +#ifdef STARPU_USE_CUDA +#include +#include +static const TYPE gp1 = 1.0; +static const TYPE gm1 = -1.0; +#endif + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static unsigned nblocks = 8; + +#ifdef STARPU_QUICK_CHECK +static int i_max = 5; +static int long long n = 2048; +#elif !defined(STARPU_LONG_CHECK) +static int long long n = 4096; +static int i_max = 100; +#else +static int long long n = 4096; +static int i_max = 1000; +#endif +static double eps = (10e-14); + +int use_reduction = 1; +int display_result = 0; + +HANDLE_TYPE_MATRIX A_handle; +HANDLE_TYPE_VECTOR b_handle; +HANDLE_TYPE_VECTOR x_handle; + +HANDLE_TYPE_VECTOR r_handle; +HANDLE_TYPE_VECTOR d_handle; +HANDLE_TYPE_VECTOR q_handle; + +starpu_data_handle_t dtq_handle; +starpu_data_handle_t rtr_handle; +TYPE dtq, rtr; + +#if 0 +static void print_vector_from_descr(unsigned nx, TYPE *v) +{ + unsigned i; + for (i = 0; i < nx; i++) + { + fprintf(stderr, "%2.2e ", v[i]); + } + fprintf(stderr, "\n"); +} + + +static void print_matrix_from_descr(unsigned nx, unsigned ny, unsigned ld, TYPE *mat) +{ + unsigned i, j; + for (j = 0; j < nx; j++) + { + for (i = 0; i < ny; i++) + { + fprintf(stderr, "%2.2e ", mat[j+i*ld]); + } + fprintf(stderr, "\n"); + } +} +#endif + +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + (void)task; + (void)nimpl; + enum starpu_worker_archtype type = starpu_worker_get_type(workerid); + if (type == STARPU_CPU_WORKER || type == STARPU_OPENCL_WORKER) + return 1; + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_SIMGRID + /* We don't know, let's assume it can */ + return 1; +#else + /* Cuda device */ + const struct cudaDeviceProp *props; + props = starpu_cuda_get_device_properties(workerid); + if (props->major >= 2 || props->minor >= 3) + /* At least compute capability 1.3, supports doubles */ 
+ return 1; +#endif +#endif + /* Old card, does not support doubles */ + return 0; +} + +/* + * Reduction accumulation methods + */ + +#ifdef STARPU_USE_CUDA +static void accumulate_variable_cuda(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + TYPE *v_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), 1, &gp1, v_src, 1, v_dst, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void accumulate_variable_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + TYPE *v_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + *v_dst = *v_dst + *v_src; +} + +static struct starpu_perfmodel accumulate_variable_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "accumulate_variable" +}; + +struct starpu_codelet accumulate_variable_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {accumulate_variable_cpu}, + .cpu_funcs_name = {"accumulate_variable_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {accumulate_variable_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, + .model = &accumulate_variable_model, + .name = "accumulate_variable" +}; + +#ifdef STARPU_USE_CUDA +static void accumulate_vector_cuda(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &gp1, v_src, 1, v_dst, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void accumulate_vector_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v_src = (TYPE 
*)STARPU_VECTOR_GET_PTR(descr[1]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + AXPY(nx, (TYPE)1.0, v_src, 1, v_dst, 1); +} + +static struct starpu_perfmodel accumulate_vector_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "accumulate_vector" +}; + +struct starpu_codelet accumulate_vector_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {accumulate_vector_cpu}, + .cpu_funcs_name = {"accumulate_vector_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {accumulate_vector_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, + .model = &accumulate_vector_model, + .name = "accumulate_vector" +}; + +/* + * Reduction initialization methods + */ + +#ifdef STARPU_USE_CUDA +extern void zero_vector(TYPE *x, unsigned nelems); + +static void bzero_variable_cuda(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + size_t size = STARPU_VARIABLE_GET_ELEMSIZE(descr[0]); + + cudaMemsetAsync(v, 0, size, starpu_cuda_get_local_stream()); +} +#endif + +void bzero_variable_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + *v = (TYPE)0.0; +} + +static struct starpu_perfmodel bzero_variable_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "bzero_variable" +}; + +struct starpu_codelet bzero_variable_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {bzero_variable_cpu}, + .cpu_funcs_name = {"bzero_variable_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {bzero_variable_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_W}, + .nbuffers = 1, + .model = &bzero_variable_model, + .name = "bzero_variable" +}; + +#ifdef STARPU_USE_CUDA +static void bzero_vector_cuda(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); + + 
cudaMemsetAsync(v, 0, nx * elemsize, starpu_cuda_get_local_stream()); +} +#endif + +void bzero_vector_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + memset(v, 0, nx*sizeof(TYPE)); +} + +static struct starpu_perfmodel bzero_vector_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "bzero_vector" +}; + +struct starpu_codelet bzero_vector_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {bzero_vector_cpu}, + .cpu_funcs_name = {"bzero_vector_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {bzero_vector_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_W}, + .nbuffers = 1, + .model = &bzero_vector_model, + .name = "bzero_vector" +}; + +/* + * DOT kernel : s = dot(v1, v2) + */ + +#ifdef STARPU_USE_CUDA +static void dot_kernel_cuda(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *dot = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); + + cublasHandle_t handle = starpu_cublas_get_local_handle(); + cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); + cublasStatus_t status = cublasdot(handle, + nx, v1, 1, v2, 1, dot); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST); +} +#endif + +void dot_kernel_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *dot = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); + + TYPE local_dot; + /* Note that we explicitly cast the result of the DOT kernel because + * some BLAS library will return a double for sdot for instance. 
*/ + local_dot = (TYPE)DOT(nx, v1, 1, v2, 1); + + *dot = *dot + local_dot; +} + +static struct starpu_perfmodel dot_kernel_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "dot_kernel" +}; + +static struct starpu_codelet dot_kernel_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {dot_kernel_cpu}, + .cpu_funcs_name = {"dot_kernel_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dot_kernel_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, /* kept under the CUDA guard, like the other codelets of this file */ +#endif + .nbuffers = 3, + .model = &dot_kernel_model, + .name = "dot_kernel" +}; + +int dot_kernel(HANDLE_TYPE_VECTOR v1, + HANDLE_TYPE_VECTOR v2, + starpu_data_handle_t s, + unsigned nb) +{ + int ret; + + /* Blank the accumulation variable */ + if (use_reduction) + starpu_data_invalidate_submit(s); + else + { + ret = TASK_INSERT(&bzero_variable_cl, STARPU_W, s, 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + + unsigned block; + for (block = 0; block < nb; block++) + { + ret = TASK_INSERT(&dot_kernel_cl, + use_reduction?STARPU_REDUX:STARPU_RW, s, + STARPU_R, GET_VECTOR_BLOCK(v1, block), + STARPU_R, GET_VECTOR_BLOCK(v2, block), + STARPU_TAG_ONLY, (starpu_tag_t) block, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + return 0; +} + +/* + * SCAL kernel : v1 = p1 v1 + */ + +#ifdef STARPU_USE_CUDA +static void scal_kernel_cuda(void *descr[], void *cl_arg) +{ + TYPE p1; + starpu_codelet_unpack_args(cl_arg, &p1); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* v1 = p1 v1 */ + TYPE alpha = p1; + cublasStatus_t status = cublasscal(starpu_cublas_get_local_handle(), nx, &alpha, v1, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void scal_kernel_cpu(void *descr[], void *cl_arg) +{ + TYPE alpha; + starpu_codelet_unpack_args(cl_arg, &alpha); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* v1 = alpha v1 */ + 
SCAL(nx, alpha, v1, 1); +} + +static struct starpu_perfmodel scal_kernel_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "scal_kernel" +}; + +static struct starpu_codelet scal_kernel_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {scal_kernel_cpu}, + .cpu_funcs_name = {"scal_kernel_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {scal_kernel_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 1, + .model = &scal_kernel_model, + .name = "scal_kernel" +}; + +/* + * GEMV kernel : v1 = p1 * v1 + p2 * M v2 + */ + +#ifdef STARPU_USE_CUDA +static void gemv_kernel_cuda(void *descr[], void *cl_arg) +{ + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); + TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); + unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); + + TYPE alpha, beta; + starpu_codelet_unpack_args(cl_arg, &beta, &alpha); + + /* Compute v1 = alpha M v2 + beta v1 */ + cublasStatus_t status = cublasgemv(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, nx, ny, &alpha, M, ld, v2, 1, &beta, v1, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void gemv_kernel_cpu(void *descr[], void *cl_arg) +{ + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); + TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); + unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); + + TYPE alpha, beta; + starpu_codelet_unpack_args(cl_arg, &beta, &alpha); + + int worker_size = starpu_combined_worker_get_size(); + + if (worker_size > 1) + { + /* Parallel CPU task: each worker of the combined worker handles a slice of the nx dimension */ + unsigned i = starpu_combined_worker_get_rank(); + + unsigned bs = (nx + worker_size - 1)/worker_size; /* slice length, rounded up; derived from nx because nx, v1 and M are what gets sliced below */ + unsigned new_nx = STARPU_MIN(nx, bs*(i+1)) - STARPU_MIN(nx, bs*i); /* both ends clamped to nx so a trailing worker gets 0 instead of an unsigned wrap-around */ + + nx = new_nx; + v1 
= &v1[bs*i]; + M = &M[bs*i]; + } + + /* Compute v1 = alpha M v2 + beta v1 */ + GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1); +} + +static struct starpu_perfmodel gemv_kernel_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "gemv_kernel" +}; + +static struct starpu_codelet gemv_kernel_cl = +{ + .can_execute = can_execute, + .type = STARPU_SPMD, + .max_parallelism = INT_MAX, + .cpu_funcs = {gemv_kernel_cpu}, + .cpu_funcs_name = {"gemv_kernel_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {gemv_kernel_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 3, + .model = &gemv_kernel_model, + .name = "gemv_kernel" +}; + +int gemv_kernel(HANDLE_TYPE_VECTOR v1, + HANDLE_TYPE_MATRIX matrix, + HANDLE_TYPE_VECTOR v2, + TYPE p1, TYPE p2, + unsigned nb) +{ + unsigned b1, b2; + int ret; + + for (b2 = 0; b2 < nb; b2++) + { + ret = TASK_INSERT(&scal_kernel_cl, + STARPU_RW, GET_VECTOR_BLOCK(v1, b2), + STARPU_VALUE, &p1, sizeof(p1), + STARPU_TAG_ONLY, (starpu_tag_t) b2, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + + for (b2 = 0; b2 < nb; b2++) + { + for (b1 = 0; b1 < nb; b1++) + { + TYPE one = 1.0; + ret = TASK_INSERT(&gemv_kernel_cl, + use_reduction?STARPU_REDUX:STARPU_RW, GET_VECTOR_BLOCK(v1, b2), + STARPU_R, GET_MATRIX_BLOCK(matrix, b2, b1), + STARPU_R, GET_VECTOR_BLOCK(v2, b1), + STARPU_VALUE, &one, sizeof(one), + STARPU_VALUE, &p2, sizeof(p2), + STARPU_TAG_ONLY, ((starpu_tag_t)b2) * nb + b1, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + } + return 0; +} + +/* + * AXPY + SCAL kernel : v1 = p1 * v1 + p2 * v2 + */ +#ifdef STARPU_USE_CUDA +static void scal_axpy_kernel_cuda(void *descr[], void *cl_arg) +{ + TYPE p1, p2; + starpu_codelet_unpack_args(cl_arg, &p1, &p2); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* Compute v1 = p1 * v1 + p2 * v2. 
+ * v1 = p1 v1 + * v1 = v1 + p2 v2 + */ + cublasStatus_t status; + status = cublasscal(starpu_cublas_get_local_handle(), nx, &p1, v1, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &p2, v2, 1, v1, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void scal_axpy_kernel_cpu(void *descr[], void *cl_arg) +{ + TYPE p1, p2; + starpu_codelet_unpack_args(cl_arg, &p1, &p2); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* Compute v1 = p1 * v1 + p2 * v2. + * v1 = p1 v1 + * v1 = v1 + p2 v2 + */ + SCAL(nx, p1, v1, 1); + AXPY(nx, p2, v2, 1, v1, 1); +} + +static struct starpu_perfmodel scal_axpy_kernel_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "scal_axpy_kernel" +}; + +static struct starpu_codelet scal_axpy_kernel_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {scal_axpy_kernel_cpu}, + .cpu_funcs_name = {"scal_axpy_kernel_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {scal_axpy_kernel_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .model = &scal_axpy_kernel_model, + .name = "scal_axpy_kernel" +}; + +int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1, + HANDLE_TYPE_VECTOR v2, TYPE p2, + unsigned nb) +{ + unsigned block; + for (block = 0; block < nb; block++) + { + int ret; + ret = TASK_INSERT(&scal_axpy_kernel_cl, + STARPU_RW, GET_VECTOR_BLOCK(v1, block), + STARPU_R, GET_VECTOR_BLOCK(v2, block), + STARPU_VALUE, &p1, sizeof(p1), + STARPU_VALUE, &p2, sizeof(p2), + STARPU_TAG_ONLY, (starpu_tag_t) block, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + return 0; +} + + +/* + * AXPY kernel : v1 = v1 + p1 * v2 + */ +#ifdef STARPU_USE_CUDA +static void axpy_kernel_cuda(void *descr[], void *cl_arg) +{ + TYPE p1; + 
starpu_codelet_unpack_args(cl_arg, &p1); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* Compute v1 = v1 + p1 * v2. + */ + cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), + nx, &p1, v2, 1, v1, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void axpy_kernel_cpu(void *descr[], void *cl_arg) +{ + TYPE p1; + starpu_codelet_unpack_args(cl_arg, &p1); + + TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + + /* Compute v1 = v1 + p1 * v2. + */ + AXPY(nx, p1, v2, 1, v1, 1); +} + +static struct starpu_perfmodel axpy_kernel_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "axpy_kernel" +}; + +static struct starpu_codelet axpy_kernel_cl = +{ + .can_execute = can_execute, + .cpu_funcs = {axpy_kernel_cpu}, + .cpu_funcs_name = {"axpy_kernel_cpu"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {axpy_kernel_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .model = &axpy_kernel_model, + .name = "axpy_kernel" +}; + +int axpy_kernel(HANDLE_TYPE_VECTOR v1, + HANDLE_TYPE_VECTOR v2, TYPE p1, + unsigned nb) +{ + unsigned block; + for (block = 0; block < nb; block++) + { + int ret; + ret = TASK_INSERT(&axpy_kernel_cl, + STARPU_RW, GET_VECTOR_BLOCK(v1, block), + STARPU_R, GET_VECTOR_BLOCK(v2, block), + STARPU_VALUE, &p1, sizeof(p1), + STARPU_TAG_ONLY, (starpu_tag_t) block, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); + } + return 0; +} + + +/* + * Main loop + */ +int cg(void) +{ + TYPE delta_new, delta_0, error, delta_old, alpha, beta; + double start, end, timing; + int i = 0, ret; + + /* r <- b */ + ret = copy_handle(r_handle, b_handle, nblocks); + if (ret == -ENODEV) return ret; + + /* r <- r - A x */ + ret = gemv_kernel(r_handle,
A_handle, x_handle, 1.0, -1.0, nblocks); + if (ret == -ENODEV) return ret; + + /* d <- r */ + ret = copy_handle(d_handle, r_handle, nblocks); + if (ret == -ENODEV) return ret; + + /* delta_new = dot(r,r) */ + ret = dot_kernel(r_handle, r_handle, rtr_handle, nblocks); + if (ret == -ENODEV) return ret; + + GET_DATA_HANDLE(rtr_handle); + starpu_data_acquire(rtr_handle, STARPU_R); + delta_new = rtr; + delta_0 = delta_new; + starpu_data_release(rtr_handle); + + FPRINTF_SERVER(stderr, "Delta limit: %e\n", (double) (eps*eps*delta_0)); + + FPRINTF_SERVER(stderr, "**************** INITIAL ****************\n"); + FPRINTF_SERVER(stderr, "Delta 0: %e\n", delta_new); + + BARRIER(); + start = starpu_timing_now(); + + while ((i < i_max) && ((double)delta_new > (double)(eps*eps*delta_0))) + { + starpu_iteration_push(i); + + /* q <- A d */ + gemv_kernel(q_handle, A_handle, d_handle, 0.0, 1.0, nblocks); + + /* dtq <- dot(d,q) */ + dot_kernel(d_handle, q_handle, dtq_handle, nblocks); + + /* alpha = delta_new / dtq */ + GET_DATA_HANDLE(dtq_handle); + starpu_data_acquire(dtq_handle, STARPU_R); + alpha = delta_new / dtq; + starpu_data_release(dtq_handle); + + /* x <- x + alpha d */ + axpy_kernel(x_handle, d_handle, alpha, nblocks); + + if ((i % 50) == 0) + { + /* r <- b */ + copy_handle(r_handle, b_handle, nblocks); + + /* r <- r - A x */ + gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks); + } + else + { + /* r <- r - alpha q */ + axpy_kernel(r_handle, q_handle, -alpha, nblocks); + } + + /* delta_new = dot(r,r) */ + dot_kernel(r_handle, r_handle, rtr_handle, nblocks); + + GET_DATA_HANDLE(rtr_handle); + starpu_data_acquire(rtr_handle, STARPU_R); + delta_old = delta_new; + delta_new = rtr; + beta = delta_new / delta_old; + starpu_data_release(rtr_handle); + + /* d <- beta d + r */ + scal_axpy_kernel(d_handle, beta, r_handle, 1.0, nblocks); + + if ((i % 10) == 0) + { + /* We here take the error as ||r||_2 / (n||b||_2) */ + error = sqrt(delta_new/delta_0)/(1.0*n); + 
FPRINTF_SERVER(stderr, "*****************************************\n"); + FPRINTF_SERVER(stderr, "iter %d DELTA %e - %e\n", i, delta_new, error); + } + + starpu_iteration_pop(); + i++; + } + + BARRIER(); + end = starpu_timing_now(); + timing = end - start; + + error = sqrt(delta_new/delta_0)/(1.0*n); + FPRINTF_SERVER(stderr, "*****************************************\n"); + FPRINTF_SERVER(stderr, "iter %d DELTA %e - %e\n", i, delta_new, error); + FPRINTF_SERVER(stderr, "Total timing : %2.2f seconds\n", timing/1e6); + FPRINTF_SERVER(stderr, "Seconds per iteration : %2.2e seconds\n", timing/1e6/i); + FPRINTF_SERVER(stderr, "Number of iterations per second : %2.2e it/s\n", i/(timing/1e6)); + + return 0; +} + + +void parse_common_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-n") == 0) + { + n = (int long long)atoi(argv[++i]); + continue; + } + + if (strcmp(argv[i], "-display-result") == 0) + { + display_result = 1; + continue; + } + + if (strcmp(argv[i], "-maxiter") == 0) + { + i_max = atoi(argv[++i]); + if (i_max <= 0) + { + FPRINTF_SERVER(stderr, "the number of iterations must be positive, not %d\n", i_max); + exit(EXIT_FAILURE); + } + continue; + } + + if (strcmp(argv[i], "-nblocks") == 0) + { + nblocks = atoi(argv[++i]); + continue; + } + + if (strcmp(argv[i], "-no-reduction") == 0) + { + use_reduction = 0; + continue; + } + } +} diff --git a/examples/cholesky/cholesky.h b/examples/cholesky/cholesky.h new file mode 100644 index 0000000..e362ab6 --- /dev/null +++ b/examples/cholesky/cholesky.h @@ -0,0 +1,307 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DW_CHOLESKY_H__ +#define __DW_CHOLESKY_H__ + +#include +#include +#include +#ifdef STARPU_USE_CUDA +#include +#include +#endif + +#include +#include + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PRINTF(fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) +#define NMAXBLOCKS 128 + +#define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) +#define TAG_TRSM(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +#define TAG_POTRF_AUX(k, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k))) +#define TAG_TRSM_AUX(k,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ + | ((3ULL<<56) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM_AUX(k,i,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ + | ((4ULL<<56) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +#define BLOCKSIZE (size_p/nblocks_p) + +#define BLAS3_FLOP(n1,n2,n3) \ + (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) + +/* This is from magma + + -- Innovative Computing Laboratory + -- Electrical Engineering and Computer Science Department + -- University of Tennessee + -- (C) Copyright 2009 + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of Tennessee, Knoxville nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) + +#define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) + +#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +#define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) +#define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? 
FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) + +#define FMULS_TRSM FMULS_TRMM +#define FADDS_TRSM FADDS_TRMM /* additions, not a second copy of the multiplications */ + +#define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) + + +#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) + +#define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) + + +#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) +#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) + +#define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) + +/* End of magma code */ + +static unsigned size_p; +static unsigned nblocks_p; +static unsigned nbigblocks_p; + +static inline void init_sizes(void) +{ + int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count(); + int power_cbrt = cbrt(power); +#ifndef STARPU_LONG_CHECK + power_cbrt /= 2; +#endif + + if (power_cbrt < 1) + power_cbrt = 1; + +#ifdef STARPU_QUICK_CHECK + if (!size_p) + size_p = 320*2*power_cbrt; + if (!nblocks_p) + nblocks_p = 2*power_cbrt; + if (!nbigblocks_p) + nbigblocks_p = power_cbrt; +#else + if (!size_p) + size_p = 960*8*power_cbrt; + if (!nblocks_p) + nblocks_p = 8*power_cbrt; + if (!nbigblocks_p) + nbigblocks_p = 4*power_cbrt; +#endif +} + +static unsigned pinned_p = 1; +static unsigned noprio_p = 0; +static unsigned check_p = 0; +static unsigned bound_p = 0; +static unsigned bound_deps_p = 0; +static unsigned bound_lp_p = 0; +static unsigned bound_mps_p = 0; +static unsigned with_ctxs_p = 0; +static unsigned with_noctxs_p = 0; +static unsigned chole1_p = 0; +static unsigned chole2_p = 0; + +extern struct starpu_perfmodel chol_model_potrf; +extern struct starpu_perfmodel chol_model_trsm; +extern struct starpu_perfmodel chol_model_syrk; +extern struct starpu_perfmodel
chol_model_gemm; + +extern struct starpu_codelet cl_potrf; +extern struct starpu_codelet cl_trsm; +extern struct starpu_codelet cl_syrk; +extern struct starpu_codelet cl_gemm; +extern struct starpu_codelet cl_potrf_gpu; +extern struct starpu_codelet cl_trsm_gpu; +extern struct starpu_codelet cl_syrk_gpu; +extern struct starpu_codelet cl_gemm_gpu; +extern struct starpu_codelet cl_potrf_cpu; +extern struct starpu_codelet cl_trsm_cpu; +extern struct starpu_codelet cl_syrk_cpu; +extern struct starpu_codelet cl_gemm_cpu; + +extern starpu_data_handle_t scratch; +extern starpu_data_handle_t devInfo; + +void cholesky_kernel_init(int nb); +void cholesky_kernel_fini(void); + +void chol_cpu_codelet_update_potrf(void **, void *); +void chol_cpu_codelet_update_trsm(void **, void *); +void chol_cpu_codelet_update_syrk(void **, void *); +void chol_cpu_codelet_update_gemm(void **, void *); + +double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_potrf(void *descr[], void *_args); +void chol_cublas_codelet_update_trsm(void *descr[], void *_args); +void chol_cublas_codelet_update_syrk(void *descr[], void *_args); +void chol_cublas_codelet_update_gemm(void *descr[], void *_args); + +double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double 
cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +#endif + +void initialize_chol_model(struct starpu_perfmodel* model, char* symbol, + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-with_ctxs") == 0) + { + with_ctxs_p = 1; + break; + } + else if (strcmp(argv[i], "-with_noctxs") == 0) + { + with_noctxs_p = 1; + break; + } + else if (strcmp(argv[i], "-chole1") == 0) + { + chole1_p = 1; + break; + } + else if (strcmp(argv[i], "-chole2") == 0) + { + chole2_p = 1; + break; + } + else if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + if (++i < argc) size_p = strtol(argv[i], &argptr, 10); /* a trailing option without a value is ignored instead of reading argv[argc] */ + } + else if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + if (++i < argc) nblocks_p = strtol(argv[i], &argptr, 10); /* same guard as for -size */ + } + else if (strcmp(argv[i], "-nbigblocks") == 0) + { + char *argptr; + if (++i < argc) nbigblocks_p = strtol(argv[i], &argptr, 10); /* same guard as for -size */ + } + else if (strcmp(argv[i], "-no-pin") == 0) + { + pinned_p = 0; + } + else if (strcmp(argv[i], "-no-prio") == 0) + { + noprio_p = 1; + } + else if (strcmp(argv[i], "-commute") == 0) + { + cl_syrk.modes[1] |= STARPU_COMMUTE; + cl_gemm.modes[2] |= STARPU_COMMUTE; + } + else if (strcmp(argv[i], "-bound") == 0) + { + bound_p = 1; + } + else if (strcmp(argv[i], "-bound-lp") == 0) + { + bound_lp_p = 1; + } + else if (strcmp(argv[i], "-bound-mps") == 0) + { + bound_mps_p = 1; + } + else if (strcmp(argv[i], "-bound-deps") == 0) + { + bound_deps_p = 1; + } + else if (strcmp(argv[i], "-check") == 0) + { + check_p = 1; + } + else + /* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0) */ + { + fprintf(stderr,"usage : %s [-size size] [-nblocks nblocks] [-no-pin] [-no-prio] [-bound] [-bound-deps] [-bound-lp] [-check]\n", argv[0]); +
fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks\n", size_p, size_p, nblocks_p, nblocks_p); + exit(0); + } + } + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + size_p = 16; +#endif +} + +#endif /* __DW_CHOLESKY_H__ */ diff --git a/examples/cholesky/cholesky.sh b/examples/cholesky/cholesky.sh new file mode 100755 index 0000000..40abad2 --- /dev/null +++ b/examples/cholesky/cholesky.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +ROOT=${0%.sh} +[ -z "$STARPU_SCHED" ] || STARPU_SCHEDS="$STARPU_SCHED" +#[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS=`$(dirname $0)/../../tools/starpu_sched_display` +[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-dmdas modular-heft2 modular-heft modular-heft-prio modular-heteroprio dmdap dmdar dmda dmdasd modular-dmdap modular-dmdar modular-dmda prio lws" +[ -n "$STARPU_HOSTNAME" ] || export STARPU_HOSTNAME=mirage +unset MALLOC_PERTURB_ + +INCR=2 +STOP=32 + +if [ -n "$STARPU_SIMGRID" ] +then + INCR=4 + STOP=14 + # These use the thread factory, and are thus much longer + if [ -n "$STARPU_QUICK_CHECK" ] + then + INCR=8 + STOP=10 + fi + if [ -n "$STARPU_LONG_CHECK" ] + then + INCR=4 + STOP=32 + fi +fi + +( +echo -n "#" +for STARPU_SCHED in $STARPU_SCHEDS ; do + echo -n " $STARPU_SCHED" +done +echo + +$MS_LAUNCHER $STARPU_LAUNCH ${ROOT}_implicit -size $((10 * 960)) -nblocks 10 -check +[ $? = 0 ] || exit 1 + +for size in `seq 2 $INCR $STOP` ; do + echo -n "$((size * 960))" + for STARPU_SCHED in $STARPU_SCHEDS + do + export STARPU_SCHED + GFLOPS=`$MS_LAUNCHER $STARPU_LAUNCH ${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlop/s | cut -d ' ' -f 3` + [ -n "$GFLOPS" ] || GFLOPS='""' + echo -n " $GFLOPS" + done + echo +done +) | tee cholesky.output + +[ -n "$TERMINAL" ] || TERMINAL=eps +[ -n "$OUTFILE" ] || OUTFILE=cholesky.eps +cat > cholesky.gp << EOF +set terminal $TERMINAL +set output "$OUTFILE" +set key top left +set xlabel "size" +set ylabel "GFlop/s" +plot \\ +EOF + +N=2 +COMMA="" +for STARPU_SCHED in $STARPU_SCHEDS +do + echo "$COMMA'cholesky.output' using 1:$N with lines title '$STARPU_SCHED' \\" >> cholesky.gp + N=$(($N + 1)) + COMMA=", " +done +gnuplot cholesky.gp +#gv $OUTFILE +true diff --git a/examples/cholesky/cholesky_compil.c b/examples/cholesky/cholesky_compil.c new file mode 100644 index 0000000..0790d6c --- /dev/null +++ b/examples/cholesky/cholesky_compil.c @@ -0,0 +1,434 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This version of the Cholesky factorization can include an + * externally-compiler-generated loop nest, which allows to play with + * compiler-side optimizations. + */ + +/* Note: this is using fortran ordering, i.e. column-major ordering, i.e. 
+ * elements with consecutive row number are consecutive in memory */ + +#include "cholesky.h" +#include "../sched_ctx_utils/sched_ctx_utils.h" +#include + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#endif + +#include "starpu_cusolver.h" + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static void callback_turn_spmd_on(void *arg) +{ + (void)arg; + cl_gemm.type = STARPU_SPMD; +} + +static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) +{ + double start; + double end; + + unsigned long nelems = starpu_matrix_get_nx(dataA); + unsigned long nn = nelems/nblocks; + unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; + + if (bound_p || bound_lp_p || bound_mps_p) + starpu_bound_start(bound_deps_p, 0); + starpu_fxt_start_profiling(); + + start = starpu_timing_now(); + +#define min(x,y) (x m) + { + mat[m+n*size] = 0.0f; /* debug */ + } + } + } + float *test_mat = malloc((size_t)size*size*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size, size, 1.0f, + mat, size, 0.0f, test_mat, size); + + FPRINTF(stderr, "comparing results ...\n"); +#ifdef PRINT_OUTPUT + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); + } + else + { + FPRINTF(stdout, ".\t"); + } + } + FPRINTF(stdout, "\n"); + } +#endif + + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); + float err = fabsf(test_mat[m +n*size] - orig) / orig; + if (err > 0.0001) + { + FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); + assert(0); + } + } + } + } + free(test_mat); + } + starpu_free_flags(mat, (size_t)size*size*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED|STARPU_MALLOC_SIMULATION_UNIQUE); +#endif +} + +int main(int argc, char **argv) 
+{ +#ifdef STARPU_HAVE_MAGMA + magma_init(); +#endif + + int ret; + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + //starpu_fxt_stop_profiling(); + + init_sizes(); + + parse_args(argc, argv); + + if(with_ctxs_p || with_noctxs_p || chole1_p || chole2_p) + parse_args_ctx(argc, argv); + +#ifdef STARPU_USE_CUDA + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); +#else + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); +#endif + + starpu_cublas_init(); + starpu_cusolver_init(); + + if(with_ctxs_p) + { + construct_contexts(); + start_2benchs(execute_cholesky); + } + else if(with_noctxs_p) + start_2benchs(execute_cholesky); + else if(chole1_p) + start_1stbench(execute_cholesky); + else if(chole2_p) + start_2ndbench(execute_cholesky); + else + execute_cholesky(size_p, nblocks_p); + + starpu_cusolver_shutdown(); + starpu_cublas_shutdown(); + starpu_shutdown(); + + return 0; +} diff --git a/examples/cholesky/cholesky_compiled.c b/examples/cholesky/cholesky_compiled.c new file mode 100644 index 0000000..a0ff924 --- /dev/null +++ b/examples/cholesky/cholesky_compiled.c @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + + /* This is the base code, just like can be read in Chameleon */ + /* A source-to-source compiler can very easily produce this kind of + code, with rewritten loops etc */ + + unsigned k, m, n; + + for (k = 0; k < nblocks; k++) + { + POTRF(A(k,k), (2*nblocks - 2*k)); + + for (m = k+1; m < nblocks; m++) + TRSM(A(k,k), A(m,k), (2*nblocks - 2*k - m)); + + for (n = k+1; n < nblocks; n++) + { + SYRK(A(n,k), A(n, n), (2*nblocks - 2*k - n)); + for (m = n+1; m < nblocks; m++) + GEMM(A(m,k), A(n,k), A(m,n), (2*nblocks - 2*k - n - m)); + } + } diff --git a/examples/cholesky/cholesky_grain_tag.c b/examples/cholesky/cholesky_grain_tag.c new file mode 100644 index 0000000..c546753 --- /dev/null +++ b/examples/cholesky/cholesky_grain_tag.c @@ -0,0 +1,500 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This version of the Cholesky factorization uses explicit dependency + * declaration through dependency tags. + * It also uses data partitioning to split the matrix into submatrices. + * It also changes the partitioning during execution: when called first, + * cholesky_grain_rec splits the matrix with a big granularity (nblocks) and + * processes nbigblocks blocks, before calling itself again, to process the + * remainder of the matrix with a smaller granularity. + */ + +/* Note: this is using fortran ordering, i.e. column-major ordering, i.e. + * elements with consecutive row number are consecutive in memory */ + +#include "cholesky.h" + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#endif + +#include "starpu_cusolver.h" + +/* + * Some useful functions + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +/* + * Create the codelets + */ + +static struct starpu_task * create_task_potrf(starpu_data_handle_t dataA, unsigned k, unsigned reclevel) +{ +/* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ + + struct starpu_task *task = create_task(TAG_POTRF_AUX(k, reclevel)); + + task->cl = &cl_potrf; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + /* Temporary data to save libcusolver from allocating/deallocating memory */ + task->handles[1] = scratch; + task->handles[2] = devInfo; +#endif + + /* this is an important task */ + if (!noprio_p) + task->priority = STARPU_MAX_PRIO; + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_POTRF_AUX(k, reclevel), 1, TAG_GEMM_AUX(k-1, k, k, reclevel)); + } + + int n = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SPOTRF(n); + + return task; +} + +static int create_task_trsm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned reclevel) +{ + int ret; + + struct starpu_task *task = create_task(TAG_TRSM_AUX(k, m, reclevel)); + + task->cl = &cl_trsm; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); + + if (!noprio_p && (m == k+1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_AUX(k, m, reclevel), 2, TAG_POTRF_AUX(k, reclevel), TAG_GEMM_AUX(k-1, m, k, reclevel)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_AUX(k, m, reclevel), 1, TAG_POTRF_AUX(k, reclevel)); + } + + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_STRSM(nx, nx); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned n, unsigned reclevel) +{ + int ret; + +/* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\nx", k,m,n, TAG_GEMM_AUX(k,m,n)); */ + + struct starpu_task *task = create_task(TAG_GEMM_AUX(k, m, n, reclevel)); + + if (m == n) + { + task->cl = &cl_syrk; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, n, n); + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SSYRK(nx, nx); + } + else + { + task->cl = &cl_gemm; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); + task->handles[2] = starpu_data_get_sub_data(dataA, 2, m, n); + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SGEMM(nx, nx, nx); + } + + if (!noprio_p && (n == k + 1) && (m == k +1)) /* -no-prio disables this boost, as for potrf and trsm */ + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM_AUX(k, m, n, reclevel), 3, TAG_GEMM_AUX(k-1, m, n, reclevel), TAG_TRSM_AUX(k, n, reclevel), TAG_TRSM_AUX(k, m, reclevel)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM_AUX(k, m, n, reclevel), 2, TAG_TRSM_AUX(k, n, reclevel), TAG_TRSM_AUX(k, m, reclevel)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + + + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel) +{ + int ret; + + /* create a new codelet */ + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned k, m, n; + + starpu_data_handle_t dataA; + + /* monitor and partition the A matrix into blocks : + * one block is now determined by 2 unsigned (i,j) */ + starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); + + starpu_data_set_sequential_consistency_flag(dataA, 0); + + /* Split into blocks of complete rows first */ + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nblocks + }; + + /* Then split rows into tiles
*/ + struct starpu_data_filter f2 = + { + /* Note: here "vertical" is for row-major, we are here using column-major. */ + .filter_func = starpu_matrix_filter_vertical_block, + .nchildren = nblocks + }; + + starpu_data_map_filters(dataA, 2, &f, &f2); + + cholesky_kernel_init(size / nblocks); + + for (k = 0; k < nbigblocks; k++) + { + starpu_iteration_push(k); + struct starpu_task *task = create_task_potrf(dataA, k, reclevel); + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (m = k+1; m m) + { + mat[m+n*size_p] = 0.0f; /* debug */ + } + } + } + float *test_mat = malloc((size_t)size_p*size_p*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size_p, size_p, 1.0f, + mat, size_p, 0.0f, test_mat, size_p); + + FPRINTF(stderr, "comparing results ...\n"); +#ifdef PRINT_OUTPUT + for (m = 0; m < size_p; m++) + { + for (n = 0; n < size_p; n++) + { + if (n <= m) + { + FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size_p]); + } + else + { + FPRINTF(stdout, ".\t"); + } + } + FPRINTF(stdout, "\n"); + } +#endif + + for (m = 0; m < size_p; m++) + { + for (n = 0; n < size_p; n++) + { + if (n <= m) + { + float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size_p:0.0f); + float err = fabsf(test_mat[m +n*size_p] - orig) / orig; + if (err > 0.0001) + { + FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size_p], orig, err); + assert(0); + } + } + } + } + free(test_mat); + } +#endif + + shutdown_system(&mat, size_p, pinned_p); + return ret; +} diff --git a/examples/cholesky/cholesky_implicit.c b/examples/cholesky/cholesky_implicit.c new file mode 100644 index 0000000..7654741 --- /dev/null +++ b/examples/cholesky/cholesky_implicit.c @@ -0,0 +1,415 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This version of the Cholesky factorization uses implicit dependency computation. + * The whole algorithm thus appears clearly in the task submission loop in _cholesky(). + */ + +/* Note: this is using fortran ordering, i.e. column-major ordering, i.e. + * elements with consecutive row number are consecutive in memory */ + +#include "cholesky.h" +#include "../sched_ctx_utils/sched_ctx_utils.h" + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#endif + +#include "starpu_cusolver.h" + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static void callback_turn_spmd_on(void *arg) +{ + (void)arg; + cl_gemm.type = STARPU_SPMD; +} + +static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) +{ + double start; + double end; + + unsigned k,m,n; + unsigned long nx = starpu_matrix_get_nx(dataA); + unsigned long nn = nx/nblocks; + + unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; + + if (bound_p || bound_lp_p || bound_mps_p) + starpu_bound_start(bound_deps_p, 0); + starpu_fxt_start_profiling(); + + start = starpu_timing_now(); + + /* create all the DAG nodes */ + for (k = 0; k < nblocks; k++) + { + int ret; + starpu_iteration_push(k); + starpu_data_handle_t sdatakk = 
starpu_data_get_sub_data(dataA, 2, k, k); + + ret = starpu_task_insert(&cl_potrf, + STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, + STARPU_RW, sdatakk, +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + STARPU_SCRATCH, scratch, + STARPU_SCRATCH, devInfo, +#endif + STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, + STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), + STARPU_NAME, "POTRF", + STARPU_TAG_ONLY, TAG_POTRF(k), + 0); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + for (m = k+1; m m) + { + mat[m+n*size] = 0.0f; /* debug */ + } + } + } + float *test_mat = malloc((size_t)size*size*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size, size, 1.0f, + mat, size, 0.0f, test_mat, size); + + FPRINTF(stderr, "comparing results ...\n"); +#ifdef PRINT_OUTPUT + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); + } + else + { + FPRINTF(stdout, ".\t"); + } + } + FPRINTF(stdout, "\n"); + } +#endif + + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); + float err = fabsf(test_mat[m +n*size] - orig) / orig; + if (err > 0.0001) + { + FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); + assert(0); + } + } + } + } + free(test_mat); + } + starpu_free_flags(mat, (size_t)size*size*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED|STARPU_MALLOC_SIMULATION_UNIQUE); +#endif +} + +int main(int argc, char **argv) +{ +#ifdef STARPU_HAVE_MAGMA + magma_init(); +#endif + + int ret; + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + //starpu_fxt_stop_profiling(); + + init_sizes(); + + parse_args(argc, argv); + + if(with_ctxs_p || 
with_noctxs_p || chole1_p || chole2_p) + parse_args_ctx(argc, argv); + +#ifdef STARPU_USE_CUDA + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); +#else + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); +#endif + + starpu_cublas_init(); + starpu_cusolver_init(); + + if(with_ctxs_p) + { + construct_contexts(); + start_2benchs(execute_cholesky); + } + else if(with_noctxs_p) + start_2benchs(execute_cholesky); + else if(chole1_p) + start_1stbench(execute_cholesky); + else if(chole2_p) + start_2ndbench(execute_cholesky); + else + execute_cholesky(size_p, nblocks_p); + + starpu_cusolver_shutdown(); + starpu_cublas_shutdown(); + starpu_shutdown(); + + return 0; +} diff --git a/examples/cholesky/cholesky_julia.sh b/examples/cholesky/cholesky_julia.sh new file mode 100755 index 0000000..190ae77 --- /dev/null +++ b/examples/cholesky/cholesky_julia.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +ROOT=${0%cholesky_julia.sh} +STARPU_SCHED_LIB=$ROOT/.libs/libmy_dmda.so STARPU_SCHED=mydm $ROOT/cholesky_tag diff --git a/examples/cholesky/cholesky_kernels.c b/examples/cholesky/cholesky_kernels.c new file mode 100644 index 0000000..72dee7d --- /dev/null +++ b/examples/cholesky/cholesky_kernels.c @@ -0,0 +1,582 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * Standard kernels for the Cholesky factorization + */ + +#include +#include "cholesky.h" +#include "../common/blas.h" +#if defined(STARPU_USE_CUDA) +#include +#include "starpu_cusolver.h" +#if defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#include "magma_lapack.h" +#endif +#endif + +/* + * GEMM + */ + +#if defined(STARPU_USE_CUDA) +static const float p1 = 1.0; +static const float m1 = -1.0; +#endif + +starpu_data_handle_t scratch = NULL; +starpu_data_handle_t devInfo = NULL; + +static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("gemm\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + + switch (s) + { + case 0: + { + /* CPU kernel */ + int worker_size = starpu_combined_worker_get_size(); + + if (worker_size == 1) + { + /* Sequential CPU kernel */ + STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, + right, ld12, 1.0f, center, ld22); + } + else + { + /* Parallel CPU kernel */ + unsigned rank = starpu_combined_worker_get_rank(); + + unsigned block_size = (dx + worker_size - 1)/worker_size; + unsigned new_dx = STARPU_MIN(dx, block_size*(rank+1)) - block_size*rank; + + float *new_left = &left[block_size*rank]; + float *new_center = &center[block_size*rank]; + + STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, + right, ld12, 1.0f, new_center, ld22); + } + break; + } +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, + &m1, left,
ld21, right, ld12, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * SYRK + */ + +static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("syrk\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); + + switch (s) + { + case 0: + { + /* CPU kernel */ + STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, + 1.0f, center, ld22); + break; + } +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), + CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, + &m1, left, ld21, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * TRSM + */ + +static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("trsm\n"); */ + float *sub11; + float *sub21; + + sub11 = (float 
*)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); + unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = cublasStrsm(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, + nx21, ny21, &p1, sub11, ld11, sub21, ld21); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 1, _args); +} +#endif + +/* + * POTRF + */ + +static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("potrf\n"); */ + float *sub11; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + + unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); + unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned z; + + switch (s) + { + case 0: + +#ifdef STARPU_MKL + STARPU_SPOTRF("L", nx, sub11, ld); +#else + /* + * - alpha 11 <- lambda 11 = sqrt(alpha11) + * - alpha 21 <- l 21 = alpha 21 / lambda 11 + * - A22 <- A22 - l21 trans(l21) + */ + + for (z = 0; z < nx; z++) + { + float lambda11; + lambda11 = sqrt(sub11[z+z*ld]); + sub11[z+z*ld] = lambda11; + + STARPU_ASSERT(lambda11 != 0.0f); + + STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); + + STARPU_SSYR("L", nx - z - 1, -1.0f, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + } +#endif + 
break; +#ifdef STARPU_USE_CUDA + case 1: +#ifdef STARPU_HAVE_LIBCUSOLVER + { + cusolverStatus_t sstatus; + float *workspace = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + int *d_info = (int *)STARPU_VARIABLE_GET_PTR(descr[2]); + int Lwork = STARPU_VARIABLE_GET_ELEMSIZE(descr[1]) / sizeof(float); + + sstatus = cusolverDnSpotrf(starpu_cusolverDn_get_local_handle(), CUBLAS_FILL_MODE_LOWER, nx, sub11, ld, workspace, Lwork, d_info); + if (sstatus != CUSOLVER_STATUS_SUCCESS) + STARPU_CUSOLVER_REPORT_ERROR(sstatus); + } +#elif defined(STARPU_HAVE_MAGMA) + { + int ret; + int info; +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream(stream); + magmablasSetKernelStream(stream); +#else + starpu_cublas_set_stream(); +#endif + ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); + if (ret != MAGMA_SUCCESS) + { + fprintf(stderr, "Error in Magma: %d\n", ret); + STARPU_ABORT(); + } +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + cudaError_t cures = cudaStreamSynchronize(stream); +#else + cudaError_t cures = cudaDeviceSynchronize(); +#endif + STARPU_ASSERT(!cures); + } +#else + { + + float *lambda11; + cublasStatus_t status; + cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasHandle_t handle = starpu_cublas_get_local_handle(); + cudaHostAlloc((void **)&lambda11, sizeof(float), 0); + + for (z = 0; z < nx; z++) + { + cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + STARPU_ASSERT(*lambda11 != 0.0f); + + *lambda11 = sqrt(*lambda11); + +/* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ + cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); + float scal = 1.0f/(*lambda11); + + status = cublasSscal(handle, + nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); + if 
(status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + status = cublasSsyr(handle, + CUBLAS_FILL_MODE_UPPER, + nx - z - 1, &m1, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + + cudaStreamSynchronize(stream); + cudaFreeHost(lambda11); + } +#endif + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + + +void chol_cpu_codelet_update_potrf(void *descr[], void *_args) +{ + chol_common_codelet_update_potrf(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_potrf(void *descr[], void *_args) +{ + chol_common_codelet_update_potrf(descr, 1, _args); +} +#endif/* STARPU_USE_CUDA */ + +struct starpu_perfmodel chol_model_potrf; +struct starpu_perfmodel chol_model_trsm; +struct starpu_perfmodel chol_model_syrk; +struct starpu_perfmodel chol_model_gemm; + +struct starpu_codelet cl_potrf = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_potrf}, + .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_potrf}, +# if defined(STARPU_HAVE_LIBCUSOLVER) + .cuda_flags = {STARPU_CUDA_ASYNC}, +# endif +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + .nbuffers = 3, +#else + .nbuffers = 1, +#endif + .modes = { STARPU_RW +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + , STARPU_SCRATCH | STARPU_NOFOOTPRINT + , STARPU_SCRATCH | STARPU_NOFOOTPRINT +#endif + }, + .model = &chol_model_potrf, + .color = 0xffff00, +}; + +struct starpu_codelet cl_trsm = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_trsm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_trsm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 
2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_trsm, + .color = 0x8080ff, +}; + +struct starpu_codelet cl_syrk = +{ + .type = STARPU_SEQ, + .max_parallelism = INT_MAX, + .cpu_funcs = {chol_cpu_codelet_update_syrk}, + .cpu_funcs_name = {"chol_cpu_codelet_update_syrk"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_syrk}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_syrk, + .color = 0x00ff00, +}; + +struct starpu_codelet cl_gemm = +{ + .type = STARPU_SEQ, + .max_parallelism = INT_MAX, + .cpu_funcs = {chol_cpu_codelet_update_gemm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_gemm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = { STARPU_R, STARPU_R, STARPU_RW }, + .model = &chol_model_gemm, + .color = 0x00c000, +}; + +struct starpu_codelet cl_potrf_gpu = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_potrf}, +# if defined(STARPU_HAVE_LIBCUSOLVER) + .cuda_flags = {STARPU_CUDA_ASYNC}, +# endif +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + .nbuffers = 3, +#else + .nbuffers = 1, +#endif + .modes = { STARPU_RW +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + , STARPU_SCRATCH | STARPU_NOFOOTPRINT + , STARPU_SCRATCH | STARPU_NOFOOTPRINT +#endif + }, + .model = &chol_model_potrf, + .color = 0xffff00, +}; + +struct starpu_codelet cl_trsm_gpu = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_trsm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_trsm, + .color = 
0x8080ff, +}; + +struct starpu_codelet cl_gemm_gpu = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_gemm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = { STARPU_R, STARPU_R, STARPU_RW }, + .model = &chol_model_gemm, + .color = 0x00ff00, +}; + +struct starpu_codelet cl_potrf_cpu = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_potrf}, + .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + .nbuffers = 3, +#else + .nbuffers = 1, +#endif + .modes = { STARPU_RW +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + , STARPU_SCRATCH | STARPU_NOFOOTPRINT + , STARPU_SCRATCH | STARPU_NOFOOTPRINT +#endif + }, + .model = &chol_model_potrf, + .color = 0xffff00, +}; + +struct starpu_codelet cl_trsm_cpu = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_trsm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_trsm, + .color = 0x8080ff, +}; + +struct starpu_codelet cl_gemm_cpu = +{ + .type = STARPU_SEQ, + .max_parallelism = INT_MAX, + .cpu_funcs = {chol_cpu_codelet_update_gemm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, + .nbuffers = 3, + .modes = { STARPU_R, STARPU_R, STARPU_RW }, + .model = &chol_model_gemm, + .color = 0x00ff00, +}; + +void cholesky_kernel_init(int nb) +{ +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + int Lwork = 0; + if (starpu_cuda_worker_get_count()) + { + cusolverStatus_t sstatus = cusolverDnSpotrf_bufferSize(starpu_cusolverDn_get_local_handle(), CUBLAS_FILL_MODE_LOWER, nb, NULL, nb, &Lwork); + if (sstatus != CUSOLVER_STATUS_SUCCESS) + STARPU_CUSOLVER_REPORT_ERROR(sstatus); + } + starpu_variable_data_register(&scratch, -1, 0, Lwork * sizeof(float)); + starpu_variable_data_register(&devInfo, -1, 0, sizeof(int)); +#endif +} + 
+void cholesky_kernel_fini(void) +{ +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + starpu_data_unregister(scratch); + starpu_data_unregister(devInfo); +#endif +} diff --git a/examples/cholesky/cholesky_models.c b/examples/cholesky/cholesky_models.c new file mode 100644 index 0000000..cd2c4fa --- /dev/null +++ b/examples/cholesky/cholesky_models.c @@ -0,0 +1,203 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011 Télécom-SudParis + * Copyright (C) 2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Example of a cost model for BLAS operations. This is really just an + * example! + */ + +/* + * As a convention, in that file, buffers[0] is represented by A, + * buffers[1] is B ... 
+ */ + +/* + * Number of flops of Gemm + */ + +#include +#include +#include "cholesky.h" + +/* #define USE_PERTURBATION 1 */ + +#ifdef USE_PERTURBATION +#define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) +#else +#define PERTURB(a) (a) +#endif + +double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_potrf_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_potrf_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_trsm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_trsm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) 
+{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760)/2; + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_syrk_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666)/2; + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_syrk_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_gemm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_gemm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +void initialize_chol_model(struct starpu_perfmodel* model, char * symbol, + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) +{ + struct starpu_perfmodel_per_arch *per_arch; + + model->symbol = symbol; + model->type = STARPU_HISTORY_BASED; + + starpu_perfmodel_init(model); + + per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CPU_WORKER, 
0, 1, -1); + per_arch->cost_function = cpu_cost_function; + // We could also call directly: + // starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); + + if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) + { + per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CUDA_WORKER, 0, 1, -1); + per_arch->cost_function = cuda_cost_function; + + } +} diff --git a/examples/cholesky/cholesky_tag.c b/examples/cholesky/cholesky_tag.c new file mode 100644 index 0000000..3202c08 --- /dev/null +++ b/examples/cholesky/cholesky_tag.c @@ -0,0 +1,480 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This version of the Cholesky factorization uses explicit dependency + * declaration through dependency tags. + * It also uses data partitioning to split the matrix into submatrices + */ + +/* Note: this is using fortran ordering, i.e. column-major ordering, i.e. 
+ * elements with consecutive row number are consecutive in memory */ + +#include "cholesky.h" +#include + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#endif + +#include + +/* + * Some useful functions + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +/* + * Create the codelets + */ + +static struct starpu_task * create_task_potrf(starpu_data_handle_t dataA, unsigned k) +{ +/* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ + + struct starpu_task *task = create_task(TAG_POTRF(k)); + + task->cl = &cl_potrf; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + /* Temporary data to save libcusolver from allocating/deallocating memory */ + task->handles[1] = scratch; + task->handles[2] = devInfo; +#endif + + /* this is an important task */ + if (!noprio_p) + task->priority = STARPU_MAX_PRIO; + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_POTRF(k), 1, TAG_GEMM(k-1, k, k)); + } + + int n = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SPOTRF(n); + + return task; +} + +static int create_task_trsm(starpu_data_handle_t dataA, unsigned k, unsigned m) +{ + int ret; + + struct starpu_task *task = create_task(TAG_TRSM(k, m)); + + task->cl = &cl_trsm; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); + + if (!noprio_p && (m == k+1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM(k, m), 2, TAG_POTRF(k), TAG_GEMM(k-1, m, k)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM(k, m), 1, TAG_POTRF(k)); + } + + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_STRSM(nx, nx); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned n) +{ + int ret; + +/* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\n", k,m,n, TAG_GEMM(k,m,n)); */ + + struct starpu_task *task = create_task(TAG_GEMM(k, m, n)); + + if (m == n) + { + task->cl = &cl_syrk; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, n, n); + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SSYRK(nx, nx); + } + else + { + task->cl = &cl_gemm; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); + task->handles[2] = starpu_data_get_sub_data(dataA, 2, m, n); + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SGEMM(nx, nx, nx); + } + + if (!noprio_p && (n == k + 1) && (m == k +1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, m, n), 3, TAG_GEMM(k-1, m, n), TAG_TRSM(k, n), TAG_TRSM(k, m)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, m, n), 2, TAG_TRSM(k, n), TAG_TRSM(k, m)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) +{ + int ret; + + double start; + double end; + + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned k, m, n; + + start = starpu_timing_now(); + + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + struct starpu_task *task = create_task_potrf(dataA, k); + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + starpu_data_unpartition(dataA, STARPU_MAIN_RAM); + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (m = k+1; m m) + { + mat[m+n*size_p] = 0.0f; /* debug */ + } + } + } + float *test_mat = malloc(size_p*size_p*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size_p, size_p, 1.0f, + mat, size_p, 0.0f, test_mat, size_p); + + FPRINTF(stderr, "comparing results ...\n"); +#ifdef PRINT_OUTPUT + for (m = 0; m < size_p; m++) + { + for (n = 0; n < size_p; n++) + { + if (n <= m) + { + FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size_p]); + } + else + { + FPRINTF(stdout, ".\t"); + } + } + FPRINTF(stdout, "\n"); + } +#endif + + for (m = 0; m < size_p; m++) + { + for (n = 0; n < size_p; n++) + { + if (n <= m) + { + float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size_p:0.0f); + float err = fabsf(test_mat[m +n*size_p] - orig) / orig; + if (err > 0.0001) + { + FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size_p], orig, err); + assert(0); + } 
+ } + } + } + free(test_mat); + } +#endif + + shutdown_system(&mat, size_p, pinned_p); + return ret; +} diff --git a/examples/cholesky/cholesky_tile_tag.c b/examples/cholesky/cholesky_tile_tag.c new file mode 100644 index 0000000..e96e177 --- /dev/null +++ b/examples/cholesky/cholesky_tile_tag.c @@ -0,0 +1,348 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This version of the Cholesky factorization uses explicit dependency + * declaration through dependency tags. + * It also directly registers matrix tiles instead of using partitioning. + */ + +/* Note: this is using fortran ordering, i.e. column-major ordering, i.e. 
+ * elements with consecutive row number are consecutive in memory */ + +#include "cholesky.h" + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) +#include "magma.h" +#endif + +#include "starpu_cusolver.h" + +/* A [ m ] [ n ] */ +float *A[NMAXBLOCKS][NMAXBLOCKS]; +starpu_data_handle_t A_state[NMAXBLOCKS][NMAXBLOCKS]; + +/* + * Some useful functions + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +/* + * Create the codelets + */ + +static struct starpu_task * create_task_potrf(unsigned k, unsigned nblocks) +{ + (void)nblocks; + /* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ + + struct starpu_task *task = create_task(TAG_POTRF(k)); + + task->cl = &cl_potrf; + + /* which sub-data is manipulated ? */ + task->handles[0] = A_state[k][k]; + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + /* Temporary data to save libcusolver from allocating/deallocating memory */ + task->handles[1] = scratch; + task->handles[2] = devInfo; +#endif + + /* this is an important task */ + task->priority = STARPU_MAX_PRIO; + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_POTRF(k), 1, TAG_GEMM(k-1, k, k)); + } + + int n = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SPOTRF(n); + + return task; +} + +static int create_task_trsm(unsigned k, unsigned m) +{ + int ret; + + struct starpu_task *task = create_task(TAG_TRSM(m, k)); + + task->cl = &cl_trsm; + + /* which sub-data is manipulated ? */ + task->handles[0] = A_state[k][k]; + task->handles[1] = A_state[m][k]; + + if (m == k+1) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM(m, k), 2, TAG_POTRF(k), TAG_GEMM(k-1, m, k)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM(m, k), 1, TAG_POTRF(k)); + } + + int n = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_STRSM(n, n); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(unsigned k, unsigned m, unsigned n) +{ + int ret; + +/* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\n", k,m,n, TAG_GEMM(k,m,n)); */ + + struct starpu_task *task = create_task(TAG_GEMM(k, m, n)); + + if (m == n) + { + task->cl = &cl_syrk; + + /* which sub-data is manipulated ? */ + task->handles[0] = A_state[n][k]; + task->handles[1] = A_state[n][n]; + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SSYRK(nx, nx); + } + else + { + task->cl = &cl_gemm; + + /* which sub-data is manipulated ? */ + task->handles[0] = A_state[n][k]; + task->handles[1] = A_state[m][k]; + task->handles[2] = A_state[m][n]; + int nx = starpu_matrix_get_nx(task->handles[0]); + task->flops = FLOPS_SGEMM(nx, nx, nx); + } + + if (!noprio_p && (n == k + 1) && (m == k +1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, m, n), 3, TAG_GEMM(k-1, m, n), TAG_TRSM(n, k), TAG_TRSM(m, k)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, m, n), 2, TAG_TRSM(n, k), TAG_TRSM(m, k)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static int cholesky_no_stride(void) +{ + int ret; + + double start; + double end; + + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned k, m, n; + + for (k = 0; k < nblocks_p; k++) + { + starpu_iteration_push(k); + struct starpu_task *task = create_task_potrf(k, nblocks_p); + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (m = k+1; m +#include +#include +#include + +#include +#include /* for fpclassify() checks on knob values */ + +#ifndef DBL_MIN +#define DBL_MIN __DBL_MIN__ +#endif + +#ifndef DBL_MAX +#define DBL_MAX __DBL_MAX__ +#endif + +struct _starpu_dmda_data +{ + double alpha; + double beta; + double _gamma; + double idle_power; + + starpu_st_fifo_taskq_t queue_array[STARPU_NMAXWORKERS]; +}; + +/* The dmda scheduling policy uses + * + * alpha * T_computation + beta * T_communication + gamma * Consumption + * + * Here are the default values of alpha, beta, gamma + */ + +#define _STARPU_SCHED_ALPHA_DEFAULT 1.0 +#define _STARPU_SCHED_BETA_DEFAULT 1.0 +#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 + +static void initialize_dmda_policy(unsigned sched_ctx_id) +{ + fprintf(stderr, "HELLO FROM MY_DM\n"); + + struct _starpu_dmda_data *dt; + dt = calloc(1, sizeof(struct _starpu_dmda_data)); + assert(dt); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt); + + dt->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); + 
dt->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); + /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */ + dt->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); + /* data->idle_power: Idle power of the whole machine in Watt */ + dt->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); +} + +static void deinitialize_dmda_policy(unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + free(dt); +} + +static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + unsigned i; + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + /* if the worker has already belonged to this context + the queue and the synchronization variables have been already initialized */ + dt->queue_array[workerid] = starpu_st_fifo_taskq_create(); + } +} + +static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + unsigned i; + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + starpu_st_fifo_taskq_destroy(dt->queue_array[workerid]); + dt->queue_array[workerid] = NULL; + } +} + +static int dm_push_task(struct starpu_task *task) +{ + /* Julia version should look like this: + * + * best_worker = -1 + * best_implem = -1 + * best_EFT = 0 + * for worker in workers: + * for implem in implems: + * if !worker_can_execute_task_impl(worker, task, implem) + * continue + * end + * EFT = EFT(task, worker, implem) + * if best_worker == -1 || EFT < best_EFT + * best_worker = worker + * best_implem = implem + * best_EFT = EFT + * end + * end + * end + * push!(data.queue[worker], task, impl) + 
*/ + + unsigned sched_ctx_id = task->sched_ctx; + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + int best = -1; + double best_exp_end_of_task = 0.0; + unsigned best_impl = 0; + double predicted = 0.0; + double predicted_transfer = 0.0; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + double now = starpu_timing_now(); + + // Find couple (worker, implem) that minimizes EFT(task, worker, implem) + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned nimpl; + unsigned impl_mask; + unsigned worker = workers->get_next(workers, &it); + starpu_st_fifo_taskq_t fifo = dt->queue_array[worker]; + double exp_start = starpu_st_fifo_exp_start_get(fifo); + double pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); + double exp_len = starpu_st_fifo_exp_len_get(fifo); + + /* Sometimes workers didn't take the tasks as early as we expected */ + double new_exp_start = isnan(exp_start) ? 
now + pipeline_len : STARPU_MAX(exp_start, now); + + if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask)) + continue; + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!(impl_mask & (1U << nimpl))) + { + /* no one on that queue may execute this task */ + continue; + } + + // todo: handle case where no calibration or no model + double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl); + double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker); + double exp_end = new_exp_start + exp_len + local_length; + + if (best == -1 || exp_end < best_exp_end_of_task) + { + /* a better solution was found */ + best_exp_end_of_task = exp_end; + best = worker; + best_impl = nimpl; + predicted = local_length; + predicted_transfer = local_penalty; + } + } + } + STARPU_ASSERT(best >= 0); + + // Set task implem. + starpu_task_set_implementation(task, best_impl); + + // Update expected start of the next task in the queue and expected end of the last task in the queue + // This code should be generated automatically. + starpu_st_fifo_taskq_t fifo = dt->queue_array[best]; + double exp_start = starpu_st_fifo_exp_start_get(fifo); + double pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); + double exp_len = starpu_st_fifo_exp_len_get(fifo); + now = starpu_timing_now(); + starpu_worker_lock(best); + double new_exp_start = isnan(exp_start) ? now + pipeline_len : STARPU_MAX(exp_start, now); + starpu_st_fifo_exp_start_set(fifo, new_exp_start); + double new_exp_end = new_exp_start + exp_len; + starpu_st_fifo_exp_end_set(fifo, new_exp_end); + if ((now + predicted_transfer) < new_exp_end) + { + /* We may hope that the transfer will be finished by + * the start of the task. 
*/ + predicted_transfer = 0.0; + } + else + { + /* The transfer will not be finished by then, take the + * remainder into account */ + predicted_transfer = (now + predicted_transfer) - new_exp_end; + } + double new_exp_len = exp_len; + if(!isnan(predicted_transfer)) + new_exp_len += predicted_transfer; + if(!isnan(predicted)) + new_exp_len += predicted; + starpu_st_fifo_exp_len_set(fifo, new_exp_len); + starpu_st_fifo_exp_end_set(fifo, new_exp_start + new_exp_len); + starpu_worker_unlock(best); + + // Not sure what's the purpose of this. + task->predicted = predicted; + task->predicted_transfer = predicted_transfer; + + // Prefetch + if (starpu_get_prefetch_flag()) + starpu_prefetch_task_input_for(task, best); + + // Push task to worker queue + starpu_worker_lock(best); + starpu_st_fifo_taskq_push_back_task(fifo, task); + starpu_st_fifo_ntasks_inc(fifo, 1); + starpu_st_fifo_nprocessed_inc(fifo, 1); +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + starpu_wake_worker_locked(best); +#endif + starpu_push_task_end(task); + starpu_worker_unlock(best); + starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, best); + + return 0; +} + +static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id) +{ + /* Julia version should look like this: + * + * return pop!(data.queue[worker]) + */ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + struct starpu_task *task; + + unsigned workerid = starpu_worker_get_id_check(); + + starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; + + /* Take the opportunity to update start time */ + double new_exp_start = STARPU_MAX(starpu_timing_now(), starpu_st_fifo_exp_start_get(fifo)); + double new_exp_end = new_exp_start + starpu_st_fifo_exp_end_get(fifo); + starpu_st_fifo_exp_start_set(fifo, new_exp_start); + starpu_st_fifo_exp_end_set(fifo, new_exp_end); + + task = starpu_st_fifo_taskq_pop_local_task(fifo); + if (task) + { + double transfer_model = 
task->predicted_transfer; + if (!isnan(transfer_model)) + { + /* We now start the transfer, move it from predicted to pipelined */ + double new_exp_len = starpu_st_fifo_exp_len_get(fifo); + new_exp_len -= transfer_model; + double new_pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); + new_pipeline_len += transfer_model; + + starpu_st_fifo_exp_len_set(fifo, new_exp_len); + starpu_st_fifo_pipeline_len_set(fifo, new_pipeline_len); + + new_exp_start = starpu_timing_now() + new_pipeline_len; + new_exp_end = new_exp_start + new_exp_len; + starpu_st_fifo_exp_start_set(fifo, new_exp_start); + starpu_st_fifo_exp_end_set(fifo, new_exp_end); + } + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); + } + + return task; +} + +// This code should be generated automatically. +static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id) +{ + unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; + const double now = starpu_timing_now(); + + /* Once the task is executing, we can update the predicted amount + * of work. 
*/ + starpu_worker_lock_self(); + + double model = task->predicted; + double transfer_model = task->predicted_transfer; + if(!isnan(transfer_model)) + { + /* The transfer is over, remove it from pipelined */ + starpu_st_fifo_pipeline_len_inc(fifo, -transfer_model); + } + + if(!isnan(model)) + { + /* We now start the computation, move it from predicted to pipelined */ + starpu_st_fifo_exp_len_inc(fifo, -model); + starpu_st_fifo_pipeline_len_inc(fifo, model); + starpu_st_fifo_exp_start_set(fifo, starpu_timing_now() + starpu_st_fifo_pipeline_len_get(fifo)); + starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); + } + + /* Take the opportunity to update start time */ + starpu_st_fifo_exp_start_set(fifo, STARPU_MAX(now + starpu_st_fifo_pipeline_len_get(fifo), starpu_st_fifo_exp_start_get(fifo))); + starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); + + starpu_worker_unlock_self(); +} + +// This code should be generated automatically. 
+static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + unsigned workerid = starpu_worker_get_id_check(); + starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; + starpu_worker_lock_self(); + if(!isnan(task->predicted)) + /* The execution is over, remove it from pipelined */ + starpu_st_fifo_pipeline_len_inc(fifo, -task->predicted); + starpu_st_fifo_exp_start_set(fifo, STARPU_MAX(starpu_timing_now() + starpu_st_fifo_pipeline_len_get(fifo), starpu_st_fifo_exp_start_get(fifo))); + starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); + starpu_worker_unlock_self(); +} + +struct starpu_sched_policy my_dm_policy = +{ + .init_sched = initialize_dmda_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers, + .remove_workers = dmda_remove_workers, + .push_task = dm_push_task, + .simulate_push_task = NULL, + .pop_task = dmda_pop_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "mydm", + .policy_description = "performance model", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy *predefined_policies[] = +{ + &my_dm_policy +}; + +struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name) +{ + if (!strcmp(name, "mydm")) + return &my_dm_policy; + return NULL; +} + +struct starpu_sched_policy **starpu_get_sched_lib_policies(void) +{ + return predefined_policies; +} diff --git a/examples/cholesky/libmy_dmda.h b/examples/cholesky/libmy_dmda.h new file mode 100644 index 0000000..ec84ba2 --- /dev/null +++ b/examples/cholesky/libmy_dmda.h @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MY_DMDA_H__ +#define __STARPU_MY_DMDA_H__ + +#include + +extern struct starpu_sched_policy my_dm_policy; + +#endif /* __STARPU_MY_DMDA_H__ */ diff --git a/examples/common/blas.c b/examples/common/blas.c new file mode 100644 index 0000000..f883d1a --- /dev/null +++ b/examples/common/blas.c @@ -0,0 +1,520 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "blas.h" + +/* + * This file contains BLAS wrappers for the different BLAS implementations + * (e.g. REFBLAS, ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most + * libraries do not supply C-based ordering. 
+ */ + +#ifdef STARPU_ATLAS + +inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, + float alpha, const float *A, int lda, const float *B, int ldb, + float beta, float *C, int ldc) +{ + enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans; + + cblas_sgemm(CblasColMajor, ta, tb, + M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); +} + +inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, + double alpha, double *A, int lda, double *B, int ldb, + double beta, double *C, int ldc) +{ + enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans; + + cblas_dgemm(CblasColMajor, ta, tb, + M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); +} + +inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY) +{ + enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + + cblas_sgemv(CblasColMajor, ta, M, N, alpha, A, lda, + X, incX, beta, Y, incY); +} + +inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY) +{ + enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + + cblas_dgemv(CblasColMajor, ta, M, N, alpha, A, lda, + X, incX, beta, Y, incY); +} + +inline float STARPU_SASUM(int N, float *X, int incX) +{ + return cblas_sasum(N, X, incX); +} + +inline double STARPU_DASUM(int N, double *X, int incX) +{ + return cblas_dasum(N, X, incX); +} + +void STARPU_SSCAL(int N, float alpha, float *X, int incX) +{ + cblas_sscal(N, alpha, X, incX); +} + +void STARPU_DSCAL(int N, double alpha, double *X, int incX) +{ + cblas_dscal(N, alpha, X, incX); +} + +void STARPU_STRSM (const char *side, const char *uplo, const char *transa, + const char 
*diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) +{ + enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_strsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb); +} + +void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) +{ + enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_dtrsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb); +} + +void STARPU_SSYR (const char *uplo, const int n, const float alpha, + const float *x, const int incx, float *A, const int lda) +{ + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + + cblas_ssyr(CblasColMajor, uplo_, n, alpha, x, incx, A, lda); +} + +void STARPU_SSYRK (const char *uplo, const char *trans, const int n, + const int k, const float alpha, const float *A, + const int lda, const float beta, float *C, + const int ldc) +{ + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans; + + cblas_ssyrk(CblasColMajor, uplo_, trans_, n, k, alpha, A, lda, beta, C, ldc); +} + +void STARPU_SGER(const int m, const int n, const float alpha, + const float *x, const int incx, const float *y, + const int incy, float *A, const int lda) 
+{ + cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); +} + +void STARPU_DGER(const int m, const int n, const double alpha, + const double *x, const int incx, const double *y, + const int incy, double *A, const int lda) +{ + cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); +} + +void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const int n, const float *A, const int lda, float *x, + const int incx) +{ + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_strsv(CblasColMajor, uplo_, trans_, diag_, n, A, lda, x, incx); +} + +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) +{ + enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); +} + +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) +{ + enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_dtrmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); +} 
+ +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const int n, const float *A, const int lda, float *X, + const int incX) +{ + enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; + enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; + enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; + + cblas_strmv(CblasColMajor, uplo_, transA_, diag_, n, A, lda, X, incX); +} + +void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) +{ + cblas_saxpy(n, alpha, X, incX, Y, incY); +} + +void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) +{ + cblas_daxpy(n, alpha, X, incX, Y, incY); +} + +int STARPU_ISAMAX (const int n, float *X, const int incX) +{ + int retVal; + retVal = cblas_isamax(n, X, incX); + return retVal; +} + +int STARPU_IDAMAX (const int n, double *X, const int incX) +{ + int retVal; + retVal = cblas_idamax(n, X, incX); + return retVal; +} + +float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) +{ + return cblas_sdot(n, x, incx, y, incy); +} + +double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) +{ + return cblas_ddot(n, x, incx, y, incy); +} + +void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy) +{ + cblas_sswap(n, x, incx, y, incy); +} + +void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy) +{ + cblas_dswap(n, x, incx, y, incy); +} + +#elif defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL) + +inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, + float alpha, const float *A, int lda, const float *B, int ldb, + float beta, float *C, int ldc) +{ + sgemm_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, 
&ldc); +} + +inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, + double alpha, double *A, int lda, double *B, int ldb, + double beta, double *C, int ldc) +{ + dgemm_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, &ldc); +} + + +inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, + float *X, int incX, float beta, float *Y, int incY) +{ + sgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + +inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, + double *X, int incX, double beta, double *Y, int incY) +{ + dgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + +inline float STARPU_SASUM(int N, float *X, int incX) +{ + return sasum_(&N, X, &incX); +} + +inline double STARPU_DASUM(int N, double *X, int incX) +{ + return dasum_(&N, X, &incX); +} + +void STARPU_SSCAL(int N, float alpha, float *X, int incX) +{ + sscal_(&N, &alpha, X, &incX); +} + +void STARPU_DSCAL(int N, double alpha, double *X, int incX) +{ + dscal_(&N, &alpha, X, &incX); +} + +void STARPU_STRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) +{ + strsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) +{ + dtrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_SSYR (const char *uplo, const int n, const float alpha, + const float *x, const int incx, float *A, const int lda) +{ + ssyr_(uplo, &n, &alpha, x, &incx, A, &lda); +} + +void STARPU_SSYRK (const char *uplo, const char *trans, const int n, + const int k, const float alpha, const float *A, + const int lda, const float beta, 
float *C, + const int ldc) +{ + ssyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); +} + +void STARPU_SGER(const int m, const int n, const float alpha, + const float *x, const int incx, const float *y, + const int incy, float *A, const int lda) +{ + sger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void STARPU_DGER(const int m, const int n, const double alpha, + const double *x, const int incx, const double *y, + const int incy, double *A, const int lda) +{ + dger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const int n, const float *A, const int lda, float *x, + const int incx) +{ + strsv_(uplo, trans, diag, &n, A, &lda, x, &incx); +} + +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) +{ + strmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) +{ + dtrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const int n, const float *A, const int lda, float *X, + const int incX) +{ + strmv_(uplo, transA, diag, &n, A, &lda, X, &incX); +} + +void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) +{ + saxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) +{ + daxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +int STARPU_ISAMAX (const int n, float *X, const int incX) +{ + int retVal; + retVal = isamax_ (&n, X, &incX); + return retVal; +} + +int STARPU_IDAMAX (const int n, double 
*X, const int incX) +{ + int retVal; + retVal = idamax_ (&n, X, &incX); + return retVal; +} + +float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) +{ + float retVal = 0; + + /* GOTOBLAS will return a FLOATRET which is a double, not a float */ + retVal = (float)sdot_(&n, x, &incx, y, &incy); + + return retVal; +} + +double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) +{ + return ddot_(&n, x, &incx, y, &incy); +} + +void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) +{ + sswap_(&n, X, &incX, Y, &incY); +} + +void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) +{ + dswap_(&n, X, &incX, Y, &incY); +} + +#if defined(STARPU_MKL) || defined(STARPU_ARMPL) +void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda) +{ + int info = 0; + spotrf_(uplo, &n, a, &lda, &info); +} + +void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda) +{ + int info = 0; + dpotrf_(uplo, &n, a, &lda, &info); +} +#endif + +#elif defined(STARPU_SIMGRID) +inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, + float alpha, const float *A, int lda, const float *B, int ldb, + float beta, float *C, int ldc) { } + +inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, + double alpha, double *A, int lda, double *B, int ldb, + double beta, double *C, int ldc) { } + +inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, + float *X, int incX, float beta, float *Y, int incY) { } + +inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, + double *X, int incX, double beta, double *Y, int incY) { } + +inline float STARPU_SASUM(int N, float *X, int incX) { return 0.; } + +inline double STARPU_DASUM(int N, double *X, int incX) { return 0.; } + +void STARPU_SSCAL(int N, float alpha, float *X, int incX) { } + +void 
STARPU_DSCAL(int N, double alpha, double *X, int incX) { } + +void STARPU_STRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) { } + +void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) { } + +void STARPU_SSYR (const char *uplo, const int n, const float alpha, + const float *x, const int incx, float *A, const int lda) { } + +void STARPU_SSYRK (const char *uplo, const char *trans, const int n, + const int k, const float alpha, const float *A, + const int lda, const float beta, float *C, + const int ldc) { } + +void STARPU_SGER(const int m, const int n, const float alpha, + const float *x, const int incx, const float *y, + const int incy, float *A, const int lda) { } + +void STARPU_DGER(const int m, const int n, const double alpha, + const double *x, const int incx, const double *y, + const int incy, double *A, const int lda) { } + +void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const int n, const float *A, const int lda, float *x, + const int incx) { } + +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb) { } + +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb) { } + +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const int n, const float *A, const int lda, float *X, + const int incX) { } + +void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { } + +void STARPU_DAXPY(const int n, const 
double alpha, double *X, const int incX, double *Y, const int incY) { } + +int STARPU_ISAMAX (const int n, float *X, const int incX) { return 0; } + +int STARPU_IDAMAX (const int n, double *X, const int incX) { return 0; } + +float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { return 0.; } + +double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return 0.; } + +void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { } + +void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { } + +void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda) { } + +void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda) { } + + +#else +#error "no BLAS lib available..." +#endif diff --git a/examples/common/blas.h b/examples/common/blas.h new file mode 100644 index 0000000..cf5f368 --- /dev/null +++ b/examples/common/blas.h @@ -0,0 +1,173 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#ifndef __BLAS_H__
+#define __BLAS_H__
+
+#include <starpu.h>
+
+#if defined(STARPU_ATLAS) || defined(STARPU_HAVE_CBLAS_H)
+#include <cblas.h>
+#endif
+
+/*
+ * Prototypes of the STARPU_* BLAS wrappers: scalars, dimensions and strides
+ * are passed by value. Each wrapper is declared exactly once.
+ */
+void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda,
+		const float *B, int ldb, float beta, float *C, int ldc);
+void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda,
+		double *B, int ldb, double beta, double *C, int ldc);
+void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
+		float *X, int incX, float beta, float *Y, int incY);
+void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
+		double *X, int incX, double beta, double *Y, int incY);
+float STARPU_SASUM(int N, float *X, int incX);
+double STARPU_DASUM(int N, double *X, int incX);
+void STARPU_SSCAL(int N, float alpha, float *X, int incX);
+void STARPU_DSCAL(int N, double alpha, double *X, int incX);
+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const float alpha, const float *A, const int lda,
+                   float *B, const int ldb);
+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const double alpha, const double *A, const int lda,
+                   double *B, const int ldb);
+void STARPU_SSYR (const char *uplo, const int n, const float alpha,
+                  const float *x, const int incx, float *A, const int lda);
+void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
+                   const int k, const float alpha, const float *A,
+                   const int lda, const float beta, float *C,
+                   const int ldc);
+void STARPU_SGER (const int m, const int n, const float alpha,
+                  const float *x, const int incx, const float *y,
+                  const int incy, float *A, const int lda);
+void
STARPU_DGER(const int m, const int n, const double alpha, + const double *x, const int incx, const double *y, + const int incy, double *A, const int lda); +void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const int n, const float *A, const int lda, float *x, + const int incx); +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const int n, const float *A, const int lda, float *X, + const int incX); +void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incy); +void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY); +int STARPU_ISAMAX(const int n, float *X, const int incX); +int STARPU_IDAMAX(const int n, double *X, const int incX); +float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy); +double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy); +void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy); +void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy); + +#if defined(STARPU_MKL) || defined(STARPU_ARMPL) +void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda); +void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda); +#endif + +#if defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL) + +#ifdef _STARPU_F2C_COMPATIBILITY +/* for compatibility with F2C, FLOATRET may not be a 
float but a double in GOTOBLAS */ +/* Don't know how to detect this automatically */ +#define _STARPU_FLOATRET double +#else +#define _STARPU_FLOATRET float +#endif + +extern void sgemm_(const char *transa, const char *transb, const int *m, + const int *n, const int *k, const float *alpha, + const float *A, const int *lda, const float *B, + const int *ldb, const float *beta, float *C, + const int *ldc); +extern void dgemm_(const char *transa, const char *transb, const int *m, + const int *n, const int *k, const double *alpha, + const double *A, const int *lda, const double *B, + const int *ldb, const double *beta, double *C, + const int *ldc); +extern void sgemv_(const char *trans, const int *m, const int *n, const float *alpha, + const float *a, const int *lda, const float *x, const int *incx, + const float *beta, float *y, const int *incy); +extern void dgemv_(const char *trans, const int *m, const int *n, const double *alpha, + const double *a, const int *lda, const double *x, const int *incx, + const double *beta, double *y, const int *incy); +extern void ssyr_(const char *uplo, const int *n, const float *alpha, + const float *x, const int *incx, float *A, const int *lda); +extern void ssyrk_(const char *uplo, const char *trans, const int *n, + const int *k, const float *alpha, const float *A, + const int *lda, const float *beta, float *C, + const int *ldc); +extern void strsm_(const char *side, const char *uplo, const char *transa, + const char *diag, const int *m, const int *n, + const float *alpha, const float *A, const int *lda, + float *B, const int *ldb); +extern void dtrsm_(const char *side, const char *uplo, const char *transa, + const char *diag, const int *m, const int *n, + const double *alpha, const double *A, const int *lda, + double *B, const int *ldb); +extern _STARPU_FLOATRET sasum_ (const int *n, const float *x, const int *incx); +extern double dasum_(const int *n, const double *x, const int *incx); +extern void sscal_(const int *n, const float 
*alpha, float *x, + const int *incx); +extern void dscal_(const int *n, const double *alpha, double *x, + const int *incx); +extern void sger_(const int *m, const int *n, const float *alpha, + const float *x, const int *incx, const float *y, + const int *incy, float *A, const int *lda); +extern void dger_(const int *m, const int *n, const double *alpha, + const double *x, const int *incx, const double *y, + const int *incy, double *A, const int *lda); +extern void strsv_(const char *uplo, const char *trans, const char *diag, + const int *n, const float *A, const int *lda, float *x, + const int *incx); +extern void strmm_(const char *side, const char *uplo, const char *transA, + const char *diag, const int *m, const int *n, + const float *alpha, const float *A, const int *lda, + float *B, const int *ldb); +extern void dtrmm_(const char *side, const char *uplo, const char *transA, + const char *diag, const int *m, const int *n, + const double *alpha, const double *A, const int *lda, + double *B, const int *ldb); +extern void strmv_(const char *uplo, const char *transA, const char *diag, + const int *n, const float *A, const int *lda, float *X, + const int *incX); +extern void saxpy_(const int *n, const float *alpha, const float *X, const int *incX, + float *Y, const int *incy); +extern void daxpy_(const int *n, const double *alpha, const double *X, const int *incX, + double *Y, const int *incy); +extern int isamax_(const int *n, const float *X, const int *incX); +extern int idamax_(const int *n, const double *X, const int *incX); +extern _STARPU_FLOATRET sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy); +extern double ddot_(const int *n, const double *x, const int *incx, const double *y, const int *incy); +extern void sswap_(const int *n, float *x, const int *incx, float *y, const int *incy); +extern void dswap_(const int *n, double *x, const int *incx, double *y, const int *incy); + +#if (defined STARPU_MKL) || (defined 
STARPU_ARMPL) +extern void spotrf_(const char*uplo, const int *n, float *a, const int *lda, int *info); +extern void dpotrf_(const char*uplo, const int *n, double *a, const int *lda, int *info); +#endif + +#endif + +#endif /* __BLAS_H__ */ diff --git a/examples/common/blas_model.c b/examples/common/blas_model.c new file mode 100644 index 0000000..8f879c7 --- /dev/null +++ b/examples/common/blas_model.c @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "blas_model.h" +#include + +/* + * As a convention, in that file, descr[0] is represented by A, + * descr[1] is B ... 
+ */ + +/* + * Number of flops of Gemm + */ + +double gemm_cost(struct starpu_task *task, unsigned nimpl) +{ + /* C = A * B */ + uint32_t nxC, nyC, nxA; + + + nxC = starpu_matrix_get_nx(task->descr[2].handle); + nyC = starpu_matrix_get_ny(task->descr[2].handle); + nxA = starpu_matrix_get_nx(task->descr[0].handle); + +/* printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA); */ + + double cost = ((double)nxC)*((double)nyC)*((double)nxA/1000.0f/4.11f); + +/* printf("cost %e \n", cost); */ + + return cost; +} diff --git a/examples/common/blas_model.h b/examples/common/blas_model.h new file mode 100644 index 0000000..778b75f --- /dev/null +++ b/examples/common/blas_model.h @@ -0,0 +1,64 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __BLAS_MODEL_H__ +#define __BLAS_MODEL_H__ + +#include + +double gemm_cost(struct starpu_task *task, unsigned nimpl); + +static struct starpu_perfmodel starpu_sgemm_model = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = "sgemm_atlas" +#elif defined(STARPU_GOTO) + .symbol = "sgemm_goto" +#elif defined(STARPU_OPENBLAS) + .symbol = "sgemm_openblas" +#else + .symbol = "sgemm" +#endif +}; + +static struct starpu_perfmodel starpu_sgemm_model_common = +{ + .cost_function = gemm_cost, + .type = STARPU_COMMON, +}; + +static struct starpu_perfmodel starpu_dgemm_model = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = "dgemm_atlas" +#elif defined(STARPU_GOTO) + .symbol = "dgemm_goto" +#elif defined(STARPU_OPENBLAS) + .symbol = "dgemm_openblas" +#else + .symbol = "dgemm" +#endif +}; + +static struct starpu_perfmodel starpu_dgemm_model_common = +{ + .cost_function = gemm_cost, + .type = STARPU_COMMON, +}; + +#endif /* __BLAS_MODEL_H__ */ diff --git a/examples/cpp/Makefile_add_vectors.mk b/examples/cpp/Makefile_add_vectors.mk new file mode 100644 index 0000000..5a11909 --- /dev/null +++ b/examples/cpp/Makefile_add_vectors.mk @@ -0,0 +1,34 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+PROG = add_vectors
+
+SRCCXX = add_vectors.cpp
+
+CXX = g++
+
+CXXFLAGS = -g -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3)
+LDLIBS = $(shell pkg-config --libs starpu-1.3)
+
+OBJS = $(SRCCXX:%.cpp=%.o)
+
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+clean:
+	rm -fv *.o $(PROG)
diff --git a/examples/cpp/Makefile_add_vectors_cpp11.mk b/examples/cpp/Makefile_add_vectors_cpp11.mk
new file mode 100644
index 0000000..02fa55a
--- /dev/null
+++ b/examples/cpp/Makefile_add_vectors_cpp11.mk
@@ -0,0 +1,34 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = add_vectors_cpp11
+
+SRCCXX = add_vectors_cpp11.cpp
+
+CXX = g++
+
+CXXFLAGS = -g -std=c++11 -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3)
+LDLIBS = $(shell pkg-config --libs starpu-1.3)
+
+OBJS = $(SRCCXX:%.cpp=%.o)
+
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+clean:
+	rm -fv *.o $(PROG)
diff --git a/examples/cpp/add_vectors.cpp b/examples/cpp/add_vectors.cpp
new file mode 100644
index 0000000..474377e
--- /dev/null
+++ b/examples/cpp/add_vectors.cpp
@@ -0,0 +1,159 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is a small example of a C++ program using starpu. We here just + * add two std::vector without copying them (0 copy). + */ + +#include +#include + +#ifdef PRINT_OUTPUT +#include +#endif + +#include + +void cpu_kernel_add_vectors(void *buffers[], void *cl_arg) +{ + // get the current task + starpu_task* task = starpu_task_get_current(); + + // get the user data (pointers to the vec_A, vec_B, vec_C std::vector) + void* u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0); + void* u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1); + void* u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2); + + // cast void* in std::vector* + std::vector* vec_A = static_cast*>(u_data0); + std::vector* vec_B = static_cast*>(u_data1); + std::vector* vec_C = static_cast*>(u_data2); + + // all the std::vector have to have the same size + assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size()); + + // performs the vector addition (vec_C[] = vec_A[] + vec_B[]) + for (size_t i = 0; i < vec_C->size(); i++) + (*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i]; +} + +#define VEC_SIZE 1024 + +int main(int argc, char **argv) +{ + std::vector vec_A(VEC_SIZE, 2); // all the vector is initialized to 2 + std::vector vec_B(VEC_SIZE, 3); // all the vector is initialized to 3 + std::vector vec_C(VEC_SIZE, 0); 
// all the vector is initialized to 0 + + struct starpu_conf conf; + starpu_conf_init(&conf); + /* starpu_data_get_user_data cannot work in master-slave */ + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + // initialize StarPU with default configuration + int ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* StarPU can overwrite object if NUMA transfers are made */ + if (starpu_memory_nodes_get_numa_count() > 1) + { + starpu_shutdown(); + return 77; + } + + // StarPU data registering + starpu_data_handle_t spu_vec_A; + starpu_data_handle_t spu_vec_B; + starpu_data_handle_t spu_vec_C; + + // give the data of the vector to StarPU (C array) + starpu_vector_data_register(&spu_vec_A, STARPU_MAIN_RAM, (uintptr_t)&vec_A[0], vec_A.size(), sizeof(char)); + starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)&vec_B[0], vec_B.size(), sizeof(char)); + starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)&vec_C[0], vec_C.size(), sizeof(char)); + + // pass the pointer to the C++ vector object to StarPU + starpu_data_set_user_data(spu_vec_A, (void*)&vec_A); + starpu_data_set_user_data(spu_vec_B, (void*)&vec_B); + starpu_data_set_user_data(spu_vec_C, (void*)&vec_C); + + // create the StarPU codelet + starpu_codelet cl; + starpu_codelet_init(&cl); + cl.cpu_funcs [0] = cpu_kernel_add_vectors; + cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors"; + cl.nbuffers = 3; + cl.modes [0] = STARPU_R; + cl.modes [1] = STARPU_R; + cl.modes [2] = STARPU_W; + cl.name = "add_vectors"; + + // submit a new StarPU task to execute + ret = starpu_task_insert(&cl, + STARPU_R, spu_vec_A, + STARPU_R, spu_vec_B, + STARPU_W, spu_vec_C, + 0); + if (ret == -ENODEV) + { + // StarPU data unregistering + starpu_data_unregister(spu_vec_C); + starpu_data_unregister(spu_vec_B); + starpu_data_unregister(spu_vec_A); + + // terminate StarPU, no task can be submitted after + starpu_shutdown(); + + return 77; + } + + 
STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors"); + + // wait the task + starpu_task_wait_for_all(); + + // StarPU data unregistering + starpu_data_unregister(spu_vec_C); + starpu_data_unregister(spu_vec_B); + starpu_data_unregister(spu_vec_A); + + // terminate StarPU, no task can be submitted after + starpu_shutdown(); + + // check results + bool fail = false; + int i = 0; + while (!fail && i < VEC_SIZE) + fail = vec_C[i++] != 5; + + if (fail) + { +#ifdef PRINT_OUTPUT + std::cout << "Example failed..." << std::endl; +#endif + return EXIT_FAILURE; + } + else + { +#ifdef PRINT_OUTPUT + std::cout << "Example successfully passed!" << std::endl; +#endif + return EXIT_SUCCESS; + } +} diff --git a/examples/cpp/add_vectors_cpp11.cpp b/examples/cpp/add_vectors_cpp11.cpp new file mode 100644 index 0000000..2ededf1 --- /dev/null +++ b/examples/cpp/add_vectors_cpp11.cpp @@ -0,0 +1,165 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is a small example of a C++ program using starpu. We here just + * add two std::vector without copying them (0 copy). 
+ */ + +#include +#include + +#ifdef PRINT_OUTPUT +#include +#endif + +#include +#if !defined(STARPU_HAVE_CXX11) +int main(int argc, char **argv) +{ + return 77; +} +#else +void cpu_kernel_add_vectors(void *buffers[], void *cl_arg) +{ + // get the current task + auto task = starpu_task_get_current(); + + // get the user data (pointers to the vec_A, vec_B, vec_C std::vector) + auto u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0); + auto u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1); + auto u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2); + + // cast void* in std::vector* + auto vec_A = static_cast*>(u_data0); + auto vec_B = static_cast*>(u_data1); + auto vec_C = static_cast*>(u_data2); + + // all the std::vector have to have the same size + assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size()); + + // performs the vector addition (vec_C[] = vec_A[] + vec_B[]) + for (size_t i = 0; i < vec_C->size(); i++) + (*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i]; +} + +int main(int argc, char **argv) +{ + constexpr int vec_size = 1024; + + std::vector vec_A(vec_size, 2); // all the vector is initialized to 2 + std::vector vec_B(vec_size, 3); // all the vector is initialized to 3 + std::vector vec_C(vec_size, 0); // all the vector is initialized to 0 + + struct starpu_conf conf; + starpu_conf_init(&conf); + /* starpu_data_get_user_data cannot work in master-slave */ + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + // initialize StarPU with default configuration + auto ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_memory_nodes_get_numa_count() > 1) + { + starpu_shutdown(); + return 77; + } + + // StarPU data registering + starpu_data_handle_t spu_vec_A; + starpu_data_handle_t spu_vec_B; + starpu_data_handle_t spu_vec_C; + + // give the data of the vector to StarPU (C array) + starpu_vector_data_register(&spu_vec_A, 
STARPU_MAIN_RAM, (uintptr_t)vec_A.data(), vec_A.size(), sizeof(char)); + starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)vec_B.data(), vec_B.size(), sizeof(char)); + starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)vec_C.data(), vec_C.size(), sizeof(char)); + + // pass the pointer to the C++ vector object to StarPU + starpu_data_set_user_data(spu_vec_A, (void*)&vec_A); + starpu_data_set_user_data(spu_vec_B, (void*)&vec_B); + starpu_data_set_user_data(spu_vec_C, (void*)&vec_C); + + // create the StarPU codelet + starpu_codelet cl; + starpu_codelet_init(&cl); + cl.cpu_funcs [0] = cpu_kernel_add_vectors; + cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors"; + cl.nbuffers = 3; + cl.modes [0] = STARPU_R; + cl.modes [1] = STARPU_R; + cl.modes [2] = STARPU_W; + cl.name = "add_vectors"; + + // submit a new StarPU task to execute + ret = starpu_task_insert(&cl, + STARPU_R, spu_vec_A, + STARPU_R, spu_vec_B, + STARPU_W, spu_vec_C, + 0); + + if (ret == -ENODEV) + { + // StarPU data unregistering + starpu_data_unregister(spu_vec_C); + starpu_data_unregister(spu_vec_B); + starpu_data_unregister(spu_vec_A); + + // terminate StarPU, no task can be submitted after + starpu_shutdown(); + + return 77; + } + + STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors"); + + // wait the task + starpu_task_wait_for_all(); + + // StarPU data unregistering + starpu_data_unregister(spu_vec_C); + starpu_data_unregister(spu_vec_B); + starpu_data_unregister(spu_vec_A); + + // terminate StarPU, no task can be submitted after + starpu_shutdown(); + + // check results + auto fail = false; + auto i = 0; + while (!fail && i < vec_size) + fail = vec_C[i++] != 5; + + if (fail) + { +#ifdef PRINT_OUTPUT + std::cout << "Example failed..." << std::endl; +#endif + return EXIT_FAILURE; + } + else + { +#ifdef PRINT_OUTPUT + std::cout << "Example successfully passed!" 
<< std::endl; +#endif + return EXIT_SUCCESS; + } +} +#endif diff --git a/examples/cpp/add_vectors_interface.cpp b/examples/cpp/add_vectors_interface.cpp new file mode 100644 index 0000000..21e1a1f --- /dev/null +++ b/examples/cpp/add_vectors_interface.cpp @@ -0,0 +1,676 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is a small example of a C++ program using STL and starpu. We here just + * add two std::vector with duplicating vectors. StarPU achieves data + * transfers between objects. 
+ */ + +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNU_MINOR < 9)) +int main(int argc, char **argv) +{ + return 77; +} +#else +#include +#include + +#ifdef PRINT_OUTPUT +#include +#endif + +#include + +#define MY_TYPE char, my_allocator + +/* create an allocator to put data on the correct NUMA node */ +template +class my_allocator +{ + public: + + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + my_allocator() + { + this->node = STARPU_MAIN_RAM; + } + + my_allocator(const my_allocator& a) + { + node = a.get_node(); + } + + explicit my_allocator(const unsigned thenode) + { + this->node = thenode; + } + + pointer allocate(size_type n, const void * = 0) + { + T* t = (T*) starpu_malloc_on_node(this->node, n * sizeof(T)); + return t; + } + + void deallocate(void* p, size_type n) + { + if (p) + { + starpu_free_on_node(this->node, (uintptr_t) p, n * sizeof(T)); + } + } + + unsigned get_node() const + { + return node; + } + + pointer address(reference x) const + { + return &x; + } + + const_pointer address(const_reference x) const + { + return &x; + } + + my_allocator& operator=(const my_allocator&ref) + { + node = ref.node; + return *this; + } + + void construct(pointer p, const T& val) + { + new ((T*) p) T(val); + } + + void destroy(pointer p) + { + p->~T(); + } + + size_type max_size() const + { + return size_type(-1); + } + + + template + struct rebind + { + typedef my_allocator other; + }; + + template + explicit my_allocator(const my_allocator&ref) + { + node = ref.node; + } + + template + my_allocator& operator=(const my_allocator&ref) + { + node = ref.node; + return *this; + } + + private: + unsigned node; +}; + +/* + * Create a new interface to catch C++ vector and make appropriate data transfers + */ +struct vector_cpp_interface +{ + enum starpu_data_interface_id id; + + uintptr_t ptr; + 
uint32_t nx; + size_t elemsize; + std::vector* vec; + + uint32_t slice_base; +}; + +#define VECTOR_CPP_GET_VEC(interface) ({ (((struct vector_cpp_interface *)(interface))->vec); }) + +static int vector_interface_copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, void *async_data); + +#if __cplusplus >= 201103L +static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s = +{ + .can_copy = NULL, + + .ram_to_ram = NULL, + .ram_to_cuda = NULL, + .ram_to_hip = NULL, + .ram_to_opencl = NULL, + .ram_to_max_fpga = NULL, + + .cuda_to_ram = NULL, + .cuda_to_cuda = NULL, + + .hip_to_ram = NULL, + .hip_to_hip = NULL, + + .opencl_to_ram = NULL, + .opencl_to_opencl = NULL, + + .max_fpga_to_ram = NULL, + + .ram_to_cuda_async = NULL, + .cuda_to_ram_async = NULL, + .cuda_to_cuda_async = NULL, + + .ram_to_hip_async = NULL, + .hip_to_ram_async = NULL, + .hip_to_hip_async = NULL, + + .ram_to_opencl_async = NULL, + .opencl_to_ram_async = NULL, + .opencl_to_opencl_async = NULL, + + .ram_to_max_fpga_async = NULL, + .max_fpga_to_ram_async = NULL, + + .any_to_any = vector_interface_copy_any_to_any, +}; +#else +static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s = +{ + NULL, // can_copy + + NULL, // ram_to_ram + NULL, // ram_to_cuda + NULL, // ram_to_hip + NULL, // ram_to_opencl + NULL, // ram_to_max_fpga + + NULL, // cuda_to_ram + NULL, // cuda_to_cuda + + NULL, // hip_to_ram + NULL, // hip_to_hip + + NULL, // opencl_to_ram + NULL, // opencl_to_opencl + + NULL, // max_fpga_to_ram + + NULL, // ram_to_cuda_async + NULL, // cuda_to_ram_async + NULL, // cuda_to_cuda_async + + NULL, // ram_to_hip_async + NULL, // hip_to_ram_async + NULL, // hip_to_hip_async + + NULL, // ram_to_opencl_async + NULL, // opencl_to_ram_async + NULL, // opencl_to_opencl_async + + NULL, // ram_to_max_fpga_asyn + NULL, // max_fpga_to_ram_asyn + + vector_interface_copy_any_to_any, +}; +#endif + +static void 
register_vector_cpp_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, unsigned dst_node); +static void *vector_cpp_to_pointer(void *data_interface, unsigned node); +static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node); +static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node); +static size_t vector_cpp_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_vector_cpp_interface_crc32(starpu_data_handle_t handle); +static int vector_cpp_compare(void *data_interface_a, void *data_interface_b); +static void display_vector_cpp_interface(starpu_data_handle_t handle, FILE *f); +static int pack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t vector_cpp_describe(void *data_interface, char *buf, size_t size); + +#if __cplusplus >= 201103L +static struct starpu_data_interface_ops interface_vector_cpp_ops = +{ + .register_data_handle = register_vector_cpp_handle, + .unregister_data_handle = NULL, + .allocate_data_on_node = allocate_vector_cpp_buffer_on_node, + .free_data_on_node = free_vector_cpp_buffer_on_node, + .cache_data_on_node = NULL, + .reuse_data_on_node = NULL, + .map_data = NULL, + .unmap_data = NULL, + .update_map = NULL, + .init = NULL, + .copy_methods = &vector_cpp_copy_data_methods_s, + .handle_to_pointer = NULL, + .to_pointer = vector_cpp_to_pointer, + .get_size = vector_cpp_interface_get_size, + .get_alloc_size = NULL, + .get_max_size = NULL, + .footprint = footprint_vector_cpp_interface_crc32, + .alloc_footprint = NULL, + .compare = vector_cpp_compare, + .alloc_compare = NULL, + .display = 
display_vector_cpp_interface, + .describe = vector_cpp_describe, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct vector_cpp_interface), + .is_multiformat = 0, + .dontcache = 0, + .get_mf_ops = NULL, + .pack_data = pack_vector_cpp_handle, + .peek_data = peek_vector_cpp_handle, + .unpack_data = unpack_vector_cpp_handle, + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL, + .name = (char *) "VECTOR_CPP_INTERFACE" +}; +#else +static struct starpu_data_interface_ops interface_vector_cpp_ops = +{ + register_vector_cpp_handle, + NULL, + allocate_vector_cpp_buffer_on_node, + free_vector_cpp_buffer_on_node, + NULL, + NULL, + NULL, + NULL, + NULL, + &vector_cpp_copy_data_methods_s, + vector_cpp_to_pointer, + vector_cpp_interface_get_size, + NULL, + NULL, + footprint_vector_cpp_interface_crc32, + NULL, + vector_cpp_compare, + NULL, + display_vector_cpp_interface, + vector_cpp_describe, + STARPU_UNKNOWN_INTERFACE_ID, + sizeof(struct vector_cpp_interface), + 0, + 0, + NULL, + pack_vector_cpp_handle, + peek_vector_cpp_handle, + unpack_vector_cpp_handle, + NULL, + NULL, + NULL, + (char *) "VECTOR_CPP_INTERFACE" +}; +#endif + +static void *vector_cpp_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface; + + return (void*) vector_interface->ptr; +} + +static void register_vector_cpp_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct vector_cpp_interface *local_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = vector_interface->ptr; + local_interface->vec = vector_interface->vec; + } + else + { + local_interface->ptr = 0; + 
local_interface->vec = NULL; + } + + local_interface->id = vector_interface->id; + local_interface->nx = vector_interface->nx; + local_interface->elemsize = vector_interface->elemsize; + local_interface->slice_base = vector_interface->slice_base; + } +} + +/* declare a new data with the vector interface */ +void vector_cpp_data_register(starpu_data_handle_t *handleptr, int home_node, + std::vector* vec, uint32_t nx, size_t elemsize) +{ +#if __cplusplus >= 201103L + struct vector_cpp_interface vector = + { + .id = STARPU_UNKNOWN_INTERFACE_ID, + .ptr = (uintptr_t) &(*vec)[0], + .nx = nx, + .elemsize = elemsize, + .vec = vec, + .slice_base = 0 + }; +#else + struct vector_cpp_interface vector = + { + STARPU_UNKNOWN_INTERFACE_ID, + (uintptr_t) &(*vec)[0], + (uintptr_t) &(*vec)[0], + 0, + nx, + elemsize, + vec, + 0 + }; +#endif + + starpu_data_register(handleptr, home_node, &vector, &interface_vector_cpp_ops); +} + +/* offer an access to the data parameters */ +uint32_t vector_cpp_get_nx(starpu_data_handle_t handle) +{ + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return vector_interface->nx; +} + + +static uint32_t footprint_vector_cpp_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(vector_cpp_get_nx(handle), 0); +} + +static int vector_cpp_compare(void *data_interface_a, void *data_interface_b) +{ + struct vector_cpp_interface *vector_a = (struct vector_cpp_interface *) data_interface_a; + struct vector_cpp_interface *vector_b = (struct vector_cpp_interface *) data_interface_b; + + /* Two vectors are considered compatible if they have the same size */ + return ((vector_a->nx == vector_b->nx) + && (vector_a->elemsize == vector_b->elemsize)); +} + +static void display_vector_cpp_interface(starpu_data_handle_t handle, FILE *f) +{ + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + 
starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t", vector_interface->nx); +} + +static int pack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = vector_interface->nx*vector_interface->elemsize; + + if (ptr != NULL) + { + *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); + memcpy(*ptr, (void*)vector_interface->ptr, vector_interface->elemsize*vector_interface->nx); + } + + return 0; +} + +static int peek_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx); + memcpy((void*)vector_interface->ptr, ptr, count); + + return 0; +} + +static int unpack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + peek_vector_cpp_handle(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + + return 0; +} + +static size_t vector_cpp_interface_get_size(starpu_data_handle_t handle) +{ + size_t size; + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + size = vector_interface->nx*vector_interface->elemsize; + + return size; +} + +size_t vector_cpp_get_elemsize(starpu_data_handle_t handle) +{ + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return vector_interface->elemsize; +} + +/* memory 
/* Allocation hook of the vector_cpp interface.
 *
 * Builds a new std::vector of nx elements, using an allocator constructed
 * from dst_node, then records in the interface both the vector itself and the
 * address of its first element.
 *
 * Returns the size of the allocated area in bytes (nx * elemsize), or -ENOMEM
 * if the vector pointer is NULL. */
static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, unsigned dst_node)
{
	struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface_;

	uint32_t nx = vector_interface->nx;
	size_t elemsize = vector_interface->elemsize;

	starpu_ssize_t allocated_memory;

	/* NOTE(review): the template arguments of my_allocator and std::vector
	 * are not visible in this view of the file -- confirm against the
	 * original source before touching these two lines. */
	const my_allocator allocator(dst_node);
	std::vector * vec = new std::vector(nx, 0, allocator);

	vector_interface->vec = vec;
	/* NOTE(review): a plain new-expression reports failure by throwing
	 * std::bad_alloc rather than yielding NULL, so this branch looks
	 * unreachable as written -- confirm whether new (std::nothrow) or a
	 * try/catch around the construction was intended. */
	if (!vector_interface->vec)
		return -ENOMEM;

	allocated_memory = nx*elemsize;

	/* keep the raw pointer of the interface in sync with the new vector's storage */
	vector_interface->ptr = (uintptr_t) &((*vec)[0]);

	return allocated_memory;
}
/* Entry point of the example: registers three std::vector objects with StarPU
 * through the vector_cpp interface, submits one task computing C = A + B, and
 * checks that every element of C equals 5.
 *
 * Returns EXIT_SUCCESS when the check passes, EXIT_FAILURE when it does not,
 * and 77 when starpu_init() or starpu_task_insert() reports -ENODEV
 * (presumably the test suite's "skipped" status -- confirm).
 *
 * NOTE(review): the template arguments of my_allocator and std::vector are not
 * visible in this view of the file -- confirm against the original source. */
int main(int argc, char **argv)
{
	struct starpu_conf conf;
	bool fail;

	starpu_conf_init(&conf);
	/* _starpu_src_common_execute_kernel doesn't support this yet */
	conf.nmpi_ms = 0;
	conf.ntcpip_ms = 0;

	// initialize StarPU with the configuration prepared above
	int ret = starpu_init(&conf);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	{
		/* Test data transfers between NUMA nodes if available */
		unsigned last_numa_node = starpu_memory_nodes_get_numa_count() - 1;

		const my_allocator allocator_main_ram(STARPU_MAIN_RAM);
		const my_allocator allocator_last_numa(last_numa_node);
		std::vector vec_A(VEC_SIZE, 2, allocator_main_ram); // every element is initialized to 2
		std::vector vec_B(VEC_SIZE, 3, allocator_main_ram); // every element is initialized to 3
		std::vector vec_C(VEC_SIZE, 0, allocator_last_numa); // every element is initialized to 0

		// StarPU data handles, one per vector
		starpu_data_handle_t spu_vec_A;
		starpu_data_handle_t spu_vec_B;
		starpu_data_handle_t spu_vec_C;

		// hand the vectors over to StarPU; C is homed on the last NUMA node
		vector_cpp_data_register(&spu_vec_A, STARPU_MAIN_RAM, &vec_A, vec_A.size(), sizeof(char));
		vector_cpp_data_register(&spu_vec_B, STARPU_MAIN_RAM, &vec_B, vec_B.size(), sizeof(char));
		vector_cpp_data_register(&spu_vec_C, last_numa_node, &vec_C, vec_C.size(), sizeof(char));

		// create the StarPU codelet: two read-only inputs, one write-only output
		starpu_codelet cl;
		starpu_codelet_init(&cl);
		cl.cpu_funcs     [0] = cpu_kernel_add_vectors;
		cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors";
		cl.nbuffers          = 3;
		cl.modes         [0] = STARPU_R;
		cl.modes         [1] = STARPU_R;
		cl.modes         [2] = STARPU_W;
		cl.name              = "add_vectors";

		// submit a new StarPU task to execute
		ret = starpu_task_insert(&cl,
					 STARPU_R, spu_vec_A,
					 STARPU_R, spu_vec_B,
					 STARPU_W, spu_vec_C,
					 0);
		if (ret == -ENODEV)
		{
			// the task could not be submitted: release the handles before leaving
			starpu_data_unregister(spu_vec_C);
			starpu_data_unregister(spu_vec_B);
			starpu_data_unregister(spu_vec_A);

			// terminate StarPU, no task can be submitted after
			starpu_shutdown();

			return 77;
		}

		STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors");

		// wait for the task to complete
		starpu_task_wait_for_all();

		// StarPU data unregistering
		starpu_data_unregister(spu_vec_C);
		starpu_data_unregister(spu_vec_B);
		starpu_data_unregister(spu_vec_A);

		// check results: stop at the first element that is not 2 + 3
		fail = false;
		int i = 0;
		while (!fail && i < VEC_SIZE)
			fail = vec_C[i++] != 5;
	}

	// terminate StarPU, no task can be submitted after
	starpu_shutdown();

	if (fail)
	{
#ifdef PRINT_OUTPUT
		std::cout << "Example failed..." << std::endl;
#endif
		return EXIT_FAILURE;
	}
	else
	{
#ifdef PRINT_OUTPUT
		std::cout << "Example successfully passed!" << std::endl;
#endif
		return EXIT_SUCCESS;
	}
}
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is a small example of a C++ program using starpu. We here just + * increment two values of a vector several times. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_USE_CUDA +extern "C" void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +extern "C" void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args); +struct starpu_opencl_program opencl_program; +#endif + +extern "C" void cpu_codelet(void *descr[], __attribute__ ((unused)) void *_args) +{ + float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + + val[0] += 1.0f; + val[1] += 1.0f; +} + +int main(int argc, char **argv) +{ + int ret = 0; + starpu_data_handle_t float_array_handle; + float float_array[4] __attribute__ ((aligned (16))) = { 0.0f, 0.0f, 0.0f, 0.0f}; + struct starpu_codelet cl; + unsigned i; + unsigned niter = 50; + + struct starpu_conf conf; + starpu_conf_init(&conf); + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float)); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + starpu_codelet_init(&cl); + cl.cpu_funcs[0] = cpu_codelet; + cl.cpu_funcs_name[0] = "cpu_codelet"; +#ifdef STARPU_USE_CUDA + cl.cuda_funcs[0] = cuda_codelet; + cl.cuda_flags[0] = STARPU_CUDA_ASYNC; +#endif 
+#ifdef STARPU_USE_OPENCL + cl.opencl_funcs[0] = opencl_codelet; + cl.opencl_flags[0] = STARPU_OPENCL_ASYNC; +#endif + cl.nbuffers = 1; + cl.modes[0] = STARPU_RW; + cl.name = "incrementer"; + + for (i = 0; i < niter; i++) + { + ret = starpu_task_insert(&cl, + STARPU_RW, float_array_handle, + STARPU_TAG_ONLY, (starpu_tag_t) i, + 0); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(77); + } + } + + starpu_task_wait_for_all(); + + /* update the array in RAM */ + starpu_data_unregister(float_array_handle); + + FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], + float_array[1], float_array[2], float_array[3]); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + + starpu_shutdown(); + + if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) + { + FPRINTF(stderr, "Incorrect result\n"); + return EXIT_FAILURE; + } + + + return EXIT_SUCCESS; +} diff --git a/examples/dependency/sequential_consistency.c b/examples/dependency/sequential_consistency.c new file mode 100644 index 0000000..cf26ce8 --- /dev/null +++ b/examples/dependency/sequential_consistency.c @@ -0,0 +1,180 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_codeletA(void *descr[], void *args); +void cpu_codeletB(void *descr[], void *args); +void cpu_codeletC(void *descr[], void *args); + +struct starpu_codelet clA = +{ + .cpu_funcs = {cpu_codeletA}, + .cpu_funcs_name = {"cpu_codeletA"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codeletA" +}; + +struct starpu_codelet clB = +{ + .cpu_funcs = {cpu_codeletB}, + .cpu_funcs_name = {"cpu_codeletB"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codeletB" +}; + +struct starpu_codelet clC = +{ + .cpu_funcs = {cpu_codeletC}, + .cpu_funcs_name = {"cpu_codeletC"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codeletC" +}; + +void cpu_codeletA(void *descr[], void *args) +{ + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + starpu_data_handle_t value_handle; + starpu_tag_t tagHoldC; + int ret; + unsigned char handle_sequential_consistency[] = {0}; + + FPRINTF(stderr, "[Task A] Value = %d\n", *val); + + starpu_codelet_unpack_args(args, &value_handle, &tagHoldC); + + // With several data, one would need to use a dynamically + // allocated array for the sequential consistency, + // the array could be freed immediately after calling + // starpu_task_insert() + + ret = starpu_task_insert(&clB, + STARPU_RW, value_handle, + STARPU_CALLBACK_WITH_ARG_NFREE, starpu_tag_notify_from_apps, tagHoldC, + STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handle_sequential_consistency, + STARPU_NAME, "taskB", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + *val *= 2; +} + +void cpu_codeletB(void *descr[], void *args) +{ + (void)args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + FPRINTF(stderr, "[Task B] Value = %d\n", *val); + STARPU_ASSERT_MSG(*val == 24, "Incorrect value %d (expected 24)\n", *val); + *val += 1; +} + +void cpu_codeletC(void *descr[], void *args) +{ + (void)args; + int *val = (int 
*)STARPU_VARIABLE_GET_PTR(descr[0]); + + FPRINTF(stderr, "[Task C] Value = %d\n", *val); + STARPU_ASSERT_MSG(*val == 25, "Incorrect value %d (expected 25)\n", *val); + *val *= 2; +} + +/* + * Submit taskA and hold it + * Submit taskC and hold it + * Release taskA + * Execute taskA --> submit taskB + * Execute taskB --> callback: release taskC + * + * All three tasks use the same data in RW, taskB is submitted after + * taskC, so taskB should normally only execute after taskC but as the + * sequential consistency for (taskB, data) is unset, taskB can + * execute straight away + */ +int main(void) +{ + int value=12; + int ret; + starpu_data_handle_t value_handle; + starpu_tag_t tagHoldA = 42; + starpu_tag_t tagHoldC = 84; + starpu_tag_t tagA = 421; + starpu_tag_t tagC = 842; + + struct starpu_conf conf; + + if (sizeof(starpu_tag_t) > sizeof(void*)) + { + // Can't pass a tag_t through callback arg :/ + return 77; + } + + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() < 1) + { + FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); + starpu_shutdown(); + return 77; + } + + starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + starpu_tag_declare_deps_array(tagA, 1, &tagHoldA); + starpu_tag_declare_deps_array(tagC, 1, &tagHoldC); + + ret = starpu_task_insert(&clA, + STARPU_TAG, tagA, + STARPU_RW, value_handle, + STARPU_VALUE, &value_handle, sizeof(starpu_data_handle_t), + STARPU_VALUE, &tagHoldC, sizeof(starpu_tag_t), + STARPU_NAME, "taskA", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&clC, + STARPU_TAG, tagC, + STARPU_RW, value_handle, + STARPU_NAME, "taskC", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + // Release taskA (we want to make sure it will 
execute after taskC has been submitted) + starpu_tag_notify_from_apps(tagHoldA); + + starpu_data_unregister(value_handle); + + STARPU_ASSERT_MSG(value == 50, "Incorrect value %d (expected 50)\n", value); + + starpu_shutdown(); + + FPRINTF(stderr, "Value = %d\n", value); + + return ret; +} diff --git a/examples/dependency/task_end_dep.c b/examples/dependency/task_end_dep.c new file mode 100644 index 0000000..13f565d --- /dev/null +++ b/examples/dependency/task_end_dep.c @@ -0,0 +1,125 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This shows how to defer termination of a task until the termination of + * another task. */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define INIT 12 + +void cpu_codelet2(void *descr[], void *args) +{ + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + (void)args; + STARPU_ASSERT(*val == 2*INIT); + starpu_sleep(0.1); + STARPU_ASSERT(*val == 2*INIT); + *val *= 2; +} + +struct starpu_codelet cl2 = +{ + .cpu_funcs = {cpu_codelet2}, + .cpu_funcs_name = {"cpu_codelet2"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet2" +}; + +void cpu_codelet(void *descr[], void *args) +{ + (void)args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + struct starpu_task *task = starpu_task_get_current(); + int ret; + + ret = starpu_task_insert(&cl2, + STARPU_RW, task->handles[0], + STARPU_TASK_END_DEPS_ARRAY, 1, &task, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + STARPU_ASSERT(*val == INIT); + starpu_sleep(0.1); + STARPU_ASSERT(*val == INIT); + *val *= 2; +} + +struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet" +}; + +int main(void) +{ + int value=INIT; + int ret; + starpu_data_handle_t value_handle; + struct starpu_conf conf; + struct starpu_task *task; + + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() < 1) + { + FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); + starpu_shutdown(); + return 77; + } + + starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + task = starpu_task_build(&cl, + STARPU_RW, value_handle, + 0); + STARPU_ASSERT(task); + task->detach = 0; + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_wait"); + + starpu_data_set_sequential_consistency_flag(value_handle, 0); + starpu_data_acquire_on_node(value_handle, STARPU_MAIN_RAM, STARPU_R); + /* Waiting for the main task should have also waited for the subtask */ + STARPU_ASSERT(value == 2*2*INIT); + starpu_data_release_on_node(value_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(value_handle); + + STARPU_ASSERT(value == 2*2*INIT); + + starpu_shutdown(); + + FPRINTF(stderr, "Value = %d\n", value); + + return ret; +} diff --git a/examples/dependency/task_end_dep_add.c b/examples/dependency/task_end_dep_add.c new file mode 100644 index 0000000..6598cf3 --- /dev/null +++ b/examples/dependency/task_end_dep_add.c @@ -0,0 +1,107 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This shows how to defer termination of a task thanks to + * starpu_task_end_dep_add. */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define INIT 12 + +void cpu_codelet2(void *descr[], void *args) +{ + (void)descr; + (void)args; +} + +struct starpu_codelet cl2 = +{ + .cpu_funcs = {cpu_codelet2}, + .cpu_funcs_name = {"cpu_codelet2"}, + .name = "codelet2" +}; + +void cpu_codelet(void *descr[], void *args) +{ + (void)args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + struct starpu_task *task; + int ret; + + task = starpu_task_get_current(); + starpu_task_end_dep_add(task, 1); + + ret = starpu_task_insert(&cl2, + STARPU_CALLBACK_WITH_ARG_NFREE, starpu_task_end_dep_release, task, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + STARPU_ASSERT(*val == INIT); + *val *= 2; +} + +struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet" +}; + +int main(void) +{ + int value=INIT; + int ret; + starpu_data_handle_t value_handle; + struct starpu_conf conf; + + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() < 1) + { + FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); + starpu_shutdown(); + return 77; + } + + starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + ret = starpu_task_insert(&cl, + STARPU_RW, value_handle, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unregister(value_handle); + + STARPU_ASSERT(value == 2*INIT); + + starpu_shutdown(); + + FPRINTF(stderr, "Value = %d\n", value); + + return ret; +} diff --git a/examples/filters/alloc.c b/examples/filters/alloc.c new file mode 100644 index 0000000..80fa8f2 --- /dev/null +++ b/examples/filters/alloc.c @@ -0,0 +1,102 @@ +/* StarPU --- Runtime system 
/* For each index below n, copy the x and y members of aop[index] into
 * x[index] and y[index] respectively; threads whose index is n or more do
 * nothing. */
static __global__ void custom_cuda(struct point *aop,
				   unsigned n,
				   float *x,
				   float *y)
{
	const unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;

	if (idx >= n)
		return;

	x[idx] = aop[idx].x;
	y[idx] = aop[idx].y;
}
0000000..58a7488 --- /dev/null +++ b/examples/filters/custom_mf/conversion_opencl.c @@ -0,0 +1,97 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "custom_types.h" +#include "custom_interface.h" + +extern struct starpu_opencl_program _opencl_conversion_program; + +void cpu_to_opencl_opencl_func(void *buffers[], void *args) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = CUSTOM_GET_NX(buffers[0]); + n*=2; + struct point *aop; + aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &_opencl_conversion_program, + "custom_opencl_conversion", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]); + if (starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(aop), &aop, + sizeof(x), &x, + sizeof(n), &n, + 0) != 3) + { + STARPU_OPENCL_REPORT_ERROR(err); + assert(0); + } + + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + 
STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel( + queue, + kernel, + 1, /* work_dim */ + NULL, /* global_work_offset */ + &global, /* global_work_size */ + &local, /* local_work_size */ + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/filters/custom_mf/conversion_opencl.cl b/examples/filters/custom_mf/conversion_opencl.cl new file mode 100644 index 0000000..ab7aae0 --- /dev/null +++ b/examples/filters/custom_mf/conversion_opencl.cl @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "custom_types.h" + +/* + * The first n/2 values of x are actual xs. The last N/2 values are ys. 
+ */ +__kernel void custom_opencl_conversion(__global struct point *aop, + __global float *x, + int nx) +{ + const int i = get_global_id(0); + if (i < nx/2) + x[i] = aop[i].x; + else if (i < nx) + x[i] = aop[i-nx/2].y; + +} diff --git a/examples/filters/custom_mf/cuda.cu b/examples/filters/custom_mf/cuda.cu new file mode 100644 index 0000000..550bea7 --- /dev/null +++ b/examples/filters/custom_mf/cuda.cu @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "custom_types.h" +#include "custom_interface.h" + +static __global__ void scal_cuda(unsigned n, + float *x, + float *y) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i < n) + x[i] *= y[i]; +} + +extern "C" void custom_scal_cuda_func(void *buffers[], void *_args) +{ + (void) _args; + + unsigned int n = CUSTOM_GET_NX(buffers[0]); + float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]); + float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]); + + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; + scal_cuda<<>>(n, x, y); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/custom_mf/custom_conversion_codelets.c b/examples/filters/custom_mf/custom_conversion_codelets.c new file mode 100644 index 0000000..727ee17 --- /dev/null +++ b/examples/filters/custom_mf/custom_conversion_codelets.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "custom_interface.h" +#include "custom_types.h" + +#ifdef STARPU_USE_CUDA +void cuda_to_cpu(void *buffers[], void *arg) +{ + (void)arg; + int n = CUSTOM_GET_NX(buffers[0]); + float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]); + float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]); + struct point *aop; + aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); + + int i; + for (i = 0; i < n; i++) + { + aop[i].x = x[i]; + aop[i].y = y[i]; + } + return; +} + +extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); +struct starpu_codelet cpu_to_cuda_cl = +{ + .cuda_funcs = {cpu_to_cuda_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .modes = { STARPU_RW }, + .nbuffers = 1, + .name = "codelet_cpu_to_cuda" +}; + +struct starpu_codelet cuda_to_cpu_cl = +{ + .cpu_funcs = {cuda_to_cpu}, + .modes = { STARPU_RW }, + .nbuffers = 1, + .name = "codelet_cuda_to_cpu" +}; +#endif + + +#ifdef STARPU_USE_OPENCL +void opencl_to_cpu_cpu_func(void *buffers[], void *arg) +{ + (void)arg; + int n = CUSTOM_GET_NX(buffers[0]); + float *x = (float *) CUSTOM_GET_OPENCL_X_PTR(buffers[0]); + struct point *aop; + aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); + + int i; + for (i = 0; i < n; i++) + { + aop[i].x = x[i]; + aop[i].y = x[i+n]; + } +} + +extern void cpu_to_opencl_opencl_func(void *buffers[], void *arg); + +struct starpu_codelet cpu_to_opencl_cl = +{ + .opencl_funcs = { cpu_to_opencl_opencl_func }, + .opencl_flags = {STARPU_OPENCL_ASYNC}, + .modes = { STARPU_RW }, + .nbuffers = 1, + .name = "codelet_cpu_to_opencl" +}; + +struct starpu_codelet opencl_to_cpu_cl = +{ + .cpu_funcs = { opencl_to_cpu_cpu_func }, + .modes = { STARPU_RW }, + .nbuffers = 1, + .name = "codelet_opencl_to_cpu" +}; +#endif /* !STARPU_USE_OPENCL */ diff --git a/examples/filters/custom_mf/custom_interface.c b/examples/filters/custom_mf/custom_interface.c new file mode 100644 index 0000000..83e2ecd --- /dev/null +++ b/examples/filters/custom_mf/custom_interface.c @@ -0,0 +1,495 @@ 
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+#ifdef STARPU_USE_CUDA
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream);
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
+				 void *dst_interface, unsigned dst_node);
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
+				    void *dst_interface, unsigned dst_node,
+				    cl_event *event);
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
+				    void *dst_interface, unsigned dst_node,
+				    cl_event *event);
+#endif /* !STARPU_USE_OPENCL */
+
+/* Transfer methods of the custom interface; unset entries are not provided. */
+static const struct starpu_data_copy_methods custom_copy_data_methods_s =
+{
+	.ram_to_ram = NULL,
+#ifdef STARPU_USE_CUDA
+	.ram_to_cuda = NULL,
+	.cuda_to_ram = NULL,
+	.ram_to_cuda_async = copy_ram_to_cuda_async,
+	.cuda_to_ram_async = copy_cuda_to_ram_async,
+	.cuda_to_cuda = copy_cuda_to_cuda,
+	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl = copy_ram_to_opencl,
+	.opencl_to_ram = copy_opencl_to_ram,
+	.opencl_to_opencl = copy_opencl_to_opencl,
+	.ram_to_opencl_async = copy_ram_to_opencl_async,
+	.opencl_to_ram_async = copy_opencl_to_ram_async,
+#endif
+};
+
+static void register_custom_handle(starpu_data_handle_t handle,
+				   int home_node,
+				   void *data_interface);
+static starpu_ssize_t allocate_custom_buffer_on_node(void *data_interface_,
+						     unsigned dst_node);
+static void* custom_to_pointer(void *data_interface, unsigned node);
+static void free_custom_buffer_on_node(void *data_interface, unsigned node);
+static size_t custom_interface_get_size(starpu_data_handle_t handle);
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle);
+static void display_custom_interface(starpu_data_handle_t handle, FILE *f);
+static uint32_t custom_get_nx(starpu_data_handle_t handle);
+
+
+/* Return the multiformat operations stored in the interface. */
+static struct starpu_multiformat_data_interface_ops*get_mf_ops(void *data_interface)
+{
+	struct custom_data_interface *custom;
+	custom = (struct custom_data_interface *) data_interface;
+
+	return custom->ops;
+}
+
+/* Operation table handed to starpu_data_register() for custom handles. */
+static struct starpu_data_interface_ops interface_custom_ops =
+{
+	.register_data_handle = register_custom_handle,
+	.allocate_data_on_node = allocate_custom_buffer_on_node,
+	.to_pointer = custom_to_pointer,
+	.free_data_on_node = free_custom_buffer_on_node,
+	.copy_methods = &custom_copy_data_methods_s,
+	.get_size = custom_interface_get_size,
+	.footprint = footprint_custom_interface_crc32,
+	.compare = NULL,
+	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
+	.interface_size = sizeof(struct custom_data_interface),
+	.display = display_custom_interface,
+	.is_multiformat = 1,
+	.get_mf_ops = get_mf_ops
+};
+
+/*
+ * Initialise the per-node interfaces of a freshly registered handle: the
+ * home node receives the caller's pointers, every other node starts with
+ * NULL pointers; nx and ops are copied to all nodes.
+ */
+static void
+register_custom_handle(starpu_data_handle_t handle, int home_node, void *data_interface)
+{
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	int node;
+	int nnodes = starpu_memory_nodes_get_count();
+	for (node = 0; node < nnodes; node++)
+	{
+		struct custom_data_interface *local_interface =
+			(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+
+		if (node == home_node)
+		{
+			local_interface->cpu_ptr = custom_interface->cpu_ptr;
+#ifdef STARPU_USE_CUDA
+			local_interface->cuda_ptr = custom_interface->cuda_ptr;
+#endif
+#ifdef STARPU_USE_OPENCL
+			local_interface->opencl_ptr = custom_interface->opencl_ptr;
+#endif
+		}
+		else
+		{
+			local_interface->cpu_ptr = NULL;
+#ifdef STARPU_USE_CUDA
+			local_interface->cuda_ptr = NULL;
+#endif
+#ifdef STARPU_USE_OPENCL
+			local_interface->opencl_ptr = NULL;
+#endif
+		}
+		local_interface->nx = custom_interface->nx;
+		local_interface->ops = custom_interface->ops;
+	}
+}
+
+/*
+ * Allocate one buffer of nx * cpu_elemsize bytes per enabled format on
+ * node. Returns the total number of bytes allocated, or -ENOMEM after
+ * releasing whatever had been allocated so far.
+ */
+static starpu_ssize_t allocate_custom_buffer_on_node(void *data_interface, unsigned node)
+{
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	starpu_ssize_t size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+	starpu_ssize_t allocated = 0;
+
+	custom_interface->cpu_ptr = (void*) starpu_malloc_on_node(node, size);
+	if (!custom_interface->cpu_ptr)
+		goto fail_cpu;
+	allocated += size;
+#ifdef STARPU_USE_CUDA
+	custom_interface->cuda_ptr = (void*) starpu_malloc_on_node(node, size);
+	if (!custom_interface->cuda_ptr)
+		goto fail_cuda;
+	allocated += size;
+#endif
+#ifdef STARPU_USE_OPENCL
+	custom_interface->opencl_ptr = (void*) starpu_malloc_on_node(node, size);
+	if (!custom_interface->opencl_ptr)
+		goto fail_opencl;
+	allocated += size;
+#endif
+
+	return allocated;
+
+	/* Unwind in reverse order; freed pointers are reset so the interface
+	 * never keeps a dangling address. */
+#ifdef STARPU_USE_OPENCL
+fail_opencl:
+#ifdef STARPU_USE_CUDA
+	starpu_free_on_node(node, (uintptr_t) custom_interface->cuda_ptr, size);
+	custom_interface->cuda_ptr = NULL;
+#endif
+#endif
+#ifdef STARPU_USE_CUDA
+fail_cuda:
+#endif
+	starpu_free_on_node(node, (uintptr_t) custom_interface->cpu_ptr, size);
+	custom_interface->cpu_ptr = NULL;
+fail_cpu:
+	return -ENOMEM;
+}
+
+/* Release every per-format buffer of the interface on node and clear the pointers. */
+static void free_custom_buffer_on_node(void *data_interface, unsigned node)
+{
+	struct custom_data_interface *custom_interface = (struct custom_data_interface *) data_interface;
+	size_t size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+
+	starpu_free_on_node(node, (uintptr_t) custom_interface->cpu_ptr, size);
+	custom_interface->cpu_ptr = NULL;
+#ifdef STARPU_USE_CUDA
+	starpu_free_on_node(node, (uintptr_t) custom_interface->cuda_ptr, size);
+	custom_interface->cuda_ptr = NULL;
+#endif
+#ifdef STARPU_USE_OPENCL
+	starpu_free_on_node(node, (uintptr_t) custom_interface->opencl_ptr, size);
+	custom_interface->opencl_ptr = NULL;
+#endif
+}
+
+/* Return the pointer matching the kind of memory node; other kinds are a programming error. */
+static void*
+custom_to_pointer(void *data, unsigned node)
+{
+	struct custom_data_interface *data_interface = data;
+
+	switch(starpu_node_get_kind(node))
+	{
+		case STARPU_CPU_RAM:
+			return data_interface->cpu_ptr;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			return data_interface->cuda_ptr;
+#endif
+#ifdef STARPU_USE_OPENCL
+		case STARPU_OPENCL_RAM:
+			return data_interface->opencl_ptr;
+#endif
+		default:
+			assert(0);
+			/* Keep the function well-defined when assert() is compiled out. */
+			return NULL;
+	}
+}
+
+/* Size in bytes of the CPU representation, read from the main-RAM interface. */
+static size_t custom_interface_get_size(starpu_data_handle_t handle)
+{
+	size_t size;
+	struct custom_data_interface *data_interface;
+
+	data_interface = (struct custom_data_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	size = data_interface->nx * data_interface->ops->cpu_elemsize;
+	return size;
+}
+
+/* Footprint of the handle, derived from its element count only. */
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
+{
+	return starpu_hash_crc32c_be(custom_get_nx(handle), 0);
+}
+
+/* Print a one-line description of the handle to f. */
+static void
+display_custom_interface(starpu_data_handle_t handle, FILE *f)
+{
+	struct custom_data_interface *ci = (struct custom_data_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	fprintf(f, "Custom interface of size %u", ci->nx);
+}
+
+/* Number of elements of the handle, read from the main-RAM interface. */
+static uint32_t
+custom_get_nx(starpu_data_handle_t handle)
+{
+	struct custom_data_interface *data_interface;
+	data_interface = (struct custom_data_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	return data_interface->nx;
+}
+
+
+/*
+ * Register nx elements stored at ptr (CPU format) on home_node as a custom
+ * multiformat handle using format_ops. The handle is returned through handle.
+ */
+void custom_data_register(starpu_data_handle_t *handle,
+			  int home_node,
+			  void *ptr,
+			  uint32_t nx,
+			  struct starpu_multiformat_data_interface_ops *format_ops)
+{
+	struct custom_data_interface custom =
+	{
+		.cpu_ptr = ptr,
+#ifdef STARPU_USE_CUDA
+		.cuda_ptr = NULL,
+#endif
+#ifdef STARPU_USE_OPENCL
+		.opencl_ptr = NULL,
+#endif
+		.nx = nx,
+		.ops = format_ops
+	};
+
+	starpu_data_register(handle, home_node, &custom, &interface_custom_ops);
+}
+
+#ifdef STARPU_USE_CUDA
+/*
+ * Shared body of the RAM <-> CUDA transfers.
+ *
+ * Host to device: copies the CPU-format buffer into dst's cpu_ptr and makes
+ * sure dst has a cuda_ptr buffer for the later conversion.
+ * Device to host: copies the CUDA-format buffer (2*nx floats) into dst's
+ * cuda_ptr, allocated with malloc() when missing.
+ *
+ * Returns 0, or -ENOMEM when the host buffer cannot be allocated. CUDA
+ * failures go through STARPU_CUDA_REPORT_ERROR.
+ * NOTE(review): the copy is enqueued on stream and 0 is returned without
+ * waiting for it -- confirm this matches the copy-method contract.
+ */
+static int
+copy_cuda_common_async(void *src_interface, unsigned src_node,
+		       void *dst_interface, unsigned dst_node,
+		       cudaStream_t stream, enum cudaMemcpyKind kind)
+{
+	(void)src_node;
+	(void)dst_node;
+	struct custom_data_interface *src_custom, *dst_custom;
+
+	src_custom = (struct custom_data_interface *) src_interface;
+	dst_custom = (struct custom_data_interface *) dst_interface;
+
+	starpu_ssize_t size = 0;
+	cudaError_t err;
+
+	switch (kind)
+	{
+	case cudaMemcpyHostToDevice:
+	{
+		size = src_custom->nx * src_custom->ops->cpu_elemsize;
+		if (dst_custom->cpu_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cpu_ptr, size);
+			if (err != cudaSuccess)
+				STARPU_CUDA_REPORT_ERROR(err);
+		}
+
+		err = cudaMemcpyAsync(dst_custom->cpu_ptr,
+				      src_custom->cpu_ptr,
+				      size, kind, stream);
+		if (err != cudaSuccess)
+			STARPU_CUDA_REPORT_ERROR(err);
+
+		/* Allocate only when missing: overwriting an existing buffer
+		 * would leak it. */
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cuda_ptr, size);
+			if (err != cudaSuccess)
+				STARPU_CUDA_REPORT_ERROR(err);
+		}
+		break;
+	}
+	case cudaMemcpyDeviceToHost:
+		size = 2*src_custom->nx*sizeof(float);
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			dst_custom->cuda_ptr = malloc(size);
+			if (dst_custom->cuda_ptr == NULL)
+				return -ENOMEM;
+		}
+		err = cudaMemcpyAsync(dst_custom->cuda_ptr,
+				      src_custom->cuda_ptr,
+				      size, kind, stream);
+		if (err != cudaSuccess)
+			STARPU_CUDA_REPORT_ERROR(err);
+		break;
+	default:
+		assert(0);
+	}
+
+	return 0;
+}
+
+/* RAM -> CUDA transfer; see copy_cuda_common_async(). */
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	return copy_cuda_common_async(src_interface, src_node,
+				      dst_interface, dst_node,
+				      stream, cudaMemcpyHostToDevice);
+}
+/* CUDA -> RAM transfer; see copy_cuda_common_async(). */
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	return copy_cuda_common_async(src_interface, src_node,
+				      dst_interface, dst_node,
+				      stream, cudaMemcpyDeviceToHost);
+}
+/* CUDA -> CUDA transfers are not implemented. */
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node)
+{
+	(void)src_interface;
+	(void)src_node;
+	(void)dst_interface;
+	(void)dst_node;
+	assert(0);
+	/* Keep the function well-defined when assert() is compiled out. */
+	return -1;
+}
+/* Asynchronous CUDA -> CUDA transfers are not implemented. */
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream)
+{
+	(void)src_interface;
+	(void)src_node;
+	(void)dst_interface;
+	(void)dst_node;
+	(void)stream;
+	assert(0);
+	/* Keep the function well-defined when assert() is compiled out. */
+	return -1;
+}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+/* Synchronous RAM -> OpenCL copy: does nothing and reports success. */
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+	return 0;
+}
+
+/* Synchronous OpenCL -> RAM copy: does nothing and reports success. */
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+	return 0;
+}
+
+/* Synchronous OpenCL -> OpenCL copy: does nothing and reports success. */
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
+				 void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+	return 0;
+}
+
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cl_event *event) +{ + (void)event; + starpu_ssize_t size; + struct custom_data_interface *src_custom, *dst_custom; + + src_custom = (struct custom_data_interface *) src_interface; + dst_custom = (struct custom_data_interface *) dst_interface; + + /* + * Opencl stuff. + */ + cl_context context; + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + starpu_opencl_get_context(devid, &context); + + /* Real stuff */ + int err; + cl_int ret; + + size = src_custom->nx * 2 * sizeof(float); + if (dst_custom->cpu_ptr == NULL) + { + ret = starpu_opencl_allocate_memory(devid, (cl_mem*)&dst_custom->cpu_ptr, + size, CL_MEM_READ_WRITE); + assert(ret == CL_SUCCESS); + } + err = starpu_opencl_copy_ram_to_opencl(src_custom->cpu_ptr, + src_node, + dst_custom->cpu_ptr, + dst_node, + size, + 0, + NULL, + &ret); + assert(err == 0); + return 0; +} + +static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cl_event *event) +{ + (void)event; + starpu_ssize_t size; + struct custom_data_interface *src_custom, *dst_custom; + + src_custom = (struct custom_data_interface *) src_interface; + dst_custom = (struct custom_data_interface *) dst_interface; + + /* + * Opencl stuff. 
+ */ + cl_context context; + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + starpu_opencl_get_context(devid, &context); + + /* real stuff */ + int err; + cl_int ret; + size = src_custom->nx * 2 * sizeof(float); + if (!dst_custom->opencl_ptr) + { + dst_custom->opencl_ptr = malloc(size); + assert(dst_custom->opencl_ptr != NULL); + } + + err = starpu_opencl_copy_opencl_to_ram(src_custom->opencl_ptr, + src_node, + dst_custom->opencl_ptr, + dst_node, + size, + 0, + NULL, + &ret); + assert(err == 0); + return 0; +} +#endif /* !STARPU_USE_OPENCL */ diff --git a/examples/filters/custom_mf/custom_interface.h b/examples/filters/custom_mf/custom_interface.h new file mode 100644 index 0000000..8d498c6 --- /dev/null +++ b/examples/filters/custom_mf/custom_interface.h @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#ifndef __CUSTOM_INTERFACE_H__ +#define __CUSTOM_INTERFACE_H__ +#include +struct custom_data_interface +{ + void *cpu_ptr; + void *cuda_ptr; + void *opencl_ptr; + struct starpu_multiformat_data_interface_ops *ops; + uint32_t nx; +}; + +void custom_data_register(starpu_data_handle_t *handle, + int home_node, + void *ptr, + uint32_t nx, + struct starpu_multiformat_data_interface_ops* ops); + +#define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx) +#define CUSTOM_GET_CPU_PTR(interface) (((struct custom_data_interface*)(interface))->cpu_ptr) + +#ifdef STARPU_USE_CUDA +#define CUSTOM_GET_X_PTR(interface) (((struct custom_data_interface*)(interface))->cuda_ptr) +#define CUSTOM_GET_Y_PTR(interface) \ + (((struct custom_data_interface*)(interface))->cuda_ptr)+ \ + CUSTOM_GET_NX((interface)) +#endif /* !STARPU_USE_CUDA */ + +#ifdef STARPU_USE_OPENCL +#define CUSTOM_GET_OPENCL_X_PTR(interface) (((struct custom_data_interface *)(interface))->opencl_ptr) +#endif + +#endif /* ! __CUSTOM_INTERFACE_H__ */ diff --git a/examples/filters/custom_mf/custom_mf_filter.c b/examples/filters/custom_mf/custom_mf_filter.c new file mode 100644 index 0000000..2db4d7e --- /dev/null +++ b/examples/filters/custom_mf/custom_mf_filter.c @@ -0,0 +1,326 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include +#include "custom_interface.h" +#include "custom_types.h" + +#define N 12 + +#define DEBUG 1 + +#ifdef STARPU_USE_CUDA +static unsigned int _ncuda; +#endif +#ifdef STARPU_USE_OPENCL +static unsigned int _nopencl; +#endif + +static struct point _array_of_structs[N]; +static starpu_data_handle_t _handle; +static unsigned int _nchunks = 6; + +#ifdef STARPU_USE_CUDA +extern struct starpu_codelet cpu_to_cuda_cl; +extern struct starpu_codelet cuda_to_cpu_cl; +#endif + +#ifdef STARPU_USE_OPENCL +extern struct starpu_codelet cpu_to_opencl_cl; +extern struct starpu_codelet opencl_to_cpu_cl; +#endif + +static struct starpu_multiformat_data_interface_ops format_ops = +{ +#ifdef STARPU_USE_CUDA + .cuda_elemsize = sizeof(struct struct_of_arrays), + .cpu_to_cuda_cl = &cpu_to_cuda_cl, + .cuda_to_cpu_cl = &cuda_to_cpu_cl, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_elemsize = sizeof(struct struct_of_arrays), + .cpu_to_opencl_cl = &cpu_to_opencl_cl, + .opencl_to_cpu_cl = &opencl_to_cpu_cl, +#endif + .cpu_elemsize = sizeof(struct point), +}; + +static void custom_filter(void *father, void *child, struct starpu_data_filter *f, + unsigned id, unsigned nchunks) +{ + (void)f; + struct custom_data_interface *custom_father, *custom_child; + custom_father = (struct custom_data_interface *) father; + custom_child = (struct custom_data_interface *) child; + + assert(N % nchunks == 0); // XXX + starpu_ssize_t chunk_size = N/nchunks; + + if (custom_father->cpu_ptr) + { + struct point *tmp = (struct point *) custom_father->cpu_ptr; + tmp += id * chunk_size; + custom_child->cpu_ptr = tmp; + } +#ifdef STARPU_USE_CUDA + else if (custom_father->cuda_ptr) + { + struct struct_of_arrays *soa_father, *soa_child; + soa_father = (struct struct_of_arrays*) custom_father->cuda_ptr; + soa_child = (struct struct_of_arrays*) custom_child->cuda_ptr; + soa_child->x = soa_father->x + chunk_size; + soa_child->y = soa_father->y + chunk_size; + } +#endif +#ifdef STARPU_USE_OPENCL + else if 
(custom_father->opencl_ptr) + { + struct struct_of_arrays *soa_father, *soa_child; + soa_father = (struct struct_of_arrays*) custom_father->opencl_ptr; + soa_child = (struct struct_of_arrays*) custom_child->opencl_ptr; + soa_child->x = soa_father->x + chunk_size; + soa_child->y = soa_father->y + chunk_size; + } +#endif /* !STARPU_USE_OPENCL */ + + custom_child->ops = custom_father->ops; + custom_child->nx = chunk_size; +} + +static void +register_and_partition_data(void) +{ + int i; + for (i = 0; i < N; i++) + { + _array_of_structs[i].x = i+1.0; + _array_of_structs[i].y = 42.0; + } + custom_data_register(&_handle, STARPU_MAIN_RAM, &_array_of_structs, N, &format_ops); + + struct starpu_data_filter f = + { + .filter_func = custom_filter, + .nchildren = _nchunks, + .get_nchildren = NULL, + .get_child_ops = NULL + }; + starpu_data_partition(_handle, &f); +} + +static void +unpartition_and_unregister_data(void) +{ + starpu_data_unpartition(_handle, STARPU_MAIN_RAM); + starpu_data_unregister(_handle); +} + +static void +custom_scal_cpu_func(void *buffers[], void *args) +{ + struct point *aos; + unsigned int n, i; + (void)args; + + aos = CUSTOM_GET_CPU_PTR(buffers[0]); + n = CUSTOM_GET_NX(buffers[0]); + + for (i = 0; i < n; i++) + aos[i].x *= aos[i].y; +} + +#ifdef STARPU_USE_CUDA +extern void custom_scal_cuda_func(void *buffers[], void *args); +#endif + +static struct starpu_codelet cpu_cl = +{ + .cpu_funcs = { custom_scal_cpu_func}, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "codelet_real" +}; + +#ifdef STARPU_USE_CUDA +static struct starpu_codelet cuda_cl = +{ + .cuda_funcs = { custom_scal_cuda_func }, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "cuda_codelet" +}; +#endif /* !STARPU_USE_CUDA */ + +#ifdef STARPU_USE_OPENCL +extern void custom_scal_opencl_func(void *buffers[], void *args); + +static struct starpu_codelet opencl_cl = +{ + .opencl_funcs = { custom_scal_opencl_func }, + .opencl_flags = 
{STARPU_OPENCL_ASYNC}, + .nbuffers = 1, + .modes = { STARPU_RW }, + .name = "opencl_codelet" +}; +#endif /* !STARPU_USE_OPENCL */ + +static int +create_and_submit_tasks(void) +{ + int err; + unsigned int i; + for (i = 0; i < _nchunks; i++) + { + struct starpu_task *task = starpu_task_create(); + switch (i%3) + { + case 0: + task->cl = &cpu_cl; + break; + case 1: +#ifdef STARPU_USE_CUDA + if (_ncuda > 0) + task->cl = &cuda_cl; + else +#endif + task->cl = &cpu_cl; + break; + case 2: +#ifdef STARPU_USE_OPENCL + if (_nopencl > 0) + task->cl = &opencl_cl; + else +#endif + task->cl = &cpu_cl; + break; + default: + /* We should never get here */ + assert(0); + } + + task->handles[0] = starpu_data_get_sub_data(_handle, 1, i); + err = starpu_task_submit(task); + if (err != 0) + return err; + } + + + err = starpu_task_wait_for_all(); + if (err != 0) + return err; + + return 0; +} + +#if DEBUG +static void +print_it(void) +{ + int i; + for (i = 0; i < N; i++) + { + FPRINTF(stderr, "(%.2f, %.2f) ", + _array_of_structs[i].x, + _array_of_structs[i].y); + } + FPRINTF(stderr, "\n"); +} +#endif + +static int +check_it(void) +{ + int i; + for (i = 0; i < N; i++) + { + float expected_value = (i + 1.0)*42.0; + if (_array_of_structs[i].x != expected_value) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program _opencl_program; +struct starpu_opencl_program _opencl_conversion_program; +#endif /* !STARPU_USE_OPENCL */ + +int +main(void) +{ +#ifndef STARPU_USE_CPU + return 77; +#else + int err; + + err = starpu_init(NULL); + if (err == -ENODEV) + goto enodev; + +#ifdef STARPU_USE_CUDA + _ncuda = starpu_cuda_worker_get_count(); +#endif /* !STARPU_USE_CUDA */ +#ifdef STARPU_USE_OPENCL + _nopencl = starpu_opencl_worker_get_count(); + if (_nopencl > 0) + { + char *f1 = "examples/filters/custom_mf/custom_opencl.cl"; + char *f2 = "examples/filters/custom_mf/conversion_opencl.cl"; + err = starpu_opencl_load_opencl_from_file(f1, 
&_opencl_program, + NULL); + assert(err == 0); + err = starpu_opencl_load_opencl_from_file(f2, + &_opencl_conversion_program, + NULL); + assert(err == 0); + } +#endif /* !STARPU_USE_OPENCL */ + + register_and_partition_data(); +#if DEBUG + print_it(); +#endif + err = create_and_submit_tasks(); + if (err != 0) + { + FPRINTF(stderr, "create_submit_task : %s\n", + strerror(-err)); + return EXIT_FAILURE; + } + unpartition_and_unregister_data(); +#if DEBUG + print_it(); +#endif + +#ifdef STARPU_USE_OPENCL + if (_nopencl > 0) + { + err = starpu_opencl_unload_opencl(&_opencl_program); + assert(err == 0); + err = starpu_opencl_unload_opencl(&_opencl_conversion_program); + assert(err == 0); + } +#endif /* !STARPU_USE_OPENCL */ + starpu_shutdown(); + print_it(); + return check_it(); + + +enodev: + return 77; +#endif +} diff --git a/examples/filters/custom_mf/custom_opencl.c b/examples/filters/custom_mf/custom_opencl.c new file mode 100644 index 0000000..97a39a0 --- /dev/null +++ b/examples/filters/custom_mf/custom_opencl.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "custom_types.h" +#include "custom_interface.h" + +extern struct starpu_opencl_program _opencl_program; + +void custom_scal_opencl_func(void *buffers[], void *args) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = CUSTOM_GET_NX(buffers[0]); + struct point *aop; + aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &_opencl_program, + "custom_scal_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]); + if (starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(aop), &aop, + sizeof(x), &x, + sizeof(n), &n, + 0) != 3) + { + STARPU_OPENCL_REPORT_ERROR(err); + assert(0); + } + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel( + queue, + kernel, + 1, /* work_dim */ + NULL, /* global_work_offset */ + &global, /* global_work_size */ + &local, /* local_work_size */ + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/filters/custom_mf/custom_opencl.cl b/examples/filters/custom_mf/custom_opencl.cl new file mode 100644 index 0000000..34cc29b --- /dev/null +++ b/examples/filters/custom_mf/custom_opencl.cl @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "custom_types.h" + +__kernel void custom_scal_opencl(__global struct point *aop, + __global float *x, + int nx) +{ + const int i = get_global_id(0); + if (i < nx) + x[i] *= x[i+nx]; +} diff --git a/examples/filters/custom_mf/custom_types.h b/examples/filters/custom_mf/custom_types.h new file mode 100644 index 0000000..b8f72c3 --- /dev/null +++ b/examples/filters/custom_mf/custom_types.h @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#ifndef __CUSTOM_TYPES_H__ +#define __CUSTOM_TYPES_H__ + + +struct struct_of_arrays +{ + float *x, *y; +}; + +struct point +{ + float x, y; +}; + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#endif diff --git a/examples/filters/f3d_cpu.c b/examples/filters/f3d_cpu.c new file mode 100644 index 0000000..a8ae913 --- /dev/null +++ b/examples/filters/f3d_cpu.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +void f3d_cpu_func(void *buffers[], void *cl_arg) +{ + int i, j, k; + int *factor = (int *) cl_arg; + int *arr3d = (int *)STARPU_NDIM_GET_PTR(buffers[0]); + int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); + unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; + unsigned ldy = ldn[1]; + unsigned ldz = ldn[2]; + + for(k=0; k + +static __global__ void f3d_cuda(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +{ + int i, j, k; + + for(k=0; k>>(arr3d, nx, ny, nz, ldy, ldz, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/f3d_hip.hip b/examples/filters/f3d_hip.hip new file mode 100644 index 0000000..72c3bcf --- /dev/null +++ b/examples/filters/f3d_hip.hip @@ -0,0 +1,50 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb HIP kernel to fill a 3D matrix */ + +#include + +static __global__ void f3d_hip(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +{ + int i, j, k; + + for(k=0; k + +void f4d_cpu_func(void *buffers[], void *cl_arg) +{ + int i, j, k, l; + int *factor = (int *) cl_arg; + int *arr4d = (int *)STARPU_NDIM_GET_PTR(buffers[0]); + int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); + unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; + int nt = nn[3]; + unsigned ldy = ldn[1]; + unsigned ldz = ldn[2]; + unsigned ldt = ldn[3]; + + for(l=0; l + +static __global__ void f4d_cuda(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +{ + int i, j, k, l; + + for(l=0; l>>(arr4d, nx, ny, nz, nt, ldy, ldz, ldt, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/f4d_hip.hip b/examples/filters/f4d_hip.hip new file mode 100644 index 0000000..7a7aec9 --- /dev/null +++ b/examples/filters/f4d_hip.hip @@ -0,0 +1,55 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb HIP kernel to fill a 4D matrix */ + +#include + +static __global__ void f4d_hip(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +{ + int i, j, k, l; + + for(l=0; l + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void print_5darr(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg) +{ + int i, j, k, l, m; + FPRINTF(stderr, "5dim array=%p nx=%d ny=%d nz=%d nt=%d ng=%d ldy=%u ldz=%u ldt=%u ldg=%u\n", arr5d, nx, ny, nz, nt, ng, ldy, ldz, ldt, ldg); + for(m=0 ; m + +#define NX 5 +#define NY 4 +#define NZ 3 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void block_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void block_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void block_hip_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_OPENCL +extern void opencl_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_block_data(starpu_data_handle_t block_handle); + +int main(void) +{ + int *block; + int i, j, k; + int ret; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {block_cpu_func}, + .cpu_funcs_name = {"block_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {block_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {block_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_func}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "block_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); + assert(block); + generate_block_data(block, NX, NY, NZ, NX, NX*NY); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/filters/fblock_opencl_kernel.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + /* Declare data to StarPU */ + starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); + FPRINTF(stderr, "IN Block\n"); + 
print_block_data(handle); + + /* Partition the block in PARTS sub-blocks */ + struct starpu_data_filter f = + { + .filter_func = starpu_block_filter_block, + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl_arg = &multiplier; + task->cl_arg_size = sizeof(multiplier); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + print_block_data(handle); + starpu_data_unregister(handle); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + + /* Print result block */ + FPRINTF(stderr, "OUT Block\n"); + print_block(block, NX, NY, NZ, NX, NX*NY); + + starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fblock_cpu.c b/examples/filters/fblock_cpu.c new file mode 100644 index 0000000..b7ab56e --- /dev/null +++ b/examples/filters/fblock_cpu.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb kernel to fill a 3D matrix */ + +#include + +void block_cpu_func(void *buffers[], void *cl_arg) +{ + int i, j, k; + int *factor = (int *) cl_arg; + int *block = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); + int nx = (int)STARPU_BLOCK_GET_NX(buffers[0]); + int ny = (int)STARPU_BLOCK_GET_NY(buffers[0]); + int nz = (int)STARPU_BLOCK_GET_NZ(buffers[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + + for(k=0; k + +static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +{ + int i, j, k; + for(k=0; k>>(block, nx, ny, nz, ldy, ldz, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/fblock_hip.hip b/examples/filters/fblock_hip.hip new file mode 100644 index 0000000..119b527 --- /dev/null +++ b/examples/filters/fblock_hip.hip @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* dumb HIP kernel to fill a 3D matrix */ + +#include + +static __global__ void fblock_hip(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +{ + int i, j, k; + for(k=0; k + +#define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr) \ +do \ +{ \ + int check_err; \ + check_err = clSetKernelArg(kernel, n, size, ptr); \ + if (check_err != CL_SUCCESS) \ + STARPU_OPENCL_REPORT_ERROR(check_err); \ +} while (0) + +extern struct starpu_opencl_program opencl_program; + +void opencl_func(void *buffers[], void *cl_arg) +{ + int id, devid, err; + cl_kernel kernel; + cl_command_queue queue; + + int *factor = cl_arg; + cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]); + unsigned offset = STARPU_BLOCK_GET_OFFSET(buffers[0]); + int nx = (int)STARPU_BLOCK_GET_NX(buffers[0]); + int ny = (int)STARPU_BLOCK_GET_NY(buffers[0]); + int nz = (int)STARPU_BLOCK_GET_NZ(buffers[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "fblock_opencl", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + CHECK_CL_SET_KERNEL_ARG(kernel, 0, sizeof(block), &block); + CHECK_CL_SET_KERNEL_ARG(kernel, 1, sizeof(offset), &offset); + CHECK_CL_SET_KERNEL_ARG(kernel, 2, sizeof(nx), &nx); + CHECK_CL_SET_KERNEL_ARG(kernel, 3, sizeof(ny), &ny); + CHECK_CL_SET_KERNEL_ARG(kernel, 4, sizeof(nz), &nz); + CHECK_CL_SET_KERNEL_ARG(kernel, 5, sizeof(ldy), &ldy); + CHECK_CL_SET_KERNEL_ARG(kernel, 6, sizeof(ldz), &ldz); + CHECK_CL_SET_KERNEL_ARG(kernel, 7, sizeof(*factor), factor); + + { + size_t global[3]={nx,ny,nz}; + err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} + diff --git a/examples/filters/fblock_opencl_kernel.cl 
b/examples/filters/fblock_opencl_kernel.cl new file mode 100644 index 0000000..6ccba9f --- /dev/null +++ b/examples/filters/fblock_opencl_kernel.cl @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Simple OpenCL kernel: each work-item of a 3-D range stores factor into one element of a 3D block */ + +__kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor) +{ + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + if (idx >= nx) /* work-items outside the nx by ny by nz extent return without writing */ + return; + if (idy >= ny) + return; + if (idz >= nz) + return; + + block = (__global int*) ((__global char *)block + offset); /* offset is added after a cast to char *, so it is counted in bytes */ + int i = idz*ldz + idy*ldy + idx; /* linear element index built from the z and y leading dimensions */ + block[i] = factor; +} diff --git a/examples/filters/fblock_pick_matrix.c b/examples/filters/fblock_pick_matrix.c new file mode 100644 index 0000000..b822403 --- /dev/null +++ b/examples/filters/fblock_pick_matrix.c @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 5 +#define NY 4 +#define NZ 3 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void matrix_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void matrix_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void matrix_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_block_data(starpu_data_handle_t block_handle); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) +{ + int *block; + int i, j, k; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {matrix_cpu_func}, + .cpu_funcs_name = {"matrix_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {matrix_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {matrix_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "block_pick_matrix_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); + assert(block); + generate_block_data(block, NX, NY, NZ, NX, NX*NY); + + /* Declare data to StarPU */ + starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); + FPRINTF(stderr, "IN Block: \n"); + print_block_data(handle); + + /* Partition the block in PARTS sub-matrices */ + struct 
starpu_data_filter f = + { + .filter_func = starpu_block_filter_pick_matrix_y, + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a matrix interface*/ + .get_child_ops = starpu_block_filter_pick_matrix_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = matrix_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result matrix */ + FPRINTF(stderr, "OUT Matrix %d: \n", i); + print_matrix_data(matrix_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT Block: \n"); + print_block_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fblock_pick_variable.c b/examples/filters/fblock_pick_variable.c new file mode 100644 index 0000000..933b98c --- /dev/null +++ b/examples/filters/fblock_pick_variable.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 5 +#define NY 4 +#define NZ 3 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) /* CPU codelet: multiplies the variable in buffers[0] in place by the int that cl_arg points to */ +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +#ifdef STARPU_USE_CUDA +extern void variable_cuda_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_block_data(starpu_data_handle_t block_handle); + +int main(void) /* registers an NX by NY by NZ block, picks one variable out of it with a partition plan, runs one synchronous task on it and prints the data before and after */ +{ + int *block; + int i, j, k; /* NOTE(review): i, j and k are not used anywhere in this function */ + int ret; + int factor = 2; + uint32_t pos[3] = {1,2,1}; /* handed to the pick_variable filter through filter_arg_ptr; presumably the coordinates of the picked element - confirm axis order */ + + starpu_data_handle_t handle; + starpu_data_handle_t var_handle; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + #ifdef STARPU_USE_CUDA + .cuda_funcs = {variable_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + #endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "block_pick_variable_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); + assert(block); + generate_block_data(block, NX, NY, NZ, NX, NX*NY); + + /* Declare data to StarPU */ + starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); + FPRINTF(stderr, "IN Block: \n"); + print_block_data(handle); + + /* Pick a variable in the block */ + struct starpu_data_filter f_var = + { + .filter_func = starpu_block_filter_pick_variable, + .filter_arg_ptr = (void*)pos, + .nchildren = 1, + /* the children use a variable interface*/ + .get_child_ops = starpu_block_filter_pick_variable_child_ops + }; + + starpu_data_partition_plan(handle, &f_var, 
&var_handle); + + FPRINTF(stderr, "Sub Variable:\n"); + int *variable = (int *)starpu_variable_get_local_ptr(var_handle); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + /* Submit the task */ + struct starpu_task *task = starpu_task_create(); + + FPRINTF(stderr,"Dealing with sub-variable\n"); + task->handles[0] = var_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable:\n"); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + starpu_data_partition_clean(handle, 1, &var_handle); + + /* Print the block, unregister it from StarPU and shut down; the partition plan was already cleaned just above and the explicit unpartition call below is disabled */ + //starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT Block: \n"); + print_block_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fblock_print.c b/examples/filters/fblock_print.c new file mode 100644 index 0000000..c264092 --- /dev/null +++ b/examples/filters/fblock_print.c @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz) +{ + int i, j, k; + FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz); + for(k=0 ; k + +#define NX 5 +#define NY 4 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void matrix_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void matrix_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void matrix_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) +{ + unsigned j; + int *matrix; + int ret, i; + int factor = 12; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {matrix_cpu_func}, + .cpu_funcs_name = {"matrix_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {matrix_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {matrix_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "matrix_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); + generate_matrix_data(matrix, NX, NY, NX); + + /* Declare data to StarPU */ + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, 
(uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); + FPRINTF(stderr,"IN Matrix: \n"); + print_matrix_data(handle); + + /* Partition the matrix in PARTS sub-matrices */ + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + /* Submit a task on each sub-vector */ + for (i=0; ihandles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr,"OUT Matrix: \n"); + print_matrix_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(matrix, NX*NY*sizeof(int)); + starpu_shutdown(); + + return ret; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fmatrix_cpu.c b/examples/filters/fmatrix_cpu.c new file mode 100644 index 0000000..102c82f --- /dev/null +++ b/examples/filters/fmatrix_cpu.c @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* dumb kernel to fill a 2D matrix */ + +#include + +void matrix_cpu_func(void *buffers[], void *cl_arg) +{ + int i, j; + int *factor = (int *) cl_arg; + + /* length of the matrix */ + int nx = (int)STARPU_MATRIX_GET_NX(buffers[0]); + int ny = (int)STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + /* local copy of the matrix pointer */ + int *matrix = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j + +static __global__ void fmatrix_cuda(int *matrix, int nx, int ny, unsigned ld, float factor) +{ + int i, j; + for(j=0; j>>(matrix, nx, ny, ld, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/fmatrix_hip.hip b/examples/filters/fmatrix_hip.hip new file mode 100644 index 0000000..f22a165 --- /dev/null +++ b/examples/filters/fmatrix_hip.hip @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb HIP kernel to fill a 2D matrix */ + +#include + +static __global__ void fmatrix_hip(int *matrix, int nx, int ny, unsigned ld, float factor) +{ + int i, j; + for(j=0; j +#include + +#define NX 10 +#define NY 21 +#define PARTSX 2 +#define PARTSY 3 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void variable_cpu_func(void *buffers[], void *cl_arg) /* CPU codelet: multiplies the variable in buffers[0] in place by the int that cl_arg points to */ +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +#ifdef STARPU_USE_CUDA +extern void variable_cuda_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) /* registers an NX by NY matrix, partitions it with two filters, then repeatedly picks one variable from a randomly chosen sub-matrix and runs a synchronous task on it */ +{ + int *matrix; + int ret, i, j; /* NOTE(review): i and j are not used anywhere in this function */ + int factor = 12; + + uint32_t pos[2]; + + starpu_data_handle_t handle; + + struct starpu_codelet cl_r = + { + .cpu_funcs = {variable_cpu_func}, + .cpu_funcs_name = {"variable_cpu_func"}, + #ifdef STARPU_USE_CUDA + .cuda_funcs = {variable_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + #endif + .nbuffers = 1, + .modes = {STARPU_R}, /* NOTE(review): read-only access mode, yet variable_cpu_func writes through the buffer - confirm this is intended */ + .name = "matrix_pick_variable_scal_r" + }; + + struct starpu_codelet cl_rw = + { + .cpu_funcs = {variable_cpu_func}, + .cpu_funcs_name = {"variable_cpu_func"}, + #ifdef STARPU_USE_CUDA + .cuda_funcs = {variable_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + #endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "matrix_pick_variable_scal_rw" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); + generate_matrix_data(matrix, NX, NY, NX); + + /* Declare data to StarPU */ + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); + FPRINTF(stderr, "IN Matrix: \n"); + print_matrix_data(handle); + + /* Two filters, applied together by starpu_data_map_filters() below: PARTSX children for the first one, PARTSY for the second */ + struct starpu_data_filter f_matrix_vert = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = PARTSX + }; + + struct starpu_data_filter f_matrix_horiz = + { + .filter_func = starpu_matrix_filter_vertical_block, + 
.nchildren = PARTSY + }; + + starpu_data_map_filters(handle, 2, &f_matrix_vert, &f_matrix_horiz); + + starpu_data_handle_t sub_matrix_handle; + + int nn; + for(nn=0; nn<=10; nn++) /* 11 iterations: nn runs from 0 to 10 inclusive */ + { + int indxi = starpu_drand48()*(PARTSX); + int indxj = starpu_drand48()*(PARTSY); /* random child indices, one per partitioning level */ + sub_matrix_handle = starpu_data_get_sub_data(handle, 2, indxi, indxj); + FPRINTF(stderr, "sub Matrix: \n"); + print_matrix_data(sub_matrix_handle); + + starpu_data_handle_t var_handle; + + pos[0] = starpu_drand48()*(NX/PARTSX); + pos[1] = starpu_drand48()*(NY/PARTSY); /* random position scaled by NX/PARTSX and NY/PARTSY; presumably always inside the chosen sub-matrix - confirm for uneven splits */ + + /* Pick a variable in the matrix */ + struct starpu_data_filter f_var = + { + .filter_func = starpu_matrix_filter_pick_variable, + .filter_arg_ptr = (void*)pos, + .nchildren = 1, + /* the children use a variable interface*/ + .get_child_ops = starpu_matrix_filter_pick_variable_child_ops + }; + starpu_data_partition_plan(sub_matrix_handle, &f_var, &var_handle); + + FPRINTF(stderr, "Sub Variable:\n"); + int *variable = (int *)starpu_variable_get_local_ptr(var_handle); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + /* Submit the task */ + struct starpu_task *task = starpu_task_create(); + + FPRINTF(stderr,"Dealing with sub-variable\n"); + task->handles[0] = var_handle; + + if(starpu_drand48()>=0.2) /* read-only codelet when the draw is at least 0.2, read-write codelet otherwise */ + task->cl = &cl_r; + else + task->cl = &cl_rw; + + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable:\n"); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + starpu_data_partition_clean(sub_matrix_handle, 1, &var_handle); /* clean this iteration's partition plan before the next pick */ + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, 
STARPU_MAIN_RAM); + FPRINTF(stderr,"OUT Matrix: \n"); + print_matrix_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(matrix, NX*NY*sizeof(int)); + starpu_shutdown(); + + return ret; /* ret was last assigned by starpu_task_submit() inside the loop */ + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fmatrix_pick_vector.c b/examples/filters/fmatrix_pick_vector.c new file mode 100644 index 0000000..2f3754f --- /dev/null +++ b/examples/filters/fmatrix_pick_vector.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 5 +#define NY 4 +#define PARTS 2 +#define POS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void vector_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void vector_cuda_func(void *buffers[], void *cl_arg); +#endif +#ifdef STARPU_USE_HIP +extern void vector_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) +{ + int *matrix; + int ret, i, j; + int factor = 12; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {vector_cpu_func}, + .cpu_funcs_name = {"vector_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {vector_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {vector_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "matrix_pick_vector_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); + generate_matrix_data(matrix, NX, NY, NX); + + /* Declare data to StarPU */ + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); + FPRINTF(stderr, "IN Matrix: \n"); + print_matrix_data(handle); + + /* Partition the matrix in PARTS sub-vectors */ + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_pick_vector_y, + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a vector interface*/ + .get_child_ops = starpu_matrix_filter_pick_vector_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; ihandles[0] = vector_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = 
starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result vector */ + FPRINTF(stderr,"OUT Vector %d: \n", i); + starpu_data_acquire(vector_handle, STARPU_R); + for(j=0 ; j + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void print_matrix(int *matrix, int nx, int ny, unsigned ld) +{ + int i, j; + FPRINTF(stderr, "matrix=%p nx=%d ny=%d ld=%u\n", matrix, nx, ny, ld); + for(j=0 ; j + +static __global__ void _fmultiple_check_scale_cuda(int *val, int nx, int ny, unsigned ld, int start, int factor) +{ + int i, j; + for(j=0; j>>(val, nx, ny, ld, start, factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} + +static __global__ void _fmultiple_check_cuda(int *val, int nx, int ny, unsigned ld, int start, int factor) +{ + int i, j; + for(j=0; j>>(val, nx, ny, ld, start, factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/fmultiple_hip.hip b/examples/filters/fmultiple_hip.hip new file mode 100644 index 0000000..efdff2b --- /dev/null +++ b/examples/filters/fmultiple_hip.hip @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb HIP kernel to check the matrix values and scale it up */ + +#include + +static __global__ void _fmultiple_check_scale_hip(int *val, int nx, int ny, unsigned ld, int start, int factor) +{ + int i, j; + for(j=0; j + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + (void)cl_arg; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j= 0); + for (i = 1; i < nbuffers; i++) + STARPU_ASSERT(starpu_task_get_current_data_node(i) == node); +} + +struct starpu_codelet cl_switch = +{ +#if 1 + /* Check for the values */ + .cpu_funcs = {empty}, +#else + /* For production code: we do not need to actually execute anything */ + .where = STARPU_NOWHERE, +#endif + .nbuffers = STARPU_VARIABLE_NBUFFERS, + .name = "switch", +}; + +int main(void) +{ + unsigned n=1; + int matrix[NX][NY]; + int ret, i; + + /* We haven't taken care otherwise */ + STARPU_ASSERT((NX%PARTS) == 0); + STARPU_ASSERT((NY%PARTS) == 0); + + starpu_data_handle_t handle; + starpu_data_handle_t vert_handle[PARTS]; + starpu_data_handle_t horiz_handle[PARTS]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* force to execute task on the home_node, here it is STARPU_MAIN_RAM */ + cl_switch.specific_nodes = 1; + for (i = 0; i < STARPU_NMAXBUFS; i++) + cl_switch.nodes[i] = STARPU_MAIN_RAM; + + /* Declare the whole matrix to StarPU */ + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0])); + + /* Also declare the vertical 
slices to StarPU */ + for (i = 0; i < PARTS; i++) + { + starpu_matrix_data_register(&vert_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[0][i*(NX/PARTS)], NX, NX/PARTS, NY, sizeof(matrix[0][0])); + /* But make it invalid for now, we'll access data through the whole matrix first */ + starpu_data_invalidate(vert_handle[i]); + } + /* And the horizontal slices to StarPU */ + for (i = 0; i < PARTS; i++) + { + starpu_matrix_data_register(&horiz_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[i*(NY/PARTS)][0], NX, NX, NY/PARTS, sizeof(matrix[0][0])); + starpu_data_invalidate(horiz_handle[i]); + } + + /* Fill the matrix */ + ret = starpu_task_insert(&cl_fill, STARPU_W, handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Now switch to vertical view of the matrix */ + struct starpu_data_descr vert_descr[PARTS]; + for (i = 0; i < PARTS; i++) + { + vert_descr[i].handle = vert_handle[i]; + vert_descr[i].mode = STARPU_W; + } + ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, vert_descr, PARTS, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ + starpu_data_invalidate_submit(handle); + + /* Check the values of the vertical slices */ + for (i = 0; i < PARTS; i++) + { + int factor = 1; + int start = i*(NX/PARTS); + ret = starpu_task_insert(&cl_check_scale, + STARPU_RW, vert_handle[i], + STARPU_VALUE, &start, sizeof(start), + STARPU_VALUE, &factor, sizeof(factor), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Now switch back to total view of the matrix */ + for (i = 0; i < PARTS; i++) + vert_descr[i].mode = STARPU_RW; + ret = starpu_task_insert(&cl_switch, STARPU_DATA_MODE_ARRAY, vert_descr, PARTS, STARPU_W, handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_submit"); + /* And make sure we don't accidentally access the matrix through the vertical slices */ + for (i = 0; i < PARTS; i++) + starpu_data_invalidate_submit(vert_handle[i]); + + /* And switch to horizontal view of the matrix */ + struct starpu_data_descr horiz_descr[PARTS]; + for (i = 0; i < PARTS; i++) + { + horiz_descr[i].handle = horiz_handle[i]; + horiz_descr[i].mode = STARPU_W; + } + ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, horiz_descr, PARTS, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ + starpu_data_invalidate_submit(handle); + + /* Check the values of the horizontal slices */ + for (i = 0; i < PARTS; i++) + { + int factor = 2; + int start = factor*100*i*(NY/PARTS); + ret = starpu_task_insert(&cl_check_scale, + STARPU_RW, horiz_handle[i], + STARPU_VALUE, &start, sizeof(start), + STARPU_VALUE, &factor, sizeof(factor), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* + * Unregister data from StarPU and shutdown It does not really matter + * which view is active at unregistration here, since all views cover + * the whole matrix, so it will be completely updated in the main memory. + */ + for (i = 0; i < PARTS; i++) + { + starpu_data_unregister(vert_handle[i]); + starpu_data_unregister(horiz_handle[i]); + } + starpu_data_unregister(handle); + starpu_shutdown(); + + return ret; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fmultiple_submit.c b/examples/filters/fmultiple_submit.c new file mode 100644 index 0000000..9729769 --- /dev/null +++ b/examples/filters/fmultiple_submit.c @@ -0,0 +1,226 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This exemplifies how to access the same matrix with different partitioned + * views, doing the coherency through partition planning. + * We first run a kernel on the whole matrix to fill it, then run a kernel on + * each vertical slice to check the value and multiply it by two, then run a + * kernel on each horizontal slice to do the same. + */ + +#include + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + (void)cl_arg; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + (void)cl_arg; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + (void)cl_arg; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + unsigned offset_i, offset_j; + starpu_codelet_unpack_args(cl_arg, &offset_i, &offset_j); + + for(j=0; j + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void f4d_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void f4d_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void f4d_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_4dim_data(starpu_data_handle_t ndim_handle); + +int main(void) +{ + int *arr4d; + int i, j, k, l; + int ret; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {f4d_cpu_func}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {f4d_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {f4d_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "ndim_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); + assert(arr4d); + generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + unsigned nn[4] = {NX, NY, NZ, NT}; + unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + FPRINTF(stderr, "IN Ndim Array\n"); + print_4dim_data(handle); + + /* Partition the ndim array in PARTS sub-ndimarrays */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_block, + .filter_arg = 0, //Partition the array along X dimension + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + 
task->callback_func = NULL; + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl_arg = &multiplier; + task->cl_arg_size = sizeof(multiplier); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + print_4dim_data(handle); + starpu_data_unregister(handle); + + /* Print result ndim array*/ + FPRINTF(stderr, "OUT Ndim Array\n"); + print_tensor(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_1d_pick_variable.c b/examples/filters/fndim_1d_pick_variable.c new file mode 100644 index 0000000..23bd305 --- /dev/null +++ b/examples/filters/fndim_1d_pick_variable.c @@ -0,0 +1,130 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 21 +#define PARTS 3 +#define POS 5 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +int main(void) +{ + int i; + int *arr1d; + starpu_data_handle_t handle; + int factor = 10; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr1d_pick_variable_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr1d, NX*sizeof(int)); + FPRINTF(stderr,"IN 1-dim Array: \n"); + for(i=0 ; ihandles[0] = variable_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + FPRINTF(stderr,"OUT Variable %d: \n", i); + starpu_data_acquire(variable_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(variable_handle); + FPRINTF(stderr,"\n"); + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + + FPRINTF(stderr,"OUT 1-dim Array: \n"); + for(i=0 ; i + +#define NX 5 +#define NY 4 +#define PARTS 2 +#define POS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void vector_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void vector_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void vector_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); +extern void print_2dim_data(starpu_data_handle_t ndim_handle); + +int main(void) +{ + int *arr2d; + int ret, i, j; + int factor = 12; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {vector_cpu_func}, + .cpu_funcs_name = {"vector_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {vector_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {vector_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr2d_pick_vector_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr2d, NX*NY*sizeof(int)); + generate_matrix_data(arr2d, NX, NY, NX); + + unsigned nn[2] = {NX, NY}; + unsigned ldn[2] = {1, NX}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr2d, ldn, nn, 2, sizeof(int)); + FPRINTF(stderr, "IN 2-dim Array: \n"); + print_2dim_data(handle); + + /* Partition the 2-dim array in PARTS sub-vectors */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_2d_pick_vector, + .filter_arg = 1, //Partition the array along Y dimension + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a vector interface*/ + .get_child_ops = starpu_ndim_filter_pick_vector_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; ihandles[0] = vector_handle; + 
task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result vector */ + FPRINTF(stderr,"OUT Vector %d: \n", i); + starpu_data_acquire(vector_handle, STARPU_R); + for(j=0 ; j + +#define NX 5 +#define NY 4 +#define NZ 3 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void matrix_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void matrix_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void matrix_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_3dim_data(starpu_data_handle_t ndim_handle); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) +{ + int *arr3d; + int i, j, k; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {matrix_cpu_func}, + .cpu_funcs_name = {"matrix_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {matrix_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {matrix_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr3d_pick_matrix_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr3d, NX*NY*NZ*sizeof(int)); + assert(arr3d); + generate_block_data(arr3d, NX, NY, NZ, NX, NX*NY); + + unsigned nn[3] = {NX, NY, NZ}; + unsigned ldn[3] = {1, NX, NX*NY}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr3d, ldn, nn, 3, sizeof(int)); + 
FPRINTF(stderr, "IN 3-dim Array: \n"); + print_3dim_data(handle); + + /* Partition the 3-dim array in PARTS sub-matrices */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_3d_pick_matrix, + .filter_arg = 1, //Partition the array along Y dimension + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a matrix interface*/ + .get_child_ops = starpu_ndim_filter_pick_matrix_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = matrix_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result matrix */ + FPRINTF(stderr, "OUT Matrix %d: \n", i); + print_matrix_data(matrix_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 3-dim Array: \n"); + print_3dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr3d, NX*NY*NZ*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_4d_pick_block.c b/examples/filters/fndim_4d_pick_block.c new file mode 100644 index 0000000..7ade8eb --- /dev/null +++ b/examples/filters/fndim_4d_pick_block.c @@ -0,0 +1,138 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void block_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void block_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void block_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_4dim_data(starpu_data_handle_t ndim_handle); +extern void print_block_data(starpu_data_handle_t block_handle); + +int main(void) +{ + int *arr4d; + int i, j, k, l; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {block_cpu_func}, + .cpu_funcs_name = {"block_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {block_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {block_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr4d_pick_block_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); + assert(arr4d); + generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + unsigned nn[4] = {NX, NY, NZ, NT}; + unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + 
FPRINTF(stderr, "IN 4-dim Array: \n"); + print_4dim_data(handle); + + /* Partition the 4-dim array in PARTS sub-blocks */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_4d_pick_block, + .filter_arg = 2, //Partition the array along Z dimension + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a block interface*/ + .get_child_ops = starpu_ndim_filter_pick_block_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = block_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result block */ + FPRINTF(stderr, "OUT Block %d: \n", i); + print_block_data(block_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 4-dim Array: \n"); + print_4dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_5d_pick_tensor.c b/examples/filters/fndim_5d_pick_tensor.c new file mode 100644 index 0000000..e0c501d --- /dev/null +++ b/examples/filters/fndim_5d_pick_tensor.c @@ -0,0 +1,139 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define NG 2 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void tensor_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void tensor_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void tensor_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_5dim_data(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg); +extern void print_5dim_data(starpu_data_handle_t ndim_handle); +extern void print_tensor_data(starpu_data_handle_t ndim_handle); + +int main(void) +{ + int *arr5d; + int i, j, k, l, m; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {tensor_cpu_func}, + .cpu_funcs_name = {"tensor_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {tensor_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {tensor_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr5d_pick_tensor_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(int)); + assert(arr5d); + generate_5dim_data(arr5d, NX, NY, NZ, NT, NG, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT); + + unsigned nn[5] = {NX, NY, NZ, NT, NG}; + unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; + + /* Declare data to StarPU */ + 
starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(int)); + FPRINTF(stderr, "IN 5-dim Array: \n"); + print_5dim_data(handle); + + /* Partition the 5-dim array in PARTS tensors */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_5d_pick_tensor, + .filter_arg = 3, //Partition the array along T dimension + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a tensor interface*/ + .get_child_ops = starpu_ndim_filter_pick_tensor_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = tensor_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result tensor */ + FPRINTF(stderr, "OUT Tensor %d: \n", i); + print_tensor_data(tensor_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 5-dim Array: \n"); + print_5dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr5d, NX*NY*NZ*NT*NG*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_pick_ndim.c b/examples/filters/fndim_pick_ndim.c new file mode 100644 index 0000000..3c9c14d --- /dev/null +++ b/examples/filters/fndim_pick_ndim.c @@ -0,0 +1,135 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void f3d_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void f3d_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void f3d_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_4dim_data(starpu_data_handle_t ndim_handle); +extern void print_3dim_data(starpu_data_handle_t ndim_handle); + +int main(void) +{ + int *arr4d; + int i, j, k, l; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {f3d_cpu_func}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {f3d_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {f3d_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr4d_pick_arr3d_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); + assert(arr4d); + 
generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + unsigned nn[4] = {NX, NY, NZ, NT}; + unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + FPRINTF(stderr, "IN 4-dim Array: \n"); + print_4dim_data(handle); + + /* Partition the 4-dim array in PARTS sub 3-dim arrays */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_pick_ndim, + .filter_arg = 2, //Partition the array along Z dimension + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = sub3d_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result 3-dim array */ + FPRINTF(stderr, "OUT 3-dim Array %d: \n", i); + print_3dim_data(sub3d_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 4-dim Array: \n"); + print_4dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_pick_variable.c b/examples/filters/fndim_pick_variable.c new file mode 100644 index 0000000..2c1ed39 --- /dev/null +++ b/examples/filters/fndim_pick_variable.c @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define NG 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +extern void generate_5dim_data(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg); +extern void print_5dim_data(starpu_data_handle_t ndim_handle); + +int main(void) +{ + int *arr5d; + int i, j, k, l, m; + int ret; + int factor = 2; + uint32_t pos[5] = {1,2,1,2,1}; + + starpu_data_handle_t handle; + starpu_data_handle_t var_handle; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr5d_pick_variable_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(int)); + assert(arr5d); + generate_5dim_data(arr5d, NX, NY, NZ, NT, NG, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT); + + unsigned nn[5] = {NX, NY, NZ, NT, NG}; + unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; + + /* 
Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(int)); + FPRINTF(stderr, "IN 5-dim Array: \n"); + print_5dim_data(handle); + + /* Pick a variable in the 5-dim array */ + struct starpu_data_filter f_var = + { + .filter_func = starpu_ndim_filter_pick_variable, + .filter_arg_ptr = (void*)pos, + .nchildren = 1, + /* the children use a variable interface*/ + .get_child_ops = starpu_ndim_filter_pick_variable_child_ops + }; + + starpu_data_partition_plan(handle, &f_var, &var_handle); + + FPRINTF(stderr, "Sub Variable:\n"); + int *variable = (int *)starpu_variable_get_local_ptr(var_handle); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + /* Submit the task */ + struct starpu_task *task = starpu_task_create(); + + FPRINTF(stderr,"Dealing with sub-variable\n"); + task->handles[0] = var_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable:\n"); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + starpu_data_partition_clean(handle, 1, &var_handle); + + /* Unpartition the data, unregister it from StarPU and shutdown */ + //starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 5-dim Array: \n"); + print_5dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr5d, NX*NY*NZ*NT*NG*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_to_block.c b/examples/filters/fndim_to_block.c new file mode 100644 
index 0000000..25c6dea --- /dev/null +++ b/examples/filters/fndim_to_block.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 5 +#define NY 4 +#define NZ 3 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void block_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void block_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void block_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); +extern void print_3dim_data(starpu_data_handle_t ndim_handle); +extern void print_block_data(starpu_data_handle_t block_handle); + +int main(void) +{ + int *arr3d; + int i, j, k; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {block_cpu_func}, + .cpu_funcs_name = {"block_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {block_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {block_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr3d_to_matrix_scal" + }; + + ret = starpu_init(NULL); 
+ if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr3d, NX*NY*NZ*sizeof(int)); + assert(arr3d); + generate_block_data(arr3d, NX, NY, NZ, NX, NX*NY); + + unsigned nn[3] = {NX, NY, NZ}; + unsigned ldn[3] = {1, NX, NX*NY}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr3d, ldn, nn, 3, sizeof(int)); + FPRINTF(stderr, "IN 3-dim Array: \n"); + print_3dim_data(handle); + + /* Partition the 3-dim array in PARTS sub-blocks */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_to_block, + .filter_arg = 0, //Partition the array along X dimension + .nchildren = PARTS, + /* the children use a block interface*/ + .get_child_ops = starpu_ndim_filter_to_block_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = block_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result block */ + FPRINTF(stderr, "OUT Block %d: \n", i); + print_block_data(block_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 3-dim Array: \n"); + print_3dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr3d, NX*NY*NZ*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_to_matrix.c b/examples/filters/fndim_to_matrix.c new file mode 100644 index 0000000..54ad8b2 --- /dev/null +++ b/examples/filters/fndim_to_matrix.c @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 5 +#define NY 4 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void matrix_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void matrix_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void matrix_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); +extern void print_2dim_data(starpu_data_handle_t ndim_handle); +extern void print_matrix_data(starpu_data_handle_t matrix_handle); + +int main(void) +{ + int *arr2d; + int ret, i, j, k; + int factor = 12; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {matrix_cpu_func}, + .cpu_funcs_name = {"matrix_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {matrix_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {matrix_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr2d_to_matrix_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr2d, NX*NY*sizeof(int)); + 
generate_matrix_data(arr2d, NX, NY, NX); + + unsigned nn[2] = {NX, NY}; + unsigned ldn[2] = {1, NX}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr2d, ldn, nn, 2, sizeof(int)); + FPRINTF(stderr, "IN 2-dim Array: \n"); + print_2dim_data(handle); + + /* Partition the 2-dim array in PARTS sub-matrices */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_to_matrix, + .filter_arg = 1, //Partition the array along Y dimension + .nchildren = PARTS, + /* the children use a matrix interface*/ + .get_child_ops = starpu_ndim_filter_to_matrix_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = matrix_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result matrix */ + FPRINTF(stderr, "OUT Matrix %d: \n", i); + print_matrix_data(matrix_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr,"OUT 2-dim Array: \n"); + print_2dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr2d, NX*NY*sizeof(int)); + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_to_tensor.c b/examples/filters/fndim_to_tensor.c new file mode 100644 index 0000000..aea89ef --- /dev/null +++ b/examples/filters/fndim_to_tensor.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void tensor_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void tensor_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void tensor_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_4dim_data(starpu_data_handle_t ndim_handle); +extern void print_tensor_data(starpu_data_handle_t tensor_handle); + +int main(void) +{ + int *arr4d; + int i, j, k, l; + int ret; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {tensor_cpu_func}, + .cpu_funcs_name = {"tensor_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {tensor_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {tensor_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr4d_to_tensor_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr4d, 
NX*NY*NZ*NT*sizeof(int)); + assert(arr4d); + generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + unsigned nn[4] = {NX, NY, NZ, NT}; + unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + FPRINTF(stderr, "IN 4-dim Array: \n"); + print_4dim_data(handle); + + /* Partition the 4-dim array in PARTS sub-tensors */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_to_tensor, + .filter_arg = 0, //Partition the array along X dimension + .nchildren = PARTS, + /* the children use a tensor interface*/ + .get_child_ops = starpu_ndim_filter_to_tensor_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = tensor_handle; + task->cl_arg = &multiplier; + task->cl_arg_size = sizeof(multiplier); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result tensor*/ + FPRINTF(stderr, "OUT Tensor %d: \n", i); + print_tensor_data(tensor_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT 4-dim Array: \n"); + print_4dim_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_to_variable.c b/examples/filters/fndim_to_variable.c new file mode 100644 index 0000000..974f8ca --- /dev/null +++ b/examples/filters/fndim_to_variable.c @@ -0,0 +1,119 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define PARTS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +int main(void) +{ + int i; + int arr0d; + starpu_data_handle_t handle; + int factor = 10; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr0d_to_variable_scal" + }; + + FPRINTF(stderr,"IN 0-dim Array: \n"); + arr0d = 1; + FPRINTF(stderr, "%5d ", arr0d); + FPRINTF(stderr,"\n"); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Declare data to StarPU */ + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&arr0d, NULL, NULL, 0, sizeof(int)); + + /* Transfer the 0-dim array to a variable */ + struct starpu_data_filter f = + { + .filter_func = starpu_ndim_filter_to_variable, + .nchildren = PARTS, + /* the children use a variable interface*/ + .get_child_ops = starpu_ndim_filter_to_variable_child_ops + }; + starpu_data_partition(handle, &f); + + /* Submit a task on 
the variable */ + for (i=0; ihandles[0] = variable_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable %d: \n", i); + starpu_data_acquire(variable_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(variable_handle); + FPRINTF(stderr,"\n"); + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_shutdown(); + + FPRINTF(stderr,"OUT 0-dim Array: \n"); + FPRINTF(stderr, "%5d ", arr0d); + FPRINTF(stderr,"\n"); + + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/fndim_to_vector.c b/examples/filters/fndim_to_vector.c new file mode 100644 index 0000000..686adb0 --- /dev/null +++ b/examples/filters/fndim_to_vector.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 21 +#define PARTS 3 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void vector_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void vector_cuda_func(void *buffers[], void *cl_arg); +#endif +#ifdef STARPU_USE_HIP +extern void vector_hip_func(void *buffers[], void *cl_arg); +#endif + +int main(void) +{ + int i, j; + int *arr1d; + int factor = 10; + int ret; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {vector_cpu_func}, + .cpu_funcs_name = {"vector_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {vector_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {vector_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "arr1d_to_vector_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&arr1d, NX*sizeof(int)); + FPRINTF(stderr,"IN 1-dim Array: \n"); + for(i=0 ; ihandles[0] = vector_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result vector */ + FPRINTF(stderr,"OUT Vector %d: \n", i); + starpu_data_acquire(vector_handle, STARPU_R); + for(j=0 ; j + +#define NX 20 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void display_func(void *buffers[], void *cl_arg) +{ + unsigned i; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the vector pointer */ + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + FPRINTF(stderr, "vector with n=%u : ", n); + for (i = 0; i < n; i++) + FPRINTF(stderr, "%5d ", val[i]); + FPRINTF(stderr, "\n"); +} + +void cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + + /* length of the vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the vector pointer */ + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + FPRINTF(stderr, "computing on vector with n=%u\n", n); + for (i = 0; i < n; i++) + val[i] *= 2; +} + +int main(void) +{ + int i; + int vector[NX]; + starpu_data_handle_t handle; + starpu_data_handle_t subhandles[PARTS]; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "vector_scal" + }; + struct starpu_codelet print_cl = + { + .cpu_funcs = {display_func}, + .cpu_funcs_name = {"display_func"}, + .nbuffers = 1, + .modes = {STARPU_R}, + .name = "vector_display" + }; + + for(i=0 ; i + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_codelet(void *buffers[], void *cl_arg) +{ + unsigned i, j; + int factor; + + starpu_codelet_unpack_args(cl_arg, &factor, 0); + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + /* local copy of the matrix pointer */ + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + FPRINTF(stderr, "computing on matrix with nx=%u, ny=%u, ld=%u\n", nx, ny, ld); + for(j=0; j + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void tensor_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void tensor_cuda_func(void *buffers[], void *cl_arg); +#endif +#ifdef STARPU_USE_HIP +extern void tensor_hip_func(void *buffers[], void *cl_arg); +#endif + + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_tensor_data(starpu_data_handle_t tensor_handle); + +int main(void) +{ + int *tensor; + int i, j, k, l; + int ret; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {tensor_cpu_func}, + .cpu_funcs_name = {"tensor_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {tensor_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {tensor_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "tensor_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&tensor, 
NX*NY*NZ*NT*sizeof(int)); + assert(tensor); + generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + /* Declare data to StarPU */ + starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); + FPRINTF(stderr, "IN Tensor\n"); + print_tensor_data(handle); + + /* Partition the tensor in PARTS sub-tensors */ + struct starpu_data_filter f = + { + .filter_func = starpu_tensor_filter_block, + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl_arg = &multiplier; + task->cl_arg_size = sizeof(multiplier); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + print_tensor_data(handle); + starpu_data_unregister(handle); + + /* Print result tensor */ + FPRINTF(stderr, "OUT Tensor\n"); + print_tensor(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/ftensor_cpu.c b/examples/filters/ftensor_cpu.c new file mode 100644 index 0000000..a8c03ce --- /dev/null +++ b/examples/filters/ftensor_cpu.c @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb kernel to fill a 4D matrix */ + +#include + +void tensor_cpu_func(void *buffers[], void *cl_arg) +{ + int i, j, k, l; + int *factor = (int *) cl_arg; + int *tensor = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); + int nx = (int)STARPU_TENSOR_GET_NX(buffers[0]); + int ny = (int)STARPU_TENSOR_GET_NY(buffers[0]); + int nz = (int)STARPU_TENSOR_GET_NZ(buffers[0]); + int nt = (int)STARPU_TENSOR_GET_NT(buffers[0]); + unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + + for(l=0; l + +static __global__ void ftensor_cuda(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +{ + int i, j, k, l; + + for(l=0; l>>(tensor, nx, ny, nz, nt, ldy, ldz, ldt, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/ftensor_hip.hip b/examples/filters/ftensor_hip.hip new file mode 100644 index 0000000..c3cf7a6 --- /dev/null +++ b/examples/filters/ftensor_hip.hip @@ -0,0 +1,53 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* dumb HIP kernel to fill a 4D matrix */ + +#include + +static __global__ void ftensor_hip(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +{ + int i, j, k, l; + + for(l=0; l + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 +#define PARTS 2 +#define POS 1 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void block_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void block_cuda_func(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_HIP +extern void block_hip_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_tensor_data(starpu_data_handle_t tensor_handle); +extern void print_block_data(starpu_data_handle_t block_handle); + +int main(void) +{ + int *tensor; + int i, j, k, l; + int ret; + int factor = 2; + + starpu_data_handle_t handle; + struct starpu_codelet cl = + { + .cpu_funcs = {block_cpu_func}, + .cpu_funcs_name = {"block_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {block_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {block_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, 
+#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "tensor_pick_block_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&tensor, NX*NY*NZ*NT*sizeof(int)); + assert(tensor); + generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + /* Declare data to StarPU */ + starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); + FPRINTF(stderr, "IN Tensor: \n"); + print_tensor_data(handle); + + /* Partition the tensor in PARTS sub-blocks */ + struct starpu_data_filter f = + { + .filter_func = starpu_tensor_filter_pick_block_z, + .filter_arg_ptr = (void*)(uintptr_t) POS, + .nchildren = PARTS, + /* the children use a block interface*/ + .get_child_ops = starpu_tensor_filter_pick_block_child_ops + }; + starpu_data_partition(handle, &f); + + FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); + + for(i=0 ; icl = &cl; + task->synchronous = 1; + task->callback_func = NULL; + task->handles[0] = block_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result block */ + FPRINTF(stderr, "OUT Block %d: \n", i); + print_block_data(block_handle); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT Tensor: \n"); + print_tensor_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/examples/filters/ftensor_pick_variable.c b/examples/filters/ftensor_pick_variable.c new file mode 100644 index 0000000..dbdd886 --- /dev/null +++ b/examples/filters/ftensor_pick_variable.c @@ -0,0 
+1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 5 +#define NZ 4 +#define NT 3 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +#ifdef STARPU_USE_CUDA +extern void variable_cuda_func(void *buffers[], void *cl_arg); +#endif + +extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); +extern void print_tensor_data(starpu_data_handle_t tensor_handle); + +int main(void) +{ + int *tensor; + int i, j, k, l; + int ret; + int factor = 2; + uint32_t pos[4] = {1,2,1,2}; + + starpu_data_handle_t handle; + starpu_data_handle_t var_handle; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + #ifdef STARPU_USE_CUDA + .cuda_funcs = {variable_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + #endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "tensor_pick_variable_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + 
starpu_malloc((void **)&tensor, NX*NY*NZ*NT*sizeof(int)); + assert(tensor); + generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); + + /* Declare data to StarPU */ + starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); + FPRINTF(stderr, "IN Tensor: \n"); + print_tensor_data(handle); + + /* Pick a variable in the tensor */ + struct starpu_data_filter f_var = + { + .filter_func = starpu_tensor_filter_pick_variable, + .filter_arg_ptr = (void*)pos, + .nchildren = 1, + /* the children use a variable interface*/ + .get_child_ops = starpu_tensor_filter_pick_variable_child_ops + }; + + starpu_data_partition_plan(handle, &f_var, &var_handle); + + FPRINTF(stderr, "Sub Variable:\n"); + int *variable = (int *)starpu_variable_get_local_ptr(var_handle); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + /* Submit the task */ + struct starpu_task *task = starpu_task_create(); + + FPRINTF(stderr,"Dealing with sub-variable\n"); + task->handles[0] = var_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable:\n"); + starpu_data_acquire(var_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(var_handle); + FPRINTF(stderr,"\n"); + + starpu_data_partition_clean(handle, 1, &var_handle); + + /* Unpartition the data, unregister it from StarPU and shutdown */ + //starpu_data_unpartition(handle, STARPU_MAIN_RAM); + FPRINTF(stderr, "OUT Tensor: \n"); + print_tensor_data(handle); + starpu_data_unregister(handle); + + starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 
77; +} diff --git a/examples/filters/ftensor_print.c b/examples/filters/ftensor_print.c new file mode 100644 index 0000000..95c6a5f --- /dev/null +++ b/examples/filters/ftensor_print.c @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt) +{ + int i, j, k, l; + FPRINTF(stderr, "tensor=%p nx=%d ny=%d nz=%d nt=%d ldy=%u ldz=%u ldt=%u\n", tensor, nx, ny, nz, nt, ldy, ldz, ldt); + for(l=0 ; l + +static __global__ void fvariable_cuda(int *val, int factor) +{ + *val *= factor; +} + +extern "C" void variable_cuda_func(void *buffers[], void *_args) +{ + int *factor = (int *)_args; + int *val = (int *) STARPU_VARIABLE_GET_PTR(buffers[0]); + + fvariable_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/fvector.c b/examples/filters/fvector.c new file mode 100644 index 0000000..ec0b97a --- /dev/null +++ b/examples/filters/fvector.c @@ -0,0 +1,121 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This examplifies how to use partitioning filters. We here just split a + * vector into slices, and run a dumb kernel on them. + */ + +#include + +#define NX 21 +#define PARTS 3 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern void vector_cpu_func(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +extern void vector_cuda_func(void *buffers[], void *cl_arg); +#endif +#ifdef STARPU_USE_HIP +extern void vector_hip_func(void *buffers[], void *cl_arg); +#endif + +int main(void) +{ + int i; + int* vector; + starpu_data_handle_t handle; + int factor=1; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {vector_cpu_func}, + .cpu_funcs_name = {"vector_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {vector_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {vector_hip_func}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "vector_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&vector, NX*sizeof(int)); + for(i=0 ; ihandles[0] = sub_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + 
task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Unpartition the data, unregister it from StarPU and shutdown */ + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + + FPRINTF(stderr,"OUT Vector: "); + for(i=0 ; i + +void vector_cpu_func(void *buffers[], void *cl_arg) +{ + int i; + int *factor = (int *) cl_arg; + + /* length of the vector */ + int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the vector pointer */ + int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + for (i = 0; i < n; i++) + vector[i] *= *factor; +} + diff --git a/examples/filters/fvector_cuda.cu b/examples/filters/fvector_cuda.cu new file mode 100644 index 0000000..38d4db1 --- /dev/null +++ b/examples/filters/fvector_cuda.cu @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* dumb CUDA kernel to fill a 1D matrix */ + +#include + +static __global__ void fvector_cuda(int *vector, int n, float factor) +{ + int i; + for (i = 0; i < n; i++) + vector[i] *= factor; +} + +extern "C" void vector_cuda_func(void *buffers[], void *_args) +{ + int *factor = (int *)_args; + int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); + + fvector_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(vector, n, *factor); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/filters/fvector_hip.hip b/examples/filters/fvector_hip.hip new file mode 100644 index 0000000..99c0185 --- /dev/null +++ b/examples/filters/fvector_hip.hip @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* dumb HIP kernel to fill a 1D matrix */ + +#include + +static __global__ void fvector_hip(int *vector, int n, float factor) +{ + int i; + for (i = 0; i < n; i++) + vector[i] *= factor; +} + +extern "C" void vector_hip_func(void *buffers[], void *_args) +{ + int *factor = (int *)_args; + int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); + + hipLaunchKernelGGL(fvector_hip, 1, 1, 0, starpu_hip_get_local_stream(), vector, n, *factor); + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); +} diff --git a/examples/filters/fvector_pick_variable.c b/examples/filters/fvector_pick_variable.c new file mode 100644 index 0000000..9937be7 --- /dev/null +++ b/examples/filters/fvector_pick_variable.c @@ -0,0 +1,129 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 21 +#define PARTS 3 +#define POS 5 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + int *factor = (int *) cl_arg; + + /* local copy of the variable pointer */ + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + + *val *= *factor; +} + +int main(void) +{ + int i; + int* vector; + starpu_data_handle_t handle; + int factor = 10; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "vector_pick_variable_scal" + }; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&vector, NX*sizeof(int)); + FPRINTF(stderr,"IN Vector: \n"); + for(i=0 ; ihandles[0] = sub_handle; + task->cl = &cl; + task->synchronous = 1; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* Print result variable */ + FPRINTF(stderr,"OUT Variable %d: \n", i); + starpu_data_acquire(sub_handle, STARPU_R); + FPRINTF(stderr, "%5d ", *variable); + starpu_data_release(sub_handle); + FPRINTF(stderr,"\n"); + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + + FPRINTF(stderr,"OUT Vector: \n"); + for(i=0 ; i + +/* Shadow width */ +#define SHADOW 2 +#define NX 30 +#define PARTS 3 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + unsigned i; + + /* length of the shadowed source vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the shadowed source vector pointer */ + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + /* length of the destination vector */ + unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); + /* local copy of the destination vector pointer */ + int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); + + /* If things go right, sizes should match */ + STARPU_ASSERT(n == n2); + for (i = 0; i < n; i++) + val2[i] = val[i]; +} + +#ifdef STARPU_USE_CUDA +void cuda_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source vector */ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + /* local copy of the shadowed source vector pointer */ + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + /* length of the destination vector */ + unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); + /* local copy of the destination vector pointer */ + int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); + + /* If things go right, sizes should match */ + STARPU_ASSERT(n == n2); + cudaMemcpyAsync(val2, val, n*sizeof(*val), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); +} +#endif + +int main(void) +{ + unsigned j; + int vector[NX + 2*SHADOW]; + int vector2[NX + PARTS*2*SHADOW]; + starpu_data_handle_t handle, handle2; + int ret, i; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} + }; + + for(i=0 ; ihandles[0] = sub_handle; + task->handles[1] = sub_handle2; + task->cl = &cl; + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unpartition(handle2, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + starpu_shutdown(); + + FPRINTF(stderr,"OUT Vector: "); + for(i=0 ; i + +/* Shadow width */ +#define SHADOWX 3 +#define SHADOWY 2 +#define NX 20 +#define NY 30 +#define PARTSX 2 +#define PARTSY 3 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned m = STARPU_MATRIX_GET_NY(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); + unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); + unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); + + unsigned i, j; + + /* If things go right, sizes should match */ + STARPU_ASSERT(n == n2); + STARPU_ASSERT(m == m2); + for (j = 0; j < m; j++) + for (i = 0; i < n; i++) + val2[j*ld2+i] = val[j*ld+i]; +} + +#ifdef STARPU_USE_CUDA +void cuda_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + cudaError_t cures; + + /* length of the shadowed source matrix */ + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned m = STARPU_MATRIX_GET_NY(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); + unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); + 
unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); + + /* If things go right, sizes should match */ + STARPU_ASSERT(n == n2); + STARPU_ASSERT(m == m2); + cures = cudaMemcpy2DAsync(val2, ld2*sizeof(*val2), val, ld*sizeof(*val), n*sizeof(*val), m, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); +} +#endif + +int main(void) +{ + unsigned i, j, k, l; + int matrix[NY + 2*SHADOWY][NX + 2*SHADOWX]; + int matrix2[NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; + starpu_data_handle_t handle, handle2; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} + }; + + memset(matrix, -1, sizeof(matrix)); + for(j=1 ; j<=NY ; j++) + for(i=1 ; i<=NX ; i++) + matrix[SHADOWY+j-1][SHADOWX+i-1] = i+j; + + /* Copy borders */ + for (j = SHADOWY ; jhandles[0] = sub_handle; + task->handles[1] = sub_handle2; + task->cl = &cl; + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unpartition(handle2, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + starpu_shutdown(); + + FPRINTF(stderr,"OUT Matrix:\n"); + for(j=0 ; j + +/* Shadow width */ +#define SHADOWX 2 +#define SHADOWY 3 +#define SHADOWZ 4 +#define NX 12 +#define NY 9 +#define NZ 6 +#define PARTSX 4 +#define PARTSY 3 +#define PARTSZ 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); + unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); + unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); + unsigned ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); + unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); + unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); + unsigned z2 = STARPU_BLOCK_GET_NZ(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); + + unsigned i, j, k; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + for (k = 0; k < z; k++) + for (j = 0; j < y; j++) + for (i = 0; i < x; i++) + val2[k*ldz2+j*ldy2+i] = val[k*ldz+j*ldy+i]; +} + +#ifdef STARPU_USE_CUDA +void cuda_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); + unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); + unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); + unsigned ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); + unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); + unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); + unsigned z2 = 
STARPU_BLOCK_GET_NZ(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); + + unsigned k; + cudaError_t cures; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + for (k = 0; k < z; k++) + { + cures = cudaMemcpy2DAsync(val2+k*ldz2, ldy2*sizeof(*val2), val+k*ldz, ldy*sizeof(*val), + x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); + STARPU_ASSERT(!cures); + } +} +#endif + +int main(void) +{ + unsigned i, j, k, l, m, n; + int matrix[NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; + int matrix2[NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; + starpu_data_handle_t handle, handle2; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} + }; + + memset(matrix, -1, sizeof(matrix)); + for(k=1 ; k<=NZ ; k++) + for(j=1 ; j<=NY ; j++) + for(i=1 ; i<=NX ; i++) + matrix[SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k; + + /* Copy planes */ + for (k = SHADOWZ ; khandles[0] = sub_handle; + task->handles[1] = sub_handle2; + task->cl = &cl; + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unpartition(handle2, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + starpu_shutdown(); + + FPRINTF(stderr,"OUT Matrix:\n"); + for(k=0 ; k + +/* Shadow width */ +#define SHADOWX 2 +#define SHADOWY 2 +#define SHADOWZ 1 +#define SHADOWT 1 +#define NX 6 +#define NY 6 +#define NZ 2 +#define NT 2 +#define PARTSX 2 +#define PARTSY 2 +#define PARTSZ 2 +#define PARTST 2 + +#define FPRINTF(ofile, 
fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); + unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); + unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); + unsigned t = STARPU_TENSOR_GET_NT(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); + unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); + unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); + unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); + unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); + unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); + unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); + + unsigned i, j, k, l; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + STARPU_ASSERT(t == t2); + for (l = 0; l < t; l++) + for (k = 0; k < z; k++) + for (j = 0; j < y; j++) + for (i = 0; i < x; i++) + val2[l*ldt2+k*ldz2+j*ldy2+i] = val[l*ldt+k*ldz+j*ldy+i]; +} + +#ifdef STARPU_USE_CUDA +void cuda_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix*/ + unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); + unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); + unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); + unsigned t = 
STARPU_TENSOR_GET_NT(buffers[0]); + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); + unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); + unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); + unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); + unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); + unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); + unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); + + unsigned k, l; + cudaError_t cures; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + STARPU_ASSERT(t == t2); + for (l = 0; l < t; l++) + { + for (k = 0; k < z; k++) + { + cures = cudaMemcpy2DAsync(val2+k*ldz2+l*ldt2, ldy2*sizeof(*val2), val+k*ldz+l*ldt, ldy*sizeof(*val), + x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); + STARPU_ASSERT(!cures); + } + } +} +#endif + +int main(void) +{ + unsigned i, j, k, l, m, n, p, q; + int matrix[NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; + int matrix2[NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; + starpu_data_handle_t handle, handle2; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} + }; + + memset(matrix, -1, sizeof(matrix)); + for(l=1 ; l<=NT ; l++) + for(k=1 ; k<=NZ ; k++) + for(j=1 ; j<=NY ; j++) + for(i=1 ; i<=NX ; i++) + matrix[SHADOWT+l-1][SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k+l; + + /*copy cubes*/ + for (l = SHADOWT ; lhandles[0] = sub_handle; + task->handles[1] = sub_handle2; + task->cl 
= &cl; + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + } + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unpartition(handle2, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + starpu_shutdown(); + + FPRINTF(stderr,"OUT Matrix:\n"); + for(l=0 ; l + +/* Shadow width */ +#define SHADOWX 2 +#define SHADOWY 2 +#define SHADOWZ 1 +#define SHADOWT 1 +#define SHADOWG 1 +#define NX 6 +#define NY 6 +#define NZ 2 +#define NT 2 +#define NG 2 +#define PARTSX 2 +#define PARTSY 2 +#define PARTSZ 2 +#define PARTST 2 +#define PARTSG 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); + unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + unsigned x = nn[0]; + unsigned y = nn[1]; + unsigned z = nn[2]; + unsigned t = nn[3]; + unsigned g = nn[4]; + unsigned ldy = ldn[1]; + unsigned ldz = ldn[2]; + unsigned ldt = ldn[3]; + unsigned ldg = ldn[4]; + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); + unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); + unsigned x2 = nn2[0]; + unsigned y2 = nn2[1]; + unsigned z2 = nn2[2]; + unsigned t2 = nn2[3]; + unsigned g2 = nn2[4]; + unsigned ldy2 = ldn2[1]; + unsigned ldz2 = ldn2[2]; + unsigned ldt2 = ldn2[3]; + unsigned ldg2 = ldn2[4]; + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); + + unsigned i, j, k, l, m; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + 
STARPU_ASSERT(t == t2); + STARPU_ASSERT(g == g2); + for(m = 0; m < g; m++) + for (l = 0; l < t; l++) + for (k = 0; k < z; k++) + for (j = 0; j < y; j++) + for (i = 0; i < x; i++) + val2[m*ldg2+l*ldt2+k*ldz2+j*ldy2+i] = val[m*ldg+l*ldt+k*ldz+j*ldy+i]; +} + +#ifdef STARPU_USE_CUDA +void cuda_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + /* length of the shadowed source matrix */ + unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); + unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + unsigned x = nn[0]; + unsigned y = nn[1]; + unsigned z = nn[2]; + unsigned t = nn[3]; + unsigned g = nn[4]; + unsigned ldy = ldn[1]; + unsigned ldz = ldn[2]; + unsigned ldt = ldn[3]; + unsigned ldg = ldn[4]; + /* local copy of the shadowed source matrix pointer */ + int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); + + /* length of the destination matrix */ + unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); + unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); + unsigned x2 = nn2[0]; + unsigned y2 = nn2[1]; + unsigned z2 = nn2[2]; + unsigned t2 = nn2[3]; + unsigned g2 = nn2[4]; + unsigned ldy2 = ldn2[1]; + unsigned ldz2 = ldn2[2]; + unsigned ldt2 = ldn2[3]; + unsigned ldg2 = ldn2[4]; + /* local copy of the destination matrix pointer */ + int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); + + unsigned k, l, m; + cudaError_t cures; + + /* If things go right, sizes should match */ + STARPU_ASSERT(x == x2); + STARPU_ASSERT(y == y2); + STARPU_ASSERT(z == z2); + STARPU_ASSERT(t == t2); + STARPU_ASSERT(g == g2); + for(m = 0; m < g; m++) + { + for (l = 0; l < t; l++) + { + for (k = 0; k < z; k++) + { + cures = cudaMemcpy2DAsync(val2+k*ldz2+l*ldt2+m*ldg2, ldy2*sizeof(*val2), val+k*ldz+l*ldt+m*ldg, ldy*sizeof(*val), + x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); + STARPU_ASSERT(!cures); + } + } + } +} +#endif + +int main(void) +{ + unsigned i, j, k, l, m, n, p, q, r, s; + int matrix[NG + 2*SHADOWG][NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 
2*SHADOWX]; + int matrix2[NG + PARTSG*2*SHADOWG][NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; + starpu_data_handle_t handle, handle2; + int ret; + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} + }; + + memset(matrix, -1, sizeof(matrix)); + for(m=1 ; m<=NG ; m++) + for(l=1 ; l<=NT ; l++) + for(k=1 ; k<=NZ ; k++) + for(j=1 ; j<=NY ; j++) + for(i=1 ; i<=NX ; i++) + matrix[SHADOWG+m-1][SHADOWT+l-1][SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k+l+m; + + /*copy tensors*/ + for(m=SHADOWG ; mhandles[0] = sub_handle; + task->handles[1] = sub_handle2; + task->cl = &cl; + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + } + } + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unpartition(handle2, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + starpu_shutdown(); + + FPRINTF(stderr,"OUT Matrix:\n"); + for(m=0 ; m mesh%elt(i) + CALL starpu_register_element_c(numpar%Neq_max,elt%Np,elt%Ng,elt%ro,elt%dro, & + elt%basis,elt%ro_h,elt%dro_h,elt%basis_h) + ENDDO + !Compute + DO it = 1,it_tot + + ! compute new dro for each element + DO i = 1,Nelt + elt => mesh%elt(i) + CALL starpu_loop_element_task_c(numpar%coeff,elt%ro_h,elt%dro_h,elt%basis_h) + ENDDO + ! sync (if needed by the algorithm) + CALL starpu_task_wait_for_all() + + ! - - - - - + + ! copy dro to ro for each element + DO i = 1,Nelt + elt => mesh%elt(i) + CALL starpu_copy_element_task_c(elt%ro_h,elt%dro_h) + ENDDO + ! 
sync (if needed by the algorithm) + CALL starpu_task_wait_for_all() + + ENDDO + !Unregistration of elements + DO i = 1,Nelt + elt => mesh%elt(i) + CALL starpu_unregister_element_c(elt%ro_h,elt%dro_h,elt%basis_h) + ENDDO + + !Terminate StarPU, no task can be submitted after + CALL starpu_shutdown() + + !Check data with StarPU + WRITE(6,'(a)') " " + WRITE(6,'(a)') " %%%% RESULTS STARPU %%%% " + WRITE(6,'(a)') " " + DO i = 1,Nelt + WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " + WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro + WRITE(6,'(a)') " ------------------------ " + ENDDO + + !Same compute without StarPU + DO i = 1,Nelt + elt => mesh%elt(i) + CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i) + ENDDO + + DO it = 1, it_tot + DO i = 1,Nelt + elt => mesh%elt(i) + CALL loop_element_cpu(elt%ro,elt%dro,elt%basis,numpar%coeff,numpar%Neq_max,elt%Ng,elt%Np) + elt%ro = elt%ro + elt%dro + ENDDO + ENDDO + + WRITE(6,'(a)') " " + WRITE(6,'(a)') " %%%% RESULTS VERIFICATION %%%% " + WRITE(6,'(a)') " " + + DO i = 1,Nelt + WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " + WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro + WRITE(6,'(a)') " ------------------------ " + ENDDO + + WRITE(6,'(a)') " " + + !Deallocation + DO i = 1,Nelt + elt => mesh%elt(i) + DEALLOCATE(elt%ro) + DEALLOCATE(elt%dro) + DEALLOCATE(elt%basis) + ENDDO + DEALLOCATE(mesh%elt) + +END PROGRAM f90_example diff --git a/examples/fortran90/marshalling.c b/examples/fortran90/marshalling.c new file mode 100644 index 0000000..d6076c7 --- /dev/null +++ b/examples/fortran90/marshalling.c @@ -0,0 +1,166 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2015-2015 ONERA + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Helper functions to initialize StarPU and register element matrices */ + +#include + +//--------------------------------------------------------------// +void starpu_register_element_c(int Neq_max,int Np, int Ng,double **ro, double **dro, + double **basis, void **ro_h, void **dro_h, void **basis_h) +{ + starpu_data_handle_t ro_handle; + starpu_data_handle_t dro_handle; + starpu_data_handle_t basis_handle; + + starpu_matrix_data_register(&ro_handle, 0, + (uintptr_t)ro,Neq_max,Neq_max,Np, sizeof(double)); + starpu_matrix_data_register(&dro_handle, 0, + (uintptr_t)dro,Neq_max,Neq_max,Np, sizeof(double)); + starpu_matrix_data_register(&basis_handle, 0, + (uintptr_t)basis,Np,Np,Ng, sizeof(double)); + + *ro_h = ro_handle; + *dro_h = dro_handle; + *basis_h = basis_handle; +} + +void starpu_unregister_element_c(void **ro_h, void **dro_h, void **basis_h) +{ + starpu_data_handle_t ro_handle = *ro_h; + starpu_data_handle_t dro_handle = *dro_h; + starpu_data_handle_t basis_handle = *basis_h; + + starpu_data_unregister(ro_handle); + starpu_data_unregister(dro_handle); + starpu_data_unregister(basis_handle); +} + +//--------------------------------------------------------------// +void loop_element_cpu_fortran(double coeff, int Neq_max, int Np, int Ng, void *ro_ptr, void *dro_ptr, void *basis_ptr); /* must match the seven-argument BIND(C) subroutine in mod_compute.f90 */ + +void
loop_element_cpu_func(void *buffers[], void *cl_arg); + +struct starpu_codelet cl_loop_element = +{ + .cpu_funcs = {loop_element_cpu_func}, + .nbuffers = 3, + .modes = {STARPU_R,STARPU_RW,STARPU_R}, + .name = "LOOP_ELEMENT" +}; + +void loop_element_cpu_func(void *buffers[], void *cl_arg) +{ + double coeff; + + double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); + int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); + + double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); + + double **basis = (double **) STARPU_MATRIX_GET_PTR(buffers[2]); + int Np = STARPU_MATRIX_GET_NX(buffers[2]); + int Ng = STARPU_MATRIX_GET_NY(buffers[2]); + + starpu_codelet_unpack_args(cl_arg, &coeff); + + void *ro_ptr = &ro; + void *dro_ptr = &dro; + void *basis_ptr = &basis; + + loop_element_cpu_fortran(coeff,Neq_max,Np,Ng, + ro_ptr,dro_ptr,basis_ptr); +} + +void starpu_loop_element_task_c(double coeff, void **ro_h, void **dro_h, void **basis_h) +{ + int ret; + + starpu_data_handle_t ro_handle = *ro_h; + starpu_data_handle_t dro_handle = *dro_h; + starpu_data_handle_t basis_handle = *basis_h; + + /* execute the task on any eligible computational resource */ + ret = starpu_task_insert(&cl_loop_element, + STARPU_VALUE, &coeff, sizeof(double), + STARPU_R, ro_handle, + STARPU_RW, dro_handle, + STARPU_R, basis_handle, + 0); + + /* verification */ + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +//--------------------------------------------------------------// +void copy_element_cpu_fortran(int Neq_max, int Np, void *ro_ptr, void *dro_ptr); + +void copy_element_cpu_func(void *buffers[], void *cl_arg); + +struct starpu_codelet cl_copy_element = +{ + .cpu_funcs = {copy_element_cpu_func}, + .nbuffers = 2, + .modes = {STARPU_RW,STARPU_R}, + .name = "COPY_ELEMENT" +}; + +void copy_element_cpu_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); + int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); + int Np =
STARPU_MATRIX_GET_NY(buffers[0]); + + double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); + + void *ro_ptr = &ro; + void *dro_ptr = &dro; + + copy_element_cpu_fortran(Neq_max,Np,ro_ptr,dro_ptr); +} + +void starpu_copy_element_task_c(void **ro_h, void **dro_h) +{ + int ret; + + starpu_data_handle_t ro_handle = *ro_h; + starpu_data_handle_t dro_handle = *dro_h; + + /* execute the task on any eligible computational resource */ + ret = starpu_task_insert(&cl_copy_element, + STARPU_RW, ro_handle, + STARPU_R, dro_handle, + 0); + + /* verification: label the check with the call that was actually made */ + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +//--------------------------------------------------------------// +int starpu_my_init_c() +{ + /* Initialize StarPU with default configuration */ + int ret; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.sched_policy_name = "dmda"; + + ret = starpu_init(&conf); + /* int ret = starpu_init(NULL); */ + return ret; +} diff --git a/examples/fortran90/mod_compute.f90 b/examples/fortran90/mod_compute.f90 new file mode 100644 index 0000000..f8cfaa1 --- /dev/null +++ b/examples/fortran90/mod_compute.f90 @@ -0,0 +1,127 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! Copyright (C) 2015-2015 ONERA +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +!
Computation kernels for the simulation + +MODULE mod_compute + + USE mod_types + USE starpu_mod + USE mod_interface + USE iso_c_binding + + IMPLICIT NONE + +CONTAINS + + !--------------------------------------------------------------! + SUBROUTINE init_element(ro,dro,basis,Neq_max,Np,Ng,i) + INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Np,Ng,i + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro,basis,dro + !Local variables + INTEGER(KIND=C_INT) :: n,nb,neq + + DO nb=1,Np + DO neq= 1,Neq_max + ro(neq,nb) = 0.01*(nb+neq)*i + END DO + END DO + + DO nb=1,Np + DO neq= 1,Neq_max + dro(neq,nb) = 0.05*(nb-neq)*i + END DO + END DO + + DO n=1,Ng + DO nb=1,Np + basis(nb,n) = 0.05*(n+nb)*i + END DO + END DO + + END SUBROUTINE init_element + + !--------------------------------------------------------------! + RECURSIVE SUBROUTINE loop_element_cpu_fortran(coeff,Neq_max,Np,Ng, & + & ro_ptr,dro_ptr,basis_ptr) BIND(C) + INTEGER(KIND=C_INT),VALUE :: Neq_max,Np,Ng + REAL(KIND=C_DOUBLE),VALUE :: coeff + TYPE(C_PTR) :: ro_ptr,dro_ptr,basis_ptr + !Local variables + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro,basis + + CALL C_F_POINTER(ro_ptr,ro,[Neq_max,Np]) + CALL C_F_POINTER(dro_ptr,dro,[Neq_max,Np]) + CALL C_F_POINTER(basis_ptr,basis,[Np,Ng]) + + CALL loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) + + END SUBROUTINE loop_element_cpu_fortran + + !--------------------------------------------------------------! + RECURSIVE SUBROUTINE loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) + REAL(KIND=C_DOUBLE),INTENT(IN) :: coeff + INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Ng,Np + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: ro,basis + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: dro + !Local variables + REAL(KIND=C_DOUBLE) :: coeff2,r + INTEGER(KIND=C_INT) :: n,nb,neq + + DO n=1,Ng + r = 0. 
+ DO nb=1,Np + DO neq= 1,Neq_max + r = r + basis(nb,n) * ro(neq,nb) + ENDDO + ENDDO + + coeff2 = r + coeff + + DO nb=1,Np + DO neq = 1,Neq_max + dro(neq,nb) = coeff2 + dro(neq,nb) + ENDDO + ENDDO + ENDDO + + END SUBROUTINE loop_element_cpu + + !--------------------------------------------------------------! + RECURSIVE SUBROUTINE copy_element_cpu_fortran(Neq_max,Np, & + & ro_ptr,dro_ptr) BIND(C) + INTEGER(KIND=C_INT),VALUE :: Neq_max,Np + TYPE(C_PTR) :: ro_ptr,dro_ptr + !Local variables + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro + + CALL C_F_POINTER(ro_ptr,ro,[Neq_max,Np]) + CALL C_F_POINTER(dro_ptr,dro,[Neq_max,Np]) + + CALL copy_element_cpu(ro,dro) + + END SUBROUTINE copy_element_cpu_fortran + + !--------------------------------------------------------------! + RECURSIVE SUBROUTINE copy_element_cpu(ro,dro) + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro + REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: dro + + ro = ro + dro + + END SUBROUTINE copy_element_cpu + +END MODULE mod_compute diff --git a/examples/fortran90/mod_interface.f90 b/examples/fortran90/mod_interface.f90 new file mode 100644 index 0000000..8bbf6cf --- /dev/null +++ b/examples/fortran90/mod_interface.f90 @@ -0,0 +1,63 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! Copyright (C) 2015-2015 ONERA +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +! 
Fortran module interface for StarPU initialization and element registration + +MODULE mod_interface + + INTERFACE + FUNCTION starpu_my_init_c() BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT) :: starpu_my_init_c + END FUNCTION starpu_my_init_c + END INTERFACE + + INTERFACE + SUBROUTINE starpu_register_element_c(Neq,Np,Ng,ro,dro,basis,ro_h,dro_h,basis_h) BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT),VALUE :: Neq,Np,Ng + REAL(KIND=C_DOUBLE),DIMENSION(Neq,Np) :: ro,dro + REAL(KIND=C_DOUBLE),DIMENSION(Np,Ng) :: basis + TYPE(C_PTR), INTENT(OUT) :: ro_h, dro_h, basis_h + END SUBROUTINE starpu_register_element_c + END INTERFACE + + INTERFACE + SUBROUTINE starpu_unregister_element_c( & + ro_h,dro_h,basis_h) BIND(C) + USE iso_c_binding + TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h, basis_h + END SUBROUTINE starpu_unregister_element_c + END INTERFACE + + INTERFACE + SUBROUTINE starpu_loop_element_task_c(coeff, & + ro_h,dro_h,basis_h) BIND(C) + USE iso_c_binding + REAL(KIND=C_DOUBLE),VALUE :: coeff + TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h, basis_h + END SUBROUTINE starpu_loop_element_task_c + END INTERFACE + + INTERFACE + SUBROUTINE starpu_copy_element_task_c( & + ro_h,dro_h) BIND(C) + USE iso_c_binding + TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h + END SUBROUTINE starpu_copy_element_task_c + END INTERFACE + +END MODULE mod_interface diff --git a/examples/fortran90/mod_types.f90 b/examples/fortran90/mod_types.f90 new file mode 100644 index 0000000..c8b9d7e --- /dev/null +++ b/examples/fortran90/mod_types.f90 @@ -0,0 +1,37 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! Copyright (C) 2015-2015 ONERA +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! 
StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +MODULE mod_types + + USE iso_c_binding + + TYPE type_numpar + REAL(KIND=C_DOUBLE) :: coeff + INTEGER(KIND=C_INT) :: Neq_max + END TYPE type_numpar + + TYPE type_mesh_elt + INTEGER(KIND=C_INT) :: Ng, Np + REAL(KIND=C_DOUBLE),POINTER,DIMENSION(:,:) :: ro, dro + REAL(KIND=C_DOUBLE),POINTER,DIMENSION(:,:) :: basis + TYPE(C_PTR) :: ro_h, dro_h, basis_h + END TYPE type_mesh_elt + + TYPE type_mesh + TYPE(type_mesh_elt), POINTER, DIMENSION(:) :: elt + END TYPE type_mesh + +END MODULE mod_types diff --git a/examples/fortran90/starpu_mod.f90 b/examples/fortran90/starpu_mod.f90 new file mode 100644 index 0000000..9cce981 --- /dev/null +++ b/examples/fortran90/starpu_mod.f90 @@ -0,0 +1,145 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +MODULE starpu_mod + ! == starpu.h == + + ! starpu_conf_init + INTERFACE + SUBROUTINE starpu_conf_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + END SUBROUTINE starpu_conf_init + END INTERFACE + + ! 
starpu_init + INTERFACE + FUNCTION starpu_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + INTEGER(KIND=C_INT) :: starpu_init + END FUNCTION starpu_init + END INTERFACE + + ! starpu_initialize + + ! starpu_pause + INTERFACE + SUBROUTINE starpu_pause() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_pause + END INTERFACE + + ! starpu_resume + INTERFACE + SUBROUTINE starpu_resume() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_resume + END INTERFACE + + ! starpu_shutdown + INTERFACE + SUBROUTINE starpu_shutdown() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_shutdown + END INTERFACE + + ! starpu_topology_print + + ! starpu_asynchronous_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_copy_disabled + END INTERFACE + + ! starpu_asynchronous_cuda_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_cuda_copy_disabled + END INTERFACE + + ! starpu_asynchronous_opencl_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_opencl_copy_disabled + END INTERFACE + + ! starpu_display_stats + INTERFACE + SUBROUTINE starpu_display_stats() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_display_stats + END INTERFACE + + ! starpu_get_version + INTERFACE + SUBROUTINE starpu_get_version(major,minor,release) BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release + END SUBROUTINE starpu_get_version + END INTERFACE + + ! starpu_cpu_worker_get_count + INTERFACE + FUNCTION starpu_cpu_worker_get_count() BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count + END FUNCTION starpu_cpu_worker_get_count + END INTERFACE + + ! == starpu_task.h == + + ! starpu_tag_declare_deps + ! starpu_tag_declare_deps_array + ! 
starpu_task_declare_deps_array + ! starpu_tag_wait + ! starpu_tag_wait_array + ! starpu_tag_notify_from_apps + ! starpu_tag_restart + ! starpu_tag_remove + ! starpu_task_init + ! starpu_task_clean + ! starpu_task_create + ! starpu_task_destroy + ! starpu_task_set_destroy + ! starpu_task_submit + ! starpu_task_submit_to_ctx + ! starpu_task_finished + ! starpu_task_wait + ! starpu_task_wait_for_all + INTERFACE + SUBROUTINE starpu_task_wait_for_all() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_task_wait_for_all + END INTERFACE + ! starpu_task_wait_for_n_submitted + ! starpu_task_wait_for_all_in_ctx + ! starpu_task_wait_for_n_submitted_in_ctx + ! starpu_task_wait_for_no_ready + ! starpu_task_nready + ! starpu_task_nsubmitted + ! starpu_codelet_init + ! starpu_codelet_display_stats + ! starpu_task_get_current + ! starpu_parallel_task_barrier_init + ! starpu_parallel_task_barrier_init_n + ! starpu_task_dup + ! starpu_task_set_implementation + ! starpu_task_get_implementation + +END MODULE starpu_mod diff --git a/examples/gl_interop/gl_interop.c b/examples/gl_interop/gl_interop.c new file mode 100644 index 0000000..d5d36b7 --- /dev/null +++ b/examples/gl_interop/gl_interop.c @@ -0,0 +1,138 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * This example demonstrates how to use StarPU combined with OpenGL rendering, + * which needs: + * + * - initializing GLUT first, + * - enabling it at initialization, + * - running the corresponding CUDA worker in the GLUT thread (here, the main + * thread). + */ + +#include +#include + +#if (defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) +#include + +void dummy(void *buffers[], void *cl_arg) +{ + float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); + + printf("Codelet running\n"); + cudaMemsetAsync(v, 0, STARPU_VECTOR_GET_NX(buffers[0]) * sizeof(float), starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + printf("Codelet done\n"); +} + +struct starpu_codelet cl = +{ + .cuda_funcs = { dummy }, + .nbuffers = 1, + .modes = { STARPU_W }, +}; + +void foo(void) +{ +} + +void display(float i) +{ + glClear(GL_COLOR_BUFFER_BIT); + glColor3f(1, 1, 1); + glBegin(GL_LINES); + glVertex2f(-i, -i); + glVertex2f(i, i); + glEnd(); + glFinish(); + glutPostRedisplay(); + glutMainLoopEvent(); +} + +void callback_func(void *foo) +{ + printf("Callback running, rendering\n"); + float i = 1.; + while (i > 0) + { + starpu_usleep(100000); + display(i); + i -= 0.1; + } + printf("rendering done\n"); + + /* Tell it was already the last submitted task */ + starpu_drivers_request_termination(); +} +#endif + +int main(int argc, char **argv) +{ +#if !(defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) + return 77; +#else + struct starpu_conf conf; + int cuda_device = 0; + int cuda_devices[] = { cuda_device }; + struct starpu_driver drivers[] = + { + { .type = STARPU_CUDA_WORKER, .id.cuda_id = cuda_device } + }; + int ret; + struct starpu_task *task; + starpu_data_handle_t handle; + + glutInit(&argc, argv); + glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); + glutInitWindowPosition(0, 0); + glutInitWindowSize(300,200); + glutCreateWindow("StarPU OpenGL interoperability test"); + glClearColor (0.5, 0.5, 0.5, 0.0); + + /* Enable 
OpenGL interoperability */ + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncuda = 1; + conf.cuda_opengl_interoperability = cuda_devices; + conf.n_cuda_opengl_interoperability = sizeof(cuda_devices) / sizeof(*cuda_devices); + conf.not_launched_drivers = drivers; + conf.n_not_launched_drivers = sizeof(drivers) / sizeof(*drivers); + ret = starpu_init(&conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&handle, -1, 0, 10, sizeof(float)); + + /* Submit just one dumb task */ + task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + task->callback_func = callback_func; + task->callback_arg = NULL; + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* And run the driver inside main, which will run the task */ + printf("running the driver\n"); + starpu_driver_run(&drivers[0]); + printf("finished running the driver\n"); + + starpu_shutdown(); + + return 0; +#endif +} diff --git a/examples/gl_interop/gl_interop_idle.c b/examples/gl_interop/gl_interop_idle.c new file mode 100644 index 0000000..fd9ea12 --- /dev/null +++ b/examples/gl_interop/gl_interop_idle.c @@ -0,0 +1,161 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * This example demonstrates how to use StarPU combined with OpenGL rendering, + * which needs: + * + * - initializing GLUT first, + * - enabling it at initialization, + * - running the corresponding CUDA worker in the GLUT thread (here, the main + * thread). + * + * The difference with gl_interop.c is that this version runs StarPU Tasks in + * the glut idle handler. + */ + +#include +#include + +#if (defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) +#include + +void dummy(void *buffers[], void *cl_arg) +{ + float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); + + printf("Codelet running\n"); + cudaMemsetAsync(v, 0, STARPU_VECTOR_GET_NX(buffers[0]) * sizeof(float), starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + printf("Codelet done\n"); +} + +struct starpu_codelet cl = +{ + .cuda_funcs = { dummy }, + .nbuffers = 1, + .modes = { STARPU_W }, +}; + +void foo(void) +{ +} + +void display(float i) +{ + glClear(GL_COLOR_BUFFER_BIT); + glColor3f(1, 1, 1); + glBegin(GL_LINES); + glVertex2f(-i, -i); + glVertex2f(i, i); + glEnd(); + glFinish(); + glutPostRedisplay(); +} + +static int cuda_devices[] = { 0 }; +static struct starpu_driver drivers[] = +{ + { .type = STARPU_CUDA_WORKER } +}; + +void callback_func(void *foo) +{ + printf("Callback running, rendering\n"); + float i = 1.; + while (i > 0) + { + starpu_usleep(100000); + display(i); + i -= 0.1; + } + printf("rendering done\n"); + + /* Tell it was already the last submitted task */ + starpu_drivers_request_termination(); + + /* And terminate StarPU */ + starpu_driver_deinit(&drivers[0]); + starpu_shutdown(); + exit(0); +} + +static void idle(void) +{ + starpu_driver_run_once(&drivers[0]); +} +#endif + +int main(int argc, char **argv) +{ +#if !(defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) + return 77; +#else + struct starpu_conf conf; + int ret; + struct starpu_task *task; + starpu_data_handle_t handle; + int cuda_device = 0; + + 
cuda_devices[0] = cuda_device; + drivers[0].id.cuda_id = cuda_device; + + glutInit(&argc, argv); + glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); + glutInitWindowPosition(0, 0); + glutInitWindowSize(300,200); + glutCreateWindow("StarPU OpenGL interoperability test"); + glClearColor (0.5, 0.5, 0.5, 0.0); + + /* Enable OpenGL interoperability */ + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncuda = 1; + conf.cuda_opengl_interoperability = cuda_devices; + conf.n_cuda_opengl_interoperability = sizeof(cuda_devices) / sizeof(*cuda_devices); + conf.not_launched_drivers = drivers; + conf.n_not_launched_drivers = sizeof(drivers) / sizeof(*drivers); + ret = starpu_init(&conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&handle, -1, 0, 10, sizeof(float)); + + /* Submit just one dumb task */ + task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + task->callback_func = callback_func; + task->callback_arg = NULL; + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* And run the driver inside main, which will run the task */ + printf("running the driver\n"); + /* Initialize it */ + starpu_driver_init(&drivers[0]); + /* Register driver loop content as idle handler */ + glutIdleFunc(idle); + /* Now run the glut loop */ + glutMainLoop(); + /* And deinitialize driver */ + starpu_driver_deinit(&drivers[0]); + printf("finished running the driver\n"); + + starpu_shutdown(); + + return 0; +#endif +} diff --git a/examples/heat/dw_factolu.c b/examples/heat/dw_factolu.c new file mode 100644 index 0000000..31b2b1d --- /dev/null +++ b/examples/heat/dw_factolu.c @@ -0,0 +1,869 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This implements an LU factorization. + * The task graph is submitted through continuation: the rest of the graph is + * submitted as appropriate in the tasks' callback. + */ + +#include "dw_factolu.h" + +#ifdef STARPU_HAVE_HELGRIND_H +#include +#endif +#ifndef ANNOTATE_HAPPENS_BEFORE +#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_AFTER +#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) +#endif + +#if 0 +#define debug(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__) +#else +#define debug(fmt, ...) 
+#endif + +struct starpu_perfmodel model_getrf; +struct starpu_perfmodel model_trsm_ll; +struct starpu_perfmodel model_trsm_ru; +struct starpu_perfmodel model_gemm; + +static unsigned *advance_11; /* size nblocks, whether the 11 task is done */ +static unsigned *advance_12_21; /* size nblocks*nblocks */ +static unsigned *advance_22; /* array of nblocks *nblocks*nblocks */ + +static double start; +static double end; + +static unsigned no_prio = 0; + +static struct starpu_codelet cl_getrf = +{ + .cpu_funcs = {dw_cpu_codelet_update_getrf}, + .cpu_funcs_name = {"dw_cpu_codelet_update_getrf"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_getrf}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &model_getrf +}; + +static struct starpu_codelet cl_trsm_ll = +{ + .cpu_funcs = {dw_cpu_codelet_update_trsm_ll}, + .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_trsm_ll}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &model_trsm_ll +}; + +static struct starpu_codelet cl_trsm_ru = +{ + .cpu_funcs = {dw_cpu_codelet_update_trsm_ru}, + .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_trsm_ru}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &model_trsm_ru +}; + +static struct starpu_codelet cl_gemm = +{ + .cpu_funcs = {dw_cpu_codelet_update_gemm}, + .cpu_funcs_name = {"dw_cpu_codelet_update_gemm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_gemm}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &model_gemm +}; + + + +#define STARTED 0x01 +#define DONE 0x11 + +/* + * Upgraded Callbacks : break the pipeline design ! 
+ */
+
+/* Callback run once GEMM task (k,i,j) has completed; argcb is the cl_args
+ * allocated by the submitter and is freed here.
+ * Marks the matching advance_22 entry DONE, then submits at most one
+ * follow-up task:
+ *  - getrf on block (k+1,k+1) when i == j == k+1;
+ *  - trsm_ru for column j when i == k+1, j > k+1 and advance_11[k+1] passes
+ *    the DONE test;
+ *  - trsm_ll for row i when j == k+1, i > k+1 and advance_11[k+1] passes
+ *    the DONE test.
+ * The atomic OR with STARTED on advance_12_21 lets exactly one caller
+ * submit a given TRSM task. */
+void dw_callback_v2_codelet_update_gemm(void *argcb)
+{
+	int ret;
+	cl_args *args = argcb;
+
+	unsigned k = args->k;
+	unsigned i = args->i;
+	unsigned j = args->j;
+	unsigned nblocks = args->nblocks;
+
+	debug("ugemm %d %d %d\n", k, i, j);
+
+	/* we did task 22k,i,j */
+	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
+
+	if ((i == j) && (i == k+1))
+	{
+		/* we now reduce the LU22 part (recursion appears there) */
+		cl_args *ugetrfarg = malloc(sizeof(cl_args));
+
+		struct starpu_task *task = starpu_task_create();
+			task->callback_func = dw_callback_v2_codelet_update_getrf;
+			task->callback_arg = ugetrfarg;
+			task->cl = &cl_getrf;
+			task->cl_arg = ugetrfarg;
+			task->cl_arg_size = sizeof(*ugetrfarg);
+
+			task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, k+1, k+1);
+
+		ugetrfarg->dataA = args->dataA;
+		ugetrfarg->i = k + 1;
+		ugetrfarg->nblocks = args->nblocks;
+
+		/* schedule the codelet */
+		if (!no_prio)
+			task->priority = STARPU_MAX_PRIO;
+
+		debug("ugemm %d %d %d start ugetrf %d\n", k, i, j, k + 1);
+		ret = starpu_task_submit(task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	/* 11k+1 + 22k,k+1,j => 21 k+1,j */
+	if (i == k + 1 && j > k + 1)
+	{
+		uint8_t dep;
+		/* 11 k+1*/
+		dep = advance_11[(k+1)];
+		if (dep & DONE)
+		{
+			/* try to push the task */
+			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED);
+			if ((u & STARTED) == 0)
+			{
+				/* we are the only one that should launch that task */
+				cl_args *utrsmrua = malloc(sizeof(cl_args));
+
+				struct starpu_task *task_trsm_ru = starpu_task_create();
+				task_trsm_ru->callback_func = dw_callback_v2_codelet_update_trsm_ru;
+				task_trsm_ru->callback_arg = utrsmrua;
+				task_trsm_ru->cl = &cl_trsm_ru;
+				task_trsm_ru->cl_arg = utrsmrua;
+				task_trsm_ru->cl_arg_size = sizeof(*utrsmrua);
+
+				utrsmrua->i = k+1;
+				utrsmrua->k = j;
+				utrsmrua->nblocks = args->nblocks;
+				utrsmrua->dataA = args->dataA;
+
+				task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i);
+				task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k);
+
+				debug("ugemm %d %d %d start utrsmru %d %d\n", k, i, j, k+1, j);
+				ret = starpu_task_submit(task_trsm_ru);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+		}
+	}
+
+	/* 11k + 22k-1,i,k => 12 k,i */
+	if (j == k + 1 && i > k + 1)
+	{
+		uint8_t dep;
+		/* 11 k+1*/
+		dep = advance_11[(k+1)];
+		if (dep & DONE)
+		{
+			/* try to push the task */
+			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED);
+			if ((u & STARTED) == 0)
+			{
+				/* we are the only one that should launch that task */
+				cl_args *utrsmlla = malloc(sizeof(cl_args));
+
+				struct starpu_task *task_trsm_ll = starpu_task_create();
+				task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll;
+				task_trsm_ll->callback_arg = utrsmlla;
+				task_trsm_ll->cl = &cl_trsm_ll;
+				task_trsm_ll->cl_arg = utrsmlla;
+				task_trsm_ll->cl_arg_size = sizeof(*utrsmlla);
+
+				utrsmlla->i = k+1;
+				utrsmlla->k = i;
+				utrsmlla->nblocks = args->nblocks;
+				utrsmlla->dataA = args->dataA;
+
+				task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i);
+				task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i);
+
+				debug("ugemm %d %d %d start utrsmll %d %d\n", k, i, j, k+1, i);
+				ret = starpu_task_submit(task_trsm_ll);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+		}
+	}
+
+	free(args);
+}
+
+/* Callback run once the trsm_ll task described by argcb (fields i and k) has
+ * completed; argcb is freed here.
+ * Marks advance_12_21[i*nblocks + k] DONE, then for every slicey > i whose
+ * advance_12_21[i + slicey*nblocks] entry passes the DONE test, claims the
+ * matching advance_22 entry with an atomic OR and, if it was not STARTED
+ * yet, submits the GEMM task (k=i, i=k, j=slicey). */
+void dw_callback_v2_codelet_update_trsm_ll(void *argcb)
+{
+	int ret;
+	cl_args *args = argcb;
+
+	/* now launch the update of LU22 */
+	unsigned i = args->i;
+	unsigned k = args->k;
+	unsigned nblocks = args->nblocks;
+
+	debug("utrsmll %d %d\n", i, k);
+
+	/* we did task 21i,k */
+	advance_12_21[i*nblocks + k] = DONE;
+
+	unsigned slicey;
+	for (slicey = i+1; slicey < nblocks; slicey++)
+	{
+		/* can we launch 22 i,args->k,slicey ? */
+		/* deps : 21 args->k, slicey */
+		uint8_t dep;
+		dep = advance_12_21[i + slicey*nblocks];
+		if (dep & DONE)
+		{
+			/* perhaps we may schedule the 22 i,args->k,slicey task */
+			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED);
+			if ((u & STARTED) == 0)
+			{
+				/* update that square matrix */
+				cl_args *ugemma = malloc(sizeof(cl_args));
+
+				struct starpu_task *task_gemm = starpu_task_create();
+				task_gemm->callback_func = dw_callback_v2_codelet_update_gemm;
+				task_gemm->callback_arg = ugemma;
+				task_gemm->cl = &cl_gemm;
+				task_gemm->cl_arg = ugemma;
+				task_gemm->cl_arg_size = sizeof(*ugemma);
+
+				ugemma->k = i;
+				ugemma->i = k;
+				ugemma->j = slicey;
+				ugemma->dataA = args->dataA;
+				ugemma->nblocks = nblocks;
+
+				task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k);
+				task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j);
+				task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j);
+
+				/* schedule that codelet */
+				if (!no_prio && (slicey == i+1))
+					task_gemm->priority = STARPU_MAX_PRIO;
+
+				debug("utrsmll %d %d start ugemm %d %d %d\n", i, k, i, k, slicey);
+				ret = starpu_task_submit(task_gemm);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+		}
+	}
+	free(argcb);
+}
+
+/* Callback run once the trsm_ru task described by argcb (fields i and k) has
+ * completed; argcb is freed here.
+ * Mirror image of dw_callback_v2_codelet_update_trsm_ll(): marks
+ * advance_12_21[i + k*nblocks] DONE, then for every slicex > i whose
+ * advance_12_21[i*nblocks + slicex] entry passes the DONE test, claims the
+ * matching advance_22 entry and submits the GEMM task (k=i, i=slicex, j=k)
+ * if nobody started it yet. */
+void dw_callback_v2_codelet_update_trsm_ru(void *argcb)
+{
+	int ret;
+	cl_args *args = argcb;
+
+	/* now launch the update of LU22 */
+	unsigned i = args->i;
+	unsigned k = args->k;
+	unsigned nblocks = args->nblocks;
+
+	/* we did task 21i,k */
+	advance_12_21[i + k*nblocks] = DONE;
+
+	debug("utrsmru %d %d\n", i, k);
+
+	unsigned slicex;
+	for (slicex = i+1; slicex < nblocks; slicex++)
+	{
+		/* can we launch 22 i,slicex,k ? */
+		/* deps : 12 slicex k */
+		uint8_t dep;
+		dep = advance_12_21[i*nblocks + slicex];
+		if (dep & DONE)
+		{
+			/* perhaps we may schedule the 22 i,args->k,slicey task */
+			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED);
+			if ((u & STARTED) == 0)
+			{
+				/* update that square matrix */
+				cl_args *ugemma = malloc(sizeof(cl_args));
+
+				struct starpu_task *task_gemm = starpu_task_create();
+				task_gemm->callback_func = dw_callback_v2_codelet_update_gemm;
+				task_gemm->callback_arg = ugemma;
+				task_gemm->cl = &cl_gemm;
+				task_gemm->cl_arg = ugemma;
+				task_gemm->cl_arg_size = sizeof(*ugemma);
+
+				ugemma->k = i;
+				ugemma->i = slicex;
+				ugemma->j = k;
+				ugemma->dataA = args->dataA;
+				ugemma->nblocks = nblocks;
+
+				task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k);
+				task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j);
+				task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j);
+
+				/* schedule that codelet */
+				if (!no_prio && (slicex == i+1))
+					task_gemm->priority = STARPU_MAX_PRIO;
+
+				debug("utrsmru %d %d start ugemm %d %d %d\n", i, k, i, slicex, k);
+				ret = starpu_task_submit(task_gemm);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+		}
+	}
+	free(argcb);
+}
+
+/* Callback run once the getrf task on diagonal block i has completed; argcb
+ * is freed on every path.
+ * Marks advance_11[i] DONE. Unless i is the last block, it then walks every
+ * slice > i and submits the trsm_ll and trsm_ru tasks whose GEMM dependency
+ * from step i-1 passes the DONE test (always true for i == 0), using the
+ * atomic OR with STARTED so that each task is submitted only once. */
+void dw_callback_v2_codelet_update_getrf(void *argcb)
+{
+	/* in case there remains work, go on */
+	cl_args *args = argcb;
+
+	unsigned nblocks = args->nblocks;
+	unsigned i = args->i;
+
+	debug("ugetrf %d\n", i);
+
+	/* we did task 11k */
+	advance_11[i] = DONE;
+
+	if (i == nblocks - 1)
+	{
+		/* we are done */
+		free(argcb);
+		return;
+	}
+	else
+	{
+		/* put new tasks */
+		unsigned slice;
+		for (slice = i + 1; slice < nblocks; slice++)
+		{
+
+			/* can we launch 12i,slice ? */
+			uint8_t deps12;
+			if (i == 0)
+			{
+				deps12 = DONE;
+			}
+			else
+			{
+				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];
+			}
+			if (deps12 & DONE)
+			{
+				/* we may perhaps launch the task 12i,slice */
+				uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
+				if ((u & STARTED) == 0)
+				{
+					int ret;
+
+					/* we are the only one that should launch that task */
+					cl_args *utrsmlla = malloc(sizeof(cl_args));
+
+					struct starpu_task *task_trsm_ll = starpu_task_create();
+					task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll;
+					task_trsm_ll->callback_arg = utrsmlla;
+					task_trsm_ll->cl = &cl_trsm_ll;
+					task_trsm_ll->cl_arg = utrsmlla;
+					task_trsm_ll->cl_arg_size = sizeof(*utrsmlla);
+
+					utrsmlla->i = i;
+					utrsmlla->k = slice;
+					utrsmlla->nblocks = args->nblocks;
+					utrsmlla->dataA = args->dataA;
+
+					task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i);
+					task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i);
+
+					if (!no_prio && (slice == i +1))
+						task_trsm_ll->priority = STARPU_MAX_PRIO;
+
+					debug("ugetrf %d start utrsmll %d %d\n", i, i, slice);
+					ret = starpu_task_submit(task_trsm_ll);
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+				}
+			}
+
+			/* can we launch 21i,slice ?
*/
+			if (i == 0)
+			{
+				deps12 = DONE;
+			}
+			else
+			{
+				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];
+			}
+			if (deps12 & DONE)
+			{
+				/* we may perhaps launch the task 12i,slice */
+				uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
+				if ((u & STARTED) == 0)
+				{
+					int ret;
+
+					/* we are the only one that should launch that task */
+					cl_args *utrsmrua = malloc(sizeof(cl_args));
+
+					struct starpu_task *task_trsm_ru = starpu_task_create();
+					task_trsm_ru->callback_func = dw_callback_v2_codelet_update_trsm_ru;
+					task_trsm_ru->callback_arg = utrsmrua;
+					task_trsm_ru->cl = &cl_trsm_ru;
+					task_trsm_ru->cl_arg = utrsmrua;
+					task_trsm_ru->cl_arg_size = sizeof(*utrsmrua);
+
+					utrsmrua->i = i;
+					utrsmrua->k = slice;
+					utrsmrua->nblocks = args->nblocks;
+					utrsmrua->dataA = args->dataA;
+
+					task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i);
+					task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k);
+
+					if (!no_prio && (slice == i +1))
+						task_trsm_ru->priority = STARPU_MAX_PRIO;
+
+					debug("ugetrf %d start utrsmru %d %d\n", i, i, slice);
+					ret = starpu_task_submit(task_trsm_ru);
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+				}
+			}
+		}
+	}
+	free(argcb);
+}
+
+
+
+/*
+ *	Callbacks
+ *
+ * Version 1 of the pipeline: each phase shares one heap-allocated counter
+ * between all the tasks it submits. Task submission is asynchronous, so the
+ * counter must outlive the function that submits the tasks: it is owned by
+ * the callbacks, and the callback that brings it down to zero releases it.
+ * Every callback also releases its own cl_args.
+ */
+
+
+/* Callback run once the getrf task on diagonal block args->i has completed.
+ * Unless this was the last diagonal block, submits one trsm_ll and one
+ * trsm_ru task per remaining slice; all of them share a counter initialised
+ * to the number of submitted tasks. argcb is freed on every path. */
+void dw_callback_codelet_update_getrf(void *argcb)
+{
+	/* in case there remains work, go on */
+	cl_args *args = argcb;
+
+	if (args->i == args->nblocks - 1)
+	{
+		/* we are done */
+		free(argcb);
+		return;
+	}
+	else
+	{
+		/* put new tasks */
+		unsigned nslices;
+		nslices = args->nblocks - 1 - args->i;
+
+		/* released by the last dw_callback_codelet_update_trsm_ll_21() */
+		unsigned *remaining = malloc(sizeof(unsigned));
+		STARPU_ASSERT(remaining);
+		*remaining = 2*nslices;
+
+		unsigned slice;
+		for (slice = args->i + 1; slice < args->nblocks; slice++)
+		{
+			int ret;
+
+			/* update slice from utrsmll */
+			cl_args *utrsmlla = malloc(sizeof(cl_args));
+			STARPU_ASSERT(utrsmlla);
+
+			/* update slice from utrsmru */
+			cl_args *utrsmrua = malloc(sizeof(cl_args));
+			STARPU_ASSERT(utrsmrua);
+
+			struct starpu_task *task_trsm_ll = starpu_task_create();
+			task_trsm_ll->callback_func = dw_callback_codelet_update_trsm_ll_21;
+			task_trsm_ll->callback_arg = utrsmlla;
+			task_trsm_ll->cl = &cl_trsm_ll;
+			task_trsm_ll->cl_arg = utrsmlla;
+			task_trsm_ll->cl_arg_size = sizeof(*utrsmlla);
+
+			/* both kinds of TRSM share the same callback and counter */
+			struct starpu_task *task_trsm_ru = starpu_task_create();
+			task_trsm_ru->callback_func = dw_callback_codelet_update_trsm_ll_21;
+			task_trsm_ru->callback_arg = utrsmrua;
+			task_trsm_ru->cl = &cl_trsm_ru;
+			task_trsm_ru->cl_arg = utrsmrua;
+			task_trsm_ru->cl_arg_size = sizeof(*utrsmrua);
+
+			utrsmlla->i = args->i;
+			utrsmlla->k = slice;
+			utrsmlla->nblocks = args->nblocks;
+			utrsmlla->dataA = args->dataA;
+			utrsmlla->remaining = remaining;
+
+			utrsmrua->i = args->i;
+			utrsmrua->k = slice;
+			utrsmrua->nblocks = args->nblocks;
+			utrsmrua->dataA = args->dataA;
+			utrsmrua->remaining = remaining;
+
+			task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i);
+			task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i);
+
+			task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i);
+			task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k);
+
+			ret = starpu_task_submit(task_trsm_ll);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			ret = starpu_task_submit(task_trsm_ru);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+
+		/* Do NOT free 'remaining' here: the tasks above may still be
+		 * running and their callbacks decrement it. */
+		free(argcb);
+	}
+}
+
+
+/* Callback run once a GEMM task of step args->k has completed. Decrements the
+ * counter shared by all the GEMM tasks of that step; the last one releases
+ * the counter and submits the getrf task on block (k+1,k+1). args is freed
+ * on every path. */
+void dw_callback_codelet_update_gemm(void *argcb)
+{
+	cl_args *args = argcb;
+	unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, (-1));
+	ANNOTATE_HAPPENS_BEFORE(args->remaining);
+
+	if (remaining == 0)
+	{
+		int ret;
+
+		ANNOTATE_HAPPENS_AFTER(args->remaining);
+		/* all worker already used the counter */
+		free(args->remaining);
+
+		/* we now reduce the LU22 part (recursion appears there) */
+		cl_args *ugetrfarg = malloc(sizeof(cl_args));
+		STARPU_ASSERT(ugetrfarg);
+
+		struct starpu_task *task = starpu_task_create();
+			task->callback_func = dw_callback_codelet_update_getrf;
+			task->callback_arg = ugetrfarg;
+			task->cl = &cl_getrf;
+			task->cl_arg = ugetrfarg;
+			task->cl_arg_size = sizeof(*ugetrfarg);
+
+			task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1);
+
+		ugetrfarg->dataA = args->dataA;
+		ugetrfarg->i = args->k + 1;
+		ugetrfarg->nblocks = args->nblocks;
+
+		/* schedule the codelet */
+		ret = starpu_task_submit(task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	free(args);
+}
+
+/* Callback shared by the trsm_ll and trsm_ru tasks of step args->i.
+ * Decrements the counter allocated by dw_callback_codelet_update_getrf();
+ * the last TRSM to finish releases that counter and submits the
+ * (nblocks-1-i)^2 GEMM tasks of the step, which share a new counter released
+ * by the last dw_callback_codelet_update_gemm(). args is freed on every
+ * path. */
+void dw_callback_codelet_update_trsm_ll_21(void *argcb)
+{
+	cl_args *args = argcb;
+	unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, -1);
+	ANNOTATE_HAPPENS_BEFORE(args->remaining);
+
+	if (remaining == 0)
+	{
+		ANNOTATE_HAPPENS_AFTER(args->remaining);
+		/* all the TRSM callbacks already used the counter */
+		free(args->remaining);
+
+		/* now launch the update of LU22 */
+		unsigned i = args->i;
+		unsigned nblocks = args->nblocks;
+
+		/* the number of tasks to be done */
+		unsigned *remaining_tasks = malloc(sizeof(unsigned));
+		STARPU_ASSERT(remaining_tasks);
+		*remaining_tasks = (nblocks - 1 - i)*(nblocks - 1 - i);
+
+		unsigned slicey, slicex;
+		for (slicey = i+1; slicey < nblocks; slicey++)
+		{
+			for (slicex = i+1; slicex < nblocks; slicex++)
+			{
+				int ret;
+
+				/* update that square matrix */
+				cl_args *ugemma = malloc(sizeof(cl_args));
+				STARPU_ASSERT(ugemma);
+
+				struct starpu_task *task_gemm = starpu_task_create();
+				task_gemm->callback_func = dw_callback_codelet_update_gemm;
+				task_gemm->callback_arg = ugemma;
+				task_gemm->cl = &cl_gemm;
+				task_gemm->cl_arg = ugemma;
+				task_gemm->cl_arg_size = sizeof(*ugemma);
+
+				ugemma->k = i;
+				ugemma->i = slicex;
+				ugemma->j = slicey;
+				ugemma->dataA = args->dataA;
+				ugemma->nblocks = nblocks;
+				ugemma->remaining = remaining_tasks;
+
+				task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k);
+				task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j);
+				task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j);
+
+				/* schedule that codelet */
+				ret = starpu_task_submit(task_gemm);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+		}
+
+		/* Do NOT free 'remaining_tasks' here: it is released by the
+		 * last dw_callback_codelet_update_gemm(). */
+	}
+
+	free(args);
+}
+
+
+
+/*
+ *	code to bootstrap the factorization
+ */
+
+/* Runs version 1 of the factorization on the partitioned matrix dataA:
+ * submits the first getrf task, waits for the whole callback-driven pipeline
+ * to drain, then prints the elapsed time and the achieved GFlop/s. */
+void dw_codelet_facto(starpu_data_handle_t dataA, unsigned nblocks)
+{
+	int ret;
+	/* zeroed so that the unused fields (j, k, remaining) are defined;
+	 * freed by dw_callback_codelet_update_getrf() */
+	cl_args *args = calloc(1, sizeof(cl_args));
+	STARPU_ASSERT(args);
+
+	args->i = 0;
+	args->nblocks = nblocks;
+	args->dataA = dataA;
+
+	start = starpu_timing_now();
+
+	/* inject a new task with this codelet into the system */
+	struct starpu_task *task = starpu_task_create();
+		task->callback_func = dw_callback_codelet_update_getrf;
+		task->callback_arg = args;
+		task->cl = &cl_getrf;
+		task->cl_arg = args;
+		task->cl_arg_size = sizeof(*args);
+
+		task->handles[0] = starpu_data_get_sub_data(dataA, 2, 0, 0);
+
+	/* schedule the codelet */
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_task_wait_for_all();
+
+	end = starpu_timing_now();
+
+	double timing = end - start;
+
+	unsigned n = starpu_matrix_get_nx(dataA);
+	double flop = (2.0f*n*n*n)/3.0f;
+
+	PRINTF("# size\tms\tGFlop/s\n");
+	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+}
+
+void dw_codelet_facto_v2(starpu_data_handle_t dataA, unsigned nblocks)
+{
+
+	advance_11 = calloc(nblocks, sizeof(*advance_11));
+	STARPU_ASSERT(advance_11);
+
+	advance_12_21 = calloc(nblocks*nblocks, sizeof(*advance_12_21));
+	STARPU_ASSERT(advance_12_21);
+
+	advance_22 = calloc(nblocks*nblocks*nblocks, sizeof(*advance_22));
+	STARPU_ASSERT(advance_22);
+
+	cl_args *args = calloc(1, sizeof(cl_args));
+
+	args->i = 0;
+	args->nblocks = nblocks;
+	args->dataA = dataA;
+
+	start = starpu_timing_now();
+
+	/* inject a new task with this codelet into the system */
+	struct starpu_task *task = starpu_task_create();
+		task->callback_func = dw_callback_v2_codelet_update_getrf;
+		task->callback_arg = args;
+		task->cl = &cl_getrf;
+		task->cl_arg = args;
+		task->cl_arg_size = sizeof(*args);
+
+		task->handles[0] =
starpu_data_get_sub_data(dataA, 2, 0, 0);
+
+	/* schedule the codelet */
+	int ret = starpu_task_submit(task);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		FPRINTF(stderr, "No worker may execute this task\n");
+		exit(0);
+	}
+	/* any other submission failure used to be silently ignored */
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_task_wait_for_all();
+
+	end = starpu_timing_now();
+
+	double timing = end - start;
+
+	unsigned n = starpu_matrix_get_nx(dataA);
+	double flop = (2.0f*n*n*n)/3.0f;
+
+	PRINTF("# size\tms\tGFlop/s\n");
+	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+
+	free(advance_11);
+	free(advance_12_21);
+	free(advance_22);
+}
+
+/* Starts StarPU (exits with status 77 when no device is available), sets up
+ * the four LU performance models, initialises cuBLAS support and allocates
+ * the dim x dim matrix *A and the dim-element vector *B.
+ * When pinned is non-zero the buffers come from starpu_malloc(), otherwise
+ * from malloc(); release them with free_system() using the same flag. */
+void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
+{
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		exit(77);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* the model symbol depends on the BLAS library selected at build time */
+#ifdef STARPU_ATLAS
+	char * symbol_getrf = "lu_model_getrf_atlas";
+	char * symbol_trsm_ll = "lu_model_trsm_ll_atlas";
+	char * symbol_trsm_ru = "lu_model_trsm_ru_atlas";
+	char * symbol_gemm = "lu_model_gemm_atlas";
+#elif defined(STARPU_GOTO)
+	char * symbol_getrf = "lu_model_getrf_goto";
+	char * symbol_trsm_ll = "lu_model_trsm_ll_goto";
+	char * symbol_trsm_ru = "lu_model_trsm_ru_goto";
+	char * symbol_gemm = "lu_model_gemm_goto";
+#elif defined(STARPU_OPENBLAS)
+	char * symbol_getrf = "lu_model_getrf_openblas";
+	char * symbol_trsm_ll = "lu_model_trsm_ll_openblas";
+	char * symbol_trsm_ru = "lu_model_trsm_ru_openblas";
+	char * symbol_gemm = "lu_model_gemm_openblas";
+#else
+	char * symbol_getrf = "lu_model_getrf";
+	char * symbol_trsm_ll = "lu_model_trsm_ll";
+	char * symbol_trsm_ru = "lu_model_trsm_ru";
+	char * symbol_gemm = "lu_model_gemm";
+#endif
+	initialize_lu_kernels_model(&model_getrf,symbol_getrf,task_getrf_cost,task_getrf_cost_cpu,task_getrf_cost_cuda);
+	initialize_lu_kernels_model(&model_trsm_ll,symbol_trsm_ll,task_trsm_ll_cost,task_trsm_ll_cost_cpu,task_trsm_ll_cost_cuda);
+	initialize_lu_kernels_model(&model_trsm_ru,symbol_trsm_ru,task_trsm_ru_cost,task_trsm_ru_cost_cpu,task_trsm_ru_cost_cuda);
+	initialize_lu_kernels_model(&model_gemm,symbol_gemm,task_gemm_cost,task_gemm_cost_cpu,task_gemm_cost_cuda);
+
+	starpu_cublas_init();
+
+	if (pinned)
+	{
+		/* starpu_malloc() reports failure through its return value */
+		ret = starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+		ret = starpu_malloc((void **)B, (size_t)dim*sizeof(float));
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	}
+	else
+	{
+		*A = malloc((size_t)dim*dim*sizeof(float));
+		STARPU_ASSERT(*A);
+		*B = malloc((size_t)dim*sizeof(float));
+		STARPU_ASSERT(*B);
+	}
+}
+
+/* Releases the buffers obtained from initialize_system(); dim and pinned must
+ * be the values that were passed to it. */
+void free_system(float *A, float *B, unsigned dim, unsigned pinned)
+{
+	if (pinned)
+	{
+		starpu_free_noflag(A, (size_t)dim*dim*sizeof(float));
+		starpu_free_noflag(B, (size_t)dim*sizeof(float));
+	}
+	else
+	{
+		free(A);
+		free(B);
+	}
+}
+
+/* LU-factorizes in place the size x size matrix matA (leading dimension ld).
+ * The matrix is registered with StarPU, split into nblocks x nblocks blocks
+ * and factorized with the callback pipeline selected by version (1, or 2 for
+ * any other value). _no_prio disables task priorities in version 2.
+ * With CHECK_RESULTS defined, a copy of the input is kept and compared with
+ * the product of the computed factors. */
+void dw_factoLU(float *matA, unsigned size,
+		unsigned ld, unsigned nblocks,
+		unsigned version, unsigned _no_prio)
+{
+
+#ifdef CHECK_RESULTS
+	FPRINTF(stderr, "Checking results ...\n");
+	float *Asaved;
+	Asaved = malloc((size_t)ld*ld*sizeof(float));
+	STARPU_ASSERT(Asaved);
+
+	memcpy(Asaved, matA, (size_t)ld*ld*sizeof(float));
+#endif
+
+	no_prio = _no_prio;
+
+	starpu_data_handle_t dataA;
+
+	/* monitor and partition the A matrix into blocks :
+	 * one block is now determined by 2 unsigned (i,j) */
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld,
+			size, size, sizeof(float));
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_matrix_filter_vertical_block,
+		.nchildren = nblocks
+	};
+
+	struct starpu_data_filter f2 =
+	{
+		.filter_func = starpu_matrix_filter_block,
+		.nchildren = nblocks
+	};
+
+	starpu_data_map_filters(dataA, 2, &f, &f2);
+
+	switch (version)
+	{
+		case 1:
+			dw_codelet_facto(dataA, nblocks);
+			break;
+		default:
+		case 2:
+			dw_codelet_facto_v2(dataA, nblocks);
+			break;
+	}
+
+	/* gather all the data */
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(dataA);
+
+#ifdef CHECK_RESULTS
+	compare_A_LU(Asaved, matA, size, ld);
+	/* the saved copy is only needed for the comparison */
+	free(Asaved);
+#endif +} diff --git a/examples/heat/dw_factolu.h b/examples/heat/dw_factolu.h new file mode 100644 index 0000000..f319760 --- /dev/null +++ b/examples/heat/dw_factolu.h @@ -0,0 +1,221 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DW_FACTO_LU_H__ +#define __DW_FACTO_LU_H__ + +#include +#include +#include +#include +#ifdef STARPU_USE_CUDA +#include +#include +#endif + +#include "../common/blas.h" + +#include "lu_kernels_model.h" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PRINTF(fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0)
+
+#define BLAS3_FLOP(n1,n2,n3)    \
+	(2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
+
+/* Argument block attached to every task and handed to its callback */
+typedef struct
+{
+	starpu_data_handle_t dataA;	/* the partitioned matrix */
+	unsigned i;
+	unsigned j;
+	unsigned k;
+	unsigned nblocks;		/* number of blocks per dimension */
+	unsigned *remaining;		/* counter shared by the tasks of one phase (version 1 only) */
+} cl_args;
+
+#ifdef CHECK_RESULTS
+/* Debug helper: rebuilds the two triangular factors stored in LU (size x
+ * size, leading dimension ld), multiplies them back together and prints the
+ * largest absolute difference with the original matrix A (leading dimension
+ * ld). Neither A nor LU is modified. */
+static void compare_A_LU(float *A, float *LU, unsigned size, unsigned ld)
+{
+	unsigned i,j;
+	float *L;
+	float *U;
+
+	/* do the size and index arithmetic in size_t: the products used to be
+	 * computed in unsigned and could wrap for large matrices */
+	const size_t n = size;
+	const size_t lda = ld;
+
+	/* calloc() both zeroes the factors and checks n*n*sizeof(float) */
+	L = calloc(n*n, sizeof(float));
+	STARPU_ASSERT(L);
+	U = calloc(n*n, sizeof(float));
+	STARPU_ASSERT(U);
+
+	/* only keep the lower part */
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < j; i++)
+		{
+			L[j+i*n] = LU[j+i*lda];
+		}
+
+		/* diag i = j */
+		L[j+j*n] = LU[j+j*lda];
+		U[j+j*n] = 1.0f;
+
+		for (i = j+1; i < size; i++)
+		{
+			U[j+i*n] = LU[j+i*lda];
+		}
+	}
+
+#if 0
+	/* display L */
+	FPRINTF(stdout, "(LU): \n");
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < size; i++)
+		{
+/*			if (i <= j)
+			{ */
+				FPRINTF(stdout, "%2.2f\t", LU[j +i*size]);
+/*			}
+			else
+			{
+				FPRINTF(stdout, ".\t");
+			} */
+		}
+		FPRINTF(stdout, "\n");
+	}
+
+
+
+	/* display L */
+	FPRINTF(stdout, "L: \n");
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < size; i++)
+		{
+/*			if (i <= j)
+			{ */
+				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
+/*			}
+			else
+			{
+				FPRINTF(stdout, ".\t");
+			} */
+		}
+		FPRINTF(stdout, "\n");
+	}
+
+	/* display U */
+	FPRINTF(stdout, "U: \n");
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < size; i++)
+		{
+/*			if (i <= j)
+			{ */
+				FPRINTF(stdout, "%2.2f\t", U[j +i*size]);
+/*			}
+			else
+			{
+				FPRINTF(stdout, ".\t");
+			} */
+		}
+		FPRINTF(stdout, "\n");
+	}
+
+#endif
+
+	/* now A_err = L, compute L*U */
+	STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
+
+	float max_err = 0.0f;
+	for (i = 0; i < size ; i++)
+	{
+		for (j = 0; j < size; j++)
+		{
+			max_err = STARPU_MAX(max_err, fabsf(L[j+i*n] - A[j+i*lda]));
+		}
+	}
+
+#if 0
+	/* display A */
+	FPRINTF(stdout, "A: \n");
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < size; i++)
+		{
+	/*		if (i <= j)
+			{ */
+				FPRINTF(stdout, "%2.2f\t", A[j +i*size]);
+	/*		}
+			else
+			{
+				FPRINTF(stdout, ".\t");
+			} */
+		}
+		FPRINTF(stdout, "\n");
+	}
+
+
+	/* display LU */
+	FPRINTF(stdout, "LU: \n");
+	for (j = 0; j < size; j++)
+	{
+		for (i = 0; i < size; i++)
+		{
+	/*		if (i <= j)
+			{ */
+				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
+	/*		}
+			else
+			{
+				FPRINTF(stdout, ".\t");
+			} */
+		}
+		FPRINTF(stdout, "\n");
+	}
+#endif
+
+	FPRINTF(stdout, "max error between A and L*U = %f \n", max_err);
+
+	/* the scratch factors used to be leaked */
+	free(L);
+	free(U);
+}
+#endif /* CHECK_RESULTS */
+
+void dw_cpu_codelet_update_getrf(void **, void *);
+void dw_cpu_codelet_update_trsm_ll(void **, void *);
+void dw_cpu_codelet_update_trsm_ru(void **, void *);
+void dw_cpu_codelet_update_gemm(void **, void *);
+
+#ifdef STARPU_USE_CUDA
+void dw_cublas_codelet_update_getrf(void *descr[], void *_args);
+void dw_cublas_codelet_update_trsm_ll(void *descr[], void *_args);
+void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args);
+void dw_cublas_codelet_update_gemm(void *descr[], void *_args);
+#endif
+
+void dw_callback_codelet_update_getrf(void *);
+void dw_callback_codelet_update_trsm_ll_21(void *);
+void dw_callback_codelet_update_gemm(void *);
+
+void dw_callback_v2_codelet_update_getrf(void *);
+void dw_callback_v2_codelet_update_trsm_ll(void *);
+void dw_callback_v2_codelet_update_trsm_ru(void *);
+void dw_callback_v2_codelet_update_gemm(void *);
+
+extern struct starpu_perfmodel model_getrf;
+extern struct starpu_perfmodel model_trsm_ll;
+extern struct starpu_perfmodel model_trsm_ru;
+extern struct starpu_perfmodel model_gemm;
+
+#endif /* __DW_FACTO_LU_H__ */
diff --git a/examples/heat/dw_factolu_grain.c b/examples/heat/dw_factolu_grain.c
new file mode 100644
index 0000000..a201205
--- /dev/null
+++ b/examples/heat/dw_factolu_grain.c
@@ -0,0 +1,382 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This implements an LU factorization. + * The task graph is submitted through dependency tags. + * It also changes the partitioning during execution: when called first, + * dw_factoLU_grain_inner splits the matrix with a big granularity (nblocks) + * and processes nbigblocks blocks, before calling itself again, to process the + * remainder of the matrix with a smaller granularity. 
+ */
+
+#include "dw_factolu.h"
+
+/* Tag layout: the caller-supplied prefix is shifted to bit 60, the task kind
+ * (1 = GETRF, 2 = TRSM_LL, 3 = TRSM_RU, 4 = GEMM) to bit 56, the step k to
+ * bit 32 (or bit 0 for GETRF) and the block indices to the low bits.
+ * TAG_GEMM packs i at bit 16 and j at bit 0, so tags stay distinct only
+ * while j fits in 16 bits. */
+#define TAG_GETRF(k, prefix)	((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k)))
+#define TAG_TRSM_LL(k,i, prefix)	((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((2ULL<<56) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(i))))
+#define TAG_TRSM_RU(k,j, prefix)	((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((3ULL<<56) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(j))))
+#define TAG_GEMM(k,i,j, prefix)	((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((4ULL<<56) | ((unsigned long long)(k)<<32) 	\
+					| ((unsigned long long)(i)<<16) 	\
+					| (unsigned long long)(j))))
+
+/*
+ *	Construct the DAG
+ */
+
+/* Creates a task with no codelet argument that is identified by tag id.
+ * The caller still has to set the codelet and the data handles. */
+static struct starpu_task *create_task(starpu_tag_t id)
+{
+	struct starpu_task *task = starpu_task_create();
+		task->cl_arg = NULL;
+
+	task->use_tag = 1;
+	task->tag_id = id;
+
+	return task;
+}
+
+/* Diagonal block factorization: one read-write buffer */
+static struct starpu_codelet cl_getrf =
+{
+	.modes = { STARPU_RW },
+	.cpu_funcs = {dw_cpu_codelet_update_getrf},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_getrf"},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {dw_cublas_codelet_update_getrf},
+#endif
+	.nbuffers = 1,
+	.model = &model_getrf
+};
+
+/* Builds the GETRF task of step k on block (k,k) at maximum priority and,
+ * for k > 0, makes it depend on GEMM(k-1,k,k).
+ * Unlike the other create_task_* helpers it does NOT submit the task: the
+ * task is returned and the caller submits it. */
+static struct starpu_task *create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned tag_prefix)
+{
+/*	FPRINTF(stdout, "task 11 k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */
+
+	struct starpu_task *task = create_task(TAG_GETRF(k, tag_prefix));
+
+	task->cl = &cl_getrf;
+
+	/* which sub-data is manipulated ? */
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+
+	/* this is an important task */
+	task->priority = STARPU_MAX_PRIO;
+
+	/* enforce dependencies ... */
+	if (k > 0)
+	{
+		starpu_tag_declare_deps(TAG_GETRF(k, tag_prefix), 1, TAG_GEMM(k-1, k, k, tag_prefix));
+	}
+
+	return task;
+}
+
+/* Triangular solve: buffer 0 is read, buffer 1 is updated in place */
+static struct starpu_codelet cl_trsm_ll =
+{
+	.modes = { STARPU_R, STARPU_RW },
+	.cpu_funcs = {dw_cpu_codelet_update_trsm_ll},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {dw_cublas_codelet_update_trsm_ll},
+#endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.nbuffers = 2,
+	.model = &model_trsm_ll
+};
+
+/* Builds and submits the TRSM_LL task of step k that reads block (k,k) and
+ * updates block (i,k). It depends on GETRF(k) and, for k > 0, on
+ * GEMM(k-1,i,k). The task right next to the diagonal (i == k+1) gets the
+ * maximum priority. */
+static void create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned tag_prefix)
+{
+	int ret;
+
+/*	FPRINTF(stdout, "task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */
+
+	struct starpu_task *task = create_task(TAG_TRSM_LL(k, i, tag_prefix));
+
+	task->cl = &cl_trsm_ll;
+
+	/* which sub-data is manipulated ? */
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, i, k);
+
+	if (i == k+1)
+	{
+		task->priority = STARPU_MAX_PRIO;
+	}
+
+	/* enforce dependencies ... */
+	if (k > 0)
+	{
+		starpu_tag_declare_deps(TAG_TRSM_LL(k, i, tag_prefix), 2, TAG_GETRF(k, tag_prefix), TAG_GEMM(k-1, i, k, tag_prefix));
+	}
+	else
+	{
+		starpu_tag_declare_deps(TAG_TRSM_LL(k, i, tag_prefix), 1, TAG_GETRF(k, tag_prefix));
+	}
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+/* Triangular solve: buffer 0 is read, buffer 1 is updated in place */
+static struct starpu_codelet cl_trsm_ru =
+{
+	.modes = { STARPU_R, STARPU_RW },
+	.cpu_funcs = {dw_cpu_codelet_update_trsm_ru},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {dw_cublas_codelet_update_trsm_ru},
+#endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.nbuffers = 2,
+	.model = &model_trsm_ru
+};
+
+/* Builds and submits the TRSM_RU task of step k that reads block (k,k) and
+ * updates block (k,j). It depends on GETRF(k) and, for k > 0, on
+ * GEMM(k-1,k,j). The task right next to the diagonal (j == k+1) gets the
+ * maximum priority. */
+static void create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned tag_prefix)
+{
+	int ret;
+	struct starpu_task *task = create_task(TAG_TRSM_RU(k, j, tag_prefix));
+
+	task->cl = &cl_trsm_ru;
+
+	/* which sub-data is manipulated ? */
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);
+
+	if (j == k+1)
+	{
+		task->priority = STARPU_MAX_PRIO;
+	}
+
+	/* enforce dependencies ... */
+	if (k > 0)
+	{
+		starpu_tag_declare_deps(TAG_TRSM_RU(k, j, tag_prefix), 2, TAG_GETRF(k, tag_prefix), TAG_GEMM(k-1, k, j, tag_prefix));
+	}
+	else
+	{
+		starpu_tag_declare_deps(TAG_TRSM_RU(k, j, tag_prefix), 1, TAG_GETRF(k, tag_prefix));
+	}
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+/* Trailing block update: two read-only inputs, one read-write output */
+static struct starpu_codelet cl_gemm =
+{
+	.modes = { STARPU_R, STARPU_R, STARPU_RW },
+	.cpu_funcs = {dw_cpu_codelet_update_gemm},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_gemm"},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {dw_cublas_codelet_update_gemm},
+#endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.nbuffers = 3,
+	.model = &model_gemm
+};
+
+/* Builds the GEMM task of step k that reads blocks (i,k) and (k,j) and
+ * updates block (i,j); the block next to the diagonal (i == j == k+1) gets
+ * the maximum priority. */
+static void create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned tag_prefix)
+{
+	int ret;
+/*	FPRINTF(stdout, "task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */
+
+	struct starpu_task *task = create_task(TAG_GEMM(k, i, j, tag_prefix));
+
+	task->cl = &cl_gemm;
+
+	/* which sub-data is manipulated ? */
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, i, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);
+	task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j);
+
+	if ((i == k + 1) && (j == k +1))
+	{
+		task->priority = STARPU_MAX_PRIO;
+	}
+
+	/* enforce dependencies ...
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j, tag_prefix), 3, TAG_GEMM(k-1, i, j, tag_prefix), TAG_TRSM_LL(k, i, tag_prefix), TAG_TRSM_RU(k, j, tag_prefix)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j, tag_prefix), 2, TAG_TRSM_LL(k, i, tag_prefix), TAG_TRSM_RU(k, j, tag_prefix)); + } + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_size, + unsigned ld, unsigned blocksize, unsigned tag_prefix) +{ + int ret; + /* + * (re)partition data + */ + starpu_data_handle_t dataA; + starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); + + STARPU_ASSERT((size % blocksize) == 0); + STARPU_ASSERT((inner_size % blocksize) == 0); + + unsigned nblocks = size / blocksize; + unsigned maxk = inner_size / blocksize; + + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_vertical_block, + .nchildren = nblocks + }; + + struct starpu_data_filter f2 = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nblocks + }; + + starpu_data_map_filters(dataA, 2, &f, &f2); + + + /* + * submit tasks + */ + + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned i,j,k; + + /* if maxk < nblocks we'll stop before the LU decomposition is totally done */ + for (k = 0; k < maxk; k++) + { + struct starpu_task *task = create_task_getrf(dataA, k, tag_prefix); + + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (i = k+1; i +#endif + +#ifdef STARPU_USE_CUDA +#include +static const float p1 = 1.0; +static const float m1 = -1.0; +#endif + +unsigned count_getrf_per_worker[STARPU_NMAXWORKERS] = {0}; +unsigned count_trsm_ll_per_worker[STARPU_NMAXWORKERS] = {0}; +unsigned 
count_trsm_ru_per_worker[STARPU_NMAXWORKERS] = {0}; +unsigned count_gemm_per_worker[STARPU_NMAXWORKERS] = {0}; + +unsigned count_total_per_worker[STARPU_NMAXWORKERS] = {0}; + +unsigned count_getrf_total = 0; +unsigned count_trsm_ll_total = 0; +unsigned count_trsm_ru_total = 0; +unsigned count_gemm_total = 0; + +void display_stat_heat(void) +{ + unsigned nworkers = starpu_worker_get_count(); + + FPRINTF(stderr, "STATS : \n"); + + unsigned worker; + for (worker = 0; worker < nworkers; worker++) + { + count_total_per_worker[worker] = count_getrf_per_worker[worker] + + count_trsm_ll_per_worker[worker] + + count_trsm_ru_per_worker[worker] + + count_gemm_per_worker[worker]; + + count_getrf_total += count_getrf_per_worker[worker]; + count_trsm_ll_total += count_trsm_ll_per_worker[worker]; + count_trsm_ru_total += count_trsm_ru_per_worker[worker]; + count_gemm_total += count_gemm_per_worker[worker]; + } + + FPRINTF(stderr, "\t11 (diagonal block LU)\n"); + for (worker = 0; worker < nworkers; worker++) + { + if (count_total_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + + FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_getrf_per_worker[worker], count_getrf_total, (100.0*count_getrf_per_worker[worker])/count_getrf_total); + } + } + + FPRINTF(stderr, "\t12 (TRSM)\n"); + for (worker = 0; worker < nworkers; worker++) + { + if (count_total_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + + FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_trsm_ll_per_worker[worker], count_trsm_ll_total, (100.0*count_trsm_ll_per_worker[worker])/count_trsm_ll_total); + } + } + + + FPRINTF(stderr, "\t21 (TRSM)\n"); + for (worker = 0; worker < nworkers; worker++) + { + if (count_total_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + + FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_trsm_ru_per_worker[worker], 
count_trsm_ru_total, (100.0*count_trsm_ru_per_worker[worker])/count_trsm_ru_total); + } + } + + FPRINTF(stderr, "\t22 (SGEMM)\n"); + for (worker = 0; worker < nworkers; worker++) + { + if (count_total_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + + FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_gemm_per_worker[worker], count_gemm_total, (100.0*count_gemm_per_worker[worker])/count_gemm_total); + } + } +} + +/* + * GEMM + */ + +static inline void dw_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) +{ + (void)_args; + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + STARPU_SGEMM("N", "N", dy, dx, dz, + -1.0f, left, ld21, right, ld12, + 1.0f, center, ld22); + break; + +#ifdef STARPU_USE_CUDA + case 1: + status = cublasSgemm(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_N, + dx, dy, dz, &m1, left, ld21, + right, ld12, &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void dw_cpu_codelet_update_gemm(void *descr[], void *_args) +{ + dw_common_cpu_codelet_update_gemm(descr, 0, _args); + + int id = starpu_worker_get_id_check(); + count_gemm_per_worker[id]++; +} + +#ifdef STARPU_USE_CUDA +void dw_cublas_codelet_update_gemm(void *descr[], void *_args) +{ + dw_common_cpu_codelet_update_gemm(descr, 1, _args); + + int id = starpu_worker_get_id_check(); + 
count_gemm_per_worker[id]++; +} +#endif /* STARPU_USE_CUDA */ + +/* + * TRSM_LL + */ + +static inline void dw_common_codelet_update_trsm_ll(void *descr[], int s, void *_args) +{ + (void)_args; + float *sub11; + float *sub12; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + sub12 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + /* solve L11 U12 = A12 (find U12) */ + switch (s) + { + case 0: + STARPU_STRSM("L", "L", "N", "N", + nx12, ny12, 1.0f, sub11, ld11, sub12, ld12); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = cublasStrsm(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, + ny12, nx12, + &p1, sub11, ld11, sub12, ld12); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void dw_cpu_codelet_update_trsm_ll(void *descr[], void *_args) +{ + dw_common_codelet_update_trsm_ll(descr, 0, _args); + + int id = starpu_worker_get_id_check(); + count_trsm_ll_per_worker[id]++; +} + +#ifdef STARPU_USE_CUDA +void dw_cublas_codelet_update_trsm_ll(void *descr[], void *_args) +{ + dw_common_codelet_update_trsm_ll(descr, 1, _args); + + int id = starpu_worker_get_id_check(); + count_trsm_ll_per_worker[id]++; +} +#endif /* STARPU_USE_CUDA */ + +/* + * TRSM_RU + */ + +static inline void dw_common_codelet_update_trsm_ru(void *descr[], int s, void *_args) +{ + (void)_args; + float *sub11; + float *sub21; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); + 
unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + STARPU_STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = cublasStrsm(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, + ny21, nx21, &p1, sub11, ld11, sub21, ld21); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void dw_cpu_codelet_update_trsm_ru(void *descr[], void *_args) +{ + dw_common_codelet_update_trsm_ru(descr, 0, _args); + + int id = starpu_worker_get_id_check(); + count_trsm_ru_per_worker[id]++; +} + +#ifdef STARPU_USE_CUDA +void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args) +{ + dw_common_codelet_update_trsm_ru(descr, 1, _args); + + int id = starpu_worker_get_id_check(); + count_trsm_ru_per_worker[id]++; +} +#endif + +/* + * GETRF + */ + +static inline void debug_print(float *tab, unsigned ld, unsigned n) +{ + unsigned j,i; + for (j = 0; j < n; j++) + { + for (i = 0; i < n; i++) + { + FPRINTF(stderr, "%2.2f\t", tab[(size_t)j+(size_t)i*ld]); + } + FPRINTF(stderr, "\n"); + } + + FPRINTF(stderr, "\n"); +} + +static inline void dw_common_codelet_update_getrf(void *descr[], int s, void *_args) +{ + (void)_args; + float *sub11; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + + unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); + unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned long z; + +#ifdef STARPU_USE_CUDA + cudaStream_t stream; + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + for (z = 0; z < nx; z++) + { + float pivot; + pivot = sub11[z+z*ld]; + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + { + if (fpclassify(pivot) == FP_ZERO) + /* Running in valgrind, don't care about the result */ + pivot = 1.0f; + } + else +#endif + 
STARPU_ASSERT(fpclassify(pivot) != FP_ZERO); + + STARPU_SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld); + + STARPU_SGER(nx - z - 1, nx - z - 1, -1.0f, + &sub11[z+(z+1)*ld], ld, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1) + (z+1)*ld],ld); + } + break; +#ifdef STARPU_USE_CUDA + case 1: + /* TODO: Use cusolver */ + stream = starpu_cuda_get_local_stream(); + for (z = 0; z < nx; z++) + { + float pivot; + cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + { + if (fpclassify(pivot) == FP_ZERO) + /* Running in valgrind, don't care about the result */ + pivot = 1.0f; + } + else +#endif + STARPU_ASSERT(fpclassify(pivot) != FP_ZERO); + + float scal = 1.0f/pivot; + + status = cublasSscal(starpu_cublas_get_local_handle(), + nx - z - 1, &scal, &sub11[z+(z+1)*ld], ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + status = cublasSger(starpu_cublas_get_local_handle(), + nx - z - 1, nx - z - 1, &m1, + &sub11[z+(z+1)*ld], ld, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1) + (z+1)*ld],ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + + cudaStreamSynchronize(stream); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + + +void dw_cpu_codelet_update_getrf(void *descr[], void *_args) +{ + dw_common_codelet_update_getrf(descr, 0, _args); + + int id = starpu_worker_get_id_check(); + count_getrf_per_worker[id]++; +} + +#ifdef STARPU_USE_CUDA +void dw_cublas_codelet_update_getrf(void *descr[], void *_args) +{ + dw_common_codelet_update_getrf(descr, 1, _args); + + int id = starpu_worker_get_id_check(); + count_getrf_per_worker[id]++; +} +#endif /* STARPU_USE_CUDA */ diff --git a/examples/heat/dw_factolu_tag.c b/examples/heat/dw_factolu_tag.c new file mode 100644 index 0000000..0bd0574 --- /dev/null +++ b/examples/heat/dw_factolu_tag.c @@ -0,0 +1,335 @@ +/* StarPU --- Runtime 
system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This implements an LU factorization. + * The task graph is submitted through dependency tags. + */ + +#include "dw_factolu.h" + +#define TAG_GETRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) +#define TAG_TRSM_LL(k,i) ((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(i)))) +#define TAG_TRSM_RU(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +static unsigned no_prio = 0; + +/* + * Construct the DAG + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +static struct starpu_codelet cl_getrf = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {dw_cpu_codelet_update_getrf}, + .cpu_funcs_name = {"dw_cpu_codelet_update_getrf"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_getrf}, +#endif + .nbuffers = 1, + .model = &model_getrf +}; + +static struct starpu_task 
*create_task_getrf(starpu_data_handle_t dataA, unsigned k) +{ +/* printf("task 11 k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */ + + struct starpu_task *task = create_task(TAG_GETRF(k)); + + task->cl = &cl_getrf; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + + /* this is an important task */ + if (!no_prio) + task->priority = STARPU_MAX_PRIO; + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); + } + + return task; +} + +static struct starpu_codelet cl_trsm_ll = +{ + .modes = { STARPU_R, STARPU_RW }, + .cpu_funcs = {dw_cpu_codelet_update_trsm_ll}, + .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_trsm_ll}, +#endif + .nbuffers = 2, + .model = &model_trsm_ll +}; + +static void create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned i) +{ + int ret; + +/* printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ + + struct starpu_task *task = create_task(TAG_TRSM_LL(k, i)); + + task->cl = &cl_trsm_ll; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, i, k); + + if (!no_prio && (i == k+1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 1, TAG_GETRF(k)); + } + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static struct starpu_codelet cl_trsm_ru = +{ + .modes = { STARPU_R, STARPU_RW }, + .cpu_funcs = {dw_cpu_codelet_update_trsm_ru}, + .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_trsm_ru}, +#endif + .nbuffers = 2, + .model = &model_trsm_ru +}; + +static void create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned j) +{ + int ret; + struct starpu_task *task = create_task(TAG_TRSM_RU(k, j)); + + task->cl = &cl_trsm_ru; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); + + if (!no_prio && (j == k+1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, j), 1, TAG_GETRF(k)); + } + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static struct starpu_codelet cl_gemm = +{ + .modes = { STARPU_R, STARPU_R, STARPU_RW }, + .cpu_funcs = {dw_cpu_codelet_update_gemm}, + .cpu_funcs_name = {"dw_cpu_codelet_update_gemm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dw_cublas_codelet_update_gemm}, +#endif + .nbuffers = 3, + .model = &model_gemm +}; + +static void create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j) +{ + int ret; + +/* printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ + + struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); + + task->cl = &cl_gemm; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, i, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); + task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j); + + if (!no_prio && (i == k + 1) && (j == k +1)) + { + task->priority = STARPU_MAX_PRIO; + } + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), TAG_TRSM_LL(k, i), TAG_TRSM_RU(k, j)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, TAG_TRSM_LL(k, i), TAG_TRSM_RU(k, j)); + } + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +/* + * code to bootstrap the factorization + */ + +static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks) +{ + int ret; + + double start; + double end; + + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned i,j,k; + + for (k = 0; k < nblocks; k++) + { + struct starpu_task *task = create_task_getrf(dataA, k); + + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (i = k+1; iuse_tag = 1; + task->tag_id = id; + + return task; +} + +static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t *_nnz, uint32_t *_nrow, uint32_t **_colind, uint32_t **_rowptr) +{ + /* we need a sparse symmetric (definite positive ?) 
matrix and a "dense" vector */ + + /* example of 3-band matrix */ + float *nzval; + uint32_t nnz; + uint32_t *colind; + uint32_t *rowptr; + + nnz = 3*_size-2; + + nzval = malloc(nnz*sizeof(float)); + colind = malloc(nnz*sizeof(uint32_t)); + /* CSR needs nrow+1 row pointers: the kernels read rowptr[row+1] for the last row */ + rowptr = malloc(((size_t)_size+1)*sizeof(uint32_t)); + + assert(nzval); + assert(colind); + assert(rowptr); + + + /* fill the matrix */ + unsigned row; + unsigned pos = 0; + for (row = 0; row < _size; row++) + { + rowptr[row] = pos; + + if (row > 0) + { + nzval[pos] = 1.0f; + colind[pos] = row-1; + pos++; + } + + nzval[pos] = 5.0f; + colind[pos] = row; + pos++; + + if (row < _size - 1) + { + nzval[pos] = 1.0f; + colind[pos] = row+1; + pos++; + } + } + + /* closing sentinel: one past the last non-zero entry */ + rowptr[_size] = pos; + + *_nnz = nnz; + *_nrow = _size; + *_nzvalA = nzval; + *_colind = colind; + *_rowptr = rowptr; + + STARPU_ASSERT(pos == nnz); + + /* initiate the 2 vectors */ + float *invec, *outvec; + invec = malloc(_size*sizeof(float)); + assert(invec); + + outvec = malloc(_size*sizeof(float)); + assert(outvec); + + /* fill those */ + unsigned ind; + for (ind = 0; ind < _size; ind++) + { + invec[ind] = 2.0f; + outvec[ind] = 0.0f; + } + + *_vecb = invec; + *_vecx = outvec; +} + +void init_problem(void) +{ + /* create the sparse input matrix */ + float *nzval; + float *vecb; + float *vecx; + uint32_t nnz; + uint32_t nrow; + uint32_t *colind; + uint32_t *rowptr; + + create_data(&nzval, &vecb, &vecx, &nnz, &nrow, &colind, &rowptr); + + conjugate_gradient(nzval, vecb, vecx, nnz, nrow, colind, rowptr); +} + +/* + * cg initialization phase + */ + +static struct starpu_codelet cl1 = +{ + .cpu_funcs = { cpu_codelet_func_1 }, + .cpu_funcs_name = { "cpu_codelet_func_1" }, + .nbuffers = 4, + .modes = { STARPU_R, STARPU_R, STARPU_W, STARPU_R }, +}; + +static struct starpu_codelet cl2 = +{ + .cpu_funcs = { cpu_codelet_func_2 }, + .cpu_funcs_name = { "cpu_codelet_func_2" }, + .nbuffers = 2, + .modes = { STARPU_W, STARPU_R }, +}; + +static struct starpu_codelet cl3 = +{ + .cpu_funcs = { cpu_codelet_func_3 }, + 
.cpu_funcs_name = { "cpu_codelet_func_3" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_3 }, +#endif + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +void init_cg(struct cg_problem *problem) +{ + int ret; + + problem->i = 0; + + /* r = b - A x */ + struct starpu_task *task1 = create_task(1UL); + task1->cl = &cl1; + task1->handles[0] = problem->ds_matrixA; + task1->handles[1] = problem->ds_vecx; + task1->handles[2] = problem->ds_vecr; + task1->handles[3] = problem->ds_vecb; + + /* d = r */ + struct starpu_task *task2 = create_task(2UL); + task2->cl = &cl2; + task2->handles[0] = problem->ds_vecd; + task2->handles[1] = problem->ds_vecr; + + starpu_tag_declare_deps((starpu_tag_t)2UL, 1, (starpu_tag_t)1UL); + + /* delta_new = trans(r) r */ + struct starpu_task *task3 = create_task(3UL); + task3->cl = &cl3; + task3->cl_arg = problem; + task3->cl_arg_size = sizeof(*problem); + task3->handles[0] = problem->ds_vecr; + + task3->callback_func = iteration_cg; + task3->callback_arg = problem; + + /* XXX 3 should only depend on 1 ... */ + starpu_tag_declare_deps((starpu_tag_t)3UL, 1, (starpu_tag_t)2UL); + + /* launch the computation now */ + ret = starpu_task_submit(task1); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task2); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task3); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +/* + * the inner iteration of the cg algorithm + * the codelet code launcher is its own callback ! 
+ */ + +static struct starpu_codelet cl4 = +{ + .cpu_funcs = { cpu_codelet_func_4 }, + .cpu_funcs_name = { "cpu_codelet_func_4" }, + .nbuffers = 3, + .modes = { STARPU_R, STARPU_R, STARPU_W }, +}; + +static struct starpu_codelet cl5 = +{ + .cpu_funcs = { cpu_codelet_func_5 }, + .cpu_funcs_name = { "cpu_codelet_func_5" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_5 }, +#endif + .nbuffers = 2, + .modes = { STARPU_R, STARPU_R }, +}; + +static struct starpu_codelet cl6 = +{ + .cpu_funcs = { cpu_codelet_func_6 }, + .cpu_funcs_name = { "cpu_codelet_func_6" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_6 }, + .cuda_flags = { STARPU_CUDA_ASYNC }, +#endif + .nbuffers = 2, + .modes = { STARPU_RW, STARPU_R }, +}; + +static struct starpu_codelet cl7 = +{ + .cpu_funcs = { cpu_codelet_func_7 }, + .cpu_funcs_name = { "cpu_codelet_func_7" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_7 }, + .cuda_flags = { STARPU_CUDA_ASYNC }, +#endif + .nbuffers = 2, + .modes = { STARPU_RW, STARPU_R }, +}; + +static struct starpu_codelet cl8 = +{ + .cpu_funcs = { cpu_codelet_func_8 }, + .cpu_funcs_name = { "cpu_codelet_func_8" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_8 }, +#endif + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +static struct starpu_codelet cl9 = +{ + .cpu_funcs = { cpu_codelet_func_9 }, + .cpu_funcs_name = { "cpu_codelet_func_9" }, +#ifdef STARPU_USE_CUDA + .cuda_funcs = { cublas_codelet_func_9 }, + .cuda_flags = { STARPU_CUDA_ASYNC }, +#endif + .nbuffers = 2, + .modes = { STARPU_RW, STARPU_R }, +}; + +void launch_new_cg_iteration(struct cg_problem *problem) +{ + int ret; + + unsigned iter = problem->i; + + unsigned long long maskiter = ((unsigned long long)iter*1024); + + /* q = A d */ + struct starpu_task *task4 = create_task(maskiter | 4UL); + task4->cl = &cl4; + task4->handles[0] = problem->ds_matrixA; + task4->handles[1] = problem->ds_vecd; + task4->handles[2] = problem->ds_vecq; + + /* alpha = 
delta_new / (trans(d) q)*/ + struct starpu_task *task5 = create_task(maskiter | 5UL); + task5->cl = &cl5; + task5->cl_arg = problem; + task5->cl_arg_size = sizeof(*problem); + task5->handles[0] = problem->ds_vecd; + task5->handles[1] = problem->ds_vecq; + + starpu_tag_declare_deps((starpu_tag_t)(maskiter | 5UL), 1, (starpu_tag_t)(maskiter | 4UL)); + + /* x = x + alpha d */ + struct starpu_task *task6 = create_task(maskiter | 6UL); + task6->cl = &cl6; + task6->cl_arg = problem; + task6->cl_arg_size = sizeof(*problem); + task6->handles[0] = problem->ds_vecx; + task6->handles[1] = problem->ds_vecd; + + starpu_tag_declare_deps((starpu_tag_t)(maskiter | 6UL), 1, (starpu_tag_t)(maskiter | 5UL)); + + /* r = r - alpha q */ + struct starpu_task *task7 = create_task(maskiter | 7UL); + task7->cl = &cl7; + task7->cl_arg = problem; + task7->cl_arg_size = sizeof(*problem); + task7->handles[0] = problem->ds_vecr; + task7->handles[1] = problem->ds_vecq; + + starpu_tag_declare_deps((starpu_tag_t)(maskiter | 7UL), 1, (starpu_tag_t)(maskiter | 6UL)); + + /* update delta_* and compute beta */ + struct starpu_task *task8 = create_task(maskiter | 8UL); + task8->cl = &cl8; + task8->cl_arg = problem; + task8->cl_arg_size = sizeof(*problem); + task8->handles[0] = problem->ds_vecr; + + starpu_tag_declare_deps((starpu_tag_t)(maskiter | 8UL), 1, (starpu_tag_t)(maskiter | 7UL)); + + /* d = r + beta d */ + struct starpu_task *task9 = create_task(maskiter | 9UL); + task9->cl = &cl9; + task9->cl_arg = problem; + task9->cl_arg_size = sizeof(*problem); + task9->handles[0] = problem->ds_vecd; + task9->handles[1] = problem->ds_vecr; + + starpu_tag_declare_deps((starpu_tag_t)(maskiter | 9UL), 1, (starpu_tag_t)(maskiter | 8UL)); + + task9->callback_func = iteration_cg; + task9->callback_arg = problem; + + /* launch the computation now */ + ret = starpu_task_submit(task4); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task5); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_submit"); + ret = starpu_task_submit(task6); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task7); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task8); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task9); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +void iteration_cg(void *problem) +{ + struct cg_problem *pb = problem; + + FPRINTF(stdout, "i : %d (MAX %d)\n\tdelta_new %f (%f)\n", pb->i, MAXITER, pb->delta_new, sqrt(pb->delta_new / pb->size)); + + if ((pb->i < MAXITER) && + (pb->delta_new > pb->epsilon)) + { + if (pb->i % 1000 == 0) + FPRINTF(stdout, "i : %d\n\tdelta_new %f (%f)\n", pb->i, pb->delta_new, sqrt(pb->delta_new / pb->size)); + + pb->i++; + + /* we did not reach the stop condition yet */ + launch_new_cg_iteration(problem); + } + else + { + /* we may stop */ + FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1); + FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new); + sem_post(pb->sem); + } +} + +/* + * initializing the problem + */ + +void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, + unsigned nrow, uint32_t *colind, uint32_t *rowptr) +{ + /* first register all the data structures to StarPU */ + + starpu_data_handle_t ds_matrixA; + starpu_data_handle_t ds_vecx, ds_vecb; + starpu_data_handle_t ds_vecr, ds_vecd, ds_vecq; + + /* first the user-allocated data */ + starpu_csr_data_register(&ds_matrixA, STARPU_MAIN_RAM, nnz, nrow, + (uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float)); + starpu_vector_data_register(&ds_vecx, STARPU_MAIN_RAM, (uintptr_t)vecx, nrow, sizeof(float)); + starpu_vector_data_register(&ds_vecb, STARPU_MAIN_RAM, (uintptr_t)vecb, nrow, sizeof(float)); + + /* then allocate the algorithm intern data */ + float *ptr_vecr, *ptr_vecd, *ptr_vecq; + + unsigned i; + starpu_malloc((void **)&ptr_vecr, nrow*sizeof(float)); + starpu_malloc((void 
**)&ptr_vecd, nrow*sizeof(float)); + starpu_malloc((void **)&ptr_vecq, nrow*sizeof(float)); + + for (i = 0; i < nrow; i++) + { + ptr_vecr[i] = 0.0f; + ptr_vecd[i] = 0.0f; + ptr_vecq[i] = 0.0f; + } + + FPRINTF(stdout, "nrow = %u \n", nrow); + + /* and register them as well */ + starpu_vector_data_register(&ds_vecr, STARPU_MAIN_RAM, (uintptr_t)ptr_vecr, nrow, sizeof(float)); + starpu_vector_data_register(&ds_vecd, STARPU_MAIN_RAM, (uintptr_t)ptr_vecd, nrow, sizeof(float)); + starpu_vector_data_register(&ds_vecq, STARPU_MAIN_RAM, (uintptr_t)ptr_vecq, nrow, sizeof(float)); + + /* we now have the complete problem */ + struct cg_problem problem; + + problem.ds_matrixA = ds_matrixA; + problem.ds_vecx = ds_vecx; + problem.ds_vecb = ds_vecb; + problem.ds_vecr = ds_vecr; + problem.ds_vecd = ds_vecd; + problem.ds_vecq = ds_vecq; + + problem.epsilon = EPSILON; + problem.size = nrow; + problem.delta_old = 1.0; + problem.delta_new = 1.0; /* just to make sure we do at least one iteration */ + + /* we need a semaphore to synchronize with callbacks */ + sem_t sem; + sem_init(&sem, 0, 0U); + problem.sem = &sem; + + init_cg(&problem); + + sem_wait(&sem); + sem_destroy(&sem); + + starpu_task_wait_for_all(); + + print_results(vecx, nrow); + + starpu_data_unregister(ds_matrixA); + starpu_data_unregister(ds_vecx); + starpu_data_unregister(ds_vecb); + starpu_data_unregister(ds_vecr); + starpu_data_unregister(ds_vecd); + starpu_data_unregister(ds_vecq); + + starpu_free_noflag(ptr_vecr, nrow*sizeof(float)); + starpu_free_noflag(ptr_vecd, nrow*sizeof(float)); + starpu_free_noflag(ptr_vecq, nrow*sizeof(float)); +} + + +void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, + unsigned nrow, uint32_t *colind, uint32_t *rowptr) +{ + /* start the runtime */ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_cublas_init(); + + conjugate_gradient(nzvalA, vecb, vecx, nnz, nrow, colind, 
rowptr); + + starpu_shutdown(); +} diff --git a/examples/heat/dw_sparse_cg.h b/examples/heat/dw_sparse_cg.h new file mode 100644 index 0000000..5026899 --- /dev/null +++ b/examples/heat/dw_sparse_cg.h @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DW_SPARSE_CG_H__ +#define __DW_SPARSE_CG_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../common/blas.h" + +#define MAXITER 100000 +#define EPSILON 0.0000001f + +/* code parameters */ +static uint32_t _size = 33554432; +static unsigned _usecpu = 0; +static unsigned _blocks = 512; +static unsigned _grids = 8; + +struct cg_problem +{ + starpu_data_handle_t ds_matrixA; + starpu_data_handle_t ds_vecx; + starpu_data_handle_t ds_vecb; + starpu_data_handle_t ds_vecr; + starpu_data_handle_t ds_vecd; + starpu_data_handle_t ds_vecq; + + sem_t *sem; + + float alpha; + float beta; + float delta_0; + float delta_old; + float delta_new; + float epsilon; + + int i; + unsigned size; +}; + +/* some useful functions; parse_args ignores a trailing option whose value is missing */ +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-size") == 0 && i + 1 < argc) /* value must follow, argv[argc] is NULL */ + { + char *argptr; + _size = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-block") == 0 && i + 1 < argc) + { + char *argptr; + _blocks = strtol(argv[++i], 
&argptr, 10); + } + + if (strcmp(argv[i], "-grid") == 0 && i + 1 < argc) + { + char *argptr; + _grids = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-cpu") == 0) + { + _usecpu = 1; + } + } +} + + +static void print_results(float *result, unsigned size) +{ + printf("**** RESULTS **** \n"); + unsigned i; + + for (i = 0; i < STARPU_MIN(size, 16); i++) + { + printf("%u -> %f\n", i, result[i]); + } +} + +void cpu_codelet_func_1(void *descr[], void *arg); + +void cpu_codelet_func_2(void *descr[], void *arg); + +void cublas_codelet_func_3(void *descr[], void *arg); +void cpu_codelet_func_3(void *descr[], void *arg); + +void cpu_codelet_func_4(void *descr[], void *arg); + +void cpu_codelet_func_5(void *descr[], void *arg); +void cublas_codelet_func_5(void *descr[], void *arg); + +void cublas_codelet_func_6(void *descr[], void *arg); +void cpu_codelet_func_6(void *descr[], void *arg); + +void cublas_codelet_func_7(void *descr[], void *arg); +void cpu_codelet_func_7(void *descr[], void *arg); + +void cublas_codelet_func_8(void *descr[], void *arg); +void cpu_codelet_func_8(void *descr[], void *arg); + +void cublas_codelet_func_9(void *descr[], void *arg); +void cpu_codelet_func_9(void *descr[], void *arg); + +void iteration_cg(void *problem); + +void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, + unsigned nrow, uint32_t *colind, uint32_t *rowptr); + +#endif /* __DW_SPARSE_CG_H__ */ diff --git a/examples/heat/dw_sparse_cg_kernels.c b/examples/heat/dw_sparse_cg_kernels.c new file mode 100644 index 0000000..c86802d --- /dev/null +++ b/examples/heat/dw_sparse_cg_kernels.c @@ -0,0 +1,446 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "dw_sparse_cg.h" + +#ifdef STARPU_USE_CUDA +#include +#endif + +/* + * Algorithm : + * + * i = 0 + * r = b - A x + * (d = A x ; r = r - d) + * d = r + * delta_new = trans(r) r + * delta_0 = delta_new + * + * while (i < i_max && delta_new > eps^2 delta_0) + * { + * q = A d + * alpha = delta_new / (trans(d) q) + * x = x + alpha d + * if (i is divisible by 50) + * r = b - A x + * else + * r = r - alpha q + * delta_old = delta_new + * delta_new = trans(r) r + * beta = delta_new / delta_old + * d = r + beta d + * i = i + 1 + * } + */ + + +/* + * compute r = b - A x + * + * descr[0] = A, descr[1] = x, descr [2] = r, descr[3] = b + */ + +void cpu_codelet_func_1(void *descr[], void *arg) +{ + (void)arg; + float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); + uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); + uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); + + uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); /* offset subtracted from the rowptr[] entries below, same accessor as cpu_codelet_func_4 */ + + float *vecx = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + float *vecr = (float *)STARPU_VECTOR_GET_PTR(descr[2]); + float *vecb = (float *)STARPU_VECTOR_GET_PTR(descr[3]); + + + uint32_t nrow; + + nrow = STARPU_CSR_GET_NROW(descr[0]); + + unsigned row; + for (row = 0; row < nrow; row++) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstentry; + unsigned lastindex = rowptr[row+1] -
firstentry; + + for (index = firstindex; index < lastindex; index++) + { + unsigned col; + + col = colind[index]; + tmp += nzval[index]*vecx[col]; + } + + vecr[row] = vecb[row] - tmp; + } +} + +/* + * compute d = r + * descr[0] = d, descr[1] = r + */ +void cpu_codelet_func_2(void *descr[], void *arg) +{ + (void)arg; + /* simply copy r into d */ + uint32_t nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); + + STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); + STARPU_ASSERT(STARPU_VECTOR_GET_ELEMSIZE(descr[0]) == STARPU_VECTOR_GET_ELEMSIZE(descr[1])); + + float *src = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + float *dst = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + + memcpy(dst, src, nx*elemsize); +} + +/* + * compute delta_new = trans(r) r + * delta_0 = delta_new + * + * args = &delta_new, &delta_0 + */ + +void cpu_codelet_func_3(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float dot; + float *vec; + int size; + + /* get the vector */ + vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + size = (int)STARPU_VECTOR_GET_NX(descr[0]); + + dot = STARPU_SDOT(size, vec, 1, vec, 1); + + fprintf(stderr, "func 3 : DOT = %f\n", dot); + + pb->delta_new = dot; + pb->delta_0 = dot; +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_3(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float dot; + float *vec; + uint32_t size; + + /* get the vector */ + vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + size = STARPU_VECTOR_GET_NX(descr[0]); + + cublasStatus_t status = cublasSdot (starpu_cublas_get_local_handle(), size, vec, 1, vec, 1, &dot); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + pb->delta_new = dot; + pb->delta_0 = dot; +} +#endif + + +/* + * compute q with : q = A d + * + * descr[0] = A, descr[1] = d, descr [2] = q + */ + +void cpu_codelet_func_4(void *descr[], void *arg) +{ 
+ (void)arg; + float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); + uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); + uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); + + uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); + + float *vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + float *vecq = (float *)STARPU_VECTOR_GET_PTR(descr[2]); + + uint32_t nrow; + + nrow = STARPU_CSR_GET_NROW(descr[0]); + + unsigned row; + for (row = 0; row < nrow; row++) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstentry; + unsigned lastindex = rowptr[row+1] - firstentry; + + for (index = firstindex; index < lastindex; index++) + { + unsigned col; + + col = colind[index]; + tmp += nzval[index]*vecd[col]; + } + + vecq[row] = tmp; + } + +} + +/* + * compute alpha = delta_new / (trans(d) q) + * + * descr[0] = d, descr[1] = q + * args = &alpha, &delta_new + */ + +void cpu_codelet_func_5(void *descr[], void *arg) +{ + float dot; + struct cg_problem *pb = arg; + float *vecd, *vecq; + uint32_t size; + + /* get the vector */ + vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); + size = STARPU_VECTOR_GET_NX(descr[0]); + + dot = STARPU_SDOT(size, vecd, 1, vecq, 1); + + pb->alpha = pb->delta_new / dot; +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_5(void *descr[], void *arg) +{ + float dot; + struct cg_problem *pb = arg; + float *vecd, *vecq; + uint32_t size; + + /* get the vector */ + vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); + size = STARPU_VECTOR_GET_NX(descr[0]); + + cublasStatus_t status = cublasSdot (starpu_cublas_get_local_handle(), size, vecd, 1, vecq, 1, &dot); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + 
cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + pb->alpha = pb->delta_new / dot; +} +#endif + + + +/* + * compute x = x + alpha d + * + * descr[0] : x, descr[1] : d + * args = &alpha + */ + +void cpu_codelet_func_6(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecx, *vecd; + uint32_t size; + + /* get the vector */ + vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + STARPU_SAXPY(size, pb->alpha, vecd, 1, vecx, 1); +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_6(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecx, *vecd; + uint32_t size; + + /* get the vector */ + vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + cublasStatus_t status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &pb->alpha, vecd, 1, vecx, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +/* + * compute r = r - alpha q + * + * descr[0] : r, descr[1] : q + * args = &alpha + */ + +void cpu_codelet_func_7(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecr, *vecq; + uint32_t size; + + /* get the vector */ + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + STARPU_SAXPY(size, -pb->alpha, vecq, 1, vecr, 1); +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_7(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecr, *vecq; + uint32_t size; + + /* get the vector */ + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + float scal = -pb->alpha; + + cublasStatus_t status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &scal, vecq, 1, vecr, 1); + if (status != 
CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +/* + * compute delta_old = delta_new + * delta_new = trans(r) r + * beta = delta_new / delta_old + * + * descr[0] = r + * args = &delta_old, &delta_new, &beta + */ + +void cpu_codelet_func_8(void *descr[], void *arg) +{ + float dot; + struct cg_problem *pb = arg; + float *vecr; + uint32_t size; + + /* get the vector */ + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + size = STARPU_VECTOR_GET_NX(descr[0]); + + dot = STARPU_SDOT(size, vecr, 1, vecr, 1); + + pb->delta_old = pb->delta_new; + pb->delta_new = dot; + pb->beta = pb->delta_new/pb->delta_old; +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_8(void *descr[], void *arg) +{ + float dot; + struct cg_problem *pb = arg; + float *vecr; + uint32_t size; + + /* get the vector */ + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + size = STARPU_VECTOR_GET_NX(descr[0]); + + cublasStatus_t status = cublasSdot(starpu_cublas_get_local_handle(), size, vecr, 1, vecr, 1, &dot); + if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + pb->delta_old = pb->delta_new; + pb->delta_new = dot; + pb->beta = pb->delta_new/pb->delta_old; +} +#endif + +/* + * compute d = r + beta d + * + * descr[0] : d, descr[1] : r + * args = &beta + * + */ + +void cpu_codelet_func_9(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecd, *vecr; + uint32_t size; + + /* get the vector */ + vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + /* d = beta d */ + STARPU_SSCAL(size, pb->beta, vecd, 1); + + /* d = r + d */ + STARPU_SAXPY (size, 1.0f, vecr, 1, vecd, 1); +} + +#ifdef STARPU_USE_CUDA +void cublas_codelet_func_9(void *descr[], void *arg) +{ + struct cg_problem *pb = arg; + float *vecd, *vecr; + uint32_t size; + + /* get the vector */ + vecd = (float 
*)STARPU_VECTOR_GET_PTR(descr[0]); + vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + size = STARPU_VECTOR_GET_NX(descr[0]); + + /* d = beta d */ + cublasStatus_t status; + status = cublasSscal(starpu_cublas_get_local_handle(), size, &pb->beta, vecd, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + /* d = r + d */ + float scal = 1.0f; + status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &scal, vecr, 1, vecd, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif diff --git a/examples/heat/heat.c b/examples/heat/heat.c new file mode 100644 index 0000000..5680131 --- /dev/null +++ b/examples/heat/heat.c @@ -0,0 +1,819 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * heat propagation simulation through either direct LU factorization or + * iterative conjugate gradient. + */ +#include "heat.h" + +/* default values */ +#ifdef STARPU_QUICK_CHECK +static unsigned ntheta = 8+2; +static unsigned nthick = 8+2; +#else +static unsigned ntheta = 32+2; +static unsigned nthick = 32+2; +#endif +static unsigned nblocks = 16; +static unsigned nbigblocks = 8; +static unsigned shape = 0; +static unsigned pinned = 0; +static unsigned check = 0; +static unsigned version = 2; +static unsigned use_cg = 0; /* use a LU decomposition of CG ? 
*/ +static unsigned no_prio = 0; + +extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, + unsigned nrow, uint32_t *colind, uint32_t *rowptr); + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) /* an option that needs a value is skipped when it is the last argument, so argv[argc] (NULL) is never read */ + { + if (strcmp(argv[i], "-cg") == 0) + { + use_cg = 1; + } + + if (strcmp(argv[i], "-shape") == 0 && i + 1 < argc) + { + char *argptr; + shape = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nthick") == 0 && i + 1 < argc) + { + char *argptr; + nthick = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-ntheta") == 0 && i + 1 < argc) + { + char *argptr; + ntheta = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks") == 0 && i + 1 < argc) + { + char *argptr; + nblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nbigblocks") == 0 && i + 1 < argc) + { + char *argptr; + nbigblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-v1") == 0) + { + version = 1; + } + + if (strcmp(argv[i], "-v2") == 0) + { + version = 2; + } + + if (strcmp(argv[i], "-v3") == 0) + { + version = 3; + } + + if (strcmp(argv[i], "-v4") == 0) + { + version = 4; + } + + if (strcmp(argv[i], "-pin") == 0) + { + pinned = 1; + } + + if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + if (strcmp(argv[i], "-no-prio") == 0) + { + no_prio = 1; + } + + if (strcmp(argv[i], "-size") == 0 && i + 1 < argc) + { + char *argptr; + unsigned size = strtol(argv[++i], &argptr, 10); + nthick = 130; + ntheta = (size/128) + 2; + STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size); + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) + { + printf("usage : %s [-v1|-v2|-v3|-v4] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]); + } + } +} + + + +/* + * The Finite element method code + * + * B C + * ********** + * * 0 * * + * * * * + * * * 1 * + * ********** + * A D + */ + +static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, +
unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh) +{ + float xa,ya,xb,yb,xc,yc; + float tmp; + + assert(theta_tr + 2 <= ntheta); + assert(thick_tr + 2 <= nthick); + + /* A */ + xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x; + ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y; + + /* B */ + if (side_tr) + { + /* lower triangle: D is stored in the B slot here */ + xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x; + yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y; + } + else + { + /* upper */ + xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x; + yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y; + } + + xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x; + yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y; + + /* now look for the actual psi node */ + if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) + { + /* A nothing to do */ + } + else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) + { + /* psi matches C */ + /* swap A and C coordinates */ + tmp = xa; xa = xc; xc = tmp; + tmp = ya; ya = yc; yc = tmp; + } + else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) + { + /* psi is D (stored in the B slot above): swap A and B */ + tmp = xa; xa = xb; xb = tmp; + tmp = ya; ya = yb; yb = tmp; + } + else if (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) + { + /* psi is B: swap A and B */ + tmp = xa; xa = xb; xb = tmp; + tmp = ya; ya = yb; yb = tmp; + } + else + { + /* the psi node is not a node of the current triangle */ + return 0.0f; + } + + /* now the triangle should have A as the psi node */ + float denom; + float value = 0.0f; /* stays defined on the default path when assert() is compiled out */ + + denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb); + + switch (xy) + { + case X: + value = (yc - yb)/denom; + break; + case Y: + value = -(xc - xb)/denom; + break; + default: + assert(0); + } + + return value; +} + +static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, + unsigned theta_psi, unsigned thick_psi, point *pmesh) +{ +
return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh); +} + +static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, + unsigned theta_psi, unsigned thick_psi, point *pmesh) +{ + return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh); +} + +static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh) +{ + float surface; + float tmp; + + float xi, xj, xk, yi, yj, yk; + + STARPU_ASSERT(theta_tr + 2 <= ntheta); + STARPU_ASSERT(thick_tr + 2 <= nthick); + + xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x; + yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y; + + xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x; + yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y; + + if (side_tr) + { + /* lower */ + xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x; + yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y; + } + else + { + xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x; + yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y; + } + + tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj); + + surface = 0.5*fabs(tmp); + + return surface; +} + +static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr, + unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh) +{ + float surface; + float value; + + float dxi, dxj, dyi, dyj; + + if (theta_tr < 0) return 0.0f; + if (theta_tr + 2 > (int)ntheta) return 0.0f; + + if (thick_tr < 0) return 0.0f; + if (thick_tr + 2 > (int)nthick) return 0.0f; + + dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh); + dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh); + dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh); + dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh); + + surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh); + + value = (dxi*dxj + dyi*dyj)*surface; + + return value; +} + +static inline float 
integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh) +{ + float integral = 0.0f; + + integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh); + integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh); + integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh); + integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh); + integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh); + integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh); + + return integral; +} + + +static float compute_A_value(unsigned i, unsigned j, point *pmesh) +{ + float value = 0.0f; + + unsigned thick_i, thick_j; + unsigned theta_i, theta_j; + + /* add all contributions from all connex triangles */ + thick_i = NODE_TO_THICK(i); + thick_j = NODE_TO_THICK(j); + + theta_i = NODE_TO_THETA(i); + theta_j = NODE_TO_THETA(j); + + /* Compute the Sum of all the integral over all triangles */ + if ((abs((int)thick_i - (int)thick_j) <= 1) && (abs((int)theta_i - (int)theta_j) <= 1)) + { + if ((theta_j == theta_i -1) && (thick_j == thick_i +1)) + goto done; + + if ((theta_j == theta_i + 1) && (thick_j == thick_i - 1)) + goto done; + + /* this may not be a null entry */ + value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh); + } + +done: + return value; +} + + +#define TRANSLATE(k) (RefArray[(k)]) +#define TRANSLATEBACK(k) (RefArrayBack[(k)]) + +static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B) +{ + unsigned i; + + /* solve the actual problem LU X = B */ + /* solve LX' = Y with X' = UX */ + /* solve UX = X' */ + FPRINTF(stderr, "Solving the problem ...\n"); + + float *savedB = NULL; + float *LUB = NULL; + + if (check) + { + savedB = 
malloc(subsize*sizeof(float)); + memcpy(savedB, B, subsize*sizeof(float)); + LUB = malloc(subsize*sizeof(float)); + } + + /* L */ + STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1); + + /* U */ + STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1); + + STARPU_ASSERT(DIM == size); + + if (check) + { + /* compute the error on (LUB - savedB) which should be 0 */ + + /* LUB = B */ + memcpy(LUB, B, subsize*sizeof(float)); + + + /* LUB = U * LUB */ + STARPU_STRMV("U", "N", "U", subsize, A, subsize, LUB, 1); + + /* LUB = L * LUB */ + STARPU_STRMV("L", "N", "N", subsize, A, subsize, LUB, 1); + + /* LUB -= B */ + STARPU_SAXPY(subsize, -1.0f, savedB, 1, LUB, 1); + + /* check if LUB is close to the 0 vector */ + int maxind = STARPU_ISAMAX(subsize, LUB, 1); + FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]); + + float sum = STARPU_SASUM(subsize, LUB, 1); + FPRINTF(stderr,"avg. error %e\n", sum/subsize); + + free(LUB); + free(savedB); + } + + /* now display back the ACTUAL result */ + for (i = 0; i < subsize; i++) + { + result[TRANSLATE(i)] = B[i]; + } + + for (i = subsize ; i < size; i++) + { + result[TRANSLATE(i)] = Bformer[TRANSLATE(i)]; + } + +} + +unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size) +{ + unsigned k; + unsigned index = 0; + unsigned theta, thick; + unsigned newsize; + + for (k = 0; k < size; k++) + { + RefArray[k] = k; + RefArrayBack[k] = k; + } + + /* first inner nodes */ + for (theta = 1; theta < ntheta - 1 ; theta++) + { + for (thick = 1; thick < nthick - 1; thick++) + { + /* inner nodes are unknown */ + RefArrayBack[NODE_NUMBER(theta, thick)] = index; + RefArray[index] = NODE_NUMBER(theta, thick); + + index++; + } + } + + newsize = index; + + for (theta=0; theta < ntheta; theta++) + { + /* Lower boundary "South" */ + RefArrayBack[NODE_NUMBER(theta, 0)] = index; + RefArray[index++] = NODE_NUMBER(theta, 0); + + /* Upper boundary "North" */ + RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index; + 
RefArray[index++] = NODE_NUMBER(theta, nthick-1); + } + + for (thick = 1; thick < nthick -1; thick++) + { + /* "West "*/ + RefArrayBack[NODE_NUMBER(0, thick)] = index; + RefArray[index++] = NODE_NUMBER(0, thick); + + /* "East" */ + RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index; + RefArray[index++] = NODE_NUMBER(ntheta-1, thick); + } + + assert(index == size); + + return newsize; +} + +void build_mesh(point *mesh) +{ + unsigned theta, thick; + + /* first build the mesh by determining all points positions */ + for (theta = 0; theta < ntheta; theta++) + { + float angle; + angle = (ntheta - 1 - theta) * Pi/(ntheta-1); + + for (thick = 0; thick < nthick; thick++) + { + float r; + r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN; + + switch (shape) + { + default: + case 0: + mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle); + mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle); + break; + case 1: + mesh[NODE_NUMBER(theta,thick)].x = + -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1); + mesh[NODE_NUMBER(theta,thick)].y = + RMIN+((RMAX-RMIN)*thick)/(nthick - 1); + break; + case 2: + mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f); + mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f); + break; + } + } + } +} + +static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned node, int *RefArray, int *RefArrayBack) +{ + /* where is that point in the former space ? 
*/ + int former = TRANSLATE(node); + int former_thick, former_theta; + former_thick= (int)NODE_TO_THICK(former); + former_theta = (int)NODE_TO_THETA(former); + + /* do a list of all the possible neighbours */ + unsigned nneighbours = 0; + + int dtheta, dthick; + for (dthick = -1; dthick <= 1; dthick++) + { + if ((former_thick + dthick) >= 0 && (former_thick + dthick) < (int)nthick) /* valid thick indices are 0..nthick-1 */ + { + for (dtheta = -1; dtheta <= 1; dtheta++) + { + if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) < (int)ntheta) /* valid theta indices are 0..ntheta-1 */ + { + /* we got a possible neighbour */ + unsigned pnode = + NODE_NUMBER((former_theta + dtheta), (former_thick + dthick)); + + neighbours[nneighbours++] = TRANSLATEBACK(pnode); + } + } + } + } + + unsigned i; + /* order that list */ + for (i = 0; i < nneighbours; i++) + { + /* find the i^th smallest entry for position i */ + unsigned index; + unsigned min , min_index; + + min = neighbours[i]; + min_index = i; + + for (index = i+1; index < nneighbours; index++) + { + STARPU_ASSERT(neighbours[i] != neighbours[index]); + + if (neighbours[index] < min) + { + min = neighbours[index]; + min_index = index; + } + } + + /* swap values */ + neighbours[min_index] = neighbours[i]; + neighbours[i] = min; + } + + return nneighbours; +} + +static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack) +{ + unsigned i,j; + + /* first give the value of known nodes (at boundaries) */ + for (i = 0; i < size; i++) + { + Bformer[i] = 0.0f; + } + + for (i = 0; i < nthick; i++) + { + Bformer[i] = 200.0f; + Bformer[size-1-i] = 200.0f; + } + + for (i = 1; i < ntheta-1; i++) + { + Bformer[i*nthick] = 200.0f; + Bformer[(i+1)*nthick-1] = 100.0f; + } + + /* now the actual stiffness (reordered) matrix*/ + for (j = 0 ; j < newsize ; j++) + { + + unsigned long neighbour; + unsigned long nneighbours; + unsigned long neighbours[9]; + + nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray,
RefArrayBack); + + B[j] = Bformer[TRANSLATE(j)]; + + for (neighbour = 0; neighbour < nneighbours; neighbour++) + { + unsigned n = neighbours[neighbour]; + if (n >= newsize) + { + B[j] -= compute_A_value(TRANSLATE(n), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(n)]; + } + } + } +} + +static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind, + uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack) +{ + unsigned j; + + unsigned pos = 0; + + *nzval = NULL; + *colind = NULL; + + /* now the actual stiffness (reordered) matrix*/ + for (j = 0 ; j < newsize ; j++) + { + rowptr[j] = pos; + + unsigned long neighbour; + unsigned long nneighbours; + unsigned long neighbours[9]; + + nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack); + + for (neighbour = 0; neighbour < nneighbours; neighbour++) + { + unsigned nodeneighbour = neighbours[neighbour]; + + if (nodeneighbour < newsize) + { + float val; + val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh); + + if (val != 0.0f) + { + *nzval = realloc(*nzval, (pos+1)*sizeof(float)); + STARPU_ASSERT(*nzval); + *colind = realloc(*colind, (pos+1)*sizeof(uint32_t)); + STARPU_ASSERT(*colind); + + (*nzval)[pos] = val; + (*colind)[pos] = nodeneighbour; + + pos++; + } + } + } + } + + rowptr[newsize] = pos; + + return pos; +} + +static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack) +{ + unsigned long j; + + /* touch all the memory */ + memset(A, 0, newsize*newsize*sizeof(float)); + + /* now the actual stiffness (reordered) matrix*/ + for (j = 0 ; j < newsize ; j++) + { + unsigned long neighbour; + unsigned long nneighbours; + unsigned long neighbours[9]; + + nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack); + + for (neighbour = 0; neighbour < nneighbours; neighbour++) + { + unsigned long nodeneighbour = neighbours[neighbour]; + + if (nodeneighbour < newsize) 
+ { + float val; + val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh); + A[j+ (unsigned long)newsize*nodeneighbour] = val; + } + } + } +} + +int main(int argc, char **argv) +{ + float *A; + float *B; + + unsigned newsize; + float *result; + int *RefArray, *RefArrayBack; + point *pmesh; + float *Bformer; + + parse_args(argc, argv); + + pmesh = malloc(DIM*sizeof(point)); + RefArray = malloc(DIM*sizeof(int)); + RefArrayBack = malloc(DIM*sizeof(int)); + Bformer = malloc(DIM*sizeof(float)); + result = calloc(DIM, sizeof(float)); + + build_mesh(pmesh); + + /* now simplify that problem given the boundary conditions + * to do so, we remove the already known variables from the system + * by pivoting the various know variable, RefArray keep track of that + * pivoting */ + newsize = compute_pivot_array(RefArray, RefArrayBack, DIM); + + /* we can either use a direct method (LU decomposition here) or an + * iterative method (conjugate gradient here) */ + if (use_cg) + { + unsigned nnz; + float *nzval; + uint32_t *colind; + uint32_t *rowptr; + + rowptr = malloc((newsize+1)*sizeof(uint32_t)); + + B = malloc(newsize*sizeof(float)); + + build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack); + + nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack); + + do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr); + + /* XXX */ + memcpy(B, result, newsize*sizeof(float)); + + /* now display back the ACTUAL result */ + unsigned i; + for (i = 0; i < newsize; i++) + { + result[TRANSLATE(i)] = B[i]; + } + + for (i = newsize ; i < DIM; i++) + { + result[TRANSLATE(i)] = Bformer[TRANSLATE(i)]; + } + + free(nzval); + free(colind); + free(rowptr); + free(B); + } + else + { + + /* unfortunately CUDA does not allow late memory registration, + * we need to do the malloc using CUDA itself ... 
*/ + initialize_system(&A, &B, newsize, pinned); + + /* then build the stiffness matrix A */ + build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack); + + build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack); + + FPRINTF(stderr, "Problem size : %ux%u (%ux%u) (%lu MB)\n", newsize, newsize, DIM, DIM, ((unsigned long)newsize*newsize*4UL)/(1024*1024)); + + STARPU_ASSERT(newsize % nblocks == 0); + + switch (version) + { + case 1: + case 2: + dw_factoLU(A, newsize, newsize, nblocks, version, no_prio); + break; + case 3: + dw_factoLU_tag(A, newsize, newsize, nblocks, no_prio); + break; + case 4: + dw_factoLU_grain(A, newsize, newsize, nblocks, nbigblocks); + break; + default: + STARPU_ABORT(); + } + + display_stat_heat(); + + if (check) + solve_system(DIM, newsize, result, RefArray, Bformer, A, B); + + starpu_cublas_shutdown(); + starpu_shutdown(); + free_system(A, B, newsize, pinned); + } + +#ifdef STARPU_OPENGL_RENDER + const char *display = getenv("DISPLAY"); + if (display && display[0]) + opengl_render(ntheta, nthick, result, pmesh, argc, argv); +#endif + + free(pmesh); + free(RefArray); + free(RefArrayBack); + free(Bformer); + free(result); + + return 0; +} diff --git a/examples/heat/heat.h b/examples/heat/heat.h new file mode 100644 index 0000000..806cca0 --- /dev/null +++ b/examples/heat/heat.h @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __HEAT_H__ +#define __HEAT_H__ + +#include +#include +#include +#include +#include + +/* needed for STARPU_OPENGL_RENDER */ +#include + +#include + +#ifdef STARPU_OPENGL_RENDER +#include +#include +#include +#endif + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) + +#define X 0 +#define Y 1 + +#define DIM (ntheta*nthick) /* parenthesised so the product stays one operand wherever DIM is used */ + +#define RMIN (150.0f) +#define RMAX (200.0f) + +#define Pi (3.141592f) + +#define NODE_NUMBER(theta, thick) ((unsigned long)((thick)+(theta)*nthick)) +#define NODE_TO_THICK(n) ((n) % nthick) +#define NODE_TO_THETA(n) ((n) / nthick) + +typedef struct point_t +{ + float x; + float y; +} point; + +extern void dw_factoLU(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned version, unsigned no_prio); +extern void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); +extern void dw_factoLU_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks); +extern void initialize_system(float **A, float **B, unsigned dim, unsigned pinned); +extern void free_system(float *A, float *B, unsigned dim, unsigned pinned); + +void display_stat_heat(void); + +#ifdef STARPU_OPENGL_RENDER +extern void opengl_render(unsigned _ntheta, unsigned _nthick, float *_result, point *_pmesh, int argc_, char **argv_); +#endif + +#endif /* __HEAT_H__ */ diff --git a/examples/heat/heat.sh b/examples/heat/heat.sh new file mode 100755 index 0000000..d1e54ac --- /dev/null +++ b/examples/heat/heat.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Test various LU options + +set -e + +PREFIX=$(dirname $0) + +$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 0 +$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 1 +# sometimes lead to pivot being 0 +#$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 2 + +$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -cg + +# TODO: FIXME + +# segfault +#$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v1 + +# (actually the default...) +$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v2 + +# hang +#$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v3 + +# hang +#$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v4 diff --git a/examples/heat/heat_display.c b/examples/heat/heat_display.c new file mode 100644 index 0000000..dbac490 --- /dev/null +++ b/examples/heat/heat_display.c @@ -0,0 +1,241 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "heat.h" + +#ifdef STARPU_OPENGL_RENDER +/* + * Just some dummy OpenGL code to display our results + * + */ + +static float minval, maxval; + +static unsigned ntheta; +static unsigned nthick; +static float *result; +static unsigned printmesh =0; +static point *pmesh; + +float xmin, xmax, ymin, ymax; +float xcenter, ycenter; + +static void generate_graph(void) +{ + unsigned theta, thick; + + for (theta = 0; theta < ntheta-1; theta++) + { + for (thick = 0; thick < nthick-1; thick++) + { + unsigned nodeA = NODE_NUMBER(theta, thick); + unsigned nodeB = NODE_NUMBER(theta, thick+1); + unsigned nodeC = NODE_NUMBER(theta+1, thick+1); + unsigned nodeD = NODE_NUMBER(theta+1, thick); + + float colorA_R, colorB_R, colorC_R, colorD_R; + float colorA_G, colorB_G, colorC_G, colorD_G; + float colorA_B, colorB_B, colorC_B, colorD_B; + + if (maxval == minval) + { + colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f; + colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f; + colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f; + colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f; + } + else + { + float amplitude = maxval - minval; + + float coeffA, coeffB, coeffC, coeffD; + + coeffA = (result[nodeA] - minval)/amplitude; + coeffB = (result[nodeB] - minval)/amplitude; + coeffC = (result[nodeC] - minval)/amplitude; + coeffD = (result[nodeD] - minval)/amplitude; + + colorA_R = coeffA>0.5f?1.0f:(2.0*coeffA)*1.0f; + colorB_R = coeffB>0.5f?1.0f:(2.0*coeffB)*1.0f; + colorC_R = coeffC>0.5f?1.0f:(2.0*coeffC)*1.0f; + colorD_R = coeffD>0.5f?1.0f:(2.0*coeffD)*1.0f; + + colorA_B = 0.0f; + colorB_B = 0.0f; + colorC_B = 0.0f; + colorD_B = 0.0f; + + colorA_G = coeffA<0.5f?1.0f:2.0*(1 - coeffA)*1.0f; + colorB_G = coeffB<0.5f?1.0f:2.0*(1 - coeffB)*1.0f; + colorC_G = coeffC<0.5f?1.0f:2.0*(1 - coeffC)*1.0f; + colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f; + } + + if (printmesh) + { + glColor3f (0.0f, 0.0f, 
0.0f); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glLineWidth(3.0f); + glBegin(GL_POLYGON); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 2.0f); + glVertex3f(pmesh[nodeD].x, pmesh[nodeD].y, 2.0f); + glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 2.0f); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 2.0f); + glEnd(); + + glBegin(GL_POLYGON); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 1.0f); + glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 1.0f); + glVertex3f(pmesh[nodeB].x, pmesh[nodeB].y, 1.0f); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 1.0f); + glEnd(); + } + + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glBegin(GL_POLYGON); + glColor3f (colorA_R, colorA_G, colorA_B); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 0.0f); + glColor3f (colorD_R, colorD_G, colorD_B); + glVertex3f(pmesh[nodeD].x, pmesh[nodeD].y, 0.0f); + glColor3f (colorC_R, colorC_G, colorC_B); + glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 0.0f); + glEnd(); + + glBegin(GL_POLYGON); + glColor3f (colorA_R, colorA_G, colorA_B); + glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 0.0f); + glColor3f (colorC_R, colorC_G, colorC_B); + glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 0.0f); + glColor3f (colorB_R, colorB_G, colorB_B); + glVertex3f(pmesh[nodeB].x, pmesh[nodeB].y, 0.0f); + glEnd(); + } + } +} + +static void display(void) +{ + glClear (GL_COLOR_BUFFER_BIT); + glLoadIdentity (); /* clear the matrix */ + float amplitude = STARPU_MAX(xmax - xmin, ymax - ymin); + float factor = 1.0/amplitude; + glScalef (factor, factor, factor); /* modeling transformation */ + gluLookAt (xcenter, ycenter, 30.0f, xcenter, ycenter, 0.0f, 0.0f, 1.0f, 0.0f); +/* printf("factor %f\n", factor); + glRotatef(-0,0.0,0.0,0.0); */ + generate_graph(); + glFlush (); +} + + +static void pressKey(unsigned char key, int x, int y) +{ + switch (key) + { + case 'q': + exit(0); + default: + printmesh = !printmesh; + display(); + break; + } +} + + + +static void reshape (int w, int h) +{ + glViewport (0, 0, (GLsizei) w, (GLsizei) h); + glMatrixMode 
(GL_PROJECTION); + glLoadIdentity (); + glFrustum (xmin, xmax, ymin, ymax, 5.0f, 5.0f); /* NOTE(review): near == far; the glFrustum reference lists this as GL_INVALID_VALUE, which would leave the projection unchanged -- confirm the intended clip planes */ + glMatrixMode (GL_MODELVIEW); +} + + +void find_limits(void) /* scans result[] for minval/maxval and pmesh[] for the bounding box and its center */ +{ + minval = 100000000.0f; + maxval = -10000000.0f; + + unsigned i; + for (i = 0; i < DIM; i++) + { + /* find min */ + minval = STARPU_MIN(result[i], minval); + + /* find max */ + maxval = STARPU_MAX(result[i], maxval); + } + + xmin = 10000000.0f; + xmax = -10000000.0f; + ymin = 10000000.0f; + ymax = -10000000.0f; + + unsigned theta, thick; + for (theta = 0; theta < ntheta; theta++) + { + for (thick = 0; thick < nthick; thick++) + { + point *p = &pmesh[NODE_NUMBER(theta, thick)]; + + if (p->x < xmin) + xmin = p->x; + + if (p->x > xmax) + xmax = p->x; + + if (p->y < ymin) + ymin = p->y; + + if (p->y > ymax) + ymax = p->y; + } + } + + ycenter = (ymin + ymax)/2; + xcenter = (xmin + xmax)/2; +} + +void opengl_render(unsigned _ntheta, unsigned _nthick, float *_result, point *_pmesh, int argc_, char **argv_) /* stores the mesh and result in file-scope state, then opens a GLUT window and enters glutMainLoop() */ +{ + FPRINTF(stderr, "OpenGL rendering ... \n"); + + ntheta = _ntheta; + nthick = _nthick; + result = _result; + printmesh = 0; + pmesh = _pmesh; + + find_limits(); + + glutInit(&argc_, argv_); + glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); + glutInitWindowSize (800, 800); + glutInitWindowPosition (100, 100); + glutCreateWindow ("Temperature"); + + /* init */ + glClearColor (0.0, 0.0, 0.0, 0.0); + glShadeModel (GL_SMOOTH); /* glShadeModel only accepts GL_FLAT or GL_SMOOTH; the former GL_MODELVIEW argument was rejected and the default GL_SMOOTH stayed active */ + + glutKeyboardFunc(pressKey); + glutDisplayFunc(display); + glutReshapeFunc(reshape); + glutMainLoop(); +} +#endif /* STARPU_OPENGL_RENDER */ diff --git a/examples/heat/lu_kernels_model.c b/examples/heat/lu_kernels_model.c new file mode 100644 index 0000000..a225063 --- /dev/null +++ b/examples/heat/lu_kernels_model.c @@ -0,0 +1,254 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "lu_kernels_model.h" + +/* + * As a convention, in that file, buffers[0] is represented by A, + * buffers[1] is B ... + */ + +/* + * Number of flops of Gemm + */ + +/* #define USE_PERTURBATION 1 */ + + +#ifdef USE_PERTURBATION +#define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) +#else +#define PERTURB(a) (a) +#endif + +/* + * + * Generic models + * + */ + +double task_getrf_cost(struct starpu_task *task, unsigned nimpl) +{ + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/537.5); /* cube in double: the uint32_t product n*n*n wraps once n exceeds 1625 */ + + return PERTURB(cost); +} + +double task_trsm_ll_cost(struct starpu_task *task, unsigned nimpl) +{ + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + +/* double cost = ((n*n*n)/1744.695); */ + double cost = (((double)n*n*n)/3210.80); + + /* fprintf(stderr, "task TRSM_LL predicts %e\n", cost); */ + return PERTURB(cost); +} + + +double task_trsm_ru_cost(struct starpu_task *task, unsigned nimpl) +{ + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + +/* double cost = ((n*n*n)/1744.695); */ + double cost = (((double)n*n*n)/3691.53); + + /* fprintf(stderr, "task TRSM_RU predicts %e\n", cost); */ + return PERTURB(cost); +} + + + +double task_gemm_cost(struct
starpu_task *task, unsigned nimpl) +{ + (void)nimpl; + uint32_t nx, ny, nz; + + nx = starpu_matrix_get_nx(task->handles[2]); + ny = starpu_matrix_get_ny(task->handles[2]); + nz = starpu_matrix_get_ny(task->handles[0]); + + double cost = (((double)nx*ny*nz)/4110.0); /* product taken in double so large tiles cannot wrap the uint32_t range */ + + return PERTURB(cost); +} + +/* + * + * Models for CUDA + * + */ + + +double task_getrf_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/1853.7806); + +/* printf("CUDA task GETRF ; predict %e\n", cost); */ + return PERTURB(cost); +} + +double task_trsm_ll_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/42838.5718); + +/* printf("CUDA task TRSM_LL ; predict %e\n", cost); */ + return PERTURB(cost); +} + + +double task_trsm_ru_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/49208.667); + +/* printf("CUDA task TRSM_RU ; predict %e\n", cost); */ + return PERTURB(cost); +} + + + +double task_gemm_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t nx, ny, nz; + + nx = starpu_matrix_get_nx(task->handles[2]); + ny = starpu_matrix_get_ny(task->handles[2]); + nz = starpu_matrix_get_ny(task->handles[0]); + + double cost = (((double)nx*ny*nz)/57523.560); + +/* printf("CUDA task GEMM ; predict %e\n", cost); */ + return PERTURB(cost); +} + +/* + * + * Models for CPUs + * + */ + +double task_getrf_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + +
double cost = (((double)n*n*n)/537.5); /* cube in double: the uint32_t product n*n*n wraps once n exceeds 1625 */ + +/* printf("CPU task GETRF ; predict %e\n", cost); */ + return PERTURB(cost); +} + +double task_trsm_ll_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/6668.224); + +/* printf("CPU task TRSM_LL ; predict %e\n", cost); */ + return PERTURB(cost); +} + + +double task_trsm_ru_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)n*n*n)/6793.8423); + +/* printf("CPU task TRSM_RU ; predict %e\n", cost); */ + return PERTURB(cost); +} + + + +double task_gemm_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t nx, ny, nz; + + nx = starpu_matrix_get_nx(task->handles[2]); + ny = starpu_matrix_get_ny(task->handles[2]); + nz = starpu_matrix_get_ny(task->handles[0]); + + double cost = (((double)nx*ny*nz)/4203.0175); + +/* printf("CPU task GEMM ; predict %e\n", cost); */ + return PERTURB(cost); +} + +void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol, + double (*cost_function)(struct starpu_task *, unsigned), + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) +{ + (void)cost_function; + model->symbol = symbol; + model->type = STARPU_HISTORY_BASED; + + starpu_perfmodel_init(model); + + starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); + + if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) + { + starpu_perfmodel_set_per_devices_cost_function(model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1); + } +} diff --git
a/examples/heat/lu_kernels_model.h b/examples/heat/lu_kernels_model.h new file mode 100644 index 0000000..d1bbd4e --- /dev/null +++ b/examples/heat/lu_kernels_model.h @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __LU_KERNELS_MODEL_H__ +#define __LU_KERNELS_MODEL_H__ + +#include + +double task_getrf_cost(struct starpu_task *task, unsigned nimpl); +double task_trsm_ll_cost(struct starpu_task *task, unsigned nimpl); +double task_trsm_ru_cost(struct starpu_task *task, unsigned nimpl); +double task_gemm_cost(struct starpu_task *task, unsigned nimpl); + +double task_getrf_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double task_trsm_ll_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double task_trsm_ru_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double task_gemm_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); + +double task_getrf_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double task_trsm_ll_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double task_trsm_ru_cost_cpu(struct starpu_task *task, struct 
starpu_perfmodel_arch* arch, unsigned nimpl); +double task_gemm_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); + +void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol, + double (*cost_function)(struct starpu_task *, unsigned), + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); + +#endif /* __LU_KERNELS_MODEL_H__ */ diff --git a/examples/incrementer/incrementer.c b/examples/incrementer/incrementer.c new file mode 100644 index 0000000..972aca4 --- /dev/null +++ b/examples/incrementer/incrementer.c @@ -0,0 +1,144 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is just a small example which increments two values of a vector several times. + */ +#include + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 500; +#elif !defined(STARPU_LONG_CHECK) +static unsigned niter = 5000; +#else +static unsigned niter = 50000; +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_USE_CUDA +extern void cuda_codelet(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +extern void opencl_codelet(void *descr[], void *_args); +struct starpu_opencl_program opencl_program; +#endif + +void cpu_codelet(void *descr[], void *_args) +{ + (void)_args; + float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + + val[0] += 1.0f; val[1] += 1.0f; +} + +int main(int argc, char **argv) +{ + int ret = 0; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + niter /= 100; +#endif + if (argc == 2) + niter = atoi(argv[1]); + + float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f}; + + starpu_data_handle_t float_array_handle; + starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */, + (uintptr_t)&float_array, 4, sizeof(float)); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "increment" + }; + + double start; + double end; + + start = starpu_timing_now(); + + unsigned i; + for (i = 0; i < niter; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + + task->callback_func = NULL; + + task->handles[0] = float_array_handle; + + ret = starpu_task_submit(task); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute 
this task\n"); + exit(0); + } + } + + starpu_task_wait_for_all(); + + /* update the array in RAM */ + starpu_data_unregister(float_array_handle); + + end = starpu_timing_now(); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + + FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], + float_array[1], float_array[2], float_array[3]); + + if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) + { + FPRINTF(stderr, "Incorrect result\n"); + ret = 1; + } + + double timing = end - start; + + FPRINTF(stderr, "%u elems took %f ms\n", niter, timing/1000); + + starpu_shutdown(); + + return ret; +} diff --git a/examples/incrementer/incrementer_kernels.cu b/examples/incrementer/incrementer_kernels.cu new file mode 100644 index 0000000..22f644d --- /dev/null +++ b/examples/incrementer/incrementer_kernels.cu @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* CUDA kernel for incrementation */ + +#include + +static __global__ void cuda_incrementer(float * tab) +{ + tab[0] = tab[0] + 1.0f; + tab[2] = tab[2] + 1.0f; + + return; +} + +extern "C" void cuda_codelet(void *descr[], void *_args) +{ + (void)_args; + float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + + cuda_incrementer<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/incrementer/incrementer_kernels_opencl.c b/examples/incrementer/incrementer_kernels_opencl.c new file mode 100644 index 0000000..fa34fe5 --- /dev/null +++ b/examples/incrementer/incrementer_kernels_opencl.c @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* OpenCL codelet for incrementation */ + +#include + +extern struct starpu_opencl_program opencl_program; +void opencl_codelet(void *descr[], void *_args) +{ + (void)_args; + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); + cl_kernel kernel; + cl_command_queue queue; + int id, devid, err; + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "incrementer", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=4; + size_t local, s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) local=global; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/incrementer/incrementer_kernels_opencl_kernel.cl b/examples/incrementer/incrementer_kernels_opencl_kernel.cl new file mode 100644 index 0000000..0f06391 --- /dev/null +++ b/examples/incrementer/incrementer_kernels_opencl_kernel.cl @@ -0,0 +1,25 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* OpenCL kernel for incrementation */ + +__kernel void incrementer(__global float* input) +{ + const int i = get_global_id(0); + if (i == 0 || i == 3) + input[i] = input[i] + 1.0f; +} + diff --git a/examples/interface/complex.c b/examples/interface/complex.c new file mode 100644 index 0000000..078b107 --- /dev/null +++ b/examples/interface/complex.c @@ -0,0 +1,292 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "complex_interface.h" +#include "complex_codelet.h" + +void copy_complex_codelet_cpu(void *descr[], void *_args) +{ + int i; + int nx = STARPU_COMPLEX_GET_NX(descr[0]); + + double *i_real = STARPU_COMPLEX_GET_REAL(descr[0]); + double *i_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); + + double *o_real = STARPU_COMPLEX_GET_REAL(descr[1]); + double *o_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[1]); + + for(i=0 ; imajor >= 2 || props->minor >= 3) + { + /* At least compute capability 1.3, supports doubles */ + return 1; + } + else + { + /* Old card does not support doubles */ + return 0; + } +#endif +#else + return 1; +#endif +} + +#ifdef STARPU_USE_CUDA +extern void copy_complex_codelet_cuda(void *descr[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void copy_complex_codelet_opencl(void *buffers[], void *args); +#endif + +struct starpu_codelet cl_copy = +{ + .cpu_funcs = {copy_complex_codelet_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {copy_complex_codelet_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {copy_complex_codelet_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .can_execute = can_execute, + .name = "cl_copy" +}; + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +int main(void) +{ + int ret = 0; + starpu_data_handle_t handle1; + starpu_data_handle_t handle2; + starpu_data_handle_t handle3; + starpu_data_handle_t handle4; + + double real = 45.0; + double imaginary = 12.0; + double copy_real = 78.0; + double copy_imaginary = 78.0; + + int compare; + int *compare_ptr = &compare; + + starpu_data_handle_t vectorh; + struct starpu_vector_interface *vectori; + double *vector; + + // When using master-slave MPI mode, it is necessary for the slaves to know about the complex interface + starpu_complex_data_register_ops(); + + ret = starpu_init(NULL); + if (ret == -ENODEV) 
return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1); + starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1); + /* Create a vector of two complexs. */ + starpu_complex_data_register(&handle3, -1, 0, 0, 2); + starpu_complex_data_register(&handle4, -1, 0, 0, 1); + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Compare two different complexs. */ + ret = starpu_task_insert(&cl_compare, + STARPU_R, handle1, + STARPU_R, handle2, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + if (compare != 0) + { + _FPRINTF(stderr, "Complex numbers should NOT be similar\n"); compare = 0; /* main returns !compare, so clear it here to exit with a failure status */ + goto end; + } + + /* Copy one into the other.
*/ + ret = starpu_task_insert(&cl_copy, + STARPU_R, handle1, + STARPU_W, handle2, + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* And compare again. */ + ret = starpu_task_insert(&cl_compare, + STARPU_R, handle1, + STARPU_R, handle2, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + + if (compare != 1) + { + _FPRINTF(stderr, "Complex numbers should be similar\n"); goto end; /* stop here like the final comparison does, so the mismatch reaches the exit status instead of being overwritten by a later compare */ + } + + /* Put another value again */ + starpu_data_acquire(handle2, STARPU_W); + copy_real = 78.0; + copy_imaginary = 77.0; + starpu_data_release(handle2); + + /* Split it in two pieces (thus one complex each). */ + struct starpu_data_filter f = + { + .filter_func = starpu_complex_filter_block, + .nchildren = 2, + }; + starpu_data_partition(handle3, &f); + + /* Copy the two complexs into each part */ + ret = starpu_task_insert(&cl_copy, + STARPU_R, handle1, + STARPU_W, starpu_data_get_sub_data(handle3, 1, 0), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&cl_copy, + STARPU_R, handle2, + STARPU_W, starpu_data_get_sub_data(handle3, 1, 1), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Gather the two pieces. */ + starpu_data_unpartition(handle3, STARPU_MAIN_RAM); + + /* Show it.
*/ + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Get the real and imaginary vectors. */ + struct starpu_data_filter fcanon = + { + .filter_func = starpu_complex_filter_canonical, + .nchildren = 2, + .get_child_ops = starpu_complex_filter_canonical_child_ops, + }; + starpu_data_partition(handle3, &fcanon); + + /* Check the corresponding data. */ + vectorh = starpu_data_get_sub_data(handle3, 1, 0); + starpu_data_acquire(vectorh, STARPU_R); + vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); + vector = (double*) vectori->ptr; + STARPU_ASSERT_MSG(vector[0] == 45., "Bogus value: %f instead of %f", vector[0], 45.); + STARPU_ASSERT_MSG(vector[1] == 78., "Bogus value: %f instead of %f", vector[1], 78.); + starpu_data_release(vectorh); + + vectorh = starpu_data_get_sub_data(handle3, 1, 1); + starpu_data_acquire(vectorh, STARPU_R); + vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); + vector = (double*) vectori->ptr; + STARPU_ASSERT_MSG(vector[0] == 12., "Bogus value: %f instead of %f", vector[0], 12.); + STARPU_ASSERT_MSG(vector[1] == 77., "Bogus value: %f instead of %f", vector[1], 77.); + starpu_data_release(vectorh); + + starpu_data_unpartition(handle3, STARPU_MAIN_RAM); + + /* Use helper starpu_data_cpy */ + ret = starpu_data_cpy(handle4, handle1, 0, NULL, NULL); + if (ret == -ENODEV) goto end; + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Compare two different complexs. 
*/ + ret = starpu_task_insert(&cl_compare, + STARPU_R, handle1, + STARPU_R, handle4, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + if (compare != 1) + { + _FPRINTF(stderr, "Complex numbers should be similar\n"); + goto end; + } + +end: +#ifdef STARPU_USE_OPENCL + { + int ret2 = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl"); + } +#endif + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_data_unregister(handle3); + starpu_data_unregister(handle4); + starpu_shutdown(); + if (ret == -ENODEV) return 77; else return !compare; +} diff --git a/examples/interface/complex_codelet.h b/examples/interface/complex_codelet.h new file mode 100644 index 0000000..73906bd --- /dev/null +++ b/examples/interface/complex_codelet.h @@ -0,0 +1,108 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "complex_interface.h" + +#ifndef __COMPLEX_CODELET_H +#define __COMPLEX_CODELET_H + +#define _FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +/* Dumb performance model for simgrid */ +static double complex_cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 0.000001; +} + +static struct starpu_perfmodel complex_model = +{ + .type = STARPU_COMMON, + .cost_function = complex_cost_function, + .symbol = "complex" +}; + +void compare_complex_codelet(void *descr[], void *_args) +{ + int nx1 = STARPU_COMPLEX_GET_NX(descr[0]); + double *real1 = STARPU_COMPLEX_GET_REAL(descr[0]); + double *imaginary1 = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); + + int nx2 = STARPU_COMPLEX_GET_NX(descr[1]); + double *real2 = STARPU_COMPLEX_GET_REAL(descr[1]); + double *imaginary2 = STARPU_COMPLEX_GET_IMAGINARY(descr[1]); + + int *compare; + + starpu_codelet_unpack_args(_args, &compare); + *compare = (nx1 == nx2); + if (nx1 == nx2) + { + int i; + for(i=0 ; i +#include "complex_dev_handle_interface.h" +#include "complex_dev_handle_codelet.h" + +void copy_complex_dev_handle_codelet_cpu(void *descr[], void *_args) +{ + int i; + int nx = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]); + + double *i_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]); + double *i_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]); + + double *o_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]); + double *o_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]); + + for(i=0 ; imajor >= 2 || props->minor >= 3) + { + /* At least compute capability 1.3, supports doubles */ + return 1; + } + else + { + /* Old card does not support doubles */ + return 0; + } +#endif +#else + return 1; +#endif +} + +#ifdef STARPU_USE_CUDA +extern void copy_complex_dev_handle_codelet_cuda(void *descr[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void copy_complex_dev_handle_codelet_opencl(void *buffers[], void *args); +#endif + +struct starpu_codelet 
cl_dev_handle_copy = +{ + .cpu_funcs = {copy_complex_dev_handle_codelet_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {copy_complex_dev_handle_codelet_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {copy_complex_dev_handle_codelet_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .can_execute = can_execute, + .name = "cl_dev_handle_copy" +}; + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +/* Example driver; exits with 77 when starpu_init() reports -ENODEV. */ +int main(void) +{ + int ret = 0; + starpu_data_handle_t handle1; + starpu_data_handle_t handle2; + starpu_data_handle_t handle3; + starpu_data_handle_t handle4; + + double real = 45.0; + double imaginary = 12.0; + double copy_real = 78.0; + double copy_imaginary = 78.0; + + int compare; + int *compare_ptr = &compare; + + starpu_data_handle_t vectorh; + struct starpu_vector_interface *vectori; + double *vector; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + /* handle1 and handle2 each wrap one complex number stored in the local doubles above. */ + starpu_complex_dev_handle_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&real, (uintptr_t)&imaginary, 1); + starpu_complex_dev_handle_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)&copy_real, (uintptr_t)&copy_imaginary, 1); + /* Create a vector of two complex numbers. 
*/ + starpu_complex_dev_handle_data_register(&handle3, -1, 0, 0, 2); + starpu_complex_dev_handle_data_register(&handle4, -1, 0, 0, 1); + + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Compare two different complexs. */ + ret = starpu_task_insert(&cl_dev_handle_compare, + STARPU_R, handle1, + STARPU_R, handle2, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + if (compare != 0) + { + _FPRINTF(stderr, "Complex numbers should NOT be similar\n"); + goto end; + } + + /* Copy one into the other. */ + ret = starpu_task_insert(&cl_dev_handle_copy, + STARPU_R, handle1, + STARPU_W, handle2, + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* And compare again. 
*/ + ret = starpu_task_insert(&cl_dev_handle_compare, + STARPU_R, handle1, + STARPU_R, handle2, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + + if (compare != 1) + { + _FPRINTF(stderr, "Complex numbers should be similar\n"); + } + + /* Put another value again */ + starpu_data_acquire(handle2, STARPU_W); + copy_real = 78.0; + copy_imaginary = 77.0; + starpu_data_release(handle2); + + /* Split it in two pieces (thus one complex each). */ + struct starpu_data_filter f = + { + .filter_func = starpu_complex_dev_handle_filter_block, + .nchildren = 2, + }; + starpu_data_partition(handle3, &f); + + /* Copy the two complexs into each part */ + ret = starpu_task_insert(&cl_dev_handle_copy, + STARPU_R, handle1, + STARPU_W, starpu_data_get_sub_data(handle3, 1, 0), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&cl_dev_handle_copy, + STARPU_R, handle2, + STARPU_W, starpu_data_get_sub_data(handle3, 1, 1), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Gather the two pieces. */ + starpu_data_unpartition(handle3, STARPU_MAIN_RAM); + + /* Show it. */ + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Get the real and imaginary vectors. */ + struct starpu_data_filter fcanon = + { + .filter_func = starpu_complex_dev_handle_filter_canonical, + .nchildren = 2, + .get_child_ops = starpu_complex_dev_handle_filter_canonical_child_ops, + }; + starpu_data_partition(handle3, &fcanon); + + /* Check the corresponding data. 
*/ + vectorh = starpu_data_get_sub_data(handle3, 1, 0); + starpu_data_acquire(vectorh, STARPU_R); + vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); + vector = (double*) vectori->ptr; + STARPU_ASSERT_MSG(vector[0] == 45., "Bogus value: %f instead of %f", vector[0], 45.); + STARPU_ASSERT_MSG(vector[1] == 78., "Bogus value: %f instead of %f", vector[1], 78.); + starpu_data_release(vectorh); + + vectorh = starpu_data_get_sub_data(handle3, 1, 1); + starpu_data_acquire(vectorh, STARPU_R); + vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); + vector = (double*) vectori->ptr; + STARPU_ASSERT_MSG(vector[0] == 12., "Bogus value: %f instead of %f", vector[0], 12.); + STARPU_ASSERT_MSG(vector[1] == 77., "Bogus value: %f instead of %f", vector[1], 77.); + starpu_data_release(vectorh); + + starpu_data_unpartition(handle3, STARPU_MAIN_RAM); + + /* Use helper starpu_data_cpy; its status is checked like every other submission here. */ + ret = starpu_data_cpy(handle4, handle1, 0, NULL, NULL); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_cpy"); + ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* handle4 was just copied from handle1: compare the two. 
*/ + ret = starpu_task_insert(&cl_dev_handle_compare, + STARPU_R, handle1, + STARPU_R, handle4, + STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), + 0); + if (ret == -ENODEV) goto end; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + if (compare != 1) + { + _FPRINTF(stderr, "Complex numbers should be similar\n"); + goto end; + } + +end: +#ifdef STARPU_USE_OPENCL + { + int ret2 = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl"); + } +#endif + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_data_unregister(handle3); + starpu_data_unregister(handle4); + starpu_shutdown(); + if (ret == -ENODEV) return 77; else return !compare; +} diff --git a/examples/interface/complex_dev_handle/complex_dev_handle_codelet.h b/examples/interface/complex_dev_handle/complex_dev_handle_codelet.h new file mode 100644 index 0000000..154eff0 --- /dev/null +++ b/examples/interface/complex_dev_handle/complex_dev_handle_codelet.h @@ -0,0 +1,109 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "complex_dev_handle_interface.h" + +#ifndef __COMPLEX_DEV_HANDLE_CODELET_H +#define __COMPLEX_DEV_HANDLE_CODELET_H + +#define _FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +/* Dumb performance model for simgrid */ +static double complex_dev_handle_cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 0.000001; +} + +static struct starpu_perfmodel complex_dev_handle_model = +{ + .type = STARPU_COMMON, + .cost_function = complex_dev_handle_cost_function, + .symbol = "complex_dev_handle" +}; + +void compare_complex_dev_handle_codelet(void *descr[], void *_args) +{ + int nx1 = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]); + double* real1 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]); + double* imaginary1 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]); + + int nx2 = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[1]); + double* real2 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]); + double* imaginary2 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]); + + int *compare; + + starpu_codelet_unpack_args(_args, &compare); + *compare = (nx1 == nx2); + if (nx1 == nx2) + { + int i; + for(i=0 ; i + +#include "complex_dev_handle_interface.h" + +void starpu_complex_dev_handle_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_father = father_interface; + struct starpu_complex_dev_handle_interface *complex_dev_handle_child = child_interface; + + uint32_t nx = complex_dev_handle_father->nx; + size_t elemsize = sizeof(double); + + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + + uint32_t child_nx; + size_t offset; + /* Compute the split */ + starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, + &child_nx, &offset); + + complex_dev_handle_child->nx = child_nx; + + if (complex_dev_handle_father->dev_handle_real) + { + if (complex_dev_handle_father->ptr_real) 
+ { + complex_dev_handle_child->ptr_real = complex_dev_handle_father->ptr_real + offset; + complex_dev_handle_child->ptr_imaginary = complex_dev_handle_father->ptr_imaginary + offset; + } + complex_dev_handle_child->dev_handle_real = complex_dev_handle_father->dev_handle_real; + complex_dev_handle_child->offset_real = complex_dev_handle_father->offset_real + offset; + complex_dev_handle_child->dev_handle_imaginary = complex_dev_handle_father->dev_handle_imaginary; + complex_dev_handle_child->offset_imaginary = complex_dev_handle_father->offset_imaginary + offset; + } +} + +void starpu_complex_dev_handle_filter_canonical(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_father = father_interface; + struct starpu_vector_interface *vector_child = child_interface; + + STARPU_ASSERT_MSG(nchunks == 2, "complex_dev_handle can only be split into two pieces"); + STARPU_ASSERT_MSG(id < 2, "complex_dev_handle has only two pieces"); + + vector_child->id = STARPU_VECTOR_INTERFACE_ID; + + vector_child->nx = complex_dev_handle_father->nx; + vector_child->elemsize = sizeof(double); + vector_child->slice_base = 0; + vector_child->allocsize = vector_child->nx * vector_child->elemsize; + + if (complex_dev_handle_father->dev_handle_real) + { + if (complex_dev_handle_father->ptr_real) + { + if (id == 0) + vector_child->ptr = complex_dev_handle_father->ptr_real; + else + vector_child->ptr = complex_dev_handle_father->ptr_imaginary; + } + if (id == 0) + { + vector_child->dev_handle = complex_dev_handle_father->dev_handle_real; + vector_child->offset = complex_dev_handle_father->offset_real; + } + else + { + vector_child->dev_handle = complex_dev_handle_father->dev_handle_imaginary; + vector_child->offset = complex_dev_handle_father->offset_imaginary; + } + + } +} + +struct starpu_data_interface_ops 
*starpu_complex_dev_handle_filter_canonical_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned child) +{ + return &starpu_interface_vector_ops; +} diff --git a/examples/interface/complex_dev_handle/complex_dev_handle_interface.c b/examples/interface/complex_dev_handle/complex_dev_handle_interface.c new file mode 100644 index 0000000..03c56ab --- /dev/null +++ b/examples/interface/complex_dev_handle/complex_dev_handle_interface.c @@ -0,0 +1,305 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu.h> + +#include "complex_dev_handle_interface.h" + +/* Accessors: each one returns a single field of the handle's interface on STARPU_MAIN_RAM. */ +uintptr_t starpu_complex_dev_handle_get_ptr_real(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->ptr_real; +} + +uintptr_t starpu_complex_dev_handle_get_ptr_imaginary(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->ptr_imaginary; +} + +int starpu_complex_dev_handle_get_nx(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->nx; +} + +uintptr_t starpu_complex_dev_handle_get_dev_handle_real(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->dev_handle_real; +} + +uintptr_t starpu_complex_dev_handle_get_dev_handle_imaginary(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->dev_handle_imaginary; +} + +size_t starpu_complex_dev_handle_get_offset_real(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return 
complex_dev_handle_interface->offset_real; +} + +size_t starpu_complex_dev_handle_get_offset_imaginary(starpu_data_handle_t handle) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = + (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_dev_handle_interface->offset_imaginary; +} + +static void complex_dev_handle_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_complex_dev_handle_interface *local_interface = (struct starpu_complex_dev_handle_interface *) + starpu_data_get_interface_on_node(handle, node); + + local_interface->nx = complex_dev_handle_interface->nx; + if (node == home_node) + { + local_interface->ptr_real = complex_dev_handle_interface->ptr_real; + local_interface->dev_handle_real = complex_dev_handle_interface->dev_handle_real; + local_interface->offset_real = complex_dev_handle_interface->offset_real; + local_interface->ptr_imaginary = complex_dev_handle_interface->ptr_imaginary; + local_interface->dev_handle_imaginary = complex_dev_handle_interface->dev_handle_imaginary; + local_interface->offset_imaginary = complex_dev_handle_interface->offset_imaginary; + } + else + { + local_interface->ptr_real = 0; + local_interface->dev_handle_real = 0; + local_interface->offset_real = 0; + local_interface->ptr_imaginary = 0; + local_interface->dev_handle_imaginary = 0; + local_interface->offset_imaginary = 0; + } + } +} + +static starpu_ssize_t complex_dev_handle_allocate_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; + + uintptr_t addr_real = 0, 
addr_imaginary = 0, dev_handle_real, dev_handle_imaginary; + + starpu_ssize_t requested_memory = complex_dev_handle_interface->nx * sizeof(double); + + dev_handle_real = starpu_malloc_on_node(node, requested_memory); + if (!dev_handle_real) + goto fail_real; + dev_handle_imaginary = starpu_malloc_on_node(node, requested_memory); + if (!dev_handle_imaginary) + goto fail_imaginary; + + if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) + { + addr_real = dev_handle_real; + addr_imaginary = dev_handle_imaginary; + } + + /* update the data properly in consequence */ + complex_dev_handle_interface->ptr_real = addr_real; + complex_dev_handle_interface->dev_handle_real = dev_handle_real; + complex_dev_handle_interface->offset_real = 0; + complex_dev_handle_interface->ptr_imaginary = addr_imaginary; + complex_dev_handle_interface->dev_handle_imaginary = dev_handle_imaginary; + complex_dev_handle_interface->offset_imaginary = 0; + + return 2*requested_memory; + +fail_imaginary: + starpu_free_on_node(node, dev_handle_real, requested_memory); +fail_real: + return -ENOMEM; +} + +static void complex_dev_handle_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; + starpu_ssize_t requested_memory = complex_dev_handle_interface->nx * sizeof(double); + + starpu_free_on_node(node, (uintptr_t) complex_dev_handle_interface->dev_handle_real, requested_memory); + complex_dev_handle_interface->ptr_real = 0; + complex_dev_handle_interface->dev_handle_real = 0; + starpu_free_on_node(node, (uintptr_t) complex_dev_handle_interface->dev_handle_imaginary, requested_memory); + complex_dev_handle_interface->ptr_imaginary = 0; + complex_dev_handle_interface->dev_handle_imaginary = 0; +} + +static size_t complex_dev_handle_get_size(starpu_data_handle_t handle) +{ + size_t size; + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = 
(struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + size = complex_dev_handle_interface->nx * 2 * sizeof(double); + return size; +} + +static uint32_t complex_dev_handle_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_complex_dev_handle_get_nx(handle), 0); +} + +static int complex_dev_handle_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = complex_dev_handle_get_size(handle); + if (ptr != NULL) + { + char *real = (void *)complex_dev_handle_interface->ptr_real; + char *imaginary = (void *)complex_dev_handle_interface->ptr_imaginary; + + *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); + char *data = (char*) *ptr; + memcpy(data, real, complex_dev_handle_interface->nx*sizeof(double)); + memcpy(data+complex_dev_handle_interface->nx*sizeof(double), imaginary, complex_dev_handle_interface->nx*sizeof(double)); + } + + return 0; +} + +static int complex_dev_handle_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + char *data = ptr; + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == 2 * complex_dev_handle_interface->nx * sizeof(double)); + + char *real = (void *)complex_dev_handle_interface->ptr_real; + char *imaginary = (void *)complex_dev_handle_interface->ptr_imaginary; + + memcpy(real, data, complex_dev_handle_interface->nx*sizeof(double)); + memcpy(imaginary, data+complex_dev_handle_interface->nx*sizeof(double), 
complex_dev_handle_interface->nx*sizeof(double)); + + return 0; +} + +static int complex_dev_handle_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + complex_dev_handle_peek_data(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} + +static starpu_ssize_t complex_dev_handle_describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; + return snprintf(buf, size, "Complex_dev_handle%d", complex_dev_handle_interface->nx); +} + +static int complex_dev_handle_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_a = (struct starpu_complex_dev_handle_interface *) data_interface_a; + struct starpu_complex_dev_handle_interface *complex_dev_handle_b = (struct starpu_complex_dev_handle_interface *) data_interface_b; + + return (complex_dev_handle_a->nx == complex_dev_handle_b->nx); +} + +int copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + void *async_data) +{ + struct starpu_complex_dev_handle_interface *src_complex_dev_handle = src_interface; + struct starpu_complex_dev_handle_interface *dst_complex_dev_handle = dst_interface; + int ret = 0; + + if (starpu_interface_copy(src_complex_dev_handle->dev_handle_real, src_complex_dev_handle->offset_real, src_node, + dst_complex_dev_handle->dev_handle_real, dst_complex_dev_handle->offset_real, dst_node, + src_complex_dev_handle->nx*sizeof(double), + async_data)) + ret = -EAGAIN; + if (starpu_interface_copy(src_complex_dev_handle->dev_handle_imaginary, src_complex_dev_handle->offset_imaginary, src_node, + dst_complex_dev_handle->dev_handle_imaginary, dst_complex_dev_handle->offset_imaginary, dst_node, + src_complex_dev_handle->nx*sizeof(double), + async_data)) + ret = -EAGAIN; + return 
ret; +} + +const struct starpu_data_copy_methods complex_dev_handle_copy_methods = +{ + .any_to_any = copy_any_to_any +}; + +struct starpu_data_interface_ops interface_complex_dev_handle_ops = +{ + .register_data_handle = complex_dev_handle_register_data_handle, + .allocate_data_on_node = complex_dev_handle_allocate_data_on_node, + .free_data_on_node = complex_dev_handle_free_data_on_node, + .copy_methods = &complex_dev_handle_copy_methods, + .get_size = complex_dev_handle_get_size, + .footprint = complex_dev_handle_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_complex_dev_handle_interface), + .to_pointer = NULL, + .pack_data = complex_dev_handle_pack_data, + .peek_data = complex_dev_handle_peek_data, + .unpack_data = complex_dev_handle_unpack_data, + .describe = complex_dev_handle_describe, + .compare = complex_dev_handle_compare +}; + +void starpu_complex_dev_handle_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr_real, uintptr_t ptr_imaginary, int nx) +{ + struct starpu_complex_dev_handle_interface complex_dev_handle = + { + .ptr_real = ptr_real, + .dev_handle_real = ptr_real, + .ptr_imaginary = ptr_imaginary, + .dev_handle_imaginary = ptr_imaginary, + .nx = nx + }; + + starpu_data_register(handleptr, home_node, &complex_dev_handle, &interface_complex_dev_handle_ops); +} + +void starpu_complex_dev_handle_ptr_register(starpu_data_handle_t handle, int node, uintptr_t ptr_real, uintptr_t ptr_imaginary, uintptr_t dev_handle_real, uintptr_t dev_handle_imaginary, size_t offset_real, size_t offset_imaginary) +{ + struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + complex_dev_handle_interface->ptr_real = ptr_real; + complex_dev_handle_interface->dev_handle_real = dev_handle_real; + complex_dev_handle_interface->offset_real = offset_real; + 
complex_dev_handle_interface->ptr_imaginary = ptr_imaginary; + complex_dev_handle_interface->dev_handle_imaginary = dev_handle_imaginary; + complex_dev_handle_interface->offset_imaginary = offset_imaginary; +} diff --git a/examples/interface/complex_dev_handle/complex_dev_handle_interface.h b/examples/interface/complex_dev_handle/complex_dev_handle_interface.h new file mode 100644 index 0000000..9d48ae4 --- /dev/null +++ b/examples/interface/complex_dev_handle/complex_dev_handle_interface.h @@ -0,0 +1,61 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu.h> + +#ifndef __COMPLEX_DEV_HANDLE_INTERFACE_H +#define __COMPLEX_DEV_HANDLE_INTERFACE_H + +/* Interface for vectors of complex numbers, stored as two separate arrays of doubles (real parts, imaginary parts). Each array is described by a device handle plus a byte offset, so that OpenCL buffers can be supported. */ +struct starpu_complex_dev_handle_interface +{ + int nx; /* number of complex elements */ + uintptr_t ptr_real; /* address of the real parts; the allocator leaves it at 0 on OpenCL nodes */ + uintptr_t dev_handle_real; /* allocation holding the real parts */ + size_t offset_real; /* byte offset of the real parts inside dev_handle_real */ + uintptr_t ptr_imaginary; /* address of the imaginary parts; 0 on OpenCL nodes */ + uintptr_t dev_handle_imaginary; /* allocation holding the imaginary parts */ + size_t offset_imaginary; /* byte offset of the imaginary parts inside dev_handle_imaginary */ +}; + +void starpu_complex_dev_handle_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr_real, uintptr_t ptr_imaginary, int nx); + +void starpu_complex_dev_handle_ptr_register(starpu_data_handle_t handle, int node, uintptr_t ptr_real, uintptr_t ptr_imaginary, uintptr_t dev_handle_real, uintptr_t dev_handle_imaginary, size_t offset_real, size_t offset_imaginary); + +int starpu_complex_dev_handle_get_nx(starpu_data_handle_t handle); +uintptr_t starpu_complex_dev_handle_get_ptr_real(starpu_data_handle_t handle); +uintptr_t starpu_complex_dev_handle_get_dev_handle_real(starpu_data_handle_t handle); +size_t starpu_complex_dev_handle_get_offset_real(starpu_data_handle_t handle); +uintptr_t starpu_complex_dev_handle_get_ptr_imaginary(starpu_data_handle_t handle); +uintptr_t starpu_complex_dev_handle_get_dev_handle_imaginary(starpu_data_handle_t handle); +size_t starpu_complex_dev_handle_get_offset_imaginary(starpu_data_handle_t handle); + +#define STARPU_COMPLEX_DEV_HANDLE_GET_NX(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->nx) +#define STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->ptr_real) +#define STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->dev_handle_real) +#define STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->offset_real) +#define STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface 
*)(interface))->ptr_imaginary) +#define STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->dev_handle_imaginary) +#define STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->offset_imaginary) + +/* Split complex vector into smaller complex vectors */ +void starpu_complex_dev_handle_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); + +/* Split complex into two simple vectors */ +void starpu_complex_dev_handle_filter_canonical(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); +struct starpu_data_interface_ops *starpu_complex_dev_handle_filter_canonical_child_ops(struct starpu_data_filter *f, unsigned child); + +#endif /* __COMPLEX_DEV_HANDLE_INTERFACE_H */ diff --git a/examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl b/examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl new file mode 100644 index 0000000..7526df8 --- /dev/null +++ b/examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
/* Use the "double" type */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Copy a complex vector stored as two separate double arrays.
 *
 * o_real / o_imaginary: destination buffers
 * i_real / i_imaginary: source buffers
 * *_offset:             offset of the data inside the corresponding buffer,
 *                       in bytes (it is applied on a char pointer)
 * nx:                   number of elements to copy
 *
 * Each work-item copies one element; work-items whose global id is not
 * below nx do nothing, so the global size may be rounded up.
 */
__kernel void complex_copy_opencl(__global double *o_real,
				  unsigned o_real_offset,
				  __global double *o_imaginary,
				  unsigned o_imaginary_offset,
				  __global double *i_real,
				  unsigned i_real_offset,
				  __global double *i_imaginary,
				  unsigned i_imaginary_offset,
				  unsigned nx)
{
	const int i = get_global_id(0);
	if (i < nx)
	{
		/* Apply the byte offsets on char pointers, then cast back
		 * explicitly: assigning a char pointer to a double pointer
		 * without a cast is an incompatible pointer conversion. */
		__global double *dst_real = (__global double *) ((__global char *) o_real + o_real_offset);
		__global double *dst_imaginary = (__global double *) ((__global char *) o_imaginary + o_imaginary_offset);
		__global double *src_real = (__global double *) ((__global char *) i_real + i_real_offset);
		__global double *src_imaginary = (__global double *) ((__global char *) i_imaginary + i_imaginary_offset);

		dst_real[i] = src_real[i];
		dst_imaginary[i] = src_imaginary[i];
	}
}
/* Element-wise copy of a split complex vector: thread i copies element i of
 * both the real and the imaginary array, threads with i >= n do nothing. */
static __global__ void complex_copy_cuda(double *o_real, double *o_imaginary, double *i_real, double *i_imaginary, unsigned n)
{
	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;

	if (i < n)
	{
		o_real[i] = i_real[i];
		o_imaginary[i] = i_imaginary[i];
	}
}

/*
 * CUDA implementation of the copy codelet.
 *
 * descr[0] is the source complex vector, descr[1] the destination; both are
 * struct starpu_complex_dev_handle_interface. Only the ptr_* fields are used
 * here, the dev_handle/offset fields are for OpenCL.
 */
extern "C" void copy_complex_dev_handle_codelet_cuda(void *descr[], void *_args)
{
	(void)_args;

	int nx = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]);

	double *i_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]);
	double *i_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]);

	double *o_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]);
	double *o_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]);

	/* One thread per element, rounded up to whole blocks */
	unsigned threads_per_block = 64;
	unsigned nblocks = (nx + threads_per_block-1) / threads_per_block;

	/* The launch configuration had been lost (the text read "<<>>", which
	 * does not compile). Launch on the worker's own stream.
	 * NOTE(review): no synchronization is done here — confirm the codelet
	 * is declared with STARPU_CUDA_ASYNC. */
	complex_copy_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(o_real, o_imaginary, i_real, i_imaginary, nx);
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
}
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "complex_dev_handle_interface.h" + +extern struct starpu_opencl_program opencl_program; + +void copy_complex_dev_handle_codelet_opencl(void *buffers[], void *_args) +{ + (void) _args; + + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + /* length of the vector */ + unsigned n = STARPU_COMPLEX_DEV_HANDLE_GET_NX(buffers[0]); + /* OpenCL copy of the vector pointer */ + cl_mem i_real = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(buffers[0]); + unsigned i_real_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(buffers[0]); + cl_mem i_imaginary = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(buffers[0]); + unsigned i_imaginary_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(buffers[0]); + cl_mem o_real = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(buffers[1]); + unsigned o_real_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(buffers[1]); + cl_mem o_imaginary = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(buffers[1]); + unsigned o_imaginary_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(buffers[1]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real); + err|= clSetKernelArg(kernel, 1, sizeof(o_real_offset), &o_real_offset); + err|= clSetKernelArg(kernel, 2, sizeof(o_imaginary), &o_imaginary); + err|= clSetKernelArg(kernel, 3, sizeof(o_imaginary_offset), &o_imaginary_offset); + err|= clSetKernelArg(kernel, 4, sizeof(i_real), &i_real); 
+ err|= clSetKernelArg(kernel, 5, sizeof(i_real_offset), &i_real_offset); + err|= clSetKernelArg(kernel, 6, sizeof(i_imaginary), &i_imaginary); + err|= clSetKernelArg(kernel, 7, sizeof(i_imaginary_offset), &i_imaginary_offset); + err|= clSetKernelArg(kernel, 8, sizeof(n), &n); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) + local=global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/interface/complex_filters.c b/examples/interface/complex_filters.c new file mode 100644 index 0000000..02f3124 --- /dev/null +++ b/examples/interface/complex_filters.c @@ -0,0 +1,73 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include "complex_interface.h" + +void starpu_complex_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_complex_interface *complex_father = father_interface; + struct starpu_complex_interface *complex_child = child_interface; + + uint32_t nx = complex_father->nx; + size_t elemsize = sizeof(double); + + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + + uint32_t child_nx; + size_t offset; + /* Compute the split */ + starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, + &child_nx, &offset); + + complex_child->nx = child_nx; + + if (complex_father->real) + { + complex_child->real = (void*) ((uintptr_t) complex_father->real + offset); + complex_child->imaginary = (void*) ((uintptr_t) complex_father->imaginary + offset); + } +} + +void starpu_complex_filter_canonical(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_complex_interface *complex_father = father_interface; + struct starpu_vector_interface *vector_child = child_interface; + + STARPU_ASSERT_MSG(nchunks == 2, "complex can only be split into two pieces"); + STARPU_ASSERT_MSG(id < 2, "complex has only two pieces"); + + vector_child->id = STARPU_VECTOR_INTERFACE_ID; + if (id == 0) + vector_child->ptr = (uintptr_t) complex_father->real; + else + vector_child->ptr = (uintptr_t) complex_father->imaginary; + + /* the complex interface doesn't support dev_handle/offset */ + vector_child->dev_handle = vector_child->ptr; + vector_child->offset = 0; + + vector_child->nx = complex_father->nx; + vector_child->elemsize = sizeof(double); + vector_child->slice_base = 0; + vector_child->allocsize = vector_child->nx * vector_child->elemsize; +} + +struct starpu_data_interface_ops 
*starpu_complex_filter_canonical_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned child) +{ + return &starpu_interface_vector_ops; +} diff --git a/examples/interface/complex_interface.c b/examples/interface/complex_interface.c new file mode 100644 index 0000000..82ed038 --- /dev/null +++ b/examples/interface/complex_interface.c @@ -0,0 +1,277 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include "complex_interface.h" + +double *starpu_complex_get_real(starpu_data_handle_t handle) +{ + struct starpu_complex_interface *complex_interface = + (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_interface->real; +} + +double *starpu_complex_get_imaginary(starpu_data_handle_t handle) +{ + struct starpu_complex_interface *complex_interface = + (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_interface->imaginary; +} + +int starpu_complex_get_nx(starpu_data_handle_t handle) +{ + struct starpu_complex_interface *complex_interface = + (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return complex_interface->nx; +} + +static void complex_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + local_interface->nx = complex_interface->nx; + if (node == home_node) + { + local_interface->real = complex_interface->real; + local_interface->imaginary = complex_interface->imaginary; + } + else + { + local_interface->real = NULL; + local_interface->imaginary = NULL; + } + } +} + +static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + + double *addr_real = NULL; + double *addr_imaginary = NULL; + starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); + + addr_real = (double*) starpu_malloc_on_node(node, requested_memory); + if (!addr_real) + 
goto fail_real; + addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory); + if (!addr_imaginary) + goto fail_imaginary; + + /* update the data properly in consequence */ + complex_interface->real = addr_real; + complex_interface->imaginary = addr_imaginary; + + return 2*requested_memory; + +fail_imaginary: + starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory); +fail_real: + return -ENOMEM; +} + +static void complex_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); + + starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory); + complex_interface->real = NULL; + starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory); + complex_interface->imaginary = NULL; +} + +static size_t complex_get_size(starpu_data_handle_t handle) +{ + size_t size; + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + size = complex_interface->nx * 2 * sizeof(double); + return size; +} + +static uint32_t complex_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_complex_get_nx(handle), 0); +} + +static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = complex_get_size(handle); + if (ptr != NULL) + { + char *data; + data = (void*) starpu_malloc_on_node_flags(node, *count, 0); + *ptr = data; + memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double)); + 
memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); + } + + return 0; +} + +static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + char *data = ptr; + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double)); + memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double)); + memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); + + return 0; +} + +static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + complex_peek_data(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} + +static starpu_ssize_t complex_describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + return snprintf(buf, size, "Complex%d", complex_interface->nx); +} + +static int complex_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_complex_interface *complex_a = (struct starpu_complex_interface *) data_interface_a; + struct starpu_complex_interface *complex_b = (struct starpu_complex_interface *) data_interface_b; + + return (complex_a->nx == complex_b->nx); +} + +#define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while(0) +#define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) + +static starpu_ssize_t complex_size_meta(struct starpu_complex_interface *complex_interface) +{ + return sizeof(complex_interface->real) + sizeof(complex_interface->imaginary) + sizeof(complex_interface->nx); 
+} + +static int complex_pack_meta(void *data_interface, void **ptr, starpu_ssize_t *count) +{ + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; + + *count = complex_size_meta(complex_interface); + *ptr = calloc(1, *count); + + char *cur = *ptr; + _pack(cur, complex_interface->real); + _pack(cur, complex_interface->imaginary); + _pack(cur, complex_interface->nx); + + return 0; +} + +static int complex_unpack_meta(void **data_interface, void *ptr, starpu_ssize_t *count) +{ + *data_interface = calloc(1, sizeof(struct starpu_complex_interface)); + struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) (*data_interface); + char *cur = ptr; + + _unpack(complex_interface->real, cur); + _unpack(complex_interface->imaginary, cur); + _unpack(complex_interface->nx, cur); + + *count = complex_size_meta(complex_interface); + + return 0; +} + +int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_complex_interface *src_complex = src_interface; + struct starpu_complex_interface *dst_complex = dst_interface; + int ret = 0; + + if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node, + (uintptr_t) dst_complex->real, 0, dst_node, + src_complex->nx*sizeof(src_complex->real[0]), + async_data)) + ret = -EAGAIN; + if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node, + (uintptr_t) dst_complex->imaginary, 0, dst_node, + src_complex->nx*sizeof(src_complex->imaginary[0]), + async_data)) + ret = -EAGAIN; + return ret; +} + +const struct starpu_data_copy_methods complex_copy_methods = +{ + .any_to_any = copy_any_to_any +}; + +struct starpu_data_interface_ops interface_complex_ops = +{ + .register_data_handle = complex_register_data_handle, + .allocate_data_on_node = complex_allocate_data_on_node, + .free_data_on_node = complex_free_data_on_node, + .copy_methods = &complex_copy_methods, 
+ .get_size = complex_get_size, + .footprint = complex_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_complex_interface), + .to_pointer = NULL, + .pack_data = complex_pack_data, + .peek_data = complex_peek_data, + .unpack_data = complex_unpack_data, + .describe = complex_describe, + .compare = complex_compare, + .pack_meta = complex_pack_meta, + .unpack_meta = complex_unpack_meta, + .free_meta = NULL +}; + +void starpu_complex_data_register_ops() +{ + starpu_data_register_ops(&interface_complex_ops); +} + +void starpu_complex_data_register(starpu_data_handle_t *handleptr, int home_node, double *real, double *imaginary, int nx) +{ + struct starpu_complex_interface complex = + { + .real = real, + .imaginary = imaginary, + .nx = nx + }; + + starpu_data_register(handleptr, home_node, &complex, &interface_complex_ops); +} diff --git a/examples/interface/complex_interface.h b/examples/interface/complex_interface.h new file mode 100644 index 0000000..7c0b4e6 --- /dev/null +++ b/examples/interface/complex_interface.h @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
#ifndef __COMPLEX_INTERFACE_H
#define __COMPLEX_INTERFACE_H

/* interface for complex numbers */
/* A vector of nx complex numbers stored as two separate arrays of doubles. */
struct starpu_complex_interface
{
	/* array of the nx real parts */
	double *real;
	/* array of the nx imaginary parts */
	double *imaginary;
	/* number of elements in each array */
	int nx;
};

/* Register a complex vector whose two arrays are located on home_node; the new handle is returned in *handle. */
void starpu_complex_data_register(starpu_data_handle_t *handle, int home_node, double *real, double *imaginary, int nx);
/* Register only the interface operations, without any data. */
void starpu_complex_data_register_ops();

/* Handle-based accessors; they read the interface of the main RAM node. */
double *starpu_complex_get_real(starpu_data_handle_t handle);
double *starpu_complex_get_imaginary(starpu_data_handle_t handle);
int starpu_complex_get_nx(starpu_data_handle_t handle);

/* Field accessors for use inside codelets: 'interface' is the untyped buffer pointer received by the codelet. */
#define STARPU_COMPLEX_GET_REAL(interface) (((struct starpu_complex_interface *)(interface))->real)
#define STARPU_COMPLEX_GET_IMAGINARY(interface) (((struct starpu_complex_interface *)(interface))->imaginary)
#define STARPU_COMPLEX_GET_NX(interface) (((struct starpu_complex_interface *)(interface))->nx)

/* Split complex vector into smaller complex vectors */
void starpu_complex_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks);

/* Split complex into two simple vectors */
void starpu_complex_filter_canonical(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks);
/* Companion of the canonical filter: returns the interface operations of the children (vectors). */
struct starpu_data_interface_ops *starpu_complex_filter_canonical_child_ops(struct starpu_data_filter *f, unsigned child);

#endif /* __COMPLEX_INTERFACE_H */
/* Use the "double" type */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Copy the nx elements of a split complex vector (i_real, i_imaginary) into
 * (o_real, o_imaginary), one element per work-item. Work-items whose global
 * id is not below nx leave memory untouched.
 */
__kernel void complex_copy_opencl(__global double *o_real,
				  __global double *o_imaginary,
				  __global double *i_real,
				  __global double *i_imaginary,
				  unsigned nx)
{
	const int gid = get_global_id(0);

	/* padding work-item: nothing to copy */
	if (!(gid < nx))
		return;

	o_real[gid] = i_real[gid];
	o_imaginary[gid] = i_imaginary[gid];
}
/* Element-wise copy of a split complex vector: thread i copies element i of
 * both the real and the imaginary array, threads with i >= n do nothing. */
static __global__ void complex_copy_cuda(double *o_real, double *o_imaginary, double *i_real, double *i_imaginary, unsigned n)
{
	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;

	if (i < n)
	{
		o_real[i] = i_real[i];
		o_imaginary[i] = i_imaginary[i];
	}
}

/*
 * CUDA implementation of the copy codelet.
 *
 * descr[0] is the source complex vector, descr[1] the destination; both are
 * struct starpu_complex_interface.
 */
extern "C" void copy_complex_codelet_cuda(void *descr[], void *_args)
{
	(void)_args;

	int nx = STARPU_COMPLEX_GET_NX(descr[0]);

	double *i_real = STARPU_COMPLEX_GET_REAL(descr[0]);
	double *i_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);

	double *o_real = STARPU_COMPLEX_GET_REAL(descr[1]);
	double *o_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[1]);

	/* One thread per element, rounded up to whole blocks */
	unsigned threads_per_block = 64;
	unsigned nblocks = (nx + threads_per_block-1) / threads_per_block;

	/* The launch configuration had been lost (the text read "<<>>", which
	 * does not compile). Launch on the worker's own stream.
	 * NOTE(review): no synchronization is done here — confirm the codelet
	 * is declared with STARPU_CUDA_ASYNC. */
	complex_copy_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(o_real, o_imaginary, i_real, i_imaginary, nx);
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
}
+ */ + +#include +#include "complex_interface.h" + +extern struct starpu_opencl_program opencl_program; + +void copy_complex_codelet_opencl(void *buffers[], void *_args) +{ + (void) _args; + + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + /* length of the vector */ + unsigned n = STARPU_COMPLEX_GET_NX(buffers[0]); + /* OpenCL copy of the vector pointer */ + cl_mem i_real = (cl_mem) STARPU_COMPLEX_GET_REAL(buffers[0]); + cl_mem i_imaginary = (cl_mem) STARPU_COMPLEX_GET_IMAGINARY(buffers[0]); + cl_mem o_real = (cl_mem) STARPU_COMPLEX_GET_REAL(buffers[1]); + cl_mem o_imaginary = (cl_mem) STARPU_COMPLEX_GET_IMAGINARY(buffers[1]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real); + err|= clSetKernelArg(kernel, 1, sizeof(o_imaginary), &o_imaginary); + err|= clSetKernelArg(kernel, 2, sizeof(i_real), &i_real); + err|= clSetKernelArg(kernel, 3, sizeof(i_imaginary), &i_imaginary); + err|= clSetKernelArg(kernel, 4, sizeof(n), &n); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) + local=global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/examples/loader.c b/examples/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/examples/loader.c @@ -0,0 +1,505 @@ +/* 
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv: receives the current time as seconds/microseconds since the Unix
 *     epoch; may be NULL, in which case nothing is stored.
 * tz: ignored.
 *
 * Always returns 0. The original definition was declared to return int but
 * had no return statement, so using its value was undefined behaviour.
 */
static int mygettimeofday(struct timeval *tv, void *tz)
{
	(void) tz;

	if (tv)
	{
		FILETIME ft;
		unsigned long long res;
		GetSystemTimeAsFileTime(&ft);
		/* 100-nanosecond intervals since January 1, 1601 */
		res = ft.dwHighDateTime;
		res <<= 32;
		res |= ft.dwLowDateTime;
		res /= 10;
		/* Now we have microseconds */
		/* 369 years of 365 days plus 89 leap days separate 1601 from 1970 */
		res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
		/* Now we are based on epoch */
		tv->tv_sec = res / 1000000ULL;
		tv->tv_usec = res % 1000000ULL;
	}
	return 0;
}
#else
#define mygettimeofday(tv,tz) gettimeofday(tv,tz)
#endif
*/ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/examples/lu/blas_complex.c b/examples/lu/blas_complex.c 
new file mode 100644 index 0000000..b9bdb3c --- /dev/null +++ b/examples/lu/blas_complex.c @@ -0,0 +1,212 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include +#include "blas_complex.h" + +/* + * This file contains BLAS wrappers for the different BLAS implementations + * (eg. REFBLAS, STARPU_ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most + * libraries do not supply C-based ordering. 
+ */ + +#ifdef STARPU_ATLAS +#error not implemented +#elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) +#error not implemented +#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL) + +inline void CGEMM(char *transa, char *transb, int M, int N, int K, + complex float alpha, complex float *A, int lda, complex float *B, int ldb, + complex float beta, complex float *C, int ldc) +{ + cgemm_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, &ldc); +} + +inline void ZGEMM(char *transa, char *transb, int M, int N, int K, + complex double alpha, complex double *A, int lda, complex double *B, int ldb, + complex double beta, complex double *C, int ldc) +{ + zgemm_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, &ldc); +} + +inline void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda, + complex float *X, int incX, complex float beta, complex float *Y, int incY) +{ + cgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + +inline void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda, + complex double *X, int incX, complex double beta, complex double *Y, int incY) +{ + zgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + +inline float SCASUM(int N, complex float *X, int incX) +{ + return scasum_(&N, X, &incX); +} + +inline double DZASUM(int N, complex double *X, int incX) +{ + return dzasum_(&N, X, &incX); +} + +void CSCAL(int N, complex float alpha, complex float *X, int incX) +{ + cscal_(&N, &alpha, X, &incX); +} + +void ZSCAL(int N, complex double alpha, complex double *X, int incX) +{ + zscal_(&N, &alpha, X, &incX); +} + +void CTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const complex float alpha, const complex float *A, const int lda, + complex float *B, const int ldb) +{ + ctrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void ZTRSM 
(const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const complex double alpha, const complex double *A, const int lda, + complex double *B, const int ldb) +{ + ztrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void CSYR (const char *uplo, const int n, const complex float alpha, + const complex float *x, const int incx, complex float *A, const int lda) +{ + csyr_(uplo, &n, &alpha, x, &incx, A, &lda); +} + +void CSYRK (const char *uplo, const char *trans, const int n, + const int k, const complex float alpha, const complex float *A, + const int lda, const complex float beta, complex float *C, + const int ldc) +{ + csyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); +} + +void CGERU(const int m, const int n, const complex float alpha, + const complex float *x, const int incx, const complex float *y, + const int incy, complex float *A, const int lda) +{ + cgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void ZGERU(const int m, const int n, const complex double alpha, + const complex double *x, const int incx, const complex double *y, + const int incy, complex double *A, const int lda) +{ + zgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void CTRSV (const char *uplo, const char *trans, const char *diag, + const int n, const complex float *A, const int lda, complex float *x, + const int incx) +{ + ctrsv_(uplo, trans, diag, &n, A, &lda, x, &incx); +} + +void CTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const complex float alpha, const complex float *A, const int lda, + complex float *B, const int ldb) +{ + ctrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void ZTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const complex double alpha, const complex double *A, const int lda, + complex double *B, const int ldb) +{ + 
ztrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void CTRMV(const char *uplo, const char *transA, const char *diag, + const int n, const complex float *A, const int lda, complex float *X, + const int incX) +{ + ctrmv_(uplo, transA, diag, &n, A, &lda, X, &incX); +} + +void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incY) +{ + caxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY) +{ + zaxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +int ICAMAX (const int n, complex float *X, const int incX) +{ + int retVal; + retVal = icamax_ (&n, X, &incX); + return retVal; +} + +int IZAMAX (const int n, complex double *X, const int incX) +{ + int retVal; + retVal = izamax_ (&n, X, &incX); + return retVal; +} + +complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy) +{ + complex float retVal = 0; + + /* GOTOBLAS will return a FLOATRET which is a double, not a float */ + retVal = (float)cdotu_(&n, x, &incx, y, &incy); + + return retVal; +} + +complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy) +{ + return zdotu_(&n, x, &incx, y, &incy); +} + +void CSWAP(const int n, complex float *X, const int incX, complex float *Y, const int incY) +{ + cswap_(&n, X, &incX, Y, &incY); +} + +void ZSWAP(const int n, complex double *X, const int incX, complex double *Y, const int incY) +{ + zswap_(&n, X, &incX, Y, &incY); +} + + +#else +#error "no BLAS lib available..." +#endif diff --git a/examples/lu/blas_complex.h b/examples/lu/blas_complex.h new file mode 100644 index 0000000..b9166bc --- /dev/null +++ b/examples/lu/blas_complex.h @@ -0,0 +1,155 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __BLAS_H__ +#define __BLAS_H__ + +#include +#if defined(STARPU_MKL) +#define MKLcomplex8 complex float +#define MKLcomplex16 complex double +#endif + +void CGEMM(char *transa, char *transb, int M, int N, int K, complex float alpha, complex float *A, int lda, + complex float *B, int ldb, complex float beta, complex float *C, int ldc); +void ZGEMM(char *transa, char *transb, int M, int N, int K, complex double alpha, complex double *A, int lda, + complex double *B, int ldb, complex double beta, complex double *C, int ldc); +void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda, + complex float *X, int incX, complex float beta, complex float *Y, int incY); +void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda, + complex double *X, int incX, complex double beta, complex double *Y, int incY); +float SCASUM(int N, complex float *X, int incX); +double DZASUM(int N, complex double *X, int incX); +void CSCAL(int N, complex float alpha, complex float *X, int incX); +void ZSCAL(int N, complex double alpha, complex double *X, int incX); +void CTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const complex float alpha, const complex float *A, const int lda, + complex float *B, const int ldb); +void ZTRSM (const char *side, 
const char *uplo, const char *transa, + const char *diag, const int m, const int n, + const complex double alpha, const complex double *A, const int lda, + complex double *B, const int ldb); +void CSYR (const char *uplo, const int n, const complex float alpha, + const complex float *x, const int incx, complex float *A, const int lda); +void CSYRK (const char *uplo, const char *trans, const int n, + const int k, const complex float alpha, const complex float *A, + const int lda, const complex float beta, complex float *C, + const int ldc); +void CGERU (const int m, const int n, const complex float alpha, + const complex float *x, const int incx, const complex float *y, + const int incy, complex float *A, const int lda); +void ZGERU(const int m, const int n, const complex double alpha, + const complex double *x, const int incx, const complex double *y, + const int incy, complex double *A, const int lda); +void CTRSV (const char *uplo, const char *trans, const char *diag, + const int n, const complex float *A, const int lda, complex float *x, + const int incx); +void CTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const complex float alpha, const complex float *A, const int lda, + complex float *B, const int ldb); +void ZTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const int m, const int n, + const complex double alpha, const complex double *A, const int lda, + complex double *B, const int ldb); +void CTRMV(const char *uplo, const char *transA, const char *diag, + const int n, const complex float *A, const int lda, complex float *X, + const int incX); +void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incy); +void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY); +int ICAMAX (const int n, complex float *X, const int incX); +int IZAMAX (const 
int n, complex double *X, const int incX); +complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy); +complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy); +void CSWAP(const int n, complex float *x, const int incx, complex float *y, const int incy); +void ZSWAP(const int n, complex double *x, const int incx, complex double *y, const int incy); + +#if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) +#error not implemented +#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL) + +extern void cgemm_ (const char *transa, const char *transb, const int *m, + const int *n, const int *k, const complex float *alpha, + const complex float *A, const int *lda, const complex float *B, + const int *ldb, const complex float *beta, complex float *C, + const int *ldc); +extern void zgemm_ (const char *transa, const char *transb, const int *m, + const int *n, const int *k, const complex double *alpha, + const complex double *A, const int *lda, const complex double *B, + const int *ldb, const complex double *beta, complex double *C, + const int *ldc); +extern void cgemv_(const char *trans, int *m, int *n, complex float *alpha, + void *a, int *lda, void *x, int *incx, + complex float *beta, void *y, int *incy); +extern void zgemv_(const char *trans, int *m, int *n, complex double *alpha, + void *a, int *lda, void *x, int *incx, + complex double *beta, void *y, int *incy); +extern void csyr_ (const char *uplo, const int *n, const complex float *alpha, + const complex float *x, const int *incx, complex float *A, const int *lda); +extern void csyrk_ (const char *uplo, const char *trans, const int *n, + const int *k, const complex float *alpha, const complex float *A, + const int *lda, const complex float *beta, complex float *C, + const int *ldc); +extern void ctrsm_ (const char *side, const char *uplo, const char *transa, + const char *diag, const int *m, const int 
*n, + const complex float *alpha, const complex float *A, const int *lda, + complex float *B, const int *ldb); +extern void ztrsm_ (const char *side, const char *uplo, const char *transa, + const char *diag, const int *m, const int *n, + const complex double *alpha, const complex double *A, const int *lda, + complex double *B, const int *ldb); +extern complex double scasum_ (const int *n, const complex float *x, const int *incx); +extern complex double dzasum_ (const int *n, const complex double *x, const int *incx); +extern void cscal_ (const int *n, const complex float *alpha, complex float *x, + const int *incx); +extern void zscal_ (const int *n, const complex double *alpha, complex double *x, + const int *incx); +extern void cgeru_(const int *m, const int *n, const complex float *alpha, + const complex float *x, const int *incx, const complex float *y, + const int *incy, complex float *A, const int *lda); +extern void zgeru_(const int *m, const int *n, const complex double *alpha, + const complex double *x, const int *incx, const complex double *y, + const int *incy, complex double *A, const int *lda); +extern void ctrsv_ (const char *uplo, const char *trans, const char *diag, + const int *n, const complex float *A, const int *lda, complex float *x, + const int *incx); +extern void ctrmm_(const char *side, const char *uplo, const char *transA, + const char *diag, const int *m, const int *n, + const complex float *alpha, const complex float *A, const int *lda, + complex float *B, const int *ldb); +extern void ztrmm_(const char *side, const char *uplo, const char *transA, + const char *diag, const int *m, const int *n, + const complex double *alpha, const complex double *A, const int *lda, + complex double *B, const int *ldb); +extern void ctrmv_(const char *uplo, const char *transA, const char *diag, + const int *n, const complex float *A, const int *lda, complex float *X, + const int *incX); +extern void caxpy_(const int *n, const complex float *alpha, complex 
float *X, const int *incX, + complex float *Y, const int *incy); +extern void zaxpy_(const int *n, const complex double *alpha, complex double *X, const int *incX, + complex double *Y, const int *incy); +extern int icamax_(const int *n, complex float *X, const int *incX); +extern int izamax_(const int *n, complex double *X, const int *incX); +/* for some reason, FLOATRET is not a float but a double in GOTOBLAS */ +extern complex double cdotu_(const int *n, const complex float *x, const int *incx, const complex float *y, const int *incy); +extern complex double zdotu_(const int *n, const complex double *x, const int *incx, const complex double *y, const int *incy); +extern void cswap_(const int *n, complex float *x, const int *incx, complex float *y, const int *incy); +extern void zswap_(const int *n, complex double *x, const int *incx, complex double *y, const int *incy); + +#endif + +#endif /* __BLAS_COMPLEX_H__ */ diff --git a/examples/lu/clu.c b/examples/lu/clu.c new file mode 100644 index 0000000..3ec0a82 --- /dev/null +++ b/examples/lu/clu.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Complex float LU version, explicit dependencies */ + +#include "complex_float.h" +#include "xlu.c" diff --git a/examples/lu/clu_implicit.c b/examples/lu/clu_implicit.c new file mode 100644 index 0000000..b0dace4 --- /dev/null +++ b/examples/lu/clu_implicit.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Complex float LU version, implicit dependencies */ + +#include "complex_float.h" +#include "xlu_implicit.c" diff --git a/examples/lu/clu_implicit_pivot.c b/examples/lu/clu_implicit_pivot.c new file mode 100644 index 0000000..d53df0b --- /dev/null +++ b/examples/lu/clu_implicit_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Complex float LU version, implicit dependencies, and partial pivoting */ + +#include "complex_float.h" +#include "xlu_implicit_pivot.c" diff --git a/examples/lu/clu_kernels.c b/examples/lu/clu_kernels.c new file mode 100644 index 0000000..da0b2ea --- /dev/null +++ b/examples/lu/clu_kernels.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Complex float LU kernels */ + +#include "complex_float.h" +#include "xlu_kernels.c" diff --git a/examples/lu/clu_pivot.c b/examples/lu/clu_pivot.c new file mode 100644 index 0000000..075f0a7 --- /dev/null +++ b/examples/lu/clu_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Complex float LU kernels with partial pivoting */ + +#include "complex_float.h" +#include "xlu_pivot.c" diff --git a/examples/lu/complex_double.h b/examples/lu/complex_double.h new file mode 100644 index 0000000..04b3700 --- /dev/null +++ b/examples/lu/complex_double.h @@ -0,0 +1,52 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Complex double macros */ + +#include +#include "blas_complex.h" +#define TYPE complex double +#define CUBLAS_TYPE cuDoubleComplex + +#define STARPU_LU(name) starpu_zlu_##name +#define COMPLEX_LU + +#define CUBLAS_GEMM cublasZgemm +#define CUBLAS_TRSM cublasZtrsm +#define CUBLAS_SCAL cublasZscal +#define CUBLAS_GER cublasZgeru +#define CUBLAS_SWAP cublasZswap +#define CUBLAS_IAMAX cublasIzamax + +#define CUSOLVER_GETRF cusolverDnZgetrf +#define CUSOLVER_GETRF_BUFFERSIZE cusolverDnZgetrf_bufferSize + +#define CPU_GEMM ZGEMM +#define CPU_TRSM ZTRSM +#define CPU_SCAL ZSCAL +#define CPU_GER ZGERU +#define CPU_SWAP ZSWAP + +#define CPU_TRMM ZTRMM +#define CPU_AXPY ZAXPY +#define CPU_ASUM DZASUM +#define CPU_IAMAX IZAMAX + +#define PIVOT_THRESHHOLD 10e-5 + +#define CAN_EXECUTE .can_execute = can_execute, + +#define ISZERO(f) (fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO) diff --git a/examples/lu/complex_float.h b/examples/lu/complex_float.h new file mode 100644 
index 0000000..908e043 --- /dev/null +++ b/examples/lu/complex_float.h @@ -0,0 +1,52 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Complex float macros */ + +#include +#include "blas_complex.h" +#define TYPE complex float +#define CUBLAS_TYPE cuComplex + +#define STARPU_LU(name) starpu_clu_##name +#define COMPLEX_LU + +#define CUBLAS_GEMM cublasCgemm +#define CUBLAS_TRSM cublasCtrsm +#define CUBLAS_SCAL cublasCscal +#define CUBLAS_GER cublasCgeru +#define CUBLAS_SWAP cublasCswap +#define CUBLAS_IAMAX cublasIcamax + +#define CUSOLVER_GETRF cusolverDnCgetrf +#define CUSOLVER_GETRF_BUFFERSIZE cusolverDnCgetrf_bufferSize + +#define CPU_GEMM CGEMM +#define CPU_TRSM CTRSM +#define CPU_SCAL CSCAL +#define CPU_GER CGERU +#define CPU_SWAP CSWAP + +#define CPU_TRMM CTRMM +#define CPU_AXPY CAXPY +#define CPU_ASUM SCASUM +#define CPU_IAMAX ICAMAX + +#define PIVOT_THRESHHOLD 10e-5 + +#define CAN_EXECUTE + +#define ISZERO(f) (fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO) diff --git a/examples/lu/dlu.c b/examples/lu/dlu.c new file mode 100644 index 0000000..ea78496 --- /dev/null +++ b/examples/lu/dlu.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real double LU version, explicit dependencies */ + +#include "lu-double.h" +#include "xlu.c" diff --git a/examples/lu/dlu_implicit.c b/examples/lu/dlu_implicit.c new file mode 100644 index 0000000..c299ffd --- /dev/null +++ b/examples/lu/dlu_implicit.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real double LU version, implicit dependencies */ + +#include "lu-double.h" +#include "xlu_implicit.c" diff --git a/examples/lu/dlu_implicit_pivot.c b/examples/lu/dlu_implicit_pivot.c new file mode 100644 index 0000000..0dafbb5 --- /dev/null +++ b/examples/lu/dlu_implicit_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real double LU version, implicit dependencies with partial pivoting */ + +#include "lu-double.h" +#include "xlu_implicit_pivot.c" diff --git a/examples/lu/dlu_kernels.c b/examples/lu/dlu_kernels.c new file mode 100644 index 0000000..97436ef --- /dev/null +++ b/examples/lu/dlu_kernels.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real double LU kernels */ + +#include "lu-double.h" +#include "xlu_kernels.c" diff --git a/examples/lu/dlu_pivot.c b/examples/lu/dlu_pivot.c new file mode 100644 index 0000000..62d831d --- /dev/null +++ b/examples/lu/dlu_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real double LU kernels with partial pivoting */ + +#include "lu-double.h" +#include "xlu_pivot.c" diff --git a/examples/lu/lu-double.h b/examples/lu/lu-double.h new file mode 100644 index 0000000..ad9edc4 --- /dev/null +++ b/examples/lu/lu-double.h @@ -0,0 +1,50 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Real double macros */ + +#define TYPE double +#define CUBLAS_TYPE TYPE + +#define STARPU_LU(name) starpu_dlu_##name + +#define CUBLAS_GEMM cublasDgemm +#define CUBLAS_TRSM cublasDtrsm +#define CUBLAS_SCAL cublasDscal +#define CUBLAS_GER cublasDger +#define CUBLAS_SWAP cublasDswap +#define CUBLAS_IAMAX cublasIdamax + + +#define CUSOLVER_GETRF cusolverDnDgetrf +#define CUSOLVER_GETRF_BUFFERSIZE cusolverDnDgetrf_bufferSize + +#define CPU_GEMM STARPU_DGEMM +#define CPU_TRSM STARPU_DTRSM +#define CPU_SCAL STARPU_DSCAL +#define CPU_GER STARPU_DGER +#define CPU_SWAP STARPU_DSWAP + +#define CPU_TRMM STARPU_DTRMM +#define CPU_AXPY STARPU_DAXPY +#define CPU_ASUM STARPU_DASUM +#define CPU_IAMAX STARPU_IDAMAX + +#define PIVOT_THRESHHOLD 10e-10 + +#define CAN_EXECUTE .can_execute = can_execute, + +#define ISZERO(f) (fpclassify(f) == FP_ZERO) diff --git a/examples/lu/lu-float.h b/examples/lu/lu-float.h new file mode 100644 index 0000000..bfeee4e --- /dev/null +++ b/examples/lu/lu-float.h @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Real float macros */ + +#define TYPE float +#define CUBLAS_TYPE TYPE + +#define STARPU_LU(name) starpu_slu_##name + +#define CUBLAS_GEMM cublasSgemm +#define CUBLAS_TRSM cublasStrsm +#define CUBLAS_SCAL cublasSscal +#define CUBLAS_GER cublasSger +#define CUBLAS_SWAP cublasSswap +#define CUBLAS_IAMAX cublasIsamax + +#define CUSOLVER_GETRF cusolverDnSgetrf +#define CUSOLVER_GETRF_BUFFERSIZE cusolverDnSgetrf_bufferSize + +#define CPU_GEMM STARPU_SGEMM +#define CPU_TRSM STARPU_STRSM +#define CPU_SCAL STARPU_SSCAL +#define CPU_GER STARPU_SGER +#define CPU_SWAP STARPU_SSWAP + +#define CPU_TRMM STARPU_STRMM +#define CPU_AXPY STARPU_SAXPY +#define CPU_ASUM STARPU_SASUM +#define CPU_IAMAX STARPU_ISAMAX + +#define PIVOT_THRESHHOLD 10e-5 + +#define CAN_EXECUTE + +#define ISZERO(f) (fpclassify(f) == FP_ZERO) diff --git a/examples/lu/lu.sh b/examples/lu/lu.sh new file mode 100755 index 0000000..a6e41aa --- /dev/null +++ b/examples/lu/lu.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
# Test various LU options

set -e

# Quote everything derived from $0 so the script still works when it is
# installed under a path that contains whitespace.
PREFIX=$(dirname "$0")
rm -rf "$PREFIX/lu.traces"
mkdir -p "$PREFIX/lu.traces"

# Traces and the files written by -bounddeps all go to the same directory.
export STARPU_FXT_PREFIX="$PREFIX/lu.traces"
export STARPU_FXT_TRACE=1

# Use a much smaller matrix when only a quick check is requested.
if [ "$STARPU_QUICK_CHECK" = 1 ]
then
	SIDE=16
else
	SIDE=160
fi

# Run the same set of options against the implicit-dependency example first,
# then against the explicit-dependency one.
# $MS_LAUNCHER and $STARPU_LAUNCH are left unquoted on purpose: they may be
# empty or hold a launcher command followed by its own arguments.
for example in lu_implicit_example_float lu_example_float
do
	$MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/$example" -size $((SIDE * 4)) -nblocks 4 -piv
	$MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/$example" -size $((SIDE * 4)) -nblocks 4 -no-stride
	$MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/$example" -size $((SIDE * 4)) -nblocks 4 -bound
	$MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/$example" -size $((SIDE * 2)) -nblocks 2 -bounddeps -directory "$STARPU_FXT_PREFIX"
	$MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/$example" -size $((SIDE * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio -directory "$STARPU_FXT_PREFIX"
done
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Main body for the LU factorization: matrix initialization and result + * checking */ + +#include +#include +#include +#include +#include +#include +#include "xlu.h" +#include "xlu_kernels.h" + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#include "starpu_cusolver.h" + +static unsigned long size = 0; +static unsigned nblocks = 0; +static unsigned check = 0; +static unsigned pivot = 0; +static unsigned no_stride = 0; +static unsigned profile = 0; +static unsigned no_prio=0; +unsigned bound = 0; +unsigned bounddeps = 0; +unsigned boundprio = 0; +char *directory = NULL; + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +TYPE *A, *A_saved; + +/* in case we use non-strided blocks */ +TYPE **A_blocks; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + size = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nblocks = strtol(argv[++i], &argptr, 10); + } + +#ifndef STARPU_SIMGRID + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + else if (strcmp(argv[i], "-piv") == 0) + { + pivot = 1; + } + + else if (strcmp(argv[i], "-no-stride") == 0) + { + no_stride = 1; + } +#endif + + else if (strcmp(argv[i], "-profile") == 0) + { + profile = 1; + } + + else if (strcmp(argv[i], "-bound") == 0) + { + bound = 1; + } + else if (strcmp(argv[i], "-bounddeps") == 0) + { + bound = 1; + bounddeps = 1; + } + else if (strcmp(argv[i], "-bounddepsprio") == 0) + { + bound = 1; + bounddeps = 1; + boundprio = 1; + } + else if (strcmp(argv[i], "-directory") == 0) + { + free(directory); + directory = strdup(argv[++i]); + } + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + fprintf(stderr,"usage: lu [-size n] [-nblocks b] [-piv] [-no-stride] [-profile] [-bound] [-bounddeps] [-bounddepsprio] [-directory d]\n"); + fprintf(stderr,"Default is size %lu and nblocks %u\n", size, nblocks); + exit(0); + } + } +} + +static void display_matrix(TYPE *m, unsigned n, unsigned ld, char *str) +{ + (void)m; + (void)n; + (void)ld; + (void)str; +#if 0 + FPRINTF(stderr, "***********\n"); + FPRINTF(stderr, "Display matrix %s\n", str); + unsigned i,j; + for (j = 0; j < n; j++) + { + for (i = 0; i < n; i++) + { + FPRINTF(stderr, "%2.2f\t", m[i+j*ld]); + } + FPRINTF(stderr, "\n"); + } + FPRINTF(stderr, "***********\n"); +#endif +} + +void copy_blocks_into_matrix(void) +{ + unsigned blocksize = (size/nblocks); + + unsigned i, j; + unsigned bi, bj; + for 
(bj = 0; bj < nblocks; bj++) + for (bi = 0; bi < nblocks; bi++) + { + for (j = 0; j < blocksize; j++) + for (i = 0; i < blocksize; i++) + { + A[(i+bi*blocksize) + (j + bj*blocksize)*size] = + A_blocks[bi+nblocks*bj][i + j * blocksize]; + } + + starpu_free_noflag(A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE)); + } +} + + + +void copy_matrix_into_blocks(void) +{ + unsigned blocksize = (size/nblocks); + + unsigned i, j; + unsigned bi, bj; + for (bj = 0; bj < nblocks; bj++) + for (bi = 0; bi < nblocks; bi++) + { + starpu_malloc((void **)&A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE)); + + for (j = 0; j < blocksize; j++) + for (i = 0; i < blocksize; i++) + { + A_blocks[bi+nblocks*bj][i + j * blocksize] = + A[(i+bi*blocksize) + (j + bj*blocksize)*size]; + } + } +} + +static void init_matrix(void) +{ + /* allocate matrix */ +#ifdef STARPU_SIMGRID + A = (void*) 1; +#else + starpu_malloc_flags((void **)&A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); +#endif + STARPU_ASSERT(A); + + starpu_srand48((long int)time(NULL)); + /* starpu_srand48(0); */ + +#ifndef STARPU_SIMGRID + /* initialize matrix content */ + unsigned long i,j; + for (j = 0; j < size; j++) + { + for (i = 0; i < size; i++) + { + A[i + j*size] = (TYPE)starpu_drand48(); +#ifdef COMPLEX_LU + /* also randomize the imaginary component for complex number cases */ + A[i + j*size] += (TYPE)(I*starpu_drand48()); +#endif + if (i == j) + { + A[i + j*size] += 1; + A[i + j*size] *= size; + } + } + } +#endif + +} + +static void save_matrix(void) +{ + A_saved = malloc((size_t)size*size*sizeof(TYPE)); + STARPU_ASSERT(A_saved); + + memcpy(A_saved, A, (size_t)size*size*sizeof(TYPE)); +} + +static double frobenius_norm(TYPE *v, unsigned n) +{ + double sum2 = 0.0; + + /* compute sqrt(Sum(|x|^2)) */ + + unsigned i,j; + for (j = 0; j < n; j++) + for (i = 0; i < n; i++) + { + double a = fabsl((double)v[i+n*j]); + sum2 += a*a; + } + + return sqrt(sum2); 
+} + +static void pivot_saved_matrix(unsigned *ipiv) +{ + unsigned k; + for (k = 0; k < size; k++) + { + if (k != ipiv[k]) + { + /* FPRINTF(stderr, "SWAP %d and %d\n", k, ipiv[k]); */ + CPU_SWAP(size, &A_saved[k*size], 1, &A_saved[ipiv[k]*size], 1); + } + } +} + +static void check_result(void) +{ + unsigned i,j; + TYPE *L, *U; + + L = malloc((size_t)size*size*sizeof(TYPE)); + U = malloc((size_t)size*size*sizeof(TYPE)); + + memset(L, 0, size*size*sizeof(TYPE)); + memset(U, 0, size*size*sizeof(TYPE)); + + /* only keep the lower part */ + for (j = 0; j < size; j++) + { + for (i = 0; i < j; i++) + { + L[j+i*size] = A[j+i*size]; + } + + /* diag i = j */ + L[j+j*size] = A[j+j*size]; + U[j+j*size] = 1.0; + + for (i = j+1; i < size; i++) + { + U[j+i*size] = A[j+i*size]; + } + } + + display_matrix(L, size, size, "L"); + display_matrix(U, size, size, "U"); + + /* now A_err = L, compute L*U */ + CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); + + display_matrix(A_saved, size, size, "P A_saved"); + display_matrix(L, size, size, "LU"); + + /* compute "LU - A" in L*/ + CPU_AXPY(size*size, -1.0, A_saved, 1, L, 1); + display_matrix(L, size, size, "Residuals"); + +#ifdef COMPLEX_LU + double err = CPU_ASUM(size*size, L, 1); + int max = CPU_IAMAX(size*size, L, 1); + TYPE l_max = L[max]; + + FPRINTF(stderr, "Avg error : %e\n", err/(size*size)); + FPRINTF(stderr, "Max error : %e\n", sqrt(creal(l_max)*creal(l_max)+cimag(l_max)*cimag(l_max))); +#else + TYPE err = CPU_ASUM(size*size, L, 1); + int max = CPU_IAMAX(size*size, L, 1); + + FPRINTF(stderr, "Avg error : %e\n", err/(size*size)); + FPRINTF(stderr, "Max error : %e\n", L[max]); +#endif + + double residual = frobenius_norm(L, size); + double matnorm = frobenius_norm(A_saved, size); + + FPRINTF(stderr, "||%sA-LU|| / (||A||*N) : %e\n", pivot?"P":"", residual/(matnorm*size)); + + if (residual/(matnorm*size) > PIVOT_THRESHHOLD) + exit(-1); + + free(L); + free(U); + free(A_saved); +} + +int main(int argc, char **argv) +{ 
+ int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count(); + int power_cbrt = cbrt(power); +#ifndef STARPU_LONG_CHECK + power_cbrt /= 2; +#endif + if (power_cbrt < 1) + power_cbrt = 1; + +#ifdef STARPU_QUICK_CHECK + if (!size) + size = 320*2*power_cbrt; + if (!nblocks) + nblocks = 2*power_cbrt; +#else + if (!size) + size = 960*8*power_cbrt; + if (!nblocks) + nblocks = 8*power_cbrt; +#endif + + parse_args(argc, argv); + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + size = 16; +#endif + + starpu_cublas_init(); + starpu_cusolver_init(); + + init_matrix(); + +#ifndef STARPU_SIMGRID + unsigned *ipiv = NULL; + if (check) + save_matrix(); + + display_matrix(A, size, size, "A"); + + if (profile) + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + /* Factorize the matrix (in place) */ + if (pivot) + { + ipiv = malloc(size*sizeof(unsigned)); + if (no_stride) + { + /* in case the LU decomposition uses non-strided blocks, we _copy_ the matrix into smaller blocks */ + A_blocks = malloc(nblocks*nblocks*sizeof(TYPE *)); + copy_matrix_into_blocks(); + + ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks, no_prio); + + copy_blocks_into_matrix(); + free(A_blocks); + } + else + { + double start; + double end; + + start = starpu_timing_now(); + + ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks, no_prio); + + end = starpu_timing_now(); + + double timing = end - start; + + unsigned n = size; + double flop = (2.0f*n*n*n)/3.0f; + FPRINTF(stderr, "Synthetic GFlop/s (TOTAL) : \n"); + FPRINTF(stdout, "%u %6.2f\n", n, (flop/timing/1000.0f)); + } + } + else +#endif + { + ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks, no_prio); + } + + if (profile) + { + FPRINTF(stderr, "Setting profile\n"); + starpu_profiling_status_set(STARPU_PROFILING_DISABLE); + 
starpu_profiling_bus_helper_display_summary(); + } + + if (bound) + { + if (bounddeps) + { + if (!directory) + directory = strdup("."); + char filename[256]; + snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.pl"); + FILE *f = fopen(filename, "w"); + starpu_bound_print_lp(f); + FPRINTF(stderr,"system printed to %s\n", filename); + fclose(f); + snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.mps"); + f = fopen(filename, "w"); + starpu_bound_print_mps(f); + FPRINTF(stderr,"system printed to %s\n", filename); + fclose(f); + snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.dot"); + f = fopen(filename, "w"); + starpu_bound_print_dot(f); + FPRINTF(stderr,"system printed to %s\n", filename); + fclose(f); + } + } + +#ifndef STARPU_SIMGRID + if (check) + { + FPRINTF(stderr, "Checking result\n"); + if (pivot) + { + pivot_saved_matrix(ipiv); + } + + check_result(); + } + + if (pivot) + free(ipiv); +#endif + +#ifndef STARPU_SIMGRID + starpu_free_flags(A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); +#endif + + starpu_cusolver_shutdown(); + starpu_cublas_shutdown(); + + starpu_shutdown(); + free(directory); + + if (ret == -ENODEV) return 77; else return 0; +} diff --git a/examples/lu/lu_example_complex_double.c b/examples/lu/lu_example_complex_double.c new file mode 100644 index 0000000..e84c75f --- /dev/null +++ b/examples/lu/lu_example_complex_double.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Main body for the LU factorization, complex double version */ + +#include "complex_double.h" +#include "lu_example.c" diff --git a/examples/lu/lu_example_complex_float.c b/examples/lu/lu_example_complex_float.c new file mode 100644 index 0000000..b7b3871 --- /dev/null +++ b/examples/lu/lu_example_complex_float.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Main body for the LU factorization, complex float version */ + +#include "complex_float.h" +#include "lu_example.c" diff --git a/examples/lu/lu_example_double.c b/examples/lu/lu_example_double.c new file mode 100644 index 0000000..26629bc --- /dev/null +++ b/examples/lu/lu_example_double.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Main body for the LU factorization, real double version */ + +#include "lu-double.h" +#include "lu_example.c" diff --git a/examples/lu/lu_example_float.c b/examples/lu/lu_example_float.c new file mode 100644 index 0000000..1797f45 --- /dev/null +++ b/examples/lu/lu_example_float.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Main body for the LU factorization, real float version */ + +#include "lu-float.h" +#include "lu_example.c" diff --git a/examples/lu/slu.c b/examples/lu/slu.c new file mode 100644 index 0000000..825d3ca --- /dev/null +++ b/examples/lu/slu.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real float LU version, explicit dependencies */ + +#include "lu-float.h" +#include "xlu.c" diff --git a/examples/lu/slu_implicit.c b/examples/lu/slu_implicit.c new file mode 100644 index 0000000..1372a46 --- /dev/null +++ b/examples/lu/slu_implicit.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real float LU version, implicit dependencies */ + +#include "lu-float.h" +#include "xlu_implicit.c" diff --git a/examples/lu/slu_implicit_pivot.c b/examples/lu/slu_implicit_pivot.c new file mode 100644 index 0000000..b3b1e57 --- /dev/null +++ b/examples/lu/slu_implicit_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real float LU version, implicit dependencies with partial pivoting */ + +#include "lu-float.h" +#include "xlu_implicit_pivot.c" diff --git a/examples/lu/slu_kernels.c b/examples/lu/slu_kernels.c new file mode 100644 index 0000000..3a6b634 --- /dev/null +++ b/examples/lu/slu_kernels.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real float LU kernels */ + +#include "lu-float.h" +#include "xlu_kernels.c" diff --git a/examples/lu/slu_pivot.c b/examples/lu/slu_pivot.c new file mode 100644 index 0000000..16e1399 --- /dev/null +++ b/examples/lu/slu_pivot.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Real float LU kernels with partial pivoting */ + +#include "lu-float.h" +#include "xlu_pivot.c" diff --git a/examples/lu/xlu.c b/examples/lu/xlu.c new file mode 100644 index 0000000..c5c249f --- /dev/null +++ b/examples/lu/xlu.c @@ -0,0 +1,290 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* LU StarPU implementation using explicit tag dependencies */ +#include "xlu.h" +#include "xlu_kernels.h" + +#include "starpu_cusolver.h" + +/* + * Construct the DAG + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +static struct starpu_task *create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned no_prio, int nblocks) +{ +/* printf("task GETRF k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */ + + struct starpu_task *task = create_task(TAG_GETRF(k)); + + task->cl = &cl_getrf; + task->color = 0xffff00; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + task->handles[1] = scratch; + task->handles[2] = devInfo; +#endif + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); + } + + return task; +} + +static int create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio, int nblocks) +{ + int ret; + +/* printf("task TRSM_LL k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ + + struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); + + task->cl = &cl_trsm_ll; + task->color = 0x8080ff; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, TAG_GETRF(k)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio, int nblocks) +{ + int ret; + struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); + + task->cl = &cl_trsm_ru; + task->color = 0x8080c0; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, TAG_GETRF(k)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio, int nblocks) +{ + int ret; + +/* printf("task GEMM k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ + + struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); + + task->cl = &cl_gemm; + task->color = 0x00ff00; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); /* produced by TAG_TRSM_RU(k, i) */ + task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); /* produced by TAG_TRSM_LL(k, j) */ + task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG_GEMM(k-1, i, j) */ + + if (!no_prio) + task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ + + /* enforce dependencies ... 
/*
 * Task tags.
 *
 * Every tag is a 64-bit value whose kernel kind is stored from bit 60
 * upwards (1 = GETRF, 2 = TRSM_LL, 3 = TRSM_RU, 4 = GEMM, 5 = PIVOT).
 * The step index k is OR-ed in at bit 32 (in the low bits for GETRF) and
 * the block indices in the low bits; GEMM places i at bit 16 and j at
 * bit 0.
 *
 * The fields are OR-ed together without masking, so an index that does
 * not fit in its field overlaps the neighbouring one (e.g. j >= 65536 in
 * TAG_GEMM spills into the bits used by i).
 */
#define TAG_GETRF(k)	((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k)))
#define TAG_TRSM_LL(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
					| (unsigned long long)(i))))
#define TAG_TRSM_RU(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
					| (unsigned long long)(j))))
#define TAG_GEMM(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
					| ((unsigned long long)(i)<<16)	\
					| (unsigned long long)(j))))
#define PIVOT(k,i)	((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32)	\
					| (unsigned long long)(i))))

/*
 * fprintf() wrapper that prints nothing when the STARPU_SSILENT
 * environment variable is set.  The environment is queried on every call.
 */
#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
#ifdef CHECK_RESULTS
/*
 * Rebuild the product of the two factors stored in LU and print the
 * largest absolute difference with A.
 *
 * A and LU are size x size matrices with leading dimension ld.  The
 * entries LU[j+i*ld] with i <= j are copied into L; the entries with
 * i > j are copied into U, whose diagonal is set to 1.  L is then
 * multiplied in place by U with STARPU_STRMM and compared with A.
 *
 * Both temporaries are released before returning.  If they cannot be
 * allocated, a message is printed and the comparison is skipped.
 */
static void compare_A_LU(float *A, float *LU, unsigned size, unsigned ld)
{
	/* widen before multiplying: size*size overflows unsigned above 65535 */
	const size_t n = size;
	const size_t lda = ld;
	size_t i, j;
	float max_err = 0.0f;

	/* calloc zero-fills and checks n*n*sizeof(float) for overflow */
	float *L = calloc(n * n, sizeof(*L));
	float *U = calloc(n * n, sizeof(*U));

	if (L == NULL || U == NULL)
	{
		FPRINTF(stderr, "compare_A_LU: could not allocate temporary matrices\n");
		free(L);
		free(U);
		return;
	}

	/* split LU into its two triangular factors */
	for (j = 0; j < n; j++)
	{
		for (i = 0; i < j; i++)
		{
			L[j+i*n] = LU[j+i*lda];
		}

		/* diag i = j */
		L[j+j*n] = LU[j+j*lda];
		U[j+j*n] = 1.0f;

		for (i = j+1; i < n; i++)
		{
			U[j+i*n] = LU[j+i*lda];
		}
	}

	/* L := L*U, with U upper triangular and unit diagonal */
	STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);

	for (i = 0; i < n; i++)
	{
		for (j = 0; j < n; j++)
		{
			/* the operands are floats: stay in single precision */
			max_err = STARPU_MAX(max_err, fabsf(L[j+i*n] - A[j+i*lda]));
		}
	}

	FPRINTF(stdout, "max error between A and L*U = %f \n", max_err);

	free(L);
	free(U);
}
#endif /* CHECK_RESULTS */
/*
 * Pivoting description handed to the pivoting codelets through cl_arg.
 *
 * NOTE(review): `first` and `last` presumably bound the rows of the
 * global pivot array that belong to one block -- confirm against the
 * code that fills this structure.
 */
struct piv_s
{
	unsigned *piv; /* complete pivot array */
	unsigned first; /* first element */
	unsigned last; /* last element */
};
*/ + +#include "xlu.h" +#include "xlu_kernels.h" +#include "starpu_cusolver.h" + +static int create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned no_prio, int nblocks) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_getrf; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + task->handles[1] = scratch; + task->handles[2] = devInfo; +#endif + + task->tag_id = TAG_GETRF(k); + task->color = 0xffff00; + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio, int nblocks) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_trsm_ll; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); + + task->tag_id = TAG_TRSM_LL(k,j); + task->color = 0x8080ff; + + if (!no_prio) + task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio, int nblocks) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_trsm_ru; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); + + task->tag_id = TAG_TRSM_RU(k,i); + task->color = 0x8080c0; + + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio, int nblocks) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_gemm; + task->color = 0x00ff00; + + /* which sub-data is manipulated ? */ + task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); + task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); + task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i); + + task->tag_id = TAG_GEMM(k,i,j); + + if (!no_prio) + task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +/* + * code to bootstrap the factorization + */ + +static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio) +{ + double start; + double end; + + /* create all the DAG nodes */ + unsigned i,j,k; + + if (bound) + starpu_bound_start(bounddeps, boundprio); + + start = starpu_timing_now(); + + for (k = 0; k < nblocks; k++) + { + int ret; + + starpu_iteration_push(k); + + ret = create_task_getrf(dataA, k, no_prio, nblocks); + if (ret == -ENODEV) return ret; + + for (i = k+1; icl = &cl_pivot; + task->color = 0xc0c000; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = get_block(dataAp, nblocks, k, i); + + task->tag_id = PIVOT(k, i); + + task->cl_arg = &piv_description[k]; + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_getrf_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, + unsigned k, struct piv_s *piv_description, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_getrf_pivot; + task->color = 0xffff00; + + task->cl_arg = &piv_description[k]; + + /* which sub-data is manipulated ? */ + task->handles[0] = get_block(dataAp, nblocks, k, k); + + task->tag_id = TAG_GETRF(k); + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ll(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_trsm_ll; + task->color = 0x8080ff; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = get_block(dataAp, nblocks, k, k); + task->handles[1] = get_block(dataAp, nblocks, j, k); + + task->tag_id = TAG_TRSM_LL(k,j); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ru(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_trsm_ru; + task->color = 0x8080c0; + + /* which sub-data is manipulated ? */ + task->handles[0] = get_block(dataAp, nblocks, k, k); + task->handles[1] = get_block(dataAp, nblocks, k, i); + + task->tag_id = TAG_TRSM_RU(k,i); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl_gemm; + task->color = 0x00ff00; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = get_block(dataAp, nblocks, k, i); + task->handles[1] = get_block(dataAp, nblocks, j, k); + task->handles[2] = get_block(dataAp, nblocks, j, i); + + task->tag_id = TAG_GEMM(k,i,j); + + if (!no_prio) + task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +/* + * code to bootstrap the factorization + */ + +static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp, + struct piv_s *piv_description, + unsigned nblocks, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), + double *timing, unsigned no_prio) +{ + double start; + double end; + + /* create all the DAG nodes */ + unsigned i,j,k; + + if (bound) + starpu_bound_start(bounddeps, boundprio); + + start = starpu_timing_now(); + + for (k = 0; k < nblocks; k++) + { + int ret; + + starpu_iteration_push(k); + + ret = create_task_getrf_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio); + if (ret == -ENODEV) return ret; + + for (i = 0; i < nblocks; i++) + { + if (i != k) + { + ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio); + if (ret == -ENODEV) return ret; + } + } + + for (i = k+1; i +#include + +#ifdef STARPU_USE_CUDA +#include +#include "starpu_cusolver.h" +#endif + +#define str(s) #s +#define xstr(s) str(s) +#define STARPU_LU_STR(name) xstr(STARPU_LU(name)) + +#ifdef STARPU_USE_CUDA +static const TYPE p1 = 1.0f; +static const TYPE m1 = -1.0f; +#endif + +starpu_data_handle_t scratch = NULL; +starpu_data_handle_t devInfo = NULL; + +/* + * GEMM + */ + +static inline void STARPU_LU(common_gemm)(void *descr[], int s, void *_args) +{ + (void)_args; + TYPE *right = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + TYPE *left = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + TYPE *center = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = 
STARPU_MATRIX_GET_NX(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + CPU_GEMM("N", "N", dy, dx, dz, + (TYPE)-1.0, right, ld21, left, ld12, + (TYPE)1.0, center, ld22); + break; + +#ifdef STARPU_USE_CUDA + case 1: + { + status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_N, dx, dy, dz, + (CUBLAS_TYPE *)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12, + (CUBLAS_TYPE *)&p1, (CUBLAS_TYPE *)center, ld22); + + if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void STARPU_LU(cpu_gemm)(void *descr[], void *_args) +{ + STARPU_LU(common_gemm)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void STARPU_LU(cublas_gemm)(void *descr[], void *_args) +{ + STARPU_LU(common_gemm)(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +static struct starpu_perfmodel STARPU_LU(model_gemm) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_LU_STR(lu_model_gemm_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_LU_STR(lu_model_gemm_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_LU_STR(lu_model_gemm_openblas) +#else + .symbol = STARPU_LU_STR(lu_model_gemm) +#endif +}; + +#ifdef STARPU_USE_CUDA +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + (void)task; + (void)nimpl; + enum starpu_worker_archtype type = starpu_worker_get_type(workerid); + if (type == STARPU_CPU_WORKER) + return 1; + +#ifdef STARPU_SIMGRID + /* We don't know, let's assume it can */ + return 1; +#else + /* Cuda device */ + const struct cudaDeviceProp *props; + props = 
starpu_cuda_get_device_properties(workerid); + if (props->major >= 2 || props->minor >= 3) + { + /* At least compute capability 1.3, supports doubles */ + return 1; + } + else + { + /* Old card does not support doubles */ + return 0; + } +#endif +} +#endif + +#define STRINGIFY_(x) #x +#define STRINGIFY(x) STRINGIFY_(x) +struct starpu_codelet cl_gemm = +{ + .cpu_funcs = {STARPU_LU(cpu_gemm)}, + .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_gemm))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_LU(cublas_gemm)}, + CAN_EXECUTE +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &STARPU_LU(model_gemm) +}; + +/* + * TRSM_LL + */ + +static inline void STARPU_LU(common_trsmll)(void *descr[], int s, void *_args) +{ + (void)_args; + TYPE *sub11; + TYPE *sub12; + + sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + /* solve L11 U12 = A12 (find U12) */ + switch (s) + { + case 0: + CPU_TRSM("L", "L", "N", "N", nx12, ny12, + (TYPE)1.0, sub11, ld11, sub12, ld12); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, + ny12, nx12, + (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub12, ld12); + + if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void STARPU_LU(cpu_trsmll)(void *descr[], void *_args) +{ + STARPU_LU(common_trsmll)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void 
STARPU_LU(cublas_trsmll)(void *descr[], void *_args) +{ + STARPU_LU(common_trsmll)(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +static struct starpu_perfmodel STARPU_LU(model_trsm_ll) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_LU_STR(lu_model_trsm_ll_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_LU_STR(lu_model_trsm_ll_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_LU_STR(lu_model_trsm_ll_openblas) +#else + .symbol = STARPU_LU_STR(lu_model_trsm_ll) +#endif +}; + +struct starpu_codelet cl_trsm_ll = +{ + .cpu_funcs = {STARPU_LU(cpu_trsmll)}, + .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_trsmll))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_LU(cublas_trsmll)}, + CAN_EXECUTE +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &STARPU_LU(model_trsm_ll) +}; + +/* + * TRSM_RU + */ + +static inline void STARPU_LU(common_trsmru)(void *descr[], int s, void *_args) +{ + (void)_args; + TYPE *sub11; + TYPE *sub21; + + sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + CPU_TRSM("R", "U", "N", "U", nx21, ny21, + (TYPE)1.0, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, + ny21, nx21, + (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub21, ld21); + + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void 
/*
 * GETRF
 */

/*
 * Common body of the GETRF kernel: in-place factorization of the block
 * descr[0], without any row or column exchange.
 *
 * s selects the implementation: 0 for the CPU BLAS, 1 for CUDA (only
 * when built with CUDA).  Any other value aborts.
 *
 * When built with cuSOLVER, the CUDA implementation additionally reads
 * descr[1] (workspace) and descr[2] (status word) as variable
 * interfaces.
 */
static inline void STARPU_LU(common_getrf)(void *descr[], int s, void *_args)
{
	(void)_args;
	TYPE *sub11;

	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);

	unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]);
	unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]);

	unsigned long z;

#ifdef STARPU_USE_CUDA
	cublasStatus_t status;
	cublasHandle_t handle;
	cudaStream_t stream;
#endif

	switch (s)
	{
		case 0:
			for (z = 0; z < nx; z++)
			{
				/* the diagonal entry is used as is and must not be zero */
				TYPE pivot;
				pivot = sub11[z+z*ld];
				STARPU_ASSERT(!ISZERO(pivot));

				/* scale the nx-z-1 entries that follow the diagonal, with stride ld */
				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);

				/* rank-1 update of the trailing (nx-z-1) x (nx-z-1) sub-block */
				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
					&sub11[(z+1)+z*ld], 1,
					&sub11[z+(z+1)*ld], ld,
					&sub11[(z+1) + (z+1)*ld],ld);
			}
			break;
#ifdef STARPU_USE_CUDA
		case 1:
#ifdef STARPU_HAVE_LIBCUSOLVER
			{
			cusolverStatus_t sstatus;
			CUBLAS_TYPE *cublas_sub11 = (CUBLAS_TYPE *)sub11;
			CUBLAS_TYPE *workspace = (CUBLAS_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);
			int *d_info = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);

			/* NULL is passed between the workspace and the status word --
			 * presumably the pivot array, i.e. no pivoting; confirm against
			 * the cuSOLVER getrf documentation */
			sstatus = CUSOLVER_GETRF(starpu_cusolverDn_get_local_handle(), nx, nx, cublas_sub11, ld, workspace, NULL, d_info);
			if (sstatus != CUSOLVER_STATUS_SUCCESS)
				STARPU_CUSOLVER_REPORT_ERROR(sstatus);
			}
#else
			handle = starpu_cublas_get_local_handle();
			stream = starpu_cuda_get_local_stream();
			for (z = 0; z < nx; z++)
			{
				TYPE pivot;
				TYPE inv_pivot;
				/* fetch the diagonal entry on the host; the stream is
				 * synchronized before the value is read */
				cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream);
				cudaStreamSynchronize(stream);
				STARPU_ASSERT(!ISZERO(pivot));

				/* same scaling and rank-1 update as the CPU implementation */
				inv_pivot = 1.0/pivot;
				status = CUBLAS_SCAL(handle,
						nx - z - 1,
						(CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld);
				if (status != CUBLAS_STATUS_SUCCESS)
					STARPU_CUBLAS_REPORT_ERROR(status);

				status = CUBLAS_GER(handle,
						nx - z - 1, nx - z - 1,
						(CUBLAS_TYPE*)&m1,
						(CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1,
						(CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld,
						(CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld);
				if (status != CUBLAS_STATUS_SUCCESS)
					STARPU_CUBLAS_REPORT_ERROR(status);
			}

			/* wait for the last update before returning */
			cudaStreamSynchronize(stream);
#endif
			break;
#endif
		default:
			STARPU_ABORT();
			break;
	}
}
/*
 * GETRF with pivoting
 */

/*
 * Common body of the pivoting GETRF kernel: in-place factorization of
 * the block descr[0], searching for another pivot only when the diagonal
 * entry is smaller in magnitude than PIVOT_THRESHHOLD.
 *
 * _args points to a struct piv_s.  When a pivot search happens at local
 * index z, entry piv[z + first] is written.
 *
 * NOTE(review): entries of the pivot array are left untouched when no
 * search is needed, so the array is presumably pre-initialized by the
 * caller -- confirm.
 *
 * s selects the implementation: 0 for the CPU BLAS, 1 for cuBLAS (only
 * when built with CUDA).  Any other value aborts.
 */
static inline void STARPU_LU(common_getrf_pivot)(void *descr[],
				int s, void *_args)
{
	TYPE *sub11;

	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);

	unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]);
	unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]);

	unsigned long z;

	struct piv_s *piv = _args;
	unsigned *ipiv = piv->piv;
	unsigned first = piv->first;

#ifdef STARPU_USE_CUDA
	cublasStatus_t status;
	cublasHandle_t handle;
	cudaStream_t stream;
#endif

	switch (s)
	{
		case 0:
			for (z = 0; z < nx; z++)
			{
				TYPE pivot;
				pivot = sub11[z+z*ld];

				if (fabs((double)(pivot)) < PIVOT_THRESHHOLD)
				{

					/* find the pivot */
					/* search starts at the diagonal entry and walks with stride ld */
					int piv_ind = CPU_IAMAX(nx - z, &sub11[z*(ld+1)], ld);

					/* piv_ind is relative to z; store it relative to the whole pivot array */
					ipiv[z + first] = piv_ind + z + first;

					/* swap if needed */
					if (piv_ind != 0)
					{
						CPU_SWAP(nx, &sub11[z*ld], 1, &sub11[(z+piv_ind)*ld], 1);
					}

					/* reload the diagonal entry after the exchange */
					pivot = sub11[z+z*ld];
				}

				STARPU_ASSERT(!ISZERO(pivot));

				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);

				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
						&sub11[(z+1)+z*ld], 1,
						&sub11[z+(z+1)*ld], ld,
						&sub11[(z+1) + (z+1)*ld],ld);
			}

			break;
#ifdef STARPU_USE_CUDA
		case 1:
			handle = starpu_cublas_get_local_handle();
			stream = starpu_cuda_get_local_stream();
			for (z = 0; z < nx; z++)
			{
				TYPE pivot;
				TYPE inv_pivot;
				/* fetch the diagonal entry on the host; the stream is
				 * synchronized before the value is read */
				cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream);
				cudaStreamSynchronize(stream);

				if (fabs((double)(pivot)) < PIVOT_THRESHHOLD)
				{
					/* find the pivot */
					int piv_ind;
					status = CUBLAS_IAMAX(handle,
						nx - z, (CUBLAS_TYPE*)&sub11[z*(ld+1)], ld, &piv_ind);
					/* the result is shifted down by one so that 0 means "no
					 * exchange", as in the CPU branch.
					 * NOTE(review): the adjustment happens before status is
					 * checked; harmless only if the error report does not
					 * return -- confirm */
					piv_ind -= 1;
					if (status != CUBLAS_STATUS_SUCCESS)
						STARPU_CUBLAS_REPORT_ERROR(status);

					ipiv[z + first] = piv_ind + z + first;

					/* swap if needed */
					if (piv_ind != 0)
					{
						status = CUBLAS_SWAP(handle,
							nx,
							(CUBLAS_TYPE*)&sub11[z*ld], 1,
							(CUBLAS_TYPE*)&sub11[(z+piv_ind)*ld], 1);
						if (status != CUBLAS_STATUS_SUCCESS)
							STARPU_CUBLAS_REPORT_ERROR(status);
					}

					/* reload the diagonal entry after the exchange */
					cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream);
					cudaStreamSynchronize(stream);
				}

				STARPU_ASSERT(!ISZERO(pivot));

				inv_pivot = 1.0/pivot;
				status = CUBLAS_SCAL(handle,
						nx - z - 1,
						(CUBLAS_TYPE*)&inv_pivot,
						(CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld);
				if (status != CUBLAS_STATUS_SUCCESS)
					STARPU_CUBLAS_REPORT_ERROR(status);

				status = CUBLAS_GER(handle,
						nx - z - 1, nx - z - 1,
						(CUBLAS_TYPE*)&m1,
						(CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1,
						(CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld,
						(CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld);
				if (status != CUBLAS_STATUS_SUCCESS)
					STARPU_CUBLAS_REPORT_ERROR(status);
			}

			/* wait for the last update before returning */
			cudaStreamSynchronize(stream);

			break;
#endif
		default:
			STARPU_ABORT();
			break;
	}
}
defined(STARPU_OPENBLAS) + .symbol = STARPU_LU_STR(lu_model_getrf_pivot_openblas) +#else + .symbol = STARPU_LU_STR(lu_model_getrf_pivot) +#endif +}; + +struct starpu_codelet cl_getrf_pivot = +{ + .cpu_funcs = {STARPU_LU(cpu_getrf_pivot)}, + // It uses shared-memory cl_arg + //.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_getrf_pivot))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_LU(cublas_getrf_pivot)}, + CAN_EXECUTE +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .nbuffers = 1, + .modes = { STARPU_RW }, + .model = &STARPU_LU(model_getrf_pivot) +}; + +/* + * Pivoting + */ + +static inline void STARPU_LU(common_pivot)(void *descr[], + int s, void *_args) +{ + TYPE *matrix; + + matrix = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); + unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned row; + + struct piv_s *piv = _args; + unsigned *ipiv = piv->piv; + unsigned first = piv->first; + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; + cublasHandle_t handle; +#endif + + switch (s) + { + case 0: + for (row = 0; row < nx; row++) + { + unsigned rowpiv = ipiv[row+first] - first; + if (rowpiv != row) + { + CPU_SWAP(nx, &matrix[row*ld], 1, &matrix[rowpiv*ld], 1); + } + } + break; +#ifdef STARPU_USE_CUDA + case 1: + handle = starpu_cublas_get_local_handle(); + for (row = 0; row < nx; row++) + { + unsigned rowpiv = ipiv[row+first] - first; + if (rowpiv != row) + { + status = CUBLAS_SWAP(handle, + nx, + (CUBLAS_TYPE*)&matrix[row*ld], 1, + (CUBLAS_TYPE*)&matrix[rowpiv*ld], 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + } + + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void STARPU_LU(cpu_pivot)(void *descr[], void *_args) +{ + STARPU_LU(common_pivot)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void STARPU_LU(cublas_pivot)(void *descr[], void *_args) +{ + STARPU_LU(common_pivot)(descr, 1, _args); +} + +#endif /* STARPU_USE_CUDA */ + +static 
struct starpu_perfmodel STARPU_LU(model_pivot) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_LU_STR(lu_model_pivot_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_LU_STR(lu_model_pivot_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_LU_STR(lu_model_pivot_openblas) +#else + .symbol = STARPU_LU_STR(lu_model_pivot) +#endif +}; + +struct starpu_codelet cl_pivot = +{ + .cpu_funcs = {STARPU_LU(cpu_pivot)}, + // It uses shared-memory cl_arg + //.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_pivot))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_LU(cublas_pivot)}, + CAN_EXECUTE +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &STARPU_LU(model_pivot) +}; + +void lu_kernel_init(int nb) +{ +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + int Lwork = 0; + if (starpu_cuda_worker_get_count()) + { + cusolverStatus_t sstatus = CUSOLVER_GETRF_BUFFERSIZE(starpu_cusolverDn_get_local_handle(), nb, nb, NULL, nb, &Lwork); + if (sstatus != CUSOLVER_STATUS_SUCCESS) + STARPU_CUSOLVER_REPORT_ERROR(sstatus); + } + starpu_variable_data_register(&scratch, -1, 0, Lwork * sizeof(TYPE)); + starpu_variable_data_register(&devInfo, -1, 0, sizeof(int)); +#endif +} + +void lu_kernel_fini(void) +{ +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) + starpu_data_unregister(scratch); + starpu_data_unregister(devInfo); +#endif +} diff --git a/examples/lu/xlu_kernels.h b/examples/lu/xlu_kernels.h new file mode 100644 index 0000000..578972c --- /dev/null +++ b/examples/lu/xlu_kernels.h @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __XLU_KERNELS_H__ +#define __XLU_KERNELS_H__ + +#include + +void STARPU_LU(cpu_pivot)(void *descr[], void *_args); +void STARPU_LU(cpu_getrf_pivot)(void *descr[], void *_args); +void STARPU_LU(cpu_getrf)(void *descr[], void *_args); +void STARPU_LU(cpu_trsmll)(void *descr[], void *_args); +void STARPU_LU(cpu_trsmru)(void *descr[], void *_args); +void STARPU_LU(cpu_gemm)(void *descr[], void *_args); + +#ifdef STARPU_USE_CUDA +void STARPU_LU(cublas_pivot)(void *descr[], void *_args); +void STARPU_LU(cublas_getrf_pivot)(void *descr[], void *_args); +void STARPU_LU(cublas_getrf)(void *descr[], void *_args); +void STARPU_LU(cublas_trsmll)(void *descr[], void *_args); +void STARPU_LU(cublas_trsmru)(void *descr[], void *_args); +void STARPU_LU(cublas_gemm)(void *descr[], void *_args); +#endif + +extern struct starpu_codelet cl_getrf; +extern struct starpu_codelet cl_getrf_pivot; +extern struct starpu_codelet cl_trsm_ll; +extern struct starpu_codelet cl_trsm_ru; +extern struct starpu_codelet cl_gemm; +extern struct starpu_codelet cl_pivot; + +#endif /* __XLU_KERNELS_H__ */ diff --git a/examples/lu/xlu_pivot.c b/examples/lu/xlu_pivot.c new file mode 100644 index 0000000..c6a228e --- /dev/null +++ b/examples/lu/xlu_pivot.c @@ -0,0 +1,468 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* LU Kernels with partial pivoting */ + +#include "xlu.h" +#include "xlu_kernels.h" + +/* + * Construct the DAG + */ + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, + struct piv_s *piv_description, + unsigned k, unsigned i, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + + struct starpu_task *task = create_task(PIVOT(k, i)); + + task->cl = &cl_pivot; + task->color = 0xc0c000; + + /* which sub-data is manipulated ? */ + task->handles[0] = get_block(dataAp, nblocks, k, i); + + task->cl_arg = &piv_description[k]; + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + /* enforce dependencies ... 
*/ + if (k == 0) + { + starpu_tag_declare_deps(PIVOT(k, i), 1, TAG_GETRF(k)); + } + else + { + if (i > k) + { + starpu_tag_declare_deps(PIVOT(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); + } + else + { + starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t)); + + tags[0] = TAG_GETRF(k); + unsigned ind, ind2; + for (ind = k + 1, ind2 = 0; ind < nblocks; ind++, ind2++) + { + tags[1 + ind2] = TAG_GEMM(k-1, ind, k); + } + + /* perhaps we could do better ... :/ */ + starpu_tag_declare_deps_array(PIVOT(k, i), (nblocks-k), tags); + free(tags); + } + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static struct starpu_task *create_task_getrf_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, + unsigned k, struct piv_s *piv_description, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + struct starpu_task *task = create_task(TAG_GETRF(k)); + + task->cl = &cl_getrf_pivot; + task->color = 0xffff00; + + task->cl_arg = &piv_description[k]; + + /* which sub-data is manipulated ? */ + task->handles[0] = get_block(dataAp, nblocks, k, k); + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); + } + + return task; +} + +static int create_task_trsm_ll(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + + /* printf("task trsm_ll k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ + + struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); + + task->cl = &cl_trsm_ll; + task->color = 0x8080ff; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = get_block(dataAp, nblocks, k, k); + task->handles[1] = get_block(dataAp, nblocks, j, k); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ + + /* enforce dependencies ... */ +#if 0 + starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 1, PIVOT(k, i)); +#endif + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, TAG_GETRF(k)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_trsm_ru(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + + struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); + + task->cl = &cl_trsm_ru; + task->color = 0x8080c0; + + /* which sub-data is manipulated ? */ + task->handles[0] = get_block(dataAp, nblocks, k, k); + task->handles[1] = get_block(dataAp, nblocks, k, i); + + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ + + /* enforce dependencies ... */ + starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, PIVOT(k, i)); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +static int create_task_gemm(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) +{ + int ret; + +/* printf("task gemm k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ + + struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); + + task->cl = &cl_gemm; + task->color = 0x00ff00; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = get_block(dataAp, nblocks, k, i); /* produced by TAG_TRSM_RU(k, i) */ + task->handles[1] = get_block(dataAp, nblocks, j, k); /* produced by TAG_TRSM_LL(k, j) */ + task->handles[2] = get_block(dataAp, nblocks, j, i); /* produced by TAG_GEMM(k-1, i, j) */ + + if (!no_prio) + task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); + } + + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return ret; +} + +/* + * code to bootstrap the factorization + */ + +static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp, + struct piv_s *piv_description, + unsigned nblocks, + starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), + double *timing, unsigned no_prio) +{ + int ret; + + double start; + double end; + + struct starpu_task *entry_task = NULL; + + /* create all the DAG nodes */ + unsigned i,j,k; + + if (bound) + starpu_bound_start(bounddeps, boundprio); + + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + struct starpu_task *task = create_task_getrf_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio); + + /* we defer the launch of the first task */ + if (k == 0) + { + entry_task = task; + } + else + { + ret = starpu_task_submit(task); + if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + if (ret == -ENODEV) return ret; + } + + for (i = 0; i < nblocks; i++) + { + if (i != k) + { + ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio); + if (ret == -ENODEV) return ret; + } + } + + for (i = k+1; i +#include +#include +#ifdef STARPU_HAVE_X11 +#include +#include +int use_x11_p = 1; +#endif + +#ifdef 
STARPU_HAVE_HELGRIND_H +#include +#endif +#ifndef ANNOTATE_HAPPENS_BEFORE +#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_AFTER +#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) +#endif + +int demo_p = 0; +static double demozoom_p = 0.05; + +/* NB: The X11 code is inspired by the http://locklessinc.com/articles/mandelbrot/ article */ + +static int nblocks_p = 20; +static int height_p = 400; +static int width_p = 640; +static int maxIt_p = 20000; /* max number of iterations in the Mandelbrot function */ +static int niter_p = -1; /* number of loops in case we don't use X11, -1 means infinite */ +static int use_spmd_p = 0; + +static double leftX_p = -0.745; +static double rightX_p = -0.74375; +static double topY_p = .15; +static double bottomY_p = .14875; + +/* + * X11 window management + */ + +#ifdef STARPU_HAVE_X11 +/* X11 data */ +static Display *dpy_p; +static Window win_p; +static XImage *bitmap_p; +static GC gc_p; +static KeySym Left_p=-1, Right_p, Down_p, Up_p, Alt_p; + +static void exit_x11(void) +{ + XDestroyImage(bitmap_p); + XDestroyWindow(dpy_p, win_p); + XCloseDisplay(dpy_p); +} + +static void init_x11(int width, int height, unsigned *buffer) +{ + /* Attempt to open the display */ + dpy_p = XOpenDisplay(NULL); + + /* Failure */ + if (!dpy_p) + exit(0); + + unsigned long white = WhitePixel(dpy_p, DefaultScreen(dpy_p)); + unsigned long black = BlackPixel(dpy_p, DefaultScreen(dpy_p)); + + win_p = XCreateSimpleWindow(dpy_p, DefaultRootWindow(dpy_p), 0, 0, + width, height, 0, black, white); + + /* We want to be notified when the window appears */ + XSelectInput(dpy_p, win_p, StructureNotifyMask); + + /* Make it appear */ + XMapWindow(dpy_p, win_p); + + XTextProperty tp; + char name[128] = "Mandelbrot - StarPU"; + char *n = name; + Status st = XStringListToTextProperty(&n, 1, &tp); + if (st) + XSetWMName(dpy_p, win_p, &tp); + + /* Wait for the MapNotify event */ + XFlush(dpy_p); + + int depth = DefaultDepth(dpy_p, 
DefaultScreen(dpy_p)); + Visual *visual = DefaultVisual(dpy_p, DefaultScreen(dpy_p)); + + /* Make bitmap */ + bitmap_p = XCreateImage(dpy_p, visual, depth, + ZPixmap, 0, (char *)buffer, + width, height, 32, 0); + + /* Init GC */ + gc_p = XCreateGC(dpy_p, win_p, 0, NULL); + XSetForeground(dpy_p, gc_p, black); + + XSelectInput(dpy_p, win_p, ExposureMask | KeyPressMask | StructureNotifyMask); + + Atom wmDeleteMessage; + wmDeleteMessage = XInternAtom(dpy_p, "WM_DELETE_WINDOW", False); + XSetWMProtocols(dpy_p, win_p, &wmDeleteMessage, 1); + + Left_p = XStringToKeysym ("Left"); + Right_p = XStringToKeysym ("Right"); + Up_p = XStringToKeysym ("Up"); + Down_p = XStringToKeysym ("Down"); + Alt_p = XStringToKeysym ("Alt"); +} + +static int handle_events(void) +{ + XEvent event; + + XNextEvent(dpy_p, &event); + if (event.type == KeyPress) + { + KeySym key; + char text[255]; + + XLookupString(&event.xkey,text,255,&key,0); + if (key == Left_p) + { + double widthX = rightX_p - leftX_p; + leftX_p -= 0.25*widthX; + rightX_p -= 0.25*widthX; + } + else if (key == Right_p) + { + double widthX = rightX_p - leftX_p; + leftX_p += 0.25*widthX; + rightX_p += 0.25*widthX; + } + else if (key == Up_p) + { + double heightY = topY_p - bottomY_p; + topY_p += 0.25*heightY; + bottomY_p += 0.25*heightY; + } + else if (key == Down_p) + { + double heightY = topY_p - bottomY_p; + topY_p -= 0.25*heightY; + bottomY_p -= 0.25*heightY; + } + else + { + double widthX = rightX_p - leftX_p; + double heightY = topY_p - bottomY_p; + + if (text[0] == '-') + { + /* Zoom out */ + leftX_p -= 0.125*widthX; + rightX_p += 0.125*widthX; + topY_p += 0.125*heightY; + bottomY_p -= 0.125*heightY; + } + else if (text[0] == '+') + { + /* Zoom in */ + leftX_p += 0.125*widthX; + rightX_p -= 0.125*widthX; + topY_p -= 0.125*heightY; + bottomY_p += 0.125*heightY; + } + } + + if (text[0]=='q') + { + return -1; + } + } + + if (event.type==ButtonPress) + { + /* tell where the mouse Button was Pressed */ + printf("You pressed a 
button at (%i,%i)\n", + event.xbutton.x,event.xbutton.y); + } + + return 0; +} +#endif + +/* + * OpenCL kernel + */ + +#ifdef STARPU_USE_OPENCL +char *mandelbrot_opencl_src = "\ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n \ +#define MIN(a,b) (((a)<(b))? (a) : (b)) \n \ +__kernel void mandelbrot_kernel(__global unsigned* a, \n \ + double leftX, double topY, \n \ + double stepX, double stepY, \n \ + int maxIt, int iby, int block_size, int width) \n \ +{ \n \ + size_t id_x = get_global_id(0); \n \ + size_t id_y = get_global_id(1); \n \ + if ((id_x < width) && (id_y < block_size)) \n \ + { \n \ + double xc = leftX + id_x * stepX; \n \ + double yc = topY - (id_y + iby*block_size) * stepY; \n \ + int it; \n \ + double x,y; \n \ + x = y = (double)0.0; \n \ + for (it=0;it 4.0) break; \n \ + double twoxy = (double)2.0*x*y; \n \ + x = x2 - y2 + xc; \n \ + y = twoxy + yc; \n \ + } \n \ + unsigned int v = MIN((1024*((float)(it)/(2000))), 256); \n \ + a[id_x + width * id_y] = (v<<16|(255-v)<<8); \n \ + } \n \ +}"; + +static struct starpu_opencl_program opencl_programs; + +static void compute_block_opencl(void *descr[], void *cl_arg) +{ + int iby, block_size; + double stepX, stepY; + int *pcnt; /* unused for OpenCL tasks */ + starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); + + cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); + + cl_kernel kernel; + cl_command_queue queue; + cl_int err; + + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + clSetKernelArg(kernel, 0, sizeof(data), &data); + clSetKernelArg(kernel, 1, sizeof(leftX_p), &leftX_p); + clSetKernelArg(kernel, 2, sizeof(topY_p), &topY_p); + clSetKernelArg(kernel, 3, sizeof(stepX), &stepX); + clSetKernelArg(kernel, 4, sizeof(stepY), &stepY); + clSetKernelArg(kernel, 5, sizeof(maxIt_p), 
&maxIt_p); + clSetKernelArg(kernel, 6, sizeof(iby), &iby); + clSetKernelArg(kernel, 7, sizeof(block_size), &block_size); + clSetKernelArg(kernel, 8, sizeof(width_p), &width_p); + + unsigned dim = 16; + size_t local[2] = {dim, 1}; + size_t global[2] = {width_p, block_size}; + err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + starpu_opencl_release_kernel(kernel); +} +#endif + +/* + * CPU kernel + */ + +static void compute_block(void *descr[], void *cl_arg) +{ + int iby, block_size; + double stepX, stepY; + int *pcnt; /* unused for sequential tasks */ + + starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); + + unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + + int local_iy; + for (local_iy = 0; local_iy < block_size; local_iy++) + { + int ix, iy; + + iy = iby*block_size + local_iy; + for (ix = 0; ix < width_p; ix++) + { + double cx = leftX_p + ix * stepX; + double cy = topY_p - iy * stepY; + /* Z = X+I*Y */ + double x = 0; + double y = 0; + int it; + for (it = 0; it < maxIt_p; it++) + { + double x2 = x*x; + double y2 = y*y; + + /* Stop iterations when |Z| > 2 */ + if (x2 + y2 > 4.0) + break; + + double twoxy = 2.0*x*y; + + /* Z = Z^2 + C */ + x = x2 - y2 + cx; + y = twoxy + cy; + } + + unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256); + data[ix + local_iy*width_p] = (v<<16|(255-v)<<8); + } + } +} + +static void compute_block_spmd(void *descr[], void *cl_arg) +{ + + int iby, block_size; + double stepX, stepY; + int *pcnt; + starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); + + unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + + while (1) + { + int ix, iy; /* global coordinates */ + int local_iy; /* current line */ + + local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1; + ANNOTATE_HAPPENS_BEFORE(pcnt); + if (local_iy >= block_size) + { + ANNOTATE_HAPPENS_AFTER(pcnt); + break; 
+ } + + iy = iby*block_size + local_iy; + + for (ix = 0; ix < width_p; ix++) + { + double cx = leftX_p + ix * stepX; + double cy = topY_p - iy * stepY; + /* Z = X+I*Y */ + double x = 0; + double y = 0; + int it; + for (it = 0; it < maxIt_p; it++) + { + double x2 = x*x; + double y2 = y*y; + + /* Stop iterations when |Z| > 2 */ + if (x2 + y2 > 4.0) + break; + + double twoxy = 2.0*x*y; + + /* Z = Z^2 + C */ + x = x2 - y2 + cx; + y = twoxy + cy; + } + + unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256); + data[ix + local_iy*width_p] = (v<<16|(255-v)<<8); + } + } +} + + + +static struct starpu_codelet spmd_mandelbrot_cl = +{ + .type = STARPU_SPMD, + .max_parallelism = INT_MAX, + .cpu_funcs = {compute_block_spmd}, +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {compute_block_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 1 +}; + +static struct starpu_codelet mandelbrot_cl = +{ + .type = STARPU_SEQ, + .cpu_funcs = {compute_block}, +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {compute_block_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 1 +}; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-h") == 0) + { + fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]); + exit(-1); + } + + if (strcmp(argv[i], "-width") == 0) + { + char *argptr; + width_p = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-height") == 0) + { + char *argptr; + height_p = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nblocks_p = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-niter") == 0) + { + char *argptr; + niter_p = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-pos") == 0) + { + int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX_p, &rightX_p, + &bottomY_p, 
&topY_p); + assert(ret == 4); + } + + if (strcmp(argv[i], "-demo") == 0) + { + demo_p = 1; + leftX_p = -50.22749575062760; + rightX_p = 48.73874621262927; + topY_p = -49.35016705749115; + bottomY_p = 49.64891691946615; + + } + + if (strcmp(argv[i], "-demozoom") == 0) + { + char *argptr; + demozoom_p = strtof(argv[++i], &argptr); + } + + if (strcmp(argv[i], "-no-x11") == 0) + { +#ifdef STARPU_HAVE_X11 + use_x11_p = 0; +#endif + } + + if (strcmp(argv[i], "-spmd") == 0) + { + use_spmd_p = 1; + } + } +} + +int main(int argc, char **argv) +{ + int ret; + + parse_args(argc, argv); + + /* We don't use CUDA in this example */ + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncuda = 0; + + if (use_spmd_p) + { + conf.sched_policy_name = "peager"; + } + + ret = starpu_init(&conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned *buffer; + starpu_malloc((void **)&buffer, height_p*width_p*sizeof(unsigned)); + +#ifdef STARPU_HAVE_X11 + if (use_x11_p) + init_x11(width_p, height_p, buffer); +#endif + + int block_size = height_p/nblocks_p; + STARPU_ASSERT((height_p % nblocks_p) == 0); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL); +#endif + + starpu_data_handle_t block_handles[nblocks_p]; + + int iby; + for (iby = 0; iby < nblocks_p; iby++) + { + unsigned *data = &buffer[iby*block_size*width_p]; + starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM, + (uintptr_t)data, block_size*width_p, sizeof(unsigned)); + } + + unsigned iter = 0; + + double start, end; + + start = starpu_timing_now(); + + while (niter_p-- != 0) + { + double stepX = (rightX_p - leftX_p)/width_p; + double stepY = (topY_p - bottomY_p)/height_p; + + /* In case we have an SPMD task, each worker will grab lines in + * a greedy manner, selecting which piece of the image to compute by + * incrementing a counter shared by all the workers within the + * parallel task. 
*/ + int per_block_cnt[nblocks_p]; + + starpu_iteration_push(niter_p); + + for (iby = 0; iby < nblocks_p; iby++) + { + per_block_cnt[iby] = 0; + int *pcnt = &per_block_cnt[iby]; + + ret = starpu_task_insert(use_spmd_p?&spmd_mandelbrot_cl:&mandelbrot_cl, + STARPU_VALUE, &iby, sizeof(iby), + STARPU_VALUE, &block_size, sizeof(block_size), + STARPU_VALUE, &stepX, sizeof(stepX), + STARPU_VALUE, &stepY, sizeof(stepY), + STARPU_W, block_handles[iby], + STARPU_VALUE, &pcnt, sizeof(int *), + STARPU_TAG_ONLY, ((starpu_tag_t)niter_p)*nblocks_p + iby, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + for (iby = 0; iby < nblocks_p; iby++) + { +#ifdef STARPU_HAVE_X11 + if (use_x11_p) + { + starpu_data_acquire(block_handles[iby], STARPU_R); + XPutImage(dpy_p, win_p, gc_p, bitmap_p, + 0, iby*block_size, + 0, iby*block_size, + width_p, block_size); + starpu_data_release(block_handles[iby]); + } +#endif + } + + + starpu_iteration_pop(); + if (demo_p) + { + /* Zoom in */ + double zoom_factor = demozoom_p; + double widthX = rightX_p - leftX_p; + double heightY = topY_p - bottomY_p; + + iter++; + + /* If the window is too small, we reset the demo and display some statistics */ + if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12)) + { + leftX_p = -50.22749575062760; + rightX_p = 48.73874621262927; + topY_p = -49.35016705749115; + bottomY_p = 49.64891691946615; + + end = starpu_timing_now(); + double timing = end - start; + + fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0); + fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing); + + /* Reset counters */ + iter = 0; + start = starpu_timing_now(); + } + else + { + leftX_p += (zoom_factor/2)*widthX; + rightX_p -= (zoom_factor/2)*widthX; + topY_p -= (zoom_factor/2)*heightY; + bottomY_p += (zoom_factor/2)*heightY; + } + + } +#ifdef STARPU_HAVE_X11 + else if (use_x11_p && handle_events()) + break; +#endif + } + +#ifdef STARPU_HAVE_X11 + if (use_x11_p) + exit_x11(); +#endif + + 
for (iby = 0; iby < nblocks_p; iby++) + starpu_data_unregister(block_handles[iby]); + +/* starpu_data_free_pinned_if_possible(buffer); */ + + starpu_shutdown(); + + return 0; +} diff --git a/examples/matvecmult/matvecmult.c b/examples/matvecmult/matvecmult.c new file mode 100644 index 0000000..3c764a2 --- /dev/null +++ b/examples/matvecmult/matvecmult.c @@ -0,0 +1,232 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_code; +void opencl_codelet(void *descr[], void *_args) +{ + (void)_args; + cl_kernel kernel; + cl_command_queue queue; + int id, devid, err, n; + cl_mem matrix = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(descr[0]); + cl_mem vector = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]); + cl_mem mult = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]); + int nx = STARPU_MATRIX_GET_NX(descr[0]); + int ny = STARPU_MATRIX_GET_NY(descr[0]); + int ld = STARPU_MATRIX_GET_LD(descr[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "matVecMult", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + n=0; + err = clSetKernelArg(kernel, n++, sizeof(matrix), &matrix); + err |= clSetKernelArg(kernel, n++, sizeof(vector), &vector); + err |= clSetKernelArg(kernel, n++, sizeof(nx), (void*)&nx); + err |= clSetKernelArg(kernel, n++, sizeof(ny), (void*)&ny); + err |= clSetKernelArg(kernel, n++, sizeof(mult), &mult); + err |= clSetKernelArg(kernel, n++, sizeof(ld), (void*)&ld); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=nx*ny; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} +#endif + +void fillArray(float* pfData, int iSize) +{ + int i; + const float fScale = 1.0f / (float)RAND_MAX; + for (i = 0; i < iSize; ++i) + { + pfData[i] = fScale * rand(); + } +} + +#if 0 +void printArray(float* pfData, int iSize) +{ + int i; + for (i = 0; i < iSize; ++i) + { + FPRINTF(stderr, "%f ", pfData[i]); + } + FPRINTF(stderr, "\n"); +} +#endif + +void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult) +{ + int i, j; + for (i = 0; i < height; ++i) + { + 
double sum = 0; + for (j = 0; j < width; ++j) + { + double a = matrix[i * width + j]; + double b = vector[j]; + sum += a * b; + } + mult[i] = (float)sum; + } +} + +int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) +{ + float error = 0; + float ref = 0; + unsigned int i; + + for(i = 0; i < len; ++i) + { + float diff = reference[i] - data[i]; + error += diff * diff; + ref += reference[i] * reference[i]; + } + + float normRef = sqrtf(ref); + if (fabs(ref) < 1e-7) return 1; + + float normError = sqrtf(error); + error = normError / normRef; + + return error < epsilon ? 0 : 1; +} + +static struct starpu_perfmodel starpu_matvecmult_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "matvecmult" +}; + +static struct starpu_codelet cl = +{ +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &starpu_matvecmult_model +}; + +int main(void) +{ + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.nopencl = 1; + + /* int width=1100; */ + /* int height=244021; */ + int width=20; + int height=4; + + float *matrix, *vector, *mult; + float *correctResult; + unsigned int mem_size_matrix, mem_size_vector, mem_size_mult; + + starpu_data_handle_t matrix_handle, vector_handle, mult_handle; + int ret, submit; + + ret = starpu_init(&conf); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "This application requires an OpenCL worker.\n"); + return 77; + } + + mem_size_matrix = width * height * sizeof(float); + starpu_malloc((void **)&matrix, mem_size_matrix); + mem_size_vector = width * sizeof(float); + starpu_malloc((void **)&vector, mem_size_vector); + mem_size_mult = height * sizeof(float); + starpu_malloc((void **)&mult, mem_size_mult); + correctResult = (float*)malloc(mem_size_mult); + + assert(matrix); + assert(vector); + assert(mult); + 
assert(correctResult); + + fillArray(matrix, width*height); + fillArray(vector, width); + fillArray(mult, height); + matVecMult(matrix, vector, width, height, correctResult); + + starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, width, sizeof(float)); + starpu_vector_data_register(&mult_handle, STARPU_MAIN_RAM, (uintptr_t)mult, height, sizeof(float)); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/matvecmult/matvecmult_kernel.cl", &opencl_code, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->callback_func = NULL; + task->handles[0] = matrix_handle; + task->handles[1] = vector_handle; + task->handles[2] = mult_handle; + + submit = starpu_task_submit(task); + if (STARPU_UNLIKELY(submit == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n"); + } + else + { + starpu_task_wait_for_all(); + } + + starpu_data_unregister(matrix_handle); + starpu_data_unregister(vector_handle); + starpu_data_unregister(mult_handle); + + if (STARPU_LIKELY(submit != -ENODEV)) + { + int res = compareL2fe(correctResult, mult, height, 1e-6f); + FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!"); + } + +#if 0 + printArray(matrix, width*height); + printArray(vector, width); + printArray(mult, height); +#endif + + starpu_free_noflag(matrix, mem_size_matrix); + starpu_free_noflag(vector, mem_size_vector); + starpu_free_noflag(mult, mem_size_mult); + free(correctResult); + starpu_shutdown(); + + return (submit == -ENODEV) ? 
/*
 * Y = A * X, one work-item per row.
 *
 * A has m rows of n elements, consecutive rows being ld elements apart.
 * Work-items whose global id is not a valid row index do nothing.
 */
__kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y, int ld)
{
	const int row = get_global_id(0);
	if (row >= m)
		return;

	float acc = 0;
	int col;

	for (col = 0; col < n; col++)
		acc += A[row*ld+col] * X[col];

	Y[row] = acc;
}
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This examples demonstrates how to use multiple linear regression + * models. + * + * First, there is mlr_codelet__init codelet for which we know the + * parameters, but not the their exponents and relations. This tasks + * should be benchmarked and analyzed to find the model, using + * "tools/starpu_mlr_analysis" script as a template. + * + * For the second (codelet cl_model_final), it is assumed that the + * analysis has already been performed and that the duration of the + * codelet mlr_codelet_final will be computed using the following + * equation: + * + * T = a + b * (M^2*N) + c * (N^3*K) + * + * where M, N, K are the parameters of the task, exponents are coming + * from model->combinations[..][..] and finally a, b, c are + * coefficients which mostly depend on the machine speed. + * + * These coefficients are going to be automatically computed using + * least square method. + * + */ + +#include +#include +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 10 +#else +#define NTASKS 1000 +#endif + +static long sum; + +/* Performance function of the task, which is in this case very simple, as the parameter values just need to be written in the array "parameters" */ +static void cl_params(struct starpu_task *task, double *parameters) +{ + int m, n, k; + int* vector_mn; + + vector_mn = (int*)STARPU_VECTOR_GET_PTR(task->interfaces[0]); + m = vector_mn[0]; + n = vector_mn[1]; + + starpu_codelet_unpack_args(task->cl_arg, &k); + + parameters[0] = m; + parameters[1] = n; + parameters[2] = k; +} + +/* Function of the task that will be executed. 
In this case running dummy cycles, just to make sure task duration is significant */ +void cpu_func(void *buffers[], void *cl_arg) +{ + long i; + int m,n,k; + int* vector_mn; + + vector_mn = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + m = vector_mn[0]; + n = vector_mn[1]; + + starpu_codelet_unpack_args(cl_arg, &k); + + for(i=0; i < (long) (m*m*n); i++) + STARPU_ATOMIC_ADD(&sum, i); + + for(i=0; i < (long) (n*n*n*k); i++) + STARPU_ATOMIC_ADD(&sum, i); +} + +/* ############################################ */ +/* Start of the part specific to multiple linear regression perfmodels */ + +/* Defining perfmodel, number of parameters and their names Initially + * application developer only knows these parameters. The execution of + * this codelet will generate traces that can be analyzed using + * "tools/starpu_mlr_analysis" as a template to obtain the parameters + * combinations and exponents. + */ + +static const char * parameters_names[] = { "M", "N", "K", }; + +static struct starpu_perfmodel cl_model_init = +{ + .type = STARPU_MULTIPLE_REGRESSION_BASED, + .symbol = "mlr_init", + .parameters = cl_params, + .nparameters = 3, + .parameters_names = parameters_names, +}; + +/* Defining the equation for modeling duration of the task. The + * parameters combinations and exponents are computed externally + * offline, for example using "tools/starpu_mlr_analysis" tool as a + * template. 
+ */ + +/* M^2 * N^1 * K^0 */ +static unsigned combi1 [3] = { 2, 1, 0 }; +/* M^0 * N^3 * K^1 */ +static unsigned combi2 [3] = { 0, 3, 1 }; + +static unsigned *combinations[] = { combi1, combi2 }; + +static struct starpu_perfmodel cl_model_final = +{ + .type = STARPU_MULTIPLE_REGRESSION_BASED, + .symbol = "mlr_final", + .parameters = cl_params, + .nparameters = 3, + .parameters_names = parameters_names, + .ncombinations = 2, + .combinations = combinations, +}; + +/* End of the part specific to multiple linear regression perfmodels */ +/* ############################################ */ + +static struct starpu_codelet cl_init = +{ + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &cl_model_init, +}; + +static struct starpu_codelet cl_final = +{ + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &cl_model_final, +}; + + +int main(void) +{ + /* Initialization */ + unsigned i; + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + + sum=0; + int* vector_mn = calloc(2, sizeof(int)); + starpu_data_handle_t vector_mn_handle; + + starpu_vector_data_register(&vector_mn_handle, + STARPU_MAIN_RAM, + (uintptr_t)vector_mn, 2, + sizeof(int)); + + /* Giving pseudo-random values to the M,N,K parameters and inserting tasks */ + for (i = 0; i < 42; i++) + { + int j; + int m,n,k; + + m = (int) ((rand() % 10)+1); + n = (int) ((rand() % 10)+1); + k = (int) ((rand() % 10)+1); + + /* To illustrate the usage, M and N are stored in a data handle */ + starpu_data_acquire(vector_mn_handle, STARPU_W); + vector_mn[0] = m; + vector_mn[1] = n; + starpu_data_release(vector_mn_handle); + + for (j = 0; j < NTASKS; j++) + { + ret = starpu_task_insert(&cl_init, + STARPU_R, vector_mn_handle, + 
STARPU_VALUE, &k, sizeof(int), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&cl_final, + STARPU_R, vector_mn_handle, + STARPU_VALUE, &k, sizeof(int), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + } + + starpu_data_unregister(vector_mn_handle); + free(vector_mn); + starpu_shutdown(); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + starpu_perfmodel_dump_xml(stdout, &cl_model_final); + starpu_shutdown(); + + return 0; +} diff --git a/examples/mult/dgemm.c b/examples/mult/dgemm.c new file mode 100644 index 0000000..16923c3 --- /dev/null +++ b/examples/mult/dgemm.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "double.h" +#include "xgemm.c" diff --git a/examples/mult/dgemm_layout.c b/examples/mult/dgemm_layout.c new file mode 100644 index 0000000..27ba8b9 --- /dev/null +++ b/examples/mult/dgemm_layout.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "double.h" +#include "xgemm_layout.c" diff --git a/examples/mult/double.h b/examples/mult/double.h new file mode 100644 index 0000000..4dfec08 --- /dev/null +++ b/examples/mult/double.h @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#define TYPE double +#define EPSILON 0.000000000001 + +#define CUBLAS_GEMM cublasDgemm +#define HIPBLAS_GEMM hipblasDgemm +#define CPU_GEMM STARPU_DGEMM +#define CPU_ASUM STARPU_DASUM +#define CPU_IAMAX STARPU_IDAMAX +#define STARPU_GEMM(name) starpu_dgemm_##name + +#define str(s) #s +#define xstr(s) str(s) +#define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) + diff --git a/examples/mult/sgemm.c b/examples/mult/sgemm.c new file mode 100644 index 0000000..3c096f0 --- /dev/null +++ b/examples/mult/sgemm.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "simple.h" +#include "xgemm.c" diff --git a/examples/mult/sgemm.sh b/examples/mult/sgemm.sh new file mode 100755 index 0000000..4bd37e4 --- /dev/null +++ b/examples/mult/sgemm.sh @@ -0,0 +1,76 @@ +#!/bin/sh -x +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
# Test parsing of FxT traces

# Testing another specific scheduler, no need to run this
[ -z "$STARPU_SCHED" ] || [ "$STARPU_SCHED" = dmdas ] || exit 77

# XXX: Also see tests/overlap/overlap.sh

set -e

# Path expansions are quoted so that a build directory containing spaces
# works.  $MS_LAUNCHER and $STARPU_LAUNCH stay unquoted on purpose: they are
# optional command prefixes and must undergo word splitting.
PREFIX=$(dirname "$0")
TOOLS="$PREFIX/../../tools"
rm -rf "$PREFIX/sgemm.traces"
mkdir -p "$PREFIX/sgemm.traces"

export STARPU_FXT_PREFIX="$PREFIX/sgemm.traces"

STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $MS_LAUNCHER $STARPU_LAUNCH "$PREFIX/sgemm" -check
if [ -x "$TOOLS/starpu_fxt_tool" ];
then
	$STARPU_LAUNCH "$TOOLS/starpu_perfmodel_plot" -o "$STARPU_FXT_PREFIX" -s starpu_sgemm_gemm -i "$STARPU_FXT_PREFIX/prof_file_${USER}_0"
	# Separate commands rather than "&&": under "set -e" only the last
	# command of an AND list can abort the script.
	[ -f "$STARPU_FXT_PREFIX/starpu_starpu_sgemm_gemm.gp" ]
	[ -f "$STARPU_FXT_PREFIX/starpu_starpu_sgemm_gemm.data" ]

	# Generate paje, dag, data, etc.
	$STARPU_LAUNCH "$TOOLS/starpu_fxt_tool" -d "$STARPU_FXT_PREFIX" -memory-states -label-deps -i "$STARPU_FXT_PREFIX/prof_file_${USER}_0"

	"$TOOLS/starpu_paje_sort" "$STARPU_FXT_PREFIX/paje.trace"
	! type pj_dump || pj_dump -e 0 < "$STARPU_FXT_PREFIX/paje.trace"

	"$TOOLS/starpu_codelet_profile" "$STARPU_FXT_PREFIX/distrib.data" starpu_sgemm_gemm
	[ -f "$STARPU_FXT_PREFIX/distrib.data.gp" ]
	data=$(ls "$STARPU_FXT_PREFIX"/distrib.data.[0-9]*)
	[ -n "$data" ]

	$STARPU_LAUNCH "$TOOLS/starpu_fxt_data_trace" -d "$STARPU_FXT_PREFIX" "$STARPU_FXT_PREFIX/prof_file_${USER}_0" starpu_sgemm_gemm
	[ -f "$STARPU_FXT_PREFIX/data_trace.gp" ]

	$STARPU_LAUNCH "$TOOLS/starpu_fxt_stats" -i "$STARPU_FXT_PREFIX/prof_file_${USER}_0"
	$MS_LAUNCHER $STARPU_LAUNCH "$TOOLS/starpu_tasks_rec_complete" "$STARPU_FXT_PREFIX/tasks.rec" "$STARPU_FXT_PREFIX/tasks2.rec"
	python3 "$TOOLS/starpu_trace_state_stats.py" "$STARPU_FXT_PREFIX/trace.rec"
	! type gnuplot || ( "$TOOLS/starpu_workers_activity" -d "$STARPU_FXT_PREFIX" "$STARPU_FXT_PREFIX/activity.data" && [ -f "$STARPU_FXT_PREFIX/activity.eps" ] )

	# needs some R packages
	"$TOOLS/starpu_paje_draw_histogram" "$STARPU_FXT_PREFIX/paje.trace" || true
	"$TOOLS/starpu_paje_state_stats" "$STARPU_FXT_PREFIX/paje.trace" || true
	"$TOOLS/starpu_paje_summary" "$STARPU_FXT_PREFIX/paje.trace" || true
	"$TOOLS/starpu_codelet_histo_profile" "$STARPU_FXT_PREFIX/distrib.data" || true
	[ -f "$STARPU_FXT_PREFIX/distrib.data.starpu_sgemm_gemm.0.492beed5.33177600.pdf" ] || true

	if [ -x "$TOOLS/starpu_replay" ]; then
		$STARPU_LAUNCH "$TOOLS/starpu_replay" "$STARPU_FXT_PREFIX/tasks.rec"
	fi

	[ ! -x "$TOOLS/starpu_perfmodel_recdump" ] || $MS_LAUNCHER $STARPU_LAUNCH "$TOOLS/starpu_perfmodel_recdump" "$STARPU_FXT_PREFIX/tasks.rec" -o "$STARPU_FXT_PREFIX/perfs2.rec"
	[ -f "$STARPU_FXT_PREFIX/perfs2.rec" ]
fi

[ ! -x "$TOOLS/starpu_perfmodel_display" ] || $STARPU_LAUNCH "$TOOLS/starpu_perfmodel_display" -s starpu_sgemm_gemm
[ ! -x "$TOOLS/starpu_perfmodel_display" ] || $STARPU_LAUNCH "$TOOLS/starpu_perfmodel_display" -x -s starpu_sgemm_gemm
[ ! -x "$TOOLS/starpu_perfmodel_recdump" ] || $MS_LAUNCHER $STARPU_LAUNCH "$TOOLS/starpu_perfmodel_recdump" -o "$STARPU_FXT_PREFIX/perfs.rec"
[ -f "$STARPU_FXT_PREFIX/perfs.rec" ]
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define TYPE float +#define EPSILON 0.000001 + +#define CUBLAS_GEMM cublasSgemm +#define HIPBLAS_GEMM hipblasSgemm +#define CPU_GEMM STARPU_SGEMM +#define CPU_ASUM STARPU_SASUM +#define CPU_IAMAX STARPU_ISAMAX +#define STARPU_GEMM(name) starpu_sgemm_##name + +#define str(s) #s +#define xstr(s) str(s) +#define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) diff --git a/examples/mult/xgemm.c b/examples/mult/xgemm.c new file mode 100644 index 0000000..15b5aa5 --- /dev/null +++ b/examples/mult/xgemm.c @@ -0,0 +1,533 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * Copyright (C) 2017-2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Simple parallel GEMM implementation: partition the output matrix in the two + * dimensions, and the input matrices in the corresponding dimension, and + * perform the output computations in parallel. 
+ */ +#include "xgemm.h" + +static void init_problem_data(void) +{ +#ifndef STARPU_SIMGRID + unsigned i,j; +#endif + + starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + +#ifndef STARPU_SIMGRID + /* fill the A and B matrices */ + for (j=0; j < ydim; j++) + { + for (i=0; i < zdim; i++) + { + A[j+i*ydim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < zdim; j++) + { + for (i=0; i < xdim; i++) + { + B[j+i*zdim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < ydim; j++) + { + for (i=0; i < xdim; i++) + { + C[j+i*ydim] = (TYPE)(0); + } + } +#endif +} + +static void partition_mult_data(void) +{ + unsigned x, y, z; + + starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); + starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); + starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(TYPE)); + + struct starpu_data_filter vert; + memset(&vert, 0, sizeof(vert)); + vert.filter_func = starpu_matrix_filter_vertical_block; + vert.nchildren = nslicesx; + + struct starpu_data_filter horiz; + memset(&horiz, 0, sizeof(horiz)); + horiz.filter_func = starpu_matrix_filter_block; + horiz.nchildren = nslicesy; + + if (tiled) + { + struct starpu_data_filter vertA; + memset(&vertA, 0, sizeof(vertA)); + vertA.filter_func = starpu_matrix_filter_vertical_block; + vertA.nchildren = nslicesz; + + struct starpu_data_filter horizB; + memset(&horizB, 0, sizeof(horizB)); + horizB.filter_func = starpu_matrix_filter_block; + horizB.nchildren = nslicesz; + + starpu_data_map_filters(A_handle, 2, &vertA, &horiz); + starpu_data_map_filters(B_handle, 2, &vert, &horizB); + 
#ifdef STARPU_USE_CUDA
/*
 * CUBLAS implementation shared by the GEMM codelets:
 * C = p1_cuda * A * B + (*beta) * C on the matrices in descr[0..2].
 */
static void cublas_mult(void *descr[], void *arg, const TYPE *beta)
{
	(void)arg;
	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
	TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);

	unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]);
	unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]);
	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);

	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);

	cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(),
					    CUBLAS_OP_N, CUBLAS_OP_N,
					    nxC, nyC, nyA,
					    &p1_cuda, subA, ldA, subB, ldB,
					    beta, subC, ldC);
	if (status != CUBLAS_STATUS_SUCCESS)
		STARPU_CUBLAS_REPORT_ERROR(status);
}
#endif

#ifdef STARPU_HAVE_BLAS
/*
 * BLAS implementation shared by the GEMM codelets:
 * C = A * B + beta * C on the matrices in descr[0..2].
 *
 * When run by a combined worker, each member handles its own block of the
 * nyC dimension of C.
 */
void cpu_mult(void *descr[], void *arg, TYPE beta)
{
	(void)arg;
	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
	TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);

	unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]);
	unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]);
	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);

	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);

	int worker_size = starpu_combined_worker_get_size();

	if (worker_size == 1)
	{
		/* Sequential CPU task */
		CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC);
	}
	else
	{
		/* Parallel CPU task */
		unsigned rank = starpu_combined_worker_get_rank();

		unsigned block_size = (nyC + worker_size - 1)/worker_size;
		unsigned first = block_size*rank;

		STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1]));

		/* With more workers than blocks the last ranks have nothing
		 * to compute; without this test the subtraction below wraps
		 * around to a huge unsigned count. */
		if (first >= nyC)
			return;

		unsigned new_nyC = STARPU_MIN(nyC, first + block_size) - first;

		/* NOTE(review): these offsets are in elements and are not
		 * scaled by ldB/ldC -- confirm that this addresses the
		 * intended block of B and C. */
		TYPE *new_subB = &subB[first];
		TYPE *new_subC = &subC[first];

		CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC);
	}
}
#endif
*)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); + unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); + unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + + int worker_size = starpu_combined_worker_get_size(); + + if (worker_size == 1) + { + /* Sequential CPU task */ + CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC); + } + else + { + /* Parallel CPU task */ + unsigned rank = starpu_combined_worker_get_rank(); + + unsigned block_size = (nyC + worker_size - 1)/worker_size; + unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; + + STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); + + TYPE *new_subB = &subB[block_size*rank]; + TYPE *new_subC = &subC[block_size*rank]; + + CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC); + } +} +#endif + +static struct starpu_codelet cl_gemm0 = +{ +#ifdef STARPU_HAVE_BLAS + .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ + .max_parallelism = INT_MAX, + .cpu_funcs = {cpu_gemm0}, + .cpu_funcs_name = {"cpu_gemm0"}, +#endif +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cublas_gemm0}, +#elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS) + .hip_funcs = {hipblas_gemm0}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .hip_flags = {STARPU_HIP_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .model = &starpu_gemm_model +}; + +static struct starpu_codelet cl_gemm = +{ +#ifdef STARPU_HAVE_BLAS + .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ + .max_parallelism = INT_MAX, + .cpu_funcs = {cpu_gemm}, + .cpu_funcs_name = {"cpu_gemm"}, +#endif +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cublas_gemm}, +#elif defined(STARPU_USE_HIP) && 
defined(STARPU_USE_HIPBLAS) + .hip_funcs = {hipblas_gemm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .hip_flags = {STARPU_HIP_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &starpu_gemm_model +}; + +static void parse_args(int argc, char **argv) +{ + int i; + int size_set = 0; + + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-3d") == 0) + { + tiled = 1; + } + + else if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + nslicesy = nslicesx; + nslicesz = nslicesx; + if (nslicesx == 0) + { + fprintf(stderr, "the number of blocks in X cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksx") == 0) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + if (nslicesx == 0) + { + fprintf(stderr, "the number of blocks in X cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksy") == 0) + { + char *argptr; + nslicesy = strtol(argv[++i], &argptr, 10); + if (nslicesy == 0) + { + fprintf(stderr, "the number of blocks in Y cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksz") == 0) + { + char *argptr; + nslicesz = strtol(argv[++i], &argptr, 10); + if (nslicesz == 0) + { + fprintf(stderr, "the number of blocks in Z cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-x") == 0) + { + char *argptr; + xdim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the X dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-xy") == 0) + { + char *argptr; + xdim = ydim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the XY dimensions cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-xyz") == 0) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], 
&argptr, 10); + size_set = 1; + } + + else if (strcmp(argv[i], "-y") == 0) + { + char *argptr; + ydim = strtol(argv[++i], &argptr, 10); + if (ydim == 0) + { + fprintf(stderr, "the Y dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-z") == 0) + { + char *argptr; + zdim = strtol(argv[++i], &argptr, 10); + if (zdim == 0) + { + fprintf(stderr, "the Z dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the size cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + niter = strtol(argv[++i], &argptr, 10); + if (niter == 0) + { + fprintf(stderr, "the number of iterations cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nsleeps") == 0) + { + char *argptr; + nsleeps = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-bound") == 0) + { + bound = 1; + } + + else if (strcmp(argv[i], "-hostname") == 0) + { + print_hostname = 1; + } + + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + else if (strcmp(argv[i], "-spmd") == 0) + { + cl_gemm0.type = STARPU_SPMD; + } + + else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) + { + fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); + if (tiled) + fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); + else + fprintf(stderr,"Currently selected: %ux%u * %ux%u and 
%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); + exit(EXIT_SUCCESS); + } + else + { + fprintf(stderr,"Unrecognized option %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } + +#ifndef STARPU_SIMGRID + if (check && !size_set) + { + /* Check is sequential, reduce its default duration */ + xdim /= 2; + ydim /= 2; + } +#endif + +#ifdef STARPU_QUICK_CHECK + niter /= 10; + if(niter==0) + niter=1; +#endif +} + +static int run_data(void) +{ + PRINTF("# "); + if (print_hostname) + PRINTF("node\t"); + PRINTF("x\ty\tz\tms\tGFlop/s"); + if (bound) + PRINTF("\tTms\tTGFlop/s\tTims\tTiGFlop/s"); + PRINTF("\n"); + + unsigned sleeps; + for (sleeps = 0; sleeps < nsleeps; sleeps++) + { + if (bound) + starpu_bound_start(0, 0); + + starpu_fxt_start_profiling(); + double start = starpu_timing_now(); + + unsigned x, y, z, iter; + for (iter = 0; iter < niter; iter++) + { + if (tiled) + { + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + starpu_data_handle_t Ctile = starpu_data_get_sub_data(C_handle, 2, x, y); + for (z = 0; z < nslicesz; z++) + { + struct starpu_codelet *cl = z == 0 ? 
&cl_gemm0 : &cl_gemm; + int ret = starpu_task_insert(cl, + cl->modes[0], starpu_data_get_sub_data(A_handle, 2, z, y), + cl->modes[1], starpu_data_get_sub_data(B_handle, 2, x, z), + cl->modes[2], Ctile, + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * (zdim/nslicesz)), + 0); + if (ret == -ENODEV) + { + check = 0; + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + starpu_data_wont_use(Ctile); + } + } + else + { + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + int ret = starpu_task_insert(&cl_gemm0, + cl_gemm0.modes[0], starpu_data_get_sub_data(A_handle, 1, y), + cl_gemm0.modes[1], starpu_data_get_sub_data(B_handle, 1, x), + cl_gemm0.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), + 0); + if (ret == -ENODEV) + { + check = 0; + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y)); + } + } + + starpu_task_wait_for_all(); + } + + double end = starpu_timing_now(); + starpu_fxt_stop_profiling(); + + if (bound) + starpu_bound_stop(); + + double timing = end - start; + double min, min_int; + double flops = 2.0*((unsigned long long)(niter))*((unsigned long long)xdim) + *((unsigned long long)ydim)*((unsigned long long)zdim); + + if (bound) + starpu_bound_compute(&min, &min_int, 1); + + if (print_hostname) + { + char hostname[255]; + gethostname(hostname, 255); + PRINTF("%s\t", hostname); + } + PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/(niter)/1000.0, flops/timing/1000.0); + if (bound) + PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0); + PRINTF("\n"); + + if (sleeps < nsleeps-1) + { + sleep(10); + } + } + return 0; +} + diff --git a/examples/mult/xgemm.h b/examples/mult/xgemm.h new file mode 100644 index 0000000..157d391 --- /dev/null +++ b/examples/mult/xgemm.h @@ -0,0 +1,230 @@ 
+/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef TYPE +#error "Do not compile xgemm.c directly, compile sgemm.c or dgemm.c" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_BLAS +#include +#endif + +#ifdef STARPU_USE_CUDA +#include +#include +static const TYPE p1_cuda = 1.0; +static const TYPE v0_cuda = 0.0; +#endif + +#ifdef STARPU_USE_HIP +#include +#include +static const TYPE p1_hip = 1.0; +static const TYPE v0_hip = 0.0; +#endif + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 2; +#else +static unsigned niter = 10; +#endif +static unsigned nsleeps = 1; +static unsigned nslicesx = 4; +static unsigned nslicesy = 4; +static unsigned nslicesz = 4; +#if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) +static unsigned xdim = 256; +static unsigned ydim = 256; +static unsigned zdim = 64; +#else +static unsigned xdim = 960*4; +static unsigned ydim = 960*4; +static unsigned zdim = 960*4; +#endif +static unsigned check = 0; +static unsigned bound = 0; +static unsigned print_hostname = 0; +static unsigned tiled = 0; + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PRINTF(fmt, ...) 
#ifdef STARPU_HAVE_BLAS
/*
 * Verify the product held in C against a CPU BLAS computation.
 *
 * C is overwritten with C - A*B, then CPU_ASUM over its xdim*ydim entries is
 * compared with EPSILON*xdim*ydim*zdim.
 *
 * Returns 0 when the result is below the threshold, 1 otherwise (in which
 * case the CPU_ASUM value and the entry selected by CPU_IAMAX are reported
 * on stderr).
 */
static int check_output(void)
{
	/* C <- C - A*B */
	CPU_GEMM("N", "N", ydim, xdim, zdim, (TYPE)-1.0f, A, ydim, B, zdim, (TYPE)1.0f, C, ydim);

	/* what is left in C should be (numerically) zero */
	TYPE residual = CPU_ASUM(xdim*ydim, C, 1);

	if (!(residual < EPSILON*xdim*ydim*zdim))
	{
		int worst = CPU_IAMAX(xdim*ydim, C, 1);

		FPRINTF(stderr, "There were errors ... err = %f\n", residual);
		FPRINTF(stderr, "Max error : %e\n", C[worst]);
		return 1;
	}

	FPRINTF(stderr, "Results are OK\n");
	return 0;
}
#endif
/*
 * Benchmark driver: parse the options, start StarPU and the BLAS helpers,
 * allocate and partition the matrices, run the measurements, then release
 * everything.
 *
 * Returns 77 when starpu_init() reports -ENODEV, otherwise whatever
 * clean_problem_data() returns for the status produced by run_data().
 */
int main(int argc, char **argv)
{
	int status;

	parse_args(argc, argv);

	starpu_fxt_autostart_profiling(0);

	status = starpu_init(NULL);
	if (status == -ENODEV)
	{
		/* 77: presumably the "test skipped" exit code of the test harness */
		return 77;
	}
	STARPU_CHECK_RETURN_VALUE(status, "starpu_init");

	starpu_cublas_init();
	starpu_hipblas_init();

	init_problem_data();
	partition_mult_data();

	/* clean_problem_data() receives run_data()'s status and yields the final one */
	status = clean_problem_data(run_data());

	starpu_cublas_shutdown();
	starpu_hipblas_shutdown();
	starpu_shutdown();

	return status;
}
/dev/null +++ b/examples/mult/xgemm_layout.c @@ -0,0 +1,1199 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Simple parallel GEMM implementation: partition the output matrix in the two + * dimensions, and the input matrices in the corresponding dimension, and + * perform the output computations in parallel. + */ +#include "xgemm.h" + +static unsigned invalidate_c_tile = 0; +static unsigned random_task_order = 0; +static unsigned recursive_matrix_layout = 0; +static unsigned random_data_access = 0; +static unsigned count_do_schedule = 1; +static unsigned sparse_matrix = 0; +/* % de chance qu'une tâche soit créé avec sparse matrix. 
*/ +static int chance_to_be_created = 100; +static TYPE **Cscratch; + +static void init_problem_data(void) +{ +#ifndef STARPU_SIMGRID + unsigned i,j; +#endif + + starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + +#ifndef STARPU_SIMGRID + /* fill the A and B matrices */ + for (j=0; j < ydim; j++) + { + for (i=0; i < zdim; i++) + { + A[j+i*ydim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < zdim; j++) + { + for (i=0; i < xdim; i++) + { + B[j+i*zdim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < ydim; j++) + { + for (i=0; i < xdim; i++) + { + C[j+i*ydim] = (TYPE)(0); + } + } +#endif + + if (!tiled) + { + unsigned x; + unsigned ncuda = starpu_cuda_worker_get_count(); + Cscratch = malloc(sizeof(TYPE*) * ncuda); + for(x = 0; x < ncuda; x++) + { + unsigned worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, x); + unsigned node = starpu_worker_get_memory_node(worker); + Cscratch[x] = (TYPE*) starpu_malloc_on_node(node, (xdim / nslicesx) * (ydim / nslicesy) * sizeof(TYPE)); + } + } +} + +void nop(void *descr[], void *arg) +{ + (void) descr; + (void) arg; +} + +static struct starpu_codelet redux_cl = +{ + .where = STARPU_NOWHERE, + .cpu_funcs = {nop}, + .cpu_funcs_name = {"nop"}, + .cuda_funcs = {nop}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_RW | STARPU_COMMUTE, STARPU_R}, + .model = &starpu_perfmodel_nop +}; + +static struct starpu_codelet init_cl = +{ + .where = STARPU_NOWHERE, + .cpu_funcs = {nop}, + .cpu_funcs_name = {"nop"}, + .cuda_funcs = {nop}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &starpu_perfmodel_nop +}; + +static void partition_mult_data(void) +{ + unsigned x, y, z; + + 
starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); + starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); + starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(TYPE)); + starpu_data_set_reduction_methods(C_handle, &redux_cl, &init_cl); + + struct starpu_data_filter vert; + memset(&vert, 0, sizeof(vert)); + vert.filter_func = starpu_matrix_filter_vertical_block; + vert.nchildren = nslicesx; + + struct starpu_data_filter horiz; + memset(&horiz, 0, sizeof(horiz)); + horiz.filter_func = starpu_matrix_filter_block; + horiz.nchildren = nslicesy; + + if (tiled) + { + struct starpu_data_filter vertA; + memset(&vertA, 0, sizeof(vertA)); + vertA.filter_func = starpu_matrix_filter_vertical_block; + vertA.nchildren = nslicesz; + + struct starpu_data_filter horizB; + memset(&horizB, 0, sizeof(horizB)); + horizB.filter_func = starpu_matrix_filter_block; + horizB.nchildren = nslicesz; + + starpu_data_map_filters(A_handle, 2, &vertA, &horiz); + starpu_data_map_filters(B_handle, 2, &vert, &horizB); + starpu_data_map_filters(C_handle, 2, &vert, &horiz); + + for (y = 0; y < nslicesy; y++) + for (z = 0; z < nslicesz; z++) + starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 2, z, y), 2, z, y); + + for (x = 0; x < nslicesx; x++) + for (z = 0; z < nslicesz; z++) + starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 2, x, z), 2, x, z); + } + else + { + starpu_data_partition(B_handle, &vert); + starpu_data_partition(A_handle, &horiz); + + starpu_data_map_filters(C_handle, 2, &vert, &horiz); + + for (y = 0; y < nslicesy; y++) + starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 1, y), 2, 0, y); + + for (x = 0; x < nslicesx; x++) + starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 1, x), 2, x, 0); + } + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + 
#ifdef STARPU_USE_CUDA
/*
 * CUDA kernel of the 2D codelet: multiply the A and B blocks found in
 * descr[0] and descr[1] with CUBLAS_GEMM.
 *
 * The product is not written to a StarPU-managed C block: it goes to the
 * scratch buffer Cscratch[devid] allocated by init_problem_data(), using a
 * leading dimension equal to the block width.
 *
 * descr: matrix interfaces of the A block (descr[0]) and B block (descr[1]).
 * arg:   unused.
 * beta:  scaling factor applied to the scratch buffer by the GEMM.
 */
static void cublas_mult2d(void *descr[], void *arg, const TYPE *beta)
{
	(void)arg;
	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
	unsigned worker = starpu_worker_get_id_check();
	unsigned devid = starpu_worker_get_devid(worker);
	/* NOTE(review): Cscratch is filled by CUDA worker rank in init_problem_data()
	 * but indexed by device id here -- confirm the two numberings coincide */
	TYPE *subC = Cscratch[devid];

	/* The output shape is derived from the inputs since no C block is passed */
	unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]);
	unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]);
	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);

	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
	unsigned ldC = nxC;

	/* The local CUDA stream used to be fetched here but was never used */

	cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(),
					    CUBLAS_OP_N, CUBLAS_OP_N,
					    nxC, nyC, nyA,
					    &p1_cuda, subA, ldA, subB, ldB,
					    beta, subC, ldC);
	if (status != CUBLAS_STATUS_SUCCESS)
		STARPU_CUBLAS_REPORT_ERROR(status);
}
#endif
#ifdef STARPU_HAVE_BLAS
/*
 * CPU kernel of the 2D codelet: multiply the A and B blocks found in
 * descr[0] and descr[1] with CPU_GEMM into a temporary output block that is
 * discarded when the function returns (no C block is passed to this codelet).
 *
 * The temporary used to be a variable-length array on the worker's stack;
 * with the default 960x960 blocks that is several megabytes, and a zero
 * dimension made it a zero-length VLA.  It is now heap-allocated and
 * zero-initialised.
 *
 * descr: matrix interfaces of the A block (descr[0]) and B block (descr[1]).
 * arg:   unused.
 * beta:  scaling factor applied to the temporary output by the GEMM.
 */
void cpu_mult2d(void *descr[], void *arg, TYPE beta)
{
	(void)arg;
	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);

	/* The output shape is derived from the inputs since no C block is passed */
	unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]);
	unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]);
	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);

	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);

	unsigned ldC = nxC;

	/* At least one element so that a NULL result always means failure */
	size_t nelems = (size_t)nxC * nyC;
	TYPE *subC = calloc(nelems ? nelems : 1, sizeof(*subC));
	STARPU_ASSERT(subC != NULL);

	int worker_size = starpu_combined_worker_get_size();

	if (worker_size == 1)
	{
		/* Sequential CPU task */
		CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC);
	}
	else
	{
		/* Parallel CPU task: each member of the combined worker handles its own slice */
		unsigned rank = starpu_combined_worker_get_rank();

		unsigned block_size = (nyC + worker_size - 1)/worker_size;
		unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank;

		STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1]));

		TYPE *new_subB = &subB[block_size*rank];
		TYPE *new_subC = &subC[block_size*rank];

		CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC);
	}

	free(subC);
}
#endif
/*
 * Codelet for 2D matrix.
 *
 * Only two buffers are declared, both read-only: the A block and the B
 * block.  No C block is part of the codelet; the kernels (cpu_gemm2d,
 * cublas_gemm2d) write their product to scratch storage instead.
 */
static struct starpu_codelet cl_gemm2d =
{
#ifdef STARPU_HAVE_BLAS
	/* NOTE(review): the -spmd option handled by parse_args() in this file only
	 * switches cl_gemm0.type; this codelet stays STARPU_SEQ -- confirm intended */
	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
	.max_parallelism = INT_MAX,
	.cpu_funcs = {cpu_gemm2d},
	.cpu_funcs_name = {"cpu_gemm2d"},
#endif
#ifdef STARPU_USE_CUDA
	.cuda_funcs = {cublas_gemm2d},
#elif defined(STARPU_SIMGRID)
	/* Placeholder function pointer for SimGrid builds without CUDA */
	.cuda_funcs = {(void*)1},
#endif
	.cuda_flags = {STARPU_CUDA_ASYNC},
	.nbuffers = 2,
	.modes = {STARPU_R, STARPU_R},
	.model = &starpu_gemm_model
};
/*
 * Codelet for 3D matrix z = 1, 2, 3.
 *
 * Buffers: A block (read), B block (read), C tile (STARPU_REDUX).  The
 * kernels are the beta = 1 variants (cpu_gemm, cublas_gemm, hipblas_gemm).
 */
static struct starpu_codelet cl_gemm =
{
#ifdef STARPU_HAVE_BLAS
	/* NOTE(review): the -spmd option handled by parse_args() in this file only
	 * switches cl_gemm0.type; this codelet stays STARPU_SEQ -- confirm intended */
	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
	.max_parallelism = INT_MAX,
	.cpu_funcs = {cpu_gemm},
	.cpu_funcs_name = {"cpu_gemm"},
#endif
#ifdef STARPU_USE_CUDA
	.cuda_funcs = {cublas_gemm},
#elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS)
	.hip_funcs = {hipblas_gemm},
#elif defined(STARPU_SIMGRID)
	/* Placeholder function pointer for SimGrid builds without CUDA */
	.cuda_funcs = {(void*)1},
#endif
	.cuda_flags = {STARPU_CUDA_ASYNC},
	.hip_flags = {STARPU_HIP_ASYNC},
	.nbuffers = 3,
	.modes = {STARPU_R, STARPU_R, STARPU_REDUX},
	.model = &starpu_gemm_model
};
+*/ +static void parse_args(int argc, char **argv) +{ + int i; + int size_set = 0; + + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-3d") == 0) + { + tiled = 1; + } + + else if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + nslicesy = nslicesx; + nslicesz = nslicesx; + if (nslicesx == 0) + { + fprintf(stderr, "the number of blocks in X cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksx") == 0) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + if (nslicesx == 0) + { + fprintf(stderr, "the number of blocks in X cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksy") == 0) + { + char *argptr; + nslicesy = strtol(argv[++i], &argptr, 10); + if (nslicesy == 0) + { + fprintf(stderr, "the number of blocks in Y cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nblocksz") == 0) + { + char *argptr; + nslicesz = strtol(argv[++i], &argptr, 10); + if (nslicesz == 0) + { + fprintf(stderr, "the number of blocks in Z cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-x") == 0) + { + char *argptr; + xdim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the X dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-xy") == 0) + { + char *argptr; + xdim = ydim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the XY dimensions cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-xyz") == 0) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); + size_set = 1; + } + + else if (strcmp(argv[i], "-xyz") == 0) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-y") == 0) + { + char *argptr; + ydim = strtol(argv[++i], &argptr, 10); + if (ydim == 0) + { + fprintf(stderr, 
"the Y dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-z") == 0) + { + char *argptr; + zdim = strtol(argv[++i], &argptr, 10); + if (zdim == 0) + { + fprintf(stderr, "the Z dimension cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); + if (xdim == 0) + { + fprintf(stderr, "the size cannot be 0!\n"); + exit(EXIT_FAILURE); + } + size_set = 1; + } + + else if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + niter = strtol(argv[++i], &argptr, 10); + if (niter == 0) + { + fprintf(stderr, "the number of iterations cannot be 0!\n"); + exit(EXIT_FAILURE); + } + } + + else if (strcmp(argv[i], "-nsleeps") == 0) + { + char *argptr; + nsleeps = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-bound") == 0) + { + bound = 1; + } + + else if (strcmp(argv[i], "-invalidate-c-tile") == 0) + { + invalidate_c_tile = 1; + } + + else if (strcmp(argv[i], "-random-task-order") == 0) + { + random_task_order = 1; + } + + else if (strcmp(argv[i], "-random-data-access") == 0) + { + random_data_access = 1; + } + + else if (strcmp(argv[i], "-recursive-matrix-layout") == 0) + { + recursive_matrix_layout = 1; + } + + else if (strcmp(argv[i], "-no-count-do-schedule") == 0) + { + count_do_schedule = 0; + } + + else if (strcmp(argv[i], "-sparse-matrix") == 0) + { + char *argptr; + sparse_matrix = strtol(argv[++i], &argptr, 10); + if (sparse_matrix > 100) + { + fprintf(stderr, "incorrect value %u for sparse-matrix parameter!\n", sparse_matrix); + exit(EXIT_FAILURE); + } + if (sparse_matrix != 0) + { + chance_to_be_created = sparse_matrix; + } + } + + else if (strcmp(argv[i], "-hostname") == 0) + { + print_hostname = 1; + } + + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + else if (strcmp(argv[i], "-spmd") == 0) + { + cl_gemm0.type = STARPU_SPMD; + } + + else if 
(strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) + { + fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); + if (tiled) + fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); + else + fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); + exit(EXIT_SUCCESS); + } + else + { + fprintf(stderr,"Unrecognized option %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } + +#ifndef STARPU_SIMGRID + if (check && !size_set) + { + /* Check is sequential, reduce its default duration */ + xdim /= 2; + ydim /= 2; + } +#endif + +#ifdef STARPU_QUICK_CHECK + niter /= 10; + if(niter==0) + niter=1; +#endif + +} + +#define check_evicted(main_handle, i1, i2) do { \ + if (index++ < next_evicted) \ + continue; \ + int is_allocated; \ + starpu_data_handle_t sub_handle = starpu_data_get_sub_data(main_handle, 2, i1, i2); \ + starpu_data_query_status(sub_handle, node, &is_allocated, NULL, NULL); \ + if (is_allocated && starpu_data_can_evict(sub_handle, node, is_prefetch)) \ + { \ + next_evicted = index; \ + FPRINTF(stderr,"evicting %p\n", sub_handle); \ + return sub_handle; \ + } \ +} while(0) + +/* Don't do this at home, kids, this is really dumb! */ +starpu_data_handle_t dumb_victim_selector(starpu_data_handle_t *toload, unsigned node, enum starpu_is_prefetch is_prefetch) +{ + static unsigned next_evicted; // index of next data to evict, to avoid getting stuck. Yes this is awful. 
+ unsigned index = 0; + + if (tiled) + { + if (next_evicted == nslicesy*nslicesz + nslicesx+nslicesz + nslicesx*nslicesy) + next_evicted = 0; + + unsigned x, y, z; + for (y = 0; y < nslicesy; y++) + for (z = 0; z < nslicesz; z++) + check_evicted(A_handle, z, y); + + for (x = 0; x < nslicesx; x++) + for (z = 0; z < nslicesz; z++) + check_evicted(B_handle, x, z); + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + check_evicted(C_handle, x, y); + } + else + { + if (next_evicted == 3*nslicesx*nslicesy) + next_evicted = 0; + + unsigned x, y; + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + check_evicted(A_handle, 1, y); + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + check_evicted(B_handle, 1, x); + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + check_evicted(C_handle, x, y); + } + + FPRINTF(stderr,"uh, no evictable data\n"); + next_evicted = 0; + return NULL; +} + +int data_evict_from_non_cpus(starpu_data_handle_t handle) +{ + int global_ret=0; + unsigned nodeid; + for (nodeid = 0; nodeid < starpu_memory_nodes_get_count(); nodeid++) + { + if (starpu_node_get_kind(nodeid) != STARPU_CPU_RAM) + { + int ret = starpu_data_evict_from_node(handle, nodeid); + if (ret != 0) + global_ret = ret; + } + } + return global_ret; +} + +#define SCHEDULE_WAIT() do { \ + if (count_do_schedule == 0) \ + { \ + starpu_do_schedule(); \ + start = starpu_timing_now(); \ + starpu_resume(); \ + starpu_task_wait_for_all(); \ + end = starpu_timing_now(); \ + } \ + else \ + { \ + start = starpu_timing_now(); \ + starpu_do_schedule(); \ + starpu_resume(); \ + starpu_task_wait_for_all(); \ + end = starpu_timing_now(); \ + }} while(0) + +static int run_data(void) +{ + PRINTF("# "); + if (print_hostname) + PRINTF("node\t"); + PRINTF("x\ty\tz\tms\tGFlops\tDeviance"); + if (bound) + PRINTF("\tTms\tTGFlops\tTims\tTiGFlops\tTDeviance"); + PRINTF("\n"); + + starpu_seed(0); + + unsigned sleeps; + for(sleeps = 0; sleeps < nsleeps; 
sleeps++) + { + if (bound) + starpu_bound_start(0, 0); + + starpu_fxt_start_profiling(); + double start, end; + //start = starpu_timing_now(); /* Moved before starpu_resume so we don't start time during scheduling */ + double timing = 0; + double timing_square = 0; + + /* Matrice 3D */ + if (tiled) + { + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + starpu_pause(); /* To get all tasks at once */ + unsigned x,y; + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + starpu_data_handle_t Ctile = starpu_data_get_sub_data(C_handle, 2, x, y); + if (invalidate_c_tile == 1) + { + starpu_data_invalidate(Ctile); /* Modifie les perfs pour DMDAR, à N>35 cela plombe ces performances au niveau de EAGER. La raison est l'allocation. */ + } + unsigned z; + for (z = 0; z < nslicesz; z++) + { + /* Ajout pour sparse matrix. */ + if (random()%100 < chance_to_be_created) + { + struct starpu_codelet *cl; + cl = (z == 0) ? &cl_gemm0 : &cl_gemm; + int ret = starpu_task_insert(cl, + cl->modes[0], starpu_data_get_sub_data(A_handle, 2, z, y), + cl->modes[1], starpu_data_get_sub_data(B_handle, 2, x, z), + cl->modes[2], Ctile, + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * (zdim/nslicesz)), + 0); + if (ret == -ENODEV) + { + check = 0; + starpu_resume(); + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + } + starpu_data_wont_use(Ctile); + } + + SCHEDULE_WAIT(); + + if (niter > 1) + { + if (iter != 0) + { + timing += end - start; + timing_square += (end-start) * (end-start); + } + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + data_evict_from_non_cpus(starpu_data_get_sub_data(C_handle, 2, x, y)); + + unsigned z; + for (z = 0; z < nslicesz; z++) + { + data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 2, z, y)); + data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 2, x, z)); + } + } + } + else + { + timing = end - start; + } + } + } + else if (random_task_order == 1 && 
recursive_matrix_layout == 0 && random_data_access == 0) + { + /* Randomize the order in which task are sent, but the tasks are the same */ + unsigned tab_x[nslicesx][nslicesx]; + unsigned tab_y[nslicesy][nslicesy]; + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + unsigned i, j; + for (i=0; i < nslicesx; i++) + for (j = 0; j < nslicesx; j++) + tab_x[i][j] = i; + for (i=0; i < nslicesy; i++) + for (j = 0; j < nslicesy; j++) + tab_y[i][j] = j; + + //Shuffle + for(i=0; i 1) + { + if (iter != 0) + { + timing += end - start; + timing_square += (end-start) * (end-start); + } + + for (i = 0; i < nslicesx; i++) + for (j = 0; j < nslicesy; j++) + { + data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, j)); + data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, i)); + } + } + else + { + timing = end - start; + } + } + //End if RANDOM_TASK_ORDER == 1 + } + else if (recursive_matrix_layout == 1 && random_data_access == 0) + { + /* Tasks arrive in a "Z-order" */ + unsigned tab_x[nslicesx][nslicesx]; + unsigned tab_y[nslicesy][nslicesy]; + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + unsigned i, j; + for (i= 0; i < nslicesx; i++) + for (j = 0; j < nslicesx; j++) + tab_x[i][j] = i; + for (i= 0; i < nslicesy; i++) + for (j = 0; j < nslicesy; j++) + tab_y[i][j] = j; + + for (i= 0; i < nslicesx; i++) + { + int x_z_layout, x_z_layout_i; + int i_bis = 0; + for (j = 0; j < nslicesx; j++) + { + if (i_bis%2 == 1) + { + x_z_layout_i = nslicesx/2; + } + if (j >= 4) + { + x_z_layout = (j/4)*2; + } + tab_x[i][j] = j%2 + x_z_layout + x_z_layout_i; + } + x_z_layout = 0; + x_z_layout_i = 0; + if (i%2 == 1) + { + i_bis++; + } + } + + for (i= 0; i < nslicesy; i++) + { + int y_z_layout_i = 0; int i_bis = 0; int y_z_layout = 0; + for (j = 0; j < nslicesy; j++) + { + int j_bis = 0; + if (i >= 4) + { + y_z_layout_i = 4*(i/4); + } + if (j_bis%2 == 1) + { + y_z_layout = 1; + } + if (i%2 == 1) + { + y_z_layout += 2; + } + tab_y[i][j] = y_z_layout + 
y_z_layout_i; + if (j%2 == 1) + { + j_bis++; + } + y_z_layout = 0; + y_z_layout_i = 0; + } + y_z_layout = 0; + if (i%2 == 1) + { + i_bis++; + } + } + + starpu_pause(); + for (i = 0; i < nslicesx; i++) + { + for (j = 0; j < nslicesy; j++) + { + if (random()%100 < chance_to_be_created) + { + int ret = starpu_task_insert(&cl_gemm2d, + cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, tab_y[i][j]), + cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, tab_x[i][j]), + cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, tab_x[i][j], tab_y[i][j]), + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), + 0); + if (ret == -ENODEV) + { + starpu_resume(); + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, tab_x[i][j], tab_y[i][j])); + } + } + } + + SCHEDULE_WAIT(); + + if (iter != 0) + { + timing += end - start; + timing_square += (end-start) * (end-start); + } + } + //End If RECURSIVE_MATRIX_LAYOUT == 1 + } + /* This is the random 2D matrix operation we use */ + else if (random_data_access == 1) + { + /* Each task takes as data a random line and a random column from A and B */ + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + starpu_pause(); + unsigned x, y; + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + if (random()%100 < chance_to_be_created) + { + int ret = starpu_task_insert(&cl_gemm2d, + cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, random()%nslicesy), + cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, random()%nslicesx), + cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), + 0); + if (ret == -ENODEV) + { + starpu_resume(); + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, x, y)); + } + } + + 
SCHEDULE_WAIT(); + + /* If I have more than 1 iteration I want the mean timing, else I don't */ + if (niter > 1) + { + if (iter != 0) + { + timing += end - start; + timing_square += (end-start) * (end-start); + } + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, y)); + data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, x)); + } + } + else + { + timing = end - start; + } + } + } + else + { + /* Normal execution of xgemm */ + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + starpu_pause(); + unsigned x,y; + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + if (random()%100 < chance_to_be_created) + { + int ret = starpu_task_insert(&cl_gemm2d, + cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, y), + cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, x), + cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), + STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), + 0); + if (ret == -ENODEV) + { + starpu_resume(); + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, x, y)); + } + } + + SCHEDULE_WAIT(); + + if (niter > 1) + { + if (iter != 0) + { + timing += end - start; + timing_square += (end-start) * (end-start); + } + + for (x = 0; x < nslicesx; x++) + for (y = 0; y < nslicesy; y++) + { + data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, y)); + data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, x)); + } + } + else + { + timing = end - start; + } + } + /* End of normal execution of 2D matrix. 
*/ + } + + starpu_fxt_stop_profiling(); + + if (bound) + starpu_bound_stop(); + + double min, min_int; + if (bound) + starpu_bound_compute(&min, &min_int, 1); + + if (print_hostname) + { + char hostname[255]; + gethostname(hostname, 255); + PRINTF("%s\t", hostname); + } + + /* Don't count first iteration */ + niter--; + if (niter+1 > 1) /* We also print the deviance */ + { + double flops = 2.0 * ((unsigned long long)(niter)) * ((unsigned long long)xdim) * ((unsigned long long)ydim) * ((unsigned long long)zdim); + /* Cas sparse je divise les flops */ + if (sparse_matrix != 0) + { + flops = (flops*sparse_matrix)/100; + } + double average = timing/niter; + double deviation = sqrt(fabs(timing_square / niter - average*average)); + PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0, flops/niter/(average*average)*deviation/1000.0); + if (bound) + PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, flops/niter/(average*average)*deviation/1000.0); + PRINTF("\n"); + } + else /* We don't */ + { + double flops = 2.0 * ((unsigned long long)(niter+1)) * ((unsigned long long)xdim) * ((unsigned long long)ydim) * ((unsigned long long)zdim); + PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/(niter+1)/1000.0, flops/timing/1000.0, 0.0); + if (bound) + PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, 0.0); + PRINTF("\n"); + } + + if (sleeps < nsleeps-1) + { + sleep(10); + } + } + return 0; +} diff --git a/examples/native_fortran/Makefile_nf_dynbuf.mk b/examples/native_fortran/Makefile_nf_dynbuf.mk new file mode 100644 index 0000000..151d6b7 --- /dev/null +++ b/examples/native_fortran/Makefile_nf_dynbuf.mk @@ -0,0 +1,49 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2015-2015 ONERA
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_dynbuf
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_dynbuf_cl.f90 \
+	nf_dynbuf.f90
+
+FC = gfortran
+
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+# module-file generation dependencies -- NOTE(review): nf_types.o is not in OBJS; confirm nf_dynbuf.f90 needs it rather than nf_dynbuf_cl.o
+nf_dynbuf_cl.o: nf_dynbuf_cl.f90 fstarpu_mod.o
+nf_dynbuf.o: nf_dynbuf.f90 nf_types.o fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_example.mk b/examples/native_fortran/Makefile_nf_example.mk
new file mode 100644
index 0000000..7bf6963
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_example.mk
@@ -0,0 +1,50 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2015-2015 ONERA
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_example
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_types.f90 \
+	nf_compute.f90 \
+	nf_example.f90
+
+FC = gfortran
+
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+# module-file generation dependencies: an object must be built after the objects whose .mod files it uses
+nf_compute.o: nf_compute.f90 nf_types.o fstarpu_mod.o
+nf_example.o: nf_example.f90 nf_types.o nf_compute.o fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_matrix.mk b/examples/native_fortran/Makefile_nf_matrix.mk
new file mode 100644
index 0000000..9a4a408
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_matrix.mk
@@ -0,0 +1,52 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_matrix
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_matrix.f90 \
+	nf_codelets.f90
+
+FC = gfortran
+CC = gcc
+
+CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION))
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+nf_matrix.o: nf_matrix.f90 nf_codelets.o fstarpu_mod.o
+nf_codelets.o: fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_partition.mk b/examples/native_fortran/Makefile_nf_partition.mk
new file mode 100644
index 0000000..1ca43ca
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_partition.mk
@@ -0,0 +1,52 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_partition
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_partition_cl.f90 \
+	nf_partition.f90
+
+FC = gfortran
+CC = gcc
+
+CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION))
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+nf_partition_cl.o: nf_partition_cl.f90 fstarpu_mod.o
+nf_partition.o: nf_partition.f90 nf_partition_cl.o fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_sched_ctx.mk b/examples/native_fortran/Makefile_nf_sched_ctx.mk
new file mode 100644
index 0000000..0adfc3f
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_sched_ctx.mk
@@ -0,0 +1,48 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_sched_ctx
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_sched_ctx_cl.f90 \
+	nf_sched_ctx.f90
+
+FC = gfortran
+
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+# module-file generation dependencies
+nf_sched_ctx_cl.o: nf_sched_ctx_cl.f90 fstarpu_mod.o
+nf_sched_ctx.o: nf_sched_ctx.f90 fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_varbuf.mk b/examples/native_fortran/Makefile_nf_varbuf.mk
new file mode 100644
index 0000000..32faed4
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_varbuf.mk
@@ -0,0 +1,48 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_varbuf
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_varbuf_cl.f90 \
+	nf_varbuf.f90
+
+FC = gfortran
+
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+# module-file generation dependencies -- NOTE(review): nf_types.o is not in OBJS; confirm nf_varbuf.f90 needs it rather than nf_varbuf_cl.o
+nf_varbuf_cl.o: nf_varbuf_cl.f90 fstarpu_mod.o
+nf_varbuf.o: nf_varbuf.f90 nf_types.o fstarpu_mod.o
diff --git a/examples/native_fortran/Makefile_nf_vector.mk b/examples/native_fortran/Makefile_nf_vector.mk
new file mode 100644
index 0000000..fa016cd
--- /dev/null
+++ b/examples/native_fortran/Makefile_nf_vector.mk
@@ -0,0 +1,52 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+PROG = nf_vector
+# Suffix of the pkg-config module to query; the StarPU 1.4.x series installs starpu-1.4.pc.
+STARPU_VERSION=1.4
+FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90
+
+SRCSF = nf_vector.f90 \
+	nf_codelets.f90
+
+FC = gfortran
+CC = gcc
+
+CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION))
+FCFLAGS = -fdefault-real-8 -J. -g
+LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION))
+
+OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o)
+# all and clean name actions, not files; make only honours the upper-case .PHONY target.
+.PHONY: all clean
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+fstarpu_mod.o: $(FSTARPU_MOD)
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+%.o: %.f90
+	$(FC) $(FCFLAGS) -c -o $@ $<
+
+clean:
+	rm -fv *.o *.mod $(PROG)
+
+nf_vector.o: nf_vector.f90 nf_codelets.o fstarpu_mod.o
+nf_codelets.o: fstarpu_mod.o
diff --git a/examples/native_fortran/fstarpu_mod.f90 b/examples/native_fortran/fstarpu_mod.f90
new file mode 100644
index 0000000..041de99
--- /dev/null
+++ b/examples/native_fortran/fstarpu_mod.f90
@@ -0,0 +1,2697 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+!> @ingroup API_Fortran
+!> @brief Fortran API
+module fstarpu_mod
+        use iso_c_binding
+        implicit none
+
+        ! Note: Constants truly are intptr_t, but are declared as c_ptr to be
+        ! readily usable in c_ptr arrays to mimic variadic functions.
+        ! 
Note: Bitwise or operator is provided by the .ior. overloaded operator + type(c_ptr), bind(C) :: FSTARPU_R + type(c_ptr), bind(C) :: FSTARPU_W + type(c_ptr), bind(C) :: FSTARPU_RW + type(c_ptr), bind(C) :: FSTARPU_SCRATCH + type(c_ptr), bind(C) :: FSTARPU_REDUX + type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX + type(c_ptr), bind(C) :: FSTARPU_COMMUTE + type(c_ptr), bind(C) :: FSTARPU_SSEND + type(c_ptr), bind(C) :: FSTARPU_LOCALITY + + type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY + type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE + type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PRIORITY + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER + type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE + type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG + type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL + type(c_ptr), bind(C) :: FSTARPU_FLOPS + type(c_ptr), bind(C) :: FSTARPU_TAG + type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY + type(c_ptr), bind(C) :: FSTARPU_NAME + type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR + type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS + type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY + type(c_ptr), 
bind(C) :: FSTARPU_TASK_END_DEP + type(c_ptr), bind(C) :: FSTARPU_NODE_SELECTION_POLICY + type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA + + type(c_ptr), bind(C) :: FSTARPU_VALUE + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX + + type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER + type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER + type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER + type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER + + integer(c_int), bind(C) :: FSTARPU_NMAXBUFS + + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA + + type(c_ptr), bind(C) :: FSTARPU_NOWHERE + type(c_ptr), bind(C) :: FSTARPU_CPU + type(c_ptr), bind(C) :: FSTARPU_CUDA + type(c_ptr), bind(C) :: FSTARPU_OPENCL + + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT + type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC + type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC + + !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER + !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH + !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON + type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED + type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED + + type(c_ptr), bind(C) :: FSTARPU_SEQ + type(c_ptr), bind(C) :: FSTARPU_SPMD + type(c_ptr), bind(C) :: FSTARPU_FORKJOIN + + ! 
(some) portable iso_c_binding types + type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE + type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T + type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T + + ! (some) native Fortran types + type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER + + type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER + type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 + type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION + + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 + + integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO + + interface operator (.ior.) + procedure or_cptrs + end interface operator (.ior.) + + interface + ! == starpu.h == + + ! 
void starpu_conf_init(struct starpu_conf *conf); + subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_init + + function fstarpu_conf_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_conf_allocate + end function fstarpu_conf_allocate + + subroutine fstarpu_conf_free (conf) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_free + + subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) + use iso_c_binding, only: c_ptr, c_char + type(c_ptr), value, intent(in) :: conf + character(c_char), intent(in) :: policy_name + end subroutine fstarpu_conf_set_sched_policy_name + + subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: min_prio + end subroutine fstarpu_conf_set_min_prio + + subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: max_prio + end subroutine fstarpu_conf_set_max_prio + + subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncpu + end subroutine fstarpu_conf_set_ncpu + + subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncuda + end subroutine fstarpu_conf_set_ncuda + + subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: nopencl + end subroutine fstarpu_conf_set_nopencl + + ! starpu_init: see fstarpu_init + ! 
starpu_initialize: see fstarpu_init + + ! void starpu_pause(void); + subroutine fstarpu_pause() bind(C,name="starpu_pause") + end subroutine fstarpu_pause + + ! void starpu_resume(void); + subroutine fstarpu_resume() bind(C,name="starpu_resume") + end subroutine fstarpu_resume + + ! int starpu_is_paused(void); + function fstarpu_is_paused() bind(C,name="starpu_is_paused") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_is_paused + end function fstarpu_is_paused + + ! void starpu_shutdown(void); + subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") + end subroutine fstarpu_shutdown + + ! starpu_topology_print + subroutine fstarpu_topology_print () bind(C) + end subroutine fstarpu_topology_print + + ! int starpu_asynchronous_copy_disabled(void); + function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_copy_disabled + end function fstarpu_asynchronous_copy_disabled + + ! int starpu_asynchronous_cuda_copy_disabled(void); + function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled + end function fstarpu_asynchronous_cuda_copy_disabled + + ! int starpu_asynchronous_opencl_copy_disabled(void); + function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled + end function fstarpu_asynchronous_opencl_copy_disabled + + ! void starpu_display_stats(); + subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") + end subroutine fstarpu_display_stats + + ! 
void starpu_get_version(int *major, int *minor, int *release); + subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") + use iso_c_binding, only: c_int + integer(c_int), intent(out) :: major,minor,release + end subroutine fstarpu_get_version + + ! == starpu_worker.h == + + ! unsigned starpu_worker_get_count(void); + function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_count + end function fstarpu_worker_get_count + + ! unsigned starpu_combined_worker_get_count(void); + function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_count + end function fstarpu_combined_worker_get_count + + ! unsigned starpu_worker_is_combined_worker(int id); + function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_combined_worker + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_combined_worker + + + ! unsigned starpu_cpu_worker_get_count(void); + function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cpu_worker_get_count + end function fstarpu_cpu_worker_get_count + + ! unsigned starpu_cuda_worker_get_count(void); + function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cuda_worker_get_count + end function fstarpu_cuda_worker_get_count + + ! unsigned starpu_opencl_worker_get_count(void); + function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_opencl_worker_get_count + end function fstarpu_opencl_worker_get_count + + ! 
int starpu_worker_get_id(void); + function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_id + end function fstarpu_worker_get_id + + ! _starpu_worker_get_id_check + ! starpu_worker_get_id_check + + ! int starpu_worker_get_bindid(int workerid); + function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_bindid + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_bindid + + ! int starpu_combined_worker_get_id(void); + function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_id + end function fstarpu_combined_worker_get_id + + ! int starpu_combined_worker_get_size(void); + function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_size + end function fstarpu_combined_worker_get_size + + ! int starpu_combined_worker_get_rank(void); + function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_rank + end function fstarpu_combined_worker_get_rank + + ! enum starpu_worker_archtype starpu_worker_get_type(int id); + function fstarpu_worker_get_type(id) bind(C) + use iso_c_binding, only: c_int, c_ptr + type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t + integer(c_int),value,intent(in) :: id + end function fstarpu_worker_get_type + + ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); + function fstarpu_worker_get_count_by_type(typeid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_count_by_type + type(c_ptr),value,intent(in) :: typeid ! 
c_intptr_t expected by C func + end function fstarpu_worker_get_count_by_type + + ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); + function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_ids_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),intent(out) :: workerids(*) + integer(c_int),value,intent(in) :: maxsize + end function fstarpu_worker_get_ids_by_type + + ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); + function fstarpu_worker_get_by_type(typeid, num) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: num + end function fstarpu_worker_get_by_type + + ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); + function fstarpu_worker_get_by_devid(typeid, devid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_devid + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: devid + end function fstarpu_worker_get_by_devid + + ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); + subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") + use iso_c_binding, only: c_int, c_char, c_size_t + integer(c_int),value,intent(in) :: id + character(c_char),intent(out) :: dst(*) + integer(c_size_t),value,intent(in) :: maxlen + end subroutine fstarpu_worker_get_name + + + ! 
int starpu_worker_get_devid(int id);
+        function fstarpu_worker_get_devid(id) bind(C, name="starpu_worker_get_devid")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in), value :: id
+                integer(c_int) :: fstarpu_worker_get_devid
+        end function fstarpu_worker_get_devid
+
+        ! struct starpu_tree* starpu_workers_get_tree(void);
+        ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
+
+        ! unsigned starpu_worker_is_blocked(int workerid);
+        ! The C unsigned result is received as integer(c_int).
+        function fstarpu_worker_is_blocked(id) bind(C, name="starpu_worker_is_blocked")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in), value :: id
+                integer(c_int) :: fstarpu_worker_is_blocked
+        end function fstarpu_worker_is_blocked
+
+        ! unsigned starpu_worker_is_slave_somewhere(int workerid);
+        function fstarpu_worker_is_slave_somewhere(id) bind(C, name="starpu_worker_is_slave_somewhere")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in), value :: id
+                integer(c_int) :: fstarpu_worker_is_slave_somewhere
+        end function fstarpu_worker_is_slave_somewhere
+
+        ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
+        ! Bound to a same-named C symbol (no name= given) that takes an output
+        ! buffer and its capacity instead of returning a char pointer.
+        subroutine fstarpu_worker_get_type_as_string(typeid, dst, maxlen) bind(C)
+                use iso_c_binding, only: c_ptr, c_char, c_size_t
+                type(c_ptr), intent(in), value :: typeid ! c_intptr_t expected by C func
+                character(c_char), dimension(*), intent(out) :: dst
+                integer(c_size_t), intent(in), value :: maxlen
+        end subroutine fstarpu_worker_get_type_as_string
+
+        ! int starpu_bindid_get_workerids(int bindid, int **workerids);
+
+        ! == starpu_task.h ==
+
+        ! Both arguments are passed by value as C pointers; the result is a C pointer.
+        function fstarpu_task_create_sync(handle, mode) bind(C, name="starpu_task_create_sync")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: handle
+                type(c_ptr), intent(in), value :: mode
+                type(c_ptr) :: fstarpu_task_create_sync
+        end function fstarpu_task_create_sync
+
+        ! 
void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
+        ! id is a starpu_tag_t and is therefore declared integer(c_long_long),
+        ! the same kind used for the elements of tag_array; ndeps is the number
+        ! of entries the C side reads from tag_array.
+        subroutine fstarpu_tag_declare_deps_array(id,ndeps,tag_array) bind(C,name="starpu_tag_declare_deps_array")
+                use iso_c_binding, only: c_int, c_long_long
+                integer(c_long_long), value, intent(in) :: id
+                integer(c_int), value, intent(in) :: ndeps
+                integer(c_long_long), intent(in) :: tag_array(*)
+        end subroutine fstarpu_tag_declare_deps_array
+
+        ! void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
+        ! This binding forwards exactly one dependency (root_task) in the
+        ! variadic position.
+        ! NOTE(review): variadic C functions are outside what BIND(C) guarantees
+        ! to interoperate with; confirm this call works on the target ABI, or
+        ! use fstarpu_task_declare_deps_array instead.
+        subroutine fstarpu_task_declare_deps(task,ndeps,root_task) bind(C,name="starpu_task_declare_deps")
+                use iso_c_binding, only: c_int, c_ptr
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: ndeps
+                type(c_ptr), value, intent(in) :: root_task
+        end subroutine fstarpu_task_declare_deps
+
+        ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
+        ! task_array is an assumed-size array of task pointers.
+        subroutine fstarpu_task_declare_deps_array(task,ndeps,task_array) bind(C,name="starpu_task_declare_deps_array")
+                use iso_c_binding, only: c_int, c_ptr
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: ndeps
+                type(c_ptr), intent(in) :: task_array(*)
+        end subroutine fstarpu_task_declare_deps_array
+
+        ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
+        subroutine fstarpu_task_end_dep_add(task, nb_deps) &
+                        bind(C,name="starpu_task_end_dep_add")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: nb_deps
+        end subroutine fstarpu_task_end_dep_add
+
+        ! void starpu_task_end_dep_release(struct starpu_task *t)
+        subroutine fstarpu_task_end_dep_release(task) &
+                        bind(C,name="starpu_task_end_dep_release")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: task
+        end subroutine fstarpu_task_end_dep_release
+
+
+        ! 
int starpu_tag_wait(starpu_tag_t id);
+        ! Tag bindings: every starpu_tag_t is carried as integer(c_long_long).
+        function fstarpu_tag_wait(id) bind(C, name="starpu_tag_wait")
+                use iso_c_binding, only: c_int, c_long_long
+                integer(c_long_long), intent(in), value :: id
+                integer(c_int) :: fstarpu_tag_wait
+        end function fstarpu_tag_wait
+
+        ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id);
+        function fstarpu_tag_wait_array(ntags, tag_array) bind(C, name="starpu_tag_wait_array")
+                use iso_c_binding, only: c_int, c_long_long
+                integer(c_int), intent(in), value :: ntags
+                integer(c_long_long), dimension(*), intent(in) :: tag_array
+                integer(c_int) :: fstarpu_tag_wait_array
+        end function fstarpu_tag_wait_array
+
+        ! void starpu_tag_notify_from_apps(starpu_tag_t id);
+        subroutine fstarpu_tag_notify_from_apps(id) bind(C, name="starpu_tag_notify_from_apps")
+                use iso_c_binding, only: c_long_long
+                integer(c_long_long), intent(in), value :: id
+        end subroutine fstarpu_tag_notify_from_apps
+
+        ! void starpu_tag_restart(starpu_tag_t id);
+        subroutine fstarpu_tag_restart(id) bind(C, name="starpu_tag_restart")
+                use iso_c_binding, only: c_long_long
+                integer(c_long_long), intent(in), value :: id
+        end subroutine fstarpu_tag_restart
+
+        ! void starpu_tag_remove(starpu_tag_t id);
+        subroutine fstarpu_tag_remove(id) bind(C, name="starpu_tag_remove")
+                use iso_c_binding, only: c_long_long
+                integer(c_long_long), intent(in), value :: id
+        end subroutine fstarpu_tag_remove
+
+        ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id);
+        function fstarpu_tag_get_task(id) bind(C, name="starpu_tag_get_task")
+                use iso_c_binding, only: c_ptr, c_long_long
+                integer(c_long_long), intent(in), value :: id
+                type(c_ptr) :: fstarpu_tag_get_task
+        end function fstarpu_tag_get_task
+
+
+        ! void starpu_task_init(struct starpu_task *task);
+        subroutine fstarpu_task_init(task) bind(C, name="starpu_task_init")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: task
+        end subroutine fstarpu_task_init
+
+        ! 
void starpu_task_clean(struct starpu_task *task);
+        ! Task lifecycle bindings: the task is an opaque C pointer passed by value.
+        subroutine fstarpu_task_clean(task) bind(C, name="starpu_task_clean")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: task
+        end subroutine fstarpu_task_clean
+
+        ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC;
+        function fstarpu_task_create() bind(C, name="starpu_task_create")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_task_create
+        end function fstarpu_task_create
+
+        ! void starpu_task_destroy(struct starpu_task *task);
+        subroutine fstarpu_task_destroy(task) bind(C, name="starpu_task_destroy")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: task
+        end subroutine fstarpu_task_destroy
+
+        ! void starpu_task_set_destroy(struct starpu_task *task);
+        subroutine fstarpu_task_set_destroy(task) bind(C, name="starpu_task_set_destroy")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: task
+        end subroutine fstarpu_task_set_destroy
+
+        ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+        function fstarpu_task_submit(task) bind(C, name="starpu_task_submit")
+                use iso_c_binding, only: c_int, c_ptr
+                type(c_ptr), intent(in), value :: task
+                integer(c_int) :: fstarpu_task_submit
+        end function fstarpu_task_submit
+
+        ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
+        function fstarpu_task_submit_to_ctx(task, sched_ctx_id) bind(C, name="starpu_task_submit_to_ctx")
+                use iso_c_binding, only: c_int, c_ptr
+                type(c_ptr), intent(in), value :: task
+                integer(c_int), intent(in), value :: sched_ctx_id
+                integer(c_int) :: fstarpu_task_submit_to_ctx
+        end function fstarpu_task_submit_to_ctx
+
+        ! 
int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+        function fstarpu_task_finished (task) bind(C,name="starpu_task_finished")
+                use iso_c_binding, only: c_int,c_ptr
+                integer(c_int) :: fstarpu_task_finished
+                type(c_ptr), value, intent(in) :: task
+        end function fstarpu_task_finished
+
+        ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+        function fstarpu_task_wait (task) bind(C,name="starpu_task_wait")
+                use iso_c_binding, only: c_int,c_ptr
+                integer(c_int) :: fstarpu_task_wait
+                type(c_ptr), value, intent(in) :: task
+        end function fstarpu_task_wait
+
+        ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT;
+        ! task_array is an assumed-size array of task pointers, matching the C
+        ! "struct starpu_task **" parameter and the task_array dummy of
+        ! fstarpu_task_declare_deps_array; ntasks is the number of entries.
+        function fstarpu_task_wait_array(task_array,ntasks) bind(C,name="starpu_task_wait_array")
+                use iso_c_binding, only: c_int, c_ptr
+                integer(c_int) :: fstarpu_task_wait_array
+                type(c_ptr), intent(in) :: task_array(*)
+                integer(c_int), value, intent(in) :: ntasks
+        end function fstarpu_task_wait_array
+
+
+        ! The three C functions below return int (see their prototypes); they
+        ! are bound as subroutines, so that value is not available to callers.
+
+        ! int starpu_task_wait_for_all(void);
+        subroutine fstarpu_task_wait_for_all () bind(C,name="starpu_task_wait_for_all")
+        end subroutine fstarpu_task_wait_for_all
+
+        ! int starpu_task_wait_for_n_submitted(unsigned n);
+        subroutine fstarpu_task_wait_for_n_submitted (n) bind(C,name="starpu_task_wait_for_n_submitted")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: n
+        end subroutine fstarpu_task_wait_for_n_submitted
+
+        ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
+        subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C,name="starpu_task_wait_for_all_in_ctx")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: ctx
+        end subroutine fstarpu_task_wait_for_all_in_ctx
+
+        ! 
int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n);
+        ! Bound as a subroutine: the C int result is not exposed to the caller.
+        subroutine fstarpu_task_wait_for_n_submitted_in_ctx(ctx, n) bind(C, name="starpu_task_wait_for_n_submitted_in_ctx")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in), value :: ctx, n
+        end subroutine fstarpu_task_wait_for_n_submitted_in_ctx
+
+        ! int starpu_task_wait_for_no_ready(void);
+        function fstarpu_task_wait_for_no_ready() bind(C, name="starpu_task_wait_for_no_ready")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_task_wait_for_no_ready
+        end function fstarpu_task_wait_for_no_ready
+
+        ! int starpu_task_nready(void);
+        function fstarpu_task_nready() bind(C, name="starpu_task_nready")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_task_nready
+        end function fstarpu_task_nready
+
+        ! int starpu_task_nsubmitted(void);
+        function fstarpu_task_nsubmitted() bind(C, name="starpu_task_nsubmitted")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_task_nsubmitted
+        end function fstarpu_task_nsubmitted
+
+        ! void starpu_do_schedule(void);
+        subroutine fstarpu_do_schedule() bind(C, name="starpu_do_schedule")
+        end subroutine fstarpu_do_schedule
+
+        ! starpu_codelet_init
+        ! The codelet is an opaque C pointer passed by value.
+        subroutine fstarpu_codelet_init(codelet) bind(C, name="starpu_codelet_init")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: codelet
+        end subroutine fstarpu_codelet_init
+
+        ! starpu_codelet_display_stats
+        subroutine fstarpu_codelet_display_stats(codelet) bind(C, name="starpu_codelet_display_stats")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: codelet
+        end subroutine fstarpu_codelet_display_stats
+
+
+        ! struct starpu_task *starpu_task_get_current(void);
+        function fstarpu_task_get_current() bind(C, name="starpu_task_get_current")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_task_get_current
+        end function fstarpu_task_get_current
+
+        ! 
void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
+        ! The Fortran name keeps its historical doubled "_init" suffix so that
+        ! existing callers still compile; the binding label is the C function
+        ! named in the prototype above.
+        subroutine fstarpu_parallel_task_barrier_init_init (task,id) &
+                        bind(C,name="starpu_parallel_task_barrier_init")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: id
+        end subroutine fstarpu_parallel_task_barrier_init_init
+
+        ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
+        ! Same convention: historical Fortran name, binding label taken from
+        ! the C prototype above.
+        subroutine fstarpu_parallel_task_barrier_init_n_init_n (task,sz) &
+                        bind(C,name="starpu_parallel_task_barrier_init_n")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: sz
+        end subroutine fstarpu_parallel_task_barrier_init_n_init_n
+
+        ! struct starpu_task *starpu_task_dup(struct starpu_task *task);
+        function fstarpu_task_dup (task) bind(C,name="starpu_task_dup")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_task_dup
+                type(c_ptr), value, intent(in) :: task
+        end function fstarpu_task_dup
+
+        ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl);
+        subroutine fstarpu_task_set_implementation (task,impl) &
+                        bind(C,name="starpu_task_set_implementation")
+                use iso_c_binding, only: c_ptr,c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: impl
+        end subroutine fstarpu_task_set_implementation
+
+        ! unsigned starpu_task_get_implementation(struct starpu_task *task);
+        ! The C unsigned result is received as integer(c_int).
+        function fstarpu_task_get_implementation (task) &
+                        bind(C,name="starpu_task_get_implementation")
+                use iso_c_binding, only: c_ptr,c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int) :: fstarpu_task_get_implementation
+        end function fstarpu_task_get_implementation
+
+        ! 
--
+
+        ! Codelet helpers. Each interface uses a bare bind(C), so it binds to a
+        ! C symbol spelled like the Fortran name. The codelet (cl) is an opaque
+        ! C pointer passed by value.
+        function fstarpu_codelet_allocate() bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_codelet_allocate
+        end function fstarpu_codelet_allocate
+
+        subroutine fstarpu_codelet_free(cl) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+        end subroutine fstarpu_codelet_free
+
+        ! cl_name is passed by reference (no value attribute).
+        subroutine fstarpu_codelet_set_name(cl, cl_name) bind(C)
+                use iso_c_binding, only: c_ptr, c_char
+                type(c_ptr), intent(in), value :: cl
+                character(c_char), intent(in) :: cl_name
+        end subroutine fstarpu_codelet_set_name
+
+        subroutine fstarpu_codelet_set_color(cl, cl_color) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: cl
+                integer(c_int), intent(in), value :: cl_color
+        end subroutine fstarpu_codelet_set_color
+
+        subroutine fstarpu_codelet_set_model(cl, cl_perfmodel) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: cl_perfmodel
+        end subroutine fstarpu_codelet_set_model
+
+        subroutine fstarpu_codelet_set_energy_model(cl, cl_perfmodel) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: cl_perfmodel
+        end subroutine fstarpu_codelet_set_energy_model
+
+        ! Implementation functions are handed over as C function pointers.
+        subroutine fstarpu_codelet_add_cpu_func(cl, f_ptr) bind(C)
+                use iso_c_binding, only: c_ptr, c_funptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_funptr), intent(in), value :: f_ptr
+        end subroutine fstarpu_codelet_add_cpu_func
+
+        subroutine fstarpu_codelet_add_cuda_func(cl, f_ptr) bind(C)
+                use iso_c_binding, only: c_ptr, c_funptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_funptr), intent(in), value :: f_ptr
+        end subroutine fstarpu_codelet_add_cuda_func
+
+        subroutine fstarpu_codelet_add_cuda_flags(cl, flags) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: flags ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_add_cuda_flags
+
+        subroutine fstarpu_codelet_add_opencl_func(cl, f_ptr) bind(C)
+                use iso_c_binding, only: c_ptr, c_funptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_funptr), intent(in), value :: f_ptr
+        end subroutine fstarpu_codelet_add_opencl_func
+
+        subroutine fstarpu_codelet_add_opencl_flags(cl, flags) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: flags ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_add_opencl_flags
+
+        subroutine fstarpu_codelet_add_buffer(cl, mode) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: mode ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_add_buffer
+
+        subroutine fstarpu_codelet_set_variable_nbuffers(cl) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: cl
+        end subroutine fstarpu_codelet_set_variable_nbuffers
+
+        subroutine fstarpu_codelet_set_nbuffers(cl, nbuffers) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: cl
+                integer(c_int), intent(in), value :: nbuffers
+        end subroutine fstarpu_codelet_set_nbuffers
+
+        subroutine fstarpu_codelet_set_flags(cl, flags) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: flags ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_set_flags
+
+        subroutine fstarpu_codelet_set_where(cl, where) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: where ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_set_where
+
+        subroutine fstarpu_codelet_set_type(cl, type_constant) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: cl
+                type(c_ptr), intent(in), value :: type_constant ! C function expects an intptr_t
+        end subroutine fstarpu_codelet_set_type
+
+        subroutine fstarpu_codelet_set_max_parallelism(cl, max_parallelism) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: cl
+                integer(c_int), intent(in), value :: max_parallelism
+        end subroutine fstarpu_codelet_set_max_parallelism
+
+        ! Performance-model helpers; the model is an opaque C pointer.
+        function fstarpu_perfmodel_allocate() bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_perfmodel_allocate
+        end function fstarpu_perfmodel_allocate
+
+        subroutine fstarpu_perfmodel_free(model) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: model
+        end subroutine fstarpu_perfmodel_free
+
+        ! model_symbol is passed by reference (no value attribute).
+        subroutine fstarpu_perfmodel_set_symbol(model, model_symbol) bind(C)
+                use iso_c_binding, only: c_ptr, c_char
+                type(c_ptr), intent(in), value :: model
+                character(c_char), intent(in) :: model_symbol
+        end subroutine fstarpu_perfmodel_set_symbol
+
+        subroutine fstarpu_perfmodel_set_type(model, type) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(in), value :: model
+                type(c_ptr), intent(in), value :: type ! C function expects an intptr_t
+        end subroutine fstarpu_perfmodel_set_type
+
+        ! == starpu_data_interface.h ==
+
+        ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags);
+
+        ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size);
+        ! The uintptr_t result is received as integer(c_intptr_t).
+        function fstarpu_malloc_on_node(node, sz) bind(C, name="starpu_malloc_on_node")
+                use iso_c_binding, only: c_int, c_intptr_t, c_size_t
+                integer(c_int), intent(in), value :: node
+                integer(c_size_t), intent(in), value :: sz
+                integer(c_intptr_t) :: fstarpu_malloc_on_node
+        end function fstarpu_malloc_on_node
+
+        ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags);
+
+        ! 
void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size);
+        ! addr is the integer(c_intptr_t) address to release on the given node.
+        subroutine fstarpu_free_on_node(node, addr, sz) bind(C, name="starpu_free_on_node")
+                use iso_c_binding, only: c_int, c_intptr_t, c_size_t
+                integer(c_int), intent(in), value :: node
+                integer(c_intptr_t), intent(in), value :: addr
+                integer(c_size_t), intent(in), value :: sz
+        end subroutine fstarpu_free_on_node
+
+        ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
+
+        ! int starpu_data_interface_get_next_id(void);
+        ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops);
+
+
+        ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
+        ! NOTE(review): the Fortran name is spelled "fstarpug_", unlike the
+        ! "fstarpu_" prefix used by every neighbouring binding; presumably a
+        ! typo, kept because the name is what callers reference.
+        subroutine fstarpug_data_ptr_register(dh, node) bind(C, name="starpu_data_ptr_register")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: dh
+                integer(c_int), intent(in), value :: node
+        end subroutine fstarpug_data_ptr_register
+
+        ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
+        ! dh_dst is passed by reference and written by the C side; dh_src is
+        ! passed by value.
+        subroutine fstarpu_data_register_same(dh_dst, dh_src) bind(C, name="starpu_data_register_same")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), intent(out) :: dh_dst
+                type(c_ptr), intent(in), value :: dh_src
+        end subroutine fstarpu_data_register_same
+
+        ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
+        function fstarpu_data_handle_to_pointer(dh, node) bind(C, name="starpu_data_handle_to_pointer")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: dh
+                integer(c_int), intent(in), value :: node
+                type(c_ptr) :: fstarpu_data_handle_to_pointer
+        end function fstarpu_data_handle_to_pointer
+
+        ! 
void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
+        function fstarpu_data_get_local_ptr(dh) bind(C, name="starpu_data_get_local_ptr")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), intent(in), value :: dh
+                type(c_ptr) :: fstarpu_data_get_local_ptr
+        end function fstarpu_data_get_local_ptr
+
+        ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node);
+
+        ! == starpu_data_interface.h: tensor ==
+
+        ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize);
+        ! dh is passed by reference and receives the new handle; every other
+        ! argument is passed by value. The uint32_t arguments are declared
+        ! integer(c_int).
+        subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) &
+                        bind(C, name="starpu_tensor_data_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), intent(out) :: dh
+                integer(c_int), intent(in), value :: home_node
+                type(c_ptr), intent(in), value :: ptr
+                integer(c_int), intent(in), value :: ldy, ldz, ldt
+                integer(c_int), intent(in), value :: nx, ny, nz, nt
+                integer(c_size_t), intent(in), value :: elt_size
+        end subroutine fstarpu_tensor_data_register
+
+        ! 
void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt);
+        ! The C prototype takes the handle itself, not a pointer to it, so dh
+        ! is passed by value. Callers pass the same type(c_ptr) handle variable
+        ! they obtained from the matching *_data_register call.
+        subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) &
+                        bind(C,name="starpu_tensor_ptr_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), value, intent(in) :: dh
+                integer(c_int), value, intent(in) :: node
+                type(c_ptr), value, intent(in) :: ptr
+                type(c_ptr), value, intent(in) :: dev_handle
+                integer(c_size_t), value, intent(in) :: offset
+                integer(c_int), value, intent(in) :: ldy
+                integer(c_int), value, intent(in) :: ldz
+                integer(c_int), value, intent(in) :: ldt
+        end subroutine fstarpu_tensor_ptr_register
+
+        ! Tensor accessors. Each binds (bare bind(C)) to a C symbol spelled like
+        ! the Fortran name and takes the buffers pointer plus a buffer index i.
+        function fstarpu_tensor_get_ptr(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_tensor_get_ptr
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_ptr
+
+        function fstarpu_tensor_get_ldy(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_ldy
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_ldy
+
+        function fstarpu_tensor_get_ldz(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_ldz
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_ldz
+
+        function fstarpu_tensor_get_ldt(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_ldt
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_ldt
+
+        function fstarpu_tensor_get_nx(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_nx
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_nx
+
+        function fstarpu_tensor_get_ny(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_ny
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_ny
+
+        function fstarpu_tensor_get_nz(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_nz
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_nz
+
+        function fstarpu_tensor_get_nt(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_tensor_get_nt
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_tensor_get_nt
+
+        ! == starpu_data_interface.h: block ==
+
+        ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize);
+        ! dh is passed by reference and receives the new handle.
+        subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) &
+                        bind(C,name="starpu_block_data_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), intent(out) :: dh
+                integer(c_int), value, intent(in) :: home_node
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_int), value, intent(in) :: ldy
+                integer(c_int), value, intent(in) :: ldz
+                integer(c_int), value, intent(in) :: nx
+                integer(c_int), value, intent(in) :: ny
+                integer(c_int), value, intent(in) :: nz
+                integer(c_size_t), value, intent(in) :: elt_size
+        end subroutine fstarpu_block_data_register
+
+        ! 
void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz);
+        ! The C prototype takes the handle itself, not a pointer to it, so dh
+        ! is passed by value.
+        subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) &
+                        bind(C,name="starpu_block_ptr_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), value, intent(in) :: dh
+                integer(c_int), value, intent(in) :: node
+                type(c_ptr), value, intent(in) :: ptr
+                type(c_ptr), value, intent(in) :: dev_handle
+                integer(c_size_t), value, intent(in) :: offset
+                integer(c_int), value, intent(in) :: ldy
+                integer(c_int), value, intent(in) :: ldz
+        end subroutine fstarpu_block_ptr_register
+
+        ! Block accessors. Each binds (bare bind(C)) to a C symbol spelled like
+        ! the Fortran name and takes the buffers pointer plus a buffer index i.
+        function fstarpu_block_get_ptr(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_block_get_ptr
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_ptr
+
+        function fstarpu_block_get_ldy(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_block_get_ldy
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_ldy
+
+        function fstarpu_block_get_ldz(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_block_get_ldz
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_ldz
+
+        function fstarpu_block_get_nx(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_block_get_nx
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_nx
+
+        function fstarpu_block_get_ny(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_block_get_ny
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_ny
+
+        function fstarpu_block_get_nz(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_block_get_nz
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_block_get_nz
+
+        ! == starpu_data_interface.h: matrix ==
+
+        ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize);
+        ! dh is passed by reference and receives the new handle.
+        subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) &
+                        bind(C,name="starpu_matrix_data_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), intent(out) :: dh
+                integer(c_int), value, intent(in) :: home_node
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_int), value, intent(in) :: ld
+                integer(c_int), value, intent(in) :: nx
+                integer(c_int), value, intent(in) :: ny
+                integer(c_size_t), value, intent(in) :: elt_size
+        end subroutine fstarpu_matrix_data_register
+
+        ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld);
+        ! The C prototype takes the handle itself, not a pointer to it, so dh
+        ! is passed by value.
+        subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                        bind(C,name="starpu_matrix_ptr_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), value, intent(in) :: dh
+                integer(c_int), value, intent(in) :: node
+                type(c_ptr), value, intent(in) :: ptr
+                type(c_ptr), value, intent(in) :: dev_handle
+                integer(c_size_t), value, intent(in) :: offset
+                integer(c_int), value, intent(in) :: ld
+        end subroutine fstarpu_matrix_ptr_register
+
+        ! Matrix accessors, same calling convention as the block accessors.
+        function fstarpu_matrix_get_ptr(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_matrix_get_ptr
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_matrix_get_ptr
+
+        function fstarpu_matrix_get_ld(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_matrix_get_ld
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_matrix_get_ld
+
+        function fstarpu_matrix_get_nx(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_matrix_get_nx
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_matrix_get_nx
+
+        function fstarpu_matrix_get_ny(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_matrix_get_ny
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_matrix_get_ny
+
+        ! == starpu_data_interface.h: vector ==
+
+        ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
+        ! dh is passed by reference and receives the new handle.
+        subroutine fstarpu_vector_data_register(dh, home_node, ptr,nx, elt_size) &
+                        bind(C,name="starpu_vector_data_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), intent(out) :: dh
+                integer(c_int), value, intent(in) :: home_node
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_int), value, intent(in) :: nx
+                integer(c_size_t), value, intent(in) :: elt_size
+        end subroutine fstarpu_vector_data_register
+
+        ! 
void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
+        ! The C prototype takes the handle itself, not a pointer to it, so dh
+        ! is passed by value.
+        subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
+                        bind(C,name="starpu_vector_ptr_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), value, intent(in) :: dh
+                integer(c_int), value, intent(in) :: node
+                type(c_ptr), value, intent(in) :: ptr
+                type(c_ptr), value, intent(in) :: dev_handle
+                integer(c_size_t), value, intent(in) :: offset
+        end subroutine fstarpu_vector_ptr_register
+
+
+        ! Vector accessors. Each binds (bare bind(C)) to a C symbol spelled like
+        ! the Fortran name and takes the buffers pointer plus a buffer index i.
+        function fstarpu_vector_get_ptr(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_vector_get_ptr
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_vector_get_ptr
+
+        function fstarpu_vector_get_nx(buffers, i) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_vector_get_nx
+                type(c_ptr), value, intent(in) :: buffers
+                integer(c_int), value, intent(in) :: i
+        end function fstarpu_vector_get_nx
+
+        ! == starpu_data_interface.h: variable ==
+
+        ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
+        ! dh is passed by reference and receives the new handle.
+        subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) &
+                        bind(C,name="starpu_variable_data_register")
+                use iso_c_binding, only: c_ptr, c_int, c_size_t
+                type(c_ptr), intent(out) :: dh
+                integer(c_int), value, intent(in) :: home_node
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_size_t), value, intent(in) :: elt_size
+        end subroutine fstarpu_variable_data_register
+
+        ! 
void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); + ! The C prototype takes the handle itself (starpu_data_handle_t, not a pointer to it), so dh is an input passed by value like the other arguments. + subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & + bind(C,name="starpu_variable_ptr_register") + use iso_c_binding, only: c_ptr, c_int, c_size_t + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + type(c_ptr), value, intent(in) :: ptr + type(c_ptr), value, intent(in) :: dev_handle + integer(c_size_t), value, intent(in) :: offset + end subroutine fstarpu_variable_ptr_register + + ! No name= is given, so this binds to the C symbol fstarpu_variable_get_ptr; buffers and i are passed by value. + function fstarpu_variable_get_ptr(buffers, i) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_variable_get_ptr + type(c_ptr), value, intent(in) :: buffers + integer(c_int), value, intent(in) :: i + end function fstarpu_variable_get_ptr + + ! == starpu_data_interface.h: void == + + ! void starpu_void_data_register(starpu_data_handle_t *handle); + ! dh has no VALUE attribute, so it is passed by reference, matching the starpu_data_handle_t* parameter. + subroutine fstarpu_void_data_register(dh) & + bind(C,name="starpu_void_data_register") + use iso_c_binding, only: c_ptr + type(c_ptr), intent(out) :: dh + end subroutine fstarpu_void_data_register + + ! == starpu_data_filter.h == + + ! fstarpu_data_filter_allocate / fstarpu_data_filter_free: no name= is given, so each binds to the C symbol of the same name. + function fstarpu_data_filter_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_data_filter_allocate + end function fstarpu_data_filter_allocate + + subroutine fstarpu_data_filter_free (filter) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_filter_free + + ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ + ! 
to fit within the Fortran id length limit + ! Each fstarpu_df_alloc_* function below takes no argument and returns a type(c_ptr); no name= is given, so each binds to the C symbol of the same name. + ! NOTE(review): presumably each returns a filter preset with the StarPU filter function named in its suffix -- confirm against the C side. + function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block + end function fstarpu_df_alloc_bcsr_filter_canonical_block + + function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block + end function fstarpu_df_alloc_csr_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block + end function fstarpu_df_alloc_matrix_filter_block + + function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow + end function fstarpu_df_alloc_matrix_filter_block_shadow + + function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block + end function fstarpu_df_alloc_matrix_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow + end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow + + function fstarpu_df_alloc_vector_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block + end function fstarpu_df_alloc_vector_filter_block + + function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow + end function fstarpu_df_alloc_vector_filter_block_shadow + + function fstarpu_df_alloc_vector_filter_list () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_list + end function fstarpu_df_alloc_vector_filter_list 
+ + ! The fstarpu_df_alloc_* functions take no argument and return a type(c_ptr); without name= each binds to the C symbol of the same name. + function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 + end function fstarpu_df_alloc_vector_filter_divide_in_2 + + function fstarpu_df_alloc_block_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block + end function fstarpu_df_alloc_block_filter_block + + function fstarpu_df_alloc_block_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow + end function fstarpu_df_alloc_block_filter_block_shadow + + function fstarpu_df_alloc_block_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block + end function fstarpu_df_alloc_block_filter_vertical_block + + function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow + end function fstarpu_df_alloc_block_filter_vertical_block_shadow + + ! The fstarpu_data_filter_set_* subroutines take the filter as a type(c_ptr) by value plus one setting, also by value; without name= each binds to the C symbol of the same name. + subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_filter_func + + subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: nchildren + end subroutine fstarpu_data_filter_set_nchildren + + subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_get_nchildren_func + + subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) + use iso_c_binding, 
only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_get_child_ops_func + + subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: filter_arg + end subroutine fstarpu_data_filter_set_filter_arg + + subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: filter_arg_ptr + end subroutine fstarpu_data_filter_set_filter_arg_ptr + + ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); + ! Both the handle and the filter pointer are passed by value. + subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_partition + + ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); + ! NOTE(review): gathering_node is unsigned in the C prototype but c_int here. + subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition + + ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); + ! children is an assumed-size array of type(c_ptr), passed by reference to match starpu_data_handle_t*. + ! NOTE(review): children is declared intent(in) although the C parameter is a non-const pointer -- confirm whether the C side writes the child handles into it. + subroutine fstarpu_data_partition_plan (dh,filter,children) & + bind(C,name="starpu_data_partition_plan") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_plan + + ! 
void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + ! In the three interfaces on this line, dh and nparts are passed by value and children is an assumed-size array of type(c_ptr) passed by reference, matching starpu_data_handle_t*. + ! NOTE(review): nparts is unsigned in the C prototypes but c_int here. + subroutine fstarpu_data_partition_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_submit + + ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_readonly_submit + + ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readwrite_upgrade_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_readwrite_upgrade_submit + + ! 
void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + ! In the subroutines on this line, dh, nparts and gathering_node are passed by value; children is an assumed-size array of type(c_ptr) passed by reference, matching starpu_data_handle_t*. + ! NOTE(review): nparts is unsigned in the C prototypes but c_int here. + subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition_submit + + ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition_readonly_submit + + ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_clean (dh,nparts,children) & + bind(C,name="starpu_data_partition_clean") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_clean + + ! int starpu_data_get_nb_children(starpu_data_handle_t handle); + ! Takes the handle by value and returns the C int result. + function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_get_nb_children + type(c_ptr), value, intent(in) :: dh + end function fstarpu_data_get_nb_children + + ! 
starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); + ! Takes the handle and the index by value and returns the resulting handle as a type(c_ptr). + ! NOTE(review): i is unsigned in the C prototype but c_int here. + function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_child + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: i + end function fstarpu_data_get_child + + ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); + ! . see: fstarpu_data_get_sub_data + ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); + ! . see: fstarpu_data_get_sub_data + + ! note: defined in filters.c + ! No name= is given, so this binds to the C symbol fstarpu_data_get_sub_data; indices is an assumed-size c_int array passed by reference. + ! NOTE(review): presumably indices holds depth entries, standing in for the C variadic arguments -- confirm against filters.c. + function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_sub_data + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: depth + integer(c_int), intent(in) :: indices(*) + end function fstarpu_data_get_sub_data + + ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); + ! . see fstarpu_data_map_filters + ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); + ! . see fstarpu_data_map_filters + + ! note: defined in filters.c + ! No name= is given, so this binds to the C symbol fstarpu_data_map_filters; filters is an assumed-size array of type(c_ptr) passed by reference. + ! NOTE(review): presumably filters holds nfilters entries, standing in for the C variadic arguments -- confirm against filters.c. + subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: nfilters + type(c_ptr), intent(in) :: filters(*) + end subroutine fstarpu_data_map_filters + + ! 
void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! Direct bindings to the StarPU matrix filter functions; every argument is passed by value. + ! NOTE(review): in the three interfaces on this line, id and nparts are declared type(c_ptr) although the C prototypes list them as unsigned -- confirm this is intentional before calling them directly from Fortran. + subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block + + ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block_shadow + + ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block + + ! 
void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! Direct bindings to StarPU matrix/vector filter functions; every argument is passed by value. + ! NOTE(review): in the three interfaces on this line, id and nparts are declared type(c_ptr) although the C prototypes list them as unsigned -- confirm this is intentional before calling them directly from Fortran. + subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block_shadow + + ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block + + ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block_shadow + + ! 
void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! Direct bindings to StarPU vector filter functions; every argument is passed by value. + ! NOTE(review): in the three interfaces on this line, id and nparts are declared type(c_ptr) although the C prototypes list them as unsigned -- confirm this is intentional before calling them directly from Fortran. + subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list_long") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list_long + + ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list + + ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! The Fortran name is kept unchanged for existing callers; the binding label is the C function named in the prototype above. + subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_divide_in_2") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_divide_in_2 + + ! 
void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! Direct bindings to the StarPU block filter functions; every argument is passed by value. + ! NOTE(review): in the three interfaces on this line, id and nparts are declared type(c_ptr) although the C prototypes list them as unsigned -- confirm this is intentional before calling them directly from Fortran. + subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block + + ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block_shadow + + ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block + + ! 
void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! Direct bindings to the StarPU block filter functions; every argument is passed by value. + ! NOTE(review): in the three interfaces on this line, id and nparts are declared type(c_ptr) although the C prototypes list them as unsigned -- confirm this is intentional before calling them directly from Fortran. + subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block_shadow + + ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block + + ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block_shadow + + + ! == starpu_data.h == + + ! 
void starpu_data_unregister(starpu_data_handle_t handle); + ! Every interface on this line is a direct binding to the C function named in its bind(C,name=...) clause and takes the data handle by value as a type(c_ptr). + subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister + + ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister_no_coherency + + ! void starpu_data_unregister_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister_submit + + ! void starpu_data_deinitialize(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_deinitialize + + ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_deinitialize_submit + + ! void starpu_data_invalidate(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_invalidate + + ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_invalidate_submit + + ! 
void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); + ! Both arguments are passed by value. + ! NOTE(review): is_important is unsigned in the C prototype but c_int here. + subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") + use iso_c_binding, only: c_ptr,c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: is_important + end subroutine fstarpu_data_advise_as_important + + ! starpu_data_acquire: see fstarpu_data_acquire + ! No name= is given, so this binds to the C symbol fstarpu_data_acquire rather than to starpu_data_acquire itself. + ! mode is declared as a type(c_ptr) passed by value because the C side takes an intptr_t (see the inline comment). + subroutine fstarpu_data_acquire (dh, mode) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t + end subroutine fstarpu_data_acquire + + ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); + ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + ! The five prototypes above are listed for reference only; no interface is declared for them here. + + ! void starpu_data_release(starpu_data_handle_t handle); + subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_release + + ! 
void starpu_data_release_on_node(starpu_data_handle_t handle, int node); + ! Both arguments are passed by value. + subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_release_on_node + + ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; + ! Returns the arbiter as an opaque type(c_ptr). + function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_arbiter_create + end function fstarpu_arbiter_create + + ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); + ! The C prototype takes the handle itself (not a pointer to it), so dh is an input passed by value, like arbiter. + subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_data_assign_arbiter + + ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); + subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_arbiter_destroy + + ! void starpu_data_display_memory_stats(); + ! The Fortran name is kept unchanged for existing callers; the binding label is the C function named in the prototype above. + subroutine fstarpu_display_memory_stats() bind(C,name="starpu_data_display_memory_stats") + end subroutine fstarpu_display_memory_stats + + ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); + ! NOTE(review): the C function returns int, but it is declared as a subroutine here, so the status is not available to Fortran callers. + subroutine fstarpu_data_request_allocation (dh, node) & + bind(C,name="starpu_data_request_allocation") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_request_allocation + + ! 
int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + ! Every argument of the interfaces on this line is passed by value. + ! NOTE(review): the C functions return int, but they are declared as subroutines here, so the status is not available to Fortran callers. + ! NOTE(review): node and async are unsigned in the C prototypes but c_int here. + subroutine fstarpu_data_fetch_on_node (dh, node, async) & + bind(C,name="starpu_data_fetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_fetch_on_node + + ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_prefetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_prefetch_on_node + + ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_prefetch_on_node_prio") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_prefetch_on_node_prio + + ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_idle_prefetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_idle_prefetch_on_node + + ! 
int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + ! Every argument is passed by value. + ! NOTE(review): the C function returns int, but it is declared as a subroutine here, so the status is not available to Fortran callers. + subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_idle_prefetch_on_node_prio") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_idle_prefetch_on_node_prio + + !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); + ! NOTE(review): the C return type and node are unsigned but mapped to c_int here. + function fstarpu_data_is_on_node(dh, node) & + bind(C,name="starpu_data_is_on_node") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_is_on_node + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end function fstarpu_data_is_on_node + + ! void starpu_data_wont_use(starpu_data_handle_t handle); + subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_wont_use + + ! unsigned starpu_worker_get_memory_node(unsigned workerid); + ! NOTE(review): the C return type and workerid are unsigned but mapped to c_int here. + function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_memory_node + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_memory_node + + ! unsigned starpu_memory_nodes_get_count(void); + ! NOTE(review): the C return type is unsigned but mapped to c_int here. + function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_memory_nodes_get_count + end function fstarpu_memory_nodes_get_count + + ! enum starpu_node_kind starpu_node_get_kind(unsigned node); + ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); + ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); + ! 
unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); + ! unsigned starpu_data_get_default_sequential_consistency_flag(void); + ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); + ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); + + ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); + ! The handle and both codelet pointers are passed by value as type(c_ptr). + subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl + type(c_ptr), value, intent(in) :: init_cl + end subroutine fstarpu_data_set_reduction_methods + + ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) + ! Same as above with one extra void* argument after each codelet; all five arguments are passed by value as type(c_ptr). + subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & + bind(C,name="starpu_data_set_reduction_methods_with_args") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl + type(c_ptr), value, intent(in) :: redux_args + type(c_ptr), value, intent(in) :: init_cl + type(c_ptr), value, intent(in) :: init_args + end subroutine fstarpu_data_set_reduction_methods_with_args + + ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); + + ! 
unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); + function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_test_if_allocated_on_node + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: mem_node + end function fstarpu_data_test_if_allocated_on_node + + ! void starpu_memchunk_tidy(unsigned memory_node); + subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: mem_node + end subroutine fstarpu_memchunk_tidy + + ! == starpu_task_util.h == + + ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb); + function fstarpu_data_handle_array_alloc (nb) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_handle_array_alloc + integer(c_int), value, intent(in) :: nb + end function fstarpu_data_handle_array_alloc + + ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles); + subroutine fstarpu_data_handle_array_free (handles) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: handles + end subroutine fstarpu_data_handle_array_free + + ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle); + subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: handles + integer(c_int), value, intent(in) :: i + type(c_ptr), value, intent(in) :: handle + end subroutine fstarpu_data_handle_array_set + + ! 
struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb); + function fstarpu_data_descr_array_alloc (nb) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_descr_array_alloc + integer(c_int), value, intent(in) :: nb + end function fstarpu_data_descr_array_alloc + + ! struct starpu_data_descr *fstarpu_data_descr_alloc(void); + function fstarpu_data_descr_alloc () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_data_descr_alloc + end function fstarpu_data_descr_alloc + + ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs); + subroutine fstarpu_data_descr_array_free (descrs) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: descrs + end subroutine fstarpu_data_descr_array_free + + ! void fstarpu_data_descr_free(struct starpu_data_descr *descr); the Fortran name below is misspelled, so the C symbol is named explicitly + subroutine fstarpu_data_descrg_free (descr) bind(C,name="fstarpu_data_descr_free") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: descr + end subroutine fstarpu_data_descrg_free + + ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode); + subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C) + use iso_c_binding, only: c_ptr, c_int, c_intptr_t + type(c_ptr), value, intent(in) :: descrs + integer(c_int), value, intent(in) :: i + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t + end subroutine fstarpu_data_descr_array_set + + ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode); + subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C) + use iso_c_binding, only: c_ptr, c_intptr_t + type(c_ptr), value, intent(in) :: descr + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode ! 
C func expects c_intptr_t + end subroutine fstarpu_data_descr_set + + + subroutine fstarpu_task_insert(arglist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_task_insert + subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert") ! alias of fstarpu_task_insert + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_insert_task + + subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: cl_arg + type(c_ptr), dimension(*), intent(in) :: bufferlist + end subroutine fstarpu_unpack_arg + + ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) NOTE(review): sync_tag is a starpu_tag_t in C but c_int here, while tag_array uses c_long_long -- confirm the kind + subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) & + bind(C,name="starpu_create_sync_task") + use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr + integer(c_int), value, intent(in) :: sync_tag + integer(c_int), value, intent(in) :: ndeps + integer(c_long_long), intent(in) :: tag_array(*) + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: callback_arg + end subroutine fstarpu_create_sync_task + + ! == starpu_sched_ctx.h == + + ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create + function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C) + use iso_c_binding, only: c_int, c_char, c_ptr + integer(c_int) :: fstarpu_sched_ctx_create + integer(c_int), intent(in) :: workers_array(*) + integer(c_int), value, intent(in) :: nworkers + character(c_char), intent(in) :: ctx_name + type(c_ptr), dimension(*), intent(in) :: arglist + end function fstarpu_sched_ctx_create + + ! 
unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); + function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, & + min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) & + bind(C,name="starpu_sched_ctx_create_inside_interval") + use iso_c_binding, only: c_int, c_char + integer(c_int) :: fstarpu_sched_ctx_create_inside_interval + character(c_char), intent(in) :: policy_name + character(c_char), intent(in) :: sched_ctx_name + integer(c_int), value, intent(in) :: min_ncpus + integer(c_int), value, intent(in) :: max_ncpus + integer(c_int), value, intent(in) :: min_ngpus + integer(c_int), value, intent(in) :: max_ngpus + integer(c_int), value, intent(in) :: allow_overlap + end function fstarpu_sched_ctx_create_inside_interval + + ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args); + subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) & + bind(c,name="starpu_sched_ctx_register_close_callback") + use iso_c_binding, only: c_ptr, c_funptr, c_int + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_funptr), value, intent(in) :: close_callback + type(c_ptr), value, intent(in) :: args + end subroutine fstarpu_sched_ctx_register_close_callback + + ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers") + use iso_c_binding, only: c_int + integer(c_int), intent(in) :: workerids (*) + integer(c_int), value, intent(in) :: nworkers + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_add_workers + + ! 
void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers") + use iso_c_binding, only: c_int + integer(c_int), intent(in) :: workerids (*) + integer(c_int), value, intent(in) :: nworkers + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_remove_workers + + ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers + subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C) + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_display_workers + + ! void starpu_sched_ctx_delete(unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_delete + + ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); + subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + integer(c_int), value, intent(in) :: inheritor + end subroutine fstarpu_sched_ctx_set_inheritor + + ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_inheritor + integer(c_int), value, intent(in) :: ctx + end function fstarpu_sched_ctx_get_inheritor + + ! 
unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level + integer(c_int), value, intent(in) :: ctx + end function fstarpu_sched_ctx_get_hierarchy_level + + ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); + subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: ctx_ptr + end subroutine fstarpu_sched_ctx_set_context + + ! unsigned starpu_sched_ctx_get_context(void); + function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_context + end function fstarpu_sched_ctx_get_context + + ! void starpu_sched_ctx_stop_task_submission(void); + subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission") + use iso_c_binding + end subroutine fstarpu_sched_ctx_stop_task_submission + + ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_finished_submit + + ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); + ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); + + ! 
unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nworkers") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nworkers + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nworkers + + ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); + function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) & + bind(c,name="starpu_sched_ctx_get_nshared_workers") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: sched_ctx_id2 + end function fstarpu_sched_ctx_get_nshared_workers + + ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); + function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_contains_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_contains_worker + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_contains_worker + + ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); + function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_contains_type_of_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker + integer(c_int), value, intent(in) :: arch + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_contains_type_of_worker + + ! 
unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); + function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_worker_get_id + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_worker_get_id + + ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); + function fstarpu_sched_ctx_get_ctx_for_task (task) & + bind(c,name="starpu_sched_ctx_get_ctx_for_task") + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task + type(c_ptr), value, intent(in) :: task + end function fstarpu_sched_ctx_get_ctx_for_task + + ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); + function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) & + bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker + integer(c_int), value, intent(in) :: workerid + end function fstarpu_sched_ctx_overlapping_ctxs_on_worker + + ! int starpu_sched_get_min_priority(void); + function fstarpu_sched_get_min_priority () & + bind(c,name="starpu_sched_get_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_get_min_priority + end function fstarpu_sched_get_min_priority + + ! int starpu_sched_get_max_priority(void); + function fstarpu_sched_get_max_priority () & + bind(c,name="starpu_sched_get_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_get_max_priority + end function fstarpu_sched_get_max_priority + + ! 
int starpu_sched_set_min_priority(int min_prio); + function fstarpu_sched_set_min_priority (min_prio) & + bind(c,name="starpu_sched_set_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_set_min_priority + integer(c_int), value, intent(in) :: min_prio + end function fstarpu_sched_set_min_priority + + ! int starpu_sched_set_max_priority(int max_prio); + function fstarpu_sched_set_max_priority (max_prio) & + bind(c,name="starpu_sched_set_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_set_max_priority + integer(c_int), value, intent(in) :: max_prio + end function fstarpu_sched_set_max_priority + + ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_min_priority + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_min_priority + + ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_max_priority + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_max_priority + + ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); + function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) & + bind(c,name="starpu_sched_ctx_set_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_set_min_priority + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: min_prio + end function fstarpu_sched_ctx_set_min_priority + + ! 
int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); + function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) & + bind(c,name="starpu_sched_ctx_set_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_set_max_priority + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: max_prio + end function fstarpu_sched_ctx_set_max_priority + + ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); + function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_min_priority_is_set") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_min_priority_is_set + + ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); + function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_max_priority_is_set") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_max_priority_is_set + + ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_user_data(sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_user_data") + use iso_c_binding, only: c_int, c_ptr + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_ptr) :: fstarpu_sched_ctx_get_user_data + end function fstarpu_sched_ctx_get_user_data + + ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; + + ! 
void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_delete_worker_collection") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_delete_worker_collection + + ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); + + ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); + subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) & + bind(c,name="starpu_sched_ctx_set_policy_data") + use iso_c_binding, only: c_int, c_ptr + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_ptr), value, intent(in) :: policy_data + end subroutine fstarpu_sched_ctx_set_policy_data + + ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_policy_data") + use iso_c_binding, only: c_int, c_ptr + type(c_ptr) :: fstarpu_sched_ctx_get_policy_data + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_policy_data + + ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id); + function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_exec_parallel_code") + use iso_c_binding, only: c_int, c_funptr, c_ptr + type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code + type(c_funptr), value, intent(in) :: func + type(c_ptr), value, intent(in) :: param + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_exec_parallel_code + + + ! 
int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nready_tasks") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nready_tasks + + ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nready_flops") + use iso_c_binding, only: c_int, c_double + real(c_double) :: fstarpu_sched_ctx_get_nready_flops + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nready_flops + + ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); + subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_increment") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + end subroutine fstarpu_sched_ctx_list_task_counters_increment + + ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); + subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_decrement") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + + end subroutine fstarpu_sched_ctx_list_task_counters_decrement + + ! 
void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); + subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_reset") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + + end subroutine fstarpu_sched_ctx_list_task_counters_reset + + ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_increment_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_increment_all + + ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all + + ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_reset_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_reset_all + + ! 
unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_priority + integer(c_int), value, intent(in) :: worker + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_priority + + ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); + + ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); + subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) & + bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: cpuid + end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid + + ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); + function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) & + bind(c,name="starpu_sched_ctx_book_workers_for_task") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), intent(in) :: workerids(*) + integer(c_int), value, intent(in) :: nworkers + end function fstarpu_sched_ctx_book_workers_for_task + + ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); + subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) & + bind(c,name="starpu_sched_ctx_unbook_workers_for_task") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: master + end subroutine fstarpu_sched_ctx_unbook_workers_for_task + + ! 
unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); + function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_worker_is_master_for_child_ctx + + ! unsigned starpu_sched_ctx_master_get_context(int masterid); + function fstarpu_sched_ctx_master_get_context (masterid) & + bind(c,name="starpu_sched_ctx_master_get_context") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_master_get_context + integer(c_int), value, intent(in) :: masterid + end function fstarpu_sched_ctx_master_get_context + + ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops); + subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) & + bind(c,name="starpu_sched_ctx_revert_task_counters") + use iso_c_binding, only: c_int, c_double + integer(c_int), value, intent(in) :: sched_ctx_id + real(c_double), value, intent(in) :: flops + end subroutine fstarpu_sched_ctx_revert_task_counters + + ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex); + subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) & + bind(c,name="starpu_sched_ctx_move_task_to_ctx") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx + integer(c_int), value, intent(in) :: manage_mutex + end subroutine fstarpu_sched_ctx_move_task_to_ctx + + ! 
int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_worker_rank") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_worker_rank + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_worker_rank + + ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); + + ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_call_pushed_task_cb") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_call_pushed_task_cb + + ! == starpu_fxt.h == + + ! void starpu_fxt_options_init(struct starpu_fxt_options *options); + subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: fxt_options + end subroutine fstarpu_fxt_options_init + + ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options); + subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: fxt_options + end subroutine fstarpu_fxt_generate_trace + + ! void starpu_fxt_autostart_profiling(int autostart); + subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: autostart + end subroutine fstarpu_fxt_autostart_profiling + + ! void starpu_fxt_start_profiling(void); + subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling") + use iso_c_binding + end subroutine fstarpu_fxt_start_profiling + + ! 
void starpu_fxt_stop_profiling(void); + subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling") + use iso_c_binding + end subroutine fstarpu_fxt_stop_profiling + + ! void starpu_fxt_write_data_trace(char *filename_in); + subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace") + use iso_c_binding, only: c_char + character(c_char), intent(in) :: filename + end subroutine fstarpu_fxt_write_data_trace + + ! void starpu_fxt_trace_user_event(unsigned long code); NOTE(review): the binding below names starpu_trace_user_event (no "fxt_") -- confirm that symbol is exported + subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event") + use iso_c_binding, only: c_long + integer(c_long), value, intent(in) :: code + end subroutine fstarpu_trace_user_event + + ! double starpu_timing_now(void) + function fstarpu_timing_now () bind(C,name="starpu_timing_now") + use iso_c_binding, only: c_double + real(c_double) :: fstarpu_timing_now + end function fstarpu_timing_now + + ! == starpu_cuda.h == + + ! cudaStream_t starpu_cuda_get_local_stream(void); + function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_cuda_get_local_stream + end function fstarpu_cuda_get_local_stream + + ! == starpu_stdlib.h == + + ! int starpu_malloc(void **A, size_t dim); (len is a C size_t, hence kind c_size_t) + function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc") + use iso_c_binding, only: c_ptr, c_size_t, c_int + type(c_ptr), intent(out) :: ptr + integer(c_size_t), value, intent(in) :: len + integer(c_int) :: fstarpu_malloc + end function fstarpu_malloc + + ! int starpu_free_noflag(void *A, size_t dim); (len is a C size_t, hence kind c_size_t) + function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag") + use iso_c_binding, only: c_ptr, c_size_t, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_size_t), value, intent(in) :: len + integer(c_int) :: fstarpu_free_noflag + end function fstarpu_free_noflag + + ! 
int starpu_memory_pin(void *addr, size_t size); + function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin") + use iso_c_binding, only: c_ptr, c_size_t, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_size_t), value, intent(in) :: len + integer(c_int) :: fstarpu_memory_pin + end function fstarpu_memory_pin + + ! int starpu_memory_unpin(void *addr, size_t size); (len is a C size_t, hence kind c_size_t) + function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin") + use iso_c_binding, only: c_ptr, c_size_t, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_size_t), value, intent(in) :: len + integer(c_int) :: fstarpu_memory_unpin + end function fstarpu_memory_unpin + + ! void starpu_sleep(float nb_sec); + subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep") + use iso_c_binding, only: c_float + real(c_float), value, intent(in) :: nb_sec + end subroutine fstarpu_sleep + + ! void starpu_usleep(float nb_micro_sec); + subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep") + use iso_c_binding, only: c_float + real(c_float), value, intent(in) :: nb_sec + end subroutine fstarpu_usleep + + ! void starpu_cublas_init(void); + subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init") + end subroutine fstarpu_cublas_init + + ! 
void starpu_cublas_shutdown(void);
+                subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown")
+                end subroutine fstarpu_cublas_shutdown
+
+        end interface
+
+        contains
+        function or_cptrs(op1,op2) ! bitwise OR of two integer values carried as c_ptr
+                type(c_ptr) :: or_cptrs
+                type(c_ptr),intent(in) :: op1,op2
+                integer(c_intptr_t) :: i_op1,i_op2
+                i_op1 = transfer(op1,0_c_intptr_t)
+                i_op2 = transfer(op2,0_c_intptr_t)
+                or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR)
+        end function
+
+        function ip_to_p(i) bind(C) ! reinterpret an intptr_t value as a c_ptr
+                use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR
+                type(c_ptr) :: ip_to_p
+                integer(c_intptr_t), value, intent(in) :: i
+                ip_to_p = transfer(i,C_NULL_PTR)
+        end function ip_to_p
+
+        function p_to_ip(p) bind(C) ! inverse of ip_to_p: read a c_ptr back as an intptr_t
+                use iso_c_binding, only: c_ptr,c_intptr_t
+                integer(c_intptr_t) :: p_to_ip
+                type(c_ptr), value, intent(in) :: p
+                p_to_ip = transfer(p,0_c_intptr_t)
+        end function p_to_ip
+
+        function sz_to_p(sz) bind(C) ! carry a size_t value in a c_ptr (converted through intptr_t)
+                use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t
+                type(c_ptr) :: sz_to_p
+                integer(c_size_t), value, intent(in) :: sz
+                sz_to_p = ip_to_p(int(sz,kind=c_intptr_t))
+        end function sz_to_p
+
+        function fstarpu_init (conf) bind(C) ! fill the FSTARPU_* module constants, then call starpu_init(conf)
+                use iso_c_binding
+                integer(c_int) :: fstarpu_init ! status returned by the C starpu_init
+                type(c_ptr), value, intent(in) :: conf ! configuration pointer, may be C_NULL_PTR
+
+                real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy
+                real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy
+                character(c_char) :: FSTARPU_SZ_C_CHAR_dummy
+                integer(c_int) :: FSTARPU_SZ_C_INT_dummy
+                integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy
+                type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy
+                integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy
+
+                character :: FSTARPU_SZ_CHARACTER_dummy
+
+                integer :: FSTARPU_SZ_INTEGER_dummy
+                integer(4) :: FSTARPU_SZ_INT4_dummy
+                integer(8) :: FSTARPU_SZ_INT8_dummy
+
+                real :: FSTARPU_SZ_REAL_dummy
+                real(4) :: FSTARPU_SZ_REAL4_dummy
+                real(8) :: FSTARPU_SZ_REAL8_dummy
+
+                double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy
+
+                complex :: FSTARPU_SZ_COMPLEX_dummy
+                complex(4) :: FSTARPU_SZ_COMPLEX4_dummy
+                complex(8) :: FSTARPU_SZ_COMPLEX8_dummy
+
+                ! Note: Referencing global C constants from Fortran has
+                ! been found unreliable on some architectures, notably
+                ! on Darwin. The get_integer/get_pointer_constant
+                ! scheme is a workaround to that issue.
+
+                interface
+                        ! These functions are not exported to the end user
+                        function fstarpu_get_constant(s) bind(C)
+                                use iso_c_binding, only: c_ptr,c_char
+                                type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t
+                                character(kind=c_char) :: s ! constant name; callers below pass it NUL-terminated
+                        end function fstarpu_get_constant
+
+                        function fstarpu_init_internal (conf) bind(C,name="starpu_init")
+                                use iso_c_binding, only: c_ptr,c_int
+                                integer(c_int) :: fstarpu_init_internal
+                                type(c_ptr), value :: conf
+                        end function fstarpu_init_internal
+
+                end interface
+
+                ! Initialize Fortran constants from C peers
+                FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR)
+                FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR)
+                FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR)
+                FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR)
+                FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR)
+                FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR)
+                FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR)
+                FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR)
+                FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR)
+
+                FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR)
+                FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR)
+                FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR)
+                FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR)
+                FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR)
+                FSTARPU_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR)
+                FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR)
+                FSTARPU_CALLBACK_WITH_ARG_NFREE = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR)
+                FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR)
+                FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK_POP_ARG = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR)
+                FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR)
+                FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR)
+                FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR)
+                FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR)
+                FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR)
+                FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR)
+                FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR)
+                FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR)
+                FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR)
+                FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR)
+                FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR)
+                FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR)
+                FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR)
+                FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR)
+                FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR)
+
+                FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR)
+                FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR)
+                FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR)
+                FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR)
+                FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR)
+
+                FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int)
+
+                FSTARPU_SCHED_CTX_POLICY_NAME = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_POLICY_STRUCT = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_NESTED = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_AWAKE_WORKERS = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_POLICY_INIT = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR)
+                FSTARPU_SCHED_CTX_USER_DATA = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR)
+
+                FSTARPU_NOWHERE = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR)
+                FSTARPU_CPU = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR)
+                FSTARPU_CUDA = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR)
+                FSTARPU_OPENCL = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR)
+
+                FSTARPU_CODELET_SIMGRID_EXECUTE = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR)
+                FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR)
+                FSTARPU_CUDA_ASYNC = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR)
+                FSTARPU_OPENCL_ASYNC = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR)
+
+                !FSTARPU_PER_WORKER = &
+                ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR)
+                !FSTARPU_PER_ARCH = &
+                ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR)
+                !FSTARPU_PER_COMMON = &
+                ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR)
+                FSTARPU_HISTORY_BASED = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR)
+                FSTARPU_REGRESSION_BASED = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR)
+                FSTARPU_NL_REGRESSION_BASED = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR)
+                FSTARPU_MULTIPLE_REGRESSION_BASED = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR)
+
+                FSTARPU_SEQ = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR)
+                FSTARPU_SPMD = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR)
+                FSTARPU_FORKJOIN = &
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR)
+
+                ! Initialize size constants as 'c_ptr'
+                FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy))
+                FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy))
+                FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy))
+                FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy))
+                FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy))
+                FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy))
+                FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy))
+
+                FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy))
+
+                FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy))
+                FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy))
+                FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy))
+
+                FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy))
+                FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy))
+                FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy))
+
+                FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy))
+
+                FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy))
+                FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy))
+                FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy))
+
+                ! Default priority: fetched as a pointer-sized value, then narrowed to c_int
+                FSTARPU_DEFAULT_PRIO = int(p_to_ip(&
+                        fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int)
+
+                ! Initialize StarPU
+                if (c_associated(conf)) then ! both branches call starpu_init; a null conf is forwarded as C_NULL_PTR
+                        fstarpu_init = fstarpu_init_internal(conf)
+                else
+                        fstarpu_init = fstarpu_init_internal(C_NULL_PTR)
+                end if
+        end function fstarpu_init
+
+        function fstarpu_csizet_to_cptr(i) bind(C) ! carry a size_t value in a c_ptr
+                use iso_c_binding
+                type(c_ptr) :: fstarpu_csizet_to_cptr
+                integer(c_size_t) :: i ! no VALUE attribute: received by reference
+                fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR)
+        end function fstarpu_csizet_to_cptr
+
+        function fstarpu_int_to_cptr(i) bind(C) ! carry a C int value in a c_ptr
+                use iso_c_binding
+                type(c_ptr) :: fstarpu_int_to_cptr
+                integer(c_int) :: i ! no VALUE attribute: received by reference
+                fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR)
+        end function fstarpu_int_to_cptr
+
+        function fstarpu_long_to_cptr(i) bind(C) ! carry a C long value in a c_ptr
+                use iso_c_binding
+                type(c_ptr) :: fstarpu_long_to_cptr
+                integer(c_long) :: i ! no VALUE attribute: received by reference
+                fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR)
+        end function fstarpu_long_to_cptr
+
+        ! Note: do not add binding declarations here in 'CONTAINS'
+        ! section, because the compiler generates empty functions for
+        ! them.
+        ! Instead, put binding declarations in the 'INTERFACE' section
+        ! above.
+
+end module fstarpu_mod
diff --git a/examples/native_fortran/nf_codelets.f90 b/examples/native_fortran/nf_codelets.f90
new file mode 100644
index 0000000..762c584
--- /dev/null
+++ b/examples/native_fortran/nf_codelets.f90
@@ -0,0 +1,115 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! 
See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+module nf_codelets ! CPU codelet routines (cl_cpu_func_vec, cl_cpu_func_mat) that print their buffers
+contains
+        ! 'cl_vec' codelet routine
+        !
+        ! Note: codelet routines must:
+        ! . be declared recursive (~ 'reentrant routine')
+        ! . be declared with the 'bind(C)' attribute for proper C interfacing
+recursive subroutine cl_cpu_func_vec (buffers, cl_args) bind(C)
+        use iso_c_binding ! C interfacing module
+        use fstarpu_mod ! StarPU interfacing module
+        implicit none
+
+        type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
+        real(8), dimension(:), pointer :: va ! Fortran view of buffer 0
+        integer, dimension(:), pointer :: vb ! Fortran view of buffer 1
+        integer :: nx_va,nx_vb,i
+
+        write(*,*) "task -->"
+        ! get 'va' number of elements
+        nx_va = fstarpu_vector_get_nx(buffers, 0)
+        write(*,*) "nx_va"
+        write(*,*) nx_va
+
+        ! get 'vb' number of elements
+        nx_vb = fstarpu_vector_get_nx(buffers, 1)
+        write(*,*) "nx_vb"
+        write(*,*) nx_vb
+
+        ! get 'va' converted Fortran pointer
+        call c_f_pointer(fstarpu_vector_get_ptr(buffers, 0), va, shape=[nx_va])
+        write(*,*) "va"
+        do i=1,nx_va
+                write(*,*) i,va(i)
+        end do
+
+        ! get 'vb' converted Fortran pointer
+        call c_f_pointer(fstarpu_vector_get_ptr(buffers, 1), vb, shape=[nx_vb])
+        write(*,*) "vb"
+        do i=1,nx_vb
+                write(*,*) i,vb(i)
+        end do
+        write(*,*) "task <--"
+
+end subroutine cl_cpu_func_vec
+
+        ! 'cl_mat' codelet routine: prints the dimensions and elements of two matrix buffers
+recursive subroutine cl_cpu_func_mat (buffers, cl_args) bind(C)
+        use iso_c_binding ! C interfacing module
+        use fstarpu_mod ! StarPU interfacing module
+        implicit none
+
+        type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
+        real(8), dimension(:,:), pointer :: ma ! Fortran view of buffer 0
+        integer, dimension(:,:), pointer :: mb ! Fortran view of buffer 1
+        integer :: ld_ma,nx_ma,ny_ma
+        integer :: ld_mb,nx_mb,ny_mb
+        integer :: i,j
+
+        write(*,*) "task -->"
+        ld_ma = fstarpu_matrix_get_ld(buffers, 0)
+        nx_ma = fstarpu_matrix_get_nx(buffers, 0)
+        ny_ma = fstarpu_matrix_get_ny(buffers, 0)
+        write(*,*) "ld_ma"
+        write(*,*) ld_ma
+        write(*,*) "nx_ma"
+        write(*,*) nx_ma
+        write(*,*) "ny_ma"
+        write(*,*) ny_ma
+
+        ld_mb = fstarpu_matrix_get_ld(buffers, 1)
+        nx_mb = fstarpu_matrix_get_nx(buffers, 1)
+        ny_mb = fstarpu_matrix_get_ny(buffers, 1)
+        write(*,*) "ld_mb"
+        write(*,*) ld_mb
+        write(*,*) "nx_mb"
+        write(*,*) nx_mb
+        write(*,*) "ny_mb"
+        write(*,*) ny_mb
+
+        ! the first extent is the leading dimension ld_ma; only rows 1..nx_ma are printed below
+        call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ma, shape=[ld_ma,ny_ma])
+        write(*,*) "ma"
+        do i=1,nx_ma
+                do j=1,ny_ma
+                        write(*,*) i,j,ma(i,j)
+                end do
+                write(*,*) '-'
+        end do
+
+        ! same mapping for 'mb': first extent is ld_mb, rows 1..nx_mb are printed
+        call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), mb, shape=[ld_mb,ny_mb])
+        write(*,*) "mb"
+        do i=1,nx_mb
+                do j=1,ny_mb
+                        write(*,*) i,j,mb(i,j)
+                end do
+                write(*,*) '-'
+        end do
+        write(*,*) "task <--"
+
+end subroutine cl_cpu_func_mat
+end module nf_codelets
diff --git a/examples/native_fortran/nf_compute.f90 b/examples/native_fortran/nf_compute.f90
new file mode 100644
index 0000000..02403cf
--- /dev/null
+++ b/examples/native_fortran/nf_compute.f90
@@ -0,0 +1,132 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+! Copyright (C) 2015-2015 ONERA
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+! Computation kernels for the simulation
+
+MODULE nf_compute
+
+  USE nf_types
+  USE fstarpu_mod
+  USE iso_c_binding
+
+  IMPLICIT NONE
+
+CONTAINS
+
+  !--------------------------------------------------------------!
+  SUBROUTINE init_element(ro,dro,basis,Neq_max,Np,Ng,i) ! fill ro, dro and basis from their indices scaled by i
+    INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Np,Ng,i
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro,basis,dro
+    !Local variables
+    INTEGER(KIND=C_INT) :: n,nb,neq
+
+    DO nb=1,Np
+       DO neq= 1,Neq_max
+          ro(neq,nb) = 0.01*(nb+neq)*i
+       END DO
+    END DO
+
+    DO nb=1,Np
+       DO neq= 1,Neq_max
+          dro(neq,nb) = 0.05*(nb-neq)*i
+       END DO
+    END DO
+
+    DO n=1,Ng
+       DO nb=1,Np
+          basis(nb,n) = 0.05*(n+nb)*i
+       END DO
+    END DO
+
+  END SUBROUTINE init_element
+
+  !--------------------------------------------------------------!
+  RECURSIVE SUBROUTINE loop_element_cpu_fortran(buffers, cl_args) BIND(C) ! codelet entry: unpack, then loop_element_cpu
+    TYPE(C_PTR), VALUE, INTENT(IN) :: buffers, cl_args
+
+    INTEGER(KIND=C_INT) :: Neq_max,Np,Ng
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro,basis
+    REAL(KIND=C_DOUBLE),TARGET :: coeff
+
+    ! dimensions are read from the buffers: rows of buffer 0, rows and columns of buffer 2
+    Neq_max = fstarpu_matrix_get_nx(buffers, 0)
+    Np = fstarpu_matrix_get_nx(buffers, 2)
+    Ng = fstarpu_matrix_get_ny(buffers, 2)
+
+    CALL fstarpu_unpack_arg(cl_args,(/ c_loc(coeff) /))
+
+    CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ro, shape=[Neq_max,Np])
+    CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), dro, shape=[Neq_max,Np])
+    CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), basis, shape=[Np,Ng])
+
+    CALL loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np)
+  END SUBROUTINE loop_element_cpu_fortran
+
+  !--------------------------------------------------------------!
+  RECURSIVE SUBROUTINE loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) ! plain kernel: updates dro from ro and basis
+    REAL(KIND=C_DOUBLE),INTENT(IN) :: coeff
+    INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Ng,Np
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: ro,basis
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: dro
+    !Local variables
+    REAL(KIND=C_DOUBLE) :: coeff2,r
+    INTEGER(KIND=C_INT) :: n,nb,neq
+
+    DO n=1,Ng
+       r = 0.
+       DO nb=1,Np
+          DO neq= 1,Neq_max
+             r = r + basis(nb,n) * ro(neq,nb)
+          ENDDO
+       ENDDO
+
+       coeff2 = r + coeff
+       ! every entry of dro is incremented by coeff2, once per n
+       DO nb=1,Np
+          DO neq = 1,Neq_max
+             dro(neq,nb) = coeff2 + dro(neq,nb)
+          ENDDO
+       ENDDO
+    ENDDO
+
+  END SUBROUTINE loop_element_cpu
+
+  !--------------------------------------------------------------!
+  RECURSIVE SUBROUTINE copy_element_cpu_fortran(buffers, cl_args) BIND(C) ! codelet entry; cl_args is not read here
+    TYPE(C_PTR), VALUE, INTENT(IN) :: buffers, cl_args
+
+    INTEGER(KIND=C_INT) :: Neq_max,Np
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro
+
+    Neq_max = fstarpu_matrix_get_nx(buffers, 0)
+    Np = fstarpu_matrix_get_ny(buffers, 0)
+
+    CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ro, shape=[Neq_max,Np])
+    CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), dro, shape=[Neq_max,Np])
+
+    CALL copy_element_cpu(ro,dro)
+
+  END SUBROUTINE copy_element_cpu_fortran
+
+  !--------------------------------------------------------------!
+  RECURSIVE SUBROUTINE copy_element_cpu(ro,dro) ! despite the name, this accumulates: ro = ro + dro
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro
+    REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: dro
+
+    ro = ro + dro
+
+  END SUBROUTINE copy_element_cpu
+
+END MODULE nf_compute
diff --git a/examples/native_fortran/nf_dynbuf.f90 b/examples/native_fortran/nf_dynbuf.f90
new file mode 100644
index 0000000..df21478
--- /dev/null
+++ b/examples/native_fortran/nf_dynbuf.f90
@@ -0,0 +1,77 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+!
+! 
StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+program nf_dynbuf ! submits one task whose buffer count is FSTARPU_NMAXBUFS+1
+        use iso_c_binding ! C interfacing module
+        use fstarpu_mod ! StarPU interfacing module
+        use nf_dynbuf_cl
+        implicit none
+
+        type(c_ptr) :: cl_dynbuf_big ! a pointer for the codelet structure
+        type(c_ptr) :: dh_var ! data handle registered for 'var'
+        type(c_ptr) :: descrs_var ! array of (handle, mode) descriptors passed to the task
+        integer(c_int),target :: nbuffers
+        integer(c_int) :: err ! return status for fstarpu_init
+        integer(c_int) :: ncpu ! number of cpus workers
+
+        integer(c_int),target :: var
+        integer(c_int) :: i
+
+        var = 42
+
+        ! initialize StarPU with default settings
+        err = fstarpu_init(C_NULL_PTR)
+        if (err == -19) then ! NOTE(review): -19 is presumably -ENODEV and 77 a 'skipped' exit code; confirm
+                stop 77
+        end if
+
+        ! stop there if no CPU worker available
+        ncpu = fstarpu_cpu_worker_get_count()
+        if (ncpu == 0) then
+                call fstarpu_shutdown()
+                stop 77
+        end if
+
+        ! allocate an empty codelet structure
+        cl_dynbuf_big = fstarpu_codelet_allocate()
+        call fstarpu_codelet_set_name(cl_dynbuf_big, C_CHAR_"dummy_big_kernel"//C_NULL_CHAR)
+        call fstarpu_codelet_add_cpu_func(cl_dynbuf_big, C_FUNLOC(cl_cpu_func_dynbuf_big))
+
+        write(*,*) "FSTARPU_NMAXBUFS",FSTARPU_NMAXBUFS
+        nbuffers = FSTARPU_NMAXBUFS+1 ! one buffer more than FSTARPU_NMAXBUFS
+        call fstarpu_codelet_set_nbuffers(cl_dynbuf_big, nbuffers)
+
+        call fstarpu_variable_data_register(dh_var, 0, c_loc(var), c_sizeof(var))
+
+        descrs_var = fstarpu_data_descr_array_alloc(nbuffers)
+        do i=0,nbuffers-1 ! every descriptor refers to the same handle, in RW mode
+                call fstarpu_data_descr_array_set(descrs_var, i, dh_var, FSTARPU_RW)
+        end do
+        call fstarpu_task_insert((/ cl_dynbuf_big, &
+                FSTARPU_VALUE, c_loc(nbuffers), FSTARPU_SZ_C_INT, &
+                FSTARPU_DATA_MODE_ARRAY, descrs_var, c_loc(nbuffers), &
+                C_NULL_PTR /))
+        call fstarpu_task_wait_for_all()
+
+        call fstarpu_data_descr_array_free(descrs_var)
+        call fstarpu_data_unregister(dh_var)
+
+        ! free codelet structure
+        call fstarpu_codelet_free(cl_dynbuf_big)
+
+        ! shut StarPU down
+        call fstarpu_shutdown()
+
+end program nf_dynbuf
diff --git a/examples/native_fortran/nf_dynbuf_cl.f90 b/examples/native_fortran/nf_dynbuf_cl.f90
new file mode 100644
index 0000000..395d1ec
--- /dev/null
+++ b/examples/native_fortran/nf_dynbuf_cl.f90
@@ -0,0 +1,38 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+module nf_dynbuf_cl ! codelet routine for the nf_dynbuf example
+contains
+recursive subroutine cl_cpu_func_dynbuf_big (buffers, cl_args) bind(C)
+        use iso_c_binding ! C interfacing module
+        use fstarpu_mod ! StarPU interfacing module
+        implicit none
+
+        type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args carries the packed buffer count
+        integer(c_int),target :: nb_data ! number of buffers, unpacked from cl_args
+        integer(c_int),pointer :: val ! Fortran view of the current buffer
+        integer(c_int) :: i
+
+        call fstarpu_unpack_arg(cl_args,(/ c_loc(nb_data) /))
+        write(*,*) "number of data:", nb_data
+        do i=0,nb_data-1 ! buffer indices are 0-based
+                call c_f_pointer(fstarpu_variable_get_ptr(buffers, i), val)
+                write(*,*) "i:", i, ", val:", val
+                if (val /= 42) then ! any value other than 42 terminates the program with status 1
+                        stop 1
+                end if
+        end do
+end subroutine cl_cpu_func_dynbuf_big
+end module nf_dynbuf_cl
diff --git a/examples/native_fortran/nf_example.f90 b/examples/native_fortran/nf_example.f90
new file mode 100644
index 0000000..9ca9bb1
--- /dev/null
+++ b/examples/native_fortran/nf_example.f90
@@ -0,0 +1,188 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+! Copyright (C) 2015-2015 ONERA
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+! This is an example of Fortran90 program making use of StarPU.
+! It registers a few matrices for each element of a domain, performs
+! update computations on them, and checks the result.
+
+PROGRAM f90_example
+
+  USE nf_types
+  USE fstarpu_mod
+  USE nf_compute
+  USE iso_c_binding
+
+  IMPLICIT NONE
+
+  TYPE(type_mesh) :: mesh
+  TYPE(type_numpar),TARGET :: numpar
+  TYPE(type_mesh_elt),POINTER :: elt => NULL()
+  INTEGER(KIND=C_INT) :: i,Nelt,res,cpus
+  INTEGER(KIND=C_INT) :: starpu_maj,starpu_min,starpu_rev
+  INTEGER(KIND=C_INT) :: it,it_tot
+  INTEGER(KIND=C_INT), PARAMETER :: loop_color = INT(Z'7FFF00', KIND=C_INT)
+  INTEGER(KIND=C_INT), PARAMETER :: copy_color = INT(Z'3F7FFF', KIND=C_INT)
+  REAL(KIND=C_DOUBLE),TARGET :: flops
+  INTEGER(C_INT), TARGET :: max_prio
+
+  TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
+  TYPE(C_PTR) :: cl_copy_element = C_NULL_PTR ! copy codelet
+
+  !Initialization with arbitrary data
+  Nelt = 2
+  it_tot = 2
+  numpar%Neq_max = 5
+  numpar%coeff = 1.0
+  ALLOCATE(mesh%elt(Nelt))
+  DO i = 1,Nelt
+    elt => mesh%elt(i)
+    elt%Ng = 4
+    elt%Np = 2
+    ALLOCATE(elt%ro(numpar%Neq_max,elt%Np))
+    ALLOCATE(elt%dro(numpar%Neq_max,elt%Np))
+    ALLOCATE(elt%basis(elt%Np,elt%Ng))
+    CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i)
+  ENDDO
+
+  !Initialization of StarPU
+  res = fstarpu_init(C_NULL_PTR)
+  IF (res == -19) THEN ! NOTE(review): -19 is presumably -ENODEV and 77 a 'skipped' exit code; confirm
+    STOP 77
+  END IF
+  CALL fstarpu_get_version(starpu_maj,starpu_min,starpu_rev)
+  WRITE(6,'(a,i4,a,i4,a,i4)') "StarPU version: ", starpu_maj , "." , starpu_min , "." , starpu_rev
+  cpus = fstarpu_cpu_worker_get_count()
+  IF (cpus == 0) THEN
+    CALL fstarpu_shutdown()
+    STOP 77
+  END IF
+  max_prio = fstarpu_sched_get_max_priority()
+
+  cl_loop_element = fstarpu_codelet_allocate()
+  CALL fstarpu_codelet_add_cpu_func(cl_loop_element, C_FUNLOC(loop_element_cpu_fortran))
+  CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R)
+  CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_RW)
+  CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R)
+  CALL fstarpu_codelet_set_name(cl_loop_element, C_CHAR_"LOOP_ELEMENT"//C_NULL_CHAR)
+  CALL fstarpu_codelet_set_color(cl_loop_element, loop_color)
+
+  cl_copy_element = fstarpu_codelet_allocate()
+  CALL fstarpu_codelet_add_cpu_func(cl_copy_element, C_FUNLOC(copy_element_cpu_fortran))
+  CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_RW)
+  CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_R)
+  CALL fstarpu_codelet_set_name(cl_copy_element, C_CHAR_"COPY_ELEMENT"//C_NULL_CHAR)
+  CALL fstarpu_codelet_set_color(cl_copy_element, copy_color)
+
+  !Registration of elements
+  DO i = 1,Nelt
+    elt => mesh%elt(i)
+    call fstarpu_matrix_data_register(elt%ro_h, 0, c_loc(elt%ro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%ro(1,1)))
+    call fstarpu_matrix_data_register(elt%dro_h, 0, c_loc(elt%dro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%dro(1,1)))
+    call fstarpu_matrix_data_register(elt%basis_h, 0, c_loc(elt%basis), elt%Np, elt%Np, elt%Ng, c_sizeof(elt%basis(1,1)))
+  ENDDO
+  !Compute
+  DO it = 1,it_tot
+
+    ! compute new dro for each element
+    DO i = 1,Nelt
+      elt => mesh%elt(i)
+      flops = elt%Ng * ( (elt%Np * numpar%Neq_max * 2) + 1 + elt%Np * numpar%Neq_max)
+      CALL fstarpu_task_insert((/ cl_loop_element, &
+         FSTARPU_VALUE, c_loc(numpar%coeff), FSTARPU_SZ_C_DOUBLE, &
+         FSTARPU_R, elt%ro_h, &
+         FSTARPU_RW, elt%dro_h, &
+         FSTARPU_R, elt%basis_h, &
+         FSTARPU_FLOPS, c_loc(flops), &
+         FSTARPU_PRIORITY, c_loc(FSTARPU_DEFAULT_PRIO), &
+         C_NULL_PTR /))
+    ENDDO
+    ! sync (if needed by the algorithm)
+    CALL fstarpu_task_wait_for_all()
+
+    ! - - - - -
+
+    ! copy dro to ro for each element
+    DO i = 1,Nelt
+      elt => mesh%elt(i)
+      CALL fstarpu_task_insert((/ cl_copy_element, &
+         FSTARPU_RW, elt%ro_h, &
+         FSTARPU_R, elt%dro_h, &
+         FSTARPU_PRIORITY, c_loc(max_prio), &
+         C_NULL_PTR /))
+    ENDDO
+    ! sync (if needed by the algorithm)
+    CALL fstarpu_task_wait_for_all()
+
+  ENDDO
+  !Unregistration of elements
+  DO i = 1,Nelt
+    elt => mesh%elt(i)
+    CALL fstarpu_data_unregister(elt%ro_h)
+    CALL fstarpu_data_unregister(elt%dro_h)
+    CALL fstarpu_data_unregister(elt%basis_h)
+  ENDDO
+
+  !Terminate StarPU, no task can be submitted after
+  CALL fstarpu_shutdown()
+
+  !Check data with StarPU
+  WRITE(6,'(a)') " "
+  WRITE(6,'(a)') " %%%% RESULTS STARPU %%%% "
+  WRITE(6,'(a)') " "
+  DO i = 1,Nelt
+    WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = "
+    WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro
+    WRITE(6,'(a)') " ------------------------ "
+  ENDDO
+
+  !Same compute without StarPU
+  DO i = 1,Nelt
+    elt => mesh%elt(i)
+    CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i)
+  ENDDO
+
+  DO it = 1, it_tot
+    DO i = 1,Nelt
+      elt => mesh%elt(i)
+      CALL loop_element_cpu(elt%ro,elt%dro,elt%basis,numpar%coeff,numpar%Neq_max,elt%Ng,elt%Np)
+      elt%ro = elt%ro + elt%dro
+    ENDDO
+  ENDDO
+
+  WRITE(6,'(a)') " "
+  WRITE(6,'(a)') " %%%% RESULTS VERIFICATION %%%% "
+  WRITE(6,'(a)') " "
+  ! the reference values are only printed; the two result sets are not compared by the program
+  DO i = 1,Nelt
+    WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = "
+    WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro
+    WRITE(6,'(a)') " ------------------------ "
+  ENDDO
+
+  WRITE(6,'(a)') " "
+
+  !Deallocation
+  CALL fstarpu_codelet_free(cl_loop_element)
+  CALL fstarpu_codelet_free(cl_copy_element)
+  DO i = 1,Nelt
+    elt => mesh%elt(i)
+    DEALLOCATE(elt%ro)
+    DEALLOCATE(elt%dro)
+    DEALLOCATE(elt%basis)
+  ENDDO
+  DEALLOCATE(mesh%elt)
+
+END PROGRAM f90_example
diff --git a/examples/native_fortran/nf_matrix.f90 b/examples/native_fortran/nf_matrix.f90
new file mode 100644
index 
0000000..4b17646 --- /dev/null +++ b/examples/native_fortran/nf_matrix.f90 @@ -0,0 +1,120 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +program nf_matrix + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use nf_codelets + implicit none + + real(8), dimension(:,:), allocatable, target :: ma + integer, dimension(:,:), allocatable, target :: mb + integer :: i,j + + type(c_ptr) :: cl_mat ! a pointer for the codelet structure + type(c_ptr) :: dh_ma ! a pointer for the 'ma' vector data handle + type(c_ptr) :: dh_mb ! a pointer for the 'mb' vector data handle + integer(c_int) :: err ! return status for fstarpu_init + integer(c_int) :: ncpu ! number of cpus workers + real(c_double) :: start_time ! start clock in usec + real(c_double) :: end_time ! end clock in usec + + allocate(ma(5,6)) + do i=1,5 + do j=1,6 + ma(i,j) = (i*10)+j + end do + end do + + allocate(mb(7,8)) + do i=1,7 + do j=1,8 + mb(i,j) = (i*10)+j + end do + end do + + ! initialize StarPU with default settings + err = fstarpu_init(C_NULL_PTR) + if (err == -19) then + stop 77 + end if + + ! stop there if no CPU worker available + ncpu = fstarpu_cpu_worker_get_count() + if (ncpu == 0) then + call fstarpu_shutdown() + stop 77 + end if + + ! collect the start clock time + start_time = fstarpu_timing_now() + + ! 
allocate an empty codelet structure + cl_mat = fstarpu_codelet_allocate() + + ! set the codelet name + call fstarpu_codelet_set_name(cl_mat, C_CHAR_"my_mat_codelet"//C_NULL_CHAR) + + ! add a CPU implementation function to the codelet + call fstarpu_codelet_add_cpu_func(cl_mat, C_FUNLOC(cl_cpu_func_mat)) + + ! add a Read-only mode data buffer to the codelet + call fstarpu_codelet_add_buffer(cl_mat, FSTARPU_R) + + ! add a Read-Write mode data buffer to the codelet + call fstarpu_codelet_add_buffer(cl_mat, FSTARPU_RW) + + ! register 'ma', a vector of real(8) elements + !dh_ma = fstarpu_matrix_data_register(c_loc(ma), 5, 5, 6, c_sizeof(ma(1,1)), 0) + call fstarpu_matrix_data_register(dh_ma, 0, c_loc(ma), 5, 5, 6, c_sizeof(ma(1,1))) + + ! register 'mb', a vector of integer elements + call fstarpu_matrix_data_register(dh_mb, 0, c_loc(mb), 7, 7, 8, c_sizeof(mb(1,1))) + + ! insert a task with codelet cl_mat, and vectors 'ma' and 'mb' + ! + ! Note: The array argument must follow the layout: + ! (/ + ! , + ! [ [, + ! [ [, + ! [ [ +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NX 2048 +#else +#define NX 2048000 +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void scal_cpu_func(void *buffers[], void *_args) +{ + unsigned i; + float *factor = _args, f = *factor; + struct starpu_vector_interface *vector = buffers[0]; + unsigned n = STARPU_VECTOR_GET_NX(vector); + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + + FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size()); + +#pragma omp parallel for num_threads(starpu_combined_worker_get_size()) + for (i = 0; i < n; i++) + { + float v = val[i]; + int j; + for (j = 0; j < 100; j++) + v = v * f; + val[i] = v; + } +} + +static struct starpu_perfmodel vector_scal_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "vector_scal_parallel" +}; + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .type = STARPU_FORKJOIN, + .max_parallelism = INT_MAX, + .cpu_funcs = {scal_cpu_func}, + .cpu_funcs_name = {"scal_cpu_func"}, + .nbuffers = 1, + .model = &vector_scal_model, +}; + +int main(void) +{ + struct starpu_conf conf; + float *vector; + unsigned i; + int ret; + + starpu_conf_init(&conf); + + /* Most OpenMP implementations do not support concurrent parallel + * sections, so only enable one combined worker at a time. 
*/ + conf.single_combined_worker = 1; + conf.sched_policy_name = "pheft"; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&vector, NX*sizeof(float)); + + for (i = 0; i < NX; i++) + vector[i] = (i+1.0f); + + FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]); + FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]); + + starpu_data_handle_t vector_handle; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); + + float factor = 1.001; + + for (i = 0; i < 100; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + + task->handles[0] = vector_handle; + task->cl_arg = &factor; + task->cl_arg_size = sizeof(factor); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(vector_handle); + + FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]); + FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]); + + starpu_free_noflag(vector, NX*sizeof(float)); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + return 0; + +enodev: + starpu_data_unregister(vector_handle); + starpu_free_noflag(vector, NX*sizeof(float)); + starpu_shutdown(); + return 77; +} diff --git a/examples/parallel_workers/parallel_workers.c b/examples/parallel_workers/parallel_workers.c new file mode 100644 index 0000000..4731a63 --- /dev/null +++ b/examples/parallel_workers/parallel_workers.c @@ -0,0 +1,151 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#if !defined(STARPU_PARALLEL_WORKER) +int main(void) +{ + return 77; +} +#else + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 8 +#else +#define NTASKS 32 +#endif +#define SIZE 4000 + +/* Codelet SUM */ +static void sum_cpu(void * descr[], void *cl_arg) +{ + double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]); + double * v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); + double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); + + int size; + starpu_codelet_unpack_args(cl_arg, &size); + fprintf(stderr, "sum_cpu\n"); + int i, k; +#pragma omp parallel + fprintf(stderr, "hello from the task %d\n", omp_get_thread_num()); + for (k=0;k<10;k++) + { +#pragma omp parallel for + for (i=0; i 6) ? 1 : 0, + + /* Note that this mode requires that you put a prologue callback managing + this on all tasks to be taken into account. */ + STARPU_PROLOGUE_CALLBACK_POP, &starpu_parallel_worker_openmp_prologue, + + 0); + + if (ret == -ENODEV) + goto out; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + + +out: + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_parallel_worker_shutdown(parallel_workers); + + starpu_shutdown(); + return (ret == -ENODEV) ? 
77 : 0 ; + +enodev: + starpu_shutdown(); + return 77; +} +#endif diff --git a/examples/parallel_workers/parallel_workers_func.c b/examples/parallel_workers/parallel_workers_func.c new file mode 100644 index 0000000..3a3698e --- /dev/null +++ b/examples/parallel_workers/parallel_workers_func.c @@ -0,0 +1,105 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#if !defined(STARPU_PARALLEL_WORKER) +int main(void) +{ + return 77; +} +#else + +static void display_cpu(void *descr[], void *cl_arg) +{ + (void)descr; + (void)cl_arg; +#pragma omp parallel + { +#ifdef __linux__ + fprintf(stderr, "thread %d on cpu %d\n", omp_get_thread_num(), sched_getcpu()); +#endif + } +} + +static struct starpu_codelet display_cl = +{ + .cpu_funcs = {display_cpu, NULL}, + .nbuffers = 0, +}; + +void bind_func(void *arg) +{ + (void) arg; + int workerid = starpu_worker_get_id_check(); + + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + { + struct starpu_task *task = starpu_task_get_current(); + int sched_ctx = task->sched_ctx; + int *cpuids = NULL; + int ncpuids = 0; + + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + omp_set_num_threads(ncpuids); +#pragma omp parallel + { + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); + } + free(cpuids); + } + return; +} + +int main(void) +{ + int ret, i; + struct starpu_parallel_worker_config *parallel_workers; + + setenv("STARPU_NMPI_MS","0",1); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, + STARPU_PARALLEL_WORKER_POLICY_NAME, "dmdas", + STARPU_PARALLEL_WORKER_CREATE_FUNC, &bind_func, + STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, NULL, + 0); + if (parallel_workers == NULL) + goto enodev; + starpu_parallel_worker_print(parallel_workers); + + ret = starpu_task_insert(&display_cl, 0); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + starpu_parallel_worker_shutdown(parallel_workers); + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} +#endif diff --git a/examples/parallel_workers/parallel_workers_oldapi.c 
b/examples/parallel_workers/parallel_workers_oldapi.c new file mode 100644 index 0000000..51fe724 --- /dev/null +++ b/examples/parallel_workers/parallel_workers_oldapi.c @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#if !defined(STARPU_PARALLEL_WORKER) +int main(void) +{ + return 77; +} +#else + +int main(void) +{ + int ret; + struct starpu_cluster_machine *clusters; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* We regroup resources under each sockets into a parallel worker. 
We express a partition + * of one socket to create two internal parallel workers */ + clusters = starpu_cluster_machine(HWLOC_OBJ_SOCKET, + STARPU_CLUSTER_POLICY_NAME, "dmdas", + STARPU_PARALLEL_WORKER_PARTITION_ONE, + STARPU_PARALLEL_WORKER_NEW, + STARPU_PARALLEL_WORKER_NB, 2, + STARPU_PARALLEL_WORKER_NCORES, 1, + 0); + if (clusters != NULL) + { + starpu_cluster_print(clusters); + starpu_uncluster_machine(clusters); + } + + starpu_shutdown(); + return 0; +} +#endif diff --git a/examples/perf_monitoring/perf_counters_01.c b/examples/perf_monitoring/perf_counters_01.c new file mode 100644 index 0000000..4551f9d --- /dev/null +++ b/examples/perf_monitoring/perf_counters_01.c @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +static void print_scope(const enum starpu_perf_counter_scope scope) +{ + int nb = starpu_perf_counter_nb(scope); + int i; + printf("scope %s\n", starpu_perf_counter_scope_id_to_name(scope)); + for (i=0; i +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +/* global counters */ +static int id_g_total_submitted; +static int id_g_peak_submitted; +static int id_g_peak_ready; + +/* per worker counters */ +static int id_w_total_executed; +static int id_w_cumul_execution_time; + +/* per_codelet counters */ +static int id_c_total_submitted; +static int id_c_peak_submitted; +static int id_c_peak_ready; +static int id_c_total_executed; +static int id_c_cumul_execution_time; + +void g_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) +{ + (void) listener; + (void) context; + int64_t g_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_total_submitted); + int64_t g_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_submitted); + int64_t g_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_ready); + printf("global: g_total_submitted = %"PRId64", g_peak_submitted = %"PRId64", g_peak_ready = %"PRId64"\n", g_total_submitted, g_peak_submitted, g_peak_ready); +} + +void w_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) +{ + (void) listener; + (void) context; + int workerid = starpu_worker_get_id(); + int64_t w_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_w_total_executed); + double w_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_w_cumul_execution_time); + + printf("worker[%d]: w_total_executed = %"PRId64", w_cumul_execution_time = %lf\n", workerid, w_total_executed, w_cumul_execution_time); +} + +void c_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) +{ + (void) listener; + struct starpu_codelet *cl = context; + int64_t c_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_submitted); + 
int64_t c_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_submitted); + int64_t c_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_ready); + int64_t c_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_executed); + double c_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_c_cumul_execution_time); + if (cl->name != NULL) + { + printf("codelet[%s]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl->name, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time); + } + else + { + printf("codelet[%p]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time); + } +} + +void func(void *buffers[], void *cl_args) +{ + int *int_vector = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); + int NX = (int)STARPU_VECTOR_GET_NX(buffers[0]); + const int niters; + starpu_codelet_unpack_args(cl_args, &niters); + int i; + for (i=0; i +#include +#include + +static void print_scope(const enum starpu_perf_knob_scope scope) +{ + int nb = starpu_perf_knob_nb(scope); + int i; + printf("scope %s\n", starpu_perf_knob_scope_id_to_name(scope)); + for (i=0; i +#include +#include + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + { + const char * const knob_name = "starpu.global.g_calibrate_knob"; + const char * const knob_scope_name = "global"; + const char * const knob_type_name = "int32"; + int32_t val, val_save; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + 
STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", knob_name); + + val_save = val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + + starpu_perf_knob_set_global_int32_value(id, 1); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == 1); + + starpu_perf_knob_set_global_int32_value(id, 0); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == 0); + + starpu_perf_knob_set_global_int32_value(id, val_save); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == val_save); + } + + { + const char * const knob_name = "starpu.global.g_enable_catch_signal_knob"; + const char * const knob_scope_name = "global"; + const char * const knob_type_name = "int32"; + int32_t val, val_save; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", knob_name); + + val_save = val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + + starpu_perf_knob_set_global_int32_value(id, 1); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == 1); + + starpu_perf_knob_set_global_int32_value(id, 0); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == 0); + + starpu_perf_knob_set_global_int32_value(id, val_save); + val = starpu_perf_knob_get_global_int32_value(id); + printf("- %d\n", val); + STARPU_ASSERT(val == val_save); + } + + + { + const char * const knob_name = "starpu.worker.w_bind_to_pu_knob"; + const char * const knob_scope_name = "per_worker"; + const char * const knob_type_name = "int32"; + int32_t val; + + const int scope_id = 
starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", knob_name); + + unsigned int ncpu = starpu_cpu_worker_get_count(); + unsigned int i; + for (i=0; i= 0); + printf("- %u: %d\n", i, val); + } + } + + { + const char * const knob_name = "starpu.task.s_max_priority_cap_knob"; + const char * const knob_scope_name = "per_scheduler"; + const char * const knob_type_name = "int32"; + int32_t val; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", knob_name); + val = starpu_perf_knob_get_per_scheduler_int32_value(id, "prio"); + printf("- %d\n", val); + } + + { + const char * const knob_name = "starpu.task.s_min_priority_cap_knob"; + const char * const knob_scope_name = "per_scheduler"; + const char * const knob_type_name = "int32"; + int32_t val; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", knob_name); + val = starpu_perf_knob_get_per_scheduler_int32_value(id, "prio"); + printf("- %d\n", val); + } + + + starpu_shutdown(); + + return 0; +} diff --git a/examples/perf_steering/perf_knobs_03.c b/examples/perf_steering/perf_knobs_03.c new file mode 100644 index 0000000..da12356 --- /dev/null +++ b/examples/perf_steering/perf_knobs_03.c @@ -0,0 +1,180 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#define NTASKS 100 + +volatile int task_count[2]; + +void cpu_func(void *buffer[], void *cl_arg) +{ + (void)buffer; + (void)cl_arg; + int workerid = starpu_worker_get_id(); + STARPU_ASSERT(workerid == 0 || workerid == 1); + task_count[workerid]++; +} + +int main(int argc, char **argv) +{ + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = 2; + { + const char *sched_pol_name = starpu_getenv("STARPU_SCHED"); + if (sched_pol_name != NULL && strcmp(sched_pol_name, "prio") != 0) + { + fprintf(stderr, "example uses 'prio' scheduling policy.\n"); + return 77; + } + } + + conf.sched_policy_name = "prio"; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() != 2 + || starpu_cuda_worker_get_count() != 0 + || starpu_opencl_worker_get_count() != 0 + || starpu_mpi_ms_worker_get_count() != 0) + { + starpu_shutdown(); + fprintf(stderr, "example needs exactly two cpu cores.\n"); + return 77; + } + + { + const char * const max_prio_knob_name = "starpu.task.s_max_priority_cap_knob"; + const char * const min_prio_knob_name = "starpu.task.s_min_priority_cap_knob"; + const char * const knob_scope_name = "per_scheduler"; + const char * const knob_type_name = 
"int32"; + int32_t max_prio_val; + int32_t min_prio_val; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + + const int max_prio_id = starpu_perf_knob_name_to_id(scope_id, max_prio_knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(max_prio_id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + const int min_prio_id = starpu_perf_knob_name_to_id(scope_id, min_prio_knob_name); + STARPU_ASSERT(starpu_perf_knob_get_type_id(min_prio_id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + printf("%s:\n", max_prio_knob_name); + max_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(max_prio_id, "prio"); + printf("- %d\n", max_prio_val); + + printf("%s:\n", min_prio_knob_name); + min_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(min_prio_id, "prio"); + printf("- %d\n", min_prio_val); + STARPU_ASSERT(max_prio_val >= min_prio_val); + + if (min_prio_val > 0) + { + starpu_perf_knob_set_per_scheduler_int32_value(min_prio_id, "prio", 0); + starpu_perf_knob_set_per_scheduler_int32_value(max_prio_id, "prio", 0); + } + else + { + starpu_perf_knob_set_per_scheduler_int32_value(max_prio_id, "prio", 0); + starpu_perf_knob_set_per_scheduler_int32_value(min_prio_id, "prio", 0); + } + + printf("%s:\n", max_prio_knob_name); + max_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(max_prio_id, "prio"); + printf("- %d\n", max_prio_val); + + printf("%s:\n", min_prio_knob_name); + min_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(min_prio_id, "prio"); + printf("- %d\n", min_prio_val); + STARPU_ASSERT(max_prio_val == 0); + STARPU_ASSERT(min_prio_val == 0); + + } + + { + const char * const knob_name = "starpu.worker.w_enable_worker_knob"; + const char * const knob_scope_name = "per_worker"; + const char * const knob_type_name = "int32"; + int32_t val; + + const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); + const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); + 
STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); + + struct starpu_codelet cl = + { + .cpu_funcs = {cpu_func} + }; + + task_count[0] = 0; + task_count[1] = 0; + + val = starpu_perf_knob_get_per_worker_int32_value(id, 0); + STARPU_ASSERT(val == 1); + val = starpu_perf_knob_get_per_worker_int32_value(id, 1); + STARPU_ASSERT(val == 1); + + starpu_perf_knob_set_per_worker_int32_value(id, 1, 0); + val = starpu_perf_knob_get_per_worker_int32_value(id, 1); + STARPU_ASSERT(val == 0); + + int i; + for (i=0; i select the number of tasks\n"); + fprintf(stderr,"-nshot select the number of shot per task\n"); + exit(0); + } + } +} + +static struct starpu_perfmodel model = +{ + .type = STARPU_HISTORY_BASED, + .size_base = size_base, + .symbol = "monte_carlo_pi" +}; + +static struct starpu_codelet pi_cl = +{ + .cpu_funcs = {cpu_kernel}, + .cpu_funcs_name = {"cpu_kernel"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_kernel}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .model = &model +}; + +int main(int argc, char **argv) +{ + unsigned i; + int ret; + + parse_args(argc, argv); + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Initialize the random number generator */ + unsigned *sobol_qrng_directions = malloc(n_dimensions*n_directions*sizeof(unsigned)); + STARPU_ASSERT(sobol_qrng_directions); + + initSobolDirectionVectors(n_dimensions, sobol_qrng_directions); + + /* Any worker may use that array now */ + starpu_data_handle_t sobol_qrng_direction_handle; + starpu_vector_data_register(&sobol_qrng_direction_handle, STARPU_MAIN_RAM, + (uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned)); + + unsigned *cnt_array = calloc(ntasks, sizeof(unsigned)); + STARPU_ASSERT(cnt_array); + starpu_data_handle_t cnt_array_handle; + starpu_vector_data_register(&cnt_array_handle, STARPU_MAIN_RAM, (uintptr_t)cnt_array, ntasks, 
sizeof(unsigned)); + + /* Use a write-through policy : when the data is modified on an + * accelerator, we know that it will only be modified once and be + * accessed by the CPU later on */ + starpu_data_set_wt_mask(cnt_array_handle, (1<cl = &pi_cl; + + STARPU_ASSERT(starpu_data_get_sub_data(cnt_array_handle, 1, i)); + + task->handles[0] = sobol_qrng_direction_handle; + task->handles[1] = starpu_data_get_sub_data(cnt_array_handle, 1, i); + + ret = starpu_task_submit(task); + STARPU_ASSERT(!ret); + } + + starpu_task_wait_for_all(); + + /* Get the cnt_array back in main memory */ + starpu_data_unpartition(cnt_array_handle, STARPU_MAIN_RAM); + starpu_data_unregister(cnt_array_handle); + starpu_data_unregister(sobol_qrng_direction_handle); + + /* Count the total number of entries */ + unsigned long total_cnt = 0; + for (i = 0; i < ntasks; i++) + total_cnt += cnt_array[i]; + + end = starpu_timing_now(); + + double timing = end - start; + + unsigned long total_shot_cnt = ntasks * nshot_per_task; + + /* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, probability to impact the disk: pi/4 */ + FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", ((TYPE)total_cnt*4)/(total_shot_cnt), total_cnt, total_shot_cnt); + FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0); + FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing)); + + if (!getenv("STARPU_SSILENT")) starpu_codelet_display_stats(&pi_cl); + + starpu_shutdown(); + + return 0; +} diff --git a/examples/pi/pi.h b/examples/pi/pi.h new file mode 100644 index 0000000..995adf7 --- /dev/null +++ b/examples/pi/pi.h @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __PI_H__ +#define __PI_H__ + +#include +#include + +#define TYPE float + +/* extern "C" void cuda_kernel(void *descr[], void *cl_arg); */ + +static int n_dimensions = 100; + +#endif /* __PI_H__ */ diff --git a/examples/pi/pi_kernel.cu b/examples/pi/pi_kernel.cu new file mode 100644 index 0000000..4b107b9 --- /dev/null +++ b/examples/pi/pi_kernel.cu @@ -0,0 +1,156 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* First draw a series of coordinates, then count how many fall inside the + * circle quarter */ + +#include "SobolQRNG/sobol_gpu.h" +#include "pi.h" + +#define MAXNBLOCKS 128 +#define MAXTHREADSPERBLOCK 256 + +static __global__ void monte_carlo(TYPE *random_numbers_x, TYPE *random_numbers_y, + unsigned n, unsigned *output_cnt) +{ + __shared__ unsigned scnt[MAXTHREADSPERBLOCK]; + + /* Do we have a successful shot ? */ + const int tid = threadIdx.x + blockIdx.x*blockDim.x; + + const int nthreads = gridDim.x * blockDim.x; + + /* Blank the shared mem buffer */ + if (threadIdx.x < MAXTHREADSPERBLOCK) + scnt[threadIdx.x] = 0; + + __syncthreads(); + int ind; + for (ind = tid; ind < n; ind += nthreads) + { + TYPE x = random_numbers_x[ind]; + TYPE y = random_numbers_y[ind]; + TYPE dist = (x*x + y*y); + + unsigned success = (dist <= 1.0f)?1:0; + + scnt[threadIdx.x] += success; + + } + + __syncthreads(); + + /* Perform a reduction to compute the sum on each thread within that block */ + + /* NB: We assume that the number of threads per block is a power of 2 ! 
/*
 * CUDA implementation of the pi task: draw the coordinates on the device,
 * then count how many of them fall inside the circle quarter.
 *
 * descr[0]: vector holding the direction numbers handed to sobolGPU()
 * descr[1]: vector whose first element receives the number of successful shots
 * cl_arg:   points to an unsigned long long giving the number of shots to draw
 *
 * The number of shots must be a multiple of the number of blocks.
 * NB: the reduction done by monte_carlo assumes a power-of-2 number of
 * threads per block, which is not enforced here.
 */
extern "C" void cuda_kernel(void *descr[], void *cl_arg)
{
	cudaError_t cures;
	cudaStream_t stream = starpu_cuda_get_local_stream();

	unsigned *directions = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned *cnt = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]);
	unsigned long long *nshot_per_task = (unsigned long long *) cl_arg;
	unsigned nx = *nshot_per_task;

	/* How many blocks do we use ? */
	unsigned nblocks = 128; // TODO

	/* Validate the launch geometry before allocating anything */
	STARPU_ASSERT(nblocks <= MAXNBLOCKS);
	STARPU_ASSERT((nx % nblocks) == 0);

	/* How many threads per block ? At most 256, but no more threads than
	 * there are entries to process per block. */
	unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (nx / nblocks));

	/* Generate Random numbers: x coordinates come first, y coordinates
	 * follow. The size is computed in size_t so that 2*nx cannot wrap. */
	float *random_numbers = NULL;
	cures = cudaMalloc((void **)&random_numbers, (size_t)2*nx*sizeof(float));
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);
	STARPU_ASSERT(random_numbers);

	sobolGPU(2*nx/n_dimensions, n_dimensions, directions, random_numbers);
	cures = cudaStreamSynchronize(stream);
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	TYPE *random_numbers_x = &random_numbers[0];
	TYPE *random_numbers_y = &random_numbers[nx];

	unsigned *per_block_cnt = NULL;
	cures = cudaMalloc((void **)&per_block_cnt, nblocks*sizeof(unsigned));
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	/* each entry of per_block_cnt contains the number of successful shots
	 * in the corresponding block. */
	monte_carlo<<<nblocks, nthread_per_block, 0, stream>>>(random_numbers_x, random_numbers_y, nx, per_block_cnt);
	cures = cudaGetLastError();
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	/* Note that we do not synchronize between kernel calls because both
	 * are queued on the same stream, which serializes them */

	/* compute the total number of successful shots by adding the elements
	 * of the per_block_cnt array */
	sum_per_block_cnt<<<1, nblocks, 0, stream>>>(per_block_cnt, cnt);
	cures = cudaGetLastError();
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);
	cures = cudaStreamSynchronize(stream);
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	cudaFree(per_block_cnt);
	cudaFree(random_numbers);
}
The + * proportion gives an approximation of Pi. For each task, we draw a number of + * coordinates, and we gather the number of successful draws. + * + * This version uses reduction to optimize gathering the number of successful + * draws. + */ + +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PI 3.14159265358979323846 + +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CURAND) +#warning CURAND is required to run that example on CUDA devices +#endif + +#ifdef STARPU_HAVE_CURAND +#include +#include +#endif + +static unsigned long long nshot_per_task = 16*1024*1024ULL; + +/* default value */ +static unsigned long ntasks = 1024; +static unsigned long ntasks_warmup = 0; + +static unsigned use_redux = 1; +static unsigned do_warmup = 0; + +/* + * Initialization of the Random Number Generators (RNG) + */ + +#ifdef STARPU_HAVE_CURAND +/* RNG for the CURAND library */ +static curandGenerator_t curandgens[STARPU_NMAXWORKERS]; +#endif + +/* state for the erand48 function : note the huge padding to avoid false-sharing */ +#define PADDING 1024 +static unsigned short xsubi[STARPU_NMAXWORKERS*PADDING]; +static starpu_drand48_data randbuffer[STARPU_NMAXWORKERS*PADDING]; + +/* Function to initialize the random number generator in the current worker */ +static void init_rng(void *arg) +{ + (void)arg; +#ifdef STARPU_HAVE_CURAND + curandStatus_t res; +#endif + + int workerid = starpu_worker_get_id_check(); + + switch (starpu_worker_get_type(workerid)) + { + case STARPU_CPU_WORKER: + /* create a seed */ + starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]); + + xsubi[0 + PADDING*workerid] = (unsigned short)workerid; + xsubi[1 + PADDING*workerid] = (unsigned short)workerid; + xsubi[2 + PADDING*workerid] = (unsigned short)workerid; + break; +#ifdef STARPU_HAVE_CURAND + case STARPU_CUDA_WORKER: + + /* Create a RNG */ + res = 
curandCreateGenerator(&curandgens[workerid], + CURAND_RNG_PSEUDO_DEFAULT); + STARPU_ASSERT(res == CURAND_STATUS_SUCCESS); + + /* Seed it with worker's id */ + res = curandSetPseudoRandomGeneratorSeed(curandgens[workerid], + (unsigned long long)workerid); + STARPU_ASSERT(res == CURAND_STATUS_SUCCESS); + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +/* The amount of work does not depend on the data size at all :) */ +static size_t size_base(struct starpu_task *task, unsigned nimpl) +{ + (void)task; + (void)nimpl; + return nshot_per_task; +} + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-ntasks") == 0) + { + char *argptr; + ntasks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nshot") == 0) + { + char *argptr; + nshot_per_task = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-noredux") == 0) + { + use_redux = 0; + } + + if (strcmp(argv[i], "-warmup") == 0) + { + do_warmup = 1; + ntasks_warmup = 8; /* arbitrary number of warmup tasks */ + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]); + exit(-1); + } + } +} + +/* + * Monte-carlo kernel + */ + +void pi_func_cpu(void *descr[], void *cl_arg) +{ + (void)cl_arg; + int workerid = starpu_worker_get_id_check(); + + unsigned short *worker_xsub; + worker_xsub = &xsubi[PADDING*workerid]; + + starpu_drand48_data *buffer; + buffer = &randbuffer[PADDING*workerid]; + + unsigned long local_cnt = 0; + + /* Fill the scratchpad with random numbers */ + unsigned i; + for (i = 0; i < nshot_per_task; i++) + { + double randx, randy; + + starpu_erand48_r(worker_xsub, buffer, &randx); + starpu_erand48_r(worker_xsub, buffer, &randy); + + double x = (2.0*randx - 1.0); + double y = (2.0*randy - 1.0); + + double dist = x*x + y*y; + if (dist < 1.0) + local_cnt++; + } + + /* Put the contribution of that task into the counter 
#ifdef STARPU_HAVE_CURAND
/*
 * CUDA implementation of the Monte-Carlo task.
 *
 * descr[0] is the scratchpad vector: its first nshot_per_task floats are
 * used as x coordinates and the next nshot_per_task floats as y coordinates.
 * descr[1] is the shot counter handed to pi_redux_cuda_kernel().
 */
static void pi_func_cuda(void *descr[], void *cl_arg)
{
	(void)cl_arg;

	int worker = starpu_worker_get_id_check();

	/* CURAND is a bit silly: it assumes that any error is fatal. Calling
	 * cudaGetLastError resets the last error value. */
	(void) cudaGetLastError();

	/* Draw both coordinate sets in one go, with this worker's generator */
	float *coords = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
	curandStatus_t status = curandGenerateUniform(curandgens[worker], coords, 2*nshot_per_task);
	STARPU_ASSERT(status == CURAND_STATUS_SUCCESS);

	/* Count the successful shots on the device */
	unsigned long *hits = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]);
	pi_redux_cuda_kernel(coords, coords + nshot_per_task, nshot_per_task, hits);
}
#endif
void *cl_arg) +{ + (void)cl_arg; + unsigned long *val = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); + *val = 0; +} + +#ifdef STARPU_HAVE_CURAND +static void init_cuda_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned long *val = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); + cudaMemsetAsync(val, 0, sizeof(unsigned long), starpu_cuda_get_local_stream()); +} +#endif + +static struct starpu_codelet init_codelet = +{ + .cpu_funcs = {init_cpu_func}, + .cpu_funcs_name = {"init_cpu_func"}, +#ifdef STARPU_HAVE_CURAND + .cuda_funcs = {init_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_W}, + .nbuffers = 1 +}; + +#ifdef STARPU_HAVE_CURAND +/* Dummy implementation of the addition of two unsigned longs in CUDA */ +static void redux_cuda_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned long *d_a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned long *d_b = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); + + unsigned long h_a, h_b; + + cudaMemcpyAsync(&h_a, d_a, sizeof(h_a), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaMemcpyAsync(&h_b, d_b, sizeof(h_b), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + h_a += h_b; + + cudaMemcpyAsync(d_a, &h_a, sizeof(h_a), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); +} +#endif + +void redux_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned long *a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned long *b = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); + + *a = *a + *b; +} + +static struct starpu_codelet redux_codelet = +{ + .cpu_funcs = {redux_cpu_func}, + .cpu_funcs_name = {"redux_cpu_func"}, +#ifdef STARPU_HAVE_CURAND + .cuda_funcs = {redux_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2 +}; + +/* + * Main program + */ + +int main(int argc, 
char **argv) +{ + unsigned i; + int ret; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return 77; + + parse_args(argc, argv); + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Launch a Random Number Generator (RNG) on each worker */ + starpu_execute_on_each_worker(init_rng, NULL, STARPU_CPU|STARPU_CUDA); + + /* Create a scratchpad data */ + starpu_data_handle_t xy_scratchpad_handle; + starpu_vector_data_register(&xy_scratchpad_handle, -1, (uintptr_t)NULL, + 2*nshot_per_task, sizeof(float)); + + /* Create a variable that will be used to count the number of shots + * that actually hit the unit circle when shooting randomly in + * [-1,1]^2. */ + unsigned long shot_cnt = 0; + starpu_data_handle_t shot_cnt_handle; + starpu_variable_data_register(&shot_cnt_handle, STARPU_MAIN_RAM, + (uintptr_t)&shot_cnt, sizeof(shot_cnt)); + + starpu_data_set_reduction_methods(shot_cnt_handle, + &redux_codelet, &init_codelet); + + double start; + double end; + + for (i = 0; i < ntasks_warmup; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = use_redux?&pi_cl_redux:&pi_cl; + + task->handles[0] = xy_scratchpad_handle; + task->handles[1] = shot_cnt_handle; + + ret = starpu_task_submit(task); + STARPU_ASSERT(!ret); + } + + + start = starpu_timing_now(); + + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = use_redux?&pi_cl_redux:&pi_cl; + + task->handles[0] = xy_scratchpad_handle; + task->handles[1] = shot_cnt_handle; + + ret = starpu_task_submit(task); + STARPU_ASSERT(!ret); + } + + starpu_data_unregister(shot_cnt_handle); + starpu_data_unregister(xy_scratchpad_handle); + + end = starpu_timing_now(); + double timing = end - start; + /* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, + * probability to impact the disk: pi/4 */ + unsigned long total = (ntasks + 
ntasks_warmup)*nshot_per_task; + double pi_approx = ((double)shot_cnt*4.0)/total; + + FPRINTF(stderr, "Reductions? %s\n", use_redux?"yes":"no"); + FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", pi_approx, shot_cnt, total); + FPRINTF(stderr, "Error %e \n", pi_approx - PI); + FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0); + FPRINTF(stderr, "Speed : %f GShot/s\n", total/(1e3*timing)); + + starpu_shutdown(); + + if (fabs(pi_approx - PI) > 1.0) + return 1; + + return 0; +} diff --git a/examples/pi/pi_redux_kernel.cu b/examples/pi/pi_redux_kernel.cu new file mode 100644 index 0000000..3ac61db --- /dev/null +++ b/examples/pi/pi_redux_kernel.cu @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This counts how many fall inside the circle quarter */ + +#include + +#define MAXNBLOCKS 128 +#define MAXTHREADSPERBLOCK 256 + +static __global__ void monte_carlo(float *x, float *y, unsigned n, unsigned long *output_cnt) +{ + __shared__ unsigned scnt[MAXTHREADSPERBLOCK]; + + /* Do we have a successful shot ? 
/*
 * Count how many of the n points (x[i], y[i]) are successful shots and add
 * that number to *shot_cnt.
 *
 * x, y and shot_cnt are passed straight to the device kernels. n must be a
 * multiple of the number of blocks.
 * NB: the reduction done by monte_carlo assumes a power-of-2 number of
 * threads per block, which is not enforced here.
 *
 * The function returns once the local stream has been synchronized.
 */
extern "C" void pi_redux_cuda_kernel(float *x, float *y, unsigned n, unsigned long *shot_cnt)
{
	cudaError_t cures;
	cudaStream_t stream = starpu_cuda_get_local_stream();

	/* How many blocks do we use ? */
	unsigned nblocks = 128; // TODO
	STARPU_ASSERT(nblocks <= MAXNBLOCKS);
	STARPU_ASSERT((n % nblocks) == 0);

	/* How many threads per block ? At most 256, but no more threads than
	 * there are entries to process per block. */
	unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (n / nblocks));

	unsigned long *per_block_cnt = NULL;
	cures = cudaMalloc((void **)&per_block_cnt, nblocks*sizeof(unsigned long));
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	/* each entry of per_block_cnt contains the number of successful shots
	 * in the corresponding block. */
	monte_carlo<<<nblocks, nthread_per_block, 0, stream>>>(x, y, n, per_block_cnt);
	cures = cudaGetLastError();
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	/* Note that we do not synchronize between kernel calls because both
	 * are queued on the same stream, which serializes them */

	/* compute the total number of successful shots by adding the elements
	 * of the per_block_cnt array */
	sum_per_block_cnt<<<1, nblocks, 0, stream>>>(per_block_cnt, shot_cnt);
	cures = cudaGetLastError();
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);
	cures = cudaStreamSynchronize(stream);
	if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures);

	cudaFree(per_block_cnt);
}
/*
 * X / Y codelet: fill the whole vector descr[0] with the single float value
 * packed in the codelet arguments.
 */
void pipeline_cpu_x(void *descr[], void *args)
{
	float *cursor = (float *) STARPU_VECTOR_GET_PTR(descr[0]);
	int remaining = STARPU_VECTOR_GET_NX(descr[0]);
	float fill;

	starpu_codelet_unpack_args(args, &fill);

	/* Walk the vector front to back, one element per iteration */
	while (remaining > 0)
	{
		*cursor++ = fill;
		remaining--;
	}
}
1, y, 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +static struct starpu_perfmodel pipeline_model_axpy = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "pipeline_model_axpy" +}; + +static struct starpu_codelet pipeline_codelet_axpy = +{ + .cpu_funcs = {pipeline_cpu_axpy}, + .cpu_funcs_name = {"pipeline_cpu_axpy"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {pipeline_cublas_axpy}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &pipeline_model_axpy +}; + +/* sum codelet */ +void pipeline_cpu_sum(void *descr[], void *arg) +{ + (void)arg; + float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); + int n = STARPU_VECTOR_GET_NX(descr[0]); + float y; + + y = STARPU_SASUM(n, x, 1); + + FPRINTF(stderr,"CPU finished with %f\n", y); +} + +#ifdef STARPU_USE_CUDA +void pipeline_cublas_sum(void *descr[], void *arg) +{ + (void)arg; + float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); + int n = STARPU_VECTOR_GET_NX(descr[0]); + float y; + + cublasStatus_t status = cublasSasum(starpu_cublas_get_local_handle(), n, x, 1, &y); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + FPRINTF(stderr,"CUBLAS finished with %f\n", y); +} +#endif + +static struct starpu_perfmodel pipeline_model_sum = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "pipeline_model_sum" +}; + +static struct starpu_codelet pipeline_codelet_sum = +{ + .cpu_funcs = {pipeline_cpu_sum}, + .cpu_funcs_name = {"pipeline_cpu_sum"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {pipeline_cublas_sum}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &pipeline_model_sum +}; + +static void release_sem(void *arg) +{ + sem_post(arg); +}; + +int main(void) +{ + int ret = 0; + int k, l, c; + starpu_data_handle_t buffersX[K], buffersY[K], buffersP[K]; + sem_t sems[C]; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_cublas_init(); + + /* Initialize the K temporary buffers. No need to allocate it ourselves + * Since it's the X and Y kernels which will fill the initial values. */ + for (k = 0; k < K; k++) + { + starpu_vector_data_register(&buffersX[k], -1, 0, N, sizeof(float)); + starpu_vector_data_register(&buffersY[k], -1, 0, N, sizeof(float)); + starpu_vector_data_register(&buffersP[k], -1, 0, N, sizeof(float)); + } + + /* Initialize way to wait for the C previous concurrent stages */ + for (c = 0; c < C; c++) + sem_init(&sems[c], 0, 0); + + /* Submits the l pipeline stages */ + for (l = 0; l < L; l++) + { + float x = l; + float y = 2*l; + /* First wait for the C previous concurrent stages */ + if (l >= C) + { + starpu_do_schedule(); + sem_wait(&sems[l%C]); + } + + /* Now submit the next stage */ + ret = starpu_task_insert(&pipeline_codelet_x, + STARPU_W, buffersX[l%K], + STARPU_VALUE, &x, sizeof(x), + STARPU_TAG_ONLY, (starpu_tag_t) (100*l), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert x"); + + ret = starpu_task_insert(&pipeline_codelet_x, + STARPU_W, buffersY[l%K], + STARPU_VALUE, &y, sizeof(y), + STARPU_TAG_ONLY, (starpu_tag_t) (100*l+1), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert y"); + + ret = starpu_task_insert(&pipeline_codelet_axpy, + STARPU_R, buffersX[l%K], + STARPU_RW, buffersY[l%K], + STARPU_TAG_ONLY, (starpu_tag_t) l, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert axpy"); + + ret = starpu_task_insert(&pipeline_codelet_sum, + STARPU_R, buffersY[l%K], + STARPU_CALLBACK_WITH_ARG_NFREE, release_sem, &sems[l%C], + STARPU_TAG_ONLY, (starpu_tag_t) l, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert sum"); + } + starpu_task_wait_for_all(); + +enodev: + for (k = 0; k < K; k++) + { + starpu_data_unregister(buffersX[k]); + 
starpu_data_unregister(buffersY[k]); + starpu_data_unregister(buffersP[k]); + } + starpu_shutdown(); + + return (ret == -ENODEV ? 77 : 0); +} diff --git a/examples/ppm_downscaler/ppm_downscaler.c b/examples/ppm_downscaler/ppm_downscaler.c new file mode 100644 index 0000000..def0b7f --- /dev/null +++ b/examples/ppm_downscaler/ppm_downscaler.c @@ -0,0 +1,186 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This uses a dummy algorithm to downscale a ppm file. */ +/* TODO: turn this into StarPU. 
*/ + +#include "ppm_downscaler.h" + +#include + +#ifdef STARPU_HAVE_MALLOC_H +#include +#endif +#include +#include +#include + +struct ppm_image *allocate_new_ppm(int ncols, int nlines, int coldepth) +{ + struct ppm_image *ppm = (struct ppm_image *) malloc(sizeof(struct ppm_image)); + assert(ppm); + + ppm->ncols = ncols; + ppm->nlines = nlines; + ppm->coldepth = coldepth; + +#ifdef STARPU_HAVE_MEMALIGN + ppm->data = (struct ppm_color *) memalign(16384, ncols*nlines*sizeof(struct ppm_color)); +#else + ppm->data = (struct ppm_color *) malloc(ncols*nlines*sizeof(struct ppm_color)); +#endif + assert(ppm->data); + + return ppm; +} + +struct ppm_image *file_to_ppm(char *filename) +{ + int ret; + + struct ppm_image *ppm = (struct ppm_image *) malloc(sizeof(struct ppm_image)); + assert(ppm); + + FILE *file = fopen(filename, "r"); + assert(file); + + /* read the file's dimensions */ + ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth); + if (ret != 3) + { + fclose(file); + fprintf(stderr, "file %s is not valid\n", filename); + exit(-1); + } + + /* allocate a buffer for the image */ +#ifdef STARPU_HAVE_MEMALIGN + ppm->data = (struct ppm_color *) memalign(16384, ppm->ncols*ppm->nlines*sizeof(struct ppm_color)); +#else + ppm->data = (struct ppm_color *) malloc(ppm->ncols*ppm->nlines*sizeof(struct ppm_color)); +#endif + assert(ppm->data); + + ret = fread(ppm->data, sizeof(struct ppm_color), ppm->ncols*ppm->nlines, file); + STARPU_ASSERT(ret == ppm->ncols*ppm->nlines); + + int i; + for (i = 0; i < ppm->ncols*ppm->nlines; i++) + { +/* fprintf(stderr, "READ (index %d) -> r %d g %d b %d\n", i, ppm->data[i].r, ppm->data[i].g, ppm->data[i].b); */ + } + + fclose(file); + + return ppm; +} + +void ppm_to_file(struct ppm_image *ppm, char *filename) +{ + FILE *file = fopen(filename, "w+"); + assert(file); + + /* read the file's dimensions */ + fprintf(file, "P6\n%d %d\n%d\n", ppm->ncols, ppm->nlines, ppm->coldepth); + + fwrite(&ppm->data[0], sizeof(struct 
ppm_color), ppm->ncols*ppm->nlines, file); + + fclose(file); +} + + + +char *filename_in = "serpents.ppm"; +char *filename_out = "serpents.small.ppm"; + +void parse_args(int argc, char **argv) +{ + if (argc == 3) + { + filename_in = argv[1]; + filename_out = argv[2]; + } +} + +/* what is the downscaling factor ? */ +#define FACTOR 2 + +void dummy_downscale(struct ppm_image *input_ppm, struct ppm_image *output_ppm) +{ + struct ppm_color *in = input_ppm->data; + struct ppm_color *out = output_ppm->data; + + int line, col; + for (line = 0; line < output_ppm->nlines; line++) + { + for (col = 0; col < output_ppm->ncols; col++) + { + unsigned sum_r = 0, sum_g = 0, sum_b = 0; + + unsigned big_col = col*FACTOR; + unsigned big_line = line*FACTOR; + + /* compute the average value of all components */ + unsigned i, j; + for (i = 0; i < FACTOR; i++) + { + for (j = 0; j < FACTOR; j++) + { + unsigned index = (big_col + i)+(big_line + j)*input_ppm->ncols; + +/* fprintf(stderr, "(col %d, line %d) i %d j %d index %d -> r %d g %d b %d\n", col, line, i, j, index, in[index].r, in[index].g, in[index].b); */ + + sum_r += (unsigned)in[index].r; + sum_g += (unsigned)in[index].g; + sum_b += (unsigned)in[index].b; + } + } + + out[col + line*output_ppm->ncols].r = (unsigned char)(sum_r/(FACTOR*FACTOR)); + out[col + line*output_ppm->ncols].g = (unsigned char)(sum_g/(FACTOR*FACTOR)); + out[col + line*output_ppm->ncols].b = (unsigned char)(sum_b/(FACTOR*FACTOR)); + +/* fprintf(stderr, "col %d line %d -> sum_r = %d out -> %d\n", col, line, sum_r, out[col + line*FACTOR].r); */ + + } + } +} + +int main(int argc, char **argv) +{ + struct ppm_image *input_ppm, *output_ppm; + + parse_args(argc, argv); + + input_ppm = file_to_ppm(filename_in); + + fprintf(stderr, "Read input ppm file : ncols = %d, nlines = %d, coldept = %d\n", + input_ppm->nlines, input_ppm->ncols, input_ppm->coldepth); + + assert(input_ppm->nlines % FACTOR == 0); + assert(input_ppm->ncols % FACTOR == 0); + + output_ppm = 
allocate_new_ppm(input_ppm->ncols/FACTOR, input_ppm->nlines/FACTOR, input_ppm->coldepth); + + dummy_downscale(input_ppm, output_ppm); + + ppm_to_file(output_ppm, filename_out); + + free(input_ppm); + free(output_ppm); + + return 0; +} diff --git a/examples/ppm_downscaler/ppm_downscaler.h b/examples/ppm_downscaler/ppm_downscaler.h new file mode 100644 index 0000000..1861174 --- /dev/null +++ b/examples/ppm_downscaler/ppm_downscaler.h @@ -0,0 +1,31 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* we make the assumption that there are 256 color levels at most */ +struct ppm_color +{ + unsigned char r; + unsigned char g; + unsigned char b; +}; + +struct ppm_image +{ + int nlines; + int ncols; + int coldepth; + struct ppm_color *data; +}; diff --git a/examples/ppm_downscaler/yuv_downscaler.c b/examples/ppm_downscaler/yuv_downscaler.c new file mode 100644 index 0000000..cdb8da4 --- /dev/null +++ b/examples/ppm_downscaler/yuv_downscaler.c @@ -0,0 +1,320 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This uses a dummy algorithm to downscale a 1920x1080 yuv film. + * Each frame is split in horizontal stripes which are processed in parallel. + */ + +#include + +#include +#include +#include +#include +#include + +#include "yuv_downscaler.h" + +static double start; +static double end; + +static const char *filename_in_default = "hugefile.2s.yuv"; +static const char *filename_out_default = "hugefile.2s.out.yuv"; +static char filename_in[1024]; +static char filename_out[1024]; + +void parse_args(int argc, char **argv) +{ + if (argc == 3) + { + strncpy(filename_in, argv[1], 1023); + strncpy(filename_out, argv[2], 1023); + } + else + { + strncpy(filename_in, filename_in_default, 1023); + strncpy(filename_out, filename_out_default, 1023); + } +} + +#define FRAMESIZE sizeof(struct yuv_frame) +#define NEW_FRAMESIZE sizeof(struct yuv_new_frame) + +void ds_kernel_cpu(void *descr[], void *arg) +{ + (void)arg; + uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]); + const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]); + + uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]); + const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]); + + const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]); + const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned line, col; + for (line = 0; 
line < nlines; line+=FACTOR) + for (col = 0; col < ncols; col+=FACTOR) + { + unsigned sum = 0; + + unsigned lline, lcol; + for (lline = 0; lline < FACTOR; lline++) + for (lcol = 0; lcol < FACTOR; lcol++) + { + unsigned in_index = (lcol + col) + (lline + line)*input_ld; + + sum += input[in_index]; + } + + unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld; + output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR)); + } +} + +static struct starpu_codelet ds_codelet = +{ + .cpu_funcs = {ds_kernel_cpu}, + .cpu_funcs_name = {"ds_kernel_cpu"}, + .nbuffers = 2, /* input -> output */ + .modes = {STARPU_R, STARPU_W}, + .model = NULL +}; + +/* each block contains BLOCK_HEIGHT consecutive lines */ +static struct starpu_data_filter filter_y = +{ + .filter_func = starpu_matrix_filter_block, + .nchildren= HEIGHT/BLOCK_HEIGHT +}; + +static struct starpu_data_filter filter_uv = +{ + .filter_func = starpu_matrix_filter_block, + .nchildren = (HEIGHT/2)/BLOCK_HEIGHT +}; + +int main(int argc, char **argv) +{ + int ret; + size_t sret; + + assert(HEIGHT % (2*BLOCK_HEIGHT) == 0); + assert(HEIGHT % FACTOR == 0); + + parse_args(argc, argv); + +/* fprintf(stderr, "Reading input file ...\n"); */ + + /* how many frames ? 
*/ + struct stat stbuf; + ret = stat(filename_in, &stbuf); + assert(ret); + size_t filesize = stbuf.st_size; + + unsigned nframes = filesize/FRAMESIZE; + +/* fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */ + assert((filesize % sizeof(struct yuv_frame)) == 0); + + struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE); + assert(yuv_in_buffer); + +/* fprintf(stderr, "Alloc output file ...\n"); */ + struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE); + assert(yuv_out_buffer); + + /* fetch input data */ + FILE *f_in = fopen(filename_in, "r"); + if (!f_in) + { + fprintf(stderr, "couldn't open input file %s\n", filename_in); + exit(EXIT_FAILURE); + } + + /* allocate room for an output buffer */ + FILE *f_out = fopen(filename_out, "w+"); + if (!f_out) + { + fprintf(stderr, "couldn't open output file %s\n", filename_out); + exit(EXIT_FAILURE); + } + + sret = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in); + assert(sret == nframes); + + starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + + starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); + + ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* register and partition all layers */ + unsigned frame; + for (frame = 0; frame < nframes; frame++) + { + /* register Y layer */ + 
starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_in_buffer[frame].y, + WIDTH, WIDTH, HEIGHT, sizeof(uint8_t)); + + starpu_data_partition(frame_y_handle[frame], &filter_y); + + starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_out_buffer[frame].y, + NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t)); + + starpu_data_partition(new_frame_y_handle[frame], &filter_y); + + /* register U layer */ + starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_in_buffer[frame].u, + WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t)); + + starpu_data_partition(frame_u_handle[frame], &filter_uv); + + starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_out_buffer[frame].u, + NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t)); + + starpu_data_partition(new_frame_u_handle[frame], &filter_uv); + + /* register V layer */ + starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_in_buffer[frame].v, + WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t)); + + starpu_data_partition(frame_v_handle[frame], &filter_uv); + + starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM, + (uintptr_t)&yuv_out_buffer[frame].v, + NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t)); + + starpu_data_partition(new_frame_v_handle[frame], &filter_uv); + + } + + /* how many tasks are there ? 
*/ + unsigned nblocks_y = filter_y.nchildren; + unsigned nblocks_uv = filter_uv.nchildren; + + unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes; + + fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes); + start = starpu_timing_now(); + + /* do the computation */ + for (frame = 0; frame < nframes; frame++) + { + starpu_iteration_push(frame); + unsigned blocky; + for (blocky = 0; blocky < nblocks_y; blocky++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &ds_codelet; + + /* input */ + task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky); + + /* output */ + task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky); + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + unsigned blocku; + for (blocku = 0; blocku < nblocks_uv; blocku++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &ds_codelet; + + /* input */ + task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku); + + /* output */ + task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku); + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + unsigned blockv; + for (blockv = 0; blockv < nblocks_uv; blockv++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &ds_codelet; + + /* input */ + task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv); + + /* output */ + task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv); + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + starpu_iteration_pop(); + } + + /* make sure all output buffers are sync'ed */ + for (frame = 0; frame < nframes; frame++) + { + starpu_data_unregister(frame_y_handle[frame]); + starpu_data_unregister(frame_u_handle[frame]); + 
starpu_data_unregister(frame_v_handle[frame]); + + starpu_data_unregister(new_frame_y_handle[frame]); + starpu_data_unregister(new_frame_u_handle[frame]); + starpu_data_unregister(new_frame_v_handle[frame]); + } + + free(frame_y_handle); + free(frame_u_handle); + free(frame_v_handle); + free(new_frame_y_handle); + free(new_frame_u_handle); + free(new_frame_v_handle); + + /* There is an implicit barrier: the unregister methods will block + * until the computation is done and that the result was put back into + * memory. */ + end = starpu_timing_now(); + + double timing = end - start; + printf("# s\tFPS\n"); + printf("%f\t%f\n", timing/1000000, (1000000*nframes)/timing); + + fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out); + + /* partition the layers into smaller parts */ + starpu_shutdown(); + + if (fclose(f_in) != 0) + fprintf(stderr, "Could not close %s properly\n", filename_in); + + if (fclose(f_out) != 0) + fprintf(stderr, "Could not close %s properly\n", filename_out); + + return 0; +} diff --git a/examples/ppm_downscaler/yuv_downscaler.h b/examples/ppm_downscaler/yuv_downscaler.h new file mode 100644 index 0000000..3770bed --- /dev/null +++ b/examples/ppm_downscaler/yuv_downscaler.h @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#define WIDTH 1920 +#define HEIGHT 1080 + +#define FACTOR 2 + +#define NEW_WIDTH (WIDTH/FACTOR) +#define NEW_HEIGHT (HEIGHT/FACTOR) + +#define BLOCK_HEIGHT 20 + +#include + +struct yuv_frame +{ + uint8_t y[WIDTH*HEIGHT]; + uint8_t u[(WIDTH*HEIGHT)/4]; + uint8_t v[(WIDTH*HEIGHT)/4]; +}; + +struct yuv_new_frame +{ + uint8_t y[NEW_WIDTH*NEW_HEIGHT]; + uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4]; + uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4]; +}; diff --git a/examples/profiling/profiling.c b/examples/profiling/profiling.c new file mode 100644 index 0000000..ba22de8 --- /dev/null +++ b/examples/profiling/profiling.c @@ -0,0 +1,157 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This examplifies how to get task execution profiling from the application. + */ + +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 50; +#else +static unsigned niter = 500; +#endif + +void sleep_codelet(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + starpu_usleep(1000); +} + +int main(int argc, char **argv) +{ + int ret; + + if (argc == 2) + niter = atoi(argv[1]); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + +#ifdef STARPU_QUICK_CHECK + /* We should observe at least 50ms in the sleep time reported by every + * worker. */ + starpu_usleep(50000); +#else + /* We should observe at least 500ms in the sleep time reported by every + * worker. */ + starpu_usleep(500000); +#endif + + struct starpu_codelet cl = + { + .cpu_funcs = {sleep_codelet}, + .cpu_funcs_name = {"sleep_codelet"}, + .cuda_funcs = {sleep_codelet}, + .opencl_funcs = {sleep_codelet}, + .nbuffers = 0, + .name = "sleep" + }; + + struct starpu_task **tasks = (struct starpu_task **) malloc(niter*sizeof(struct starpu_task *)); + assert(tasks); + + unsigned i; + for (i = 0; i < niter; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + + /* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. */ + task->destroy = 0; + + tasks[i] = task; + + ret = starpu_task_submit(task); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + } + + starpu_task_wait_for_all(); + + double delay_sum = 0.0; + double length_sum = 0.0; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = tasks[i]; + struct starpu_profiling_task_info *info = task->profiling_info; + + /* How much time did it take before the task started ? 
*/ + delay_sum += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time); + + /* How long was the task execution ? */ + length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + + /* We don't need the task structure anymore */ + starpu_task_destroy(task); + } + + free(tasks); + + if (niter) + { + FPRINTF(stderr, "Avg. delay : %2.2lf us\n", (delay_sum)/niter); + FPRINTF(stderr, "Avg. length : %2.2lf us\n", (length_sum)/niter); + } + + /* Display the occupancy of all workers during the test */ + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct starpu_profiling_worker_info worker_info; + ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + double total_time = starpu_timing_timespec_to_us(&worker_info.total_time); + double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time); + double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time); + double overhead_time = total_time - executing_time - sleeping_time; + + float executing_ratio = 100.0*executing_time/total_time; + float sleeping_ratio = 100.0*sleeping_time/total_time; + float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio; + + char workername[128]; + starpu_worker_get_name(worker, workername, 128); + FPRINTF(stderr, "Worker %s:\n", workername); + FPRINTF(stderr, "\t%d task(s)\n", worker_info.executed_tasks); + FPRINTF(stderr, "\ttotal time : %.2lf ms\n", total_time*1e-3); + FPRINTF(stderr, "\texec time : %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio); + FPRINTF(stderr, "\tblocked time : %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, sleeping_ratio); + FPRINTF(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3, overhead_ratio); + } + + starpu_shutdown(); + + return 0; +} diff --git a/examples/profiling_tool/libprofiling_tool.c b/examples/profiling_tool/libprofiling_tool.c new file mode 100644 index 0000000..d3f2465 
--- /dev/null +++ b/examples/profiling_tool/libprofiling_tool.c @@ -0,0 +1,76 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +void myfunction_cb(struct starpu_prof_tool_info *prof_info, union starpu_prof_tool_event_info *event_info, struct starpu_prof_tool_api_info *api_info) +{ + if (NULL != prof_info) + { + printf("CALLBACK CALLED %d\n", prof_info->event_type); + } + else + { + printf("CALLBACK CALLED NULL INFO\n"); + return; + } + + switch (prof_info->event_type) + { + case starpu_prof_tool_event_driver_init: + printf("init driver\n"); + break; + case starpu_prof_tool_event_driver_init_start: + printf("begin init driver\n"); + break; + case starpu_prof_tool_event_driver_init_end: + printf("end init driver\n"); + break; + case starpu_prof_tool_event_start_cpu_exec: + printf("Start exec fun %p on device %d\n", prof_info->fun_ptr, prof_info->device_number); + break; + case starpu_prof_tool_event_end_cpu_exec: + printf("End exec fun %p on device %d\n", prof_info->fun_ptr, prof_info->device_number); + break; + case starpu_prof_tool_event_start_transfer: + printf("Start transfer on memnode %ud\n", prof_info->memnode); + break; + case starpu_prof_tool_event_end_transfer: + printf("End transfer on memnode %ud\n", 
prof_info->memnode); + break; + default: + printf("Unknown callback %d\n", prof_info->event_type); + break; + } +} + +/* Mandatory */ +void starpu_prof_tool_library_register(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg) +{ + enum starpu_prof_tool_command info = 0; + reg(starpu_prof_tool_event_driver_init, &myfunction_cb, info); + reg(starpu_prof_tool_event_driver_init_start, &myfunction_cb, info); + reg(starpu_prof_tool_event_driver_init_end, &myfunction_cb, info); + reg(starpu_prof_tool_event_start_cpu_exec, &myfunction_cb, info); + reg(starpu_prof_tool_event_end_cpu_exec, &myfunction_cb, info); + reg(starpu_prof_tool_event_start_transfer, &myfunction_cb, info); + reg(starpu_prof_tool_event_end_transfer, &myfunction_cb, info); + + fprintf(stderr,"REGISTER LIBRARY\n"); +} + diff --git a/examples/profiling_tool/prof.sh b/examples/profiling_tool/prof.sh new file mode 100755 index 0000000..18edb20 --- /dev/null +++ b/examples/profiling_tool/prof.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +ROOT=${0%/prof.sh} +if test -x $ROOT/../basic_examples/hello_world +then + STARPU_PROF_TOOL=$ROOT/.libs/libprofiling_tool.so $ROOT/../basic_examples/hello_world +else + exit 77 +fi diff --git a/examples/reductions/dot_product.c b/examples/reductions/dot_product.c new file mode 100644 index 0000000..9819e3d --- /dev/null +++ b/examples/reductions/dot_product.c @@ -0,0 +1,461 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This computes the dot product of a big vector, using data reduction to + * optimize the dot reduction. + */ + +#include +#include +#include + +#include + +#ifdef STARPU_USE_CUDA +#include +#include +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static float *_x; +static float *_y; +static starpu_data_handle_t *_x_handles; +static starpu_data_handle_t *_y_handles; +#ifdef STARPU_USE_OPENCL +static struct starpu_opencl_program _opencl_program; +#endif + +#ifdef STARPU_QUICK_CHECK +static unsigned _nblocks = 128; +#else +static unsigned _nblocks = 4096; +#endif +static unsigned _entries_per_block = 1024; + +static DOT_TYPE _dot = 0.0f; +static starpu_data_handle_t _dot_handle; + +#ifdef STARPU_USE_CUDA +static int cublas_version; +#endif + +static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + (void)task; + (void)nimpl; + enum starpu_worker_archtype type = starpu_worker_get_type(workerid); + if (type == STARPU_CPU_WORKER || type == STARPU_OPENCL_WORKER) + return 1; + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_SIMGRID + /* We don't know, let's assume it can */ + return 1; +#else + /* Cuda device */ + const struct cudaDeviceProp *props; + props = starpu_cuda_get_device_properties(workerid); + if (props->major >= 2 || props->minor >= 3) + /* At least compute capability 1.3, supports doubles */ + return 1; +#endif +#endif + /* Old card, does not support doubles */ + return 0; +} + +/* + * Codelet to create a neutral element + */ + +void init_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + *dot = 0.0f; +} + +#ifdef STARPU_USE_CUDA +void init_cuda_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + cudaMemsetAsync(dot, 0, sizeof(DOT_TYPE), starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +void init_opencl_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + cl_int err; + cl_command_queue queue; + + cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]); + starpu_opencl_get_current_queue(&queue); + DOT_TYPE zero = (DOT_TYPE) 
0.0; + + err = clEnqueueWriteBuffer(queue, + dot, + CL_TRUE, + 0, + sizeof(DOT_TYPE), + &zero, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); +} +#endif + +static struct starpu_codelet init_codelet = +{ + .can_execute = can_execute, + .cpu_funcs = {init_cpu_func}, + .cpu_funcs_name = {"init_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {init_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {init_opencl_func}, +#endif + .modes = {STARPU_W}, + .nbuffers = 1, + .name = "init", +}; + +/* + * Codelet to perform the reduction of two elements + */ + +void redux_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + DOT_TYPE *dota = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + DOT_TYPE *dotb = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + *dota = *dota + *dotb; +} + +#ifdef STARPU_USE_CUDA +extern void redux_cuda_func(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +void redux_opencl_func(void *buffers[], void *args) +{ + (void)args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]); + cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_redux_opencl", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(dota), &dota); + err|= clSetKernelArg(kernel, 1, sizeof(dotb), &dotb); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=1; + size_t local=1; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} +#endif + +static 
struct starpu_codelet redux_codelet = +{ + .can_execute = can_execute, + .cpu_funcs = {redux_cpu_func}, + .cpu_funcs_name = {"redux_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {redux_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {redux_opencl_func}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, + .name = "redux" +}; + +/* + * Dot product codelet + */ + +void dot_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); + + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + DOT_TYPE local_dot = 0.0; + + unsigned i; + for (i = 0; i < n; i++) + { + local_dot += (DOT_TYPE)local_x[i]*(DOT_TYPE)local_y[i]; + } + + *dot = *dot + local_dot; +} + +#ifdef STARPU_USE_CUDA +void dot_cuda_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + DOT_TYPE current_dot; + float local_dot; + + float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); + + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemcpyAsync(¤t_dot, dot, sizeof(DOT_TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + cublasStatus_t status = cublasSdot(starpu_cublas_get_local_handle(), n, local_x, 1, local_y, 1, &local_dot); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + /* FPRINTF(stderr, "current_dot %f local dot %f -> %f\n", current_dot, local_dot, current_dot + local_dot); */ + current_dot += local_dot; + + cudaMemcpyAsync(dot, ¤t_dot, sizeof(DOT_TYPE), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + 
cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +void dot_opencl_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); + cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[2]); + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_dot_opencl", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(x), &x); + err|= clSetKernelArg(kernel, 1, sizeof(y), &y); + err|= clSetKernelArg(kernel, 2, sizeof(dot), &dot); + err|= clSetKernelArg(kernel, 3, sizeof(n), &n); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=1; + size_t local=1; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} +#endif + +static struct starpu_codelet dot_codelet = +{ + .can_execute = can_execute, + .cpu_funcs = {dot_cpu_func}, + .cpu_funcs_name = {"dot_cpu_func"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dot_cuda_func}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dot_opencl_func}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_REDUX}, + .name = "dot" +}; + +/* + * Tasks initialization + */ + +int main(void) +{ + int ret; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return 77; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + 
+#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("examples/reductions/dot_product_opencl_kernels.cl", + &_opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + +#ifdef STARPU_USE_CUDA + unsigned devices = starpu_cuda_worker_get_count(); + if (devices) + { + cublasHandle_t handle; + cublasCreate(&handle); + cublasGetVersion(handle, &cublas_version); + cublasDestroy(handle); + if (cublas_version >= 7050) + starpu_cublas_init(); + else + /* Disable the sdot cublas kernel, it is bogus with a + * non-blocking stream (Nvidia bugid 1669886) */ + dot_codelet.cuda_funcs[0] = NULL; + } +#endif + + unsigned long nelems = _nblocks*_entries_per_block; + size_t size = nelems*sizeof(float); + + starpu_malloc((void **)&_x, size*sizeof(float)); + starpu_malloc((void **)&_y, size*sizeof(float)); + + _x_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); + _y_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); + + assert(_x && _y); + + starpu_srand48(0); + + DOT_TYPE reference_dot = 0.0; + + unsigned long i; + for (i = 0; i < nelems; i++) + { + _x[i] = (float)starpu_drand48(); + _y[i] = (float)starpu_drand48(); + + reference_dot += (DOT_TYPE)_x[i]*(DOT_TYPE)_y[i]; + } + + unsigned block; + for (block = 0; block < _nblocks; block++) + { + starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, + (uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float)); + starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM, + (uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float)); + } + + starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE)); + + /* + * Compute dot product with StarPU + */ + starpu_data_set_reduction_methods(_dot_handle, &redux_codelet, &init_codelet); + + for (block = 0; block < _nblocks; block++) + { + struct starpu_task *task = 
starpu_task_create(); + + task->cl = &dot_codelet; + task->destroy = 1; + + task->handles[0] = _x_handles[block]; + task->handles[1] = _y_handles[block]; + task->handles[2] = _dot_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_ASSERT(!ret); + } + + for (block = 0; block < _nblocks; block++) + { + starpu_data_unregister(_x_handles[block]); + starpu_data_unregister(_y_handles[block]); + } + starpu_data_unregister(_dot_handle); + + FPRINTF(stderr, "Reference : %e vs. %e (Delta %e)\n", reference_dot, _dot, reference_dot - _dot); + +#ifdef STARPU_USE_CUDA + if (cublas_version >= 7050) + starpu_cublas_shutdown(); +#endif + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&_opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + + + starpu_free_noflag(_x, size*sizeof(float)); + starpu_free_noflag(_y, size*sizeof(float)); + free(_x_handles); + free(_y_handles); + + if (fabs(reference_dot - _dot) < reference_dot * 1e-6) + return EXIT_SUCCESS; + else + { + FPRINTF(stderr, "ERROR: fabs(%e - %e) >= %e * 1e-6\n", reference_dot, _dot, reference_dot); + return EXIT_FAILURE; + } + +enodev: + starpu_shutdown(); + FPRINTF(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return 77; +} diff --git a/examples/reductions/dot_product.h b/examples/reductions/dot_product.h new file mode 100644 index 0000000..36632e7 --- /dev/null +++ b/examples/reductions/dot_product.h @@ -0,0 +1,22 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef DOT_PRODUCT_H__ +#define DOT_PRODUCT_H__ + +#define DOT_TYPE double + +#endif /* DOT_PRODUCT_H__ */ diff --git a/examples/reductions/dot_product_kernels.cu b/examples/reductions/dot_product_kernels.cu new file mode 100644 index 0000000..985a2dd --- /dev/null +++ b/examples/reductions/dot_product_kernels.cu @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Trivial dot reduction CUDA kernel */ + +#include + +#define DOT_TYPE double + +static __global__ void cuda_redux(DOT_TYPE *dota, DOT_TYPE *dotb) +{ + *dota = *dota + *dotb; + return; +} + +extern "C" void redux_cuda_func(void *descr[], void *_args) +{ + (void)_args; + DOT_TYPE *dota = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + DOT_TYPE *dotb = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + cuda_redux<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dota, dotb); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/reductions/dot_product_opencl_kernels.cl b/examples/reductions/dot_product_opencl_kernels.cl new file mode 100644 index 0000000..79deb82 --- /dev/null +++ b/examples/reductions/dot_product_opencl_kernels.cl @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Trivial dot reduction OpenCL kernel */ + +#include "dot_product.h" + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +__kernel void _redux_opencl(__global DOT_TYPE *dota, + __global DOT_TYPE *dotb) +{ + *dota += *dotb; +} + +__kernel void _dot_opencl(__global float *x, + __global float *y, + __global DOT_TYPE *dot, + unsigned n) +{ +/* FIXME: real parallel implementation */ + unsigned i; + __local double tmp; + tmp = 0.0; + for (i = 0; i < n ; i++) + tmp += x[i]*y[i]; + + *dot += tmp; +} diff --git a/examples/reductions/minmax_reduction.c b/examples/reductions/minmax_reduction.c new file mode 100644 index 0000000..745567a --- /dev/null +++ b/examples/reductions/minmax_reduction.c @@ -0,0 +1,225 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This computes the minimum and maximum values of a big vector, using data + * reduction to optimize the computation. + */ + +#include +#include +#include +#include + +#ifdef STARPU_QUICK_CHECK +static unsigned _nblocks = 512; +static unsigned _entries_per_bock = 64; +#else +static unsigned _nblocks = 8192; +static unsigned _entries_per_bock = 1024; +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define TYPE double +#define TYPE_MAX DBL_MAX +#define TYPE_MIN DBL_MIN + +static TYPE *_x; +static starpu_data_handle_t *_x_handles; + +/* The first element (resp. second) stores the min element (resp. max). */ +static TYPE _minmax[2]; +static starpu_data_handle_t _minmax_handle; + +/* + * Codelet to create a neutral element + */ + +void minmax_neutral_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *array = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + + /* Initialize current min to the greatest possible value. */ + array[0] = TYPE_MAX; + + /* Initialize current max to the smallest possible value. */ + array[1] = TYPE_MIN; +} + +static struct starpu_codelet minmax_init_codelet = +{ + .cpu_funcs = {minmax_neutral_cpu_func}, + .cpu_funcs_name = {"minmax_neutral_cpu_func"}, + .modes = {STARPU_W}, + .nbuffers = 1, + .name = "init" +}; + +/* + * Codelet to perform the reduction of two elements + */ + +void minmax_redux_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + TYPE *array_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); + TYPE *array_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + /* Compute the min value */ + TYPE min_dst = array_dst[0]; + TYPE min_src = array_src[0]; + array_dst[0] = STARPU_MIN(min_dst, min_src); + + /* Compute the max value */ + TYPE max_dst = array_dst[1]; + TYPE max_src = array_src[1]; + array_dst[1] = STARPU_MAX(max_dst, max_src); +} + +static struct starpu_codelet minmax_redux_codelet = +{ + .cpu_funcs = {minmax_redux_cpu_func}, + .cpu_funcs_name = {"minmax_redux_cpu_func"}, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, + .name = "redux" +}; + +/* + * Compute max/min within a vector and update the min/max value + */ + +void minmax_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + /* The array containing the values */ + TYPE *local_array = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n 
= STARPU_VECTOR_GET_NX(descr[0]); + + TYPE *minmax = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); + + TYPE local_min = minmax[0]; + TYPE local_max = minmax[1]; + + /* Compute the min and the max elements in the array */ + unsigned i; + for (i = 0; i < n; i++) + { + TYPE val = local_array[i]; + local_min = STARPU_MIN(local_min, val); + local_max = STARPU_MAX(local_max, val); + } + + minmax[0] = local_min; + minmax[1] = local_max; +} + +static struct starpu_codelet minmax_codelet = +{ + .cpu_funcs = {minmax_cpu_func}, + .cpu_funcs_name = {"minmax_cpu_func"}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_REDUX}, + .name = "minmax" +}; + +/* + * Tasks initialization + */ + +int main(void) +{ + unsigned long i; + int ret; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return 77; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned long nelems = _nblocks*_entries_per_bock; + size_t size = nelems*sizeof(TYPE); + + starpu_malloc((void **)&_x, size*sizeof(TYPE)); + _x_handles = (starpu_data_handle_t *) malloc(_nblocks * sizeof(starpu_data_handle_t)); + + assert(_x && _x_handles); + + /* Initialize the vector with random values */ + starpu_srand48(0); + for (i = 0; i < nelems; i++) + _x[i] = (TYPE)starpu_drand48(); + + unsigned block; + for (block = 0; block < _nblocks; block++) + { + uintptr_t block_start = (uintptr_t)&_x[_entries_per_bock*block]; + starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, block_start, + _entries_per_bock, sizeof(TYPE)); + } + + /* Initialize current min */ + _minmax[0] = TYPE_MAX; + + /* Initialize current max */ + _minmax[1] = TYPE_MIN; + + starpu_variable_data_register(&_minmax_handle, STARPU_MAIN_RAM, (uintptr_t)_minmax, 2*sizeof(TYPE)); + + /* Set the methods to define neutral elements and to perform the reduction operation */ + starpu_data_set_reduction_methods(_minmax_handle, &minmax_redux_codelet, 
&minmax_init_codelet); + + for (block = 0; block < _nblocks; block++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &minmax_codelet; + + task->handles[0] = _x_handles[block]; + task->handles[1] = _minmax_handle; + + ret = starpu_task_submit(task); + if (ret) + { + STARPU_ASSERT(ret == -ENODEV); + FPRINTF(stderr, "This test can only run on CPUs, but there are no CPU workers (this is not a bug).\n"); + return 77; + } + } + + for (block = 0; block < _nblocks; block++) + { + starpu_data_unregister(_x_handles[block]); + } + starpu_data_unregister(_minmax_handle); + + FPRINTF(stderr, "Min : %e\n", _minmax[0]); + FPRINTF(stderr, "Max : %e\n", _minmax[1]); + + STARPU_ASSERT(_minmax[0] <= _minmax[1]); + + starpu_free_noflag(_x, size*sizeof(TYPE)); + free(_x_handles); + starpu_shutdown(); + + return 0; +} diff --git a/examples/sched_ctx/axpy_partition_gpu.cu b/examples/sched_ctx/axpy_partition_gpu.cu new file mode 100644 index 0000000..9c0a56d --- /dev/null +++ b/examples/sched_ctx/axpy_partition_gpu.cu @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This creates two dumb vectors, splits them into chunks, and for each pair of + * chunk, run axpy on them. 
+ */ + +#include +#include "axpy_partition_gpu.h" +#include + +//This code demonstrates how to transform a kernel to execute on a given set of GPU SMs. + + +// Original kernel +__global__ void saxpy(int n, float a, float *x, float *y) +{ + int i = blockIdx.x*blockDim.x + threadIdx.x; + if (i>>(__P_HKARGS,n,a,x,y); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/sched_ctx/axpy_partition_gpu.h b/examples/sched_ctx/axpy_partition_gpu.h new file mode 100644 index 0000000..852b8e7 --- /dev/null +++ b/examples/sched_ctx/axpy_partition_gpu.h @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This creates two dumb vectors, splits them into chunks, and for each pair of + * chunk, run axpy on them. 
+ */
+
+#pragma once
+
+__device__ static uint get_smid(void)
+{
+#if defined(__CUDACC__)
+	uint ret;
+	asm("mov.u32 %0, %smid;" : "=r"(ret));
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+#define __P_HKARGS dimGrid, active_blocks ,occupancy, block_assignment_d, mapping_start
+#define __P_KARGS dim3 blocks, int active_blocks, int occupancy, unsigned int* block_assignment, int mapping_start
+
+#define __P_DARGS blocks,blockid
+
+#define __P_BEGIN \
+	__shared__ unsigned int block_start; \
+	int smid = get_smid(); \
+	if(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) \
+	{ \
+		block_start = atomicDec(&block_assignment[smid],0xDEADBEEF); \
+	} \
+	__syncthreads(); \
+ \
+	if(block_start > active_blocks) \
+	{ \
+		return; \
+	}
+
+#define __P_LOOPXY \
+	dim3 blockid; \
+	blockid.z = 0; \
+ \
+	int gridDim_sum = blocks.x*blocks.y; \
+	int startBlock = block_start + (smid - mapping_start) * occupancy; \
+	int blockid_sum; \
+	for(blockid_sum = startBlock; blockid_sum < gridDim_sum; blockid_sum +=active_blocks) \
+	{ \
+		blockid.x = blockid_sum % blocks.x; \
+		blockid.y = blockid_sum / blocks.x;
+
+#define __P_LOOPEND }
+// Needed if shared memory is used
+#define __P_LOOPEND_SAFE __syncthreads(); }
+
+#define __P_LOOPX \
+	dim3 blockid; \
+	blockid.z = 0; \
+	blockid.y = 0; \
+	int gridDim_sum = blocks.x; \
+	int startBlock = (smid-mapping_start) + block_start*(active_blocks/occupancy); \
+	int blockid_sum; \
+	for(blockid_sum = startBlock; blockid_sum < gridDim_sum; blockid_sum +=active_blocks) \
+	{ \
+		blockid.x = blockid_sum;
+
+//	int startBlock = block_start + (smid - mapping_start) * occupancy; \
+
+//////////// HOST side functions: fill the per-SM slot table read by __P_BEGIN and upload it
+
+template <typename F>
+static void buildPartitionedBlockMapping(F cudaFun, int threads, int shmem, int mapping_start, int allocation,
+				  int &width, int &active_blocks, unsigned int *block_assignment_d,cudaStream_t current_stream =
+#ifdef cudaStreamPerThread
+				  cudaStreamPerThread
+#else
+				  NULL
+#endif
+				  )
+{
+	int occupancy;
+	int nb_SM = 13; //TODO: replace with call
+	int mapping_end = mapping_start + allocation - 1; // inclusive: last SM index of the partition
+	unsigned int block_assignment[15]; // same 15 entries as the device buffer allocated by __P_HOSTSETUP
+
+#if CUDART_VERSION >= 6050
+	cudaOccupancyMaxActiveBlocksPerMultiprocessor(&occupancy,cudaFun,threads,shmem);
+#else
+	occupancy = 4;
+#endif
+	width = occupancy * nb_SM; // Physical wrapper grid size. Fits GPU exactly
+	active_blocks = occupancy*allocation; // The total number of blocks doing work
+
+	int i;
+	for(i = 0; i < mapping_start; i++)
+		block_assignment[i] = (unsigned) -1;
+
+	for(i = mapping_start; i <= mapping_end; i++)
+	{
+		block_assignment[i] = occupancy - 1;
+	}
+
+	for(i = mapping_end+1; i < (int)(sizeof(block_assignment)/sizeof(block_assignment[0])); i++)
+		block_assignment[i] = (unsigned) -1; // whole array is uploaded below, so entries past nb_SM must be set too
+
+	cudaMemcpyAsync((void*)block_assignment_d,block_assignment,sizeof(block_assignment),cudaMemcpyHostToDevice, current_stream);
+	//cudaMemcpy((void*)block_assignment_d,block_assignment,sizeof(block_assignment),cudaMemcpyHostToDevice);
+	//cudaDeviceSynchronize();
+}
+
+#define __P_HOSTSETUP(KERNEL,GRIDDIM,BLOCKSIZE,SHMEMSIZE,MAPPING_START,MAPPING_END,STREAM) \
+	unsigned int* block_assignment_d; cudaMalloc((void**) &block_assignment_d,15*sizeof(unsigned int)); \
+	int width = 0; \
+	int active_blocks = 0; \
+	buildPartitionedBlockMapping(KERNEL,BLOCKSIZE,SHMEMSIZE,(MAPPING_START),(MAPPING_END)-(MAPPING_START), \
+				     width, active_blocks, block_assignment_d,STREAM); \
+	int occupancy = active_blocks/((MAPPING_END)-(MAPPING_START)); \
+	dim3 dimGrid = (GRIDDIM); \
+	int mapping_start = (MAPPING_START);
diff --git a/examples/sched_ctx/dummy_sched_with_ctx.c b/examples/sched_ctx/dummy_sched_with_ctx.c
new file mode 100644
index 0000000..56baf52
--- /dev/null
+++ b/examples/sched_ctx/dummy_sched_with_ctx.c
@@ -0,0 +1,177 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is an example of an application-defined scheduler run inside a + * scheduling context. + * This is a mere eager scheduler with a centralized list of tasks to schedule: + * when a task becomes ready (push) it is put on the list. When a device + * becomes ready (pop), a task is taken from the list. + */ +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 320 +#elif !defined(STARPU_LONG_CHECK) +#define NTASKS 3200 +#else +#define NTASKS 32000 +#endif +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +struct dummy_sched_data +{ + struct starpu_task_list sched_list; + starpu_pthread_mutex_t policy_mutex; +}; + +static void init_dummy_sched(unsigned sched_ctx_id) +{ + struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data)); + + /* Create a linked-list of tasks and a condition variable to protect it */ + starpu_task_list_init(&data->sched_list); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); + FPRINTF(stderr, "Initialising Dummy scheduler\n"); +} + +static void deinit_dummy_sched(unsigned sched_ctx_id) +{ + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); + + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + + free(data); + + FPRINTF(stderr, "Destroying Dummy scheduler\n"); +} + +static int push_task_dummy(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + /* NB: In this simplistic strategy, we assume that the context in which + we push task has at least one worker*/ + + + /* lock all workers when pushing tasks on a list where all + of them would pop for tasks */ + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + + starpu_task_list_push_front(&data->sched_list, task); + + starpu_push_task_end(task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + /*if there are no tasks block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker; + worker = workers->get_next(workers, &it); + 
starpu_pthread_mutex_t *sched_mutex; + starpu_pthread_cond_t *sched_cond; + starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond); + STARPU_PTHREAD_MUTEX_LOCK(sched_mutex); + STARPU_PTHREAD_COND_SIGNAL(sched_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex); + } + + return 0; +} + +/* The mutex associated to the calling worker is already taken by StarPU */ +static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id) +{ + /* NB: In this simplistic strategy, we assume that all workers are able + * to execute all tasks, otherwise, it would have been necessary to go + * through the entire list until we find a task that is executable from + * the calling worker. So we just take the head of the list and give it + * to the worker. */ + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_task_list_empty(&data->sched_list)) + return NULL; +#endif + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + struct starpu_task *task = NULL; + if (!starpu_task_list_empty(&data->sched_list)) + task = starpu_task_list_pop_back(&data->sched_list); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + return task; +} + +static struct starpu_sched_policy dummy_sched_policy = +{ + .init_sched = init_dummy_sched, + .add_workers = NULL, + .remove_workers = NULL, + .deinit_sched = deinit_dummy_sched, + .push_task = push_task_dummy, + .pop_task = pop_task_dummy, + .post_exec_hook = NULL, + .policy_name = "dummy", + .policy_description = "dummy scheduling strategy", + .worker_type = STARPU_WORKER_LIST, +}; + +int main(void) +{ + int ntasks = NTASKS; + int ret; +/* struct starpu_conf conf; */ + +/* starpu_conf_init(&conf); */ +/* conf.sched_policy = &dummy_sched_policy, */ + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned sched_ctx = starpu_sched_ctx_create(NULL, -1, "dummy", 
STARPU_SCHED_CTX_POLICY_STRUCT, &dummy_sched_policy, 0); +#ifdef STARPU_QUICK_CHECK + ntasks /= 100; +#endif + + starpu_sched_ctx_set_context(&sched_ctx); + int i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + + starpu_shutdown(); + + return 0; +} diff --git a/examples/sched_ctx/gpu_partition.c b/examples/sched_ctx/gpu_partition.c new file mode 100644 index 0000000..4457a1c --- /dev/null +++ b/examples/sched_ctx/gpu_partition.c @@ -0,0 +1,255 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This creates two dumb vectors & run axpy on them. + */ + +#include +#include +#include +#include +#include + +#include + + +#define N 512*512 +#define NITER 100 + + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define EPSILON 1e-6 + +float *_vec_x[NITER], *_vec_y[NITER]; +float _alpha = 3.41; + +/* descriptors for StarPU */ +starpu_data_handle_t _handle_y[NITER], _handle_x[NITER]; + +void axpy_cpu(void *descr[], void *arg) +{ + float alpha = *((float *)arg); + + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned i; + for(i = 0; i < n; i++) + block_y[i] = alpha * block_x[i] + block_y[i]; +} + +#ifdef STARPU_USE_CUDA +extern void cuda_axpy(void *descr[], void *_args); +#endif + +static struct starpu_perfmodel axpy_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "axpy" +}; + +static struct starpu_codelet axpy_cl = +{ + /* .cpu_funcs = {axpy_cpu}, */ + /* .cpu_funcs_name = {"axpy_cpu"}, */ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_axpy}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .name = "axpy", + .model = &axpy_model +}; + +static int +check(int niter) +{ + int i; + for (i = 0; i < N; i++) + { + float expected_value = _alpha * _vec_x[niter][i] + 4.0; + if (fabs(_vec_y[niter][i] - expected_value) > expected_value * EPSILON) + { + FPRINTF(stderr,"[error for iter %d, indice %d], obtained value %f NOT expected value %f (%f*%f+%f)\n", niter, i, _vec_y[niter][i], expected_value, _alpha, _vec_x[niter][i], 4.0); + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} + +int main(void) +{ + int ret, exit_value = 0; + int iter; +#ifdef STARPU_USE_CUDA + int ncuda = 0; + int gpu_devid = -1; +#endif + +#ifdef STARPU_DEVEL +#warning temporary fix: skip test as cuda computation fails +#endif + return 77; + +#ifndef STARPU_HAVE_SETENV + return 77; +#else + /* Have separate threads for streams */ + setenv("STARPU_CUDA_THREAD_PER_WORKER", "1", 1); + 
setenv("STARPU_NWORKER_PER_CUDA", "2", 1); + setenv("STARPU_NCUDA", "1", 1); +#endif + + /* Initialize StarPU */ + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_CUDA + ncuda = starpu_worker_get_devids(STARPU_CUDA_WORKER, &gpu_devid, 1); + FPRINTF(stderr, "gpu_devid found %d \n", gpu_devid); +#endif + if (ncuda == 0) + { + starpu_shutdown(); + return 77; + } + + for(iter = 0; iter < NITER; iter++) + { + /* This is equivalent to + vec_a = malloc(N*sizeof(float)); + vec_b = malloc(N*sizeof(float)); + */ + starpu_malloc((void **)&_vec_x[iter], N*sizeof(float)); + assert(_vec_x[iter]); + + starpu_malloc((void **)&_vec_y[iter], N*sizeof(float)); + assert(_vec_y[iter]); + + unsigned i; + for (i = 0; i < N; i++) + { + _vec_x[iter][i] = 1.0f; /*(float)starpu_drand48(); */ + _vec_y[iter][i] = 4.0f; /*(float)starpu_drand48(); */ + } + + /* Declare the data to StarPU */ + starpu_vector_data_register(&_handle_x[iter], STARPU_MAIN_RAM, (uintptr_t)_vec_x[iter], N, sizeof(float)); + starpu_vector_data_register(&_handle_y[iter], STARPU_MAIN_RAM, (uintptr_t)_vec_y[iter], N, sizeof(float)); + } + + double start; + double end; +#ifdef STARPU_USE_CUDA + unsigned nworkers = starpu_worker_get_count(); + int stream_workerids[nworkers]; + + int nstreams = starpu_worker_get_stream_workerids(gpu_devid, stream_workerids, STARPU_CUDA_WORKER); + + int s; + for(s = 0; s < nstreams; s++) + FPRINTF(stderr, "stream w %d \n", stream_workerids[s]); + + int ncpus = starpu_cpu_worker_get_count(); + int workers[ncpus+nstreams]; + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, ncpus); + + unsigned sched_ctxs[nstreams]; + int nsms[nstreams]; + nsms[0] = 6; + nsms[1] = 7; + + for(s = 0; s < nstreams; s++) + { + sched_ctxs[s] = starpu_sched_ctx_create(&stream_workerids[s], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, nsms[s], 0); + workers[ncpus+s] = stream_workerids[s]; + } + unsigned sched_ctx1 = 
starpu_sched_ctx_create(workers, ncpus+nstreams, "ctx1", STARPU_SCHED_CTX_SUB_CTXS, sched_ctxs, nstreams, STARPU_SCHED_CTX_POLICY_NAME, "dmdas", 0); + + FPRINTF(stderr, "parent ctx %u\n", sched_ctx1); + starpu_sched_ctx_set_context(&sched_ctx1); + +#endif + start = starpu_timing_now(); + + for (iter = 0; iter < NITER; iter++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &axpy_cl; + + task->cl_arg = &_alpha; + task->cl_arg_size = sizeof(_alpha); + + task->handles[0] = _handle_x[iter]; + task->handles[1] = _handle_y[iter]; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + exit_value = 77; + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + +enodev: + for(iter = 0; iter < NITER; iter++) + { + starpu_data_unregister(_handle_x[iter]); + starpu_data_unregister(_handle_y[iter]); + } + end = starpu_timing_now(); + double timing = end - start; + + FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(float)/timing); + +// FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[iter][0], _alpha); + + if (exit_value != 77) + { + for(iter = 0; iter < NITER; iter++) + { + exit_value = check(iter); + if(exit_value != EXIT_SUCCESS) + break; + } + } + + for(iter = 0; iter < NITER; iter++) + { + starpu_free_noflag((void *)_vec_x[iter], N*sizeof(float)); + starpu_free_noflag((void *)_vec_y[iter], N*sizeof(float)); + } + + /* Stop StarPU */ + starpu_shutdown(); + + return exit_value; +} diff --git a/examples/sched_ctx/nested_sched_ctxs.c b/examples/sched_ctx/nested_sched_ctxs.c new file mode 100644 index 0000000..9a039b4 --- /dev/null +++ b/examples/sched_ctx/nested_sched_ctxs.c @@ -0,0 +1,248 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 64 +#else +#define NTASKS 100 +#endif + +int tasks_executed[2]; + +int parallel_code(int sched_ctx) +{ + int i; + int t = 0; + int *cpuids = NULL; + int ncpuids = 0; + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + +// printf("execute task of %d threads \n", ncpuids); +#pragma omp parallel num_threads(ncpuids) + { + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); +#ifdef __linux__ +// printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), sched_ctx, omp_get_num_threads()); +#endif +#pragma omp for + for(i = 0; i < NTASKS; i++) + t++; + } + + free(cpuids); + return t; +} + +static void sched_ctx_func(void *descr[], void *arg) +{ + (void)descr; + unsigned sched_ctx = (uintptr_t)arg; + int t = parallel_code(sched_ctx); + if (sched_ctx > 0 && sched_ctx < 3) + { + STARPU_ATOMIC_ADD(&tasks_executed[sched_ctx-1], t); + } + + //printf("w %d executed %d it \n", w, n); +} + + +static struct starpu_codelet sched_ctx_codelet = +{ + .cpu_funcs = {sched_ctx_func}, + .model = NULL, + .nbuffers = 0, + .name = "sched_ctx" +}; + +int main(void) +{ + tasks_executed[0] = 0; + tasks_executed[1] = 0; + int ntasks = NTASKS; + int ret, j, k; + unsigned ncpus = 0; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_init"); + + int nprocs1 = 1; + int nprocs2 = 1; + int *procs1, *procs2; + +#ifdef STARPU_USE_CPU + ncpus = starpu_cpu_worker_get_count(); + procs1 = (int*)malloc(ncpus*sizeof(int)); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus); + + if (ncpus > 1) + { + nprocs1 = ncpus/2; + nprocs2 = nprocs1; + k = 0; + procs2 = (int*)malloc(nprocs2*sizeof(int)); + for(j = nprocs1; j < nprocs1+nprocs2; j++) + procs2[k++] = procs1[j]; + } + else + { + procs2 = (int*)malloc(nprocs2*sizeof(int)); + procs2[0] = procs1[0]; + } +#endif + + if (ncpus == 0) + { +#ifdef STARPU_USE_CPU + free(procs1); + free(procs2); +#endif + starpu_shutdown(); + return 77; + } + + /*create contexts however you want*/ + unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + + /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ + starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + + int nprocs3 = nprocs1/2; + int nprocs4 = nprocs3; + int nprocs5 = nprocs2/2; + int nprocs6 = nprocs5; + int *procs3 = NULL; + int *procs4 = NULL; + int *procs5 = NULL; + int *procs6 = NULL; + + if (nprocs3) + procs3 = malloc(nprocs3 * sizeof(*procs3)); + if (nprocs4) + procs4 = malloc(nprocs4 * sizeof(*procs4)); + if (nprocs5) + procs5 = malloc(nprocs5 * sizeof(*procs5)); + if (nprocs6) + procs6 = malloc(nprocs6 * sizeof(*procs6)); + + k = 0; + for(j = 0; j < nprocs3; j++) + procs3[k++] = procs1[j]; + k = 0; + for(j = nprocs3; j < nprocs3+nprocs4; j++) + procs4[k++] = procs1[j]; + + k = 0; + for(j = 0; j < nprocs5; j++) + procs5[k++] = procs2[j]; + k = 0; + for(j = nprocs5; j < nprocs5+nprocs6; j++) + procs6[k++] = procs2[j]; + + int sched_ctx3 = -1; + int sched_ctx4 = -1; + int sched_ctx5 = -1; + int sched_ctx6 = -1; + + if (nprocs3) + sched_ctx3 = 
starpu_sched_ctx_create(procs3, nprocs3, "ctx3", STARPU_SCHED_CTX_NESTED, sched_ctx1, 0); + if (nprocs4) + sched_ctx4 = starpu_sched_ctx_create(procs4, nprocs4, "ctx4", STARPU_SCHED_CTX_NESTED, sched_ctx1, 0); + if (nprocs5) + sched_ctx5 = starpu_sched_ctx_create(procs5, nprocs5, "ctx5", STARPU_SCHED_CTX_NESTED, sched_ctx2, 0); + if (nprocs6) + sched_ctx6 = starpu_sched_ctx_create(procs6, nprocs6, "ctx6", STARPU_SCHED_CTX_NESTED, sched_ctx2, 0); + + + int i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet; + task->cl_arg = (void*)(uintptr_t) sched_ctx1; + task->possibly_parallel = 1; + + /*submit tasks to context*/ + ret = starpu_task_submit_to_ctx(task,sched_ctx1); + + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet; + task->cl_arg = (void*)(uintptr_t) sched_ctx2; + task->possibly_parallel = 1; + + /*submit tasks to context*/ + ret = starpu_task_submit_to_ctx(task,sched_ctx2); + + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + + /* tell starpu when you finished submitting tasks to this context + in order to allow moving resources from this context to the inheritor one + when its corresponding tasks finished executing */ + + + + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + + if (nprocs3) + { + starpu_sched_ctx_delete(sched_ctx3); + free(procs3); + } + if (nprocs4) + { + starpu_sched_ctx_delete(sched_ctx4); + free(procs4); + } + if (nprocs5) + { + starpu_sched_ctx_delete(sched_ctx5); + free(procs5); + } + if (nprocs6) + { + starpu_sched_ctx_delete(sched_ctx6); + free(procs6); + } + + starpu_sched_ctx_delete(sched_ctx1); + starpu_sched_ctx_delete(sched_ctx2); + + printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS); + printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, 
tasks_executed[1], NTASKS*NTASKS); + +#ifdef STARPU_USE_CPU + free(procs1); + free(procs2); +#endif + starpu_shutdown(); + return 0; +} diff --git a/examples/sched_ctx/parallel_code.c b/examples/sched_ctx/parallel_code.c new file mode 100644 index 0000000..5d82a37 --- /dev/null +++ b/examples/sched_ctx/parallel_code.c @@ -0,0 +1,105 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include <starpu.h>
+#ifdef STARPU_USE_CPU
+#include <omp.h>
+
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 4
+#else
+#define NTASKS 10
+#endif
+
+/* Run an OpenMP team on the CPUs that StarPU reports as available for
+ * *sched_ctx, bind each thread to one of these CPUs and count NTASKS loop
+ * iterations.
+ * Returns the number of iterations counted, or 0 when no CPU is available. */
+int parallel_code(unsigned *sched_ctx)
+{
+	int i;
+	int t = 0;
+	int *cpuids = NULL;
+	int ncpuids = 0;
+	starpu_sched_ctx_get_available_cpuids(*sched_ctx, &cpuids, &ncpuids);
+
+	/* omp_set_num_threads() needs a positive value, and cpuids[] must not
+	 * be indexed when it is empty */
+	if (ncpuids <= 0)
+	{
+		free(cpuids);
+		return 0;
+	}
+
+	/* printf("execute task of %d threads \n", ncpuids); */
+	omp_set_num_threads(ncpuids);
+#pragma omp parallel
+	{
+		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+#ifdef __linux__
+		/* printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), *sched_ctx, omp_get_num_threads()); */
+#endif
+#pragma omp for
+		for(i = 0; i < NTASKS; i++)
+		{
+#pragma omp atomic
+			t++;
+		}
+	}
+
+	/* the cpuid array was handed to us by StarPU, we have to release it */
+	free(cpuids);
+	return t;
+}
+
+/* Adapter giving parallel_code() the void *(*)(void *) shape under which
+ * starpu_sched_ctx_exec_parallel_code() invokes its callback; the count is
+ * carried back as a pointer-sized integer */
+static void *parallel_code_entry(void *arg)
+{
+	return (void *)(intptr_t)parallel_code((unsigned *)arg);
+}
+
+/* Thread body: p points to the id of the context whose cores are borrowed.
+ * The value produced by the parallel code is the thread's return value. */
+void *th(void* p)
+{
+	unsigned* sched_ctx = (unsigned*)p;
+	return starpu_sched_ctx_exec_parallel_code(parallel_code_entry, p, *sched_ctx);
+}
+
+/* Create one context holding all CPU workers, run parallel_code() on its
+ * cores from a separate thread and print how many iterations were counted.
+ * Returns 77 (test skipped) when StarPU has no device or no CPU worker. */
+int main(void)
+{
+	int ret;
+	void* tasks_executed = NULL;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	unsigned ncpus = starpu_cpu_worker_get_count();
+	if (ncpus == 0)
+	{
+		/* Needs at least 1 CPU worker to act as parallel region master */
+		starpu_shutdown();
+		return 77;
+	}
+
+	int nprocs1 = (int)ncpus;
+	int *procs1 = malloc(ncpus * sizeof(*procs1));
+	if (procs1 == NULL)
+	{
+		starpu_shutdown();
+		return 1;
+	}
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
+
+	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
+
+	/* This is the interesting part, we can launch a code to hijack the context and
+	   use its cores to do something else entirely thanks to this */
+	pthread_t mp;
+	STARPU_PTHREAD_CREATE(&mp, NULL, th, &sched_ctx1);
+
+	STARPU_PTHREAD_JOIN(mp, &tasks_executed);
+
+	/* Finished, delete the context and print the amount of executed tasks */
+	starpu_sched_ctx_delete(sched_ctx1);
+	/* intptr_t is not necessarily long: convert explicitly to match %ld */
+	printf("ctx%u: tasks starpu executed %ld out of %d\n", sched_ctx1, (long)(intptr_t)tasks_executed, NTASKS);
+	starpu_shutdown();
+
+	free(procs1);
+
+	return 0;
+}
+#else /* STARPU_USE_CPU */
+int main(int argc, char **argv)
+{
+	/* Built without CPU support: starpu_sched_ctx_exec_parallel_code() requires a CPU worker as parallel region master, so there is nothing to run */
+	return 77; /* STARPU_TEST_SKIPPED */
+}
+#endif /* STARPU_USE_CPU */
diff --git a/examples/sched_ctx/parallel_tasks_reuse_handle.c b/examples/sched_ctx/parallel_tasks_reuse_handle.c
new file mode 100644
index 0000000..af857b1
--- /dev/null
+++ b/examples/sched_ctx/parallel_tasks_reuse_handle.c
@@ -0,0 +1,245 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 64 +#define SIZE 40 +#define LOOPS 4 +#else +#define NTASKS 100 +#define SIZE 400 +#define LOOPS 10 +#endif + +#define N_NESTED_CTXS 2 + +struct context +{ + int ncpus; + int *cpus; + unsigned id; +}; + +/* Helper for the task that will initiate everything */ +void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_) +{ + fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__); + int sched_ctx = *(int *)sched_ctx_; + int *cpuids = NULL; + int ncpuids = 0; + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + +#pragma omp parallel num_threads(ncpuids) + { + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); + } + + omp_set_num_threads(ncpuids); + free(cpuids); + fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__); + return; +} + +void noop(void * buffers[], void * cl_arg) +{ + (void)buffers; + (void)cl_arg; +} + +static struct starpu_codelet init_parallel_worker_cl= +{ + .cpu_funcs = {noop}, + .nbuffers = 0, + .name = "init_parallel_worker" +}; + +/* function called to initialize the parallel "workers" */ +void parallel_task_init_one_context(unsigned * context_id) +{ + struct starpu_task * t; + int ret; + + t = starpu_task_build(&init_parallel_worker_cl, + STARPU_SCHED_CTX, *context_id, + 0); + t->destroy = 1; + t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all; + if (t->prologue_callback_pop_arg_free) + free(t->prologue_callback_pop_arg); + t->prologue_callback_pop_arg=context_id; + t->prologue_callback_pop_arg_free=0; + + ret = starpu_task_submit(t); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +struct context main_context; +struct context *contexts; +void parallel_task_init() +{ + /* Context creation */ + main_context.ncpus = starpu_cpu_worker_get_count(); + main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int)); + fprintf(stderr, "ncpus : %d 
\n",main_context.ncpus); + + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus); + + main_context.id = starpu_sched_ctx_create(main_context.cpus, + main_context.ncpus,"main_ctx", + STARPU_SCHED_CTX_POLICY_NAME, "dmda", + 0); + + /* Initialize nested contexts */ + contexts = malloc(sizeof(struct context)*N_NESTED_CTXS); + int cpus_per_context = main_context.ncpus/N_NESTED_CTXS; + int i; + for(i = 0; i < N_NESTED_CTXS; i++) + { + contexts[i].ncpus = cpus_per_context; + if (i == N_NESTED_CTXS-1) + contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS; + contexts[i].cpus = main_context.cpus+i*cpus_per_context; + } + + for(i = 0; i < N_NESTED_CTXS; i++) + contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus, + contexts[i].ncpus,"nested_ctx", + STARPU_SCHED_CTX_NESTED,main_context.id, + 0); + + for (i = 0; i < N_NESTED_CTXS; i++) + { + parallel_task_init_one_context(&contexts[i].id); + } + + starpu_task_wait_for_all(); + starpu_sched_ctx_set_context(&main_context.id); +} + +void parallel_task_deinit() +{ + int i; + for (i=0; idestroy = 1; + t->possibly_parallel = 1; + + ret=starpu_task_submit(t); + if (ret == -ENODEV) + goto out; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + +out: + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + parallel_task_deinit(); + + starpu_free_noflag(array1, SIZE*sizeof(double)); + starpu_free_noflag(array2, SIZE*sizeof(double)); + + starpu_shutdown(); + return 0; +} diff --git a/examples/sched_ctx/prio.c b/examples/sched_ctx/prio.c new file mode 100644 index 0000000..3bc5c1c --- /dev/null +++ b/examples/sched_ctx/prio.c @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(void) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, -1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "prio", 0); + + FPRINTF(stderr, "min prio %d\n", starpu_sched_ctx_get_min_priority(sched_ctx1)); + FPRINTF(stderr, "max prio %d\n", starpu_sched_ctx_get_max_priority(sched_ctx1)); + + unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, -1, "ctx2", + STARPU_SCHED_CTX_POLICY_NAME, "prio", + STARPU_SCHED_CTX_POLICY_MIN_PRIO, -12, + STARPU_SCHED_CTX_POLICY_MAX_PRIO, 32, + 0); + + FPRINTF(stderr, "min prio %d\n", starpu_sched_ctx_get_min_priority(sched_ctx2)); + FPRINTF(stderr, "max prio %d\n", starpu_sched_ctx_get_max_priority(sched_ctx2)); + + if (starpu_sched_ctx_get_min_priority(sched_ctx2) != -12) + { + FPRINTF(stderr, "Error with min priority: %d != %d\n", starpu_sched_ctx_get_min_priority(sched_ctx2), -12); + ret = 1; + } + if (starpu_sched_ctx_get_max_priority(sched_ctx2) != 32) + { + FPRINTF(stderr, "Error with max priority: %d != %d\n", starpu_sched_ctx_get_max_priority(sched_ctx2), 32); + ret = 1; + } + + starpu_sched_ctx_delete(sched_ctx1); + 
starpu_sched_ctx_delete(sched_ctx2); + + starpu_shutdown(); + + return ret; +} diff --git a/examples/sched_ctx/sched_ctx.c b/examples/sched_ctx/sched_ctx.c new file mode 100644 index 0000000..e354eaa --- /dev/null +++ b/examples/sched_ctx/sched_ctx.c @@ -0,0 +1,172 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 64 +#else +#define NTASKS 1000 +#endif + +int tasks_executed = 0; +int ctx1_tasks_executed = 0; +int ctx2_tasks_executed = 0; +int cpu_tasks_executed = 0; +int gpu_tasks_executed = 0; + +static void sched_ctx_cpu_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); +} + +static void sched_ctx2_cpu_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); +} + +static void sched_ctx2_cuda_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&gpu_tasks_executed,1); +} + 
+static struct starpu_codelet sched_ctx_codelet1 = +{ + .cpu_funcs = {sched_ctx_cpu_func}, + .model = NULL, + .nbuffers = 0, + .name = "sched_ctx" +}; + +static struct starpu_codelet sched_ctx_codelet2 = +{ + .cpu_funcs = {sched_ctx2_cpu_func}, + .cuda_funcs = {sched_ctx2_cuda_func}, + .model = NULL, + .nbuffers = 0, + .name = "sched_ctx" +}; + + +int main(void) +{ + int ntasks = NTASKS; + int ret; + int nprocs1 = 0; + int nprocs2 = 0; + int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS]; + char *sched = getenv("STARPU_SCHED"); + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + ntasks = 8; +#endif + +#ifdef STARPU_USE_CPU + nprocs1 = starpu_cpu_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1); +#endif + // if there is no cpu, skip + if (nprocs1 == 0) goto enodev; + +#ifdef STARPU_USE_CUDA + nprocs2 = starpu_cuda_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, nprocs2); +#endif + if (nprocs2 == 0) + { + nprocs2 = 1; + procs2[0] = procs1[0]; + } + + /*create contexts however you want*/ + unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); + + /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ + starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + + starpu_sched_ctx_display_workers(sched_ctx2, stderr); + + int i; + for (i = 0; i < ntasks/2; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet1; + task->cl_arg = NULL; + + /*submit tasks to context*/ + ret = starpu_task_submit_to_ctx(task,sched_ctx1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* tell starpu 
when you finished submitting tasks to this context + in order to allow moving resources from this context to the inheritor one + when its corresponding tasks finished executing */ + starpu_sched_ctx_finished_submit(sched_ctx1); + + for (i = 0; i < ntasks/2; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet2; + task->cl_arg = NULL; + + ret = starpu_task_submit_to_ctx(task,sched_ctx2); + + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_sched_ctx_finished_submit(sched_ctx2); + + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + + starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2); + starpu_sched_ctx_delete(sched_ctx1); + starpu_sched_ctx_delete(sched_ctx2); + printf("tasks executed %d out of %d\n", tasks_executed, ntasks); + printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed); + printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed); + printf("tasks executed on CPU: %d\n", cpu_tasks_executed); + printf("tasks executed on GPU: %d\n", gpu_tasks_executed); + +enodev: + starpu_shutdown(); + return nprocs1 == 0 ? 77 : 0; +} diff --git a/examples/sched_ctx/sched_ctx_delete.c b/examples/sched_ctx/sched_ctx_delete.c new file mode 100644 index 0000000..cb140b5 --- /dev/null +++ b/examples/sched_ctx/sched_ctx_delete.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +int main(void) +{ + int ret; + int nprocs = 0; + int procs[STARPU_NMAXWORKERS]; + unsigned sched_ctx1, sched_ctx2; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == 0) + { + // Needs at least 1 CPU worker + starpu_shutdown(); + return 77; + } + + nprocs = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); + + sched_ctx1 = starpu_sched_ctx_create(procs, nprocs, "ctx1", 0); + sched_ctx2 = starpu_sched_ctx_create(procs, nprocs, "ctx2", 0); + + starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + + starpu_sched_ctx_delete(sched_ctx1); + starpu_sched_ctx_delete(sched_ctx2); + + starpu_shutdown(); + return 0; +} diff --git a/examples/sched_ctx/sched_ctx_empty.c b/examples/sched_ctx/sched_ctx_empty.c new file mode 100644 index 0000000..58e619b --- /dev/null +++ b/examples/sched_ctx/sched_ctx_empty.c @@ -0,0 +1,65 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static void cpu_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + FPRINTF(stdout, "Hello world\n"); +} + +static struct starpu_codelet codelet = +{ + .cpu_funcs = {cpu_func}, + .nbuffers = 0, + .name = "codelet" +}; + +int main(void) +{ + int ret; + int nprocs = 0; + int procs[STARPU_NMAXWORKERS]; + unsigned sched_ctx_id; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + nprocs = starpu_cpu_worker_get_count(); + // if there is no cpu, skip + if (nprocs == 0) goto enodev; + + sched_ctx_id = starpu_sched_ctx_create(NULL, 0, "ctx", 0); + starpu_sched_ctx_set_context(&sched_ctx_id); + + ret = starpu_task_insert(&codelet, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); + starpu_sched_ctx_add_workers(procs, nprocs, sched_ctx_id); + starpu_task_wait_for_all(); + + starpu_sched_ctx_delete(sched_ctx_id); + +enodev: + starpu_shutdown(); + return nprocs == 0 ? 77 : 0; +} diff --git a/examples/sched_ctx/sched_ctx_remove.c b/examples/sched_ctx/sched_ctx_remove.c new file mode 100644 index 0000000..f23c9cb --- /dev/null +++ b/examples/sched_ctx/sched_ctx_remove.c @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 64 +#else +#define NTASKS 1000 +#endif + +int tasks_executed = 0; +int ctx1_tasks_executed = 0; +int ctx2_tasks_executed = 0; +int cpu_tasks_executed = 0; +int gpu_tasks_executed = 0; + +static void sched_ctx_cpu_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); +} + +static void sched_ctx2_cpu_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); +} + +static void sched_ctx2_cuda_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + (void)STARPU_ATOMIC_ADD(&tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); + (void)STARPU_ATOMIC_ADD(&gpu_tasks_executed,1); +} + +static struct starpu_codelet sched_ctx_codelet1 = +{ + .cpu_funcs = {sched_ctx_cpu_func}, + .model = NULL, + .nbuffers = 0, + .name = "sched_ctx" +}; + +static struct starpu_codelet sched_ctx_codelet2 = +{ + .cpu_funcs = {sched_ctx2_cpu_func}, + .cuda_funcs = {sched_ctx2_cuda_func}, + .model = NULL, + .nbuffers = 0, + .name = "sched_ctx" +}; + + +int main(void) +{ + int ntasks = NTASKS; + int ret; + int nprocs1 = 0; + int nprocs2 = 0; + int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS]; + char *sched = getenv("STARPU_SCHED"); + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + ntasks = 8; +#endif + +#ifdef STARPU_USE_CPU + nprocs1 = starpu_cpu_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1); +#endif 
+ // if there is not enough cpu, skip + if (nprocs1 <= 1) goto enodev; + +#ifdef STARPU_USE_CUDA + nprocs2 = starpu_cuda_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, nprocs2); +#endif + if (nprocs2 == 0) + { + nprocs2 = 1; + procs2[0] = procs1[0]; + } + + /*create contexts however you want*/ + unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); + + /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ + starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + + starpu_sched_ctx_display_workers(sched_ctx2, stderr); + + int i; + for (i = 0; i < ntasks/2; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet1; + task->cl_arg = NULL; + + /*submit tasks to context*/ + ret = starpu_task_submit_to_ctx(task,sched_ctx1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* tell starpu when you finished submitting tasks to this context + in order to allow moving resources from this context to the inheritor one + when its corresponding tasks finished executing */ + starpu_sched_ctx_finished_submit(sched_ctx1); + + starpu_sched_ctx_add_workers(procs1, nprocs1/2, sched_ctx2); + starpu_sched_ctx_remove_workers(procs1, nprocs1/2, sched_ctx1); + + for (i = 0; i < ntasks/2; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &sched_ctx_codelet2; + task->cl_arg = NULL; + + ret = starpu_task_submit_to_ctx(task,sched_ctx2); + + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_sched_ctx_finished_submit(sched_ctx2); + + /* wait for all tasks at the end*/ + starpu_task_wait_for_all(); + + starpu_sched_ctx_delete(sched_ctx1); + starpu_sched_ctx_delete(sched_ctx2); + printf("tasks executed %d out of %d\n", 
tasks_executed, ntasks); + printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed); + printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed); + printf("tasks executed on CPU: %d\n", cpu_tasks_executed); + printf("tasks executed on GPU: %d\n", gpu_tasks_executed); + +enodev: + starpu_shutdown(); + return nprocs1 <= 1 ? 77 : 0; +} diff --git a/examples/sched_ctx/sched_ctx_without_sched_policy.c b/examples/sched_ctx/sched_ctx_without_sched_policy.c new file mode 100644 index 0000000..100dd1e --- /dev/null +++ b/examples/sched_ctx/sched_ctx_without_sched_policy.c @@ -0,0 +1,177 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include <starpu.h>
+#include <omp.h>
+
+#ifndef STARPU_QUICK_CHECK
+#define NTASKS 64
+#else
+#define NTASKS 10
+#endif
+
+int tasks_executed[2];
+
+/* Run an OpenMP parallel region with as many threads as StarPU reports
+ * available CPUs for sched_ctx, bind each thread to one of these CPUs and
+ * count NTASKS loop iterations through a reduction.
+ * Returns the reduced count. */
+int parallel_code(int sched_ctx)
+{
+	int i;
+	int t = 0;
+	int *cpuids = NULL;
+	int ncpuids = 0;
+	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+
+//	printf("execute task of %d threads \n", ncpuids);
+#pragma omp parallel num_threads(ncpuids) reduction(+:t)
+	{
+		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+#ifdef __linux__
+//		printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), sched_ctx, omp_get_num_threads());
+#endif
+#pragma omp for
+		for(i = 0; i < NTASKS; i++)
+			t++;
+	}
+
+	free(cpuids);
+	return t;
+}
+
+/* Codelet body: arg carries the id of the context the task was submitted to.
+ * NOTE(review): indexing with sched_ctx-1 assumes the two contexts created in
+ * main() get ids 1 and 2 -- confirm against the StarPU id allocation. */
+static void sched_ctx_func(void *descr[], void *arg)
+{
+	(void)descr;
+	unsigned sched_ctx = (uintptr_t)arg;
+	tasks_executed[sched_ctx-1] += parallel_code(sched_ctx);
+}
+
+
+static struct starpu_codelet sched_ctx_codelet =
+{
+	.cpu_funcs = {sched_ctx_func},
+	.model = NULL,
+	.nbuffers = 0,
+	.name = "sched_ctx"
+};
+
+
+/* Split the CPU workers between two contexts created without a scheduling
+ * policy, submit NTASKS tasks to each of them and print the per-context
+ * iteration counts.
+ * Returns 77 (test skipped) when StarPU has no device or no CPU worker. */
+int main(void)
+{
+	tasks_executed[0] = 0;
+	tasks_executed[1] = 0;
+	int ntasks = NTASKS;
+	int ret, j, k;
+	unsigned ncpus = 0;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	int nprocs1 = 1;
+	int nprocs2 = 1;
+	int *procs1, *procs2;
+
+#ifdef STARPU_USE_CUDA
+	int ncuda = 0;
+	int *procscuda;
+	ncuda = starpu_cuda_worker_get_count();
+	procscuda = (int*)malloc(ncuda*sizeof(int));
+	starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procscuda, ncuda);
+#endif
+#ifdef STARPU_USE_CPU
+	ncpus = starpu_cpu_worker_get_count();
+	procs1 = (int*)malloc(ncpus*sizeof(int));
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
+
+	if(ncpus > 1)
+	{
+		nprocs1 = ncpus/2;
+		nprocs2 = ncpus-nprocs1;
+		k = 0;
+		procs2 = (int*)malloc(nprocs2*sizeof(int));
+		for(j = nprocs1; j < nprocs1+nprocs2; j++)
+			procs2[k++] = procs1[j];
+	}
+	else
+	{
+		procs2 = (int*)malloc(nprocs2*sizeof(int));
+		procs2[0] = procs1[0];
+
+	}
+#endif
+
+	if (ncpus == 0) goto enodev;
+#ifdef STARPU_USE_CUDA
+	if (ncuda > 0 && nprocs1 > 1)
+	{
+		procs1[nprocs1-1] = procscuda[0];
+	}
+#endif
+
+	/*create contexts however you want*/
+	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", 0);
+	unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", 0);
+	starpu_sched_ctx_display_workers(sched_ctx1, stderr);
+	starpu_sched_ctx_display_workers(sched_ctx2, stderr);
+	int i;
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &sched_ctx_codelet;
+		task->cl_arg = (void*)(uintptr_t) sched_ctx1;
+
+		/*submit tasks to context*/
+		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
+
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &sched_ctx_codelet;
+		task->cl_arg = (void*)(uintptr_t) sched_ctx2;
+
+		/*submit tasks to context*/
+		ret = starpu_task_submit_to_ctx(task,sched_ctx2);
+
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+
+	/* tell starpu when you finished submitting tasks to this context
+	   in order to allow moving resources from this context to the inheritor one
+	   when its corresponding tasks finished executing */
+
+
+
+	/* wait for all tasks at the end*/
+	starpu_task_wait_for_all();
+
+	starpu_sched_ctx_delete(sched_ctx1);
+	starpu_sched_ctx_delete(sched_ctx2);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS);
+
+enodev:
+#ifdef STARPU_USE_CPU
+	free(procs1);
+	free(procs2);
+#endif
+#ifdef STARPU_USE_CUDA
+	/* the CUDA worker id list is allocated before any jump to this label,
+	 * release it on both the normal and the skip path */
+	free(procscuda);
+#endif
+	starpu_shutdown();
+	return ncpus == 0 ? 77 : 0;
+}
diff --git a/examples/sched_ctx/sched_ctx_without_sched_policy_awake.c b/examples/sched_ctx/sched_ctx_without_sched_policy_awake.c
new file mode 100644
index 0000000..87b3242
--- /dev/null
+++ b/examples/sched_ctx/sched_ctx_without_sched_policy_awake.c
@@ -0,0 +1,166 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 64
+#else
+#define NTASKS 100
+#endif
+
+
+int tasks_executed[2][STARPU_NMAXWORKERS];
+int parallel_code(int sched_ctx)
+{
+	int i;
+	int t = 0;
+	int workerid = starpu_worker_get_id();
+	for(i = 0; i < NTASKS; i++)
+		t++;
+	tasks_executed[sched_ctx-1][workerid] = t;
+//	printf("executed %d tasks on worker %d of sched_ctx %d \n", t, workerid, sched_ctx);
+
+	return t;
+}
+
+static void sched_ctx_func(void *descr[], void *arg)
+{
+	(void)descr;
+	unsigned sched_ctx = (uintptr_t)arg;
+	parallel_code(sched_ctx);
+}
+
+
+static struct starpu_codelet sched_ctx_codelet =
+{
+	.cpu_funcs = {sched_ctx_func},
+	.model = NULL,
+	.nbuffers = 0,
+	.name = "sched_ctx"
+};
+
+
+int main(void)
+{
+	int i;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+	{
+		tasks_executed[0][i] = 0;
+		tasks_executed[1][i] = 0;
+	}
+	int ntasks = NTASKS;
+	int ret, j, k;
+	unsigned ncpus = 0;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	int nprocs1 = 1;
+	int nprocs2 = 1;
+	int *procs1, *procs2;
+
+#ifdef STARPU_USE_CPU
+	ncpus = starpu_cpu_worker_get_count();
+	procs1 = (int*)malloc(ncpus*sizeof(int));
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
+
+	if(ncpus > 1)
+	{
+		nprocs1 = ncpus/2;
+		nprocs2 = ncpus-nprocs1;
+		k = 0;
+		procs2 = (int*)malloc(nprocs2*sizeof(int));
+		for(j = nprocs1; j < nprocs1+nprocs2; j++)
+			procs2[k++] = procs1[j];
+	}
+	else
+	{
+		procs2 = (int*)malloc(nprocs2*sizeof(int));
+		procs2[0] = procs1[0];
+
+	}
+#endif
+
+	if (ncpus == 0) goto enodev;
+
+	/*create contexts however you want*/
+	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_AWAKE_WORKERS, 0);
+	unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_AWAKE_WORKERS, 0);
+
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &sched_ctx_codelet;
+		task->cl_arg = (void*)(uintptr_t) sched_ctx1;
+
+		/*submit tasks to context*/
+		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
+
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &sched_ctx_codelet;
+		task->cl_arg = (void*)(uintptr_t) sched_ctx2;
+
+		/*submit tasks to context*/
+		ret = starpu_task_submit_to_ctx(task,sched_ctx2);
+
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+
+	/* tell starpu when you finished submitting tasks to this context
+	   in order to allow moving resources from this context to the inheritor one
+	   when its corresponding tasks finished executing */
+
+
+
+	/* wait for all tasks at the end*/
+	starpu_task_wait_for_all();
+
+	starpu_sched_ctx_delete(sched_ctx1);
+	starpu_sched_ctx_delete(sched_ctx2);
+
+	int tasks_per_ctx[2];
+	tasks_per_ctx[0] = 0;
+	tasks_per_ctx[1] = 0;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+	{
+		tasks_per_ctx[0] += tasks_executed[0][i];
+		tasks_per_ctx[1] += tasks_executed[1][i];
+	}
+
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_per_ctx[0]/nprocs1, NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_per_ctx[1]/nprocs2, NTASKS);
+
+enodev:
+#ifdef STARPU_USE_CPU
+	free(procs1);
+	free(procs2);
+#endif
+	starpu_shutdown();
+	return ncpus == 0 ? 77 : 0;
+}
diff --git a/examples/sched_ctx/two_cpu_contexts.c b/examples/sched_ctx/two_cpu_contexts.c
new file mode 100644
index 0000000..aa5383f
--- /dev/null
+++ b/examples/sched_ctx/two_cpu_contexts.c
@@ -0,0 +1,124 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
/*
 * CPU implementation of the codelet: unpack the task number from the packed
 * codelet arguments and print it together with the id of the executing worker.
 * The data descriptors are unused (the codelet declares no buffers).
 */
static void sched_ctx_cpu_func(void *descr[], void *cl_args)
{
	int id;
	int workerid;

	(void)descr;

	starpu_codelet_unpack_args(cl_args, &id);
	workerid = starpu_worker_get_id();

	printf("task: %d, workerid: %d\n", id, workerid);
}
NULL); +} + +void update_sched_ctx_timing_results(double flops, double avg_timing) +{ + unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); + rv[*id].flops += flops; + rv[*id].avg_timing += avg_timing; +} + +void* start_bench(void *val) +{ + struct params *p = (struct params*)val; + int i; + + STARPU_PTHREAD_SETSPECIFIC(key, &p->id); + + if(p->ctx != 0) + starpu_sched_ctx_set_context(&p->ctx); + + for(i = 0; i < NSAMPLES; i++) + p->bench(p->size, p->nblocks); + + if(p->ctx != 0) + { + STARPU_PTHREAD_MUTEX_LOCK(&mut); + if(first) + { + starpu_sched_ctx_delete(p->ctx); + } + + first = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&mut); + } + + rv[p->id].flops /= NSAMPLES; + rv[p->id].avg_timing /= NSAMPLES; + + return NULL; +} + +void start_2benchs(void (*bench)(unsigned, unsigned)) +{ + p1.bench = bench; + p1.size = size1; + printf("size %u\n", size1); + p1.nblocks = nblocks1; + + p2.bench = bench; + p2.size = size2; + printf("size %u\n", size2); + p2.nblocks = nblocks2; + + starpu_pthread_t tid[2]; + STARPU_PTHREAD_MUTEX_INIT(&mut, NULL); + + double start; + double end; + + start = starpu_timing_now(); + + STARPU_PTHREAD_CREATE(&tid[0], NULL, (void*)start_bench, (void*)&p1); + STARPU_PTHREAD_CREATE(&tid[1], NULL, (void*)start_bench, (void*)&p2); + + STARPU_PTHREAD_JOIN(tid[0], NULL); + STARPU_PTHREAD_JOIN(tid[1], NULL); + + end = starpu_timing_now(); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = end - start; + timing /= 1000000; + + printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops); + printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing); + +} + +void start_1stbench(void (*bench)(unsigned, unsigned)) +{ + p1.bench = bench; + p1.size = size1; + p1.nblocks = nblocks1; + + double start; + double end; + + start = starpu_timing_now(); + + start_bench((void*)&p1); + + end = starpu_timing_now(); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = end - start; + timing /= 1000000; + + printf("%2.2f ", rv[0].flops); + printf("%2.2f %2.2f\n", 
rv[0].avg_timing, timing); +} + +void start_2ndbench(void (*bench)(unsigned, unsigned)) +{ + p2.bench = bench; + p2.size = size2; + p2.nblocks = nblocks2; + + double start; + double end; + + start = starpu_timing_now(); + + start_bench((void*)&p2); + + end = starpu_timing_now(); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = end - start; + timing /= 1000000; + + printf("%2.2f ", rv[1].flops); + printf("%2.2f %2.2f\n", rv[1].avg_timing, timing); +} + +void construct_contexts() +{ + unsigned nprocs1 = cpu1 + gpu + gpu1; + unsigned nprocs2 = cpu2 + gpu + gpu2; + unsigned n_all_gpus = gpu + gpu1 + gpu2; + int procs[nprocs1]; + unsigned i; + int k = 0; + + for(i = 0; i < gpu; i++) + { + procs[k++] = i; + printf("%u ", i); + } + + for(i = gpu; i < gpu + gpu1; i++) + { + procs[k++] = i; + printf("%u ", i); + } + + + for(i = n_all_gpus; i < n_all_gpus + cpu1; i++) + { + procs[k++] = i; + printf("%u ", i); + } + printf("\n "); + + p1.ctx = starpu_sched_ctx_create(procs, nprocs1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); + p2.the_other_ctx = (int)p1.ctx; + p1.procs = procs; + p1.nprocs = nprocs1; + int procs2[nprocs2]; + + k = 0; + + for(i = 0; i < gpu; i++) + { + procs2[k++] = i; + printf("%u ", i); + } + + for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++) + { + procs2[k++] = i; + printf("%u ", i); + } + + for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++) + { + procs2[k++] = i; + printf("%u ", i); + } + printf("\n"); + + p2.ctx = starpu_sched_ctx_create(procs2, nprocs2, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); + p1.the_other_ctx = (int)p2.ctx; + p2.procs = procs2; + starpu_sched_ctx_set_inheritor(p1.ctx, p2.ctx); + starpu_sched_ctx_set_inheritor(p2.ctx, p1.ctx); + p2.nprocs = nprocs2; +} + + +void parse_args_ctx(int argc, char **argv) +{ + init(); + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-size1") == 0) + { + char *argptr; + size1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], 
"-nblocks1") == 0) + { + char *argptr; + nblocks1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-size2") == 0) + { + char *argptr; + size2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks2") == 0) + { + char *argptr; + nblocks2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-cpu1") == 0) + { + char *argptr; + cpu1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-cpu2") == 0) + { + char *argptr; + cpu2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu") == 0) + { + char *argptr; + gpu = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu1") == 0) + { + char *argptr; + gpu1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu2") == 0) + { + char *argptr; + gpu2 = strtol(argv[++i], &argptr, 10); + } + } +} diff --git a/examples/sched_ctx_utils/sched_ctx_utils.h b/examples/sched_ctx_utils/sched_ctx_utils.h new file mode 100644 index 0000000..52f960f --- /dev/null +++ b/examples/sched_ctx_utils/sched_ctx_utils.h @@ -0,0 +1,27 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
/* Reset the benchmark settings to their defaults, then read the -size1,
 * -nblocks1, -size2, -nblocks2, -cpu1, -cpu2, -gpu, -gpu1 and -gpu2 options
 * from the command line. */
void parse_args_ctx(int argc, char **argv);
/* Add the given figures to the result slot of the calling benchmark thread
 * (the slot is selected through a thread-specific key set by the thread
 * started for that benchmark). */
void update_sched_ctx_timing_results(double gflops, double timing);
/* Create the two scheduling contexts "sched_ctx1" and "sched_ctx2" (policy
 * "heft") from the cpu/gpu counts read by parse_args_ctx(), and make each one
 * the inheritor of the other. */
void construct_contexts();
/* Run bench(size, nblocks) concurrently in two threads, one per parameter
 * set, and print the flops, average timings and elapsed time. */
void start_2benchs(void (*bench)(unsigned size, unsigned nblocks));
/* Run bench(size, nblocks) in the calling thread with the first parameter
 * set and print its flops, average timing and elapsed time. */
void start_1stbench(void (*bench)(unsigned size, unsigned nblocks));
/* Same as start_1stbench(), but with the second parameter set. */
void start_2ndbench(void (*bench)(unsigned size, unsigned nblocks));
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +struct dummy_sched_params +{ + int verbose; +}; + +struct dummy_sched_data +{ + int verbose; + struct starpu_task_list sched_list; + starpu_pthread_mutex_t policy_mutex; +}; + +static void dummy_deinit_data(struct starpu_sched_component * component) +{ + struct dummy_sched_data *data = component->data; + + STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); + + if (data->verbose) + fprintf(stderr, "Destroying Dummy scheduler\n"); + + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + free(data); +} + +static int dummy_push_task(struct starpu_sched_component *component, struct starpu_task *task) +{ + struct dummy_sched_data *data = component->data; + if (data->verbose) + fprintf(stderr, "pushing task %p\n", task); + + /* NB: In this simplistic strategy, we assume that the context in which + we push task has at least one worker*/ + + /* lock all workers when pushing tasks on a list where all + of them would pop for tasks */ + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + + starpu_task_list_push_front(&data->sched_list, task); + + starpu_push_task_end(task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + /* Tell below that they can now pull */ + component->can_pull(component); + + return 0; +} + +static struct starpu_task *dummy_pull_task(struct starpu_sched_component *component, struct starpu_sched_component *to) +{ + struct dummy_sched_data *data = component->data; + if (data->verbose) + fprintf(stderr, "%p pulling for a task\n", to); + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_task_list_empty(&data->sched_list)) + return NULL; +#endif + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + struct starpu_task *task = NULL; + if (!starpu_task_list_empty(&data->sched_list)) + task = starpu_task_list_pop_back(&data->sched_list); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + return task; +} + +static int dummy_can_push(struct starpu_sched_component 
* component, struct starpu_sched_component * to) +{ + struct dummy_sched_data *data = component->data; + int didwork = 0; + + if (data->verbose) + fprintf(stderr, "%p tells me I can push to him\n", to); + + struct starpu_task *task; + task = starpu_sched_component_pump_to(component, to, &didwork); + + if (task) + { + if (data->verbose) + fprintf(stderr, "oops, %p couldn't take our task\n", to); + /* Oops, we couldn't push everything, put back this task */ + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_task_list_push_back(&data->sched_list, task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + } + else + { + if (data->verbose) + { + if (didwork) + fprintf(stderr, "pushed some tasks to %p\n", to); + else + fprintf(stderr, "I didn't have anything for %p\n", to); + } + } + + /* There is room now */ + return didwork || starpu_sched_component_can_push(component, to); +} + +static int dummy_can_pull(struct starpu_sched_component * component) +{ + struct dummy_sched_data *data = component->data; + + if (data->verbose) + fprintf(stderr,"telling below they can pull\n"); + + return starpu_sched_component_can_pull(component); +} + +struct starpu_sched_component *dummy_create(struct starpu_sched_tree *tree, struct dummy_sched_params *params) +{ + struct starpu_sched_component *component = starpu_sched_component_create(tree, "dummy"); + struct dummy_sched_data *data = malloc(sizeof(*data)); + + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); + /* Create a linked-list of tasks and a condition variable to protect it */ + starpu_task_list_init(&data->sched_list); + data->verbose = params->verbose; + + component->data = data; + component->push_task = dummy_push_task; + component->pull_task = dummy_pull_task; + component->can_push = dummy_can_push; + component->can_pull = dummy_can_pull; + component->deinit_data = dummy_deinit_data; + + return component; +} + +static void init_dummy_sched(unsigned sched_ctx_id) +{ + FPRINTF(stderr, "Initialising Dummy 
scheduler\n"); + + struct dummy_sched_params params = + { + .verbose = 0, + }; + + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) dummy_create, ¶ms, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO, + sched_ctx_id); +} + +static void deinit_dummy_sched(unsigned sched_ctx_id) +{ + struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + starpu_sched_tree_destroy(t); +} + +static struct starpu_sched_policy dummy_sched_policy = +{ + .init_sched = init_dummy_sched, + .deinit_sched = deinit_dummy_sched, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "dummy", + .policy_description = "dummy modular scheduling strategy", + .worker_type = STARPU_WORKER_LIST, +}; + +int main(void) +{ + int ntasks = NTASKS; + int ret; + struct starpu_conf conf; + + char *sched = getenv("STARPU_SCHED"); + if (sched && sched[0]) + /* Testing a specific scheduler, no need to run this */ + return 77; + + starpu_conf_init(&conf); + conf.sched_policy = &dummy_sched_policy, + ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + ntasks /= 100; +#endif + + starpu_codelet_nop.model = &starpu_perfmodel_nop; + + int i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + + starpu_shutdown(); + + return 0; +} diff --git a/examples/scheduler/dummy_sched.c 
b/examples/scheduler/dummy_sched.c new file mode 100644 index 0000000..1100bf3 --- /dev/null +++ b/examples/scheduler/dummy_sched.c @@ -0,0 +1,172 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is an example of an application-defined scheduler. + * This is a mere eager scheduler with a centralized list of tasks to schedule: + * when a task becomes ready (push) it is put on the list. When a device + * becomes ready (pop), a task is taken from the list. + */ +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 320 +#elif !defined(STARPU_LONG_CHECK) +#define NTASKS 3200 +#else +#define NTASKS 32000 +#endif +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +struct dummy_sched_data +{ + struct starpu_task_list sched_list; + starpu_pthread_mutex_t policy_mutex; +}; + +static void init_dummy_sched(unsigned sched_ctx_id) +{ + struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data)); + + /* Create a linked-list of tasks and a condition variable to protect it */ + starpu_task_list_init(&data->sched_list); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); + FPRINTF(stderr, "Initialising Dummy scheduler\n"); +} + +static void deinit_dummy_sched(unsigned sched_ctx_id) +{ + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); + + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + + free(data); + + FPRINTF(stderr, "Destroying Dummy scheduler\n"); +} + +static int push_task_dummy(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + /* NB: In this simplistic strategy, we assume that the context in which + we push task has at least one worker*/ + + + /* lock all workers when pushing tasks on a list where all + of them would pop for tasks */ + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + + starpu_task_list_push_front(&data->sched_list, task); + + starpu_push_task_end(task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + /*if there are no tasks block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + 
starpu_wake_worker_relax_light(worker); + } + + return 0; +} + +/* The mutex associated to the calling worker is already taken by StarPU */ +static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id) +{ + /* NB: In this simplistic strategy, we assume that all workers are able + * to execute all tasks, otherwise, it would have been necessary to go + * through the entire list until we find a task that is executable from + * the calling worker. So we just take the head of the list and give it + * to the worker. */ + struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_task_list_empty(&data->sched_list)) + return NULL; +#endif + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + struct starpu_task *task = NULL; + if (!starpu_task_list_empty(&data->sched_list)) + task = starpu_task_list_pop_back(&data->sched_list); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + return task; +} + +static struct starpu_sched_policy dummy_sched_policy = +{ + .init_sched = init_dummy_sched, + .deinit_sched = deinit_dummy_sched, + .push_task = push_task_dummy, + .pop_task = pop_task_dummy, + .policy_name = "dummy", + .policy_description = "dummy scheduling strategy", + .worker_type = STARPU_WORKER_LIST, +}; + +int main(void) +{ + int ntasks = NTASKS; + int ret; + struct starpu_conf conf; + + char *sched = getenv("STARPU_SCHED"); + if (sched && sched[0]) + /* Testing a specific scheduler, no need to run this */ + return 77; + + starpu_conf_init(&conf); + conf.sched_policy = &dummy_sched_policy, + ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + ntasks /= 100; +#endif + + starpu_codelet_nop.model = &starpu_perfmodel_nop; + + int i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + ret = 
starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + + starpu_shutdown(); + + return 0; +} diff --git a/examples/scheduler/heteroprio_test.c b/examples/scheduler/heteroprio_test.c new file mode 100644 index 0000000..d1284eb --- /dev/null +++ b/examples/scheduler/heteroprio_test.c @@ -0,0 +1,238 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is an example making use of the heteroprio scheduler, it shows how + * priorities are taken into account. + */ +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) 
/*
 * Scheduler initialisation callback for the heteroprio policy: declare, for
 * scheduling context sched_ctx, how many priority buckets each architecture
 * uses and how its local priorities map onto buckets.
 *
 * - CPU (when built with CPU support and CPU workers exist): 3 buckets,
 *   mapped one to one, with the CPU registered as the faster architecture.
 * - OpenCL (when built with OpenCL support): 2 buckets. Its first priority
 *   maps to the highest bucket, where OpenCL is the faster architecture; its
 *   second priority maps to the bucket just below, where OpenCL is declared
 *   1.7 times slower.
 */
void initSchedulerCallback(unsigned sched_ctx)
{
	/* Unused when neither CPU nor OpenCL support is compiled in */
	(void)sched_ctx;

	// CPU uses 3 buckets
#ifdef STARPU_USE_CPU
	if (starpu_cpu_worker_get_count())
	{
		/* Configure the context we were called for, like every other
		 * call below, rather than a hard-coded context 0 */
		starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_CPU_WORKER, 3);
		// It uses direct mapping idx => idx
		unsigned idx;
		for(idx = 0; idx < 3; ++idx)
		{
			starpu_heteroprio_set_mapping(sched_ctx, STARPU_CPU_WORKER, idx, idx);
			starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CPU_WORKER, idx);
		}
	}
#endif
#ifdef STARPU_USE_OPENCL
	// OpenCL is enabled and uses 2 buckets
	starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_OPENCL_WORKER, 2);
	// OpenCL will first look to priority 2 (1 when there is no CPU worker)
	int prio2 = starpu_cpu_worker_get_count() ? 2 : 1;
	starpu_heteroprio_set_mapping(sched_ctx, STARPU_OPENCL_WORKER, 0, prio2);
	// For this bucket OpenCL is the fastest
	starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_OPENCL_WORKER, prio2);
	// And CPU is 4 times slower
#ifdef STARPU_USE_CPU
	/* NOTE(review): the bucket is the literal 2 here while prio2 may be 1
	 * when no CPU worker exists -- confirm whether prio2 was intended */
	starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 2, 4.0f);
#endif

	int prio1 = starpu_cpu_worker_get_count() ? 1 : 0;
	starpu_heteroprio_set_mapping(sched_ctx, STARPU_OPENCL_WORKER, 1, prio1);
	// We let the CPU as the fastest and tell that OpenCL is 1.7 times slower
	starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_OPENCL_WORKER, prio1, 1.7f);
#endif
}
FPRINTF(stderr, "Worker CPU = %d\n", ncpus); + FPRINTF(stderr, "Worker OpenCL = %d\n", nopencls); + if (ncpus + nopencls == 0) + { + FPRINTF(stderr, "Needs at least one CPU or OpenCL device\n"); + starpu_shutdown(); + return 77; + } + + struct starpu_codelet codeleteA; + { + memset(&codeleteA, 0, sizeof(codeleteA)); + codeleteA.nbuffers = 2; + codeleteA.modes[0] = STARPU_RW; + codeleteA.modes[1] = STARPU_RW; + codeleteA.name = "codeleteA"; +#ifdef STARPU_USE_CPU + codeleteA.cpu_funcs[0] = callback_a_cpu; +#endif +#ifdef STARPU_USE_OPENCL + codeleteA.opencl_funcs[0] = callback_a_opencl; +#endif + } + struct starpu_codelet codeleteB; + { + memset(&codeleteB, 0, sizeof(codeleteB)); + codeleteB.nbuffers = 2; + codeleteB.modes[0] = STARPU_RW; + codeleteB.modes[1] = STARPU_RW; + codeleteB.name = "codeleteB"; + codeleteB.cpu_funcs[0] = callback_b_cpu; +#ifdef STARPU_USE_OPENCL + codeleteB.opencl_funcs[0] = callback_b_opencl; +#endif + } + struct starpu_codelet codeleteC; + { + memset(&codeleteC, 0, sizeof(codeleteC)); + codeleteC.nbuffers = 2; + codeleteC.modes[0] = STARPU_RW; + codeleteC.modes[1] = STARPU_RW; + codeleteC.name = "codeleteC"; + codeleteC.cpu_funcs[0] = callback_c_cpu; +#ifdef STARPU_USE_OPENCL + codeleteC.opencl_funcs[0] = callback_c_opencl; +#endif + } + + const int nbHandles = 10; + FPRINTF(stderr, "Nb handles = %d\n", nbHandles); + + starpu_data_handle_t handles[nbHandles]; + memset(handles, 0, sizeof(handles[0])*nbHandles); + int dataA[nbHandles]; + int idx; + for(idx = 0; idx < nbHandles; ++idx) + { + dataA[idx] = idx; + } + int idxHandle; + for(idxHandle = 0; idxHandle < nbHandles; ++idxHandle) + { + starpu_variable_data_register(&handles[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); + } + + const int nbTasks = 4; + FPRINTF(stderr, "Submit %d tasks \n", nbTasks); + + int prio2 = starpu_cpu_worker_get_count() ? 
2 : 1; + + int idxTask; + for(idxTask = 0; idxTask < nbTasks; ++idxTask) + { + ret = starpu_task_insert(&codeleteA, + STARPU_PRIORITY, 0, + (STARPU_RW), handles[(idxTask*2)%nbHandles], + (STARPU_RW), handles[(idxTask*3+1)%nbHandles], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&codeleteB, + STARPU_PRIORITY, 1, + (STARPU_RW), handles[(idxTask*2 +1)%nbHandles], + (STARPU_RW), handles[(idxTask*2)%nbHandles], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&codeleteC, + STARPU_PRIORITY, prio2, + (STARPU_RW), handles[(idxTask)%nbHandles], + (STARPU_RW), handles[(idxTask*idxTask)%nbHandles], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + FPRINTF(stderr, "Wait task\n"); + starpu_task_wait_for_all(); + + FPRINTF(stderr, "Release data\n"); + for(idxHandle = 0 ; idxHandle < nbHandles ; ++idxHandle) + { + starpu_data_unregister(handles[idxHandle]); + } + + FPRINTF(stderr, "Shutdown\n"); + + starpu_shutdown(); + return 0; +} diff --git a/examples/scheduler/libdummy_sched.c b/examples/scheduler/libdummy_sched.c new file mode 100644 index 0000000..3d9286c --- /dev/null +++ b/examples/scheduler/libdummy_sched.c @@ -0,0 +1,139 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+/*
+ * This is an example of an application-defined scheduler.
+ * This is a mere eager scheduler with a centralized list of tasks to schedule:
+ * when a task becomes ready (push) it is put on the list. When a device
+ * becomes ready (pop), a task is taken from the list.
+ */
+#include
+#include
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+struct dummy_sched_data
+{
+	struct starpu_task_list sched_list;
+	starpu_pthread_mutex_t policy_mutex;
+};
+/* init_sched hook: allocate the per-context data and attach it to the context */
+static void init_dummy_sched(unsigned sched_ctx_id)
+{
+	struct dummy_sched_data *data = malloc(sizeof(*data));
+	STARPU_ASSERT(data);
+	/* Create a linked-list of tasks and the mutex that protects it */
+	starpu_task_list_init(&data->sched_list);
+
+	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
+
+	STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
+	FPRINTF(stderr, "Initialising Dummy scheduler\n");
+}
+/* deinit_sched hook: the task list must be empty; release what init allocated */
+static void deinit_dummy_sched(unsigned sched_ctx_id)
+{
+	struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	STARPU_ASSERT(starpu_task_list_empty(&data->sched_list));
+
+	STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
+
+	free(data);
+
+	FPRINTF(stderr, "Destroying Dummy scheduler\n");
+}
+/* push_task hook: queue the task at the front of the list, then wake the workers */
+static int push_task_dummy(struct starpu_task *task)
+{
+	unsigned sched_ctx_id = task->sched_ctx;
+	struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	/* NB: In this simplistic strategy, we assume that the context in which
+	   we push task has at least one worker*/
+
+
+	/* take the policy mutex: the list is shared with every worker
+	   that pops tasks from it */
+	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
+
+	starpu_task_list_push_front(&data->sched_list, task);
+
+	starpu_push_task_end(task);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
+
+	/* the task is queued and the mutex released:
+	   wake every worker of the context so that one of them pops it */
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_sched_ctx_iterator it;
+	workers->init_iterator(workers, &it);
+	while(workers->has_next(workers, &it))
+	{
+		unsigned worker = workers->get_next(workers, &it);
+		starpu_wake_worker_relax_light(worker);
+	}
+
+	return 0;
+}
+
+/* The mutex associated to the calling worker is already taken by StarPU */
+static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id)
+{
+	/* NB: In this simplistic strategy, we assume that all workers are able
+	 * to execute all tasks, otherwise, it would have been necessary to go
+	 * through the entire list until we find a task that is executable from
+	 * the calling worker. So we just take the head of the list and give it
+	 * to the worker. */
+	struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+#ifdef STARPU_NON_BLOCKING_DRIVERS
+	if (starpu_task_list_empty(&data->sched_list))
+		return NULL;
+#endif
+	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
+	struct starpu_task *task = NULL;
+	if (!starpu_task_list_empty(&data->sched_list))
+		task = starpu_task_list_pop_back(&data->sched_list);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
+	return task;
+}
+
+static struct starpu_sched_policy dummy_sched_policy =
+{
+	.init_sched = init_dummy_sched,
+	.deinit_sched = deinit_dummy_sched,
+	.push_task = push_task_dummy,
+	.pop_task = pop_task_dummy,
+	.policy_name = "dummy",
+	.policy_description = "dummy scheduling strategy",
+	.worker_type = STARPU_WORKER_LIST,
+};
+/* Look a policy up by name; returns NULL for an unknown (or NULL) name */
+struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name)
+{
+	if (name && !strcmp(name, "dummy"))
+		return &dummy_sched_policy;
+	return NULL;
+}
+/* Returned without a length, so the array carries a NULL sentinel */
+struct starpu_sched_policy *predefined_policies[] =
+{
+	&dummy_sched_policy, NULL
+};
+
+struct starpu_sched_policy **starpu_get_sched_lib_policies(void)
+{
+	return predefined_policies;
+}
diff 
--git a/examples/scheduler/libdummy_sched.sh b/examples/scheduler/libdummy_sched.sh
new file mode 100755
index 0000000..992eb5e
--- /dev/null
+++ b/examples/scheduler/libdummy_sched.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+# Run the incrementer example with the dummy scheduler library; skip (77) if it was not built.
+ROOT=$(dirname "$0")
+if test -x "$ROOT/../incrementer/incrementer"
+then
+	STARPU_SCHED_LIB="$ROOT/.libs/libdummy_sched.so" STARPU_SCHED=dummy "$ROOT/../incrementer/incrementer"
+else
+	exit 77
+fi
diff --git a/examples/scheduler/schedulers.sh b/examples/scheduler/schedulers.sh
new file mode 100755
index 0000000..5e51e29
--- /dev/null
+++ b/examples/scheduler/schedulers.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# check_success STATUS: report SKIP/FAIL/PASS on fd 9 when it is open; exit on skip or failure.
+check_success()
+{
+	if [ "$1" -eq 77 ] ; then
+		( echo SKIP: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true
+		echo "skip" >&2
+		exit "$1"
+	elif [ "$1" -ne 0 ] ; then
+		( echo FAIL: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true
+		echo "failure" >&2
+		exit "$1"
+	else
+		( echo PASS: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true
+	fi
+}
+
+basedir=$(dirname "$0")
+if test ! -x "$basedir/../cholesky/cholesky_tag"
+then
+	echo "Application $basedir/../cholesky/cholesky_tag unavailable"
+	exit 77
+fi
+
+if [ -n "$STARPU_SCHED" ]
+then
+	SCHEDULERS=$STARPU_SCHED
+else
+	SCHEDULERS=$("$basedir/../../tools/starpu_sched_display" | grep -v heteroprio)
+fi
+
+if [ "$STARPU_QUICK_CHECK" = 1 ]
+then
+	SIDE=32
+else
+	SIDE=320
+fi
+# run SCHED: run cholesky_tag under the given scheduler (launcher variables are split on purpose).
+run()
+{
+	sched=$1
+	echo "cholesky.$sched"
+	STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH "$basedir/../cholesky/cholesky_tag" -size $(($SIDE*3)) -nblocks 3
+	check_success $?
+}
+
+if [ -n "$STARPU_SUB_PARALLEL" ]
+then
+	for sched in $SCHEDULERS
+	do
+		run "$sched" &
+	done
+	RESULT=0
+	while true
+	do
+		wait -n
+		RET=$?
+		if [ $RET = 127 ] ; then break ; fi
+		if [ $RET != 0 ] && [ $RET != 77 ] ; then RESULT=1 ; fi
+	done
+	exit $RESULT
+else
+	for sched in $SCHEDULERS
+	do
+		run "$sched"
+	done
+fi
diff --git a/examples/scheduler/schedulers_context.sh b/examples/scheduler/schedulers_context.sh
new file mode 100755
index 0000000..e893f1f
--- /dev/null
+++ b/examples/scheduler/schedulers_context.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# check_success STATUS: report SKIP/FAIL/PASS on fd 9 when it is open; exit on skip or failure.
+check_success()
+{
+	if [ "$1" -eq 77 ] ; then
+		( echo SKIP: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true
+		echo "skip" >&2
+		exit "$1"
+	elif [ "$1" -ne 0 ] ; then
+		( echo FAIL: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true
+		echo "failure" >&2
+		exit "$1"
+	else
+		( echo PASS: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true
+	fi
+}
+
+basedir=$(dirname "$0")
+if test ! -x "$basedir/../sched_ctx/sched_ctx"
+then
+	echo "Application $basedir/../sched_ctx/sched_ctx unavailable"
+	exit 77
+fi
+
+if [ -n "$STARPU_SCHED" ]
+then
+	SCHEDULERS="$STARPU_SCHED"
+else
+	SCHEDULERS=$("$basedir/../../tools/starpu_sched_display" | grep -v pheft | grep -v peager | grep -v heteroprio | grep -v modular-gemm)
+fi
+# run SCHED: run sched_ctx under the given scheduler (launcher variables are split on purpose).
+run()
+{
+	sched=$1
+	echo "sched_ctx.$sched"
+	STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH "$basedir/../sched_ctx/sched_ctx"
+	check_success $?
+}
+
+if [ -n "$STARPU_SUB_PARALLEL" ]
+then
+	for sched in $SCHEDULERS
+	do
+		run "$sched" &
+	done
+	RESULT=0
+	while true
+	do
+		wait -n
+		RET=$?
+		if [ $RET = 127 ] ; then break ; fi
+		if [ $RET != 0 ] && [ $RET != 77 ] ; then RESULT=1 ; fi
+	done
+	exit $RESULT
+else
+	for sched in $SCHEDULERS
+	do
+		run "$sched"
+	done
+fi
diff --git a/examples/spmd/vector_scal_spmd.c b/examples/spmd/vector_scal_spmd.c
new file mode 100644
index 0000000..a90b53b
--- /dev/null
+++ b/examples/spmd/vector_scal_spmd.c
@@ -0,0 +1,170 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This shows how to implement an spmd parallel StarPU task: scal_cpu_func is
+ * called in parallel over several cores, and has to split the work accordingly.
+ * This is a mere vector scaling example.
+ */
+
+/* gcc build:
+ *
+ * gcc -O2 -g vector_scal_spmd.c -o vector_scal_spmd $(pkg-config --cflags starpu-1.4) $(pkg-config --libs starpu-1.4)
+ *
+ */
+
+#include
+#include
+#include
+
+#define MIN(a,b)	((a)<(b)?(a):(b))
+
+#define NX 204800
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+#ifdef STARPU_QUICK_CHECK
+#define ITER 10
+#else
+#define ITER 100
+#endif
+/* Index of the first element handled by worker `rank` when nel elements are split over nb_workers */
+static int get_first_element_rank(int nel, int rank, int nb_workers)
+{
+	if(rank == 0)
+		return 0;
+
+	/* We get the number of bigger parts which stand before the part */
+	int nb_big_parts = MIN(nel % nb_workers, rank);
+
+	return nb_big_parts * (nel / nb_workers + 1) + (rank - nb_big_parts) * (nel / nb_workers);
+}
+/* SPMD kernel: each member of the combined worker scales its own contiguous slice of the vector */
+void scal_cpu_func(void *buffers[], void *_args)
+{
+	int i;
+	float *factor = _args, f = *factor;
+	struct starpu_vector_interface *vector = buffers[0];
+	int n = STARPU_VECTOR_GET_NX(vector);
+	float *val = (float *)STARPU_VECTOR_GET_PTR(vector);
+
+	int nb_workers = starpu_combined_worker_get_size();
+	int rank = starpu_combined_worker_get_rank();
+
+	if (rank == 0)
+		FPRINTF(stderr, "running task with %d CPUs.\n", nb_workers);
+
+	/* We add 1 to the (nel_total % nb_workers) first workers, thus we get an evenly split data. */
+	int nel_worker = (n / nb_workers) + ((rank < (n % nb_workers)) ? 1 : 0);
+
+	int begin = get_first_element_rank(n, rank, nb_workers);
+
+	for (i = 0; i < nel_worker; i++)
+	{
+		int idx = i + begin;
+
+		float v = val[idx];
+		int j;
+		for (j = 0; j < 100; j++)
+			v = v * f;
+		val[idx] = v;
+	}
+}
+
+static struct starpu_perfmodel vector_scal_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scal_parallel"
+};
+
+static struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW },
+	.type = STARPU_SPMD,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {scal_cpu_func},
+	.cpu_funcs_name = {"scal_cpu_func"},
+	.nbuffers = 1,
+	.model = &vector_scal_model,
+};
+
+int main(void)
+{
+	struct starpu_conf conf;
+	float *vector;
+	unsigned i;
+	int ret;
+
+	starpu_conf_init(&conf);
+	conf.single_combined_worker = 1;
+	conf.sched_policy_name = "pheft";
+
+	{
+		ret = starpu_init(NULL);
+		if (ret == -ENODEV) return 77;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+		conf.ncpus = starpu_cpu_worker_get_count();
+		conf.ncpus /= 2;
+		starpu_shutdown();
+	}
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	ret = starpu_malloc((void **)&vector, NX*sizeof(float));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	for (i = 0; i < NX; i++)
+		vector[i] = (i+1.0f);
+
+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
+
+	starpu_data_handle_t vector_handle;
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 1.001f;
+
+	for (i = 0; i < ITER; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &cl;
+
+		task->handles[0] = vector_handle;
+		task->cl_arg = &factor;
+		task->cl_arg_size = sizeof(factor);
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV)
+		{
+			ret = 77;
+			break;
+		}
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	starpu_data_unregister(vector_handle);
+
+	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
+	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
+
+	starpu_free_noflag(vector, NX*sizeof(float));
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	return ret;
+}
diff --git a/examples/spmv/dw_block_spmv.c b/examples/spmv/dw_block_spmv.c
new file mode 100644
index 0000000..1f07e75
--- /dev/null
+++ b/examples/spmv/dw_block_spmv.c
@@ -0,0 +1,342 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2010-2010 Mehdi Juhoor
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This computes an SPMV on a BCSR sparse matrix. It simply splits the matrix
+ * into its blocks, thus turning the problem into mere matrix-vector products
+ * (GEMV) which can be run in parallel.
+ */
+#include "dw_block_spmv.h"
+#include "matrix_market/mm_to_bcsr.h"
+
+#ifdef STARPU_HAVE_HELGRIND_H
+#include
+#endif
+#ifndef ANNOTATE_HAPPENS_BEFORE
+#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0)
+#endif
+#ifndef ANNOTATE_HAPPENS_AFTER
+#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0)
+#endif
+
+#define FPRINTF(ofile, fmt, ...)
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+static double start;
+static double end;
+
+static sem_t sem;
+
+static unsigned c = 256;
+static unsigned r = 256;
+
+static int remainingtasks = -1;
+
+static starpu_data_handle_t sparse_matrix;
+static starpu_data_handle_t vector_in, vector_out;
+
+static uint32_t size;
+static char *inputfile;
+static bcsr_t *bcsr_matrix;
+
+static float *vector_in_ptr;
+static float *vector_out_ptr;
+/* Load the input matrix as BCSR and register it and the two vectors with StarPU */
+void create_data(void)
+{
+	/* read the input file */
+	bcsr_matrix = mm_file_to_bcsr(inputfile, c, r);
+
+	/* declare the corresponding block CSR to the runtime */
+	starpu_bcsr_data_register(&sparse_matrix, STARPU_MAIN_RAM, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
+			(uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr,
+			0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float));
+
+	size = c*r*starpu_bcsr_get_nnz(sparse_matrix);
+/*	printf("size = %d \n ", size); */
+
+	/* initiate the 2 vectors */
+	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
+	assert(vector_in_ptr);
+
+	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
+	assert(vector_out_ptr);
+
+	/* fill those */
+	unsigned ind;
+	for (ind = 0; ind < size; ind++)
+	{
+		vector_in_ptr[ind] = 2.0f;
+		vector_out_ptr[ind] = 0.0f;
+	}
+
+	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
+	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));
+}
+/* Gather the partitioned pieces back to main RAM and unregister the three handles */
+void unregister_data(void)
+{
+	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
+	starpu_data_unregister(sparse_matrix);
+
+	starpu_data_unpartition(vector_in, STARPU_MAIN_RAM);
+	starpu_data_unregister(vector_in);
+
+	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
+	starpu_data_unregister(vector_out);
+}
+/* Task callback: decrement the shared counter; the last task records the end time and posts sem */
+void init_problem_callback(void *arg)
+{
+	unsigned *remaining = arg;
+
+	unsigned val = STARPU_ATOMIC_ADD(remaining, -1);
+	ANNOTATE_HAPPENS_BEFORE(remaining); /* key on the shared counter, not on this local pointer */
+
+/*	if (val < 10)
+		printf("callback %d remaining \n", val); */
+
+	if (val == 0)
+	{
+		ANNOTATE_HAPPENS_AFTER(remaining);
+		printf("DONE ...\n");
+		end = starpu_timing_now();
+
+		sem_post(&sem);
+	}
+}
+/* Partition the matrix into its blocks and both vectors into equal chunks */
+void call_filters(void)
+{
+	/* zero-initialised so that the fields left unset below hold no garbage */
+	struct starpu_data_filter bcsr_f = {0};
+	struct starpu_data_filter vector_in_f = {0}, vector_out_f = {0};
+
+	bcsr_f.filter_func = starpu_bcsr_filter_canonical_block;
+	bcsr_f.get_nchildren = starpu_bcsr_filter_canonical_block_get_nchildren;
+	/* the children use a matrix interface ! */
+	bcsr_f.get_child_ops = starpu_bcsr_filter_canonical_block_child_ops;
+
+	vector_in_f.filter_func = starpu_vector_filter_block;
+	vector_in_f.nchildren = size/c;
+	vector_in_f.get_nchildren = NULL;
+	vector_in_f.get_child_ops = NULL;
+
+	vector_out_f.filter_func = starpu_vector_filter_block;
+	vector_out_f.nchildren = size/r;
+	vector_out_f.get_nchildren = NULL;
+	vector_out_f.get_child_ops = NULL;
+
+	starpu_data_partition(sparse_matrix, &bcsr_f);
+
+	starpu_data_partition(vector_in, &vector_in_f);
+	starpu_data_partition(vector_out, &vector_out_f);
+}
+
+#define NSPMV	32
+unsigned totaltasks;
+
+struct starpu_codelet cl =
+{
+	.cpu_funcs = { cpu_block_spmv},
+	.cpu_funcs_name = { "cpu_block_spmv" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {cublas_block_spmv},
+#endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_RW}
+};
+/* Build NSPMV tasks per matrix block, chain them with tag dependencies, then submit them all */
+void launch_spmv_codelets(void)
+{
+	struct starpu_task *task_tab;
+	uint8_t *is_entry_tab;
+
+	/* we call one codelet per block */
+	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix);
+	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix);
+
+	remainingtasks = NSPMV*nblocks;
+	totaltasks = remainingtasks;
+
+	unsigned taskid = 0;
+
+	task_tab = calloc(totaltasks, sizeof(struct starpu_task));
+	STARPU_ASSERT(task_tab);
+
+	is_entry_tab = calloc(totaltasks, sizeof(uint8_t));
+	STARPU_ASSERT(is_entry_tab);
+
+	printf("there will be %d codelets\n", remainingtasks);
+
+	uint32_t *rowptr = starpu_bcsr_get_local_rowptr(sparse_matrix);
+	uint32_t *colind = starpu_bcsr_get_local_colind(sparse_matrix);
+
+	start = starpu_timing_now();
+
+	unsigned loop;
+	for (loop = 0; loop < NSPMV; loop++)
+	{
+		unsigned row;
+		unsigned part = 0;
+
+		for (row = 0; row < nrows; row++)
+		{
+			unsigned index;
+
+			if (rowptr[row] == rowptr[row+1])
+			{
+				continue;
+			}
+
+
+			for (index = rowptr[row]; index < rowptr[row+1]; index++, part++)
+			{
+				struct starpu_task *task = &task_tab[taskid];
+				starpu_task_init(task);
+
+				task->use_tag = 1;
+				task->tag_id = taskid;
+
+				task->callback_func = init_problem_callback;
+				task->callback_arg = &remainingtasks;
+				task->cl = &cl;
+				task->cl_arg = NULL;
+
+				unsigned i = colind[index];
+				unsigned j = row;
+
+				task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
+				task->handles[1] = starpu_data_get_sub_data(vector_in, 1, i);
+				task->handles[2] = starpu_data_get_sub_data(vector_out, 1, j);
+
+				/* all tasks in the same row are dependent so that we don't wait too much for data
+				 * we need to wait on the previous task if we are not the first task of a row */
+				if (index != rowptr[row & ~0x3])
+				{
+					/* this is not the first task in the row */
+					starpu_tag_declare_deps((starpu_tag_t)taskid, 1, (starpu_tag_t)(taskid-1));
+
+					is_entry_tab[taskid] = 0;
+				}
+				else
+				{
+					/* this is an entry task */
+					is_entry_tab[taskid] = 1;
+				}
+
+				taskid++;
+			}
+		}
+	}
+
+	printf("start submitting tasks !\n");
+
+	/* submit ALL tasks now */
+	unsigned nchains = 0;
+	unsigned task;
+	for (task = 0; task < totaltasks; task++)
+	{
+		int ret;
+		if (is_entry_tab[task])
+		{
+			nchains++;
+		}
+
+		ret = starpu_task_submit(&task_tab[task]);
+		if (ret == -ENODEV)
+			exit(77);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	printf("end of task submission (there was %u chains for %u tasks : ratio %u tasks per chain) !\n", nchains, totaltasks, nchains ? totaltasks/nchains : 0);
+	free(is_entry_tab);
+}
+
+void init_problem(void)
+{
+	/* create the sparse input matrix */
+	create_data();
+
+	/* create a new codelet that will perform a SpMV on it */
+	call_filters();
+}
+/* Print at most the first 16 entries of the input and output vectors */
+void print_results(void)
+{
+	unsigned row;
+
+	for (row = 0; row < STARPU_MIN(size, 16); row++)
+	{
+		printf("%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc < 2)
+	{
+		FPRINTF(stderr, "usage : %s filename [tile size]\n", argv[0]);
+		exit(-1);
+	}
+
+	if (argc == 3)
+	{
+		char *argptr;	/* third argument is the tile size: a strictly positive integer */
+		long tile = strtol(argv[2], &argptr, 10);
+		if (tile <= 0 || *argptr != '\0') { FPRINTF(stderr, "invalid tile size '%s'\n", argv[2]); exit(-1); }
+		c = r = (unsigned) tile;
+	}
+
+	inputfile = argv[1];
+
+	/* start the runtime */
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	starpu_cublas_init();
+
+	sem_init(&sem, 0, 0U);
+
+	init_problem();
+
+	launch_spmv_codelets();
+
+	sem_wait(&sem);
+	sem_destroy(&sem);
+
+	unregister_data();
+	print_results();
+
+	double totalflop = 2.0*c*r*totaltasks;
+
+	double timing = end - start;
+	FPRINTF(stderr, "Computation took (in ms)\n");
+	FPRINTF(stdout, "%2.2f\n", timing/1000);
+	FPRINTF(stderr, "Flop %e\n", totalflop);
+	FPRINTF(stderr, "GFlop/s : %2.2f\n", totalflop/timing/1000);
+
+	starpu_free_noflag(vector_in_ptr, size*sizeof(float));
+	starpu_free_noflag(vector_out_ptr, size*sizeof(float));
+
+	starpu_shutdown();
+
+	return 0;
+}
diff --git a/examples/spmv/dw_block_spmv.h b/examples/spmv/dw_block_spmv.h
new file mode 100644
index 0000000..953c781
--- /dev/null
+++ b/examples/spmv/dw_block_spmv.h
@@ -0,0 +1,36 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __DW_BLOCK_SPMV_H__
+#define __DW_BLOCK_SPMV_H__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+/* CPU kernel: descr[] holds the matrix block, the input chunk and the output chunk */
+void cpu_block_spmv(void *descr[], void *_args);
+
+#ifdef STARPU_USE_CUDA
+void cublas_block_spmv(void *descr[], void *_args);
+#endif /* STARPU_USE_CUDA */
+
+#endif /* __DW_BLOCK_SPMV_H__ */
diff --git a/examples/spmv/dw_block_spmv_kernels.c b/examples/spmv/dw_block_spmv_kernels.c
new file mode 100644
index 0000000..0ce50c9
--- /dev/null
+++ b/examples/spmv/dw_block_spmv_kernels.c
@@ -0,0 +1,79 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Standard GEMV kernel (on one matrix block of the sparse matrix)
+ */
+#include "dw_block_spmv.h"
+
+/*
+ * out = block * in + out, computed by CBLAS (s == 0) or cuBLAS (s == 1)
+ */
+
+#ifdef STARPU_USE_CUDA
+#include
+static const float p1 =  1.0;
+#endif
+
+static inline void common_block_spmv(void *descr[], int s, void *_args)
+{
+	(void)_args; /* the kernels take no argument */
+	/* descr[0]: matrix block, descr[1]: input vector chunk, descr[2]: output vector chunk */
+	float *block 	= (float *)STARPU_MATRIX_GET_PTR(descr[0]);
+	float *in 	= (float *)STARPU_VECTOR_GET_PTR(descr[1]);
+	float *out 	= (float *)STARPU_VECTOR_GET_PTR(descr[2]);
+
+	unsigned dx = STARPU_MATRIX_GET_NX(descr[0]);
+	unsigned dy = STARPU_MATRIX_GET_NY(descr[0]);
+
+	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
+
+	switch (s)
+	{
+		case 0:
+			cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1);
+			break;
+#ifdef STARPU_USE_CUDA
+		case 1:
+			{
+				cublasStatus_t status = cublasSgemv (starpu_cublas_get_local_handle(),
+						CUBLAS_OP_T, dx, dy, &p1, block, ld, in, 1, &p1, out, 1);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
+				break;
+			}
+#endif
+		default:
+			STARPU_ABORT();
+			break;
+	}
+}
+
+void cpu_block_spmv(void *descr[], void *_args)
+{
+/*	printf("CPU CODELET \n"); */
+
+	common_block_spmv(descr, 0, _args);
+}
+
+#ifdef STARPU_USE_CUDA
+void cublas_block_spmv(void *descr[], void *_args)
+{
+/*	printf("CUBLAS CODELET \n"); */
+
+	common_block_spmv(descr, 1, _args);
+}
+#endif /* STARPU_USE_CUDA */
diff --git a/examples/spmv/matrix_market/examples/fidapm05.mtx b/examples/spmv/matrix_market/examples/fidapm05.mtx
new file mode 100644
index 0000000..fa7ff7a
--- /dev/null
+++ b/examples/spmv/matrix_market/examples/fidapm05.mtx
@@ -0,0 +1,522 @@
+%%MatrixMarket matrix coordinate real general
+42 42 520
+1 1 1.9555555555555e+00
+2 1 -1.9999999999999e-01
+10 1 -1.0666666666667e+00
+11 1 -3.5555555555556e-01
+19 1 1.3322676295502e-15
+20 1 1.1111111111111e-01
+25 1 1.3333333333333e-01
+26 1 4.4444444444444e-03
+27 1 1.3333333333333e-02
+1 2 -1.9999999999999e-01
+2 2 
1.2444444444444e+00 +3 2 -2.0000000000001e-01 +4 2 -3.3333333333334e-02 +10 2 -3.5555555555555e-01 +11 2 -4.0000000000000e-01 +12 2 -3.5555555555555e-01 +13 2 1.1111111111111e-01 +19 2 1.1111111111111e-01 +20 2 -6.6666666666669e-02 +21 2 1.1111111111111e-01 +22 2 -2.2222222222222e-02 +25 2 3.3333333333333e-02 +26 2 1.1111111111111e-03 +27 2 6.6666666666666e-03 +28 2 3.3333333333334e-02 +29 2 1.1111111111111e-03 +30 2 6.6666666666669e-03 +2 3 -2.0000000000001e-01 +3 3 1.9555555555555e+00 +4 3 -1.9999999999999e-01 +11 3 -3.5555555555555e-01 +12 3 -1.0666666666667e+00 +13 3 -3.5555555555556e-01 +20 3 1.1111111111111e-01 +21 3 3.5527136788005e-15 +22 3 1.1111111111111e-01 +28 3 1.3333333333333e-01 +29 3 4.4444444444444e-03 +30 3 4.0000000000000e-02 +2 4 -3.3333333333334e-02 +3 4 -1.9999999999999e-01 +4 4 1.2444444444444e+00 +5 4 -2.0000000000001e-01 +6 4 -3.3333333333337e-02 +11 4 1.1111111111111e-01 +12 4 -3.5555555555554e-01 +13 4 -4.0000000000000e-01 +14 4 -3.5555555555555e-01 +15 4 1.1111111111112e-01 +20 4 -2.2222222222222e-02 +21 4 1.1111111111111e-01 +22 4 -6.6666666666664e-02 +23 4 1.1111111111111e-01 +24 4 -2.2222222222223e-02 +28 4 3.3333333333331e-02 +29 4 1.1111111111111e-03 +30 4 1.3333333333333e-02 +31 4 3.3333333333334e-02 +32 4 1.1111111111111e-03 +33 4 1.3333333333334e-02 +4 5 -2.0000000000001e-01 +5 5 1.9555555555555e+00 +6 5 -1.9999999999997e-01 +13 5 -3.5555555555555e-01 +14 5 -1.0666666666667e+00 +15 5 -3.5555555555557e-01 +22 5 1.1111111111111e-01 +23 5 7.5495165674511e-15 +24 5 1.1111111111111e-01 +31 5 1.3333333333333e-01 +32 5 4.4444444444444e-03 +33 5 6.6666666666666e-02 +4 6 -3.3333333333337e-02 +5 6 -1.9999999999997e-01 +6 6 1.2444444444445e+00 +7 6 -2.0000000000003e-01 +8 6 -3.3333333333331e-02 +13 6 1.1111111111111e-01 +14 6 -3.5555555555555e-01 +15 6 -4.0000000000001e-01 +16 6 -3.5555555555554e-01 +17 6 1.1111111111111e-01 +22 6 -2.2222222222223e-02 +23 6 1.1111111111111e-01 +24 6 -6.6666666666670e-02 +31 6 3.3333333333334e-02 +32 6 
1.1111111111111e-03 +33 6 2.0000000000000e-02 +34 6 3.3333333333332e-02 +35 6 1.1111111111110e-03 +36 6 1.9999999999999e-02 +40 6 1.1111111111111e-01 +41 6 -2.2222222222222e-02 +6 7 -2.0000000000003e-01 +7 7 1.9555555555555e+00 +8 7 -1.9999999999998e-01 +15 7 -3.5555555555554e-01 +16 7 -1.0666666666666e+00 +17 7 -3.5555555555554e-01 +24 7 1.1111111111110e-01 +34 7 1.3333333333333e-01 +35 7 4.4444444444444e-03 +36 7 9.3333333333331e-02 +40 7 -6.6613381477509e-15 +41 7 1.1111111111112e-01 +6 8 -3.3333333333331e-02 +7 8 -1.9999999999998e-01 +8 8 1.2444444444445e+00 +9 8 -2.0000000000005e-01 +15 8 1.1111111111111e-01 +16 8 -3.5555555555558e-01 +17 8 -3.9999999999998e-01 +18 8 -3.5555555555556e-01 +24 8 -2.2222222222222e-02 +34 8 3.3333333333333e-02 +35 8 1.1111111111112e-03 +36 8 2.6666666666666e-02 +37 8 3.3333333333334e-02 +38 8 1.1111111111111e-03 +39 8 2.6666666666667e-02 +40 8 1.1111111111111e-01 +41 8 -6.6666666666668e-02 +42 8 1.1111111111111e-01 +8 9 -2.0000000000005e-01 +9 9 1.9555555555556e+00 +17 9 -3.5555555555552e-01 +18 9 -1.0666666666667e+00 +37 9 1.3333333333333e-01 +38 9 4.4444444444443e-03 +39 9 1.2000000000000e-01 +41 9 1.1111111111111e-01 +42 9 3.1086244689504e-15 +1 10 -1.0666666666667e+00 +2 10 -3.5555555555555e-01 +10 10 5.6888888888889e+00 +11 10 -1.0666666666667e+00 +19 10 -1.0666666666667e+00 +20 10 -3.5555555555555e-01 +25 10 1.1102230246252e-16 +26 10 1.7777777777778e-02 +27 10 1.3877787807814e-17 +1 11 -3.5555555555556e-01 +2 11 -4.0000000000000e-01 +3 11 -3.5555555555555e-01 +4 11 1.1111111111111e-01 +10 11 -1.0666666666667e+00 +11 11 3.9111111111110e+00 +12 11 -1.0666666666666e+00 +13 11 -5.3290705182007e-15 +19 11 -3.5555555555555e-01 +20 11 -3.9999999999999e-01 +21 11 -3.5555555555555e-01 +22 11 1.1111111111111e-01 +26 11 4.4444444444443e-03 +27 11 5.5511151231258e-17 +28 11 5.5511151231258e-17 +29 11 4.4444444444446e-03 +30 11 -3.4694469519536e-17 +2 12 -3.5555555555555e-01 +3 12 -1.0666666666667e+00 +4 12 -3.5555555555554e-01 +11 12 
-1.0666666666666e+00 +12 12 5.6888888888888e+00 +13 12 -1.0666666666667e+00 +20 12 -3.5555555555555e-01 +21 12 -1.0666666666667e+00 +22 12 -3.5555555555555e-01 +28 12 2.2204460492503e-16 +29 12 1.7777777777778e-02 +30 12 1.9428902930940e-16 +2 13 1.1111111111111e-01 +3 13 -3.5555555555556e-01 +4 13 -4.0000000000000e-01 +5 13 -3.5555555555555e-01 +6 13 1.1111111111111e-01 +11 13 -5.3290705182007e-15 +12 13 -1.0666666666667e+00 +13 13 3.9111111111111e+00 +14 13 -1.0666666666667e+00 +15 13 -2.6645352591004e-15 +20 13 1.1111111111111e-01 +21 13 -3.5555555555556e-01 +22 13 -3.9999999999999e-01 +23 13 -3.5555555555555e-01 +24 13 1.1111111111111e-01 +28 13 2.2204460492503e-16 +29 13 4.4444444444443e-03 +30 13 1.6653345369377e-16 +31 13 1.1102230246252e-16 +32 13 4.4444444444446e-03 +33 13 6.9388939039072e-17 +4 14 -3.5555555555555e-01 +5 14 -1.0666666666667e+00 +6 14 -3.5555555555555e-01 +13 14 -1.0666666666667e+00 +14 14 5.6888888888888e+00 +15 14 -1.0666666666666e+00 +22 14 -3.5555555555555e-01 +23 14 -1.0666666666667e+00 +24 14 -3.5555555555554e-01 +31 14 -7.7715611723761e-16 +32 14 1.7777777777778e-02 +33 14 -1.6653345369377e-16 +4 15 1.1111111111112e-01 +5 15 -3.5555555555557e-01 +6 15 -4.0000000000001e-01 +7 15 -3.5555555555554e-01 +8 15 1.1111111111111e-01 +13 15 -2.6645352591004e-15 +14 15 -1.0666666666666e+00 +15 15 3.9111111111110e+00 +16 15 -1.0666666666667e+00 +22 15 1.1111111111112e-01 +23 15 -3.5555555555557e-01 +24 15 -3.9999999999999e-01 +31 15 4.4408920985006e-16 +32 15 4.4444444444444e-03 +34 15 -4.7184478546569e-16 +35 15 4.4444444444444e-03 +36 15 -3.1918911957973e-16 +40 15 -3.5555555555555e-01 +41 15 1.1111111111111e-01 +6 16 -3.5555555555554e-01 +7 16 -1.0666666666666e+00 +8 16 -3.5555555555558e-01 +15 16 -1.0666666666667e+00 +16 16 5.6888888888888e+00 +17 16 -1.0666666666667e+00 +24 16 -3.5555555555553e-01 +35 16 1.7777777777777e-02 +36 16 3.8857805861880e-16 +40 16 -1.0666666666666e+00 +41 16 -3.5555555555558e-01 +6 17 1.1111111111111e-01 +7 17 
-3.5555555555554e-01 +8 17 -3.9999999999998e-01 +9 17 -3.5555555555552e-01 +16 17 -1.0666666666667e+00 +17 17 3.9111111111111e+00 +18 17 -1.0666666666667e+00 +24 17 1.1111111111111e-01 +34 17 -2.2204460492503e-16 +35 17 4.4444444444441e-03 +36 17 -2.2204460492503e-16 +37 17 -1.3877787807815e-16 +38 17 4.4444444444448e-03 +39 17 -2.7755575615629e-16 +40 17 -3.5555555555553e-01 +41 17 -3.9999999999997e-01 +42 17 -3.5555555555551e-01 +8 18 -3.5555555555556e-01 +9 18 -1.0666666666667e+00 +17 18 -1.0666666666667e+00 +18 18 5.6888888888888e+00 +37 18 -6.6613381477509e-16 +38 18 1.7777777777778e-02 +39 18 -3.3306690738755e-16 +41 18 -3.5555555555555e-01 +42 18 -1.0666666666667e+00 +1 19 1.3322676295502e-15 +2 19 1.1111111111111e-01 +10 19 -1.0666666666667e+00 +11 19 -3.5555555555555e-01 +19 19 1.9555555555556e+00 +20 19 -2.0000000000000e-01 +25 19 -1.3333333333333e-01 +26 19 -2.2222222222222e-02 +27 19 -1.3333333333333e-02 +1 20 1.1111111111111e-01 +2 20 -6.6666666666669e-02 +3 20 1.1111111111111e-01 +4 20 -2.2222222222222e-02 +10 20 -3.5555555555555e-01 +11 20 -3.9999999999999e-01 +12 20 -3.5555555555555e-01 +13 20 1.1111111111111e-01 +19 20 -2.0000000000000e-01 +20 20 1.2444444444444e+00 +21 20 -2.0000000000001e-01 +22 20 -3.3333333333331e-02 +25 20 -3.3333333333334e-02 +26 20 -5.5555555555556e-03 +27 20 -6.6666666666667e-03 +28 20 -3.3333333333334e-02 +29 20 -5.5555555555557e-03 +30 20 -6.6666666666669e-03 +2 21 1.1111111111111e-01 +3 21 3.5527136788005e-15 +4 21 1.1111111111111e-01 +11 21 -3.5555555555555e-01 +12 21 -1.0666666666667e+00 +13 21 -3.5555555555556e-01 +20 21 -2.0000000000001e-01 +21 21 1.9555555555556e+00 +22 21 -2.0000000000000e-01 +28 21 -1.3333333333333e-01 +29 21 -2.2222222222222e-02 +30 21 -4.0000000000000e-02 +2 22 -2.2222222222222e-02 +3 22 1.1111111111111e-01 +4 22 -6.6666666666664e-02 +5 22 1.1111111111111e-01 +6 22 -2.2222222222223e-02 +11 22 1.1111111111111e-01 +12 22 -3.5555555555555e-01 +13 22 -3.9999999999999e-01 +14 22 -3.5555555555555e-01 
+15 22 1.1111111111112e-01 +20 22 -3.3333333333331e-02 +21 22 -2.0000000000000e-01 +22 22 1.2444444444444e+00 +23 22 -2.0000000000001e-01 +24 22 -3.3333333333335e-02 +28 22 -3.3333333333332e-02 +29 22 -5.5555555555553e-03 +30 22 -1.3333333333333e-02 +31 22 -3.3333333333334e-02 +32 22 -5.5555555555556e-03 +33 22 -1.3333333333333e-02 +4 23 1.1111111111111e-01 +5 23 7.5495165674511e-15 +6 23 1.1111111111111e-01 +13 23 -3.5555555555555e-01 +14 23 -1.0666666666667e+00 +15 23 -3.5555555555557e-01 +22 23 -2.0000000000001e-01 +23 23 1.9555555555555e+00 +24 23 -1.9999999999998e-01 +31 23 -1.3333333333333e-01 +32 23 -2.2222222222222e-02 +33 23 -6.6666666666667e-02 +4 24 -2.2222222222223e-02 +5 24 1.1111111111111e-01 +6 24 -6.6666666666670e-02 +7 24 1.1111111111110e-01 +8 24 -2.2222222222222e-02 +13 24 1.1111111111111e-01 +14 24 -3.5555555555554e-01 +15 24 -3.9999999999999e-01 +16 24 -3.5555555555553e-01 +17 24 1.1111111111111e-01 +22 24 -3.3333333333335e-02 +23 24 -1.9999999999998e-01 +24 24 1.2444444444444e+00 +31 24 -3.3333333333334e-02 +32 24 -5.5555555555556e-03 +33 24 -2.0000000000000e-02 +34 24 -3.3333333333331e-02 +35 24 -5.5555555555552e-03 +36 24 -1.9999999999999e-02 +40 24 -2.0000000000004e-01 +41 24 -3.3333333333329e-02 +1 25 1.3333333333333e-01 +2 25 3.3333333333333e-02 +10 25 1.1102230246252e-16 +19 25 -1.3333333333333e-01 +20 25 -3.3333333333334e-02 +25 25 0.0000000000000e+00 +1 26 4.4444444444444e-03 +2 26 1.1111111111111e-03 +10 26 1.7777777777778e-02 +11 26 4.4444444444443e-03 +19 26 -2.2222222222222e-02 +20 26 -5.5555555555556e-03 +26 26 0.0000000000000e+00 +1 27 1.3333333333333e-02 +2 27 6.6666666666666e-03 +10 27 1.3877787807814e-17 +11 27 5.5511151231258e-17 +19 27 -1.3333333333333e-02 +20 27 -6.6666666666667e-03 +27 27 0.0000000000000e+00 +2 28 3.3333333333334e-02 +3 28 1.3333333333333e-01 +4 28 3.3333333333331e-02 +11 28 5.5511151231258e-17 +12 28 2.2204460492503e-16 +13 28 2.2204460492503e-16 +20 28 -3.3333333333334e-02 +21 28 -1.3333333333333e-01 +22 
28 -3.3333333333332e-02 +28 28 0.0000000000000e+00 +2 29 1.1111111111111e-03 +3 29 4.4444444444444e-03 +4 29 1.1111111111111e-03 +11 29 4.4444444444446e-03 +12 29 1.7777777777778e-02 +13 29 4.4444444444443e-03 +20 29 -5.5555555555557e-03 +21 29 -2.2222222222222e-02 +22 29 -5.5555555555553e-03 +29 29 0.0000000000000e+00 +2 30 6.6666666666669e-03 +3 30 4.0000000000000e-02 +4 30 1.3333333333333e-02 +11 30 -3.4694469519536e-17 +12 30 1.9428902930940e-16 +13 30 1.6653345369377e-16 +20 30 -6.6666666666669e-03 +21 30 -4.0000000000000e-02 +22 30 -1.3333333333333e-02 +30 30 0.0000000000000e+00 +4 31 3.3333333333334e-02 +5 31 1.3333333333333e-01 +6 31 3.3333333333334e-02 +13 31 1.1102230246252e-16 +14 31 -7.7715611723761e-16 +15 31 4.4408920985006e-16 +22 31 -3.3333333333334e-02 +23 31 -1.3333333333333e-01 +24 31 -3.3333333333334e-02 +31 31 0.0000000000000e+00 +4 32 1.1111111111111e-03 +5 32 4.4444444444444e-03 +6 32 1.1111111111111e-03 +13 32 4.4444444444446e-03 +14 32 1.7777777777778e-02 +15 32 4.4444444444444e-03 +22 32 -5.5555555555556e-03 +23 32 -2.2222222222222e-02 +24 32 -5.5555555555556e-03 +32 32 0.0000000000000e+00 +4 33 1.3333333333334e-02 +5 33 6.6666666666666e-02 +6 33 2.0000000000000e-02 +13 33 6.9388939039072e-17 +14 33 -1.6653345369377e-16 +22 33 -1.3333333333333e-02 +23 33 -6.6666666666667e-02 +24 33 -2.0000000000000e-02 +33 33 0.0000000000000e+00 +6 34 3.3333333333332e-02 +7 34 1.3333333333333e-01 +8 34 3.3333333333333e-02 +15 34 -4.7184478546569e-16 +17 34 -2.2204460492503e-16 +24 34 -3.3333333333331e-02 +34 34 0.0000000000000e+00 +40 34 -1.3333333333333e-01 +41 34 -3.3333333333334e-02 +6 35 1.1111111111110e-03 +7 35 4.4444444444444e-03 +8 35 1.1111111111112e-03 +15 35 4.4444444444444e-03 +16 35 1.7777777777777e-02 +17 35 4.4444444444441e-03 +24 35 -5.5555555555552e-03 +35 35 0.0000000000000e+00 +40 35 -2.2222222222222e-02 +41 35 -5.5555555555555e-03 +6 36 1.9999999999999e-02 +7 36 9.3333333333331e-02 +8 36 2.6666666666666e-02 +15 36 -3.1918911957973e-16 
+16 36 3.8857805861880e-16 +17 36 -2.2204460492503e-16 +24 36 -1.9999999999999e-02 +36 36 0.0000000000000e+00 +40 36 -9.3333333333331e-02 +41 36 -2.6666666666667e-02 +8 37 3.3333333333334e-02 +9 37 1.3333333333333e-01 +17 37 -1.3877787807815e-16 +18 37 -6.6613381477509e-16 +37 37 0.0000000000000e+00 +41 37 -3.3333333333334e-02 +42 37 -1.3333333333333e-01 +8 38 1.1111111111111e-03 +9 38 4.4444444444443e-03 +17 38 4.4444444444448e-03 +18 38 1.7777777777778e-02 +38 38 0.0000000000000e+00 +41 38 -5.5555555555556e-03 +42 38 -2.2222222222222e-02 +8 39 2.6666666666667e-02 +9 39 1.2000000000000e-01 +17 39 -2.7755575615629e-16 +18 39 -3.3306690738755e-16 +39 39 0.0000000000000e+00 +41 39 -2.6666666666667e-02 +42 39 -1.2000000000000e-01 +6 40 1.1111111111111e-01 +7 40 -6.6613381477509e-15 +8 40 1.1111111111111e-01 +15 40 -3.5555555555555e-01 +16 40 -1.0666666666666e+00 +17 40 -3.5555555555553e-01 +24 40 -2.0000000000004e-01 +34 40 -1.3333333333333e-01 +35 40 -2.2222222222222e-02 +36 40 -9.3333333333331e-02 +40 40 1.9555555555555e+00 +41 40 -1.9999999999998e-01 +6 41 -2.2222222222222e-02 +7 41 1.1111111111112e-01 +8 41 -6.6666666666668e-02 +9 41 1.1111111111111e-01 +15 41 1.1111111111111e-01 +16 41 -3.5555555555558e-01 +17 41 -3.9999999999997e-01 +18 41 -3.5555555555555e-01 +24 41 -3.3333333333329e-02 +34 41 -3.3333333333334e-02 +35 41 -5.5555555555555e-03 +36 41 -2.6666666666667e-02 +37 41 -3.3333333333334e-02 +38 41 -5.5555555555556e-03 +39 41 -2.6666666666667e-02 +40 41 -1.9999999999998e-01 +41 41 1.2444444444445e+00 +42 41 -2.0000000000005e-01 +8 42 1.1111111111111e-01 +9 42 3.1086244689504e-15 +17 42 -3.5555555555551e-01 +18 42 -1.0666666666667e+00 +37 42 -1.3333333333333e-01 +38 42 -2.2222222222222e-02 +39 42 -1.2000000000000e-01 +41 42 -2.0000000000005e-01 +42 42 1.9555555555556e+00 diff --git a/examples/spmv/matrix_market/mm_to_bcsr.c b/examples/spmv/matrix_market/mm_to_bcsr.c new file mode 100644 index 0000000..2f46292 --- /dev/null +++ 
b/examples/spmv/matrix_market/mm_to_bcsr.c @@ -0,0 +1,378 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include "mm_to_bcsr.h" + +/* Some debug functions */ + +static void print_block(tmp_block_t *block, unsigned r, unsigned c) +{ + printf(" **** block %u %u **** \n", block->i, block->j); + + unsigned i, j; + for (j = 0; j < r; j++) + { + for (i = 0; i < c; i++) + { + printf("%2.2f\t", block->val[i + j*c]); + } + printf("\n"); + } +} + +static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c) +{ + tmp_block_t *current_block = block_list; + + while(current_block) + { + print_block(current_block, r, c); + + current_block = current_block->next; + } +} + +static void print_bcsr(bcsr_t *bcsr) +{ + fprintf(stderr, "** BSCR **\n"); + fprintf(stderr, "non zero - blocks = %u\n", bcsr->nnz_blocks); + fprintf(stderr, "nrows - blocks = %u\n", bcsr->nrows_blocks); + fprintf(stderr, "block size : c %u r %u\n", bcsr->c, bcsr->r); +} + +static unsigned count_blocks(tmp_block_t *block_list) +{ + unsigned count = 0; + tmp_block_t *current_block = block_list; + + while(current_block) + { + count++; + current_block = current_block->next; + } + + return count; +} + +static unsigned count_row_blocks(tmp_block_t *block_list) +{ + unsigned maxrow = 0; + tmp_block_t *current_block = block_list; + + 
while(current_block) + { + if (current_block->j > maxrow) + maxrow = current_block->j; + + current_block = current_block->next; + } + + return (maxrow+1); +} + + + +/* Find the block that corresponds to (i,j) if it exists in the list */ + +static tmp_block_t *search_block(tmp_block_t *block_list, unsigned i, unsigned j) +{ + tmp_block_t *current_block = block_list; + /* printf("search %d %d\n", i, j); */ + + while (current_block) + { + if ((current_block->i == i) && (current_block->j == j)) + { + /* we found the block */ + return current_block; + } + + current_block = current_block->next; + }; + + /* no entry was found ... */ + return NULL; +} + +static tmp_block_t *create_block(unsigned c, unsigned r) +{ + tmp_block_t *block; + + block = malloc(sizeof(tmp_block_t)); + block->val = calloc(c*r, sizeof(float)); + + return block; +} + +/* determine if next block is bigger in lexical order */ +static unsigned next_block_is_bigger(tmp_block_t *block, unsigned i, unsigned j) +{ + tmp_block_t *next = block->next; + + if (next) + { + /* we evaluate lexical order */ + if (next->j < j) + return 0; + + if (next->j > j) + return 1; + + /* next->j == j */ + return (next->i > i); + } + + /* this is the last block, so it's bigger */ + return 1; +} + +/* we insert a block in the list, directly at the appropriate place */ +static void insert_block(tmp_block_t *block, tmp_block_t **block_list, unsigned i, unsigned j) +{ + /* insert block at the beginning of the list */ + /*block->next = *block_list; + *block_list = block; */ + + /* insert the block in lexicographical order */ + /* first find an element that is bigger, then insert the block just before it */ + tmp_block_t *current_block = *block_list; + + if (!current_block) + { + /* list was empty */ + *block_list = block; + block->next = NULL; + return; + } + + while (current_block) + { + if (next_block_is_bigger(current_block, i, j)) + { + /* insert block here */ + block->next = current_block->next; + current_block->next = block; 
+ return; + } + + current_block = current_block->next; + }; + + /* should not be reached ! */ +} + +/* we add an element to the list of blocks, it is either added to an existing block or in a block specifically created if there was none */ +static void insert_elem(tmp_block_t **block_list, unsigned abs_i, unsigned abs_j, float val, unsigned c, unsigned r) +{ + /* we are looking for the block that contains (abs_i, abs_j) (abs = absolute) */ + unsigned i,j; + + i = abs_i / c; + j = abs_j / r; + + tmp_block_t *block; + + block = search_block(*block_list, i, j); + + if (!block) + { + /* the block does not exist yet */ + /* create it */ + block = create_block(c, r); + + block->i = i; + block->j = j; + + /* printf("create block %d %d !\n", i, j); */ + + /* insert it in the block list */ + insert_block(block, block_list, i, j); + } + + /* now insert the value in the corresponding block */ + unsigned local_i, local_j, local_index; + + local_i = abs_i % c; + local_j = abs_j % r; + local_index = local_j * c + local_i; + + block->val[local_index] = val; +} + +/* transform a list of values (with coordinates) into a list of blocks that are easily processed into BCSR */ +static tmp_block_t * mm_to_blocks(int nz, unsigned *I, unsigned *J, float *val, unsigned c, unsigned r) +{ + int elem; + + /* at first, the list of block is empty */ + tmp_block_t *block_list = NULL; + + for (elem = 0; elem < nz; elem++) + { + insert_elem(&block_list, I[elem], J[elem], val[elem], c, r); + } + + return block_list; +} + +static void fill_bcsr(tmp_block_t *block_list, unsigned c, unsigned r, bcsr_t *bcsr) +{ + unsigned block = 0; + unsigned current_offset = 0; + size_t block_size = c*r*sizeof(float); + + tmp_block_t *current_block = block_list; + + while(current_block) + { + /* copy the val from the block to the contiguous area in the BCSR */ + memcpy(&bcsr->val[current_offset], current_block->val, block_size); + + /* write the the index of the block + * XXX should it be in blocks ? 
*/ + bcsr->colind[block] = current_block->i; + + if ((bcsr->rowptr[current_block->j] == 0) && (current_block->j != 0)) + { + /* this is the first element of the line */ + bcsr->rowptr[current_block->j] = block; + } + + block++; + current_offset = block*c*r; + current_block = current_block->next; + }; + + /* for all lines where there were no block at all (XXX), fill the 0 in rowptr */ + /* the first row must start at 0 ? */ + bcsr->rowptr[0] = 0; + + unsigned row; + for (row = 1; row < bcsr->nrows_blocks; row++) + { + if (bcsr->rowptr[row] == 0) + bcsr->rowptr[row] = bcsr->rowptr[row-1]; + } + + bcsr->rowptr[bcsr->nrows_blocks] = bcsr->nnz_blocks; +} + +static bcsr_t * blocks_to_bcsr(tmp_block_t *block_list, unsigned c, unsigned r) +{ + unsigned nblocks; + + /* print_all_blocks(block_list, r, c); */ + + nblocks = count_blocks(block_list); + + bcsr_t *bcsr = malloc(sizeof(bcsr_t)); + + bcsr->nnz_blocks = nblocks; + bcsr->r = r; + bcsr->c = c; + + unsigned nrows_blocks = count_row_blocks(block_list); + bcsr->nrows_blocks = nrows_blocks; + + bcsr->val = malloc(nblocks*r*c*sizeof(float)); + bcsr->colind = malloc(nblocks*sizeof(unsigned)); + bcsr->rowptr = calloc((nrows_blocks + 1), sizeof(unsigned)); + + fill_bcsr(block_list, c, r, bcsr); + + return bcsr; +} + +bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I, unsigned *J, float *val, unsigned c, unsigned r) +{ + bcsr_t *bcsr; + tmp_block_t *block_list; + + block_list = mm_to_blocks(nz, I, J, val, c, r); + bcsr = blocks_to_bcsr(block_list, c, r); + + print_bcsr(bcsr); + + return bcsr; +} + +bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r) +{ + FILE *f; + MM_typecode matcode; + int M, N; + int nz; + int i; + unsigned *I, *J; + float *val; + + bcsr_t *bcsr; + + if ((f = fopen(filename, "r")) == NULL) + { + fprintf(stderr, "File <%s> not found\n", filename); + exit(1); + } + + if (mm_read_banner(f, &matcode) != 0) + { + printf("Could not process Matrix Market banner.\n"); + exit(1); + } + + /* This is how one can 
screen matrix types if their application */ + /* only supports a subset of the Matrix Market data types. */ + + if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode)) + { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(1); + } + + /* find out size of sparse matrix .... */ + + if ((mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) + exit(1); + + + /* reseve memory for matrices */ + + I = malloc(nz * sizeof(unsigned)); + J = malloc(nz * sizeof(unsigned)); + /* XXX float ! */ + val = (float *) malloc(nz * sizeof(float)); + + for (i=0; i +#include +#include +#include +#include "mmio.h" + +/* convert a matrix stored in a file with the matrix market format into the + * BCSR format */ + +typedef struct tmp_block +{ + /* we have a linked list of blocks */ + struct tmp_block *next; + + /* column i, row j*/ + unsigned i, j; + + float *val; + +} tmp_block_t; + +typedef struct +{ + unsigned r,c; + unsigned nnz_blocks; + unsigned nrows_blocks; + + float *val; + uint32_t *colind; + uint32_t *rowptr; +} bcsr_t; + + +/* directly read input from a file */ +bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r); + +/* read the matrix as a set of valuated coordinates */ +bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I_, unsigned *J, float *val, unsigned c, unsigned r); diff --git a/examples/spmv/matrix_market/mmio.c b/examples/spmv/matrix_market/mmio.c new file mode 100644 index 0000000..3b443c2 --- /dev/null +++ b/examples/spmv/matrix_market/mmio.c @@ -0,0 +1,488 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +/* +* Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ + + +#include +#include +#include +#include +#include + +#include "mmio.h" + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, double **val_, int **I_, int **J_) +{ + FILE *f; + MM_typecode matcode; + int M, N, nz; + int i; + double *val; + int *I, *J; + + if ((f = fopen(fname, "r")) == NULL) + { + fprintf(stderr, "File <%s> not found\n", fname); + return -1; + } + + if (mm_read_banner(f, &matcode) != 0) + { + fprintf(stderr, "mm_read_unsymetric: Could not process Matrix Market banner "); + fprintf(stderr, " in file [%s]\n", fname); + return -1; + } + + if (!(mm_is_real(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode))) + { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + return -1; + } + + /* find out size of sparse matrix: M, N, nz .... */ + if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) + { + fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } + + *M_ = M; + *N_ = N; + *nz_ = nz; + + /* reseve memory for matrices */ + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + *val_ = val; + *I_ = I; + *J_ = J; + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 
136 lines 13-15) */ + for (i=0; i 0) + { + nzval[pos] = LOWER_BAND; + colind[pos] = row-1; + pos++; + } + + nzval[pos] = MIDDLE_BAND; + colind[pos] = row; + pos++; + + if (row < size - 1) + { + nzval[pos] = UPPER_BAND; + colind[pos] = row+1; + pos++; + } + } + + STARPU_ASSERT(pos == nnz); + + rowptr[size] = nnz; + + /* initiate the 2 vectors */ + starpu_malloc((void **)&vector_in_ptr, size*sizeof(float)); + starpu_malloc((void **)&vector_out_ptr, size*sizeof(float)); + starpu_malloc((void **)&vector_exp_out_ptr, size*sizeof(float)); + assert(vector_in_ptr && vector_out_ptr && vector_exp_out_ptr); + + /* fill them */ + for (ind = 0; ind < size; ind++) + { + vector_in_ptr[ind] = ind % 100; + vector_out_ptr[ind] = 0.0f; + } + + /* + * Register the CSR matrix and the 2 vectors + */ + starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float)); + starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float)); + starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float)); + + /* + * Partition the CSR matrix and the output vector + */ + csr_f.nchildren = nblocks; + vector_f.nchildren = nblocks; + starpu_data_partition(sparse_matrix, &csr_f); + starpu_data_partition(vector_out, &vector_f); + + /* + * If we use OpenCL, we need to compile the SpMV kernel + */ +#ifdef STARPU_USE_OPENCL + compile_spmv_opencl_kernel(); +#endif + + start = starpu_timing_now(); + + /* + * Create and submit StarPU tasks + */ + for (part = 0; part < nblocks; part++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &spmv_cl; + + task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part); + task->handles[1] = vector_in; + task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part); + + ret = starpu_task_submit(task); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + 
exit(0); + } + } + + starpu_task_wait_for_all(); + end = starpu_timing_now(); + + /* + * Unregister the CSR matrix and the output vector + */ + starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); + starpu_data_unpartition(vector_out, STARPU_MAIN_RAM); + + /* + * Unregister data + */ + starpu_data_unregister(sparse_matrix); + starpu_data_unregister(vector_in); + starpu_data_unregister(vector_out); + + /* + * Display the result + */ + for (row = 0; row < STARPU_MIN(size, 16); row++) + { + FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]); + } + + /* Check the result */ + memset(vector_exp_out_ptr, 0, sizeof(vector_exp_out_ptr[0])*size); + for (row = 0; row < size; row++) + { + if (row > 0) + vector_exp_out_ptr[row] += LOWER_BAND * vector_in_ptr[row-1]; + vector_exp_out_ptr[row] += MIDDLE_BAND * vector_in_ptr[row]; + if (row < size-1) + vector_exp_out_ptr[row] += UPPER_BAND * vector_in_ptr[row+1]; + } + for (row = 0; row < size; row++) + { + if (vector_out_ptr[row] != vector_exp_out_ptr[row]) + { + FPRINTF(stderr, "check failed at %u: %f vs expected %f\n", row, vector_out_ptr[row], vector_exp_out_ptr[row]); + exit(EXIT_FAILURE); + } + } + + starpu_free_noflag(nzval, nnz*sizeof(float)); + starpu_free_noflag(colind, nnz*sizeof(uint32_t)); + starpu_free_noflag(rowptr, (size+1)*sizeof(uint32_t)); + starpu_free_noflag(vector_in_ptr, size*sizeof(float)); + starpu_free_noflag(vector_out_ptr, size*sizeof(float)); + starpu_free_noflag(vector_exp_out_ptr, size*sizeof(float)); + + /* + * Stop StarPU + */ + starpu_shutdown(); + + timing = end - start; + FPRINTF(stderr, "Computation took (in ms)\n"); + FPRINTF(stdout, "%2.2f\n", timing/1000); + + return 0; +} diff --git a/examples/spmv/spmv.h b/examples/spmv/spmv.h new file mode 100644 index 0000000..1c93f7f --- /dev/null +++ b/examples/spmv/spmv.h @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __SPMV_H__ +#define __SPMV_H__ + +#include +#include +#include +#include +#include + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#ifdef STARPU_USE_CUDA +void spmv_kernel_cuda(void *descr[], void *args); +#endif + +#ifdef STARPU_USE_OPENCL +void spmv_kernel_opencl(void *descr[], void *args); +void compile_spmv_opencl_kernel(void); +#endif + +void spmv_kernel_cpu(void *descr[], void *arg); + +#endif /* __SPMV_H__ */ diff --git a/examples/spmv/spmv_cuda.cu b/examples/spmv/spmv_cuda.cu new file mode 100644 index 0000000..80a04f8 --- /dev/null +++ b/examples/spmv/spmv_cuda.cu @@ -0,0 +1,102 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* CUDA kernel for SPMV */ + +#include + +#define MIN(a,b) ((a)<(b)?(a):(b)) + +extern "C" __global__ void spmv_kernel(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, + uint32_t firstentry, uint32_t elemsize, + float *vecin, uint32_t nx_in, uint32_t elemsize1, float * vecout, uint32_t nx_out, uint32_t elemsize2) +{ + /* only one dimension is used here */ + unsigned nthreads = gridDim.x*blockDim.x; + unsigned threadid = threadIdx.x + blockIdx.x*blockDim.x; + + unsigned rowstart = threadid * ((nrow + (nthreads - 1))/nthreads); + unsigned rowend = MIN(nrow, (threadid+1) * ((nrow + (nthreads - 1))/nthreads)); + + unsigned row; + for (row = rowstart; row < rowend; row++) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstentry; + unsigned lastindex = rowptr[row+1] - firstentry; + + for (index = firstindex; index < lastindex; index++) + { + tmp += nzval[index]*vecin[colind[index]]; + } + + vecout[row] = tmp; + } +} + +extern "C" __global__ void spmv_kernel_3(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, + uint32_t firstentry, + float *vecin, uint32_t nx_in, float * vecout, uint32_t nx_out) +{ + /* only one dimension is used here */ + unsigned block_rowstart = blockIdx.x*( (nrow + gridDim.x - 1)/gridDim.x ); + unsigned block_rowend = MIN((blockIdx.x+1)*( (nrow + gridDim.x - 1)/gridDim.x ), nrow); + + unsigned row; + for (row = block_rowstart + threadIdx.x; row < block_rowend; row+=blockDim.x) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstentry; + unsigned lastindex = rowptr[row+1] - firstentry; + + for (index = firstindex; index < lastindex; index++) + { + tmp += nzval[index]*vecin[colind[index]]; + } + + vecout[row] = tmp; + } + + +} + +extern "C" void spmv_kernel_cuda(void *descr[], void *args) +{ + uint32_t nnz = 
STARPU_CSR_GET_NNZ(descr[0]); + uint32_t nrow = STARPU_CSR_GET_NROW(descr[0]); + float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); + uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); + uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); + uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); + + float *vecin = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + uint32_t nx_in = STARPU_VECTOR_GET_NX(descr[1]); + + float *vecout = (float *)STARPU_VECTOR_GET_PTR(descr[2]); + uint32_t nx_out = STARPU_VECTOR_GET_NX(descr[2]); + + dim3 dimBlock(8, 1); + dim3 dimGrid(512, 1); + + spmv_kernel_3<<>> + (nnz, nrow, nzval, colind, rowptr, firstentry, vecin, nx_in, vecout, nx_out); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/spmv/spmv_kernels.c b/examples/spmv/spmv_kernels.c new file mode 100644 index 0000000..ed262b7 --- /dev/null +++ b/examples/spmv/spmv_kernels.c @@ -0,0 +1,142 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* OpenCL codelet for SPMV */ + +#include "spmv.h" + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_codelet; + +void spmv_kernel_opencl(void *descr[], void *args) +{ + cl_kernel kernel; + cl_command_queue queue; + int id, devid, err, n; + (void)args; + + int nnz = (int) STARPU_CSR_GET_NNZ(descr[0]); + int nrow = (int) STARPU_CSR_GET_NROW(descr[0]); + cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(descr[0]); + cl_mem colind = (cl_mem)STARPU_CSR_GET_COLIND(descr[0]); + cl_mem rowptr = (cl_mem)STARPU_CSR_GET_ROWPTR(descr[0]); + int firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); + + cl_mem vecin = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]); + int nx_in = (int)STARPU_VECTOR_GET_NX(descr[1]); + + cl_mem vecout = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]); + int nx_out = (int)STARPU_VECTOR_GET_NX(descr[2]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_codelet, "spmv", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + n=0; + err = clSetKernelArg(kernel, n++, sizeof(nnz), &nnz); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(nrow), &nrow); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(nzval), &nzval); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(colind), &colind); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(rowptr), &rowptr); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(firstentry), &firstentry); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(vecin), &vecin); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(nx_in), &nx_in); + if (err != 
CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(vecout), &vecout); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, n++, sizeof(nx_out), &nx_out); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=nrow; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} + +void compile_spmv_opencl_kernel(void) +{ + int ret; + ret = starpu_opencl_load_opencl_from_file("examples/spmv/spmv_opencl.cl", &opencl_codelet, NULL); + if (ret) + { + FPRINTF(stderr, "Failed to compile OpenCL codelet\n"); + exit(ret); + } +} +#endif + +void spmv_kernel_cpu(void *descr[], void *arg) +{ + (void)arg; + float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); + uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); + uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); + + float *vecin = (float *)STARPU_VECTOR_GET_PTR(descr[1]); + float *vecout = (float *)STARPU_VECTOR_GET_PTR(descr[2]); + + uint32_t firstelem = STARPU_CSR_GET_FIRSTENTRY(descr[0]); + + uint32_t nrow; + + nrow = STARPU_CSR_GET_NROW(descr[0]); + + STARPU_ASSERT(nrow == STARPU_VECTOR_GET_NX(descr[2])); + + unsigned row; + for (row = 0; row < nrow; row++) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstelem; + unsigned lastindex = rowptr[row+1] - firstelem; + + for (index = firstindex; index < lastindex; index++) + { + unsigned col; + + col = colind[index]; + tmp += nzval[index]*vecin[col]; + } + + vecout[row] = tmp; + } + +} + + diff --git a/examples/spmv/spmv_opencl.cl b/examples/spmv/spmv_opencl.cl new file mode 100644 index 0000000..da5d589 --- /dev/null +++ b/examples/spmv/spmv_opencl.cl @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* OpenCL kernel for SPMV */ + +__kernel void spmv(int nnz, int nrow, + __global float* nzval, __global unsigned* colind, + __global unsigned* rowptr, int firstentry, + __global float *vecin, int nx_in, + __global float *vecout, int nx_out) +{ + const int row = get_global_id(0); + if (row < nrow) + { + float tmp = 0.0f; + unsigned index; + + unsigned firstindex = rowptr[row] - firstentry; + unsigned lastindex = rowptr[row+1] - firstentry; + + for (index = firstindex; index < lastindex; index++) + { + unsigned col; + + col = colind[index]; + tmp += nzval[index]*vecin[col]; + } + + vecout[row] = tmp; + } +} diff --git a/examples/stencil/0.5.out b/examples/stencil/0.5.out new file mode 100644 index 0000000..c86507a --- /dev/null +++ b/examples/stencil/0.5.out @@ -0,0 +1,79 @@ +| 0 0 2 0 2 1 2 1 0 1 2 1 0 1 0 1 0 0 1 0 2 0 2 1 0 1 1 0 2 0 2 1 0 2 0 0 1 0 2 2 0 1 0 1 2 1 2 1 0 0 2 0 1 0 0 1 0 1 2 0 2 2 1 0 +| 0 0 0 2 2 2 1 2 1 1 1 1 1 0 1 0 0 0 0 1 0 2 1 1 1 1 1 2 0 2 2 2 0 2 0 0 0 0 2 2 0 0 0 2 2 2 2 0 0 0 0 2 0 0 0 0 1 2 2 2 2 2 2 0 +| * 0 0 2 2 2 2 * 1 1 1 1 * * * * 0 0 * * * * * 1 1 * * * * * * * * 0 * * * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * +| 0 * * * * * * 1 * * * * 1 1 0 0 * * 0 1 1 1 1 * * 1 1 1 2 2 2 2 2 * 0 0 0 * 2 2 0 0 0 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 2 0 +| 0 1 2 2 2 2 2 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 2 
2 2 2 2 0 0 0 0 0 2 2 0 0 2 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 0 0 +| 0 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 2 0 0 2 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 0 0 0 +| 0 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1 2 2 2 0 0 0 +| 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 2 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 2 2 2 0 0 0 0 0 0 0 2 1 1 0 2 2 2 0 0 0 +| 0 2 2 2 2 * * * * * 0 1 * * * * * 2 * * * * * * * * * 2 2 2 2 * * * * * * * * * 0 0 2 2 2 2 2 * * * 0 0 * 2 * * * * 2 2 2 * * * +| * * * * * 0 0 0 0 0 * * 1 1 1 1 1 * 2 1 0 1 2 1 1 2 2 * * * * 2 1 1 1 1 0 0 0 0 * * * 2 2 * * 0 0 0 * * 0 * 2 1 1 2 * * * 2 1 0 +| 2 2 2 2 2 0 0 0 0 0 1 1 1 1 1 0 0 2 2 1 1 1 1 1 1 2 2 2 2 2 2 0 1 1 0 1 0 0 0 0 1 2 2 * * 2 2 2 0 2 1 1 0 0 2 2 1 2 2 2 2 2 1 0 +| 2 2 0 2 0 0 0 0 0 1 0 0 1 1 1 0 0 2 2 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 2 2 2 1 1 0 0 0 0 1 1 0 2 2 2 2 2 1 +| 2 2 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 0 1 2 0 0 0 0 1 2 2 2 2 2 0 1 2 1 1 1 0 0 0 0 1 1 0 2 2 2 2 2 2 +| * 0 0 0 0 0 0 0 0 0 0 * * * * * 0 * * * * * * * * * * 2 2 2 2 0 0 2 2 0 0 0 0 1 2 2 2 2 2 0 2 1 1 1 1 0 0 0 * * * * * * 2 2 2 * +| 0 * 0 0 0 0 0 0 * * * 0 1 1 1 0 * 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * 0 0 * * * * * * * * * * * * * * * * 0 1 1 0 2 2 * * * 2 +| 0 0 * * * * * * 0 0 0 0 1 1 1 0 0 1 1 2 1 1 1 1 2 2 2 2 2 2 2 0 0 2 2 0 * * 0 0 2 2 2 2 2 0 1 1 1 1 1 0 0 1 1 1 1 0 0 2 1 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 2 2 0 1 1 1 2 2 2 2 2 2 2 2 0 0 2 2 1 0 2 0 2 0 2 2 2 2 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 2 2 2 2 0 1 1 2 2 0 2 1 1 2 0 0 0 2 2 0 0 0 2 2 1 2 2 2 2 0 1 1 1 1 0 1 1 1 1 1 2 0 0 1 1 2 2 2 +| 1 0 0 0 0 0 0 0 0 0 0 * * * * * * 0 0 * * * * * * * 0 0 1 1 0 0 2 0 0 0 0 0 2 2 2 2 2 2 2 0 1 1 1 1 0 * * * * 2 2 0 * 1 1 2 2 2 +| * * * * * 0 0 2 0 * * 0 1 1 1 1 0 * * 0 2 2 2 2 2 0 * * * * * * * * 2 0 0 0 * * * * 2 2 * * 
* * * * * 1 1 1 1 * * * 0 * * * * * +| 2 0 0 0 0 * * * * 0 0 0 1 1 1 1 0 2 2 0 2 2 2 2 2 0 0 0 1 1 0 0 0 1 * * * * 2 2 2 2 * * 2 2 1 1 1 1 1 1 1 1 1 2 2 0 0 1 1 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1 1 0 2 0 2 2 2 2 2 0 0 2 1 1 0 0 0 2 2 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 0 0 0 1 1 1 1 1 +| 0 1 0 0 0 0 0 0 0 0 0 0 2 1 1 1 1 0 0 0 2 2 2 2 2 0 0 2 1 1 1 0 0 2 2 2 0 0 2 2 2 2 2 2 2 1 2 1 1 1 1 1 1 1 1 2 0 0 0 1 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 2 0 0 0 1 1 1 1 0 0 2 2 2 * * * * 0 2 2 1 2 0 0 0 2 0 0 0 0 2 2 2 2 2 1 1 2 1 1 1 1 1 1 1 * * * 2 0 1 0 1 1 1 +| * * 0 0 0 0 2 0 * * * * * * 1 1 * * * * * * 2 2 0 0 * * * * * * 0 0 0 1 0 0 0 0 2 * * * * * * * 1 1 1 1 * * 1 2 0 * * * * * 1 * +| 2 0 * * * * * * 2 2 2 0 0 1 * * 1 0 2 2 2 2 2 2 0 0 2 2 2 1 0 0 * * * * * * * * * 2 2 2 1 1 1 1 * * * * 1 1 1 0 0 0 0 0 0 1 * 1 +| 2 0 0 1 0 0 2 0 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0 2 1 2 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 +| 2 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0 2 2 2 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 +| 1 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 +| 1 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 +| 1 0 0 0 0 0 0 2 2 2 0 * * * * * * * * 2 2 * * * * * * * 2 2 2 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 * * * * * * * * 1 1 1 1 1 1 +| * * * * * * * * * * * 0 0 0 1 1 1 1 2 * * 2 2 2 0 0 2 2 * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 0 0 * * * * * * +| 0 0 0 0 0 0 0 2 2 2 1 2 1 2 1 1 1 1 1 2 2 1 1 2 0 2 2 2 2 2 0 0 0 0 0 2 2 2 0 2 2 2 2 2 0 0 0 1 1 1 1 1 1 1 1 0 0 2 0 1 1 1 1 2 +| 0 0 0 0 0 0 2 2 2 2 2 1 2 2 1 1 1 1 0 1 0 1 0 2 2 2 2 0 2 2 2 0 1 0 2 2 2 2 2 2 2 2 2 0 0 0 0 1 1 1 1 1 1 1 1 0 2 1 0 1 0 0 2 2 +| 0 0 0 0 0 0 2 2 2 2 2 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 2 2 2 2 2 2 1 2 2 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 2 
2 +| 0 0 0 0 0 2 2 0 * * * * * * * * * 1 * 0 0 0 * * * * * * * * * 0 0 * * 1 1 1 2 2 2 1 1 2 0 0 2 1 * * 1 1 1 * * * * * * * 0 0 1 2 +| * * * * * * * * 2 2 2 1 0 0 2 0 2 * 2 * * * 0 0 0 0 0 0 2 2 2 * * 2 2 * * * * * * * * * * * * * 1 1 * * * 1 1 1 1 1 1 0 * * * * +| 0 0 1 0 0 0 0 0 2 2 2 1 0 2 0 2 1 2 2 0 0 0 0 0 0 0 0 2 2 2 2 0 0 2 2 1 1 1 2 2 2 1 2 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 2 2 +| 2 0 0 0 0 0 0 0 2 2 1 1 2 0 2 1 2 2 2 0 0 0 0 0 0 0 0 2 2 2 2 0 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 +| 0 0 0 0 0 0 0 2 2 2 1 1 2 2 2 1 2 2 2 1 0 0 0 0 0 0 2 2 2 2 2 0 0 2 1 1 1 1 2 2 2 2 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 2 0 2 0 * * * * * * * * 1 * 2 2 0 0 0 2 0 0 0 2 2 0 2 2 1 2 1 0 1 1 1 2 2 2 2 0 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 +| * * * * * * * 2 2 0 1 0 2 2 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * * * * * * 1 1 * * * * * * * * * * * * +| 0 1 0 0 0 2 0 2 2 0 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 2 0 0 2 2 2 2 0 1 1 1 1 1 2 2 2 0 * 2 2 2 0 1 0 * * 1 1 1 1 1 1 1 0 0 0 0 0 +| 0 1 1 0 0 2 2 2 2 1 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 2 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 +| 0 1 1 0 0 2 2 2 2 2 2 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 2 1 1 1 0 1 0 0 0 1 1 1 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 1 1 0 1 1 1 1 2 1 0 +| 1 1 1 0 0 2 2 2 2 2 2 * * * 2 2 2 2 2 0 0 0 0 0 2 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1 2 1 1 +| * * * * * * * * * * * 2 2 2 * * * * * * 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 2 1 0 0 2 0 0 0 0 0 0 0 * * * * * * * * +| 1 2 1 0 0 2 2 1 1 2 2 2 2 2 0 1 2 2 1 0 * * * 0 0 2 0 1 1 1 1 1 0 0 0 0 0 1 1 2 2 * * * * * * * * * * * 0 0 * * 0 2 1 1 1 2 1 1 +| 1 2 0 0 0 1 1 1 1 2 2 2 2 2 2 0 2 2 0 0 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 0 0 * * 0 1 0 0 1 1 1 2 1 1 +| 1 2 0 0 0 1 1 1 1 1 1 2 2 2 2 2 0 2 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 1 +| 1 2 2 0 0 1 1 1 1 1 1 2 2 2 2 
2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 1 +| 1 2 2 0 * 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 +| * * * * 0 * * * * * * * * * 2 * * * * * * * * * * * * * * * * * * * * * 2 2 2 2 2 2 2 2 2 2 0 0 0 0 2 0 0 0 * * * * * * * * * * +| 2 2 2 0 0 1 1 1 1 1 1 2 2 2 * 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 * * * * * * 2 * * 2 * * * 0 * * * * 1 0 0 1 1 1 1 2 2 2 +| 2 2 2 0 0 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 2 2 * 2 2 * 0 0 0 * 0 0 0 1 0 0 0 1 1 1 1 2 2 2 +| 2 2 2 0 0 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 2 2 0 0 0 0 1 1 1 1 1 1 1 1 0 2 2 2 2 2 2 0 2 2 2 2 0 0 0 0 0 1 1 1 1 0 1 1 1 1 2 2 2 +| 2 2 0 0 0 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 0 2 2 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 2 2 0 +| 2 * 0 0 0 1 1 1 1 1 0 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 0 0 2 0 1 1 1 1 1 0 0 0 1 1 1 2 0 0 +| * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 2 0 0 0 0 0 1 1 * * * * * * * * * 0 * * +| 2 2 0 0 0 2 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 0 2 2 1 1 1 1 1 2 2 2 2 0 * 0 * * * * * * * * * 1 1 1 0 0 0 1 1 2 * 2 0 +| 2 2 0 0 0 0 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 2 0 1 1 1 1 1 1 2 1 2 0 0 0 * 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 1 2 1 0 1 +| 2 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 0 0 0 0 2 2 2 2 1 2 1 0 0 0 1 1 1 1 1 1 1 1 0 2 0 0 0 0 0 0 0 0 0 0 1 1 2 2 2 0 0 0 1 1 1 0 0 0 +| * * 0 0 0 0 2 1 1 2 2 1 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 0 0 0 1 2 1 1 2 1 1 1 2 0 0 0 0 0 0 0 0 0 0 0 1 1 2 2 2 0 0 0 1 2 1 0 0 0 +| 2 1 * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * * * 0 * 0 1 0 0 0 0 0 0 0 0 1 2 2 2 2 * * * * * * * * +| 2 1 0 0 0 2 2 1 1 2 1 2 2 2 2 2 0 0 0 0 0 2 1 1 1 1 1 0 * 1 1 2 2 0 1 0 1 0 1 * 2 * * * 0 0 0 0 0 * * * * * * * 1 2 0 2 1 0 0 0 +| 2 1 0 0 0 0 2 1 1 2 0 2 2 2 2 2 0 0 0 0 0 2 1 1 1 1 1 0 0 1 1 2 
2 1 0 0 1 1 1 1 2 1 0 0 * * * * * 0 0 2 2 2 2 1 1 0 0 2 2 0 0 0 +| 2 1 0 0 0 0 2 1 1 2 0 0 2 1 2 2 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 0 0 1 1 1 1 2 2 0 0 0 2 0 2 0 0 0 2 2 2 2 2 2 0 2 2 2 0 0 0 +| 0 0 * 0 0 0 0 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 1 1 1 1 0 0 0 1 2 2 1 1 0 0 1 1 1 1 2 2 0 0 0 2 1 2 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 +| * * 0 * 0 * * * * * * * * * * * * * 0 0 0 * * * * * * 0 0 * 2 2 * * 0 * * * * * * * 0 0 0 2 1 1 0 0 0 2 2 2 2 2 * * * * * * * * +| 0 0 0 0 * 0 0 1 1 1 1 2 1 1 2 1 2 0 * * * 0 1 1 1 1 0 * * 1 * * 2 1 * 0 1 1 1 0 1 2 * * * * * * * * * * * * * * 2 2 2 2 2 0 2 1 +| 0 0 0 0 0 0 0 1 1 1 1 2 1 2 1 1 2 2 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 0 0 0 2 0 0 2 0 0 0 0 2 2 1 0 0 0 1 2 2 2 2 2 2 2 2 2 2 0 1 +| 1 0 0 0 0 0 1 1 1 1 1 0 0 0 1 2 2 1 2 1 2 1 0 1 1 0 1 0 0 0 2 2 2 2 0 2 2 0 0 0 0 0 0 0 0 2 2 0 0 0 0 1 2 2 2 2 1 1 2 2 2 2 0 1 +| * * 0 0 0 1 * * * * * * 0 0 0 0 0 0 1 1 1 1 1 1 1 1 * 0 0 2 2 2 2 2 2 2 2 0 0 2 2 1 1 0 0 2 2 2 2 2 0 1 2 2 2 1 1 1 2 2 2 2 * * +| 1 0 * * * * 0 2 1 1 1 0 * * * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * 0 2 2 2 2 0 0 * * 2 1 1 * * * * * * 2 1 +| 1 0 1 1 1 1 0 2 1 1 0 0 0 0 0 0 0 0 1 1 1 2 0 1 1 1 0 0 0 1 2 2 2 2 2 0 2 2 2 2 1 1 1 2 * * * * * * * 0 0 * * * 1 1 2 2 2 0 0 1 +| 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 0 0 1 1 0 0 0 0 2 2 2 2 0 0 2 2 2 2 1 1 2 2 0 2 2 2 2 0 0 0 0 1 1 1 1 2 2 2 2 0 0 1 +| 2 1 1 1 2 2 1 1 1 1 0 0 0 0 0 0 0 0 2 0 2 2 0 0 0 0 0 0 0 0 2 2 1 1 0 0 0 2 2 2 1 1 2 0 2 0 2 2 2 0 0 0 0 1 1 1 1 1 1 2 2 0 2 1 +| 1 1 1 2 2 2 1 1 1 1 0 1 0 2 2 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 2 2 2 1 1 1 0 0 0 0 2 2 0 0 0 0 1 1 1 1 1 2 2 0 2 2 2 +| diff --git a/examples/stencil/0.out b/examples/stencil/0.out new file mode 100644 index 0000000..a119f4a --- /dev/null +++ b/examples/stencil/0.out @@ -0,0 +1,94 @@ +| 0 0 2 1 0 1 1 1 0 1 1 0 1 1 0 0 1 2 0 2 0 0 1 0 2 1 0 2 1 0 2 1 2 0 0 1 0 1 2 0 1 2 2 0 0 1 2 0 1 0 1 0 0 1 2 0 2 0 0 1 2 1 0 2 +| * * 2 0 * 0 * 0 * * 0 * * * * * * 0 * * 0 * * 0 * * * * * * * 
* * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * +| 0 0 * * 1 * 1 * 2 2 * 0 2 0 1 1 2 * 1 2 * 1 2 * 2 0 1 2 0 1 0 1 2 1 0 1 2 2 2 0 0 0 1 2 0 0 0 0 1 1 0 2 0 2 1 0 * 2 0 2 2 0 1 0 +| 0 2 1 2 1 1 1 0 2 1 1 2 0 1 2 0 1 1 0 1 2 0 1 1 0 2 0 1 1 0 2 0 1 2 0 0 1 0 0 0 0 1 2 0 1 2 0 0 0 0 1 0 0 0 2 1 0 1 0 0 0 1 2 2 +| * * * 1 * 0 2 1 1 * 0 0 0 0 * * * * 0 * 0 * 0 * * 1 * * * * * * * * * * * * 0 0 * * * * 0 * * * * * * * 0 * * * * * * 0 0 * * * +| 1 0 1 * 2 * * * * 2 * * * * 1 1 0 2 * 2 * 1 * 1 0 * 2 0 0 0 1 0 0 2 2 0 2 2 * * 2 1 0 1 * 0 0 2 1 2 1 0 * 0 1 0 1 0 2 * * 2 1 0 +| 0 0 2 1 1 0 0 2 2 0 1 0 2 1 2 0 1 0 1 1 0 2 2 1 0 0 2 0 1 0 2 2 0 1 2 0 0 0 2 0 2 1 2 0 2 0 1 0 0 1 0 0 2 0 2 0 0 0 2 0 0 2 2 1 +| * 1 1 2 * 1 0 0 2 2 0 0 0 2 0 0 2 0 * * 0 * * * * 1 0 * 0 * * 0 0 * * * * * * 2 * * * * 0 * * 0 * * * * 0 * * * * * * * * * * * +| 1 * * * 1 * * * * * * * * * * * * * 2 1 * 2 2 1 2 * * 2 * 0 0 * * 1 0 0 1 2 0 * 2 0 2 1 * 2 1 * 0 2 0 1 * 0 0 1 0 1 2 0 1 0 2 1 +| 1 2 1 2 1 1 0 1 0 0 0 2 0 0 1 1 0 0 2 0 2 0 0 0 1 1 2 0 1 2 1 1 0 0 0 0 2 1 0 0 0 1 2 0 2 2 2 0 0 2 0 1 2 0 2 0 1 1 0 1 0 0 0 1 +| * * 0 0 1 1 1 2 1 0 0 1 0 2 2 0 1 0 0 0 * * 0 * * * 0 0 1 2 0 2 1 2 * * * 1 * * 0 * * 0 * * 0 2 * * 0 1 0 1 2 0 0 0 1 * * 2 * * +| 1 0 * * * * * * 0 2 * * * * * * * * * * 2 1 * 2 1 0 * * * * * * * * 0 2 0 * 1 2 * 0 1 * 2 1 * * 1 2 * * * * * * * * * 2 1 * 1 2 +| 0 1 1 1 2 1 0 0 * * 0 2 0 2 1 0 0 0 1 0 2 1 1 0 2 1 2 2 0 2 1 1 0 2 2 0 2 0 2 0 1 0 0 0 0 0 2 0 1 0 2 0 0 2 1 0 2 1 2 0 1 1 0 0 +| 0 * * 1 2 1 2 1 2 0 0 0 1 1 0 2 0 2 0 0 0 * * 0 1 0 2 0 2 0 0 1 1 1 1 0 0 * * * 2 0 0 0 * * * * 2 0 0 * 0 0 1 2 0 0 2 2 2 * * * +| * 0 2 * * * * 0 * 0 0 2 * * * * * * * * * 2 1 * * * * * * * * * * * * * * 2 0 2 * * * * 1 1 0 2 * * * 1 * * * * * * * * * 1 2 1 +| 1 0 2 0 1 2 2 * 2 * * * 0 0 0 2 0 0 1 0 0 0 0 2 2 1 0 0 2 1 1 1 0 2 0 1 1 0 0 2 1 0 2 0 0 0 0 1 0 1 2 1 0 0 0 1 1 0 0 2 2 2 2 0 +| 0 0 2 1 0 2 0 0 2 1 0 2 0 1 0 0 1 0 2 1 1 1 0 0 2 0 1 2 2 1 0 2 0 1 0 1 2 0 1 0 2 1 2 1 0 2 0 2 
0 2 2 0 1 2 0 0 1 0 2 0 0 0 2 1 +| * * * * * * * 0 2 2 0 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +| 1 0 2 0 1 2 0 * * * * * 0 1 1 0 0 0 0 1 0 0 0 2 1 0 2 1 2 0 1 1 1 0 2 0 0 1 0 0 2 0 0 0 0 2 2 1 0 0 0 2 1 0 0 2 2 2 2 2 1 2 0 2 +| 0 1 2 2 0 1 0 2 0 1 1 0 2 0 1 1 1 2 0 2 0 0 1 0 1 1 0 1 0 2 1 1 2 1 0 0 2 1 0 0 0 2 0 0 2 1 2 0 1 0 0 0 0 1 0 1 0 2 2 0 2 0 0 0 +| * * * * * * * * 0 0 1 1 * * 0 * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * * * * * * * * +| 2 0 1 2 0 1 2 2 * * * * 2 2 * 1 0 2 2 0 1 1 0 0 2 0 1 * 0 0 0 0 2 0 2 0 1 0 0 1 0 1 0 2 0 1 1 2 0 1 0 2 0 1 2 * 1 0 0 2 1 1 1 0 +| 2 1 0 0 2 0 2 2 2 0 1 2 0 0 2 0 1 0 0 1 1 0 2 0 2 1 2 0 1 0 0 0 0 1 1 2 2 2 2 2 0 0 0 2 0 2 0 1 1 1 2 1 0 1 0 0 0 0 2 2 0 1 0 1 +| * * * * 0 * * * * * 0 * 0 0 * * * * * * * * * * * 2 2 0 2 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * 0 2 * * * * * +| 2 0 2 0 * 2 0 2 2 1 * 2 * * 1 1 1 0 0 2 1 1 1 2 1 * * * * * * 1 0 0 0 2 1 0 0 0 0 0 0 2 2 1 0 2 0 0 1 2 1 2 1 1 2 * * 0 2 2 0 0 +| 0 2 0 0 0 0 0 0 2 2 2 2 0 0 1 1 2 0 0 0 0 0 0 0 2 1 0 1 1 0 0 2 1 1 2 0 2 2 2 0 1 2 0 2 1 2 1 2 0 2 1 2 1 1 0 1 2 0 0 0 0 2 0 0 +| * * * * * 0 0 * 1 * * 2 0 * * * 0 0 * 0 0 0 * 0 * 1 0 2 2 0 0 * * * 0 1 * 0 * 0 * * * * 0 * 0 1 * * * * * * * * * 2 0 * * 1 * * +| 0 0 0 1 0 * * 2 * 2 1 * * 2 1 2 * * 1 * * * 1 * 1 * * * * * * 0 2 0 * * 2 * 1 * 2 0 1 2 * 2 * * 1 2 1 0 1 2 0 1 0 * * 0 0 * 2 0 +| 1 2 1 0 1 0 1 2 0 0 1 0 1 0 2 0 1 1 1 1 0 0 2 1 0 1 2 1 2 0 0 1 1 0 1 0 0 0 2 1 0 0 2 0 2 1 0 0 1 1 1 2 0 2 0 0 1 2 2 1 1 0 0 2 +| * 2 * * * * 1 1 2 2 0 * * * * * * * 0 0 1 0 0 0 2 1 0 1 0 2 0 * * * 0 0 0 2 0 2 0 * 0 * 2 1 1 0 0 1 0 * * * * * * * * 0 1 1 * * +| 0 * 2 0 2 0 * * * * * 0 1 0 2 1 2 1 * * * * * * * * * * * * * 2 0 1 * * * * * * * 1 * 2 * * * * * * * 2 2 1 2 1 0 0 0 * * * 0 1 +| 1 2 2 0 1 2 1 2 0 0 0 0 1 0 0 1 0 0 1 2 2 1 0 0 0 2 2 1 1 0 1 2 0 2 0 1 2 2 0 2 0 0 0 1 0 2 0 0 0 0 0 2 1 1 2 1 1 0 1 1 0 2 2 0 +| 
1 * 0 0 2 1 1 2 2 0 1 0 1 0 0 * 0 2 0 1 2 0 * * * * 0 0 2 0 * 0 2 1 2 0 0 1 1 0 1 1 0 * 0 2 1 0 0 2 1 0 * * * * * 2 0 0 0 0 1 1 +| * 2 * * * * * * * * * * * * * 2 * * * * * * 2 1 0 1 * * * * 2 * * * * * * * * * * * * 2 * * * * * * * * 0 0 1 0 0 * * * * * * * +| 0 2 0 0 1 0 2 2 0 0 1 0 0 0 1 1 0 2 0 0 0 2 2 0 1 0 1 1 2 1 0 2 2 1 0 1 2 1 0 1 2 1 0 1 0 0 2 1 1 0 2 1 0 0 0 2 2 0 0 1 0 0 2 1 +| 2 2 0 2 1 0 1 2 1 2 0 1 0 * 2 0 0 0 0 1 0 2 0 2 0 1 1 2 1 0 2 0 1 0 2 2 0 0 2 2 1 1 0 0 2 2 0 1 2 0 1 0 1 * * * 0 0 2 0 0 0 0 2 +| * * * * * * * * * * 0 1 * 1 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 0 0 * * * * * * * * +| 0 0 0 2 0 0 0 2 1 2 * * 1 2 0 1 0 2 0 2 0 2 2 0 1 0 0 1 0 2 1 0 0 2 0 0 0 0 0 2 0 0 1 2 1 0 0 2 0 1 0 2 0 2 1 0 2 2 1 0 2 0 1 1 +| 2 1 0 1 2 0 0 0 2 0 0 2 1 1 2 0 0 2 0 2 1 0 0 0 1 2 0 0 2 0 1 0 0 0 1 2 0 1 2 1 2 1 0 2 1 0 0 2 1 1 1 0 0 0 0 2 1 0 0 2 0 2 1 0 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * +| 0 2 0 0 0 2 1 0 1 2 0 2 1 0 2 0 2 2 0 0 2 0 0 0 2 1 2 1 1 1 1 2 2 0 0 2 2 0 0 1 2 2 0 1 0 2 * 0 0 1 1 2 0 1 2 2 2 1 2 0 0 1 2 1 +| 1 2 1 1 2 0 0 0 1 2 0 0 0 2 0 1 1 0 1 0 0 0 0 0 0 0 2 2 2 1 1 1 2 1 2 0 1 0 0 1 0 2 0 2 0 0 2 1 2 1 2 2 0 0 1 2 0 0 0 0 2 1 1 1 +| * 2 * * 2 1 * * * * * * * * * * * 2 * * * * * 2 1 2 0 * * * * * * * 0 * 2 * * * * * * * * * * * * * * * 0 * * * * * * 0 2 0 0 2 +| 1 * 2 1 * * 1 2 0 0 2 1 2 1 0 0 0 * 0 0 0 2 0 * * * * 0 2 0 1 0 0 1 * 2 * 1 0 2 1 2 2 2 0 2 1 0 0 0 1 2 * 2 1 2 0 0 1 * * * * * +| 2 1 1 2 1 1 0 0 0 0 1 2 0 0 0 0 1 2 0 0 0 2 1 1 0 1 0 0 1 0 2 0 2 1 0 2 0 0 2 0 1 0 1 1 2 0 2 0 2 1 0 1 2 1 1 0 1 2 1 0 0 0 0 1 +| 0 1 2 0 * 1 * * * * * * * 0 * * 1 0 2 * * * * * * 0 0 0 * * * * * * 0 2 1 * * * * * * * * * * * 0 1 0 0 * 0 2 0 1 2 * * 0 2 1 2 +| * * * * 1 * 1 2 1 2 1 0 1 * 1 2 * * * 2 1 0 0 0 1 * * * 1 0 1 0 2 0 * * * 0 1 2 0 0 2 1 2 0 0 0 * * * * 2 * * * * * 0 0 * * * * +| 2 2 0 0 0 2 0 0 1 1 1 0 1 0 2 0 0 
1 0 0 2 2 1 2 0 2 2 1 0 1 0 2 2 2 0 0 0 2 0 2 2 0 1 2 0 1 1 1 2 0 0 0 0 2 1 0 0 2 2 1 0 1 0 2 +| 0 0 0 2 0 1 2 * * * 0 * * * 2 2 1 0 * * * * * * 0 * * 2 1 1 2 * * * * * 0 * * * 2 1 1 0 * * * * 1 1 0 0 0 * * 0 1 0 0 0 * 2 0 0 +| * * * * * * * 0 2 1 * 2 0 1 * * * * 1 0 2 0 0 2 * 0 2 * * * * 0 1 1 0 2 * 1 1 2 * * * * 1 0 2 1 * * * * * 0 0 * * * * * 2 * * * +| 1 0 0 0 2 2 0 0 2 1 0 1 1 0 2 1 2 1 2 0 0 1 2 1 0 0 2 2 1 1 0 0 1 0 1 1 0 2 0 1 2 0 1 2 0 2 0 0 1 2 0 0 2 0 2 0 0 0 0 0 1 0 0 2 +| 1 0 2 0 0 1 2 * * * * * 2 0 2 * 0 0 0 1 0 * * * * * 1 2 0 1 0 1 2 2 * * 1 2 0 1 2 0 2 2 * 0 0 * 0 2 0 * 2 0 0 0 0 0 2 0 1 0 2 0 +| * * * * * * * 2 1 0 2 1 * * * 1 * * * * * 0 0 1 0 0 * * * * * * * * 1 0 * * * * * * * * 0 * * 1 * * * 1 * * * * * * * * * * * * +| 1 2 2 2 1 2 1 0 2 1 2 0 2 0 2 0 1 1 1 2 0 0 2 0 2 1 0 2 1 0 2 1 2 1 0 0 0 0 0 0 0 1 0 1 1 0 1 2 0 1 2 0 0 0 2 0 2 0 2 1 0 2 0 0 +| 2 0 0 2 0 0 2 * 1 * * 0 0 2 0 1 0 2 1 1 2 1 2 0 0 0 1 2 0 2 2 0 2 0 2 1 0 0 0 0 1 1 * 2 * 1 2 1 0 1 0 0 0 1 0 2 0 2 2 1 2 0 1 0 +| * * * 2 * * * 0 * 0 1 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * 0 * * * * * * * * * * * * * * * * * * * +| 0 1 0 * 2 0 0 1 2 0 2 2 1 1 1 2 1 1 0 0 0 0 2 0 0 2 0 1 1 2 2 0 0 2 0 1 2 0 2 2 0 0 2 2 0 1 0 1 2 0 2 0 0 1 2 0 2 1 1 0 2 2 0 1 +| 2 0 0 0 2 1 0 0 1 1 0 2 2 0 2 1 0 2 0 0 2 1 0 2 0 1 0 1 2 1 1 2 2 1 0 2 1 2 1 0 2 1 0 0 0 1 0 0 0 0 2 1 2 0 1 0 0 0 0 0 1 2 0 2 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +| 1 0 1 2 2 1 0 1 0 1 0 1 2 0 1 0 1 0 1 0 1 1 0 0 0 2 2 1 0 2 0 2 0 2 1 0 2 0 1 0 0 0 0 2 0 1 0 2 2 1 2 0 2 0 0 2 0 1 1 2 1 0 2 0 +| 1 0 1 0 0 0 2 1 0 2 1 2 2 1 0 2 2 0 0 0 1 1 0 0 1 2 0 1 2 0 0 0 0 0 0 0 1 1 2 0 2 1 0 2 0 1 0 1 2 2 0 0 0 2 0 1 2 1 2 0 0 2 1 2 +| * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * 1 1 * 0 * * * 1 0 1 0 * * * * * * 1 * * * * * * * * * 0 1 0 * * * * * +| 0 1 2 0 2 0 0 1 1 1 2 2 0 1 2 0 0 2 1 0 1 1 0 2 2 * 2 1 0 * * 2 * 0 
0 1 * * * * 1 2 0 1 1 2 * 0 0 0 0 0 2 0 2 1 * * * 1 1 2 0 2 +| 2 1 0 0 1 0 2 0 2 0 1 1 1 0 2 0 2 0 2 0 0 2 0 2 0 0 1 2 2 2 1 0 1 0 0 1 2 0 2 1 2 2 1 0 1 1 0 2 0 1 1 0 1 0 1 0 0 1 2 0 0 1 0 1 +| 0 * 0 * * * * * * * 1 * * * * * 0 * 0 * * * * * * * * 1 * 1 1 0 0 0 0 0 1 * 0 * * * * * * * * 0 * * * * * * 0 0 0 2 0 0 * 2 0 0 +| * 2 * 2 2 0 1 2 1 0 * 2 0 1 1 0 * 1 * 1 1 0 1 0 1 0 2 * 2 * * * * * * * * 2 * 1 1 2 0 0 0 1 0 * 0 2 0 0 0 1 * * * * * * 2 * * * +| 1 0 1 0 0 0 0 1 2 0 0 0 2 1 1 1 2 0 0 0 0 2 1 1 0 0 1 1 2 0 2 0 2 2 2 1 0 0 2 2 0 1 1 1 0 2 1 0 2 0 0 2 0 2 0 0 0 2 1 0 0 2 2 1 +| 0 1 2 0 2 0 * * * 0 * 2 * * * * * * 0 0 0 * * 0 * * * 0 1 0 2 2 0 2 2 2 1 1 0 0 * * * 1 0 0 1 1 0 2 * * * * * * 0 2 1 1 1 2 0 1 +| * * * * * * 2 1 0 * 0 * 0 0 0 1 0 2 * * * 0 1 * 1 1 1 * * * * * * * * * * * * * 1 0 2 * * * * * * * 1 2 2 1 1 2 * * * * * * * * +| 2 1 2 0 0 0 1 0 0 0 2 1 0 1 1 1 2 0 2 0 1 0 1 2 1 0 0 0 0 0 0 2 0 0 0 1 2 1 1 0 1 2 0 1 2 0 1 1 2 0 1 0 0 2 2 0 1 0 0 2 2 0 1 0 +| 0 0 0 2 0 * * * 1 0 * 2 2 2 0 2 1 2 0 2 * * * * * * * 1 0 0 2 1 2 0 0 2 0 1 0 2 0 1 0 0 1 0 0 0 1 0 2 1 * * * * * * 1 0 0 1 0 2 +| * * * * * 0 2 1 * * 1 * * * * * * * * * 0 1 1 0 0 1 0 * * * * * * * * * * * * * * * * * * * * * * * * * 2 2 0 1 0 1 * * * * * * +| 1 1 0 2 0 2 0 1 0 1 0 1 0 0 0 2 0 0 0 2 1 2 1 2 2 0 0 1 0 2 0 1 2 0 0 1 2 2 2 1 1 0 1 0 1 0 0 1 2 1 0 0 2 1 2 0 2 0 0 2 1 0 2 0 +| 0 0 1 0 0 0 1 2 0 2 1 0 * 0 1 2 0 0 0 * * * * * * 0 2 0 1 0 2 0 1 1 2 0 1 1 0 1 0 2 1 0 2 0 1 1 1 2 2 2 0 0 2 0 2 2 2 2 2 1 0 1 +| * * * * * * * * * * * * 1 * * * * * * 0 1 1 0 0 0 * * * * * 1 2 2 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +| 1 1 0 2 2 0 0 2 2 2 2 1 2 0 0 0 0 1 0 0 0 0 0 0 0 2 1 0 1 0 * * * * * 0 1 2 2 2 1 0 0 2 0 2 2 1 1 1 0 2 1 0 0 1 2 1 0 1 2 1 2 1 +| 2 0 0 0 1 0 2 0 1 2 0 0 1 0 0 1 0 2 0 1 2 1 0 1 1 1 0 2 0 2 1 0 2 0 2 0 2 0 1 0 2 0 2 2 1 1 2 2 1 1 0 0 2 1 0 0 2 1 0 1 2 1 1 0 +| * * 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * 2 0 1 * 1 * * * * * * * * * * * * * 0 
0 * * * * * * * * * * * * +| 0 0 * 1 1 1 0 0 2 0 1 0 1 2 0 0 0 0 2 1 0 0 1 2 0 2 2 0 0 1 * 2 * * * 2 * 2 1 2 0 1 1 1 0 2 0 1 0 2 * * 1 0 2 1 0 0 1 2 1 2 0 1 +| 0 0 2 2 0 2 1 2 0 1 0 1 2 0 2 1 1 0 1 1 1 0 0 1 2 0 0 1 1 1 0 1 2 2 0 0 1 0 0 0 1 0 0 1 2 1 0 0 1 1 2 0 0 1 2 0 1 2 1 1 0 0 2 0 +| 2 * 0 * * * * * * * * * * * * * 2 0 * * * * * * * * * * * * 1 0 0 1 1 0 0 * 1 * * * 0 * * * * 2 1 0 * 0 2 0 * * * * * * * * * * +| * 2 * 1 0 0 1 2 2 0 0 1 0 1 1 0 * * 0 1 1 0 2 1 2 0 1 1 2 1 * * * * * * * 2 * 2 0 1 * 1 2 0 2 * * * 2 * * * 1 0 0 0 0 1 2 2 2 0 +| 0 2 1 0 2 2 2 0 2 2 1 2 1 0 1 2 0 1 2 2 0 1 1 0 2 0 0 1 2 1 0 0 2 2 0 0 0 1 0 0 2 0 2 1 2 0 0 1 1 1 2 0 1 1 0 1 1 2 0 0 0 2 0 0 +| 2 * 0 2 2 * * * * * * * * * * * * 0 0 2 0 * * * * * * * * * * 0 0 0 0 2 1 1 2 * * * * * 0 * 0 2 1 0 2 1 * * 0 0 * * 0 1 2 0 2 2 +| * 1 * * * 0 0 0 1 0 0 2 0 2 1 0 0 * * * * 0 0 2 0 2 0 1 0 2 0 * * * * * * * * 1 2 1 2 1 * 2 * * * * * * 1 2 * * 2 1 * * * * * * +| 2 1 0 0 1 0 2 0 2 0 0 0 0 2 1 0 2 1 2 1 0 0 0 1 1 0 0 1 0 0 2 2 2 2 1 2 2 1 0 1 0 2 0 1 2 1 0 0 2 1 0 2 1 0 1 0 0 2 1 2 2 1 2 1 +| 0 * 1 2 2 * * 2 0 0 0 1 0 1 2 0 * * 1 0 2 0 2 * 2 * 2 0 * 0 2 1 2 1 1 2 1 0 0 0 2 0 1 0 2 0 2 0 1 0 * 2 2 0 2 0 0 2 1 * * 0 0 1 +| * 1 * * * 0 1 * * * * * * * * * 0 1 * * * * * 0 * 0 * * 1 * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * 2 0 * * * +| 0 0 1 2 1 1 2 1 2 1 1 0 1 0 1 0 2 2 0 1 2 1 1 2 1 0 1 0 0 2 2 1 0 2 0 0 1 0 0 2 0 0 2 0 0 0 0 2 1 0 1 2 2 0 0 1 0 1 0 1 2 0 0 0 +| 0 1 1 0 2 1 1 0 0 2 1 1 2 2 2 2 0 0 0 0 2 1 2 0 1 1 1 0 * 0 1 0 0 0 2 2 2 2 1 2 0 2 0 2 0 2 0 0 0 1 0 * 1 0 0 0 0 1 * * * 2 2 0 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * 2 * * * * * * * * * * * * * * * * * * * * * * 2 * * * * * * 0 0 1 * * * +| 0 1 2 2 0 1 0 0 0 0 1 2 1 2 0 1 0 1 1 1 2 0 1 2 2 2 0 2 0 1 1 2 1 2 0 1 0 0 1 0 0 1 1 0 2 0 2 0 2 0 0 0 0 0 2 1 2 1 0 2 1 0 0 1 +| 0 0 0 2 0 2 2 0 2 1 2 0 0 0 1 2 0 2 0 1 1 2 0 1 0 2 1 2 0 1 2 0 1 0 1 1 0 0 1 2 0 1 0 2 0 2 0 0 1 0 2 0 2 1 2 0 0 0 0 1 0 0 2 2 +| diff 
--git a/examples/stencil/1.out b/examples/stencil/1.out new file mode 100644 index 0000000..5780ed6 --- /dev/null +++ b/examples/stencil/1.out @@ -0,0 +1,75 @@ +| 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 0 2 0 2 0 2 0 2 0 2 1 2 1 1 1 1 0 2 0 2 0 2 0 2 0 2 1 0 1 1 1 1 2 0 2 0 2 0 2 0 2 1 0 1 1 1 1 0 0 +| 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 2 0 2 0 2 0 2 0 2 1 2 1 1 1 1 1 1 0 2 0 2 0 2 0 2 1 1 0 1 1 1 1 0 2 0 2 0 2 0 2 1 1 0 1 1 1 0 0 0 +| * * * 0 * 0 * 0 0 * 1 1 1 1 1 * * 0 * * * * * 1 * 1 1 1 1 1 1 1 * * * * * * * * * 1 1 1 1 1 1 * * * * * * * * * 1 1 1 1 1 0 0 0 +| 0 0 2 * 2 * 2 * * 2 * * 1 * * 2 2 * 2 0 2 0 2 * 2 * * * 1 1 * * 0 0 2 0 2 0 2 0 1 * * * 1 * * 0 0 2 0 2 0 2 1 1 * * * 1 * * * * +| 0 0 0 2 0 2 0 0 0 2 1 1 * 1 1 2 2 2 0 2 0 2 2 2 1 1 1 1 * * 1 1 0 0 0 2 0 2 0 0 1 1 1 1 * 1 1 0 0 0 2 0 2 1 1 1 1 1 1 * 1 0 0 0 +| 0 0 0 0 2 0 0 0 0 2 1 1 1 1 1 2 2 2 2 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 2 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 2 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 2 0 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 * * 2 * 0 0 0 0 * * * * * 1 1 * * * * * 2 2 1 1 1 1 1 2 1 * * * 0 0 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 * 0 * * * * 0 0 1 1 1 * * 1 1 0 0 1 * * * * * * * * * 1 1 0 * * +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 0 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 
0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 * 1 0 0 0 2 0 0 * * * * 1 1 1 1 0 * * * 2 2 1 1 1 1 1 1 1 1 0 0 0 +| * * * 0 0 0 * * * * 2 2 2 1 1 2 2 2 * * * * * * * * * * * * * 1 * * * * * * * 0 0 1 1 * * * * * 0 2 2 * * * * 1 * * * * * * * * +| 0 2 0 * * * 0 0 2 2 * * * * * * * * 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 2 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 * 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 2 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 1 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 * * * 2 2 2 1 1 2 2 2 2 2 2 2 * * * * * * * * 1 * * * * * * * * * * * * 1 1 * * * * * * * * * * * * * 1 1 * * * * +| * * * * * * * 0 2 2 * * * * * * * * * * * * 2 2 2 2 2 0 0 1 * 1 1 0 0 0 0 0 0 0 0 1 1 * * 1 1 0 0 0 2 2 2 2 1 1 1 1 * * 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 
2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 * 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 * * * * * * * * 1 1 1 1 1 1 * * * * 2 2 2 1 1 1 1 1 1 1 0 0 0 +| * * * * * * * 0 * * * * 2 1 1 2 2 2 2 2 2 2 2 * * * * * * * * * * 0 0 0 0 0 0 0 1 * * 1 * * * 0 0 0 2 * * * 1 1 1 1 1 * * * * * +| 0 0 0 0 0 0 0 0 2 2 2 2 * * * 2 2 2 * * * * * 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 * 1 1 1 0 0 0 2 2 2 2 * * * * * 1 1 2 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 * * * 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 * * * * * 1 1 * 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 * * * * * * * * * 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 * * * * * * * * * * * * * 0 1 1 1 1 * * 1 * * * * * * * * * * * * * * * * 0 +| * 0 0 0 0 0 0 0 2 2 * * * * * * * * 2 2 * * * * * * 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 * +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 
0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * 0 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 * * * 1 1 1 1 1 * * * * * * 0 1 1 1 1 1 1 1 0 * * * * * * * * * * * * * * * 0 +| * * 0 0 0 0 0 0 2 2 * * * * * * * * * 2 2 * * * * 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 * +| 0 0 * * * * * * * * 2 2 2 1 1 2 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * * 0 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 * 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 * * * * 2 2 2 2 2 2 2 2 * * * 2 1 1 1 1 1 * * * 0 * * 0 1 1 1 1 1 1 1 0 * * * 2 * * * * * * * 1 * * 0 0 +| 0 0 0 0 0 0 0 0 2 2 * * 2 1 1 2 * * 2 2 2 2 * * 2 2 2 2 1 1 1 1 1 0 0 0 * 0 0 0 1 1 1 1 1 1 1 0 0 0 2 * 2 2 1 1 1 1 1 1 0 0 * * +| * * * 0 0 0 0 * * * 2 2 2 1 1 2 2 2 * * * * 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 * * * * 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 
2 2 1 1 1 1 1 1 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * * * 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 * * 2 2 2 2 2 2 2 2 2 * * * 2 1 1 1 1 1 0 * * * * * 0 1 1 1 1 1 1 1 0 * * * 2 * * * * * * * * * 0 0 0 +| 0 0 0 0 0 0 0 0 2 * * * * 1 1 * * * * 2 2 * * * 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 * 2 2 1 1 1 1 1 0 0 * * 0 +| * * * 0 0 0 0 * * 2 2 2 2 1 1 2 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 * +| 0 0 0 * * * * 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 * * * * * * * 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 * * * * * * * * * * * * * 0 0 0 0 +| * 0 0 0 0 0 0 0 2 * * * * * * * * * * * * * * * 2 2 2 2 1 1 1 1 0 0 0 0 0 0 0 0 1 0 2 0 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 * * * * +| 0 _ _ _ _ _ _ _ _ 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1 0 0 0 2 2 2 2 2 2 1 1 1 0 0 0 0 0 +| diff --git a/examples/stencil/2.out b/examples/stencil/2.out new file mode 100644 index 0000000..0fccb9b --- /dev/null +++ 
b/examples/stencil/2.out @@ -0,0 +1,74 @@ +| 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 1 0 0 0 0 0 0 0 2 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 0 2 0 2 0 0 0 0 0 2 2 0 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * * * * * * 0 0 0 0 * * * * * * * * 1 1 1 * * * * * * * * * * 1 1 1 * * * * * +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 * * 2 2 2 2 2 1 * * * * 0 0 0 0 0 0 0 0 * * * 1 2 2 2 2 2 2 2 2 1 * * * 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 * 1 1 1 * 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * 1 * 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * 2 2 2 2 2 0 0 0 0 0 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 * * * * +| 0 2 * * * * * * * * * * 0 * * * * 1 1 1 1 1 1 1 1 1 1 * * * * * * * * * * * * * * * * * 1 1 * * * * * * * * * * 1 1 * * 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 2 2 2 2 2 2 2 2 1 * * 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 
0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 * * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * * 2 2 2 * * * * * * * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * +| 0 2 2 * * * 2 0 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * * * * * * * * * * * 0 0 0 * 1 1 1 2 2 2 2 2 2 2 2 1 * * 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 2 2 2 * * * * * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * * * 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 * * +| * * * 2 2 * * * * * * * * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 * * * * * * * * * * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * * 0 0 +| 0 2 2 * * 2 2 0 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * * 0 0 0 0 0 0 0 0 0 0 0 0 * * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 
0 2 2 2 1 1 * * 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * 2 2 2 2 2 * 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 * * 1 1 * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 * * * * * 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 1 1 * * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * 2 2 2 2 2 2 * * * * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * +| 0 2 * 2 2 2 * * 0 0 0 0 0 2 * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * * * * * 1 1 1 2 2 2 2 2 2 2 2 1 1 * 1 0 0 0 0 0 +| 0 2 2 * * * 2 2 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 * 1 1 1 1 1 1 1 * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * 2 2 2 2 2 * 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 * * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 * * 2 * * 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 * 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * 2 2 2 2 2 2 * * * * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * +| 0 2 * * 2 2 * * 2 0 0 0 0 2 * * 1 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * 0 0 0 * 1 1 1 2 2 2 2 2 2 2 2 1 1 * 1 0 0 0 0 0 +| 0 2 2 2 * * 2 2 2 0 0 0 0 2 2 2 * 1 1 1 1 1 1 1 1 1 1 2 * * 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * 2 
2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 1 * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 2 2 2 * * 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 * * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 * * 2 * 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 * 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * * 2 2 2 2 2 * 0 0 * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 * * * +| 0 2 2 * * * * * 2 * * 0 0 2 * * * 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * * 1 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * * 1 2 2 2 2 2 * * * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 * * * 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 * 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * * 2 2 2 2 2 * 0 0 0 * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 * * * * * 2 * * * 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 * * * * * 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * * * * +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 1 * * * * * * * * * * * 0 0 0 0 0 * * 1 1 
1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 1 * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * * 1 2 * * * * * * * * 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| * * * * * * * * * 0 0 0 * * * * 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 * +| 0 2 2 2 2 2 2 2 2 * * * 0 2 2 2 * * 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 * * * * 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 * * * * * * * * 0 0 0 0 0 0 * * * 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * * 1 * * 1 1 2 2 2 1 0 0 * * * * * * 0 0 0 * * * * * * * * * * * * * * * 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 +| diff --git a/examples/stencil/3.out b/examples/stencil/3.out new file mode 100644 index 0000000..6e07bc8 --- /dev/null +++ b/examples/stencil/3.out @@ -0,0 +1,75 @@ +| 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 2 1 0 0 0 0 0 0 0 0 0 2 2 2 0 2 0 1 2 1 1 1 1 1 2 2 2 +| 2 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
0 0 0 2 2 2 2 0 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 * * * * * * * * * * 2 * 2 * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * * 0 0 0 0 * * * * 2 2 2 * 1 1 1 1 1 1 1 * * 2 2 +| * * * * 0 0 0 0 0 0 0 0 0 0 * 0 * 0 2 0 2 0 2 2 * * * 1 1 1 1 1 1 1 * * * * * 0 0 * * * * 0 0 0 2 * * * 2 * * * * * * * 1 2 * * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 2 2 1 1 1 * * * * * * * 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 * 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 * * * * * * * * * * * * * * 0 * 2 2 2 2 2 1 1 1 1 1 1 1 1 1 * * * 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 * * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * * 2 2 * * * * * * * * * * 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * * 2 2 1 1 * * * * * * 2 2 2 +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * 1 1 1 1 1 1 * * 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 
2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 * 2 2 2 2 2 2 1 1 1 1 1 1 1 * * * * * * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 * * * * * * * * * * * * * * 0 * 2 2 2 2 2 1 1 * * * * * 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 * * * * 2 2 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 * * * * * 1 1 1 1 * 2 2 +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 * * * * * * 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 * * * * * * * * * * * 1 1 1 1 0 0 * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 * * * * * * * * * * * * * * * 2 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * * * * * * 2 2 +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 * * * * * * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * * 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 * * * * * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 * * * * * * * * * * * 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 * * * * 0 0 0 0 0 0 0 0 0 0 0 * 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 * * 2 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * * * 1 1 * 2 2 +| * * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 * * * * * * * * 1 1 1 1 1 1 1 0 0 0 0 0 * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 * * 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 * * * * * * * * * * * * * 0 0 * 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 * * * * 2 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * 1 1 1 1 * 2 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 * * * * * * * * * * * * * * * * * * * * * * 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 * * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 * * * * * * * * * * * * * * 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 * * * * * * 2 2 2 +| 2 2 2 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 * * * 1 1 1 1 1 1 * 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * * * * * * * 1 1 1 1 1 1 1 0 0 0 0 0 * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * * 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 * * * * * * * * 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 * * * * * * 1 1 1 2 2 2 +| 2 2 2 * * * * * * * 0 0 0 0 0 0 0 0 * 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 * * * * 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 * * * * * * * * * * * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * +| * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 * * * * * * * * * * 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 * * * * * * 1 1 1 1 1 1 1 1 1 0 * 0 0 0 0 0 0 0 2 2 2 * * 1 1 1 1 1 1 1 1 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 * * * * * * 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 * * 0 0 0 0 0 2 2 2 2 2 * * * * * * * * 2 2 2 +| 2 2 2 * * * * * * * * * * * 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 * * * * * * 2 2 2 2 1 1 1 1 1 1 1 1 * 2 2 +| 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 * 2 +| 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 * * * * * * * * 1 2 2 * +| _ 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 1 1 1 1 1 * * 2 2 +| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 _ _ +| diff --git a/examples/stencil/4.out b/examples/stencil/4.out new file mode 100644 index 0000000..33731c4 --- /dev/null +++ b/examples/stencil/4.out @@ -0,0 +1,78 @@ +| 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 0 2 0 2 0 2 0 0 0 0 2 0 +| 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 2 0 2 0 2 0 0 0 0 0 0 2 +| * * * * * * 0 * * * * 2 * 2 * 2 * 2 * 2 * 2 * * * * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
* * * 2 2 * * * * * * * * * * * * +| 2 0 2 0 0 0 * 0 0 0 0 * 0 * 0 * 0 * 0 * 0 * 0 2 0 2 0 2 0 2 0 1 * * 1 1 1 1 1 1 1 1 1 1 1 1 * 0 0 2 * * 2 2 0 2 0 0 0 0 0 0 0 0 +| 0 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 * * * * * * * +| 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * * * * 2 2 2 2 * * * 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 * 1 1 1 1 1 1 1 1 * 1 1 * 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 * 1 1 1 1 1 1 1 1 * * 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 * * * * * * +| 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 * * 1 1 1 1 * * * * 2 2 2 2 * * * * 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 * 1 1 1 1 1 1 1 1 1 * * 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 * * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * 
+| 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * 2 2 2 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 1 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 1 1 1 * * 1 1 * 1 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * +| 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 * * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * * 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * +| 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 * * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 * * * * * * * * +| 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 2 2 2 1 1 * * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * * * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 * * * * * * * +| 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 * * * * * * +| 0 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 
1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 * * * * * +| 0 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 * * * * * +| 0 0 0 * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 
2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * 1 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 * * * * +| 0 0 * * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * * 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * 1 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 * * * +| 0 * * * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * * * 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 _ _ _ _ _ _ _ _ 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 +| diff --git a/examples/stencil/6.out b/examples/stencil/6.out new file mode 100644 index 0000000..c026145 --- /dev/null +++ b/examples/stencil/6.out @@ -0,0 +1,83 @@ +| 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 
0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 +| 2 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 +| 2 * * * * * * 2 * 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 +| * 0 0 0 0 0 0 * 0 * 0 * 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 1 0 1 1 * 1 1 1 1 1 1 1 1 1 1 * +| 2 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 2 0 1 0 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 +| 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 +| * 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 1 1 * 1 1 * 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 1 1 * * 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 +| 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 +| * 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * 1 * 1 1 1 * 1 1 +| 2 0 0 0 * * * * * * * * * * * * 2 * 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * 0 * 0 * 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * 1 1 1 * * 1 1 1 * * 1 1 1 1 * 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 1 * 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * * 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 
1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * * 1 1 1 1 1 * 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 * 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * * 1 1 1 * 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 +| 2 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 +| 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 +| * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 _ _ _ _ _ _ 1 1 1 * * +| 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ 1 1 +| diff --git a/examples/stencil/Makefile.am b/examples/stencil/Makefile.am new file mode 100644 index 0000000..eadc857 --- /dev/null +++ b/examples/stencil/Makefile.am @@ -0,0 +1,152 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk +if STARPU_SIMGRID +LOADER_BIN = $(LAUNCHER) +endif + +AM_CFLAGS += $(APP_CFLAGS) + +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ +LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) +LIBS += $(STARPU_EXPORTED_LIBS) + +if STARPU_USE_MPI +LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la +AM_CPPFLAGS += -I$(top_srcdir)/mpi/include +LAUNCHER = $(STARPU_MPIEXEC) +LAUNCHER_ENV = $(MPI_RUN_ENV) +if STARPU_SIMGRID +NVCCFLAGS += --compiler-options -fPIC +endif +endif + +CC = $(CC_OR_MPICC) + +##################################### +# What to install and what to check # +##################################### + +check_PROGRAMS = $(STARPU_EXAMPLES) + +if !STARPU_SIMGRID +if STARPU_USE_MPI +if STARPU_MPI_CHECK +TESTS = $(STARPU_EXAMPLES) +endif +else +TESTS = $(STARPU_EXAMPLES) +endif +endif + +################### +# stencil example # +################### +STARPU_EXAMPLES = \ + stencil \ + implicit_stencil + +examplebindir = $(libdir)/starpu/examples/stencil + +examplebin_PROGRAMS = \ + stencil \ + implicit_stencil + +stencil_SOURCES = \ + life.c \ + stencil-kernels.c \ + stencil-tasks.c \ + stencil-blocks.c \ + stencil.c + +noinst_HEADERS = \ + stencil.h \ + implicit-stencil.h \ + shadow.h + +if STARPU_USE_CUDA +stencil_SOURCES += \ + life_cuda.cu \ + shadow.cu +endif + +if STARPU_USE_OPENCL +stencil_SOURCES += \ + life_opencl.c \ + shadow_opencl.c +endif + +implicit_stencil_SOURCES = \ + life.c \ + implicit-stencil-kernels.c \ + implicit-stencil-tasks.c \ + implicit-stencil-blocks.c \ + implicit-stencil.c + +if STARPU_USE_CUDA +implicit_stencil_SOURCES += \ + life_cuda.cu \ + shadow.cu +endif + +if STARPU_USE_OPENCL +implicit_stencil_SOURCES += \ + life_opencl.c \ + shadow_opencl.c +endif + +outs = \ + 
0.5.out \ + 0.out \ + 1.out \ + 2.out \ + 3.out \ + 4.out \ + 6.out \ + mpi.out + +EXTRA_DIST = $(outs) results run README + +pics: $(outs:.out=.xpm) + +CLEANFILES = *.gcno *.gcda *.xpm starpu_idle_microsec.log + + +.out.out2: + $(GREP) '^|' $< | tr -d ' ' > $@ + +.out2.xpm: + ( width=$$(expr $$(head -n 1 < $< | wc -c) - 1) ; \ + height=`wc -l < $<` ; \ + echo "/* XPM */" ; \ + echo "static char * test_xpm[] = {" ; \ + echo "\"$$width $$height 9 1\"," ; \ + echo "\"_ c None\"," ; \ + echo "\"0 c #FF0000\"," ; \ + echo "\"1 c #00FF00\"," ; \ + echo "\"2 c #0000FF\"," ; \ + echo "\"3 c #FFFF00\"," ; \ + echo "\"4 c #FF00FF\"," ; \ + echo "\"5 c #00FFFF\"," ; \ + echo "\"| c #FFFFFF\"," ; \ + echo "\"* c #000000\"," ; \ + < $< $(SED) -e 's/^/"/' -e 's/$$/",/' | $(SED) -e '$$s/",$$/"};/' ) > $@ + +view: + feh --zoom 800 -F 0.xpm 0.5.xpm 1.xpm 2.xpm 3.xpm 4.xpm 6.xpm mpi.xpm diff --git a/examples/stencil/Makefile.in b/examples/stencil/Makefile.in new file mode 100644 index 0000000..084c3c1 --- /dev/null +++ b/examples/stencil/Makefile.in @@ -0,0 +1,1673 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = 
: +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_2) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_USE_MPI_TRUE@am__append_8 = $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la +@STARPU_USE_MPI_TRUE@am__append_9 = -I$(top_srcdir)/mpi/include +@STARPU_SIMGRID_TRUE@@STARPU_USE_MPI_TRUE@am__append_10 = --compiler-options -fPIC +check_PROGRAMS = $(am__EXEEXT_1) +@STARPU_MPI_CHECK_TRUE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_TRUE@TESTS = $(am__EXEEXT_1) +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_FALSE@TESTS = $(am__EXEEXT_1) +examplebin_PROGRAMS = stencil$(EXEEXT) implicit_stencil$(EXEEXT) +@STARPU_USE_CUDA_TRUE@am__append_11 = \ +@STARPU_USE_CUDA_TRUE@ life_cuda.cu \ +@STARPU_USE_CUDA_TRUE@ shadow.cu + +@STARPU_USE_OPENCL_TRUE@am__append_12 = \ +@STARPU_USE_OPENCL_TRUE@ life_opencl.c \ +@STARPU_USE_OPENCL_TRUE@ shadow_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_13 = \ +@STARPU_USE_CUDA_TRUE@ life_cuda.cu \ +@STARPU_USE_CUDA_TRUE@ shadow.cu + +@STARPU_USE_OPENCL_TRUE@am__append_14 = \ +@STARPU_USE_OPENCL_TRUE@ life_opencl.c \ 
+@STARPU_USE_OPENCL_TRUE@ shadow_opencl.c + +subdir = examples/stencil +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = stencil$(EXEEXT) implicit_stencil$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +am__implicit_stencil_SOURCES_DIST = life.c implicit-stencil-kernels.c \ + implicit-stencil-tasks.c implicit-stencil-blocks.c \ + implicit-stencil.c life_cuda.cu shadow.cu life_opencl.c \ + shadow_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_1 = life_cuda.$(OBJEXT) \ +@STARPU_USE_CUDA_TRUE@ shadow.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_2 = life_opencl.$(OBJEXT) \ +@STARPU_USE_OPENCL_TRUE@ shadow_opencl.$(OBJEXT) +am_implicit_stencil_OBJECTS = life.$(OBJEXT) \ + implicit-stencil-kernels.$(OBJEXT) \ + implicit-stencil-tasks.$(OBJEXT) \ + implicit-stencil-blocks.$(OBJEXT) implicit-stencil.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) +implicit_stencil_OBJECTS = $(am_implicit_stencil_OBJECTS) +implicit_stencil_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = 
--silent +am__v_lt_1 = +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +am__stencil_SOURCES_DIST = life.c stencil-kernels.c stencil-tasks.c \ + stencil-blocks.c stencil.c life_cuda.cu shadow.cu \ + life_opencl.c shadow_opencl.c +am_stencil_OBJECTS = life.$(OBJEXT) stencil-kernels.$(OBJEXT) \ + stencil-tasks.$(OBJEXT) stencil-blocks.$(OBJEXT) \ + stencil.$(OBJEXT) $(am__objects_1) $(am__objects_2) +stencil_OBJECTS = $(am_stencil_OBJECTS) +stencil_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/implicit-stencil-blocks.Po \ + ./$(DEPDIR)/implicit-stencil-kernels.Po \ + ./$(DEPDIR)/implicit-stencil-tasks.Po \ + ./$(DEPDIR)/implicit-stencil.Po ./$(DEPDIR)/life.Po \ + ./$(DEPDIR)/life_opencl.Po ./$(DEPDIR)/loader-loader.Po \ + ./$(DEPDIR)/shadow_opencl.Po ./$(DEPDIR)/stencil-blocks.Po \ + ./$(DEPDIR)/stencil-kernels.Po ./$(DEPDIR)/stencil-tasks.Po \ + ./$(DEPDIR)/stencil.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + 
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(implicit_stencil_SOURCES) loader.c $(stencil_SOURCES) +DIST_SOURCES = $(am__implicit_stencil_SOURCES_DIST) loader.c \ + $(am__stencil_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk README +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = $(CC_OR_MPICC) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = 
@CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) \ + $(STARPU_EXPORTED_LIBS) $(am__append_8) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) $(am__append_10) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = 
@PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = 
@STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = 
@am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +@STARPU_USE_MPI_TRUE@LAUNCHER_ENV = $(MPI_RUN_ENV) +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +@STARPU_USE_MPI_TRUE@LAUNCHER = $(STARPU_MPIEXEC) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 
= +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +@STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ \ + -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) $(am__append_9) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ + +################### +# stencil example # +################### +STARPU_EXAMPLES = \ + stencil \ + implicit_stencil + +examplebindir = $(libdir)/starpu/examples/stencil +stencil_SOURCES = life.c stencil-kernels.c stencil-tasks.c \ + stencil-blocks.c stencil.c $(am__append_11) $(am__append_12) +noinst_HEADERS = \ + stencil.h \ + implicit-stencil.h \ + shadow.h + +implicit_stencil_SOURCES = life.c implicit-stencil-kernels.c \ + implicit-stencil-tasks.c implicit-stencil-blocks.c \ + implicit-stencil.c $(am__append_13) $(am__append_14) +outs = \ + 0.5.out \ + 0.out \ + 1.out \ + 2.out \ + 3.out \ + 4.out \ + 6.out \ + mpi.out + +EXTRA_DIST = $(outs) results run README +CLEANFILES = *.gcno *.gcda *.xpm starpu_idle_microsec.log +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .out .out2 .test .test$(EXEEXT) .trs .xpm +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign examples/stencil/Makefile'; \ + $(am__cd) 
$(top_srcdir) && \ + $(AUTOMAKE) --foreign examples/stencil/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = 
files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +implicit_stencil$(EXEEXT): $(implicit_stencil_OBJECTS) $(implicit_stencil_DEPENDENCIES) $(EXTRA_implicit_stencil_DEPENDENCIES) + @rm -f implicit_stencil$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(implicit_stencil_OBJECTS) $(implicit_stencil_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) 
$(LIBS) + +stencil$(EXEEXT): $(stencil_OBJECTS) $(stencil_DEPENDENCIES) $(EXTRA_stencil_DEPENDENCIES) + @rm -f stencil$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(stencil_OBJECTS) $(stencil_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-blocks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/life.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/life_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shadow_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-blocks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + 
+loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case 
"$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. +am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); 
$(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. 
contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? 
+stencil.log: stencil$(EXEEXT) + @p='stencil$(EXEEXT)'; \ + b='stencil'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +implicit_stencil.log: implicit_stencil$(EXEEXT) + @p='implicit_stencil$(EXEEXT)'; \ + b='implicit_stencil'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d 
$$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/implicit-stencil-blocks.Po + -rm -f ./$(DEPDIR)/implicit-stencil-kernels.Po + -rm -f ./$(DEPDIR)/implicit-stencil-tasks.Po + -rm -f ./$(DEPDIR)/implicit-stencil.Po + -rm -f ./$(DEPDIR)/life.Po + -rm -f ./$(DEPDIR)/life_opencl.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/shadow_opencl.Po + -rm -f ./$(DEPDIR)/stencil-blocks.Po + -rm -f ./$(DEPDIR)/stencil-kernels.Po + -rm -f ./$(DEPDIR)/stencil-tasks.Po + -rm -f ./$(DEPDIR)/stencil.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/implicit-stencil-blocks.Po + -rm -f ./$(DEPDIR)/implicit-stencil-kernels.Po + -rm -f ./$(DEPDIR)/implicit-stencil-tasks.Po + -rm -f ./$(DEPDIR)/implicit-stencil.Po + -rm -f ./$(DEPDIR)/life.Po + -rm -f ./$(DEPDIR)/life_opencl.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/shadow_opencl.Po + -rm -f ./$(DEPDIR)/stencil-blocks.Po + -rm -f ./$(DEPDIR)/stencil-kernels.Po + -rm -f ./$(DEPDIR)/stencil-tasks.Po + -rm -f ./$(DEPDIR)/stencil.Po + -rm -f Makefile +maintainer-clean-am: distclean-am 
maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " 
$(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. 
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +pics: $(outs:.out=.xpm) + +.out.out2: + $(GREP) '^|' $< | tr -d ' ' > $@ + +.out2.xpm: + ( width=$$(expr $$(head -n 1 < $< | wc -c) - 1) ; \ + height=`wc -l < $<` ; \ + echo "/* XPM */" ; \ + echo "static char * test_xpm[] = {" ; \ + echo "\"$$width $$height 9 1\"," ; \ + echo "\"_ c None\"," ; \ + echo "\"0 c #FF0000\"," ; \ + echo "\"1 c #00FF00\"," ; \ + echo "\"2 c #0000FF\"," ; \ + echo "\"3 c #FFFF00\"," ; \ + echo "\"4 c #FF00FF\"," ; \ + echo "\"5 c #00FFFF\"," ; \ + echo "\"| c #FFFFFF\"," ; \ + echo "\"* c #000000\"," ; \ + < $< $(SED) -e 's/^/"/' -e 's/$$/",/' | $(SED) -e '$$s/",$$/"};/' ) > $@ + +view: + feh --zoom 800 -F 0.xpm 0.5.xpm 1.xpm 2.xpm 3.xpm 4.xpm 6.xpm mpi.xpm + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/examples/stencil/README b/examples/stencil/README new file mode 100644 index 0000000..93550a3 --- /dev/null +++ b/examples/stencil/README @@ -0,0 +1,46 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +This is a sample 3D stencil application (here just using the game of life rules +for simplicity), split on the z axis. + +This is a suggested reading order: + +life.c +life.cu: Heart of the stencil computation: compute a new state from an old one. + +shadow.cu +shadow.h: Perform replication of data on X and Y edges, to fold the domain on +itself through mere replication of the source state. + +stencil.h: Declarations + +stencil-kernels.c: Computation Kernels + +stencil-blocks.c: Manage block and tags allocation + +stencil-tasks.c: Schedule tasks for updates and saves + +stencil.c: Main application + +*.out: various results according to beta value (communication vs computation +penalty ratio), run make pics or make view to get pictures. +mpi.out: results on MPI. + +results: a few results + +You can also use the implicit distributed flavour of this application (e.g. +with communications between processes automatically inferred by StarPU-MPI), +which is called implicit_stencil. diff --git a/examples/stencil/implicit-stencil-blocks.c b/examples/stencil/implicit-stencil-blocks.c new file mode 100644 index 0000000..538d7ee --- /dev/null +++ b/examples/stencil/implicit-stencil-blocks.c @@ -0,0 +1,446 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "implicit-stencil.h" +#include + +/* Manage block and tags allocation */ + +static struct block_description *blocks; +static unsigned sizex, sizey, sizez; +static unsigned nbz; +static unsigned *block_sizes_z; + +/* + * Tags for various codelet completion + */ + +/* + * common tag format: + */ +static starpu_tag_t tag_common(int z, int dir, int type) +{ + return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z; +} + +/* Completion of last update tasks */ +starpu_tag_t TAG_FINISH(int z) +{ + z = (z + nbz)%nbz; + + starpu_tag_t tag = tag_common(z, 0, 1); + return tag; +} + +/* Completion of the save codelet for MPI send/recv */ +starpu_tag_t TAG_START(int z, int dir) +{ + z = (z + nbz)%nbz; + + starpu_tag_t tag = tag_common(z, dir, 2); + return tag; +} + +/* + * common MPI tag format: + */ +static int mpi_tag_common(int z, int dir, int layer_or_boundary, int buffer) +{ + return (z<<12) | (layer_or_boundary << 8) | ((((1+dir)/2))<<4) | buffer; +} + +int MPI_TAG_LAYERS(int z, int buffer) +{ + z = (z + nbz)%nbz; + + /* No direction for layers ; layer is 0 */ + int tag = mpi_tag_common(z, 0, 0, buffer); + + return tag; +} + +int MPI_TAG_BOUNDARIES(int z, int dir, int buffer) +{ + z = (z + nbz)%nbz; + + int tag = mpi_tag_common(z, dir, 1, buffer); + + return tag; +} + + +/* + * Block descriptors + */ + +/* Compute the size of the different blocks */ +static void compute_block_sizes(void) +{ + block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned)); + STARPU_ASSERT(block_sizes_z); + + /* Perhaps the last chunk is smaller */ + unsigned default_block_size = (sizez+nbz-1)/nbz; + unsigned remaining = sizez; + + unsigned b; + for (b = 0; b < nbz; b++) + { + 
block_sizes_z[b] = MIN(default_block_size, remaining); + remaining -= block_sizes_z[b]; + } + + STARPU_ASSERT(remaining == 0); +} + +unsigned get_block_size(int bz) +{ + return block_sizes_z[bz]; +} + +struct block_description *get_block_description(int z) +{ + z = (z + nbz)%nbz; + + STARPU_ASSERT(blocks); /* the descriptor array must have been created; &blocks[z] would be non-NULL for any z > 0 */ + + return &blocks[z]; +} + +int get_block_mpi_node(int z) +{ + z = (z + nbz)%nbz; + return blocks[z].mpi_node; +} + +void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz) +{ + /* Store the parameters */ + nbz = _nbz; + sizex = _sizex; + sizey = _sizey; + sizez = _sizez; + + /* Create a grid of block descriptors */ + blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description)); + STARPU_ASSERT(blocks); + + /* What is the size of the different blocks ? */ + compute_block_sizes(); + + unsigned bz; + for (bz = 0; bz < nbz; bz++) + { + struct block_description * block = + get_block_description(bz); + + /* Which block is it ? */ + block->bz = bz; + + /* For simplicity, we store which are the neighbours blocks */ + block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz); + block->boundary_blocks[T] = get_block_description((bz+1)%nbz); + } +} + +void free_blocks_array() +{ + free(blocks); + free(block_sizes_z); +} + +/* + * Initialization of the blocks + */ + +void assign_blocks_to_workers(int rank) +{ + unsigned bz; + + /* NB: perhaps we could count a GPU as multiple workers */ + + /* how many workers are there ? */ + /*unsigned nworkers = starpu_worker_get_count();*/ + + /* how many blocks are on that MPI node ? */ +// unsigned nblocks = 0; +// for (bz = 0; bz < nbz; bz++) +// { +// struct block_description *block = +// get_block_description(bz); +// +// if (block->mpi_node == rank) +// nblocks++; +// } + + /* how many blocks per worker ?
*/ + /*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/ + + /* we now attribute up to nblocks_per_worker blocks per workers */ + unsigned attributed = 0; + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = + get_block_description(bz); + + if (block->mpi_node == rank) + { + unsigned workerid; + /* Manage initial block distribution between CPU and GPU */ + #if 0 + #if 1 + /* GPUs then CPUs */ + if (attributed < 3*18) + workerid = attributed / 18; + else + workerid = 3+ (attributed - 3*18) / 2; + #else + /* GPUs interleaved with CPUs */ + if ((attributed % 20) <= 1) + workerid = 3 + attributed / 20; + else if (attributed < 60) + workerid = attributed / 20; + else + workerid = (attributed - 60)/2 + 6; + #endif + #else + /* Only GPUS */ + workerid = (attributed / 21) % 3; + #endif + /*= attributed/nblocks_per_worker;*/ + + block->preferred_worker = workerid; + + attributed++; + } + } +} + + + +void assign_blocks_to_mpi_nodes(int world_size) +{ + unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; + + unsigned bz; + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = + get_block_description(bz); + + block->mpi_node = bz / nzblocks_per_process; + } +} + +static size_t allocated = 0; + +static void allocate_block_on_node(starpu_data_handle_t *handleptr, unsigned bz, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz) +{ + int ret; + size_t block_size = nx*ny*nz*sizeof(TYPE); + + /* Allocate memory */ +#if 1 + ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + STARPU_ASSERT(ret == 0); +#else + *ptr = malloc(block_size); + STARPU_ASSERT(*ptr); +#endif + + allocated += block_size; + +//#ifndef STARPU_SIMGRID +// /* Fill the blocks with 0 */ +// memset(*ptr, 0, block_size); +//#endif + + /* Register it to StarPU */ + starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE)); + + 
starpu_data_set_coordinates(*handleptr, 1, bz); +} + +static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz) +{ + void *ptr = (void *) starpu_block_get_local_ptr(handleptr); + size_t block_size = nx*ny*nz*sizeof(TYPE); + starpu_data_unregister(handleptr); + starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); +} + +void display_memory_consumption(int rank, double time) +{ + FPRINTF(stderr, "%lu B of memory were allocated on node %d in %f ms\n", (unsigned long)allocated, rank, time/1000); +} + +void allocate_memory_on_node(int rank) +{ + unsigned bz; + + /* Correctly allocate and declare all data handles to StarPU. */ + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = get_block_description(bz); + int node = block->mpi_node; + unsigned size_bz = block_sizes_z[bz]; + + if (node == rank) + { + /* Main blocks */ + allocate_block_on_node(&block->layers_handle[0], bz, &block->layers[0], + (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); + allocate_block_on_node(&block->layers_handle[1], bz, &block->layers[1], + (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); + + /* Boundary blocks : Top */ + allocate_block_on_node(&block->boundaries_handle[T][0], bz, &block->boundaries[T][0], + (sizex + 2*K), (sizey + 2*K), K); + allocate_block_on_node(&block->boundaries_handle[T][1], bz, &block->boundaries[T][1], + (sizex + 2*K), (sizey + 2*K), K); + + /* Boundary blocks : Bottom */ + allocate_block_on_node(&block->boundaries_handle[B][0], bz, &block->boundaries[B][0], + (sizex + 2*K), (sizey + 2*K), K); + allocate_block_on_node(&block->boundaries_handle[B][1], bz, &block->boundaries[B][1], + (sizex + 2*K), (sizey + 2*K), K); + } + /* Register void blocks to StarPU, that StarPU-MPI will request to + * neighbour nodes if needed for the local computation */ + else + { + /* Main blocks */ + starpu_block_data_register(&block->layers_handle[0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 
2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE)); + starpu_block_data_register(&block->layers_handle[1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE)); + + /* Boundary blocks : Top */ + starpu_block_data_register(&block->boundaries_handle[T][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); + starpu_block_data_register(&block->boundaries_handle[T][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); + + /* Boundary blocks : Bottom */ + starpu_block_data_register(&block->boundaries_handle[B][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); + starpu_block_data_register(&block->boundaries_handle[B][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); + } + +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + /* Register all data to StarPU-MPI, even the ones that are not + * allocated on the local node. 
*/ + + /* Main blocks */ + starpu_mpi_data_register(block->layers_handle[0], MPI_TAG_LAYERS(bz, 0), node); + starpu_mpi_data_register(block->layers_handle[1], MPI_TAG_LAYERS(bz, 1), node); + + /* Boundary blocks : Top */ + starpu_mpi_data_register(block->boundaries_handle[T][0], MPI_TAG_BOUNDARIES(bz, T, 0), node); + starpu_mpi_data_register(block->boundaries_handle[T][1], MPI_TAG_BOUNDARIES(bz, T, 1), node); + + /* Boundary blocks : Bottom */ + starpu_mpi_data_register(block->boundaries_handle[B][0], MPI_TAG_BOUNDARIES(bz, B, 0), node); + starpu_mpi_data_register(block->boundaries_handle[B][1], MPI_TAG_BOUNDARIES(bz, B, 1), node); +#endif + } + + /* Initialize all the data in parallel */ + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = get_block_description(bz); + int node = block->mpi_node; + + if (node == rank) + { + /* Set all the data to 0 */ + create_task_memset(sizex, sizey, bz); + + /* Initialize the first layer with some random data */ + create_task_initlayer(sizex, sizey, bz); + } + } + starpu_task_wait_for_all(); +} + +void free_memory_on_node(int rank) +{ + unsigned bz; + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = get_block_description(bz); + + int node = block->mpi_node; + + /* Main blocks: free with the same z extent they were allocated with, i.e. (size_bz + 2*K) planes, not K */ + if (node == rank) + { + free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), (block_sizes_z[bz] + 2*K)); + free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), (block_sizes_z[bz] + 2*K)); + } + else + { + starpu_data_unregister(block->layers_handle[0]); + starpu_data_unregister(block->layers_handle[1]); + } + + /* Boundary blocks : Top */ + if (node == rank) + { + free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K); + free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K); + } + else + { + starpu_data_unregister(block->boundaries_handle[T][0]); + starpu_data_unregister(block->boundaries_handle[T][1]); + } + + /* Boundary blocks : Bottom */ + if (node == rank) +
{ + free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K); + free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K); + } + else + { + starpu_data_unregister(block->boundaries_handle[B][0]); + starpu_data_unregister(block->boundaries_handle[B][1]); + } + } +} + +/* check how many cells are alive */ +void check(int rank) +{ + unsigned bz; + for (bz = 0; bz < nbz; bz++) + { + struct block_description *block = get_block_description(bz); + + int node = block->mpi_node; + + /* Main blocks */ + if (node == rank) + { + unsigned size_bz = block_sizes_z[bz]; +#ifdef LIFE + unsigned x, y, z; + unsigned sum = 0; + for (x = 0; x < sizex; x++) + for (y = 0; y < sizey; y++) + for (z = 0; z < size_bz; z++) + sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)]; + printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz); +#endif + } + } +} diff --git a/examples/stencil/implicit-stencil-kernels.c b/examples/stencil/implicit-stencil-kernels.c new file mode 100644 index 0000000..c8a10ff --- /dev/null +++ b/examples/stencil/implicit-stencil-kernels.c @@ -0,0 +1,763 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "implicit-stencil.h" + +/* Computation Kernels */ + +/* + * There are three codelets: + * + * - cl_update, which takes a block and the boundaries of its neighbours, loads + * the boundaries into the block and performs some update loops: + * + * comp. buffer save. buffers comp. buffer save. buffers comp. buffer + * | ... | + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy====>#N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy | | | + * +-------------+ +------------------+ | | + * | #N | + * ... + * | | +----------------+ +----------------------+ + * | | | #N bottom copy | | block #N bottom copy | + * ^ +------------------+ +----------------+ +----------------------+ + * | | #N-1 top copy <====#N-1 top copy | | block #N-1 | + * | +------------------+ +----------------+ | | + * Z ... + * + * - save_cl_top, which takes a block and its top boundary, and saves the top of + * the block into the boundary (to be given as bottom of the neighbour above + * this block). + * + * comp. buffer save. buffers comp. buffer save. buffers comp. buffer + * | ... | + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy <==== | + * +-------------+ +------------------+ |..................| + * | #N | + * ... + * | | +----------------+ +----------------------+ + * | | | #N bottom copy | | block #N bottom copy | + * ^ +------------------+ +----------------+ +----------------------+ + * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | + * | +------------------+ +----------------+ | | + * Z ... + * + * - save_cl_bottom, same for the bottom + * comp. buffer save. buffers comp. buffer save. buffers comp. buffer + * | ...
| + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy | | | + * +-------------+ +------------------+ | | + * | #N | + * ... + * |..................| +----------------+ +----------------------+ + * | ====>#N bottom copy | | block #N bottom copy | + * ^ +------------------+ +----------------+ +----------------------+ + * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | + * | +------------------+ +----------------+ | | + * Z ... + * + * The idea is that the computation buffers thus don't have to move, only their + * boundaries are copied to buffers that do move (be it CPU/GPU, GPU/GPU or via + * MPI) + * + * For each of the buffers above, there are two (0/1) buffers to make new/old switch costless. + */ + +#if 0 +# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) +#else +# define DEBUG(fmt, ...) (void) 0 +#endif + +/* Record which GPU ran which block, for nice pictures */ +int who_runs_what_len; +int *who_runs_what; +int *who_runs_what_index; +double *last_tick; + +/* Achieved iterations */ +static int achieved_iter; + +/* Record how many updates each worker performed */ +unsigned update_per_worker[STARPU_NMAXWORKERS]; + +static void record_who_runs_what(struct block_description *block) +{ + double now, now2, diff, delta = get_ticks() * 1000; + int workerid = starpu_worker_get_id_check(); + + now = starpu_timing_now(); + now2 = now - start; + diff = now2 - last_tick[block->bz]; + while (diff >= delta) + { + last_tick[block->bz] += delta; + diff = now2 - last_tick[block->bz]; + if (who_runs_what_index[block->bz] < who_runs_what_len) + who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = -1; + } + + if (who_runs_what_index[block->bz] < who_runs_what_len) + who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = global_workerid(workerid); +} + +static void 
check_load(struct starpu_block_interface *block, struct starpu_block_interface *boundary) +{ + /* Sanity checks */ + STARPU_ASSERT(block->nx == boundary->nx); + STARPU_ASSERT(block->ny == boundary->ny); + STARPU_ASSERT(boundary->nz == K); + + /* NB: this is not fully guaranteed ... but it's *very* likely and that + * makes our life much simpler */ + STARPU_ASSERT(block->ldy == boundary->ldy); + STARPU_ASSERT(block->ldz == boundary->ldz); +} + +/* + * Load a neighbour's boundary into block, CPU version + */ +static void load_subblock_from_buffer_cpu(void *_block, + void *_boundary, + unsigned firstz) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *)_block; + struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + TYPE *block_data = (TYPE *)block->ptr; + TYPE *boundary_data = (TYPE *)boundary->ptr; + memcpy(&block_data[offset], boundary_data, boundary_size); +} + +/* + * Load a neighbour's boundary into block, CUDA version + */ +#ifdef STARPU_USE_CUDA +static void load_subblock_from_buffer_cuda(void *_block, + void *_boundary, + unsigned firstz) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *)_block; + struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + TYPE *block_data = (TYPE *)block->ptr; + TYPE *boundary_data = (TYPE *)boundary->ptr; + cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); +} + +/* + * cl_update (CUDA version) + */ +static void update_func_cuda(void *descr[], void *arg) +{ + unsigned z; + 
starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + + int workerid = starpu_worker_get_id_check(); + DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + if (block->bz == 0) + FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); + else + DEBUG("!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + DEBUG("!!! RANK %d !!!\n", rank); +#endif + DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + + unsigned block_size_z = get_block_size(block->bz); + unsigned i; + update_per_worker[workerid]++; + + record_who_runs_what(block); + + /* + * Load neighbours' boundaries : TOP + */ + + /* The offset along the z axis is (block_size_z + K) */ + load_subblock_from_buffer_cuda(descr[0], descr[2], block_size_z+K); + load_subblock_from_buffer_cuda(descr[1], descr[3], block_size_z+K); + + /* + * Load neighbours' boundaries : BOTTOM + */ + load_subblock_from_buffer_cuda(descr[0], descr[4], 0); + load_subblock_from_buffer_cuda(descr[1], descr[5], 0); + + /* + * Stencils ... 
do the actual work here :) TODO + */ + + for (i=1; i<=K; i++) + { + struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; + TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr; + + /* Shadow data */ + cuda_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); + + /* And perform actual computation */ +#ifdef LIFE + cuda_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); +#else + cudaMemcpyAsync(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); +#endif /* LIFE */ + } +} +#endif /* STARPU_USE_CUDA */ + +/* + * Load a neighbour's boundary into block, OpenCL version + */ +#ifdef STARPU_USE_OPENCL +static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block, + struct starpu_block_interface *boundary, + unsigned firstz) +{ + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + cl_mem block_data = (cl_mem)block->dev_handle; + cl_mem boundary_data = (cl_mem)boundary->dev_handle; + + cl_command_queue cq; + starpu_opencl_get_current_queue(&cq); + cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL); + if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); +} + +/* + * cl_update (OpenCL version) + */ +static void update_func_opencl(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + + int workerid = starpu_worker_get_id_check(); + DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + if (block->bz == 0) + FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); + else + DEBUG("!!! 
DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + DEBUG("!!! RANK %d !!!\n", rank); +#endif + DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + + unsigned block_size_z = get_block_size(block->bz); + unsigned i; + update_per_worker[workerid]++; + + record_who_runs_what(block); + + cl_command_queue cq; + starpu_opencl_get_current_queue(&cq); + + /* + * Load neighbours' boundaries : TOP + */ + + /* The offset along the z axis is (block_size_z + K) */ + load_subblock_from_buffer_opencl(descr[0], descr[2], block_size_z+K); + load_subblock_from_buffer_opencl(descr[1], descr[3], block_size_z+K); + + /* + * Load neighbours' boundaries : BOTTOM + */ + load_subblock_from_buffer_opencl(descr[0], descr[4], 0); + load_subblock_from_buffer_opencl(descr[1], descr[5], 0); + + /* + * Stencils ... do the actual work here :) TODO + */ + + for (i=1; i<=K; i++) + { + struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; + TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle; + + /* Shadow data */ + opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); + + /* And perform actual computation */ +#ifdef LIFE + opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); +#else + cl_event event; + cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event); + if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); + +#endif /* LIFE */ + } +} +#endif /* STARPU_USE_OPENCL */ + +/* + * cl_update (CPU version) + */ +void update_func_cpu(void *descr[], void *arg) +{ + unsigned zz; + starpu_codelet_unpack_args(arg, &zz); + struct block_description *block = get_block_description(zz); + + int workerid = starpu_worker_get_id_check(); + 
DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + if (block->bz == 0) + DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); + else + DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + DEBUG("!!! RANK %d !!!\n", rank); +#endif + DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + + unsigned block_size_z = get_block_size(block->bz); + unsigned i; + update_per_worker[workerid]++; + + record_who_runs_what(block); + + /* + * Load neighbours' boundaries : TOP + */ + + /* The offset along the z axis is (block_size_z + K) */ + load_subblock_from_buffer_cpu(descr[0], descr[2], block_size_z+K); + load_subblock_from_buffer_cpu(descr[1], descr[3], block_size_z+K); + + /* + * Load neighbours' boundaries : BOTTOM + */ + load_subblock_from_buffer_cpu(descr[0], descr[4], 0); + load_subblock_from_buffer_cpu(descr[1], descr[5], 0); + + /* + * Stencils ... 
do the actual work here :) TODO + */ + + for (i=1; i<=K; i++) + { + struct starpu_block_interface *oldb = (struct starpu_block_interface *) descr[i%2], *newb = (struct starpu_block_interface *) descr[(i+1)%2]; + TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr; + + /* Shadow data */ + unsigned ldy = oldb->ldy, ldz = oldb->ldz; + unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz; + unsigned x, y, z; + unsigned stepx = 1; + unsigned stepy = 1; + unsigned stepz = 1; + unsigned idx = 0; + unsigned idy = 0; + unsigned idz = 0; + TYPE *ptr = old; + +# include "shadow.h" + + /* And perform actual computation */ +#ifdef LIFE + life_update(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); +#else + memcpy(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer)); +#endif /* LIFE */ + } +} + +/* Performance model and codelet structure */ +static struct starpu_perfmodel cl_update_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "cl_update" +}; + +struct starpu_codelet cl_update = +{ + .cpu_funcs = {update_func_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {update_func_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {update_func_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .model = &cl_update_model, + .nbuffers = 6, + .modes = {STARPU_RW, STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R} +}; + +/* + * Save the block internal boundaries to give them to our neighbours. 
+ */ + +/* CPU version */ +static void load_subblock_into_buffer_cpu(void *_block, + void *_boundary, + unsigned firstz) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *)_block; + struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + TYPE *block_data = (TYPE *)block->ptr; + TYPE *boundary_data = (TYPE *)boundary->ptr; + memcpy(boundary_data, &block_data[offset], boundary_size); +} + +/* CUDA version */ +#ifdef STARPU_USE_CUDA +static void load_subblock_into_buffer_cuda(void *_block, + void *_boundary, + unsigned firstz) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *)_block; + struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + TYPE *block_data = (TYPE *)block->ptr; + TYPE *boundary_data = (TYPE *)boundary->ptr; + cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); +} +#endif /* STARPU_USE_CUDA */ + +/* OPENCL version */ +#ifdef STARPU_USE_OPENCL +static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block, + struct starpu_block_interface *boundary, + unsigned firstz) +{ + check_load(block, boundary); + + /* We do a contiguous memory transfer */ + size_t boundary_size = K*block->ldz*block->elemsize; + + unsigned offset = firstz*block->ldz; + cl_mem block_data = (cl_mem)block->dev_handle; + cl_mem boundary_data = (cl_mem)boundary->dev_handle; + + cl_command_queue cq; + starpu_opencl_get_current_queue(&cq); + + cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, 
NULL); + if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); +} +#endif /* STARPU_USE_OPENCL */ + +/* Record how many top/bottom saves each worker performed */ +unsigned top_per_worker[STARPU_NMAXWORKERS]; +unsigned bottom_per_worker[STARPU_NMAXWORKERS]; + +/* top save, CPU version */ +void dummy_func_top_cpu(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + + int workerid = starpu_worker_get_id_check(); + top_per_worker[workerid]++; + + DEBUG("DO SAVE Bottom block %d\n", block->bz); + + /* The offset along the z axis is (block_size_z + K)- K */ + unsigned block_size_z = get_block_size(block->bz); + + load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z); + load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z); +} + +/* bottom save, CPU version */ +void dummy_func_bottom_cpu(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + STARPU_ASSERT(block); + + int workerid = starpu_worker_get_id_check(); + bottom_per_worker[workerid]++; + + DEBUG("DO SAVE Top block %d\n", block->bz); + + load_subblock_into_buffer_cpu(descr[0], descr[2], K); + load_subblock_into_buffer_cpu(descr[1], descr[3], K); +} + +/* top save, CUDA version */ +#ifdef STARPU_USE_CUDA +static void dummy_func_top_cuda(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + + int workerid = starpu_worker_get_id_check(); + top_per_worker[workerid]++; + + DEBUG("DO SAVE Top block %d\n", block->bz); + + /* The offset along the z axis is (block_size_z + K)- K */ + unsigned block_size_z = get_block_size(block->bz); + + load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z); + load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z); +} + +/* bottom save, CUDA version */ +static void 
dummy_func_bottom_cuda(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + (void) block; + + int workerid = starpu_worker_get_id_check(); + bottom_per_worker[workerid]++; + + DEBUG("DO SAVE Bottom block %d on CUDA\n", block->bz); + + load_subblock_into_buffer_cuda(descr[0], descr[2], K); + load_subblock_into_buffer_cuda(descr[1], descr[3], K); +} +#endif /* STARPU_USE_CUDA */ + +/* top save, OpenCL version */ +#ifdef STARPU_USE_OPENCL +static void dummy_func_top_opencl(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + + int workerid = starpu_worker_get_id_check(); + top_per_worker[workerid]++; + + DEBUG("DO SAVE Top block %d\n", block->bz); + + /* The offset along the z axis is (block_size_z + K)- K */ + unsigned block_size_z = get_block_size(block->bz); + + load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z); + load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z); +} + +/* bottom save, OPENCL version */ +static void dummy_func_bottom_opencl(void *descr[], void *arg) +{ + unsigned z; + starpu_codelet_unpack_args(arg, &z); + struct block_description *block = get_block_description(z); + (void) block; + + int workerid = starpu_worker_get_id_check(); + bottom_per_worker[workerid]++; + + DEBUG("DO SAVE Bottom block %d on OPENCL\n", block->bz); + + load_subblock_into_buffer_opencl(descr[0], descr[2], K); + load_subblock_into_buffer_opencl(descr[1], descr[3], K); +} +#endif /* STARPU_USE_OPENCL */ + +/* Performance models and codelet for save */ +static struct starpu_perfmodel save_cl_bottom_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "save_cl_bottom" +}; + +static struct starpu_perfmodel save_cl_top_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "save_cl_top" +}; + +struct starpu_codelet save_cl_bottom = +{ + .cpu_funcs = 
{dummy_func_bottom_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_func_bottom_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_func_bottom_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .model = &save_cl_bottom_model, + .nbuffers = 4, + .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} +}; + +struct starpu_codelet save_cl_top = +{ + .cpu_funcs = {dummy_func_top_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_func_top_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_func_top_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .model = &save_cl_top_model, + .nbuffers = 4, + .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} +}; + +/* Memset a block's buffers */ +void memset_func(void *descr[], void *arg) +{ + (void)descr; + unsigned sizex, sizey, bz; + starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); + struct block_description *block = get_block_description(bz); + unsigned size_bz = get_block_size(bz); + + unsigned x,y,z; + for (x = 0; x < sizex + 2*K; x++) + { + for (y = 0; y < sizey + 2*K; y++) + { + /* Main blocks */ + for (z = 0; z < size_bz + 2*K; z++) + { + block->layers[0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + block->layers[1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + } + for (z = 0; z < K; z++) + { + /* Boundary blocks : Top */ + block->boundaries[T][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + block->boundaries[T][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + + /* Boundary blocks : Bottom */ + block->boundaries[B][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + block->boundaries[B][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; + } + } + } + //memset(block->layers[0], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 2*K)*sizeof(block->layers[0])); + //memset(block->layers[1], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 
2*K)*sizeof(block->layers[1])); + + //memset(block->boundaries[T][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][0])); + //memset(block->boundaries[T][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][1])); + + //memset(block->boundaries[B][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][0])); + //memset(block->boundaries[B][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][1])); +} + +static double memset_cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 0.000001; +} + +static struct starpu_perfmodel memset_model = +{ + .type = STARPU_COMMON, + .cost_function = memset_cost_function, + .symbol = "memset" +}; + +struct starpu_codelet cl_memset = +{ + .cpu_funcs = {memset_func}, + .cpu_funcs_name = {"memset_func"}, + .model = &memset_model, + .nbuffers = 6, + .modes = {STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W} +}; + +/* Initialize a block's layer */ +static void initlayer_func(void *descr[], void *arg) +{ + (void)descr; + unsigned sizex, sizey, bz; + starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); + struct block_description *block = get_block_description(bz); + unsigned size_bz = get_block_size(bz); + + /* Initialize layer with some random data */ + unsigned x, y, z; + unsigned sum = 0; + for (x = 0; x < sizex; x++) + for (y = 0; y < sizey; y++) + for (z = 0; z < size_bz; z++) + sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) 
% 2; +} + +static double initlayer_cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 0.000001; +} + +static struct starpu_perfmodel initlayer_model = +{ + .type = STARPU_COMMON, + .cost_function = initlayer_cost_function, + .symbol = "initlayer" +}; + +struct starpu_codelet cl_initlayer = +{ + .cpu_funcs = {initlayer_func}, + .model = &initlayer_model, + .nbuffers = 1, + .modes = {STARPU_W} +}; + diff --git a/examples/stencil/implicit-stencil-tasks.c b/examples/stencil/implicit-stencil-tasks.c new file mode 100644 index 0000000..83aa404 --- /dev/null +++ b/examples/stencil/implicit-stencil-tasks.c @@ -0,0 +1,198 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "implicit-stencil.h" + +#define BIND_LAST 1 + +/* + * Schedule tasks for updates and saves + */ + +/* + * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT + * + * dir is -1 or +1. + */ + +#if 0 +# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) +#else +# define DEBUG(fmt, ...) +#endif + +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) +#include +#undef starpu_task_insert +#define starpu_task_insert(...) 
starpu_mpi_task_insert(MPI_COMM_WORLD, __VA_ARGS__) +#endif + +/* + * Schedule initialization tasks + */ + +void create_task_memset(unsigned sizex, unsigned sizey, unsigned z) +{ + struct block_description *descr = get_block_description(z); + + int ret = starpu_task_insert(&cl_memset, + STARPU_VALUE, &sizex, sizeof(unsigned), + STARPU_VALUE, &sizey, sizeof(unsigned), + STARPU_VALUE, &z, sizeof(unsigned), + STARPU_W, descr->layers_handle[0], + STARPU_W, descr->layers_handle[1], + STARPU_W, descr->boundaries_handle[T][0], + STARPU_W, descr->boundaries_handle[T][1], + STARPU_W, descr->boundaries_handle[B][0], + STARPU_W, descr->boundaries_handle[B][1], + 0); + + if (ret) + { + FPRINTF(stderr, "Could not submit task memset: %d\n", ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + +void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z) +{ + struct block_description *descr = get_block_description(z); + + int ret = starpu_task_insert(&cl_initlayer, + STARPU_VALUE, &sizex, sizeof(unsigned), + STARPU_VALUE, &sizey, sizeof(unsigned), + STARPU_VALUE, &z, sizeof(unsigned), + STARPU_W, descr->layers_handle[0], + 0); + + if (ret) + { + FPRINTF(stderr, "Could not submit task initlayer: %d\n", ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + +/* + * Schedule saving boundaries of blocks to communication buffers + */ + +static void create_task_save_local(unsigned z, int dir) +{ + struct block_description *descr = get_block_description(z); + struct starpu_codelet *codelet; + int ret; + + codelet = (dir == -1)?&save_cl_bottom:&save_cl_top; + ret = starpu_task_insert(codelet, + STARPU_VALUE, &z, sizeof(unsigned), + STARPU_R, descr->layers_handle[0], + STARPU_R, descr->layers_handle[1], + STARPU_W, descr->boundaries_handle[(1-dir)/2][0], + STARPU_W, descr->boundaries_handle[(1-dir)/2][1], + STARPU_PRIORITY, STARPU_MAX_PRIO, + 0); + + if (ret) + { + FPRINTF(stderr, "Could not submit task save: %d\n", ret); + if (ret == -ENODEV) + 
exit(77); + STARPU_ABORT(); + } +} + +/* + * Schedule update computation in computation buffer + */ + +void create_task_update(unsigned iter, unsigned z, int local_rank) +{ + STARPU_ASSERT(iter != 0); + + unsigned old_layer = (K*(iter-1)) % 2; + unsigned new_layer = (old_layer + 1) % 2; + + struct block_description *descr = get_block_description(z); + struct block_description *bottom_neighbour = descr->boundary_blocks[B]; + struct block_description *top_neighbour = descr->boundary_blocks[T]; + + struct starpu_codelet *codelet = &cl_update; + + // Simple-level prio + //int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank)) ? STARPU_MAX_PRIO : STARPU_DEFAULT_PRIO; + + // Two-level prio + int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank)) ? STARPU_MAX_PRIO : + ((bottom_neighbour->boundary_blocks[B]->mpi_node != local_rank) || (top_neighbour->boundary_blocks[T]->mpi_node != local_rank)) ? STARPU_MAX_PRIO-1 : STARPU_DEFAULT_PRIO; + + int ret = starpu_task_insert(codelet, + STARPU_VALUE, &z, sizeof(unsigned), + STARPU_RW, descr->layers_handle[old_layer], + STARPU_RW, descr->layers_handle[new_layer], + STARPU_R, bottom_neighbour->boundaries_handle[T][old_layer], + STARPU_R, bottom_neighbour->boundaries_handle[T][new_layer], + STARPU_R, top_neighbour->boundaries_handle[B][old_layer], + STARPU_R, top_neighbour->boundaries_handle[B][new_layer], + STARPU_PRIORITY, prio, + 0); + if (ret) + { + FPRINTF(stderr, "Could not submit task update block: %d\n", ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + +/* + * Create all the tasks + */ +void create_tasks(int rank) +{ + int iter; + int bz; + int niter = get_niter(); + int nbz = get_nbz(); + + for (iter = 0; iter <= niter; iter++) + { + for (bz = 0; bz < nbz; bz++) + { + if ((iter > 0) && ((get_block_mpi_node(bz) == rank)|| (get_block_mpi_node(bz+1) == rank)|| (get_block_mpi_node(bz-1) == rank))) + create_task_update(iter, 
bz, rank); + } + + for (bz = 0; bz < nbz; bz++) + { + if (iter != niter) + { + int node_z = get_block_mpi_node(bz); + int node_z_and_b = get_block_mpi_node(bz-1); + int node_z_and_t = get_block_mpi_node(bz+1); + + if ((node_z == rank) || ((node_z != node_z_and_b) && (node_z_and_b == rank))) + create_task_save_local(bz, +1); + + if ((node_z == rank) || ((node_z != node_z_and_t) && (node_z_and_t == rank))) + create_task_save_local(bz, -1); + } + } + } +} diff --git a/examples/stencil/implicit-stencil.c b/examples/stencil/implicit-stencil.c new file mode 100644 index 0000000..02877a6 --- /dev/null +++ b/examples/stencil/implicit-stencil.c @@ -0,0 +1,398 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "implicit-stencil.h" + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +/* Main application */ + +/* default parameter values */ +static unsigned bind_tasks = 0; + +static unsigned ticks = 1000; + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 4; +#define SIZE 16 +#define NBZ 8 +#else +static unsigned niter = 32; +#define SIZE 128 +#define NBZ 64 +#endif + +/* Problem size */ +static unsigned sizex = SIZE; +static unsigned sizey = SIZE; +static unsigned sizez = NBZ*SIZE; + +/* Number of blocks (scattered over the different MPI processes) */ +unsigned nbz = NBZ; + +double start; +double begin, end; +double timing; + +/* + * Initialization + */ + +unsigned get_bind_tasks(void) +{ + return bind_tasks; +} + +unsigned get_nbz(void) +{ + return nbz; +} + +unsigned get_niter(void) +{ + return niter; +} + +unsigned get_ticks(void) +{ + return ticks; +} + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-b") == 0) + { + bind_tasks = 1; + } + + if (strcmp(argv[i], "-nbz") == 0) + { + nbz = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-sizex") == 0) + { + sizex = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-sizey") == 0) + { + sizey = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-sizez") == 0) + { + sizez = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-niter") == 0) + { + niter = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-ticks") == 0) + { + ticks = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + fprintf(stderr, "Usage : %s [options...]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "-b bind tasks on CPUs/GPUs\n"); + fprintf(stderr, "-nbz Number of blocks on Z axis (%u by default)\n", nbz); + fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez); + fprintf(stderr, "-niter Number of iterations (%u by default)\n", niter); + fprintf(stderr, 
"-ticks How often to put ticks in the output (ms, %u by default)\n", ticks); + exit(0); + } + } + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + { + sizex = sizey = 3; + nbz = 10; + sizez = nbz*3; + } +#endif +} + +static void init_problem(int argc, char **argv, int rank, int world_size) +{ + parse_args(argc, argv); + + create_blocks_array(sizex, sizey, sizez, nbz); + + /* Select the MPI process which should compute the different blocks */ + assign_blocks_to_mpi_nodes(world_size); + + assign_blocks_to_workers(rank); + + /* Allocate the different memory blocks, if used by the MPI process */ + start = starpu_timing_now(); + + allocate_memory_on_node(rank); + + end = starpu_timing_now(); + timing = end - begin; + + display_memory_consumption(rank, timing); + + who_runs_what_len = 2*niter; + who_runs_what = (int *) calloc(nbz * who_runs_what_len, sizeof(*who_runs_what)); + who_runs_what_index = (int *) calloc(nbz, sizeof(*who_runs_what_index)); + last_tick = (double *) calloc(nbz, sizeof(*last_tick)); +} + +static void free_problem(int rank) +{ + free_memory_on_node(rank); + free_blocks_array(); + free(who_runs_what); + free(who_runs_what_index); + free(last_tick); +} + +/* + * Main body + */ + +void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) +{ + unsigned total = 0; + int worker; + + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + total += task_per_worker[worker]; + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + { + if (task_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); + } + } +} + +unsigned global_workerid(unsigned local_workerid) +{ +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + unsigned workers_per_node = starpu_worker_get_count(); + + return (local_workerid + rank*workers_per_node); 
+#else + return local_workerid; +#endif +} + +int main(int argc, char **argv) +{ + int rank; + int world_size; + int ret; + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int thread_support; + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) + { + FPRINTF(stderr, "MPI_Init_thread failed\n"); + } + if (thread_support == MPI_THREAD_FUNNELED) + FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); + if (thread_support < MPI_THREAD_FUNNELED) + FPRINTF(stderr,"Warning: MPI does not have thread support!\n"); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); +#else + rank = 0; + world_size = 1; +#endif + + if (rank == 0) + { + FPRINTF(stderr, "Running on %d nodes\n", world_size); + fflush(stderr); + } + + struct starpu_conf conf; + starpu_conf_init(&conf); + + /*nbz is a global variable, this example doesn't support Master-Slave*/ + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + ret = starpu_mpi_init(NULL, NULL, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); +#endif + +#ifdef STARPU_USE_OPENCL + opencl_life_init(); + opencl_shadow_init(); +#endif /*STARPU_USE_OPENCL*/ + + init_problem(argc, argv, rank, world_size); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); +#endif + if (rank == 0) + FPRINTF(stderr, "GO !\n"); + + start = starpu_timing_now(); + + begin = starpu_timing_now(); + + create_tasks(rank); + + //starpu_tag_notify_from_apps(TAG_INIT_TASK); + + //wait_end_tasks(rank); + + starpu_task_wait_for_all(); + + end = starpu_timing_now(); + +#if 
defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); +#endif + +#if 0 + check(rank); +#endif + + /*display_debug(nbz, niter, rank);*/ + + /* timing in us */ + timing = end - begin; + + double min_timing = timing; + double max_timing = timing; + double sum_timing = timing; + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int reduce_ret; + + reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ + int *who_runs_what_tmp = malloc(nbz * who_runs_what_len * sizeof(*who_runs_what)); + reduce_ret = MPI_Reduce(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what)); + free(who_runs_what_tmp); + + /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ + int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index)); + reduce_ret = MPI_Reduce(who_runs_what_index, who_runs_what_index_tmp, nbz, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index)); + free(who_runs_what_index_tmp); +#endif + + if (rank == 0) + { +#if 1 + FPRINTF(stderr, "update:\n"); + func(update_per_worker); + FPRINTF(stderr, "top:\n"); 
+ func(top_per_worker); + FPRINTF(stderr, "bottom:\n"); + func(bottom_per_worker); +#endif +#if 1 + unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; + + int iter; + for (iter = 0; iter < who_runs_what_len; iter++) + { + unsigned last, bz; + last = 1; + for (bz = 0; bz < nbz; bz++) + { + if ((bz % nzblocks_per_process) == 0) + FPRINTF(stderr, "| "); + + if (who_runs_what_index[bz] <= iter) + FPRINTF(stderr,"_ "); + else + { + last = 0; + if (who_runs_what[bz + iter * nbz] == -1) + FPRINTF(stderr,"* "); + else + FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]); + } + } + FPRINTF(stderr, "\n"); + + if (last) + break; + } +#endif + + fflush(stderr); + + FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size); + FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000); + FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000); + FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); + } + + free_problem(rank); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + starpu_mpi_shutdown(); +#endif + + starpu_shutdown(); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + MPI_Finalize(); +#endif + +#ifdef STARPU_USE_OPENCL + opencl_life_free(); + opencl_shadow_free(); +#endif /*STARPU_USE_OPENCL*/ + + return 0; +} diff --git a/examples/stencil/implicit-stencil.h b/examples/stencil/implicit-stencil.h new file mode 100644 index 0000000..5354a79 --- /dev/null +++ b/examples/stencil/implicit-stencil.h @@ -0,0 +1,153 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __IMPLICIT_STENCIL_H__ +#define __IMPLICIT_STENCIL_H__ + +#include +#include +#include + +#ifndef __CUDACC__ +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) +#include +#include +#endif +#endif + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define LIFE + +#ifdef LIFE +#define TYPE unsigned char +extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); +#else +#define TYPE float +#endif + +#define K 1 + +#define NDIRS 2 + +/* Split only on the z axis to make things simple */ +typedef enum +{ + B = 0, + T = 1 +} direction; + +/* Description of a domain block */ +struct block_description +{ + /* Which MPI node should process that block ? */ + int mpi_node; + + unsigned preferred_worker; + + unsigned bz; + + + /* For each of the following buffers, there are two (0/1) buffers to + * make new/old switch costless. */ + + /* This is the computation buffer for this block, it includes + * neighbours' border to make computation easier */ + TYPE *layers[2]; + starpu_data_handle_t layers_handle[2]; + + /* This is the "save" buffer, i.e. a copy of our neighbour's border. 
+ * This one is used for CPU/GPU or MPI communication (rather than the + * whole domain block) */ + TYPE *boundaries[NDIRS][2]; + starpu_data_handle_t boundaries_handle[NDIRS][2]; + + /* Shortcut pointer to the neighbours */ + struct block_description *boundary_blocks[NDIRS]; +}; + +#define TAG_INIT_TASK ((starpu_tag_t)1) + +starpu_tag_t TAG_FINISH(int z); +starpu_tag_t TAG_START(int z, int dir); +int MPI_TAG0(int z, int iter, int dir); +int MPI_TAG1(int z, int iter, int dir); + +#define MIN(a,b) ((a)<(b)?(a):(b)) + +void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz); +void free_blocks_array(); +struct block_description *get_block_description(int z); +void assign_blocks_to_mpi_nodes(int world_size); +void allocate_memory_on_node(int rank); +void assign_blocks_to_workers(int rank); +void create_tasks(int rank); +void wait_end_tasks(int rank); +void check(int rank); +void free_memory_on_node(int rank); + +void display_memory_consumption(int rank, double time); + +int get_block_mpi_node(int z); +unsigned get_block_size(int z); +unsigned get_bind_tasks(void); + +unsigned get_nbz(void); +unsigned get_niter(void); +unsigned get_ticks(void); + +unsigned global_workerid(unsigned local_workerid); + +void create_task_memset(unsigned sizex, unsigned sizey, unsigned z); +void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z); +void create_task_update(unsigned iter, unsigned z, int local_rank); +void create_task_save(unsigned iter, unsigned z, int dir, int local_rank); + +extern int starpu_mpi_initialize(void); +extern int starpu_mpi_shutdown(void); + +/* kernels */ +extern struct starpu_codelet cl_update; +extern struct starpu_codelet save_cl_bottom; +extern struct starpu_codelet save_cl_top; +extern struct starpu_codelet cl_memset; +extern struct starpu_codelet cl_initlayer; + +extern unsigned update_per_worker[STARPU_NMAXWORKERS]; +extern unsigned top_per_worker[STARPU_NMAXWORKERS]; +extern unsigned 
bottom_per_worker[STARPU_NMAXWORKERS]; + +extern double start; +extern int who_runs_what_len; +extern int *who_runs_what; +extern int *who_runs_what_index; +extern double *last_tick; + +#ifndef _externC +#define _externC +#endif + +_externC void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); +_externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); + +_externC void opencl_shadow_init(void); +_externC void opencl_shadow_free(void); +_externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); +_externC void opencl_life_init(void); +_externC void opencl_life_free(void); +_externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); + +#endif /* __IMPLICIT_STENCIL_H__ */ diff --git a/examples/stencil/life.c b/examples/stencil/life.c new file mode 100644 index 0000000..16a18b6 --- /dev/null +++ b/examples/stencil/life.c @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "stencil.h" + +/* Heart of the stencil computation: compute a new state from an old one. 
*/ + +void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) +{ + (void)bz; + int x, y, z, num, alive; + + for (z = iter; z < nz - iter; z++) + { + for (y = K; y < ny - K; y++) + { + for (x = K; x < nx - K; x++) + { + num = 0 + + old[x+(y+1)*ldy+(z+0)*ldz] + + old[x+(y+1)*ldy+(z+1)*ldz] + + old[x+(y+0)*ldy+(z+1)*ldz] + + old[x+(y-1)*ldy+(z+1)*ldz] + + old[x+(y-1)*ldy+(z+0)*ldz] + + old[x+(y-1)*ldy+(z-1)*ldz] + + old[x+(y+0)*ldy+(z-1)*ldz] + + old[x+(y+1)*ldy+(z-1)*ldz] + ; + alive = old[x+y*ldy+z*ldz]; + alive = (alive && num == 2) || num == 3; + newp[x+y*ldy+z*ldz] = alive; + } + } + } +} diff --git a/examples/stencil/life_cuda.cu b/examples/stencil/life_cuda.cu new file mode 100644 index 0000000..3302b8b --- /dev/null +++ b/examples/stencil/life_cuda.cu @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define _externC extern "C" +#include "stencil.h" + +/* Heart of the stencil computation: compute a new state from an old one. 
/*
 * CUDA kernel computing a new state from an old one.
 *
 * Each thread starts at x = K + (its global x index) and y = K + (its global
 * y index) and advances by the total number of threads launched in that
 * dimension, so any grid size covers the whole [K, nx - K) x [K, ny - K)
 * range.  The z range [iter, nz - iter) is walked entirely by every thread
 * (idz is fixed to 0 and stepz to 1; the 3D variant is commented out).
 *
 * For each cell, the eight neighbours in the (y, z) plane are summed from
 * `old' and the Game of Life rule is applied: the cell written to `newp' is 1
 * if the sum is 3, or if it is 2 and the cell is currently non-zero.
 */
extern "C" __global__ void cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
{
	unsigned idx = threadIdx.x + blockIdx.x * blockDim.x;
	unsigned idy = threadIdx.y + blockIdx.y * blockDim.y;
	//unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;
	unsigned idz = 0;
	unsigned stepx = blockDim.x * gridDim.x;
	unsigned stepy = blockDim.y * gridDim.y;
	//unsigned stepz = blockDim.z * gridDim.z;
	unsigned stepz = 1;
	unsigned x, y, z;
	unsigned num, alive;

	for (z = iter + idz; z < nz - iter; z += stepz)
		for (y = K + idy; y < ny - K; y += stepy)
		{
			for (x = K + idx; x < nx - K; x += stepx)
			{
				unsigned index = x + y*ldy + z*ldz;
				num = 0
					+ old[index+1*ldy+0*ldz]
					+ old[index+1*ldy+1*ldz]
					+ old[index+0*ldy+1*ldz]
					+ old[index-1*ldy+1*ldz]
					+ old[index-1*ldy+0*ldz]
					+ old[index-1*ldy-1*ldz]
					+ old[index+0*ldy-1*ldz]
					+ old[index+1*ldy-1*ldz]
					;
				alive = old[index];
				alive = (alive && num == 2) || num == 3;
				newp[index] = alive;
			}
		}
}

/*
 * Host-side launcher for cuda_life_update().
 *
 * Picks a 2D thread block of at most 512 threads: the x extent starts at 512
 * and is halved while half of it still covers nx, the y extent gets what is
 * left of the 512 budget and is shrunk against ny the same way.  The grid is
 * sized with a rounded-up division so that nx and ny are fully covered.
 * Launch errors are reported through STARPU_CUDA_REPORT_ERROR.
 */
extern "C" void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
{
	unsigned max_parallelism = 512;
	unsigned threads_per_dim_x = max_parallelism;
	while (threads_per_dim_x / 2 >= nx)
		threads_per_dim_x /= 2;
	unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
	while (threads_per_dim_y / 2 >= ny)
		threads_per_dim_y /= 2;
#if 0
	unsigned threads_per_dim_z = 4;
	dim3 dimBlock(threads_per_dim_x, threads_per_dim_y, threads_per_dim_z);
	dim3 dimGrid(nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z);
#else
	dim3 dimBlock(threads_per_dim_x, threads_per_dim_y);
	dim3 dimGrid((nx + threads_per_dim_x-1) / threads_per_dim_x, (ny + threads_per_dim_y-1) / threads_per_dim_y);
#endif
	/* NOTE(review): the execution configuration was missing from this copy
	 * of the file.  The grid and block are the ones computed above; the
	 * launch is put on StarPU's per-worker stream as StarPU CUDA kernels
	 * usually are -- confirm the stream argument against the codelet that
	 * calls this function. */
	cuda_life_update <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> (bz, old, newp, nx, ny, nz, ldy, ldz, iter);
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
}
*/ + +/* #define _externC extern "C" */ + +#include +#define CL_TARGET_OPENCL_VERSION 100 +#ifdef __APPLE__ +#include +#else +#include +#endif +#include + +#define str(x) #x + +#define clsrc(t,k) "__kernel void\n\ +#define TYPE " str(t) "\n\ +#define K " str(k) "\n\ +life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)\n\ +{\n \ + unsigned idx = get_global_id(0);\n \ + unsigned idy = get_global_id(1);\n \ + //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;\n \ + unsigned idz = 0;\n \ + unsigned stepx = get_global_size(0);\n \ + unsigned stepy = get_global_size(1);\n \ + //unsigned stepz = blockDim.z * gridDim.z;\n \ + unsigned stepz = 1;\n \ + unsigned x, y, z;\n \ + unsigned num, alive;\n \ + \n \ + for (z = iter + idz; z < nz - iter; z += stepz)\n \ + for (y = K + idy; y < ny - K; y += stepy) \n \ + {\n \ + for (x = K + idx; x < nx - K; x += stepx) \ + {\n \ + unsigned index = x + y*ldy + z*ldz;\n \ + num = 0\n \ + + old[index+1*ldy+0*ldz]\n \ + + old[index+1*ldy+1*ldz]\n \ + + old[index+0*ldy+1*ldz]\n \ + + old[index-1*ldy+1*ldz]\n \ + + old[index-1*ldy+0*ldz]\n \ + + old[index-1*ldy-1*ldz]\n \ + + old[index+0*ldy-1*ldz]\n \ + + old[index+1*ldy-1*ldz]\n \ + ;\n \ + alive = old[index];\n \ + alive = (alive && num == 2) || num == 3;\n \ + newp[index] = alive;\n \ + }\n \ + }\n \ +}" + +static const char * src = clsrc(TYPE,K); +static struct starpu_opencl_program program; + +void opencl_life_init(void) +{ + starpu_opencl_load_opencl_from_string(src, &program, NULL); +} + +void opencl_life_free(void) +{ + int ret = starpu_opencl_unload_opencl(&program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} + +void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) +{ +#if 0 + size_t dim[] = {nx, ny, nz}; +#else + size_t dim[] = {nx, ny, 1}; +#endif + + int devid,id; + cl_int err; + + id = starpu_worker_get_id_check(); + 
devid = starpu_worker_get_devid(id); + + cl_kernel kernel; + cl_command_queue cq; + err = starpu_opencl_load_kernel(&kernel, &cq, &program, "life_update", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + clSetKernelArg(kernel, 0, sizeof(bz), &bz); + clSetKernelArg(kernel, 1, sizeof(old), &old); + clSetKernelArg(kernel, 2, sizeof(newp), &newp); + clSetKernelArg(kernel, 3, sizeof(nx), &nx); + clSetKernelArg(kernel, 4, sizeof(ny), &ny); + clSetKernelArg(kernel, 5, sizeof(nz), &nz); + clSetKernelArg(kernel, 6, sizeof(ldy), &ldy); + clSetKernelArg(kernel, 7, sizeof(ldz), &ldz); + clSetKernelArg(kernel, 8, sizeof(iter), &iter); + + err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); +} diff --git a/examples/stencil/loader.c b/examples/stencil/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/examples/stencil/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", "starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + 
fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. */ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/examples/stencil/mpi.out b/examples/stencil/mpi.out new 
file mode 100644 index 0000000..4ffc978 --- /dev/null +++ b/examples/stencil/mpi.out @@ -0,0 +1,94 @@ +Warning: MPI only has funneled thread support, not serialized, hoping this will work +Running on 2 nodes +Warning: MPI only has funneled thread support, not serialized, hoping this will work +9 MB of memory were allocated on node 1 +9 MB of memory were allocated on node 0 +GO ! +update: + CPU 0 -> 2048 (100.00%) +top: + CPU 0 -> 2048 (100.00%) +bottom: + CPU 0 -> 2048 (100.00%) +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 +| * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 +| * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 +| * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 +| 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 +| * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 +| 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 
0 0 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 +| * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 +| 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * * * * * * * * * 0 0 0 0 0 0 0 0 * * * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 +| * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 +| 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 +| * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 
1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 +| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 +| * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 _ _ _ _ _ _ 1 1 1 1 * 1 1 1 * 1 1 1 1 +| 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 * 1 1 1 1 * +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 1 * 1 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 +| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ diff --git a/examples/stencil/results b/examples/stencil/results new file mode 100644 index 0000000..c951e61 --- /dev/null +++ b/examples/stencil/results @@ -0,0 +1,398 @@ +B=0 +| 0 1 1 1 2 0 0 1 0 1 1 0 2 0 0 2 1 0 2 1 0 2 0 1 2 0 2 1 2 0 0 1 0 1 1 2 0 1 0 2 0 1 0 0 2 0 1 2 0 2 2 1 0 0 2 1 0 0 2 0 0 1 2 1 +| 0 0 0 1 2 1 1 0 0 0 1 1 0 2 1 0 1 0 0 1 1 0 1 0 1 0 0 1 0 2 0 2 1 0 0 0 1 1 2 0 0 2 0 0 1 2 0 1 2 0 2 0 1 0 2 0 1 2 0 1 2 0 2 2 +| 2 0 1 2 1 2 0 2 0 1 0 2 2 1 0 0 2 0 0 0 2 0 0 0 1 0 1 0 1 0 2 1 2 0 2 2 0 2 2 1 0 0 1 0 0 1 2 1 0 0 0 2 2 0 1 0 2 1 2 0 1 0 0 1 +| 0 0 1 0 0 2 1 0 0 1 2 1 1 2 1 2 
1 0 2 2 1 0 0 0 0 1 2 1 0 0 1 2 1 0 0 1 0 2 0 1 2 1 2 1 0 1 0 2 0 0 0 0 2 1 0 0 2 0 1 0 1 2 1 2 +| 1 0 0 0 0 2 1 2 2 0 2 1 2 0 1 1 0 0 1 2 0 1 2 1 1 0 0 2 2 1 0 1 2 0 0 0 0 2 1 0 2 0 0 2 0 1 1 0 2 1 2 2 0 1 1 1 2 0 0 0 0 2 0 2 +| 0 0 1 1 0 1 1 2 1 2 2 1 0 1 0 2 2 0 1 2 1 0 1 0 1 0 0 2 1 0 2 1 0 0 2 0 0 1 0 1 2 0 2 0 1 2 0 1 1 0 2 0 0 2 1 0 1 0 0 1 2 0 2 0 +| 2 0 1 2 1 0 0 0 1 0 2 0 1 2 0 0 0 2 0 1 0 0 0 0 1 1 0 2 1 0 0 1 0 2 1 2 0 2 2 0 2 1 1 1 0 0 0 0 1 2 2 1 1 0 0 0 2 0 2 1 2 0 2 1 +| 1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 2 0 2 0 0 2 0 0 2 1 2 2 2 0 1 1 0 1 1 1 1 1 0 1 0 2 2 2 0 1 0 1 0 2 0 1 1 0 0 1 2 1 0 2 1 2 +| 2 0 1 2 0 2 0 1 0 2 1 0 0 0 0 0 2 2 1 0 2 2 1 0 2 0 1 0 0 2 0 0 2 0 1 1 0 0 0 2 2 1 2 0 1 2 0 2 0 0 2 2 1 0 0 2 0 2 1 0 1 0 2 0 +| 0 0 2 1 2 0 2 2 1 0 0 2 1 2 0 1 0 2 1 2 0 2 0 0 1 0 0 0 1 1 0 1 2 1 2 1 1 0 1 2 1 0 2 0 1 2 0 0 0 2 2 2 0 2 0 2 0 0 0 0 1 0 2 0 +| 0 0 0 1 2 0 0 1 0 1 2 2 0 0 1 1 0 2 1 0 2 2 1 1 0 0 2 0 1 2 0 0 0 0 1 2 0 0 0 2 1 0 0 2 0 2 1 2 0 0 2 1 2 0 2 1 2 1 2 1 0 2 0 1 +| 1 1 1 1 2 2 0 1 2 1 0 0 0 1 2 2 0 1 2 1 2 0 1 1 2 0 1 1 0 2 0 1 0 1 2 0 2 0 2 0 1 0 2 0 2 2 1 2 0 1 2 0 0 0 1 2 0 0 0 0 0 2 2 0 +| 0 2 0 0 2 0 1 1 1 0 0 1 2 0 1 1 2 0 0 0 0 1 0 0 2 0 1 0 0 1 0 0 1 2 1 0 2 2 1 1 0 2 1 2 0 2 1 0 0 0 1 0 2 1 2 1 0 2 0 1 2 1 0 0 +| 1 1 0 0 1 1 1 0 1 2 2 0 1 2 2 0 1 2 0 2 1 1 2 2 1 0 0 1 2 2 2 1 2 0 2 0 0 0 0 2 2 0 0 1 0 0 1 0 0 2 0 1 2 0 0 1 1 2 0 1 0 2 1 2 +| 1 2 2 1 0 0 0 1 0 2 0 0 2 1 1 1 1 1 0 1 2 1 2 1 0 2 2 0 2 0 2 1 0 0 1 2 0 1 2 0 1 2 0 0 1 0 2 1 1 2 0 1 0 2 2 0 0 0 0 0 1 0 0 1 +| 0 0 2 0 2 2 0 1 2 0 1 0 2 0 2 0 1 0 1 2 2 1 0 1 2 2 1 0 0 1 0 1 0 2 0 1 0 0 2 1 2 0 0 0 1 1 1 1 1 0 0 0 2 0 1 0 0 0 2 2 0 2 0 0 +| 0 0 1 0 0 0 1 0 0 1 2 0 2 0 2 0 2 1 0 1 0 1 0 2 0 0 0 1 0 0 1 2 1 0 2 2 1 1 2 0 2 1 1 2 2 2 0 0 2 1 2 0 0 0 2 1 1 2 2 0 1 0 1 0 +| 2 1 1 0 2 2 0 1 0 2 1 1 0 0 1 0 0 0 2 1 0 1 0 2 2 2 0 1 2 2 0 2 0 0 0 2 0 1 2 0 1 2 2 0 1 0 0 1 1 0 2 0 2 1 0 2 0 1 1 0 1 0 0 0 +| 2 0 2 0 2 1 1 0 1 0 1 2 1 0 2 0 0 1 0 2 0 2 0 2 0 1 0 1 0 2 0 1 2 
0 0 0 1 0 1 0 0 2 1 2 1 0 2 1 2 0 1 2 2 0 2 0 0 0 1 1 2 0 2 2 +| 2 1 0 2 0 2 1 2 0 2 0 0 1 0 0 1 0 0 0 0 2 1 2 0 2 1 2 2 0 2 1 0 2 0 2 2 2 1 0 2 2 1 0 2 1 0 0 0 1 0 0 1 2 1 0 0 0 2 1 1 1 0 2 1 +| 2 2 0 0 1 0 0 1 0 1 0 0 0 2 0 2 1 0 1 2 2 0 2 0 2 0 2 0 0 2 1 0 0 1 0 0 2 1 2 2 1 1 2 0 1 2 2 0 2 0 0 2 0 0 0 1 0 2 0 1 2 2 0 1 +| 2 0 2 1 2 0 2 2 1 2 0 0 0 2 1 0 0 2 2 0 0 1 2 2 0 2 2 0 0 0 1 2 1 0 0 0 2 0 2 2 2 0 0 1 2 0 1 1 0 1 2 1 0 0 0 0 1 0 0 2 1 1 0 1 +| 0 0 1 1 1 0 1 0 2 1 0 0 2 1 2 0 1 1 2 0 1 1 0 0 1 2 0 1 2 0 0 0 2 0 1 0 1 0 0 2 1 0 2 1 2 0 0 2 0 2 1 0 1 2 2 2 1 0 2 0 1 0 1 2 +| 2 2 0 2 1 0 2 0 0 0 0 0 1 2 0 1 0 2 0 1 0 0 2 0 2 0 2 1 0 2 1 0 2 2 0 1 0 1 0 1 2 0 0 1 2 1 0 0 1 2 1 2 0 1 0 1 2 2 0 0 1 2 1 1 +| 0 2 0 0 1 0 1 2 0 0 0 2 1 0 2 2 2 1 0 1 2 0 0 1 1 2 1 0 1 1 0 2 1 2 0 0 0 1 0 1 1 0 0 0 1 0 2 2 1 0 0 2 0 2 1 2 0 2 0 0 1 1 0 2 +| 2 1 1 2 0 2 0 0 0 0 1 2 2 0 0 2 0 2 1 0 2 0 2 0 1 1 0 2 1 0 1 0 0 0 2 2 0 2 1 0 0 2 2 2 0 1 2 1 0 0 2 0 1 0 0 0 0 1 2 1 0 2 0 2 +| 1 0 2 1 0 0 0 1 0 1 0 2 2 0 1 1 0 2 0 0 2 1 1 2 1 0 0 0 2 2 2 2 1 1 1 2 1 0 1 2 0 2 0 2 0 0 2 0 1 0 0 2 1 0 0 2 1 2 0 2 0 0 1 0 +| 1 1 2 2 1 2 0 2 0 0 2 0 2 0 1 0 1 0 0 2 1 0 1 2 1 1 2 0 2 1 2 1 0 0 2 2 0 2 0 0 0 2 0 0 0 1 0 1 0 2 0 1 1 2 2 0 0 2 2 0 2 0 0 0 +| 0 2 0 1 0 0 1 0 1 2 1 1 2 0 0 0 0 2 2 1 0 1 0 2 0 2 0 1 2 0 2 0 2 0 0 2 1 0 2 2 1 0 2 1 2 0 0 0 1 1 1 1 2 1 1 0 1 2 1 2 1 2 1 2 +| 0 2 0 0 0 1 1 0 1 1 2 0 0 2 2 0 2 2 2 2 2 0 2 0 1 2 0 0 0 0 1 1 0 2 0 1 2 0 2 1 0 2 2 0 0 2 2 0 1 0 2 2 1 2 0 0 2 2 1 0 0 0 0 1 +| 2 0 0 2 0 2 2 0 2 0 0 0 1 0 0 0 1 0 2 2 2 0 2 0 0 2 0 2 0 1 2 0 2 1 0 2 1 2 0 0 2 0 0 1 0 2 2 2 0 1 0 0 2 0 0 0 1 1 0 1 0 2 0 1 +| 0 2 2 0 0 2 0 2 0 0 2 1 1 0 0 0 2 0 1 2 0 2 2 1 0 2 0 0 1 0 2 2 0 0 0 1 1 2 1 1 0 2 0 1 0 0 2 1 0 2 1 0 1 0 2 0 2 2 2 2 1 1 0 2 +| 0 1 2 1 0 1 0 2 1 1 2 0 0 2 0 0 1 1 0 2 1 0 2 2 2 0 1 0 0 0 0 0 0 0 2 2 1 1 0 1 2 0 1 0 0 1 0 1 0 2 2 0 0 0 1 0 2 2 0 2 0 2 1 0 +| 1 0 1 0 2 0 2 2 0 2 1 1 0 2 0 0 0 2 2 0 2 2 0 1 0 0 1 0 2 1 0 0 0 1 2 0 0 2 0 0 0 0 2 2 1 1 2 2 1 0 
2 1 1 0 0 1 1 0 2 0 1 1 0 2 +| 0 2 0 2 1 1 0 2 0 1 2 0 2 1 2 2 2 2 0 0 0 1 1 0 2 0 1 2 0 1 2 2 0 0 0 1 2 0 0 0 2 0 1 0 2 1 2 0 0 0 0 0 2 0 0 2 0 2 2 2 0 0 0 2 +| 1 2 0 2 1 0 2 0 1 1 2 0 0 2 1 2 0 2 0 1 2 2 1 0 2 0 2 0 2 2 0 1 2 2 0 0 2 0 0 2 0 1 0 0 2 2 1 0 1 2 0 0 2 2 0 0 0 0 2 2 0 1 2 1 +| 1 0 1 1 0 1 0 2 1 0 1 1 1 1 1 0 0 0 2 2 1 0 0 1 1 0 0 2 0 2 0 0 2 0 0 0 0 2 2 0 0 1 1 0 2 1 1 2 1 0 2 1 1 0 0 0 0 0 1 2 0 0 0 0 +| 2 0 0 2 1 0 2 0 1 2 0 2 0 1 2 1 0 0 0 1 0 2 0 2 0 2 1 0 0 0 2 0 0 1 0 2 2 2 1 2 2 0 1 0 1 1 0 2 0 0 0 1 2 0 1 2 2 1 1 0 1 1 1 1 +| 1 0 2 0 1 0 0 0 0 0 0 0 0 2 2 2 2 0 2 0 0 0 1 0 2 1 0 0 0 2 0 1 2 2 2 0 2 0 1 1 2 2 2 2 2 0 1 0 2 2 2 0 1 1 0 1 2 2 0 1 1 2 0 2 +| 2 0 0 0 0 1 2 1 1 0 1 1 1 0 2 1 0 2 0 1 0 2 1 2 2 1 0 2 0 1 1 0 2 0 0 2 2 0 0 2 0 0 2 1 0 1 0 1 0 1 2 1 2 0 0 2 0 2 0 2 0 2 0 0 +| 1 0 1 2 2 0 1 2 1 0 0 2 1 2 2 1 0 1 0 0 0 1 1 0 0 2 1 0 0 2 1 1 1 0 0 0 0 0 0 2 1 1 0 2 2 1 1 0 2 2 2 0 1 0 0 0 2 0 0 2 1 1 0 2 +| 1 1 1 0 1 2 0 2 1 2 0 2 0 2 1 0 2 0 2 0 0 0 1 2 0 2 0 2 2 0 0 1 1 2 1 2 0 1 2 0 0 1 0 0 2 0 1 0 2 1 0 0 2 2 0 2 0 2 2 2 1 2 0 1 +| 1 1 0 2 2 1 2 0 0 1 0 2 2 0 2 0 1 0 2 1 0 0 0 0 2 0 2 1 0 1 2 1 0 0 1 2 0 1 0 1 0 1 1 0 0 2 2 2 2 0 1 1 0 0 0 0 2 0 1 2 2 1 2 0 +| 0 0 0 0 1 0 2 2 1 2 1 0 2 2 1 2 1 0 2 0 1 2 2 0 1 0 2 0 0 0 2 1 0 2 1 0 1 0 0 0 0 0 0 1 0 2 0 1 0 1 0 2 1 0 0 0 2 0 2 2 2 0 1 2 +| 0 1 0 2 0 0 1 0 1 1 0 0 0 1 2 2 0 1 0 2 2 1 0 0 2 0 1 1 2 2 2 0 1 1 2 2 1 1 2 0 2 1 2 2 2 0 0 2 2 0 1 0 0 0 0 0 2 0 1 1 2 2 1 0 +| 0 0 0 2 1 0 2 2 1 0 1 1 1 0 1 2 2 2 1 0 0 2 0 2 2 1 0 1 0 1 2 1 0 0 0 0 1 0 1 0 2 2 0 1 2 2 0 1 0 0 0 2 2 1 0 0 2 0 0 0 2 0 1 0 +| 0 1 0 0 0 0 0 1 0 0 0 1 0 1 1 2 0 2 0 1 1 2 2 0 1 1 0 1 2 1 0 1 0 2 1 0 1 1 0 2 1 1 0 2 2 1 1 0 2 2 0 0 0 0 2 1 0 2 0 0 0 1 2 2 +| 1 1 2 1 2 2 0 0 2 1 2 0 2 0 0 0 0 0 0 1 1 2 2 2 1 0 1 0 2 0 0 2 0 1 2 1 0 2 1 1 0 1 0 2 0 1 1 0 2 0 1 2 2 0 1 2 0 2 0 0 1 0 2 0 +| 2 0 0 0 1 0 2 0 2 0 0 0 2 1 1 0 1 2 0 2 1 1 1 1 1 0 2 1 1 2 0 2 0 2 1 2 0 2 1 0 0 2 1 1 0 2 0 2 0 0 1 0 0 0 1 0 2 0 2 0 1 0 2 0 +| 2 0 
2 0 1 0 0 1 1 0 0 0 1 2 2 1 0 2 1 1 0 1 0 2 1 0 0 0 1 2 0 0 0 0 1 2 1 2 1 0 0 0 0 0 1 2 0 1 0 2 2 0 0 0 2 1 0 1 2 0 0 1 0 0 +| 0 1 0 1 2 0 0 1 0 2 0 0 2 1 2 1 0 0 0 2 1 0 2 0 0 0 0 2 2 0 0 0 2 1 0 2 2 0 1 2 2 2 0 2 2 0 2 2 2 1 0 2 1 0 1 2 1 2 0 2 0 1 0 1 +| 2 0 2 0 1 2 0 1 0 0 2 1 1 0 0 0 1 0 1 2 0 2 1 0 1 1 2 2 0 0 2 0 0 0 2 1 0 0 1 2 2 0 0 0 0 2 0 2 2 2 1 0 0 0 0 1 0 0 2 1 1 2 0 2 +| 0 0 1 0 2 2 1 0 2 2 0 1 0 1 1 0 0 1 1 2 0 0 2 0 0 0 2 1 2 0 0 0 1 2 0 1 0 1 2 1 0 0 1 0 2 1 0 0 1 0 2 0 1 1 0 2 1 1 0 2 2 0 2 1 +| 1 0 1 1 0 1 2 0 2 1 0 0 1 0 0 0 0 0 0 0 0 2 0 2 0 0 0 2 1 0 2 2 0 2 1 1 1 2 0 1 0 0 0 1 2 0 1 2 1 2 2 0 2 0 1 1 2 1 2 0 1 2 1 0 +| 0 2 0 1 0 1 2 0 1 0 1 0 2 0 0 1 0 0 2 0 2 0 2 0 1 2 0 0 1 2 1 1 0 0 0 1 1 0 1 0 2 2 0 1 2 2 0 2 1 2 2 0 0 2 1 0 0 2 0 2 1 2 0 0 +| 2 2 2 0 0 0 0 2 1 0 0 0 2 2 1 2 0 1 0 2 0 0 1 0 2 0 1 2 0 0 0 2 0 2 0 1 1 2 1 0 2 2 2 1 2 2 2 1 2 2 0 1 1 1 2 0 2 0 0 2 2 0 1 0 +| 0 0 2 0 1 2 2 1 0 0 2 0 2 1 1 0 1 0 0 1 1 1 2 1 1 0 1 2 0 2 0 1 0 0 0 1 0 2 0 0 1 0 0 0 1 0 1 2 0 1 0 1 0 0 0 1 0 2 0 0 2 2 0 2 +| 0 2 1 0 1 1 0 2 0 2 2 1 2 1 1 1 0 0 1 2 2 2 0 1 0 0 1 2 1 0 2 0 0 1 0 2 0 0 0 2 2 1 0 0 0 2 1 2 0 2 1 0 1 0 1 0 2 0 2 0 1 2 2 2 +| 0 1 0 2 2 1 2 0 2 0 0 1 2 1 0 0 2 2 0 0 2 0 2 1 0 1 2 2 0 1 1 0 0 0 2 1 1 0 2 0 0 0 0 0 1 0 0 0 0 0 0 2 1 2 2 1 1 0 0 1 0 1 2 0 +| 2 1 0 0 1 1 2 2 0 1 0 2 0 2 1 0 2 0 1 0 0 2 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 2 0 1 0 1 0 2 1 0 2 2 2 0 0 2 1 1 2 1 2 1 1 0 1 2 0 1 +| 1 0 0 2 1 0 2 0 2 1 2 2 1 0 1 2 0 1 1 1 2 1 0 0 0 0 2 1 0 2 0 1 2 2 0 0 0 0 2 2 1 0 1 0 0 2 0 2 0 0 2 0 2 1 0 2 1 0 0 1 0 0 0 0 +| 0 1 0 2 0 2 1 0 1 2 0 0 2 0 0 2 2 0 1 2 1 2 1 0 0 2 2 0 1 2 2 0 0 2 0 1 2 0 1 2 0 1 0 2 0 1 0 2 0 0 0 0 0 0 1 2 1 1 0 0 1 1 0 2 +| 0 2 1 0 2 0 0 1 0 1 2 2 1 2 0 1 0 2 0 1 0 0 0 0 1 0 0 2 2 2 1 2 2 1 0 0 2 1 0 1 2 0 0 1 0 0 2 2 1 0 0 2 0 2 1 0 2 0 0 0 2 0 2 1 +| 0 0 1 2 0 2 2 2 1 0 2 0 2 2 0 0 1 0 1 0 2 2 0 0 0 1 2 2 1 1 1 0 0 1 2 1 0 1 0 1 2 2 0 2 0 2 2 0 2 0 2 2 0 2 1 2 0 0 2 0 2 2 0 0 + +B=1 +| 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 
0 0 0 0 2 2 2 1 0 1 2 2 2 2 2 1 2 1 0 0 0 0 1 0 0 2 0 2 0 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 0 1 1 1 +| 0 0 0 2 0 2 0 2 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 0 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 +| 0 0 0 0 2 0 2 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 +| 0 0 0 0 0 2 0 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 +| 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 2 0 2 1 1 1 1 1 1 1 2 2 0 2 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 0 2 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 2 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 
1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 
2 1 1 1 0 0 0 0 0 0 0 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 1 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 1 1 1 1 1 0 0 0 0 0 0 0 2 2 0 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 1 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 +| 0 0 
0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 +| 0 0 0 0 0 0 2 1 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 2 2 2 1 2 2 2 2 2 2 +| 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 2 2 2 2 2 2 2 1 0 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 +| 0 1 1 0 0 0 1 1 1 1 0 0 0 0 0 1 2 2 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 0 1 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 
2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 +| 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 2 2 0 0 0 0 0 2 2 2 + + +B=2 +| 2 0 2 0 2 0 0 0 2 2 2 0 0 0 0 2 0 2 1 1 1 1 1 1 1 1 2 0 2 0 2 0 2 1 1 1 1 0 2 0 2 0 2 0 2 0 1 2 1 1 1 1 1 0 0 0 2 0 2 0 2 0 2 0 +| 0 2 0 2 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 0 2 0 2 0 1 1 1 1 0 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 0 0 0 0 2 0 2 0 2 0 2 +| 2 0 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 0 2 0 1 1 1 1 1 0 0 0 2 0 2 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 0 2 0 2 0 +| 0 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 2 1 1 1 1 1 0 0 0 0 2 0 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 0 2 0 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 0 2 0 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 0 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 
1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 
1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 
2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 
2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 +| 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 + +B=3 +| 0 0 2 0 2 0 0 0 2 2 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 0 2 0 2 0 1 0 1 2 1 1 1 1 1 2 1 2 0 2 0 0 +| 0 0 0 2 0 0 0 0 2 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 2 1 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 2 0 2 0 2 0 2 0 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 2 0 2 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 0 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 2 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 2 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 
1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 
1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 
0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 
0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 +| 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 + + +B=4 +| 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 2 1 2 2 2 2 2 2 2 +| 2 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 
2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 +| 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 + +B=6 +| 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 
0 2 0 2 0 0 0 0 0 0 0 0 0 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 +| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 + diff --git a/examples/stencil/run b/examples/stencil/run new file 
mode 100755 index 0000000..46c25ee --- /dev/null +++ b/examples/stencil/run @@ -0,0 +1,28 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +[ -z "$N" ] && N=1 +export N +[ -z "$STARPU_SCHED_BETA" ] && STARPU_SCHED_BETA="2" +export STARPU_SCHED_BETA +echo sched $STARPU_SCHED 1>&2 +echo sched_beta $STARPU_SCHED_BETA 1>&2 +echo prefetch $STARPU_PREFETCH 1>&2 +echo calibrate $STARPU_CALIBRATE 1>&2 +echo ncpus $STARPU_NCPUS 1>&2 +echo ncuda $STARPU_NCUDA 1>&2 +echo N $N +./stencil -nbz $(($N * 64)) -sizex 128 -sizey 128 -sizez $(( $((16 * $N)) * 128 )) -niter 64 "$@" diff --git a/examples/stencil/shadow.cu b/examples/stencil/shadow.cu new file mode 100644 index 0000000..f21ee71 --- /dev/null +++ b/examples/stencil/shadow.cu @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define _externC extern "C" +#include "stencil.h" + +/* Perform replication of data on X and Y edges, to fold the domain on + itself through mere replication of the source state. */ + +extern "C" __global__ void cuda_shadow( int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) +{ + unsigned idx = threadIdx.x + blockIdx.x * blockDim.x; + unsigned idy = threadIdx.y + blockIdx.y * blockDim.y; + //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z; + unsigned idz = 0; + unsigned stepx = blockDim.x * gridDim.x; + unsigned stepy = blockDim.y * gridDim.y; + //unsigned stepz = blockDim.z * gridDim.z; + unsigned stepz = 1; + unsigned x, y, z; + +#include "shadow.h" +} + +extern "C" void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) +{ + unsigned max_parallelism = 512; + unsigned threads_per_dim_x = max_parallelism; + while (threads_per_dim_x / 2 >= nx) + threads_per_dim_x /= 2; + unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x; + while (threads_per_dim_y / 2 >= ny) + threads_per_dim_y /= 2; +#if 0 + unsigned threads_per_dim_z = 4; + dim3 dimBlock(threads_per_dim_x, threads_per_dim_y, threads_per_dim_z); + dim3 dimGrid(nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z); +#else + dim3 dimBlock(threads_per_dim_x, threads_per_dim_y); + dim3 dimGrid((nx + threads_per_dim_x-1) / threads_per_dim_x, (ny + threads_per_dim_y-1) / threads_per_dim_y); +#endif + cuda_shadow <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> (bz, ptr, nx, ny, nz, ldy, ldz, i); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/examples/stencil/shadow.h b/examples/stencil/shadow.h new file mode 100644 index 
0000000..4e182d9 --- /dev/null +++ b/examples/stencil/shadow.h @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Perform replication of data on X and Y edges, to fold the domain on + * itself through mere replication of the source state. +*/ + +/* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? 
*/ + if (idy == 0) + for (z = i-1 + idz; z < nz-(i-1); z += stepz) + for (x = K + idx; x < nx-K; x += stepx) + { + unsigned index = x+z*ldz; + ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy]; + ptr[index+(ny-K)*ldy] = ptr[index+K*ldy]; + } + + if (idx == 0) + for (z = i-1 + idz; z < nz-(i-1); z += stepz) + for (y = K + idy; y < ny-K; y += stepy) + { + unsigned index = y*ldy+z*ldz; + ptr[(K-1)+index] = ptr[(nx-K-1)+index]; + ptr[(nx-K)+index] = ptr[K+index]; + } + + if (idx == 0 && idy == 0) + for (z = i-1 + idz; z < nz-(i-1); z += stepz) + { + unsigned index = z*ldz; + ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index]; + ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index]; + ptr[(K-1)+(ny-K)*ldy+index] = ptr[(nx-K-1)+K*ldy+index]; + ptr[(nx-K)+(ny-K)*ldy+index] = ptr[K+K*ldy+index]; + } diff --git a/examples/stencil/shadow_opencl.c b/examples/stencil/shadow_opencl.c new file mode 100644 index 0000000..8c3ec6e --- /dev/null +++ b/examples/stencil/shadow_opencl.c @@ -0,0 +1,112 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "stencil.h" + +/* Perform replication of data on X and Y edges, to fold the domain on + * itself through mere replication of the source state. 
*/ + +#define str(x) #x + +#define clsrc(t,k) "__kernel void\n\ +#define TYPE " str(t) "\n\ +#define K " str(k) "\n\ +shadow(int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)\n\ +{\n\ + unsigned idx = get_global_id(0);\n\ + unsigned idy = get_global_id(1);\n\ + //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;\n\ + unsigned idz = 0;\n\ + unsigned stepx = get_global_size(0);\n\ + unsigned stepy = get_global_size(1);\n\ + //unsigned stepz = blockDim.z * gridDim.z;\n\ + unsigned stepz = 1;\n\ + unsigned x, y, z;\n\ + if (idy == 0)\n\ + for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\ + for (x = K + idx; x < nx-K; x += stepx) \ + {\n \ + unsigned index = x+z*ldz;\n\ + ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\ + ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\ + }\n\ +\n\ + if (idx == 0)\n\ + for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\ + for (y = K + idy; y < ny-K; y += stepy) \ + {\n \ + unsigned index = y*ldy+z*ldz;\n\ + ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\ + ptr[(nx-K)+index] = ptr[K+index];\n\ + }\n\ +\n\ + if (idx == 0 && idy == 0)\n\ + for (z = i-1 + idz; z < nz-(i-1); z += stepz) \ + {\n \ + unsigned index = z*ldz;\n\ + ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\ + ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\ + ptr[(K-1)+(ny-K)*ldy+index] = ptr[(nx-K-1)+K*ldy+index];\n\ + ptr[(nx-K)+(ny-K)*ldy+index] = ptr[K+K*ldy+index];\n\ + }\n\ +}" + +static const char * src = clsrc(TYPE,K); +static struct starpu_opencl_program program; + +void opencl_shadow_init(void) +{ + starpu_opencl_load_opencl_from_string(src, &program, NULL); +} + +void opencl_shadow_free(void) +{ + int ret = starpu_opencl_unload_opencl(&program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} + +void +opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) +{ +#if 0 + size_t dim[] = {nx, ny, nz}; +#else + size_t dim[] = {nx, ny, 1}; +#endif + + int devid,id; + id = 
starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + cl_kernel kernel; + cl_command_queue cq; + cl_int err; + + err = starpu_opencl_load_kernel(&kernel, &cq, &program, "shadow", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + clSetKernelArg(kernel, 0, sizeof(bz), &bz); + clSetKernelArg(kernel, 1, sizeof(ptr), &ptr); + clSetKernelArg(kernel, 2, sizeof(nx), &nx); + clSetKernelArg(kernel, 3, sizeof(ny), &ny); + clSetKernelArg(kernel, 4, sizeof(nz), &nz); + clSetKernelArg(kernel, 5, sizeof(ldy), &ldy); + clSetKernelArg(kernel, 6, sizeof(ldz), &ldz); + clSetKernelArg(kernel, 7, sizeof(i), &i); + + err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); +} diff --git a/examples/stencil/stencil-blocks.c b/examples/stencil/stencil-blocks.c new file mode 100644 index 0000000..1f554af --- /dev/null +++ b/examples/stencil/stencil-blocks.c @@ -0,0 +1,403 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include "stencil.h"
+#include <math.h> /* NOTE(review): header name was missing in this copy; <math.h> presumed - confirm against upstream */
+
+/* Manage block and tags allocation */
+
+static struct block_description *blocks;
+static unsigned sizex, sizey, sizez;
+static unsigned nbz;
+static unsigned *block_sizes_z;
+
+/*
+ * Tags for various codelet completion
+ */
+
+/*
+ * common tag format: type and direction in the upper 32 bits
+ * (type << 4 | (dir+1)/2), z in the lower bits.
+ */
+static starpu_tag_t tag_common(int z, int dir, int type)
+{
+	return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z;
+}
+
+/* Completion of last update tasks (z is wrapped modulo nbz) */
+starpu_tag_t TAG_FINISH(int z)
+{
+	z = (z + nbz)%nbz;
+
+	starpu_tag_t tag = tag_common(z, 0, 1);
+	return tag;
+}
+
+/* Completion of the save codelet for MPI send/recv (z is wrapped modulo nbz) */
+starpu_tag_t TAG_START(int z, int dir)
+{
+	z = (z + nbz)%nbz;
+
+	starpu_tag_t tag = tag_common(z, dir, 2);
+	return tag;
+}
+
+/*
+ * common MPI tag format:
+ * iter is actually not needed for coherency, but it makes debugging easier
+ */
+static int mpi_tag_common(int z, int iter, int dir, int buffer)
+{
+	return (((((iter << 12)|z)<<4) | ((1+dir)/2))<<4)|buffer;
+}
+
+/* MPI tag of buffer 0 for block z (wrapped modulo nbz) */
+int MPI_TAG0(int z, int iter, int dir)
+{
+	z = (z + nbz)%nbz;
+
+	int tag = mpi_tag_common(z, iter, dir, 0);
+
+	return tag;
+}
+
+/* MPI tag of buffer 1 for block z (wrapped modulo nbz) */
+int MPI_TAG1(int z, int iter, int dir)
+{
+	z = (z + nbz)%nbz;
+
+	int tag = mpi_tag_common(z, iter, dir, 1);
+
+	return tag;
+}
+
+
+
+/*
+ * Block descriptors
+ */
+
+/* Compute the size of the different blocks */
+static void compute_block_sizes(void)
+{
+	block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned));
+	STARPU_ASSERT(block_sizes_z);
+
+	/* Perhaps the last chunk is smaller */
+	unsigned default_block_size = (sizez+nbz-1)/nbz;
+	unsigned remaining = sizez;
+
+	unsigned b;
+	for (b = 0; b < nbz; b++)
+	{
+		block_sizes_z[b] = MIN(default_block_size, remaining);
+		remaining -= block_sizes_z[b];
+	}
+
+	STARPU_ASSERT(remaining == 0);
+}
+
+/* Size along z of block bz (bz is used as is, without wrapping) */
+unsigned get_block_size(int bz)
+{
+	return block_sizes_z[bz];
+}
+
+/* Descriptor of block z (wrapped modulo nbz) */
+struct block_description *get_block_description(int z)
+{
+	z = (z + nbz)%nbz;
+
+	/* &blocks[z] can never be NULL: what has to be checked is that the
+	 * array was created */
+	STARPU_ASSERT(blocks);
+
+	return &blocks[z];
+}
+
+/* MPI node owning block z (wrapped modulo nbz) */
+int get_block_mpi_node(int z)
+{
+	z = (z + nbz)%nbz;
+	return blocks[z].mpi_node;
+}
+
+/* Record the domain parameters and create the nbz block descriptors */
+void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz)
+{
+	/* Store the parameters */
+	nbz = _nbz;
+	sizex = _sizex;
+	sizey = _sizey;
+	sizez = _sizez;
+
+	/* Create a grid of block descriptors */
+	blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description));
+	STARPU_ASSERT(blocks);
+
+	/* What is the size of the different blocks ? */
+	compute_block_sizes();
+
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description * block =
+				get_block_description(bz);
+
+		/* Which block is it ? */
+		block->bz = bz;
+
+		/* For simplicity, we store which are the neighbours blocks */
+		block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz);
+		block->boundary_blocks[T] = get_block_description((bz+1)%nbz);
+	}
+}
+
+/* Release the descriptor and block size arrays */
+void free_blocks_array()
+{
+	free(blocks);
+	blocks = NULL;
+	free(block_sizes_z);
+	block_sizes_z = NULL;
+}
+
+/*
+ * Initialization of the blocks
+ */
+
+/* Set the preferred worker of every block owned by MPI node rank */
+void assign_blocks_to_workers(int rank)
+{
+	unsigned bz;
+
+	/* NB: perhaps we could count a GPU as multiple workers */
+
+	/* how many workers are there ? */
+	/*unsigned nworkers = starpu_worker_get_count();*/
+
+	/* how many blocks are on that MPI node ? */
+//	unsigned nblocks = 0;
+//	for (bz = 0; bz < nbz; bz++)
+//	{
+//		struct block_description *block =
+//				get_block_description(bz);
+//
+//		if (block->mpi_node == rank)
+//			nblocks++;
+//	}
+
+	/* how many blocks per worker ? */
+	/*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/
+
+	/* we now attribute up to nblocks_per_worker blocks per workers */
+	unsigned attributed = 0;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block =
+				get_block_description(bz);
+
+		if (block->mpi_node == rank)
+		{
+			unsigned workerid;
+			/* Manage initial block distribution between CPU and GPU */
+		#if 0
+			#if 1
+			/* GPUs then CPUs */
+			if (attributed < 3*18)
+				workerid = attributed / 18;
+			else
+				workerid = 3+ (attributed - 3*18) / 2;
+			#else
+			/* GPUs interleaved with CPUs */
+			if ((attributed % 20) <= 1)
+				workerid = 3 + attributed / 20;
+			else if (attributed < 60)
+				workerid = attributed / 20;
+			else
+				workerid = (attributed - 60)/2 + 6;
+			#endif
+		#else
+			/* Only GPUS */
+			workerid = (attributed / 21) % 3;
+		#endif
+			/*= attributed/nblocks_per_worker;*/
+
+			block->preferred_worker = workerid;
+
+			attributed++;
+		}
+	}
+}
+
+
+
+/* Distribute the blocks over world_size MPI nodes, in contiguous chunks */
+void assign_blocks_to_mpi_nodes(int world_size)
+{
+	unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size;
+
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block =
+				get_block_description(bz);
+
+		block->mpi_node = bz / nzblocks_per_process;
+	}
+}
+
+/* Total number of bytes allocated by allocate_block_on_node() */
+static size_t allocated = 0;
+
+/* Allocate a nx*ny*nz block of TYPE, zero it (except with SimGrid) and
+ * register it to StarPU */
+static void allocate_block_on_node(starpu_data_handle_t *handleptr, unsigned bz, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz)
+{
+	int ret;
+	size_t block_size = nx*ny*nz*sizeof(TYPE);
+
+	/* Allocate memory */
+#if 1
+	ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	STARPU_ASSERT(ret == 0);
+#else
+	*ptr = malloc(block_size);
+	STARPU_ASSERT(*ptr);
+#endif
+
+	allocated += block_size;
+
+#ifndef STARPU_SIMGRID
+	/* Fill the blocks with 0 */
+	memset(*ptr, 0, block_size);
+#endif
+
+	/* Register it to StarPU */
+	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
+
+	starpu_data_set_coordinates(*handleptr, 1, bz);
+}
+
+/* Unregister a block and free its memory; nx, ny and nz must be the
+ * dimensions that were given to allocate_block_on_node(), since they are used
+ * to compute the size passed to starpu_free_flags() */
+static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz)
+{
+	void *ptr = (void *) starpu_block_get_local_ptr(handleptr);
+	size_t block_size = nx*ny*nz*sizeof(TYPE);
+	starpu_data_unregister(handleptr);
+	starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+}
+
+/* Print the number of bytes allocated so far */
+void display_memory_consumption(int rank)
+{
+	FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", (unsigned long) allocated, rank);
+}
+
+/* Allocate the layers of the blocks owned by rank, and the boundary buffers
+ * that rank either owns or exchanges with a neighbour */
+void allocate_memory_on_node(int rank)
+{
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block = get_block_description(bz);
+
+		int node = block->mpi_node;
+
+		/* Main blocks */
+		if (node == rank)
+		{
+			unsigned size_bz = block_sizes_z[bz];
+			allocate_block_on_node(&block->layers_handle[0], bz, &block->layers[0],
+						(sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
+#ifndef STARPU_SIMGRID
+#ifdef LIFE
+			unsigned x, y, z;
+			unsigned sum = 0;
+			for (x = 0; x < sizex; x++)
+				for (y = 0; y < sizey; y++)
+					for (z = 0; z < size_bz; z++)
+						/* Just random data */
+						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) % 2;
+/*			printf("block %d starts with %d/%d alive\n", bz, sum, sizex*sizey*size_bz);*/
+#endif
+#endif
+			allocate_block_on_node(&block->layers_handle[1], bz, &block->layers[1],
+						(sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
+		}
+
+		/* Boundary blocks : Top */
+		int top_node = block->boundary_blocks[T]->mpi_node;
+		if ((node == rank) || (top_node == rank))
+		{
+			allocate_block_on_node(&block->boundaries_handle[T][0], bz, &block->boundaries[T][0],
+						(sizex + 2*K), (sizey + 2*K), K);
+			allocate_block_on_node(&block->boundaries_handle[T][1], bz, &block->boundaries[T][1],
+						(sizex + 2*K), (sizey + 2*K), K);
+		}
+
+		/* Boundary blocks : Bottom */
+		int bottom_node = block->boundary_blocks[B]->mpi_node;
+		if ((node == rank) || (bottom_node == rank))
+		{
+			allocate_block_on_node(&block->boundaries_handle[B][0], bz, &block->boundaries[B][0],
+						(sizex + 2*K), (sizey + 2*K), K);
+			allocate_block_on_node(&block->boundaries_handle[B][1], bz, &block->boundaries[B][1],
+						(sizex + 2*K), (sizey + 2*K), K);
+		}
+	}
+}
+
+/* Free what allocate_memory_on_node() allocated for rank.  This reads
+ * blocks and block_sizes_z, so it has to run before free_blocks_array(). */
+void free_memory_on_node(int rank)
+{
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block = get_block_description(bz);
+
+		int node = block->mpi_node;
+
+		/* Main blocks */
+		if (node == rank)
+		{
+			/* The layers were allocated with size_bz + 2*K planes, not
+			 * K: pass the same depth so that the freed size matches */
+			unsigned size_bz = block_sizes_z[bz];
+			free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
+			free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
+		}
+
+		/* Boundary blocks : Top */
+		int top_node = block->boundary_blocks[T]->mpi_node;
+		if ((node == rank) || (top_node == rank))
+		{
+			free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K);
+			free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K);
+		}
+
+		/* Boundary blocks : Bottom */
+		int bottom_node = block->boundary_blocks[B]->mpi_node;
+		if ((node == rank) || (bottom_node == rank))
+		{
+			free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K);
+			free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K);
+		}
+	}
+}
+
+/* check how many cells are alive */
+void check(int rank)
+{
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block = get_block_description(bz);
+
+		int node = block->mpi_node;
+
+		/* Main blocks */
+		if (node == rank)
+		{
+#ifdef LIFE
+			unsigned size_bz = block_sizes_z[bz];
+			unsigned x, y, z;
+			unsigned sum = 0;
+			for (x = 0; x < sizex; x++)
+				for (y = 0; y < sizey; y++)
+					for (z = 0; z < size_bz; z++)
+						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)];
+			printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz);
+#endif
+		}
+	}
+}
diff --git a/examples/stencil/stencil-kernels.c b/examples/stencil/stencil-kernels.c
new file mode 100644
index 0000000..13e3952
--- /dev/null
+++ b/examples/stencil/stencil-kernels.c
@@ -0,0 +1,631 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "stencil.h"
+
+/* Computation Kernels */
+
+/*
+ * There are three codelets:
+ *
+ * - cl_update, which takes a block and the boundaries of its neighbours, loads
+ * the boundaries into the block and performs some update loops:
+ *
+ * comp. buffer save. buffers comp. buffer save. buffers comp. buffer
+ * | ...
| + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy====>#N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy | | | + * +-------------+ +------------------+ | | + * | #N | + * ... + * | | +----------------+ +----------------------+ + * | | | #N bottom copy | | block #N bottom copy | + * ^ +------------------+ +----------------+ +----------------------+ + * | | #N-1 top copy <====#N-1 top copy | | block #N-1 | + * | +------------------+ +----------------+ | | + * Z ... + * + * - save_cl_top, which take a block and its top boundary, and saves the top of + * the block into the boundary (to be given as bottom of the neighbour above + * this block). + * + * comp. buffer save. buffers comp. buffer save. buffers comp. buffer + * | ... | + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy <==== | + * +-------------+ +------------------+ |..................| + * | #N | + * ... + * | | +----------------+ +----------------------+ + * | | | #N bottom copy | | block #N bottom copy | + * ^ +------------------+ +----------------+ +----------------------+ + * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | + * | +------------------+ +----------------+ | | + * Z ... + * + * - save_cl_bottom, same for the bottom + * comp. buffer save. buffers comp. buffer save. buffers comp. buffer + * | ... | + * | | +------------------+ +------------------+ + * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | + * +-------------+ +------------------+ +------------------+ + * | #N top copy | | #N top copy | | | + * +-------------+ +------------------+ | | + * | #N | + * ... 
+ * |..................| +----------------+ +----------------------+
+ * | ====>#N bottom copy | | block #N bottom copy |
+ * ^ +------------------+ +----------------+ +----------------------+
+ * | | #N-1 top copy | | #N-1 top copy | | block #N-1 |
+ * | +------------------+ +----------------+ | |
+ * Z ...
+ *
+ * The idea is that the computation buffers thus don't have to move, only their
+ * boundaries are copied to buffers that do move (be it CPU/GPU, GPU/GPU or via
+ * MPI)
+ *
+ * For each of the buffers above, there are two (0/1) buffers to make new/old switch costless.
+ */
+
+#if 0
+# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
+#else
+# define DEBUG(fmt, ...) (void) 0
+#endif
+
+/* Record which GPU ran which block, for nice pictures */
+int who_runs_what_len;
+int *who_runs_what;
+int *who_runs_what_index;
+double *last_tick;
+
+/* Achieved iterations */
+static int achieved_iter;
+
+/* Record how many updates each worker performed */
+unsigned update_per_worker[STARPU_NMAXWORKERS];
+
+/*
+ * Append the calling worker to the history of the block.  One entry is
+ * written per elapsed period of get_ticks() * 1000; periods during which the
+ * block was not run are filled with -1.  Nothing is written once
+ * who_runs_what_len entries were recorded for the block.
+ */
+static void record_who_runs_what(struct block_description *block)
+{
+	double now, now2, diff, delta = get_ticks() * 1000;
+	int workerid = starpu_worker_get_id_check();
+
+	now = starpu_timing_now();
+	now2 = now - start;
+	diff = now2 - last_tick[block->bz];
+	while (diff >= delta)
+	{
+		last_tick[block->bz] += delta;
+		diff = now2 - last_tick[block->bz];
+		if (who_runs_what_index[block->bz] < who_runs_what_len)
+			who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = -1;
+	}
+
+	if (who_runs_what_index[block->bz] < who_runs_what_len)
+		who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = global_workerid(workerid);
+}
+
+/* Check that a boundary buffer has the same layout as K planes of the block */
+static void check_load(struct starpu_block_interface *block, struct starpu_block_interface *boundary)
+{
+	/* Sanity checks */
+	STARPU_ASSERT(block->nx == boundary->nx);
+	STARPU_ASSERT(block->ny == boundary->ny);
+	STARPU_ASSERT(boundary->nz == K);
+
+	/* NB: this is not fully guaranteed ... but it's *very* likely and that
+	 * makes our life much simpler */
+	STARPU_ASSERT(block->ldy == boundary->ldy);
+	STARPU_ASSERT(block->ldz == boundary->ldz);
+}
+
+/*
+ * Load a neighbour's boundary into block, CPU version
+ */
+static void load_subblock_from_buffer_cpu(void *_block,
+					void *_boundary,
+					unsigned firstz)
+{
+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	unsigned offset = firstz*block->ldz;
+	TYPE *block_data = (TYPE *)block->ptr;
+	TYPE *boundary_data = (TYPE *)boundary->ptr;
+	memcpy(&block_data[offset], boundary_data, boundary_size);
+}
+
+/*
+ * Load a neighbour's boundary into block, CUDA version
+ */
+#ifdef STARPU_USE_CUDA
+static void load_subblock_from_buffer_cuda(void *_block,
+					void *_boundary,
+					unsigned firstz)
+{
+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	unsigned offset = firstz*block->ldz;
+	TYPE *block_data = (TYPE *)block->ptr;
+	TYPE *boundary_data = (TYPE *)boundary->ptr;
+	cudaError_t cures = cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
+	if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+}
+
+/*
+ * cl_update (CUDA version)
+ */
+static void update_func_cuda(void *descr[], void *arg)
+{
+	struct block_description *block = arg;
+	int workerid = starpu_worker_get_id_check();
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+	if (block->bz == 0)
+		FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
+	else
+		DEBUG("!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE)
+	int rank = 0;
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	DEBUG("!!! RANK %d !!!\n", rank);
+#endif
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+
+	unsigned block_size_z = get_block_size(block->bz);
+	unsigned i;
+	update_per_worker[workerid]++;
+
+	record_who_runs_what(block);
+
+	/*
+	 *	Load neighbours' boundaries : TOP
+	 */
+
+	/* The offset along the z axis is (block_size_z + K) */
+	load_subblock_from_buffer_cuda(descr[0], descr[2], block_size_z+K);
+	load_subblock_from_buffer_cuda(descr[1], descr[3], block_size_z+K);
+
+	/*
+	 *	Load neighbours' boundaries : BOTTOM
+	 */
+	load_subblock_from_buffer_cuda(descr[0], descr[4], 0);
+	load_subblock_from_buffer_cuda(descr[1], descr[5], 0);
+
+	/*
+	 *	Stencils ... do the actual work here :) TODO
+	 */
+
+	/* The two layers alternate between the "old" and "new" roles */
+	for (i=1; i<=K; i++)
+	{
+		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
+		TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr;
+
+		/* Shadow data */
+		cuda_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
+
+		/* And perform actual computation */
+#ifdef LIFE
+		cuda_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
+#else
+		cudaError_t cures = cudaMemcpyAsync(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
+		if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+#endif /* LIFE */
+	}
+}
+#endif /* STARPU_USE_CUDA */
+
+/*
+ * Load a neighbour's boundary into block, OpenCL version
+ */
+#ifdef STARPU_USE_OPENCL
+static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block,
+					struct starpu_block_interface *boundary,
+					unsigned firstz)
+{
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	/* clEnqueueCopyBuffer() takes offsets in bytes, not in elements: scale
+	 * by the element size, as the pointer arithmetic of the CPU and CUDA
+	 * versions implicitly does */
+	size_t offset = (size_t)firstz*block->ldz*block->elemsize;
+	cl_mem block_data = (cl_mem)block->dev_handle;
+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
+
+	cl_command_queue cq;
+	starpu_opencl_get_current_queue(&cq);
+	cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL);
+	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
+}
+
+/*
+ * cl_update (OpenCL version)
+ */
+static void update_func_opencl(void *descr[], void *arg)
+{
+	struct block_description *block = arg;
+	int workerid = starpu_worker_get_id_check();
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+	if (block->bz == 0)
+		FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
+	else
+		DEBUG("!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE)
+	int rank = 0;
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	DEBUG("!!! RANK %d !!!\n", rank);
+#endif
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+
+	unsigned block_size_z = get_block_size(block->bz);
+	unsigned i;
+	update_per_worker[workerid]++;
+
+	record_who_runs_what(block);
+
+	cl_command_queue cq;
+	starpu_opencl_get_current_queue(&cq);
+
+	/*
+	 *	Load neighbours' boundaries : TOP
+	 */
+
+	/* The offset along the z axis is (block_size_z + K) */
+	load_subblock_from_buffer_opencl(descr[0], descr[2], block_size_z+K);
+	load_subblock_from_buffer_opencl(descr[1], descr[3], block_size_z+K);
+
+	/*
+	 *	Load neighbours' boundaries : BOTTOM
+	 */
+	load_subblock_from_buffer_opencl(descr[0], descr[4], 0);
+	load_subblock_from_buffer_opencl(descr[1], descr[5], 0);
+
+	/*
+	 *	Stencils ... do the actual work here :) TODO
+	 */
+
+	/* The two layers alternate between the "old" and "new" roles */
+	for (i=1; i<=K; i++)
+	{
+		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
+		TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle;
+
+		/* Shadow data */
+		opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
+
+		/* And perform actual computation */
+#ifdef LIFE
+		opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
+#else
+		/* No event is requested: it was never waited for nor released */
+		cl_int ret = clEnqueueCopyBuffer(cq, (cl_mem) old, (cl_mem) newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, NULL);
+		if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
+
+#endif /* LIFE */
+	}
+}
+#endif /* STARPU_USE_OPENCL */
+
+/*
+ * cl_update (CPU version)
+ */
+void update_func_cpu(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	int workerid = starpu_worker_get_id_check();
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+	if (block->bz == 0)
+		FPRINTF(stderr,"!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid);
+	else
+		DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid);
+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE)
+	int rank = 0;
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	DEBUG("!!! RANK %d !!!\n", rank);
+#endif
+	DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+
+	unsigned block_size_z = get_block_size(block->bz);
+	unsigned i;
+	update_per_worker[workerid]++;
+
+	record_who_runs_what(block);
+
+	/*
+	 *	Load neighbours' boundaries : TOP
+	 */
+
+	/* The offset along the z axis is (block_size_z + K) */
+	load_subblock_from_buffer_cpu(descr[0], descr[2], block_size_z+K);
+	load_subblock_from_buffer_cpu(descr[1], descr[3], block_size_z+K);
+
+	/*
+	 *	Load neighbours' boundaries : BOTTOM
+	 */
+	load_subblock_from_buffer_cpu(descr[0], descr[4], 0);
+	load_subblock_from_buffer_cpu(descr[1], descr[5], 0);
+
+	/*
+	 *	Stencils ... do the actual work here :) TODO
+	 */
+
+	/* The two layers alternate between the "old" and "new" roles */
+	for (i=1; i<=K; i++)
+	{
+		struct starpu_block_interface *oldb = (struct starpu_block_interface *) descr[i%2], *newb = (struct starpu_block_interface *) descr[(i+1)%2];
+		TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr;
+
+		/* Shadow data: the locals below are declared for the code
+		 * textually included from shadow.h */
+		unsigned ldy = oldb->ldy, ldz = oldb->ldz;
+		unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz;
+		unsigned x, y, z;
+		unsigned stepx = 1;
+		unsigned stepy = 1;
+		unsigned stepz = 1;
+		unsigned idx = 0;
+		unsigned idy = 0;
+		unsigned idz = 0;
+		TYPE *ptr = old;
+
+#		include "shadow.h"
+
+		/* And perform actual computation */
+#ifdef LIFE
+		life_update(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
+#else
+		memcpy(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer));
+#endif /* LIFE */
+	}
+}
+
+/* Performance model and codelet structure */
+static struct starpu_perfmodel cl_update_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "cl_update"
+};
+
+struct starpu_codelet cl_update =
+{
+	.cpu_funcs = {update_func_cpu},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {update_func_cuda},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {update_func_opencl},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
+#endif
+	.model = &cl_update_model,
+	.nbuffers = 6,
+	.modes = {STARPU_RW, STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
+};
+
+/*
+ * Save the block internal boundaries to give them to our neighbours.
+ */
+
+/* CPU version */
+static void load_subblock_into_buffer_cpu(void *_block,
+					void *_boundary,
+					unsigned firstz)
+{
+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	unsigned offset = firstz*block->ldz;
+	TYPE *block_data = (TYPE *)block->ptr;
+	TYPE *boundary_data = (TYPE *)boundary->ptr;
+	memcpy(boundary_data, &block_data[offset], boundary_size);
+}
+
+/* CUDA version */
+#ifdef STARPU_USE_CUDA
+static void load_subblock_into_buffer_cuda(void *_block,
+					void *_boundary,
+					unsigned firstz)
+{
+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	unsigned offset = firstz*block->ldz;
+	TYPE *block_data = (TYPE *)block->ptr;
+	TYPE *boundary_data = (TYPE *)boundary->ptr;
+	cudaError_t cures = cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
+	if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+}
+#endif /* STARPU_USE_CUDA */
+
+/* OPENCL version */
+#ifdef STARPU_USE_OPENCL
+static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block,
+					struct starpu_block_interface *boundary,
+					unsigned firstz)
+{
+	check_load(block, boundary);
+
+	/* We do a contiguous memory transfer */
+	size_t boundary_size = K*block->ldz*block->elemsize;
+
+	/* clEnqueueCopyBuffer() takes offsets in bytes, not in elements */
+	size_t offset = (size_t)firstz*block->ldz*block->elemsize;
+	cl_mem block_data = (cl_mem)block->dev_handle;
+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
+
+	cl_command_queue cq;
+	starpu_opencl_get_current_queue(&cq);
+
+	cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL);
+	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
+}
+#endif /* STARPU_USE_OPENCL */
+
+/* Record how many top/bottom saves each worker performed */
+unsigned top_per_worker[STARPU_NMAXWORKERS];
+unsigned bottom_per_worker[STARPU_NMAXWORKERS];
+
+/* top save, CPU version */
+void dummy_func_top_cpu(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	int workerid = starpu_worker_get_id_check();
+	top_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Top block %d\n", block->bz);
+
+	/* The offset along the z axis is (block_size_z + K)- K */
+	unsigned block_size_z = get_block_size(block->bz);
+
+	load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z);
+	load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z);
+}
+
+/* bottom save, CPU version */
+void dummy_func_bottom_cpu(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	(void) block;
+	int workerid = starpu_worker_get_id_check();
+	bottom_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Bottom block %d\n", block->bz);
+
+	load_subblock_into_buffer_cpu(descr[0], descr[2], K);
+	load_subblock_into_buffer_cpu(descr[1], descr[3], K);
+}
+
+/* top save, CUDA version */
+#ifdef STARPU_USE_CUDA
+static void dummy_func_top_cuda(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	int workerid = starpu_worker_get_id_check();
+	top_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Top block %d\n", block->bz);
+
+	/* The offset along the z axis is (block_size_z + K)- K */
+	unsigned block_size_z = get_block_size(block->bz);
+
+	load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z);
+	load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z);
+}
+
+/* bottom save, CUDA version */
+static void dummy_func_bottom_cuda(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	(void) block;
+	int workerid = starpu_worker_get_id_check();
+	bottom_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Bottom block %d on CUDA\n", block->bz);
+
+	load_subblock_into_buffer_cuda(descr[0], descr[2], K);
+	load_subblock_into_buffer_cuda(descr[1], descr[3], K);
+}
+#endif /* STARPU_USE_CUDA */
+
+/* top save, OpenCL version */
+#ifdef STARPU_USE_OPENCL
+static void dummy_func_top_opencl(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	(void) block;
+	int workerid = starpu_worker_get_id_check();
+	top_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Top block %d\n", block->bz);
+
+	/* The offset along the z axis is (block_size_z + K)- K */
+	unsigned block_size_z = get_block_size(block->bz);
+
+	load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z);
+	load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z);
+}
+
+/* bottom save, OPENCL version */
+static void dummy_func_bottom_opencl(void *descr[], void *arg)
+{
+	struct block_description *block = (struct block_description *) arg;
+	(void) block;
+	int workerid = starpu_worker_get_id_check();
+	bottom_per_worker[workerid]++;
+
+	DEBUG("DO SAVE Bottom block %d on OPENCL\n", block->bz);
+
+	load_subblock_into_buffer_opencl(descr[0], descr[2], K);
+	load_subblock_into_buffer_opencl(descr[1], descr[3], K);
+}
+#endif /* STARPU_USE_OPENCL */
+
+/* Performance models and codelet for save */
+static struct starpu_perfmodel save_cl_bottom_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "save_cl_bottom"
+};
+
+static struct starpu_perfmodel save_cl_top_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "save_cl_top"
+};
+
+struct starpu_codelet save_cl_bottom =
+{
+	.cpu_funcs = {dummy_func_bottom_cpu},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {dummy_func_bottom_cuda},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_func_bottom_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .model = &save_cl_bottom_model, + .nbuffers = 4, + .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} +}; + +struct starpu_codelet save_cl_top = +{ + .cpu_funcs = {dummy_func_top_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_func_top_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_func_top_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .model = &save_cl_top_model, + .nbuffers = 4, + .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} +}; diff --git a/examples/stencil/stencil-tasks.c b/examples/stencil/stencil-tasks.c new file mode 100644 index 0000000..2ec3f91 --- /dev/null +++ b/examples/stencil/stencil-tasks.c @@ -0,0 +1,358 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "stencil.h" + +#define BIND_LAST 1 + +/* + * Schedule tasks for updates and saves + */ + +/* + * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT + * + * dir is -1 or +1. + */ + +#if 0 +# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) +#else +# define DEBUG(fmt, ...) 
+#endif + +/* + * SAVE + */ + +/* R(z) = R(z+d) = local, just call the save kernel */ +static void create_task_save_local(unsigned iter, unsigned z, int dir) +{ + struct starpu_task *save_task = starpu_task_create(); + struct block_description *descr = get_block_description(z); + + save_task->cl = (dir == -1)?&save_cl_bottom:&save_cl_top; + save_task->cl_arg = descr; + + /* Saving our border... */ + save_task->handles[0] = descr->layers_handle[0]; + save_task->handles[1] = descr->layers_handle[1]; + + /* ... to the neighbour's copy */ + struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; + save_task->handles[2] = neighbour->boundaries_handle[(1-dir)/2][0]; + save_task->handles[3] = neighbour->boundaries_handle[(1-dir)/2][1]; + + /* Bind */ + if (iter <= BIND_LAST) + save_task->execute_on_a_specific_worker = get_bind_tasks(); + save_task->workerid = descr->preferred_worker; + + int ret = starpu_task_submit(save_task); + if (ret) + { + FPRINTF(stderr, "Could not submit task save: %d\n", ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + +/* R(z) = local & R(z+d) != local */ +/* We need to send our save over MPI */ + +static void send_done(void *arg) +{ + uintptr_t z = (uintptr_t) arg; + (void) z; + DEBUG("DO SEND %d\n", (int)z); +} + +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) +/* Post MPI send */ +static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int local_rank) +{ + struct block_description *descr = get_block_description(z); + STARPU_ASSERT(descr->mpi_node == local_rank); + + struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; + int dest = neighbour->mpi_node; + STARPU_ASSERT(neighbour->mpi_node != local_rank); + + /* Send neighbour's border copy to the neighbour */ + starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0]; + starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1]; + int ret; + + ret = 
starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + ret = starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); +} + +/* R(z) != local & R(z+d) = local */ +/* We need to receive over MPI */ + +static void recv_done(void *arg) +{ + uintptr_t z = (uintptr_t) arg; + (void) z; + DEBUG("DO RECV %d\n", (int)z); +} + +/* Post MPI recv */ +static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank) +{ + struct block_description *descr = get_block_description(z); + STARPU_ASSERT(descr->mpi_node != local_rank); + + struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; + int source = descr->mpi_node; + STARPU_ASSERT(neighbour->mpi_node == local_rank); + + /* Receive our neighbour's border in our neighbour copy */ + starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0]; + starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1]; + int ret; + + ret = starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + ret = starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); +} +#endif /* STARPU_USE_MPI */ + +/* + * Schedule saving boundaries of blocks to communication buffers + */ +void create_task_save(unsigned iter, unsigned z, int dir, int local_rank) +{ + int node_z = get_block_mpi_node(z); + int node_z_and_d = get_block_mpi_node(z+dir); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + if (node_z == local_rank) + { + /* Save data from update */ + create_task_save_local(iter, z, dir); 
+ if (node_z_and_d != local_rank) + { + /* R(z) = local & R(z+d) != local, We have to send the data */ + create_task_save_mpi_send(iter, z, dir, local_rank); + } + + } + else + { + /* node_z != local_rank, this MPI node doesn't have the saved data */ + if (node_z_and_d == local_rank) + { + create_task_save_mpi_recv(iter, z, dir, local_rank); + } + else + { + /* R(z) != local & R(z+d) != local We don't have + the saved data and don't need it, we shouldn't + even have been called! */ + STARPU_ABORT(); + } + } +#else /* !STARPU_USE_MPI */ + STARPU_ASSERT((node_z == local_rank) && (node_z_and_d == local_rank)); + create_task_save_local(iter, z, dir); +#endif /* STARPU_USE_MPI */ +} + +/* + * Schedule update computation in computation buffer + */ + +void create_task_update(unsigned iter, unsigned z, int local_rank) +{ + (void)local_rank; // unneeded parameter, we keep it to have a similar function prototype to the implicit case + STARPU_ASSERT(iter != 0); + + struct starpu_task *task = starpu_task_create(); + + unsigned niter = get_niter(); + + /* We are going to synchronize with the last tasks */ + if (iter == niter) + { + task->use_tag = 1; + task->tag_id = TAG_FINISH(z); + } + + unsigned old_layer = (K*(iter-1)) % 2; + unsigned new_layer = (old_layer + 1) % 2; + + struct block_description *descr = get_block_description(z); + task->handles[0] = descr->layers_handle[new_layer]; + task->handles[1] = descr->layers_handle[old_layer]; + + task->handles[2] = descr->boundaries_handle[T][new_layer]; + task->handles[3] = descr->boundaries_handle[T][old_layer]; + + task->handles[4] = descr->boundaries_handle[B][new_layer]; + task->handles[5] = descr->boundaries_handle[B][old_layer]; + + task->cl = &cl_update; + task->cl_arg = descr; + + if (iter <= BIND_LAST) + task->execute_on_a_specific_worker = get_bind_tasks(); + task->workerid = descr->preferred_worker; + + int ret = starpu_task_submit(task); + if (ret) + { + FPRINTF(stderr, "Could not submit task update block: %d\n", 
ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + +/* Dummy empty codelet taking one buffer */ +void null_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static double null_cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 0.000001; +} + +static struct starpu_perfmodel null_model = +{ + .type = STARPU_COMMON, + .cost_function = null_cost_function, + .symbol = "null" +}; + +static struct starpu_codelet null = +{ + .modes = { STARPU_W, STARPU_W }, + .cpu_funcs = {null_func}, + .cpu_funcs_name = {"null_func"}, + .cuda_funcs = {null_func}, + .opencl_funcs = {null_func}, + .nbuffers = 2, + .model = &null_model, + .name = "start" +}; + +void create_start_task(int z, int dir) +{ + /* Dumb task depending on the init task and simulating writing the + neighbour buffers, to avoid communications and computation running + before we start measuring time */ + struct starpu_task *wait_init = starpu_task_create(); + struct block_description *descr = get_block_description(z); + starpu_tag_t tag_init = TAG_INIT_TASK; + wait_init->cl = &null; + wait_init->use_tag = 1; + wait_init->tag_id = TAG_START(z, dir); + wait_init->handles[0] = descr->boundaries_handle[(1 + dir) / 2][0]; + wait_init->handles[1] = descr->boundaries_handle[(1 + dir) / 2][1]; + starpu_tag_declare_deps_array(wait_init->tag_id, 1, &tag_init); + + int ret = starpu_task_submit(wait_init); + if (ret) + { + FPRINTF(stderr, "Could not submit task initial wait: %d\n", ret); + if (ret == -ENODEV) + exit(77); + STARPU_ABORT(); + } +} + + +/* + * Create all the tasks + */ +void create_tasks(int rank) +{ + int iter; + int bz; + int niter = get_niter(); + int nbz = get_nbz(); + + for (bz = 0; bz < nbz; bz++) + { + if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank)) + create_start_task(bz, +1); + if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank)) + create_start_task(bz, -1); + } + + for (iter = 
0; iter <= niter; iter++) + { + starpu_iteration_push(iter); + for (bz = 0; bz < nbz; bz++) + { + if ((iter > 0) && (get_block_mpi_node(bz) == rank)) + create_task_update(iter, bz, rank); + + } + for (bz = 0; bz < nbz; bz++) + { + if (iter != niter) + { + if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank)) + create_task_save(iter, bz, +1, rank); + + if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank)) + create_task_save(iter, bz, -1, rank); + } + } + starpu_iteration_pop(); + } +} + +/* + * Wait for termination + */ +void wait_end_tasks(int rank) +{ + int bz; + int nbz = get_nbz(); + + for (bz = 0; bz < nbz; bz++) + { + if (get_block_mpi_node(bz) == rank) + { + /* Wait for the task producing block "bz" */ + starpu_tag_wait(TAG_FINISH(bz)); + + /* Get the result back to memory */ + struct block_description *block = get_block_description(bz); + starpu_data_acquire(block->layers_handle[0], STARPU_R); + starpu_data_acquire(block->layers_handle[1], STARPU_R); + /* the data_acquire here is done to make sure + * the data is sent back to the ram memory, we + * can safely do a data_release, to avoid the + * data_unregister to block later on + */ + starpu_data_release(block->layers_handle[0]); + starpu_data_release(block->layers_handle[1]); + } + } +} diff --git a/examples/stencil/stencil.c b/examples/stencil/stencil.c new file mode 100644 index 0000000..5896ffc --- /dev/null +++ b/examples/stencil/stencil.c @@ -0,0 +1,392 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "stencil.h" + +#ifdef STARPU_HAVE_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + +/* Main application */ + +/* default parameter values */ +static unsigned bind_tasks = 0; + +static unsigned ticks = 1000; + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 4; +#define SIZE 16 +#define NBZ 8 +#else +static unsigned niter = 32; +#define SIZE 128 +#define NBZ 64 +#endif + +/* Problem size */ +static unsigned sizex = SIZE; +static unsigned sizey = SIZE; +static unsigned sizez = NBZ*SIZE; + +/* Number of blocks (scattered over the different MPI processes) */ +unsigned nbz = NBZ; + +/* + * Initialization + */ + +unsigned get_bind_tasks(void) +{ + return bind_tasks; +} + +unsigned get_nbz(void) +{ + return nbz; +} + +unsigned get_niter(void) +{ + return niter; +} + +unsigned get_ticks(void) +{ + return ticks; +} +/* Parse the command line into the parameters above; an option expecting a value is ignored when it is the last argument, instead of reading argv[argc] (NULL) */ +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-b") == 0) + { + bind_tasks = 1; + } + + if (i + 1 < argc && strcmp(argv[i], "-nbz") == 0) + { + nbz = atoi(argv[++i]); + } + + if (i + 1 < argc && strcmp(argv[i], "-sizex") == 0) + { + sizex = atoi(argv[++i]); + } + + if (i + 1 < argc && strcmp(argv[i], "-sizey") == 0) + { + sizey = atoi(argv[++i]); + } + + if (i + 1 < argc && strcmp(argv[i], "-sizez") == 0) + { + sizez = atoi(argv[++i]); + } + + if (i + 1 < argc && strcmp(argv[i], "-niter") == 0) + { + niter = atoi(argv[++i]); + } + + if (i + 1 < argc && strcmp(argv[i], "-ticks") == 0) + { + ticks = atoi(argv[++i]); + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + fprintf(stderr, "Usage : %s [options...]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "-b bind tasks on CPUs/GPUs\n"); + fprintf(stderr, "-nbz Number of blocks on Z axis (%u by
default)\n", nbz); + fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez); + fprintf(stderr, "-niter Number of iterations (%u by default)\n", niter); + fprintf(stderr, "-ticks How often to put ticks in the output (ms, %u by default)\n", ticks); + exit(0); + } + } + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + { + sizex = sizey = 3; + nbz = 10; + sizez = nbz*3; + } +#endif +} + +static void init_problem(int argc, char **argv, int rank, int world_size) +{ + parse_args(argc, argv); + + create_blocks_array(sizex, sizey, sizez, nbz); + + /* Select the MPI process which should compute the different blocks */ + assign_blocks_to_mpi_nodes(world_size); + + assign_blocks_to_workers(rank); + + /* Allocate the different memory blocks, if used by the MPI process */ + allocate_memory_on_node(rank); + + display_memory_consumption(rank); + + who_runs_what_len = 2*niter; + who_runs_what = (int *) calloc(nbz * who_runs_what_len, sizeof(*who_runs_what)); + who_runs_what_index = (int *) calloc(nbz, sizeof(*who_runs_what_index)); + last_tick = (double *) calloc(nbz, sizeof(*last_tick)); +} + +static void free_problem(int rank) +{ + free_memory_on_node(rank); + free_blocks_array(); + free(who_runs_what); + free(who_runs_what_index); + free(last_tick); +} + +/* + * Main body + */ + +double start; +double begin, end; +double timing; + +void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) +{ + unsigned total = 0; + int worker; + + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + total += task_per_worker[worker]; + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + { + if (task_per_worker[worker]) + { + char name[64]; + starpu_worker_get_name(worker, name, sizeof(name)); + FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); + } + } +} + +unsigned global_workerid(unsigned local_workerid) +{ +#if defined(STARPU_USE_MPI) && 
!defined(STARPU_USE_MPI_MASTER_SLAVE) + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + unsigned workers_per_node = starpu_worker_get_count(); + + return (local_workerid + rank*workers_per_node); +#else + return local_workerid; +#endif +} + +int main(int argc, char **argv) +{ + int rank; + int world_size; + int ret; + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int thread_support; + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) + { + FPRINTF(stderr, "MPI_Init_thread failed\n"); + } + if (thread_support == MPI_THREAD_FUNNELED) + FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); + if (thread_support < MPI_THREAD_FUNNELED) + FPRINTF(stderr,"Warning: MPI does not have thread support!\n"); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); +#else + rank = 0; + world_size = 1; +#endif + + if (rank == 0) + { + FPRINTF(stderr, "Running on %d nodes\n", world_size); + fflush(stderr); + } + + struct starpu_conf conf; + starpu_conf_init(&conf); + + /*nbz is a global variable, this example doesn't support Master-Slave*/ + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + ret = starpu_mpi_init(NULL, NULL, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); +#endif + +#ifdef STARPU_USE_OPENCL + opencl_life_init(); + opencl_shadow_init(); +#endif /*STARPU_USE_OPENCL*/ + + init_problem(argc, argv, rank, world_size); + + create_tasks(rank); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); +#endif + if (rank == 0) + FPRINTF(stderr, "GO !\n"); + + start =
starpu_timing_now(); + + begin = starpu_timing_now(); + + starpu_tag_notify_from_apps(TAG_INIT_TASK); + + wait_end_tasks(rank); + + end = starpu_timing_now(); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); +#endif + +#if 0 + check(rank); +#endif + + /*display_debug(nbz, niter, rank);*/ + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + starpu_mpi_shutdown(); +#endif + + /* timing in us */ + timing = end - begin; + + double min_timing = timing; + double max_timing = timing; + double sum_timing = timing; + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + int reduce_ret; + + reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ + int *who_runs_what_tmp = malloc(nbz * who_runs_what_len * sizeof(*who_runs_what)); + reduce_ret = MPI_Reduce(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what)); + free(who_runs_what_tmp); + + /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ + int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index)); + reduce_ret = MPI_Reduce(who_runs_what_index, who_runs_what_index_tmp, nbz, MPI_INT, MPI_SUM, 0, 
MPI_COMM_WORLD); + STARPU_ASSERT(reduce_ret == MPI_SUCCESS); + + memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index)); + free(who_runs_what_index_tmp); +#endif + + if (rank == 0) + { +#if 1 + FPRINTF(stderr, "update:\n"); + func(update_per_worker); + FPRINTF(stderr, "top:\n"); + func(top_per_worker); + FPRINTF(stderr, "bottom:\n"); + func(bottom_per_worker); +#endif +#if 1 + unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; + + int iter; + for (iter = 0; iter < who_runs_what_len; iter++) + { + starpu_iteration_push(iter); + unsigned last, bz; + last = 1; + for (bz = 0; bz < nbz; bz++) + { + if ((bz % nzblocks_per_process) == 0) + FPRINTF(stderr, "| "); + + if (who_runs_what_index[bz] <= iter) + FPRINTF(stderr,"_ "); + else + { + last = 0; + if (who_runs_what[bz + iter * nbz] == -1) + FPRINTF(stderr,"* "); + else + FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]); + } + } + FPRINTF(stderr, "\n"); + + starpu_iteration_pop(); + if (last) + break; + } +#endif + + fflush(stderr); + + FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size); + FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000); + FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000); + FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); + } + + free_problem(rank); + starpu_shutdown(); + +#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) + MPI_Finalize(); +#endif + +#ifdef STARPU_USE_OPENCL + opencl_life_free(); + opencl_shadow_free(); +#endif /*STARPU_USE_OPENCL*/ + + return 0; +} diff --git a/examples/stencil/stencil.h b/examples/stencil/stencil.h new file mode 100644 index 0000000..6481e8e --- /dev/null +++ b/examples/stencil/stencil.h @@ -0,0 +1,149 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STENCIL_H__ +#define __STENCIL_H__ + +#include +#include +#include + +#ifndef __CUDACC__ +#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) +#include +#include +#endif +#endif + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define LIFE + +#ifdef LIFE +#define TYPE unsigned char +extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); +#else +#define TYPE float +#endif + +#define K 1 + +#define NDIRS 2 + +/* Split only on the z axis to make things simple */ +typedef enum +{ + B = 0, + T = 1 +} direction; + +/* Description of a domain block */ +struct block_description +{ + /* Which MPI node should process that block ? */ + int mpi_node; + + unsigned preferred_worker; + + unsigned bz; + + + /* For each of the following buffers, there are two (0/1) buffers to + * make new/old switch costless. */ + + /* This is the computation buffer for this block, it includes + * neighbours' border to make computation easier */ + TYPE *layers[2]; + starpu_data_handle_t layers_handle[2]; + + /* This is the "save" buffer, i.e. a copy of our neighbour's border. 
+ * This one is used for CPU/GPU or MPI communication (rather than the + * whole domain block) */ + TYPE *boundaries[NDIRS][2]; + starpu_data_handle_t boundaries_handle[NDIRS][2]; + + /* Shortcut pointer to the neighbours */ + struct block_description *boundary_blocks[NDIRS]; +}; + +#define TAG_INIT_TASK ((starpu_tag_t)1) + +starpu_tag_t TAG_FINISH(int z); +starpu_tag_t TAG_START(int z, int dir); +int MPI_TAG0(int z, int iter, int dir); +int MPI_TAG1(int z, int iter, int dir); + +#define MIN(a,b) ((a)<(b)?(a):(b)) + +void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz); +void free_blocks_array(); +struct block_description *get_block_description(int z); +void assign_blocks_to_mpi_nodes(int world_size); +void allocate_memory_on_node(int rank); +void assign_blocks_to_workers(int rank); +void create_tasks(int rank); +void wait_end_tasks(int rank); +void check(int rank); +void free_memory_on_node(int rank); + +void display_memory_consumption(int rank); + +int get_block_mpi_node(int z); +unsigned get_block_size(int z); +unsigned get_bind_tasks(void); + +unsigned get_nbz(void); +unsigned get_niter(void); +unsigned get_ticks(void); + +unsigned global_workerid(unsigned local_workerid); + +void create_task_update(unsigned iter, unsigned z, int local_rank); +void create_task_save(unsigned iter, unsigned z, int dir, int local_rank); + +extern int starpu_mpi_initialize(void); +extern int starpu_mpi_shutdown(void); + +/* kernels */ +extern struct starpu_codelet cl_update; +extern struct starpu_codelet save_cl_bottom; +extern struct starpu_codelet save_cl_top; + +extern unsigned update_per_worker[STARPU_NMAXWORKERS]; +extern unsigned top_per_worker[STARPU_NMAXWORKERS]; +extern unsigned bottom_per_worker[STARPU_NMAXWORKERS]; + +extern double start; +extern int who_runs_what_len; +extern int *who_runs_what; +extern int *who_runs_what_index; +extern double *last_tick; + +#ifndef _externC +#define _externC +#endif + +_externC void 
cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); +_externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); + +_externC void opencl_shadow_init(void); +_externC void opencl_shadow_free(void); +_externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); +_externC void opencl_life_init(void); +_externC void opencl_life_free(void); +_externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); + +#endif /* __STENCIL_H__ */ diff --git a/examples/subgraphs/codelets.c b/examples/subgraphs/codelets.c new file mode 100644 index 0000000..1937776 --- /dev/null +++ b/examples/subgraphs/codelets.c @@ -0,0 +1,85 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void matrix_fill(void *buffers[], void *cl_arg) +{ + unsigned i, j; + (void)cl_arg; + + /* length of the matrix */ + unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); + unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); + unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); + + for(j=0; j + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern struct starpu_codelet cl_fill; +extern struct starpu_codelet cl_check_scale; + +void empty(void *buffers[], void *cl_arg) +{ + /* This doesn't need to do anything, it's simply used to make coherency + * between the two views, by simply running on the home node of the + * data, thus getting back all data pieces there. */ + (void)buffers; + (void)cl_arg; + + /* This check is just for testsuite */ + int node = starpu_task_get_current_data_node(0); + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(starpu_task_get_current()); + STARPU_ASSERT(node >= 0); + for (i = 1; i < nbuffers; i++) + STARPU_ASSERT(starpu_task_get_current_data_node(i) == node); +} + +struct starpu_codelet cl_switch = +{ + .cpu_funcs = {empty}, + .nbuffers = STARPU_VARIABLE_NBUFFERS, + .name = "switch", +}; + +int do_starpu_init() +{ + int ret, i; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* force to execute task on the home_node, here it is STARPU_MAIN_RAM */ + cl_switch.specific_nodes = 1; + for(i = 0; i < STARPU_NMAXBUFS; i++) + cl_switch.nodes[i] = STARPU_MAIN_RAM; + + return 0; +} + +void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int 
x, int y, int nx, int ny, int ld) +{ + int i; + for (i = 0; i < PARTS; i++) + { + starpu_matrix_data_register(&sub_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[i*x][i*y], nx, ny, ld, sizeof(matrix[0][0])); + /* But make it invalid for now, we'll access data through the whole matrix first */ + starpu_data_invalidate(sub_handle[i]); + } +} + +int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) +{ + int i, ret; + + /* Now switch to vertical view of the matrix */ + struct starpu_data_descr descr[PARTS]; + for (i = 0; i < PARTS; i++) + { + descr[i].handle = sub_handle[i]; + descr[i].mode = STARPU_W; + } + ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, descr, PARTS, 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ + starpu_data_invalidate_submit(handle); + + /* Check the values of the vertical slices */ + for (i = 0; i < PARTS; i++) + { + int xstart = i*start; + ret = starpu_task_insert(&cl_check_scale, + STARPU_RW, sub_handle[i], + STARPU_VALUE, &xstart, sizeof(xstart), + STARPU_VALUE, &factor, sizeof(factor), + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + return 0; +} + +int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) +{ + int i, ret; + struct starpu_data_descr descr[PARTS]; + + /* Now switch back to total view of the matrix */ + for (i = 0; i < PARTS; i++) + { + descr[i].handle = sub_handle[i]; + descr[i].mode = STARPU_RW; + } + + ret = starpu_task_insert(&cl_switch, STARPU_DATA_MODE_ARRAY, descr, PARTS, STARPU_W, handle, 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_submit"); + /* And make sure we don't accidentally access the matrix through the sub slices */ + for (i = 0; i < PARTS; i++) + starpu_data_invalidate_submit(sub_handle[i]); + + return 0; +} + +void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) +{ + int i; + for (i = 0; i < PARTS; i++) + { + starpu_data_unregister(sub_handle[i]); + } +} + +#include "main.h" diff --git a/examples/subgraphs/partition.c b/examples/subgraphs/partition.c new file mode 100644 index 0000000..476f964 --- /dev/null +++ b/examples/subgraphs/partition.c @@ -0,0 +1,83 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern struct starpu_codelet cl_fill; +extern struct starpu_codelet cl_check_scale; + +int do_starpu_init() +{ + int ret, i; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + return 0; +} + +void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int x, int y, int nx, int ny, int ld) +{ + // nothing to do +} + +int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) +{ + int i, ret; + + struct starpu_data_filter f = + { + .filter_func = filter_func, + .nchildren = PARTS + }; + starpu_data_partition(handle, &f); + + /* Check the values of the slices */ + for (i = 0; i < PARTS; i++) + { + int xstart = i*start; + ret = starpu_task_insert(&cl_check_scale, + STARPU_RW, starpu_data_get_sub_data(handle, 1, i), + STARPU_VALUE, &xstart, sizeof(xstart), + STARPU_VALUE, &factor, sizeof(factor), + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + return 0; +} + +int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) +{ + // nothing to do + return 0; +} + +void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) +{ + // nothing to do +} + +#include "main.h" diff --git a/examples/subgraphs/plan.c b/examples/subgraphs/plan.c new file mode 100644 index 0000000..63323e5 --- /dev/null +++ b/examples/subgraphs/plan.c @@ -0,0 +1,80 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#define NX 6 +#define NY 6 +#define PARTS 2 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +extern struct starpu_codelet cl_fill; +extern struct starpu_codelet cl_check_scale; + +int do_starpu_init() +{ + int ret, i; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + return 0; +} + +void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int x, int y, int nx, int ny, int ld) +{ + struct starpu_data_filter f = + { + .filter_func = filter_func, + .nchildren = PARTS + }; + starpu_data_partition_plan(handle, &f, sub_handle); +} + +int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) +{ + int i, ret; + + /* Check the values of the slices */ + for (i = 0; i < PARTS; i++) + { + int xstart = i*start; + ret = starpu_task_insert(&cl_check_scale, + STARPU_RW, sub_handle[i], + STARPU_VALUE, &xstart, sizeof(xstart), + STARPU_VALUE, &factor, 
sizeof(factor), + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + return 0; +} + +int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) +{ + starpu_data_partition_clean(handle, PARTS, sub_handle); + return 0; +} + +void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) +{ + // nothing to do +} + +#include "main.h" diff --git a/examples/tag_example/tag_example.c b/examples/tag_example/tag_example.c new file mode 100644 index 0000000..15f9155 --- /dev/null +++ b/examples/tag_example/tag_example.c @@ -0,0 +1,246 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows how to use tags to define a grid of dependencies, shaped this way: + * + * ... ... + * v v + * ... -> task (i, j) --> task (i, j+1) --> ... + * v v + * ... -> task (i+1,j) --> task (i+1,j+1) --> ... + * v v + * ... ... + */ + +#include +#include +#include +#include + +#include + +#ifdef STARPU_HAVE_HELGRIND_H +#include +#endif +#ifndef ANNOTATE_HAPPENS_BEFORE +#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_AFTER +#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) +#endif + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define TAG(i, j, iter) ((starpu_tag_t) (((uint64_t)(iter)<<48) | ((uint64_t)(j)<<24) | (i))) + +#ifdef STARPU_QUICK_CHECK +#define Ni 32 +#define Nj 32 +#define Nk 32 +#else +#define Ni 64 +#define Nj 32 +#define Nk 128 +#endif + +static unsigned ni = Ni, nj = Nj, nk = Nk; +static unsigned callback_cnt; +static unsigned iter = 0; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + nk = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-i") == 0) + { + char *argptr; + ni = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-j") == 0) + { + char *argptr; + nj = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]); + } + } +} + +void callback_cpu(void *argcb); +static void express_deps(unsigned i, unsigned j, unsigned iter); + +static void tag_cleanup_grid(unsigned piter) +{ + unsigned i,j; + + for (j = 0; j < nj; j++) + for (i = 0; i < ni; i++) + { + starpu_tag_remove(TAG(i,j,piter)); + } + + +} + +static int create_task_grid(unsigned piter) +{ + unsigned i, j; + int ret; + +/* FPRINTF(stderr, "start iter %d...\n", piter); */ + callback_cnt = (ni*nj); + + /* create non-entry tasks */ + for (j = 0; j < nj; j++) + for (i = 1; i < ni; i++) + { + /* create a new task */ + struct starpu_task *task = starpu_task_create(); + task->callback_func = callback_cpu; + /* jb->argcb = &coords[i][j]; */ + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = TAG(i, j, piter); + + /* express deps : (i,j) depends on (i-1, j-1) & (i-1, j+1) */ + express_deps(i, j, piter); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* create entry tasks */ + for (j = 0; j < nj; j++) + { 
+ /* create a new task */ + struct starpu_task *task = starpu_task_create(); + task->callback_func = callback_cpu; + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + task->use_tag = 1; + /* this is an entry task */ + task->tag_id = TAG(0, j, piter); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + return 0; +} + + +void callback_cpu(void *argcb) +{ + (void)argcb; + unsigned newcnt = STARPU_ATOMIC_ADD(&callback_cnt, -1); + ANNOTATE_HAPPENS_BEFORE(&callback_cnt); + + if (newcnt == 0) + { + ANNOTATE_HAPPENS_AFTER(&callback_cnt); + if (++iter < nk) + { + /* cleanup old grids ... */ + if (iter > 2) + tag_cleanup_grid(iter-2); + + /* create a new iteration */ + create_task_grid(iter); + } + } +} + +static void express_deps(unsigned i, unsigned j, unsigned piter) +{ + if (j > 0) + { + /* (i,j-1) exists */ + if (j < nj - 1) + { + /* (i,j+1) exists */ + starpu_tag_declare_deps(TAG(i,j,piter), 2, TAG(i-1,j-1,piter), TAG(i-1,j+1,piter)); + } + else + { + /* (i,j+1) does not exist */ + starpu_tag_declare_deps(TAG(i,j,piter), 1, TAG(i-1,j-1,piter)); + } + } + else + { + /* (i, (j-1) does not exist */ + if (j < nj - 1) + { + /* (i,j+1) exists */ + starpu_tag_declare_deps(TAG(i,j,piter), 1, TAG(i-1,j+1,piter)); + } + else + { + /* (i,j+1) does not exist */ + STARPU_ABORT(); + } + } +} + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_HAVE_HELGRIND_H + if (RUNNING_ON_VALGRIND) + { + ni /= 2; + nj /= 2; + nk /= 2; + } +#endif + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + parse_args(argc, argv); + + FPRINTF(stderr, "ITER: %u\n", nk); + + ret = create_task_grid(0); + if (ret == 0) + starpu_task_wait_for_all(); + + tag_cleanup_grid(nk-2); + tag_cleanup_grid(nk-1); + + starpu_shutdown(); + + FPRINTF(stderr, "TEST DONE ...\n"); + + return ret; +} diff --git a/examples/tag_example/tag_example2.c 
b/examples/tag_example/tag_example2.c new file mode 100644 index 0000000..8763b7c --- /dev/null +++ b/examples/tag_example/tag_example2.c @@ -0,0 +1,143 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows how to submit a series of tasks in a chain of dependency: + * + * ... -> task (i) --> task (i+1) --> ... + * + * This is repeated several times + */ + +#include +#include +#include +#include + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define TAG(i, iter) ((starpu_tag_t) (((uint64_t)iter)<<32 | (i))) + +#define Ni 64 +#define Nk 256 + +static unsigned ni = Ni, nk = Nk; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + nk = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-i") == 0) + { + char *argptr; + ni = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-iter iter] [-i i]\n", argv[0]); + } + } +} + +void callback_cpu(void *argcb); + +static void tag_cleanup_grid(unsigned iter) +{ + unsigned i; + + for (i = 0; i < ni; i++) + starpu_tag_remove(TAG(i,iter)); +} + +static int create_task_grid(unsigned iter) +{ + unsigned i; + +/* FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */ + + for (i = 0; i < ni; i++) + { + int ret; + + /* create a new task */ + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = TAG(i, iter); + + if (i != 0) + starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + return 0; +} + +int main(int argc , char **argv) +{ + unsigned i; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + ni /= 4; + nk /= 16; +#endif + + parse_args(argc, argv); + + FPRINTF(stderr, "ITER : %u\n", nk); + + for (i = 0; i < nk; i++) + { + ret = create_task_grid(i); + if (ret == 77) goto enodev; + + starpu_tag_wait(TAG(ni-1, i)); + + /* cleanup old grids ... 
*/ + if (i > 1) + tag_cleanup_grid(i-1); + } + + starpu_task_wait_for_all(); + +enodev: + tag_cleanup_grid(nk-1); + + starpu_shutdown(); + + FPRINTF(stderr, "TEST DONE ...\n"); + + return ret; +} diff --git a/examples/tag_example/tag_example3.c b/examples/tag_example/tag_example3.c new file mode 100644 index 0000000..e8a48f5 --- /dev/null +++ b/examples/tag_example/tag_example3.c @@ -0,0 +1,147 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows how to submit a series of tasks in a chain of dependency: + * + * ... -> task (i) --> task (i+1) --> ... + * + * but here submitted in reverse order. + * + * This is repeated several times + */ + +#include +#include +#include +#include + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define TAG(i, iter) ((starpu_tag_t) (((uint64_t)iter)<<32 | (i))) + +#define Ni 64 +#define Nk 256 + +static unsigned ni = Ni, nk = Nk; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + nk = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-i") == 0) + { + char *argptr; + ni = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-iter iter] [-i i]\n", argv[0]); + } + } +} + +void callback_cpu(void *argcb); + +static void tag_cleanup_grid(unsigned iter) +{ + unsigned i; + + for (i = 0; i < ni; i++) + starpu_tag_remove(TAG(i,iter)); +} + +static int create_task_grid(unsigned iter) +{ + int i; + +/* FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */ + + for (i = ni - 1; i > 0; i--) + { + int ret; + + /* create a new task */ + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = TAG(i, iter); + + if (i != 1) + starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + return 0; +} + +void cpu_codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +int main(int argc, char **argv) +{ + unsigned i; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + ni /= 4; + nk /= 16; +#endif + + parse_args(argc, argv); + + FPRINTF(stderr, "ITER : %u\n", nk); + + for (i = 0; i < nk; i++) + { + ret = create_task_grid(i); + if (ret == 77) goto enodev; + + starpu_tag_wait(TAG(ni-1, i)); + + /* cleanup old grids ... 
*/ + if (i > 1) + tag_cleanup_grid(i-1); + } + +enodev: + starpu_shutdown(); + + FPRINTF(stderr, "TEST DONE ...\n"); + + return ret; +} diff --git a/examples/tag_example/tag_example4.c b/examples/tag_example/tag_example4.c new file mode 100644 index 0000000..ab2b39c --- /dev/null +++ b/examples/tag_example/tag_example4.c @@ -0,0 +1,157 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows how to make a task depend on either of a series of tasks. + * + * For each i, we submit i tasks of type A, which fill the i-th variable, and i + * tasks of type B, which check that the i-th variable is filled. Thanks to + * tag dependency, B tasks are scheduled as soon as one of the corresponding A + * task is finished. + */ + +#include +#include +#include +#include + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define TAG(i, iter) ((starpu_tag_t) (((uint64_t)i)<<32 | (iter))) + +void cpu_codelet_A(void *descr[], void *_args) +{ + (void)descr; + int *arg = _args; + STARPU_ATOMIC_OR(arg, 1); + fprintf(stderr,"A"); +} + +void cpu_codelet_B(void *descr[], void *_args) +{ + (void)descr; + int *arg = _args; + if (*arg != 1) + exit(EXIT_FAILURE); + fprintf(stderr,"B"); +} + +struct starpu_codelet cl_A = +{ + .cpu_funcs = { cpu_codelet_A}, + .cuda_funcs = { cpu_codelet_A}, + .opencl_funcs = { cpu_codelet_A}, + .nbuffers = 0, + .name = "dummyA" +}; + +struct starpu_codelet cl_B = +{ + .cpu_funcs = { cpu_codelet_B}, + .cuda_funcs = { cpu_codelet_B}, + .opencl_funcs = { cpu_codelet_B}, + .nbuffers = 0, + .name = "dummyB" +}; + +#define Ni 64 + +static unsigned ni = Ni; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + ni = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-iter iter]\n", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + unsigned i, j; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + exit(77); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + parse_args(argc, argv); + + FPRINTF(stderr, "ITER : %u\n", ni); + + { + int array[ni]; + + memset(array, 0, sizeof(array)); + + for (i = 1; i < ni; i++) + { + for (j = 1; j < i; j++) + { + struct starpu_task *task_A = starpu_task_create(); + task_A->cl = &cl_A; + task_A->cl_arg = &array[i]; + task_A->use_tag = 1; + task_A->tag_id = TAG(0, i); + + ret = starpu_task_submit(task_A); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (j = 1; j < i; j++) + { + struct starpu_task *task_B = starpu_task_create(); + task_B->cl = &cl_B; + task_B->cl_arg = &array[i]; + task_B->use_tag = 1; + task_B->tag_id = TAG(j, i); + + 
starpu_tag_declare_deps(TAG(j, i), 1, TAG(0, i)); + + ret = starpu_task_submit(task_B); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_task_wait_for_all(); + } + + for (i = 1; i < ni; i++) + { + for (j = 0; j < i; j++) + starpu_tag_remove(TAG(j, i)); + } + +enodev: + starpu_shutdown(); + + FPRINTF(stderr, "TEST DONE ...\n"); + + if (ret == -ENODEV) return 77; else return 0; +} diff --git a/examples/tag_example/tag_restartable.c b/examples/tag_example/tag_restartable.c new file mode 100644 index 0000000..0b6c437 --- /dev/null +++ b/examples/tag_example/tag_restartable.c @@ -0,0 +1,168 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example shows how to reuse a tag. + * We define a series of dependency chains, shaped this way: + * + * ... ... + * v v + * ... task (i, j) task (i, j+1) ... + * v v + * ... task (i+1,j) task (i+1,j+1) ... + * v v + * ... ... + * + * And this grid is used several times, by waiting for the completion of a + * chain before starting it over. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define Nrolls 4 +#define SLEEP 1 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define TAG(i, iter) ((starpu_tag_t) (((uint64_t)((iter)%Nrolls))<<32 | (i))) + +#define Ni 64 +#define Nk 256 + +static unsigned ni = Ni, nk = Nk; +struct starpu_task **tasks[Nrolls]; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + nk = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-i") == 0) + { + char *argptr; + ni = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-iter iter] [-i i]\n", argv[0]); + } + } +} + +void callback_cpu(void *argcb); + +static void create_task_grid(unsigned iter) +{ + unsigned i; + + FPRINTF(stderr, "init iter %u ni %u...\n", iter, ni); + + for (i = 0; i < ni; i++) + { + /* create a new task */ + struct starpu_task *task = tasks[iter][i] = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->use_tag = 1; + task->tag_id = TAG(i, iter); + + task->detach = 1; + task->destroy = 0; + + if (i != 0) + starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); + } + +} + +static int start_task_grid(unsigned iter) +{ + unsigned i; + + /* FPRINTF(stderr, "start grid %d ni %d...\n", iter, ni); */ + + for (i = 0; i < ni; i++) + { + int ret; + ret = starpu_task_submit(tasks[iter][i]); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + return 0; +} + +int main(int argc, char **argv) +{ + unsigned i, j; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_QUICK_CHECK + ni /= 4; + nk /= 16; +#endif + + parse_args(argc, argv); + + FPRINTF(stderr, "ITER : %u\n", nk); + + for (i = 0; i < Nrolls; i++) + { + tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i])); + + create_task_grid(i); + } + + for (i = 0; i < nk; i++) + { + ret = start_task_grid(i % 
Nrolls); + if (ret == 77) goto enodev; + + if (i+1 >= Nrolls) + /* Wait before re-using same tasks & tags */ + starpu_tag_wait(TAG(ni-1, i + 1)); + } + + starpu_shutdown(); + FPRINTF(stderr, "TEST DONE ...\n"); + +enodev: + for (i = 0; i < Nrolls; i++) + { + for (j = 0; j < ni; j++) + starpu_task_destroy(tasks[i][j]); + free(tasks[i]); + } + + return ret; +} diff --git a/examples/transactions/trs_inc.c b/examples/transactions/trs_inc.c new file mode 100644 index 0000000..dd5c761 --- /dev/null +++ b/examples/transactions/trs_inc.c @@ -0,0 +1,155 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is just a small example which increments two values of a vector several times. + */ +#include + +#ifdef STARPU_QUICK_CHECK +static unsigned niter = 500; +#elif !defined(STARPU_LONG_CHECK) +static unsigned niter = 5000; +#else +static unsigned niter = 50000; +#endif + +#define DO_TRANS_MOD 10 +#define DO_START_MOD 2 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +static int _do_start_transaction(int val) +{ + if ((val / DO_TRANS_MOD) % DO_START_MOD == 0) + { + return 0; + } + else + { + return 1; + } +} + +int do_start_transaction(void *descr, void *arg) +{ + int val = (int)(intptr_t)arg; + int ret = _do_start_transaction(val); + return ret; +} + +void cpu_func(void *descr[], void *_args) +{ + (void)_args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + *val += 1; +} + +int main(int argc, char **argv) +{ + int ret = 0; + double start; + double end; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (argc == 2) + niter = atoi(argv[1]); + + int value = 0; + + starpu_data_handle_t handle; + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + struct starpu_codelet cl = + { + .cpu_funcs = { cpu_func }, + .cpu_funcs_name = { "cpu_func" }, + .nbuffers = STARPU_VARIABLE_NBUFFERS, + .name = "trs_increment" + }; + + start = starpu_timing_now(); + + struct starpu_transaction *transaction = starpu_transaction_open(do_start_transaction, (void*)(intptr_t)0); + if (transaction == NULL) + { + starpu_cublas_shutdown(); + starpu_shutdown(); + return 77; /* transaction begin task submit failed with ENODEV */ + } + + int simulated_transaction_status = _do_start_transaction(0); + int expected_result = 0; + unsigned i; + + for (i = 0; i < niter; i++) + { + if (i>0 && (i%DO_TRANS_MOD == 0)) + { + starpu_transaction_next_epoch(transaction, (void*)(intptr_t)i); + simulated_transaction_status = _do_start_transaction(i); + } + + if (simulated_transaction_status) + { + expected_result ++; + } + + ret = starpu_task_insert(&cl, + STARPU_RW, handle, + STARPU_TRANSACTION, transaction, + 0); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, 
"No worker may execute this task\n"); + starpu_data_unregister(handle); + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_transaction_close(transaction); + + starpu_task_wait_for_all(); + + starpu_data_unregister(handle); + + end = starpu_timing_now(); + + if (value != expected_result) + { + FPRINTF(stderr, "Incorrect result, value = %d, expected %d\n", value, expected_result); + ret = 1; + } + + double timing = end - start; + + FPRINTF(stderr, "%u,%f,%d\n", niter, timing/1000, value); + +enodev: + starpu_shutdown(); + + return (ret == -ENODEV ? 77 : ret); +} diff --git a/examples/transactions/trs_sgemm.c b/examples/transactions/trs_sgemm.c new file mode 100644 index 0000000..bd14942 --- /dev/null +++ b/examples/transactions/trs_sgemm.c @@ -0,0 +1,469 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Simple parallel GEMM implementation: partition the output matrix in the two + * dimensions, and the input matrices in the corresponding dimension, and + * perform the output computations in parallel. 
+ */ + + +#define TYPE float + +#define CUBLAS_GEMM cublasSgemm +#define CPU_GEMM STARPU_SGEMM +#define CPU_ASUM STARPU_SASUM +#define CPU_IAMAX STARPU_ISAMAX +#define STARPU_GEMM(name) starpu_sgemm_##name + +#define str(s) #s +#define xstr(s) str(s) +#define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) + + +#include +#include +#include +#include +#include +#include + +#include + +static int do_start_trs_mod=2; + +static int _do_start_transaction(int val) +{ + if (do_start_trs_mod == 0) + { + return 0; + } + return ((val+1) % do_start_trs_mod == 0); +} + +int do_start_transaction(void *descr, void *arg) +{ + int val = (int)(intptr_t)arg; + int ret = _do_start_transaction(val); + return ret; +} + +#ifdef STARPU_USE_CUDA +#include +#include +static const TYPE p1 = 1.0; +static const TYPE m1 = -1.0; +static const TYPE v0 = 0.0; +#endif + +static unsigned niter = 10; +static unsigned nslicesx = 4; +static unsigned nslicesy = 4; +#if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) +static unsigned xdim = 256; +static unsigned ydim = 256; +static unsigned zdim = 64; +#else +static unsigned xdim = 960*4; +static unsigned ydim = 960*4; +static unsigned zdim = 960*4; +#endif +static unsigned check = 0; + +static TYPE *A, *B, *C; +static starpu_data_handle_t A_handle, B_handle, C_handle; + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) + +static void check_output(void) +{ + /* compute C = C - AB */ + CPU_GEMM("N", "N", ydim, xdim, zdim, (TYPE)-1.0f, A, ydim, B, zdim, (TYPE)1.0f, C, ydim); + + /* make sure C = 0 */ + TYPE err; + err = CPU_ASUM(xdim*ydim, C, 1); + + if (err < xdim*ydim*0.001) + { + FPRINTF(stderr, "Results are OK\n"); + } + else + { + int max; + max = CPU_IAMAX(xdim*ydim, C, 1); + + FPRINTF(stderr, "There were errors ... 
err = %f\n", err); + FPRINTF(stderr, "Max error : %e\n", C[max]); + } +} + +static void init_problem_data(void) +{ +#ifndef STARPU_SIMGRID + unsigned i,j; +#endif + + starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + +#ifndef STARPU_SIMGRID + /* fill the A and B matrices */ + for (j=0; j < ydim; j++) + { + for (i=0; i < zdim; i++) + { + A[j+i*ydim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < zdim; j++) + { + for (i=0; i < xdim; i++) + { + B[j+i*zdim] = (TYPE)(starpu_drand48()); + } + } + + for (j=0; j < ydim; j++) + { + for (i=0; i < xdim; i++) + { + C[j+i*ydim] = (TYPE)(0); + } + } +#endif +} + +static void partition_mult_data(void) +{ + starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, + ydim, ydim, zdim, sizeof(TYPE)); + starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, + zdim, zdim, xdim, sizeof(TYPE)); + starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, + ydim, ydim, xdim, sizeof(TYPE)); + + struct starpu_data_filter vert; + memset(&vert, 0, sizeof(vert)); + vert.filter_func = starpu_matrix_filter_vertical_block; + vert.nchildren = nslicesx; + + struct starpu_data_filter horiz; + memset(&horiz, 0, sizeof(horiz)); + horiz.filter_func = starpu_matrix_filter_block; + horiz.nchildren = nslicesy; + + starpu_data_partition(B_handle, &vert); + starpu_data_partition(A_handle, &horiz); + + starpu_data_map_filters(C_handle, 2, &vert, &horiz); +} + +#ifdef STARPU_USE_CUDA +static void cublas_mult(void *descr[], void *arg) +{ + (void)arg; + TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned 
nxC = STARPU_MATRIX_GET_NX(descr[2]); + unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); + unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + + cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_N, + nxC, nyC, nyA, + &p1, subA, ldA, subB, ldB, + &v0, subC, ldC); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); +} +#endif + +void cpu_mult(void *descr[], void *arg) +{ + (void)arg; + TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); + unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); + unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + + int worker_size = starpu_combined_worker_get_size(); + + if (worker_size == 1) + { + /* Sequential CPU task */ + CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC); + } + else + { + /* Parallel CPU task */ + unsigned rank = starpu_combined_worker_get_rank(); + + unsigned block_size = (nyC + worker_size - 1)/worker_size; + unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; + + STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); + + TYPE *new_subB = &subB[block_size*rank]; + TYPE *new_subC = &subC[block_size*rank]; + + CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC); + } +} + +static struct starpu_perfmodel starpu_gemm_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = STARPU_GEMM_STR(gemm) +}; + +static struct starpu_codelet cl = +{ + .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed 
*/ + .max_parallelism = INT_MAX, + .cpu_funcs = {cpu_mult}, + .cpu_funcs_name = {"cpu_mult"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cublas_mult}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = STARPU_VARIABLE_NBUFFERS, /* required for transactions */ + /* .modes = {STARPU_R, STARPU_R, STARPU_RW}, */ + .model = &starpu_gemm_model +}; + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) /* value-taking options only match when a value follows, so argv[++i] is never argv[argc] */ + { + if (strcmp(argv[i], "-nblocks") == 0 && i + 1 < argc) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + nslicesy = nslicesx; + } + + else if (strcmp(argv[i], "-nblocksx") == 0 && i + 1 < argc) + { + char *argptr; + nslicesx = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-nblocksy") == 0 && i + 1 < argc) + { + char *argptr; + nslicesy = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-x") == 0 && i + 1 < argc) + { + char *argptr; + xdim = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-y") == 0 && i + 1 < argc) + { + char *argptr; + ydim = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-z") == 0 && i + 1 < argc) + { + char *argptr; + zdim = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-size") == 0 && i + 1 < argc) + { + char *argptr; + xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-iter") == 0 && i + 1 < argc) + { + char *argptr; + niter = strtol(argv[++i], &argptr, 10); + } + + /* Modulo operand to decide which iterations to confirm or cancel. + * + * An iteration will be confirmed if ((iter+1) % MOD) == 0, and + * cancelled otherwise. As a special value, if MOD == 0, all + * iterations will be cancelled. 
*/ + else if (strcmp(argv[i], "-mod") == 0 && i + 1 < argc) + { + char *argptr; + do_start_trs_mod = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + else if (strcmp(argv[i], "-spmd") == 0) + { + cl.type = STARPU_SPMD; + } + + else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) + { + fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-z z] [-size size] [-iter iter] [-check] [-spmd] [-mod start_trs_mod]\n", argv[0]); + fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations, transaction confirmation modulo = %d\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter, do_start_trs_mod); + exit(EXIT_SUCCESS); + } + else + { + fprintf(stderr,"Unrecognized option or missing value: %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) +{ + double start, end; + int ret; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + parse_args(argc, argv); + + starpu_fxt_autostart_profiling(0); + ret = starpu_init(&conf); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_cublas_init(); + + init_problem_data(); + partition_mult_data(); + + starpu_fxt_start_profiling(); + start = starpu_timing_now(); + + unsigned x, y, iter; + struct starpu_transaction *transaction = starpu_transaction_open(do_start_transaction, (void*)(intptr_t)0); + if (transaction == NULL) + { + starpu_cublas_shutdown(); + starpu_shutdown(); + return 77; /* transaction begin task submit failed with ENODEV */ + } + + int simulated_transaction_status = _do_start_transaction(0); + int n_cancelled_iter=0; + + for (iter = 0; iter < niter; iter++) + { + if (iter > 0) + { + starpu_transaction_next_epoch(transaction, (void*)(intptr_t)iter); + simulated_transaction_status = _do_start_transaction(iter); + } + if (simulated_transaction_status == 0) + { + n_cancelled_iter++; + } + 
for (x = 0; x < nslicesx; x++) + { + for (y = 0; y < nslicesy; y++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + + task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y); + task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); + task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y); + task->modes[0] = STARPU_R; + task->modes[1] = STARPU_R; + task->modes[2] = STARPU_RW; + task->nbuffers = 3; + task->transaction = transaction; + + task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + ret = 77; + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y)); + } + } + + } + starpu_transaction_close(transaction); + starpu_task_wait_for_all(); + + end = starpu_timing_now(); + starpu_fxt_stop_profiling(); + + double timing = end - start; + + PRINTF("# x\ty\tz\ts\tniter\tncancelled"); + PRINTF("\n"); + PRINTF("%u\t%u\t%u\t%.3f\t%d\t%d", xdim, ydim, zdim, timing/1.0e6, niter, n_cancelled_iter); + PRINTF("\n"); + +enodev: + starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); + starpu_data_unregister(C_handle); + + if (check) + check_output(); + + starpu_free_flags(A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_free_flags(B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + starpu_free_flags(C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); + + starpu_cublas_shutdown(); + starpu_shutdown(); + + return ret; +} diff --git a/examples/worker_collections/worker_list_example.c b/examples/worker_collections/worker_list_example.c new file mode 100644 index 0000000..cbc9142 --- /dev/null +++ 
b/examples/worker_collections/worker_list_example.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This shows how to manipulate worker lists. + */ +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main() +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int procs[STARPU_NMAXWORKERS]; + unsigned ncpus = starpu_cpu_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus); + + struct starpu_worker_collection *co = (struct starpu_worker_collection*)calloc(1, sizeof(struct starpu_worker_collection)); /* zeroed, as in the worker tree example */ + co->has_next = starpu_worker_list.has_next; + co->get_next = starpu_worker_list.get_next; + co->add = starpu_worker_list.add; + co->remove = starpu_worker_list.remove; + co->init = starpu_worker_list.init; + co->deinit = starpu_worker_list.deinit; + co->init_iterator = starpu_worker_list.init_iterator; + co->type = STARPU_WORKER_LIST; + + FPRINTF(stderr, "ncpus %u\n", ncpus); + + double start_time; + double end_time; + + start_time = starpu_timing_now(); + + co->init(co); + + end_time = starpu_timing_now(); + + double timing = (end_time - start_time) / 1000; + + unsigned i; + for(i = 0; i < ncpus; i++) + { + int 
added = co->add(co, procs[i]); + FPRINTF(stderr, "added proc %d to the list \n", added); + } + + struct starpu_sched_ctx_iterator it; + + int pu; + co->init_iterator(co, &it); + while(co->has_next(co, &it)) + { + pu = co->get_next(co, &it); + FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); + } + + for(i = 0; i < 6; i++) + { + co->remove(co, i); + FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers); + } + + while(co->has_next(co, &it)) + { + pu = co->get_next(co, &it); + FPRINTF(stderr, "pu = %d out of %u workers\n", pu, co->nworkers); + } + + FPRINTF(stderr, "timing init = %lf \n", timing); + co->deinit(co); + free(co); + starpu_shutdown(); + + return 0; +} diff --git a/examples/worker_collections/worker_tree_example.c b/examples/worker_collections/worker_tree_example.c new file mode 100644 index 0000000..5965f6d --- /dev/null +++ b/examples/worker_collections/worker_tree_example.c @@ -0,0 +1,109 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This shows how to manipulate worker trees. + */ + +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#if !defined(STARPU_HAVE_HWLOC) +#warning hwloc is not enabled. 
Skipping test +int main(int argc, char **argv) +{ + return 77; +} +#else + +int main() +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int procs[STARPU_NMAXWORKERS]; + unsigned ncpus = starpu_cpu_worker_get_count(); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus); + + struct starpu_worker_collection *co = (struct starpu_worker_collection*)calloc(1, sizeof(struct starpu_worker_collection)); + co->has_next = starpu_worker_tree.has_next; + co->get_next = starpu_worker_tree.get_next; + co->add = starpu_worker_tree.add; + co->remove = starpu_worker_tree.remove; + co->init = starpu_worker_tree.init; + co->deinit = starpu_worker_tree.deinit; + co->init_iterator = starpu_worker_tree.init_iterator; + co->type = STARPU_WORKER_TREE; + + FPRINTF(stderr, "ncpus %u \n", ncpus); + + double start_time; + double end_time; + + start_time = starpu_timing_now(); + + co->init(co); + + end_time = starpu_timing_now(); + + double timing = (end_time - start_time) / 1000; + + unsigned i; + for(i = 0; i < ncpus; i++) + { + int added = co->add(co, procs[i]); + FPRINTF(stderr, "added proc %d to the tree \n", added); + } + + struct starpu_sched_ctx_iterator it; + + int pu; + co->init_iterator(co, &it); + while(co->has_next(co, &it)) + { + pu = co->get_next(co, &it); + FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); + } + + unsigned six = 6; + if (six < ncpus) + six = ncpus/2; + for(i = 0; i < six; i++) + { + co->remove(co, i); + FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers); + } + + while(co->has_next(co, &it)) + { + pu = co->get_next(co, &it); + FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); + } + + FPRINTF(stderr, "timing init = %lf \n", timing); + + co->deinit(co); + starpu_shutdown(); + free(co); + + return 0; +} +#endif diff --git a/include/fstarpu_mod.f90 b/include/fstarpu_mod.f90 new file mode 100644 index 0000000..041de99 --- 
/dev/null +++ b/include/fstarpu_mod.f90 @@ -0,0 +1,2697 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +!> @ingroup API_Fortran +!> @brief Fortran API +module fstarpu_mod + use iso_c_binding + implicit none + + ! Note: Constants truly are intptr_t, but are declared as c_ptr to be + ! readily usable in c_ptr arrays to mimic variadic functions. + ! Note: Bitwise or operator is provided by the .ior. 
overloaded operator + type(c_ptr), bind(C) :: FSTARPU_R + type(c_ptr), bind(C) :: FSTARPU_W + type(c_ptr), bind(C) :: FSTARPU_RW + type(c_ptr), bind(C) :: FSTARPU_SCRATCH + type(c_ptr), bind(C) :: FSTARPU_REDUX + type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX + type(c_ptr), bind(C) :: FSTARPU_COMMUTE + type(c_ptr), bind(C) :: FSTARPU_SSEND + type(c_ptr), bind(C) :: FSTARPU_LOCALITY + + type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY + type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE + type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PRIORITY + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER + type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE + type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG + type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL + type(c_ptr), bind(C) :: FSTARPU_FLOPS + type(c_ptr), bind(C) :: FSTARPU_TAG + type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY + type(c_ptr), bind(C) :: FSTARPU_NAME + type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR + type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS + type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY + type(c_ptr), bind(C) :: FSTARPU_TASK_END_DEP + type(c_ptr), 
bind(C) :: FSTARPU_NODE_SELECTION_POLICY + type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA + + type(c_ptr), bind(C) :: FSTARPU_VALUE + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX + + type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER + type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER + type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER + type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER + + integer(c_int), bind(C) :: FSTARPU_NMAXBUFS + + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA + + type(c_ptr), bind(C) :: FSTARPU_NOWHERE + type(c_ptr), bind(C) :: FSTARPU_CPU + type(c_ptr), bind(C) :: FSTARPU_CUDA + type(c_ptr), bind(C) :: FSTARPU_OPENCL + + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT + type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC + type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC + + !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER + !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH + !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON + type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED + type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED + + type(c_ptr), bind(C) :: FSTARPU_SEQ + type(c_ptr), bind(C) :: FSTARPU_SPMD + type(c_ptr), bind(C) :: FSTARPU_FORKJOIN + + ! 
(some) portable iso_c_binding types + type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE + type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T + type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T + + ! (some) native Fortran types + type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER + + type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER + type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 + type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION + + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 + + integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO + + interface operator (.ior.) + procedure or_cptrs + end interface operator (.ior.) + + interface + ! == starpu.h == + + ! 
void starpu_conf_init(struct starpu_conf *conf); + subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_init + + function fstarpu_conf_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_conf_allocate + end function fstarpu_conf_allocate + + subroutine fstarpu_conf_free (conf) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_free + + subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) + use iso_c_binding, only: c_ptr, c_char + type(c_ptr), value, intent(in) :: conf + character(c_char), intent(in) :: policy_name + end subroutine fstarpu_conf_set_sched_policy_name + + subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: min_prio + end subroutine fstarpu_conf_set_min_prio + + subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: max_prio + end subroutine fstarpu_conf_set_max_prio + + subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncpu + end subroutine fstarpu_conf_set_ncpu + + subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncuda + end subroutine fstarpu_conf_set_ncuda + + subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: nopencl + end subroutine fstarpu_conf_set_nopencl + + ! starpu_init: see fstarpu_init + ! 
starpu_initialize: see fstarpu_init + + ! void starpu_pause(void); + subroutine fstarpu_pause() bind(C,name="starpu_pause") + end subroutine fstarpu_pause + + ! void starpu_resume(void); + subroutine fstarpu_resume() bind(C,name="starpu_resume") + end subroutine fstarpu_resume + + ! int starpu_is_paused(void); + function fstarpu_is_paused() bind(C,name="starpu_is_paused") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_is_paused + end function fstarpu_is_paused + + ! void starpu_shutdown(void); + subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") + end subroutine fstarpu_shutdown + + ! starpu_topology_print + subroutine fstarpu_topology_print () bind(C) + end subroutine fstarpu_topology_print + + ! int starpu_asynchronous_copy_disabled(void); + function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_copy_disabled + end function fstarpu_asynchronous_copy_disabled + + ! int starpu_asynchronous_cuda_copy_disabled(void); + function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled + end function fstarpu_asynchronous_cuda_copy_disabled + + ! int starpu_asynchronous_opencl_copy_disabled(void); + function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled + end function fstarpu_asynchronous_opencl_copy_disabled + + ! void starpu_display_stats(); + subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") + end subroutine fstarpu_display_stats + + ! 
void starpu_get_version(int *major, int *minor, int *release); + subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") + use iso_c_binding, only: c_int + integer(c_int), intent(out) :: major,minor,release + end subroutine fstarpu_get_version + + ! == starpu_worker.h == + + ! unsigned starpu_worker_get_count(void); + function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_count + end function fstarpu_worker_get_count + + ! unsigned starpu_combined_worker_get_count(void); + function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_count + end function fstarpu_combined_worker_get_count + + ! unsigned starpu_worker_is_combined_worker(int id); + function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_combined_worker + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_combined_worker + + + ! unsigned starpu_cpu_worker_get_count(void); + function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cpu_worker_get_count + end function fstarpu_cpu_worker_get_count + + ! unsigned starpu_cuda_worker_get_count(void); + function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cuda_worker_get_count + end function fstarpu_cuda_worker_get_count + + ! unsigned starpu_opencl_worker_get_count(void); + function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_opencl_worker_get_count + end function fstarpu_opencl_worker_get_count + + ! 
int starpu_worker_get_id(void); + function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_id + end function fstarpu_worker_get_id + + ! _starpu_worker_get_id_check + ! starpu_worker_get_id_check + + ! int starpu_worker_get_bindid(int workerid); + function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_bindid + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_bindid + + ! int starpu_combined_worker_get_id(void); + function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_id + end function fstarpu_combined_worker_get_id + + ! int starpu_combined_worker_get_size(void); + function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_size + end function fstarpu_combined_worker_get_size + + ! int starpu_combined_worker_get_rank(void); + function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_rank + end function fstarpu_combined_worker_get_rank + + ! enum starpu_worker_archtype starpu_worker_get_type(int id); + function fstarpu_worker_get_type(id) bind(C) + use iso_c_binding, only: c_int, c_ptr + type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t + integer(c_int),value,intent(in) :: id + end function fstarpu_worker_get_type + + ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); + function fstarpu_worker_get_count_by_type(typeid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_count_by_type + type(c_ptr),value,intent(in) :: typeid ! 
c_intptr_t expected by C func + end function fstarpu_worker_get_count_by_type + + ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); + function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_ids_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),intent(out) :: workerids(*) + integer(c_int),value,intent(in) :: maxsize + end function fstarpu_worker_get_ids_by_type + + ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); + function fstarpu_worker_get_by_type(typeid, num) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: num + end function fstarpu_worker_get_by_type + + ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); + function fstarpu_worker_get_by_devid(typeid, devid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_devid + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: devid + end function fstarpu_worker_get_by_devid + + ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); + subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") + use iso_c_binding, only: c_int, c_char, c_size_t + integer(c_int),value,intent(in) :: id + character(c_char),intent(out) :: dst(*) + integer(c_size_t),value,intent(in) :: maxlen + end subroutine fstarpu_worker_get_name + + + ! 
int starpu_worker_get_devid(int id); + function fstarpu_worker_get_devid(id) bind(C,name="starpu_worker_get_devid") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_devid + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_devid + + ! struct starpu_tree* starpu_workers_get_tree(void); + ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); + + ! unsigned starpu_worker_is_blocked(int workerid); + function fstarpu_worker_is_blocked(id) bind(C,name="starpu_worker_is_blocked") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_blocked + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_blocked + + ! unsigned starpu_worker_is_slave_somewhere(int workerid); + function fstarpu_worker_is_slave_somewhere(id) bind(C,name="starpu_worker_is_slave_somewhere") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_slave_somewhere + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_slave_somewhere + + ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); + subroutine fstarpu_worker_get_type_as_string(typeid,dst,maxlen) bind(C) + use iso_c_binding, only: c_ptr, c_char, c_size_t + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + character(c_char),intent(out) :: dst(*) + integer(c_size_t),value,intent(in) :: maxlen + end subroutine fstarpu_worker_get_type_as_string + + ! int starpu_bindid_get_workerids(int bindid, int **workerids); + + ! == starpu_task.h == + + function fstarpu_task_create_sync (handle, mode) bind(C,name="starpu_task_create_sync") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_task_create_sync + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode + end function fstarpu_task_create_sync + + ! 
void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
+    ! id uses the same 64-bit integer kind as the entries of tag_array and as the
+    ! other starpu_tag_t arguments of this module (e.g. fstarpu_tag_wait); a
+    ! c_int here would not match the by-value starpu_tag_t of the C prototype.
+    subroutine fstarpu_tag_declare_deps_array(id, ndeps, tag_array) bind(C, name="starpu_tag_declare_deps_array")
+        use, intrinsic :: iso_c_binding, only: c_int, c_long_long
+        integer(c_long_long), value, intent(in) :: id
+        integer(c_int), value, intent(in) :: ndeps
+        integer(c_long_long), intent(in) :: tag_array(*)
+    end subroutine fstarpu_tag_declare_deps_array
+
+    ! void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
+    ! NOTE(review): the C function is variadic; this interface passes exactly
+    ! one dependency (root_task). Variadic C functions are not interoperable
+    ! in the Fortran standard sense - prefer fstarpu_task_declare_deps_array.
+    subroutine fstarpu_task_declare_deps(task, ndeps, root_task) bind(C, name="starpu_task_declare_deps")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: ndeps
+        type(c_ptr), value, intent(in) :: root_task
+    end subroutine fstarpu_task_declare_deps
+
+    ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
+    subroutine fstarpu_task_declare_deps_array(task, ndeps, task_array) bind(C, name="starpu_task_declare_deps_array")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: ndeps
+        type(c_ptr), intent(in) :: task_array(*)
+    end subroutine fstarpu_task_declare_deps_array
+
+    ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
+    subroutine fstarpu_task_end_dep_add(task, nb_deps) &
+            bind(C, name="starpu_task_end_dep_add")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: nb_deps
+    end subroutine fstarpu_task_end_dep_add
+
+    ! void starpu_task_end_dep_release(struct starpu_task *t)
+    subroutine fstarpu_task_end_dep_release(task) &
+            bind(C, name="starpu_task_end_dep_release")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+    end subroutine fstarpu_task_end_dep_release
+
+
+    ! 
int starpu_tag_wait(starpu_tag_t id);
+    function fstarpu_tag_wait(id) bind(C, name="starpu_tag_wait")
+        use, intrinsic :: iso_c_binding, only: c_int, c_long_long
+        integer(c_long_long), value, intent(in) :: id
+        integer(c_int) :: fstarpu_tag_wait
+    end function fstarpu_tag_wait
+
+    ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id);
+    function fstarpu_tag_wait_array(ntags, tag_array) bind(C, name="starpu_tag_wait_array")
+        use, intrinsic :: iso_c_binding, only: c_int, c_long_long
+        integer(c_int), value, intent(in) :: ntags
+        integer(c_long_long), intent(in) :: tag_array(*)
+        integer(c_int) :: fstarpu_tag_wait_array
+    end function fstarpu_tag_wait_array
+
+    ! void starpu_tag_notify_from_apps(starpu_tag_t id);
+    subroutine fstarpu_tag_notify_from_apps(id) bind(C, name="starpu_tag_notify_from_apps")
+        use, intrinsic :: iso_c_binding, only: c_long_long
+        integer(c_long_long), value, intent(in) :: id
+    end subroutine fstarpu_tag_notify_from_apps
+
+    ! void starpu_tag_restart(starpu_tag_t id);
+    subroutine fstarpu_tag_restart(id) bind(C, name="starpu_tag_restart")
+        use, intrinsic :: iso_c_binding, only: c_long_long
+        integer(c_long_long), value, intent(in) :: id
+    end subroutine fstarpu_tag_restart
+
+    ! void starpu_tag_remove(starpu_tag_t id);
+    subroutine fstarpu_tag_remove(id) bind(C, name="starpu_tag_remove")
+        use, intrinsic :: iso_c_binding, only: c_long_long
+        integer(c_long_long), value, intent(in) :: id
+    end subroutine fstarpu_tag_remove
+
+    ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id);
+    function fstarpu_tag_get_task(id) bind(C, name="starpu_tag_get_task")
+        use, intrinsic :: iso_c_binding, only: c_long_long, c_ptr
+        integer(c_long_long), value, intent(in) :: id
+        type(c_ptr) :: fstarpu_tag_get_task
+    end function fstarpu_tag_get_task
+
+
+    ! void starpu_task_init(struct starpu_task *task);
+    subroutine fstarpu_task_init (task) bind(C, name="starpu_task_init")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+    end subroutine fstarpu_task_init
+
+    ! 
void starpu_task_clean(struct starpu_task *task);
+    subroutine fstarpu_task_clean (task) bind(C, name="starpu_task_clean")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+    end subroutine fstarpu_task_clean
+
+    ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC;
+    function fstarpu_task_create () bind(C, name="starpu_task_create")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr) :: fstarpu_task_create
+    end function fstarpu_task_create
+
+    ! void starpu_task_destroy(struct starpu_task *task);
+    subroutine fstarpu_task_destroy (task) bind(C, name="starpu_task_destroy")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+    end subroutine fstarpu_task_destroy
+
+    ! void starpu_task_set_destroy(struct starpu_task *task);
+    subroutine fstarpu_task_set_destroy (task) bind(C, name="starpu_task_set_destroy")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+    end subroutine fstarpu_task_set_destroy
+
+    ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+    function fstarpu_task_submit (task) bind(C, name="starpu_task_submit")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int) :: fstarpu_task_submit
+    end function fstarpu_task_submit
+
+    ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
+    function fstarpu_task_submit_to_ctx (task, sched_ctx_id) bind(C, name="starpu_task_submit_to_ctx")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: sched_ctx_id
+        integer(c_int) :: fstarpu_task_submit_to_ctx
+    end function fstarpu_task_submit_to_ctx
+
+    ! 
int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+    function fstarpu_task_finished (task) bind(C, name="starpu_task_finished")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int) :: fstarpu_task_finished
+    end function fstarpu_task_finished
+
+    ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+    function fstarpu_task_wait (task) bind(C, name="starpu_task_wait")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int) :: fstarpu_task_wait
+    end function fstarpu_task_wait
+
+    ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT;
+    ! task_array is an assumed-size array of task pointers, matching the
+    ! `struct starpu_task **` parameter (same convention as
+    ! fstarpu_task_declare_deps_array); a scalar dummy would reject array
+    ! actual arguments under this explicit interface.
+    function fstarpu_task_wait_array(task_array, ntasks) bind(C, name="starpu_task_wait_array")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), intent(in) :: task_array(*)
+        integer(c_int), value, intent(in) :: ntasks
+        integer(c_int) :: fstarpu_task_wait_array
+    end function fstarpu_task_wait_array
+
+
+    ! int starpu_task_wait_for_all(void);
+    ! Declared as a subroutine: the int result of the C function is not exposed.
+    subroutine fstarpu_task_wait_for_all () bind(C, name="starpu_task_wait_for_all")
+    end subroutine fstarpu_task_wait_for_all
+
+    ! int starpu_task_wait_for_n_submitted(unsigned n);
+    subroutine fstarpu_task_wait_for_n_submitted (n) bind(C, name="starpu_task_wait_for_n_submitted")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int), value, intent(in) :: n
+    end subroutine fstarpu_task_wait_for_n_submitted
+
+    ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
+    subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C, name="starpu_task_wait_for_all_in_ctx")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int), value, intent(in) :: ctx
+    end subroutine fstarpu_task_wait_for_all_in_ctx
+
+    ! 
int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n);
+    subroutine fstarpu_task_wait_for_n_submitted_in_ctx (ctx, n) bind(C, name="starpu_task_wait_for_n_submitted_in_ctx")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int), value, intent(in) :: ctx
+        integer(c_int), value, intent(in) :: n
+    end subroutine fstarpu_task_wait_for_n_submitted_in_ctx
+
+    ! int starpu_task_wait_for_no_ready(void);
+    function fstarpu_task_wait_for_no_ready () bind(C, name="starpu_task_wait_for_no_ready")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int) :: fstarpu_task_wait_for_no_ready
+    end function fstarpu_task_wait_for_no_ready
+
+    ! int starpu_task_nready(void);
+    function fstarpu_task_nready () bind(C, name="starpu_task_nready")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int) :: fstarpu_task_nready
+    end function fstarpu_task_nready
+
+    ! int starpu_task_nsubmitted(void);
+    function fstarpu_task_nsubmitted () bind(C, name="starpu_task_nsubmitted")
+        use, intrinsic :: iso_c_binding, only: c_int
+        integer(c_int) :: fstarpu_task_nsubmitted
+    end function fstarpu_task_nsubmitted
+
+    ! void starpu_do_schedule(void);
+    subroutine fstarpu_do_schedule () bind(C, name="starpu_do_schedule")
+    end subroutine fstarpu_do_schedule
+
+    ! starpu_codelet_init
+    subroutine fstarpu_codelet_init (codelet) bind(C, name="starpu_codelet_init")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: codelet
+    end subroutine fstarpu_codelet_init
+
+    ! starpu_codelet_display_stats
+    subroutine fstarpu_codelet_display_stats (codelet) bind(C, name="starpu_codelet_display_stats")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: codelet
+    end subroutine fstarpu_codelet_display_stats
+
+
+    ! struct starpu_task *starpu_task_get_current(void);
+    function fstarpu_task_get_current () bind(C, name="starpu_task_get_current")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr) :: fstarpu_task_get_current
+    end function fstarpu_task_get_current
+
+    ! 
void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
+    ! The Fortran name keeps its historical doubled "_init" suffix so existing
+    ! callers still compile; the binding label is the C function from the
+    ! prototype above (the former label "..._init_init" names no such function).
+    subroutine fstarpu_parallel_task_barrier_init_init (task, id) &
+            bind(C, name="starpu_parallel_task_barrier_init")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: id
+    end subroutine fstarpu_parallel_task_barrier_init_init
+
+    ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
+    ! Same remark: historical Fortran name, binding label fixed to match the
+    ! C prototype above.
+    subroutine fstarpu_parallel_task_barrier_init_n_init_n (task, sz) &
+            bind(C, name="starpu_parallel_task_barrier_init_n")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: sz
+    end subroutine fstarpu_parallel_task_barrier_init_n_init_n
+
+    ! struct starpu_task *starpu_task_dup(struct starpu_task *task);
+    function fstarpu_task_dup (task) bind(C, name="starpu_task_dup")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: task
+        type(c_ptr) :: fstarpu_task_dup
+    end function fstarpu_task_dup
+
+    ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl);
+    subroutine fstarpu_task_set_implementation (task, impl) &
+            bind(C, name="starpu_task_set_implementation")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int), value, intent(in) :: impl
+    end subroutine fstarpu_task_set_implementation
+
+    ! unsigned starpu_task_get_implementation(struct starpu_task *task);
+    function fstarpu_task_get_implementation (task) &
+            bind(C, name="starpu_task_get_implementation")
+        use, intrinsic :: iso_c_binding, only: c_ptr, c_int
+        type(c_ptr), value, intent(in) :: task
+        integer(c_int) :: fstarpu_task_get_implementation
+    end function fstarpu_task_get_implementation
+
+    ! 
--
+
+    ! The codelet and perfmodel interfaces below use bind(C) without name=,
+    ! so each binds to a C symbol spelled like the lower-case Fortran name.
+    function fstarpu_codelet_allocate () bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr) :: fstarpu_codelet_allocate
+    end function fstarpu_codelet_allocate
+
+    subroutine fstarpu_codelet_free (cl) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+    end subroutine fstarpu_codelet_free
+
+    ! cl_name is passed by reference (no value attribute).
+    subroutine fstarpu_codelet_set_name (cl, cl_name) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_char, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        character(c_char), intent(in) :: cl_name
+    end subroutine fstarpu_codelet_set_name
+
+    subroutine fstarpu_codelet_set_color (cl, cl_color) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        integer(c_int), value, intent(in) :: cl_color
+    end subroutine fstarpu_codelet_set_color
+
+    subroutine fstarpu_codelet_set_model (cl, cl_perfmodel) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: cl_perfmodel
+    end subroutine fstarpu_codelet_set_model
+
+    subroutine fstarpu_codelet_set_energy_model (cl, cl_perfmodel) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: cl_perfmodel
+    end subroutine fstarpu_codelet_set_energy_model
+
+    subroutine fstarpu_codelet_add_cpu_func (cl, f_ptr) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_funptr, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_funptr), value, intent(in) :: f_ptr
+    end subroutine fstarpu_codelet_add_cpu_func
+
+    subroutine fstarpu_codelet_add_cuda_func (cl, f_ptr) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_funptr, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_funptr), value, intent(in) :: f_ptr
+    end subroutine fstarpu_codelet_add_cuda_func
+
+    subroutine fstarpu_codelet_add_cuda_flags (cl, flags) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_add_cuda_flags
+
+    subroutine fstarpu_codelet_add_opencl_func (cl, f_ptr) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_funptr, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_funptr), value, intent(in) :: f_ptr
+    end subroutine fstarpu_codelet_add_opencl_func
+
+    subroutine fstarpu_codelet_add_opencl_flags (cl, flags) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_add_opencl_flags
+
+    subroutine fstarpu_codelet_add_buffer (cl, mode) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_add_buffer
+
+    subroutine fstarpu_codelet_set_variable_nbuffers (cl) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+    end subroutine fstarpu_codelet_set_variable_nbuffers
+
+    subroutine fstarpu_codelet_set_nbuffers (cl, nbuffers) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        integer(c_int), value, intent(in) :: nbuffers
+    end subroutine fstarpu_codelet_set_nbuffers
+
+    subroutine fstarpu_codelet_set_flags (cl, flags) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_set_flags
+
+    subroutine fstarpu_codelet_set_where (cl, where) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_set_where
+
+    subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        type(c_ptr), value, intent(in) :: type_constant ! C function expects an intptr_t
+    end subroutine fstarpu_codelet_set_type
+
+    subroutine fstarpu_codelet_set_max_parallelism (cl, max_parallelism) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: cl
+        integer(c_int), value, intent(in) :: max_parallelism
+    end subroutine fstarpu_codelet_set_max_parallelism
+
+    function fstarpu_perfmodel_allocate () bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr) :: fstarpu_perfmodel_allocate
+    end function fstarpu_perfmodel_allocate
+
+    subroutine fstarpu_perfmodel_free (model) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: model
+    end subroutine fstarpu_perfmodel_free
+
+    ! model_symbol is passed by reference (no value attribute).
+    subroutine fstarpu_perfmodel_set_symbol (model, model_symbol) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_char, c_ptr
+        type(c_ptr), value, intent(in) :: model
+        character(c_char), intent(in) :: model_symbol
+    end subroutine fstarpu_perfmodel_set_symbol
+
+    subroutine fstarpu_perfmodel_set_type (model, type) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: model
+        type(c_ptr), value, intent(in) :: type ! C function expects an intptr_t
+    end subroutine fstarpu_perfmodel_set_type
+
+    ! == starpu_data_interface.h ==
+
+    ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags);
+
+    ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size);
+    function fstarpu_malloc_on_node(node, sz) bind(C, name="starpu_malloc_on_node")
+        use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t, c_size_t
+        integer(c_int), value, intent(in) :: node
+        integer(c_size_t), value, intent(in) :: sz
+        integer(c_intptr_t) :: fstarpu_malloc_on_node
+    end function fstarpu_malloc_on_node
+
+    ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags);
+
+    ! 
void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size);
+    subroutine fstarpu_free_on_node(node, addr, sz) bind(C, name="starpu_free_on_node")
+        use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t, c_size_t
+        integer(c_int), value, intent(in) :: node
+        integer(c_intptr_t), value, intent(in) :: addr
+        integer(c_size_t), value, intent(in) :: sz
+    end subroutine fstarpu_free_on_node
+
+    ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
+
+    ! int starpu_data_interface_get_next_id(void);
+    ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops);
+
+
+    ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
+    ! NOTE(review): the Fortran name is spelled "fstarpug_" (extra "g"); it
+    ! is kept as is because it is the public name callers link against.
+    subroutine fstarpug_data_ptr_register (dh, node) bind(C, name="starpu_data_ptr_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+    end subroutine fstarpug_data_ptr_register
+
+    ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
+    ! dh_dst is passed by reference (output), dh_src by value.
+    subroutine fstarpu_data_register_same (dh_dst, dh_src) bind(C, name="starpu_data_register_same")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), intent(out) :: dh_dst
+        type(c_ptr), value, intent(in) :: dh_src
+    end subroutine fstarpu_data_register_same
+
+    ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
+    function fstarpu_data_handle_to_pointer (dh, node) bind(C, name="starpu_data_handle_to_pointer")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+        type(c_ptr) :: fstarpu_data_handle_to_pointer
+    end function fstarpu_data_handle_to_pointer
+
+    ! 
void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
+    function fstarpu_data_get_local_ptr (dh) bind(C, name="starpu_data_get_local_ptr")
+        use, intrinsic :: iso_c_binding, only: c_ptr
+        type(c_ptr), value, intent(in) :: dh
+        type(c_ptr) :: fstarpu_data_get_local_ptr
+    end function fstarpu_data_get_local_ptr
+
+    ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node);
+
+    ! == starpu_data_interface.h: tensor ==
+
+    ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize);
+    ! dh is the only argument passed by reference (output handle).
+    subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) &
+            bind(C, name="starpu_tensor_data_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), intent(out) :: dh
+        integer(c_int), value, intent(in) :: home_node
+        type(c_ptr), value, intent(in) :: ptr
+        integer(c_int), value, intent(in) :: ldy
+        integer(c_int), value, intent(in) :: ldz
+        integer(c_int), value, intent(in) :: ldt
+        integer(c_int), value, intent(in) :: nx
+        integer(c_int), value, intent(in) :: ny
+        integer(c_int), value, intent(in) :: nz
+        integer(c_int), value, intent(in) :: nt
+        integer(c_size_t), value, intent(in) :: elt_size
+    end subroutine fstarpu_tensor_data_register
+
+    ! 
void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt);
+    ! The C prototype takes the handle by value, so dh carries the value
+    ! attribute (as in the starpu_data_ptr_register interface); passing it by
+    ! reference would hand the address of the Fortran variable to C.
+    subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) &
+            bind(C, name="starpu_tensor_ptr_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+        type(c_ptr), value, intent(in) :: ptr
+        type(c_ptr), value, intent(in) :: dev_handle
+        integer(c_size_t), value, intent(in) :: offset
+        integer(c_int), value, intent(in) :: ldy
+        integer(c_int), value, intent(in) :: ldz
+        integer(c_int), value, intent(in) :: ldt
+    end subroutine fstarpu_tensor_ptr_register
+
+    ! Tensor accessors: each takes `buffers` and an index `i` by value and is
+    ! bound to a C symbol spelled like the lower-case Fortran name.
+    function fstarpu_tensor_get_ptr(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        type(c_ptr) :: fstarpu_tensor_get_ptr
+    end function fstarpu_tensor_get_ptr
+
+    function fstarpu_tensor_get_ldy(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_ldy
+    end function fstarpu_tensor_get_ldy
+
+    function fstarpu_tensor_get_ldz(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_ldz
+    end function fstarpu_tensor_get_ldz
+
+    function fstarpu_tensor_get_ldt(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_ldt
+    end function fstarpu_tensor_get_ldt
+
+    function fstarpu_tensor_get_nx(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_nx
+    end function fstarpu_tensor_get_nx
+
+    function fstarpu_tensor_get_ny(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_ny
+    end function fstarpu_tensor_get_ny
+
+    function fstarpu_tensor_get_nz(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_nz
+    end function fstarpu_tensor_get_nz
+
+    function fstarpu_tensor_get_nt(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_tensor_get_nt
+    end function fstarpu_tensor_get_nt
+
+    ! == starpu_data_interface.h: block ==
+
+    ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize);
+    ! Here the C prototype takes a pointer to the handle, so dh stays
+    ! by-reference with intent(out).
+    subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) &
+            bind(C, name="starpu_block_data_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), intent(out) :: dh
+        integer(c_int), value, intent(in) :: home_node
+        type(c_ptr), value, intent(in) :: ptr
+        integer(c_int), value, intent(in) :: ldy
+        integer(c_int), value, intent(in) :: ldz
+        integer(c_int), value, intent(in) :: nx
+        integer(c_int), value, intent(in) :: ny
+        integer(c_int), value, intent(in) :: nz
+        integer(c_size_t), value, intent(in) :: elt_size
+    end subroutine fstarpu_block_data_register
+
+    ! 
void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz);
+    ! The C prototype takes the handle by value, so dh carries the value
+    ! attribute; passing it by reference would hand the address of the
+    ! Fortran variable to C instead of the handle.
+    subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) &
+            bind(C, name="starpu_block_ptr_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+        type(c_ptr), value, intent(in) :: ptr
+        type(c_ptr), value, intent(in) :: dev_handle
+        integer(c_size_t), value, intent(in) :: offset
+        integer(c_int), value, intent(in) :: ldy
+        integer(c_int), value, intent(in) :: ldz
+    end subroutine fstarpu_block_ptr_register
+
+    ! Block accessors: each takes `buffers` and an index `i` by value and is
+    ! bound to a C symbol spelled like the lower-case Fortran name.
+    function fstarpu_block_get_ptr(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        type(c_ptr) :: fstarpu_block_get_ptr
+    end function fstarpu_block_get_ptr
+
+    function fstarpu_block_get_ldy(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_block_get_ldy
+    end function fstarpu_block_get_ldy
+
+    function fstarpu_block_get_ldz(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_block_get_ldz
+    end function fstarpu_block_get_ldz
+
+    function fstarpu_block_get_nx(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_block_get_nx
+    end function fstarpu_block_get_nx
+
+    function fstarpu_block_get_ny(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_block_get_ny
+    end function fstarpu_block_get_ny
+
+    function fstarpu_block_get_nz(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_block_get_nz
+    end function fstarpu_block_get_nz
+
+    ! == starpu_data_interface.h: matrix ==
+
+    ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize);
+    ! The C prototype takes a pointer to the handle: dh is by-reference output.
+    subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) &
+            bind(C, name="starpu_matrix_data_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), intent(out) :: dh
+        integer(c_int), value, intent(in) :: home_node
+        type(c_ptr), value, intent(in) :: ptr
+        integer(c_int), value, intent(in) :: ld
+        integer(c_int), value, intent(in) :: nx
+        integer(c_int), value, intent(in) :: ny
+        integer(c_size_t), value, intent(in) :: elt_size
+    end subroutine fstarpu_matrix_data_register
+
+    ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld);
+    ! Handle taken by value in C, hence the value attribute on dh.
+    subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+            bind(C, name="starpu_matrix_ptr_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+        type(c_ptr), value, intent(in) :: ptr
+        type(c_ptr), value, intent(in) :: dev_handle
+        integer(c_size_t), value, intent(in) :: offset
+        integer(c_int), value, intent(in) :: ld
+    end subroutine fstarpu_matrix_ptr_register
+
+    ! Matrix accessors, same calling convention as the block accessors.
+    function fstarpu_matrix_get_ptr(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        type(c_ptr) :: fstarpu_matrix_get_ptr
+    end function fstarpu_matrix_get_ptr
+
+    function fstarpu_matrix_get_ld(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_matrix_get_ld
+    end function fstarpu_matrix_get_ld
+
+    function fstarpu_matrix_get_nx(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_matrix_get_nx
+    end function fstarpu_matrix_get_nx
+
+    function fstarpu_matrix_get_ny(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_matrix_get_ny
+    end function fstarpu_matrix_get_ny
+
+    ! == starpu_data_interface.h: vector ==
+
+    ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
+    subroutine fstarpu_vector_data_register(dh, home_node, ptr, nx, elt_size) &
+            bind(C, name="starpu_vector_data_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), intent(out) :: dh
+        integer(c_int), value, intent(in) :: home_node
+        type(c_ptr), value, intent(in) :: ptr
+        integer(c_int), value, intent(in) :: nx
+        integer(c_size_t), value, intent(in) :: elt_size
+    end subroutine fstarpu_vector_data_register
+
+    ! 
void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
+    ! The C prototype takes the handle by value, so dh carries the value
+    ! attribute; passing it by reference would hand the address of the
+    ! Fortran variable to C instead of the handle.
+    subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
+            bind(C, name="starpu_vector_ptr_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), value, intent(in) :: dh
+        integer(c_int), value, intent(in) :: node
+        type(c_ptr), value, intent(in) :: ptr
+        type(c_ptr), value, intent(in) :: dev_handle
+        integer(c_size_t), value, intent(in) :: offset
+    end subroutine fstarpu_vector_ptr_register
+
+
+    ! Vector accessors: `buffers` and index `i` are passed by value; bound to
+    ! C symbols spelled like the lower-case Fortran names.
+    function fstarpu_vector_get_ptr(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        type(c_ptr) :: fstarpu_vector_get_ptr
+    end function fstarpu_vector_get_ptr
+
+    function fstarpu_vector_get_nx(buffers, i) bind(C)
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr
+        type(c_ptr), value, intent(in) :: buffers
+        integer(c_int), value, intent(in) :: i
+        integer(c_int) :: fstarpu_vector_get_nx
+    end function fstarpu_vector_get_nx
+
+    ! == starpu_data_interface.h: variable ==
+
+    ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
+    ! The C prototype takes a pointer to the handle: dh is by-reference output.
+    subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) &
+            bind(C, name="starpu_variable_data_register")
+        use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_size_t
+        type(c_ptr), intent(out) :: dh
+        integer(c_int), value, intent(in) :: home_node
+        type(c_ptr), value, intent(in) :: ptr
+        integer(c_size_t), value, intent(in) :: elt_size
+    end subroutine fstarpu_variable_data_register
+
+    ! 
void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); + ! The C prototype takes the handle itself (not a pointer to it), so dh is passed by value. + subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & + bind(C,name="starpu_variable_ptr_register") + use iso_c_binding, only: c_ptr, c_int, c_size_t + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + type(c_ptr), value, intent(in) :: ptr + type(c_ptr), value, intent(in) :: dev_handle + integer(c_size_t), value, intent(in) :: offset + end subroutine fstarpu_variable_ptr_register + + function fstarpu_variable_get_ptr(buffers, i) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_variable_get_ptr + type(c_ptr), value, intent(in) :: buffers + integer(c_int), value, intent(in) :: i + end function fstarpu_variable_get_ptr + + ! == starpu_data_interface.h: void == + + ! void starpu_void_data_register(starpu_data_handle_t *handle); + subroutine fstarpu_void_data_register(dh) & + bind(C,name="starpu_void_data_register") + use iso_c_binding, only: c_ptr, c_int, c_size_t + type(c_ptr), intent(out) :: dh + end subroutine fstarpu_void_data_register + + ! == starpu_data_filter.h == + + function fstarpu_data_filter_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_data_filter_allocate + end function fstarpu_data_filter_allocate + + subroutine fstarpu_data_filter_free (filter) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_filter_free + + ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ + ! 
to fit within the Fortran id length limit + function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block + end function fstarpu_df_alloc_bcsr_filter_canonical_block + + function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block + end function fstarpu_df_alloc_csr_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block + end function fstarpu_df_alloc_matrix_filter_block + + function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow + end function fstarpu_df_alloc_matrix_filter_block_shadow + + function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block + end function fstarpu_df_alloc_matrix_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow + end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow + + function fstarpu_df_alloc_vector_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block + end function fstarpu_df_alloc_vector_filter_block + + function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow + end function fstarpu_df_alloc_vector_filter_block_shadow + + function fstarpu_df_alloc_vector_filter_list () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_list + end function fstarpu_df_alloc_vector_filter_list 
+ + function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 + end function fstarpu_df_alloc_vector_filter_divide_in_2 + + function fstarpu_df_alloc_block_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block + end function fstarpu_df_alloc_block_filter_block + + function fstarpu_df_alloc_block_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow + end function fstarpu_df_alloc_block_filter_block_shadow + + function fstarpu_df_alloc_block_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block + end function fstarpu_df_alloc_block_filter_vertical_block + + function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow + end function fstarpu_df_alloc_block_filter_vertical_block_shadow + + subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_filter_func + + subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: nchildren + end subroutine fstarpu_data_filter_set_nchildren + + subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_get_nchildren_func + + subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) + use iso_c_binding, 
only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr + end subroutine fstarpu_data_filter_set_get_child_ops_func + + subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: filter_arg + end subroutine fstarpu_data_filter_set_filter_arg + + subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: filter_arg_ptr + end subroutine fstarpu_data_filter_set_filter_arg_ptr + + ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); + subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_partition + + ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); + subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition + + ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_plan (dh,filter,children) & + bind(C,name="starpu_data_partition_plan") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_plan + + ! 
void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_submit + + ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_readonly_submit + + ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readwrite_upgrade_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_readwrite_upgrade_submit + + ! 
void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition_submit + + ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + integer(c_int), value, intent(in) :: gathering_node + end subroutine fstarpu_data_unpartition_readonly_submit + + ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_clean (dh,nparts,children) & + bind(C,name="starpu_data_partition_clean") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts + type(c_ptr), intent(in) :: children(*) + end subroutine fstarpu_data_partition_clean + + ! int starpu_data_get_nb_children(starpu_data_handle_t handle); + function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_get_nb_children + type(c_ptr), value, intent(in) :: dh + end function fstarpu_data_get_nb_children + + ! 
starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); + function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_child + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: i + end function fstarpu_data_get_child + + ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); + ! . see: fstarpu_data_get_sub_data + ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); + ! . see: fstarpu_data_get_sub_data + + ! note: defined in filters.c + function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_sub_data + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: depth + integer(c_int), intent(in) :: indices(*) + end function fstarpu_data_get_sub_data + + ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); + ! . see fstarpu_data_map_filters + ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); + ! . see fstarpu_data_map_filters + + ! note: defined in filters.c + subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: nfilters + type(c_ptr), intent(in) :: filters(*) + end subroutine fstarpu_data_map_filters + + ! 
void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! In the filter wrappers below, id and nparts are C unsigned values passed by value: declared integer(c_int), not c_ptr. + subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block + + ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block_shadow + + ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block + + ! 
void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! In the filter wrappers below, id and nparts are C unsigned values passed by value: declared integer(c_int), not c_ptr. + subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block_shadow + + ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block + + ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block_shadow + + ! 
void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! In the filter wrappers below, id and nparts are C unsigned values passed by value: declared integer(c_int), not c_ptr. + subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list_long") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list_long + + ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list + + ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! The Fortran name is kept as is; the binding label names the C function given in the prototype above. + subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_divide_in_2") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_vector_divide_in_2 + + ! 
void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! In the filter wrappers below, id and nparts are C unsigned values passed by value: declared integer(c_int), not c_ptr. + subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block + + ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block_shadow + + ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block + + ! 
void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + ! In the filter wrappers below, id and nparts are C unsigned values passed by value: declared integer(c_int), not c_ptr. + subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block_shadow + + ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block + + ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block_shadow") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: id + integer(c_int), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block_shadow + + + ! == starpu_data.h == + + ! 
void starpu_data_unregister(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister + + ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister_no_coherency + + ! void starpu_data_unregister_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_unregister_submit + + ! void starpu_data_deinitialize(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_deinitialize + + ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_deinitialize_submit + + ! void starpu_data_invalidate(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_invalidate + + ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_invalidate_submit + + ! 
void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); + subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") + use iso_c_binding, only: c_ptr,c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: is_important + end subroutine fstarpu_data_advise_as_important + + ! starpu_data_acquire: see fstarpu_data_acquire + subroutine fstarpu_data_acquire (dh, mode) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t + end subroutine fstarpu_data_acquire + + ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); + ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + + ! void starpu_data_release(starpu_data_handle_t handle); + subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_release + + ! 
void starpu_data_release_on_node(starpu_data_handle_t handle, int node); + subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_release_on_node + + ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; + function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_arbiter_create + end function fstarpu_arbiter_create + + ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); + ! The C prototype takes the handle itself (not a pointer to it), so dh is passed by value. + subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_data_assign_arbiter + + ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); + subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_arbiter_destroy + + ! void starpu_data_display_memory_stats(); + ! The Fortran name is kept as is; the binding label names the C function given in the prototype above. + subroutine fstarpu_display_memory_stats() bind(C,name="starpu_data_display_memory_stats") + end subroutine fstarpu_display_memory_stats + + ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); + subroutine fstarpu_data_request_allocation (dh, node) & + bind(C,name="starpu_data_request_allocation") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_request_allocation + + ! 
int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_fetch_on_node (dh, node, async) & + bind(C,name="starpu_data_fetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_fetch_on_node + + ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_prefetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_prefetch_on_node + + ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_prefetch_on_node_prio") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_prefetch_on_node_prio + + ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_idle_prefetch_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_idle_prefetch_on_node + + ! 
int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_idle_prefetch_on_node_prio") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_idle_prefetch_on_node_prio + + !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); + function fstarpu_data_is_on_node(dh, node) & + bind(C,name="starpu_data_is_on_node") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_is_on_node + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end function fstarpu_data_is_on_node + + ! void starpu_data_wont_use(starpu_data_handle_t handle); + subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_wont_use + + ! unsigned starpu_worker_get_memory_node(unsigned workerid); + function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_memory_node + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_memory_node + + ! unsigned starpu_memory_nodes_get_count(void); + function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_memory_nodes_get_count + end function fstarpu_memory_nodes_get_count + + ! enum starpu_node_kind starpu_node_get_kind(unsigned node); + ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); + ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); + ! 
unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); + ! unsigned starpu_data_get_default_sequential_consistency_flag(void); + ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); + ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); + + ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); + subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl + type(c_ptr), value, intent(in) :: init_cl + end subroutine fstarpu_data_set_reduction_methods + + ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) + subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & + bind(C,name="starpu_data_set_reduction_methods_with_args") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl + type(c_ptr), value, intent(in) :: redux_args + type(c_ptr), value, intent(in) :: init_cl + type(c_ptr), value, intent(in) :: init_args + end subroutine fstarpu_data_set_reduction_methods_with_args + + ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); + + ! 
unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node);
+        function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node")
+                use iso_c_binding, only: c_ptr, c_int
+                integer(c_int) :: fstarpu_data_test_if_allocated_on_node ! C unsigned result returned as c_int
+                type(c_ptr), value, intent(in) :: dh
+                integer(c_int), value, intent(in) :: mem_node ! C unsigned carried in a c_int
+        end function fstarpu_data_test_if_allocated_on_node
+
+        ! void starpu_memchunk_tidy(unsigned memory_node);
+        subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: mem_node
+        end subroutine fstarpu_memchunk_tidy
+
+        ! == starpu_task_util.h ==
+
+        ! The bindings below use bind(C) without name=, so the C symbol is the
+        ! lower-case Fortran name itself (fstarpu_...), as the prototypes show.
+
+        ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb);
+        function fstarpu_data_handle_array_alloc (nb) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_data_handle_array_alloc ! starpu_data_handle_t * returned as an opaque pointer
+                integer(c_int), value, intent(in) :: nb
+        end function fstarpu_data_handle_array_alloc
+
+        ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles);
+        subroutine fstarpu_data_handle_array_free (handles) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: handles
+        end subroutine fstarpu_data_handle_array_free
+
+        ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle);
+        subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: handles
+                integer(c_int), value, intent(in) :: i ! NOTE(review): index base (0 or 1) is set by the C side -- confirm
+                type(c_ptr), value, intent(in) :: handle
+        end subroutine fstarpu_data_handle_array_set
+
+        !
struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb);
+        function fstarpu_data_descr_array_alloc (nb) bind(C)
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr) :: fstarpu_data_descr_array_alloc
+                integer(c_int), value, intent(in) :: nb
+        end function fstarpu_data_descr_array_alloc
+
+        ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
+        function fstarpu_data_descr_alloc () bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_data_descr_alloc
+        end function fstarpu_data_descr_alloc
+
+        ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs);
+        subroutine fstarpu_data_descr_array_free (descrs) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: descrs
+        end subroutine fstarpu_data_descr_array_free
+
+        ! void fstarpu_data_descr_free(struct starpu_data_descr *descr);
+        subroutine fstarpu_data_descr_free (descr) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: descr
+        end subroutine fstarpu_data_descr_free
+        ! Legacy misspelled name, kept so existing callers still compile. It used to
+        ! bind to a C symbol "fstarpu_data_descrg_free"; it now targets the symbol
+        ! named in the prototype above (same aliasing scheme as fstarpu_insert_task).
+        subroutine fstarpu_data_descrg_free (descr) bind(C,name="fstarpu_data_descr_free")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: descr
+        end subroutine fstarpu_data_descrg_free
+
+        ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode);
+        subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C)
+                use iso_c_binding, only: c_ptr, c_int, c_intptr_t
+                type(c_ptr), value, intent(in) :: descrs
+                integer(c_int), value, intent(in) :: i
+                type(c_ptr), value, intent(in) :: handle
+                type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t
+        end subroutine fstarpu_data_descr_array_set
+
+        ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode);
+        subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C)
+                use iso_c_binding, only: c_ptr, c_intptr_t
+                type(c_ptr), value, intent(in) :: descr
+                type(c_ptr), value, intent(in) :: handle
+                type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t
+        end subroutine fstarpu_data_descr_set
+
+
+        ! arglist is an assumed-size array of C pointers passed by reference.
+        ! NOTE(review): its layout and terminator are defined by the C side -- confirm there.
+        subroutine fstarpu_task_insert(arglist) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), dimension(*), intent(in) :: arglist
+        end subroutine fstarpu_task_insert
+        ! Alias: second Fortran name bound to the same C symbol fstarpu_task_insert.
+        subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), dimension(*), intent(in) :: arglist
+        end subroutine fstarpu_insert_task
+
+        subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C)
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: cl_arg
+                type(c_ptr), dimension(*), intent(in) :: bufferlist
+        end subroutine fstarpu_unpack_arg
+
+        ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg)
+        ! starpu_tag_t is a 64-bit integer: sync_tag uses the same kind as the
+        ! elements of tag_array (it was previously declared c_int, which passes
+        ! only 32 bits by value).
+        subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) &
+                        bind(C,name="starpu_create_sync_task")
+                use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr
+                integer(c_long_long), value, intent(in) :: sync_tag
+                integer(c_int), value, intent(in) :: ndeps
+                integer(c_long_long), intent(in) :: tag_array(*)
+                type(c_funptr), value, intent(in) :: callback
+                type(c_ptr), value, intent(in) :: callback_arg
+        end subroutine fstarpu_create_sync_task
+
+        ! == starpu_sched_ctx.h ==
+
+        ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create
+        function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C)
+                use iso_c_binding, only: c_int, c_char, c_ptr
+                integer(c_int) :: fstarpu_sched_ctx_create
+                integer(c_int), intent(in) :: workers_array(*)
+                integer(c_int), value, intent(in) :: nworkers
+                character(c_char), intent(in) :: ctx_name ! passed by reference; presumably must be NUL-terminated -- confirm
+                type(c_ptr), dimension(*), intent(in) :: arglist
+        end function fstarpu_sched_ctx_create
+
+        !
unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap);
+        ! The two character arguments are passed by reference (no value attribute),
+        ! i.e. as const char *; presumably they must be NUL-terminated -- confirm.
+        function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, &
+                        min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) &
+                        bind(C,name="starpu_sched_ctx_create_inside_interval")
+                use iso_c_binding, only: c_int, c_char
+                integer(c_int) :: fstarpu_sched_ctx_create_inside_interval ! C unsigned result returned as c_int
+                character(c_char), intent(in) :: policy_name
+                character(c_char), intent(in) :: sched_ctx_name
+                integer(c_int), value, intent(in) :: min_ncpus
+                integer(c_int), value, intent(in) :: max_ncpus
+                integer(c_int), value, intent(in) :: min_ngpus
+                integer(c_int), value, intent(in) :: max_ngpus
+                integer(c_int), value, intent(in) :: allow_overlap ! C unsigned carried in a c_int
+        end function fstarpu_sched_ctx_create_inside_interval
+
+        ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args);
+        subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) &
+                        bind(c,name="starpu_sched_ctx_register_close_callback")
+                use iso_c_binding, only: c_ptr, c_funptr, c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                type(c_funptr), value, intent(in) :: close_callback ! C function pointer with the signature in the prototype above
+                type(c_ptr), value, intent(in) :: args
+        end subroutine fstarpu_sched_ctx_register_close_callback
+
+        ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in) :: workerids (*) ! assumed-size array passed by reference (int *)
+                integer(c_int), value, intent(in) :: nworkers
+                integer(c_int), value, intent(in) :: ctx
+        end subroutine fstarpu_sched_ctx_add_workers
+
+        !
void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers")
+                use iso_c_binding, only: c_int
+                integer(c_int), intent(in) :: workerids (*) ! assumed-size array passed by reference (int *)
+                integer(c_int), value, intent(in) :: nworkers
+                integer(c_int), value, intent(in) :: ctx ! C unsigned carried in a c_int
+        end subroutine fstarpu_sched_ctx_remove_workers
+
+        ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers
+        ! bind(C) without name=: the C symbol is fstarpu_sched_ctx_display_workers.
+        subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C)
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: ctx
+        end subroutine fstarpu_sched_ctx_display_workers
+
+        ! void starpu_sched_ctx_delete(unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: ctx
+        end subroutine fstarpu_sched_ctx_delete
+
+        ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
+        subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: ctx
+                integer(c_int), value, intent(in) :: inheritor
+        end subroutine fstarpu_sched_ctx_set_inheritor
+
+        ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_inheritor ! C unsigned result returned as c_int
+                integer(c_int), value, intent(in) :: ctx
+        end function fstarpu_sched_ctx_get_inheritor
+
+        !
unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level ! C unsigned result returned as c_int
+                integer(c_int), value, intent(in) :: ctx
+        end function fstarpu_sched_ctx_get_hierarchy_level
+
+        ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id);
+        ! The C side takes a pointer to the id, so the caller supplies an address
+        ! (e.g. via c_loc) rather than the integer itself.
+        subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: ctx_ptr
+        end subroutine fstarpu_sched_ctx_set_context
+
+        ! unsigned starpu_sched_ctx_get_context(void);
+        function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_context
+        end function fstarpu_sched_ctx_get_context
+
+        ! void starpu_sched_ctx_stop_task_submission(void);
+        subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission")
+                use iso_c_binding ! no entity from the module is referenced in this body
+        end subroutine fstarpu_sched_ctx_stop_task_submission
+
+        ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end subroutine fstarpu_sched_ctx_finished_submit
+
+        ! Listed for reference only; no interface body follows these two prototypes.
+        ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids);
+        ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids);
+
+        !
unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id);
+        ! In this group every C unsigned (arguments and results) is carried in a c_int.
+        function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_nworkers")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_nworkers
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_get_nworkers
+
+        ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2);
+        function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) &
+                        bind(c,name="starpu_sched_ctx_get_nshared_workers")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), value, intent(in) :: sched_ctx_id2
+        end function fstarpu_sched_ctx_get_nshared_workers
+
+        ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_contains_worker")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_contains_worker
+                integer(c_int), value, intent(in) :: workerid
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_contains_worker
+
+        ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_contains_type_of_worker")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker
+                integer(c_int), value, intent(in) :: arch ! C enum starpu_worker_archtype passed as c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_contains_type_of_worker
+
+        !
unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_worker_get_id")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_worker_get_id ! C unsigned result returned as c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_worker_get_id
+
+        ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task);
+        function fstarpu_sched_ctx_get_ctx_for_task (task) &
+                        bind(c,name="starpu_sched_ctx_get_ctx_for_task")
+                use iso_c_binding, only: c_int, c_ptr
+                integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task
+                type(c_ptr), value, intent(in) :: task ! struct starpu_task * as an opaque pointer
+        end function fstarpu_sched_ctx_get_ctx_for_task
+
+        ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid);
+        function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) &
+                        bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker
+                integer(c_int), value, intent(in) :: workerid
+        end function fstarpu_sched_ctx_overlapping_ctxs_on_worker
+
+        ! int starpu_sched_get_min_priority(void);
+        function fstarpu_sched_get_min_priority () &
+                        bind(c,name="starpu_sched_get_min_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_get_min_priority
+        end function fstarpu_sched_get_min_priority
+
+        ! int starpu_sched_get_max_priority(void);
+        function fstarpu_sched_get_max_priority () &
+                        bind(c,name="starpu_sched_get_max_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_get_max_priority
+        end function fstarpu_sched_get_max_priority
+
+        !
int starpu_sched_set_min_priority(int min_prio);
+        ! The setters in this group are functions: the C int result is returned to the caller.
+        function fstarpu_sched_set_min_priority (min_prio) &
+                        bind(c,name="starpu_sched_set_min_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_set_min_priority
+                integer(c_int), value, intent(in) :: min_prio
+        end function fstarpu_sched_set_min_priority
+
+        ! int starpu_sched_set_max_priority(int max_prio);
+        function fstarpu_sched_set_max_priority (max_prio) &
+                        bind(c,name="starpu_sched_set_max_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_set_max_priority
+                integer(c_int), value, intent(in) :: max_prio
+        end function fstarpu_sched_set_max_priority
+
+        ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_min_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_min_priority
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+        end function fstarpu_sched_ctx_get_min_priority
+
+        ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_max_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_max_priority
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_get_max_priority
+
+        ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio);
+        function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) &
+                        bind(c,name="starpu_sched_ctx_set_min_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_set_min_priority
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), value, intent(in) :: min_prio
+        end function fstarpu_sched_ctx_set_min_priority
+
+        !
int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio);
+        function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) &
+                        bind(c,name="starpu_sched_ctx_set_max_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_set_max_priority
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+                integer(c_int), value, intent(in) :: max_prio
+        end function fstarpu_sched_ctx_set_max_priority
+
+        ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_min_priority_is_set")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set ! C int, not a Fortran logical
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_min_priority_is_set
+
+        ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_max_priority_is_set")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set ! C int, not a Fortran logical
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_max_priority_is_set
+
+        ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_user_data(sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_user_data")
+                use iso_c_binding, only: c_int, c_ptr
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                type(c_ptr) :: fstarpu_sched_ctx_get_user_data ! void * returned as an opaque pointer
+        end function fstarpu_sched_ctx_get_user_data
+
+        ! Listed for reference only; no interface body follows this prototype.
+        ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC;
+
+        !
void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_delete_worker_collection")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+        end subroutine fstarpu_sched_ctx_delete_worker_collection
+
+        ! Listed for reference only; no interface body follows this prototype.
+        ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id);
+
+        ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data);
+        subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) &
+                        bind(c,name="starpu_sched_ctx_set_policy_data")
+                use iso_c_binding, only: c_int, c_ptr
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                type(c_ptr), value, intent(in) :: policy_data ! void *
+        end subroutine fstarpu_sched_ctx_set_policy_data
+
+        ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_policy_data")
+                use iso_c_binding, only: c_int, c_ptr
+                type(c_ptr) :: fstarpu_sched_ctx_get_policy_data
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_get_policy_data
+
+        ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_exec_parallel_code")
+                use iso_c_binding, only: c_int, c_funptr, c_ptr
+                type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code
+                type(c_funptr), value, intent(in) :: func ! C function pointer of type void *(*)(void *)
+                type(c_ptr), value, intent(in) :: param
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_exec_parallel_code
+
+
+        !
int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_nready_tasks")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+        end function fstarpu_sched_ctx_get_nready_tasks
+
+        ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_nready_flops")
+                use iso_c_binding, only: c_int, c_double
+                real(c_double) :: fstarpu_sched_ctx_get_nready_flops
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_get_nready_flops
+
+        ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid);
+        subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_increment")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), value, intent(in) :: workerid
+        end subroutine fstarpu_sched_ctx_list_task_counters_increment
+
+        ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid);
+        subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_decrement")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), value, intent(in) :: workerid
+
+        end subroutine fstarpu_sched_ctx_list_task_counters_decrement
+
+        !
void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid);
+        subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_reset")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+                integer(c_int), value, intent(in) :: workerid
+
+        end subroutine fstarpu_sched_ctx_list_task_counters_reset
+
+        ! The three *_all variants below take the task pointer first and the context id second.
+
+        ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_increment_all")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task ! struct starpu_task * as an opaque pointer
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end subroutine fstarpu_sched_ctx_list_task_counters_increment_all
+
+        ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all
+
+        ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_list_task_counters_reset_all")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end subroutine fstarpu_sched_ctx_list_task_counters_reset_all
+
+        !
unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_priority")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_priority ! C unsigned result returned as c_int
+                integer(c_int), value, intent(in) :: worker
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_get_priority
+
+        ! Listed for reference only; no interface body follows this prototype.
+        ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids);
+
+        ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid);
+        subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) &
+                        bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: cpuid
+        end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid
+
+        ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers);
+        function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) &
+                        bind(c,name="starpu_sched_ctx_book_workers_for_task")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), intent(in) :: workerids(*) ! assumed-size array passed by reference (int *)
+                integer(c_int), value, intent(in) :: nworkers
+        end function fstarpu_sched_ctx_book_workers_for_task
+
+        ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master);
+        subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) &
+                        bind(c,name="starpu_sched_ctx_unbook_workers_for_task")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                integer(c_int), value, intent(in) :: master
+        end subroutine fstarpu_sched_ctx_unbook_workers_for_task
+
+        !
unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx ! C unsigned result returned as c_int
+                integer(c_int), value, intent(in) :: workerid
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end function fstarpu_sched_ctx_worker_is_master_for_child_ctx
+
+        ! unsigned starpu_sched_ctx_master_get_context(int masterid);
+        function fstarpu_sched_ctx_master_get_context (masterid) &
+                        bind(c,name="starpu_sched_ctx_master_get_context")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_master_get_context
+                integer(c_int), value, intent(in) :: masterid
+        end function fstarpu_sched_ctx_master_get_context
+
+        ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops);
+        subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) &
+                        bind(c,name="starpu_sched_ctx_revert_task_counters")
+                use iso_c_binding, only: c_int, c_double
+                integer(c_int), value, intent(in) :: sched_ctx_id
+                real(c_double), value, intent(in) :: flops
+        end subroutine fstarpu_sched_ctx_revert_task_counters
+
+        ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex);
+        subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) &
+                        bind(c,name="starpu_sched_ctx_move_task_to_ctx")
+                use iso_c_binding, only: c_ptr, c_int
+                type(c_ptr), value, intent(in) :: task ! struct starpu_task * as an opaque pointer
+                integer(c_int), value, intent(in) :: sched_ctx
+                integer(c_int), value, intent(in) :: manage_mutex ! C unsigned carried in a c_int
+        end subroutine fstarpu_sched_ctx_move_task_to_ctx
+
+        !
int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id);
+        function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_get_worker_rank")
+                use iso_c_binding, only: c_int
+                integer(c_int) :: fstarpu_sched_ctx_get_worker_rank
+                integer(c_int), value, intent(in) :: sched_ctx_id ! C unsigned carried in a c_int
+        end function fstarpu_sched_ctx_get_worker_rank
+
+        ! Listed for reference only; no interface body follows this prototype.
+        ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers);
+
+        ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
+        subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) &
+                        bind(c,name="starpu_sched_ctx_call_pushed_task_cb")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: workerid
+                integer(c_int), value, intent(in) :: sched_ctx_id
+        end subroutine fstarpu_sched_ctx_call_pushed_task_cb
+
+        ! == starpu_fxt.h ==
+
+        ! void starpu_fxt_options_init(struct starpu_fxt_options *options);
+        subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: fxt_options ! struct starpu_fxt_options * as an opaque pointer
+        end subroutine fstarpu_fxt_options_init
+
+        ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
+        subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr), value, intent(in) :: fxt_options
+        end subroutine fstarpu_fxt_generate_trace
+
+        ! void starpu_fxt_autostart_profiling(int autostart);
+        subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling")
+                use iso_c_binding, only: c_int
+                integer(c_int), value, intent(in) :: autostart
+        end subroutine fstarpu_fxt_autostart_profiling
+
+        ! void starpu_fxt_start_profiling(void);
+        subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling")
+                use iso_c_binding ! no entity from the module is referenced in this body
+        end subroutine fstarpu_fxt_start_profiling
+
+        !
void starpu_fxt_stop_profiling(void);
+        subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling")
+                use iso_c_binding
+        end subroutine fstarpu_fxt_stop_profiling
+
+        ! void starpu_fxt_write_data_trace(char *filename_in);
+        subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace")
+                use iso_c_binding, only: c_char
+                character(c_char), intent(in) :: filename ! passed by reference (char *); presumably must be NUL-terminated -- confirm
+        end subroutine fstarpu_fxt_write_data_trace
+
+        ! void starpu_fxt_trace_user_event(unsigned long code);
+        ! NOTE(review): the prototype above names starpu_fxt_trace_user_event, but the
+        ! binding label below is starpu_trace_user_event -- confirm which symbol the
+        ! C library actually exports before relying on this binding.
+        subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event")
+                use iso_c_binding, only: c_long
+                integer(c_long), value, intent(in) :: code ! C unsigned long carried in a c_long
+        end subroutine fstarpu_trace_user_event
+
+        ! double starpu_timing_now(void)
+        function fstarpu_timing_now () bind(C,name="starpu_timing_now")
+                use iso_c_binding, only: c_double
+                real(c_double) :: fstarpu_timing_now
+        end function fstarpu_timing_now
+
+        ! == starpu_cuda.h ==
+
+        ! cudaStream_t starpu_cuda_get_local_stream(void);
+        function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream")
+                use iso_c_binding, only: c_ptr
+                type(c_ptr) :: fstarpu_cuda_get_local_stream ! cudaStream_t returned as an opaque pointer
+        end function fstarpu_cuda_get_local_stream
+
+        ! == starpu_stdlib.h ==
+
+        ! int starpu_malloc(void **A, size_t dim);
+        ! len is declared with kind c_size_t to match the C size_t parameter on every
+        ! ABI (c_long is narrower than size_t on LLP64 platforms).
+        function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc")
+                use iso_c_binding, only: c_ptr, c_size_t, c_int
+                type(c_ptr), intent(out) :: ptr ! by reference (void **): receives the allocated address
+                integer(c_size_t), value, intent(in) :: len
+                integer(c_int) :: fstarpu_malloc
+        end function fstarpu_malloc
+
+        ! int starpu_free_noflag(void *A, size_t dim);
+        function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag")
+                use iso_c_binding, only: c_ptr, c_size_t, c_int
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_size_t), value, intent(in) :: len
+                integer(c_int) :: fstarpu_free_noflag
+        end function fstarpu_free_noflag
+
+        !
int starpu_memory_pin(void *addr, size_t size);
+        ! len is declared with kind c_size_t to match the C size_t parameter on every
+        ! ABI (c_long is narrower than size_t on LLP64 platforms).
+        function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin")
+                use iso_c_binding, only: c_ptr, c_size_t, c_int
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_size_t), value, intent(in) :: len
+                integer(c_int) :: fstarpu_memory_pin
+        end function fstarpu_memory_pin
+
+        ! int starpu_memory_unpin(void *addr, size_t size);
+        function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin")
+                use iso_c_binding, only: c_ptr, c_size_t, c_int
+                type(c_ptr), value, intent(in) :: ptr
+                integer(c_size_t), value, intent(in) :: len
+                integer(c_int) :: fstarpu_memory_unpin
+        end function fstarpu_memory_unpin
+
+        ! void starpu_sleep(float nb_sec);
+        ! NOTE(review): this prototype comment used to say "int"; the binding is a
+        ! subroutine, which matches a void C function -- confirm against starpu_stdlib.h.
+        subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep")
+                use iso_c_binding, only: c_float
+                real(c_float), value, intent(in) :: nb_sec
+        end subroutine fstarpu_sleep
+
+        ! void starpu_usleep(float nb_sec);
+        ! NOTE(review): same remark as for starpu_sleep; the unit of the argument is
+        ! presumably microseconds despite the parameter name -- confirm.
+        subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep")
+                use iso_c_binding, only: c_float
+                real(c_float), value, intent(in) :: nb_sec
+        end subroutine fstarpu_usleep
+
+        ! void starpu_cublas_init(void);
+        subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init")
+        end subroutine fstarpu_cublas_init
+
+        !
void starpu_cublas_shutdown(void); + subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown") + end subroutine fstarpu_cublas_shutdown + + end interface + + contains + function or_cptrs(op1,op2) + type(c_ptr) :: or_cptrs + type(c_ptr),intent(in) :: op1,op2 + integer(c_intptr_t) :: i_op1,i_op2 + i_op1 = transfer(op1,0_c_intptr_t) + i_op2 = transfer(op2,0_c_intptr_t) + or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR) + end function + + function ip_to_p(i) bind(C) + use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR + type(c_ptr) :: ip_to_p + integer(c_intptr_t), value, intent(in) :: i + ip_to_p = transfer(i,C_NULL_PTR) + end function ip_to_p + + function p_to_ip(p) bind(C) + use iso_c_binding, only: c_ptr,c_intptr_t + integer(c_intptr_t) :: p_to_ip + type(c_ptr), value, intent(in) :: p + p_to_ip = transfer(p,0_c_intptr_t) + end function p_to_ip + + function sz_to_p(sz) bind(C) + use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t + type(c_ptr) :: sz_to_p + integer(c_size_t), value, intent(in) :: sz + sz_to_p = ip_to_p(int(sz,kind=c_intptr_t)) + end function sz_to_p + + function fstarpu_init (conf) bind(C) + use iso_c_binding + integer(c_int) :: fstarpu_init + type(c_ptr), value, intent(in) :: conf + + real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy + real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy + character(c_char) :: FSTARPU_SZ_C_CHAR_dummy + integer(c_int) :: FSTARPU_SZ_C_INT_dummy + integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy + type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy + integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy + + character :: FSTARPU_SZ_CHARACTER_dummy + + integer :: FSTARPU_SZ_INTEGER_dummy + integer(4) :: FSTARPU_SZ_INT4_dummy + integer(8) :: FSTARPU_SZ_INT8_dummy + + real :: FSTARPU_SZ_REAL_dummy + real(4) :: FSTARPU_SZ_REAL4_dummy + real(8) :: FSTARPU_SZ_REAL8_dummy + + double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy + + complex :: FSTARPU_SZ_COMPLEX_dummy + complex(4) :: FSTARPU_SZ_COMPLEX4_dummy + complex(8) :: 
FSTARPU_SZ_COMPLEX8_dummy + + ! Note: Referencing global C constants from Fortran has + ! been found unreliable on some architectures, notably + ! on Darwin. The get_integer/get_pointer_constant + ! scheme is a workaround to that issue. + + interface + ! These functions are not exported to the end user + function fstarpu_get_constant(s) bind(C) + use iso_c_binding, only: c_ptr,c_char + type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t + character(kind=c_char) :: s + end function fstarpu_get_constant + + function fstarpu_init_internal (conf) bind(C,name="starpu_init") + use iso_c_binding, only: c_ptr,c_int + integer(c_int) :: fstarpu_init_internal + type(c_ptr), value :: conf + end function fstarpu_init_internal + + end interface + + ! Initialize Fortran constants from C peers + FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR) + FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR) + FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR) + FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR) + FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR) + FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR) + FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR) + FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR) + FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR) + + FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR) + FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR) + FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR) + FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR) + FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR) + FSTARPU_CALLBACK = 
fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR) + FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR) + FSTARPU_CALLBACK_WITH_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR) + FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP_ARG = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR) + FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR) + FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR) + FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR) + FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR) + FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR) + 
FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR) + FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR) + FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR) + FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR) + FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR) + + FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR) + FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR) + FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR) + FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR) + FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR) + FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR) + + FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int) + + FSTARPU_SCHED_CTX_POLICY_NAME = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_STRUCT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_NESTED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_AWAKE_WORKERS = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_INIT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_USER_DATA = 
& + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR) + + FSTARPU_NOWHERE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR) + FSTARPU_CPU = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR) + FSTARPU_CUDA = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR) + FSTARPU_OPENCL = & + fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR) + + FSTARPU_CODELET_SIMGRID_EXECUTE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR) + FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR) + FSTARPU_CUDA_ASYNC = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR) + FSTARPU_OPENCL_ASYNC = & + fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR) + + !FSTARPU_PER_WORKER = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR) + !FSTARPU_PER_ARCH = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR) + !FSTARPU_PER_COMMON = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR) + FSTARPU_HISTORY_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR) + FSTARPU_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR) + FSTARPU_NL_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR) + FSTARPU_MULTIPLE_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR) + + FSTARPU_SEQ = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR) + FSTARPU_SPMD = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR) + FSTARPU_FORKJOIN = & + fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR) + + ! 
Initialize size constants as 'c_ptr' + ! Each size is taken with c_sizeof on a dummy variable of the matching type + ! declared at the top of fstarpu_init, then stored as a pointer-sized value. + FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy)) + FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy)) + FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy)) + FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy)) + FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy)) + FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy)) + FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy)) + + FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy)) + + FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy)) + FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy)) + FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy)) + + FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy)) + FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy)) + FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy)) + + FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy)) + + FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy)) + FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy)) + FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) + + ! Integer-valued constant: the pointer-sized value returned by + ! fstarpu_get_constant is converted to an intptr and then to c_int. + FSTARPU_DEFAULT_PRIO = int(p_to_ip(& + fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int) + + ! 
Initialize StarPU + if (c_associated(conf)) then + fstarpu_init = fstarpu_init_internal(conf) + else + fstarpu_init = fstarpu_init_internal(C_NULL_PTR) + end if + end function fstarpu_init + + function fstarpu_csizet_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_csizet_to_cptr + integer(c_size_t) :: i + fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_csizet_to_cptr + + function fstarpu_int_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_int_to_cptr + integer(c_int) :: i + fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_int_to_cptr + + function fstarpu_long_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_long_to_cptr + integer(c_long) :: i + fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_long_to_cptr + + ! Note: do not add binding declarations here in 'CONTAINS' + ! section, because the compiler generates empty functions for + ! them. + ! Instead, put binding declarations in the 'INTERFACE' section + ! above. + +end module fstarpu_mod diff --git a/include/omp.h b/include/omp.h new file mode 100644 index 0000000..dab99e1 --- /dev/null +++ b/include/omp.h @@ -0,0 +1,110 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_OPENMP_OMP_H__ +#define __STARPU_OPENMP_OMP_H__ + +#if defined STARPU_OPENMP +/* The OpenMP lock types are aliases of the StarPU OpenMP lock types. */ +typedef starpu_omp_lock_t omp_lock_t; +typedef starpu_omp_nest_lock_t omp_nest_lock_t; + +/* Schedule kinds exchanged through omp_set_schedule() and omp_get_schedule(). */ +enum omp_sched_value +{ + omp_sched_undefined = 0, + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4, + omp_sched_runtime = 5 +}; + +/* Binding policies, as returned by omp_get_proc_bind(). */ +enum omp_proc_bind_value +{ + omp_proc_bind_undefined = -1, + omp_proc_bind_false = 0, + omp_proc_bind_true = 1, + omp_proc_bind_master = 2, + omp_proc_bind_close = 3, + omp_proc_bind_spread = 4 +}; + +/* __STARPU_OMP_NOTHROW marks every entry point below as non-throwing, + * using the C++ or the GNU C spelling depending on the language. */ +#ifdef __cplusplus +extern "C" { +#define __STARPU_OMP_NOTHROW throw() +#else +#define __STARPU_OMP_NOTHROW __attribute__((__nothrow__)) +#endif + +/* Functions without parameters are declared with (void): in C an empty + * parameter list is not a prototype and disables argument checking. */ +extern void omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW; +extern int omp_get_num_threads(void) __STARPU_OMP_NOTHROW; +extern int omp_get_thread_num(void) __STARPU_OMP_NOTHROW; +extern int omp_get_max_threads(void) __STARPU_OMP_NOTHROW; +extern int omp_get_num_procs(void) __STARPU_OMP_NOTHROW; +extern int omp_in_parallel(void) __STARPU_OMP_NOTHROW; +extern void omp_set_dynamic(int dynamic_threads) __STARPU_OMP_NOTHROW; +extern int omp_get_dynamic(void) __STARPU_OMP_NOTHROW; +extern void omp_set_nested(int nested) __STARPU_OMP_NOTHROW; +extern int omp_get_nested(void) __STARPU_OMP_NOTHROW; +extern int omp_get_cancellation(void) __STARPU_OMP_NOTHROW; +extern void omp_set_schedule(enum omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW; +extern void omp_get_schedule(enum omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW; +extern int omp_get_thread_limit(void) __STARPU_OMP_NOTHROW; +extern void omp_set_max_active_levels(int max_levels) __STARPU_OMP_NOTHROW; +extern int omp_get_max_active_levels(void) __STARPU_OMP_NOTHROW; +extern int omp_get_level(void) __STARPU_OMP_NOTHROW; +extern int omp_get_ancestor_thread_num(int level) __STARPU_OMP_NOTHROW; +extern int omp_get_team_size(int level) __STARPU_OMP_NOTHROW; +extern int 
omp_get_active_level(void) __STARPU_OMP_NOTHROW; +extern int omp_in_final(void) __STARPU_OMP_NOTHROW; +extern enum omp_proc_bind_value omp_get_proc_bind(void) __STARPU_OMP_NOTHROW; +extern int omp_get_num_places(void) __STARPU_OMP_NOTHROW; +extern int omp_get_place_num_procs(int place_num) __STARPU_OMP_NOTHROW; +extern void omp_get_place_proc_ids(int place_num, int *ids) __STARPU_OMP_NOTHROW; +extern int omp_get_place_num(void) __STARPU_OMP_NOTHROW; +extern int omp_get_partition_num_places(void) __STARPU_OMP_NOTHROW; +extern void omp_get_partition_place_nums(int *place_nums) __STARPU_OMP_NOTHROW; +extern void omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW; +extern int omp_get_default_device(void) __STARPU_OMP_NOTHROW; +extern int omp_get_num_devices(void) __STARPU_OMP_NOTHROW; +extern int omp_get_num_teams(void) __STARPU_OMP_NOTHROW; +extern int omp_get_team_num(void) __STARPU_OMP_NOTHROW; +extern int omp_is_initial_device(void) __STARPU_OMP_NOTHROW; +extern int omp_get_initial_device(void) __STARPU_OMP_NOTHROW; +extern int omp_get_max_task_priority(void) __STARPU_OMP_NOTHROW; +extern void omp_init_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_destroy_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_set_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_unset_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; +extern int omp_test_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_init_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_destroy_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_set_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_unset_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; +extern int omp_test_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; +extern void omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW; +extern void omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW; +extern double 
omp_get_wtime(void) __STARPU_OMP_NOTHROW; +extern double omp_get_wtick(void) __STARPU_OMP_NOTHROW; +extern void *omp_get_local_cuda_stream(void) __STARPU_OMP_NOTHROW; + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_OPENMP */ +#endif /* __STARPU_OPENMP_OMP_H__ */ diff --git a/include/pthread_win32/pthread.h b/include/pthread_win32/pthread.h new file mode 100644 index 0000000..215e099 --- /dev/null +++ b/include/pthread_win32/pthread.h @@ -0,0 +1,520 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This is a minimal pthread implementation based on windows functions. + * It is *not* intended to be complete - just complete enough to get + * StarPU running. 
+ */ + +#ifndef __STARPU_PTHREAD_H__ +#define __STARPU_PTHREAD_H__ + +/* TODO: + * pthread_rwlock_* + * pthread_spinlock_* + */ + +#include +#include +#include +#ifndef STARPU_CONFIGURE +#include +#endif +#ifdef STARPU_HAVE_UNISTD_H +#include +#endif +#include +#include +#include + +#ifdef __CYGWIN32__ +#include +#define unixErrno() cygwin_internal(CW_GET_ERRNO_FROM_WINERROR, (GetLastError()) +#else +#define unixErrno() EIO +#endif +#if 0 +#define setSystemErrno() \ + do { \ + fprintf(stderr, "%s:%d: win %d\n", __FILE__, __LINE__, GetLastError()); \ + errno = unixErrno(); \ + } \ + while (0) +#define winPthreadAssertWindows(expr) \ + do { \ + if (!(expr)) \ + { \ + fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, unixErrno()); \ + return unixErrno(); \ + } \ + } \ + while (0) +#define winPthreadAssertPthread(expr) \ + do { \ + int ret = (expr); \ + if (ret) \ + { \ + fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, ret); \ + return ret; \ + } \ + } \ + while (0) +#define winPthreadAssert(expr) \ + do { \ + if (!(expr)) \ + { \ + fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, errno); \ + return EIO; \ + } \ + } \ + while (0) +#else +#define setSystemErrno() errno = unixErrno() +#define winPthreadAssertWindows(expr) \ + do { \ + if (!(expr)) { return unixErrno(); } \ + } \ + while (0) +#define winPthreadAssertPthread(expr) \ + do { \ + int ret = (expr); \ + if (ret) return ret; \ + } \ + while (0) +#define winPthreadAssert(expr) \ + do { \ + if (!(expr)) return EIO; \ + } \ + while (0) +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/*********** + * threads * + ***********/ + +typedef DWORD pthread_attr_t; +typedef HANDLE pthread_t; + +static __inline pthread_t pthread_self(void) +{ + return GetCurrentThread(); +} + +static __inline int pthread_equal(pthread_t t1, pthread_t t2) +{ + return t1 == t2; +} + +static __inline int pthread_attr_init(pthread_attr_t *attr) +{ + *attr = 0; + return 0; +} + +#define PTHREAD_CREATE_DETACHED 1 
+static __inline int pthread_attr_setdetachstate(pthread_attr_t *attr, int yes) +{ + (void)attr; + (void)yes; + /* not supported, ignore */ + return 0; +} + +static __inline int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) +{ + (void)attr; + (void)stacksize; + /* not supported, ignore */ + return 0; +} + +static __inline int pthread_attr_destroy(pthread_attr_t *attr) +{ + (void)attr; + return 0; +} + +/* "real" cleanup handling not yet implemented */ +typedef struct +{ + void (*routine)(void *); + void *arg; +} __pthread_cleanup_handler; + +void pthread_cleanup_push(void (*routine)(void *), void *arg); +#define pthread_cleanup_push(routine, arg) \ + do { \ + __pthread_cleanup_handler __cleanup_handler = {routine, arg}; + +void pthread_cleanup_pop(int execute); +#define pthread_cleanup_pop(execute) \ + if (execute) __cleanup_handler.routine(__cleanup_handler.arg); \ + } \ + while (0) \ + ; + +static __inline int pthread_create( + pthread_t *thread, const pthread_attr_t *attr, + void *(*fun)(void *), void *arg) +{ + if (attr && *attr) + return EINVAL; + winPthreadAssertWindows(*thread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)fun, arg, 0, NULL)); + return 0; +} + +static __inline int pthread_setcancelstate(int state, int *oldstate) +{ + (void)state; + (void)oldstate; + /* not yet implemented */ + return 0; +} + +static __inline int pthread_cancel(pthread_t thread) +{ + /* This is quite harsh */ + winPthreadAssertWindows(TerminateThread(thread, 0)); + return 0; +} + +static __inline void pthread_exit(void *res) +{ + ExitThread((DWORD)(DWORD_PTR)res); +} + +static __inline int pthread_join(pthread_t thread, void **res) +{ +again: + switch (WaitForSingleObject(thread, INFINITE)) + { + default: + case WAIT_FAILED: + return unixErrno(); + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + break; + case WAIT_TIMEOUT: + goto again; + } + if (res) + { + DWORD _res; + if (GetExitCodeThread(thread, &_res)) + *res = (void *)(DWORD_PTR)_res; + } + return 0; 
+} + +/*********** + * mutexes * + ***********/ + +#define PTHREAD_MUTEX_INITIALIZER NULL +typedef HANDLE pthread_mutex_t; +#define PTHREAD_MUTEX_RECURSIVE 1 +#define PTHREAD_MUTEX_ERRORCHECK 2 +typedef int pthread_mutexattr_t; + +static __inline int pthread_mutexattr_init(pthread_mutexattr_t *attr) +{ + *attr = PTHREAD_MUTEX_ERRORCHECK; + return 0; +} + +static __inline int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) +{ + *attr = -1; + return 0; +} + +static __inline int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) +{ + if (type != PTHREAD_MUTEX_RECURSIVE && type != PTHREAD_MUTEX_ERRORCHECK) + return EINVAL; + *attr = type; + return 0; +} + +static __inline int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) +{ + /* TODO: we could use CreateMutex and ReleaseMutex to support recursivity */ + if (attr && *attr != PTHREAD_MUTEX_ERRORCHECK) + return EINVAL; + winPthreadAssertWindows(*mutex = CreateSemaphore(NULL, 1, 1, NULL)); + return 0; +} + +static __inline int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + winPthreadAssertWindows(ReleaseSemaphore(*mutex, 1, NULL)); + return 0; +} + +static __inline int pthread_mutex_lock(pthread_mutex_t *mutex); +static __inline int __pthread_mutex_alloc_concurrently(pthread_mutex_t *mutex) +{ + HANDLE mutex_init_mutex; + /* Get access to one global named mutex to serialize mutex initialization */ + winPthreadAssertWindows((mutex_init_mutex = CreateSemaphore(NULL, 1, 1, "StarPU mutex init"))); + winPthreadAssertPthread(pthread_mutex_lock(&mutex_init_mutex)); + /* Now we are the one that can initialize it */ + if (!*mutex) + winPthreadAssertPthread(pthread_mutex_init(mutex, NULL)); + winPthreadAssertPthread(pthread_mutex_unlock(&mutex_init_mutex)); + winPthreadAssertWindows(CloseHandle(mutex_init_mutex)); + return 0; +} + +static __inline int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + if (!*mutex) + __pthread_mutex_alloc_concurrently(mutex); +again: + switch 
(WaitForSingleObject(*mutex, INFINITE)) + { + default: + case WAIT_FAILED: + return unixErrno(); + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + return 0; + case WAIT_TIMEOUT: + goto again; + } +} + +static __inline int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + if (!*mutex) + __pthread_mutex_alloc_concurrently(mutex); + switch (WaitForSingleObject(*mutex, 0)) + { + default: + case WAIT_FAILED: + return unixErrno(); + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + return 0; + case WAIT_TIMEOUT: + return EBUSY; + } +} + +static __inline int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + winPthreadAssertWindows(CloseHandle(*mutex)); + *mutex = INVALID_HANDLE_VALUE; + return 0; +} + +/******************************************** + * rwlock * + * VERY LAZY, don't even look at it please! * + * Should be fine unoptimized for now. * + * TODO: FIXME, using conds for instance? * + ********************************************/ + +#define PTHREAD_RWLOCK_INITIALIZER NULL +typedef pthread_mutex_t pthread_rwlock_t; +typedef int pthread_rwlockattr_t; +#define pthread_rwlock_init(lock, attr) pthread_mutex_init(lock, NULL) +#define pthread_rwlock_wrlock(lock) pthread_mutex_lock(lock) +#define pthread_rwlock_trywrlock(lock) pthread_mutex_trylock(lock) +#define pthread_rwlock_rdlock(lock) pthread_mutex_lock(lock) +#define pthread_rwlock_tryrdlock(lock) pthread_mutex_trylock(lock) +#define pthread_rwlock_unlock(lock) pthread_mutex_unlock(lock) +#define pthread_rwlock_destroy(lock) pthread_mutex_destroy(lock) + +/************** + * conditions * + **************/ + +typedef struct +{ + HANDLE sem; + volatile unsigned nbwait; +} pthread_cond_t; +#define PTHREAD_COND_INITIALIZER \ + { \ + NULL, 0 \ + } + +#if !defined(STARPU_HAVE_STRUCT_TIMESPEC) || defined(_MSC_VER) +#ifndef STARPU_TIMESPEC_DEFINED +#define STARPU_TIMESPEC_DEFINED 1 +struct timespec +{ + time_t tv_sec; /* Seconds */ + long tv_nsec; /* Nanoseconds */ +}; +#endif /* STARPU_TIMESPEC_DEFINED */ +#endif /* 
STARPU_HAVE_STRUCT_TIMESPEC */ + +typedef unsigned pthread_condattr_t; + +static __inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) +{ + if (attr) + return EINVAL; + winPthreadAssertWindows(cond->sem = CreateSemaphore(NULL, 0, MAXLONG, NULL)); + cond->nbwait = 0; + return 0; +} + +static __inline int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *time) +{ + if (!cond->sem) + winPthreadAssertPthread(pthread_cond_init(cond, NULL)); + cond->nbwait++; + winPthreadAssertPthread(pthread_mutex_unlock(mutex)); +again: + switch (WaitForSingleObject(cond->sem, time->tv_sec * 1000 + time->tv_nsec / 1000)) + { + default: + case WAIT_FAILED: + { + int error = unixErrno(); + winPthreadAssertPthread(pthread_mutex_lock(mutex)); + return error; + } + case WAIT_TIMEOUT: + goto again; + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + break; + } + winPthreadAssertPthread(pthread_mutex_lock(mutex)); + cond->nbwait--; + return 0; +} + +static __inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + if (!cond->sem) + winPthreadAssertPthread(pthread_cond_init(cond, NULL)); + cond->nbwait++; + winPthreadAssertPthread(pthread_mutex_unlock(mutex)); +again: + switch (WaitForSingleObject(cond->sem, INFINITE)) + { + case WAIT_FAILED: + { + int error; + error = unixErrno(); + winPthreadAssertPthread(pthread_mutex_lock(mutex)); + return error; + } + case WAIT_TIMEOUT: + goto again; + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + break; + } + winPthreadAssertPthread(pthread_mutex_lock(mutex)); + cond->nbwait--; + return 0; +} + +static __inline int pthread_cond_signal(pthread_cond_t *cond) +{ + if (!cond->sem) + winPthreadAssertPthread(pthread_cond_init(cond, NULL)); + if (cond->nbwait) + ReleaseSemaphore(cond->sem, 1, NULL); + return 0; +} + +static __inline int pthread_cond_broadcast(pthread_cond_t *cond) +{ + if (!cond->sem) + winPthreadAssertPthread(pthread_cond_init(cond, NULL)); + 
ReleaseSemaphore(cond->sem, cond->nbwait, NULL); + return 0; +} + +static __inline int pthread_cond_destroy(pthread_cond_t *cond) +{ + if (cond->sem) + { + winPthreadAssertWindows(CloseHandle(cond->sem)); + cond->sem = NULL; + } + return 0; +} + +/******* + * TLS * + *******/ + +typedef DWORD pthread_key_t; +#define PTHREAD_ONCE_INIT \ + { \ + PTHREAD_MUTEX_INITIALIZER, 0 \ + } +typedef struct +{ + pthread_mutex_t mutex; + unsigned done; +} pthread_once_t; + +static __inline int pthread_once(pthread_once_t *once, void (*oncefun)(void)) +{ + winPthreadAssertPthread(pthread_mutex_lock(&once->mutex)); + if (!once->done) + { + oncefun(); + once->done = 1; + } + winPthreadAssertPthread(pthread_mutex_unlock(&once->mutex)); + return 0; +} + +static __inline int pthread_key_create(pthread_key_t *key, void (*freefun)(void *)) +{ + (void)freefun; + pthread_key_t res; + winPthreadAssertWindows((res = TlsAlloc()) != 0xFFFFFFFF); + *key = res; + return 0; +} + +static __inline int pthread_key_delete(pthread_key_t key) +{ + winPthreadAssertWindows(TlsFree(key)); + return 0; +} + +static __inline void *pthread_getspecific(pthread_key_t key) +{ + return TlsGetValue(key); +} + +static __inline int pthread_setspecific(pthread_key_t key, const void *data) +{ + winPthreadAssertWindows(TlsSetValue(key, (LPVOID)data)); + return 0; +} + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __STARPU_PTHREAD_H__ */ diff --git a/include/pthread_win32/semaphore.h b/include/pthread_win32/semaphore.h new file mode 100644 index 0000000..9f97175 --- /dev/null +++ b/include/pthread_win32/semaphore.h @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This is a minimal POSIX semaphore implementation based on windows functions. + * It is *not* intended to be complete - just complete enough to get + * StarPU running. + * + * Every function returns 0 on success, or -1 with errno set on failure. + */ + +#ifndef __STARPU_SEMAPHORE_H__ +#define __STARPU_SEMAPHORE_H__ + +#include "pthread.h" + +/************** + * semaphores * + **************/ + +typedef HANDLE sem_t; + +/* Create a semaphore holding value units. pshared is ignored. */ +static __inline int sem_init(sem_t *sem, int pshared, unsigned int value) +{ + (void)pshared; + *sem = CreateSemaphore(NULL, value, MAXLONG, NULL); + if (!*sem) + { + setSystemErrno(); + return -1; + } + return 0; +} + +/* Take one unit, waiting at most timeout milliseconds. + * A timeout is reported as -1 with errno set to EAGAIN. */ +static __inline int do_sem_wait(sem_t *sem, DWORD timeout) +{ + switch (WaitForSingleObject(*sem, timeout)) + { + default: + case WAIT_FAILED: + setSystemErrno(); + return -1; + case WAIT_TIMEOUT: + errno = EAGAIN; + return -1; + case WAIT_ABANDONED: + case WAIT_OBJECT_0: + return 0; + } +} + +/* sem_wait blocks without limit; sem_trywait does not block at all. */ +#define sem_wait(sem) do_sem_wait(sem, INFINITE) +#define sem_trywait(sem) do_sem_wait(sem, 0) + +/* Give one unit back to the semaphore. */ +static __inline int sem_post(sem_t *sem) +{ + if (!ReleaseSemaphore(*sem, 1, NULL)) + { + setSystemErrno(); + return -1; + } + return 0; +} + +/* Close the handle of the semaphore. */ +static __inline int sem_destroy(sem_t *sem) +{ + if (!CloseHandle(*sem)) + { + setSystemErrno(); + return -1; + } + return 0; +} + +#endif /* __STARPU_SEMAPHORE_H__ */ diff --git a/include/schedulers/starpu_heteroprio.h b/include/schedulers/starpu_heteroprio.h new file mode 100644 index 0000000..312c4f0 --- /dev/null +++ 
b/include/schedulers/starpu_heteroprio.h @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_SCHEDULER_HETEROPRIO_H__ +#define __STARPU_SCHEDULER_HETEROPRIO_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_HeteroPrio Heteroprio Scheduler + @brief This is the interface for the heteroprio scheduler + @{ + */ + +#define STARPU_HETEROPRIO_MAX_PREFETCH 2 +#if STARPU_HETEROPRIO_MAX_PREFETCH <= 0 +#error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1 +#endif + +#define STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT 28 + +/** + todo +*/ +enum starpu_autoheteroprio_priority_ordering_policy +{ + STARPU_HETEROPRIO_NOD_TIME_COMBINATION, // 0 + STARPU_HETEROPRIO_BEST_NODS_SCORE, + STARPU_HETEROPRIO_BEST_NODS, + STARPU_HETEROPRIO_URT_PURE, + STARPU_HETEROPRIO_URT, + STARPU_HETEROPRIO_URT_2, // 5 + STARPU_HETEROPRIO_URT_DOT_DIFF_PURE, + STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2, + STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE, + STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2, + STARPU_HETEROPRIO_URT_DOT_DIFF_2, // 10 + STARPU_HETEROPRIO_URT_DOT_DIFF_3, + STARPU_HETEROPRIO_URT_DOT_DIFF_4, + STARPU_HETEROPRIO_URT_DOT_DIFF_5, + STARPU_HETEROPRIO_URT_DOT_DIFF_6, + STARPU_HETEROPRIO_URT_DOT_DIFF_7, // 15 + 
STARPU_HETEROPRIO_URT_DOT_DIFF_8, + STARPU_HETEROPRIO_URT_DOT_DIFF_9, + STARPU_HETEROPRIO_URT_DOT_DIFF_10, + STARPU_HETEROPRIO_URT_DOT_DIFF_11, + STARPU_HETEROPRIO_URTS_PER_SECONDS, // 20 + STARPU_HETEROPRIO_URTS_PER_SECONDS_2, + STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF, + STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF, + STARPU_HETEROPRIO_URTS_TIME_COMBINATION, + STARPU_HETEROPRIO_NODS_PER_SECOND, + STARPU_HETEROPRIO_NODS_TIME_RELEASED, + STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF +}; + +static const char starpu_autoheteroprio_priority_ordering_policy_names[STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT][64] = { + "STARPU_HETEROPRIO_NOD_TIME_COMBINATION", + "STARPU_HETEROPRIO_BEST_NODS_SCORE", + "STARPU_HETEROPRIO_BEST_NODS", + "STARPU_HETEROPRIO_URT_PURE", + "STARPU_HETEROPRIO_URT", + "STARPU_HETEROPRIO_URT_2", + "STARPU_HETEROPRIO_URT_DOT_DIFF_PURE", + "STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2", + "STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE", + "STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2", + "STARPU_HETEROPRIO_URT_DOT_DIFF_2", + "STARPU_HETEROPRIO_URT_DOT_DIFF_3", + "STARPU_HETEROPRIO_URT_DOT_DIFF_4", + "STARPU_HETEROPRIO_URT_DOT_DIFF_5", + "STARPU_HETEROPRIO_URT_DOT_DIFF_6", + "STARPU_HETEROPRIO_URT_DOT_DIFF_7", + "STARPU_HETEROPRIO_URT_DOT_DIFF_8", + "STARPU_HETEROPRIO_URT_DOT_DIFF_9", + "STARPU_HETEROPRIO_URT_DOT_DIFF_10", + "STARPU_HETEROPRIO_URT_DOT_DIFF_11", + "STARPU_HETEROPRIO_URTS_PER_SECONDS", + "STARPU_HETEROPRIO_URTS_PER_SECONDS_2", + "STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF", + "STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF", + "STARPU_HETEROPRIO_URTS_TIME_COMBINATION", + "STARPU_HETEROPRIO_NODS_PER_SECOND", + "STARPU_HETEROPRIO_NODS_TIME_RELEASED", + "STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF", +}; + +/** Set if heteroprio should use data locality or not */ +void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality); + +/** Tell how many prio there are for a given arch */ +void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, 
enum starpu_worker_archtype arch, unsigned max_prio); + +/** Set the mapping for a given arch prio=>bucket */ +void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id); + +/** Tell which arch is the fastest for the tasks of a bucket (optional) */ +void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id); + +/** Tell how slow an arch is for the tasks of a bucket (optional) */ +void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor); + +/** One memory node will be one wgroup */ +void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id); + +/** Print the current setup groups */ +void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */ diff --git a/include/schedulers/starpu_scheduler_toolbox.h b/include/schedulers/starpu_scheduler_toolbox.h new file mode 100644 index 0000000..964de37 --- /dev/null +++ b/include/schedulers/starpu_scheduler_toolbox.h @@ -0,0 +1,168 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include + +#ifndef __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ +#define __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Scheduler_Toolbox Scheduler Toolbox + @brief This is the interface for the scheduler toolbox + + The definitions of the different queue types below (e.g + ::starpu_st_fifo_taskq_t) are private and are thus not available + outside the StarPU source directory. Hence when defining your own + scheduler outside of StarPU source directory, you should use the + functions below. Look for example in the scheduler defined in + examples/cholesky/libmy_dmda.c + @{ + */ + +/** + Opaque type for FIFO task queue +*/ +typedef struct starpu_st_fifo_taskq *starpu_st_fifo_taskq_t; + +/** Create a FIFO task queue */ +starpu_st_fifo_taskq_t starpu_st_fifo_taskq_create(void) STARPU_ATTRIBUTE_MALLOC; +void starpu_st_fifo_taskq_init(starpu_st_fifo_taskq_t fifo); +void starpu_st_fifo_taskq_destroy(starpu_st_fifo_taskq_t fifo); +int starpu_st_fifo_taskq_empty(starpu_st_fifo_taskq_t fifo); +double starpu_st_fifo_taskq_get_exp_len_prev_task_list(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task, int workerid, int nimpl, int *fifo_ntasks); + +/** get the number of tasks currently in the queue */ +unsigned starpu_st_fifo_ntasks_get(starpu_st_fifo_taskq_t fifo); + +/** increase by n the number of tasks currently in the queue */ +void starpu_st_fifo_ntasks_inc(starpu_st_fifo_taskq_t fifo, int n); + +/** get the number of tasks currently in the queue corresponding to each priority */ +unsigned *starpu_st_fifo_ntasks_per_priority_get(starpu_st_fifo_taskq_t fifo); + +/** get the number of tasks that were processed */ +unsigned starpu_st_fifo_nprocessed_get(starpu_st_fifo_taskq_t fifo); + +/** increase by n the number of tasks that were processed */ +void starpu_st_fifo_nprocessed_inc(starpu_st_fifo_taskq_t fifo, int n); + +/** only meaningful if the queue is only used by a single worker */ 
+/** + Get the expected start date of next item to do in the + queue (i.e. not started yet). This is thus updated + when we start it. +*/ +double starpu_st_fifo_exp_start_get(starpu_st_fifo_taskq_t fifo); + +/** + Set the expected start date of next item to do in the + queue (i.e. not started yet). + */ +void starpu_st_fifo_exp_start_set(starpu_st_fifo_taskq_t fifo, double exp_start); + +/** get the expected end date of last task in the queue */ +double starpu_st_fifo_exp_end_get(starpu_st_fifo_taskq_t fifo); + +/** set the expected end date of last task in the queue */ +void starpu_st_fifo_exp_end_set(starpu_st_fifo_taskq_t fifo, double exp_end); + +/** get the expected duration of the set of tasks in the queue */ +double starpu_st_fifo_exp_len_get(starpu_st_fifo_taskq_t fifo); + +/** set the expected duration of the set of tasks in the queue */ +void starpu_st_fifo_exp_len_set(starpu_st_fifo_taskq_t fifo, double exp_len); + +/** increase or decrease the expected duration of the set of tasks in the queue */ +void starpu_st_fifo_exp_len_inc(starpu_st_fifo_taskq_t fifo, double exp_len); + +/** get the expected duration of the set of tasks in the queue corresponding to each priority */ +double *starpu_st_fifo_exp_len_per_priority_get(starpu_st_fifo_taskq_t fifo); + +/** get the expected duration of what is already pushed to the worker */ +double starpu_st_fifo_pipeline_len_get(starpu_st_fifo_taskq_t fifo); + +/** set the expected duration of what is already pushed to the worker */ +void starpu_st_fifo_pipeline_len_set(starpu_st_fifo_taskq_t fifo, double pipeline_len); + +/** increase the expected duration of what is already pushed to the worker (the value can be negative) */ +void starpu_st_fifo_pipeline_len_inc(starpu_st_fifo_taskq_t fifo, double pipeline_len); + +int starpu_st_fifo_taskq_push_sorted_task(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task); +int starpu_st_fifo_taskq_push_task(starpu_st_fifo_taskq_t fifo, struct starpu_task *task); +int 
starpu_st_fifo_taskq_push_back_task(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task); + +int starpu_st_fifo_taskq_pop_this_task(starpu_st_fifo_taskq_t fifo_queue, int workerid, struct starpu_task *task); +struct starpu_task *starpu_st_fifo_taskq_pop_task(starpu_st_fifo_taskq_t fifo, int workerid); +/** + This is the same as starpu_st_fifo_taskq_pop_task(), but without checking that the + worker will be able to execute this task. This is useful when the scheduler + has already checked it. +*/ +struct starpu_task *starpu_st_fifo_taskq_pop_local_task(starpu_st_fifo_taskq_t fifo); + +/** + Pop the first task that can be executed on the calling driver, taking into account readiness of data +*/ +struct starpu_task *starpu_st_fifo_taskq_pop_first_ready_task(starpu_st_fifo_taskq_t fifo_queue, unsigned workerid, int num_priorities); + +/** + Opaque type for PRIO task queue +*/ +typedef struct starpu_st_prio_deque *starpu_st_prio_deque_t; + +/** all starpu_st_prio_deque_pop/deque_task functions return a task or a NULL pointer if none are available + * in O(lg(nb priorities)) + */ +void starpu_st_prio_deque_init(starpu_st_prio_deque_t pdeque); +void starpu_st_prio_deque_destroy(starpu_st_prio_deque_t pdeque); +/** return 0 iff the struct starpu_st_prio_deque is not empty */ +int starpu_st_prio_deque_is_empty(starpu_st_prio_deque_t pdeque); + +int starpu_st_prio_deque_push_back_task(starpu_st_prio_deque_t pdeque, struct starpu_task *task); +/** push a task in O(lg(nb priorities)) */ +int starpu_st_prio_deque_push_front_task(starpu_st_prio_deque_t pdeque, struct starpu_task *task); + +/** pop a task of the highest available priority, from the front of the list for that priority */ +struct starpu_task *starpu_st_prio_deque_pop_task_for_worker(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task **skipped); +/** return a task that can be executed by workerid from the back of the list for the highest priority */ +struct starpu_task
*starpu_st_prio_deque_deque_task_for_worker(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task **skipped); +struct starpu_task *starpu_st_prio_deque_deque_first_ready_task(starpu_st_prio_deque_t pdeque, unsigned workerid); + +struct starpu_task *starpu_st_prio_deque_pop_task(starpu_st_prio_deque_t pdeque); +struct starpu_task *starpu_st_prio_deque_highest_task(starpu_st_prio_deque_t pdeque); +struct starpu_task *starpu_st_prio_deque_pop_back_task(starpu_st_prio_deque_t pdeque); +int starpu_st_prio_deque_pop_this_task(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task *task); + +void starpu_st_prio_deque_erase(starpu_st_prio_deque_t pdeque, struct starpu_task *task); + +int starpu_st_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_id); +int starpu_st_non_ready_buffers_count(struct starpu_task *task, unsigned worker); +void starpu_st_non_ready_buffers_size(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_loadingp, size_t *non_allocatedp); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ */ diff --git a/include/starpu.h b/include/starpu.h new file mode 100644 index 0000000..88a8847 --- /dev/null +++ b/include/starpu.h @@ -0,0 +1,859 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_H__ +#define __STARPU_H__ + +#include + +#ifndef _MSC_VER +#include +#else +#include +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef UINT_PTR uintptr_t; +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; +typedef INT_PTR intptr_t; +#endif + +#include + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef BUILDING_STARPU +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Initialization_and_Termination Initialization and Termination + @{ +*/ + +/** + Structure passed to the starpu_init() function to configure StarPU. + It has to be initialized with starpu_conf_init(). When the default + value is used, StarPU automatically selects the number of + processing units and takes the default scheduling policy. The + environment variables overwrite the equivalent parameters unless + starpu_conf::precedence_over_environment_variables is set. +*/ +struct starpu_conf +{ + /** + @private + Will be initialized by starpu_conf_init(). Should not be + set by hand. + */ + int magic; + + /** + @private + Tell starpu_init() if MPI will be initialized later. + */ + int will_use_mpi; + + /** + Name of the scheduling policy. This can also be specified + with the environment variable \ref STARPU_SCHED. (default = + NULL). 
+ */ + const char *sched_policy_name; + + /** + Definition of the scheduling policy. This field is ignored + if starpu_conf::sched_policy_name is set. + (default = NULL) + */ + struct starpu_sched_policy *sched_policy; + + /** + Callback function that can later be used by the scheduler. + The scheduler can retrieve this function by calling + starpu_sched_ctx_get_sched_policy_callback() + */ + void (*sched_policy_callback)(unsigned); + + /** + For all parameters specified in this structure that can + also be set with environment variables, by default, + StarPU chooses the value of the environment variable + over the value set in starpu_conf. Setting the parameter + starpu_conf::precedence_over_environment_variables to 1 gives precedence + to the value set in the structure over the environment + variable. + */ + int precedence_over_environment_variables; + + /** + Number of CPU cores that StarPU can use. This can also be + specified with the environment variable \ref STARPU_NCPU. + (default = \c -1) + */ + int ncpus; + + /** + Number of CPU cores that StarPU should leave aside. They can then + be used by application threads, by calling starpu_get_next_bindid() to + get their ID, and starpu_bind_thread_on() to bind the current thread to them. + */ + int reserve_ncpus; + + /** + Number of CUDA devices that StarPU can use. This can also + be specified with the environment variable \ref + STARPU_NCUDA. + (default = \c -1) + */ + int ncuda; + + /** + Number of HIP devices that StarPU can use. This can also + be specified with the environment variable \ref + STARPU_NHIP. + (default = \c -1) + */ + int nhip; + + /** + Number of OpenCL devices that StarPU can use. This can also + be specified with the environment variable \ref + STARPU_NOPENCL. + (default = \c -1) + */ + int nopencl; + + /** + Number of Maxeler FPGA devices that StarPU can use. This can also + be specified with the environment variable \ref + STARPU_NMAX_FPGA.
+ (default = -1) + */ + int nmax_fpga; + + /** + Number of MPI Master Slave devices that StarPU can use. + This can also be specified with the environment variable + \ref STARPU_NMPI_MS. + (default = \c -1) + */ + int nmpi_ms; + + /** + Number of TCP/IP Master Slave devices that StarPU can use. + This can also be specified with the environment variable + \ref STARPU_NTCPIP_MS. + (default = \c -1) + */ + int ntcpip_ms; + + /** + If this flag is set, the starpu_conf::workers_bindid array + indicates where the different workers are bound, otherwise + StarPU automatically selects where to bind the different + workers. This can also be specified with the environment + variable \ref STARPU_WORKERS_CPUID. + (default = \c 0) + */ + unsigned use_explicit_workers_bindid; + + /** + If the starpu_conf::use_explicit_workers_bindid flag is + set, this array indicates where to bind the different + workers. The i-th entry of the starpu_conf::workers_bindid + indicates the logical identifier of the processor which + should execute the i-th worker. Note that the logical + ordering of the CPUs is either determined by the OS, or + provided by the \c hwloc library in case it is available. + */ + unsigned workers_bindid[STARPU_NMAXWORKERS]; + + /** + If this flag is set, the CUDA workers will be attached to + the CUDA devices specified in the + starpu_conf::workers_cuda_gpuid array. Otherwise, StarPU + affects the CUDA devices in a round-robin fashion. This can + also be specified with the environment variable \ref + STARPU_WORKERS_CUDAID. + (default = \c 0) + */ + unsigned use_explicit_workers_cuda_gpuid; + + /** + If the starpu_conf::use_explicit_workers_cuda_gpuid flag is + set, this array contains the logical identifiers of the + CUDA devices (as used by \c cudaGetDevice()). + */ + unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]; + + /** + If this flag is set, the HIP workers will be attached to + the HIP devices specified in the + starpu_conf::workers_hip_gpuid array. 
Otherwise, StarPU + affects the HIP devices in a round-robin fashion. This can + also be specified with the environment variable \ref + STARPU_WORKERS_HIPID. + (default = \c 0) + */ + unsigned use_explicit_workers_hip_gpuid; + + /** + If the starpu_conf::use_explicit_workers_hip_gpuid flag is + set, this array contains the logical identifiers of the + HIP devices (as used by \c hipGetDevice()). + */ + unsigned workers_hip_gpuid[STARPU_NMAXWORKERS]; + + /** + If this flag is set, the OpenCL workers will be attached to + the OpenCL devices specified in the + starpu_conf::workers_opencl_gpuid array. Otherwise, StarPU + affects the OpenCL devices in a round-robin fashion. This + can also be specified with the environment variable \ref + STARPU_WORKERS_OPENCLID. + (default = \c 0) + */ + unsigned use_explicit_workers_opencl_gpuid; + + /** + If the starpu_conf::use_explicit_workers_opencl_gpuid flag + is set, this array contains the logical identifiers of the + OpenCL devices to be used. + */ + unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]; + + /** + If this flag is set, the Maxeler FPGA workers will be attached to + the Maxeler FPGA devices specified in the + starpu_conf::workers_max_fpga_deviceid array. Otherwise, StarPU + affects the Maxeler FPGA devices in a round-robin fashion. This + can also be specified with the environment variable \ref + STARPU_WORKERS_MAX_FPGAID. + (default = 0) + */ + unsigned use_explicit_workers_max_fpga_deviceid; + + /** + If the starpu_conf::use_explicit_workers_max_fpga_deviceid flag + is set, this array contains the logical identifiers of the + Maxeler FPGA devices to be used. + */ + unsigned workers_max_fpga_deviceid[STARPU_NMAXWORKERS]; + +#ifdef STARPU_USE_MAX_FPGA + /** + This allows to specify the Maxeler file(s) to be loaded on Maxeler FPGAs. + This is an array of starpu_max_load, the last of which shall have + file set to NULL. 
In order to use all available devices, + starpu_max_load::engine_id_pattern can be set to "*", but only the + last non-NULL entry can be set so. + + If this is not set, it is assumed that the basic static SLiC + interface is used. + */ + struct starpu_max_load *max_fpga_load; +#else + void *max_fpga_load; +#endif + + /** + If this flag is set, the MPI Master Slave workers will be + attached to the MPI Master Slave devices specified in the + array starpu_conf::workers_mpi_ms_deviceid. Otherwise, + StarPU affects the MPI Master Slave devices in a + round-robin fashion. + (default = \c 0) + */ + unsigned use_explicit_workers_mpi_ms_deviceid; + + /** + If the flag + starpu_conf::use_explicit_workers_mpi_ms_deviceid is set, + the array contains the logical identifiers of the MPI + Master Slave devices to be used. + */ + unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS]; + + /** + If this flag is set, StarPU will recalibrate the bus. If + this value is equal to -1, the default value is used. This + can also be specified with the environment variable \ref + STARPU_BUS_CALIBRATE. + (default = \c 0) + */ + int bus_calibrate; + + /** + If this flag is set, StarPU will calibrate the performance + models when executing tasks. If this value is equal to -1, + the default value is used. If the value is equal to 1, it + will force continuing calibration. If the value is equal to + 2, the existing performance models will be overwritten. + This can also be specified with the environment variable + \ref STARPU_CALIBRATE. + (default = \c 0) + */ + int calibrate; + + /** + This flag should be set to 1 to enforce data locality when + choosing a worker to execute a task. + This can also be specified with the environment variable + \ref STARPU_DATA_LOCALITY_ENFORCE. + This can also be specified at compilation time by giving to + the configure script the option \ref + enable-data-locality-enforce "--enable-data-locality-enforce". 
+ (default = \c 0) + */ + int data_locality_enforce; + + /** + By default, StarPU executes parallel tasks concurrently. + Some parallel libraries (e.g. most OpenMP implementations) + however do not support concurrent calls to parallel code. + In such case, setting this flag makes StarPU only start one + parallel task at a time (but other CPU and GPU tasks are + not affected and can be run concurrently). The parallel + task scheduler will however still try varying combined + worker sizes to look for the most efficient ones. + This can also be specified with the environment variable + \ref STARPU_SINGLE_COMBINED_WORKER. + (default = \c 0) + */ + int single_combined_worker; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and all accelerators. + The AMD implementation of OpenCL is known to fail when + copying data asynchronously. When using this + implementation, it is therefore necessary to disable + asynchronous data transfers. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-copy "--disable-asynchronous-copy". + (default = \c 0) + */ + int disable_asynchronous_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and CUDA accelerators. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-cuda-copy + "--disable-asynchronous-cuda-copy". + (default = \c 0) + */ + int disable_asynchronous_cuda_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and HIP accelerators. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY. 
+ This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-hip-copy + "--disable-asynchronous-hip-copy". + (default = \c 0) + */ + int disable_asynchronous_hip_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and OpenCL accelerators. + The AMD implementation of OpenCL is known to fail when + copying data asynchronously. When using this + implementation, it is therefore necessary to disable + asynchronous data transfers. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-opencl-copy + "--disable-asynchronous-opencl-copy". + (default = \c 0) + */ + int disable_asynchronous_opencl_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and MPI Master Slave devices. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-mpi-master-slave-copy + "--disable-asynchronous-mpi-master-slave-copy". + (default = \c 0). + */ + int disable_asynchronous_mpi_ms_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and TCP/IP Master Slave devices. + This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-tcpip-master-slave-copy + "--disable-asynchronous-tcpip-master-slave-copy". + (default = \c 0). + */ + int disable_asynchronous_tcpip_ms_copy; + + /** + This flag should be set to 1 to disable asynchronous copies + between CPUs and Maxeler FPGA devices. 
+ This can also be specified with the environment variable + \ref STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY. + This can also be specified at compilation time by giving to + the configure script the option \ref + disable-asynchronous-fpga-copy + "--disable-asynchronous-fpga-copy". + (default = 0). + */ + int disable_asynchronous_max_fpga_copy; + + /** + This flag should be set to 1 to enable memory mapping + support between memory nodes (NOTE(review): this text previously said disable, which contradicts the field and variable names; confirm). This can also be specified + with the environment variable \ref STARPU_ENABLE_MAP. + */ + int enable_map; + + /** + Enable CUDA/OpenGL interoperation on these CUDA devices. + This can be set to an array of CUDA device identifiers for + which \c cudaGLSetGLDevice() should be called instead of \c + cudaSetDevice(). Its size is specified by the + starpu_conf::n_cuda_opengl_interoperability field below + (default = NULL) + */ + unsigned *cuda_opengl_interoperability; + + /** + Size of the array starpu_conf::cuda_opengl_interoperability + */ + unsigned n_cuda_opengl_interoperability; + + /** + Array of drivers that should not be launched by StarPU. The + application will run in one of its own threads. + (default = NULL) + */ + struct starpu_driver *not_launched_drivers; + + /** + The number of StarPU drivers that should not be launched by + StarPU, i.e. the number of elements of the array + starpu_conf::not_launched_drivers. + (default = \c 0) + */ + unsigned n_not_launched_drivers; + + /** + Specify the buffer size used for FxT tracing. Starting from + FxT version 0.2.12, the buffer will automatically be + flushed when it fills up, but it may still be useful + to specify a bigger value to avoid any flushing (which + would disturb the trace). + */ + uint64_t trace_buffer_size; + + /** + Set the minimum priority used by priorities-aware + schedulers.
+ This can also be specified with the environment variable \ref + STARPU_MIN_PRIO + */ + int global_sched_ctx_min_priority; + + /** + Set the maximum priority used by priorities-aware + schedulers. + This can also be specified with the environment variable \ref + STARPU_MAX_PRIO + */ + int global_sched_ctx_max_priority; + +#ifdef STARPU_WORKER_CALLBACKS + void (*callback_worker_going_to_sleep)(unsigned workerid); + void (*callback_worker_waking_up)(unsigned workerid); +#endif + + /** + Specify if StarPU should catch \c SIGINT, \c SIGSEGV and \c SIGTRAP + signals to make sure final actions (e.g. dumping FxT trace + files) are done even though the application has crashed. By + default (value = \c 1), signals are caught. It should be + disabled on systems which already catch these signals for + their own needs (e.g. JVM). + This can also be specified with the environment variable + \ref STARPU_CATCH_SIGNALS. + */ + int catch_signals; + + /** + Specify whether StarPU should automatically start to collect + performance counters after initialization + */ + unsigned start_perf_counter_collection; + + /** + Minimum spinning backoff of drivers (default = \c 1) + */ + unsigned driver_spinning_backoff_min; + + /** + Maximum spinning backoff of drivers. (default = \c 32) + */ + unsigned driver_spinning_backoff_max; + + /** + Specify if CUDA workers should do only fast allocations + when running the datawizard progress of + other memory nodes. This will pass the internal value + _STARPU_DATAWIZARD_ONLY_FAST_ALLOC to the allocation method. + Default value is 0, allowing CUDA workers to do slow + allocations. + This can also be specified with the environment variable + \ref STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES. + */ + int cuda_only_fast_alloc_other_memnodes; +}; + +/** + Initialize the \p conf structure with the default values.
In case + some configuration parameters are already specified through + environment variables, starpu_conf_init() initializes the fields of + \p conf according to the environment variables. + For instance if \ref STARPU_CALIBRATE is set, its value is put in + the field starpu_conf::calibrate of \p conf. + Upon successful completion, this function returns 0. Otherwise, + -EINVAL indicates that the argument was NULL. +*/ +int starpu_conf_init(struct starpu_conf *conf); + +/** + Set fields of \p conf so that no worker is enabled, i.e. set + starpu_conf::ncpus = 0, starpu_conf::ncuda = 0, etc. + + This allows to portably enable only a given type of worker: +
    + + starpu_conf_noworker(&conf);
    + conf.ncpus = -1; +
    + + See \ref ConfigurationAndInitialization for more details. +*/ +int starpu_conf_noworker(struct starpu_conf *conf); + +/** + StarPU initialization method, must be called prior to any other + StarPU call. It is possible to specify StarPU’s configuration (e.g. + scheduling policy, number of cores, ...) by passing a + non-NULL \p conf. Default configuration is used if \p conf + is NULL. Upon successful completion, this function returns + 0. Otherwise, -ENODEV indicates that no worker was available + (and thus StarPU was not initialized). See \ref SubmittingATask for more details. +*/ +int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT; + +/** + Similar to starpu_init(), but also take the \p argc and \p argv as + defined by the application, which is necessary when running in + Simgrid mode or MPI Master Slave mode. + Do not call starpu_init() and starpu_initialize() in the same + program. See \ref SubmittingATask for more details. +*/ +int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv); + +/** + Return 1 if StarPU is already initialized. See \ref ConfigurationAndInitialization for more details. +*/ +int starpu_is_initialized(void); + +/** + Wait for starpu_init() call to finish. See \ref ConfigurationAndInitialization for more details. +*/ +void starpu_wait_initialized(void); + +/** + StarPU termination method, must be called at the end of the + application: statistics and other post-mortem debugging information + are not guaranteed to be available until this method has been + called. See \ref SubmittingATask for more details. +*/ +void starpu_shutdown(void); + +/** + Suspend the processing of new tasks by workers. It can be used in a + program where StarPU is used during only a part of the execution. + Without this call, the workers continue to poll for new tasks in a + tight loop, wasting CPU time. The symmetric call to starpu_resume() + should be used to unfreeze the workers. 
See \ref KernelThreadsStartedByStarPU and \ref PauseResume for more details. +*/ +void starpu_pause(void); + +/** + Symmetrical call to starpu_pause(), used to resume the workers + polling for new tasks. This would be typically called only once + having submitted all tasks. See \ref KernelThreadsStartedByStarPU and \ref PauseResume for more details. +*/ +void starpu_resume(void); + +/** + Return !0 if task processing by workers is currently paused, 0 otherwise. + See \ref StarPUEatsCPUs for more details. + */ +int starpu_is_paused(void); + +/** + Value to be passed to starpu_get_next_bindid() and + starpu_bind_thread_on() when binding a thread which will + significantly eat CPU time, and should thus have its own dedicated + CPU. +*/ +#define STARPU_THREAD_ACTIVE (1 << 0) + +/** + Return a PU binding ID which can be used to bind threads with + starpu_bind_thread_on(). \p flags can be set to + ::STARPU_THREAD_ACTIVE or 0. When \p npreferred is set to non-zero, + \p preferred is an array of size \p npreferred in which a + preference of PU binding IDs can be set. By default StarPU will + return the first PU available for binding. + See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. +*/ +unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred); + +/** + Bind the calling thread on the given \p cpuid (which should have + been obtained with starpu_get_next_bindid()). + + Return -1 if a thread was already bound to this PU (but binding + will still have been done, and a warning will have been printed), + so the caller can tell the user how to avoid the issue. + + \p name should be set to a unique string so that different calls + with the same name for the same \p cpuid does not produce a warning. + + See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. 
+*/ +int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name); + +/** + Bind the calling thread on the cores corresponding to the \p workerid . + + \p workerid can be a basic worker or a combined worker. + + This can be used e.g. before initializing a library which records at + initialization time the thread binding to be used when running kernels. + + See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. +*/ +void starpu_bind_thread_on_worker(unsigned workerid); + +/** + Bind the calling thread back to the core reserved for the main thread. + + This can be used e.g. after initializing a library which records at + initialization time the thread binding to be used when running kernels. + + See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. +*/ +void starpu_bind_thread_on_main(void); + +/** + Bind the calling thread on the given \p cpuid + + This can be used e.g. after initializing a library which records at + initialization time the thread binding to be used when running kernels. + + See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. +*/ +void starpu_bind_thread_on_cpu(int cpuid); + +/** + Return the OS number of a given \p cpuid + + StarPU uses logical numbering (as defined by hwloc) all along, but in case + interaction is needed with another binding tool that uses numbering as + defined by the OS, we need to convert from hwloc logical numbering to hwloc + physical numbering. +*/ +int starpu_cpu_os_index(int cpuid); + +/** + Print a description of the topology on \p f. + See \ref ConfigurationAndInitialization for more details. +*/ +void starpu_topology_print(FILE *f); + +/** + Return 1 if asynchronous data transfers between CPU and + accelerators are disabled. + See \ref Basic for more details. +*/ +int starpu_asynchronous_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and CUDA + accelerators are disabled. + See \ref cudaWorkers for more details. 
+*/ +int starpu_asynchronous_cuda_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and HIP + accelerators are disabled. + See \ref hipWorkers for more details. +*/ +int starpu_asynchronous_hip_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and OpenCL + accelerators are disabled. + See \ref openclWorkers for more details. +*/ +int starpu_asynchronous_opencl_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and Maxeler FPGA + devices are disabled. + See \ref maxfpgaWorkers for more details. +*/ +int starpu_asynchronous_max_fpga_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and MPI Slave + devices are disabled. + See \ref mpimsWorkers for more details. +*/ +int starpu_asynchronous_mpi_ms_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers between CPU and TCP/IP Slave + devices are disabled. + See \ref tcpipmsWorkers for more details. +*/ +int starpu_asynchronous_tcpip_ms_copy_disabled(void); + +/** + Return 1 if asynchronous data transfers with a given kind of memory + are disabled. +*/ +int starpu_asynchronous_copy_disabled_for(enum starpu_node_kind kind); + +/** + Return 1 if memory mapping support between memory nodes is + enabled. + See \ref Basic for more details. +*/ +int starpu_map_enabled(void); + +/** + Call starpu_profiling_bus_helper_display_summary() and + starpu_profiling_worker_helper_display_summary(). + See \ref DataStatistics for more details. +*/ +void starpu_display_stats(void); + +/** @} */ + +/** + @defgroup API_Versioning Versioning + @{ +*/ + +/** + Return as 3 integers the version of StarPU used when running the + application. + See \ref ConfigurationAndInitialization for more details. 
+*/ +void starpu_get_version(int *major, int *minor, int *release); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#include "starpu_deprecated_api.h" + +#endif /* __STARPU_H__ */ diff --git a/include/starpu_bitmap.h b/include/starpu_bitmap.h new file mode 100644 index 0000000..3ff2cb4 --- /dev/null +++ b/include/starpu_bitmap.h @@ -0,0 +1,299 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_BITMAP_H__ +#define __STARPU_BITMAP_H__ + +#include +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Bitmap Bitmap + @brief This is the interface for the bitmap utilities provided by StarPU. 
+ @{ +*/ +#ifndef _STARPU_LONG_BIT +#define _STARPU_LONG_BIT ((int)(sizeof(unsigned long) * 8)) +#endif + +#define _STARPU_BITMAP_SIZE ((STARPU_NMAXWORKERS - 1) / _STARPU_LONG_BIT) + 1 + +/** create a empty starpu_bitmap */ +static inline struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC; +/** zero a starpu_bitmap */ +static inline void starpu_bitmap_init(struct starpu_bitmap *b); +/** free \p b */ +static inline void starpu_bitmap_destroy(struct starpu_bitmap *b); + +/** set bit \p e in \p b */ +static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e); +/** unset bit \p e in \p b */ +static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e); +/** unset all bits in \p b */ +static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b); + +/** return true iff bit \p e is set in \p b */ +static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e); +/** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */ +static inline void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c); +/** Basically compute \p a |= \p b */ +static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b); +/** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */ +static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e); +/** return the number of set bits in \p b */ +static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b); + +/** return the index of the first set bit of \p b, -1 if none */ +static inline int starpu_bitmap_first(struct starpu_bitmap *b); +/** return the position of the last set bit of \p b, -1 if none */ +static inline int starpu_bitmap_last(struct starpu_bitmap *b); +/** return the position of set bit right after \p e in \p b, -1 if none */ +static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e); +/** todo */ +static inline int 
starpu_bitmap_has_next(struct starpu_bitmap *b, int e); + +/** @} */ + +/** + todo +*/ +struct starpu_bitmap +{ + unsigned long bits[_STARPU_BITMAP_SIZE]; + int cardinal; +}; + +#ifdef _STARPU_DEBUG_BITMAP +static int _starpu_check_bitmap(struct starpu_bitmap *b) +{ + int card = b->cardinal; + int i = starpu_bitmap_first(b); + int j; + for (j = 0; j < card; j++) + { + if (i == -1) + return 0; + int tmp = starpu_bitmap_next(b, i); + if (tmp == i) + return 0; + i = tmp; + } + if (i != -1) + return 0; + return 1; +} +#else +#define _starpu_check_bitmap(b) 1 +#endif + +static int _starpu_count_bit_static(unsigned long e) +{ +#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4) + return __builtin_popcountl(e); +#else + int c = 0; + while (e) + { + c += e & 1; + e >>= 1; + } + return c; +#endif +} + +static inline struct starpu_bitmap *starpu_bitmap_create(void) +{ + return (struct starpu_bitmap *)calloc(1, sizeof(struct starpu_bitmap)); +} + +static inline void starpu_bitmap_init(struct starpu_bitmap *b) +{ + memset(b, 0, sizeof(*b)); +} + +static inline void starpu_bitmap_destroy(struct starpu_bitmap *b) +{ + free(b); +} + +static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e) +{ + if (!starpu_bitmap_get(b, e)) + b->cardinal++; + else + return; + STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); + b->bits[e / _STARPU_LONG_BIT] |= (1ul << (e % _STARPU_LONG_BIT)); + STARPU_ASSERT(_starpu_check_bitmap(b)); +} +static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e) +{ + if (starpu_bitmap_get(b, e)) + b->cardinal--; + else + return; + STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); + if (e / _STARPU_LONG_BIT > _STARPU_BITMAP_SIZE) + return; + b->bits[e / _STARPU_LONG_BIT] &= ~(1ul << (e % _STARPU_LONG_BIT)); + STARPU_ASSERT(_starpu_check_bitmap(b)); +} + +static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b) +{ + memset(b->bits, 0, _STARPU_BITMAP_SIZE * sizeof(unsigned long)); +} + +static inline 
void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c) +{ + a->cardinal = 0; + int i; + for (i = 0; i < _STARPU_BITMAP_SIZE; i++) + { + a->bits[i] = b->bits[i] & c->bits[i]; + a->cardinal += _starpu_count_bit_static(a->bits[i]); + } +} + +static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e) +{ + STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); + if (e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE) + return 0; + return (b->bits[e / _STARPU_LONG_BIT] & (1ul << (e % _STARPU_LONG_BIT))) ? 1 : 0; +} + +static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b) +{ + int i; + a->cardinal = 0; + for (i = 0; i < _STARPU_BITMAP_SIZE; i++) + { + a->bits[i] |= b->bits[i]; + a->cardinal += _starpu_count_bit_static(a->bits[i]); + } +} + +static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e) +{ + return starpu_bitmap_get(b1, e) && starpu_bitmap_get(b2, e); +} + +static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b) +{ + return b->cardinal; +} + +static inline int _starpu_get_first_bit_rank(unsigned long ms) +{ + STARPU_ASSERT(ms != 0); +#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) + return __builtin_ffsl(ms) - 1; +#else + unsigned long m = 1ul; + int i = 0; + while (!(m & ms)) + i++, m <<= 1; + return i; +#endif +} + +static inline int _starpu_get_last_bit_rank(unsigned long l) +{ + STARPU_ASSERT(l != 0); +#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) + return 8 * sizeof(l) - __builtin_clzl(l); +#else + int ibit = _STARPU_LONG_BIT - 1; + while ((!(1ul << ibit)) & l) + ibit--; + STARPU_ASSERT(ibit >= 0); + return ibit; +#endif +} + +static inline int starpu_bitmap_first(struct starpu_bitmap *b) +{ + int i = 0; + while (i < _STARPU_BITMAP_SIZE && !b->bits[i]) + i++; + if (i == _STARPU_BITMAP_SIZE) + return -1; + int nb_long = i; + unsigned long ms = b->bits[i]; + + return (nb_long * 
_STARPU_LONG_BIT) + _starpu_get_first_bit_rank(ms); +} + +static inline int starpu_bitmap_has_next(struct starpu_bitmap *b, int e) +{ + int nb_long = (e + 1) / _STARPU_LONG_BIT; + int nb_bit = (e + 1) % _STARPU_LONG_BIT; + unsigned long mask = (~0ul) << nb_bit; + if (b->bits[nb_long] & mask) + return 1; + for (nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++) + if (b->bits[nb_long]) + return 1; + return 0; +} + +static inline int starpu_bitmap_last(struct starpu_bitmap *b) +{ + if (b->cardinal == 0) + return -1; + int ilong; + for (ilong = _STARPU_BITMAP_SIZE - 1; ilong >= 0; ilong--) + { + if (b->bits[ilong]) + break; + } + STARPU_ASSERT(ilong >= 0); + unsigned long l = b->bits[ilong]; + return ilong * _STARPU_LONG_BIT + _starpu_get_last_bit_rank(l); +} + +static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e) +{ + int nb_long = e / _STARPU_LONG_BIT; + int nb_bit = e % _STARPU_LONG_BIT; + unsigned long rest = nb_bit == _STARPU_LONG_BIT - 1 ? 0 : (~0ul << (nb_bit + 1)) & b->bits[nb_long]; + if (nb_bit != (_STARPU_LONG_BIT - 1) && rest) + { + int i = _starpu_get_first_bit_rank(rest); + STARPU_ASSERT(i >= 0 && i < _STARPU_LONG_BIT); + return (nb_long * _STARPU_LONG_BIT) + i; + } + + for (nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++) + if (b->bits[nb_long]) + return nb_long * _STARPU_LONG_BIT + _starpu_get_first_bit_rank(b->bits[nb_long]); + return -1; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_BITMAP_H__ */ diff --git a/include/starpu_bound.h b/include/starpu_bound.h new file mode 100644 index 0000000..d7d90fd --- /dev/null +++ b/include/starpu_bound.h @@ -0,0 +1,97 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_BOUND_H__ +#define __STARPU_BOUND_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time + @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution. + @{ +*/ + +/** + Start recording tasks (resets stats). \p deps tells whether + dependencies should be recorded too (this is quite expensive) + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_start(int deps, int prio); + +/** + Stop recording tasks + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_stop(void); + +/** + Emit the DAG that was recorded on \p output. + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_print_dot(FILE *output); + +/** + Get theoretical upper bound (in ms) (needs glpk support detected by + configure script). It returns 0 if some performance models are not + calibrated. \p integer permits to choose between integer solving + (which takes a long time but is correct), and relaxed solving + (which provides an approximate solution). + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. 
+*/ +void starpu_bound_compute(double *res, double *integer_res, int integer); + +/** + Emit the Linear Programming system on \p output for the recorded + tasks, in the lp format + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_print_lp(FILE *output); + +/** + Emit the Linear Programming system on \p output for the recorded + tasks, in the mps format + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_print_mps(FILE *output); + +/** + Emit on \p output the statistics of actual execution vs theoretical + upper bound. \p integer permits to choose between integer solving + (which takes a long time but is correct), and relaxed solving + (which provides an approximate solution). + + See \ref TheoreticalLowerBoundOnExecutionTime for more details. +*/ +void starpu_bound_print(FILE *output, int integer); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_BOUND_H__ */ diff --git a/include/starpu_config.h.in b/include/starpu_config.h.in new file mode 100644 index 0000000..a9f1b81 --- /dev/null +++ b/include/starpu_config.h.in @@ -0,0 +1,393 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is the public config.h file, installed along libstarpu. 
+ * + * It should only contain the build-time #defines which have an effect on the + * API & ABI. + */ + +#ifndef __STARPU_CONFIG_PUBLIC_H__ +#define __STARPU_CONFIG_PUBLIC_H__ + +/** + Define the major version of StarPU. This is the version used when + compiling the application. + @ingroup API_Versioning +*/ +#undef STARPU_MAJOR_VERSION + +/** + @ingroup API_Versioning + Define the minor version of StarPU. This is the version used when + compiling the application. +*/ +#undef STARPU_MINOR_VERSION + +/** + Define the release version of StarPU. This is the version used when + compiling the application. + @ingroup API_Versioning +*/ +#undef STARPU_RELEASE_VERSION + +#undef STARPU_USE_CPU + +/** + Defined when StarPU has been installed with + CUDA support. It should be used in your code to detect the + availability of CUDA. + @ingroup API_CUDA_Extensions +*/ +#undef STARPU_USE_CUDA + +/** + Defined when StarPU is testing the CUDA0 driver. +*/ +#undef STARPU_USE_CUDA0 + +/** + Defined when StarPU is testing the CUDA1 driver. +*/ +#undef STARPU_USE_CUDA1 + +/** + Defined when StarPU has been installed with + HIP support. It should be used in your code to detect the + availability of HIP. + @ingroup API_HIP_Extensions +*/ +#undef STARPU_USE_HIP + +/** + Defined when StarPU has been installed with + HIP BLAS support. It should be used in your code to detect the + availability of HIP BLAS. + @ingroup API_HIP_Extensions +*/ +#undef STARPU_USE_HIPBLAS + +/** + Defined when StarPU has been installed with + NVidia-ML support. It should be used in your code to detect the + availability of NVML-related functions. + @ingroup API_CUDA_Extensions +*/ +#undef STARPU_HAVE_NVML_H + +/** + Defined when StarPU has been installed with OpenCL support. It + should be used in your code to detect the availability of OpenCL as + shown in \ref FullSourceCodeVectorScal. 
+ @ingroup API_OpenCL_Extensions +*/ +#undef STARPU_USE_OPENCL + +/** + Defined when StarPU has been installed with FPGA support. It should + be used in your code to detect the availability of FPGA. + @ingroup API_Max_FPGA_Extensions +*/ +#undef STARPU_USE_MAX_FPGA + +/** + Defined when StarPU has been installed with MPI Master Slave + support. It should be used in your code to detect the availability + of MPI Master Slave. + @ingroup API_MPI_Support +*/ +#undef STARPU_USE_MPI_MASTER_SLAVE + +/** + Defined when StarPU has been installed with TCP/IP Master Slave + support. It should be used in your code to detect the availability + of TCP/IP Master Slave. +*/ +#undef STARPU_USE_TCPIP_MASTER_SLAVE + +/** + Defined when StarPU has been installed with OpenMP Runtime support. + It should be used in your code to detect the availability of the + runtime support for OpenMP. + @ingroup API_OpenMP_Runtime_Support +*/ +#undef STARPU_OPENMP + +#undef STARPU_BUBBLE +#undef STARPU_PARALLEL_WORKER + +#undef STARPU_SIMGRID +#undef STARPU_SIMGRID_MC +#undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT +#undef STARPU_HAVE_SIMGRID_MSG_H +#undef STARPU_HAVE_MSG_MSG_H +#undef STARPU_HAVE_SIMGRID_ACTOR_H +#undef STARPU_HAVE_SIMGRID_SEMAPHORE_H +#undef STARPU_HAVE_SIMGRID_MUTEX_H +#undef STARPU_HAVE_SIMGRID_COND_H +#undef STARPU_HAVE_SIMGRID_BARRIER_H +#undef STARPU_HAVE_XBT_SYNCHRO_H +#undef STARPU_HAVE_VALGRIND_H +#undef STARPU_HAVE_MEMCHECK_H +#undef STARPU_VALGRIND_FULL +#undef STARPU_SANITIZE_LEAK +#undef STARPU_NON_BLOCKING_DRIVERS +/* workers must call callbacks on sleep/wake-up */ +#undef STARPU_WORKER_CALLBACKS + +#undef STARPU_HAVE_ICC + +/** + Defined when StarPU has been installed with MPI support. It should + be used in your code to detect the availability of MPI. 
+ @ingroup API_MPI_Support +*/ +#undef STARPU_USE_MPI +#undef STARPU_USE_MPI_MPI +#undef STARPU_USE_MPI_NMAD +#undef STARPU_USE_MPI_FT +#undef STARPU_USE_MPI_FT_STATS + +#undef STARPU_ATLAS +#undef STARPU_GOTO +#undef STARPU_OPENBLAS +#undef STARPU_MKL +#undef STARPU_ARMPL +#undef STARPU_SYSTEM_BLAS +#undef STARPU_HAVE_CBLAS_H +#undef STARPU_HAVE_BLAS + +/** + Define the directory in which the OpenCL codelets of the + applications provided with StarPU have been installed. + @ingroup API_OpenCL_Extensions +*/ +#undef STARPU_OPENCL_DATADIR +#undef STARPU_HAVE_LIBCUBLASLT +#undef STARPU_HAVE_LIBCUSPARSE +#undef STARPU_HAVE_LIBCUSOLVER +#undef STARPU_HAVE_MAGMA + +#undef STARPU_OPENGL_RENDER +#undef STARPU_USE_GTK +#undef STARPU_HAVE_X11 +#undef STARPU_PAPI + +#undef STARPU_HAVE_POSIX_MEMALIGN + +#undef STARPU_HAVE_MEMALIGN + +#undef STARPU_HAVE_MALLOC_H + +#undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP +#undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 +#undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP +#undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 +#undef STARPU_HAVE_SYNC_FETCH_AND_ADD +#undef STARPU_HAVE_SYNC_FETCH_AND_ADD_8 +#undef STARPU_HAVE_SYNC_FETCH_AND_OR +#undef STARPU_HAVE_SYNC_FETCH_AND_OR_8 +#undef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET +#undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N +#undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 +#undef STARPU_HAVE_ATOMIC_EXCHANGE_N +#undef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 +#undef STARPU_HAVE_ATOMIC_FETCH_ADD +#undef STARPU_HAVE_ATOMIC_FETCH_ADD_8 +#undef STARPU_HAVE_ATOMIC_FETCH_OR +#undef STARPU_HAVE_ATOMIC_FETCH_OR_8 +#undef STARPU_HAVE_ATOMIC_TEST_AND_SET +#undef STARPU_HAVE_SYNC_SYNCHRONIZE + +#undef STARPU_DEVEL +#undef STARPU_MODEL_DEBUG +#undef STARPU_NO_ASSERT +#undef STARPU_DEBUG +#undef STARPU_VERBOSE +#undef STARPU_GDB_PATH + +#undef STARPU_HAVE_FFTW +#undef STARPU_HAVE_FFTWF +#undef STARPU_HAVE_FFTWL +#undef STARPU_HAVE_CUFFTDOUBLECOMPLEX + +#undef STARPU_HAVE_CURAND + +/** + Define the maximum number of memory nodes 
managed by StarPU. The + default value can be modified at configure by using the option \ref + enable-maxnodes "--enable-maxnodes". Reducing it allows to + considerably reduce memory used by StarPU data structures. + @ingroup API_Workers +*/ +#undef STARPU_MAXNODES + +/** + Define the maximum number of buffers that tasks will be able to + take as parameters. The default value is 8, it can be changed by + using the configure option \ref enable-maxbuffers + "--enable-maxbuffers". + @ingroup API_Codelet_And_Tasks +*/ +#undef STARPU_NMAXBUFS + +/** + Define the maximum number of fxt mpi files that can be read when + generating traces. The default value is 64, it can be changed by + using the configure option \ref enable-fxt-max-files + "--enable-fxt-max-files". + @ingroup API_MPI_Support +*/ +#undef STARPU_FXT_MAX_FILES + +/** + Define the maximum number of CPU workers managed by StarPU. The + default value can be modified at configure by using the option \ref + enable-maxcpus "--enable-maxcpus". + @ingroup API_Workers +*/ +#undef STARPU_MAXCPUS + +/** + Define the maximum number of NUMA nodes managed by StarPU. The + default value can be modified at configure by using the option \ref + enable-maxnumanodes "--enable-maxnumanodes". + @ingroup API_Workers +*/ +#undef STARPU_MAXNUMANODES + +/** + Define the maximum number of CUDA devices that are supported by StarPU. + @ingroup API_CUDA_Extensions +*/ +#undef STARPU_MAXCUDADEVS + +/** + Define the maximum number of OpenCL devices that are supported by + StarPU. + @ingroup API_OpenCL_Extensions +*/ +#undef STARPU_MAXOPENCLDEVS + +/** + Define the maximum number of Maxeler FPGA devices that are supported by + StarPU. + @ingroup API_Max_FPGA_Extensions + */ +#undef STARPU_MAXMAXFPGADEVS + +/** + Define the maximum number of HIP devices that are supported by + StarPU. + @ingroup API_HIP_Extensions + */ +#undef STARPU_MAXHIPDEVS + +/** + Define the maximum number of workers managed by StarPU. 
+ @ingroup API_Workers +*/ +#undef STARPU_NMAXWORKERS + +/** + Define the maximum number of scheduling contexts managed by StarPU. + The default value can be modified at configure by using the option + \ref enable-max-sched-ctxs "--enable-max-sched-ctxs". + @ingroup API_Scheduling_Policy +*/ +#undef STARPU_NMAX_SCHED_CTXS + +/** + Define the maximum number of implementations per architecture. The + default value can be modified at configure by using the option \ref + enable-maximplementations "--enable-maximplementations". + @ingroup API_Scheduling_Policy +*/ +#undef STARPU_MAXIMPLEMENTATIONS + +#undef STARPU_USE_SC_HYPERVISOR +#undef STARPU_SC_HYPERVISOR_DEBUG +#undef STARPU_HAVE_GLPK_H + +#undef STARPU_HAVE_CUDA_MEMCPY_PEER +#undef STARPU_HAVE_LIBNUMA + +#undef STARPU_HAVE_WINDOWS +#undef STARPU_LINUX_SYS +#undef STARPU_HAVE_SETENV +#undef STARPU_HAVE_UNSETENV +#undef STARPU_HAVE_UNISTD_H +#undef STARPU_HAVE_HDF5 + +#undef STARPU_HAVE_MPI_COMM_CREATE_GROUP + +#undef STARPU_USE_FXT +#undef STARPU_FXT_LOCK_TRACES + +#ifdef _MSC_VER +typedef long starpu_ssize_t; +#define __starpu_func__ __FUNCTION__ +#else +#include +typedef ssize_t starpu_ssize_t; +#define __starpu_func__ __func__ +#endif + +#if defined(c_plusplus) || defined(__cplusplus) +/* inline is part of C++ */ +#define __starpu_inline inline +#elif defined(_MSC_VER) || defined(__HP_cc) +#define __starpu_inline __inline +#else +#define __starpu_inline __inline__ +#endif + +#undef STARPU_QUICK_CHECK +#undef STARPU_LONG_CHECK +#undef STARPU_USE_DRAND48 +#undef STARPU_USE_ERAND48_R +#undef STARPU_HAVE_NEARBYINTF +#undef STARPU_HAVE_RINTF + +#undef STARPU_HAVE_HWLOC +#undef STARPU_HAVE_PTHREAD_SPIN_LOCK +#undef STARPU_HAVE_PTHREAD_BARRIER +#undef STARPU_HAVE_PTHREAD_SETNAME_NP +#undef STARPU_HAVE_STRUCT_TIMESPEC +#undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO +#undef STARPU_PTHREAD_COND_INITIALIZER_ZERO +#undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO + +/** This is only for building examples */ +#undef 
STARPU_HAVE_HELGRIND_H + +/** Enable Fortran to C MPI interface */ +#undef HAVE_MPI_COMM_F2C + +#undef STARPU_HAVE_DARWIN + +#undef STARPU_HAVE_CXX11 +#undef STARPU_HAVE_STRERROR_R +#undef STARPU_HAVE_STATEMENT_EXPRESSIONS +#undef STARPU_PERF_MODEL_DIR + +#undef STARPU_PYTHON_HAVE_NUMPY + +#undef STARPU_PROF_TOOL + +#endif diff --git a/include/starpu_cublas.h b/include/starpu_cublas.h new file mode 100644 index 0000000..46c8e8d --- /dev/null +++ b/include/starpu_cublas.h @@ -0,0 +1,62 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUBLAS_H__ +#define __STARPU_CUBLAS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_CUDA_Extensions + @{ +*/ + +/** + Initialize CUBLAS on every CUDA device. The + CUBLAS library must be initialized prior to any CUBLAS call. Calling + starpu_cublas_init() will initialize CUBLAS on every CUDA device + controlled by StarPU. This call blocks until CUBLAS has been properly + initialized on every device. See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cublas_init(void); + +/** + Set the proper CUBLAS stream for CUBLAS v1. This must be called + from the CUDA codelet before calling CUBLAS v1 kernels, so that + they are queued on the proper CUDA stream. 
When using one thread + per CUDA worker, this function does not do anything since the + CUBLAS stream does not change, and is set once by + starpu_cublas_init(). + See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cublas_set_stream(void); + +/** + Synchronously deinitialize the CUBLAS library on + every CUDA device. + See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cublas_shutdown(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_CUBLAS_H__ */ diff --git a/include/starpu_cublasLt.h b/include/starpu_cublasLt.h new file mode 100644 index 0000000..25971d0 --- /dev/null +++ b/include/starpu_cublasLt.h @@ -0,0 +1,65 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUBLASLT_H__ +#define __STARPU_CUBLASLT_H__ + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_LIBCUBLASLT +#include +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_CUDA_Extensions + @{ +*/ + +/** + Initialize CUBLASLT on every CUDA device + controlled by StarPU. This call blocks until CUBLASLT has been properly + initialized on every device. See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cublasLt_init(void); + +/** + Synchronously deinitialize the CUBLASLT library on + every CUDA device. 
See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cublasLt_shutdown(void); + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_LIBCUBLASLT +/** + Return the CUBLASLT handle to be used to queue CUBLASLT + kernels. It is properly initialized and configured for multistream by + starpu_cublasLt_init(). See \ref CUDA-specificOptimizations for more details. +*/ +cublasLtHandle_t starpu_cublasLt_get_local_handle(void); +#endif +#endif + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_CUBLASLT_H__ */ diff --git a/include/starpu_cublas_v2.h b/include/starpu_cublas_v2.h new file mode 100644 index 0000000..ed73a6f --- /dev/null +++ b/include/starpu_cublas_v2.h @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUBLAS_V2_H__ +#define __STARPU_CUBLAS_V2_H__ + +#ifdef STARPU_USE_CUDA + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_CUDA_Extensions + @{ +*/ + +/** + Return the CUBLAS handle to be used to queue CUBLAS kernels. It + is properly initialized and configured for multistream by + starpu_cublas_init(). See \ref CUDA-specificOptimizations for more details. 
+*/ +cublasHandle_t starpu_cublas_get_local_handle(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif + +#endif /* __STARPU_CUBLAS_V2_H__ */ diff --git a/include/starpu_cuda.h b/include/starpu_cuda.h new file mode 100644 index 0000000..ac6b610 --- /dev/null +++ b/include/starpu_cuda.h @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUDA_H__ +#define __STARPU_CUDA_H__ + +#include + +#ifdef STARPU_USE_CUDA +#include +#include +#include + +#ifdef STARPU_HAVE_NVML_H +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_CUDA_Extensions CUDA Extensions + @{ +*/ + +/** + Report a CUBLAS error. + See \ref CUDASupport for more details. +*/ +void starpu_cublas_report_error(const char *func, const char *file, int line, int status); + +/** + Call starpu_cublas_report_error(), passing the current function, file and line position. +*/ +#define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status) + +/** + Report a CUDA error. + See \ref CUDASupport for more details. +*/ +void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status); + +/** + Call starpu_cuda_report_error(), passing the current function, file and line position. 
+*/ +#define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status) + +/** + Return the current worker’s CUDA stream. StarPU provides a stream + for every CUDA device controlled by StarPU. This function is only + provided for convenience so that programmers can easily use + asynchronous operations within codelets without having to create a + stream by hand. Note that the application is not forced to use the + stream provided by starpu_cuda_get_local_stream() and may also + create its own streams. Synchronizing with + cudaDeviceSynchronize() is allowed, but will reduce the + likelihood of having all transfers overlapped. + See \ref CUDA-specificOptimizations for more details. +*/ +cudaStream_t starpu_cuda_get_local_stream(void); + +/** + Return a pointer to device properties for worker \p workerid + (assumed to be a CUDA worker). See \ref EnablingImplementationAccordingToCapabilities for more details. +*/ +const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid); + +/** + Copy \p ssize bytes from the pointer \p src_ptr on \p src_node + to the pointer \p dst_ptr on \p dst_node. The function first tries to + copy the data asynchronously (unless \p stream is NULL). If the + asynchronous copy fails or if \p stream is NULL, it copies the + data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous + copy was successful, or fails otherwise. + + See \ref CUDASupport for more details. +*/ +int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind); + +/** + Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on + \p src_node to the pointer \p dst_ptr on \p dst_node. + + The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in + the source (resp. destination) interface.
+ + The function first tries to copy the data asynchronously (unless \p stream is + NULL). If the asynchronous copy fails or if \p stream is NULL, + it copies the data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous copy was + successful, or fails otherwise. + + See \ref CUDASupport for more details. +*/ +int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + cudaStream_t stream, enum cudaMemcpyKind kind); + +/** + Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the + pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. + + The blocks are grouped by \p numblocks_1 blocks whose start addresses are + ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) + interface. + + The function first tries to copy the data asynchronously (unless \p stream is + NULL). If the asynchronous copy fails or if \p stream is NULL, + it copies the data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous copy was + successful, or fails otherwise. + + See \ref CUDASupport for more details. +*/ +int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + cudaStream_t stream, enum cudaMemcpyKind kind); + +/** + Call cudaSetDevice(\p devid) or cudaGLSetGLDevice(\p devid), + according to whether \p devid is among the field + starpu_conf::cuda_opengl_interoperability. + + See \ref CUDASupport for more details. +*/ +void starpu_cuda_set_device(unsigned devid); + +#ifdef STARPU_HAVE_NVML_H +/** + Return the nvml device for a CUDA device. + See \ref CUDASupport for more details.
+*/ +nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid); +#endif + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_CUDA */ + +#endif /* __STARPU_CUDA_H__ */ diff --git a/include/starpu_cusolver.h b/include/starpu_cusolver.h new file mode 100644 index 0000000..834de47 --- /dev/null +++ b/include/starpu_cusolver.h @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUSOLVER_H__ +#define __STARPU_CUSOLVER_H__ + +#ifdef STARPU_USE_CUDA +#include +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_CUDA_Extensions + @{ +*/ + +/** + Initialize CUSOLVER on every CUDA device + controlled by StarPU. This call blocks until CUSOLVER has been properly + initialized on every device. + + See \ref CUDA-specificOptimizations +*/ +void starpu_cusolver_init(void); + +/** + Synchronously deinitialize the CUSOLVER library on + every CUDA device. + + See \ref CUDA-specificOptimizations +*/ +void starpu_cusolver_shutdown(void); + +#ifdef STARPU_USE_CUDA +/** + Return the CUSOLVER Dense handle to be used to queue CUSOLVER + kernels. It is properly initialized and configured for multistream by + starpu_cusolver_init(). 
+ + See \ref CUDA-specificOptimizations +*/ +cusolverDnHandle_t starpu_cusolverDn_get_local_handle(void); + +/** + Return the CUSOLVER Sparse handle to be used to queue CUSOLVER + kernels. It is properly initialized and configured for multistream by + starpu_cusolver_init(). + + See \ref CUDA-specificOptimizations +*/ +cusolverSpHandle_t starpu_cusolverSp_get_local_handle(void); + +/** + Return the CUSOLVER Refactorization handle to be used to queue CUSOLVER + kernels. It is properly initialized and configured for multistream by + starpu_cusolver_init(). + + See \ref CUDA-specificOptimizations +*/ +cusolverRfHandle_t starpu_cusolverRf_get_local_handle(void); + +/** + Report a CUSOLVER error. + See \ref CUDASupport for more details. +*/ +void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status); + +/** + Call starpu_cusolver_report_error(), passing the current function, file and line position. +*/ +#define STARPU_CUSOLVER_REPORT_ERROR(status) starpu_cusolver_report_error(__starpu_func__, __FILE__, __LINE__, status) + +#endif + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_CUSOLVER_H__ */ diff --git a/include/starpu_cusparse.h b/include/starpu_cusparse.h new file mode 100644 index 0000000..6459127 --- /dev/null +++ b/include/starpu_cusparse.h @@ -0,0 +1,61 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_CUSPARSE_H__ +#define __STARPU_CUSPARSE_H__ + +#ifdef STARPU_USE_CUDA +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_CUDA_Extensions + @{ +*/ + +/** + Initialize CUSPARSE on every CUDA device + controlled by StarPU. This call blocks until CUSPARSE has been properly + initialized on every device. See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cusparse_init(void); + +/** + Synchronously deinitialize the CUSPARSE library on + every CUDA device. See \ref CUDA-specificOptimizations for more details. +*/ +void starpu_cusparse_shutdown(void); + +#ifdef STARPU_USE_CUDA +/** + Return the CUSPARSE handle to be used to queue CUSPARSE + kernels. It is properly initialized and configured for multistream by + starpu_cusparse_init(). See \ref CUDA-specificOptimizations for more details. +*/ +cusparseHandle_t starpu_cusparse_get_local_handle(void); +#endif + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_CUSPARSE_H__ */ diff --git a/include/starpu_data.h b/include/starpu_data.h new file mode 100644 index 0000000..a1d8874 --- /dev/null +++ b/include/starpu_data.h @@ -0,0 +1,727 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_DATA_H__ +#define __STARPU_DATA_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Data_Management Data Management + @brief Data management facilities provided by StarPU. We show how + to use existing data interfaces in \ref API_Data_Interfaces, but + developers can design their own data interfaces if required. + @{ +*/ + +struct _starpu_data_state; + +/** + StarPU uses ::starpu_data_handle_t as an opaque handle to manage a + piece of data. Once a piece of data has been registered to StarPU, + it is associated to a ::starpu_data_handle_t which keeps track of + the state of the piece of data over the entire machine, so that we + can maintain data consistency and locate data replicates for + instance. See \ref DataInterface for more details. +*/ +typedef struct _starpu_data_state *starpu_data_handle_t; + +/** + Describe a StarPU data access mode + + Note: when adding a flag here, update + _starpu_detect_implicit_data_deps_with_handle + + Note: other STARPU_* values in include/starpu_task_util.h +*/ +enum starpu_data_access_mode +{ + STARPU_R = (1 << 0), /**< read-only mode */ + STARPU_W = (1 << 1), /**< write-only mode */ + STARPU_RW = (STARPU_R | STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W */ + STARPU_SCRATCH = (1 << 2), /**< A temporary buffer is allocated + for the task, but StarPU does not + enforce data consistency---i.e. each + device has its own buffer, + independently from each other (even + for CPUs), and no data transfer is + ever performed. This is useful for + temporary variables to avoid + allocating/freeing buffers inside + each task. Currently, no behavior is + defined concerning the relation with + the ::STARPU_R and ::STARPU_W modes + and the value provided at + registration --- i.e., the value of + the scratch buffer is undefined at + entry of the codelet function. 
It + is being considered for future + extensions at least to define the + initial value. For now, data to be + used in ::STARPU_SCRATCH mode should + be registered with node -1 and a + NULL pointer, since the value + of the provided buffer is simply + ignored for now. + + See \ref ScratchData for more details. + */ + STARPU_REDUX = (1 << 3), /**< Reduction mode. + StarPU will allocate on the fly a per-worker + buffer, so that various tasks that access the + same data in ::STARPU_REDUX mode can execute + in parallel. When a task accesses the + data without ::STARPU_REDUX, StarPU will + automatically reduce the different contributions. + + Codelets contributing to these reductions + with ::STARPU_REDUX must be registered with + ::STARPU_RW | ::STARPU_COMMUTE access modes. + + See \ref DataReduction for more details. + */ + STARPU_COMMUTE = (1 << 4), /**< ::STARPU_COMMUTE can be passed + along ::STARPU_W or ::STARPU_RW to + express that StarPU can let tasks + commute, which is useful e.g. when + bringing a contribution into some + data, which can be done in any order + (but still requires sequential + consistency against reads or + non-commutative writes). + + See \ref DataCommute for more details. + */ + STARPU_SSEND = (1 << 5), /**< used in starpu_mpi_task_insert() to + specify the data has to be sent using + a synchronous and non-blocking mode + (see starpu_mpi_issend()) + */ + STARPU_LOCALITY = (1 << 6), /**< used to tell the scheduler which + data is the most important for the + task, and should thus be used to + try to group tasks on the same core + or cache, etc. For now only the ws + and lws schedulers take this flag + into account, and only when rebuilt + with \c USE_LOCALITY flag defined in + the + src/sched_policies/work_stealing_policy.c + source code. + + TODO add extended description in documentation. + */ + STARPU_MPI_REDUX = (1 << 7), /**< Inter-node reduction only.
+ This is similar to ::STARPU_REDUX, except that + StarPU will allocate a per-node buffer only, + i.e. parallelism will be achieved between + nodes, but not within each node. This is + useful when the per-worker buffers allocated + with ::STARPU_REDUX consume too much memory. + + See \ref MPIMpiRedux for more details. + */ + STARPU_NOPLAN = (1 << 8), /**< Disable automatic submission of asynchronous + partitioning/unpartitioning, only used internally by StarPU + */ + STARPU_UNMAP = (1 << 9), /**< Request unmapping the destination replicate, only used internally by StarPU + */ + STARPU_NOFOOTPRINT = (1 << 10), /**< Ignore this data for the footprint computation. See \ref ScratchData + */ + STARPU_NONE = (1 << 11), /**< todo */ + STARPU_ACCESS_MODE_MAX = (1 << 12) /**< The purpose of ::STARPU_ACCESS_MODE_MAX is to + be the maximum of this enum. + */ +}; + +struct starpu_data_interface_ops; + +/** + Set the name of the data, to be shown in various profiling tools. + See \ref CreatingAGanttDiagram for more details. +*/ +void starpu_data_set_name(starpu_data_handle_t handle, const char *name); + +/** + Set the coordinates of the data, to be shown in various profiling + tools. \p dimensions is the size of the \p dims array. This can be + for instance the tile coordinates within a big matrix. See \ref CreatingAGanttDiagram for more details. +*/ +void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]); + +/** + Set the coordinates of the data, to be shown in various profiling + tools. \p dimensions is the number of subsequent \c int parameters. + This can be for instance the tile coordinates within a big matrix. See \ref CreatingAGanttDiagram for more details.
+*/ +void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...); + +/** + Get the coordinates of the data, as set by a previous call to + starpu_data_set_coordinates_array() or starpu_data_set_coordinates() + \p dimensions is the size of the \p dims array. + This returns the actual number of returned coordinates. + See \ref CreatingAGanttDiagram for more details. +*/ +unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]); + +/** + Unregister a data \p handle from StarPU. If the data was + automatically allocated by StarPU because the home node was -1, all + automatically allocated buffers are freed. Otherwise, a valid copy + of the data is put back into the home node in the buffer that was + initially registered. Using a data handle that has been + unregistered from StarPU results in an undefined behaviour. In case + we do not need to update the value of the data in the home node, we + can use the function starpu_data_unregister_no_coherency() instead. + See \ref TaskSubmission for more details. +*/ +void starpu_data_unregister(starpu_data_handle_t handle); + +/** + Similar to starpu_data_unregister(), except that StarPU does not + put back a valid copy into the home node, in the buffer that was + initially registered. See \ref DataManagementAllocation for more details. +*/ +void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); + +/** + Destroy the data \p handle once it is no longer needed by any + submitted task. No coherency is provided. + + This is not safe to call starpu_data_unregister_submit() on a handle that + comes from the registration of a non-NULL application home buffer, since the + moment when the unregistration will happen is unknown to the + application. Only calling starpu_shutdown() allows to be sure that the data + was really unregistered. See \ref TemporaryData for more details. 
+*/ +void starpu_data_unregister_submit(starpu_data_handle_t handle); + +/** + Deinitialize all replicates of the data \p handle immediately. After + data deinitialization, the first access to \p handle must be performed + in ::STARPU_W mode. Accessing a deinitialized data in ::STARPU_R + mode results in undefined behaviour. See \ref DataManagementAllocation for more details. +*/ +void starpu_data_deinitialize(starpu_data_handle_t handle); + +/** + Submit deinitialization of the data \p handle after completion of + previously submitted tasks. See \ref DataManagementAllocation for more details. +*/ +void starpu_data_deinitialize_submit(starpu_data_handle_t handle); + +/** + Destroy all replicates of the data \p handle immediately. After + data invalidation, the first access to \p handle must be performed + in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R + mode results in undefined behaviour. See \ref DataManagementAllocation for more details. + + This is the same as starpu_data_deinitialize(), plus explicitly releasing the buffers. +*/ +void starpu_data_invalidate(starpu_data_handle_t handle); + +/** + Submit invalidation of the data \p handle after completion of + previously submitted tasks. See \ref DataManagementAllocation for more details. + + This is the same as starpu_data_deinitialize_submit(), plus explicitly releasing the buffers. +*/ +void starpu_data_invalidate_submit(starpu_data_handle_t handle); + +/** + Specify that the data \p handle can be discarded without impacting + the application. +*/ +void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); + +/** + @name Access registered data from the application + @{ +*/ + +/** + This macro can be used to acquire data, but not require it to be + available on a given node, only enforce R/W dependencies. This can + for instance be used to wait for tasks which produce the data, but + without requesting a fetch to the main memory.
+*/ +#define STARPU_ACQUIRE_NO_NODE -1 + +/** + Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all + nodes, preventing them from being evicted for instance. This is + mostly useful inside StarPU only. +*/ +#define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2 + +/** + The application must call this function prior to accessing + registered data from main memory outside tasks. StarPU ensures that + the application will get an up-to-date copy of \p handle in main + memory located where the data was originally registered, and that + all concurrent accesses (e.g. from tasks) will be consistent with + the access mode specified with \p mode. starpu_data_release() must + be called once the application no longer needs to access the piece + of data. Note that implicit data dependencies are also enforced by + starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all + tasks scheduled to work on the data, unless they have been disabled + explicitly by calling + starpu_data_set_default_sequential_consistency_flag() or + starpu_data_set_sequential_consistency_flag(). + starpu_data_acquire() is a blocking call, so that it cannot be + called from tasks or from their callbacks (in that case, + starpu_data_acquire() returns -EDEADLK). Upon successful + completion, this function returns 0. See \ref DataAccess for more details. +*/ +int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode); + +/** + Similar to starpu_data_acquire(), except that the data will be + available on the given memory node instead of main memory. + ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can + be used instead of an explicit node number. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); + +/** + Asynchronous equivalent of starpu_data_acquire(). 
When the data + specified in \p handle is available in the access \p mode, the \p + callback function is executed. The application may access + the requested data during the execution of \p callback. The \p callback + function must call starpu_data_release() once the application no longer + needs to access the piece of data. Note that implicit data + dependencies are also enforced by starpu_data_acquire_cb() in case they + are not disabled. Contrary to starpu_data_acquire(), this function is + non-blocking and may be called from task callbacks. Upon successful + completion, this function returns 0. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + +/** + Similar to starpu_data_acquire_cb(), except that the + data will be available on the given memory node instead of main + memory. + ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be + used instead of an explicit node number. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + +/** + Similar to starpu_data_acquire_cb() with the possibility of + enabling or disabling data dependencies. + When the data specified in \p handle is available in the access + \p mode, the \p callback function is executed. The application may access + the requested data during the execution of this \p callback. The \p callback + function must call starpu_data_release() once the application no longer + needs to access the piece of data. Note that implicit data + dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they + are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency. 
+ Similarly to starpu_data_acquire_cb(), this function is + non-blocking and may be called from task callbacks. Upon successful + completion, this function returns 0. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + +/** + Similar to starpu_data_acquire_cb_sequential_consistency(), except that the + data will be available on the given memory node instead of main + memory. + ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an + explicit node number. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + +/** + Similar to starpu_data_acquire_on_node_cb_sequential_consistency(), + except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used + to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens + just before the acquisition, and \e post_sync_jobid happens just after the + release. + + \p callback_acquired is called when the data is acquired in terms of semantic, + but the data is not fetched yet. It is given a pointer to the node, which it + can modify if it wishes so. + + This is a very internal interface, subject to changes, do not use this. 
+*/ +int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback_acquired)(void *arg, int *node, enum starpu_data_access_mode mode), void (*callback)(void *arg), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid, int prio); + +/** + The application can call this function instead of starpu_data_acquire() so as to + acquire the data like starpu_data_acquire(), but only if all + previously-submitted tasks have completed, in which case starpu_data_acquire_try() + returns 0. StarPU will have ensured that the application will get an up-to-date + copy of \p handle in main memory located where the data was originally + registered. starpu_data_release() must be called once the application no longer + needs to access the piece of data. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode); + +/** + Similar to starpu_data_acquire_try(), except that the + data will be available on the given memory node instead of main + memory. + ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an + explicit node number. See \ref DataAccess for more details. +*/ +int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); + +#ifdef __GCC__ + +/** + STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(), + except that the code to be executed in a callback is directly provided + as a macro parameter, and the data \p handle is automatically released + after it. This permits to easily execute code which depends on the + value of some registered data. This is non-blocking too and may be + called from task callbacks. 
+*/ +#define STARPU_DATA_ACQUIRE_CB(handle, mode, code) \ + do \ + { \ + void callback(void *arg) \ + { \ + code; \ + starpu_data_release(handle); \ + } \ + starpu_data_acquire_cb(handle, mode, callback, NULL); \ + } \ + while (0) +#endif + +/** + Release the piece of data acquired by the + application either by starpu_data_acquire() or by + starpu_data_acquire_cb(). See \ref DataAccess for more details. +*/ +void starpu_data_release(starpu_data_handle_t handle); + +/** + Similar to starpu_data_release(), except that the data + was made available on the given memory \p node instead of main memory. + The \p node parameter must be exactly the same as the corresponding \c + starpu_data_acquire_on_node* call. See \ref DataAccess for more details. +*/ +void starpu_data_release_on_node(starpu_data_handle_t handle, int node); + +/** + Partly release the piece of data acquired by the application either by + starpu_data_acquire() or by starpu_data_acquire_cb(), switching the + acquisition down to \p down_to_mode. For now, only releasing from ::STARPU_RW + or ::STARPU_W acquisition down to ::STARPU_R is supported, or down to the same + acquisition. ::STARPU_NONE can also be passed as \p down_to_mode, in which + case this is equivalent to calling starpu_data_release(). See \ref DataAccess for more details. +*/ +void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); + +/** + Similar to starpu_data_release_to(), except that the data + was made available on the given memory \p node instead of main memory. + The \p node parameter must be exactly the same as the corresponding \c + starpu_data_acquire_on_node* call. See \ref DataAccess for more details. 
+*/ +void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode, int node); + +/** @} */ + +/** + This is an arbiter, which implements an advanced but centralized + management of concurrent data accesses, see \ref + ConcurrentDataAccess for the details. +*/ +typedef struct starpu_arbiter *starpu_arbiter_t; + +/** + Create a data access arbiter, see \ref ConcurrentDataAccess for the + details +*/ +starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; + +/** + Make access to \p handle managed by \p arbiter, see \ref + ConcurrentDataAccess for the details. +*/ +void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); + +/** + Destroy the \p arbiter. This must only be called after all data + assigned to it have been unregistered. See \ref + ConcurrentDataAccess for the details. +*/ +void starpu_arbiter_destroy(starpu_arbiter_t arbiter); + +/** + Explicitly ask StarPU to allocate room for a piece of data on + the specified memory \p node. See \ref DataPrefetch for more details. +*/ +int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); + +/** + Prefetch levels + + Data requests are ordered by priorities, but also by prefetching level, + between data that a task wants now, and data that we will probably want + "soon". +*/ +enum starpu_is_prefetch +{ + /** A task really needs it now! */ + STARPU_FETCH = 0, + /** A task will need it soon */ + STARPU_TASK_PREFETCH = 1, + /** It is a good idea to have it asap */ + STARPU_PREFETCH = 2, + /** Get this here when you have time to */ + STARPU_IDLEFETCH = 3, + STARPU_NFETCH +}; + +/** + Issue a fetch request for the data \p handle to \p node, i.e. + requests that the data be replicated to the given node as soon as possible, so that it is + available there for tasks. 
If \p async is 0, the call will + block until the transfer is achieved, else the call will return immediately, + after having just queued the request. In the latter case, the request will + asynchronously wait for the completion of any task writing on the + data. See \ref DataPrefetch for more details. +*/ +int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + +/** + Issue a prefetch request for the data \p handle to \p node, i.e. + requests that the data be replicated to \p node when there is room for it, so that it is + available there for tasks. If \p async is 0, the call will + block until the transfer is achieved, else the call will return immediately, + after having just queued the request. In the latter case, the request will + asynchronously wait for the completion of any task writing on the + data. See \ref DataPrefetch for more details. +*/ +int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + +/** + See \ref DataPrefetch for more details. + */ +int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + +/** + Issue an idle prefetch request for the data \p handle to \p node, i.e. + requests that the data be replicated to \p node, so that it is + available there for tasks, but only when the bus is really idle. If \p async is 0, the call will + block until the transfer is achieved, else the call will return immediately, + after having just queued the request. In the latter case, the request will + asynchronously wait for the completion of any task writing on the data. See \ref DataPrefetch for more details. +*/ +int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + +/** + See \ref DataPrefetch for more details. 
+ */ +int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + +/** + Check whether a valid copy of \p handle is currently available on + memory node \p node (or a transfer request for getting so is ongoing). See \ref SchedulingHelpers for more details. +*/ +unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); + +/** + Advise StarPU that \p handle will not be used in the near future, and is + thus a good candidate for eviction from GPUs. StarPU will thus write its value + back to its home node when the bus is idle, and select this data in priority + for eviction when memory gets low. See \ref DataPrefetch for more details. +*/ +void starpu_data_wont_use(starpu_data_handle_t handle); + +/** + Advise StarPU to evict \p handle from the memory node \p node. + StarPU will thus write its value back to its home node, before evicting it. + This may however fail if e.g. some task is still working on it. + + If the eviction was successful, 0 is returned; -1 is returned otherwise. + + See \ref DataPrefetch for more details. +*/ +int starpu_data_evict_from_node(starpu_data_handle_t handle, unsigned node); + +/** + Set the write-through mask of the data \p handle (and + its children), i.e. a bitmask of nodes where the data should be always + replicated after modification. It also prevents the data from being + evicted from these nodes when memory gets scarce. When the data is + modified, it is automatically transferred into those memory nodes. For + instance a 1<<0 write-through mask means that the CUDA workers + will commit their changes in main memory (node 0). See \ref DataManagementAllocation for more details. +*/ +void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); + +/** + @name Implicit Data Dependencies + In this section, we describe how StarPU makes it possible to + insert implicit task dependencies in order to enforce sequential data + consistency.
When this data consistency is enabled on a specific data + handle, any data access will appear as sequentially consistent from + the application. For instance, if the application submits two tasks + that access the same piece of data in read-only mode, and then a third + task that accesses it in write mode, dependencies will be added between + the two first tasks and the third one. Implicit data dependencies are + also inserted in the case of data accesses from the application. + @{ +*/ + +/** + Set the data consistency mode associated to a data handle. The + consistency mode set using this function has the priority over the + default mode which can be set with + starpu_data_set_default_sequential_consistency_flag(). + See \ref SequentialConsistency and \ref DataManagementAllocation for more details. +*/ +void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); + +/** + Get the data consistency mode associated to the data handle \p handle. See \ref SequentialConsistency for more details. +*/ +unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); + +/** + Return the default sequential consistency flag. See \ref SequentialConsistency for more details. +*/ +unsigned starpu_data_get_default_sequential_consistency_flag(void); + +/** + Set the default sequential consistency flag. If a non-zero + value is passed, a sequential data consistency will be enforced for + all handles registered after this function call, otherwise it is + disabled. By default, StarPU enables sequential data consistency. It + is also possible to select the data consistency mode of a specific + data handle with the function + starpu_data_set_sequential_consistency_flag(). See \ref SequentialConsistency for more details. +*/ +void starpu_data_set_default_sequential_consistency_flag(unsigned flag); + +/** @} */ + +/** + Set whether this data should be eligible to be evicted to disk + storage (1) or not (0). The default is 1.
See \ref OOCDataRegistration for more details. +*/ +void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag); + +/** + Get whether this data was set to be eligible to be evicted to disk + storage (1) or not (0). See \ref OOCDataRegistration for more details. +*/ +unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle); + +/** + Query the status of \p handle on the specified \p memory_node. + + \p is_allocated tells whether memory was allocated there for the data. + \p is_valid tells whether the actual value is available there. + \p is_loading tells whether the actual value is getting loaded there. + \p is_requested tells whether the actual value is requested to be loaded + there by some fetch/prefetch/idlefetch request. + See \ref DataPrefetch for more details. +*/ +void starpu_data_query_status2(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_loading, int *is_requested); + +/** + Same as starpu_data_query_status2(), but without the is_loading parameter. See \ref DataPrefetch for more details. +*/ +void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); + +struct starpu_codelet; + +/** + Set the codelets to be used for \p handle when it is accessed in the + mode ::STARPU_REDUX. Per-worker buffers will be initialized with + the codelet \p init_cl (which has to take one handle with ::STARPU_W), and + reduction between per-worker buffers will be done with the codelet \p + redux_cl (which has to take a first accumulation handle with + ::STARPU_RW|::STARPU_COMMUTE, and a second contribution handle with ::STARPU_R). + See \ref DataReduction and \ref TemporaryData for more details.
+*/ +void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); + +/** + Same as starpu_data_set_reduction_methods() but allows to pass + arguments to the reduction and init tasks +*/ +void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_cl_arg, struct starpu_codelet *init_cl, void *init_cl_arg); + +struct starpu_data_interface_ops *starpu_data_get_interface_ops(starpu_data_handle_t handle); + +/** + See \ref DataPrefetch for more details. +*/ +unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); + +/** + See \ref DataPrefetch for more details. +*/ +unsigned starpu_data_test_if_mapped_on_node(starpu_data_handle_t handle, unsigned memory_node); + +/** + See \ref DataPrefetch for more details. +*/ +void starpu_memchunk_tidy(unsigned memory_node); + +/** + Set the field \c user_data for the \p handle to \p user_data . It can + then be retrieved with starpu_data_get_user_data(). \p user_data can be any + application-defined value, for instance a pointer to an object-oriented + container for the data. + See \ref DataHandlesHelpers for more details. +*/ +void starpu_data_set_user_data(starpu_data_handle_t handle, void *user_data); + +/** + Retrieve the field \c user_data previously set for the \p handle. + See \ref DataHandlesHelpers for more details. +*/ +void *starpu_data_get_user_data(starpu_data_handle_t handle); + +/** + Set the field \c sched_data for the \p handle to \p sched_data . It can + then be retrieved with starpu_data_get_sched_data(). \p sched_data can be any + scheduler-defined value. + See \ref DataHandlesHelpers for more details. +*/ +void starpu_data_set_sched_data(starpu_data_handle_t handle, void *sched_data); + +/** + Retrieve the field \c sched_data previously set for the \p handle. + See \ref DataHandlesHelpers for more details. 
+*/ +void *starpu_data_get_sched_data(starpu_data_handle_t handle); + +/** + Check whether data \p handle can be evicted now from node \p node. See \ref DataPrefetch for more details. +*/ +int starpu_data_can_evict(starpu_data_handle_t handle, unsigned node, enum starpu_is_prefetch is_prefetch); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_DATA_H__ */ diff --git a/include/starpu_data_filters.h b/include/starpu_data_filters.h new file mode 100644 index 0000000..0e80414 --- /dev/null +++ b/include/starpu_data_filters.h @@ -0,0 +1,1152 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_DATA_FILTERS_H__ +#define __STARPU_DATA_FILTERS_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Data_Partition Data Partition + @{ +*/ + +struct starpu_data_interface_ops; + +/** + Describe a data partitioning operation, to be given to starpu_data_partition(). + See \ref DefiningANewDataFilter for more details. +*/ +struct starpu_data_filter +{ + /** + Fill the \p child_interface structure with interface information + for the \p i -th child of the parent \p father_interface (among + \p nparts). 
The \p filter structure is provided, allowing to inspect the + starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr + parameters. + The details of what needs to be filled in \p child_interface vary according + to the data interface, but generally speaking: +
 <ul> +
 <li> <c>id</c> is usually just copied over from the father, + when the sub data has the same structure as the father, + e.g. a subvector is a vector, a submatrix is a matrix, etc. + This is however not the case for instance when dividing a + BCSR matrix into its dense blocks, which then are matrices. +
 </li> +
 <li> <c>nx</c>, <c>ny</c> and alike are usually divided by + the number of subdata, depending how the subdivision is + done (e.g. nx division vs ny division for vertical matrix + division vs horizontal matrix division).</li> +
 <li> <c>ld</c> for matrix interfaces are usually just + copied over: the leading dimension (ld) usually does not + change.</li> +
 <li> <c>elemsize</c> is usually just copied over.</li> +
 <li> <c>ptr</c>, the pointer to the data, has to be + computed according to \p i and the father's <c>ptr</c>, so + as to point to the start of the sub data. This should + however be done only if the father has <c>ptr</c> different + from <c>NULL</c>: in the OpenCL case notably, the + <c>dev_handle</c> and <c>offset</c> fields are used + instead.</li> +
 <li> <c>dev_handle</c> should be just copied over from the + parent.</li> +
 <li> <c>offset</c> has to be computed according to \p i and + the father's <c>offset</c>, so as to provide the offset of + the start of the sub data. This is notably used for the + OpenCL case. +
 </li></ul>
    + */ + void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts); + + unsigned nchildren; /**< Number of parts to partition the data into. */ + + /** + Return the number of children. This can be used instead of + starpu_data_filter::nchildren when the number of children depends + on the actual data (e.g. the number of blocks in a sparse + matrix). + */ + unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle); + + /** + When children use different data interface, + return which interface is used by child number \p id. + */ + struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id); + + unsigned filter_arg; /**< Additional parameter for the filter function */ + + /** + Additional pointer parameter for + the filter function, such as the + sizes of the different parts. + */ + void *filter_arg_ptr; +}; + +/** + @name Basic API + @{ +*/ + +/** + Request the partitioning of \p initial_handle into several subdata + according to the filter \p f. + + Here an example of how to use the function. + \code{.c} + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nslicesx + }; + starpu_data_partition(A_handle, &f); + \endcode + + See \ref PartitioningData for more details. +*/ +void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); + +/** + Unapply the filter which has been applied to \p root_data, thus + unpartitioning the data. The pieces of data are collected back into + one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM). + Tasks working on the partitioned data will be waited for + by starpu_data_unpartition(). + + Here an example of how to use the function. + \code{.c} + starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); + \endcode + + See \ref PartitioningData for more details. 
+*/ +void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); + +/** + Return the \p i -th child of the given \p handle, which must have + been partitioned beforehand. + See \ref PartitioningData for more details. +*/ +starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); + +/** + Return the number of children \p handle has been partitioned into. + See \ref PartitioningData for more details. +*/ +int starpu_data_get_nb_children(starpu_data_handle_t handle); + +/** + After partitioning a StarPU data by applying a filter, + starpu_data_get_sub_data() can be used to get handles for each of the + data portions. \p root_data is the parent data that was partitioned. + \p depth is the number of filters to traverse (in case several filters + have been applied, to e.g. partition in row blocks, and then in column + blocks), and the subsequent parameters are the indexes. The function + returns a handle to the subdata. + + Here an example of how to use the function. + \code{.c} + h = starpu_data_get_sub_data(A_handle, 1, taskx); + \endcode + + See \ref PartitioningData for more details. +*/ +starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ...); + +/** + Similar to starpu_data_get_sub_data() but use a \c va_list for the + parameter list. + See \ref PartitioningData for more details. +*/ +starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); + +/** + Apply \p nfilters filters to the handle designated by \p + root_handle recursively. \p nfilters pointers to variables of the + type starpu_data_filter should be given. + See \ref PartitioningData for more details. +*/ +void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); + +/** + Apply \p nfilters filters to the handle designated by + \p root_handle recursively. Use a \p va_list of pointers to + variables of the type starpu_data_filter. 
+ See \ref PartitioningData for more details. +*/ +void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); + +/** + Apply \p nfilters filters to the handle designated by \p + root_handle recursively. The pointer of the filter list \p filters + of the type starpu_data_filter should be given. + See \ref PartitioningData for more details. +*/ +void starpu_data_map_filters_parray(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters); + +/** + Apply \p nfilters filters to the handle designated by \p + root_handle recursively. The list of filter \p filters + of the type starpu_data_filter should be given. + See \ref PartitioningData for more details. +*/ +void starpu_data_map_filters_array(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter *filters); + +/** @} */ + +/** + @name Asynchronous API + @{ +*/ + +/** + Plan to partition \p initial_handle into several subdata according to + the filter \p f. + The handles are returned into the \p children array, which has to be + the same size as the number of parts described in \p f. + + Here is an example of how to use the function: + \code{.c} + starpu_data_handle_t children[nslicesx]; + struct starpu_data_filter f = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nslicesx + }; + starpu_data_partition_plan(A_handle, &f, children); + \endcode + + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); + +/** + Submit the actual partitioning of \p initial_handle into the \p nparts + \p children handles. This call is asynchronous, it only submits that the + partitioning should be done, so that the \p children handles can now be used to + submit tasks, and \p initial_handle can not be used to submit tasks any more (to + guarantee coherency). 
+ For instance, + \code{.c} + starpu_data_partition_submit(A_handle, nslicesx, children); + \endcode + + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + +/** + Similar to starpu_data_partition_submit(), but do not invalidate \p + initial_handle. This allows to continue using it, but the application has to be + careful not to write to \p initial_handle or \p children handles, only read from + them, since the coherency is otherwise not guaranteed. This thus allows to + submit various tasks which concurrently read from various partitions of the data. + + When the application wants to write to \p initial_handle again, it should call + starpu_data_unpartition_submit(), which will properly add dependencies between the + reads on the \p children and the writes to be submitted. + + If instead the application wants to write to \p children handles, it should + call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add + dependencies between the reads on the \p initial_handle and the writes to be + submitted. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + +/** + Similar to starpu_data_partition_readonly_submit(), but allow to + specify the coherency to be used for the main data \p initial_handle. + See \ref AsynchronousPartitioning for more details. 
+ */ +void starpu_data_partition_readonly_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency); + +/** + Assume that a partitioning of \p initial_handle has already been submitted + in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade + that partitioning into read-write mode for the \p children, by invalidating \p + initial_handle, and adding the necessary dependencies. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + +/** + Assume that a partitioning of \p initial_handle has already been submitted + in read-write mode through starpu_data_partition_submit(), and will downgrade + that partitioning into read-only mode for the \p children, fetching data back to the \p + initial_handle, and adding the necessary dependencies. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_readonly_downgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + +/** + Assuming that \p initial_handle is partitioned into \p children, + submit an unpartitioning of \p initial_handle, i.e. submit a + gathering of the pieces on the requested \p gathering_node memory + node, and submit an invalidation of the children. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + +/** + Similar to starpu_data_unpartition_submit(), but do not invalidate the + \p children handles. This allows to continue using them, but the application has to be + careful not to write to \p initial_handle or \p children handles, only read from + them, since the coherency is otherwise not guaranteed.
This thus allows to + submit various tasks which concurrently read from various + partitions of the data. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + +/** + Clear the partition planning established between \p root_data and + \p children with starpu_data_partition_plan(). This will notably + submit an unregistration of all the \p children, which can thus not be + used any more afterwards. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); + +/** + Similar to starpu_data_partition_clean() but the root data will be + gathered on the given node. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_partition_clean_node(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children, int gather_node); + +/** + Similar to starpu_data_unpartition_submit_sequential_consistency() + but allow to specify a callback function for the unpartitioning task. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg); + +/** + Similar to starpu_data_partition_submit() but also allow to specify + the coherency to be used for the main data \p initial_handle + through the parameter \p sequential_consistency. + See \ref AsynchronousPartitioning for more details.
+*/ +void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency); + +/** + Similar to starpu_data_unpartition_submit() but also allow to specify + the coherency to be used for the main data \p initial_handle + through the parameter \p sequential_consistency. + See \ref AsynchronousPartitioning for more details. +*/ +void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency); + +/** @} */ + +/** + @name Predefined BCSR Filter Functions + Predefined partitioning functions for BCSR data. Examples on how to + use them are shown in \ref PartitioningData. + @{ +*/ + +/** + Partition a block-sparse matrix into dense matrices. + starpu_data_filter::get_child_ops needs to be set to + starpu_bcsr_filter_canonical_block_child_ops() + and starpu_data_filter::get_nchildren set to + starpu_bcsr_filter_canonical_block_get_nchildren(). + + See \ref BCSRDataInterface for more details. +*/ +void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the number of children obtained with starpu_bcsr_filter_canonical_block(). + See \ref BCSRDataInterface for more details. +*/ +unsigned starpu_bcsr_filter_canonical_block_get_nchildren(struct starpu_data_filter *f, starpu_data_handle_t handle); + +/** + Return the child_ops of the partition obtained with starpu_bcsr_filter_canonical_block(). + See \ref BCSRDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Partition a block-sparse matrix into block-sparse matrices. + + The split is done along the leading dimension, i.e. along adjacent nnz blocks. 
+ + See \ref BCSRDataInterface for more details. +*/ +void starpu_bcsr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** @} */ + +/** + @name Predefined CSR Filter Functions + Predefined partitioning functions for CSR data. Examples on how to + use them are shown in \ref PartitioningData. + @{ +*/ + +/** + Partition a block-sparse matrix into vertical block-sparse matrices. + + See \ref CSRDataInterface for more details. +*/ +void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** @} */ + +/** + @name Predefined Matrix Filter Functions + Predefined partitioning functions for matrix + data. Examples on how to use them are shown in \ref + PartitioningData. + Note: this is using the C element order which is row-major, i.e. elements + with consecutive x coordinates are consecutive in memory. + @{ +*/ + +/** + Partition a dense Matrix along the x dimension, thus getting (x/\p + nparts ,y) matrices. If \p nparts does not divide x, the last + submatrix contains the remainder. + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a dense Matrix along the x dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting ((x-2*shadow)/\p + nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow, + the last submatrix contains the remainder. + + IMPORTANT: This can + only be used for read-only access, as no coherency is enforced for the + shadowed parts. A usage example is available in + examples/filters/shadow2d.c + + See \ref MatrixDataInterface for more details. 
+*/ +void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a dense Matrix along the y dimension, thus getting + (x,y/\p nparts) matrices. If \p nparts does not divide y, the last + submatrix contains the remainder. + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a dense Matrix along the y dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not + divide y-2*shadow, the last submatrix contains the remainder. + + IMPORTANT: This can only be used for read-only access, as no + coherency is enforced for the shadowed parts. A usage example is + available in examples/filters/shadow2d.c + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous vectors from a matrix along + the Y dimension. The starting position on Y-axis is set in + starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_matrix_filter_pick_vector_child_ops(). A usage example is + available in examples/filters/fmatrix_pick_vector.c + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_filter_pick_vector_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_matrix_filter_pick_vector_y(). + See \ref MatrixDataInterface for more details. 
+*/ +struct starpu_data_interface_ops *starpu_matrix_filter_pick_vector_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Pick \p nparts contiguous variables from a matrix. The starting position + is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_matrix_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/fmatrix_pick_variable.c + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_matrix_filter_pick_variable(). + See \ref MatrixDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_matrix_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** @} */ + +/** + @name Predefined Vector Filter Functions + Predefined partitioning functions for vector + data. Examples on how to use them are shown in \ref + PartitioningData. + @{ +*/ + +/** + Return in \p child_interface the \p id th element of the vector + represented by \p father_interface once partitioned in \p nparts chunks of + equal size. + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return in \p child_interface the \p id th element of the vector + represented by \p father_interface once partitioned in \p nparts chunks of + equal size with a shadow border starpu_data_filter::filter_arg_ptr, thus getting a vector + of size (n-2*shadow)/nparts+2*shadow. The starpu_data_filter::filter_arg_ptr field + of \p f must be the shadow size casted into \c void*. + + IMPORTANT: This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. 
An usage example is available in + examples/filters/shadow.c + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return in \p child_interface the \p id th element of the vector + represented by \p father_interface once partitioned into \p nparts chunks + according to the starpu_data_filter::filter_arg_ptr field of \p f. The + starpu_data_filter::filter_arg_ptr field must point to an array of \p nparts long + elements, each of which specifies the number of elements in each chunk + of the partition. + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return in \p child_interface the \p id th element of the vector + represented by \p father_interface once partitioned into \p nparts chunks + according to the starpu_data_filter::filter_arg_ptr field of \p f. The + starpu_data_filter::filter_arg_ptr field must point to an array of \p nparts uint32_t + elements, each of which specifies the number of elements in each chunk + of the partition. + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return in \p child_interface the \p id th element of the vector + represented by \p father_interface once partitioned in 2 chunks of + equal size, ignoring nparts. Thus, \p id must be 0 or 1. + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous variables from a vector. The starting + position is set in starpu_data_filter::filter_arg_ptr. 
+ + starpu_data_filter::get_child_ops needs to be set to + starpu_vector_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/fvector_pick_variable.c + + See \ref VectorDataInterface for more details. +*/ +void starpu_vector_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_vector_filter_pick_variable(). + See \ref VectorDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_vector_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** @} */ + +/** + @name Predefined Block Filter Functions + Predefined partitioning functions for block data. Examples on how + to use them are shown in \ref PartitioningData. An example is + available in \c examples/filters/shadow3d.c + Note: this is using the C element order which is row-major, i.e. elements + with consecutive x coordinates are consecutive in memory. + @{ +*/ + +/** + Partition a block along the X dimension, thus getting + (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last + submatrix contains the remainder. + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a block along the X dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not + divide x, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref BlockDataInterface for more details. 
+*/ +void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a block along the Y dimension, thus getting + (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last + submatrix contains the remainder. + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a block along the Y dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not + divide y, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a block along the Z dimension, thus getting + (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last + submatrix contains the remainder. + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a block along the Z dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not + divide z, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref BlockDataInterface for more details. 
+*/ +void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous matrices from a block along + the Z dimension. The starting position on Z-axis is set in + starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_block_filter_pick_matrix_child_ops(). A usage example is + available in examples/filters/fblock_pick_matrix.c + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_pick_matrix_z(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous matrices from a block along + the Y dimension. The starting position on Y-axis is set in + starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_block_filter_pick_matrix_child_ops(). A usage example is + available in examples/filters/fblock_pick_matrix.c + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_filter_pick_matrix_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_block_filter_pick_matrix_z() + and starpu_block_filter_pick_matrix_y(). + See \ref BlockDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_block_filter_pick_matrix_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Pick \p nparts contiguous variables from a block. The starting position + is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_block_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/fblock_pick_variable.c + + See \ref BlockDataInterface for more details. 
+*/ +void starpu_block_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_block_filter_pick_variable(). + See \ref BlockDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_block_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** @} */ + +/** + @name Predefined Tensor Filter Functions + Predefined partitioning functions for tensor + data. + @{ +*/ + +/** + Partition a tensor along the X dimension, thus getting + (x/\p nparts ,y,z,t) tensors. If \p nparts does not divide x, the last + submatrix contains the remainder. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the X dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + ((x-2*shadow)/\p nparts +2*shadow,y,z,t) tensors. If \p nparts does not + divide x, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the Y dimension, thus getting + (x,y/\p nparts ,z,t) tensors. If \p nparts does not divide y, the last + submatrix contains the remainder. + + See \ref TensorDataInterface for more details. 
+*/ +void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the Y dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,(y-2*shadow)/\p nparts +2*shadow,z,t) tensors. If \p nparts does not + divide y, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the Z dimension, thus getting + (x,y,z/\p nparts,t) tensors. If \p nparts does not divide z, the last + submatrix contains the remainder. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the Z dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,y,(z-2*shadow)/\p nparts +2*shadow,t) tensors. If \p nparts does not + divide z, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the T dimension, thus getting + (x,y,z,t/\p nparts) tensors. If \p nparts does not divide t, the last + submatrix contains the remainder. + + See \ref TensorDataInterface for more details. 
+*/ +void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a tensor along the T dimension, with a + shadow border starpu_data_filter::filter_arg_ptr, thus getting + (x,y,z,(t-2*shadow)/\p nparts +2*shadow) tensors. If \p nparts does not + divide t, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous blocks from a tensor along + the T dimension. The starting position on T-axis is set in + starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_tensor_filter_pick_block_child_ops(). A usage example is + available in examples/filters/ftensor_pick_block.c + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_pick_block_t(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous blocks from a tensor along + the Z dimension. The starting position on Z-axis is set in + starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_tensor_filter_pick_block_child_ops(). A usage example is + available in examples/filters/ftensor_pick_block.c + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_pick_block_z(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous blocks from a tensor along + the Y dimension. The starting position on Y-axis is set in + starpu_data_filter::filter_arg_ptr. 
+ + starpu_data_filter::get_child_ops needs to be set to + starpu_tensor_filter_pick_block_child_ops(). A usage example is + available in examples/filters/ftensor_pick_block.c + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_pick_block_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_tensor_filter_pick_block_t(), + starpu_tensor_filter_pick_block_z() and starpu_tensor_filter_pick_block_y(). + See \ref TensorDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_tensor_filter_pick_block_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Pick \p nparts contiguous variables from a tensor. The starting position + is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_tensor_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/ftensor_pick_variable.c + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_tensor_filter_pick_variable(). + See \ref TensorDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_tensor_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** @} */ + +/** + @name Predefined Ndim Filter Functions + Predefined partitioning functions for ndim array + data. + @{ +*/ + +/** + Partition a ndim array along the given dimension set in + starpu_data_filter::filter_arg. If \p nparts does not + divide the element number on dimension, the last submatrix contains the remainder. + + See \ref NdimDataInterface for more details. 
+*/ +void starpu_ndim_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a ndim array along the given dimension set in + starpu_data_filter::filter_arg, with a shadow border + starpu_data_filter::filter_arg_ptr. If \p nparts does not + divide the element number on dimension, the last submatrix contains the remainder. + + IMPORTANT: + This can only be used for read-only access, as no coherency is + enforced for the shadowed parts. + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a 4-dim array into \p nparts tensors along the given + dimension set in starpu_data_filter::filter_arg. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_to_tensor_child_ops(). A usage example is + available in examples/filters/fndim_to_tensor.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_to_tensor(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a 3-dim array into \p nparts blocks along the given + dimension set in starpu_data_filter::filter_arg. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_to_block_child_ops(). A usage example is + available in examples/filters/fndim_to_block.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_to_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a 2-dim array into \p nparts matrices along the given + dimension set in starpu_data_filter::filter_arg. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_to_matrix_child_ops(). 
A usage example is + available in examples/filters/fndim_to_matrix.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_to_matrix(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Partition a 1-dim array into \p nparts vectors. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_to_vector_child_ops(). A usage example is + available in examples/filters/fndim_to_vector.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_to_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Transfer a 0-dim array to a variable. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_to_variable_child_ops(). A usage example is + available in examples/filters/fndim_to_variable.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_to_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous (n-1)dim arrays from a ndim array along + the given dimension set in starpu_data_filter::filter_arg. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + A usage example is available in examples/filters/fndim_pick_ndim.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_pick_ndim(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous tensors from a 5-dim array along + the given dimension set in starpu_data_filter::filter_arg. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_tensor_child_ops(). 
A usage example is + available in examples/filters/fndim_5d_pick_tensor.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_5d_pick_tensor(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous blocks from a 4-dim array along + the given dimension set in starpu_data_filter::filter_arg. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_block_child_ops(). A usage example is + available in examples/filters/fndim_4d_pick_block.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_4d_pick_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous matrices from a 3-dim array along + the given dimension set in starpu_data_filter::filter_arg. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_matrix_child_ops(). A usage example is + available in examples/filters/fndim_3d_pick_matrix.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_3d_pick_matrix(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous vectors from a 2-dim array along + the given dimension set in starpu_data_filter::filter_arg. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_vector_child_ops(). A usage example is + available in examples/filters/fndim_2d_pick_vector.c + + See \ref NdimDataInterface for more details. 
+*/ +void starpu_ndim_filter_2d_pick_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous variables from a 1-dim array. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/fndim_1d_pick_variable.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_1d_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Pick \p nparts contiguous variables from a ndim array. + The starting position is set in starpu_data_filter::filter_arg_ptr. + + starpu_data_filter::get_child_ops needs to be set to + starpu_ndim_filter_pick_variable_child_ops(). A usage example is + available in examples/filters/fndim_pick_variable.c + + See \ref NdimDataInterface for more details. +*/ +void starpu_ndim_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_5d_pick_tensor(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_pick_tensor_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_4d_pick_block(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_pick_block_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_3d_pick_matrix(). + See \ref NdimDataInterface for more details.
+*/ +struct starpu_data_interface_ops *starpu_ndim_filter_pick_matrix_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_2d_pick_vector(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_pick_vector_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_pick_variable() + and starpu_ndim_filter_1d_pick_variable(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_to_tensor(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_to_tensor_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_to_block(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_to_block_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_to_matrix(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_to_matrix_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_to_vector(). + See \ref NdimDataInterface for more details. +*/ +struct starpu_data_interface_ops *starpu_ndim_filter_to_vector_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Return the child_ops of the partition obtained with starpu_ndim_filter_to_variable(). + See \ref NdimDataInterface for more details.
+*/ +struct starpu_data_interface_ops *starpu_ndim_filter_to_variable_child_ops(struct starpu_data_filter *f, unsigned child); + +/** + Given an integer \p n, \p nparts the number of parts it must be divided in, \p id the + part currently considered, determines the \p chunk_size and the \p offset, taking + into account the size of the elements stored in the data structure \p elemsize + and \p blocksize, which is most often 1. + See \ref DefiningANewDataFilter for more details. + */ +void starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts, size_t elemsize, unsigned id, unsigned blocksize, unsigned *chunk_size, size_t *offset); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/starpu_data_interfaces.h b/include/starpu_data_interfaces.h new file mode 100644 index 0000000..637ade3 --- /dev/null +++ b/include/starpu_data_interfaces.h @@ -0,0 +1,2723 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#ifndef __STARPU_DATA_INTERFACES_H__ +#define __STARPU_DATA_INTERFACES_H__ + +#include + +#ifdef STARPU_USE_CUDA +/* to use CUDA streams */ +#include +typedef cudaStream_t starpu_cudaStream_t; +#endif + +#ifdef STARPU_USE_HIP +/* to use HIP streams */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#ifndef __cplusplus +#pragma GCC diagnostic ignored "-Wimplicit-int" +#endif +#pragma GCC diagnostic ignored "-Wreturn-type" +#include +#pragma GCC diagnostic pop +typedef hipStream_t starpu_hipStream_t; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Data_Interfaces Data Interfaces + @brief Data management is done at a high-level in StarPU: rather than + accessing a mere list of contiguous buffers, the tasks may manipulate + data that are described by a high-level construct which we call data + interface. + + An example of data interface is the "vector" interface which describes + a contiguous data array on a specific memory node. This interface is a + simple structure containing the number of elements in the array, the + size of the elements, and the address of the array in the appropriate + address space (this address may be invalid if there is no valid copy + of the array in the memory node). More information on the data + interfaces provided by StarPU is given in \ref API_Data_Interfaces. + + When a piece of data managed by StarPU is used by a task, the task + implementation is given a pointer to an interface describing a valid + copy of the data that is accessible from the current processing unit. + + Every worker is associated to a memory node which is a logical + abstraction of the address space from which the processing unit gets + its data.
For instance, the memory node associated to the different + CPU workers represents main memory (RAM), the memory node associated + to a GPU is DRAM embedded on the device. Every memory node is + identified by a logical index which is accessible from the + function starpu_worker_get_memory_node(). When registering a piece of + data to StarPU, the specified memory node indicates where the piece of + data initially resides (we also call this memory node the home node of + a piece of data). + + In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id() + and starpu_memory_nodes_numa_id_to_devid() can be used to convert between NUMA node + numbers as seen by the Operating System and NUMA node numbers as seen by StarPU. + + There are several ways to register a memory region so that it can be + managed by StarPU. StarPU provides data interfaces for vectors, 2D + matrices, 3D matrices as well as BCSR and CSR sparse matrices. + + Each data interface is provided with a set of field access functions. + The ones using a void * parameter are meant to be used in codelet + implementations (see for example the code in + \ref VectorScalingUsingStarPUAPI). + + Applications can provide their own interface as shown in \ref DefiningANewDataInterface. + + @{ +*/ + +/** + Define the per-interface methods. If the + starpu_data_copy_methods::any_to_any method is provided, it will be + used by default if no specific method is provided. It can still be + useful to provide more specific method in case of e.g. available + particular CUDA, HIP or OpenCL support. + + See \ref DefiningANewDataInterface_copy for more details. +*/ +struct starpu_data_copy_methods +{ + /** + If defined, allow the interface to declare whether it supports + transferring from \p src_interface on node \p src_node to \p + dst_interface on node \p dst_node, run from node \p handling_node. + If not defined, it is assumed that the interface supports all + transfers.
+ */ + int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node CPU node. Return 0 on success. + */ + int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node CUDA node. Return 0 on success. + */ + int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node HIP node. Return 0 on success. + */ + int (*ram_to_hip)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node OpenCL node. Return 0 on success. + */ + int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node FPGA node. Return 0 on success. + */ + int (*ram_to_max_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CUDA node to the \p dst_interface interface on the \p + dst_node CPU node. Return 0 on success. 
+ */ + int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CUDA node to the \p dst_interface interface on the \p + dst_node CUDA node. Return 0 on success. + */ + int (*cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node HIP node to the \p dst_interface interface on the \p + dst_node CPU node. Return 0 on success. + */ + int (*hip_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node HIP node to the \p dst_interface interface on the \p + dst_node HIP node. Return 0 on success. + */ + int (*hip_to_hip)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node OpenCL node to the \p dst_interface interface on the + \p dst_node CPU node. Return 0 on success. + */ + int (*opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node OpenCL node to the \p dst_interface interface on the + \p dst_node OpenCL node. Return 0 on success. + */ + int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node FPGA node to the \p dst_interface interface on the \p + dst_node CPU node. Return 0 on success. 
+ */ + int (*max_fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node); + +#ifdef STARPU_USE_CUDA + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node CUDA node, using the given stream. Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CUDA node to the \p dst_interface interface on the \p + dst_node CPU node, using the given stream. Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CUDA node to the \p dst_interface interface on the \p + dst_node CUDA node, using the given stream. Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); +#else + int (*ram_to_cuda_async)(void); + int (*cuda_to_ram_async)(void); + int (*cuda_to_cuda_async)(void); +#endif + +#ifdef STARPU_USE_HIP + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node HIP node, using the given stream. 
Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*ram_to_hip_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node HIP node to the \p dst_interface interface on the \p + dst_node CPU node, using the given stream. Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*hip_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node HIP node to the \p dst_interface interface on the \p + dst_node HIP node, using the given stream. Must return 0 if the + transfer was actually completed completely synchronously, or + -EAGAIN if at least some transfers are still ongoing and + should be awaited for by the core. + */ + int (*hip_to_hip_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); +#else + int (*ram_to_hip_async)(void); + int (*hip_to_ram_async)(void); + int (*hip_to_hip_async)(void); +#endif + +#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node OpenCL node, by recording in \p event, a pointer to a + cl_event, the event of the last submitted transfer. Must + return 0 if the transfer was actually completed completely + synchronously, or -EAGAIN if at least some transfers are + still ongoing and should be awaited for by the core. 
+ */ + int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); + /** + Define how to copy data from the \p src_interface interface on the + \p src_node OpenCL node to the \p dst_interface interface on the + \p dst_node CPU node, by recording in \p event, a pointer to a + cl_event, the event of the last submitted transfer. Must + return 0 if the transfer was actually completed completely + synchronously, or -EAGAIN if at least some transfers are + still ongoing and should be awaited for by the core. + */ + int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); + /** + Define how to copy data from the \p src_interface interface on the + \p src_node OpenCL node to the \p dst_interface interface on the + \p dst_node OpenCL node, by recording in \p event, a pointer to a + cl_event, the event of the last submitted transfer. Must + return 0 if the transfer was actually completed completely + synchronously, or -EAGAIN if at least some transfers are + still ongoing and should be awaited for by the core. + */ + int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); +#else + int (*ram_to_opencl_async)(void); + int (*opencl_to_ram_async)(void); + int (*opencl_to_opencl_async)(void); +#endif + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node CPU node to the \p dst_interface interface on the \p + dst_node FPGA node. Must return 0 if the transfer was actually + completed completely synchronously, or -EAGAIN if at least + some transfers are still ongoing and should be awaited for by the + core. 
+ */ + int (*ram_to_max_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node FPGA node to the \p dst_interface interface on the \p + dst_node CPU node. Must return 0 if the transfer was actually + completed completely synchronously, or -EAGAIN if at least + some transfers are still ongoing and should be awaited for by the + core. + */ + int (*max_fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node); + + /** + Define how to copy data from the \p src_interface interface on the + \p src_node node to the \p dst_interface interface on the \p + dst_node node. This is meant to be implemented through the + starpu_interface_copy() helper, to which async_data should be + passed as such, and will be used to manage asynchronicity. This + must return -EAGAIN if any of the starpu_interface_copy() + calls has returned -EAGAIN (i.e. at least some transfer is + still ongoing), and return 0 otherwise. + + This can only be implemented if the interface has ready-to-send + data blocks. If the interface is more involved than + this, i.e. it needs to collect pieces of data before + transferring, starpu_data_interface_ops::pack_data and + starpu_data_interface_ops::peek_data should be implemented instead, + and the core will just transfer the resulting data buffer. 
+ */ + int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +}; + +/** + Identifier for all predefined StarPU data interfaces +*/ +enum starpu_data_interface_id +{ + STARPU_UNKNOWN_INTERFACE_ID = -1, /**< Unknown interface */ + STARPU_MATRIX_INTERFACE_ID = 0, /**< Identifier for the matrix data interface */ + STARPU_BLOCK_INTERFACE_ID = 1, /**< Identifier for the block data interface*/ + STARPU_VECTOR_INTERFACE_ID = 2, /**< Identifier for the vector data interface*/ + STARPU_CSR_INTERFACE_ID = 3, /**< Identifier for the CSR data interface*/ + STARPU_BCSR_INTERFACE_ID = 4, /**< Identifier for the BCSR data interface*/ + STARPU_VARIABLE_INTERFACE_ID = 5, /**< Identifier for the variable data interface*/ + STARPU_VOID_INTERFACE_ID = 6, /**< Identifier for the void data interface*/ + STARPU_MULTIFORMAT_INTERFACE_ID = 7, /**< Identifier for the multiformat data interface*/ + STARPU_COO_INTERFACE_ID = 8, /**< Identifier for the COO data interface*/ + STARPU_TENSOR_INTERFACE_ID = 9, /**< Identifier for the tensor data interface*/ + STARPU_NDIM_INTERFACE_ID = 10, /**< Identifier for the ndim array data interface*/ + STARPU_MAX_INTERFACE_ID = 11 /**< Maximum number of data interfaces */ +}; + +/** + Per-interface data management methods. +*/ +struct starpu_data_interface_ops +{ + /** + Register an existing interface into a data handle. + + This iterates over all memory nodes to initialize all fields of the data + interface on each of them. Since data is not allocated yet except on the + home node, pointers should be left as NULL except on the \p home_node (if >= 0), for + which the pointers should be copied from the given \p data_interface, which + was filled with the application's pointers. + + This method is mandatory. + + See \ref DefiningANewDataInterface_registration for more details. 
+ */ + void (*register_data_handle)(starpu_data_handle_t handle, int home_node, void *data_interface); + + /** + Unregister a data handle. + + This iterates over all memory nodes to free any pointer in the data + interface on each of them. + + At this point, free_data_on_node has been already called on each of them. + This just clears anything that would still be left. + + See \ref DefiningANewDataInterface_registration for more details. + */ + void (*unregister_data_handle)(starpu_data_handle_t handle); + + /** + Allocate data for the interface on a given node. This should use + starpu_malloc_on_node() to perform the allocation(s), and fill the pointers + in the data interface. It should return the size of the allocated memory, or + -ENOMEM if memory could not be allocated. + + Note that the memory node can be CPU memory, GPU memory, or even disk + area. The result returned by starpu_malloc_on_node() should be just + stored as uintptr_t without trying to interpret it since it may be a + GPU pointer, a disk descriptor, etc. + + This method is mandatory to be able to support memory nodes. + + See \ref DefiningANewDataInterface_pointers for more details. + */ + starpu_ssize_t (*allocate_data_on_node)(void *data_interface, unsigned node); + + /** + Free data of the interface on a given node. + + This method is mandatory to be able to support memory nodes. + + See \ref DefiningANewDataInterface_pointers for more details. + */ + void (*free_data_on_node)(void *data_interface, unsigned node); + + /** + Cache the buffers from the given node to a caching interface. + + This method is optional, mostly useful when also making + starpu_data_interface_ops::unregister_data_handle check that pointers are NULL. + + \p src_interface is an interface that already has buffers + allocated, but which we don't need any more. \p cached_interface + is a new interface into which the buffer pointers should be + transferred, for later reuse when allocating data of the same kind. 
+ + Usually we can just memcpy over the set of pointers and descriptions + (this is what StarPU does when this method is not implemented), but + if unregister_data_handle checks that pointers are NULL, we need to + additionally clear the pointers in \p src_interface. Also, + it is not useful to copy the whole interface, only the + pointers need to be copied (essentially the pointers that + starpu_data_interface_ops::reuse_data_on_node will then transfer into + a new handle interface), as well as the properties + that starpu_data_interface_ops::compare (or + starpu_data_interface_ops::alloc_compare if defined) needs for + comparing interfaces for caching compatibility. + + When this method is not defined, StarPU will just copy \p + src_interface into \p cached_interface. + + See \ref VariableSizeDataInterface and \ref DefiningANewDataInterface_pointers for more details. + */ + void (*cache_data_on_node)(void *cached_interface, void *src_interface, unsigned node); + + /** + Reuse on the given node the buffers of the provided interface + + This method is optional, mostly useful when also defining + alloc_footprint to share tiles of the same allocation size but + different shapes, or when the interface contains pointers which + are initialized at registration (e.g. nn array in the ndim interface) + + \p cached_interface is an already-allocated buffer that we want to + reuse, and \p dst_data_interface is an interface in which we want to + install that already-allocated buffer. Usually we can just memcpy over + the set of pointers and descriptions. But e.g. with 2D tiles the ld + value may not be correct, and memcpy would wrongly overwrite it in + \p dst_data_interface, i.e. reusing a vertical tile allocation for a horizontal tile, or vice-versa. + + reuse_data_on_node should thus copy over pointers, and define fields + that are usually set by allocate_data_on_node (e.g. ld).
+ + See \ref VariableSizeDataInterface and \ref DefiningANewDataInterface_pointers for more details. + */ + void (*reuse_data_on_node)(void *dst_data_interface, const void *cached_interface, unsigned node); + + /** + Map data from a source to a destination. + Define function starpu_interface_map() to set this field. + See \ref DefiningANewDataInterface_pointers for more details. + */ + int (*map_data)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Unmap data from a source to a destination. + Define function starpu_interface_unmap() to set this field. + See \ref DefiningANewDataInterface_pointers for more details. + */ + int (*unmap_data)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Update map data from a source to a destination. + Define function starpu_interface_update_map() to set this field. + See \ref DefiningANewDataInterface_pointers for more details. + */ + int (*update_map)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + + /** + Initialize the interface. + This method is optional. It is called when initializing the + handler on all the memory nodes. + */ + void (*init)(void *data_interface); + + /** + Struct with pointer to functions for performing ram/cuda/opencl synchronous and asynchronous transfers. + + This field is mandatory to be able to support memory + nodes, except disk nodes which can be supported by just + implementing starpu_data_interface_ops::pack_data and + starpu_data_interface_ops::unpack_data. + */ + const struct starpu_data_copy_methods *copy_methods; + + /** + @deprecated + Use starpu_data_interface_ops::to_pointer instead. + Return the current pointer (if any) for the handle on the given node. + + This method is only required if starpu_data_interface_ops::to_pointer + is not implemented. 
+ */ + void *(*handle_to_pointer)(starpu_data_handle_t handle, unsigned node); + + /** + Return the current pointer (if any) for the given interface on the given node. + + This method is only required for starpu_data_handle_to_pointer() + and starpu_data_get_local_ptr(), and for disk support. + */ + void *(*to_pointer)(void *data_interface, unsigned node); + + /** + Return an estimation of the size of data, for performance models and tracing feedback. + */ + size_t (*get_size)(starpu_data_handle_t handle); + + /** + Return an estimation of the size of allocated data, for allocation + management. + If not specified, the starpu_data_interface_ops::get_size method is + used instead. + */ + size_t (*get_alloc_size)(starpu_data_handle_t handle); + + /** + Return the maximum size that the data may need to increase to. For + instance, in the case of compressed matrix tiles this is the size + when the block is fully dense. + This is currently only used for feedback tools. + */ + size_t (*get_max_size)(starpu_data_handle_t handle); + + /** + Return a 32bit footprint which characterizes the data size and layout (nx, ny, ld, elemsize, etc.), required for indexing performance models. + + starpu_hash_crc32c_be() and alike can be used to produce this 32bit value from various types of values. + */ + uint32_t (*footprint)(starpu_data_handle_t handle); + + /** + Return a 32bit footprint which characterizes the data allocation, to be used + for indexing allocation cache. + If not specified, the starpu_data_interface_ops::footprint method is + used instead. + If specified, alloc_compare should be set to provide the strict + comparison, and reuse_data_on_node should be set to provide correct buffer reuse. + */ + uint32_t (*alloc_footprint)(starpu_data_handle_t handle); + + /** + Compare the data size and layout of two interfaces (nx, ny, ld, elemsize, + etc.), to be used for indexing performance models. 
It should return 1 if + the two interfaces size and layout match computation-wise, and 0 otherwise. + It does *not* compare the actual content of the interfaces. + */ + int (*compare)(void *data_interface_a, void *data_interface_b); + + /** + Compare the data allocation of two interfaces, to be used for indexing the + allocation cache. It should return + 1 if the two interfaces are allocation-compatible, i.e. basically have the same alloc_size, and 0 otherwise. + If not specified, the starpu_data_interface_ops::compare method is + used instead. + */ + int (*alloc_compare)(void *data_interface_a, void *data_interface_b); + + /** + Dump the sizes of a handle to a file. + This is required for performance models. + */ + void (*display)(starpu_data_handle_t handle, FILE *f); + + /** + Describe the data into a string in a brief way, such as one + letter to describe the type of data, and the data + dimensions. + This is required for tracing feedback. + */ + starpu_ssize_t (*describe)(void *data_interface, char *buf, size_t size); + + /** + An identifier that is unique to each interface. + */ + enum starpu_data_interface_id interfaceid; + + /** + Size of the interface data descriptor. + */ + size_t interface_size; + + /** + */ + char is_multiformat; + + /** + If set to non-zero, StarPU will never try to reuse an allocated + buffer for a different handle. This can be notably useful for + application-defined interfaces which have a dynamic size, and for + which it thus does not make sense to reuse the buffer since it will + probably not have the proper size. + */ + char dontcache; + + /** + */ + struct starpu_multiformat_data_interface_ops *(*get_mf_ops)(void *data_interface); + + /** + Pack the data handle into a contiguous buffer at the address + allocated with starpu_malloc_flags(ptr, size, 0) (and thus + returned in \p ptr) and set the size of the newly created buffer + in \p count.
If \p ptr is NULL, the function should not + copy the data in the buffer but just set count to the size of the + buffer which would have been allocated. The special value -1 + indicates the size is yet unknown. + + This method (and starpu_data_interface_ops::unpack_data) is required + for disk support if the starpu_data_copy_methods::any_to_any method + is not implemented (because the in-memory data layout is too + complex). + + This is also required for MPI support if there is no registered MPI data type. + */ + int (*pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); + + /** + Read the data handle from the contiguous buffer at the address + \p ptr of size \p count. + */ + int (*peek_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + + /** + Unpack the data handle from the contiguous buffer at the address + \p ptr of size \p count. + The memory at the address \p ptr should be freed after the data unpacking operation. + */ + int (*unpack_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + + /** + Pack the interface into a contiguous buffer and set the + size of the newly created buffer in \p count. This function + is used in master slave mode for data interfaces with a + dynamic content. + */ + int (*pack_meta)(void *data_interface, void **ptr, starpu_ssize_t *count); + + /** + Unpack the interface from the given buffer and set the size + of the unpacked data in \p count. This function + is used in master slave mode for data interfaces with a + dynamic content. + */ + int (*unpack_meta)(void **data_interface, void *ptr, starpu_ssize_t *count); + + /** + Free the allocated memory by a previous call to unpack_meta() + */ + int (*free_meta)(void *data_interface); + + /** + Name of the interface + */ + char *name; +}; + +/** + @name Basic API + @{ +*/ + +/** + Register a piece of data into the handle located at the + \p handleptr address. 
The \p data_interface buffer contains the initial + description of the data in the \p home_node. The \p ops argument is a + pointer to a structure describing the different methods used to + manipulate this type of interface. See starpu_data_interface_ops for + more details on this structure. + If \p home_node is -1, StarPU will automatically allocate the memory when + it is used for the first time in write-only mode. Once such data + handle has been automatically allocated, it is possible to access it + using any access mode. + Note that StarPU supplies a set of predefined types of interface (e.g. + vector or matrix) which can be registered by the means of helper + functions (e.g. starpu_vector_data_register() or + starpu_matrix_data_register()). + + See \ref DefiningANewDataInterface_registration for more details. +*/ +void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops); + +/** + Register the given data interface operations. If the field + starpu_data_interface_ops::interfaceid is set to + ::STARPU_UNKNOWN_INTERFACE_ID, then a new identifier will be set by + calling starpu_data_interface_get_next_id(). + The function is automatically called when registering a piece of + data with starpu_data_register(). It is only necessary to call it + beforehand for some specific cases (such as the master slave mode). +*/ +void starpu_data_register_ops(struct starpu_data_interface_ops *ops); + +/** + Register that a buffer for \p handle on \p node will be set. This is typically + used by starpu_*_ptr_register helpers before setting the interface pointers for + this node, to tell the core that it is now allocated. + See \ref DefiningANewDataInterface_pointers for more details. +*/ +void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node); + +/** + Register a new piece of data into the handle \p handledst with the + same interface as the handle \p handlesrc.
+ See \ref DataHandlesHelpers for more details. +*/ +void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc); + +/** + Return the pointer associated with \p handle on node \p node or NULL + if handle’s interface does not support this operation or data for this + \p handle is not allocated on that \p node. + See \ref DataPointers for more details. +*/ +void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node); + +/** + Return the local pointer associated with \p handle or NULL if + \p handle’s interface does not have any data allocated locally. + See \ref DataPointers for more details. +*/ +void *starpu_data_get_local_ptr(starpu_data_handle_t handle); + +/** + Return the interface associated with \p handle on \p memory_node. + See \ref DefiningANewDataInterface_pack for more details. +*/ +void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node); + +/** + Return the unique identifier of the interface associated with + the given \p handle. + See \ref DefiningANewDataInterface_helpers for more details. +*/ +enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle); + +/** + Execute the packing operation of the interface of the data + registered at \p handle (see starpu_data_interface_ops). This + packing operation must allocate a buffer large enough at \p ptr on node \p node and copy + into the newly allocated buffer the data associated to \p handle. \p count + will be set to the size of the allocated buffer. If \p ptr is NULL, the + function should not copy the data in the buffer but just set \p count to + the size of the buffer which would have been allocated. The special + value -1 indicates the size is yet unknown. + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_pack_node(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); + +/** + Like starpu_data_pack_node(), but for the local memory node. 
+ See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count); + +/** + Read in handle's \p node replicate the data located at \p ptr + of size \p count as described by the interface of the data. The interface + registered at \p handle must define a peeking operation (see + starpu_data_interface_ops). + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_peek_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + +/** + Read in handle's local replicate the data located at \p ptr + of size \p count as described by the interface of the data. The interface + registered at \p handle must define a peeking operation (see + starpu_data_interface_ops). + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_peek(starpu_data_handle_t handle, void *ptr, size_t count); + +/** + Unpack in handle the data located at \p ptr of size \p count allocated + on node \p node as described by the interface of the data. The interface + registered at \p handle must define an unpacking operation (see + starpu_data_interface_ops). + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_unpack_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + +/** + Unpack in handle the data located at \p ptr of size \p count as + described by the interface of the data. The interface registered at + \p handle must define a unpacking operation (see + starpu_data_interface_ops). + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count); + +/** + Return the size of the data associated with \p handle. + See \ref DataHandlesHelpers for more details. +*/ +size_t starpu_data_get_size(starpu_data_handle_t handle); + +/** + Return the size of the allocated data associated with \p handle. + See \ref DataHandlesHelpers for more details. 
+*/ +size_t starpu_data_get_alloc_size(starpu_data_handle_t handle); + +/** + Return the maximum size that the \p handle data may need to increase to. + See \ref DataHandlesHelpers for more details. +*/ +starpu_ssize_t starpu_data_get_max_size(starpu_data_handle_t handle); + +/** + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_get_home_node(starpu_data_handle_t handle); + +/** + Print basic information on \p handle on \p node. + See \ref DataHandlesHelpers for more details. + */ +void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream); + +/** + Return the next available id for a newly created data interface + (\ref DefiningANewDataInterface). +*/ +int starpu_data_interface_get_next_id(void); + +/** + Copy \p size bytes from byte offset \p src_offset of \p src on \p src_node + to byte offset \p dst_offset of \p dst on \p dst_node. This is to be used in + the starpu_data_copy_methods::any_to_any copy method, which is provided with \p async_data to + be passed to starpu_interface_copy(). this returns -EAGAIN if the + transfer is still ongoing, or 0 if the transfer is already completed. + + See \ref DefiningANewDataInterface_copy for more details. +*/ +int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t size, void *async_data); + +/** + Copy \p numblocks blocks of \p blocksize bytes from byte offset \p src_offset + of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p + dst_node. + + The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in + the source (resp. destination) interface. + + If blocksize == ld_src == ld_dst, the transfer is optimized into a single + starpu_interface_copy call. + + This is to be used in the starpu_data_copy_methods::any_to_any copy + method for 2D data, which is provided with \p async_data to be passed to + starpu_interface_copy(). 
This returns -EAGAIN if the transfer is still + ongoing, or 0 if the transfer is already completed. + + See \ref DefiningANewDataInterface_copy for more details. +*/ +int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + void *async_data); + +/** + Copy \p numblocks1 * \p numblocks2 blocks of \p blocksize bytes from byte + offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of + \p dst on \p dst_node. + + The blocks are grouped by \p numblocks1 blocks whose start addresses are + ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) + interface. + + Such groups are grouped by \p numblocks2 groups whose start addresses are + ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) + interface. + + If the blocks are contiguous, the transfers will be optimized. + + This is to be used in the starpu_data_copy_methods::any_to_any copy + method for 3D data, which is provided with \p async_data to be passed to + starpu_interface_copy(). This returns -EAGAIN if the transfer is still + ongoing, or 0 if the transfer is already completed. + + See \ref DefiningANewDataInterface_copy for more details. +*/ +int starpu_interface_copy3d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks1, size_t ld1_src, size_t ld1_dst, + size_t numblocks2, size_t ld2_src, size_t ld2_dst, + void *async_data); + +/** + Copy \p numblocks1 * \p numblocks2 * \p numblocks3 blocks of \p blocksize + bytes from byte offset \p src_offset of \p src on \p src_node to byte offset + \p dst_offset of \p dst on \p dst_node. + + The blocks are grouped by \p numblocks1 blocks whose start addresses are + ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) + interface.
+ + Such groups are grouped by \p numblocks2 groups whose start addresses are + ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) + interface. + + Such groups are grouped by \p numblocks3 groups whose start addresses are + ld3_src (resp. ld3_dst) bytes apart in the source (resp. destination) + interface. + + If the blocks are contiguous, the transfers will be optimized. + + This is to be used in the starpu_data_copy_methods::any_to_any copy + method for 4D data, which is provided with \p async_data to be passed to + starpu_interface_copy(). This returns -EAGAIN if the transfer is still + ongoing, or 0 if the transfer is already completed. + + See \ref DefiningANewDataInterface_copy for more details. +*/ +int starpu_interface_copy4d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks1, size_t ld1_src, size_t ld1_dst, + size_t numblocks2, size_t ld2_src, size_t ld2_dst, + size_t numblocks3, size_t ld3_src, size_t ld3_dst, + void *async_data); + +/** + Copy \p nn[1] * \p nn[2]...* \p nn[ndim-1] blocks of \p nn[0] * \p elemsize bytes from byte + offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of + \p dst on \p dst_node. + + The blocks are grouped by \p nn[i] blocks (i = 1, 2, ... ndim-1) whose start addresses are + ldn_src[i] * \p elemsize (resp. ldn_dst[i] * \p elemsize) bytes apart + in the source (resp. destination) interface. + + If the blocks are contiguous, the transfers will be optimized. + + This is to be used in the starpu_data_copy_methods::any_to_any copy + method for Ndim data, which is provided with \p async_data to be passed to + starpu_interface_copy(). This returns -EAGAIN if the transfer is still + ongoing, or 0 if the transfer is already completed. + + See \ref DefiningANewDataInterface_copy for more details.
+*/ +int starpu_interface_copynd(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t elemsize, size_t ndim, + uint32_t *nn, uint32_t *ldn_src, uint32_t *ldn_dst, + void *async_data); + +/** + When an asynchronous implementation of the data transfer is implemented, the call + to the underlying CUDA, OpenCL, etc. call should be surrounded + by calls to starpu_interface_start_driver_copy_async() and + starpu_interface_end_driver_copy_async(), so that it is recorded in offline + execution traces, and the timing of the submission is checked. \p start must + point to a variable whose value will be passed unchanged to + starpu_interface_end_driver_copy_async(). + + See \ref DefiningANewDataInterface_copy for more details. +*/ +void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start); + +/** + See starpu_interface_start_driver_copy_async(). + See \ref DefiningANewDataInterface_copy for more details. +*/ +void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start); + +/** + Record in offline execution traces the copy of \p size bytes from + node \p src_node to node \p dst_node. + See \ref DefiningANewDataInterface_copy for more details. + */ +void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size); + +/** + Allocate \p size bytes on node \p dst_node with the given allocation \p flags + (such as ::STARPU_MALLOC_PINNED, ::STARPU_MALLOC_COUNT, etc.). This returns 0 if + allocation failed, the allocation method should then return -ENOMEM as + allocated size. Deallocation must be done with starpu_free_on_node_flags(). + + See \ref VariableSizeDataInterface for more details. +*/ +uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags); + +/** + Allocate \p size bytes on node \p dst_node with the default allocation flags. 
This returns 0 if + allocation failed, the allocation method should then return -ENOMEM as + allocated size. Deallocation must be done with starpu_free_on_node(). + + See \ref DefiningANewDataInterface_allocation for more details. +*/ +uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size); + +/** + Free \p addr of \p size bytes on node \p dst_node which was previously allocated + with starpu_malloc_on_node_flags() with the given allocation \p flags. + + See \ref VariableSizeDataInterface for more details. +*/ +void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags); + +/** + Free \p addr of \p size bytes on node \p dst_node which was previously allocated + with starpu_malloc_on_node(). + + See \ref DefiningANewDataInterface_allocation for more details. +*/ +void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size); + +/** + Define the default flags for allocations performed by starpu_malloc_on_node() and + starpu_free_on_node(). The default is \ref STARPU_MALLOC_PINNED | \ref STARPU_MALLOC_COUNT. + See \ref HowToLimitMemoryPerNode for more details. +*/ +void starpu_malloc_on_node_set_default_flags(unsigned node, int flags); + +/** @} */ + +/** + @name MAP API + @{ +*/ + +/** + Used to set starpu_data_interface_ops::map_data. + See \ref DefiningANewDataInterface_pointers for more details. +*/ +uintptr_t starpu_interface_map(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret); +/** + Used to set starpu_data_interface_ops::unmap_data. + See \ref DefiningANewDataInterface_pointers for more details. +*/ +int starpu_interface_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size); +/** + Used to set starpu_data_interface_ops::update_map. + See \ref DefiningANewDataInterface_pointers for more details. 
+*/ +int starpu_interface_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size); + +/** @} */ + +/** + @name Accessing Matrix Data Interfaces + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_matrix_ops; + +/** + Matrix interface for dense matrices +*/ +struct starpu_matrix_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + uintptr_t ptr; /**< local pointer of the matrix */ + uintptr_t dev_handle; /**< device handle of the matrix */ + size_t offset; /**< offset in the matrix */ + uint32_t nx; /**< number of elements on the x-axis of the matrix */ + uint32_t ny; /**< number of elements on the y-axis of the matrix */ + uint32_t ld; /**< number of elements between each row of the + matrix. May be equal to starpu_matrix_interface::nx + when there is no padding. + */ + size_t elemsize; /**< size of the elements of the matrix */ + size_t allocsize; /**< size actually currently allocated */ +}; + +/** + Register the \p nx x \p ny 2D matrix of \p elemsize-byte elements pointed + by \p ptr and initialize \p handle to represent it. \p ld specifies the number + of elements between rows. A value greater than \p nx adds padding, which + can be useful for alignment purposes. + + Here is an example of how to use the function. + \code{.c} + float *matrix; + starpu_data_handle_t matrix_handle; + matrix = (float*)malloc(width * height * sizeof(float)); + starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); + \endcode + + See \ref MatrixDataInterface for more details. +*/ +void starpu_matrix_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize); + +/** + Similar to starpu_matrix_data_register, but additionally specifies which + allocation size should be used instead of the initial nx*ny*elemsize.
+ + See \ref VariableSizeDataInterface for more details. +*/ +void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize, size_t allocsize); + +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably), with \p ld elements between rows. +*/ +void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld); + +/** + Return the number of elements on the x-axis of the matrix + designated by \p handle. +*/ +uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle); + +/** + Return the number of elements on the y-axis of the matrix + designated by \p handle. +*/ +uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle); + +/** + Return the number of elements between each row of the matrix + designated by \p handle. Maybe be equal to nx when there is no padding. +*/ +uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle); + +/** + Return the local pointer associated with \p handle. +*/ +uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle); + +/** + Return the size of the elements registered into the matrix + designated by \p handle. +*/ +size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle); + +/** + Return the allocated size of the matrix designated by \p handle. +*/ +size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_MATRIX_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_matrix_interface *)(interface))->id) == STARPU_MATRIX_INTERFACE_ID, "Error. 
The given data is not a matrix.") +#define STARPU_MATRIX_GET_PTR(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->ptr); \ + }) +#define STARPU_MATRIX_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->dev_handle); \ + }) +#define STARPU_MATRIX_GET_OFFSET(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->offset); \ + }) +#define STARPU_MATRIX_GET_NX(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->nx); \ + }) +#define STARPU_MATRIX_GET_NY(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->ny); \ + }) +#define STARPU_MATRIX_GET_LD(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->ld); \ + }) +#define STARPU_MATRIX_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->elemsize); \ + }) +#define STARPU_MATRIX_GET_ALLOCSIZE(interface) ( \ + { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->allocsize); \ + }) +#else +/** + Return a pointer to the matrix designated by \p interface, valid + on CPUs and CUDA devices only. For OpenCL devices, the device handle + and offset need to be used instead. +*/ +#define STARPU_MATRIX_GET_PTR(interface) (((struct starpu_matrix_interface *)(interface))->ptr) +/** + Return a device handle for the matrix designated by \p interface, + to be used with OpenCL. The offset returned by + ::STARPU_MATRIX_GET_OFFSET has to be used in + addition to this. +*/ +#define STARPU_MATRIX_GET_DEV_HANDLE(interface) (((struct starpu_matrix_interface *)(interface))->dev_handle) +/** + Return the offset in the matrix designated by \p interface, to be + used with the device handle. 
+*/ +#define STARPU_MATRIX_GET_OFFSET(interface) (((struct starpu_matrix_interface *)(interface))->offset) +/** + Return the number of elements on the x-axis of the matrix + designated by \p interface. +*/ +#define STARPU_MATRIX_GET_NX(interface) (((struct starpu_matrix_interface *)(interface))->nx) +/** + Return the number of elements on the y-axis of the matrix + designated by \p interface. +*/ +#define STARPU_MATRIX_GET_NY(interface) (((struct starpu_matrix_interface *)(interface))->ny) +/** + Return the number of elements between each row of the matrix + designated by \p interface. May be equal to nx when there is no padding. +*/ +#define STARPU_MATRIX_GET_LD(interface) (((struct starpu_matrix_interface *)(interface))->ld) +/** + Return the size of the elements registered into the matrix + designated by \p interface. +*/ +#define STARPU_MATRIX_GET_ELEMSIZE(interface) (((struct starpu_matrix_interface *)(interface))->elemsize) +/** + Return the allocated size of the matrix designated by \p interface. +*/ +#define STARPU_MATRIX_GET_ALLOCSIZE(interface) (((struct starpu_matrix_interface *)(interface))->allocsize) +#endif + +/** + Set the number of elements on the x-axis of the matrix + designated by \p interface. +*/ +#define STARPU_MATRIX_SET_NX(interface, newnx) \ + do { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->nx) = (newnx); \ + } \ + while (0) +/** + Set the number of elements on the y-axis of the matrix + designated by \p interface. +*/ +#define STARPU_MATRIX_SET_NY(interface, newny) \ + do { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->ny) = (newny); \ + } \ + while (0) +/** + Set the number of elements between each row of the matrix + designated by \p interface. May be set to the same value as nx when there is + no padding. 
+*/ +#define STARPU_MATRIX_SET_LD(interface, newld) \ + do { \ + STARPU_MATRIX_CHECK(interface); \ + (((struct starpu_matrix_interface *)(interface))->ld) = (newld); \ + } \ + while (0) + +/** @} */ + +/** + @name Accessing COO Data Interfaces + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_coo_ops; + +/** + COO Matrices +*/ +struct starpu_coo_interface +{ + enum starpu_data_interface_id id; /**< identifier of the interface */ + + uint32_t *columns; /**< column array of the matrix */ + uint32_t *rows; /**< row array of the matrix */ + uintptr_t values; /**< values of the matrix */ + uint32_t nx; /**< number of elements on the x-axis of the matrix */ + uint32_t ny; /**< number of elements on the y-axis of the matrix */ + uint32_t n_values; /**< number of values registered in the matrix */ + size_t elemsize; /**< size of the elements of the matrix */ +}; + +/** + Register the \p nx x \p ny 2D matrix given in the COO format, using the + \p columns, \p rows, \p values arrays, which must have \p n_values elements of + size \p elemsize. Initialize \p handleptr. + See \ref COODataInterface for more details. +*/ +void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nx, uint32_t ny, uint32_t n_values, uint32_t *columns, uint32_t *rows, uintptr_t values, size_t elemsize); + +/** + Return a pointer to the column array of the matrix designated + by \p interface. +*/ +#define STARPU_COO_GET_COLUMNS(interface) (((struct starpu_coo_interface *)(interface))->columns) +/** + Return a device handle for the column array of the matrix + designated by \p interface, to be used with OpenCL. The offset + returned by ::STARPU_COO_GET_OFFSET has to be used in addition to + this. +*/ +#define STARPU_COO_GET_COLUMNS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->columns) +/** + Return a pointer to the rows array of the matrix designated by + \p interface. 
+*/ +#define STARPU_COO_GET_ROWS(interface) (((struct starpu_coo_interface *)(interface))->rows) +/** + Return a device handle for the row array of the matrix + designated by \p interface, to be used on OpenCL. The offset returned + by ::STARPU_COO_GET_OFFSET has to be used in addition to this. +*/ +#define STARPU_COO_GET_ROWS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->rows) +/** + Return a pointer to the values array of the matrix designated + by \p interface. +*/ +#define STARPU_COO_GET_VALUES(interface) (((struct starpu_coo_interface *)(interface))->values) +/** + Return a device handle for the value array of the matrix + designated by \p interface, to be used on OpenCL. The offset returned + by ::STARPU_COO_GET_OFFSET has to be used in addition to this. +*/ +#define STARPU_COO_GET_VALUES_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->values) +/** + Return the offset in the arrays of the COO matrix designated by + \p interface. +*/ +#define STARPU_COO_GET_OFFSET 0 +/** + Return the number of elements on the x-axis of the matrix + designated by \p interface. +*/ +#define STARPU_COO_GET_NX(interface) (((struct starpu_coo_interface *)(interface))->nx) +/** + Return the number of elements on the y-axis of the matrix + designated by \p interface. +*/ +#define STARPU_COO_GET_NY(interface) (((struct starpu_coo_interface *)(interface))->ny) +/** + Return the number of values registered in the matrix designated + by \p interface. +*/ +#define STARPU_COO_GET_NVALUES(interface) (((struct starpu_coo_interface *)(interface))->n_values) +/** + Return the size of the elements registered into the matrix + designated by \p interface. +*/ +#define STARPU_COO_GET_ELEMSIZE(interface) (((struct starpu_coo_interface *)(interface))->elemsize) + +/** @} */ + +/** + @name Block Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_block_ops; + +/* TODO: rename to 3dmatrix? 
*/ +/* TODO: add allocsize support */ +/** + Block interface for 3D dense blocks +*/ +struct starpu_block_interface +{ + enum starpu_data_interface_id id; /**< identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the block */ + uintptr_t dev_handle; /**< device handle of the block. */ + size_t offset; /**< offset in the block. */ + uint32_t nx; /**< number of elements on the x-axis of the block. */ + uint32_t ny; /**< number of elements on the y-axis of the block. */ + uint32_t nz; /**< number of elements on the z-axis of the block. */ + uint32_t ldy; /**< number of elements between two lines */ + uint32_t ldz; /**< number of elements between two planes */ + size_t elemsize; /**< size of the elements of the block. */ +}; + +/** + Register the \p nx x \p ny x \p nz 3D matrix of \p elemsize byte elements + pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy and + \p ldz specify the number of elements between rows and between z planes. + + Here an example of how to use the function. + \code{.c} + float *block; + starpu_data_handle_t block_handle; + block = (float*)malloc(nx*ny*nz*sizeof(float)); + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float)); + \endcode + + See \ref BlockDataInterface for more details. +*/ +void starpu_block_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize); + +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably), with \p ldy elements between rows and \p ldz + elements between z planes. 
+*/ +void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz); + +/** + Return the number of elements on the x-axis of the block + designated by \p handle. + */ +uint32_t starpu_block_get_nx(starpu_data_handle_t handle); + +/** + Return the number of elements on the y-axis of the block + designated by \p handle. + */ +uint32_t starpu_block_get_ny(starpu_data_handle_t handle); + +/** + Return the number of elements on the z-axis of the block + designated by \p handle. + */ +uint32_t starpu_block_get_nz(starpu_data_handle_t handle); + +/** + Return the number of elements between each row of the block + designated by \p handle, in the format of the current memory node. +*/ +uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle); + +/** + Return the number of elements between each z plane of the block + designated by \p handle, in the format of the current memory node. + */ +uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle); + +/** + Return the local pointer associated with \p handle. + */ +uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle); + +/** + Return the size of the elements of the block designated by + \p handle. + */ +size_t starpu_block_get_elemsize(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_BLOCK_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_block_interface *)(interface))->id) == STARPU_BLOCK_INTERFACE_ID, "Error. 
The given data is not a block.") +#define STARPU_BLOCK_GET_PTR(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->ptr); \ + }) +#define STARPU_BLOCK_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->dev_handle); \ + }) +#define STARPU_BLOCK_GET_OFFSET(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->offset); \ + }) +#define STARPU_BLOCK_GET_NX(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->nx); \ + }) +#define STARPU_BLOCK_GET_NY(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->ny); \ + }) +#define STARPU_BLOCK_GET_NZ(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->nz); \ + }) +#define STARPU_BLOCK_GET_LDY(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->ldy); \ + }) +#define STARPU_BLOCK_GET_LDZ(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->ldz); \ + }) +#define STARPU_BLOCK_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_BLOCK_CHECK(interface); \ + (((struct starpu_block_interface *)(interface))->elemsize); \ + }) +#else +/** + Return a pointer to the block designated by \p interface. + */ +#define STARPU_BLOCK_GET_PTR(interface) (((struct starpu_block_interface *)(interface))->ptr) +/** + Return a device handle for the block designated by \p interface, + to be used on OpenCL. The offset returned by + ::STARPU_BLOCK_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_BLOCK_GET_DEV_HANDLE(interface) (((struct starpu_block_interface *)(interface))->dev_handle) +/** + Return the offset in the block designated by \p interface, to be + used with the device handle. 
+ */ +#define STARPU_BLOCK_GET_OFFSET(interface) (((struct starpu_block_interface *)(interface))->offset) +/** + Return the number of elements on the x-axis of the block + designated by \p interface. + */ +#define STARPU_BLOCK_GET_NX(interface) (((struct starpu_block_interface *)(interface))->nx) +/** + Return the number of elements on the y-axis of the block + designated by \p interface. + */ +#define STARPU_BLOCK_GET_NY(interface) (((struct starpu_block_interface *)(interface))->ny) +/** +Return the number of elements on the z-axis of the block +designated by \p interface. + */ +#define STARPU_BLOCK_GET_NZ(interface) (((struct starpu_block_interface *)(interface))->nz) +/** + Return the number of elements between each row of the block + designated by \p interface. May be equal to nx when there is no padding. + */ +#define STARPU_BLOCK_GET_LDY(interface) (((struct starpu_block_interface *)(interface))->ldy) +/** + Return the number of elements between each z plane of the block + designated by \p interface. May be equal to nx*ny when there is no + padding. + */ +#define STARPU_BLOCK_GET_LDZ(interface) (((struct starpu_block_interface *)(interface))->ldz) +/** + Return the size of the elements of the block designated by + \p interface. + */ +#define STARPU_BLOCK_GET_ELEMSIZE(interface) (((struct starpu_block_interface *)(interface))->elemsize) +#endif + +/** @} */ + +/** + @name Tensor Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_tensor_ops; + +/* TODO: rename to 4dtensor? */ +/* TODO: add allocsize support */ +/** + Tensor interface for 4D dense tensors +*/ +struct starpu_tensor_interface +{ + enum starpu_data_interface_id id; /**< identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the tensor */ + uintptr_t dev_handle; /**< device handle of the tensor. */ + size_t offset; /**< offset in the tensor. */ + uint32_t nx; /**< number of elements on the x-axis of the tensor. 
*/ + uint32_t ny; /**< number of elements on the y-axis of the tensor. */ + uint32_t nz; /**< number of elements on the z-axis of the tensor. */ + uint32_t nt; /**< number of elements on the t-axis of the tensor. */ + uint32_t ldy; /**< number of elements between two lines */ + uint32_t ldz; /**< number of elements between two planes */ + uint32_t ldt; /**< number of elements between two cubes */ + size_t elemsize; /**< size of the elements of the tensor. */ +}; + +/** + Register the \p nx x \p ny x \p nz x \p nt 4D tensor of \p elemsize byte elements + pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy, + \p ldz, and \p ldt specify the number of elements between rows, between z planes and between t cubes. + + Here an example of how to use the function. + \code{.c} + float *tensor; + starpu_data_handle_t tensor_handle; + tensor = (float*)malloc(nx*ny*nz*nt*sizeof(float)); + starpu_tensor_data_register(&tensor_handle, STARPU_MAIN_RAM, (uintptr_t)tensor, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float)); + \endcode + + See \ref TensorDataInterface for more details. +*/ +void starpu_tensor_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize); + +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably), with \p ldy elements between rows, and \p ldz + elements between z planes, and \p ldt elements between t cubes. +*/ +void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt); + +/** + Return the number of elements on the x-axis of the tensor + designated by \p handle. 
+ */ +uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle); + +/** + Return the number of elements on the y-axis of the tensor + designated by \p handle. + */ +uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle); + +/** + Return the number of elements on the z-axis of the tensor + designated by \p handle. + */ +uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle); + +/** + Return the number of elements on the t-axis of the tensor + designated by \p handle. + */ +uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle); + +/** + Return the number of elements between each row of the tensor + designated by \p handle, in the format of the current memory node. +*/ +uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle); + +/** + Return the number of elements between each z plane of the tensor + designated by \p handle, in the format of the current memory node. + */ +uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle); + +/** + Return the number of elements between each t cubes of the tensor + designated by \p handle, in the format of the current memory node. + */ +uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle); + +/** + Return the local pointer associated with \p handle. + */ +uintptr_t starpu_tensor_get_local_ptr(starpu_data_handle_t handle); + +/** + Return the size of the elements of the tensor designated by + \p handle. + */ +size_t starpu_tensor_get_elemsize(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_TENSOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_tensor_interface *)(interface))->id) == STARPU_TENSOR_INTERFACE_ID, "Error. 
The given data is not a tensor.") +#define STARPU_TENSOR_GET_PTR(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->ptr); \ + }) +#define STARPU_TENSOR_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->dev_handle); \ + }) +#define STARPU_TENSOR_GET_OFFSET(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->offset); \ + }) +#define STARPU_TENSOR_GET_NX(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->nx); \ + }) +#define STARPU_TENSOR_GET_NY(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->ny); \ + }) +#define STARPU_TENSOR_GET_NZ(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->nz); \ + }) +#define STARPU_TENSOR_GET_NT(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->nt); \ + }) +#define STARPU_TENSOR_GET_LDY(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->ldy); \ + }) +#define STARPU_TENSOR_GET_LDZ(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->ldz); \ + }) +#define STARPU_TENSOR_GET_LDT(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->ldt); \ + }) +#define STARPU_TENSOR_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_TENSOR_CHECK(interface); \ + (((struct starpu_tensor_interface *)(interface))->elemsize); \ + }) +#else +/** + Return a pointer to the tensor designated by \p interface. 
+ */ +#define STARPU_TENSOR_GET_PTR(interface) (((struct starpu_tensor_interface *)(interface))->ptr) +/** + Return a device handle for the tensor designated by \p interface, + to be used on OpenCL. The offset returned by + ::STARPU_TENSOR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_TENSOR_GET_DEV_HANDLE(interface) (((struct starpu_tensor_interface *)(interface))->dev_handle) +/** + Return the offset in the tensor designated by \p interface, to be + used with the device handle. + */ +#define STARPU_TENSOR_GET_OFFSET(interface) (((struct starpu_tensor_interface *)(interface))->offset) +/** + Return the number of elements on the x-axis of the tensor + designated by \p interface. + */ +#define STARPU_TENSOR_GET_NX(interface) (((struct starpu_tensor_interface *)(interface))->nx) +/** + Return the number of elements on the y-axis of the tensor + designated by \p interface. + */ +#define STARPU_TENSOR_GET_NY(interface) (((struct starpu_tensor_interface *)(interface))->ny) +/** +Return the number of elements on the z-axis of the tensor +designated by \p interface. + */ +#define STARPU_TENSOR_GET_NZ(interface) (((struct starpu_tensor_interface *)(interface))->nz) +/** +Return the number of elements on the t-axis of the tensor +designated by \p interface. + */ +#define STARPU_TENSOR_GET_NT(interface) (((struct starpu_tensor_interface *)(interface))->nt) +/** + Return the number of elements between each row of the tensor + designated by \p interface. May be equal to nx when there is no padding. + */ +#define STARPU_TENSOR_GET_LDY(interface) (((struct starpu_tensor_interface *)(interface))->ldy) +/** + Return the number of elements between each z plane of the tensor + designated by \p interface. May be equal to nx*ny when there is no + padding. + */ +#define STARPU_TENSOR_GET_LDZ(interface) (((struct starpu_tensor_interface *)(interface))->ldz) +/** + Return the number of elements between each t cubes of the tensor + designated by \p interface. 
May be equal to nx*ny*nz when there is no + padding. + */ +#define STARPU_TENSOR_GET_LDT(interface) (((struct starpu_tensor_interface *)(interface))->ldt) +/** + Return the size of the elements of the tensor designated by + \p interface. + */ +#define STARPU_TENSOR_GET_ELEMSIZE(interface) (((struct starpu_tensor_interface *)(interface))->elemsize) +#endif + +/** @} */ + +/** + @name Ndim Array Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_ndim_ops; + +/** + ndim interface for ndim array +*/ +struct starpu_ndim_interface +{ + enum starpu_data_interface_id id; /**< identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the ndim */ + uintptr_t dev_handle; /**< device handle of the ndim. */ + size_t offset; /**< offset in the ndim. */ + size_t allocsize; /**< size actually currently allocated. */ + uint32_t *nn; /**< array of element number on each dimension */ + uint32_t *ldn; /**< array of element number between two units on each dimension */ + size_t ndim; /**< size of the dimension. */ + size_t elemsize; /**< size of the elements of the ndim. */ +}; + +/** + Register the \p nn[0] x \p nn[1] x ... \p ndim-dimension matrix of \p elemsize byte elements + pointed by \p ptr and initialize \p handle to represent it. Again, \p ldn, + specifies the number of elements between two units on each dimension. + + Here an example of how to use the function. + \code{.c} + float *ndim_arr; + size_t arrsize = 1; + int i; + for (i = 0; i < ndim; i++) + arrsize = arrsize * nn[i]; + starpu_data_handle_t ndim_handle; + ndim_arr = (float*)malloc(arrsize*sizeof(float)); + starpu_ndim_data_register(&ndim_handle, STARPU_MAIN_RAM, (uintptr_t)ndim_arr, ldn, nn, ndim, sizeof(float)); + \endcode + + See \ref NdimDataInterface for more details. 
+*/ +void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t *ldn, uint32_t *nn, size_t ndim, size_t elemsize); +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably), with \p ldn elements between two units on each dimension. +*/ +void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t *ldn); + +/** + Return the number of elements on each dimension of the ndim array + designated by \p handle. + */ +uint32_t *starpu_ndim_get_nn(starpu_data_handle_t handle); + +/** + Return the number of elements on the i-axis of the ndim array + designated by \p handle. When i=0, it means x-axis, + when i=1, it means y-axis, when i=2, it means z-axis, etc. + */ +uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i); + +/** + Return the number of elements between two units on each dimension of the ndim array + designated by \p handle, in the format of the current memory node. +*/ +uint32_t *starpu_ndim_get_local_ldn(starpu_data_handle_t handle); + +/** + Return the number of elements between two units i-axis dimension of the ndim array + designated by \p handle, in the format of the current memory node. +*/ +uint32_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i); + +/** + Return the local pointer associated with \p handle. + */ +uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle); + +/** + Return the dimension size. +*/ +size_t starpu_ndim_get_ndim(starpu_data_handle_t handle); + +/** + Return the size of the elements of the ndim array designated by + \p handle. 
+ */ +size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_NDIM_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_ndim_interface *)(interface))->id) == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim.") +#define STARPU_NDIM_GET_PTR(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->ptr); \ + }) +#define STARPU_NDIM_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->dev_handle); \ + }) +#define STARPU_NDIM_GET_OFFSET(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->offset); \ + }) +#define STARPU_NDIM_GET_NN(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->nn); \ + }) +#define STARPU_NDIM_GET_LDN(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->ldn); \ + }) +#define STARPU_NDIM_GET_NDIM(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->ndim); \ + }) +#define STARPU_NDIM_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_NDIM_CHECK(interface); \ + (((struct starpu_ndim_interface *)(interface))->elemsize); \ + }) +#else +/** + Return a pointer to the ndim array designated by \p interface. + */ +#define STARPU_NDIM_GET_PTR(interface) (((struct starpu_ndim_interface *)(interface))->ptr) +/** + Return a device handle for the ndim array designated by \p interface, + to be used on OpenCL. The offset returned by + ::STARPU_NDIM_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_NDIM_GET_DEV_HANDLE(interface) (((struct starpu_ndim_interface *)(interface))->dev_handle) +/** + Return the offset in the ndim designated by \p interface, to be + used with the device handle. 
+ */ +#define STARPU_NDIM_GET_OFFSET(interface) (((struct starpu_ndim_interface *)(interface))->offset) +/** + Return the number of elements on each dimension of the ndim array + designated by \p interface. + */ +#define STARPU_NDIM_GET_NN(interface) (((struct starpu_ndim_interface *)(interface))->nn) +/** + Return the number of elements between each two units on each dimension of the ndim array + designated by \p interface. May be equal to nx when there is no padding. + */ +#define STARPU_NDIM_GET_LDN(interface) (((struct starpu_ndim_interface *)(interface))->ldn) +/** + Return the dimension size of the ndim array designated by + \p interface. + */ +#define STARPU_NDIM_GET_NDIM(interface) (((struct starpu_ndim_interface *)(interface))->ndim) +/** + Return the size of the elements of the ndim array designated by + \p interface. + */ +#define STARPU_NDIM_GET_ELEMSIZE(interface) (((struct starpu_ndim_interface *)(interface))->elemsize) +#endif + +/** @} */ + +/** + @name Vector Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_vector_ops; + +/** + todo +*/ +struct starpu_vector_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the vector */ + uintptr_t dev_handle; /**< device handle of the vector. */ + size_t offset; /**< offset in the vector */ + uint32_t nx; /**< number of elements on the x-axis of the vector */ + size_t elemsize; /**< size of the elements of the vector */ + uint32_t slice_base; /**< vector slice base, used by the StarPU OpenMP runtime support */ + size_t allocsize; /**< size actually currently allocated */ +}; + +/** + Register the \p nx \p elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it. + + Here an example of how to use the function. 
+ \code{.c} + float vector[NX]; + starpu_data_handle_t vector_handle; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); + \endcode + + See \ref VectorDataInterface for more details. + */ +void starpu_vector_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize); + +/** + Similar to starpu_vector_data_register, but additionally specifies which + allocation size should be used instead of the initial nx*elemsize. + See \ref VariableSizeDataInterface for more details. +*/ +void starpu_vector_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize); + +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably) +*/ +void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); + +/** + Return the number of elements registered into the array designated by \p handle. + */ +uint32_t starpu_vector_get_nx(starpu_data_handle_t handle); + +/** + Return the size of each element of the array designated by \p handle. + */ +size_t starpu_vector_get_elemsize(starpu_data_handle_t handle); + +/** + Return the allocated size of the array designated by \p handle. + */ +size_t starpu_vector_get_allocsize(starpu_data_handle_t handle); + +/** + Return the local pointer associated with \p handle. + */ +uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_VECTOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_vector_interface *)(interface))->id) == STARPU_VECTOR_INTERFACE_ID, "Error. 
The given data is not a vector.") +#define STARPU_VECTOR_GET_PTR(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->ptr); \ + }) +#define STARPU_VECTOR_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->dev_handle); \ + }) +#define STARPU_VECTOR_GET_OFFSET(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->offset); \ + }) +#define STARPU_VECTOR_GET_NX(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->nx); \ + }) +#define STARPU_VECTOR_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->elemsize); \ + }) +#define STARPU_VECTOR_GET_ALLOCSIZE(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->allocsize); \ + }) +#define STARPU_VECTOR_GET_SLICE_BASE(interface) ( \ + { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->slice_base); \ + }) +#else +/** + Return a pointer to the array designated by \p interface, valid on + CPUs and CUDA only. For OpenCL, the device handle and offset need to + be used instead. + */ +#define STARPU_VECTOR_GET_PTR(interface) (((struct starpu_vector_interface *)(interface))->ptr) +/** + Return a device handle for the array designated by \p interface, + to be used with OpenCL. the offset returned by ::STARPU_VECTOR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_VECTOR_GET_DEV_HANDLE(interface) (((struct starpu_vector_interface *)(interface))->dev_handle) +/** + Return the offset in the array designated by \p interface, to be + used with the device handle. 
+*/ +#define STARPU_VECTOR_GET_OFFSET(interface) (((struct starpu_vector_interface *)(interface))->offset) +/** + Return the number of elements registered into the array + designated by \p interface. + */ +#define STARPU_VECTOR_GET_NX(interface) (((struct starpu_vector_interface *)(interface))->nx) +/** + Return the size of each element of the array designated by + \p interface. + */ +#define STARPU_VECTOR_GET_ELEMSIZE(interface) (((struct starpu_vector_interface *)(interface))->elemsize) +/** + Return the size of each element of the array designated by + \p interface. + */ +#define STARPU_VECTOR_GET_ALLOCSIZE(interface) (((struct starpu_vector_interface *)(interface))->allocsize) +/** + Return the OpenMP slice base annotation of each element of the array designated by + \p interface. + */ +#define STARPU_VECTOR_GET_SLICE_BASE(interface) (((struct starpu_vector_interface *)(interface))->slice_base) +#endif + +/** + Set the number of elements registered into the array designated by \p + interface. + */ +#define STARPU_VECTOR_SET_NX(interface, newnx) \ + do { \ + STARPU_VECTOR_CHECK(interface); \ + (((struct starpu_vector_interface *)(interface))->nx) = (newnx); \ + } \ + while (0) + +/** @} */ + +/** + @name Variable Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_variable_ops; + +/** + Variable interface for a single data (not a vector, a matrix, a list, + ...) +*/ +struct starpu_variable_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the variable */ + uintptr_t dev_handle; /**< device handle of the variable. */ + size_t offset; /**< offset in the variable */ + size_t elemsize; /**< size of the variable */ +}; + +/** + Register the \p size byte element pointed to by \p ptr, which is + typically a scalar or a pointer to an application-specific structure, and + initialize \p handle to represent this data item. 
+ + Here an example of how to use the function. + \code{.c} + float var = 42.0; + starpu_data_handle_t var_handle; + starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + \endcode + + See \ref VariableDataInterface for more details. +*/ +void starpu_variable_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, size_t size); + +/** + Register into the \p handle that to store data on node \p node it should use the + buffer located at \p ptr, or device handle \p dev_handle and offset \p offset + (for OpenCL, notably) + */ +void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); + +/** + Return the size of the variable designated by \p handle. + */ +size_t starpu_variable_get_elemsize(starpu_data_handle_t handle); + +/** + Return a pointer to the variable designated by \p handle. + */ +uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle); + +#if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) +#define STARPU_VARIABLE_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_variable_interface *)(interface))->id) == STARPU_VARIABLE_INTERFACE_ID, "Error. The given data is not a variable.") +#define STARPU_VARIABLE_GET_PTR(interface) ( \ + { \ + STARPU_VARIABLE_CHECK(interface); \ + (((struct starpu_variable_interface *)(interface))->ptr); \ + }) +#define STARPU_VARIABLE_GET_OFFSET(interface) ( \ + { \ + STARPU_VARIABLE_CHECK(interface); \ + (((struct starpu_variable_interface *)(interface))->offset); \ + }) +#define STARPU_VARIABLE_GET_ELEMSIZE(interface) ( \ + { \ + STARPU_VARIABLE_CHECK(interface); \ + (((struct starpu_variable_interface *)(interface))->elemsize); \ + }) +#define STARPU_VARIABLE_GET_DEV_HANDLE(interface) ( \ + { \ + STARPU_VARIABLE_CHECK(interface); \ + (((struct starpu_variable_interface *)(interface))->ptr); \ + }) +#else +/** + Return a pointer to the variable designated by \p interface. 
+ */ +#define STARPU_VARIABLE_GET_PTR(interface) (((struct starpu_variable_interface *)(interface))->ptr) +/** + Return the offset in the variable designated by \p interface, to + be used with the device handle. + */ +#define STARPU_VARIABLE_GET_OFFSET(interface) (((struct starpu_variable_interface *)(interface))->offset) +/** + Return the size of the variable designated by \p interface. + */ +#define STARPU_VARIABLE_GET_ELEMSIZE(interface) (((struct starpu_variable_interface *)(interface))->elemsize) +/** + Return a device handle for the variable designated by + \p interface, to be used with OpenCL. The offset returned by + ::STARPU_VARIABLE_GET_OFFSET has to be + used in addition to this. + */ +#define STARPU_VARIABLE_GET_DEV_HANDLE(interface) (((struct starpu_variable_interface *)(interface))->ptr) +#endif + +/** @} */ + +/** + @name Void Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_void_ops; + +/** + Register a void interface. There is no data really associated + to that interface, but it may be used as a synchronization mechanism. + It also permits to express an abstract piece of data that is managed + by the application internally: this makes it possible to forbid the + concurrent execution of different tasks accessing the same void + data in read-write concurrently. + See \ref DataHandlesHelpers for more details. 
+ */
+void starpu_void_data_register(starpu_data_handle_t *handle);
+
+/** @} */
+
+/**
+   @name CSR Data Interface
+   @{
+*/
+
+extern struct starpu_data_interface_ops starpu_interface_csr_ops;
+
+/**
+   CSR interface for sparse matrices (compressed sparse row
+   representation)
+*/
+struct starpu_csr_interface
+{
+	enum starpu_data_interface_id id; /**< Identifier of the interface */
+
+	uint32_t nnz; /**< number of non-zero entries */
+	uint32_t nrow; /**< number of rows */
+	uintptr_t nzval; /**< non-zero values */
+	uint32_t *colind; /**< position of non-zero entries on the row */
+	uint32_t *rowptr; /**< index (in nzval) of the first entry of the row */
+	uint32_t *ram_colind; /**< position of non-zero entries on the row (stored in RAM) */
+	uint32_t *ram_rowptr; /**< index (in nzval) of the first entry of the row (stored in RAM) */
+
+	uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix. */
+
+	size_t elemsize; /**< size of the elements of the matrix */
+};
+
+/**
+   Register a CSR (Compressed Sparse Row Representation) sparse matrix.
+   See \ref CSRDataInterface for more details.
+ */
+void starpu_csr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize);
+
+/**
+   Return the number of non-zero values in the matrix designated
+   by \p handle.
+ */
+uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle);
+
+/**
+   Return the size of the row pointer array of the matrix
+   designated by \p handle.
+ */
+uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle);
+
+/**
+   Return the index at which all arrays (the column indexes, the
+   row pointers...) of the matrix designated by \p handle start.
+ */
+uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle);
+
+/**
+   Return a local pointer to the non-zero values of the matrix
+   designated by \p handle.
+ */ +uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle); + +/** + Return a local pointer to the column index of the matrix + designated by \p handle. + */ +uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle); + +/** + Return a local pointer to the row pointer array of the matrix + designated by \p handle. + */ +uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle); + +/** + Return the size of the elements registered into the matrix + designated by \p handle. + */ +size_t starpu_csr_get_elemsize(starpu_data_handle_t handle); + +/** + Return the number of non-zero values in the matrix designated + by \p interface. + */ +#define STARPU_CSR_GET_NNZ(interface) (((struct starpu_csr_interface *)(interface))->nnz) +/** + Return the size of the row pointer array of the matrix + designated by \p interface. + */ +#define STARPU_CSR_GET_NROW(interface) (((struct starpu_csr_interface *)(interface))->nrow) +/** + Return a pointer to the non-zero values of the matrix + designated by \p interface. + */ +#define STARPU_CSR_GET_NZVAL(interface) (((struct starpu_csr_interface *)(interface))->nzval) +/** + Return a device handle for the array of non-zero values in the + matrix designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET + has to used in addition to this. + */ +#define STARPU_CSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->nnz) +/** + Return a pointer to the column index of the matrix designated + by \p interface. + */ +#define STARPU_CSR_GET_COLIND(interface) (((struct starpu_csr_interface *)(interface))->colind) +/** + Return a RAM pointer to the column index of the matrix designated + by \p interface. + */ +#define STARPU_CSR_GET_RAM_COLIND(interface) (((struct starpu_csr_interface *)(interface))->ram_colind) +/** + Return a device handle for the column index of the matrix + designated by \p interface. 
The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_CSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->colind) +/** + Return a pointer to the row pointer array of the matrix + designated by \p interface. + */ +#define STARPU_CSR_GET_ROWPTR(interface) (((struct starpu_csr_interface *)(interface))->rowptr) +/** + Return a RAM pointer to the row pointer array of the matrix + designated by \p interface. + */ +#define STARPU_CSR_GET_RAM_ROWPTR(interface) (((struct starpu_csr_interface *)(interface))->ram_rowptr) +/** + Return a device handle for the row pointer array of the matrix + designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_CSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->rowptr) +/** + Return the offset in the arrays (colind, rowptr, nzval) of the + matrix designated by \p interface, to be used with the device handles. + */ +#define STARPU_CSR_GET_OFFSET 0 +/** + Return the index at which all arrays (the column indexes, the + row pointers...) of the \p interface start. + */ +#define STARPU_CSR_GET_FIRSTENTRY(interface) (((struct starpu_csr_interface *)(interface))->firstentry) +/** + Return the size of the elements registered into the matrix + designated by \p interface. + */ +#define STARPU_CSR_GET_ELEMSIZE(interface) (((struct starpu_csr_interface *)(interface))->elemsize) + +/** @} */ + +/** + @name BCSR Data Interface + @{ +*/ + +extern struct starpu_data_interface_ops starpu_interface_bcsr_ops; + +/** + BCSR interface for sparse matrices (blocked compressed sparse + row representation) + + Note: when a BCSR matrix is partitioned, nzval, colind, and rowptr point into + the corresponding father arrays. The rowptr content is thus the same as the + father's. Firstentry is used to offset this so it becomes valid for the child + arrays. 
+*/ +struct starpu_bcsr_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + + uint32_t nnz; /**< number of non-zero BLOCKS */ + uint32_t nrow; /**< number of rows (in terms of BLOCKS) */ + + uintptr_t nzval; /**< non-zero values: nnz blocks of r*c elements */ + uint32_t *colind; /**< array of nnz elements, colind[i] is the block-column index for block i in nzval */ + uint32_t *rowptr; /**< array of nrow+1 + * elements, rowptr[i] is + * the block-index (in + * nzval) of the first block + * of row i. By convention, + * rowptr[nrow] is the + * number of blocks, this + * allows an easier access + * of the matrix's elements + * for the kernels. */ + uint32_t *ram_colind; /**< array of nnz elements (stored in RAM) */ + uint32_t *ram_rowptr; /**< array of nrow+1 elements (stored in RAM) */ + + uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix. */ + + uint32_t r; /**< height of the blocks */ + uint32_t c; /**< width of the blocks */ + + size_t elemsize; /**< size of the elements of the matrix */ +}; + +/** + This variant of starpu_data_register() uses the BCSR (Blocked + Compressed Sparse Row Representation) sparse matrix interface. + Register the sparse matrix made of \p nnz non-zero blocks of elements of + size \p elemsize stored in \p nzval and initializes \p handle to represent it. + Blocks have size \p r * \p c. \p nrow is the number of rows (in terms of + blocks), \p colind is an array of nnz elements, colind[i] is the block-column index for block i in \p nzval, + \p rowptr is an array of nrow+1 elements, rowptr[i] is the block-index (in \p nzval) of the first block of row i. By convention, rowptr[nrow] is the number of blocks, this allows an easier access of the matrix's elements for the kernels. + \p firstentry is the index of the first entry of the given arrays + (usually 0 or 1). 
+ + Here an example with the following matrix: + + \code | 0 1 0 0 | \endcode + \code | 2 3 0 0 | \endcode + \code | 4 5 8 9 | \endcode + \code | 6 7 10 11 | \endcode + + \code nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] \endcode + \code colind = [0, 0, 1] \endcode + \code rowptr = [0, 1, 3] \endcode + \code r = c = 2 \endcode + + which translates into the following code + + \code{.c} + int R = 2; // Size of the blocks + int C = 2; + + int NROWS = 2; + int NNZ_BLOCKS = 3; // out of 4 + int NZVAL_SIZE = (R*C*NNZ_BLOCKS); + + int nzval[NZVAL_SIZE] = + { + 0, 1, 2, 3, // First block + 4, 5, 6, 7, // Second block + 8, 9, 10, 11 // Third block + }; + uint32_t colind[NNZ_BLOCKS] = + { + 0, // block-column index for first block in nzval + 0, // block-column index for second block in nzval + 1 // block-column index for third block in nzval + }; + uint32_t rowptr[NROWS+1] = + { + 0, // block-index in nzval of the first block of the first row. + 1, // block-index in nzval of the first block of the second row. + NNZ_BLOCKS // number of blocks, to allow an easier element's access for the kernels + }; + + starpu_data_handle_t bcsr_handle; + starpu_bcsr_data_register(&bcsr_handle, + STARPU_MAIN_RAM, + NNZ_BLOCKS, + NROWS, + (uintptr_t) nzval, + colind, + rowptr, + 0, // firstentry + R, + C, + sizeof(nzval[0])); + \endcode + + See \ref BCSRDataInterface for more details. +*/ +void starpu_bcsr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize); + +/** + Return the number of non-zero elements in the matrix designated + by \p handle. + */ +uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle); + +/** + Return the number of rows (in terms of blocks of size r*c) in + the matrix designated by \p handle. 
+ */
+uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle);
+
+/**
+   Return the index at which all arrays (the column indexes, the
+   row pointers...) of the matrix designated by \p handle start.
+ */
+uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle);
+
+/**
+   Return a pointer to the non-zero values of the matrix
+   designated by \p handle.
+ */
+uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle);
+
+/**
+   Return a pointer to the column index, which holds the positions
+   of the non-zero entries in the matrix designated by \p handle.
+ */
+uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle);
+
+/**
+   Return the row pointer array of the matrix designated by
+   \p handle.
+ */
+uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle);
+
+/**
+   Return the number of rows in a block.
+ */
+uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle);
+
+/**
+   Return the number of columns in a block.
+ */
+uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle);
+
+/**
+   Return the size of the elements in the matrix designated by
+   \p handle.
+ */
+size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle);
+
+/**
+   Return the number of non-zero blocks in the matrix designated
+   by \p interface.
+ */
+#define STARPU_BCSR_GET_NNZ(interface) (((struct starpu_bcsr_interface *)(interface))->nnz)
+/**
+   Return the number of block rows in the matrix designated
+   by \p interface.
+ */
+#define STARPU_BCSR_GET_NROW(interface) (((struct starpu_bcsr_interface *)(interface))->nrow)
+/**
+   Return a pointer to the non-zero values of the matrix
+   designated by \p interface.
+ */
+#define STARPU_BCSR_GET_NZVAL(interface) (((struct starpu_bcsr_interface *)(interface))->nzval)
+/**
+   Return a device handle for the array of non-zero values in the
+   matrix designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be
+   used in addition to this.
+ */ +#define STARPU_BCSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->nnz) +/** + Return a pointer to the column index of the matrix designated + by \p interface. + */ +#define STARPU_BCSR_GET_COLIND(interface) (((struct starpu_bcsr_interface *)(interface))->colind) +/** + Return a RAM pointer to the column index of the matrix designated + by \p interface. + */ +#define STARPU_BCSR_GET_RAM_COLIND(interface) (((struct starpu_bcsr_interface *)(interface))->ram_colind) +/** + Return a device handle for the column index of the matrix + designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_BCSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->colind) +/** + Return a pointer to the row pointer array of the matrix + designated by \p interface. + */ +#define STARPU_BCSR_GET_ROWPTR(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr) +/** + Return a RAM pointer to the row pointer array of the matrix + designated by \p interface. + */ +#define STARPU_BCSR_GET_RAM_ROWPTR(interface) (((struct starpu_bcsr_interface *)(interface))->ram_rowptr) +/** + Return a device handle for the row pointer array of the matrix + designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in + addition to this. + */ +#define STARPU_BCSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr) +/** + Return the base of the indexing (0 or 1 usually) in the matrix designated + by \p interface. + */ +#define STARPU_BCSR_GET_FIRSTENTRY(interface) (((struct starpu_bcsr_interface *)(interface))->firstentry) +/** + Return the height of blocks in the matrix designated + by \p interface. + */ +#define STARPU_BCSR_GET_R(interface) (((struct starpu_bcsr_interface *)(interface))->r) +/** + Return the width of blocks in the matrix designated + by \p interface. 
+ */
+#define STARPU_BCSR_GET_C(interface) (((struct starpu_bcsr_interface *)(interface))->c)
+/**
+   Return the size of elements in the matrix designated by \p interface.
+ */
+#define STARPU_BCSR_GET_ELEMSIZE(interface) (((struct starpu_bcsr_interface *)(interface))->elemsize)
+/**
+   Return the offset in the arrays (colind, rowptr, nzval) of the
+   matrix designated by \p interface, to be used with the device handles.
+ */
+#define STARPU_BCSR_GET_OFFSET 0
+
+/** @} */
+
+/**
+   @name Multiformat Data Interface
+   @{
+*/
+
+/**
+   Multiformat operations
+*/
+struct starpu_multiformat_data_interface_ops
+{
+	size_t cpu_elemsize; /**< size of each element on CPUs */
+	size_t opencl_elemsize; /**< size of each element on OpenCL devices */
+	struct starpu_codelet *cpu_to_opencl_cl; /**< pointer to a codelet which converts from CPU to OpenCL */
+	struct starpu_codelet *opencl_to_cpu_cl; /**< pointer to a codelet which converts from OpenCL to CPU */
+	size_t cuda_elemsize; /**< size of each element on CUDA devices */
+	struct starpu_codelet *cpu_to_cuda_cl; /**< pointer to a codelet which converts from CPU to CUDA */
+	struct starpu_codelet *cuda_to_cpu_cl; /**< pointer to a codelet which converts from CUDA to CPU */
+};
+
+/**
+   Multiformat interface: one piece of data of \c nx elements, with one
+   pointer per device format (CPU, CUDA, HIP, OpenCL). The members are read
+   through the STARPU_MULTIFORMAT_GET_* macros.
+*/
+struct starpu_multiformat_interface
+{
+	enum starpu_data_interface_id id; /**< Identifier of the interface */
+
+	void *cpu_ptr; /**< local pointer to the data with CPU format */
+	void *cuda_ptr; /**< local pointer to the data with CUDA format */
+	void *hip_ptr; /**< local pointer to the data with HIP format */
+	void *opencl_ptr; /**< local pointer to the data with OpenCL format */
+	uint32_t nx; /**< number of elements in the data */
+	struct starpu_multiformat_data_interface_ops *ops; /**< per-format element sizes and conversion codelets */
+};
+
+/**
+   Register a piece of data that can be represented in different
+   ways, depending upon the processing unit that manipulates it. It
+   allows the programmer, for instance, to use an array of structures
+   when working on a CPU, and a structure of arrays when working on a
+   GPU. \p nobjects is the number of elements in the data. \p format_ops
+   describes the format.
+   See \ref TheMultiformatInterface for more details.
+*/ +void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops); + +/** + Return the local pointer to the data with CPU format. + */ +#define STARPU_MULTIFORMAT_GET_CPU_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cpu_ptr) +/** + Return the local pointer to the data with CUDA format. + */ +#define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cuda_ptr) +/** + Return the local pointer to the data with HIP format. + */ +#define STARPU_MULTIFORMAT_GET_HIP_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->hip_ptr) + +/** + Return the local pointer to the data with OpenCL format. +*/ +#define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->opencl_ptr) +/** + Return the number of elements in the data. + */ +#define STARPU_MULTIFORMAT_GET_NX(interface) (((struct starpu_multiformat_interface *)(interface))->nx) + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_DATA_INTERFACES_H__ */ diff --git a/include/starpu_deprecated_api.h b/include/starpu_deprecated_api.h new file mode 100644 index 0000000..1f26620 --- /dev/null +++ b/include/starpu_deprecated_api.h @@ -0,0 +1,122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_DEPRECATED_API_H__ +#define __STARPU_DEPRECATED_API_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) +#warning Your application is using deprecated types. You may want to update to use the latest API, by using tools/dev/rename.sh. +#endif /* defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) */ + +#define starpu_permodel_history_based_expected_perf starpu_perfmodel_history_based_expected_perf + +#ifdef STARPU_USE_DEPRECATED_ONE_ZERO_API + +#define starpu_allocate_buffer_on_node starpu_malloc_on_node +#define starpu_free_buffer_on_node starpu_free_on_node +#define starpu_helper_cublas_init starpu_cublas_init +#define starpu_helper_cublas_shutdown starpu_cublas_shutdown + +#define starpu_canonical_block_filter_bcsr starpu_bcsr_filter_canonical_block +#define starpu_vertical_block_filter_func_csr starpu_csr_filter_vertical_block + +#define starpu_block_filter_func starpu_matrix_filter_block +#define starpu_block_shadow_filter_func starpu_matrix_filter_block_shadow +#define starpu_vertical_block_filter_func starpu_matrix_filter_vertical_block +#define starpu_vertical_block_shadow_filter_func starpu_matrix_filter_vertical_block_shadow + +#define starpu_block_filter_func_vector starpu_vector_filter_block +#define starpu_block_shadow_filter_func_vector starpu_vector_filter_block_shadow +#define starpu_vector_list_filter_func starpu_vector_filter_list +#define starpu_vector_divide_in_2_filter_func starpu_vector_filter_divide_in_2 + +#define starpu_block_filter_func_block starpu_block_filter_block +#define starpu_block_shadow_filter_func_block starpu_block_filter_block_shadow +#define starpu_vertical_block_filter_func_block starpu_block_filter_vertical_block +#define starpu_vertical_block_shadow_filter_func_block 
starpu_block_filter_vertical_block_shadow +#define starpu_depth_block_filter_func_block starpu_block_filter_depth_block +#define starpu_depth_block_shadow_filter_func_block starpu_block_filter_depth_block_shadow + +#define starpu_display_codelet_stats starpu_codelet_display_stats + +#define starpu_access_mode starpu_data_access_mode +#define starpu_buffer_descr starpu_data_descr +#define starpu_memory_display_stats starpu_data_display_memory_stats +#define starpu_handle_to_pointer starpu_data_handle_to_pointer +#define starpu_handle_get_local_ptr starpu_data_get_local_ptr +#define starpu_crc32_be_n starpu_hash_crc32c_be_n +#define starpu_crc32_be starpu_hash_crc32c_be +#define starpu_crc32_string starpu_hash_crc32c_string +#define starpu_perf_archtype starpu_perfmodel_archtype +#define starpu_history_based_expected_perf starpu_perfmodel_history_based_expected_perf +#define starpu_task_profiling_info starpu_profiling_task_info +#define starpu_worker_profiling_info starpu_profiling_worker_info +#define starpu_bus_profiling_info starpu_profiling_bus_info +#define starpu_set_profiling_id starpu_profiling_set_id +#define starpu_worker_get_profiling_info starpu_profiling_worker_get_info +#define starpu_bus_profiling_helper_display_summary starpu_profiling_bus_helper_display_summary +#define starpu_worker_profiling_helper_display_summary starpu_profiling_worker_helper_display_summary +#define starpu_archtype starpu_worker_archtype + +#define starpu_handle_get_interface_id starpu_data_get_interface_id +#define starpu_handle_get_size starpu_data_get_size +#define starpu_handle_pack_data starpu_data_pack +#define starpu_handle_unpack_data starpu_data_unpack + +#endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */ + +#ifdef STARPU_USE_DEPRECATED_API +typedef starpu_data_handle_t starpu_data_handle; +typedef struct starpu_block_interface starpu_block_interface_t; +typedef struct starpu_matrix_interface starpu_matrix_interface_t; +typedef struct starpu_vector_interface 
starpu_vector_interface_t; +typedef struct starpu_variable_interface starpu_variable_interface_t; +typedef struct starpu_csr_interface starpu_csr_interface_t; +typedef struct starpu_bcsr_interface starpu_bcsr_interface_t; +typedef struct starpu_multiformat_interface starpu_multiformat_interface_t; +#define starpu_machine_topology_s starpu_machine_topology +#define starpu_htbl32_node_s starpu_htbl32_node +#define starpu_history_list_t starpu_history_list +#define starpu_buffer_descr_t starpu_buffer_descr +#define starpu_regression_model_t starpu_regression_model +#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel +#define starpu_perfmodel_t starpu_perfmodel +#define starpu_sched_policy_s starpu_sched_policy +#define starpu_data_interface_ops_t starpu_data_interface_ops + +typedef struct starpu_buffer_descr starpu_buffer_descr; +typedef struct starpu_codelet starpu_codelet; +typedef struct starpu_codelet starpu_codelet_t; +typedef enum starpu_access_mode starpu_access_mode; + +#define starpu_print_bus_bandwidth starpu_bus_print_bandwidth +#define starpu_get_handle_interface_id starpu_handle_get_interface_id +#define starpu_get_current_task starpu_task_get_current +#define starpu_unpack_cl_args starpu_codelet_unpack_args +#define starpu_pack_cl_args starpu_codelet_pack_args +#define starpu_task_deinit starpu_task_clean + +#endif /* STARPU_USE_DEPRECATED_API */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_DEPRECATED_API_H__ */ diff --git a/include/starpu_disk.h b/include/starpu_disk.h new file mode 100644 index 0000000..2e54389 --- /dev/null +++ b/include/starpu_disk.h @@ -0,0 +1,230 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_DISK_H__ +#define __STARPU_DISK_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Out_Of_Core Out Of Core + @{ +*/ + +/** + Set of functions to manipulate data on disk. See \ref DiskFunctions for more details. +*/ +struct starpu_disk_ops +{ + /** + Connect a disk memory at location \p parameter with size \p size, and return a + base as void*, which will be passed by StarPU to all other methods. + */ + void *(*plug)(void *parameter, starpu_ssize_t size); + /** + Disconnect a disk memory \p base. + */ + void (*unplug)(void *base); + + /** + Measure the bandwidth and the latency for the disk \p node and save it. Returns + 1 if it could measure it. + */ + int (*bandwidth)(unsigned node, void *base); + + /** + Create a new location for data of size \p size. Return an opaque object pointer. + */ + void *(*alloc)(void *base, size_t size); + + /** + Free a data \p obj previously allocated with starpu_disk_ops::alloc. + */ + void (*free)(void *base, void *obj, size_t size); + + /** + Open an existing location of data, at a specific position \p pos dependent on the backend. + */ + void *(*open)(void *base, void *pos, size_t size); + /** + Close, without deleting it, a location of data \p obj. 
+ */ + void (*close)(void *base, void *obj, size_t size); + + /** + Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put + into \p buf. Return the actual number of read bytes. + */ + int (*read)(void *base, void *obj, void *buf, off_t offset, size_t size); + /** + Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return 0 on success. + */ + int (*write)(void *base, void *obj, const void *buf, off_t offset, size_t size); + + /** + Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size + */ + int (*full_read)(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node); + /** + Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to + \p size, so that a \c full_read will get it. + */ + int (*full_write)(void *base, void *obj, void *ptr, size_t size); + + /** + Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p + offset, from \p buf. Return a void* pointer that StarPU will pass to \c + xxx_request methods for testing for the completion. + */ + void *(*async_write)(void *base, void *obj, void *buf, off_t offset, size_t size); + /** + Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p + offset, and put into \p buf. Return a void* pointer that StarPU will pass to \c + xxx_request methods for testing for the completion. + */ + void *(*async_read)(void *base, void *obj, void *buf, off_t offset, size_t size); + + /** + Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size + */ + void *(*async_full_read)(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node); + /** + Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to + \p size, so that a starpu_disk_ops::full_read will get it. 
+ */ + void *(*async_full_write)(void *base, void *obj, void *ptr, size_t size); + + /** + Copy from offset \p offset_src of disk object \p obj_src in \p base_src to + offset \p offset_dst of disk object \p obj_dst in \p base_dst. Return a void* + pointer that StarPU will pass to \c xxx_request methods for testing for the + completion. + */ + void *(*copy)(void *base_src, void *obj_src, off_t offset_src, void *base_dst, void *obj_dst, off_t offset_dst, size_t size); + + /** + Wait for completion of request \p async_channel returned by a previous + asynchronous read, write or copy. + */ + void (*wait_request)(void *async_channel); + /** + Test for completion of request \p async_channel returned by a previous + asynchronous read, write or copy. Return 1 on completion, 0 otherwise. + */ + int (*test_request)(void *async_channel); + + /** + Free the request allocated by a previous asynchronous read, write or copy. + */ + void (*free_request)(void *async_channel); + + /* TODO: readv, writev, read2d, write2d, etc. */ +}; + +/** + Use the stdio library (fwrite, fread...) to read/write on disk. + + Warning: It creates one file per allocation ! + + Do not support asynchronous transfers. +*/ +extern struct starpu_disk_ops starpu_disk_stdio_ops; + +/** + Use the HDF5 library. + + It doesn't support multiple opening from different processes. + + You may only allow one process to write in the HDF5 file. + + If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. +*/ +extern struct starpu_disk_ops starpu_disk_hdf5_ops; + +/** + Use the unistd library (write, read...) to read/write on disk. + + Warning: It creates one file per allocation ! +*/ +extern struct starpu_disk_ops starpu_disk_unistd_ops; + +/** + Use the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. + + Warning: It creates one file per allocation ! + + Only available on Linux systems. 
+*/ +extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops; + +/** + Use the leveldb created by Google. More information at https://code.google.com/p/leveldb/ + Do not support asynchronous transfers. +*/ +extern struct starpu_disk_ops starpu_disk_leveldb_ops; + +/** + Close an existing data opened with starpu_disk_open(). See \ref OutOfCore_Introduction for more details. +*/ +void starpu_disk_close(unsigned node, void *obj, size_t size); + +/** + Open an existing file memory in a disk node. \p size is the size of + the file. \p pos is the specific position dependent on the backend, + given to the \c open method of the disk operations. Return an + opaque object pointer. See \ref OutOfCore_Introduction for more details. +*/ +void *starpu_disk_open(unsigned node, void *pos, size_t size); + +/** + Register a disk memory node with a set of functions to manipulate + data. The \c plug member of \p func will be passed \p parameter, + and return a \c base which will be passed to all \p func methods. +
    + SUCCESS: return the disk node.
    + FAIL: return an error code.
    + \p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size + being negative means infinite size. + + See \ref OutOfCore_Introduction for more details. +*/ +int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size); + +/** + Minimum size of a registered disk. The size of a disk is the last + parameter of the function starpu_disk_register(). +*/ +#define STARPU_DISK_SIZE_MIN (16 * 1024 * 1024) + +/** + Contain the node number of the disk swap, if set up through the + \ref STARPU_DISK_SWAP variable. +*/ +extern int starpu_disk_swap_node; + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_DISK_H__ */ diff --git a/include/starpu_driver.h b/include/starpu_driver.h new file mode 100644 index 0000000..6bff38c --- /dev/null +++ b/include/starpu_driver.h @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_DRIVER_H__ +#define __STARPU_DRIVER_H__ + +#include +#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) +#include +#endif + +#if defined(STARPU_USE_MAX_FPGA) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Running_Drivers Running Drivers + @{ +*/ + +/** + Pre-initialize drivers + So as to register information on device types, memory types, etc. 
+ Only use internally by StarPU. +*/ +void starpu_drivers_preinit(void); + +/** + structure for designating a given driver. See \ref UsingTheDriverAPI for more details. +*/ +struct starpu_driver +{ + /** + Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER + and ::STARPU_OPENCL_WORKER are currently supported. + */ + enum starpu_worker_archtype type; + /** + Identifier of the driver. + */ + union + { + unsigned cpu_id; + unsigned cuda_id; + unsigned hip_id; +#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) + cl_device_id opencl_id; +#endif + } id; +}; + +/** + Initialize the given driver, run it until it receives a request to + terminate, deinitialize it and return 0 on success. Return + -EINVAL if starpu_driver::type is not a valid StarPU device type + (::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER). + + This is the same as using the following functions: calling + starpu_driver_init(), then calling starpu_driver_run_once() in a loop, + and finally starpu_driver_deinit(). + + See \ref UsingTheDriverAPI for more details. +*/ +int starpu_driver_run(struct starpu_driver *d); + +/** + Notify all running drivers that they should terminate. + See \ref UsingTheDriverAPI for more details. +*/ +void starpu_drivers_request_termination(void); + +/** + Initialize the given driver. Return 0 on success, -EINVAL + if starpu_driver::type is not a valid ::starpu_worker_archtype. + See \ref UsingTheDriverAPI for more details. +*/ +int starpu_driver_init(struct starpu_driver *d); + +/** + Run the driver once, then return 0 on success, -EINVAL if + starpu_driver::type is not a valid ::starpu_worker_archtype. + See \ref UsingTheDriverAPI for more details. +*/ +int starpu_driver_run_once(struct starpu_driver *d); + +/** + Deinitialize the given driver. Return 0 on success, -EINVAL if + starpu_driver::type is not a valid ::starpu_worker_archtype. + See \ref UsingTheDriverAPI for more details. 
+*/ +int starpu_driver_deinit(struct starpu_driver *d); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_DRIVER_H__ */ diff --git a/include/starpu_expert.h b/include/starpu_expert.h new file mode 100644 index 0000000..f776eef --- /dev/null +++ b/include/starpu_expert.h @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_EXPERT_H__ +#define __STARPU_EXPERT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Expert_Mode Expert Mode + @{ +*/ + +/** + Wake all the workers, so they can inspect data requests and task + submissions again. +*/ +void starpu_wake_all_blocked_workers(void); + +/** + Register a progression hook, to be called when workers are idle. +*/ +int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg); + +/** + Unregister a given progression hook. 
+*/ +void starpu_progression_hook_deregister(int hook_id); + +int starpu_idle_hook_register(unsigned (*func)(void *arg), void *arg); +void starpu_idle_hook_deregister(int hook_id); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_EXPERT_H__ */ diff --git a/include/starpu_fxt.h b/include/starpu_fxt.h new file mode 100644 index 0000000..df27e9a --- /dev/null +++ b/include/starpu_fxt.h @@ -0,0 +1,189 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2013-2013 Joris Pablo + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_FXT_H__ +#define __STARPU_FXT_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_FxT_Support FxT Support + @{ +*/ + +/** + Describe one codelet event, as collected in starpu_fxt_options::dumped_codelets. +*/ +struct starpu_fxt_codelet_event +{ + char symbol[2048]; + int workerid; + char perfmodel_archname[256]; + uint32_t hash; + size_t size; + float time; +}; + +/** + Store information related to clock synchronizations: mainly the offset to apply to each time.
+*/ +struct starpu_fxt_mpi_offset +{ + uint64_t local_time_start; /**< node time for the barrier at the beginning of the program */ + int64_t offset_start; /**< offset to apply to node time, computed at the beginning of the program */ + uint64_t local_time_end; /**< node time for the barrier at the end of the program (optional) */ + int64_t offset_end; /**< offset to apply to node time, computed at the end of the program (optional) */ + int nb_barriers; /**< number of barriers to synchronize clocks during the execution of the program + (can be 0, 1 or 2) */ +}; + +/** + todo +*/ +struct starpu_fxt_options +{ + unsigned per_task_colour; + unsigned no_events; + unsigned no_counter; + unsigned no_bus; + unsigned no_flops; + unsigned ninputfiles; + unsigned no_smooth; + unsigned no_acquire; + unsigned memory_states; + unsigned internal; + unsigned label_deps; + unsigned use_task_color; + char *filenames[STARPU_FXT_MAX_FILES]; + char *out_paje_path; + char *distrib_time_path; + char *activity_path; + char *sched_tasks_path; + char *dag_path; + char *tasks_path; + char *data_path; + char *papi_path; + char *comms_path; + char *number_events_path; + char *anim_path; + char *states_path; + char *dir; + char worker_names[STARPU_NMAXWORKERS][256]; + int nworkers; + struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS]; + + /** + In case we are going to gather multiple traces (e.g in the case of + MPI processes), we may need to prefix the name of the containers. + */ + char *file_prefix; + + /** + In case we are going to gather multiple traces (e.g in the case of + MPI processes), we may need to synchronize clocks and apply an offset. + */ + struct starpu_fxt_mpi_offset file_offset; + + /** + In case we are going to gather multiple traces (e.g in the case of + MPI processes), this variable stores the MPI rank of the trace file. 
+ */ + int file_rank; + + /** + In case we want to dump the list of codelets to an external tool + */ + struct starpu_fxt_codelet_event **dumped_codelets; + + /** + In case we want to dump the list of codelets to an external tool, number + of dumped codelets. + */ + long dumped_codelets_count; +}; + +void starpu_fxt_options_init(struct starpu_fxt_options *options); +void starpu_fxt_options_shutdown(struct starpu_fxt_options *options); +void starpu_fxt_generate_trace(struct starpu_fxt_options *options); + +/** + Determine whether profiling should be started by starpu_init(), or only when + starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to + prevent it. + This function has to be called before starpu_init(). + See \ref LimitingScopeTrace for more details. +*/ +void starpu_fxt_autostart_profiling(int autostart); + +/** + Start recording the trace. The trace is by default started from + starpu_init() call, but can be paused by using + starpu_fxt_stop_profiling(), in which case + starpu_fxt_start_profiling() should be called to resume recording + events. + See \ref LimitingScopeTrace for more details. +*/ +void starpu_fxt_start_profiling(void); + +/** + Stop recording the trace. The trace is by default stopped when calling + starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to + stop it earlier. starpu_fxt_start_profiling() can then be called to + start recording it again, etc. + See \ref LimitingScopeTrace for more details. +*/ +void starpu_fxt_stop_profiling(void); + +void starpu_fxt_write_data_trace(char *filename_in); +void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir); + +/** + Wrapper to get value of env variable STARPU_FXT_TRACE +*/ +int starpu_fxt_is_enabled(void); + +/** + Add an event in the execution trace if FxT is enabled. + See \ref CreatingAGanttDiagram for more details. 
+*/ +void starpu_fxt_trace_user_event(unsigned long code); + +/** + Add a string event in the execution trace if FxT is enabled. + See \ref CreatingAGanttDiagram for more details. +*/ +void starpu_fxt_trace_user_event_string(const char *s); + +/** + Add a string event in the execution trace if FxT is enabled even during initialization. +*/ +void starpu_fxt_trace_user_meta_string(const char *s); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_FXT_H__ */ diff --git a/include/starpu_hash.h b/include/starpu_hash.h new file mode 100644 index 0000000..460e595 --- /dev/null +++ b/include/starpu_hash.h @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_HASH_H__ +#define __STARPU_HASH_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_Data_Interfaces + @{ +*/ + +/** + Compute the CRC of a byte buffer seeded by the \p inputcrc + current state. The return value should be considered as the new + current state for future CRC computation. This is used for computing + data size footprint. + See \ref DefiningANewDataInterface_footprint for more details. +*/ +uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc); + +/** + Compute the CRC of a pointer value seeded by the \p inputcrc + current state. 
The return value should be considered as the new + current state for future CRC computation. This is used for computing + data size footprint. + See \ref DefiningANewDataInterface_footprint for more details. +*/ +uint32_t starpu_hash_crc32c_be_ptr(void *input, uint32_t inputcrc); + +/** + Compute the CRC of a 32bit number seeded by the \p inputcrc + current state. The return value should be considered as the new + current state for future CRC computation. This is used for computing + data size footprint. + See \ref DefiningANewDataInterface_footprint for more details. +*/ +uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc); + +/** + Compute the CRC of a string seeded by the \p inputcrc current + state. The return value should be considered as the new current + state for future CRC computation. This is used for computing data + size footprint. + See \ref DefiningANewDataInterface_footprint for more details. +*/ +uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_HASH_H__ */ diff --git a/include/starpu_helper.h b/include/starpu_helper.h new file mode 100644 index 0000000..da8e42f --- /dev/null +++ b/include/starpu_helper.h @@ -0,0 +1,285 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_HELPER_H__ +#define __STARPU_HELPER_H__ + +#include + +#ifdef STARPU_HAVE_HWLOC +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Miscellaneous_Helpers Miscellaneous Helpers + @{ +*/ + +/** + Return the min of the two parameters. +*/ +#define STARPU_MIN(a, b) ((a) < (b) ? (a) : (b)) +/** + Return the max of the two parameters. +*/ +#define STARPU_MAX(a, b) ((a) < (b) ? (b) : (a)) + +/** + Define a value which can be used to mark pointers as invalid + values. +*/ +#define STARPU_POISON_PTR ((void *)0xdeadbeef) + +extern int _starpu_silent; + +/** + Retrieve the value of an environment variable. + See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. + */ +char *starpu_getenv(const char *str); + +/** + Same as starpu_get_env_string_var_default() + */ +#define starpu_getenv_string_var_default(s, ss, d) starpu_get_env_string_var_default(s, ss, d) + +/** + If the environment variable \c str is defined and its value is contained in the array \c strings, return the array position. + Raise an error if the environment variable \c str is defined with a value not in \c strings + Return \c defvalue if the environment variable \c str is not defined. + See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. + */ +int starpu_get_env_string_var_default(const char *str, const char *strings[], int defvalue); + +/** + Same as starpu_get_env_size_default() + */ +#define starpu_getenv_size_default(s, d) starpu_get_env_size_default(s, d) + +/** + If the environment variable \c str is defined with a well-defined size value, return the value as a size in bytes. Expected size qualifiers are b, B, k, K, m, M, g, G. The default qualifier is K. + If the environment variable \c str is not defined or is empty, return \c defval + Raise an error if the value of the environment variable \c str is not well-defined. 
+ See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. + */ +int starpu_get_env_size_default(const char *str, int defval); + +/** + Same as starpu_get_env_number() + */ +#define starpu_getenv_number(s) starpu_get_env_number(s) + +/** + Return the integer value of the environment variable named \p str, or + -1 if the variable is not set. A value that is not an integer triggers + STARPU_ABORT(), and a negative value fails a STARPU_ASSERT_MSG() check. +*/ +static __starpu_inline int starpu_get_env_number(const char *str) +{ + char *strval; + + strval = starpu_getenv(str); + if (strval) + { + /* the env variable was actually set */ + long int val; + char *pcheck; + + val = strtol(strval, &pcheck, 10); + if (*pcheck) + { + fprintf(stderr, "The %s environment variable must contain an integer\n", str); + STARPU_ABORT(); + } + + /* fprintf(stderr, "ENV %s WAS %d\n", str, val); */ + STARPU_ASSERT_MSG(val >= 0, "The value for the environment variable '%s' cannot be negative", str); + return (int)val; + } + else + { + /* there is no such env variable */ + /* fprintf("There was no %s ENV\n", str); */ + return -1; + } +} + +/** + Same as starpu_get_env_number_default() + */ +#define starpu_getenv_number_default(s, d) starpu_get_env_number_default(s, d) + +static __starpu_inline int starpu_get_env_number_default(const char *str, int defval) +{ + int ret = starpu_get_env_number(str); + if (ret == -1) + ret = defval; + return ret; +} + +/** + Same as starpu_get_env_float_default() + */ +#define starpu_getenv_float_default(s, d) starpu_get_env_float_default(s, d) + +static __starpu_inline float starpu_get_env_float_default(const char *str, float defval) +{ + char *strval; + + strval = starpu_getenv(str); + if (strval) + { + /* the env variable was actually set */ + float val; + char *pcheck; + + val = strtof(strval, &pcheck); + if (*pcheck) + { + fprintf(stderr, "The %s environment variable must contain a float\n", str); + STARPU_ABORT(); + } + + /* fprintf(stderr, "ENV %s WAS %f\n", str, val); */ + return val; + } + else + {
+ /* there is no such env variable */ + /* fprintf("There was no %s ENV\n", str); */ + return defval; + } +} + +/** + Execute the given function \p func on a subset of workers. When + calling this method, the offloaded function \p func is executed by + every StarPU worker that are eligible to execute the function. The + argument \p arg is passed to the offloaded function. The argument + \p where specifies on which types of processing units the function + should be executed. + Similarly to the field starpu_codelet::where, it is possible to + specify that the function should be executed on every CUDA device + and every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function + blocks until \p func has been executed on every appropriate + processing units, and thus may not be called from a callback + function for instance. + See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. +*/ +void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where); + +/** + Same as starpu_execute_on_each_worker(), except that the task name + is specified in the argument \p name. + See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. +*/ +void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name); + +/** + Call \p func(\p arg) on every worker in the \p workers array. \p + num_workers indicates the number of workers in this array. This + function is synchronous, but the different workers may execute the + function in parallel. + See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. +*/ +void starpu_execute_on_specific_workers(void (*func)(void *), void *arg, unsigned num_workers, unsigned *workers, const char *name); + +/** + Return the current date in micro-seconds. See \ref Preparing for more details. +*/ +double starpu_timing_now(void); + +/** + Copy the content of \p src_handle into \p dst_handle. 
The parameter \p + asynchronous indicates whether the function should block or not. In + the case of an asynchronous call, it is possible to synchronize with + the termination of this operation either by the means of implicit + dependencies (if enabled) or by calling starpu_task_wait_for_all(). If + \p callback_func is not NULL, this callback function is executed after + the handle has been copied, and it is given the pointer \p + callback_arg as argument. + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void *), void *callback_arg); + +/** + Like starpu_data_cpy(), copy the content of \p src_handle into \p dst_handle, + but additionally take a \p priority parameter to sort it among the whole task + graph. + See \ref DataHandlesHelpers for more details. +*/ +int starpu_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority); + +/** + Create a copy of \p src_handle, and return a new handle in \p dst_handle, + which is to be used only for read accesses. This allows StarPU to optimize it + by not actually copying the data whenever possible (e.g. it may possibly + simply return src_handle itself). + The parameter \p asynchronous indicates whether the function should block + or not. In the case of an asynchronous call, it is possible to synchronize + with the termination of this operation either by the means of implicit + dependencies (if enabled) or by calling starpu_task_wait_for_all(). + Unlike starpu_data_cpy(), this function takes no \c callback_func or + \c callback_arg parameter, so completion cannot be signalled through a + callback. + See \ref DataHandlesHelpers for more details.
+*/ +int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous); + +/** + Call hwloc-ps or lstopo to display binding of each process and thread running on + the machine.
    + Use the environment variable \ref STARPU_DISPLAY_BINDINGS to automatically + call this function at the beginning of the execution of StarPU. + See \ref MiscellaneousAndDebug for more details. +*/ +void starpu_display_bindings(void); + +/** + If \c hwloc is used, convert the given \p logical_index of a PU to the OS + index of this PU. If \c hwloc is not used, return \p logical_index. + See \ref HardwareTopology for more details. +*/ +int starpu_get_pu_os_index(unsigned logical_index); + +/** + Return a bitmap representing logical indexes of NUMA nodes where the buffer + targeted by \p ptr is allocated. An error is notified by a negative result. + See \ref HardwareTopology for more details. +*/ +long starpu_get_memory_location_bitmap(void *ptr, size_t size); + +#ifdef STARPU_HAVE_HWLOC +/** + Get the hwloc topology used by StarPU. One can use this pointer to get + information about topology, but not to change settings related to topology. + See \ref HardwareTopology for more details. +*/ +hwloc_topology_t starpu_get_hwloc_topology(void); +#endif +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_HELPER_H__ diff --git a/include/starpu_hip.h b/include/starpu_hip.h new file mode 100644 index 0000000..22b147a --- /dev/null +++ b/include/starpu_hip.h @@ -0,0 +1,148 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#ifndef __STARPU_HIP_H__ +#define __STARPU_HIP_H__ + +#include + +#ifdef STARPU_USE_HIP + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#ifndef __cplusplus +#pragma GCC diagnostic ignored "-Wimplicit-int" +#endif +#pragma GCC diagnostic ignored "-Wreturn-type" +#ifdef STARPU_USE_HIPBLAS +#include +#endif +#include +#include +#pragma GCC diagnostic pop + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_HIP_Extensions HIP Extensions + @{ + */ + +/** + Report a HIPBLAS error. +*/ +void starpu_hipblas_report_error(const char *func, const char *file, int line, int status); + +/** + Call starpu_hipblas_report_error(), passing the current function, file and line position. +*/ +#define STARPU_HIPBLAS_REPORT_ERROR(status) starpu_hipblas_report_error(__starpu_func__, __FILE__, __LINE__, status) + +/** + Report a HIP error. +*/ +void starpu_hip_report_error(const char *func, const char *file, int line, hipError_t status); + +/** + Call starpu_hip_report_error(), passing the current function, file and line position. +*/ +#define STARPU_HIP_REPORT_ERROR(status) starpu_hip_report_error(__starpu_func__, __FILE__, __LINE__, status) + +/** + Return the current worker’s HIP stream. StarPU provides a stream + for every HIP device controlled by StarPU. This function is only + provided for convenience so that programmers can easily use + asynchronous operations within codelets without having to create a + stream by hand. Note that the application is not forced to use the + stream provided by starpu_hip_get_local_stream() and may also + create its own streams. Synchronizing with + hipDeviceSynchronize() is allowed, but will reduce the + likelihood of having all transfers overlapped. 
+*/ +hipStream_t starpu_hip_get_local_stream(void); + +/** + Return a pointer to device properties for worker \p workerid + (assumed to be a HIP worker). +*/ +const struct hipDeviceProp_t *starpu_hip_get_device_properties(unsigned workerid); + +/** + Copy \p ssize bytes from the pointer \p src_ptr on \p src_node + to the pointer \p dst_ptr on \p dst_node. The function first tries to + copy the data asynchronously (unless \p stream is NULL). If the + asynchronous copy fails or if \p stream is NULL, it copies the + data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous + copy was successful, or fails otherwise. +*/ +int starpu_hip_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, hipStream_t stream, hipMemcpyKind kind); + +/** + Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on + \p src_node to the pointer \p dst_ptr on \p dst_node. + + The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in + the source (resp. destination) interface. + + The function first tries to copy the data asynchronously (unless \p stream is + NULL). If the asynchronous copy fails or if \p stream is NULL, + it copies the data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous copy was + successful, or fails otherwise. +*/ +int starpu_hip_copy2d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + hipStream_t stream, hipMemcpyKind kind); + +/** + Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the + pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. + + The blocks are grouped by \p numblocks_1 blocks whose start addresses are + ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) + interface.
+ + The function first tries to copy the data asynchronously (unless \p stream is + NULL). If the asynchronous copy fails or if \p stream is NULL, + it copies the data synchronously. The function returns -EAGAIN if the + asynchronous launch was successful. It returns 0 if the synchronous copy was + successful, or fails otherwise. +*/ +int starpu_hip_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + hipStream_t stream, hipMemcpyKind kind); + +/** + Call hipSetDevice(\p devid). +*/ +void starpu_hip_set_device(int devid); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_HIP */ +#endif /* __STARPU_HIP_H__ */ diff --git a/include/starpu_hipblas.h b/include/starpu_hipblas.h new file mode 100644 index 0000000..77292a4 --- /dev/null +++ b/include/starpu_hipblas.h @@ -0,0 +1,67 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_HIPBLAS_H__ +#define __STARPU_HIPBLAS_H__ + +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS +#include +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_HIP_Extensions + @{ + */ + +/** + Initialize HIPBLAS on every HIP device.
The + HIPBLAS library must be initialized prior to any HIPBLAS call. Calling + starpu_hipblas_init() will initialize HIPBLAS on every HIP device + controlled by StarPU. This call blocks until HIPBLAS has been properly + initialized on every device. +*/ +void starpu_hipblas_init(void); + +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS +/** + Return the HIPBLAS handle to be used to queue HIPBLAS kernels. It + is properly initialized and configured for multistream by + starpu_hipblas_init(). +*/ +hipblasHandle_t starpu_hipblas_get_local_handle(void); +#endif +#endif + +/** + Synchronously deinitialize the HIPBLAS library on + every HIP device. +*/ +void starpu_hipblas_shutdown(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_HIPBLAS_H__ */ diff --git a/include/starpu_max_fpga.h b/include/starpu_max_fpga.h new file mode 100644 index 0000000..9238608 --- /dev/null +++ b/include/starpu_max_fpga.h @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MAX_FPGA_H__ +#define __STARPU_MAX_FPGA_H__ + +#include + +#if defined STARPU_USE_MAX_FPGA +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Max_FPGA_Extensions Maxeler FPGA Extensions + @{ +*/ + +/** + This specifies a Maxeler file to be loaded on some engines. 
+*/ +struct starpu_max_load +{ + max_file_t *file; /**< Provide the file to be loaded */ + const char *engine_id_pattern; /**< Provide the engine(s) on which to be loaded, following + the Maxeler engine naming, i.e. typically + "*:0", "*:1", etc. + In an array of struct starpu_max_load, only one can have + the "*" specification. */ +}; + +/** + Maxeler engine of the current worker. + See \ref MaxFPGAExample for more details. +*/ +max_engine_t *starpu_max_fpga_get_local_engine(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_MAX_FPGA */ +#endif /* __STARPU_MAX_FPGA_H__ */ diff --git a/include/starpu_mod.f90 b/include/starpu_mod.f90 new file mode 100644 index 0000000..9cce981 --- /dev/null +++ b/include/starpu_mod.f90 @@ -0,0 +1,145 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +MODULE starpu_mod + ! == starpu.h == + + ! starpu_conf_init + INTERFACE + SUBROUTINE starpu_conf_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + END SUBROUTINE starpu_conf_init + END INTERFACE + + ! starpu_init + INTERFACE + FUNCTION starpu_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + INTEGER(KIND=C_INT) :: starpu_init + END FUNCTION starpu_init + END INTERFACE + + ! starpu_initialize + + ! 
starpu_pause + INTERFACE + SUBROUTINE starpu_pause() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_pause + END INTERFACE + + ! starpu_resume + INTERFACE + SUBROUTINE starpu_resume() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_resume + END INTERFACE + + ! starpu_shutdown + INTERFACE + SUBROUTINE starpu_shutdown() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_shutdown + END INTERFACE + + ! starpu_topology_print + + ! starpu_asynchronous_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_copy_disabled + END INTERFACE + + ! starpu_asynchronous_cuda_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_cuda_copy_disabled + END INTERFACE + + ! starpu_asynchronous_opencl_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_opencl_copy_disabled + END INTERFACE + + ! starpu_display_stats + INTERFACE + SUBROUTINE starpu_display_stats() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_display_stats + END INTERFACE + + ! starpu_get_version + INTERFACE + SUBROUTINE starpu_get_version(major,minor,release) BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release + END SUBROUTINE starpu_get_version + END INTERFACE + + ! starpu_cpu_worker_get_count + INTERFACE + FUNCTION starpu_cpu_worker_get_count() BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count + END FUNCTION starpu_cpu_worker_get_count + END INTERFACE + + ! == starpu_task.h == + + ! starpu_tag_declare_deps + ! starpu_tag_declare_deps_array + ! starpu_task_declare_deps_array + ! starpu_tag_wait + ! starpu_tag_wait_array + ! starpu_tag_notify_from_apps + ! starpu_tag_restart + ! starpu_tag_remove + ! starpu_task_init + ! starpu_task_clean + ! starpu_task_create + ! 
starpu_task_destroy + ! starpu_task_set_destroy + ! starpu_task_submit + ! starpu_task_submit_to_ctx + ! starpu_task_finished + ! starpu_task_wait + ! starpu_task_wait_for_all + INTERFACE + SUBROUTINE starpu_task_wait_for_all() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_task_wait_for_all + END INTERFACE + ! starpu_task_wait_for_n_submitted + ! starpu_task_wait_for_all_in_ctx + ! starpu_task_wait_for_n_submitted_in_ctx + ! starpu_task_wait_for_no_ready + ! starpu_task_nready + ! starpu_task_nsubmitted + ! starpu_codelet_init + ! starpu_codelet_display_stats + ! starpu_task_get_current + ! starpu_parallel_task_barrier_init + ! starpu_parallel_task_barrier_init_n + ! starpu_task_dup + ! starpu_task_set_implementation + ! starpu_task_get_implementation + +END MODULE starpu_mod diff --git a/include/starpu_opencl.h b/include/starpu_opencl.h new file mode 100644 index 0000000..0df064d --- /dev/null +++ b/include/starpu_opencl.h @@ -0,0 +1,359 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_OPENCL_H__ +#define __STARPU_OPENCL_H__ + +#include +#ifdef STARPU_USE_OPENCL +#ifndef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 100 +#endif +#ifdef __APPLE__ +#include +#else +#include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_OpenCL_Extensions OpenCL Extensions + @{ +*/ + +/** + Store the OpenCL programs as compiled for the different OpenCL + devices. +*/ +struct starpu_opencl_program +{ + /** Store each program for each OpenCL device. */ + cl_program programs[STARPU_MAXOPENCLDEVS]; +}; + +/** + @name Writing OpenCL kernels + @{ +*/ + +/** + Return the OpenCL context of the device designated by \p devid + in \p context. + See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_get_context(int devid, cl_context *context); + +/** + Return the cl_device_id corresponding to \p devid in \p device. + See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_get_device(int devid, cl_device_id *device); + +/** + Return the command queue of the device designated by \p devid + into \p queue. + See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_get_queue(int devid, cl_command_queue *queue); + +/** + Return the context of the current worker. + See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_get_current_context(cl_context *context); + +/** + Return the computation kernel command queue of the current + worker. + See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_get_current_queue(cl_command_queue *queue); + +/** + Set the arguments of a given kernel. The list of arguments + must be given as (size_t size_of_the_argument, cl_mem * + pointer_to_the_argument). The last argument must be 0. Return the + number of arguments that were successfully set. In case of failure, + return the id of the argument that could not be set and \p err is set to + the error returned by OpenCL. Otherwise, return the number of + arguments that were set. 
+ + Here is an example: + \code{.c} + int n; + cl_int err; + cl_kernel kernel; + n = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0); + if (n != 2) fprintf(stderr, "Error : %d\n", err); + \endcode + + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...); + +/** @} */ + +/** + @name Compiling OpenCL kernels + Source codes for OpenCL kernels can be stored in a file or in a + string. StarPU provides functions to build the program executable for + each available OpenCL device as a cl_program object. This program + executable can then be loaded within a specific queue as explained in + the next section. These are only helpers; applications can also fill a + starpu_opencl_program array by hand for more advanced use (e.g. + different programs on the different OpenCL devices, for relocation + purpose for instance). + @{ +*/ + +/** + Store the contents of the file \p source_file_name in the buffer + \p opencl_program_source. The file \p source_file_name can be located in the + current directory, or in the directory specified by the environment + variable \ref STARPU_OPENCL_PROGRAM_DIR, or + in the directory share/starpu/opencl of the installation + directory of StarPU, or in the source directory of StarPU. When the + file is found, \p located_file_name is the full name of the file as it + has been located on the system, \p located_dir_name the directory + where it has been located. Otherwise, they are both set to the empty + string. See \ref OpenCLSupport for more details. +*/ +void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source); + +/** + Similar to function starpu_opencl_load_program_source() but + allocate the buffers \p located_file_name, \p located_dir_name and + \p opencl_program_source. + See \ref OpenCLSupport for more details.
+*/ +void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source); + +/** + Compile the OpenCL kernel stored in the file \p source_file_name + with the given options \p build_options and store the result in the + directory $STARPU_HOME/.starpu/opencl with the same filename as + \p source_file_name. The compilation is done for every OpenCL device, + and the filename is suffixed with the vendor id and the device id of + the OpenCL device. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options); + +/** + Compile the OpenCL kernel in the string \p opencl_program_source + with the given options \p build_options and store the result in the + directory $STARPU_HOME/.starpu/opencl with the filename \p + file_name. The compilation is done for every OpenCL device, and the + filename is suffixed with the vendor id and the device id of the + OpenCL device. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options); + +/** + Compile the binary OpenCL kernel identified with \p kernel_id. + For every OpenCL device, the binary OpenCL kernel will be loaded from + the file + $STARPU_HOME/.starpu/opencl/\.\.vendor_id_\_device_id_\. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs); + +/** + Compile an OpenCL source code stored in a file. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options); +/** + Compile an OpenCL source code stored in a string. + See \ref OpenCLSupport for more details. 
+ */ +int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options); + +/** + Unload an OpenCL compiled code. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs); + +/** @} */ + +/** + @name Loading OpenCL kernels + @{ +*/ + +/** + Create a kernel \p kernel for device \p devid, on its computation + command queue returned in \p queue, using program \p opencl_programs + and name \p kernel_name. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid); + +/** + Release the given \p kernel, to be called after kernel execution. + See \ref OpenCLSupport for more details. +*/ +int starpu_opencl_release_kernel(cl_kernel kernel); + +/** @} */ + +/** + @name OpenCL Statistics + @{ +*/ + +/** + Collect statistics on a kernel execution. + After termination of the kernels, the OpenCL codelet should call this + function with the event returned by \c clEnqueueNDRangeKernel(), to + let StarPU collect statistics about the kernel execution (used cycles, + consumed energy). See \ref OpenCL-specificOptimizations for more details. +*/ +int starpu_opencl_collect_stats(cl_event event); + +/** @} */ + +/** + @name OpenCL Utilities + @{ +*/ + +/** + Return the error message in English corresponding to \p status, an OpenCL + error code. + See \ref OpenCLSupport for more details. +*/ +const char *starpu_opencl_error_string(cl_int status); + +/** + Given a valid error status, print the corresponding error message on + \c stdout, along with the function name \p func, the filename + \p file, the line number \p line and the message \p msg. + See \ref OpenCLSupport for more details. 
+*/ +void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status); + +/** + Call the function starpu_opencl_display_error() with the error + \p status, the current function name, current file and line number, + and an empty message. +*/ +#define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status) + +/** + Call the function starpu_opencl_display_error() and abort. +*/ +static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status) +{ + starpu_opencl_display_error(func, file, line, msg, status); + assert(0); +} + +/** + Call the function starpu_opencl_report_error() with the error \p + status, the current function name, current file and line number, + and an empty message. +*/ +#define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status) + +/** + Call the function starpu_opencl_report_error() with \p msg + and \p status, the current function name, current file and line number. +*/ +#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status) + +/** + Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid + combination of \c cl_mem_flags values. + See \ref DefiningANewDataInterface_allocation for more details. +*/ +cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags); + +/** + Copy \p size bytes from the given \p ptr on RAM \p src_node to the + given \p buffer on OpenCL \p dst_node. \p offset is the offset, in + bytes, in \p buffer. If \p event is NULL, the copy is + synchronous, i.e. the queue is synchronised before returning. If not + NULL, \p event can be used after the call to wait for this + particular copy to complete.
This function returns CL_SUCCESS + if the copy was successful, or a valid OpenCL error code otherwise. + The integer pointed to by \p ret is set to -EAGAIN if the + asynchronous launch was successful, or to 0 if \p event was + NULL. + See \ref DefiningANewDataInterface_copy for more details. +*/ +cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret); + +/** + Copy \p size bytes asynchronously from the given \p buffer on OpenCL + \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the + offset, in bytes, in \p buffer. If \p event is NULL, the copy + is synchronous, i.e. the queue is synchronised before returning. If not + NULL, \p event can be used after the call to wait for this + particular copy to complete. This function returns CL_SUCCESS + if the copy was successful, or a valid OpenCL error code otherwise. + The integer pointed to by \p ret is set to -EAGAIN if the + asynchronous launch was successful, or to 0 if \p event was + NULL. + See \ref DefiningANewDataInterface_copy for more details. +*/ +cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret); + +/** + Copy \p size bytes asynchronously from byte offset \p src_offset of \p + src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on + OpenCL \p dst_node. If \p event is NULL, the copy is + synchronous, i.e. the queue is synchronised before returning. If not + NULL, \p event can be used after the call to wait for this + particular copy to complete. This function returns CL_SUCCESS + if the copy was successful, or a valid OpenCL error code otherwise. + The integer pointed to by \p ret is set to -EAGAIN if the + asynchronous launch was successful, or to 0 if \p event was + NULL. + See \ref DefiningANewDataInterface_copy for more details.
+*/ +cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret); + +/** + Copy \p size bytes from byte offset \p src_offset of \p src on \p + src_node to byte offset \p dst_offset of \p dst on \p dst_node. if \p + event is NULL, the copy is synchronous, i.e. the queue is + synchronised before returning. If not NULL, \p event can be + used after the call to wait for this particular copy to complete. The + function returns -EAGAIN if the asynchronous launch was + successful. It returns 0 if the synchronous copy was successful, or + fails otherwise. + See \ref DefiningANewDataInterface_copy for more details. +*/ +cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_OPENCL */ + +#endif /* __STARPU_OPENCL_H__ */ diff --git a/include/starpu_openmp.h b/include/starpu_openmp.h new file mode 100644 index 0000000..71c5edb --- /dev/null +++ b/include/starpu_openmp.h @@ -0,0 +1,1315 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_OPENMP_H__ +#define __STARPU_OPENMP_H__ + +#include + +/** + @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support + @brief API for implementing OpenMP runtimes on top of StarPU. + @{ +*/ + +#if defined STARPU_OPENMP +/** + Opaque Simple Lock object (\anchor SimpleLock) for inter-task + synchronization operations. + \sa starpu_omp_init_lock() + \sa starpu_omp_destroy_lock() + \sa starpu_omp_set_lock() + \sa starpu_omp_unset_lock() + \sa starpu_omp_test_lock() +*/ +typedef struct +{ + void *internal; /**< opaque pointer for internal use */ +} starpu_omp_lock_t; + +/** + Opaque Nestable Lock object (\anchor NestableLock) for inter-task + synchronization operations. + \sa starpu_omp_init_nest_lock() + \sa starpu_omp_destroy_nest_lock() + \sa starpu_omp_set_nest_lock() + \sa starpu_omp_unset_nest_lock() + \sa starpu_omp_test_nest_lock() +*/ +typedef struct +{ + void *internal; /**< opaque pointer for internal use */ +} starpu_omp_nest_lock_t; + +/** + Set of constants for selecting the for loop iteration scheduling + algorithm (\anchor OMPFor) as defined by the OpenMP specification. + \sa starpu_omp_for() + \sa starpu_omp_for_inline_first() + \sa starpu_omp_for_inline_next() + \sa starpu_omp_for_alt() + \sa starpu_omp_for_inline_first_alt() + \sa starpu_omp_for_inline_next_alt() +*/ +enum starpu_omp_sched_value +{ + starpu_omp_sched_undefined = 0, /**< Undefined iteration scheduling algorithm. */ + starpu_omp_sched_static = 1, /**< \b Static iteration scheduling algorithm.*/ + starpu_omp_sched_dynamic = 2, /**< \b Dynamic iteration scheduling algorithm.*/ + starpu_omp_sched_guided = 3, /**< \b Guided iteration scheduling algorithm.*/ + starpu_omp_sched_auto = 4, /**< \b Automatically chosen iteration scheduling algorithm.*/ + starpu_omp_sched_runtime = 5 /**< Choice of iteration scheduling algorithm deferred at \b runtime.*/ +}; + +/** + Set of constants for selecting the processor binding method, as + defined in the OpenMP specification. 
+ \sa starpu_omp_get_proc_bind() +*/ +enum starpu_omp_proc_bind_value +{ + starpu_omp_proc_bind_undefined = -1, /**< Undefined processor binding method.*/ + starpu_omp_proc_bind_false = 0, /**< Team threads may be moved between places at any time.*/ + starpu_omp_proc_bind_true = 1, /**< Team threads may not be moved between places.*/ + starpu_omp_proc_bind_master = 2, /**< Assign every thread in the team to the same place as the \b master thread.*/ + starpu_omp_proc_bind_close = 3, /**< Assign every thread in the team to a place \b close to the parent thread.*/ + starpu_omp_proc_bind_spread = 4 /**< Assign team threads as a sparse distribution over the selected places.*/ +}; + +/** + Set of attributes used for creating a new parallel region. + \sa starpu_omp_parallel_region() +*/ +struct starpu_omp_parallel_region_attr +{ + /** + ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the + parallel region implicit tasks. The codelet must provide a + CPU implementation function. + */ + struct starpu_codelet cl; + /** + Array of zero or more ::starpu_data_handle_t data handle to + be passed to the parallel region implicit tasks. + */ + starpu_data_handle_t *handles; + /** + Optional pointer to an inline argument to be passed to the + region implicit tasks. + */ + void *cl_arg; + /** + Size of the optional inline argument to be passed to the + region implicit tasks, or 0 if unused. + */ + size_t cl_arg_size; + /** + Boolean indicating whether the optional inline argument + should be automatically freed (true), or not (false). + */ + unsigned cl_arg_free; + + /** + Boolean indicating whether the \b if clause of the + corresponding pragma omp parallel is true or false. + */ + int if_clause; + + /** + Integer indicating the requested number of threads in the + team of the newly created parallel region, or 0 to let the + runtime choose the number of threads alone. 
This attribute + may be ignored by the runtime system if the requested + number of threads is higher than the number of threads that + the runtime can create. + */ + int num_threads; +}; + +/** + Set of attributes used for creating a new task region. + \sa starpu_omp_task_region() +*/ +struct starpu_omp_task_region_attr +{ + /** + ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for + the task region explicit task. The codelet must provide a + CPU implementation function or an accelerator + implementation for offloaded target regions. + */ + struct starpu_codelet cl; + /** + Array of zero or more ::starpu_data_handle_t data handle to + be passed to the task region explicit tasks. + */ + starpu_data_handle_t *handles; + /** + Optional pointer to an inline argument to be passed to the + region implicit tasks. + */ + void *cl_arg; + /** + Size of the optional inline argument to be passed to the + region implicit tasks, or 0 if unused. + */ + size_t cl_arg_size; + /** + Boolean indicating whether the optional inline argument + should be automatically freed (true), or not (false). + */ + unsigned cl_arg_free; + int priority; + + /** + Boolean indicating whether the \b if clause of the + corresponding pragma omp task is true or false. + */ + int if_clause; + /** + Boolean indicating whether the \b final clause of the + corresponding pragma omp task is true or false. + */ + int final_clause; + + /** + Boolean indicating whether the \b untied clause of the + corresponding pragma omp task is true or false. + */ + int untied_clause; + /** + Boolean indicating whether the \b mergeable clause of the + corresponding pragma omp task is true or false. 
+ */ + int mergeable_clause; + + /** + taskloop attribute + */ + int is_loop; + int nogroup_clause; + + int collapse; + int num_tasks; + unsigned long long nb_iterations; + unsigned long long grainsize; + unsigned long long begin_i; + unsigned long long end_i; + unsigned long long chunk; +}; + +#ifdef __cplusplus +extern "C" { +#define __STARPU_OMP_NOTHROW throw() +#else +#define __STARPU_OMP_NOTHROW __attribute__((__nothrow__)) +#endif + +/** + @name Initialisation + @{ +*/ + +/** + Initialize StarPU and its OpenMP Runtime support. See \ref OMPInitExit for more details. +*/ +extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW; +/** + Shutdown StarPU and its OpenMP Runtime support. See \ref OMPInitExit for more details. +*/ +extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW; + +/** @} */ + +/** + @name Parallel + \anchor ORS_Parallel + @{ +*/ + +/** + Generate and launch an OpenMP parallel region and return after its + completion. \p attr specifies the attributes for the generated parallel region. + If this function is called from inside another, generating, parallel region, the + generated parallel region is nested within the generating parallel region. + + This function can be used to implement \#pragma omp parallel. + See \ref OMPParallel for more details. +*/ +extern void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) __STARPU_OMP_NOTHROW; + +/** + Execute a function only on the master thread of the OpenMP + parallel region it is called from. When called from a thread that is not the + master of the parallel region it is called from, this function does nothing. \p + f is the function to be called. \p arg is an argument passed to function \p f. + + This function can be used to implement \#pragma omp master. + See \ref OMPSingle for more details. 
+*/ +extern void starpu_omp_master(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; + +/** + Determine whether the calling thread is the master of the OpenMP parallel region + it is called from or not. + + This function can be used to implement \#pragma omp master without code + outlining. + \return !0 if called by the region's master thread. + \return 0 if not called by the region's master thread. + See \ref OMPSingle for more details. +*/ +extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW; + +/** @} */ + +/** + @name Synchronization + \anchor ORS_Synchronization + @{ +*/ + +/** + Wait until each participating thread of the innermost OpenMP parallel region + has reached the barrier and each explicit OpenMP task bound to this region has + completed its execution. + + This function can be used to implement \#pragma omp barrier. + See \ref OMPBarrier for more details. +*/ +extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW; + +/** + Wait until no other thread is executing within the context of the selected + critical section, then proceeds to the exclusive execution of a function within + the critical section. \p f is the function to be executed in the critical + section. \p arg is an argument passed to function \p f. \p name is the name of + the selected critical section. If name == NULL, the selected critical + section is the unique anonymous critical section. + + This function can be used to implement \#pragma omp + critical. + + See \ref OMPCritical for more details. +*/ +extern void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) __STARPU_OMP_NOTHROW; + +/** + Wait until execution can proceed exclusively within the context of the + selected critical section. \p name is the name of the selected critical + section. If name == NULL, the selected critical section is the unique + anonymous critical section. 
+ + This function together with #starpu_omp_critical_inline_end can be used to + implement \#pragma omp critical without code outlining. + + See \ref OMPCritical for more details. +*/ +extern void starpu_omp_critical_inline_begin(const char *name) __STARPU_OMP_NOTHROW; + +/** + End the exclusive execution within the context of the selected critical + section. \p name is the name of the selected critical section. If + name==NULL, the selected critical section is the unique anonymous + critical section. + + This function together with #starpu_omp_critical_inline_begin can be used to + implement \#pragma omp critical without code outlining. + + See \ref OMPCritical for more details. +*/ +extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHROW; + +/** @} */ + +/** + @name Worksharing + \anchor ORS_Worksharing + @{ +*/ + +/** + Ensure that a single participating thread of the innermost OpenMP parallel + region executes a function. \p f is the function to be executed by a single + thread. \p arg is an argument passed to function \p f. \p nowait is a flag + indicating whether an implicit barrier is requested after the single section + (nowait==0) or not (nowait==!0). + + This function can be used to implement \#pragma omp single. + See \ref OMPSingle for more details. +*/ +extern void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW; + +/** + Decide whether the current thread is elected to run the following single + section among the participating threads of the innermost OpenMP parallel + region. + + This function can be used to implement \#pragma omp single without code + outlining. + \return !0 if the calling thread has won the election. + \return 0 if the calling thread has lost the election. + See \ref OMPSingle for more details. 
+*/ +extern int starpu_omp_single_inline(void) __STARPU_OMP_NOTHROW; + +/** + Execute \p f on a single task of the current parallel region + task, and then broadcast the contents of the memory block pointed by the + copyprivate pointer \p data and of size \p data_size to the corresponding \p + data pointed memory blocks of all the other participating region tasks. This + function can be used to implement \#pragma omp single with a copyprivate + clause. + + \sa starpu_omp_single_copyprivate_inline + \sa starpu_omp_single_copyprivate_inline_begin + \sa starpu_omp_single_copyprivate_inline_end + + See \ref OMPSingle for more details. +*/ +extern void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) __STARPU_OMP_NOTHROW; + +/** + Elect one task among the tasks of the current parallel region + task to execute the following single section, and then broadcast the + copyprivate pointer \p data to all the other participating region tasks. This + function can be used to implement \#pragma omp single with a copyprivate + clause without code outlining. + + \sa starpu_omp_single_copyprivate_inline + \sa starpu_omp_single_copyprivate_inline_end + + See \ref OMPSingle for more details. +*/ +extern void *starpu_omp_single_copyprivate_inline_begin(void *data) __STARPU_OMP_NOTHROW; + +/** + Complete the execution of a single section and return the + broadcasted copyprivate pointer for tasks that lost the election and NULL for + the task that won the election. This function can be used to implement + \#pragma omp single with a copyprivate clause without code outlining. + + Return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section. + Return NULL for the task that won the election and executed the code of the single section. 
+ + \sa starpu_omp_single_copyprivate_inline + \sa starpu_omp_single_copyprivate_inline_begin + + See \ref OMPSingle for more details. +*/ +extern void starpu_omp_single_copyprivate_inline_end(void) __STARPU_OMP_NOTHROW; + +/** + Execute a parallel loop together with the other threads participating to the + innermost parallel region. \p f is the function to be executed iteratively. \p + arg is an argument passed to function \p f. \p nb_iterations is the number of + iterations to be performed by the parallel loop. \p chunk is the number of + consecutive iterations that should be affected to the same thread when + scheduling the loop workshares, it follows the semantics of the \c modifier + argument in OpenMP \#pragma omp for specification. \p schedule is the + scheduling mode according to the OpenMP specification. \p ordered is a flag + indicating whether the loop region may contain an ordered section + (ordered==!0) or not (ordered==0). \p nowait is a flag + indicating whether an implicit barrier is requested after the for section + (nowait==0) or not (nowait==!0). + + The function \p f will be called with arguments \p _first_i, the first iteration + to perform, \p _nb_i, the number of consecutive iterations to perform before + returning, \p arg, the free \p arg argument. + + This function can be used to implement \#pragma omp for. + See \ref OMPFor for more details. +*/ +extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW; + +/** + Decide whether the current thread should start to execute a parallel loop + section. See #starpu_omp_for for the argument description. + + This function together with #starpu_omp_for_inline_next can be used to + implement \#pragma omp for without code outlining. 
+ + \return !0 if the calling thread participates to the loop region and + should execute a first chunk of iterations. In that case, \p *_first_i will be + set to the first iteration of the chunk to perform and \p *_nb_i will be set to + the number of iterations of the chunk to perform. + + \return 0 if the calling thread does not participate to the loop region + because all the available iterations have been affected to the other threads of + the parallel region. + + \sa starpu_omp_for + + See \ref OMPFor for more details. +*/ +extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW; + +/** + Decide whether the current thread should continue to execute a parallel loop + section. See #starpu_omp_for for the argument description. + + This function together with #starpu_omp_for_inline_first can be used to + implement \#pragma omp for without code outlining. + + \return !0 if the calling thread should execute a next chunk of + iterations. In that case, \p *_first_i will be set to the first iteration of the + chunk to perform and \p *_nb_i will be set to the number of iterations of the + chunk to perform. + + \return 0 if the calling thread does not participate anymore to the loop + region because all the available iterations have been affected to the other + threads of the parallel region. + + \sa starpu_omp_for + + See \ref OMPFor for more details. +*/ +extern int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW; + +/** + Alternative implementation of a parallel loop. Differ from + #starpu_omp_for in the expected arguments of the loop function \c f. 
+ + The function \p f will be called with arguments \p _begin_i, the first iteration + to perform, \p _end_i, the first iteration not to perform before + returning, \p arg, the free \p arg argument. + + This function can be used to implement \#pragma omp for. + + \sa starpu_omp_for + + See \ref OMPFor for more details. +*/ +extern void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW; + +/** + Inline version of the alternative implementation of a parallel loop. + + This function together with #starpu_omp_for_inline_next_alt can be used to + implement \#pragma omp for without code outlining. + + \sa starpu_omp_for + \sa starpu_omp_for_alt + \sa starpu_omp_for_inline_first + + See \ref OMPFor for more details. +*/ +extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW; + +/** + Inline version of the alternative implementation of a parallel loop. + + This function together with #starpu_omp_for_inline_first_alt can be used to + implement \#pragma omp for without code outlining. + + \sa starpu_omp_for + \sa starpu_omp_for_alt + \sa starpu_omp_for_inline_next + + See \ref OMPFor for more details. +*/ +extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW; + +/** + Ensure that a function is sequentially executed once for each iteration in + order within a parallel loop, by the thread that own the iteration. \p f is the + function to be executed by the thread that own the current iteration. \p arg is + an argument passed to function \p f. 
+ + This function can be used to implement \#pragma omp ordered. + + See \ref OMPFor for more details. +*/ +extern void starpu_omp_ordered(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; + +/** + Wait until all the iterations of a parallel loop below the iteration owned by + the current thread have been executed. + + This function together with #starpu_omp_ordered_inline_end can be used to + implement \#pragma omp ordered without code outlining. + + See \ref OMPFor for more details. +*/ +extern void starpu_omp_ordered_inline_begin(void) __STARPU_OMP_NOTHROW; + +/** + Notify that the ordered section for the current iteration has been completed. + + This function together with #starpu_omp_ordered_inline_begin can be used to + implement \#pragma omp ordered without code outlining. + + See \ref OMPFor for more details. +*/ +extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW; + +/** + Ensure that each function of a given array of functions is executed by one and + only one thread. \p nb_sections is the number of functions in the array \p + section_f. \p section_f is the array of functions to be executed as sections. \p + section_arg is an array of arguments to be passed to the corresponding function. + \p nowait is a flag indicating whether an implicit barrier is requested after + the execution of all the sections (nowait==0) or not (nowait==!0). + + This function can be used to implement \#pragma omp sections and \#pragma omp section. + + See \ref OMPSections for more details. + */ +extern void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW; + +/** + Alternative implementation of sections. Differs from + #starpu_omp_sections in that all the sections are combined within a single + function in this version. \p section_f is the function implementing the combined + sections.
+ + The function \p section_f will be called with arguments \p section_num, the + section number to be executed, \p arg, the entry of \p section_arg corresponding + to this section. + + This function can be used to implement \#pragma omp sections and \#pragma omp section. + + \sa starpu_omp_sections + + See \ref OMPSections for more details. + */ +extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) __STARPU_OMP_NOTHROW; + +/** @} */ + +/** + @name Task + \anchor ORS_Task + @{ +*/ + +/** + Generate an explicit child task. The execution of the generated task is + asynchronous with respect to the calling code unless specified otherwise. + \p attr specifies the attributes for the generated task region. + + This function can be used to implement \#pragma omp task. + + See \ref OMPTaskExplicit for more details. + */ +extern void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; + +/** + Wait for the completion of the tasks generated by the current task. This + function does not wait for the descendants of the tasks generated by the current + task. + + This function can be used to implement \#pragma omp taskwait. + + See \ref OMPTaskSyncs for more details. + */ +extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW; + +/** + Launch a function and wait for the completion of every descendant task + generated during the execution of the function. + + This function can be used to implement \#pragma omp taskgroup. + + \sa starpu_omp_taskgroup_inline_begin + \sa starpu_omp_taskgroup_inline_end + + See \ref OMPTaskSyncs for more details. + */ +extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; + +/** + Get ready to wait for the completion of every descendant task + generated during the dynamic scope of the taskgroup.
+ + This function can be used to implement \#pragma omp taskgroup without code outlining. + + \sa starpu_omp_taskgroup + \sa starpu_omp_taskgroup_inline_end + + See \ref OMPTaskSyncs for more details. + */ +extern void starpu_omp_taskgroup_inline_begin(void) __STARPU_OMP_NOTHROW; + +/** + Wait for the completion of every descendant task + generated during the dynamic scope of the taskgroup. + + This function can be used to implement \#pragma omp taskgroup without code outlining. + + \sa starpu_omp_taskgroup + \sa starpu_omp_taskgroup_inline_begin + + See \ref OMPTaskSyncs for more details. + */ +extern void starpu_omp_taskgroup_inline_end(void) __STARPU_OMP_NOTHROW; + +extern void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; + +extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; + +/** @} */ + +/** + @name API + \anchor ORS_API + @{ +*/ + +/** + Set ICVS nthreads_var for the parallel regions to be created + with the current region. + + Note: The StarPU OpenMP runtime support currently ignores + this setting for nested parallel regions. + + \sa starpu_omp_get_num_threads + \sa starpu_omp_get_thread_num + \sa starpu_omp_get_max_threads + \sa starpu_omp_get_num_procs + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW; + +/** + Return the number of threads of the current region. + + \return the number of threads of the current region. + + \sa starpu_omp_set_num_threads + \sa starpu_omp_get_thread_num + \sa starpu_omp_get_max_threads + \sa starpu_omp_get_num_procs + + See \ref OMPStandard for more details. + */ +extern int starpu_omp_get_num_threads(void) __STARPU_OMP_NOTHROW; + +/** + Return the rank of the current thread among the threads + of the current region. + + \return the rank of the current thread in the current region. 
+ + \sa starpu_omp_set_num_threads + \sa starpu_omp_get_num_threads + \sa starpu_omp_get_max_threads + \sa starpu_omp_get_num_procs + + See \ref OMPStandard for more details. + */ +extern int starpu_omp_get_thread_num(void) __STARPU_OMP_NOTHROW; + +/** + Return the maximum number of threads that can be used to + create a region from the current region. + + \return the maximum number of threads that can be used to create a region from the current region. + + \sa starpu_omp_set_num_threads + \sa starpu_omp_get_num_threads + \sa starpu_omp_get_thread_num + \sa starpu_omp_get_num_procs + + See \ref OMPStandard for more details. + */ +extern int starpu_omp_get_max_threads(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of StarPU CPU workers. + + \return the number of StarPU CPU workers. + + \sa starpu_omp_set_num_threads + \sa starpu_omp_get_num_threads + \sa starpu_omp_get_thread_num + \sa starpu_omp_get_max_threads + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_num_procs(void) __STARPU_OMP_NOTHROW; + +/** + Return whether it is called from the scope of a parallel region or not. + + \return !0 if called from a parallel region scope. + \return 0 otherwise. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_in_parallel(void) __STARPU_OMP_NOTHROW; + +/** + Enable (1) or disable (0) dynamically adjusting the number of parallel threads. + + Note: The StarPU OpenMP runtime support currently ignores the argument of this function. + + \sa starpu_omp_get_dynamic + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_set_dynamic(int dynamic_threads) __STARPU_OMP_NOTHROW; + +/** + Return the state of dynamic thread number adjustment. + + \return !0 if dynamic thread number adjustment is enabled. + \return 0 otherwise. + + \sa starpu_omp_set_dynamic + + See \ref OMPStandard for more details. 
+*/ +extern int starpu_omp_get_dynamic(void) __STARPU_OMP_NOTHROW; + +/** + Enable (1) or disable (0) nested parallel regions. + + Note: The StarPU OpenMP runtime support currently ignores the argument of this function. + + \sa starpu_omp_get_nested + \sa starpu_omp_get_max_active_levels + \sa starpu_omp_set_max_active_levels + \sa starpu_omp_get_level + \sa starpu_omp_get_active_level + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_set_nested(int nested) __STARPU_OMP_NOTHROW; + +/** + Return whether nested parallel sections are enabled or not. + + \return !0 if nested parallel sections are enabled. + \return 0 otherwise. + + \sa starpu_omp_set_nested + \sa starpu_omp_get_max_active_levels + \sa starpu_omp_set_max_active_levels + \sa starpu_omp_get_level + \sa starpu_omp_get_active_level + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_nested(void) __STARPU_OMP_NOTHROW; + +/** + Return the state of the cancel ICVS var. + + See \ref OMPStandard for more details. + */ +extern int starpu_omp_get_cancellation(void) __STARPU_OMP_NOTHROW; + +/** + Set the default scheduling kind for upcoming loops within the + current parallel section. \p kind is the scheduler kind, \p modifier + complements the scheduler kind with information such as the chunk size, + in accordance with the OpenMP specification. + + \sa starpu_omp_get_schedule + + See \ref OMPFor for more details. + */ +extern void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW; + +/** + Return the kind and the modifier of the current default loop scheduler. + + \sa starpu_omp_set_schedule + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW; + +/** + Return the number of StarPU CPU workers. + + \return the number of StarPU CPU workers. + + See \ref OMPStandard for more details. 
+*/ +extern int starpu_omp_get_thread_limit(void) __STARPU_OMP_NOTHROW; + +/** + Set the maximum number of allowed active parallel section levels. + + Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assume \p max_levels equals 1 instead. + + \sa starpu_omp_set_nested + \sa starpu_omp_get_nested + \sa starpu_omp_get_max_active_levels + \sa starpu_omp_get_level + \sa starpu_omp_get_active_level + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_set_max_active_levels(int max_levels) __STARPU_OMP_NOTHROW; + +/** + Return the current maximum number of allowed active parallel section levels + + \return the current maximum number of allowed active parallel section levels. + + \sa starpu_omp_set_nested + \sa starpu_omp_get_nested + \sa starpu_omp_set_max_active_levels + \sa starpu_omp_get_level + \sa starpu_omp_get_active_level + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_max_active_levels(void) __STARPU_OMP_NOTHROW; + +/** + Return the nesting level of the current parallel section. + + \return the nesting level of the current parallel section. + + \sa starpu_omp_set_nested + \sa starpu_omp_get_nested + \sa starpu_omp_get_max_active_levels + \sa starpu_omp_set_max_active_levels + \sa starpu_omp_get_active_level + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_level(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of the ancestor of the current parallel section. + + \return the number of the ancestor of the current parallel section. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_ancestor_thread_num(int level) __STARPU_OMP_NOTHROW; + +/** + Return the size of the team of the current parallel section. + + \return the size of the team of the current parallel section. + + See \ref OMPStandard for more details. 
+*/ +extern int starpu_omp_get_team_size(int level) __STARPU_OMP_NOTHROW; + +/** + Return the nesting level of the current innermost active parallel section. + + \return the nesting level of the current innermost active parallel section. + + \sa starpu_omp_set_nested + \sa starpu_omp_get_nested + \sa starpu_omp_get_max_active_levels + \sa starpu_omp_set_max_active_levels + \sa starpu_omp_get_level + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_active_level(void) __STARPU_OMP_NOTHROW; + +/** + Check whether the current task is final or not. + + \return !0 if called from a final task. + \return 0 otherwise. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_in_final(void) __STARPU_OMP_NOTHROW; + +/** + Return the proc_bind setting of the current parallel region. + + \return the proc_bind setting of the current parallel region. + + See \ref OMPStandard for more details. +*/ +extern enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of places available to the execution environment in the place list. + + \return the number of places available to the execution environment in the place list. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_num_places(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of processors available to the execution environment in the specified place. + + \return the number of processors available to the execution environment in the specified place. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_place_num_procs(int place_num) __STARPU_OMP_NOTHROW; + +/** + Return the numerical identifiers of the processors available to the execution environment in the specified place. + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_get_place_proc_ids(int place_num, int *ids) __STARPU_OMP_NOTHROW; + +/** + Return the place number of the place to which the encountering thread is bound.
+ + \return the place number of the place to which the encountering thread is bound. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_place_num(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of places in the place partition of the innermost implicit task. + + \return the number of places in the place partition of the innermost implicit task. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_partition_num_places(void) __STARPU_OMP_NOTHROW; + +/** + Return the list of place numbers corresponding to the places in the place-partition-var ICV of the innermost implicit task. + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_get_partition_place_nums(int *place_nums) __STARPU_OMP_NOTHROW; + +/** + Set the number of the device to use as default. + + Note: The StarPU OpenMP runtime support currently ignores the argument of this function. + + \sa starpu_omp_get_default_device + \sa starpu_omp_is_initial_device + + See \ref OMPStandard for more details. +*/ +extern void starpu_omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW; + +/** + Return the number of the device used as default. + + \return the number of the device used as default. + + \sa starpu_omp_set_default_device + \sa starpu_omp_is_initial_device + + See \ref OMPStandard for more details. + */ +extern int starpu_omp_get_default_device(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of devices. + + \return the number of devices. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_num_devices(void) __STARPU_OMP_NOTHROW; + +/** + Return the number of teams in the current teams region. + + \return the number of teams in the current teams region. + + \sa starpu_omp_get_team_num + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_num_teams(void) __STARPU_OMP_NOTHROW; + +/** + Return the team number of the calling thread. + + \return the team number of the calling thread.
+ + \sa starpu_omp_get_num_teams + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_team_num(void) __STARPU_OMP_NOTHROW; + +/** + Check whether the current device is the initial device or not: return !0 if called from the host device, 0 otherwise. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_is_initial_device(void) __STARPU_OMP_NOTHROW; + +/** + Return a device number that represents the host device. + + \return a device number that represents the host device. + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_initial_device(void) __STARPU_OMP_NOTHROW; + +/** + Return the maximum value that can be specified in the priority + clause. + + \return the maximum value that can be specified in the priority + clause. + + \sa starpu_omp_set_default_device + \sa starpu_omp_get_default_device + + See \ref OMPStandard for more details. +*/ +extern int starpu_omp_get_max_task_priority(void) __STARPU_OMP_NOTHROW; + +/** + Initialize an opaque lock object. + + \sa starpu_omp_destroy_lock + \sa starpu_omp_set_lock + \sa starpu_omp_unset_lock + \sa starpu_omp_test_lock + + See \ref OMPSimpleLock for more details. +*/ +extern void starpu_omp_init_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Destroy an opaque lock object. + + \sa starpu_omp_init_lock + \sa starpu_omp_set_lock + \sa starpu_omp_unset_lock + \sa starpu_omp_test_lock + + See \ref OMPSimpleLock for more details. +*/ +extern void starpu_omp_destroy_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Lock an opaque lock object. If the lock is already locked, the + function will block until it succeeds in exclusively acquiring the lock. + + \sa starpu_omp_init_lock + \sa starpu_omp_destroy_lock + \sa starpu_omp_unset_lock + \sa starpu_omp_test_lock + + See \ref OMPSimpleLock for more details. +*/ +extern void starpu_omp_set_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Unlock a previously locked lock object.
The behaviour of this + function is unspecified if it is called on an unlocked lock object. + + \sa starpu_omp_init_lock + \sa starpu_omp_destroy_lock + \sa starpu_omp_set_lock + \sa starpu_omp_test_lock + + See \ref OMPSimpleLock for more details. +*/ +extern void starpu_omp_unset_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Unblockingly attempt to lock a lock object and return whether + it succeeded or not. + + \return !0 if the function succeeded in acquiring the lock. + \return 0 if the lock was already locked. + + \sa starpu_omp_init_lock + \sa starpu_omp_destroy_lock + \sa starpu_omp_set_lock + \sa starpu_omp_unset_lock + + See \ref OMPSimpleLock for more details. +*/ +extern int starpu_omp_test_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Initialize an opaque lock object supporting nested locking operations. + + \sa starpu_omp_destroy_nest_lock + \sa starpu_omp_set_nest_lock + \sa starpu_omp_unset_nest_lock + \sa starpu_omp_test_nest_lock + + See \ref OMPNestableLock for more details. +*/ +extern void starpu_omp_init_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Destroy an opaque lock object supporting nested locking operations. + + \sa starpu_omp_init_nest_lock + \sa starpu_omp_set_nest_lock + \sa starpu_omp_unset_nest_lock + \sa starpu_omp_test_nest_lock + + See \ref OMPNestableLock for more details. +*/ +extern void starpu_omp_destroy_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Lock an opaque lock object supporting nested locking operations. + If the lock is already locked by another task, the function will block until + it succeeds in exclusively acquiring the lock. If the lock is already taken by + the current task, the function will increase the nested locking level of the + lock object. + + \sa starpu_omp_init_nest_lock + \sa starpu_omp_destroy_nest_lock + \sa starpu_omp_unset_nest_lock + \sa starpu_omp_test_nest_lock + + See \ref OMPNestableLock for more details. 
+*/ +extern void starpu_omp_set_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Unlock a previously locked lock object supporting nested locking + operations. If the lock has been locked multiple times in nested fashion, the + nested locking level is decreased and the lock remains locked. Otherwise, if + the lock has only been locked once, it becomes unlocked. The behaviour of this + function is unspecified if it is called on an unlocked lock object. The + behaviour of this function is unspecified if it is called from a different task + than the one that locked the lock object. + + \sa starpu_omp_init_nest_lock + \sa starpu_omp_destroy_nest_lock + \sa starpu_omp_set_nest_lock + \sa starpu_omp_test_nest_lock + + See \ref OMPNestableLock for more details. +*/ +extern void starpu_omp_unset_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Attempt, without blocking, to lock an opaque lock object supporting + nested locking operations, and return whether it succeeded or not. If the lock + is already locked by another task, the function will return without having + acquired the lock. If the lock is already taken by the current task, the + function will increase the nested locking level of the lock object. + + \return !0 if the function succeeded in acquiring the lock. + \return 0 if the lock was already locked by another task. + + \sa starpu_omp_init_nest_lock + \sa starpu_omp_destroy_nest_lock + \sa starpu_omp_set_nest_lock + \sa starpu_omp_unset_nest_lock + + See \ref OMPNestableLock for more details. +*/ +extern int starpu_omp_test_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; + +/** + Implement the entry point of a fallback global atomic region. + Block until it succeeds in acquiring exclusive access to the global atomic + region. + + \sa starpu_omp_atomic_fallback_inline_end + */ +extern void starpu_omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW; + +/** + Implement the exit point of a fallback global atomic region.
+ Release the exclusive access to the global atomic region. + + \sa starpu_omp_atomic_fallback_inline_begin + */ +extern void starpu_omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW; + +/** + Return the elapsed wallclock time in seconds. + + \return the elapsed wallclock time in seconds. + + \sa starpu_omp_get_wtick + + See \ref OMPStandard for more details. +*/ +extern double starpu_omp_get_wtime(void) __STARPU_OMP_NOTHROW; + +/** + Return the precision of the time used by starpu_omp_get_wtime(). + + \return the precision of the time used by starpu_omp_get_wtime(). + + \sa starpu_omp_get_wtime + + See \ref OMPStandard for more details. +*/ +extern double starpu_omp_get_wtick(void) __STARPU_OMP_NOTHROW; + +/** + Enable setting additional vector metadata needed by the OpenMP Runtime Support. + + \p handle is the vector data handle. + \p slice_base is the base of an array slice, expressed in number of vector elements from the array base. + + \sa STARPU_VECTOR_GET_SLICE_BASE + */ +extern void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) __STARPU_OMP_NOTHROW; + +/** + Only used internally by StarPU. + */ +extern struct starpu_arbiter *starpu_omp_get_default_arbiter(void) __STARPU_OMP_NOTHROW; + +/** + Register a handle for ptr->handle data lookup. + + \sa starpu_omp_handle_unregister + \sa starpu_omp_data_lookup + + See \ref OMPDataDependencies for more details. + */ +extern void starpu_omp_handle_register(starpu_data_handle_t handle) __STARPU_OMP_NOTHROW; + +/** + Unregister a handle from ptr->handle data lookup. + + \sa starpu_omp_handle_register + \sa starpu_omp_data_lookup + + See \ref OMPDataDependencies for more details. + */ +extern void starpu_omp_handle_unregister(starpu_data_handle_t handle) __STARPU_OMP_NOTHROW; + +/** + Return the handle corresponding to the data pointed to by the \p ptr host pointer. + + \return the handle or \c NULL if not found. + + See \ref OMPDataDependencies for more details.
+*/ +extern starpu_data_handle_t starpu_omp_data_lookup(const void *ptr) __STARPU_OMP_NOTHROW; + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_OPENMP && !STARPU_DONT_INCLUDE_OPENMP_HEADERS */ + +/** @} */ + +#endif /* __STARPU_OPENMP_H__ */ diff --git a/include/starpu_parallel_worker.h b/include/starpu_parallel_worker.h new file mode 100644 index 0000000..a3e0a1d --- /dev/null +++ b/include/starpu_parallel_worker.h @@ -0,0 +1,191 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_PARALLEL_WORKERS_UTIL_H__ +#define __STARPU_PARALLEL_WORKERS_UTIL_H__ + +#include + +#ifdef STARPU_PARALLEL_WORKER +#ifdef STARPU_HAVE_HWLOC + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Parallel_Worker Parallel Workers + @{ + */ + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_MIN_NB (1 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_MAX_NB (2 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_NB (3 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_PREFERE_MIN (4 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS (5 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_POLICY_NAME (6 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_POLICY_STRUCT (7 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_CREATE_FUNC (8 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG (9 << STARPU_MODE_SHIFT) +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_TYPE (10 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_AWAKE_WORKERS (11 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_PARTITION_ONE (12 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_NEW (13 << STARPU_MODE_SHIFT) + +/** + Used when 
calling starpu_parallel_worker_init() + */ +#define STARPU_PARALLEL_WORKER_NCORES (14 << STARPU_MODE_SHIFT) + +/** + These represent the default available functions to enforce parallel_worker + use by the sub-runtime +*/ +enum starpu_parallel_worker_types +{ + STARPU_PARALLEL_WORKER_OPENMP, /**< todo */ + STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL, /**< todo */ + STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL, /**< todo */ +}; + +/** + Parallel_Worker configuration + */ +struct starpu_parallel_worker_config; + +/** + Create parallel_workers on the machine with the given parameters. + See \ref CreatingParallel for more details. + + This returns NULL if too many parallel workers were created. + The --enable-max-sched-ctxs configure option can be used to increase the limitation. + */ +struct starpu_parallel_worker_config *starpu_parallel_worker_init(hwloc_obj_type_t parallel_worker_level, ...); + +/** + Delete the given parallel_workers configuration + */ +int starpu_parallel_worker_shutdown(struct starpu_parallel_worker_config *parallel_workers); + +/** + Print the given parallel_workers configuration. + See \ref CreatingParallel for more details. 
+ */ +int starpu_parallel_worker_print(struct starpu_parallel_worker_config *parallel_workers); + +/** Prologue functions */ +void starpu_parallel_worker_openmp_prologue(void *); +#define starpu_parallel_worker_intel_openmp_mkl_prologue starpu_parallel_worker_openmp_prologue +#ifdef STARPU_MKL +void starpu_parallel_worker_gnu_openmp_mkl_prologue(void *); +#endif /* STARPU_MKL */ + +#define STARPU_CLUSTER_MIN_NB STARPU_PARALLEL_WORKER_MIN_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_MIN_NB */ +#define STARPU_CLUSTER_MAX_NB STARPU_PARALLEL_WORKER_MAX_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_MAX_NB */ +#define STARPU_CLUSTER_NB STARPU_PARALLEL_WORKER_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_NB */ +#define STARPU_CLUSTER_PREFERE_MIN STARPU_PARALLEL_WORKER_PREFERE_MIN /**< @deprecated Use ::STARPU_PARALLEL_WORKER_PREFERE_MIN */ +#define STARPU_CLUSTER_KEEP_HOMOGENEOUS STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS /**< @deprecated Use ::STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS */ +#define STARPU_CLUSTER_POLICY_NAME STARPU_PARALLEL_WORKER_POLICY_NAME /**< @deprecated Use ::STARPU_PARALLEL_WORKER_POLICY_NAME */ +#define STARPU_CLUSTER_POLICY_STRUCT STARPU_PARALLEL_WORKER_POLICY_STRUCT /**< @deprecated Use ::STARPU_PARALLEL_WORKER_POLICY_STRUCT */ +#define STARPU_CLUSTER_CREATE_FUNC STARPU_PARALLEL_WORKER_CREATE_FUNC /**< @deprecated Use ::STARPU_PARALLEL_WORKER_CREATE_FUNC */ +#define STARPU_CLUSTER_CREATE_FUNC_ARG STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG /**< @deprecated Use ::STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG */ +#define STARPU_CLUSTER_TYPE STARPU_PARALLEL_WORKER_TYPE /**< @deprecated Use ::STARPU_PARALLEL_WORKER_TYPE */ +#define STARPU_CLUSTER_AWAKE_WORKERS STARPU_PARALLEL_WORKER_AWAKE_WORKERS /**< @deprecated Use ::STARPU_PARALLEL_WORKER_AWAKE_WORKERS */ +#define STARPU_CLUSTER_PARTITION_ONE STARPU_PARALLEL_WORKER_PARTITION_ONE /**< @deprecated Use ::STARPU_PARALLEL_WORKER_PARTITION_ONE */ +#define STARPU_CLUSTER_NEW STARPU_PARALLEL_WORKER_NEW /**< 
@deprecated Use ::STARPU_PARALLEL_WORKER_NEW */ +#define STARPU_CLUSTER_NCORES STARPU_PARALLEL_WORKER_NCORES /**< @deprecated Use ::STARPU_PARALLEL_WORKER_NCORES */ + +/** + @deprecated Use ::starpu_parallel_worker_types +*/ +enum starpu_cluster_types +{ + STARPU_CLUSTER_OPENMP, /**< deprecated */ + STARPU_CLUSTER_INTEL_OPENMP_MKL, /**< deprecated */ +#ifdef STARPU_MKL + STARPU_CLUSTER_GNU_OPENMP_MKL, /**< deprecated */ +#endif +}; +/** @deprecated Use starpu_parallel_worker_config */ +struct starpu_cluster_machine; +/** @deprecated Use starpu_parallel_worker_init() */ +struct starpu_cluster_machine *starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...) STARPU_DEPRECATED; +/** @deprecated Use starpu_parallel_worker_shutdown() */ +int starpu_uncluster_machine(struct starpu_cluster_machine *clusters) STARPU_DEPRECATED; +/** @deprecated Use starpu_parallel_worker_print() */ +int starpu_cluster_print(struct starpu_cluster_machine *clusters) STARPU_DEPRECATED; + +/** @} */ + +#ifdef __cplusplus +} +#endif +#endif +#endif + +#endif /* __STARPU_PARALLEL_WORKERS_UTIL_H__ */ diff --git a/include/starpu_perf_monitoring.h b/include/starpu_perf_monitoring.h new file mode 100644 index 0000000..4a0e27c --- /dev/null +++ b/include/starpu_perf_monitoring.h @@ -0,0 +1,268 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_PERF_MONITORING_H__ +#define __STARPU_PERF_MONITORING_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Perf_Monitoring Performance Monitoring Counters + @brief API to access performance monitoring counters. + @{ +*/ + +/** + @name API + \anchor PM_API + @{ +*/ + +/** + Enum of all possible performance counter scopes. +*/ +enum starpu_perf_counter_scope +{ + starpu_perf_counter_scope_undefined = 0, /**< undefined scope */ + starpu_perf_counter_scope_global = 2, /**< global scope */ + starpu_perf_counter_scope_per_worker = 4, /**< per-worker scope */ + starpu_perf_counter_scope_per_codelet = 6 /**< per-codelet scope */ +}; + +/** + Enum of all possible performance counter value type. +*/ +enum starpu_perf_counter_type +{ + starpu_perf_counter_type_undefined = 0, /**< undefined value type */ + starpu_perf_counter_type_int32 = 1, /**< signed 32-bit integer value */ + starpu_perf_counter_type_int64 = 2, /**< signed 64-bit integer value */ + starpu_perf_counter_type_float = 3, /**< 32-bit single precision floating-point value */ + starpu_perf_counter_type_double = 4 /**< 64-bit double precision floating-point value */ +}; + +struct starpu_perf_counter_listener; +struct starpu_perf_counter_sample; +struct starpu_perf_counter_set; + +/** + Start collecting performance counter values. +*/ +void starpu_perf_counter_collection_start(void); + +/** + Stop collecting performance counter values. +*/ +void starpu_perf_counter_collection_stop(void); + +/** @} */ + +/** + @name Scope Related Routines + @{ +*/ + +/** + Translate scope name constant string to scope id. +*/ +int starpu_perf_counter_scope_name_to_id(const char *name); + +/** + Translate scope id to scope name constant string. +*/ +const char *starpu_perf_counter_scope_id_to_name(enum starpu_perf_counter_scope scope); + +/** @} */ + +/** + @name Type Related Routines + @{ +*/ + +/** + Translate type name constant string to type id. 
+*/ +int starpu_perf_counter_type_name_to_id(const char *name); + +/** + Translate type id to type name constant string. +*/ +const char *starpu_perf_counter_type_id_to_name(enum starpu_perf_counter_type type); + +/** @} */ + +/** + @name Counter Related Routines + @{ +*/ + +/** + Return the number of performance counters for the given scope. +*/ +int starpu_perf_counter_nb(enum starpu_perf_counter_scope scope); + +/** + Translate a performance counter name to its id. +*/ +int starpu_perf_counter_name_to_id(enum starpu_perf_counter_scope scope, const char *name); + +/** + Translate a performance counter rank in its scope to its counter id. +*/ +int starpu_perf_counter_nth_to_id(enum starpu_perf_counter_scope scope, int nth); + +/** + Translate a counter id to its name constant string. +*/ +const char *starpu_perf_counter_id_to_name(int id); + +/** + Return the counter's type id. +*/ +int starpu_perf_counter_get_type_id(int id); + +/** + Return the counter's help string. +*/ +const char *starpu_perf_counter_get_help_string(int id); + +/** @} */ + +/** + @name Listener Related Routines + @{ +*/ + +/** + Display the list of counters defined in the given scope. +*/ +void starpu_perf_counter_list_avail(enum starpu_perf_counter_scope scope); + +/** + Display the list of counters defined in all scopes. +*/ +void starpu_perf_counter_list_all_avail(void); + +/** + Allocate a new performance counter set. +*/ +struct starpu_perf_counter_set *starpu_perf_counter_set_alloc(enum starpu_perf_counter_scope scope); + +/** + Free a performance counter set. +*/ +void starpu_perf_counter_set_free(struct starpu_perf_counter_set *set); + +/** + Enable a given counter in the set. +*/ +void starpu_perf_counter_set_enable_id(struct starpu_perf_counter_set *set, int id); + +/** + Disable a given counter in the set. +*/ +void starpu_perf_counter_set_disable_id(struct starpu_perf_counter_set *set, int id); + +/** + Initialize a new performance counter listener. 
+*/ +struct starpu_perf_counter_listener *starpu_perf_counter_listener_init(struct starpu_perf_counter_set *set, void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context), void *user_arg); + +/** + End a performance counter listener. +*/ +void starpu_perf_counter_listener_exit(struct starpu_perf_counter_listener *listener); + +/** + Set a listener for the global scope. +*/ +void starpu_perf_counter_set_global_listener(struct starpu_perf_counter_listener *listener); + +/** + Set a listener for the per_worker scope on a given worker. +*/ +void starpu_perf_counter_set_per_worker_listener(unsigned workerid, struct starpu_perf_counter_listener *listener); + +/** + Set a common listener for all workers. +*/ +void starpu_perf_counter_set_all_per_worker_listeners(struct starpu_perf_counter_listener *listener); + +/** + Set a per_codelet listener for a codelet. +*/ +void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, struct starpu_perf_counter_listener *listener); + +/** + Unset the global listener. +*/ +void starpu_perf_counter_unset_global_listener(void); + +/** + Unset the per_worker listener. +*/ +void starpu_perf_counter_unset_per_worker_listener(unsigned workerid); + +/** + Unset all per_worker listeners. +*/ +void starpu_perf_counter_unset_all_per_worker_listeners(void); + +/** + Unset a per_codelet listener. +*/ +void starpu_perf_counter_unset_per_codelet_listener(struct starpu_codelet *cl); + +/** @} */ + +/** + @name Sample Related Routines + @{ +*/ + +/** + Read an int32 counter value from a sample. +*/ +int32_t starpu_perf_counter_sample_get_int32_value(struct starpu_perf_counter_sample *sample, const int counter_id); + +/** + Read an int64 counter value from a sample. +*/ +int64_t starpu_perf_counter_sample_get_int64_value(struct starpu_perf_counter_sample *sample, const int counter_id); + +/** + Read a float counter value from a sample. 
+*/ +float starpu_perf_counter_sample_get_float_value(struct starpu_perf_counter_sample *sample, const int counter_id); + +/** + Read a double counter value from a sample. +*/ +double starpu_perf_counter_sample_get_double_value(struct starpu_perf_counter_sample *sample, const int counter_id); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_PERF_MONITORING_H__ */ diff --git a/include/starpu_perf_steering.h b/include/starpu_perf_steering.h new file mode 100644 index 0000000..0a3215e --- /dev/null +++ b/include/starpu_perf_steering.h @@ -0,0 +1,270 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_PERF_STEERING_H__ +#define __STARPU_PERF_STEERING_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Perf_Steering Performance Steering Knobs + @brief API to access performance steering knobs. + @{ +*/ + +/** + @name API + \anchor PS_API + @{ +*/ + +/** + Enum of all possible performance knob scopes.
+ */ +enum starpu_perf_knob_scope +{ + starpu_perf_knob_scope_undefined = 0, /**< undefined scope */ + starpu_perf_knob_scope_global = 1, /**< global scope */ + starpu_perf_knob_scope_per_worker = 3, /**< per-worker scope */ + starpu_perf_knob_scope_per_scheduler = 5 /**< per-scheduler scope */ +}; + +/** + Enum of all possible performance knob value types. + */ +enum starpu_perf_knob_type +{ + starpu_perf_knob_type_undefined = 0, /**< undefined value type */ + starpu_perf_knob_type_int32 = 1, /**< signed 32-bit integer value */ + starpu_perf_knob_type_int64 = 2, /**< signed 64-bit integer value */ + starpu_perf_knob_type_float = 3, /**< 32-bit single precision floating-point value */ + starpu_perf_knob_type_double = 4 /**< 64-bit double precision floating-point value */ +}; + +/** @} */ + +/** + @name Scope Related Routines + @{ +*/ + +/** + Translate scope name constant string to scope id. +*/ +int starpu_perf_knob_scope_name_to_id(const char *name); + +/** + Translate scope id to scope name constant string. +*/ +const char *starpu_perf_knob_scope_id_to_name(enum starpu_perf_knob_scope scope); + +/** @} */ + +/** + @name Type Related Routines + @{ +*/ + +/** + Translate type name constant string to type id. +*/ +int starpu_perf_knob_type_name_to_id(const char *name); + +/** + Translate type id to type name constant string. +*/ +const char *starpu_perf_knob_type_id_to_name(enum starpu_perf_knob_type type); + +/** @} */ + +/** + @name Performance Steering Knob Related Routines + @{ +*/ + +/** + Return the number of performance steering knobs for the given scope. +*/ +int starpu_perf_knob_nb(enum starpu_perf_knob_scope scope); + +/** + Translate a performance knob name to its id. +*/ +int starpu_perf_knob_name_to_id(enum starpu_perf_knob_scope scope, const char *name); + +/** + Translate a performance knob rank in its scope to its knob id.
+*/ +int starpu_perf_knob_nth_to_id(enum starpu_perf_knob_scope scope, int nth); + +/** + Translate a knob id to its name constant string. +*/ +const char *starpu_perf_knob_id_to_name(int id); + +/** + Return the knob's type id. +*/ +int starpu_perf_knob_get_type_id(int id); + +/** + Return the knob's help string. +*/ +const char *starpu_perf_knob_get_help_string(int id); + +/** + Display the list of knobs defined in the given scope. +*/ +void starpu_perf_knob_list_avail(enum starpu_perf_knob_scope scope); + +/** + Display the list of knobs defined in all scopes. +*/ +void starpu_perf_knob_list_all_avail(void); + +/** + Get int32 knob value for Global scope. +*/ +int32_t starpu_perf_knob_get_global_int32_value(const int knob_id); + +/** + Get int64 knob value for Global scope. +*/ +int64_t starpu_perf_knob_get_global_int64_value(const int knob_id); + +/** + Get float knob value for Global scope. +*/ +float starpu_perf_knob_get_global_float_value(const int knob_id); + +/** + Get double knob value for Global scope. +*/ +double starpu_perf_knob_get_global_double_value(const int knob_id); + +/** + Set int32 knob value for Global scope. +*/ +void starpu_perf_knob_set_global_int32_value(const int knob_id, int32_t new_value); + +/** + Set int64 knob value for Global scope. +*/ +void starpu_perf_knob_set_global_int64_value(const int knob_id, int64_t new_value); + +/** + Set float knob value for Global scope. +*/ +void starpu_perf_knob_set_global_float_value(const int knob_id, float new_value); + +/** + Set double knob value for Global scope. +*/ +void starpu_perf_knob_set_global_double_value(const int knob_id, double new_value); + +/** + Get int32 value for Per_worker scope. +*/ +int32_t starpu_perf_knob_get_per_worker_int32_value(const int knob_id, unsigned workerid); + +/** + Get int64 value for Per_worker scope. +*/ +int64_t starpu_perf_knob_get_per_worker_int64_value(const int knob_id, unsigned workerid); + +/** + Get float value for Per_worker scope.
+*/ +float starpu_perf_knob_get_per_worker_float_value(const int knob_id, unsigned workerid); + +/** + Get double value for Per_worker scope. +*/ +double starpu_perf_knob_get_per_worker_double_value(const int knob_id, unsigned workerid); + +/** + Set int32 value for Per_worker scope. +*/ +void starpu_perf_knob_set_per_worker_int32_value(const int knob_id, unsigned workerid, int32_t new_value); + +/** + Set int64 value for Per_worker scope. +*/ +void starpu_perf_knob_set_per_worker_int64_value(const int knob_id, unsigned workerid, int64_t new_value); + +/** + Set float value for Per_worker scope. +*/ +void starpu_perf_knob_set_per_worker_float_value(const int knob_id, unsigned workerid, float new_value); + +/** + Set double value for Per_worker scope. +*/ +void starpu_perf_knob_set_per_worker_double_value(const int knob_id, unsigned workerid, double new_value); + +/** + Get int32 value for per_scheduler scope. +*/ +int32_t starpu_perf_knob_get_per_scheduler_int32_value(const int knob_id, const char *sched_policy_name); + +/** + Get int64 value for per_scheduler scope. +*/ +int64_t starpu_perf_knob_get_per_scheduler_int64_value(const int knob_id, const char *sched_policy_name); + +/** + Get float value for per_scheduler scope. +*/ +float starpu_perf_knob_get_per_scheduler_float_value(const int knob_id, const char *sched_policy_name); + +/** + Get double value for per_scheduler scope. +*/ +double starpu_perf_knob_get_per_scheduler_double_value(const int knob_id, const char *sched_policy_name); + +/** + Set int32 value for per_scheduler scope. +*/ +void starpu_perf_knob_set_per_scheduler_int32_value(const int knob_id, const char *sched_policy_name, int32_t new_value); + +/** + Set int64 value for per_scheduler scope. +*/ +void starpu_perf_knob_set_per_scheduler_int64_value(const int knob_id, const char *sched_policy_name, int64_t new_value); + +/** + Set float value for per_scheduler scope. 
+*/ +void starpu_perf_knob_set_per_scheduler_float_value(const int knob_id, const char *sched_policy_name, float new_value); + +/** + Set double value for per_scheduler scope. +*/ +void starpu_perf_knob_set_per_scheduler_double_value(const int knob_id, const char *sched_policy_name, double new_value); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_PERF_STEERING_H__ */ diff --git a/include/starpu_perfmodel.h b/include/starpu_perfmodel.h new file mode 100644 index 0000000..9c73ee8 --- /dev/null +++ b/include/starpu_perfmodel.h @@ -0,0 +1,527 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_PERFMODEL_H__ +#define __STARPU_PERFMODEL_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Performance_Model Performance Model + @{ +*/ + +struct starpu_task; +struct starpu_data_descr; + +/** + Describe one device of an architecture: its type, its identifier and its parallelism. +*/ +struct starpu_perfmodel_device +{ + enum starpu_worker_archtype type; /**< type of the device */ + int devid; /**< identifier of the precise device */ + int ncores; /**< number of executions in parallel, minus 1 */ +}; + +/** + Describe an architecture as a list of devices. +*/ +struct starpu_perfmodel_arch +{ + int ndevices; /**< number of the devices for the given arch */ + struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */ +}; + +/** + One entry of the performance history: statistics over the recorded samples, along with the related data footprint and size. +*/ +struct starpu_perfmodel_history_entry +{ + double mean; /**< mean_n = 1/n sum */ + double deviation; /**< n dev_n = sum2 - 1/n (sum)^2 */ + double sum; /**< sum of samples (in µs) */ + double sum2; /**< sum of samples^2 */ + unsigned nsample; /**< number of samples */ + unsigned nerror; + uint32_t footprint; /**< data footprint */ + size_t size; /**< in bytes */ + double flops; /**< Provided by the application */ + + double duration; + starpu_tag_t tag; + double *parameters; +}; + +/** + Node of a singly-linked list of performance history entries. +*/ +struct starpu_perfmodel_history_list +{ + struct starpu_perfmodel_history_list *next; + struct starpu_perfmodel_history_entry *entry; +}; + +/** + Accumulated sums and estimated coefficients of the regression-based performance models. +*/ +struct starpu_perfmodel_regression_model +{ + double sumlny; /**< sum of ln(measured) */ + + double sumlnx; /**< sum of ln(size) */ + double sumlnx2; /**< sum of ln(size)^2 */ + + unsigned long minx; /**< minimum size */ + unsigned long maxx; /**< maximum size */ + + double sumlnxlny; /**< sum of ln(size)*ln(measured) */ + + double alpha; /**< estimated = alpha * size ^ beta */ + double beta; /**< estimated = alpha * size ^ beta */ + unsigned valid; /**< whether the linear regression model is valid (i.e.
enough measures) */ + + double a; /**< estimated = a size ^b + c */ + double b; /**< estimated = a size ^b + c */ + double c; /**< estimated = a size ^b + c */ + unsigned nl_valid; /**< whether the non-linear regression model is valid (i.e. enough measures) */ + + unsigned nsample; /**< number of sample values for non-linear regression */ + + double *coeff; /**< list of computed coefficients for multiple linear regression model */ + unsigned ncoeff; /**< number of coefficients for multiple linear regression model */ + unsigned multi_valid; /**< whether the multiple linear regression model is valid */ +}; + +struct starpu_perfmodel_history_table; + +#define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED + +typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); +typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** + information about the performance model of a given arch. +*/ +struct starpu_perfmodel_per_arch +{ + /** + Used by ::STARPU_PER_ARCH, must point to functions which take a + task, the target arch and implementation number (as mere + conveniency, since the array is already indexed by these), and + must return a task duration estimation in micro-seconds. + */ + starpu_perfmodel_per_arch_cost_function cost_function; + /** + Same as in structure starpu_perfmodel, but per-arch, in case it + depends on the architecture-specific implementation. + */ + starpu_perfmodel_per_arch_size_base size_base; + + /** + \private + The history of performance measurements. + */ + struct starpu_perfmodel_history_table *history; + /** + \private + Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and + ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history + measures. 
+ */ + struct starpu_perfmodel_history_list *list; + /** + \private + Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED + and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated + factors of the regression. + */ + struct starpu_perfmodel_regression_model regression; + + char debug_path[256]; +}; + +/** + todo +*/ +enum starpu_perfmodel_type +{ + STARPU_PERFMODEL_INVALID = 0, + STARPU_PER_WORKER, /**< Application-provided per-worker cost model function */ + STARPU_PER_ARCH, /**< Application-provided per-arch cost model function */ + STARPU_COMMON, /**< Application-provided common cost model function, with per-arch factor */ + STARPU_HISTORY_BASED, /**< Automatic history-based cost model */ + STARPU_REGRESSION_BASED, /**< Automatic linear regression-based cost model (alpha * size ^ beta) */ + STARPU_NL_REGRESSION_BASED, /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */ + STARPU_MULTIPLE_REGRESSION_BASED /**< Automatic multiple linear regression-based cost model. Application + provides parameters, their combinations and exponents. */ +}; + +struct _starpu_perfmodel_state; +typedef struct _starpu_perfmodel_state *starpu_perfmodel_state_t; + +/** + Contain all information about a performance model. At least the + type and symbol fields have to be filled when defining a performance + model for a codelet. For compatibility, make sure to initialize the + whole structure to zero, either by using explicit memset, or by + letting the compiler implicitly do it in e.g. static storage case. If + not provided, other fields have to be zero. +*/ +struct starpu_perfmodel +{ + /** + type of performance model +
      +
    • + ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, + ::STARPU_NL_REGRESSION_BASED: No other field needs to be + provided, this is purely history-based. +
    • +
    • + ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields + starpu_perfmodel::nparameters (number of different parameters), + starpu_perfmodel::ncombinations (number of parameters + combinations-tuples) and table starpu_perfmodel::combinations + which defines exponents of the equation. Function cl_perf_func + also needs to define how to extract parameters from the task. +
    • +
    • + ::STARPU_PER_ARCH: either field + starpu_perfmodel::arch_cost_function has to be filled with a + function that returns the cost in micro-seconds on the arch given + as parameter, or field starpu_perfmodel::per_arch has to be filled + with functions which return the cost in micro-seconds. +
    • +
    • + ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be + filled with a function that returns the cost in micro-seconds on a + CPU, timing on other archs will be determined by multiplying by an + arch-specific factor. +
    • +
    + */ + enum starpu_perfmodel_type type; + + /** + Used by ::STARPU_COMMON. Take a task and implementation number, + and must return a task duration estimation in micro-seconds. + */ + double (*cost_function)(struct starpu_task *, unsigned nimpl); + /** + Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation + number, and must return a task duration estimation in + micro-seconds on that arch. + */ + double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl); + /** + Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation + number, and must return a task duration estimation in + micro-seconds on that worker. + */ + double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl); + + /** + Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and + ::STARPU_NL_REGRESSION_BASED. If not NULL, take a task and + implementation number, and return the size to be used as index to + distinguish histories and as a base for regressions. + */ + size_t (*size_base)(struct starpu_task *, unsigned nimpl); + /** + Used by ::STARPU_HISTORY_BASED. If not NULL, take a task + and return the footprint to be used as index to distinguish + histories. The default is to use the starpu_task_data_footprint() + function. + */ + uint32_t (*footprint)(struct starpu_task *); + + /** + symbol name for the performance model, which will be used as file + name to store the model. It must be set otherwise the model will + be ignored. + */ + const char *symbol; + + /** + name of the file storing the performance model. It is non + NULL if the model has been loaded or stored in a file. + */ + char *path; + + /** + \private + Whether the performance model is already loaded from the disk. 
+ */ + unsigned is_loaded; + /** + \private + */ + unsigned benchmarking; + /** + \private + */ + unsigned is_init; + + void (*parameters)(struct starpu_task *task, double *parameters); + /** + \private + Names of parameters used for multiple linear regression models (M, + N, K) + */ + const char **parameters_names; + /** + \private + Number of parameters used for multiple linear regression models + */ + unsigned nparameters; + /** + \private + Table of combinations of parameters (and the exponents) used for + multiple linear regression models + */ + unsigned **combinations; + /** + \private + Number of combination of parameters used for multiple linear + regression models + */ + unsigned ncombinations; + /** + \private + */ + starpu_perfmodel_state_t state; +}; + +/** + Initialize the \p model performance model structure. This is automatically + called when e.g. submitting a task using a codelet using this performance model. +*/ +void starpu_perfmodel_init(struct starpu_perfmodel *model); + +/** + Deinitialize the \p model performance model structure. You need to call this + before deallocating the structure. You will probably want to call + starpu_perfmodel_unload_model() before calling this function, to save the perfmodel. +*/ +int starpu_perfmodel_deinit(struct starpu_perfmodel *model); + +/** + starpu_energy_start - start counting hardware events in an event set + + - \p workerid is the worker on which calibration is to be performed (in the case of GPUs, use -1 for CPUs) + - \p archi is the type of architecture on which calibration will be run + + See \ref MeasuringEnergyandPower for more details. +*/ +int starpu_energy_start(int workerid, enum starpu_worker_archtype archi); + +/** + starpu_energy_stop - stop counting hardware events in an event set + + - \p model is the energy performance model to be filled with the result + - \p task is a task specimen, so the performance model folds the result according to the parameter sizes of the task. 
+ - \p nimpl is the implementation number run during calibration + - \p ntasks is the number of tasks run during calibration + - \p workerid is the worker on which calibration was performed (in the case of GPUs, use -1 for CPUs) + - \p archi is the type of architecture on which calibration was run + + See \ref MeasuringEnergyandPower for more details. +*/ +int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi); + +/** + Load the performance model found in the file named \p filename. \p model has to be + completely zero, and will be filled with the information stored in the given file. +*/ +int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model); + +/** + Load a given performance model. \p model has to be + completely zero, and will be filled with the information stored in + $STARPU_HOME/.starpu. The function is intended to be used by + external tools that want to read the performance model files. +*/ + +int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model); + +/** + Unload \p model which has been previously loaded + through the function starpu_perfmodel_load_symbol() +*/ +int starpu_perfmodel_unload_model(struct starpu_perfmodel *model); + +/** + Save the performance model in its file. +*/ +void starpu_save_history_based_model(struct starpu_perfmodel *model); + +/** + Fills \p path (supposed to be \p maxlen long) with the full path to the + performance model file for symbol \p symbol. This path can later on be used + for instance with starpu_perfmodel_load_file() . +*/ +void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen); + +/** + Dump performance model \p model to output stream \p output, in XML format. + See \ref PerformanceModelExample for more details. 
+*/ +void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model); + +/** + Free internal memory used for sampling + management. It should only be called by an application which is not + calling starpu_shutdown() as this function already calls it. See for + example tools/starpu_perfmodel_display.c. +*/ +void starpu_perfmodel_free_sampling(void); + +/** + Return the architecture type of the worker \p workerid. +*/ +struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id); + +int starpu_perfmodel_get_narch_combs(void); +int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device *devices); +int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices); +struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb); + +struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl); +struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...); + +int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...); +int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...); + +/** + Return the path to the debugging information for the performance model. +*/ +void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl); + +const char *starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype); + +/** + Return the architecture name for \p arch +*/ +void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl); + +/** + Return the estimated time in µs of a task with the given model and the given footprint. 
+*/ +double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, uint32_t footprint); + +/** + If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions. +*/ +void starpu_perfmodel_initialize(void); + +/** + Print a list of all performance models on \p output +*/ +int starpu_perfmodel_list(FILE *output); + +void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output); +int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output); +int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output); + +int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model); + +/** + Feed the performance model \p model with one explicit + measurement (in µs or J), in addition to measurements done by StarPU + itself. This can be useful when the application already has an + existing set of measurements done in good conditions, that StarPU + could benefit from instead of doing on-line measurements. An example + of use can be seen in \ref PerformanceModelExample. + + Note that this records only one measurement, and StarPU would ignore + the first measurement (since it is usually disturbed by library loading + etc.). Make sure to call this function several times to record all your + measurements. + + You can also call starpu_perfmodel_update_history_n() to directly provide an + average performed on several tasks. + + See \ref PerformanceModelCalibration for more details. 
+*/ +void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured); + +/** + Feed the performance model \p model with an explicit average measurement (in µs or J). + + This is similar to starpu_perfmodel_update_history(), but records a batch of + \p number measurements provided as the average of the measurements \p average_measured. +*/ +void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double average_measured, unsigned number); + +/** + Print the directory name storing performance models on \p output +*/ +void starpu_perfmodel_directory(FILE *output); + +/** + Print a matrix of bus bandwidths on \p f. +*/ +void starpu_bus_print_bandwidth(FILE *f); + +/** + Print the affinity devices on \p f. +*/ +void starpu_bus_print_affinity(FILE *f); + +/** + Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency. +*/ +void starpu_bus_print_filenames(FILE *f); + +/** + Return the bandwidth of data transfer between two memory nodes. + See \ref SchedulingHelpers for more details. +*/ +double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node); + +/** + Return the latency of data transfer between two memory nodes. + See \ref SchedulingHelpers for more details. +*/ +double starpu_transfer_latency(unsigned src_node, unsigned dst_node); + +/** + Return the estimated time to transfer a given size between two memory nodes. + See \ref SchedulingHelpers for more details. +*/ +double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size); + +/** + Performance model which just always return 1µs. 
+*/ +extern struct starpu_perfmodel starpu_perfmodel_nop; + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_PERFMODEL_H__ */ diff --git a/include/starpu_profiling.h b/include/starpu_profiling.h new file mode 100644 index 0000000..2a0292f --- /dev/null +++ b/include/starpu_profiling.h @@ -0,0 +1,393 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_PROFILING_H__ +#define __STARPU_PROFILING_H__ + +#include +#include + +#include + +#ifdef STARPU_PAPI +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Profiling Profiling + @{ +*/ + +/** + Used when calling the function starpu_profiling_status_set() to disable profiling. +*/ +#define STARPU_PROFILING_DISABLE 0 +/** + Used when calling the function starpu_profiling_status_set() to enable profiling. +*/ +#define STARPU_PROFILING_ENABLE 1 + +/** + Information about the execution of a task. It is accessible from + the field starpu_task::profiling_info if profiling was enabled. +*/ +struct starpu_profiling_task_info +{ + /** Date of task submission (relative to the initialization of StarPU). */ + struct timespec submit_time; + + /** Time when the task was submitted to the scheduler. 
*/ + struct timespec push_start_time; + /** Time when the scheduler finished with the task submission. */ + struct timespec push_end_time; + /** Time when the scheduler started to be requested for a task, and eventually gave that task. */ + struct timespec pop_start_time; + /** Time when the scheduler finished providing the task for execution. */ + struct timespec pop_end_time; + + /** Time when the worker started fetching input data. */ + struct timespec acquire_data_start_time; + /** Time when the worker finished fetching input data. */ + struct timespec acquire_data_end_time; + + /** Date of task execution beginning (relative to the initialization of StarPU). */ + struct timespec start_time; + /** Date of task execution termination (relative to the initialization of StarPU). */ + struct timespec end_time; + + /** Time when the worker started releasing data. */ + struct timespec release_data_start_time; + /** Time when the worker finished releasing data. */ + struct timespec release_data_end_time; + + /** Time when the worker started the application callback for the task. */ + struct timespec callback_start_time; + /** Time when the worker finished the application callback for the task. */ + struct timespec callback_end_time; + + /* TODO add expected length, expected start/end ? */ + + /** Identifier of the worker which has executed the task. */ + int workerid; + + /** Number of cycles used by the task, only available in the MoviSim */ + uint64_t used_cycles; + /** Number of cycles stalled within the task, only available in the MoviSim */ + uint64_t stall_cycles; + /** Energy consumed by the task, in Joules */ + double energy_consumed; + +#ifdef STARPU_PAPI + /** PAPI Events **/ + long long int papi_values[PAPI_MAX_HWCTRS]; + int papi_event_set; +#endif +}; + +/** + Profiling information associated to a worker. The timing is + provided since the previous call to + starpu_profiling_worker_get_info(). 
+ + The executing_time, callback_time, waiting_time, sleeping_time, and + scheduling_time are exclusive to each other, i.e. they can be added up, their + sum is smaller than total_time. The difference between total_time and the sum + is the uncategorized runtime overhead. +*/ +struct starpu_profiling_worker_info +{ + /** Starting date for the reported profiling measurements. */ + struct timespec start_time; + /** Duration of the profiling measurement interval. */ + struct timespec total_time; + + /** Time spent by the worker to execute tasks during the profiling measurement interval. */ + struct timespec executing_time; + /** Time spent by the worker to execute callbacks, while not executing a + * task, during the profiling measurement interval. */ + struct timespec callback_time; + /** Time spent by the worker waiting for a data transfer to finish, + * while not executing a task or a callback, during the profiling + * measurement interval. */ + struct timespec waiting_time; + /** Time spent idling by the worker because no task were available, and + * not executing a task or a callback or waiting for a data transfer to + * finish, during the profiling measurement interval. */ + struct timespec sleeping_time; + /** Time spent by the worker scheduling tasks, while not executing a + * task or a callback or waiting for a data transfer to finish, and there + * are tasks to be scheduled, during the profiling measurement interval. */ + struct timespec scheduling_time; + + /** Time spent by the worker to execute tasks during the profiling measurement interval. + * Normally always equal to executing_time. */ + struct timespec all_executing_time; + /** Time spent by the worker to execute callbacks during the profiling measurement interval. + * Normally always greater than callback_time. */ + struct timespec all_callback_time; + /** Time spent by the worker waiting for a data transfer to finish during the profiling measurement interval. 
+ * Normally always greater than waiting_time. */ + struct timespec all_waiting_time; + /** Time spent idling by the worker because no task were available during the profiling measurement interval. + * Normally always greater than sleeping_time. */ + struct timespec all_sleeping_time; + /** Time spent by the worker scheduling tasks during the profiling measurement interval. + * Normally always greater than scheduling_time. */ + struct timespec all_scheduling_time; + + /** Number of tasks executed by the worker during the profiling measurement interval. */ + int executed_tasks; + + /** Number of cycles used by the worker, only available in the MoviSim */ + uint64_t used_cycles; + /** Number of cycles stalled within the worker, only available in the MoviSim */ + uint64_t stall_cycles; + /** Energy consumed by the worker, in Joules */ + double energy_consumed; + + /* TODO: add wasted time due to failed tasks */ + + double flops; +}; + +/** + todo +*/ +struct starpu_profiling_bus_info +{ + /** Time of bus profiling startup. */ + struct timespec start_time; + /** Total time of bus profiling. */ + struct timespec total_time; + /** Number of bytes transferred during profiling. */ + int long long transferred_bytes; + /** Number of transfers during profiling. */ + int transfer_count; +}; + +/** + Reset performance counters and enable profiling if the + environment variable \ref STARPU_PROFILING is set to a positive value. + See \ref EnablingOn-linePerformanceMonitoring for more details. +*/ +void starpu_profiling_init(void); + +/** + Set the ID used for profiling trace filename. Has to be called before starpu_init(). + See \ref TraceMpi for more details. +*/ +void starpu_profiling_set_id(int new_id); + +/** + Set the profiling status. Profiling is activated + by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing + \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function + resets all profiling measurements. 
When profiling is enabled, the + field starpu_task::profiling_info points to a valid structure + starpu_profiling_task_info containing information about the execution + of the task. Negative return values indicate an error, otherwise the + previous status is returned. + See \ref EnablingOn-linePerformanceMonitoring for more details. +*/ +int starpu_profiling_status_set(int status); + +/** + Return the current profiling status or a negative value in case + there was an error. + See \ref EnablingOn-linePerformanceMonitoring for more details. +*/ +int starpu_profiling_status_get(void); + +#ifdef BUILDING_STARPU +#include +#ifdef __GNUC__ +extern int _starpu_profiling; +#define starpu_profiling_status_get() ( \ + { \ + int __ret; \ + ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); \ + __ret = _starpu_profiling; \ + ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); \ + __ret; \ + }) +#endif +#endif + +/** + Get the profiling info associated to the worker identified by + \p workerid, and reset the profiling measurements. If the argument \p + worker_info is NULL, only reset the counters associated to worker + \p workerid. Upon successful completion, this function returns 0. + Otherwise, a negative value is returned. + See \ref Per-workerFeedback for more details. +*/ +int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info); + +/** + Return the number of buses in the machine. + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_count(void); + +/** + Return the identifier of the bus between \p src and \p dst. + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_id(int src, int dst); + +/** + Return the source point of bus \p busid. + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_src(int busid); + +/** + Return the destination point of bus \p busid. + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_dst(int busid); +/** + See \ref HardwareTopology for more details. 
+*/ +void starpu_bus_set_direct(int busid, int direct); +/** + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_direct(int busid); +/** + See \ref HardwareTopology for more details. +*/ +void starpu_bus_set_ngpus(int busid, int ngpus); +/** + See \ref HardwareTopology for more details. +*/ +int starpu_bus_get_ngpus(int busid); + +/** + See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example. + Note that calling starpu_bus_get_profiling_info() resets the counters to zero. + See \ref FeedBackFigures for more details. +*/ +int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info); + +/* Some helper functions to manipulate profiling API output */ +/* Reset timespec */ +static __starpu_inline void starpu_timespec_clear(struct timespec *tsp) +{ + tsp->tv_sec = 0; + tsp->tv_nsec = 0; +} + +#define STARPU_NS_PER_S 1000000000 + +/* Computes result = a + b */ +static __starpu_inline void starpu_timespec_add(struct timespec *a, + struct timespec *b, + struct timespec *result) +{ + result->tv_sec = a->tv_sec + b->tv_sec; + result->tv_nsec = a->tv_nsec + b->tv_nsec; + + if (result->tv_nsec >= STARPU_NS_PER_S) + { + ++(result)->tv_sec; + result->tv_nsec -= STARPU_NS_PER_S; + } +} + +/* Computes result += a */ +static __starpu_inline void starpu_timespec_accumulate(struct timespec *result, + struct timespec *a) +{ + result->tv_sec += a->tv_sec; + result->tv_nsec += a->tv_nsec; + + if (result->tv_nsec >= STARPU_NS_PER_S) + { + ++(result)->tv_sec; + result->tv_nsec -= STARPU_NS_PER_S; + } +} + +/* Computes result = a - b */ +static __starpu_inline void starpu_timespec_sub(const struct timespec *a, + const struct timespec *b, + struct timespec *result) +{ + result->tv_sec = a->tv_sec - b->tv_sec; + result->tv_nsec = a->tv_nsec - b->tv_nsec; + + if ((result)->tv_nsec < 0) + { + --(result)->tv_sec; + result->tv_nsec += STARPU_NS_PER_S; + } +} + +#define starpu_timespec_cmp(a, b, CMP) \ + 
(((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP(b)->tv_nsec) : ((a)->tv_sec CMP(b)->tv_sec)) + +/** + Return the time elapsed between \p start and \p end in microseconds. + See \ref Per-taskFeedback for more details. +*/ +double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end); + +/** + Convert the given timespec \p ts into microseconds. + See \ref Per-taskFeedback for more details. +*/ +double starpu_timing_timespec_to_us(struct timespec *ts); + +/** + Display statistics about the bus on \c stderr if the environment + variable \ref STARPU_BUS_STATS is defined. The function is called + automatically by starpu_shutdown(). + See \ref DataStatistics for more details. +*/ +void starpu_profiling_bus_helper_display_summary(void); + +/** + Display statistics about the workers on \c stderr if the + environment variable \ref STARPU_WORKER_STATS is defined. The function is + called automatically by starpu_shutdown(). + See \ref DataStatistics for more details. +*/ +void starpu_profiling_worker_helper_display_summary(void); + +/** + Display statistics about the current data handles registered + within StarPU. StarPU must have been configured with the configure + option \ref enable-memory-stats "--enable-memory-stats" (see \ref + MemoryFeedback). + See \ref MemoryFeedback for more details. +*/ +void starpu_data_display_memory_stats(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_PROFILING_H__ */ diff --git a/include/starpu_profiling_tool.h b/include/starpu_profiling_tool.h new file mode 100644 index 0000000..264a2f1 --- /dev/null +++ b/include/starpu_profiling_tool.h @@ -0,0 +1,154 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_PROFILING_TOOL_H__ +#define __STARPU_PROFILING_TOOL_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Profiling_Tool Profiling Tool + @{ +*/ + +/** + Event type +*/ +enum starpu_prof_tool_event +{ + starpu_prof_tool_event_none = 0, + starpu_prof_tool_event_init, + starpu_prof_tool_event_terminate, + starpu_prof_tool_event_init_begin, + starpu_prof_tool_event_init_end, + + starpu_prof_tool_event_driver_init, + starpu_prof_tool_event_driver_deinit, + starpu_prof_tool_event_driver_init_start, + starpu_prof_tool_event_driver_init_end, + starpu_prof_tool_event_start_cpu_exec, + starpu_prof_tool_event_end_cpu_exec, + starpu_prof_tool_event_start_gpu_exec, + starpu_prof_tool_event_end_gpu_exec, + starpu_prof_tool_event_start_transfer, + starpu_prof_tool_event_end_transfer, + + starpu_prof_tool_event_user_start, + starpu_prof_tool_event_user_end +}; + +/** + todo +*/ +enum starpu_prof_tool_driver_type +{ + starpu_prof_tool_driver_cpu, + starpu_prof_tool_driver_gpu, + starpu_prof_tool_driver_hip, + starpu_prof_tool_driver_ocl +}; + +/** + todo +*/ +enum starpu_prof_tool_command +{ + starpu_prof_tool_command_reg = 0, + starpu_prof_tool_command_toggle = 1, + starpu_prof_tool_command_toggle_per_thread = 2 +}; + +/** + General 
information +*/ +struct starpu_prof_tool_info +{ + struct starpu_conf *conf; + enum starpu_prof_tool_event event_type; + unsigned int starpu_version[3]; + int thread_id; + int worker_id; + + int device_number; + enum starpu_prof_tool_driver_type driver_type; // not sure + + unsigned memnode; + unsigned bytes_to_transfer; + unsigned bytes_transfered; + + void* fun_ptr; /* NULL when not relevant (driver init etc) */ + + /* int valid_bytes; + int version; + starpu_device_t device_type; + int device_number; + starpu_ssize_t async; + starpu_ssize_t async_queue; + const char* src_file; + const char* func_name; + int line_no, end_line_no; + int func_line_no, func_end_line_no;*/ +}; + +/** + Event info +*/ +union starpu_prof_tool_event_info +{ + enum starpu_prof_tool_event event_type; + /* starpu_data_event_info data_event; + starpu_launch_event_info launch_event; + starpu_other_event_info other_event;*/ +}; + +/** + API info +*/ +struct starpu_prof_tool_api_info +{ + /*acc_device_api device_api; + int valid_bytes; + acc_device_t device_type; + int vendor; + const void* device_handle; + const void* context_handle; + const void* async_handle;*/ +}; + +typedef void (*starpu_prof_tool_cb_func)(struct starpu_prof_tool_info*, union starpu_prof_tool_event_info*, struct starpu_prof_tool_api_info*); + +/** + Register / unregister events +*/ +typedef void (*starpu_prof_tool_entry_register_func)(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info); + +/** + A function with this signature must be implemented by external tools that want to use the callbacks +*/ +typedef void (*starpu_prof_tool_entry_func)(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_PROFILING_TOOL_H__ */ diff --git a/include/starpu_rand.h b/include/starpu_rand.h new file mode 100644 index 0000000..9a66ba2 --- /dev/null +++ b/include/starpu_rand.h @@ -0,0 
+1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_RAND_H__ +#define __STARPU_RAND_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Random_Functions Random Functions + @{ + */ + +#ifdef STARPU_SIMGRID +/* In simgrid mode, force using seed 0 by default to get reproducible behavior by default */ +#define starpu_seed(seed) starpu_getenv_number_default("STARPU_RAND_SEED", 0) +#else +#define starpu_seed(seed) starpu_getenv_number_default("STARPU_RAND_SEED", (seed)) +#endif + +#ifdef STARPU_USE_DRAND48 +#define starpu_srand48(seed) srand48(starpu_seed(seed)) +#define starpu_drand48() drand48() +#define starpu_lrand48() lrand48() +#define starpu_erand48(xsubi) erand48(xsubi) +#ifdef STARPU_USE_ERAND48_R +typedef struct drand48_data starpu_drand48_data; +#define starpu_srand48_r(seed, buffer) srand48_r(starpu_seed(seed), buffer) +#define starpu_drand48_r(buffer, result) drand48_r(buffer, result) +#define starpu_lrand48_r(buffer, result) lrand48_r(buffer, result) +#define starpu_erand48_r(xsubi, buffer, result) erand48_r(xsubi, buffer, result) +#else +typedef int starpu_drand48_data; +#define starpu_srand48_r(seed, buffer) srand48(starpu_seed(seed)) +#define starpu_drand48_r(buffer, result) \ + do { \ + *(result) = drand48(); \ + } \ + while (0) +#define 
starpu_lrand48_r(buffer, result) \ + do { \ + *(result) = lrand48(); \ + } \ + while (0) +#define starpu_erand48_r(xsubi, buffer, result) \ + do { \ + (void)buffer; \ + *(result) = erand48(xsubi); \ + } \ + while (0) +#endif +#else +typedef int starpu_drand48_data; +#define starpu_srand48(seed) srand(starpu_seed(seed)) +#define starpu_drand48() ((double)(rand()) / RAND_MAX) /* parenthesized so the expansion is safe inside larger expressions */ +#define starpu_lrand48() rand() +#define starpu_erand48(xsubi) starpu_drand48() +#define starpu_srand48_r(seed, buffer) srand(starpu_seed(seed)) +#define starpu_erand48_r(xsubi, buffer, result) \ + do { \ + (void)xsubi; \ + (void)buffer; \ + *(result) = ((double)(rand()) / RAND_MAX); \ + } \ + while (0) +#endif + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_RAND_H__ */ diff --git a/include/starpu_sched_component.h b/include/starpu_sched_component.h new file mode 100644 index 0000000..e3573b4 --- /dev/null +++ b/include/starpu_sched_component.h @@ -0,0 +1,887 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * Copyright (C) 2017-2017 Arthur Chevalier + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_SCHED_COMPONENT_H__ +#define __STARPU_SCHED_COMPONENT_H__ + +#ifdef STARPU_HAVE_HWLOC +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Modularized_Scheduler Modularized Scheduler Interface + @{ +*/ + +/** + flags for starpu_sched_component::properties +*/ +enum starpu_sched_component_properties +{ + STARPU_SCHED_COMPONENT_HOMOGENEOUS = (1 << 0), /**< indicate that all workers have the same starpu_worker_archtype */ + STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE = (1 << 1) /**< indicate that all workers have the same memory component */ +}; + +/** + indicate if component is homogeneous +*/ +#define STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component) ((component)->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS) + +/** + indicate if all workers have the same memory component +*/ +#define STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component) ((component)->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE) + +/** + Structure for a scheduler module. A scheduler is a + tree-like structure of them, some parts of scheduler can be shared by + several contexes to perform some local optimisations, so, for all + components, a list of parent is defined by \c sched_ctx_id. 
They + embed there specialised method in a pseudo object-style, so calls are + like component->push_task(component,task) +*/ +struct starpu_sched_component +{ + /** The tree containing the component*/ + struct starpu_sched_tree *tree; + /** set of underlying workers */ + struct starpu_bitmap workers; + /** + subset of starpu_sched_component::workers that is currently available in the context + The push method should take this value into account, it is set with: + component->workers UNION tree->workers UNION + component->child[i]->workers_in_ctx iff there exists x such that component->children[i]->parents[x] == component + */ + struct starpu_bitmap workers_in_ctx; + /** private data */ + void *data; + char *name; + /** number of component's children */ + unsigned nchildren; + /** vector of component's children */ + struct starpu_sched_component **children; + /** number of component's parents */ + unsigned nparents; + /** vector of component's parents */ + struct starpu_sched_component **parents; + + /** add a child to component */ + void (*add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child); + /** remove a child from component */ + void (*remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child); + void (*add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent); + void (*remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent); + + /** + push a task in the scheduler module. this function is called to + push a task on component subtree, this can either perform a + recursive call on a child or store the task in the component, + then it will be returned by a further pull_task call. + the caller must ensure that component is able to execute task. + This method must either return 0 if it the task was properly stored or + passed over to a child component, or return a value different from 0 if the + task could not be consumed (e.g.
the queue is full). + */ + int (*push_task)(struct starpu_sched_component *, struct starpu_task *); + + /** + pop a task from the scheduler module. this function is called by workers to get a task from their + parents. this function should first return a locally stored task + or perform a recursive call on the parents. + the task returned by this function should be executable by the caller + */ + struct starpu_task *(*pull_task)(struct starpu_sched_component *from, struct starpu_sched_component *to); + + /** + This function is called by a component which implements a queue, + allowing it to signify to its parents that an empty slot is + available in its queue. This should return 1 if some tasks could be pushed. + The basic implementation of this function + is a recursive call to its parents, the user has to specify a + personally-made function to catch those calls. + */ + int (*can_push)(struct starpu_sched_component *from, struct starpu_sched_component *to); + + /** + This function allows a component to wake up a worker. It is + currently called by components which implement a queue, to + signify to its children that a task has been pushed in its local + queue, and is available to be popped by a worker, for example. + This should return 1 if some container or worker could (or will) pull + some tasks. + The basic implementation of this function is a recursive call to + its children, until at least one worker has been woken up. + */ + int (*can_pull)(struct starpu_sched_component *component); + + /** + This function is called when starpu_do_schedule() is called by the application. + */ + void (*do_schedule)(struct starpu_sched_component *component); + + int (*notify)(struct starpu_sched_component *component, int message_ID, void *arg); + + /** + heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum + of relatives speedup of workers available in context.
+ estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker)) + */ + double (*estimated_load)(struct starpu_sched_component *component); + /** + return the time when a worker will enter in starvation. This function is relevant only if the task->predicted + member has been set. + */ + double (*estimated_end)(struct starpu_sched_component *component); + + /** + called by starpu_sched_component_destroy. Should free data allocated during creation + */ + void (*deinit_data)(struct starpu_sched_component *component); + + /** + this function is called for each component when workers are added or removed from a context + */ + void (*notify_change_workers)(struct starpu_sched_component *component); + int properties; + +#ifdef STARPU_HAVE_HWLOC + /** + the hwloc object associated to scheduler module. points to the + part of topology that is binded to this component, eg: a numa + node for a ws component that would balance load between + underlying sockets + */ + hwloc_obj_t obj; +#else + void *obj; +#endif +}; + +/** + The actual scheduler +*/ +struct starpu_sched_tree +{ + /** + entry module of the scheduler + */ + struct starpu_sched_component *root; + /** + set of workers available in this context, this value is used to mask workers in modules + */ + struct starpu_bitmap workers; + /** + context id of the scheduler + */ + unsigned sched_ctx_id; + /** + lock used to protect the scheduler, it is taken in read mode pushing a task and in write mode for adding or + removing workers + */ + starpu_pthread_mutex_t lock; +}; + +/** + @name Scheduling Tree API + @{ +*/ + +/** + create a empty initialized starpu_sched_tree. + See \ref ImplementAModularizedScheduler for more details. +*/ +struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id) STARPU_ATTRIBUTE_MALLOC; +/** + destroy tree and free all non shared component in it. + See \ref ImplementAModularizedScheduler for more details. 
+*/ +void starpu_sched_tree_destroy(struct starpu_sched_tree *tree); +/** + calls starpu_sched_tree_destroy, ready for use for starpu_sched_policy::deinit_sched field. + See \ref ImplementAModularizedScheduler for more details. + */ +void starpu_sched_tree_deinitialize(unsigned sched_ctx_id); +/** + See \ref ImplementAModularizedScheduler for more details. + */ +struct starpu_sched_tree *starpu_sched_tree_get(unsigned sched_ctx_id); +/** + recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers). + See \ref ImplementAModularizedScheduler for more details. +*/ +void starpu_sched_tree_update_workers(struct starpu_sched_tree *t); +/** + recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers) + See \ref ImplementAModularizedScheduler for more details. +*/ +void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t); +/** + compatibility with starpu_sched_policy interface. + See \ref ImplementAModularizedScheduler for more details. +*/ +int starpu_sched_tree_push_task(struct starpu_task *task); +/** + compatibility with starpu_sched_policy interface. + See \ref ImplementAModularizedScheduler for more details. +*/ +struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx); + +/** + Push a task to a component. This is a helper for component->push_task(component, task) plus tracing. +*/ +int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task); + +/** + Pull a task from a component. This is a helper for component->pull_task(component) plus tracing. 
+*/ +struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to); + +struct starpu_task *starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *to, int *success); +struct starpu_task *starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int *success); +int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component *component); +/** + compatibility with starpu_sched_policy interface +*/ +void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers); +/** + compatibility with starpu_sched_policy interface +*/ +void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers); + +/** + Run the do_schedule method of the components. This is a helper for starpu_sched_policy::do_schedule. +*/ +void starpu_sched_tree_do_schedule(unsigned sched_ctx_id); + +/** + Attach component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. 
MCT) +*/ +void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child); + +/** @} */ + +/** + @name Generic Scheduling Component API + @{ +*/ + +typedef struct starpu_sched_component *(*starpu_sched_component_create_t)(struct starpu_sched_tree *tree, void *data); + +/** + allocate and initialize component field with defaults values : + .pop_task make recursive call on father + .estimated_load compute relative speedup and tasks in sub tree + .estimated_end return the minimum of recursive call on children + .add_child is starpu_sched_component_add_child + .remove_child is starpu_sched_component_remove_child + .notify_change_workers does nothing + .deinit_data does nothing +*/ +struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) STARPU_ATTRIBUTE_MALLOC; + +/** + free data allocated by starpu_sched_component_create and call component->deinit_data(component) + set to NULL the member starpu_sched_component::fathers[sched_ctx_id] of all child if its equal to \p component +*/ + +void starpu_sched_component_destroy(struct starpu_sched_component *component); +/** + recursively destroy non shared parts of a \p component 's tree +*/ +void starpu_sched_component_destroy_rec(struct starpu_sched_component *component); + +void starpu_sched_component_add_child(struct starpu_sched_component *component, struct starpu_sched_component *child); + +/** + return true iff \p component can execute \p task, this function take into account the workers available in the scheduling context +*/ +int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task); + +/** + return a non NULL value if \p component can execute \p task. + write the execution prediction length for the best implementation of the best worker available and write this at \p length address. 
+ this result is more relevant if starpu_sched_component::is_homogeneous is non NULL. + if a worker need to be calibrated for an implementation, nan is set to \p length. +*/ +int STARPU_WARN_UNUSED_RESULT starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length); + +/** + return the average time to transfer \p task data to underlying \p component workers. +*/ +double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task); + +void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *component, struct starpu_task *task); + +/** @} */ + +/** + @name Worker Component API + @{ +*/ + +/** + return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid +*/ +struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid); +struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid); + +/** + Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers). 
+*/ +struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers); + +/** + return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0 +*/ +int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component); + +/** + return true iff \p component is a worker component +*/ +int starpu_sched_component_is_worker(struct starpu_sched_component *component); + +/** + return true iff \p component is a simple worker component +*/ +int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component); + +/** + return true iff \p component is a combined worker component +*/ +int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component); + +/** + compatibility with starpu_sched_policy interface + update predictions for workers +*/ +void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id); + +/** + compatibility with starpu_sched_policy interface +*/ +void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id); + +/** @} */ + +/** + @name Flow-control Fifo Component API + These can be used as methods of components. Note: they are not to be called directly, one should really call the methods of the components. 
+ @{ +*/ + +/** + default function for the pull component method, just call pull of parents until one of them returns a task +*/ +struct starpu_task *starpu_sched_component_parents_pull_task(struct starpu_sched_component *component, struct starpu_sched_component *to); + +/** + default function for the can_push component method, just call can_push of parents until one of them returns non-zero +*/ +int starpu_sched_component_can_push(struct starpu_sched_component *component, struct starpu_sched_component *to); + +/** +default function for the can_pull component method, just call can_pull of children until one of them returns non-zero +*/ +int starpu_sched_component_can_pull(struct starpu_sched_component *component); + +/** + function for the can_pull component method, call can_pull of all children +*/ +int starpu_sched_component_can_pull_all(struct starpu_sched_component *component); + +/** + default function for the estimated_load component method, just sum up the loads + of the children of the component. +*/ +double starpu_sched_component_estimated_load(struct starpu_sched_component *component); + +/** + function that can be used for the estimated_end component method, compute the minimum completion time of the children. +*/ +double starpu_sched_component_estimated_end_min(struct starpu_sched_component *component); + +/** + function that can be used for the estimated_end component method, compute + the minimum completion time of the children, and add to it an estimation of how + existing queued work, plus the exp_len work, can be completed. This is typically + used instead of starpu_sched_component_estimated_end_min when the component + contains a queue of tasks, which thus needs to be added to the estimations. +*/ +double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component *component, double exp_len); + +/** + default function for the estimated_end component method, compute the average completion time of the children. 
+*/ +double starpu_sched_component_estimated_end_average(struct starpu_sched_component *component); + +/** + todo +*/ +struct starpu_sched_component_fifo_data +{ + unsigned ntasks_threshold; + double exp_len_threshold; + int ready; + int exp; +}; + +/** + Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities. + A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it. + starpu_sched_component::estimated_end function compute the estimated length by dividing the sequential length by the number of underlying workers. +*/ +struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data) STARPU_ATTRIBUTE_MALLOC; + +/** + return true iff \p component is a fifo component +*/ +int starpu_sched_component_is_fifo(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Flow-control Prio Component API + @{ +*/ + +/** + todo +*/ +struct starpu_sched_component_prio_data +{ + unsigned ntasks_threshold; + double exp_len_threshold; + int ready; + int exp; +}; +struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_prio(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Resource-mapping Work-Stealing Component API + @{ +*/ + +/** + return a component that perform a work stealing scheduling. Tasks are pushed in a round robin way. estimated_end return the average of expected length of fifos, starting at the average of the expected_end of his children. When a worker have to steal a task, it steal a task in a round robin way, and get the last pushed task of the higher priority. 
+*/ +struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; + +/** + return true iff \p component is a work stealing component + */ +int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component); + +/** + undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if its a worker. +*/ +int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task); + +/** @} */ + +/** + @name Resource-mapping Random Component API + @{ +*/ + +/** + create a component that perform a random scheduling +*/ +struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; + +/** + return true iff \p component is a random component +*/ +int starpu_sched_component_is_random(struct starpu_sched_component *); + +/** @} */ + +/** + @name Resource-mapping Eager Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_eager(struct starpu_sched_component *); + +/** @} */ + +/** + @name Resource-mapping Eager Prio Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_eager_prio_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_eager_prio(struct starpu_sched_component *); + +/** @} */ + +/** + @name Resource-mapping Eager-Calibration Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *); + +/** @} */ + +/** + @name Resource-mapping MCT Component API + @{ +*/ + +/** + todo +*/ +struct 
starpu_sched_component_mct_data +{ + double alpha; + double beta; + double _gamma; + double idle_power; + int nolock; +}; + +/** + create a component with mct_data parameters. the mct component does not + do anything but pushing tasks on no_perf_model_component and + calibrating_component +*/ +struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC; + +int starpu_sched_component_is_mct(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Resource-mapping Heft Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_heft(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Resource-mapping Heteroprio Component API + @{ +*/ + +/** + todo +*/ +struct starpu_sched_component_heteroprio_data +{ + struct starpu_sched_component_mct_data *mct; + unsigned batch; +}; + +struct starpu_sched_component *starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_heteroprio_data *params) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_heteroprio(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Special-purpose Best_Implementation Component API + @{ +*/ + +/** + Select the implementation that offer the shortest computation length for the first worker that can execute the task. + Or an implementation that need to be calibrated. + Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid. + If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined. 
+*/ +struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; + +/** @} */ + +/** + @name Special-purpose Perfmodel_Select Component API + @{ +*/ + +/** + todo +*/ +struct starpu_sched_component_perfmodel_select_data +{ + struct starpu_sched_component *calibrator_component; + struct starpu_sched_component *no_perfmodel_component; + struct starpu_sched_component *perfmodel_component; +}; +struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Staged pull Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_stage_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_stage(struct starpu_sched_component *component); + +/** @} */ + +/** + @name User-choice push Component API + @{ +*/ + +struct starpu_sched_component *starpu_sched_component_userchoice_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; +int starpu_sched_component_is_userchoice(struct starpu_sched_component *component); + +/** @} */ + +/** + @name Recipe Component API + @{ +*/ + +/** + parameters for starpu_sched_component_composed_component_create +*/ +struct starpu_sched_component_composed_recipe; + +/** + return an empty recipe for a composed component, it should not be used without modification. + See \ref ImplementAModularizedScheduler for more details. 
+*/ +struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void) STARPU_ATTRIBUTE_MALLOC; + +/** + return a recipe to build a composed component with a \p create_component +*/ +struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg) STARPU_ATTRIBUTE_MALLOC; + +/** + add \p create_component under all previous components in recipe +*/ +void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg); + +/** + destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called +*/ +void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *); + +/** + create a component that behave as all component of recipe where linked. Except that you can not use starpu_sched_component_is_foo function + if recipe contain a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component +*/ +struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe) STARPU_ATTRIBUTE_MALLOC; + +#ifdef STARPU_HAVE_HWLOC +/** + Define how build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be NULL, then + the level is just skipped. Bugs everywhere, do not rely on. 
+*/ +struct starpu_sched_component_specs +{ + /** + the composed component to put on the top of the scheduler + this member must not be NULL as it is the root of the topology + */ + struct starpu_sched_component_composed_recipe *hwloc_machine_composed_sched_component; + /** + the composed component to put for each memory component + */ + struct starpu_sched_component_composed_recipe *hwloc_component_composed_sched_component; + /** + the composed component to put for each socket + */ + struct starpu_sched_component_composed_recipe *hwloc_socket_composed_sched_component; + /** + the composed component to put for each cache + */ + struct starpu_sched_component_composed_recipe *hwloc_cache_composed_sched_component; + + /** + a function that return a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype. + NULL is a valid return value, then no component will be added on top + */ + struct starpu_sched_component_composed_recipe *(*worker_composed_sched_component)(enum starpu_worker_archtype archtype); + /** + this flag is a dirty hack because of the poor expressivity of this interface. As example, if you want to build + a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos. + If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same + numa component it will be shared. it indicates if heterogeneous workers should be brothers or cousins, as example, if a gpu and a cpu should share or not there numa node + */ + int mix_heterogeneous_workers; +}; + +/** + build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine. 
+*/ +struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s); +#endif /* STARPU_HAVE_HWLOC */ + +/** + @name Basic API + @{ +*/ + +#define STARPU_SCHED_SIMPLE_DECIDE_MASK (3 << 0) + +/** + Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to. +*/ +#define STARPU_SCHED_SIMPLE_DECIDE_WORKERS (1 << 0) + +/** + Request to create downstream queues per memory node, i.e. the scheduling decision-making component will choose which memory node tasks will go to. +*/ +#define STARPU_SCHED_SIMPLE_DECIDE_MEMNODES (2 << 0) + +/** + Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc. +*/ +#define STARPU_SCHED_SIMPLE_DECIDE_ARCHS (3 << 0) + +/** + Request to create the scheduling decision-making component even if there is only one available choice. This is useful for instance when the decision-making component will store tasks itself (and not use STARPU_SCHED_SIMPLE_FIFO_ABOVE) to decide in which order tasks should be passed below. +*/ +#define STARPU_SCHED_SIMPLE_DECIDE_ALWAYS (1 << 3) + +/** + Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated. + In other words, this is needed when using a component which needs performance models for tasks. +*/ +#define STARPU_SCHED_SIMPLE_PERFMODEL (1 << 4) + +/** + Request that a component be added just above workers, that chooses the best task implementation.
+*/ +#define STARPU_SCHED_SIMPLE_IMPL (1 << 5) + +/** + Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component. + + This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful. +*/ +#define STARPU_SCHED_SIMPLE_FIFO_ABOVE (1 << 6) + +/** + Request that the fifo above be sorted by priorities +*/ +#define STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO (1 << 7) + +/** + Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers. + + This is useful to be able to schedule a (tunable) small number of tasks in advance only. +*/ +#define STARPU_SCHED_SIMPLE_FIFOS_BELOW (1 << 8) + +/** + Request that the fifos below be sorted by priorities +*/ +#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO (1 << 9) + +/** + Request that the fifos below be pulled rather ready tasks +*/ +#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY (1 << 10) + +/** + Request that the fifos below have no size limit +*/ +#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT (1 << 16) + +/** + Request that work between workers using the same fifo below be distributed using a work stealing component. +*/ +#define STARPU_SCHED_SIMPLE_WS_BELOW (1 << 11) + +/** + Request to not only choose between simple workers, but also choose between combined workers. +*/ +#define STARPU_SCHED_SIMPLE_COMBINED_WORKERS (1 << 12) + +/** + Request that the fifos below keep track of expected duration, start and end time of their elements +*/ +#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP (1 << 13) + +/** + Request to prepend a component before the decision component. This should be + used alone and followed by the component creation function pointer and its + data.
+*/ +#define STARPU_SCHED_SIMPLE_PRE_DECISION (1 << 14) + +/** + Create a simple modular scheduler tree around a scheduling decision-making + component \p component. The details of what should be built around \p component + are described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are + mutually exclusive. \p data is passed to the \p create_decision_component + function when creating the decision component. + See \ref ImplementAModularizedScheduler for more details. +*/ +void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id); + +/** + Create a simple modular scheduler tree around several scheduling decision-making + components. The parameters are similar to + starpu_sched_component_initialize_simple_scheduler, but per scheduling decision, for instance: + + starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 2, + create1, data1, flags1, + create2, data2, flags2); + + The different flags parameters must be coherent: same decision flags. They + must not include the perfmodel flag (not supported yet).
+*/ +void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id, unsigned ndecisions, ...); + +/** @} */ + +#define STARPU_COMPONENT_MUTEX_LOCK(m) \ + do \ + { \ + const int _relaxed_state = starpu_worker_get_relax_state(); \ + if (!_relaxed_state) \ + starpu_worker_relax_on(); \ + STARPU_PTHREAD_MUTEX_LOCK((m)); \ + if (!_relaxed_state) \ + starpu_worker_relax_off(); \ + } \ + while (0) + +#define STARPU_COMPONENT_MUTEX_TRYLOCK(m) STARPU_PTHREAD_MUTEX_TRYLOCK((m)) + +#define STARPU_COMPONENT_MUTEX_UNLOCK(m) STARPU_PTHREAD_MUTEX_UNLOCK((m)) + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SCHED_COMPONENT_H__ */ diff --git a/include/starpu_sched_ctx.h b/include/starpu_sched_ctx.h new file mode 100644 index 0000000..c4502bc --- /dev/null +++ b/include/starpu_sched_ctx.h @@ -0,0 +1,454 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2017-2017 Arthur Chevalier + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#ifndef __STARPU_SCHED_CTX_H__ +#define __STARPU_SCHED_CTX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Scheduling_Contexts Scheduling Contexts + @brief StarPU permits on one hand grouping workers in combined + workers in order to execute a parallel task and on the other hand + grouping tasks in bundles that will be executed by a single + specified worker. + In contrast when we group workers in scheduling contexts we submit + starpu tasks to them and we schedule them with the policy assigned + to the context. Scheduling contexts can be created, deleted and + modified dynamically. + @{ +*/ + +/** + @name Scheduling Contexts Basic API + @{ +*/ + +/** + Used when calling starpu_sched_ctx_create() to specify a + name for a scheduling policy +*/ +#define STARPU_SCHED_CTX_POLICY_NAME (1 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify a + pointer to a scheduling policy +*/ +#define STARPU_SCHED_CTX_POLICY_STRUCT (2 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify a + minimum scheduler priority value. +*/ +#define STARPU_SCHED_CTX_POLICY_MIN_PRIO (3 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify a + maximum scheduler priority value. +*/ +#define STARPU_SCHED_CTX_POLICY_MAX_PRIO (4 << 16) + +#define STARPU_SCHED_CTX_HIERARCHY_LEVEL (5 << 16) +#define STARPU_SCHED_CTX_NESTED (6 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify ??? +*/ +#define STARPU_SCHED_CTX_AWAKE_WORKERS (7 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify a + function pointer allowing to initialize the scheduling policy. +*/ +#define STARPU_SCHED_CTX_POLICY_INIT (8 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify a + pointer to some user data related to the context being created. 
+*/ +#define STARPU_SCHED_CTX_USER_DATA (9 << 16) + +/** + Used when calling starpu_sched_ctx_create() in order to create a + context on the NVIDIA GPU to specify the number of SMs the context + should have +*/ +#define STARPU_SCHED_CTX_CUDA_NSMS (10 << 16) + +/** + Used when calling starpu_sched_ctx_create() to specify + a list of sub contexts of the current context. +*/ +#define STARPU_SCHED_CTX_SUB_CTXS (11 << 16) + +/** + Create a scheduling context with the given parameters + (see below) and assign the workers in \p workerids_ctx to execute the + tasks submitted to it. The return value represents the identifier of + the context that has just been created. It will be further used to + indicate the context the tasks will be submitted to. The return value + should be at most ::STARPU_NMAX_SCHED_CTXS. + + The arguments following the name of the scheduling context can be of + the following types: +
      +
    • ::STARPU_SCHED_CTX_POLICY_NAME, followed by the name of a + predefined scheduling policy. Use an empty string to create the + context with the default scheduling policy. +
    • +
    • ::STARPU_SCHED_CTX_POLICY_STRUCT, followed by a pointer to a + custom scheduling policy (struct starpu_sched_policy *) +
    • +
    • ::STARPU_SCHED_CTX_POLICY_MIN_PRIO, followed by an integer + representing the minimum priority value to be defined for the + scheduling policy. +
    • +
    • ::STARPU_SCHED_CTX_POLICY_MAX_PRIO, followed by an integer + representing the maximum priority value to be defined for the + scheduling policy. +
    • +
    • ::STARPU_SCHED_CTX_POLICY_INIT, followed by a function pointer + (i.e. void init_sched(void)) allowing to initialize the scheduling policy. +
    • +
    • ::STARPU_SCHED_CTX_USER_DATA, followed by a pointer + to a custom user data structure, to be retrieved by \ref starpu_sched_ctx_get_user_data(). +
    • +
    + See \ref CreatingAContext for more details. +*/ +unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...); + +/** + Create a context indicating an approximate interval of resources +*/ +unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); + +/** + Execute the callback whenever the last task of the context finished + executing, it is called with the parameters \p sched_ctx and any + other parameter needed by the application (packed in \p args) +*/ +void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void *args), void *args); + +/** + Add dynamically the workers in \p workerids_ctx to the context \p + sched_ctx_id. The last argument cannot be greater than + ::STARPU_NMAX_SCHED_CTXS. + See \ref ModifyingAContext for more details. +*/ +void starpu_sched_ctx_add_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id); + +/** + Remove the workers in \p workerids_ctx from the context + \p sched_ctx_id. The last argument cannot be greater than + ::STARPU_NMAX_SCHED_CTXS. + See \ref ModifyingAContext for more details. +*/ +void starpu_sched_ctx_remove_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id); + +/** + Print on the file \p f the worker names belonging to the context \p + sched_ctx_id +*/ +void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f); + +/** + Delete scheduling context \p sched_ctx_id and transfer remaining + workers to the inheritor scheduling context. + See \ref DeletingAContext for more details. +*/ +void starpu_sched_ctx_delete(unsigned sched_ctx_id); + +/** + Indicate that the context \p inheritor will inherit the resources + of the context \p sched_ctx_id when \p sched_ctx_id will be + deleted. + See \ref DeletingAContext for more details. 
+*/ +void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); + +unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); + +unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); + +/** + Set the scheduling context the subsequent tasks will be submitted + to. + See \ref SubmittingTasksToAContext and \ref TmpCTXS for more details. +*/ +void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); + +/** + Return the scheduling context the tasks are currently submitted to, + or ::STARPU_NMAX_SCHED_CTXS if no default context has been defined + by calling the function starpu_sched_ctx_set_context(). +*/ +unsigned starpu_sched_ctx_get_context(void); + +/** + Stop submitting tasks from the empty context list until the next + time the context has time to check the empty context list. + See \ref EmptyingAContext for more details. +*/ +void starpu_sched_ctx_stop_task_submission(void); + +/** + Indicate starpu that the application finished submitting to this + context in order to move the workers to the inheritor as soon as + possible. + See \ref DeletingAContext for more details. +*/ +void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); + +/** + Return the list of workers in the array \p workerids, the return + value is the number of workers. The user should free the \p + workerids table after finishing using it (it is allocated inside + the function with the proper size) +*/ +unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); + +/** + Return the list of workers in the array \p workerids, the return + value is the number of workers. This list is provided in raw order, + i.e. not sorted by tree or list order, and the user should not free + the \p workerids table. This function is thus much less costly than + starpu_sched_ctx_get_workers_list(). 
+*/ +unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); + +/** + Return the number of workers managed by the specified context + (Usually needed to verify if it manages any workers or if it should + be blocked) +*/ +unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); + +/** + Return the number of workers shared by two contexts. +*/ +unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); + +/** + Return 1 if the worker belongs to the context and 0 otherwise +*/ +unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); + +unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); + +/** + Return the workerid if the worker belongs to the context and -1 otherwise. + If the thread calling this function is not a worker the function returns -1 + as it calls the function starpu_worker_get_id(). +*/ +unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); + +unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); + +unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid); + +/** + Check if a worker is shared between several contexts +*/ +unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); + +/** + Return the user data pointer associated to the scheduling context. +*/ +void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); + +void starpu_sched_ctx_set_user_data(unsigned sched_ctx_id, void *user_data); + +/** + Allocate the scheduling policy data (private information of the + scheduler like queues, variables, additional condition variables) + the context. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); + +/** + Return the scheduling policy data (private information of the + scheduler) of the contexts previously assigned to. 
+ See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); + +struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id); + +/** + Execute any parallel code on the workers of the sched_ctx (workers + are blocked) +*/ +void *starpu_sched_ctx_exec_parallel_code(void *(*func)(void *), void *param, unsigned sched_ctx_id); + +int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); + +double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); + +void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); + +void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); + +void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); + +void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id); + +void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id); + +void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); + +void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority); + +unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); + +void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); + +void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); + +int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); + +void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); + +/** + Return the first context (child of sched_ctx_id) where the workerid + is master + */ +unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); + +/** + Return the context id of masterid if it master of a context. 
If + not, return ::STARPU_NMAX_SCHED_CTXS. +*/ +unsigned starpu_sched_ctx_master_get_context(int masterid); + +void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double flops); + +void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush); + +int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); + +/** + Return the function associated with the scheduler context \p + sched_ctx_id which was given through the field + starpu_conf::sched_policy_callback +*/ +void (*starpu_sched_ctx_get_sched_policy_callback(unsigned sched_ctx_id))(unsigned); + +unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); + +int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx); +int starpu_sched_ctx_get_nsms(unsigned sched_ctx); +void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end); + +/** @} */ + +/** + @name Scheduling Context Priorities + @{ +*/ + +/** + Return the current minimum priority level supported by the + scheduling policy of the given scheduler context. +*/ +int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); + +/** + Return the current maximum priority level supported by the + scheduling policy of the given scheduler context. +*/ +int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); + +/** + Define the minimum task priority level supported by the scheduling + policy of the given scheduler context. The default minimum priority + level is the same as the default priority level which is 0 by + convention. The application may access that value by calling the + function starpu_sched_ctx_get_min_priority(). This function should + only be called from the initialization method of the scheduling + policy, and should not be used directly from the application. 
+*/ +int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); + +/** + Define the maximum priority level supported by the scheduling + policy of the given scheduler context. The default maximum priority + level is 1. The application may access that value by calling the + starpu_sched_ctx_get_max_priority() function. This function should + only be called from the initialization method of the scheduling + policy, and should not be used directly from the application. +*/ +int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); + +int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); + +int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); + +/** + Provided for legacy reasons. +*/ +#define STARPU_MIN_PRIO (starpu_sched_get_min_priority()) + +/** + Provided for legacy reasons. +*/ +#define STARPU_MAX_PRIO (starpu_sched_get_max_priority()) + +/** + By convention, the default priority level should be 0 so that we + can statically allocate tasks with a default priority. +*/ +#define STARPU_DEFAULT_PRIO 0 + +/** @} */ + +/** + @name Scheduling Context Worker Collection + @{ +*/ + +/** + Create a worker collection of the type indicated by the last + parameter for the context specified through the first parameter. 
+*/ +struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; + +/** + Delete the worker collection of the specified scheduling context +*/ +void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); + +/** + Return the worker collection managed by the indicated context +*/ +struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SCHED_CTX_H__ */ diff --git a/include/starpu_sched_ctx_hypervisor.h b/include/starpu_sched_ctx_hypervisor.h new file mode 100644 index 0000000..2b4d8c5 --- /dev/null +++ b/include/starpu_sched_ctx_hypervisor.h @@ -0,0 +1,108 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__ +#define __STARPU_SCHED_CTX_HYPERVISOR_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_Scheduling_Contexts + @{ +*/ + +/** + @name Scheduling Context Link with Hypervisor + @{ +*/ + +/** + Performance counters used by the starpu to indicate the hypervisor + how the application and the resources are executing. 
+ */ +struct starpu_sched_ctx_performance_counters +{ + /** + Inform the hypervisor for how long a worker has been idle + in the specified context + */ + void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time); + + /** + Inform the hypervisor that a task executing a specified + number of instructions has been popped from the worker + */ + void (*notify_poped_task)(unsigned sched_ctx_id, int worker); + + /** + Notify the hypervisor that a task has been scheduled on + the queue of the worker corresponding to the specified + context + */ + void (*notify_pushed_task)(unsigned sched_ctx_id, int worker); + + /** + Notify the hypervisor that a task has just been executed + */ + void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops); + + /** + Notify the hypervisor that a task has just been submitted + */ + void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size); + + void (*notify_empty_ctx)(unsigned sched_ctx_id, struct starpu_task *task); + + /** + Notify the hypervisor that the context was deleted + */ + void (*notify_delete_context)(unsigned sched_ctx); +}; + +/** + Indicate to starpu the pointer to the performance counter +*/ +void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void *perf_counters); + +/** + Callback that lets the scheduling policy tell the hypervisor that a + task was pushed on a worker +*/ +void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); + +/** + Allow the hypervisor to let starpu know it's initialised +*/ +void starpu_sched_ctx_notify_hypervisor_exists(void); + +/** + Ask starpu if it is informed if the hypervisor is initialised +*/ +unsigned starpu_sched_ctx_check_if_hypervisor_exists(void); + +void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* 
__STARPU_SCHED_CTX_HYPERVISOR_H__ */ diff --git a/include/starpu_scheduler.h b/include/starpu_scheduler.h new file mode 100644 index 0000000..3525029 --- /dev/null +++ b/include/starpu_scheduler.h @@ -0,0 +1,563 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_SCHEDULER_H__ +#define __STARPU_SCHEDULER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Scheduling_Policy Scheduling Policy + @brief TODO. While StarPU comes with a variety of scheduling + policies (see \ref TaskSchedulingPolicy), it may sometimes be + desirable to implement custom policies to address specific + problems. The API described below allows users to write their own + scheduling policy. + @{ +*/ + +struct starpu_task; + +/** + Contain all the methods that implement a scheduling policy. An + application may specify which scheduling strategy in the field + starpu_conf::sched_policy passed to the function starpu_init(). + + For each task going through the scheduler, the following methods + get called in the given order: + +
      +
    • starpu_sched_policy::submit_hook when the task is + submitted
    • +
    • starpu_sched_policy::push_task when the task becomes ready. The + scheduler is here given the task
    • +
    • starpu_sched_policy::pop_task when the worker is idle. The + scheduler here gives back the task to the core. It must not + access this task any more
    • +
    • starpu_sched_policy::pre_exec_hook right before the worker + actually starts the task computation (after transferring any + missing data).
    • +
    • starpu_sched_policy::post_exec_hook right after the worker + actually completes the task computation.
    • +
    + + For each task not going through the scheduler (because + starpu_task::execute_on_a_specific_worker was set), these get + called: + +
      +
    • starpu_sched_policy::submit_hook when the task is + submitted
    • +
    • starpu_sched_policy::push_task_notify when the task becomes + ready. This is just a notification, the scheduler does not have to + do anything about the task.
    • +
    • starpu_sched_policy::pre_exec_hook right before the worker + actually starts the task computation (after transferring any + missing data).
    • +
    • starpu_sched_policy::post_exec_hook right after the worker + actually completes the task computation.
    • +
    +*/ +struct starpu_sched_policy +{ + /** + Initialize the scheduling policy, called before any other + method. + */ + void (*init_sched)(unsigned sched_ctx_id); + /** + Cleanup the scheduling policy + */ + void (*deinit_sched)(unsigned sched_ctx_id); + + /** + Insert a task into the scheduler, called when the task + becomes ready for execution. This must call + starpu_push_task_end() once it has effectively pushed the + task to a queue (to note the time when this was done in the + task), but before releasing mutexes (so that the task + hasn't been already taken by a worker). + */ + int (*push_task)(struct starpu_task *); + + double (*simulate_push_task)(struct starpu_task *); + + /** + Notify the scheduler that a task was pushed on a given + worker. This method is called when a task that was + explicitly assigned to a worker becomes ready and is about + to be executed by the worker. This method therefore permits + to keep the state of the scheduler coherent even when + StarPU bypasses the scheduling strategy. + + Note: to get an estimation of the task duration, \p perf_workerid + needs to be used rather than \p workerid, for the case of parallel + tasks. + */ + void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id); + + /** + Get a task from the scheduler. + If this method returns NULL, the worker will start + sleeping. If later on some task are pushed for this worker, + starpu_wake_worker() must be called to wake the worker so + it can call the pop_task() method again. + The mutex associated to the worker is already taken when + this method is called. This method may release it (e.g. for + scalability reasons when doing work stealing), but it must + acquire it again before taking the decision whether to + return a task or NULL, so the atomicity of deciding to + return NULL and making the worker actually sleep is + preserved. 
Otherwise in simgrid or blocking driver mode the + worker might start sleeping while a task has just been + pushed for it. + If this method is defined as NULL, the worker will + only execute tasks from its local queue. In this case, the + push_task method should use the starpu_push_local_task + method to assign tasks to the different workers. + */ + struct starpu_task *(*pop_task)(unsigned sched_ctx_id); + + /** + Optional field. This method is called when a task is + submitted. + */ + void (*submit_hook)(struct starpu_task *task); + + /** + Optional field. This method is called every time a task is + starting. + */ + void (*pre_exec_hook)(struct starpu_task *, unsigned sched_ctx_id); + + /** + Optional field. This method is called every time a task has + been executed. + */ + void (*post_exec_hook)(struct starpu_task *, unsigned sched_ctx_id); + + /** + Optional field. This method is called when it is a good + time to start scheduling tasks. This is notably called when + the application calls starpu_task_wait_for_all() or + starpu_do_schedule() explicitly. + */ + void (*do_schedule)(unsigned sched_ctx_id); + + /** + Initialize scheduling structures corresponding to each + worker used by the policy. + */ + void (*add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers); + + /** + Deinitialize scheduling structures corresponding to each + worker used by the policy. + */ + void (*remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers); + + /** Whether this scheduling policy does data prefetching, and thus the + core should not try to do it opportunistically. + */ + int prefetches; + + /** + Optional field. Name of the policy. + */ + const char *policy_name; + + /** + Optional field. Human readable description of the policy. + */ + const char *policy_description; + + enum starpu_worker_collection_type worker_type; +}; + +/** + Return an NULL-terminated array of all the predefined + scheduling policies. 
+ See \ref TaskSchedulingPolicy for more details. +*/ +struct starpu_sched_policy **starpu_sched_get_predefined_policies(void); + +/** + Allow an external library to return a scheduling policy to be + loaded dynamically. + See \ref UsingaNewSchedulingPolicy for more details. + */ +struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name); + +/** + Allow an external library to return a list of scheduling policies to be + loaded dynamically. + See \ref UsingaNewSchedulingPolicy for more details. + */ +struct starpu_sched_policy **starpu_get_sched_lib_policies(void); + +/** + Return the scheduler policy of the given context. + See \ref TaskSchedulingPolicy for more details. +*/ +struct starpu_sched_policy *starpu_sched_get_sched_policy_in_ctx(unsigned sched_ctx_id); + +/** + Return the scheduler policy of the default context. + See \ref TaskSchedulingPolicy for more details. +*/ +struct starpu_sched_policy *starpu_sched_get_sched_policy(void); + +/** + When there is no available task for a worker, StarPU blocks this + worker on a condition variable. This function specifies which + condition variable (and the associated mutex) should be used to + block (and to wake up) a worker. Note that multiple workers may use + the same condition variable. For instance, in the case of a + scheduling strategy with a single task queue, the same condition + variable would be used to block and wake up all workers. +*/ +void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond); + +/** + Return the job identifier associated with the task. + See \ref TraceSchedTaskDetails for more details. +*/ +unsigned long starpu_task_get_job_id(struct starpu_task *task); + +/** + TODO: check if this is correct + Return the current minimum priority level supported by the scheduling + policy. + See \ref DefiningANewBasicSchedulingPolicy for more details. 
+*/ +int starpu_sched_get_min_priority(void); + +/** + TODO: check if this is correct + Return the current maximum priority level supported by the + scheduling policy. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_sched_get_max_priority(void); + +/** + TODO: check if this is correct + Define the minimum task priority level supported by the scheduling + policy. The default minimum priority level is the same as the + default priority level which is 0 by convention. The application + may access that value by calling the function + starpu_sched_get_min_priority(). This function should only be + called from the initialization method of the scheduling policy, and + should not be used directly from the application. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_sched_set_min_priority(int min_prio); + +/** + TODO: check if this is correct + Define the maximum priority level supported by the scheduling + policy. The default maximum priority level is 1. The application + may access that value by calling the function + starpu_sched_get_max_priority(). This function should only be + called from the initialization method of the scheduling policy, and + should not be used directly from the application. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_sched_set_max_priority(int max_prio); + +/** + Check if the worker specified by workerid can execute the codelet. + Schedulers need to call it before assigning a task to a worker, + otherwise the task may fail to execute. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl); + +/** + Check if the worker specified by workerid can execute the codelet + and return which implementation numbers can be used. + Schedulers need to call it before assigning a task to a worker, + otherwise the task may fail to execute. 
+ This should be preferred rather than calling + starpu_worker_can_execute_task() for each and every implementation. + It can also be used with impl_mask == NULL to check for at + least one implementation without determining which. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask); + +/** + Check if the worker specified by workerid can execute the codelet + and return the first implementation which can be used. + Schedulers need to call it before assigning a task to a worker, + otherwise the task may fail to execute. This should be preferred + rather than calling starpu_worker_can_execute_task() for + each and every implementation. It can also be used with + nimpl == NULL to check for at least one implementation + without determining which. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl); + +/** + The scheduling policy may put tasks directly into a worker’s local + queue so that it is not always necessary to create its own queue + when the local queue is sufficient. \p back is ignored: the task priority is + used to order tasks in this queue. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_push_local_task(int workerid, struct starpu_task *task, int back); + +/** + Must be called by a scheduler to notify that the given + task has just been pushed. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_push_task_end(struct starpu_task *task); + +/** + Whether \ref STARPU_PREFETCH was set. + See \ref SchedulingHelpers for more details. +*/ +int starpu_get_prefetch_flag(void); + +/** + Prefetch data for a given \p task on a given \p node with a given + priority. + See \ref SchedulingHelpers for more details. 
+*/ +int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio); + +/** + Prefetch data for a given p task on a given p node. + See \ref SchedulingHelpers for more details. +*/ +int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node); + +/** + Prefetch data for a given p task on a given p node when the bus is + idle with a given priority. + See \ref SchedulingHelpers for more details. +*/ +int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio); + +/** + Prefetch data for a given p task on a given p node when the bus is + idle. + See \ref SchedulingHelpers for more details. +*/ +int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node); + +/** + Prefetch data for a given p task on a given p worker with a given + priority. + See \ref SchedulingHelpers for more details. +*/ +int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio); + +/** + Prefetch data for a given p task on a given p worker. + See \ref SchedulingHelpers for more details. +*/ +int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker); + +/** + Prefetch data for a given p task on a given p worker when the bus + is idle with a given priority. + See \ref SchedulingHelpers for more details. +*/ +int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio); + +/** + Prefetch data for a given p task on a given p worker when the bus + is idle. + See \ref SchedulingHelpers for more details. +*/ +int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker); + +/** + Return the footprint for a given task, taking into account + user-provided perfmodel footprint or size_base functions. + See \ref PerformanceModelExample for more details. 
+*/ +uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** + Return the raw footprint for the data of a given task (without + taking into account user-provided functions). + See \ref PerformanceModelExample for more details. +*/ +uint32_t starpu_task_data_footprint(struct starpu_task *task); + +/** + Return expected task duration in micro-seconds on a given architecture \p arch using given implementation \p nimpl. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** + Same as starpu_task_expected_length() but for a precise worker. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl); + +/** + Return expected task duration in micro-seconds, averaged over the different workers driven by the scheduler \p sched_ctx_id + Note: this is not just the average of the durations using the number of + processing units as coefficients, but their efficiency at processing the + task, thus the harmonic average of the durations. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_expected_length_average(struct starpu_task *task, unsigned sched_ctx_id); + +/** + Return an estimated speedup factor relative to CPU speed. + See \ref SchedulingHelpers for more details. +*/ +double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch); + +/** + Return expected data transfer time in micro-seconds for the given \p + memory_node. Prefer using starpu_task_expected_data_transfer_time_for() which is + more precise. + See \ref SchedulingHelpers for more details. 
+*/ +double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task); + +/** + Return expected data transfer time in micro-seconds for the given + \p worker. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker); + +/** + Predict the transfer time (in micro-seconds) to move \p handle to a + memory node. + See \ref SchedulingHelpers for more details. +*/ +double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode); + +/** + Return expected energy use in J. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** + Same as starpu_task_expected_energy but for a precise worker. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl); + +/** + Return expected task energy use in J, averaged over the different workers driven by the scheduler \p sched_ctx_id + Note: this is not just the average of the energy uses using the number of + processing units as coefficients, but their efficiency at processing the + task, thus the harmonic average of the energy uses. + See \ref SchedulingHelpers for more details. +*/ +double starpu_task_expected_energy_average(struct starpu_task *task, unsigned sched_ctx_id); + +/** + Return expected conversion time in ms (multiformat interface only). + See \ref SchedulingHelpers for more details. 
+*/ +double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +typedef void (*starpu_notify_ready_soon_func)(void *data, struct starpu_task *task, double delay); + +/** + Register a callback to be called when it is determined that a task + will be ready an estimated amount of time from now, because its + last dependency has just started and we know how long it will take. + See \ref SchedulingHelpers for more details. +*/ +void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data); + +/** + The scheduling policy indicates if the worker may pop tasks from + the list of other workers or if there is a central list with tasks + for all the workers. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_sched_ctx_worker_shares_tasks_lists(int workerid, int sched_ctx_id); + +/** + The scheduling policy should call this when it makes a scheduling decision + for a task. This will possibly stop execution at this point, and then the + programmer can inspect local variables etc. to determine why this scheduling + decision was made. + + See \ref STARPU_TASK_BREAK_ON_SCHED + See \ref DefiningANewBasicSchedulingPolicy for more details. + */ +void starpu_sched_task_break(struct starpu_task *task); + +/** + @name Worker operations + @{ +*/ + +/** + Wake up \p workerid while temporarily entering the current worker + relax state if needed during the waiting process. Return 1 if \p + workerid has been woken up or its state_keep_awake flag has been + set to \c 1, and \c 0 otherwise (if \p workerid was not in the + STATE_SLEEPING or in the STATE_SCHEDULING). + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_wake_worker_relax(int workerid); + +/** + Must be called to wake up a worker that is sleeping on the cond. + Return 0 whenever the worker is not in a sleeping state or has the + state_keep_awake flag on.
+ See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_wake_worker_no_relax(int workerid); + +/** + Version of starpu_wake_worker_no_relax() which assumes that the + sched mutex is locked + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_wake_worker_locked(int workerid); + +/** + Light version of starpu_wake_worker_relax() which, when possible, + speculatively set keep_awake on the target worker without waiting + for the worker to enter the relax state. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_wake_worker_relax_light(int workerid); + +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SCHEDULER_H__ */ diff --git a/include/starpu_simgrid_wrap.h b/include/starpu_simgrid_wrap.h new file mode 100644 index 0000000..70dc71e --- /dev/null +++ b/include/starpu_simgrid_wrap.h @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_SIMGRID_WRAP_H__ +#define __STARPU_SIMGRID_WRAP_H__ + +#include + +#ifdef STARPU_SIMGRID +#ifndef main +#define main starpu_main +#ifdef __cplusplus +extern "C" int starpu_main(int argc, char *argv[]); +extern "C" int starpu_main(int argc, char **argv); +#endif +#endif +#endif + +#endif /* __STARPU_SIMGRID_WRAP_H__ */ diff --git a/include/starpu_sink.h b/include/starpu_sink.h new file mode 100644 index 0000000..4626285 --- /dev/null +++ b/include/starpu_sink.h @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_SINK_H__ +#define __STARPU_SINK_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Sink Sink + @{ +*/ + +void starpu_sink_common_worker(int argc, char **argv); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_SINK_H__ */ diff --git a/include/starpu_stdlib.h b/include/starpu_stdlib.h new file mode 100644 index 0000000..3a48621 --- /dev/null +++ b/include/starpu_stdlib.h @@ -0,0 +1,326 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022-2022 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_STDLIB_H__ +#define __STARPU_STDLIB_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Standard_Memory_Library Standard Memory Library + @{ +*/ + +/** + Value passed to the function starpu_malloc_flags() to indicate the + memory allocation should be pinned. +*/ +#define STARPU_MALLOC_PINNED ((1ULL) << 1) + +/** + Value passed to the function starpu_malloc_flags() to indicate the + memory allocation should be in the limit defined by the environment + variables \ref STARPU_LIMIT_CUDA_devid_MEM, \ref + STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_OPENCL_devid_MEM, \ref + STARPU_LIMIT_OPENCL_MEM, \ref STARPU_LIMIT_HIP_MEM, \ref STARPU_LIMIT_HIP_devid_MEM + and \ref STARPU_LIMIT_CPU_MEM (see Section \ref HowToLimitMemoryPerNode). + If no memory is available, it tries to reclaim memory from StarPU. + Memory allocated this way needs to be freed by calling the function + starpu_free_flags() with the same flag. 
+*/ +#define STARPU_MALLOC_COUNT ((1ULL) << 2) + +/** + Value passed to the function starpu_malloc_flags() along with + ::STARPU_MALLOC_COUNT to indicate that while the memory allocation + should be kept in the limits defined for ::STARPU_MALLOC_COUNT, no + reclaiming should be performed by starpu_malloc_flags() itself, + thus potentially overflowing the memory node a bit. StarPU will + reclaim memory after next task termination, according to the \ref + STARPU_MINIMUM_AVAILABLE_MEM, \ref STARPU_TARGET_AVAILABLE_MEM, + \ref STARPU_MINIMUM_CLEAN_BUFFERS, and \ref + STARPU_TARGET_CLEAN_BUFFERS environment variables. If + ::STARPU_MEMORY_WAIT is set, no overflowing will happen, + starpu_malloc_flags() will wait for other eviction mechanisms to + release enough memory. +*/ +#define STARPU_MALLOC_NORECLAIM ((1ULL) << 3) + +/** + Value passed to starpu_memory_allocate() to specify that the + function should wait for the requested amount of memory to become + available, and atomically allocate it. +*/ +#define STARPU_MEMORY_WAIT ((1ULL) << 4) + +/** + Value passed to starpu_memory_allocate() to specify that the + function should allocate the amount of memory, even if that means + overflowing the total size of the memory node. +*/ +#define STARPU_MEMORY_OVERFLOW ((1ULL) << 5) + +/** + Value passed to the function starpu_malloc_flags() to indicate that + when StarPU is using simgrid, the allocation can be "folded", i.e. + a memory area is allocated, but its content is actually a replica + of the same memory area, to avoid having to actually allocate that + much memory. This thus allows having a memory area that does not + actually consume memory, which one can read from and write to + normally, but which yields bogus values. +*/ +#define STARPU_MALLOC_SIMULATION_FOLDED ((1ULL) << 6) + +/** + Value passed to the function starpu_malloc_flags() to indicate that + when StarPU is using simgrid, the allocation for that size could be unique.
+ Different from only STARPU_MALLOC_SIMULATION_FOLDED, the same address will + be given for all mallocs of that particular size. +*/ +#define STARPU_MALLOC_SIMULATION_UNIQUE ((1ULL)<<7) + +/** + @deprecated + Equivalent to starpu_malloc(). This macro is provided to avoid + breaking old codes. +*/ +#define starpu_data_malloc_pinned_if_possible starpu_malloc + +/** + @deprecated + Equivalent to starpu_free(). This macro is provided to avoid + breaking old codes. +*/ +#define starpu_data_free_pinned_if_possible starpu_free + +/** + Set an alignment constraints for starpu_malloc() allocations. \p + align must be a power of two. This is for instance called + automatically by the OpenCL driver to specify its own alignment + constraints. + See \ref DataManagementAllocation for more details. +*/ +void starpu_malloc_set_align(size_t align); + +/** + Allocate data of the given size \p dim in main memory, and return + the pointer to the allocated data through \p A. It will also try to + pin it in CUDA or OpenCL, so that data transfers from this buffer + can be asynchronous, and thus permit data transfer and computation + overlapping. The allocated buffer must be freed thanks to the + starpu_free_noflag() function. + See \ref DataManagementAllocation for more details. +*/ +int starpu_malloc(void **A, size_t dim); + +/** + @deprecated + Free memory which has previously been allocated with + starpu_malloc(). This function is deprecated, one should use + starpu_free_noflag(). + The function does nothing if the pointer is \c NULL. + See \ref DataManagementAllocation for more details. +*/ +int starpu_free(void *A) STARPU_DEPRECATED; + +/** + Perform a memory allocation based on the constraints defined by the + given flag. + See \ref HowToLimitMemoryPerNode for more details. +*/ +int starpu_malloc_flags(void **A, size_t dim, int flags); + +/** + Free memory by specifying its size. 
The given flags should be + consistent with the ones given to starpu_malloc_flags() when + allocating the memory. + The function does nothing if the pointer is \c NULL. + See \ref HowToLimitMemoryPerNode for more details. +*/ +int starpu_free_flags(void *A, size_t dim, int flags); + +/** + Free memory by specifying its size. Should be used for memory + allocated with starpu_malloc(). + The function does nothing if the pointer is \c NULL. + See \ref DataManagementAllocation for more details. +*/ +int starpu_free_noflag(void *A, size_t dim); + +typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags); +typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags); + +/** + Set allocation functions to be used by StarPU. By default, StarPU + will use \c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used) + for all its data handle allocations. The application can specify + another allocation primitive by calling this. The malloc_hook + should pass the allocated pointer through the \c A parameter, and + return 0 on success. On allocation failure, it should return + -ENOMEM. The \c flags parameter contains ::STARPU_MALLOC_PINNED if + the memory should be pinned by the hook for GPU transfer + efficiency. The hook can use starpu_memory_pin() to achieve this. + The \c dst_node parameter is the starpu memory node, one can + convert it to an hwloc logical id with + starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number + with starpu_memory_nodes_numa_devid_to_id(). + See \ref DataManagementAllocation for more details. +*/ +void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook); + +/** + Pin the given memory area, so that CPU-GPU transfers can be done + asynchronously with DMAs. The memory must be unpinned with + starpu_memory_unpin() before being freed. Return 0 on success, -1 + on error. + See \ref DataManagementAllocation for more details. 
+*/ +int starpu_memory_pin(void *addr, size_t size); + +/** + Unpin the given memory area previously pinned with + starpu_memory_pin(). Return 0 on success, -1 on error. + See \ref DataManagementAllocation for more details. +*/ +int starpu_memory_unpin(void *addr, size_t size); + +/** + If a memory limit is defined on the given node (see Section \ref + HowToLimitMemoryPerNode), return the amount of total memory on the + node. Otherwise return -1. + See \ref HowToLimitMemoryPerNode for more details. +*/ +starpu_ssize_t starpu_memory_get_total(unsigned node); + +/** + If a memory limit is defined on the given node (see Section \ref + HowToLimitMemoryPerNode), return the amount of available memory on + the node. Otherwise return -1. + See \ref HowToLimitMemoryPerNode for more details. +*/ +starpu_ssize_t starpu_memory_get_available(unsigned node); + +/** + Return the amount of used memory on the node. + See \ref DataManagementAllocation for more details. +*/ +size_t starpu_memory_get_used(unsigned node); + +/** + Return the amount of total memory on all memory nodes for which a + memory limit is defined (see Section \ref DataManagementAllocation). +*/ +starpu_ssize_t starpu_memory_get_total_all_nodes(void); + +/** + Return the amount of available memory on all memory nodes for which + a memory limit is defined (see Section \ref + DataManagementAllocation). +*/ +starpu_ssize_t starpu_memory_get_available_all_nodes(void); + +/** + Return the amount of used memory on all memory nodes. + See \ref DataManagementAllocation for more details. +*/ +size_t starpu_memory_get_used_all_nodes(void); + +/** + If a memory limit is defined on the given node (see Section \ref + HowToLimitMemoryPerNode), try to allocate some of it. This does not + actually allocate memory, but only accounts for it. This can be + useful when the application allocates data another way, but want + StarPU to be aware of the allocation size e.g. for memory + reclaiming.
+ By default, return -ENOMEM if there is not enough room on + the given node. \p flags can be either ::STARPU_MEMORY_WAIT or + ::STARPU_MEMORY_OVERFLOW to change this. + See \ref HowToLimitMemoryPerNode for more details. +*/ +int starpu_memory_allocate(unsigned node, size_t size, int flags); + +/** + If a memory limit is defined on the given node (see Section \ref + HowToLimitMemoryPerNode), free some of it. This does not actually + free memory, but only accounts for it, like + starpu_memory_allocate(). The amount does not have to be exactly + the same as what was passed to starpu_memory_allocate(), only the + eventual amount needs to be the same, i.e. one call to + starpu_memory_allocate() can be followed by several calls to + starpu_memory_deallocate() to declare the deallocation piece by + piece. + See \ref HowToLimitMemoryPerNode for more details. +*/ +void starpu_memory_deallocate(unsigned node, size_t size); + +/** + If a memory limit is defined on the given node (see Section \ref + HowToLimitMemoryPerNode), this will wait for \p size bytes to + become available on \p node. Of course, since another thread may be + allocating memory concurrently, this does not necessarily mean that + this amount will be actually available, just that it was reached. + To atomically wait for some amount of memory and reserve it, + starpu_memory_allocate() should be used with the + ::STARPU_MEMORY_WAIT flag. + See \ref HowToLimitMemoryPerNode for more details. +*/ +void starpu_memory_wait_available(unsigned node, size_t size); + +/** + Sleep for the given \p nb_sec seconds. Similar to calling Unix' \c + sleep function, except that it takes a float to allow sub-second + sleeping, and when StarPU is compiled in SimGrid mode it does not + really sleep but just makes SimGrid record that the thread has + taken some time to sleep. + See \ref Helpers for more details. +*/ +void starpu_sleep(float nb_sec); + +/** + Sleep for the given \p nb_micro_sec micro-seconds. 
+ In simgrid mode, this only sleeps within virtual time. + See \ref Helpers for more details. + */ +void starpu_usleep(float nb_micro_sec); + +/** + Account for \p joules J being used. + This is supported in simgrid mode, to record how much energy was used, and will + show up in further calls to starpu_energy_used(). + See \ref Energy-basedScheduling for more details. + */ +void starpu_energy_use(float joules); + +/** + Return the amount of energy having been used in J. + This accounts for the amounts passed to starpu_energy_use(), but also the static + energy use set by the \ref STARPU_IDLE_POWER environment variable. + See \ref Energy-basedScheduling for more details. + */ +double starpu_energy_used(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_STDLIB_H__ */ diff --git a/include/starpu_task.h b/include/starpu_task.h new file mode 100644 index 0000000..352366c --- /dev/null +++ b/include/starpu_task.h @@ -0,0 +1,2085 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011 Télécom Sud Paris + * Copyright (C) 2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include + +#ifndef __STARPU_TASK_H__ +#define __STARPU_TASK_H__ + +#include +#include + +#ifdef STARPU_USE_CUDA +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Codelet_And_Tasks Codelet And Tasks + @brief API to manipulate codelets and tasks. + @{ +*/ + +/** + To be used when setting the field starpu_codelet::where to specify + that the codelet has no computation part, and thus does not need to + be scheduled, and data does not need to be actually loaded. This is + thus essentially used for synchronization tasks. +*/ +#define STARPU_NOWHERE ((1ULL) << 0) + +/** + Convert from enum starpu_worker_archtype to worker type mask for use in "where" fields +*/ +#define STARPU_WORKER_TO_MASK(worker_archtype) (1ULL << (worker_archtype + 1)) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a CPU processing unit. +*/ +#define STARPU_CPU STARPU_WORKER_TO_MASK(STARPU_CPU_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a CUDA processing unit. +*/ +#define STARPU_CUDA STARPU_WORKER_TO_MASK(STARPU_CUDA_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a HIP processing unit. +*/ +#define STARPU_HIP STARPU_WORKER_TO_MASK(STARPU_HIP_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a OpenCL processing unit. +*/ +#define STARPU_OPENCL STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a MAX FPGA. 
+*/ +#define STARPU_MAX_FPGA STARPU_WORKER_TO_MASK(STARPU_MAX_FPGA_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a MPI Slave processing unit. +*/ +#define STARPU_MPI_MS STARPU_WORKER_TO_MASK(STARPU_MPI_MS_WORKER) + +/** + To be used when setting the field starpu_codelet::where (or + starpu_task::where) to specify the codelet (or the task) may be + executed on a TCP/IP Slave processing unit. +*/ +#define STARPU_TCPIP_MS STARPU_WORKER_TO_MASK(STARPU_TCPIP_MS_WORKER) + +/** + Value to be set in starpu_codelet::flags to execute the codelet + functions even in simgrid mode. +*/ +#define STARPU_CODELET_SIMGRID_EXECUTE (1 << 0) + +/** + Value to be set in starpu_codelet::flags to execute the codelet + functions even in simgrid mode, and later inject the measured + timing inside the simulation. +*/ +#define STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT (1 << 1) + +/** + Value to be set in starpu_codelet::flags to make starpu_task_submit() + not submit automatic asynchronous partitioning/unpartitioning. +*/ +#define STARPU_CODELET_NOPLANS (1 << 2) + +/** + Value to be set in starpu_codelet::cuda_flags to allow asynchronous + CUDA kernel execution. This requires to use the proper CUDA stream, + see \ref CUDA-specificOptimizations +*/ +#define STARPU_CUDA_ASYNC (1 << 0) + +/** + Value to be set in starpu_codelet::hip_flags to allow asynchronous + HIP kernel execution. This requires to use the proper HIP stream +*/ +#define STARPU_HIP_ASYNC (1 << 0) + +/** + Value to be set in starpu_codelet::opencl_flags to allow + asynchronous OpenCL kernel execution. This requires to use proper queueing, + see \ref OpenCL-specificOptimizations +*/ +#define STARPU_OPENCL_ASYNC (1 << 0) + +/** + To be used as memory node number for the main CPU memory node. +*/ +#define STARPU_MAIN_RAM 0 + +/** + Describe the type of parallel task. See \ref ParallelTasks for + details. 
+*/ +enum starpu_codelet_type +{ + STARPU_SEQ = 0, /**< (default) for classical sequential + tasks. + */ + STARPU_SPMD, /**< for a parallel task whose threads are + handled by StarPU, the code has to use + starpu_combined_worker_get_size() and + starpu_combined_worker_get_rank() to + distribute the work. + */ + STARPU_FORKJOIN /**< for a parallel task whose threads are + started by the codelet function, which has + to use starpu_combined_worker_get_size() to + determine how many threads should be + started. + */ +}; + +/** + Life-cycle status of a task. +*/ +enum starpu_task_status +{ + STARPU_TASK_INIT, /**< The task has just been initialized. */ +#define STARPU_TASK_INIT 0 +#define STARPU_TASK_INVALID STARPU_TASK_INIT /**< old name for STARPU_TASK_INIT */ + STARPU_TASK_BLOCKED, /**< The task has just been submitted, and its dependencies have not been checked yet. */ + STARPU_TASK_READY, /**< The task is ready for execution. */ + STARPU_TASK_RUNNING, /**< The task is running on some worker. */ + STARPU_TASK_FINISHED, /**< The task is finished executing. */ + STARPU_TASK_BLOCKED_ON_TAG, /**< The task is waiting for a tag. */ + STARPU_TASK_BLOCKED_ON_TASK, /**< The task is waiting for a task. */ + STARPU_TASK_BLOCKED_ON_DATA, /**< The task is waiting for some data. */ + STARPU_TASK_STOPPED /**< The task is stopped. */ +}; + +/** + CPU implementation of a codelet. +*/ +typedef void (*starpu_cpu_func_t)(void **, void *); + +/** + CUDA implementation of a codelet. +*/ +typedef void (*starpu_cuda_func_t)(void **, void *); + +/** + HIP implementation of a codelet. +*/ +typedef void (*starpu_hip_func_t)(void **, void *); + +/** + OpenCL implementation of a codelet. +*/ +typedef void (*starpu_opencl_func_t)(void **, void *); + +/** + Maxeler FPGA implementation of a codelet.
+*/ +typedef void (*starpu_max_fpga_func_t)(void **, void *); + +/** + @ingroup API_Bubble Hierarchical Dags + Bubble decision function +*/ +typedef int (*starpu_bubble_func_t)(struct starpu_task *t, void *arg); + +/** + @ingroup API_Bubble Hierarchical Dags + Bubble DAG generation function +*/ +typedef void (*starpu_bubble_gen_dag_func_t)(struct starpu_task *t, void *arg); + +/** + @deprecated + Setting the field starpu_codelet::cpu_func with this macro + indicates the codelet will have several implementations. The use of + this macro is deprecated. One should always only define the field + starpu_codelet::cpu_funcs. +*/ +#define STARPU_MULTIPLE_CPU_IMPLEMENTATIONS ((starpu_cpu_func_t)-1) + +/** + @deprecated + Setting the field starpu_codelet::cuda_func with this macro + indicates the codelet will have several implementations. The use of + this macro is deprecated. One should always only define the field + starpu_codelet::cuda_funcs. +*/ +#define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS ((starpu_cuda_func_t)-1) + +/** + @deprecated + Setting the field starpu_codelet::hip_func with this macro + indicates the codelet will have several implementations. The use of + this macro is deprecated. One should always only define the field + starpu_codelet::hip_funcs. +*/ +#define STARPU_MULTIPLE_HIP_IMPLEMENTATIONS ((starpu_hip_func_t)-1) + +/** + @deprecated + Setting the field starpu_codelet::opencl_func with this macro + indicates the codelet will have several implementations. The use of + this macro is deprecated. One should always only define the field + starpu_codelet::opencl_funcs. +*/ +#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t)-1) + +/** + Value to set in starpu_codelet::nbuffers to specify that the + codelet can accept a variable number of buffers, specified in + starpu_task::nbuffers. 
+*/ +#define STARPU_VARIABLE_NBUFFERS (-1) + +/** + Value to be set in the starpu_codelet::nodes field to request + StarPU to put the data in local memory of the worker running the task (this + is the default behavior). +*/ +#define STARPU_SPECIFIC_NODE_LOCAL (-1) + +/** + Value to be set in the starpu_codelet::nodes field to request + StarPU to put the data in CPU-accessible memory (and let StarPU + choose the NUMA node). +*/ +#define STARPU_SPECIFIC_NODE_CPU (-2) + +/** + Value to be set in the starpu_codelet::nodes field to request + StarPU to put the data in some slow memory. +*/ +#define STARPU_SPECIFIC_NODE_SLOW (-3) + +/** + Value to be set in the starpu_codelet::nodes field to request + StarPU to put the data in some fast memory. +*/ +#define STARPU_SPECIFIC_NODE_FAST (-4) + +/** + Value to be set in the starpu_codelet::nodes field to let StarPU decide + whether to put the data in the local memory of the worker running the task, + or in CPU-accessible memory (and let StarPU choose the NUMA node). +*/ +#define STARPU_SPECIFIC_NODE_LOCAL_OR_CPU (-5) + +/** + Value to be set in the starpu_codelet::nodes field to make StarPU not actually + put the data in any particular memory, i.e. the task will only get the + sequential consistency dependencies, but not actually trigger any data + transfer. +*/ +#define STARPU_SPECIFIC_NODE_NONE (-6) + +struct starpu_transaction; +struct _starpu_trs_epoch; +typedef struct _starpu_trs_epoch *starpu_trs_epoch_t; +struct starpu_task; + +/** + The codelet structure describes a kernel that is possibly + implemented on various targets. For compatibility, make sure to + initialize the whole structure to zero, either by using explicit + memset, or the function starpu_codelet_init(), or by letting the + compiler implicitly do it in e.g. static storage case. + + Note that the codelet structure needs to exist until the task is + terminated. 
If dynamic codelet allocation is desired, release should be done + no sooner than the starpu_task::callback_func callback time. + + If the application wants to make the structure constant, it needs to be + filled exactly as StarPU expects: + + - starpu_codelet::cpu_funcs, starpu_codelet::cuda_funcs, etc. must be used instead + of the deprecated starpu_codelet::cpu_func, starpu_codelet::cuda_func, etc. + + - the starpu_codelet::where field must be set. + + and additionally, starpu_codelet::checked must be set to 1 to tell StarPU + that the conditions above are properly met. Also, the \ref + STARPU_CODELET_PROFILING environment variable must be set to 0. + An example is provided in tests/main/const_codelet.c +*/ +struct starpu_codelet +{ + /** + Optional field to indicate which types of processing units + are able to execute the codelet. The different values + ::STARPU_CPU, ::STARPU_CUDA, ::STARPU_HIP, ::STARPU_OPENCL can be + combined to specify on which types of processing units the + codelet can be executed. ::STARPU_CPU|::STARPU_CUDA for + instance indicates that the codelet is implemented for both + CPU cores and CUDA devices while ::STARPU_OPENCL indicates + that it is only available on OpenCL devices. If the field + is unset, its value will be automatically set based on the + availability of the XXX_funcs fields defined below. It can + also be set to ::STARPU_NOWHERE to specify that no + computation has to be actually done. + */ + uint32_t where; + + /** + Define a function which should return 1 if the worker + designated by \p workerid can execute the \p nimpl -th + implementation of \p task, 0 otherwise. + */ + int (*can_execute)(unsigned workerid, struct starpu_task *task, unsigned nimpl); + + /** + Optional field to specify the type of the codelet. The + default is ::STARPU_SEQ, i.e. usual sequential + implementation. Other values (::STARPU_SPMD or + ::STARPU_FORKJOIN) declare that a parallel implementation is + also available. 
See \ref ParallelTasks for details. + */ + enum starpu_codelet_type type; + + /** + Optional field. If a parallel implementation is available, + this denotes the maximum combined worker size that StarPU + will use to execute parallel tasks for this codelet. + */ + int max_parallelism; + + /** + @deprecated + Optional field which has been made deprecated. One should + use instead the field starpu_codelet::cpu_funcs. + */ + starpu_cpu_func_t cpu_func STARPU_DEPRECATED; + + /** + @deprecated + Optional field which has been made deprecated. One should + use instead the starpu_codelet::cuda_funcs field. + */ + starpu_cuda_func_t cuda_func STARPU_DEPRECATED; + + /** + @deprecated + Optional field which has been made deprecated. One should + use instead the starpu_codelet::opencl_funcs field. + */ + starpu_opencl_func_t opencl_func STARPU_DEPRECATED; + + /** + Optional array of function pointers to the CPU + implementations of the codelet. The functions prototype + must be: + \code{.c} + void cpu_func(void *buffers[], void *cl_arg) + \endcode + The first argument being the array of data managed by the + data management library, and the second argument is a + pointer to the argument passed from the field + starpu_task::cl_arg. If the field starpu_codelet::where is + set, then the field starpu_codelet::cpu_funcs is ignored if + ::STARPU_CPU does not appear in the field + starpu_codelet::where, it must be non-NULL otherwise. + */ + starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of function pointers to the CUDA + implementations of the codelet. The functions must be + host-functions written in the CUDA runtime API. Their + prototype must be: + \code{.c} + void cuda_func(void *buffers[], void *cl_arg) + \endcode + If the field starpu_codelet::where is set, then the field + starpu_codelet::cuda_funcs is ignored if ::STARPU_CUDA does + not appear in the field starpu_codelet::where, it must be + non-NULL otherwise. 
+ */ + starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of flags for CUDA execution. They specify + some semantic details about CUDA kernel execution, such as + asynchronous execution. + */ + char cuda_flags[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of function pointers to the HIP + implementations of the codelet. The functions must be + host-functions written in the HIP runtime API. Their + prototype must be: + \code{.c} + void hip_func(void *buffers[], void *cl_arg) + \endcode + If the field starpu_codelet::where is set, then the field + starpu_codelet::hip_funcs is ignored if ::STARPU_HIP does + not appear in the field starpu_codelet::where, it must be + non-NULL otherwise. + */ + starpu_hip_func_t hip_funcs[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of flags for HIP execution. They specify + some semantic details about HIP kernel execution, such as + asynchronous execution. + */ + char hip_flags[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of function pointers to the OpenCL + implementations of the codelet. The functions prototype + must be: + \code{.c} + void opencl_func(void *buffers[], void *cl_arg) + \endcode + If the field starpu_codelet::where is set, then the + field starpu_codelet::opencl_funcs is ignored if + ::STARPU_OPENCL does not appear in the field + starpu_codelet::where, it must be non-NULL otherwise. + */ + starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of flags for OpenCL execution. They specify + some semantic details about OpenCL kernel execution, such + as asynchronous execution. + */ + char opencl_flags[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of function pointers to the Maxeler FPGA + implementations of the codelet. 
The functions prototype + must be: + \code{.c} + void fpga_func(void *buffers[], void *cl_arg) + \endcode + The first argument being the array of data managed by the + data management library, and the second argument is a + pointer to the argument passed from the field + starpu_task::cl_arg. If the field starpu_codelet::where is + set, then the field starpu_codelet::max_fpga_funcs is ignored if + ::STARPU_MAX_FPGA does not appear in the field + starpu_codelet::where, it must be non-NULL otherwise. + */ + starpu_max_fpga_func_t max_fpga_funcs[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional array of strings which provide the name of the CPU + functions referenced in the array + starpu_codelet::cpu_funcs. This can be used when running on + MPI MS devices for StarPU to simply look + up the MPI MS function implementation through its name. + */ + const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS]; + + /** + Optional function to decide if the task is to be + transformed into a bubble + */ + starpu_bubble_func_t bubble_func; + + /** + Optional function to transform the task into a new graph + */ + starpu_bubble_gen_dag_func_t bubble_gen_dag_func; + + /** + Specify the number of arguments taken by the codelet. These + arguments are managed by the DSM and are accessed from the + void *buffers[] array. The constant argument passed + with the field starpu_task::cl_arg is not counted in this + number. This value should not be above \ref + STARPU_NMAXBUFS. It may be set to \ref + STARPU_VARIABLE_NBUFFERS to specify that the number of + buffers and their access modes will be set in + starpu_task::nbuffers and starpu_task::modes or + starpu_task::dyn_modes, which thus permits to define + codelets with a varying number of data. + */ + int nbuffers; + + /** + Is an array of ::starpu_data_access_mode. It describes the + required access modes to the data needed by the codelet + (e.g. ::STARPU_RW). 
The number of entries in this array + must be specified in the field starpu_codelet::nbuffers, + and should not exceed \ref STARPU_NMAXBUFS. If + insufficient, this value can be set with the configure + option \ref enable-maxbuffers "--enable-maxbuffers". + */ + enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; + + /** + Is an array of ::starpu_data_access_mode. It describes the + required access modes to the data needed by the codelet + (e.g. ::STARPU_RW). The number of entries in this array + must be specified in the field starpu_codelet::nbuffers. + This field should be used for codelets having a number of + data greater than \ref STARPU_NMAXBUFS (see \ref + SettingManyDataHandlesForATask). When defining a codelet, + one should either define this field or the field + starpu_codelet::modes defined above. + */ + enum starpu_data_access_mode *dyn_modes; + + /** + Default value is 0. If this flag is set, StarPU will not + systematically send all data to the memory node where the + task will be executing, it will read the + starpu_codelet::nodes or starpu_codelet::dyn_nodes array to + determine, for each data, on which memory node to send it. + */ + unsigned specific_nodes; + + /** + Optional field. When starpu_codelet::specific_nodes is 1, + this specifies the memory nodes where each data should be + sent to for task execution. This can be a specific memory + node (>= 0), or any of ::STARPU_SPECIFIC_NODE_LOCAL, + ::STARPU_SPECIFIC_NODE_CPU, ::STARPU_SPECIFIC_NODE_SLOW, + ::STARPU_SPECIFIC_NODE_FAST, + ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, + ::STARPU_SPECIFIC_NODE_NONE. + + The number of entries in this + array is starpu_codelet::nbuffers, and should not exceed + \ref STARPU_NMAXBUFS. + */ + int nodes[STARPU_NMAXBUFS]; + + /** + Optional field. When starpu_codelet::specific_nodes is 1, + this specifies the memory nodes where each data should be + sent to for task execution. The number of entries in this + array is starpu_codelet::nbuffers. 
This field should be + used for codelets having a number of data greater than + \ref STARPU_NMAXBUFS (see \ref + SettingManyDataHandlesForATask). When defining a codelet, + one should either define this field or the field + starpu_codelet::nodes defined above. + */ + int *dyn_nodes; + + /** + Optional pointer to the task duration performance model + associated to this codelet. This optional field is ignored + when set to NULL or when its field + starpu_perfmodel::symbol is not set. + */ + struct starpu_perfmodel *model; + + /** + Optional pointer to the task energy consumption performance + model associated to this codelet (in J). This optional field is + ignored when set to NULL or when its field + starpu_perfmodel::symbol is not set. In the case of + parallel codelets, this has to account for all processing + units involved in the parallel execution. + */ + struct starpu_perfmodel *energy_model; + + /** + Optional array for statistics collected at runtime: this is + filled by StarPU and should not be accessed directly, but + for example by calling the function + starpu_codelet_display_stats() (See + starpu_codelet_display_stats() for details). + */ + unsigned long per_worker_stats[STARPU_NMAXWORKERS]; + + /** + Optional name of the codelet. This can be useful for + debugging purposes. + */ + const char *name; + + /** + Optional color of the codelet. This can be useful for + debugging purposes. Value 0 acts like if this field wasn't specified. + Color representation is hex triplet (for example: 0xff0000 is red, + 0x0000ff is blue, 0xffa500 is orange, ...). + */ + unsigned color; + + /** + Optional field, the default value is NULL. This is a + function pointer of prototype void (*f)(void *) + which specifies a possible callback. If this pointer is + non-NULL, the callback function is executed on the + host after the execution of the task. If the task defines a + callback, the codelet callback is not called, unless called + within the task callback function. 
+ The callback is passed the value contained in the + starpu_task::callback_arg field. No callback is executed if + the field is set to NULL. + */ + void (*callback_func)(void *); + + /** + Various flags for the codelet. + */ + int flags; + + struct starpu_perf_counter_sample *perf_counter_sample; + struct starpu_perf_counter_sample_cl_values *perf_counter_values; + + /** + Whether _starpu_codelet_check_deprecated_fields was already done or not. + */ + int checked; +}; + +/** + Codelet with empty function defined for all drivers +*/ +extern struct starpu_codelet starpu_codelet_nop; + +/** + Describe a data handle along with an access mode. +*/ +struct starpu_data_descr +{ + starpu_data_handle_t handle; /**< data */ + enum starpu_data_access_mode mode; /**< access mode */ +}; + +/** + Describe a task that can be offloaded on the various processing + units managed by StarPU. It instantiates a codelet. It can either + be allocated dynamically with the function starpu_task_create(), or + declared statically. In the latter case, the programmer has to zero + the structure starpu_task and to fill the different fields + properly. The indicated default values correspond to the + configuration of a task allocated with starpu_task_create(). +*/ +struct starpu_task +{ + /** + Optional name of the task. This can be useful for debugging + purposes. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_NAME followed by the const char *. + */ + const char *name; + + /** + Optional file name where the task was submitted. This can be useful + for debugging purposes. + */ + const char *file; + + /** + Optional line number where the task was submitted. This can be useful + for debugging purposes. + */ + int line; + + /** + Pointer to the corresponding structure starpu_codelet. This + describes where the kernel should be executed, and supplies + the appropriate implementations. 
When set to NULL, + no code is executed during the tasks, such empty tasks can + be useful for synchronization purposes. + */ + struct starpu_codelet *cl; + + /** + When set, specify where the task is allowed to be executed. + When unset, take the value of starpu_codelet::where. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_EXECUTE_WHERE followed by an unsigned long long. + */ + int32_t where; + + /** + Specify the number of buffers. This is only used when + starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS. + + With starpu_task_insert() and alike this is automatically computed + when using ::STARPU_DATA_ARRAY and alike. + */ + int nbuffers; + + /** + Keep dyn_handles, dyn_interfaces and dyn_modes before the + equivalent static arrays, so we can detect dyn_handles + being NULL while nbuffers being bigger than STARPU_NMAXBUFS + (otherwise the overflow would put a non-NULL) + */ + + /** + Array of ::starpu_data_handle_t. Specify the handles to the + different pieces of data accessed by the task. The number + of entries in this array must be specified in the field + starpu_codelet::nbuffers. This field should be used for + tasks having a number of data greater than \ref + STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask). + When defining a task, one should either define this field + or the field starpu_task::handles defined below. + + With starpu_task_insert() and alike this is automatically filled + when using ::STARPU_DATA_ARRAY and alike. + */ + starpu_data_handle_t *dyn_handles; + + /** + Array of data pointers to the memory node where execution + will happen, managed by the DSM. Is used when the field + starpu_task::dyn_handles is defined. + + This is filled by StarPU. + */ + void **dyn_interfaces; + + /** + Used only when starpu_codelet::nbuffers is \ref + STARPU_VARIABLE_NBUFFERS. + Array of ::starpu_data_access_mode which describes the + required access modes to the data needed by the codelet + (e.g. 
::STARPU_RW). The number of entries in this array + must be specified in the field starpu_codelet::nbuffers. + This field should be used for codelets having a number of + data greater than \ref STARPU_NMAXBUFS (see \ref + SettingManyDataHandlesForATask). + When defining a codelet, one should either define this + field or the field starpu_task::modes defined below. + + With starpu_task_insert() and alike this is automatically filled + when using ::STARPU_DATA_MODE_ARRAY and alike. + */ + enum starpu_data_access_mode *dyn_modes; + + /** + Array of ::starpu_data_handle_t. Specify the handles to the + different pieces of data accessed by the task. The number + of entries in this array must be specified in the field + starpu_codelet::nbuffers, and should not exceed + \ref STARPU_NMAXBUFS. If insufficient, this value can be + set with the configure option \ref enable-maxbuffers + "--enable-maxbuffers". + + With starpu_task_insert() and alike this is automatically filled + when using ::STARPU_R and alike. + */ + starpu_data_handle_t handles[STARPU_NMAXBUFS]; + + /** + Array of Data pointers to the memory node where execution + will happen, managed by the DSM. + + This is filled by StarPU. + */ + void *interfaces[STARPU_NMAXBUFS]; + + /** + Used only when starpu_codelet::nbuffers is \ref + STARPU_VARIABLE_NBUFFERS. + Array of ::starpu_data_access_mode which describes the + required access modes to the data needed by the codelet + (e.g. ::STARPU_RW). The number of entries in this array + must be specified in the field starpu_task::nbuffers, and + should not exceed \ref STARPU_NMAXBUFS. If insufficient, + this value can be set with the configure option + \ref enable-maxbuffers "--enable-maxbuffers". + + With starpu_task_insert() and alike this is automatically filled + when using ::STARPU_DATA_MODE_ARRAY and alike. 
+ */ + enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; + + /** + Optional pointer to an array of characters which allows to + define the sequential consistency for each handle for the + current task. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY followed by an unsigned char * + */ + unsigned char *handles_sequential_consistency; + + /** + Optional pointer which is passed to the codelet through the + second argument of the codelet implementation (e.g. + starpu_codelet::cpu_func or starpu_codelet::cuda_func). The + default value is NULL. + + Note that the pointer is passed unchanged to most drivers, so the + application has to ensure the liveness of the pointed data, by using + static memory or dynamic allocation (starpu_task::cl_arg_free can be + used for convenience in that case). + + For the master/slave drivers however, the content pointed by cl_arg + is copied to the slave, so the size of the data must be set in + starpu_task::cl_arg_size. + + starpu_codelet_pack_args() and starpu_codelet_unpack_args() are + helpers that can be used to respectively pack and unpack data + into and from it and update starpu_task::cl_arg_size accordingly. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_CL_ARGS followed by a void* and a size_t. + */ + void *cl_arg; + /** + Optional field. For some specific drivers, the pointer + starpu_task::cl_arg cannot be directly given to the + driver function. A buffer of size starpu_task::cl_arg_size + needs to be allocated on the driver. This buffer is then + filled with the starpu_task::cl_arg_size bytes starting at + address starpu_task::cl_arg. In this case, the argument + given to the codelet is therefore not the + starpu_task::cl_arg pointer, but the address of the buffer + in local store (LS) instead. This field is ignored for CPU, + CUDA and OpenCL codelets, where the starpu_task::cl_arg + pointer is given as such. 
+ + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_CL_ARGS followed by a void* and a size_t. + */ + size_t cl_arg_size; + + /** + Optional pointer which points to the return value of the submitted task. + The default value is NULL. starpu_codelet_pack_arg() + and starpu_codelet_unpack_arg() can be used to respectively + pack and unpack the return value into and from it. starpu_task::cl_ret + can be used for MPI support. The only requirement is that + the size of the return value must be set in starpu_task::cl_ret_size. + */ + void *cl_ret; + + /** + Optional field. The buffer of starpu_codelet_pack_arg() + and starpu_codelet_unpack_arg() can be allocated with + the starpu_task::cl_ret_size bytes starting at address starpu_task::cl_ret. + starpu_task::cl_ret_size can be used for MPI support. + */ + size_t cl_ret_size; + + /** + Optional field, the default value is NULL. This is a + function pointer of prototype void (*f)(void *) which + specifies a possible callback. If this pointer is non-NULL, + the callback function is executed on the host after the execution of + the task. Contrary to starpu_task::callback_func, it is called + before releasing tasks which depend on this task, so those cannot be + already executing. The callback is passed + the value contained in the starpu_task::epilogue_callback_arg field. + No callback is executed if the field is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_EPILOGUE_CALLBACK followed by the function pointer. + */ + void (*epilogue_callback_func)(void *); + + /** + Optional field, the default value is NULL. This is + the pointer passed to the epilogue callback function. This field is + ignored if the field starpu_task::epilogue_callback_func is set to + NULL. + */ + void *epilogue_callback_arg; + + /** + Optional field, the default value is NULL. This is a + function pointer of prototype void (*f)(void *) + which specifies a possible callback. 
If this pointer is + non-NULL, the callback function is executed on the + host after the execution of the task. Contrary to + starpu_task::epilogue_callback_func, it is called after releasing + tasks which depend on this task, so those + might already be executing. The callback is passed the + value contained in the starpu_task::callback_arg field. No + callback is executed if the field is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_CALLBACK followed by the function pointer, or thanks to + ::STARPU_CALLBACK_WITH_ARG (or + ::STARPU_CALLBACK_WITH_ARG_NFREE) followed by the function + pointer and the argument. + */ + void (*callback_func)(void *); + + /** + Optional field, the default value is NULL. This is + the pointer passed to the callback function. This field is + ignored if the field starpu_task::callback_func is set to + NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_CALLBACK_ARG followed by the argument pointer, or thanks to + ::STARPU_CALLBACK_WITH_ARG or + ::STARPU_CALLBACK_WITH_ARG_NFREE followed by the function + pointer and the argument. + */ + void *callback_arg; + + /** + Optional field, the default value is NULL. This is a + function pointer of prototype void (*f)(void *) + which specifies a possible callback. If this pointer is + non-NULL, the callback function is executed on the + host when the task becomes ready for execution, before + getting scheduled. The callback is passed the value + contained in the starpu_task::prologue_callback_arg field. + No callback is executed if the field is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_PROLOGUE_CALLBACK followed by the function pointer. + */ + void (*prologue_callback_func)(void *); + + /** + Optional field, the default value is NULL. This is + the pointer passed to the prologue callback function. 
This + field is ignored if the field + starpu_task::prologue_callback_func is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_PROLOGUE_CALLBACK_ARG followed by the argument + */ + void *prologue_callback_arg; + + /** + Optional field, the default value is NULL. This is a + function pointer of prototype void (*f)(void*) + which specifies a possible callback. If this pointer is + non-NULL, the callback function is executed on the host + when the task is pop-ed from the scheduler, just before getting + executed. The callback is passed the value contained in the + starpu_task::prologue_callback_pop_arg field. + No callback is executed if the field is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_PROLOGUE_CALLBACK_POP followed by the function pointer. + */ + void (*prologue_callback_pop_func)(void *); + + /** + Optional field, the default value is NULL. This is + the pointer passed to the prologue_callback_pop function. This + field is ignored if the field + starpu_task::prologue_callback_pop_func is set to NULL. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_PROLOGUE_CALLBACK_POP_ARG followed by the argument. + */ + void *prologue_callback_pop_arg; + + /** + Transaction to which the task belongs, if any + */ + struct starpu_transaction *transaction; + + /** + Transaction epoch to which the task belongs, if any + */ + starpu_trs_epoch_t trs_epoch; + + /** + Optional field. Contain the tag associated to the task if + the field starpu_task::use_tag is set, ignored + otherwise. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_TAG followed by a starpu_tag_t. + */ + starpu_tag_t tag_id; + + /** + Optional field. In case starpu_task::cl_arg was allocated + by the application through malloc(), setting + starpu_task::cl_arg_free to 1 makes StarPU automatically + call free(cl_arg) when destroying the task. 
This + saves the user from defining a callback just for that. + + With starpu_task_insert() and alike this is set to 1 when using + ::STARPU_CL_ARGS. + */ + unsigned cl_arg_free : 1; + + /** + Optional field. In case starpu_task::cl_ret was allocated + by the application through malloc(), setting + starpu_task::cl_ret_free to 1 makes StarPU automatically + call free(cl_ret) when destroying the task. + */ + unsigned cl_ret_free : 1; + + /** + Optional field. In case starpu_task::callback_arg was + allocated by the application through malloc(), + setting starpu_task::callback_arg_free to 1 makes StarPU + automatically call free(callback_arg) when + destroying the task. + + With starpu_task_insert() and alike, this is set to 1 when using + ::STARPU_CALLBACK_ARG or ::STARPU_CALLBACK_WITH_ARG, or set + to 0 when using ::STARPU_CALLBACK_ARG_NFREE + */ + unsigned callback_arg_free : 1; + + /** + Optional field. In case starpu_task::epilogue_callback_arg was + allocated by the application through malloc(), + setting starpu_task::epilogue_callback_arg_free to 1 makes StarPU + automatically call free(epilogue_callback_arg) when + destroying the task. + */ + unsigned epilogue_callback_arg_free : 1; + + /** + Optional field. In case starpu_task::prologue_callback_arg + was allocated by the application through malloc(), + setting starpu_task::prologue_callback_arg_free to 1 makes + StarPU automatically call + free(prologue_callback_arg) when destroying the task. + + With starpu_task_insert() and alike this is set to 1 when using + ::STARPU_PROLOGUE_CALLBACK_ARG, or set to 0 when using + ::STARPU_PROLOGUE_CALLBACK_ARG_NFREE + */ + unsigned prologue_callback_arg_free : 1; + + /** + Optional field. In case starpu_task::prologue_callback_pop_arg + was allocated by the application through malloc(), + setting starpu_task::prologue_callback_pop_arg_free to 1 makes + StarPU automatically call + free(prologue_callback_pop_arg) when destroying the + task. 
+ + With starpu_task_insert() and alike this is set to 1 when using + ::STARPU_PROLOGUE_CALLBACK_POP_ARG, or set to 0 when using + ::STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE + */ + unsigned prologue_callback_pop_arg_free : 1; + + /** + Optional field, the default value is 0. If set, this flag + indicates that the task should be associated with the tag + contained in the starpu_task::tag_id field. Tags allow the + application to synchronize with the task and to express + task dependencies easily. + + With starpu_task_insert() and alike this is set to 1 when using + ::STARPU_TAG. + */ + unsigned use_tag : 1; + + /** + If this flag is set (which is the default), sequential + consistency is enforced for the data parameters of this + task for which sequential consistency is enabled. Clearing + this flag permits to disable sequential consistency for + this task, even if data have it enabled. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_SEQUENTIAL_CONSISTENCY followed by an unsigned. + */ + unsigned sequential_consistency : 1; + + /** + If this flag is set, the function starpu_task_submit() is + blocking and returns only when the task has been executed + (or if no worker is able to process the task). Otherwise, + starpu_task_submit() returns immediately. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_TASK_SYNCHRONOUS followed by an int. + */ + unsigned synchronous : 1; + + /** + Default value is 0. If this flag is set, StarPU will bypass + the scheduler and directly assign this task to the worker + specified by the field starpu_task::workerid. + + With starpu_task_insert() and alike this is set to 1 when using + ::STARPU_EXECUTE_ON_WORKER. + */ + unsigned execute_on_a_specific_worker : 1; + + /** + Optional field, default value is 1. If this flag is set, it + is not possible to synchronize with the task by the means + of starpu_task_wait() later on. 
Internal data structures + are only guaranteed to be freed once starpu_task_wait() is + called if the flag is not set. + + With starpu_task_insert() and alike this is set to 1. + */ + unsigned detach : 1; + + /** + Optional value. Default value is 0 for starpu_task_init(), + and 1 for starpu_task_create(). If this flag is set, the + task structure will automatically be freed, either after + the execution of the callback if the task is detached, or + during starpu_task_wait() otherwise. If this flag is not + set, dynamically allocated data structures will not be + freed until starpu_task_destroy() is called explicitly. + Setting this flag for a statically allocated task structure + will result in undefined behaviour. The flag is set to 1 + when the task is created by calling starpu_task_create(). + Note that starpu_task_wait_for_all() will not free any task. + + With starpu_task_insert() and alike this is set to 1. + + Calling starpu_task_set_destroy() can be used to set this field to 1 after submission. + Indeed this function will manage concurrency against the termination of the task. + */ + unsigned destroy : 1; + + /** + Optional field. If this flag is set, the task will be + re-submitted to StarPU once it has been executed. This flag + must not be set if the flag starpu_task::destroy is set. + This flag must be set before making another task depend on + this one. + + With starpu_task_insert() and alike this is set to 0. + */ + unsigned regenerate : 1; + + /** + do not allocate a submitorder id for this task + + With starpu_task_insert() and alike this can be specified + thanks to ::STARPU_TASK_NO_SUBMITORDER followed by + an unsigned. + */ + unsigned no_submitorder : 1; + + /** + @private + This is only used for tasks that use multiformat handle. + This should only be used by StarPU. + */ + unsigned char mf_skip; + + /** + Whether this task has failed and will thus have to be retried + + Set by StarPU. 
+ */ + unsigned char failed; + + /** + Whether the scheduler has pushed the task on some queue + + Set by StarPU. + */ + unsigned char scheduled; + + /** + Whether the scheduler has prefetched the task's data + + Set by StarPU. + */ + unsigned char prefetched; + + /** + Optional field. If the field + starpu_task::execute_on_a_specific_worker is set, this + field indicates the identifier of the worker that should + process this task (as returned by starpu_worker_get_id()). + This field is ignored if the field + starpu_task::execute_on_a_specific_worker is set to 0. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_EXECUTE_ON_WORKER followed by an int. + */ + unsigned workerid; + + /** + Optional field. If the field + starpu_task::execute_on_a_specific_worker is set, this + field indicates the per-worker consecutive order in which + tasks should be executed on the worker. Tasks will be + executed in consecutive starpu_task::workerorder values, + thus ignoring the availability order or task priority. See + \ref StaticScheduling for more details. This field is + ignored if the field + starpu_task::execute_on_a_specific_worker is set to 0. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_WORKER_ORDER followed by an unsigned. + */ + unsigned workerorder; + + /** + Optional field. If the field starpu_task::workerids_len is + different from 0, this field indicates an array of bits + (stored as uint32_t values) which indicate the set of + workers which are allowed to execute the task. + starpu_task::workerid takes precedence over this. + + With starpu_task_insert() and alike, this can be specified + along the field workerids_len thanks to ::STARPU_TASK_WORKERIDS + followed by a number of workers and an array of bits which + size is the number of workers. + */ + uint32_t *workerids; + + /** + Optional field. This provides the number of uint32_t values + in the starpu_task::workerids array. 
+ + With starpu_task_insert() and alike, this can be specified + along the field workerids thanks to ::STARPU_TASK_WORKERIDS + followed by a number of workers and an array of bits which + size is the number of workers. + */ + unsigned workerids_len; + + /** + Optional field, the default value is ::STARPU_DEFAULT_PRIO. + This field indicates a level of priority for the task. This + is an integer value that must be set between the return + values of the function starpu_sched_get_min_priority() for + the least important tasks, and that of the function + starpu_sched_get_max_priority() for the most important + tasks (included). The ::STARPU_MIN_PRIO and + ::STARPU_MAX_PRIO macros are provided for convenience and + respectively return the value of + starpu_sched_get_min_priority() and + starpu_sched_get_max_priority(). Default priority is + ::STARPU_DEFAULT_PRIO, which is always defined as 0 in + order to allow static task initialization. Scheduling + strategies that take priorities into account can use this + parameter to take better scheduling decisions, but the + scheduling policy may also ignore it. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_PRIORITY followed by an unsigned long long. + */ + int priority; + + /** + Current state of the task. + + Call starpu_task_status_get_as_string() to get the status as a string. + + Set by StarPU. + */ + enum starpu_task_status status; + + /** + @private + This field is set when initializing a task. The function + starpu_task_submit() will fail if the field does not have + the correct value. This will hence avoid submitting tasks + which have not been properly initialised. + */ + int magic; + + /** + Allow to get the type of task, for filtering out tasks + in profiling outputs, whether it is really internal to + StarPU (::STARPU_TASK_TYPE_INTERNAL), a data acquisition + synchronization task (::STARPU_TASK_TYPE_DATA_ACQUIRE), or + a normal task (::STARPU_TASK_TYPE_NORMAL) + + Set by StarPU. 
+ */ + unsigned type; + + /** + color of the task to be used in dag.dot. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_TASK_COLOR followed by an int. + */ + unsigned color; + + /** + Scheduling context. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_SCHED_CTX followed by an unsigned. + */ + unsigned sched_ctx; + + /** + Help the hypervisor monitor the execution of this task. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_HYPERVISOR_TAG followed by an int. + */ + int hypervisor_tag; + + /** + TODO: related with sched contexts and parallel tasks + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_POSSIBLY_PARALLEL followed by an unsigned. + */ + unsigned possibly_parallel; + + /** + Optional field. The bundle that includes this task. If no + bundle is used, this should be NULL. + */ + starpu_task_bundle_t bundle; + + /** + Optional field. Profiling information for the task. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_TASK_PROFILING_INFO followed by a pointer to the + appropriate struct. + */ + struct starpu_profiling_task_info *profiling_info; + + /** + The application can set this to the number of floating points + operations that the task will have to achieve. StarPU will measure + the time that the task takes, and divide the two to get the GFlop/s + achieved by the task. This will allow getting GFlops/s curves + from the tool starpu_perfmodel_plot, and is useful for the + hypervisor load balancing. + + With starpu_task_insert() and alike this can be specified thanks to + ::STARPU_FLOPS followed by a double. + */ + + double flops; + /** + Output field. Predicted duration of the task in microseconds. This field is + only set if the scheduling strategy uses performance + models. + + Set by StarPU. + */ + double predicted; + + /** + Output field. 
Predicted data transfer duration for the task in + microseconds. This field is only valid if the scheduling + strategy uses performance models. + + Set by StarPU. + */ + double predicted_transfer; + double predicted_start; + + /** + @private + A pointer to the previous task. This should only be used by + StarPU schedulers. + */ + struct starpu_task *prev; + + /** + @private + A pointer to the next task. This should only be used by + StarPU schedulers. + */ + struct starpu_task *next; + + /** + @private + This is private to StarPU, do not modify. + */ + void *starpu_private; + +#ifdef STARPU_OPENMP + /** + @private + This is private to StarPU, do not modify. + */ + struct starpu_omp_task *omp_task; +#else + void *omp_task; +#endif + + /** + When using hierarchical dags, the job identifier of the + bubble task which created the current task + */ + unsigned long bubble_parent; + + /** + When using hierarchical dags, a pointer to the bubble + decision function + */ + starpu_bubble_func_t bubble_func; + + /** + When using hierarchical dags, a pointer to an argument to + be given when calling the bubble decision function + */ + void *bubble_func_arg; + + /** + When using hierarchical dags, a pointer to the bubble + DAG generation function + */ + starpu_bubble_gen_dag_func_t bubble_gen_dag_func; + + /** + When using hierarchical dags, a pointer to an argument to + be given when calling the bubble DAG generation function + */ + void *bubble_gen_dag_func_arg; + + /** + @private + This is private to StarPU, do not modify. + */ + unsigned nb_termination_call_required; + + /** + This field is managed by the scheduler, is it allowed to do + whatever with it. Typically, some area would be allocated on push, and released on pop. + + With starpu_task_insert() and alike this is set when using + ::STARPU_TASK_SCHED_DATA. + */ + void *sched_data; +}; + +/** + To be used in the starpu_task::type field, for normal application tasks. 
+*/ +#define STARPU_TASK_TYPE_NORMAL 0 + +/** + To be used in the starpu_task::type field, for StarPU-internal tasks. +*/ +#define STARPU_TASK_TYPE_INTERNAL (1 << 0) + +/** + To be used in the starpu_task::type field, for StarPU-internal data acquisition tasks. +*/ +#define STARPU_TASK_TYPE_DATA_ACQUIRE (1 << 1) + +/* Note: remember to update starpu_task_init and starpu_task_ft_create_retry + * as well */ +/** + Value to be used to initialize statically allocated tasks. This is + equivalent to initializing a structure starpu_task + with the function starpu_task_init(). The starpu_task::type field + is initialized to ::STARPU_TASK_TYPE_NORMAL. +*/ +#define STARPU_TASK_INITIALIZER \ + { \ + .cl = NULL, \ + .where = -1, \ + .cl_arg = NULL, \ + .cl_arg_size = 0, \ + .cl_ret = NULL, \ + .cl_ret_size = 0, \ + .callback_func = NULL, \ + .callback_arg = NULL, \ + .epilogue_callback_func = NULL, \ + .epilogue_callback_arg = NULL, \ + .priority = STARPU_DEFAULT_PRIO, \ + .use_tag = 0, \ + .sequential_consistency = 1, \ + .synchronous = 0, \ + .execute_on_a_specific_worker = 0, \ + .workerorder = 0, \ + .bundle = NULL, \ + .detach = 1, \ + .destroy = 0, \ + .regenerate = 0, \ + .status = STARPU_TASK_INIT, \ + .profiling_info = NULL, \ + .predicted = NAN, \ + .predicted_transfer = NAN, \ + .predicted_start = NAN, \ + .starpu_private = NULL, \ + .magic = 42, \ + .type = STARPU_TASK_TYPE_NORMAL, \ + .color = 0, \ + .sched_ctx = STARPU_NMAX_SCHED_CTXS, \ + .hypervisor_tag = 0, \ + .flops = 0.0, \ + .scheduled = 0, \ + .prefetched = 0, \ + .dyn_handles = NULL, \ + .dyn_interfaces = NULL, \ + .dyn_modes = NULL, \ + .name = NULL, \ + .possibly_parallel = 0 \ + } + +/** + Return the number of buffers for \p task, i.e. + starpu_codelet::nbuffers, or starpu_task::nbuffers if the former is + \ref STARPU_VARIABLE_NBUFFERS. +*/ +#define STARPU_TASK_GET_NBUFFERS(task) ((unsigned)((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS ? ((task)->nbuffers) : ((task)->cl->nbuffers))) + +/** + Return the \p i -th data handle of \p task.
If \p task is defined + with a static or dynamic number of handles, will either return the + \p i -th element of the field starpu_task::handles or the \p i -th + element of the field starpu_task::dyn_handles (see \ref + SettingManyDataHandlesForATask) +*/ +#define STARPU_TASK_GET_HANDLE(task, i) (((task)->dyn_handles) ? (task)->dyn_handles[i] : (task)->handles[i]) + +/** + Return all the data handles of \p task. If \p task is defined + with a static or dynamic number of handles, will either return all + the elements of the field starpu_task::handles or all the elements + of the field starpu_task::dyn_handles (see \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_TASK_GET_HANDLES(task) (((task)->dyn_handles) ? (task)->dyn_handles : (task)->handles) + +/** + Set the \p i -th data handle of \p task with \p handle. If \p task + is defined with a static or dynamic number of handles, will either + set the \p i -th element of the field starpu_task::handles or the + \p i -th element of the field starpu_task::dyn_handles + (see \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_TASK_SET_HANDLE(task, handle, i) \ + do { \ + if ((task)->dyn_handles) \ + (task)->dyn_handles[i] = (handle); \ + else \ + (task)->handles[i] = (handle); \ + } \ + while (0) + +/** + Return the access mode of the \p i -th data handle of \p codelet. + If \p codelet is defined with a static or dynamic number of + handles, will either return the \p i -th element of the field + starpu_codelet::modes or the \p i -th element of the field + starpu_codelet::dyn_modes (see \ref SettingManyDataHandlesForATask). + When the static array is used, \p i is asserted to be lower than + STARPU_NMAXBUFS; \p i is parenthesized in that check so that a + compound index expression is compared as a whole. +*/ +#define STARPU_CODELET_GET_MODE(codelet, i) \ + (((codelet)->dyn_modes) ? (codelet)->dyn_modes[i] : (assert((i) < STARPU_NMAXBUFS), (codelet)->modes[i])) + +/** + Set the access mode of the \p i -th data handle of \p codelet.
If + \p codelet is defined with a static or dynamic number of handles, + will either set the \p i -th element of the field + starpu_codelet::modes or the \p i -th element of the field + starpu_codelet::dyn_modes (see \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_CODELET_SET_MODE(codelet, mode, i) \ + do { \ + if ((codelet)->dyn_modes) \ + (codelet)->dyn_modes[i] = (mode); \ + else \ + (codelet)->modes[i] = (mode); \ + } \ + while (0) + +/** + Return the access mode of the \p i -th data handle of \p task. If + \p task is defined with a static or dynamic number of handles, will + either return the \p i -th element of the field starpu_task::modes + or the \p i -th element of the field starpu_task::dyn_modes (see + \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_TASK_GET_MODE(task, i) \ + ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (task)->dyn_modes ? (((task)->dyn_modes) ? (task)->dyn_modes[i] : (task)->modes[i]) : STARPU_CODELET_GET_MODE((task)->cl, i)) + +/** + Set the access mode of the \p i -th data handle of \p task. If \p + task is defined with a static or dynamic number of handles, will + either set the \p i -th element of the field starpu_task::modes or + the \p i -th element of the field starpu_task::dyn_modes (see \ref + SettingManyDataHandlesForATask). + + When the codelet has a fixed number of buffers that does not exceed + STARPU_NMAXBUFS, nothing is written: the macro only asserts that + \p mode equals the mode declared by the codelet. \p mode is + parenthesized in the expansion so that a compound expression (e.g. + a bitwise OR of flags) is assigned and compared as a whole. +*/ +#define STARPU_TASK_SET_MODE(task, mode, i) \ + do { \ + if ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (task)->cl->nbuffers > STARPU_NMAXBUFS) \ + { \ + if ((task)->dyn_modes) \ + (task)->dyn_modes[i] = (mode); \ + else \ + (task)->modes[i] = (mode); \ + } \ + else \ + { \ + enum starpu_data_access_mode cl_mode = STARPU_CODELET_GET_MODE((task)->cl, i); \ + STARPU_ASSERT_MSG(cl_mode == (mode), \ + "Task <%s> can't set its %d-th buffer mode to %d as the codelet it derives from uses %d", \ + (task)->cl->name, i, mode, cl_mode); \ + } \ + } \ + while (0) + +/** + Return the target node of the \p i -th data handle of \p codelet.
+ If \p codelet is defined with a static or dynamic number of handles, + will either return the \p i -th element of the field + starpu_codelet::nodes or the \p i -th element of the field + starpu_codelet::dyn_nodes (see \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_CODELET_GET_NODE(codelet, i) (((codelet)->dyn_nodes) ? (codelet)->dyn_nodes[i] : (codelet)->nodes[i]) + +/** + Set the target node of the \p i -th data handle of \p codelet. If + \p codelet is defined with a static or dynamic number of handles, + will either set the \p i -th element of the field + starpu_codelet::nodes or the \p i -th element of the field + starpu_codelet::dyn_nodes (see \ref SettingManyDataHandlesForATask) +*/ +#define STARPU_CODELET_SET_NODE(codelet, __node, i) \ + do { \ + if ((codelet)->dyn_nodes) \ + (codelet)->dyn_nodes[i] = (__node); \ + else \ + (codelet)->nodes[i] = (__node); \ + } \ + while (0) + +/** + Initialize \p task with default values. This function is implicitly + called by starpu_task_create(). By default, tasks initialized with + starpu_task_init() must be deinitialized explicitly with + starpu_task_clean(). Tasks can also be initialized statically, + using ::STARPU_TASK_INITIALIZER. + See \ref PerformanceModelCalibration for more details. +*/ +void starpu_task_init(struct starpu_task *task); + +/** + Release all the structures automatically allocated to execute \p + task, but not the task structure itself and values set by the user + remain unchanged. It is thus useful for statically allocated tasks + for instance. It is also useful when users want to execute the same + operation several times with as little overhead as possible. It is + called automatically by starpu_task_destroy(). It has to be called + only after explicitly waiting for the task or after + starpu_shutdown() (waiting for the callback is not enough, since + StarPU still manipulates the task after calling the callback). + See \ref PerformanceModelCalibration for more details.
+*/ +void starpu_task_clean(struct starpu_task *task); + +/** + Allocate a task structure and initialize it with default values. + Tasks allocated dynamically with starpu_task_create() are + automatically freed when the task is terminated. This means that + the task pointer can not be used any more once the task is + submitted, since it can be executed at any time (unless + dependencies make it wait) and thus freed at any time. If the field + starpu_task::destroy is explicitly unset, the resources used by the + task have to be freed by calling starpu_task_destroy(). + See \ref SubmittingATask for more details. +*/ +struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC; + +/** + Allocate a task structure that does nothing but access data \p handle + with mode \p mode. This allows synchronizing with the task graph, according + to the sequential consistency, against tasks submitted before or after + submitting this task. One can then use starpu_task_declare_deps_array() or + starpu_task_end_dep_add() / starpu_task_end_dep_release() to add dependencies + against this task before submitting it. + See \ref SynchronizationTasks for more details. + */ +struct starpu_task *starpu_task_create_sync(starpu_data_handle_t handle, enum starpu_data_access_mode mode) STARPU_ATTRIBUTE_MALLOC; + +/** + Free the resources allocated during starpu_task_create() and + associated with \p task. This function is called automatically + after the execution of a task when the field starpu_task::destroy + is set, which is the default for tasks created by + starpu_task_create(). Calling this function on a statically + allocated task results in undefined behaviour. + See \ref Per-taskFeedback and \ref PerformanceModelExample for more details. +*/ +void starpu_task_destroy(struct starpu_task *task); + +/** + Tell StarPU to free the resources associated with \p task when the task is + over.
This is equivalent to having set task->destroy = 1 before submission, + the difference is that this can be called after submission and properly deals + with concurrency with the task execution. + See \ref WaitingForTasks for more details. +*/ +void starpu_task_set_destroy(struct starpu_task *task); + +/** + Submit \p task to StarPU. Calling this function does not mean that + the task will be executed immediately as there can be data or task + (tag) dependencies that are not fulfilled yet: StarPU will take + care of scheduling this task with respect to such dependencies. + This function returns immediately if the field + starpu_task::synchronous is set to 0, and blocks until the + termination of the task otherwise. It is also possible to + synchronize the application with asynchronous tasks by the means of + tags, using the function starpu_tag_wait() for instance. + In case of success, this function returns 0; a return value of + -ENODEV means that there is no worker able to process this + task (e.g. there is no GPU available and this task is only + implemented for CUDA devices). starpu_task_submit() can be called + from anywhere, including codelet functions and callbacks, provided + that the field starpu_task::synchronous is set to 0. + See \ref SubmittingATask for more details. +*/ +int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; + +#ifdef STARPU_USE_FXT +/* Store \p file and \p line in \p task, then submit it with starpu_task_submit(). The macro defined right after redirects starpu_task_submit() calls to this helper so that the caller's __FILE__ and __LINE__ are recorded. */ +static inline int starpu_task_submit_line(struct starpu_task *task, const char *file, int line) +{ + task->file = file; + task->line = line; + return starpu_task_submit(task); +} +#define starpu_task_submit(task) starpu_task_submit_line((task), __FILE__, __LINE__) +#endif + +/** + Submit \p task to StarPU with dependency bypass. + + This can only be called on behalf of another task which has already taken the + proper dependencies, e.g. this task is just an attempt of doing the actual + computation of that task. + See \ref TaskRetry for more details.
+*/ +int starpu_task_submit_nodeps(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; + +/** + Submit \p task to the context \p sched_ctx_id. By default, + starpu_task_submit() submits the task to a global context that is + created automatically by StarPU. + See \ref SubmittingTasksToAContext for more details. +*/ +int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id); + +/** + Return 1 if \p task is terminated. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; + +/** + Block until \p task has been executed. It is not possible to + synchronize with a task more than once. It is not possible to wait + for synchronous or detached tasks. Upon successful completion, this + function returns 0. Otherwise, -EINVAL indicates that the + specified task was either synchronous or detached. + See \ref SubmittingATask for more details. +*/ +int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; + +/** + Wait for an array of tasks. Upon successful completion, + this function returns 0. Otherwise, -EINVAL indicates that + one of the tasks was either synchronous or detached. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT; + +/** + Block until all the tasks that were submitted (to the current + context or the global one if there is no current context) are + terminated. It does not destroy these tasks. + See \ref SubmittingATask for more details. +*/ +int starpu_task_wait_for_all(void); + +/** + Block until there are \p n submitted tasks left (to the current + context or the global one if there is no current context) to be + executed. It does not destroy these tasks. + See \ref HowtoReuseMemory for more details.
+*/ +int starpu_task_wait_for_n_submitted(unsigned n); + +/** + Wait until all the tasks that were already submitted to the context + \p sched_ctx_id have been terminated. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id); + +/** + Wait until there are \p n submitted tasks left to be + executed that were already submitted to the context \p + sched_ctx_id. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n); + +/** + Wait until there is no more ready task. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_wait_for_no_ready(void); + +/** + Return the number of submitted tasks which are ready for execution + or are already executing. It thus does not include tasks waiting for + dependencies. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_nready(void); + +/** + Return the number of submitted tasks which have not completed yet. + See \ref WaitingForTasks for more details. +*/ +int starpu_task_nsubmitted(void); + +/** + Set the iteration number for all the tasks to be submitted after + this call. This is typically called at the beginning of a task + submission loop. This number will then show up in tracing tools. A + corresponding starpu_iteration_pop() call must be made to match the + call to starpu_iteration_push(), at the end of the same task + submission loop, typically. + + Nested calls to starpu_iteration_push() and starpu_iteration_pop() + are allowed, to describe a loop nest for instance, provided that + they match properly. + + See \ref CreatingAGanttDiagram for more details. +*/ +void starpu_iteration_push(unsigned long iteration); + +/** + Drop the iteration number for submitted tasks. This must match a + previous call to starpu_iteration_push(), and is typically called + at the end of a task submission loop. + See \ref CreatingAGanttDiagram for more details.
+*/ +void starpu_iteration_pop(void); +/** + See \ref GraphScheduling for more details. +*/ +void starpu_do_schedule(void); + +/** + Initialize \p cl with default values. Codelets should preferably be + initialized statically as shown in \ref DefiningACodelet. However + such an initialisation is not always possible, e.g. when using C++. + See \ref DefiningACodelet for more details. +*/ +void starpu_codelet_init(struct starpu_codelet *cl); + +/** + Output on \c stderr some statistics on the codelet \p cl. + See \ref Per-codeletFeedback for more details. +*/ +void starpu_codelet_display_stats(struct starpu_codelet *cl); + +/** + Return the task currently executed by the worker, or NULL if + it is called either from a thread that is not a task or simply + because there is no task being executed at the moment. + See \ref Per-taskFeedback for more details. +*/ +struct starpu_task *starpu_task_get_current(void); + +/** + Return the memory node number of parameter \p i of the task + currently executed, or -1 if it is called either from a thread that + is not a task or simply because there is no task being executed at + the moment. + + Usually, the returned memory node number is simply the memory node + for the current worker. That may however be different when using + e.g. starpu_codelet::specific_nodes. + + See \ref SpecifyingATargetNode for more details. +*/ +int starpu_task_get_current_data_node(unsigned i); + +/** + Return the name of the performance model of \p task. + See \ref PerformanceModelExample for more details. +*/ +const char *starpu_task_get_model_name(struct starpu_task *task); + +/** + Return the name of \p task, i.e. either its starpu_task::name + field, or the name of the corresponding performance model. + See \ref TraceTaskDetails for more details. +*/ +const char *starpu_task_get_name(struct starpu_task *task); + +/** + Allocate a task structure which is the exact duplicate of \p task. + See \ref OtherTaskUtility for more details.
+*/ +struct starpu_task *starpu_task_dup(struct starpu_task *task); + +/** + This function should be called by schedulers to specify the + codelet implementation to be executed when executing \p task. + See \ref SchedulingHelpers for more details. +*/ +void starpu_task_set_implementation(struct starpu_task *task, unsigned impl); + +/** + Return the codelet implementation to be executed + when executing \p task. + See \ref SchedulingHelpers for more details. +*/ +unsigned starpu_task_get_implementation(struct starpu_task *task); + +/** + Create and submit an empty task that unlocks a tag once all its + dependencies are fulfilled. + See \ref SynchronizationTasks for more details. +*/ +void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg); + +/** + Create and submit an empty task with the given callback. + See \ref SynchronizationTasks for more details. +*/ +void starpu_create_callback_task(void (*callback)(void *), void *callback_arg); + +/** + Function to be used as a prologue callback to enable fault tolerance for the + task. This prologue will create a try-task, i.e. a duplicate of the task, + which will do the actual computation. + + The prologue argument can be set to a check_ft function that will be + called on termination of the duplicate, which can check the result of the + task, and either confirm success, or resubmit another attempt. + If it is not set, the default implementation is to just resubmit a new + try-task. + + See \ref TaskRetry for more details. +*/ +void starpu_task_ft_prologue(void *check_ft); + +/** + Create a try-task for a \p meta_task, given a \p template_task task + template. The meta task can be passed as template on the first call, but + since it is mangled by starpu_task_ft_create_retry(), further calls + (typically made by the check_ft callback) need to be passed the previous + try-task as template task.
+ + \p check_ft is similar to the prologue argument of + starpu_task_ft_prologue(), and is typically set to the very function calling + starpu_task_ft_create_retry(). + + The try-task is returned, and can be modified (e.g. to change scheduling + parameters) before being submitted with starpu_task_submit_nodeps(). + + See \ref TaskRetry for more details. +*/ +struct starpu_task *starpu_task_ft_create_retry(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *)); + +/** + Record that this task failed, and should thus be retried. + This is usually called from the task codelet function itself, after checking + the result and noticing that the computation went wrong, and thus the task + should be retried. The performance of this task execution will not be + recorded for performance models. + + This can only be called for a task whose data access modes are either + ::STARPU_R or ::STARPU_W. +*/ +void starpu_task_ft_failed(struct starpu_task *task); + +/** + Notify that the try-task was successful and thus the meta-task was + successful. + See \ref TaskRetry for more details. +*/ +void starpu_task_ft_success(struct starpu_task *meta_task); + +/** + Set the function to call when the watchdog detects that StarPU has + not finished any task for \ref STARPU_WATCHDOG_TIMEOUT seconds. + See \ref WatchdogSupport for more details. +*/ +void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg); + +/** + Return the given status as a string. +*/ +char *starpu_task_status_get_as_string(enum starpu_task_status status); + +/** + Specify a minimum number of submitted tasks allowed at a given + time, this allows to control the task submission flow. The value + can also be specified with the environment variable \ref + STARPU_LIMIT_MIN_SUBMITTED_TASKS. + See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures for more details.
+*/ +void starpu_set_limit_min_submitted_tasks(int limit_min); + +/** + Specify a maximum number of submitted tasks allowed at a given + time, this allows to control the task submission flow. The value + can also be specified with the environment variable \ref + STARPU_LIMIT_MAX_SUBMITTED_TASKS. + See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures for more details. +*/ +void starpu_set_limit_max_submitted_tasks(int limit_max); + +/** @} */ + +/** + @defgroup API_Transactions Transactions + @{ + */ + +/** + Function to open a new transaction object and start the first transaction epoch. + + @return A pointer to an initialized struct starpu_transaction + or \c NULL if submitting the transaction begin task failed with \c ENODEV. + See \ref TransactionsCreation for more details. +*/ +struct starpu_transaction *starpu_transaction_open(int (*do_start_func)(void *buffer, void *arg), void *do_start_arg); + +/** + Function to mark the end of the current transaction epoch and start a new epoch. + See \ref TransactionsEpochNext for more details. +*/ +void starpu_transaction_next_epoch(struct starpu_transaction *p_trs, void *do_start_arg); + +/** + Function to mark the end of the last transaction epoch and free the transaction object. + See \ref TransactionsClosing for more details. +*/ +void starpu_transaction_close(struct starpu_transaction *p_trs); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TASK_H__ */ diff --git a/include/starpu_task_bundle.h b/include/starpu_task_bundle.h new file mode 100644 index 0000000..c0d16ab --- /dev/null +++ b/include/starpu_task_bundle.h @@ -0,0 +1,96 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_TASK_BUNDLE_H__ +#define __STARPU_TASK_BUNDLE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Task_Bundles Task Bundles + @{ +*/ + +struct starpu_task; +struct starpu_perfmodel_arch; + +/** + Opaque structure describing a list of tasks that should be + scheduled on the same worker whenever possible. It must be + considered as a hint given to the scheduler as there is no + guarantee that they will be executed on the same worker. +*/ +typedef struct _starpu_task_bundle *starpu_task_bundle_t; + +/** + Factory function creating and initializing \p bundle. When the call + returns, the needed memory is allocated and \p bundle is ready to use. +*/ +void starpu_task_bundle_create(starpu_task_bundle_t *bundle); + +/** + Insert \p task in \p bundle. Until \p task is removed from \p + bundle its expected length and data transfer time will be + considered along those of the other tasks of bundle. This function + must not be called if \p bundle is already closed and/or \p task is + already submitted. On success, it returns 0. There are two cases of + error : if \p bundle is already closed it returns -EPERM, if + \p task was already submitted it returns -EINVAL.
+*/ +int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task); + +/** + Remove \p task from \p bundle. Of course \p task must have been + previously inserted in \p bundle. This function must not be called + if \p bundle is already closed and/or \p task is already submitted. + Doing so would result in undefined behaviour. On success, it + returns 0. If \p bundle is already closed it returns + -ENOENT. +*/ +int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task); + +/** + Inform the runtime that the user will not modify \p bundle anymore, + i.e. no more tasks will be inserted or removed. Thus the runtime can + destroy it when possible. +*/ +void starpu_task_bundle_close(starpu_task_bundle_t bundle); + +/** + Return the expected duration of \p bundle in micro-seconds. +*/ +double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** + Return the time (in micro-seconds) expected to transfer all data used within \p bundle. +*/ +double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node); + +/** + Return the expected energy consumption of \p bundle in J. +*/ +double starpu_task_bundle_expected_energy(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TASK_BUNDLE_H__ */ diff --git a/include/starpu_task_dep.h b/include/starpu_task_dep.h new file mode 100644 index 0000000..c9b8483 --- /dev/null +++ b/include/starpu_task_dep.h @@ -0,0 +1,257 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_TASK_DEP_H__ +#define __STARPU_TASK_DEP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Explicit_Dependencies Explicit Dependencies + @{ +*/ + +/** + Declare task dependencies between a \p task and an array of tasks + of length \p ndeps. This function must be called prior to the + submission of the task, but it may be called after the submission or + the execution of the tasks in the array, provided the tasks are + still valid (i.e. they were not automatically destroyed). Calling + this function on a task that was already submitted or with an entry + of \p task_array that is no longer a valid task results in an + undefined behaviour. If \p ndeps is 0, no dependency is added. It + is possible to call starpu_task_declare_deps_array() several times + on the same task, in this case, the dependencies are added. It is + possible to have redundancy in the task dependencies. + See \ref TasksAndTagsDependencies for more details. 
+*/ +void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); + +/** + Declare task dependencies between a \p task and a series of \p + ndeps tasks, similarly to starpu_task_declare_deps_array(), but the + tasks are passed after \p ndeps, which indicates how many tasks \p + task shall be made to depend on. If \p ndeps is 0, no dependency is + added. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...); + +/** + Declare task end dependencies between a \p task and an array of + tasks of length \p ndeps. \p task will appear as terminated not + only when \p task has terminated, but also when the tasks of \p + task_array have terminated. This function must be called prior to + the termination of the task, but it may be called after the submission + or the execution of the tasks in the array, provided the tasks are + still valid (i.e. they were not automatically destroyed). Calling + this function on a task that was already terminated or with an + entry of \p task_array that is no longer a valid task results in an + undefined behaviour. If \p ndeps is 0, no dependency is added. It + is possible to call starpu_task_declare_end_deps_array() several + times on the same task, in this case, the dependencies are added. + Redundancy in the task dependencies is currently not + supported. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); + +/** + Declare task end dependencies between a \p task and a series of \p + ndeps tasks, similarly to starpu_task_declare_end_deps_array(), but + the tasks are passed after \p ndeps, which indicates how many tasks + \p task 's termination shall be made to depend on. If \p ndeps is + 0, no dependency is added. + See \ref TasksAndTagsDependencies for more details. 
+*/ +void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...); + +/** + Fill \p task_array with the list of tasks which are direct children + of \p task. \p ndeps is the size of \p task_array. This function + returns the number of direct children. \p task_array can be set to + NULL if \p ndeps is 0, which allows computing the number of + children before allocating an array to store them. This function + can only be called if \p task has not completed yet, otherwise the + results are undefined. The result may also be outdated if some + additional dependency has been added in the meantime. + See \ref GettingTaskChildren for more details. +*/ +int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); + +/** + Behave like starpu_task_get_task_succs(), except that it only + reports tasks which will go through the scheduler, thus avoiding + tasks with no codelet, or with explicit placement. + See \ref GettingTaskChildren for more details. +*/ +int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); + +/** + Add \p nb_deps end dependencies to the task \p t. This means the + task will not terminate until the required number of calls to the + function starpu_task_end_dep_release() has been made. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps); + +/** + Unlock 1 end dependency of the task \p t. This function must be + called after starpu_task_end_dep_add(). + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_task_end_dep_release(struct starpu_task *t); + +/** + Define a task logical identifier. It is possible to associate a task + with a unique tag chosen by the application, and to + express dependencies between tasks by the means of those tags. 
To + do so, fill the field starpu_task::tag_id with a tag number (can be + arbitrary) and set the field starpu_task::use_tag to 1. If + starpu_tag_declare_deps() is called with this tag number, the task + will not be started until the tasks which hold the declared + dependency tags are completed. +*/ +typedef uint64_t starpu_tag_t; + +/** + Specify the dependencies of the task identified by tag \p id. The + first argument specifies the tag which is configured, the second + argument gives the number of tag(s) on which \p id depends. The + following arguments are the tags which have to be terminated to + unlock the task. This function must be called before the associated + task is submitted to StarPU with starpu_task_submit(). + + WARNING! Use with caution. Because of the variable arity of + starpu_tag_declare_deps(), note that the last arguments must be of + type ::starpu_tag_t : constant values typically need to be + explicitly cast. Otherwise, due to integer sizes and argument + passing on the stack, the C compiler might consider the tag + 0x200000003 instead of 0x2 and 0x3 when + calling starpu_tag_declare_deps(0x1, 2, 0x2, 0x3). Using the + starpu_tag_declare_deps_array() function avoids this hazard. + + \code{.c} + // Tag 0x1 depends on tags 0x32 and 0x52 + starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)0x52); + \endcode + + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...); + +/** + Similar to starpu_tag_declare_deps(), except that it does not take + a variable number of arguments but an \p array of tags of size \p + ndeps. + + \code{.c} + // Tag 0x1 depends on tags 0x32 and 0x52 + starpu_tag_t tag_array[2] = {0x32, 0x52}; + starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array); + \endcode + + See \ref TasksAndTagsDependencies for more details. 
+*/ +void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array); + +/** + Block until the task associated to tag \p id has been executed. + This is a blocking call which must therefore not be called within + tasks or callbacks, but only from the application directly. It is + possible to synchronize with the same tag multiple times, as long + as the starpu_tag_remove() function is not called. Note that it is + still possible to synchronize with a tag associated to a task for + which the structure starpu_task was freed (e.g. if the field + starpu_task::destroy was enabled). + See \ref WaitingForTasks for more details. +*/ +int starpu_tag_wait(starpu_tag_t id); + +/** + Similar to starpu_tag_wait() except that it blocks until all the \p + ntags tags contained in the array \p id are terminated. + See \ref WaitingForTasks for more details. +*/ +int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id); + +/** + Clear the already notified status of a tag which is not + associated with a task. Before that, calling + starpu_tag_notify_from_apps() again will not notify the successors. + After that, the next call to starpu_tag_notify_from_apps() will + notify the successors. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_tag_restart(starpu_tag_t id); + +/** + Release the resources associated to tag \p id. It can be called + once the corresponding task has been executed and when there is no + other tag that depend on this tag anymore. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_tag_remove(starpu_tag_t id); + +/** + Explicitly unlock tag \p id. It may be useful in the case of + applications which execute part of their computation outside StarPU + tasks (e.g. third-party libraries). It is also provided as a + convenient tool for the programmer, for instance to entirely + construct the task DAG before actually giving StarPU the + opportunity to execute the tasks. 
When called several times on the + same tag, notification will be done only on first call, thus + implementing "OR" dependencies, until the tag is restarted using + starpu_tag_restart(). + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_tag_notify_from_apps(starpu_tag_t id); + +/** + Atomically call starpu_tag_notify_from_apps() and starpu_tag_restart() on tag + \p id. + This is useful with cyclic graphs, when we want to safely trigger its startup. + See \ref TasksAndTagsDependencies for more details. +*/ +void starpu_tag_notify_restart_from_apps(starpu_tag_t id); + +/** + Return the task associated to the tag \p id. + See \ref TasksAndTagsDependencies for more details. + */ +struct starpu_task *starpu_tag_get_task(starpu_tag_t id); + +/** + Calls starpu_tag_remove() for all tags. + The current implementation requires that no starpu_tag_wait_array() is + currently pending. + See \ref TasksAndTagsDependencies for more details. + */ +void starpu_tag_clear(void); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TASK_DEP_H__ */ diff --git a/include/starpu_task_list.h b/include/starpu_task_list.h new file mode 100644 index 0000000..7031c1a --- /dev/null +++ b/include/starpu_task_list.h @@ -0,0 +1,151 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_TASK_LIST_H__ +#define __STARPU_TASK_LIST_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Task_Lists Task Lists + @{ +*/ + +/* NOTE: this needs to have at least the same size as lists in src/common/list.h */ +#ifdef BUILDING_STARPU +#define STARPU_TASK_LIST_INLINE extern inline +#else +/** + Store a double-chained list of tasks +*/ +struct starpu_task_list +{ + struct starpu_task *head; /**< head of the list */ + struct starpu_task *tail; /**< tail of the list */ +}; +#define STARPU_TASK_LIST_INLINE extern +#endif + +/** + Initialize a list structure. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +void starpu_task_list_init(struct starpu_task_list *list); + +/** + Push \p task at the front of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task); + +/** + Push \p task at the back of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task); + +/** + Get the front of \p list (without removing it). + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_front(const struct starpu_task_list *list); + +/** + Get the back of \p list (without removing it). + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_back(const struct starpu_task_list *list); + +/** + Test if \p list is empty. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +int starpu_task_list_empty(const struct starpu_task_list *list); + +/** + Remove \p task from \p list. + See \ref SchedulingHelpers for more details. 
+*/ +STARPU_TASK_LIST_INLINE +void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task); + +/** + Remove the element at the front of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_pop_front(struct starpu_task_list *list); + +/** + Remove the element at the back of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_pop_back(struct starpu_task_list *list); + +/** + Get the first task of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_begin(const struct starpu_task_list *list); + +/** + Get the end of \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_end(const struct starpu_task_list *list STARPU_ATTRIBUTE_UNUSED); + +/** + Get the next task of \p list. This is not erase-safe. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +struct starpu_task *starpu_task_list_next(const struct starpu_task *task); + +/** + Test whether the given task \p look is contained in the \p list. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +int starpu_task_list_ismember(const struct starpu_task_list *list, const struct starpu_task *look); + +/** + Move list from one head \p lsrc to another \p ldst. + See \ref SchedulingHelpers for more details. +*/ +STARPU_TASK_LIST_INLINE +void starpu_task_list_move(struct starpu_task_list *ldst, struct starpu_task_list *lsrc); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TASK_LIST_H__ */ diff --git a/include/starpu_task_util.h b/include/starpu_task_util.h new file mode 100644 index 0000000..d33a791 --- /dev/null +++ b/include/starpu_task_util.h @@ -0,0 +1,652 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifndef __STARPU_TASK_UTIL_H__ +#define __STARPU_TASK_UTIL_H__ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Insert_Task Task Insert Utility + @{ +*/ + +/* NOTE: when adding a value here, please make sure to update both + * src/util/starpu_task_insert_utils.c (in two places) and + * mpi/src/starpu_mpi_task_insert.c and mpi/src/starpu_mpi_task_insert_fortran.c */ + +#define STARPU_MODE_SHIFT 17 + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to a constant value and the size of the constant + */ +#define STARPU_VALUE (1 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to a callback function +*/ +#define STARPU_CALLBACK (2 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by two + pointers: one to a callback function, and the other to be given as + an argument to the callback function; this is equivalent to using + both ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG. 
+*/ +#define STARPU_CALLBACK_WITH_ARG (3 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to be given as an argument to the callback function +*/ +#define STARPU_CALLBACK_ARG (4 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must + be followed by an integer defining a priority level +*/ +#define STARPU_PRIORITY (5 << STARPU_MODE_SHIFT) + +/** + \ingroup API_MPI_Support + Used when calling starpu_mpi_task_insert(), must be followed by an + integer value which specifies the node on which to execute the + codelet. + */ +#define STARPU_EXECUTE_ON_NODE (6 << STARPU_MODE_SHIFT) + +/** + \ingroup API_MPI_Support + Used when calling starpu_mpi_task_insert(), must be followed by a + data handle to specify that the node owning the given data will + execute the codelet. +*/ +#define STARPU_EXECUTE_ON_DATA (7 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an array of + handles and the number of elements in the array (as int). This is equivalent + to passing the handles as separate parameters with ::STARPU_R, + ::STARPU_W or ::STARPU_RW. +*/ +#define STARPU_DATA_ARRAY (8 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an array of + struct starpu_data_descr and the number of elements in the array (as int). + This is equivalent to passing the handles with the corresponding modes. +*/ +#define STARPU_DATA_MODE_ARRAY (9 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a tag. +*/ +#define STARPU_TAG (10 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a tag. +*/ +#define STARPU_HYPERVISOR_TAG (11 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + amount of floating point operations, as a double. Users MUST + explicitly cast into double, otherwise parameter passing will not + work. 
+*/ +#define STARPU_FLOPS (12 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by the id + of the scheduling context to which to submit the task to. +*/ +#define STARPU_SCHED_CTX (13 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to a prologue callback function +*/ +#define STARPU_PROLOGUE_CALLBACK (14 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to be given as an argument to the prologue callback + function +*/ +#define STARPU_PROLOGUE_CALLBACK_ARG (15 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to a prologue callback pop function +*/ +#define STARPU_PROLOGUE_CALLBACK_POP (16 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to be given as an argument to the prologue callback pop + function +*/ +#define STARPU_PROLOGUE_CALLBACK_POP_ARG (17 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + integer value specifying the worker on which to execute the task + (as specified by starpu_task::execute_on_a_specific_worker) +*/ +#define STARPU_EXECUTE_ON_WORKER (18 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + unsigned long long value specifying the mask of worker on which to execute + the task (as specified by starpu_task::where) +*/ +#define STARPU_EXECUTE_WHERE (19 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a tag + stored in starpu_task::tag_id. Leave starpu_task::use_tag as 0. +*/ +#define STARPU_TAG_ONLY (20 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an unsigned + stored in starpu_task::possibly_parallel. 
+*/ +#define STARPU_POSSIBLY_PARALLEL (21 << STARPU_MODE_SHIFT) + +/** + used when calling starpu_task_insert(), must be + followed by an integer value specifying the worker order in which + to execute the tasks (as specified by starpu_task::workerorder) +*/ +#define STARPU_WORKER_ORDER (22 << STARPU_MODE_SHIFT) + +/** + \ingroup API_MPI_Support + Used when calling starpu_mpi_task_insert(), must be followed by a + identifier to a node selection policy. This is needed when several + nodes own data in ::STARPU_W mode. +*/ +#define STARPU_NODE_SELECTION_POLICY (23 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + char * stored in starpu_task::name. +*/ +#define STARPU_NAME (24 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + memory buffer containing the arguments to be given to the task, and + by the size of the arguments. The memory buffer should be the + result of a previous call to starpu_codelet_pack_args(), and will + be freed (i.e. starpu_task::cl_arg_free will be set to 1) +*/ +#define STARPU_CL_ARGS (25 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), similarly to + ::STARPU_CL_ARGS, must be followed by a memory buffer containing + the arguments to be given to the task, and by the size of the + arguments. The memory buffer should be the result of a previous + call to starpu_codelet_pack_args(), and will NOT be freed (i.e. + starpu_task::cl_arg_free will be set to 0) +*/ +#define STARPU_CL_ARGS_NFREE (26 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + number of tasks as int, and an array containing these tasks. The + function starpu_task_declare_deps_array() will be called with the + given values. 
+*/ +#define STARPU_TASK_DEPS_ARRAY (27 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + integer representing a color +*/ +#define STARPU_TASK_COLOR (28 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + array of characters representing the sequential consistency for + each buffer of the task. +*/ +#define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + integer stating if the task is synchronous or not +*/ +#define STARPU_TASK_SYNCHRONOUS (30 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + number of tasks as int, and an array containing these tasks. The + function starpu_task_declare_end_deps_array() will be called with + the given values. +*/ +#define STARPU_TASK_END_DEPS_ARRAY (31 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + integer which will be given to starpu_task_end_dep_add() +*/ +#define STARPU_TASK_END_DEP (32 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + unsigned being a number of workers, and an array of bits which size + is the number of workers, the array indicates the set of workers + which are allowed to execute the task. +*/ +#define STARPU_TASK_WORKERIDS (33 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + unsigned which sets the sequential consistency for the data + parameters of the task. 
+*/ +#define STARPU_SEQUENTIAL_CONSISTENCY (34 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert() and alike, must be followed + by a pointer to a struct starpu_profiling_task_info + */ +#define STARPU_TASK_PROFILING_INFO (35 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert() and alike, must be followed + by an unsigned specifying not to allocate a submitorder id for the task + */ +#define STARPU_TASK_NO_SUBMITORDER (36 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), similarly to + ::STARPU_CALLBACK_ARG, must be followed by a pointer to be given as + an argument to the callback function, the argument will not be + freed, i.e starpu_task::callback_arg_free will be set to 0 +*/ +#define STARPU_CALLBACK_ARG_NFREE (37 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), similarly to + ::STARPU_CALLBACK_WITH_ARG, must be followed by two pointers: one + to a callback function, and the other to be given as an argument to + the callback function; this is equivalent to using both + ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG_NFREE. 
+*/ +#define STARPU_CALLBACK_WITH_ARG_NFREE (38 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), similarly to + ::STARPU_PROLOGUE_CALLBACK_ARG, must be followed by a + pointer to be given as an argument to the prologue callback + function, the argument will not be + freed, i.e starpu_task::prologue_callback_arg_free will be set to 0 +*/ +#define STARPU_PROLOGUE_CALLBACK_ARG_NFREE (39 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), similarly to + ::STARPU_PROLOGUE_CALLBACK_POP_ARG, must be followed by a pointer + to be given as an argument to the prologue callback pop function, + the argument will not be freed, i.e + starpu_task::prologue_callback_pop_arg_free will be set to 0 +*/ +#define STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE (40 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert() and alike, must be followed + by a void* specifying the value to be set in starpu_task::sched_data + */ +#define STARPU_TASK_SCHED_DATA (41 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert() and alike, must be followed + by a struct starpu_transaction * specifying the value to be set in + the transaction field of the task. + */ +#define STARPU_TRANSACTION (42 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + char * stored in starpu_task::file. + + This is automatically set when FXT is enabled. +*/ +#define STARPU_TASK_FILE (43 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by an + int stored in starpu_task::line. + + This is automatically set when FXT is enabled. 
+*/ +#define STARPU_TASK_LINE (44 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to a epilogue callback function +*/ +#define STARPU_EPILOGUE_CALLBACK (45 << STARPU_MODE_SHIFT) + +/** + Used when calling starpu_task_insert(), must be followed by a + pointer to be given as an argument to the epilogue callback + function +*/ +#define STARPU_EPILOGUE_CALLBACK_ARG (46 << STARPU_MODE_SHIFT) + +/** + \ingroup API_Bubble + Used when calling starpu_task_insert(), must be followed by a + pointer to a bubble decision function ::starpu_bubble_func_t +*/ +#define STARPU_BUBBLE_FUNC (47 << STARPU_MODE_SHIFT) + +/** + \ingroup API_Bubble + Used when calling starpu_task_insert(), must be followed by a + pointer which will be passed to the function defined in + starpu_codelet::bubble_func +*/ +#define STARPU_BUBBLE_FUNC_ARG (48 << STARPU_MODE_SHIFT) + +/** + \ingroup API_Bubble + Used when calling starpu_task_insert(), must be followed by a + pointer to a bubble DAG generation function + ::starpu_bubble_gen_dag_func_t +*/ +#define STARPU_BUBBLE_GEN_DAG_FUNC (49 << STARPU_MODE_SHIFT) + +/** + \ingroup API_Bubble + Used when calling starpu_task_insert(), must be followed by a + pointer which will be passed to the function defined in + starpu_codelet::bubble_gen_dag_func +*/ +#define STARPU_BUBBLE_GEN_DAG_FUNC_ARG (50 << STARPU_MODE_SHIFT) + +/** + \ingroup API_Bubble + Used when calling starpu_task_insert(), must be followed by a + pointer to a task. The task will be set as the bubble parent task + when using the offline tracing tool. +*/ +#define STARPU_BUBBLE_PARENT (51 << STARPU_MODE_SHIFT) + +/** + This has to be the last mode value plus 1 +*/ +#define STARPU_SHIFTED_MODE_MAX (52 << STARPU_MODE_SHIFT) + +/** + Set the given \p task corresponding to \p cl with the following arguments. + The argument list must be zero-terminated. 
The arguments + following the codelet are the same as the ones for the function + starpu_task_insert(). + If some arguments of type ::STARPU_VALUE are given, the parameter + starpu_task::cl_arg_free will be set to 1. + See \ref OtherTaskUtility for more details. +*/ +int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...); +#ifdef STARPU_USE_FXT +#define starpu_task_set(task, cl, ...) starpu_task_set(task, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Create a task corresponding to \p cl with the following arguments. + The argument list must be zero-terminated. The arguments + following the codelet are the same as the ones for the function + starpu_task_insert(). + If some arguments of type ::STARPU_VALUE are given, the parameter + starpu_task::cl_arg_free will be set to 1. + See \ref OtherTaskUtility for more details. +*/ +struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...); +#ifdef STARPU_USE_FXT +#define starpu_task_build(cl, ...) starpu_task_build(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Create and submit a task corresponding to \p cl with the following + given arguments. The argument list must be zero-terminated. + + The arguments following the codelet can be of the following types: +
      +
    • ::STARPU_R, ::STARPU_W, ::STARPU_RW, ::STARPU_SCRATCH, + ::STARPU_REDUX an access mode followed by a data handle; +
    • ::STARPU_DATA_ARRAY followed by an array of data handles and + its number of elements; +
    • ::STARPU_DATA_MODE_ARRAY followed by an array of struct + starpu_data_descr, i.e. data handles with their associated access + modes, and its number of elements; +
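    • ::STARPU_EXECUTE_ON_WORKER, ::STARPU_WORKER_ORDER followed by + an integer value specifying respectively the worker on which to execute the task + (as specified by starpu_task::execute_on_a_specific_worker) and the worker order in which to execute it (as specified by starpu_task::workerorder); +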
    • the specific values ::STARPU_VALUE, ::STARPU_CALLBACK, + ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, + ::STARPU_PRIORITY, ::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, + ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS, ::STARPU_CL_ARGS_NFREE, + ::STARPU_TASK_DEPS_ARRAY, ::STARPU_TASK_COLOR, + ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, ::STARPU_TASK_SYNCHRONOUS, + ::STARPU_TASK_END_DEP followed by the appropriate objects as + defined elsewhere. +
    + + When using ::STARPU_DATA_ARRAY, the access mode of the data handles + is not defined, it will be taken from the codelet + starpu_codelet::modes or starpu_codelet::dyn_modes field. One + should use ::STARPU_DATA_MODE_ARRAY to define the data handles + along with the access modes. + + Parameters to be passed to the codelet implementation are defined + through the type ::STARPU_VALUE. The function + starpu_codelet_unpack_args() must be called within the codelet implementation to retrieve them. + + See \ref InsertTaskUtility for more details. +*/ +int starpu_task_insert(struct starpu_codelet *cl, ...); +#ifdef STARPU_USE_FXT +#define starpu_task_insert(cl, ...) starpu_task_insert(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Identical to starpu_task_insert(). Kept to avoid breaking old codes. +*/ +int starpu_insert_task(struct starpu_codelet *cl, ...); +#ifdef STARPU_USE_FXT +#define starpu_insert_task(cl, ...) starpu_insert_task(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Assuming that there are already \p current_buffer data handles + passed to the task, and if *allocated_buffers is not 0, the + task->dyn_handles array has size \p *allocated_buffers, this + function makes room for \p room other data handles, allocating or + reallocating task->dyn_handles as necessary and updating \p + *allocated_buffers accordingly. One can thus start with + *allocated_buffers equal to 0 and current_buffer equal to 0, then + make room by calling this function, then store handles with + STARPU_TASK_SET_HANDLE(), make room again with this function, store + yet more handles, etc. + See \ref OtherTaskUtility for more details. 
+*/ +void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room); + +/** + Store data handle \p handle into task \p task with mode \p + arg_type, updating \p *allocated_buffers and \p *current_buffer + accordingly. + See \ref OtherTaskUtility for more details. +*/ +void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle); + +/** + Store \p nb_handles data handles \p handles into task \p task, + updating \p *allocated_buffers and \p *current_buffer accordingly. + See \ref OtherTaskUtility for more details. +*/ +void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles); + +/** + Store \p nb_descrs data handles described by \p descrs into task \p + task, updating \p *allocated_buffers and \p *current_buffer + accordingly. + See \ref OtherTaskUtility for more details. +*/ +void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs); + +/** + Pack arguments of type ::STARPU_VALUE into a buffer which can be + given to a codelet and later unpacked with the function + starpu_codelet_unpack_args(). + + Instead of calling starpu_codelet_pack_args(), one can also call + starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for + each data, then starpu_codelet_pack_arg_fini(). + + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...); + +/** + Structure to be used for starpu_codelet_pack_arg_init() & co, and + starpu_codelet_unpack_arg_init() & co.
The content is public, + however users should not directly access it, but only use the structure as a + parameter to the appropriate functions. +*/ +struct starpu_codelet_pack_arg_data +{ + char *arg_buffer; + size_t arg_buffer_size; + size_t arg_buffer_used; + size_t current_offset; + int nargs; +}; + +/** + Initialize struct starpu_codelet_pack_arg_data before calling + starpu_codelet_pack_arg() and starpu_codelet_pack_arg_fini(). This + will simply initialize the content of the structure. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state); + +/** + Pack one argument into struct starpu_codelet_pack_arg_data \p state. + That structure has to be initialized before with + starpu_codelet_pack_arg_init(), and after all + starpu_codelet_pack_arg() calls have been performed, + starpu_codelet_pack_arg_fini() has to be used to get the \p cl_arg + and \p cl_arg_size to be put in the task. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size); + +/** + Finish packing data, after calling starpu_codelet_pack_arg_init() + once and starpu_codelet_pack_arg() several times. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size); + +/** + Retrieve the arguments of type ::STARPU_VALUE associated to a + task automatically created using the function starpu_task_insert(). If + any parameter's value is 0, unpacking will stop there and ignore the remaining + parameters. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_unpack_args(void *cl_arg, ...); + +/** + Initialize \p state with \p cl_arg and \p cl_arg_size. This has to + be called before calling starpu_codelet_unpack_arg(). + See \ref InsertTaskUtility for more details.
+*/ +void starpu_codelet_unpack_arg_init(struct starpu_codelet_pack_arg_data *state, void *cl_arg, size_t cl_arg_size); + +/** + Unpack the next argument of size \p size from \p state into \p ptr with a copy. + \p state has to be initialized before with starpu_codelet_unpack_arg_init(). + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_unpack_arg(struct starpu_codelet_pack_arg_data *state, void *ptr, size_t size); + +/** + Unpack the next argument of unknown size from \p state into \p ptr + with a copy. \p ptr is allocated before copying in it the value of + the argument. + The size of the argument is returned in \p size. + \p state has to be initialized before with starpu_codelet_unpack_arg_init(). + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_dup_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size); + +/** + Unpack the next argument of unknown size from \p state into \p ptr. + \p ptr will be a pointer to the memory of the argument. + The size of the argument is returned in \p size. + \p state has to be initialized before with starpu_codelet_unpack_arg_init(). + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_pick_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size); + +/** + Finish unpacking data, after calling starpu_codelet_unpack_arg_init() + once and starpu_codelet_unpack_arg() or starpu_codelet_dup_arg() or + starpu_codelet_pick_arg() several times. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_unpack_arg_fini(struct starpu_codelet_pack_arg_data *state); + +/** + Call this function during unpacking to skip saving the argument in ptr. + See \ref InsertTaskUtility for more details.
+*/ +void starpu_codelet_unpack_discard_arg(struct starpu_codelet_pack_arg_data *state); + +/** + Similar to starpu_codelet_unpack_args(), but if any parameter is 0, + copy the part of \p cl_arg that has not been read in \p buffer + which can then be used in a later call to one of the unpack + functions. + See \ref InsertTaskUtility for more details. +*/ +void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TASK_UTIL_H__ */ diff --git a/include/starpu_thread.h b/include/starpu_thread.h new file mode 100644 index 0000000..f3822a1 --- /dev/null +++ b/include/starpu_thread.h @@ -0,0 +1,510 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// The documentation for this file is in doc/doxygen/chapters/api/threads.doxy + +#ifndef __STARPU_THREAD_H__ +#define __STARPU_THREAD_H__ + +#include +#include +#ifdef STARPU_SIMGRID +#include +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H +#include +#include +#elif defined(STARPU_HAVE_XBT_SYNCHRO_H) +#include +#else +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_ACTOR_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_COND_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_BARRIER_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_HOST_H +#include +#endif + +#ifdef STARPU_HAVE_SIMGRID_MSG_H +#include +#elif defined(STARPU_HAVE_MSG_MSG_H) +#include +#endif + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) +#include +#include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Encapsulation of the pthread_create function. + */ + +#ifdef STARPU_SIMGRID + +#ifdef STARPU_HAVE_SIMGRID_ACTOR_H +typedef sg_actor_t starpu_pthread_t; +#else +typedef msg_process_t starpu_pthread_t; +#endif +typedef struct +{ + size_t stacksize; +} starpu_pthread_attr_t; + +#ifdef STARPU_HAVE_SIMGRID_ACTOR_H +typedef sg_host_t starpu_sg_host_t; +#else +typedef msg_host_t starpu_sg_host_t; +#endif +int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2); +starpu_pthread_t starpu_pthread_self(void); +int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine)(void *), void *arg, starpu_sg_host_t host); +int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine)(void *), void *arg); +starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]); +int starpu_pthread_join(starpu_pthread_t thread, void **retval); +int starpu_pthread_detach(starpu_pthread_t thread); +int 
starpu_pthread_exit(void *retval) STARPU_ATTRIBUTE_NORETURN; +int starpu_pthread_attr_init(starpu_pthread_attr_t *attr); +int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr); +int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate); +int starpu_pthread_attr_setstacksize(starpu_pthread_attr_t *attr, size_t stacksize); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* STARPU_SIMGRID */ + +typedef pthread_t starpu_pthread_t; +typedef pthread_attr_t starpu_pthread_attr_t; + +#define starpu_pthread_equal pthread_equal +#define starpu_pthread_self pthread_self +#define starpu_pthread_create pthread_create +#define starpu_pthread_create_on(name, thread, attr, routine, arg, where) starpu_pthread_create(thread, attr, routine, arg) +#define starpu_pthread_join pthread_join +#define starpu_pthread_detach pthread_detach +#define starpu_pthread_exit pthread_exit +#define starpu_pthread_attr_init pthread_attr_init +#define starpu_pthread_attr_destroy pthread_attr_destroy +#define starpu_pthread_attr_setdetachstate pthread_attr_setdetachstate +#define starpu_pthread_attr_setstacksize pthread_attr_setstacksize + +#endif /* STARPU_SIMGRID, _MSC_VER */ + +#ifdef STARPU_HAVE_PTHREAD_SETNAME_NP +#ifdef STARPU_HAVE_DARWIN +#define starpu_pthread_setname(name) pthread_setname_np(name) +#else +#define starpu_pthread_setname(name) pthread_setname_np(pthread_self(), name) +#endif +#else +#define starpu_pthread_setname(name) +#endif + +/* + * Encapsulation of the pthread_mutex_* functions. 
+ */ + +#ifdef STARPU_SIMGRID +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H +typedef sg_mutex_t starpu_pthread_mutex_t; +#else +typedef xbt_mutex_t starpu_pthread_mutex_t; +#endif +typedef int starpu_pthread_mutexattr_t; + +#define STARPU_PTHREAD_MUTEX_INITIALIZER NULL + +int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr); +int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type); +int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr, int type); +int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr); +int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +typedef pthread_mutex_t starpu_pthread_mutex_t; +typedef pthread_mutexattr_t starpu_pthread_mutexattr_t; + +#define starpu_pthread_mutex_init pthread_mutex_init +#define starpu_pthread_mutex_destroy pthread_mutex_destroy +#define starpu_pthread_mutexattr_gettype pthread_mutexattr_gettype +#define starpu_pthread_mutexattr_settype pthread_mutexattr_settype +#define starpu_pthread_mutexattr_destroy pthread_mutexattr_destroy +#define starpu_pthread_mutexattr_init pthread_mutexattr_init + +#ifdef STARPU_FXT_LOCK_TRACES +int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex); +#else +#define starpu_pthread_mutex_lock pthread_mutex_lock +#define starpu_pthread_mutex_unlock pthread_mutex_unlock +#define starpu_pthread_mutex_trylock pthread_mutex_trylock +#endif + +#define STARPU_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + 
+#endif /* STARPU_SIMGRID, _MSC_VER */ + +#if !defined(_MSC_VER) || defined(BUILDING_STARPU) +int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex); +int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex); +void starpu_pthread_mutex_check_sched(starpu_pthread_mutex_t *mutex, char *file, int line); +#endif + +/* + * Encapsulation of the pthread_key_* functions. + */ +#ifdef STARPU_SIMGRID + +typedef int starpu_pthread_key_t; +int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function)(void *)); +int starpu_pthread_key_delete(starpu_pthread_key_t key); +int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer); +void *starpu_pthread_getspecific(starpu_pthread_key_t key); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +typedef pthread_key_t starpu_pthread_key_t; + +#define starpu_pthread_key_create pthread_key_create +#define starpu_pthread_key_delete pthread_key_delete +#define starpu_pthread_setspecific pthread_setspecific +#define starpu_pthread_getspecific pthread_getspecific + +#endif /* STARPU_SIMGRID, _MSC_VER */ + +/* + * Encapsulation of the pthread_cond_* functions. 
+ */ + +#ifdef STARPU_SIMGRID + +#ifdef STARPU_HAVE_SIMGRID_COND_H +typedef sg_cond_t starpu_pthread_cond_t; +#else +typedef xbt_cond_t starpu_pthread_cond_t; +#endif +typedef int starpu_pthread_condattr_t; +#define STARPU_PTHREAD_COND_INITIALIZER NULL + +int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr); +int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond); +int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond); +int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex); +int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime); +int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +typedef pthread_cond_t starpu_pthread_cond_t; +typedef pthread_condattr_t starpu_pthread_condattr_t; +#define STARPU_PTHREAD_COND_INITIALIZER PTHREAD_COND_INITIALIZER + +#define starpu_pthread_cond_init pthread_cond_init +#define starpu_pthread_cond_signal pthread_cond_signal +#define starpu_pthread_cond_broadcast pthread_cond_broadcast + +#ifdef STARPU_FXT_LOCK_TRACES +int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex); +#else +#define starpu_pthread_cond_wait pthread_cond_wait +#endif + +#define starpu_pthread_cond_timedwait pthread_cond_timedwait +#define starpu_pthread_cond_destroy pthread_cond_destroy + +#endif /* STARPU_SIMGRID, _MSC_VER */ + +/* + * Encapsulation of the pthread_rwlock_* functions. 
+ */ + +#ifdef STARPU_SIMGRID + +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H +typedef sg_mutex_t starpu_pthread_rwlock_t; +#else +typedef xbt_mutex_t starpu_pthread_rwlock_t; +#endif +typedef int starpu_pthread_rwlockattr_t; +#define STARPU_PTHREAD_RWLOCK_INITIALIZER NULL + +int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr); +int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +typedef pthread_rwlock_t starpu_pthread_rwlock_t; +typedef pthread_rwlockattr_t starpu_pthread_rwlockattr_t; +#define STARPU_PTHREAD_RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER + +#define starpu_pthread_rwlock_init pthread_rwlock_init +#define starpu_pthread_rwlock_destroy pthread_rwlock_destroy + +#ifdef STARPU_FXT_LOCK_TRACES +int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock); +int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock); +#else +#define starpu_pthread_rwlock_rdlock pthread_rwlock_rdlock +#define starpu_pthread_rwlock_tryrdlock pthread_rwlock_tryrdlock +#define starpu_pthread_rwlock_wrlock pthread_rwlock_wrlock +#define starpu_pthread_rwlock_trywrlock pthread_rwlock_trywrlock +#define starpu_pthread_rwlock_unlock pthread_rwlock_unlock +#endif + +#endif /* STARPU_SIMGRID, _MSC_VER */ + +/* + * Encapsulation of the pthread_barrier_* functions. 
+ */ + +#if defined(STARPU_SIMGRID) || (!defined(STARPU_HAVE_PTHREAD_BARRIER) && (!defined(_MSC_VER) || defined(BUILDING_STARPU))) + +#if defined(STARPU_SIMGRID) && (defined(STARPU_HAVE_SIMGRID_BARRIER_H) || defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init)) +#ifdef STARPU_HAVE_SIMGRID_BARRIER_H +typedef sg_bar_t starpu_pthread_barrier_t; +#else +typedef xbt_bar_t starpu_pthread_barrier_t; +#endif +typedef int starpu_pthread_barrierattr_t; +#ifdef SG_BARRIER_SERIAL_THREAD +#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD SG_BARRIER_SERIAL_THREAD +#else +#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1 +#endif +#else +typedef struct +{ + starpu_pthread_mutex_t mutex; + starpu_pthread_cond_t cond; + starpu_pthread_cond_t cond_destroy; + unsigned count; + unsigned done; + unsigned busy; +} starpu_pthread_barrier_t; +typedef int starpu_pthread_barrierattr_t; +#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1 +#endif + +int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count); +int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier); +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier); + +#elif !defined(_MSC_VER) /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER */ + +typedef pthread_barrier_t starpu_pthread_barrier_t; +typedef pthread_barrierattr_t starpu_pthread_barrierattr_t; + +#define starpu_pthread_barrier_init pthread_barrier_init +#define starpu_pthread_barrier_destroy pthread_barrier_destroy + +#ifdef STARPU_FXT_LOCK_TRACES +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier); +#else +#define starpu_pthread_barrier_wait pthread_barrier_wait +#endif +#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD PTHREAD_BARRIER_SERIAL_THREAD + +#endif /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER, _MSC_VER */ + +/* + * Encapsulation of the pthread_spin_* functions. 
+ */ + +#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) + +typedef struct +{ +#ifdef STARPU_SIMGRID + int taken; +#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) + unsigned taken STARPU_ATTRIBUTE_ALIGNED(16); +#else /* we only have a trivial implementation yet ! */ + uint32_t taken STARPU_ATTRIBUTE_ALIGNED(16); +#endif +} starpu_pthread_spinlock_t; + +int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared); +int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock); +int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock); +int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock); +int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock); + +#elif !defined(_MSC_VER) /* !(defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */ + +typedef pthread_spinlock_t starpu_pthread_spinlock_t; +#define starpu_pthread_spin_init pthread_spin_init +#define starpu_pthread_spin_destroy pthread_spin_destroy +#define starpu_pthread_spin_lock pthread_spin_lock +#define starpu_pthread_spin_trylock pthread_spin_trylock +#define starpu_pthread_spin_unlock pthread_spin_unlock + +#endif /* !(defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */ + +/* + * Other needed pthread definitions + */ + +#if defined(_MSC_VER) && !defined(BUILDING_STARPU) +typedef void *starpu_pthread_rwlock_t; +typedef void *starpu_pthread_mutex_t; +typedef void *starpu_pthread_cond_t; +typedef void *starpu_pthread_barrier_t; +#endif /* _MSC_VER */ + +/* + * Simgrid-specific register/wait synchronization + * + * Producers create a "queue" object, and when they have produced something, + * they call either queue_signal or queue_broadcast in order to wake either one + * or all consumers waiting on the queue.
+ * + * starpu_pthread_queue_init(&global_queue1->queue); + * while (1) { + * element = compute(); + * push(element, global_queue1); + * starpu_pthread_queue_signal(&global_queue1->queue); + * } + * starpu_pthread_queue_destroy(&global_queue1->queue); + * + * Consumers create a "wait" object, then queue_register on as many queues as they + * want. In their consumption loop, they wait_reset, then test for availability + * on all producers, and if none was available, call wait_wait to actually wait + * for producers. On termination, consumers have to queue_unregister before + * destroying the "wait" object: + * + * starpu_pthread_wait_t wait; + * + * starpu_pthread_wait_init(&wait); + * starpu_pthread_queue_register(&wait, &global_queue1->queue); + * starpu_pthread_queue_register(&wait, &global_queue2->queue); + * + * while (1) { + * int sleep = 1; + * starpu_pthread_wait_reset(&wait); + * if (global_queue1->navailable) + * { + * work(global_queue1); + * sleep = 0; + * } + * if (global_queue2->navailable) + * { + * work(global_queue2); + * sleep = 0; + * } + * if (sleep) + * starpu_pthread_wait_wait(&wait); + * } + * starpu_pthread_queue_unregister(&wait, &global_queue1->queue); + * starpu_pthread_queue_unregister(&wait, &global_queue2->queue); + * starpu_pthread_wait_destroy(&wait); + */ + +#ifdef STARPU_SIMGRID +typedef struct +{ + starpu_pthread_mutex_t mutex; + starpu_pthread_cond_t cond; + unsigned block; +} starpu_pthread_wait_t; + +typedef struct +{ + starpu_pthread_mutex_t mutex; + starpu_pthread_wait_t **queue; + unsigned allocqueue; + unsigned nqueue; +} starpu_pthread_queue_t; + +int starpu_pthread_queue_init(starpu_pthread_queue_t *q); +int starpu_pthread_queue_signal(starpu_pthread_queue_t *q); +int starpu_pthread_queue_broadcast(starpu_pthread_queue_t *q); +int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q); + +int starpu_pthread_wait_init(starpu_pthread_wait_t *w); +int starpu_pthread_queue_register(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q); 
+int starpu_pthread_queue_unregister(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q); +int starpu_pthread_wait_reset(starpu_pthread_wait_t *w); +int starpu_pthread_wait_wait(starpu_pthread_wait_t *w); +int starpu_pthread_wait_timedwait(starpu_pthread_wait_t *w, const struct timespec *abstime); +int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w); +#endif + +/* + * Encapsulation of the semaphore functions. + */ + +#ifdef STARPU_SIMGRID + +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H +typedef sg_sem_t starpu_sem_t; +#else +typedef msg_sem_t starpu_sem_t; +#endif +int starpu_sem_destroy(starpu_sem_t *sem); +int starpu_sem_getvalue(starpu_sem_t *sem, int *retval); +int starpu_sem_init(starpu_sem_t *sem, int pshared, unsigned value); +int starpu_sem_post(starpu_sem_t *sem); +int starpu_sem_trywait(starpu_sem_t *sem); +int starpu_sem_wait(starpu_sem_t *sem); + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +typedef sem_t starpu_sem_t; +#define starpu_sem_destroy sem_destroy +#define starpu_sem_getvalue sem_getvalue +#define starpu_sem_init sem_init +#define starpu_sem_post sem_post +int starpu_sem_trywait(starpu_sem_t *sem); +int starpu_sem_wait(starpu_sem_t *sem); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_THREAD_H__ */ diff --git a/include/starpu_thread_util.h b/include/starpu_thread_util.h new file mode 100644 index 0000000..69aa54d --- /dev/null +++ b/include/starpu_thread_util.h @@ -0,0 +1,529 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +// The documentation for this file is in doc/doxygen/chapters/api/threads.doxy + +#ifndef __STARPU_THREAD_UTIL_H__ +#define __STARPU_THREAD_UTIL_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if !(defined(_MSC_VER) && !defined(BUILDING_STARPU)) +/* + * Encapsulation of the starpu_pthread_create_* functions. + */ + +#define STARPU_PTHREAD_CREATE_ON(name, thread, attr, routine, arg, where) \ + do { \ + int p_ret = starpu_pthread_create_on((name), (thread), (attr), (routine), (arg), (where)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_create_on: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_CREATE(thread, attr, routine, arg) \ + do { \ + int p_ret = starpu_pthread_create((thread), (attr), (routine), (arg)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_create: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_JOIN(thread, retval) \ + do { \ + int p_ret = starpu_pthread_join((thread), (retval)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_join: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +/* + * Encapsulation of the starpu_pthread_mutex_* functions. 
+ */ + +#define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) \ + do { \ + int p_ret = starpu_pthread_mutex_init((mutex), (attr)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_init: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#ifdef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO +#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) \ + do { \ + if (!attr) \ + memset(mutex, 0, sizeof(*mutex)); \ + else \ + _STARPU_PTHREAD_MUTEX_INIT(mutex, attr); \ + } \ + while (0) +#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) \ + do { \ + if (attr) \ + _STARPU_PTHREAD_MUTEX_INIT(mutex, attr); \ + } \ + while (0) +#else +#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) +#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) +#endif + +#define STARPU_PTHREAD_MUTEX_DESTROY(mutex) \ + do { \ + int p_ret = starpu_pthread_mutex_destroy(mutex); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_destroy: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#ifdef STARPU_DEBUG +#define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line) \ + starpu_pthread_mutex_check_sched((mutex), file, line) +#else +#define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line) +#endif + +#define STARPU_PTHREAD_MUTEX_LOCK(mutex) \ + do { \ + int p_ret = starpu_pthread_mutex_lock(mutex); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_lock: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__); \ + } \ + while (0) + +#define STARPU_PTHREAD_MUTEX_LOCK_SCHED(mutex) \ + do { \ + int p_ret = starpu_pthread_mutex_lock_sched(mutex); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_lock_sched: %s\n", \ + __FILE__, __LINE__, 
strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_MUTEX_TRYLOCK(mutex) \ + _starpu_pthread_mutex_trylock(mutex, __FILE__, __LINE__) +static STARPU_INLINE int _starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex, char *file, int line) +{ + int p_ret = starpu_pthread_mutex_trylock(mutex); + if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) + { + fprintf(stderr, + "%s:%d starpu_pthread_mutex_trylock: %s\n", + file, line, strerror(p_ret)); + STARPU_ABORT(); + } + _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line); + return p_ret; +} + +#define STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(mutex) \ + _starpu_pthread_mutex_trylock_sched(mutex, __FILE__, __LINE__) +static STARPU_INLINE int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *file, int line) +{ + int p_ret = starpu_pthread_mutex_trylock_sched(mutex); + if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) + { + fprintf(stderr, + "%s:%d starpu_pthread_mutex_trylock_sched: %s\n", + file, line, strerror(p_ret)); + STARPU_ABORT(); + } + return p_ret; +} + +#define STARPU_PTHREAD_MUTEX_UNLOCK(mutex) \ + do { \ + _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__); \ + int p_ret = starpu_pthread_mutex_unlock(mutex); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_unlock: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(mutex) \ + do { \ + int p_ret = starpu_pthread_mutex_unlock_sched(mutex); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_mutex_unlock_sched: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +/* + * Encapsulation of the starpu_pthread_key_* functions. 
+ */ +#define STARPU_PTHREAD_KEY_CREATE(key, destr) \ + do { \ + int p_ret = starpu_pthread_key_create((key), (destr)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_key_create: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_KEY_DELETE(key) \ + do { \ + int p_ret = starpu_pthread_key_delete((key)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_key_delete: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_SETSPECIFIC(key, ptr) \ + do { \ + int p_ret = starpu_pthread_setspecific((key), (ptr)); \ + if (STARPU_UNLIKELY(p_ret != 0)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_setspecific: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + }; \ + } \ + while (0) + +#define STARPU_PTHREAD_GETSPECIFIC(key) starpu_pthread_getspecific((key)) + +/* + * Encapsulation of the starpu_pthread_rwlock_* functions. 
+ */ +#define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) \ + do { \ + int p_ret = starpu_pthread_rwlock_init((rwlock), (attr)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_rwlock_init: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#ifdef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO +#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) \ + do { \ + if (!attr) \ + memset(rwlock, 0, sizeof(*rwlock)); \ + else \ + _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr); \ + } \ + while (0) +#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) \ + do { \ + if (attr) \ + _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr); \ + } \ + while (0) +#else +#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) +#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) +#endif + +#define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) \ + do { \ + int p_ret = starpu_pthread_rwlock_rdlock(rwlock); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_rwlock_rdlock: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_RWLOCK_TRYRDLOCK(rwlock) \ + _starpu_pthread_rwlock_tryrdlock(rwlock, __FILE__, __LINE__) +static STARPU_INLINE int _starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock, char *file, int line) +{ + int p_ret = starpu_pthread_rwlock_tryrdlock(rwlock); + if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) + { + fprintf(stderr, + "%s:%d starpu_pthread_rwlock_tryrdlock: %s\n", + file, line, strerror(p_ret)); + STARPU_ABORT(); + } + return p_ret; +} + +#define STARPU_PTHREAD_RWLOCK_WRLOCK(rwlock) \ + do { \ + int p_ret = starpu_pthread_rwlock_wrlock(rwlock); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_rwlock_wrlock: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + 
+/* Returns the result of starpu_pthread_rwlock_trywrlock() when it is 0 or EBUSY; + * any other value is reported on stderr and aborts. */ +#define STARPU_PTHREAD_RWLOCK_TRYWRLOCK(rwlock) \ + _starpu_pthread_rwlock_trywrlock(rwlock, __FILE__, __LINE__) +static STARPU_INLINE int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, const char *file, int line) +{ + int p_ret = starpu_pthread_rwlock_trywrlock(rwlock); + if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) + { + fprintf(stderr, + "%s:%d starpu_pthread_rwlock_trywrlock: %s\n", + file, line, strerror(p_ret)); + STARPU_ABORT(); + } + return p_ret; +} + +#define STARPU_PTHREAD_RWLOCK_UNLOCK(rwlock) \ + do { \ + int p_ret = starpu_pthread_rwlock_unlock(rwlock); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_rwlock_unlock: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_RWLOCK_DESTROY(rwlock) \ + do { \ + int p_ret = starpu_pthread_rwlock_destroy(rwlock); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_rwlock_destroy: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +/* + * Encapsulation of the starpu_pthread_cond_* functions. 
+ */ +#define _STARPU_PTHREAD_COND_INIT(cond, attr) \ + do { \ + int p_ret = starpu_pthread_cond_init((cond), (attr)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_cond_init: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#ifdef STARPU_PTHREAD_COND_INITIALIZER_ZERO +#define STARPU_PTHREAD_COND_INIT(cond, attr) \ + do { \ + if (!attr) \ + memset(cond, 0, sizeof(*cond)); \ + else \ + _STARPU_PTHREAD_COND_INIT(cond, attr); \ + } \ + while (0) +#define STARPU_PTHREAD_COND_INIT0(cond, attr) \ + do { \ + if (attr) \ + _STARPU_PTHREAD_COND_INIT(cond, attr); \ + } \ + while (0) +#else +#define STARPU_PTHREAD_COND_INIT(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr) +#define STARPU_PTHREAD_COND_INIT0(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr) +#endif + +#define STARPU_PTHREAD_COND_DESTROY(cond) \ + do { \ + int p_ret = starpu_pthread_cond_destroy(cond); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_cond_destroy: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_COND_SIGNAL(cond) \ + do { \ + int p_ret = starpu_pthread_cond_signal(cond); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_cond_signal: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_COND_BROADCAST(cond) \ + do { \ + int p_ret = starpu_pthread_cond_broadcast(cond); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_cond_broadcast: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_COND_WAIT(cond, mutex) \ + do { \ + int p_ret = starpu_pthread_cond_wait((cond), (mutex)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_cond_wait: %s\n", \ + __FILE__, __LINE__, 
strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +/* pthread_cond_timedwait not yet available on windows, but we don't run simgrid there anyway */ +#ifdef STARPU_SIMGRID +#define STARPU_PTHREAD_COND_TIMEDWAIT(cond, mutex, abstime) \ + _starpu_pthread_cond_timedwait(cond, mutex, abstime, __FILE__, __LINE__) +static STARPU_INLINE int _starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime, char *file, int line) +{ + int p_ret = starpu_pthread_cond_timedwait(cond, mutex, abstime); + if (STARPU_UNLIKELY(p_ret != 0 && p_ret != ETIMEDOUT)) + { + fprintf(stderr, + "%s:%d starpu_pthread_cond_timedwait: %s\n", + file, line, strerror(p_ret)); + STARPU_ABORT(); + } + return p_ret; +} +#endif + +/* + * Encapsulation of the starpu_pthread_barrier_* functions. + */ + +#define STARPU_PTHREAD_BARRIER_INIT(barrier, attr, count) \ + do { \ + int p_ret = starpu_pthread_barrier_init((barrier), (attr), (count)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_barrier_init: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_BARRIER_DESTROY(barrier) \ + do { \ + int p_ret = starpu_pthread_barrier_destroy((barrier)); \ + if (STARPU_UNLIKELY(p_ret)) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_barrier_destroy: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) + +#define STARPU_PTHREAD_BARRIER_WAIT(barrier) \ + do { \ + int p_ret = starpu_pthread_barrier_wait((barrier)); \ + if (STARPU_UNLIKELY(!((p_ret == 0) || (p_ret == STARPU_PTHREAD_BARRIER_SERIAL_THREAD)))) \ + { \ + fprintf(stderr, \ + "%s:%d starpu_pthread_barrier_wait: %s\n", \ + __FILE__, __LINE__, strerror(p_ret)); \ + STARPU_ABORT(); \ + } \ + } \ + while (0) +#endif /* _MSC_VER */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_THREAD_UTIL_H__ */ diff --git a/include/starpu_tree.h 
b/include/starpu_tree.h new file mode 100644 index 0000000..25049e6 --- /dev/null +++ b/include/starpu_tree.h @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_TREE_H__ +#define __STARPU_TREE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Tree Tree + @brief API tree facilities + @{ +*/ + +/** + todo +*/ +struct starpu_tree +{ + struct starpu_tree *nodes; + struct starpu_tree *father; + int arity; + int id; + int level; + int is_pu; +}; + +void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited); + +void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father); +void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father); + +struct starpu_tree *starpu_tree_get(struct starpu_tree *tree, int id); + +struct starpu_tree *starpu_tree_get_neighbour(struct starpu_tree *tree, struct starpu_tree *node, char *visited, char *present); + +void starpu_tree_free(struct starpu_tree *tree); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_TREE_H__ */ diff --git a/include/starpu_util.h b/include/starpu_util.h new file mode 100644 index 0000000..aa637b9 --- /dev/null +++ b/include/starpu_util.h @@ -0,0 +1,887 @@ +/* StarPU --- Runtime system for heterogeneous multicore 
architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_UTIL_H__ +#define __STARPU_UTIL_H__ + +#include +#include +#include +#include +#include + +#include + +#ifdef __GLIBC__ +#include +#endif + +#ifdef STARPU_SIMGRID_MC +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Toolbox Toolbox + @brief The following macros allow to make GCC extensions portable, + and to have a code which can be compiled with any C compiler. + @{ +*/ + +/** + Return true (non-zero) if GCC version \p maj.\p min or later is + being used (macro taken from glibc.) +*/ +#if defined __GNUC__ && defined __GNUC_MINOR__ +#define STARPU_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +#define STARPU_GNUC_PREREQ(maj, min) 0 +#endif + +/** + When building with a GNU C Compiler, allow programmers to mark an + expression as unlikely. +*/ +#ifdef __GNUC__ +#define STARPU_UNLIKELY(expr) (__builtin_expect(!!(expr), 0)) +#else +#define STARPU_UNLIKELY(expr) (expr) +#endif + +/** + When building with a GNU C Compiler, allow programmers to mark an + expression as likely. 
+*/ +#ifdef __GNUC__ +#define STARPU_LIKELY(expr) (__builtin_expect(!!(expr), 1)) +#else +#define STARPU_LIKELY(expr) (expr) +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((unused)) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define STARPU_ATTRIBUTE_UNUSED +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((noreturn)) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_NORETURN __attribute__((noreturn)) +#else +#define STARPU_ATTRIBUTE_NORETURN +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((visibility ("default"))) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT __attribute__((visibility("default"))) +#else +#define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT +#endif + +/** + When building with a GNU C Compiler, equivalent to \#pragma GCC visibility push(hidden) + (emitted through _Pragma, since a macro cannot expand to a \#pragma directive) +*/ +#ifdef __GNUC__ +#define STARPU_VISIBILITY_PUSH_HIDDEN _Pragma("GCC visibility push(hidden)") +#else +#define STARPU_VISIBILITY_PUSH_HIDDEN +#endif + +/** + When building with a GNU C Compiler, equivalent to \#pragma GCC visibility pop + (emitted through _Pragma, since a macro cannot expand to a \#pragma directive) +*/ +#ifdef __GNUC__ +#define STARPU_VISIBILITY_POP _Pragma("GCC visibility pop") +#else +#define STARPU_VISIBILITY_POP +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((malloc)) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_MALLOC __attribute__((malloc)) +#else +#define STARPU_ATTRIBUTE_MALLOC +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((warn_unused_result)) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +#define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((pure)) +*/ +#ifdef 
__GNUC__ +#define STARPU_ATTRIBUTE_PURE __attribute__((pure)) +#else +#define STARPU_ATTRIBUTE_PURE +#endif + +/** + When building with a GNU C Compiler, defined
to __attribute__((aligned(size))) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_ALIGNED(size) __attribute__((aligned(size))) +#else +#define STARPU_ATTRIBUTE_ALIGNED(size) +#endif + +/** + When building with a GNU C Compiler, defined to __attribute__((format(type, string, first))) +*/ +#ifdef __GNUC__ +#define STARPU_ATTRIBUTE_FORMAT(type, string, first) __attribute__((format(type, string, first))) +#else +#define STARPU_ATTRIBUTE_FORMAT(type, string, first) +#endif + +/* Note that if we're compiling C++, then just use the "inline" + keyword, since it's part of C++ */ +#if defined(c_plusplus) || defined(__cplusplus) +#define STARPU_INLINE inline +#elif defined(_MSC_VER) || defined(__HP_cc) +#define STARPU_INLINE __inline +#else +#define STARPU_INLINE __inline__ +#endif + +#if STARPU_GNUC_PREREQ(4, 3) +#define STARPU_ATTRIBUTE_CALLOC_SIZE(num, size) __attribute__((alloc_size(num, size))) +#define STARPU_ATTRIBUTE_ALLOC_SIZE(size) __attribute__((alloc_size(size))) +#else +#define STARPU_ATTRIBUTE_CALLOC_SIZE(num, size) +#define STARPU_ATTRIBUTE_ALLOC_SIZE(size) +#endif + +#if STARPU_GNUC_PREREQ(3, 1) && !defined(BUILDING_STARPU) && !defined(STARPU_USE_DEPRECATED_API) && !defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) +#define STARPU_DEPRECATED __attribute__((__deprecated__)) +#else +#define STARPU_DEPRECATED +#endif /* __GNUC__ */ + +#if STARPU_GNUC_PREREQ(3, 3) +#define STARPU_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +#else +#define STARPU_WARN_UNUSED_RESULT +#endif /* __GNUC__ */ + +#define STARPU_BACKTRACE_LENGTH 32 +#ifdef __GLIBC__ +#define STARPU_DUMP_BACKTRACE() \ + do { \ + void *__ptrs[STARPU_BACKTRACE_LENGTH]; \ + int __n = backtrace(__ptrs, STARPU_BACKTRACE_LENGTH); \ + backtrace_symbols_fd(__ptrs, __n, 2); \ + } \ + while (0) +#else +#define STARPU_DUMP_BACKTRACE() \ + do { \ + } \ + while (0) +#endif + +#ifdef STARPU_SIMGRID_MC +#define STARPU_SIMGRID_ASSERT(x) MC_assert(!!(x)) +#else +#define STARPU_SIMGRID_ASSERT(x) +#endif + +/** + Unless StarPU 
has been configured with the option \ref enable-fast + "--enable-fast", this macro will abort
if the expression \p x is false. +*/ +#ifdef STARPU_NO_ASSERT +#define STARPU_ASSERT(x) \ + do { \ + if (0) { (void)(x); } \ + } \ + while (0) +#else +#if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS) +#define STARPU_ASSERT(x) \ + do { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + STARPU_DUMP_BACKTRACE(); \ + STARPU_SIMGRID_ASSERT(0 && #x); \ + *(int *)NULL = 0; \ + } \ + } \ + while (0) +#else +#define STARPU_ASSERT(x) \ + do { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + STARPU_DUMP_BACKTRACE(); \ + STARPU_SIMGRID_ASSERT(0 && #x); \ + assert(0 && #x); \ + } \ + } \ + while (0) +#endif +#endif + +/** + Unless StarPU has been configured with the option \ref enable-fast + "--enable-fast", this macro will abort if the pointer \p x is not pointing to + valid memory. +*/ +#ifdef STARPU_NO_ASSERT +#define STARPU_ASSERT_ACCESSIBLE(x) \ + do { \ + if (0) { (void)(x); } \ + } \ + while (0) +#else +#define STARPU_ASSERT_ACCESSIBLE(ptr) \ + do { \ + volatile char __c STARPU_ATTRIBUTE_UNUSED = *(char *)(ptr); \ + } \ + while (0) +#endif + +/** + This macro will abort compilation if the expression \p x is false. +*/ +#if STARPU_GNUC_PREREQ(4, 6) && !defined __cplusplus && !defined(__STRICT_ANSI__) +#define STARPU_STATIC_ASSERT(x) _Static_assert(x, #x) +#else +#define STARPU_STATIC_ASSERT(x) STARPU_ASSERT(x) +#endif + +/** + This macro will abort if the expression \p x is false. + The string \p msg will be displayed. +*/ +#if defined(__INTEL_COMPILER) +#pragma warning disable 279 // otherwise icc triggers "warning #279: controlling expression is constant" (probably because of assert(0 && #x)) +#endif +#if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS) +#define STARPU_ASSERT_MSG_ALWAYS(x, msg, ...) 
\ + do { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + STARPU_DUMP_BACKTRACE(); \ + fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ##__VA_ARGS__); \ + STARPU_SIMGRID_ASSERT(0 && #x); \ + *(int *)NULL = 0; \ + } \ + } \ + while (0) +#else +#define STARPU_ASSERT_MSG_ALWAYS(x, msg, ...) \ + do { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + STARPU_DUMP_BACKTRACE(); \ + fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ##__VA_ARGS__); \ + STARPU_SIMGRID_ASSERT(0 && #x); \ + assert(0 && #x); \ + abort(); \ + *(int *)NULL = 0; \ + } \ + } \ + while (0) +#endif + +/** + Unless StarPU has been configured with the option \ref enable-fast + "--enable-fast", this macro will abort if the expression \p x is false. + The string \p msg will be displayed. +*/ +#ifdef STARPU_NO_ASSERT +#define STARPU_ASSERT_MSG(x, msg, ...) \ + do { \ + if (0) \ + { \ + (void)(x); \ + (void)msg; \ + } \ + } \ + while (0) +#else +#define STARPU_ASSERT_MSG(x, msg, ...) \ + STARPU_ASSERT_MSG_ALWAYS(x, msg, ##__VA_ARGS__) +#endif + +#ifdef __APPLE_CC__ +#ifdef __clang_analyzer__ +#define _starpu_abort() exit(42) +#else +#define _starpu_abort() *(volatile int *)NULL = 0 +#endif +#else +#define _starpu_abort() abort() +#endif + +/** + Abort the program. +*/ +#define STARPU_ABORT() \ + do { \ + STARPU_DUMP_BACKTRACE(); \ + fprintf(stderr, "[starpu][abort][%s()@%s:%d]\n", __starpu_func__, __FILE__, __LINE__); \ + _starpu_abort(); \ + } \ + while (0) + +/** + Print the string '[starpu][abort][name of the calling function:name + of the file:line in the file]' followed by the given string \p msg + and abort the program +*/ +#define STARPU_ABORT_MSG(msg, ...) 
\ + do { \ + STARPU_DUMP_BACKTRACE(); \ + fprintf(stderr, "[starpu][abort][%s()@%s:%d] " msg "\n", __starpu_func__, __FILE__, __LINE__, ##__VA_ARGS__); \ + _starpu_abort(); \ + } \ + while (0) + +#if defined(_MSC_VER) +#undef STARPU_HAVE_STRERROR_R +#endif + +#if defined(STARPU_HAVE_STRERROR_R) +#if (!defined(__GLIBC__) || !__GLIBC__) || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && (!defined(_GNU_SOURCE))) +/* XSI-compliant version of strerror_r returns an int */ +#define starpu_strerror_r(errnum, buf, buflen) \ + do \ + { \ + int _ret = strerror_r((errnum), (buf), (buflen)); \ + STARPU_ASSERT(_ret == 0); \ + } \ + while (0) +#else +/* GNU-specific version of strerror_r returns a char * */ +#define starpu_strerror_r(errnum, buf, buflen) \ + do \ + { \ + char *const _user_buf = (buf); \ + const size_t _user_buflen = (buflen); \ + /* the GNU-specific behaviour when 'buf' == NULL cannot be emulated with the XSI-compliant version */ \ + STARPU_ASSERT((buf) != NULL); \ + char *_tmp_buf = strerror_r((errnum), _user_buf, _user_buflen); \ + if (_tmp_buf != _user_buf) \ + { \ + if (_user_buflen > 0) \ + { \ + strncpy(_user_buf, _tmp_buf, _user_buflen - 1); \ + _user_buf[_user_buflen - 1] = '\0'; \ + } \ + } \ + } \ + while (0) +#endif /* strerror_r ABI version */ +#endif /* STARPU_HAVE_STRERROR_R */ + +/** + Abort the program (after displaying \p message) if \p err has a + value which is not 0. +*/ +#if defined(STARPU_HAVE_STRERROR_R) +#define STARPU_CHECK_RETURN_VALUE(err, message, ...) \ + { \ + if (STARPU_UNLIKELY(err != 0)) \ + { \ + char xmessage[256]; \ + starpu_strerror_r(-err, xmessage, 256); \ + fprintf(stderr, "[starpu] Unexpected value: <%d:%s> returned for " message "\n", err, xmessage, ##__VA_ARGS__); \ + STARPU_ABORT(); \ + } \ + } +#else +#define STARPU_CHECK_RETURN_VALUE(err, message, ...) 
\ + { \ + if (STARPU_UNLIKELY(err != 0)) \ + { \ + fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", err, ##__VA_ARGS__); \ + STARPU_ABORT(); \ + } \ + } +#endif + +/** + Abort the program (after displaying \p message) if \p err is + different from \p value. + Note that \p err and \p value are macro arguments which may be + evaluated more than once. +*/ +#if defined(STARPU_HAVE_STRERROR_R) +#define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) \ + { \ + if (STARPU_UNLIKELY(err != value)) \ + { \ + char xmessage[256]; \ + starpu_strerror_r(-err, xmessage, 256); \ + fprintf(stderr, "[starpu] Unexpected value: <%d!=%d:%s> returned for " message "\n", err, value, xmessage, ##__VA_ARGS__); \ + STARPU_ABORT(); \ + } \ + } +#else +#define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) \ + { \ + if (STARPU_UNLIKELY(err != value)) \ + { \ + fprintf(stderr, "[starpu] Unexpected value: <%d != %d> returned for " message "\n", err, value, ##__VA_ARGS__); \ + STARPU_ABORT(); \ + } \ + } +#endif + +/* Note: do not use _starpu_cmpxchg / _starpu_xchg / _starpu_cmpxchgl / + * _starpu_xchgl / _starpu_cmpxchg64 / _starpu_xchg64, which are only + * hand-written assembly fallbacks used when building with an old gcc. 
+ * Rather use STARPU_VAL_COMPARE_AND_SWAP and STARPU_VAL_EXCHANGE available on + * all platforms with a recent-enough gcc */ + +#if defined(__i386__) || defined(__x86_64__) +static __starpu_inline unsigned _starpu_cmpxchg(unsigned *ptr, unsigned old, unsigned next) +{ + __asm__ __volatile__("lock cmpxchgl %2,%1" + : "+a"(old), "+m"(*ptr) + : "q"(next) + : "memory"); + return old; +} +#define STARPU_HAVE_CMPXCHG +static __starpu_inline unsigned _starpu_xchg(unsigned *ptr, unsigned next) +{ + /* Note: xchg is always locked already */ + __asm__ __volatile__("xchgl %1,%0" + : "+m"(*ptr), "+q"(next) + : + : "memory"); + return next; +} +#define STARPU_HAVE_XCHG + +static __starpu_inline uint32_t _starpu_cmpxchg32(uint32_t *ptr, uint32_t old, uint32_t next) +{ + __asm__ __volatile__("lock cmpxchgl %2,%1" + : "+a"(old), "+m"(*ptr) + : "q"(next) + : "memory"); + return old; +} +#define STARPU_HAVE_CMPXCHG32 +static __starpu_inline uint32_t _starpu_xchg32(uint32_t *ptr, uint32_t next) +{ + /* Note: xchg is always locked already */ + __asm__ __volatile__("xchgl %1,%0" + : "+m"(*ptr), "+q"(next) + : + : "memory"); + return next; +} +#define STARPU_HAVE_XCHG32 + +#if defined(__i386__) +static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next) +{ + __asm__ __volatile__("lock cmpxchgl %2,%1" + : "+a"(old), "+m"(*ptr) + : "q"(next) + : "memory"); + return old; +} +#define STARPU_HAVE_CMPXCHGL +static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next) +{ + /* Note: xchg is always locked already */ + __asm__ __volatile__("xchgl %1,%0" + : "+m"(*ptr), "+q"(next) + : + : "memory"); + return next; +} +#define STARPU_HAVE_XCHGL +#endif + +#if defined(__x86_64__) +static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next) +{ + __asm__ __volatile__("lock cmpxchgq %2,%1" + : "+a"(old), "+m"(*ptr) + : "q"(next) + : "memory"); + return old; +} 
+#define STARPU_HAVE_CMPXCHGL +static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next) +{ + /* Note: xchg is always locked already */ + __asm__ __volatile__("xchgq %1,%0" + : "+m"(*ptr), "+q"(next) + : + : "memory"); + return next; +} +#define STARPU_HAVE_XCHGL +#endif + +#if defined(__i386__) +static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next) +{ + uint32_t next_hi = next >> 32; + uint32_t next_lo = next & 0xfffffffful; + __asm__ __volatile__("lock cmpxchg8b %1" + : "+A"(old), "+m"(*ptr) + : "c"(next_hi), "b"(next_lo) + : "memory"); + return old; +} +#define STARPU_HAVE_CMPXCHG64 +#endif + +#if defined(__x86_64__) +static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next) +{ + __asm__ __volatile__("lock cmpxchgq %2,%1" + : "+a"(old), "+m"(*ptr) + : "q"(next) + : "memory"); + return old; +} +#define STARPU_HAVE_CMPXCHG64 +static __starpu_inline uint64_t _starpu_xchg64(uint64_t *ptr, uint64_t next) +{ + /* Note: xchg is always locked already */ + __asm__ __volatile__("xchgq %1,%0" + : "+m"(*ptr), "+q"(next) + : + : "memory"); + return next; +} +#define STARPU_HAVE_XCHG64 +#endif + +#endif + +#define STARPU_ATOMIC_SOMETHING(name, expr) \ + static __starpu_inline unsigned starpu_atomic_##name(unsigned *ptr, unsigned value) \ + { \ + unsigned old, next; \ + while (1) \ + { \ + old = *ptr; \ + next = expr; \ + if (_starpu_cmpxchg(ptr, old, next) == old) \ + break; \ + }; \ + return expr; \ + } +#define STARPU_ATOMIC_SOMETHINGL(name, expr) \ + static __starpu_inline unsigned long starpu_atomic_##name##l(unsigned long *ptr, unsigned long value) \ + { \ + unsigned long old, next; \ + while (1) \ + { \ + old = *ptr; \ + next = expr; \ + if (_starpu_cmpxchgl(ptr, old, next) == old) \ + break; \ + }; \ + return expr; \ + } +#define STARPU_ATOMIC_SOMETHING64(name, expr) \ + static __starpu_inline uint64_t starpu_atomic_##name##64(uint64_t * ptr, uint64_t value) \ + 
{ \ + uint64_t old, next; \ + while (1) \ + { \ + old = *ptr; \ + next = expr; \ + if (_starpu_cmpxchg64(ptr, old, next) == old) \ + break; \ + }; \ + return expr; \ + } + +/* Atomic addition, returns the new value */ +#if defined(STARPU_HAVE_SYNC_FETCH_AND_ADD) +#define STARPU_ATOMIC_ADD(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) +#define STARPU_ATOMIC_ADDL(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) +#elif defined(STARPU_HAVE_ATOMIC_FETCH_ADD) +#define STARPU_ATOMIC_ADD(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) +#define STARPU_ATOMIC_ADDL(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) +#else +#if defined(STARPU_HAVE_CMPXCHG) +STARPU_ATOMIC_SOMETHING(add, old + value) +#define STARPU_ATOMIC_ADD(ptr, value) starpu_atomic_add(ptr, value) +#endif +#if defined(STARPU_HAVE_CMPXCHGL) +STARPU_ATOMIC_SOMETHINGL(add, old + value) +#define STARPU_ATOMIC_ADDL(ptr, value) starpu_atomic_addl(ptr, value) +#endif +#endif + +#if defined(STARPU_HAVE_SYNC_FETCH_AND_ADD_8) +#define STARPU_ATOMIC_ADD64(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) +#elif defined(STARPU_HAVE_ATOMIC_FETCH_ADD_8) +#define STARPU_ATOMIC_ADD64(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) +#else +#if defined(STARPU_HAVE_CMPXCHG64) +STARPU_ATOMIC_SOMETHING64(add, old + value) +#define STARPU_ATOMIC_ADD64(ptr, value) starpu_atomic_add64(ptr, value) +#endif +#endif + +/* Atomic OR, returns the *old* value */ +#if defined(STARPU_HAVE_SYNC_FETCH_AND_OR) +#define STARPU_ATOMIC_OR(ptr, value) (__sync_fetch_and_or((ptr), (value))) +#define STARPU_ATOMIC_ORL(ptr, value) (__sync_fetch_and_or((ptr), (value))) +#elif defined(STARPU_HAVE_ATOMIC_FETCH_OR) +#define STARPU_ATOMIC_OR(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) +#define STARPU_ATOMIC_ORL(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) +#else +#if 
defined(STARPU_HAVE_CMPXCHG) +STARPU_ATOMIC_SOMETHING(or, old | value) +#define STARPU_ATOMIC_OR(ptr, value) starpu_atomic_or(ptr, value) +#endif +#if defined(STARPU_HAVE_CMPXCHGL) +STARPU_ATOMIC_SOMETHINGL(or, old | value) +#define STARPU_ATOMIC_ORL(ptr, value) starpu_atomic_orl(ptr, value) +#endif +#endif + +#if defined(STARPU_HAVE_SYNC_FETCH_AND_OR_8) +#define STARPU_ATOMIC_OR64(ptr, value) (__sync_fetch_and_or((ptr), (value))) +#elif defined(STARPU_HAVE_ATOMIC_FETCH_OR_8) +#define STARPU_ATOMIC_OR64(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) +#else +#if defined(STARPU_HAVE_CMPXCHG64) +STARPU_ATOMIC_SOMETHING64(or, old | value) +#define STARPU_ATOMIC_OR64(ptr, value) starpu_atomic_or64(ptr, value) +#endif +#endif + +/* Try to replace `old' with `value' at `ptr'. Returns true iff the swap was successful. */ +#ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP +#define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) +#else +#ifdef STARPU_HAVE_CMPXCHG +#define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value)) == (old)) +#endif +#endif + +#ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP +#define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) +#else +#ifdef STARPU_HAVE_CMPXCHG32 +#define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value)) == (old)) +#endif +#endif + +#if defined(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8) +#define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) +#elif defined(STARPU_HAVE_ATOMIC_EXCHANGE_N_8) && defined(__GNUC__) +static __starpu_inline int starpu_bool_compare_and_swap64(uint64_t *ptr, uint64_t old, uint64_t value) +{ + uint64_t expected = old; + return __atomic_compare_exchange_n(ptr, &expected, value, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, 
old, value) starpu_bool_compare_and_swap64((ptr), (old), (value)) +#else +#ifdef STARPU_HAVE_CMPXCHG64 +#define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value)) == (old)) +#endif +#endif + +#if UINTPTR_MAX == UINT64_MAX +#define STARPU_BOOL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP64((uint64_t*) (ptr), (uint64_t) (old), (uint64_t) (value)) +#else +#define STARPU_BOOL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) +#endif + +/* Try to replace `old' with `value' at `ptr'. Returns the value actually seen at `ptr'. */ +#ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP +#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) +#else +#ifdef STARPU_HAVE_CMPXCHG +#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value))) +#endif +#endif + +#ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP +#define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) +#else +#ifdef STARPU_HAVE_CMPXCHG32 +#define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value))) +#endif +#endif + +#if defined(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8) +#define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) +#elif defined(STARPU_HAVE_ATOMIC_EXCHANGE_N_8) && defined(__GNUC__) +static __starpu_inline uint64_t starpu_val_compare_and_swap64(uint64_t *ptr, uint64_t old, uint64_t value) +{ + uint64_t expected = old; + if (__atomic_compare_exchange_n(ptr, &expected, value, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + return old; + else + return expected; +} +#define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) starpu_val_compare_and_swap64((ptr), (old), (value)) +#else +#ifdef STARPU_HAVE_CMPXCHG64 +#define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value))) +#endif 
+#endif + +#if UINTPTR_MAX == UINT64_MAX +#define STARPU_VAL_COMPARE_AND_SWAP_PTR(ptr, old, value) ((void*)STARPU_VAL_COMPARE_AND_SWAP64((uint64_t*) (ptr), (uint64_t) (old), (uint64_t) (value))) +#else +#define STARPU_VAL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) +#endif + +/* Atomically store `value' at `ptr'. Returns the value previously seen at `ptr'. */ +#ifdef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 +#define STARPU_VAL_EXCHANGE64(ptr, value) (__atomic_exchange_n((ptr), (value), __ATOMIC_SEQ_CST)) +#else +#ifdef STARPU_HAVE_XCHG64 +#define STARPU_VAL_EXCHANGE64(ptr, value) (_starpu_xchg64((ptr), (value))) +#endif +#endif + +#ifdef STARPU_HAVE_ATOMIC_EXCHANGE_N +#define STARPU_VAL_EXCHANGE(ptr, value) (__atomic_exchange_n((ptr), (value), __ATOMIC_SEQ_CST)) +#define STARPU_VAL_EXCHANGEL(ptr, value) STARPU_VAL_EXCHANGE((ptr), (value)) +#define STARPU_VAL_EXCHANGE32(ptr, value) STARPU_VAL_EXCHANGE((ptr), (value)) +#else +#ifdef STARPU_HAVE_XCHG +#define STARPU_VAL_EXCHANGE(ptr, value) (_starpu_xchg((ptr), (value))) +#endif +#ifdef STARPU_HAVE_XCHGL +#define STARPU_VAL_EXCHANGEL(ptr, value) (_starpu_xchgl((ptr), (value))) +#endif +#ifdef STARPU_HAVE_XCHG32 +#define STARPU_VAL_EXCHANGE32(ptr, value) (_starpu_xchg32((ptr), (value))) +#endif +#endif + +/* Returns the previous value */ +#ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET +#define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set((ptr), (value))) +#define STARPU_RELEASE(ptr) (__sync_lock_release((ptr))) +#elif defined(STARPU_HAVE_XCHG) +#define STARPU_TEST_AND_SET(ptr, value) (_starpu_xchg((ptr), (value))) +#define STARPU_RELEASE(ptr) (_starpu_xchg((ptr), 0)) +#endif + +#ifdef STARPU_HAVE_SYNC_SYNCHRONIZE +#define STARPU_SYNCHRONIZE() __sync_synchronize() +#elif defined(__i386__) +#define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl $0,0(%%esp)" :: \ + : "memory") +#elif defined(__KNC__) || defined(__KNF__) +#define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl 
$0,0(%%rsp)" :: \ + : "memory") +#elif defined(__x86_64__) +#define STARPU_SYNCHRONIZE() __asm__
__volatile__("mfence" :: \ + : "memory") +#elif defined(__ppc__) || defined(__ppc64__) +#define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" :: \ + : "memory") +#endif + +/** + This macro can be used to do a synchronization. +*/ +#if defined(__x86_64__) +#define STARPU_RMB() __asm__ __volatile__("lfence" :: \ + : "memory") +#elif defined(__aarch64__) +#define STARPU_RMB() __asm__ __volatile__("dsb ld" :: \ + : "memory") +#else +#define STARPU_RMB() STARPU_SYNCHRONIZE() +#endif + +/** + This macro can be used to do a synchronization. +*/ +#if defined(__x86_64__) +#define STARPU_WMB() __asm__ __volatile__("sfence" :: \ + : "memory") +#elif defined(__aarch64__) +#define STARPU_WMB() __asm__ __volatile__("dsb st" :: \ + : "memory") +#else +#define STARPU_WMB() STARPU_SYNCHRONIZE() +#endif + +#if defined(__i386__) || defined(__x86_64__) +#define STARPU_CACHELINE_SIZE 64 +#elif defined(__ppc__) || defined(__ppc64__) || defined(__ia64__) +#define STARPU_CACHELINE_SIZE 128 +#elif defined(__s390__) || defined(__s390x__) +#define STARPU_CACHELINE_SIZE 256 +#else +/* Conservative default */ +#define STARPU_CACHELINE_SIZE 1024 +#endif + +#ifdef _WIN32 +/* Try to fetch the system definition of timespec */ +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#if !defined(_MSC_VER) || defined(BUILDING_STARPU) +#include +#endif +#if !defined(STARPU_HAVE_STRUCT_TIMESPEC) || (defined(_MSC_VER) && _MSC_VER < 1900) +/* If it didn't get defined in the standard places, then define it ourself */ +#ifndef STARPU_TIMESPEC_DEFINED +#define STARPU_TIMESPEC_DEFINED 1 +struct timespec +{ + time_t tv_sec; /* Seconds */ + long tv_nsec; /* Nanoseconds */ +}; +#endif /* STARPU_TIMESPEC_DEFINED */ +#endif /* STARPU_HAVE_STRUCT_TIMESPEC */ +/* Fetch gettimeofday on mingw/cygwin */ +#if defined(__MINGW32__) || defined(__CYGWIN__) +#include +#endif +#else +#include +#endif /* _WIN32 */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_UTIL_H__ */ diff --git 
a/include/starpu_worker.h b/include/starpu_worker.h new file mode 100644 index 0000000..47888a9 --- /dev/null +++ b/include/starpu_worker.h @@ -0,0 +1,739 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_WORKER_H__ +#define __STARPU_WORKER_H__ + +#include <stdlib.h> +#include <starpu_config.h> +#include <starpu_thread.h> +#include <starpu_task.h> + +#ifdef STARPU_HAVE_HWLOC +#include <hwloc.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Workers Workers + @{ +*/ + +/** + Memory node Type +*/ +enum starpu_node_kind +{ + STARPU_UNUSED = 0, + STARPU_CPU_RAM = 1, /**< CPU core */ + STARPU_CUDA_RAM = 2, /**< NVIDIA CUDA device */ + STARPU_OPENCL_RAM = 3, /**< OpenCL device */ + STARPU_MAX_FPGA_RAM = 4, /**< Maxeler FPGA device */ + STARPU_DISK_RAM = 5, /**< Disk memory */ + STARPU_MPI_MS_RAM = 6, /**< MPI Slave device */ + STARPU_TCPIP_MS_RAM = 7, /**< TCPIP Slave device */ + STARPU_HIP_RAM = 8, /**< NVIDIA/AMD HIP device */ + STARPU_MAX_RAM = 8, /**< Maximum value of memory types */ + STARPU_NRAM = 9, /**< Number of memory types */ +}; + +/** + Worker Architecture Type + + The numbering has a gap (the value 3 is not used) because + renumbering workers would make old performance model files + unusable.
+*/ +enum starpu_worker_archtype +{ + STARPU_CPU_WORKER = 0, /**< CPU core */ + STARPU_CUDA_WORKER = 1, /**< NVIDIA CUDA device */ + STARPU_OPENCL_WORKER = 2, /**< OpenCL device */ + STARPU_MAX_FPGA_WORKER = 4, /**< Maxeler FPGA device */ + STARPU_MPI_MS_WORKER = 5, /**< MPI Slave device */ + STARPU_TCPIP_MS_WORKER = 6, /**< TCPIP Slave device */ + STARPU_HIP_WORKER = 7, /**< NVIDIA/AMD HIP device */ + STARPU_NARCH = 8, /**< Number of arch types */ + STARPU_ANY_WORKER = 255 /**< any worker, used in the hypervisor */ +}; + +#define STARPU_UNKNOWN_WORKER ((enum starpu_worker_archtype)-1) /**< Invalid worker value */ + +/** + Structure needed to iterate on the collection +*/ +struct starpu_sched_ctx_iterator +{ + /** + The index of the current worker in the collection, needed + when iterating on the collection. + */ + int cursor; + void *value; + void *possible_value; + char visited[STARPU_NMAXWORKERS]; + int possibly_parallel; +}; + +/** + Types of structures the worker collection can implement +*/ +enum starpu_worker_collection_type +{ + STARPU_WORKER_TREE, /**< The collection is a tree */ + STARPU_WORKER_LIST /**< The collection is an array */ +}; + +/** + A scheduling context manages a collection of workers that can be + memorized using different data structures. Thus, a generic + structure is available in order to simplify the choice of its type. + Only the list data structure is available but further data + structures(like tree) implementations are foreseen. 
+*/ +struct starpu_worker_collection +{ + /** + The workerids managed by the collection + */ + int *workerids; + void *collection_private; + /** + The number of workers in the collection + */ + unsigned nworkers; + void *unblocked_workers; + unsigned nunblocked_workers; + void *masters; + unsigned nmasters; + char present[STARPU_NMAXWORKERS]; + char is_unblocked[STARPU_NMAXWORKERS]; + char is_master[STARPU_NMAXWORKERS]; + /** + The type of structure + */ + enum starpu_worker_collection_type type; + /** + Check if there is another element in collection + */ + unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); + /** + Return the next element in the collection + */ + int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); + /** + Add a new element in the collection + */ + int (*add)(struct starpu_worker_collection *workers, int worker); + /** + Remove an element from the collection + */ + int (*remove)(struct starpu_worker_collection *workers, int worker); + /** + Initialize the collection + */ + void (*init)(struct starpu_worker_collection *workers); + /** + Deinitialize the collection + */ + void (*deinit)(struct starpu_worker_collection *workers); + /** + Initialize the cursor if there is one + */ + void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); + void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task); +}; + +extern struct starpu_worker_collection starpu_worker_list; +extern struct starpu_worker_collection starpu_worker_tree; + +/** + Wait for all workers to be initialised. Calling this function is + normally not necessary. It is called for example in + tools/starpu_machine_display to make sure all workers + information are correctly set before printing their information. + See \ref PauseResume for more details. 
+*/ +void starpu_worker_wait_for_initialisation(void); + +/** + Return true if type matches one of StarPU's defined worker architectures. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type); + +/** + Convert a mask of architectures to a worker archtype. + See \ref TopologyWorkers for more details. +*/ +enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask); + +/** + Return the number of workers (i.e. processing units executing + StarPU tasks). The return value should be at most \ref + STARPU_NMAXWORKERS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_worker_get_count(void); + +/** + Return the number of CPUs controlled by StarPU. The return value + should be at most \ref STARPU_MAXCPUS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_cpu_worker_get_count(void); + +/** + Return the number of CUDA devices controlled by StarPU. The return + value should be at most \ref STARPU_MAXCUDADEVS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_cuda_worker_get_count(void); + +/** + Return the number of HIP devices controlled by StarPU. The return + value should be at most \ref STARPU_MAXHIPDEVS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_hip_worker_get_count(void); + +/** + Return the number of OpenCL devices controlled by StarPU. The + return value should be at most \ref STARPU_MAXOPENCLDEVS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_opencl_worker_get_count(void); + +/** + Return the number of MPI Master Slave workers controlled by StarPU. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_mpi_ms_worker_get_count(void); + +/** + Return the number of TCPIP Master Slave workers controlled by StarPU. + See \ref TopologyWorkers for more details. 
+*/ +unsigned starpu_tcpip_ms_worker_get_count(void); + +/** + Return the identifier of the current worker, i.e. the one associated + to the calling thread. The return value is either \c -1 if the + current context is not a StarPU worker (i.e. when called from the + application outside a task or a callback), or an integer between \c + 0 and starpu_worker_get_count() - \c 1. + See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. +*/ +int starpu_worker_get_id(void); + +unsigned _starpu_worker_get_id_check(const char *f, int l); + +/** + Similar to starpu_worker_get_id(), but abort when called from + outside a worker (i.e. when starpu_worker_get_id() would return \c + -1). + See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. +*/ +unsigned starpu_worker_get_id_check(void); + +#define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__) + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_bindid(int workerid); + +/** + See \ref SchedulingHelpers for more details. +*/ +void starpu_sched_find_all_worker_combinations(void); + +/** + Return the type of processing unit associated to the worker \p id. + The worker identifier is a value returned by the function + starpu_worker_get_id(). The return value indicates the + architecture of the worker: ::STARPU_CPU_WORKER for a CPU core, + ::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER + for an OpenCL device. The return value for an invalid identifier is + unspecified. + See \ref TopologyWorkers for more details. +*/ +enum starpu_worker_archtype starpu_worker_get_type(int id); + +/** + Return the number of workers of \p type. A non-negative + value is returned in case of success; -EINVAL is returned + if \p type is not valid. + See \ref TopologyWorkers for more details.
+*/ +int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); + +/** + Get the list of identifiers of workers of \p type. Fill the array + \p workerids with the identifiers of the workers. The argument + \p maxsize indicates the size of the array \p workerids. The return + value gives the number of identifiers that were put in the array. + -ERANGE is returned if \p maxsize is lower than the number + of workers with the appropriate type: in that case, the array is + filled with the \p maxsize first elements. To avoid such overflows, + the value of \p maxsize can be chosen by the means of the function + starpu_worker_get_count_by_type(), or by passing a value greater or + equal to \ref STARPU_NMAXWORKERS. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize); + +/** + Return the identifier of the \p num -th worker that has the + specified \p type. If there is no such worker, -1 is returned. + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); + +/** + Return the identifier of the worker that has the specified \p type + and device id \p devid (which may not be the n-th, if some devices + are skipped for instance). If there is no such worker, \c -1 is + returned. + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); + +/** + Return true if workers of type \p worker_type can execute \p task. + See \ref SchedulingHelpers for more details. +*/ +unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task); + +/** + Get the name of the worker \p id. StarPU associates a unique human + readable string to each processing unit. This function copies at + most the \p maxlen first bytes of the unique string associated to + the worker \p id into the \p dst buffer.
The caller is responsible + for ensuring that \p dst is a valid pointer to a buffer of \p + maxlen bytes at least. Calling this function on an invalid + identifier results in an unspecified behaviour. + See \ref TopologyWorkers for more details. +*/ +void starpu_worker_get_name(int id, char *dst, size_t maxlen); + +/** + Display on \p output the list (if any) of all workers. + See \ref TopologyWorkers for more details. +*/ +void starpu_worker_display_all(FILE *output); + +/** + Display on \p output the list (if any) of all the workers of the + given \p type. + See \ref TopologyWorkers for more details. +*/ +void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type); + +/** + Display on \p output the number of workers of the given \p type. + See \ref TopologyWorkers for more details. +*/ +void starpu_worker_display_count(FILE *output, enum starpu_worker_archtype type); + +/** + Return the device id of the worker \p id. The worker should be + identified with the value returned by the starpu_worker_get_id() + function. In the case of a CUDA worker, this device identifier is + the logical device identifier exposed by CUDA (used by the function + \c cudaGetDevice() for instance). The device identifier of a CPU + worker is the logical identifier of the core on which the worker + was bound; this identifier is either provided by the OS or by the + library hwloc in case it is available. + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_devid(int id); + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_devnum(int id); + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_subworkerid(int id); + +/** + See \ref TopologyWorkers for more details. +*/ +struct starpu_tree *starpu_workers_get_tree(void); + +/** + See \ref TopologyWorkers for more details. 
+*/ +unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); + +/** + Return when the current task is expected to be finished. + + Note: the returned date should be used with caution since the task might very + well end just after this function returns. + + See \ref Per-taskFeedback for more details. + */ +void starpu_worker_get_current_task_exp_end(unsigned workerid, struct timespec *date); + +/** + Return whether worker \p workerid is currently blocked in a parallel task. + See \ref SchedulingHelpers for more details. + */ +unsigned starpu_worker_is_blocked_in_parallel(int workerid); + +/** + See \ref SchedulingHelpers for more details. + */ +unsigned starpu_worker_is_slave_somewhere(int workerid); + +/** + Return worker \p type as a string. + See \ref TopologyWorkers for more details. +*/ +const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); + +/** + Return worker \p type from a string. + Returns STARPU_UNKNOWN_WORKER if the string doesn't match a worker type. + See \ref TopologyWorkers for more details. +*/ +enum starpu_worker_archtype starpu_worker_get_type_from_string(const char *type); + +/** + Return worker \p type as a string suitable for environment variable names (CPU, CUDA, etc.). + See \ref TopologyWorkers for more details. +*/ +const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type); + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_bindid_get_workerids(int bindid, int **workerids); + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num); + +/** + See \ref TopologyWorkers for more details. +*/ +int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type); + +#ifdef STARPU_HAVE_HWLOC +/** + If StarPU was compiled with \c hwloc support, return a duplicate of + the \c hwloc cpuset associated with the worker \p workerid. 
The + returned cpuset is obtained from a \c hwloc_bitmap_dup() function + call. It must be freed by the caller using \c hwloc_bitmap_free(). + See \ref InteroperabilityHWLOC for more details. +*/ +hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid); +/** + If StarPU was compiled with \c hwloc support, return the \c hwloc + object corresponding to the worker \p workerid. + See \ref SchedulingHelpers for more details. +*/ +hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid); +#endif + +/** + See \ref TopologyMemory for more details. +*/ +int starpu_memory_node_get_devid(unsigned node); + +/** + Return the memory node associated to the current worker. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_worker_get_local_memory_node(void); + +/** + Return the identifier of the memory node associated to the worker + identified by \p workerid. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_worker_get_memory_node(unsigned workerid); + +/** + Return the number of memory nodes. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_memory_nodes_get_count(void); + +/** + Return the number of memory nodes of a given \p kind. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_memory_nodes_get_count_by_kind(enum starpu_node_kind kind); + +/** + Get the list of memory nodes of kind \p kind. + Fill the array \p memory_nodes_ids with the memory nodes numbers. + The argument \p maxsize indicates the size of the array + \p memory_nodes_ids. The return value gives the number of node numbers + that were put in the array. -ERANGE is returned if \p maxsize + is lower than the number of memory nodes with the appropriate kind: in that + case, the array is filled with the \p maxsize first elements. To avoid such + overflows, the value of maxsize can be chosen by the means of function + starpu_memory_nodes_get_count_by_kind(), or by passing a value greater or + equal to \ref STARPU_MAXNODES. 
+ See \ref TopologyWorkers for more details. +*/ +unsigned starpu_memory_node_get_ids_by_type(enum starpu_node_kind kind, unsigned *memory_nodes_ids, unsigned maxsize); + +/** + Return in \p name the name of a memory node (NUMA 0, CUDA 0, etc.) + \p size is the size of the \p name array. + See \ref TopologyWorkers for more details. +*/ +int starpu_memory_node_get_name(unsigned node, char *name, size_t size); + +/** + Return the number of NUMA nodes used by StarPU. + See \ref TopologyWorkers for more details. +*/ +unsigned starpu_memory_nodes_get_numa_count(void); + +/** + Return the identifier of the memory node associated to the NUMA + node identified by \p osid by the Operating System. + See \ref TopologyWorkers for more details. +*/ +int starpu_memory_nodes_numa_id_to_devid(int osid); + +/** + Return the Operating System identifier of the memory node whose + StarPU identifier is \p id. + See \ref TopologyWorkers for more details. +*/ +int starpu_memory_nodes_numa_devid_to_id(unsigned id); + +/** + Return the type of \p node as defined by ::starpu_node_kind. For + example, when defining a new data interface, this function should + be used in the allocation function to determine on which device the + memory needs to be allocated. + See \ref TopologyWorkers for more details. +*/ +enum starpu_node_kind starpu_node_get_kind(unsigned node); + +/** + Return the type of worker which operates on memory node kind \p node_kind. + See \ref TopologyWorkers for more details. + */ +enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind); + +/** + Return the type of memory node that arch type \p type operates on. + See \ref TopologyWorkers for more details. + */ +enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type); + +/** + @name Scheduling operations + @{ +*/ + +/** + Return \c !0 if current worker has a scheduling operation in + progress, and \c 0 otherwise. 
+*/ +int starpu_worker_sched_op_pending(void); + +/** + Allow other threads and workers to temporarily observe the current + worker state, even though it is performing a scheduling operation. + Must be called by a worker before performing a potentially blocking + call such as acquiring a mutex other than its own sched_mutex. This + function increases \c state_relax_refcnt of the current worker. + No more than UINT_MAX-1 nested starpu_worker_relax_on() + calls should be performed on the same worker. This function is + automatically called by starpu_worker_lock() to relax the caller + worker state while attempting to lock the target worker. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_relax_on(void); + +/** + Must be called after a potentially blocking call is complete, to + restore the relax state in place before the corresponding + starpu_worker_relax_on(). Decreases \c state_relax_refcnt. Calls to + starpu_worker_relax_on() and starpu_worker_relax_off() must be + properly paired. This function is automatically called by + starpu_worker_unlock() after the target worker has been unlocked. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_relax_off(void); + +/** + Return \c !0 if \c state_relax_refcnt of the current worker is non-zero, and \c + 0 otherwise. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_worker_get_relax_state(void); + +/** + Acquire the sched mutex of \p workerid. If the caller is a worker, + distinct from \p workerid, the caller worker automatically enters a + relax state while acquiring the target worker lock. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_lock(int workerid); + +/** + Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if + successful, \c !0 if \p workerid sched mutex is held or the + corresponding worker is not in a relax state.
If the caller is a + worker, distinct from \p workerid, the caller worker automatically + enters relax state if successfully acquiring the target worker lock. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_worker_trylock(int workerid); + +/** + Release the previously acquired sched mutex of \p workerid. Restore + the relax state of the caller worker if needed. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_unlock(int workerid); + +/** + Acquire the current worker sched mutex. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_lock_self(void); + +/** + Release the current worker sched mutex. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +void starpu_worker_unlock_self(void); + +#ifdef STARPU_WORKER_CALLBACKS +/** + If StarPU was compiled with blocking drivers support and worker + callbacks support enabled, allow to specify an external resource + manager callback to be notified about workers going to sleep. + See \ref SchedulingHelpers for more details. +*/ +void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid)); + +/** + If StarPU was compiled with blocking drivers support and worker + callbacks support enabled, allow to specify an external resource + manager callback to be notified about workers waking-up. + See \ref SchedulingHelpers for more details. +*/ +void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid)); +#endif + +/** @} */ + +/** @} */ + +/** + @defgroup API_Parallel_Tasks Parallel Tasks + @{ +*/ + +/** + Return the number of different combined workers. + See \ref SchedulingHelpers for more details. +*/ +unsigned starpu_combined_worker_get_count(void); +/** + See \ref SchedulingHelpers for more details. +*/ +unsigned starpu_worker_is_combined_worker(int id); + +/** + Return the identifier of the current combined worker. + See \ref SchedulingHelpers for more details. 
+*/ +int starpu_combined_worker_get_id(void); + +/** + Return the size of the current combined worker, i.e. the total + number of CPUs running the same task in the case of ::STARPU_SPMD + parallel tasks, or the total number of threads that the task is + allowed to start in the case of ::STARPU_FORKJOIN parallel tasks. + See \ref Fork-modeParallelTasks and \ref SPMD-modeParallelTasks for more details. +*/ +int starpu_combined_worker_get_size(void); + +/** + Return the rank of the current thread within the combined worker. + Can only be used in ::STARPU_SPMD parallel tasks, to know which + part of the task to work on. + See \ref SPMD-modeParallelTasks for more details. +*/ +int starpu_combined_worker_get_rank(void); + +/** + Register a new combined worker and get its identifier. + See \ref SchedulingHelpers for more details. +*/ +int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]); + +/** + Get the description of a combined worker. + See \ref SchedulingHelpers for more details. + + \p workerid is the requested combined worker id, + \p worker_size returns the number of workers in the combined worker, + \p combined_workerid returns the list of worker ids in the combined worker. +*/ +int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid); + +/** + Variant of starpu_worker_can_execute_task() compatible with + combined workers. + See \ref DefiningANewBasicSchedulingPolicy for more details. +*/ +int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl); + +/** + Initialise the barrier for the parallel task, and dispatch the task + between the different workers of the given combined worker. + See \ref SchedulingHelpers for more details.
+ */ +void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid); + +/** + Initialise the barrier for the parallel task, to be pushed to \p + worker_size workers (without having to explicit a given combined + worker). + See \ref SchedulingHelpers for more details. +*/ +void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* __STARPU_WORKER_H__ */ diff --git a/julia/Makefile.am b/julia/Makefile.am new file mode 100644 index 0000000..bf1a7cc --- /dev/null +++ b/julia/Makefile.am @@ -0,0 +1,25 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = src + +if STARPU_BUILD_EXAMPLES +SUBDIRS += examples +endif + +EXTRA_DIST = README diff --git a/julia/Makefile.in b/julia/Makefile.in new file mode 100644 index 0000000..e404b03 --- /dev/null +++ b/julia/Makefile.in @@ -0,0 +1,891 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_EXAMPLES_TRUE@am__append_1 = examples +subdir = julia +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + 
$(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) 
$(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = src examples +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk README +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ 
+AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ 
+LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ 
+NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ 
+STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ 
+ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src $(am__append_1) +EXTRA_DIST = README +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + 
case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign julia/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# Recurse with $(MAKE), not a literal 'make', so the invoking make binary, its flags and the jobserver reach each subdirectory. +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/julia/README b/julia/README new file mode 100644 index 0000000..a85214c --- /dev/null +++ b/julia/README @@ -0,0 +1,68 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +Contents +======== + +* Installing Julia +* Installing StarPU module for Julia +* Running Examples + +Installing Julia +---------------- +Julia version 1.3+ is required and can be downloaded from +https://julialang.org/downloads/. + + +Installing StarPU module for Julia +---------------------------------- +First, build the jlstarpu_c_wrapper library: + +$ make + +Then, you need to add the src/.libs/ directory to your library path and the src/ +directory to your Julia load path: + +$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/src/.libs +$ export JULIA_LOAD_PATH=$PWD/src:$JULIA_LOAD_PATH + +This step can also be done by sourcing the setenv.sh script: + +$ . setenv.sh + +Running Examples +---------------- + +You can find several examples in the examples/ directory. 
+ +For each example X, three versions are provided: + +- X.c: Original C+StarPU code +- X_native.jl: Native Julia version (without StarPU) +- X.jl: Julia version using StarPU + + +To run the original C+StarPU code: +$ make cstarpu.dat + +To run the native Julia version: +$ make julia_native.dat + +To run the Julia version using StarPU: +$ make julia_generatedc.dat + + + + diff --git a/julia/examples/Makefile.am b/julia/examples/Makefile.am new file mode 100644 index 0000000..4c90191 --- /dev/null +++ b/julia/examples/Makefile.am @@ -0,0 +1,111 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +BUILT_SOURCES = + +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log + +EXTRA_DIST = \ + axpy/axpy.jl \ + axpy/axpy.sh \ + black_scholes/black_scholes.jl \ + callback/callback.jl \ + callback/callback.sh \ + check_deps/check_deps.jl \ + check_deps/check_deps.sh \ + cholesky/cholesky_codelets.jl \ + cholesky/cholesky_common.jl \ + cholesky/cholesky_native.jl \ + cholesky/cholesky_implicit.jl \ + cholesky/cholesky_tag.jl \ + cholesky/cholesky.sh \ + dependency/end_dep.jl \ + dependency/end_dep.sh \ + dependency/tag_dep.jl \ + dependency/tag_dep.sh \ + dependency/task_dep.sh \ + dependency/task_dep.jl \ + gemm/gemm.jl \ + gemm/gemm_native.jl \ + gemm/gemm.sh \ + mandelbrot/mandelbrot_native.jl \ + mandelbrot/mandelbrot.jl \ + mandelbrot/mandelbrot.sh \ + mult/mult_native.jl \ + mult/mult.jl \ + mult/perf.sh \ + mult/mult_starpu.sh \ + task_insert_color/task_insert_color.jl \ + task_insert_color/task_insert_color.sh \ + variable/variable.jl \ + variable/variable_native.jl \ + variable/variable.sh \ + vector_scal/vector_scal.jl \ + vector_scal/vector_scal.sh + +examplebindir = $(libdir)/starpu/julia + +examplebin_PROGRAMS = + +AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) +LIBS += -lm + +check_PROGRAMS = $(LOADER) $(starpu_julia_EXAMPLES) +SHELL_TESTS = +STARPU_JULIA_EXAMPLES = + +examplebin_PROGRAMS += $(STARPU_JULIA_EXAMPLES) + +TESTS = $(SHELL_TESTS) $(STARPU_JULIA_EXAMPLES) + +###################### +# Examples # +###################### + +SHELL_TESTS += check_deps/check_deps.sh + +STARPU_JULIA_EXAMPLES += mult/mult +mult_mult_SOURCES = 
mult/mult.c mult/cpu_mult.c +SHELL_TESTS += mult/mult_starpu.sh + +STARPU_JULIA_EXAMPLES += task_insert_color/task_insert_color +SHELL_TESTS += task_insert_color/task_insert_color.sh + +SHELL_TESTS += variable/variable.sh +SHELL_TESTS += vector_scal/vector_scal.sh + +STARPU_JULIA_EXAMPLES += mandelbrot/mandelbrot +mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot/cpu_mandelbrot.c mandelbrot/cpu_mandelbrot.h +SHELL_TESTS += mandelbrot/mandelbrot.sh + +STARPU_JULIA_EXAMPLES += callback/callback +SHELL_TESTS += callback/callback.sh + +SHELL_TESTS += dependency/tag_dep.sh +SHELL_TESTS += dependency/task_dep.sh +SHELL_TESTS += dependency/end_dep.sh + +if !STARPU_NO_BLAS_LIB +SHELL_TESTS += axpy/axpy.sh +SHELL_TESTS += cholesky/cholesky.sh +SHELL_TESTS += gemm/gemm.sh +endif diff --git a/julia/examples/Makefile.in b/julia/examples/Makefile.in new file mode 100644 index 0000000..8f8f252 --- /dev/null +++ b/julia/examples/Makefile.in @@ -0,0 +1,1802 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_2) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +examplebin_PROGRAMS = $(am__EXEEXT_1) +check_PROGRAMS = +TESTS = $(SHELL_TESTS) $(am__EXEEXT_1) +@STARPU_NO_BLAS_LIB_FALSE@am__append_8 = axpy/axpy.sh \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky.sh gemm/gemm.sh +subdir = julia/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = execute.sh +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = mult/mult$(EXEEXT) \ + task_insert_color/task_insert_color$(EXEEXT) \ + mandelbrot/mandelbrot$(EXEEXT) callback/callback$(EXEEXT) +am__installdirs = 
"$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +callback_callback_SOURCES = callback/callback.c +am__dirstamp = $(am__leading_dot)dirstamp +callback_callback_OBJECTS = callback/callback.$(OBJEXT) +callback_callback_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +am_mandelbrot_mandelbrot_OBJECTS = mandelbrot/mandelbrot.$(OBJEXT) \ + mandelbrot/cpu_mandelbrot.$(OBJEXT) +mandelbrot_mandelbrot_OBJECTS = $(am_mandelbrot_mandelbrot_OBJECTS) +mandelbrot_mandelbrot_LDADD = $(LDADD) +am_mult_mult_OBJECTS = mult/mult.$(OBJEXT) mult/cpu_mult.$(OBJEXT) +mult_mult_OBJECTS = $(am_mult_mult_OBJECTS) +mult_mult_LDADD = $(LDADD) +task_insert_color_task_insert_color_SOURCES = \ + task_insert_color/task_insert_color.c +task_insert_color_task_insert_color_OBJECTS = \ + task_insert_color/task_insert_color.$(OBJEXT) +task_insert_color_task_insert_color_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + callback/$(DEPDIR)/callback.Po \ + mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po \ + mandelbrot/$(DEPDIR)/mandelbrot.Po mult/$(DEPDIR)/cpu_mult.Po \ + mult/$(DEPDIR)/mult.Po \ + task_insert_color/$(DEPDIR)/task_insert_color.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = callback/callback.c loader.c \ + $(mandelbrot_mandelbrot_SOURCES) $(mult_mult_SOURCES) \ + task_insert_color/task_insert_color.c +DIST_SOURCES = callback/callback.c loader.c \ + $(mandelbrot_mandelbrot_SOURCES) $(mult_mult_SOURCES) \ + task_insert_color/task_insert_color.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/execute.sh.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ 
+CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + ../src/libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la \ + $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) \ + $(STARPU_CUDA_LDFLAGS) -lm +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ 
+LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = 
@STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = 
@am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = 
$(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +BUILT_SOURCES = +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log +EXTRA_DIST = \ + axpy/axpy.jl \ + axpy/axpy.sh \ + black_scholes/black_scholes.jl \ + callback/callback.jl \ + callback/callback.sh \ + check_deps/check_deps.jl \ + check_deps/check_deps.sh \ + cholesky/cholesky_codelets.jl \ + cholesky/cholesky_common.jl \ + cholesky/cholesky_native.jl \ + cholesky/cholesky_implicit.jl \ + cholesky/cholesky_tag.jl \ + cholesky/cholesky.sh \ + dependency/end_dep.jl \ + dependency/end_dep.sh \ + dependency/tag_dep.jl \ + dependency/tag_dep.sh \ + dependency/task_dep.sh \ + dependency/task_dep.jl \ + gemm/gemm.jl \ + gemm/gemm_native.jl \ + gemm/gemm.sh \ + mandelbrot/mandelbrot_native.jl \ + mandelbrot/mandelbrot.jl \ + mandelbrot/mandelbrot.sh \ + mult/mult_native.jl \ + mult/mult.jl \ + mult/perf.sh \ + mult/mult_starpu.sh \ + task_insert_color/task_insert_color.jl \ + task_insert_color/task_insert_color.sh \ + variable/variable.jl \ + variable/variable_native.jl \ + variable/variable.sh \ + vector_scal/vector_scal.jl \ + vector_scal/vector_scal.sh + +examplebindir = $(libdir)/starpu/julia +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ + +###################### +# Examples # +###################### +SHELL_TESTS = check_deps/check_deps.sh mult/mult_starpu.sh \ + task_insert_color/task_insert_color.sh variable/variable.sh \ + vector_scal/vector_scal.sh mandelbrot/mandelbrot.sh \ + callback/callback.sh dependency/tag_dep.sh \ + dependency/task_dep.sh dependency/end_dep.sh $(am__append_8) +STARPU_JULIA_EXAMPLES = mult/mult task_insert_color/task_insert_color \ + mandelbrot/mandelbrot callback/callback +mult_mult_SOURCES = mult/mult.c mult/cpu_mult.c +mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot/cpu_mandelbrot.c mandelbrot/cpu_mandelbrot.h +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .cu 
.cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign julia/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +execute.sh: $(top_builddir)/config.status $(srcdir)/execute.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + 
@list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + 
+clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +callback/$(am__dirstamp): + @$(MKDIR_P) callback + @: > callback/$(am__dirstamp) +callback/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) callback/$(DEPDIR) + @: > callback/$(DEPDIR)/$(am__dirstamp) +callback/callback.$(OBJEXT): callback/$(am__dirstamp) \ + callback/$(DEPDIR)/$(am__dirstamp) + +callback/callback$(EXEEXT): $(callback_callback_OBJECTS) $(callback_callback_DEPENDENCIES) $(EXTRA_callback_callback_DEPENDENCIES) callback/$(am__dirstamp) + @rm -f callback/callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(callback_callback_OBJECTS) $(callback_callback_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) +mandelbrot/$(am__dirstamp): + @$(MKDIR_P) mandelbrot + @: > mandelbrot/$(am__dirstamp) +mandelbrot/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mandelbrot/$(DEPDIR) + @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) +mandelbrot/mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ + mandelbrot/$(DEPDIR)/$(am__dirstamp) +mandelbrot/cpu_mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ + mandelbrot/$(DEPDIR)/$(am__dirstamp) + +mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) $(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) + @rm -f mandelbrot/mandelbrot$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) +mult/$(am__dirstamp): + @$(MKDIR_P) mult + @: > mult/$(am__dirstamp) +mult/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mult/$(DEPDIR) + @: > mult/$(DEPDIR)/$(am__dirstamp) +mult/mult.$(OBJEXT): mult/$(am__dirstamp) \ + 
mult/$(DEPDIR)/$(am__dirstamp) +mult/cpu_mult.$(OBJEXT): mult/$(am__dirstamp) \ + mult/$(DEPDIR)/$(am__dirstamp) + +mult/mult$(EXEEXT): $(mult_mult_OBJECTS) $(mult_mult_DEPENDENCIES) $(EXTRA_mult_mult_DEPENDENCIES) mult/$(am__dirstamp) + @rm -f mult/mult$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mult_mult_OBJECTS) $(mult_mult_LDADD) $(LIBS) +task_insert_color/$(am__dirstamp): + @$(MKDIR_P) task_insert_color + @: > task_insert_color/$(am__dirstamp) +task_insert_color/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) task_insert_color/$(DEPDIR) + @: > task_insert_color/$(DEPDIR)/$(am__dirstamp) +task_insert_color/task_insert_color.$(OBJEXT): \ + task_insert_color/$(am__dirstamp) \ + task_insert_color/$(DEPDIR)/$(am__dirstamp) + +task_insert_color/task_insert_color$(EXEEXT): $(task_insert_color_task_insert_color_OBJECTS) $(task_insert_color_task_insert_color_DEPENDENCIES) $(EXTRA_task_insert_color_task_insert_color_DEPENDENCIES) task_insert_color/$(am__dirstamp) + @rm -f task_insert_color/task_insert_color$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(task_insert_color_task_insert_color_OBJECTS) $(task_insert_color_task_insert_color_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f callback/*.$(OBJEXT) + -rm -f mandelbrot/*.$(OBJEXT) + -rm -f mult/*.$(OBJEXT) + -rm -f task_insert_color/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/cpu_mult.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/mult.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@task_insert_color/$(DEPDIR)/task_insert_color.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o 
loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf callback/.libs callback/_libs + -rm -rf mandelbrot/.libs mandelbrot/_libs + -rm -rf mult/.libs mult/_libs + -rm -rf task_insert_color/.libs task_insert_color/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + 
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +check_deps/check_deps.sh.log: check_deps/check_deps.sh + @p='check_deps/check_deps.sh'; \ + b='check_deps/check_deps.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mult/mult_starpu.sh.log: mult/mult_starpu.sh + @p='mult/mult_starpu.sh'; \ + b='mult/mult_starpu.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +task_insert_color/task_insert_color.sh.log: task_insert_color/task_insert_color.sh + @p='task_insert_color/task_insert_color.sh'; \ + b='task_insert_color/task_insert_color.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +variable/variable.sh.log: variable/variable.sh + @p='variable/variable.sh'; \ + b='variable/variable.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log 
--trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +vector_scal/vector_scal.sh.log: vector_scal/vector_scal.sh + @p='vector_scal/vector_scal.sh'; \ + b='vector_scal/vector_scal.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mandelbrot/mandelbrot.sh.log: mandelbrot/mandelbrot.sh + @p='mandelbrot/mandelbrot.sh'; \ + b='mandelbrot/mandelbrot.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +callback/callback.sh.log: callback/callback.sh + @p='callback/callback.sh'; \ + b='callback/callback.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/tag_dep.sh.log: dependency/tag_dep.sh + @p='dependency/tag_dep.sh'; \ + b='dependency/tag_dep.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/task_dep.sh.log: dependency/task_dep.sh + @p='dependency/task_dep.sh'; \ + b='dependency/task_dep.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +dependency/end_dep.sh.log: dependency/end_dep.sh + @p='dependency/end_dep.sh'; \ + b='dependency/end_dep.sh'; \ + $(am__check_pre) $(LOG_DRIVER) 
--test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +axpy/axpy.sh.log: axpy/axpy.sh + @p='axpy/axpy.sh'; \ + b='axpy/axpy.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cholesky/cholesky.sh.log: cholesky/cholesky.sh + @p='cholesky/cholesky.sh'; \ + b='cholesky/cholesky.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +gemm/gemm.sh.log: gemm/gemm.sh + @p='gemm/gemm.sh'; \ + b='gemm/gemm.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mult/mult.log: mult/mult$(EXEEXT) + @p='mult/mult$(EXEEXT)'; \ + b='mult/mult'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +task_insert_color/task_insert_color.log: task_insert_color/task_insert_color$(EXEEXT) + @p='task_insert_color/task_insert_color$(EXEEXT)'; \ + b='task_insert_color/task_insert_color'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mandelbrot/mandelbrot.log: mandelbrot/mandelbrot$(EXEEXT) + @p='mandelbrot/mandelbrot$(EXEEXT)'; \ + b='mandelbrot/mandelbrot'; \ + $(am__check_pre) $(LOG_DRIVER) 
--test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +callback/callback.log: callback/callback$(EXEEXT) + @p='callback/callback$(EXEEXT)'; \ + b='callback/callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + 
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f callback/$(DEPDIR)/$(am__dirstamp) + -rm -f callback/$(am__dirstamp) + -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) + -rm -f mandelbrot/$(am__dirstamp) + -rm -f mult/$(DEPDIR)/$(am__dirstamp) + -rm -f mult/$(am__dirstamp) + -rm -f task_insert_color/$(DEPDIR)/$(am__dirstamp) + -rm -f task_insert_color/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f callback/$(DEPDIR)/callback.Po + -rm -f mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po + -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po + -rm -f mult/$(DEPDIR)/cpu_mult.Po + -rm -f mult/$(DEPDIR)/mult.Po + -rm -f task_insert_color/$(DEPDIR)/task_insert_color.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f callback/$(DEPDIR)/callback.Po + -rm -f mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po + -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po + -rm -f mult/$(DEPDIR)/cpu_mult.Po + -rm -f mult/$(DEPDIR)/mult.Po + -rm -f task_insert_color/$(DEPDIR)/task_insert_color.Po + -rm -f Makefile 
+maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP 
?= 4 + +showcheckfailed: # dump each test log that contains a line starting with "FAIL ", then recurse into SUBDIRS + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: # fail when a test log holds a FAIL line or a sanitizer/runtime-error report, then recurse into SUBDIRS + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: # print every test log, fail on sanitizer/runtime-error reports, then recurse into SUBDIRS + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: # same checks as showcheck, applied to TEST_SUITE_LOG instead of TEST_LOGS + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @!
grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it.
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/julia/examples/axpy/axpy.jl b/julia/examples/axpy/axpy.jl new file mode 100644 index 0000000..1994d52 --- /dev/null +++ b/julia/examples/axpy/axpy.jl @@ -0,0 +1,99 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +using StarPU +using Printf +const EPSILON = 1e-6 + +function check(alpha, X, Y) # verify Y[i] matches alpha*X[i] + 4.0 within relative tolerance EPSILON; calls error() on the first mismatch + for i in 1:length(X) + expected_value = alpha * X[i] + 4.0 + if abs(Y[i] - expected_value) > abs(expected_value) * EPSILON # abs() keeps the tolerance non-negative when expected_value < 0 + error("at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i], ", expected ", expected_value) + end + end +end + +@target STARPU_CPU+STARPU_CUDA +@codelet function axpy(X :: Vector{Float32}, Y :: Vector{Float32}, alpha ::Float32) :: Nothing + STARPU_SAXPY(length(X), alpha, X, 1, Y, 1) + return +end + +function axpy(N, NBLOCKS, alpha, display = true) + X = Array(fill(1.0f0, N)) + Y = Array(fill(4.0f0, N)) + + starpu_memory_pin(X) + starpu_memory_pin(Y) + + block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS) + + if display + println("BEFORE x[0] = ", X[1]) + println("BEFORE y[0] = ", Y[1]) + end + + t_start = time_ns() + + @starpu_block let + hX,hY = starpu_data_register(X, Y) + + starpu_data_partition(hX, block_filter) + starpu_data_partition(hY, block_filter) + + for b in 1:NBLOCKS + starpu_task_insert(codelet_name = "axpy", + handles = [hX[b], hY[b]], + cl_arg = (Float32(alpha),), + tag = starpu_tag_t(b), + modes = [STARPU_R, STARPU_RW]) + end + + starpu_task_wait_for_all() + end + + t_end = time_ns() + + timing = (t_end-t_start)/1000 + + if display + @printf("timing -> %d us %.2f MB/s\n", timing, 3*N*4/timing) + println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")") + end + + check(alpha, X, Y) + + starpu_memory_unpin(X) + starpu_memory_unpin(Y) +end + +function main() + N = 16 * 1024 * 1024 + NBLOCKS = 8 + alpha = 3.41 + + starpu_init() + starpu_cublas_init() + + # warmup + axpy(10, 1, alpha, false) + + axpy(N, NBLOCKS, alpha) + + starpu_shutdown() +end + +main() diff --git a/julia/examples/axpy/axpy.sh b/julia/examples/axpy/axpy.sh new file mode 100755 index 0000000..7c14881 --- /dev/null +++ b/julia/examples/axpy/axpy.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +"$(dirname "$0")/../execute.sh" axpy/axpy.jl # quoted so a script path containing spaces or glob characters still resolves + diff --git a/julia/examples/black_scholes/black_scholes.jl b/julia/examples/black_scholes/black_scholes.jl new file mode 100644 index 0000000..1904364 --- /dev/null +++ b/julia/examples/black_scholes/black_scholes.jl @@ -0,0 +1,208 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2019-2019 Mael Keryell +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import Libdl +using StarPU + +@target STARPU_CPU+STARPU_CUDA +@codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32 + + widthn ::Int64 = width(data) + + # data[1,...] -> S + # data[2,...] -> K + # data[3,...] -> r + # data[4,...] -> T + # data[5,...]
-> sig + + p ::Float64 = 0.2316419 + b1 ::Float64 = 0.31938153 + b2 ::Float64 = -0.356563782 + b3 ::Float64 = 1.781477937 + b4 ::Float64 = -1.821255978 + b5 ::Float64 = 1.330274428 + + + @parallel for i = 1:widthn + + + d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i])) + d2 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] - pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i])) + + + + + f ::Float64 = 0 + ff ::Float64 = 0 + s1 ::Float64 = 0 + s2 ::Float64 = 0 + s3 ::Float64 = 0 + s4 ::Float64 = 0 + s5 ::Float64 = 0 + sz ::Float64 = 0 + + + + + ######## Compute normcdf of d1 + + normd1p ::Float64 = 0 + normd1n ::Float64 = 0 + + boold1 ::Int64 = (d1 >= 0) + (d1 <= 0) + + if (boold1 >= 2) + normd1p = 0.5 + normd1n = 0.5 + else + tmp1 ::Float64 = abs(d1) + f = 1 / sqrt(2 * M_PI) + ff = exp(-pow(tmp1, 2.0) / 2) * f + s1 = b1 / (1 + p * tmp1) + s2 = b2 / pow((1 + p * tmp1), 2.0) + s3 = b3 / pow((1 + p * tmp1), 3.0) + s4 = b4 / pow((1 + p * tmp1), 4.0) + s5 = b5 / pow((1 + p * tmp1), 5.0) + sz = ff * (s1 + s2 + s3 + s4 + s5) + + if (d1 > 0) + normd1p = 1 - sz # normcdf(d1) + normd1n = sz # normcdf(-d1) + else + normd1p = sz + normd1n = 1 - sz + end + end + ######## + + + ######## Compute normcdf of d2 + normd2p ::Float64 = 0 + normd2n ::Float64 = 0 + + boold2 ::Int64 = (d2 >= 0) + (d2 <= 0) + + if (boold2 >= 2) + normd2p = 0.5 + normd2n = 0.5 + else + tmp2 ::Float64 = abs(d2) + f = 1 / sqrt(2 * M_PI) + ff = exp(-pow(tmp2, 2.0) / 2) * f + s1 = b1 / (1 + p * tmp2) + s2 = b2 / pow((1 + p * tmp2), 2.0) + s3 = b3 / pow((1 + p * tmp2), 3.0) + s4 = b4 / pow((1 + p * tmp2), 4.0) + s5 = b5 / pow((1 + p * tmp2), 5.0) + sz = ff * (s1 + s2 + s3 + s4 + s5) + + + if (d2 > 0) + normd2p = 1 - sz # normcdf(d2) + normd2n = sz # normcdf(-d2) + else + normd2p = sz + normd2n = 1 - sz + end + end + # normd1p = (1 + erf(d1/sqrt(2.0)))/2.0 + # normd1n = (1 + erf(-d1/sqrt(2.0)))/2.0 + + # normd2p 
= (1 + erf(d2/sqrt(2.0)))/2.0 + # normd2n = (1 + erf(-d2/sqrt(2.0)))/2.0 + + res[1,i] = data[1,i] * (normd1p) - data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2p) # S * N(d1) - K*exp(-r*T) * norm(d2) + res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + K*exp(-r*T) * norm(-d2) + + end + return 0 +end + +starpu_init() + +function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64) + vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices) + + @starpu_block let + dat_handle, res_handle = starpu_data_register(data, res) + + starpu_data_partition(dat_handle, vert) + starpu_data_partition(res_handle, vert) + + #Compute the price of call and put option in the res matrix + @starpu_sync_tasks for task in (1:nslices) + @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] + end + end + return 0 +end + + +function init_data(data, data_nbr); + for i in 1:data_nbr + data[1,i] = rand(Float64) * 100 + data[2,i] = rand(Float64) * 100 + data[3,i] = rand(Float64) + data[4,i] = rand(Float64) * 10 + data[5,i] = rand(Float64) * 10 + end + return data +end + + + +function median_times(data_nbr, nslices, nbr_tests) + + data ::Matrix{Float64} = zeros(5, data_nbr) + # data[1,1] = 100.0 + # data[2,1] = 100.0 + # data[3,1] = 0.05 + # data[4,1] = 1.0 + # data[5,1] = 0.2 + + + res ::Matrix{Float64} = zeros(2, data_nbr) + + exec_times ::Vector{Float64} = [0.
for i in 1:nbr_tests] + + for i = 1:nbr_tests + + init_data(data, data_nbr) + + t_start = time_ns() + black_scholes_starpu(data, res, nslices); + t = (time_ns() - t_start) / 1e9 + + exec_times[i] = t + end + sort!(exec_times) + # println(data) + # println(res) + + return exec_times[1 + div(nbr_tests - 1, 2)] +end + +function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests) + i = 1 + open("black_scholes_times.dat", "w") do f + for data_nbr in (start_nbr : step_nbr : stop_nbr) + t = median_times(data_nbr, nslices, nbr_tests) + println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])") + write(f, "$data_nbr $(t)\n") + i = i + 1 + end + end +end diff --git a/julia/examples/callback/callback.c b/julia/examples/callback/callback.c new file mode 100644 index 0000000..43453bc --- /dev/null +++ b/julia/examples/callback/callback.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is an example of using a callback. We submit a task, whose callback + * submits another task (without any callback). + */ + +#include <starpu.h> + +#define FPRINTF(ofile, fmt, ...)
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +starpu_data_handle_t handle; + +void cpu_codelet(void *descr[], void *_args) +{ + (void)_args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *val += 1; +} + +struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, + .nbuffers = 1, + .name = "callback" +}; + +void callback_func(void *callback_arg) +{ + int ret; + + (void)callback_arg; + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +int main(void) +{ + int v=40; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->callback_func = callback_func; + task->callback_arg = NULL; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_wait_for_all(); + starpu_data_unregister(handle); + + FPRINTF(stderr, "v -> %d\n", v); + + starpu_shutdown(); + + return (v == 42) ? 0 : 1; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/julia/examples/callback/callback.jl b/julia/examples/callback/callback.jl new file mode 100644 index 0000000..9464e2e --- /dev/null +++ b/julia/examples/callback/callback.jl @@ -0,0 +1,77 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +using StarPU + +@target STARPU_CPU +@codelet function variable(val ::Ref{Int32}) :: Nothing + val[] = val[] + 1 + + return +end + +function callback(args) + cl = args[1] + handles = args[2] + + task = starpu_task(cl = cl, handles=handles) + starpu_task_submit(task) +end + +function variable_with_starpu(val ::Ref{Int32}) + perfmodel = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "history_perf" + ) + + cl = starpu_codelet( + cpu_func = "variable", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + + @starpu_block let + hVal = starpu_data_register(val) + + starpu_task_insert(codelet_name = "variable", + cl = cl, + handles = [hVal], + callback = callback, + callback_arg = (cl, [hVal])) + + starpu_task_wait_for_all() + end +end + +function display() + v = Ref(Int32(40)) + + variable_with_starpu(v) + + println("variable -> ", v[]) + if v[] == 42 + println("result is correct") + else + error("result is incorret") + end +end + +# Disable garbage collector because of random segfault/hang when using mutex. +# This issue should be solved with Julia release 1.5. 
+GC.enable(false) +starpu_init() +display() +starpu_shutdown() +GC.enable(true) diff --git a/julia/examples/callback/callback.sh b/julia/examples/callback/callback.sh new file mode 100755 index 0000000..a9b6001 --- /dev/null +++ b/julia/examples/callback/callback.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh callback/callback.jl + diff --git a/julia/examples/check_deps/check_deps.jl b/julia/examples/check_deps/check_deps.jl new file mode 100644 index 0000000..9baaf15 --- /dev/null +++ b/julia/examples/check_deps/check_deps.jl @@ -0,0 +1,32 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +import Pkg + +try + using CBinding + using Clang + using ThreadPools +catch + Pkg.activate((@__DIR__)*"/../..") + Pkg.instantiate() + using Clang + using CBinding + using ThreadPools +end + +using StarPU + +starpu_translate_headers() diff --git a/julia/examples/check_deps/check_deps.sh b/julia/examples/check_deps/check_deps.sh new file mode 100755 index 0000000..091925e --- /dev/null +++ b/julia/examples/check_deps/check_deps.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh check_deps/check_deps.jl + + diff --git a/julia/examples/cholesky/cholesky.sh b/julia/examples/cholesky/cholesky.sh new file mode 100755 index 0000000..b0ac5af --- /dev/null +++ b/julia/examples/cholesky/cholesky.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh cholesky/cholesky_native.jl -quickcheck +$(dirname $0)/../execute.sh cholesky/cholesky_implicit.jl -quickcheck +$(dirname $0)/../execute.sh cholesky/cholesky_tag.jl -quickcheck diff --git a/julia/examples/cholesky/cholesky_codelets.jl b/julia/examples/cholesky/cholesky_codelets.jl new file mode 100644 index 0000000..3c7772a --- /dev/null +++ b/julia/examples/cholesky/cholesky_codelets.jl @@ -0,0 +1,52 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +chol_model_potrf = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "chol_model_potrf" +) + +chol_model_trsm = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "chol_model_trsm" +) + +chol_model_gemm = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "chol_model_gemm" +) + +cl_potrf = starpu_codelet( + cpu_func = "potrf", + cuda_func = "potrf", + modes = [STARPU_RW], + color = 0xffff00, + perfmodel = chol_model_potrf +) +cl_trsm = starpu_codelet( + cpu_func = "trsm", + cuda_func = "trsm", + modes = [STARPU_R, STARPU_RW], + color = 0x8080ff, + perfmodel = chol_model_trsm +) +cl_gemm = starpu_codelet( + cpu_func = "gemm", + cuda_func = "gemm", + modes = [STARPU_R, STARPU_R, STARPU_RW], + color = 0x00ff00, + perfmodel = chol_model_gemm +) diff --git a/julia/examples/cholesky/cholesky_common.jl b/julia/examples/cholesky/cholesky_common.jl new file mode 100644 index 0000000..7ceec8d --- /dev/null +++ b/julia/examples/cholesky/cholesky_common.jl @@ -0,0 +1,166 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Standard kernels for the Cholesky factorization + +@target STARPU_CPU+STARPU_CUDA +@codelet function potrf(sub11 :: Matrix{Float32}) :: Nothing + nx :: Int32 = width(sub11) + ld :: Int32 = ld(sub11) + + for z in 0:nx-1 + lambda11 :: Float32 = sqrt(sub11[z+1,z+1]) + sub11[z+1,z+1] = lambda11 + + alpha ::Float32 = 1.0f0 / lambda11 + X :: Vector{Float32} = view(sub11, z+2:z+2+(nx-z-2), z+1) + STARPU_SSCAL(nx-z-1, alpha, X, 1) + + alpha = -1.0f0 + A :: Matrix{Float32} = view(sub11, z+2:z+2+(nx-z-2), z+2:z+2+(nx-z-2)) + STARPU_SSYR("L", nx-z-1, alpha, X, 1, A, ld) + end + return +end + +@target STARPU_CPU+STARPU_CUDA +@codelet function trsm(sub11 :: Matrix{Float32}, + sub21 :: Matrix{Float32}) :: Nothing + ld11 :: Int32 = ld(sub11) + ld21 :: Int32 = ld(sub21) + nx21 :: Int32 = width(sub21) + ny21 :: Int32 = height(sub21) + alpha :: Float32 = 1.0f0 + STARPU_STRSM("R", "L", "T", "N", nx21, ny21, alpha, sub11, ld11, sub21, ld21) + return +end + +@target STARPU_CPU+STARPU_CUDA +@codelet function gemm(left :: Matrix{Float32}, + right :: Matrix{Float32}, + center :: Matrix{Float32}) :: Nothing + dx :: Int32 = width(center) + dy :: Int32 = height(center) + dz :: Int32 = width(left) + ld21 :: Int32 = ld(left) + ld12 :: Int32 = ld(right) + ld22 :: Int32 = ld(center) + alpha :: Float32 = -1.0f0 + beta :: Float32 = 1.0f0 + STARPU_SGEMM("N", "T", dy, dx, dz, alpha, left, ld21, right, ld12, beta, center, ld22) + return +end + +@inline function tag_potrf(k) + return starpu_tag_t((UInt64(1)<<60) | UInt64(k)) +end + +@inline function tag_trsm(k, j) + return starpu_tag_t((UInt64(3)<<60) | (UInt64(k)<<32) | UInt64(j)) +end + +@inline function tag_gemm(k, i, j) + return starpu_tag_t((UInt64(4)<<60) | (UInt64(k)<<32) | (UInt64(i)<<16) | UInt64(j)) +end + +function check(mat::Matrix{Float32}) + size_p = size(mat, 1) + + for i in 1:size_p + for j in 1:size_p + if j > i + mat[i, j] = 0.0f0 + end + end + end + + test_mat ::Matrix{Float32} = zeros(Float32, size_p, size_p) + + syrk!('L',
'N', 1.0f0, mat, 0.0f0, test_mat) + + for i in 1:size_p + for j in 1:size_p + if j <= i + orig = (1.0f0/(1.0f0+(i-1)+(j-1))) + ((i == j) ? 1.0f0*size_p : 0.0f0) + err = abs(test_mat[i,j] - orig) / orig + if err > 0.0001 + got = test_mat[i,j] + expected = orig + error("[$i, $j] -> $got != $expected (err $err)") + end + end + end + end + + println(stderr, "Verification successful !") +end + +function clean_tags(nblocks) + for k in 1:nblocks + starpu_tag_remove(tag_potrf(k)) + + for m in k+1:nblocks + starpu_tag_remove(tag_trsm(k, m)) + + for n in k+1:nblocks + if n <= m + starpu_tag_remove(tag_gemm(k, m, n)) + end + end + end + end +end + +function main(size_p :: Int, nblocks :: Int; verify = false, verbose = false) + mat :: Matrix{Float32} = zeros(Float32, size_p, size_p) + + # create a simple symmetric positive definite matrix + # Hilbert matrix h(i,j) = 1/(i+j-1) (1-based indices), with size_p added on the diagonal + + for i in 1:size_p + for j in 1:size_p + mat[i, j] = 1.0f0 / (1.0f0+(i-1)+(j-1)) + ((i == j) ? 1.0f0*size_p : 0.0f0) + end + end + + if verbose + display(mat) + end + + starpu_memory_pin(mat) + + t_start = time_ns() + + cholesky(mat, size_p, nblocks) + + t_end = time_ns() + + starpu_memory_unpin(mat) + + flop = (1.0*size_p*size_p*size_p)/3.0 + time_ms = (t_end-t_start) / 1e6 + gflops = flop/(time_ms*1000)/1000 + println("$size_p\t$time_ms\t$gflops") + + clean_tags(nblocks) + + if verbose + display(mat) + end + + if verify + check(mat) + end +end diff --git a/julia/examples/cholesky/cholesky_implicit.jl b/julia/examples/cholesky/cholesky_implicit.jl new file mode 100644 index 0000000..a181f73 --- /dev/null +++ b/julia/examples/cholesky/cholesky_implicit.jl @@ -0,0 +1,71 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +using StarPU +using LinearAlgebra.BLAS + +include("cholesky_common.jl") + +function cholesky(mat :: Matrix{Float32}, size, nblocks) + include("cholesky_codelets.jl") + + horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nblocks) + vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nblocks) + + @starpu_block let + h_mat = starpu_data_register(mat) + starpu_data_map_filters(h_mat, horiz, vert) + + for k in 1:nblocks + + starpu_iteration_push(k) + + starpu_task_insert(cl = cl_potrf, handles = [h_mat[k, k]], tag_only = tag_potrf(k)) + + for m in k+1:nblocks + starpu_task_insert(cl = cl_trsm, handles = [h_mat[k, k], h_mat[m, k]], tag_only = tag_trsm(m, k)) + end + starpu_data_wont_use(h_mat[k, k]) + + for m in k+1:nblocks + for n in k+1:nblocks + if n <= m + starpu_task_insert(cl = cl_gemm, handles = [h_mat[m, k], h_mat[n, k], h_mat[m, n]], tag_only= tag_gemm(k, m, n)) + end + end + starpu_data_wont_use(h_mat[m, k]) + end + + starpu_iteration_pop() + end + + starpu_task_wait_for_all() + end +end + +starpu_init() +starpu_cublas_init() + +println("# size\tms\tGFlops") + +if length(ARGS) > 0 && ARGS[1] == "-quickcheck" + main(1024, 8, verify = true) +else + for size in 1024:1024:15360 + main(size, 16) + end +end + +starpu_shutdown() diff --git a/julia/examples/cholesky/cholesky_native.jl b/julia/examples/cholesky/cholesky_native.jl new file mode 100644 index
0000000..8dc6de6 --- /dev/null +++ b/julia/examples/cholesky/cholesky_native.jl @@ -0,0 +1,94 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +using LinearAlgebra + +function check(mat::Matrix{Float32}) + size_p = size(mat, 1) + + for i in 1:size_p + for j in 1:size_p + if j < i + mat[i, j] = 0.0f0 + end + end + end + + test_mat ::Matrix{Float32} = zeros(Float32, size_p, size_p) + + BLAS.syrk!('L', 'T', 1.0f0, mat, 0.0f0, test_mat) + + for i in 1:size_p + for j in 1:size_p + if j <= i + orig = (1.0f0/(1.0f0+(i-1)+(j-1))) + ((i == j) ? 1.0f0*size_p : 0.0f0) + err = abs(test_mat[i,j] - orig) / orig + if err > 0.0001 + got = test_mat[i,j] + expected = orig + error("[$i, $j] -> $got != $expected (err $err)") + end + end + end + end + + println(stderr, "Verification successful !") +end + +function main(size_p :: Int; verify = false, verbose = false) + mat = zeros(Float32, size_p, size_p) + # create a simple symmetric positive definite matrix + # Hilbert matrix h(i,j) = 1/(i+j-1) (1-based indices), with size_p added on the diagonal + + for i in 1:size_p + for j in 1:size_p + mat[i, j] = 1.0f0 / (1.0f0+(i-1)+(j-1)) + ((i == j) ?
1.0f0*size_p : 0.0f0) + end + end + + if verbose + display(mat) + end + + t_start = time_ns() + + cholesky!(mat) + + t_end = time_ns() + + flop = (1.0*size_p*size_p*size_p)/3.0 + time_ms = (t_end-t_start) / 1e6 + gflops = flop/(time_ms*1000)/1000 + println("$size_p\t$time_ms\t$gflops") + + if verbose + display(mat) + end + + if verify + check(mat) + end +end + +println("# size\tms\tGFlops") + +if length(ARGS) > 0 && ARGS[1] == "-quickcheck" + main(1024, verify = true) +else + for size in 1024:1024:15360 + main(size) + end +end + diff --git a/julia/examples/cholesky/cholesky_tag.jl b/julia/examples/cholesky/cholesky_tag.jl new file mode 100644 index 0000000..005bf9e --- /dev/null +++ b/julia/examples/cholesky/cholesky_tag.jl @@ -0,0 +1,93 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +using StarPU +using LinearAlgebra.BLAS + +include("cholesky_common.jl") + +function cholesky(mat :: Matrix{Float32}, size, nblocks) + include("cholesky_codelets.jl") + + horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nblocks) + vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nblocks) + + @starpu_block let + h_mat = starpu_data_register(mat) + starpu_data_set_sequential_consistency_flag(h_mat, 0) + starpu_data_map_filters(h_mat, horiz, vert) + + entry_task = starpu_task(cl = cl_potrf, + handles = [h_mat[1, 1]], + tag = tag_potrf(1)) + + for k in 1:nblocks + + starpu_iteration_push(k) + + if k > 1 + # enforce dependencies... + starpu_tag_declare_deps(tag_potrf(k), tag_gemm(k-1, k, k)) + starpu_task_insert(cl = cl_potrf, + handles = [h_mat[k, k]], + tag = tag_potrf(k)) + end + + for m in k+1:nblocks + # enforce dependencies... + if k > 1 + starpu_tag_declare_deps(tag_trsm(k, m), tag_potrf(k), tag_gemm(k-1, m, k)) + else + starpu_tag_declare_deps(tag_trsm(k, m), tag_potrf(k)) + end + + starpu_task_insert(cl = cl_trsm, handles = [h_mat[k, k], h_mat[m, k]], tag = tag_trsm(k, m)) + + for n in k+1:nblocks + if n <= m + # enforce dependencies... 
+ if k > 1 + starpu_tag_declare_deps(tag_gemm(k, m, n), tag_gemm(k-1, m, n), tag_trsm(k, n), tag_trsm(k, m)) + else + starpu_tag_declare_deps(tag_gemm(k, m, n), tag_trsm(k, n), tag_trsm(k, m)) + end + + starpu_task_insert(cl = cl_gemm, handles = [h_mat[m, k], h_mat[n, k], h_mat[m, n]], tag = tag_gemm(k, m, n)) + end + end + end + + starpu_iteration_pop() + end + + starpu_task_submit(entry_task) + starpu_tag_wait(tag_potrf(nblocks)) + end +end + +starpu_init() +starpu_cublas_init() + +println("# size\tms\tGFlops") + +if length(ARGS) > 0 && ARGS[1] == "-quickcheck" + main(1024, 8, verify = true) +else + for size in 1024:1024:15360 + main(size, 16) + end +end + +starpu_shutdown() diff --git a/julia/examples/dependency/end_dep.jl b/julia/examples/dependency/end_dep.jl new file mode 100644 index 0000000..6a686f4 --- /dev/null +++ b/julia/examples/dependency/end_dep.jl @@ -0,0 +1,104 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +using StarPU + +@target STARPU_CPU +@codelet function codeletA() :: Nothing + # print("[Task A] Value = ", val[]); + # do nothing +end + +@target STARPU_CPU +@codelet function codeletB(val ::Ref{Int32}) :: Nothing + # println("[Task B] Value = ", val[]); + val[] = val[] *2 +end + +function callbackB(task) + sleep(1) + starpu_task_end_dep_release(task) +end + +@target STARPU_CPU +@codelet function codeletC(val ::Ref{Int32}) :: Nothing + # println("[Task C] Value = ", val[]); + val[] = val[] *2 +end + +function callbackC(task) + starpu_task_end_dep_release(task) +end + + +function main() + value = Ref(Int32(12)) + + @starpu_block let + perfmodel = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "history_perf" + ) + + clA = starpu_codelet( + cpu_func = "codeletA", + perfmodel = perfmodel + ) + clB = starpu_codelet( + cpu_func = "codeletB", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + clC = starpu_codelet( + cpu_func = "codeletC", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + + handle = starpu_data_register(value) + + starpu_data_set_sequential_consistency_flag(handle, 0) + + taskA = starpu_task(cl = clA, detach=0) + taskB = starpu_task(cl = clB, handles = [handle], callback=callbackB, callback_arg=taskA) + taskC = starpu_task(cl = clC, handles = [handle], callback=callbackC, callback_arg=taskA) + + starpu_task_end_dep_add(taskA, 2) + starpu_task_declare_deps(taskC, taskB) + + starpu_task_submit(taskA) + starpu_task_submit(taskB) + starpu_task_submit(taskC) + starpu_task_wait(taskA) + + starpu_data_acquire_on_node(handle, STARPU_MAIN_RAM, STARPU_R); + # Waiting for taskA should have also waited for taskB and taskC + if value[] != 48 + error("Incorrect value $(value[]) (expected 48)") + end + starpu_data_release_on_node(handle, STARPU_MAIN_RAM); + end + + + println("Value = ", value[]) +end + +# Disable garbage collector because of random segfault/hang when using mutex. 
+# This issue should be solved with Julia release 1.5. +GC.enable(false) +starpu_init() +main() +starpu_shutdown() +GC.enable(true) diff --git a/julia/examples/dependency/end_dep.sh b/julia/examples/dependency/end_dep.sh new file mode 100755 index 0000000..bb2705c --- /dev/null +++ b/julia/examples/dependency/end_dep.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh dependency/end_dep.jl diff --git a/julia/examples/dependency/tag_dep.jl b/julia/examples/dependency/tag_dep.jl new file mode 100644 index 0000000..00920fd --- /dev/null +++ b/julia/examples/dependency/tag_dep.jl @@ -0,0 +1,122 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +using StarPU + +@target STARPU_CPU +@codelet function codeletA(val ::Ref{Int32}) :: Nothing + # print("[Task A] Value = ", val[]); + val[] = val[] * 2 +end + +function callbackA(arg) + clB = arg[1] + handle = arg[2] + tagHoldC = arg[3] + + taskB = starpu_task(cl = clB, handles = [handle], + callback = starpu_tag_notify_from_apps, + callback_arg = tagHoldC, + sequential_consistency=false) + + starpu_task_submit(taskB) +end + +@target STARPU_CPU +@codelet function codeletB(val ::Ref{Int32}) :: Nothing + # println("[Task B] Value = ", val[]); + val[] = val[] +1 +end + +@target STARPU_CPU +@codelet function codeletC(val ::Ref{Int32}) :: Nothing + # println("[Task C] Value = ", val[]); + val[] = val[] *2 +end + + +# Submit taskA and hold it +# Submit taskC and hold it +# Release taskA +# Execute taskA --> callback: submit taskB +# Execute taskB --> callback: release taskC +# +# All three tasks use the same data in RW, taskB is submitted after +# taskC, so taskB should normally only execute after taskC but as the +# sequential consistency for (taskB, data) is unset, taskB can +# execute straightaway +function main() + value = Ref(Int32(12)) + + @starpu_block let + tagHoldA :: starpu_tag_t = 32 + tagHoldC :: starpu_tag_t = 84 + tagA :: starpu_tag_t = 421 + tagC :: starpu_tag_t = 842 + + starpu_tag_declare_deps(tagA, tagHoldA) + starpu_tag_declare_deps(tagC, tagHoldC) + + perfmodel = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "history_perf" + ) + + clA = starpu_codelet( + cpu_func = "codeletA", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + clB = starpu_codelet( + cpu_func = "codeletB", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + clC = starpu_codelet( + cpu_func = "codeletC", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + + handle = starpu_data_register(value) + + taskA = starpu_task(cl = clA, handles = [handle], tag = tagA, + callback = callbackA, + callback_arg=(clB, handle, tagHoldC)) + 
# Run the tag_dep Julia example through the execute.sh launcher located one
# directory above this script.  Both expansions are quoted so that a checkout
# path containing spaces or glob characters still resolves to a single word.
"$(dirname "$0")/../execute.sh" dependency/tag_dep.jl
# CPU codelet run as task A: doubles, in place, the Int32 referenced by `val`.
# main() checks the final value against 52 after tasks A, B and C have run.
@target STARPU_CPU
@codelet function codeletA(val ::Ref{Int32}) :: Nothing
    # print("[Task A] Value = ", val[]);
    val[] = val[] * 2
end
# Run the task_dep Julia example through the execute.sh launcher located one
# directory above this script.  Both expansions are quoted so that a checkout
# path containing spaces or glob characters still resolves to a single word.
"$(dirname "$0")/../execute.sh" dependency/task_dep.jl
# Launcher for the Julia examples.
#
# Usage: execute.sh [-calllib <c-source>] <example.jl> [args...]
#
# Paths are given relative to the julia/examples source directory.  With
# -calllib, <c-source> is first built into ./extern_tasks.so and exported to
# the example through JULIA_TASK_LIB.
#
# Every expansion is quoted, and the remaining arguments are forwarded with
# "$@", so that paths and arguments containing spaces reach Julia intact.

set -x
export JULIA_LOAD_PATH="@STARPU_SRC_DIR@/julia/src:$JULIA_LOAD_PATH"
export STARPU_BUILD_DIR="@STARPU_BUILD_DIR@"
export STARPU_SRC_DIR="@STARPU_SRC_DIR@"
export STARPU_JULIA_LIB="@STARPU_BUILD_DIR@/julia/src/.libs/libstarpujulia-1.3"
export STARPU_JULIA_BUILD="@STARPU_BUILD_DIR@/julia"
export LD_LIBRARY_PATH="@STARPU_BUILD_DIR@/julia/src/.libs/:$LD_LIBRARY_PATH"
export JULIA_NUM_THREADS=8
export STARPU_NOPENCL=0
export STARPU_SCHED=dmda

srcdir="@STARPU_SRC_DIR@/julia/examples"

# Remove generated sources/objects left in the current directory.
rm -f genc*.c gencuda*.cu genc*.o

if test "$1" = "-calllib"
then
	shift
	pwd
	rm -f extern_tasks.so
	make -f "@STARPU_BUILD_DIR@/julia/src/dynamic_compiler/Makefile" extern_tasks.so SOURCES_CPU="$srcdir/$1"
	shift
	export JULIA_TASK_LIB="$PWD/extern_tasks.so"
fi

srcfile=$1
if test ! -f "$srcdir/$srcfile"
then
	echo "Error. File $srcdir/$srcfile not found"
	exit 1
fi
shift
#cd $srcdir/$(dirname $srcfile)
#exec @JULIA@ $(basename $srcfile) $*
exec @JULIA@ "$srcdir/$srcfile" "$@"
# Runs the "gemm" codelet over a tiled decomposition of A, B and C, ten times
# in a row, and returns the shortest time measured for one full sweep (a
# difference of two time_ns() readings).
#
# B is split with the vertical-block filter into `nslicesx` pieces, A with the
# block filter into `nslicesy` pieces, and C with both filters; one task is
# inserted per (taskx, tasky) pair.  C is accessed in STARPU_RW mode by every
# sweep, so the ten sweeps all update the same C.
function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, alpha :: Float32, beta :: Float32, nslicesx, nslicesy)
    # NOTE(review): `scale` is never read in this function.
    scale= 3
    tmin=0
    vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
    horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
    @starpu_block let
        hA,hB,hC = starpu_data_register(A, B, C)
        starpu_data_partition(hB, vert)
        starpu_data_partition(hA, horiz)
        starpu_data_map_filters(hC, vert, horiz)
        tmin=0

        for i in (1 : 10 )
            t=time_ns()
            # One task per tile of C; the timed region ends when the
            # @starpu_sync_tasks block is left.
            @starpu_sync_tasks begin
                for taskx in (1 : nslicesx)
                    for tasky in (1 : nslicesy)
                        starpu_task_insert(codelet_name = "gemm",
                                           handles = [hA[tasky], hB[taskx], hC[taskx, tasky]],
                                           cl_arg = (alpha, beta),
                                           modes = [STARPU_R, STARPU_R, STARPU_RW])
                    end
                end
            end
            t=time_ns()-t
            # tmin == 0 means "no measurement yet".
            if (tmin==0 || tmin>t)
                tmin=t
            end
        end
    end
    return tmin
end
# Benchmarks the StarPU gemm for every square dimension in
# start_dim:step_dim:stop_dim.
#
# For each dimension, random A and B and a zero C are pinned, the tiled
# product is timed with multiply_with_starpu (best time, in nanoseconds), and
# "<dim> <gflops>" is written both to `io` and to standard output.  The result
# left in C is then compared with a reference computed by check().
function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
    for dim in (start_dim : step_dim : stop_dim)
        A = rand(Cfloat, dim, dim)
        B = rand(Cfloat, dim, dim)
        C = zeros(Float32, dim, dim)
        # Untouched copy of the initial C, used by check() as starting point.
        C_ref = copy(C)
        starpu_memory_pin(A)
        starpu_memory_pin(B)
        starpu_memory_pin(C)
        alpha = 4.0f0
        beta = 2.0f0
        mt =  multiply_with_starpu(A, B, C, alpha, beta, nslicesx, nslicesy)
        # 2*dim^3 floating point operations, expressed in Gflop; mt is in ns.
        gflop = 2 * dim * dim * dim * 1.e-9
        gflops = gflop / (mt * 1.e-9)
        println(io,"$dim $gflops")
        println("$dim $gflops")
        starpu_memory_unpin(A)
        starpu_memory_unpin(B)
        starpu_memory_unpin(C)
        check(C_ref, A, B, C, alpha, beta)
    end
end
# Directory holding this wrapper; quoted so that a checkout path containing
# spaces or glob characters still resolves to a single word.
here="$(dirname "$0")"

# Plain Julia BLAS version.
"$here/../execute.sh" gemm/gemm_native.jl

# StarPU version, run with OMP_NUM_THREADS set to 1.
export OMP_NUM_THREADS=1
"$here/../execute.sh" gemm/gemm.jl
# Times ten successive BLAS gemm! calls (C = alpha*A*B + beta*C, updating C in
# place each time) and returns the shortest elapsed time, as a difference of
# two time_ns() readings.  A stored value of 0 means "nothing recorded yet".
function gemm_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, alpha :: Float32, beta :: Float32)
    best = 0
    for _ in 1:10
        started = time_ns()
        gemm!('N', 'N', alpha, A, B, beta, C)
        elapsed = time_ns() - started
        if !(best != 0 && best <= elapsed)
            best = elapsed
        end
    end
    return best
end
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
/*
 * gettimeofday() replacement for native Windows builds.
 *
 * Fills *tv (when non-NULL) with the current time expressed as seconds and
 * microseconds since the Unix epoch.  tz is ignored.  Always returns 0, so
 * that callers can treat it like gettimeofday().
 */
static int mygettimeofday(struct timeval *tv, void *tz)
{
	(void) tz;

	if (tv)
	{
		FILETIME ft;
		unsigned long long res;
		GetSystemTimeAsFileTime(&ft);
		/* 100-nanosecond intervals since January 1, 1601 */
		res = ft.dwHighDateTime;
		res <<= 32;
		res |= ft.dwLowDateTime;
		res /= 10;
		/* Now we have microseconds */
		/* 369 years of 365 days plus 89 leap days separate 1601 and 1970 */
		res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
		/* Now we are based on epoch */
		tv->tv_sec = (long) (res / 1000000ULL);
		tv->tv_usec = (long) (res % 1000000ULL);
	}

	return 0;
}
#else
#define mygettimeofday(tv,tz) gettimeofday(tv,tz)
#endif
/*
 * Try to get a gdb report for the test program `exe`.
 *
 * The core file is first looked for under the name "core.<child_pid>"; when
 * that attempt fails, plain "core" is tried.  Does nothing when
 * STARPU_GDB_PATH is not defined.
 */
static void launch_gdb(const char *exe)
{
#ifdef STARPU_GDB_PATH
	char core_name[32];

	snprintf(core_name, sizeof(core_name), "core.%d", child_pid);
	if (try_launch_gdb(exe, core_name) >= 0)
		return;
	try_launch_gdb(exe, "core");
#endif /* STARPU_GDB_PATH */
}
/*
 * Replace, in one left-to-right pass, every occurrence of `motif` in *src by
 * `value`.
 *
 * Text coming from `value` is never scanned again, so the function
 * terminates even when `value` itself contains `motif`.  An empty `motif`
 * matches nothing.
 *
 * On substitution, *src is redirected to a newly malloc'ed string and 1 is
 * returned.  The previous string is not freed, because this function cannot
 * know whether it was heap-allocated.  When `motif` does not occur, *src is
 * left untouched and 0 is returned.
 *
 * *src and motif must be non-NULL; value must be non-NULL whenever motif
 * occurs in *src.  Exits the process if memory cannot be allocated.
 */
static int _decode(char **src, char *motif, const char *value)
{
	const char *scan;
	const char *hit;
	char *result;
	char *out;
	size_t motif_len = strlen(motif);
	size_t value_len;
	size_t hits = 0;

	if (motif_len == 0)
		return 0;

	/* First pass: count the occurrences to size the result exactly. */
	for (scan = *src; (hit = strstr(scan, motif)) != NULL; scan = hit + motif_len)
		hits++;
	if (hits == 0)
		return 0;

	value_len = strlen(value);
	result = malloc(strlen(*src) - hits * motif_len + hits * value_len + 1);
	if (result == NULL)
	{
		fprintf(stderr, "error: out of memory while expanding %s\n", motif);
		exit(EXIT_FAILURE);
	}

	/* Second pass: copy the text between occurrences, inserting value. */
	out = result;
	for (scan = *src; (hit = strstr(scan, motif)) != NULL; scan = hit + motif_len)
	{
		size_t keep = (size_t) (hit - scan);

		memcpy(out, scan, keep);
		out += keep;
		memcpy(out, value, value_len);
		out += value_len;
	}
	/* Tail after the last occurrence, including the terminating NUL. */
	strcpy(out, scan);

	*src = result;
	return 1;
}

/*
 * Expand every occurrence of `motif` in *src with `value`.
 *
 * Nothing is done when *src is NULL or does not contain `motif`.  When
 * `motif` is present but `value` is NULL, an error is reported and the
 * process exits with EXIT_FAILURE.
 */
static void decode(char **src, char *motif, const char *value)
{
	if (*src == NULL)
		return;

	if (value == NULL)
	{
		if (strstr(*src, motif) == NULL)
			return;
		fprintf(stderr, "error: $%s undefined\n", motif);
		exit(EXIT_FAILURE);
	}

	(void) _decode(src, motif, value);
}
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/julia/examples/mandelbrot/cpu_mandelbrot.c 
b/julia/examples/mandelbrot/cpu_mandelbrot.c new file mode 100644 index 0000000..195975c --- /dev/null +++ b/julia/examples/mandelbrot/cpu_mandelbrot.c @@ -0,0 +1,79 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include +#include +#include "cpu_mandelbrot.h" + +void cpu_mandelbrot(void *descr[], void *cl_arg) +{ + long long *pixels; + + pixels = (long long int *)STARPU_MATRIX_GET_PTR(descr[0]); + struct params *params = (struct params *) cl_arg; + + long width = STARPU_MATRIX_GET_NY(descr[0]); + long height = STARPU_MATRIX_GET_NX(descr[0]); + double zoom = width * 0.25296875; + double iz = 1. / zoom; + float diverge = 4.0; + float max_iterations = (width/2) * 0.049715909 * log10(zoom); + float imi = 1. 
/ max_iterations; + double centerr = params->centerr; + double centeri = params->centeri; + long offset = params->offset; + long dim = params->dim; + double cr = 0; + double zr = 0; + double ci = 0; + double zi = 0; + long n = 0; + double tmp = 0; + int ldP = STARPU_MATRIX_GET_LD(descr[0]); + + long long x,y; + + for (y = 0; y < height; y++) + { + for (x = 0; x < width; x++) + { + cr = centerr + (x - (dim/2)) * iz; + zr = cr; + ci = centeri + (y+offset - (dim/2)) * iz; + zi = ci; + + for (n = 0; n <= max_iterations; n++) + { + if (zr*zr + zi*zi>diverge) break; + tmp = zr*zr - zi*zi + cr; + zi = 2*zr*zi + ci; + zr = tmp; + } + if (n +#include +#include +#include "cpu_mandelbrot.h" + +void cpu_mandelbrot(void **, void *); +void gpu_mandelbrot(void **, void *); + +static struct starpu_perfmodel model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "history_perf" +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_mandelbrot}, + //.cuda_funcs = {gpu_mandelbrot}, + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &model +}; + +void mandelbrot_with_starpu(long long *pixels, struct params *p, long long dim, long long nslicesx) +{ + starpu_data_handle_t pixels_handle; + + starpu_matrix_data_register(&pixels_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, dim, dim, dim, sizeof(long long)); + + struct starpu_data_filter horiz = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nslicesx + }; + + starpu_data_partition(pixels_handle, &horiz); + + long long taskx; + + for (taskx = 0; taskx < nslicesx; taskx++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + task->handles[0] = starpu_data_get_child(pixels_handle, taskx); + task->cl_arg = p; + task->cl_arg_size = sizeof(*p); + if (starpu_task_submit(task)!=0) fprintf(stderr,"submit task error\n"); + } + + starpu_task_wait_for_all(); + + starpu_data_unpartition(pixels_handle, STARPU_MAIN_RAM); + starpu_data_unregister(pixels_handle); +} + +void pixels2img(long long *pixels, long 
/*
 * Run the benchmark for every dimension start_dim, start_dim + step_dim, ...
 * up to and including stop_dim, and print "<dim> <time per pixel>" for each.
 *
 * step_dim must be strictly positive: otherwise the dimension would never
 * reach stop_dim, so the call is rejected with a message on stderr.
 */
void display_times(double cr, double ci, long long start_dim, long long step_dim, long long stop_dim, long long nslices, int gen_images)
{
	long long dim;

	if (step_dim <= 0)
	{
		fprintf(stderr, "step_dim must be positive (got %lld)\n", step_dim);
		return;
	}

	for (dim = start_dim; dim <= stop_dim; dim += step_dim)
	{
		printf("Dimension: %lld...\n", dim);
		double res = min_times(cr, ci, dim, nslices, gen_images);
		res = res / dim / dim; // time per pixel
		printf("%lld %lf\n", dim, res);
	}
}
Using default parameters\n", argv[0]); + + cr = -0.800671; + ci = -0.158392; + start_dim = 32; + step_dim = 32; + stop_dim = 512; + nslices = 4; + gen_images = 0; + } + else + { + cr = (float) atof(argv[1]); + ci = (float) atof(argv[2]); + start_dim = atoll(argv[3]); + step_dim = atoll(argv[4]); + stop_dim = atoll(argv[5]); + nslices = atoll(argv[6]); + gen_images = atoi(argv[7]); + } + + if (starpu_init(NULL) != EXIT_SUCCESS) + { + fprintf(stderr, "ERROR\n"); + return 77; + } + + display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, gen_images); + + starpu_shutdown(); + + return 0; +} diff --git a/julia/examples/mandelbrot/mandelbrot.jl b/julia/examples/mandelbrot/mandelbrot.jl new file mode 100644 index 0000000..7dfd9d3 --- /dev/null +++ b/julia/examples/mandelbrot/mandelbrot.jl @@ -0,0 +1,123 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import Libdl +using StarPU +using LinearAlgebra + +@target STARPU_CPU+STARPU_CUDA +@codelet function mandelbrot(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64, offset ::Int64, dim ::Int64 ) :: Nothing + height :: Int64 = height(pixels) + width :: Int64 = width(pixels) + zoom :: Float64 = width * 0.25296875 + iz :: Float64 = 1. / zoom + diverge :: Float32 = 4.0 + max_iterations :: Float32 = ((width/2) * 0.049715909 * log10(zoom)); + imi :: Float32 = 1. 
/ max_iterations + cr :: Float64 = 0. + zr :: Float64 = 0. + ci :: Float64 = 0. + zi :: Float64 = 0. + n :: Int64 = 0 + tmp :: Float64 = 0. + @parallel for y = 1:height + for x = 1:width + cr = centerr + (x-1 - (dim / 2)) * iz + zr = cr + ci = centeri + (y-1+offset - (dim / 2)) * iz + zi = ci + max_it :: Float64 = max_iterations + n = 0 + for i = 0:max_it + n = i + if (zr*zr + zi*zi > diverge) + break + end + tmp = zr*zr - zi*zi + cr + zi = 2*zr*zi + ci + zr = tmp + end + + if (n < max_iterations) + pixels[y,x] = round(15 * n * imi) + else + pixels[y,x] = 0 + end + end + end + + return +end + +starpu_init() + +function mandelbrot_with_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64) + horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesx) + @starpu_block let + hA = starpu_data_register(A) + starpu_data_partition(hA,horiz) + + @starpu_sync_tasks for taskx in (1 : nslicesx) + starpu_task_insert(codelet_name = "mandelbrot", + handles = [hA[taskx]], + modes = [STARPU_W], + cl_arg = (cr, ci, Int64((taskx-1)*dim/nslicesx), dim)) + end + end +end + +function pixels2img(pixels ::Matrix{Int64}, width ::Int64, height ::Int64, filename ::String) + MAPPING = [[66,30,15],[25,7,26],[9,1,47],[4,4,73],[0,7,100],[12,44,138],[24,82,177],[57,125,209],[134,181,229],[211,236,248],[241,233,191],[248,201,95],[255,170,0],[204,128,0],[153,87,0],[106,52,3]] + open(filename, "w") do f + write(f, "P3\n$width $height\n255\n") + for i = 1:height + for j = 1:width + write(f,"$(MAPPING[1+pixels[i,j]][1]) $(MAPPING[1+pixels[i,j]][2]) $(MAPPING[1+pixels[i,j]][3]) ") + end + write(f, "\n") + end + end +end + +function min_times(cr ::Float64, ci ::Float64, dim ::Int64, nslices ::Int64, gen_images) + tmin=0; + + pixels ::Matrix{Int64} = zeros(dim, dim) + for i = 1:10 + t = time_ns(); + mandelbrot_with_starpu(pixels, cr, ci, dim, nslices) + t = time_ns()-t + if (tmin==0 || tmin>t) + tmin=t + end + end + if (gen_images == 1) + 
pixels2img(pixels,dim,dim,"out$(dim).ppm") + end + return tmin +end + +function display_time(cr ::Float64, ci ::Float64, start_dim ::Int64, step_dim ::Int64, stop_dim ::Int64, nslices ::Int64, gen_images) + for dim in (start_dim : step_dim : stop_dim) + res = min_times(cr, ci, dim, nslices, gen_images) + res=res/dim/dim; # time per pixel + println("$(dim) $(res)") + end +end + + +display_time(-0.800671,-0.158392,32,32,512,4, 0) + +starpu_shutdown() diff --git a/julia/examples/mandelbrot/mandelbrot.sh b/julia/examples/mandelbrot/mandelbrot.sh new file mode 100755 index 0000000..e6da51d --- /dev/null +++ b/julia/examples/mandelbrot/mandelbrot.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh mandelbrot/mandelbrot.jl +$(dirname $0)/../execute.sh mandelbrot/mandelbrot_native.jl +$(dirname $0)/../execute.sh -calllib mandelbrot/cpu_mandelbrot.c mandelbrot/mandelbrot.jl + diff --git a/julia/examples/mandelbrot/mandelbrot_native.jl b/julia/examples/mandelbrot/mandelbrot_native.jl new file mode 100644 index 0000000..8bce0dd --- /dev/null +++ b/julia/examples/mandelbrot/mandelbrot_native.jl @@ -0,0 +1,113 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +using LinearAlgebra + +function mandelbrot(pixels, centerr ::Float64, centeri ::Float64, offset ::Int64, dim ::Int64) :: Nothing + height :: Int64, width :: Int64 = size(pixels) + zoom :: Float64 = width * 0.25296875 + iz :: Float64 = 1. / zoom + diverge :: Float32 = 4.0 + max_iterations :: Float32 = ((width/2) * 0.049715909 * log10(zoom)); + imi :: Float64 = 1. / max_iterations + cr :: Float64 = 0. + zr :: Float64 = 0. + ci :: Float64 = 0. + zi :: Float64 = 0. + n :: Int64 = 0 + tmp :: Float64 = 0. 
+ for y = 1:height + for x = 1:width + cr = centerr + (x-1 - (dim / 2)) * iz + zr = cr + ci = centeri + (y-1+offset - (dim / 2)) * iz + zi = ci + n = 0 + for i = 0:max_iterations + n = i + if (zr*zr + zi*zi > diverge) + break + end + tmp = zr*zr - zi*zi + cr + zi = 2*zr*zi + ci + zr = tmp + end + + if (n < max_iterations) + pixels[y,x] = round(15 * n * imi) + else + pixels[y,x] = 0 + end + end + end + + return +end + +function mandelbrot_without_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64) + width,height = size(A) + step = height / nslicesx + + for taskx in (1 : nslicesx) + start_id = floor(Int64, (taskx-1)*step+1) + end_id = floor(Int64, (taskx-1)*step+step) + a = view(A, start_id:end_id, :) + + offset ::Int64 = (taskx-1)*dim/nslicesx + mandelbrot(a, cr, ci, offset, dim) + end +end + +function pixels2img(pixels ::Matrix{Int64}, width ::Int64, height ::Int64, filename ::String) + MAPPING = [[66,30,15],[25,7,26],[9,1,47],[4,4,73],[0,7,100],[12,44,138],[24,82,177],[57,125,209],[134,181,229],[211,236,248],[241,233,191],[248,201,95],[255,170,0],[204,128,0],[153,87,0],[106,52,3]] + open(filename, "w") do f + write(f, "P3\n$width $height\n255\n") + for i = 1:height + for j = 1:width + write(f,"$(MAPPING[1+pixels[i,j]][1]) $(MAPPING[1+pixels[i,j]][2]) $(MAPPING[1+pixels[i,j]][3]) ") + end + write(f, "\n") + end + end +end + +function min_times(cr ::Float64, ci ::Float64, dim ::Int64, nslices ::Int64, gen_images) + tmin=0; + + pixels ::Matrix{Int64} = zeros(dim, dim) + for i = 1:10 + t = time_ns(); + mandelbrot_without_starpu(pixels, cr, ci, dim, nslices) + t = time_ns()-t + if (tmin==0 || tmin>t) + tmin=t + end + end + if (gen_images == 1) + pixels2img(pixels,dim,dim,"out$(dim).ppm") + end + return tmin +end + +function display_time(cr ::Float64, ci ::Float64, start_dim ::Int64, step_dim ::Int64, stop_dim ::Int64, nslices ::Int64, gen_images) + for dim in (start_dim : step_dim : stop_dim) + res = min_times(cr, ci, dim, nslices, 
gen_images) + res=res/dim/dim; # time per pixel + println("$(dim) $(res)") + end +end + + +display_time(-0.800671,-0.158392,32,32,512,4, 0) diff --git a/julia/examples/mult/cpu_mult.c b/julia/examples/mult/cpu_mult.c new file mode 100644 index 0000000..36d7922 --- /dev/null +++ b/julia/examples/mult/cpu_mult.c @@ -0,0 +1,101 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018-2018 Alexis Juven + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +/* + * The codelet is passed 3 matrices, the "descr" union-type field gives a + * description of the layout of those 3 matrices in the local memory (ie. RAM + * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have + * registered data with the "matrix" data interface, we use the matrix macros. + */ +void cpu_mult(void *descr[], void *cl_arg) +{ + int stride; + float *subA, *subB, *subC; + + stride = *((int *)cl_arg); + + /* .blas.ptr gives a pointer to the first element of the local copy */ + subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + /* .blas.nx is the number of rows (consecutive elements) and .blas.ny + * is the number of lines that are separated by .blas.ld elements (ld + * stands for leading dimension). 
+ * NB: in case some filters were used, the leading dimension is not + * guaranteed to be the same in main memory (on the original matrix) + * and on the accelerator! */ + const uint32_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + const uint32_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + const uint32_t nyA = STARPU_MATRIX_GET_NY(descr[0]); + + const uint32_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + const uint32_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + const uint32_t ldC = STARPU_MATRIX_GET_LD(descr[2]); + /* we assume a FORTRAN-ordering! */ + int i,j,k,ii,jj,kk; + for (i = 0; i < nyC*nxC; i++) subC[i] = 0; + //fprintf(stderr,"inside cpu_mult %dx%dx%d %d/%d on %d\n",nyC,nyA,nxC,starpu_worker_get_id(),STARPU_NMAXWORKERS,starpu_worker_get_devid(starpu_worker_get_id())); + for (i=0;i +#include +#include +#include + +#include + +/* + * That program should compute C = A * B + * + * A of size (z,y) + * B of size (x,z) + * C of size (x,y) + + |---------------| + z | B | + |---------------| + z x + |----| |---------------| + | | | | + | | | | + | A | y | C | + | | | | + | | | | + |----| |---------------| + + */ + +//void gpu_mult(void **, void *); +void cpu_mult(void **, void *); + +static struct starpu_perfmodel model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "history_perf" +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_mult}, + .cpu_funcs_name = {"cpu_mult"}, + //.cuda_funcs = {gpu_mult}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .model = &model +}; + +void multiply_with_starpu(float *A, float *B, float *C, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nslicesx, unsigned nslicesy, int stride) +{ + starpu_data_handle_t A_handle, B_handle, C_handle; + + starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(float)); + starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(float)); + starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, 
ydim, ydim, xdim, sizeof(float)); + + struct starpu_data_filter vert = + { + .filter_func = starpu_matrix_filter_vertical_block, + .nchildren = nslicesx + }; + + struct starpu_data_filter horiz = + { + .filter_func = starpu_matrix_filter_block, + .nchildren = nslicesy + }; + + starpu_data_partition(B_handle, &vert); + starpu_data_partition(A_handle, &horiz); + starpu_data_map_filters(C_handle, 2, &vert, &horiz); + + unsigned taskx, tasky; + + for (taskx = 0; taskx < nslicesx; taskx++) + { + for (tasky = 0; tasky < nslicesy; tasky++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &cl; + task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky); + task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx); + task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky); + task->cl_arg = &stride; + task->cl_arg_size = sizeof(stride); + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_task_wait_for_all(); + + starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); + starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); + starpu_data_unregister(C_handle); +} + +void init_rand(float * m, unsigned width, unsigned height) +{ + unsigned i,j; + + for (j = 0 ; j < height ; j++) + { + for (i = 0 ; i < width ; i++) + { + m[j+i*height] = (float)(starpu_drand48()); + } + } +} + +void init_zero(float * m, unsigned width, unsigned height) +{ + memset(m, 0, sizeof(float) * width * height); +} + +double min_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy, int stride) +{ + unsigned i; + + float * A = (float *) malloc(zdim*ydim*sizeof(float)); + float * B = (float *) malloc(xdim*zdim*sizeof(float)); + float * C = (float *) malloc(xdim*ydim*sizeof(float)); + + double exec_times=-1; + + for (i = 0 ; i < 
nb_test ; i++) + { + double start, stop, exec_t; + + init_rand(A, zdim, ydim); + init_rand(B, xdim, zdim); + init_zero(C, xdim, ydim); + + start = starpu_timing_now(); + multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy, stride); + stop = starpu_timing_now(); + + exec_t = (stop - start)*1.e3; // Put in ns instead of us + if (exec_times<0 || exec_times>exec_t) exec_times= exec_t; + } + + free(A); + free(B); + free(C); + return exec_times; +} + +void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nb_tests, unsigned nsclicesx, unsigned nsclicesy, int stride) +{ + unsigned dim; + + for (dim = start_dim ; dim <= stop_dim ; dim += step_dim) + { + double t = min_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy, stride); + printf("%f %f\n", dim*dim*4.*3./1024./1024, (2.*dim-1.)*dim*dim/t); + } +} + +#define STRIDE_DEFAULT 8 + +int main(int argc, char * argv[]) +{ + int stride=STRIDE_DEFAULT; + if (argc >= 2) + stride = atoi(argv[1]); + if (stride % 4 != 0) + { + fprintf(stderr, "STRIDE must be a multiple of 4 (%d)\n", stride); + return -1; + } + + if (starpu_init(NULL) != EXIT_SUCCESS) + { + fprintf(stderr, "ERROR\n"); + return 77; + } + + unsigned start_dim = 16*stride; + unsigned step_dim = 4*stride; + unsigned stop_dim = 128*stride; + unsigned nb_tests = 10; + unsigned nsclicesx = 2; + unsigned nsclicesy = 2; + + display_times(start_dim, step_dim, stop_dim, nb_tests, nsclicesx, nsclicesy, stride); + + starpu_shutdown(); + + return 0; +} + diff --git a/julia/examples/mult/mult.jl b/julia/examples/mult/mult.jl new file mode 100644 index 0000000..629cf30 --- /dev/null +++ b/julia/examples/mult/mult.jl @@ -0,0 +1,150 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import Libdl +using StarPU +using LinearAlgebra + +@target STARPU_CPU+STARPU_CUDA +@codelet function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}, stride ::Int32) :: Nothing + + width_m2 :: Int32 = width(m2) + height_m1 :: Int32 = height(m1) + width_m1 :: Int32 = width(m1) + # Naive version + @parallel for j in (1 : width_m2) + @parallel for i in (1 : height_m1) + + sum :: Float32 = 0. 
+ + for k in (1 : width_m1) + sum = sum + m1[i, k] * m2[k, j] + end + + m3[i, j] = sum + end + end + # ##### Tiled and unrolled version + # for l in (1 : width_m2) + # for m in (1 : height_m1) + # m3[m,l] = 0 + # end + # end + # @parallel for i in (1 : STRIDE : height_m1) + # for k in (1 : STRIDE : width_m1 ) + # for j in (1 : STRIDE : width_m2 ) + # for kk in (k : 4 : k+STRIDE-1) + # for jj in (j : 2 : j+STRIDE-1) + # alpha00 :: Float32 =m2[kk,jj] + # alpha01 :: Float32 =m2[kk,jj+1] + # alpha10 :: Float32 =m2[kk+1,jj] + # alpha11 :: Float32 =m2[kk+1,jj+1] + # alpha20 :: Float32 =m2[kk+2,jj] + # alpha21 :: Float32 =m2[kk+2,jj+1] + # alpha30 :: Float32 =m2[kk+3,jj] + # alpha31 :: Float32 =m2[kk+3,jj+1] + # for ii in (i : 1 : i+STRIDE-1) + # m3[ii, jj] = m3[ii, jj] + m1[ii, kk] * alpha00 + m1[ii, kk+1] * alpha10 + m1[ii, kk+2] * alpha20 + m1[ii,kk+3]*alpha30 + # m3[ii, jj+1] = m3[ii, jj+1] + m1[ii, kk] * alpha01 + m1[ii, kk+1] * alpha11 + m1[ii, kk+2]*alpha21 + m1[ii,kk+3]*alpha31 + # end + # end + # end + # end + # end + # end + + return +end + + +starpu_init() + +function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy, stride) + scale= 3 + tmin=0 + vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx) + horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesy) + @starpu_block let + hA,hB,hC = starpu_data_register(A, B, C) + starpu_data_partition(hB, vert) + starpu_data_partition(hA, horiz) + starpu_data_map_filters(hC, vert, horiz) + tmin=0 + + for i in (1 : 10 ) + t=time_ns() + @starpu_sync_tasks begin + for taskx in (1 : nslicesx) + for tasky in (1 : nslicesy) + starpu_task_insert(codelet_name = "matrix_mult", + modes = [STARPU_R, STARPU_R, STARPU_W], + handles = [hA[tasky], hB[taskx], hC[taskx, tasky]], + cl_arg = (Int32(stride),)) + end + end + end + t=time_ns()-t + if (tmin==0 || tmin>t) + tmin=t + end + end + end + return tmin +end + + +function check(A, B, C) + expected = A 
* B + height,width = size(C) + for i in 1:height + for j in 1:width + got = C[i, j] + exp = expected[i, j] + + err = abs(exp - got) / exp + if err > 0.0001 + error("[$i] -> $got != $exp (err $err)") + end + end + end +end + +function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy, stride) + for dim in (start_dim : step_dim : stop_dim) + A = Array(rand(Cfloat, dim, dim)) + B = Array(rand(Cfloat, dim, dim)) + C = zeros(Float32, dim, dim) + mt = multiply_with_starpu(A, B, C, nslicesx, nslicesy, stride) + flops = (2*dim-1)*dim*dim/mt + size=dim*dim*4*3/1024/1024 + println(io,"$size $flops") + println("$size $flops") + check(A, B, C) + end +end + +if size(ARGS, 1) < 2 + stride=4 + filename="x.dat" +else + stride=parse(Int, ARGS[1]) + filename=ARGS[2] +end +io=open(filename,"w") +compute_times(io,16*stride,4*stride,128*stride,2,2,stride) +close(io) + +starpu_shutdown() + diff --git a/julia/examples/mult/mult_native.jl b/julia/examples/mult/mult_native.jl new file mode 100644 index 0000000..c6d45c4 --- /dev/null +++ b/julia/examples/mult/mult_native.jl @@ -0,0 +1,57 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +import Libdl +using StarPU +using LinearAlgebra + +function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy, stride) + tmin = 0 + for i in (1 : 10 ) + t=time_ns() + C = A * B; + t=time_ns() - t + if (tmin==0 || tmin>t) + tmin=t + end + end + return tmin +end + + +function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy, stride) + for dim in (start_dim : step_dim : stop_dim) + A = Array(rand(Cfloat, dim, dim)) + B = Array(rand(Cfloat, dim, dim)) + C = zeros(Float32, dim, dim) + mt = multiply_without_starpu(A, B, C, nslicesx, nslicesy, stride) + flops = (2*dim-1)*dim*dim/mt + size=dim*dim*4*3/1024/1024 + println(io,"$size $flops") + println("$size $flops") + end +end + +if size(ARGS, 1) < 2 + stride=4 + filename="x.dat" +else + stride=parse(Int, ARGS[1]) + filename=ARGS[2] +end +io=open(filename,"w") +compute_times(io,16*stride,4*stride,128*stride,2,2,stride) +close(io) + diff --git a/julia/examples/mult/mult_starpu.sh b/julia/examples/mult/mult_starpu.sh new file mode 100755 index 0000000..b46b917 --- /dev/null +++ b/julia/examples/mult/mult_starpu.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +$(dirname $0)/../execute.sh mult/mult.jl +$(dirname $0)/../execute.sh mult/mult_native.jl +$(dirname $0)/../execute.sh -calllib mult/cpu_mult.c mult/mult.jl + + diff --git a/julia/examples/mult/perf.sh b/julia/examples/mult/perf.sh new file mode 100755 index 0000000..3d6bfdf --- /dev/null +++ b/julia/examples/mult/perf.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +stride=72 +#stride=4 + +export STARPU_NOPENCL=0 +export STARPU_SCHED=dmda +export STARPU_CALIBRATE=1 + +rm -f ./cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat + +$(dirname $0)/mult $stride > ./cstarpu.dat +$(dirname $0)/../execute.sh mult/mult.jl $stride julia_generatedc.dat +$(dirname $0)/../execute.sh mult/mult_native.jl $stride julia_native.dat +$(dirname $0)/../execute.sh -calllib mult/cpu_mult.c mult/mult.jl $stride julia_calllib.dat + +( + cat < + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void func(void *descr[], void *_args) +{ + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + (void)_args; + + *x *= 2; +} + +struct starpu_codelet mycodelet = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1 +}; + +struct starpu_codelet mycodelet_color = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1, + .color = 0x0000FF, +}; + +int main(void) +{ + int value=42; + starpu_data_handle_t handle; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + // In the trace file, the following task should be green (executed on CPU) + ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask", + 0); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + starpu_data_unregister(handle); + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + // In the trace file, the following task will be red as specified by STARPU_TASK_COLOR + ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask", + STARPU_TASK_COLOR, 0xFF0000, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + // In the trace file, the following task will be blue as specified by the field color of mycodelet_color + ret = starpu_task_insert(&mycodelet_color, STARPU_RW, handle, STARPU_NAME, "mytask", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; + + enodev: + return 77; +} diff --git a/julia/examples/task_insert_color/task_insert_color.jl b/julia/examples/task_insert_color/task_insert_color.jl new file mode 100644 index 0000000..34c9e24 --- /dev/null +++ 
b/julia/examples/task_insert_color/task_insert_color.jl @@ -0,0 +1,70 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import Libdl +using StarPU + +@target STARPU_CPU +@codelet function task_insert_color(val ::Ref{Int32}) :: Nothing + val[] = val[] * 2 + + return +end + +starpu_init() + +function task_insert_color_with_starpu(val ::Ref{Int32}) + @starpu_block let + hVal = starpu_data_register(val) + + perfmodel = starpu_perfmodel( + perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), + symbol = "history_perf" + ) + + cl1 = starpu_codelet( + cpu_func = "task_insert_color", + modes = [STARPU_RW], + perfmodel = perfmodel + ) + + cl2 = starpu_codelet( + cpu_func = "task_insert_color", + modes = [STARPU_RW], + perfmodel = perfmodel, + color = 0x0000FF + ) + + @starpu_sync_tasks begin + + # In the trace file, the following task should be green (executed on CPU) + starpu_task_submit(starpu_task(cl = cl1, handles = [hVal])) + + # In the trace file, the following task will be blue as specified by the field color of cl2 + starpu_task_submit(starpu_task(cl = cl2, handles = [hVal])) + + # In the trace file, the following tasks will be red as specified in @starpu_async_cl + @starpu_async_cl task_insert_color(hVal) [STARPU_RW] () 0xFF0000 + + end + end +end + + +foo = Ref(convert(Int32, 42)) + +task_insert_color_with_starpu(foo) + 
+starpu_shutdown() diff --git a/julia/examples/task_insert_color/task_insert_color.sh b/julia/examples/task_insert_color/task_insert_color.sh new file mode 100755 index 0000000..a854f91 --- /dev/null +++ b/julia/examples/task_insert_color/task_insert_color.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh task_insert_color/task_insert_color.jl + diff --git a/julia/examples/variable/variable.jl b/julia/examples/variable/variable.jl new file mode 100644 index 0000000..63ba048 --- /dev/null +++ b/julia/examples/variable/variable.jl @@ -0,0 +1,53 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +import Libdl +using StarPU + +@target STARPU_CPU +@codelet function variable(val ::Ref{Float32}) :: Nothing + val[] = val[] + 1 + + return +end + +starpu_init() + +function variable_with_starpu(val ::Ref{Float32}, niter) + @starpu_block let + hVal = starpu_data_register(val) + + @starpu_sync_tasks for task in (1 : niter) + @starpu_async_cl variable(hVal) [STARPU_RW] + end + end +end + +function display(niter) + foo = Ref(0.0f0) + + variable_with_starpu(foo, niter) + + println("variable -> ", foo[]) + if foo[] == niter + println("result is correct") + else + error("result is incorret") + end +end + +display(10) + +starpu_shutdown() diff --git a/julia/examples/variable/variable.sh b/julia/examples/variable/variable.sh new file mode 100755 index 0000000..4098716 --- /dev/null +++ b/julia/examples/variable/variable.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh variable/variable.jl +$(dirname $0)/../execute.sh variable/variable_native.jl + diff --git a/julia/examples/variable/variable_native.jl b/julia/examples/variable/variable_native.jl new file mode 100644 index 0000000..1823803 --- /dev/null +++ b/julia/examples/variable/variable_native.jl @@ -0,0 +1,41 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +function variable(val ::Ref{Float32}) :: Nothing + val[] = val[] + 1 + + return +end + +function variable_without_starpu(val ::Ref{Float32}, niter) + for i = 1:niter + variable(val) + end +end + +function display(niter) + foo = Ref(0.0f0) + + variable_without_starpu(foo, niter) + + println("variable -> ", foo[]) + if foo[] == niter + println("result is correct") + else + println("result is incorret") + end +end + +display(10) diff --git a/julia/examples/vector_scal/vector_scal.jl b/julia/examples/vector_scal/vector_scal.jl new file mode 100644 index 0000000..b9c032d --- /dev/null +++ b/julia/examples/vector_scal/vector_scal.jl @@ -0,0 +1,106 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+import Libdl
+using StarPU
+using LinearAlgebra
+# CPU+CUDA codelet: rewrites every v[i] as v[i] * m + l + k.
+@target STARPU_CPU+STARPU_CUDA
+@codelet function vector_scal(m::Int32, v :: Vector{Float32}, k :: Float32, l :: Float32) :: Float32
+
+    N :: Int32 = length(v)
+    # Naive version
+    @parallel for i in (1 : N)
+        v[i] = v[i] * m + l + k
+    end
+end
+
+
+starpu_init()
+# Registers `v`, runs the vector_scal codelet on it once with cl_arg (m, k, l), and returns the elapsed time_ns() delta.
+function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32, l :: Float32)
+    tmin=0
+
+    @starpu_block let
+        hV = starpu_data_register(v)
+        tmin=0
+
+        for i in (1 : 1)
+            t=time_ns()
+            @starpu_sync_tasks begin
+                starpu_task_insert(codelet_name = "vector_scal",
+                                   modes = [STARPU_RW],
+                                   handles = [hV],
+                                   cl_arg=(m, k, l))
+            end
+            t=time_ns()-t
+            if (tmin==0 || tmin>t)
+                tmin=t
+            end
+        end
+    end
+    return tmin
+end
+# Raises an error at the first index where `res` deviates from ref .* m .+ (k+l) by a relative error above 1e-4.
+function check(ref, res, m, k, l)
+    expected = ref .* m .+ (k+l)
+
+    for i in 1:length(expected)
+        got = res[i]
+        want = expected[i]
+
+        err = abs(want - got) / abs(want) # magnitude in the denominator: a negative expected value must not hide a mismatch
+        if err > 0.0001
+            error("[$i] -> $got != $want (err $err)")
+        end
+    end
+end
+# For each size in start_dim:step_dim:stop_dim: scales a random Cfloat vector through StarPU, logs "size time" to `io` and stdout, then checks the result.
+function compute_times(io,start_dim, step_dim, stop_dim)
+    for size in (start_dim : step_dim : stop_dim)
+        V = Array(rand(Cfloat, size))
+        V_ref = copy(V)
+        starpu_memory_pin(V)
+
+        m :: Int32 = 10
+        k :: Float32 = 2.
+        l :: Float32 = 3.
+
+        println("INPUT ", V[1:10])
+
+        mt = vector_scal_with_starpu(V, m, k, l)
+
+        starpu_memory_unpin(V)
+
+        println("OUTPUT ", V[1:10])
+        println(io,"$size $mt")
+        println("$size $mt")
+
+        check(V_ref, V, m, k, l)
+    end
+end
+
+if size(ARGS, 1) < 1
+    filename="x.dat"
+else
+    filename=ARGS[1]
+end
+# Timings go to ARGS[1] (default x.dat); the do-block closes the file even if compute_times throws.
+open(filename,"w") do io
+    compute_times(io,1024,1024,4096)
+end
+
+starpu_shutdown()
+
diff --git a/julia/examples/vector_scal/vector_scal.sh b/julia/examples/vector_scal/vector_scal.sh
new file mode 100755
index 0000000..444c26a
--- /dev/null
+++ b/julia/examples/vector_scal/vector_scal.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+# Run the plain example, then the -calllib variant; && stops at the first failure so its exit status is not masked.
+"$(dirname "$0")"/../execute.sh vector_scal/vector_scal.jl &&
+"$(dirname "$0")"/../execute.sh -calllib vector_scal/cpu_vector_scal.c vector_scal/vector_scal.jl
+
diff --git a/julia/src/Makefile.am b/julia/src/Makefile.am
new file mode 100644
index 0000000..867863f
--- /dev/null
+++ b/julia/src/Makefile.am
@@ -0,0 +1,59 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/make/starpu-notests.mk
+
+CLEANFILES = *.gcno *.gcda
+# Flags for the libtool library declared below; LIBS also appends whatever openblas_ldflags.jl prints when run through @JULIA@.
+AM_CFLAGS += -fPIC
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/julia/src $(STARPU_H_CPPFLAGS)
+LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ `@JULIA@ $(top_srcdir)/julia/src/openblas_ldflags.jl` $(STARPU_EXPORTED_LIBS)
+
+SUBDIRS = dynamic_compiler
+
+lib_LTLIBRARIES = libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la
+
+noinst_HEADERS =
+
+libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \
+	-version-info $(LIBSTARPUJULIA_INTERFACE_CURRENT):$(LIBSTARPUJULIA_INTERFACE_REVISION):$(LIBSTARPUJULIA_INTERFACE_AGE)
+
+libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \
+	callback_wrapper.c \
+	blas_wrapper.c \
+	blas.c
+# NOTE(review): openblas_ldflags.jl (run by LIBS above) is not listed here -- confirm it is distributed from elsewhere.
+EXTRA_DIST = blas.h \
+	blas.jl \
+	data.jl \
+	destructible.jl \
+	globals.jl \
+	init.jl \
+	linked_list.jl \
+	perfmodel.jl \
+	StarPU.jl \
+	task_dep.jl \
+	task.jl \
+	translate_headers.jl \
+	utils.jl \
+	compiler/c.jl \
+	compiler/cuda.jl \
+	compiler/expression_manipulation.jl \
+	compiler/expressions.jl \
+	compiler/file_generation.jl \
+	compiler/include.jl \
+	compiler/parsing.jl \
+	compiler/utils.jl
diff --git a/julia/src/Makefile.in b/julia/src/Makefile.in
new file mode 100644
index 0000000..7ea9414
--- /dev/null
+++ b/julia/src/Makefile.in
@@ -0,0 +1,1136 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+ +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) 
+NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = julia/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| 
$$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +am_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + callback_wrapper.lo blas_wrapper.lo blas.lo +libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/blas.Plo \ + ./$(DEPDIR)/blas_wrapper.Plo ./$(DEPDIR)/callback_wrapper.Plo 
+am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = \ + $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without 
duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = 
@BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ `@JULIA@ \ + $(top_srcdir)/julia/src/openblas_ldflags.jl` \ + $(STARPU_EXPORTED_LIBS) 
+LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS 
= @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = 
@STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir 
= @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -fPIC +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) 
+@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +CLEANFILES = *.gcno *.gcda +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/julia/src $(STARPU_H_CPPFLAGS) +SUBDIRS = dynamic_compiler +lib_LTLIBRARIES = libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la +noinst_HEADERS = +libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSTARPUJULIA_INTERFACE_CURRENT):$(LIBSTARPUJULIA_INTERFACE_REVISION):$(LIBSTARPUJULIA_INTERFACE_AGE) + +libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + callback_wrapper.c \ + blas_wrapper.c \ + blas.c + +EXTRA_DIST = blas.h \ + blas.jl \ + data.jl \ + destructible.jl \ + globals.jl \ + init.jl \ + linked_list.jl \ + perfmodel.jl \ + StarPU.jl \ + 
task_dep.jl \ + task.jl \ + translate_headers.jl \ + utils.jl \ + compiler/c.jl \ + compiler/cuda.jl \ + compiler/expression_manipulation.jl \ + compiler/expressions.jl \ + compiler/file_generation.jl \ + compiler/include.jl \ + compiler/parsing.jl \ + compiler/utils.jl + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign julia/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) 
'$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blas.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blas_wrapper.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callback_wrapper.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: 
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ 
+ install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/blas.Plo + -rm -f ./$(DEPDIR)/blas_wrapper.Plo + -rm -f ./$(DEPDIR)/callback_wrapper.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/blas.Plo + -rm -f ./$(DEPDIR)/blas_wrapper.Plo + -rm -f ./$(DEPDIR)/callback_wrapper.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: $(am__recursive_targets) 
install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT:
diff --git a/julia/src/StarPU.jl b/julia/src/StarPU.jl
new file mode 100644
index 0000000..8757b72
--- /dev/null
+++ b/julia/src/StarPU.jl
@@ -0,0 +1,120 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+"""
+__precompile__()
+"""
+module StarPU
+import Libdl
+using CBinding
+
+include("utils.jl")
+
+const starpu_wrapper_library_name=fstarpu_task_library_name()
+
+include("translate_headers.jl")
+
+if !isfile(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl")) || !isfile(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl")) || # a generated binding file is missing, or ...
+    mtime(joinpath(@__DIR__, "translate_headers.jl")) > mtime(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl")) # ... the translator beside this file is newer than its output (@__DIR__, not @__FILE__: the latter is the path of StarPU.jl itself)
+    starpu_translate_headers()
+end
+
+include(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl"))
+include(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl"))
+include("globals.jl")
+
+include("compiler/include.jl")
+include("linked_list.jl")
+include("destructible.jl")
+include("perfmodel.jl")
+include("data.jl")
+include("blas.jl")
+include("task.jl")
+include("task_dep.jl")
+include("init.jl")
+
+# macro
+export @starpu_filter
+export @starpu_block
+export @starpu_async_cl
+export @starpu_sync_tasks
+
+# enum / define
+export STARPU_CPU
+export STARPU_CUDA
+export STARPU_CUDA_ASYNC
+export STARPU_OPENCL
+export STARPU_MAIN_RAM
+export
StarpuDataFilterFunc +export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK +export STARPU_VECTOR_FILTER_BLOCK +export STARPU_PERFMODEL_INVALID, STARPU_PER_ARCH, STARPU_COMMON +export STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED +export STARPU_NL_REGRESSION_BASED, STARPU_MULTIPLE_REGRESSION_BASED +export starpu_tag_t +export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH +export STARPU_MPI_REDUX, STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY +export STARPU_ACCESS_MODE_MAX + +# BLAS +export STARPU_SAXPY + +# functions +export starpu_cublas_init +export starpu_init +export starpu_shutdown +export starpu_memory_pin +export starpu_memory_unpin +export starpu_data_access_mode +export starpu_data_acquire_on_node +export starpu_data_release_on_node +export starpu_data_unregister +export starpu_data_register +export starpu_data_get_sub_data +export starpu_data_partition +export starpu_data_unpartition +export starpu_data_map_filters +export starpu_data_wont_use +export starpu_task_insert +export starpu_task_wait_for_all +export starpu_task_submit +export starpu_task_end_dep_add +export starpu_task_end_dep_release +export starpu_task_declare_deps +export starpu_task_declare_end_deps +export starpu_task_wait_for_n_submitted +export starpu_task_destroy +export starpu_tag_remove +export starpu_tag_wait +export starpu_tag_notify_from_apps +export starpu_iteration_pop +export starpu_iteration_push +export starpu_tag_declare_deps +export starpu_task +export starpu_task_wait +export starpu_codelet +export starpu_perfmodel +export starpu_perfmodel_type +export starpu_translate_headers +export starpu_data_get_default_sequential_consistency_flag +export starpu_data_set_default_sequential_consistency_flag +export starpu_data_get_sequential_consistency_flag +export starpu_data_set_sequential_consistency_flag +export starpu_worker_get_count +export starpu_cpu_worker_get_count +export starpu_cuda_worker_get_count +export 
starpu_opencl_worker_get_count + +end diff --git a/julia/src/blas.c b/julia/src/blas.c new file mode 100644 index 0000000..d2e3681 --- /dev/null +++ b/julia/src/blas.c @@ -0,0 +1,194 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include "blas.h" + +inline void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, + float alpha, const float *A, BLASINT lda, const float *B, BLASINT ldb, + float beta, float *C, BLASINT ldc) +{ + sgemm_64_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, &ldc); +} + +inline void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, + double alpha, double *A, BLASINT lda, double *B, BLASINT ldb, + double beta, double *C, BLASINT ldc) +{ + dgemm_64_(transa, transb, &M, &N, &K, &alpha, + A, &lda, B, &ldb, + &beta, C, &ldc); +} + + +inline void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda, + float *X, BLASINT incX, float beta, float *Y, BLASINT incY) +{ + sgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + +inline void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda, + double *X, BLASINT incX, double beta, double *Y, BLASINT incY) +{ + dgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); +} + 
+inline float STARPU_SASUM(BLASINT N, float *X, BLASINT incX) +{ + return sasum_64_(&N, X, &incX); +} + +inline double STARPU_DASUM(BLASINT N, double *X, BLASINT incX) +{ + return dasum_64_(&N, X, &incX); +} + +void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX) +{ + sscal_64_(&N, &alpha, X, &incX); +} + +void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX) +{ + dscal_64_(&N, &alpha, X, &incX); +} + +void STARPU_STRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const BLASINT m, const BLASINT n, + const float alpha, const float *A, const BLASINT lda, + float *B, const BLASINT ldb) +{ + strsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, + const char *diag, const BLASINT m, const BLASINT n, + const double alpha, const double *A, const BLASINT lda, + double *B, const BLASINT ldb) +{ + dtrsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha, + const float *x, const BLASINT incx, float *A, const BLASINT lda) +{ + ssyr_64_(uplo, &n, &alpha, x, &incx, A, &lda); +} + +void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n, + const BLASINT k, const float alpha, const float *A, + const BLASINT lda, const float beta, float *C, + const BLASINT ldc) +{ + ssyrk_64_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); +} + +void STARPU_SGER(const BLASINT m, const BLASINT n, const float alpha, + const float *x, const BLASINT incx, const float *y, + const BLASINT incy, float *A, const BLASINT lda) +{ + sger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha, + const double *x, const BLASINT incx, const double *y, + const BLASINT incy, double *A, const BLASINT lda) +{ + dger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); +} + +void 
STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const BLASINT n, const float *A, const BLASINT lda, float *x, + const BLASINT incx) +{ + strsv_64_(uplo, trans, diag, &n, A, &lda, x, &incx); +} + +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT m, const BLASINT n, + const float alpha, const float *A, const BLASINT lda, + float *B, const BLASINT ldb) +{ + strmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT m, const BLASINT n, + const double alpha, const double *A, const BLASINT lda, + double *B, const BLASINT ldb) +{ + dtrmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); +} + +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const BLASINT n, const float *A, const BLASINT lda, float *X, + const BLASINT incX) +{ + strmv_64_(uplo, transA, diag, &n, A, &lda, X, &incX); +} + +void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incY) +{ + saxpy_64_(&n, &alpha, X, &incX, Y, &incY); +} + +void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY) +{ + daxpy_64_(&n, &alpha, X, &incX, Y, &incY); +} + +BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX) +{ + BLASINT retVal; + retVal = isamax_64_ (&n, X, &incX); + return retVal; +} + +BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX) +{ + BLASINT retVal; + retVal = idamax_64_ (&n, X, &incX); + return retVal; +} + +float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy) +{ + float retVal = 0; + + /* GOTOBLAS will return a FLOATRET which is a double, not a float */ + retVal = (float)sdot_64_(&n, x, &incx, y, &incy); + + return retVal; +} + +double STARPU_DDOT(const BLASINT 
n, const double *x, const BLASINT incx, const double *y, const BLASINT incy) +{ + return ddot_64_(&n, x, &incx, y, &incy); +} + +void STARPU_SSWAP(const BLASINT n, float *X, const BLASINT incX, float *Y, const BLASINT incY) +{ + sswap_64_(&n, X, &incX, Y, &incY); +} + +void STARPU_DSWAP(const BLASINT n, double *X, const BLASINT incX, double *Y, const BLASINT incY) +{ + dswap_64_(&n, X, &incX, Y, &incY); +} diff --git a/julia/src/blas.h b/julia/src/blas.h new file mode 100644 index 0000000..f5aed28 --- /dev/null +++ b/julia/src/blas.h @@ -0,0 +1,148 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#ifndef __BLAS_H__
+#define __BLAS_H__
+
+#include <stdint.h>
+
+#define BLASINT int64_t /* integer type used for every size, stride and index argument in this header */
+
+void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, float alpha, const float *A, BLASINT lda,
+		  const float *B, BLASINT ldb, float beta, float *C, BLASINT ldc);
+void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, double alpha, double *A, BLASINT lda,
+		  double *B, BLASINT ldb, double beta, double *C, BLASINT ldc);
+void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda,
+		  float *X, BLASINT incX, float beta, float *Y, BLASINT incY);
+void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda,
+		  double *X, BLASINT incX, double beta, double *Y, BLASINT incY);
+float STARPU_SASUM(BLASINT N, float *X, BLASINT incX);
+double STARPU_DASUM(BLASINT N, double *X, BLASINT incX);
+void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX);
+void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX);
+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
+		   const char *diag, const BLASINT m, const BLASINT n,
+		   const float alpha, const float *A, const BLASINT lda,
+		   float *B, const BLASINT ldb);
+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
+		   const char *diag, const BLASINT m, const BLASINT n,
+		   const double alpha, const double *A, const BLASINT lda,
+		   double *B, const BLASINT ldb);
+void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha,
+		  const float *x, const BLASINT incx, float *A, const BLASINT lda);
+void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n,
+		   const BLASINT k, const float alpha, const float *A,
+		   const BLASINT lda, const float beta, float *C,
+		   const BLASINT ldc);
+void STARPU_SGER (const BLASINT m, const BLASINT n, const float alpha,
+		  const float *x, const BLASINT incx, const float *y,
+		  const BLASINT incy, float *A, const BLASINT lda);
+void
STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha, + const double *x, const BLASINT incx, const double *y, + const BLASINT incy, double *A, const BLASINT lda); +void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, + const BLASINT n, const float *A, const BLASINT lda, float *x, + const BLASINT incx); +void STARPU_STRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT m, const BLASINT n, + const float alpha, const float *A, const BLASINT lda, + float *B, const BLASINT ldb); +void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT m, const BLASINT n, + const double alpha, const double *A, const BLASINT lda, + double *B, const BLASINT ldb); +void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, + const BLASINT n, const float *A, const BLASINT lda, float *X, + const BLASINT incX); +void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incy); +void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY); +BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX); +BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX); +float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy); +double STARPU_DDOT(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy); +void STARPU_SSWAP(const BLASINT n, float *x, const BLASINT incx, float *y, const BLASINT incy); +void STARPU_DSWAP(const BLASINT n, double *x, const BLASINT incx, double *y, const BLASINT incy); + + +extern void sgemm_64_ (const char *transa, const char *transb, const BLASINT *m, + const BLASINT *n, const BLASINT *k, const float *alpha, + const float *A, const BLASINT *lda, const float *B, + const BLASINT *ldb, const float *beta, float *C, + const BLASINT 
*ldc); +extern void dgemm_64_ (const char *transa, const char *transb, const BLASINT *m, + const BLASINT *n, const BLASINT *k, const double *alpha, + const double *A, const BLASINT *lda, const double *B, + const BLASINT *ldb, const double *beta, double *C, + const BLASINT *ldc); +extern void sgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const float *alpha, + const float *a, const BLASINT *lda, const float *x, const BLASINT *incx, + const float *beta, float *y, const BLASINT *incy); +extern void dgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const double *alpha, + const double *a, const BLASINT *lda, const double *x, const BLASINT *incx, + const double *beta, double *y, const BLASINT *incy); +extern void ssyr_64_ (const char *uplo, const BLASINT *n, const float *alpha, + const float *x, const BLASINT *incx, float *A, const BLASINT *lda); +extern void ssyrk_64_ (const char *uplo, const char *trans, const BLASINT *n, + const BLASINT *k, const float *alpha, const float *A, + const BLASINT *lda, const float *beta, float *C, + const BLASINT *ldc); +extern void strsm_64_ (const char *side, const char *uplo, const char *transa, + const char *diag, const BLASINT *m, const BLASINT *n, + const float *alpha, const float *A, const BLASINT *lda, + float *B, const BLASINT *ldb); +extern void dtrsm_64_ (const char *side, const char *uplo, const char *transa, + const char *diag, const BLASINT *m, const BLASINT *n, + const double *alpha, const double *A, const BLASINT *lda, + double *B, const BLASINT *ldb); +extern double sasum_64_ (const BLASINT *n, const float *x, const BLASINT *incx); +extern double dasum_64_ (const BLASINT *n, const double *x, const BLASINT *incx); +extern void sscal_64_ (const BLASINT *n, const float *alpha, float *x, + const BLASINT *incx); +extern void dscal_64_ (const BLASINT *n, const double *alpha, double *x, + const BLASINT *incx); +extern void sger_64_(const BLASINT *m, const BLASINT *n, const float *alpha, + const 
float *x, const BLASINT *incx, const float *y, + const BLASINT *incy, float *A, const BLASINT *lda); +extern void dger_64_(const BLASINT *m, const BLASINT *n, const double *alpha, + const double *x, const BLASINT *incx, const double *y, + const BLASINT *incy, double *A, const BLASINT *lda); +extern void strsv_64_ (const char *uplo, const char *trans, const char *diag, + const BLASINT *n, const float *A, const BLASINT *lda, float *x, + const BLASINT *incx); +extern void strmm_64_(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT *m, const BLASINT *n, + const float *alpha, const float *A, const BLASINT *lda, + float *B, const BLASINT *ldb); +extern void dtrmm_64_(const char *side, const char *uplo, const char *transA, + const char *diag, const BLASINT *m, const BLASINT *n, + const double *alpha, const double *A, const BLASINT *lda, + double *B, const BLASINT *ldb); +extern void strmv_64_(const char *uplo, const char *transA, const char *diag, + const BLASINT *n, const float *A, const BLASINT *lda, float *X, + const BLASINT *incX); +extern void saxpy_64_(const BLASINT *n, const float *alpha, const float *X, const BLASINT *incX, + float *Y, const BLASINT *incy); +extern void daxpy_64_(const BLASINT *n, const double *alpha, const double *X, const BLASINT *incX, + double *Y, const BLASINT *incy); +extern BLASINT isamax_64_(const BLASINT *n, const float *X, const BLASINT *incX); +extern BLASINT idamax_64_(const BLASINT *n, const double *X, const BLASINT *incX); +/* for some reason, FLOATRET is not a float but a double in GOTOBLAS */ +extern double sdot_64_(const BLASINT *n, const float *x, const BLASINT *incx, const float *y, const BLASINT *incy); +extern double ddot_64_(const BLASINT *n, const double *x, const BLASINT *incx, const double *y, const BLASINT *incy); +extern void sswap_64_(const BLASINT *n, float *x, const BLASINT *incx, float *y, const BLASINT *incy); +extern void dswap_64_(const BLASINT *n, double *x, const BLASINT 
# Identifiers of the BLAS routines for which a ready-made codelet exists.
@enum STARPU_BLAS STARPU_SAXPY

# Name of the C function implementing each routine, one table per
# architecture (the functions themselves live in blas_wrapper.c).
cpu_blas_codelets = Dict(STARPU_SAXPY => "julia_saxpy_cpu_codelet")
cuda_blas_codelets = Dict(STARPU_SAXPY => "julia_saxpy_cuda_codelet")
/*
 * CPU codelet wrapping STARPU_SAXPY for the Julia bindings.
 *
 * descr[0] and descr[1] are accessed through the StarPU vector interface
 * (x and y operands); arg points to the float scaling factor. The element
 * count is read from descr[0] only, and both vectors are walked with a
 * stride of 1.
 */
void julia_saxpy_cpu_codelet(void *descr[], void *arg)
{
	float *x = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
	float *y = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
	unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
	const float *scale = (const float *)arg;

	STARPU_SAXPY((int)nx, *scale, x, 1, y, 1);
}
/*
 * Spin until the integer pointed to by signal becomes non-zero.
 * The pointee is volatile, so it is re-read on every iteration.
 */
void julia_wait_signal(volatile int *signal)
{
	for (;;)
	{
		if (*signal != 0)
			return;
	}
}
"""
    Rebuilds every `StarpuExprFor` node found in expr so that it carries the
    declarations computed by interval_evaluation_declarations for its
    iteration set and index variable. Any other node is returned unchanged.
"""
function add_for_loop_declarations(expr :: StarpuExpr)

    function attach_declarations(node :: StarpuExpr)

        if isa(node, StarpuExprFor)
            decls = interval_evaluation_declarations(node.set, node.iter)
            return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, decls)
        end

        return node
    end

    return apply(attach_declarations, expr)
end
"""
    Merges nested blocks: each `StarpuExprBlock` visited by apply() is
    rebuilt with the instructions of its direct child blocks spliced in
    place of those children. Non-block nodes are returned unchanged.
"""
function flatten_blocks(expr :: StarpuExpr)

    function merge_children(node :: StarpuExpr)

        isa(node, StarpuExprBlock) || return node

        flat = StarpuExpr[]

        for child in node.exprs
            if !isa(child, StarpuExprBlock)
                push!(flat, child)
                continue
            end

            # Splice the child's instructions instead of the child itself
            push!(flat, child.exprs...)
        end

        return StarpuExprBlock(flat)
    end

    return apply(merge_children, expr)
end
starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) + push!(function_start_affectations, new_affect) + new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, var_name) + buffer_id += 1 + elseif (expr.args[i].typ <: Matrix) + func_interface = :STARPU_MATRIX_GET_PTR + ld_name = Symbol("ld_", var_id) + post_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$buffer_id])) ) + type_in_arg = eltype(expr.args[i].typ) + new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) + push!(function_start_affectations, new_affect) + push!(function_start_affectations, post_affect) + new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, var_name) + buffer_id += 1 + elseif (expr.args[i].typ <: Ref) + func_interface = :STARPU_VARIABLE_GET_PTR + type_in_arg = eltype(expr.args[i].typ) + new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) + push!(function_start_affectations, new_affect) + new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, Symbol("(*$var_name)")) + buffer_id += 1 + elseif (expr.args[i].typ <: Number || expr.args[i].typ <: AbstractChar) + type_in_arg = eltype(expr.args[i].typ) + field_name = scalar_parameters[scalar_id][1] + var_name = field_name + post_affect = starpu_parse( :($var_name :: $type_in_arg = *($ptr).$field_name)) + new_affect = starpu_parse( :($ptr :: Ptr{$struct_params_name} = $cl_arg_name)) + push!(function_start_affectations, new_affect) + push!(function_start_affectations, post_affect) + scalar_id += 1 + else + error("Task arguments must be either matrix, vector, ref or scalar (got $(expr.args[i].typ))") + end + + + end + + + new_args = [ + starpu_parse(:($buffer_arg_name :: Ptr{Ptr{Nothing}})), + starpu_parse(:($cl_arg_name :: Vector{Nothing})) + ] + new_body 
"""
    Rewrites Julia-style (1-based) indexing into C-style (0-based) indexing
    on every `StarpuExprRef` found in expr:

      - no index      : the reference is left untouched
      - one index  i  : becomes  i - 1
      - two indexes   : (i, j) becomes  i + (j - 1) * ld_<id> - 1, where the
                        referenced variable must be named ptr_<id>
      - three or more : rejected with an error
"""
function substitute_indexing(expr :: StarpuExpr)

    ptr_prefix = "ptr_"

    function func_to_run(x :: StarpuExpr)

        if !isa(x, StarpuExprRef)
            return x
        end

        #if !isa(x.ref, StarpuExprVar)
        #    error("Only variable indexing is allowed") #TODO allow more ?
        #end

        nb_indexes = length(x.indexes)

        if (nb_indexes >= 3)
            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
        end

        if (nb_indexes == 0)
            return x
        end

        if nb_indexes == 1
            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)]) #TODO : add field "offset" from STARPU_VECTOR_GET interface
            #TODO : detect when it is a matrix used with one index only
            return StarpuExprRef(x.ref, [new_index])
        end

        # nb_indexes == 2 : the leading dimension variable is derived from
        # the pointer name, so the reference must be a named variable.
        if !hasfield(typeof(x.ref), :name)
            error("Invalid expression ($(x.ref)) for multiple index dereferencing")
        end

        var_name = String(x.ref.name)

        # The name must *start* with "ptr_" and have a non-empty identifier
        # after it. A name that only contains "ptr_" somewhere would yield
        # a meaningless ld_ variable.
        if !startswith(var_name, ptr_prefix) || ncodeunits(var_name) == ncodeunits(ptr_prefix)
            error("Invalid variable ($var_name) for multiple index dereferencing")
        end

        var_id = var_name[(ncodeunits(ptr_prefix) + 1):end]
        ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)

        new_index = x.indexes[2]
        new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
        new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
        new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
        new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])

        return StarpuExprRef(x.ref, [new_index])
    end

    return apply(func_to_run, expr)
end
"""
    Splits expr around its first independent for loop and returns the triple
    (instructions before, StarpuIndepFor or nothing, instructions after).

    Only expr itself and, when expr is a block, its direct children are
    inspected. An error is raised when a second independent loop follows
    the first one at that same level.
"""
function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work)
    # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops

    if is_indep_for_expr(expr)
        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
    end

    if !isa(expr, StarpuExprBlock)
        return [expr], nothing, StarpuExpr[]
    end

    instrs = expr.exprs
    pos = findfirst(is_indep_for_expr, instrs)

    if pos === nothing
        return instrs, nothing, StarpuExpr[]
    end

    before = instrs[1 : pos-1]
    loop = StarpuIndepFor(instrs[pos])
    after = instrs[pos+1 : end]

    if any(is_indep_for_expr, after)
        error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
    end

    return before, loop, after
end
"""
    Applies, to every node of expr, a rewrite that turns each supported
    STARPU_* BLAS call into a block made of:
      - the matching cuBLAS call, with starpu_cublas_get_local_handle()
        prepended to the arguments and its result stored in a fresh
        cublasStatus_t variable,
      - a check reporting the status through STARPU_CUBLAS_REPORT_ERROR,
      - a cudaStreamSynchronize on the local StarPU CUDA stream.

    One-letter string flags ("N", "T", "L", ... in either case) are replaced
    by the corresponding cuBLAS constants, and scalar arguments
    (alpha, beta, ...) are passed by address. The arguments of the visited
    call node are not modified: the rewrite works on a copy.
"""
function translate_cublas(expr :: StarpuExpr)

    # STARPU_BLAS => (CUBLAS, TRANS, FILLMODE, ALPHA, SIDE, DIAG)
    # Each list holds the positions (1-based, in the STARPU_* call) of the
    # arguments of that kind.
    blas_to_cublas = Dict(:STARPU_SGEMM => (:cublasSgemm, [1, 2], [], [6, 11], [], []),
                          :STARPU_DGEMM => (:cublasDgemm, [1, 2], [], [6, 11], [], []),
                          :STARPU_SGEMV => (:cublasSgemv, [1], [], [4,9], [], []),
                          :STARPU_DGEMV => (:cublasDgemv, [1], [], [4,9], [], []),
                          :STARPU_SSCAL => (:cublasSscal, [], [], [2], [], []),
                          :STARPU_DSCAL => (:cublasDscal, [], [], [2], [], []),
                          :STARPU_STRSM => (:cublasStrsm, [3], [2], [7], [1], [4]),
                          :STARPU_DTRSM => (:cublasDtrsm, [3], [2], [7], [1], [4]),
                          :STARPU_SSYR => (:cublasSsyr, [], [1], [3], [], []),
                          :STARPU_SSYRK => (:cublasSsyrk, [2], [1], [5,8], [], []),
                          :STARPU_SGER => (:cublasSger, [], [], [3], [], []),
                          :STARPU_DGER => (:cublasDger, [], [], [3], [], []),
                          :STARPU_STRSV => (:cublasStrsv, [2], [1], [], [], [3]),
                          :STARPU_STRMM => (:cublasStrmm, [3], [2], [7], [1], [4]),
                          :STARPU_DTRMM => (:cublasDtrmm, [3], [2], [7], [1], [4]),
                          :STARPU_STRMV => (:cublasStrmv, [2], [1], [], [], [3]),
                          :STARPU_SAXPY => (:cublasSaxpy, [], [], [2], [], []),
                          :STARPU_DAXPY => (:cublasDaxpy, [], [], [2], [], []),
                          :STARPU_SSWAP => (:cublasSswap, [], [], [], [], []),
                          :STARPU_DSWAP => (:cublasDswap, [], [], [], [], []))

    # Upper-case flag letter => cuBLAS constant, one table per parameter kind
    trans_flags = Dict("N" => :CUBLAS_OP_N, "T" => :CUBLAS_OP_T, "C" => :CUBLAS_OP_C)    # cublasOperation_t
    fill_flags = Dict("L" => :CUBLAS_FILL_MODE_LOWER, "U" => :CUBLAS_FILL_MODE_UPPER)   # cublasFillMode_t
    side_flags = Dict("L" => :CUBLAS_SIDE_LEFT, "R" => :CUBLAS_SIDE_RIGHT)              # cublasSideMode_t
    diag_flags = Dict("N" => :CUBLAS_DIAG_NON_UNIT, "U" => :CUBLAS_DIAG_UNIT)           # cublasDiagType_t

    # Translates the string flag found at position i of a call to func into
    # the cuBLAS constant given by flags; expected is only used in messages.
    function flag_to_constant(arg, i, func, flags, expected)
        if !isa(arg, StarpuExprValue) || !isa(arg.value, String)
            error("Argument $i of ", func, " must be a string, got: ", arg)
        end

        key = uppercase(arg.value)

        if !haskey(flags, key)
            error("Unhandled value for argument $i of ", func, ": ", arg.value,
                  ", expecting ", expected)
        end

        return StarpuExprVar(flags[key])
    end

    function func_to_run(x :: StarpuExpr)

        if !(isa(x, StarpuExprCall) && haskey(blas_to_cublas, x.func))
            return x
        end

        cublas_func, trans_idx, fill_idx, scalar_idx, side_idx, diag_idx = blas_to_cublas[x.func]

        # Work on a copy so that the visited node is left intact
        new_args = copy(x.args)

        for i in trans_idx
            new_args[i] = flag_to_constant(new_args[i], i, x.func, trans_flags, "(\"N\", \"T\", or \"C\")")
        end

        for i in fill_idx
            new_args[i] = flag_to_constant(new_args[i], i, x.func, fill_flags, "(\"L\" or \"U\")")
        end

        # scalar parameters (alpha, beta, ...): alpha -> &alpha
        for i in scalar_idx
            if !isa(new_args[i], StarpuExprVar)
                error("Argument $i of ", x.func, " must be a variable")
            end
            new_args[i] = StarpuExprVar(Symbol("&", new_args[i].name))
        end

        for i in side_idx
            new_args[i] = flag_to_constant(new_args[i], i, x.func, side_flags, "(\"L\" or \"R\")")
        end

        for i in diag_idx
            new_args[i] = flag_to_constant(new_args[i], i, x.func, diag_flags, "(\"N\" or \"U\")")
        end

        call_args = [@parse(starpu_cublas_get_local_handle()), new_args...]

        # The C type is embedded in the symbol so that the generated
        # assignment reads as a declaration: cublasStatus_t statusXXX = ...
        status_varname = "status"*rand_string()
        status_var = StarpuExprVar(Symbol("cublasStatus_t "*status_varname))
        call_expr = StarpuExprCall(cublas_func, call_args)

        return StarpuExprBlock([StarpuExprAffect(status_var, call_expr),
                                starpu_parse(Meta.parse("""if $status_varname != CUBLAS_STATUS_SUCCESS
                                                            STARPU_CUBLAS_REPORT_ERROR($status_varname)
                                                        end""")),
                                @parse(cudaStreamSynchronize(starpu_cuda_get_local_stream()))])
    end

    return apply(func_to_run, expr)
end
"""
    Rewrites the host-side accesses to CUDA buffers found in cpu_instr so
    that they go through cudaMemcpy with a temporary variable.

    Each load     L: ... buffer[id]
    is replaced by the block
        Type varX
        cudaMemcpy(&varX, &buffer[id], sizeof(Type), cudaMemcpyDeviceToHost)
        L: ... varX

    Each store    S: buffer[id] = expr
    is replaced by the block
        Type varX
        varX = expr
        cudaMemcpy(&buffer[id], &varX, sizeof(Type), cudaMemcpyHostToDevice)

    Buffers, stores and loads are all collected on the incoming block before
    any rewrite happens; loads are rewritten first, then stores.
"""
function transform_cuda_device_loadstore(cpu_instr :: StarpuExprBlock)

    # Pointer variables bound to CUDA buffers, and their declared types
    buffer_vars = get_all_buffer_vars(cpu_instr)
    buffer_types = Dict{Symbol, Type}()
    for buf in buffer_vars
        buffer_types[buf.name] = buf.typ
    end

    stores = get_all_buffer_stores(cpu_instr, buffer_vars)
    loads = get_all_buffer_refs(cpu_instr, buffer_vars)

    rewritten = cpu_instr

    for (instr, ref) in loads
        tmp_name = "var"*rand_string()
        buf_type = buffer_types[Symbol(ref.ref.name)]
        ctype = starpu_type_traduction(eltype(buf_type))

        declaration = StarpuExprTypedVar(Symbol(tmp_name), eltype(buf_type))
        copy_to_host = StarpuExprCall(:cudaMemcpy,
                                      [StarpuExprAddress(StarpuExprVar(Symbol(tmp_name))),
                                       StarpuExprAddress(ref),
                                       StarpuExprVar(Symbol("sizeof($ctype)")),
                                       StarpuExprVar(:cudaMemcpyDeviceToHost)])
        patched_instr = substitute(instr, ref, StarpuExprVar(Symbol("$tmp_name")))

        replacement = Any[declaration, copy_to_host, patched_instr]
        rewritten = substitute(rewritten, instr, StarpuExprBlock(replacement))
    end

    for store in stores
        tmp_name = "var"*rand_string()
        buf_type = buffer_types[Symbol(store.var.ref.name)]
        ctype = starpu_type_traduction(eltype(buf_type))

        declaration = StarpuExprTypedVar(Symbol(tmp_name), eltype(buf_type))
        assignment = StarpuExprAffect(StarpuExprVar(Symbol("$tmp_name")), store.expr)
        copy_to_device = StarpuExprCall(:cudaMemcpy,
                                        [StarpuExprAddress(store.var),
                                         StarpuExprAddress(StarpuExprVar(Symbol(tmp_name))),
                                         StarpuExprVar(Symbol("sizeof($ctype)")),
                                         StarpuExprVar(:cudaMemcpyHostToDevice)])

        replacement = Any[declaration, assignment, copy_to_device]
        rewritten = substitute(rewritten, store, StarpuExprBlock(replacement))
    end

    return rewritten
end
"""
    Builds the kernel instructions that decompose the flat thread identifier
    THREAD_ID into one index per dimension of dims.

    The returned list starts with the declaration of THREAD_ID
    (blockIdx.x * blockDim.x + threadIdx.x) and its bound check against the
    product of all dimensions, followed by the declarations of
    kernel_ids__index_1 ... kernel_ids__index_N, each computed as
    (THREAD_ID / stride) % dim, where stride is the product of the
    dimensions that come after it in dims.

    Raises an error when dims is empty.
"""
function translate_index_code(dims :: Vector{StarpuExprVar})

    count = length(dims)

    if count == 0
        error("No dimension specified")
    end

    index_pattern = @parse € :: Int64 = (€ / €) % €
    thread_id = @parse THREAD_ID

    stride = StarpuExprValue(1)
    instructions = StarpuExpr[]

    # Walk the dimensions from last to first, prepending as we go so that
    # the final list is ordered by increasing dimension number.
    for k in count:-1:1
        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, k))
        pushfirst!(instructions, replace_pattern(index_pattern, index_lvalue, thread_id, stride, dims[k]))

        stride = StarpuExprCall(:(*), [stride, dims[k]])
    end

    thread_id_pattern = @parse begin

        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x

        if (€ >= €)
            return
        end
    end

    # At this point stride is the product of every dimension
    pushfirst!(instructions, replace_pattern(thread_id_pattern, thread_id, thread_id, stride))

    return instructions
end
starpu_parse(Symbol(:kernel_ids__index_, k)) + iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var) + + push!(ker_instr_to_add_later_on, iter_decl) + end + + + return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr +end + + + +function analyse_sets(ind_for :: StarpuIndepFor) + + + decl_pattern = @parse € :: Int64 = € + nblocks_decl_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK + + dims, ker_instr_to_add, pre_kernel_instr, kernel_args, kernel_instr = kernel_index_declarations(ind_for) + + dim_prod = @parse 1 + + for d in dims + dim_prod = StarpuExprCall(:(*), [dim_prod, d]) + end + + nthreads_var = @parse nthreads + nthreads_decl = replace_pattern(decl_pattern, nthreads_var, dim_prod) + push!(pre_kernel_instr, nthreads_decl) + + nblocks_var = @parse nblocks + nblocks_decl = replace_pattern(nblocks_decl_pattern, nblocks_var, nthreads_var) + push!(pre_kernel_instr, nblocks_decl) + + + index_decomposition = translate_index_code(dims) + + push!(kernel_instr, index_decomposition...) + push!(kernel_instr, ker_instr_to_add...) + + return pre_kernel_instr, kernel_args, kernel_instr +end diff --git a/julia/src/compiler/expression_manipulation.jl b/julia/src/compiler/expression_manipulation.jl new file mode 100644 index 0000000..c588a5f --- /dev/null +++ b/julia/src/compiler/expression_manipulation.jl @@ -0,0 +1,475 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+"""
+    Lenient comparison operator for structures and arrays.
+
+    The comparison code is generated from the argument types: values of
+    different types (and types themselves) are compared with `==`, arrays
+    are compared element by element, structures field by field, and
+    anything else with `==`.
+"""
+@generated function ≂(x, y)
+    # Inside a generated function, x and y are the *types* of the arguments.
+    if x != y || x <: Type
+        :(x == y)
+    elseif x <: Array
+        # Arrays are tested before the generic "has fields" case: since
+        # Julia 1.11 Array is itself a structure with fields, and comparing
+        # those fields would not compare the elements.
+        quote
+            if length(x) != length(y)
+                return false
+            end
+            for i in 1:length(x)
+                if !(x[i] ≂ y[i])
+                    return false
+                end
+            end
+            return true
+        end
+    elseif !isempty(fieldnames(x))
+        mapreduce(n -> :(x.$n ≂ y.$n), (a,b)->:($a && $b), fieldnames(x))
+    else
+        :(x == y)
+    end
+end
+
+"""
+    Returns a new expression where every occurrence of expr_to_replace into expr
+    has been replaced by new_expr
+"""
+function substitute(expr :: StarpuExpr, expr_to_replace :: StarpuExpr, new_expr :: StarpuExpr)
+
+    function func_to_apply(x :: StarpuExpr)
+        if (x ≂ expr_to_replace)
+            return new_expr
+        end
+
+        return x
+    end
+
+    return apply(func_to_apply, expr)
+end
+
+"""
+    Returns an expression where "€" symbols in expr were replaced
+    by the following expression list. Replacements are consumed one
+    by one, in the order in which `apply` reaches the "€" symbols.
+    An error is raised if expr holds more "€" symbols than there are
+    replacement expressions.
+
+    Ex : replace_pattern((@parse € = €), (@parse x), (@parse 1 + 1))
+            --> (StarpuExpr) "x = 1 + 1"
+"""
+function replace_pattern(expr :: StarpuExpr, replace_€ :: StarpuExpr...)
+
+    replace_index = 0
+
+    # Returns the next unused replacement expression.
+    function next_replacement()
+        replace_index += 1
+
+        if replace_index > length(replace_€)
+            error("replace_pattern : more \"€\" symbols than replacement expressions")
+        end
+
+        return replace_€[replace_index]
+    end
+
+    function func_to_apply(x :: StarpuExpr)
+
+        # Plain placeholder : replaced by the expression itself.
+        if x == @parse €
+            return next_replacement()
+        end
+
+        # Typed placeholder ("€ :: T") : the type annotation is kept.
+        if isa(x, StarpuExprTypedVar) && x.name == :€
+
+            replacement = next_replacement()
+
+            if isa(replacement, StarpuExprVar)
+                return StarpuExprTypedVar(replacement.name, x.typ)
+            end
+
+            return StarpuExprTypedExpr(replacement, x.typ)
+        end
+
+        # Placeholder used as a function name : only a variable can name a function.
+        if isa(x, StarpuExprFunction) && x.func == :€
+
+            replacement = next_replacement()
+
+            if !(isa(replacement, StarpuExprVar))
+                error("Can only replace a function name by a variable")
+            end
+
+            return StarpuExprFunction(x.ret_type, replacement.name, x.args, x.body)
+        end
+
+        return x
+    end
+
+    return apply(func_to_apply, expr)
+end
+
+
+
+import Base.any
+
+"""
+    Returns true if one of the sub-expression x in expr
+    is such as cond(x) is true, otherwise, it returns false.
+"""
+function any(cond :: Function, expr :: StarpuExpr)
+
+    err_to_catch = "Catch me, condition is true somewhere !"
+
+    function func_to_apply(x :: StarpuExpr)
+
+        if cond(x)
+            error(err_to_catch) # dirty, but it stops the traversal at the first match
+        end
+
+        return x
+    end
+
+    try
+        apply(func_to_apply, expr)
+    catch err
+
+        # Only the marker error raised above means "found" ; anything else is a real error.
+        if (isa(err, ErrorException) && err.msg == err_to_catch)
+            return true
+        end
+
+        throw(err)
+    end
+
+    return false
+end
+
+
+import Base.all
+
+"""
+    Returns true if every sub-expression x in expr
+    is such as cond(x) is true, otherwise, it returns false.
+"""
+function all(cond :: Function, expr :: StarpuExpr)
+    return !any(!cond, expr)
+end
+
+# visit_preorder(func, expr) calls func on expr, then visits the
+# sub-expressions of expr in the same way. The expression is returned
+# unchanged : the values returned by func are ignored.
+
+function visit_preorder(func :: Function, expr :: StarpuExprAffect)
+    func(expr)
+    visit_preorder(func, expr.var)
+    visit_preorder(func, expr.expr)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprBlock)
+    func(expr)
+    for e in expr.exprs
+        visit_preorder(func, e)
+    end
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprCall)
+    func(expr)
+    for a in expr.args
+        visit_preorder(func, a)
+    end
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprCudaCall)
+    func(expr)
+    # nblocks and threads_per_block are expressions too : visit them
+    # recursively, like the arguments.
+    visit_preorder(func, expr.nblocks)
+    visit_preorder(func, expr.threads_per_block)
+    for a in expr.args
+        visit_preorder(func, a)
+    end
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprField)
+    func(expr)
+    # Only `left` is a StarpuExpr ; `field` is a Symbol and `is_an_arrow`
+    # a Bool, so they are not handed to the visitor.
+    visit_preorder(func, expr.left)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprFor)
+    func(expr)
+    for d in expr.set_declarations
+        visit_preorder(func, d)
+    end
+    visit_preorder(func, expr.set)
+    visit_preorder(func, expr.body)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprFunction)
+    func(expr)
+    for a in expr.args
+        visit_preorder(func, a)
+    end
+    visit_preorder(func, expr.body)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprIf)
+    func(expr)
+    visit_preorder(func, expr.cond)
+    visit_preorder(func, expr.then_statement)
+    return expr
+end
+
+
+
+function visit_preorder(func :: Function, expr :: StarpuExprIfElse)
+    func(expr)
+    visit_preorder(func, expr.cond)
+    visit_preorder(func, expr.then_statement)
+    visit_preorder(func, expr.else_statement)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprInterval)
+    func(expr)
+    visit_preorder(func, expr.start)
+    visit_preorder(func, expr.step)
+    visit_preorder(func, expr.stop)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprRef)
+    func(expr)
+    visit_preorder(func, expr.ref)
+    for i in expr.indexes
+        visit_preorder(func, i)
+    end
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprAddress)
+    func(expr)
+    visit_preorder(func, expr.ref)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprBreak)
+    func(expr)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprReturn)
+    func(expr)
+    visit_preorder(func, expr.value)
+    return expr
+end
+
+# Fallback for every expression type without a dedicated method above.
+function visit_preorder(func :: Function, expr :: StarpuExpr)
+    func(expr)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprTypedExpr)
+    func(expr)
+    visit_preorder(func, expr.expr)
+    return expr
+end
+
+function visit_preorder(func :: Function, expr :: StarpuExprWhile)
+    func(expr)
+    visit_preorder(func, expr.cond)
+    visit_preorder(func, expr.body)
+    return expr
+end
+
+# function substitute_preorder(expr :: StarpuExprAffect, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+#     var = substitute_preorder(func, expr.var)
+#     expr = substitute_preorder(func, expr.expr)
+
+#     if var != expr.var || expr != expr.expr
+#         return StarpuExprAffect(var, expr)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprBlock, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     modified = false
+#     new_exprs = Vector{StarpuExpr}()
+#     for e in expr.exprs
+#         push!(new_exprs, substitute_preorder(func, e))
+#     end
+#     if new_exprs != expr.exprs
+#         return StarpuExprBlock(new_exprs)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprCall, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     new_args = Vector{StarpuExpr}()
+#     for a in expr.args
+#         push!(new_args, substitute_preorder(func, a))
+#     end
+#     if new_args != expr.args
+#         return StarpuExprCall(expr.func, new_args)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprCudaCall, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     new_args = Vector{StarpuExpr}()
+#     for a in expr.args
+#         push!(new_args, substitute_preorder(func, a))
+#     end
+#     if new_args != expr.args
+#         return new StarpuExprCudaCall(expr.ker_name, expr.nblocks, expr.threads_per_block, new_args)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprField, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     left = substitute_preorder(expr.left, match, replace)
+#     if left != expr.left
+#         return StarpuExprField(left, expr.field, expr.is_an_arrow)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprFor, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     new_set_declarations = Vector{StarpuExpr}()
+
+#     for d in expr.set_declarations
+#         substitute_preorder(func, d)
+#     end
+#     substitute_preorder(expr.set, match :: StarpuExpr, replace :: StarpuExpr)
+#     substitute_preorder(func, expr.body)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprFunction, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     for a in expr.args
+#         substitute_preorder(func, a)
+#     end
+#     substitute_preorder(e.body, match :: StarpuExpr, replace :: StarpuExpr)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprIf, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.cond)
+#     substitute_preorder(func, expr.then_statement)
+#     return expr
+# end
+
+
+
+# function substitute_preorder(expr :: StarpuExprIfElse, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.cond)
+#     substitute_preorder(func, expr.then_statement)
+#     substitute_preorder(func, expr.else_statement)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprInterval, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.start)
+#     substitute_preorder(func, expr.step)
+#     substitute_preorder(func, expr.stop)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprRef, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.ref)
+#     for i in expr.indexes
+#         substitute_preorder(func, i)
+#     end
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprAddress, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.ref)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprBreak, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprReturn, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.value)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExpr, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprTypedExpr, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.expr)
+#     return expr
+# end
+
+# function substitute_preorder(expr :: StarpuExprWhile, match :: StarpuExpr, replace :: StarpuExpr)
+#     if expr == match
+#         return replace
+#     end
+
+#     substitute_preorder(func, expr.cond)
+#     substitute_preorder(func, expr.body)
+#     return expr
+# end
diff --git a/julia/src/compiler/expressions.jl
b/julia/src/compiler/expressions.jl
new file mode 100644
index 0000000..68f5f75
--- /dev/null
+++ b/julia/src/compiler/expressions.jl
@@ -0,0 +1,972 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+#======================================================
+                AFFECTATION
+======================================================#
+
+# Root of the expression tree types.
+abstract type StarpuExpr end
+# Expressions that carry a type annotation.
+abstract type StarpuExprTyped <: StarpuExpr end
+
+
+# Variable with a type annotation ("name :: typ").
+struct StarpuExprTypedVar <: StarpuExprTyped
+    name :: Symbol
+    typ :: Type
+end
+
+# Arbitrary expression with a type annotation ("expr :: typ").
+struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
+    expr :: StarpuExpr
+    typ :: Type
+end
+
+# Assignment ("var = expr").
+struct StarpuExprAffect <: StarpuExpr
+    var :: StarpuExpr
+    expr :: StarpuExpr
+end
+
+# Sequence of expressions.
+struct StarpuExprBlock <: StarpuExpr
+    exprs :: Vector{StarpuExpr}
+end
+
+# Call of the function named func on args.
+struct StarpuExprCall <: StarpuExpr
+    func :: Symbol
+    args :: Vector{StarpuExpr}
+end
+
+# Launch of the CUDA kernel named ker_name.
+struct StarpuExprCudaCall <: StarpuExpr
+
+    ker_name :: Symbol
+
+    nblocks :: StarpuExpr
+    threads_per_block :: StarpuExpr
+
+    args :: Vector{StarpuExpr}
+
+end
+
+# Field access on left ; is_an_arrow selects "->" over "." when printed.
+struct StarpuExprField <: StarpuExpr
+
+    left :: StarpuExpr
+    field :: Symbol
+
+    is_an_arrow :: Bool
+end
+
+# Interval "start : step : stop", tagged with an identifier.
+struct StarpuExprInterval <: StarpuExpr
+    start :: StarpuExpr
+    step :: StarpuExpr
+    stop :: StarpuExpr
+
+    id :: String
+
+    # The identifier defaults to a fresh rand_string() unless given by keyword.
+    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
+        return new(start, step, stop, id)
+    end
+
+end
+
+# "for iter in set" loop around body.
+struct StarpuExprFor <: StarpuExpr
+
+    iter :: Symbol
+    set:: StarpuExprInterval
+    body :: StarpuExpr
+
+    is_independant :: Bool
+    set_declarations :: Vector{StarpuExpr}
+
+end
+
+# Function definition : return type, name, typed arguments and body.
+struct StarpuExprFunction <: StarpuExpr
+    ret_type :: Type
+    func :: Symbol
+    args :: Vector{StarpuExprTypedVar}
+    body :: StarpuExpr
+end
+
+# "if" without an "else" branch.
+struct StarpuExprIf <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+end
+
+
+# "if" with an "else" branch.
+struct StarpuExprIfElse <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+    else_statement :: StarpuExpr
+end
+
+# Indexed access "ref[indexes...]".
+struct StarpuExprRef <: StarpuExpr
+    ref :: StarpuExpr
+    indexes :: Vector{StarpuExpr}
+end
+
+# "return value".
+struct StarpuExprReturn <: StarpuExpr
+    value :: StarpuExpr
+end
+
+# "break".
+struct StarpuExprBreak <: StarpuExpr
+end
+
+# Plain variable.
+struct StarpuExprVar <: StarpuExpr
+    name :: Symbol
+end
+
+# Marker for an invalid expression.
+struct StarpuExprInvalid <: StarpuExpr
+end
+
+# Literal value.
+struct StarpuExprValue <: StarpuExpr
+    value :: Any
+end
+
+# "while cond" loop around body.
+struct StarpuExprWhile <: StarpuExpr
+    cond :: StarpuExpr
+    body :: StarpuExpr
+end
+
+# Address of ref (printed with a leading "&").
+struct StarpuExprAddress <: StarpuExpr
+    ref :: StarpuExpr
+end
+
+# Turns a Julia "lhs = rhs" expression into a StarpuExprAffect.
+function starpu_parse_affect(x :: Expr)
+
+    if (x.head != :(=))
+        error("Invalid \"affectation\" expression")
+    end
+
+    lhs = starpu_parse(x.args[1])
+    rhs = starpu_parse(x.args[2])
+
+    return StarpuExprAffect(lhs, rhs)
+end
+
+
+# An assignment only equals another assignment whose both sides are equal.
+function equals(x :: StarpuExprAffect, y :: StarpuExpr)
+
+    if typeof(y) == StarpuExprAffect
+        return equals(x.var, y.var) && equals(x.expr, y.expr)
+    end
+
+    return false
+end
+
+
+# Prints "var = expr". When the left-hand side is a typed variable, the
+# right-hand side is wrapped in a cast to that type.
+function print(io :: IO, x :: StarpuExprAffect ; indent = 0, restrict = false)
+
+    print(io, x.var, indent = indent)
+    print(io, " = ")
+
+    # transtyping to avoid warning (or errors for cuda) during compilation time
+    if !isa(x.var, StarpuExprTypedVar)
+        print(io, x.expr, indent = indent)
+        return
+    end
+
+    print(io, "(", starpu_type_traduction(x.var.typ), ") (")
+    print(io, x.expr, indent = indent)
+    print(io, ")")
+
+    return
+end
+
+# Rebuilds the assignment from its transformed sides, then transforms it.
+function apply(func :: Function, expr :: StarpuExprAffect)
+
+    new_var = apply(func, expr.var)
+    new_value = apply(func, expr.expr)
+    rebuilt = StarpuExprAffect(new_var, new_value)
+
+    return func(rebuilt)
+end
+
+#======================================================
+                BLOCK
+(series of instruction, not C variable scoping block)
+======================================================#
+
+
+# is_unwanted tells which entries of a Julia block are dropped while
+# parsing : line number nodes are, symbols and expressions are not.
+is_unwanted(x :: Symbol) = false
+
+is_unwanted(x :: LineNumberNode) = true
+
+is_unwanted(x :: Expr) = false
+
+# Turns a Julia :block expression into a StarpuExprBlock.
+function starpu_parse_block(x :: Expr)
+    if (x.head != :block)
+        error("Invalid \"block\" expression")
+    end
+
+    wanted = filter(!is_unwanted, x.args)
+    parsed = map(starpu_parse, wanted)
+
+    return StarpuExprBlock(parsed)
+end
+
+
+# Prints the expressions one after the other, each followed by ";", with a
+# new line between two consecutive expressions.
+function print(io :: IO, x :: StarpuExprBlock ; indent = 0, restrict=false)
+    count = length(x.exprs)
+
+    for (position, sub_expr) in enumerate(x.exprs)
+        print(io, sub_expr, indent = indent)
+        print(io, ";")
+        if (position != count)
+            print_newline(io, indent)
+        end
+    end
+end
+
+
+# Rebuilds the block from its transformed expressions, then transforms it.
+function apply(func :: Function, expr :: StarpuExprBlock)
+
+    new_exprs = map((x -> apply(func, x)), expr.exprs)
+    rebuilt = StarpuExprBlock(new_exprs)
+
+    return func(rebuilt)
+end
+
+#======================================================
+                FUNCTION CALL
+======================================================#
+
+
+# Turns a Julia :call expression into a StarpuExprCall. A call to ":" is
+# parsed as an interval instead.
+function starpu_parse_call(x :: Expr)
+
+    if (x.head != :call)
+        error("Invalid \"call\" expression")
+    end
+
+    func = starpu_parse(x.args[1])
+    if (x.args[1] == Symbol(":"))
+        return starpu_parse_interval(x)
+    end
+    if (!isa(func, StarpuExprVar))
+        error("Invalid \"call\" expression : function must be a variable")
+    end
+
+    args = map(starpu_parse, x.args[2:end])
+
+    return StarpuExprCall(func.name, args)
+end
+
+
+# Functions printed between their arguments when they have at least two.
+starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(!=), :(%))
+
+
+# Prints the call as "func(arg1, arg2, ...)".
+function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0, restrict=false)
+
+    print(io, x.func, "(")
+
+    for i in (1 : length(x.args))
+        if (i != 1)
+            print(io, ", ")
+        end
+        print(io, x.args[i], indent = indent)
+    end
+
+    print(io, ")")
+end
+
+
+# Prints the call as "(arg1) func (arg2) ...", every argument in parentheses.
+function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    for i in (1 : length(x.args))
+        if (i != 1)
+            print(io, " ", x.func, " ")
+        end
+        print(io, "(")
+        print(io, x.args[i], indent = indent)
+        print(io, ")")
+    end
+end
+
+# Infix form for the known operators used with two arguments or more,
+# prefix form otherwise.
+function print(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+
+    if (length(x.args) >= 2 && x.func in starpu_infix_operators)
+        print_infix(io, x, indent = indent)
+    else
+        print_prefix(io, x, indent = indent)
+    end
+end
+
+
+# Rebuilds the call from its transformed arguments, then transforms it.
+function apply(func :: Function, expr :: StarpuExprCall)
+
+    return func(StarpuExprCall(expr.func, map((x -> apply(func, x)), expr.args)))
+end
+
+#======================================================
+                CUDA KERNEL CALL
+======================================================#
+
+
+# Prints the kernel launch on the local StarPU CUDA stream, followed by a
+# check of cudaGetLastError().
+function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, expr.ker_name)
+    print_newline(io, indent + starpu_indent_size)
+    print(io, "<<< ")
+    print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size)
+    print(io, ", ")
+    print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size)
+    print(io, ", 0, starpu_cuda_get_local_stream()")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, ">>> (")
+
+    for i in (1 : length(expr.args))
+
+        if (i != 1)
+            print(io, ", ")
+            # Line break after every group of four arguments.
+            if (i % 4 == 1)
+                print_newline(io, indent + 2 * starpu_indent_size + 1)
+            end
+        end
+
+        print(io, expr.args[i], indent = indent + 2 * starpu_indent_size)
+
+    end
+
+    print(io, ");")
+    print_newline(io, indent)
+    print(io, "cudaError_t status = cudaGetLastError();")
+    print_newline(io, indent)
+    print(io, "if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);")
+    print_newline(io, indent)
+
+end
+
+
+# Rebuilds the kernel call from its transformed sub-expressions, then
+# transforms it. As for every other expression type, the sub-expressions
+# are traversed with apply and the rebuilt node is handed to func.
+function apply(func :: Function, expr :: StarpuExprCudaCall)
+
+    nblocks = apply(func, expr.nblocks)
+    threads_per_block = apply(func, expr.threads_per_block)
+    args = map((x -> apply(func, x)), expr.args)
+
+    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
+end
+
+
+#======================================================
+                STRUCTURE FIELDS
+======================================================#
+
+
+# Turns a Julia "left.field" expression into a StarpuExprField printed with ".".
+function starpu_parse_field(x :: Expr)
+
+    if x.head != :(.) || length(x.args) != 2
+        error("Invalid parsing of dot expression")
+    end
+
+    left = starpu_parse(x.args[1])
+
+    if (!isa(x.args[2], QuoteNode) || !isa(x.args[2].value, Symbol))
+        error("Invalid parsing of dot expression")
+    end
+
+    return StarpuExprField(left, x.args[2].value, false)
+end
+
+
+# Prints "(left).field" or "(left)->field".
+function print(io :: IO, x :: StarpuExprField ; indent = 0,restrict=false)
+    print(io, "(")
+    print(io, x.left, indent = indent)
+    print(io, ")", x.is_an_arrow ? "->" : '.', x.field)
+end
+
+
+# Rebuilds the field access from its transformed left part, then transforms
+# it. The left part is traversed with apply so that nested expressions are
+# reached too.
+function apply(func :: Function, expr :: StarpuExprField)
+    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))
+end
+
+#======================================================
+                FOR LOOPS
+======================================================#
+
+
+# Turns a Julia "for iter = interval" loop into a StarpuExprFor. The
+# iterator must be a variable and the set of values an interval.
+function starpu_parse_for(x :: Expr; is_independant = false)
+
+    if (x.head != :for)
+        error("Invalid \"for\" expression")
+    end
+    affect = x.args[1]
+
+    if (affect.head != :(=))
+        error("Invalid \"for\" iterator affectation")
+    end
+
+    iter = starpu_parse(affect.args[1])
+
+    if (!isa(iter, StarpuExprVar))
+        error("Invalid \"for\" iterator")
+    end
+
+    set = starpu_parse(affect.args[2])
+    if (!isa(set, StarpuExprInterval))
+        error("Set of values in \"for\" loop must be an interval")
+    end
+
+    body = starpu_parse(x.args[2])
+
+    return StarpuExprFor(iter.name, set, body, is_independant, StarpuExpr[])
+end
+
+
+# Prints the loop inside its own "{ }" scope, after its set declarations.
+# The start_/stop_/step_/dim_/iter_ names are built from the interval id.
+function print(io :: IO, x :: StarpuExprFor ; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, "{")
+    indent += starpu_indent_size
+    print_newline(io, indent)
+
+    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
+
+    id = x.set.id
+
+    start = "start_" * id
+    stop = "stop_" * id
+    step = "step_" * id
+    dim = "dim_" * id
+    iter = "iter_" * id
+
+    print_newline(io, indent, 2)
+
+    if isa(x.set.step, StarpuExprValue)
+        # Literal step : its sign gives the direction of the comparison.
+        print(io, "for ($(x.iter) = $start ; ")
+        comparison_op = (x.set.step.value >= 0) ? "<=" : ">="
+        print(io, "$(x.iter) $comparison_op $stop ; ")
+        print(io, "$(x.iter) += $(x.set.step.value))")
+
+    else
+        # Other steps : count the iterations with a separate counter.
+        print(io, "for ($iter = 0, $(x.iter) = $start ; ")
+        print(io, "$iter < $dim ; ")
+        print(io, "$iter += 1, $(x.iter) += $step)")
+
+    end
+
+    print_newline(io, indent)
+    print(io, "{")
+    indent += starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, x.body, indent = indent)
+
+    indent -= starpu_indent_size
+    print_newline(io, indent)
+    print(io, "}")
+
+    indent -= starpu_indent_size
+    print_newline(io, indent)
+    print(io, "}")
+
+    print_newline(io, indent)
+end
+
+
+# Rebuilds the loop from its transformed parts, then transforms it.
+function apply(func :: Function, expr :: StarpuExprFor)
+
+    set_declarations = map( (x -> apply(func, x)), expr.set_declarations)
+    set = apply(func, expr.set)
+    body = apply(func, expr.body)
+
+    return func(StarpuExprFor(expr.iter, set, body, expr.is_independant, set_declarations))
+end
+
+
+#======================================================
+                FUNCTION DECLARATION
+======================================================#
+
+
+# Turns a Julia function definition into a StarpuExprFunction. The
+# prototype must carry a return type and only typed variables as arguments.
+function starpu_parse_function(x :: Expr)
+
+    if (x.head != :function)
+        error("Invalid \"function\" expression")
+    end
+
+    typed_decl = starpu_parse(x.args[1])
+
+    if (!isa(typed_decl, StarpuExprTypedExpr))
+        error("Invalid \"function\" prototype : a return type must me explicited")
+    end
+
+    prototype = typed_decl.expr
+
+    if (!isa(prototype, StarpuExprCall))
+        error("Invalid \"function\" prototype")
+    end
+
+    arg_list = StarpuExprTypedVar[]
+
+    for type_arg in prototype.args
+        if (!isa(type_arg, StarpuExprTypedVar))
+            error("Invalid \"function\" argument list")
+        end
+        push!(arg_list, type_arg)
+    end
+
+    body = starpu_parse(x.args[2])
+    return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body)
+end
+
+
+# Prints the prototype, then the body between braces.
+function print(io :: IO, x :: StarpuExprFunction ; indent = 0,restrict=false)
+
+    print(io, starpu_type_traduction(x.ret_type), " ")
+    print(io, x.func, '(')
+
+    for i in (1 : length(x.args))
+
+        if (i != 1)
+            print(io, ", ")
+            # Line break after every group of four arguments.
+            if (i % 4 == 1)
+                print_newline(io, indent + starpu_indent_size + length(String(x.func)) + 13)
+            end
+        end
+        print(io, x.args[i], indent = indent + starpu_indent_size, restrict = true)
+    end
+
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}\n\n")
+    print_newline(io, indent)
+end
+
+
+# Rebuilds the function from its transformed arguments and body, then
+# transforms it.
+function apply(func :: Function, expr :: StarpuExprFunction)
+
+    args = map((x -> apply(func, x)), expr.args)
+    body = apply(func, expr.body)
+
+    return func(StarpuExprFunction(expr.ret_type, expr.func, args, body))
+end
+
+
+#======================================================
+                IF STATEMENT
+======================================================#
+
+
+# Turns a Julia :if expression into a StarpuExprIf, or into a
+# StarpuExprIfElse when it holds more than a condition and one branch.
+function starpu_parse_if(x :: Expr)
+
+    if (x.head != :if)
+        error("Invalid \"if\" expression")
+    end
+
+    len = length(x.args)
+
+    if (len < 2)
+        error("Invalid \"if\" statement")
+    end
+
+    cond = starpu_parse(x.args[1])
+    then_statement = starpu_parse(x.args[2])
+
+    if (len == 2)
+        return StarpuExprIf(cond, then_statement)
+    end
+
+    else_statement = starpu_parse(x.args[3])
+
+    return StarpuExprIfElse(cond, then_statement, else_statement)
+end
+
+
+# Prints the condition and the "then" branch, followed by the "else"
+# branch for a StarpuExprIfElse.
+function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, "if (")
+    print(io, x.cond, indent = indent + starpu_indent_size)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.then_statement, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+
+    if (!isa(x, StarpuExprIfElse))
+        return
+    end
+
+    print(io, " else")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.else_statement, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+
+end
+
+
+# Rebuilds the statement from its transformed parts, then transforms it.
+function apply(func :: Function, expr :: StarpuExprIf)
+
+    cond = apply(func, expr.cond)
+    then_statement = apply(func, expr.then_statement)
+
+    return func(StarpuExprIf(cond, then_statement))
+end
+
+
+# Rebuilds the statement from its transformed parts, then transforms it.
+function apply(func :: Function, expr :: StarpuExprIfElse)
+
+    cond = apply(func, expr.cond)
+    then_statement = apply(func, expr.then_statement)
+    else_statement = apply(func, expr.else_statement)
+
+    return func(StarpuExprIfElse(cond, then_statement, else_statement))
+end
+
+#======================================================
+                INTERVALS
+======================================================#
+
+
+# Turns a call to ":" into a StarpuExprInterval. With two operands
+# ("start : stop") the step is 1 ; otherwise the operands are
+# "start : step : stop".
+function starpu_parse_interval(x :: Expr)
+
+    if (x.head != :(call))
+        error("Invalid \"interval\" expression")
+    end
+    start = starpu_parse(x.args[2])
+    # Second operand : the stop of "start : stop", the step of "start : step : stop".
+    second = starpu_parse(x.args[3])
+
+    if (length(x.args) == 3)
+        return StarpuExprInterval(start, StarpuExprValue(1), second)
+    end
+
+    stop = starpu_parse(x.args[4])
+
+    return StarpuExprInterval(start, second, stop)
+end
+
+
+# Rebuilds the interval from its transformed bounds, keeping its id, then
+# transforms it.
+function apply(func :: Function, expr :: StarpuExprInterval)
+
+    start = apply(func, expr.start)
+    step = apply(func, expr.step)
+    stop = apply(func, expr.stop)
+
+    return func(StarpuExprInterval(start, step, stop, id = expr.id))
+end
+
+#======================================================
+                ARRAYS AND REFERENCES
+======================================================#
+
+
+# Turns a Julia "ref[indexes...]" expression into a StarpuExprRef.
+function starpu_parse_ref(x :: Expr)
+
+    if (x.head != :ref)
+        error("Invalid \"reference\" expression")
+    end
+
+    ref = starpu_parse(x.args[1])
+    indexes = map(starpu_parse, x.args[2:end])
+
+    #=
+    StarpuExpr[]
+
+    for i in (2 : length(x.args))
+        push!(indexes, starpu_parse(x.args[i]))
+    end=#
+
+    return StarpuExprRef(ref, indexes)
+end
+
+
+# A reference only equals another reference with an equal base and equal
+# indexes.
+function equals(x :: StarpuExprRef, y :: StarpuExpr)
+
+    if typeof(y) != StarpuExprRef
+        return false
+    end
+
+    if !equals(x.ref, y.ref) || length(x.indexes) != length(y.indexes)
+        return false
+    end
+
+    return all(map(equals, x.indexes, y.indexes))
+end
+
+
+# Prints "ref[index1][index2]...".
+function print(io :: IO, x :: StarpuExprRef ; indent = 0,restrict=false)
+
+    print(io, x.ref, indent = indent)
+
+    for i in (1 : length(x.indexes))
+        print(io, "[")
+        print(io, x.indexes[i], indent = indent)
+        print(io, "]")
+    end
+
+end
+
+# Rebuilds the reference from its transformed parts, then transforms it.
+function apply(func :: Function, expr :: StarpuExprRef)
+
+    ref = apply(func, expr.ref)
+    indexes = map((x -> apply(func, x)), expr.indexes)
+
+    return func(StarpuExprRef(ref, indexes))
+end
+
+# Prints "&ref".
+function print(io :: IO, x :: StarpuExprAddress ; indent = 0, restrict=false)
+    print(io, "&")
+    print(io, x.ref, indent = indent)
+end
+
+# Rebuilds the address from its transformed operand, then transforms it.
+function apply(func :: Function, expr :: StarpuExprAddress)
+    ref = apply(func, expr.ref)
+    return func(StarpuExprAddress(ref))
+end
+
+#======================================================
+                BREAK EXPRESSION
+======================================================#
+
+# Turns a Julia :break expression into a StarpuExprBreak.
+function starpu_parse_break(x :: Expr)
+    if (x.head != :break)
+        error("Invalid \"break\" expression")
+    end
+
+    return StarpuExprBreak()
+end
+
+# `restrict` is accepted (and ignored) like in the other print methods.
+function print(io :: IO, x :: StarpuExprBreak ; indent = 0, restrict = false)
+    print(io, "break")
+end
+
+function apply(func :: Function, expr :: StarpuExprBreak)
+
+    return func(StarpuExprBreak())
+end
+#======================================================
+                RETURN EXPRESSION
+======================================================#
+
+
+# Turns a Julia :return expression into a StarpuExprReturn.
+function starpu_parse_return(x :: Expr)
+    if (x.head != :return)
+        error("Invalid \"return\" expression")
+    end
+
+    value = starpu_parse(x.args[1])
+    # Remove type associated to a single, for a return
+    # allows matching with ExprVar
+    if (isa(value, StarpuExprTypedVar))
+        value = StarpuExprVar(value.name)
+    end
+
+    return StarpuExprReturn(value)
+end
+
+function print(io :: IO, x :: StarpuExprReturn ; indent = 0,restrict=false)
+    print(io, "return ")
+    print(io, x.value, indent = indent)
+end
+
+# Rebuilds the return from its transformed value, then transforms it.
+function apply(func :: Function, expr :: StarpuExprReturn)
+
+    return func(StarpuExprReturn(apply(func, expr.value)))
+end
+
+# Fallback for every expression type without a dedicated apply method :
+# the expression is handed to func as it is.
+function apply(func :: Function, expr :: StarpuExpr)
+    return func(expr)
+end
+
+print(io :: IO, x :: StarpuExprVar ; indent = 0, restrict = false) = print(io, x.name)
+
+# Prints a literal : nothing at all for `nothing`, strings between double
+# quotes, characters between single quotes, other values as they are.
+function print(io :: IO, x :: StarpuExprValue ; indent = 0,restrict=false)
+
+    value = x.value
+
+    # Identity test : `==` can be overloaded and may not return a Bool.
+    if value === nothing
+        return
+    end
+
+    if isa(value, AbstractString)
+        print(io, '"', value, '"')
+        return
+    end
+
+    if isa(value, Char)
+        print(io, '\'', value, '\'')
+        return
+    end
+
+    print(io, value)
+end
+
+
+# `restrict` is accepted (and ignored) like in the other print methods.
+print(io :: IO, x :: StarpuExprInvalid ; indent = 0, restrict = false) = print(io, "INVALID")
+
+
+# Anything without a dedicated starpu_parse method is kept as a literal value.
+function starpu_parse(raw_value :: Any)
+    return StarpuExprValue(raw_value)
+end
+
+# A symbol is a variable.
+function starpu_parse(sym :: Symbol)
+    return StarpuExprVar(sym)
+end
+
+#======================================================
+                TYPED EXPRESSION
+======================================================#
+
+
+# Turns a Julia "expr :: typ" expression into a StarpuExprTypedVar when
+# expr is a variable, into a StarpuExprTypedExpr otherwise. The type is
+# obtained by evaluating the annotation.
+function starpu_parse_typed(x :: Expr)
+
+    if (x.head != :(::))
+        error("Invalid type assigned expression")
+    end
+
+    expr = starpu_parse(x.args[1])
+    typ = nothing
+
+    try
+        typ = eval(x.args[2]) :: Type
+    catch
+        print(x.args[2])
+        error("Invalid type in type assigned expression")
+    end
+
+    if (isa(expr, StarpuExprVar))
+        return StarpuExprTypedVar(expr.name, typ)
+    end
+
+    return StarpuExprTypedExpr(expr, typ)
+end
+
+# Returns the printed name of type x : arrays and pointers give the name of
+# their element type followed by one "*" per level, other types are looked
+# up in starpu_type_traduction_dict.
+function starpu_type_traduction(x)
+    if x <: Array
+        return starpu_type_traduction(eltype(x)) * "*"
+    end
+
+    if x <: Ptr
+        depth = 1
+        type = eltype(x)
+        while type <: Ptr
+            depth +=1
+            type = eltype(type)
+        end
+
+        return starpu_type_traduction(type) * "*"^depth
+    end
+
+    return starpu_type_traduction_dict[x]
+
+end
+
+# A typed variable is printed as "type name" ; for a typed expression only
+# the expression is printed.
+function print(io :: IO, x :: StarpuExprTyped ; indent = 0,restrict=false)
+
+    if (isa(x, StarpuExprTypedVar))
+        print(io,starpu_type_traduction(x.typ), " ")
+        #if (restrict)
+        #    print(io,"restrict ");
+        #end
+        print(io, x.name)
+    else
+        print(io, x.expr, indent = indent)
+    end
+end
+
+
+# Rebuilds the typed expression from its transformed expression, then
+# transforms it.
+function apply(func :: Function, expr :: StarpuExprTypedExpr)
+
+    new_expr = apply(func, expr.expr)
+
+    return func(StarpuExprTypedExpr(new_expr, expr.typ))
+end
+
+#======================================================
+                While loop
+======================================================#
+
+
+# Turns a Julia :while expression into a StarpuExprWhile.
+function starpu_parse_while(x :: Expr)
+
+    if (x.head != :while)
+        error("Invalid \"while\" loop")
+    end
+
+    len = length(x.args)
+
+    if (len < 2)
+        error("Invalid \"while\" loop")
+    end
+
+    cond = starpu_parse(x.args[1])
+    body = starpu_parse(x.args[2])
+
+    return StarpuExprWhile(cond, body)
+end
+
+
+# `restrict` is accepted (and ignored) like in the other print methods.
+function print(io :: IO, x :: StarpuExprWhile ; indent = 0, restrict = false)
+    print_newline(io, indent)
+    print(io, "while (")
+    print(io, x.cond, indent = indent + starpu_indent_size)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+# Rebuilds the loop from its transformed parts, then transforms it.
+function apply(func :: Function, expr :: StarpuExprWhile)
+
+    cond = apply(func, expr.cond)
+    body = apply(func, expr.body)
+
+    return func(StarpuExprWhile(cond, body))
+end
diff --git a/julia/src/compiler/file_generation.jl b/julia/src/compiler/file_generation.jl
new file mode 100644
index 0000000..a14ce70
--- /dev/null
+++ b/julia/src/compiler/file_generation.jl
@@ -0,0 +1,170 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+const cpu_kernel_file_start = "#include
+#include
+#include
+#include
+
+#include \"blas.h\"
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+    return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (stop >= start){
+        return jlstarpu_max(0, (stop - start + 1) / step);
+    } else {
+        return jlstarpu_max(0, (stop - start - 1) / step);
+    }
+}
+
+"
+
+const cuda_kernel_file_start = "#include
+#include
+#include
+#include
+#include
+
+#define THREADS_PER_BLOCK 64
+
+__attribute__((unused)) static inline long long jlstarpu_max(long long a, long long b)
+{
+    return (a > b) ? a : b;
+}
+
+__attribute__((unused)) static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (stop >= start){
+        return jlstarpu_max(0, (stop - start + 1) / step);
+    } else {
+        return jlstarpu_max(0, (stop - start - 1) / step);
+    }
+}
+
+
+__attribute__((unused)) __device__ static inline long long jlstarpu_max__device(long long a, long long b)
+{
+    return (a > b) ? a : b;
+}
+
+__attribute__((unused)) __device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
+{
+    if (stop >= start){
+        return jlstarpu_max__device(0, (stop - start + 1) / step);
+    } else {
+        return jlstarpu_max__device(0, (stop - start - 1) / step);
+    }
+}
+
+"
+
+"""
+    Opens a new Cuda source file, where generated GPU kernels will be written
+"""
+function starpu_new_cuda_kernel_file(file_name :: String)
+
+    global generated_cuda_kernel_file_name = file_name
+    # The do-block form closes the file even when print throws
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cuda_kernel_file_start)
+    end
+
+    return nothing
+end
+
+export @target
+macro target(x)
+    targets = eval(x)
+    return quote
+        starpu_target=$targets
+        global starpu_target
+    end
+end
+
+"""
+    Parses the function definition x and writes its C kernel to genc_<name>.c and/or its CUDA kernel to gencuda_<name>.cu, depending on starpu_target
+"""
+macro codelet(x)
+    parsed = starpu_parse(x)
+    name=string(x.args[1].args[1].args[1]);
+    cpu_name = name
+    cuda_name = "CUDA_"*name
+    dump(name)
+    parse_scalar_parameters(parsed, name)
+    c_struct_param_decl = generate_c_struct_param_declaration(name)
+    cpu_expr = transform_to_cpu_kernel(parsed)
+
+    generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c")
+    generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu")
+    # One source file per enabled target; the do-block closes it even if generation throws
+    if (starpu_target & STARPU_CPU != 0)
+        open(generated_cpu_kernel_file_name, "w") do kernel_file
+            debug_print("generating ", generated_cpu_kernel_file_name)
+            print(kernel_file, cpu_kernel_file_start)
+            print(kernel_file, c_struct_param_decl)
+            print(kernel_file, cpu_expr)
+        end
+        CPU_CODELETS[name]=cpu_name
+    end
+
+    if (starpu_target & STARPU_CUDA!=0) && STARPU_USE_CUDA == 1
+        open(generated_cuda_kernel_file_name, "w") do kernel_file
+            debug_print("generating ", generated_cuda_kernel_file_name)
+            print(kernel_file, cuda_kernel_file_start)
+            prekernel, kernel = transform_to_cuda_kernel(parsed)
+            # transform_to_cuda_kernel may return no device kernel; only the host wrapper is written then
+            if kernel !== nothing
+                print(kernel_file, "__global__ ", kernel)
+            end
+
+            print(kernel_file, c_struct_param_decl)
+            print(kernel_file, "\nextern \"C\" ", prekernel)
+        end
+        CUDA_CODELETS[name]=cuda_name
+    end
+end
+
+function parse_scalar_parameters(expr :: StarpuExprFunction, codelet_name)
+    scalar_parameters = []
+    for i in (1 : length(expr.args))
+        type = expr.args[i].typ
+        if (type <: Number || type <: AbstractChar)
+            push!(scalar_parameters, (expr.args[i].name, type))
+        end
+    end
+
+    CODELETS_SCALARS[codelet_name] = scalar_parameters
+
+    # declare a structure carrying the scalar parameters, under a randomly suffixed name
+    struct_params_name = Symbol("params_", rand_string())
+    structure_decl_str = "mutable struct " * "$struct_params_name\n"
+    for p in scalar_parameters
+        structure_decl_str *= "$(p[1])::$(p[2])\n"
+    end
+    structure_decl_str *= "end"
+    eval(Meta.parse(structure_decl_str))
+
+    # add the structure type to the type translation dictionary
+    add_to_dict_str = "starpu_type_traduction_dict[$struct_params_name] = \"struct $struct_params_name\""
+    eval(Meta.parse(add_to_dict_str))
+
+    # save structure name
+    CODELETS_PARAMS_STRUCT[codelet_name] = struct_params_name
+end
diff --git a/julia/src/compiler/include.jl b/julia/src/compiler/include.jl
new file mode 100644
index 0000000..1783f38
--- /dev/null
+++ b/julia/src/compiler/include.jl
@@ -0,0 +1,28 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+export starpu_new_cpu_kernel_file
+export starpu_new_cuda_kernel_file
+export @codelet
+export @target
+
+include("utils.jl")
+include("expressions.jl")
+include("parsing.jl")
+include("expression_manipulation.jl")
+include("c.jl")
+include("cuda.jl")
+include("file_generation.jl")
+
diff --git a/julia/src/compiler/parsing.jl b/julia/src/compiler/parsing.jl
new file mode 100644
index 0000000..2769d93
--- /dev/null
+++ b/julia/src/compiler/parsing.jl
@@ -0,0 +1,67 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+
+#======================================================
+                GLOBAL PARSING
+======================================================#
+
+
+# Maps the head of an Expr to the function able to parse it
+starpu_parse_key_word_parsing_function = Dict{Symbol, Function}()
+
+"""
+    Translates x Expr into a new StarpuExpr object
+"""
+function starpu_parse(x :: Expr)
+    # @parallel is the only macro call accepted; its third argument is parsed as a for loop
+    if (x.head == :macrocall)
+        if (x.args[1] != Symbol("@parallel"))
+            error("Only @parallel macro, used before a for loop, is allowed ($(x.args[1]) was found)")
+        end
+
+        if (length(x.args) != 3)
+            error("Invalid usage of @parallel macro (expected 3 arguments, got $(length(x.args)))")
+        end
+        return starpu_parse_for(x.args[3], is_independant = true)
+    end
+    # Any other head goes through the dispatch table; unknown heads yield StarpuExprInvalid
+    if !haskey(starpu_parse_key_word_parsing_function, x.head)
+        return StarpuExprInvalid() #TODO error ?
+    end
+
+    return starpu_parse_key_word_parsing_function[x.head](x)
+
+end
+
+for kw in (:if, :call, :for, :block, :return, :function, :while, :ref, :break)
+    starpu_parse_key_word_parsing_function[kw] = eval(Symbol(:starpu_parse_, kw))
+end
+# Heads that are operators are mapped by hand to their parsing function
+starpu_parse_key_word_parsing_function[:(:)] = starpu_parse_interval
+starpu_parse_key_word_parsing_function[:(::)] = starpu_parse_typed
+starpu_parse_key_word_parsing_function[:(=)] = starpu_parse_affect
+starpu_parse_key_word_parsing_function[:(.)] = starpu_parse_field
+
+
+"""
+    Executes the starpu_parse function on the following expression,
+    and returns the obtained StarpuExpr
+"""
+macro parse(x)
+    y = Expr(:quote, x)
+    :(starpu_parse($y))
+end
diff --git a/julia/src/compiler/utils.jl b/julia/src/compiler/utils.jl
new file mode 100644
index 0000000..1f40f39
--- /dev/null
+++ b/julia/src/compiler/utils.jl
@@ -0,0 +1,53 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import Base.print
+
+"""
+    Writes n_lines line breaks to io, then indent spaces
+"""
+function print_newline(io :: IO, indent = 0, n_lines = 1)
+    foreach(_ -> print(io, "\n"), 1 : n_lines)
+    foreach(_ -> print(io, " "), 1 : indent)
+end
+
+# Indentation step, in spaces, shared by the code-printing methods
+starpu_indent_size = 4
+
+"""
+    Returns one random character among 0-9, a-z and A-Z
+"""
+function rand_char()
+    alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    # a random UInt reduced modulo 62 gives the position in the alphabet
+    position = Int(rand(UInt) % 62) + 1
+    return alphabet[position]
+end
+
+"""
+    Returns a string of `size` characters, each one drawn by rand_char()
+"""
+function rand_string(size = 8)
+    chars = (rand_char() for _ in (1 : size))
+    return join(chars)
+end
+
+"""
+    Passes cmd to the system() function of the C library and returns its Cint result
+"""
+function system(cmd :: String)
+    status = ccall((:system, "libc"), Cint, (Cstring,), cmd)
+    return status
+end
diff --git a/julia/src/data.jl b/julia/src/data.jl
new file mode 100644
index 0000000..25f1e48
--- /dev/null
+++ b/julia/src/data.jl
@@ -0,0 +1,235 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+const StarpuDataHandlePointer = Ptr{Cvoid}
+StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
+
+@enum(StarpuDataFilterFunc,
+      STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+      STARPU_MATRIX_FILTER_BLOCK = 1,
+      STARPU_VECTOR_FILTER_BLOCK = 2,
+)
+
+export starpu_data_filter
+function starpu_data_filter(filter_func ::StarpuDataFilterFunc, nchildren ::Integer)
+    output = starpu_data_filter(zero)
+    output.nchildren = UInt32(nchildren)
+    # Each enum value selects the wrapper-library symbol bearing the same (lowercase) name
+    if filter_func == STARPU_MATRIX_FILTER_VERTICAL_BLOCK
+        output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_matrix_filter_vertical_block")
+    elseif filter_func == STARPU_MATRIX_FILTER_BLOCK
+        output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_matrix_filter_block")
+    elseif filter_func == STARPU_VECTOR_FILTER_BLOCK
+        output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_vector_filter_block")
+    end
+
+    return output
+end
+
+function starpu_memory_pin(data :: Union{Vector{T}, Matrix{T}}) where T
+    starpu_memory_pin(data, sizeof(data))::Cint
+end
+
+function starpu_memory_unpin(data :: Union{Vector{T}, Matrix{T}}) where T
+    starpu_memory_unpin(data, sizeof(data))::Cint
+end
+
+function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle
+    return StarpuDestructible(ptr, destr...)
+end
+
+
+# Raw-pointer form of unregister, in the shape expected from a stored destructor
+function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer)
+    starpu_data_unregister(ptr)
+end
+
+function starpu_data_unregister(handles :: StarpuDataHandle...)
+    for h in handles
+        starpu_execute_destructor!(h, starpu_data_unregister_pointer)
+    end
+end
+
+function starpu_data_register(v :: Vector{T}) where T
+    output = Ref{Ptr{Cvoid}}(0)
+    data_pointer = pointer(v)
+
+    starpu_vector_data_register(output, STARPU_MAIN_RAM, data_pointer, length(v), sizeof(T))
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
+end
+
+function starpu_data_register(m :: Matrix{T}) where T
+    # NOTE(review): height is passed twice, presumably as leading dimension and row count - confirm against the wrapper
+    output = Ref{Ptr{Cvoid}}(0)
+    data_pointer = pointer(m)
+    (height, width) = size(m)
+
+    starpu_matrix_data_register(output, STARPU_MAIN_RAM, data_pointer, height, height, width, sizeof(T))
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
+end
+
+function starpu_data_register(block :: Array{T,3}) where T
+
+    output = Ref{Ptr{Cvoid}}(0)
+    data_pointer = pointer(block)
+    (height, width, depth) = size(block)
+
+    starpu_block_data_register(output, STARPU_MAIN_RAM, data_pointer, height, height * width, height, width, depth, sizeof(T))
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
+end
+
+function starpu_data_register(ref :: Ref{T}) where T
+
+    output = Ref{Ptr{Cvoid}}(0)
+
+    starpu_variable_data_register(output, STARPU_MAIN_RAM, ref, sizeof(T))
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
+end
+
+function starpu_data_register(x1, x2, next_args...)
+    # Several values at once: each one is registered separately, the handles come back in a vector
+    handle_1 = starpu_data_register(x1)
+    handle_2 = starpu_data_register(x2)
+
+    next_handles = map(starpu_data_register, next_args)
+
+    return [handle_1, handle_2, next_handles...]
+end
+
+import Base.getindex
+function Base.getindex(handle :: StarpuDataHandle, indexes...)
+    output = starpu_data_get_sub_data(handle.object, length(indexes),
+                                      map(x->x-1, indexes)...)
+    return StarpuNewDataHandle(output)
+end
+
+function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer)
+    starpu_data_unpartition(ptr, STARPU_MAIN_RAM)
+end
+
+function starpu_data_partition(handle :: StarpuDataHandle, filter :: starpu_data_filter)
+    # The unpartition is recorded as a destructor before the handle is partitioned
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    starpu_data_partition(handle.object, pointer_from_objref(filter))
+end
+
+function starpu_data_unpartition(handles :: StarpuDataHandle...)
+
+    for h in handles
+        starpu_execute_destructor!(h, starpu_data_unpartition_pointer)
+    end
+
+    return nothing
+end
+
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: starpu_data_filter)
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    starpu_data_map_filters(handle.object, 1, pointer_from_objref(filter))
+end
+
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: starpu_data_filter, filter_2 :: starpu_data_filter)
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    starpu_data_map_filters(handle.object, 2, pointer_from_objref(filter_1), pointer_from_objref(filter_2))
+end
+
+function starpu_data_get_sequential_consistency_flag(handle :: StarpuDataHandle)
+    return starpu_data_get_sequential_consistency_flag(handle.object)
+end
+
+function starpu_data_set_sequential_consistency_flag(handle :: StarpuDataHandle, flag :: Int)
+    starpu_data_set_sequential_consistency_flag(handle.object, flag)
+end
+
+function starpu_data_acquire_on_node(handle :: StarpuDataHandle, node :: Int, mode)
+    starpu_data_acquire_on_node(handle.object, node, mode)
+end
+
+function starpu_data_release_on_node(handle :: StarpuDataHandle, node :: Int)
+    starpu_data_release_on_node(handle.object, node)
+end
+
+function starpu_data_wont_use(handle :: StarpuDataHandle)
+    starpu_data_wont_use(handle.object)
+end
+
+function repl(x::Symbol)
+    return x
+end
+function repl(x::Number)
+    return x
+end
+function repl(x :: Expr)
+    if (x.head == :call && x.args[1] == :+)
+        if (x.args[2] == :_)
+            return x.args[3]
+        elseif (x.args[3] == :_)
+            return x.args[2]
+        else return Expr(:call,:+,repl(x.args[2]),repl(x.args[3]))
+        end
+    elseif (x.head == :call && x.args[1] == :-)
+        if (x.args[2] == :_)
+            return Expr(:call,:-,x.args[3])
+        elseif (x.args[3] == :_)
+            return x.args[2]
+        else return Expr(:call,:-,repl(x.args[2]),repl(x.args[3]))
+        end
+    else return Expr(:call,x.args[1],repl(x.args[2]),repl(x.args[3]))
+    end
+end
+"""
+    Declares a subarray: registers the array of the right-hand side and partitions the new handle.
+    Ex : @starpu_filter ha = A[ _:_+1, : ]
+    One of the two indexes must be ":"; the other one must be a range starting with "_".
+"""
+macro starpu_filter(expr)
+    #dump(expr, maxdepth=20)
+    if (expr.head==Symbol("="))
+        region = expr.args[2]
+        farray = expr.args[1]
+        if (region.head == Symbol("ref"))
+            println("starpu filter")
+            index = 0
+            filter_func = nothing
+            if (region.args[2]==Symbol(":"))
+                index = 3
+                filter_func=:(STARPU_MATRIX_FILTER_BLOCK)
+            elseif (region.args[3] == Symbol(":"))
+                index = 2
+                filter_func=:(STARPU_MATRIX_FILTER_VERTICAL_BLOCK)
+            else
+                error("@starpu_filter: one of the two indexes must be \":\"")
+            end
+            ex = repl(region.args[index].args[3])
+            if (region.args[index].args[2] != Symbol("_"))
+                throw(AssertionError("LHS must be _"))
+            end
+            ret = quote
+                # escape and not global for farray!
+                $(esc(farray)) = starpu_data_register($(esc(region.args[1])))
+                starpu_data_partition( $(esc(farray)),starpu_data_filter($(esc(filter_func)),$(esc(ex))))
+            end
+            return ret
+        else
+            ret = quote
+                $(esc(farray))= starpu_data_register($(esc(region.args[1])))
+            end
+
+            dump("coucou"); #dump(region.args[2])
+            # dump(region.args[2])
+            # dump(region.args[3])
+            return ret
+        end
+    end
+end
diff --git a/julia/src/destructible.jl b/julia/src/destructible.jl
new file mode 100644
index 0000000..c167c7a
--- /dev/null
+++ b/julia/src/destructible.jl
@@ -0,0 +1,126 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+"""
+    Object pairing a value with the list of functions (destructors)
+    which must be applied to that value
+"""
+mutable struct StarpuDestructible{T}
+
+    object :: T
+    destructors :: LinkedList{Function}
+
+end
+# Stack of the currently open blocks; each entry lists the objects created inside that block
+starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
+
+"""
+    Declares a block of code. Every StarpuDestructible declared in this code
+    executes its destructors on its object once the block is exited. Yields the value of the block
+"""
+macro starpu_block(expr)
+    quote
+        starpu_enter_new_block()
+        local z=$(esc(expr))
+        starpu_exit_block()
+        z
+    end
+end
+
+# Must be called inside a @starpu_block: the new object is recorded in the innermost open block
+function StarpuDestructible(obj :: T, destructors :: Function...) where T
+
+    if (isempty(starpu_block_list))
+        error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
+    end
+
+    l = LinkedList{Function}()
+
+    for destr in destructors
+        add_to_tail!(l, destr)
+    end
+
+    output = StarpuDestructible{T}(obj, l)
+    add_to_head!(starpu_block_list[end], output)
+
+    return output
+end
+
+function starpu_enter_new_block()
+    # Opens a block: pushes an empty list of destructible objects
+    push!(starpu_block_list, LinkedList{StarpuDestructible}())
+end
+
+function starpu_destruct!(x :: StarpuDestructible)
+    # Calls every stored destructor on x.object, then empties the list
+    @foreach_asc x.destructors destr begin
+        destr.data(x.object)
+    end
+
+    empty!(x.destructors)
+
+    return nothing
+end
+
+
+function starpu_exit_block()
+    # Closes the innermost block and destructs every object recorded in it
+    destr_list = pop!(starpu_block_list)
+
+    @foreach_asc destr_list x begin
+        starpu_destruct!(x.data)
+    end
+end
+
+"""
+    Adds new destructors to the list of functions. They will be executed before
+    already stored ones when calling starpu_destruct!
+"""
+function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
+
+    for d in destrs
+        add_to_head!(x.destructors, d)
+    end
+
+    return nothing
+end
+
+"""
+    Removes the first stored occurrence of destructor destr, without executing it
+"""
+function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
+
+    @foreach_asc x.destructors lnk begin
+
+        if (lnk.data == destr)
+            remove_link!(lnk)
+            break
+        end
+    end
+
+    return nothing
+end
+
+"""
+    Executes "destr" function. If it was one of the stored destructors, it
+    is removed.
+    This function can be used to allow user to execute a specific action manually
+    (ex : explicit call to starpu_data_unpartition() without unregistering)
+"""
+function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
+
+    starpu_remove_destructor!(x, destr)
+    return destr(x.object)
+end
diff --git a/julia/src/dynamic_compiler/Makefile.am b/julia/src/dynamic_compiler/Makefile.am
new file mode 100644
index 0000000..432f613
--- /dev/null
+++ b/julia/src/dynamic_compiler/Makefile.am
@@ -0,0 +1,49 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# + +include $(top_srcdir)/make/starpu-notests.mk + +LD=$(CC_OR_NVCC) +AM_CPPFLAGS = -I$(abs_top_srcdir)/include/ -I$(abs_top_builddir)/src -I$(abs_top_builddir)/include \ + -I$(abs_top_srcdir)/julia/src/ $(STARPU_H_CPPFLAGS) + +AM_CFLAGS += -fPIC -DSTRIDE=${STRIDE} -mavx -fomit-frame-pointer -march=native -ffast-math +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ +LIBS += -L $(top_builddir)/julia/src/libstarpujulia-$(STARPU_EFFECTIVE_VERSION).1.3 +CUDA_CFLAGS = $(STARPU_CUDA_CPPFLAGS) -Wno-deprecated-gpu-targets +EXTERNLIB=extern_tasks.so +GENERATEDLIB=generated_tasks.so + +C_OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c)) + +if STARPU_USE_CUDA +CUDA_OBJECTS=$(patsubst %.cu,%.o,$(wildcard gen*.cu)) +else +CUDA_OBJECTS= +endif + +%.o: %.c + $(CC) -c $(AM_CPPFLAGS) $(AM_CFLAGS) $^ -o $@ + +%.o: %.cu + $(NVCC) -dc $(AM_CPPFLAGS) $(CUDA_CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS) + +${EXTERNLIB}: $(SOURCES_CPU) + $(CC) $(AM_CPPFLAGS) $(AM_CFLAGS) -shared -fPIC $(LDFLAGS) $^ -o $@ + +${GENERATEDLIB}: $(C_OBJECTS) $(CUDA_OBJECTS) + $(LD) -shared $^ -o $@ $(LDFLAGS) + diff --git a/julia/src/dynamic_compiler/Makefile.in b/julia/src/dynamic_compiler/Makefile.in new file mode 100644 index 0000000..b8eec30 --- /dev/null +++ b/julia/src/dynamic_compiler/Makefile.in @@ -0,0 +1,777 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = julia/src/dynamic_compiler +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-notests.mk \ + 
$(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ 
+HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = $(CC_OR_NVCC) +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ -L \ + $(top_builddir)/julia/src/libstarpujulia-$(STARPU_EFFECTIVE_VERSION).1.3 +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = 
@LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ 
+STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = 
@STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = 
@psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -fPIC -DSTRIDE=${STRIDE} -mavx \ + -fomit-frame-pointer -march=native -ffast-math +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +AM_CPPFLAGS = -I$(abs_top_srcdir)/include/ -I$(abs_top_builddir)/src -I$(abs_top_builddir)/include \ + -I$(abs_top_srcdir)/julia/src/ $(STARPU_H_CPPFLAGS) + 
+CUDA_CFLAGS = $(STARPU_CUDA_CPPFLAGS) -Wno-deprecated-gpu-targets +EXTERNLIB = extern_tasks.so +GENERATEDLIB = generated_tasks.so +C_OBJECTS = $(patsubst %.c,%.o,$(wildcard gen*.c)) +@STARPU_USE_CUDA_FALSE@CUDA_OBJECTS = +@STARPU_USE_CUDA_TRUE@CUDA_OBJECTS = $(patsubst %.cu,%.o,$(wildcard gen*.cu)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/src/dynamic_compiler/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign julia/src/dynamic_compiler/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | 
sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 
'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +%.o: %.c + $(CC) -c $(AM_CPPFLAGS) $(AM_CFLAGS) $^ -o $@ + +%.o: %.cu + $(NVCC) -dc $(AM_CPPFLAGS) $(CUDA_CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS) + +${EXTERNLIB}: $(SOURCES_CPU) + $(CC) $(AM_CPPFLAGS) $(AM_CFLAGS) -shared -fPIC $(LDFLAGS) $^ -o $@ + +${GENERATEDLIB}: $(C_OBJECTS) $(CUDA_OBJECTS) + $(LD) -shared $^ -o $@ $(LDFLAGS) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/julia/src/globals.jl b/julia/src/globals.jl new file mode 100644 index 0000000..77386c0 --- /dev/null +++ b/julia/src/globals.jl @@ -0,0 +1,50 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+global starpu_wrapper_library_handle = C_NULL
+
+global starpu_tasks_library_handle = C_NULL
+
+global starpu_target=STARPU_CPU
+
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+global CPU_CODELETS=Dict{String,String}()
+global CUDA_CODELETS=Dict{String,String}()
+
+global CODELETS_SCALARS=Dict{String,Any}()
+global CODELETS_PARAMS_STRUCT=Dict{String,Any}()
+
+global starpu_type_traduction_dict = Dict(
+    Int32 => "int32_t",
+    UInt32 => "uint32_t",
+    Float32 => "float",
+    Int64 => "int64_t",
+    UInt64 => "uint64_t",
+    Float64 => "double",
+    Nothing => "void"
+)
+export starpu_type_traduction_dict
+
+global mutex = Threads.SpinLock()
+
+# Fall back to STARPU_USE_CUDA = 0 when the constant is not defined yet
+try
+    STARPU_USE_CUDA == 1
+catch
+    global const STARPU_USE_CUDA = 0
+end
diff --git a/julia/src/init.jl b/julia/src/init.jl
new file mode 100644
index 0000000..debcd33
--- /dev/null
+++ b/julia/src/init.jl
@@ -0,0 +1,73 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+"""
+    Must be called before any other starpu function. Codelet function names are
+    looked up in the shared library named by the JULIA_TASK_LIB environment variable;
+    when it is unset, the library is built from the STARPU_JULIA_BUILD tree and loaded.
+"""
+function starpu_init()
+    debug_print("starpu_init")
+
+    if (get(ENV,"JULIA_TASK_LIB",0)!=0)
+        global starpu_tasks_library_handle= Libdl.dlopen(ENV["JULIA_TASK_LIB"])
+        debug_print("Loading external codelet library")
+        ff = Libdl.dlsym(starpu_tasks_library_handle,:starpu_find_function)
+        dump(ff)
+        for k in keys(CPU_CODELETS)
+            CPU_CODELETS[k]=unsafe_string(ccall(ff,Cstring, (Cstring,Cstring),Cstring_from_String(string(k)),Cstring_from_String("cpu")))
+            if STARPU_USE_CUDA == 1
+                CUDA_CODELETS[k]=unsafe_string(ccall(ff,Cstring, (Cstring,Cstring),Cstring_from_String(string(k)),Cstring_from_String("gpu")))
+            end
+            print(k,">>>>",CPU_CODELETS[k],"\n")
+        end
+    else
+        srcdir=get(ENV,"STARPU_JULIA_BUILD",0)
+        if (srcdir == 0)
+            error("Must define environment variable STARPU_JULIA_BUILD")
+        end
+        makefile=string(srcdir, "/src/dynamic_compiler/Makefile")
+        debug_print("generating codelet library with ")
+        debug_print(makefile)
+        run(`make -f $makefile generated_tasks.so`)
+        global starpu_tasks_library_handle=Libdl.dlopen("generated_tasks.so")
+    end
+    global starpu_wrapper_library_handle= Libdl.dlopen(starpu_wrapper_library_name)
+    output = starpu_init(C_NULL)
+
+    global task_pool = ThreadPools.QueuePool(2)
+
+    starpu_enter_new_block()
+
+    return output
+end
+
+"""
+    Must be called at the end of the program
+"""
+function starpu_shutdown()
+    debug_print("starpu_shutdown")
+
+    starpu_exit_block()
+    @starpucall starpu_shutdown Cvoid ()
+
+    lock(mutex) do
+        empty!(perfmodel_list)
+        empty!(codelet_list)
+        empty!(task_list)
+    end
+
+    return nothing
+end
diff --git a/julia/src/linked_list.jl b/julia/src/linked_list.jl
new file mode 100644
index 0000000..e9b17ce
--- /dev/null
+++ b/julia/src/linked_list.jl
@@ -0,0 +1,316 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+    export Link
+    mutable struct Link{T}
+
+        data :: T
+
+        previous :: Union{Nothing, Link{T}}
+        next :: Union{Nothing, Link{T}}
+
+        list
+
+        function Link{T}(x :: T, l) where {T}
+            output = new()
+            output.data = x
+            output.previous = Nothing()
+            output.next = Nothing()
+            output.list = l
+            return output
+        end
+    end
+
+
+    export LinkedList
+    mutable struct LinkedList{T}
+
+        nelement :: Int64
+
+        first :: Union{Nothing, Link{T}}
+        last :: Union{Nothing, Link{T}}
+
+        function LinkedList{T}() where {T}
+            output = new()
+            output.nelement = 0
+            output.first = Nothing()
+            output.last = Nothing()
+
+            return output
+        end
+
+    end
+
+    export add_to_head!
+    function add_to_head!(l :: LinkedList{T}, el :: T) where {T}
+
+        new_first = Link{T}(el, l)
+        old_first = l.first
+
+        l.first = new_first
+        new_first.next = old_first
+
+        if (isnothing(old_first))
+            l.last = new_first
+        else
+            old_first.previous = new_first
+        end
+
+        l.nelement += 1
+
+        return new_first
+    end
+
+
+    export add_to_tail!
+    function add_to_tail!(l :: LinkedList{T}, el :: T) where {T}
+
+        new_last = Link{T}(el, l)
+        old_last = l.last
+
+        l.last = new_last
+        new_last.previous = old_last
+
+        if (isnothing(old_last))
+            l.first = new_last
+        else
+            old_last.next = new_last
+        end
+
+        l.nelement += 1
+
+        return new_last
+    end
+
+
+    function LinkedList(v :: Union{Array{T,N}, NTuple{N,T}}) where {N,T}
+
+        output = LinkedList{T}()
+
+        for x in v
+            add_to_tail!(output, x)
+        end
+
+        return output
+    end
+
+
+    export remove_link!
+    function remove_link!(lnk :: Link{T}) where {T}
+
+        if (lnk.list == nothing)
+            return lnk.data
+        end
+
+        l = lnk.list
+        next = lnk.next
+        previous = lnk.previous
+
+        if (isnothing(next))
+            l.last = previous
+        else
+            next.previous = previous
+        end
+
+        if (isnothing(previous))
+            l.first = next
+        else
+            previous.next = next
+        end
+
+        l.nelement -= 1
+        lnk.list = nothing
+
+        return lnk.data
+    end
+
+
+    export is_linked
+    function is_linked(lnk :: Link)
+        return (lnk.list != nothing)
+    end
+
+
+
+
+    # Walks the links from first to last; the successor is read before the body runs, so the body may unlink the current link.
+    export @foreach_asc
+    macro foreach_asc(list, lnk_iterator, expression)
+
+        quote
+            $(esc(lnk_iterator)) = $(esc(list)).first
+
+            while (!isnothing($(esc(lnk_iterator))))
+                __next_lnk_iterator = $(esc(lnk_iterator)).next
+                $(esc(expression))
+                $(esc(lnk_iterator)) = __next_lnk_iterator
+            end
+        end
+    end
+
+
+    export @foreach_desc
+    macro foreach_desc(list, lnk_iterator, expression)
+
+        quote
+            $(esc(lnk_iterator)) = $(esc(list)).last
+
+            while (!isnothing($(esc(lnk_iterator))))
+                __next_lnk_iterator = $(esc(lnk_iterator)).previous
+                $(esc(expression))
+                $(esc(lnk_iterator)) = __next_lnk_iterator
+            end
+        end
+    end
+
+
+
+
+    function Base.show(io :: IO, lnk :: Link{T}) where {T}
+
+        print(io, "Link{$T}{data: ")
+        print(io, lnk.data)
+
+        print(io, " ; previous: ")
+
+        if (isnothing(lnk.previous))
+            print(io, "NONE")
+        else
+            print(io, lnk.previous.data)
+        end
+
+        print(io, " ; next: ")
+
+        if (isnothing(lnk.next))
+            print(io, "NONE")
+        else
+            print(io, lnk.next.data)
+        end
+
+        print(io, "}")
+
+    end
+
+
+
+    function Base.show(io :: IO, l :: LinkedList{T}) where {T}
+
+        print(io, "LinkedList{$T}{")
+
+        @foreach_asc l lnk begin
+
+            if (!isnothing(lnk.previous))
+                print(io, ", ")
+            end
+
+            print(io, lnk.data)
+
+        end
+
+        print(io, "}")
+
+    end
+
+
+
+    #import Base.start
+    function start(l :: LinkedList)
+        return nothing
+    end
+
+
+    #import Base.done
+    function done(l :: LinkedList, state)
+
+        if (state == nothing)
+            return isnothing(l.first)
+        end
+
+        return isnothing(state.next)
+    end
+
+
+    #import Base.next
+    function next(l :: LinkedList, state)
+
+        if (state == nothing)
+            next_link = l.first
+        else
+            next_link = state.next
+        end
+
+        return (next_link.data, next_link)
+    end
+
+
+    #import Base.endof
+    function endof(l :: LinkedList)
+        return l.nelement
+    end
+
+    export index_to_link
+    function index_to_link(l :: LinkedList, ind)
+
+        if (ind > l.nelement || ind <= 0)
+            error("Invalid index")
+        end
+
+        lnk = l.first
+
+        for i in (1:(ind - 1))
+            lnk = lnk.next
+        end
+
+        return lnk
+    end
+
+
+    import Base.getindex
+    function getindex(l :: LinkedList, ind)
+        return index_to_link(l,ind).data
+    end
+    # NOTE(review): `l[i] = v` calls setindex!(l, v, i) in Base, but this method takes (l, ind, value) -- confirm the intended order.
+    import Base.setindex!
+    function setindex!(l :: LinkedList{T}, ind, value :: T) where T
+        lnk = index_to_link(l,ind)
+        lnk.data = value
+    end
+
+
+
+
+
+    import Base.eltype
+    function eltype(l :: LinkedList{T}) where T
+        return T
+    end
+
+
+    import Base.isempty
+    function isempty(l :: LinkedList)
+        return (l.nelement == 0)
+    end
+
+
+    import Base.empty!
+    function empty!(l :: LinkedList)
+        @foreach_asc l lnk remove_link!(lnk)
+    end
+
+
+    import Base.length
+    function length(l :: LinkedList)
+        return l.nelement
+    end
diff --git a/julia/src/perfmodel.jl b/julia/src/perfmodel.jl
new file mode 100644
index 0000000..e52c2d2
--- /dev/null
+++ b/julia/src/perfmodel.jl
@@ -0,0 +1,31 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+perfmodel_list = Vector{starpu_perfmodel}()
+
+function starpu_perfmodel(; perf_type::starpu_perfmodel_type, symbol::String)
+    output = starpu_perfmodel(zero)
+    output.type = perf_type
+    output.symbol = Cstring_from_String(symbol)
+
+    # Keep a reference in perfmodel_list so the model is not garbage collected
+    # before starpu_shutdown is called; the do-form releases mutex even on error.
+    lock(mutex) do
+        push!(perfmodel_list, output)
+    end
+
+    return output
+end
diff --git a/julia/src/task.jl b/julia/src/task.jl
new file mode 100644
index 0000000..45b9a58
--- /dev/null
+++ b/julia/src/task.jl
@@ -0,0 +1,400 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +using ThreadPools + +mutable struct jl_starpu_codelet + c_codelet :: starpu_codelet + perfmodel :: starpu_perfmodel + cpu_func :: Union{String, STARPU_BLAS} + cuda_func :: Union{String, STARPU_BLAS} + opencl_func :: String + modes +end + +global codelet_list = Vector{jl_starpu_codelet}() + +function starpu_codelet(; + cpu_func :: Union{String, STARPU_BLAS, Cvoid} = "", + cuda_func :: Union{String, STARPU_BLAS, Cvoid} = "", + opencl_func :: String = "", + modes = [], + perfmodel :: starpu_perfmodel, + where_to_execute :: Union{Cvoid, UInt32} = nothing, + color :: UInt32 = 0x00000000 + ) + + if (length(modes) > STARPU_NMAXBUFS) + error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)") + end + + + if (where_to_execute == nothing) + real_where = ((cpu_func != nothing) * STARPU_CPU) | ((cuda_func != nothing) * STARPU_CUDA) + else + real_where = where_to_execute + end + + output = jl_starpu_codelet(starpu_codelet(zero), perfmodel, cpu_func, cuda_func, opencl_func, modes) + ## TODO: starpu_codelet_init + + output.c_codelet.where = real_where + + for i in 1:length(modes) + output.c_codelet.modes[i] = modes[i] + end + output.c_codelet.nbuffers = length(modes) + output.c_codelet.model = pointer_from_objref(perfmodel) + output.c_codelet.color = color + + if typeof(cpu_func) == STARPU_BLAS + output.cpu_func = cpu_blas_codelets[cpu_func] + output.c_codelet.cpu_func = load_wrapper_function_pointer(output.cpu_func) + else + output.c_codelet.cpu_func = load_starpu_function_pointer(get(CPU_CODELETS, cpu_func, "")) + end + + if typeof(cuda_func) == STARPU_BLAS + output.cuda_func = cuda_blas_codelets[cuda_func] + output.c_codelet.cuda_func = load_wrapper_function_pointer(output.cuda_func) + output.c_codelet.cuda_flags[1] = STARPU_CUDA_ASYNC + else + output.c_codelet.cuda_func = load_starpu_function_pointer(get(CUDA_CODELETS, cuda_func, "")) + end + + output.c_codelet.opencl_func = load_starpu_function_pointer("") + + # Codelets must not be 
garbage collected before starpu shutdown is called.
+    lock(mutex)
+    push!(codelet_list, output)
+    unlock(mutex)
+
+    return output
+end
+
+mutable struct jl_starpu_task
+
+    cl :: jl_starpu_codelet
+    handles :: Vector{StarpuDataHandle}
+    handle_pointers :: Vector{StarpuDataHandlePointer} # the `object` field of each entry of `handles`, same order
+    synchronous :: Bool
+    cl_arg # type depends on codelet
+    callback_signal :: Vector{Cint} # one-element flag handed to the C wrapper as its callback argument
+    callback_function :: Union{Cvoid, Function}
+    callback_arg
+    c_task :: starpu_task
+end
+
+task_list = Vector{jl_starpu_task}() # keeps created tasks reachable; filled by starpu_task() under `mutex`
+
+"""
+    starpu_task(; cl :: jl_starpu_codelet, handles :: Vector{StarpuDataHandle}, cl_arg = (), ...)
+
+    Creates a new task which will run the specified codelet on handle buffers and cl_arg data ("cl" is mandatory)
+    """
+function starpu_task(;
+                     cl :: Union{Cvoid, jl_starpu_codelet} = nothing,
+                     handles :: Vector{StarpuDataHandle} = StarpuDataHandle[],
+                     cl_arg = (),
+                     callback :: Union{Cvoid, Function} = nothing,
+                     callback_arg = nothing,
+                     tag :: Union{Cvoid, starpu_tag_t} = nothing,
+                     tag_only :: Union{Cvoid, starpu_tag_t} = nothing,
+                     sequential_consistency = true,
+                     detach = 1,
+                     color :: Union{Cvoid, UInt32} = nothing,
+                     where :: Union{Cvoid, Int32} = nothing)
+    if (cl == nothing)
+        error("\"cl\" field can't be empty when creating a StarpuTask")
+    end
+
+    output = jl_starpu_task(cl, handles, map((x -> x.object), handles), false, nothing, Vector{Cint}(undef, 1), callback, callback_arg, starpu_task(zero))
+
+    # handle scalar_parameters
+    codelet_name = ""
+    if isa(cl.cpu_func, String) && cl.cpu_func != ""
+        codelet_name = cl.cpu_func # this name keys the CODELETS_SCALARS lookup below
+    elseif isa(cl.gpu_func, String) && cl.gpu_func != ""
+        codelet_name = cl.gpu_func # only used when no CPU implementation name is set
+    end
+    scalar_parameters = get(CODELETS_SCALARS, codelet_name, nothing)
+    if scalar_parameters != nothing
+        nb_scalar_required = length(scalar_parameters)
+        nb_scalar_provided = tuple_len(cl_arg)
+        if (nb_scalar_provided != nb_scalar_required)
+            error("$nb_scalar_provided scalar parameters provided but $nb_scalar_required are required by $codelet_name.")
+        end
+        output.cl_arg = create_param_struct_from_clarg(codelet_name, cl_arg)
+    else
+        output.cl_arg = cl_arg # codelet without registered scalars: the tuple is passed through unchanged
+    end
+
+    starpu_task_init(Ref(output.c_task))
+    output.c_task.cl = pointer_from_objref(cl.c_codelet)
+    output.c_task.synchronous = false
+    output.c_task.sequential_consistency = sequential_consistency
+    output.c_task.detach = detach
+
+    ## TODO: check num handles equals num codelet buffers
+    for i in 1:length(handles)
+        output.c_task.handles[i] = output.handle_pointers[i]
+    end
+    if tuple_len(cl_arg) > 0
+        output.c_task.cl_arg = Base.unsafe_convert(Ptr{Cvoid}, Ref(output.cl_arg)) # NOTE(review): pointer comes from a temporary Ref -- confirm it stays valid until the task has run
+        output.c_task.cl_arg_size = sizeof(output.cl_arg)
+    end
+
+    # callback
+    if output.callback_function != nothing
+        output.callback_signal[1] = 0
+        output.c_task.callback_arg = Base.unsafe_convert(Ptr{Cvoid}, output.callback_signal) # the C side receives the signal array, not `callback_arg`
+        output.c_task.callback_func = load_wrapper_function_pointer("julia_callback_func")
+    end
+
+    if tag != nothing
+        output.c_task.tag_id = tag
+        output.c_task.use_tag = 1
+    end
+
+    if tag_only != nothing
+        output.c_task.tag_id = tag_only # sets the id only; use_tag is left untouched
+    end
+
+    if color != nothing
+        output.c_task.color = color
+    end
+
+    if where != nothing
+        output.c_task.where = where
+    end
+
+    # Tasks must not be garbage collected before starpu_task_wait_for_all is called.
+    # This is necessary in particular for tasks created inside callback functions.
+
+    lock(mutex)
+    push!(task_list, output)
+    unlock(mutex)
+
+    return output
+end
+
+
+function create_param_struct_from_clarg(codelet_name, cl_arg) # builds the codelet's parameter struct from the `cl_arg` tuple
+    struct_params_name = CODELETS_PARAMS_STRUCT[codelet_name]
+
+    if struct_params_name == false
+        error("structure name not found in CODELET_PARAMS_STRUCT")
+    end
+
+    nb_scalar_provided = length(cl_arg)
+    create_struct_param_str = "output = $struct_params_name(" # source text of a constructor call, completed with the printed arguments
+    for i in 1:nb_scalar_provided-1
+        arg = cl_arg[i]
+        create_struct_param_str *= "$arg, "
+    end
+    if (nb_scalar_provided > 0)
+        arg = cl_arg[nb_scalar_provided]
+        create_struct_param_str *= "$arg" # last argument: no trailing separator
+    end
+    create_struct_param_str *= ")"
+    eval(Meta.parse(create_struct_param_str)) # evaluated at module level, so `output` is a module global
+    return output
+end
+
+"""
+    Launches task execution, if "synchronous" task field is set to "false", call
+    returns immediately
+"""
+function starpu_task_submit(task :: jl_starpu_task)
+    if (length(task.handles) != length(task.cl.modes))
+        error("Invalid number of handles for task : $(length(task.handles)) were given while codelet has $(length(task.cl.modes)) modes")
+    end
+
+    starpu_task_submit(Ref(task.c_task))
+
+    if task.callback_function != nothing
+        callback_arg = task.callback_arg
+        callback_signal = task.callback_signal
+        callback_function = task.callback_function
+
+        lock(mutex)
+        put!(task_pool) do
+
+            # Active waiting loop
+            @starpucall(julia_wait_signal, Cvoid, (Ptr{Cvoid},), Base.unsafe_convert(Ptr{Cvoid}, callback_signal))
+
+            # We've received the signal from the pthread, now execute the callback.
+            callback_function(callback_arg)
+
+            # Tell the pthread that the callback is done.
+            callback_signal[1] = 0
+        end
+        unlock(mutex)
+    end
+end
+
+function starpu_modes(x :: Symbol) # maps an access-mode symbol to the matching StarPU constant
+    if (x == Symbol("STARPU_RW"))
+        return STARPU_RW
+    elseif (x == Symbol("STARPU_R"))
+        return STARPU_R
+    else return STARPU_W # any other symbol is treated as write access
+    end
+end
+
+default_codelet = Dict{String, jl_starpu_codelet}() # cache used by get_default_codelet, keyed by codelet name
+default_perfmodel = Dict{String, starpu_perfmodel}() # cache used by get_default_perfmodel, keyed by symbol name
+
+function get_default_perfmodel(name) # returns the cached history-based perfmodel for `name`, creating it on first use
+    if haskey(default_perfmodel, name)
+        return default_perfmodel[name]
+    end
+
+    perfmodel = starpu_perfmodel(
+        perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
+        symbol = name
+    )
+    default_perfmodel[name] = perfmodel
+    return perfmodel
+end
+
+function get_default_codelet(codelet_name, perfmodel, modes) :: jl_starpu_codelet
+    if haskey(default_codelet, codelet_name) # a cached codelet wins: `perfmodel` and `modes` are ignored in that case
+        return default_codelet[codelet_name]
+    end
+
+    cl = starpu_codelet(
+        cpu_func = haskey(CPU_CODELETS, codelet_name) ? codelet_name : "",
+        cuda_func = haskey(CUDA_CODELETS, codelet_name) ? codelet_name : "",
+        modes = modes,
+        perfmodel = perfmodel,
+    )
+    default_codelet[codelet_name] = cl
+    return cl
+end
+
+function starpu_task_insert(; # creates a task (building a default codelet/perfmodel when none is given) and submits it
+                            codelet_name :: Union{Cvoid, String} = nothing,
+                            cl :: Union{Cvoid, jl_starpu_codelet} = nothing,
+                            perfmodel :: Union{starpu_perfmodel, Cvoid} = nothing,
+                            handles :: Vector{StarpuDataHandle} = StarpuDataHandle[],
+                            cl_arg = (),
+                            callback :: Union{Cvoid, Function} = nothing,
+                            callback_arg = nothing,
+                            tag :: Union{Cvoid, starpu_tag_t} = nothing,
+                            tag_only :: Union{Cvoid, starpu_tag_t} = nothing,
+                            sequential_consistency = true,
+                            detach = 1,
+                            where :: Union{Cvoid, Int32} = nothing,
+                            color :: Union{Cvoid, UInt32} = nothing,
+                            modes = nothing)
+    if cl == nothing && codelet_name == nothing
+        error("At least one of the two parameters codelet_name or cl must be provided when calling starpu_task_insert.")
+
+    end
+    if cl == nothing && modes == nothing
+        error("Modes must be defined when calling starpu_task_insert without a codelet.")
+    end
+
+    if perfmodel == nothing
+        perfmodel = get_default_perfmodel(codelet_name == nothing ? "default" : codelet_name)
+    end
+
+    if cl == nothing
+        cl = get_default_codelet(codelet_name, perfmodel, modes)
+    end
+
+    task = starpu_task(cl = cl, handles = handles, cl_arg = cl_arg, callback = callback,
+                       callback_arg = callback_arg, tag = tag, tag_only = tag_only,
+                       sequential_consistency = sequential_consistency,
+                       detach = detach, color = color, where = where)
+
+    starpu_task_submit(task)
+end
+
+"""
+    Creates and submits an asynchronous task running cl Codelet function with the given access modes.
+    Ex : @starpu_async_cl cl(handle1, handle2) [STARPU_R, STARPU_RW]
+"""
+macro starpu_async_cl(expr, modes, cl_arg=(), color ::UInt32=0x00000000)
+
+    if (!isa(expr, Expr) || expr.head != :call)
+        error("Invalid task submit syntax")
+    end
+    if (!isa(modes, Expr)||modes.head != :vect) # `modes` must be a bracketed list of mode symbols
+        error("Invalid task submit syntax")
+    end
+    perfmodel = starpu_perfmodel(
+        perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
+        symbol = "history_perf"
+    )
+    println(CPU_CODELETS[string(expr.args[1])]) # runs at macro-expansion time; raises KeyError when the name is not a CPU codelet
+    cl = starpu_codelet(
+        cpu_func = string(expr.args[1]),
+        cuda_func = string(expr.args[1]),
+        #opencl_func="ocl_matrix_mult",
+        ### TODO: CORRECT !
+        modes = map((x -> starpu_modes(x)),modes.args),
+        perfmodel = perfmodel,
+        color = color
+    )
+    handles = Expr(:vect, expr.args[2:end]...) # the call arguments become the task's handle vector
+    #dump(handles)
+    quote
+        task = starpu_task(cl = $(esc(cl)), handles = $(esc(handles)), cl_arg=$(esc(cl_arg)))
+        starpu_task_submit(task)
+    end
+end
+
+function starpu_task_wait(task :: jl_starpu_task)
+    @threadcall(@starpufunc(:starpu_task_wait),
+                Cint, (Ptr{Cvoid},), Ref(task.c_task))
+
+    # starpu_task_wait(Ref(task.c_task))
+end
+
+
+"""
+    Blocks until every submitted task has finished, then empties the list of retained tasks.
+"""
+function starpu_task_wait_for_all()
+    @threadcall(@starpufunc(:starpu_task_wait_for_all),
+                Cint, ())
+
+    lock(mutex)
+    empty!(task_list)
+    unlock(mutex)
+end
+
+"""
+    Blocks until every submitted task has finished.
+    Ex : @starpu_sync_tasks begin
+            [...]
+            starpu_task_submit(task)
+            [...]
+        end
+
+    TODO : Make the macro only wait for tasks declared inside the following expression.
+           (similar mechanism as @starpu_block)
+"""
+macro starpu_sync_tasks(expr)
+    quote
+        $(esc(expr))
+        starpu_task_wait_for_all()
+    end
+end
+
+function starpu_task_destroy(task :: jl_starpu_task)
+    starpu_task_destroy(Ref(task.c_task))
+end
diff --git a/julia/src/task_dep.jl b/julia/src/task_dep.jl
new file mode 100644
index 0000000..64d0df9
--- /dev/null
+++ b/julia/src/task_dep.jl
@@ -0,0 +1,48 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+function starpu_tag_declare_deps(id :: starpu_tag_t, dep :: starpu_tag_t, other_deps :: starpu_tag_t...)
+
+    v = [dep, other_deps...]
+    GC.@preserve v starpu_tag_declare_deps_array(id, length(v), pointer(v)) # keep `v` rooted while C reads through the raw pointer
+end
+
+"""
+    starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask [, other_deps :: StarpuTask...])
+
+    Declare task dependencies between a task and the following provided ones. This function must be called
+    prior to the submission of the task, but it may be called after the submission or the execution of the tasks in the array,
+    provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was
+    already submitted or with an entry of task_array that is no longer a valid task results in an undefined behaviour.
+"""
+function starpu_task_declare_deps(task :: jl_starpu_task, dep :: jl_starpu_task, other_deps :: jl_starpu_task...)
+
+    task_array = [pointer_from_objref(dep.c_task), map((t -> pointer_from_objref(t.c_task)), other_deps)...]
+    starpu_task_declare_deps_array(pointer_from_objref(task.c_task), length(task_array), task_array) # the array itself is passed here, not a raw pointer
+end
+
+function starpu_task_end_dep_add(task :: jl_starpu_task, nb_deps :: Int)
+    starpu_task_end_dep_add(Ref(task.c_task), nb_deps)
+end
+
+function starpu_task_end_dep_release(task :: jl_starpu_task)
+    starpu_task_end_dep_release(Ref(task.c_task))
+end
+
+function starpu_task_declare_end_deps(task :: jl_starpu_task, dep :: jl_starpu_task, other_deps :: jl_starpu_task...)
+
+    task_array = [pointer_from_objref(dep.c_task), map((t -> pointer_from_objref(t.c_task)), other_deps)...]
+    GC.@preserve task_array starpu_task_declare_end_deps_array(pointer_from_objref(task.c_task), length(task_array), pointer(task_array)) # keep `task_array` rooted during the call
+end
diff --git a/julia/src/translate_headers.jl b/julia/src/translate_headers.jl
new file mode 100644
index 0000000..77130a0
--- /dev/null
+++ b/julia/src/translate_headers.jl
@@ -0,0 +1,113 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+using Clang
+using Clang.LibClang.LLVM_jll
+
+function starpu_translate_headers() # generates the Julia bindings (julia/gen/*.jl in the build tree) from the StarPU C headers
+    debug_print("Translating StarPU headers...")
+
+    if !isdir(joinpath(fstarpu_build_dir(), "julia/gen"))
+        mkpath(joinpath(fstarpu_build_dir(), "julia/gen")) # mkpath also creates a missing "julia" parent directory
+    end
+
+    STARPU_BUILD_INCLUDE=joinpath(fstarpu_build_dir(), "include")
+    STARPU_SRC_INCLUDE=joinpath(fstarpu_src_dir(), "include")
+    STARPU_HEADERS = [joinpath(STARPU_BUILD_INCLUDE, header) for header in readdir(STARPU_BUILD_INCLUDE) if endswith(header, ".h")]
+    if STARPU_SRC_INCLUDE != STARPU_BUILD_INCLUDE # out-of-tree build: also take the headers from the source tree
+        for header in readdir(STARPU_SRC_INCLUDE)
+            if endswith(header, ".h")
+                push!(STARPU_HEADERS, joinpath(STARPU_SRC_INCLUDE, header))
+            end
+        end
+    end
+
+    LIBCLANG_INCLUDE = joinpath(dirname(LLVM_jll.libclang_path), "..", "include", "clang-c") |> normpath
+
+    clang_args = ["-I", STARPU_BUILD_INCLUDE, "-I", STARPU_SRC_INCLUDE]
+
+    for header in find_std_headers()
+        push!(clang_args, "-I")
+        push!(clang_args, header)
+    end
+
+    only_select_symbols = Set(["starpu_task",
+                               "starpu_cublas_init",
+                               "starpu_codelet",
+                               "starpu_data_filter",
+                               "starpu_tag_t",
+                               "starpu_perfmodel",
+                               "starpu_perfmodel_type",
+                               "starpu_data_handle_t",
+                               "starpu_init",
+                               "starpu_data_acquire_on_node",
+                               "starpu_data_release_on_node",
+                               "starpu_data_unregister",
+                               "starpu_data_partition",
+                               "starpu_data_unpartition",
+                               "starpu_data_get_sub_data",
+                               "starpu_data_map_filters",
+                               "starpu_data_get_default_sequential_consistency_flag",
+                               "starpu_data_set_default_sequential_consistency_flag",
+                               "starpu_data_get_sequential_consistency_flag",
+                               "starpu_data_set_sequential_consistency_flag",
+                               "starpu_data_wont_use",
+                               "starpu_matrix_data_register",
+                               "starpu_block_data_register",
+                               "starpu_vector_data_register",
+                               "starpu_variable_data_register",
+                               "starpu_memory_pin",
+                               "starpu_memory_unpin",
+                               "starpu_task_end_dep_add",
+                               "starpu_task_end_dep_release",
+                               "starpu_task_init",
+                               "starpu_task_destroy",
+                               "starpu_task_submit",
+                               "starpu_task_wait",
+                               "starpu_task_wait_for_n_submitted",
+                               "starpu_tag_remove",
+                               "starpu_tag_wait",
+                               "starpu_tag_declare_deps_array",
+                               "starpu_tag_notify_from_apps",
+                               "starpu_task_declare_end_deps_array",
+                               "starpu_task_declare_deps_array",
+                               "starpu_iteration_push",
+                               "starpu_iteration_pop",
+                               "starpu_worker_get_count",
+                               "starpu_cpu_worker_get_count",
+                               "starpu_cuda_worker_get_count",
+                               "starpu_opencl_worker_get_count",
+                               "STARPU_CPU",
+                               "STARPU_CUDA",
+                               "STARPU_CUDA_ASYNC",
+                               "STARPU_OPENCL",
+                               "STARPU_MAIN_RAM",
+                               "STARPU_NMAXBUFS",
+                               "STARPU_USE_CUDA"])
+
+    wc = init(; headers = STARPU_HEADERS,
+              output_file = joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl"),
+              common_file = joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl"),
+              clang_includes = vcat(LIBCLANG_INCLUDE, CLANG_INCLUDE),
+              clang_args = clang_args,
+              header_library = x->"starpu_wrapper_library_name", # every header is mapped to this same library identifier
+              clang_diagnostics = false,
+              rewriter = x->x,
+              only_select_symbols = only_select_symbols,
+              fields_align = Dict((:starpu_pthread_spinlock_t,:taken) => 16)
+              )
+
+    run(wc)
+end
diff --git a/julia/src/utils.jl b/julia/src/utils.jl
new file mode 100644
index 0000000..2aa0d69
--- /dev/null
+++ b/julia/src/utils.jl
@@ -0,0 +1,115 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+function fstarpu_task_library_name() # value of the STARPU_JULIA_LIB environment variable; errors when it is unset
+    x=get(ENV, "STARPU_JULIA_LIB", C_NULL)
+    if (x == C_NULL)
+        error("Environment variable STARPU_JULIA_LIB must be defined")
+    end
+    return x
+end
+
+function fstarpu_build_dir() # value of the STARPU_BUILD_DIR environment variable; errors when it is unset
+    x=get(ENV, "STARPU_BUILD_DIR", C_NULL)
+    if (x == C_NULL)
+        error("Environment variable STARPU_BUILD_DIR must be defined")
+    end
+    return x
+end
+
+function fstarpu_src_dir() # value of the STARPU_SRC_DIR environment variable; errors when it is unset
+    x=get(ENV, "STARPU_SRC_DIR", C_NULL)
+    if (x == C_NULL)
+        error("Environment variable STARPU_SRC_DIR must be defined")
+    end
+    return x
+end
+
+macro starpufunc(symbol)
+    :($symbol, starpu_wrapper_library_name)
+end
+
+"""
+    Used to call a StarPU function compiled inside "libjlstarpu_c_wrapper.so"
+    Works as ccall function
+"""
+macro starpucall(func, ret_type, arg_types, args...)
+    return Expr(:call, :ccall, (func, starpu_wrapper_library_name), esc(ret_type), esc(arg_types), map(esc, args)...)
+end
+
+function debug_print(x...)
+    println("\x1b[32m", x..., "\x1b[0m")
+    flush(stdout)
+end
+
+function Cstring_from_String(str :: String)
+    return Cstring(pointer(str)) # NOTE(review): raw pointer into `str`; the caller has to keep `str` alive while it is used
+end
+
+tuple_len(::NTuple{N, Any}) where {N} = N
+
+function starpu_find_function(name :: String, device :: String )
+    s=ccall(:starpu_find_function,Cstring, (Cstring,Cstring),Cstring_from_String(name),Cstring_from_String(device))
+    if s == C_NULL
+        print("NULL STRING\n")
+        error("dead")
+    end
+    return s
+end
+
+function load_starpu_function_pointer(func_name :: String)
+
+    if (isempty(func_name))
+        return C_NULL
+    end
+    #func_pointer = ccall(:dlsym,"libdl",Ptr{Cvoid});
+    func_pointer=Libdl.dlsym_e(starpu_tasks_library_handle, func_name) # dlsym_e returns C_NULL for a missing symbol instead of throwing
+
+    if (func_pointer == C_NULL)
+        error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library")
+    end
+
+    return func_pointer
+end
+
+function load_wrapper_function_pointer(func_name :: String)
+    if (isempty(func_name))
+        return C_NULL
+    end
+
+    func_pointer=Libdl.dlsym_e(starpu_wrapper_library_handle, func_name) # dlsym_e returns C_NULL for a missing symbol instead of throwing
+
+    if (func_pointer == C_NULL)
+        error("Couldn't find function symbol $func_name into extern library file $starpu_wrapper_library_name")
+    end
+
+    return func_pointer
+end
+
+
+"""
+    Declares a Julia function which is just calling the StarPU function
+    having the same name.
+"""
+macro starpu_noparam_function(func_name, ret_type)
+
+    func = Symbol(func_name)
+
+    quote
+        export $func
+        global $func() = ccall(($func_name, starpu_wrapper_library_name),
+                               $ret_type, ()) :: $ret_type
+    end
+end
diff --git a/m4/acinclude.m4 b/m4/acinclude.m4
new file mode 100644
index 0000000..3516213
--- /dev/null
+++ b/m4/acinclude.m4
@@ -0,0 +1,270 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Check whether the target supports __sync_val_compare_and_swap.
+AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP], [
+  AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap],
+                 ac_cv_have_sync_val_compare_and_swap, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __sync_val_compare_and_swap(&foo, 0, 1);])],
+                 [ac_cv_have_sync_val_compare_and_swap=yes],
+                 [ac_cv_have_sync_val_compare_and_swap=no])])
+  if test $ac_cv_have_sync_val_compare_and_swap = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP, 1,
+              [Define to 1 if the target supports __sync_val_compare_and_swap])
+  fi])
+
+# Check whether the target supports 64bit __sync_val_compare_and_swap.
+AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP_8], [
+  AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap_8],
+                 ac_cv_have_sync_val_compare_and_swap_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __sync_val_compare_and_swap(&foo, 0, 1);])],
+                 [ac_cv_have_sync_val_compare_and_swap_8=yes],
+                 [ac_cv_have_sync_val_compare_and_swap_8=no])])
+  if test $ac_cv_have_sync_val_compare_and_swap_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8, 1,
+              [Define to 1 if the target supports __sync_val_compare_and_swap_8])
+  fi])
+
+# Check whether the target supports __sync_bool_compare_and_swap.
+AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP], [
+  AC_CACHE_CHECK([whether the target supports __sync_bool_compare_and_swap],
+                 ac_cv_have_sync_bool_compare_and_swap, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __sync_bool_compare_and_swap(&foo, 0, 1);])],
+                 [ac_cv_have_sync_bool_compare_and_swap=yes],
+                 [ac_cv_have_sync_bool_compare_and_swap=no])])
+  if test $ac_cv_have_sync_bool_compare_and_swap = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP, 1,
+              [Define to 1 if the target supports __sync_bool_compare_and_swap])
+  fi])
+
+# Check whether the target supports __sync_bool_compare_and_swap_8 (the builtin on an int64_t operand).
+AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP_8], [
+  AC_CACHE_CHECK([whether the target supports __sync_bool_compare_and_swap_8],
+                 ac_cv_have_sync_bool_compare_and_swap_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __sync_bool_compare_and_swap(&foo, 0, 1);])],
+                 [ac_cv_have_sync_bool_compare_and_swap_8=yes],
+                 [ac_cv_have_sync_bool_compare_and_swap_8=no])])
+  if test $ac_cv_have_sync_bool_compare_and_swap_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8, 1,
+              [Define to 1 if the target supports __sync_bool_compare_and_swap_8])
+  fi])
+
+# Check whether the target supports __sync_fetch_and_add.
+AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD], [
+  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add],
+                 ac_cv_have_sync_fetch_and_add, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __sync_fetch_and_add(&foo, 1);])],
+                 [ac_cv_have_sync_fetch_and_add=yes],
+                 [ac_cv_have_sync_fetch_and_add=no])])
+  if test $ac_cv_have_sync_fetch_and_add = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_ADD, 1,
+              [Define to 1 if the target supports __sync_fetch_and_add])
+  fi])
+
+# Check whether the target supports __sync_fetch_and_add_8 (the builtin on an int64_t operand).
+AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD_8], [
+  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add_8],
+                 ac_cv_have_sync_fetch_and_add_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __sync_fetch_and_add(&foo, 1);])],
+                 [ac_cv_have_sync_fetch_and_add_8=yes],
+                 [ac_cv_have_sync_fetch_and_add_8=no])])
+  if test $ac_cv_have_sync_fetch_and_add_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_ADD_8, 1,
+              [Define to 1 if the target supports __sync_fetch_and_add_8])
+  fi])
+
+# Check whether the target supports __sync_fetch_and_or.
+AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_OR], [
+  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_or],
+                 ac_cv_have_sync_fetch_and_or, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __sync_fetch_and_or(&foo, 1);])],
+                 [ac_cv_have_sync_fetch_and_or=yes],
+                 [ac_cv_have_sync_fetch_and_or=no])])
+  if test $ac_cv_have_sync_fetch_and_or = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_OR, 1,
+              [Define to 1 if the target supports __sync_fetch_and_or])
+  fi])
+
+# Check whether the target supports __sync_fetch_and_or_8.
+AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_OR_8], [
+  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_or_8],
+                 ac_cv_have_sync_fetch_and_or_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __sync_fetch_and_or(&foo, 1);])],
+                 [ac_cv_have_sync_fetch_and_or_8=yes],
+                 [ac_cv_have_sync_fetch_and_or_8=no])])
+  if test $ac_cv_have_sync_fetch_and_or_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_OR_8, 1,
+              [Define to 1 if the target supports __sync_fetch_and_or_8])
+  fi])
+
+# Check whether the target supports __sync_lock_test_and_set.
+AC_DEFUN([STARPU_CHECK_SYNC_LOCK_TEST_AND_SET], [
+  AC_CACHE_CHECK([whether the target supports __sync_lock_test_and_set],
+                 ac_cv_have_sync_lock_test_and_set, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __sync_lock_test_and_set(&foo, 1);])],
+                 [ac_cv_have_sync_lock_test_and_set=yes],
+                 [ac_cv_have_sync_lock_test_and_set=no])])
+  if test $ac_cv_have_sync_lock_test_and_set = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_LOCK_TEST_AND_SET, 1,
+              [Define to 1 if the target supports __sync_lock_test_and_set])
+  fi])
+
+# Check whether the target supports __atomic_compare_exchange_n.
+AC_DEFUN([STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N], [
+  AC_CACHE_CHECK([whether the target supports __atomic_compare_exchange_n],
+                 ac_cv_have_atomic_compare_exchange_n, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar, baz;],
+                        [baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_compare_exchange_n=yes],
+                 [ac_cv_have_atomic_compare_exchange_n=no])])
+  if test $ac_cv_have_atomic_compare_exchange_n = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N, 1,
+              [Define to 1 if the target supports __atomic_compare_exchange_n])
+  fi])
+
+# Check whether the target supports __atomic_compare_exchange_n_8.
+AC_DEFUN([STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N_8], [
+  AC_CACHE_CHECK([whether the target supports __atomic_compare_exchange_n_8],
+                 ac_cv_have_atomic_compare_exchange_n_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar, baz;],
+                        [baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_compare_exchange_n_8=yes],
+                 [ac_cv_have_atomic_compare_exchange_n_8=no])])
+  if test $ac_cv_have_atomic_compare_exchange_n_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8, 1,
+              [Define to 1 if the target supports __atomic_compare_exchange_n_8])
+  fi])
+
+# Check whether the target supports __atomic_exchange_n.
+AC_DEFUN([STARPU_CHECK_ATOMIC_EXCHANGE_N], [
+  AC_CACHE_CHECK([whether the target supports __atomic_exchange_n],
+                 ac_cv_have_atomic_exchange_n, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_exchange_n=yes],
+                 [ac_cv_have_atomic_exchange_n=no])])
+  if test $ac_cv_have_atomic_exchange_n = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_EXCHANGE_N, 1,
+              [Define to 1 if the target supports __atomic_exchange_n])
+  fi])
+
+# Check whether the target supports __atomic_exchange_n_8 (the builtin on an int64_t operand).
+AC_DEFUN([STARPU_CHECK_ATOMIC_EXCHANGE_N_8], [
+  AC_CACHE_CHECK([whether the target supports __atomic_exchange_n_8],
+                 ac_cv_have_atomic_exchange_n_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_exchange_n_8=yes],
+                 [ac_cv_have_atomic_exchange_n_8=no])])
+  if test $ac_cv_have_atomic_exchange_n_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_EXCHANGE_N_8, 1,
+              [Define to 1 if the target supports __atomic_exchange_n_8])
+  fi])
+
+# Check whether the target supports __atomic_fetch_add.
+AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_ADD], [
+  AC_CACHE_CHECK([whether the target supports __atomic_fetch_add],
+                 ac_cv_have_atomic_fetch_add, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_fetch_add=yes],
+                 [ac_cv_have_atomic_fetch_add=no])])
+  if test $ac_cv_have_atomic_fetch_add = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_ADD, 1,
+              [Define to 1 if the target supports __atomic_fetch_add])
+  fi])
+
+# Check whether the target supports __atomic_fetch_add_8 (the builtin on an int64_t operand).
+AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_ADD_8], [
+  AC_CACHE_CHECK([whether the target supports __atomic_fetch_add_8],
+                 ac_cv_have_atomic_fetch_add_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_fetch_add_8=yes],
+                 [ac_cv_have_atomic_fetch_add_8=no])])
+  if test $ac_cv_have_atomic_fetch_add_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_ADD_8, 1,
+              [Define to 1 if the target supports __atomic_fetch_add_8])
+  fi])
+
+# Check whether the target supports __atomic_fetch_or.
+AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_OR], [
+  AC_CACHE_CHECK([whether the target supports __atomic_fetch_or],
+                 ac_cv_have_atomic_fetch_or, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_fetch_or=yes],
+                 [ac_cv_have_atomic_fetch_or=no])])
+  if test $ac_cv_have_atomic_fetch_or = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_OR, 1,
+              [Define to 1 if the target supports __atomic_fetch_or])
+  fi])
+
+# Check whether the target supports __atomic_fetch_or_8.
+AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_OR_8], [
+  AC_CACHE_CHECK([whether the target supports __atomic_fetch_or_8],
+                 ac_cv_have_atomic_fetch_or_8, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+                        int64_t foo, bar;],
+                        [bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_fetch_or_8=yes],
+                 [ac_cv_have_atomic_fetch_or_8=no])])
+  if test $ac_cv_have_atomic_fetch_or_8 = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_OR_8, 1,
+              [Define to 1 if the target supports __atomic_fetch_or_8])
+  fi])
+
+# Check whether the target supports __atomic_test_and_set.
+AC_DEFUN([STARPU_CHECK_ATOMIC_TEST_AND_SET], [
+  AC_CACHE_CHECK([whether the target supports __atomic_test_and_set],
+                 ac_cv_have_atomic_test_and_set, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+                        [bar = __atomic_test_and_set(&foo, __ATOMIC_SEQ_CST);])],
+                 [ac_cv_have_atomic_test_and_set=yes],
+                 [ac_cv_have_atomic_test_and_set=no])])
+  if test $ac_cv_have_atomic_test_and_set = yes; then
+    AC_DEFINE(STARPU_HAVE_ATOMIC_TEST_AND_SET, 1,
+              [Define to 1 if the target supports __atomic_test_and_set])
+  fi])
+
+# Check whether the target supports __sync_synchronize.
+AC_DEFUN([STARPU_CHECK_SYNC_SYNCHRONIZE], [ + AC_CACHE_CHECK([whether the target supports __sync_synchronize], + ac_cv_have_sync_synchronize, [ + AC_LINK_IFELSE([AC_LANG_PROGRAM(, + [__sync_synchronize();])], + [ac_cv_have_sync_synchronize=yes], + [ac_cv_have_sync_synchronize=no])]) + if test $ac_cv_have_sync_synchronize = yes; then + AC_DEFINE(STARPU_HAVE_SYNC_SYNCHRONIZE, 1, + [Define to 1 if the target supports __sync_synchronize]) + fi]) diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 new file mode 100644 index 0000000..2c18e49 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -0,0 +1,562 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the specified +# version of the C++ standard. If necessary, add switches to CXX and +# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) +# or '14' (for the C++14 standard). +# +# The second argument, if specified, indicates whether you insist on an +# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. +# -std=c++11). If neither is specified, you get whatever works, with +# preference for an extended mode. +# +# The third argument, if specified 'mandatory' or if left unspecified, +# indicates that baseline support for the specified C++ standard is +# required and that the macro should error out if no mode with that +# support is found. If specified 'optional', then configuration proceeds +# regardless, after defining HAVE_CXX${VERSION} if and only if a +# supporting mode is found. 
+# +# LICENSE +# +# Copyright (c) 2008 Benjamin Kosnik +# Copyright (c) 2012 Zack Weinberg +# Copyright (c) 2013 Roy Stogner +# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov +# Copyright (c) 2015 Paul Norman +# Copyright (c) 2015 Moritz Klammler +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 4 + +dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro +dnl (serial version number 13). + +AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl + m4_if([$1], [11], [], + [$1], [14], [], + [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], + [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$2], [], [], + [$2], [ext], [], + [$2], [noext], [], + [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], + [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], + [$3], [optional], [ax_cxx_compile_cxx$1_required=false], + [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) + AC_LANG_PUSH([C++])dnl + ac_success=no + AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, + ax_cv_cxx_compile_cxx$1, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [ax_cv_cxx_compile_cxx$1=yes], + [ax_cv_cxx_compile_cxx$1=no])]) + if test x$ax_cv_cxx_compile_cxx$1 = xyes; then + ac_success=yes + fi + + m4_if([$2], [noext], [], [dnl + if test x$ac_success = xno; then + for switch in -std=gnu++$1 -std=gnu++0x; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval 
$cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + + m4_if([$2], [ext], [], [dnl + if test x$ac_success = xno; then + dnl HP's aCC needs +std=c++11 according to: + dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf + dnl Cray's crayCC needs "-h std=c++11" + for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + AC_LANG_POP([C++]) + if test x$ax_cxx_compile_cxx$1_required = xtrue; then + if test x$ac_success = xno; then + AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) + fi + fi + if test x$ac_success = xno; then + HAVE_CXX$1=0 + AC_MSG_NOTICE([No compiler with C++$1 support was found]) + else + HAVE_CXX$1=1 + AC_DEFINE(HAVE_CXX$1,1, + [define if the compiler supports basic C++$1 syntax]) + fi + AC_SUBST(HAVE_CXX$1) +]) + + +dnl Test body for checking C++11 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 +) + + +dnl Test body for checking C++14 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 +) + + +dnl Tests for new features in C++11 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ + +// If the compiler admits that it is not ready for C++11, why 
torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual void f() {} + }; + + struct Derived : public Base + { + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? 
sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + 
const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) {} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + +]]) + + +dnl Tests for new features in C++14 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ + +// If the compiler admits that it is not ready for C++14, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201402L + +#error "This is not a C++14 compiler" + +#else + +namespace cxx14 +{ + + namespace test_polymorphic_lambdas + { + + int + test() + { + const auto lambda = [](auto&&... args){ + const auto istiny = [](auto x){ + return (sizeof(x) == 1UL) ? 
1 : 0; + }; + const int aretiny[] = { istiny(args)... }; + return aretiny[0]; + }; + return lambda(1, 1L, 1.0f, '1'); + } + + } + + namespace test_binary_literals + { + + constexpr auto ivii = 0b0000000000101010; + static_assert(ivii == 42, "wrong value"); + + } + + namespace test_generalized_constexpr + { + + template < typename CharT > + constexpr unsigned long + strlen_c(const CharT *const s) noexcept + { + auto length = 0UL; + for (auto p = s; *p; ++p) + ++length; + return length; + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("x") == 1UL, ""); + static_assert(strlen_c("test") == 4UL, ""); + static_assert(strlen_c("another\0test") == 7UL, ""); + + } + + namespace test_lambda_init_capture + { + + int + test() + { + auto x = 0; + const auto lambda1 = [a = x](int b){ return a + b; }; + const auto lambda2 = [a = lambda1(x)](){ return a; }; + return lambda2(); + } + + } + + namespace test_digit_seperators + { + + constexpr auto ten_million = 100'000'000; + static_assert(ten_million == 100000000, ""); + + } + + namespace test_return_type_deduction + { + + auto f(int& x) { return x; } + decltype(auto) g(int& x) { return x; } + + template < typename T1, typename T2 > + struct is_same + { + static constexpr auto value = false; + }; + + template < typename T > + struct is_same + { + static constexpr auto value = true; + }; + + int + test() + { + auto x = 0; + static_assert(is_same::value, ""); + static_assert(is_same::value, ""); + return x; + } + + } + +} // namespace cxx14 + +#endif // __cplusplus >= 201402L + +]]) diff --git a/m4/ax_dlb_callback_arg.m4 b/m4/ax_dlb_callback_arg.m4 new file mode 100644 index 0000000..7fecfe8 --- /dev/null +++ b/m4/ax_dlb_callback_arg.m4 @@ -0,0 +1,37 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# Check whether DLB callbacks expect a user argument; the result is cached in ac_cv_dlb_callback_arg and STARPURM_HAVE_DLB_CALLBACK_ARG is defined to 1 if they do +AC_DEFUN([AX_DLB_CALLBACK_ARG], +[AC_MSG_CHECKING([whether DLB callbacks expect an user argument]) + AC_CACHE_VAL(ac_cv_dlb_callback_arg,dnl + [AC_TRY_COMPILE(dnl +[#include +dlb_handler_t dlb_handle; +void _dlb_callback_disable_cpu(int cpuid, void *arg) { + (void)cpuid; + (void)arg; +} +void f(void) { +(void)DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, 0); +} +],, ac_cv_dlb_callback_arg=yes, ac_cv_dlb_callback_arg=no) + ])dnl AC_CACHE_VAL + AC_MSG_RESULT([$ac_cv_dlb_callback_arg]) + if test $ac_cv_dlb_callback_arg = yes; then + AC_DEFINE(STARPURM_HAVE_DLB_CALLBACK_ARG,1,[Define to 1 if DLB callbacks expect an user argument]) + fi +]) diff --git a/m4/libs.m4 b/m4/libs.m4 new file mode 100644 index 0000000..bf79358 --- /dev/null +++ b/m4/libs.m4 @@ -0,0 +1,262 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# STARPU_SEARCH_LIBS(NAME, FUNCTION, SEARCH-LIBS, +# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], +# [OTHER-LIBRARIES]) +# +# Like AC_SEARCH_LIBS, but puts -l flags into STARPU_$1_LDFLAGS instead of LIBS, and +# AC_SUBSTs it +AC_DEFUN([STARPU_SEARCH_LIBS], [dnl + _LIBS_SAV="$LIBS" + LIBS="" + AC_SEARCH_LIBS([$2], [$3], [$4], [$5], [$6]) + STARPU_$1_LDFLAGS="$STARPU_$1_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + AC_SUBST(STARPU_$1_LDFLAGS) +])dnl + +# STARPU_CHECK_LIB(NAME, LIBRARY, FUNCTION, +# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], +# [OTHER-LIBRARIES]) +# +# Like AC_CHECK_LIB, but puts -l flags into STARPU_$1_LDFLAGS instead of LIBS, and +# AC_SUBSTs it +AC_DEFUN([STARPU_CHECK_LIB], [dnl + _LIBS_SAV="$LIBS" + LIBS="" + AC_CHECK_LIB([$2], [$3], [$4], [$5], [$6]) + STARPU_$1_LDFLAGS="$STARPU_$1_LDFLAGS $LIBS" + LIBS=$_LIBS_SAV + AC_SUBST(STARPU_$1_LDFLAGS) +])dnl + +# STARPU_HAVE_LIBRARY(NAME, LIBRARY, +# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], +# [OTHER-LIBRARIES]) +# Like AC_HAVE_LIBRARY, but puts -l flags into STARPU_$1_LDFLAGS instead of LIBS, and +# AC_SUBSTs it +AC_DEFUN([STARPU_HAVE_LIBRARY], [dnl +STARPU_CHECK_LIB([$1], [$2], main, [$3], [$4], [$5]) +])dnl + +# STARPU_INIT_ZERO(INCLUDES, TYPE, INIT_MACRO) +# Checks whether when TYPE is initialized with INIT_MACRO, the content is just +# plain zeroes +AC_DEFUN([STARPU_INIT_ZERO], [dnl +AC_MSG_CHECKING(whether $3 just zeroes) +AC_RUN_IFELSE([AC_LANG_PROGRAM( + $1, + [[$2 var = $3; + char *p; + for (p = (char*) &var; p < (char*) (&var+1); p++) + if (*p != 0) + return 1; + return 0; + ]], + )], + [AC_DEFINE([STARPU_$3_ZERO], [1], [Define to 1 if `$3' is just zeroes]) + AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no)]) +])dnl + +# IS_SUPPORTED_CFLAG(flag, [action-if-supported]) +#
------------------------ +# Check if the CFLAGS `flag' is supported by the compiler; if so run the optional second argument, or by default append `flag' to GLOBAL_AM_CFLAGS (after re-checking with $MPICC unless build_mpi_lib and build_nmad_lib are both "no") +AC_DEFUN([IS_SUPPORTED_CFLAG], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_MSG_CHECKING([whether C compiler supports $1]) + + SAVED_CFLAGS="$CFLAGS" + CFLAGS="$1" + + check_mpi="no" + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [[]], + [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] + ), + [ + m4_default_nblank([$2], check_mpi="yes") + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + + if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" + then + if test "$check_mpi" = "yes" ; then + GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $1" + fi + elif test "$check_mpi" = "yes" ; then + SAVED_CC="$CC" + CC="$MPICC" + AC_MSG_CHECKING([whether MPI C compiler supports $1]) + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [[]], + [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] + ), + [ + m4_default_nblank([$2], [GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $1"]) + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + CC="$SAVED_CC" + fi + CFLAGS="$SAVED_CFLAGS" +]) + +# IS_SUPPORTED_CXXFLAG(flag, [action-if-supported]) +# ------------------------ +# Check if the CXXFLAGS `flag' is supported by the compiler; if so run action-if-supported, or by default append `flag' to GLOBAL_AM_CXXFLAGS +AC_DEFUN([IS_SUPPORTED_CXXFLAG], +[ + AC_REQUIRE([AC_PROG_CXX]) + AC_LANG_PUSH([C++]) + AC_MSG_CHECKING([whether CXX compiler supports $1]) + + SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$1" + + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [[]], + [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] + ), + [ + m4_default_nblank([$2], [GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS $1"]) + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + CXXFLAGS="$SAVED_CXXFLAGS" + AC_LANG_POP([C++]) +]) + +# IS_SUPPORTED_FFLAG(flag, [action-if-supported]) +# ------------------------ +# Check if the FFLAGS `flag' is supported by the compiler; if so run action-if-supported, or by default append `flag' to GLOBAL_AM_FFLAGS +AC_DEFUN([IS_SUPPORTED_FFLAG], +[ + AC_LANG_PUSH([Fortran 77]) + AC_MSG_CHECKING([whether Fortran 77 compiler supports $1]) + + SAVED_FFLAGS="$FFLAGS" + FFLAGS="$1" + + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [], +
[[AC_LANG_SOURCE([])]] + ), + [ + m4_default_nblank([$2], [GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS $1"]) + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + FFLAGS="$SAVED_FFLAGS" + AC_LANG_POP([Fortran 77]) +]) + +# IS_SUPPORTED_FCFLAG(flag, [action-if-supported]) +# ------------------------ +# Check if the FCFLAGS `flag' is supported by the compiler; if so run action-if-supported, or by default re-check with $MPIFORT and append `flag' to GLOBAL_AM_FCFLAGS. NOTE(review): unlike IS_SUPPORTED_CFLAG, the $MPIFORT check is not guarded by build_mpi_lib/build_nmad_lib -- confirm this is intended +AC_DEFUN([IS_SUPPORTED_FCFLAG], +[ + AC_LANG_PUSH([Fortran]) + AC_MSG_CHECKING([whether Fortran compiler supports $1]) + + SAVED_FCFLAGS="$FCFLAGS" + FCFLAGS="$1" + + check_mpi="no" + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [], + [[AC_LANG_SOURCE([])]] + ), + [ + m4_default_nblank([$2], check_mpi="yes") + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + if test "$check_mpi" = "yes" ; then + SAVED_FC="$FC" + FC="$MPIFORT" + AC_MSG_CHECKING([whether MPI Fortran compiler supports $1]) + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [], + [[AC_LANG_SOURCE([])]] + ), + [ + m4_default_nblank([$2], [GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS $1"]) + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + FC="$SAVED_FC" + fi + FCFLAGS="$SAVED_FCFLAGS" + AC_LANG_POP([Fortran]) +]) + +# IS_SUPPORTED_FLAG(flag) +# ------------------------ +# Check with C, C++, F77 and F90 that the `flag' is supported by the compiler +AC_DEFUN([IS_SUPPORTED_FLAG], +[ + IS_SUPPORTED_CFLAG($1) + IS_SUPPORTED_CXXFLAG($1) + IS_SUPPORTED_FFLAG($1) + IS_SUPPORTED_FCFLAG($1) +]) + +AC_DEFUN([IS_SUPPORTED_FLAG_VAR], +[ + IS_SUPPORTED_CFLAG($1,[$2_CFLAGS="$$2_CFLAGS $1"]) + IS_SUPPORTED_CXXFLAG($1,[$2_CXXFLAGS="$$2_CXXFLAGS $1"]) + IS_SUPPORTED_FFLAG($1,[$2_FFLAGS="$$2_FFLAGS $1"]) + IS_SUPPORTED_FCFLAG($1,[$2_FCFLAGS="$$2_FCFLAGS $1"]) +]) + +# AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found]) +# Check if the given python module is available +AC_DEFUN([AC_PYTHON_MODULE], +[ + echo "import $1" | $PYTHON - 2>/dev/null + if test $?
-ne 0 ; then + $3 + else + $2 + fi +]) diff --git a/m4/libtool.m4 b/m4/libtool.m4 new file mode 100644 index 0000000..e7b6833 --- /dev/null +++ b/m4/libtool.m4 @@ -0,0 +1,8427 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996-2001, 2003-2019, 2021-2022 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +]) + +# serial 59 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less that VERSION. 
+m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl +m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_PREPARE_CC_BASENAME +# ----------------------- +m4_defun([_LT_PREPARE_CC_BASENAME], [ +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+func_cc_basename () +{ + for cc_temp in @S|@*""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} +])# _LT_PREPARE_CC_BASENAME + + +# _LT_CC_BASENAME(CC) +# ------------------- +# It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, +# but that macro is also expanded into generated libtool script, which +# arranges for $SED and $ECHO to be set by different means. +m4_defun([_LT_CC_BASENAME], +[m4_require([_LT_PREPARE_CC_BASENAME])dnl +AC_REQUIRE([_LT_DECL_SED])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl +func_cc_basename $1 +cc_basename=$func_cc_basename_result +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. +m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl 
+_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_DECL_FILECMD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl +m4_require([_LT_CMD_TRUNCATE])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). 
+libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from 'configure', and 'config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# 'config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. 
+m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain=$ac_aux_dir/ltmain.sh +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the 'libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. 
+m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. +m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# 
-------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) +m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to 'config.status' so that its +# declaration there will have the same value as in 'configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. 
In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags='_LT_TAGS'dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. +# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. 
+m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into 'config.status', and then the shell code to quote escape them in +# for loops in 'config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. +m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# '#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). 
+m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test 0 = "$lt_write_fail" && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), incase it is used in configure for compilation +# tests. +AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +'$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to ." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2011 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permision to copy, distribute and modify it." 
+ +while test 0 != $[#] +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try '$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try '$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test yes = "$silent" && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +_LT_COPYING +_LT_LIBTOOL_TAGS + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +_LT_PREPARE_MUNGE_PATH_LIST +_LT_PREPARE_CC_BASENAME + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS=$save_LDFLAGS + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR $AR_FLAGS libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest 
conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[[012]],*|,*powerpc*-darwin[[5-8]]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test yes = "$lt_cv_ld_force_load"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all 
\"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + m4_if([$1], [CXX], +[ if test yes != "$lt_cv_apple_cc_single_mod"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname 
\$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. 
+# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script that will find a shell with a builtin +# printf (that we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +case $ECHO in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[m4_require([_LT_DECL_SED])dnl +AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], + [Search for dependent 
libraries within DIR (or the compiler's sysroot + if not specified).])], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([$with_sysroot]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and where our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. 
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +_LT_DECL([], [AR], [1], [The archiver]) + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS +_LT_DECL([], [lt_ar_flags], [0], [Flags to create an archive (by configure)]) + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. +_LT_DECL([], [AR_FLAGS], [\@S|@{ARFLAGS-"\@S|@lt_ar_flags"}], + [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. 
+ rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl 
+AC_CACHE_CHECK([$1], [$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test yes = "[$]$2"; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS +]) + +if test yes = "[$]$2"; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). 
+ lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +if test -n "$lt_cv_sys_max_cmd_len"; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes = "$cross_compiling"; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[ + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. 
+ lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen=shl_load], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen=dlopen], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) 
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. +m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links=nottested +if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test no = "$hard_links"; then + AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- 
+# Choose the directory name libtool uses for its temporary files.
+# The probe removes any plain file called .libs, tries to create a
+# directory of that name, and then tests whether the directory exists:
+# if it does, the cached result lt_cv_objdir is .libs, otherwise _libs.
+# All errors from rm, mkdir and rmdir are discarded.  The result is
+# copied to objdir, declared through _LT_DECL, and also defined for the
+# preprocessor as LT_OBJDIR (the directory name followed by a slash)
+# through AC_DEFINE_UNQUOTED.
+m4_defun([_LT_CHECK_OBJDIR],
+[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
+[rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  lt_cv_objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null])
+objdir=$lt_cv_objdir
+_LT_DECL([], [objdir], [0],
+         [The name of the directory that contains temporary libtool files])dnl
+m4_pattern_allow([LT_OBJDIR])dnl
+AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/",
+  [Define to the sub-directory where libtool stores uninstalled libraries.])
+])# _LT_CHECK_OBJDIR
+
+
+# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
+# --------------------------------------
+# Check hardcoding attributes.
+#
+# Sets the per-tag hardcode_action variable to one of:
+#   relink       a hardcoding mechanism is configured, and neither
+#                hardcode_direct nor hardcode_minus_L is "no"
+#   immediate    a hardcoding mechanism is configured, and at least one
+#                of hardcode_direct / hardcode_minus_L is "no"
+#   unsupported  hardcode_libdir_flag_spec and runpath_var are both empty
+#                and hardcode_automatic is not "yes"
+# "A hardcoding mechanism is configured" means that at least one of those
+# three last-named conditions does not hold.
+#
+# Also adjusts enable_fast_install: it becomes "no" when hardcode_action
+# is relink or inherit_rpath is "yes"; otherwise it becomes "needless"
+# when shlibpath_overrides_runpath is "yes" or enable_shared is "no".  In
+# every other case enable_fast_install is left untouched.
+m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
+[AC_MSG_CHECKING([how to hardcode library paths into programs])
+_LT_TAGVAR(hardcode_action, $1)=
+if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
+   test -n "$_LT_TAGVAR(runpath_var, $1)" ||
+   test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then
+
+  # We can hardcode non-existent directories.
+  if test no != "$_LT_TAGVAR(hardcode_direct, $1)" &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" &&
+     test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then
+    # Linking always hardcodes the temporary library directory.
+    _LT_TAGVAR(hardcode_action, $1)=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    _LT_TAGVAR(hardcode_action, $1)=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  _LT_TAGVAR(hardcode_action, $1)=unsupported
+fi
+AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
+
+if test relink = "$_LT_TAGVAR(hardcode_action, $1)" ||
+   test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test yes = "$shlibpath_overrides_runpath" ||
+     test no = "$enable_shared"; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+_LT_TAGDECL([], [hardcode_action], [0],
+    [How to hardcode a shared library path into an executable])
+])# _LT_LINKER_HARDCODE_LIBPATH
+
+
+# _LT_CMD_STRIPLIB
+# ----------------
+# Work out the commands used to strip libraries and report whether
+# stripping is possible.  striplib and old_striplib start out empty and
+# are filled in only when STRIP is non-empty and one of the following
+# holds: the -V output of STRIP mentions "GNU strip", the host is
+# darwin*, or the host is freebsd* and the -V output mentions
+# "elftoolchain".  Both variables are declared with _LT_DECL whether or
+# not they were set.
+# NOTE(review): presumably old_striplib applies to static archives and
+# striplib to shared libraries -- confirm against the consumers of these
+# variables, which are not visible here.
+m4_defun([_LT_CMD_STRIPLIB],
+[m4_require([_LT_DECL_EGREP])
+striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -z "$STRIP"; then
+  AC_MSG_RESULT([no])
+else
+  # Identify the tool from its version banner before falling back to
+  # host-specific guesses.
+  if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+    old_striplib="$STRIP --strip-debug"
+    striplib="$STRIP --strip-unneeded"
+    AC_MSG_RESULT([yes])
+  else
+    case $host_os in
+    darwin*)
+      # FIXME - insert some real tests, host_os isn't really good enough
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      AC_MSG_RESULT([yes])
+      ;;
+    freebsd*)
+      # A strip that reports itself as elftoolchain is given the same
+      # options as GNU strip above.
+      if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then
+        old_striplib="$STRIP --strip-debug"
+        striplib="$STRIP --strip-unneeded"
+        AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+      fi
+      ;;
+    *)
+      AC_MSG_RESULT([no])
+      ;;
+    esac
+  fi
+fi
+_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
+_LT_DECL([], [striplib], [1])
+])# _LT_CMD_STRIPLIB
+
+
+# _LT_PREPARE_MUNGE_PATH_LIST
+# ---------------------------
+# Make sure func_munge_path_list() is defined correctly.
+m4_defun([_LT_PREPARE_MUNGE_PATH_LIST], +[[# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x@S|@2 in + x) + ;; + *:) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\" + ;; + x:*) + eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\" + ;; + *) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + esac +} +]])# _LT_PREPARE_PATH_LIST + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains 
";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +AC_ARG_VAR([LT_SYS_LIBRARY_PATH], +[User-defined run-time library search path.]) + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a[(]lib.so.V[)]' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. 
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directores which are + # searched for libraries, however this is still not possible. 
Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+ The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2], + [Detected run-time system search path for libraries]) +_LT_DECL([], [configure_time_lt_sys_library_path], [2], + [Explicit LT_SYS_LIBRARY_PATH set during ./configure time]) +])# _LT_SYS_DYNAMIC_LINKER + + +# _LT_PATH_TOOL_PREFIX(TOOL) +# -------------------------- +# find a file program that can recognize shared library +AC_DEFUN([_LT_PATH_TOOL_PREFIX], +[m4_require([_LT_DECL_EGREP])dnl +AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in +[[\\/*] | ?:[\\/]*]) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="m4_if([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/$1"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"$1" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac]) +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + AC_MSG_RESULT($MAGIC_CMD) +else + AC_MSG_RESULT(no) +fi +_LT_DECL([], [MAGIC_CMD], [0], + [Used to examine libraries when file_magic_cmd begins with "file"])dnl +])# _LT_PATH_TOOL_PREFIX + +# Old name: +AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) + + +# _LT_PATH_MAGIC +# -------------- +# find a file program that can recognize a shared library +m4_defun([_LT_PATH_MAGIC], +[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) + else + MAGIC_CMD=: + fi +fi +])# _LT_PATH_MAGIC + + +# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test no = "$withval" || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. 
+ case `"$lt_cv_path_LD" -v 2>&1 &1 conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd], +[if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi]) +rm -f conftest.i conftest2.i conftest.out]) +])# _LT_PATH_DD + + +# _LT_CMD_TRUNCATE +# ---------------- +# find command to truncate a binary pipe +m4_defun([_LT_CMD_TRUNCATE], +[m4_require([_LT_PATH_DD]) +AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin], +[printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"]) +_LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1], + [Command to truncate a binary pipe]) +])# _LT_CMD_TRUNCATE + + +# _LT_CHECK_MAGIC_METHOD +# ---------------------- +# how to check for library dependencies +# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_MAGIC_METHOD], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +AC_CACHE_CHECK([how to recognize dependent libraries], +lt_cv_deplibs_check_method, +[lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[[4-9]]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[[45]]*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all 
+ ;; +os2*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# Find the pathname to a BSD- or MS-compatible name lister and store it in NM. A preset NM is used as is; when no nm program is found, dumpbin is probed instead, and NM finally falls back to plain nm. The detected interface, either BSD nm or MS dumpbin, is cached in lt_cv_nm_interface. +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi]) +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib and just echo its name back + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# Locate the manifest tool, trying mt. MANIFEST_TOOL is reset to a colon unless the program prints the words Manifest Tool when run with the -? option. +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# _LT_DLL_DEF_P([FILE]) +# --------------------- +# True iff FILE is a Windows DLL '.def' file. 
+# Keep in sync with func_dll_def_p in the libtool script +AC_DEFUN([_LT_DLL_DEF_P], +[dnl + test DEF = "`$SED -n dnl + -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace + -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments + -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl + -e q dnl Only consider the first "real" line + $1`" dnl +])# _LT_DLL_DEF_P + + +# LT_LIB_M +# -------- +# Check for the math library and put the flags needed to link against it in LIBM, which stays empty on hosts that have no libm or do not need it. +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These systems don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM=-lm) + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. 
+ lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. 
+ if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. 
+ mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$as_me: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. 
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], + [Transform the output of nm into a list of symbols to manually relocate]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([nm_interface], [lt_cv_nm_interface], [1], + [The name lister interface]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). 
+ m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd* | netbsdelf*-gnu) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test yes = "$GCC"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. 
+ case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. 
+ lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. 
+# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. +m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl* | icl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + 
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. 
+ if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. 
+ if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" 
&& new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of 
the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. 
+ _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. 
+ case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. 
+ _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly* | midnightbsd*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS=$save_LDFLAGS]) + if test yes = "$lt_cv_irix_exported_symbol"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + _LT_TAGVAR(link_all_deplibs, $1)=no + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + 
fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(ld_shlibs, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + osf3*) + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname 
$wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + 
_LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. 
If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? 
+# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + 
[Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + [Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. 
+ This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting $shlibpath_var if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify 
filename containing input files]) +dnl FIXME: Not yet implemented +dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], +dnl [Compiler flag to generate thread safe objects]) +])# _LT_LINKER_SHLIBS + + +# _LT_LANG_C_CONFIG([TAG]) +# ------------------------ +# Define the C compiler configuration variables that _LT_CONFIG later writes +# to 'libtool'. $CC is saved on entry and restored on exit, and the probes +# inside the macro run only when $compiler is non-empty. +m4_defun([_LT_LANG_C_CONFIG], +[m4_require([_LT_DECL_EGREP])dnl +lt_save_CC=$CC +AC_LANG_PUSH(C) + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + +_LT_TAG_COMPILER +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + LT_SYS_DLOPEN_SELF + _LT_CMD_STRIPLIB + + # Report what library types will actually be built + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes, forcing static on when shared is off. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_CONFIG($1) +fi +AC_LANG_POP +CC=$lt_save_CC +])# _LT_LANG_C_CONFIG + + +# _LT_LANG_CXX_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a C++ compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test yes = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # be overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. 
+ aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. 
Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl* | ,icl* | no,icl*) + # Native MSVC or ICC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY 
${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the 
search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname 
$wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; 
then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just 
about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + + _LT_TAGVAR(GCC, $1)=$GXX + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the 
following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case @S|@2 in + .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; + *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. 
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar (void) { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. 
+ pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case $prev$p in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. + if test x-L = "x$p" || + test x-R = "x$p"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test no = "$pre_test_object_deps_done"; then + case $prev in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)=$prev$p + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. 
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test no = "$pre_test_object_deps_done"; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)=$p + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)=$p + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f conftest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L! 
!g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test no = "$F77"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. 
+objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_F77"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$G77 + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_F77" + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test no = "$FC"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_FC"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... 
+ _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_FC" + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in. 
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in. 
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code=$lt_simple_compile_test_code + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. 
+lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# -------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH. 
+m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_FILECMD +# ---------------- +# Check for a file(cmd) program that can be used to detect file type and magic +m4_defun([_LT_DECL_FILECMD], +[AC_CHECK_TOOL([FILECMD], [file], [:]) +_LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) +])# _LT_DECL_FILECMD + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=.
+ for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f "$lt_ac_sed" && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. + if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test 10 -lt "$lt_ac_count" && break + lt_ac_count=`expr $lt_ac_count + 1` + if test "$lt_ac_count" -gt "$lt_ac_max"; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features.
+m4_defun([_LT_CHECK_SHELL_FEATURES], +[if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine what file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd 
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 new file mode 100644 index 0000000..b0b5e9c --- /dev/null +++ b/m4/ltoptions.m4 @@ -0,0 +1,437 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 8 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag. 
+m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option '$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless any option in OPTION-LIST for MACRO-NAME +# is set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise warn about +# the unknown option (via m4_warning). +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair.
For example, if neither + dnl 'shared' nor 'disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], + [_LT_WITH_AIX_SONAME([aix])]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. ## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's.
+LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and supports the 'shared' and +# 'disable-shared' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the 'static' and +# 'disable-static' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the 'fast-install' +# and 'disable-fast-install' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_AIX_SONAME([DEFAULT]) +# ---------------------------------- +# implement the --with-aix-soname flag, and support the `aix-soname=aix' +# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT +# is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'. 
+m4_define([_LT_WITH_AIX_SONAME], +[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl +shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[[5-9]]*,yes) + AC_MSG_CHECKING([which variant of shared library versioning to provide]) + AC_ARG_WITH([aix-soname], + [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], + [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], + [case $withval in + aix|svr4|both) + ;; + *) + AC_MSG_ERROR([Unknown argument to --with-aix-soname]) + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname], + [AC_CACHE_VAL([lt_cv_with_aix_soname], + [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) + with_aix_soname=$lt_cv_with_aix_soname]) + AC_MSG_RESULT([$with_aix_soname]) + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + +_LT_DECL([], [shared_archive_member_spec], [0], + [Shared archive member basename, for filename based shared library versioning on AIX])dnl +])# _LT_WITH_AIX_SONAME + +LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the 'pic-only' and 'no-pic' +# LT_INIT options. +# MODE is either 'yes' or 'no'. 
If omitted, it defaults to 'both'. +m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [pic_mode=m4_default([$1], [default])]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 new file mode 100644 index 0000000..902508b --- /dev/null +++ b/m4/ltsugar.m4 @@ -0,0 +1,124 @@ +# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2022 Free Software +# Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as we still need to support +# Autoconf-2.59, which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. +# Note that neither SEPARATOR nor STRING is expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
+m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff --git a/m4/ltversion.m4 b/m4/ltversion.m4 new file mode 100644 index 0000000..b155d0a --- /dev/null +++ b/m4/ltversion.m4 @@ -0,0 +1,24 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004, 2011-2019, 2021-2022 Free Software Foundation, +# Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# @configure_input@ + +# serial 4245 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4.7]) +m4_define([LT_PACKAGE_REVISION], [2.4.7]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4.7' +macro_revision='2.4.7' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 new file mode 100644 index 0000000..0f7a875 --- /dev/null +++ b/m4/lt~obsolete.m4 @@ -0,0 +1,99 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], 
[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/m4/pkg.m4 b/m4/pkg.m4 new file mode 100644 index 0000000..0048a3f --- /dev/null +++ b/m4/pkg.m4 @@ -0,0 +1,157 @@ +# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- +# +# Copyright © 2004 Scott James Remnant . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# PKG_PROG_PKG_CONFIG([MIN-VERSION]) +# ---------------------------------- +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_PATH)?$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi + +fi[]dnl +])# PKG_PROG_PKG_CONFIG + +# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# +# Check to see whether a particular set of modules exists. Similar +# to PKG_CHECK_MODULES(), but does not set variables or print errors. 
+# +# +# Similar to PKG_CHECK_MODULES, make sure that the first instance of +# this or PKG_CHECK_MODULES is called, or make sure to call +# PKG_CHECK_EXISTS manually +# -------------------------------------------------------------- +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_ifval([$2], [$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + + +# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +# --------------------------------------------- +m4_define([_PKG_CONFIG], +[if test -n "$PKG_CONFIG"; then + if test -n "$$1"; then + pkg_cv_[]$1="$$1" + else + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`], + [pkg_failed=yes]) + fi +else + pkg_failed=untried +fi[]dnl +])# _PKG_CONFIG + +# _PKG_SHORT_ERRORS_SUPPORTED +# ----------------------------- +AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])# _PKG_SHORT_ERRORS_SUPPORTED + + +# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +# [ACTION-IF-NOT-FOUND]) +# +# +# Note that if there is a possibility the first call to +# PKG_CHECK_MODULES might not happen, you should be sure to include an +# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +# +# +# -------------------------------------------------------------- +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $1]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call 
pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + ifelse([$4], , [AC_MSG_ERROR(dnl +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT +])], + [AC_MSG_RESULT([no]) + $4]) +elif test $pkg_failed = untried; then + ifelse([$4], , [AC_MSG_FAILURE(dnl +[The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see <http://pkg-config.freedesktop.org/>.])], + [$4]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + ifelse([$3], , :, [$3]) +fi[]dnl +])# PKG_CHECK_MODULES diff --git a/make/starpu-loader.mk b/make/starpu-loader.mk new file mode 100644 index 0000000..c2d398d --- /dev/null +++ b/make/starpu-loader.mk @@ -0,0 +1,95 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +noinst_PROGRAMS = + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER + +if HAVE_PARALLEL +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+STARPU_SUB_PARALLEL=$(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +export STARPU_SUB_PARALLEL +endif + +export MS_LAUNCHER +if STARPU_USE_MPI_MASTER_SLAVE +# Make tests run through mpiexec +LAUNCHER += $(abs_top_srcdir)/tools/starpu_msexec +MS_LAUNCHER = $(STARPU_MPIEXEC) +LAUNCHER_ENV += $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +endif + +if STARPU_USE_TCPIP_MASTER_SLAVE +LAUNCHER += $(abs_top_srcdir)/tools/starpu_msexec +MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +LAUNCHER_ENV += STARPU_RESERVE_NCPU=2 +endif + +LAUNCHER ?= +MS_LAUNCHER ?= + +if STARPU_HAVE_WINDOWS +LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +else +LOADER ?= ./loader +loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +noinst_PROGRAMS += loader +endif + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +if STARPU_HAVE_AM111 +TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +LOG_COMPILER = $(LOADER_BIN) +else +TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +endif + +AM_TESTS_FD_REDIRECT = 9>&2 diff --git a/make/starpu-notests.mk b/make/starpu-notests.mk new file mode 100644 index 0000000..1b8c6de --- /dev/null +++ b/make/starpu-notests.mk @@ -0,0 +1,32 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu.mk + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null diff --git a/make/starpu-subdirtests.mk b/make/starpu-subdirtests.mk new file mode 100644 index 0000000..b6a84f9 --- /dev/null +++ b/make/starpu-subdirtests.mk @@ -0,0 +1,49 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET diff --git a/make/starpu-tests.mk b/make/starpu-tests.mk new file mode 100644 index 0000000..8dec1cc --- /dev/null +++ b/make/starpu-tests.mk @@ -0,0 +1,105 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +LAUNCHER_ENV = +LAUNCHER = +include $(top_srcdir)/make/starpu.mk + +STARPU_MPI_NP ?= 4 +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +if STARPU_SIMGRID +STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile +else +STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +endif + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +if STARPU_SIMGRID +export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +export STARPU_HOSTNAME=mirage +export MALLOC_PERTURB_=0 + +env: + @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) + @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) + @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) +endif + +if STARPU_SIMGRID +export STARPU_SIMGRID=1 +endif + +if STARPU_QUICK_CHECK +export STARPU_QUICK_CHECK=1 +endif + +if STARPU_LONG_CHECK +export STARPU_LONG_CHECK=1 +endif diff --git a/make/starpu.mk b/make/starpu.mk new file mode 100644 index 0000000..ee2791e --- /dev/null +++ b/make/starpu.mk @@ -0,0 +1,74 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) + +if STARPU_USE_CUDA +V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +V_nvcc_0 = @echo " NVCC " $@; +V_nvcc_1 = +V_nvcc = $(V_nvcc_$(V)) + +if STARPU_COVERITY +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +V_mynvcc_0 = @echo " myNVCC " $@; +V_mynvcc_1 = +V_mynvcc = $(V_mynvcc_$(V)) +.cu.o: + @$(MKDIR_P) `dirname $@` + $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c +else +NVCCFLAGS += --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) + +.cu.cubin: + $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +.cu.o: + $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +endif +endif + +if STARPU_USE_HIP +V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +V_hipcc_0 = @echo " HIPCC " $@; +V_hipcc_1 = +V_hipcc = $(V_hipcc_$(V)) + +HIPCCFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +.hip.o: + $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) +endif + +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) + +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) + +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; 
+V_help2man_1 = +V_help2man = $(V_help2man_$(V)) diff --git a/min-dgels/Makefile b/min-dgels/Makefile new file mode 100644 index 0000000..53882f1 --- /dev/null +++ b/min-dgels/Makefile @@ -0,0 +1,48 @@ +CC = gcc +LD = /usr/bin/ld -m elf_x86_64 +srcdir = . + +CLAPACK=base +ADDITIONAL=additional + +all: + mkdir -p build + [ -d "$(CLAPACK)" ] || ( cp -a $(srcdir)/$(CLAPACK) . ; chmod -R +rwX $(CLAPACK) ) + cd $(CLAPACK) && $(MAKE) blaslib CC="$(CC)" LD="$(LD)" + cd $(CLAPACK) && $(MAKE) f2clib CC="$(CC)" LD="$(LD)" + [ -d "$(ADDITIONAL)" ] || ( cp -a $(srcdir)/$(ADDITIONAL) . ; chmod -R +rwX $(ADDITIONAL) ) + cd $(ADDITIONAL) && $(CC) -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a + +install: +installcheck: +uninstall: +distuninstallcheck: +dvi: + +clean: + -cd $(CLAPACK) && $(MAKE) clean && rm -rf *~ + -cd $(ADDITIONAL) && rm -rf *.o *~ + rm -rf build *~ + +distclean: clean + [ -f Makefile.in ] || rm -fr $(CLAPACK) $(ADDITIONAL) + +# This part is needed by StarPU + +STARPU_SRCDIR = . + +distdir: + cp -fRp $(STARPU_SRCDIR)/* $(distdir) + cd $(distdir) && $(MAKE) -f Makefile.in clean + +check: + echo "No checks are implemented for min-dgels" + +showfailed: + @: + +showcheck: check + +showsuite: check + +recheck: check diff --git a/min-dgels/Makefile.in b/min-dgels/Makefile.in new file mode 100644 index 0000000..50ddede --- /dev/null +++ b/min-dgels/Makefile.in @@ -0,0 +1,48 @@ +CC = @CC@ +LD = @LD@ +srcdir = @srcdir@ + +CLAPACK=base +ADDITIONAL=additional + +all: + mkdir -p build + [ -d "$(CLAPACK)" ] || ( cp -a $(srcdir)/$(CLAPACK) . ; chmod -R +rwX $(CLAPACK) ) + cd $(CLAPACK) && $(MAKE) blaslib CC="$(CC)" LD="$(LD)" + cd $(CLAPACK) && $(MAKE) f2clib CC="$(CC)" LD="$(LD)" + [ -d "$(ADDITIONAL)" ] || ( cp -a $(srcdir)/$(ADDITIONAL) . 
; chmod -R +rwX $(ADDITIONAL) ) + cd $(ADDITIONAL) && $(CC) -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a + +install: +installcheck: +uninstall: +distuninstallcheck: +dvi: + +clean: + -cd $(CLAPACK) && $(MAKE) clean && rm -rf *~ + -cd $(ADDITIONAL) && rm -rf *.o *~ + rm -rf build *~ + +distclean: clean + [ -f Makefile.in ] || rm -fr $(CLAPACK) $(ADDITIONAL) + +# This part is needed by StarPU + +STARPU_SRCDIR = @srcdir@ + +distdir: + cp -fRp $(STARPU_SRCDIR)/* $(distdir) + cd $(distdir) && $(MAKE) -f Makefile.in clean + +check: + echo "No checks are implemented for min-dgels" + +showfailed: + @: + +showcheck: check + +showsuite: check + +recheck: check diff --git a/min-dgels/additional/blaswrap.h b/min-dgels/additional/blaswrap.h new file mode 100644 index 0000000..769b599 --- /dev/null +++ b/min-dgels/additional/blaswrap.h @@ -0,0 +1,8 @@ +/* CLAPACK 3.0 BLAS wrapper macros + * Feb 5, 2000 + */ + +#ifndef __BLASWRAP_H +#define __BLASWRAP_H + +#endif /* __BLASWRAP_H */ diff --git a/min-dgels/additional/clapack.h b/min-dgels/additional/clapack.h new file mode 100644 index 0000000..60f778a --- /dev/null +++ b/min-dgels/additional/clapack.h @@ -0,0 +1,7262 @@ +/* header file for clapack 3.2.1 */ + +#ifndef __CLAPACK_H +#define __CLAPACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Subroutine */ int _starpu_caxpy_(integer *n, complex *ca, complex *cx, integer * + incx, complex *cy, integer *incy); + +/* Subroutine */ int _starpu_ccopy_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy); + +/* Complex */ VOID _starpu_cdotc_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy); + +/* Complex */ VOID _starpu_cdotu_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy); + +/* Subroutine */ int _starpu_cgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, complex *alpha, complex *a, integer *lda, complex *x, + integer *incx, 
complex *beta, complex *y, integer *incy); + +/* Subroutine */ int _starpu_cgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_cgemv_(char *trans, integer *m, integer *n, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy); + +/* Subroutine */ int _starpu_cgerc_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cgeru_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_chbmv_(char *uplo, integer *n, integer *k, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy); + +/* Subroutine */ int _starpu_chemm_(char *side, char *uplo, integer *m, integer *n, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_chemv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy); + +/* Subroutine */ int _starpu_cher_(char *uplo, integer *n, real *alpha, complex *x, + integer *incx, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cher2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cher2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + real *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_cherk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, complex *a, integer *lda, real *beta, 
complex *c__, + integer *ldc); + +/* Subroutine */ int _starpu_chpmv_(char *uplo, integer *n, complex *alpha, complex * + ap, complex *x, integer *incx, complex *beta, complex *y, integer * + incy); + +/* Subroutine */ int _starpu_chpr_(char *uplo, integer *n, real *alpha, complex *x, + integer *incx, complex *ap); + +/* Subroutine */ int _starpu_chpr2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *ap); + +/* Subroutine */ int _starpu_crotg_(complex *ca, complex *cb, real *c__, complex *s); + +/* Subroutine */ int _starpu_cscal_(integer *n, complex *ca, complex *cx, integer * + incx); + +/* Subroutine */ /* NOTE(review): the "_starpu_" prefix is doubled on this name, unlike the single-prefixed declarations around it -- presumably an artifact of the symbol-prefixing pass; confirm that no definition or caller relies on this spelling before normalising it. */ int _starpu__starpu_csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s); + +/* Subroutine */ int _starpu_csscal_(integer *n, real *sa, complex *cx, integer *incx); + +/* Subroutine */ int _starpu_cswap_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy); + +/* Subroutine */ int _starpu_csymm_(char *side, char *uplo, integer *m, integer *n, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_csyr2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_csyrk_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *beta, complex *c__, + integer *ldc); + +/* Subroutine */ int _starpu_ctbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, complex *a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, complex *a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctpmv_(char *uplo, char *trans, char *diag, integer *n, + complex *ap, 
complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctpsv_(char *uplo, char *trans, char *diag, integer *n, + complex *ap, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctrmv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctrsv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx); + +doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx); + +/* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy); + +doublereal _starpu_dcabs1_(doublecomplex *z__); + +/* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy); + +doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy); + +/* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, doublereal *alpha, doublereal *a, integer *lda, + doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy); + +/* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc); + +/* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +/* Subroutine */ int 
_starpu_dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda); + +doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s); + +/* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, doublereal *c__, + doublereal *s); + +/* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *dparam); + +/* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * + dx1, doublereal *dy1, doublereal *dparam); + +/* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx); + +doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * + incy); + +/* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, + doublereal *ap, doublereal *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *ap); + +/* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *ap); + +/* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy); + +/* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dsymv_(char 
*uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal 
*b, integer *ldb); + +/* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx); + +doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx); + +doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx); + +integer _starpu_icamax_(integer *n, complex *cx, integer *incx); + +integer _starpu_idamax_(integer *n, doublereal *dx, integer *incx); + +integer _starpu_isamax_(integer *n, real *sx, integer *incx); + +integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx); + +logical _starpu_lsame_(char *ca, char *cb); + +doublereal _starpu_sasum_(integer *n, real *sx, integer *incx); + +/* Subroutine */ int _starpu_saxpy_(integer *n, real *sa, real *sx, integer *incx, + real *sy, integer *incy); + +doublereal _starpu_scabs1_(complex *z__); + +doublereal _starpu_scasum_(integer *n, complex *cx, integer *incx); + +doublereal _starpu_scnrm2_(integer *n, complex *x, integer *incx); + +/* Subroutine */ int _starpu_scopy_(integer *n, real *sx, integer *incx, real *sy, + integer *incy); + +doublereal _starpu_sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy); + +doublereal _starpu_sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, + integer *incy); + +/* Subroutine */ int _starpu_sgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, real *alpha, real *a, integer *lda, real *x, integer * + incx, real *beta, real *y, integer *incy); + +/* Subroutine */ int _starpu_sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc); + +/* Subroutine */ int _starpu_sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +/* Subroutine */ int _starpu_sger_(integer *m, integer *n, real *alpha, real *x, + integer 
*incx, real *y, integer *incy, real *a, integer *lda); + +doublereal _starpu_snrm2_(integer *n, real *x, integer *incx); + +/* Subroutine */ int _starpu_srot_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *c__, real *s); + +/* Subroutine */ int _starpu_srotg_(real *sa, real *sb, real *c__, real *s); + +/* Subroutine */ int _starpu_srotm_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *sparam); + +/* Subroutine */ int _starpu_srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real + *sparam); + +/* Subroutine */ int _starpu_ssbmv_(char *uplo, integer *n, integer *k, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +/* Subroutine */ int _starpu_sscal_(integer *n, real *sa, real *sx, integer *incx); + +/* Subroutine */ int _starpu_sspmv_(char *uplo, integer *n, real *alpha, real *ap, + real *x, integer *incx, real *beta, real *y, integer *incy); + +/* Subroutine */ int _starpu_sspr_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *ap); + +/* Subroutine */ int _starpu_sspr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *ap); + +/* Subroutine */ int _starpu_sswap_(integer *n, real *sx, integer *incx, real *sy, + integer *incy); + +/* Subroutine */ int _starpu_ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int _starpu_ssymv_(char *uplo, integer *n, real *alpha, real *a, + integer *lda, real *x, integer *incx, real *beta, real *y, integer * + incy); + +/* Subroutine */ int _starpu_ssyr_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *a, integer *lda); + +/* Subroutine */ int _starpu_ssyr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda); + +/* Subroutine */ int _starpu_ssyr2k_(char *uplo, char 
*trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int _starpu_ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc); + +/* Subroutine */ int _starpu_stbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_stbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_stpmv_(char *uplo, char *trans, char *diag, integer *n, + real *ap, real *x, integer *incx); + +/* Subroutine */ int _starpu_stpsv_(char *uplo, char *trans, char *diag, integer *n, + real *ap, real *x, integer *incx); + +/* Subroutine */ int _starpu_strmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_strmv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_strsv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len); + +/* Subroutine */ int _starpu_zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx, doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zcopy_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy); + +/* Double Complex */ VOID 
_starpu_zdotc_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); + +/* Double Complex */ VOID _starpu_zdotu_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zdrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s); + +/* Subroutine */ int _starpu_zdscal_(integer *n, doublereal *da, doublecomplex *zx, + integer *incx); + +/* Subroutine */ int _starpu_zgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex * + y, integer *incy); + +/* Subroutine */ int _starpu_zgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * + c__, integer *ldc); + +/* Subroutine */ int _starpu_zgemv_(char *trans, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * + incy); + +/* Subroutine */ int _starpu_zgerc_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zgeru_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zhbmv_(char *uplo, integer *n, integer *k, doublecomplex + *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * + incx, doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zhemm_(char *side, char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer 
*lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zhemv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zher_(char *uplo, integer *n, doublereal *alpha, + doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zher2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zher2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_zherk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, + doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_zhpmv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * + beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zhpr_(char *uplo, integer *n, doublereal *alpha, + doublecomplex *x, integer *incx, doublecomplex *ap); + +/* Subroutine */ int _starpu_zhpr2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *ap); + +/* Subroutine */ int _starpu_zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal * + c__, doublecomplex *s); + +/* Subroutine */ int _starpu_zscal_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx); + +/* Subroutine */ int _starpu_zswap_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zsymm_(char *side, char *uplo, integer 
*m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + beta, doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_ztbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer + *incx); + +/* Subroutine */ int _starpu_ztbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer + *incx); + +/* Subroutine */ int _starpu_ztpmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztpsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztrmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztrsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_cbdsqr_(char 
*uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, complex *vt, integer *ldvt, + complex *u, integer *ldu, complex *c__, integer *ldc, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, complex *ab, integer *ldab, real *d__, + real *e, complex *q, integer *ldq, complex *pt, integer *ldpt, + complex *c__, integer *ldc, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbcon_(char *norm, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbequ_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_cgbequb_(integer *m, integer *n, integer *kl, integer * + ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, + real *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_cgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * + ldafb, integer *ipiv, complex *b, integer *ldb, complex *x, integer * + ldx, real *ferr, real *berr, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * + afb, integer *ldafb, integer *ipiv, real *r__, real *c__, complex *b, + integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, complex *ab, integer *ldab, integer 
*ipiv, complex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_cgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real + *ferr, real *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * + afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, complex *ab, integer *ldab, integer *ipiv, complex + *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, complex *v, integer *ldv, + integer *info); + +/* Subroutine */ int _starpu_cgebal_(char *job, integer *n, complex *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info); + +/* Subroutine */ int _starpu_cgebd2_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *info); + +/* Subroutine */ int _starpu_cgebrd_(integer *m, integer *n, complex *a, integer *lda, + real *d__, 
real *e, complex *tauq, complex *taup, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgecon_(char *norm, integer *n, complex *a, integer *lda, + real *anorm, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgeequ_(integer *m, integer *n, complex *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, + integer *info); + +/* Subroutine */ int _starpu_cgeequb_(integer *m, integer *n, complex *a, integer * + lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, + integer *info); + +/* Subroutine */ int _starpu_cgees_(char *jobvs, char *sort, L_fp select, integer *n, + complex *a, integer *lda, integer *sdim, complex *w, complex *vs, + integer *ldvs, complex *work, integer *lwork, real *rwork, logical * + bwork, integer *info); + +/* Subroutine */ int _starpu_cgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, complex *a, integer *lda, integer *sdim, complex * + w, complex *vs, integer *ldvs, real *rconde, real *rcondv, complex * + work, integer *lwork, real *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, + integer *ldvr, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, complex *a, integer *lda, complex *w, complex *vl, + integer *ldvl, complex *vr, integer *ldvr, integer *ilo, integer *ihi, + real *scale, real *abnrm, real *rconde, real *rcondv, complex *work, + integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgegs_(char *jobvsl, char *jobvsr, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, complex *alpha, complex * + beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, + complex *work, integer *lwork, real 
*rwork, integer *info); + +/* Subroutine */ int _starpu_cgegv_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * + work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgehd2_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgehrd_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_cgelq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgelqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgels_(char *trans, integer *m, integer *n, integer * + nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgelsd_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_cgelss_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgelsx_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgelsy_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, + 
integer *rank, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeql2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgeqlf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgeqp3_(integer *m, integer *n, complex *a, integer *lda, + integer *jpvt, complex *tau, complex *work, integer *lwork, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_cgeqpf_(integer *m, integer *n, complex *a, integer *lda, + integer *jpvt, complex *tau, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeqr2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgeqrf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgerfs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgerq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgerqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + 
+/* Subroutine */ int _starpu_cgesc2_(integer *n, complex *a, integer *lda, complex * + rhs, integer *ipiv, integer *jpiv, real *scale); + +/* Subroutine */ int _starpu_cgesdd_(char *jobz, integer *m, integer *n, complex *a, + integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer + *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_cgesv_(integer *n, integer *nrhs, complex *a, integer * + lda, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + complex *a, integer *lda, real *s, complex *u, integer *ldu, complex * + vt, integer *ldvt, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, + complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, + complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgetc2_(integer *n, complex *a, integer *lda, integer * + ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_cgetf2_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgetrf_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgetri_(integer *n, 
complex *a, integer *lda, integer * + ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgetrs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *lscale, real *rscale, integer *m, complex *v, + integer *ldv, integer *info); + +/* Subroutine */ int _starpu_cggbal_(char *job, integer *n, complex *a, integer *lda, + complex *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, + real *rscale, real *work, integer *info); + +/* Subroutine */ int _starpu_cgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, complex *a, integer *lda, complex *b, integer * + ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, + integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * + lwork, real *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, + integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * + vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real + *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cggev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * + work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *alpha, complex *beta, complex *vl, integer *ldvl, complex * + vr, integer *ldvr, integer *ilo, integer *ihi, real 
*lscale, real * + rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, complex + *work, integer *lwork, real *rwork, integer *iwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_cggglm_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *d__, complex *x, + complex *y, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, + complex *q, integer *ldq, complex *z__, integer *ldz, integer *info); + +/* Subroutine */ int _starpu_cgglse_(integer *m, integer *n, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, + complex *x, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggqrf_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggrqf_(integer *m, integer *p, integer *n, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, complex *a, integer * + lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + complex *work, real *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, complex *a, integer *lda, complex *b, integer + *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + integer *iwork, real *rwork, complex *tau, complex *work, integer * + info); + 
+/* Subroutine */ int _starpu_cgtcon_(char *norm, integer *n, complex *dl, complex * + d__, complex *du, complex *du2, integer *ipiv, real *anorm, real * + rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgtrfs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *dlf, complex *df, complex * + duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * + x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgtsv_(integer *n, integer *nrhs, complex *dl, complex * + d__, complex *du, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex * + df, complex *duf, complex *du2, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgttrf_(integer *n, complex *dl, complex *d__, complex * + du, complex *du2, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgttrs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex * + b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgtts2_(integer *itrans, integer *n, integer *nrhs, + complex *dl, complex *d__, complex *du, complex *du2, integer *ipiv, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_chbev_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chbevd_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer 
*info); + +/* Subroutine */ int _starpu_chbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, complex *ab, integer *ldab, complex *q, integer *ldq, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + complex *x, integer *ldx, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_chbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, + integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer * + il, integer *iu, real *abstol, integer *m, real *w, complex *z__, + integer *ldz, complex *work, real *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_chbtrd_(char *vect, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *d__, real *e, complex *q, integer * + ldq, complex *work, integer *info); + +/* Subroutine */ int _starpu_checon_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, complex *work, integer * + info); + +/* Subroutine */ int _starpu_cheequb_(char *uplo, integer *n, complex *a, integer * + 
lda, real *s, real *scond, real *amax, complex *work, integer *info); + +/* Subroutine */ int _starpu_cheev_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_cheevr_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + integer *isuppz, complex *work, integer *lwork, real *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_cheevx_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_chegs2_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chegst_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chegv_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chegvd_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_chegvx_(integer 
*itype, char *jobz, char *range, char * + uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_cherfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cherfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_chesv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_chesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *ferr, real *berr, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chetd2_(char 
*uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info); + +/* Subroutine */ int _starpu_chetf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_chetrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_chetri_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_chetrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_chfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, real *alpha, complex *a, integer *lda, real *beta, + complex *c__); + +/* Subroutine */ int _starpu_chgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *t, + integer *ldt, complex *alpha, complex *beta, complex *q, integer *ldq, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *info); + +/* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, + integer *trans); + +/* Subroutine */ int _starpu_chpcon_(char *uplo, integer *n, complex *ap, integer * + ipiv, real *anorm, real *rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_chpev_(char *jobz, char *uplo, integer *n, complex *ap, + real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chpevd_(char *jobz, char *uplo, integer *n, complex *ap, + real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer 
*liwork, + integer *info); + +/* Subroutine */ int _starpu_chpevx_(char *jobz, char *range, char *uplo, integer *n, + complex *ap, real *vl, real *vu, integer *il, integer *iu, real * + abstol, integer *m, real *w, complex *z__, integer *ldz, complex * + work, real *rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chpgst_(integer *itype, char *uplo, integer *n, complex * + ap, complex *bp, integer *info); + +/* Subroutine */ int _starpu_chpgv_(integer *itype, char *jobz, char *uplo, integer * + n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chpgvd_(integer *itype, char *jobz, char *uplo, integer * + n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_chpgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, complex *ap, complex *bp, real *vl, real *vu, + integer *il, integer *iu, real *abstol, integer *m, real *w, complex * + z__, integer *ldz, complex *work, real *rwork, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chpsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chpsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chptrd_(char *uplo, integer *n, complex *ap, 
real *d__, + real *e, complex *tau, integer *info); + +/* Subroutine */ int _starpu_chptrf_(char *uplo, integer *n, complex *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_chptri_(char *uplo, integer *n, complex *ap, integer * + ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_chptrs_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, complex *h__, integer *ldh, complex *w, complex * + vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer * + m, complex *work, real *rwork, integer *ifaill, integer *ifailr, + integer *info); + +/* Subroutine */ int _starpu_chseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, + integer *ldz, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, real *alpha, complex *ab, integer *ldab, + complex *x, integer *incx, real *beta, real *y, integer *incy); + +doublereal _starpu_cla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, real *c__, logical *capply, integer *info, complex *work, real * + rwork, ftnlen trans_len); + +doublereal _starpu_cla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen + trans_len); + +/* Subroutine */ int _starpu_cla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex 
* + y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, + real *errs_c__, complex *res, real *ayb, complex *dy, complex * + y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, + logical *ignore_cwise__, integer *info); + +doublereal _starpu_cla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, complex *ab, integer *ldab, complex *afb, integer *ldafb); + +/* Subroutine */ int _starpu_cla_geamv__(integer *trans, integer *m, integer *n, real + *alpha, complex *a, integer *lda, complex *x, integer *incx, real * + beta, real *y, integer *incy); + +doublereal _starpu_cla_gercond_c__(char *trans, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen trans_len); + +doublereal _starpu_cla_gercond_x__(char *trans, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_cla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, + complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, + integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, + real *ayb, complex *dy, complex *y_tail__, real *rcond, integer * + ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, + integer *info); + +/* Subroutine */ int _starpu_cla_heamv__(integer *uplo, integer *n, real *alpha, + complex *a, integer *lda, complex *x, integer *incx, real *beta, real + *y, integer *incy); + +doublereal _starpu_cla_hercond_c__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen uplo_len); + +doublereal 
_starpu_cla_hercond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_herfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, + integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, + complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * + rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_cla_herpvgrw__(char *uplo, integer *n, integer *info, complex *a, + integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_lin_berr__(integer *n, integer *nz, integer *nrhs, + complex *res, real *ayb, real *berr); + +doublereal _starpu_cla_porcond_c__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, real *c__, logical *capply, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +doublereal _starpu_cla_porcond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, complex *x, integer *info, complex *work, + real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, logical *colequ, real *c__, complex *b, integer *ldb, + complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real * + errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, + complex *y_tail__, real *rcond, integer *ithresh, real *rthresh, real + *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_cla_porpvgrw__(char *uplo, integer 
*ncols, complex *a, integer * + lda, complex *af, integer *ldaf, real *work, ftnlen uplo_len); + +doublereal _starpu_cla_rpvgrw__(integer *n, integer *ncols, complex *a, integer *lda, + complex *af, integer *ldaf); + +/* Subroutine */ int _starpu_cla_syamv__(integer *uplo, integer *n, real *alpha, + complex *a, integer *lda, complex *x, integer *incx, real *beta, real + *y, integer *incy); + +doublereal _starpu_cla_syrcond_c__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen uplo_len); + +doublereal _starpu_cla_syrcond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, + integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, + complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * + rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_cla_syrpvgrw__(char *uplo, integer *n, integer *info, complex *a, + integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_wwaddw__(integer *n, complex *x, complex *y, complex + *w); + +/* Subroutine */ int _starpu_clabrd_(integer *m, integer *n, integer *nb, complex *a, + integer *lda, real *d__, real *e, complex *tauq, complex *taup, + complex *x, integer *ldx, complex *y, integer *ldy); + +/* Subroutine */ int _starpu_clacgv_(integer *n, complex *x, integer *incx); + +/* Subroutine */ int _starpu_clacn2_(integer *n, complex *v, complex *x, real 
*est, + integer *kase, integer *isave); + +/* Subroutine */ int _starpu_clacon_(integer *n, complex *v, complex *x, real *est, + integer *kase); + +/* Subroutine */ int _starpu_clacp2_(char *uplo, integer *m, integer *n, real *a, + integer *lda, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clacpy_(char *uplo, integer *m, integer *n, complex *a, + integer *lda, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clacrm_(integer *m, integer *n, complex *a, integer *lda, + real *b, integer *ldb, complex *c__, integer *ldc, real *rwork); + +/* Subroutine */ int _starpu_clacrt_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, complex *c__, complex *s); + +/* Complex */ VOID _starpu_cladiv_(complex * ret_val, complex *x, complex *y); + +/* Subroutine */ int _starpu_claed0_(integer *qsiz, integer *n, real *d__, real *e, + complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_claed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * + q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * + qptr, integer *prmptr, integer *perm, integer *givptr, integer * + givcol, real *givnum, complex *work, real *rwork, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_claed8_(integer *k, integer *n, integer *qsiz, complex * + q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, + real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, + integer *indx, integer *indxq, integer *perm, integer *givptr, + integer *givcol, real *givnum, integer *info); + +/* Subroutine */ int _starpu_claein_(logical *rightv, logical *noinit, integer *n, + complex *h__, integer *ldh, complex *w, complex *v, complex *b, + integer *ldb, real *rwork, real *eps3, real *smlnum, integer *info); + +/* Subroutine */ int _starpu_claesy_(complex *a, complex *b, complex *c__, 
complex * + rt1, complex *rt2, complex *evscal, complex *cs1, complex *sn1); + +/* Subroutine */ int _starpu_claev2_(complex *a, complex *b, complex *c__, real *rt1, + real *rt2, real *cs1, complex *sn1); + +/* Subroutine */ int _starpu_clag2z_(integer *m, integer *n, complex *sa, integer * + ldsa, doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_clags2_(logical *upper, real *a1, complex *a2, real *a3, + real *b1, complex *b2, real *b3, real *csu, complex *snu, real *csv, + complex *snv, real *csq, complex *snq); + +/* Subroutine */ int _starpu_clagtm_(char *trans, integer *n, integer *nrhs, real * + alpha, complex *dl, complex *d__, complex *du, complex *x, integer * + ldx, real *beta, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clahef_(char *uplo, integer *n, integer *nb, integer *kb, + complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, + integer *info); + +/* Subroutine */ int _starpu_clahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + info); + +/* Subroutine */ int _starpu_clahr2_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy); + +/* Subroutine */ int _starpu_clahrd_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy); + +/* Subroutine */ int _starpu_claic1_(integer *job, integer *j, complex *x, real *sest, + complex *w, complex *gamma, real *sestpr, complex *s, complex *c__); + +/* Subroutine */ int _starpu_clals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, 
integer *k, real *c__, real *s, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_clalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, + real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, + real *z__, real *poles, integer *givptr, integer *givcol, integer * + ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_clalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, + integer *rank, complex *work, real *rwork, integer *iwork, integer * + info); + +doublereal _starpu_clangb_(char *norm, integer *n, integer *kl, integer *ku, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clange_(char *norm, integer *m, integer *n, complex *a, integer * + lda, real *work); + +doublereal _starpu_clangt_(char *norm, integer *n, complex *dl, complex *d__, complex + *du); + +doublereal _starpu_clanhb_(char *norm, char *uplo, integer *n, integer *k, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work); + +doublereal _starpu_clanhf_(char *norm, char *transr, char *uplo, integer *n, complex * + a, real *work); + +doublereal _starpu_clanhp_(char *norm, char *uplo, integer *n, complex *ap, real * + work); + +doublereal _starpu_clanhs_(char *norm, integer *n, complex *a, integer *lda, real * + work); + +doublereal _starpu_clanht_(char *norm, integer *n, real *d__, complex *e); + +doublereal _starpu_clansb_(char *norm, char *uplo, integer *n, integer *k, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clansp_(char *norm, char *uplo, integer *n, complex *ap, real * + work); + +doublereal _starpu_clansy_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work); + +doublereal 
_starpu_clantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + complex *ab, integer *ldab, real *work); + +doublereal _starpu_clantp_(char *norm, char *uplo, char *diag, integer *n, complex * + ap, real *work); + +doublereal _starpu_clantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + complex *a, integer *lda, real *work); + +/* Subroutine */ int _starpu_clapll_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *ssmin); + +/* Subroutine */ int _starpu_clapmt_(logical *forwrd, integer *m, integer *n, complex + *x, integer *ldx, integer *k); + +/* Subroutine */ int _starpu_claqgb_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqge_(integer *m, integer *n, complex *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * + equed); + +/* Subroutine */ int _starpu_claqhb_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqhe_(char *uplo, integer *n, complex *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqhp_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqp2_(integer *m, integer *n, integer *offset, complex + *a, integer *lda, integer *jpvt, complex *tau, real *vn1, real *vn2, + complex *work); + +/* Subroutine */ int _starpu_claqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex * + tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf); + +/* Subroutine */ int _starpu_claqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex 
*z__, integer *ldz, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_claqr1_(integer *n, complex *h__, integer *ldh, complex * + s1, complex *s2, complex *v); + +/* Subroutine */ int _starpu_claqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork); + +/* Subroutine */ int _starpu_claqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork); + +/* Subroutine */ int _starpu_claqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_claqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, + complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * + z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, + integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, + integer *ldwh); + +/* Subroutine */ int _starpu_claqsb_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqsp_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqsy_(char *uplo, integer *n, 
complex *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_clar1v_(integer *n, integer *b1, integer *bn, real * + lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * + gaptol, complex *z__, logical *wantnc, integer *negcnt, real *ztz, + real *mingma, integer *r__, integer *isuppz, real *nrminv, real * + resid, real *rqcorr, real *work); + +/* Subroutine */ int _starpu_clar2v_(integer *n, complex *x, complex *y, complex *z__, + integer *incx, real *c__, complex *s, integer *incc); + +/* Subroutine */ int _starpu_clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork); + +/* Subroutine */ int _starpu_clarf_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work); + +/* Subroutine */ int _starpu_clarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, + complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, + integer *ldwork); + +/* Subroutine */ int _starpu_clarfg_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau); + +/* Subroutine */ int _starpu_clarfp_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau); + +/* Subroutine */ int _starpu_clarft_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); + +/* Subroutine */ int _starpu_clarfx_(char *side, integer *m, integer *n, complex *v, + complex *tau, complex *c__, integer *ldc, complex *work); + +/* Subroutine */ int _starpu_clargv_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *c__, integer *incc); + +/* Subroutine */ int _starpu_clarnv_(integer *idist, integer *iseed, integer *n, + complex *x); + +/* Subroutine */ int _starpu_clarrv_(integer *n, real *vl, real *vu, real *d__, real * + l, real 
*pivmin, integer *isplit, integer *m, integer *dol, integer * + dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, + real *wgap, integer *iblock, integer *indexw, real *gers, complex * + z__, integer *ldz, integer *isuppz, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_clarscl2_(integer *m, integer *n, real *d__, complex *x, + integer *ldx); + +/* Subroutine */ int _starpu_clartg_(complex *f, complex *g, real *cs, complex *sn, + complex *r__); + +/* Subroutine */ int _starpu_clartv_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *c__, complex *s, integer *incc); + +/* Subroutine */ int _starpu_clarz_(char *side, integer *m, integer *n, integer *l, + complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, + complex *work); + +/* Subroutine */ int _starpu_clarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, complex *v, + integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, + complex *work, integer *ldwork); + +/* Subroutine */ int _starpu_clarzt_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); + +/* Subroutine */ int _starpu_clascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_clascl2_(integer *m, integer *n, real *d__, complex *x, + integer *ldx); + +/* Subroutine */ int _starpu_claset_(char *uplo, integer *m, integer *n, complex * + alpha, complex *beta, complex *a, integer *lda); + +/* Subroutine */ int _starpu_clasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, complex *a, integer *lda); + +/* Subroutine */ int _starpu_classq_(integer *n, complex *x, integer *incx, real * + scale, real *sumsq); + +/* Subroutine */ int _starpu_claswp_(integer *n, complex *a, integer *lda, 
integer * + k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_clasyf_(char *uplo, integer *n, integer *nb, integer *kb, + complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, + integer *info); + +/* Subroutine */ int _starpu_clatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, complex *ab, integer *ldab, complex * + x, real *scale, real *cnorm, integer *info); + +/* Subroutine */ int _starpu_clatdf_(integer *ijob, integer *n, complex *z__, integer + *ldz, complex *rhs, real *rdsum, real *rdscal, integer *ipiv, integer + *jpiv); + +/* Subroutine */ int _starpu_clatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *ap, complex *x, real *scale, real *cnorm, + integer *info); + +/* Subroutine */ int _starpu_clatrd_(char *uplo, integer *n, integer *nb, complex *a, + integer *lda, real *e, complex *tau, complex *w, integer *ldw); + +/* Subroutine */ int _starpu_clatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *a, integer *lda, complex *x, real *scale, + real *cnorm, integer *info); + +/* Subroutine */ int _starpu_clatrz_(integer *m, integer *n, integer *l, complex *a, + integer *lda, complex *tau, complex *work); + +/* Subroutine */ int _starpu_clatzm_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c1, complex *c2, integer *ldc, + complex *work); + +/* Subroutine */ int _starpu_clauu2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_clauum_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpbcon_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *anorm, real *rcond, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cpbequ_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, integer *info); + +/* 
Subroutine */ int _starpu_cpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, + complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real * + berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpbstf_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * + ldafb, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpbtf2_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cpftrf_(char *transr, char *uplo, integer *n, complex *a, + integer *info); + +/* Subroutine */ int _starpu_cpftri_(char *transr, char *uplo, integer *n, complex *a, + integer *info); + +/* Subroutine */ int _starpu_cpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, complex *a, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cpocon_(char *uplo, integer *n, complex *a, integer *lda, + real *anorm, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpoequ_(integer *n, complex *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int 
_starpu_cpoequb_(integer *n, complex *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cporfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, complex *b, integer *ldb, + complex *x, integer *ldx, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, real *s, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real + *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * + err_bnds_comp__, integer *nparams, real *params, complex *work, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_cposv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * + equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * + equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpotf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpotrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpotri_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int 
_starpu_cpotrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cppcon_(char *uplo, integer *n, complex *ap, real *anorm, + real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cppequ_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cpprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, complex *b, integer *ldb, complex *x, integer *ldx, + real *ferr, real *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cppsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, char *equed, real *s, complex *b, + integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real + *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpptrf_(char *uplo, integer *n, complex *ap, integer * + info); + +/* Subroutine */ int _starpu_cpptri_(char *uplo, integer *n, complex *ap, integer * + info); + +/* Subroutine */ int _starpu_cpptrs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cpstf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_cpstrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_cptcon_(integer *n, real *d__, complex *e, real *anorm, + real *rcond, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int 
_starpu_cptrfs_(char *uplo, integer *n, integer *nrhs, real *d__, + complex *e, real *df, complex *ef, complex *b, integer *ldb, complex + *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cptsv_(integer *n, integer *nrhs, real *d__, complex *e, + complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cptsvx_(char *fact, integer *n, integer *nrhs, real *d__, + complex *e, real *df, complex *ef, complex *b, integer *ldb, complex + *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpttrf_(integer *n, real *d__, complex *e, integer *info); + +/* Subroutine */ int _starpu_cpttrs_(char *uplo, integer *n, integer *nrhs, real *d__, + complex *e, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cptts2_(integer *iuplo, integer *n, integer *nrhs, real * + d__, complex *e, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_crot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, complex *s); + +/* Subroutine */ int _starpu_cspcon_(char *uplo, integer *n, complex *ap, integer * + ipiv, real *anorm, real *rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_cspmv_(char *uplo, integer *n, complex *alpha, complex * + ap, complex *x, integer *incx, complex *beta, complex *y, integer * + incy); + +/* Subroutine */ int _starpu_cspr_(char *uplo, integer *n, complex *alpha, complex *x, + integer *incx, complex *ap); + +/* Subroutine */ int _starpu_csprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cspsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int 
_starpu_cspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csptrf_(char *uplo, integer *n, complex *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_csptri_(char *uplo, integer *n, complex *ap, integer * + ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_csptrs_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu__starpu_csrscl_(integer *n, real *sa, complex *sx, integer *incx); + +/* Subroutine */ int _starpu_cstedc_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_cstegr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, + real *work, integer *lwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_cstein_(integer *n, real *d__, real *e, integer *m, real + *w, integer *iblock, integer *isplit, complex *z__, integer *ldz, + real *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_cstemr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, + real *w, complex *z__, integer *ldz, integer *nzc, integer *isuppz, + logical *tryrac, real *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_csteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int 
_starpu_csycon_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, complex *work, integer * + info); + +/* Subroutine */ int _starpu_csyequb_(char *uplo, integer *n, complex *a, integer * + lda, real *s, real *scond, real *amax, complex *work, integer *info); + +/* Subroutine */ int _starpu_csymv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy); + +/* Subroutine */ int _starpu_csyr_(char *uplo, integer *n, complex *alpha, complex *x, + integer *incx, complex *a, integer *lda); + +/* Subroutine */ int _starpu_csyrfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_csysv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_csysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *ferr, real *berr, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_csysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real 
*s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csytf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_csytrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_csytri_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_csytrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_ctbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, complex *ab, integer *ldab, real *rcond, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, + integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ctfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, complex *alpha, complex *a, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctftri_(char *transr, char *uplo, char *diag, integer *n, + complex *a, integer *info); + +/* Subroutine */ int _starpu_ctfttp_(char *transr, char *uplo, integer *n, complex * + arf, complex *ap, integer *info); + +/* Subroutine */ int _starpu_ctfttr_(char *transr, char *uplo, integer *n, complex * 
+ arf, complex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctgevc_(char *side, char *howmny, logical *select, + integer *n, complex *s, integer *lds, complex *p, integer *ldp, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, + integer *m, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctgex2_(logical *wantq, logical *wantz, integer *n, + complex *a, integer *lda, complex *b, integer *ldb, complex *q, + integer *ldq, complex *z__, integer *ldz, integer *j1, integer *info); + +/* Subroutine */ int _starpu_ctgexc_(logical *wantq, logical *wantz, integer *n, + complex *a, integer *lda, complex *b, integer *ldb, complex *q, + integer *ldq, complex *z__, integer *ldz, integer *ifst, integer * + ilst, integer *info); + +/* Subroutine */ int _starpu_ctgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, complex *a, integer *lda, complex *b, + integer *ldb, complex *alpha, complex *beta, complex *q, integer *ldq, + complex *z__, integer *ldz, integer *m, real *pl, real *pr, real * + dif, complex *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ctgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, complex *a, integer * + lda, complex *b, integer *ldb, real *tola, real *tolb, real *alpha, + real *beta, complex *u, integer *ldu, complex *v, integer *ldv, + complex *q, integer *ldq, complex *work, integer *ncycle, integer * + info); + +/* Subroutine */ int _starpu_ctgsna_(char *job, char *howmny, logical *select, + integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real + *dif, integer *mm, integer *m, complex *work, integer *lwork, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_ctgsy2_(char *trans, integer *ijob, integer *m, integer * + n, complex *a, integer *lda, 
complex *b, integer *ldb, complex *c__, + integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, + complex *f, integer *ldf, real *scale, real *rdsum, real *rdscal, + integer *info); + +/* Subroutine */ int _starpu_ctgsyl_(char *trans, integer *ijob, integer *m, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, + integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, + complex *f, integer *ldf, real *scale, real *dif, complex *work, + integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ctpcon_(char *norm, char *uplo, char *diag, integer *n, + complex *ap, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *ap, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_ctptri_(char *uplo, char *diag, integer *n, complex *ap, + integer *info); + +/* Subroutine */ int _starpu_ctptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ctpttf_(char *transr, char *uplo, integer *n, complex * + ap, complex *arf, integer *info); + +/* Subroutine */ int _starpu_ctpttr_(char *uplo, integer *n, complex *ap, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrcon_(char *norm, char *uplo, char *diag, integer *n, + complex *a, integer *lda, real *rcond, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_ctrevc_(char *side, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrexc_(char *compq, integer *n, complex *t, integer * + ldt, complex *q, 
integer *ldq, integer *ifst, integer *ilst, integer * + info); + +/* Subroutine */ int _starpu_ctrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, + complex *x, integer *ldx, real *ferr, real *berr, complex *work, real + *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrsen_(char *job, char *compq, logical *select, integer + *n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, + integer *m, real *s, real *sep, complex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_ctrsna_(char *job, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, real *s, real *sep, integer *mm, integer * + m, complex *work, integer *ldwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *c__, integer *ldc, real *scale, integer *info); + +/* Subroutine */ int _starpu_ctrti2_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrtri_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_ctrttf_(char *transr, char *uplo, integer *n, complex *a, + integer *lda, complex *arf, integer *info); + +/* Subroutine */ int _starpu_ctrttp_(char *uplo, integer *n, complex *a, integer *lda, + complex *ap, integer *info); + +/* Subroutine */ int _starpu_ctzrqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, integer *info); + +/* Subroutine */ int _starpu_ctzrzf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, 
integer *info); + +/* Subroutine */ int _starpu_cung2l_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cung2r_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cungbr_(char *vect, integer *m, integer *n, integer *k, + complex *a, integer *lda, complex *tau, complex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_cunghr_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_cungl2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunglq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungql_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungqr_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungr2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cungrq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungtr_(char *uplo, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* 
Subroutine */ int _starpu_cunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmr2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *info); + +/* Subroutine */ int 
_starpu_cunmrq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cupgtr_(char *uplo, integer *n, complex *ap, complex * + tau, complex *q, integer *ldq, complex *work, integer *info); + +/* Subroutine */ int _starpu_cupmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *ap, complex *tau, complex *c__, integer *ldc, + complex *work, integer *info); + +/* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * + d__, doublereal *sep, integer *info); + +/* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * + d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, + integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, + integer *info); + +/* Subroutine */ int 
_starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + 
doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, + doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info); + +/* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgecon_(char 
*norm, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublereal *a, integer *lda, integer *sdim, doublereal *wr, + doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, + integer *lwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublereal *a, integer *lda, integer *sdim, + doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, + doublereal *rconde, doublereal *rcondv, doublereal *work, integer * + lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, + doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal + *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal 
* + alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, + integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, doublereal *a, + integer *lda, doublereal *sva, doublereal *u, integer *ldu, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, + 
doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer * + 
nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, + doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale); + +/* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * + ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer 
*ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer + *ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer + *ipiv, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublereal *v, integer *ldv, integer *info); + +/* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * + lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, integer *sdim, doublereal 
*alphar, doublereal *alphai, + doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, + integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublereal *a, integer *lda, + doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, + doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, doublereal *work, integer *lwork, integer *iwork, integer * + liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublereal *work, integer *lwork, integer *iwork, logical * + bwork, integer *info); + +/* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, + doublereal *x, doublereal *y, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * + ldz, integer *info); + +/* Subroutine 
*/ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, + doublereal *d__, doublereal *x, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * + a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer + *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer + *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, + doublereal *q, integer *ldq, integer *iwork, doublereal *tau, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, + doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + doublereal *a, integer *lda, doublereal *d__, doublereal *sva, + integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal + *sfmin, 
doublereal *tol, integer *nsweep, doublereal *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, + doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer + *info); + +/* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * + dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, + doublereal *du, doublereal *du2, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *t, integer *ldt, 
doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *mm, integer *m, doublereal *work, integer * + ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info); + +logical _starpu_disnan_(doublereal *din); + +/* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * + ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy); + +doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, integer *cmode, doublereal *c__, integer *info, + doublereal *work, integer *iwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublereal *res, doublereal *ayb, doublereal *dy, doublereal * + y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info); + +doublereal _starpu_dla_gbrpvgrw__(integer *n, 
integer *kl, integer *ku, integer * + ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb); + +/* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *x, + integer *incx, doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen trans_len); + +/* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * + ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, + doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * + dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, + doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, + integer *info); + +/* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, + doublereal *res, doublereal *ayb, doublereal *berr); + +doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, + integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublereal *res, doublereal *ayb, doublereal *dy, doublereal * + y_tail__, doublereal *rcond, integer 
*ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen + uplo_len); + +doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len); + +doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf); + +/* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, + doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * + berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal * + errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, + doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal + *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * + a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, + doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, + doublereal *w); + +/* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large); + +/* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal 
*taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy); + +/* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase); + +/* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q); + +/* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2); + +/* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, + integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, + doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * + e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, + integer *mout, integer *nab, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info); + +/* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, 
doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info); + +/* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info); + +/* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam); + +/* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info); + +/* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info); + +/* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info); + +/* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info); + +/* Subroutine */ int 
_starpu_dlaein_(logical *rightv, logical *noinit, integer *n, + doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, + doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, + doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * + bignum, integer *info); + +/* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1); + +/* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, + integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, + integer *n2, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * + scale2, doublereal *wr1, doublereal *wr2, doublereal *wi); + +/* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, + doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, + doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, + doublereal *csq, doublereal *snq); + +/* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, + doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, + integer *in, integer *info); + +/* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, + doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, + doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer + *ldb); + +/* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a, + doublereal *b, doublereal *c__, doublereal *d__, integer *in, + doublereal *y, doublereal *tol, integer *info); + +/* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, 
doublereal *alphai, doublereal * + beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * + snr); + +/* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy); + +/* Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy); + +/* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, + doublereal *sest, doublereal *w, doublereal *gamma, doublereal * + sestpr, doublereal *s, doublereal *c__); + +logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2); + +/* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info); + +/* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + 
doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index); + +integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * + sigma, doublereal *pivmin, integer *r__); + +doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work); + +doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, + doublereal *du); + +doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work); + +doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal + *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, + doublereal *a, doublereal *work); + +doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, + doublereal *work); + +doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e); + +doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work); + +doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + doublereal *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal 
+ *ap, doublereal *work); + +doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *work); + +/* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn); + +/* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *ssmin); + +/* Subroutine */ int _starpu_dlapmt_(logical *forwrd, integer *m, integer *n, + doublereal *x, integer *ldx, integer *k); + +doublereal _starpu_dlapy2_(doublereal *x, doublereal *y); + +doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__); + +/* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, + doublereal *a, integer *lda, integer *jpvt, doublereal *tau, + doublereal *vn1, doublereal *vn2, doublereal *work); + +/* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, + doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, + doublereal *f, integer *ldf); + +/* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int 
_starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, + doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, + doublereal *v); + +/* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); + +/* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); + +/* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal + *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, + integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * + ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh); + +/* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, 
doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, + doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal + *scale, doublereal *x, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal + *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * + lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical + *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, + integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, + doublereal *rqcorr, doublereal *work); + +/* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, + doublereal *z__, integer *incx, doublereal *c__, doublereal *s, + integer *incc); + +/* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work); + +/* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork); + +/* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau); + +/* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau); + +/* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt); + +/* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal 
*tau, doublereal *c__, integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, integer *incc); + +/* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, + doublereal *x); + +/* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, + doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, + integer *isplit, integer *info); + +/* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal *lld, + integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, + integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, + doublereal *work, integer *iwork, doublereal *pivmin, doublereal * + spdiam, integer *twist, integer *info); + +/* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, + doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, + integer *eigcnt, integer *lcnt, integer *rcnt, integer *info); + +/* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, + doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, + doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, + doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, + integer *iblock, integer *indexw, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal + *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * + spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, + doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, + doublereal *gers, doublereal *pivmin, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int 
_starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, + doublereal *ld, integer *clstrt, integer *clend, doublereal *w, + doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * + clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, + doublereal *dplus, doublereal *lplus, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, + integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, + doublereal *w, doublereal *werr, doublereal *work, integer *iwork, + doublereal *pivmin, doublereal *spdiam, integer *info); + +/* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, + doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, + doublereal *reltol, doublereal *w, doublereal *werr, integer *info); + +/* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, + doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, + integer *m, integer *dol, integer *dou, doublereal *minrgp, + doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, + doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx); + +/* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__); + +/* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer + *incc); + +/* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x); + +/* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, + doublereal *v, 
integer *incv, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, + integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * + ldc, doublereal *work, integer *ldwork); + +/* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt); + +/* Subroutine */ int _starpu_dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax); + +/* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx); + +/* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal 
*q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info); + +/* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work); + +/* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info); + +/* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info); + +/* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, 
integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub); + +/* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info); + +/* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau); + +/* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype, doublereal *g); + +/* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee); + +/* Subroutine */ int _starpu_dlasq6_(integer *i0, 
integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2); + +/* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda); + +/* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * + info); + +/* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq); + +/* Subroutine */ int _starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl); + +/* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * + tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, + doublereal *x, integer *ldx, doublereal *xnorm, integer *info); + +/* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, + doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * + ldw, integer *info); + +/* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, doublereal *ab, integer *ldab, + doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, + integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, + integer *ipiv, integer *jpiv); + +/* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * + 
normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, + doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw); + +/* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *a, integer *lda, doublereal *x, + doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, doublereal * + a, integer *lda, doublereal *tau, doublereal *work); + +/* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * + v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, + integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, + doublereal *tau, doublereal *q, integer *ldq, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer + *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, 
+ doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int 
_starpu_dormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * + tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal 
*c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpbequ_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + integer *info); + +/* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, 
integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info); + +/* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info); + +/* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, 
integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * + berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * + berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer 
* + nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * + info); + +/* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * + info); + +/* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, + doublereal *anorm, doublereal *rcond, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer + *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, + doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + info); + +/* Subroutine */ int 
_starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, + integer *incx); + +/* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * + ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *ifail, + integer *info); + +/* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal 
*work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * + bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, + doublereal *q, integer *ldq, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *beta, doublereal *c__); + +/* Subroutine */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, + integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info); + +/* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, + doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal 
*work, integer *iwork, integer *ifail, + integer *info); + +/* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, + doublereal *ap, doublereal *bp, integer *info); + +/* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info); + +/* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* 
Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, + doublereal *d__, doublereal *e, doublereal *tau, integer *info); + +/* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dstebz_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, + doublereal *d__, doublereal *e, integer *m, integer *nsplit, + doublereal *w, integer *iblock, integer *isplit, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, + integer *m, doublereal *w, integer *iblock, integer *isplit, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, + integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, + integer 
*lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * + work, integer *info); + +/* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, + integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dsyevd_(char *jobz, char 
*uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsyevx_(char *jobz, char *range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer + *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, 
integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *s, doublereal *b, integer *ldb, doublereal + *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublereal *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * + rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * + err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, + doublereal *params, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal 
*e, doublereal *tau, integer *info); + +/* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsytri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, + doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, + doublereal *a, integer *info); + +/* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal + *arf, doublereal *ap, integer *info); + +/* 
Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal + *arf, doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer + *mm, integer *m, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * + n1, integer *n2, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, + integer *ilst, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, doublereal *a, integer *lda, doublereal * + b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + integer *m, doublereal *pl, doublereal *pr, doublereal *dif, + doublereal *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *tola, + doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * + ldq, doublereal *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *a, integer *lda, 
doublereal *b, integer *ldb, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * + work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer + *pq, integer *info); + +/* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *dif, doublereal *work, integer *lwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * + ap, integer *info); + +/* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal + *ap, doublereal *arf, integer *info); + +/* Subroutine */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, + doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int 
_starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *rcond, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * + ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer + *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal + *sep, doublereal *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, + integer *mm, integer *m, doublereal *work, integer *ldwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, 
integer *info); + +/* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *lda, doublereal *arf, integer *info); + +/* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *ap, integer *info); + +/* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, integer *info); + +/* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx); + +integer _starpu_icmax1_(integer *n, complex *cx, integer *incx); + +integer _starpu_ieeeck_(integer *ispec, real *zero, real *one); + +integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda); + +integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda); + +integer _starpu_iladiag_(char *diag); + +integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda); + +integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda); + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4); + +integer _starpu_ilaprec_(char *prec); + +integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda); + +integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda); + +integer _starpu_ilatrans_(char *trans); + +integer _starpu_ilauplo_(char *uplo); + +/* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, + integer *vers_patch__); + +integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda); + +integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, 
integer *lda); + +integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork); + +integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx); + +logical _starpu_lsamen_(integer *n, char *ca, char *cb); + +integer _starpu_smaxloc_(real *a, integer *dimm); + +/* Subroutine */ int _starpu_sbdsdc_(char *uplo, char *compq, integer *n, real *d__, + real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, + integer *iq, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * + u, integer *ldu, real *c__, integer *ldc, real *work, integer *info); + +doublereal _starpu_scsum1_(integer *n, complex *cx, integer *incx); + +/* Subroutine */ int _starpu_sdisna_(char *job, integer *m, integer *n, real *d__, + real *sep, integer *info); + +/* Subroutine */ int _starpu_sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * + e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer + *ldc, real *work, integer *info); + +/* Subroutine */ int _starpu_sgbcon_(char *norm, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, + real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbequ_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * + colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_sgbequb_(integer *m, integer *n, integer *kl, integer * + ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_sgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, + 
integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * + ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, real *r__, real *c__, real *b, integer + *ldb, real *x, integer *ldx, real *rcond, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_sgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, + real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real * + rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, real *ab, integer *ldab, 
integer *ipiv, real *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info); + +/* Subroutine */ int _starpu_sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info); + +/* Subroutine */ int _starpu_sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info); + +/* Subroutine */ int _starpu_sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_sgecon_(char *norm, integer *n, real *a, integer *lda, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgeequ_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info); + +/* Subroutine */ int _starpu_sgeequb_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info); + +/* Subroutine */ int _starpu_sgees_(char *jobvs, char *sort, L_fp select, integer *n, + real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, + integer *ldvs, real *work, integer *lwork, logical *bwork, integer * + info); + +/* Subroutine */ int _starpu_sgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, + real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real * + work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info); + +/* 
Subroutine */ int _starpu_sgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * + vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * + ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgegv_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda, + real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, + integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgels_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgelsd_(integer *m, integer *n, integer 
*nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgelss_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgelsx_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, real *work, integer *info); + +/* Subroutine */ int _starpu_sgelsy_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeql2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqlf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeqp3_(integer *m, integer *n, real *a, integer *lda, + integer *jpvt, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeqpf_(integer *m, integer *n, real *a, integer *lda, + integer *jpvt, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgerfs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, real *a, 
integer *lda, real *af, integer *ldaf, integer *ipiv, + real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgerq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgerqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgesc2_(integer *n, real *a, integer *lda, real *rhs, + integer *ipiv, integer *jpiv, real *scale); + +/* Subroutine */ int _starpu_sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, + integer *ldvt, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, + integer *ldv, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *ferr, real *berr, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char 
*equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgetc2_(integer *n, real *a, integer *lda, integer *ipiv, + integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgetrf_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgetri_(integer *n, real *a, integer *lda, integer *ipiv, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *lscale, real *rscale, integer *m, real *v, + integer *ldv, integer *info); + +/* Subroutine */ int _starpu_sggbal_(char *job, integer *n, real *a, integer *lda, + real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real + *rscale, real *work, integer *info); + +/* Subroutine */ int _starpu_sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, + integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, + integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, + logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, real *a, integer *lda, real *b, + integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, + real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, + real *rcondv, real *work, integer *lwork, integer *iwork, integer * 
+ liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real + *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *ilo, integer *ihi, real *lscale, real *rscale, + real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggglm_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real + *q, integer *ldq, real *z__, integer *ldz, integer *info); + +/* Subroutine */ int _starpu_sgglse_(integer *m, integer *n, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggqrf_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggrqf_(integer *m, integer *p, integer *n, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, real *a, integer *lda, + real *b, integer *ldb, real *alpha, real *beta, real *u, integer * + ldu, real *v, 
integer *ldv, real *q, integer *ldq, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, + real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu, + real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real * + tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgsvj0_(char *jobv, integer *m, integer *n, real *a, + integer *lda, real *d__, real *sva, integer *mv, real *v, integer * + ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, + integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgtcon_(char *norm, integer *n, real *dl, real *d__, + real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *dlf, real *df, real *duf, real *du2, + integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * + ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, + real *du, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, + real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * + ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_sgttrf_(integer *n, real *dl, real *d__, real *du, real * + du2, 
integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgttrs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_sgtts2_(integer *itrans, integer *n, integer *nrhs, real + *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer * + ldb); + +/* Subroutine */ int _starpu_shgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer + *ldt, real *alphar, real *alphai, real *beta, real *q, integer *ldq, + real *z__, integer *ldz, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_shsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real + *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, + real *work, integer *ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info); + +logical _starpu_sisnan_(real *sin__); + +/* Subroutine */ int _starpu_sla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real * + x, integer *incx, real *beta, real *y, integer *incy); + +doublereal _starpu_sla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, + integer *cmode, real *c__, integer *info, real *work, integer *iwork, + ftnlen trans_len); + +/* Subroutine */ int _starpu_sla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, + logical *colequ, real *c__, real *b, integer *ldb, real *y, 
integer * + ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real * + errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, + integer *ithresh, real *rthresh, real *dz_ub__, logical * + ignore_cwise__, integer *info); + +doublereal _starpu_sla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, real *ab, integer *ldab, real *afb, integer *ldafb); + +/* Subroutine */ int _starpu_sla_geamv__(integer *trans, integer *m, integer *n, real + *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, + real *y, integer *incy); + +doublereal _starpu_sla_gercond__(char *trans, integer *n, real *a, integer *lda, real + *af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer + *info, real *work, integer *iwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_sla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real * + af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b, + integer *ldb, real *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real + *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, + real *dz_ub__, logical *ignore_cwise__, integer *info); + +/* Subroutine */ int _starpu_sla_lin_berr__(integer *n, integer *nz, integer *nrhs, + real *res, real *ayb, real *berr); + +doublereal _starpu_sla_porcond__(char *uplo, integer *n, real *a, integer *lda, real * + af, integer *ldaf, integer *cmode, real *c__, integer *info, real * + work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * + ldaf, logical *colequ, real *c__, real *b, integer *ldb, real *y, + integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, + real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real * + rcond, 
integer *ithresh, real *rthresh, real *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_sla_porpvgrw__(char *uplo, integer *ncols, real *a, integer *lda, + real *af, integer *ldaf, real *work, ftnlen uplo_len); + +doublereal _starpu_sla_rpvgrw__(integer *n, integer *ncols, real *a, integer *lda, + real *af, integer *ldaf); + +/* Subroutine */ int _starpu_sla_syamv__(integer *uplo, integer *n, real *alpha, real + *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +doublereal _starpu_sla_syrcond__(char *uplo, integer *n, real *a, integer *lda, real * + af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer * + info, real *work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * + ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer * + ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, + real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real * + y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, + logical *ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_sla_syrpvgrw__(char *uplo, integer *n, integer *info, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_wwaddw__(integer *n, real *x, real *y, real *w); + +/* Subroutine */ int _starpu_slabad_(real *small, real *large); + +/* Subroutine */ int _starpu_slabrd_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, + integer *ldx, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slacn2_(integer *n, real *v, real *x, integer *isgn, + real *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_slacon_(integer *n, real *v, real *x, integer *isgn, + 
real *est, integer *kase); + +/* Subroutine */ int _starpu_slacpy_(char *uplo, integer *m, integer *n, real *a, + integer *lda, real *b, integer *ldb); + +/* Subroutine */ int _starpu_sladiv_(real *a, real *b, real *c__, real *d__, real *p, + real *q); + +/* Subroutine */ int _starpu_slae2_(real *a, real *b, real *c__, real *rt1, real *rt2); + +/* Subroutine */ int _starpu_slaebz_(integer *ijob, integer *nitmax, integer *n, + integer *mmax, integer *minp, integer *nbmin, real *abstol, real * + reltol, real *pivmin, real *d__, real *e, real *e2, integer *nval, + real *ab, real *c__, integer *mout, integer *nab, real *work, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_slaed0_(integer *icompq, integer *qsiz, integer *n, real + *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, + real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slaed1_(integer *n, real *d__, real *q, integer *ldq, + integer *indxq, real *rho, integer *cutpnt, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_slaed2_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * + dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * + indxp, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_slaed3_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * + indx, integer *ctot, real *w, real *s, integer *info); + +/* Subroutine */ int _starpu_slaed4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *dlam, integer *info); + +/* Subroutine */ int _starpu_slaed5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dlam); + +/* Subroutine */ int _starpu_slaed6_(integer *kniter, logical *orgati, real *rho, + real *d__, real *z__, real *finit, real *tau, integer *info); + +/* Subroutine */ int _starpu_slaed7_(integer *icompq, 
integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, + integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * + qstore, integer *qptr, integer *prmptr, integer *perm, integer * + givptr, integer *givcol, real *givnum, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_slaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, + integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, + real *w, integer *perm, integer *givptr, integer *givcol, real * + givnum, integer *indxp, integer *indx, integer *info); + +/* Subroutine */ int _starpu_slaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, + real *w, real *s, integer *lds, integer *info); + +/* Subroutine */ int _starpu_slaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, real *givnum, real *q, integer *qptr, real *z__, + real *ztemp, integer *info); + +/* Subroutine */ int _starpu_slaein_(logical *rightv, logical *noinit, integer *n, + real *h__, integer *ldh, real *wr, real *wi, real *vr, real *vi, real + *b, integer *ldb, real *work, real *eps3, real *smlnum, real *bignum, + integer *info); + +/* Subroutine */ int _starpu_slaev2_(real *a, real *b, real *c__, real *rt1, real * + rt2, real *cs1, real *sn1); + +/* Subroutine */ int _starpu_slaexc_(logical *wantq, integer *n, real *t, integer * + ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, + real *work, integer *info); + +/* Subroutine */ int _starpu_slag2_(real *a, integer *lda, real *b, integer *ldb, + real *safmin, real *scale1, real *scale2, real *wr1, real *wr2, real * + wi); + +/* Subroutine */ int _starpu_slag2d_(integer *m, integer *n, real *sa, integer *ldsa, + doublereal *a, integer *lda, integer *info); + +/* 
Subroutine */ int _starpu_slags2_(logical *upper, real *a1, real *a2, real *a3, + real *b1, real *b2, real *b3, real *csu, real *snu, real *csv, real * + snv, real *csq, real *snq); + +/* Subroutine */ int _starpu_slagtf_(integer *n, real *a, real *lambda, real *b, real + *c__, real *tol, real *d__, integer *in, integer *info); + +/* Subroutine */ int _starpu_slagtm_(char *trans, integer *n, integer *nrhs, real * + alpha, real *dl, real *d__, real *du, real *x, integer *ldx, real * + beta, real *b, integer *ldb); + +/* Subroutine */ int _starpu_slagts_(integer *job, integer *n, real *a, real *b, real + *c__, real *d__, integer *in, real *y, real *tol, integer *info); + +/* Subroutine */ int _starpu_slagv2_(real *a, integer *lda, real *b, integer *ldb, + real *alphar, real *alphai, real *beta, real *csl, real *snl, real * + csr, real *snr); + +/* Subroutine */ int _starpu_slahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * + info); + +/* Subroutine */ int _starpu_slahr2_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slahrd_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slaic1_(integer *job, integer *j, real *x, real *sest, + real *w, real *gamma, real *sestpr, real *s, real *c__); + +logical _starpu_slaisnan_(real *sin1, real *sin2); + +/* Subroutine */ int _starpu_slaln2_(logical *ltrans, integer *na, integer *nw, real * + smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, + integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, + real *xnorm, integer *info); + +/* Subroutine */ int _starpu_slals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, real *b, 
integer *ldb, real *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *info); + +/* Subroutine */ int _starpu_slalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * + u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * + z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol, + integer *perm, real *givnum, real *c__, real *s, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_slalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, + integer *rank, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slamrg_(integer *n1, integer *n2, real *a, integer * + strd1, integer *strd2, integer *index); + +integer _starpu_slaneg_(integer *n, real *d__, real *lld, real *sigma, real *pivmin, + integer *r__); + +doublereal _starpu_slangb_(char *norm, integer *n, integer *kl, integer *ku, real *ab, + integer *ldab, real *work); + +doublereal _starpu_slange_(char *norm, integer *m, integer *n, real *a, integer *lda, + real *work); + +doublereal _starpu_slangt_(char *norm, integer *n, real *dl, real *d__, real *du); + +doublereal _starpu_slanhs_(char *norm, integer *n, real *a, integer *lda, real *work); + +doublereal _starpu_slansb_(char *norm, char *uplo, integer *n, integer *k, real *ab, + integer *ldab, real *work); + +doublereal _starpu_slansf_(char *norm, char *transr, char *uplo, integer *n, real *a, + real *work); + +doublereal _starpu_slansp_(char *norm, char *uplo, integer *n, real *ap, real *work); + +doublereal _starpu_slanst_(char *norm, integer *n, real *d__, real *e); + +doublereal _starpu_slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, + real *work); + 
+doublereal _starpu_slantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + real *ab, integer *ldab, real *work); + +doublereal _starpu_slantp_(char *norm, char *uplo, char *diag, integer *n, real *ap, + real *work); + +doublereal _starpu_slantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + real *a, integer *lda, real *work); + +/* Subroutine */ int _starpu_slanv2_(real *a, real *b, real *c__, real *d__, real * + rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn); + +/* Subroutine */ int _starpu_slapll_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *ssmin); + +/* Subroutine */ int _starpu_slapmt_(logical *forwrd, integer *m, integer *n, real *x, + integer *ldx, integer *k); + +doublereal _starpu_slapy2_(real *x, real *y); + +doublereal _starpu_slapy3_(real *x, real *y, real *z__); + +/* Subroutine */ int _starpu_slaqgb_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * + colcnd, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqge_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * + equed); + +/* Subroutine */ int _starpu_slaqp2_(integer *m, integer *n, integer *offset, real *a, + integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real * + work); + +/* Subroutine */ int _starpu_slaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, + real *vn1, real *vn2, real *auxv, real *f, integer *ldf); + +/* Subroutine */ int _starpu_slaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, + real *si1, real *sr2, real *si2, real 
*v); + +/* Subroutine */ int _starpu_slaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork); + +/* Subroutine */ int _starpu_slaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork); + +/* Subroutine */ int _starpu_slaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_slaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, + real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real + *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, + integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * + ldwh); + +/* Subroutine */ int _starpu_slaqsb_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqsp_(char *uplo, integer *n, real *ap, real *s, real * + scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqsy_(char *uplo, integer *n, real *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqtr_(logical *ltran, logical *lreal, integer *n, real + *t, integer *ldt, real *b, real *w, real *scale, real *x, real 
*work, + integer *info); + +/* Subroutine */ int _starpu_slar1v_(integer *n, integer *b1, integer *bn, real * + lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * + gaptol, real *z__, logical *wantnc, integer *negcnt, real *ztz, real * + mingma, integer *r__, integer *isuppz, real *nrminv, real *resid, + real *rqcorr, real *work); + +/* Subroutine */ int _starpu_slar2v_(integer *n, real *x, real *y, real *z__, integer + *incx, real *c__, real *s, integer *incc); + +/* Subroutine */ int _starpu_slarf_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work); + +/* Subroutine */ int _starpu_slarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, real *v, integer *ldv, + real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * + ldwork); + +/* Subroutine */ int _starpu_slarfg_(integer *n, real *alpha, real *x, integer *incx, + real *tau); + +/* Subroutine */ int _starpu_slarfp_(integer *n, real *alpha, real *x, integer *incx, + real *tau); + +/* Subroutine */ int _starpu_slarft_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt); + +/* Subroutine */ int _starpu_slarfx_(char *side, integer *m, integer *n, real *v, + real *tau, real *c__, integer *ldc, real *work); + +/* Subroutine */ int _starpu_slargv_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *c__, integer *incc); + +/* Subroutine */ int _starpu_slarnv_(integer *idist, integer *iseed, integer *n, real + *x); + +/* Subroutine */ int _starpu_slarra_(integer *n, real *d__, real *e, real *e2, real * + spltol, real *tnrm, integer *nsplit, integer *isplit, integer *info); + +/* Subroutine */ int _starpu_slarrb_(integer *n, real *d__, real *lld, integer * + ifirst, integer *ilast, real *rtol1, real *rtol2, integer *offset, + real *w, real *wgap, real *werr, real *work, integer *iwork, real * + pivmin, real 
*spdiam, integer *twist, integer *info); + +/* Subroutine */ int _starpu_slarrc_(char *jobt, integer *n, real *vl, real *vu, real + *d__, real *e, real *pivmin, integer *eigcnt, integer *lcnt, integer * + rcnt, integer *info); + +/* Subroutine */ int _starpu_slarrd_(char *range, char *order, integer *n, real *vl, + real *vu, integer *il, integer *iu, real *gers, real *reltol, real * + d__, real *e, real *e2, real *pivmin, integer *nsplit, integer * + isplit, integer *m, real *w, real *werr, real *wl, real *wu, integer * + iblock, integer *indexw, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slarre_(char *range, integer *n, real *vl, real *vu, + integer *il, integer *iu, real *d__, real *e, real *e2, real *rtol1, + real *rtol2, real *spltol, integer *nsplit, integer *isplit, integer * + m, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, + real *gers, real *pivmin, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slarrf_(integer *n, real *d__, real *l, real *ld, + integer *clstrt, integer *clend, real *w, real *wgap, real *werr, + real *spdiam, real *clgapl, real *clgapr, real *pivmin, real *sigma, + real *dplus, real *lplus, real *work, integer *info); + +/* Subroutine */ int _starpu_slarrj_(integer *n, real *d__, real *e2, integer *ifirst, + integer *ilast, real *rtol, integer *offset, real *w, real *werr, + real *work, integer *iwork, real *pivmin, real *spdiam, integer *info); + +/* Subroutine */ int _starpu_slarrk_(integer *n, integer *iw, real *gl, real *gu, + real *d__, real *e2, real *pivmin, real *reltol, real *w, real *werr, + integer *info); + +/* Subroutine */ int _starpu_slarrr_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_slarrv_(integer *n, real *vl, real *vu, real *d__, real * + l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * + dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, + real *wgap, integer 
*iblock, integer *indexw, real *gers, real *z__, + integer *ldz, integer *isuppz, real *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_slarscl2_(integer *m, integer *n, real *d__, real *x, + integer *ldx); + +/* Subroutine */ int _starpu_slartg_(real *f, real *g, real *cs, real *sn, real *r__); + +/* Subroutine */ int _starpu_slartv_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *c__, real *s, integer *incc); + +/* Subroutine */ int _starpu_slaruv_(integer *iseed, integer *n, real *x); + +/* Subroutine */ int _starpu_slarz_(char *side, integer *m, integer *n, integer *l, + real *v, integer *incv, real *tau, real *c__, integer *ldc, real * + work); + +/* Subroutine */ int _starpu_slarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, real *v, + integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real * + work, integer *ldwork); + +/* Subroutine */ int _starpu_slarzt_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt); + +/* Subroutine */ int _starpu_slas2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax); + +/* Subroutine */ int _starpu_slascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_slascl2_(integer *m, integer *n, real *d__, real *x, + integer *ldx); + +/* Subroutine */ int _starpu_slasd0_(integer *n, integer *sqre, real *d__, real *e, + real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, + integer *iwork, real *work, integer *info); + +/* Subroutine */ int _starpu_slasd1_(integer *nl, integer *nr, integer *sqre, real * + d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, + integer *ldvt, integer *idxq, integer *iwork, real *work, integer * + info); + +/* Subroutine */ int _starpu_slasd2_(integer *nl, integer *nr, integer *sqre, integer 
+ *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * + ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, + real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, + integer *idxq, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_slasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * + ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, + integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * + info); + +/* Subroutine */ int _starpu_slasd4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *sigma, real *work, integer *info); + +/* Subroutine */ int _starpu_slasd5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dsigma, real *work); + +/* Subroutine */ int _starpu_slasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, + integer *idxq, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, + real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, + integer *idx, integer *idxp, integer *idxq, integer *perm, integer * + givptr, integer *givcol, integer *ldgcol, real *givnum, integer * + ldgnum, real *c__, real *s, integer *info); + +/* Subroutine */ int _starpu_slasd8_(integer *icompq, integer *k, real *d__, real * + z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, + real *dsigma, real *work, integer *info); + +/* Subroutine */ int _starpu_slasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, real *d__, real *e, real 
*u, integer *ldu, real *vt, + integer *k, real *difl, real *difr, real *z__, real *poles, integer * + givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, + real *c__, real *s, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, + integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * + work, integer *info); + +/* Subroutine */ int _starpu_slasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub); + +/* Subroutine */ int _starpu_slaset_(char *uplo, integer *m, integer *n, real *alpha, + real *beta, real *a, integer *lda); + +/* Subroutine */ int _starpu_slasq1_(integer *n, real *d__, real *e, real *work, + integer *info); + +/* Subroutine */ int _starpu_slasq2_(integer *n, real *z__, integer *info); + +/* Subroutine */ int _starpu_slasq3_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, + integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * + dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * + tau); + +/* Subroutine */ int _starpu_slasq4_(integer *i0, integer *n0, real *z__, integer *pp, + integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, + real *dn1, real *dn2, real *tau, integer *ttype, real *g); + +/* Subroutine */ int _starpu_slasq5_(integer *i0, integer *n0, real *z__, integer *pp, + real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * + dnm1, real *dnm2, logical *ieee); + +/* Subroutine */ int _starpu_slasq6_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * + dnm2); + +/* Subroutine */ int _starpu_slasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, real *a, integer *lda); + +/* 
Subroutine */ int _starpu_slasrt_(char *id, integer *n, real *d__, integer *info); + +/* Subroutine */ int _starpu_slassq_(integer *n, real *x, integer *incx, real *scale, + real *sumsq); + +/* Subroutine */ int _starpu_slasv2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax, real *snr, real *csr, real *snl, real *csl); + +/* Subroutine */ int _starpu_slaswp_(integer *n, real *a, integer *lda, integer *k1, + integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_slasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * + ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real + *xnorm, integer *info); + +/* Subroutine */ int _starpu_slasyf_(char *uplo, integer *n, integer *nb, integer *kb, + real *a, integer *lda, integer *ipiv, real *w, integer *ldw, integer + *info); + +/* Subroutine */ int _starpu_slatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, + real *scale, real *cnorm, integer *info); + +/* Subroutine */ int _starpu_slatdf_(integer *ijob, integer *n, real *z__, integer * + ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer * + jpiv); + +/* Subroutine */ int _starpu_slatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, real *ap, real *x, real *scale, real *cnorm, + integer *info); + +/* Subroutine */ int _starpu_slatrd_(char *uplo, integer *n, integer *nb, real *a, + integer *lda, real *e, real *tau, real *w, integer *ldw); + +/* Subroutine */ int _starpu_slatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, real *a, integer *lda, real *x, real *scale, real + *cnorm, integer *info); + +/* Subroutine */ int _starpu_slatrz_(integer *m, integer *n, integer *l, real *a, + integer *lda, real *tau, real *work); + +/* Subroutine */ int _starpu_slatzm_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real 
*c1, real *c2, integer *ldc, real * + work); + +/* Subroutine */ int _starpu_slauu2_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_slauum_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_sopgtr_(char *uplo, integer *n, real *ap, real *tau, + real *q, integer *ldq, real *work, integer *info); + +/* Subroutine */ int _starpu_sopmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *ap, real *tau, real *c__, integer *ldc, real *work, + integer *info); + +/* Subroutine */ int _starpu_sorg2l_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorg2r_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorgbr_(char *vect, integer *m, integer *n, integer *k, + real *a, integer *lda, real *tau, real *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_sorghr_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgl2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorglq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgql_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgqr_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgr2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorgrq_(integer *m, integer *n, 
integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgtr_(char *uplo, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * + c__, integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormql_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormr2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, 
integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *info); + +/* Subroutine */ int _starpu_sormrq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_spbcon_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *anorm, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_spbequ_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *s, real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spbstf_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, + char *equed, real *s, real *b, integer *ldb, 
real *x, integer *ldx, + real *rcond, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_spbtf2_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbtrf_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spftrf_(char *transr, char *uplo, integer *n, real *a, + integer *info); + +/* Subroutine */ int _starpu_spftri_(char *transr, char *uplo, integer *n, real *a, + integer *info); + +/* Subroutine */ int _starpu_spftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, real *a, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spocon_(char *uplo, integer *n, real *a, integer *lda, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spoequ_(integer *n, real *a, integer *lda, real *s, real + *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spoequb_(integer *n, real *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_sporfs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, real *b, integer *ldb, real *x, + integer *ldx, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_sporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, real *s, real * + b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, real *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer 
*lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, + real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, + real *ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, + real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, + real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotri_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sppcon_(char *uplo, integer *n, real *ap, real *anorm, + real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sppequ_(char *uplo, integer *n, real *ap, real *s, real * + scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spprfs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *afp, real *b, integer *ldb, real *x, integer *ldx, real *ferr, + real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sppsv_(char *uplo, integer *n, integer *nrhs, real *ap, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *ap, real *afp, char *equed, real *s, real *b, integer * + ldb, real *x, 
integer *ldx, real *rcond, real *ferr, real *berr, real + *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spptrf_(char *uplo, integer *n, real *ap, integer *info); + +/* Subroutine */ int _starpu_spptri_(char *uplo, integer *n, real *ap, integer *info); + +/* Subroutine */ int _starpu_spptrs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spstf2_(char *uplo, integer *n, real *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_spstrf_(char *uplo, integer *n, real *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_sptcon_(integer *n, real *d__, real *e, real *anorm, + real *rcond, real *work, integer *info); + +/* Subroutine */ int _starpu_spteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sptrfs_(integer *n, integer *nrhs, real *d__, real *e, + real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, + real *ferr, real *berr, real *work, integer *info); + +/* Subroutine */ int _starpu_sptsv_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sptsvx_(char *fact, integer *n, integer *nrhs, real *d__, + real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer + *ldx, real *rcond, real *ferr, real *berr, real *work, integer *info); + +/* Subroutine */ int _starpu_spttrf_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_spttrs_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sptts2_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb); + +/* Subroutine */ int _starpu_srscl_(integer *n, real *sa, real *sx, integer *incx); + +/* Subroutine */ int 
_starpu_ssbev_(char *jobz, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, + integer *info); + +/* Subroutine */ int _starpu_ssbevd_(char *jobz, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, real *ab, integer *ldab, real *q, integer *ldq, real *vl, + real *vu, integer *il, integer *iu, real *abstol, integer *m, real * + w, real *z__, integer *ldz, real *work, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_ssbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + x, integer *ldx, real *work, integer *info); + +/* Subroutine */ int _starpu_ssbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + w, real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_ssbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + w, real *z__, integer *ldz, real *work, integer *lwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer * + ldbb, real *q, integer *ldq, real *vl, real *vu, integer *il, integer + *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real + *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, + real *work, integer *info); + +/* Subroutine */ int 
_starpu_ssfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, real *alpha, real *a, integer *lda, real *beta, real * + c__); + +/* Subroutine */ int _starpu_sspcon_(char *uplo, integer *n, real *ap, integer *ipiv, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sspev_(char *jobz, char *uplo, integer *n, real *ap, + real *w, real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sspevd_(char *jobz, char *uplo, integer *n, real *ap, + real *w, real *z__, integer *ldz, real *work, integer *lwork, integer + *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sspevx_(char *jobz, char *range, char *uplo, integer *n, + real *ap, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, real *work, integer * + iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_sspgst_(integer *itype, char *uplo, integer *n, real *ap, + real *bp, integer *info); + +/* Subroutine */ int _starpu_sspgv_(integer *itype, char *jobz, char *uplo, integer * + n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, + integer *info); + +/* Subroutine */ int _starpu_sspgvd_(integer *itype, char *jobz, char *uplo, integer * + n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sspgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, real *ap, real *bp, real *vl, real *vu, integer *il, + integer *iu, real *abstol, integer *m, real *w, real *z__, integer * + ldz, real *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssprfs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer * + ldx, real *ferr, real *berr, real *work, integer *iwork, integer * + info); + +/* 
Subroutine */ int _starpu_sspsv_(char *uplo, integer *n, integer *nrhs, real *ap, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real + *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ssptrd_(char *uplo, integer *n, real *ap, real *d__, + real *e, real *tau, integer *info); + +/* Subroutine */ int _starpu_ssptrf_(char *uplo, integer *n, real *ap, integer *ipiv, + integer *info); + +/* Subroutine */ int _starpu_ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, + real *work, integer *info); + +/* Subroutine */ int _starpu_ssptrs_(char *uplo, integer *n, integer *nrhs, real *ap, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sstebz_(char *range, char *order, integer *n, real *vl, + real *vu, integer *il, integer *iu, real *abstol, real *d__, real *e, + integer *m, integer *nsplit, real *w, integer *iblock, integer * + isplit, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sstedc_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstegr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * + work, integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstein_(integer *n, real *d__, real *e, integer *m, real + *w, integer *iblock, integer *isplit, real *z__, integer *ldz, real * + work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_sstemr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real 
*vu, integer *il, integer *iu, integer *m, + real *w, real *z__, integer *ldz, integer *nzc, integer *isuppz, + logical *tryrac, real *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_ssteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_ssterf_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_sstev_(char *jobz, integer *n, real *d__, real *e, real * + z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sstevd_(char *jobz, integer *n, real *d__, real *e, real + *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstevr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * + work, integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstevx_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, real *work, integer * + iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssycon_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_ssyequb_(char *uplo, integer *n, real *a, integer *lda, + real *s, real *scond, real *amax, real *work, integer *info); + +/* Subroutine */ int _starpu_ssyev_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssyevd_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *iwork, + integer *liwork, 
integer *info); + +/* Subroutine */ int _starpu_ssyevr_(char *jobz, char *range, char *uplo, integer *n, + real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, + real *abstol, integer *m, real *w, real *z__, integer *ldz, integer * + isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ssyevx_(char *jobz, char *range, char *uplo, integer *n, + real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, + real *abstol, integer *m, real *w, real *z__, integer *ldz, real * + work, integer *lwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssygs2_(integer *itype, char *uplo, integer *n, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ssygst_(integer *itype, char *uplo, integer *n, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ssygv_(integer *itype, char *jobz, char *uplo, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssygvd_(integer *itype, char *jobz, char *uplo, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssygvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real * + vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, + real *w, real *z__, integer *ldz, real *work, integer *lwork, integer + *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssyrfs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int 
_starpu_ssyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, + real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * + err_bnds_comp__, integer *nparams, real *params, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_ssysv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, + real *berr, real *work, integer *lwork, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_ssysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ssytd2_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, integer *info); + +/* Subroutine */ int _starpu_ssytf2_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_ssytrd_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, real *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_ssytrf_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssytri_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *work, integer *info); + +/* 
Subroutine */ int _starpu_ssytrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_stbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, real *ab, integer *ldab, real *rcond, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer + *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer + *ldb, integer *info); + +/* Subroutine */ int _starpu_stfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, real *alpha, real *a, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_stftri_(char *transr, char *uplo, char *diag, integer *n, + real *a, integer *info); + +/* Subroutine */ int _starpu_stfttp_(char *transr, char *uplo, integer *n, real *arf, + real *ap, integer *info); + +/* Subroutine */ int _starpu_stfttr_(char *transr, char *uplo, integer *n, real *arf, + real *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_stgevc_(char *side, char *howmny, logical *select, + integer *n, real *s, integer *lds, real *p, integer *ldp, real *vl, + integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real + *work, integer *info); + +/* Subroutine */ int _starpu_stgex2_(logical *wantq, logical *wantz, integer *n, real + *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * + z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_stgexc_(logical *wantq, logical *wantz, integer *n, real + *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, 
real * + z__, integer *ldz, integer *ifst, integer *ilst, real *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_stgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, real *a, integer *lda, real *b, integer * + ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, + real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, + real *work, integer *lwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_stgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, + real *b, integer *ldb, real *tola, real *tolb, real *alpha, real * + beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer * + ldq, real *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_stgsna_(char *job, char *howmny, logical *select, + integer *n, real *a, integer *lda, real *b, integer *ldb, real *vl, + integer *ldvl, real *vr, integer *ldvr, real *s, real *dif, integer * + mm, integer *m, real *work, integer *lwork, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_stgsy2_(char *trans, integer *ijob, integer *m, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * + ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer + *ldf, real *scale, real *rdsum, real *rdscal, integer *iwork, integer + *pq, integer *info); + +/* Subroutine */ int _starpu_stgsyl_(char *trans, integer *ijob, integer *m, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * + ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer + *ldf, real *scale, real *dif, real *work, integer *lwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_stpcon_(char *norm, char *uplo, char *diag, integer *n, + real *ap, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int 
_starpu_stprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *ap, real *b, integer *ldb, real *x, integer *ldx, + real *ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stptri_(char *uplo, char *diag, integer *n, real *ap, + integer *info); + +/* Subroutine */ int _starpu_stptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *ap, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_stpttf_(char *transr, char *uplo, integer *n, real *ap, + real *arf, integer *info); + +/* Subroutine */ int _starpu_stpttr_(char *uplo, integer *n, real *ap, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strcon_(char *norm, char *uplo, char *diag, integer *n, + real *a, integer *lda, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_strevc_(char *side, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *mm, integer *m, real *work, integer *info); + +/* Subroutine */ int _starpu_strexc_(char *compq, integer *n, real *t, integer *ldt, + real *q, integer *ldq, integer *ifst, integer *ilst, real *work, + integer *info); + +/* Subroutine */ int _starpu_strrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *x, + integer *ldx, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_strsen_(char *job, char *compq, logical *select, integer + *n, real *t, integer *ldt, real *q, integer *ldq, real *wr, real *wi, + integer *m, real *s, real *sep, real *work, integer *lwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_strsna_(char *job, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *s, real *sep, integer *mm, 
integer *m, real * + work, integer *ldwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_strsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, real *a, integer *lda, real *b, integer *ldb, real * + c__, integer *ldc, real *scale, integer *info); + +/* Subroutine */ int _starpu_strti2_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strtri_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_strttf_(char *transr, char *uplo, integer *n, real *a, + integer *lda, real *arf, integer *info); + +/* Subroutine */ int _starpu_strttp_(char *uplo, integer *n, real *a, integer *lda, + real *ap, integer *info); + +/* Subroutine */ int _starpu_stzrqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, integer *info); + +/* Subroutine */ int _starpu_stzrzf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len); + +/* Subroutine */ int _starpu_zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, + integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, + integer *ldc, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zcgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, + doublereal *rwork, integer *iter, integer *info); + +/* Subroutine */ int _starpu_zcposv_(char 
*uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, + doublereal *rwork, integer *iter, integer *info); + +/* Subroutine */ int _starpu_zdrscl_(integer *n, doublereal *sa, doublecomplex *sx, + integer *incx); + +/* Subroutine */ int _starpu_zgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublecomplex *ab, integer *ldab, + doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, + doublecomplex *pt, integer *ldpt, doublecomplex *c__, integer *ldc, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_zgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal * + c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, + integer *info); + +/* Subroutine */ int _starpu_zgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex * + afb, integer *ldafb, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, 
doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex * + b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, + doublecomplex *b, integer *ldb, integer 
*info); + +/* Subroutine */ int _starpu_zgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublecomplex *v, + integer *ldv, integer *info); + +/* Subroutine */ int _starpu_zgebal_(char *job, integer *n, doublecomplex *a, integer + *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_zgebd2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgebrd_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_zgecon_(char *norm, integer *n, doublecomplex *a, + integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeequ_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zgeequb_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, + doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork, + doublereal *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublecomplex *a, integer *lda, integer *sdim, + doublecomplex *w, doublecomplex *vs, integer *ldvs, doublereal * + rconde, doublereal *rcondv, doublecomplex *work, integer *lwork, + doublereal *rwork, 
logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zgeev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, + doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, + doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgegs_(char *jobvsl, char *jobvsr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, + integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex * + work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgegv_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgehd2_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zgehrd_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgelq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgelqf_(integer *m, integer *n, doublecomplex *a, + integer 
*lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgelsd_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zgelss_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgelsx_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgelsy_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeql2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgeqlf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgeqp3_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeqpf_(integer *m, integer *n, doublecomplex *a, + 
integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeqr2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgeqrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgerfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgerq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgerqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgesc2_(integer *n, doublecomplex *a, integer *lda, + doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale); + +/* Subroutine */ int _starpu_zgesdd_(char *jobz, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, 
integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_zgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgetc2_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_zgetf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgetrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgetri_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int 
_starpu_zgetrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublecomplex *v, integer *ldv, integer *info); + +/* Subroutine */ int _starpu_zggbal_(char *job, integer *n, doublecomplex *a, integer + *lda, doublecomplex *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_zgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer + *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, + logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, + doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, + doublecomplex *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, 
doublecomplex *alpha, doublecomplex *beta, + doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggglm_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex + *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_zgglse_(integer *m, integer *n, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, doublecomplex *d__, doublecomplex *x, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zggqrf_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zggrqf_(integer *m, integer *p, integer *n, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, + integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, + 
doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex + *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, + integer *l, doublecomplex *u, integer *ldu, doublecomplex *v, integer + *ldv, doublecomplex *q, integer *ldq, integer *iwork, doublereal * + rwork, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgtcon_(char *norm, integer *n, doublecomplex *dl, + doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, + integer *info); + +/* Subroutine */ int _starpu_zgtrfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, + doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgttrf_(integer *n, doublecomplex *dl, doublecomplex * + d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer * + info); + +/* Subroutine */ int _starpu_zgttrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, 
doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zgtts2_(integer *itrans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zhbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *q, + integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zhbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublecomplex *x, integer *ldx, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, 
doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublecomplex *ab, integer *ldab, + doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, + doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * + abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_zhbtrd_(char *vect, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, + doublecomplex *q, integer *ldq, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhecon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zheequb_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zheev_(char *jobz, char *uplo, integer *n, doublecomplex + *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zheevr_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, integer *isuppz, doublecomplex * + work, 
integer *lwork, doublereal *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zheevx_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * + info); + +/* Subroutine */ int _starpu_zhegs2_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhegst_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhegv_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zhegvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhegvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zherfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + 
integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zherfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, + integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhesv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zhesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info); + +/* Subroutine */ int _starpu_zhetf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* 
Subroutine */ int _starpu_zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zhetrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zhetri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhetrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zhfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, doublereal *alpha, doublecomplex *a, integer *lda, + doublereal *beta, doublecomplex *c__); + +/* Subroutine */ int _starpu_zhgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *t, integer *ldt, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * + ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpcon_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zhpev_(char *jobz, char *uplo, integer *n, doublecomplex + *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhpevd_(char *jobz, char *uplo, integer *n, + doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhpevx_(char *jobz, char *range, char *uplo, 
integer *n, + doublecomplex *ap, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zhpgst_(integer *itype, char *uplo, integer *n, + doublecomplex *ap, doublecomplex *bp, integer *info); + +/* Subroutine */ int _starpu_zhpgv_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex + *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpgvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex + *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_zhpgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublecomplex *ap, doublecomplex *bp, doublereal * + vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, + integer *m, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_zhprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhpsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, + doublecomplex *b, integer *ldb, doublecomplex 
*x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhptrd_(char *uplo, integer *n, doublecomplex *ap, + doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); + +/* Subroutine */ int _starpu_zhptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zhptri_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhptrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, doublecomplex *h__, integer *ldh, doublecomplex * + w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *mm, integer *m, doublecomplex *work, doublereal *rwork, + integer *ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_zhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, doublereal *alpha, doublecomplex *ab, + integer *ldab, doublecomplex *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy); + +doublereal _starpu_zla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, + integer *ipiv, doublereal *c__, logical *capply, integer *info, + doublecomplex *work, doublereal *rwork, ftnlen trans_len); + +doublereal _starpu_zla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer 
*ldafb, + integer *ipiv, doublecomplex *x, integer *info, doublecomplex *work, + doublereal *rwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_zla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, + integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, + integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex + *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info); + +doublereal _starpu_zla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * + ldafb); + +/* Subroutine */ int _starpu_zla_geamv__(integer *trans, integer *m, integer *n, + doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, + integer *incx, doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_gercond_c__(char *trans, integer *n, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal * + c__, logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen trans_len); + +doublereal _starpu_zla_gercond_x__(char *trans, integer *n, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + trans_len); + +/* Subroutine */ int _starpu_zla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer 
*n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info); + +/* Subroutine */ int _starpu_zla_heamv__(integer *uplo, integer *n, doublereal *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_hercond_c__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, + logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen uplo_len); + +doublereal _starpu_zla_hercond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + uplo_len); + +/* Subroutine */ int _starpu_zla_herfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_herpvgrw__(char *uplo, integer *n, integer *info, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_lin_berr__(integer *n, integer *nz, integer *nrhs, + doublecomplex *res, doublereal *ayb, doublereal *berr); + +doublereal _starpu_zla_porcond_c__(char *uplo, integer *n, 
doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, doublereal *c__, logical * + capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + uplo_len); + +doublereal _starpu_zla_porcond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, doublecomplex *x, integer * + info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer *ldaf, logical *colequ, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, + doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, + doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_porpvgrw__(char *uplo, integer *ncols, doublecomplex *a, + integer *lda, doublecomplex *af, integer *ldaf, doublereal *work, + ftnlen uplo_len); + +doublereal _starpu_zla_rpvgrw__(integer *n, integer *ncols, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf); + +/* Subroutine */ int _starpu_zla_syamv__(integer *uplo, integer *n, doublereal *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_syrcond_c__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, + logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen uplo_len); + +doublereal _starpu_zla_syrcond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, 
ftnlen + uplo_len); + +/* Subroutine */ int _starpu_zla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_syrpvgrw__(char *uplo, integer *n, integer *info, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_wwaddw__(integer *n, doublecomplex *x, doublecomplex + *y, doublecomplex *w); + +/* Subroutine */ int _starpu_zlabrd_(integer *m, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, + doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * + ldx, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlacgv_(integer *n, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_zlacn2_(integer *n, doublecomplex *v, doublecomplex *x, + doublereal *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_zlacon_(integer *n, doublecomplex *v, doublecomplex *x, + doublereal *est, integer *kase); + +/* Subroutine */ int _starpu_zlacp2_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlacpy_(char *uplo, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlacrm_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, + integer *ldc, 
doublereal *rwork); + +/* Subroutine */ int _starpu_zlacrt_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublecomplex *c__, doublecomplex * + s); + +/* Double Complex */ VOID _starpu_zladiv_(doublecomplex * ret_val, doublecomplex *x, + doublecomplex *y); + +/* Subroutine */ int _starpu_zlaed0_(integer *qsiz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, + integer *ldqs, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlaed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, + doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, + integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * + work, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlaed8_(integer *k, integer *n, integer *qsiz, + doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, + integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * + q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, + integer *indxq, integer *perm, integer *givptr, integer *givcol, + doublereal *givnum, integer *info); + +/* Subroutine */ int _starpu_zlaein_(logical *rightv, logical *noinit, integer *n, + doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *v, + doublecomplex *b, integer *ldb, doublereal *rwork, doublereal *eps3, + doublereal *smlnum, integer *info); + +/* Subroutine */ int _starpu_zlaesy_(doublecomplex *a, doublecomplex *b, + doublecomplex *c__, doublecomplex *rt1, doublecomplex *rt2, + doublecomplex *evscal, doublecomplex *cs1, doublecomplex *sn1); + +/* Subroutine */ int _starpu_zlaev2_(doublecomplex *a, doublecomplex *b, + doublecomplex *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, + doublecomplex *sn1); + +/* Subroutine */ int 
_starpu_zlag2c_(integer *m, integer *n, doublecomplex *a, + integer *lda, complex *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_zlags2_(logical *upper, doublereal *a1, doublecomplex * + a2, doublereal *a3, doublereal *b1, doublecomplex *b2, doublereal *b3, + doublereal *csu, doublecomplex *snu, doublereal *csv, doublecomplex * + snv, doublereal *csq, doublecomplex *snq); + +/* Subroutine */ int _starpu_zlagtm_(char *trans, integer *n, integer *nrhs, + doublereal *alpha, doublecomplex *dl, doublecomplex *d__, + doublecomplex *du, doublecomplex *x, integer *ldx, doublereal *beta, + doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlahef_(char *uplo, integer *n, integer *nb, integer *kb, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, + integer *ldw, integer *info); + +/* Subroutine */ int _starpu_zlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_zlahr2_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlahrd_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlaic1_(integer *job, integer *j, doublecomplex *x, + doublereal *sest, doublecomplex *w, doublecomplex *gamma, doublereal * + sestpr, doublecomplex *s, doublecomplex *c__); + +/* Subroutine */ int _starpu_zlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, + doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, + integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, + doublereal *poles, 
doublereal *difl, doublereal *difr, doublereal * + z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, + integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * + k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, + doublereal *rcond, integer *rank, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *info); + +doublereal _starpu_zlangb_(char *norm, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlange_(char *norm, integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlangt_(char *norm, integer *n, doublecomplex *dl, doublecomplex * + d__, doublecomplex *du); + +doublereal _starpu_zlanhb_(char *norm, char *uplo, integer *n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlanhf_(char *norm, char *transr, char *uplo, integer *n, + doublecomplex *a, doublereal *work); + +doublereal _starpu_zlanhp_(char *norm, char *uplo, integer *n, doublecomplex *ap, + doublereal *work); + +doublereal _starpu_zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, + doublereal *work); + +doublereal _starpu_zlanht_(char *norm, integer *n, doublereal *d__, doublecomplex *e); + +doublereal _starpu_zlansb_(char *norm, char *uplo, integer 
*n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlansp_(char *norm, char *uplo, integer *n, doublecomplex *ap, + doublereal *work); + +doublereal _starpu_zlansy_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlantp_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *ap, doublereal *work); + +doublereal _starpu_zlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *work); + +/* Subroutine */ int _starpu_zlapll_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *ssmin); + +/* Subroutine */ int _starpu_zlapmt_(logical *forwrd, integer *m, integer *n, + doublecomplex *x, integer *ldx, integer *k); + +/* Subroutine */ int _starpu_zlaqgb_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqge_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqhb_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqhe_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_zlaqhp_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqp2_(integer *m, integer *n, integer *offset, + 
doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, + doublereal *vn1, doublereal *vn2, doublecomplex *work); + +/* Subroutine */ int _starpu_zlaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, doublecomplex *a, integer *lda, integer *jpvt, + doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex * + auxv, doublecomplex *f, integer *ldf); + +/* Subroutine */ int _starpu_zlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, + doublecomplex *s1, doublecomplex *s2, doublecomplex *v); + +/* Subroutine */ int _starpu_zlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork); + +/* Subroutine */ int _starpu_zlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork); + +/* Subroutine */ int _starpu_zlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer 
*info); + +/* Subroutine */ int _starpu_zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, + doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, + integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, + integer *ldv, doublecomplex *u, integer *ldu, integer *nv, + doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, + integer *ldwh); + +/* Subroutine */ int _starpu_zlaqsb_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqsp_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqsy_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_zlar1v_(integer *n, integer *b1, integer *bn, doublereal + *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * + lld, doublereal *pivmin, doublereal *gaptol, doublecomplex *z__, + logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, + integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, + doublereal *rqcorr, doublereal *work); + +/* Subroutine */ int _starpu_zlar2v_(integer *n, doublecomplex *x, doublecomplex *y, + doublecomplex *z__, integer *incx, doublereal *c__, doublecomplex *s, + integer *incc); + +/* Subroutine */ int _starpu_zlarcm_(integer *m, integer *n, doublereal *a, integer * + lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, + doublereal *rwork); + +/* Subroutine */ int _starpu_zlarf_(char *side, integer *m, integer *n, doublecomplex + *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlarfb_(char *side, char *trans, char *direct, char * + 
storev, integer *m, integer *n, integer *k, doublecomplex *v, integer + *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * + ldc, doublecomplex *work, integer *ldwork); + +/* Subroutine */ int _starpu_zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau); + +/* Subroutine */ int _starpu_zlarfp_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau); + +/* Subroutine */ int _starpu_zlarft_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * + t, integer *ldt); + +/* Subroutine */ int _starpu_zlarfx_(char *side, integer *m, integer *n, + doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlargv_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *c__, integer *incc); + +/* Subroutine */ int _starpu_zlarnv_(integer *idist, integer *iseed, integer *n, + doublecomplex *x); + +/* Subroutine */ int _starpu_zlarrv_(integer *n, doublereal *vl, doublereal *vu, + doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, + integer *m, integer *dol, integer *dou, doublereal *minrgp, + doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, + doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, + doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlarscl2_(integer *m, integer *n, doublereal *d__, + doublecomplex *x, integer *ldx); + +/* Subroutine */ int _starpu_zlartg_(doublecomplex *f, doublecomplex *g, doublereal * + cs, doublecomplex *sn, doublecomplex *r__); + +/* Subroutine */ int _starpu_zlartv_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *c__, doublecomplex *s, + integer *incc); + +/* Subroutine */ int 
_starpu_zlarz_(char *side, integer *m, integer *n, integer *l, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c__, integer *ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, doublecomplex + *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, + integer *ldc, doublecomplex *work, integer *ldwork); + +/* Subroutine */ int _starpu_zlarzt_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * + t, integer *ldt); + +/* Subroutine */ int _starpu_zlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_zlascl2_(integer *m, integer *n, doublereal *d__, + doublecomplex *x, integer *ldx); + +/* Subroutine */ int _starpu_zlaset_(char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * + lda); + +/* Subroutine */ int _starpu_zlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublecomplex *a, + integer *lda); + +/* Subroutine */ int _starpu_zlassq_(integer *n, doublecomplex *x, integer *incx, + doublereal *scale, doublereal *sumsq); + +/* Subroutine */ int _starpu_zlaswp_(integer *n, doublecomplex *a, integer *lda, + integer *k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, + integer *ldw, integer *info); + +/* Subroutine */ int _starpu_zlat2c_(char *uplo, integer *n, doublecomplex *a, + integer *lda, complex *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_zlatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer 
*kd, doublecomplex *ab, integer *ldab, + doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatdf_(integer *ijob, integer *n, doublecomplex *z__, + integer *ldz, doublecomplex *rhs, doublereal *rdsum, doublereal * + rdscal, integer *ipiv, integer *jpiv); + +/* Subroutine */ int _starpu_zlatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *ap, doublecomplex *x, doublereal * + scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatrd_(char *uplo, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, + doublecomplex *w, integer *ldw); + +/* Subroutine */ int _starpu_zlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, + doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatrz_(integer *m, integer *n, integer *l, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work); + +/* Subroutine */ int _starpu_zlatzm_(char *side, integer *m, integer *n, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c1, doublecomplex *c2, integer *ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlauu2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zlauum_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpbcon_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *anorm, doublereal * + rcond, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpbequ_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, 
integer *ldab, doublecomplex *afb, integer * + ldafb, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zpbstf_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_zpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, + integer *ldafb, char *equed, doublereal *s, doublecomplex *b, integer + *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal * + ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zpbtf2_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbtrf_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_zpftrf_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_zpftri_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_zpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zpocon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpoequ_(integer *n, doublecomplex 
*a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpoequb_(integer *n, doublecomplex *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zporfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zposv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + 
doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpotf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zppcon_(char *uplo, integer *n, doublecomplex *ap, + doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal + *rwork, integer *info); + +/* Subroutine */ int _starpu_zppequ_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zppsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, char *equed, doublereal * + s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *info); + +/* Subroutine */ int _starpu_zpptri_(char *uplo, integer *n, doublecomplex *ap, + integer *info); + +/* Subroutine */ int _starpu_zpptrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *b, integer *ldb, 
integer *info); + +/* Subroutine */ int _starpu_zpstf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *piv, integer *rank, doublereal *tol, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_zpstrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *piv, integer *rank, doublereal *tol, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_zptcon_(integer *n, doublereal *d__, doublecomplex *e, + doublereal *anorm, doublereal *rcond, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zpteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_zptrfs_(char *uplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zptsv_(integer *n, integer *nrhs, doublereal *d__, + doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zptsvx_(char *fact, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpttrf_(integer *n, doublereal *d__, doublecomplex *e, + integer *info); + +/* Subroutine */ int _starpu_zpttrs_(char *uplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zptts2_(integer *iuplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zrot_(integer *n, doublecomplex 
*cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s); + +/* Subroutine */ int _starpu_zspcon_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zspmv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * + beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zspr_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *ap); + +/* Subroutine */ int _starpu_zsprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zspsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zsptri_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsptrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal 
*rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zstegr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zstein_(integer *n, doublereal *d__, doublereal *e, + integer *m, doublereal *w, integer *iblock, integer *isplit, + doublecomplex *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zstemr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, integer *m, doublereal *w, doublecomplex *z__, integer * + ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_zsycon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsyequb_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsymv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zsyr_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); + +/* Subroutine */ int 
_starpu_zsyrfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, + integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsysv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zsysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zsytf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, 
integer *info); + +/* Subroutine */ int _starpu_zsytrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zsytri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsytrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ztbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublereal *rcond, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_ztbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ztfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, doublecomplex *alpha, + doublecomplex *a, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztftri_(char *transr, char *uplo, char *diag, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_ztfttp_(char *transr, char *uplo, integer *n, + doublecomplex *arf, doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztfttr_(char *transr, char *uplo, integer *n, + doublecomplex *arf, doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztgevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *s, integer *lds, doublecomplex 
*p, integer + *ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * + ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_ztgex2_(logical *wantq, logical *wantz, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, + integer *j1, integer *info); + +/* Subroutine */ int _starpu_ztgexc_(logical *wantq, logical *wantz, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, + integer *ifst, integer *ilst, integer *info); + +/* Subroutine */ int _starpu_ztgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * + ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, + doublecomplex *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ztgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, + doublereal *tolb, doublereal *alpha, doublereal *beta, doublecomplex * + u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, + integer *ldq, doublecomplex *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_ztgsna_(char *job, char *howmny, logical *select, + integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer + *ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * + ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, + doublecomplex *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ztgsy2_(char *trans, integer 
*ijob, integer *m, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, + doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, + doublereal *scale, doublereal *rdsum, doublereal *rdscal, integer * + info); + +/* Subroutine */ int _starpu_ztgsyl_(char *trans, integer *ijob, integer *m, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, + doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, + doublereal *scale, doublereal *dif, doublecomplex *work, integer * + lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ztpcon_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *ap, doublereal *rcond, doublecomplex *work, doublereal + *rwork, integer *info); + +/* Subroutine */ int _starpu_ztprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztptri_(char *uplo, char *diag, integer *n, + doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_ztpttf_(char *transr, char *uplo, integer *n, + doublecomplex *ap, doublecomplex *arf, integer *info); + +/* Subroutine */ int _starpu_ztpttr_(char *uplo, integer *n, doublecomplex *ap, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrcon_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrevc_(char *side, 
char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrexc_(char *compq, integer *n, doublecomplex *t, + integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * + ilst, integer *info); + +/* Subroutine */ int _starpu_ztrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_ztrsen_(char *job, char *compq, logical *select, integer + *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, + doublecomplex *w, integer *m, doublereal *s, doublereal *sep, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ztrsna_(char *job, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, + doublereal *sep, integer *mm, integer *m, doublecomplex *work, + integer *ldwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, + integer *info); + +/* Subroutine */ int _starpu_ztrti2_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrtri_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer 
*info); + +/* Subroutine */ int _starpu_ztrttf_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *arf, integer *info); + +/* Subroutine */ int _starpu_ztrttp_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztzrqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, integer *info); + +/* Subroutine */ int _starpu_ztzrzf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zung2l_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zung2r_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zungbr_(char *vect, integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zunghr_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungl2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zunglq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungql_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungqr_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, 
doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungr2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zungrq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungtr_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublecomplex *a, integer *lda, + doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex 
*work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmr2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + info); + +/* Subroutine */ int _starpu_zunmrq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zupgtr_(char *uplo, integer *n, doublecomplex *ap, + doublecomplex *tau, doublecomplex *q, integer *ldq, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zupmtr_(char *side, char *uplo, 
char *trans, integer *m, + integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *c__, + integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1); + +doublereal _starpu_dsecnd_(); + +/* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, + integer *vers_patch__); + +logical _starpu_lsame_(char *ca, char *cb); + +doublereal _starpu_second_(); + +doublereal _starpu_slamch_(char *cmach); + +/* Subroutine */ int _starpu_slamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1); + +/* Subroutine */ int _starpu_slamc2_(integer *beta, integer *t, logical *rnd, real * + eps, integer *emin, real *rmin, integer *emax, real *rmax); + +doublereal _starpu_slamc3_(real *a, real *b); + +/* Subroutine */ int _starpu_slamc4_(integer *emin, real *start, integer *base); + +/* Subroutine */ int _starpu_slamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, real *rmax); + + +doublereal _starpu_dlamch_(char *cmach); + +/* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1); + +/* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, + doublereal *eps, integer *emin, doublereal *rmin, integer *emax, + doublereal *rmax); + +doublereal _starpu_dlamc3_(doublereal *a, doublereal *b); + +/* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base); + +/* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, doublereal *rmax); + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4); + +#ifdef __cplusplus +} +#endif + + +#endif /* __CLAPACK_H */ diff --git a/min-dgels/additional/d_lg10.c b/min-dgels/additional/d_lg10.c new file mode 100644 index 0000000..664c19d --- /dev/null +++ b/min-dgels/additional/d_lg10.c @@ -0,0 +1,21 
@@ +#include "f2c.h" + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +double log(); +double d_lg10(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_lg10(doublereal *x) +#endif +{ +return( log10e * log(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/additional/d_sign.c b/min-dgels/additional/d_sign.c new file mode 100644 index 0000000..d06e0d1 --- /dev/null +++ b/min-dgels/additional/d_sign.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_sign(a,b) doublereal *a, *b; +#else +double d_sign(doublereal *a, doublereal *b) +#endif +{ +double x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? x : -x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/additional/dcopy.c b/min-dgels/additional/dcopy.c new file mode 100644 index 0000000..2171077 --- /dev/null +++ b/min-dgels/additional/dcopy.c @@ -0,0 +1,107 @@ +/* dcopy.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* copies a vector, x, to a vector, y. */ +/* uses unrolled loops for increments equal to one. */ +/* jack dongarra, linpack, 3/11/78. 
*/ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ + } + return 0; +} /* _starpu_dcopy_ */ diff --git a/min-dgels/additional/dgelq2.c b/min-dgels/additional/dgelq2.c new file mode 100644 index 0000000..a5ae415 --- /dev/null +++ b/min-dgels/additional/dgelq2.c @@ -0,0 +1,157 @@ +/* dgelq2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELQ2 computes an LQ factorization of a real m by n matrix A: */ +/* A = L * Q. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix A. */ +/* On exit, the elements on and below the diagonal of the array */ +/* contain the m by min(m,n) lower trapezoidal matrix L (L is */ +/* lower triangular if m <= n); the elements above the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details). 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1] +, lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* _starpu_dgelq2_ */ diff --git a/min-dgels/additional/dgelqf.c b/min-dgels/additional/dgelqf.c new file mode 100644 index 0000000..a1e1147 --- /dev/null +++ b/min-dgels/additional/dgelqf.c @@ -0,0 +1,251 @@ +/* dgelqf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELQF computes an LQ factorization of a real M-by-N matrix A: */ +/* A = L * Q. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. 
M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and below the diagonal of the array */ +/* contain the m-by-min(m,n) lower trapezoidal matrix L (L is */ +/* lower triangular if m <= n); the elements above the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the LQ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + _starpu_dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *n - i__ + 1; + _starpu_dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + _starpu_dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + _starpu_dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] +, &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* _starpu_dgelqf_ */ diff --git a/min-dgels/additional/dgels.c b/min-dgels/additional/dgels.c new file mode 100644 index 0000000..173cb62 --- /dev/null +++ b/min-dgels/additional/dgels.c @@ -0,0 +1,515 @@ +/* dgels.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b33 = 0.; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, nb, mn; + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer iascl, ibscl; + extern logical _starpu_lsame_(char *, char *); + integer wsize; + doublereal rwork[1]; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer scllen; + 
doublereal bignum; + extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELS solves overdetermined or underdetermined real linear systems */ +/* involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* factorization of A. It is assumed that A has full rank. */ + +/* The following options are provided: */ + +/* 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* an overdetermined system, i.e., solve the least squares problem */ +/* minimize || B - A*X ||. */ + +/* 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* an underdetermined system A * X = B. */ + +/* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* an underdetermined system A**T * X = B. */ + +/* 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* an overdetermined system, i.e., solve the least squares problem */ +/* minimize || B - A**T * X ||. */ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* matrix X.
*/ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': the linear system involves A; */ +/* = 'T': the linear system involves A**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of */ +/* columns of the matrices B and X. NRHS >=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if M >= N, A is overwritten by details of its QR */ +/* factorization as returned by DGEQRF; */ +/* if M < N, A is overwritten by details of its LQ */ +/* factorization as returned by DGELQF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the matrix B of right hand side vectors, stored */ +/* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* if TRANS = 'T'. */ +/* On exit, if INFO = 0, B is overwritten by the solution */ +/* vectors, stored columnwise: */ +/* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* squares solution vectors; the residual sum of squares for the */ +/* solution in each column is given by the sum of squares of */ +/* elements N+1 to M in that column; */ +/* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* least squares solution vectors; the residual sum of squares */ +/* for the solution in each column is given by the sum of */ +/* squares of elements M+1 to N in that column. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= MAX(1,M,N). 
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= max( 1, MN + max( MN, NRHS ) ). */ +/* For optimal performance, */ +/* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */ +/* where MN = min(M,N) and NB is the optimum block size. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of the */ +/* triangular factor of A is zero, so that A does not have */ +/* full rank; the least squares solution could not be */ +/* computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = min(*m,*n); + lquery = *lwork == -1; + if (! 
(_starpu_lsame_(trans, "N") || _starpu_lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*ldb < max(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + max(mn,*nrhs); + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (_starpu_lsame_(trans, "N")) { + tpsd = FALSE_; + } + + if (*m >= *n) { + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, & + c_n1); + nb = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, & + c_n1); + nb = max(i__1,i__2); + } + } else { + nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, & + c_n1); + nb = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, & + c_n1); + nb = max(i__1,i__2); + } + } + +/* Computing MAX */ + i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb; + wsize = max(i__1,i__2); + work[1] = (doublereal) wsize; + + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELS ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = min(*m,*n); + if (min(i__1,*nrhs) == 0) { + i__1 = max(*m,*n); + _starpu_dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); + bignum = 
1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A, B if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = _starpu_dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + i__1 = *lwork - mn; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least N, optimally N*NB */ + + if (! 
tpsd) { + +/* Least-Squares Problem min || A * X - B || */ + +/* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] +, lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* Underdetermined system of equations A' * X = B */ + +/* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */ + + _starpu_dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + i__1 = *lwork - mn; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least M, optimally M*NB. */ + + if (!
tpsd) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + _starpu_dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] +, lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system min || A' * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */ + + _starpu_dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] +, ldb, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] +, ldb, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] +, ldb, info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] +, ldb, info); + } + +L50: + work[1] = (doublereal) wsize; + + return 0; + +/* End of DGELS */ + +} /* _starpu_dgels_ */ diff --git a/min-dgels/additional/dgemm.c b/min-dgels/additional/dgemm.c new file mode 100644 index 
0000000..f1c700f --- /dev/null +++ b/min-dgels/additional/dgemm.c @@ -0,0 +1,389 @@ +/* dgemm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, l, info; + logical nota, notb; + doublereal temp; + integer ncola; + extern logical _starpu_lsame_(char *, char *); + integer nrowa, nrowb; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEMM performs one of the matrix-matrix operations */ + +/* C := alpha*op( A )*op( B ) + beta*C, */ + +/* where op( X ) is one of */ + +/* op( X ) = X or op( X ) = X', */ + +/* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ +/* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n', op( A ) = A. */ + +/* TRANSA = 'T' or 't', op( A ) = A'. */ + +/* TRANSA = 'C' or 'c', op( A ) = A'. */ + +/* Unchanged on exit. 
*/ + +/* TRANSB - CHARACTER*1. */ +/* On entry, TRANSB specifies the form of op( B ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSB = 'N' or 'n', op( B ) = B. */ + +/* TRANSB = 'T' or 't', op( B ) = B'. */ + +/* TRANSB = 'C' or 'c', op( B ) = B'. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix */ +/* op( A ) and of the matrix C. M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix */ +/* op( B ) and the number of columns of the matrix C. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of columns of the matrix */ +/* op( A ) and the number of rows of the matrix op( B ). K must */ +/* be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ +/* k when TRANSA = 'N' or 'n', and is m otherwise. */ +/* Before entry with TRANSA = 'N' or 'n', the leading m by k */ +/* part of the array A must contain the matrix A, otherwise */ +/* the leading k by m part of the array A must contain the */ +/* matrix A. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When TRANSA = 'N' or 'n' then */ +/* LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* least max( 1, k ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ +/* n when TRANSB = 'N' or 'n', and is k otherwise. */ +/* Before entry with TRANSB = 'N' or 'n', the leading k by n */ +/* part of the array B must contain the matrix B, otherwise */ +/* the leading n by k part of the array B must contain the */ +/* matrix B. */ +/* Unchanged on exit. 
*/ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. When TRANSB = 'N' or 'n' then */ +/* LDB must be at least max( 1, k ), otherwise LDB must be at */ +/* least max( 1, n ). */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then C need not be set on input. */ +/* Unchanged on exit. */ + +/* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ +/* Before entry, the leading m by n part of the array C must */ +/* contain the matrix C, except when beta is zero, in which */ +/* case C need not be set on entry. */ +/* On exit, the array C is overwritten by the m by n matrix */ +/* ( alpha*op( A )*op( B ) + beta*C ). */ + +/* LDC - INTEGER. */ +/* On entry, LDC specifies the first dimension of C as declared */ +/* in the calling (sub) program. LDC must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NOTA and NOTB as true if A and B respectively are not */ +/* transposed and set NROWA, NCOLA and NROWB as the number of rows */ +/* and columns of A and the number of rows of B respectively. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = _starpu_lsame_(transa, "N"); + notb = _starpu_lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! _starpu_lsame_(transa, "C") && ! _starpu_lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! _starpu_lsame_(transb, "C") && ! + _starpu_lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + _starpu_xerbla_("DGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM . */ + +} /* _starpu_dgemm_ */ diff --git a/min-dgels/additional/dgemv.c b/min-dgels/additional/dgemv.c new file mode 100644 index 0000000..a216ee4 --- /dev/null +++ b/min-dgels/additional/dgemv.c @@ -0,0 +1,312 @@ +/* dgemv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp; + integer lenx, leny; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEMV performs one of the matrix-vector operations */ + +/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. 
*/ + +/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ + +/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. 
INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (_starpu_lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) 
{ + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (_starpu_lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV . */ + +} /* _starpu_dgemv_ */ diff --git a/min-dgels/additional/dgeqr2.c b/min-dgels/additional/dgeqr2.c new file mode 100644 index 0000000..feb9504 --- /dev/null +++ b/min-dgels/additional/dgeqr2.c @@ -0,0 +1,161 @@ +/* dgeqr2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: the literal 1 gets addressable storage because it is handed to the callees below as &c__1 */ + +static integer c__1 = 1; + +/* Subroutine: unblocked QR factorization, one elementary reflector per column (see Purpose below); always returns 0 and reports argument errors through *info */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQR2 computes a QR factorization of a real m by n matrix A: */ +/* A = Q * R. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix A.
*/ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(m,n) by n upper trapezoidal matrix R (R is */ +/* upper triangular if m >= n); the elements below the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + +/* Test the input arguments */ + + /* Parameter adjustments: shift the base pointers so a[i + j*a_dim1], tau[i] and work[i] can be indexed from 1 as in the Fortran original */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: every exit returns 0; argument errors are reported through *info and _starpu_xerbla_ */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN: min(i+1, m) keeps the row index of the vector tail inside A when i == m */ + i__3 = i__ + 1; + _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1] +, &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left. The diagonal entry is saved in aii and temporarily set to 1 (v(i) = 1, see Further Details) so that column i can be passed as the reflector vector; it is restored after the call. */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* _starpu_dgeqr2_ */ diff --git a/min-dgels/additional/dgeqrf.c b/min-dgels/additional/dgeqrf.c new file mode 100644 index 0000000..cdd83e5 --- /dev/null +++ b/min-dgels/additional/dgeqrf.c @@ -0,0 +1,252 @@ +/* dgeqrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: addressable copies of the integer literals that are passed by pointer to _starpu_ilaenv_ below */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine: QR factorization driver. Factors column panels with _starpu_dgeqr2_, updates the trailing columns with _starpu_dlarft_ and _starpu_dlarfb_, and finishes any remaining columns with _starpu_dgeqr2_. Always returns 0; argument errors are reported through *info. */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* A = Q * R. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A.
M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ +/* upper triangular if m >= n); the elements below the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of min(m,n) elementary reflectors (see Further */ +/* Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* ..
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: shift the base pointers so a[i + j*a_dim1], tau[i] and work[i] can be indexed from 1 as in the Fortran original */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: work[1] receives lwkopt = n * nb before the arguments are validated, which is all a workspace query (lwork == -1) needs */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially: the loop below steps i by nb (held in i__2) up to k - nx (held in i__1) */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ?
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN: ib is the width of this block, at most nb and never reaching past column k */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + _starpu_dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. Here i is 1 when the blocked path was skipped, otherwise the first column the blocked loop left unprocessed. The iinfo value written by _starpu_dgeqr2_ is not inspected. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + _starpu_dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] +, &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* _starpu_dgeqrf_ */ diff --git a/min-dgels/additional/dger.c b/min-dgels/additional/dger.c new file mode 100644 index 0000000..a6a3153 --- /dev/null +++ b/min-dgels/additional/dger.c @@ -0,0 +1,194 @@ +/* dger.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine: rank-1 update A := alpha*x*y' + A. Always returns 0; an invalid argument is reported to _starpu_xerbla_ by its 1-based position in the parameter list. */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jy, kx, info; + doublereal temp; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGER performs the rank 1 operation */ + +/* A := alpha*x*y' + A, */ + +/* where alpha is a scalar, x is an m element vector, y is an n element */ +/* vector and A is an m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the m */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit.
*/ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. */ +/* Unchanged on exit. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. On exit, A is */ +/* overwritten by the updated matrix. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift the base pointers so x[i], y[j] and a[i + j*a_dim1] can be indexed from 1 as in the Fortran original */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body: info holds the 1-based position of the first offending argument (1, 2, 5, 7 or 9), or 0 when all checks pass */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A.
*/ + + /* jy (and kx further down) is the 1-based start index into y (x); for a negative increment the walk starts at the highest index so that adding the increment moves toward index 1 */ if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER . */ + +} /* _starpu_dger_ */ diff --git a/min-dgels/additional/disnan.c b/min-dgels/additional/disnan.c new file mode 100644 index 0000000..77623a6 --- /dev/null +++ b/min-dgels/additional/disnan.c @@ -0,0 +1,52 @@ +/* disnan.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_disnan_(doublereal *din) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + extern logical _starpu_dlaisnan_(doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ +/* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ +/* future.
*/ + +/* Arguments */ +/* ========= */ + +/* DIN (input) DOUBLE PRECISION */ +/* Input to test for NaN. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* The same address is handed in for both operands of _starpu_dlaisnan_, and its result is returned unchanged */ ret_val = _starpu_dlaisnan_(din, din); + return ret_val; +} /* _starpu_disnan_ */ diff --git a/min-dgels/additional/dlabad.c b/min-dgels/additional/dlabad.c new file mode 100644 index 0000000..dda6e97 --- /dev/null +++ b/min-dgels/additional/dlabad.c @@ -0,0 +1,72 @@ +/* dlabad.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large) +{ + /* Builtin functions */ + double d_lg10(doublereal *), sqrt(doublereal); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLABAD takes as input the values computed by DLAMCH for underflow and */ +/* overflow, and returns the square root of each of these values if the */ +/* log of LARGE is sufficiently large. This subroutine is intended to */ +/* identify machines with a large exponent range, such as the Crays, and */ +/* redefine the underflow and overflow limits to be the square roots of */ +/* the values computed by DLAMCH. This subroutine is needed because */ +/* DLAMCH does not compensate for poor arithmetic in the upper half of */ +/* the exponent range, as is found on a Cray.
*/ + +/* Arguments */ +/* ========= */ + +/* SMALL (input/output) DOUBLE PRECISION */ +/* On entry, the underflow threshold as computed by DLAMCH. */ +/* On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* root of SMALL, otherwise unchanged. */ + +/* LARGE (input/output) DOUBLE PRECISION */ +/* On entry, the overflow threshold as computed by DLAMCH. */ +/* On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* root of LARGE, otherwise unchanged. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* If it looks like we're on a Cray, take the square root of */ +/* SMALL and LARGE to avoid overflow and underflow problems. The concrete test is d_lg10(large) > 2e3; d_lg10 is presumably the libf2c base-10 logarithm that takes its argument by address (TODO confirm). Both values are rewritten in place and the function always returns 0. */ + + if (d_lg10(large) > 2e3) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* _starpu_dlabad_ */ diff --git a/min-dgels/additional/dlaisnan.c b/min-dgels/additional/dlaisnan.c new file mode 100644 index 0000000..fd59014 --- /dev/null +++ b/min-dgels/additional/dlaisnan.c @@ -0,0 +1,58 @@ +/* dlaisnan.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2) +{ + /* System generated locals */ + logical ret_val; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* This routine is not for general use. It exists solely to avoid */ +/* over-optimization in DISNAN. */ + +/* DLAISNAN checks for NaNs by comparing its two arguments for */ +/* inequality. NaN is the only floating-point value where NaN != NaN */ +/* returns .TRUE. To check for NaNs, pass the same variable as both */ +/* arguments. */ + +/* A compiler must assume that the two arguments are */ +/* not the same variable, and the test will not be optimized away. */ +/* Interprocedural or whole-program optimization may delete this */ +/* test. The ISNAN functions will be replaced by the correct */ +/* Fortran 03 intrinsic once the intrinsic is widely available. */ + +/* Arguments */ +/* ========= */ + +/* DIN1 (input) DOUBLE PRECISION */ +/* DIN2 (input) DOUBLE PRECISION */ +/* Two numbers to compare for inequality. */ + +/* ===================================================================== */ + +/* .. Executable Statements .. */ + /* The result is the C truth value (0 or 1) of the inequality between the two pointed-to doubles */ ret_val = *din1 != *din2; + return ret_val; +} /* _starpu_dlaisnan_ */ diff --git a/min-dgels/additional/dlamch.c b/min-dgels/additional/dlamch.c new file mode 100644 index 0000000..f1130b9 --- /dev/null +++ b/min-dgels/additional/dlamch.c @@ -0,0 +1,1001 @@ +/* dlamch.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b32 = 0.; + +/* Returns the machine parameter selected by the letter at cmach (see Arguments below). The parameters are computed once, on the first call, and kept in the static locals; an unrecognized letter returns 0. */ doublereal _starpu_dlamch_(char *cmach) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Builtin functions */ + double pow_di(doublereal *, integer *); + + /* Local variables */ + static doublereal t; + integer it; + static doublereal rnd, eps, base; + integer beta; + static doublereal emin, prec, emax; + integer imin, imax; + logical lrnd; + static doublereal rmin, rmax; + doublereal rmach = 0.; /* stays 0 when CMACH matches none of the documented letters, instead of being read uninitialized */ + extern logical _starpu_lsame_(char *, char *); + doublereal small; + static doublereal sfmin; + extern /* Subroutine */ int _starpu_dlamc2_(integer *, integer *, logical *, + doublereal *, integer *, doublereal *, integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMCH determines double precision machine parameters.
*/ + +/* Arguments */ +/* ========= */ + +/* CMACH (input) CHARACTER*1 */ +/* Specifies the value to be returned by DLAMCH: */ +/* = 'E' or 'e', DLAMCH := eps */ +/* = 'S' or 's', DLAMCH := sfmin */ +/* = 'B' or 'b', DLAMCH := base */ +/* = 'P' or 'p', DLAMCH := eps*base */ +/* = 'N' or 'n', DLAMCH := t */ +/* = 'R' or 'r', DLAMCH := rnd */ +/* = 'M' or 'm', DLAMCH := emin */ +/* = 'U' or 'u', DLAMCH := rmin */ +/* = 'L' or 'l', DLAMCH := emax */ +/* = 'O' or 'o', DLAMCH := rmax; any other value, DLAMCH := 0 */ + +/* where */ + +/* eps = relative machine precision */ +/* sfmin = safe minimum, such that 1/sfmin does not overflow */ +/* base = base of the machine */ +/* prec = eps*base */ +/* t = number of (base) digits in the mantissa */ +/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ +/* emin = minimum exponent before (gradual) underflow */ +/* rmin = underflow threshold - base**(emin-1) */ +/* emax = largest exponent before overflow */ +/* rmax = overflow threshold - (base**emax)*(1-eps) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + _starpu_dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (doublereal) beta; + t = (doublereal) it; + if (lrnd) { + rnd = 1.; + i__1 = 1 - it; + eps = pow_di(&base, &i__1) / 2; + } else { + rnd = 0.; + i__1 = 1 - it; + eps = pow_di(&base, &i__1); + } + prec = eps * base; + emin = (doublereal) imin; + emax = (doublereal) imax; + sfmin = rmin; + small = 1. / rmax; + if (small >= sfmin) { + +/* Use SMALL plus a bit, to avoid the possibility of rounding */ +/* causing overflow when computing 1/sfmin.
*/ + + sfmin = small * (eps + 1.); + } + } + + /* rmach was initialized to 0, so a CMACH that matches none of the letters below yields 0 rather than an indeterminate value */ if (_starpu_lsame_(cmach, "E")) { + rmach = eps; + } else if (_starpu_lsame_(cmach, "S")) { + rmach = sfmin; + } else if (_starpu_lsame_(cmach, "B")) { + rmach = base; + } else if (_starpu_lsame_(cmach, "P")) { + rmach = prec; + } else if (_starpu_lsame_(cmach, "N")) { + rmach = t; + } else if (_starpu_lsame_(cmach, "R")) { + rmach = rnd; + } else if (_starpu_lsame_(cmach, "M")) { + rmach = emin; + } else if (_starpu_lsame_(cmach, "U")) { + rmach = rmin; + } else if (_starpu_lsame_(cmach, "L")) { + rmach = emax; + } else if (_starpu_lsame_(cmach, "O")) { + rmach = rmax; + } + + ret_val = rmach; + first = FALSE_; + return ret_val; + +/* End of DLAMCH */ + +} /* _starpu_dlamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + doublereal d__1, d__2; + + /* Local variables */ + doublereal a, b, c__, f, t1, t2; + static integer lt; + doublereal one, qtr; + static logical lrnd; + static integer lbeta; + doublereal savec; + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + static logical lieee1; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMC1 determines the machine parameters given by BETA, T, RND, and */ +/* IEEE1. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. */ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition.
This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* IEEE1 (output) LOGICAL */ +/* Specifies whether rounding appears to be done in the IEEE */ +/* 'round to nearest' style. */ + +/* Further Details */ +/* =============== */ + +/* The routine is based on the routine ENVRON by Malcolm and */ +/* incorporates suggestions by Gentleman and Marovich. See */ + +/* Malcolm M. A. (1972) Algorithms to reveal properties of */ +/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ + +/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ +/* that reveal properties of floating point arithmetic units. */ +/* Comms. of the ACM, 17, 276-277. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + one = 1.; + +/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ +/* IEEE1, T and RND. */ + +/* Throughout this routine we use the function DLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* Compute a = 2.0**m with the smallest positive integer m such */ +/* that */ + +/* fl( a + 1.0 ) = a. */ + + a = 1.; + c__ = 1.; + +/* + WHILE( C.EQ.ONE )LOOP -- this while loop is written as label L10 plus a backward goto; the later WHILE markers follow the same pattern */ +L10: + if (c__ == one) { + a *= 2; + c__ = _starpu_dlamc3_(&a, &one); + d__1 = -a; + c__ = _starpu_dlamc3_(&c__, &d__1); + goto L10; + } +/* + END WHILE */ + +/* Now compute b = 2.0**m with the smallest positive integer m */ +/* such that */ + +/* fl( a + b ) .gt. a. */ + + b = 1.; + c__ = _starpu_dlamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ +L20: + if (c__ == a) { + b *= 2; + c__ = _starpu_dlamc3_(&a, &b); + goto L20; + } +/* + END WHILE */ + +/* Now compute the base.
a and c are neighbouring floating point */ +/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ +/* their difference is beta. Adding 0.25 to c is to ensure that it */ +/* is truncated to beta and not ( beta - 1 ). */ + + qtr = one / 4; + savec = c__; + d__1 = -a; + c__ = _starpu_dlamc3_(&c__, &d__1); + lbeta = (integer) (c__ + qtr); + +/* Now determine whether rounding or chopping occurs, by adding a */ +/* bit less than beta/2 and a bit more than beta/2 to a. */ + + b = (doublereal) lbeta; + d__1 = b / 2; + d__2 = -b / 100; + f = _starpu_dlamc3_(&d__1, &d__2); + c__ = _starpu_dlamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } else { + lrnd = FALSE_; + } + d__1 = b / 2; + d__2 = b / 100; + f = _starpu_dlamc3_(&d__1, &d__2); + c__ = _starpu_dlamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* Try and decide whether rounding is done in the IEEE 'round to */ +/* nearest' style. B/2 is half a unit in the last place of the two */ +/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ +/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ +/* A, but adding B/2 to SAVEC should change SAVEC. */ + + d__1 = b / 2; + t1 = _starpu_dlamc3_(&d__1, &a); + d__1 = b / 2; + t2 = _starpu_dlamc3_(&d__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* Now find the mantissa, t. It should be the integer part of */ +/* log to the base beta of a, however it is safer to determine t */ +/* by powering. So we find t as the smallest positive integer for */ +/* which */ + +/* fl( beta**t + 1.0 ) = 1.0.
*/ + + lt = 0; + a = 1.; + c__ = 1.; + +/* + WHILE( C.EQ.ONE )LOOP */ +L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = _starpu_dlamc3_(&a, &one); + d__1 = -a; + c__ = _starpu_dlamc3_(&c__, &d__1); + goto L30; + } +/* + END WHILE */ + + } + + /* lbeta, lt, lrnd and lieee1 are static, so calls after the first skip the probing above and only copy the cached values out */ *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + first = FALSE_; + return 0; + +/* End of DLAMC1 */ + +} /* _starpu_dlamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, + doublereal *eps, integer *emin, doublereal *rmin, integer *emax, + doublereal *rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 DLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double pow_di(doublereal *, integer *); + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + + /* Local variables */ + doublereal a, b, c__; + integer i__; + static integer lt; + doublereal one, two; + logical ieee; + doublereal half; + logical lrnd; + static doublereal leps; + doublereal zero; + static integer lbeta; + doublereal rbase; + static integer lemin, lemax; + integer gnmin; + doublereal small; + integer gpmin; + doublereal third; + static doublereal lrmin, lrmax; + doublereal sixth; + extern /* Subroutine */ int _starpu_dlamc1_(integer *, integer *, logical *, + logical *); + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + logical lieee1; + extern /* Subroutine */ int _starpu_dlamc4_(integer *, doublereal *,
integer *), + _starpu_dlamc5_(integer *, integer *, integer *, logical *, integer *, + doublereal *); + integer ngnmin, ngpmin; + + /* Fortran I/O blocks */ + static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; + + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMC2 determines the machine parameters specified in its argument */ +/* list. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. */ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* EPS (output) DOUBLE PRECISION */ +/* The smallest positive number such that */ + +/* fl( 1.0 - EPS ) .LT. 1.0, */ + +/* where fl denotes the computed value. */ + +/* EMIN (output) INTEGER */ +/* The minimum exponent before (gradual) underflow occurs. */ + +/* RMIN (output) DOUBLE PRECISION */ +/* The smallest normalized number for the machine, given by */ +/* BASE**( EMIN - 1 ), where BASE is the floating point value */ +/* of BETA. */ + +/* EMAX (output) INTEGER */ +/* The maximum exponent before overflow occurs. */ + +/* RMAX (output) DOUBLE PRECISION */ +/* The largest positive number for the machine, given by */ +/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ +/* value of BETA. */ + +/* Further Details */ +/* =============== */ + +/* The computation of EPS is based on a routine PARANOIA by */ +/* W. Kahan of the University of California at Berkeley. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + zero = 0.; + one = 1.; + two = 2.; + +/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ +/* BETA, T, RND, EPS, EMIN and RMIN. */ + +/* Throughout this routine we use the function DLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ + + _starpu_dlamc1_(&lbeta, &lt, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (doublereal) lbeta; + i__1 = -lt; + a = pow_di(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + d__1 = -half; + sixth = _starpu_dlamc3_(&b, &d__1); + third = _starpu_dlamc3_(&sixth, &sixth); + d__1 = -half; + b = _starpu_dlamc3_(&third, &d__1); + b = _starpu_dlamc3_(&b, &sixth); + b = abs(b); + if (b < leps) { + b = leps; + } + + leps = 1.; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ +L10: + if (leps > b && b > zero) { + leps = b; + d__1 = half * leps; +/* Computing 5th power */ + d__3 = two, d__4 = d__3, d__3 *= d__3; +/* Computing 2nd power */ + d__5 = leps; + d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5); + c__ = _starpu_dlamc3_(&d__1, &d__2); + d__1 = -c__; + c__ = _starpu_dlamc3_(&half, &d__1); + b = _starpu_dlamc3_(&half, &c__); + d__1 = -b; + c__ = _starpu_dlamc3_(&half, &d__1); + b = _starpu_dlamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* Computation of EPS complete. */ + +/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ +/* Keep dividing A by BETA until (gradual) underflow occurs. This */ +/* is detected when we cannot recover the previous A. 
*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + d__1 = small * rbase; + small = _starpu_dlamc3_(&d__1, &zero); +/* L20: */ + } + a = _starpu_dlamc3_(&one, &small); + _starpu_dlamc4_(&ngpmin, &one, &lbeta); + d__1 = -one; + _starpu_dlamc4_(&ngnmin, &d__1, &lbeta); + _starpu_dlamc4_(&gpmin, &a, &lbeta); + d__1 = -a; + _starpu_dlamc4_(&gnmin, &d__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* ( Non twos-complement machines, no gradual underflow; */ +/* e.g., VAX ) */ + } else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* ( Non twos-complement machines, with gradual underflow; */ +/* e.g., IEEE standard followers ) */ + } else { + lemin = min(ngpmin,gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin,ngnmin); +/* ( Twos-complement machines, no gradual underflow; */ +/* e.g., CYBER 205 ) */ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) + { + if (gpmin - min(ngpmin,ngnmin) == 3) { + lemin = max(ngpmin,ngnmin) - 1 + lt; +/* ( Twos-complement machines with gradual underflow; */ +/* no known machine ) */ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else { +/* Computing MIN */ + i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); + lemin = min(i__1,gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + first = FALSE_; +/* ** */ +/* Comment out this if block if EMIN is ok */ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___58); + do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); + e_wsfe(); + } +/* ** */ + +/* Assume IEEE arithmetic if we found denormalised numbers above, */ +/* or if arithmetic seems to 
round in the IEEE style, determined */ +/* in routine DLAMC1. A true IEEE machine should have both things */ +/* true; however, faulty machines may have one or the other. */ + + ieee = ieee || lieee1; + +/* Compute RMIN by successive division by BETA. We could compute */ +/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ +/* this computation. */ + + lrmin = 1.; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = lrmin * rbase; + lrmin = _starpu_dlamc3_(&d__1, &zero); +/* L30: */ + } + +/* Finally, call DLAMC5 to compute EMAX and RMAX. */ + + _starpu_dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of DLAMC2 */ + +} /* _starpu_dlamc2_ */ + + +/* *********************************************************************** */ + +doublereal _starpu_dlamc3_(doublereal *a, doublereal *b) +{ + /* System generated locals */ + doublereal ret_val; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMC3 is intended to force A and B to be stored prior to doing */ +/* the addition of A and B , for use in situations where optimizers */ +/* might hold one of these in a register. */ + +/* Arguments */ +/* ========= */ + +/* A (input) DOUBLE PRECISION */ +/* B (input) DOUBLE PRECISION */ +/* The values A and B. */ + +/* ===================================================================== */ + +/* .. Executable Statements .. 
*/ + + ret_val = *a + *b; + + return ret_val; + +/* End of DLAMC3 */ + +} /* _starpu_dlamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + doublereal a; + integer i__; + doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMC4 is a service routine for DLAMC2. */ + +/* Arguments */ +/* ========= */ + +/* EMIN (output) INTEGER */ +/* The minimum exponent before (gradual) underflow, computed by */ +/* setting A = START and dividing by BASE until the previous A */ +/* can not be recovered. */ + +/* START (input) DOUBLE PRECISION */ +/* The starting point for determining EMIN. */ + +/* BASE (input) INTEGER */ +/* The base of the machine. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + a = *start; + one = 1.; + rbase = one / *base; + zero = 0.; + *emin = 1; + d__1 = a * rbase; + b1 = _starpu_dlamc3_(&d__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
*/ +/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ +L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + d__1 = a / *base; + b1 = _starpu_dlamc3_(&d__1, &zero); + d__1 = b1 * *base; + c1 = _starpu_dlamc3_(&d__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + d__1 = a * rbase; + b2 = _starpu_dlamc3_(&d__1, &zero); + d__1 = b2 / rbase; + c2 = _starpu_dlamc3_(&d__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of DLAMC4 */ + +} /* _starpu_dlamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, doublereal *rmax) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + integer i__; + doublereal y, z__; + integer try__, lexp; + doublereal oldy; + integer uexp, nbits; + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + doublereal recbas; + integer exbits, expsum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ +/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ +/* approximately to a power of 2. It will fail on machines where this */ +/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ +/* EMAX = 28718). It will also fail if the value supplied for EMIN is */ +/* too large (i.e. too close to zero), probably with overflow. */ + +/* Arguments */ +/* ========= */ + +/* BETA (input) INTEGER */ +/* The base of floating-point arithmetic. 
*/ + +/* P (input) INTEGER */ +/* The number of base BETA digits in the mantissa of a */ +/* floating-point value. */ + +/* EMIN (input) INTEGER */ +/* The minimum exponent before (gradual) underflow. */ + +/* IEEE (input) LOGICAL */ +/* A logical flag specifying whether or not the arithmetic */ +/* system is thought to comply with the IEEE standard. */ + +/* EMAX (output) INTEGER */ +/* The largest exponent before overflow */ + +/* RMAX (output) DOUBLE PRECISION */ +/* The largest machine floating-point number. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* First compute LEXP and UEXP, two powers of 2 that bound */ +/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ +/* approximately to the bound that is closest to abs(EMIN). */ +/* (EMAX is the exponent of the required number RMAX). */ + + lexp = 1; + exbits = 1; +L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } else { + uexp = try__; + ++exbits; + } + +/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ +/* than or equal to EMIN. EXBITS is the number of bits needed to */ +/* store the exponent. */ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } else { + expsum = uexp << 1; + } + +/* EXPSUM is the exponent range, approximately equal to */ +/* EMAX - EMIN + 1 . */ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* NBITS is the total number of bits needed to store a */ +/* floating-point number. 
*/ + + if (nbits % 2 == 1 && *beta == 2) { + +/* Either there are an odd number of bits used to store a */ +/* floating-point number, which is unlikely, or some bits are */ +/* not used in the representation of numbers, which is possible, */ +/* (e.g. Cray machines) or the mantissa has an implicit bit, */ +/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ +/* most likely. We have to assume the last alternative. */ +/* If this is true, then we need to reduce EMAX by one because */ +/* there must be some way of representing zero in an implicit-bit */ +/* system. On machines like Cray, we are reducing EMAX by one */ +/* unnecessarily. */ + + --(*emax); + } + + if (*ieee) { + +/* Assume we are on an IEEE machine which reserves one exponent */ +/* for infinity and NaN. */ + + --(*emax); + } + +/* Now create RMAX, the largest machine number, which should */ +/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ + +/* First compute 1.0 - BETA**(-P), being careful that the */ +/* result is less than 1.0 . */ + + recbas = 1. / *beta; + z__ = *beta - 1.; + y = 0.; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.) { + oldy = y; + } + y = _starpu_dlamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = y * *beta; + y = _starpu_dlamc3_(&d__1, &c_b32); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of DLAMC5 */ + +} /* _starpu_dlamc5_ */ diff --git a/min-dgels/additional/dlange.c b/min-dgels/additional/dlange.c new file mode 100644 index 0000000..1a7c787 --- /dev/null +++ b/min-dgels/additional/dlange.c @@ -0,0 +1,199 @@ +/* dlange.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANGE returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real matrix A. 
*/ + +/* Description */ +/* =========== */ + +/* DLANGE returns the value */ + +/* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANGE as described */ +/* above. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. When M = 0, */ +/* DLANGE is set to zero. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. When N = 0, */ +/* DLANGE is set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(M,1). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* _starpu_dlange_ */ diff --git a/min-dgels/additional/dlapy2.c b/min-dgels/additional/dlapy2.c new file mode 100644 index 0000000..ac89cb8 --- /dev/null +++ b/min-dgels/additional/dlapy2.c @@ -0,0 +1,73 @@ +/* dlapy2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal w, z__, xabs, yabs; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */ +/* overflow. */ + +/* Arguments */ +/* ========= */ + +/* X (input) DOUBLE PRECISION */ +/* Y (input) DOUBLE PRECISION */ +/* X and Y specify the values x and y. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0.) { + ret_val = w; + } else { +/* Computing 2nd power */ + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); + } + return ret_val; + +/* End of DLAPY2 */ + +} /* _starpu_dlapy2_ */ diff --git a/min-dgels/additional/dlarf.c b/min-dgels/additional/dlarf.c new file mode 100644 index 0000000..167887e --- /dev/null +++ b/min-dgels/additional/dlarf.c @@ -0,0 +1,193 @@ +/* dlarf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = 1.; +static doublereal c_b5 = 0.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastc, lastv; + extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), + _starpu_iladlr_(integer *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARF applies a real elementary reflector H to a real m by n matrix */ +/* C, from either the left or the right. H is represented in the form */ + +/* H = I - tau * v * v' */ + +/* where tau is a real scalar and v is a real vector. 
*/ + +/* If tau = 0, then H is taken to be the unit matrix. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': form H * C */ +/* = 'R': form C * H */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* V (input) DOUBLE PRECISION array, dimension */ +/* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* The vector v in the representation of H. V is not used if */ +/* TAU = 0. */ + +/* INCV (input) INTEGER */ +/* The increment between elements of v. INCV <> 0. */ + +/* TAU (input) DOUBLE PRECISION */ +/* The value tau in the representation of H. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* or C * H if SIDE = 'R'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* or (M) if SIDE = 'R' */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = _starpu_lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.) 
{ + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* Note that lastc.eq.0 renders the BLAS operations null; no special */ +/* case is needed at this level. */ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + _starpu_dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, & + v[1], incv, &c_b5, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ + + d__1 = -(*tau); + _starpu_dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + _starpu_dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, + &v[1], incv, &c_b5, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ + + d__1 = -(*tau); + _starpu_dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* _starpu_dlarf_ */ diff --git a/min-dgels/additional/dlarfb.c b/min-dgels/additional/dlarfb.c new file mode 100644 index 0000000..7829f2a --- /dev/null +++ b/min-dgels/additional/dlarfb.c @@ -0,0 +1,774 @@ +/* dlarfb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b14 = 1.; +static doublereal c_b25 = -1.; + +/* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer lastv; + extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), + _starpu_iladlr_(integer *, integer *, doublereal *, integer *); + char transt[1]; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARFB applies a real block reflector H or its transpose H' to a */ +/* real m by n matrix C, from either the left or the right. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply H or H' from the Left */ +/* = 'R': apply H or H' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply H (No transpose) */ +/* = 'T': apply H' (Transpose) */ + +/* DIRECT (input) CHARACTER*1 */ +/* Indicates how H is formed from a product of elementary */ +/* reflectors */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Indicates how the vectors which define the elementary */ +/* reflectors are stored: */ +/* = 'C': Columnwise */ +/* = 'R': Rowwise */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* K (input) INTEGER */ +/* The order of the matrix T (= the number of elementary */ +/* reflectors whose product defines the block reflector). */ + +/* V (input) DOUBLE PRECISION array, dimension */ +/* (LDV,K) if STOREV = 'C' */ +/* (LDV,M) if STOREV = 'R' and SIDE = 'L' */ +/* (LDV,N) if STOREV = 'R' and SIDE = 'R' */ +/* The matrix V. See further details. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ +/* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ +/* if STOREV = 'R', LDV >= K. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The triangular k by k matrix T in the representation of the */ +/* block reflector. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. 
*/ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ + +/* LDWORK (input) INTEGER */ +/* The leading dimension of the array WORK. */ +/* If SIDE = 'L', LDWORK >= max(1,N); */ +/* if SIDE = 'R', LDWORK >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (_starpu_lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (_starpu_lsame_(storev, "C")) { + + if (_starpu_lsame_(direct, "F")) { + +/* Let V = ( V1 ) (first K rows) */ +/* ( V2 ) */ +/* where V1 is unit lower triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ + +/* W := C1' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b25, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b14, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + 
+/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b14, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* Let V = ( V1 ) */ +/* ( V2 ) (last K rows) */ +/* where V2 is unit upper triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ + +/* W := C2' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b25, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = 
(C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (_starpu_lsame_(storev, "R")) { + + if (_starpu_lsame_(direct, "F")) { + +/* Let V = ( V1 V2 ) (V1: first K columns) */ +/* where V1 is unit upper triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ + +/* W := C1' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, + &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b14, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored 
in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* Let V = ( V1 V2 ) (V2: last K columns) */ +/* where V2 is unit lower triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ + +/* W := C2' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := 
C * V' = (C1*V1' + C2*V2') (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* _starpu_dlarfb_ */ diff --git a/min-dgels/additional/dlarfg.c b/min-dgels/additional/dlarfg.c new file mode 100644 index 0000000..09405a4 --- /dev/null +++ b/min-dgels/additional/dlarfg.c @@ -0,0 +1,170 @@ +/* dlarfg.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal xnorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + doublereal safmin, rsafmn; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARFG generates a real elementary reflector H of order n, such */ +/* that */ + +/* H * ( alpha ) = ( beta ), H' * H = I. */ +/* ( x ) ( 0 ) */ + +/* where alpha and beta are scalars, and x is an (n-1)-element real */ +/* vector. H is represented in the form */ + +/* H = I - tau * ( 1 ) * ( 1 v' ) , */ +/* ( v ) */ + +/* where tau is a real scalar and v is a real (n-1)-element */ +/* vector. */ + +/* If the elements of x are all zero, then tau = 0 and H is taken to be */ +/* the unit matrix. */ + +/* Otherwise 1 <= tau <= 2. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the elementary reflector. 
*/ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* On entry, the value alpha. */ +/* On exit, it is overwritten with the value beta. */ + +/* X (input/output) DOUBLE PRECISION array, dimension */ +/* (1+(N-2)*abs(INCX)) */ +/* On entry, the vector x. */ +/* On exit, it is overwritten with the vector v. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* TAU (output) DOUBLE PRECISION */ +/* The value tau. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. 
/ (*alpha - beta); + _starpu_dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFG */ + +} /* _starpu_dlarfg_ */ diff --git a/min-dgels/additional/dlarfp.c b/min-dgels/additional/dlarfp.c new file mode 100644 index 0000000..5baf1d5 --- /dev/null +++ b/min-dgels/additional/dlarfp.c @@ -0,0 +1,192 @@ +/* dlarfp.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal xnorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + doublereal safmin, rsafmn; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARFP generates a real elementary reflector H of order n, such */ +/* that */ + +/* H * ( alpha ) = ( beta ), H' * H = I. */ +/* ( x ) ( 0 ) */ + +/* where alpha and beta are scalars, beta is non-negative, and x is */ +/* an (n-1)-element real vector. H is represented in the form */ + +/* H = I - tau * ( 1 ) * ( 1 v' ) , */ +/* ( v ) */ + +/* where tau is a real scalar and v is a real (n-1)-element */ +/* vector. */ + +/* If the elements of x are all zero, then tau = 0 and H is taken to be */ +/* the unit matrix. */ + +/* Otherwise 1 <= tau <= 2. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the elementary reflector. */ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* On entry, the value alpha. */ +/* On exit, it is overwritten with the value beta. */ + +/* X (input/output) DOUBLE PRECISION array, dimension */ +/* (1+(N-2)*abs(INCX)) */ +/* On entry, the vector x. */ +/* On exit, it is overwritten with the vector v. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* TAU (output) DOUBLE PRECISION */ +/* The value tau. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 */ + + if (*alpha >= 0.) { +/* When TAU.eq.ZERO, the vector is special-cased to be */ +/* all zeros in the application routines. We do not need */ +/* to clear it. 
*/ + *tau = 0.; + } else { +/* However, the application routines rely on explicit */ +/* zero checks when TAU.ne.ZERO, and we must clear X. */ + *tau = 2.; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + x[(j - 1) * *incx + 1] = 0.; + } + *alpha = -(*alpha); + } + } else { + +/* general case */ + + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = d_sign(&d__1, alpha); + safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = d_sign(&d__1, alpha); + } + *alpha += beta; + if (beta < 0.) { + beta = -beta; + *tau = -(*alpha) / beta; + } else { + *alpha = xnorm * (xnorm / *alpha); + *tau = *alpha / beta; + *alpha = -(*alpha); + } + i__1 = *n - 1; + d__1 = 1. / *alpha; + _starpu_dscal_(&i__1, &d__1, &x[1], incx); + +/* If BETA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFP */ + +} /* _starpu_dlarfp_ */ diff --git a/min-dgels/additional/dlarft.c b/min-dgels/additional/dlarft.c new file mode 100644 index 0000000..8fc445f --- /dev/null +++ b/min-dgels/additional/dlarft.c @@ -0,0 +1,325 @@ +/* dlarft.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 0.; + +/* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, prevlastv; + doublereal vii; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastv; + extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARFT forms the triangular factor T of a real block reflector H */ +/* of order n, which is defined as a product of k elementary reflectors. */ + +/* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ + +/* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
*/ + +/* If STOREV = 'C', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th column of the array V, and */ + +/* H = I - V * T * V' */ + +/* If STOREV = 'R', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th row of the array V, and */ + +/* H = I - V' * T * V */ + +/* Arguments */ +/* ========= */ + +/* DIRECT (input) CHARACTER*1 */ +/* Specifies the order in which the elementary reflectors are */ +/* multiplied to form the block reflector: */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Specifies how the vectors which define the elementary */ +/* reflectors are stored (see also Further Details): */ +/* = 'C': columnwise */ +/* = 'R': rowwise */ + +/* N (input) INTEGER */ +/* The order of the block reflector H. N >= 0. */ + +/* K (input) INTEGER */ +/* The order of the triangular factor T (= the number of */ +/* elementary reflectors). K >= 1. */ + +/* V (input/output) DOUBLE PRECISION array, dimension */ +/* (LDV,K) if STOREV = 'C' */ +/* (LDV,N) if STOREV = 'R' */ +/* The matrix V. See further details. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i). */ + +/* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The k by k triangular factor T of the block reflector. */ +/* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* lower triangular. The rest of the array is not used. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. 
*/ + +/* Further Details */ +/* =============== */ + +/* The shape of the matrix V and the storage of the vectors which define */ +/* the H(i) is best illustrated by the following example with n = 5 and */ +/* k = 3. The elements equal to 1 are not stored; the corresponding */ +/* array elements are modified but restored on exit. The rest of the */ +/* array is not used. */ + +/* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ + +/* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* ( v1 v2 v3 ) */ +/* ( v1 v2 v3 ) */ + +/* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ + +/* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* ( 1 v3 ) */ +/* ( 1 ) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (_starpu_lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.; + if (_starpu_lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + break; + } + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + d__1 = -tau[i__]; + _starpu_dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[ + i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + break; + } + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + i__2 = i__ - 1; + i__3 = j - i__ + 1; + d__1 = -tau[i__]; + _starpu_dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b8, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (_starpu_lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + break; + } + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) := */ +/* - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + d__1 = -tau[i__]; + _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + break; + } + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) := */ +/* - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + d__1 = -tau[i__]; + _starpu_dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of DLARFT */ + +} /* _starpu_dlarft_ */ diff --git a/min-dgels/additional/dlascl.c b/min-dgels/additional/dlascl.c new file mode 100644 index 0000000..c6bf9ae --- /dev/null +++ b/min-dgels/additional/dlascl.c @@ -0,0 +1,354 @@ +/* dlascl.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, k1, k2, k3, k4; + doublereal mul, cto1; + logical done; + doublereal ctoc; + extern logical _starpu_lsame_(char *, char *); + integer itype; + doublereal cfrom1; + extern doublereal _starpu_dlamch_(char *); + doublereal cfromc; + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASCL multiplies the M by N real matrix A by the real scalar */ +/* CTO/CFROM. This is done without over/underflow as long as the final */ +/* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ +/* A may be full, upper triangular, lower triangular, upper Hessenberg, */ +/* or banded. */ + +/* Arguments */ +/* ========= */ + +/* TYPE (input) CHARACTER*1 */ +/* TYPE indices the storage type of the input matrix. */ +/* = 'G': A is a full matrix. */ +/* = 'L': A is a lower triangular matrix. */ +/* = 'U': A is an upper triangular matrix. 
*/ +/* = 'H': A is an upper Hessenberg matrix. */ +/* = 'B': A is a symmetric band matrix with lower bandwidth KL */ +/* and upper bandwidth KU and with only the lower */ +/* half stored. */ +/* = 'Q': A is a symmetric band matrix with lower bandwidth KL */ +/* and upper bandwidth KU and with only the upper */ +/* half stored. */ +/* = 'Z': A is a band matrix with lower bandwidth KL and upper */ +/* bandwidth KU. */ + +/* KL (input) INTEGER */ +/* The lower bandwidth of A. Referenced only if TYPE = 'B', */ +/* 'Q' or 'Z'. */ + +/* KU (input) INTEGER */ +/* The upper bandwidth of A. Referenced only if TYPE = 'B', */ +/* 'Q' or 'Z'. */ + +/* CFROM (input) DOUBLE PRECISION */ +/* CTO (input) DOUBLE PRECISION */ +/* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ +/* without over/underflow if the final result CTO*A(I,J)/CFROM */ +/* can be represented without over/underflow. CFROM must be */ +/* nonzero. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The matrix to be multiplied by CTO/CFROM. See TYPE for the */ +/* storage type. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* INFO (output) INTEGER */ +/* 0 - successful exit */ +/* <0 - if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (_starpu_lsame_(type__, "G")) { + itype = 0; + } else if (_starpu_lsame_(type__, "L")) { + itype = 1; + } else if (_starpu_lsame_(type__, "U")) { + itype = 2; + } else if (_starpu_lsame_(type__, "H")) { + itype = 3; + } else if (_starpu_lsame_(type__, "B")) { + itype = 4; + } else if (_starpu_lsame_(type__, "Q")) { + itype = 5; + } else if (_starpu_lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || _starpu_disnan_(cfrom)) { + *info = -4; + } else if (_starpu_disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* CFROMC is an inf. Multiply by a correctly signed zero for */ +/* finite CTOC, or a NaN if CTOC is infinite. */ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* CTOC is either 0 or an inf. 
In both cases, CTOC itself */ +/* serves as the correct multiplication factor. */ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ 
+ + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* _starpu_dlascl_ */ diff --git a/min-dgels/additional/dlaset.c b/min-dgels/additional/dlaset.c new file mode 100644 index 0000000..82e4676 --- /dev/null +++ b/min-dgels/additional/dlaset.c @@ -0,0 +1,152 @@ +/* dlaset.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j; + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASET initializes an m-by-n matrix A to BETA on the diagonal and */ +/* ALPHA on the offdiagonals. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies the part of the matrix A to be set. 
*/ +/* = 'U': Upper triangular part is set; the strictly lower */ +/* triangular part of A is not changed. */ +/* = 'L': Lower triangular part is set; the strictly upper */ +/* triangular part of A is not changed. */ +/* Otherwise: All of the matrix A is set. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* ALPHA (input) DOUBLE PRECISION */ +/* The constant to which the offdiagonal elements are to be set. */ + +/* BETA (input) DOUBLE PRECISION */ +/* The constant to which the diagonal elements are to be set. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On exit, the leading m-by-n submatrix of A is set as follows: */ + +/* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ +/* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ +/* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ + +/* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (_starpu_lsame_(uplo, "U")) { + +/* Set the strictly upper triangular or trapezoidal part of the */ +/* array to ALPHA. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (_starpu_lsame_(uplo, "L")) { + +/* Set the strictly lower triangular or trapezoidal part of the */ +/* array to ALPHA. 
*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* _starpu_dlaset_ */ diff --git a/min-dgels/additional/dlassq.c b/min-dgels/additional/dlassq.c new file mode 100644 index 0000000..34baa3d --- /dev/null +++ b/min-dgels/additional/dlassq.c @@ -0,0 +1,116 @@ +/* dlassq.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer ix; + doublereal absxi; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLASSQ returns the values scl and smsq such that */ + +/* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ + +/* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ +/* assumed to be non-negative and scl returns the value */ + +/* scl = max( scale, abs( x( i ) ) ). */ + +/* scale and sumsq must be supplied in SCALE and SUMSQ and */ +/* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ + +/* The routine makes only one pass through the vector x. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of elements to be used from the vector X. */ + +/* X (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector for which a scaled sum of squares is computed. */ +/* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ + +/* INCX (input) INTEGER */ +/* The increment between successive values of the vector X. */ +/* INCX > 0. */ + +/* SCALE (input/output) DOUBLE PRECISION */ +/* On entry, the value scale in the equation above. */ +/* On exit, SCALE is overwritten with scl , the scaling factor */ +/* for the sum of squares. */ + +/* SUMSQ (input/output) DOUBLE PRECISION */ +/* On entry, the value sumsq in the equation above. */ +/* On exit, SUMSQ is overwritten with smsq , the basic sum of */ +/* squares from which scl has been factored out. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) 
{ + absxi = (d__1 = x[ix], abs(d__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* _starpu_dlassq_ */ diff --git a/min-dgels/additional/dnrm2.c b/min-dgels/additional/dnrm2.c new file mode 100644 index 0000000..10b4522 --- /dev/null +++ b/min-dgels/additional/dnrm2.c @@ -0,0 +1,95 @@ +/* dnrm2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer ix; + doublereal ssq, norm, scale, absxi; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DNRM2 returns the euclidean norm of a vector via the function */ +/* name, so that */ + +/* DNRM2 := sqrt( x'*x ) */ + + +/* -- This version written on 25-October-1982. */ +/* Modified on 14-October-1993 to inline the call to DLASSQ. */ +/* Sven Hammarling, Nag Ltd. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* The following loop is equivalent to this call to the LAPACK */ +/* auxiliary routine: */ +/* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* _starpu_dnrm2_ */ diff --git a/min-dgels/additional/dorm2r.c b/min-dgels/additional/dorm2r.c new file mode 100644 index 0000000..ad37a22 --- /dev/null +++ b/min-dgels/additional/dorm2r.c @@ -0,0 +1,235 @@ +/* dorm2r.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORM2R overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQRF in the first k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* _starpu_dorm2r_ */ diff --git a/min-dgels/additional/dorml2.c b/min-dgels/additional/dorml2.c new file mode 100644 index 0000000..a0d5394 --- /dev/null +++ b/min-dgels/additional/dorml2.c @@ -0,0 +1,231 @@ +/* dorml2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORML2 overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGELQF in the first k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. 
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* _starpu_dorml2_ */ diff --git a/min-dgels/additional/dormlq.c b/min-dgels/additional/dormlq.c new file mode 100644 index 0000000..870ee89 --- /dev/null +++ b/min-dgels/additional/dormlq.c @@ -0,0 +1,334 @@ +/* dormlq.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + 
/* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMLQ overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. 
*/ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGELQF in the first k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Determine the block size. NB may be at most NBMAX, where NBMAX */ +/* is used to define the local array T. 
*/ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . 
H(i+ib-1) */ + + i__4 = nq - i__ + 1; + _starpu_dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* _starpu_dormlq_ */ diff --git a/min-dgels/additional/dormqr.c b/min-dgels/additional/dormqr.c new file mode 100644 index 0000000..50d0d0e --- /dev/null +++ b/min-dgels/additional/dormqr.c @@ -0,0 +1,327 @@ +/* dormqr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer 
i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMQR overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. 
*/ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQRF in the first k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. 
Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Determine the block size. NB may be at most NBMAX, where NBMAX */ +/* is used to define the local array T. 
*/ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . 
H(i+ib-1) */ + + i__4 = nq - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* _starpu_dormqr_ */ diff --git a/min-dgels/additional/dscal.c b/min-dgels/additional/dscal.c new file mode 100644 index 0000000..a27b8dd --- /dev/null +++ b/min-dgels/additional/dscal.c @@ -0,0 +1,96 @@ +/* dscal.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, m, mp1, nincx; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ +/* * */ +/* scales a vector by a constant. */ +/* uses unrolled loops for increment equal to one. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* _starpu_dscal_ */ diff --git a/min-dgels/additional/dtrmm.c b/min-dgels/additional/dtrmm.c new file mode 100644 index 0000000..f2c5234 --- /dev/null +++ b/min-dgels/additional/dtrmm.c @@ -0,0 +1,453 @@ +/* dtrmm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine: f2c translation of the Level 3 BLAS routine DTRMM. Overwrites b in place with alpha*op(A)*B (side "L") or alpha*B*op(A) (side "R"), A being triangular. All scalars are passed by pointer. Every exit path returns 0; an invalid argument is reported through _starpu_xerbla_ and the routine returns before b is modified. */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRMM performs one of the matrix-matrix operations */ + +/* B := alpha*op( A )*B, or B := alpha*B*op( A ), */ + +/* where alpha is a scalar, B is an m by n matrix, A is a unit, or */ +/* non-unit, upper or lower triangular matrix and op( A ) is one of */ + +/* op( A ) = A or op( A ) = A'. */ + +/* Arguments */ +/* ========== */ + +/* SIDE - CHARACTER*1. */ +/* On entry, SIDE specifies whether op( A ) multiplies B from */ +/* the left or right as follows: */ + +/* SIDE = 'L' or 'l' B := alpha*op( A )*B. */ + +/* SIDE = 'R' or 'r' B := alpha*B*op( A ). */ + +/* Unchanged on exit. */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix A is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit.
*/ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n' op( A ) = A. */ + +/* TRANSA = 'T' or 't' op( A ) = A'. */ + +/* TRANSA = 'C' or 'c' op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit triangular */ +/* as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of B. M must be at */ +/* least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of B. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* zero then A is not referenced and B need not be set before */ +/* entry. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ +/* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ +/* Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program.
When SIDE = 'L' or 'l' then */ +/* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* then LDA must be at least max( 1, n ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the matrix B, and on exit is overwritten by the */ +/* transformed matrix. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: move both base pointers back by one row and one column so that a[i + j * a_dim1] and b[i + j * b_dim1] address the 1-based elements A(i,j) and B(i,j) used by the loops below */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = _starpu_lsame_(side, "L"); + if (lside) { + nrowa = *m; /* A multiplies from the left: its order is m */ + } else { + nrowa = *n; /* A multiplies from the right: its order is n */ + } + nounit = _starpu_lsame_(diag, "N"); + upper = _starpu_lsame_(uplo, "U"); + + info = 0; /* set below to the 1-based position of the first invalid argument */ + if (! lside && ! _starpu_lsame_(side, "R")) { + info = 1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 2; + } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, + "T") && ! _starpu_lsame_(transa, "C")) { + info = 3; + } else if (! _starpu_lsame_(diag, "U") && !
_starpu_lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero: the m by n part of B is set to zero and A is never read. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { /* A upper: k ascends, and step k writes only rows 1..k of column j, so b(k,j) still holds its input value when it is read */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { /* a zero entry contributes nothing and is left as is */ + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { /* A lower: k descends, and step k writes only rows k..m of column j, so b(k,j) still holds its input value when it is read */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B.
*/ + + if (upper) { /* A upper: i descends; the new b(i,j) is built from rows 1..i of column j, none of which has been overwritten yet */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { /* A lower: i ascends; the new b(i,j) is built from rows i..m of column j, none of which has been overwritten yet */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { /* A upper: j descends; column j is scaled first and then accumulates the columns before j, which are still in their input state */ + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { /* A lower: j ascends; column j is scaled first and then accumulates the columns after j, which are still in their input state */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'.
*/ + + if (upper) { /* A upper: k ascends; column k is still in its input state when it is added into the columns before k, and only afterwards is it scaled */ + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { /* skip the column scaling when the factor is exactly one */ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { /* A lower: k descends; column k is still in its input state when it is added into the columns after k, and only afterwards is it scaled */ + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { /* skip the column scaling when the factor is exactly one */ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM . */ + +} /* _starpu_dtrmm_ */ diff --git a/min-dgels/additional/dtrmv.c b/min-dgels/additional/dtrmv.c new file mode 100644 index 0000000..6acf45c --- /dev/null +++ b/min-dgels/additional/dtrmv.c @@ -0,0 +1,345 @@ +/* dtrmv.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular matrix. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. 
*/ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* transformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + _starpu_xerbla_("DTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) 
{ + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV . 
*/ + +} /* _starpu_dtrmv_ */ diff --git a/min-dgels/additional/dtrsm.c b/min-dgels/additional/dtrsm.c new file mode 100644 index 0000000..546cd79 --- /dev/null +++ b/min-dgels/additional/dtrsm.c @@ -0,0 +1,490 @@ +/* dtrsm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRSM solves one of the matrix equations */ + +/* op( A )*X = alpha*B, or X*op( A ) = alpha*B, */ + +/* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ +/* non-unit, upper or lower triangular matrix and op( A ) is one of */ + +/* op( A ) = A or op( A ) = A'. */ + +/* The matrix X is overwritten on B. */ + +/* Arguments */ +/* ========== */ + +/* SIDE - CHARACTER*1. */ +/* On entry, SIDE specifies whether op( A ) appears on the left */ +/* or right of X as follows: */ + +/* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ + +/* SIDE = 'R' or 'r' X*op( A ) = alpha*B. 
*/ + +/* Unchanged on exit. */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix A is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n' op( A ) = A. */ + +/* TRANSA = 'T' or 't' op( A ) = A'. */ + +/* TRANSA = 'C' or 'c' op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit triangular */ +/* as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of B. M must be at */ +/* least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of B. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* zero then A is not referenced and B need not be set before */ +/* entry. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ +/* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ +/* Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. 
*/ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* then LDA must be at least max( 1, n ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the right-hand side matrix B, and on exit is */ +/* overwritten by the solution matrix X. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = _starpu_lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = _starpu_lsame_(diag, "N"); + upper = _starpu_lsame_(uplo, "U"); + + info = 0; + if (! lside && ! _starpu_lsame_(side, "R")) { + info = 1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 2; + } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, + "T") && ! 
_starpu_lsame_(transa, "C")) { + info = 3; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. 
/ a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM . */ + +} /* _starpu_dtrsm_ */ diff --git a/min-dgels/additional/dtrtrs.c b/min-dgels/additional/dtrtrs.c new file mode 100644 index 0000000..6b41d70 --- /dev/null +++ b/min-dgels/additional/dtrtrs.c @@ -0,0 +1,183 @@ +/* dtrtrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b12 = 1.; + +/* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + char *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTRS solves a triangular system of the form */ + +/* A * X = B or A**T * X = B, */ + +/* where A is a triangular matrix of order N, and B is an N-by-NRHS */ +/* matrix. A check is made to verify that A is nonsingular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of the array A contains the upper */ +/* triangular matrix, and the strictly lower triangular part of */ +/* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of the array A contains the lower triangular */ +/* matrix, and the strictly upper triangular part of A is not */ +/* referenced. If DIAG = 'U', the diagonal elements of A are */ +/* also not referenced and are assumed to be 1. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, if INFO = 0, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of A is zero, */ +/* indicating that the matrix is singular and the solutions */ +/* X have not been computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. 
*/ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + nounit = _starpu_lsame_(diag, "N"); + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + } + *info = 0; + +/* Solve A * x = b or A' * x = b. */ + + _starpu_dtrsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ + b_offset], ldb); + + return 0; + +/* End of DTRTRS */ + +} /* _starpu_dtrtrs_ */ diff --git a/min-dgels/additional/f2c.h b/min-dgels/additional/f2c.h new file mode 100644 index 0000000..b94ee7c --- /dev/null +++ b/min-dgels/additional/f2c.h @@ -0,0 +1,223 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
+ + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef long int integer; +typedef unsigned long int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef long int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; +#ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ +typedef long long longint; /* system-dependent */ +typedef unsigned long long ulongint; /* system-dependent */ +#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) +#define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) +#endif + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef long int flag; +typedef long int ftnlen; +typedef long int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; 
+ char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +/*typedef long int Long;*/ /* No longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(); +typedef shortint (*J_fp)(); +typedef integer (*I_fp)(); +typedef real (*R_fp)(); +typedef doublereal (*D_fp)(), (*E_fp)(); +typedef /* Complex */ VOID (*C_fp)(); +typedef /* Double Complex */ VOID 
(*Z_fp)(); +typedef logical (*L_fp)(); +typedef shortlogical (*K_fp)(); +typedef /* Character */ VOID (*H_fp)(); +typedef /* Subroutine */ int (*S_fp)(); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs +#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/min-dgels/additional/fio.h b/min-dgels/additional/fio.h new file mode 100644 index 0000000..ebf7696 --- /dev/null +++ b/min-dgels/additional/fio.h @@ -0,0 +1,141 @@ +#ifndef SYSDEP_H_INCLUDED +#include "sysdep1.h" +#endif +#include "stdio.h" +#include "errno.h" +#ifndef NULL +/* ANSI C */ +#include "stddef.h" +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +#ifndef FOPEN +#define FOPEN fopen +#endif + +#ifndef FREOPEN +#define FREOPEN freopen +#endif + +#ifndef FSEEK +#define FSEEK fseek +#endif + +#ifndef FSTAT +#define FSTAT fstat +#endif + +#ifndef FTELL +#define FTELL ftell +#endif + +#ifndef OFF_T +#define OFF_T long +#endif + +#ifndef STAT_ST +#define STAT_ST stat +#endif + +#ifndef STAT +#define STAT stat +#endif + +#ifdef MSDOS +#ifndef NON_UNIX_STDIO +#define NON_UNIX_STDIO +#endif +#endif + +#ifdef UIOLEN_int +typedef int uiolen; +#else +typedef long uiolen; +#endif + +/*units*/ +typedef struct +{ FILE *ufd; /*0=unconnected*/ + char *ufnm; +#ifndef MSDOS + long uinode; + int udev; +#endif + int url; /*0=sequential*/ + flag useek; /*true=can backspace, use dir, ...*/ + flag ufmt; + flag urw; /* (1 for can read) | (2 for can 
write) */ + flag ublnk; + flag uend; + flag uwrt; /*last io was write*/ + flag uscrtch; +} unit; + +#undef Void +#ifdef KR_headers +#define Void /*void*/ +extern int (*f__getn)(); /* for formatted input */ +extern void (*f__putn)(); /* for formatted output */ +extern void x_putc(); +extern long f__inode(); +extern VOID sig_die(); +extern int (*f__donewrec)(), t_putc(), x_wSL(); +extern int c_sfe(), err__fl(), xrd_SL(), f__putbuf(); +#else +#define Void void +#ifdef __cplusplus +extern "C" { +#endif +extern int (*f__getn)(void); /* for formatted input */ +extern void (*f__putn)(int); /* for formatted output */ +extern void x_putc(int); +extern long f__inode(char*,int*); +extern void sig_die(const char*,int); +extern void f__fatal(int, const char*); +extern int t_runc(alist*); +extern int f__nowreading(unit*), f__nowwriting(unit*); +extern int fk_open(int,int,ftnint); +extern int en_fio(void); +extern void f_init(void); +extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); +extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); +extern int c_sfe(cilist*), z_rnew(void); +extern int err__fl(int,int,const char*); +extern int xrd_SL(void); +extern int f__putbuf(int); +#endif +extern flag f__init; +extern cilist *f__elist; /*active external io list*/ +extern flag f__reading,f__external,f__sequential,f__formatted; +extern int (*f__doend)(Void); +extern FILE *f__cf; /*current file*/ +extern unit *f__curunit; /*current unit*/ +extern unit f__units[]; +#define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} +#define errfl(f,m,s) return err__fl((int)f,m,s) + +/*Table sizes*/ +#define MXUNIT 100 + +extern int f__recpos; /*position in current record*/ +extern OFF_T f__cursor; /* offset to move to */ +extern OFF_T f__hiwater; /* so TL doesn't confuse us */ +#ifdef __cplusplus + } +#endif + +#define WRITE 1 +#define READ 2 +#define SEQ 3 +#define DIR 4 +#define FMT 5 +#define UNF 6 +#define EXT 7 +#define INT 8 + +#define buf_end(x) 
(x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) diff --git a/min-dgels/additional/fmt.c b/min-dgels/additional/fmt.c new file mode 100644 index 0000000..286c98f --- /dev/null +++ b/min-dgels/additional/fmt.c @@ -0,0 +1,530 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif +#define skip(s) while(*s==' ') s++ +#ifdef interdata +#define SYLMX 300 +#endif +#ifdef pdp11 +#define SYLMX 300 +#endif +#ifdef vax +#define SYLMX 300 +#endif +#ifndef SYLMX +#define SYLMX 300 +#endif +#define GLITCH '\2' + /* special quote character for stu */ +extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/ +static struct syl f__syl[SYLMX]; +int f__parenlvl,f__pc,f__revloc; +#ifdef KR_headers +#define Const /*nothing*/ +#else +#define Const const +#endif + + static +#ifdef KR_headers +char *ap_end(s) char *s; +#else +const char *ap_end(const char *s) +#endif +{ char quote; + quote= *s++; + for(;*s;s++) + { if(*s!=quote) continue; + if(*++s!=quote) return(s); + } + if(f__elist->cierr) { + errno = 100; + return(NULL); + } + f__fatal(100, "bad string"); + /*NOTREACHED*/ return 0; +} + static int +#ifdef KR_headers +op_gen(a,b,c,d) +#else +op_gen(int a, int b, int c, int d) +#endif +{ struct syl *p= &f__syl[f__pc]; + if(f__pc>=SYLMX) + { fprintf(stderr,"format too complicated:\n"); + sig_die(f__fmtbuf, 1); + } + p->op=a; + p->p1=b; + p->p2.i[0]=c; + p->p2.i[1]=d; + return(f__pc++); +} +#ifdef KR_headers +static char *f_list(); +static char *gt_num(s,n,n1) char *s; int *n, n1; +#else +static const char *f_list(const char*); +static const char *gt_num(const char *s, int *n, int n1) +#endif +{ int m=0,f__cnt=0; + char c; + for(c= *s;;c = *s) + { if(c==' ') + { s++; + continue; + } + if(c>'9' || c<'0') break; + m=10*m+c-'0'; + f__cnt++; + s++; + } + if(f__cnt==0) { + if (!n1) + s = 0; + *n=n1; + } + else *n=m; + return(s); +} + + static +#ifdef KR_headers +char *f_s(s,curloc) char *s; +#else +const char *f_s(const char *s, int 
curloc) +#endif +{ + skip(s); + if(*s++!='(') + { + return(NULL); + } + if(f__parenlvl++ ==1) f__revloc=curloc; + if(op_gen(RET1,curloc,0,0)<0 || + (s=f_list(s))==NULL) + { + return(NULL); + } + skip(s); + return(s); +} + + static int +#ifdef KR_headers +ne_d(s,p) char *s,**p; +#else +ne_d(const char *s, const char **p) +#endif +{ int n,x,sign=0; + struct syl *sp; + switch(*s) + { + default: + return(0); + case ':': (void) op_gen(COLON,0,0,0); break; + case '$': + (void) op_gen(NONL, 0, 0, 0); break; + case 'B': + case 'b': + if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0); + else (void) op_gen(BN,0,0,0); + break; + case 'S': + case 's': + if(*(s+1)=='s' || *(s+1) == 'S') + { x=SS; + s++; + } + else if(*(s+1)=='p' || *(s+1) == 'P') + { x=SP; + s++; + } + else x=S; + (void) op_gen(x,0,0,0); + break; + case '/': (void) op_gen(SLASH,0,0,0); break; + case '-': sign=1; + case '+': s++; /*OUTRAGEOUS CODING TRICK*/ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (!(s=gt_num(s,&n,0))) { + bad: *p = 0; + return 1; + } + switch(*s) + { + default: + return(0); + case 'P': + case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break; + case 'X': + case 'x': (void) op_gen(X,n,0,0); break; + case 'H': + case 'h': + sp = &f__syl[op_gen(H,n,0,0)]; + sp->p2.s = (char*)s + 1; + s+=n; + break; + } + break; + case GLITCH: + case '"': + case '\'': + sp = &f__syl[op_gen(APOS,0,0,0)]; + sp->p2.s = (char*)s; + if((*p = ap_end(s)) == NULL) + return(0); + return(1); + case 'T': + case 't': + if(*(s+1)=='l' || *(s+1) == 'L') + { x=TL; + s++; + } + else if(*(s+1)=='r'|| *(s+1) == 'R') + { x=TR; + s++; + } + else x=T; + if (!(s=gt_num(s+1,&n,0))) + goto bad; + s--; + (void) op_gen(x,n,0,0); + break; + case 'X': + case 'x': (void) op_gen(X,1,0,0); break; + case 'P': + case 'p': (void) op_gen(P,1,0,0); break; + } + s++; + *p=s; + return(1); +} + + static int +#ifdef KR_headers +e_d(s,p) char *s,**p; +#else +e_d(const char *s, const 
char **p) +#endif +{ /* e_d: parse an optional repeat count (default 1) followed by a data edit descriptor letter (E, G, O, Z, L, A, F, D or I, either case). A STACK entry holding the count is emitted first, then the descriptor entry. Returns 1 with *p past the descriptor; returns 1 with *p set to 0 when a required number is missing (label bad); returns 0 with *p reset to the original s, after dropping the STACK entry, when no descriptor letter matched. A width of 0 leaves the switch without emitting a descriptor entry but still returns 1. */ int i,im,n,w,d,e,found=0,x=0; + Const char *sv=s; + s=gt_num(s,&n,1); + (void) op_gen(STACK,n,0,0); + switch(*s++) + { + default: break; + case 'E': + case 'e': x=1; /* falls through: E and G share the parsing below, x selects the opcode */ + case 'G': + case 'g': + found=1; + if (!(s=gt_num(s,&w,0))) { + bad: + *p = 0; + return 1; + } + if(w==0) break; + if(*s=='.') { + if (!(s=gt_num(s+1,&d,0))) + goto bad; + } + else d=0; + if(*s!='E' && *s != 'e') + (void) op_gen(x==1?E:G,w,d,0); /* default is Ew.dE2 */ + else { + if (!(s=gt_num(s+1,&e,0))) + goto bad; + (void) op_gen(x==1?EE:GE,w,d,e); + } + break; + case 'O': + case 'o': + i = O; + im = OM; + goto finish_I; + case 'Z': + case 'z': + i = Z; + im = ZM; + goto finish_I; + case 'L': + case 'l': + found=1; + if (!(s=gt_num(s,&w,0))) + goto bad; + if(w==0) break; + (void) op_gen(L,w,0,0); + break; + case 'A': + case 'a': + found=1; + skip(s); + if(*s>='0' && *s<='9') + { s=gt_num(s,&w,1); + if(w==0) break; + (void) op_gen(AW,w,0,0); + break; + } + (void) op_gen(A,0,0,0); + break; + case 'F': + case 'f': + if (!(s=gt_num(s,&w,0))) + goto bad; + found=1; + if(w==0) break; + if(*s=='.') { + if (!(s=gt_num(s+1,&d,0))) + goto bad; + } + else d=0; + (void) op_gen(F,w,d,0); + break; + case 'D': + case 'd': + found=1; + if (!(s=gt_num(s,&w,0))) + goto bad; + if(w==0) break; + if(*s=='.') { + if (!(s=gt_num(s+1,&d,0))) + goto bad; + } + else d=0; + (void) op_gen(D,w,d,0); + break; + case 'I': + case 'i': + i = I; + im = IM; + finish_I: + if (!(s=gt_num(s,&w,0))) + goto bad; + found=1; + if(w==0) break; + if(*s!='.') + { (void) op_gen(i,w,0,0); + break; + } + if (!(s=gt_num(s+1,&d,0))) + goto bad; + (void) op_gen(im,w,d,0); + break; + } + if(found==0) + { f__pc--; /*unSTACK: drop the STACK entry emitted above*/ + *p=sv; + return(0); + } + *p=s; + return(1); +} + static +#ifdef KR_headers +char *i_tem(s) char *s; +#else +const char *i_tem(const char *s) +#endif +{ /* i_tem: parse one item of a format list. A closing parenthesis is returned untouched for the caller; otherwise ne_d is tried, then e_d, and failing both the item is treated as a parenthesised group with an optional repeat count (default 1): a STACK entry is emitted and f_s parses the group. */ const char *t; + int n,curloc; + if(*s==')') return(s); + if(ne_d(s,&t)) return(t); + if(e_d(s,&t)) return(t); + s=gt_num(s,&n,1); +
if((curloc=op_gen(STACK,n,0,0))<0) return(NULL); + return(f_s(s,curloc)); +} + + static +#ifdef KR_headers +char *f_list(s) char *s; +#else +const char *f_list(const char *s) +#endif +{ /* f_list: parse items, optionally separated by commas, up to the parenthesis that closes the current group. At that parenthesis f__parenlvl is decremented: when it reaches 0 a REVERT entry carrying f__revloc is emitted, otherwise a GOTO entry. Returns the position after the parenthesis, or NULL when an item fails or the text ends first. */ + for(;*s!=0;) + { skip(s); + if((s=i_tem(s))==NULL) return(NULL); + skip(s); + if(*s==',') s++; + else if(*s==')') + { if(--f__parenlvl==0) + { + (void) op_gen(REVERT,f__revloc,0,0); + return(++s); + } + (void) op_gen(GOTO,0,0,0); + return(++s); + } + } + return(NULL); +} + + int +#ifdef KR_headers +pars_f(s) char *s; +#else +pars_f(const char *s) +#endif +{ /* pars_f: compile the format text s into f__syl. Resets f__parenlvl, f__revloc and f__pc to 0 and parses the outermost group with f_s. Returns 0 on success and -1 when f_s returns NULL. */ + f__parenlvl=f__revloc=f__pc=0; + if(f_s(s,0) == NULL) + { + return(-1); + } + return(0); +} +#define STKSZ 10 +int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp; /* do_fio stacks: f__cnt holds repeat counts (top index f__cp), f__ret holds saved locations (top index f__rp). NOTE(review): the pushes in do_fio do not check against STKSZ. */ +flag f__workdone, f__nonl; + + static int +#ifdef KR_headers +type_f(n) +#else +type_f(int n) +#endif +{ /* type_f: classify opcode n for the switch in do_fio. The listed data edit codes map to ED and the listed non-data edit codes to NED; every other code, including RET1, REVERT, GOTO and STACK, is returned unchanged. */ + switch(n) + { + default: + return(n); + case RET1: + return(RET1); + case REVERT: return(REVERT); + case GOTO: return(GOTO); + case STACK: return(STACK); + case X: + case SLASH: + case APOS: case H: + case T: case TL: case TR: + return(NED); + case F: + case I: + case IM: + case A: case AW: + case O: case OM: + case L: + case E: case EE: case D: + case G: case GE: + case Z: case ZM: + return(ED); + } +} +#ifdef KR_headers +integer do_fio(number,ptr,len) ftnint *number; ftnlen len; char *ptr; +#else +integer do_fio(ftnint *number, char *ptr, ftnlen len) +#endif +{ /* do_fio: execute the compiled format in f__syl for *number data items, the first at ptr and each following one len bytes further on. Each execution of an ED entry transfers one item through (*f__doed) and decrements the repeat count on top of f__cnt; NED entries go through (*f__doned); the remaining opcodes update interpreter state and jump back to loop. Returns 0 once all items are handled. A NULL ptr (as passed by en_fio) makes the ED, REVERT and COLON cases return (*f__doend)(). err and errfl are defined outside this file (presumably macros that return from do_fio - confirm in fio.h). */ struct syl *p; + int n,i; + for(i=0;i<*number;i++,ptr+=len) + { +loop: switch(type_f((p= &f__syl[f__pc])->op)) + { + default: + fprintf(stderr,"unknown code in do_fio: %d\n%s\n", + p->op,f__fmtbuf); + err(f__elist->cierr,100,"do_fio"); + case NED: + if((*f__doned)(p)) + { f__pc++; + goto loop; + } + f__pc++; + continue; + case ED: + if(f__cnt[f__cp]<=0) + { f__cp--; + f__pc++; + goto loop; + } + if(ptr==NULL) + return((*f__doend)()); + f__cnt[f__cp]--; + f__workdone=1; + if((n=(*f__doed)(p,ptr,len))>0) + errfl(f__elist->cierr,errno,"fmt"); + if(n<0) + err(f__elist->ciend,(EOF),"fmt"); + continue; + case
STACK: + f__cnt[++f__cp]=p->p1; + f__pc++; + goto loop; + case RET1: + f__ret[++f__rp]=p->p1; + f__pc++; + goto loop; + case GOTO: /* close of a nested group: pop both stacks once its count is used up, otherwise restart at the saved location plus one */ + if(--f__cnt[f__cp]<=0) + { f__cp--; + f__rp--; + f__pc++; + goto loop; + } + f__pc=1+f__ret[f__rp--]; + goto loop; + case REVERT: /* close of the outermost group: clear both stacks and continue from entry p->p1 */ + f__rp=f__cp=0; + f__pc = p->p1; + if(ptr==NULL) + return((*f__doend)()); + if(!f__workdone) return(0); + if((n=(*f__dorevert)()) != 0) return(n); + goto loop; + case COLON: + if(ptr==NULL) + return((*f__doend)()); + f__pc++; + goto loop; + case NONL: + f__nonl = 1; + f__pc++; + goto loop; + case S: + case SS: + f__cplus=0; + f__pc++; + goto loop; + case SP: + f__cplus = 1; + f__pc++; + goto loop; + case P: f__scale=p->p1; + f__pc++; + goto loop; + case BN: + f__cblank=0; + f__pc++; + goto loop; + case BZ: + f__cblank=1; + f__pc++; + goto loop; + } + } + return(0); +} + + int +en_fio(Void) +{ /* en_fio: call do_fio for one item with a NULL data pointer and zero length, so that the ED, REVERT or COLON case there returns (*f__doend)(). */ ftnint one=1; + return(do_fio(&one,(char *)NULL,(ftnint)0)); +} + + VOID +fmt_bg(Void) +{ /* fmt_bg: reset the interpreter state (f__workdone, f__cp, f__rp, f__pc, f__cursor and the bottom slots of f__cnt and f__ret) to 0. */ + f__workdone=f__cp=f__rp=f__pc=f__cursor=0; + f__cnt[0]=f__ret[0]=0; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/additional/fmt.h b/min-dgels/additional/fmt.h new file mode 100644 index 0000000..ddfa551 --- /dev/null +++ b/min-dgels/additional/fmt.h @@ -0,0 +1,105 @@ +struct syl +{ /* one entry of a compiled format: opcode, first operand, then either two more ints or a string pointer */ int op; + int p1; + union { int i[2]; char *s;} p2; + }; +#define RET1 1 +#define REVERT 2 +#define GOTO 3 +#define X 4 +#define SLASH 5 +#define STACK 6 +#define I 7 +#define ED 8 +#define NED 9 +#define IM 10 +#define APOS 11 +#define H 12 +#define TL 13 +#define TR 14 +#define T 15 +#define COLON 16 +#define S 17 +#define SP 18 +#define SS 19 +#define P 20 +#define BN 21 +#define BZ 22 +#define F 23 +#define E 24 +#define EE 25 +#define D 26 +#define G 27 +#define GE 28 +#define L 29 +#define A 30 +#define AW 31 +#define O 32 +#define NONL 33 +#define OM 34 +#define Z 35 +#define ZM 36 +typedef union +{ real pf; + doublereal pd; +} ufloat; +typedef union +{ short is; +#ifndef KR_headers + signed +#endif + char ic; + integer il; +#ifdef
Allow_TYQUAD + longint ili; +#endif +} Uint; +#ifdef KR_headers +extern int (*f__doed)(),(*f__doned)(); +extern int (*f__dorevert)(); +extern int rd_ed(),rd_ned(); +extern int w_ed(),w_ned(); +extern int signbit_f2c(); +extern char *f__fmtbuf; +#else +#ifdef __cplusplus +extern "C" { +#define Cextern extern "C" +#else +#define Cextern extern +#endif +extern const char *f__fmtbuf; +extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); +extern int (*f__dorevert)(void); +extern void fmt_bg(void); +extern int pars_f(const char*); +extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*); +extern int signbit_f2c(double*); +extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*); +extern int wrt_E(ufloat*, int, int, int, ftnlen); +extern int wrt_F(ufloat*, int, int, ftnlen); +extern int wrt_L(Uint*, int, ftnlen); +#endif +extern int f__pc,f__parenlvl,f__revloc; +extern flag f__cblank,f__cplus,f__workdone, f__nonl; +extern int f__scale; +#ifdef __cplusplus + } +#endif /* Character helpers: GET(x) reads through f__getn into x and makes the enclosing function return x when it is negative; VAL(x) yields a blank for a newline and x otherwise; PUT(x) writes through f__putn. */ +#define GET(x) if((x=(*f__getn)())<0) return(x) +#define VAL(x) (x!='\n'?x:' ') +#define PUT(x) (*f__putn)(x) +/* Without Allow_TYQUAD, longint becomes plain long and TYQUAD stays undefined; with it, TYQUAD is 14. */ +#undef TYQUAD +#ifndef Allow_TYQUAD +#undef longint +#define longint long +#else +#define TYQUAD 14 +#endif + +#ifdef KR_headers +extern char *f__icvt(); +#else +Cextern char *f__icvt(longint, int*, int*, int); +#endif diff --git a/min-dgels/additional/ieeeck.c b/min-dgels/additional/ieeeck.c new file mode 100644 index 0000000..7172d72 --- /dev/null +++ b/min-dgels/additional/ieeeck.c @@ -0,0 +1,166 @@ +/* ieeeck.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* IEEECK is called from ILAENV to verify that Infinity and */ +/* possibly NaN arithmetic is safe (i.e. will not trap). */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) INTEGER */ +/* Specifies whether to test just for infinity arithmetic */ +/* or whether to test for infinity and NaN arithmetic. */ +/* = 0: Verify infinity arithmetic only. */ +/* = 1: Verify infinity and NaN arithmetic (the code takes this path for any nonzero ISPEC). */ + +/* ZERO (input) REAL */ +/* Must contain the value 0.0 */ +/* This is passed to prevent the compiler from optimizing */ +/* away this code. */ + +/* ONE (input) REAL */ +/* Must contain the value 1.0 */ +/* This is passed to prevent the compiler from optimizing */ +/* away this code. */ + +/* RETURN VALUE: INTEGER */ +/* = 0: Arithmetic failed to produce the correct answers */ +/* = 1: Arithmetic produced the correct answers */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + ret_val = 1; /* stays 1 unless one of the checks below fails; each failure returns 0 at once */ + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + /* one / (neginf + one) is expected to be a zero that still compares equal to *zero */ + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + /* Each expression below is expected to produce a NaN. A NaN compares unequal to itself, so x == x being true means the operation did not yield one. */ + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* _starpu_ieeeck_ */ diff --git a/min-dgels/additional/iladlc.c b/min-dgels/additional/iladlc.c new file mode 100644 index 0000000..55c605f --- /dev/null +++ b/min-dgels/additional/iladlc.c @@ -0,0 +1,88 @@ +/* iladlc.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILADLC scans A for its last non-zero column. */ +/* The return value is that column index (1-based), or 0 when N is 0 or no entry is non-zero. */ +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner, A(1,N) or A(M,N), is non-zero. */ + /* Parameter adjustments: shift a so that a[i + j * a_dim1] addresses the 1-based Fortran element A(i,j) */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. 
*/ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.) { + return ret_val; + } + } + } + } + return ret_val; /* N after a quick-test hit; 0 when N is 0 or the scan found no non-zero entry */ +} /* _starpu_iladlc_ */ diff --git a/min-dgels/additional/iladlr.c b/min-dgels/additional/iladlr.c new file mode 100644 index 0000000..a0fc23e --- /dev/null +++ b/min-dgels/additional/iladlr.c @@ -0,0 +1,90 @@ +/* iladlr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILADLR scans A for its last non-zero row. */ +/* The return value is that row index (1-based), or 0 when M is 0 or no entry is non-zero. */ +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* ..
*/ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner, A(M,1) or A(M,N), is non-zero. NOTE(review): only M == 0 is tested first, so N == 0 with M > 0 still reads these elements. */ + /* Parameter adjustments: shift a so that a[i + j * a_dim1] addresses the 1-based Fortran element A(i,j) */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *m; + } else { +/* Scan up each column tracking the last non-zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.) { + break; + } + } + ret_val = max(ret_val,i__); /* i__ is the last non-zero row of column j, or 0 when the column is all zero */ + } + } + return ret_val; +} /* _starpu_iladlr_ */ diff --git a/min-dgels/additional/ilaenv.c b/min-dgels/additional/ilaenv.c new file mode 100644 index 0000000..9d84120 --- /dev/null +++ b/min-dgels/additional/ilaenv.c @@ -0,0 +1,654 @@ +/* ilaenv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "string.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__0 = 0; + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + integer i__; + char c1[1], c2[1], c3[1], c4[1]; + integer ic, nb, iz, nx; + logical cname; + integer nbmin; + logical
sname; + extern integer _starpu_ieeeck_(integer *, real *, real *); + char subnam[1]; + extern integer _starpu_iparmq_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + ftnlen name_len, opts_len; + + name_len = strlen (name__); + opts_len = strlen (opts); + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILAENV is called from the LAPACK routines to choose problem-dependent */ +/* parameters for the local environment. See ISPEC for a description of */ +/* the parameters. */ + +/* ILAENV returns an INTEGER */ +/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ +/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ + +/* This version provides a set of parameters which should give good, */ +/* but not optimal, performance on many of the currently available */ +/* computers. Users are encouraged to modify this subroutine to set */ +/* the tuning parameters for their particular machine using the option */ +/* and problem size information in the arguments. */ + +/* This routine will not function correctly if it is converted to all */ +/* lower case. Converting it to all upper case is allowed. */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) INTEGER */ +/* Specifies the parameter to be returned as the value of */ +/* ILAENV. */ +/* = 1: the optimal blocksize; if this value is 1, an unblocked */ +/* algorithm will give the best performance. */ +/* = 2: the minimum block size for which the block routine */ +/* should be used; if the usable block size is less than */ +/* this value, an unblocked routine should be used. 
*/ +/* = 3: the crossover point (in a block routine, for N less */ +/* than this value, an unblocked routine should be used) */ +/* = 4: the number of shifts, used in the nonsymmetric */ +/* eigenvalue routines (DEPRECATED) */ +/* = 5: the minimum column dimension for blocking to be used; */ +/* rectangular blocks must have dimension at least k by m, */ +/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ +/* = 6: the crossover point for the SVD (when reducing an m by n */ +/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ +/* this value, a QR factorization is used first to reduce */ +/* the matrix to a triangular form.) */ +/* = 7: the number of processors */ +/* = 8: the crossover point for the multishift QR method */ +/* for nonsymmetric eigenvalue problems (DEPRECATED) */ +/* = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ +/* =10: ieee NaN arithmetic can be trusted not to trap */ +/* =11: infinity arithmetic can be trusted not to trap */ +/* 12 <= ISPEC <= 16: */ +/* xHSEQR or one of its subroutines, */ +/* see IPARMQ for detailed explanation */ + +/* NAME (input) CHARACTER*(*) */ +/* The name of the calling subroutine, in either upper case or */ +/* lower case. */ + +/* OPTS (input) CHARACTER*(*) */ +/* The character options to the subroutine NAME, concatenated */ +/* into a single character string. For example, UPLO = 'U', */ +/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ +/* be specified as OPTS = 'UTN'. */ + +/* N1 (input) INTEGER */ +/* N2 (input) INTEGER */ +/* N3 (input) INTEGER */ +/* N4 (input) INTEGER */ +/* Problem dimensions for the subroutine NAME; these may not all */ +/* be required. 
*/ + +/* Further Details */ +/* =============== */ + +/* The following conventions have been used when calling ILAENV from the */ +/* LAPACK routines: */ +/* 1) OPTS is a concatenation of all of the character options to */ +/* subroutine NAME, in the same order that they appear in the */ +/* argument list for NAME, even if they are not used in determining */ +/* the value of the parameter specified by ISPEC. */ +/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ +/* that they appear in the argument list for NAME. N1 is used */ +/* first, N2 second, and so on, and unused problem dimensions are */ +/* passed a value of -1. */ +/* 3) The parameter value returned by ILAENV is checked for validity in */ +/* the calling subroutine. For example, ILAENV is used to retrieve */ +/* the optimal blocksize for STRTRI as follows: */ + +/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ +/* IF( NB.LE.1 ) NB = MAX( 1, N ) */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + switch (*ispec) { + case 1: goto L10; + case 2: goto L10; + case 3: goto L10; + case 4: goto L80; + case 5: goto L90; + case 6: goto L100; + case 7: goto L110; + case 8: goto L120; + case 9: goto L130; + case 10: goto L140; + case 11: goto L150; + case 12: goto L160; + case 13: goto L160; + case 14: goto L160; + case 15: goto L160; + case 16: goto L160; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L10: + +/* Convert NAME to upper case if the first character is lower case. 
*/ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)1, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L20: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L30: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L40: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2); + + switch (*ispec) { + case 1: goto L50; + case 2: goto L60; + case 3: goto L70; + } + +L50: + +/* ISPEC = 1: block size */ + +/* In these examples, separate code is provided for setting NB for */ +/* real and complex. We assume that NB will take the same value in */ +/* single or double precision. 
*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb 
= 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)1, 
(ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L60: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + 
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L70: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) 
{ + if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L80: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L90: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L100: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L110: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L120: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L130: + +/* ISPEC = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ + + ret_val = 25; + return ret_val; + +L140: + +/* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = _starpu_ieeeck_(&c__1, &c_b163, &c_b164); 
+ } + return ret_val; + +L150: + +/* ISPEC = 11: infinity arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = _starpu_ieeeck_(&c__0, &c_b163, &c_b164); + } + return ret_val; + +L160: + +/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ + + ret_val = _starpu_iparmq_(ispec, name__, opts, n1, n2, n3, n4) + ; + return ret_val; + +/* End of ILAENV */ + +} /* _starpu_ilaenv_ */ diff --git a/min-dgels/additional/iparmq.c b/min-dgels/additional/iparmq.c new file mode 100644 index 0000000..356c8e3 --- /dev/null +++ b/min-dgels/additional/iparmq.c @@ -0,0 +1,282 @@ +/* iparmq.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer i_nint(real *); + + /* Local variables */ + integer nh, ns; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ + +/* Purpose */ +/* ======= */ + +/* This program sets problem and machine dependent parameters */ +/* useful for xHSEQR and its subroutines. 
It is called whenever */ +/* ILAENV is called with 12 <= ISPEC <= 16 */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) integer scalar */ +/* ISPEC specifies which tunable parameter IPARMQ should */ +/* return. */ + +/* ISPEC=12: (INMIN) Matrices of order nmin or less */ +/* are sent directly to xLAHQR, the implicit */ +/* double shift QR algorithm. NMIN must be */ +/* at least 11. */ + +/* ISPEC=13: (INWIN) Size of the deflation window. */ +/* This is best set greater than or equal to */ +/* the number of simultaneous shifts NS. */ +/* Larger matrices benefit from larger deflation */ +/* windows. */ + +/* ISPEC=14: (INIBL) Determines when to stop nibbling and */ +/* invest in an (expensive) multi-shift QR sweep. */ +/* If the aggressive early deflation subroutine */ +/* finds LD converged eigenvalues from an order */ +/* NW deflation window and LD.GT.(NW*NIBBLE)/100, */ +/* then the next QR sweep is skipped and early */ +/* deflation is applied immediately to the */ +/* remaining active diagonal block. Setting */ +/* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */ +/* multi-shift QR sweep whenever early deflation */ +/* finds a converged eigenvalue. Setting */ +/* IPARMQ(ISPEC=14) greater than or equal to 100 */ +/* prevents TTQRE from skipping a multi-shift */ +/* QR sweep. */ + +/* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */ +/* a multi-shift QR iteration. */ + +/* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */ +/* following meanings. */ +/* 0: During the multi-shift QR sweep, */ +/* xLAQR5 does not accumulate reflections and */ +/* does not use matrix-matrix multiply to */ +/* update the far-from-diagonal matrix */ +/* entries. */ +/* 1: During the multi-shift QR sweep, */ +/* xLAQR5 and/or xLAQRaccumulates reflections and uses */ +/* matrix-matrix multiply to update the */ +/* far-from-diagonal matrix entries. */ +/* 2: During the multi-shift QR sweep. 
*/ +/* xLAQR5 accumulates reflections and takes */ +/* advantage of 2-by-2 block structure during */ +/* matrix-matrix multiplies. */ +/* (If xTRMM is slower than xGEMM, then */ +/* IPARMQ(ISPEC=16)=1 may be more efficient than */ +/* IPARMQ(ISPEC=16)=2 despite the greater level of */ +/* arithmetic work implied by the latter choice.) */ + +/* NAME (input) character string */ +/* Name of the calling subroutine */ + +/* OPTS (input) character string */ +/* This is a concatenation of the string arguments to */ +/* TTQRE. */ + +/* N (input) integer scalar */ +/* N is the order of the Hessenberg matrix H. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper triangular */ +/* in rows and columns 1:ILO-1 and IHI+1:N. */ + +/* LWORK (input) integer scalar */ +/* The amount of workspace available. */ + +/* Further Details */ +/* =============== */ + +/* Little is known about how best to choose these parameters. */ +/* It is possible to use different values of the parameters */ +/* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */ + +/* It is probably best to choose different parameters for */ +/* different matrices and different parameters at different */ +/* times during the iteration, but this has not been */ +/* implemented --- yet. */ + + +/* The best choices of most of the parameters depend */ +/* in an ill-understood way on the relative execution */ +/* rate of xLAQR3 and xLAQR5 and on the nature of each */ +/* particular eigenvalue problem. Experiment may be the */ +/* only practical way to determine which choices are most */ +/* effective. */ + +/* Following is a list of default values supplied by IPARMQ. */ +/* These defaults may be adjusted in order to attain better */ +/* performance in any particular computational environment. */ + +/* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */ +/* Default: 75. (Must be at least 11.) */ + +/* IPARMQ(ISPEC=13) Recommended deflation window size. 
*/ +/* This depends on ILO, IHI and NS, the */ +/* number of simultaneous shifts returned */ +/* by IPARMQ(ISPEC=15). The default for */ +/* (IHI-ILO+1).LE.500 is NS. The default */ +/* for (IHI-ILO+1).GT.500 is 3*NS/2. */ + +/* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */ + +/* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in */ +/* a multi-shift QR iteration. */ + +/* If IHI-ILO+1 is ... */ + +/* greater than ...but less ... the */ +/* or equal to ... than default is */ + +/* 0 30 NS = 2+ */ +/* 30 60 NS = 4+ */ +/* 60 150 NS = 10 */ +/* 150 590 NS = ** */ +/* 590 3000 NS = 64 */ +/* 3000 6000 NS = 128 */ +/* 6000 infinity NS = 256 */ + +/* (+) By default matrices of this order are */ +/* passed to the implicit double shift routine */ +/* xLAHQR. See IPARMQ(ISPEC=12) above. These */ +/* values of NS are used only in case of a rare */ +/* xLAHQR failure. */ + +/* (**) The asterisks (**) indicate an ad-hoc */ +/* function increasing from 10 to 64. */ + +/* IPARMQ(ISPEC=16) Select structured matrix multiply. */ +/* (See ISPEC=16 above for details.) */ +/* Default: 0 if NS < 14, otherwise 2. */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (*ispec == 15 || *ispec == 13 || *ispec == 16) { + +/* ==== Set the number of simultaneous shifts ==== */ + + nh = *ihi - *ilo + 1; + ns = 2; + if (nh >= 30) { + ns = 4; + } + if (nh >= 60) { + ns = 10; + } + if (nh >= 150) { +/* Computing MAX */ + r__1 = log((real) nh) / log(2.f); + i__1 = 10, i__2 = nh / i_nint(&r__1); + ns = max(i__1,i__2); + } + if (nh >= 590) { + ns = 64; + } + if (nh >= 3000) { + ns = 128; + } + if (nh >= 6000) { + ns = 256; + } +/* Computing MAX */ + i__1 = 2, i__2 = ns - ns % 2; + ns = max(i__1,i__2); + } + + if (*ispec == 12) { + + +/* ===== Matrices of order smaller than NMIN get sent */ +/* . 
to xLAHQR, the classic double shift algorithm. */ +/* . This must be at least 11. ==== */ + + ret_val = 75; + + } else if (*ispec == 14) { + +/* ==== INIBL: skip a multi-shift qr iteration and */ +/* . whenever aggressive early deflation finds */ +/* . at least (NIBBLE*(window size)/100) deflations. ==== */ + + ret_val = 14; + + } else if (*ispec == 15) { + +/* ==== NSHFTS: The number of simultaneous shifts ===== */ + + ret_val = ns; + + } else if (*ispec == 13) { + +/* ==== NW: deflation window size. ==== */ + + if (nh <= 500) { + ret_val = ns; + } else { + ret_val = ns * 3 / 2; + } + + } else if (*ispec == 16) { + +/* ==== IACC22: Whether to accumulate reflections */ +/* . before updating the far-from-diagonal elements */ +/* . and whether to use 2-by-2 block structure while */ +/* . doing it. A small amount of work could be saved */ +/* . by making this choice dependent also upon the */ +/* . NH=IHI-ILO+1. */ + + ret_val = 0; + if (ns >= 14) { + ret_val = 1; + } + if (ns >= 14) { + ret_val = 2; + } + + } else { +/* ===== invalid value of ispec ===== */ + ret_val = -1; + + } + +/* ==== End of IPARMQ ==== */ + + return ret_val; +} /* _starpu_iparmq_ */ diff --git a/min-dgels/additional/lsame.c b/min-dgels/additional/lsame.c new file mode 100644 index 0000000..3bec9b3 --- /dev/null +++ b/min-dgels/additional/lsame.c @@ -0,0 +1,117 @@ +/* lsame.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_lsame_(char *ca, char *cb) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + integer inta, intb, zcode; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ +/* case. */ + +/* Arguments */ +/* ========= */ + +/* CA (input) CHARACTER*1 */ +/* CB (input) CHARACTER*1 */ +/* CA and CB specify the single characters to be compared. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test if the characters are equal */ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + +/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ +/* machines, on which ICHAR returns a value with bit 8 set. */ +/* ICHAR('A') on Prime machines returns 193 which is the same as */ +/* ICHAR('A') on an EBCDIC machine. */ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* ASCII is assumed - ZCODE is the ASCII code of either lower or */ +/* upper case 'Z'. 
*/ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ +/* upper case 'Z'. */ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ +/* plus 128 of either lower or upper case 'Z'. */ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* RETURN */ + +/* End of LSAME */ + + return ret_val; +} /* _starpu_lsame_ */ diff --git a/min-dgels/additional/mindgels.h b/min-dgels/additional/mindgels.h new file mode 100644 index 0000000..db2a51b --- /dev/null +++ b/min-dgels/additional/mindgels.h @@ -0,0 +1,8 @@ +#ifndef DGELS_H +#define DGELS_H + +#include "f2c.h" + +int _starpu_dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); + +#endif diff --git a/min-dgels/additional/pow_di.c b/min-dgels/additional/pow_di.c new file mode 100644 index 0000000..abf36cb --- /dev/null +++ b/min-dgels/additional/pow_di.c @@ -0,0 +1,41 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double pow_di(ap, bp) doublereal *ap; integer *bp; +#else +double pow_di(doublereal *ap, integer *bp) +#endif +{ +double pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} +#ifdef __cplusplus +} +#endif diff --git 
a/min-dgels/additional/s_cat.c b/min-dgels/additional/s_cat.c new file mode 100644 index 0000000..8d92a63 --- /dev/null +++ b/min-dgels/additional/s_cat.c @@ -0,0 +1,86 @@ +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). + */ + +#include "f2c.h" +#ifndef NO_OVERWRITE +#include "stdio.h" +#undef abs +#ifdef KR_headers + extern char *F77_aloc(); + extern void free(); + extern void exit_(); +#else +#undef min +#undef max +#include "stdlib.h" +extern +#ifdef __cplusplus + "C" +#endif + char *F77_aloc(ftnlen, const char*); +#endif +#include "string.h" +#endif /* NO_OVERWRITE */ + +#ifdef __cplusplus +extern "C" { +#endif + + VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnint rnp[], *np; ftnlen ll; +#else +s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while(i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for(i = 0 ; i < n ; ++i) { + nc = ll; + if(rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while(--nc >= 0) + *lp++ = *rp++; + } + while(--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memcpy(lp0, lp1, L); + free(lp1); + } +#endif + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/additional/sysdep1.h b/min-dgels/additional/sysdep1.h new file mode 100644 index 0000000..4c026a2 --- /dev/null +++ b/min-dgels/additional/sysdep1.h @@ -0,0 +1,66 @@ +#ifndef SYSDEP_H_INCLUDED +#define SYSDEP_H_INCLUDED +#undef USE_LARGEFILE +#ifndef NO_LONG_LONG + +#ifdef __sun__ +#define 
USE_LARGEFILE +#define OFF_T off64_t +#endif + +#ifdef __linux__ +#define USE_LARGEFILE +#define OFF_T __off64_t +#endif + +#ifdef _AIX43 +#define _LARGE_FILES +#define _LARGE_FILE_API +#define USE_LARGEFILE +#endif /*_AIX43*/ + +#ifdef __hpux +#define _FILE64 +#define _LARGEFILE64_SOURCE +#define USE_LARGEFILE +#endif /*__hpux*/ + +#ifdef __sgi +#define USE_LARGEFILE +#endif /*__sgi*/ + +#ifdef __FreeBSD__ +#define OFF_T off_t +#define FSEEK fseeko +#define FTELL ftello +#endif + +#ifdef USE_LARGEFILE +#ifndef OFF_T +#define OFF_T off64_t +#endif +#define _LARGEFILE_SOURCE +#define _LARGEFILE64_SOURCE +#include <sys/types.h> +#include <sys/stat.h> +#define FOPEN fopen64 +#define FREOPEN freopen64 +#define FSEEK fseeko64 +#define FSTAT fstat64 +#define FTELL ftello64 +#define FTRUNCATE ftruncate64 +#define STAT stat64 +#define STAT_ST stat64 +#endif /*USE_LARGEFILE*/ +#endif /*NO_LONG_LONG*/ + +#ifndef NON_UNIX_STDIO +#ifndef USE_LARGEFILE +#define _INCLUDE_POSIX_SOURCE /* for HP-UX */ +#define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ +#include "sys/types.h" +#include "sys/stat.h" +#endif +#endif + +#endif /*SYSDEP_H_INCLUDED*/ diff --git a/min-dgels/additional/wsfe.c b/min-dgels/additional/wsfe.c new file mode 100644 index 0000000..8709f3b --- /dev/null +++ b/min-dgels/additional/wsfe.c @@ -0,0 +1,78 @@ +/*write sequential formatted external*/ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif + + int +x_wSL(Void) +{ + int n = f__putbuf('\n'); + f__hiwater = f__recpos = f__cursor = 0; + return(n == 0); +} + + static int +xw_end(Void) +{ + int n; + + if(f__nonl) { + f__putbuf(n = 0); + fflush(f__cf); + } + else + n = f__putbuf('\n'); + f__hiwater = f__recpos = f__cursor = 0; + return n; +} + + static int +xw_rev(Void) +{ + int n = 0; + if(f__workdone) { + n = f__putbuf('\n'); + f__workdone = 0; + } + f__hiwater = f__recpos = f__cursor = 0; + return n; +} + +#ifdef KR_headers +integer s_wsfe(a) cilist *a; /*start*/ +#else +integer s_wsfe(cilist *a)
/*start*/ +#endif +{ int n; + if(!f__init) f_init(); + f__reading=0; + f__sequential=1; + f__formatted=1; + f__external=1; + if(n=c_sfe(a)) return(n); + f__elist=a; + f__hiwater = f__cursor=f__recpos=0; + f__nonl = 0; + f__scale=0; + f__fmtbuf=a->cifmt; + f__cf=f__curunit->ufd; + if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); + f__putn= x_putc; + f__doed= w_ed; + f__doned= w_ned; + f__doend=xw_end; + f__dorevert=xw_rev; + f__donewrec=x_wSL; + fmt_bg(); + f__cplus=0; + f__cblank=f__curunit->ublnk; + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr,errno,"write start"); + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/additional/xerbla.c b/min-dgels/additional/xerbla.c new file mode 100644 index 0000000..e6dbc08 --- /dev/null +++ b/min-dgels/additional/xerbla.c @@ -0,0 +1,65 @@ +/* xerbla.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "stdio.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) +{ + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* XERBLA is an error handler for the LAPACK routines. */ +/* It is called by an LAPACK routine if an input parameter has an */ +/* invalid value. A message is printed and execution stops. 
*/ + +/* Installers may consider modifying the STOP statement in order to */ +/* call system-specific exception-handling facilities. */ + +/* Arguments */ +/* ========= */ + +/* SRNAME (input) CHARACTER*(*) */ +/* The name of the routine which called XERBLA. */ + +/* INFO (input) INTEGER */ +/* The position of the invalid parameter in the parameter list */ +/* of the calling routine. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + printf("** On entry to %s, parameter number %ld had an illegal value\n", + srname, *info); + + +/* End of XERBLA */ + + return 0; +} /* _starpu_xerbla_ */ diff --git a/min-dgels/base/BLAS/SRC/Makefile b/min-dgels/base/BLAS/SRC/Makefile new file mode 100644 index 0000000..2412c94 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/Makefile @@ -0,0 +1,115 @@ +TOPDIR=../.. +include $(TOPDIR)/make.inc + +####################################################################### +# This is the makefile to create a library for the BLAS. +# The files are grouped as follows: +# +# SBLAS1 -- Single precision real BLAS routines +# CBLAS1 -- Single precision complex BLAS routines +# DBLAS1 -- Double precision real BLAS routines +# ZBLAS1 -- Double precision complex BLAS routines +# +# CB1AUX -- Real BLAS routines called by complex routines +# ZB1AUX -- D.P. real BLAS routines called by d.p. 
complex +# routines +# +# ALLBLAS -- Auxiliary routines for Level 2 and 3 BLAS +# +# SBLAS2 -- Single precision real BLAS2 routines +# CBLAS2 -- Single precision complex BLAS2 routines +# DBLAS2 -- Double precision real BLAS2 routines +# ZBLAS2 -- Double precision complex BLAS2 routines +# +# SBLAS3 -- Single precision real BLAS3 routines +# CBLAS3 -- Single precision complex BLAS3 routines +# DBLAS3 -- Double precision real BLAS3 routines +# ZBLAS3 -- Double precision complex BLAS3 routines +# +# The library can be set up to include routines for any combination +# of the four precisions. To create or add to the library, enter make +# followed by one or more of the precisions desired. Some examples: +# make single +# make single complex +# make single double complex complex16 +# Note that these commands are not safe for parallel builds. +# +# Alternatively, the commands +# make all +# or +# make +# without any arguments creates a library of all four precisions. +# The name of the library is held in BLASLIB, which is set in the +# top-level make.inc +# +# To remove the object files after the library is created, enter +# make clean +# To force the source files to be recompiled, enter, for example, +# make single FRC=FRC +# +#--------------------------------------------------------------------- +# +# Edward Anderson, University of Tennessee +# March 26, 1990 +# Susan Ostrouchov, Last updated September 30, 1994 +# ejr, May 2006. +# +####################################################################### + +all: $(BLASLIB) + +#--------------------------------------------------------- +# Comment out the next 6 definitions if you already have +# the Level 1 BLAS. 
+#--------------------------------------------------------- + +DBLAS1 = idamax.o dasum.o daxpy.o dcopy.o ddot.o dnrm2.o \ + drot.o drotg.o dscal.o dsdot.o dswap.o drotmg.o drotm.o +$(DBLAS1): $(FRC) + +#--------------------------------------------------------------------- +# The following line defines auxiliary routines needed by both the +# Level 2 and Level 3 BLAS. Comment it out only if you already have +# both the Level 2 and 3 BLAS. +#--------------------------------------------------------------------- + +ALLBLAS = lsame.o xerbla.o xerbla_array.o +$(ALLBLAS) : $(FRC) + +#--------------------------------------------------------- +# Comment out the next 4 definitions if you already have +# the Level 2 BLAS. +#--------------------------------------------------------- + +DBLAS2 = dgemv.o dgbmv.o dsymv.o dsbmv.o dspmv.o \ + dtrmv.o dtbmv.o dtpmv.o dtrsv.o dtbsv.o dtpsv.o \ + dger.o dsyr.o dspr.o dsyr2.o dspr2.o +$(DBLAS2): $(FRC) + +#--------------------------------------------------------- +# Comment out the next 4 definitions if you already have +# the Level 3 BLAS. +#--------------------------------------------------------- + +DBLAS3 = dgemm.o dsymm.o dsyrk.o dsyr2k.o dtrmm.o dtrsm.o +$(DBLAS3): $(FRC) + +ALLOBJ= $(DBLAS1) $(DBLAS2) $(DBLAS3) $(ALLBLAS) + +$(BLASLIB): $(ALLOBJ) + $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) + $(RANLIB) $@ + +double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \ + $(DBLAS2) $(DBLAS3) + $(RANLIB) $(BLASLIB) + +FRC: + @FRC=$(FRC) + +clean: + rm -f *.o + +.c.o: + $(CC) $(CFLAGS) -c $< -o $@ diff --git a/min-dgels/base/BLAS/SRC/dasum.c b/min-dgels/base/BLAS/SRC/dasum.c new file mode 100644 index 0000000..c7de1d9 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dasum.c @@ -0,0 +1,101 @@ +/* dasum.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + integer i__, m, mp1; + doublereal dtemp; + integer nincx; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* takes the sum of the absolute values. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dtemp += (d__1 = dx[i__], abs(d__1)); +/* L10: */ + } + ret_val = dtemp; + return ret_val; + +/* code for increment equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 6; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dtemp += (d__1 = dx[i__], abs(d__1)); +/* L30: */ + } + if (*n < 6) { + goto L60; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 6) { + dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], + abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + + 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = + dx[i__ + 5], abs(d__6)); +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* _starpu_dasum_ */ diff --git a/min-dgels/base/BLAS/SRC/daxpy.c b/min-dgels/base/BLAS/SRC/daxpy.c new file mode 100644 index 0000000..b5a5268 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/daxpy.c @@ -0,0 +1,107 @@ +/* daxpy.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* constant times a vector plus a vector. */ +/* uses unrolled loops for increments equal to one. */ +/* jack dongarra, linpack, 3/11/78. 
*/ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*da == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] += *da * dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] += *da * dx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; +/* L50: */ + } + return 0; +} /* _starpu_daxpy_ */ diff --git a/min-dgels/base/BLAS/SRC/dcabs1.c b/min-dgels/base/BLAS/SRC/dcabs1.c new file mode 100644 index 0000000..f4a4486 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dcabs1.c @@ -0,0 +1,36 @@ +/* dcabs1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dcabs1_(doublecomplex *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. */ +/* Purpose */ +/* ======= */ + +/* DCABS1 computes absolute value of a double complex number */ + +/* .. Intrinsic Functions .. */ + + ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_imag(z__), abs(d__2)); + return ret_val; +} /* _starpu_dcabs1_ */ diff --git a/min-dgels/base/BLAS/SRC/dcopy.c b/min-dgels/base/BLAS/SRC/dcopy.c new file mode 100644 index 0000000..2171077 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dcopy.c @@ -0,0 +1,107 @@ +/* dcopy.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* copies a vector, x, to a vector, y. */ +/* uses unrolled loops for increments equal to one. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ + } + return 0; +} /* _starpu_dcopy_ */ diff --git a/min-dgels/base/BLAS/SRC/ddot.c b/min-dgels/base/BLAS/SRC/ddot.c new file mode 100644 index 0000000..08cb46e --- /dev/null +++ b/min-dgels/base/BLAS/SRC/ddot.c @@ -0,0 +1,110 @@ +/* ddot.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + doublereal dtemp; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* forms the dot product of two vectors. */ +/* uses unrolled loops for increments equal to one. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[ix] * dy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = dtemp; + return ret_val; + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[i__] * dy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ + i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + + 4] * dy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* _starpu_ddot_ */ diff --git a/min-dgels/base/BLAS/SRC/dgbmv.c b/min-dgels/base/BLAS/SRC/dgbmv.c new file mode 100644 index 0000000..3bb4355 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dgbmv.c @@ -0,0 +1,369 @@ +/* dgbmv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, doublereal *alpha, doublereal *a, integer *lda, + doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info; + doublereal temp; + integer lenx, leny; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBMV performs one of the matrix-vector operations */ + +/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ + +/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ + +/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. 
*/ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* KL - INTEGER. */ +/* On entry, KL specifies the number of sub-diagonals of the */ +/* matrix A. KL must satisfy 0 .le. KL. */ +/* Unchanged on exit. */ + +/* KU - INTEGER. */ +/* On entry, KU specifies the number of super-diagonals of the */ +/* matrix A. KU must satisfy 0 .le. KU. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading ( kl + ku + 1 ) by n part of the */ +/* array A must contain the matrix of coefficients, supplied */ +/* column by column, with the leading diagonal of the matrix in */ +/* row ( ku + 1 ) of the array, the first super-diagonal */ +/* starting at position 2 in row ku, the first sub-diagonal */ +/* starting at position 1 in row ( ku + 2 ), and so on. */ +/* Elements in the array A that do not correspond to elements */ +/* in the band matrix (such as the top left ku by ku triangle) */ +/* are not referenced. */ +/* The following program segment will transfer a band matrix */ +/* from conventional full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* K = KU + 1 - J */ +/* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */ +/* A( K + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( kl + ku + 1 ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. 
*/ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*kl < 0) { + info = 4; + } else if (*ku < 0) { + info = 5; + } else if (*lda < *kl + *ku + 1) { + info = 8; + } else if (*incx == 0) { + info = 10; + } else if (*incy == 0) { + info = 13; + } + if (info != 0) { + _starpu_xerbla_("DGBMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) 
{ + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (_starpu_lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the band part of A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + kup1 = *ku + 1; + if (_starpu_lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + k = kup1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__4 = min(i__5,i__6); + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + y[i__] += temp * a[k + i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; + iy = ky; + k = kup1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__3 = min(i__5,i__6); + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + y[iy] += temp * a[k + i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + if (j > *ku) { + ky += *incy; + } +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + k = kup1 - j; +/* Computing MAX */ + i__3 = 1, i__4 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__2 = min(i__5,i__6); + for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) { + temp += a[k + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + k = kup1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__4 = min(i__5,i__6); + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + temp += a[k + i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; + if (j > *ku) { + kx += *incx; + } +/* L120: */ + } + } + } + + return 0; + +/* End of DGBMV . */ + +} /* _starpu_dgbmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dgemm.c b/min-dgels/base/BLAS/SRC/dgemm.c new file mode 100644 index 0000000..f1c700f --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dgemm.c @@ -0,0 +1,389 @@ +/* dgemm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, l, info; + logical nota, notb; + doublereal temp; + integer ncola; + extern logical _starpu_lsame_(char *, char *); + integer nrowa, nrowb; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEMM performs one of the matrix-matrix operations */ + +/* C := alpha*op( A )*op( B ) + beta*C, */ + +/* where op( X ) is one of */ + +/* op( X ) = X or op( X ) = X', */ + +/* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ +/* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n', op( A ) = A. */ + +/* TRANSA = 'T' or 't', op( A ) = A'. */ + +/* TRANSA = 'C' or 'c', op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* TRANSB - CHARACTER*1. 
*/ +/* On entry, TRANSB specifies the form of op( B ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSB = 'N' or 'n', op( B ) = B. */ + +/* TRANSB = 'T' or 't', op( B ) = B'. */ + +/* TRANSB = 'C' or 'c', op( B ) = B'. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix */ +/* op( A ) and of the matrix C. M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix */ +/* op( B ) and the number of columns of the matrix C. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of columns of the matrix */ +/* op( A ) and the number of rows of the matrix op( B ). K must */ +/* be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ +/* k when TRANSA = 'N' or 'n', and is m otherwise. */ +/* Before entry with TRANSA = 'N' or 'n', the leading m by k */ +/* part of the array A must contain the matrix A, otherwise */ +/* the leading k by m part of the array A must contain the */ +/* matrix A. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When TRANSA = 'N' or 'n' then */ +/* LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* least max( 1, k ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ +/* n when TRANSB = 'N' or 'n', and is k otherwise. */ +/* Before entry with TRANSB = 'N' or 'n', the leading k by n */ +/* part of the array B must contain the matrix B, otherwise */ +/* the leading n by k part of the array B must contain the */ +/* matrix B. */ +/* Unchanged on exit. */ + +/* LDB - INTEGER. 
*/ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. When TRANSB = 'N' or 'n' then */ +/* LDB must be at least max( 1, k ), otherwise LDB must be at */ +/* least max( 1, n ). */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then C need not be set on input. */ +/* Unchanged on exit. */ + +/* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ +/* Before entry, the leading m by n part of the array C must */ +/* contain the matrix C, except when beta is zero, in which */ +/* case C need not be set on entry. */ +/* On exit, the array C is overwritten by the m by n matrix */ +/* ( alpha*op( A )*op( B ) + beta*C ). */ + +/* LDC - INTEGER. */ +/* On entry, LDC specifies the first dimension of C as declared */ +/* in the calling (sub) program. LDC must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NOTA and NOTB as true if A and B respectively are not */ +/* transposed and set NROWA, NCOLA and NROWB as the number of rows */ +/* and columns of A and the number of rows of B respectively. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = _starpu_lsame_(transa, "N"); + notb = _starpu_lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! _starpu_lsame_(transa, "C") && ! _starpu_lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! _starpu_lsame_(transb, "C") && ! + _starpu_lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + _starpu_xerbla_("DGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM . */ + +} /* _starpu_dgemm_ */ diff --git a/min-dgels/base/BLAS/SRC/dgemv.c b/min-dgels/base/BLAS/SRC/dgemv.c new file mode 100644 index 0000000..a216ee4 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dgemv.c @@ -0,0 +1,312 @@ +/* dgemv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp; + integer lenx, leny; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEMV performs one of the matrix-vector operations */ + +/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. 
*/ + +/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ + +/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. 
INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (_starpu_lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) 
{ + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (_starpu_lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV . */ + +} /* _starpu_dgemv_ */ diff --git a/min-dgels/base/BLAS/SRC/dger.c b/min-dgels/base/BLAS/SRC/dger.c new file mode 100644 index 0000000..a6a3153 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dger.c @@ -0,0 +1,194 @@ +/* dger.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jy, kx, info; + doublereal temp; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGER performs the rank 1 operation */ + +/* A := alpha*x*y' + A, */ + +/* where alpha is a scalar, x is an m element vector, y is an n element */ +/* vector and A is an m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the m */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. 
*/ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. */ +/* Unchanged on exit. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. On exit, A is */ +/* overwritten by the updated matrix. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. 
*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER . */ + +} /* _starpu_dger_ */ diff --git a/min-dgels/base/BLAS/SRC/dnrm2.c b/min-dgels/base/BLAS/SRC/dnrm2.c new file mode 100644 index 0000000..10b4522 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dnrm2.c @@ -0,0 +1,95 @@ +/* dnrm2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer ix; + doublereal ssq, norm, scale, absxi; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DNRM2 returns the euclidean norm of a vector via the function */ +/* name, so that */ + +/* DNRM2 := sqrt( x'*x ) */ + + +/* -- This version written on 25-October-1982. */ +/* Modified on 14-October-1993 to inline the call to DLASSQ. */ +/* Sven Hammarling, Nag Ltd. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* The following loop is equivalent to this call to the LAPACK */ +/* auxiliary routine: */ +/* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* _starpu_dnrm2_ */ diff --git a/min-dgels/base/BLAS/SRC/drot.c b/min-dgels/base/BLAS/SRC/drot.c new file mode 100644 index 0000000..e07b0b1 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/drot.c @@ -0,0 +1,86 @@ +/* drot.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ix, iy; + doublereal dtemp; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* applies a plane rotation. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. 
*/ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; +/* L30: */ + } + return 0; +} /* _starpu_drot_ */ diff --git a/min-dgels/base/BLAS/SRC/drotg.c b/min-dgels/base/BLAS/SRC/drotg.c new file mode 100644 index 0000000..365d053 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/drotg.c @@ -0,0 +1,79 @@ +/* drotg.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = 1.; + +/* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, doublereal *c__, + doublereal *s) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal r__, z__, roe, scale; + +/* .. Scalar Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* construct givens plane rotation. */ +/* jack dongarra, linpack, 3/11/78. */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + roe = *db; + if (abs(*da) > abs(*db)) { + roe = *da; + } + scale = abs(*da) + abs(*db); + if (scale != 0.) { + goto L10; + } + *c__ = 1.; + *s = 0.; + r__ = 0.; + z__ = 0.; + goto L20; +L10: +/* Computing 2nd power */ + d__1 = *da / scale; +/* Computing 2nd power */ + d__2 = *db / scale; + r__ = scale * sqrt(d__1 * d__1 + d__2 * d__2); + r__ = d_sign(&c_b4, &roe) * r__; + *c__ = *da / r__; + *s = *db / r__; + z__ = 1.; + if (abs(*da) > abs(*db)) { + z__ = *s; + } + if (abs(*db) >= abs(*da) && *c__ != 0.) { + z__ = 1. / *c__; + } +L20: + *da = r__; + *db = z__; + return 0; +} /* _starpu_drotg_ */ diff --git a/min-dgels/base/BLAS/SRC/drotm.c b/min-dgels/base/BLAS/SRC/drotm.c new file mode 100644 index 0000000..a1b181c --- /dev/null +++ b/min-dgels/base/BLAS/SRC/drotm.c @@ -0,0 +1,215 @@ +/* drotm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *dparam) +{ + /* Initialized data */ + + static doublereal zero = 0.; + static doublereal two = 2.; + + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__; + doublereal w, z__; + integer kx, ky; + doublereal dh11, dh12, dh21, dh22, dflag; + integer nsteps; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ + +/* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ +/* (DY**T) */ + +/* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ +/* LX = 1+(1-N)*INCX, AND SIMILARLY FOR DY USING LY AND INCY. */ +/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + +/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ +/* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* number of elements in input vector(s) */ + +/* DX (input/output) DOUBLE PRECISION array, dimension N */ +/* double precision vector with N elements */ + +/* INCX (input) INTEGER */ +/* storage spacing between elements of DX */ + +/* DY (input/output) DOUBLE PRECISION array, dimension N */ +/* double precision vector with N elements */ + +/* INCY (input) INTEGER */ +/* storage spacing between elements of DY */ + +/* DPARAM (input) DOUBLE PRECISION array, dimension 5 */ +/* DPARAM(1)=DFLAG */ +/* DPARAM(2)=DH11 */ +/* DPARAM(3)=DH21 */ +/* DPARAM(4)=DH12 */ +/* DPARAM(5)=DH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + --dparam; + --dy; + --dx; + + /* Function Body */ +/* .. */ + + dflag = dparam[1]; + if (*n <= 0 || dflag + two == zero) { + goto L140; + } + if (! (*incx == *incy && *incx > 0)) { + goto L70; + } + + nsteps = *n * *incx; + if (dflag < 0.) { + goto L50; + } else if (dflag == 0) { + goto L10; + } else { + goto L30; + } +L10: + dh12 = dparam[4]; + dh21 = dparam[3]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w + z__ * dh12; + dy[i__] = w * dh21 + z__; +/* L20: */ + } + goto L140; +L30: + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = nsteps; + i__1 = *incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__; + dy[i__] = -w + dh22 * z__; +/* L40: */ + } + goto L140; +L50: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__ * dh12; + dy[i__] = w * dh21 + z__ * dh22; +/* L60: */ + } + goto L140; +L70: + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } + + if (dflag < 0.) { + goto L120; + } else if (dflag == 0) { + goto L80; + } else { + goto L100; + } +L80: + dh12 = dparam[4]; + dh21 = dparam[3]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w + z__ * dh12; + dy[ky] = w * dh21 + z__; + kx += *incx; + ky += *incy; +/* L90: */ + } + goto L140; +L100: + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__; + dy[ky] = -w + dh22 * z__; + kx += *incx; + ky += *incy; +/* L110: */ + } + goto L140; +L120: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__ * dh12; + dy[ky] = w * dh21 + z__ * dh22; + kx += *incx; + ky += *incy; +/* L130: */ + } +L140: + return 0; +} /* _starpu_drotm_ */ diff --git a/min-dgels/base/BLAS/SRC/drotmg.c b/min-dgels/base/BLAS/SRC/drotmg.c new file mode 100644 index 0000000..088497c --- /dev/null +++ b/min-dgels/base/BLAS/SRC/drotmg.c @@ -0,0 +1,293 @@ +/* drotmg.f -- 
translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * + dx1, doublereal *dy1, doublereal *dparam) +{ + /* Initialized data */ + + static doublereal zero = 0.; + static doublereal one = 1.; + static doublereal two = 2.; + static doublereal gam = 4096.; + static doublereal gamsq = 16777216.; + static doublereal rgamsq = 5.9604645e-8; + + /* Format strings */ + static char fmt_120[] = ""; + static char fmt_150[] = ""; + static char fmt_180[] = ""; + static char fmt_210[] = ""; + + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; + integer igo; + doublereal dflag, dtemp; + + /* Assigned format variables */ + static char *igo_fmt; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ +/* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ +/* DY1)**T. */ +/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + +/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ +/* LOCATIONS 2-5 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ +/* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ +/* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) 
*/ + +/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ +/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ +/* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + + +/* Arguments */ +/* ========= */ + +/* DD1 (input/output) DOUBLE PRECISION */ + +/* DD2 (input/output) DOUBLE PRECISION */ + +/* DX1 (input/output) DOUBLE PRECISION */ + +/* DY1 (input) DOUBLE PRECISION */ + +/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ +/* DPARAM(1)=DFLAG */ +/* DPARAM(2)=DH11 */ +/* DPARAM(3)=DH21 */ +/* DPARAM(4)=DH12 */ +/* DPARAM(5)=DH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Data statements .. */ + + /* Parameter adjustments */ + --dparam; + + /* Function Body */ +/* .. */ + if (! (*dd1 < zero)) { + goto L10; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L10: +/* CASE-DD1-NONNEGATIVE */ + dp2 = *dd2 * *dy1; + if (! (dp2 == zero)) { + goto L20; + } + dflag = -two; + goto L260; +/* REGULAR-CASE.. */ +L20: + dp1 = *dd1 * *dx1; + dq2 = dp2 * *dy1; + dq1 = dp1 * *dx1; + + if (! (abs(dq1) > abs(dq2))) { + goto L40; + } + dh21 = -(*dy1) / *dx1; + dh12 = dp2 / dp1; + + du = one - dh12 * dh21; + + if (! (du <= zero)) { + goto L30; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L30: + dflag = zero; + *dd1 /= du; + *dd2 /= du; + *dx1 *= du; +/* GO SCALE-CHECK.. */ + goto L100; +L40: + if (! (dq2 < zero)) { + goto L50; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L50: + dflag = one; + dh11 = dp1 / dp2; + dh22 = *dx1 / *dy1; + du = one + dh11 * dh22; + dtemp = *dd2 / du; + *dd2 = *dd1 / du; + *dd1 = dtemp; + *dx1 = *dy1 * du; +/* GO SCALE-CHECK */ + goto L100; +/* PROCEDURE..ZERO-H-D-AND-DX1.. */ +L60: + dflag = -one; + dh11 = zero; + dh12 = zero; + dh21 = zero; + dh22 = zero; + + *dd1 = zero; + *dd2 = zero; + *dx1 = zero; +/* RETURN.. */ + goto L220; +/* PROCEDURE..FIX-H.. */ +L70: + if (! 
(dflag >= zero)) { + goto L90; + } + + if (! (dflag == zero)) { + goto L80; + } + dh11 = one; + dh22 = one; + dflag = -one; + goto L90; +L80: + dh21 = -one; + dh12 = one; + dflag = -one; +L90: + switch (igo) { + case 0: goto L120; + case 1: goto L150; + case 2: goto L180; + case 3: goto L210; + } +/* PROCEDURE..SCALE-CHECK */ +L100: +L110: + if (! (*dd1 <= rgamsq)) { + goto L130; + } + if (*dd1 == zero) { + goto L160; + } + igo = 0; + igo_fmt = fmt_120; +/* FIX-H.. */ + goto L70; +L120: +/* Computing 2nd power */ + d__1 = gam; + *dd1 *= d__1 * d__1; + *dx1 /= gam; + dh11 /= gam; + dh12 /= gam; + goto L110; +L130: +L140: + if (! (*dd1 >= gamsq)) { + goto L160; + } + igo = 1; + igo_fmt = fmt_150; +/* FIX-H.. */ + goto L70; +L150: +/* Computing 2nd power */ + d__1 = gam; + *dd1 /= d__1 * d__1; + *dx1 *= gam; + dh11 *= gam; + dh12 *= gam; + goto L140; +L160: +L170: + if (! (abs(*dd2) <= rgamsq)) { + goto L190; + } + if (*dd2 == zero) { + goto L220; + } + igo = 2; + igo_fmt = fmt_180; +/* FIX-H.. */ + goto L70; +L180: +/* Computing 2nd power */ + d__1 = gam; + *dd2 *= d__1 * d__1; + dh21 /= gam; + dh22 /= gam; + goto L170; +L190: +L200: + if (! (abs(*dd2) >= gamsq)) { + goto L220; + } + igo = 3; + igo_fmt = fmt_210; +/* FIX-H.. */ + goto L70; +L210: +/* Computing 2nd power */ + d__1 = gam; + *dd2 /= d__1 * d__1; + dh21 *= gam; + dh22 *= gam; + goto L200; +L220: + if (dflag < 0.) { + goto L250; + } else if (dflag == 0) { + goto L230; + } else { + goto L240; + } +L230: + dparam[3] = dh21; + dparam[4] = dh12; + goto L260; +L240: + dparam[2] = dh11; + dparam[5] = dh22; + goto L260; +L250: + dparam[2] = dh11; + dparam[3] = dh21; + dparam[4] = dh12; + dparam[5] = dh22; +L260: + dparam[1] = dflag; + return 0; +} /* _starpu_drotmg_ */ diff --git a/min-dgels/base/BLAS/SRC/dsbmv.c b/min-dgels/base/BLAS/SRC/dsbmv.c new file mode 100644 index 0000000..671ff51 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsbmv.c @@ -0,0 +1,364 @@ +/* dsbmv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric band matrix, with k super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the band matrix A is being supplied as */ +/* follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* being supplied. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* being supplied. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of super-diagonals of the */ +/* matrix A. K must satisfy 0 .le. K. 
*/ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer the upper */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer the lower */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. 
*/ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DSBMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. 
*/ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array A */ +/* are accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (_starpu_lsame_(uplo, "U")) { + +/* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * + temp2; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } +/* L80: */ + } + 
} + } else { + +/* Form y when lower triangle of A is stored. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + ix = jx; + iy = jy; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSBMV . */ + +} /* _starpu_dsbmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dscal.c b/min-dgels/base/BLAS/SRC/dscal.c new file mode 100644 index 0000000..a27b8dd --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dscal.c @@ -0,0 +1,96 @@ +/* dscal.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, m, mp1, nincx; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ +/* * */ +/* scales a vector by a constant. */ +/* uses unrolled loops for increment equal to one. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* _starpu_dscal_ */ diff --git a/min-dgels/base/BLAS/SRC/dsdot.c b/min-dgels/base/BLAS/SRC/dsdot.c new file mode 100644 index 0000000..77df336 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsdot.c @@ -0,0 +1,135 @@ +/* dsdot.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * + incy) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val; + + /* Local variables */ + integer i__, ns, kx, ky; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* AUTHORS */ +/* ======= */ +/* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), */ +/* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) */ + +/* Purpose */ +/* ======= */ +/* Compute the inner product of two vectors with extended */ +/* precision accumulation and result. */ + +/* Returns D.P. dot product accumulated in D.P., for S.P. 
SX and SY */ +/* DSDOT = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), */ +/* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */ +/* defined in a similar way using INCY. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* number of elements in input vector(s) */ + +/* SX (input) REAL array, dimension(N) */ +/* single precision vector with N elements */ + +/* INCX (input) INTEGER */ +/* storage spacing between elements of SX */ + +/* SY (input) REAL array, dimension(N) */ +/* single precision vector with N elements */ + +/* INCY (input) INTEGER */ +/* storage spacing between elements of SY */ + +/* DSDOT (output) DOUBLE PRECISION */ +/* DSDOT double precision dot product (zero if N.LE.0) */ + +/* REFERENCES */ +/* ========== */ + +/* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */ +/* Krogh, Basic linear algebra subprograms for Fortran */ +/* usage, Algorithm No. 539, Transactions on Mathematical */ +/* Software 5, 3 (September 1979), pp. 308-323. */ + +/* REVISION HISTORY (YYMMDD) */ +/* ========================== */ + +/* 791001 DATE WRITTEN */ +/* 890831 Modified array declarations. (WRB) */ +/* 890831 REVISION DATE from Version 3.2 */ +/* 891214 Prologue converted to Version 4.0 format. (BAB) */ +/* 920310 Corrected definition of LX in DESCRIPTION. (WRB) */ +/* 920501 Reformatted the REFERENCES section. (WRB) */ +/* 070118 Reformat to LAPACK style (JL) */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + ret_val = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == *incy && *incx > 0) { + goto L20; + } + +/* Code for unequal or nonpositive increments. 
*/ + + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ret_val += (doublereal) sx[kx] * (doublereal) sy[ky]; + kx += *incx; + ky += *incy; +/* L10: */ + } + return ret_val; + +/* Code for equal, positive, non-unit increments. */ + +L20: + ns = *n * *incx; + i__1 = ns; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + ret_val += (doublereal) sx[i__] * (doublereal) sy[i__]; +/* L30: */ + } + return ret_val; +} /* _starpu_dsdot_ */ diff --git a/min-dgels/base/BLAS/SRC/dspmv.c b/min-dgels/base/BLAS/SRC/dspmv.c new file mode 100644 index 0000000..8fd1fc0 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dspmv.c @@ -0,0 +1,312 @@ +/* dspmv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, + doublereal *ap, doublereal *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSPMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. 
*/ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --y; + --x; + --ap; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DSPMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) 
{ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + kk = 1; + if (_starpu_lsame_(uplo, "U")) { + +/* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L50: */ + } + y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; + kk += j; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; + jx += *incx; + jy += *incy; + kk += j; +/* L80: */ + } + } + } else { + +/* Form y when AP contains the lower triangle. 
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * ap[kk]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L90: */ + } + y[j] += *alpha * temp2; + kk += *n - j + 1; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * ap[kk]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j + 1; +/* L120: */ + } + } + } + + return 0; + +/* End of DSPMV . */ + +} /* _starpu_dspmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dspr.c b/min-dgels/base/BLAS/SRC/dspr.c new file mode 100644 index 0000000..24dc708 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dspr.c @@ -0,0 +1,237 @@ +/* dspr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *ap) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPR performs the symmetric rank 1 operation */ + +/* A := alpha*x*x' + A, */ + +/* where alpha is a real scalar, x is an n element vector and A is an */ +/* n by n symmetric matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. On exit, the array */ +/* AP is overwritten by the upper triangular part of the */ +/* updated matrix. 
*/ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. On exit, the array */ +/* AP is overwritten by the lower triangular part of the */ +/* updated matrix. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } + if (info != 0) { + _starpu_xerbla_("DSPR ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set the start point in X if the increment is not unity. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + + kk = 1; + if (_starpu_lsame_(uplo, "U")) { + +/* Form A when upper triangle is stored in AP. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + k = kk; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ap[k] += x[i__] * temp; + ++k; +/* L10: */ + } + } + kk += j; +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; + ix = kx; + i__2 = kk + j - 1; + for (k = kk; k <= i__2; ++k) { + ap[k] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jx += *incx; + kk += j; +/* L40: */ + } + } + } else { + +/* Form A when lower triangle is stored in AP. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + k = kk; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ap[k] += x[i__] * temp; + ++k; +/* L50: */ + } + } + kk = kk + *n - j + 1; +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = jx; + i__2 = kk + *n - j; + for (k = kk; k <= i__2; ++k) { + ap[k] += x[ix] * temp; + ix += *incx; +/* L70: */ + } + } + jx += *incx; + kk = kk + *n - j + 1; +/* L80: */ + } + } + } + + return 0; + +/* End of DSPR . */ + +} /* _starpu_dspr_ */ diff --git a/min-dgels/base/BLAS/SRC/dspr2.c b/min-dgels/base/BLAS/SRC/dspr2.c new file mode 100644 index 0000000..9d5bce9 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dspr2.c @@ -0,0 +1,270 @@ +/* dspr2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *ap) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPR2 performs the symmetric rank 2 operation */ + +/* A := alpha*x*y' + alpha*y*x' + A, */ + +/* where alpha is a scalar, x and y are n element vectors and A is an */ +/* n by n symmetric matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). 
*/ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. */ +/* Unchanged on exit. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. On exit, the array */ +/* AP is overwritten by the upper triangular part of the */ +/* updated matrix. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. On exit, the array */ +/* AP is overwritten by the lower triangular part of the */ +/* updated matrix. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --ap; + --y; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } + if (info != 0) { + _starpu_xerbla_("DSPR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set up the start points in X and Y if the increments are not both */ +/* unity. */ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + + kk = 1; + if (_starpu_lsame_(uplo, "U")) { + +/* Form A when upper triangle is stored in AP. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + k = kk; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; + ++k; +/* L10: */ + } + } + kk += j; +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = kk + j - 1; + for (k = kk; k <= i__2; ++k) { + ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; + kk += j; +/* L40: */ + } + } + } else { + +/* Form A when lower triangle is stored in AP. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) 
{ + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + k = kk; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; + ++k; +/* L50: */ + } + } + kk = kk + *n - j + 1; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk; k <= i__2; ++k) { + ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; + kk = kk + *n - j + 1; +/* L80: */ + } + } + } + + return 0; + +/* End of DSPR2 . */ + +} /* _starpu_dspr2_ */ diff --git a/min-dgels/base/BLAS/SRC/dswap.c b/min-dgels/base/BLAS/SRC/dswap.c new file mode 100644 index 0000000..2e36179 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dswap.c @@ -0,0 +1,114 @@ +/* dswap.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + doublereal dtemp; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* interchanges two vectors. */ +/* uses unrolled loops for increments equal one. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; +/* L50: */ + } + return 0; +} /* _starpu_dswap_ */ diff --git a/min-dgels/base/BLAS/SRC/dsymm.c b/min-dgels/base/BLAS/SRC/dsymm.c new file mode 100644 index 0000000..d91c85a --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsymm.c @@ -0,0 +1,362 @@ +/* dsymm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYMM performs one of the matrix-matrix operations */ + +/* C := alpha*A*B + beta*C, */ + +/* or */ + +/* C := alpha*B*A + beta*C, */ + +/* where alpha and beta are scalars, A is a symmetric matrix and B and */ +/* C are m by n matrices. */ + +/* Arguments */ +/* ========== */ + +/* SIDE - CHARACTER*1. */ +/* On entry, SIDE specifies whether the symmetric matrix A */ +/* appears on the left or right in the operation as follows: */ + +/* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, */ + +/* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, */ + +/* Unchanged on exit. */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the symmetric matrix A is to be */ +/* referenced as follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of the */ +/* symmetric matrix is to be referenced. 
*/ + +/* UPLO = 'L' or 'l' Only the lower triangular part of the */ +/* symmetric matrix is to be referenced. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix C. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix C. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ +/* m when SIDE = 'L' or 'l' and is n otherwise. */ +/* Before entry with SIDE = 'L' or 'l', the m by m part of */ +/* the array A must contain the symmetric matrix, such that */ +/* when UPLO = 'U' or 'u', the leading m by m upper triangular */ +/* part of the array A must contain the upper triangular part */ +/* of the symmetric matrix and the strictly lower triangular */ +/* part of A is not referenced, and when UPLO = 'L' or 'l', */ +/* the leading m by m lower triangular part of the array A */ +/* must contain the lower triangular part of the symmetric */ +/* matrix and the strictly upper triangular part of A is not */ +/* referenced. */ +/* Before entry with SIDE = 'R' or 'r', the n by n part of */ +/* the array A must contain the symmetric matrix, such that */ +/* when UPLO = 'U' or 'u', the leading n by n upper triangular */ +/* part of the array A must contain the upper triangular part */ +/* of the symmetric matrix and the strictly lower triangular */ +/* part of A is not referenced, and when UPLO = 'L' or 'l', */ +/* the leading n by n lower triangular part of the array A */ +/* must contain the lower triangular part of the symmetric */ +/* matrix and the strictly upper triangular part of A is not */ +/* referenced. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. 
*/ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* least max( 1, n ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the matrix B. */ +/* Unchanged on exit. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then C need not be set on input. */ +/* Unchanged on exit. */ + +/* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ +/* Before entry, the leading m by n part of the array C must */ +/* contain the matrix C, except when beta is zero, in which */ +/* case C need not be set on entry. */ +/* On exit, the array C is overwritten by the m by n updated */ +/* matrix. */ + +/* LDC - INTEGER. */ +/* On entry, LDC specifies the first dimension of C as declared */ +/* in the calling (sub) program. LDC must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NROWA as the number of rows of A. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (_starpu_lsame_(side, "L")) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = _starpu_lsame_(uplo, "U"); + +/* Test the input parameters. */ + + info = 0; + if (! _starpu_lsame_(side, "L") && ! _starpu_lsame_(side, "R")) { + info = 1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,*m)) { + info = 9; + } else if (*ldc < max(1,*m)) { + info = 12; + } + if (info != 0) { + _starpu_xerbla_("DSYMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (_starpu_lsame_(side, "L")) { + +/* Form C := alpha*A*B + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L50: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L60: */ + } +/* L70: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L80: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form C := alpha*B*A + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * a[j + j * a_dim1]; + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1]; +/* L110: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * b[i__ + j * b_dim1]; +/* L120: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[k + j * a_dim1]; + } else { + temp1 = *alpha * a[j + k * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L130: */ + } +/* L140: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[j + k * a_dim1]; + } else { + temp1 = *alpha * a[k + j * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L150: */ + } +/* L160: */ + } +/* L170: */ + } + } + + return 0; + +/* End of DSYMM . 
*/ + +} /* _starpu_dsymm_ */ diff --git a/min-dgels/base/BLAS/SRC/dsymv.c b/min-dgels/base/BLAS/SRC/dsymv.c new file mode 100644 index 0000000..0df26e5 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsymv.c @@ -0,0 +1,313 @@ +/* dsymv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric matrix. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array A is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. 
*/ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular part of the symmetric matrix and the strictly */ +/* lower triangular part of A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular part of the symmetric matrix and the strictly */ +/* upper triangular part of A is not referenced. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. 
*/ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + _starpu_xerbla_("DSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) 
{ + return 0; + } + if (_starpu_lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSYMV . 
*/ + +} /* _starpu_dsymv_ */ diff --git a/min-dgels/base/BLAS/SRC/dsyr.c b/min-dgels/base/BLAS/SRC/dsyr.c new file mode 100644 index 0000000..a4e6c00 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsyr.c @@ -0,0 +1,238 @@ +/* dsyr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYR performs the symmetric rank 1 operation */ + +/* A := alpha*x*x' + A, */ + +/* where alpha is a real scalar, x is an n element vector and A is an */ +/* n by n symmetric matrix. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array A is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. 
*/ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular part of the symmetric matrix and the strictly */ +/* lower triangular part of A is not referenced. On exit, the */ +/* upper triangular part of the array A is overwritten by the */ +/* upper triangular part of the updated matrix. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular part of the symmetric matrix and the strictly */ +/* upper triangular part of A is not referenced. On exit, the */ +/* lower triangular part of the array A is overwritten by the */ +/* lower triangular part of the updated matrix. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1,*n)) { + info = 7; + } + if (info != 0) { + _starpu_xerbla_("DSYR ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set the start point in X if the increment is not unity. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Form A when A is stored in upper triangle. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jx += *incx; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in lower triangle. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR . */ + +} /* _starpu_dsyr_ */ diff --git a/min-dgels/base/BLAS/SRC/dsyr2.c b/min-dgels/base/BLAS/SRC/dsyr2.c new file mode 100644 index 0000000..7aa28f9 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsyr2.c @@ -0,0 +1,275 @@ +/* dsyr2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYR2 performs the symmetric rank 2 operation */ + +/* A := alpha*x*y' + alpha*y*x' + A, */ + +/* where alpha is a scalar, x and y are n element vectors and A is an n */ +/* by n symmetric matrix. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. 
*/ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array A is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. */ +/* Unchanged on exit. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular part of the symmetric matrix and the strictly */ +/* lower triangular part of A is not referenced. On exit, the */ +/* upper triangular part of the array A is overwritten by the */ +/* upper triangular part of the updated matrix. 
*/ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular part of the symmetric matrix and the strictly */ +/* upper triangular part of A is not referenced. On exit, the */ +/* lower triangular part of the array A is overwritten by the */ +/* lower triangular part of the updated matrix. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set up the start points in X and Y if the increments are not both */ +/* unity. */ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* Start the operations. 
In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR2 . 
*/ + +} /* _starpu_dsyr2_ */ diff --git a/min-dgels/base/BLAS/SRC/dsyr2k.c b/min-dgels/base/BLAS/SRC/dsyr2k.c new file mode 100644 index 0000000..c440c5d --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsyr2k.c @@ -0,0 +1,407 @@ +/* dsyr2k.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, l, info; + doublereal temp1, temp2; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYR2K performs one of the symmetric rank 2k operations */ + +/* C := alpha*A*B' + alpha*B*A' + beta*C, */ + +/* or */ + +/* C := alpha*A'*B + alpha*B'*A + beta*C, */ + +/* where alpha and beta are scalars, C is an n by n symmetric matrix */ +/* and A and B are n by k matrices in the first case and k by n */ +/* matrices in the second case. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. 
*/ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array C is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + */ +/* beta*C. */ + +/* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + */ +/* beta*C. */ + +/* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + */ +/* beta*C. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix C. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with TRANS = 'N' or 'n', K specifies the number */ +/* of columns of the matrices A and B, and on entry with */ +/* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ +/* of rows of the matrices A and B. K must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ +/* k when TRANS = 'N' or 'n', and is n otherwise. */ +/* Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* part of the array A must contain the matrix A, otherwise */ +/* the leading k by n part of the array A must contain the */ +/* matrix A. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* then LDA must be at least max( 1, n ), otherwise LDA must */ +/* be at least max( 1, k ). */ +/* Unchanged on exit. 
*/ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ +/* k when TRANS = 'N' or 'n', and is n otherwise. */ +/* Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* part of the array B must contain the matrix B, otherwise */ +/* the leading k by n part of the array B must contain the */ +/* matrix B. */ +/* Unchanged on exit. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* then LDB must be at least max( 1, n ), otherwise LDB must */ +/* be at least max( 1, k ). */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array C must contain the upper */ +/* triangular part of the symmetric matrix and the strictly */ +/* lower triangular part of C is not referenced. On exit, the */ +/* upper triangular part of the array C is overwritten by the */ +/* upper triangular part of the updated matrix. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array C must contain the lower */ +/* triangular part of the symmetric matrix and the strictly */ +/* upper triangular part of C is not referenced. On exit, the */ +/* lower triangular part of the array C is overwritten by the */ +/* lower triangular part of the updated matrix. */ + +/* LDC - INTEGER. */ +/* On entry, LDC specifies the first dimension of C as declared */ +/* in the calling (sub) program. LDC must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. 
*/ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (_starpu_lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = _starpu_lsame_(uplo, "U"); + + info = 0; + if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + _starpu_xerbla_("DSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) 
{ + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYR2K. */ + +} /* _starpu_dsyr2k_ */ diff --git a/min-dgels/base/BLAS/SRC/dsyrk.c b/min-dgels/base/BLAS/SRC/dsyrk.c new file mode 100644 index 0000000..9a3af72 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dsyrk.c @@ -0,0 +1,372 @@ +/* dsyrk.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYRK performs one of the symmetric rank k operations */ + +/* C := alpha*A*A' + beta*C, */ + +/* or */ + +/* C := alpha*A'*A + beta*C, */ + +/* where alpha and beta are scalars, C is an n by n symmetric matrix */ +/* and A is an n by k matrix in the first case and a k by n matrix */ +/* in the second case. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array C is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. 
*/ + +/* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. */ + +/* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix C. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with TRANS = 'N' or 'n', K specifies the number */ +/* of columns of the matrix A, and on entry with */ +/* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ +/* of rows of the matrix A. K must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ +/* k when TRANS = 'N' or 'n', and is n otherwise. */ +/* Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* part of the array A must contain the matrix A, otherwise */ +/* the leading k by n part of the array A must contain the */ +/* matrix A. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* then LDA must be at least max( 1, n ), otherwise LDA must */ +/* be at least max( 1, k ). */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array C must contain the upper */ +/* triangular part of the symmetric matrix and the strictly */ +/* lower triangular part of C is not referenced. On exit, the */ +/* upper triangular part of the array C is overwritten by the */ +/* upper triangular part of the updated matrix. 
*/ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array C must contain the lower */ +/* triangular part of the symmetric matrix and the strictly */ +/* upper triangular part of C is not referenced. On exit, the */ +/* lower triangular part of the array C is overwritten by the */ +/* lower triangular part of the updated matrix. */ + +/* LDC - INTEGER. */ +/* On entry, LDC specifies the first dimension of C as declared */ +/* in the calling (sub) program. LDC must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (_starpu_lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = _starpu_lsame_(uplo, "U"); + + info = 0; + if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + _starpu_xerbla_("DSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) 
{ + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) 
{ + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYRK . */ + +} /* _starpu_dsyrk_ */ diff --git a/min-dgels/base/BLAS/SRC/dtbmv.c b/min-dgels/base/BLAS/SRC/dtbmv.c new file mode 100644 index 0000000..3fa14e6 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtbmv.c @@ -0,0 +1,422 @@ +/* dtbmv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTBMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. 
*/ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. 
*/ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* tranformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! 
_starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DTBMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[kplus1 + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[kplus1 + j * a_dim1]; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) 
{ + temp = x[j]; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j * a_dim1 + 1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j * a_dim1 + 1]; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[j]; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__3 
= *n; + for (j = 1; j <= i__3; ++j) { + temp = x[jx]; + kx += *incx; + ix = kx; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTBMV . */ + +} /* _starpu_dtbmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dtbsv.c b/min-dgels/base/BLAS/SRC/dtbsv.c new file mode 100644 index 0000000..f34c3f5 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtbsv.c @@ -0,0 +1,426 @@ +/* dtbsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DTBSV solves one of the systems of equations */ + +/* A*x = b, or A'*x = b, */ + +/* where b and x are n element vectors and A is an n by n unit, or */ +/* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ +/* diagonals. */ + +/* No test for singularity or near-singularity is included in this */ +/* routine. Such tests must be performed before calling this routine. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the equations to be solved as */ +/* follows: */ + +/* TRANS = 'N' or 'n' A*x = b. */ + +/* TRANS = 'T' or 't' A'*x = b. */ + +/* TRANS = 'C' or 'c' A'*x = b. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). 
*/ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). 
*/ +/* Before entry, the incremented array X must contain the n */ +/* element right-hand side vector b. On exit, X is overwritten */ +/* with the solution vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DTBSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed by sequentially with one pass through A. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := inv( A )*x. 
*/ + + if (_starpu_lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + l = kplus1 - j; + if (nounit) { + x[j] /= a[kplus1 + j * a_dim1]; + } + temp = x[j]; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__1 = max(i__2,i__3); + for (i__ = j - 1; i__ >= i__1; --i__) { + x[i__] -= temp * a[l + i__ + j * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + kx -= *incx; + if (x[jx] != 0.) { + ix = kx; + l = kplus1 - j; + if (nounit) { + x[jx] /= a[kplus1 + j * a_dim1]; + } + temp = x[jx]; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__1 = max(i__2,i__3); + for (i__ = j - 1; i__ >= i__1; --i__) { + x[ix] -= temp * a[l + i__ + j * a_dim1]; + ix -= *incx; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + l = 1 - j; + if (nounit) { + x[j] /= a[j * a_dim1 + 1]; + } + temp = x[j]; +/* Computing MIN */ + i__3 = *n, i__4 = j + *k; + i__2 = min(i__3,i__4); + for (i__ = j + 1; i__ <= i__2; ++i__) { + x[i__] -= temp * a[l + i__ + j * a_dim1]; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + kx += *incx; + if (x[jx] != 0.) { + ix = kx; + l = 1 - j; + if (nounit) { + x[jx] /= a[j * a_dim1 + 1]; + } + temp = x[jx]; +/* Computing MIN */ + i__3 = *n, i__4 = j + *k; + i__2 = min(i__3,i__4); + for (i__ = j + 1; i__ <= i__2; ++i__) { + x[ix] -= temp * a[l + i__ + j * a_dim1]; + ix += *incx; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A')*x. 
*/ + + if (_starpu_lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + temp -= a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + if (nounit) { + temp /= a[kplus1 + j * a_dim1]; + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + temp -= a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + if (nounit) { + temp /= a[kplus1 + j * a_dim1]; + } + x[jx] = temp; + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L120: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + temp -= a[l + i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + if (nounit) { + temp /= a[j * a_dim1 + 1]; + } + x[j] = temp; +/* L140: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + temp -= a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; +/* L150: */ + } + if (nounit) { + temp /= a[j * a_dim1 + 1]; + } + x[jx] = temp; + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTBSV . */ + +} /* _starpu_dtbsv_ */ diff --git a/min-dgels/base/BLAS/SRC/dtpmv.c b/min-dgels/base/BLAS/SRC/dtpmv.c new file mode 100644 index 0000000..632ef11 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtpmv.c @@ -0,0 +1,357 @@ +/* dtpmv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular.
*/ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular matrix packed sequentially, */ +/* column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ +/* respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular matrix packed sequentially, */ +/* column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ +/* respectively, and so on. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* transformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: step x and ap back one element so the 1-based Fortran indices used below map onto the arrays passed in */ + --x; + --ap; + + /* Function Body: info records the 1-based position of the first invalid argument */ + info = 0; + if (!
_starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*incx == 0) { + info = 7; + } + if (info != 0) { + _starpu_xerbla_("DTPMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. Note: kx is left unset when INCX = 1; only the non-unit-stride branches below read it. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of AP are */ +/* accessed sequentially with one pass through AP. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x:= A*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * ap[k]; + ++k; +/* L10: */ + } + if (nounit) { + x[j] *= ap[kk + j - 1]; + } + } + kk += j; +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + x[ix] += temp * ap[k]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= ap[kk + j - 1]; + } + } + jx += *incx; + kk += j; +/* L40: */ + } + } + } else { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.)
{ + temp = x[j]; + k = kk; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * ap[k]; + --k; +/* L50: */ + } + if (nounit) { + x[j] *= ap[kk - *n + j]; + } + } + kk -= *n - j + 1; +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = kk - (*n - (j + 1)); + for (k = kk; k >= i__1; --k) { + x[ix] += temp * ap[k]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= ap[kk - *n + j]; + } + } + jx -= *incx; + kk -= *n - j + 1; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= ap[kk]; + } + k = kk - 1; + for (i__ = j - 1; i__ >= 1; --i__) { + temp += ap[k] * x[i__]; + --k; +/* L90: */ + } + x[j] = temp; + kk -= j; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= ap[kk]; + } + i__1 = kk - j + 1; + for (k = kk - 1; k >= i__1; --k) { + ix -= *incx; + temp += ap[k] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; + kk -= j; +/* L120: */ + } + } + } else { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= ap[kk]; + } + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += ap[k] * x[i__]; + ++k; +/* L130: */ + } + x[j] = temp; + kk += *n - j + 1; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= ap[kk]; + } + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + temp += ap[k] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; + kk += *n - j + 1; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTPMV .
*/ + +} /* _starpu_dtpmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dtpsv.c b/min-dgels/base/BLAS/SRC/dtpsv.c new file mode 100644 index 0000000..2895d76 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtpsv.c @@ -0,0 +1,360 @@ +/* dtpsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPSV solves one of the systems of equations */ + +/* A*x = b, or A'*x = b, */ + +/* where b and x are n element vectors and A is an n by n unit, or */ +/* non-unit, upper or lower triangular matrix, supplied in packed form. */ + +/* No test for singularity or near-singularity is included in this */ +/* routine. Such tests must be performed before calling this routine. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1.
*/ +/* On entry, TRANS specifies the equations to be solved as */ +/* follows: */ + +/* TRANS = 'N' or 'n' A*x = b. */ + +/* TRANS = 'T' or 't' A'*x = b. */ + +/* TRANS = 'C' or 'c' A'*x = b. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular matrix packed sequentially, */ +/* column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ +/* respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular matrix packed sequentially, */ +/* column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ +/* respectively, and so on. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element right-hand side vector b. On exit, X is overwritten */ +/* with the solution vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab.
*/ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: step x and ap back one element so the 1-based Fortran indices used below map onto the arrays passed in */ + --x; + --ap; + + /* Function Body: info records the 1-based position of the first invalid argument */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*incx == 0) { + info = 7; + } + if (info != 0) { + _starpu_xerbla_("DTPSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. Note: kx is left unset when INCX = 1; only the non-unit-stride branches below read it. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of AP are */ +/* accessed sequentially with one pass through AP. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) 
{ + if (nounit) { + x[j] /= ap[kk]; + } + temp = x[j]; + k = kk - 1; + for (i__ = j - 1; i__ >= 1; --i__) { + x[i__] -= temp * ap[k]; + --k; +/* L10: */ + } + } + kk -= j; +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.)
{ + if (nounit) { + x[jx] /= ap[kk]; + } + temp = x[jx]; + ix = jx; + i__1 = kk - j + 1; + for (k = kk - 1; k >= i__1; --k) { + ix -= *incx; + x[ix] -= temp * ap[k]; +/* L30: */ + } + } + jx -= *incx; + kk -= j; +/* L40: */ + } + } + } else { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + if (nounit) { + x[j] /= ap[kk]; + } + temp = x[j]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + x[i__] -= temp * ap[k]; + ++k; +/* L50: */ + } + } + kk += *n - j + 1; +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + if (nounit) { + x[jx] /= ap[kk]; + } + temp = x[jx]; + ix = jx; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + x[ix] -= temp * ap[k]; +/* L70: */ + } + } + jx += *incx; + kk += *n - j + 1; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x. */ + + if (_starpu_lsame_(uplo, "U")) { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp -= ap[k] * x[i__]; + ++k; +/* L90: */ + } + if (nounit) { + temp /= ap[kk + j - 1]; + } + x[j] = temp; + kk += j; +/* L100: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = kx; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + temp -= ap[k] * x[ix]; + ix += *incx; +/* L110: */ + } + if (nounit) { + temp /= ap[kk + j - 1]; + } + x[jx] = temp; + jx += *incx; + kk += j; +/* L120: */ + } + } + } else { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + k = kk; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + temp -= ap[k] * x[i__]; + --k; +/* L130: */ + } + if (nounit) { + temp /= ap[kk - *n + j]; + } + x[j] = temp; + kk -= *n - j + 1; +/* L140: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = kx; + i__1 = kk
- (*n - (j + 1)); + for (k = kk; k >= i__1; --k) { + temp -= ap[k] * x[ix]; + ix -= *incx; +/* L150: */ + } + if (nounit) { + temp /= ap[kk - *n + j]; + } + x[jx] = temp; + jx -= *incx; + kk -= *n - j + 1; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTPSV . */ + +} /* _starpu_dtpsv_ */ diff --git a/min-dgels/base/BLAS/SRC/dtrmm.c b/min-dgels/base/BLAS/SRC/dtrmm.c new file mode 100644 index 0000000..f2c5234 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtrmm.c @@ -0,0 +1,453 @@ +/* dtrmm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRMM performs one of the matrix-matrix operations */ + +/* B := alpha*op( A )*B, or B := alpha*B*op( A ), */ + +/* where alpha is a scalar, B is an m by n matrix, A is a unit, or */ +/* non-unit, upper or lower triangular matrix and op( A ) is one of */ + +/* op( A ) = A or op( A ) = A'.
*/ + +/* Arguments */ +/* ========== */ + +/* SIDE - CHARACTER*1. */ +/* On entry, SIDE specifies whether op( A ) multiplies B from */ +/* the left or right as follows: */ + +/* SIDE = 'L' or 'l' B := alpha*op( A )*B. */ + +/* SIDE = 'R' or 'r' B := alpha*B*op( A ). */ + +/* Unchanged on exit. */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix A is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n' op( A ) = A. */ + +/* TRANSA = 'T' or 't' op( A ) = A'. */ + +/* TRANSA = 'C' or 'c' op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit triangular */ +/* as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of B. M must be at */ +/* least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of B. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* zero then A is not referenced and B need not be set before */ +/* entry. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ +/* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
*/ +/* Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* then LDA must be at least max( 1, n ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the matrix B, and on exit is overwritten by the */ +/* transformed matrix. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments: offset a and b so that a[i + j * a_dim1] and b[i + j * b_dim1] take 1-based Fortran indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body: decode the option flags, then validate; info records the 1-based position of the first invalid argument */ + lside = _starpu_lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = _starpu_lsame_(diag, "N"); + upper = _starpu_lsame_(uplo, "U"); + + info = 0; + if (! lside && ! _starpu_lsame_(side, "R")) { + info = 1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 2; + } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, + "T") && ! _starpu_lsame_(transa, "C")) { + info = 3; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) 
{ + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.)
{ + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.)
{ + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM .
*/ + +} /* _starpu_dtrmm_ */ diff --git a/min-dgels/base/BLAS/SRC/dtrmv.c b/min-dgels/base/BLAS/SRC/dtrmv.c new file mode 100644 index 0000000..6acf45c --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtrmv.c @@ -0,0 +1,345 @@ +/* dtrmv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular matrix. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x.
*/ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* transformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office.
*/ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset a and step x back so that a[i + j * a_dim1] and x[i] take 1-based Fortran indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body: info records the 1-based position of the first invalid argument */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + _starpu_xerbla_("DTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. Note: kx is left unset when INCX = 1; only the non-unit-stride branches below read it. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) 
{ + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.)
{ + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ +
j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV . */ + +} /* _starpu_dtrmv_ */ diff --git a/min-dgels/base/BLAS/SRC/dtrsm.c b/min-dgels/base/BLAS/SRC/dtrsm.c new file mode 100644 index 0000000..546cd79 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtrsm.c @@ -0,0 +1,490 @@ +/* dtrsm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRSM solves one of the matrix equations */ + +/* op( A )*X = alpha*B, or X*op( A ) = alpha*B, */ + +/* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ +/* non-unit, upper or lower triangular matrix and op( A ) is one of */ + +/* op( A ) = A or op( A ) = A'. */ + +/* The matrix X is overwritten on B. */ + +/* Arguments */ +/* ========== */ + +/* SIDE - CHARACTER*1.
*/ +/* On entry, SIDE specifies whether op( A ) appears on the left */ +/* or right of X as follows: */ + +/* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ + +/* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ + +/* Unchanged on exit. */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix A is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANSA - CHARACTER*1. */ +/* On entry, TRANSA specifies the form of op( A ) to be used in */ +/* the matrix multiplication as follows: */ + +/* TRANSA = 'N' or 'n' op( A ) = A. */ + +/* TRANSA = 'T' or 't' op( A ) = A'. */ + +/* TRANSA = 'C' or 'c' op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit triangular */ +/* as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of B. M must be at */ +/* least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of B. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* zero then A is not referenced and B need not be set before */ +/* entry. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ +/* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ +/* Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. 
*/ +/* Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* then LDA must be at least max( 1, n ). */ +/* Unchanged on exit. */ + +/* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the right-hand side matrix B, and on exit is */ +/* overwritten by the solution matrix X. */ + +/* LDB - INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + + +/* Level 3 Blas routine. */ + + +/* -- Written on 8-February-1989. */ +/* Jack Dongarra, Argonne National Laboratory. */ +/* Iain Duff, AERE Harwell. */ +/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* Sven Hammarling, Numerical Algorithms Group Ltd. */ + + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = _starpu_lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = _starpu_lsame_(diag, "N"); + upper = _starpu_lsame_(uplo, "U"); + + info = 0; + if (! lside && ! 
_starpu_lsame_(side, "R")) { + info = 1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + info = 2; + } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, + "T") && ! _starpu_lsame_(transa, "C")) { + info = 3; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) 
{ + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (_starpu_lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM . 
*/ + +} /* _starpu_dtrsm_ */ diff --git a/min-dgels/base/BLAS/SRC/dtrsv.c b/min-dgels/base/BLAS/SRC/dtrsv.c new file mode 100644 index 0000000..d5017ff --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dtrsv.c @@ -0,0 +1,348 @@ +/* dtrsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRSV solves one of the systems of equations */ + +/* A*x = b, or A'*x = b, */ + +/* where b and x are n element vectors and A is an n by n unit, or */ +/* non-unit, upper or lower triangular matrix. */ + +/* No test for singularity or near-singularity is included in this */ +/* routine. Such tests must be performed before calling this routine. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. 
*/ +/* On entry, TRANS specifies the equations to be solved as */ +/* follows: */ + +/* TRANS = 'N' or 'n' A*x = b. */ + +/* TRANS = 'T' or 't' A'*x = b. */ + +/* TRANS = 'C' or 'c' A'*x = b. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* upper triangular part of the array A must contain the upper */ +/* triangular matrix and the strictly lower triangular part of */ +/* A is not referenced. */ +/* Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* lower triangular part of the array A must contain the lower */ +/* triangular matrix and the strictly upper triangular part of */ +/* A is not referenced. */ +/* Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* A are not referenced either, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element right-hand side vector b. On exit, X is overwritten */ +/* with the solution vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. 
*/ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + info = 1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + info = 2; + } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + _starpu_xerbla_("DTRSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = _starpu_lsame_(diag, "N"); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (_starpu_lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + if (nounit) { + x[j] /= a[j + j * a_dim1]; + } + temp = x[j]; + for (i__ = j - 1; i__ >= 1; --i__) { + x[i__] -= temp * a[i__ + j * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) 
{ + if (nounit) { + x[jx] /= a[j + j * a_dim1]; + } + temp = x[jx]; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + x[ix] -= temp * a[i__ + j * a_dim1]; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + if (nounit) { + x[j] /= a[j + j * a_dim1]; + } + temp = x[j]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + x[i__] -= temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + if (nounit) { + x[jx] /= a[j + j * a_dim1]; + } + temp = x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + x[ix] -= temp * a[i__ + j * a_dim1]; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x. */ + + if (_starpu_lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp -= a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp -= a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[jx] = temp; + jx += *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + temp -= a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[j] = temp; +/* L140: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + temp -= a[i__ + j * a_dim1] * x[ix]; + ix -= 
*incx; +/* L150: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[jx] = temp; + jx -= *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRSV . */ + +} /* _starpu_dtrsv_ */ diff --git a/min-dgels/base/BLAS/SRC/dzasum.c b/min-dgels/base/BLAS/SRC/dzasum.c new file mode 100644 index 0000000..203d4bf --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dzasum.c @@ -0,0 +1,80 @@ +/* dzasum.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + integer i__, ix; + doublereal stemp; + extern doublereal _starpu_dcabs1_(doublecomplex *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* takes the sum of the absolute values. */ +/* jack dongarra, 3/11/78. */ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. 
*/ + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += _starpu_dcabs1_(&zx[ix]); + ix += *incx; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += _starpu_dcabs1_(&zx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* _starpu_dzasum_ */ diff --git a/min-dgels/base/BLAS/SRC/dznrm2.c b/min-dgels/base/BLAS/SRC/dznrm2.c new file mode 100644 index 0000000..822e1a9 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/dznrm2.c @@ -0,0 +1,108 @@ +/* dznrm2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + integer ix; + doublereal ssq, temp, norm, scale; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DZNRM2 returns the euclidean norm of a vector via the function */ +/* name, so that */ + +/* DZNRM2 := sqrt( conjg( x' )*x ) */ + + +/* -- This version written on 25-October-1982. 
*/ +/* Modified on 14-October-1993 to inline the call to ZLASSQ. */ +/* Sven Hammarling, Nag Ltd. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* The following loop is equivalent to this call to the LAPACK */ +/* auxiliary routine: */ +/* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DZNRM2. */ + +} /* _starpu_dznrm2_ */ diff --git a/min-dgels/base/BLAS/SRC/idamax.c b/min-dgels/base/BLAS/SRC/idamax.c new file mode 100644 index 0000000..5a36923 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/idamax.c @@ -0,0 +1,93 @@ +/* idamax.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; + + /* Local variables */ + integer i__, ix; + doublereal dmax__; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* finds the index of element having max. absolute value. */ +/* jack dongarra, linpack, 3/11/78. */ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { + goto L5; + } + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { + goto L30; + } + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); +L30: + ; + } + return ret_val; +} /* _starpu_idamax_ */ diff --git a/min-dgels/base/BLAS/SRC/izamax.c b/min-dgels/base/BLAS/SRC/izamax.c new file mode 100644 index 0000000..e8404d6 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/izamax.c @@ -0,0 +1,93 @@ +/* izamax.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + integer i__, ix; + doublereal smax; + extern doublereal _starpu_dcabs1_(doublecomplex *); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* finds the index of element having max. absolute value. */ +/* jack dongarra, 1/15/85. 
*/ +/* modified 3/93 to return if incx .le. 0. */ +/* modified 12/3/93, array(1) declarations changed to array(*) */ + + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = _starpu_dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (_starpu_dcabs1_(&zx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = _starpu_dcabs1_(&zx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = _starpu_dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (_starpu_dcabs1_(&zx[i__]) <= smax) { + goto L30; + } + ret_val = i__; + smax = _starpu_dcabs1_(&zx[i__]); +L30: + ; + } + return ret_val; +} /* _starpu_izamax_ */ diff --git a/min-dgels/base/BLAS/SRC/lsame.c b/min-dgels/base/BLAS/SRC/lsame.c new file mode 100644 index 0000000..4f68e69 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/lsame.c @@ -0,0 +1,117 @@ +/* lsame.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_lsame_(char *ca, char *cb) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + integer inta, intb, zcode; + + +/* -- LAPACK auxiliary routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ +/* case. */ + +/* Arguments */ +/* ========= */ + +/* CA (input) CHARACTER*1 */ + +/* CB (input) CHARACTER*1 */ +/* CA and CB specify the single characters to be compared. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ + +/* Test if the characters are equal */ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + +/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ +/* machines, on which ICHAR returns a value with bit 8 set. */ +/* ICHAR('A') on Prime machines returns 193 which is the same as */ +/* ICHAR('A') on an EBCDIC machine. */ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* ASCII is assumed - ZCODE is the ASCII code of either lower or */ +/* upper case 'Z'. */ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ +/* upper case 'Z'. */ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ +/* plus 128 of either lower or upper case 'Z'. 
*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* RETURN */ + +/* End of LSAME */ + + return ret_val; +} /* _starpu_lsame_ */ diff --git a/min-dgels/base/BLAS/SRC/xerbla.c b/min-dgels/base/BLAS/SRC/xerbla.c new file mode 100644 index 0000000..a15bd28 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/xerbla.c @@ -0,0 +1,77 @@ +/* xerbla.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "stdio.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) +{ + /* Format strings */ + static char fmt_9999[] = "(\002 ** On entry to \002,a,\002 parameter num" + "ber \002,i2,\002 had \002,\002an illegal value\002)"; + + /* Builtin functions */ + integer s_wsfe(cilist *), i_len_trim(char *, ftnlen), do_fio(integer *, + char *, ftnlen), e_wsfe(void); + /* Subroutine */ int s_stop(char *, ftnlen); + + /* Fortran I/O blocks */ + static cilist io___1 = { 0, 6, 0, fmt_9999, 0 }; + + + +/* -- LAPACK auxiliary routine (preliminary version) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* XERBLA is an error handler for the LAPACK routines. */ +/* It is called by an LAPACK routine if an input parameter has an */ +/* invalid value. A message is printed and execution stops. 
*/ + +/* Installers may consider modifying the STOP statement in order to */ +/* call system-specific exception-handling facilities. */ + +/* Arguments */ +/* ========= */ + +/* SRNAME (input) CHARACTER*(*) */ +/* The name of the routine which called XERBLA. */ + +/* INFO (input) INTEGER */ +/* The position of the invalid parameter in the parameter list */ +/* of the calling routine. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + printf("** On entry to %s, parameter number %ld had an illegal value\n", + srname, *info); + + +/* End of XERBLA */ + + return 0; +} /* _starpu_xerbla_ */ diff --git a/min-dgels/base/BLAS/SRC/xerbla_array.c b/min-dgels/base/BLAS/SRC/xerbla_array.c new file mode 100644 index 0000000..3469039 --- /dev/null +++ b/min-dgels/base/BLAS/SRC/xerbla_array.c @@ -0,0 +1,103 @@ +/* _starpu_xerbla_array.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer i_len(char *, ftnlen); + + /* Local variables */ + integer i__; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + char srname[33]; /* 32 name columns plus one byte for a terminating NUL */ + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd., */ +/* September 19, 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK */ +/* and BLAS error handler. Rather than taking a Fortran string argument */ +/* as the function's name, XERBLA_ARRAY takes an array of single */ +/* characters along with the array's length. XERBLA_ARRAY then copies */ +/* up to 32 characters of that array into a Fortran string and passes */ +/* that to XERBLA. If called with a non-positive SRNAME_LEN, */ +/* XERBLA_ARRAY will call XERBLA with a string of all blank characters. */ + +/* Say some macro or other device makes XERBLA_ARRAY available to C99 */ +/* by a name lapack_xerbla and with a common Fortran calling convention. */ +/* Then a C99 program could invoke XERBLA via: */ +/* { */ +/* int flen = strlen(__func__); */ +/* lapack_xerbla(__func__, &flen, &info); */ +/* } */ + +/* Providing XERBLA_ARRAY is not necessary for intercepting LAPACK */ +/* errors. XERBLA_ARRAY calls XERBLA. */ + +/* Arguments */ +/* ========= */ + +/* SRNAME_ARRAY (input) CHARACTER(1) array, dimension (SRNAME_LEN) */ +/* The name of the routine which called XERBLA_ARRAY. */ + +/* SRNAME_LEN (input) INTEGER */ +/* The length of the name in SRNAME_ARRAY. */ + +/* INFO (input) INTEGER */ +/* The position of the invalid parameter in the parameter list */ +/* of the calling routine. */ + +/* ===================================================================== */ + +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + /* Parameter adjustments */ + --srname_array__; + + /* Function Body */ + s_copy(srname, "", (ftnlen)32, (ftnlen)0); +/* Computing MIN */ + i__2 = *srname_len__, i__3 = i_len(srname, (ftnlen)32); + i__1 = min(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + *(unsigned char *)&srname[i__ - 1] = *(unsigned char *)& + srname_array__[i__]; + } + srname[32] = '\0'; /* XERBLA prints SRNAME with "%s", so it must be NUL-terminated */ + _starpu_xerbla_(srname, info); + return 0; +} /* _starpu_xerbla_array__ */ diff --git a/min-dgels/base/BLAS/WRAP/Makefile b/min-dgels/base/BLAS/WRAP/Makefile new file mode 100644 index 0000000..d13fc97 --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/Makefile @@ -0,0 +1,23 @@ +TOPDIR=../.. +include $(TOPDIR)/make.inc + +####################################################################### +# This is the makefile to create a wrapper for the CBLAS. +####################################################################### + +all: libcblaswr.a + +libcblaswr.a: cblaswr.o + $(ARCH) $(ARCHFLAGS) libcblaswr.a cblaswr.o + $(RANLIB) libcblaswr.a + +libfblaswr.a: fblaswr.o + $(ARCH) $(ARCHFLAGS) libfblaswr.a fblaswr.o + $(RANLIB) libfblaswr.a + +clean: + rm -f *.o *.a + +.c.o: + $(CC) $(CFLAGS) -c $*.c + diff --git a/min-dgels/base/BLAS/WRAP/README b/min-dgels/base/BLAS/WRAP/README new file mode 100644 index 0000000..098d617 --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/README @@ -0,0 +1,30 @@ +f2c'd BLAS wrapper + + +The f2c translated BLAS interfaces used by CLAPACK (and other f2c'd codes) +unfortunately often don't quite match the Fortran interfaces on various +platforms. Consequently, it is difficult to use f2c'd codes with high +performance BLAS routines, which may substantially impede the performance +of these codes. These simple wrappers provide a way around this difficulty +by providing f2c style interfaces (preceded with "f2c_" to avoid name +collisions) that call through to an underlying CBLAS or F77 BLAS. + + +f2c.h: The f2c header file + +blaswrap.h: A header file to be included in f2c codes that will use the + wrapper. 
Just #include it at the top of an f2c generated file. + +cblaswr.c: A wrapper around the CBLAS interface. This interface is + provided, for instance, by ATLAS (see www.netlib.org/atlas) + +fblaswr.c: A sample wrapper around a conventional Fortran BLAS interface. + This works on a Sun platform, but will require substantial tinkering + on platforms with different Fortran calling conventions. + +cblas.h: A header file for the CBLAS interface. + +fblaswr.h: A header file for the (Sun-style) Fortran BLAS interface + +[cz]rotg.c: Complex Givens rotation routines. For some reason, C interfaces + to these routines were not specified in the CBLAS interface. diff --git a/min-dgels/base/BLAS/WRAP/cblas.h b/min-dgels/base/BLAS/WRAP/cblas.h new file mode 100644 index 0000000..9571f2b --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/cblas.h @@ -0,0 +1,577 @@ +#ifndef CBLAS_H +#define CBLAS_H + +#include <stddef.h> + +#define CBLAS_INDEX size_t + +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; + +int cblas_errprn(int ierr, int info, char *form, ...); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * =========================================================================== + */ +float cblas_sdsdot(const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY); +double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, + const int incY); +float cblas_sdot(const int N, const float *X, const int incX, + const float *Y, const int incY); +double cblas_ddot(const int N, const double *X, const int incX, + const double *Y, const int incY); +/* + * Functions having prefixes Z and C only + */ +void 
_starpu_cblas_cdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void _starpu_cblas_cdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + +void _starpu_cblas_zdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void _starpu_cblas_zdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + + +/* + * Functions having prefixes S D SC DZ + */ +float cblas_snrm2(const int N, const float *X, const int incX); +float cblas_sasum(const int N, const float *X, const int incX); + +double cblas_dnrm2(const int N, const double *X, const int incX); +double cblas_dasum(const int N, const double *X, const int incX); + +float cblas_scnrm2(const int N, const void *X, const int incX); +float cblas_scasum(const int N, const void *X, const int incX); + +double cblas_dznrm2(const int N, const void *X, const int incX); +double cblas_dzasum(const int N, const void *X, const int incX); + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); +CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); +CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); +CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); + + +/* + * =========================================================================== + * Prototypes for level 0 BLAS routines + * =========================================================================== + */ +void cblas_srotg(float a, float b, float c, float s); +void cblas_crotg(complex a, complex b, complex c, float s); +void cblas_drotg(double a, double b, double c, double s); +void cblas_zrotg(doublecomplex a, doublecomplex b, doublecomplex c, double s); + + +/* + * =========================================================================== + * Prototypes for level 1 
BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ +void cblas_sswap(const int N, float *X, const int incX, + float *Y, const int incY); +void cblas_scopy(const int N, const float *X, const int incX, + float *Y, const int incY); +void cblas_saxpy(const int N, const float alpha, const float *X, + const int incX, float *Y, const int incY); + +void cblas_dswap(const int N, double *X, const int incX, + double *Y, const int incY); +void cblas_dcopy(const int N, const double *X, const int incX, + double *Y, const int incY); +void cblas_daxpy(const int N, const double alpha, const double *X, + const int incX, double *Y, const int incY); + +void cblas_cswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_ccopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_caxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); + +void cblas_zswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_zcopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_zaxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); + + +/* + * Routines with S and D prefix only + */ +void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); +void cblas_srot(const int N, float *X, const int incX, + float *Y, const int incY, const float c, const float s); +void cblas_srotm(const int N, float *X, const int incX, + float *Y, const int incY, const float *P); + +void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); +void cblas_drot(const int N, double *X, const int incX, + double *Y, const int incY, const double c, const double s); +void cblas_drotm(const int N, double *X, const int incX, + double *Y, const int incY, const double *P); + + +/* + * Routines 
with S D C Z CS and ZD prefixes + */ +void cblas_sscal(const int N, const float alpha, float *X, const int incX); +void cblas_dscal(const int N, const double alpha, double *X, const int incX); +void cblas_cscal(const int N, const void *alpha, void *X, const int incX); +void cblas_zscal(const int N, const void *alpha, void *X, const int incX); +void cblas_csscal(const int N, const float alpha, void *X, const int incX); +void cblas_zdscal(const int N, const double alpha, void *X, const int incX); + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY); +void cblas_sgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const float alpha, + const float *A, const int lda, const float *X, + const int incX, const float beta, float *Y, const int incY); +void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, + float *X, const int incX); +void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); +void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO 
Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, float *X, + const int incX); +void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); + +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *X, const int incX, const double beta, + double *Y, const int incY); +void cblas_dgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const double alpha, + const double *A, const int lda, const double *X, + const int incX, const double beta, double *Y, const int incY); +void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, + double *X, const int incX); +void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); +void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, double *X, + const int incX); +void cblas_dtbsv(const 
enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); + +void cblas_cgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_cgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int 
incX); +void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + +void cblas_zgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_zgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + + 
+/* + * Routines with S and D prefixes only + */ +void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *Ap, + const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, + const float alpha, const float *X, const int incX, + const float *Y, const int incY, float *A, const int lda); +void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *A, const int lda); +void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *Ap); +void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A, + const int lda); +void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A); + +void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const double alpha, const double *A, + const int 
lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *Ap, + const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, + const double alpha, const double *X, const int incX, + const double *Y, const int incY, double *A, const int lda); +void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *A, const int lda); +void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *Ap); +void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A, + const int lda); +void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A); + + +/* + * Routines with C and Z prefixes only + */ +void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int 
N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, + const int incX, void *A); +void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int 
incY, void *A, const int lda); +void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, + const int incX, void *A); +void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const float alpha, const float *A, + const int lda, const float *B, const int ldb, + const float beta, float *C, const int ldc); +void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float beta, float *C, const int ldc); +void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, 
const int ldb, const float beta, + float *C, const int ldc); +void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); + +void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc); +void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double beta, double *C, const int ldc); +void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +void cblas_dtrsm(const 
enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); + +void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + +void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const 
int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + + +/* + * Routines with prefixes C and Z only + */ +void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const void *A, const int lda, + const float beta, void *C, const int 
ldc); +void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const float beta, + void *C, const int ldc); +void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const void *A, const int lda, + const double beta, void *C, const int ldc); +void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const double beta, + void *C, const int ldc); + +int cblas_errprn(int ierr, int info, char *form, ...); + + +#endif diff --git a/min-dgels/base/BLAS/WRAP/cblaswr.c b/min-dgels/base/BLAS/WRAP/cblaswr.c new file mode 100644 index 0000000..f80943d --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/cblaswr.c @@ -0,0 +1,1744 @@ +#include "f2c.h" +#include "cblas.h" + +/* +#define CBLAS_INDEX size_t + +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +*/ + +#define CVT_TRANSPOSE(c) \ + (((c) == 'N' || (c) == 'n') ? CblasNoTrans : \ + ((c) == 'T' || (c) == 't') ? CblasTrans : \ + ((c) == 'C' || (c) == 'c') ? CblasConjTrans : \ + -1) + +#define CVT_UPLO(c) \ + (((c) == 'U' || (c) == 'u') ? CblasUpper : \ + ((c) == 'L' || (c) == 'l') ? 
CblasLower : \ + -1) + +#define CVT_DIAG(c) \ + (((c) == 'U' || (c) == 'u') ? CblasUnit : \ + ((c) == 'N' || (c) == 'n') ? CblasNonUnit : \ + -1) + +#define CVT_SIDE(c) \ + (((c) == 'L' || (c) == 'l') ? CblasLeft : \ + ((c) == 'R' || (c) == 'r') ? CblasRight : \ + -1) + + + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * =========================================================================== + */ + +doublereal +f2c_sdot(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + return cblas_sdot(*N, X, *incX, Y, *incY); +} + +doublereal +f2c_ddot(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + return cblas_ddot(*N, X, *incX, Y, *incY); +} + + +/* + * Functions having prefixes Z and C only + */ + +void +f2c_cdotu(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_cblas_cdotu_sub(*N, X, *incX, Y, *incY, retval); +} + +void +f2c_cdotc(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_cblas_cdotc_sub(*N, X, *incX, Y, *incY, retval); +} + +void +f2c_zdotu(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_cblas_zdotu_sub(*N, X, *incX, Y, *incY, retval); +} + +void +f2c_zdotc(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_cblas_zdotc_sub(*N, X, *incX, Y, *incY, retval); +} + + +/* + * Functions having prefixes S D SC DZ + */ + +doublereal +f2c_snrm2(integer* N, + real* X, integer* incX) +{ + return cblas_snrm2(*N, X, *incX); +} + +doublereal +f2c_sasum(integer* N, + real* X, integer* incX) +{ + return cblas_sasum(*N, X, *incX); +} + +doublereal +f2c_dnrm2(integer* N, + doublereal* X, integer* incX) +{ + return cblas_dnrm2(*N, X, *incX); +} + +doublereal 
+f2c_dasum(integer* N, + doublereal* X, integer* incX) +{ + return cblas_dasum(*N, X, *incX); +} + +doublereal +f2c_scnrm2(integer* N, + complex* X, integer* incX) +{ + return cblas_scnrm2(*N, X, *incX); +} + +doublereal +f2c_scasum(integer* N, + complex* X, integer* incX) +{ + return cblas_scasum(*N, X, *incX); +} + +doublereal +f2c_dznrm2(integer* N, + doublecomplex* X, integer* incX) +{ + return cblas_dznrm2(*N, X, *incX); +} + +doublereal +f2c_dzasum(integer* N, + doublecomplex* X, integer* incX) +{ + return cblas_dzasum(*N, X, *incX); +} + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +integer +f2c_isamax(integer* N, + real* X, integer* incX) +{ + if (*N == 0) + return 0; + return (integer) cblas_isamax(*N, X, *incX) + 1; +} + +integer +f2c_idamax(integer* N, + doublereal* X, integer* incX) +{ + if (*N == 0) + return 0; + return (integer) cblas_idamax(*N, X, *incX) + 1; +} + +integer +f2c_icamax(integer* N, + complex* X, integer* incX) +{ + if (*N == 0) + return 0; + return (integer) cblas_icamax(*N, X, *incX) + 1; +} + +integer +f2c_izamax(integer* N, + doublecomplex* X, integer* incX) +{ + if (*N == 0) + return 0; + return (integer) cblas_izamax(*N, X, *incX) + 1; +} + +/* + * =========================================================================== + * Prototypes for level 0 BLAS routines + * =========================================================================== + */ +/* + * The rotg wrappers forward their four arguments as pointers, without + * dereferencing them: the rotation is written back through the arguments, + * so passing values would lose the result. Each wrapper always returns 0. + */ +int +f2c_srotg(real* a, + real* b, + real* c, + real* s) +{ + cblas_srotg(a, b, c, s); + return 0; +} + +int +f2c_crotg(complex* CA, + complex* CB, + complex* C, + real* S) +{ + cblas_crotg(CA, CB, C, S); + return 0; +} + +int +f2c_drotg(doublereal* a, + doublereal* b, + doublereal* c, + doublereal* s) +{ + cblas_drotg(a, b, c, s); + return 0; +} +int +f2c_zrotg(doublecomplex* CA, + doublecomplex* CB, + doublecomplex* C, + doublereal* S) +{ + cblas_zrotg(CA, CB, C, S); + return 0; +} +/* + * 
=========================================================================== + * Prototypes for level 1 BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ + +int +f2c_sswap(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + cblas_sswap(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_scopy(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + cblas_scopy(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_saxpy(integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY) +{ + cblas_saxpy(*N, *alpha, X, *incX, Y, *incY); + return 0; +} + +int +f2c_dswap(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + cblas_dswap(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_dcopy(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + cblas_dcopy(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_daxpy(integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + cblas_daxpy(*N, *alpha, X, *incX, Y, *incY); + return 0; +} + +int +f2c_cswap(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + cblas_cswap(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_ccopy(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + cblas_ccopy(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_caxpy(integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + cblas_caxpy(*N, alpha, X, *incX, Y, *incY); + return 0; +} + +int +f2c_zswap(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + cblas_zswap(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_zcopy(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + cblas_zcopy(*N, X, *incX, Y, *incY); + return 0; +} + +int +f2c_zaxpy(integer* N, + 
doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); + return 0; +} + + +/* + * Routines with S and D prefix only + */ + + + +int +f2c_srot(integer* N, + real* X, integer* incX, + real* Y, integer* incY, + real* c, real* s) +{ + cblas_srot(*N, X, *incX, Y, *incY, *c, *s); + return 0; +} + +int +f2c_drot(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* c, doublereal* s) +{ + cblas_drot(*N, X, *incX, Y, *incY, *c, *s); + return 0; +} + + +/* + * Routines with S D C Z CS and ZD prefixes + */ + +int +f2c_sscal(integer* N, + real* alpha, + real* X, integer* incX) +{ + cblas_sscal(*N, *alpha, X, *incX); + return 0; +} + +int +f2c_dscal(integer* N, + doublereal* alpha, + doublereal* X, integer* incX) +{ + cblas_dscal(*N, *alpha, X, *incX); + return 0; +} + +int +f2c_cscal(integer* N, + complex* alpha, + complex* X, integer* incX) +{ + cblas_cscal(*N, alpha, X, *incX); + return 0; +} + + +int +f2c_zscal(integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX) +{ + cblas_zscal(*N, alpha, X, *incX); + return 0; +} + + +int +f2c_csscal(integer* N, + real* alpha, + complex* X, integer* incX) +{ + cblas_csscal(*N, *alpha, X, *incX); + return 0; +} + + +int +f2c_zdscal(integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX) +{ + cblas_zdscal(*N, *alpha, X, *incX); + return 0; +} + + + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +int +f2c_sgemv(char* trans, integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + cblas_sgemv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, + *alpha, A, *lda, X, *incX, *beta, Y, *incY); + return 0; +} + 
+int +f2c_sgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + real *alpha, + real *A, integer *lda, + real *X, integer *incX, + real *beta, + real *Y, integer *incY) +{ + cblas_sgbmv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, + *alpha, A, *lda, X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_strmv(char* uplo, char *trans, char* diag, integer *N, + real *A, integer *lda, + real *X, integer *incX) +{ + cblas_strmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_stbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX) +{ + cblas_stbmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_stpmv(char* uplo, char* trans, char* diag, integer* N, + real* Ap, + real* X, integer* incX) +{ + cblas_stpmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + +int +f2c_strsv(char* uplo, char* trans, char* diag, integer* N, + real* A, integer* lda, + real* X, integer* incX) +{ + cblas_strsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_stbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX) +{ + cblas_stbsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_stpsv(char* uplo, char* trans, char* diag, integer* N, + real* Ap, + real* X, integer* incX) +{ + cblas_stpsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + + + +int +f2c_dgemv(char* trans, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, 
integer* incY) +{ + cblas_dgemv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, + *alpha, A, *lda, X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_dgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublereal *alpha, + doublereal *A, integer *lda, + doublereal *X, integer *incX, + doublereal *beta, + doublereal *Y, integer *incY) +{ + cblas_dgbmv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, + *alpha, A, *lda, X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_dtrmv(char* uplo, char *trans, char* diag, integer *N, + doublereal *A, integer *lda, + doublereal *X, integer *incX) +{ + cblas_dtrmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_dtbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX) +{ + cblas_dtbmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_dtpmv(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX) +{ + cblas_dtpmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + +int +f2c_dtrsv(char* uplo, char* trans, char* diag, integer* N, + doublereal* A, integer* lda, + doublereal* X, integer* incX) +{ + cblas_dtrsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_dtbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX) +{ + cblas_dtbsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_dtpsv(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX) +{ + cblas_dtpsv(CblasColMajor, + CVT_UPLO(*uplo), 
CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + + + +int +f2c_cgemv(char* trans, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + cblas_cgemv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, + alpha, A, *lda, X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_cgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + complex *alpha, + complex *A, integer *lda, + complex *X, integer *incX, + complex *beta, + complex *Y, integer *incY) +{ + cblas_cgbmv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, + alpha, A, *lda, X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_ctrmv(char* uplo, char *trans, char* diag, integer *N, + complex *A, integer *lda, + complex *X, integer *incX) +{ + cblas_ctrmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_ctbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* lda, + complex* X, integer* incX) +{ + cblas_ctbmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_ctpmv(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX) +{ + cblas_ctpmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + +int +f2c_ctrsv(char* uplo, char* trans, char* diag, integer* N, + complex* A, integer* lda, + complex* X, integer* incX) +{ + cblas_ctrsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_ctbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* lda, + complex* X, integer* incX) +{ + cblas_ctbsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + 
return 0; +} + +int +f2c_ctpsv(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX) +{ + cblas_ctpsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + + + +int +f2c_zgemv(char* trans, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + cblas_zgemv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, + alpha, A, *lda, X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_zgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublecomplex *alpha, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX, + doublecomplex *beta, + doublecomplex *Y, integer *incY) +{ + cblas_zgbmv(CblasColMajor, + CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, + alpha, A, *lda, X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_ztrmv(char* uplo, char *trans, char* diag, integer *N, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX) +{ + cblas_ztrmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, *incX); + return 0; +} + +int +f2c_ztbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + cblas_ztbmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_ztpmv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX) +{ + cblas_ztpmv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + +int +f2c_ztrsv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + cblas_ztrsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, A, *lda, X, 
*incX); + return 0; +} + +int +f2c_ztbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + cblas_ztbsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, *K, A, *lda, X, *incX); + return 0; +} + +int +f2c_ztpsv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX) +{ + cblas_ztpsv(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *N, Ap, X, *incX); + return 0; +} + + +/* + * Routines with S and D prefixes only + */ + +int +f2c_ssymv(char* uplo, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + cblas_ssymv(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, A, *lda, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_ssbmv(char* uplo, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + cblas_ssbmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *K, *alpha, A, *lda, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_sspmv(char* uplo, integer* N, + real* alpha, + real* Ap, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + cblas_sspmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, Ap, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_sger(integer* M, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda) +{ + cblas_sger(CblasColMajor, + *M, *N, *alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_ssyr(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* A, integer* lda) +{ + cblas_ssyr(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); + return 0; +} + +int +f2c_sspr(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Ap) +{ + cblas_sspr(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, X, 
*incX, Ap); + return 0; +} + +int +f2c_ssyr2(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda) +{ + cblas_ssyr2(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_sspr2(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A) +{ + cblas_sspr2(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Y, *incY, A); + return 0; +} + + + +int +f2c_dsymv(char* uplo, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + cblas_dsymv(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, A, *lda, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_dsbmv(char* uplo, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + cblas_dsbmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *K, *alpha, A, *lda, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_dspmv(char* uplo, integer* N, + doublereal* alpha, + doublereal* Ap, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + cblas_dspmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, Ap, + X, *incX, *beta, Y, *incY); + return 0; +} + +int +f2c_dger(integer* M, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda) +{ + cblas_dger(CblasColMajor, + *M, *N, *alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_dsyr(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* A, integer* lda) +{ + cblas_dsyr(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); + return 0; +} + +int +f2c_dspr(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Ap) +{ + cblas_dspr(CblasColMajor, 
+ CVT_UPLO(*uplo), *N, *alpha, X, *incX, Ap); + return 0; +} + +int +f2c_dsyr2(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda) +{ + cblas_dsyr2(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_dspr2(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A) +{ + cblas_dspr2(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Y, *incY, A); + return 0; +} + + + +/* + * Routines with C and Z prefixes only + */ + +int +f2c_chemv(char* uplo, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + cblas_chemv(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, A, *lda, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_chbmv(char* uplo, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + cblas_chbmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *K, alpha, A, *lda, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_chpmv(char* uplo, integer* N, + complex* alpha, + complex* Ap, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + cblas_chpmv(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, Ap, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_cgeru(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + cblas_cgeru(CblasColMajor, + *M, *N, alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_cgerc(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + cblas_cgerc(CblasColMajor, + *M, *N, alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_cher(char* uplo, integer* N, + real* 
alpha, + complex* X, integer* incX, + complex* A, integer* lda) +{ + cblas_cher(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, A, *lda); + return 0; +} + +int +f2c_chpr(char* uplo, integer* N, + real* alpha, + complex* X, integer* incX, + complex* Ap) +{ + cblas_chpr(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Ap); + return 0; +} + +int +f2c_cher2(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + cblas_cher2(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_chpr2(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* Ap) +{ + cblas_chpr2(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, + X, *incX, Y, *incY, Ap); + return 0; +} + + + +int +f2c_zhemv(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + cblas_zhemv(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, A, *lda, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_zhbmv(char* uplo, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + cblas_zhbmv(CblasColMajor, + CVT_UPLO(*uplo), *N, *K, alpha, A, *lda, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_zhpmv(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* Ap, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + cblas_zhpmv(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, Ap, + X, *incX, beta, Y, *incY); + return 0; +} + +int +f2c_zgeru(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + cblas_zgeru(CblasColMajor, + *M, *N, alpha, + X, 
*incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_zgerc(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + cblas_zgerc(CblasColMajor, + *M, *N, alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_zher(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* A, integer* lda) +{ + cblas_zher(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, A, *lda); + return 0; +} + +int +f2c_zhpr(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Ap) +{ + cblas_zhpr(CblasColMajor, + CVT_UPLO(*uplo), *N, *alpha, + X, *incX, Ap); + return 0; +} + +int +f2c_zher2(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + cblas_zher2(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, + X, *incX, Y, *incY, A, *lda); + return 0; +} + +int +f2c_zhpr2(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* Ap) +{ + cblas_zhpr2(CblasColMajor, + CVT_UPLO(*uplo), *N, alpha, + X, *incX, Y, *incY, Ap); + return 0; +} + + + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ + +int +f2c_sgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + cblas_sgemm(CblasColMajor, + CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_ssymm(char* side, char* uplo, integer* M, integer* N, + real* alpha, + real* A, integer* 
lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + cblas_ssymm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_ssyrk(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* beta, + real* C, integer* ldc) +{ + cblas_ssyrk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, *beta, C, *ldc); + return 0; +} + +int +f2c_ssyr2k(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + cblas_ssyr2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_strmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb) +{ + cblas_strmm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, *alpha, A, *lda, B, *ldb); + return 0; +} + +int +f2c_strsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb) +{ + cblas_strsm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, *alpha, A, *lda, B, *ldb); + return 0; +} + + + +int +f2c_dgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc) +{ + cblas_dgemm(CblasColMajor, + CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_dsymm(char* side, char* uplo, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, 
integer* ldc) +{ + cblas_dsymm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_dsyrk(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* beta, + doublereal* C, integer* ldc) +{ + cblas_dsyrk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, *beta, C, *ldc); + return 0; +} + +int +f2c_dsyr2k(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc) +{ + cblas_dsyr2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + +int +f2c_dtrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb) +{ + cblas_dtrmm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, *alpha, A, *lda, B, *ldb); + return 0; +} + +int +f2c_dtrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb) +{ + cblas_dtrsm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, *alpha, A, *lda, B, *ldb); + return 0; +} + + + +int +f2c_cgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + cblas_cgemm(CblasColMajor, + CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_csymm(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* 
C, integer* ldc) +{ + cblas_csymm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_csyrk(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* beta, + complex* C, integer* ldc) +{ + cblas_csyrk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, beta, C, *ldc); + return 0; +} + +int +f2c_csyr2k(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + cblas_csyr2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_ctrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb) +{ + cblas_ctrmm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, alpha, A, *lda, B, *ldb); + return 0; +} + +int +f2c_ctrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb) +{ + cblas_ctrsm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, alpha, A, *lda, B, *ldb); + return 0; +} + + + +int +f2c_zgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zgemm(CblasColMajor, + CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_zsymm(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + 
doublecomplex* C, integer* ldc) +{ + cblas_zsymm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_zsyrk(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zsyrk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, beta, C, *ldc); + return 0; +} + +int +f2c_zsyr2k(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zsyr2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_ztrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb) +{ + cblas_ztrmm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, alpha, A, *lda, B, *ldb); + return 0; +} + +int +f2c_ztrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb) +{ + cblas_ztrsm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), + CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), + *M, *N, alpha, A, *lda, B, *ldb); + return 0; +} + + + +/* + * Routines with prefixes C and Z only + */ + +int +f2c_chemm(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + cblas_chemm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_cherk(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + complex* A, 
integer* lda, + real* beta, + complex* C, integer* ldc) +{ + cblas_cherk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, *beta, C, *ldc); + return 0; +} + +int +f2c_cher2k(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + real* beta, + complex* C, integer* ldc) +{ + cblas_cher2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + + + +int +f2c_zhemm(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zhemm(CblasColMajor, + CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, + alpha, A, *lda, B, *ldb, beta, C, *ldc); + return 0; +} + +int +f2c_zherk(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublecomplex* A, integer* lda, + doublereal* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zherk(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + *alpha, A, *lda, *beta, C, *ldc); + return 0; +} + +int +f2c_zher2k(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublereal* beta, + doublecomplex* C, integer* ldc) +{ + cblas_zher2k(CblasColMajor, + CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, + alpha, A, *lda, B, *ldb, *beta, C, *ldc); + return 0; +} + diff --git a/min-dgels/base/BLAS/WRAP/fblaswr.c b/min-dgels/base/BLAS/WRAP/fblaswr.c new file mode 100644 index 0000000..82d69e4 --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/fblaswr.c @@ -0,0 +1,1600 @@ +#include "f2c.h" +#include "fblaswr.h" + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * 
=========================================================================== + */ + +doublereal +f2c_sdot(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + return _starpu_sdot_(N, X, incX, Y, incY); +} + +doublereal +f2c_ddot(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + return _starpu_ddot_(N, X, incX, Y, incY); +} + + +/* + * Functions having prefixes Z and C only + */ + +void +f2c_cdotu(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_cdotu_(retval, N, X, incX, Y, incY); +} + +void +f2c_cdotc(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_cdotc_(retval, N, X, incX, Y, incY); +} + +void +f2c_zdotu(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_zdotu_(retval, N, X, incX, Y, incY); +} + +void +f2c_zdotc(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_zdotc_(retval, N, X, incX, Y, incY); +} + + +/* + * Functions having prefixes S D SC DZ + */ + +doublereal +f2c_snrm2(integer* N, + real* X, integer* incX) +{ + return _starpu_snrm2_(N, X, incX); +} + +doublereal +f2c_sasum(integer* N, + real* X, integer* incX) +{ + return _starpu_sasum_(N, X, incX); +} + +doublereal +f2c_dnrm2(integer* N, + doublereal* X, integer* incX) +{ + return _starpu_dnrm2_(N, X, incX); +} + +doublereal +f2c_dasum(integer* N, + doublereal* X, integer* incX) +{ + return _starpu_dasum_(N, X, incX); +} + +doublereal +f2c_scnrm2(integer* N, + complex* X, integer* incX) +{ + return _starpu_scnrm2_(N, X, incX); +} + +doublereal +f2c_scasum(integer* N, + complex* X, integer* incX) +{ + return _starpu_scasum_(N, X, incX); +} + +doublereal +f2c_dznrm2(integer* N, + doublecomplex* X, integer* incX) +{ + return _starpu_dznrm2_(N, X, incX); +} + +doublereal +f2c_dzasum(integer* N, + doublecomplex* X, 
integer* incX) +{ + return _starpu_dzasum_(N, X, incX); +} + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +integer +f2c_isamax(integer* N, + real* X, integer* incX) +{ + return _starpu_isamax_(N, X, incX); +} + +integer +f2c_idamax(integer* N, + doublereal* X, integer* incX) +{ + return _starpu_idamax_(N, X, incX); +} + +integer +f2c_icamax(integer* N, + complex* X, integer* incX) +{ + return _starpu_icamax_(N, X, incX); +} + +integer +f2c_izamax(integer* N, + doublecomplex* X, integer* incX) +{ + return _starpu_izamax_(N, X, incX); +} + +/* + * =========================================================================== + * Prototypes for level 0 BLAS routines + * =========================================================================== + */ +int +f2c_srotg(real* a, + real* b, + real* c, + real* s) +{ + _starpu_srotg_(a, b, c, s); + return 0; +} + +int +f2c_crotg(complex* CA, + complex* CB, + complex* C, + real* S) +{ + _starpu_crotg_(CA, CB, C, S); + return 0; +} + +int +f2c_drotg(doublereal* a, + doublereal* b, + doublereal* c, + doublereal* s) +{ + _starpu_drotg_(a, b, c, s); + return 0; +} + +int +f2c_zrotg(doublecomplex* CA, + doublecomplex* CB, + doublecomplex* C, + doublereal* S) +{ + _starpu_zrotg_(CA, CB, C, S); + return 0; +} +/* + * =========================================================================== + * Prototypes for level 1 BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ + +int +f2c_sswap(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + _starpu_sswap_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_scopy(integer* N, + real* X, integer* incX, + real* Y, integer* incY) +{ + _starpu_scopy_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_saxpy(integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY) +{ + _starpu_saxpy_(N, alpha, X, incX, Y, incY); + return 0; +} + +int 
+f2c_dswap(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + _starpu_dswap_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_dcopy(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + _starpu_dcopy_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_daxpy(integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY) +{ + _starpu_daxpy_(N, alpha, X, incX, Y, incY); + return 0; +} + +int +f2c_cswap(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_cswap_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_ccopy(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_ccopy_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_caxpy(integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY) +{ + _starpu_caxpy_(N, alpha, X, incX, Y, incY); + return 0; +} + +int +f2c_zswap(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_zswap_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_zcopy(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_zcopy_(N, X, incX, Y, incY); + return 0; +} + +int +f2c_zaxpy(integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY) +{ + _starpu_zaxpy_(N, alpha, X, incX, Y, incY); + return 0; +} + + +/* + * Routines with S and D prefix only + */ + +int +f2c_srot(integer* N, + real* X, integer* incX, + real* Y, integer* incY, + real* c, real* s) +{ + _starpu_srot_(N, X, incX, Y, incY, c, s); + return 0; +} + +int +f2c_drot(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* c, doublereal* s) +{ + _starpu_drot_(N, X, incX, Y, incY, c, s); + return 0; +} + + +/* + * Routines with S D C Z CS and ZD prefixes + */ + +int +f2c_sscal(integer* N, + real* alpha, + real* X, integer* incX) +{ + _starpu_sscal_(N, alpha, X, 
incX); + return 0; +} + +int +f2c_dscal(integer* N, + doublereal* alpha, + doublereal* X, integer* incX) +{ + _starpu_dscal_(N, alpha, X, incX); + return 0; +} + +int +f2c_cscal(integer* N, + complex* alpha, + complex* X, integer* incX) +{ + _starpu_cscal_(N, alpha, X, incX); + return 0; +} + + +int +f2c_zscal(integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX) +{ + _starpu_zscal_(N, alpha, X, incX); + return 0; +} + + +int +f2c_csscal(integer* N, + real* alpha, + complex* X, integer* incX) +{ + _starpu_csscal_(N, alpha, X, incX); + return 0; +} + + +int +f2c_zdscal(integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX) +{ + _starpu_zdscal_(N, alpha, X, incX); + return 0; +} + + + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +int +f2c_sgemv(char* trans, integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + _starpu_sgemv_(trans, M, N, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_sgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + real *alpha, + real *A, integer *lda, + real *X, integer *incX, + real *beta, + real *Y, integer *incY) +{ + _starpu_sgbmv_(trans, M, N, KL, KU, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_strmv(char* uplo, char *trans, char* diag, integer *N, + real *A, integer *lda, + real *X, integer *incX) +{ + _starpu_strmv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_stbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX) +{ + _starpu_stbmv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_stpmv(char* uplo, char* trans, char* diag, integer* N, + real* 
Ap, + real* X, integer* incX) +{ + _starpu_stpmv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + +int +f2c_strsv(char* uplo, char* trans, char* diag, integer* N, + real* A, integer* lda, + real* X, integer* incX) +{ + _starpu_strsv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_stbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX) +{ + _starpu_stbsv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_stpsv(char* uplo, char* trans, char* diag, integer* N, + real* Ap, + real* X, integer* incX) +{ + _starpu_stpsv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + + + +int +f2c_dgemv(char* trans, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + _starpu_dgemv_(trans, M, N, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_dgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublereal *alpha, + doublereal *A, integer *lda, + doublereal *X, integer *incX, + doublereal *beta, + doublereal *Y, integer *incY) +{ + _starpu_dgbmv_(trans, M, N, KL, KU, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_dtrmv(char* uplo, char *trans, char* diag, integer *N, + doublereal *A, integer *lda, + doublereal *X, integer *incX) +{ + _starpu_dtrmv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_dtbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX) +{ + _starpu_dtbmv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_dtpmv(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX) +{ + _starpu_dtpmv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + +int +f2c_dtrsv(char* uplo, char* trans, char* diag, integer* N, + doublereal* A, integer* 
lda, + doublereal* X, integer* incX) +{ + _starpu_dtrsv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_dtbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX) +{ + _starpu_dtbsv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_dtpsv(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX) +{ + _starpu_dtpsv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + + + +int +f2c_cgemv(char* trans, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + _starpu_cgemv_(trans, M, N, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_cgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + complex *alpha, + complex *A, integer *lda, + complex *X, integer *incX, + complex *beta, + complex *Y, integer *incY) +{ + _starpu_cgbmv_(trans, M, N, KL, KU, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_ctrmv(char* uplo, char *trans, char* diag, integer *N, + complex *A, integer *lda, + complex *X, integer *incX) +{ + _starpu_ctrmv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_ctbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* lda, + complex* X, integer* incX) +{ + _starpu_ctbmv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_ctpmv(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX) +{ + _starpu_ctpmv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + +int +f2c_ctrsv(char* uplo, char* trans, char* diag, integer* N, + complex* A, integer* lda, + complex* X, integer* incX) +{ + _starpu_ctrsv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_ctbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* 
lda, + complex* X, integer* incX) +{ + _starpu_ctbsv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_ctpsv(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX) +{ + _starpu_ctpsv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + + + +int +f2c_zgemv(char* trans, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + _starpu_zgemv_(trans, M, N, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_zgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublecomplex *alpha, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX, + doublecomplex *beta, + doublecomplex *Y, integer *incY) +{ + _starpu_zgbmv_(trans, M, N, KL, KU, + alpha, A, lda, X, incX, beta, Y, incY); + return 0; +} + +int +f2c_ztrmv(char* uplo, char *trans, char* diag, integer *N, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX) +{ + _starpu_ztrmv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_ztbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + _starpu_ztbmv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 0; +} + +int +f2c_ztpmv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX) +{ + _starpu_ztpmv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + +int +f2c_ztrsv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + _starpu_ztrsv_(uplo, trans, diag, + N, A, lda, X, incX); + return 0; +} + +int +f2c_ztbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX) +{ + _starpu_ztbsv_(uplo, trans, diag, + N, K, A, lda, X, incX); + return 
0; +} + +int +f2c_ztpsv(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX) +{ + _starpu_ztpsv_(uplo, trans, diag, + N, Ap, X, incX); + return 0; +} + + +/* + * Routines with S and D prefixes only + */ + +int +f2c_ssymv(char* uplo, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + _starpu_ssymv_(uplo, N, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_ssbmv(char* uplo, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + _starpu_ssbmv_(uplo, N, K, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_sspmv(char* uplo, integer* N, + real* alpha, + real* Ap, + real* X, integer* incX, + real* beta, + real* Y, integer* incY) +{ + _starpu_sspmv_(uplo, N, alpha, Ap, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_sger(integer* M, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda) +{ + _starpu_sger_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_ssyr(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* A, integer* lda) +{ + _starpu_ssyr_(uplo, N, alpha, X, incX, A, lda); + return 0; +} + +int +f2c_sspr(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Ap) +{ + _starpu_sspr_(uplo, N, alpha, X, incX, Ap); + return 0; +} + +int +f2c_ssyr2(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda) +{ + _starpu_ssyr2_(uplo, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_sspr2(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A) +{ + _starpu_sspr2_(uplo, N, alpha, + X, incX, Y, incY, A); + return 0; +} + + + +int +f2c_dsymv(char* uplo, integer* N, + doublereal* alpha, + doublereal* A, integer* 
lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + _starpu_dsymv_(uplo, N, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_dsbmv(char* uplo, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + _starpu_dsbmv_(uplo, N, K, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_dspmv(char* uplo, integer* N, + doublereal* alpha, + doublereal* Ap, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY) +{ + _starpu_dspmv_(uplo, N, alpha, Ap, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_dger(integer* M, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda) +{ + _starpu_dger_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_dsyr(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* A, integer* lda) +{ + _starpu_dsyr_(uplo, N, alpha, X, incX, A, lda); + return 0; +} + +int +f2c_dspr(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Ap) +{ + _starpu_dspr_(uplo, N, alpha, X, incX, Ap); + return 0; +} + +int +f2c_dsyr2(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda) +{ + _starpu_dsyr2_(uplo, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_dspr2(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A) +{ + _starpu_dspr2_(uplo, N, alpha, + X, incX, Y, incY, A); + return 0; +} + + + +/* + * Routines with C and Z prefixes only + */ + +int +f2c_chemv(char* uplo, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + 
_starpu_chemv_(uplo, N, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_chbmv(char* uplo, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + _starpu_chbmv_(uplo, N, K, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_chpmv(char* uplo, integer* N, + complex* alpha, + complex* Ap, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY) +{ + _starpu_chpmv_(uplo, N, alpha, Ap, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_cgeru(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + _starpu_cgeru_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_cgerc(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + _starpu_cgerc_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_cher(char* uplo, integer* N, + real* alpha, + complex* X, integer* incX, + complex* A, integer* lda) +{ + _starpu_cher_(uplo, N, alpha, + X, incX, A, lda); + return 0; +} + +int +f2c_chpr(char* uplo, integer* N, + real* alpha, + complex* X, integer* incX, + complex* Ap) +{ + _starpu_chpr_(uplo, N, alpha, + X, incX, Ap); + return 0; +} + +int +f2c_cher2(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda) +{ + _starpu_cher2_(uplo, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_chpr2(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* Ap) +{ + _starpu_chpr2_(uplo, N, alpha, + X, incX, Y, incY, Ap); + return 0; +} + + + +int +f2c_zhemv(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* 
incY) +{ + _starpu_zhemv_(uplo, N, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_zhbmv(char* uplo, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + _starpu_zhbmv_(uplo, N, K, alpha, A, lda, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_zhpmv(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* Ap, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY) +{ + _starpu_zhpmv_(uplo, N, alpha, Ap, + X, incX, beta, Y, incY); + return 0; +} + +int +f2c_zgeru(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + _starpu_zgeru_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_zgerc(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + _starpu_zgerc_(M, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_zher(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* A, integer* lda) +{ + _starpu_zher_(uplo, N, alpha, + X, incX, A, lda); + return 0; +} + +int +f2c_zhpr(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Ap) +{ + _starpu_zhpr_(uplo, N, alpha, + X, incX, Ap); + return 0; +} + +int +f2c_zher2(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda) +{ + _starpu_zher2_(uplo, N, alpha, + X, incX, Y, incY, A, lda); + return 0; +} + +int +f2c_zhpr2(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* Ap) +{ + _starpu_zhpr2_(uplo, N, alpha, + X, incX, Y, incY, Ap); + 
return 0; +} + + + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ + +int +f2c_sgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + _starpu_sgemm_(transA, transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_ssymm(char* side, char* uplo, integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + _starpu_ssymm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_ssyrk(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* beta, + real* C, integer* ldc) +{ + _starpu_ssyrk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_ssyr2k(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc) +{ + _starpu_ssyr2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_strmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb) +{ + _starpu_strmm_(side, uplo, + trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + +int +f2c_strsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb) +{ + _starpu_strsm_(side, uplo, + trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + + + +int +f2c_dgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* 
ldb, + doublereal* beta, + doublereal* C, integer* ldc) +{ + _starpu_dgemm_(transA, transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_dsymm(char* side, char* uplo, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc) +{ + _starpu_dsymm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_dsyrk(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* beta, + doublereal* C, integer* ldc) +{ + _starpu_dsyrk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_dsyr2k(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc) +{ + _starpu_dsyr2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_dtrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb) +{ + _starpu_dtrmm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + +int +f2c_dtrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb) +{ + _starpu_dtrsm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + + + +int +f2c_cgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + _starpu_cgemm_(transA, transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_csymm(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, 
+ complex* C, integer* ldc) +{ + _starpu_csymm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_csyrk(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* beta, + complex* C, integer* ldc) +{ + _starpu_csyrk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_csyr2k(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + _starpu_csyr2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_ctrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb) +{ + _starpu_ctrmm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + +int +f2c_ctrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb) +{ + _starpu_ctrsm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + + + +int +f2c_zgemm(char* transA, char* transB, integer* M, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + _starpu_zgemm_(transA, transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_zsymm(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + _starpu_zsymm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_zsyrk(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + 
_starpu_zsyrk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_zsyr2k(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc) +{ + _starpu_zsyr2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_ztrmm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb) +{ + _starpu_ztrmm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + +int +f2c_ztrsm(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb) +{ + _starpu_ztrsm_(side, uplo, trans, diag, + M, N, alpha, A, lda, B, ldb); + return 0; +} + + + +/* + * Routines with prefixes C and Z only + */ + +int +f2c_chemm(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc) +{ + _starpu_chemm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_cherk(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + complex* A, integer* lda, + real* beta, + complex* C, integer* ldc) +{ + _starpu_cherk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_cher2k(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + real* beta, + complex* C, integer* ldc) +{ + _starpu_cher2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + + + +int +f2c_zhemm(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* 
C, integer* ldc) +{ + _starpu_zhemm_(side, uplo, M, N, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + +int +f2c_zherk(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublecomplex* A, integer* lda, + doublereal* beta, + doublecomplex* C, integer* ldc) +{ + _starpu_zherk_(uplo, trans, N, K, + alpha, A, lda, beta, C, ldc); + return 0; +} + +int +f2c_zher2k(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublereal* beta, + doublecomplex* C, integer* ldc) +{ + _starpu_zher2k_(uplo, trans, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + return 0; +} + diff --git a/min-dgels/base/BLAS/WRAP/fblaswr.h b/min-dgels/base/BLAS/WRAP/fblaswr.h new file mode 100644 index 0000000..f4df4e5 --- /dev/null +++ b/min-dgels/base/BLAS/WRAP/fblaswr.h @@ -0,0 +1,851 @@ +real +_starpu_sdot_(integer* N, + real* X, integer* incX, + real* Y, integer* incY); + +doublereal +_starpu_ddot_(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY); + +void +_starpu_cdotu_(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY); + +void +_starpu_cdotc_(complex* retval, + integer* N, + complex* X, integer* incX, + complex* Y, integer* incY); + +void +_starpu_zdotu_(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY); + +void +_starpu_zdotc_(doublecomplex* retval, + integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY); + +real +_starpu_snrm2_(integer* N, + real* X, integer* incX); + +real +_starpu_sasum_(integer* N, + real* X, integer* incX); + +doublereal +_starpu_dnrm2_(integer* N, + doublereal* X, integer* incX); + +doublereal +_starpu_dasum_(integer* N, + doublereal* X, integer* incX); + +real +_starpu_scnrm2_(integer* N, + complex* X, integer* incX); + +real +_starpu_scasum_(integer* N, + complex* X, integer* incX); + +doublereal 
+_starpu_dznrm2_(integer* N, + doublecomplex* X, integer* incX); + +doublereal +_starpu_dzasum_(integer* N, + doublecomplex* X, integer* incX); + +integer +_starpu_isamax_(integer* N, + real* X, integer* incX); + +integer +_starpu_idamax_(integer* N, + doublereal* X, integer* incX); + +integer +_starpu_icamax_(integer* N, + complex* X, integer* incX); + +integer +_starpu_izamax_(integer* N, + doublecomplex* X, integer* incX); + +int +_starpu_sswap_(integer* N, + real* X, integer* incX, + real* Y, integer* incY); + +int +_starpu_scopy_(integer* N, + real* X, integer* incX, + real* Y, integer* incY); + +int +_starpu_saxpy_(integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY); + +int +_starpu_dswap_(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY); + +int +_starpu_dcopy_(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY); + +int +_starpu_daxpy_(integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY); + +int +_starpu_cswap_(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY); + +int +_starpu_ccopy_(integer* N, + complex* X, integer* incX, + complex* Y, integer* incY); + +int +_starpu_caxpy_(integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY); + +int +_starpu_zswap_(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY); + +int +_starpu_zcopy_(integer* N, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY); + +int +_starpu_zaxpy_(integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY); + +int +_starpu_srotg_(real* a, real* b, real* c, real* s); + +int +_starpu_srot_(integer* N, + real* X, integer* incX, + real* Y, integer* incY, + real* c, real* s); + +int +_starpu_crotg_(complex* a, complex* b, complex* c, complex* s); + +int +_starpu_drotg_(doublereal* a, doublereal* b, doublereal* c, doublereal* s); + 
+int +_starpu_drot_(integer* N, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* c, doublereal* s); + +int +_starpu_zrotg_(doublecomplex* a, doublecomplex* b, doublecomplex* c, doublecomplex* s); + +int +_starpu_sscal_(integer* N, + real* alpha, + real* X, integer* incX); + +int +_starpu_dscal_(integer* N, + doublereal* alpha, + doublereal* X, integer* incX); + +int +_starpu_cscal_(integer* N, + complex* alpha, + complex* X, integer* incX); + +int +_starpu_zscal_(integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX); + +int +_starpu_csscal_(integer* N, + real* alpha, + complex* X, integer* incX); + +int +_starpu_zdscal_(integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX); + +int +_starpu_sgemv_(char* trans, integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY); + +int +_starpu_sgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, + real *alpha, + real *A, integer *lda, + real *X, integer *incX, + real *beta, + real *Y, integer *incY); + +int +_starpu_strmv_(char* uplo, char *trans, char* diag, integer *N, + real *A, integer *lda, + real *X, integer *incX); + +int +_starpu_stbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX); + +int +_starpu_stpmv_(char* uplo, char* trans, char* diag, integer* N, + real* Ap, + real* X, integer* incX); + +int +_starpu_strsv_(char* uplo, char* trans, char* diag, integer* N, + real* A, integer* lda, + real* X, integer* incX); + +int +_starpu_stbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + real* A, integer* lda, + real* X, integer* incX); + +int +_starpu_stpsv_(char* uplo, char* trans, char* diag, integer* N, + real* Ap, + real* X, integer* incX); + +int +_starpu_dgemv_(char* trans, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + 
doublereal* beta, + doublereal* Y, integer* incY); + +int +_starpu_dgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublereal *alpha, + doublereal *A, integer *lda, + doublereal *X, integer *incX, + doublereal *beta, + doublereal *Y, integer *incY); + +int +_starpu_dtrmv_(char* uplo, char *trans, char* diag, integer *N, + doublereal *A, integer *lda, + doublereal *X, integer *incX); + +int +_starpu_dtbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX); + +int +_starpu_dtpmv_(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX); + +int +_starpu_dtrsv_(char* uplo, char* trans, char* diag, integer* N, + doublereal* A, integer* lda, + doublereal* X, integer* incX); + +int +_starpu_dtbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublereal* A, integer* lda, + doublereal* X, integer* incX); + +int +_starpu_dtpsv_(char* uplo, char* trans, char* diag, integer* N, + doublereal* Ap, + doublereal* X, integer* incX); + +int +_starpu_cgemv_(char* trans, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY); + +int +_starpu_cgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, + complex *alpha, + complex *A, integer *lda, + complex *X, integer *incX, + complex *beta, + complex *Y, integer *incY); + +int +_starpu_ctrmv_(char* uplo, char *trans, char* diag, integer *N, + complex *A, integer *lda, + complex *X, integer *incX); + +int +_starpu_ctbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* lda, + complex* X, integer* incX); + +int +_starpu_ctpmv_(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX); + +int +_starpu_ctrsv_(char* uplo, char* trans, char* diag, integer* N, + complex* A, integer* lda, + complex* X, integer* incX); + 
+int +_starpu_ctbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + complex* A, integer* lda, + complex* X, integer* incX); + +int +_starpu_ctpsv_(char* uplo, char* trans, char* diag, integer* N, + complex* Ap, + complex* X, integer* incX); + +int +_starpu_zgemv_(char* trans, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY); + +int +_starpu_zgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, + doublecomplex *alpha, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX, + doublecomplex *beta, + doublecomplex *Y, integer *incY); + +int +_starpu_ztrmv_(char* uplo, char *trans, char* diag, integer *N, + doublecomplex *A, integer *lda, + doublecomplex *X, integer *incX); + +int +_starpu_ztbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX); + +int +_starpu_ztpmv_(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX); + +int +_starpu_ztrsv_(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX); + +int +_starpu_ztbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX); + +int +_starpu_ztpsv_(char* uplo, char* trans, char* diag, integer* N, + doublecomplex* Ap, + doublecomplex* X, integer* incX); + +int +_starpu_ssymv_(char* uplo, integer* N, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY); + +int +_starpu_ssbmv_(char* uplo, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* X, integer* incX, + real* beta, + real* Y, integer* incY); + +int +_starpu_sspmv_(char* uplo, integer* N, + real* alpha, + real* Ap, + real* X, integer* incX, + real* beta, +
real* Y, integer* incY); + +int +_starpu_sger_(integer* M, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda); + +int +_starpu_ssyr_(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* A, integer* lda); + +int +_starpu_sspr_(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Ap); + +int +_starpu_ssyr2_(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A, integer* lda); + +int +_starpu_sspr2_(char* uplo, integer* N, + real* alpha, + real* X, integer* incX, + real* Y, integer* incY, + real* A); + +int +_starpu_dsymv_(char* uplo, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY); + +int +_starpu_dsbmv_(char* uplo, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY); + +int +_starpu_dspmv_(char* uplo, integer* N, + doublereal* alpha, + doublereal* Ap, + doublereal* X, integer* incX, + doublereal* beta, + doublereal* Y, integer* incY); + +int +_starpu_dger_(integer* M, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda); + +int +_starpu_dsyr_(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* A, integer* lda); + +int +_starpu_dspr_(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Ap); + +int +_starpu_dsyr2_(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A, integer* lda); + +int +_starpu_dspr2_(char* uplo, integer* N, + doublereal* alpha, + doublereal* X, integer* incX, + doublereal* Y, integer* incY, + doublereal* A); + +int +_starpu_chemv_(char* uplo, integer* N, + complex* alpha, + complex* A, 
integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY); + +int +_starpu_chbmv_(char* uplo, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY); + +int +_starpu_chpmv_(char* uplo, integer* N, + complex* alpha, + complex* Ap, + complex* X, integer* incX, + complex* beta, + complex* Y, integer* incY); + +int +_starpu_cgeru_(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda); + +int +_starpu_cgerc_(integer* M, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda); + +int +_starpu_cher_(char* uplo, integer* N, + real* alpha, + complex* X, integer* incX, + complex* A, integer* lda); + +int +_starpu_chpr_(char* uplo, integer* N, + real* alpha, + complex* X, integer* incX, + complex* Ap); + +int +_starpu_cher2_(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* A, integer* lda); + +int +_starpu_chpr2_(char* uplo, integer* N, + complex* alpha, + complex* X, integer* incX, + complex* Y, integer* incY, + complex* Ap); + +int +_starpu_zhemv_(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY); + +int +_starpu_zhbmv_(char* uplo, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY); + +int +_starpu_zhpmv_(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* Ap, + doublecomplex* X, integer* incX, + doublecomplex* beta, + doublecomplex* Y, integer* incY); + +int +_starpu_zgeru_(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + 
doublecomplex* A, integer* lda); + +int +_starpu_zgerc_(integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda); + +int +_starpu_zher_(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* A, integer* lda); + +int +_starpu_zhpr_(char* uplo, integer* N, + doublereal* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Ap); + +int +_starpu_zher2_(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* A, integer* lda); + +int +_starpu_zhpr2_(char* uplo, integer* N, + doublecomplex* alpha, + doublecomplex* X, integer* incX, + doublecomplex* Y, integer* incY, + doublecomplex* Ap); + +int +_starpu_sgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc); + +int +_starpu_ssymm_(char* side, char* uplo, integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc); + +int +_starpu_ssyrk_(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* beta, + real* C, integer* ldc); + +int +_starpu_ssyr2k_(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb, + real* beta, + real* C, integer* ldc); + +int +_starpu_strmm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb); + +int +_starpu_strsm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + real* alpha, + real* A, integer* lda, + real* B, integer* ldb); + +int +_starpu_dgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* 
ldb, + doublereal* beta, + doublereal* C, integer* ldc); + +int +_starpu_dsymm_(char* side, char* uplo, integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc); + +int +_starpu_dsyrk_(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* beta, + doublereal* C, integer* ldc); + +int +_starpu_dsyr2k_(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb, + doublereal* beta, + doublereal* C, integer* ldc); + +int +_starpu_dtrmm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb); + +int +_starpu_dtrsm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublereal* alpha, + doublereal* A, integer* lda, + doublereal* B, integer* ldb); + +int +_starpu_cgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc); + +int +_starpu_csymm_(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc); + +int +_starpu_csyrk_(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* beta, + complex* C, integer* ldc); + +int +_starpu_csyr2k_(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc); + +int +_starpu_ctrmm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb); + +int +_starpu_ctrsm_(char* side, char* uplo, char* trans, 
char* diag, + integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb); + +int +_starpu_zgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_zsymm_(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_zsyrk_(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_zsyr2k_(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_ztrmm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb); + +int +_starpu_ztrsm_(char* side, char* uplo, char* trans, char* diag, + integer* M, integer* N, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb); + +int +_starpu_chemm_(char* side, char* uplo, integer* M, integer* N, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + complex* beta, + complex* C, integer* ldc); + +int +_starpu_cherk_(char* uplo, char* trans, integer* N, integer* K, + real* alpha, + complex* A, integer* lda, + real* beta, + complex* C, integer* ldc); + +int +_starpu_cher2k_(char* uplo, char* trans, integer* N, integer* K, + complex* alpha, + complex* A, integer* lda, + complex* B, integer* ldb, + real* beta, + complex* C, integer* ldc); + +int +_starpu_zhemm_(char* side, char* uplo, integer* M, integer* N, + doublecomplex* alpha, + 
doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublecomplex* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_zherk_(char* uplo, char* trans, integer* N, integer* K, + doublereal* alpha, + doublecomplex* A, integer* lda, + doublereal* beta, + doublecomplex* C, integer* ldc); + +int +_starpu_zher2k_(char* uplo, char* trans, integer* N, integer* K, + doublecomplex* alpha, + doublecomplex* A, integer* lda, + doublecomplex* B, integer* ldb, + doublereal* beta, + doublecomplex* C, integer* ldc); diff --git a/min-dgels/base/BLAS/dblat2.in b/min-dgels/base/BLAS/dblat2.in new file mode 100644 index 0000000..d436350 --- /dev/null +++ b/min-dgels/base/BLAS/dblat2.in @@ -0,0 +1,34 @@ +'dblat2.out' NAME OF SUMMARY OUTPUT FILE +6 UNIT NUMBER OF SUMMARY FILE +'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +F LOGICAL FLAG, T TO STOP ON FAILURES. +T LOGICAL FLAG, T TO TEST ERROR EXITS. +16.0 THRESHOLD VALUE OF TEST RATIO +6 NUMBER OF VALUES OF N +0 1 2 3 5 9 VALUES OF N +4 NUMBER OF VALUES OF K +0 1 2 4 VALUES OF K +4 NUMBER OF VALUES OF INCX AND INCY +1 2 -1 -2 VALUES OF INCX AND INCY +3 NUMBER OF VALUES OF ALPHA +0.0 1.0 0.7 VALUES OF ALPHA +3 NUMBER OF VALUES OF BETA +0.0 1.0 0.9 VALUES OF BETA +DGEMV T PUT F FOR NO TEST. SAME COLUMNS. +DGBMV T PUT F FOR NO TEST. SAME COLUMNS. +DSYMV T PUT F FOR NO TEST. SAME COLUMNS. +DSBMV T PUT F FOR NO TEST. SAME COLUMNS. +DSPMV T PUT F FOR NO TEST. SAME COLUMNS. +DTRMV T PUT F FOR NO TEST. SAME COLUMNS. +DTBMV T PUT F FOR NO TEST. SAME COLUMNS. +DTPMV T PUT F FOR NO TEST. SAME COLUMNS. +DTRSV T PUT F FOR NO TEST. SAME COLUMNS. +DTBSV T PUT F FOR NO TEST. SAME COLUMNS. +DTPSV T PUT F FOR NO TEST. SAME COLUMNS. +DGER T PUT F FOR NO TEST. SAME COLUMNS. +DSYR T PUT F FOR NO TEST. SAME COLUMNS. +DSPR T PUT F FOR NO TEST. SAME COLUMNS. +DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +DSPR2 T PUT F FOR NO TEST. 
SAME COLUMNS. diff --git a/min-dgels/base/BLAS/dblat3.in b/min-dgels/base/BLAS/dblat3.in new file mode 100644 index 0000000..0098f3e --- /dev/null +++ b/min-dgels/base/BLAS/dblat3.in @@ -0,0 +1,20 @@ +'dblat3.out' NAME OF SUMMARY OUTPUT FILE +6 UNIT NUMBER OF SUMMARY FILE +'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +F LOGICAL FLAG, T TO STOP ON FAILURES. +T LOGICAL FLAG, T TO TEST ERROR EXITS. +16.0 THRESHOLD VALUE OF TEST RATIO +6 NUMBER OF VALUES OF N +0 1 2 3 5 9 VALUES OF N +3 NUMBER OF VALUES OF ALPHA +0.0 1.0 0.7 VALUES OF ALPHA +3 NUMBER OF VALUES OF BETA +0.0 1.0 1.3 VALUES OF BETA +DGEMM T PUT F FOR NO TEST. SAME COLUMNS. +DSYMM T PUT F FOR NO TEST. SAME COLUMNS. +DTRMM T PUT F FOR NO TEST. SAME COLUMNS. +DTRSM T PUT F FOR NO TEST. SAME COLUMNS. +DSYRK T PUT F FOR NO TEST. SAME COLUMNS. +DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. diff --git a/min-dgels/base/COPYING b/min-dgels/base/COPYING new file mode 100644 index 0000000..d7bf953 --- /dev/null +++ b/min-dgels/base/COPYING @@ -0,0 +1,36 @@ +Copyright (c) 1992-2008 The University of Tennessee. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/min-dgels/base/F2CLIBS/libf2c/Makefile b/min-dgels/base/F2CLIBS/libf2c/Makefile new file mode 100644 index 0000000..dfb9529 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/Makefile @@ -0,0 +1,220 @@ + +TOPDIR=../.. +include $(TOPDIR)/make.inc + +# Unix makefile: see README. +# For C++, first "make hadd". +# If your compiler does not recognize ANSI C, add +# -DKR_headers +# to the CFLAGS = line below. +# On Sun and other BSD systems that do not provide an ANSI sprintf, add +# -DUSE_STRLEN +# to the CFLAGS = line below. +# On Linux systems, add +# -DNON_UNIX_STDIO +# to the CFLAGS = line below. For libf2c.so under Linux, also add +# -fPIC +# to the CFLAGS = line below. + +.SUFFIXES: .c .o + +# compile, then strip unnecessary symbols +.c.o: + $(CC) -c -DSkip_f2c_Undefs $(CFLAGS) $*.c + $(LD) -r -x -o $*.xxx $*.o + mv $*.xxx $*.o +## Under Solaris (and other systems that do not understand ld -x), +## omit -x in the ld line above. +## If your system does not have the ld command, comment out +## or remove both the ld and mv lines above. 
+ +MISC = f77vers.o i77vers.o main.o s_rnge.o abort_.o exit_.o getarg_.o iargc_.o\ + getenv_.o signal_.o s_stop.o s_paus.o system_.o cabs.o ctype.o\ + derf_.o derfc_.o erf_.o erfc_.o sig_die.o uninit.o +POW = pow_ci.o pow_dd.o pow_di.o pow_hh.o pow_ii.o pow_ri.o pow_zi.o pow_zz.o +CX = c_abs.o c_cos.o c_div.o c_exp.o c_log.o c_sin.o c_sqrt.o +DCX = z_abs.o z_cos.o z_div.o z_exp.o z_log.o z_sin.o z_sqrt.o +REAL = r_abs.o r_acos.o r_asin.o r_atan.o r_atn2.o r_cnjg.o r_cos.o\ + r_cosh.o r_dim.o r_exp.o r_imag.o r_int.o\ + r_lg10.o r_log.o r_mod.o r_nint.o r_sign.o\ + r_sin.o r_sinh.o r_sqrt.o r_tan.o r_tanh.o +DBL = d_abs.o d_acos.o d_asin.o d_atan.o d_atn2.o\ + d_cnjg.o d_cos.o d_cosh.o d_dim.o d_exp.o\ + d_imag.o d_int.o d_lg10.o d_log.o d_mod.o\ + d_nint.o d_prod.o d_sign.o d_sin.o d_sinh.o\ + d_sqrt.o d_tan.o d_tanh.o +INT = i_abs.o i_dim.o i_dnnt.o i_indx.o i_len.o i_len_trim.o i_mod.o i_nint.o i_sign.o\ + lbitbits.o lbitshft.o i_ceiling.o +HALF = h_abs.o h_dim.o h_dnnt.o h_indx.o h_len.o h_mod.o h_nint.o h_sign.o +CMP = l_ge.o l_gt.o l_le.o l_lt.o hl_ge.o hl_gt.o hl_le.o hl_lt.o +EFL = ef1asc_.o ef1cmc_.o +CHAR = f77_aloc.o s_cat.o s_cmp.o s_copy.o +I77 = backspac.o close.o dfe.o dolio.o due.o endfile.o err.o\ + fmt.o fmtlib.o ftell_.o iio.o ilnw.o inquire.o lread.o lwrite.o\ + open.o rdfmt.o rewind.o rsfe.o rsli.o rsne.o sfe.o sue.o\ + typesize.o uio.o util.o wref.o wrtfmt.o wsfe.o wsle.o wsne.o xwsne.o +QINT = pow_qq.o qbitbits.o qbitshft.o ftell64_.o +TIME = dtime_.o etime_.o + +# If you get an error compiling dtime_.c or etime_.c, try adding +# -DUSE_CLOCK to the CFLAGS assignment above; if that does not work, +# omit $(TIME) from OFILES = assignment below. + +# To get signed zeros in write statements on IEEE-arithmetic systems, +# add -DSIGNED_ZEROS to the CFLAGS assignment below and add signbit.o +# to the end of the OFILES = assignment below. 
+ +# For INTEGER*8 support (which requires system-dependent adjustments to +# f2c.h), add $(QINT) to the OFILES = assignment below... + +OFILES = $(MISC) $(POW) $(CX) $(DCX) $(REAL) $(DBL) $(INT) \ + $(HALF) $(CMP) $(EFL) $(CHAR) $(I77) $(TIME) + +all: f2c.h signal1.h sysdep1.h libminf2c.a + +libminf2c.a: $(OFILES) + $(ARCH) $(ARCHFLAGS) $(F2CLIB) $? + $(RANLIB) $(F2CLIB) + +## Shared-library variant: the following rule works on Linux +## systems. Details are system-dependent. Under Linux, -fPIC +## must appear in the CFLAGS assignment when making libf2c.so. +## Under Solaris, use -Kpic in CFLAGS and use "ld -G" instead +## of "cc -shared". + +libf2c.so: $(OFILES) + $(CC) -shared -o libf2c.so $(OFILES) + +### If your system lacks ranlib, you don't need it; see README. + +f77vers.o: f77vers.c + $(CC) -c f77vers.c + +i77vers.o: i77vers.c + $(CC) -c i77vers.c + +# To get an "f2c.h" for use with "f2c -C++", first "make hadd" +hadd: f2c.h0 f2ch.add + cat f2c.h0 f2ch.add >f2c.h + +# For use with "f2c" and "f2c -A": +f2c.h: f2c.h0 + cp f2c.h0 f2c.h + +# You may need to adjust signal1.h and sysdep1.h suitably for your system... 
+signal1.h: signal1.h0 + cp signal1.h0 signal1.h + +sysdep1.h: sysdep1.h0 + cp sysdep1.h0 sysdep1.h + +# If your system lacks onexit() and you are not using an +# ANSI C compiler, then you should uncomment the following +# two lines (for compiling main.o): +#main.o: main.c +# $(CC) -c -DNO_ONEXIT -DSkip_f2c_Undefs main.c +# On at least some Sun systems, it is more appropriate to +# uncomment the following two lines: +#main.o: main.c +# $(CC) -c -Donexit=on_exit -DSkip_f2c_Undefs main.c + +clean: + rm -f libminf2c.a *.o arith.h signal1.h sysdep1.h + +backspac.o: fio.h +close.o: fio.h +dfe.o: fio.h +dfe.o: fmt.h +due.o: fio.h +endfile.o: fio.h rawio.h +err.o: fio.h rawio.h +fmt.o: fio.h +fmt.o: fmt.h +iio.o: fio.h +iio.o: fmt.h +ilnw.o: fio.h +ilnw.o: lio.h +inquire.o: fio.h +lread.o: fio.h +lread.o: fmt.h +lread.o: lio.h +lread.o: fp.h +lwrite.o: fio.h +lwrite.o: fmt.h +lwrite.o: lio.h +open.o: fio.h rawio.h +rdfmt.o: fio.h +rdfmt.o: fmt.h +rdfmt.o: fp.h +rewind.o: fio.h +rsfe.o: fio.h +rsfe.o: fmt.h +rsli.o: fio.h +rsli.o: lio.h +rsne.o: fio.h +rsne.o: lio.h +sfe.o: fio.h +signbit.o: arith.h +sue.o: fio.h +uio.o: fio.h +uninit.o: arith.h +util.o: fio.h +wref.o: fio.h +wref.o: fmt.h +wref.o: fp.h +wrtfmt.o: fio.h +wrtfmt.o: fmt.h +wsfe.o: fio.h +wsfe.o: fmt.h +wsle.o: fio.h +wsle.o: fmt.h +wsle.o: lio.h +wsne.o: fio.h +wsne.o: lio.h +xwsne.o: fio.h +xwsne.o: lio.h +xwsne.o: fmt.h + +main.o: signal1.h +signal_.o: signal1.h +s_paus.o: signal1.h + +err.o: sysdep1.h +fio.h: sysdep1.h +util.c: sysdep1.h + +arith.h: arithchk.c + $(CC) $(CFLAGS) -DNO_FPINIT arithchk.c -lm ||\ + $(CC) -DNO_LONG_LONG $(CFLAGS) -DNO_FPINIT arithchk.c -lm + ./a.out >arith.h + rm -f a.out arithchk.o + +check: + xsum Notice README abort_.c arithchk.c backspac.c c_abs.c c_cos.c \ + c_div.c c_exp.c c_log.c c_sin.c c_sqrt.c cabs.c close.c comptry.bat \ + ctype.c ctype.h \ + d_abs.c d_acos.c d_asin.c d_atan.c d_atn2.c d_cnjg.c d_cos.c d_cosh.c \ + d_dim.c d_exp.c d_imag.c d_int.c d_lg10.c d_log.c 
d_mod.c \ + d_nint.c d_prod.c d_sign.c d_sin.c d_sinh.c d_sqrt.c d_tan.c \ + d_tanh.c derf_.c derfc_.c dfe.c dolio.c dtime_.c due.c ef1asc_.c \ + ef1cmc_.c endfile.c erf_.c erfc_.c err.c etime_.c exit_.c f2c.h0 \ + f2ch.add f77_aloc.c f77vers.c fio.h fmt.c fmt.h fmtlib.c \ + fp.h ftell_.c ftell64_.c i_ceiling.c \ + getarg_.c getenv_.c h_abs.c h_dim.c h_dnnt.c h_indx.c h_len.c \ + h_mod.c h_nint.c h_sign.c hl_ge.c hl_gt.c hl_le.c hl_lt.c \ + i77vers.c i_abs.c i_dim.c i_dnnt.c i_indx.c i_len.c i_len_trim.c i_mod.c \ + i_nint.c i_sign.c iargc_.c iio.c ilnw.c inquire.c l_ge.c l_gt.c \ + l_le.c l_lt.c lbitbits.c lbitshft.c libf2c.lbc libf2c.sy lio.h \ + lread.c lwrite.c main.c makefile.sy makefile.u makefile.vc \ + makefile.wat math.hvc mkfile.plan9 open.c pow_ci.c pow_dd.c \ + pow_di.c pow_hh.c pow_ii.c pow_qq.c pow_ri.c pow_zi.c pow_zz.c \ + qbitbits.c qbitshft.c r_abs.c r_acos.c r_asin.c r_atan.c r_atn2.c \ + r_cnjg.c r_cos.c r_cosh.c r_dim.c r_exp.c r_imag.c r_int.c r_lg10.c \ + r_log.c r_mod.c r_nint.c r_sign.c r_sin.c r_sinh.c r_sqrt.c \ + r_tan.c r_tanh.c rawio.h rdfmt.c rewind.c rsfe.c rsli.c rsne.c \ + s_cat.c s_cmp.c s_copy.c s_paus.c s_rnge.c s_stop.c scomptry.bat sfe.c \ + sig_die.c signal1.h0 signal_.c signbit.c sue.c sysdep1.h0 system_.c \ + typesize.c \ + uio.c uninit.c util.c wref.c wrtfmt.c wsfe.c wsle.c wsne.c xwsne.c \ + z_abs.c z_cos.c z_div.c z_exp.c z_log.c z_sin.c z_sqrt.c >xsum1.out + cmp xsum0.out xsum1.out && mv xsum1.out xsum.out || diff xsum[01].out diff --git a/min-dgels/base/F2CLIBS/libf2c/Notice b/min-dgels/base/F2CLIBS/libf2c/Notice new file mode 100644 index 0000000..261b719 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/Notice @@ -0,0 +1,23 @@ +/**************************************************************** +Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
+ +Permission to use, copy, modify, and distribute this software +and its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the names of AT&T, Bell Laboratories, +Lucent or Bellcore or any of their entities not be used in +advertising or publicity pertaining to distribution of the +software without specific, written prior permission. + +AT&T, Lucent and Bellcore disclaim all warranties with regard to +this software, including all implied warranties of +merchantability and fitness. In no event shall AT&T, Lucent or +Bellcore be liable for any special, indirect or consequential +damages or any damages whatsoever resulting from loss of use, +data or profits, whether in an action of contract, negligence or +other tortious action, arising out of or in connection with the +use or performance of this software. +****************************************************************/ + diff --git a/min-dgels/base/F2CLIBS/libf2c/README b/min-dgels/base/F2CLIBS/libf2c/README new file mode 100644 index 0000000..940a354 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/README @@ -0,0 +1,374 @@ +As shipped, "makefile" is a copy of "makefile.u", a Unix makefile. +Variants for other systems have names of the form makefile.* and +have initial comments saying how to invoke them. You may wish to +copy one of the other makefile.* files to makefile. + +If you use a C++ compiler, first say + + make hadd + +to create a suitable f2c.h from f2c.h0 and f2ch.add. Otherwise, + + make f2c.h + +will just copy f2c.h0 to f2c.h . + +If your compiler does not recognize ANSI C headers, +compile with KR_headers defined: either add -DKR_headers +to the definition of CFLAGS in the makefile, or insert + +#define KR_headers + +at the top of f2c.h . 
+ +If your system lacks onexit() and you are not using an ANSI C +compiler, then you should compile main.c with NO_ONEXIT defined. +See the comments about onexit in makefile.u. + +If your system has a double drem() function such that drem(a,b) +is the IEEE remainder function (with double a, b), then you may +wish to compile r_mod.c and d_mod.c with IEEE_drem defined. + +To check for transmission errors, issue the command + make check +or + make -f makefile.u check + +This assumes you have the xsum program whose source, xsum.c, +is distributed as part of "all from f2c/src", and that it +is installed somewhere in your search path. If you do not +have xsum, you can obtain xsum.c by sending the following E-mail +message to netlib@netlib.bell-labs.com + send xsum.c from f2c/src + +For convenience, the f2c.h0 in this directory is a copy of netlib's +"f2c.h from f2c". It is best to install f2c.h in a standard place, +so "include f2c.h" will work in any directory without further ado. +Beware that the makefiles do not cause recompilation when f2c.h is +changed. + +On machines, such as those using a DEC Alpha processor, on which +sizeof(short) == 2, sizeof(int) == sizeof(float) == 4, and +sizeof(long) == sizeof(double) == 8, it suffices to modify f2c.h by +removing the first occurrence of "long " on each line containing +"long ". On Unix systems, you can do this by issuing the commands + mv f2c.h f2c.h0 + sed 's/long int /int /' f2c.h0 >f2c.h +On such machines, one can enable INTEGER*8 by uncommenting the typedefs +of longint and ulongint in f2c.h and adjusting them, so they read + typedef long longint; + typedef unsigned long ulongint; +and by compiling libf2c with -DAllow_TYQUAD, as discussed below. + + +Most of the routines in libf2c are support routines for Fortran +intrinsic functions or for operations that f2c chooses not +to do "in line". 
There are a few exceptions, summarized below -- +functions and subroutines that appear to your program as ordinary +external Fortran routines. + +If you use the REAL valued functions listed below (ERF, ERFC, +DTIME, and ETIME) with "f2c -R", then you need to compile the +corresponding source files with -DREAL=float. To do this, it is +perhaps simplest to add "-DREAL=float" to CFLAGS in the makefile. + +1. CALL ABORT prints a message and causes a core dump. + +2. ERF(r) and DERF(d) are the REAL and DOUBLE PRECISION + error functions (with r REAL and d DOUBLE PRECISION); + DERF must be declared DOUBLE PRECISION in your program. + Both ERF and DERF assume your C library provides the + underlying erf() function (which not all systems do). + +3. ERFC(r) and DERFC(d) are the complementary error functions: + ERFC(r) = 1 - ERF(r) and DERFC(d) = 1.d0 - DERF(d) + (except that their results may be more accurate than + explicitly evaluating the above formulae would give). + Again, ERFC and r are REAL, and DERFC and d are DOUBLE + PRECISION (and must be declared as such in your program), + and ERFC and DERFC rely on your system's erfc(). + +4. CALL GETARG(n,s), where n is an INTEGER and s is a CHARACTER + variable, sets s to the n-th command-line argument (or to + all blanks if there are fewer than n command-line arguments); + CALL GETARG(0,s) sets s to the name of the program (on systems + that support this feature). See IARGC below. + +5. CALL GETENV(name, value), where name and value are of type + CHARACTER, sets value to the environment value, $name, of + name (or to blanks if $name has not been set). + +6. NARGS = IARGC() sets NARGS to the number of command-line + arguments (an INTEGER value). + +7. CALL SIGNAL(n,func), where n is an INTEGER and func is an + EXTERNAL procedure, arranges for func to be invoked when n + occurs (on systems where this makes sense).
+ +If your compiler complains about the signal calls in main.c, s_paus.c, +and signal_.c, you may need to adjust signal1.h suitably. See the +comments in signal1.h. + +8. ETIME(ARR) and DTIME(ARR) are REAL functions that return + execution times. ARR is declared REAL ARR(2). The elapsed + user and system CPU times are stored in ARR(1) and ARR(2), + respectively. ETIME returns the total elapsed CPU time, + i.e., ARR(1) + ARR(2). DTIME returns total elapsed CPU + time since the previous call on DTIME. + +9. CALL SYSTEM(cmd), where cmd is of type CHARACTER, passes + cmd to the system's command processor (on systems where + this can be done). + +10. CALL FLUSH flushes all buffers. + +11. FTELL(i) is an INTEGER function that returns the current + offset of Fortran unit i (or -1 if unit i is not open). + +12. CALL FSEEK(i, offset, whence, *errlab) attempts to move + Fortran unit i to the specified offset: absolute offset + if whence = 0; relative to the current offset if whence = 1; + relative to the end of the file if whence = 2. It branches + to label errlab if unit i is not open or if the call + otherwise fails. + +The routines whose objects are makefile.u's $(I77) are for I/O. +The following comments apply to them. + +If your system lacks /usr/include/local.h , +then you should create an appropriate local.h in +this directory. An appropriate local.h may simply +be empty, or it may #define VAX or #define CRAY +(or whatever else you must do to make fp.h work right). +Alternatively, edit fp.h to suit your machine. + +If your system lacks /usr/include/fcntl.h , then you +should simply create an empty fcntl.h in this directory. +If your compiler then complains about creat and open not +having a prototype, compile with OPEN_DECL defined. +On many systems, open and creat are declared in fcntl.h .
+ +If your system's sprintf does not work the way ANSI C +specifies -- specifically, if it does not return the +number of characters transmitted -- then insert the line + +#define USE_STRLEN + +at the end of fmt.h . This is necessary with +at least some versions of Sun software. +In particular, if you get a warning about an improper +pointer/integer combination in compiling wref.c, then +you need to compile with -DUSE_STRLEN . + +If your system's fopen does not like the ANSI binary +reading and writing modes "rb" and "wb", then you should +compile open.c with NON_ANSI_RW_MODES #defined. + +If you get error messages about references to cf->_ptr +and cf->_base when compiling wrtfmt.c and wsfe.c or to +stderr->_flag when compiling err.c, then insert the line + +#define NON_UNIX_STDIO + +at the beginning of fio.h, and recompile everything (or +at least those modules that contain NON_UNIX_STDIO). + +Unformatted sequential records consist of a length of record +contents, the record contents themselves, and the length of +record contents again (for backspace). Prior to 17 Oct. 1991, +the length was of type int; now it is of type long, but you +can change it back to int by inserting + +#define UIOLEN_int + +at the beginning of fio.h. This affects only sue.c and uio.c . + +If you have a really ancient K&R C compiler that does not understand +void, add -Dvoid=int to the definition of CFLAGS in the makefile. + +On VAX, Cray, or Research Tenth-Edition Unix systems, you may +need to add -DVAX, -DCRAY, or -DV10 (respectively) to CFLAGS +to make fp.h work correctly. Alternatively, you may need to +edit fp.h to suit your machine. + +If your compiler complains about the signal calls in main.c, s_paus.c, +and signal_.c, you may need to adjust signal1.h suitably. See the +comments in signal1.h. 
+ +You may need to supply the following non-ANSI routines: + + fstat(int fileds, struct stat *buf) is similar +to stat(char *name, struct stat *buf), except that +the first argument, fileds, is the file descriptor +returned by open rather than the name of the file. +fstat is used in the system-dependent routine +canseek (in the libf2c source file err.c), which +is supposed to return 1 if it's possible to issue +seeks on the file in question, 0 if it's not; you may +need to suitably modify err.c . On non-UNIX systems, +you can avoid references to fstat and stat by compiling +with NON_UNIX_STDIO defined; in that case, you may need +to supply access(char *Name,0), which is supposed to +return 0 if file Name exists, nonzero otherwise. + + char * mktemp(char *buf) is supposed to replace the +6 trailing X's in buf with a unique number and then +return buf. The idea is to get a unique name for +a temporary file. + +On non-UNIX systems, you may need to change a few other things, +e.g.: the form of name computed by mktemp() in endfile.c and +open.c; the use of the open(), close(), and creat() system +calls in endfile.c, err.c, open.c; and the modes in calls on +fopen() and fdopen() (and perhaps the use of fdopen() itself +-- it's supposed to return a FILE* corresponding to a given +integer file descriptor) in err.c and open.c (component ufmt +of struct unit is 1 for formatted I/O -- text mode on some systems +-- and 0 for unformatted I/O -- binary mode on some systems). +Compiling with -DNON_UNIX_STDIO omits all references to creat() +and almost all references to open() and close(), the exception +being in the function f__isdev() (in open.c).
+ +If you wish to use translated Fortran that has funny notions +of record length for direct unformatted I/O (i.e., that assumes +RECL= values in OPEN statements are not bytes but rather counts +of some other units -- e.g., 4-character words for VMS), then you +should insert an appropriate #define for url_Adjust at the +beginning of open.c . For VMS Fortran, for example, +#define url_Adjust(x) x *= 4 +would suffice. + +By default, Fortran I/O units 5, 6, and 0 are pre-connected to +stdin, stdout, and stderr, respectively. You can change this +behavior by changing f_init() in err.c to suit your needs. +Note that f2c assumes READ(*... means READ(5... and WRITE(*... +means WRITE(6... . Moreover, an OPEN(n,... statement that does +not specify a file name (and does not specify STATUS='SCRATCH') +assumes FILE='fort.n' . You can change this by editing open.c +and endfile.c suitably. + +Unless you adjust the "#define MXUNIT" line in fio.h, Fortran units +0, 1, ..., 99 are available, i.e., the highest allowed unit number +is MXUNIT - 1. + +Lines protected from compilation by #ifdef Allow_TYQUAD +are for a possible extension to 64-bit integers in which +integer = int = 32 bits and longint = long = 64 bits. + +The makefile does not attempt to compile pow_qq.c, qbitbits.c, +and qbitshft.c, which are meant for use with INTEGER*8. To use +INTEGER*8, you must modify f2c.h to declare longint and ulongint +appropriately; then add $(QINT) to the end of the makefile's +dependency list for libf2c.a (if makefile is a copy of makefile.u; +for the PC makefiles, add pow_qq.obj qbitbits.obj qbitshft.obj +to the library's dependency list and adjust libf2c.lbc or libf2c.sy +accordingly). Also add -DAllow_TYQUAD to the makefile's CFLAGS +assignment. To make longint and ulongint available, it may suffice +to add -DINTEGER_STAR_8 to the CFLAGS assignment. 
+ +Following Fortran 90, s_cat.c and s_copy.c allow the target of a +(character string) assignment to appear on its right-hand side, at +the cost of some extra overhead for all run-time concatenations. +If you prefer the extra efficiency that comes with the Fortran 77 +requirement that the left-hand side of a character assignment not +be involved in the right-hand side, compile s_cat.c and s_copy.c +with -DNO_OVERWRITE . + +Extensions (Feb. 1993) to NAMELIST processing: + 1. Reading a ? instead of &name (the start of a namelist) causes +the namelist being sought to be written to stdout (unit 6); +to omit this feature, compile rsne.c with -DNo_Namelist_Questions. + 2. Reading the wrong namelist name now leads to an error message +and an attempt to skip input until the right namelist name is found; +to omit this feature, compile rsne.c with -DNo_Bad_Namelist_Skip. + 3. Namelist writes now insert newlines before each variable; to omit +this feature, compile xwsne.c with -DNo_Extra_Namelist_Newlines. + 4. (Sept. 1995) When looking for the &name that starts namelist +input, lines whose first non-blank character is something other +than &, $, or ? are treated as comment lines and ignored, unless +rsne.c is compiled with -DNo_Namelist_Comments. + +Nonstandard extension (Feb. 1993) to open: for sequential files, +ACCESS='APPEND' (or access='anything else starting with "A" or "a"') +causes the file to be positioned at end-of-file, so a write will +append to the file. + +Some buggy Fortran programs use unformatted direct I/O to write +an incomplete record and later read more from that record than +they have written. For records other than the last, the unwritten +portion of the record reads as binary zeros. The last record is +a special case: attempting to read more from it than was written +gives end-of-file -- which may help one find a bug.
Some other +Fortran I/O libraries treat the last record no differently than +others and thus give no help in finding the bug of reading more +than was written. If you wish to have this behavior, compile +uio.c with -DPad_UDread . + +If you want to be able to catch write failures (e.g., due to a +disk being full) with an ERR= specifier, compile dfe.c, due.c, +sfe.c, sue.c, and wsle.c with -DALWAYS_FLUSH. This will lead to +slower execution and more I/O, but should make ERR= work as +expected, provided fflush returns an error return when its +physical write fails. + +Carriage controls are meant to be interpreted by the UNIX col +program (or a similar program). Sometimes it's convenient to use +only ' ' as the carriage control character (normal single spacing). +If you compile lwrite.c and wsfe.c with -DOMIT_BLANK_CC, formatted +external output lines will have an initial ' ' quietly omitted, +making use of the col program unnecessary with output that only +has ' ' for carriage control. + +The Fortran 77 Standard leaves it up to the implementation whether +formatted writes of floating-point numbers of absolute value < 1 have +a zero before the decimal point. By default, libI77 omits such +superfluous zeros, but you can cause them to appear by compiling +lwrite.c, wref.c, and wrtfmt.c with -DWANT_LEAD_0 . + +If your (Unix) system lacks a ranlib command, you don't need it. +Either comment out the makefile's ranlib invocation, or install +a harmless "ranlib" command somewhere in your PATH, such as the +one-line shell script + + exit 0 + +or (on some systems) + + exec /usr/bin/ar lts $1 >/dev/null + +By default, the routines that implement complex and double complex +division, c_div.c and z_div.c, call sig_die to print an error message +and exit if they see a divisor of 0, as this is sometimes helpful for +debugging. 
On systems with IEEE arithmetic, compiling c_div.c and +z_div.c with -DIEEE_COMPLEX_DIVIDE causes them instead to set both +the real and imaginary parts of the result to +INFINITY if the +numerator is nonzero, or to NaN if it vanishes. + +Nowadays most Unix and Linux systems have function + int ftruncate(int fildes, off_t len); +defined in system header file unistd.h that adjusts the length of file +descriptor fildes to length len. Unless endfile.c is compiled with +-DNO_TRUNCATE, endfile.c #includes "unistd.h" and calls ftruncate() if +necessary to shorten files. If your system lacks ftruncate(), compile +endfile.c with -DNO_TRUNCATE to make endfile.c use the older and more +portable scheme of shortening a file by copying to a temporary file +and back again. + +The initializations for "f2c -trapuv" are done by _uninit_f2c(), +whose source is uninit.c, introduced June 2001. On IEEE-arithmetic +systems, _uninit_f2c should initialize floating-point variables to +signaling NaNs and, at its first invocation, should enable the +invalid operation exception. Alas, the rules for distinguishing +signaling from quiet NaNs were not specified in the IEEE P754 standard, +nor were the precise means of enabling and disabling IEEE-arithmetic +exceptions, and these details are thus system dependent. There are +#ifdef's in uninit.c that specify them for some popular systems. If +yours is not one of these systems, it may take some detective work to +discover the appropriate details for your system. Sometimes it helps +to look in the standard include directories for header files with +relevant-sounding names, such as ieeefp.h, nan.h, or trap.h, and +it may be simplest to run experiments to see what distinguishes a +signaling from a quiet NaN. (If x is initialized to a signaling +NaN and the invalid operation exception is masked off, as it should +be by default on IEEE-arithmetic systems, then computing, say, +y = x + 1 will yield a quiet NaN.) 
diff --git a/min-dgels/base/F2CLIBS/libf2c/abort_.c b/min-dgels/base/F2CLIBS/libf2c/abort_.c new file mode 100644 index 0000000..92c841a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/abort_.c @@ -0,0 +1,22 @@ +#include "stdio.h" +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern VOID sig_die(); + +int abort_() +#else +extern void sig_die(const char*,int); + +int abort_(void) +#endif +{ +sig_die("Fortran abort routine called", 1); +return 0; /* not reached */ +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/arithchk.c b/min-dgels/base/F2CLIBS/libf2c/arithchk.c new file mode 100644 index 0000000..8288b94 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/arithchk.c @@ -0,0 +1,245 @@ +/**************************************************************** +Copyright (C) 1997, 1998, 2000 Lucent Technologies +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name of Lucent or any of its entities +not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. 
+****************************************************************/ + +/* Try to deduce arith.h from arithmetic properties. */ + +#include <stdio.h> +#include <math.h> +#include <errno.h> + +#ifdef NO_FPINIT +#define fpinit_ASL() +#else +#ifndef KR_headers +extern +#ifdef __cplusplus + "C" +#endif + void fpinit_ASL(void); +#endif /*KR_headers*/ +#endif /*NO_FPINIT*/ + + static int dalign; + typedef struct +Akind { + char *name; + int kind; + } Akind; + + static Akind +IEEE_8087 = { "IEEE_8087", 1 }, +IEEE_MC68k = { "IEEE_MC68k", 2 }, +IBM = { "IBM", 3 }, +VAX = { "VAX", 4 }, +CRAY = { "CRAY", 5}; + + static double t_nan; + + static Akind * +Lcheck(void) +{ + union { + double d; + long L[2]; + } u; + struct { + double d; + long L; + } x[2]; + + if (sizeof(x) > 2*(sizeof(double) + sizeof(long))) + dalign = 1; + u.L[0] = u.L[1] = 0; + u.d = 1e13; + if (u.L[0] == 1117925532 && u.L[1] == -448790528) + return &IEEE_MC68k; + if (u.L[1] == 1117925532 && u.L[0] == -448790528) + return &IEEE_8087; + if (u.L[0] == -2065213935 && u.L[1] == 10752) + return &VAX; + if (u.L[0] == 1267827943 && u.L[1] == 704643072) + return &IBM; + return 0; + } + + static Akind * +icheck(void) +{ + union { + double d; + int L[2]; + } u; + struct { + double d; + int L; + } x[2]; + + if (sizeof(x) > 2*(sizeof(double) + sizeof(int))) + dalign = 1; + u.L[0] = u.L[1] = 0; + u.d = 1e13; + if (u.L[0] == 1117925532 && u.L[1] == -448790528) + return &IEEE_MC68k; + if (u.L[1] == 1117925532 && u.L[0] == -448790528) + return &IEEE_8087; + if (u.L[0] == -2065213935 && u.L[1] == 10752) + return &VAX; + if (u.L[0] == 1267827943 && u.L[1] == 704643072) + return &IBM; + return 0; + } + +char *emptyfmt = ""; /* avoid possible warning message with printf("") */ + + static Akind * +ccheck(void) +{ + union { + double d; + long L; + } u; + long Cray1; + + /* Cray1 = 4617762693716115456 -- without overflow on non-Crays */ + Cray1 = printf("%s",emptyfmt) < 0 ?
0 : 4617762; + if (printf(emptyfmt, Cray1) >= 0) + Cray1 = 1000000*Cray1 + 693716; + if (printf(emptyfmt, Cray1) >= 0) + Cray1 = 1000000*Cray1 + 115456; + u.d = 1e13; + if (u.L == Cray1) + return &CRAY; + return 0; + } + + static int +fzcheck(void) +{ + double a, b; + int i; + + a = 1.; + b = .1; + for(i = 155;; b *= b, i >>= 1) { + if (i & 1) { + a *= b; + if (i == 1) + break; + } + } + b = a * a; + return b == 0.; + } + + static int +need_nancheck(void) +{ + double t; + + errno = 0; + t = log(t_nan); + if (errno == 0) + return 1; + errno = 0; + t = sqrt(t_nan); + return errno == 0; + } + + void +get_nanbits(unsigned int *b, int k) +{ + union { double d; unsigned int z[2]; } u, u1, u2; + + k = 2 - k; + u1.z[k] = u2.z[k] = 0x7ff00000; + u1.z[1-k] = u2.z[1-k] = 0; + u.d = u1.d - u2.d; /* Infinity - Infinity */ + b[0] = u.z[0]; + b[1] = u.z[1]; + } + + int +main(void) +{ + FILE *f; + Akind *a = 0; + int Ldef = 0; + unsigned int nanbits[2]; + + fpinit_ASL(); +#ifdef WRITE_ARITH_H /* for Symantec's buggy "make" */ + f = fopen("arith.h", "w"); + if (!f) { + printf("Cannot open arith.h\n"); + return 1; + } +#else + f = stdout; +#endif + + if (sizeof(double) == 2*sizeof(long)) + a = Lcheck(); + else if (sizeof(double) == 2*sizeof(int)) { + Ldef = 1; + a = icheck(); + } + else if (sizeof(double) == sizeof(long)) + a = ccheck(); + if (a) { + fprintf(f, "#define %s\n#define Arith_Kind_ASL %d\n", + a->name, a->kind); + if (Ldef) + fprintf(f, "#define Long int\n#define Intcast (int)(long)\n"); + if (dalign) + fprintf(f, "#define Double_Align\n"); + if (sizeof(char*) == 8) + fprintf(f, "#define X64_bit_pointers\n"); +#ifndef NO_LONG_LONG + if (sizeof(long long) < 8) +#endif + fprintf(f, "#define NO_LONG_LONG\n"); + if (a->kind <= 2) { + if (fzcheck()) + fprintf(f, "#define Sudden_Underflow\n"); + t_nan = -a->kind; + if (need_nancheck()) + fprintf(f, "#define NANCHECK\n"); + if (sizeof(double) == 2*sizeof(unsigned int)) { + get_nanbits(nanbits, a->kind); + fprintf(f, "#define 
QNaN0 0x%x\n", nanbits[0]); + fprintf(f, "#define QNaN1 0x%x\n", nanbits[1]); + } + } + return 0; + } + fprintf(f, "/* Unknown arithmetic */\n"); + return 1; + } + +#ifdef __sun +#ifdef __i386 +/* kludge for Intel Solaris */ +void fpsetprec(int x) { } +#endif +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/backspac.c b/min-dgels/base/F2CLIBS/libf2c/backspac.c new file mode 100644 index 0000000..096fa69 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/backspac.c @@ -0,0 +1,77 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef KR_headers +integer f_back(a) alist *a; +#else +integer f_back(alist *a) +#endif +{ unit *b; + OFF_T v, w, x, y, z; + uiolen n; + FILE *f; + + f__curunit = b = &f__units[a->aunit]; /* curunit for error messages */ + if(a->aunit >= MXUNIT || a->aunit < 0) + err(a->aerr,101,"backspace") + if(b->useek==0) err(a->aerr,106,"backspace") + if(b->ufd == NULL) { + fk_open(1, 1, a->aunit); + return(0); + } + if(b->uend==1) + { b->uend=0; + return(0); + } + if(b->uwrt) { + t_runc(a); + if (f__nowreading(b)) + err(a->aerr,errno,"backspace") + } + f = b->ufd; /* may have changed in t_runc() */ + if(b->url>0) + { + x=FTELL(f); + y = x % b->url; + if(y == 0) x--; + x /= b->url; + x *= b->url; + (void) FSEEK(f,x,SEEK_SET); + return(0); + } + + if(b->ufmt==0) + { FSEEK(f,-(OFF_T)sizeof(uiolen),SEEK_CUR); + if(fread((char *)&n,sizeof(uiolen),1,f) != 1) /* short read: trailing record length unavailable */ + return (1); + FSEEK(f,-(OFF_T)n-2*sizeof(uiolen),SEEK_CUR); + return(0); + } + w = x = FTELL(f); + z = 0; + loop: + while(x) { + x -= x < 64 ?
x : 64; + FSEEK(f,x,SEEK_SET); + for(y = x; y < w; y++) { + if (getc(f) != '\n') + continue; + v = FTELL(f); + if (v == w) { + if (z) + goto break2; + goto loop; + } + z = v; + } + err(a->aerr,(EOF),"backspace") + } + break2: + FSEEK(f, z, SEEK_SET); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_abs.c b/min-dgels/base/F2CLIBS/libf2c/c_abs.c new file mode 100644 index 0000000..858f2c8 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_abs.c @@ -0,0 +1,20 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern double f__cabs(); + +double c_abs(z) complex *z; +#else +extern double f__cabs(double, double); + +double c_abs(complex *z) +#endif +{ +return( f__cabs( z->r, z->i ) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_cos.c b/min-dgels/base/F2CLIBS/libf2c/c_cos.c new file mode 100644 index 0000000..29fe49e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_cos.c @@ -0,0 +1,23 @@ +#include "f2c.h" + +#ifdef KR_headers +extern double sin(), cos(), sinh(), cosh(); + +VOID c_cos(r, z) complex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif + +void c_cos(complex *r, complex *z) +#endif +{ + double zi = z->i, zr = z->r; + r->r = cos(zr) * cosh(zi); + r->i = - sin(zr) * sinh(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_div.c b/min-dgels/base/F2CLIBS/libf2c/c_div.c new file mode 100644 index 0000000..9463a43 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_div.c @@ -0,0 +1,53 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern VOID sig_die(); +VOID c_div(c, a, b) +complex *a, *b, *c; +#else +extern void sig_die(const char*,int); +void c_div(complex *c, complex *a, complex *b) +#endif +{ + double ratio, den; + double abr, abi, cr; + + if( (abr = b->r) < 0.) + abr = - abr; + if( (abi = b->i) < 0.) 
+ abi = - abi; + if( abr <= abi ) + { + if(abi == 0) { +#ifdef IEEE_COMPLEX_DIVIDE + float af, bf; + af = bf = abr; + if (a->i != 0 || a->r != 0) + af = 1.; + c->i = c->r = af / bf; + return; +#else + sig_die("complex division by zero", 1); +#endif + } + ratio = (double)b->r / b->i ; + den = b->i * (1 + ratio*ratio); + cr = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + + else + { + ratio = (double)b->i / b->r ; + den = b->r * (1 + ratio*ratio); + cr = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + c->r = cr; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_exp.c b/min-dgels/base/F2CLIBS/libf2c/c_exp.c new file mode 100644 index 0000000..f46508d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_exp.c @@ -0,0 +1,25 @@ +#include "f2c.h" + +#ifdef KR_headers +extern double exp(), cos(), sin(); + + VOID c_exp(r, z) complex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif + +void c_exp(complex *r, complex *z) +#endif +{ + double expx, zi = z->i; + + expx = exp(z->r); + r->r = expx * cos(zi); + r->i = expx * sin(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_log.c b/min-dgels/base/F2CLIBS/libf2c/c_log.c new file mode 100644 index 0000000..a0ba3f0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_log.c @@ -0,0 +1,23 @@ +#include "f2c.h" + +#ifdef KR_headers +extern double log(), f__cabs(), atan2(); +VOID c_log(r, z) complex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +extern double f__cabs(double, double); + +void c_log(complex *r, complex *z) +#endif +{ + double zi, zr; + r->i = atan2(zi = z->i, zr = z->r); + r->r = log( f__cabs(zr, zi) ); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_sin.c b/min-dgels/base/F2CLIBS/libf2c/c_sin.c new file mode 100644 index 0000000..c8bc30f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_sin.c @@ -0,0 
+1,23 @@ +#include "f2c.h" + +#ifdef KR_headers +extern double sin(), cos(), sinh(), cosh(); + +VOID c_sin(r, z) complex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif + +void c_sin(complex *r, complex *z) +#endif +{ + double zi = z->i, zr = z->r; + r->r = sin(zr) * cosh(zi); + r->i = cos(zr) * sinh(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/c_sqrt.c b/min-dgels/base/F2CLIBS/libf2c/c_sqrt.c new file mode 100644 index 0000000..1678c53 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/c_sqrt.c @@ -0,0 +1,41 @@ +#include "f2c.h" + +#ifdef KR_headers +extern double sqrt(), f__cabs(); + +VOID c_sqrt(r, z) complex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +extern double f__cabs(double, double); + +void c_sqrt(complex *r, complex *z) +#endif +{ + double mag, t; + double zi = z->i, zr = z->r; + + if( (mag = f__cabs(zr, zi)) == 0.) + r->r = r->i = 0.; + else if(zr > 0) + { + r->r = t = sqrt(0.5 * (mag + zr) ); + t = zi / t; + r->i = 0.5 * t; + } + else + { + t = sqrt(0.5 * (mag - zr) ); + if(zi < 0) + t = -t; + r->i = t; + t = zi / t; + r->r = 0.5 * t; + } + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/cabs.c b/min-dgels/base/F2CLIBS/libf2c/cabs.c new file mode 100644 index 0000000..84750d5 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/cabs.c @@ -0,0 +1,33 @@ +#ifdef KR_headers +extern double sqrt(); +double f__cabs(real, imag) double real, imag; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double f__cabs(double real, double imag) +#endif +{ +double temp; + +if(real < 0) + real = -real; +if(imag < 0) + imag = -imag; +if(imag > real){ + temp = real; + real = imag; + imag = temp; +} +if((real+imag) == real) + return(real); + +temp = imag/real; +temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ +return(temp); +} +#ifdef __cplusplus +} +#endif diff --git 
a/min-dgels/base/F2CLIBS/libf2c/close.c b/min-dgels/base/F2CLIBS/libf2c/close.c new file mode 100644 index 0000000..e958c71 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/close.c @@ -0,0 +1,101 @@ +#include "f2c.h" +#include "fio.h" +#ifdef KR_headers +integer f_clos(a) cllist *a; +#else +#undef abs +#undef min +#undef max +#include "stdlib.h" +#ifdef NON_UNIX_STDIO +#ifndef unlink +#define unlink remove +#endif +#else +#ifdef MSDOS +#include "io.h" +#else +#ifdef __cplusplus +extern "C" int unlink(const char*); +#else +extern int unlink(const char*); +#endif +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +integer f_clos(cllist *a) +#endif +{ unit *b; + + if(a->cunit >= MXUNIT) return(0); + b= &f__units[a->cunit]; + if(b->ufd==NULL) + goto done; + if (b->uscrtch == 1) + goto Delete; + if (!a->csta) + goto Keep; + switch(*a->csta) { + default: + Keep: + case 'k': + case 'K': + if(b->uwrt == 1) + t_runc((alist *)a); + if(b->ufnm) { + fclose(b->ufd); + free(b->ufnm); + } + break; + case 'd': + case 'D': + Delete: + fclose(b->ufd); + if(b->ufnm) { + unlink(b->ufnm); /*SYSDEP*/ + free(b->ufnm); + } + } + b->ufd=NULL; + done: + b->uend=0; + b->ufnm=NULL; + return(0); + } + void +#ifdef KR_headers +f_exit() +#else +f_exit(void) +#endif +{ int i; + static cllist xx; + if (!xx.cerr) { + xx.cerr=1; + xx.csta=NULL; + for(i=0;i +#else /*{*/ +#ifndef My_ctype_DEF +extern char My_ctype[]; +#else /*{*/ +char My_ctype[264] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; +#endif /*}*/ + +#define isdigit(x) (My_ctype[(x)+8] & 1) +#define isspace(x) (My_ctype[(x)+8] & 2) +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_abs.c b/min-dgels/base/F2CLIBS/libf2c/d_abs.c new file mode 100644 index 0000000..2f7a153 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_abs.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_abs(x) doublereal *x; +#else +double d_abs(doublereal *x) +#endif +{ +if(*x >= 0) + return(*x); +return(- *x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_acos.c b/min-dgels/base/F2CLIBS/libf2c/d_acos.c new file mode 100644 index 0000000..69005b5 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_acos.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double acos(); +double d_acos(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_acos(doublereal *x) +#endif +{ +return( acos(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_asin.c b/min-dgels/base/F2CLIBS/libf2c/d_asin.c new file mode 100644 index 0000000..d5196ab --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_asin.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double asin(); +double d_asin(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_asin(doublereal *x) +#endif +{ +return( asin(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_atan.c b/min-dgels/base/F2CLIBS/libf2c/d_atan.c new file mode 100644 index 0000000..d8856f8 --- /dev/null +++ 
b/min-dgels/base/F2CLIBS/libf2c/d_atan.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double atan(); +double d_atan(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_atan(doublereal *x) +#endif +{ +return( atan(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_atn2.c b/min-dgels/base/F2CLIBS/libf2c/d_atn2.c new file mode 100644 index 0000000..5611385 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_atn2.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double atan2(); +double d_atn2(x,y) doublereal *x, *y; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_atn2(doublereal *x, doublereal *y) +#endif +{ +return( atan2(*x,*y) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_cnjg.c b/min-dgels/base/F2CLIBS/libf2c/d_cnjg.c new file mode 100644 index 0000000..38471d9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_cnjg.c @@ -0,0 +1,19 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + + VOID +#ifdef KR_headers +d_cnjg(r, z) doublecomplex *r, *z; +#else +d_cnjg(doublecomplex *r, doublecomplex *z) +#endif +{ + doublereal zi = z->i; + r->r = z->r; + r->i = -zi; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_cos.c b/min-dgels/base/F2CLIBS/libf2c/d_cos.c new file mode 100644 index 0000000..12def9a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_cos.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double cos(); +double d_cos(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_cos(doublereal *x) +#endif +{ +return( cos(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_cosh.c b/min-dgels/base/F2CLIBS/libf2c/d_cosh.c new file mode 100644 index 0000000..9214c7a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_cosh.c @@ -0,0 +1,19 @@ 
+#include "f2c.h" + +#ifdef KR_headers +double cosh(); +double d_cosh(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_cosh(doublereal *x) +#endif +{ +return( cosh(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_dim.c b/min-dgels/base/F2CLIBS/libf2c/d_dim.c new file mode 100644 index 0000000..627ddb6 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_dim.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_dim(a,b) doublereal *a, *b; +#else +double d_dim(doublereal *a, doublereal *b) +#endif +{ +return( *a > *b ? *a - *b : 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_exp.c b/min-dgels/base/F2CLIBS/libf2c/d_exp.c new file mode 100644 index 0000000..e9ab5d4 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_exp.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double exp(); +double d_exp(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_exp(doublereal *x) +#endif +{ +return( exp(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_imag.c b/min-dgels/base/F2CLIBS/libf2c/d_imag.c new file mode 100644 index 0000000..d17b9dd --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_imag.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_imag(z) doublecomplex *z; +#else +double d_imag(doublecomplex *z) +#endif +{ +return(z->i); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_int.c b/min-dgels/base/F2CLIBS/libf2c/d_int.c new file mode 100644 index 0000000..6da4ce3 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_int.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +double d_int(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif 
+double d_int(doublereal *x) +#endif +{ +return( (*x>0) ? floor(*x) : -floor(- *x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_lg10.c b/min-dgels/base/F2CLIBS/libf2c/d_lg10.c new file mode 100644 index 0000000..664c19d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_lg10.c @@ -0,0 +1,21 @@ +#include "f2c.h" + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +double log(); +double d_lg10(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_lg10(doublereal *x) +#endif +{ +return( log10e * log(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_log.c b/min-dgels/base/F2CLIBS/libf2c/d_log.c new file mode 100644 index 0000000..e74be02 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_log.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double log(); +double d_log(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_log(doublereal *x) +#endif +{ +return( log(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_mod.c b/min-dgels/base/F2CLIBS/libf2c/d_mod.c new file mode 100644 index 0000000..3766d9f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_mod.c @@ -0,0 +1,46 @@ +#include "f2c.h" + +#ifdef KR_headers +#ifdef IEEE_drem +double drem(); +#else +double floor(); +#endif +double d_mod(x,y) doublereal *x, *y; +#else +#ifdef IEEE_drem +double drem(double, double); +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif +double d_mod(doublereal *x, doublereal *y) +#endif +{ +#ifdef IEEE_drem + double xa, ya, z; + if ((ya = *y) < 0.) 
+ ya = -ya; + z = drem(xa = *x, ya); + if (xa > 0) { + if (z < 0) + z += ya; + } + else if (z > 0) + z -= ya; + return z; +#else + double quotient; + if( (quotient = *x / *y) >= 0) + quotient = floor(quotient); + else + quotient = -floor(-quotient); + return(*x - (*y) * quotient ); +#endif +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_nint.c b/min-dgels/base/F2CLIBS/libf2c/d_nint.c new file mode 100644 index 0000000..66f2dd0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_nint.c @@ -0,0 +1,20 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +double d_nint(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_nint(doublereal *x) +#endif +{ +return( (*x)>=0 ? + floor(*x + .5) : -floor(.5 - *x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_prod.c b/min-dgels/base/F2CLIBS/libf2c/d_prod.c new file mode 100644 index 0000000..f9f348b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_prod.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_prod(x,y) real *x, *y; +#else +double d_prod(real *x, real *y) +#endif +{ +return( (*x) * (*y) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_sign.c b/min-dgels/base/F2CLIBS/libf2c/d_sign.c new file mode 100644 index 0000000..d06e0d1 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_sign.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double d_sign(a,b) doublereal *a, *b; +#else +double d_sign(doublereal *a, doublereal *b) +#endif +{ +double x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? 
x : -x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_sin.c b/min-dgels/base/F2CLIBS/libf2c/d_sin.c new file mode 100644 index 0000000..ebd4eec --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_sin.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sin(); +double d_sin(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_sin(doublereal *x) +#endif +{ +return( sin(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_sinh.c b/min-dgels/base/F2CLIBS/libf2c/d_sinh.c new file mode 100644 index 0000000..2479a6f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_sinh.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sinh(); +double d_sinh(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_sinh(doublereal *x) +#endif +{ +return( sinh(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_sqrt.c b/min-dgels/base/F2CLIBS/libf2c/d_sqrt.c new file mode 100644 index 0000000..a7fa66c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_sqrt.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sqrt(); +double d_sqrt(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_sqrt(doublereal *x) +#endif +{ +return( sqrt(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_tan.c b/min-dgels/base/F2CLIBS/libf2c/d_tan.c new file mode 100644 index 0000000..7d252c4 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_tan.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double tan(); +double d_tan(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_tan(doublereal *x) +#endif +{ +return( tan(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/d_tanh.c 
b/min-dgels/base/F2CLIBS/libf2c/d_tanh.c new file mode 100644 index 0000000..415b585 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/d_tanh.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double tanh(); +double d_tanh(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double d_tanh(doublereal *x) +#endif +{ +return( tanh(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/derf_.c b/min-dgels/base/F2CLIBS/libf2c/derf_.c new file mode 100644 index 0000000..d935d31 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/derf_.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double erf(); +double derf_(x) doublereal *x; +#else +extern double erf(double); +double derf_(doublereal *x) +#endif +{ +return( erf(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/derfc_.c b/min-dgels/base/F2CLIBS/libf2c/derfc_.c new file mode 100644 index 0000000..18f5c61 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/derfc_.c @@ -0,0 +1,20 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern double erfc(); + +double derfc_(x) doublereal *x; +#else +extern double erfc(double); + +double derfc_(doublereal *x) +#endif +{ +return( erfc(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/dfe.c b/min-dgels/base/F2CLIBS/libf2c/dfe.c new file mode 100644 index 0000000..c6b10d0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/dfe.c @@ -0,0 +1,151 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif + + int +y_rsk(Void) +{ + if(f__curunit->uend || f__curunit->url <= f__recpos + || f__curunit->url == 1) return 0; + do { + getc(f__cf); + } while(++f__recpos < f__curunit->url); + return 0; +} + + int +y_getc(Void) +{ + int ch; + if(f__curunit->uend) return(-1); + if((ch=getc(f__cf))!=EOF) + { + f__recpos++; + 
if(f__curunit->url>=f__recpos || + f__curunit->url==1) + return(ch); + else return(' '); + } + if(feof(f__cf)) + { + f__curunit->uend=1; + errno=0; + return(-1); + } + err(f__elist->cierr,errno,"readingd"); +} + + static int +y_rev(Void) +{ + if (f__recpos < f__hiwater) + f__recpos = f__hiwater; + if (f__curunit->url > 1) + while(f__recpos < f__curunit->url) + (*f__putn)(' '); + if (f__recpos) + f__putbuf(0); + f__recpos = 0; + return(0); +} + + static int +y_err(Void) +{ + err(f__elist->cierr, 110, "dfe"); +} + + static int +y_newrec(Void) +{ + y_rev(); + f__hiwater = f__cursor = 0; + return(1); +} + + int +#ifdef KR_headers +c_dfe(a) cilist *a; +#else +c_dfe(cilist *a) +#endif +{ + f__sequential=0; + f__formatted=f__external=1; + f__elist=a; + f__cursor=f__scale=f__recpos=0; + f__curunit = &f__units[a->ciunit]; + if(a->ciunit>MXUNIT || a->ciunit<0) + err(a->cierr,101,"startchk"); + if(f__curunit->ufd==NULL && fk_open(DIR,FMT,a->ciunit)) + err(a->cierr,104,"dfe"); + f__cf=f__curunit->ufd; + if(!f__curunit->ufmt) err(a->cierr,102,"dfe") + if(!f__curunit->useek) err(a->cierr,104,"dfe") + f__fmtbuf=a->cifmt; + if(a->cirec <= 0) + err(a->cierr,130,"dfe") + FSEEK(f__cf,(OFF_T)f__curunit->url * (a->cirec-1),SEEK_SET); + f__curunit->uend = 0; + return(0); +} +#ifdef KR_headers +integer s_rdfe(a) cilist *a; +#else +integer s_rdfe(cilist *a) +#endif +{ + int n; + if(!f__init) f_init(); + f__reading=1; + if(n=c_dfe(a))return(n); + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr,errno,"read start"); + f__getn = y_getc; + f__doed = rd_ed; + f__doned = rd_ned; + f__dorevert = f__donewrec = y_err; + f__doend = y_rsk; + if(pars_f(f__fmtbuf)<0) + err(a->cierr,100,"read start"); + fmt_bg(); + return(0); +} +#ifdef KR_headers +integer s_wdfe(a) cilist *a; +#else +integer s_wdfe(cilist *a) +#endif +{ + int n; + if(!f__init) f_init(); + f__reading=0; + if(n=c_dfe(a)) return(n); + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr,errno,"startwrt"); 
+ f__putn = x_putc; + f__doed = w_ed; + f__doned= w_ned; + f__dorevert = y_err; + f__donewrec = y_newrec; + f__doend = y_rev; + if(pars_f(f__fmtbuf)<0) + err(a->cierr,100,"startwrt"); + fmt_bg(); + return(0); +} +integer e_rdfe(Void) +{ + en_fio(); + return 0; +} +integer e_wdfe(Void) +{ + return en_fio(); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/dolio.c b/min-dgels/base/F2CLIBS/libf2c/dolio.c new file mode 100644 index 0000000..4070d87 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/dolio.c @@ -0,0 +1,26 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef KR_headers +extern int (*f__lioproc)(); + +integer do_lio(type,number,ptr,len) ftnint *number,*type; char *ptr; ftnlen len; +#else +extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); + +integer do_lio(ftnint *type, ftnint *number, char *ptr, ftnlen len) +#endif +{ + return((*f__lioproc)(number,ptr,len,*type)); +} +#ifdef __cplusplus + } +#endif +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/dtime_.c b/min-dgels/base/F2CLIBS/libf2c/dtime_.c new file mode 100644 index 0000000..6a09b3e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/dtime_.c @@ -0,0 +1,63 @@ +#include "time.h" + +#ifdef MSDOS +#undef USE_CLOCK +#define USE_CLOCK +#endif + +#ifndef REAL +#define REAL double +#endif + +#ifndef USE_CLOCK +#define _INCLUDE_POSIX_SOURCE /* for HP-UX */ +#define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ +#include "sys/types.h" +#include "sys/times.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif + +#undef Hz +#ifdef CLK_TCK +#define Hz CLK_TCK +#else +#ifdef HZ +#define Hz HZ +#else +#define Hz 60 +#endif +#endif + + REAL +#ifdef KR_headers +dtime_(tarray) float *tarray; +#else +dtime_(float *tarray) +#endif +{ +#ifdef USE_CLOCK +#ifndef CLOCKS_PER_SECOND +#define CLOCKS_PER_SECOND Hz +#endif + static double t0; + double t = clock(); + tarray[1] = 0; + tarray[0] = (t - t0) / 
CLOCKS_PER_SECOND; + t0 = t; + return tarray[0]; +#else + struct tms t; + static struct tms t0; + + times(&t); + tarray[0] = (double)(t.tms_utime - t0.tms_utime) / Hz; + tarray[1] = (double)(t.tms_stime - t0.tms_stime) / Hz; + t0 = t; + return tarray[0] + tarray[1]; +#endif + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/due.c b/min-dgels/base/F2CLIBS/libf2c/due.c new file mode 100644 index 0000000..a7f4cec --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/due.c @@ -0,0 +1,77 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif + + int +#ifdef KR_headers +c_due(a) cilist *a; +#else +c_due(cilist *a) +#endif +{ + if(!f__init) f_init(); + f__sequential=f__formatted=f__recpos=0; + f__external=1; + f__curunit = &f__units[a->ciunit]; + if(a->ciunit>=MXUNIT || a->ciunit<0) + err(a->cierr,101,"startio"); + f__elist=a; + if(f__curunit->ufd==NULL && fk_open(DIR,UNF,a->ciunit) ) err(a->cierr,104,"due"); + f__cf=f__curunit->ufd; + if(f__curunit->ufmt) err(a->cierr,102,"cdue") + if(!f__curunit->useek) err(a->cierr,104,"cdue") + if(f__curunit->ufd==NULL) err(a->cierr,114,"cdue") + if(a->cirec <= 0) + err(a->cierr,130,"due") + FSEEK(f__cf,(OFF_T)(a->cirec-1)*f__curunit->url,SEEK_SET); + f__curunit->uend = 0; + return(0); +} +#ifdef KR_headers +integer s_rdue(a) cilist *a; +#else +integer s_rdue(cilist *a) +#endif +{ + int n; + f__reading=1; + if(n=c_due(a)) return(n); + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr,errno,"read start"); + return(0); +} +#ifdef KR_headers +integer s_wdue(a) cilist *a; +#else +integer s_wdue(cilist *a) +#endif +{ + int n; + f__reading=0; + if(n=c_due(a)) return(n); + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr,errno,"write start"); + return(0); +} +integer e_rdue(Void) +{ + if(f__curunit->url==1 || f__recpos==f__curunit->url) + return(0); + FSEEK(f__cf,(OFF_T)(f__curunit->url-f__recpos),SEEK_CUR); + if(FTELL(f__cf)%f__curunit->url) + 
err(f__elist->cierr,200,"syserr"); + return(0); +} +integer e_wdue(Void) +{ +#ifdef ALWAYS_FLUSH + if (fflush(f__cf)) + err(f__elist->cierr,errno,"write end"); +#endif + return(e_rdue()); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/ef1asc_.c b/min-dgels/base/F2CLIBS/libf2c/ef1asc_.c new file mode 100644 index 0000000..70be0bc --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/ef1asc_.c @@ -0,0 +1,25 @@ +/* EFL support routine to copy string b to string a */ + +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + + +#define M ( (long) (sizeof(long) - 1) ) +#define EVEN(x) ( ( (x)+ M) & (~M) ) + +#ifdef KR_headers +extern VOID s_copy(); +ef1asc_(a, la, b, lb) ftnint *a, *b; ftnlen *la, *lb; +#else +extern void s_copy(char*,char*,ftnlen,ftnlen); +int ef1asc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb) +#endif +{ +s_copy( (char *)a, (char *)b, EVEN(*la), *lb ); +return 0; /* ignored return value */ +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c b/min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c new file mode 100644 index 0000000..bc5388d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c @@ -0,0 +1,20 @@ +/* EFL support routine to compare two character strings */ + +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +integer _starpu_ef1cmc_(a, la, b, lb) ftnint *a, *b; ftnlen *la, *lb; +#else +extern integer s_cmp(char*,char*,ftnlen,ftnlen); +integer _starpu_ef1cmc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb) +#endif +{ +return( s_cmp( (char *)a, (char *)b, *la, *lb) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/endfile.c b/min-dgels/base/F2CLIBS/libf2c/endfile.c new file mode 100644 index 0000000..04020d3 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/endfile.c @@ -0,0 +1,160 @@ +#include "f2c.h" +#include "fio.h" + +/* Compile this with -DNO_TRUNCATE if unistd.h does not exist or */ +/* if it 
does not define int truncate(const char *name, off_t). */ + +#ifdef MSDOS +#undef NO_TRUNCATE +#define NO_TRUNCATE +#endif + +#ifndef NO_TRUNCATE +#include "unistd.h" +#endif + +#ifdef KR_headers +extern char *strcpy(); +extern FILE *tmpfile(); +#else +#undef abs +#undef min +#undef max +#include "stdlib.h" +#include "string.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif + +extern char *f__r_mode[], *f__w_mode[]; + +#ifdef KR_headers +integer f_end(a) alist *a; +#else +integer f_end(alist *a) +#endif +{ + unit *b; + FILE *tf; + + if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"endfile"); + b = &f__units[a->aunit]; + if(b->ufd==NULL) { + char nbuf[10]; + sprintf(nbuf,"fort.%ld",(long)a->aunit); + if (tf = FOPEN(nbuf, f__w_mode[0])) + fclose(tf); + return(0); + } + b->uend=1; + return(b->useek ? t_runc(a) : 0); +} + +#ifdef NO_TRUNCATE + static int +#ifdef KR_headers +copy(from, len, to) FILE *from, *to; register long len; +#else +copy(FILE *from, register long len, FILE *to) +#endif +{ + int len1; + char buf[BUFSIZ]; + + while(fread(buf, len1 = len > BUFSIZ ? 
BUFSIZ : (int)len, 1, from)) { + if (!fwrite(buf, len1, 1, to)) + return 1; + if ((len -= len1) <= 0) + break; + } + return 0; + } +#endif /* NO_TRUNCATE */ + + int +#ifdef KR_headers +t_runc(a) alist *a; +#else +t_runc(alist *a) +#endif +{ + OFF_T loc, len; + unit *b; + int rc; + FILE *bf; +#ifdef NO_TRUNCATE + FILE *tf; +#endif + + b = &f__units[a->aunit]; + if(b->url) + return(0); /*don't truncate direct files*/ + loc=FTELL(bf = b->ufd); + FSEEK(bf,(OFF_T)0,SEEK_END); + len=FTELL(bf); + if (loc >= len || b->useek == 0) + return(0); +#ifdef NO_TRUNCATE + if (b->ufnm == NULL) + return 0; + rc = 0; + fclose(b->ufd); + if (!loc) { + if (!(bf = FOPEN(b->ufnm, f__w_mode[b->ufmt]))) + rc = 1; + if (b->uwrt) + b->uwrt = 1; + goto done; + } + if (!(bf = FOPEN(b->ufnm, f__r_mode[0])) + || !(tf = tmpfile())) { +#ifdef NON_UNIX_STDIO + bad: +#endif + rc = 1; + goto done; + } + if (copy(bf, (long)loc, tf)) { + bad1: + rc = 1; + goto done1; + } + if (!(bf = FREOPEN(b->ufnm, f__w_mode[0], bf))) + goto bad1; + rewind(tf); + if (copy(tf, (long)loc, bf)) + goto bad1; + b->uwrt = 1; + b->urw = 2; +#ifdef NON_UNIX_STDIO + if (b->ufmt) { + fclose(bf); + if (!(bf = FOPEN(b->ufnm, f__w_mode[3]))) + goto bad; + FSEEK(bf,(OFF_T)0,SEEK_END); + b->urw = 3; + } +#endif +done1: + fclose(tf); +done: + f__cf = b->ufd = bf; +#else /* NO_TRUNCATE */ + if (b->urw & 2) + fflush(b->ufd); /* necessary on some Linux systems */ +#ifndef FTRUNCATE +#define FTRUNCATE ftruncate +#endif + rc = FTRUNCATE(fileno(b->ufd), loc); + /* The following FSEEK is unnecessary on some systems, */ + /* but should be harmless. 
*/ + FSEEK(b->ufd, (OFF_T)0, SEEK_END); +#endif /* NO_TRUNCATE */ + if (rc) + err(a->aerr,111,"endfile"); + return 0; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/erf_.c b/min-dgels/base/F2CLIBS/libf2c/erf_.c new file mode 100644 index 0000000..532fec6 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/erf_.c @@ -0,0 +1,22 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef REAL +#define REAL double +#endif + +#ifdef KR_headers +double erf(); +REAL erf_(x) real *x; +#else +extern double erf(double); +REAL erf_(real *x) +#endif +{ +return( erf((double)*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/erfc_.c b/min-dgels/base/F2CLIBS/libf2c/erfc_.c new file mode 100644 index 0000000..6f6c9f1 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/erfc_.c @@ -0,0 +1,22 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef REAL +#define REAL double +#endif + +#ifdef KR_headers +double erfc(); +REAL erfc_(x) real *x; +#else +extern double erfc(double); +REAL erfc_(real *x) +#endif +{ +return( erfc((double)*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/err.c b/min-dgels/base/F2CLIBS/libf2c/err.c new file mode 100644 index 0000000..80a3b74 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/err.c @@ -0,0 +1,293 @@ +#include "sysdep1.h" /* here to get stat64 on some badly designed Linux systems */ +#include "f2c.h" +#ifdef KR_headers +#define Const /*nothing*/ +extern char *malloc(); +#else +#define Const const +#undef abs +#undef min +#undef max +#include "stdlib.h" +#endif +#include "fio.h" +#include "fmt.h" /* for struct syl */ + +/* Compile this with -DNO_ISATTY if unistd.h does not exist or */ +/* if it does not define int isatty(int). 
*/ +#ifdef NO_ISATTY +#define isatty(x) 0 +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/*global definitions*/ +unit f__units[MXUNIT]; /*unit table*/ +flag f__init; /*0 on entry, 1 after initializations*/ +cilist *f__elist; /*active external io list*/ +icilist *f__svic; /*active internal io list*/ +flag f__reading; /*1 if reading, 0 if writing*/ +flag f__cplus,f__cblank; +Const char *f__fmtbuf; +flag f__external; /*1 if external io, 0 if internal */ +#ifdef KR_headers +int (*f__doed)(),(*f__doned)(); +int (*f__doend)(),(*f__donewrec)(),(*f__dorevert)(); +int (*f__getn)(); /* for formatted input */ +void (*f__putn)(); /* for formatted output */ +#else +int (*f__getn)(void); /* for formatted input */ +void (*f__putn)(int); /* for formatted output */ +int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); +int (*f__dorevert)(void),(*f__donewrec)(void),(*f__doend)(void); +#endif +flag f__sequential; /*1 if sequential io, 0 if direct*/ +flag f__formatted; /*1 if formatted io, 0 if unformatted*/ +FILE *f__cf; /*current file*/ +unit *f__curunit; /*current unit*/ +int f__recpos; /*place in current record*/ +OFF_T f__cursor, f__hiwater; +int f__scale; +char *f__icptr; + +/*error messages*/ +Const char *F_err[] = +{ + "error in format", /* 100 */ + "illegal unit number", /* 101 */ + "formatted io not allowed", /* 102 */ + "unformatted io not allowed", /* 103 */ + "direct io not allowed", /* 104 */ + "sequential io not allowed", /* 105 */ + "can't backspace file", /* 106 */ + "null file name", /* 107 */ + "can't stat file", /* 108 */ + "unit not connected", /* 109 */ + "off end of record", /* 110 */ + "truncation failed in endfile", /* 111 */ + "incomprehensible list input", /* 112 */ + "out of free space", /* 113 */ + "unit not connected", /* 114 */ + "read unexpected character", /* 115 */ + "bad logical input field", /* 116 */ + "bad variable type", /* 117 */ + "bad namelist name", /* 118 */ + "variable not in namelist", /* 119 */ + 
"no end record", /* 120 */ + "variable count incorrect", /* 121 */ + "subscript for scalar variable", /* 122 */ + "invalid array section", /* 123 */ + "substring out of bounds", /* 124 */ + "subscript out of bounds", /* 125 */ + "can't read file", /* 126 */ + "can't write file", /* 127 */ + "'new' file exists", /* 128 */ + "can't append to file", /* 129 */ + "non-positive record number", /* 130 */ + "nmLbuf overflow" /* 131 */ +}; +#define MAXERR (sizeof(F_err)/sizeof(char *)+100) + + int +#ifdef KR_headers +f__canseek(f) FILE *f; /*SYSDEP*/ +#else +f__canseek(FILE *f) /*SYSDEP*/ +#endif +{ +#ifdef NON_UNIX_STDIO + return !isatty(fileno(f)); +#else + struct STAT_ST x; + + if (FSTAT(fileno(f),&x) < 0) + return(0); +#ifdef S_IFMT + switch(x.st_mode & S_IFMT) { + case S_IFDIR: + case S_IFREG: + if(x.st_nlink > 0) /* !pipe */ + return(1); + else + return(0); + case S_IFCHR: + if(isatty(fileno(f))) + return(0); + return(1); +#ifdef S_IFBLK + case S_IFBLK: + return(1); +#endif + } +#else +#ifdef S_ISDIR + /* POSIX version */ + if (S_ISREG(x.st_mode) || S_ISDIR(x.st_mode)) { + if(x.st_nlink > 0) /* !pipe */ + return(1); + else + return(0); + } + if (S_ISCHR(x.st_mode)) { + if(isatty(fileno(f))) + return(0); + return(1); + } + if (S_ISBLK(x.st_mode)) + return(1); +#else + Help! How does fstat work on this system? +#endif +#endif + return(0); /* who knows what it is? */ +#endif +} + + void +#ifdef KR_headers +f__fatal(n,s) char *s; +#else +f__fatal(int n, const char *s) +#endif +{ + if(n<100 && n>=0) perror(s); /*SYSDEP*/ + else if(n >= (int)MAXERR || n < -1) + { fprintf(stderr,"%s: illegal error number %d\n",s,n); + } + else if(n == -1) fprintf(stderr,"%s: end of file\n",s); + else + fprintf(stderr,"%s: %s\n",s,F_err[n-100]); + if (f__curunit) { + fprintf(stderr,"apparent state: unit %d ", + (int)(f__curunit-f__units)); + fprintf(stderr, f__curunit->ufnm ? 
"named %s\n" : "(unnamed)\n", + f__curunit->ufnm); + } + else + fprintf(stderr,"apparent state: internal I/O\n"); + if (f__fmtbuf) + fprintf(stderr,"last format: %s\n",f__fmtbuf); + fprintf(stderr,"lately %s %s %s %s",f__reading?"reading":"writing", + f__sequential?"sequential":"direct",f__formatted?"formatted":"unformatted", + f__external?"external":"internal"); + sig_die(" IO", 1); +} +/*initialization routine*/ + VOID +f_init(Void) +{ unit *p; + + f__init=1; + p= &f__units[0]; + p->ufd=stderr; + p->useek=f__canseek(stderr); + p->ufmt=1; + p->uwrt=1; + p = &f__units[5]; + p->ufd=stdin; + p->useek=f__canseek(stdin); + p->ufmt=1; + p->uwrt=0; + p= &f__units[6]; + p->ufd=stdout; + p->useek=f__canseek(stdout); + p->ufmt=1; + p->uwrt=1; +} + + int +#ifdef KR_headers +f__nowreading(x) unit *x; +#else +f__nowreading(unit *x) +#endif +{ + OFF_T loc; + int ufmt, urw; + extern char *f__r_mode[], *f__w_mode[]; + + if (x->urw & 1) + goto done; + if (!x->ufnm) + goto cantread; + ufmt = x->url ? 0 : x->ufmt; + loc = FTELL(x->ufd); + urw = 3; + if (!FREOPEN(x->ufnm, f__w_mode[ufmt|2], x->ufd)) { + urw = 1; + if(!FREOPEN(x->ufnm, f__r_mode[ufmt], x->ufd)) { + cantread: + errno = 126; + return 1; + } + } + FSEEK(x->ufd,loc,SEEK_SET); + x->urw = urw; + done: + x->uwrt = 0; + return 0; +} + + int +#ifdef KR_headers +f__nowwriting(x) unit *x; +#else +f__nowwriting(unit *x) +#endif +{ + OFF_T loc; + int ufmt; + extern char *f__w_mode[]; + + if (x->urw & 2) { + if (x->urw & 1) + FSEEK(x->ufd, (OFF_T)0, SEEK_CUR); + goto done; + } + if (!x->ufnm) + goto cantwrite; + ufmt = x->url ? 
0 : x->ufmt; + if (x->uwrt == 3) { /* just did write, rewind */ + if (!(f__cf = x->ufd = + FREOPEN(x->ufnm,f__w_mode[ufmt],x->ufd))) + goto cantwrite; + x->urw = 2; + } + else { + loc=FTELL(x->ufd); + if (!(f__cf = x->ufd = + FREOPEN(x->ufnm, f__w_mode[ufmt | 2], x->ufd))) + { + x->ufd = NULL; + cantwrite: + errno = 127; + return(1); + } + x->urw = 3; + FSEEK(x->ufd,loc,SEEK_SET); + } + done: + x->uwrt = 1; + return 0; +} + + int +#ifdef KR_headers +err__fl(f, m, s) int f, m; char *s; +#else +err__fl(int f, int m, const char *s) +#endif +{ + if (!f) + f__fatal(m, s); + if (f__doend) + (*f__doend)(); + return errno = m; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/etime_.c b/min-dgels/base/F2CLIBS/libf2c/etime_.c new file mode 100644 index 0000000..2d9a36d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/etime_.c @@ -0,0 +1,57 @@ +#include "time.h" + +#ifdef MSDOS +#undef USE_CLOCK +#define USE_CLOCK +#endif + +#ifndef REAL +#define REAL double +#endif + +#ifndef USE_CLOCK +#define _INCLUDE_POSIX_SOURCE /* for HP-UX */ +#define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ +#include "sys/types.h" +#include "sys/times.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif + +#undef Hz +#ifdef CLK_TCK +#define Hz CLK_TCK +#else +#ifdef HZ +#define Hz HZ +#else +#define Hz 60 +#endif +#endif + + REAL +#ifdef KR_headers +etime_(tarray) float *tarray; +#else +etime_(float *tarray) +#endif +{ +#ifdef USE_CLOCK +#ifndef CLOCKS_PER_SECOND +#define CLOCKS_PER_SECOND Hz +#endif + double t = clock(); + tarray[1] = 0; + return tarray[0] = t / CLOCKS_PER_SECOND; +#else + struct tms t; + + times(&t); + return (tarray[0] = (double)t.tms_utime/Hz) + + (tarray[1] = (double)t.tms_stime/Hz); +#endif + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/exit_.c b/min-dgels/base/F2CLIBS/libf2c/exit_.c new file mode 100644 index 0000000..08e9d07 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/exit_.c @@ -0,0 +1,43 @@ +/* This gives the 
effect of + + subroutine exit(rc) + integer*4 rc + stop + end + + * with the added side effect of supplying rc as the program's exit code. + */ + +#include "f2c.h" +#undef abs +#undef min +#undef max +#ifndef KR_headers +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +extern void f_exit(void); +#endif + + void +#ifdef KR_headers +exit_(rc) integer *rc; +#else +exit_(integer *rc) +#endif +{ +#ifdef NO_ONEXIT + f_exit(); +#endif + exit(*rc); + } +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/f2c.h b/min-dgels/base/F2CLIBS/libf2c/f2c.h new file mode 100644 index 0000000..b94ee7c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/f2c.h @@ -0,0 +1,223 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef long int integer; +typedef unsigned long int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef long int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; +#ifdef INTEGER_STAR_8 /* Adjust for integer*8. 
*/ +typedef long long longint; /* system-dependent */ +typedef unsigned long long ulongint; /* system-dependent */ +#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) +#define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) +#endif + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef long int flag; +typedef long int ftnlen; +typedef long int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +/*typedef long int Long;*/ /* No 
longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(); +typedef shortint (*J_fp)(); +typedef integer (*I_fp)(); +typedef real (*R_fp)(); +typedef doublereal (*D_fp)(), (*E_fp)(); +typedef /* Complex */ VOID (*C_fp)(); +typedef /* Double Complex */ VOID (*Z_fp)(); +typedef logical (*L_fp)(); +typedef shortlogical (*K_fp)(); +typedef /* Character */ VOID (*H_fp)(); +typedef /* Subroutine */ int (*S_fp)(); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs 
+#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/f2c.h0 b/min-dgels/base/F2CLIBS/libf2c/f2c.h0 new file mode 100644 index 0000000..b94ee7c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/f2c.h0 @@ -0,0 +1,223 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef long int integer; +typedef unsigned long int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef long int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; +#ifdef INTEGER_STAR_8 /* Adjust for integer*8. 
*/ +typedef long long longint; /* system-dependent */ +typedef unsigned long long ulongint; /* system-dependent */ +#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) +#define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) +#endif + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef long int flag; +typedef long int ftnlen; +typedef long int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +/*typedef long int Long;*/ /* No 
longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(); +typedef shortint (*J_fp)(); +typedef integer (*I_fp)(); +typedef real (*R_fp)(); +typedef doublereal (*D_fp)(), (*E_fp)(); +typedef /* Complex */ VOID (*C_fp)(); +typedef /* Double Complex */ VOID (*Z_fp)(); +typedef logical (*L_fp)(); +typedef shortlogical (*K_fp)(); +typedef /* Character */ VOID (*H_fp)(); +typedef /* Subroutine */ int (*S_fp)(); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs 
+#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/f2ch.add b/min-dgels/base/F2CLIBS/libf2c/f2ch.add new file mode 100644 index 0000000..9947249 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/f2ch.add @@ -0,0 +1,162 @@ +/* If you are using a C++ compiler, append the following to f2c.h + for compiling libF77 and libI77. */ + +#ifdef __cplusplus +extern "C" { +extern int abort_(void); +extern double c_abs(complex *); +extern void c_cos(complex *, complex *); +extern void c_div(complex *, complex *, complex *); +extern void c_exp(complex *, complex *); +extern void c_log(complex *, complex *); +extern void c_sin(complex *, complex *); +extern void c_sqrt(complex *, complex *); +extern double d_abs(double *); +extern double d_acos(double *); +extern double d_asin(double *); +extern double d_atan(double *); +extern double d_atn2(double *, double *); +extern void d_cnjg(doublecomplex *, doublecomplex *); +extern double d_cos(double *); +extern double d_cosh(double *); +extern double d_dim(double *, double *); +extern double d_exp(double *); +extern double d_imag(doublecomplex *); +extern double d_int(double *); +extern double d_lg10(double *); +extern double d_log(double *); +extern double d_mod(double *, double *); +extern double d_nint(double *); +extern double d_prod(float *, float *); +extern double d_sign(double *, double *); +extern double d_sin(double *); +extern double d_sinh(double *); +extern double d_sqrt(double *); +extern double d_tan(double *); +extern double d_tanh(double *); +extern double derf_(double *); +extern double derfc_(double *); +extern integer do_fio(ftnint *, char *, ftnlen); +extern integer do_lio(ftnint *, ftnint *, char *, ftnlen); +extern integer do_uio(ftnint *, char *, ftnlen); +extern integer 
e_rdfe(void); +extern integer e_rdue(void); +extern integer e_rsfe(void); +extern integer e_rsfi(void); +extern integer e_rsle(void); +extern integer e_rsli(void); +extern integer e_rsue(void); +extern integer e_wdfe(void); +extern integer e_wdue(void); +extern integer e_wsfe(void); +extern integer e_wsfi(void); +extern integer e_wsle(void); +extern integer e_wsli(void); +extern integer e_wsue(void); +extern int ef1asc_(ftnint *, ftnlen *, ftnint *, ftnlen *); +extern integer _starpu_ef1cmc_(ftnint *, ftnlen *, ftnint *, ftnlen *); +extern double erf(double); +extern double erf_(float *); +extern double erfc(double); +extern double erfc_(float *); +extern integer f_back(alist *); +extern integer f_clos(cllist *); +extern integer f_end(alist *); +extern void f_exit(void); +extern integer f_inqu(inlist *); +extern integer f_open(olist *); +extern integer f_rew(alist *); +extern int flush_(void); +extern void getarg_(integer *, char *, ftnlen); +extern void getenv_(char *, char *, ftnlen, ftnlen); +extern short h_abs(short *); +extern short h_dim(short *, short *); +extern short h_dnnt(double *); +extern short h_indx(char *, char *, ftnlen, ftnlen); +extern short h_len(char *, ftnlen); +extern short h_mod(short *, short *); +extern short h_nint(float *); +extern short h_sign(short *, short *); +extern short hl_ge(char *, char *, ftnlen, ftnlen); +extern short hl_gt(char *, char *, ftnlen, ftnlen); +extern short hl_le(char *, char *, ftnlen, ftnlen); +extern short hl_lt(char *, char *, ftnlen, ftnlen); +extern integer i_abs(integer *); +extern integer i_dim(integer *, integer *); +extern integer i_dnnt(double *); +extern integer i_indx(char *, char *, ftnlen, ftnlen); +extern integer i_len(char *, ftnlen); +extern integer i_mod(integer *, integer *); +extern integer i_nint(float *); +extern integer i_sign(integer *, integer *); +extern integer iargc_(void); +extern ftnlen l_ge(char *, char *, ftnlen, ftnlen); +extern ftnlen l_gt(char *, char *, ftnlen, ftnlen); +extern 
ftnlen l_le(char *, char *, ftnlen, ftnlen); +extern ftnlen l_lt(char *, char *, ftnlen, ftnlen); +extern void pow_ci(complex *, complex *, integer *); +extern double pow_dd(double *, double *); +extern double pow_di(double *, integer *); +extern short pow_hh(short *, shortint *); +extern integer pow_ii(integer *, integer *); +extern double pow_ri(float *, integer *); +extern void pow_zi(doublecomplex *, doublecomplex *, integer *); +extern void pow_zz(doublecomplex *, doublecomplex *, doublecomplex *); +extern double r_abs(float *); +extern double r_acos(float *); +extern double r_asin(float *); +extern double r_atan(float *); +extern double r_atn2(float *, float *); +extern void r_cnjg(complex *, complex *); +extern double r_cos(float *); +extern double r_cosh(float *); +extern double r_dim(float *, float *); +extern double r_exp(float *); +extern double r_imag(complex *); +extern double r_int(float *); +extern double r_lg10(float *); +extern double r_log(float *); +extern double r_mod(float *, float *); +extern double r_nint(float *); +extern double r_sign(float *, float *); +extern double r_sin(float *); +extern double r_sinh(float *); +extern double r_sqrt(float *); +extern double r_tan(float *); +extern double r_tanh(float *); +extern void s_cat(char *, char **, integer *, integer *, ftnlen); +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +extern void s_copy(char *, char *, ftnlen, ftnlen); +extern int s_paus(char *, ftnlen); +extern integer s_rdfe(cilist *); +extern integer s_rdue(cilist *); +extern integer s_rnge(char *, integer, char *, integer); +extern integer s_rsfe(cilist *); +extern integer s_rsfi(icilist *); +extern integer s_rsle(cilist *); +extern integer s_rsli(icilist *); +extern integer s_rsne(cilist *); +extern integer s_rsni(icilist *); +extern integer s_rsue(cilist *); +extern int s_stop(char *, ftnlen); +extern integer s_wdfe(cilist *); +extern integer s_wdue(cilist *); +extern integer s_wsfe(cilist *); +extern integer s_wsfi(icilist 
*); +extern integer s_wsle(cilist *); +extern integer s_wsli(icilist *); +extern integer s_wsne(cilist *); +extern integer s_wsni(icilist *); +extern integer s_wsue(cilist *); +extern void sig_die(char *, int); +extern integer signal_(integer *, void (*)(int)); +extern integer system_(char *, ftnlen); +extern double z_abs(doublecomplex *); +extern void z_cos(doublecomplex *, doublecomplex *); +extern void z_div(doublecomplex *, doublecomplex *, doublecomplex *); +extern void z_exp(doublecomplex *, doublecomplex *); +extern void z_log(doublecomplex *, doublecomplex *); +extern void z_sin(doublecomplex *, doublecomplex *); +extern void z_sqrt(doublecomplex *, doublecomplex *); + } +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/f77_aloc.c b/min-dgels/base/F2CLIBS/libf2c/f77_aloc.c new file mode 100644 index 0000000..f536099 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/f77_aloc.c @@ -0,0 +1,44 @@ +#include "f2c.h" +#undef abs +#undef min +#undef max +#include "stdio.h" + +static integer memfailure = 3; + +#ifdef KR_headers +extern char *malloc(); +extern void exit_(); + + char * +F77_aloc(Len, whence) integer Len; char *whence; +#else +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +extern void exit_(integer*); +#ifdef __cplusplus + } +#endif + + char * +F77_aloc(integer Len, const char *whence) +#endif +{ + char *rv; + unsigned int uLen = (unsigned int) Len; /* for K&R C */ + + if (!(rv = (char*)malloc(uLen))) { + fprintf(stderr, "malloc(%u) failure in %s\n", + uLen, whence); + exit_(&memfailure); + } + return rv; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/f77vers.c b/min-dgels/base/F2CLIBS/libf2c/f77vers.c new file mode 100644 index 0000000..70cd6fe --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/f77vers.c @@ -0,0 +1,97 @@ + char +_libf77_version_f2c[] = "\n@(#) LIBF77 VERSION (f2c) 20051004\n"; + +/* +2.00 11 June 1980. File version.c added to library. 
+2.01 31 May 1988. s_paus() flushes stderr; names of hl_* fixed + [ d]erf[c ] added + 8 Aug. 1989: #ifdefs for f2c -i2 added to s_cat.c + 29 Nov. 1989: s_cmp returns long (for f2c) + 30 Nov. 1989: arg types from f2c.h + 12 Dec. 1989: s_rnge allows long names + 19 Dec. 1989: getenv_ allows unsorted environment + 28 Mar. 1990: add exit(0) to end of main() + 2 Oct. 1990: test signal(...) == SIG_IGN rather than & 01 in main + 17 Oct. 1990: abort() calls changed to sig_die(...,1) + 22 Oct. 1990: separate sig_die from main + 25 Apr. 1991: minor, theoretically invisible tweaks to s_cat, sig_die + 31 May 1991: make system_ return status + 18 Dec. 1991: change long to ftnlen (for -i2) many places + 28 Feb. 1992: repair z_sqrt.c (scribbled on input, gave wrong answer) + 18 July 1992: for n < 0, repair handling of 0**n in pow_[dr]i.c + and m**n in pow_hh.c and pow_ii.c; + catch SIGTRAP in main() for error msg before abort + 23 July 1992: switch to ANSI prototypes unless KR_headers is #defined + 23 Oct. 1992: fix botch in signal_.c (erroneous deref of 2nd arg); + change Cabs to f__cabs. + 12 March 1993: various tweaks for C++ + 2 June 1994: adjust so abnormal terminations invoke f_exit just once + 16 Sept. 1994: s_cmp: treat characters as unsigned in comparisons. + 19 Sept. 1994: s_paus: flush after end of PAUSE; add -DMSDOS + 12 Jan. 1995: pow_[dhiqrz][hiq]: adjust x**i to work on machines + that sign-extend right shifts when i is the most + negative integer. + 26 Jan. 1995: adjust s_cat.c, s_copy.c to permit the left-hand side + of character assignments to appear on the right-hand + side (unless compiled with -DNO_OVERWRITE). + 27 Jan. 1995: minor tweak to s_copy.c: copy forward whenever + possible (for better cache behavior). + 30 May 1995: added subroutine exit(rc) integer rc. Version not changed. + 29 Aug. 1995: add F77_aloc.c; use it in s_cat.c and system_.c. + 6 Sept. 1995: fix return type of system_ under -DKR_headers. + 19 Dec. 
1995: s_cat.c: fix bug when 2nd or later arg overlaps lhs. + 19 Mar. 1996: s_cat.c: supply missing break after overlap detection. + 13 May 1996: add [lq]bitbits.c and [lq]bitshft.c (f90 bit intrinsics). + 19 June 1996: add casts to unsigned in [lq]bitshft.c. + 26 Feb. 1997: adjust functions with a complex output argument + to permit aliasing it with input arguments. + (For now, at least, this is just for possible + benefit of g77.) + 4 April 1997: [cz]_div.c: tweaks invisible on most systems (that may + affect systems using gratuitous extra precision). + 19 Sept. 1997: [de]time_.c (Unix systems only): change return + type to double. + 2 May 1999: getenv_.c: omit environ in favor of getenv(). + c_cos.c, c_exp.c, c_sin.c, d_cnjg.c, r_cnjg.c, + z_cos.c, z_exp.c, z_log.c, z_sin.c: cope fully with + overlapping arguments caused by equivalence. + 3 May 1999: "invisible" tweaks to omit compiler warnings in + abort_.c, ef1asc_.c, s_rnge.c, s_stop.c. + + 7 Sept. 1999: [cz]_div.c: arrange for compilation under + -DIEEE_COMPLEX_DIVIDE to make these routines + avoid calling sig_die when the denominator + vanishes; instead, they return pairs of NaNs + or Infinities, depending whether the numerator + also vanishes or not. VERSION not changed. + 15 Nov. 1999: s_rnge.c: add casts for the case of + sizeof(ftnint) == sizeof(int) < sizeof(long). + 10 March 2000: z_log.c: improve accuracy of Real(log(z)) for, e.g., + z near (+-1,eps) with |eps| small. For the old + evaluation, compile with -DPre20000310 . + 20 April 2000: s_cat.c: tweak argument types to accord with + calls by f2c when ftnint and ftnlen are of + different sizes (different numbers of bits). + 4 July 2000: adjustments to permit compilation by C++ compilers; + VERSION string remains unchanged. + 29 Sept. 2000: dtime_.c, etime_.c: use floating-point divide. + dtime_.d, erf_.c, erfc_.c, etime.c: for use with + "f2c -R", compile with -DREAL=float. 
+ 23 June 2001: add uninit.c; [fi]77vers.c: make version strings + visible as extern char _lib[fi]77_version_f2c[]. + 5 July 2001: modify uninit.c for __mc68k__ under Linux. + 16 Nov. 2001: uninit.c: Linux Power PC logic supplied by Alan Bain. + 18 Jan. 2002: fix glitches in qbit_bits(): wrong return type, + missing ~ on y in return value. + 14 March 2002: z_log.c: add code to cope with buggy compilers + (e.g., some versions of gcc under -O2 or -O3) + that do floating-point comparisons against values + computed into extended-precision registers on some + systems (such as Intel IA32 systems). Compile with + -DNO_DOUBLE_EXTENDED to omit the new logic. + 4 Oct. 2002: uninit.c: on IRIX systems, omit use of shell variables. + 10 Oct 2005: uninit.c: on IA32 Linux systems, leave the rounding + precision alone rather than forcing it to 53 bits; + compile with -DUNINIT_F2C_PRECISION_53 to get the + former behavior. +*/ diff --git a/min-dgels/base/F2CLIBS/libf2c/fio.h b/min-dgels/base/F2CLIBS/libf2c/fio.h new file mode 100644 index 0000000..ebf7696 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/fio.h @@ -0,0 +1,141 @@ +#ifndef SYSDEP_H_INCLUDED +#include "sysdep1.h" +#endif +#include "stdio.h" +#include "errno.h" +#ifndef NULL +/* ANSI C */ +#include "stddef.h" +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +#ifndef FOPEN +#define FOPEN fopen +#endif + +#ifndef FREOPEN +#define FREOPEN freopen +#endif + +#ifndef FSEEK +#define FSEEK fseek +#endif + +#ifndef FSTAT +#define FSTAT fstat +#endif + +#ifndef FTELL +#define FTELL ftell +#endif + +#ifndef OFF_T +#define OFF_T long +#endif + +#ifndef STAT_ST +#define STAT_ST stat +#endif + +#ifndef STAT +#define STAT stat +#endif + +#ifdef MSDOS +#ifndef NON_UNIX_STDIO +#define NON_UNIX_STDIO +#endif +#endif + +#ifdef UIOLEN_int +typedef int uiolen; +#else +typedef long uiolen; +#endif + +/*units*/ +typedef struct +{ FILE *ufd; /*0=unconnected*/ + char *ufnm; +#ifndef MSDOS + long 
uinode; + int udev; +#endif + int url; /*0=sequential*/ + flag useek; /*true=can backspace, use dir, ...*/ + flag ufmt; + flag urw; /* (1 for can read) | (2 for can write) */ + flag ublnk; + flag uend; + flag uwrt; /*last io was write*/ + flag uscrtch; +} unit; + +#undef Void +#ifdef KR_headers +#define Void /*void*/ +extern int (*f__getn)(); /* for formatted input */ +extern void (*f__putn)(); /* for formatted output */ +extern void x_putc(); +extern long f__inode(); +extern VOID sig_die(); +extern int (*f__donewrec)(), t_putc(), x_wSL(); +extern int c_sfe(), err__fl(), xrd_SL(), f__putbuf(); +#else +#define Void void +#ifdef __cplusplus +extern "C" { +#endif +extern int (*f__getn)(void); /* for formatted input */ +extern void (*f__putn)(int); /* for formatted output */ +extern void x_putc(int); +extern long f__inode(char*,int*); +extern void sig_die(const char*,int); +extern void f__fatal(int, const char*); +extern int t_runc(alist*); +extern int f__nowreading(unit*), f__nowwriting(unit*); +extern int fk_open(int,int,ftnint); +extern int en_fio(void); +extern void f_init(void); +extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); +extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); +extern int c_sfe(cilist*), z_rnew(void); +extern int err__fl(int,int,const char*); +extern int xrd_SL(void); +extern int f__putbuf(int); +#endif +extern flag f__init; +extern cilist *f__elist; /*active external io list*/ +extern flag f__reading,f__external,f__sequential,f__formatted; +extern int (*f__doend)(Void); +extern FILE *f__cf; /*current file*/ +extern unit *f__curunit; /*current unit*/ +extern unit f__units[]; +#define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} +#define errfl(f,m,s) return err__fl((int)f,m,s) + +/*Table sizes*/ +#define MXUNIT 100 + +extern int f__recpos; /*position in current record*/ +extern OFF_T f__cursor; /* offset to move to */ +extern OFF_T f__hiwater; /* so TL doesn't confuse us */ +#ifdef __cplusplus 
+ } +#endif + +#define WRITE 1 +#define READ 2 +#define SEQ 3 +#define DIR 4 +#define FMT 5 +#define UNF 6 +#define EXT 7 +#define INT 8 + +#define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) diff --git a/min-dgels/base/F2CLIBS/libf2c/fmt.c b/min-dgels/base/F2CLIBS/libf2c/fmt.c new file mode 100644 index 0000000..286c98f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/fmt.c @@ -0,0 +1,530 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif +#define skip(s) while(*s==' ') s++ +#ifdef interdata +#define SYLMX 300 +#endif +#ifdef pdp11 +#define SYLMX 300 +#endif +#ifdef vax +#define SYLMX 300 +#endif +#ifndef SYLMX +#define SYLMX 300 +#endif +#define GLITCH '\2' + /* special quote character for stu */ +extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/ +static struct syl f__syl[SYLMX]; +int f__parenlvl,f__pc,f__revloc; +#ifdef KR_headers +#define Const /*nothing*/ +#else +#define Const const +#endif + + static +#ifdef KR_headers +char *ap_end(s) char *s; +#else +const char *ap_end(const char *s) +#endif +{ char quote; + quote= *s++; + for(;*s;s++) + { if(*s!=quote) continue; + if(*++s!=quote) return(s); + } + if(f__elist->cierr) { + errno = 100; + return(NULL); + } + f__fatal(100, "bad string"); + /*NOTREACHED*/ return 0; +} + static int +#ifdef KR_headers +op_gen(a,b,c,d) +#else +op_gen(int a, int b, int c, int d) +#endif +{ struct syl *p= &f__syl[f__pc]; + if(f__pc>=SYLMX) + { fprintf(stderr,"format too complicated:\n"); + sig_die(f__fmtbuf, 1); + } + p->op=a; + p->p1=b; + p->p2.i[0]=c; + p->p2.i[1]=d; + return(f__pc++); +} +#ifdef KR_headers +static char *f_list(); +static char *gt_num(s,n,n1) char *s; int *n, n1; +#else +static const char *f_list(const char*); +static const char *gt_num(const char *s, int *n, int n1) +#endif +{ int m=0,f__cnt=0; + char c; + for(c= *s;;c = *s) + { if(c==' ') + { s++; + continue; + } + if(c>'9' || c<'0') break; + m=10*m+c-'0'; + f__cnt++; + s++; + } + 
if(f__cnt==0) { + if (!n1) + s = 0; + *n=n1; + } + else *n=m; + return(s); +} + + static +#ifdef KR_headers +char *f_s(s,curloc) char *s; +#else +const char *f_s(const char *s, int curloc) +#endif +{ + skip(s); + if(*s++!='(') + { + return(NULL); + } + if(f__parenlvl++ ==1) f__revloc=curloc; + if(op_gen(RET1,curloc,0,0)<0 || + (s=f_list(s))==NULL) + { + return(NULL); + } + skip(s); + return(s); +} + + static int +#ifdef KR_headers +ne_d(s,p) char *s,**p; +#else +ne_d(const char *s, const char **p) +#endif +{ int n,x,sign=0; + struct syl *sp; + switch(*s) + { + default: + return(0); + case ':': (void) op_gen(COLON,0,0,0); break; + case '$': + (void) op_gen(NONL, 0, 0, 0); break; + case 'B': + case 'b': + if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0); + else (void) op_gen(BN,0,0,0); + break; + case 'S': + case 's': + if(*(s+1)=='s' || *(s+1) == 'S') + { x=SS; + s++; + } + else if(*(s+1)=='p' || *(s+1) == 'P') + { x=SP; + s++; + } + else x=S; + (void) op_gen(x,0,0,0); + break; + case '/': (void) op_gen(SLASH,0,0,0); break; + case '-': sign=1; + case '+': s++; /*OUTRAGEOUS CODING TRICK*/ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (!(s=gt_num(s,&n,0))) { + bad: *p = 0; + return 1; + } + switch(*s) + { + default: + return(0); + case 'P': + case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break; + case 'X': + case 'x': (void) op_gen(X,n,0,0); break; + case 'H': + case 'h': + sp = &f__syl[op_gen(H,n,0,0)]; + sp->p2.s = (char*)s + 1; + s+=n; + break; + } + break; + case GLITCH: + case '"': + case '\'': + sp = &f__syl[op_gen(APOS,0,0,0)]; + sp->p2.s = (char*)s; + if((*p = ap_end(s)) == NULL) + return(0); + return(1); + case 'T': + case 't': + if(*(s+1)=='l' || *(s+1) == 'L') + { x=TL; + s++; + } + else if(*(s+1)=='r'|| *(s+1) == 'R') + { x=TR; + s++; + } + else x=T; + if (!(s=gt_num(s+1,&n,0))) + goto bad; + s--; + (void) op_gen(x,n,0,0); + break; + case 'X': + case 'x': (void) op_gen(X,1,0,0); break; + 
/*
 * e_d -- try to parse one repeatable ("data") edit descriptor at s.
 *
 * A leading repeat count is read with gt_num() (default 1) and emitted as
 * a STACK op; the following character then selects the descriptor:
 *	E/e, G/g	w[.d][Ee]	-> E or G, or EE/GE when an exponent width follows
 *	O/o, Z/z, I/i	w[.m]		-> O/Z/I, or OM/ZM/IM when ".m" is present
 *	L/l		w		-> L
 *	A/a		[w]		-> AW when a digit follows, otherwise A
 *	F/f, D/d	w[.d]		-> F or D
 *
 * Returns 1 with *p just past the descriptor when one was recognised,
 * 1 with *p set to 0 when gt_num() rejected a number (label "bad"), and
 * 0 with *p reset to the original s when the letter is not one of the
 * above; in that last case the STACK op is backed out via f__pc--.
 */
 static int
#ifdef KR_headers
e_d(s,p) char *s,**p;
#else
e_d(const char *s, const char **p)
#endif
{	int i,im,n,w,d,e,found=0,x=0;
	Const char *sv=s;	/* start of the item, restored into *p on "no match" */
	s=gt_num(s,&n,1);	/* optional repeat count, defaulting to 1 */
	(void) op_gen(STACK,n,0,0);
	switch(*s++)
	{
	default: break;
	case 'E':
	case 'e':	x=1;	/* x==1 marks E; falls through to share the G parsing */
	case 'G':
	case 'g':
		found=1;
		if (!(s=gt_num(s,&w,0))) {
 bad:			/* shared failure exit, also the target of every "goto bad" below */
			*p = 0;
			return 1;
			}
		/* NOTE(review): a zero width leaves only the STACK op emitted -- confirm intent */
		if(w==0) break;
		if(*s=='.') {
			if (!(s=gt_num(s+1,&d,0)))
				goto bad;
			}
		else	d=0;	/* no ".d" part given */
		if(*s!='E' && *s != 'e')
			(void) op_gen(x==1?E:G,w,d,0);	/* default is Ew.dE2 */
		else {
			/* explicit exponent width: Ew.dEe / Gw.dEe */
			if (!(s=gt_num(s+1,&e,0)))
				goto bad;
			(void) op_gen(x==1?EE:GE,w,d,e);
			}
		break;
	case 'O':
	case 'o':
		/* O, Z and I share the w[.m] parsing at finish_I */
		i = O;
		im = OM;
		goto finish_I;
	case 'Z':
	case 'z':
		i = Z;
		im = ZM;
		goto finish_I;
	case 'L':
	case 'l':
		found=1;
		if (!(s=gt_num(s,&w,0)))
			goto bad;
		if(w==0) break;
		(void) op_gen(L,w,0,0);
		break;
	case 'A':
	case 'a':
		found=1;
		skip(s);
		/* a width is optional for A: only parse one when a digit follows */
		if(*s>='0' && *s<='9')
		{	s=gt_num(s,&w,1);
			if(w==0) break;
			(void) op_gen(AW,w,0,0);
			break;
		}
		(void) op_gen(A,0,0,0);
		break;
	case 'F':
	case 'f':
		if (!(s=gt_num(s,&w,0)))
			goto bad;
		found=1;
		if(w==0) break;
		if(*s=='.') {
			if (!(s=gt_num(s+1,&d,0)))
				goto bad;
			}
		else	d=0;
		(void) op_gen(F,w,d,0);
		break;
	case 'D':
	case 'd':
		found=1;
		if (!(s=gt_num(s,&w,0)))
			goto bad;
		if(w==0) break;
		if(*s=='.') {
			if (!(s=gt_num(s+1,&d,0)))
				goto bad;
			}
		else	d=0;
		(void) op_gen(D,w,d,0);
		break;
	case 'I':
	case 'i':
		i = I;
		im = IM;
 finish_I:
		/* i is the plain op, im the op used when a ".m" part is present */
		if (!(s=gt_num(s,&w,0)))
			goto bad;
		found=1;
		if(w==0) break;
		if(*s!='.')
		{	(void) op_gen(i,w,0,0);
			break;
		}
		if (!(s=gt_num(s+1,&d,0)))
			goto bad;
		(void) op_gen(im,w,d,0);
		break;
	}
	if(found==0)
	{	f__pc--; /*unSTACK*/
		*p=sv;
		return(0);
	}
	*p=s;
	return(1);
}
*i_tem(const char *s) +#endif +{ const char *t; + int n,curloc; + if(*s==')') return(s); + if(ne_d(s,&t)) return(t); + if(e_d(s,&t)) return(t); + s=gt_num(s,&n,1); + if((curloc=op_gen(STACK,n,0,0))<0) return(NULL); + return(f_s(s,curloc)); +} + + static +#ifdef KR_headers +char *f_list(s) char *s; +#else +const char *f_list(const char *s) +#endif +{ + for(;*s!=0;) + { skip(s); + if((s=i_tem(s))==NULL) return(NULL); + skip(s); + if(*s==',') s++; + else if(*s==')') + { if(--f__parenlvl==0) + { + (void) op_gen(REVERT,f__revloc,0,0); + return(++s); + } + (void) op_gen(GOTO,0,0,0); + return(++s); + } + } + return(NULL); +} + + int +#ifdef KR_headers +pars_f(s) char *s; +#else +pars_f(const char *s) +#endif +{ + f__parenlvl=f__revloc=f__pc=0; + if(f_s(s,0) == NULL) + { + return(-1); + } + return(0); +} +#define STKSZ 10 +int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp; +flag f__workdone, f__nonl; + + static int +#ifdef KR_headers +type_f(n) +#else +type_f(int n) +#endif +{ + switch(n) + { + default: + return(n); + case RET1: + return(RET1); + case REVERT: return(REVERT); + case GOTO: return(GOTO); + case STACK: return(STACK); + case X: + case SLASH: + case APOS: case H: + case T: case TL: case TR: + return(NED); + case F: + case I: + case IM: + case A: case AW: + case O: case OM: + case L: + case E: case EE: case D: + case G: case GE: + case Z: case ZM: + return(ED); + } +} +#ifdef KR_headers +integer do_fio(number,ptr,len) ftnint *number; ftnlen len; char *ptr; +#else +integer do_fio(ftnint *number, char *ptr, ftnlen len) +#endif +{ struct syl *p; + int n,i; + for(i=0;i<*number;i++,ptr+=len) + { +loop: switch(type_f((p= &f__syl[f__pc])->op)) + { + default: + fprintf(stderr,"unknown code in do_fio: %d\n%s\n", + p->op,f__fmtbuf); + err(f__elist->cierr,100,"do_fio"); + case NED: + if((*f__doned)(p)) + { f__pc++; + goto loop; + } + f__pc++; + continue; + case ED: + if(f__cnt[f__cp]<=0) + { f__cp--; + f__pc++; + goto loop; + } + if(ptr==NULL) + return((*f__doend)()); + 
f__cnt[f__cp]--; + f__workdone=1; + if((n=(*f__doed)(p,ptr,len))>0) + errfl(f__elist->cierr,errno,"fmt"); + if(n<0) + err(f__elist->ciend,(EOF),"fmt"); + continue; + case STACK: + f__cnt[++f__cp]=p->p1; + f__pc++; + goto loop; + case RET1: + f__ret[++f__rp]=p->p1; + f__pc++; + goto loop; + case GOTO: + if(--f__cnt[f__cp]<=0) + { f__cp--; + f__rp--; + f__pc++; + goto loop; + } + f__pc=1+f__ret[f__rp--]; + goto loop; + case REVERT: + f__rp=f__cp=0; + f__pc = p->p1; + if(ptr==NULL) + return((*f__doend)()); + if(!f__workdone) return(0); + if((n=(*f__dorevert)()) != 0) return(n); + goto loop; + case COLON: + if(ptr==NULL) + return((*f__doend)()); + f__pc++; + goto loop; + case NONL: + f__nonl = 1; + f__pc++; + goto loop; + case S: + case SS: + f__cplus=0; + f__pc++; + goto loop; + case SP: + f__cplus = 1; + f__pc++; + goto loop; + case P: f__scale=p->p1; + f__pc++; + goto loop; + case BN: + f__cblank=0; + f__pc++; + goto loop; + case BZ: + f__cblank=1; + f__pc++; + goto loop; + } + } + return(0); +} + + int +en_fio(Void) +{ ftnint one=1; + return(do_fio(&one,(char *)NULL,(ftnint)0)); +} + + VOID +fmt_bg(Void) +{ + f__workdone=f__cp=f__rp=f__pc=f__cursor=0; + f__cnt[0]=f__ret[0]=0; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/fmt.h b/min-dgels/base/F2CLIBS/libf2c/fmt.h new file mode 100644 index 0000000..ddfa551 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/fmt.h @@ -0,0 +1,105 @@ +struct syl +{ int op; + int p1; + union { int i[2]; char *s;} p2; + }; +#define RET1 1 +#define REVERT 2 +#define GOTO 3 +#define X 4 +#define SLASH 5 +#define STACK 6 +#define I 7 +#define ED 8 +#define NED 9 +#define IM 10 +#define APOS 11 +#define H 12 +#define TL 13 +#define TR 14 +#define T 15 +#define COLON 16 +#define S 17 +#define SP 18 +#define SS 19 +#define P 20 +#define BN 21 +#define BZ 22 +#define F 23 +#define E 24 +#define EE 25 +#define D 26 +#define G 27 +#define GE 28 +#define L 29 +#define A 30 +#define AW 31 +#define O 32 +#define NONL 
/* Size of the conversion buffer; 22 octal digits hold a 64-bit value. */
#ifndef MAXINTLENGTH
#define MAXINTLENGTH 23
#endif

#ifndef Allow_TYQUAD
#undef longint
#define longint long
#undef ulongint
#define ulongint unsigned long
#endif

/*
 * f__icvt -- convert an integer to its digit string in the given base.
 *
 * value   number to convert
 * ndigit  out: number of digits produced (1 for zero)
 * sign    out: 1 when value is negative, 0 otherwise
 * base    radix; digits are produced as '0' + remainder, so only bases
 *         up to 10 yield conventional digits, and the base must be large
 *         enough for the result to fit in MAXINTLENGTH characters
 *
 * Returns a pointer to the first digit inside a static buffer; the sign
 * is reported separately and is not part of the string.  The buffer is
 * overwritten by the next call.
 */
#ifdef KR_headers
char *f__icvt(value,ndigit,sign, base) longint value; int *ndigit,*sign;
 register int base;
#else
char *f__icvt(longint value, int *ndigit, int *sign, int base)
#endif
{
	static char buf[MAXINTLENGTH+1];
	register int i;
	ulongint uvalue;

	if(value > 0) {
		uvalue = value;
		*sign = 0;
		}
	else if (value < 0) {
		/*
		 * Negate in the unsigned type: "-value" overflows (undefined
		 * behaviour) for the most negative value, whereas unsigned
		 * arithmetic wraps to exactly the magnitude we want.
		 */
		uvalue = (ulongint)0 - (ulongint)value;
		*sign = 1;
		}
	else {
		*sign = 0;
		*ndigit = 1;
		buf[MAXINTLENGTH-1] = '0';
		return &buf[MAXINTLENGTH-1];
		}
	/* Fill the buffer from the right, least significant digit first. */
	i = MAXINTLENGTH;
	do {
		buf[--i] = (uvalue%base) + '0';
		uvalue /= base;
		}
		while(uvalue > 0);
	*ndigit = MAXINTLENGTH - i;
	return &buf[i];
	}
+ */ + +#ifdef VAX +#define MAXFRACDIGS 56 +#define MAXINTDIGS 38 +#else +#ifdef CRAY +#define MAXFRACDIGS 9880 +#define MAXINTDIGS 9864 +#else +/* values that suffice for IEEE double */ +#define MAXFRACDIGS 344 +#define MAXINTDIGS 308 +#endif +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/ftell64_.c b/min-dgels/base/F2CLIBS/libf2c/ftell64_.c new file mode 100644 index 0000000..9cc00cb --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/ftell64_.c @@ -0,0 +1,52 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif + + static FILE * +#ifdef KR_headers +unit_chk(Unit, who) integer Unit; char *who; +#else +unit_chk(integer Unit, char *who) +#endif +{ + if (Unit >= MXUNIT || Unit < 0) + f__fatal(101, who); + return f__units[Unit].ufd; + } + + longint +#ifdef KR_headers +ftell64_(Unit) integer *Unit; +#else +ftell64_(integer *Unit) +#endif +{ + FILE *f; + return (f = unit_chk(*Unit, "ftell")) ? FTELL(f) : -1L; + } + + int +#ifdef KR_headers +fseek64_(Unit, offset, whence) integer *Unit, *whence; longint *offset; +#else +fseek64_(integer *Unit, longint *offset, integer *whence) +#endif +{ + FILE *f; + int w = (int)*whence; +#ifdef SEEK_SET + static int wohin[3] = { SEEK_SET, SEEK_CUR, SEEK_END }; +#endif + if (w < 0 || w > 2) + w = 0; +#ifdef SEEK_SET + w = wohin[w]; +#endif + return !(f = unit_chk(*Unit, "fseek")) + || FSEEK(f, (OFF_T)*offset, w) ? 
1 : 0; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/ftell_.c b/min-dgels/base/F2CLIBS/libf2c/ftell_.c new file mode 100644 index 0000000..0acd60f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/ftell_.c @@ -0,0 +1,52 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif + + static FILE * +#ifdef KR_headers +unit_chk(Unit, who) integer Unit; char *who; +#else +unit_chk(integer Unit, const char *who) +#endif +{ + if (Unit >= MXUNIT || Unit < 0) + f__fatal(101, who); + return f__units[Unit].ufd; + } + + integer +#ifdef KR_headers +ftell_(Unit) integer *Unit; +#else +ftell_(integer *Unit) +#endif +{ + FILE *f; + return (f = unit_chk(*Unit, "ftell")) ? ftell(f) : -1L; + } + + int +#ifdef KR_headers +fseek_(Unit, offset, whence) integer *Unit, *offset, *whence; +#else +fseek_(integer *Unit, integer *offset, integer *whence) +#endif +{ + FILE *f; + int w = (int)*whence; +#ifdef SEEK_SET + static int wohin[3] = { SEEK_SET, SEEK_CUR, SEEK_END }; +#endif + if (w < 0 || w > 2) + w = 0; +#ifdef SEEK_SET + w = wohin[w]; +#endif + return !(f = unit_chk(*Unit, "fseek")) + || fseek(f, *offset, w) ? 
1 : 0; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/getarg_.c b/min-dgels/base/F2CLIBS/libf2c/getarg_.c new file mode 100644 index 0000000..2b69a1e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/getarg_.c @@ -0,0 +1,36 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +/* + * subroutine getarg(k, c) + * returns the kth unix command argument in fortran character + * variable argument c +*/ + +#ifdef KR_headers +VOID getarg_(n, s, ls) ftnint *n; char *s; ftnlen ls; +#define Const /*nothing*/ +#else +#define Const const +void getarg_(ftnint *n, char *s, ftnlen ls) +#endif +{ + extern int xargc; + extern char **xargv; + Const char *t; + int i; + + if(*n>=0 && *n +#include +#ifdef __cplusplus +extern "C" { +#endif +extern char *F77_aloc(ftnlen, const char*); +#endif + +/* + * getenv - f77 subroutine to return environment variables + * + * called by: + * call getenv (ENV_NAME, char_var) + * where: + * ENV_NAME is the name of an environment variable + * char_var is a character variable which will receive + * the current value of ENV_NAME, or all blanks + * if ENV_NAME is not defined + */ + +#ifdef KR_headers + VOID +getenv_(fname, value, flen, vlen) char *value, *fname; ftnlen vlen, flen; +#else + void +getenv_(char *fname, char *value, ftnlen flen, ftnlen vlen) +#endif +{ + char buf[256], *ep, *fp; + integer i; + + if (flen <= 0) + goto add_blanks; + for(i = 0; i < sizeof(buf); i++) { + if (i == flen || (buf[i] = fname[i]) == ' ') { + buf[i] = 0; + ep = getenv(buf); + goto have_ep; + } + } + while(i < flen && fname[i] != ' ') + i++; + strncpy(fp = F77_aloc(i+1, "getenv_"), fname, (int)i); + fp[i] = 0; + ep = getenv(fp); + free(fp); + have_ep: + if (ep) + while(*ep && vlen-- > 0) + *value++ = *ep++; + add_blanks: + while(vlen-- > 0) + *value++ = ' '; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_abs.c b/min-dgels/base/F2CLIBS/libf2c/h_abs.c new file mode 100644 index 0000000..db69068 --- 
/dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_abs.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_abs(x) shortint *x; +#else +shortint h_abs(shortint *x) +#endif +{ +if(*x >= 0) + return(*x); +return(- *x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_dim.c b/min-dgels/base/F2CLIBS/libf2c/h_dim.c new file mode 100644 index 0000000..443427a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_dim.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_dim(a,b) shortint *a, *b; +#else +shortint h_dim(shortint *a, shortint *b) +#endif +{ +return( *a > *b ? *a - *b : 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_dnnt.c b/min-dgels/base/F2CLIBS/libf2c/h_dnnt.c new file mode 100644 index 0000000..1ec641c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_dnnt.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +shortint h_dnnt(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +shortint h_dnnt(doublereal *x) +#endif +{ +return (shortint)(*x >= 0. ? 
floor(*x + .5) : -floor(.5 - *x)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_indx.c b/min-dgels/base/F2CLIBS/libf2c/h_indx.c new file mode 100644 index 0000000..018f2f4 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_indx.c @@ -0,0 +1,32 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_indx(a, b, la, lb) char *a, *b; ftnlen la, lb; +#else +shortint h_indx(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +ftnlen i, n; +char *s, *t, *bend; + +n = la - lb + 1; +bend = b + lb; + +for(i = 0 ; i < n ; ++i) + { + s = a + i; + t = b; + while(t < bend) + if(*s++ != *t++) + goto no; + return((shortint)i+1); + no: ; + } +return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_len.c b/min-dgels/base/F2CLIBS/libf2c/h_len.c new file mode 100644 index 0000000..8b0aea9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_len.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_len(s, n) char *s; ftnlen n; +#else +shortint h_len(char *s, ftnlen n) +#endif +{ +return(n); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_mod.c b/min-dgels/base/F2CLIBS/libf2c/h_mod.c new file mode 100644 index 0000000..611ef0a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_mod.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_mod(a,b) short *a, *b; +#else +shortint h_mod(short *a, short *b) +#endif +{ +return( *a % *b); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_nint.c b/min-dgels/base/F2CLIBS/libf2c/h_nint.c new file mode 100644 index 0000000..9e2282f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_nint.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +shortint h_nint(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif 
+shortint h_nint(real *x) +#endif +{ +return (shortint)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/h_sign.c b/min-dgels/base/F2CLIBS/libf2c/h_sign.c new file mode 100644 index 0000000..4e21438 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/h_sign.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint h_sign(a,b) shortint *a, *b; +#else +shortint h_sign(shortint *a, shortint *b) +#endif +{ +shortint x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? x : -x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/hl_ge.c b/min-dgels/base/F2CLIBS/libf2c/hl_ge.c new file mode 100644 index 0000000..8c72f03 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/hl_ge.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +shortlogical hl_ge(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +shortlogical hl_ge(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) >= 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/hl_gt.c b/min-dgels/base/F2CLIBS/libf2c/hl_gt.c new file mode 100644 index 0000000..a448522 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/hl_gt.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +shortlogical hl_gt(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +shortlogical hl_gt(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) > 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/hl_le.c b/min-dgels/base/F2CLIBS/libf2c/hl_le.c new file mode 100644 index 0000000..31cbc43 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/hl_le.c @@ -0,0 +1,18 @@ +#include 
"f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +shortlogical hl_le(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +shortlogical hl_le(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) <= 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/hl_lt.c b/min-dgels/base/F2CLIBS/libf2c/hl_lt.c new file mode 100644 index 0000000..7ad3c71 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/hl_lt.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +shortlogical hl_lt(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +shortlogical hl_lt(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) < 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i77vers.c b/min-dgels/base/F2CLIBS/libf2c/i77vers.c new file mode 100644 index 0000000..60cc24e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i77vers.c @@ -0,0 +1,343 @@ + char +_libi77_version_f2c[] = "\n@(#) LIBI77 VERSION (f2c) pjw,dmg-mods 20030321\n"; + +/* +2.01 $ format added +2.02 Coding bug in open.c repaired +2.03 fixed bugs in lread.c (read * with negative f-format) and lio.c + and lio.h (e-format conforming to spec) +2.04 changed open.c and err.c (fopen and freopen respectively) to + update to new c-library (append mode) +2.05 added namelist capability +2.06 allow internal list and namelist I/O +*/ + +/* +close.c: + allow upper-case STATUS= values +endfile.c + create fort.nnn if unit nnn not open; + else if (file length == 0) use creat() rather than copy; + use local copy() rather than forking /bin/cp; + rewind, fseek to clear buffer (for no reading past EOF) +err.c + use neither setbuf nor setvbuf; make stderr buffered +fio.h + #define _bufend +inquire.c + upper case responses; + omit byfile test 
from SEQUENTIAL= + answer "YES" to DIRECT= for unopened file (open to debate) +lio.c + flush stderr, stdout at end of each stmt + space before character strings in list output only at line start +lio.h + adjust LEW, LED consistent with old libI77 +lread.c + use atof() + allow "nnn*," when reading complex constants +open.c + try opening for writing when open for read fails, with + special uwrt value (2) delaying creat() to first write; + set curunit so error messages don't drop core; + no file name ==> fort.nnn except for STATUS='SCRATCH' +rdfmt.c + use atof(); trust EOF == end-of-file (so don't read past + end-of-file after endfile stmt) +sfe.c + flush stderr, stdout at end of each stmt +wrtfmt.c: + use upper case + put wrt_E and wrt_F into wref.c, use sprintf() + rather than ecvt() and fcvt() [more accurate on VAX] +*/ + +/* 16 Oct. 1988: uwrt = 3 after write, rewind, so close won't zap the file. */ + +/* 10 July 1989: change _bufend to buf_end in fio.h, wsfe.c, wrtfmt.c */ + +/* 28 Nov. 1989: corrections for IEEE and Cray arithmetic */ +/* 29 Nov. 1989: change various int return types to long for f2c */ +/* 30 Nov. 1989: various types from f2c.h */ +/* 6 Dec. 1989: types corrected various places */ +/* 19 Dec. 1989: make iostat= work right for internal I/O */ +/* 8 Jan. 1990: add rsne, wsne -- routines for handling NAMELIST */ +/* 28 Jan. 1990: have NAMELIST read treat $ as &, general white + space as blank */ +/* 27 Mar. 1990: change an = to == in rd_L(rdfmt.c) so formatted reads + of logical values reject letters other than fFtT; + have nowwriting reset cf */ +/* 14 Aug. 1990: adjust lread.c to treat tabs as spaces in list input */ +/* 17 Aug. 1990: adjust open.c to recognize blank='Z...' as well as + blank='z...' when reopening an open file */ +/* 30 Aug. 
1990: prevent embedded blanks in list output of complex values; + omit exponent field in list output of values of + magnitude between 10 and 1e8; prevent writing stdin + and reading stdout or stderr; don't close stdin, stdout, + or stderr when reopening units 5, 6, 0. */ +/* 18 Sep. 1990: add component udev to unit and consider old == new file + iff uinode and udev values agree; use stat rather than + access to check existence of file (when STATUS='OLD')*/ +/* 2 Oct. 1990: adjust rewind.c so two successive rewinds after a write + don't clobber the file. */ +/* 9 Oct. 1990: add #include "fcntl.h" to endfile.c, err.c, open.c; + adjust g_char in util.c for segmented memories. */ +/* 17 Oct. 1990: replace abort() and _cleanup() with calls on + sig_die(...,1) (defined in main.c). */ +/* 5 Nov. 1990: changes to open.c: complain if new= is specified and the + file already exists; allow file= to be omitted in open stmts + and allow status='replace' (Fortran 90 extensions). */ +/* 11 Dec. 1990: adjustments for POSIX. */ +/* 15 Jan. 1991: tweak i_ungetc in rsli.c to allow reading from + strings in read-only memory. */ +/* 25 Apr. 1991: adjust namelist stuff to work with f2c -i2 */ +/* 26 Apr. 1991: fix some bugs with NAMELIST read of multi-dim. arrays */ +/* 16 May 1991: increase LEFBL in lio.h to bypass NeXT bug */ +/* 17 Oct. 1991: change type of length field in sequential unformatted + records from int to long (for systems where sizeof(int) + can vary, depending on the compiler or compiler options). */ +/* 14 Nov. 1991: change uint to Uint in fmt.h, rdfmt.c, wrtfmt.c. */ +/* 25 Nov. 1991: change uint to Uint in lwrite.c; change sizeof(int) to + sizeof(uioint) in fseeks in sue.c (missed on 17 Oct.). */ +/* 1 Dec. 1991: uio.c: add test for read failure (seq. unformatted reads); + adjust an error return from EOF to off end of record */ +/* 12 Dec. 1991: rsli.c: fix bug with internal list input that caused + the last character of each record to be ignored. 
+ iio.c: adjust error message in internal formatted + input from "end-of-file" to "off end of record" if + the format specifies more characters than the + record contains. */ +/* 17 Jan. 1992: lread.c, rsne.c: in list and namelist input, + treat "r* ," and "r*," alike (where r is a + positive integer constant), and fix a bug in + handling null values following items with repeat + counts (e.g., 2*1,,3); for namelist reading + of a numeric array, allow a new name-value subsequence + to terminate the current one (as though the current + one ended with the right number of null values). + lio.h, lwrite.c: omit insignificant zeros in + list and namelist output. To get the old + behavior, compile with -DOld_list_output . */ +/* 18 Jan. 1992: make list output consistent with F format by + printing .1 rather than 0.1 (introduced yesterday). */ +/* 3 Feb. 1992: rsne.c: fix namelist read bug that caused the + character following a comma to be ignored. */ +/* 19 May 1992: adjust iio.c, ilnw.c, rdfmt.c and rsli.c to make err= + work with internal list and formatted I/O. */ +/* 18 July 1992: adjust rsne.c to allow namelist input to stop at + an & (e.g. &end). */ +/* 23 July 1992: switch to ANSI prototypes unless KR_headers is #defined ; + recognize Z format (assuming 8-bit bytes). */ +/* 14 Aug. 1992: tweak wrt_E in wref.c to avoid -NaN */ +/* 23 Oct. 1992: Supply missing l_eof = 0 assignment to s_rsne() in rsne.c + (so end-of-file on other files won't confuse namelist + reads of external files). Prepend f__ to external + names that are only of internal interest to lib[FI]77. */ +/* 1 Feb. 1993: backspace.c: fix bug that bit when last char of 2nd + buffer == '\n'. + endfile.c: guard against tiny L_tmpnam; close and reopen + files in t_runc(). + lio.h: lengthen LINTW (buffer size in lwrite.c). + err.c, open.c: more prepending of f__ (to [rw]_mode). */ +/* 5 Feb. 1993: tweaks to NAMELIST: rsne.c: ? 
prints the namelist being + sought; namelists of the wrong name are skipped (after + an error message; xwsne.c: namelist writes have a + newline before each new variable. + open.c: ACCESS='APPEND' positions sequential files + at EOF (nonstandard extension -- that doesn't require + changing data structures). */ +/* 9 Feb. 1993: Change some #ifdef MSDOS lines to #ifdef NON_UNIX_STDIO. + err.c: under NON_UNIX_STDIO, avoid close(creat(name,0666)) + when the unit has another file descriptor for name. */ +/* 4 March 1993: err.c, open.c: take declaration of fdopen from rawio.h; + open.c: always give f__w_mode[] 4 elements for use + in t_runc (in endfile.c -- for change of 1 Feb. 1993). */ +/* 6 March 1993: uio.c: adjust off-end-of-record test for sequential + unformatted reads to respond to err= rather than end=. */ +/* 12 March 1993: various tweaks for C++ */ +/* 6 April 1993: adjust error returns for formatted inputs to flush + the current input line when err=label is specified. + To restore the old behavior (input left mid-line), + either adjust the #definition of errfl in fio.h or + omit the invocation of f__doend in err__fl (in err.c). */ +/* 23 June 1993: iio.c: fix bug in format reversions for internal writes. */ +/* 5 Aug. 1993: lread.c: fix bug in handling repetition counts for + logical data (during list or namelist input). + Change struct f__syl to struct syl (for buggy compilers). */ +/* 7 Aug. 1993: lread.c: fix bug in namelist reading of incomplete + logical arrays. */ +/* 9 Aug. 1993: lread.c: fix bug in namelist reading of an incomplete + array of numeric data followed by another namelist + item whose name starts with 'd', 'D', 'e', or 'E'. */ +/* 8 Sept. 1993: open.c: protect #include "sys/..." with + #ifndef NON_UNIX_STDIO; Version date not changed. */ +/* 10 Nov. 1993: backspace.c: add nonsense for #ifdef MSDOS */ +/* 8 Dec. 
1993: iio.c: adjust internal formatted reads to treat + short records as though padded with blanks + (rather than causing an "off end of record" error). */ +/* 22 Feb. 1994: lread.c: check that realloc did not return NULL. */ +/* 6 June 1994: Under NON_UNIX_STDIO, use binary mode for direct + formatted files (avoiding any confusion regarding \n). */ +/* 5 July 1994: Fix bug (introduced 6 June 1994?) in reopening files + under NON_UNIX_STDIO. */ +/* 6 July 1994: wref.c: protect with #ifdef GOOD_SPRINTF_EXPONENT an + optimization that requires exponents to have 2 digits + when 2 digits suffice. + lwrite.c wsfe.c (list and formatted external output): + omit ' ' carriage-control when compiled with + -DOMIT_BLANK_CC . Off-by-one bug fixed in character + count for list output of character strings. + Omit '.' in list-directed printing of Nan, Infinity. */ +/* 12 July 1994: wrtfmt.c: under G11.4, write 0. as " .0000 " rather + than " .0000E+00". */ +/* 3 Aug. 1994: lwrite.c: do not insert a newline when appending an + oversize item to an empty line. */ +/* 12 Aug. 1994: rsli.c rsne.c: fix glitch (reset nml_read) that kept + ERR= (in list- or format-directed input) from working + after a NAMELIST READ. */ +/* 7 Sept. 1994: typesize.c: adjust to allow types LOGICAL*1, LOGICAL*2, + INTEGER*1, and (under -DAllow_TYQUAD) INTEGER*8 + in NAMELISTs. */ +/* 6 Oct. 1994: util.c: omit f__mvgbt, as it is never used. */ +/* 2 Nov. 1994: add #ifdef ALWAYS_FLUSH logic. */ +/* 26 Jan. 1995: wref.c: fix glitch in printing the exponent of 0 when + GOOD_SPRINTF_EXPONENT is not #defined. */ +/* 24 Feb. 1995: iio.c: z_getc: insert (unsigned char *) to allow + internal reading of characters with high-bit set + (on machines that sign-extend characters). */ +/* 14 March 1995:lread.c and rsfe.c: adjust s_rsle and s_rsfe to + check for end-of-file (to prevent infinite loops + with empty read statements). 
*/ +/* 26 May 1995: iio.c: z_wnew: fix bug in handling T format items + in internal writes whose last item is written to + an earlier position than some previous item. */ +/* 29 Aug. 1995: backspace.c: adjust MSDOS logic. */ +/* 6 Sept. 1995: Adjust namelist input to treat a subscripted name + whose subscripts do not involve colons similarly + to the name without a subscript: accept several + values, stored in successive elements starting at + the indicated subscript. Adjust namelist output + to quote character strings (avoiding confusion with + arrays of character strings). Adjust f_init calls + for people who don't use libF77's main(); now open and + namelist read statements invoke f_init if needed. */ +/* 7 Sept. 1995: Fix some bugs with -DAllow_TYQUAD (for integer*8). + Add -DNo_Namelist_Comments lines to rsne.c. */ +/* 5 Oct. 1995: wrtfmt.c: fix bug with t editing (f__cursor was not + always zeroed in mv_cur). */ +/* 11 Oct. 1995: move defs of f__hiwater, f__svic, f__icptr from wrtfmt.c + to err.c */ +/* 15 Mar. 1996: lread.c, rsfe.c: honor END= in READ stmt with empty iolist */ + +/* 13 May 1996: add ftell_.c and fseek_.c */ +/* 9 June 1996: Adjust rsli.c and lread.c so internal list input with + too few items in the input string will honor end= . */ +/* 12 Sept. 1995:fmtlib.c: fix glitch in printing the most negative integer. */ +/* 25 Sept. 1995:fmt.h: for formatted writes of negative integer*1 values, + make ic signed on ANSI systems. If formatted writes of + integer*1 values trouble you when using a K&R C compiler, + switch to an ANSI compiler or use a compiler flag that + makes characters signed. */ +/* 9 Dec. 1996: d[fu]e.c, err.c: complain about non-positive rec= + in direct read and write statements. + ftell_.c: change param "unit" to "Unit" for -DKR_headers. */ +/* 26 Feb. 1997: ftell_.c: on systems that define SEEK_SET, etc., use + SEEK_SET, SEEK_CUR, SEEK_END for *whence = 0, 1, 2. */ +/* 7 Apr. 
1997: fmt.c: adjust to complain at missing numbers in formats + (but still treat missing ".nnn" as ".0"). */ +/* 11 Apr. 1997: err.c: attempt to make stderr line buffered rather + than fully buffered. (Buffering is needed for format + items T and TR.) */ +/* 27 May 1997: ftell_.c: fix typo (that caused the third argument to be + treated as 2 on some systems). */ +/* 5 Aug. 1997: lread.c: adjust to accord with a change to the Fortran 8X + draft (in 1990 or 1991) that rescinded permission to elide + quote marks in namelist input of character data; compile + with -DF8X_NML_ELIDE_QUOTES to get the old behavior. + wrtfmt.o: wrt_G: tweak to print the right number of 0's + for zero under G format. */ +/* 16 Aug. 1997: iio.c: fix bug in internal writes to an array of character + strings that sometimes caused one more array element than + required by the format to be blank-filled. Example: + format(1x). */ +/* 16 Sept. 1997:fmt.[ch] rdfmt.c wrtfmt.c: tweak struct syl for machines + with 64-bit pointers and 32-bit ints that did not 64-bit + align struct syl (e.g., Linux on the DEC Alpha). */ +/* 19 Jan. 1998: backspace.c: for b->ufmt==0, change sizeof(int) to + sizeof(uiolen). On machines where this would make a + difference, it is best for portability to compile libI77 with + -DUIOLEN_int (which will render the change invisible). */ +/* 4 March 1998: open.c: fix glitch in comparing file names under + -DNON_UNIX_STDIO */ +/* 17 March 1998: endfile.c, open.c: acquire temporary files from tmpfile(), + unless compiled with -DNON_ANSI_STDIO, which uses mktemp(). + New buffering scheme independent of NON_UNIX_STDIO for + handling T format items. Now -DNON_UNIX_STDIO is no + longer be necessary for Linux, and libf2c no longer + causes stderr to be buffered -- the former setbuf or + setvbuf call for stderr was to make T format items work. 
+ open.c: use the Posix access() function to check existence + or nonexistence of files, except under -DNON_POSIX_STDIO, + where trial fopen calls are used. */ +/* 5 April 1998: wsfe.c: make $ format item work: this was lost in the + changes of 17 March 1998. */ +/* 28 May 1998: backspace.c dfe.c due.c iio.c lread.c rsfe.c sue.c wsfe.c: + set f__curunit sooner so various error messages will + correctly identify the I/O unit involved. */ +/* 17 June 1998: lread.c: unless compiled with + ALLOW_FLOAT_IN_INTEGER_LIST_INPUT #defined, treat + floating-point numbers (containing either a decimal point + or an exponent field) as errors when they appear as list + input for integer data. */ +/* 7 Sept. 1998: move e_wdfe from sfe.c to dfe.c, where it was originally. + Why did it ever move to sfe.c? */ +/* 2 May 1999: open.c: set f__external (to get "external" versus "internal" + right in the error message if we cannot open the file). + err.c: cast a pointer difference to (int) for %d. + rdfmt.c: omit fixed-length buffer that could be overwritten + by formats Inn or Lnn with nn > 83. */ +/* 3 May 1999: open.c: insert two casts for machines with 64-bit longs. */ +/* 18 June 1999: backspace.c: allow for b->ufd changing in t_runc */ +/* 27 June 1999: rsne.c: fix bug in namelist input: a misplaced increment */ +/* could cause wrong array elements to be assigned; e.g., */ +/* "&input k(5)=10*1 &end" assigned k(5) and k(15..23) */ +/* 15 Nov. 1999: endfile.c: set state to writing (b->uwrt = 1) when an */ +/* endfile statement requires copying the file. */ +/* (Otherwise an immediately following rewind statement */ +/* could make the file appear empty.) Also, supply a */ +/* missing (long) cast in the sprintf call. */ +/* sfe.c: add #ifdef ALWAYS_FLUSH logic, for formatted I/O: */ +/* Compiling libf2c with -DALWAYS_FLUSH should prevent losing */ +/* any data in buffers should the program fault. It also */ +/* makes the program run more slowly. 
*/ +/* 20 April 2000: rsne.c, xwsne.c: tweaks that only matter if ftnint and */ +/* ftnlen are of different fundamental types (different numbers */ +/* of bits). Since these files will not compile when this */ +/* change matters, the above VERSION string remains unchanged. */ +/* 4 July 2000: adjustments to permit compilation by C++ compilers; */ +/* VERSION string remains unchanged. */ +/* 5 Dec. 2000: lread.c: under namelist input, when reading a logical array, */ +/* treat Tstuff= and Fstuff= as new assignments rather than as */ +/* logical constants. */ +/* 22 Feb. 2001: endfile.c: adjust to use truncate() unless compiled with */ +/* -DNO_TRUNCATE (or with -DMSDOS). */ +/* 1 March 2001: endfile.c: switch to ftruncate (absent -DNO_TRUNCATE), */ +/* thus permitting truncation of scratch files on true Unix */ +/* systems, where scratch files have no name. Add an fflush() */ +/* (surprisingly) needed on some Linux systems. */ +/* 11 Oct. 2001: backspac.c dfe.c due.c endfile.c err.c fio.h fmt.c fmt.h */ +/* inquire.c open.c rdfmt.c sue.c util.c: change fseek and */ +/* ftell to FSEEK and FTELL (#defined to be fseek and ftell, */ +/* respectively, in fio.h unless otherwise #defined), and use */ +/* type OFF_T (#defined to be long unless otherwise #defined) */ +/* to permit handling files over 2GB long where possible, */ +/* with suitable -D options, provided for some systems in new */ +/* header file sysdep1.h (copied from sysdep1.h0 by default). */ +/* 15 Nov. 2001: endfile.c: add FSEEK after FTRUNCATE. */ +/* 28 Nov. 2001: fmt.h lwrite.c wref.c and (new) signbit.c: on IEEE systems, */ +/* print -0 as -0 when compiled with -DSIGNED_ZEROS. See */ +/* comments in makefile or (better) libf2c/makefile.* . */ +/* 6 Sept. 
2002: rsne.c: fix bug with multiple repeat counts in reading */ +/* namelists, e.g., &nl a(2) = 3*1.0, 2*2.0, 3*3.0 / */ +/* 21 March 2003: err.c: before writing to a file after reading from it, */ +/* f_seek(file, 0, SEEK_CUR) to make writing legal in ANSI C. */ diff --git a/min-dgels/base/F2CLIBS/libf2c/i_abs.c b/min-dgels/base/F2CLIBS/libf2c/i_abs.c new file mode 100644 index 0000000..2b92c4a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_abs.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_abs(x) integer *x; +#else +integer i_abs(integer *x) +#endif +{ +if(*x >= 0) + return(*x); +return(- *x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_ceiling.c b/min-dgels/base/F2CLIBS/libf2c/i_ceiling.c new file mode 100644 index 0000000..f708a8b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_ceiling.c @@ -0,0 +1,36 @@ +#include "f2c.h" + +#ifdef KR_headers +integer i_sceiling(x) real *x; +#else +#ifdef __cplusplus +extern "C" { +#endif +integer i_sceiling(real *x) +#endif +{ +#define CEIL(x) ((int)(x) + ((x) > 0 && (x) != (int)(x))) + + return (integer) CEIL(*x); +} +#ifdef __cplusplus +} +#endif + + +#ifdef KR_headers +integer i_dceiling(x) doublereal *x; +#else +#ifdef __cplusplus +extern "C" { +#endif +integer i_dceiling(doublereal *x) +#endif +{ +#define CEIL(x) ((int)(x) + ((x) > 0 && (x) != (int)(x))) + + return (integer) CEIL(*x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_dim.c b/min-dgels/base/F2CLIBS/libf2c/i_dim.c new file mode 100644 index 0000000..60ed4d8 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_dim.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_dim(a,b) integer *a, *b; +#else +integer i_dim(integer *a, integer *b) +#endif +{ +return( *a > *b ? 
*a - *b : 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_dnnt.c b/min-dgels/base/F2CLIBS/libf2c/i_dnnt.c new file mode 100644 index 0000000..3abc2dc --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_dnnt.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +integer i_dnnt(x) doublereal *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +integer i_dnnt(doublereal *x) +#endif +{ +return (integer)(*x >= 0. ? floor(*x + .5) : -floor(.5 - *x)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_indx.c b/min-dgels/base/F2CLIBS/libf2c/i_indx.c new file mode 100644 index 0000000..1925639 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_indx.c @@ -0,0 +1,32 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_indx(a, b, la, lb) char *a, *b; ftnlen la, lb; +#else +integer i_indx(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +ftnlen i, n; +char *s, *t, *bend; + +n = la - lb + 1; +bend = b + lb; + +for(i = 0 ; i < n ; ++i) + { + s = a + i; + t = b; + while(t < bend) + if(*s++ != *t++) + goto no; + return(i+1); + no: ; + } +return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_len.c b/min-dgels/base/F2CLIBS/libf2c/i_len.c new file mode 100644 index 0000000..0f7b188 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_len.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_len(s, n) char *s; ftnlen n; +#else +integer i_len(char *s, ftnlen n) +#endif +{ +return(n); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_len_trim.c b/min-dgels/base/F2CLIBS/libf2c/i_len_trim.c new file mode 100644 index 0000000..c7b7680 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_len_trim.c @@ -0,0 +1,22 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer 
i_len_trim(s, n) char *s; ftnlen n; +#else +integer i_len_trim(char *s, ftnlen n) +#endif +{ + int i; + + for(i=n-1;i>=0;i--) + if(s[i] != ' ') + return i + 1; + + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_mod.c b/min-dgels/base/F2CLIBS/libf2c/i_mod.c new file mode 100644 index 0000000..4a9b560 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_mod.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_mod(a,b) integer *a, *b; +#else +integer i_mod(integer *a, integer *b) +#endif +{ +return( *a % *b); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_nint.c b/min-dgels/base/F2CLIBS/libf2c/i_nint.c new file mode 100644 index 0000000..fe9fd68 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_nint.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +integer i_nint(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +integer i_nint(real *x) +#endif +{ +return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/i_sign.c b/min-dgels/base/F2CLIBS/libf2c/i_sign.c new file mode 100644 index 0000000..4c20e94 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/i_sign.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer i_sign(a,b) integer *a, *b; +#else +integer i_sign(integer *a, integer *b) +#endif +{ +integer x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? 
x : -x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/iargc_.c b/min-dgels/base/F2CLIBS/libf2c/iargc_.c new file mode 100644 index 0000000..2f29da0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/iargc_.c @@ -0,0 +1,17 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +ftnint iargc_() +#else +ftnint iargc_(void) +#endif +{ +extern int xargc; +return ( xargc - 1 ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/iio.c b/min-dgels/base/F2CLIBS/libf2c/iio.c new file mode 100644 index 0000000..8553efc --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/iio.c @@ -0,0 +1,159 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif +extern char *f__icptr; +char *f__icend; +extern icilist *f__svic; +int f__icnum; + + int +z_getc(Void) +{ + if(f__recpos++ < f__svic->icirlen) { + if(f__icptr >= f__icend) err(f__svic->iciend,(EOF),"endfile"); + return(*(unsigned char *)f__icptr++); + } + return '\n'; +} + + void +#ifdef KR_headers +z_putc(c) +#else +z_putc(int c) +#endif +{ + if (f__icptr < f__icend && f__recpos++ < f__svic->icirlen) + *f__icptr++ = c; +} + + int +z_rnew(Void) +{ + f__icptr = f__svic->iciunit + (++f__icnum)*f__svic->icirlen; + f__recpos = 0; + f__cursor = 0; + f__hiwater = 0; + return 1; +} + + static int +z_endp(Void) +{ + (*f__donewrec)(); + return 0; + } + + int +#ifdef KR_headers +c_si(a) icilist *a; +#else +c_si(icilist *a) +#endif +{ + f__elist = (cilist *)a; + f__fmtbuf=a->icifmt; + f__curunit = 0; + f__sequential=f__formatted=1; + f__external=0; + if(pars_f(f__fmtbuf)<0) + err(a->icierr,100,"startint"); + fmt_bg(); + f__cblank=f__cplus=f__scale=0; + f__svic=a; + f__icnum=f__recpos=0; + f__cursor = 0; + f__hiwater = 0; + f__icptr = a->iciunit; + f__icend = f__icptr + a->icirlen*a->icirnum; + f__cf = 0; + return(0); +} + + int +iw_rev(Void) +{ + if(f__workdone) + z_endp(); + f__hiwater = f__recpos = f__cursor = 0; + 
return(f__workdone=0); + } + +#ifdef KR_headers +integer s_rsfi(a) icilist *a; +#else +integer s_rsfi(icilist *a) +#endif +{ int n; + if(n=c_si(a)) return(n); + f__reading=1; + f__doed=rd_ed; + f__doned=rd_ned; + f__getn=z_getc; + f__dorevert = z_endp; + f__donewrec = z_rnew; + f__doend = z_endp; + return(0); +} + + int +z_wnew(Void) +{ + if (f__recpos < f__hiwater) { + f__icptr += f__hiwater - f__recpos; + f__recpos = f__hiwater; + } + while(f__recpos++ < f__svic->icirlen) + *f__icptr++ = ' '; + f__recpos = 0; + f__cursor = 0; + f__hiwater = 0; + f__icnum++; + return 1; +} +#ifdef KR_headers +integer s_wsfi(a) icilist *a; +#else +integer s_wsfi(icilist *a) +#endif +{ int n; + if(n=c_si(a)) return(n); + f__reading=0; + f__doed=w_ed; + f__doned=w_ned; + f__putn=z_putc; + f__dorevert = iw_rev; + f__donewrec = z_wnew; + f__doend = z_endp; + return(0); +} +integer e_rsfi(Void) +{ int n = en_fio(); + f__fmtbuf = NULL; + return(n); +} +integer e_wsfi(Void) +{ + int n; + n = en_fio(); + f__fmtbuf = NULL; + if(f__svic->icirnum != 1 + && (f__icnum > f__svic->icirnum + || (f__icnum == f__svic->icirnum && (f__recpos | f__hiwater)))) + err(f__svic->icierr,110,"inwrite"); + if (f__recpos < f__hiwater) + f__recpos = f__hiwater; + if (f__recpos >= f__svic->icirlen) + err(f__svic->icierr,110,"recend"); + if (!f__recpos && f__icnum) + return n; + while(f__recpos++ < f__svic->icirlen) + *f__icptr++ = ' '; + return n; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/ilnw.c b/min-dgels/base/F2CLIBS/libf2c/ilnw.c new file mode 100644 index 0000000..e8b3d49 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/ilnw.c @@ -0,0 +1,83 @@ +#include "f2c.h" +#include "fio.h" +#include "lio.h" +#ifdef __cplusplus +extern "C" { +#endif +extern char *f__icptr; +extern char *f__icend; +extern icilist *f__svic; +extern int f__icnum; +#ifdef KR_headers +extern void z_putc(); +#else +extern void z_putc(int); +#endif + + static int +z_wSL(Void) +{ + while(f__recpos < 
f__svic->icirlen) + z_putc(' '); + return z_rnew(); + } + + static void +#ifdef KR_headers +c_liw(a) icilist *a; +#else +c_liw(icilist *a) +#endif +{ + f__reading = 0; + f__external = 0; + f__formatted = 1; + f__putn = z_putc; + L_len = a->icirlen; + f__donewrec = z_wSL; + f__svic = a; + f__icnum = f__recpos = 0; + f__cursor = 0; + f__cf = 0; + f__curunit = 0; + f__icptr = a->iciunit; + f__icend = f__icptr + a->icirlen*a->icirnum; + f__elist = (cilist *)a; + } + + integer +#ifdef KR_headers +s_wsni(a) icilist *a; +#else +s_wsni(icilist *a) +#endif +{ + cilist ca; + + c_liw(a); + ca.cifmt = a->icifmt; + x_wsne(&ca); + z_wSL(); + return 0; + } + + integer +#ifdef KR_headers +s_wsli(a) icilist *a; +#else +s_wsli(icilist *a) +#endif +{ + f__lioproc = l_write; + c_liw(a); + return(0); + } + +integer e_wsli(Void) +{ + z_wSL(); + return(0); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/inquire.c b/min-dgels/base/F2CLIBS/libf2c/inquire.c new file mode 100644 index 0000000..5936a67 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/inquire.c @@ -0,0 +1,117 @@ +#include "f2c.h" +#include "fio.h" +#include "string.h" +#ifdef NON_UNIX_STDIO +#ifndef MSDOS +#include "unistd.h" /* for access() */ +#endif +#endif +#ifdef KR_headers +integer f_inqu(a) inlist *a; +#else +#ifdef __cplusplus +extern "C" integer f_inqu(inlist*); +#endif +#ifdef MSDOS +#undef abs +#undef min +#undef max +#include "io.h" +#endif +integer f_inqu(inlist *a) +#endif +{ flag byfile; + int i; +#ifndef NON_UNIX_STDIO + int n; +#endif + unit *p; + char buf[256]; + long x; + if(a->infile!=NULL) + { byfile=1; + g_char(a->infile,a->infilen,buf); +#ifdef NON_UNIX_STDIO + x = access(buf,0) ? 
-1 : 0; + for(i=0,p=NULL;iinunitinunit>=0) + { + p= &f__units[a->inunit]; + } + else + { + p=NULL; + } + } + if(a->inex!=NULL) + if(byfile && x != -1 || !byfile && p!=NULL) + *a->inex=1; + else *a->inex=0; + if(a->inopen!=NULL) + if(byfile) *a->inopen=(p!=NULL); + else *a->inopen=(p!=NULL && p->ufd!=NULL); + if(a->innum!=NULL) *a->innum= p-f__units; + if(a->innamed!=NULL) + if(byfile || p!=NULL && p->ufnm!=NULL) + *a->innamed=1; + else *a->innamed=0; + if(a->inname!=NULL) + if(byfile) + b_char(buf,a->inname,a->innamlen); + else if(p!=NULL && p->ufnm!=NULL) + b_char(p->ufnm,a->inname,a->innamlen); + if(a->inacc!=NULL && p!=NULL && p->ufd!=NULL) + if(p->url) + b_char("DIRECT",a->inacc,a->inacclen); + else b_char("SEQUENTIAL",a->inacc,a->inacclen); + if(a->inseq!=NULL) + if(p!=NULL && p->url) + b_char("NO",a->inseq,a->inseqlen); + else b_char("YES",a->inseq,a->inseqlen); + if(a->indir!=NULL) + if(p==NULL || p->url) + b_char("YES",a->indir,a->indirlen); + else b_char("NO",a->indir,a->indirlen); + if(a->infmt!=NULL) + if(p!=NULL && p->ufmt==0) + b_char("UNFORMATTED",a->infmt,a->infmtlen); + else b_char("FORMATTED",a->infmt,a->infmtlen); + if(a->inform!=NULL) + if(p!=NULL && p->ufmt==0) + b_char("NO",a->inform,a->informlen); + else b_char("YES",a->inform,a->informlen); + if(a->inunf) + if(p!=NULL && p->ufmt==0) + b_char("YES",a->inunf,a->inunflen); + else if (p!=NULL) b_char("NO",a->inunf,a->inunflen); + else b_char("UNKNOWN",a->inunf,a->inunflen); + if(a->inrecl!=NULL && p!=NULL) + *a->inrecl=p->url; + if(a->innrec!=NULL && p!=NULL && p->url>0) + *a->innrec=(ftnint)(FTELL(p->ufd)/p->url+1); + if(a->inblank && p!=NULL && p->ufmt) + if(p->ublnk) + b_char("ZERO",a->inblank,a->inblanklen); + else b_char("NULL",a->inblank,a->inblanklen); + return(0); +} diff --git a/min-dgels/base/F2CLIBS/libf2c/l_ge.c b/min-dgels/base/F2CLIBS/libf2c/l_ge.c new file mode 100644 index 0000000..a84f0ee --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/l_ge.c @@ -0,0 +1,18 @@ +#include "f2c.h" 
+#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +logical l_ge(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +logical l_ge(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) >= 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/l_gt.c b/min-dgels/base/F2CLIBS/libf2c/l_gt.c new file mode 100644 index 0000000..ae6950d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/l_gt.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +logical l_gt(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +logical l_gt(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) > 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/l_le.c b/min-dgels/base/F2CLIBS/libf2c/l_le.c new file mode 100644 index 0000000..625b49a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/l_le.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +logical l_le(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +logical l_le(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) <= 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/l_lt.c b/min-dgels/base/F2CLIBS/libf2c/l_lt.c new file mode 100644 index 0000000..ab21b36 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/l_lt.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern integer s_cmp(); +logical l_lt(a,b,la,lb) char *a, *b; ftnlen la, lb; +#else +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +logical l_lt(char *a, char *b, ftnlen la, ftnlen lb) +#endif +{ +return(s_cmp(a,b,la,lb) < 0); +} +#ifdef __cplusplus 
+} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/lbitbits.c b/min-dgels/base/F2CLIBS/libf2c/lbitbits.c new file mode 100644 index 0000000..5b6ccf7 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/lbitbits.c @@ -0,0 +1,68 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef LONGBITS +#define LONGBITS 32 +#endif + + integer +#ifdef KR_headers +lbit_bits(a, b, len) integer a, b, len; +#else +lbit_bits(integer a, integer b, integer len) +#endif +{ + /* Assume 2's complement arithmetic */ + + unsigned long x, y; + + x = (unsigned long) a; + y = (unsigned long)-1L; + x >>= b; + y <<= len; + return (integer)(x & ~y); + } + + integer +#ifdef KR_headers +lbit_cshift(a, b, len) integer a, b, len; +#else +lbit_cshift(integer a, integer b, integer len) +#endif +{ + unsigned long x, y, z; + + x = (unsigned long)a; + if (len <= 0) { + if (len == 0) + return 0; + goto full_len; + } + if (len >= LONGBITS) { + full_len: + if (b >= 0) { + b %= LONGBITS; + return (integer)(x << b | x >> LONGBITS -b ); + } + b = -b; + b %= LONGBITS; + return (integer)(x << LONGBITS - b | x >> b); + } + y = z = (unsigned long)-1; + y <<= len; + z &= ~y; + y &= x; + x &= z; + if (b >= 0) { + b %= len; + return (integer)(y | z & (x << b | x >> len - b)); + } + b = -b; + b %= len; + return (integer)(y | z & (x >> b | x << len - b)); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/lbitshft.c b/min-dgels/base/F2CLIBS/libf2c/lbitshft.c new file mode 100644 index 0000000..fbee94f --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/lbitshft.c @@ -0,0 +1,17 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + + integer +#ifdef KR_headers +lbit_shift(a, b) integer a; integer b; +#else +lbit_shift(integer a, integer b) +#endif +{ + return b >= 0 ? 
a << b : (integer)((uinteger)a >> -b); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/libf2c.lbc b/min-dgels/base/F2CLIBS/libf2c/libf2c.lbc new file mode 100644 index 0000000..c51c0aa --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/libf2c.lbc @@ -0,0 +1,153 @@ +abort_.obj +backspac.obj +c_abs.obj +c_cos.obj +c_div.obj +c_exp.obj +c_log.obj +c_sin.obj +c_sqrt.obj +cabs.obj +close.obj +d_abs.obj +d_acos.obj +d_asin.obj +d_atan.obj +d_atn2.obj +d_cnjg.obj +d_cos.obj +d_cosh.obj +d_dim.obj +d_exp.obj +d_imag.obj +d_int.obj +d_lg10.obj +d_log.obj +d_mod.obj +d_nint.obj +d_prod.obj +d_sign.obj +d_sin.obj +d_sinh.obj +d_sqrt.obj +d_tan.obj +d_tanh.obj +derf_.obj +derfc_.obj +dfe.obj +dolio.obj +dtime_.obj +due.obj +ef1asc_.obj +ef1cmc_.obj +endfile.obj +erf_.obj +erfc_.obj +err.obj +etime_.obj +exit_.obj +f77_aloc.obj +f77vers.obj +fmt.obj +fmtlib.obj +ftell_.obj +getarg_.obj +getenv_.obj +h_abs.obj +h_dim.obj +h_dnnt.obj +h_indx.obj +h_len.obj +h_mod.obj +h_nint.obj +h_sign.obj +hl_ge.obj +hl_gt.obj +hl_le.obj +hl_lt.obj +i77vers.obj +i_abs.obj +i_dim.obj +i_dnnt.obj +i_indx.obj +i_len.obj +i_mod.obj +i_nint.obj +i_sign.obj +iargc_.obj +iio.obj +ilnw.obj +inquire.obj +l_ge.obj +l_gt.obj +l_le.obj +l_lt.obj +lbitbits.obj +lbitshft.obj +lread.obj +lwrite.obj +main.obj +open.obj +pow_ci.obj +pow_dd.obj +pow_di.obj +pow_hh.obj +pow_ii.obj +pow_ri.obj +pow_zi.obj +pow_zz.obj +r_abs.obj +r_acos.obj +r_asin.obj +r_atan.obj +r_atn2.obj +r_cnjg.obj +r_cos.obj +r_cosh.obj +r_dim.obj +r_exp.obj +r_imag.obj +r_int.obj +r_lg10.obj +r_log.obj +r_mod.obj +r_nint.obj +r_sign.obj +r_sin.obj +r_sinh.obj +r_sqrt.obj +r_tan.obj +r_tanh.obj +rdfmt.obj +rewind.obj +rsfe.obj +rsli.obj +rsne.obj +s_cat.obj +s_cmp.obj +s_copy.obj +s_paus.obj +s_rnge.obj +s_stop.obj +sfe.obj +sig_die.obj +signal_.obj +sue.obj +system_.obj +typesize.obj +uio.obj +uninit.obj +util.obj +wref.obj +wrtfmt.obj +wsfe.obj +wsle.obj +wsne.obj +xwsne.obj +z_abs.obj +z_cos.obj +z_div.obj 
+z_exp.obj +z_log.obj +z_sin.obj +z_sqrt.obj diff --git a/min-dgels/base/F2CLIBS/libf2c/libf2c.sy b/min-dgels/base/F2CLIBS/libf2c/libf2c.sy new file mode 100644 index 0000000..bcba643 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/libf2c.sy @@ -0,0 +1,153 @@ ++abort_.obj & ++backspac.obj & ++c_abs.obj & ++c_cos.obj & ++c_div.obj & ++c_exp.obj & ++c_log.obj & ++c_sin.obj & ++c_sqrt.obj & ++cabs.obj & ++close.obj & ++d_abs.obj & ++d_acos.obj & ++d_asin.obj & ++d_atan.obj & ++d_atn2.obj & ++d_cnjg.obj & ++d_cos.obj & ++d_cosh.obj & ++d_dim.obj & ++d_exp.obj & ++d_imag.obj & ++d_int.obj & ++d_lg10.obj & ++d_log.obj & ++d_mod.obj & ++d_nint.obj & ++d_prod.obj & ++d_sign.obj & ++d_sin.obj & ++d_sinh.obj & ++d_sqrt.obj & ++d_tan.obj & ++d_tanh.obj & ++derf_.obj & ++derfc_.obj & ++dfe.obj & ++dolio.obj & ++dtime_.obj & ++due.obj & ++ef1asc_.obj & ++ef1cmc_.obj & ++endfile.obj & ++erf_.obj & ++erfc_.obj & ++err.obj & ++etime_.obj & ++exit_.obj & ++f77_aloc.obj & ++f77vers.obj & ++fmt.obj & ++fmtlib.obj & ++ftell_.obj & ++getarg_.obj & ++getenv_.obj & ++h_abs.obj & ++h_dim.obj & ++h_dnnt.obj & ++h_indx.obj & ++h_len.obj & ++h_mod.obj & ++h_nint.obj & ++h_sign.obj & ++hl_ge.obj & ++hl_gt.obj & ++hl_le.obj & ++hl_lt.obj & ++i77vers.obj & ++i_abs.obj & ++i_dim.obj & ++i_dnnt.obj & ++i_indx.obj & ++i_len.obj & ++i_mod.obj & ++i_nint.obj & ++i_sign.obj & ++iargc_.obj & ++iio.obj & ++ilnw.obj & ++inquire.obj & ++l_ge.obj & ++l_gt.obj & ++l_le.obj & ++l_lt.obj & ++lbitbits.obj & ++lbitshft.obj & ++lread.obj & ++lwrite.obj & ++main.obj & ++open.obj & ++pow_ci.obj & ++pow_dd.obj & ++pow_di.obj & ++pow_hh.obj & ++pow_ii.obj & ++pow_ri.obj & ++pow_zi.obj & ++pow_zz.obj & ++r_abs.obj & ++r_acos.obj & ++r_asin.obj & ++r_atan.obj & ++r_atn2.obj & ++r_cnjg.obj & ++r_cos.obj & ++r_cosh.obj & ++r_dim.obj & ++r_exp.obj & ++r_imag.obj & ++r_int.obj & ++r_lg10.obj & ++r_log.obj & ++r_mod.obj & ++r_nint.obj & ++r_sign.obj & ++r_sin.obj & ++r_sinh.obj & ++r_sqrt.obj & ++r_tan.obj & 
++r_tanh.obj & ++rdfmt.obj & ++rewind.obj & ++rsfe.obj & ++rsli.obj & ++rsne.obj & ++s_cat.obj & ++s_cmp.obj & ++s_copy.obj & ++s_paus.obj & ++s_rnge.obj & ++s_stop.obj & ++sfe.obj & ++sig_die.obj & ++signal_.obj & ++sue.obj & ++system_.obj & ++typesize.obj & ++uio.obj & ++uninit.obj & ++util.obj & ++wref.obj & ++wrtfmt.obj & ++wsfe.obj & ++wsle.obj & ++wsne.obj & ++xwsne.obj & ++z_abs.obj & ++z_cos.obj & ++z_div.obj & ++z_exp.obj & ++z_log.obj & ++z_sin.obj & ++z_sqrt.obj diff --git a/min-dgels/base/F2CLIBS/libf2c/lio.h b/min-dgels/base/F2CLIBS/libf2c/lio.h new file mode 100644 index 0000000..f9fd1cd --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/lio.h @@ -0,0 +1,74 @@ +/* copy of ftypes from the compiler */ +/* variable types + * numeric assumptions: + * int < reals < complexes + * TYDREAL-TYREAL = TYDCOMPLEX-TYCOMPLEX + */ + +/* 0-10 retain their old (pre LOGICAL*1, etc.) */ +/* values to allow mixing old and new objects. */ + +#define TYUNKNOWN 0 +#define TYADDR 1 +#define TYSHORT 2 +#define TYLONG 3 +#define TYREAL 4 +#define TYDREAL 5 +#define TYCOMPLEX 6 +#define TYDCOMPLEX 7 +#define TYLOGICAL 8 +#define TYCHAR 9 +#define TYSUBR 10 +#define TYINT1 11 +#define TYLOGICAL1 12 +#define TYLOGICAL2 13 +#ifdef Allow_TYQUAD +#undef TYQUAD +#define TYQUAD 14 +#endif + +#define LINTW 24 +#define LINE 80 +#define LLOGW 2 +#ifdef Old_list_output +#define LLOW 1.0 +#define LHIGH 1.e9 +#define LEFMT " %# .8E" +#define LFFMT " %# .9g" +#else +#define LGFMT "%.9G" +#endif +/* LEFBL 20 should suffice; 24 overcomes a NeXT bug. 
*/ +#define LEFBL 24 + +typedef union +{ + char flchar; + short flshort; + ftnint flint; +#ifdef Allow_TYQUAD + longint fllongint; +#endif + real flreal; + doublereal fldouble; +} flex; +#ifdef KR_headers +extern int (*f__lioproc)(), (*l_getc)(), (*l_ungetc)(); +extern int l_read(), l_write(); +#else +#ifdef __cplusplus +extern "C" { +#endif +extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); +extern int l_write(ftnint*, char*, ftnlen, ftnint); +extern void x_wsne(cilist*); +extern int c_le(cilist*), (*l_getc)(void), (*l_ungetc)(int,FILE*); +extern int l_read(ftnint*,char*,ftnlen,ftnint); +extern integer e_rsle(void), e_wsle(void), s_wsne(cilist*); +extern int z_rnew(void); +#endif +extern ftnint L_len; +extern int f__scale; +#ifdef __cplusplus + } +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/lread.c b/min-dgels/base/F2CLIBS/libf2c/lread.c new file mode 100644 index 0000000..699cda1 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/lread.c @@ -0,0 +1,806 @@ +#include "f2c.h" +#include "fio.h" + +/* Compile with -DF8X_NML_ELIDE_QUOTES to permit eliding quotation */ +/* marks in namelist input a la the Fortran 8X Draft published in */ +/* the May 1989 issue of Fortran Forum. 
*/ + + +#ifdef Allow_TYQUAD +static longint f__llx; +#endif + +#ifdef KR_headers +extern double atof(); +extern char *malloc(), *realloc(); +int (*f__lioproc)(), (*l_getc)(), (*l_ungetc)(); +#else +#undef abs +#undef min +#undef max +#include "stdlib.h" +#endif + +#include "fmt.h" +#include "lio.h" +#include "ctype.h" +#include "fp.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern char *f__fmtbuf; +#else +extern const char *f__fmtbuf; +int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint), (*l_getc)(void), + (*l_ungetc)(int,FILE*); +#endif + +int l_eof; + +#define isblnk(x) (f__ltab[x+1]&B) +#define issep(x) (f__ltab[x+1]&SX) +#define isapos(x) (f__ltab[x+1]&AX) +#define isexp(x) (f__ltab[x+1]&EX) +#define issign(x) (f__ltab[x+1]&SG) +#define iswhit(x) (f__ltab[x+1]&WH) +#define SX 1 +#define B 2 +#define AX 4 +#define EX 8 +#define SG 16 +#define WH 32 +char f__ltab[128+1] = { /* offset one for EOF */ + 0, + 0,0,AX,0,0,0,0,0,0,WH|B,SX|WH,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + SX|B|WH,0,AX,0,0,0,0,AX,0,0,0,SG,SX,SG,0,SX, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + AX,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +#ifdef ungetc + static int +#ifdef KR_headers +un_getc(x,f__cf) int x; FILE *f__cf; +#else +un_getc(int x, FILE *f__cf) +#endif +{ return ungetc(x,f__cf); } +#else +#define un_getc ungetc +#ifdef KR_headers + extern int ungetc(); +#else +extern int ungetc(int, FILE*); /* for systems with a buggy stdio.h */ +#endif +#endif + + int +t_getc(Void) +{ int ch; + if(f__curunit->uend) return(EOF); + if((ch=getc(f__cf))!=EOF) return(ch); + if(feof(f__cf)) + f__curunit->uend = l_eof = 1; + return(EOF); +} +integer e_rsle(Void) +{ + int ch; + if(f__curunit->uend) return(0); + while((ch=t_getc())!='\n') + if (ch == EOF) { + if(feof(f__cf)) + f__curunit->uend = l_eof = 1; + return EOF; + } + return(0); +} + +flag f__lquit; +int 
f__lcount,f__ltype,nml_read; +char *f__lchar; +double f__lx,f__ly; +#define ERR(x) if(n=(x)) return(n) +#define GETC(x) (x=(*l_getc)()) +#define Ungetc(x,y) (*l_ungetc)(x,y) + + static int +#ifdef KR_headers +l_R(poststar, reqint) int poststar, reqint; +#else +l_R(int poststar, int reqint) +#endif +{ + char s[FMAX+EXPMAXDIGS+4]; + register int ch; + register char *sp, *spe, *sp1; + long e, exp; + int havenum, havestar, se; + + if (!poststar) { + if (f__lcount > 0) + return(0); + f__lcount = 1; + } +#ifdef Allow_TYQUAD + f__llx = 0; +#endif + f__ltype = 0; + exp = 0; + havestar = 0; +retry: + sp1 = sp = s; + spe = sp + FMAX; + havenum = 0; + + switch(GETC(ch)) { + case '-': *sp++ = ch; sp1++; spe++; + case '+': + GETC(ch); + } + while(ch == '0') { + ++havenum; + GETC(ch); + } + while(isdigit(ch)) { + if (sp < spe) *sp++ = ch; + else ++exp; + GETC(ch); + } + if (ch == '*' && !poststar) { + if (sp == sp1 || exp || *s == '-') { + errfl(f__elist->cierr,112,"bad repetition count"); + } + poststar = havestar = 1; + *sp = 0; + f__lcount = atoi(s); + goto retry; + } + if (ch == '.') { +#ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT + if (reqint) + errfl(f__elist->cierr,115,"invalid integer"); +#endif + GETC(ch); + if (sp == sp1) + while(ch == '0') { + ++havenum; + --exp; + GETC(ch); + } + while(isdigit(ch)) { + if (sp < spe) + { *sp++ = ch; --exp; } + GETC(ch); + } + } + havenum += sp - sp1; + se = 0; + if (issign(ch)) + goto signonly; + if (havenum && isexp(ch)) { +#ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT + if (reqint) + errfl(f__elist->cierr,115,"invalid integer"); +#endif + GETC(ch); + if (issign(ch)) { +signonly: + if (ch == '-') se = 1; + GETC(ch); + } + if (!isdigit(ch)) { +bad: + errfl(f__elist->cierr,112,"exponent field"); + } + + e = ch - '0'; + while(isdigit(GETC(ch))) { + e = 10*e + ch - '0'; + if (e > EXPMAX) + goto bad; + } + if (se) + exp -= e; + else + exp += e; + } + (void) Ungetc(ch, f__cf); + if (sp > sp1) { + ++havenum; + while(*--sp == '0') + ++exp; + if 
(exp) + sprintf(sp+1, "e%ld", exp); + else + sp[1] = 0; + f__lx = atof(s); +#ifdef Allow_TYQUAD + if (reqint&2 && (se = sp - sp1 + exp) > 14 && se < 20) { + /* Assuming 64-bit longint and 32-bit long. */ + if (exp < 0) + sp += exp; + if (sp1 <= sp) { + f__llx = *sp1 - '0'; + while(++sp1 <= sp) + f__llx = 10*f__llx + (*sp1 - '0'); + } + while(--exp >= 0) + f__llx *= 10; + if (*s == '-') + f__llx = -f__llx; + } +#endif + } + else + f__lx = 0.; + if (havenum) + f__ltype = TYLONG; + else + switch(ch) { + case ',': + case '/': + break; + default: + if (havestar && ( ch == ' ' + ||ch == '\t' + ||ch == '\n')) + break; + if (nml_read > 1) { + f__lquit = 2; + return 0; + } + errfl(f__elist->cierr,112,"invalid number"); + } + return 0; + } + + static int +#ifdef KR_headers +rd_count(ch) register int ch; +#else +rd_count(register int ch) +#endif +{ + if (ch < '0' || ch > '9') + return 1; + f__lcount = ch - '0'; + while(GETC(ch) >= '0' && ch <= '9') + f__lcount = 10*f__lcount + ch - '0'; + Ungetc(ch,f__cf); + return f__lcount <= 0; + } + + static int +l_C(Void) +{ int ch, nml_save; + double lz; + if(f__lcount>0) return(0); + f__ltype=0; + GETC(ch); + if(ch!='(') + { + if (nml_read > 1 && (ch < '0' || ch > '9')) { + Ungetc(ch,f__cf); + f__lquit = 2; + return 0; + } + if (rd_count(ch)) + if(!f__cf || !feof(f__cf)) + errfl(f__elist->cierr,112,"complex format"); + else + err(f__elist->cierr,(EOF),"lread"); + if(GETC(ch)!='*') + { + if(!f__cf || !feof(f__cf)) + errfl(f__elist->cierr,112,"no star"); + else + err(f__elist->cierr,(EOF),"lread"); + } + if(GETC(ch)!='(') + { Ungetc(ch,f__cf); + return(0); + } + } + else + f__lcount = 1; + while(iswhit(GETC(ch))); + Ungetc(ch,f__cf); + nml_save = nml_read; + nml_read = 0; + if (ch = l_R(1,0)) + return ch; + if (!f__ltype) + errfl(f__elist->cierr,112,"no real part"); + lz = f__lx; + while(iswhit(GETC(ch))); + if(ch!=',') + { (void) Ungetc(ch,f__cf); + errfl(f__elist->cierr,112,"no comma"); + } + while(iswhit(GETC(ch))); + (void) 
Ungetc(ch,f__cf); + if (ch = l_R(1,0)) + return ch; + if (!f__ltype) + errfl(f__elist->cierr,112,"no imaginary part"); + while(iswhit(GETC(ch))); + if(ch!=')') errfl(f__elist->cierr,112,"no )"); + f__ly = f__lx; + f__lx = lz; +#ifdef Allow_TYQUAD + f__llx = 0; +#endif + nml_read = nml_save; + return(0); +} + + static char nmLbuf[256], *nmL_next; + static int (*nmL_getc_save)(Void); +#ifdef KR_headers + static int (*nmL_ungetc_save)(/* int, FILE* */); +#else + static int (*nmL_ungetc_save)(int, FILE*); +#endif + + static int +nmL_getc(Void) +{ + int rv; + if (rv = *nmL_next++) + return rv; + l_getc = nmL_getc_save; + l_ungetc = nmL_ungetc_save; + return (*l_getc)(); + } + + static int +#ifdef KR_headers +nmL_ungetc(x, f) int x; FILE *f; +#else +nmL_ungetc(int x, FILE *f) +#endif +{ + f = f; /* banish non-use warning */ + return *--nmL_next = x; + } + + static int +#ifdef KR_headers +Lfinish(ch, dot, rvp) int ch, dot, *rvp; +#else +Lfinish(int ch, int dot, int *rvp) +#endif +{ + char *s, *se; + static char what[] = "namelist input"; + + s = nmLbuf + 2; + se = nmLbuf + sizeof(nmLbuf) - 1; + *s++ = ch; + while(!issep(GETC(ch)) && ch!=EOF) { + if (s >= se) { + nmLbuf_ovfl: + return *rvp = err__fl(f__elist->cierr,131,what); + } + *s++ = ch; + if (ch != '=') + continue; + if (dot) + return *rvp = err__fl(f__elist->cierr,112,what); + got_eq: + *s = 0; + nmL_getc_save = l_getc; + l_getc = nmL_getc; + nmL_ungetc_save = l_ungetc; + l_ungetc = nmL_ungetc; + nmLbuf[1] = *(nmL_next = nmLbuf) = ','; + *rvp = f__lcount = 0; + return 1; + } + if (dot) + goto done; + for(;;) { + if (s >= se) + goto nmLbuf_ovfl; + *s++ = ch; + if (!isblnk(ch)) + break; + if (GETC(ch) == EOF) + goto done; + } + if (ch == '=') + goto got_eq; + done: + Ungetc(ch, f__cf); + return 0; + } + + static int +l_L(Void) +{ + int ch, rv, sawdot; + + if(f__lcount>0) + return(0); + f__lcount = 1; + f__ltype=0; + GETC(ch); + if(isdigit(ch)) + { + rd_count(ch); + if(GETC(ch)!='*') + if(!f__cf || !feof(f__cf)) + 
errfl(f__elist->cierr,112,"no star"); + else + err(f__elist->cierr,(EOF),"lread"); + GETC(ch); + } + sawdot = 0; + if(ch == '.') { + sawdot = 1; + GETC(ch); + } + switch(ch) + { + case 't': + case 'T': + if (nml_read && Lfinish(ch, sawdot, &rv)) + return rv; + f__lx=1; + break; + case 'f': + case 'F': + if (nml_read && Lfinish(ch, sawdot, &rv)) + return rv; + f__lx=0; + break; + default: + if(isblnk(ch) || issep(ch) || ch==EOF) + { (void) Ungetc(ch,f__cf); + return(0); + } + if (nml_read > 1) { + Ungetc(ch,f__cf); + f__lquit = 2; + return 0; + } + errfl(f__elist->cierr,112,"logical"); + } + f__ltype=TYLONG; + while(!issep(GETC(ch)) && ch!=EOF); + Ungetc(ch, f__cf); + return(0); +} + +#define BUFSIZE 128 + + static int +l_CHAR(Void) +{ int ch,size,i; + static char rafail[] = "realloc failure"; + char quote,*p; + if(f__lcount>0) return(0); + f__ltype=0; + if(f__lchar!=NULL) free(f__lchar); + size=BUFSIZE; + p=f__lchar = (char *)malloc((unsigned int)size); + if(f__lchar == NULL) + errfl(f__elist->cierr,113,"no space"); + + GETC(ch); + if(isdigit(ch)) { + /* allow Fortran 8x-style unquoted string... 
*/ + /* either find a repetition count or the string */ + f__lcount = ch - '0'; + *p++ = ch; + for(i = 1;;) { + switch(GETC(ch)) { + case '*': + if (f__lcount == 0) { + f__lcount = 1; +#ifndef F8X_NML_ELIDE_QUOTES + if (nml_read) + goto no_quote; +#endif + goto noquote; + } + p = f__lchar; + goto have_lcount; + case ',': + case ' ': + case '\t': + case '\n': + case '/': + Ungetc(ch,f__cf); + /* no break */ + case EOF: + f__lcount = 1; + f__ltype = TYCHAR; + return *p = 0; + } + if (!isdigit(ch)) { + f__lcount = 1; +#ifndef F8X_NML_ELIDE_QUOTES + if (nml_read) { + no_quote: + errfl(f__elist->cierr,112, + "undelimited character string"); + } +#endif + goto noquote; + } + *p++ = ch; + f__lcount = 10*f__lcount + ch - '0'; + if (++i == size) { + f__lchar = (char *)realloc(f__lchar, + (unsigned int)(size += BUFSIZE)); + if(f__lchar == NULL) + errfl(f__elist->cierr,113,rafail); + p = f__lchar + i; + } + } + } + else (void) Ungetc(ch,f__cf); + have_lcount: + if(GETC(ch)=='\'' || ch=='"') quote=ch; + else if(isblnk(ch) || (issep(ch) && ch != '\n') || ch==EOF) { + Ungetc(ch,f__cf); + return 0; + } +#ifndef F8X_NML_ELIDE_QUOTES + else if (nml_read > 1) { + Ungetc(ch,f__cf); + f__lquit = 2; + return 0; + } +#endif + else { + /* Fortran 8x-style unquoted string */ + *p++ = ch; + for(i = 1;;) { + switch(GETC(ch)) { + case ',': + case ' ': + case '\t': + case '\n': + case '/': + Ungetc(ch,f__cf); + /* no break */ + case EOF: + f__ltype = TYCHAR; + return *p = 0; + } + noquote: + *p++ = ch; + if (++i == size) { + f__lchar = (char *)realloc(f__lchar, + (unsigned int)(size += BUFSIZE)); + if(f__lchar == NULL) + errfl(f__elist->cierr,113,rafail); + p = f__lchar + i; + } + } + } + f__ltype=TYCHAR; + for(i=0;;) + { while(GETC(ch)!=quote && ch!='\n' + && ch!=EOF && ++icierr,113,rafail); + p=f__lchar+i-1; + *p++ = ch; + } + else if(ch==EOF) return(EOF); + else if(ch=='\n') + { if(*(p-1) != '\\') continue; + i--; + p--; + if(++iciunit]; + if(a->ciunit>=MXUNIT || a->ciunit<0) + 
err(a->cierr,101,"stler"); + f__scale=f__recpos=0; + f__elist=a; + if(f__curunit->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) + err(a->cierr,102,"lio"); + f__cf=f__curunit->ufd; + if(!f__curunit->ufmt) err(a->cierr,103,"lio") + return(0); +} + + int +#ifdef KR_headers +l_read(number,ptr,len,type) ftnint *number,type; char *ptr; ftnlen len; +#else +l_read(ftnint *number, char *ptr, ftnlen len, ftnint type) +#endif +{ +#define Ptr ((flex *)ptr) + int i,n,ch; + doublereal *yy; + real *xx; + for(i=0;i<*number;i++) + { + if(f__lquit) return(0); + if(l_eof) + err(f__elist->ciend, EOF, "list in") + if(f__lcount == 0) { + f__ltype = 0; + for(;;) { + GETC(ch); + switch(ch) { + case EOF: + err(f__elist->ciend,(EOF),"list in") + case ' ': + case '\t': + case '\n': + continue; + case '/': + f__lquit = 1; + goto loopend; + case ',': + f__lcount = 1; + goto loopend; + default: + (void) Ungetc(ch, f__cf); + goto rddata; + } + } + } + rddata: + switch((int)type) + { + case TYINT1: + case TYSHORT: + case TYLONG: +#ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT + ERR(l_R(0,1)); + break; +#endif + case TYREAL: + case TYDREAL: + ERR(l_R(0,0)); + break; +#ifdef TYQUAD + case TYQUAD: + n = l_R(0,2); + if (n) + return n; + break; +#endif + case TYCOMPLEX: + case TYDCOMPLEX: + ERR(l_C()); + break; + case TYLOGICAL1: + case TYLOGICAL2: + case TYLOGICAL: + ERR(l_L()); + break; + case TYCHAR: + ERR(l_CHAR()); + break; + } + while (GETC(ch) == ' ' || ch == '\t'); + if (ch != ',' || f__lcount > 1) + Ungetc(ch,f__cf); + loopend: + if(f__lquit) return(0); + if(f__cf && ferror(f__cf)) { + clearerr(f__cf); + errfl(f__elist->cierr,errno,"list in"); + } + if(f__ltype==0) goto bump; + switch((int)type) + { + case TYINT1: + case TYLOGICAL1: + Ptr->flchar = (char)f__lx; + break; + case TYLOGICAL2: + case TYSHORT: + Ptr->flshort = (short)f__lx; + break; + case TYLOGICAL: + case TYLONG: + Ptr->flint = (ftnint)f__lx; + break; +#ifdef Allow_TYQUAD + case TYQUAD: + if (!(Ptr->fllongint = f__llx)) + Ptr->fllongint = 
f__lx; + break; +#endif + case TYREAL: + Ptr->flreal=f__lx; + break; + case TYDREAL: + Ptr->fldouble=f__lx; + break; + case TYCOMPLEX: + xx=(real *)ptr; + *xx++ = f__lx; + *xx = f__ly; + break; + case TYDCOMPLEX: + yy=(doublereal *)ptr; + *yy++ = f__lx; + *yy = f__ly; + break; + case TYCHAR: + b_char(f__lchar,ptr,len); + break; + } + bump: + if(f__lcount>0) f__lcount--; + ptr += len; + if (nml_read) + nml_read++; + } + return(0); +#undef Ptr +} +#ifdef KR_headers +integer s_rsle(a) cilist *a; +#else +integer s_rsle(cilist *a) +#endif +{ + int n; + + f__reading=1; + f__external=1; + f__formatted=1; + if(n=c_le(a)) return(n); + f__lioproc = l_read; + f__lquit = 0; + f__lcount = 0; + l_eof = 0; + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr,errno,"read start"); + if(f__curunit->uend) + err(f__elist->ciend,(EOF),"read start"); + l_getc = t_getc; + l_ungetc = un_getc; + f__doend = xrd_SL; + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/lwrite.c b/min-dgels/base/F2CLIBS/libf2c/lwrite.c new file mode 100644 index 0000000..9e0d93d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/lwrite.c @@ -0,0 +1,314 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#include "lio.h" +#ifdef __cplusplus +extern "C" { +#endif + +ftnint L_len; +int f__Aquote; + + static VOID +donewrec(Void) +{ + if (f__recpos) + (*f__donewrec)(); + } + + static VOID +#ifdef KR_headers +lwrt_I(n) longint n; +#else +lwrt_I(longint n) +#endif +{ + char *p; + int ndigit, sign; + + p = f__icvt(n, &ndigit, &sign, 10); + if(f__recpos + ndigit >= L_len) + donewrec(); + PUT(' '); + if (sign) + PUT('-'); + while(*p) + PUT(*p++); +} + static VOID +#ifdef KR_headers +lwrt_L(n, len) ftnint n; ftnlen len; +#else +lwrt_L(ftnint n, ftnlen len) +#endif +{ + if(f__recpos+LLOGW>=L_len) + donewrec(); + wrt_L((Uint *)&n,LLOGW, len); +} + static VOID +#ifdef KR_headers +lwrt_A(p,len) char *p; ftnlen len; +#else +lwrt_A(char *p, ftnlen len) +#endif +{ + int a; 
+ char *p1, *pe; + + a = 0; + pe = p + len; + if (f__Aquote) { + a = 3; + if (len > 1 && p[len-1] == ' ') { + while(--len > 1 && p[len-1] == ' '); + pe = p + len; + } + p1 = p; + while(p1 < pe) + if (*p1++ == '\'') + a++; + } + if(f__recpos+len+a >= L_len) + donewrec(); + if (a +#ifndef OMIT_BLANK_CC + || !f__recpos +#endif + ) + PUT(' '); + if (a) { + PUT('\''); + while(p < pe) { + if (*p == '\'') + PUT('\''); + PUT(*p++); + } + PUT('\''); + } + else + while(p < pe) + PUT(*p++); +} + + static int +#ifdef KR_headers +l_g(buf, n) char *buf; double n; +#else +l_g(char *buf, double n) +#endif +{ +#ifdef Old_list_output + doublereal absn; + char *fmt; + + absn = n; + if (absn < 0) + absn = -absn; + fmt = LLOW <= absn && absn < LHIGH ? LFFMT : LEFMT; +#ifdef USE_STRLEN + sprintf(buf, fmt, n); + return strlen(buf); +#else + return sprintf(buf, fmt, n); +#endif + +#else + register char *b, c, c1; + + b = buf; + *b++ = ' '; + if (n < 0) { + *b++ = '-'; + n = -n; + } + else + *b++ = ' '; + if (n == 0) { +#ifdef SIGNED_ZEROS + if (signbit_f2c(&n)) + *b++ = '-'; +#endif + *b++ = '0'; + *b++ = '.'; + *b = 0; + goto f__ret; + } + sprintf(b, LGFMT, n); + switch(*b) { +#ifndef WANT_LEAD_0 + case '0': + while(b[0] = b[1]) + b++; + break; +#endif + case 'i': + case 'I': + /* Infinity */ + case 'n': + case 'N': + /* NaN */ + while(*++b); + break; + + default: + /* Fortran 77 insists on having a decimal point... 
*/ + for(;; b++) + switch(*b) { + case 0: + *b++ = '.'; + *b = 0; + goto f__ret; + case '.': + while(*++b); + goto f__ret; + case 'E': + for(c1 = '.', c = 'E'; *b = c1; + c1 = c, c = *++b); + goto f__ret; + } + } + f__ret: + return b - buf; +#endif + } + + static VOID +#ifdef KR_headers +l_put(s) register char *s; +#else +l_put(register char *s) +#endif +{ +#ifdef KR_headers + register void (*pn)() = f__putn; +#else + register void (*pn)(int) = f__putn; +#endif + register int c; + + while(c = *s++) + (*pn)(c); + } + + static VOID +#ifdef KR_headers +lwrt_F(n) double n; +#else +lwrt_F(double n) +#endif +{ + char buf[LEFBL]; + + if(f__recpos + l_g(buf,n) >= L_len) + donewrec(); + l_put(buf); +} + static VOID +#ifdef KR_headers +lwrt_C(a,b) double a,b; +#else +lwrt_C(double a, double b) +#endif +{ + char *ba, *bb, bufa[LEFBL], bufb[LEFBL]; + int al, bl; + + al = l_g(bufa, a); + for(ba = bufa; *ba == ' '; ba++) + --al; + bl = l_g(bufb, b) + 1; /* intentionally high by 1 */ + for(bb = bufb; *bb == ' '; bb++) + --bl; + if(f__recpos + al + bl + 3 >= L_len) + donewrec(); +#ifdef OMIT_BLANK_CC + else +#endif + PUT(' '); + PUT('('); + l_put(ba); + PUT(','); + if (f__recpos + bl >= L_len) { + (*f__donewrec)(); +#ifndef OMIT_BLANK_CC + PUT(' '); +#endif + } + l_put(bb); + PUT(')'); +} + + int +#ifdef KR_headers +l_write(number,ptr,len,type) ftnint *number,type; char *ptr; ftnlen len; +#else +l_write(ftnint *number, char *ptr, ftnlen len, ftnint type) +#endif +{ +#define Ptr ((flex *)ptr) + int i; + longint x; + double y,z; + real *xx; + doublereal *yy; + for(i=0;i< *number; i++) + { + switch((int)type) + { + default: f__fatal(117,"unknown type in lio"); + case TYINT1: + x = Ptr->flchar; + goto xint; + case TYSHORT: + x=Ptr->flshort; + goto xint; +#ifdef Allow_TYQUAD + case TYQUAD: + x = Ptr->fllongint; + goto xint; +#endif + case TYLONG: + x=Ptr->flint; + xint: lwrt_I(x); + break; + case TYREAL: + y=Ptr->flreal; + goto xfloat; + case TYDREAL: + y=Ptr->fldouble; + xfloat: 
lwrt_F(y); + break; + case TYCOMPLEX: + xx= &Ptr->flreal; + y = *xx++; + z = *xx; + goto xcomplex; + case TYDCOMPLEX: + yy = &Ptr->fldouble; + y= *yy++; + z = *yy; + xcomplex: + lwrt_C(y,z); + break; + case TYLOGICAL1: + x = Ptr->flchar; + goto xlog; + case TYLOGICAL2: + x = Ptr->flshort; + goto xlog; + case TYLOGICAL: + x = Ptr->flint; + xlog: lwrt_L(Ptr->flint, len); + break; + case TYCHAR: + lwrt_A(ptr,len); + break; + } + ptr += len; + } + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/main.c b/min-dgels/base/F2CLIBS/libf2c/main.c new file mode 100644 index 0000000..d95fdc9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/main.c @@ -0,0 +1,148 @@ +/* STARTUP PROCEDURE FOR UNIX FORTRAN PROGRAMS */ + +#include "stdio.h" +#include "signal1.h" + +#ifndef SIGIOT +#ifdef SIGABRT +#define SIGIOT SIGABRT +#endif +#endif + +#ifndef KR_headers +#undef VOID +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif + +#ifndef VOID +#define VOID void +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef NO__STDC +#define ONEXIT onexit +extern VOID f_exit(); +#else +#ifndef KR_headers +extern void f_exit(void); +#ifndef NO_ONEXIT +#define ONEXIT atexit +extern int atexit(void (*)(void)); +#endif +#else +#ifndef NO_ONEXIT +#define ONEXIT onexit +extern VOID f_exit(); +#endif +#endif +#endif + +#ifdef KR_headers +extern VOID f_init(), sig_die(); +extern int MAIN__(); +#define Int /* int */ +#else +extern void f_init(void), sig_die(const char*, int); +extern int MAIN__(void); +#define Int int +#endif + +static VOID sigfdie(Sigarg) +{ +Use_Sigarg; +sig_die("Floating Exception", 1); +} + + +static VOID sigidie(Sigarg) +{ +Use_Sigarg; +sig_die("IOT Trap", 1); +} + +#ifdef SIGQUIT +static VOID sigqdie(Sigarg) +{ +Use_Sigarg; +sig_die("Quit signal", 1); +} +#endif + + +static VOID sigindie(Sigarg) +{ +Use_Sigarg; +sig_die("Interrupt", 0); +} + +static VOID sigtdie(Sigarg) +{ +Use_Sigarg; +sig_die("Killed", 0); +} + 
+#ifdef SIGTRAP +static VOID sigtrdie(Sigarg) +{ +Use_Sigarg; +sig_die("Trace trap", 1); +} +#endif + + +int xargc; +char **xargv; + +#ifdef __cplusplus + } +#endif + + int +#ifdef KR_headers +main(argc, argv) int argc; char **argv; +#else +main(int argc, char **argv) +#endif +{ +xargc = argc; +xargv = argv; +signal1(SIGFPE, sigfdie); /* ignore underflow, enable overflow */ +#ifdef SIGIOT +signal1(SIGIOT, sigidie); +#endif +#ifdef SIGTRAP +signal1(SIGTRAP, sigtrdie); +#endif +#ifdef SIGQUIT +if(signal1(SIGQUIT,sigqdie) == SIG_IGN) + signal1(SIGQUIT, SIG_IGN); +#endif +if(signal1(SIGINT, sigindie) == SIG_IGN) + signal1(SIGINT, SIG_IGN); +signal1(SIGTERM,sigtdie); + +#ifdef pdp11 + ldfps(01200); /* detect overflow as an exception */ +#endif + +f_init(); +#ifndef NO_ONEXIT +ONEXIT(f_exit); +#endif +MAIN__(); +#ifdef NO_ONEXIT +f_exit(); +#endif +exit(0); /* exit(0) rather than return(0) to bypass Cray bug */ +return 0; /* For compilers that complain of missing return values; */ + /* others will complain that this is unreachable code. */ +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/math.hvc b/min-dgels/base/F2CLIBS/libf2c/math.hvc new file mode 100644 index 0000000..52cfcee --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/math.hvc @@ -0,0 +1,3 @@ +/* for VC 4.2 */ +#include +#undef complex diff --git a/min-dgels/base/F2CLIBS/libf2c/mkfile.plan9 b/min-dgels/base/F2CLIBS/libf2c/mkfile.plan9 new file mode 100644 index 0000000..645e33d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/mkfile.plan9 @@ -0,0 +1,162 @@ +# Plan 9 mkfile for libf2c.a$O + +f2c.h + +# For use with "f2c" and "f2c -A": +f2c.h: f2c.h0 + cp f2c.h0 f2c.h + +# You may need to adjust signal1.h suitably for your system... 
+signal1.h: signal1.h0 + cp signal1.h0 signal1.h + +clean: + rm -f libf2c.a$O *.$O arith.h + +backspac.$O: fio.h +close.$O: fio.h +dfe.$O: fio.h +dfe.$O: fmt.h +due.$O: fio.h +endfile.$O: fio.h rawio.h +err.$O: fio.h rawio.h +fmt.$O: fio.h +fmt.$O: fmt.h +iio.$O: fio.h +iio.$O: fmt.h +ilnw.$O: fio.h +ilnw.$O: lio.h +inquire.$O: fio.h +lread.$O: fio.h +lread.$O: fmt.h +lread.$O: lio.h +lread.$O: fp.h +lwrite.$O: fio.h +lwrite.$O: fmt.h +lwrite.$O: lio.h +open.$O: fio.h rawio.h +rdfmt.$O: fio.h +rdfmt.$O: fmt.h +rdfmt.$O: fp.h +rewind.$O: fio.h +rsfe.$O: fio.h +rsfe.$O: fmt.h +rsli.$O: fio.h +rsli.$O: lio.h +rsne.$O: fio.h +rsne.$O: lio.h +sfe.$O: fio.h +sue.$O: fio.h +uio.$O: fio.h +uninit.$O: arith.h +util.$O: fio.h +wref.$O: fio.h +wref.$O: fmt.h +wref.$O: fp.h +wrtfmt.$O: fio.h +wrtfmt.$O: fmt.h +wsfe.$O: fio.h +wsfe.$O: fmt.h +wsle.$O: fio.h +wsle.$O: fmt.h +wsle.$O: lio.h +wsne.$O: fio.h +wsne.$O: lio.h +xwsne.$O: fio.h +xwsne.$O: lio.h +xwsne.$O: fmt.h + +arith.h: arithchk.c + pcc -DNO_FPINIT -o arithchk arithchk.c + arithchk >$target + rm arithchk + +xsum.out:V: check + +check: + xsum Notice README abort_.c arithchk.c backspac.c c_abs.c c_cos.c \ + c_div.c c_exp.c c_log.c c_sin.c c_sqrt.c cabs.c close.c comptry.bat \ + d_abs.c d_acos.c d_asin.c d_atan.c d_atn2.c d_cnjg.c d_cos.c d_cosh.c \ + d_dim.c d_exp.c d_imag.c d_int.c d_lg10.c d_log.c d_mod.c \ + d_nint.c d_prod.c d_sign.c d_sin.c d_sinh.c d_sqrt.c d_tan.c \ + d_tanh.c derf_.c derfc_.c dfe.c dolio.c dtime_.c due.c ef1asc_.c \ + ef1cmc_.c endfile.c erf_.c erfc_.c err.c etime_.c exit_.c f2c.h0 \ + f2ch.add f77_aloc.c f77vers.c fio.h fmt.c fmt.h fmtlib.c \ + fp.h ftell_.c \ + getarg_.c getenv_.c h_abs.c h_dim.c h_dnnt.c h_indx.c h_len.c \ + h_mod.c h_nint.c h_sign.c hl_ge.c hl_gt.c hl_le.c hl_lt.c \ + i77vers.c i_abs.c i_dim.c i_dnnt.c i_indx.c i_len.c i_mod.c \ + i_nint.c i_sign.c iargc_.c iio.c ilnw.c inquire.c l_ge.c l_gt.c \ + l_le.c l_lt.c lbitbits.c lbitshft.c libf2c.lbc libf2c.sy lio.h \ + lread.c 
lwrite.c main.c makefile.sy makefile.u makefile.vc \ + makefile.wat math.hvc mkfile.plan9 open.c pow_ci.c pow_dd.c \ + pow_di.c pow_hh.c pow_ii.c pow_qq.c pow_ri.c pow_zi.c pow_zz.c \ + qbitbits.c qbitshft.c r_abs.c r_acos.c r_asin.c r_atan.c r_atn2.c \ + r_cnjg.c r_cos.c r_cosh.c r_dim.c r_exp.c r_imag.c r_int.c r_lg10.c \ + r_log.c r_mod.c r_nint.c r_sign.c r_sin.c r_sinh.c r_sqrt.c \ + r_tan.c r_tanh.c rawio.h rdfmt.c rewind.c rsfe.c rsli.c rsne.c \ + s_cat.c s_cmp.c s_copy.c s_paus.c s_rnge.c s_stop.c sfe.c \ + sig_die.c signal1.h0 signal_.c sue.c system_.c typesize.c uio.c \ + uninit.c util.c wref.c wrtfmt.c wsfe.c wsle.c wsne.c xwsne.c \ + z_abs.c z_cos.c z_div.c z_exp.c z_log.c z_sin.c z_sqrt.c >xsum1.out + cmp xsum0.out xsum1.out && mv xsum1.out xsum.out || diff xsum[01].out diff --git a/min-dgels/base/F2CLIBS/libf2c/open.c b/min-dgels/base/F2CLIBS/libf2c/open.c new file mode 100644 index 0000000..a06428d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/open.c @@ -0,0 +1,301 @@ +#include "f2c.h" +#include "fio.h" +#include "string.h" +#ifndef NON_POSIX_STDIO +#ifdef MSDOS +#include "io.h" +#else +#include "unistd.h" /* for access */ +#endif +#endif + +#ifdef KR_headers +extern char *malloc(); +#ifdef NON_ANSI_STDIO +extern char *mktemp(); +#endif +extern integer f_clos(); +#define Const /*nothing*/ +#else +#define Const const +#undef abs +#undef min +#undef max +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +extern int f__canseek(FILE*); +extern integer f_clos(cllist*); +#endif + +#ifdef NON_ANSI_RW_MODES +Const char *f__r_mode[2] = {"r", "r"}; +Const char *f__w_mode[4] = {"w", "w", "r+w", "r+w"}; +#else +Const char *f__r_mode[2] = {"rb", "r"}; +Const char *f__w_mode[4] = {"wb", "w", "r+b", "r+"}; +#endif + + static char f__buf0[400], *f__buf = f__buf0; + int f__buflen = (int)sizeof(f__buf0); + + static void +#ifdef KR_headers +f__bufadj(n, c) int n, c; +#else +f__bufadj(int n, int c) +#endif +{ + unsigned int len; + char *nbuf, *s, *t, 
*te; + + if (f__buf == f__buf0) + f__buflen = 1024; + while(f__buflen <= n) + f__buflen <<= 1; + len = (unsigned int)f__buflen; + if (len != f__buflen || !(nbuf = (char*)malloc(len))) + f__fatal(113, "malloc failure"); + s = nbuf; + t = f__buf; + te = t + c; + while(t < te) + *s++ = *t++; + if (f__buf != f__buf0) + free(f__buf); + f__buf = nbuf; + } + + int +#ifdef KR_headers +f__putbuf(c) int c; +#else +f__putbuf(int c) +#endif +{ + char *s, *se; + int n; + + if (f__hiwater > f__recpos) + f__recpos = f__hiwater; + n = f__recpos + 1; + if (n >= f__buflen) + f__bufadj(n, f__recpos); + s = f__buf; + se = s + f__recpos; + if (c) + *se++ = c; + *se = 0; + for(;;) { + fputs(s, f__cf); + s += strlen(s); + if (s >= se) + break; /* normally happens the first time */ + putc(*s++, f__cf); + } + return 0; + } + + void +#ifdef KR_headers +x_putc(c) +#else +x_putc(int c) +#endif +{ + if (f__recpos >= f__buflen) + f__bufadj(f__recpos, f__buflen); + f__buf[f__recpos++] = c; + } + +#define opnerr(f,m,s) {if(f) errno= m; else opn_err(m,s,a); return(m);} + + static void +#ifdef KR_headers +opn_err(m, s, a) int m; char *s; olist *a; +#else +opn_err(int m, const char *s, olist *a) +#endif +{ + if (a->ofnm) { + /* supply file name to error message */ + if (a->ofnmlen >= f__buflen) + f__bufadj((int)a->ofnmlen, 0); + g_char(a->ofnm, a->ofnmlen, f__curunit->ufnm = f__buf); + } + f__fatal(m, s); + } + +#ifdef KR_headers +integer f_open(a) olist *a; +#else +integer f_open(olist *a) +#endif +{ unit *b; + integer rv; + char buf[256], *s; + cllist x; + int ufmt; + FILE *tf; +#ifndef NON_UNIX_STDIO + int n; +#endif + f__external = 1; + if(a->ounit>=MXUNIT || a->ounit<0) + err(a->oerr,101,"open") + if (!f__init) + f_init(); + f__curunit = b = &f__units[a->ounit]; + if(b->ufd) { + if(a->ofnm==0) + { + same: if (a->oblnk) + b->ublnk = *a->oblnk == 'z' || *a->oblnk == 'Z'; + return(0); + } +#ifdef NON_UNIX_STDIO + if (b->ufnm + && strlen(b->ufnm) == a->ofnmlen + && !strncmp(b->ufnm, a->ofnm, 
(unsigned)a->ofnmlen)) + goto same; +#else + g_char(a->ofnm,a->ofnmlen,buf); + if (f__inode(buf,&n) == b->uinode && n == b->udev) + goto same; +#endif + x.cunit=a->ounit; + x.csta=0; + x.cerr=a->oerr; + if ((rv = f_clos(&x)) != 0) + return rv; + } + b->url = (int)a->orl; + b->ublnk = a->oblnk && (*a->oblnk == 'z' || *a->oblnk == 'Z'); + if(a->ofm==0) + { if(b->url>0) b->ufmt=0; + else b->ufmt=1; + } + else if(*a->ofm=='f' || *a->ofm == 'F') b->ufmt=1; + else b->ufmt=0; + ufmt = b->ufmt; +#ifdef url_Adjust + if (b->url && !ufmt) + url_Adjust(b->url); +#endif + if (a->ofnm) { + g_char(a->ofnm,a->ofnmlen,buf); + if (!buf[0]) + opnerr(a->oerr,107,"open") + } + else + sprintf(buf, "fort.%ld", (long)a->ounit); + b->uscrtch = 0; + b->uend=0; + b->uwrt = 0; + b->ufd = 0; + b->urw = 3; + switch(a->osta ? *a->osta : 'u') + { + case 'o': + case 'O': +#ifdef NON_POSIX_STDIO + if (!(tf = FOPEN(buf,"r"))) + opnerr(a->oerr,errno,"open") + fclose(tf); +#else + if (access(buf,0)) + opnerr(a->oerr,errno,"open") +#endif + break; + case 's': + case 'S': + b->uscrtch=1; +#ifdef NON_ANSI_STDIO + (void) strcpy(buf,"tmp.FXXXXXX"); + (void) mktemp(buf); + goto replace; +#else + if (!(b->ufd = tmpfile())) + opnerr(a->oerr,errno,"open") + b->ufnm = 0; +#ifndef NON_UNIX_STDIO + b->uinode = b->udev = -1; +#endif + b->useek = 1; + return 0; +#endif + + case 'n': + case 'N': +#ifdef NON_POSIX_STDIO + if ((tf = FOPEN(buf,"r")) || (tf = FOPEN(buf,"a"))) { + fclose(tf); + opnerr(a->oerr,128,"open") + } +#else + if (!access(buf,0)) + opnerr(a->oerr,128,"open") +#endif + /* no break */ + case 'r': /* Fortran 90 replace option */ + case 'R': +#ifdef NON_ANSI_STDIO + replace: +#endif + if (tf = FOPEN(buf,f__w_mode[0])) + fclose(tf); + } + + b->ufnm=(char *) malloc((unsigned int)(strlen(buf)+1)); + if(b->ufnm==NULL) opnerr(a->oerr,113,"no space"); + (void) strcpy(b->ufnm,buf); + if ((s = a->oacc) && b->url) + ufmt = 0; + if(!(tf = FOPEN(buf, f__w_mode[ufmt|2]))) { + if (tf = FOPEN(buf, f__r_mode[ufmt])) 
+ b->urw = 1; + else if (tf = FOPEN(buf, f__w_mode[ufmt])) { + b->uwrt = 1; + b->urw = 2; + } + else + err(a->oerr, errno, "open"); + } + b->useek = f__canseek(b->ufd = tf); +#ifndef NON_UNIX_STDIO + if((b->uinode = f__inode(buf,&b->udev)) == -1) + opnerr(a->oerr,108,"open") +#endif + if(b->useek) + if (a->orl) + rewind(b->ufd); + else if ((s = a->oacc) && (*s == 'a' || *s == 'A') + && FSEEK(b->ufd, 0L, SEEK_END)) + opnerr(a->oerr,129,"open"); + return(0); +} + + int +#ifdef KR_headers +fk_open(seq,fmt,n) ftnint n; +#else +fk_open(int seq, int fmt, ftnint n) +#endif +{ char nbuf[10]; + olist a; + (void) sprintf(nbuf,"fort.%ld",(long)n); + a.oerr=1; + a.ounit=n; + a.ofnm=nbuf; + a.ofnmlen=strlen(nbuf); + a.osta=NULL; + a.oacc= (char*)(seq==SEQ?"s":"d"); + a.ofm = (char*)(fmt==FMT?"f":"u"); + a.orl = seq==DIR?1:0; + a.oblnk=NULL; + return(f_open(&a)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_ci.c b/min-dgels/base/F2CLIBS/libf2c/pow_ci.c new file mode 100644 index 0000000..574e0b1 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_ci.c @@ -0,0 +1,26 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +VOID pow_ci(p, a, b) /* p = a**b */ + complex *p, *a; integer *b; +#else +extern void pow_zi(doublecomplex*, doublecomplex*, integer*); +void pow_ci(complex *p, complex *a, integer *b) /* p = a**b */ +#endif +{ +doublecomplex p1, a1; + +a1.r = a->r; +a1.i = a->i; + +pow_zi(&p1, &a1, b); + +p->r = p1.r; +p->i = p1.i; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_dd.c b/min-dgels/base/F2CLIBS/libf2c/pow_dd.c new file mode 100644 index 0000000..08fc208 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_dd.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double pow(); +double pow_dd(ap, bp) doublereal *ap, *bp; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double pow_dd(doublereal *ap, doublereal *bp) +#endif +{ 
+return(pow(*ap, *bp) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_di.c b/min-dgels/base/F2CLIBS/libf2c/pow_di.c new file mode 100644 index 0000000..abf36cb --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_di.c @@ -0,0 +1,41 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double pow_di(ap, bp) doublereal *ap; integer *bp; +#else +double pow_di(doublereal *ap, integer *bp) +#endif +{ +double pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_hh.c b/min-dgels/base/F2CLIBS/libf2c/pow_hh.c new file mode 100644 index 0000000..8821685 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_hh.c @@ -0,0 +1,39 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +shortint pow_hh(ap, bp) shortint *ap, *bp; +#else +shortint pow_hh(shortint *ap, shortint *bp) +#endif +{ + shortint pow, x, n; + unsigned u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x == 0 ? 
1/x : 0; + n = -n; + } + u = n; + for(pow = 1; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + return(pow); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_ii.c b/min-dgels/base/F2CLIBS/libf2c/pow_ii.c new file mode 100644 index 0000000..748d121 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_ii.c @@ -0,0 +1,39 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer pow_ii(ap, bp) integer *ap, *bp; +#else +integer pow_ii(integer *ap, integer *bp) +#endif +{ + integer pow, x, n; + unsigned long u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x == 0 ? 1/x : 0; + n = -n; + } + u = n; + for(pow = 1; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + return(pow); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_qq.c b/min-dgels/base/F2CLIBS/libf2c/pow_qq.c new file mode 100644 index 0000000..09fe18e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_qq.c @@ -0,0 +1,39 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +longint pow_qq(ap, bp) longint *ap, *bp; +#else +longint pow_qq(longint *ap, longint *bp) +#endif +{ + longint pow, x, n; + unsigned long long u; /* system-dependent */ + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x == 0 ? 
1/x : 0; + n = -n; + } + u = n; + for(pow = 1; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + return(pow); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_ri.c b/min-dgels/base/F2CLIBS/libf2c/pow_ri.c new file mode 100644 index 0000000..e29d416 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_ri.c @@ -0,0 +1,41 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double pow_ri(ap, bp) real *ap; integer *bp; +#else +double pow_ri(real *ap, integer *bp) +#endif +{ +double pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_zi.c b/min-dgels/base/F2CLIBS/libf2c/pow_zi.c new file mode 100644 index 0000000..1c0a4b0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_zi.c @@ -0,0 +1,60 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +VOID pow_zi(p, a, b) /* p = a**b */ + doublecomplex *p, *a; integer *b; +#else +extern void z_div(doublecomplex*, doublecomplex*, doublecomplex*); +void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) /* p = a**b */ +#endif +{ + integer n; + unsigned long u; + double t; + doublecomplex q, x; + static doublecomplex one = {1.0, 0.0}; + + n = *b; + q.r = 1; + q.i = 0; + + if(n == 0) + goto done; + if(n < 0) + { + n = -n; + z_div(&x, &one, a); + } + else + { + x.r = a->r; + x.i = a->i; + } + + for(u = n; ; ) + { + if(u & 01) + { + t = q.r * x.r - q.i * x.i; + q.i = q.r * x.i + q.i * x.r; + q.r = t; + } + if(u >>= 1) + { + t = x.r * x.r - x.i * x.i; + x.i = 2 * x.r * x.i; + x.r = t; + } + else + break; + } + done: + p->i = q.i; + p->r = q.r; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/pow_zz.c 
b/min-dgels/base/F2CLIBS/libf2c/pow_zz.c new file mode 100644 index 0000000..b5ffd33 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/pow_zz.c @@ -0,0 +1,29 @@ +#include "f2c.h" + +#ifdef KR_headers +double log(), exp(), cos(), sin(), atan2(), f__cabs(); +VOID pow_zz(r,a,b) doublecomplex *r, *a, *b; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +extern double f__cabs(double,double); +void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b) +#endif +{ +double logr, logi, x, y; + +logr = log( f__cabs(a->r, a->i) ); +logi = atan2(a->i, a->r); + +x = exp( logr * b->r - logi * b->i ); +y = logr * b->i + logi * b->r; + +r->r = x * cos(y); +r->i = x * sin(y); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/qbitbits.c b/min-dgels/base/F2CLIBS/libf2c/qbitbits.c new file mode 100644 index 0000000..ba1b5bd --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/qbitbits.c @@ -0,0 +1,72 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef LONGBITS +#define LONGBITS 32 +#endif + +#ifndef LONG8BITS +#define LONG8BITS (2*LONGBITS) +#endif + + longint +#ifdef KR_headers +qbit_bits(a, b, len) longint a; integer b, len; +#else +qbit_bits(longint a, integer b, integer len) +#endif +{ + /* Assume 2's complement arithmetic */ + + ulongint x, y; + + x = (ulongint) a; + y = (ulongint)-1L; + x >>= b; + y <<= len; + return (longint)(x & ~y); + } + + longint +#ifdef KR_headers +qbit_cshift(a, b, len) longint a; integer b, len; +#else +qbit_cshift(longint a, integer b, integer len) +#endif +{ + ulongint x, y, z; + + x = (ulongint)a; + if (len <= 0) { + if (len == 0) + return 0; + goto full_len; + } + if (len >= LONG8BITS) { + full_len: + if (b >= 0) { + b %= LONG8BITS; + return (longint)(x << b | x >> LONG8BITS - b ); + } + b = -b; + b %= LONG8BITS; + return (longint)(x << LONG8BITS - b | x >> b); + } + y = z = (unsigned long)-1; + y <<= len; + z &= ~y; + y &= x; + x &= z; + if (b >= 0) { + b %= len; + 
return (longint)(y | z & (x << b | x >> len - b)); + } + b = -b; + b %= len; + return (longint)(y | z & (x >> b | x << len - b)); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/qbitshft.c b/min-dgels/base/F2CLIBS/libf2c/qbitshft.c new file mode 100644 index 0000000..78e7b95 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/qbitshft.c @@ -0,0 +1,17 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + + longint +#ifdef KR_headers +qbit_shift(a, b) longint a; integer b; +#else +qbit_shift(longint a, integer b) +#endif +{ + return b >= 0 ? a << b : (longint)((ulongint)a >> -b); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_abs.c b/min-dgels/base/F2CLIBS/libf2c/r_abs.c new file mode 100644 index 0000000..f3291fb --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_abs.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double r_abs(x) real *x; +#else +double r_abs(real *x) +#endif +{ +if(*x >= 0) + return(*x); +return(- *x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_acos.c b/min-dgels/base/F2CLIBS/libf2c/r_acos.c new file mode 100644 index 0000000..103c7ff --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_acos.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double acos(); +double r_acos(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_acos(real *x) +#endif +{ +return( acos(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_asin.c b/min-dgels/base/F2CLIBS/libf2c/r_asin.c new file mode 100644 index 0000000..432b940 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_asin.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double asin(); +double r_asin(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_asin(real *x) +#endif +{ +return( asin(*x) ); +} +#ifdef 
__cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_atan.c b/min-dgels/base/F2CLIBS/libf2c/r_atan.c new file mode 100644 index 0000000..7656982 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_atan.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double atan(); +double r_atan(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_atan(real *x) +#endif +{ +return( atan(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_atn2.c b/min-dgels/base/F2CLIBS/libf2c/r_atn2.c new file mode 100644 index 0000000..ab957b8 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_atn2.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double atan2(); +double r_atn2(x,y) real *x, *y; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_atn2(real *x, real *y) +#endif +{ +return( atan2(*x,*y) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_cnjg.c b/min-dgels/base/F2CLIBS/libf2c/r_cnjg.c new file mode 100644 index 0000000..cef0e4b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_cnjg.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +VOID r_cnjg(r, z) complex *r, *z; +#else +VOID r_cnjg(complex *r, complex *z) +#endif +{ + real zi = z->i; + r->r = z->r; + r->i = -zi; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_cos.c b/min-dgels/base/F2CLIBS/libf2c/r_cos.c new file mode 100644 index 0000000..4418f0c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_cos.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double cos(); +double r_cos(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_cos(real *x) +#endif +{ +return( cos(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_cosh.c b/min-dgels/base/F2CLIBS/libf2c/r_cosh.c new file 
mode 100644 index 0000000..f547835 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_cosh.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double cosh(); +double r_cosh(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_cosh(real *x) +#endif +{ +return( cosh(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_dim.c b/min-dgels/base/F2CLIBS/libf2c/r_dim.c new file mode 100644 index 0000000..d573ca3 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_dim.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double r_dim(a,b) real *a, *b; +#else +double r_dim(real *a, real *b) +#endif +{ +return( *a > *b ? *a - *b : 0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_exp.c b/min-dgels/base/F2CLIBS/libf2c/r_exp.c new file mode 100644 index 0000000..4e67979 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_exp.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double exp(); +double r_exp(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_exp(real *x) +#endif +{ +return( exp(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_imag.c b/min-dgels/base/F2CLIBS/libf2c/r_imag.c new file mode 100644 index 0000000..1b4de14 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_imag.c @@ -0,0 +1,16 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double r_imag(z) complex *z; +#else +double r_imag(complex *z) +#endif +{ +return(z->i); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_int.c b/min-dgels/base/F2CLIBS/libf2c/r_int.c new file mode 100644 index 0000000..bff8717 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_int.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +double r_int(x) real *x; +#else +#undef abs +#include 
"math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_int(real *x) +#endif +{ +return( (*x>0) ? floor(*x) : -floor(- *x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_lg10.c b/min-dgels/base/F2CLIBS/libf2c/r_lg10.c new file mode 100644 index 0000000..64ffddf --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_lg10.c @@ -0,0 +1,21 @@ +#include "f2c.h" + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +double log(); +double r_lg10(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_lg10(real *x) +#endif +{ +return( log10e * log(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_log.c b/min-dgels/base/F2CLIBS/libf2c/r_log.c new file mode 100644 index 0000000..94c79b0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_log.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double log(); +double r_log(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_log(real *x) +#endif +{ +return( log(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_mod.c b/min-dgels/base/F2CLIBS/libf2c/r_mod.c new file mode 100644 index 0000000..63ed175 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_mod.c @@ -0,0 +1,46 @@ +#include "f2c.h" + +#ifdef KR_headers +#ifdef IEEE_drem +double drem(); +#else +double floor(); +#endif +double r_mod(x,y) real *x, *y; +#else +#ifdef IEEE_drem +double drem(double, double); +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +#endif +double r_mod(real *x, real *y) +#endif +{ +#ifdef IEEE_drem + double xa, ya, z; + if ((ya = *y) < 0.) 
+ ya = -ya; + z = drem(xa = *x, ya); + if (xa > 0) { + if (z < 0) + z += ya; + } + else if (z > 0) + z -= ya; + return z; +#else + double quotient; + if( (quotient = (double)*x / *y) >= 0) + quotient = floor(quotient); + else + quotient = -floor(-quotient); + return(*x - (*y) * quotient ); +#endif +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_nint.c b/min-dgels/base/F2CLIBS/libf2c/r_nint.c new file mode 100644 index 0000000..7cc3f1b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_nint.c @@ -0,0 +1,20 @@ +#include "f2c.h" + +#ifdef KR_headers +double floor(); +double r_nint(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_nint(real *x) +#endif +{ +return( (*x)>=0 ? + floor(*x + .5) : -floor(.5 - *x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_sign.c b/min-dgels/base/F2CLIBS/libf2c/r_sign.c new file mode 100644 index 0000000..797db1a --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_sign.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double r_sign(a,b) real *a, *b; +#else +double r_sign(real *a, real *b) +#endif +{ +double x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? 
x : -x); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_sin.c b/min-dgels/base/F2CLIBS/libf2c/r_sin.c new file mode 100644 index 0000000..37e0df2 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_sin.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sin(); +double r_sin(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_sin(real *x) +#endif +{ +return( sin(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_sinh.c b/min-dgels/base/F2CLIBS/libf2c/r_sinh.c new file mode 100644 index 0000000..39878f0 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_sinh.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sinh(); +double r_sinh(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_sinh(real *x) +#endif +{ +return( sinh(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_sqrt.c b/min-dgels/base/F2CLIBS/libf2c/r_sqrt.c new file mode 100644 index 0000000..e7b2c1c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_sqrt.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double sqrt(); +double r_sqrt(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_sqrt(real *x) +#endif +{ +return( sqrt(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_tan.c b/min-dgels/base/F2CLIBS/libf2c/r_tan.c new file mode 100644 index 0000000..1774bed --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/r_tan.c @@ -0,0 +1,19 @@ +#include "f2c.h" + +#ifdef KR_headers +double tan(); +double r_tan(x) real *x; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +double r_tan(real *x) +#endif +{ +return( tan(*x) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/r_tanh.c b/min-dgels/base/F2CLIBS/libf2c/r_tanh.c new file mode 
/*
 * rd_Z -- read a hexadecimal field of width w into the len-byte object
 * at n.
 *
 * Characters are fetched with GET() until w of them have been consumed or
 * a ',' or newline is seen.  Characters <= ' ' are skipped; any other
 * character that is not a hex digit makes the whole field invalid.
 * The collected digits are then packed two per byte into n, most
 * significant byte first, honouring the host byte order.
 *
 * Returns 0 on success; otherwise sets errno to 117 (len larger than
 * 4*sizeof(long)) or 115 (non-hex character seen) and returns that value.
 * NOTE(review): GET() is a macro defined outside this view; it presumably
 * returns from this function on a read failure -- confirm in fmt.h.
 */
static int
#ifdef KR_headers
rd_Z(n,w,len) Uint *n; ftnlen len;
#else
rd_Z(Uint *n, int w, ftnlen len)
#endif
{
	long x[9];		/* scratch buffer holding the digit characters */
	char *s, *s0, *s1, *se, *t;
	Const char *sc;
	int ch, i, w1, w2;
	static char hex[256];	/* digit value + 1; 0 means "not a hex digit" */
	static int one = 1;	/* probed below to detect byte order */
	int bad = 0;

	/* Build the lookup table on first use. */
	if (!hex['0']) {
		sc = "0123456789";
		while(ch = *sc++)
			hex[ch] = ch - '0' + 1;
		sc = "ABCDEF";
		while(ch = *sc++)
			hex[ch] = hex[ch + 'a' - 'A'] = ch - 'A' + 11;
		}
	s = s0 = (char *)x;	/* s: next free slot, s0: start of buffer */
	s1 = (char *)&x[4];	/* midpoint of the usable buffer */
	se = (char *)&x[8];	/* end of the usable buffer */
	if (len > 4*sizeof(long))
		return errno = 117;
	while (w) {
		GET(ch);
		if (ch==',' || ch=='\n')
			break;
		w--;
		if (ch > ' ') {
			if (!hex[ch & 0xff])
				bad++;
			*s++ = ch;
			if (s == se) {
				/* discard excess characters */
				/* buffer full: slide the newer half down over
				   the older half and keep filling from s1 */
				for(t = s0, s = s1; t < s1;)
					*t++ = *s++;
				s = s1;
				}
			}
		}
	if (bad)
		return errno = 115;
	w = (int)len;		/* from here on w counts output bytes */
	w1 = s - s0;		/* number of digit characters collected */
	w2 = w1+1 >> 1;		/* bytes needed for them: (w1+1)/2 */
	t = (char *)n;
	if (*(char *)&one) {
		/* little endian */
		/* most significant byte is the last one: walk downwards */
		t += w - 1;
		i = -1;
		}
	else
		i = 1;
	/* Zero-fill the leading bytes that the digits do not reach. */
	for(; w > w2; t += i, --w)
		*t = 0;
	if (!w)
		return 0;
	if (w < w2)
		/* more digits than fit: keep only the last 2*w of them */
		s0 = s - (w << 1);
	else if (w1 & 1) {
		/* odd digit count: the first byte gets a single digit */
		*t = hex[*s0++ & 0xff] - 1;
		if (!--w)
			return 0;
		t += i;
		}
	/* Remaining digits are packed in pairs, high nibble first. */
	do {
		*t = hex[*s0 & 0xff]-1 << 4 | hex[s0[1] & 0xff]-1;
		t += i;
		s0 += 2;
		}
		while(--w);
	return 0;
	}
/*
 * rd_L -- read a logical (L edit descriptor) field of width w and store
 * the result in the len-byte object at n.
 *
 * Leading blanks are skipped and at most one '.' may precede the
 * deciding letter: 't'/'T' stores 1, 'f'/'F' stores 0.  The rest of the
 * field is then consumed, stopping early at ',' or newline.
 *
 * Returns 0 on success.  Any other input (empty or all-blank field, a
 * second '.', a '.' at the very end of the field, ',' or newline as the
 * first non-blank, or an unexpected character) sets errno to 116 and
 * returns it.
 * NOTE(review): GET() is a macro defined outside this view; it presumably
 * returns from this function on a read failure -- confirm in fmt.h.
 */
static int
#ifdef KR_headers
rd_L(n,w,len) ftnint *n; ftnlen len;
#else
rd_L(ftnint *n, int w, ftnlen len)
#endif
{	int ch, dot, lv;

	if (w <= 0)
		goto bad;
	/* Skip leading blanks; w counts the characters still unread. */
	for(;;) {
		GET(ch);
		--w;
		if (ch != ' ')
			break;
		if (!w)
			goto bad;
		}
	dot = 0;
 retry:
	switch(ch) {
	  case '.':
		/* only one '.' is allowed, and something must follow it */
		if (dot++ || !w)
			goto bad;
		GET(ch);
		--w;
		goto retry;
	  case 't':
	  case 'T':
		lv = 1;
		break;
	  case 'f':
	  case 'F':
		lv = 0;
		break;
	  default:
 bad:
		/* consume what is left of the field, then report the error */
		for(; w > 0; --w)
			GET(ch);
		/* no break */
	  case ',':
	  case '\n':
		return errno = 116;
		}
	/* Store through a pointer of the width the caller asked for. */
	switch(len) {
		case sizeof(char):	*(char *)n = (char)lv;	 break;
		case sizeof(short):	*(short *)n = (short)lv; break;
		default:		*n = lv;
		}
	/* Swallow the remainder of the field, up to a separator. */
	while(w-- > 0) {
		GET(ch);
		if (ch == ',' || ch == '\n')
			break;
		}
	return 0;
}
+ --exp; +skip0: + if (!w--) goto done; + GET(ch); + } + if (ch == ' ') { + if (f__cblank) goto skip01; + goto skip0; + } + } + while(isdigit(ch)) { +digloop2: + if (sp < spe) + { *sp++ = ch; --exp; } +digloop2e: + if (!w--) goto done; + GET(ch); + } + if (ch == ' ') { + if (f__cblank) + { ch = '0'; goto digloop2; } + goto digloop2e; + } + } + switch(ch) { + default: + break; + case '-': se = 1; goto signonly; + case '+': se = 0; goto signonly; + case 'e': + case 'E': + case 'd': + case 'D': + if (!w--) + goto bad; + GET(ch); + while(ch == ' ') { + if (!w--) + goto bad; + GET(ch); + } + se = 0; + switch(ch) { + case '-': se = 1; + case '+': +signonly: + if (!w--) + goto bad; + GET(ch); + } + while(ch == ' ') { + if (!w--) + goto bad; + GET(ch); + } + if (!isdigit(ch)) + goto bad; + + e = ch - '0'; + for(;;) { + if (!w--) + { ch = '\n'; break; } + GET(ch); + if (!isdigit(ch)) { + if (ch == ' ') { + if (f__cblank) + ch = '0'; + else continue; + } + else + break; + } + e = 10*e + ch - '0'; + if (e > EXPMAX && sp > sp1) + goto bad; + } + if (se) + exp -= e; + else + exp += e; + scale1 = 0; + } + switch(ch) { + case '\n': + case ',': + break; + default: +bad: + return (errno = 115); + } +done: + if (sp > sp1) { + while(*--sp == '0') + ++exp; + if (exp -= scale1) + sprintf(sp+1, "e%ld", exp); + else + sp[1] = 0; + x = atof(s); + } +zero: + if (len == sizeof(real)) + p->pf = x; + else + p->pd = x; + return(0); + } + + + static int +#ifdef KR_headers +rd_A(p,len) char *p; ftnlen len; +#else +rd_A(char *p, ftnlen len) +#endif +{ int i,ch; + for(i=0;i=len) + { for(i=0;i0;f__cursor--) if((ch=(*f__getn)())<0) return(ch); + if(f__cursor<0) + { if(f__recpos+f__cursor < 0) /*err(elist->cierr,110,"fmt")*/ + f__cursor = -f__recpos; /* is this in the standard? 
*/ + if(f__external == 0) { + extern char *f__icptr; + f__icptr += f__cursor; + } + else if(f__curunit && f__curunit->useek) + (void) FSEEK(f__cf, f__cursor,SEEK_CUR); + else + err(f__elist->cierr,106,"fmt"); + f__recpos += f__cursor; + f__cursor=0; + } + switch(p->op) + { + default: fprintf(stderr,"rd_ed, unexpected code: %d\n", p->op); + sig_die(f__fmtbuf, 1); + case IM: + case I: ch = rd_I((Uint *)ptr,p->p1,len, 10); + break; + + /* O and OM don't work right for character, double, complex, */ + /* or doublecomplex, and they differ from Fortran 90 in */ + /* showing a minus sign for negative values. */ + + case OM: + case O: ch = rd_I((Uint *)ptr, p->p1, len, 8); + break; + case L: ch = rd_L((ftnint *)ptr,p->p1,len); + break; + case A: ch = rd_A(ptr,len); + break; + case AW: + ch = rd_AW(ptr,p->p1,len); + break; + case E: case EE: + case D: + case G: + case GE: + case F: ch = rd_F((ufloat *)ptr,p->p1,p->p2.i[0],len); + break; + + /* Z and ZM assume 8-bit bytes. */ + + case ZM: + case Z: + ch = rd_Z((Uint *)ptr, p->p1, len); + break; + } + if(ch == 0) return(ch); + else if(ch == EOF) return(EOF); + if (f__cf) + clearerr(f__cf); + return(errno); +} + + int +#ifdef KR_headers +rd_ned(p) struct syl *p; +#else +rd_ned(struct syl *p) +#endif +{ + switch(p->op) + { + default: fprintf(stderr,"rd_ned, unexpected code: %d\n", p->op); + sig_die(f__fmtbuf, 1); + case APOS: + return(rd_POS(p->p2.s)); + case H: return(rd_H(p->p1,p->p2.s)); + case SLASH: return((*f__donewrec)()); + case TR: + case X: f__cursor += p->p1; + return(1); + case T: f__cursor=p->p1-f__recpos - 1; + return(1); + case TL: f__cursor -= p->p1; + if(f__cursor < -f__recpos) /* TL1000, 1X */ + f__cursor = -f__recpos; + return(1); + } +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/rewind.c b/min-dgels/base/F2CLIBS/libf2c/rewind.c new file mode 100644 index 0000000..9a0e07e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/rewind.c @@ -0,0 +1,30 @@ +#include "f2c.h" +#include 
"fio.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef KR_headers +integer f_rew(a) alist *a; +#else +integer f_rew(alist *a) +#endif +{ + unit *b; + if(a->aunit>=MXUNIT || a->aunit<0) + err(a->aerr,101,"rewind"); + b = &f__units[a->aunit]; + if(b->ufd == NULL || b->uwrt == 3) + return(0); + if(!b->useek) + err(a->aerr,106,"rewind") + if(b->uwrt) { + (void) t_runc(a); + b->uwrt = 3; + } + rewind(b->ufd); + b->uend=0; + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/rsfe.c b/min-dgels/base/F2CLIBS/libf2c/rsfe.c new file mode 100644 index 0000000..abe9724 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/rsfe.c @@ -0,0 +1,91 @@ +/* read sequential formatted external */ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif + + int +xrd_SL(Void) +{ int ch; + if(!f__curunit->uend) + while((ch=getc(f__cf))!='\n') + if (ch == EOF) { + f__curunit->uend = 1; + break; + } + f__cursor=f__recpos=0; + return(1); +} + + int +x_getc(Void) +{ int ch; + if(f__curunit->uend) return(EOF); + ch = getc(f__cf); + if(ch!=EOF && ch!='\n') + { f__recpos++; + return(ch); + } + if(ch=='\n') + { (void) ungetc(ch,f__cf); + return(ch); + } + if(f__curunit->uend || feof(f__cf)) + { errno=0; + f__curunit->uend=1; + return(-1); + } + return(-1); +} + + int +x_endp(Void) +{ + xrd_SL(); + return f__curunit->uend == 1 ? 
EOF : 0; +} + + int +x_rev(Void) +{ + (void) xrd_SL(); + return(0); +} +#ifdef KR_headers +integer s_rsfe(a) cilist *a; /* start */ +#else +integer s_rsfe(cilist *a) /* start */ +#endif +{ int n; + if(!f__init) f_init(); + f__reading=1; + f__sequential=1; + f__formatted=1; + f__external=1; + if(n=c_sfe(a)) return(n); + f__elist=a; + f__cursor=f__recpos=0; + f__scale=0; + f__fmtbuf=a->cifmt; + f__cf=f__curunit->ufd; + if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); + f__getn= x_getc; + f__doed= rd_ed; + f__doned= rd_ned; + fmt_bg(); + f__doend=x_endp; + f__donewrec=xrd_SL; + f__dorevert=x_rev; + f__cblank=f__curunit->ublnk; + f__cplus=0; + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr,errno,"read start"); + if(f__curunit->uend) + err(f__elist->ciend,(EOF),"read start"); + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/rsli.c b/min-dgels/base/F2CLIBS/libf2c/rsli.c new file mode 100644 index 0000000..3d4ea42 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/rsli.c @@ -0,0 +1,109 @@ +#include "f2c.h" +#include "fio.h" +#include "lio.h" +#include "fmt.h" /* for f__doend */ +#ifdef __cplusplus +extern "C" { +#endif + +extern flag f__lquit; +extern int f__lcount; +extern char *f__icptr; +extern char *f__icend; +extern icilist *f__svic; +extern int f__icnum, f__recpos; + +static int i_getc(Void) +{ + if(f__recpos >= f__svic->icirlen) { + if (f__recpos++ == f__svic->icirlen) + return '\n'; + z_rnew(); + } + f__recpos++; + if(f__icptr >= f__icend) + return EOF; + return(*f__icptr++); + } + + static +#ifdef KR_headers +int i_ungetc(ch, f) int ch; FILE *f; +#else +int i_ungetc(int ch, FILE *f) +#endif +{ + if (--f__recpos == f__svic->icirlen) + return '\n'; + if (f__recpos < -1) + err(f__svic->icierr,110,"recend"); + /* *--icptr == ch, and icptr may point to read-only memory */ + return *--f__icptr /* = ch */; + } + + static void +#ifdef KR_headers +c_lir(a) icilist *a; +#else +c_lir(icilist *a) +#endif +{ + 
extern int l_eof; + f__reading = 1; + f__external = 0; + f__formatted = 1; + f__svic = a; + L_len = a->icirlen; + f__recpos = -1; + f__icnum = f__recpos = 0; + f__cursor = 0; + l_getc = i_getc; + l_ungetc = i_ungetc; + l_eof = 0; + f__icptr = a->iciunit; + f__icend = f__icptr + a->icirlen*a->icirnum; + f__cf = 0; + f__curunit = 0; + f__elist = (cilist *)a; + } + + +#ifdef KR_headers +integer s_rsli(a) icilist *a; +#else +integer s_rsli(icilist *a) +#endif +{ + f__lioproc = l_read; + f__lquit = 0; + f__lcount = 0; + c_lir(a); + f__doend = 0; + return(0); + } + +integer e_rsli(Void) +{ return 0; } + +#ifdef KR_headers +integer s_rsni(a) icilist *a; +#else +extern int x_rsne(cilist*); + +integer s_rsni(icilist *a) +#endif +{ + extern int nml_read; + integer rv; + cilist ca; + ca.ciend = a->iciend; + ca.cierr = a->icierr; + ca.cifmt = a->icifmt; + c_lir(a); + rv = x_rsne(&ca); + nml_read = 0; + return rv; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/rsne.c b/min-dgels/base/F2CLIBS/libf2c/rsne.c new file mode 100644 index 0000000..e8e9dae --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/rsne.c @@ -0,0 +1,618 @@ +#include "f2c.h" +#include "fio.h" +#include "lio.h" + +#define MAX_NL_CACHE 3 /* maximum number of namelist hash tables to cache */ +#define MAXDIM 20 /* maximum number of subscripts */ + + struct dimen { + ftnlen extent; + ftnlen curval; + ftnlen delta; + ftnlen stride; + }; + typedef struct dimen dimen; + + struct hashentry { + struct hashentry *next; + char *name; + Vardesc *vd; + }; + typedef struct hashentry hashentry; + + struct hashtab { + struct hashtab *next; + Namelist *nl; + int htsize; + hashentry *tab[1]; + }; + typedef struct hashtab hashtab; + + static hashtab *nl_cache; + static int n_nlcache; + static hashentry **zot; + static int colonseen; + extern ftnlen f__typesize[]; + + extern flag f__lquit; + extern int f__lcount, nml_read; + extern int t_getc(Void); + +#ifdef KR_headers + extern char *malloc(), 
*memset(); +#define Const /*nothing*/ + +#ifdef ungetc + static int +un_getc(x,f__cf) int x; FILE *f__cf; +{ return ungetc(x,f__cf); } +#else +#define un_getc ungetc + extern int ungetc(); +#endif + +#else +#define Const const +#undef abs +#undef min +#undef max +#include "stdlib.h" +#include "string.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef ungetc + static int +un_getc(int x, FILE *f__cf) +{ return ungetc(x,f__cf); } +#else +#define un_getc ungetc +extern int ungetc(int, FILE*); /* for systems with a buggy stdio.h */ +#endif +#endif + + static Vardesc * +#ifdef KR_headers +hash(ht, s) hashtab *ht; register char *s; +#else +hash(hashtab *ht, register char *s) +#endif +{ + register int c, x; + register hashentry *h; + char *s0 = s; + + for(x = 0; c = *s++; x = x & 0x4000 ? ((x << 1) & 0x7fff) + 1 : x << 1) + x += c; + for(h = *(zot = ht->tab + x % ht->htsize); h; h = h->next) + if (!strcmp(s0, h->name)) + return h->vd; + return 0; + } + + hashtab * +#ifdef KR_headers +mk_hashtab(nl) Namelist *nl; +#else +mk_hashtab(Namelist *nl) +#endif +{ + int nht, nv; + hashtab *ht; + Vardesc *v, **vd, **vde; + hashentry *he; + + hashtab **x, **x0, *y; + for(x = &nl_cache; y = *x; x0 = x, x = &y->next) + if (nl == y->nl) + return y; + if (n_nlcache >= MAX_NL_CACHE) { + /* discard least recently used namelist hash table */ + y = *x0; + free((char *)y->next); + y->next = 0; + } + else + n_nlcache++; + nv = nl->nvars; + if (nv >= 0x4000) + nht = 0x7fff; + else { + for(nht = 1; nht < nv; nht <<= 1); + nht += nht - 1; + } + ht = (hashtab *)malloc(sizeof(hashtab) + (nht-1)*sizeof(hashentry *) + + nv*sizeof(hashentry)); + if (!ht) + return 0; + he = (hashentry *)&ht->tab[nht]; + ht->nl = nl; + ht->htsize = nht; + ht->next = nl_cache; + nl_cache = ht; + memset((char *)ht->tab, 0, nht*sizeof(hashentry *)); + vd = nl->vars; + vde = vd + nv; + while(vd < vde) { + v = *vd++; + if (!hash(ht, v->name)) { + he->next = *zot; + *zot = he; + he->name = v->name; + he->vd = v; + he++; + 
} + } + return ht; + } + +static char Alpha[256], Alphanum[256]; + + static VOID +nl_init(Void) { + Const char *s; + int c; + + if(!f__init) + f_init(); + for(s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; c = *s++; ) + Alpha[c] + = Alphanum[c] + = Alpha[c + 'a' - 'A'] + = Alphanum[c + 'a' - 'A'] + = c; + for(s = "0123456789_"; c = *s++; ) + Alphanum[c] = c; + } + +#define GETC(x) (x=(*l_getc)()) +#define Ungetc(x,y) (*l_ungetc)(x,y) + + static int +#ifdef KR_headers +getname(s, slen) register char *s; int slen; +#else +getname(register char *s, int slen) +#endif +{ + register char *se = s + slen - 1; + register int ch; + + GETC(ch); + if (!(*s++ = Alpha[ch & 0xff])) { + if (ch != EOF) + ch = 115; + errfl(f__elist->cierr, ch, "namelist read"); + } + while(*s = Alphanum[GETC(ch) & 0xff]) + if (s < se) + s++; + if (ch == EOF) + err(f__elist->cierr, EOF, "namelist read"); + if (ch > ' ') + Ungetc(ch,f__cf); + return *s = 0; + } + + static int +#ifdef KR_headers +getnum(chp, val) int *chp; ftnlen *val; +#else +getnum(int *chp, ftnlen *val) +#endif +{ + register int ch, sign; + register ftnlen x; + + while(GETC(ch) <= ' ' && ch >= 0); + if (ch == '-') { + sign = 1; + GETC(ch); + } + else { + sign = 0; + if (ch == '+') + GETC(ch); + } + x = ch - '0'; + if (x < 0 || x > 9) + return 115; + while(GETC(ch) >= '0' && ch <= '9') + x = 10*x + ch - '0'; + while(ch <= ' ' && ch >= 0) + GETC(ch); + if (ch == EOF) + return EOF; + *val = sign ? 
-x : x; + *chp = ch; + return 0; + } + + static int +#ifdef KR_headers +getdimen(chp, d, delta, extent, x1) + int *chp; dimen *d; ftnlen delta, extent, *x1; +#else +getdimen(int *chp, dimen *d, ftnlen delta, ftnlen extent, ftnlen *x1) +#endif +{ + register int k; + ftnlen x2, x3; + + if (k = getnum(chp, x1)) + return k; + x3 = 1; + if (*chp == ':') { + if (k = getnum(chp, &x2)) + return k; + x2 -= *x1; + if (*chp == ':') { + if (k = getnum(chp, &x3)) + return k; + if (!x3) + return 123; + x2 /= x3; + colonseen = 1; + } + if (x2 < 0 || x2 >= extent) + return 123; + d->extent = x2 + 1; + } + else + d->extent = 1; + d->curval = 0; + d->delta = delta; + d->stride = x3; + return 0; + } + +#ifndef No_Namelist_Questions + static Void +#ifdef KR_headers +print_ne(a) cilist *a; +#else +print_ne(cilist *a) +#endif +{ + flag intext = f__external; + int rpsave = f__recpos; + FILE *cfsave = f__cf; + unit *usave = f__curunit; + cilist t; + t = *a; + t.ciunit = 6; + s_wsne(&t); + fflush(f__cf); + f__external = intext; + f__reading = 1; + f__recpos = rpsave; + f__cf = cfsave; + f__curunit = usave; + f__elist = a; + } +#endif + + static char where0[] = "namelist read start "; + + int +#ifdef KR_headers +x_rsne(a) cilist *a; +#else +x_rsne(cilist *a) +#endif +{ + int ch, got1, k, n, nd, quote, readall; + Namelist *nl; + static char where[] = "namelist read"; + char buf[64]; + hashtab *ht; + Vardesc *v; + dimen *dn, *dn0, *dn1; + ftnlen *dims, *dims1; + ftnlen b, b0, b1, ex, no, nomax, size, span; + ftnint no1, no2, type; + char *vaddr; + long iva, ivae; + dimen dimens[MAXDIM], substr; + + if (!Alpha['a']) + nl_init(); + f__reading=1; + f__formatted=1; + got1 = 0; + top: + for(;;) switch(GETC(ch)) { + case EOF: + eof: + err(a->ciend,(EOF),where0); + case '&': + case '$': + goto have_amp; +#ifndef No_Namelist_Questions + case '?': + print_ne(a); + continue; +#endif + default: + if (ch <= ' ' && ch >= 0) + continue; +#ifndef No_Namelist_Comments + while(GETC(ch) != '\n') + if (ch == 
EOF) + goto eof; +#else + errfl(a->cierr, 115, where0); +#endif + } + have_amp: + if (ch = getname(buf,sizeof(buf))) + return ch; + nl = (Namelist *)a->cifmt; + if (strcmp(buf, nl->name)) +#ifdef No_Bad_Namelist_Skip + errfl(a->cierr, 118, where0); +#else + { + fprintf(stderr, + "Skipping namelist \"%s\": seeking namelist \"%s\".\n", + buf, nl->name); + fflush(stderr); + for(;;) switch(GETC(ch)) { + case EOF: + err(a->ciend, EOF, where0); + case '/': + case '&': + case '$': + if (f__external) + e_rsle(); + else + z_rnew(); + goto top; + case '"': + case '\'': + quote = ch; + more_quoted: + while(GETC(ch) != quote) + if (ch == EOF) + err(a->ciend, EOF, where0); + if (GETC(ch) == quote) + goto more_quoted; + Ungetc(ch,f__cf); + default: + continue; + } + } +#endif + ht = mk_hashtab(nl); + if (!ht) + errfl(f__elist->cierr, 113, where0); + for(;;) { + for(;;) switch(GETC(ch)) { + case EOF: + if (got1) + return 0; + err(a->ciend, EOF, where0); + case '/': + case '$': + case '&': + return 0; + default: + if (ch <= ' ' && ch >= 0 || ch == ',') + continue; + Ungetc(ch,f__cf); + if (ch = getname(buf,sizeof(buf))) + return ch; + goto havename; + } + havename: + v = hash(ht,buf); + if (!v) + errfl(a->cierr, 119, where); + while(GETC(ch) <= ' ' && ch >= 0); + vaddr = v->addr; + type = v->type; + if (type < 0) { + size = -type; + type = TYCHAR; + } + else + size = f__typesize[type]; + ivae = size; + iva = readall = 0; + if (ch == '(' /*)*/ ) { + dn = dimens; + if (!(dims = v->dims)) { + if (type != TYCHAR) + errfl(a->cierr, 122, where); + if (k = getdimen(&ch, dn, (ftnlen)size, + (ftnlen)size, &b)) + errfl(a->cierr, k, where); + if (ch != ')') + errfl(a->cierr, 115, where); + b1 = dn->extent; + if (--b < 0 || b + b1 > size) + return 124; + iva += b; + size = b1; + while(GETC(ch) <= ' ' && ch >= 0); + goto scalar; + } + nd = (int)dims[0]; + nomax = span = dims[1]; + ivae = iva + size*nomax; + colonseen = 0; + if (k = getdimen(&ch, dn, size, nomax, &b)) + errfl(a->cierr, k, 
where); + no = dn->extent; + b0 = dims[2]; + dims1 = dims += 3; + ex = 1; + for(n = 1; n++ < nd; dims++) { + if (ch != ',') + errfl(a->cierr, 115, where); + dn1 = dn + 1; + span /= *dims; + if (k = getdimen(&ch, dn1, dn->delta**dims, + span, &b1)) + errfl(a->cierr, k, where); + ex *= *dims; + b += b1*ex; + no *= dn1->extent; + dn = dn1; + } + if (ch != ')') + errfl(a->cierr, 115, where); + readall = 1 - colonseen; + b -= b0; + if (b < 0 || b >= nomax) + errfl(a->cierr, 125, where); + iva += size * b; + dims = dims1; + while(GETC(ch) <= ' ' && ch >= 0); + no1 = 1; + dn0 = dimens; + if (type == TYCHAR && ch == '(' /*)*/) { + if (k = getdimen(&ch, &substr, size, size, &b)) + errfl(a->cierr, k, where); + if (ch != ')') + errfl(a->cierr, 115, where); + b1 = substr.extent; + if (--b < 0 || b + b1 > size) + return 124; + iva += b; + b0 = size; + size = b1; + while(GETC(ch) <= ' ' && ch >= 0); + if (b1 < b0) + goto delta_adj; + } + if (readall) + goto delta_adj; + for(; dn0 < dn; dn0++) { + if (dn0->extent != *dims++ || dn0->stride != 1) + break; + no1 *= dn0->extent; + } + if (dn0 == dimens && dimens[0].stride == 1) { + no1 = dimens[0].extent; + dn0++; + } + delta_adj: + ex = 0; + for(dn1 = dn0; dn1 <= dn; dn1++) + ex += (dn1->extent-1) + * (dn1->delta *= dn1->stride); + for(dn1 = dn; dn1 > dn0; dn1--) { + ex -= (dn1->extent - 1) * dn1->delta; + dn1->delta -= ex; + } + } + else if (dims = v->dims) { + no = no1 = dims[1]; + ivae = iva + no*size; + } + else + scalar: + no = no1 = 1; + if (ch != '=') + errfl(a->cierr, 115, where); + got1 = nml_read = 1; + f__lcount = 0; + readloop: + for(;;) { + if (iva >= ivae || iva < 0) { + f__lquit = 1; + goto mustend; + } + else if (iva + no1*size > ivae) + no1 = (ivae - iva)/size; + f__lquit = 0; + if (k = l_read(&no1, vaddr + iva, size, type)) + return k; + if (f__lquit == 1) + return 0; + if (readall) { + iva += dn0->delta; + if (f__lcount > 0) { + no2 = (ivae - iva)/size; + if (no2 > f__lcount) + no2 = f__lcount; + if (k = 
l_read(&no2, vaddr + iva, + size, type)) + return k; + iva += no2 * dn0->delta; + } + } + mustend: + GETC(ch); + if (readall) + if (iva >= ivae) + readall = 0; + else for(;;) { + switch(ch) { + case ' ': + case '\t': + case '\n': + GETC(ch); + continue; + } + break; + } + if (ch == '/' || ch == '$' || ch == '&') { + f__lquit = 1; + return 0; + } + else if (f__lquit) { + while(ch <= ' ' && ch >= 0) + GETC(ch); + Ungetc(ch,f__cf); + if (!Alpha[ch & 0xff] && ch >= 0) + errfl(a->cierr, 125, where); + break; + } + Ungetc(ch,f__cf); + if (readall && !Alpha[ch & 0xff]) + goto readloop; + if ((no -= no1) <= 0) + break; + for(dn1 = dn0; dn1 <= dn; dn1++) { + if (++dn1->curval < dn1->extent) { + iva += dn1->delta; + goto readloop; + } + dn1->curval = 0; + } + break; + } + } + } + + integer +#ifdef KR_headers +s_rsne(a) cilist *a; +#else +s_rsne(cilist *a) +#endif +{ + extern int l_eof; + int n; + + f__external=1; + l_eof = 0; + if(n = c_le(a)) + return n; + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr,errno,where0); + l_getc = t_getc; + l_ungetc = un_getc; + f__doend = xrd_SL; + n = x_rsne(a); + nml_read = 0; + if (n) + return n; + return e_rsle(); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_cat.c b/min-dgels/base/F2CLIBS/libf2c/s_cat.c new file mode 100644 index 0000000..8d92a63 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_cat.c @@ -0,0 +1,86 @@ +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). 
+ */ + +#include "f2c.h" +#ifndef NO_OVERWRITE +#include "stdio.h" +#undef abs +#ifdef KR_headers + extern char *F77_aloc(); + extern void free(); + extern void exit_(); +#else +#undef min +#undef max +#include "stdlib.h" +extern +#ifdef __cplusplus + "C" +#endif + char *F77_aloc(ftnlen, const char*); +#endif +#include "string.h" +#endif /* NO_OVERWRITE */ + +#ifdef __cplusplus +extern "C" { +#endif + + VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnint rnp[], *np; ftnlen ll; +#else +s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while(i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for(i = 0 ; i < n ; ++i) { + nc = ll; + if(rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while(--nc >= 0) + *lp++ = *rp++; + } + while(--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memcpy(lp0, lp1, L); + free(lp1); + } +#endif + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_cmp.c b/min-dgels/base/F2CLIBS/libf2c/s_cmp.c new file mode 100644 index 0000000..3a2ea67 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_cmp.c @@ -0,0 +1,50 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +/* compare two strings */ + +#ifdef KR_headers +integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb; +#else +integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) +#endif +{ +register unsigned char *a, *aend, *b, *bend; +a = (unsigned char *)a0; +b = (unsigned char *)b0; +aend = a + la; +bend = b + lb; + +if(la <= lb) + { + while(a < aend) + if(*a != *b) + return( *a - *b ); + else + { ++a; ++b; } + + while(b < bend) + 
if(*b != ' ') + return( ' ' - *b ); + else ++b; + } + +else + { + while(b < bend) + if(*a == *b) + { ++a; ++b; } + else + return( *a - *b ); + while(a < aend) + if(*a != ' ') + return(*a - ' '); + else ++a; + } +return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_copy.c b/min-dgels/base/F2CLIBS/libf2c/s_copy.c new file mode 100644 index 0000000..9dacfc7 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_copy.c @@ -0,0 +1,57 @@ +/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the + * target of an assignment to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90), + * as in a(2:5) = a(4:7) . + */ + +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +/* assign strings: a = b */ + +#ifdef KR_headers +VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb; +#else +void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) +#endif +{ + register char *aend, *bend; + + aend = a + la; + + if(la <= lb) +#ifndef NO_OVERWRITE + if (a <= b || a >= b + la) +#endif + while(a < aend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else + for(b += la; a < aend; ) + *--aend = *--b; +#endif + + else { + bend = b + lb; +#ifndef NO_OVERWRITE + if (a <= b || a >= bend) +#endif + while(b < bend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else { + a += lb; + while(b < bend) + *--a = *--bend; + a += lb; + } +#endif + while(a < aend) + *a++ = ' '; + } + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_paus.c b/min-dgels/base/F2CLIBS/libf2c/s_paus.c new file mode 100644 index 0000000..51d80eb --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_paus.c @@ -0,0 +1,96 @@ +#include "stdio.h" +#include "f2c.h" +#define PAUSESIG 15 + +#include "signal1.h" +#ifdef KR_headers +#define Void /* void */ +#define Int /* int */ +#else +#define Void void +#define Int int +#undef abs +#undef min +#undef max +#include "stdlib.h" +#ifdef __cplusplus 
+extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +extern int getpid(void), isatty(int), pause(void); +#endif + +extern VOID f_exit(Void); + +#ifndef MSDOS + static VOID +waitpause(Sigarg) +{ Use_Sigarg; + return; + } +#endif + + static VOID +#ifdef KR_headers +s_1paus(fin) FILE *fin; +#else +s_1paus(FILE *fin) +#endif +{ + fprintf(stderr, + "To resume execution, type go. Other input will terminate the job.\n"); + fflush(stderr); + if( getc(fin)!='g' || getc(fin)!='o' || getc(fin)!='\n' ) { + fprintf(stderr, "STOP\n"); +#ifdef NO_ONEXIT + f_exit(); +#endif + exit(0); + } + } + + int +#ifdef KR_headers +s_paus(s, n) char *s; ftnlen n; +#else +s_paus(char *s, ftnlen n) +#endif +{ + fprintf(stderr, "PAUSE "); + if(n > 0) + fprintf(stderr, " %.*s", (int)n, s); + fprintf(stderr, " statement executed\n"); + if( isatty(fileno(stdin)) ) + s_1paus(stdin); + else { +#ifdef MSDOS + FILE *fin; + fin = fopen("con", "r"); + if (!fin) { + fprintf(stderr, "s_paus: can't open con!\n"); + fflush(stderr); + exit(1); + } + s_1paus(fin); + fclose(fin); +#else + fprintf(stderr, + "To resume execution, execute a kill -%d %d command\n", + PAUSESIG, getpid() ); + signal1(PAUSESIG, waitpause); + fflush(stderr); + pause(); +#endif + } + fprintf(stderr, "Execution resumes after PAUSE.\n"); + fflush(stderr); + return 0; /* NOT REACHED */ +#ifdef __cplusplus + } +#endif +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_rnge.c b/min-dgels/base/F2CLIBS/libf2c/s_rnge.c new file mode 100644 index 0000000..3dbc513 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_rnge.c @@ -0,0 +1,32 @@ +#include "stdio.h" +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +/* called when a subscript is out of range */ + +#ifdef KR_headers +extern VOID sig_die(); +integer s_rnge(varn, offset, procn, line) char *varn, *procn; ftnint offset, line; +#else +extern VOID sig_die(const char*,int); +integer s_rnge(char *varn, ftnint offset, char *procn, ftnint line) 
+#endif +{ +register int i; + +fprintf(stderr, "Subscript out of range on file line %ld, procedure ", + (long)line); +while((i = *procn) && i != '_' && i != ' ') + putc(*procn++, stderr); +fprintf(stderr, ".\nAttempt to access the %ld-th element of variable ", + (long)offset+1); +while((i = *varn) && i != ' ') + putc(*varn++, stderr); +sig_die(".", 1); +return 0; /* not reached */ +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/s_stop.c b/min-dgels/base/F2CLIBS/libf2c/s_stop.c new file mode 100644 index 0000000..68233ae --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/s_stop.c @@ -0,0 +1,48 @@ +#include "stdio.h" +#include "f2c.h" + +#ifdef KR_headers +extern void f_exit(); +int s_stop(s, n) char *s; ftnlen n; +#else +#undef abs +#undef min +#undef max +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +void f_exit(void); + +int s_stop(char *s, ftnlen n) +#endif +{ +int i; + +if(n > 0) + { + fprintf(stderr, "STOP "); + for(i = 0; iciunit]; + if(a->ciunit >= MXUNIT || a->ciunit<0) + err(a->cierr,101,"startio"); + if(p->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,114,"sfe") + if(!p->ufmt) err(a->cierr,102,"sfe") + return(0); +} +integer e_wsfe(Void) +{ + int n = en_fio(); + f__fmtbuf = NULL; +#ifdef ALWAYS_FLUSH + if (!n && fflush(f__cf)) + err(f__elist->cierr, errno, "write end"); +#endif + return n; +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/sig_die.c b/min-dgels/base/F2CLIBS/libf2c/sig_die.c new file mode 100644 index 0000000..63a73d9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/sig_die.c @@ -0,0 +1,51 @@ +#include "stdio.h" +#include "signal.h" + +#ifndef SIGIOT +#ifdef SIGABRT +#define SIGIOT SIGABRT +#endif +#endif + +#ifdef KR_headers +void sig_die(s, kill) char *s; int kill; +#else +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif + extern void f_exit(void); + +void 
sig_die(const char *s, int kill) +#endif +{ + /* print error message, then clear buffers */ + fprintf(stderr, "%s\n", s); + + if(kill) + { + fflush(stderr); + f_exit(); + fflush(stderr); + /* now get a core */ +#ifdef SIGIOT + signal(SIGIOT, SIG_DFL); +#endif + abort(); + } + else { +#ifdef NO_ONEXIT + f_exit(); +#endif + exit(1); + } + } +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/signal1.h0 b/min-dgels/base/F2CLIBS/libf2c/signal1.h0 new file mode 100644 index 0000000..a383774 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/signal1.h0 @@ -0,0 +1,35 @@ +/* You may need to adjust the definition of signal1 to supply a */ +/* cast to the correct argument type. This detail is system- and */ +/* compiler-dependent. The #define below assumes signal.h declares */ +/* type SIG_PF for the signal function's second argument. */ + +/* For some C++ compilers, "#define Sigarg_t ..." may be appropriate. */ + +#include + +#ifndef Sigret_t +#define Sigret_t void +#endif +#ifndef Sigarg_t +#ifdef KR_headers +#define Sigarg_t +#else +#define Sigarg_t int +#endif +#endif /*Sigarg_t*/ + +#ifdef USE_SIG_PF /* compile with -DUSE_SIG_PF under IRIX */ +#define sig_pf SIG_PF +#else +typedef Sigret_t (*sig_pf)(Sigarg_t); +#endif + +#define signal1(a,b) signal(a,(sig_pf)b) + +#ifdef __cplusplus +#define Sigarg ... 
+#define Use_Sigarg +#else +#define Sigarg Int n +#define Use_Sigarg n = n /* shut up compiler warning */ +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/signal_.c b/min-dgels/base/F2CLIBS/libf2c/signal_.c new file mode 100644 index 0000000..3b0e6cf --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/signal_.c @@ -0,0 +1,21 @@ +#include "f2c.h" +#include "signal1.h" +#ifdef __cplusplus +extern "C" { +#endif + + ftnint +#ifdef KR_headers +signal_(sigp, proc) integer *sigp; sig_pf proc; +#else +signal_(integer *sigp, sig_pf proc) +#endif +{ + int sig; + sig = (int)*sigp; + + return (ftnint)signal(sig, proc); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/signbit.c b/min-dgels/base/F2CLIBS/libf2c/signbit.c new file mode 100644 index 0000000..de95a3b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/signbit.c @@ -0,0 +1,24 @@ +#include "arith.h" + +#ifndef Long +#define Long long +#endif + + int +#ifdef KR_headers +signbit_f2c(x) double *x; +#else +signbit_f2c(double *x) +#endif +{ +#ifdef IEEE_MC68k + if (*(Long*)x & 0x80000000) + return 1; +#else +#ifdef IEEE_8087 + if (((Long*)x)[1] & 0x80000000) + return 1; +#endif /*IEEE_8087*/ +#endif /*IEEE_MC68k*/ + return 0; + } diff --git a/min-dgels/base/F2CLIBS/libf2c/sue.c b/min-dgels/base/F2CLIBS/libf2c/sue.c new file mode 100644 index 0000000..191e326 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/sue.c @@ -0,0 +1,90 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif +extern uiolen f__reclen; +OFF_T f__recloc; + + int +#ifdef KR_headers +c_sue(a) cilist *a; +#else +c_sue(cilist *a) +#endif +{ + f__external=f__sequential=1; + f__formatted=0; + f__curunit = &f__units[a->ciunit]; + if(a->ciunit >= MXUNIT || a->ciunit < 0) + err(a->cierr,101,"startio"); + f__elist=a; + if(f__curunit->ufd==NULL && fk_open(SEQ,UNF,a->ciunit)) + err(a->cierr,114,"sue"); + f__cf=f__curunit->ufd; + if(f__curunit->ufmt) err(a->cierr,103,"sue") + if(!f__curunit->useek) 
err(a->cierr,103,"sue") + return(0); +} +#ifdef KR_headers +integer s_rsue(a) cilist *a; +#else +integer s_rsue(cilist *a) +#endif +{ + int n; + if(!f__init) f_init(); + f__reading=1; + if(n=c_sue(a)) return(n); + f__recpos=0; + if(f__curunit->uwrt && f__nowreading(f__curunit)) + err(a->cierr, errno, "read start"); + if(fread((char *)&f__reclen,sizeof(uiolen),1,f__cf) + != 1) + { if(feof(f__cf)) + { f__curunit->uend = 1; + err(a->ciend, EOF, "start"); + } + clearerr(f__cf); + err(a->cierr, errno, "start"); + } + return(0); +} +#ifdef KR_headers +integer s_wsue(a) cilist *a; +#else +integer s_wsue(cilist *a) +#endif +{ + int n; + if(!f__init) f_init(); + if(n=c_sue(a)) return(n); + f__reading=0; + f__reclen=0; + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr, errno, "write start"); + f__recloc=FTELL(f__cf); + FSEEK(f__cf,(OFF_T)sizeof(uiolen),SEEK_CUR); + return(0); +} +integer e_wsue(Void) +{ OFF_T loc; + fwrite((char *)&f__reclen,sizeof(uiolen),1,f__cf); +#ifdef ALWAYS_FLUSH + if (fflush(f__cf)) + err(f__elist->cierr, errno, "write end"); +#endif + loc=FTELL(f__cf); + FSEEK(f__cf,f__recloc,SEEK_SET); + fwrite((char *)&f__reclen,sizeof(uiolen),1,f__cf); + FSEEK(f__cf,loc,SEEK_SET); + return(0); +} +integer e_rsue(Void) +{ + FSEEK(f__cf,(OFF_T)(f__reclen-f__recpos+sizeof(uiolen)),SEEK_CUR); + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/sysdep1.h0 b/min-dgels/base/F2CLIBS/libf2c/sysdep1.h0 new file mode 100644 index 0000000..4c026a2 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/sysdep1.h0 @@ -0,0 +1,66 @@ +#ifndef SYSDEP_H_INCLUDED +#define SYSDEP_H_INCLUDED +#undef USE_LARGEFILE +#ifndef NO_LONG_LONG + +#ifdef __sun__ +#define USE_LARGEFILE +#define OFF_T off64_t +#endif + +#ifdef __linux__ +#define USE_LARGEFILE +#define OFF_T __off64_t +#endif + +#ifdef _AIX43 +#define _LARGE_FILES +#define _LARGE_FILE_API +#define USE_LARGEFILE +#endif /*_AIX43*/ + +#ifdef __hpux +#define _FILE64 +#define 
_LARGEFILE64_SOURCE +#define USE_LARGEFILE +#endif /*__hpux*/ + +#ifdef __sgi +#define USE_LARGEFILE +#endif /*__sgi*/ + +#ifdef __FreeBSD__ +#define OFF_T off_t +#define FSEEK fseeko +#define FTELL ftello +#endif + +#ifdef USE_LARGEFILE +#ifndef OFF_T +#define OFF_T off64_t +#endif +#define _LARGEFILE_SOURCE +#define _LARGEFILE64_SOURCE +#include +#include +#define FOPEN fopen64 +#define FREOPEN freopen64 +#define FSEEK fseeko64 +#define FSTAT fstat64 +#define FTELL ftello64 +#define FTRUNCATE ftruncate64 +#define STAT stat64 +#define STAT_ST stat64 +#endif /*USE_LARGEFILE*/ +#endif /*NO_LONG_LONG*/ + +#ifndef NON_UNIX_STDIO +#ifndef USE_LARGEFILE +#define _INCLUDE_POSIX_SOURCE /* for HP-UX */ +#define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ +#include "sys/types.h" +#include "sys/stat.h" +#endif +#endif + +#endif /*SYSDEP_H_INCLUDED*/ diff --git a/min-dgels/base/F2CLIBS/libf2c/system_.c b/min-dgels/base/F2CLIBS/libf2c/system_.c new file mode 100644 index 0000000..b18e8a6 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/system_.c @@ -0,0 +1,42 @@ +/* f77 interface to system routine */ + +#include "f2c.h" + +#ifdef KR_headers +extern char *F77_aloc(); + + integer +system_(s, n) register char *s; ftnlen n; +#else +#undef abs +#undef min +#undef max +#include "stdlib.h" +#ifdef __cplusplus +extern "C" { +#endif +extern char *F77_aloc(ftnlen, const char*); + + integer +system_(register char *s, ftnlen n) +#endif +{ + char buff0[256], *buff; + register char *bp, *blast; + integer rv; + + buff = bp = n < sizeof(buff0) + ? 
buff0 : F77_aloc(n+1, "system_"); + blast = bp + n; + + while(bp < blast && *s) + *bp++ = *s++; + *bp = 0; + rv = system(buff); + if (buff != buff0) + free(buff); + return rv; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/typesize.c b/min-dgels/base/F2CLIBS/libf2c/typesize.c new file mode 100644 index 0000000..39097f4 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/typesize.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +ftnlen f__typesize[] = { 0, 0, sizeof(shortint), sizeof(integer), + sizeof(real), sizeof(doublereal), + sizeof(complex), sizeof(doublecomplex), + sizeof(logical), sizeof(char), + 0, sizeof(integer1), + sizeof(logical1), sizeof(shortlogical), +#ifdef Allow_TYQUAD + sizeof(longint), +#endif + 0}; +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/uio.c b/min-dgels/base/F2CLIBS/libf2c/uio.c new file mode 100644 index 0000000..44f768d --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/uio.c @@ -0,0 +1,75 @@ +#include "f2c.h" +#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif +uiolen f__reclen; + + int +#ifdef KR_headers +do_us(number,ptr,len) ftnint *number; char *ptr; ftnlen len; +#else +do_us(ftnint *number, char *ptr, ftnlen len) +#endif +{ + if(f__reading) + { + f__recpos += (int)(*number * len); + if(f__recpos>f__reclen) + err(f__elist->cierr, 110, "do_us"); + if (fread(ptr,(int)len,(int)(*number),f__cf) != *number) + err(f__elist->ciend, EOF, "do_us"); + return(0); + } + else + { + f__reclen += *number * len; + (void) fwrite(ptr,(int)len,(int)(*number),f__cf); + return(0); + } +} +#ifdef KR_headers +integer do_ud(number,ptr,len) ftnint *number; char *ptr; ftnlen len; +#else +integer do_ud(ftnint *number, char *ptr, ftnlen len) +#endif +{ + f__recpos += (int)(*number * len); + if(f__recpos > f__curunit->url && f__curunit->url!=1) + err(f__elist->cierr,110,"do_ud"); + if(f__reading) + { +#ifdef Pad_UDread +#ifdef KR_headers + int i; +#else + size_t i; 
+#endif + if (!(i = fread(ptr,(int)len,(int)(*number),f__cf)) + && !(f__recpos - *number*len)) + err(f__elist->cierr,EOF,"do_ud") + if (i < *number) + memset(ptr + i*len, 0, (*number - i)*len); + return 0; +#else + if(fread(ptr,(int)len,(int)(*number),f__cf) != *number) + err(f__elist->cierr,EOF,"do_ud") + else return(0); +#endif + } + (void) fwrite(ptr,(int)len,(int)(*number),f__cf); + return(0); +} +#ifdef KR_headers +integer do_uio(number,ptr,len) ftnint *number; char *ptr; ftnlen len; +#else +integer do_uio(ftnint *number, char *ptr, ftnlen len) +#endif +{ + if(f__sequential) + return(do_us(number,ptr,len)); + else return(do_ud(number,ptr,len)); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/uninit.c b/min-dgels/base/F2CLIBS/libf2c/uninit.c new file mode 100644 index 0000000..f15fe39 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/uninit.c @@ -0,0 +1,377 @@ +#include +#include +#include "arith.h" + +#define TYSHORT 2 +#define TYLONG 3 +#define TYREAL 4 +#define TYDREAL 5 +#define TYCOMPLEX 6 +#define TYDCOMPLEX 7 +#define TYINT1 11 +#define TYQUAD 14 +#ifndef Long +#define Long long +#endif + +#ifdef __mips +#define RNAN 0xffc00000 +#define DNAN0 0xfff80000 +#define DNAN1 0 +#endif + +#ifdef _PA_RISC1_1 +#define RNAN 0xffc00000 +#define DNAN0 0xfff80000 +#define DNAN1 0 +#endif + +#ifndef RNAN +#define RNAN 0xff800001 +#ifdef IEEE_MC68k +#define DNAN0 0xfff00000 +#define DNAN1 1 +#else +#define DNAN0 1 +#define DNAN1 0xfff00000 +#endif +#endif /*RNAN*/ + +#ifdef KR_headers +#define Void /*void*/ +#define FA7UL (unsigned Long) 0xfa7a7a7aL +#else +#define Void void +#define FA7UL 0xfa7a7a7aUL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +static void ieee0(Void); + +static unsigned Long rnan = RNAN, + dnan0 = DNAN0, + dnan1 = DNAN1; + +double _0 = 0.; + + void +#ifdef KR_headers +_uninit_f2c(x, type, len) void *x; int type; long len; +#else +_uninit_f2c(void *x, int type, long len) +#endif +{ + static int first = 1; + + 
unsigned Long *lx, *lxe; + + if (first) { + first = 0; + ieee0(); + } + if (len == 1) + switch(type) { + case TYINT1: + *(char*)x = 'Z'; + return; + case TYSHORT: + *(short*)x = 0xfa7a; + break; + case TYLONG: + *(unsigned Long*)x = FA7UL; + return; + case TYQUAD: + case TYCOMPLEX: + case TYDCOMPLEX: + break; + case TYREAL: + *(unsigned Long*)x = rnan; + return; + case TYDREAL: + lx = (unsigned Long*)x; + lx[0] = dnan0; + lx[1] = dnan1; + return; + default: + printf("Surprise type %d in _uninit_f2c\n", type); + } + switch(type) { + case TYINT1: + memset(x, 'Z', len); + break; + case TYSHORT: + *(short*)x = 0xfa7a; + break; + case TYQUAD: + len *= 2; + /* no break */ + case TYLONG: + lx = (unsigned Long*)x; + lxe = lx + len; + while(lx < lxe) + *lx++ = FA7UL; + break; + case TYCOMPLEX: + len *= 2; + /* no break */ + case TYREAL: + lx = (unsigned Long*)x; + lxe = lx + len; + while(lx < lxe) + *lx++ = rnan; + break; + case TYDCOMPLEX: + len *= 2; + /* no break */ + case TYDREAL: + lx = (unsigned Long*)x; + for(lxe = lx + 2*len; lx < lxe; lx += 2) { + lx[0] = dnan0; + lx[1] = dnan1; + } + } + } +#ifdef __cplusplus +} +#endif + +#ifndef MSpc +#ifdef MSDOS +#define MSpc +#else +#ifdef _WIN32 +#define MSpc +#endif +#endif +#endif + +#ifdef MSpc +#define IEEE0_done +#include "float.h" +#include "signal.h" + + static void +ieee0(Void) +{ +#ifndef __alpha +#ifndef EM_DENORMAL +#define EM_DENORMAL _EM_DENORMAL +#endif +#ifndef EM_UNDERFLOW +#define EM_UNDERFLOW _EM_UNDERFLOW +#endif +#ifndef EM_INEXACT +#define EM_INEXACT _EM_INEXACT +#endif +#ifndef MCW_EM +#define MCW_EM _MCW_EM +#endif + _control87(EM_DENORMAL | EM_UNDERFLOW | EM_INEXACT, MCW_EM); +#endif + /* With MS VC++, compiling and linking with -Zi will permit */ + /* clicking to invoke the MS C++ debugger, which will show */ + /* the point of error -- provided SIGFPE is SIG_DFL. 
*/ + signal(SIGFPE, SIG_DFL); + } +#endif /* MSpc */ + +#ifdef __mips /* must link with -lfpe */ +#define IEEE0_done +/* code from Eric Grosse */ +#include +#include +#include "/usr/include/sigfpe.h" /* full pathname for lcc -N */ +#include "/usr/include/sys/fpu.h" + + static void +#ifdef KR_headers +ieeeuserhand(exception, val) unsigned exception[5]; int val[2]; +#else +ieeeuserhand(unsigned exception[5], int val[2]) +#endif +{ + fflush(stdout); + fprintf(stderr,"ieee0() aborting because of "); + if(exception[0]==_OVERFL) fprintf(stderr,"overflow\n"); + else if(exception[0]==_UNDERFL) fprintf(stderr,"underflow\n"); + else if(exception[0]==_DIVZERO) fprintf(stderr,"divide by 0\n"); + else if(exception[0]==_INVALID) fprintf(stderr,"invalid operation\n"); + else fprintf(stderr,"\tunknown reason\n"); + fflush(stderr); + abort(); +} + + static void +#ifdef KR_headers +ieeeuserhand2(j) unsigned int **j; +#else +ieeeuserhand2(unsigned int **j) +#endif +{ + fprintf(stderr,"ieee0() aborting because of confusion\n"); + abort(); +} + + static void +ieee0(Void) +{ + int i; + for(i=1; i<=4; i++){ + sigfpe_[i].count = 1000; + sigfpe_[i].trace = 1; + sigfpe_[i].repls = _USER_DETERMINED; + } + sigfpe_[1].repls = _ZERO; /* underflow */ + handle_sigfpes( _ON, + _EN_UNDERFL|_EN_OVERFL|_EN_DIVZERO|_EN_INVALID, + ieeeuserhand,_ABORT_ON_ERROR,ieeeuserhand2); + } +#endif /* mips */ + +#ifdef __linux__ +#define IEEE0_done +#include "fpu_control.h" + +#ifdef __alpha__ +#ifndef USE_setfpucw +#define __setfpucw(x) __fpu_control = (x) +#endif +#endif + +#ifndef _FPU_SETCW +#undef Can_use__setfpucw +#define Can_use__setfpucw +#endif + + static void +ieee0(Void) +{ +#if (defined(__mc68000__) || defined(__mc68020__) || defined(mc68020) || defined (__mc68k__)) +/* Reported 20010705 by Alan Bain */ +/* Note that IEEE 754 IOP (illegal operation) */ +/* = Signaling NAN (SNAN) + operation error (OPERR). 
*/ +#ifdef Can_use__setfpucw + __setfpucw(_FPU_IEEE + _FPU_DOUBLE + _FPU_MASK_OPERR + _FPU_MASK_DZ + _FPU_MASK_SNAN+_FPU_MASK_OVFL); +#else + __fpu_control = _FPU_IEEE + _FPU_DOUBLE + _FPU_MASK_OPERR + _FPU_MASK_DZ + _FPU_MASK_SNAN+_FPU_MASK_OVFL; + _FPU_SETCW(__fpu_control); +#endif + +#elif (defined(__powerpc__)||defined(_ARCH_PPC)||defined(_ARCH_PWR)) /* !__mc68k__ */ +/* Reported 20011109 by Alan Bain */ + +#ifdef Can_use__setfpucw + +/* The following is NOT a mistake -- the author of the fpu_control.h +for the PPC has erroneously defined IEEE mode to turn on exceptions +other than Inexact! Start from default then and turn on only the ones +which we want*/ + + __setfpucw(_FPU_DEFAULT + _FPU_MASK_IM+_FPU_MASK_OM+_FPU_MASK_UM); + +#else /* PPC && !Can_use__setfpucw */ + + __fpu_control = _FPU_DEFAULT +_FPU_MASK_OM+_FPU_MASK_IM+_FPU_MASK_UM; + _FPU_SETCW(__fpu_control); + +#endif /*Can_use__setfpucw*/ + +#else /* !(mc68000||powerpc) */ + +#ifdef _FPU_IEEE +#ifndef _FPU_EXTENDED /* e.g., ARM processor under Linux */ +#define _FPU_EXTENDED 0 +#endif +#ifndef _FPU_DOUBLE +#define _FPU_DOUBLE 0 +#endif +#ifdef Can_use__setfpucw /* pre-1997 (?) 
Linux */ + __setfpucw(_FPU_IEEE - _FPU_MASK_IM - _FPU_MASK_ZM - _FPU_MASK_OM); +#else +#ifdef UNINIT_F2C_PRECISION_53 /* 20051004 */ + /* unmask invalid, etc., and change rounding precision to double */ + __fpu_control = _FPU_IEEE - _FPU_EXTENDED + _FPU_DOUBLE - _FPU_MASK_IM - _FPU_MASK_ZM - _FPU_MASK_OM; + _FPU_SETCW(__fpu_control); +#else + /* unmask invalid, etc., and keep current rounding precision */ + fpu_control_t cw; + _FPU_GETCW(cw); + cw &= ~(_FPU_MASK_IM | _FPU_MASK_ZM | _FPU_MASK_OM); + _FPU_SETCW(cw); +#endif +#endif + +#else /* !_FPU_IEEE */ + + fprintf(stderr, "\n%s\n%s\n%s\n%s\n", + "WARNING: _uninit_f2c in libf2c does not know how", + "to enable trapping on this system, so f2c's -trapuv", + "option will not detect uninitialized variables unless", + "you can enable trapping manually."); + fflush(stderr); + +#endif /* _FPU_IEEE */ +#endif /* __mc68k__ */ + } +#endif /* __linux__ */ + +#ifdef __alpha +#ifndef IEEE0_done +#define IEEE0_done +#include + static void +ieee0(Void) +{ + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV); + } +#endif /*IEEE0_done*/ +#endif /*__alpha*/ + +#ifdef __hpux +#define IEEE0_done +#define _INCLUDE_HPUX_SOURCE +#include + +#ifndef FP_X_INV +#include +#define fpsetmask fesettrapenable +#define FP_X_INV FE_INVALID +#endif + + static void +ieee0(Void) +{ + fpsetmask(FP_X_INV); + } +#endif /*__hpux*/ + +#ifdef _AIX +#define IEEE0_done +#include + + static void +ieee0(Void) +{ + fp_enable(TRP_INVALID); + fp_trap(FP_TRAP_SYNC); + } +#endif /*_AIX*/ + +#ifdef __sun +#define IEEE0_done +#include + + static void +ieee0(Void) +{ + fpsetmask(FP_X_INV); + } +#endif /*__sparc*/ + +#ifndef IEEE0_done + static void +ieee0(Void) {} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/util.c b/min-dgels/base/F2CLIBS/libf2c/util.c new file mode 100644 index 0000000..ad4bec5 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/util.c @@ -0,0 +1,57 @@ +#include "sysdep1.h" /* here to get stat64 on some badly designed Linux systems */ +#include "f2c.h" 
+#include "fio.h" +#ifdef __cplusplus +extern "C" { +#endif + + VOID +#ifdef KR_headers +#define Const /*nothing*/ +g_char(a,alen,b) char *a,*b; ftnlen alen; +#else +#define Const const +g_char(const char *a, ftnlen alen, char *b) +#endif +{ + Const char *x = a + alen; + char *y = b + alen; + + for(;; y--) { + if (x <= a) { + *b = 0; + return; + } + if (*--x != ' ') + break; + } + *y-- = 0; + do *y-- = *x; + while(x-- > a); + } + + VOID +#ifdef KR_headers +b_char(a,b,blen) char *a,*b; ftnlen blen; +#else +b_char(const char *a, char *b, ftnlen blen) +#endif +{ int i; + for(i=0;i= d + 2 || f__scale <= -d) + goto nogood; + } + if(f__scale <= 0) + --d; + if (len == sizeof(real)) + dd = p->pf; + else + dd = p->pd; + if (dd < 0.) { + signspace = sign = 1; + dd = -dd; + } + else { + sign = 0; + signspace = (int)f__cplus; +#ifndef VAX + if (!dd) { +#ifdef SIGNED_ZEROS + if (signbit_f2c(&dd)) + signspace = sign = 1; +#endif + dd = 0.; /* avoid -0 */ + } +#endif + } + delta = w - (2 /* for the . and the d adjustment above */ + + 2 /* for the E+ */ + signspace + d + e); +#ifdef WANT_LEAD_0 + if (f__scale <= 0 && delta > 0) { + delta--; + insert0 = 1; + } + else +#endif + if (delta < 0) { +nogood: + while(--w >= 0) + PUT('*'); + return(0); + } + if (f__scale < 0) + d += f__scale; + if (d > FMAX) { + d1 = d - FMAX; + d = FMAX; + } + else + d1 = 0; + sprintf(buf,"%#.*E", d, dd); +#ifndef VAX + /* check for NaN, Infinity */ + if (!isdigit(buf[0])) { + switch(buf[0]) { + case 'n': + case 'N': + signspace = 0; /* no sign for NaNs */ + } + delta = w - strlen(buf) - signspace; + if (delta < 0) + goto nogood; + while(--delta >= 0) + PUT(' '); + if (signspace) + PUT(sign ? '-' : '+'); + for(s = buf; *s; s++) + PUT(*s); + return 0; + } +#endif + se = buf + d + 3; +#ifdef GOOD_SPRINTF_EXPONENT /* When possible, exponent has 2 digits. 
*/ + if (f__scale != 1 && dd) + sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); +#else + if (dd) + sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); + else + strcpy(se, "+00"); +#endif + s = ++se; + if (e < 2) { + if (*s != '0') + goto nogood; + } +#ifndef VAX + /* accommodate 3 significant digits in exponent */ + if (s[2]) { +#ifdef Pedantic + if (!e0 && !s[3]) + for(s -= 2, e1 = 2; s[0] = s[1]; s++); + + /* Pedantic gives the behavior that Fortran 77 specifies, */ + /* i.e., requires that E be specified for exponent fields */ + /* of more than 3 digits. With Pedantic undefined, we get */ + /* the behavior that Cray displays -- you get a bigger */ + /* exponent field if it fits. */ +#else + if (!e0) { + for(s -= 2, e1 = 2; s[0] = s[1]; s++) +#ifdef CRAY + delta--; + if ((delta += 4) < 0) + goto nogood +#endif + ; + } +#endif + else if (e0 >= 0) + goto shift; + else + e1 = e; + } + else + shift: +#endif + for(s += 2, e1 = 2; *s; ++e1, ++s) + if (e1 >= e) + goto nogood; + while(--delta >= 0) + PUT(' '); + if (signspace) + PUT(sign ? '-' : '+'); + s = buf; + i = f__scale; + if (f__scale <= 0) { +#ifdef WANT_LEAD_0 + if (insert0) + PUT('0'); +#endif + PUT('.'); + for(; i < 0; ++i) + PUT('0'); + PUT(*s); + s += 2; + } + else if (f__scale > 1) { + PUT(*s); + s += 2; + while(--i > 0) + PUT(*s++); + PUT('.'); + } + if (d1) { + se -= 2; + while(s < se) PUT(*s++); + se += 2; + do PUT('0'); while(--d1 > 0); + } + while(s < se) + PUT(*s++); + if (e < 2) + PUT(s[1]); + else { + while(++e1 <= e) + PUT('0'); + while(*s) + PUT(*s++); + } + return 0; + } + + int +#ifdef KR_headers +wrt_F(p,w,d,len) ufloat *p; ftnlen len; +#else +wrt_F(ufloat *p, int w, int d, ftnlen len) +#endif +{ + int d1, sign, n; + double x; + char *b, buf[MAXINTDIGS+MAXFRACDIGS+4], *s; + + x= (len==sizeof(real)?p->pf:p->pd); + if (d < MAXFRACDIGS) + d1 = 0; + else { + d1 = d - MAXFRACDIGS; + d = MAXFRACDIGS; + } + if (x < 0.) 
+ { x = -x; sign = 1; } + else { + sign = 0; +#ifndef VAX + if (!x) { +#ifdef SIGNED_ZEROS + if (signbit_f2c(&x)) + sign = 2; +#endif + x = 0.; + } +#endif + } + + if (n = f__scale) + if (n > 0) + do x *= 10.; while(--n > 0); + else + do x *= 0.1; while(++n < 0); + +#ifdef USE_STRLEN + sprintf(b = buf, "%#.*f", d, x); + n = strlen(b) + d1; +#else + n = sprintf(b = buf, "%#.*f", d, x) + d1; +#endif + +#ifndef WANT_LEAD_0 + if (buf[0] == '0' && d) + { ++b; --n; } +#endif + if (sign == 1) { + /* check for all zeros */ + for(s = b;;) { + while(*s == '0') s++; + switch(*s) { + case '.': + s++; continue; + case 0: + sign = 0; + } + break; + } + } + if (sign || f__cplus) + ++n; + if (n > w) { +#ifdef WANT_LEAD_0 + if (buf[0] == '0' && --n == w) + ++b; + else +#endif + { + while(--w >= 0) + PUT('*'); + return 0; + } + } + for(w -= n; --w >= 0; ) + PUT(' '); + if (sign) + PUT('-'); + else if (f__cplus) + PUT('+'); + while(n = *b++) + PUT(n); + while(--d1 >= 0) + PUT('0'); + return 0; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/wrtfmt.c b/min-dgels/base/F2CLIBS/libf2c/wrtfmt.c new file mode 100644 index 0000000..a970db9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/wrtfmt.c @@ -0,0 +1,377 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif + +extern icilist *f__svic; +extern char *f__icptr; + + static int +mv_cur(Void) /* shouldn't use fseek because it insists on calling fflush */ + /* instead we know too much about stdio */ +{ + int cursor = f__cursor; + f__cursor = 0; + if(f__external == 0) { + if(cursor < 0) { + if(f__hiwater < f__recpos) + f__hiwater = f__recpos; + f__recpos += cursor; + f__icptr += cursor; + if(f__recpos < 0) + err(f__elist->cierr, 110, "left off"); + } + else if(cursor > 0) { + if(f__recpos + cursor >= f__svic->icirlen) + err(f__elist->cierr, 110, "recend"); + if(f__hiwater <= f__recpos) + for(; cursor > 0; cursor--) + (*f__putn)(' '); + else if(f__hiwater <= f__recpos 
+ cursor) { + cursor -= f__hiwater - f__recpos; + f__icptr += f__hiwater - f__recpos; + f__recpos = f__hiwater; + for(; cursor > 0; cursor--) + (*f__putn)(' '); + } + else { + f__icptr += cursor; + f__recpos += cursor; + } + } + return(0); + } + if (cursor > 0) { + if(f__hiwater <= f__recpos) + for(;cursor>0;cursor--) (*f__putn)(' '); + else if(f__hiwater <= f__recpos + cursor) { + cursor -= f__hiwater - f__recpos; + f__recpos = f__hiwater; + for(; cursor > 0; cursor--) + (*f__putn)(' '); + } + else { + f__recpos += cursor; + } + } + else if (cursor < 0) + { + if(cursor + f__recpos < 0) + err(f__elist->cierr,110,"left off"); + if(f__hiwater < f__recpos) + f__hiwater = f__recpos; + f__recpos += cursor; + } + return(0); +} + + static int +#ifdef KR_headers +wrt_Z(n,w,minlen,len) Uint *n; int w, minlen; ftnlen len; +#else +wrt_Z(Uint *n, int w, int minlen, ftnlen len) +#endif +{ + register char *s, *se; + register int i, w1; + static int one = 1; + static char hex[] = "0123456789ABCDEF"; + s = (char *)n; + --len; + if (*(char *)&one) { + /* little endian */ + se = s; + s += len; + i = -1; + } + else { + se = s + len; + i = 1; + } + for(;; s += i) + if (s == se || *s) + break; + w1 = (i*(se-s) << 1) + 1; + if (*s & 0xf0) + w1++; + if (w1 > w) + for(i = 0; i < w; i++) + (*f__putn)('*'); + else { + if ((minlen -= w1) > 0) + w1 += minlen; + while(--w >= w1) + (*f__putn)(' '); + while(--minlen >= 0) + (*f__putn)('0'); + if (!(*s & 0xf0)) { + (*f__putn)(hex[*s & 0xf]); + if (s == se) + return 0; + s += i; + } + for(;; s += i) { + (*f__putn)(hex[*s >> 4 & 0xf]); + (*f__putn)(hex[*s & 0xf]); + if (s == se) + break; + } + } + return 0; + } + + static int +#ifdef KR_headers +wrt_I(n,w,len, base) Uint *n; ftnlen len; register int base; +#else +wrt_I(Uint *n, int w, ftnlen len, register int base) +#endif +{ int ndigit,sign,spare,i; + longint x; + char *ans; + if(len==sizeof(integer)) x=n->il; + else if(len == sizeof(char)) x = n->ic; +#ifdef Allow_TYQUAD + else if (len == 
sizeof(longint)) x = n->ili; +#endif + else x=n->is; + ans=f__icvt(x,&ndigit,&sign, base); + spare=w-ndigit; + if(sign || f__cplus) spare--; + if(spare<0) + for(i=0;iil; + else if(len == sizeof(char)) x = n->ic; +#ifdef Allow_TYQUAD + else if (len == sizeof(longint)) x = n->ili; +#endif + else x=n->is; + ans=f__icvt(x,&ndigit,&sign, base); + if(sign || f__cplus) xsign=1; + else xsign=0; + if(ndigit+xsign>w || m+xsign>w) + { for(i=0;i=m) + spare=w-ndigit-xsign; + else + spare=w-m-xsign; + for(i=0;iil; + else if(sz == sizeof(char)) x = n->ic; + else x=n->is; + for(i=0;i 0) (*f__putn)(*p++); + return(0); +} + static int +#ifdef KR_headers +wrt_AW(p,w,len) char * p; ftnlen len; +#else +wrt_AW(char * p, int w, ftnlen len) +#endif +{ + while(w>len) + { w--; + (*f__putn)(' '); + } + while(w-- > 0) + (*f__putn)(*p++); + return(0); +} + + static int +#ifdef KR_headers +wrt_G(p,w,d,e,len) ufloat *p; ftnlen len; +#else +wrt_G(ufloat *p, int w, int d, int e, ftnlen len) +#endif +{ double up = 1,x; + int i=0,oldscale,n,j; + x = len==sizeof(real)?p->pf:p->pd; + if(x < 0 ) x = -x; + if(x<.1) { + if (x != 0.) + return(wrt_E(p,w,d,e,len)); + i = 1; + goto have_i; + } + for(;i<=d;i++,up*=10) + { if(x>=up) continue; + have_i: + oldscale = f__scale; + f__scale = 0; + if(e==0) n=4; + else n=e+2; + i=wrt_F(p,w-n,d-i,len); + for(j=0;jop) + { + default: + fprintf(stderr,"w_ed, unexpected code: %d\n", p->op); + sig_die(f__fmtbuf, 1); + case I: return(wrt_I((Uint *)ptr,p->p1,len, 10)); + case IM: + return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,10)); + + /* O and OM don't work right for character, double, complex, */ + /* or doublecomplex, and they differ from Fortran 90 in */ + /* showing a minus sign for negative values. 
*/ + + case O: return(wrt_I((Uint *)ptr, p->p1, len, 8)); + case OM: + return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,8)); + case L: return(wrt_L((Uint *)ptr,p->p1, len)); + case A: return(wrt_A(ptr,len)); + case AW: + return(wrt_AW(ptr,p->p1,len)); + case D: + case E: + case EE: + return(wrt_E((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); + case G: + case GE: + return(wrt_G((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); + case F: return(wrt_F((ufloat *)ptr,p->p1,p->p2.i[0],len)); + + /* Z and ZM assume 8-bit bytes. */ + + case Z: return(wrt_Z((Uint *)ptr,p->p1,0,len)); + case ZM: + return(wrt_Z((Uint *)ptr,p->p1,p->p2.i[0],len)); + } +} + + int +#ifdef KR_headers +w_ned(p) struct syl *p; +#else +w_ned(struct syl *p) +#endif +{ + switch(p->op) + { + default: fprintf(stderr,"w_ned, unexpected code: %d\n", p->op); + sig_die(f__fmtbuf, 1); + case SLASH: + return((*f__donewrec)()); + case T: f__cursor = p->p1-f__recpos - 1; + return(1); + case TL: f__cursor -= p->p1; + if(f__cursor < -f__recpos) /* TL1000, 1X */ + f__cursor = -f__recpos; + return(1); + case TR: + case X: + f__cursor += p->p1; + return(1); + case APOS: + return(wrt_AP(p->p2.s)); + case H: + return(wrt_H(p->p1,p->p2.s)); + } +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/wsfe.c b/min-dgels/base/F2CLIBS/libf2c/wsfe.c new file mode 100644 index 0000000..8709f3b --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/wsfe.c @@ -0,0 +1,78 @@ +/*write sequential formatted external*/ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#ifdef __cplusplus +extern "C" { +#endif + + int +x_wSL(Void) +{ + int n = f__putbuf('\n'); + f__hiwater = f__recpos = f__cursor = 0; + return(n == 0); +} + + static int +xw_end(Void) +{ + int n; + + if(f__nonl) { + f__putbuf(n = 0); + fflush(f__cf); + } + else + n = f__putbuf('\n'); + f__hiwater = f__recpos = f__cursor = 0; + return n; +} + + static int +xw_rev(Void) +{ + int n = 0; + if(f__workdone) { + n = f__putbuf('\n'); + f__workdone = 0; + } 
+ f__hiwater = f__recpos = f__cursor = 0; + return n; +} + +#ifdef KR_headers +integer s_wsfe(a) cilist *a; /*start*/ +#else +integer s_wsfe(cilist *a) /*start*/ +#endif +{ int n; + if(!f__init) f_init(); + f__reading=0; + f__sequential=1; + f__formatted=1; + f__external=1; + if(n=c_sfe(a)) return(n); + f__elist=a; + f__hiwater = f__cursor=f__recpos=0; + f__nonl = 0; + f__scale=0; + f__fmtbuf=a->cifmt; + f__cf=f__curunit->ufd; + if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); + f__putn= x_putc; + f__doed= w_ed; + f__doned= w_ned; + f__doend=xw_end; + f__dorevert=xw_rev; + f__donewrec=x_wSL; + fmt_bg(); + f__cplus=0; + f__cblank=f__curunit->ublnk; + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr,errno,"write start"); + return(0); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/wsle.c b/min-dgels/base/F2CLIBS/libf2c/wsle.c new file mode 100644 index 0000000..3e60270 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/wsle.c @@ -0,0 +1,42 @@ +#include "f2c.h" +#include "fio.h" +#include "fmt.h" +#include "lio.h" +#include "string.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer s_wsle(a) cilist *a; +#else +integer s_wsle(cilist *a) +#endif +{ + int n; + if(n=c_le(a)) return(n); + f__reading=0; + f__external=1; + f__formatted=1; + f__putn = x_putc; + f__lioproc = l_write; + L_len = LINE; + f__donewrec = x_wSL; + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr, errno, "list output start"); + return(0); + } + +integer e_wsle(Void) +{ + int n = f__putbuf('\n'); + f__recpos=0; +#ifdef ALWAYS_FLUSH + if (!n && fflush(f__cf)) + err(f__elist->cierr, errno, "write end"); +#endif + return(n); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/wsne.c b/min-dgels/base/F2CLIBS/libf2c/wsne.c new file mode 100644 index 0000000..e204a51 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/wsne.c @@ -0,0 +1,32 @@ +#include "f2c.h" +#include "fio.h" +#include 
"lio.h" +#ifdef __cplusplus +extern "C" { +#endif + + integer +#ifdef KR_headers +s_wsne(a) cilist *a; +#else +s_wsne(cilist *a) +#endif +{ + int n; + + if(n=c_le(a)) + return(n); + f__reading=0; + f__external=1; + f__formatted=1; + f__putn = x_putc; + L_len = LINE; + f__donewrec = x_wSL; + if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) + err(a->cierr, errno, "namelist output start"); + x_wsne(a); + return e_wsle(); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/xwsne.c b/min-dgels/base/F2CLIBS/libf2c/xwsne.c new file mode 100644 index 0000000..f810d3e --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/xwsne.c @@ -0,0 +1,77 @@ +#include "f2c.h" +#include "fio.h" +#include "lio.h" +#include "fmt.h" + +extern int f__Aquote; + + static VOID +nl_donewrec(Void) +{ + (*f__donewrec)(); + PUT(' '); + } + +#ifdef KR_headers +x_wsne(a) cilist *a; +#else +#include "string.h" +#ifdef __cplusplus +extern "C" { +#endif + + VOID +x_wsne(cilist *a) +#endif +{ + Namelist *nl; + char *s; + Vardesc *v, **vd, **vde; + ftnint number, type; + ftnlen *dims; + ftnlen size; + extern ftnlen f__typesize[]; + + nl = (Namelist *)a->cifmt; + PUT('&'); + for(s = nl->name; *s; s++) + PUT(*s); + PUT(' '); + f__Aquote = 1; + vd = nl->vars; + vde = vd + nl->nvars; + while(vd < vde) { + v = *vd++; + s = v->name; +#ifdef No_Extra_Namelist_Newlines + if (f__recpos+strlen(s)+2 >= L_len) +#endif + nl_donewrec(); + while(*s) + PUT(*s++); + PUT(' '); + PUT('='); + number = (dims = v->dims) ? 
dims[1] : 1; + type = v->type; + if (type < 0) { + size = -type; + type = TYCHAR; + } + else + size = f__typesize[type]; + l_write(&number, v->addr, size, type); + if (vd < vde) { + if (f__recpos+2 >= L_len) + nl_donewrec(); + PUT(','); + PUT(' '); + } + else if (f__recpos+1 >= L_len) + nl_donewrec(); + } + f__Aquote = 0; + PUT('/'); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_abs.c b/min-dgels/base/F2CLIBS/libf2c/z_abs.c new file mode 100644 index 0000000..4d8a015 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_abs.c @@ -0,0 +1,18 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +double f__cabs(); +double z_abs(z) doublecomplex *z; +#else +double f__cabs(double, double); +double z_abs(doublecomplex *z) +#endif +{ +return( f__cabs( z->r, z->i ) ); +} +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_cos.c b/min-dgels/base/F2CLIBS/libf2c/z_cos.c new file mode 100644 index 0000000..4abe8bf --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_cos.c @@ -0,0 +1,21 @@ +#include "f2c.h" + +#ifdef KR_headers +double sin(), cos(), sinh(), cosh(); +VOID z_cos(r, z) doublecomplex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +void z_cos(doublecomplex *r, doublecomplex *z) +#endif +{ + double zi = z->i, zr = z->r; + r->r = cos(zr) * cosh(zi); + r->i = - sin(zr) * sinh(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_div.c b/min-dgels/base/F2CLIBS/libf2c/z_div.c new file mode 100644 index 0000000..e45f360 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_div.c @@ -0,0 +1,50 @@ +#include "f2c.h" +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +extern VOID sig_die(); +VOID z_div(c, a, b) doublecomplex *a, *b, *c; +#else +extern void sig_die(const char*, int); +void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) +#endif +{ + double ratio, den; + double abr, abi, cr; + + if( 
(abr = b->r) < 0.) + abr = - abr; + if( (abi = b->i) < 0.) + abi = - abi; + if( abr <= abi ) + { + if(abi == 0) { +#ifdef IEEE_COMPLEX_DIVIDE + if (a->i != 0 || a->r != 0) + abi = 1.; + c->i = c->r = abi / abr; + return; +#else + sig_die("complex division by zero", 1); +#endif + } + ratio = b->r / b->i ; + den = b->i * (1 + ratio*ratio); + cr = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + + else + { + ratio = b->i / b->r ; + den = b->r * (1 + ratio*ratio); + cr = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + c->r = cr; + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_exp.c b/min-dgels/base/F2CLIBS/libf2c/z_exp.c new file mode 100644 index 0000000..7b8edfe --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_exp.c @@ -0,0 +1,23 @@ +#include "f2c.h" + +#ifdef KR_headers +double exp(), cos(), sin(); +VOID z_exp(r, z) doublecomplex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +void z_exp(doublecomplex *r, doublecomplex *z) +#endif +{ + double expx, zi = z->i; + + expx = exp(z->r); + r->r = expx * cos(zi); + r->i = expx * sin(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_log.c b/min-dgels/base/F2CLIBS/libf2c/z_log.c new file mode 100644 index 0000000..4f11bbe --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_log.c @@ -0,0 +1,121 @@ +#include "f2c.h" + +#ifdef KR_headers +double log(), f__cabs(), atan2(); +#define ANSI(x) () +#else +#define ANSI(x) x +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +extern double f__cabs(double, double); +#endif + +#ifndef NO_DOUBLE_EXTENDED +#ifndef GCC_COMPARE_BUG_FIXED +#ifndef Pre20000310 +#ifdef Comment +Some versions of gcc, such as 2.95.3 and 3.0.4, are buggy under -O2 or -O3: +on IA32 (Intel 80x87) systems, they may do comparisons on values computed +in extended-precision registers. 
This can lead to the test "s > s0" that +was used below being carried out incorrectly. The fix below cannot be +spoiled by overzealous optimization, since the compiler cannot know +whether gcc_bug_bypass_diff_F2C will be nonzero. (We expect it always +to be zero. The weird name is unlikely to collide with anything.) + +An example (provided by Ulrich Jakobus) where the bug fix matters is + + double complex a, b + a = (.1099557428756427618354862829619, .9857360542953131909982289471372) + b = log(a) + +An alternative to the fix below would be to use 53-bit rounding precision, +but the means of specifying this 80x87 feature are highly unportable. +#endif /*Comment*/ +#define BYPASS_GCC_COMPARE_BUG +double (*gcc_bug_bypass_diff_F2C) ANSI((double*,double*)); + static double +#ifdef KR_headers +diff1(a,b) double *a, *b; +#else +diff1(double *a, double *b) +#endif +{ return *a - *b; } +#endif /*Pre20000310*/ +#endif /*GCC_COMPARE_BUG_FIXED*/ +#endif /*NO_DOUBLE_EXTENDED*/ + +#ifdef KR_headers +VOID z_log(r, z) doublecomplex *r, *z; +#else +void z_log(doublecomplex *r, doublecomplex *z) +#endif +{ + double s, s0, t, t2, u, v; + double zi = z->i, zr = z->r; +#ifdef BYPASS_GCC_COMPARE_BUG + double (*diff) ANSI((double*,double*)); +#endif + + r->i = atan2(zi, zr); +#ifdef Pre20000310 + r->r = log( f__cabs( zr, zi ) ); +#else + if (zi < 0) + zi = -zi; + if (zr < 0) + zr = -zr; + if (zr < zi) { + t = zi; + zi = zr; + zr = t; + } + t = zi/zr; + s = zr * sqrt(1 + t*t); + /* now s = f__cabs(zi,zr), and zr = |zr| >= |zi| = zi */ + if ((t = s - 1) < 0) + t = -t; + if (t > .01) + r->r = log(s); + else { + +#ifdef Comment + + log(1+x) = x - x^2/2 + x^3/3 - x^4/4 + - ... + + = x(1 - x/2 + x^2/3 -+...) + + [sqrt(y^2 + z^2) - 1] * [sqrt(y^2 + z^2) + 1] = y^2 + z^2 - 1, so + + sqrt(y^2 + z^2) - 1 = (y^2 + z^2 - 1) / [sqrt(y^2 + z^2) + 1] + +#endif /*Comment*/ + +#ifdef BYPASS_GCC_COMPARE_BUG + if (!(diff = gcc_bug_bypass_diff_F2C)) + diff = diff1; +#endif + t = ((zr*zr - 1.) 
+ zi*zi) / (s + 1); + t2 = t*t; + s = 1. - 0.5*t; + u = v = 1; + do { + s0 = s; + u *= t2; + v += 2; + s += u/v - t*u/(v+1); + } +#ifdef BYPASS_GCC_COMPARE_BUG + while(s - s0 > 1e-18 || (*diff)(&s,&s0) > 0.); +#else + while(s > s0); +#endif + r->r = s*t; + } +#endif + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_sin.c b/min-dgels/base/F2CLIBS/libf2c/z_sin.c new file mode 100644 index 0000000..01225a9 --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_sin.c @@ -0,0 +1,21 @@ +#include "f2c.h" + +#ifdef KR_headers +double sin(), cos(), sinh(), cosh(); +VOID z_sin(r, z) doublecomplex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +void z_sin(doublecomplex *r, doublecomplex *z) +#endif +{ + double zi = z->i, zr = z->r; + r->r = sin(zr) * cosh(zi); + r->i = cos(zr) * sinh(zi); + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/F2CLIBS/libf2c/z_sqrt.c b/min-dgels/base/F2CLIBS/libf2c/z_sqrt.c new file mode 100644 index 0000000..35bd44c --- /dev/null +++ b/min-dgels/base/F2CLIBS/libf2c/z_sqrt.c @@ -0,0 +1,35 @@ +#include "f2c.h" + +#ifdef KR_headers +double sqrt(), f__cabs(); +VOID z_sqrt(r, z) doublecomplex *r, *z; +#else +#undef abs +#include "math.h" +#ifdef __cplusplus +extern "C" { +#endif +extern double f__cabs(double, double); +void z_sqrt(doublecomplex *r, doublecomplex *z) +#endif +{ + double mag, zi = z->i, zr = z->r; + + if( (mag = f__cabs(zr, zi)) == 0.) 
+ r->r = r->i = 0.; + else if(zr > 0) + { + r->r = sqrt(0.5 * (mag + zr) ); + r->i = zi / r->r / 2; + } + else + { + r->i = sqrt(0.5 * (mag - zr) ); + if(zi < 0) + r->i = - r->i; + r->r = zi / r->i / 2; + } + } +#ifdef __cplusplus +} +#endif diff --git a/min-dgels/base/INCLUDE/blaswrap.h b/min-dgels/base/INCLUDE/blaswrap.h new file mode 100644 index 0000000..769b599 --- /dev/null +++ b/min-dgels/base/INCLUDE/blaswrap.h @@ -0,0 +1,8 @@ +/* CLAPACK 3.0 BLAS wrapper macros + * Feb 5, 2000 + */ + +#ifndef __BLASWRAP_H +#define __BLASWRAP_H + +#endif /* __BLASWRAP_H */ diff --git a/min-dgels/base/INCLUDE/clapack.h b/min-dgels/base/INCLUDE/clapack.h new file mode 100644 index 0000000..60f778a --- /dev/null +++ b/min-dgels/base/INCLUDE/clapack.h @@ -0,0 +1,7262 @@ +/* header file for clapack 3.2.1 */ + +#ifndef __CLAPACK_H +#define __CLAPACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Subroutine */ int _starpu_caxpy_(integer *n, complex *ca, complex *cx, integer * + incx, complex *cy, integer *incy); + +/* Subroutine */ int _starpu_ccopy_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy); + +/* Complex */ VOID _starpu_cdotc_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy); + +/* Complex */ VOID _starpu_cdotu_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy); + +/* Subroutine */ int _starpu_cgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, complex *alpha, complex *a, integer *lda, complex *x, + integer *incx, complex *beta, complex *y, integer *incy); + +/* Subroutine */ int _starpu_cgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_cgemv_(char *trans, integer *m, integer *n, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, 
integer *incy); + +/* Subroutine */ int _starpu_cgerc_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cgeru_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_chbmv_(char *uplo, integer *n, integer *k, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy); + +/* Subroutine */ int _starpu_chemm_(char *side, char *uplo, integer *m, integer *n, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_chemv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy); + +/* Subroutine */ int _starpu_cher_(char *uplo, integer *n, real *alpha, complex *x, + integer *incx, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cher2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); + +/* Subroutine */ int _starpu_cher2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + real *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_cherk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, complex *a, integer *lda, real *beta, complex *c__, + integer *ldc); + +/* Subroutine */ int _starpu_chpmv_(char *uplo, integer *n, complex *alpha, complex * + ap, complex *x, integer *incx, complex *beta, complex *y, integer * + incy); + +/* Subroutine */ int _starpu_chpr_(char *uplo, integer *n, real *alpha, complex *x, + integer *incx, complex *ap); + +/* Subroutine */ int _starpu_chpr2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, 
integer *incy, complex *ap); + +/* Subroutine */ int _starpu_crotg_(complex *ca, complex *cb, real *c__, complex *s); + +/* Subroutine */ int _starpu_cscal_(integer *n, complex *ca, complex *cx, integer * + incx); + +/* Subroutine */ int _starpu__starpu_csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s); + +/* Subroutine */ int _starpu_csscal_(integer *n, real *sa, complex *cx, integer *incx); + +/* Subroutine */ int _starpu_cswap_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy); + +/* Subroutine */ int _starpu_csymm_(char *side, char *uplo, integer *m, integer *n, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_csyr2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + complex *beta, complex *c__, integer *ldc); + +/* Subroutine */ int _starpu_csyrk_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *beta, complex *c__, + integer *ldc); + +/* Subroutine */ int _starpu_ctbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, complex *a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, complex *a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctpmv_(char *uplo, char *trans, char *diag, integer *n, + complex *ap, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctpsv_(char *uplo, char *trans, char *diag, integer *n, + complex *ap, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctrmv_(char *uplo, char *trans, char *diag, integer *n, + complex 
*a, integer *lda, complex *x, integer *incx); + +/* Subroutine */ int _starpu_ctrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctrsv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx); + +doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx); + +/* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy); + +doublereal _starpu_dcabs1_(doublecomplex *z__); + +/* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy); + +doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy); + +/* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, doublereal *alpha, doublereal *a, integer *lda, + doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy); + +/* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc); + +/* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda); + +doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s); + +/* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, 
doublereal *c__, + doublereal *s); + +/* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *dparam); + +/* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * + dx1, doublereal *dy1, doublereal *dparam); + +/* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx); + +doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * + incy); + +/* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, + doublereal *ap, doublereal *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *ap); + +/* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *ap); + +/* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy); + +/* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy); + +/* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + 
doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc); + +/* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *ap, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx); + +/* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx); + +doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx); + +doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx); + +integer _starpu_icamax_(integer *n, complex *cx, integer *incx); + +integer _starpu_idamax_(integer *n, 
doublereal *dx, integer *incx); + +integer _starpu_isamax_(integer *n, real *sx, integer *incx); + +integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx); + +logical _starpu_lsame_(char *ca, char *cb); + +doublereal _starpu_sasum_(integer *n, real *sx, integer *incx); + +/* Subroutine */ int _starpu_saxpy_(integer *n, real *sa, real *sx, integer *incx, + real *sy, integer *incy); + +doublereal _starpu_scabs1_(complex *z__); + +doublereal _starpu_scasum_(integer *n, complex *cx, integer *incx); + +doublereal _starpu_scnrm2_(integer *n, complex *x, integer *incx); + +/* Subroutine */ int _starpu_scopy_(integer *n, real *sx, integer *incx, real *sy, + integer *incy); + +doublereal _starpu_sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy); + +doublereal _starpu_sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, + integer *incy); + +/* Subroutine */ int _starpu_sgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, real *alpha, real *a, integer *lda, real *x, integer * + incx, real *beta, real *y, integer *incy); + +/* Subroutine */ int _starpu_sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc); + +/* Subroutine */ int _starpu_sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +/* Subroutine */ int _starpu_sger_(integer *m, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda); + +doublereal _starpu_snrm2_(integer *n, real *x, integer *incx); + +/* Subroutine */ int _starpu_srot_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *c__, real *s); + +/* Subroutine */ int _starpu_srotg_(real *sa, real *sb, real *c__, real *s); + +/* Subroutine */ int _starpu_srotm_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real 
*sparam); + +/* Subroutine */ int _starpu_srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real + *sparam); + +/* Subroutine */ int _starpu_ssbmv_(char *uplo, integer *n, integer *k, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +/* Subroutine */ int _starpu_sscal_(integer *n, real *sa, real *sx, integer *incx); + +/* Subroutine */ int _starpu_sspmv_(char *uplo, integer *n, real *alpha, real *ap, + real *x, integer *incx, real *beta, real *y, integer *incy); + +/* Subroutine */ int _starpu_sspr_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *ap); + +/* Subroutine */ int _starpu_sspr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *ap); + +/* Subroutine */ int _starpu_sswap_(integer *n, real *sx, integer *incx, real *sy, + integer *incy); + +/* Subroutine */ int _starpu_ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int _starpu_ssymv_(char *uplo, integer *n, real *alpha, real *a, + integer *lda, real *x, integer *incx, real *beta, real *y, integer * + incy); + +/* Subroutine */ int _starpu_ssyr_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *a, integer *lda); + +/* Subroutine */ int _starpu_ssyr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda); + +/* Subroutine */ int _starpu_ssyr2k_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int _starpu_ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc); + +/* Subroutine */ int _starpu_stbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, real *a, integer *lda, real *x, 
integer *incx); + +/* Subroutine */ int _starpu_stbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_stpmv_(char *uplo, char *trans, char *diag, integer *n, + real *ap, real *x, integer *incx); + +/* Subroutine */ int _starpu_stpsv_(char *uplo, char *trans, char *diag, integer *n, + real *ap, real *x, integer *incx); + +/* Subroutine */ int _starpu_strmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_strmv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_strsv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx); + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len); + +/* Subroutine */ int _starpu_zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx, doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zcopy_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy); + +/* Double Complex */ VOID _starpu_zdotc_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); + +/* Double Complex */ VOID _starpu_zdotu_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zdrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s); + +/* Subroutine */ int 
_starpu_zdscal_(integer *n, doublereal *da, doublecomplex *zx, + integer *incx); + +/* Subroutine */ int _starpu_zgbmv_(char *trans, integer *m, integer *n, integer *kl, + integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex * + y, integer *incy); + +/* Subroutine */ int _starpu_zgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * + c__, integer *ldc); + +/* Subroutine */ int _starpu_zgemv_(char *trans, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * + incy); + +/* Subroutine */ int _starpu_zgerc_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zgeru_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zhbmv_(char *uplo, integer *n, integer *k, doublecomplex + *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * + incx, doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zhemm_(char *side, char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zhemv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zher_(char *uplo, integer *n, doublereal *alpha, + doublecomplex *x, integer *incx, doublecomplex *a, integer 
*lda); + +/* Subroutine */ int _starpu_zher2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zher2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_zherk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, + doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_zhpmv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * + beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zhpr_(char *uplo, integer *n, doublereal *alpha, + doublecomplex *x, integer *incx, doublecomplex *ap); + +/* Subroutine */ int _starpu_zhpr2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *ap); + +/* Subroutine */ int _starpu_zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal * + c__, doublecomplex *s); + +/* Subroutine */ int _starpu_zscal_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx); + +/* Subroutine */ int _starpu_zswap_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy); + +/* Subroutine */ int _starpu_zsymm_(char *side, char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * + ldc); + +/* Subroutine */ int _starpu_zsyrk_(char 
*uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + beta, doublecomplex *c__, integer *ldc); + +/* Subroutine */ int _starpu_ztbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer + *incx); + +/* Subroutine */ int _starpu_ztbsv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer + *incx); + +/* Subroutine */ int _starpu_ztpmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztpsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztrmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_ztrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztrsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_cbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, complex *vt, integer *ldvt, + complex *u, integer *ldu, complex *c__, integer *ldc, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, complex *ab, integer *ldab, real *d__, + real *e, complex *q, integer *ldq, complex *pt, integer *ldpt, + complex *c__, integer 
*ldc, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbcon_(char *norm, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbequ_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_cgbequb_(integer *m, integer *n, integer *kl, integer * + ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, + real *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_cgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * + ldafb, integer *ipiv, complex *b, integer *ldb, complex *x, integer * + ldx, real *ferr, real *berr, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * + afb, integer *ldafb, integer *ipiv, real *r__, real *c__, complex *b, + integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_cgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real + *ferr, real *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int 
_starpu_cgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * + afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, complex *ab, integer *ldab, integer *ipiv, complex + *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, complex *v, integer *ldv, + integer *info); + +/* Subroutine */ int _starpu_cgebal_(char *job, integer *n, complex *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info); + +/* Subroutine */ int _starpu_cgebd2_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *info); + +/* Subroutine */ int _starpu_cgebrd_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgecon_(char *norm, integer *n, complex *a, integer *lda, + real *anorm, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgeequ_(integer *m, integer *n, complex *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, + integer *info); + +/* Subroutine */ int 
_starpu_cgeequb_(integer *m, integer *n, complex *a, integer * + lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, + integer *info); + +/* Subroutine */ int _starpu_cgees_(char *jobvs, char *sort, L_fp select, integer *n, + complex *a, integer *lda, integer *sdim, complex *w, complex *vs, + integer *ldvs, complex *work, integer *lwork, real *rwork, logical * + bwork, integer *info); + +/* Subroutine */ int _starpu_cgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, complex *a, integer *lda, integer *sdim, complex * + w, complex *vs, integer *ldvs, real *rconde, real *rcondv, complex * + work, integer *lwork, real *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, + integer *ldvr, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, complex *a, integer *lda, complex *w, complex *vl, + integer *ldvl, complex *vr, integer *ldvr, integer *ilo, integer *ihi, + real *scale, real *abnrm, real *rconde, real *rcondv, complex *work, + integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgegs_(char *jobvsl, char *jobvsr, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, complex *alpha, complex * + beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, + complex *work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgegv_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * + work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgehd2_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, 
integer *info); + +/* Subroutine */ int _starpu_cgehrd_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_cgelq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgelqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgels_(char *trans, integer *m, integer *n, integer * + nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgelsd_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_cgelss_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgelsx_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgelsy_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeql2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgeqlf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgeqp3_(integer *m, integer *n, complex *a, 
integer *lda, + integer *jpvt, complex *tau, complex *work, integer *lwork, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_cgeqpf_(integer *m, integer *n, complex *a, integer *lda, + integer *jpvt, complex *tau, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgeqr2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgeqrf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgerfs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgerq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgerqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgesc2_(integer *n, complex *a, integer *lda, complex * + rhs, integer *ipiv, integer *jpiv, real *scale); + +/* Subroutine */ int _starpu_cgesdd_(char *jobz, integer *m, integer *n, complex *a, + integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer + *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_cgesv_(integer *n, integer 
*nrhs, complex *a, integer * + lda, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + complex *a, integer *lda, real *s, complex *u, integer *ldu, complex * + vt, integer *ldvt, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, + complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, + complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, complex *work, real *rwork, integer * + info); + +/* Subroutine */ int _starpu_cgetc2_(integer *n, complex *a, integer *lda, integer * + ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_cgetf2_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgetrf_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgetri_(integer *n, complex *a, integer *lda, integer * + ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgetrs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *lscale, real *rscale, integer *m, complex *v, + integer *ldv, integer *info); 
+ +/* Subroutine */ int _starpu_cggbal_(char *job, integer *n, complex *a, integer *lda, + complex *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, + real *rscale, real *work, integer *info); + +/* Subroutine */ int _starpu_cgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, complex *a, integer *lda, complex *b, integer * + ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, + integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * + lwork, real *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, + integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * + vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real + *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_cggev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * + work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *alpha, complex *beta, complex *vl, integer *ldvl, complex * + vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real * + rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, complex + *work, integer *lwork, real *rwork, integer *iwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_cggglm_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *d__, complex *x, + complex *y, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cgghrd_(char *compq, char 
*compz, integer *n, integer * + ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, + complex *q, integer *ldq, complex *z__, integer *ldz, integer *info); + +/* Subroutine */ int _starpu_cgglse_(integer *m, integer *n, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, + complex *x, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggqrf_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggrqf_(integer *m, integer *p, integer *n, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, complex *a, integer * + lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + complex *work, real *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, complex *a, integer *lda, complex *b, integer + *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + integer *iwork, real *rwork, complex *tau, complex *work, integer * + info); + +/* Subroutine */ int _starpu_cgtcon_(char *norm, integer *n, complex *dl, complex * + d__, complex *du, complex *du2, integer *ipiv, real *anorm, real * + rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_cgtrfs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *dlf, complex *df, complex * + duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * + x, integer *ldx, real 
*ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cgtsv_(integer *n, integer *nrhs, complex *dl, complex * + d__, complex *du, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex * + df, complex *duf, complex *du2, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cgttrf_(integer *n, complex *dl, complex *d__, complex * + du, complex *du2, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_cgttrs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex * + b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cgtts2_(integer *itrans, integer *n, integer *nrhs, + complex *dl, complex *d__, complex *du, complex *du2, integer *ipiv, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_chbev_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chbevd_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_chbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, complex *ab, integer *ldab, complex *q, integer *ldq, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chbgst_(char *vect, char *uplo, integer *n, integer *ka, + 
integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + complex *x, integer *ldx, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_chbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, + integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer * + il, integer *iu, real *abstol, integer *m, real *w, complex *z__, + integer *ldz, complex *work, real *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_chbtrd_(char *vect, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *d__, real *e, complex *q, integer * + ldq, complex *work, integer *info); + +/* Subroutine */ int _starpu_checon_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, complex *work, integer * + info); + +/* Subroutine */ int _starpu_cheequb_(char *uplo, integer *n, complex *a, integer * + lda, real *s, real *scond, real *amax, complex *work, integer *info); + +/* Subroutine */ int _starpu_cheev_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, 
integer *info); + +/* Subroutine */ int _starpu_cheevr_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + integer *isuppz, complex *work, integer *lwork, real *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_cheevx_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_chegs2_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chegst_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chegv_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chegvd_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_chegvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_cherfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, 
integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cherfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_chesv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_chesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *ferr, real *berr, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chetd2_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info); + +/* Subroutine */ int _starpu_chetf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_chetrf_(char *uplo, integer *n, 
complex *a, integer *lda, + integer *ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_chetri_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_chetrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_chfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, real *alpha, complex *a, integer *lda, real *beta, + complex *c__); + +/* Subroutine */ int _starpu_chgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *t, + integer *ldt, complex *alpha, complex *beta, complex *q, integer *ldq, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *info); + +/* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, + integer *trans); + +/* Subroutine */ int _starpu_chpcon_(char *uplo, integer *n, complex *ap, integer * + ipiv, real *anorm, real *rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_chpev_(char *jobz, char *uplo, integer *n, complex *ap, + real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chpevd_(char *jobz, char *uplo, integer *n, complex *ap, + real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_chpevx_(char *jobz, char *range, char *uplo, integer *n, + complex *ap, real *vl, real *vu, integer *il, integer *iu, real * + abstol, integer *m, real *w, complex *z__, integer *ldz, complex * + work, real *rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chpgst_(integer *itype, char *uplo, integer *n, complex * + ap, complex *bp, integer *info); + +/* Subroutine 
*/ int _starpu_chpgv_(integer *itype, char *jobz, char *uplo, integer * + n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chpgvd_(integer *itype, char *jobz, char *uplo, integer * + n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_chpgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, complex *ap, complex *bp, real *vl, real *vu, + integer *il, integer *iu, real *abstol, integer *m, real *w, complex * + z__, integer *ldz, complex *work, real *rwork, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_chprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_chpsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_chpsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_chptrd_(char *uplo, integer *n, complex *ap, real *d__, + real *e, complex *tau, integer *info); + +/* Subroutine */ int _starpu_chptrf_(char *uplo, integer *n, complex *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_chptri_(char *uplo, integer *n, complex *ap, integer * + ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_chptrs_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int 
_starpu_chsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, complex *h__, integer *ldh, complex *w, complex * + vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer * + m, complex *work, real *rwork, integer *ifaill, integer *ifailr, + integer *info); + +/* Subroutine */ int _starpu_chseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, + integer *ldz, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, real *alpha, complex *ab, integer *ldab, + complex *x, integer *incx, real *beta, real *y, integer *incy); + +doublereal _starpu_cla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, real *c__, logical *capply, integer *info, complex *work, real * + rwork, ftnlen trans_len); + +doublereal _starpu_cla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen + trans_len); + +/* Subroutine */ int _starpu_cla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * + ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex * + y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, + real *errs_c__, complex *res, real *ayb, complex *dy, complex * + y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, + logical *ignore_cwise__, integer *info); + +doublereal _starpu_cla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, complex *ab, integer *ldab, complex *afb, integer *ldafb); + +/* Subroutine */ int _starpu_cla_geamv__(integer 
*trans, integer *m, integer *n, real + *alpha, complex *a, integer *lda, complex *x, integer *incx, real * + beta, real *y, integer *incy); + +doublereal _starpu_cla_gercond_c__(char *trans, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen trans_len); + +doublereal _starpu_cla_gercond_x__(char *trans, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_cla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, + complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, + integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, + real *ayb, complex *dy, complex *y_tail__, real *rcond, integer * + ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, + integer *info); + +/* Subroutine */ int _starpu_cla_heamv__(integer *uplo, integer *n, real *alpha, + complex *a, integer *lda, complex *x, integer *incx, real *beta, real + *y, integer *incy); + +doublereal _starpu_cla_hercond_c__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen uplo_len); + +doublereal _starpu_cla_hercond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_herfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, + integer *ldb, complex *y, integer *ldy, real *berr_out__, 
integer * + n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, + complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * + rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_cla_herpvgrw__(char *uplo, integer *n, integer *info, complex *a, + integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_lin_berr__(integer *n, integer *nz, integer *nrhs, + complex *res, real *ayb, real *berr); + +doublereal _starpu_cla_porcond_c__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, real *c__, logical *capply, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +doublereal _starpu_cla_porcond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, complex *x, integer *info, complex *work, + real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, logical *colequ, real *c__, complex *b, integer *ldb, + complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real * + errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, + complex *y_tail__, real *rcond, integer *ithresh, real *rthresh, real + *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_cla_porpvgrw__(char *uplo, integer *ncols, complex *a, integer * + lda, complex *af, integer *ldaf, real *work, ftnlen uplo_len); + +doublereal _starpu_cla_rpvgrw__(integer *n, integer *ncols, complex *a, integer *lda, + complex *af, integer *ldaf); + +/* Subroutine */ int _starpu_cla_syamv__(integer *uplo, integer *n, real *alpha, + complex *a, integer *lda, complex *x, integer *incx, real *beta, real + *y, integer *incy); + +doublereal _starpu_cla_syrcond_c__(char *uplo, integer *n, complex *a, integer *lda, + 
complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, + integer *info, complex *work, real *rwork, ftnlen uplo_len); + +doublereal _starpu_cla_syrcond_x__(char *uplo, integer *n, complex *a, integer *lda, + complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, + complex *work, real *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, complex *a, integer *lda, complex *af, + integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, + integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, + complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * + rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_cla_syrpvgrw__(char *uplo, integer *n, integer *info, complex *a, + integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_cla_wwaddw__(integer *n, complex *x, complex *y, complex + *w); + +/* Subroutine */ int _starpu_clabrd_(integer *m, integer *n, integer *nb, complex *a, + integer *lda, real *d__, real *e, complex *tauq, complex *taup, + complex *x, integer *ldx, complex *y, integer *ldy); + +/* Subroutine */ int _starpu_clacgv_(integer *n, complex *x, integer *incx); + +/* Subroutine */ int _starpu_clacn2_(integer *n, complex *v, complex *x, real *est, + integer *kase, integer *isave); + +/* Subroutine */ int _starpu_clacon_(integer *n, complex *v, complex *x, real *est, + integer *kase); + +/* Subroutine */ int _starpu_clacp2_(char *uplo, integer *m, integer *n, real *a, + integer *lda, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clacpy_(char *uplo, integer *m, integer *n, complex *a, + integer *lda, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clacrm_(integer *m, integer *n, complex *a, 
integer *lda, + real *b, integer *ldb, complex *c__, integer *ldc, real *rwork); + +/* Subroutine */ int _starpu_clacrt_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, complex *c__, complex *s); + +/* Complex */ VOID _starpu_cladiv_(complex * ret_val, complex *x, complex *y); + +/* Subroutine */ int _starpu_claed0_(integer *qsiz, integer *n, real *d__, real *e, + complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_claed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * + q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * + qptr, integer *prmptr, integer *perm, integer *givptr, integer * + givcol, real *givnum, complex *work, real *rwork, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_claed8_(integer *k, integer *n, integer *qsiz, complex * + q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, + real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, + integer *indx, integer *indxq, integer *perm, integer *givptr, + integer *givcol, real *givnum, integer *info); + +/* Subroutine */ int _starpu_claein_(logical *rightv, logical *noinit, integer *n, + complex *h__, integer *ldh, complex *w, complex *v, complex *b, + integer *ldb, real *rwork, real *eps3, real *smlnum, integer *info); + +/* Subroutine */ int _starpu_claesy_(complex *a, complex *b, complex *c__, complex * + rt1, complex *rt2, complex *evscal, complex *cs1, complex *sn1); + +/* Subroutine */ int _starpu_claev2_(complex *a, complex *b, complex *c__, real *rt1, + real *rt2, real *cs1, complex *sn1); + +/* Subroutine */ int _starpu_clag2z_(integer *m, integer *n, complex *sa, integer * + ldsa, doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_clags2_(logical *upper, real *a1, complex *a2, real *a3, + real *b1, complex *b2, real *b3, real *csu, 
complex *snu, real *csv, + complex *snv, real *csq, complex *snq); + +/* Subroutine */ int _starpu_clagtm_(char *trans, integer *n, integer *nrhs, real * + alpha, complex *dl, complex *d__, complex *du, complex *x, integer * + ldx, real *beta, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_clahef_(char *uplo, integer *n, integer *nb, integer *kb, + complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, + integer *info); + +/* Subroutine */ int _starpu_clahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + info); + +/* Subroutine */ int _starpu_clahr2_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy); + +/* Subroutine */ int _starpu_clahrd_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy); + +/* Subroutine */ int _starpu_claic1_(integer *job, integer *j, complex *x, real *sest, + complex *w, complex *gamma, real *sestpr, complex *s, complex *c__); + +/* Subroutine */ int _starpu_clals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_clalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, + real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, + real *z__, real *poles, integer *givptr, integer *givcol, integer * + ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, + integer *iwork, integer *info); + +/* Subroutine */ 
int _starpu_clalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, + integer *rank, complex *work, real *rwork, integer *iwork, integer * + info); + +doublereal _starpu_clangb_(char *norm, integer *n, integer *kl, integer *ku, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clange_(char *norm, integer *m, integer *n, complex *a, integer * + lda, real *work); + +doublereal _starpu_clangt_(char *norm, integer *n, complex *dl, complex *d__, complex + *du); + +doublereal _starpu_clanhb_(char *norm, char *uplo, integer *n, integer *k, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work); + +doublereal _starpu_clanhf_(char *norm, char *transr, char *uplo, integer *n, complex * + a, real *work); + +doublereal _starpu_clanhp_(char *norm, char *uplo, integer *n, complex *ap, real * + work); + +doublereal _starpu_clanhs_(char *norm, integer *n, complex *a, integer *lda, real * + work); + +doublereal _starpu_clanht_(char *norm, integer *n, real *d__, complex *e); + +doublereal _starpu_clansb_(char *norm, char *uplo, integer *n, integer *k, complex * + ab, integer *ldab, real *work); + +doublereal _starpu_clansp_(char *norm, char *uplo, integer *n, complex *ap, real * + work); + +doublereal _starpu_clansy_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work); + +doublereal _starpu_clantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + complex *ab, integer *ldab, real *work); + +doublereal _starpu_clantp_(char *norm, char *uplo, char *diag, integer *n, complex * + ap, real *work); + +doublereal _starpu_clantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + complex *a, integer *lda, real *work); + +/* Subroutine */ int _starpu_clapll_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *ssmin); + +/* Subroutine */ int 
_starpu_clapmt_(logical *forwrd, integer *m, integer *n, complex + *x, integer *ldx, integer *k); + +/* Subroutine */ int _starpu_claqgb_(integer *m, integer *n, integer *kl, integer *ku, + complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqge_(integer *m, integer *n, complex *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * + equed); + +/* Subroutine */ int _starpu_claqhb_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqhe_(char *uplo, integer *n, complex *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqhp_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqp2_(integer *m, integer *n, integer *offset, complex + *a, integer *lda, integer *jpvt, complex *tau, real *vn1, real *vn2, + complex *work); + +/* Subroutine */ int _starpu_claqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex * + tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf); + +/* Subroutine */ int _starpu_claqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_claqr1_(integer *n, complex *h__, integer *ldh, complex * + s1, complex *s2, complex *v); + +/* Subroutine */ int _starpu_claqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, 
integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork); + +/* Subroutine */ int _starpu_claqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork); + +/* Subroutine */ int _starpu_claqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_claqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, + complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * + z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, + integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, + integer *ldwh); + +/* Subroutine */ int _starpu_claqsb_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqsp_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_claqsy_(char *uplo, integer *n, complex *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_clar1v_(integer *n, integer *b1, integer *bn, real * + lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * + gaptol, complex *z__, logical *wantnc, integer *negcnt, real *ztz, + real *mingma, integer *r__, integer *isuppz, real *nrminv, real * + resid, real *rqcorr, real *work); + +/* Subroutine */ int _starpu_clar2v_(integer *n, complex *x, complex *y, complex *z__, + integer 
*incx, real *c__, complex *s, integer *incc); + +/* Subroutine */ int _starpu_clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork); + +/* Subroutine */ int _starpu_clarf_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work); + +/* Subroutine */ int _starpu_clarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, + complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, + integer *ldwork); + +/* Subroutine */ int _starpu_clarfg_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau); + +/* Subroutine */ int _starpu_clarfp_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau); + +/* Subroutine */ int _starpu_clarft_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); + +/* Subroutine */ int _starpu_clarfx_(char *side, integer *m, integer *n, complex *v, + complex *tau, complex *c__, integer *ldc, complex *work); + +/* Subroutine */ int _starpu_clargv_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *c__, integer *incc); + +/* Subroutine */ int _starpu_clarnv_(integer *idist, integer *iseed, integer *n, + complex *x); + +/* Subroutine */ int _starpu_clarrv_(integer *n, real *vl, real *vu, real *d__, real * + l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * + dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, + real *wgap, integer *iblock, integer *indexw, real *gers, complex * + z__, integer *ldz, integer *isuppz, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_clarscl2_(integer *m, integer *n, real *d__, complex *x, + integer *ldx); + +/* Subroutine */ int _starpu_clartg_(complex *f, complex *g, real *cs, complex *sn, + complex *r__); + +/* Subroutine 
*/ int _starpu_clartv_(integer *n, complex *x, integer *incx, complex * + y, integer *incy, real *c__, complex *s, integer *incc); + +/* Subroutine */ int _starpu_clarz_(char *side, integer *m, integer *n, integer *l, + complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, + complex *work); + +/* Subroutine */ int _starpu_clarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, complex *v, + integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, + complex *work, integer *ldwork); + +/* Subroutine */ int _starpu_clarzt_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); + +/* Subroutine */ int _starpu_clascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_clascl2_(integer *m, integer *n, real *d__, complex *x, + integer *ldx); + +/* Subroutine */ int _starpu_claset_(char *uplo, integer *m, integer *n, complex * + alpha, complex *beta, complex *a, integer *lda); + +/* Subroutine */ int _starpu_clasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, complex *a, integer *lda); + +/* Subroutine */ int _starpu_classq_(integer *n, complex *x, integer *incx, real * + scale, real *sumsq); + +/* Subroutine */ int _starpu_claswp_(integer *n, complex *a, integer *lda, integer * + k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_clasyf_(char *uplo, integer *n, integer *nb, integer *kb, + complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, + integer *info); + +/* Subroutine */ int _starpu_clatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, complex *ab, integer *ldab, complex * + x, real *scale, real *cnorm, integer *info); + +/* Subroutine */ int _starpu_clatdf_(integer *ijob, integer *n, 
complex *z__, integer + *ldz, complex *rhs, real *rdsum, real *rdscal, integer *ipiv, integer + *jpiv); + +/* Subroutine */ int _starpu_clatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *ap, complex *x, real *scale, real *cnorm, + integer *info); + +/* Subroutine */ int _starpu_clatrd_(char *uplo, integer *n, integer *nb, complex *a, + integer *lda, real *e, complex *tau, complex *w, integer *ldw); + +/* Subroutine */ int _starpu_clatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *a, integer *lda, complex *x, real *scale, + real *cnorm, integer *info); + +/* Subroutine */ int _starpu_clatrz_(integer *m, integer *n, integer *l, complex *a, + integer *lda, complex *tau, complex *work); + +/* Subroutine */ int _starpu_clatzm_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c1, complex *c2, integer *ldc, + complex *work); + +/* Subroutine */ int _starpu_clauu2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_clauum_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpbcon_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *anorm, real *rcond, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cpbequ_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, real *s, real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, + complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real * + berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpbstf_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, 
integer *ldab, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * + ldafb, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpbtf2_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_cpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_cpftrf_(char *transr, char *uplo, integer *n, complex *a, + integer *info); + +/* Subroutine */ int _starpu_cpftri_(char *transr, char *uplo, integer *n, complex *a, + integer *info); + +/* Subroutine */ int _starpu_cpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, complex *a, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cpocon_(char *uplo, integer *n, complex *a, integer *lda, + real *anorm, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpoequ_(integer *n, complex *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cpoequb_(integer *n, complex *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cporfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, complex *b, integer *ldb, + complex *x, integer *ldx, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, 
real *s, + complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real + *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * + err_bnds_comp__, integer *nparams, real *params, complex *work, real * + rwork, integer *info); + +/* Subroutine */ int _starpu_cposv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * + equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * + equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpotf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpotrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpotri_(char *uplo, integer *n, complex *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_cpotrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cppcon_(char *uplo, integer *n, complex *ap, real *anorm, + real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cppequ_(char *uplo, integer *n, complex *ap, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_cpprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, complex *b, integer 
*ldb, complex *x, integer *ldx, + real *ferr, real *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cppsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, char *equed, real *s, complex *b, + integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real + *berr, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpptrf_(char *uplo, integer *n, complex *ap, integer * + info); + +/* Subroutine */ int _starpu_cpptri_(char *uplo, integer *n, complex *ap, integer * + info); + +/* Subroutine */ int _starpu_cpptrs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cpstf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_cpstrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_cptcon_(integer *n, real *d__, complex *e, real *anorm, + real *rcond, real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_cptrfs_(char *uplo, integer *n, integer *nrhs, real *d__, + complex *e, real *df, complex *ef, complex *b, integer *ldb, complex + *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cptsv_(integer *n, integer *nrhs, real *d__, complex *e, + complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cptsvx_(char *fact, integer *n, integer *nrhs, real *d__, + complex *e, real *df, complex *ef, complex *b, integer *ldb, complex + *x, 
integer *ldx, real *rcond, real *ferr, real *berr, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_cpttrf_(integer *n, real *d__, complex *e, integer *info); + +/* Subroutine */ int _starpu_cpttrs_(char *uplo, integer *n, integer *nrhs, real *d__, + complex *e, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cptts2_(integer *iuplo, integer *n, integer *nrhs, real * + d__, complex *e, complex *b, integer *ldb); + +/* Subroutine */ int _starpu_crot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, complex *s); + +/* Subroutine */ int _starpu_cspcon_(char *uplo, integer *n, complex *ap, integer * + ipiv, real *anorm, real *rcond, complex *work, integer *info); + +/* Subroutine */ int _starpu_cspmv_(char *uplo, integer *n, complex *alpha, complex * + ap, complex *x, integer *incx, complex *beta, complex *y, integer * + incy); + +/* Subroutine */ int _starpu_cspr_(char *uplo, integer *n, complex *alpha, complex *x, + integer *incx, complex *ap); + +/* Subroutine */ int _starpu_csprfs_(char *uplo, integer *n, integer *nrhs, complex * + ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_cspsv_(char *uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_cspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csptrf_(char *uplo, integer *n, complex *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_csptri_(char *uplo, integer *n, complex *ap, integer * + ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_csptrs_(char 
*uplo, integer *n, integer *nrhs, complex * + ap, integer *ipiv, complex *b, integer *ldb, integer *info); + +/* Subroutine -- NOTE(review): this name carries a doubled "_starpu_" prefix, unlike every sibling prototype here which uses "_starpu_<routine>_"; presumably an artifact of the symbol-renaming pass. Confirm against the definition (if any) before renaming. */ int _starpu__starpu_csrscl_(integer *n, real *sa, complex *sx, integer *incx); + +/* Subroutine */ int _starpu_cstedc_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_cstegr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, + real *work, integer *lwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_cstein_(integer *n, real *d__, real *e, integer *m, real + *w, integer *iblock, integer *isplit, complex *z__, integer *ldz, + real *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_cstemr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, + real *w, complex *z__, integer *ldz, integer *nzc, integer *isuppz, + logical *tryrac, real *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_csteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_csycon_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, complex *work, integer * + info); + +/* Subroutine */ int _starpu_csyequb_(char *uplo, integer *n, complex *a, integer * + lda, real *s, real *scond, real *amax, complex *work, integer *info); + +/* Subroutine */ int _starpu_csymv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy); + +/* Subroutine */ int _starpu_csyr_(char *uplo, integer
*n, complex *alpha, complex *x, + integer *incx, complex *a, integer *lda); + +/* Subroutine */ int _starpu_csyrfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_csysv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_csysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *ferr, real *berr, complex *work, integer *lwork, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_csysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_csytf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_csytrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *lwork, integer *info); + +/* Subroutine 
*/ int _starpu_csytri_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, complex *work, integer *info); + +/* Subroutine */ int _starpu_csytrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_ctbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, complex *ab, integer *ldab, real *rcond, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, + integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ctfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, complex *alpha, complex *a, + complex *b, integer *ldb); + +/* Subroutine */ int _starpu_ctftri_(char *transr, char *uplo, char *diag, integer *n, + complex *a, integer *info); + +/* Subroutine */ int _starpu_ctfttp_(char *transr, char *uplo, integer *n, complex * + arf, complex *ap, integer *info); + +/* Subroutine */ int _starpu_ctfttr_(char *transr, char *uplo, integer *n, complex * + arf, complex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctgevc_(char *side, char *howmny, logical *select, + integer *n, complex *s, integer *lds, complex *p, integer *ldp, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, + integer *m, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctgex2_(logical *wantq, logical *wantz, integer *n, + complex *a, integer *lda, complex *b, integer *ldb, complex *q, + integer *ldq, complex *z__, integer *ldz, integer *j1, integer 
*info); + +/* Subroutine */ int _starpu_ctgexc_(logical *wantq, logical *wantz, integer *n, + complex *a, integer *lda, complex *b, integer *ldb, complex *q, + integer *ldq, complex *z__, integer *ldz, integer *ifst, integer * + ilst, integer *info); + +/* Subroutine */ int _starpu_ctgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, complex *a, integer *lda, complex *b, + integer *ldb, complex *alpha, complex *beta, complex *q, integer *ldq, + complex *z__, integer *ldz, integer *m, real *pl, real *pr, real * + dif, complex *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ctgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, complex *a, integer * + lda, complex *b, integer *ldb, real *tola, real *tolb, real *alpha, + real *beta, complex *u, integer *ldu, complex *v, integer *ldv, + complex *q, integer *ldq, complex *work, integer *ncycle, integer * + info); + +/* Subroutine */ int _starpu_ctgsna_(char *job, char *howmny, logical *select, + integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real + *dif, integer *mm, integer *m, complex *work, integer *lwork, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_ctgsy2_(char *trans, integer *ijob, integer *m, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, + integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, + complex *f, integer *ldf, real *scale, real *rdsum, real *rdscal, + integer *info); + +/* Subroutine */ int _starpu_ctgsyl_(char *trans, integer *ijob, integer *m, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, + integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, + complex *f, integer *ldf, real *scale, real *dif, complex *work, + integer *lwork, integer *iwork, integer *info); + 
+/* Subroutine */ int _starpu_ctpcon_(char *norm, char *uplo, char *diag, integer *n, + complex *ap, real *rcond, complex *work, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *ap, complex *b, integer *ldb, complex *x, + integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_ctptri_(char *uplo, char *diag, integer *n, complex *ap, + integer *info); + +/* Subroutine */ int _starpu_ctptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *ap, complex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ctpttf_(char *transr, char *uplo, integer *n, complex * + ap, complex *arf, integer *info); + +/* Subroutine */ int _starpu_ctpttr_(char *uplo, integer *n, complex *ap, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrcon_(char *norm, char *uplo, char *diag, integer *n, + complex *a, integer *lda, real *rcond, complex *work, real *rwork, + integer *info); + +/* Subroutine */ int _starpu_ctrevc_(char *side, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, + real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrexc_(char *compq, integer *n, complex *t, integer * + ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer * + info); + +/* Subroutine */ int _starpu_ctrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, + complex *x, integer *ldx, real *ferr, real *berr, complex *work, real + *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrsen_(char *job, char *compq, logical *select, integer + *n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, + integer *m, real *s, real *sep, complex *work, integer *lwork, + integer 
*info); + +/* Subroutine */ int _starpu_ctrsna_(char *job, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, real *s, real *sep, integer *mm, integer * + m, complex *work, integer *ldwork, real *rwork, integer *info); + +/* Subroutine */ int _starpu_ctrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *c__, integer *ldc, real *scale, integer *info); + +/* Subroutine */ int _starpu_ctrti2_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrtri_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_ctrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_ctrttf_(char *transr, char *uplo, integer *n, complex *a, + integer *lda, complex *arf, integer *info); + +/* Subroutine */ int _starpu_ctrttp_(char *uplo, integer *n, complex *a, integer *lda, + complex *ap, integer *info); + +/* Subroutine */ int _starpu_ctzrqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, integer *info); + +/* Subroutine */ int _starpu_ctzrzf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cung2l_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cung2r_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cungbr_(char *vect, integer *m, integer *n, integer *k, + complex *a, integer *lda, complex *tau, complex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_cunghr_(integer *n, integer 
*ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_cungl2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunglq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungql_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungqr_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungr2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info); + +/* Subroutine */ int _starpu_cungrq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cungtr_(char *uplo, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, 
complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmr2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *info); + +/* Subroutine */ int _starpu_cunmrq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cunmrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_cunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *a, integer *lda, complex 
*tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_cupgtr_(char *uplo, integer *n, complex *ap, complex * + tau, complex *q, integer *ldq, complex *work, integer *info); + +/* Subroutine */ int _starpu_cupmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *ap, complex *tau, complex *c__, integer *ldc, + complex *work, integer *info); + +/* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * + d__, doublereal *sep, integer *info); + +/* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * + d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, + integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublereal *ab, integer *ldab, doublereal *r__, 
doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal 
*err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, + doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info); + +/* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgecon_(char *norm, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ 
int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublereal *a, integer *lda, integer *sdim, doublereal *wr, + doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, + integer *lwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublereal *a, integer *lda, integer *sdim, + doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, + doublereal *rconde, doublereal *rcondv, doublereal *work, integer * + lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, + doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal + *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, + integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, + 
doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, doublereal *a, + integer *lda, doublereal *sva, doublereal *u, integer *ldu, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, 
integer *rank, doublereal *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int 
_starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, + doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale); + +/* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * + ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal 
*err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer + *ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer + *ipiv, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublereal *v, integer *ldv, integer *info); + +/* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * + lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, + doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, + integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublereal *a, integer *lda, + doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, + doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, 
doublereal *work, integer *lwork, integer *iwork, integer * + liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublereal *work, integer *lwork, integer *iwork, logical * + bwork, integer *info); + +/* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, + doublereal *x, doublereal *y, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * + ldz, integer *info); + +/* Subroutine */ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, + doublereal *d__, doublereal *x, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * + a, integer 
*lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer + *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer + *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, + doublereal *q, integer *ldq, integer *iwork, doublereal *tau, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, + doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + doublereal *a, integer *lda, doublereal *d__, doublereal *sva, + integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal + *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, + doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, 
integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer + *info); + +/* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * + dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, + doublereal *du, doublereal *du2, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *t, integer *ldt, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *mm, integer *m, doublereal *work, integer * + ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_dhseqr_(char *job, 
char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info); + +logical _starpu_disnan_(doublereal *din); + +/* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * + ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy); + +doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, integer *cmode, doublereal *c__, integer *info, + doublereal *work, integer *iwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublereal *res, doublereal *ayb, doublereal *dy, doublereal * + y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info); + +doublereal _starpu_dla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb); + +/* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *x, + integer *incx, doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen 
trans_len); + +/* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * + ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, + doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * + dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, + doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, + integer *info); + +/* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, + doublereal *res, doublereal *ayb, doublereal *berr); + +doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, + integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublereal *res, doublereal *ayb, doublereal *dy, doublereal * + y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen + uplo_len); + +doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len); + +doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf); + +/* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, + 
doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, + doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * + berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal * + errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, + doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal + *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len); + +doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * + a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, + doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, + doublereal *w); + +/* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large); + +/* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy); + +/* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase); + +/* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, 
doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q); + +/* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2); + +/* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, + integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, + doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * + e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, + integer *mout, integer *nab, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info); + +/* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info); + +/* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info); + +/* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam); + +/* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal 
*d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info); + +/* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info); + +/* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info); + +/* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info); + +/* Subroutine */ int _starpu_dlaein_(logical *rightv, logical *noinit, integer *n, + doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, + doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, + doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * + bignum, integer *info); + +/* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1); + +/* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, + integer *ldt, doublereal *q, 
integer *ldq, integer *j1, integer *n1, + integer *n2, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * + scale2, doublereal *wr1, doublereal *wr2, doublereal *wi); + +/* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, + doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, + doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, + doublereal *csq, doublereal *snq); + +/* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, + doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, + integer *in, integer *info); + +/* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, + doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, + doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer + *ldb); + +/* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a, + doublereal *b, doublereal *c__, doublereal *d__, integer *in, + doublereal *y, doublereal *tol, integer *info); + +/* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * + snr); + +/* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy); + +/* 
Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy); + +/* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, + doublereal *sest, doublereal *w, doublereal *gamma, doublereal * + sestpr, doublereal *s, doublereal *c__); + +logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2); + +/* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info); + +/* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, 
doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index); + +integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * + sigma, doublereal *pivmin, integer *r__); + +doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work); + +doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, + doublereal *du); + +doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work); + +doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal + *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, + doublereal *a, doublereal *work); + +doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, + doublereal *work); + +doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e); + +doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work); + +doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + doublereal *ab, integer *ldab, doublereal *work); + +doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal + *ap, doublereal *work); + +doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *work); + +/* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn); + +/* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *ssmin); + +/* Subroutine */ int _starpu_dlapmt_(logical 
*forwrd, integer *m, integer *n, + doublereal *x, integer *ldx, integer *k); + +doublereal _starpu_dlapy2_(doublereal *x, doublereal *y); + +doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__); + +/* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, + doublereal *a, integer *lda, integer *jpvt, doublereal *tau, + doublereal *vn1, doublereal *vn2, doublereal *work); + +/* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, + doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, + doublereal *f, integer *ldf); + +/* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, + doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, + doublereal *v); + +/* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer 
*lwork); + +/* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); + +/* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal + *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, + integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * + ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh); + +/* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, + doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal + *scale, doublereal *x, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal + *lambda, doublereal *d__, 
doublereal *l, doublereal *ld, doublereal * + lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical + *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, + integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, + doublereal *rqcorr, doublereal *work); + +/* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, + doublereal *z__, integer *incx, doublereal *c__, doublereal *s, + integer *incc); + +/* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work); + +/* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork); + +/* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau); + +/* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau); + +/* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt); + +/* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, integer *incc); + +/* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, + doublereal *x); + +/* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, + doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, + integer *isplit, integer *info); + +/* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal 
*lld, + integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, + integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, + doublereal *work, integer *iwork, doublereal *pivmin, doublereal * + spdiam, integer *twist, integer *info); + +/* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, + doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, + integer *eigcnt, integer *lcnt, integer *rcnt, integer *info); + +/* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, + doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, + doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, + doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, + integer *iblock, integer *indexw, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal + *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * + spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, + doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, + doublereal *gers, doublereal *pivmin, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, + doublereal *ld, integer *clstrt, integer *clend, doublereal *w, + doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * + clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, + doublereal *dplus, doublereal *lplus, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, + integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, + doublereal *w, doublereal *werr, doublereal *work, integer *iwork, + doublereal *pivmin, 
doublereal *spdiam, integer *info); + +/* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, + doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, + doublereal *reltol, doublereal *w, doublereal *werr, integer *info); + +/* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, + doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, + integer *m, integer *dol, integer *dou, doublereal *minrgp, + doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, + doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx); + +/* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__); + +/* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer + *incc); + +/* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x); + +/* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, + doublereal *v, integer *incv, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, + integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * + ldc, doublereal *work, integer *ldwork); + +/* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt); + +/* Subroutine */ int _starpu_dlas2_(doublereal *f, 
doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax); + +/* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx); + +/* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info); + +/* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + 
work); + +/* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info); + +/* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info); + +/* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int 
_starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub); + +/* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda); + +/* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info); + +/* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau); + +/* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype, doublereal *g); + +/* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee); + +/* Subroutine */ int _starpu_dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2); + +/* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda); + +/* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * + info); + +/* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq); + +/* Subroutine */ int 
_starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl); + +/* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * + tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, + doublereal *x, integer *ldx, doublereal *xnorm, integer *info); + +/* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, + doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * + ldw, integer *info); + +/* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, doublereal *ab, integer *ldab, + doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, + integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, + integer *ipiv, integer *jpiv); + +/* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, + doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw); + +/* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *a, integer *lda, doublereal *x, + doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, 
doublereal * + a, integer *lda, doublereal *tau, doublereal *work); + +/* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * + v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, + integer *ldc, doublereal *work); + +/* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, + doublereal *tau, doublereal *q, integer *ldq, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer + *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, 
doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * + tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, 
integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpbequ_(char *uplo, integer 
*n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + integer *info); + +/* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info); + +/* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info); + +/* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * + lda, 
doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * + berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * + berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, 
doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info); + +/* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * + info); + +/* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * + info); + +/* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *b, integer *ldb, 
integer *info); + +/* Subroutine */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, + doublereal *anorm, doublereal *rcond, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer + *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, + doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + info); + +/* Subroutine */ int _starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, + integer *incx); + +/* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer 
*ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * + ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *ifail, + integer *info); + +/* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * + bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, 
integer *kd, + doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, + doublereal *q, integer *ldq, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *beta, doublereal *c__); + +/* Subroutine */ /* NOTE(review): this symbol carries the _starpu_ prefix twice, whereas the neighbouring prototypes carry it once; presumably an artifact of automated prefixing. Confirm that the definition and every caller use the same doubled name before renaming it. */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, + integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info); + +/* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, + doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *ifail, + integer *info); + +/* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, + doublereal *ap, doublereal *bp, integer *info); + +/* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, 
doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ /* NOTE(review): this symbol carries the _starpu_ prefix twice, whereas the neighbouring prototypes carry it once; presumably an artifact of automated prefixing. Confirm that the definition and every caller use the same doubled name before renaming it. */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info); + +/* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, + doublereal *d__, doublereal *e, doublereal *tau, integer *info); + +/* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, integer *info); + +/* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int 
_starpu_dstebz_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, + doublereal *d__, doublereal *e, integer *m, integer *nsplit, + doublereal *w, integer *iblock, integer *isplit, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, + integer *m, doublereal *w, integer *iblock, integer *isplit, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, + integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info); + +/* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, + 
doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * + work, integer *info); + +/* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, + integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsyevx_(char *jobz, char 
*range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer + *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *s, doublereal *b, integer *ldb, 
doublereal + *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublereal *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * + rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * + err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, + doublereal *params, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info); + +/* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dsytri_(char *uplo, 
integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, + doublereal *b, integer *ldb); + +/* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, + doublereal *a, integer *info); + +/* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal + *arf, doublereal *ap, integer *info); + +/* Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal + *arf, doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer + *mm, integer *m, doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, 
integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * + n1, integer *n2, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, + integer *ilst, doublereal *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, doublereal *a, integer *lda, doublereal * + b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + integer *m, doublereal *pl, doublereal *pr, doublereal *dif, + doublereal *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *tola, + doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * + ldq, doublereal *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * + work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer 
+ *pq, integer *info); + +/* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *dif, doublereal *work, integer *lwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * + ap, integer *info); + +/* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal + *ap, doublereal *arf, integer *info); + +/* Subroutine */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, + doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *rcond, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * + ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, + 
doublereal *work, integer *info); + +/* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer + *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal + *sep, doublereal *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, + integer *mm, integer *m, doublereal *work, integer *ldwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *lda, doublereal *arf, integer *info); + +/* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *ap, integer *info); + +/* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal 
*tau, integer *info); + +/* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); + +doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx); + +integer _starpu_icmax1_(integer *n, complex *cx, integer *incx); + +integer _starpu_ieeeck_(integer *ispec, real *zero, real *one); + +integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda); + +integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda); + +integer _starpu_iladiag_(char *diag); + +integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda); + +integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda); + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4); + +integer _starpu_ilaprec_(char *prec); + +integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda); + +integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda); + +integer _starpu_ilatrans_(char *trans); + +integer _starpu_ilauplo_(char *uplo); + +/* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, + integer *vers_patch__); + +integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda); + +integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda); + +integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork); + +integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx); + +logical _starpu_lsamen_(integer *n, char *ca, char *cb); + +integer _starpu_smaxloc_(real *a, integer *dimm); + +/* Subroutine */ int _starpu_sbdsdc_(char *uplo, char *compq, integer *n, real *d__, + real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, + integer *iq, real *work, integer *iwork, integer *info); + +/* Subroutine */ int 
_starpu_sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * + u, integer *ldu, real *c__, integer *ldc, real *work, integer *info); + +doublereal _starpu_scsum1_(integer *n, complex *cx, integer *incx); + +/* Subroutine */ int _starpu_sdisna_(char *job, integer *m, integer *n, real *d__, + real *sep, integer *info); + +/* Subroutine */ int _starpu_sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * + e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer + *ldc, real *work, integer *info); + +/* Subroutine */ int _starpu_sgbcon_(char *norm, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, + real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbequ_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * + colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_sgbequb_(integer *m, integer *n, integer *kl, integer * + ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real + *colcnd, real *amax, integer *info); + +/* Subroutine */ int _starpu_sgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, + integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * + ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, real *r__, real *c__, real *b, integer + *ldb, real *x, integer *ldx, real *rcond, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, real *work, integer *iwork, integer *info); 
+ +/* Subroutine */ int _starpu_sgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_sgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, + real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, + integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real * + rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info); + +/* Subroutine */ int _starpu_sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info); + +/* Subroutine */ int _starpu_sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info); + +/* Subroutine */ int 
_starpu_sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_sgecon_(char *norm, integer *n, real *a, integer *lda, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgeequ_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info); + +/* Subroutine */ int _starpu_sgeequb_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info); + +/* Subroutine */ int _starpu_sgees_(char *jobvs, char *sort, L_fp select, integer *n, + real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, + integer *ldvs, real *work, integer *lwork, logical *bwork, integer * + info); + +/* Subroutine */ int _starpu_sgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, + real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real * + work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, + integer *info); + +/* Subroutine */ int _starpu_sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * + vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * + ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vsl, integer *ldvsl, real *vsr, integer 
*ldvsr, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgegv_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda, + real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, + integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgels_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgelsd_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgelss_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgelsx_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, real *work, integer *info); 
+ +/* Subroutine */ int _starpu_sgelsy_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeql2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqlf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeqp3_(integer *m, integer *n, real *a, integer *lda, + integer *jpvt, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgeqpf_(integer *m, integer *n, real *a, integer *lda, + integer *jpvt, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgerfs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgerq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgerqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer 
*info); + +/* Subroutine */ int _starpu_sgesc2_(integer *n, real *a, integer *lda, real *rhs, + integer *ipiv, integer *jpiv, real *scale); + +/* Subroutine */ int _starpu_sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, + integer *ldvt, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, + integer *ldv, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *ferr, real *berr, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgetc2_(integer *n, real *a, integer *lda, integer *ipiv, + integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgetrf_(integer *m, 
integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgetri_(integer *n, real *a, integer *lda, integer *ipiv, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *lscale, real *rscale, integer *m, real *v, + integer *ldv, integer *info); + +/* Subroutine */ int _starpu_sggbal_(char *job, integer *n, real *a, integer *lda, + real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real + *rscale, real *work, integer *info); + +/* Subroutine */ int _starpu_sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, + integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, + integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, + logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, real *a, integer *lda, real *b, + integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, + real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, + real *rcondv, real *work, integer *lwork, integer *iwork, integer * + liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real + *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer 
*ilo, integer *ihi, real *lscale, real *rscale, + real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_sggglm_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real + *q, integer *ldq, real *z__, integer *ldz, integer *info); + +/* Subroutine */ int _starpu_sgglse_(integer *m, integer *n, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggqrf_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggrqf_(integer *m, integer *p, integer *n, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, real *a, integer *lda, + real *b, integer *ldb, real *alpha, real *beta, real *u, integer * + ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, + real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu, + real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real * + tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sgsvj0_(char *jobv, integer *m, integer *n, real *a, + integer *lda, real *d__, real *sva, integer *mv, 
real *v, integer * + ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, + integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sgtcon_(char *norm, integer *n, real *dl, real *d__, + real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *dlf, real *df, real *duf, real *du2, + integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * + ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, + real *du, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, + real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * + ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_sgttrf_(integer *n, real *dl, real *d__, real *du, real * + du2, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_sgttrs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_sgtts2_(integer *itrans, integer *n, integer *nrhs, real + *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer * + ldb); + +/* Subroutine */ int _starpu_shgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer + *ldt, real *alphar, 
real *alphai, real *beta, real *q, integer *ldq, + real *z__, integer *ldz, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_shsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real + *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, + real *work, integer *ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info); + +logical _starpu_sisnan_(real *sin__); + +/* Subroutine */ int _starpu_sla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real * + x, integer *incx, real *beta, real *y, integer *incy); + +doublereal _starpu_sla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, + integer *cmode, real *c__, integer *info, real *work, integer *iwork, + ftnlen trans_len); + +/* Subroutine */ int _starpu_sla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, + logical *colequ, real *c__, real *b, integer *ldb, real *y, integer * + ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real * + errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, + integer *ithresh, real *rthresh, real *dz_ub__, logical * + ignore_cwise__, integer *info); + +doublereal _starpu_sla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, real *ab, integer *ldab, real *afb, integer *ldafb); + +/* Subroutine */ int _starpu_sla_geamv__(integer *trans, integer *m, integer *n, real + *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, + real *y, integer 
*incy); + +doublereal _starpu_sla_gercond__(char *trans, integer *n, real *a, integer *lda, real + *af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer + *info, real *work, integer *iwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_sla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real * + af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b, + integer *ldb, real *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real + *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, + real *dz_ub__, logical *ignore_cwise__, integer *info); + +/* Subroutine */ int _starpu_sla_lin_berr__(integer *n, integer *nz, integer *nrhs, + real *res, real *ayb, real *berr); + +doublereal _starpu_sla_porcond__(char *uplo, integer *n, real *a, integer *lda, real * + af, integer *ldaf, integer *cmode, real *c__, integer *info, real * + work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * + ldaf, logical *colequ, real *c__, real *b, integer *ldb, real *y, + integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, + real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real * + rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_sla_porpvgrw__(char *uplo, integer *ncols, real *a, integer *lda, + real *af, integer *ldaf, real *work, ftnlen uplo_len); + +doublereal _starpu_sla_rpvgrw__(integer *n, integer *ncols, real *a, integer *lda, + real *af, integer *ldaf); + +/* Subroutine */ int _starpu_sla_syamv__(integer *uplo, integer *n, real *alpha, real + *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); + +doublereal 
_starpu_sla_syrcond__(char *uplo, integer *n, real *a, integer *lda, real * + af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer * + info, real *work, integer *iwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * + ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer * + ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, + real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real * + y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, + logical *ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_sla_syrpvgrw__(char *uplo, integer *n, integer *info, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *work, + ftnlen uplo_len); + +/* Subroutine */ int _starpu_sla_wwaddw__(integer *n, real *x, real *y, real *w); + +/* Subroutine */ int _starpu_slabad_(real *small, real *large); + +/* Subroutine */ int _starpu_slabrd_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, + integer *ldx, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slacn2_(integer *n, real *v, real *x, integer *isgn, + real *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_slacon_(integer *n, real *v, real *x, integer *isgn, + real *est, integer *kase); + +/* Subroutine */ int _starpu_slacpy_(char *uplo, integer *m, integer *n, real *a, + integer *lda, real *b, integer *ldb); + +/* Subroutine */ int _starpu_sladiv_(real *a, real *b, real *c__, real *d__, real *p, + real *q); + +/* Subroutine */ int _starpu_slae2_(real *a, real *b, real *c__, real *rt1, real *rt2); + +/* Subroutine */ int _starpu_slaebz_(integer *ijob, integer *nitmax, integer *n, + integer *mmax, integer *minp, integer *nbmin, real *abstol, real * + reltol, real *pivmin, real *d__, real *e, real *e2, 
integer *nval, + real *ab, real *c__, integer *mout, integer *nab, real *work, integer + *iwork, integer *info); + +/* Subroutine */ int _starpu_slaed0_(integer *icompq, integer *qsiz, integer *n, real + *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, + real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slaed1_(integer *n, real *d__, real *q, integer *ldq, + integer *indxq, real *rho, integer *cutpnt, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_slaed2_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * + dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * + indxp, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_slaed3_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * + indx, integer *ctot, real *w, real *s, integer *info); + +/* Subroutine */ int _starpu_slaed4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *dlam, integer *info); + +/* Subroutine */ int _starpu_slaed5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dlam); + +/* Subroutine */ int _starpu_slaed6_(integer *kniter, logical *orgati, real *rho, + real *d__, real *z__, real *finit, real *tau, integer *info); + +/* Subroutine */ int _starpu_slaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, + integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * + qstore, integer *qptr, integer *prmptr, integer *perm, integer * + givptr, integer *givcol, real *givnum, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_slaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, + integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, + real 
*w, integer *perm, integer *givptr, integer *givcol, real * + givnum, integer *indxp, integer *indx, integer *info); + +/* Subroutine */ int _starpu_slaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, + real *w, real *s, integer *lds, integer *info); + +/* Subroutine */ int _starpu_slaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, real *givnum, real *q, integer *qptr, real *z__, + real *ztemp, integer *info); + +/* Subroutine */ int _starpu_slaein_(logical *rightv, logical *noinit, integer *n, + real *h__, integer *ldh, real *wr, real *wi, real *vr, real *vi, real + *b, integer *ldb, real *work, real *eps3, real *smlnum, real *bignum, + integer *info); + +/* Subroutine */ int _starpu_slaev2_(real *a, real *b, real *c__, real *rt1, real * + rt2, real *cs1, real *sn1); + +/* Subroutine */ int _starpu_slaexc_(logical *wantq, integer *n, real *t, integer * + ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, + real *work, integer *info); + +/* Subroutine */ int _starpu_slag2_(real *a, integer *lda, real *b, integer *ldb, + real *safmin, real *scale1, real *scale2, real *wr1, real *wr2, real * + wi); + +/* Subroutine */ int _starpu_slag2d_(integer *m, integer *n, real *sa, integer *ldsa, + doublereal *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_slags2_(logical *upper, real *a1, real *a2, real *a3, + real *b1, real *b2, real *b3, real *csu, real *snu, real *csv, real * + snv, real *csq, real *snq); + +/* Subroutine */ int _starpu_slagtf_(integer *n, real *a, real *lambda, real *b, real + *c__, real *tol, real *d__, integer *in, integer *info); + +/* Subroutine */ int _starpu_slagtm_(char *trans, integer *n, integer *nrhs, real * + alpha, real *dl, real *d__, real *du, real *x, integer *ldx, real * + beta, real *b, integer *ldb); + +/* Subroutine */ int 
_starpu_slagts_(integer *job, integer *n, real *a, real *b, real + *c__, real *d__, integer *in, real *y, real *tol, integer *info); + +/* Subroutine */ int _starpu_slagv2_(real *a, integer *lda, real *b, integer *ldb, + real *alphar, real *alphai, real *beta, real *csl, real *snl, real * + csr, real *snr); + +/* Subroutine */ int _starpu_slahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * + info); + +/* Subroutine */ int _starpu_slahr2_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slahrd_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); + +/* Subroutine */ int _starpu_slaic1_(integer *job, integer *j, real *x, real *sest, + real *w, real *gamma, real *sestpr, real *s, real *c__); + +logical _starpu_slaisnan_(real *sin1, real *sin2); + +/* Subroutine */ int _starpu_slaln2_(logical *ltrans, integer *na, integer *nw, real * + smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, + integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, + real *xnorm, integer *info); + +/* Subroutine */ int _starpu_slals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *info); + +/* Subroutine */ int _starpu_slalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * + u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * + z__, real *poles, integer *givptr, integer *givcol, integer 
*ldgcol, + integer *perm, real *givnum, real *c__, real *s, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_slalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, + integer *rank, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slamrg_(integer *n1, integer *n2, real *a, integer * + strd1, integer *strd2, integer *index); + +integer _starpu_slaneg_(integer *n, real *d__, real *lld, real *sigma, real *pivmin, + integer *r__); + +doublereal _starpu_slangb_(char *norm, integer *n, integer *kl, integer *ku, real *ab, + integer *ldab, real *work); + +doublereal _starpu_slange_(char *norm, integer *m, integer *n, real *a, integer *lda, + real *work); + +doublereal _starpu_slangt_(char *norm, integer *n, real *dl, real *d__, real *du); + +doublereal _starpu_slanhs_(char *norm, integer *n, real *a, integer *lda, real *work); + +doublereal _starpu_slansb_(char *norm, char *uplo, integer *n, integer *k, real *ab, + integer *ldab, real *work); + +doublereal _starpu_slansf_(char *norm, char *transr, char *uplo, integer *n, real *a, + real *work); + +doublereal _starpu_slansp_(char *norm, char *uplo, integer *n, real *ap, real *work); + +doublereal _starpu_slanst_(char *norm, integer *n, real *d__, real *e); + +doublereal _starpu_slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, + real *work); + +doublereal _starpu_slantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + real *ab, integer *ldab, real *work); + +doublereal _starpu_slantp_(char *norm, char *uplo, char *diag, integer *n, real *ap, + real *work); + +doublereal _starpu_slantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + real *a, integer *lda, real *work); + +/* Subroutine */ int _starpu_slanv2_(real *a, real *b, real *c__, real *d__, real * + rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn); + +/* Subroutine */ int 
_starpu_slapll_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *ssmin); + +/* Subroutine */ int _starpu_slapmt_(logical *forwrd, integer *m, integer *n, real *x, + integer *ldx, integer *k); + +doublereal _starpu_slapy2_(real *x, real *y); + +doublereal _starpu_slapy3_(real *x, real *y, real *z__); + +/* Subroutine */ int _starpu_slaqgb_(integer *m, integer *n, integer *kl, integer *ku, + real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * + colcnd, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqge_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * + equed); + +/* Subroutine */ int _starpu_slaqp2_(integer *m, integer *n, integer *offset, real *a, + integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real * + work); + +/* Subroutine */ int _starpu_slaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, + real *vn1, real *vn2, real *auxv, real *f, integer *ldf); + +/* Subroutine */ int _starpu_slaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, + real *si1, real *sr2, real *si2, real *v); + +/* Subroutine */ int _starpu_slaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork); + +/* Subroutine */ int _starpu_slaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, 
integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork); + +/* Subroutine */ int _starpu_slaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_slaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, + real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real + *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, + integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * + ldwh); + +/* Subroutine */ int _starpu_slaqsb_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqsp_(char *uplo, integer *n, real *ap, real *s, real * + scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqsy_(char *uplo, integer *n, real *a, integer *lda, + real *s, real *scond, real *amax, char *equed); + +/* Subroutine */ int _starpu_slaqtr_(logical *ltran, logical *lreal, integer *n, real + *t, integer *ldt, real *b, real *w, real *scale, real *x, real *work, + integer *info); + +/* Subroutine */ int _starpu_slar1v_(integer *n, integer *b1, integer *bn, real * + lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * + gaptol, real *z__, logical *wantnc, integer *negcnt, real *ztz, real * + mingma, integer *r__, integer *isuppz, real *nrminv, real *resid, + real *rqcorr, real *work); + +/* Subroutine */ int _starpu_slar2v_(integer *n, real *x, real *y, real *z__, integer + *incx, real *c__, real *s, integer *incc); + +/* Subroutine */ int _starpu_slarf_(char *side, 
integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work); + +/* Subroutine */ int _starpu_slarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, real *v, integer *ldv, + real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * + ldwork); + +/* Subroutine */ int _starpu_slarfg_(integer *n, real *alpha, real *x, integer *incx, + real *tau); + +/* Subroutine */ int _starpu_slarfp_(integer *n, real *alpha, real *x, integer *incx, + real *tau); + +/* Subroutine */ int _starpu_slarft_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt); + +/* Subroutine */ int _starpu_slarfx_(char *side, integer *m, integer *n, real *v, + real *tau, real *c__, integer *ldc, real *work); + +/* Subroutine */ int _starpu_slargv_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *c__, integer *incc); + +/* Subroutine */ int _starpu_slarnv_(integer *idist, integer *iseed, integer *n, real + *x); + +/* Subroutine */ int _starpu_slarra_(integer *n, real *d__, real *e, real *e2, real * + spltol, real *tnrm, integer *nsplit, integer *isplit, integer *info); + +/* Subroutine */ int _starpu_slarrb_(integer *n, real *d__, real *lld, integer * + ifirst, integer *ilast, real *rtol1, real *rtol2, integer *offset, + real *w, real *wgap, real *werr, real *work, integer *iwork, real * + pivmin, real *spdiam, integer *twist, integer *info); + +/* Subroutine */ int _starpu_slarrc_(char *jobt, integer *n, real *vl, real *vu, real + *d__, real *e, real *pivmin, integer *eigcnt, integer *lcnt, integer * + rcnt, integer *info); + +/* Subroutine */ int _starpu_slarrd_(char *range, char *order, integer *n, real *vl, + real *vu, integer *il, integer *iu, real *gers, real *reltol, real * + d__, real *e, real *e2, real *pivmin, integer *nsplit, integer * + isplit, integer *m, real *w, real *werr, real *wl, real *wu, integer * + iblock, integer 
*indexw, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slarre_(char *range, integer *n, real *vl, real *vu, + integer *il, integer *iu, real *d__, real *e, real *e2, real *rtol1, + real *rtol2, real *spltol, integer *nsplit, integer *isplit, integer * + m, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, + real *gers, real *pivmin, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slarrf_(integer *n, real *d__, real *l, real *ld, + integer *clstrt, integer *clend, real *w, real *wgap, real *werr, + real *spdiam, real *clgapl, real *clgapr, real *pivmin, real *sigma, + real *dplus, real *lplus, real *work, integer *info); + +/* Subroutine */ int _starpu_slarrj_(integer *n, real *d__, real *e2, integer *ifirst, + integer *ilast, real *rtol, integer *offset, real *w, real *werr, + real *work, integer *iwork, real *pivmin, real *spdiam, integer *info); + +/* Subroutine */ int _starpu_slarrk_(integer *n, integer *iw, real *gl, real *gu, + real *d__, real *e2, real *pivmin, real *reltol, real *w, real *werr, + integer *info); + +/* Subroutine */ int _starpu_slarrr_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_slarrv_(integer *n, real *vl, real *vu, real *d__, real * + l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * + dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, + real *wgap, integer *iblock, integer *indexw, real *gers, real *z__, + integer *ldz, integer *isuppz, real *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_slarscl2_(integer *m, integer *n, real *d__, real *x, + integer *ldx); + +/* Subroutine */ int _starpu_slartg_(real *f, real *g, real *cs, real *sn, real *r__); + +/* Subroutine */ int _starpu_slartv_(integer *n, real *x, integer *incx, real *y, + integer *incy, real *c__, real *s, integer *incc); + +/* Subroutine */ int _starpu_slaruv_(integer *iseed, integer *n, real *x); + +/* 
Subroutine */ int _starpu_slarz_(char *side, integer *m, integer *n, integer *l, + real *v, integer *incv, real *tau, real *c__, integer *ldc, real * + work); + +/* Subroutine */ int _starpu_slarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, real *v, + integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real * + work, integer *ldwork); + +/* Subroutine */ int _starpu_slarzt_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt); + +/* Subroutine */ int _starpu_slas2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax); + +/* Subroutine */ int _starpu_slascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_slascl2_(integer *m, integer *n, real *d__, real *x, + integer *ldx); + +/* Subroutine */ int _starpu_slasd0_(integer *n, integer *sqre, real *d__, real *e, + real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, + integer *iwork, real *work, integer *info); + +/* Subroutine */ int _starpu_slasd1_(integer *nl, integer *nr, integer *sqre, real * + d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, + integer *ldvt, integer *idxq, integer *iwork, real *work, integer * + info); + +/* Subroutine */ int _starpu_slasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * + ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, + real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, + integer *idxq, integer *coltyp, integer *info); + +/* Subroutine */ int _starpu_slasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * + ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, + integer *ldvt2, integer *idxc, integer *ctot, 
real *z__, integer * + info); + +/* Subroutine */ int _starpu_slasd4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *sigma, real *work, integer *info); + +/* Subroutine */ int _starpu_slasd5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dsigma, real *work); + +/* Subroutine */ int _starpu_slasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, + integer *idxq, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, + real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, + integer *idx, integer *idxp, integer *idxq, integer *perm, integer * + givptr, integer *givcol, integer *ldgcol, real *givnum, integer * + ldgnum, real *c__, real *s, integer *info); + +/* Subroutine */ int _starpu_slasd8_(integer *icompq, integer *k, real *d__, real * + z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, + real *dsigma, real *work, integer *info); + +/* Subroutine */ int _starpu_slasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, + integer *k, real *difl, real *difr, real *z__, real *poles, integer * + givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, + real *c__, real *s, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_slasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, + integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * + work, integer *info); + +/* Subroutine */ int _starpu_slasdt_(integer *n, 
integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub); + +/* Subroutine */ int _starpu_slaset_(char *uplo, integer *m, integer *n, real *alpha, + real *beta, real *a, integer *lda); + +/* Subroutine */ int _starpu_slasq1_(integer *n, real *d__, real *e, real *work, + integer *info); + +/* Subroutine */ int _starpu_slasq2_(integer *n, real *z__, integer *info); + +/* Subroutine */ int _starpu_slasq3_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, + integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * + dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * + tau); + +/* Subroutine */ int _starpu_slasq4_(integer *i0, integer *n0, real *z__, integer *pp, + integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, + real *dn1, real *dn2, real *tau, integer *ttype, real *g); + +/* Subroutine */ int _starpu_slasq5_(integer *i0, integer *n0, real *z__, integer *pp, + real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * + dnm1, real *dnm2, logical *ieee); + +/* Subroutine */ int _starpu_slasq6_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * + dnm2); + +/* Subroutine */ int _starpu_slasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, real *a, integer *lda); + +/* Subroutine */ int _starpu_slasrt_(char *id, integer *n, real *d__, integer *info); + +/* Subroutine */ int _starpu_slassq_(integer *n, real *x, integer *incx, real *scale, + real *sumsq); + +/* Subroutine */ int _starpu_slasv2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax, real *snr, real *csr, real *snl, real *csl); + +/* Subroutine */ int _starpu_slaswp_(integer *n, real *a, integer *lda, integer *k1, + integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_slasy2_(logical *ltranl, logical *ltranr, integer 
*isgn, + integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * + ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real + *xnorm, integer *info); + +/* Subroutine */ int _starpu_slasyf_(char *uplo, integer *n, integer *nb, integer *kb, + real *a, integer *lda, integer *ipiv, real *w, integer *ldw, integer + *info); + +/* Subroutine */ int _starpu_slatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, + real *scale, real *cnorm, integer *info); + +/* Subroutine */ int _starpu_slatdf_(integer *ijob, integer *n, real *z__, integer * + ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer * + jpiv); + +/* Subroutine */ int _starpu_slatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, real *ap, real *x, real *scale, real *cnorm, + integer *info); + +/* Subroutine */ int _starpu_slatrd_(char *uplo, integer *n, integer *nb, real *a, + integer *lda, real *e, real *tau, real *w, integer *ldw); + +/* Subroutine */ int _starpu_slatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, real *a, integer *lda, real *x, real *scale, real + *cnorm, integer *info); + +/* Subroutine */ int _starpu_slatrz_(integer *m, integer *n, integer *l, real *a, + integer *lda, real *tau, real *work); + +/* Subroutine */ int _starpu_slatzm_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c1, real *c2, integer *ldc, real * + work); + +/* Subroutine */ int _starpu_slauu2_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_slauum_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_sopgtr_(char *uplo, integer *n, real *ap, real *tau, + real *q, integer *ldq, real *work, integer *info); + +/* Subroutine */ int _starpu_sopmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *ap, real *tau, real *c__, integer *ldc, real 
*work, + integer *info); + +/* Subroutine */ int _starpu_sorg2l_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorg2r_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorgbr_(char *vect, integer *m, integer *n, integer *k, + real *a, integer *lda, real *tau, real *work, integer *lwork, integer + *info); + +/* Subroutine */ int _starpu_sorghr_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgl2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorglq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgql_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgqr_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgr2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info); + +/* Subroutine */ int _starpu_sorgrq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorgtr_(char *uplo, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer 
*lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * + c__, integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormql_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormr2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info); + +/* Subroutine */ int _starpu_sormr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *info); + +/* Subroutine */ int _starpu_sormrq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormrz_(char *side, char 
*trans, integer *m, integer *n, + integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_sormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_spbcon_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *anorm, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_spbequ_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, real *s, real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spbstf_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, + char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_spbtf2_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbtrf_(char *uplo, integer *n, integer *kd, real *ab, + integer *ldab, integer *info); + +/* Subroutine */ int _starpu_spbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); + +/* Subroutine */ int 
_starpu_spftrf_(char *transr, char *uplo, integer *n, real *a, + integer *info); + +/* Subroutine */ int _starpu_spftri_(char *transr, char *uplo, integer *n, real *a, + integer *info); + +/* Subroutine */ int _starpu_spftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, real *a, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spocon_(char *uplo, integer *n, real *a, integer *lda, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spoequ_(integer *n, real *a, integer *lda, real *s, real + *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spoequb_(integer *n, real *a, integer *lda, real *s, + real *scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_sporfs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, real *b, integer *ldb, real *x, + integer *ldx, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_sporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, real *s, real * + b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, + integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, + integer *nparams, real *params, real *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, + real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, + real *ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, + real *s, real *b, integer *ldb, 
real *x, integer *ldx, real *rcond, + real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotri_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +/* Subroutine */ int _starpu_spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sppcon_(char *uplo, integer *n, real *ap, real *anorm, + real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sppequ_(char *uplo, integer *n, real *ap, real *s, real * + scond, real *amax, integer *info); + +/* Subroutine */ int _starpu_spprfs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *afp, real *b, integer *ldb, real *x, integer *ldx, real *ferr, + real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sppsv_(char *uplo, integer *n, integer *nrhs, real *ap, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *ap, real *afp, char *equed, real *s, real *b, integer * + ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real + *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_spptrf_(char *uplo, integer *n, real *ap, integer *info); + +/* Subroutine */ int _starpu_spptri_(char *uplo, integer *n, real *ap, integer *info); + +/* Subroutine */ int _starpu_spptrs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_spstf2_(char *uplo, integer *n, real *a, integer *lda, + integer *piv, integer *rank, real 
*tol, real *work, integer *info); + +/* Subroutine */ int _starpu_spstrf_(char *uplo, integer *n, real *a, integer *lda, + integer *piv, integer *rank, real *tol, real *work, integer *info); + +/* Subroutine */ int _starpu_sptcon_(integer *n, real *d__, real *e, real *anorm, + real *rcond, real *work, integer *info); + +/* Subroutine */ int _starpu_spteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sptrfs_(integer *n, integer *nrhs, real *d__, real *e, + real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, + real *ferr, real *berr, real *work, integer *info); + +/* Subroutine */ int _starpu_sptsv_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sptsvx_(char *fact, integer *n, integer *nrhs, real *d__, + real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer + *ldx, real *rcond, real *ferr, real *berr, real *work, integer *info); + +/* Subroutine */ int _starpu_spttrf_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_spttrs_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sptts2_(integer *n, integer *nrhs, real *d__, real *e, + real *b, integer *ldb); + +/* Subroutine */ int _starpu_srscl_(integer *n, real *sa, real *sx, integer *incx); + +/* Subroutine */ int _starpu_ssbev_(char *jobz, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, + integer *info); + +/* Subroutine */ int _starpu_ssbevd_(char *jobz, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, real *ab, integer *ldab, real *q, integer 
*ldq, real *vl, + real *vu, integer *il, integer *iu, real *abstol, integer *m, real * + w, real *z__, integer *ldz, real *work, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_ssbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + x, integer *ldx, real *work, integer *info); + +/* Subroutine */ int _starpu_ssbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + w, real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_ssbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * + w, real *z__, integer *ldz, real *work, integer *lwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer * + ldbb, real *q, integer *ldq, real *vl, real *vu, integer *il, integer + *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real + *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, + real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, + real *work, integer *info); + +/* Subroutine */ int _starpu_ssfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, real *alpha, real *a, integer *lda, real *beta, real * + c__); + +/* Subroutine */ int _starpu_sspcon_(char *uplo, integer *n, real *ap, integer *ipiv, + real *anorm, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sspev_(char *jobz, char *uplo, integer *n, real *ap, + real *w, real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sspevd_(char *jobz, char *uplo, integer *n, real *ap, + real 
*w, real *z__, integer *ldz, real *work, integer *lwork, integer + *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sspevx_(char *jobz, char *range, char *uplo, integer *n, + real *ap, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, real *work, integer * + iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_sspgst_(integer *itype, char *uplo, integer *n, real *ap, + real *bp, integer *info); + +/* Subroutine */ int _starpu_sspgv_(integer *itype, char *jobz, char *uplo, integer * + n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, + integer *info); + +/* Subroutine */ int _starpu_sspgvd_(integer *itype, char *jobz, char *uplo, integer * + n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sspgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, real *ap, real *bp, real *vl, real *vu, integer *il, + integer *iu, real *abstol, integer *m, real *w, real *z__, integer * + ldz, real *work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssprfs_(char *uplo, integer *n, integer *nrhs, real *ap, + real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer * + ldx, real *ferr, real *berr, real *work, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_sspsv_(char *uplo, integer *n, integer *nrhs, real *ap, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real + *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ssptrd_(char *uplo, integer *n, real *ap, real *d__, + real *e, real *tau, integer *info); + +/* Subroutine */ int 
_starpu_ssptrf_(char *uplo, integer *n, real *ap, integer *ipiv, + integer *info); + +/* Subroutine */ int _starpu_ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, + real *work, integer *info); + +/* Subroutine */ int _starpu_ssptrs_(char *uplo, integer *n, integer *nrhs, real *ap, + integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_sstebz_(char *range, char *order, integer *n, real *vl, + real *vu, integer *il, integer *iu, real *abstol, real *d__, real *e, + integer *m, integer *nsplit, real *w, integer *iblock, integer * + isplit, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_sstedc_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstegr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * + work, integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstein_(integer *n, real *d__, real *e, integer *m, real + *w, integer *iblock, integer *isplit, real *z__, integer *ldz, real * + work, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_sstemr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, + real *w, real *z__, integer *ldz, integer *nzc, integer *isuppz, + logical *tryrac, real *work, integer *lwork, integer *iwork, integer * + liwork, integer *info); + +/* Subroutine */ int _starpu_ssteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_ssterf_(integer *n, real *d__, real *e, integer *info); + +/* Subroutine */ int _starpu_sstev_(char *jobz, integer *n, real *d__, real *e, real * + z__, integer 
*ldz, real *work, integer *info); + +/* Subroutine */ int _starpu_sstevd_(char *jobz, integer *n, real *d__, real *e, real + *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstevr_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * + work, integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_sstevx_(char *jobz, char *range, integer *n, real *d__, + real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, + integer *m, real *w, real *z__, integer *ldz, real *work, integer * + iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssycon_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_ssyequb_(char *uplo, integer *n, real *a, integer *lda, + real *s, real *scond, real *amax, real *work, integer *info); + +/* Subroutine */ int _starpu_ssyev_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssyevd_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssyevr_(char *jobz, char *range, char *uplo, integer *n, + real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, + real *abstol, integer *m, real *w, real *z__, integer *ldz, integer * + isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ssyevx_(char *jobz, char *range, char *uplo, integer *n, + real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, + real *abstol, integer *m, real *w, real *z__, 
integer *ldz, real * + work, integer *lwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssygs2_(integer *itype, char *uplo, integer *n, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ssygst_(integer *itype, char *uplo, integer *n, real *a, + integer *lda, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ssygv_(integer *itype, char *jobz, char *uplo, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssygvd_(integer *itype, char *jobz, char *uplo, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_ssygvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real * + vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, + real *w, real *z__, integer *ldz, real *work, integer *lwork, integer + *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_ssyrfs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ssyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, + real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * + err_bnds_comp__, integer *nparams, real *params, real *work, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_ssysv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, real *work, + integer *lwork, integer *info); + +/* 
Subroutine */ int _starpu_ssysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, + real *berr, real *work, integer *lwork, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_ssysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * + err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * + params, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ssytd2_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, integer *info); + +/* Subroutine */ int _starpu_ssytf2_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_ssytrd_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, real *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_ssytrf_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ssytri_(char *uplo, integer *n, real *a, integer *lda, + integer *ipiv, real *work, integer *info); + +/* Subroutine */ int _starpu_ssytrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_stbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, real *ab, integer *ldab, real *rcond, real *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer + *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, + 
integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer + *ldb, integer *info); + +/* Subroutine */ int _starpu_stfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, real *alpha, real *a, real *b, + integer *ldb); + +/* Subroutine */ int _starpu_stftri_(char *transr, char *uplo, char *diag, integer *n, + real *a, integer *info); + +/* Subroutine */ int _starpu_stfttp_(char *transr, char *uplo, integer *n, real *arf, + real *ap, integer *info); + +/* Subroutine */ int _starpu_stfttr_(char *transr, char *uplo, integer *n, real *arf, + real *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_stgevc_(char *side, char *howmny, logical *select, + integer *n, real *s, integer *lds, real *p, integer *ldp, real *vl, + integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real + *work, integer *info); + +/* Subroutine */ int _starpu_stgex2_(logical *wantq, logical *wantz, integer *n, real + *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * + z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, + integer *lwork, integer *info); + +/* Subroutine */ int _starpu_stgexc_(logical *wantq, logical *wantz, integer *n, real + *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * + z__, integer *ldz, integer *ifst, integer *ilst, real *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_stgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, real *a, integer *lda, real *b, integer * + ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, + real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, + real *work, integer *lwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_stgsja_(char *jobu, char *jobv, char *jobq, 
integer *m, + integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, + real *b, integer *ldb, real *tola, real *tolb, real *alpha, real * + beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer * + ldq, real *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_stgsna_(char *job, char *howmny, logical *select, + integer *n, real *a, integer *lda, real *b, integer *ldb, real *vl, + integer *ldvl, real *vr, integer *ldvr, real *s, real *dif, integer * + mm, integer *m, real *work, integer *lwork, integer *iwork, integer * + info); + +/* Subroutine */ int _starpu_stgsy2_(char *trans, integer *ijob, integer *m, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * + ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer + *ldf, real *scale, real *rdsum, real *rdscal, integer *iwork, integer + *pq, integer *info); + +/* Subroutine */ int _starpu_stgsyl_(char *trans, integer *ijob, integer *m, integer * + n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * + ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer + *ldf, real *scale, real *dif, real *work, integer *lwork, integer * + iwork, integer *info); + +/* Subroutine */ int _starpu_stpcon_(char *norm, char *uplo, char *diag, integer *n, + real *ap, real *rcond, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *ap, real *b, integer *ldb, real *x, integer *ldx, + real *ferr, real *berr, real *work, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_stptri_(char *uplo, char *diag, integer *n, real *ap, + integer *info); + +/* Subroutine */ int _starpu_stptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *ap, real *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_stpttf_(char *transr, char *uplo, integer *n, real *ap, + real *arf, integer 
*info); + +/* Subroutine */ int _starpu_stpttr_(char *uplo, integer *n, real *ap, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strcon_(char *norm, char *uplo, char *diag, integer *n, + real *a, integer *lda, real *rcond, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_strevc_(char *side, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *mm, integer *m, real *work, integer *info); + +/* Subroutine */ int _starpu_strexc_(char *compq, integer *n, real *t, integer *ldt, + real *q, integer *ldq, integer *ifst, integer *ilst, real *work, + integer *info); + +/* Subroutine */ int _starpu_strrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *x, + integer *ldx, real *ferr, real *berr, real *work, integer *iwork, + integer *info); + +/* Subroutine */ int _starpu_strsen_(char *job, char *compq, logical *select, integer + *n, real *t, integer *ldt, real *q, integer *ldq, real *wr, real *wi, + integer *m, real *s, real *sep, real *work, integer *lwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_strsna_(char *job, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *s, real *sep, integer *mm, integer *m, real * + work, integer *ldwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_strsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, real *a, integer *lda, real *b, integer *ldb, real * + c__, integer *ldc, real *scale, integer *info); + +/* Subroutine */ int _starpu_strti2_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strtri_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_strtrs_(char *uplo, char 
*trans, char *diag, integer *n, + integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_strttf_(char *transr, char *uplo, integer *n, real *a, + integer *lda, real *arf, integer *info); + +/* Subroutine */ int _starpu_strttp_(char *uplo, integer *n, real *a, integer *lda, + real *ap, integer *info); + +/* Subroutine */ int _starpu_stzrqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, integer *info); + +/* Subroutine */ int _starpu_stzrzf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len); + +/* Subroutine */ int _starpu_zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, + integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, + integer *ldc, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zcgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, + doublereal *rwork, integer *iter, integer *info); + +/* Subroutine */ int _starpu_zcposv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, + doublereal *rwork, integer *iter, integer *info); + +/* Subroutine */ int _starpu_zdrscl_(integer *n, doublereal *sa, doublecomplex *sx, + integer *incx); + +/* Subroutine */ int _starpu_zgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublecomplex *ab, integer *ldab, + doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, + doublecomplex *pt, integer 
*ldpt, doublecomplex *c__, integer *ldc, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info); + +/* Subroutine */ int _starpu_zgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal * + c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, + integer *info); + +/* Subroutine */ int _starpu_zgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex * + afb, integer *ldafb, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex * + b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zgbsvx_(char *fact, char *trans, integer *n, integer *kl, + 
integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, + doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublecomplex *v, + integer *ldv, integer *info); + +/* Subroutine */ int _starpu_zgebal_(char *job, integer *n, doublecomplex *a, integer + *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); + +/* Subroutine */ int _starpu_zgebd2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *info); + +/* 
Subroutine */ int _starpu_zgebrd_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *lwork, integer * + info); + +/* Subroutine */ int _starpu_zgecon_(char *norm, integer *n, doublecomplex *a, + integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeequ_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zgeequb_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, + doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork, + doublereal *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublecomplex *a, integer *lda, integer *sdim, + doublecomplex *w, doublecomplex *vs, integer *ldvs, doublereal * + rconde, doublereal *rcondv, doublecomplex *work, integer *lwork, + doublereal *rwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zgeev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, + doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal 
*scale, doublereal *abnrm, + doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgegs_(char *jobvsl, char *jobvsr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, + integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex * + work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgegv_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgehd2_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zgehrd_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgelq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgelqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgelsd_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer 
*info); + +/* Subroutine */ int _starpu_zgelss_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgelsx_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgelsy_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeql2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgeqlf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgeqp3_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeqpf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgeqr2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgeqrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgerfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer 
*ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgerq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgerqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zgesc2_(integer *n, doublecomplex *a, integer *lda, + doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale); + +/* Subroutine */ int _starpu_zgesdd_(char *jobz, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * + info); + +/* Subroutine */ int _starpu_zgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, 
doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgetc2_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, integer *jpiv, integer *info); + +/* Subroutine */ int _starpu_zgetf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgetrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zgetri_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgetrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublecomplex *v, integer *ldv, integer *info); + +/* Subroutine */ int _starpu_zggbal_(char *job, integer *n, doublecomplex *a, integer + *lda, doublecomplex *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, 
integer * + info); + +/* Subroutine */ int _starpu_zgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer + *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, + logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, + doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, + doublecomplex *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *liwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *alpha, doublecomplex *beta, + doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, logical *bwork, integer *info); + +/* Subroutine */ int _starpu_zggglm_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex + 
*work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_zgglse_(integer *m, integer *n, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, doublecomplex *d__, doublecomplex *x, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zggqrf_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zggrqf_(integer *m, integer *p, integer *n, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, + integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, + doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex + *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, + integer *l, doublecomplex *u, integer *ldu, doublecomplex *v, integer + *ldv, doublecomplex *q, integer *ldq, integer *iwork, doublereal * + rwork, doublecomplex *tau, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zgtcon_(char *norm, integer *n, doublecomplex *dl, + doublecomplex *d__, 
doublecomplex *du, doublecomplex *du2, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, + integer *info); + +/* Subroutine */ int _starpu_zgtrfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, + doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zgttrf_(integer *n, doublecomplex *dl, doublecomplex * + d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer * + info); + +/* Subroutine */ int _starpu_zgttrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zgtts2_(integer *itrans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zhbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ 
int _starpu_zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *q, + integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zhbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublecomplex *x, integer *ldx, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublecomplex *ab, integer *ldab, + doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, + doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * + abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer 
*iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_zhbtrd_(char *vect, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, + doublecomplex *q, integer *ldq, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhecon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zheequb_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zheev_(char *jobz, char *uplo, integer *n, doublecomplex + *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zheevr_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, integer *isuppz, doublecomplex * + work, integer *lwork, doublereal *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zheevx_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * + info); + +/* Subroutine */ int _starpu_zhegs2_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer 
*lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhegst_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhegv_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zhegvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhegvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zherfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zherfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, + integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + 
+/* Subroutine */ int _starpu_zhesv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zhesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info); + +/* Subroutine */ int _starpu_zhetf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zhetrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zhetri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhetrs_(char *uplo, integer *n, integer *nrhs, + 
doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zhfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, doublereal *alpha, doublecomplex *a, integer *lda, + doublereal *beta, doublecomplex *c__); + +/* Subroutine */ int _starpu_zhgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *t, integer *ldt, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * + ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpcon_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zhpev_(char *jobz, char *uplo, integer *n, doublecomplex + *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhpevd_(char *jobz, char *uplo, integer *n, + doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zhpevx_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *ap, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zhpgst_(integer *itype, char *uplo, integer *n, + doublecomplex *ap, doublecomplex *bp, integer *info); + +/* Subroutine */ int _starpu_zhpgv_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex + *z__, integer *ldz, 
doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpgvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex + *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info); + +/* Subroutine */ int _starpu_zhpgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublecomplex *ap, doublecomplex *bp, doublereal * + vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, + integer *m, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *iwork, integer * + ifail, integer *info); + +/* Subroutine */ int _starpu_zhprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zhpsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhpsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zhptrd_(char *uplo, integer *n, doublecomplex *ap, + doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); + +/* Subroutine */ int _starpu_zhptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zhptri_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zhptrs_(char 
*uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zhsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, doublecomplex *h__, integer *ldh, doublecomplex * + w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *mm, integer *m, doublecomplex *work, doublereal *rwork, + integer *ifaill, integer *ifailr, integer *info); + +/* Subroutine */ int _starpu_zhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, doublereal *alpha, doublecomplex *ab, + integer *ldab, doublecomplex *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy); + +doublereal _starpu_zla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, + integer *ipiv, doublereal *c__, logical *capply, integer *info, + doublecomplex *work, doublereal *rwork, ftnlen trans_len); + +doublereal _starpu_zla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, + integer *ipiv, doublecomplex *x, integer *info, doublecomplex *work, + doublereal *rwork, ftnlen trans_len); + +/* Subroutine */ int _starpu_zla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, + integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, + integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, + doublecomplex *res, doublereal 
*ayb, doublecomplex *dy, doublecomplex + *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, + doublereal *dz_ub__, logical *ignore_cwise__, integer *info); + +doublereal _starpu_zla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * + ldafb); + +/* Subroutine */ int _starpu_zla_geamv__(integer *trans, integer *m, integer *n, + doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, + integer *incx, doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_gercond_c__(char *trans, integer *n, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal * + c__, logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen trans_len); + +doublereal _starpu_zla_gercond_x__(char *trans, integer *n, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + trans_len); + +/* Subroutine */ int _starpu_zla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info); + +/* Subroutine */ int _starpu_zla_heamv__(integer *uplo, integer *n, doublereal *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_hercond_c__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex 
*af, integer *ldaf, integer *ipiv, doublereal *c__, + logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen uplo_len); + +doublereal _starpu_zla_hercond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + uplo_len); + +/* Subroutine */ int _starpu_zla_herfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_herpvgrw__(char *uplo, integer *n, integer *info, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_lin_berr__(integer *n, integer *nz, integer *nrhs, + doublecomplex *res, doublereal *ayb, doublereal *berr); + +doublereal _starpu_zla_porcond_c__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, doublereal *c__, logical * + capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + uplo_len); + +doublereal _starpu_zla_porcond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, doublecomplex *x, integer * + info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer 
*ldaf, logical *colequ, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, + doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, + doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_porpvgrw__(char *uplo, integer *ncols, doublecomplex *a, + integer *lda, doublecomplex *af, integer *ldaf, doublereal *work, + ftnlen uplo_len); + +doublereal _starpu_zla_rpvgrw__(integer *n, integer *ncols, doublecomplex *a, integer + *lda, doublecomplex *af, integer *ldaf); + +/* Subroutine */ int _starpu_zla_syamv__(integer *uplo, integer *n, doublereal *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy); + +doublereal _starpu_zla_syrcond_c__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, + logical *capply, integer *info, doublecomplex *work, doublereal * + rwork, ftnlen uplo_len); + +doublereal _starpu_zla_syrcond_x__(char *uplo, integer *n, doublecomplex *a, integer * + lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * + x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen + uplo_len); + +/* Subroutine */ int _starpu_zla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, + integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * + errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, + doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, + integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical 
* + ignore_cwise__, integer *info, ftnlen uplo_len); + +doublereal _starpu_zla_syrpvgrw__(char *uplo, integer *n, integer *info, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublereal *work, ftnlen uplo_len); + +/* Subroutine */ int _starpu_zla_wwaddw__(integer *n, doublecomplex *x, doublecomplex + *y, doublecomplex *w); + +/* Subroutine */ int _starpu_zlabrd_(integer *m, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, + doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * + ldx, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlacgv_(integer *n, doublecomplex *x, integer *incx); + +/* Subroutine */ int _starpu_zlacn2_(integer *n, doublecomplex *v, doublecomplex *x, + doublereal *est, integer *kase, integer *isave); + +/* Subroutine */ int _starpu_zlacon_(integer *n, doublecomplex *v, doublecomplex *x, + doublereal *est, integer *kase); + +/* Subroutine */ int _starpu_zlacp2_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlacpy_(char *uplo, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlacrm_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, + integer *ldc, doublereal *rwork); + +/* Subroutine */ int _starpu_zlacrt_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublecomplex *c__, doublecomplex * + s); + +/* Double Complex */ VOID _starpu_zladiv_(doublecomplex * ret_val, doublecomplex *x, + doublecomplex *y); + +/* Subroutine */ int _starpu_zlaed0_(integer *qsiz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, + integer *ldqs, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlaed7_(integer *n, integer *cutpnt, 
integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, + doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, + integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * + work, doublereal *rwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlaed8_(integer *k, integer *n, integer *qsiz, + doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, + integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * + q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, + integer *indxq, integer *perm, integer *givptr, integer *givcol, + doublereal *givnum, integer *info); + +/* Subroutine */ int _starpu_zlaein_(logical *rightv, logical *noinit, integer *n, + doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *v, + doublecomplex *b, integer *ldb, doublereal *rwork, doublereal *eps3, + doublereal *smlnum, integer *info); + +/* Subroutine */ int _starpu_zlaesy_(doublecomplex *a, doublecomplex *b, + doublecomplex *c__, doublecomplex *rt1, doublecomplex *rt2, + doublecomplex *evscal, doublecomplex *cs1, doublecomplex *sn1); + +/* Subroutine */ int _starpu_zlaev2_(doublecomplex *a, doublecomplex *b, + doublecomplex *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, + doublecomplex *sn1); + +/* Subroutine */ int _starpu_zlag2c_(integer *m, integer *n, doublecomplex *a, + integer *lda, complex *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_zlags2_(logical *upper, doublereal *a1, doublecomplex * + a2, doublereal *a3, doublereal *b1, doublecomplex *b2, doublereal *b3, + doublereal *csu, doublecomplex *snu, doublereal *csv, doublecomplex * + snv, doublereal *csq, doublecomplex *snq); + +/* Subroutine */ int _starpu_zlagtm_(char *trans, integer *n, integer *nrhs, + doublereal *alpha, doublecomplex *dl, doublecomplex *d__, + doublecomplex *du, doublecomplex *x, integer *ldx, 
doublereal *beta, + doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zlahef_(char *uplo, integer *n, integer *nb, integer *kb, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, + integer *ldw, integer *info); + +/* Subroutine */ int _starpu_zlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *info); + +/* Subroutine */ int _starpu_zlahr2_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlahrd_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy); + +/* Subroutine */ int _starpu_zlaic1_(integer *job, integer *j, doublecomplex *x, + doublereal *sest, doublecomplex *w, doublecomplex *gamma, doublereal * + sestpr, doublecomplex *s, doublecomplex *c__); + +/* Subroutine */ int _starpu_zlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, + doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, + integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, + doublereal *poles, doublereal *difl, doublereal *difr, doublereal * + z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, + integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * + k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + rwork, integer 
*iwork, integer *info); + +/* Subroutine */ int _starpu_zlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, + doublereal *rcond, integer *rank, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *info); + +doublereal _starpu_zlangb_(char *norm, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlange_(char *norm, integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlangt_(char *norm, integer *n, doublecomplex *dl, doublecomplex * + d__, doublecomplex *du); + +doublereal _starpu_zlanhb_(char *norm, char *uplo, integer *n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlanhf_(char *norm, char *transr, char *uplo, integer *n, + doublecomplex *a, doublereal *work); + +doublereal _starpu_zlanhp_(char *norm, char *uplo, integer *n, doublecomplex *ap, + doublereal *work); + +doublereal _starpu_zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, + doublereal *work); + +doublereal _starpu_zlanht_(char *norm, integer *n, doublereal *d__, doublecomplex *e); + +doublereal _starpu_zlansb_(char *norm, char *uplo, integer *n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlansp_(char *norm, char *uplo, integer *n, doublecomplex *ap, + doublereal *work); + +doublereal _starpu_zlansy_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work); + +doublereal _starpu_zlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + doublecomplex *ab, integer *ldab, doublereal *work); + +doublereal _starpu_zlantp_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *ap, doublereal *work); + +doublereal _starpu_zlantr_(char 
*norm, char *uplo, char *diag, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *work); + +/* Subroutine */ int _starpu_zlapll_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *ssmin); + +/* Subroutine */ int _starpu_zlapmt_(logical *forwrd, integer *m, integer *n, + doublecomplex *x, integer *ldx, integer *k); + +/* Subroutine */ int _starpu_zlaqgb_(integer *m, integer *n, integer *kl, integer *ku, + doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqge_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, + doublereal *colcnd, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqhb_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqhe_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_zlaqhp_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqp2_(integer *m, integer *n, integer *offset, + doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, + doublereal *vn1, doublereal *vn2, doublecomplex *work); + +/* Subroutine */ int _starpu_zlaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, doublecomplex *a, integer *lda, integer *jpvt, + doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex * + auxv, doublecomplex *f, integer *ldf); + +/* Subroutine */ int _starpu_zlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, 
doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, + doublecomplex *s1, doublecomplex *s2, doublecomplex *v); + +/* Subroutine */ int _starpu_zlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork); + +/* Subroutine */ int _starpu_zlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork); + +/* Subroutine */ int _starpu_zlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, + doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, + integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, + integer *ldv, doublecomplex *u, integer *ldu, integer *nv, + doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, + integer *ldwh); + +/* Subroutine */ int _starpu_zlaqsb_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + 
doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqsp_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed); + +/* Subroutine */ int _starpu_zlaqsy_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + char *equed); + +/* Subroutine */ int _starpu_zlar1v_(integer *n, integer *b1, integer *bn, doublereal + *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * + lld, doublereal *pivmin, doublereal *gaptol, doublecomplex *z__, + logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, + integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, + doublereal *rqcorr, doublereal *work); + +/* Subroutine */ int _starpu_zlar2v_(integer *n, doublecomplex *x, doublecomplex *y, + doublecomplex *z__, integer *incx, doublereal *c__, doublecomplex *s, + integer *incc); + +/* Subroutine */ int _starpu_zlarcm_(integer *m, integer *n, doublereal *a, integer * + lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, + doublereal *rwork); + +/* Subroutine */ int _starpu_zlarf_(char *side, integer *m, integer *n, doublecomplex + *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublecomplex *v, integer + *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * + ldc, doublecomplex *work, integer *ldwork); + +/* Subroutine */ int _starpu_zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau); + +/* Subroutine */ int _starpu_zlarfp_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau); + +/* Subroutine */ int _starpu_zlarft_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, 
doublecomplex * + t, integer *ldt); + +/* Subroutine */ int _starpu_zlarfx_(char *side, integer *m, integer *n, + doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlargv_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *c__, integer *incc); + +/* Subroutine */ int _starpu_zlarnv_(integer *idist, integer *iseed, integer *n, + doublecomplex *x); + +/* Subroutine */ int _starpu_zlarrv_(integer *n, doublereal *vl, doublereal *vu, + doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, + integer *m, integer *dol, integer *dou, doublereal *minrgp, + doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, + doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, + doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *iwork, integer *info); + +/* Subroutine */ int _starpu_zlarscl2_(integer *m, integer *n, doublereal *d__, + doublecomplex *x, integer *ldx); + +/* Subroutine */ int _starpu_zlartg_(doublecomplex *f, doublecomplex *g, doublereal * + cs, doublecomplex *sn, doublecomplex *r__); + +/* Subroutine */ int _starpu_zlartv_(integer *n, doublecomplex *x, integer *incx, + doublecomplex *y, integer *incy, doublereal *c__, doublecomplex *s, + integer *incc); + +/* Subroutine */ int _starpu_zlarz_(char *side, integer *m, integer *n, integer *l, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c__, integer *ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, doublecomplex + *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, + integer *ldc, doublecomplex *work, integer *ldwork); + +/* Subroutine */ int _starpu_zlarzt_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, 
doublecomplex * + t, integer *ldt); + +/* Subroutine */ int _starpu_zlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_zlascl2_(integer *m, integer *n, doublereal *d__, + doublecomplex *x, integer *ldx); + +/* Subroutine */ int _starpu_zlaset_(char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * + lda); + +/* Subroutine */ int _starpu_zlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublecomplex *a, + integer *lda); + +/* Subroutine */ int _starpu_zlassq_(integer *n, doublecomplex *x, integer *incx, + doublereal *scale, doublereal *sumsq); + +/* Subroutine */ int _starpu_zlaswp_(integer *n, doublecomplex *a, integer *lda, + integer *k1, integer *k2, integer *ipiv, integer *incx); + +/* Subroutine */ int _starpu_zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, + integer *ldw, integer *info); + +/* Subroutine */ int _starpu_zlat2c_(char *uplo, integer *n, doublecomplex *a, + integer *lda, complex *sa, integer *ldsa, integer *info); + +/* Subroutine */ int _starpu_zlatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, doublecomplex *ab, integer *ldab, + doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatdf_(integer *ijob, integer *n, doublecomplex *z__, + integer *ldz, doublecomplex *rhs, doublereal *rdsum, doublereal * + rdscal, integer *ipiv, integer *jpiv); + +/* Subroutine */ int _starpu_zlatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *ap, doublecomplex *x, doublereal * + scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatrd_(char *uplo, integer *n, integer *nb, + doublecomplex *a, 
integer *lda, doublereal *e, doublecomplex *tau, + doublecomplex *w, integer *ldw); + +/* Subroutine */ int _starpu_zlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, + doublereal *scale, doublereal *cnorm, integer *info); + +/* Subroutine */ int _starpu_zlatrz_(integer *m, integer *n, integer *l, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work); + +/* Subroutine */ int _starpu_zlatzm_(char *side, integer *m, integer *n, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c1, doublecomplex *c2, integer *ldc, doublecomplex *work); + +/* Subroutine */ int _starpu_zlauu2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zlauum_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpbcon_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *anorm, doublereal * + rcond, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpbequ_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, + doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * + ldafb, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zpbstf_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_zpbsvx_(char *fact, char *uplo, integer *n, 
integer *kd, + integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, + integer *ldafb, char *equed, doublereal *s, doublecomplex *b, integer + *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal * + ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zpbtf2_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbtrf_(char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, integer *info); + +/* Subroutine */ int _starpu_zpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * + ldb, integer *info); + +/* Subroutine */ int _starpu_zpftrf_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_zpftri_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_zpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zpocon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpoequ_(integer *n, doublecomplex *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpoequb_(integer *n, doublecomplex *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zporfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zporfsx_(char 
*uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zposv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpotf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info); + +/* Subroutine */ int _starpu_zpotrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int 
_starpu_zppcon_(char *uplo, integer *n, doublecomplex *ap, + doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal + *rwork, integer *info); + +/* Subroutine */ int _starpu_zppequ_(char *uplo, integer *n, doublecomplex *ap, + doublereal *s, doublereal *scond, doublereal *amax, integer *info); + +/* Subroutine */ int _starpu_zpprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zppsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, char *equed, doublereal * + s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *info); + +/* Subroutine */ int _starpu_zpptri_(char *uplo, integer *n, doublecomplex *ap, + integer *info); + +/* Subroutine */ int _starpu_zpptrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zpstf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *piv, integer *rank, doublereal *tol, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_zpstrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *piv, integer *rank, doublereal *tol, + doublereal *work, integer *info); + +/* Subroutine */ int _starpu_zptcon_(integer *n, doublereal *d__, doublecomplex *e, + doublereal *anorm, doublereal *rcond, doublereal *rwork, integer * + info); + +/* Subroutine */ int 
_starpu_zpteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_zptrfs_(char *uplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_zptsv_(integer *n, integer *nrhs, doublereal *d__, + doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_zptsvx_(char *fact, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zpttrf_(integer *n, doublereal *d__, doublecomplex *e, + integer *info); + +/* Subroutine */ int _starpu_zpttrs_(char *uplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zptts2_(integer *iuplo, integer *n, integer *nrhs, + doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_zrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s); + +/* Subroutine */ int _starpu_zspcon_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zspmv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * + beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zspr_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex 
*x, integer *incx, doublecomplex *ap); + +/* Subroutine */ int _starpu_zsprfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * + b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_zspsv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zsptri_(char *uplo, integer *n, doublecomplex *ap, + integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsptrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zstegr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zstein_(integer *n, doublereal *d__, doublereal *e, + integer *m, doublereal *w, integer *iblock, 
integer *isplit, + doublecomplex *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info); + +/* Subroutine */ int _starpu_zstemr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, integer *m, doublereal *w, doublecomplex *z__, integer * + ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info); + +/* Subroutine */ int _starpu_zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info); + +/* Subroutine */ int _starpu_zsycon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsyequb_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsymv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy); + +/* Subroutine */ int _starpu_zsyr_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); + +/* Subroutine */ int _starpu_zsyrfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal 
*rcond, doublereal *berr, + integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsysv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zsysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_zsysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_zsytf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info); + +/* Subroutine */ int _starpu_zsytrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zsytri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zsytrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ztbcon_(char *norm, char *uplo, 
char *diag, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublereal *rcond, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * + rwork, integer *info); + +/* Subroutine */ int _starpu_ztbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, + doublecomplex *b, integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ztfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, doublecomplex *alpha, + doublecomplex *a, doublecomplex *b, integer *ldb); + +/* Subroutine */ int _starpu_ztftri_(char *transr, char *uplo, char *diag, integer *n, + doublecomplex *a, integer *info); + +/* Subroutine */ int _starpu_ztfttp_(char *transr, char *uplo, integer *n, + doublecomplex *arf, doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztfttr_(char *transr, char *uplo, integer *n, + doublecomplex *arf, doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztgevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *s, integer *lds, doublecomplex *p, integer + *ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * + ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, + integer *info); + +/* Subroutine */ int _starpu_ztgex2_(logical *wantq, logical *wantz, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, + integer *j1, integer *info); + +/* Subroutine */ int _starpu_ztgexc_(logical *wantq, logical *wantz, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + 
doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, + integer *ifst, integer *ilst, integer *info); + +/* Subroutine */ int _starpu_ztgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * + ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, + doublecomplex *work, integer *lwork, integer *iwork, integer *liwork, + integer *info); + +/* Subroutine */ int _starpu_ztgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, + doublereal *tolb, doublereal *alpha, doublereal *beta, doublecomplex * + u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, + integer *ldq, doublecomplex *work, integer *ncycle, integer *info); + +/* Subroutine */ int _starpu_ztgsna_(char *job, char *howmny, logical *select, + integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer + *ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * + ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, + doublecomplex *work, integer *lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ztgsy2_(char *trans, integer *ijob, integer *m, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, + doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, + doublereal *scale, doublereal *rdsum, doublereal *rdscal, integer * + info); + +/* Subroutine */ int _starpu_ztgsyl_(char *trans, integer *ijob, integer *m, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, + doublecomplex *e, integer *lde, 
doublecomplex *f, integer *ldf, + doublereal *scale, doublereal *dif, doublecomplex *work, integer * + lwork, integer *iwork, integer *info); + +/* Subroutine */ int _starpu_ztpcon_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *ap, doublereal *rcond, doublecomplex *work, doublereal + *rwork, integer *info); + +/* Subroutine */ int _starpu_ztprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztptri_(char *uplo, char *diag, integer *n, + doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, + integer *info); + +/* Subroutine */ int _starpu_ztpttf_(char *transr, char *uplo, integer *n, + doublecomplex *ap, doublecomplex *arf, integer *info); + +/* Subroutine */ int _starpu_ztpttr_(char *uplo, integer *n, doublecomplex *ap, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrcon_(char *norm, char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, doublereal *rcond, doublecomplex * + work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrexc_(char *compq, integer *n, doublecomplex *t, + integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * + ilst, integer *info); + +/* Subroutine */ int _starpu_ztrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, + integer 
*ldb, doublecomplex *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info); + +/* Subroutine */ int _starpu_ztrsen_(char *job, char *compq, logical *select, integer + *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, + doublecomplex *w, integer *m, doublereal *s, doublereal *sep, + doublecomplex *work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_ztrsna_(char *job, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, + doublereal *sep, integer *mm, integer *m, doublecomplex *work, + integer *ldwork, doublereal *rwork, integer *info); + +/* Subroutine */ int _starpu_ztrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, + integer *info); + +/* Subroutine */ int _starpu_ztrti2_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrtri_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info); + +/* Subroutine */ int _starpu_ztrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer *info); + +/* Subroutine */ int _starpu_ztrttf_(char *transr, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *arf, integer *info); + +/* Subroutine */ int _starpu_ztrttp_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *ap, integer *info); + +/* Subroutine */ int _starpu_ztzrqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, integer *info); + +/* Subroutine */ int _starpu_ztzrzf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, 
+ integer *info); + +/* Subroutine */ int _starpu_zung2l_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zung2r_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zungbr_(char *vect, integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zunghr_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungl2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zunglq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungql_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungqr_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungr2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zungrq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zungtr_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer 
*info); + +/* Subroutine */ int _starpu_zunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublecomplex *a, integer *lda, + doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * + work, integer *lwork, integer *info); + +/* Subroutine */ int _starpu_zunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmr2_(char 
*side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_zunmr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + info); + +/* Subroutine */ int _starpu_zunmrq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zunmrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info); + +/* Subroutine */ int _starpu_zunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info); + +/* Subroutine */ int _starpu_zupgtr_(char *uplo, integer *n, doublecomplex *ap, + doublecomplex *tau, doublecomplex *q, integer *ldq, doublecomplex * + work, integer *info); + +/* Subroutine */ int _starpu_zupmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *c__, + integer *ldc, doublecomplex *work, integer *info); + +/* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1); + +doublereal _starpu_dsecnd_(); + +/* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, + integer *vers_patch__); + +logical _starpu_lsame_(char *ca, char *cb); + +doublereal _starpu_second_(); + +doublereal _starpu_slamch_(char *cmach); + +/* Subroutine */ int _starpu_slamc1_(integer *beta, 
integer *t, logical *rnd, logical + *ieee1); + +/* Subroutine */ int _starpu_slamc2_(integer *beta, integer *t, logical *rnd, real * + eps, integer *emin, real *rmin, integer *emax, real *rmax); + +doublereal _starpu_slamc3_(real *a, real *b); + +/* Subroutine */ int _starpu_slamc4_(integer *emin, real *start, integer *base); + +/* Subroutine */ int _starpu_slamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, real *rmax); + + +doublereal _starpu_dlamch_(char *cmach); + +/* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1); + +/* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, + doublereal *eps, integer *emin, doublereal *rmin, integer *emax, + doublereal *rmax); + +doublereal _starpu_dlamc3_(doublereal *a, doublereal *b); + +/* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base); + +/* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, doublereal *rmax); + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4); + +#ifdef __cplusplus +} +#endif + + +#endif /* __CLAPACK_H */ diff --git a/min-dgels/base/INCLUDE/f2c.h b/min-dgels/base/INCLUDE/f2c.h new file mode 100644 index 0000000..b94ee7c --- /dev/null +++ b/min-dgels/base/INCLUDE/f2c.h @@ -0,0 +1,223 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
+ + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef long int integer; +typedef unsigned long int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef long int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; +#ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ +typedef long long longint; /* system-dependent */ +typedef unsigned long long ulongint; /* system-dependent */ +#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) +#define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) +#endif + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef long int flag; +typedef long int ftnlen; +typedef long int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; 
+ char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +/*typedef long int Long;*/ /* No longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(); +typedef shortint (*J_fp)(); +typedef integer (*I_fp)(); +typedef real (*R_fp)(); +typedef doublereal (*D_fp)(), (*E_fp)(); +typedef /* Complex */ VOID (*C_fp)(); +typedef /* Double Complex */ VOID 
(*Z_fp)(); +typedef logical (*L_fp)(); +typedef shortlogical (*K_fp)(); +typedef /* Character */ VOID (*H_fp)(); +typedef /* Subroutine */ int (*S_fp)(); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs +#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/min-dgels/base/Makefile b/min-dgels/base/Makefile new file mode 100644 index 0000000..11b4d83 --- /dev/null +++ b/min-dgels/base/Makefile @@ -0,0 +1,25 @@ +# +# Top Level Makefile for LAPACK +# Version 3.2.1 +# June 2009 +# + +include make.inc + +all: lib + +lib: f2clib blaslib + +clean: cleanlib + +blaslib: + ( cd BLAS/SRC; $(MAKE) ) + +f2clib: + ( cd F2CLIBS/libf2c; $(MAKE) ) + +cleanlib: + ( cd BLAS/SRC; $(MAKE) clean ) + ( cd SRC; $(MAKE) clean ) + ( cd SRC/VARIANTS; $(MAKE) clean ) + ( cd F2CLIBS/libf2c; $(MAKE) clean ) diff --git a/min-dgels/base/README.install b/min-dgels/base/README.install new file mode 100644 index 0000000..684933a --- /dev/null +++ b/min-dgels/base/README.install @@ -0,0 +1,218 @@ + =================== + CLAPACK README FILE + =================== + +============================================================================================ + Version 3.2.1 (threadsafe) + Release date: June 2009 / April 2010 +F2C translation of LAPACK 3.2.1 +To get revisions info about LAPACK 3.2.1, please see http://www.netlib.org/lapack/lapack-3.2.1.html + +FOR SUPPORT: LAPACK Forum: http://icl.cs.utk.edu/lapack-forum/ 
+============================================================================================ + +This README file describes how to install the ANSI C translation of the +LAPACK library, called CLAPACK. CLAPACK must be compiled with an ANSI Standard +C compiler. If the C compiler on your machine is an old-style C compiler, you +will have to use gcc to compile the package. + +IMPORTANT NOTE: + + You *CANNOT* just go to www.netlib.org/clapack, download a routine like + sgesv.c and have it work unless you properly install and link to the + f2c and BLAS routines as described below. If your linker complains about + missing functions, you have probably accidentally neglected this step. + Also, you will need the file "f2c.h" (included with the f2c libraries) +in order to compile these routines. + The default BLAS routines included with CLAPACK in the BLAS/SRC + subdirectory may also be used, but these will most likely be + slower than a BLAS library optimized for your machine. If you do + not have such an optimized BLAS library, you can get one at + + http://www.netlib.org/atlas + + +============================================================================== + +For a fast default installation, you will need to + - Untar clapack.tar and modify the make.inc file (see step 1 below) + - Make the f2c libraries (see step 2 below) + - Make the BLAS library (see step 3 below) + - Make the main library, test it, and time it by simply typing + make + +If you encounter difficulties, you may find the installation manual for +the FORTRAN version (INSTALL/lawn81.*) useful.
+ + + Procedure for installing CLAPACK: +============================================================================== + +(1) 'tar xvf clapack.tar' to build the following directory structure: + CLAPACK/README.install this file + CLAPACK/BLAS/ C source for BLAS + CLAPACK/F2CLIBS/ f2c I/O functions (libI77) and math functions (libF77) + CLAPACK/INSTALL/ Testing functions and pre-tested make.inc files + for various platforms. + CLAPACK/INCLUDE/ header files - clapack.h contains C prototypes of all the + CLAPACK routines. + CLAPACK/SRC/ C source of LAPACK routines + CLAPACK/TESTING/ driver routines to test correctness + CLAPACK/make.inc compiler, compile flags and library definitions, + included in all Makefiles. + NOTE: It's better to use the gcc compiler on some older + Sun systems. + You should be sure to modify the make.inc file for your system. + +(2) Build the f2c libraries by doing: + make f2clib + +############################################################################## +WARNING: 1) If your system lacks onexit() and you are not using an ANSI C + compiler, then you should change your F2CCFLAGS line in + make.inc to + F2CCFLAGS=$(CFLAGS) -DNO_ONEXIT + On at least some Sun systems, it is better to use + F2CCFLAGS=$(CFLAGS) -Donexit=on_exit + 2) On at least some Sun systems, the type declaration in + F2CLIBS/libI77/rawio.h: extern FILE *fdopen(int, char*) + is not consistent with the one defined in stdio.h. In this case + you should comment out this line. + +############################################################################## + +(3) To run CLAPACK, you need to create a BLAS library. + The performance of CLAPACK largely depends on the performance + of the BLAS library. + + You can either use the reference BLAS library included with + this distribution, which is easy to install but not optimized to be + fast on any particular machine, or else find a version of the + BLAS optimized for your machine.
+ + If you want to use the reference BLAS library included with + this distribution, build it by doing + make blaslib + from the main directory. + + If you want to find a BLAS library optimized for your machine, + see the note below for more details; + see also the README in the BLAS/WRAP directory. + +(4) Compile and run the BLAS TESTING code by doing: + cd CLAPACK/BLAS/TESTING; make -f Makeblat2 + cd CLAPACK/BLAS + xblat2s < sblat2.in + xblat2d < dblat2.in + xblat2c < cblat2.in + xblat2z < zblat2.in + cd CLAPACK/BLAS/TESTING; make -f Makeblat3 + cd CLAPACK/BLAS + xblat3s < sblat3.in + xblat3d < dblat3.in + xblat3c < cblat3.in + xblat3z < zblat3.in + + Inspect the output files *.SUMM to confirm that no errors occurred. + +{NOTE: If a compile error involving _atexit appears, see the information + in the WARNING above.} + +{NOTE: For the highest performance, it is best to use a version of the BLAS + optimized for your particular machine. This may be done by modifying + the line + BLASLIB = ../../blas$(PLAT).a + in CLAPACK/make.inc to point to the optimized BLAS. + +Link with BLAS which provides the standard CBLAS interface +========================================================== + If you are using a version of the BLAS which provides the standard + CBLAS interface (e.g. ATLAS), you need to add the appropriate "wrapper" library. + You can make the wrapper library libcblaswr.a by typing + "make cblaswrap" from the main directory.
For this setup + (ATLAS with the CBLAS wrapper), the BLASLIB line might look like the following. +Example: +Modification to make.inc +CC = gcc +BLASLIB = ../../libcblaswr.a -lcblas -latlas +Creation of libcblaswr.a : make cblaswrap + +Link with BLAS which provides the Fortran calling interface +=============================================== +Two possibilities: + - add -DNO_BLAS_WRAP to the CC variable when compiling; no "wrapper" library is needed +Example: +CC = gcc -DNO_BLAS_WRAP +BLASLIB = -lgoto -lpthread + + - add the sample Fortran calling interface (libfblaswr.a), which is provided for + systems with Sun-style Fortran calling conventions; however, + this interface will need modifications to work on other + architectures which have different Fortran calling conventions. + See the README in the BLAS/WRAP subdirectory for further information. +Example: +CC = gcc +BLASLIB = ../../libfblaswr.a -lgoto -lpthread +Creation of libfblaswr.a : make fblaswrap +} + +(5) Build the archive containing lapack source code by doing: + cd CLAPACK/SRC; make + +(6) Compile the matrix generation software, the eigenroutine TESTING + code, the linear system TESTING code, and run the LAPACK tests + by doing: + cd CLAPACK/TESTING/MATGEN; make + cd CLAPACK/TESTING; make + + Inspect the output files *.out to confirm that no errors occurred. + +I. Compile the matrix generation software, the eigenroutine TESTING code, + the linear system TESTING code, and run the LAPACK tests separately + by doing: + cd CLAPACK/TESTING/MATGEN; make + cd CLAPACK/TESTING/EIG; make + cd CLAPACK/TESTING/LIN; make + cd CLAPACK/TESTING; make +II. After the executable files and libraries have been created for each + of the compiles, the object files should be removed by doing: + make clean +III. Each 'make' may be accomplished just for one or a subset of the + precisions desired. For example: + make single + make single complex + make single double complex complex16 + Using make without any arguments will compile all four precisions.
+ +James Demmel +Xiaoye Li +Chris Puscasiu +Steve Timson + +UC Berkeley +Sept 27 1993 + + +{Revised by Susan Ostrouchov and Jude Toth} + {The University of Tennessee at Knoxville} + {October 15, 1993} + +{Revised by Xiaoye Li and James Demmel} + {University of California at Berkeley} + {November 22, 1994} + +{Revised by David Bindel and James Demmel} + {University of California at Berkeley} + {July 19, 2000} + +{Revised by Julie Langou} + {University of Tennessee} + {February 2008} + +{Revised by Julie Langou} +{University of Tennessee} + {October 2008, April 2010} + +{Revised by Peng Du} +{University of Tennessee} + {May 2009} diff --git a/min-dgels/base/SRC/Makefile b/min-dgels/base/SRC/Makefile new file mode 100644 index 0000000..a438098 --- /dev/null +++ b/min-dgels/base/SRC/Makefile @@ -0,0 +1,177 @@ +TOPDIR=.. +include $(TOPDIR)/make.inc + + +####################################################################### +# This is the makefile to create a library for LAPACK. +# The files are organized as follows: +# ALLAUX -- Auxiliary routines called from all precisions +# ALLXAUX -- Auxiliary routines called from all precisions but +# only from routines using extra precision. +# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX +# DZLAUX -- Auxiliary routines called from both DOUBLE PRECISION +# and COMPLEX*16 +# SLASRC -- Single precision real LAPACK routines +# SXLASRC -- Single precision real LAPACK routines using extra +# precision. +# CLASRC -- Single precision complex LAPACK routines +# CXLASRC -- Single precision complex LAPACK routines using extra +# precision. +# DLASRC -- Double precision real LAPACK routines +# DXLASRC -- Double precision real LAPACK routines using extra +# precision. +# ZLASRC -- Double precision complex LAPACK routines +# ZXLASRC -- Double precision complex LAPACK routines using extra +# precision. +# +# The library can be set up to include routines for any combination +# of the four precisions. 
To create or add to the library, enter make +# followed by one or more of the precisions desired. Some examples: +# make single +# make single complex +# make single double complex complex16 +# Alternatively, the command +# make +# without any arguments creates a library of all four precisions. +# The library is called +# lapack.a +# and is created at the next higher directory level. +# +# To remove the object files after the library is created, enter +# make clean +# On some systems, you can force the source files to be recompiled by +# entering (for example) +# make single FRC=FRC +# +# ***Note*** +# The functions lsame, second, dsecnd, slamch, and dlamch may have +# to be installed before compiling the library. Refer to the +# installation guide, LAPACK Working Note 41, for instructions. +# +####################################################################### + +ALLAUX = maxloc.o ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \ + ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ + ../INSTALL/ilaver.o ../INSTALL/lsame.o + +ALLXAUX = + +DZLAUX = \ + dbdsdc.o \ + dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ + dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ + dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \ + dlagts.o dlamrg.o dlanst.o \ + dlapy2.o dlapy3.o dlarnv.o \ + dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \ + dlarrk.o dlarrr.o dlaneg.o \ + dlartg.o dlaruv.o dlas2.o dlascl.o \ + dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \ + dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \ + dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \ + dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \ + dsteqr.o dsterf.o dlaisnan.o disnan.o \ + ../INSTALL/dlamch.o ../INSTALL/dsecnd.o + +DLASRC = \ + dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ + dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ + dgebrd.o dgecon.o dgeequ.o dgees.o 
dgeesx.o dgeev.o dgeevx.o \ + dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \ + dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \ + dgeqp3.o dgeqpf.o dgeqr2.o dgeqrf.o dgerfs.o dgerq2.o dgerqf.o \ + dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o dgetc2.o dgetf2.o \ + dgetrf.o dgetri.o \ + dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \ + dggglm.o dgghrd.o dgglse.o dggqrf.o \ + dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \ + dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \ + dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \ + dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \ + dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \ + dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \ + dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \ + dlapll.o dlapmt.o \ + dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ + dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ + dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ + dlarf.o dlarfb.o dlarfg.o dlarft.o dlarfx.o dlargv.o \ + dlarrv.o dlartv.o dlarfp.o \ + dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \ + dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \ + dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ + dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ + dorgrq.o dorgtr.o dorm2l.o dorm2r.o \ + dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ + dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ + dpbstf.o dpbsv.o dpbsvx.o \ + dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \ + dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \ + dppcon.o dppequ.o \ + dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \ + dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \ + dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ + dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ + dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o 
dspsvx.o dsptrd.o \ + dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ + dstevx.o dsycon.o dsyev.o dsyevd.o dsyevr.o \ + dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ + dsysv.o dsysvx.o \ + dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytrs.o dtbcon.o \ + dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \ + dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \ + dtptrs.o \ + dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \ + dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \ + dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \ + dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \ + dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \ + dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \ + dgeequb.o dsyequb.o dpoequb.o dgbequb.o + +DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ + dla_gercond.o dla_rpvgrw.o dsysvxx.o dsyrfsx.o \ + dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \ + dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \ + dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \ + dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \ + dlascl2.o dla_wwaddw.o + +all: ../$(LAPACKLIB) + +ifdef USEXBLAS +ALLXOBJ=$(DXLASRC) $(ALLXAUX) +endif + +ALLOBJ=$(DLASRC) $(DZLAUX) \ + $(ALLAUX) + +../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) + $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) + $(RANLIB) $@ + +double: $(DLASRC) $(ALLAUX) $(DZLAUX) + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(ALLAUX) \ + $(DZLAUX) + $(RANLIB) ../$(LAPACKLIB) + +$(ALLAUX): $(FRC) +$(DZLAUX): $(FRC) +$(DLASRC): $(FRC) +ifdef USEXBLAS +$(ALLXAUX): $(FRC) +$(DXLASRC): $(FRC) +endif + +FRC: + @FRC=$(FRC) + +clean: + rm -f *.o + +.c.o: + $(CC) $(CFLAGS) -c $< + +dlaruv.o: dlaruv.c ; $(CC) $(NOOPT) -c $< -o $@ +dla_wwaddw.o: dla_wwaddw.c ; $(CC) $(NOOPT) -c $< -o $@ + diff --git a/min-dgels/base/SRC/VARIANTS/Makefile b/min-dgels/base/SRC/VARIANTS/Makefile new file mode 
100644 index 0000000..94bb997 --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/Makefile @@ -0,0 +1,68 @@ +TOPDIR=../.. +include $(TOPDIR)/make.inc + +####################################################################### +# This is the makefile to create the variants libraries for LAPACK. +# The files are organized as follows: +# CHOLRL -- Right looking block version of the algorithm, calling Level 3 BLAS +# CHOLTOP -- Top looking block version of the algorithm, calling Level 3 BLAS +# LUCR -- Crout Level 3 BLAS version of LU factorization +# LULL -- left-looking Level 3 BLAS version of LU factorization +# QRLL -- left-looking Level 3 BLAS version of QR factorization +# LUREC -- an iterative version of Sivan Toledo's recursive LU algorithm[1]. +# For square matrices, this iterative version should +# be within a factor of two of the optimum number of memory transfers. +# +# [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +# Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +# 1065-1081. 
http://dx.doi.org/10.1137/S0895479896297744 +####################################################################### + +VARIANTSDIR=LIB + +CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o + +CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o + +LUCR = lu/CR/cgetrf.o lu/CR/dgetrf.o lu/CR/sgetrf.o lu/CR/zgetrf.o + +LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o + +LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o + +QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o + + +all: cholrl choltop lucr lull lurec qrll + +cholrl: $(CHOLRL) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL) + $(RANLIB) $(VARIANTSDIR)/cholrl.a + +choltop: $(CHOLTOP) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP) + $(RANLIB) $(VARIANTSDIR)/choltop.a + +lucr: $(LUCR) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR) + $(RANLIB) $(VARIANTSDIR)/lucr.a + +lull: $(LULL) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL) + $(RANLIB) $(VARIANTSDIR)/lull.a + +lurec: $(LUREC) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC) + $(RANLIB) $(VARIANTSDIR)/lurec.a + +qrll: $(QRLL) + $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL) + $(RANLIB) $(VARIANTSDIR)/qrll.a + + +.c.o: + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) \ + $(VARIANTSDIR)/*.a diff --git a/min-dgels/base/SRC/VARIANTS/README b/min-dgels/base/SRC/VARIANTS/README new file mode 100644 index 0000000..6b4f325 --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/README @@ -0,0 +1,84 @@ + =============== + = README File = + =============== + +This README File is for the LAPACK driver variants. +It is composed of 5 sections: + - Description: contents a quick description of each of the variants. For a more detailed description please refer to LAWN XXX. 
+ - Build + - Testing + - Linking your program + - Support + +Author: Julie LANGOU, May 2008 + +=============== += DESCRIPTION = +=============== + +This directory contains several variants of LAPACK routines in single/double/complex/double complex precision: + - [sdcz]getrf with LU Crout Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/CR + - [sdcz]getrf with LU Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/LL + - [sdcz]getrf with Sivan Toledo's recursive LU algorithm [1] - Directory: SRC/VARIANTS/lu/REC + - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL + - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL + - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP + +References: For a more detailed description please refer to + - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), + 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 + - [2] LAWN XXX + +========= += BUILD = +========= + +These variants are compiled by default in the build process but they are not tested by default. +The build process creates one new library per variant in the four arithmetics (single/double/complex/double complex). +The libraries are in the SRC/VARIANTS/LIB directory. + +Corresponding libraries created in SRC/VARIANTS/LIB: + - LU Crout : lucr.a + - LU Left Looking : lull.a + - LU Sivan Toledo's recursive : lurec.a + - QR Left Looking : qrll.a + - Cholesky Right Looking : cholrl.a + - Cholesky Top : choltop.a + + +=========== += TESTING = +=========== + +To test these variants you can type 'make variants-testing' +This will rerun the linear methods tests once per variant and append the short name of the variant to the output files. 
+You should then see the following files in the TESTING directory: +[scdz]test_cholrl.out +[scdz]test_choltop.out +[scdz]test_lucr.out +[scdz]test_lull.out +[scdz]test_lurec.out +[scdz]test_qrll.out + +======================== += LINKING YOUR PROGRAM = +======================== + +You just need to add the variants methods library in your linking sequence before your lapack library. +Here is a quick example for LU + +Default using LU Right Looking version: + $(FORTRAN) -c myprog.f + $(FORTRAN) -o myexe myprog.o $(LAPACKLIB) $(BLASLIB) + +Using LU Left Looking version: + $(FORTRAN) -c myprog.f + $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS/LIB)/lull.a $(LAPACKLIB) $(BLASLIB) + +=========== += SUPPORT = +=========== + +You can use either the LAPACK forum or the LAPACK mailing list to get support. +LAPACK forum : http://icl.cs.utk.edu/lapack-forum +LAPACK mailing list : lapack@cs.utk.edu diff --git a/min-dgels/base/SRC/VARIANTS/cholesky/RL/dpotrf.c b/min-dgels/base/SRC/VARIANTS/cholesky/RL/dpotrf.c new file mode 100644 index 0000000..91d71af --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/cholesky/RL/dpotrf.c @@ -0,0 +1,233 @@ +/* dpotrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b17 = 1.; +static doublereal c_b20 = -1.; + +/* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer j, jb, nb; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dpotf2_(char *, integer *, + doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* March 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A. 
*/ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* This is the right looking block version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Updating the trailing submatrix. */ + + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b17, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + i__3 = *n - j - jb + 1; + _starpu_dsyrk_("Upper", "Transpose", &i__3, &jb, &c_b20, &a[j + ( + j + jb) * a_dim1], lda, &c_b17, &a[j + jb + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. 
*/ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Updating the trailing submatrix. */ + + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b17, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + _starpu_dsyrk_("Lower", "No Transpose", &i__3, &jb, &c_b20, &a[j + + jb + j * a_dim1], lda, &c_b17, &a[j + jb + (j + + jb) * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* _starpu_dpotrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/cholesky/TOP/dpotrf.c b/min-dgels/base/SRC/VARIANTS/cholesky/TOP/dpotrf.c new file mode 100644 index 0000000..26d89b8 --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/cholesky/TOP/dpotrf.c @@ -0,0 +1,225 @@ +/* dpotrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b15 = 1.; +static doublereal c_b18 = -1.; + +/* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer j, jb, nb; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dpotf2_(char *, integer *, + doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* March 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A. 
*/ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* This is the top-looking block version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + +/* Compute the current block. */ + + i__3 = j - 1; + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &i__3, &jb, & + c_b15, &a[a_dim1 + 1], lda, &a[j * a_dim1 + 1], lda); + i__3 = j - 1; + _starpu_dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b18, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + + _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + +/* Compute the current block. 
*/ + + i__3 = j - 1; + _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &jb, &i__3, + &c_b15, &a[a_dim1 + 1], lda, &a[j + a_dim1], lda); + i__3 = j - 1; + _starpu_dsyrk_("Lower", "No Transpose", &jb, &i__3, &c_b18, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + + _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* _starpu_dpotrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c b/min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c new file mode 100644 index 0000000..89c489c --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c @@ -0,0 +1,222 @@ +/* dgetrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b11 = -1.; +static doublereal c_b12 = 1.; + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, jb, nb; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer 
iinfo; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* March 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* using partial pivoting with row interchanges. */ + +/* The factorization has the form */ +/* A = P * L * U */ +/* where P is a permutation matrix, L is lower triangular with unit */ +/* diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* triangular (upper trapezoidal if m < n). */ + +/* This is the Crout Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* Update current block. */ + + i__3 = *m - j + 1; + i__4 = j - 1; + _starpu_dgemm_("No transpose", "No transpose", &i__3, &jb, &i__4, &c_b11, + &a[j + a_dim1], lda, &a[j * a_dim1 + 1], lda, &c_b12, &a[ + j + j * a_dim1], lda); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. 
*/ + + i__3 = *m - j + 1; + _starpu_dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to column 1:J-1 */ + + i__3 = j - 1; + i__4 = j + jb - 1; + _starpu_dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to column J+JB:N */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + _starpu_dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + _starpu_dgemm_("No transpose", "No transpose", &jb, &i__3, &i__4, & + c_b11, &a[j + a_dim1], lda, &a[(j + jb) * a_dim1 + 1], + lda, &c_b12, &a[j + (j + jb) * a_dim1], lda); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b12, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* _starpu_dgetrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c b/min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c new file mode 100644 index 0000000..59e01ef --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c @@ -0,0 +1,257 @@ +/* dgetrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b15 = 1.; +static doublereal c_b18 = -1.; + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + integer i__, j, k, jb, nb; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iinfo; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* March 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* using partial pivoting with row interchanges. */ + +/* The factorization has the form */ +/* A = P * L * U */ +/* where P is a permutation matrix, L is lower triangular with unit */ +/* diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* triangular (upper trapezoidal if m < n). */ + +/* This is the left-looking Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* Update before factoring the current panel */ + + i__3 = j - nb; + i__4 = nb; + for (k = 1; i__4 < 0 ? k >= i__3 : k <= i__3; k += i__4) { + +/* Apply interchanges to rows K:K+NB-1. */ + + i__5 = k + nb - 1; + _starpu_dlaswp_(&jb, &a[j * a_dim1 + 1], lda, &k, &i__5, &ipiv[1], & + c__1); + +/* Compute block row of U. */ + + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &nb, &jb, & + c_b15, &a[k + k * a_dim1], lda, &a[k + j * a_dim1], + lda); + +/* Update trailing submatrix. */ + + i__5 = *m - k - nb + 1; + _starpu_dgemm_("No transpose", "No transpose", &i__5, &jb, &nb, & + c_b18, &a[k + nb + k * a_dim1], lda, &a[k + j * + a_dim1], lda, &c_b15, &a[k + nb + j * a_dim1], lda); +/* L30: */ + } + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__4 = *m - j + 1; + _starpu_dgetf2_(&i__4, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. 
*/ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__3 = *m, i__5 = j + jb - 1; + i__4 = min(i__3,i__5); + for (i__ = j; i__ <= i__4; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* L20: */ + } + +/* Apply interchanges to the left-overs */ + + i__2 = min(*m,*n); + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { + i__4 = k - 1; +/* Computing MIN */ + i__5 = k + nb - 1, i__6 = min(*m,*n); + i__3 = min(i__5,i__6); + _starpu_dlaswp_(&i__4, &a[a_dim1 + 1], lda, &k, &i__3, &ipiv[1], &c__1); +/* L40: */ + } + +/* Apply update to the M+1:N columns when N > M */ + + if (*n > *m) { + i__1 = *n - *m; + _starpu_dlaswp_(&i__1, &a[(*m + 1) * a_dim1 + 1], lda, &c__1, m, &ipiv[1], + &c__1); + i__1 = *m; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__4 = *m - k + 1; + jb = min(i__4,nb); + + i__4 = *n - *m; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__4, & + c_b15, &a[k + k * a_dim1], lda, &a[k + (*m + 1) * + a_dim1], lda); + + if (k + nb <= *m) { + i__4 = *m - k - nb + 1; + i__3 = *n - *m; + _starpu_dgemm_("No transpose", "No transpose", &i__4, &i__3, &nb, + &c_b18, &a[k + nb + k * a_dim1], lda, &a[k + (*m + + 1) * a_dim1], lda, &c_b15, &a[k + nb + (*m + 1) + * a_dim1], lda); + } +/* L50: */ + } + } + + } + return 0; + +/* End of DGETRF */ + +} /* _starpu_dgetrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c b/min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c new file mode 100644 index 0000000..fbd7016 --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c @@ -0,0 +1,268 @@ +/* dgetrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = 1.; +static doublereal c_b15 = -1.; + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, ipivstart, jpivstart, jp; + doublereal tmp; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer kcols; + doublereal sfmin; + integer nstep; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer kahead; + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer npived; + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + integer kstart, ntopiv; + + +/* -- LAPACK routine (version 3.X) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* May 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* using partial pivoting with row interchanges. */ + +/* The factorization has the form */ +/* A = P * L * U */ +/* where P is a permutation matrix, L is lower triangular with unit */ +/* diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* triangular (upper trapezoidal if m < n). */ + +/* This code implements an iterative version of Sivan Toledo's recursive */ +/* LU algorithm[1]. For square matrices, this iterative version should */ +/* be within a factor of two of the optimum number of memory transfers. */ + +/* The pattern is as follows, with the large blocks of U being updated */ +/* in one call to DTRSM, and the dotted lines denoting sections that */ +/* have had all pending permutations applied: */ + +/* 1 2 3 4 5 6 7 8 */ +/* +-+-+---+-------+------ */ +/* | |1| | | */ +/* |.+-+ 2 | | */ +/* | | | | | */ +/* |.|.+-+-+ 4 | */ +/* | | | |1| | */ +/* | | |.+-+ | */ +/* | | | | | | */ +/* |.|.|.|.+-+-+---+ 8 */ +/* | | | | | |1| | */ +/* | | | | |.+-+ 2 | */ +/* | | | | | | | | */ +/* | | | | |.|.+-+-+ */ +/* | | | | | | | |1| */ +/* | | | | | | |.+-+ */ +/* | | | | | | | | | */ +/* |.|.|.|.|.|.|.|.+----- */ +/* | | | | | | | | | */ + +/* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */ +/* the binary expansion of the current column. Each Schur update is */ +/* applied as soon as the necessary portion of U is available. */ + +/* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */ +/* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */ +/* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. 
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = _starpu_dlamch_("S"); + + nstep = min(*m,*n); + i__1 = nstep; + for (j = 1; j <= i__1; ++j) { + kahead = j & -j; + kstart = j + 1 - kahead; +/* Computing MIN */ + i__2 = kahead, i__3 = *m - j; + kcols = min(i__2,i__3); + +/* Find pivot. 
*/ + + i__2 = *m - j + 1; + jp = j - 1 + _starpu_idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; +/* Permute just this column. */ + if (jp != j) { + tmp = a[j + j * a_dim1]; + a[j + j * a_dim1] = a[jp + j * a_dim1]; + a[jp + j * a_dim1] = tmp; + } +/* Apply pending permutations to L */ + ntopiv = 1; + ipivstart = j; + jpivstart = j - ntopiv; + while(ntopiv < kahead) { + _starpu_dlaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, + &ipiv[1], &c__1); + ipivstart -= ntopiv; + ntopiv <<= 1; + jpivstart -= ntopiv; + } +/* Permute U block to match L */ + _starpu_dlaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], + &c__1); +/* Factor the current column */ + if (a[j + j * a_dim1] != 0. && ! _starpu_disnan_(&a[j + j * a_dim1])) { + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; + } + } + } else if (a[j + j * a_dim1] == 0. && *info == 0) { + *info = j; + } +/* Solve for U block. */ + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, & + c_b12, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) + * a_dim1], lda); +/* Schur complement. */ + i__2 = *m - j; + _starpu_dgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b15, + &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * + a_dim1], lda, &c_b12, &a[j + 1 + (j + 1) * a_dim1], lda); + } +/* Handle pivot permutations on the way out of the recursion */ + npived = nstep & -nstep; + j = nstep - npived; + while(j > 0) { + ntopiv = j & -j; + i__1 = j + 1; + _starpu_dlaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, & + nstep, &ipiv[1], &c__1); + j -= ntopiv; + } +/* If short and wide, handle the rest of the columns. 
*/ + if (*m < *n) { + i__1 = *n - *m; + _starpu_dlaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, & + ipiv[1], &c__1); + i__1 = *n - *m; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b12, &a[ + a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda); + } + return 0; + +/* End of DGETRF */ + +} /* _starpu_dgetrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c b/min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c new file mode 100644 index 0000000..9856475 --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c @@ -0,0 +1,403 @@ +/* dgeqrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1; + + /* Local variables */ + integer i__, j, k, ib, nb, nt, nx, iws; + extern doublereal _starpu_sceil_(real *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char 
*, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer lbwork, llwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* March 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* A = Q * R. */ + +/* This is the left-looking Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ +/* upper triangular if m >= n); the elements below the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of min(m,n) elementary reflectors (see Further */ +/* Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ + +/* The dimension of the array WORK. The dimension can be divided into three parts. */ + +/* 1) The part for the triangular factor T. 
If the very last T is not bigger */ +/* than any of the rest, then this part is NB x ceiling(K/NB), otherwise, */ +/* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T */ + +/* 2) The part for the very last T when T is bigger than any of the rest T. */ +/* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, */ +/* where K = min(M,N), NX is calculated by */ +/* NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) */ + +/* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) */ + +/* So LWORK = part1 + part2 + part3 */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nbmin = 2; + nx = 0; + iws = *n; + k = min(*m,*n); + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + } + +/* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: */ + +/* NB=3 2NB=6 K=10 */ +/* | | | */ +/* 1--2--3--4--5--6--7--8--9--10 */ +/* | \________/ */ +/* K-NX=5 NT=4 */ + +/* So here 4 x 4 is the last T stored in the workspace */ + + r__1 = (real) (k - nx) / (real) nb; + nt = k - _starpu_sceil_(&r__1) * nb; + +/* optimal workspace = space for dlarfb + space for normal T's + space for the last T */ + +/* Computing MAX */ +/* Computing MAX */ + i__3 = (*n - *m) * k, i__4 = (*n - *m) * nb; +/* Computing MAX */ + i__5 = k * nb, i__6 = nb * nb; + i__1 = max(i__3,i__4), i__2 = max(i__5,i__6); + llwork = max(i__1,i__2); + r__1 = (real) llwork / (real) nb; + llwork = _starpu_sceil_(&r__1); + if (nt > nb) { + lbwork = k - nt; + +/* Optimal workspace for dlarfb = MAX(1,N)*NT */ + + lwkopt = (lbwork + llwork) * nb; + work[1] = (doublereal) (lwkopt + nt * nt); + } else { + r__1 = (real) k / (real) nb; + lbwork = _starpu_sceil_(&r__1) * nb; + lwkopt = (lbwork + llwork - nb) * nb; + work[1] = (doublereal) lwkopt; + } + +/* Test the input arguments */ + + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + work[1] = 1.; + return 0; + } + + if (nb > 1 && nb < k) { + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + if (nt <= nb) { + iws = (lbwork + llwork - nb) * nb; + } else { + iws = (lbwork + llwork) * nb + nt * nt; + } + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + if (nt <= nb) { + nb = *lwork / (llwork + (lbwork - nb)); + } else { + nb = (*lwork - nt * nt) / (lbwork + llwork); + } +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Update the current column using old T's */ + + i__3 = i__ - nb; + i__4 = nb; + for (j = 1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* Apply H' to A(J:M,I:I+IB-1) from the left */ + + i__5 = *m - j + 1; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__5, & + ib, &nb, &a[j + j * a_dim1], lda, &work[j], &lbwork, & + a[j + i__ * a_dim1], lda, &work[lbwork * nb + nt * nt + + 1], &ib); +/* L20: */ + } + +/* Compute the QR factorization of the current block */ +/* A(I:M,I:I+IB-1) */ + + i__4 = *m - i__ + 1; + _starpu_dgeqr2_(&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + lbwork * nb + nt * nt + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . 
H(i+ib-1) */ + + i__4 = *m - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[i__], &lbwork); + + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + if (i__ != 1) { + i__2 = i__ - nb; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Apply H' to A(J:M,I:K) from the left */ + + i__4 = *m - j + 1; + i__3 = k - i__ + 1; + i__5 = k - i__ + 1; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__4, & + i__3, &nb, &a[j + j * a_dim1], lda, &work[j], &lbwork, + &a[j + i__ * a_dim1], lda, &work[lbwork * nb + nt * + nt + 1], &i__5); +/* L30: */ + } + i__1 = *m - i__ + 1; + i__2 = k - i__ + 1; + _starpu_dgeqr2_(&i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[lbwork * nb + nt * nt + 1], &iinfo); + } else { + +/* Use unblocked code to factor the last or only block. */ + + i__1 = *m - i__ + 1; + i__2 = *n - i__ + 1; + _starpu_dgeqr2_(&i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + } + } + +/* Apply update to the column M+1:N when N > M */ + + if (*m < *n && i__ != 1) { + +/* Form the last triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + if (nt <= nb) { + i__1 = *m - i__ + 1; + i__2 = k - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__1, &i__2, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[i__], &lbwork); + } else { + i__1 = *m - i__ + 1; + i__2 = k - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__1, &i__2, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[lbwork * nb + 1], &nt); + } + +/* Apply H' to A(1:M,M+1:N) from the left */ + + i__1 = k - nx; + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = k - j + 1; + ib = min(i__4,nb); + i__4 = *m - j + 1; + i__3 = *n - *m; + i__5 = *n - *m; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__4, & + i__3, &ib, &a[j + j * a_dim1], lda, &work[j], &lbwork, &a[ + j + (*m + 1) * a_dim1], lda, &work[lbwork * nb + nt * nt + + 1], &i__5); +/* L40: */ + } + if (nt <= nb) { + i__2 = *m - j + 1; + i__1 = *n - *m; + i__4 = k - j + 1; + i__3 = *n - *m; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__2, & + i__1, &i__4, &a[j + j * a_dim1], lda, &work[j], &lbwork, & + a[j + (*m + 1) * a_dim1], lda, &work[lbwork * nb + nt * + nt + 1], &i__3); + } else { + i__2 = *m - j + 1; + i__1 = *n - *m; + i__4 = k - j + 1; + i__3 = *n - *m; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__2, & + i__1, &i__4, &a[j + j * a_dim1], lda, &work[lbwork * nb + + 1], &nt, &a[j + (*m + 1) * a_dim1], lda, &work[lbwork * + nb + nt * nt + 1], &i__3); + } + } + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* _starpu_dgeqrf_ */ diff --git a/min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c b/min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c new file mode 100644 index 0000000..4547a0c --- /dev/null +++ b/min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c @@ -0,0 +1,44 @@ +/* sceil.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_sceil_(real *a) +{ + /* System generated locals */ + real ret_val; + + +/* -- LAPACK routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. */ +/* June 2008 */ + +/* .. Scalar Arguments ..* */ +/* .. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..* */ + + if (*a - (integer) (*a) == 0.f) { + ret_val = *a; + } else if (*a > 0.f) { + ret_val = (real) ((integer) (*a) + 1); + } else { + ret_val = (real) ((integer) (*a)); + } + return ret_val; + +} /* _starpu_sceil_ */ diff --git a/min-dgels/base/SRC/chla_transtype.c b/min-dgels/base/SRC/chla_transtype.c new file mode 100644 index 0000000..63410e8 --- /dev/null +++ b/min-dgels/base/SRC/chla_transtype.c @@ -0,0 +1,62 @@ +/* _starpu_chla_transtype.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, + integer *trans) +{ + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2008 */ +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine translates from a BLAST-specified integer constant to */ +/* the character string specifying a transposition operation. */ + +/* CHLA_TRANSTYPE returns a CHARACTER*1. If CHLA_TRANSTYPE is 'X', */ +/* then input is not an integer indicating a transposition operator. */ +/* Otherwise CHLA_TRANSTYPE returns the constant value corresponding to */ +/* TRANS. 
*/ + +/* Arguments */ +/* ========= */ +/* TRANS (input) INTEGER */ +/* Specifies the form of the system of equations: */ +/* = BLAS_NO_TRANS = 111 : No Transpose */ +/* = BLAS_TRANS = 112 : Transpose */ +/* = BLAS_CONJ_TRANS = 113 : Conjugate Transpose */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Executable Statements .. */ + if (*trans == 111) { + *(unsigned char *)ret_val = 'N'; + } else if (*trans == 112) { + *(unsigned char *)ret_val = 'T'; + } else if (*trans == 113) { + *(unsigned char *)ret_val = 'C'; + } else { + *(unsigned char *)ret_val = 'X'; + } + return ; + +/* End of CHLA_TRANSTYPE */ + +} /* _starpu_chla_transtype__ */ diff --git a/min-dgels/base/SRC/dbdsdc.c b/min-dgels/base/SRC/dbdsdc.c new file mode 100644 index 0000000..6cd878e --- /dev/null +++ b/min-dgels/base/SRC/dbdsdc.c @@ -0,0 +1,514 @@ +/* dbdsdc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; +static integer c__1 = 1; +static doublereal c_b29 = 0.; + +/* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double 
d_sign(doublereal *, doublereal *), log(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal p, r__; + integer z__, ic, ii, kk; + doublereal cs; + integer is, iu; + doublereal sn; + integer nm1; + doublereal eps; + integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer * +, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + integer poles, iuplo, nsize, start; + extern /* Subroutine */ int _starpu_dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + 
integer givcol; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + integer icompq; + doublereal orgnrm; + integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DBDSDC computes the singular value decomposition (SVD) of a real */ +/* N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, */ +/* using a divide and conquer method, where S is a diagonal matrix */ +/* with non-negative diagonal elements (the singular values of B), and */ +/* U and VT are orthogonal matrices of left and right singular vectors, */ +/* respectively. DBDSDC can be used to compute all singular values, */ +/* and optionally, singular vectors or singular vectors in compact form. */ + +/* This code makes very mild assumptions about floating point */ +/* arithmetic. It will work on machines with a guard digit in */ +/* add/subtract, or on those binary machines without guard digits */ +/* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. See DLASD3 for details. */ + +/* The code currently calls DLASDQ if singular values only are desired. */ +/* However, it can be slightly modified to compute singular values */ +/* using the divide and conquer method. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': B is upper bidiagonal. */ +/* = 'L': B is lower bidiagonal. 
*/ + +/* COMPQ (input) CHARACTER*1 */ +/* Specifies whether singular vectors are to be computed */ +/* as follows: */ +/* = 'N': Compute singular values only; */ +/* = 'P': Compute singular values and compute singular */ +/* vectors in compact form; */ +/* = 'I': Compute singular values and singular vectors. */ + +/* N (input) INTEGER */ +/* The order of the matrix B. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the bidiagonal matrix B. */ +/* On exit, if INFO=0, the singular values of B. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the elements of E contain the offdiagonal */ +/* elements of the bidiagonal matrix whose SVD is desired. */ +/* On exit, E has been destroyed. */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU,N) */ +/* If COMPQ = 'I', then: */ +/* On exit, if INFO = 0, U contains the left singular vectors */ +/* of the bidiagonal matrix. */ +/* For other values of COMPQ, U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= 1. */ +/* If singular vectors are desired, then LDU >= max( 1, N ). */ + +/* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ +/* If COMPQ = 'I', then: */ +/* On exit, if INFO = 0, VT' contains the right singular */ +/* vectors of the bidiagonal matrix. */ +/* For other values of COMPQ, VT is not referenced. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= 1. */ +/* If singular vectors are desired, then LDVT >= max( 1, N ). */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ) */ +/* If COMPQ = 'P', then: */ +/* On exit, if INFO = 0, Q and IQ contain the left */ +/* and right singular vectors in a compact form, */ +/* requiring O(N log N) space instead of 2*N**2. 
*/ +/* In particular, Q contains all the DOUBLE PRECISION data in */ +/* LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) */ +/* words of memory, where SMLSIZ is returned by ILAENV and */ +/* is equal to the maximum size of the subproblems at the */ +/* bottom of the computation tree (usually about 25). */ +/* For other values of COMPQ, Q is not referenced. */ + +/* IQ (output) INTEGER array, dimension (LDIQ) */ +/* If COMPQ = 'P', then: */ +/* On exit, if INFO = 0, Q and IQ contain the left */ +/* and right singular vectors in a compact form, */ +/* requiring O(N log N) space instead of 2*N**2. */ +/* In particular, IQ contains all INTEGER data in */ +/* LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) */ +/* words of memory, where SMLSIZ is returned by ILAENV and */ +/* is equal to the maximum size of the subproblems at the */ +/* bottom of the computation tree (usually about 25). */ +/* For other values of COMPQ, IQ is not referenced. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* If COMPQ = 'N' then LWORK >= (4 * N). */ +/* If COMPQ = 'P' then LWORK >= (6 * N). */ +/* If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). */ + +/* IWORK (workspace) INTEGER array, dimension (8*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: The algorithm failed to compute a singular value. */ +/* The update process of divide and conquer failed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ +/* Changed dimension statement in comment describing E from (N) to */ +/* (N-1). Sven, 17 Feb 05. */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* ..
*/ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (_starpu_lsame_(uplo, "U")) { + iuplo = 1; + } + if (_starpu_lsame_(uplo, "L")) { + iuplo = 2; + } + if (_starpu_lsame_(compq, "N")) { + icompq = 0; + } else if (_starpu_lsame_(compq, "P")) { + icompq = 1; + } else if (_starpu_lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = _starpu_ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left */ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + _starpu_dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = 
sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ + + if (icompq == 0) { + _starpu_dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* If N is smaller than the minimum divide size SMLSIZ, then solve */ +/* the problem with another solver. */ + + if (*n <= smlsiz) { + if (icompq == 2) { + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + _starpu_dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] +, ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + _starpu_dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) 
{ + return 0; + } + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = _starpu_dlamch_("Epsilon"); + + mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + +/* Subproblem found. First determine its size and then */ +/* apply divide and conquer on it. */ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* A subproblem with E(NM1) small. This implies an */ +/* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem */ +/* first. 
*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + _starpu_dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + _starpu_dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + _starpu_dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + _starpu_dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + 
iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* If B is lower bidiagonal, update U by those Givens rotations */ +/* which rotated B to be upper bidiagonal */ + + if (iuplo == 2 && icompq == 2) { + _starpu_dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of DBDSDC */ + +} /* _starpu_dbdsdc_ */ diff --git a/min-dgels/base/SRC/dbdsqr.c b/min-dgels/base/SRC/dbdsqr.c new file mode 100644 index 0000000..f2ad24a --- /dev/null +++ b/min-dgels/base/SRC/dbdsqr.c @@ -0,0 +1,918 @@ +/* dbdsqr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b15 = -.125; +static integer c__1 = 1; +static doublereal c_b49 = 1.; +static doublereal c_b72 = -1.; + +/* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( + doublereal *, doublereal *); + + /* Local variables */ + doublereal f, g, h__; + integer i__, j, m; + doublereal r__, cs; + integer ll; + doublereal sn, mu; + integer nm1, nm12, nm13, lll; + doublereal eps, sll, tol, abse; + integer idir; + doublereal abss; + integer oldm; + 
doublereal cosl; + integer isub, iter; + doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal oldcs; + extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + integer oldll; + doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer maxit; + doublereal sminl, sigmx; + logical lower; + extern /* Subroutine */ int _starpu_dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, + integer *); + doublereal sminoa, thresh; + logical rotate; + doublereal tolmul; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DBDSQR computes the singular values and, optionally, the right and/or */ +/* left singular vectors from the singular value decomposition (SVD) of */ +/* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */ +/* zero-shift QR algorithm. 
The SVD of B has the form */ + +/* B = Q * S * P**T */ + +/* where S is the diagonal matrix of singular values, Q is an orthogonal */ +/* matrix of left singular vectors, and P is an orthogonal matrix of */ +/* right singular vectors. If left singular vectors are requested, this */ +/* subroutine actually returns U*Q instead of Q, and, if right singular */ +/* vectors are requested, this subroutine returns P**T*VT instead of */ +/* P**T, for given real input matrices U and VT. When U and VT are the */ +/* orthogonal matrices that reduce a general matrix A to bidiagonal */ +/* form: A = U*B*VT, as computed by DGEBRD, then */ + +/* A = (U*Q) * S * (P**T*VT) */ + +/* is the SVD of A. Optionally, the subroutine may also compute Q**T*C */ +/* for a given real input matrix C. */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices With */ +/* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ +/* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */ +/* no. 5, pp. 873-912, Sept 1990) and */ +/* "Accurate singular values and differential qd algorithms," by */ +/* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */ +/* Department, University of California at Berkeley, July 1992 */ +/* for a detailed description of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': B is upper bidiagonal; */ +/* = 'L': B is lower bidiagonal. */ + +/* N (input) INTEGER */ +/* The order of the matrix B. N >= 0. */ + +/* NCVT (input) INTEGER */ +/* The number of columns of the matrix VT. NCVT >= 0. */ + +/* NRU (input) INTEGER */ +/* The number of rows of the matrix U. NRU >= 0. */ + +/* NCC (input) INTEGER */ +/* The number of columns of the matrix C. NCC >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the bidiagonal matrix B. */ +/* On exit, if INFO=0, the singular values of B in decreasing */ +/* order. 
*/ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the N-1 offdiagonal elements of the bidiagonal */ +/* matrix B. */ +/* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */ +/* will contain the diagonal and superdiagonal elements of a */ +/* bidiagonal matrix orthogonally equivalent to the one given */ +/* as input. */ + +/* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */ +/* On entry, an N-by-NCVT matrix VT. */ +/* On exit, VT is overwritten by P**T * VT. */ +/* Not referenced if NCVT = 0. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. */ +/* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */ + +/* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) */ +/* On entry, an NRU-by-N matrix U. */ +/* On exit, U is overwritten by U * Q. */ +/* Not referenced if NRU = 0. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= max(1,NRU). */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */ +/* On entry, an N-by-NCC matrix C. */ +/* On exit, C is overwritten by Q**T * C. */ +/* Not referenced if NCC = 0. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. */ +/* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0: */ +/* if NCVT = NRU = NCC = 0, */ +/* = 1, a split was marked by a positive value in E */ +/* = 2, current block of Z not diagonalized after 30*N */ +/* iterations (in inner while loop) */ +/* = 3, termination criterion of outer while loop not met */ +/* (program created more than N unreduced blocks) */ +/* else (NCVT, NRU and NCC not all zero), */ +/* the algorithm did not converge; D and E contain the */ +/* elements of a bidiagonal matrix which is orthogonally */ +/* similar to the input matrix B; if INFO = i, i */ +/* elements of E have not converged to zero. */ + +/* Internal Parameters */ +/* =================== */ + +/* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) */ +/* TOLMUL controls the convergence criterion of the QR loop. */ +/* If it is positive, TOLMUL*EPS is the desired relative */ +/* precision in the computed singular values. */ +/* If it is negative, abs(TOLMUL*EPS*sigma_max) is the */ +/* desired absolute accuracy in the computed singular */ +/* values (corresponds to relative accuracy */ +/* abs(TOLMUL*EPS) in the largest singular value). */ +/* abs(TOLMUL) should be between 1 and 1/EPS, and preferably */ +/* between 10 (for fast convergence) and .1/EPS */ +/* (for there to be some accuracy in the results). */ +/* Default is to lose either one eighth or 2 of the */ +/* available decimal digits in each computed singular value */ +/* (whichever is smaller). */ + +/* MAXITR INTEGER, default = 6 */ +/* MAXITR controls the maximum number of passes of the */ +/* algorithm through its inner loop. The algorithm stops */ +/* (and so fails to converge) if the number of passes */ +/* through the inner loop exceeds MAXITR*N**2. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* ..
*/ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = _starpu_lsame_(uplo, "L"); + if (! _starpu_lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! 
rotate) { + _starpu_dlasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = _starpu_dlamch_("Epsilon"); + unfl = _starpu_dlamch_("Safe minimum"); + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left */ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + _starpu_dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + _starpu_dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* Compute singular values to relative accuracy TOL */ +/* (By setting TOL to be negative, algorithm will compute */ +/* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */ + +/* Computing MAX */ +/* Computing MIN */ + d__3 = 100., d__4 = pow_dd(&eps, &c_b15); + d__1 = 10., d__2 = min(d__3,d__4); + tolmul = max(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) 
{ + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } else { + +/* Absolute accuracy desired */ + +/* Computing MAX */ + d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } + +/* Prepare for main iteration loop for the singular values */ +/* (MAXIT is the maximum number of passes through the inner */ +/* loop permitted before nonconvergence signalled.) */ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. 
&& abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + _starpu_dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + _starpu_drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + _starpu_drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + _starpu_drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* If working on new submatrix, choose shift direction */ +/* (from larger end diagonal element towards smaller) */ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* Run convergence test in forward direction */ +/* First apply standard test to bottom of matrix */ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) 
{ + +/* If relative accuracy desired, */ +/* apply convergence criterion forward */ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ + } + } + + } else { + +/* Run convergence test in backward direction */ +/* First apply standard test to top of matrix */ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* If relative accuracy desired, */ +/* apply convergence criterion backward */ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* Compute shift. First, test if shifting would ruin relative */ +/* accuracy, and if so set the shift to zero. */ + +/* Computing MAX */ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + _starpu_dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + _starpu_dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) 
{ +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + _starpu_dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + _starpu_dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ 
- ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - 
ll + 1; + _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] + ) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) 
{ + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + _starpu_dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* Sort the singular values into decreasing order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + _starpu_dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + _starpu_dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + _starpu_dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* _starpu_dbdsqr_ */ diff --git a/min-dgels/base/SRC/ddisna.c b/min-dgels/base/SRC/ddisna.c new file mode 100644 index 0000000..e469215 --- /dev/null +++ b/min-dgels/base/SRC/ddisna.c @@ -0,0 +1,227 @@ +/* ddisna.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * + d__, doublereal *sep, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, k; + doublereal eps; + logical decr, left, incr, sing, eigen; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + logical right; + extern doublereal _starpu_dlamch_(char *); + doublereal oldgap, safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal newgap, thresh; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DDISNA computes the reciprocal condition numbers for the eigenvectors */ +/* of a real symmetric or complex Hermitian matrix or for the left or */ +/* right singular vectors of a general m-by-n matrix. The reciprocal */ +/* condition number is the 'gap' between the corresponding eigenvalue or */ +/* singular value and the nearest other one. */ + +/* The bound on the error, measured by angle in radians, in the I-th */ +/* computed vector is given by */ + +/* DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) */ + +/* where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed */ +/* to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of */ +/* the error bound. 
*/ + +/* DDISNA may also be used to compute error bounds for eigenvectors of */ +/* the generalized symmetric definite eigenproblem. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies for which problem the reciprocal condition numbers */ +/* should be computed: */ +/* = 'E': the eigenvectors of a symmetric/Hermitian matrix; */ +/* = 'L': the left singular vectors of a general matrix; */ +/* = 'R': the right singular vectors of a general matrix. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix. M >= 0. */ + +/* N (input) INTEGER */ +/* If JOB = 'L' or 'R', the number of columns of the matrix, */ +/* in which case N >= 0. Ignored if JOB = 'E'. */ + +/* D (input) DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ +/* dimension (min(M,N)) if JOB = 'L' or 'R' */ +/* The eigenvalues (if JOB = 'E') or singular values (if JOB = */ +/* 'L' or 'R') of the matrix, in either increasing or decreasing */ +/* order. If singular values, they must be non-negative. */ + +/* SEP (output) DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ +/* dimension (min(M,N)) if JOB = 'L' or 'R' */ +/* The reciprocal condition numbers of the vectors. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sep; + --d__; + + /* Function Body */ + *info = 0; + eigen = _starpu_lsame_(job, "E"); + left = _starpu_lsame_(job, "L"); + right = _starpu_lsame_(job, "R"); + sing = left || right; + if (eigen) { + k = *m; + } else if (sing) { + k = min(*m,*n); + } + if (! eigen && ! 
sing) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (k < 0) { + *info = -3; + } else { + incr = TRUE_; + decr = TRUE_; + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (incr) { + incr = incr && d__[i__] <= d__[i__ + 1]; + } + if (decr) { + decr = decr && d__[i__] >= d__[i__ + 1]; + } +/* L10: */ + } + if (sing && k > 0) { + if (incr) { + incr = incr && 0. <= d__[1]; + } + if (decr) { + decr = decr && d__[k] >= 0.; + } + } + if (! (incr || decr)) { + *info = -4; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DDISNA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + +/* Compute reciprocal condition numbers */ + + if (k == 1) { + sep[1] = _starpu_dlamch_("O"); + } else { + oldgap = (d__1 = d__[2] - d__[1], abs(d__1)); + sep[1] = oldgap; + i__1 = k - 1; + for (i__ = 2; i__ <= i__1; ++i__) { + newgap = (d__1 = d__[i__ + 1] - d__[i__], abs(d__1)); + sep[i__] = min(oldgap,newgap); + oldgap = newgap; +/* L20: */ + } + sep[k] = oldgap; + } + if (sing) { + if (left && *m > *n || right && *m < *n) { + if (incr) { + sep[1] = min(sep[1],d__[1]); + } + if (decr) { +/* Computing MIN */ + d__1 = sep[k], d__2 = d__[k]; + sep[k] = min(d__1,d__2); + } + } + } + +/* Ensure that reciprocal condition numbers are not less than */ +/* threshold, in order to limit the size of the error bound */ + + eps = _starpu_dlamch_("E"); + safmin = _starpu_dlamch_("S"); +/* Computing MAX */ + d__2 = abs(d__[1]), d__3 = (d__1 = d__[k], abs(d__1)); + anorm = max(d__2,d__3); + if (anorm == 0.) 
{ + thresh = eps; + } else { +/* Computing MAX */ + d__1 = eps * anorm; + thresh = max(d__1,safmin); + } + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sep[i__]; + sep[i__] = max(d__1,thresh); +/* L30: */ + } + + return 0; + +/* End of DDISNA */ + +} /* _starpu_ddisna_ */ diff --git a/min-dgels/base/SRC/dgbbrd.c b/min-dgels/base/SRC/dgbbrd.c new file mode 100644 index 0000000..3188c7a --- /dev/null +++ b/min-dgels/base/SRC/dgbbrd.c @@ -0,0 +1,566 @@ +/* dgbbrd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b8 = 0.; +static doublereal c_b9 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * + d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, + integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, + q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + + /* Local variables */ + integer i__, j, l, j1, j2, kb; + doublereal ra, rb, rc; + integer kk, ml, mn, nr, mu; + doublereal rs; + integer kb1, ml0, mu0, klm, kun, nrt, klu1, inca; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern logical _starpu_lsame_(char *, char *); + logical wantb, wantc; + integer 
minmn; + logical wantq; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dlargv_( + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlartv_(integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical wantpt; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBBRD reduces a real general m-by-n band matrix A to upper */ +/* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */ + +/* The routine computes B, and optionally forms Q or P', or computes */ +/* Q'*C for a given matrix C. */ + +/* Arguments */ +/* ========= */ + +/* VECT (input) CHARACTER*1 */ +/* Specifies whether or not the matrices Q and P' are to be */ +/* formed. */ +/* = 'N': do not form Q or P'; */ +/* = 'Q': form Q only; */ +/* = 'P': form P' only; */ +/* = 'B': form both. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NCC (input) INTEGER */ +/* The number of columns of the matrix C. NCC >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals of the matrix A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals of the matrix A. KU >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the m-by-n band matrix A, stored in rows 1 to */ +/* KL+KU+1. 
The j-th column of A is stored in the j-th column of */ +/* the array AB as follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */ +/* On exit, A is overwritten by values generated during the */ +/* reduction. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array A. LDAB >= KL+KU+1. */ + +/* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The diagonal elements of the bidiagonal matrix B. */ + +/* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ +/* The superdiagonal elements of the bidiagonal matrix B. */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ,M) */ +/* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */ +/* If VECT = 'N' or 'P', the array Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. */ +/* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */ + +/* PT (output) DOUBLE PRECISION array, dimension (LDPT,N) */ +/* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */ +/* If VECT = 'N' or 'Q', the array PT is not referenced. */ + +/* LDPT (input) INTEGER */ +/* The leading dimension of the array PT. */ +/* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,NCC) */ +/* On entry, an m-by-ncc matrix C. */ +/* On exit, C is overwritten by Q'*C. */ +/* C is not referenced if NCC = 0. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. */ +/* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*max(M,N)) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. 
External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + pt_dim1 = *ldpt; + pt_offset = 1 + pt_dim1; + pt -= pt_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + wantb = _starpu_lsame_(vect, "B"); + wantq = _starpu_lsame_(vect, "Q") || wantb; + wantpt = _starpu_lsame_(vect, "P") || wantb; + wantc = *ncc > 0; + klu1 = *kl + *ku + 1; + *info = 0; + if (! wantq && ! wantpt && ! _starpu_lsame_(vect, "N")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncc < 0) { + *info = -4; + } else if (*kl < 0) { + *info = -5; + } else if (*ku < 0) { + *info = -6; + } else if (*ldab < klu1) { + *info = -8; + } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) { + *info = -12; + } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) { + *info = -14; + } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBBRD", &i__1); + return 0; + } + +/* Initialize Q and P' to the unit matrix, if needed */ + + if (wantq) { + _starpu_dlaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); + } + if (wantpt) { + _starpu_dlaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + + minmn = min(*m,*n); + + if (*kl + *ku > 1) { + +/* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */ +/* first to lower bidiagonal form and then transform to upper */ +/* bidiagonal */ + + if (*ku > 0) { + ml0 = 1; + mu0 = 2; + } else { + ml0 = 2; + mu0 = 1; + } + +/* Wherever possible, plane rotations are generated and applied in */ +/* vector operations of length NR over the index set J1:J2:KLU1. 
*/ + +/* The sines of the plane rotations are stored in WORK(1:max(m,n)) */ +/* and the cosines in WORK(max(m,n)+1:2*max(m,n)). */ + + mn = max(*m,*n); +/* Computing MIN */ + i__1 = *m - 1; + klm = min(i__1,*kl); +/* Computing MIN */ + i__1 = *n - 1; + kun = min(i__1,*ku); + kb = klm + kun; + kb1 = kb + 1; + inca = kb1 * *ldab; + nr = 0; + j1 = klm + 2; + j2 = 1 - kun; + + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th column and i-th row of matrix to bidiagonal form */ + + ml = klm + 1; + mu = kun + 1; + i__2 = kb; + for (kk = 1; kk <= i__2; ++kk) { + j1 += kb; + j2 += kb; + +/* generate plane rotations to annihilate nonzero elements */ +/* which have been created below the band */ + + if (nr > 0) { + _starpu_dlargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, + &work[j1], &kb1, &work[mn + j1], &kb1); + } + +/* apply plane rotations from the left */ + + i__3 = kb; + for (l = 1; l <= i__3; ++l) { + if (j2 - klm + l - 1 > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * + ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm + + l - 1) * ab_dim1], &inca, &work[mn + j1], & + work[j1], &kb1); + } +/* L10: */ + } + + if (ml > ml0) { + if (ml <= *m - i__ + 1) { + +/* generate plane rotation to annihilate a(i+ml-1,i) */ +/* within the band, and apply rotation from the left */ + + _starpu_dlartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + + ml + i__ * ab_dim1], &work[mn + i__ + ml - 1], + &work[i__ + ml - 1], &ra); + ab[*ku + ml - 1 + i__ * ab_dim1] = ra; + if (i__ < *n) { +/* Computing MIN */ + i__4 = *ku + ml - 2, i__5 = *n - i__; + i__3 = min(i__4,i__5); + i__6 = *ldab - 1; + i__7 = *ldab - 1; + _starpu_drot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * + ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ + + 1) * ab_dim1], &i__7, &work[mn + i__ + + ml - 1], &work[i__ + ml - 1]); + } + } + ++nr; + j1 -= kb1; + } + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + 
i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) + { + _starpu_drot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * + q_dim1 + 1], &c__1, &work[mn + j], &work[j]); +/* L20: */ + } + } + + if (wantc) { + +/* apply plane rotations to C */ + + i__4 = j2; + i__3 = kb1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) + { + _starpu_drot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1] +, ldc, &work[mn + j], &work[j]); +/* L30: */ + } + } + + if (j2 + kun > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 -= kb1; + } + + i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j-1,j+ku) above the band */ +/* and store it in WORK(n+1:2*n) */ + + work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1]; + ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) + * ab_dim1 + 1]; +/* L40: */ + } + +/* generate plane rotations to annihilate nonzero elements */ +/* which have been generated above the band */ + + if (nr > 0) { + _starpu_dlargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, & + work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); + } + +/* apply plane rotations from the right */ + + i__4 = kb; + for (l = 1; l <= i__4; ++l) { + if (j2 + l - 1 > *m) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], & + inca, &ab[l + (j1 + kun) * ab_dim1], &inca, & + work[mn + j1 + kun], &work[j1 + kun], &kb1); + } +/* L50: */ + } + + if (ml == ml0 && mu > mu0) { + if (mu <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i,i+mu-1) */ +/* within the band, and apply rotation from the right */ + + _starpu_dlartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], + &ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], + &work[mn + i__ + mu - 1], &work[i__ + mu - 1], + &ra); + ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra; +/* Computing MIN */ + i__3 = *kl + mu - 2, i__5 = *m 
- i__; + i__4 = min(i__3,i__5); + _starpu_drot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * + ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu + - 1) * ab_dim1], &c__1, &work[mn + i__ + mu - + 1], &work[i__ + mu - 1]); + } + ++nr; + j1 -= kb1; + } + + if (wantpt) { + +/* accumulate product of plane rotations in P' */ + + i__4 = j2; + i__3 = kb1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) + { + _starpu_drot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + + kun + pt_dim1], ldpt, &work[mn + j + kun], & + work[j + kun]); +/* L60: */ + } + } + + if (j2 + kb > *m) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 -= kb1; + } + + i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+kl+ku,j+ku-1) below the */ +/* band and store it in WORK(1:n) */ + + work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * + ab_dim1]; + ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[ + klu1 + (j + kun) * ab_dim1]; +/* L70: */ + } + + if (ml > ml0) { + --ml; + } else { + --mu; + } +/* L80: */ + } +/* L90: */ + } + } + + if (*ku == 0 && *kl > 0) { + +/* A has been reduced to lower bidiagonal form */ + +/* Transform lower bidiagonal form to upper bidiagonal by applying */ +/* plane rotations from the left, storing diagonal elements in D */ +/* and off-diagonal elements in E */ + +/* Computing MIN */ + i__2 = *m - 1; + i__1 = min(i__2,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, + &ra); + d__[i__] = ra; + if (i__ < *n) { + e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1]; + ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1] + ; + } + if (wantq) { + _starpu_drot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + + 1], &c__1, &rc, &rs); + } + if (wantc) { + _starpu_drot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], + ldc, &rc, &rs); + } +/* L100: */ + } + if (*m <= *n) { + d__[*m] = ab[*m * 
ab_dim1 + 1]; + } + } else if (*ku > 0) { + +/* A has been reduced to upper bidiagonal form */ + + if (*m < *n) { + +/* Annihilate a(m,m+1) by applying plane rotations from the */ +/* right, storing diagonal elements in D and off-diagonal */ +/* elements in E */ + + rb = ab[*ku + (*m + 1) * ab_dim1]; + for (i__ = *m; i__ >= 1; --i__) { + _starpu_dlartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra); + d__[i__] = ra; + if (i__ > 1) { + rb = -rs * ab[*ku + i__ * ab_dim1]; + e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1]; + } + if (wantpt) { + _starpu_drot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], + ldpt, &rc, &rs); + } +/* L110: */ + } + } else { + +/* Copy off-diagonal elements to E and diagonal elements to D */ + + i__1 = minmn - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = ab[*ku + (i__ + 1) * ab_dim1]; +/* L120: */ + } + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[*ku + 1 + i__ * ab_dim1]; +/* L130: */ + } + } + } else { + +/* A is diagonal. Set elements of E to zero and copy diagonal */ +/* elements to D. */ + + i__1 = minmn - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L140: */ + } + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[i__ * ab_dim1 + 1]; +/* L150: */ + } + } + return 0; + +/* End of DGBBRD */ + +} /* _starpu_dgbbrd_ */ diff --git a/min-dgels/base/SRC/dgbcon.c b/min-dgels/base/SRC/dgbcon.c new file mode 100644 index 0000000..1ce8a9b --- /dev/null +++ b/min-dgels/base/SRC/dgbcon.c @@ -0,0 +1,284 @@ +/* dgbcon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer j; + doublereal t; + integer kd, lm, jp, ix, kase; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer kase1; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + logical lnoti; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlacn2_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + logical onenrm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBCON estimates the reciprocal of the condition number of a real */ +/* general band matrix A, in either the 1-norm or the infinity-norm, */ +/* using the LU factorization computed by DGBTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as */ +/* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. U is stored as an upper triangular band */ +/* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* the multipliers used during the factorization are stored in */ +/* rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= N, row i of the matrix was */ +/* interchanged with row IPIV(i). */ + +/* ANORM (input) DOUBLE PRECISION */ +/* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* If NORM = 'I', the infinity-norm of the original matrix A. 
*/ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -6; + } else if (*anorm < 0.) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = _starpu_dlamch_("Safe minimum"); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kd = *kl + *ku + 1; + lnoti = *kl > 0; + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). 
*/ + + if (lnoti) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kl, i__3 = *n - j; + lm = min(i__2,i__3); + jp = ipiv[j]; + t = work[jp]; + if (jp != j) { + work[jp] = work[j]; + work[j] = t; + } + d__1 = -t; + _starpu_daxpy_(&lm, &d__1, &ab[kd + 1 + j * ab_dim1], &c__1, & + work[j + 1], &c__1); +/* L20: */ + } + } + +/* Multiply by inv(U). */ + + i__1 = *kl + *ku; + _starpu_dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, & + ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + + 1], info); + } else { + +/* Multiply by inv(U'). */ + + i__1 = *kl + *ku; + _starpu_dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[ + ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], + info); + +/* Multiply by inv(L'). */ + + if (lnoti) { + for (j = *n - 1; j >= 1; --j) { +/* Computing MIN */ + i__1 = *kl, i__2 = *n - j; + lm = min(i__1,i__2); + work[j] -= _starpu_ddot_(&lm, &ab[kd + 1 + j * ab_dim1], &c__1, & + work[j + 1], &c__1); + jp = ipiv[j]; + if (jp != j) { + t = work[jp]; + work[jp] = work[j]; + work[j] = t; + } +/* L30: */ + } + } + } + +/* Divide X by 1/SCALE if doing so will not cause overflow. */ + + *(unsigned char *)normin = 'Y'; + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L40; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L40: + return 0; + +/* End of DGBCON */ + +} /* _starpu_dgbcon_ */ diff --git a/min-dgels/base/SRC/dgbequ.c b/min-dgels/base/SRC/dgbequ.c new file mode 100644 index 0000000..4ff34f0 --- /dev/null +++ b/min-dgels/base/SRC/dgbequ.c @@ -0,0 +1,320 @@ +/* dgbequ.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, kd; + doublereal rcmin, rcmax; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBEQU computes row and column scalings intended to equilibrate an */ +/* M-by-N band matrix A and reduce its condition number. R returns the */ +/* row scale factors and C the column scale factors, chosen to try to */ +/* make the largest element in each row and column of the matrix B with */ +/* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ + +/* R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ +/* number and BIGNUM = largest safe number. Use of these scaling */ +/* factors is not guaranteed to reduce the condition number of A but */ +/* works well in practice. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. 
M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The band matrix A, stored in rows 1 to KL+KU+1. The j-th */ +/* column of A is stored in the j-th column of the array AB as */ +/* follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* R (output) DOUBLE PRECISION array, dimension (M) */ +/* If INFO = 0, or INFO > M, R contains the row scale factors */ +/* for A. */ + +/* C (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, C contains the column scale factors for A. */ + +/* ROWCND (output) DOUBLE PRECISION */ +/* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* AMAX is neither too large nor too small, it is not worth */ +/* scaling by R. */ + +/* COLCND (output) DOUBLE PRECISION */ +/* If INFO = 0, COLCND contains the ratio of the smallest */ +/* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* worth scaling by C. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= M: the i-th row of A is exactly zero */ +/* > M: the (i-M)-th column of A is exactly zero */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. 
*/ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBEQU", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__3 = min(i__4,*m); + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], + abs(d__1)); + r__[i__] = max(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = min(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. 
*/ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = max(d__2,smlnum); + r__[i__] = 1. / min(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = min(R(I)) / max(R(I)) */ + + *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__2 = min(i__4,*m); + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)) * r__[i__]; + c__[j] = max(d__2,d__3); +/* L80: */ + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = max(d__2,smlnum); + c__[j] = 1. 
/ min(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = min(C(J)) / max(C(J)) */ + + *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + + return 0; + +/* End of DGBEQU */ + +} /* _starpu_dgbequ_ */ diff --git a/min-dgels/base/SRC/dgbequb.c b/min-dgels/base/SRC/dgbequb.c new file mode 100644 index 0000000..07c322a --- /dev/null +++ b/min-dgels/base/SRC/dgbequb.c @@ -0,0 +1,347 @@ +/* dgbequb.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + integer i__, j, kd; + doublereal radix, rcmin, rcmax; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, logrdx, smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. 
*/ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBEQUB computes row and column scalings intended to equilibrate an */ +/* M-by-N matrix A and reduce its condition number. R returns the row */ +/* scale factors and C the column scale factors, chosen to try to make */ +/* the largest element in each row and column of the matrix B with */ +/* elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ +/* the radix. */ + +/* R(i) and C(j) are restricted to be a power of the radix between */ +/* SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ +/* of these scaling factors is not guaranteed to reduce the condition */ +/* number of A but works well in practice. */ + +/* This routine differs from DGEEQU by restricting the scaling factors */ +/* to a power of the radix. Barring over- and underflow, scaling by */ +/* these factors introduces no additional rounding errors. However, the */ +/* scaled entries' magnitudes are no longer approximately 1 but lie */ +/* between sqrt(radix) and 1/sqrt(radix). */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(M,j+kl) */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* R (output) DOUBLE PRECISION array, dimension (M) */ +/* If INFO = 0 or INFO > M, R contains the row scale factors */ +/* for A.
*/ + +/* C (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, C contains the column scale factors for A. */ + +/* ROWCND (output) DOUBLE PRECISION */ +/* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* AMAX is neither too large nor too small, it is not worth */ +/* scaling by R. */ + +/* COLCND (output) DOUBLE PRECISION */ +/* If INFO = 0, COLCND contains the ratio of the smallest */ +/* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* worth scaling by C. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= M: the i-th row of A is exactly zero */ +/* > M: the (i-M)-th column of A is exactly zero */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBEQUB", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. 
Assume SMLNUM is a power of the radix. */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + radix = _starpu_dlamch_("B"); + logrdx = log(radix); + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__3 = min(i__4,*m); + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], + abs(d__1)); + r__[i__] = max(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] > 0.) { + i__3 = (integer) (log(r__[i__]) / logrdx); + r__[i__] = pow_di(&radix, &i__3); + } + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = min(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = max(d__2,smlnum); + r__[i__] = 1. / min(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = min(R(I)) / max(R(I)). */ + + *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + +/* Compute column scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__2 = min(i__4,*m); + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)) * r__[i__]; + c__[j] = max(d__2,d__3); +/* L80: */ + } + if (c__[j] > 0.) { + i__2 = (integer) (log(c__[j]) / logrdx); + c__[j] = pow_di(&radix, &i__2); + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = max(d__2,smlnum); + c__[j] = 1. / min(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = min(C(J)) / max(C(J)). */ + + *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + + return 0; + +/* End of DGBEQUB */ + +} /* _starpu_dgbequb_ */ diff --git a/min-dgels/base/SRC/dgbrfs.c b/min-dgels/base/SRC/dgbrfs.c new file mode 100644 index 0000000..50057dd --- /dev/null +++ b/min-dgels/base/SRC/dgbrfs.c @@ -0,0 +1,455 @@ +/* dgbrfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b15 = -1.; +static doublereal c_b17 = 1.; + +/* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s; + integer kk; + doublereal xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern /* Subroutine */ int _starpu_dgbmv_(char *, integer *, integer *, integer * +, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* 
Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dgbtrs_( + char *, integer *, integer *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + logical notran; + char transt[1]; + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is banded, and provides */ +/* error bounds and backward error estimates for the solution. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The original band matrix A, stored in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. 
U is stored as an upper triangular band */ +/* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* the multipliers used during the factorization are stored in */ +/* rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from DGBTRF; for 1<=i<=N, row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DGBTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution).
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T") && ! 
_starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < *kl + *ku + 1) { + *info = -7; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -9; + } else if (*ldb < max(1,*n)) { + *info = -12; + } else if (*ldx < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + +/* Computing MIN */ + i__1 = *kl + *ku + 2, i__2 = *n + 1; + nz = min(i__1,i__2); + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x[j * + x_dim1 + 1], &c__1, &c_b17, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. 
*/ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + kk = *ku + 1 - k; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MAX */ + i__3 = 1, i__4 = k - *ku; +/* Computing MIN */ + i__6 = *n, i__7 = k + *kl; + i__5 = min(i__6,i__7); + for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { + work[i__] += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1) + ) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + kk = *ku + 1 - k; +/* Computing MAX */ + i__5 = 1, i__3 = k - *ku; +/* Computing MIN */ + i__6 = *n, i__7 = k + *kl; + i__4 = min(i__6,i__7); + for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { + s += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. 
*/ + + _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] +, &work[*n + 1], n, info); + _starpu_daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + _starpu_dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & + ipiv[1], &work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). 
*/ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L120: */ + } + _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & + ipiv[1], &work[*n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DGBRFS */ + +} /* _starpu_dgbrfs_ */ diff --git a/min-dgels/base/SRC/dgbrfsx.c b/min-dgels/base/SRC/dgbrfsx.c new file mode 100644 index 0000000..9d216b5 --- /dev/null +++ b/min-dgels/base/SRC/dgbrfsx.c @@ -0,0 +1,687 @@ +/* dgbrfsx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__0 = 0; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, 
b_dim1, b_offset, + x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; + integer ref_type__; + extern integer _starpu_ilatrans_(char *); + integer j; + doublereal rcond_tmp__; + integer prec_type__, trans_type__; + extern doublereal _starpu_dla_gbrcond__(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal cwise_wrong__; + extern /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, logical *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal * + , doublereal *, logical *, integer *); + char norm[1]; + logical ignore_cwise__; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern doublereal _starpu_dlangb_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *), _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgbcon_(char *, integer *, integer *, integer + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + logical colequ, notran, rowequ; + extern integer _starpu_ilaprec_(char *); + integer ithresh, n_norms__; + doublereal rthresh; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. 
-- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBRFSX improves the computed solution to a system of linear */ +/* equations and provides error bounds and backward error estimates */ +/* for the solution. In addition to normwise error bound, the code */ +/* provides maximum componentwise error bound if possible. See */ +/* comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ +/* error bounds. */ + +/* The original system of linear equations may have been equilibrated */ +/* before calling this routine, as described by arguments EQUED, R */ +/* and C below. In this case, the solution and error bounds returned */ +/* are for the original unequilibrated system. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* EQUED (input) CHARACTER*1 */ +/* Specifies the form of equilibration that was done to A */ +/* before calling this routine. This is needed to compute */ +/* the solution and error bounds correctly. */ +/* = 'N': No equilibration */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). 
*/ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C). */ +/* The right hand side B has been changed accordingly. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The original band matrix A, stored in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. U is stored as an upper triangular band */ +/* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* the multipliers used during the factorization are stored in */ +/* rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from DGBTRF; for 1<=i<=N, row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* R (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* is not accessed. R is an input argument if FACT = 'F'; */ +/* otherwise, R is an output argument. If FACT = 'F' and */ +/* EQUED = 'R' or 'B', each element of R must be positive.
*/ +/* If R is output, each element of R is a power of the radix. */ +/* If R is input, each element of R should be a power of the radix */ +/* to ensure a reliable solution and error estimates. Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* C (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* is not accessed. C is an input argument if FACT = 'F'; */ +/* otherwise, C is an output argument. If FACT = 'F' and */ +/* EQUED = 'C' or 'B', each element of C must be positive. */ +/* If C is output, each element of C is a power of the radix. */ +/* If C is input, each element of C should be a power of the radix */ +/* to ensure a reliable solution and error estimates. Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DGBTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number.
This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error. This is the */ +/* componentwise relative backward error of each solution vector X(j) */ +/* (i.e., the smallest relative change in any element of A or B that */ +/* makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon').
*/ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. 
*/ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ +/* that entry will be filled with default value used for that */ +/* parameter. Only positions up to NPARAMS are accessed; defaults */ +/* are used for higher-numbered parameters. */ + +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. */ +/* Default: 1.0D+0 */ +/* = 0.0 : No refinement is performed, and no error bounds are */ +/* computed.
*/ +/* = 1.0 : Use the double-precision refinement algorithm, */ +/* possibly with doubled-single computations if the */ +/* compilation environment does not support DOUBLE */ +/* PRECISION. */ +/* (other values are reserved for future use) */ + +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. */ +/* Default: 10 */ +/* Aggressive: Set to 100 to permit convergence using approximate */ +/* factorizations or factorizations other than LU. If */ +/* the factorization uses a technique other than */ +/* Gaussian elimination, the guarantees in */ +/* err_bnds_norm and err_bnds_comp may no longer be */ +/* trustworthy. */ + +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error in the double-precision algorithm. Positive */ +/* is true, 0.0 is false. */ +/* Default: 1.0 (attempt componentwise convergence) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). 
By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + trans_type__ = _starpu_ilatrans_(trans); + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.) { + params[1] = 1.; + } else { + ref_type__ = (integer) params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); + ithresh = 10; + rthresh = .5; + unstable_thresh__ = .25; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.) { + params[2] = (doublereal) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.) 
{ + if (ignore_cwise__) { + params[3] = 0.; + } else { + params[3] = 1.; + } + } else { + ignore_cwise__ = params[3] == 0.; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + notran = _starpu_lsame_(trans, "N"); + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); + +/* Test input parameters. */ + + if (trans_type__ == -1) { + *info = -1; + } else if (! rowequ && ! colequ && ! _starpu_lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (*ldb < max(1,*n)) { + *info = -13; + } else if (*ldx < max(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBRFSX", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; + } + } + return 0; + } + +/* Default to failure. 
*/ + + *rcond = 0.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. */ + + if (notran) { + *(unsigned char *)norm = 'I'; + } else { + *(unsigned char *)norm = '1'; + } + anorm = _starpu_dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); + _starpu_dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, + &work[1], &iwork[1], info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0) { + prec_type__ = _starpu_ilaprec_("E"); + if (notran) { + _starpu_dla_gbrfsx_extended__(&prec_type__, &trans_type__, n, kl, ku, + nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & + ipiv[1], &colequ, &c__[1], &b[b_offset], ldb, &x[x_offset] + , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n + << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & + unstable_thresh__, &ignore_cwise__, info); + } else { + _starpu_dla_gbrfsx_extended__(&prec_type__, &trans_type__, n, kl, ku, + nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & + ipiv[1], &rowequ, &r__[1], &b[b_offset], ldb, &x[x_offset] + , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n + << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & + unstable_thresh__, &ignore_cwise__, info); + } + } +/* Computing MAX */ + d__1 = 10., d__2 = sqrt((doublereal) (*n)); + err_lbnd__ = 
max(d__1,d__2) * _starpu_dlamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (colequ && notran) { + rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &c__[1], + info, &work[1], &iwork[1], (ftnlen)1); + } else if (rowequ && ! notran) { + rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &r__[1], + info, &work[1], &iwork[1], (ftnlen)1); + } else { + rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__0, &r__[1], + info, &work[1], &iwork[1], (ftnlen)1); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. 
To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__1, &x[j * + x_dim1 + 1], info, &work[1], &iwork[1], (ftnlen)1); + } else { + rcond_tmp__ = 0.; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; + if (params[3] == 1. && *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of DGBRFSX */ + +} /* _starpu_dgbrfsx_ */ diff --git a/min-dgels/base/SRC/dgbsv.c b/min-dgels/base/SRC/dgbsv.c new file mode 100644 index 0000000..cc94129 --- /dev/null +++ b/min-dgels/base/SRC/dgbsv.c @@ -0,0 +1,176 @@ +/* dgbsv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int _starpu_dgbtrf_(integer *, integer *, integer *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dgbtrs_(char *, integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBSV computes the solution to a real system of linear equations */ +/* A * X = B, where A is a band matrix of order N with KL subdiagonals */ +/* and KU superdiagonals, and X and B are N-by-NRHS matrices. */ + +/* The LU decomposition with partial pivoting and row interchanges is */ +/* used to factor A as A = L * U, where L is a product of permutation */ +/* and unit lower triangular matrices with KL subdiagonals, and U is */ +/* upper triangular with KL+KU superdiagonals. The factored form of A */ +/* is then used to solve the system of equations A * X = B.
*/ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows KL+1 to */ +/* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) */ +/* On exit, details of the factorization: U is stored as an */ +/* upper triangular band matrix with KL+KU superdiagonals in */ +/* rows 1 to KL+KU+1, and the multipliers used during the */ +/* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* See below for further details. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* The pivot indices that define the permutation matrix P; */ +/* row i of the matrix was interchanged with row IPIV(i). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and the solution has not been computed.
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* M = N = 6, KL = 2, KU = 1: */ + +/* On entry: On exit: */ + +/* * * * + + + * * * u14 u25 u36 */ +/* * * + + + + * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ + +/* Array elements marked * are not used by the routine; elements marked */ +/* + need not be set on entry, but are required by the routine to store */ +/* elements of U because of fill-in resulting from the row interchanges. */ + +/* ===================================================================== */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: the array pointers are shifted down (by 1, or by 1 + leading dimension) so that ab[ab_offset], ipiv[1] and b[b_offset] below designate the first elements supplied by the caller. */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body: validate the arguments in order; info = -i flags the i-th parameter of this routine (ldab is the 6th, ldb the 9th). */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*kl < 0) { + *info = -2; + } else if (*ku < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -6; + } else if (*ldb < max(*n,1)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); /* positive position of the offending argument, handed to the error handler */ + _starpu_xerbla_("DGBSV ", &i__1); + return 0; + } + +/* Compute the LU factorization of the band matrix A. n is passed for both of the first two arguments of DGBTRF (presumably its row and column counts, A being square of order N). */ + + _starpu_dgbtrf_(n, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. This step is skipped when the factorization left INFO nonzero.
*/ + + _starpu_dgbtrs_("No transpose", n, kl, ku, nrhs, &ab[ab_offset], ldab, &ipiv[ + 1], &b[b_offset], ldb, info); + } + return 0; + +/* End of DGBSV */ + +} /* _starpu_dgbsv_ */ diff --git a/min-dgels/base/SRC/dgbsvx.c b/min-dgels/base/SRC/dgbsvx.c new file mode 100644 index 0000000..33ca734 --- /dev/null +++ b/min-dgels/base/SRC/dgbsvx.c @@ -0,0 +1,650 @@ +/* dgbsvx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, j1, j2; + doublereal amax; + char norm[1]; + extern logical _starpu_lsame_(char *, char *); + doublereal rcmin, rcmax, anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical equil; + extern doublereal _starpu_dlangb_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *), _starpu_dlamch_(char *); + extern /*
Subroutine */ int _starpu_dlaqgb_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, char *), + _starpu_dgbcon_(char *, integer *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + doublereal colcnd; + extern doublereal _starpu_dlantb_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgbequ_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dgbrfs_( + char *, integer *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), _starpu_dgbtrf_(integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, integer *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer infequ; + logical colequ; + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + doublereal rpvgrw; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGBSVX uses the LU factorization to compute the solution to a real */ +/* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */ +/* where A is a band matrix of order N with KL subdiagonals and KU */ +/* superdiagonals, and X and B are N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed by this subroutine: */ + +/* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* the system: */ +/* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* or diag(C)*B (if TRANS = 'T' or 'C'). */ + +/* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ +/* matrix A (after equilibration if FACT = 'E') as */ +/* A = L * U, */ +/* where L is a product of permutation and unit lower triangular */ +/* matrices with KL subdiagonals, and U is upper triangular with */ +/* KL+KU superdiagonals. */ + +/* 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* returns with INFO = i. Otherwise, the factored form of A is used */ +/* to estimate the condition number of the matrix A. If the */ +/* reciprocal of the condition number is less than machine precision, */ +/* INFO = N+1 is returned as a warning, but the routine still goes on */ +/* to solve for X and compute error bounds as described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. 
Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* that it solves the original system before equilibration. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AFB and IPIV contain the factored form of */ +/* A. If EQUED is not 'N', the matrix A has been */ +/* equilibrated with scaling factors given by R and C. */ +/* AB, AFB, and IPIV are not modified. */ +/* = 'N': The matrix A will be copied to AFB and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AFB and factored. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations. */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Transpose) */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. 
*/ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ + +/* If FACT = 'F' and EQUED is not 'N', then A must have been */ +/* equilibrated by the scaling factors in R and/or C. AB is not */ +/* modified if FACT = 'F' or 'N', or if FACT = 'E' and */ +/* EQUED = 'N' on exit. */ + +/* On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* EQUED = 'R': A := diag(R) * A */ +/* EQUED = 'C': A := A * diag(C) */ +/* EQUED = 'B': A := diag(R) * A * diag(C). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* If FACT = 'F', then AFB is an input argument and on entry */ +/* contains details of the LU factorization of the band matrix */ +/* A, as computed by DGBTRF. U is stored as an upper triangular */ +/* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* and the multipliers used during the factorization are stored */ +/* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ +/* the factored form of the equilibrated matrix A. */ + +/* If FACT = 'N', then AFB is an output argument and on exit */ +/* returns details of the LU factorization of A. */ + +/* If FACT = 'E', then AFB is an output argument and on exit */ +/* returns details of the LU factorization of the equilibrated */ +/* matrix A (see the description of AB for the form of the */ +/* equilibrated matrix). */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains the pivot indices from the factorization A = L*U */ +/* as computed by DGBTRF; row i of the matrix was interchanged */ +/* with row IPIV(i). 
*/ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = L*U */ +/* of the equilibrated matrix A. */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). */ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* R (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* is not accessed. R is an input argument if FACT = 'F'; */ +/* otherwise, R is an output argument. If FACT = 'F' and */ +/* EQUED = 'R' or 'B', each element of R must be positive. */ + +/* C (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* is not accessed. C is an input argument if FACT = 'F'; */ +/* otherwise, C is an output argument. If FACT = 'F' and */ +/* EQUED = 'C' or 'B', each element of C must be positive. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* diag(R)*B; */ +/* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* overwritten by diag(C)*B. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* to the original system of equations. Note that A and B are */ +/* modified on exit if EQUED .ne. 'N', and the solution to the */ +/* equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* and EQUED = 'R' or 'B'. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A after equilibration (if done). If RCOND is less than the */ +/* machine precision (in particular, if RCOND = 0), the matrix */ +/* is singular to working precision. This condition is */ +/* indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (3*N) */ +/* On exit, WORK(1) contains the reciprocal pivot growth */ +/* factor norm(A)/norm(U). The "max absolute element" norm is */ +/* used. 
If WORK(1) is much less than 1, then the stability */ +/* of the LU factorization of the (equilibrated) matrix A */ +/* could be poor. This also means that the solution X, condition */ +/* estimator RCOND, and forward error bound FERR could be */ +/* unreliable. If factorization fails with 0 < INFO <= N, then */ +/* WORK(1) contains the reciprocal pivot growth factor for the */ +/* leading INFO columns of A. */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, so the solution and error bounds */ +/* could not be computed. RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + notran = _starpu_lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ + || _starpu_lsame_(equed, "N"))) { + *info = -12; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = max(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) 
{ + *info = -13; + } else if (*n > 0) { + rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -14; + } else if (*n > 0) { + colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -16; + } else if (*ldx < max(1,*n)) { + *info = -18; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBSVX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dgbequ_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &rowcnd, + &colcnd, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, equed); + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of the band matrix A. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; + j1 = max(i__2,1); +/* Computing MIN */ + i__2 = j + *kl; + j2 = min(i__2,*n); + i__2 = j2 - j1 + 1; + _starpu_dcopy_(&i__2, &ab[*ku + 1 - j + j1 + j * ab_dim1], &c__1, &afb[* + kl + *ku + 1 - j + j1 + j * afb_dim1], &c__1); +/* L70: */ + } + + _starpu_dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + anorm = 0.; + i__1 = *info; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *ku + 2 - j; +/* Computing MIN */ + i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; + i__3 = min(i__4,i__5); + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( + d__1)); + anorm = max(d__2,d__3); +/* L80: */ + } +/* L90: */ + } +/* Computing MIN */ + i__3 = *info - 1, i__2 = *kl + *ku; + i__1 = min(i__3,i__2); +/* Computing MAX */ + i__4 = 1, i__5 = *kl + *ku + 2 - *info; + rpvgrw = _starpu_dlantb_("M", "U", "N", info, &i__1, &afb[max(i__4, i__5) + + afb_dim1], ldafb, &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = anorm / rpvgrw; + } + work[1] = rpvgrw; + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = _starpu_dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); + i__1 = *kl + *ku; + rpvgrw = _starpu_dlantb_("M", "U", "N", n, &i__1, &afb[afb_offset], ldafb, &work[ + 1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = _starpu_dlangb_("M", n, kl, ku, &ab[ab_offset], ldab, &work[1]) / rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. 
*/ + + _starpu_dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, + &work[1], &iwork[1], info); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dgbrfs_(trans, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], + ldafb, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], & + berr[1], &work[1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L120: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; +/* L130: */ + } +/* L140: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L150: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + work[1] = rpvgrw; + return 0; + +/* End of DGBSVX */ + +} /* _starpu_dgbsvx_ */ diff --git a/min-dgels/base/SRC/dgbsvxx.c b/min-dgels/base/SRC/dgbsvxx.c new file mode 100644 index 0000000..d345399 --- /dev/null +++ b/min-dgels/base/SRC/dgbsvxx.c @@ -0,0 +1,745 @@ +/* dgbsvxx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal amax; + extern doublereal _starpu_dla_gbrpvgrw__(integer *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal rcmin, rcmax; + logical equil; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlaqgb_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, char *); + doublereal colcnd; + extern /* Subroutine */ int _starpu_dgbtrf_(integer *, integer *, integer *, + integer *, doublereal *, integer *, integer *, integer *); + logical nofact; + extern /* Subroutine */ int 
_starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer infequ; + logical colequ; + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + extern /* Subroutine */ int _starpu_dlascl2_(integer *, integer *, doublereal *, + doublereal *, integer *), _starpu_dgbequb_(integer *, integer *, integer * +, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dgbrfsx_( + char *, char *, integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBSVXX uses the LU factorization to compute the solution to a */ +/* double precision system of linear equations A * X = B, where A is an */ +/* N-by-N matrix and X and B are N-by-NRHS matrices. */ + +/* If requested, both normwise and maximum componentwise error bounds */ +/* are returned. 
DGBSVXX will return a solution with a tiny */ +/* guaranteed error (O(eps) where eps is the working machine */ +/* precision) unless the matrix is very ill-conditioned, in which */ +/* case a warning is returned. Relevant condition numbers also are */ +/* calculated and returned. */ + +/* DGBSVXX accepts user-provided factorizations and equilibration */ +/* factors; see the definitions of the FACT and EQUED options. */ +/* Solving with refinement and using a factorization from a previous */ +/* DGBSVXX call will also produce a solution with either O(eps) */ +/* errors or warnings, but we cannot make that claim for general */ +/* user-provided factorizations and equilibration factors if they */ +/* differ from what DGBSVXX would itself produce. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* the system: */ + +/* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ + +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* or diag(C)*B (if TRANS = 'T' or 'C'). */ + +/* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* the matrix A (after equilibration if FACT = 'E') as */ + +/* A = P * L * U, */ + +/* where P is a permutation matrix, L is a unit lower triangular */ +/* matrix, and U is upper triangular. */ + +/* 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* routine returns with INFO = i. Otherwise, the factored form of A */ +/* is used to estimate the condition number of the matrix A (see */ +/* argument RCOND). 
If the reciprocal of the condition number is less */ +/* than machine precision, the routine still goes on to solve for X */ +/* and compute error bounds as described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* the routine will use iterative refinement to try to get a small */ +/* error and error bounds. Refinement calculates the residual to at */ +/* least twice the working precision. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* that it solves the original system before equilibration. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AFB and IPIV contain the factored form of A. */ +/* If EQUED is not 'N', the matrix A has been */ +/* equilibrated with scaling factors given by R and C. */ +/* AB, AFB, and IPIV are not modified. */ +/* = 'N': The matrix A will be copied to AFB and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AFB and factored. 
*/ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ + +/* If FACT = 'F' and EQUED is not 'N', then AB must have been */ +/* equilibrated by the scaling factors in R and/or C. AB is not */ +/* modified if FACT = 'F' or 'N', or if FACT = 'E' and */ +/* EQUED = 'N' on exit. */ + +/* On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* EQUED = 'R': A := diag(R) * A */ +/* EQUED = 'C': A := A * diag(C) */ +/* EQUED = 'B': A := diag(R) * A * diag(C). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* If FACT = 'F', then AFB is an input argument and on entry */ +/* contains details of the LU factorization of the band matrix */ +/* A, as computed by DGBTRF. U is stored as an upper triangular */ +/* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* and the multipliers used during the factorization are stored */ +/* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ +/* the factored form of the equilibrated matrix A. 
*/ + +/* If FACT = 'N', then AFB is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then AFB is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the equilibrated matrix A (see the description of AB for */ +/* the form of the equilibrated matrix). */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* as computed by DGBTRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the equilibrated matrix A. */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). */ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* R (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* is not accessed. R is an input argument if FACT = 'F'; */ +/* otherwise, R is an output argument. 
If FACT = 'F' and */ +/* EQUED = 'R' or 'B', each element of R must be positive. */ +/* If R is output, each element of R is a power of the radix. */ +/* If R is input, each element of R should be a power of the radix */ +/* to ensure a reliable solution and error estimates. Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* C (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* is not accessed. C is an input argument if FACT = 'F'; */ +/* otherwise, C is an output argument. If FACT = 'F' and */ +/* EQUED = 'C' or 'B', each element of C must be positive. */ +/* If C is output, each element of C is a power of the radix. */ +/* If C is input, each element of C should be a power of the radix */ +/* to ensure a reliable solution and error estimates. Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* diag(R)*B; */ +/* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* overwritten by diag(C)*B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
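*/ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* system of equations. Note that A and B are modified on exit */ +/* if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* RPVGRW (output) DOUBLE PRECISION */ +/* Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* pivot growth factor norm(A)/norm(U). The "max absolute element" */ +/* norm is used. If this is much less than 1, then the stability of */ +/* the LU factorization of the (equilibrated) matrix A could be poor. */ +/* This also means that the solution X, estimated condition numbers, */ +/* and error bounds could be unreliable. If factorization fails with */ +/* 0 < INFO <= N, then this contains the reciprocal pivot growth */ +/* factor for the leading INFO columns of A. In DGBSVX, this */ +/* quantity is returned in WORK(1). */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error. This is the */ +/* componentwise relative backward error of each solution vector X(j) */ +/* (i.e., the smallest relative change in any element of A or B that */ +/* makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) / max_j abs(X(j,i)) */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. The second index in ERR_BNDS_NORM(:,err) */ +/* contains the following three fields: */ +/* err = 1 "Trust/don't trust" boolean for the answer. */ +/* err = 2 "Guaranteed" error bound: the estimated forward error. */ +/* err = 3 Reciprocal condition number: estimated normwise */ +/* reciprocal condition number, compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* max_j ( abs(XTRUE(j,i) - X(j,i)) / abs(X(j,i)) ) */ + +/* The array is indexed in the same way as ERR_BNDS_NORM, with the */ +/* same three fields (the reciprocal condition number in err = 3 */ +/* being the componentwise one). If componentwise accuracy is not */ +/* requested (PARAMS(3) = 0.0), then ERR_BNDS_COMP is not accessed. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ +/* that entry will be filled with default value used for that */ +/* parameter. Only positions up to NPARAMS are accessed; defaults */ +/* are used for higher-numbered parameters. */ + +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. */ +/* Default: 1.0D+0 */ +/* = 0.0 : No refinement is performed, and no error bounds are */ +/* computed. */ +/* = 1.0 : Use the extra-precise refinement algorithm. */ +/* (other values are reserved for future use) */ + +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. */ +/* Default: 10 */ + +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error in the double-precision algorithm. Positive */ +/* is true, 0.0 is false. */ +/* Default: 1.0 (attempt componentwise convergence) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. 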
If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + notran = _starpu_lsame_(trans, "N"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. 
/ smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in DGBRFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DGBRFSX. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ + || _starpu_lsame_(equed, "N"))) { + *info = -12; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = max(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -13; + } else if (*n > 0) { + rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) 
{ + *info = -14; + } else if (*n > 0) { + colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -15; + } else if (*ldx < max(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBSVXX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dgbequb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, equed); + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.; + } + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + _starpu_dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + _starpu_dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = (*kl << 1) + *ku + 1; + for (i__ = *kl + 1; i__ <= i__2; ++i__) { + afb[i__ + j * afb_dim1] = ab[i__ - *kl + j * ab_dim1]; +/* L30: */ + } +/* L40: */ + } + _starpu_dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. 
*/ + + *rpvgrw = _starpu_dla_gbrpvgrw__(n, kl, ku, info, &ab[ab_offset], ldab, & + afb[afb_offset], ldafb); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = _starpu_dla_gbrpvgrw__(n, kl, ku, n, &ab[ab_offset], ldab, &afb[ + afb_offset], ldafb); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dgbrfsx_(trans, equed, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[ + afb_offset], ldafb, &ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, + &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, & + err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], nparams, &params[1], &work[1], &iwork[1], + info); + +/* Scale solutions. */ + + if (colequ && notran) { + _starpu_dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! notran) { + _starpu_dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DGBSVXX */ + +} /* _starpu_dgbsvxx_ */ diff --git a/min-dgels/base/SRC/dgbtf2.c b/min-dgels/base/SRC/dgbtf2.c new file mode 100644 index 0000000..b4f6a35 --- /dev/null +++ b/min-dgels/base/SRC/dgbtf2.c @@ -0,0 +1,262 @@ +/* dgbtf2.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (every argument is passed by address, so literals need storage): c__1 is the unit stride given to IDAMAX, DSCAL and DGER; c_b9 is the -1 scale factor of the DGER rank-one update. */ + +static integer c__1 = 1; +static doublereal c_b9 = -1.; + +/* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables: kv = KL+KU superdiagonals of U including fill-in, km = subdiagonal entries in the current column, jp = 1-based pivot position counted from the diagonal, ju = last column touched so far. */ + integer i__, j, km, jp, ju, kv; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer + *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBTF2 computes an LU factorization of a real m-by-n band matrix A */ +/* using partial pivoting with row interchanges. */ + +/* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0.
*/ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows KL+1 to */ +/* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ + +/* On exit, details of the factorization: U is stored as an */ +/* upper triangular band matrix with KL+KU superdiagonals in */ +/* rows 1 to KL+KU+1, and the multipliers used during the */ +/* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* See below for further details. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations.
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* M = N = 6, KL = 2, KU = 1: */ + +/* On entry: On exit: */ + +/* * * * + + + * * * u14 u25 u36 */ +/* * * + + + + * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ + +/* Array elements marked * are not used by the routine; elements marked */ +/* + need not be set on entry, but are required by the routine to store */ +/* elements of U, because of fill-in resulting from the row */ +/* interchanges. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* KV is the number of superdiagonals in the factor U, allowing for */ +/* fill-in. */ + + /* Parameter adjustments: move the base pointers back so that the 1-based Fortran subscripts ab[i+j*ab_dim1] and ipiv[j] land on the caller arrays. */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + + /* Function Body */ + kv = *ku + *kl; + +/* Test the input parameters. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + kv + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Gaussian elimination with partial pivoting */ + +/* Set fill-in elements in columns KU+2 to KV to zero.
*/ + + i__1 = min(kv,*n); + for (j = *ku + 2; j <= i__1; ++j) { + i__2 = *kl; + for (i__ = kv - j + 2; i__ <= i__2; ++i__) { + ab[i__ + j * ab_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* JU is the index of the last column affected by the current stage */ +/* of the factorization. */ + + ju = 1; + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Set fill-in elements in column J+KV to zero. */ + + if (j + kv <= *n) { + i__2 = *kl; + for (i__ = 1; i__ <= i__2; ++i__) { + ab[i__ + (j + kv) * ab_dim1] = 0.; +/* L30: */ + } + } + +/* Find pivot and test for singularity. KM is the number of */ +/* subdiagonal elements in the current column. JP from IDAMAX is 1-based from the diagonal, so IPIV(J) = JP+J-1 is the absolute pivot row. */ + +/* Computing MIN */ + i__2 = *kl, i__3 = *m - j; + km = min(i__2,i__3); + i__2 = km + 1; + jp = _starpu_idamax_(&i__2, &ab[kv + 1 + j * ab_dim1], &c__1); + ipiv[j] = jp + j - 1; + if (ab[kv + jp + j * ab_dim1] != 0.) { +/* Computing MAX */ +/* Computing MIN */ + i__4 = j + *ku + jp - 1; + i__2 = ju, i__3 = min(i__4,*n); + ju = max(i__2,i__3); + +/* Apply interchange to columns J to JU. A stride of LDAB-1 steps along one matrix row in band storage. */ + + if (jp != 1) { + i__2 = ju - j + 1; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dswap_(&i__2, &ab[kv + jp + j * ab_dim1], &i__3, &ab[kv + 1 + + j * ab_dim1], &i__4); + } + + if (km > 0) { + +/* Compute multipliers: scale the KM subdiagonal entries by the reciprocal of the pivot. */ + + d__1 = 1. / ab[kv + 1 + j * ab_dim1]; + _starpu_dscal_(&km, &d__1, &ab[kv + 2 + j * ab_dim1], &c__1); + +/* Update trailing submatrix within the band. */ + + if (ju > j) { + i__2 = ju - j; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dger_(&km, &i__2, &c_b9, &ab[kv + 2 + j * ab_dim1], &c__1, + &ab[kv + (j + 1) * ab_dim1], &i__3, &ab[kv + 1 + + (j + 1) * ab_dim1], &i__4); + } + } + } else { + +/* If pivot is zero, set INFO to the index of the pivot */ +/* unless a zero pivot has already been found.
*/ + + if (*info == 0) { + *info = j; + } + } +/* L40: */ + } + return 0; + +/* End of DGBTF2 */ + +} /* _starpu_dgbtf2_ */ diff --git a/min-dgels/base/SRC/dgbtrf.c b/min-dgels/base/SRC/dgbtrf.c new file mode 100644 index 0000000..b783887 --- /dev/null +++ b/min-dgels/base/SRC/dgbtrf.c @@ -0,0 +1,588 @@ +/* dgbtrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: c__1 serves as unit stride and as the ILAENV query selector; c__65 is the leading dimension of the WORK13 and WORK31 scratch arrays; c_b18 and c_b31 are the -1 and 1 scalars passed to DGER, DTRSM and DGEMM. */ + +static integer c__1 = 1; +static integer c__65 = 65; +static doublereal c_b18 = -1.; +static doublereal c_b31 = 1.; + +/* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + + /* Local variables */ + integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, + kv, nw; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer * +); + doublereal work13[4160] /* was
[65][64] */, work31[4160] /* + was [65][64] */; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dgbtf2_( + integer *, integer *, integer *, integer *, doublereal *, integer + *, integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBTRF computes an LU factorization of a real m-by-n band matrix A */ +/* using partial pivoting with row interchanges. */ + +/* This is the blocked version of the algorithm, calling Level 3 BLAS. It defers to the unblocked DGBTF2 when the block size is at most 1 or exceeds KL. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows KL+1 to */ +/* 2*KL+KU+1; rows 1 to KL of the array need not be set.
*/ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ + +/* On exit, details of the factorization: U is stored as an */ +/* upper triangular band matrix with KL+KU superdiagonals in */ +/* rows 1 to KL+KU+1, and the multipliers used during the */ +/* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* See below for further details. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* M = N = 6, KL = 2, KU = 1: */ + +/* On entry: On exit: */ + +/* * * * + + + * * * u14 u25 u36 */ +/* * * + + + + * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ + +/* Array elements marked * are not used by the routine; elements marked */ +/* + need not be set on entry, but are required by the routine to store */ +/* elements of U because of fill-in resulting from the row interchanges. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* ..
*/ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* KV is the number of superdiagonals in the factor U, allowing for */ +/* fill-in */ + + /* Parameter adjustments: move the base pointers back so that the 1-based Fortran subscripts ab[i+j*ab_dim1] and ipiv[j] land on the caller arrays. */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + + /* Function Body */ + kv = *ku + *kl; + +/* Test the input parameters. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + kv + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment */ + + nb = _starpu_ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku); + +/* The block size must not exceed the limit set by the size of the */ +/* local arrays WORK13 and WORK31.
*/ + + nb = min(nb,64); + + if (nb <= 1 || nb > *kl) { + +/* Use unblocked code: the block size is at most 1 or exceeds the KL subdiagonals. */ + + _starpu_dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); + } else { + +/* Use blocked code */ + +/* Zero the superdiagonal elements of the work array WORK13 (WORK13 and WORK31 are 65-by-64 column-major; Fortran element (i,j) sits at C index i+j*65-66) */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work13[i__ + j * 65 - 66] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Zero the subdiagonal elements of the work array WORK31 */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = nb; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work31[i__ + j * 65 - 66] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Gaussian elimination with partial pivoting */ + +/* Set fill-in elements in columns KU+2 to KV to zero */ + + i__1 = min(kv,*n); + for (j = *ku + 2; j <= i__1; ++j) { + i__2 = *kl; + for (i__ = kv - j + 2; i__ <= i__2; ++i__) { + ab[i__ + j * ab_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + +/* JU is the index of the last column affected by the current */ +/* stage of the factorization */ + + ju = 1; + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = min(*m,*n) - j + 1; + jb = min(i__3,i__4); + +/* The active part of the matrix is partitioned */ + +/* A11 A12 A13 */ +/* A21 A22 A23 */ +/* A31 A32 A33 */ + +/* Here A11, A21 and A31 denote the current block of JB columns */ +/* which is about to be factorized. The number of rows in the */ +/* partitioning are JB, I2, I3 respectively, and the numbers */ +/* of columns are JB, J2, J3. The superdiagonal elements of A13 */ +/* and the subdiagonal elements of A31 lie outside the band. */ + +/* Computing MIN */ + i__3 = *kl - jb, i__4 = *m - j - jb + 1; + i2 = min(i__3,i__4); +/* Computing MIN */ + i__3 = jb, i__4 = *m - j - *kl + 1; + i3 = min(i__3,i__4); + +/* J2 and J3 are computed after JU has been updated.
*/ + +/* Factorize the current block of JB columns */ + + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + +/* Set fill-in elements in column JJ+KV to zero */ + + if (jj + kv <= *n) { + i__4 = *kl; + for (i__ = 1; i__ <= i__4; ++i__) { + ab[i__ + (jj + kv) * ab_dim1] = 0.; +/* L70: */ + } + } + +/* Find pivot and test for singularity. KM is the number of */ +/* subdiagonal elements in the current column. IPIV(JJ) is stored relative to the block start J here and is made absolute by the pivot index adjustment further down. */ + +/* Computing MIN */ + i__4 = *kl, i__5 = *m - jj; + km = min(i__4,i__5); + i__4 = km + 1; + jp = _starpu_idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); + ipiv[jj] = jp + jj - j; + if (ab[kv + jp + jj * ab_dim1] != 0.) { +/* Computing MAX */ +/* Computing MIN */ + i__6 = jj + *ku + jp - 1; + i__4 = ju, i__5 = min(i__6,*n); + ju = max(i__4,i__5); + if (jp != 1) { + +/* Apply interchange to columns J to J+JB-1 */ + + if (jp + jj - 1 < j + *kl) { + + i__4 = *ldab - 1; + i__5 = *ldab - 1; + _starpu_dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__4, &ab[kv + jp + jj - j + j * ab_dim1], + &i__5); + } else { + +/* The interchange affects columns J to JJ-1 of A31 */ +/* which are stored in the work array WORK31 (a row of WORK31 is walked with stride 65, passed as c__65) */ + + i__4 = jj - j; + i__5 = *ldab - 1; + _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], + &i__5, &work31[jp + jj - j - *kl - 1], & + c__65); + i__4 = j + jb - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + _starpu_dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & + ab[kv + jp + jj * ab_dim1], &i__6); + } + } + +/* Compute multipliers */ + + d__1 = 1. / ab[kv + 1 + jj * ab_dim1]; + _starpu_dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1); + +/* Update trailing submatrix within the band and within */ +/* the current block. JM is the index of the last column */ +/* which needs to be updated.
*/ + +/* Computing MIN */ + i__4 = ju, i__5 = j + jb - 1; + jm = min(i__4,i__5); + if (jm > jj) { + i__4 = jm - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + _starpu_dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], + &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & + ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); + } + } else { + +/* If pivot is zero, set INFO to the index of the pivot */ +/* unless a zero pivot has already been found. */ + + if (*info == 0) { + *info = jj; + } + } + +/* Copy current column of A31 into the work array WORK31 */ + +/* Computing MIN */ + i__4 = jj - j + 1; + nw = min(i__4,i3); + if (nw > 0) { + _starpu_dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & + c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); + } +/* L80: */ + } + if (j + jb <= *n) { + +/* Apply the row interchanges to the other blocks. */ + +/* Computing MIN */ + i__3 = ju - j + 1; + j2 = min(i__3,kv) - jb; +/* Computing MAX */ + i__3 = 0, i__4 = ju - j - kv + 1; + j3 = max(i__3,i__4); + +/* Use DLASWP to apply the row interchanges to A12, A22, and */ +/* A32. */ + + i__3 = *ldab - 1; + _starpu_dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & + c__1, &jb, &ipiv[j], &c__1); + +/* Adjust the pivot indices: convert the block-relative values stored above into absolute row numbers. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L90: */ + } + +/* Apply the row interchanges to A13, A23, and A33 */ +/* columnwise.
*/ + + k2 = j - 1 + jb + j2; + i__3 = j3; + for (i__ = 1; i__ <= i__3; ++i__) { + jj = k2 + i__; + i__4 = j + jb - 1; + for (ii = j + i__ - 1; ii <= i__4; ++ii) { + ip = ipiv[ii]; + if (ip != ii) { + temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; + ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + + ip - jj + jj * ab_dim1]; + ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; + } +/* L100: */ + } +/* L110: */ + } + +/* Update the relevant part of the trailing submatrix */ + + if (j2 > 0) { + +/* Update A12: triangular solve with the unit lower factor held in A11 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + + 1 - jb + (j + jb) * ab_dim1], &i__4); + + if (i2 > 0) { + +/* Update A22: A22 = A22 - A21*A12 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + i__5 = *ldab - 1; + _starpu_dgemm_("No transpose", "No transpose", &i2, &j2, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, + &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, + &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & + i__5); + } + + if (i3 > 0) { + +/* Update A32: A32 = A32 - WORK31*A12 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dgemm_("No transpose", "No transpose", &i3, &j2, &jb, + &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + + 1 - jb + (j + jb) * ab_dim1], &i__4); + } + } + + if (j3 > 0) { + +/* Copy the lower triangle of A13 into the work array */ +/* WORK13 */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + + j + kv - 1) * ab_dim1]; +/* L120: */ + } +/* L130: */ + } + +/* Update A13 in the work array */ + + i__3 = *ldab - 1; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, + &c__65); + + if (i2 > 0) { + +/* Update A23 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dgemm_("No transpose", "No transpose", &i2, &j3, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1],
&i__3, + work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) + * ab_dim1], &i__4); + } + + if (i3 > 0) { + +/* Update A33: A33 = A33 - WORK31*WORK13 */ + + i__3 = *ldab - 1; + _starpu_dgemm_("No transpose", "No transpose", &i3, &j3, &jb, + &c_b18, work31, &c__65, work13, &c__65, & + c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & + i__3); + } + +/* Copy the lower triangle of A13 back into place */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = + work13[ii + jj * 65 - 66]; +/* L140: */ + } +/* L150: */ + } + } + } else { + +/* Adjust the pivot indices. No columns remain to the right of this block, so only the index conversion is needed. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L160: */ + } + } + +/* Partially undo the interchanges in the current block to */ +/* restore the upper triangular form of A31 and copy the upper */ +/* triangle of A31 back into place */ + + i__3 = j; + for (jj = j + jb - 1; jj >= i__3; --jj) { + jp = ipiv[jj] - jj + 1; + if (jp != 1) { + +/* Apply interchange to columns J to JJ-1 */ + + if (jp + jj - 1 < j + *kl) { + +/* The interchange does not affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &ab[kv + jp + jj - j + j * ab_dim1], & + i__6); + } else { + +/* The interchange does affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &work31[jp + jj - j - *kl - 1], &c__65); + } + } + +/* Copy the current column of A31 back into place */ + +/* Computing MIN */ + i__4 = i3, i__5 = jj - j + 1; + nw = min(i__4,i__5); + if (nw > 0) { + _starpu_dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ + kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); + } +/* L170: */ + } +/* L180: */ + } + } + + return 0; + +/* End of DGBTRF */ + +} /* _starpu_dgbtrf_ */ diff --git a/min-dgels/base/SRC/dgbtrs.c b/min-dgels/base/SRC/dgbtrs.c new file mode 100644 index
0000000..2d16342 --- /dev/null +++ b/min-dgels/base/SRC/dgbtrs.c @@ -0,0 +1,244 @@ +/* dgbtrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (every argument is passed by address, so literals need storage): c_b7 is the -1 scale factor for DGER and DGEMV, c__1 the unit stride, c_b23 the 1.0 weight that keeps the existing row of B in the DGEMV update. */ + +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b23 = 1.; + +/* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, + doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables: kd = KU+KL+1 is the AB row holding the diagonal, lm = number of multipliers in column j, l = pivot row IPIV(j), lnoti = true when KL > 0 (the L solve is skipped otherwise). */ + integer i__, j, l, kd, lm; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dtbsv_(char *, + char *, char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + logical lnoti; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DGBTRS solves a system of linear equations */ +/* A * X = B or A' * X = B */ +/* with a general band matrix A using the LU factorization computed */ +/* by DGBTRF. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations. */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A'* X = B (Transpose) */ +/* = 'C': A'* X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. U is stored as an upper triangular band */ +/* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* the multipliers used during the factorization are stored in */ +/* rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= N, row i of the matrix was */ +/* interchanged with row IPIV(i). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* ..
External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: move the base pointers back so that the 1-based Fortran subscripts ab[i+j*ab_dim1], b[i+j*b_dim1] and ipiv[j] land on the caller arrays. */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGBTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + kd = *ku + *kl + 1; + lnoti = *kl > 0; + + if (notran) { + +/* Solve A*X = B. */ + +/* Solve L*X = B, overwriting B with X. */ + +/* L is represented as a product of permutations and unit lower */ +/* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */ +/* where each transformation L(i) is a rank-one modification of */ +/* the identity matrix. Each step below first applies the interchange IPIV(j), then the rank-one DGER update built from the stored multipliers. */ + + if (lnoti) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kl, i__3 = *n - j; + lm = min(i__2,i__3); + l = ipiv[j]; + if (l != j) { + _starpu_dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } + _starpu_dger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[ + j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb); +/* L10: */ + } + } + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U*X = B, overwriting B with X.
*/ + + i__2 = *kl + *ku; + _starpu_dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[ + ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L20: */ + } + + } else { + +/* Solve A'*X = B. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U'*X = B, overwriting B with X. */ + + i__2 = *kl + *ku; + _starpu_dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset], + ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L30: */ + } + +/* Solve L'*X = B, overwriting B with X. Columns are visited in reverse order, applying each DGEMV update before its row interchange. */ + + if (lnoti) { + for (j = *n - 1; j >= 1; --j) { +/* Computing MIN */ + i__1 = *kl, i__2 = *n - j; + lm = min(i__1,i__2); + _starpu_dgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb, + &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + + b_dim1], ldb); + l = ipiv[j]; + if (l != j) { + _starpu_dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } +/* L40: */ + } + } + } + return 0; + +/* End of DGBTRS */ + +} /* _starpu_dgbtrs_ */ diff --git a/min-dgels/base/SRC/dgebak.c b/min-dgels/base/SRC/dgebak.c new file mode 100644 index 0000000..ba2f4d1 --- /dev/null +++ b/min-dgels/base/SRC/dgebak.c @@ -0,0 +1,237 @@ +/* dgebak.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + doublereal s; + integer ii; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical leftv; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical rightv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEBAK forms the right or left eigenvectors of a real general matrix */ +/* by backward transformation on the computed eigenvectors of the */ +/* balanced matrix output by DGEBAL. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies the type of backward transformation required: */ +/* = 'N', do nothing, return immediately; */ +/* = 'P', do backward transformation for permutation only; */ +/* = 'S', do backward transformation for scaling only; */ +/* = 'B', do backward transformations for both permutation and */ +/* scaling. 
*/ +/* JOB must be the same as the argument JOB supplied to DGEBAL. */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'R': V contains right eigenvectors; */ +/* = 'L': V contains left eigenvectors. */ + +/* N (input) INTEGER */ +/* The number of rows of the matrix V. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* The integers ILO and IHI determined by DGEBAL. */ +/* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ + +/* SCALE (input) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutation and scaling factors, as returned */ +/* by DGEBAL. */ + +/* M (input) INTEGER */ +/* The number of columns of the matrix V. M >= 0. */ + +/* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) */ +/* On entry, the matrix of right or left eigenvectors to be */ +/* transformed, as returned by DHSEIN or DTREVC. */ +/* On exit, V is overwritten by the transformed eigenvectors. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. LDV >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and Test the input parameters */ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = _starpu_lsame_(side, "R"); + leftv = _starpu_lsame_(side, "L"); + + *info = 0; + if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") + && ! _starpu_lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! 
leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (_starpu_lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (_starpu_lsame_(job, "S") || _starpu_lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + _starpu_dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + _starpu_dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* Backward permutation */ + +/* For I = ILO-1 step -1 until 1, */ +/* IHI+1 step 1 until N do -- */ + +L30: + if (_starpu_lsame_(job, "P") || _starpu_lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of DGEBAK */ + +} /* _starpu_dgebak_ */ diff --git a/min-dgels/base/SRC/dgebal.c b/min-dgels/base/SRC/dgebal.c new file mode 100644 index 0000000..d97d918 --- /dev/null +++ 
b/min-dgels/base/SRC/dgebal.c @@ -0,0 +1,402 @@ +/* dgebal.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal c__, f, g; + integer i__, j, k, l, m; + doublereal r__, s, ca, ra; + integer ica, ira, iexc; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical noconv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEBAL balances a general real matrix A. 
This involves, first, */ +/* permuting A by a similarity transformation to isolate eigenvalues */ +/* in the first 1 to ILO-1 and last IHI+1 to N elements on the */ +/* diagonal; and second, applying a diagonal similarity transformation */ +/* to rows and columns ILO to IHI to make the rows and columns as */ +/* close in norm as possible. Both steps are optional. */ + +/* Balancing may reduce the 1-norm of the matrix, and improve the */ +/* accuracy of the computed eigenvalues and/or eigenvectors. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies the operations to be performed on A: */ +/* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */ +/* for i = 1,...,N; */ +/* = 'P': permute only; */ +/* = 'S': scale only; */ +/* = 'B': both permute and scale. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the input matrix A. */ +/* On exit, A is overwritten by the balanced matrix. */ +/* If JOB = 'N', A is not referenced. */ +/* See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* ILO (output) INTEGER */ +/* IHI (output) INTEGER */ +/* ILO and IHI are set to integers such that on exit */ +/* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */ +/* If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ + +/* SCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied to */ +/* A. If P(j) is the index of the row and column interchanged */ +/* with row and column j and D(j) is the scaling factor */ +/* applied to row and column j, then */ +/* SCALE(j) = P(j) for j = 1,...,ILO-1 */ +/* = D(j) for j = ILO,...,IHI */ +/* = P(j) for j = IHI+1,...,N. */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. 
*/ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The permutations consist of row and column interchanges which put */ +/* the matrix in the form */ + +/* ( T1 X Y ) */ +/* P A P = ( 0 B Z ) */ +/* ( 0 0 T2 ) */ + +/* where T1 and T2 are upper triangular matrices whose eigenvalues lie */ +/* along the diagonal. The column indices ILO and IHI mark the starting */ +/* and ending columns of the submatrix B. Balancing consists of applying */ +/* a diagonal similarity transformation inv(D) * B * D to make the */ +/* 1-norms of each row of B and its corresponding column nearly equal. */ +/* The output matrix is */ + +/* ( T1 X*D Y ) */ +/* ( 0 inv(D)*B*D inv(D)*Z ). */ +/* ( 0 0 T2 ) */ + +/* Information about the permutations P and the diagonal matrix D is */ +/* returned in the vector SCALE. */ + +/* This subroutine is based on the EISPACK routine BALANC. */ + +/* Modified by Tzu-Yi Chen, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") + && ! 
_starpu_lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (_starpu_lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (_starpu_lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + _starpu_dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + _starpu_dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (_starpu_lsame_(job, "P")) { + goto L210; + } + +/* Balance the submatrix in rows K to L. */ + +/* Iterative loop for norm reduction */ + + sfmin1 = _starpu_dlamch_("S") / _starpu_dlamch_("P"); + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. 
/ sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +L150: + ; + } + ica = _starpu_idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = _starpu_idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + +L170: + g = c__ / 2.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. 
/ f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + _starpu_dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + _starpu_dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of DGEBAL */ + +} /* _starpu_dgebal_ */ diff --git a/min-dgels/base/SRC/dgebd2.c b/min-dgels/base/SRC/dgebd2.c new file mode 100644 index 0000000..dcf83ca --- /dev/null +++ b/min-dgels/base/SRC/dgebd2.c @@ -0,0 +1,304 @@ +/* dgebd2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGEBD2 reduces a real general m by n matrix A to upper or lower */ +/* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */ + +/* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows in the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns in the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n general matrix to be reduced. */ +/* On exit, */ +/* if m >= n, the diagonal and the first superdiagonal are */ +/* overwritten with the upper bidiagonal matrix B; the */ +/* elements below the diagonal, with the array TAUQ, represent */ +/* the orthogonal matrix Q as a product of elementary */ +/* reflectors, and the elements above the first superdiagonal, */ +/* with the array TAUP, represent the orthogonal matrix P as */ +/* a product of elementary reflectors; */ +/* if m < n, the diagonal and the first subdiagonal are */ +/* overwritten with the lower bidiagonal matrix B; the */ +/* elements below the first subdiagonal, with the array TAUQ, */ +/* represent the orthogonal matrix Q as a product of */ +/* elementary reflectors, and the elements above the diagonal, */ +/* with the array TAUP, represent the orthogonal matrix P as */ +/* a product of elementary reflectors. */ +/* See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The diagonal elements of the bidiagonal matrix B: */ +/* D(i) = A(i,i). */ + +/* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ +/* The off-diagonal elements of the bidiagonal matrix B: */ +/* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. 
*/ + +/* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Q. See Further Details. */ + +/* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix P. See Further Details. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrices Q and P are represented as products of elementary */ +/* reflectors: */ + +/* If m >= n, */ + +/* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ + +/* Each H(i) and G(i) has the form: */ + +/* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ + +/* where tauq and taup are real scalars, and v and u are real vectors; */ +/* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* tauq is stored in TAUQ(i) and taup in TAUP(i). */ + +/* If m < n, */ + +/* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ + +/* Each H(i) and G(i) has the form: */ + +/* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ + +/* where tauq and taup are real scalars, and v and u are real vectors; */ +/* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* tauq is stored in TAUQ(i) and taup in TAUP(i). 
*/ + +/* The contents of A on exit are illustrated by the following examples: */ + +/* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ + +/* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* ( v1 v2 v3 v4 v5 ) */ + +/* where d and e denote diagonal and off-diagonal elements of B, vi */ +/* denotes an element of the vector defining H(i), and ui an element of */ +/* the vector defining G(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] +); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* 
Generate elementary reflector G(i) to annihilate */ +/* A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3, *n)* a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m)+ + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + 
+/* End of DGEBD2 */ + +} /* _starpu_dgebd2_ */ diff --git a/min-dgels/base/SRC/dgebrd.c b/min-dgels/base/SRC/dgebrd.c new file mode 100644 index 0000000..c74e527 --- /dev/null +++ b/min-dgels/base/SRC/dgebrd.c @@ -0,0 +1,336 @@ +/* dgebrd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static doublereal c_b21 = -1.; +static doublereal c_b22 = 1.; + +/* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, nb, nx; + doublereal ws; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer nbmin, iinfo, minmn; + extern /* Subroutine */ int _starpu_dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_dlabrd_(integer *, integer *, integer * +, doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, 
char *, char *, integer *, integer *, + integer *, integer *); + integer ldwrkx, ldwrky, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEBRD reduces a general real M-by-N matrix A to upper or lower */ +/* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ + +/* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows in the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns in the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N general matrix to be reduced. */ +/* On exit, */ +/* if m >= n, the diagonal and the first superdiagonal are */ +/* overwritten with the upper bidiagonal matrix B; the */ +/* elements below the diagonal, with the array TAUQ, represent */ +/* the orthogonal matrix Q as a product of elementary */ +/* reflectors, and the elements above the first superdiagonal, */ +/* with the array TAUP, represent the orthogonal matrix P as */ +/* a product of elementary reflectors; */ +/* if m < n, the diagonal and the first subdiagonal are */ +/* overwritten with the lower bidiagonal matrix B; the */ +/* elements below the first subdiagonal, with the array TAUQ, */ +/* represent the orthogonal matrix Q as a product of */ +/* elementary reflectors, and the elements above the diagonal, */ +/* with the array TAUP, represent the orthogonal matrix P as */ +/* a product of elementary reflectors. */ +/* See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). 
*/ + +/* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The diagonal elements of the bidiagonal matrix B: */ +/* D(i) = A(i,i). */ + +/* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ +/* The off-diagonal elements of the bidiagonal matrix B: */ +/* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ + +/* TAUQ (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Q. See Further Details. */ + +/* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix P. See Further Details. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= max(1,M,N). */ +/* For optimum performance LWORK >= (M+N)*NB, where NB */ +/* is the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrices Q and P are represented as products of elementary */ +/* reflectors: */ + +/* If m >= n, */ + +/* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . .
G(n-1) */ + +/* Each H(i) and G(i) has the form: */ + +/* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ + +/* where tauq and taup are real scalars, and v and u are real vectors; */ +/* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* tauq is stored in TAUQ(i) and taup in TAUP(i). */ + +/* If m < n, */ + +/* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ + +/* Each H(i) and G(i) has the form: */ + +/* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ + +/* where tauq and taup are real scalars, and v and u are real vectors; */ +/* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* tauq is stored in TAUQ(i) and taup in TAUP(i). */ + +/* The contents of A on exit are illustrated by the following examples: */ + +/* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ + +/* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* ( v1 v2 v3 v4 v5 ) */ + +/* where d and e denote diagonal and off-diagonal elements of B, vi */ +/* denotes an element of the vector defining H(i), and ui an element of */ +/* the vector defining G(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* Set the crossover point NX. */ + +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* Not enough work space for the optimal NB, consider using */ +/* a smaller block size. */ + + nbmin = _starpu_ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* Reduce rows and columns i:i+nb-1 to bidiagonal form and return */ +/* the matrices X and Y which are needed to update the unreduced */ +/* part of the matrix */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + _starpu_dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */ +/* of the form A := A - V*Y' - X*U' */ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + _starpu_dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + _starpu_dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + _starpu_dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of DGEBRD */ + +} /* _starpu_dgebrd_ */ diff --git a/min-dgels/base/SRC/dgecon.c b/min-dgels/base/SRC/dgecon.c new file mode 100644 index 0000000..a2a36ba --- /dev/null +++ b/min-dgels/base/SRC/dgecon.c @@ -0,0 +1,226 @@ +/* dgecon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgecon_(char *norm, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + doublereal sl; + integer ix; + doublereal su; + integer kase, kase1; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + logical onenrm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGECON estimates the reciprocal of the condition number of a general */ +/* real matrix A, in either the 1-norm or the infinity-norm, using */ +/* the LU factorization computed by DGETRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as */ +/* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The factors L and U from the factorization A = P*L*U */ +/* as computed by DGETRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* ANORM (input) DOUBLE PRECISION */ +/* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* If NORM = 'I', the infinity-norm of the original matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGECON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = _starpu_dlamch_("Safe minimum"); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). */ + + _starpu_dlatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + +/* Multiply by inv(U). */ + + _starpu_dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info); + } else { + +/* Multiply by inv(U'). */ + + _starpu_dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], + lda, &work[1], &su, &work[*n * 3 + 1], info); + +/* Multiply by inv(L'). */ + + _starpu_dlatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + } + +/* Multiply X by 1/(SL*SU) if doing so will not cause overflow. */ + + scale = sl * su; + *(unsigned char *)normin = 'Y'; + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.)
+ { + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of DGECON */ + +} /* _starpu_dgecon_ */ diff --git a/min-dgels/base/SRC/dgeequ.c b/min-dgels/base/SRC/dgeequ.c new file mode 100644 index 0000000..0c884a1 --- /dev/null +++ b/min-dgels/base/SRC/dgeequ.c @@ -0,0 +1,296 @@ +/* dgeequ.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal rcmin, rcmax; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEEQU computes row and column scalings intended to equilibrate an */ +/* M-by-N matrix A and reduce its condition number. 
R returns the row */ +/* scale factors and C the column scale factors, chosen to try to make */ +/* the largest element in each row and column of the matrix B with */ +/* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ + +/* R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ +/* number and BIGNUM = largest safe number. Use of these scaling */ +/* factors is not guaranteed to reduce the condition number of A but */ +/* works well in practice. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The M-by-N matrix whose equilibration factors are */ +/* to be computed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* R (output) DOUBLE PRECISION array, dimension (M) */ +/* If INFO = 0 or INFO > M, R contains the row scale factors */ +/* for A. */ + +/* C (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, C contains the column scale factors for A. */ + +/* ROWCND (output) DOUBLE PRECISION */ +/* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* AMAX is neither too large nor too small, it is not worth */ +/* scaling by R. */ + +/* COLCND (output) DOUBLE PRECISION */ +/* If INFO = 0, COLCND contains the ratio of the smallest */ +/* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* worth scaling by C. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= M: the i-th row of A is exactly zero */ +/* > M: the (i-M)-th column of A is exactly zero */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEEQU", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + r__[i__] = max(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = min(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) 
{ + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = max(d__2,smlnum); + r__[i__] = 1. / min(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = min(R(I)) / max(R(I)) */ + + *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * + r__[i__]; + c__[j] = max(d__2,d__3); +/* L80: */ + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = max(d__2,smlnum); + c__[j] = 1. 
/ min(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = min(C(J)) / max(C(J)) */ + + *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + + return 0; + +/* End of DGEEQU */ + +} /* _starpu_dgeequ_ */ diff --git a/min-dgels/base/SRC/dgeequb.c b/min-dgels/base/SRC/dgeequb.c new file mode 100644 index 0000000..cc426d7 --- /dev/null +++ b/min-dgels/base/SRC/dgeequb.c @@ -0,0 +1,324 @@ +/* dgeequb.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + integer i__, j; + doublereal radix, rcmin, rcmax; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, logrdx, smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGEEQUB computes row and column scalings intended to equilibrate an */ +/* M-by-N matrix A and reduce its condition number. R returns the row */ +/* scale factors and C the column scale factors, chosen to try to make */ +/* the largest element in each row and column of the matrix B with */ +/* elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ +/* the radix. */ + +/* R(i) and C(j) are restricted to be a power of the radix between */ +/* SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ +/* of these scaling factors is not guaranteed to reduce the condition */ +/* number of A but works well in practice. */ + +/* This routine differs from DGEEQU by restricting the scaling factors */ +/* to a power of the radix. Barring over- and underflow, scaling by */ +/* these factors introduces no additional rounding errors. However, the */ +/* scaled entries' magnitudes are no longer approximately 1 but lie */ +/* between sqrt(radix) and 1/sqrt(radix). */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The M-by-N matrix whose equilibration factors are */ +/* to be computed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* R (output) DOUBLE PRECISION array, dimension (M) */ +/* If INFO = 0 or INFO > M, R contains the row scale factors */ +/* for A. */ + +/* C (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, C contains the column scale factors for A. */ + +/* ROWCND (output) DOUBLE PRECISION */ +/* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* AMAX is neither too large nor too small, it is not worth */ +/* scaling by R.
*/ + +/* COLCND (output) DOUBLE PRECISION */ +/* If INFO = 0, COLCND contains the ratio of the smallest */ +/* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* worth scaling by C. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= M: the i-th row of A is exactly zero */ +/* > M: the (i-M)-th column of A is exactly zero */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEEQUB", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. Assume SMLNUM is a power of the radix. */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + radix = _starpu_dlamch_("B"); + logrdx = log(radix); + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + r__[i__] = max(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] > 0.) { + i__2 = (integer) (log(r__[i__]) / logrdx); + r__[i__] = pow_di(&radix, &i__2); + } + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = min(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = max(d__2,smlnum); + r__[i__] = 1. / min(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = min(R(I)) / max(R(I)). */ + + *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * + r__[i__]; + c__[j] = max(d__2,d__3); +/* L80: */ + } + if (c__[j] > 0.) { + i__2 = (integer) (log(c__[j]) / logrdx); + c__[j] = pow_di(&radix, &i__2); + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. 
*/ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = max(d__2,smlnum); + c__[j] = 1. / min(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = min(C(J)) / max(C(J)). */ + + *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } + + return 0; + +/* End of DGEEQUB */ + +} /* _starpu_dgeequb_ */ diff --git a/min-dgels/base/SRC/dgees.c b/min-dgels/base/SRC/dgees.c new file mode 100644 index 0000000..d527e65 --- /dev/null +++ b/min-dgels/base/SRC/dgees.c @@ -0,0 +1,549 @@ +/* dgees.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ /* (each constant below is handed to the callees by address, e.g. &c__1) */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; + /* DGEES driver: eigenvalues, real Schur form and, optionally, Schur vectors of an N-by-N real matrix; the full argument contract is in the comment block inside the body. */ +/* Subroutine */ int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublereal *a, integer *lda, integer *sdim, doublereal *wr, + doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, + integer *lwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal s; + integer i1, i2, ip, ihi, ilo; + doublereal dum[1], eps, sep; + integer ibal; + doublereal anrm; + integer idum[1], ierr, itau, iwrk, inxt, icond, ieval; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical cursl; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical lst2sl, scalea; + extern doublereal _starpu_dlamch_(char *); + doublereal cscale; + extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* 
Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrsen_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, integer *); + logical lastsl; + integer minwrk, maxwrk; + doublereal smlnum; + integer hswork; + logical wantst, lquery, wantvs; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* .. Function Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEES computes for an N-by-N real nonsymmetric matrix A, the */ +/* eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ + +/* Optionally, it also orders the eigenvalues on the diagonal of the */ +/* real Schur form so that selected eigenvalues are at the top left. 
*/ +/* The leading columns of Z then form an orthonormal basis for the */ +/* invariant subspace corresponding to the selected eigenvalues. */ + +/* A matrix is in real Schur form if it is upper quasi-triangular with */ +/* 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */ +/* form */ +/* [ a b ] */ +/* [ c a ] */ + +/* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ + +/* Arguments */ +/* ========= */ + +/* JOBVS (input) CHARACTER*1 */ +/* = 'N': Schur vectors are not computed; */ +/* = 'V': Schur vectors are computed. */ + +/* SORT (input) CHARACTER*1 */ +/* Specifies whether or not to order the eigenvalues on the */ +/* diagonal of the Schur form. */ +/* = 'N': Eigenvalues are not ordered; */ +/* = 'S': Eigenvalues are ordered (see SELECT). */ + +/* SELECT (external procedure) LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ +/* SELECT must be declared EXTERNAL in the calling subroutine. */ +/* If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* to the top left of the Schur form. */ +/* If SORT = 'N', SELECT is not referenced. */ +/* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */ +/* conjugate pair of eigenvalues is selected, then both complex */ +/* eigenvalues are selected. */ +/* Note that a selected complex eigenvalue may no longer */ +/* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ +/* ordering may change the value of complex eigenvalues */ +/* (especially if the eigenvalue is ill-conditioned); in this */ +/* case INFO is set to N+2 (see INFO below). */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ +/* On exit, A has been overwritten by its real Schur form T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* SDIM (output) INTEGER */ +/* If SORT = 'N', SDIM = 0. */ +/* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* for which SELECT is true. (Complex conjugate */ +/* pairs for which SELECT is true for either */ +/* eigenvalue count as 2.) */ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* WR and WI contain the real and imaginary parts, */ +/* respectively, of the computed eigenvalues in the same order */ +/* that they appear on the diagonal of the output Schur form T. */ +/* Complex conjugate pairs of eigenvalues will appear */ +/* consecutively with the eigenvalue having the positive */ +/* imaginary part first. */ + +/* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) */ +/* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* vectors. */ +/* If JOBVS = 'N', VS is not referenced. */ + +/* LDVS (input) INTEGER */ +/* The leading dimension of the array VS. LDVS >= 1; if */ +/* JOBVS = 'V', LDVS >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,3*N). */ +/* For good performance, LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* BWORK (workspace) LOGICAL array, dimension (N) */ +/* Not referenced if SORT = 'N'. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. 
*/ +/* > 0: if INFO = i, and i is */ +/* <= N: the QR algorithm failed to compute all the */ +/* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* contain those eigenvalues which have converged; if */ +/* JOBVS = 'V', VS contains the matrix which reduces A */ +/* to its partially converged Schur form. */ +/* = N+1: the eigenvalues could not be reordered because some */ +/* eigenvalues were too close to separate (the problem */ +/* is very ill-conditioned); */ +/* = N+2: after reordering, roundoff changed values of some */ +/* complex eigenvalues so that leading eigenvalues in */ +/* the Schur form no longer satisfy SELECT=.TRUE. This */ +/* could also be caused by underflow due to scaling. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ /* The array pointers are shifted so that the body can index them from 1, e.g. a[i + j * a_dim1], wr[i], work[1]. */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1; + vs -= vs_offset; + --work; + --bwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvs = _starpu_lsame_(jobvs, "V"); + wantst = _starpu_lsame_(sort, "S"); + if (! wantvs && ! _starpu_lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! 
_starpu_lsame_(sort, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0); + minwrk = *n * 3; + /* DHSEQR is called with -1 (c_n1) in its workspace-length slot; the value it leaves in work[1] is read back as HSWORK. */ + _starpu_dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] +, &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = (integer) work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = max(i__1,i__2); + } + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEES ", &i__1); + return 0; + } else if (lquery) { /* Workspace query (LWORK = -1): work[1] already holds maxwrk, so return before touching A. 
*/ + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need N) */ + + ibal = 1; + _starpu_dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; /* From here on the workspace handed to the callees starts at work[itau]. 
*/ + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues and transform Schur vectors */ +/* (Workspace: none needed) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dtrsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], 
sdim, &s, &sep, &work[iwrk], &i__1, + idum, &c__1, &icond); + if (icond > 0) { + *info = *n + icond; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (Workspace: need N) */ + + _starpu_dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + _starpu_dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + _starpu_dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; +/* Computing MAX */ + i__3 = ilo - 1; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], &i__2, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( + i__ + 1) * a_dim1] == 0.) 
{ + wi[i__] = 0.; + wi[i__ + 1] = 0.; + if (i__ > 1) { + i__2 = i__ - 1; + _starpu_dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + _starpu_dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + if (wantvs) { + _starpu_dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + + 1) * vs_dim1 + 1], &c__1); + } + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.; + } + inxt = i__ + 2; + } +L20: + ; + } + } + +/* Undo scaling for the imaginary part of the eigenvalues */ + + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; /* Keep the selection results of the previous two positions for the ordering checks above. */ +/* L30: */ + } + } + + work[1] = (doublereal) maxwrk; /* Report maxwrk to the caller through work[1]. 
*/ + return 0; + +/* End of DGEES */ + +} /* _starpu_dgees_ */ diff --git a/min-dgels/base/SRC/dgeesx.c b/min-dgels/base/SRC/dgeesx.c new file mode 100644 index 0000000..da17def --- /dev/null +++ b/min-dgels/base/SRC/dgeesx.c @@ -0,0 +1,649 @@ +/* dgeesx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublereal *a, integer *lda, integer *sdim, + doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, + doublereal *rconde, doublereal *rcondv, doublereal *work, integer * + lwork, integer *iwork, integer *liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, i1, i2, ip, ihi, ilo; + doublereal dum[1], eps; + integer ibal; + doublereal anrm; + integer ierr, itau, iwrk, lwrk, inxt, icond, ieval; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical cursl; + integer liwrk; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical lst2sl, scalea; + extern doublereal _starpu_dlamch_(char *); + doublereal cscale; + extern doublereal _starpu_dlange_(char *, integer 
*, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + logical wantsb; + extern /* Subroutine */ int _starpu_dtrsen_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, integer *); + logical wantse, lastsl; + integer minwrk, maxwrk; + logical wantsn; + doublereal smlnum; + integer hswork; + logical wantst, lquery, wantsv, wantvs; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* .. Function Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEESX computes for an N-by-N real nonsymmetric matrix A, the */ +/* eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). 
*/ + +/* Optionally, it also orders the eigenvalues on the diagonal of the */ +/* real Schur form so that selected eigenvalues are at the top left; */ +/* computes a reciprocal condition number for the average of the */ +/* selected eigenvalues (RCONDE); and computes a reciprocal condition */ +/* number for the right invariant subspace corresponding to the */ +/* selected eigenvalues (RCONDV). The leading columns of Z form an */ +/* orthonormal basis for this invariant subspace. */ + +/* For further explanation of the reciprocal condition numbers RCONDE */ +/* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */ +/* these quantities are called s and sep respectively). */ + +/* A real matrix is in real Schur form if it is upper quasi-triangular */ +/* with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */ +/* the form */ +/* [ a b ] */ +/* [ c a ] */ + +/* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ + +/* Arguments */ +/* ========= */ + +/* JOBVS (input) CHARACTER*1 */ +/* = 'N': Schur vectors are not computed; */ +/* = 'V': Schur vectors are computed. */ + +/* SORT (input) CHARACTER*1 */ +/* Specifies whether or not to order the eigenvalues on the */ +/* diagonal of the Schur form. */ +/* = 'N': Eigenvalues are not ordered; */ +/* = 'S': Eigenvalues are ordered (see SELECT). */ + +/* SELECT (external procedure) LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ +/* SELECT must be declared EXTERNAL in the calling subroutine. */ +/* If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* to the top left of the Schur form. */ +/* If SORT = 'N', SELECT is not referenced. */ +/* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* SELECT(WR(j),WI(j)) is true; i.e., if either one of a */ +/* complex conjugate pair of eigenvalues is selected, then both */ +/* are. Note that a selected complex eigenvalue may no longer */ +/* satisfy SELECT(WR(j),WI(j)) = .TRUE. 
after ordering, since */ +/* ordering may change the value of complex eigenvalues */ +/* (especially if the eigenvalue is ill-conditioned); in this */ +/* case INFO may be set to N+3 (see INFO below). */ + +/* SENSE (input) CHARACTER*1 */ +/* Determines which reciprocal condition numbers are computed. */ +/* = 'N': None are computed; */ +/* = 'E': Computed for average of selected eigenvalues only; */ +/* = 'V': Computed for selected right invariant subspace only; */ +/* = 'B': Computed for both. */ +/* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the N-by-N matrix A. */ +/* On exit, A is overwritten by its real Schur form T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* SDIM (output) INTEGER */ +/* If SORT = 'N', SDIM = 0. */ +/* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* for which SELECT is true. (Complex conjugate */ +/* pairs for which SELECT is true for either */ +/* eigenvalue count as 2.) */ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* WR and WI contain the real and imaginary parts, respectively, */ +/* of the computed eigenvalues, in the same order that they */ +/* appear on the diagonal of the output Schur form T. Complex */ +/* conjugate pairs of eigenvalues appear consecutively with the */ +/* eigenvalue having the positive imaginary part first. */ + +/* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) */ +/* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* vectors. */ +/* If JOBVS = 'N', VS is not referenced. */ + +/* LDVS (input) INTEGER */ +/* The leading dimension of the array VS. LDVS >= 1, and if */ +/* JOBVS = 'V', LDVS >= N. 
*/ + +/* RCONDE (output) DOUBLE PRECISION */ +/* If SENSE = 'E' or 'B', RCONDE contains the reciprocal */ +/* condition number for the average of the selected eigenvalues. */ +/* Not referenced if SENSE = 'N' or 'V'. */ + +/* RCONDV (output) DOUBLE PRECISION */ +/* If SENSE = 'V' or 'B', RCONDV contains the reciprocal */ +/* condition number for the selected right invariant subspace. */ +/* Not referenced if SENSE = 'N' or 'E'. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,3*N). */ +/* Also, if SENSE = 'E' or 'V' or 'B', */ +/* LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */ +/* selected eigenvalues computed by this routine. Note that */ +/* N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */ +/* returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or */ +/* 'B' this may not be large enough. */ +/* For good performance, LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates upper bounds on the optimal sizes of the */ +/* arrays WORK and IWORK, returns these values as the first */ +/* entries of the WORK and IWORK arrays, and no error messages */ +/* related to LWORK or LIWORK are issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */ +/* Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */ +/* only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */ +/* may not be large enough. 
*/ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates upper bounds on the optimal sizes of */ +/* the arrays WORK and IWORK, returns these values as the first */ +/* entries of the WORK and IWORK arrays, and no error messages */ +/* related to LWORK or LIWORK are issued by XERBLA. */ + +/* BWORK (workspace) LOGICAL array, dimension (N) */ +/* Not referenced if SORT = 'N'. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, and i is */ +/* <= N: the QR algorithm failed to compute all the */ +/* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* contain those eigenvalues which have converged; if */ +/* JOBVS = 'V', VS contains the transformation which */ +/* reduces A to its partially converged Schur form. */ +/* = N+1: the eigenvalues could not be reordered because some */ +/* eigenvalues were too close to separate (the problem */ +/* is very ill-conditioned); */ +/* = N+2: after reordering, roundoff changed values of some */ +/* complex eigenvalues so that leading eigenvalues in */ +/* the Schur form no longer satisfy SELECT=.TRUE. This */ +/* could also be caused by underflow due to scaling. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1; + vs -= vs_offset; + --work; + --iwork; + --bwork; + + /* Function Body */ + *info = 0; + wantvs = _starpu_lsame_(jobvs, "V"); + wantst = _starpu_lsame_(sort, "S"); + wantsn = _starpu_lsame_(sense, "N"); + wantse = _starpu_lsame_(sense, "E"); + wantsv = _starpu_lsame_(sense, "V"); + wantsb = _starpu_lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + if (! wantvs && ! _starpu_lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! _starpu_lsame_(sort, "N")) { + *info = -2; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -12; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "RWorkspace:" describe the */ +/* minimal amount of real workspace needed at that point in the */ +/* code, as well as the preferred amount for good performance. */ +/* IWorkspace refers to integer workspace. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case. */ +/* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */ +/* depends on SDIM, which is computed by the routine DTRSEN later */ +/* in the code.) 
*/ + + if (*info == 0) { + liwrk = 1; + if (*n == 0) { + minwrk = 1; + lwrk = 1; + } else { + maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0); + minwrk = *n * 3; + + _starpu_dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] +, &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = (integer) work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = max(i__1,i__2); + } + lwrk = maxwrk; + if (! wantsn) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n + *n * *n / 2; + lwrk = max(i__1,i__2); + } + if (wantsv || wantsb) { + liwrk = *n * *n / 4; + } + } + iwork[1] = liwrk; + work[1] = (doublereal) lwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -16; + } else if (*liwork < 1 && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEESX", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (RWorkspace: need N) */ + + ibal = 1; + _starpu_dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (RWorkspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Schur vectors, and compute */ +/* reciprocal condition numbers */ +/* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */ +/* otherwise, need N ) */ +/* (IWorkspace: if SENSE is 
'V' or 'B', need SDIM*(N-SDIM) */ +/* otherwise, need 0 ) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dtrsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], & + i__1, &iwork[1], liwork, &icond); + if (! wantsn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim); + maxwrk = max(i__1,i__2); + } + if (icond == -15) { + +/* Not enough real workspace */ + + *info = -16; + } else if (icond == -17) { + +/* Not enough integer workspace */ + + *info = -18; + } else if (icond > 0) { + +/* DTRSEN failed to reorder or to restore standard Schur form */ + + *info = icond + *n; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (RWorkspace: need N) */ + + _starpu_dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + _starpu_dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + _starpu_dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if ((wantsv || wantsb) && *info == 0) { + dum[0] = *rcondv; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, & + c__1, &ierr); + *rcondv = dum[0]; + } + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], n, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( + i__ + 1) * a_dim1] == 0.) 
{ + wi[i__] = 0.; + wi[i__ + 1] = 0.; + if (i__ > 1) { + i__2 = i__ - 1; + _starpu_dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + _starpu_dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + _starpu_dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + 1) + * vs_dim1 + 1], &c__1); + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.; + } + inxt = i__ + 2; + } +L20: + ; + } + } + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + } + + work[1] = (doublereal) maxwrk; + if (wantsv || wantsb) { +/* Computing MAX */ + i__1 = 1, i__2 = *sdim * (*n - *sdim); + iwork[1] = max(i__1,i__2); + } else { + iwork[1] = 1; + } + + return 0; + +/* End of DGEESX */ + +} /* _starpu_dgeesx_ */ diff --git a/min-dgels/base/SRC/dgeev.c b/min-dgels/base/SRC/dgeev.c new file mode 100644 index 0000000..caa39b3 --- /dev/null +++ b/min-dgels/base/SRC/dgeev.c @@ -0,0 +1,566 @@ +/* dgeev.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; + +/* _starpu_dgeev_: f2c translation of the LAPACK driver DGEEV. Computes the eigenvalues (wr, wi) of the n-by-n matrix a and, when jobvl / jobvr is "V", the left / right eigenvectors in vl / vr. Every argument is passed by pointer; a is overwritten. Every return statement returns 0: the outcome is reported through *info (0 success, -i for an illegal i-th argument, > 0 when the QR iteration in DHSEQR failed). The full argument contract is given in the comment block further down. */ /* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, k; + doublereal r__, cs, sn; + integer ihi; + doublereal scl; + integer ilo; + doublereal dum[1], eps; + integer ibal; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer iwrk, nout; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, 
doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, 
integer *); + logical scalea; + extern doublereal _starpu_dlamch_(char *); + doublereal cscale; + extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_xerbla_(char *, integer *); + logical select[1]; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *); + integer minwrk, maxwrk; + logical wantvl; + doublereal smlnum; + integer hswork; + logical lquery, wantvr; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGEEV computes for an N-by-N real nonsymmetric matrix A, the */ +/* eigenvalues and, optionally, the left and/or right eigenvectors. */ + +/* The right eigenvector v(j) of A satisfies */ +/* A * v(j) = lambda(j) * v(j) */ +/* where lambda(j) is its eigenvalue. */ +/* The left eigenvector u(j) of A satisfies */ +/* u(j)**H * A = lambda(j) * u(j)**H */ +/* where u(j)**H denotes the conjugate transpose of u(j). */ + +/* The computed eigenvectors are normalized to have Euclidean norm */ +/* equal to 1 and largest component real. */ + +/* Arguments */ +/* ========= */ + +/* JOBVL (input) CHARACTER*1 */ +/* = 'N': left eigenvectors of A are not computed; */ +/* = 'V': left eigenvectors of A are computed. */ + +/* JOBVR (input) CHARACTER*1 */ +/* = 'N': right eigenvectors of A are not computed; */ +/* = 'V': right eigenvectors of A are computed. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ +/* On exit, A has been overwritten. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* WR and WI contain the real and imaginary parts, */ +/* respectively, of the computed eigenvalues. Complex */ +/* conjugate pairs of eigenvalues appear consecutively */ +/* with the eigenvalue having the positive imaginary part */ +/* first. */ + +/* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ +/* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* after another in the columns of VL, in the same order */ +/* as their eigenvalues. */ +/* If JOBVL = 'N', VL is not referenced. */ +/* If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* the j-th column of VL. 
*/ +/* If the j-th and (j+1)-st eigenvalues form a complex */ +/* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* u(j+1) = VL(:,j) - i*VL(:,j+1). */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. LDVL >= 1; if */ +/* JOBVL = 'V', LDVL >= N. */ + +/* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ +/* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* after another in the columns of VR, in the same order */ +/* as their eigenvalues. */ +/* If JOBVR = 'N', VR is not referenced. */ +/* If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* the j-th column of VR. */ +/* If the j-th and (j+1)-st eigenvalues form a complex */ +/* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* v(j+1) = VR(:,j) - i*VR(:,j+1). */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. LDVR >= 1; if */ +/* JOBVR = 'V', LDVR >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,3*N), and */ +/* if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good */ +/* performance, LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* eigenvalues, and no eigenvectors have been computed; */ +/* elements i+1:N of WR and WI contain eigenvalues which */ +/* have converged. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. 
Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ /* Each array pointer is shifted back so that the 1-based indices used below (wr[1], a[i + j * a_dim1] with i, j starting at 1) address the caller's first element. */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; /* lwork == -1 asks only for the optimal workspace size (see LWORK above) */ + wantvl = _starpu_lsame_(jobvl, "V"); + wantvr = _starpu_lsame_(jobvr, "V"); + if (! wantvl && ! _starpu_lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! _starpu_lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) 
*/ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0); + if (wantvl) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + /* In all three branches DHSEQR is called with lwork = -1 (c_n1) and ilo = 1, ihi = n; hswork is then read back from work[1]. */ _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else { + minwrk = *n * 3; + _starpu_dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! 
lquery) { + *info = -13; /* argument 13 is LWORK */ + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix */ +/* (Workspace: need N) */ + + ibal = 1; + _starpu_dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + /* Layout of work from here on: work[ibal .. ibal+n-1] is the output of DGEBAL (read again by DGEBAK below), work[itau .. itau+n-1] is handed to DGEHRD / DORGHR, and work[iwrk ..] is scratch of length lwork - iwrk + 1. */ itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need N+1, prefer N+HSWORK (see 
comments) ) */ + + iwrk = itau; /* work[itau] is not passed to DGEHRD / DORGHR again, so the scratch area now starts there */ + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and 
right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + _starpu_dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from DHSEQR, then quit */ + + if (*info > 0) { + goto L50; /* skips the eigenvector work below but still undoes the scaling of wr / wi */ + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 4*N) */ + + /* side was set above to 'L', 'R' or 'B' to match wantvl / wantvr. NOTE(review): select is never initialised in this routine; presumably DTREVC does not read it when its second argument is "B" -- confirm against DTREVC. */ _starpu_dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ +/* (Workspace: need N) */ + + _starpu_dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left 
eigenvectors and make largest component real */ + + /* Columns with wi < 0 match neither branch below: they are the second column of a pair and are processed together with the preceding column (wi > 0). */ i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) 
{ + /* Complex pair: columns i__ and i__+1 hold the real and imaginary parts (see the VL description above); both are scaled by the same factor. */ d__1 = _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / _starpu_dlapy2_(&d__1, &d__2); + _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + /* k is the row whose squared modulus (stored in work[iwrk ..] above) is largest; the rotation from DLARTG is applied to both columns and the imaginary part in row k is then set to exactly zero. */ k = _starpu_idamax_(n, &work[iwrk], &c__1); + _starpu_dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + _starpu_drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ +/* (Workspace: need N) */ + + _starpu_dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + /* Same procedure as for vl above, applied to the columns of vr. */ i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. 
/ _starpu_dlapy2_(&d__1, &d__2); + _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = _starpu_idamax_(n, &work[iwrk], &c__1); + _starpu_dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + _starpu_drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + /* With *info == 0 the first two calls rescale all n entries of wr and wi. With *info > 0 they cover entries info+1 .. n only, and the two calls inside the if below additionally rescale entries 1 .. ilo-1. */ i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; /* report the workspace size computed during argument checking */ + return 0; + +/* End of DGEEV */ + +} /* _starpu_dgeev_ */ diff --git a/min-dgels/base/SRC/dgeevx.c b/min-dgels/base/SRC/dgeevx.c new file mode 100644 index 0000000..f2ea76c --- /dev/null +++ b/min-dgels/base/SRC/dgeevx.c @@ -0,0 +1,703 @@ +/* dgeevx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, + doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal + *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, k; + doublereal r__, cs, sn; + char job[1]; + doublereal scl, dum[1], eps; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer iwrk, nout; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + integer icond; + extern logical _starpu_lsame_(char *, char *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal 
*, integer *, integer *), + _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical scalea; + extern doublereal _starpu_dlamch_(char *); + doublereal cscale; + extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_xerbla_(char *, integer *); + logical select[1]; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), _starpu_dtrsna_(char *, char *, logical *, integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, integer *); + integer minwrk, maxwrk; + logical wantvl, wntsnb; + integer hswork; + logical wntsne; + doublereal smlnum; + logical lquery, wantvr, 
wntsnn, wntsnv; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEEVX computes for an N-by-N real nonsymmetric matrix A, the */ +/* eigenvalues and, optionally, the left and/or right eigenvectors. */ + +/* Optionally also, it computes a balancing transformation to improve */ +/* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */ +/* (RCONDE), and reciprocal condition numbers for the right */ +/* eigenvectors (RCONDV). */ + +/* The right eigenvector v(j) of A satisfies */ +/* A * v(j) = lambda(j) * v(j) */ +/* where lambda(j) is its eigenvalue. */ +/* The left eigenvector u(j) of A satisfies */ +/* u(j)**H * A = lambda(j) * u(j)**H */ +/* where u(j)**H denotes the conjugate transpose of u(j). */ + +/* The computed eigenvectors are normalized to have Euclidean norm */ +/* equal to 1 and largest component real. */ + +/* Balancing a matrix means permuting the rows and columns to make it */ +/* more nearly upper triangular, and applying a diagonal similarity */ +/* transformation D * A * D**(-1), where D is a diagonal matrix, to */ +/* make its rows and columns closer in norm and the condition numbers */ +/* of its eigenvalues and eigenvectors smaller. The computed */ +/* reciprocal condition numbers correspond to the balanced matrix. */ +/* Permuting rows and columns will not change the condition numbers */ +/* (in exact arithmetic) but diagonal scaling will. For further */ +/* explanation of balancing, see section 4.10.2 of the LAPACK */ +/* Users' Guide. */ + +/* Arguments */ +/* ========= */ + +/* BALANC (input) CHARACTER*1 */ +/* Indicates how the input matrix should be diagonally scaled */ +/* and/or permuted to improve the conditioning of its */ +/* eigenvalues. 
*/ +/* = 'N': Do not diagonally scale or permute; */ +/* = 'P': Perform permutations to make the matrix more nearly */ +/* upper triangular. Do not diagonally scale; */ +/* = 'S': Diagonally scale the matrix, i.e. replace A by */ +/* D*A*D**(-1), where D is a diagonal matrix chosen */ +/* to make the rows and columns of A more equal in */ +/* norm. Do not permute; */ +/* = 'B': Both diagonally scale and permute A. */ + +/* Computed reciprocal condition numbers will be for the matrix */ +/* after balancing and/or permuting. Permuting does not change */ +/* condition numbers (in exact arithmetic), but balancing does. */ + +/* JOBVL (input) CHARACTER*1 */ +/* = 'N': left eigenvectors of A are not computed; */ +/* = 'V': left eigenvectors of A are computed. */ +/* If SENSE = 'E' or 'B', JOBVL must = 'V'. */ + +/* JOBVR (input) CHARACTER*1 */ +/* = 'N': right eigenvectors of A are not computed; */ +/* = 'V': right eigenvectors of A are computed. */ +/* If SENSE = 'E' or 'B', JOBVR must = 'V'. */ + +/* SENSE (input) CHARACTER*1 */ +/* Determines which reciprocal condition numbers are computed. */ +/* = 'N': None are computed; */ +/* = 'E': Computed for eigenvalues only; */ +/* = 'V': Computed for right eigenvectors only; */ +/* = 'B': Computed for eigenvalues and right eigenvectors. */ + +/* If SENSE = 'E' or 'B', both left and right eigenvectors */ +/* must also be computed (JOBVL = 'V' and JOBVR = 'V'). */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ +/* On exit, A has been overwritten. If JOBVL = 'V' or */ +/* JOBVR = 'V', A contains the real Schur form of the balanced */ +/* version of the input matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* WR and WI contain the real and imaginary parts, */ +/* respectively, of the computed eigenvalues. Complex */ +/* conjugate pairs of eigenvalues will appear consecutively */ +/* with the eigenvalue having the positive imaginary part */ +/* first. */ + +/* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ +/* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* after another in the columns of VL, in the same order */ +/* as their eigenvalues. */ +/* If JOBVL = 'N', VL is not referenced. */ +/* If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* the j-th column of VL. */ +/* If the j-th and (j+1)-st eigenvalues form a complex */ +/* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* u(j+1) = VL(:,j) - i*VL(:,j+1). */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. LDVL >= 1; if */ +/* JOBVL = 'V', LDVL >= N. */ + +/* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ +/* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* after another in the columns of VR, in the same order */ +/* as their eigenvalues. */ +/* If JOBVR = 'N', VR is not referenced. */ +/* If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* the j-th column of VR. */ +/* If the j-th and (j+1)-st eigenvalues form a complex */ +/* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* v(j+1) = VR(:,j) - i*VR(:,j+1). */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. LDVR >= 1, and if */ +/* JOBVR = 'V', LDVR >= N. */ + +/* ILO (output) INTEGER */ +/* IHI (output) INTEGER */ +/* ILO and IHI are integer values determined when A was */ +/* balanced. The balanced A(i,j) = 0 if I > J and */ +/* J = 1,...,ILO-1 or I = IHI+1,...,N. 
*/ + +/* SCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied */ +/* when balancing A. If P(j) is the index of the row and column */ +/* interchanged with row and column j, and D(j) is the scaling */ +/* factor applied to row and column j, then */ +/* SCALE(J) = P(J), for J = 1,...,ILO-1 */ +/* = D(J), for J = ILO,...,IHI */ +/* = P(J) for J = IHI+1,...,N. */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* ABNRM (output) DOUBLE PRECISION */ +/* The one-norm of the balanced matrix (the maximum */ +/* of the sum of absolute values of elements of any column). */ + +/* RCONDE (output) DOUBLE PRECISION array, dimension (N) */ +/* RCONDE(j) is the reciprocal condition number of the j-th */ +/* eigenvalue. */ + +/* RCONDV (output) DOUBLE PRECISION array, dimension (N) */ +/* RCONDV(j) is the reciprocal condition number of the j-th */ +/* right eigenvector. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. If SENSE = 'N' or 'E', */ +/* LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */ +/* LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). */ +/* For good performance, LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (2*N-2) */ +/* If SENSE = 'N' or 'E', not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. 
*/ +/* > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* eigenvalues, and no eigenvectors or condition numbers */ +/* have been computed; elements 1:ILO-1 and i+1:N of WR */ +/* and WI contain eigenvalues which have converged. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --scale; + --rconde; + --rcondv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = _starpu_lsame_(jobvl, "V"); + wantvr = _starpu_lsame_(jobvr, "V"); + wntsnn = _starpu_lsame_(sense, "N"); + wntsne = _starpu_lsame_(sense, "E"); + wntsnv = _starpu_lsame_(sense, "V"); + wntsnb = _starpu_lsame_(sense, "B"); + if (! (_starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "S") || _starpu_lsame_(balanc, "P") + || _starpu_lsame_(balanc, "B"))) { + *info = -1; + } else if (! wantvl && ! _starpu_lsame_(jobvl, "N")) { + *info = -2; + } else if (! wantvr && ! _starpu_lsame_(jobvr, "N")) { + *info = -3; + } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) + && ! 
(wantvl && wantvr)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -13; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = *n + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & + c__0); + + if (wantvl) { + _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { + _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + } else { + if (wntsnn) { + _starpu_dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } else { + _starpu_dhseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } + } + hswork = (integer) work[1]; + + if (! wantvl && ! wantvr) { + minwrk = *n << 1; + if (! wntsnn) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,hswork); + if (! wntsnn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = max(i__1,i__2); + } + } else { + minwrk = *n * 3; + if (! wntsnn && ! 
wntsne) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,hswork); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", + " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + if (! wntsnn && ! wntsne) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEEVX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + icond = 0; + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix and compute ABNRM */ + + _starpu_dgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr); + *abnrm = _starpu_dlange_("1", n, n, &a[a_offset], lda, dum); + if (scalea) { + dum[0] = *abnrm; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & + ierr); + *abnrm = dum[0]; + } + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + _starpu_dgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, & + ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + _starpu_dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 
2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + _starpu_dorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* If condition numbers desired, compute Schur form */ + + if (wntsnn) { + *(unsigned char *)job = 'E'; + } else { + *(unsigned char *)job = 'S'; + } + +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + _starpu_dhseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from DHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 3*N) */ + + _starpu_dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + +/* Compute condition numbers if desired */ +/* (Workspace: need N*N+6*N unless SENSE = 'E') */ + + if (! wntsnn) { + _starpu_dtrsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, + &work[iwrk], n, &iwork[1], &icond); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ + + _starpu_dgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) 
{ + d__1 = _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / _starpu_dlapy2_(&d__1, &d__2); + _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[k] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = _starpu_idamax_(n, &work[1], &c__1); + _starpu_dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + _starpu_drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ + + _starpu_dgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. 
/ _starpu_dlapy2_(&d__1, &d__2); + _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + _starpu_dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[k] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = _starpu_idamax_(n, &work[1], &c__1); + _starpu_dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + _starpu_drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info == 0) { + if ((wntsnv || wntsnb) && icond == 0) { + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[ + 1], n, &ierr); + } + } else { + i__1 = *ilo - 1; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = *ilo - 1; + _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEVX */ + +} /* _starpu_dgeevx_ */ diff --git a/min-dgels/base/SRC/dgegs.c b/min-dgels/base/SRC/dgegs.c new file mode 100644 index 0000000..070125c --- /dev/null +++ b/min-dgels/base/SRC/dgegs.c @@ -0,0 +1,548 @@ +/* dgegs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (passed by address to the callees below) */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b36 = 0.; +static doublereal c_b37 = 1.; + +/* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, + integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + + /* Local variables */ + integer nb, nb1, nb2, nb3, ihi, ilo; + doublereal eps, anrm, bnrm; + integer itau, lopt; + extern logical _starpu_lsame_(char *, char *); + integer ileft, iinfo, icols; + logical ilvsl; + integer iwork; + logical ilvsr; + integer irows; + extern /* Subroutine */ int _starpu_dggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *), _starpu_dggbal_(char *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, 
integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + integer ijobvl, iright, ijobvr; + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal anrmto; + integer lwkmin; + doublereal bnrmto; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + doublereal smlnum; + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DGGES. 
*/ + +/* DGEGS computes the eigenvalues, real Schur form, and, optionally, */ +/* left and/or right Schur vectors of a real matrix pair (A,B). */ +/* Given two square matrices A and B, the generalized real Schur */ +/* factorization has the form */ + +/* A = Q*S*Z**T, B = Q*T*Z**T */ + +/* where Q and Z are orthogonal matrices, T is upper triangular, and S */ +/* is an upper quasi-triangular matrix with 1-by-1 and 2-by-2 diagonal */ +/* blocks, the 2-by-2 blocks corresponding to complex conjugate pairs */ +/* of eigenvalues of (A,B). The columns of Q are the left Schur vectors */ +/* and the columns of Z are the right Schur vectors. */ + +/* If only the eigenvalues of (A,B) are needed, the driver routine */ +/* DGEGV should be used instead. See DGEGV for a description of the */ +/* eigenvalues of the generalized nonsymmetric eigenvalue problem */ +/* (GNEP). */ + +/* Arguments */ +/* ========= */ + +/* JOBVSL (input) CHARACTER*1 */ +/* = 'N': do not compute the left Schur vectors; */ +/* = 'V': compute the left Schur vectors (returned in VSL). */ + +/* JOBVSR (input) CHARACTER*1 */ +/* = 'N': do not compute the right Schur vectors; */ +/* = 'V': compute the right Schur vectors (returned in VSR). */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VSL, and VSR. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the matrix A. */ +/* On exit, the upper quasi-triangular matrix S from the */ +/* generalized real Schur factorization. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the matrix B. */ +/* On exit, the upper triangular matrix T from the generalized */ +/* real Schur factorization. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N). 
*/ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* The real parts of each scalar alpha defining an eigenvalue */ +/* of GNEP. */ + +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* The imaginary parts of each scalar alpha defining an */ +/* eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th */ +/* eigenvalue is real; if positive, then the j-th and (j+1)-st */ +/* eigenvalues are a complex conjugate pair, with */ +/* ALPHAI(j+1) = -ALPHAI(j). */ + +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* The scalars beta that define the eigenvalues of GNEP. */ +/* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ +/* beta = BETA(j) represent the j-th eigenvalue of the matrix */ +/* pair (A,B), in one of the forms lambda = alpha/beta or */ +/* mu = beta/alpha. Since either lambda or mu may overflow, */ +/* they should not, in general, be computed. */ + +/* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ +/* If JOBVSL = 'V', the matrix of left Schur vectors Q. */ +/* Not referenced if JOBVSL = 'N'. */ + +/* LDVSL (input) INTEGER */ +/* The leading dimension of the matrix VSL. LDVSL >= 1, and */ +/* if JOBVSL = 'V', LDVSL >= N. */ + +/* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ +/* If JOBVSR = 'V', the matrix of right Schur vectors Z. */ +/* Not referenced if JOBVSR = 'N'. */ + +/* LDVSR (input) INTEGER */ +/* The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* if JOBVSR = 'V', LDVSR >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,4*N). */ +/* For good performance, LWORK must generally be larger. */ +/* To compute the optimal value of LWORK, call ILAENV to get */ +/* blocksizes (for DGEQRF, DORMQR, and DORGQR.) 
Then compute: */ +/* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR */ +/* The optimal LWORK is 2*N + N*(NB+1). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. (A,B) are not in Schur */ +/* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* be correct for j=INFO+1,...,N. */ +/* > N: errors that usually indicate LAPACK problems: */ +/* =N+1: error return from DGGBAL */ +/* =N+2: error return from DGEQRF */ +/* =N+3: error return from DORMQR */ +/* =N+4: error return from DORGQR */ +/* =N+5: error return from DGGHRD */ +/* =N+6: error return from DHGEQZ (other than failed */ +/* iteration) */ +/* =N+7: error return from DGGBAK (computing VSL) */ +/* =N+8: error return from DGGBAK (computing VSR) */ +/* =N+9: error return from DLASCL (various places) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode the input arguments */ + + /* Parameter adjustments: shift each array pointer so that the 1-based, column-major indexing used below (such as b[ilo + ilo * b_dim1]) is valid */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1; + vsr -= vsr_offset; + --work; + + /* Function Body: decode JOBVSL and JOBVSR (ijobvl/ijobvr is 1 for "N", 2 for "V", -1 if unrecognized) */ + if (_starpu_lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (_starpu_lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (_starpu_lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (_starpu_lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + +/* Test the input arguments */ + +/* Computing MAX: the minimal workspace is max(4*N,1) */ + i__1 = *n << 2; + lwkmin = max(i__1,1); + lwkopt = lwkmin; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -12; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -14; + } else if (*lwork < lwkmin && ! 
lquery) { + *info = -16; + } + + if (*info == 0) { + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, n, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, n, n, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORGQR", " ", n, n, n, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2); + nb = max(i__1,nb3); + lopt = (*n << 1) + *n * (nb + 1); /* 2*N + N*(NB+1), the optimal LWORK given in the header */ + work[1] = (doublereal) lopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEGS ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("E") * _starpu_dlamch_("B"); + safmin = _starpu_dlamch_("S"); + smlnum = *n * safmin / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + + if (ilascl) { + _starpu_dlascl_("G", &c_n1, &c_n1, &anrm, &anrmto, n, n, &a[a_offset], lda, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + + if (ilbscl) { + _starpu_dlascl_("G", &c_n1, &c_n1, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + } + +/* Permute the matrix to make it more nearly triangular */ +/* Workspace layout: (2*N words -- "work..." not actually used) */ +/* left_permutation, right_permutation, work... 
*/ + + ileft = 1; + iright = *n + 1; + iwork = iright + *n; + _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwork], &iinfo); + if (iinfo != 0) { + *info = *n + 1; + goto L10; + } + +/* Reduce B to triangular form, and initialize VSL and/or VSR */ +/* Workspace layout: ("work..." must have at least N words) */ +/* left_permutation, right_permutation, tau, work... */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwork; + iwork = itau + irows; + i__1 = *lwork + 1 - iwork; + _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX: fold the value found in work[iwork] after the call, plus the iwork - 1 words in front of it, into lwkopt */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 2; + goto L10; + } + + i__1 = *lwork + 1 - iwork; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwork], &i__1, & + iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 3; + goto L10; + } + + if (ilvsl) { + _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl); + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ilo + + 1 + ilo * vsl_dim1], ldvsl); + i__1 = *lwork + 1 - iwork; + _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 4; + goto L10; + } + } + + if (ilvsr) { + _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr); + } + +/* 
Reduce to generalized Hessenberg form */ + + _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, 
&a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &iinfo); + if (iinfo != 0) { + *info = *n + 5; + goto L10; + } + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* Workspace layout: ("work..." must have at least 1 word) */ +/* left_permutation, right_permutation, work... */ + + iwork = itau; + i__1 = *lwork + 1 - iwork; + _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] +, ldvsl, &vsr[vsr_offset], ldvsr, &work[iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { /* a code in 1..N is passed through, N+1..2N is reduced by N, anything else is reported as N+6 */ + if (iinfo > 0 && iinfo <= *n) { + *info = iinfo; + } else if (iinfo > *n && iinfo <= *n << 1) { + *info = iinfo - *n; + } else { + *info = *n + 6; + } + goto L10; + } + +/* Apply permutation to VSL and VSR */ + + if (ilvsl) { + _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &iinfo); + if (iinfo != 0) { + *info = *n + 7; + goto L10; + } + } + if (ilvsr) { + _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &iinfo); + if (iinfo != 0) { + *info = *n + 8; + goto L10; + } + } + +/* Undo scaling (a DLASCL failure here returns immediately and does not reach the WORK(1) update at L10) */ + + if (ilascl) { + _starpu_dlascl_("H", &c_n1, &c_n1, &anrmto, &anrm, n, n, &a[a_offset], lda, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + _starpu_dlascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + _starpu_dlascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + } + + if (ilbscl) { + 
_starpu_dlascl_("U", &c_n1, &c_n1, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + _starpu_dlascl_("G", &c_n1, 
&c_n1, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + iinfo); + if (iinfo != 0) { + *info = *n + 9; + return 0; + } + } + +L10: + work[1] = (doublereal) lwkopt; /* shared exit for normal completion and the goto L10 error paths */ + + return 0; + +/* End of DGEGS */ + +} /* _starpu_dgegs_ */ diff --git a/min-dgels/base/SRC/dgegv.c b/min-dgels/base/SRC/dgegv.c new file mode 100644 index 0000000..7205111 --- /dev/null +++ b/min-dgels/base/SRC/dgegv.c @@ -0,0 +1,842 @@ +/* dgegv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b27 = 1.; +static doublereal c_b38 = 0.; + +/* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer jc, nb, in, jr, nb1, nb2, nb3, ihi, ilo; + doublereal eps; + logical ilv; + doublereal absb, anrm, bnrm; + integer itau; + doublereal temp; + logical ilvl, ilvr; + integer lopt; + doublereal anrm1, anrm2, bnrm1, bnrm2, absai, scale, absar, sbeta; + extern logical _starpu_lsame_(char *, char *); + integer ileft, iinfo, icols, iwork, irows; + extern /* 
Subroutine */ int _starpu_dggbak_(char *, char *, integer *, 
integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *), _starpu_dggbal_(char *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + doublereal salfai; + extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal salfar; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmax; + char chtemp[1]; + logical ldumma[1]; + extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), _starpu_dtgevc_(char *, char *, + logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + integer ijobvl, iright; + logical ilimit; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int 
_starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal onepls; + integer lwkmin; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DGGEV. */ + +/* DGEGV computes the eigenvalues and, optionally, the left and/or right */ +/* eigenvectors of a real matrix pair (A,B). */ +/* Given two square matrices A and B, */ +/* the generalized nonsymmetric eigenvalue problem (GNEP) is to find the */ +/* eigenvalues lambda and corresponding (non-zero) eigenvectors x such */ +/* that */ + +/* A*x = lambda*B*x. */ + +/* An alternate form is to find the eigenvalues mu and corresponding */ +/* eigenvectors y such that */ + +/* mu*A*y = B*y. */ + +/* These two forms are equivalent with mu = 1/lambda and x = y if */ +/* neither lambda nor mu is zero. In order to deal with the case that */ +/* lambda or mu is zero or small, two values alpha and beta are returned */ +/* for each eigenvalue, such that lambda = alpha/beta and */ +/* mu = beta/alpha. */ + +/* The vectors x and y in the above equations are right eigenvectors of */ +/* the matrix pair (A,B). Vectors u and v satisfying */ + +/* u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B */ + +/* are left eigenvectors of (A,B). */ + +/* Note: this routine performs "full balancing" on A and B -- see */ +/* "Further Details", below. 
*/ + +/* Arguments */ +/* ========= */ + +/* JOBVL (input) CHARACTER*1 */ +/* = 'N': do not compute the left generalized eigenvectors; */ +/* = 'V': compute the left generalized eigenvectors (returned */ +/* in VL). */ + +/* JOBVR (input) CHARACTER*1 */ +/* = 'N': do not compute the right generalized eigenvectors; */ +/* = 'V': compute the right generalized eigenvectors (returned */ +/* in VR). */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VL, and VR. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the matrix A. */ +/* If JOBVL = 'V' or JOBVR = 'V', then on exit A */ +/* contains the real Schur form of A from the generalized Schur */ +/* factorization of the pair (A,B) after balancing. */ +/* If no eigenvectors were computed, then only the diagonal */ +/* blocks from the Schur form will be correct. See DGGHRD and */ +/* DHGEQZ for details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the matrix B. */ +/* If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the */ +/* upper triangular matrix obtained from B in the generalized */ +/* Schur factorization of the pair (A,B) after balancing. */ +/* If no eigenvectors were computed, then only those elements of */ +/* B corresponding to the diagonal blocks from the Schur form of */ +/* A will be correct. See DGGHRD and DHGEQZ for details. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N). */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* The real parts of each scalar alpha defining an eigenvalue of */ +/* GNEP. */ + +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* The imaginary parts of each scalar alpha defining an */ +/* eigenvalue of GNEP. 
If ALPHAI(j) is zero, then the j-th */ +/* eigenvalue is real; if positive, then the j-th and */ +/* (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* ALPHAI(j+1) = -ALPHAI(j). */ + +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* The scalars beta that define the eigenvalues of GNEP. */ + +/* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ +/* beta = BETA(j) represent the j-th eigenvalue of the matrix */ +/* pair (A,B), in one of the forms lambda = alpha/beta or */ +/* mu = beta/alpha. Since either lambda or mu may overflow, */ +/* they should not, in general, be computed. */ + +/* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ +/* If JOBVL = 'V', the left eigenvectors u(j) are stored */ +/* in the columns of VL, in the same order as their eigenvalues. */ +/* If the j-th eigenvalue is real, then u(j) = VL(:,j). */ +/* If the j-th and (j+1)-st eigenvalues form a complex conjugate */ +/* pair, then */ +/* u(j) = VL(:,j) + i*VL(:,j+1) */ +/* and */ +/* u(j+1) = VL(:,j) - i*VL(:,j+1). */ + +/* Each eigenvector is scaled so that its largest component has */ +/* abs(real part) + abs(imag. part) = 1, except for eigenvectors */ +/* corresponding to an eigenvalue with alpha = beta = 0, which */ +/* are set to zero. */ +/* Not referenced if JOBVL = 'N'. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the matrix VL. LDVL >= 1, and */ +/* if JOBVL = 'V', LDVL >= N. */ + +/* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ +/* If JOBVR = 'V', the right eigenvectors x(j) are stored */ +/* in the columns of VR, in the same order as their eigenvalues. */ +/* If the j-th eigenvalue is real, then x(j) = VR(:,j). */ +/* If the j-th and (j+1)-st eigenvalues form a complex conjugate */ +/* pair, then */ +/* x(j) = VR(:,j) + i*VR(:,j+1) */ +/* and */ +/* x(j+1) = VR(:,j) - i*VR(:,j+1). */ + +/* Each eigenvector is scaled so that its largest component has */ +/* abs(real part) + abs(imag. 
part) = 1, except for eigenvectors */ +/* corresponding to an eigenvalue with alpha = beta = 0, which */ +/* are set to zero. */ +/* Not referenced if JOBVR = 'N'. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the matrix VR. LDVR >= 1, and */ +/* if JOBVR = 'V', LDVR >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,8*N). */ +/* For good performance, LWORK must generally be larger. */ +/* To compute the optimal value of LWORK, call ILAENV to get */ +/* blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: */ +/* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR; */ +/* The optimal LWORK is: */ +/* 2*N + MAX( 6*N, N*(NB+1) ). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. No eigenvectors have been */ +/* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* should be correct for j=INFO+1,...,N.
*/ +/* > N: errors that usually indicate LAPACK problems: */ +/* =N+1: error return from DGGBAL */ +/* =N+2: error return from DGEQRF */ +/* =N+3: error return from DORMQR */ +/* =N+4: error return from DORGQR */ +/* =N+5: error return from DGGHRD */ +/* =N+6: error return from DHGEQZ (other than failed */ +/* iteration) */ +/* =N+7: error return from DTGEVC */ +/* =N+8: error return from DGGBAK (computing VL) */ +/* =N+9: error return from DGGBAK (computing VR) */ +/* =N+10: error return from DLASCL (various calls) */ + +/* Further Details */ +/* =============== */ + +/* Balancing */ +/* --------- */ + +/* This driver calls DGGBAL to both permute and scale rows and columns */ +/* of A and B. The permutations PL and PR are chosen so that PL*A*PR */ +/* and PL*B*PR will be upper triangular except for the diagonal blocks */ +/* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as */ +/* possible. The diagonal scaling matrices DL and DR are chosen so */ +/* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to */ +/* one (except for the elements that start out zero.) */ + +/* After the eigenvalues and eigenvectors of the balanced matrices */ +/* have been computed, DGGBAK transforms the eigenvectors back to what */ +/* they would have been (in perfect arithmetic) if they had not been */ +/* balanced. */ + +/* Contents of A and B on Exit */ +/* -------- -- - --- - -- ---- */ + +/* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or */ +/* both), then on exit the arrays A and B will contain the real Schur */ +/* form[*] of the "balanced" versions of A and B. If no eigenvectors */ +/* are computed, then only the diagonal blocks will be correct. */ + +/* [*] See DHGEQZ, DGEGS, or read the book "Matrix Computations", */ +/* by Golub & van Loan, pub. by Johns Hopkins U. Press. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* ..
Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + if (_starpu_lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (_starpu_lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (_starpu_lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (_starpu_lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + +/* Computing MAX */ + i__1 = *n << 3; + lwkmin = max(i__1,1); + lwkopt = lwkmin; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -12; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -14; + } else if (*lwork < lwkmin && ! 
lquery) { + *info = -16; + } + + if (*info == 0) { + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, n, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, n, n, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORGQR", " ", n, n, n, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2); + nb = max(i__1,nb3); +/* Computing MAX */ + i__1 = *n * 6, i__2 = *n * (nb + 1); + lopt = (*n << 1) + max(i__1,i__2); + work[1] = (doublereal) lopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEGV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("E") * _starpu_dlamch_("B"); + safmin = _starpu_dlamch_("S"); + safmin += safmin; + safmax = 1. / safmin; + onepls = eps * 4 + 1.; + +/* Scale A */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + anrm1 = anrm; + anrm2 = 1.; + if (anrm < 1.) { + if (safmax * anrm < 1.) { + anrm1 = safmin; + anrm2 = safmax * anrm; + } + } + + if (anrm > 0.) { + _starpu_dlascl_("G", &c_n1, &c_n1, &anrm, &c_b27, n, n, &a[a_offset], lda, & + iinfo); + if (iinfo != 0) { + *info = *n + 10; + return 0; + } + } + +/* Scale B */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + bnrm1 = bnrm; + bnrm2 = 1.; + if (bnrm < 1.) { + if (safmax * bnrm < 1.) { + bnrm1 = safmin; + bnrm2 = safmax * bnrm; + } + } + + if (bnrm > 0.) { + _starpu_dlascl_("G", &c_n1, &c_n1, &bnrm, &c_b27, n, n, &b[b_offset], ldb, & + iinfo); + if (iinfo != 0) { + *info = *n + 10; + return 0; + } + } + +/* Permute the matrix to make it more nearly triangular */ +/* Workspace layout: (8*N words -- "work" requires 6*N words) */ +/* left_permutation, right_permutation, work... 
*/ + + ileft = 1; + iright = *n + 1; + iwork = iright + *n; + _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwork], &iinfo); + if (iinfo != 0) { + *info = *n + 1; + goto L120; + } + +/* Reduce B to triangular form, and initialize VL and/or VR */ +/* Workspace layout: ("work..." must have at least N words) */ +/* left_permutation, right_permutation, tau, work... */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = iwork; + iwork = itau + irows; + i__1 = *lwork + 1 - iwork; + _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 2; + goto L120; + } + + i__1 = *lwork + 1 - iwork; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwork], &i__1, & + iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 3; + goto L120; + } + + if (ilvl) { + _starpu_dlaset_("Full", n, n, &c_b38, &c_b27, &vl[vl_offset], ldvl) + ; + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ilo + + 1 + ilo * vl_dim1], ldvl); + i__1 = *lwork + 1 - iwork; + _starpu_dorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + *info = *n + 4; + goto L120; + } + } + + if (ilvr) { + _starpu_dlaset_("Full", n, n, &c_b38, &c_b27, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ + + if 
(ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + _starpu_dgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &iinfo); + } else { + _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &iinfo); + } + if (iinfo != 0) { + *info = *n + 5; + goto L120; + } + +/* Perform QZ algorithm */ +/* Workspace layout: ("work..." must have at least 1 word) */ +/* left_permutation, right_permutation, work... */ + + iwork = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwork; + _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &work[iwork], &i__1, &iinfo); + if (iinfo >= 0) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; + lwkopt = max(i__1,i__2); + } + if (iinfo != 0) { + if (iinfo > 0 && iinfo <= *n) { + *info = iinfo; + } else if (iinfo > *n && iinfo <= *n << 1) { + *info = iinfo - *n; + } else { + *info = *n + 6; + } + goto L120; + } + + if (ilv) { + +/* Compute Eigenvectors (DTGEVC requires 6*N words of workspace) */ + + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwork], &iinfo); + if (iinfo != 0) { + *info = *n + 7; + goto L120; + } + +/* Undo balancing on VL and VR, rescale */ + + if (ilvl) { + _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vl[vl_offset], ldvl, &iinfo); + if (iinfo != 0) { + *info = *n + 8; + goto L120; + } + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + 
if (alphai[jc] < 0.) { + goto L50; + } + temp = 0.; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], + abs(d__1)); + temp = max(d__2,d__3); +/* L10: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], + abs(d__1)) + (d__2 = vl[jr + (jc + 1) * + vl_dim1], abs(d__2)); + temp = max(d__3,d__4); +/* L20: */ + } + } + if (temp < safmin) { + goto L50; + } + temp = 1. / temp; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L40: */ + } + } +L50: + ; + } + } + if (ilvr) { + _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vr[vr_offset], ldvr, &iinfo); + if (iinfo != 0) { + *info = *n + 9; + goto L120; + } + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.) { + goto L100; + } + temp = 0.; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], + abs(d__1)); + temp = max(d__2,d__3); +/* L60: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], + abs(d__1)) + (d__2 = vr[jr + (jc + 1) * + vr_dim1], abs(d__2)); + temp = max(d__3,d__4); +/* L70: */ + } + } + if (temp < safmin) { + goto L100; + } + temp = 1. / temp; + if (alphai[jc] == 0.) 
{ + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L90: */ + } + } +L100: + ; + } + } + +/* End of eigenvector calculation */ + + } + +/* Undo scaling in alpha, beta */ + +/* Note: this does not give the alpha and beta for the unscaled */ +/* problem. */ + +/* Un-scaling is limited to avoid underflow in alpha and beta */ +/* if they are significant. */ + + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + absar = (d__1 = alphar[jc], abs(d__1)); + absai = (d__1 = alphai[jc], abs(d__1)); + absb = (d__1 = beta[jc], abs(d__1)); + salfar = anrm * alphar[jc]; + salfai = anrm * alphai[jc]; + sbeta = bnrm * beta[jc]; + ilimit = FALSE_; + scale = 1.; + +/* Check for significant underflow in ALPHAI */ + +/* Computing MAX */ + d__1 = safmin, d__2 = eps * absar, d__1 = max(d__1,d__2), d__2 = eps * + absb; + if (abs(salfai) < safmin && absai >= max(d__1,d__2)) { + ilimit = TRUE_; +/* Computing MAX */ + d__1 = onepls * safmin, d__2 = anrm2 * absai; + scale = onepls * safmin / anrm1 / max(d__1,d__2); + + } else if (salfai == 0.) { + +/* If insignificant underflow in ALPHAI, then make the */ +/* conjugate eigenvalue real. */ + + if (alphai[jc] < 0. && jc > 1) { + alphai[jc - 1] = 0.; + } else if (alphai[jc] > 0. 
&& jc < *n) { + alphai[jc + 1] = 0.; + } + } + +/* Check for significant underflow in ALPHAR */ + +/* Computing MAX */ + d__1 = safmin, d__2 = eps * absai, d__1 = max(d__1,d__2), d__2 = eps * + absb; + if (abs(salfar) < safmin && absar >= max(d__1,d__2)) { + ilimit = TRUE_; +/* Computing MAX */ +/* Computing MAX */ + d__3 = onepls * safmin, d__4 = anrm2 * absar; + d__1 = scale, d__2 = onepls * safmin / anrm1 / max(d__3,d__4); + scale = max(d__1,d__2); + } + +/* Check for significant underflow in BETA */ + +/* Computing MAX */ + d__1 = safmin, d__2 = eps * absar, d__1 = max(d__1,d__2), d__2 = eps * + absai; + if (abs(sbeta) < safmin && absb >= max(d__1,d__2)) { + ilimit = TRUE_; +/* Computing MAX */ +/* Computing MAX */ + d__3 = onepls * safmin, d__4 = bnrm2 * absb; + d__1 = scale, d__2 = onepls * safmin / bnrm1 / max(d__3,d__4); + scale = max(d__1,d__2); + } + +/* Check for possible overflow when limiting scaling */ + + if (ilimit) { +/* Computing MAX */ + d__1 = abs(salfar), d__2 = abs(salfai), d__1 = max(d__1,d__2), + d__2 = abs(sbeta); + temp = scale * safmin * max(d__1,d__2); + if (temp > 1.) { + scale /= temp; + } + if (scale < 1.) { + ilimit = FALSE_; + } + } + +/* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. */ + + if (ilimit) { + salfar = scale * alphar[jc] * anrm; + salfai = scale * alphai[jc] * anrm; + sbeta = scale * beta[jc] * bnrm; + } + alphar[jc] = salfar; + alphai[jc] = salfai; + beta[jc] = sbeta; +/* L110: */ + } + +L120: + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DGEGV */ + +} /* _starpu_dgegv_ */ diff --git a/min-dgels/base/SRC/dgehd2.c b/min-dgels/base/SRC/dgehd2.c new file mode 100644 index 0000000..e700388 --- /dev/null +++ b/min-dgels/base/SRC/dgehd2.c @@ -0,0 +1,191 @@ +/* dgehd2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (c__1 is passed by address to _starpu_dlarfg_ and _starpu_dlarf_) */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables (aii holds the saved entry A(i+1,i); the extern declarations are the helpers called below) */ + integer i__; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEHD2 reduces a real general matrix A to upper Hessenberg form H by */ +/* an orthogonal similarity transformation: Q' * A * Q = H . */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that A is already upper triangular in rows */ +/* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* set by a previous call to DGEBAL; otherwise they should be */ +/* set to 1 and N respectively. See Further Details. */ +/* 1 <= ILO <= IHI <= max(1,N).
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the n by n general matrix to be reduced. */ +/* On exit, the upper triangle and the first subdiagonal of A */ +/* are overwritten with the upper Hessenberg matrix H, and the */ +/* elements below the first subdiagonal, with the array TAU, */ +/* represent the orthogonal matrix Q as a product of elementary */ +/* reflectors. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* reflectors */ + +/* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* exit in A(i+2:ihi,i), and tau in TAU(i). */ + +/* The contents of A are illustrated by the following example, with */ +/* n = 7, ilo = 2 and ihi = 6: */ + +/* on entry,                        on exit, */ + +/* ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */ +/* (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */ +/* (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */ +/* (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */ +/* (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */ +/* (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */ +/* (                         a )    (                          a ) */ + +/* where a denotes an element of the original matrix A, h denotes a */ +/* modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* element of the vector defining H(i). */ + +/* ===================================================================== */ + +/* .. Parameters ..
*/ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments: shift a, tau and work so the code below can use Fortran-style 1-based indices, e.g. a[row + col * a_dim1] */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: validate the arguments; *info = -k flags the k-th argument, which is reported through _starpu_xerbla_ before returning */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN (clamps the row index i+2 to n) */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; /* save A(i+1,i); it is overwritten with 1 while H(i) is applied */ + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + _starpu_dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; /* restore the saved A(i+1,i) */ +/* L10: */ + } + + return 0; + +/* End of DGEHD2 */ + +} /* _starpu_dgehd2_ */ diff --git a/min-dgels/base/SRC/dgehrd.c b/min-dgels/base/SRC/dgehrd.c new file mode 100644 index 0000000..70347dd --- /dev/null +++ b/min-dgels/base/SRC/dgehrd.c @@ -0,0 +1,342 @@ +/* dgehrd.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static doublereal c_b25 = -1.; +static doublereal c_b26 = 1.; + +/* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j; + doublereal t[4160] /* was [65][64] */; + integer ib; + doublereal ei; + integer nb, nh, nx, iws; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_( + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), _starpu_dgehd2_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlahr2_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, 
integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEHRD reduces a real general matrix A to upper Hessenberg form H by */ +/* an orthogonal similarity transformation: Q' * A * Q = H . */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that A is already upper triangular in rows */ +/* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* set by a previous call to DGEBAL; otherwise they should be */ +/* set to 1 and N respectively. See Further Details. */ +/* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N general matrix to be reduced. */ +/* On exit, the upper triangle and the first subdiagonal of A */ +/* are overwritten with the upper Hessenberg matrix H, and the */ +/* elements below the first subdiagonal, with the array TAU, */ +/* represent the orthogonal matrix Q as a product of elementary */ +/* reflectors. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */ +/* zero. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
*/ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* reflectors */ + +/* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* exit in A(i+2:ihi,i), and tau in TAU(i). */ + +/* The contents of A are illustrated by the following example, with */ +/* n = 7, ilo = 2 and ihi = 6: */ + +/* on entry, on exit, */ + +/* ( a a a a a a a ) ( a a h h h h a ) */ +/* ( a a a a a a ) ( a h h h h a ) */ +/* ( a a a a a a ) ( h h h h h h ) */ +/* ( a a a a a a ) ( v2 h h h h h ) */ +/* ( a a a a a a ) ( v2 v3 h h h h ) */ +/* ( a a a a a a ) ( v2 v3 v4 h h h ) */ +/* ( a ) ( a ) */ + +/* where a denotes an element of the original matrix A, h denotes a */ +/* modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* element of the vector defining H(i). */ + +/* This file is a slight modification of LAPACK-3.0's DGEHRD */ +/* subroutine incorporating improvements proposed by Quintana-Orti and */ +/* Van de Geijn (2005). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. 
*/ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + +/* Determine the block size */ + +/* Computing MIN */ + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code) */ + +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + iws = *n * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code */ + +/* Computing MAX 
*/ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & + c_n1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* Reduce columns i:i+ib-1 to Hessenberg form, returning the */ +/* matrices V and T of the block reflector H = I - V*T*V' */ +/* which performs the reduction, and also the matrix Y = A*V*T */ + + _starpu_dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */ +/* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set */ +/* to 1 */ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + _starpu_dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b26, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */ +/* right */ + + i__3 = ib - 1; + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + _starpu_daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */ +/* left */ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) 
* a_dim1], lda, &work[1], &ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + _starpu_dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) iws; + + return 0; + +/* End of DGEHRD */ + +} /* _starpu_dgehrd_ */ diff --git a/min-dgels/base/SRC/dgejsv.c b/min-dgels/base/SRC/dgejsv.c new file mode 100644 index 0000000..e6d459b --- /dev/null +++ b/min-dgels/base/SRC/dgejsv.c @@ -0,0 +1,2218 @@ +/* dgejsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b34 = 0.; +static doublereal c_b35 = 1.; +static integer c__0 = 0; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, doublereal *a, + integer *lda, doublereal *sva, doublereal *u, integer *ldu, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), d_sign(doublereal *, doublereal + *); + integer i_dnnt(doublereal *); + + /* Local variables */ + integer p, q, n1, nr; + doublereal big, xsc, big1; + logical defr; + doublereal aapp, aaqq; + logical kill; + integer ierr; + extern doublereal 
_starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp1; + logical jracc; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal small, entra, sfmin; + logical lsvec; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + doublereal epsln; + logical rsvec; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical l2aber; + extern /* Subroutine */ int _starpu_dgeqp3_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal condr1, condr2, uscal1, uscal2; + logical l2kill, l2rank, l2tran, l2pert; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal scalem; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal sconda; + logical goscal; + doublereal aatmin; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + doublereal aatmax; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_xerbla_(char *, integer *); + logical noscal; + extern /* Subroutine */ int _starpu_dpocon_(char *, integer *, doublereal *, + integer *, 
doublereal *, doublereal *, doublereal *, integer *, + integer *), _starpu_dgesvj_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlassq_(integer *, doublereal *, integer + *, doublereal *, doublereal *), _starpu_dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + doublereal entrat; + logical almort; + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dormlq_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal maxprj; + logical errest; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical transp, rowpiv; + doublereal cond_ok__; + integer warning, numrank; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ +/* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ +/* SIGMA is a library of algorithms for highly accurate algorithms for */ +/* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ +/* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ + +/* -#- Scalar Arguments -#- */ + + +/* -#- Array Arguments -#- */ + +/* .. 
*/ + +/* Purpose */ +/* ~~~~~~~ */ +/* DGEJSV computes the singular value decomposition (SVD) of a real M-by-N */ +/* matrix [A], where M >= N. The SVD of [A] is written as */ + +/* [A] = [U] * [SIGMA] * [V]^t, */ + +/* where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */ +/* diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */ +/* [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */ +/* the singular values of [A]. The columns of [U] and [V] are the left and */ +/* the right singular vectors of [A], respectively. The matrices [U] and [V] */ +/* are computed and stored in the arrays U and V, respectively. The diagonal */ +/* of [SIGMA] is computed and stored in the array SVA. */ + +/* Further details */ +/* ~~~~~~~~~~~~~~~ */ +/* DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3, */ +/* DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an */ +/* additional row pivoting can be used as a preprocessor, which in some */ +/* cases results in much higher accuracy. An example is a matrix A with the */ +/* structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */ +/* diagonal matrices and C is a well-conditioned matrix. In that case, complete */ +/* pivoting in the first QR factorizations provides accuracy dependent on the */ +/* condition number of C, and independent of D1, D2. Such higher accuracy is */ +/* not completely understood theoretically, but it works well in practice. */ +/* Further, if A can be written as A = B*D, with well-conditioned B and some */ +/* diagonal D, then the high accuracy is guaranteed, both theoretically and */ +/* in software, independent of D. For more details see [1], [2]. */ +/* The computational range for the singular values can be the full range */ +/* ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */ +/* & LAPACK routines called by DGEJSV are implemented to work in that range. 
*/ +/* If that is not the case, then the restriction for safe computation with */ +/* the singular values in the range of normalized IEEE numbers is that the */ +/* spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */ +/* overflow. This code (DGEJSV) is best used in this restricted range, */ +/* meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are */ +/* returned as zeros. See JOBR for details on this. */ +/* Further, this implementation is somewhat slower than the one described */ +/* in [1,2] due to replacement of some non-LAPACK components, and because */ +/* the choice of some tuning parameters in the iterative part (DGESVJ) is */ +/* left to the implementer on a particular machine. */ +/* The rank revealing QR factorization (in this code: DGEQP3) should be */ +/* implemented as in [3]. We have a new version of DGEQP3 under development */ +/* that is more robust than the current one in LAPACK, with a cleaner cut in */ +/* rank deficient cases. It will be available in the SIGMA library [4]. */ +/* If M is much larger than N, it is obvious that the initial QRF with */ +/* column pivoting can be preprocessed by the QRF without pivoting. That */ +/* well known trick is not used in DGEJSV because in some cases heavy row */ +/* weighting can be treated with complete pivoting. The overhead in cases */ +/* M much larger than N is then only due to pivoting, but the benefits in */ +/* terms of accuracy have prevailed. The implementer/user can incorporate */ +/* this extra QRF step easily. The implementer can also improve data movement */ +/* (matrix transpose, matrix copy, matrix transposed copy) - this */ +/* implementation of DGEJSV uses only the simplest, naive data movement. */ + +/* Contributors */ +/* ~~~~~~~~~~~~ */ +/* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* References */ +/* ~~~~~~~~~~ */ +/* [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* SIAM J. 
Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* LAPACK Working note 169. */ +/* [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ +/* LAPACK Working note 170. */ +/* [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */ +/* factorization software - a case study. */ +/* ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */ +/* LAPACK Working note 176. */ +/* [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* QSVD, (H,K)-SVD computations. */ +/* Department of Mathematics, University of Zagreb, 2008. */ + +/* Bugs, examples and comments */ +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* Please report all bugs and send interesting examples and/or comments to */ +/* drmac@math.hr. Thank you. */ + +/* Arguments */ +/* ~~~~~~~~~ */ +/* ............................................................................ */ +/* . JOBA (input) CHARACTER*1 */ +/* . Specifies the level of accuracy: */ +/* . = 'C': This option works well (high relative accuracy) if A = B * D, */ +/* . with well-conditioned B and arbitrary diagonal matrix D. */ +/* . The accuracy cannot be spoiled by COLUMN scaling. The */ +/* . accuracy of the computed output depends on the condition of */ +/* . B, and the procedure aims at the best theoretical accuracy. */ +/* . The relative error max_{i=1:N}|d sigma_i| / sigma_i is */ +/* . bounded by f(M,N)*epsilon* cond(B), independent of D. */ +/* . The input matrix is preprocessed with the QRF with column */ +/* . pivoting. This initial preprocessing and preconditioning by */ +/* . a rank revealing QR factorization is common for all values of */ +/* . JOBA. Additional actions are specified as follows: */ +/* . = 'E': Computation as with 'C' with an additional estimate of the */ +/* . condition number of B. It provides a realistic error bound. */ +/* . 
= 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */ +/* . D1, D2, and well-conditioned matrix C, this option gives */ +/* . higher accuracy than the 'C' option. If the structure of the */ +/* . input matrix is not known, and relative accuracy is */ +/* . desirable, then this option is advisable. The input matrix A */ +/* . is preprocessed with QR factorization with FULL (row and */ +/* . column) pivoting. */ +/* . = 'G': Computation as with 'F' with an additional estimate of the */ +/* . condition number of B, where A=D*B. If A has heavily weighted */ +/* . rows, then using this condition number gives a too pessimistic */ +/* . error bound. */ +/* . = 'A': Small singular values are the noise and the matrix is treated */ +/* . as numerically rank deficient. The error in the computed */ +/* . singular values is bounded by f(m,n)*epsilon*||A||. */ +/* . The computed SVD A = U * S * V^t restores A up to */ +/* . f(m,n)*epsilon*||A||. */ +/* . This gives the procedure the licence to discard (set to zero) */ +/* . all singular values below N*epsilon*||A||. */ +/* . = 'R': Similar to 'A'. Rank revealing property of the initial */ +/* . QR factorization is used to reveal (using triangular factor) */ +/* . a gap sigma_{r+1} < epsilon * sigma_r in which case the */ +/* . numerical RANK is declared to be r. The SVD is computed with */ +/* . absolute error bounds, but more accurately than with 'A'. */ +/* . */ +/* . JOBU (input) CHARACTER*1 */ +/* . Specifies whether to compute the columns of U: */ +/* . = 'U': N columns of U are returned in the array U. */ +/* . = 'F': full set of M left sing. vectors is returned in the array U. */ +/* . = 'W': U may be used as workspace of length M*N. See the description */ +/* . of U. */ +/* . = 'N': U is not computed. */ +/* . */ +/* . JOBV (input) CHARACTER*1 */ +/* . Specifies whether to compute the matrix V: */ +/* . = 'V': N columns of V are returned in the array V; Jacobi rotations */ +/* . 
are not explicitly accumulated. */ +/* . = 'J': N columns of V are returned in the array V, but they are */ +/* . computed as the product of Jacobi rotations. This option is */ +/* . allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */ +/* . = 'W': V may be used as workspace of length N*N. See the description */ +/* . of V. */ +/* . = 'N': V is not computed. */ +/* . */ +/* . JOBR (input) CHARACTER*1 */ +/* . Specifies the RANGE for the singular values. Issues the licence to */ +/* . set to zero small positive singular values if they are outside */ +/* . the specified range. If A .NE. 0 is scaled so that the largest singular */ +/* . value of c*A is around DSQRT(BIG), BIG=DLAMCH('O'), then JOBR issues */ +/* . the licence to kill columns of A whose norm in c*A is less than */ +/* . DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, */ +/* . where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E'). */ +/* . = 'N': Do not kill small columns of c*A. This option assumes that */ +/* . BLAS and QR factorizations and triangular solvers are */ +/* . implemented to work in that range. If the condition of A */ +/* . is greater than BIG, use DGESVJ. */ +/* . = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)] */ +/* . (roughly, as described above). This option is recommended. */ +/* . ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* . For computing the singular values in the FULL range [SFMIN,BIG] */ +/* . use DGESVJ. */ +/* . */ +/* . JOBT (input) CHARACTER*1 */ +/* . If the matrix is square then the procedure may determine to use */ +/* . transposed A if A^t seems to be better with respect to convergence. */ +/* . If the matrix is not square, JOBT is ignored. This is subject to */ +/* . changes in the future. */ +/* . The decision is based on two values of entropy over the adjoint */ +/* . orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */ +/* . = 'T': transpose if entropy test indicates possibly faster */ +/* . 
convergence of Jacobi process if A^t is taken as input. If A is */ +/* . replaced with A^t, then the row pivoting is included automatically. */ +/* . = 'N': do not speculate. */ +/* . This option can be used to compute only the singular values, or the */ +/* . full SVD (U, SIGMA and V). For only one set of singular vectors */ +/* . (U or V), the caller should provide both U and V, as one of the */ +/* . matrices is used as workspace if the matrix A is transposed. */ +/* . The implementer can easily remove this constraint and make the */ +/* . code more complicated. See the descriptions of U and V. */ +/* . */ +/* . JOBP (input) CHARACTER*1 */ +/* . Issues the licence to introduce structured perturbations to drown */ +/* . denormalized numbers. This licence should be active if the */ +/* . denormals are poorly implemented, causing slow computation, */ +/* . especially in cases of fast convergence (!). For details see [1,2]. */ +/* . For the sake of simplicity, these perturbations are included only */ +/* . when the full SVD or only the singular values are requested. The */ +/* . implementer/user can easily add the perturbation for the cases of */ +/* . computing one set of singular vectors. */ +/* . = 'P': introduce perturbation */ +/* . = 'N': do not perturb */ +/* ............................................................................ */ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. M >= N >= 0. */ + +/* A (input/workspace) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* SVA (workspace/output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, */ +/* - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */ +/* computation SVA contains Euclidean column norms of the */ +/* iterated matrices in the array A. 
*/ +/* - For WORK(1) .NE. WORK(2): The singular values of A are */ +/* (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */ +/* sigma_max(A) overflows or if small singular values have been */ +/* saved from underflow by scaling the input matrix A. */ +/* - If JOBR='R' then some of the singular values may be returned */ +/* as exact zeros obtained by "set to zero" because they are */ +/* below the numerical rank threshold or are denormalized numbers. */ + +/* U (workspace/output) DOUBLE PRECISION array, dimension ( LDU, N ) */ +/* If JOBU = 'U', then U contains on exit the M-by-N matrix of */ +/* the left singular vectors. */ +/* If JOBU = 'F', then U contains on exit the M-by-M matrix of */ +/* the left singular vectors, including an ONB */ +/* of the orthogonal complement of the Range(A). */ +/* If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), */ +/* then U is used as workspace if the procedure */ +/* replaces A with A^t. In that case, [V] is computed */ +/* in U as left singular vectors of A^t and then */ +/* copied back to the V array. This 'W' option is just */ +/* a reminder to the caller that in this case U is */ +/* reserved as workspace of length N*N. */ +/* If JOBU = 'N' U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U, LDU >= 1. */ +/* If JOBU = 'U' or 'F' or 'W', then LDU >= M. */ + +/* V (workspace/output) DOUBLE PRECISION array, dimension ( LDV, N ) */ +/* If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */ +/* the right singular vectors; */ +/* If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), */ +/* then V is used as workspace if the procedure */ +/* replaces A with A^t. In that case, [U] is computed */ +/* in V as right singular vectors of A^t and then */ +/* copied back to the U array. This 'W' option is just */ +/* a reminder to the caller that in this case V is */ +/* reserved as workspace of length N*N. */ +/* If JOBV = 'N' V is not referenced. 
*/ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V, LDV >= 1. */ +/* If JOBV = 'V' or 'J' or 'W', then LDV >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension at least LWORK. */ +/* On exit, */ +/* WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such */ +/* that SCALE*SVA(1:N) are the computed singular values */ +/* of A. (See the description of SVA().) */ +/* WORK(2) = See the description of WORK(1). */ +/* WORK(3) = SCONDA is an estimate for the condition number of */ +/* column equilibrated A. (If JOBA .EQ. 'E' or 'G') */ +/* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ +/* It is computed using DPOCON. It holds */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* where R is the triangular factor from the QRF of A. */ +/* However, if R is truncated and the numerical rank is */ +/* determined to be strictly smaller than N, SCONDA is */ +/* returned as -1, thus indicating that the smallest */ +/* singular values might be lost. */ + +/* If full SVD is needed, the following two condition numbers are */ +/* useful for the analysis of the algorithm. They are provided for */ +/* a developer/implementer who is familiar with the details of */ +/* the method. */ + +/* WORK(4) = an estimate of the scaled condition number of the */ +/* triangular factor in the first QR factorization. */ +/* WORK(5) = an estimate of the scaled condition number of the */ +/* triangular factor in the second QR factorization. */ +/* The following two parameters are computed if JOBT .EQ. 'T'. */ +/* They are provided for a developer/implementer who is familiar */ +/* with the details of the method. */ + +/* WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */ +/* of diag(A^t*A) / Trace(A^t*A) taken as a point in the */ +/* probability simplex. */ +/* WORK(7) = the entropy of A*A^t. */ + +/* LWORK (input) INTEGER */ +/* Length of WORK to confirm proper allocation of work space. 
*/ +/* LWORK depends on the job: */ + +/* If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and */ +/* -> .. no scaled condition estimate required ( JOBE.EQ.'N'): */ +/* LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. */ +/* For optimal performance (blocked code) the optimal value */ +/* is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */ +/* block size for xGEQP3/xGEQRF. */ +/* -> .. an estimate of the scaled condition number of A is */ +/* required (JOBA='E', 'G'). In this case, LWORK is the maximum */ +/* of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4N,7). */ + +/* If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), */ +/* -> the minimal requirement is LWORK >= max(2*N+M,7). */ +/* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7), */ +/* where NB is the optimal block size. */ + +/* If SIGMA and the left singular vectors are needed */ +/* -> the minimal requirement is LWORK >= max(2*N+M,7). */ +/* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7), */ +/* where NB is the optimal block size. */ + +/* If full SVD is needed ( JOBU.EQ.'U' or 'F', JOBV.EQ.'V' ) and */ +/* -> .. the singular vectors are computed without explicit */ +/* accumulation of the Jacobi rotations, LWORK >= 6*N+2*N*N */ +/* -> .. in the iterative part, the Jacobi rotations are */ +/* explicitly accumulated (option, see the description of JOBV), */ +/* then the minimal requirement is LWORK >= max(M+3*N+N*N,7). */ +/* For better performance, if NB is the optimal block size, */ +/* LWORK >= max(3*N+N*N+M,3*N+N*N+N*NB,7). */ + +/* IWORK (workspace/output) INTEGER array, dimension M+3*N. */ +/* On exit, */ +/* IWORK(1) = the numerical rank determined after the initial */ +/* QR factorization with pivoting. See the descriptions */ +/* of JOBA and JOBR. 
*/ +/* IWORK(2) = the number of the computed nonzero singular values */ +/* IWORK(3) = if nonzero, a warning message: */ +/* If IWORK(3).EQ.1 then some of the column norms of A */ +/* were denormalized floats. The requested high accuracy */ +/* is not warranted by the data. */ + +/* INFO (output) INTEGER */ +/* < 0 : if INFO = -i, then the i-th argument had an illegal value. */ +/* = 0 : successfull exit; */ +/* > 0 : DGEJSV did not converge in the maximal allowed number */ +/* of sweeps. The computed values may be inaccurate. */ + +/* ............................................................................ */ + +/* Local Parameters: */ + + +/* Local Scalars: */ + + +/* Intrinsic Functions: */ + + +/* External Functions: */ + + +/* External Subroutines ( BLAS, LAPACK ): */ + + + +/* ............................................................................ */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --work; + --iwork; + + /* Function Body */ + lsvec = _starpu_lsame_(jobu, "U") || _starpu_lsame_(jobu, "F"); + jracc = _starpu_lsame_(jobv, "J"); + rsvec = _starpu_lsame_(jobv, "V") || jracc; + rowpiv = _starpu_lsame_(joba, "F") || _starpu_lsame_(joba, "G"); + l2rank = _starpu_lsame_(joba, "R"); + l2aber = _starpu_lsame_(joba, "A"); + errest = _starpu_lsame_(joba, "E") || _starpu_lsame_(joba, "G"); + l2tran = _starpu_lsame_(jobt, "T"); + l2kill = _starpu_lsame_(jobr, "R"); + defr = _starpu_lsame_(jobr, "N"); + l2pert = _starpu_lsame_(jobp, "P"); + + if (! (rowpiv || l2rank || l2aber || errest || _starpu_lsame_(joba, "C"))) { + *info = -1; + } else if (! (lsvec || _starpu_lsame_(jobu, "N") || _starpu_lsame_( + jobu, "W"))) { + *info = -2; + } else if (! (rsvec || _starpu_lsame_(jobv, "N") || _starpu_lsame_( + jobv, "W")) || jracc && ! 
lsvec) { + *info = -3; + } else if (! (l2kill || defr)) { + *info = -4; + } else if (! (l2tran || _starpu_lsame_(jobt, "N"))) { + *info = -5; + } else if (! (l2pert || _starpu_lsame_(jobp, "N"))) { + *info = -6; + } else if (*m < 0) { + *info = -7; + } else if (*n < 0 || *n > *m) { + *info = -8; + } else if (*lda < *m) { + *info = -10; + } else if (lsvec && *ldu < *m) { + *info = -13; + } else if (rsvec && *ldv < *n) { + *info = -14; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 7, i__2 = (*n << 2) + 1, i__1 = max(i__1,i__2), i__2 = (*m << + 1) + *n; +/* Computing MAX */ + i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = max(i__3,i__4), i__4 = (* + m << 1) + *n; +/* Computing MAX */ + i__5 = 7, i__6 = (*n << 1) + *m; +/* Computing MAX */ + i__7 = 7, i__8 = (*n << 1) + *m; +/* Computing MAX */ + i__9 = 7, i__10 = *m + *n * 3 + *n * *n; + if (! (lsvec || rsvec || errest) && *lwork < max(i__1,i__2) || ! ( + lsvec || lsvec) && errest && *lwork < max(i__3,i__4) || lsvec + && ! rsvec && *lwork < max(i__5,i__6) || rsvec && ! lsvec && * + lwork < max(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork + < *n * 6 + (*n << 1) * *n || lsvec && rsvec && jracc && * + lwork < max(i__9,i__10)) { + *info = -17; + } else { +/* #:) */ + *info = 0; + } + } + + if (*info != 0) { +/* #:( */ + i__1 = -(*info); + _starpu_xerbla_("DGEJSV", &i__1); + } + +/* Quick return for void matrix (Y3K safe) */ +/* #:) */ + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine whether the matrix U should be M x N or M x M */ + + if (lsvec) { + n1 = *n; + if (_starpu_lsame_(jobu, "F")) { + n1 = *m; + } + } + +/* Set numerical parameters */ + +/* ! NOTE: Make sure DLAMCH() does not fail on the target architecture. */ + + epsln = _starpu_dlamch_("Epsilon"); + sfmin = _starpu_dlamch_("SafeMinimum"); + small = sfmin / epsln; + big = _starpu_dlamch_("O"); +/* BIG = ONE / SFMIN */ + +/* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */ + +/* (!) 
If necessary, scale SVA() to protect the largest norm from */ +/* overflow. It is possible that this scaling pushes the smallest */ +/* column norm left from the underflow threshold (extreme case). */ + + scalem = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); + noscal = TRUE_; + goscal = TRUE_; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -9; + i__2 = -(*info); + _starpu_xerbla_("DGEJSV", &i__2); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscal) { + sva[p] = aapp * aaqq; + } else { + noscal = FALSE_; + sva[p] = aapp * (aaqq * scalem); + if (goscal) { + goscal = FALSE_; + i__2 = p - 1; + _starpu_dscal_(&i__2, &scalem, &sva[1], &c__1); + } + } +/* L1874: */ + } + + if (noscal) { + scalem = 1.; + } + + aapp = 0.; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing MAX */ + d__1 = aapp, d__2 = sva[p]; + aapp = max(d__1,d__2); + if (sva[p] != 0.) { +/* Computing MIN */ + d__1 = aaqq, d__2 = sva[p]; + aaqq = min(d__1,d__2); + } +/* L4781: */ + } + +/* Quick return for zero M x N matrix */ +/* #:) */ + if (aapp == 0.) { + if (lsvec) { + _starpu_dlaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu) + ; + } + if (rsvec) { + _starpu_dlaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv); + } + work[1] = 1.; + work[2] = 1.; + if (errest) { + work[3] = 1.; + } + if (lsvec && rsvec) { + work[4] = 1.; + work[5] = 1.; + } + if (l2tran) { + work[6] = 0.; + work[7] = 0.; + } + iwork[1] = 0; + iwork[2] = 0; + return 0; + } + +/* Issue warning if denormalized column norms detected. Override the */ +/* high relative accuracy request. Issue licence to kill columns */ +/* (set them to zero) whose norm is less than sigma_max / BIG (roughly). 
*/ +/* #:( */ + warning = 0; + if (aaqq <= sfmin) { + l2rank = TRUE_; + l2kill = TRUE_; + warning = 1; + } + +/* Quick return for one-column matrix */ +/* #:) */ + if (*n == 1) { + + if (lsvec) { + _starpu_dlascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 + + 1], lda, &ierr); + _starpu_dlacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu); +/* computing all M left singular vectors of the M x 1 matrix */ + if (n1 != *n) { + i__1 = *lwork - *n; + _starpu_dgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], & + i__1, &ierr); + i__1 = *lwork - *n; + _starpu_dorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n + + 1], &i__1, &ierr); + _starpu_dcopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } + } + if (rsvec) { + v[v_dim1 + 1] = 1.; + } + if (sva[1] < big * scalem) { + sva[1] /= scalem; + scalem = 1.; + } + work[1] = 1. / scalem; + work[2] = 1.; + if (sva[1] != 0.) { + iwork[1] = 1; + if (sva[1] / scalem >= sfmin) { + iwork[2] = 1; + } else { + iwork[2] = 0; + } + } else { + iwork[1] = 0; + iwork[2] = 0; + } + if (errest) { + work[3] = 1.; + } + if (lsvec && rsvec) { + work[4] = 1.; + work[5] = 1.; + } + if (l2tran) { + work[6] = 0.; + work[7] = 0.; + } + return 0; + + } + + transp = FALSE_; + l2tran = l2tran && *m == *n; + + aatmax = -1.; + aatmin = big; + if (rowpiv || l2tran) { + +/* Compute the row norms, needed to determine row pivoting sequence */ +/* (in the case of heavily row weighted A, row pivoting is strongly */ +/* advised) and to collect information needed to compare the */ +/* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). 
*/ + + if (l2tran) { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + xsc = 0.; + temp1 = 0.; + _starpu_dlassq_(n, &a[p + a_dim1], lda, &xsc, &temp1); +/* DLASSQ gets both the ell_2 and the ell_infinity norm */ +/* in one pass through the vector */ + work[*m + *n + p] = xsc * scalem; + work[*n + p] = xsc * (scalem * sqrt(temp1)); +/* Computing MAX */ + d__1 = aatmax, d__2 = work[*n + p]; + aatmax = max(d__1,d__2); + if (work[*n + p] != 0.) { +/* Computing MIN */ + d__1 = aatmin, d__2 = work[*n + p]; + aatmin = min(d__1,d__2); + } +/* L1950: */ + } + } else { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = scalem * (d__1 = a[p + _starpu_idamax_(n, &a[p + + a_dim1], lda) * a_dim1], abs(d__1)); +/* Computing MAX */ + d__1 = aatmax, d__2 = work[*m + *n + p]; + aatmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = aatmin, d__2 = work[*m + *n + p]; + aatmin = min(d__1,d__2); +/* L1904: */ + } + } + + } + +/* For square matrix A try to determine whether A^t would be better */ +/* input for the preconditioned Jacobi SVD, with faster convergence. */ +/* The decision is based on an O(N) function of the vector of column */ +/* and row norms of A, based on the Shannon entropy. This should give */ +/* the right choice in most cases when the difference actually matters. */ +/* It may fail and pick the slower converging side. */ + + entra = 0.; + entrat = 0.; + if (l2tran) { + + xsc = 0.; + temp1 = 0.; + _starpu_dlassq_(n, &sva[1], &c__1, &xsc, &temp1); + temp1 = 1. / temp1; + + entra = 0.; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing 2nd power */ + d__1 = sva[p] / xsc; + big1 = d__1 * d__1 * temp1; + if (big1 != 0.) { + entra += big1 * log(big1); + } +/* L1113: */ + } + entra = -entra / log((doublereal) (*n)); + +/* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */ +/* It is derived from the diagonal of A^t * A. 
Do the same with the */ +/* diagonal of A * A^t, compute the entropy of the corresponding */ +/* probability distribution. Note that A * A^t and A^t * A have the */ +/* same trace. */ + + entrat = 0.; + i__1 = *n + *m; + for (p = *n + 1; p <= i__1; ++p) { +/* Computing 2nd power */ + d__1 = work[p] / xsc; + big1 = d__1 * d__1 * temp1; + if (big1 != 0.) { + entrat += big1 * log(big1); + } +/* L1114: */ + } + entrat = -entrat / log((doublereal) (*m)); + +/* Analyze the entropies and decide A or A^t. Smaller entropy */ +/* usually means better input for the algorithm. */ + + transp = entrat < entra; + +/* If A^t is better than A, transpose A. */ + + if (transp) { +/* In an optimal implementation, this trivial transpose */ +/* should be replaced with faster transpose. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + temp1 = a[q + p * a_dim1]; + a[q + p * a_dim1] = a[p + q * a_dim1]; + a[p + q * a_dim1] = temp1; +/* L1116: */ + } +/* L1115: */ + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = sva[p]; + sva[p] = work[*n + p]; +/* L1117: */ + } + temp1 = aapp; + aapp = aatmax; + aatmax = temp1; + temp1 = aaqq; + aaqq = aatmin; + aatmin = temp1; + kill = lsvec; + lsvec = rsvec; + rsvec = kill; + + rowpiv = TRUE_; + } + + } +/* END IF L2TRAN */ + +/* Scale the matrix so that its maximal singular value remains less */ +/* than DSQRT(BIG) -- the matrix is scaled so that its maximal column */ +/* has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep */ +/* DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and */ +/* BLAS routines that, in some implementations, are not capable of */ +/* working in the full interval [SFMIN,BIG] and that they may provoke */ +/* overflows in the intermediate results. If the singular values spread */ +/* from SFMIN to BIG, then DGESVJ will compute them. So, in that case, */ +/* one should use DGESVJ instead of DGEJSV. 
*/ + + big1 = sqrt(big); + temp1 = sqrt(big / (doublereal) (*n)); + + _starpu_dlascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr); + if (aaqq > aapp * sfmin) { + aaqq = aaqq / aapp * temp1; + } else { + aaqq = aaqq * temp1 / aapp; + } + temp1 *= scalem; + _starpu_dlascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr); + +/* To undo scaling at the end of this procedure, multiply the */ +/* computed singular values with USCAL2 / USCAL1. */ + + uscal1 = temp1; + uscal2 = aapp; + + if (l2kill) { +/* L2KILL enforces computation of nonzero singular values in */ +/* the restricted range of condition number of the initial A, */ +/* sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN). */ + xsc = sqrt(sfmin); + } else { + xsc = small; + +/* Now, if the condition number of A is too big, */ +/* sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN, */ +/* as a precaution measure, the full SVD is computed using DGESVJ */ +/* with accumulated Jacobi rotations. This provides numerically */ +/* more robust computation, at the cost of slightly increased run */ +/* time. Depending on the concrete implementation of BLAS and LAPACK */ +/* (i.e. how they behave in presence of extreme ill-conditioning) the */ +/* implementor may decide to remove this switch. */ + if (aaqq < sqrt(sfmin) && lsvec && rsvec) { + jracc = TRUE_; + } + + } + if (aaqq < xsc) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] < xsc) { + _starpu_dlaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], + lda); + sva[p] = 0.; + } +/* L700: */ + } + } + +/* Preconditioning using QR factorization with pivoting */ + + if (rowpiv) { +/* Optional row permutation (Bjoerck row pivoting): */ +/* A result by Cox and Higham shows that the Bjoerck's */ +/* row pivoting combined with standard column pivoting */ +/* has similar effect as Powell-Reid complete pivoting. */ +/* The ell-infinity norms of A are made nonincreasing. 
*/ + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = _starpu_idamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1; + iwork[(*n << 1) + p] = q; + if (p != q) { + temp1 = work[*m + *n + p]; + work[*m + *n + p] = work[*m + *n + q]; + work[*m + *n + q] = temp1; + } +/* L1952: */ + } + i__1 = *m - 1; + _starpu_dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], & + c__1); + } + +/* End of the preparation phase (scaling, optional sorting and */ +/* transposing, optional flushing of small columns). */ + +/* Preconditioning */ + +/* If the full SVD is needed, the right singular vectors are computed */ +/* from a matrix equation, and for that we need theoretical analysis */ +/* of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF. */ +/* In all other cases the first RR QRF can be chosen by other criteria */ +/* (eg speed by replacing global with restricted window pivoting, such */ +/* as in SGEQPX from TOMS # 782). Good results will be obtained using */ +/* SGEQPX with properly (!) chosen numerical parameters. */ +/* Any improvement of DGEQP3 improves overal performance of DGEJSV. */ + +/* A * P1 = Q1 * [ R1^t 0]^t: */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* .. all columns are free columns */ + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lwork - *n; + _starpu_dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & + i__1, &ierr); + +/* The upper triangular matrix R1 from the first QRF is inspected for */ +/* rank deficiency and possibilities for deflation, or possible */ +/* ill-conditioning. Depending on the user specified flag L2RANK, */ +/* the procedure explores possibilities to reduce the numerical */ +/* rank by inspecting the computed upper triangular factor. If */ +/* L2RANK or L2ABER are up, then DGEJSV will compute the SVD of */ +/* A + dA, where ||dA|| <= f(M,N)*EPSLN. */ + + nr = 1; + if (l2aber) { +/* Standard absolute error bound suffices. 
All sigma_i with */ +/* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */ +/* agressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPSLN*||A||. */ + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) >= temp1 * (d__1 = a[ + a_dim1 + 1], abs(d__1))) { + ++nr; + } else { + goto L3002; + } +/* L3001: */ + } +L3002: + ; + } else if (l2rank) { +/* .. similarly as above, only slightly more gentle (less agressive). */ +/* Sudden drop on the diagonal of R1 is used as the criterion for */ +/* close-to-rank-defficient. */ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - + 1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * + a_dim1], abs(d__3)) < small || l2kill && (d__4 = a[p + p * + a_dim1], abs(d__4)) < temp1) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* The goal is high relative accuracy. However, if the matrix */ +/* has high scaled condition number the relative accuracy is in */ +/* general not feasible. Later on, a condition number estimator */ +/* will be deployed to estimate the scaled condition number. */ +/* Here we just remove the underflowed part of the triangular */ +/* factor. This prevents the situation in which the code is */ +/* working hard to get the accuracy not warranted by the data. 
*/ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__1 = a[p + p * a_dim1], abs(d__1)) < small || l2kill && ( + d__2 = a[p + p * a_dim1], abs(d__2)) < temp1) { + goto L3302; + } + ++nr; +/* L3301: */ + } +L3302: + + ; + } + + almort = FALSE_; + if (nr == *n) { + maxprj = 1.; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = (d__1 = a[p + p * a_dim1], abs(d__1)) / sva[iwork[p]]; + maxprj = min(maxprj,temp1); +/* L3051: */ + } +/* Computing 2nd power */ + d__1 = maxprj; + if (d__1 * d__1 >= 1. - (doublereal) (*n) * epsln) { + almort = TRUE_; + } + } + + + sconda = -1.; + condr1 = -1.; + condr2 = -1.; + + if (errest) { + if (*n == nr) { + if (rsvec) { +/* .. V is available as workspace */ + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + _starpu_dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + _starpu_dpocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else if (lsvec) { +/* .. U is available as workspace */ + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + _starpu_dscal_(&p, &d__1, &u[p * u_dim1 + 1], &c__1); +/* L3054: */ + } + _starpu_dpocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else { + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + _starpu_dscal_(&p, &d__1, &work[*n + (p - 1) * *n + 1], &c__1); +/* L3052: */ + } +/* .. the columns of R are scaled to have unit Euclidean lengths. */ + _starpu_dpocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + * + n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } + sconda = 1. 
/ sqrt(temp1); +/* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ + } else { + sconda = -1.; + } + } + + l2pert = l2pert && (d__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(d__1)) + > sqrt(big1); +/* If there is no violent scaling, artificial perturbation is not needed. */ + +/* Phase 3: */ + + if (! (rsvec || lsvec)) { + +/* Singular Values only */ + +/* .. transpose A(1:NR,1:N) */ +/* Computing MIN */ + i__2 = *n - 1; + i__1 = min(i__2,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p; + _starpu_dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1946: */ + } + +/* The following two DO-loops introduce small relative perturbation */ +/* into the strict upper triangle of the lower triangular matrix. */ +/* Small entries below the main diagonal are also changed. */ +/* This modification is useful if the computing environment does not */ +/* provide/allow FLUSH TO ZERO underflow, for it prevents many */ +/* annoying denormalized numbers in case of strongly scaled matrices. */ +/* The perturbation is structured so that it does not introduce any */ +/* new perturbation of the singular values, and it does not destroy */ +/* the job done by the preconditioner. */ +/* The licence for this perturbation is in the variable L2PERT, which */ +/* should be .FALSE. if FLUSH TO ZERO underflow is active. */ + + if (! almort) { + + if (l2pert) { +/* XSC = DSQRT(SMALL) */ + xsc = epsln / (doublereal) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * + a_dim1]); + } +/* L4949: */ + } +/* L4947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + + 1], lda); + } + +/* .. 
second preconditioning using the QR factorization */ + + i__1 = *lwork - *n; + _starpu_dgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + +/* .. and transpose upper to lower triangular */ + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + _starpu_dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1948: */ + } + + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + +/* .. again some perturbation (a "background noise") is added */ +/* to drown denormals */ + if (l2pert) { +/* XSC = DSQRT(SMALL) */ + xsc = epsln / (doublereal) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * a_dim1]) + ; + } +/* L1949: */ + } +/* L1947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], + lda); + } + +/* .. and one-sided Jacobi rotations are started on a lower */ +/* triangular matrix (plus perturbation which is ignored in */ +/* the part which destroys triangular form (confusing?!)) */ + + _starpu_dgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, & + v[v_offset], ldv, &work[1], lwork, info); + + scalem = work[1]; + numrank = i_dnnt(&work[2]); + + + } else if (rsvec && ! lsvec) { + +/* -> Singular Values and Right Singular Vectors <- */ + + if (almort) { + +/* .. 
in this case NR equals N */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L1998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + _starpu_dgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, & + a[a_offset], lda, &work[1], lwork, info); + scalem = work[1]; + numrank = i_dnnt(&work[2]); + } else { + +/* .. two more QR factorizations ( one QRF is not enough, two require */ +/* accumulated product of Jacobi rotations, three are perfect ) */ + + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], + lda); + i__1 = *lwork - *n; + _starpu_dgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + _starpu_dlacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + i__1 = *lwork - (*n << 1); + _starpu_dgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << + 1) + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + _starpu_dcopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], & + c__1); +/* L8998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + _starpu_dgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], & + nr, &u[u_offset], ldu, &work[*n + 1], lwork, info); + scalem = work[*n + 1]; + numrank = i_dnnt(&work[*n + 2]); + if (nr < *n) { + i__1 = *n - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], + ldv); + i__1 = *n - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 
+ 1) * v_dim1], ldv); + } + + i__1 = *lwork - *n; + _starpu_dormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[ + 1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr); + + } + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + _starpu_dcopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda); +/* L8991: */ + } + _starpu_dlacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv); + + if (transp) { + _starpu_dlacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu); + } + + } else if (lsvec && ! rsvec) { + +/* -#- Singular Values and Left Singular Vectors -#- */ + +/* .. second preconditioning step to avoid need to accumulate */ +/* Jacobi rotations in the Jacobi iterations. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1); +/* L1965: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - (*n << 1); + _starpu_dgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1] +, &i__1, &ierr); + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + _starpu_dcopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * + u_dim1], &c__1); +/* L1967: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - *n; + _starpu_dgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, + &a[a_offset], lda, &work[*n + 1], &i__1, info); + scalem = work[*n + 1]; + numrank = i_dnnt(&work[*n + 2]); + + if (nr < *m) { + i__1 = *m - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + + 1) * u_dim1], ldu); + } + } + + 
i__1 = *lwork - *n; + _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); +/* L1974: */ + } + + if (transp) { + _starpu_dlacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv); + } + + } else { + +/* -#- Full SVD -#- */ + + if (! jracc) { + + if (! almort) { + +/* Second Preconditioning Step (QRF [with pivoting]) */ +/* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */ +/* equivalent to an LQF CALL. Since in many libraries the QRF */ +/* seems to be better optimized than the LQF, we do explicit */ +/* transpose and use the QRF. This is subject to changes in an */ +/* optimized implementation of DGEJSV. */ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], + &c__1); +/* L1968: */ + } + +/* .. the following two loops perturb small entries to avoid */ +/* denormals in the second QR factorization, where they are */ +/* as good as zeros. This is done to avoid painfully slow */ +/* computation with denormals. The relative size of the perturbation */ +/* is a parameter that can be changed by the implementer. */ +/* This perturbation device will be obsolete on machines with */ +/* properly implemented arithmetic. */ +/* To switch it off, set L2PERT=.FALSE. To remove it from the */ +/* code, remove the action under L2PERT=.TRUE., leave the ELSE part. */ +/* The following two loops should be blocked and fused with the */ +/* transposed copy above. 
*/ + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) + <= temp1 || p < q) { + v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L2968: */ + } +/* L2969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Estimate the row scaled condition number of R1 */ +/* (If R1 is rectangular, N > NR, then the condition number */ +/* of the leading NR x NR submatrix is estimated.) */ + + _starpu_dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1] +, &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + temp1 = _starpu_dnrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); + i__2 = nr - p + 1; + d__1 = 1. / temp1; + _starpu_dscal_(&i__2, &d__1, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); +/* L3950: */ + } + _starpu_dpocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, & + temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (* + n << 1) + 1], &ierr); + condr1 = 1. / sqrt(temp1); +/* .. here need a second oppinion on the condition number */ +/* .. then assume worst case scenario */ +/* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) */ +/* more conservative <=> CONDR1 .LT. DSQRT(DBLE(N)) */ + + cond_ok__ = sqrt((doublereal) nr); +/* [TP] COND_OK is a tuning parameter. */ + if (condr1 < cond_ok__) { +/* .. the second QRF without pivoting. Note: in an optimized */ +/* implementation, this QRF should be implemented as the QRF */ +/* of a lower triangular matrix. 
*/ +/* R1^t = Q2 * R2 */ + i__1 = *lwork - (*n << 1); + _starpu_dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(* + n << 1) + 1], &i__1, &ierr); + + if (l2pert) { + xsc = sqrt(small) / epsln; + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * min(d__3,d__4); + if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= + temp1) { + v[q + p * v_dim1] = d_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3958: */ + } +/* L3959: */ + } + } + + if (nr != *n) { + _starpu_dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); + } +/* .. save ... */ + +/* .. this transposed copy should be better than naive */ + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + _starpu_dcopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 + + p * v_dim1], &c__1); +/* L1969: */ + } + + condr2 = condr1; + + } else { + +/* .. ill-conditioned case: second QRF with pivoting */ +/* Note that windowed pivoting would be equaly good */ +/* numerically, and more run-time efficient. So, in */ +/* an optimal implementation, the next call to DGEQP3 */ +/* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */ +/* with properly (carefully) chosen parameters. 
*/ + +/* R1^t * P2 = Q2 * R2 */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + iwork[*n + p] = 0; +/* L3003: */ + } + i__1 = *lwork - (*n << 1); + _starpu_dgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[* + n + 1], &work[(*n << 1) + 1], &i__1, &ierr); +/* * CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */ +/* * & LWORK-2*N, IERR ) */ + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * min(d__3,d__4); + if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= + temp1) { + v[q + p * v_dim1] = d_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3968: */ + } +/* L3969: */ + } + } + + _starpu_dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + + 1], n); + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * min(d__3,d__4); + v[p + q * v_dim1] = -d_sign(&temp1, &v[q + p * + v_dim1]); +/* L8971: */ + } +/* L8970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 + + 2], ldv); + } +/* Now, compute R2 = L3 * Q3, the LQ factorization. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n + * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], & + i__1, &ierr); +/* .. and estimate the condition number */ + _starpu_dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + temp1 = _starpu_dnrm2_(&p, &work[(*n << 1) + *n * nr + nr + p] +, &nr); + d__1 = 1. 
/ temp1; + _starpu_dscal_(&p, &d__1, &work[(*n << 1) + *n * nr + nr + p], + &nr); +/* L4950: */ + } + _starpu_dpocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], & + nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + + nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], & + ierr); + condr2 = 1. / sqrt(temp1); + + if (condr2 >= cond_ok__) { +/* .. save the Householder vectors used for Q3 */ +/* (this overwrittes the copy of R2, as it will not be */ +/* needed in this branch, but it does not overwritte the */ +/* Huseholder vectors of Q2.). */ + _starpu_dlacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); +/* .. and the rest of the information on Q3 is in */ +/* WORK(2*N+N*NR+1:2*N+N*NR+N) */ + } + + } + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + temp1 = xsc * v[q + q * v_dim1]; + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* V(p,q) = - DSIGN( TEMP1, V(q,p) ) */ + v[p + q * v_dim1] = -d_sign(&temp1, &v[p + q * + v_dim1]); +/* L4969: */ + } +/* L4968: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Second preconditioning finished; continue with Jacobi SVD */ +/* The input matrix is lower trinagular. */ + +/* Recover the right singular vectors as solution of a well */ +/* conditioned triangular matrix equation. */ + + if (condr1 < cond_ok__) { + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + _starpu_dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + _starpu_dscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1); +/* L3970: */ + } +/* .. 
pick the right matrix equation and solve it */ + + if (nr == *n) { +/* :)) .. best case, R1 is inverted. The solution of this matrix */ +/* equation is Q2*V2 = the product of the Jacobi rotations */ +/* used in DGESVJ, premultiplied with the orthogonal matrix */ +/* from the second QR factorization. */ + _starpu_dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[ + a_offset], lda, &v[v_offset], ldv); + } else { +/* .. R1 is well conditioned, but non-square. Transpose(R2) */ +/* is inverted to get the product of the Jacobi rotations */ +/* used in DGESVJ. The Q-factor from the second QR */ +/* factorization is then built in explicitly. */ + _starpu_dtrsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(* + n << 1) + 1], n, &v[v_offset], ldv); + if (nr < *n) { + i__1 = *n - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + + 1 + v_dim1], ldv); + i__1 = *n - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + + 1) * v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + + 1 + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, + &work[*n + 1], &v[v_offset], ldv, &work[(*n << + 1) + *n * nr + nr + 1], &i__1, &ierr); + } + + } else if (condr2 < cond_ok__) { + +/* :) .. the input matrix A is very likely a relative of */ +/* the Kahan matrix :) */ +/* The matrix R2 is inverted. The solution of the matrix equation */ +/* is Q3^T*V3 = the product of the Jacobi rotations (appplied to */ +/* the lower triangular L3 from the LQ factorization of */ +/* R2=L3*Q3), pre-multiplied with the transposed Q3. 
*/ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + _starpu_dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + _starpu_dscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1); +/* L3870: */ + } + _starpu_dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << + 1) + 1], n, &u[u_offset], ldu); +/* .. apply the permutation from the second QR factorization */ + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L872: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L874: */ + } +/* L873: */ + } + if (nr < *n) { + i__1 = *n - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + } else { +/* Last line of defense. */ +/* #:( This is a rather pathological case: no scaled condition */ +/* improvement after two pivoted QR factorizations. Other */ +/* possibility is that the rank revealing QR factorization */ +/* or the condition estimator has failed, or the COND_OK */ +/* is set very close to ONE (which is unnecessary). 
Normally, */ +/* this branch should never be executed, but in rare cases of */ +/* failure of the RRQR or condition estimator, the last line of */ +/* defense ensures that DGEJSV completes the task. */ +/* Compute the full SVD of L3 using DGESVJ with explicit */ +/* accumulation of Jacobi rotations. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, + &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu, + &work[(*n << 1) + *n * nr + nr + 1], &i__1, & + ierr); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L772: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L774: */ + } +/* L773: */ + } + + } + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. 
*/ + + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p] + ; +/* L973: */ + } + xsc = 1. / _starpu_dnrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + _starpu_dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L1972: */ + } +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + if (nr < *m) { + i__1 = *m - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* matrix U. This applies to all cases. */ + + i__1 = *lwork - *n; + _starpu_dormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); +/* The columns of U are normalized. The cost is O(M*N) flops. */ + temp1 = sqrt((doublereal) (*m)) * epsln; + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L1973: */ + } + +/* If the initial QRF is computed with row pivoting, the left */ +/* singular vectors must be adjusted. */ + + if (rowpiv) { + i__1 = *m - 1; + _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } else { + +/* .. 
the initial matrix A has almost orthogonal columns and */ +/* the second QRF is not needed */ + + _starpu_dlacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n); + if (l2pert) { + xsc = sqrt(small); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = xsc * work[*n + (p - 1) * *n + p]; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + work[*n + (q - 1) * *n + p] = -d_sign(&temp1, & + work[*n + (p - 1) * *n + q]); +/* L5971: */ + } +/* L5970: */ + } + } else { + i__1 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + + 2], n); + } + + i__1 = *lwork - *n - *n * *n; + _starpu_dgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n, + &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, + info); + + scalem = work[*n + *n * *n + 1]; + numrank = i_dnnt(&work[*n + *n * *n + 2]); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + _starpu_dcopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * + u_dim1 + 1], &c__1); + _starpu_dscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1); +/* L6970: */ + } + + _starpu_dtrsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[ + a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + _starpu_dcopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv); +/* L6972: */ + } + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / _starpu_dnrm2_(n, &v[p * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + _starpu_dscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1); + } +/* L6971: */ + } + +/* Assemble the left singular vector matrix U (M x N). 
*/ + + if (*n < *m) { + i__1 = *m - *n; + _starpu_dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[nr + 1 + u_dim1] +, ldu); + if (*n < n1) { + i__1 = n1 - *n; + _starpu_dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + + (*n + 1) * u_dim1], ldu); + } + } + i__1 = *lwork - *n; + _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + temp1 = sqrt((doublereal) (*m)) * epsln; + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L6973: */ + } + + if (rowpiv) { + i__1 = *m - 1; + _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } + +/* end of the >> almost orthogonal case << in the full SVD */ + + } else { + +/* This branch deploys a preconditioned Jacobi SVD with explicitly */ +/* accumulated rotations. It is included as optional, mainly for */ +/* experimental purposes. It does perfom well, and can also be used. */ +/* In this implementation, this branch will be automatically activated */ +/* if the condition number sigma_max(A) / sigma_min(A) is predicted */ +/* to be greater than the overflow threshold. This is because the */ +/* a posteriori computation of the singular vectors assumes robust */ +/* implementation of BLAS and some LAPACK procedures, capable of working */ +/* in presence of extreme values. Since that is not always the case, ... 
*/ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L7968: */ + } + + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) <= + temp1 || p < q) { + v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L5968: */ + } +/* L5969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + } + i__1 = *lwork - (*n << 1); + _starpu_dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) + + 1], &i__1, &ierr); + _starpu_dlacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + _starpu_dcopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], & + c__1); +/* L7969: */ + } + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* Computing MIN */ + d__3 = (d__1 = u[p + p * u_dim1], abs(d__1)), d__4 = ( + d__2 = u[q + q * u_dim1], abs(d__2)); + temp1 = xsc * min(d__3,d__4); + u[p + q * u_dim1] = -d_sign(&temp1, &u[q + p * u_dim1] + ); +/* L9971: */ + } +/* L9970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + + 1], ldu); + } + i__1 = *lwork - (*n << 1) - *n * nr; + _starpu_dgesvj_("G", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, & + v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, + info); + scalem = work[(*n << 1) + *n * nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + 
v_dim1], + ldv); + i__1 = *n - nr; + _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1] +, &i__1, &ierr); + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. */ + + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L8972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]; +/* L8973: */ + } + xsc = 1. / _starpu_dnrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + _starpu_dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L7972: */ + } + +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + + if (*n < *m) { + i__1 = *m - *n; + _starpu_dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[nr + 1 + u_dim1], + ldu); + if (*n < n1) { + i__1 = n1 - *n; + _starpu_dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (* + n + 1) * u_dim1], ldu); + } + } + + i__1 = *lwork - *n; + _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], & + u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + + } + if (transp) { +/* .. 
swap U and V because the procedure worked on A^t */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + _starpu_dswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & + c__1); +/* L6974: */ + } + } + + } +/* end of the full SVD */ + +/* Undo scaling, if necessary (and possible) */ + + if (uscal2 <= big / sva[1] * uscal1) { + _starpu_dlascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, & + ierr); + uscal1 = 1.; + uscal2 = 1.; + } + + if (nr < *n) { + i__1 = *n; + for (p = nr + 1; p <= i__1; ++p) { + sva[p] = 0.; +/* L3004: */ + } + } + + work[1] = uscal2 * scalem; + work[2] = uscal1; + if (errest) { + work[3] = sconda; + } + if (lsvec && rsvec) { + work[4] = condr1; + work[5] = condr2; + } + if (l2tran) { + work[6] = entra; + work[7] = entrat; + } + + iwork[1] = nr; + iwork[2] = numrank; + iwork[3] = warning; + + return 0; +/* .. */ +/* .. END OF DGEJSV */ +/* .. */ +} /* _starpu_dgejsv_ */ diff --git a/min-dgels/base/SRC/dgelq2.c b/min-dgels/base/SRC/dgelq2.c new file mode 100644 index 0000000..a5ae415 --- /dev/null +++ b/min-dgels/base/SRC/dgelq2.c @@ -0,0 +1,157 @@ +/* dgelq2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals: a_dim1 and a_offset implement the Fortran-style subscripting of A; i__1..i__3 hold loop bounds and values that must be passed by address */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables: i__ is the current row and reflector index, k = min(m,n) is the number of reflectors, aii saves A(i,i) while that entry is overwritten with 1 */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELQ2 computes an LQ factorization of a real m by n matrix A: */ +/* A = L * Q. The loop in the function body generates one elementary reflector per row and applies it to the rows below. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix A. */ +/* On exit, the elements on and below the diagonal of the array */ +/* contain the m by min(m,n) lower trapezoidal matrix L (L is */ +/* lower triangular if m <= n); the elements above the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details).
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ +/* Handed to DLARF as its last argument when H(i) is applied from the right. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value (the code only tests M, N and LDA, so the possible values are -1, -2 and -4) */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + +/* Test the input arguments */ + + /* Parameter adjustments: shift the array bases so that a[i + j * a_dim1], tau[i] and work[i] accept 1-based Fortran subscripts */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { /* report the position of the offending argument through XERBLA and return without touching A */ + i__1 = -(*info); + _starpu_xerbla_("DGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN: min(i+1,n) keeps the column subscript of the reflector tail within 1..n when i equals n */ + i__3 = i__ + 1; + _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1] +, lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; /* save A(i,i) ... */ + a[i__ + i__ * a_dim1] = 1.; /* ... and store v(i) = 1 so that row i holds the whole vector v passed to DLARF */ + i__2 = *m - i__; + i__3 = *n - i__ + 1; + _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; /* restore the saved diagonal entry */ + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* _starpu_dgelq2_ */ diff --git a/min-dgels/base/SRC/dgelqf.c b/min-dgels/base/SRC/dgelqf.c new file mode 100644 index 0000000..a1e1147 --- /dev/null +++ b/min-dgels/base/SRC/dgelqf.c @@ -0,0 +1,251 @@ +/* dgelqf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: every argument is passed by address, so the literals used in the calls below need storage */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals: a_dim1 and a_offset implement the Fortran-style subscripting of A; i__1..i__4 hold loop bounds and values that must be passed by address */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables: k = min(m,n); nb is the block size, ib the height of the current block, nx the crossover point, nbmin the smallest usable block size, iws the workspace size reported on exit; iinfo receives the status of DGELQ2 and is not examined */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELQF computes an LQ factorization of a real M-by-N matrix A: */ +/* A = L * Q. Row blocks are factored with DGELQ2 and the rows below each block are updated with DLARFT and DLARFB; the last or only block is factored with DGELQ2 alone. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A.
M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and below the diagonal of the array */ +/* contain the m-by-min(m,n) lower trapezoidal matrix L (L is */ +/* lower triangular if m <= n); the elements above the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. In this translation WORK(1) is M*NB after a workspace query, 1 when min(M,N) = 0, and the value of iws after a factorization. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value (the code can produce -1, -2, -4 and -7) */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Local Scalars ..
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: shift the array bases so that a[i + j * a_dim1], tau[i] and work[i] accept 1-based Fortran subscripts */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); /* ILAENV selector 1: the block size NB of the LWORK description */ + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; /* stored before the arguments are checked, so it is also the answer to a workspace query */ + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; /* defaults used when the blocked path is not taken */ + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; /* computed with the block size in effect before any reduction below */ + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially: rows 1 .. k - nx are processed in steps of nb (nb >= nbmin >= 2 here, so the generic f2c step-sign test always takes the upward branch) */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ?
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the LQ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + _starpu_dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ +/* The factor is written to work[1] with leading dimension ldwork. */ + + i__3 = *n - i__ + 1; + _starpu_dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ +/* work[1] is passed again as the triangular factor and work[ib + 1] as the scratch area, both with leading dimension ldwork. */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + _starpu_dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ +/* On arrival i__ is 1, or the first row index not handled by the blocked loop. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + _starpu_dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] +, &iinfo); + } + + work[1] = (doublereal) iws; /* m on the unblocked path, otherwise m times the block size computed above */ + return 0; + +/* End of DGELQF */ + +} /* _starpu_dgelqf_ */ diff --git a/min-dgels/base/SRC/dgels.c b/min-dgels/base/SRC/dgels.c new file mode 100644 index 0000000..173cb62 --- /dev/null +++ b/min-dgels/base/SRC/dgels.c @@ -0,0 +1,515 @@ +/* dgels.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: every argument is passed by address, so the literals used in the calls below need storage */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b33 = 0.; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals: the dim1 and offset pairs implement the Fortran-style subscripting of A and B; i__1 and i__2 hold loop bounds and values that must be passed by address */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables: mn = min(m,n); tpsd is true when TRANS is not N; iascl and ibscl record how A and B were scaled (0 none, 1 up to smlnum, 2 down to bignum); brow is the row count of the right hand side, scllen the row count of the solution; wsize is the workspace size stored in work[1] */ + integer i__, j, nb, mn; + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer iascl, ibscl; + extern logical _starpu_lsame_(char *, char *); + integer wsize; + doublereal rwork[1]; /* one-element scratch array handed to DLANGE */ + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer scllen; + 
doublereal bignum; + extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELS solves overdetermined or underdetermined real linear systems */ +/* involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* factorization of A. It is assumed that A has full rank. */ + +/* The following options are provided: */ + +/* 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* an overdetermined system, i.e., solve the least squares problem */ +/* minimize || B - A*X ||. */ + +/* 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* an underdetermined system A * X = B. */ + +/* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* an underdetermined system A**T * X = B. */ + +/* 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* an overdetermined system, i.e., solve the least squares problem */ +/* minimize || B - A**T * X ||. */ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* matrix X.
*/ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': the linear system involves A; */ +/* = 'T': the linear system involves A**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of */ +/* columns of the matrices B and X. NRHS >=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if M >= N, A is overwritten by details of its QR */ +/* factorization as returned by DGEQRF; */ +/* if M < N, A is overwritten by details of its LQ */ +/* factorization as returned by DGELQF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the matrix B of right hand side vectors, stored */ +/* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* if TRANS = 'T'. */ +/* On exit, if INFO = 0, B is overwritten by the solution */ +/* vectors, stored columnwise: */ +/* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* squares solution vectors; the residual sum of squares for the */ +/* solution in each column is given by the sum of squares of */ +/* elements N+1 to M in that column; */ +/* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* least squares solution vectors; the residual sum of squares */ +/* for the solution in each column is given by the sum of */ +/* squares of elements M+1 to N in that column. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= MAX(1,M,N).
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= max( 1, MN + max( MN, NRHS ) ). */ +/* For optimal performance, */ +/* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */ +/* where MN = min(M,N) and NB is the optimum block size. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of the */ +/* triangular factor of A is zero, so that A does not have */ +/* full rank; the least squares solution could not be */ +/* computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments: shift the array bases so that a[i + j * a_dim1], b[i + j * b_dim1] and work[i] accept 1-based Fortran subscripts */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = min(*m,*n); + lquery = *lwork == -1; + if (! 
(_starpu_lsame_(trans, "N") || _starpu_lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else /* if(complicated condition): LDB must be at least max(1,M,N) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*ldb < max(i__1,*n)) { + *info = -8; + } else /* if(complicated condition): LWORK must reach the documented minimum unless this is a workspace query */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + max(mn,*nrhs); + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size */ +/* This section also runs when INFO = -10, so work[1] receives wsize even when LWORK is rejected as too small. */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (_starpu_lsame_(trans, "N")) { + tpsd = FALSE_; + } + + if (*m >= *n) { + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, & + c_n1); + nb = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, & + c_n1); + nb = max(i__1,i__2); + } + } else { + nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, & + c_n1); + nb = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, & + c_n1); + nb = max(i__1,i__2); + } + } + +/* Computing MAX */ + i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb; + wsize = max(i__1,i__2); + work[1] = (doublereal) wsize; + + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELS ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ +/* When any of M, N, NRHS is zero, the leading max(M,N) rows of B are set to zero. */ + +/* Computing MIN */ + i__1 = min(*m,*n); + if (min(i__1,*nrhs) == 0) { + i__1 = max(*m,*n); + _starpu_dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); + bignum = 
1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A, B if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ +/* The jump to L50 skips the factorization, the solve and the rescaling. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = _starpu_dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ +/* work[1] is passed as the TAU array and work[mn + 1] as the scratch area of length lwork - mn, so work[1] no longer holds wsize from here on. */ + + i__1 = *lwork - mn; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least N, optimally N*NB */ + + if (! 
tpsd) { + +/* Least-Squares Problem min || A * X - B || */ + +/* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] +, lda, &b[b_offset], ldb, info); + + if (*info > 0) { /* positive INFO from DTRTRS: return at once, skipping the Undo scaling section and the final work[1] = wsize */ + return 0; + } + + scllen = *n; + + } else { + +/* Underdetermined system of equations A' * X = B (minimum norm solution, case 3 of the Purpose section) */ + +/* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */ + + _starpu_dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ +/* Same workspace layout as the QR path: TAU in work[1], scratch from work[mn + 1]. */ + + i__1 = *lwork - mn; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least M, optimally M*NB. */ + + if (! 
tpsd) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + _starpu_dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] +, lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system min || A' * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + _starpu_dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */ + + _starpu_dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ +/* Only the first scllen rows of B are rescaled. The pair of scale values used on A is applied again in the same order, while the pair used on B is applied in reverse order. */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] +, ldb, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] +, ldb, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] +, ldb, info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] +, ldb, info); + } + +L50: + work[1] = (doublereal) wsize; /* restore the workspace size; work[1] was used as TAU storage during the factorization */ + + return 0; + +/* End of DGELS */ + +} /* _starpu_dgels_ */ diff --git a/min-dgels/base/SRC/dgelsd.c b/min-dgels/base/SRC/dgelsd.c new file mode 100644 index
0000000..8c112c5 --- /dev/null +++ b/min-dgels/base/SRC/dgelsd.c @@ -0,0 +1,693 @@ +/* dgelsd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c_n1 = -1; +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__1 = 1; +static doublereal c_b82 = 0.; + +/* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer ie, il, mm; + doublereal eps, anrm, bnrm; + integer itau, nlvl, iascl, ibscl; + doublereal sfmin; + integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal 
*, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), _starpu_dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer wlalsd; + extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer ldwork; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical lquery; + integer smlsiz; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELSD computes the minimum-norm solution to a real linear least */ +/* squares problem: */ +/* minimize 2-norm(| b - A*x |) */ +/* using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* matrix which may be rank-deficient. 
*/ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* matrix X. */ + +/* The problem is solved in three steps: */ +/* (1) Reduce the coefficient matrix A to bidiagonal form with */ +/* Householder transformations, reducing the original problem */ +/* into a "bidiagonal least squares problem" (BLS) */ +/* (2) Solve the BLS using a divide and conquer approach. */ +/* (3) Apply back all the Householder transformations to solve */ +/* the original least squares problem. */ + +/* The effective rank of A is determined by treating as zero those */ +/* singular values which are less than RCOND times the largest singular */ +/* value. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A has been destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the M-by-NRHS right hand side matrix B. */ +/* On exit, B is overwritten by the N-by-NRHS solution */ +/* matrix X. 
If m >= n and RANK = n, the residual */ +/* sum-of-squares for the solution in the i-th column is given */ +/* by the sum of squares of elements n+1:m in that column. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,max(M,N)). */ + +/* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The singular values of A in decreasing order. */ +/* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ + +/* RCOND (input) DOUBLE PRECISION */ +/* RCOND is used to determine the effective rank of A. */ +/* Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* If RCOND < 0, machine precision is used instead. */ + +/* RANK (output) INTEGER */ +/* The effective rank of A, i.e., the number of singular values */ +/* which are greater than RCOND*S(1). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK must be at least 1. */ +/* The exact minimum amount of workspace needed depends on M, */ +/* N and NRHS. As long as LWORK is at least */ +/* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */ +/* if M is greater than or equal to N or */ +/* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */ +/* if M is less than N, the code will execute correctly. */ +/* SMLSIZ is returned by ILAENV and is equal to the maximum */ +/* size of the subproblems at the bottom of the computation */ +/* tree (usually about 25), and */ +/* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */ +/* For good performance, LWORK should generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. 
*/ + +/* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, */ +/* where MINMN = MIN( M,N ). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: the algorithm for computing the SVD failed to converge; */ +/* if INFO = i, i off-diagonal elements of an intermediate */ +/* bidiagonal form did not converge to zero. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* California at Berkeley, USA */ +/* Osni Marques, LBNL/NERSC, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = _starpu_ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = _starpu_ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0); + +/* Compute workspace. */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. 
*/ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + minwrk = 1; + minmn = max(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = max(i__1,0); + + if (*info == 0) { + maxwrk = 0; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, + n, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * _starpu_ilaenv_(&c__1, "DORMQR", "LT", + m, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * _starpu_ilaenv_(&c__1, "DGEBRD" +, " ", &mm, n, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR", + "QLT", &mm, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows. 
*/ + + maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, + &c_n1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * _starpu_ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + _starpu_ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * _starpu_ilaenv_(&c__1, "DORMLQ", + "LT", n, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); +/* XXX: Ensure the Path 2a case below is triggered. The workspace */ +/* calculation should use queries for all routines eventually. */ +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4); + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. 
*/ + + maxwrk = *m * 3 + (*n + *m) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLT", m, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELSD", &i__1); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = _starpu_dlamch_("P"); + sfmin = _starpu_dlamch_("S"); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb); + _starpu_dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. 
*/ + + bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R. */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* Multiply B by transpose(Q). */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A. */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R. 
*/ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + _starpu_dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm. */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* Compute A=L*Q. */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. 
*/ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL). */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L. */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + _starpu_dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* Multiply transpose(Q) by B. */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A. 
*/ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors. */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] +, &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + _starpu_dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] +, &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSD */ + +} /* _starpu_dgelsd_ */ diff --git a/min-dgels/base/SRC/dgelss.c b/min-dgels/base/SRC/dgelss.c new file mode 100644 index 0000000..7013acf --- /dev/null +++ b/min-dgels/base/SRC/dgelss.c @@ -0,0 +1,828 @@ +/* dgelss.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c_n1 = -1; +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b74 = 0.; +static doublereal c_b108 = 1.; + +/* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + integer i__, bl, ie, il, mm; + doublereal eps, thr, anrm, bnrm; + integer itau; + doublereal vdum[1]; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iascl, ibscl; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_drscl_(integer *, + doublereal *, doublereal *, integer *); + integer chunk; + doublereal sfmin; + integer minmn; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer maxmn, itaup, itauq, mnthr, iwork; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + 
doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer bdspac; + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dbdsqr_(char *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dorgbr_(char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + doublereal bignum; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormlq_(char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer ldwork; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical 
lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELSS computes the minimum norm solution to a real linear least */ +/* squares problem: */ + +/* Minimize 2-norm(| b - A*x |). */ + +/* using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* matrix which may be rank-deficient. */ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */ +/* X. */ + +/* The effective rank of A is determined by treating as zero those */ +/* singular values which are less than RCOND times the largest singular */ +/* value. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the first min(m,n) rows of A are overwritten with */ +/* its right singular vectors, stored rowwise. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the M-by-NRHS right hand side matrix B. */ +/* On exit, B is overwritten by the N-by-NRHS solution */ +/* matrix X. If m >= n and RANK = n, the residual */ +/* sum-of-squares for the solution in the i-th column is given */ +/* by the sum of squares of elements n+1:m in that column. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,max(M,N)). */ + +/* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The singular values of A in decreasing order. */ +/* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ + +/* RCOND (input) DOUBLE PRECISION */ +/* RCOND is used to determine the effective rank of A. */ +/* Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* If RCOND < 0, machine precision is used instead. */ + +/* RANK (output) INTEGER */ +/* The effective rank of A, i.e., the number of singular values */ +/* which are greater than RCOND*S(1). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 1, and also: */ +/* LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) */ +/* For good performance, LWORK should generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: the algorithm for computing the SVD failed to converge; */ +/* if INFO = i, i off-diagonal elements of an intermediate */ +/* bidiagonal form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + mm = *m; + mnthr = _starpu_ilaenv_(&c__6, "DGELSS", " ", m, n, nrhs, &c_n1); + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than */ +/* columns */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", + " ", m, n, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * _starpu_ilaenv_(&c__1, "DORMQR", + "LT", m, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 5; + bdspac = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", &mm, n, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLT", &mm, nrhs, n, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * 
_starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + maxwrk = max(i__1,i__2); + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, + i__2); + minwrk = max(i__1,bdspac); + maxwrk = max(minwrk,maxwrk); + } + if (*n > *m) { + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *m * 5; + bdspac = max(i__1,i__2); +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, + i__2); + minwrk = max(i__1,bdspac); + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows */ + + maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * + _starpu_ilaenv_(&c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * _starpu_ilaenv_(&c__1, "DORMLQ" +, "LT", n, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - underdetermined */ + + maxwrk = *m * 3 + (*n + *m) * _starpu_ilaenv_(&c__1, "DGEBRD", + " ", m, n, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * _starpu_ilaenv_(&c__1, + "DORMBR", 
"QLT", m, nrhs, m, &c_n1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" + "BR", "P", m, n, m, &c_n1); + maxwrk = max(i__1,i__2); + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = max(i__1,i__2); + } + } + maxwrk = max(minwrk,maxwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELSS", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + eps = _starpu_dlamch_("P"); + sfmin = _starpu_dlamch_("S"); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[b_offset], ldb); + _starpu_dlaset_("F", &minmn, &c__1, &c_b74, &c_b74, &s[1], &c__1); + *rank = 0; + goto L70; + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. 
&& bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Overdetermined case */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, + info); + +/* Multiply B by transpose(Q) */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + +/* Zero out below R */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("L", &i__1, &i__2, &c_b74, &c_b74, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors of R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], 
& + i__1, info); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration */ +/* multiply B by transpose of left singular vectors */ +/* compute right singular vectors in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, + vdum, &c__1, &b[b_offset], ldb, &work[iwork], info) + ; + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1], + ldb); + } +/* L10: */ + } + +/* Multiply B by right singular vectors */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + _starpu_dgemm_("T", "N", n, nrhs, n, &c_b108, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b74, &work[1], ldb); + _starpu_dlacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb) + ; + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + _starpu_dgemm_("T", "N", n, &bl, n, &c_b108, &a[a_offset], lda, &b[ + i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n); + _starpu_dlacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb); +/* L20: */ + } + } else { + _starpu_dgemv_("T", n, n, &c_b108, &a[a_offset], lda, &b[b_offset], &c__1, + &c_b74, &work[1], &c__1); + _starpu_dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max( + i__2,*nrhs), i__1 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__2,i__1)) { + ldwork = *lda; + } + itau = 1; + iwork = *m + 1; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, + info); + il = iwork; + +/* Copy L to WORK(IL), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__2 = *m - 1; + i__1 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__1, &c_b74, &c_b74, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, info); + +/* Multiply B by 
transpose of left bidiagonalizing vectors of L */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[iwork], &i__2, info); + +/* Generate right bidiagonalizing vectors of R in WORK(IL) */ +/* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ + iwork], &i__2, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of L in WORK(IL) and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need M*M+M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & + ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] +, info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + if (s[i__] > thr) { + _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1] +, ldb); + } +/* L30: */ + } + iwork = ie; + +/* Multiply B by right singular vectors of L in WORK(IL) */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ + + if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { + _starpu_dgemm_("T", "N", m, nrhs, m, &c_b108, &work[il], &ldwork, &b[ + b_offset], ldb, &c_b74, &work[iwork], ldb); + _starpu_dlacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = (*lwork - iwork + 1) / *m; + i__2 = *nrhs; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + _starpu_dgemm_("T", "N", m, &bl, m, &c_b108, &work[il], &ldwork, & + b[i__ * b_dim1 + 1], ldb, &c_b74, &work[iwork], m); + _starpu_dlacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1] +, ldb); +/* L40: */ + } + } else { + _starpu_dgemv_("T", m, m, &c_b108, &work[il], &ldwork, &b[b_dim1 + 1], + &c__1, &c_b74, &work[iwork], &c__1); + _starpu_dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1); + } + +/* Zero out below first M rows of B */ + + i__1 = *n - *m; + _starpu_dlaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[*m + 1 + b_dim1], + ldb); + iwork = itau + *m; + +/* Multiply transpose(Q) by B */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] +, &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__1 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__1, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of A in A and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need BDSPAC) 
*/ + + _starpu_dbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], + lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1] +, ldb); + } +/* L50: */ + } + +/* Multiply B by right singular vectors of A */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + _starpu_dgemm_("T", "N", n, nrhs, m, &c_b108, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b74, &work[1], ldb); + _starpu_dlacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + _starpu_dgemm_("T", "N", n, &bl, m, &c_b108, &a[a_offset], lda, & + b[i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n); + _starpu_dlacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], + ldb); +/* L60: */ + } + } else { + _starpu_dgemv_("T", m, n, &c_b108, &a[a_offset], lda, &b[b_offset], & + c__1, &c_b74, &work[1], &c__1); + _starpu_dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + } + } + +/* Undo scaling */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSS */ + +} /* _starpu_dgelss_ */ diff --git a/min-dgels/base/SRC/dgelsx.c b/min-dgels/base/SRC/dgelsx.c new file mode 100644 index 0000000..e56da9f --- /dev/null +++ b/min-dgels/base/SRC/dgelsx.c @@ -0,0 +1,438 @@ +/* dgelsx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b13 = 0.; +static integer c__2 = 2; +static integer c__1 = 1; +static doublereal c_b36 = 1.; + +/* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, k; + doublereal c1, c2, s1, s2, t1, t2; + integer mn; + doublereal anrm, bnrm, smin, smax; + integer iascl, ibscl, ismin, ismax; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaic1_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dorm2r_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dgeqpf_(integer *, integer 
*, + doublereal *, integer *, integer *, doublereal *, doublereal *, + integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dlatzm_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *); + doublereal sminpr, smaxpr, smlnum; + extern /* Subroutine */ int _starpu_dtzrqf_(integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DGELSY. */ + +/* DGELSX computes the minimum-norm solution to a real linear least */ +/* squares problem: */ +/* minimize || A * X - B || */ +/* using a complete orthogonal factorization of A. A is an M-by-N */ +/* matrix which may be rank-deficient. */ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* matrix X. */ + +/* The routine first computes a QR factorization with column pivoting: */ +/* A * P = Q * [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* with R11 defined as the largest leading submatrix whose estimated */ +/* condition number is less than 1/RCOND. The order of R11, RANK, */ +/* is the effective rank of A. 
*/ + +/* Then, R22 is considered to be negligible, and R12 is annihilated */ +/* by orthogonal transformations from the right, arriving at the */ +/* complete orthogonal factorization: */ +/* A * P = Q * [ T11 0 ] * Z */ +/* [ 0 0 ] */ +/* The minimum-norm solution is then */ +/* X = P * Z' [ inv(T11)*Q1'*B ] */ +/* [ 0 ] */ +/* where Q1 consists of the first RANK columns of Q. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of */ +/* columns of matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A has been overwritten by details of its */ +/* complete orthogonal factorization. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the M-by-NRHS right hand side matrix B. */ +/* On exit, the N-by-NRHS solution matrix X. */ +/* If m >= n and RANK = n, the residual sum-of-squares for */ +/* the solution in the i-th column is given by the sum of */ +/* squares of elements N+1:M in that column. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,M,N). */ + +/* JPVT (input/output) INTEGER array, dimension (N) */ +/* On entry, if JPVT(i) .ne. 0, the i-th column of A is an */ +/* initial column, otherwise it is a free column. Before */ +/* the QR factorization of A, all initial columns are */ +/* permuted to the leading positions; only the remaining */ +/* free columns are moved as a result of column pivoting */ +/* during the factorization. */ +/* On exit, if JPVT(i) = k, then the i-th column of A*P */ +/* was the k-th column of A. 
*/ + +/* RCOND (input) DOUBLE PRECISION */ +/* RCOND is used to determine the effective rank of A, which */ +/* is defined as the order of the largest leading triangular */ +/* submatrix R11 in the QR factorization with pivoting of A, */ +/* whose estimated condition number < 1/RCOND. */ + +/* RANK (output) INTEGER */ +/* The effective rank of A, i.e., the order of the submatrix */ +/* R11. This is the same as the order of the submatrix T11 */ +/* in the complete orthogonal factorization of A. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --jpvt; + --work; + + /* Function Body */ + mn = min(*m,*n); + ismin = mn + 1; + ismax = (mn << 1) + 1; + +/* Test the input arguments. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*ldb < max(i__1,*n)) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELSX", &i__1); + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = min(*m,*n); + if (min(i__1,*nrhs) == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); + bignum = 1. 
/ smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A, B if max elements outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); + *rank = 0; + goto L100; + } + + bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Compute QR factorization with column pivoting of A: */ +/* A * P = Q * R */ + + _starpu_dgeqpf_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], info); + +/* workspace 3*N. Details of Householder rotations stored */ +/* in WORK(1:MN). */ + +/* Determine RANK using incremental condition estimation */ + + work[ismin] = 1.; + work[ismax] = 1.; + smax = (d__1 = a[a_dim1 + 1], abs(d__1)); + smin = smax; + if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) 
{ + *rank = 0; + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); + goto L100; + } else { + *rank = 1; + } + +L10: + if (*rank < mn) { + i__ = *rank + 1; + _starpu_dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &sminpr, &s1, &c1); + _starpu_dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &smaxpr, &s2, &c2); + + if (smaxpr * *rcond <= sminpr) { + i__1 = *rank; + for (i__ = 1; i__ <= i__1; ++i__) { + work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; + work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; +/* L20: */ + } + work[ismin + *rank] = c1; + work[ismax + *rank] = c2; + smin = sminpr; + smax = smaxpr; + ++(*rank); + goto L10; + } + } + +/* Logically partition R = [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* where R11 = R(1:RANK,1:RANK) */ + +/* [R11,R12] = [ T11, 0 ] * Y */ + + if (*rank < *n) { + _starpu_dtzrqf_(rank, n, &a[a_offset], lda, &work[mn + 1], info); + } + +/* Details of Householder rotations stored in WORK(MN+1:2*MN) */ + +/* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ + + _starpu_dorm2r_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & + b[b_offset], ldb, &work[(mn << 1) + 1], info); + +/* workspace NRHS */ + +/* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ + + _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b36, & + a[a_offset], lda, &b[b_offset], ldb); + + i__1 = *n; + for (i__ = *rank + 1; i__ <= i__1; ++i__) { + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ + + if (*rank < *n) { + i__1 = *rank; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - *rank + 1; + _starpu_dlatzm_("Left", &i__2, nrhs, &a[i__ + (*rank + 1) * a_dim1], lda, + &work[mn + i__], &b[i__ + b_dim1], &b[*rank + 1 + b_dim1], + ldb, &work[(mn << 1) + 1]); +/* L50: */ + } + } + +/* workspace NRHS */ + +/* 
B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[(mn << 1) + i__] = 1.; +/* L60: */ + } + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[(mn << 1) + i__] == 1.) { + if (jpvt[i__] != i__) { + k = i__; + t1 = b[k + j * b_dim1]; + t2 = b[jpvt[k] + j * b_dim1]; +L70: + b[jpvt[k] + j * b_dim1] = t1; + work[(mn << 1) + k] = 0.; + t1 = t2; + k = jpvt[k]; + t2 = b[jpvt[k] + j * b_dim1]; + if (jpvt[k] != i__) { + goto L70; + } + b[i__ + j * b_dim1] = t1; + work[(mn << 1) + k] = 0.; + } + } +/* L80: */ + } +/* L90: */ + } + +/* Undo scaling */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], + lda, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], + lda, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L100: + + return 0; + +/* End of DGELSX */ + +} /* _starpu_dgelsx_ */ diff --git a/min-dgels/base/SRC/dgelsy.c b/min-dgels/base/SRC/dgelsy.c new file mode 100644 index 0000000..d33ccd5 --- /dev/null +++ b/min-dgels/base/SRC/dgelsy.c @@ -0,0 +1,495 @@ +/* dgelsy.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b31 = 0.; +static integer c__2 = 2; +static doublereal c_b54 = 1.; + +/* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal c1, c2, s1, s2; + integer nb, mn, nb1, nb2, nb3, nb4; + doublereal anrm, bnrm, smin, smax; + integer iascl, ibscl; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer ismin, ismax; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaic1_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + doublereal wsize; + extern /* Subroutine */ int _starpu_dgeqp3_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + 
extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + integer lwkmin; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + doublereal sminpr, smaxpr, smlnum; + extern /* Subroutine */ int _starpu_dormrz_(char *, char *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dtzrzf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGELSY computes the minimum-norm solution to a real linear least */ +/* squares problem: */ +/* minimize || A * X - B || */ +/* using a complete orthogonal factorization of A. A is an M-by-N */ +/* matrix which may be rank-deficient. */ + +/* Several right hand side vectors b and solution vectors x can be */ +/* handled in a single call; they are stored as the columns of the */ +/* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* matrix X. 
*/ + +/* The routine first computes a QR factorization with column pivoting: */ +/* A * P = Q * [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* with R11 defined as the largest leading submatrix whose estimated */ +/* condition number is less than 1/RCOND. The order of R11, RANK, */ +/* is the effective rank of A. */ + +/* Then, R22 is considered to be negligible, and R12 is annihilated */ +/* by orthogonal transformations from the right, arriving at the */ +/* complete orthogonal factorization: */ +/* A * P = Q * [ T11 0 ] * Z */ +/* [ 0 0 ] */ +/* The minimum-norm solution is then */ +/* X = P * Z' [ inv(T11)*Q1'*B ] */ +/* [ 0 ] */ +/* where Q1 consists of the first RANK columns of Q. */ + +/* This routine is basically identical to the original xGELSX except */ +/* for three differences: */ +/* o The call to the subroutine xGEQPF has been substituted by */ +/* the call to the subroutine xGEQP3. This subroutine is a Blas-3 */ +/* version of the QR factorization with column pivoting. */ +/* o Matrix B (the right hand side) is updated with Blas-3. */ +/* o The permutation of matrix B (the right hand side) is faster and */ +/* simpler. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of */ +/* columns of matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A has been overwritten by details of its */ +/* complete orthogonal factorization. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the M-by-NRHS right hand side matrix B. */ +/* On exit, the N-by-NRHS solution matrix X. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,M,N). */ + +/* JPVT (input/output) INTEGER array, dimension (N) */ +/* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ +/* to the front of AP, otherwise column i is a free column. */ +/* On exit, if JPVT(i) = k, then the i-th column of AP */ +/* was the k-th column of A. */ + +/* RCOND (input) DOUBLE PRECISION */ +/* RCOND is used to determine the effective rank of A, which */ +/* is defined as the order of the largest leading triangular */ +/* submatrix R11 in the QR factorization with pivoting of A, */ +/* whose estimated condition number < 1/RCOND. */ + +/* RANK (output) INTEGER */ +/* The effective rank of A, i.e., the order of the submatrix */ +/* R11. This is the same as the order of the submatrix T11 */ +/* in the complete orthogonal factorization of A. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* The unblocked strategy requires that: */ +/* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ +/* where MN = min( M, N ). */ +/* The block algorithm requires that: */ +/* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ +/* where NB is an upper bound on the blocksize returned */ +/* by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, */ +/* and DORMRZ. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: If INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. 
of Tenn., Knoxville, USA */ +/* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ +/* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --jpvt; + --work; + + /* Function Body */ + mn = min(*m,*n); + ismin = mn + 1; + ismax = (mn << 1) + 1; + +/* Test the input arguments. */ + + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*ldb < max(i__1,*n)) { + *info = -7; + } + } + +/* Figure out optimal block size */ + + if (*info == 0) { + if (mn == 0 || *nrhs == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", m, n, nrhs, &c_n1); + nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, nrhs, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); + nb = max(i__1,nb4); +/* Computing MAX */ + i__1 = mn << 1, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = mn + + *nrhs; + lwkmin = mn + max(i__1,i__2); +/* Computing MAX */ + i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = max( + i__1,i__2), i__2 = (mn << 1) + nb * *nrhs; + lwkopt = max(i__1,i__2); + } + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGELSY", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (mn == 0 || *nrhs == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Scale A, B if max entries outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + *rank = 0; + goto L70; + } + + bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Compute QR factorization with column pivoting of A: */ +/* A * P = Q * R */ + + i__1 = *lwork - mn; + _starpu_dgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, + info); + wsize = mn + work[mn + 1]; + +/* workspace: MN+2*N+NB*(N+1). */ +/* Details of Householder rotations stored in WORK(1:MN). 
*/ + +/* Determine RANK using incremental condition estimation */ + + work[ismin] = 1.; + work[ismax] = 1.; + smax = (d__1 = a[a_dim1 + 1], abs(d__1)); + smin = smax; + if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) { + *rank = 0; + i__1 = max(*m,*n); + _starpu_dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + goto L70; + } else { + *rank = 1; + } + +L10: + if (*rank < mn) { + i__ = *rank + 1; + _starpu_dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &sminpr, &s1, &c1); + _starpu_dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &smaxpr, &s2, &c2); + + if (smaxpr * *rcond <= sminpr) { + i__1 = *rank; + for (i__ = 1; i__ <= i__1; ++i__) { + work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; + work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; +/* L20: */ + } + work[ismin + *rank] = c1; + work[ismax + *rank] = c2; + smin = sminpr; + smax = smaxpr; + ++(*rank); + goto L10; + } + } + +/* workspace: 3*MN. */ + +/* Logically partition R = [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* where R11 = R(1:RANK,1:RANK) */ + +/* [R11,R12] = [ T11, 0 ] * Y */ + + if (*rank < *n) { + i__1 = *lwork - (mn << 1); + _starpu_dtzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + + 1], &i__1, info); + } + +/* workspace: 2*MN. */ +/* Details of Householder rotations stored in WORK(MN+1:2*MN) */ + +/* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ + + i__1 = *lwork - (mn << 1); + _starpu_dormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & + b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info); +/* Computing MAX */ + d__1 = wsize, d__2 = (mn << 1) + work[(mn << 1) + 1]; + wsize = max(d__1,d__2); + +/* workspace: 2*MN+NB*NRHS. 
*/ + +/* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ + + _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, & + a[a_offset], lda, &b[b_offset], ldb); + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *rank + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ + + if (*rank < *n) { + i__1 = *n - *rank; + i__2 = *lwork - (mn << 1); + _starpu_dormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, + &work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2, + info); + } + +/* workspace: 2*MN+NRHS. */ + +/* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[jpvt[i__]] = b[i__ + j * b_dim1]; +/* L50: */ + } + _starpu_dcopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1); +/* L60: */ + } + +/* workspace: N. */ + +/* Undo scaling */ + + if (iascl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], + lda, info); + } else if (iascl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + _starpu_dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], + lda, info); + } + if (ibscl == 1) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DGELSY */ + +} /* _starpu_dgelsy_ */ diff --git a/min-dgels/base/SRC/dgeql2.c b/min-dgels/base/SRC/dgeql2.c new file mode 100644 index 0000000..c35416c --- /dev/null +++ b/min-dgels/base/SRC/dgeql2.c @@ -0,0 +1,159 @@ +/* dgeql2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQL2 computes a QL factorization of a real m by n matrix A: */ +/* A = Q * L. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix A. 
*/ +/* On exit, if m >= n, the lower triangle of the subarray */ +/* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */ +/* if m <= n, the elements on and below the (n-m)-th */ +/* superdiagonal contain the m by n lower trapezoidal matrix L; */ +/* the remaining elements, with the array TAU, represent the */ +/* orthogonal matrix Q as a product of elementary reflectors */ +/* (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQL2", &i__1); + return 0; + } + + k = min(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:m-k+i-1,n-k+i) */ + + i__1 = *m - k + i__; + _starpu_dlarfp_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k + + i__) * a_dim1 + 1], &c__1, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */ + + aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; + a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.; + i__1 = *m - k + i__; + i__2 = *n - k + i__ - 1; + _starpu_dlarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, & + tau[i__], &a[a_offset], lda, &work[1]); + a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DGEQL2 */ + +} /* _starpu_dgeql2_ */ diff --git a/min-dgels/base/SRC/dgeqlf.c b/min-dgels/base/SRC/dgeqlf.c new file mode 100644 index 0000000..741f159 --- /dev/null +++ b/min-dgels/base/SRC/dgeqlf.c @@ -0,0 +1,270 @@ +/* dgeqlf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, ki, kk, mu, nu, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgeql2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQLF computes a QL factorization of a real M-by-N matrix A: */ +/* A = Q * L. 
*/ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if m >= n, the lower triangle of the subarray */ +/* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */ +/* if m <= n, the elements on and below the (n-m)-th */ +/* superdiagonal contain the M-by-N lower trapezoidal matrix L; */ +/* the remaining elements, with the array TAU, represent the */ +/* orthogonal matrix Q as a product of elementary reflectors */ +/* (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1), where k = min(m,n). 
*/ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = min(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = _starpu_ilaenv_(&c__1, "DGEQLF", " ", m, n, &c_n1, &c_n1); + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQLF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQLF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. 
*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQLF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk columns are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = min(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the QL factorization of the current block */ +/* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */ + + i__3 = *m - k + i__ + ib - 1; + _starpu_dgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[ + i__], &work[1], &iinfo); + if (*n - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - k + i__ + ib - 1; + i__4 = *n - k + i__ - 1; + _starpu_dlarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, + &i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, & + work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & + ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + _starpu_dgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQLF */ + +} /* _starpu_dgeqlf_ */ diff --git a/min-dgels/base/SRC/dgeqp3.c b/min-dgels/base/SRC/dgeqp3.c new file mode 100644 index 0000000..608d7f3 --- /dev/null +++ b/min-dgels/base/SRC/dgeqp3.c @@ -0,0 +1,358 @@ +/* 
dgeqp3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j, jb, na, nb, sm, sn, nx, fjb, iws, nfxd; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + integer nbmin, minmn; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer minws; + extern /* Subroutine */ int _starpu_dlaqp2_(integer *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *), _starpu_dgeqrf_(integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlaqps_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + integer topbmn, sminmn; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, 
doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQP3 computes a QR factorization with column pivoting of a */ +/* matrix A: A*P = Q*R using Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the upper triangle of the array contains the */ +/* min(M,N)-by-N upper trapezoidal matrix R; the elements below */ +/* the diagonal, together with the array TAU, represent the */ +/* orthogonal matrix Q as a product of min(M,N) elementary */ +/* reflectors. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* JPVT (input/output) INTEGER array, dimension (N) */ +/* On entry, if JPVT(J).ne.0, the J-th column of A is permuted */ +/* to the front of A*P (a leading column); if JPVT(J)=0, */ +/* the J-th column of A is a free column. */ +/* On exit, if JPVT(J)=K, then the J-th column of A*P was */ +/* the K-th column of A. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 3*N+1. */ +/* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */ +/* is the optimal blocksize. 
*/ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real/complex scalar, and v is a real/complex vector */ +/* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */ +/* A(i+1:m,i), and tau in TAU(i). */ + +/* Based on contributions by */ +/* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ +/* X. Sun, Computer Science Dept., Duke University, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test input arguments */ +/* ==================== */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --jpvt; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + + if (*info == 0) { + minmn = min(*m,*n); + if (minmn == 0) { + iws = 1; + lwkopt = 1; + } else { + iws = *n * 3 + 1; + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + lwkopt = (*n << 1) + (*n + 1) * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < iws && ! 
lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQP3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. */ + + if (minmn == 0) { + return 0; + } + +/* Move initial columns up front. */ + + nfxd = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (jpvt[j] != 0) { + if (j != nfxd) { + _starpu_dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], & + c__1); + jpvt[j] = jpvt[nfxd]; + jpvt[nfxd] = j; + } else { + jpvt[j] = j; + } + ++nfxd; + } else { + jpvt[j] = j; + } +/* L10: */ + } + --nfxd; + +/* Factorize fixed columns */ +/* ======================= */ + +/* Compute the QR factorization of fixed columns and update */ +/* remaining columns. */ + + if (nfxd > 0) { + na = min(*m,nfxd); +/* CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */ + _starpu_dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = max(i__1,i__2); + if (na < *n) { +/* CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */ +/* CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */ + i__1 = *n - na; + _starpu_dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, & + tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, + info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = max(i__1,i__2); + } + } + +/* Factorize free columns */ +/* ====================== */ + + if (nfxd < minmn) { + + sm = *m - nfxd; + sn = *n - nfxd; + sminmn = minmn - nfxd; + +/* Determine the block size. */ + + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1); + nbmin = 2; + nx = 0; + + if (nb > 1 && nb < sminmn) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, & + c_n1); + nx = max(i__1,i__2); + + + if (nx < sminmn) { + +/* Determine if workspace is large enough for blocked code. 
*/ + + minws = (sn << 1) + (sn + 1) * nb; + iws = max(iws,minws); + if (*lwork < minws) { + +/* Not enough workspace to use optimal NB: Reduce NB and */ +/* determine the minimum value of NB. */ + + nb = (*lwork - (sn << 1)) / (sn + 1); +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, & + c_n1, &c_n1); + nbmin = max(i__1,i__2); + + + } + } + } + +/* Initialize partial column norms. The first N elements of work */ +/* store the exact column norms. */ + + i__1 = *n; + for (j = nfxd + 1; j <= i__1; ++j) { + work[j] = _starpu_dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1); + work[*n + j] = work[j]; +/* L20: */ + } + + if (nb >= nbmin && nb < sminmn && nx < sminmn) { + +/* Use blocked code initially. */ + + j = nfxd + 1; + +/* Compute factorization: while loop. */ + + + topbmn = minmn - nx; +L30: + if (j <= topbmn) { +/* Computing MIN */ + i__1 = nb, i__2 = topbmn - j + 1; + jb = min(i__1,i__2); + +/* Factorize JB columns among columns J:N. */ + + i__1 = *n - j + 1; + i__2 = j - 1; + i__3 = *n - j + 1; + _starpu_dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, & + jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n + << 1) + 1], &work[(*n << 1) + jb + 1], &i__3); + + j += fjb; + goto L30; + } + } else { + j = nfxd + 1; + } + +/* Use unblocked code to factor the last or only block. */ + + + if (j <= minmn) { + i__1 = *n - j + 1; + i__2 = j - 1; + _starpu_dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[ + j], &work[j], &work[*n + j], &work[(*n << 1) + 1]); + } + + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQP3 */ + +} /* _starpu_dgeqp3_ */ diff --git a/min-dgels/base/SRC/dgeqpf.c b/min-dgels/base/SRC/dgeqpf.c new file mode 100644 index 0000000..30ce0ba --- /dev/null +++ b/min-dgels/base/SRC/dgeqpf.c @@ -0,0 +1,304 @@ +/* dgeqpf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, ma, mn; + doublereal aii; + integer pvt; + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp2, tol3z; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + integer itemp; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dgeqr2_(integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK deprecated driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DGEQP3. */ + +/* DGEQPF computes a QR factorization with column pivoting of a */ +/* real M-by-N matrix A: A*P = Q*R. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the upper triangle of the array contains the */ +/* min(M,N)-by-N upper triangular matrix R; the elements */ +/* below the diagonal, together with the array TAU, */ +/* represent the orthogonal matrix Q as a product of */ +/* min(m,n) elementary reflectors. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* JPVT (input/output) INTEGER array, dimension (N) */ +/* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ +/* to the front of A*P (a leading column); if JPVT(i) = 0, */ +/* the i-th column of A is a free column. */ +/* On exit, if JPVT(i) = k, then the i-th column of A*P */ +/* was the k-th column of A. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
*/ + +/* The matrix P is represented in jpvt as follows: If */ +/* jpvt(j) = i */ +/* then the jth column of P is the ith canonical unit vector. */ + +/* Partial column norm updating strategy modified by */ +/* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */ +/* University of Zagreb, Croatia. */ +/* June 2006. */ +/* For more details see LAPACK Working Note 176. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --jpvt; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQPF", &i__1); + return 0; + } + + mn = min(*m,*n); + tol3z = sqrt(_starpu_dlamch_("Epsilon")); + +/* Move initial columns up front */ + + itemp = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (jpvt[i__] != 0) { + if (i__ != itemp) { + _starpu_dswap_(m, &a[i__ * a_dim1 + 1], &c__1, &a[itemp * a_dim1 + 1], + &c__1); + jpvt[i__] = jpvt[itemp]; + jpvt[itemp] = i__; + } else { + jpvt[i__] = i__; + } + ++itemp; + } else { + jpvt[i__] = i__; + } +/* L10: */ + } + --itemp; + +/* Compute the QR factorization and update remaining columns */ + + if (itemp > 0) { + ma = min(itemp,*m); + _starpu_dgeqr2_(m, &ma, &a[a_offset], lda, &tau[1], &work[1], info); + if (ma < *n) { + i__1 = *n - ma; + _starpu_dorm2r_("Left", "Transpose", m, &i__1, &ma, &a[a_offset], lda, & + tau[1], &a[(ma + 1) * a_dim1 + 1], lda, &work[1], info); + } + } + + if (itemp < mn) { + +/* Initialize partial column norms. 
The first n elements of */ +/* work store the exact column norms. */ + + i__1 = *n; + for (i__ = itemp + 1; i__ <= i__1; ++i__) { + i__2 = *m - itemp; + work[i__] = _starpu_dnrm2_(&i__2, &a[itemp + 1 + i__ * a_dim1], &c__1); + work[*n + i__] = work[i__]; +/* L20: */ + } + +/* Compute factorization */ + + i__1 = mn; + for (i__ = itemp + 1; i__ <= i__1; ++i__) { + +/* Determine ith pivot column and swap if necessary */ + + i__2 = *n - i__ + 1; + pvt = i__ - 1 + _starpu_idamax_(&i__2, &work[i__], &c__1); + + if (pvt != i__) { + _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + itemp = jpvt[pvt]; + jpvt[pvt] = jpvt[i__]; + jpvt[i__] = itemp; + work[pvt] = work[i__]; + work[*n + pvt] = work[*n + i__]; + } + +/* Generate elementary reflector H(i) */ + + if (i__ < *m) { + i__2 = *m - i__ + 1; + _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + 1 + i__ * + a_dim1], &c__1, &tau[i__]); + } else { + _starpu_dlarfp_(&c__1, &a[*m + *m * a_dim1], &a[*m + *m * a_dim1], & + c__1, &tau[*m]); + } + + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + _starpu_dlarf_("LEFT", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tau[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[(* + n << 1) + 1]); + a[i__ + i__ * a_dim1] = aii; + } + +/* Update partial column norms */ + + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (work[j] != 0.) { + +/* NOTE: The following 4 lines follow from the analysis in */ +/* Lapack Working Note 176. */ + + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)) / work[j]; +/* Computing MAX */ + d__1 = 0., d__2 = (temp + 1.) * (1. 
- temp); + temp = max(d__1,d__2); +/* Computing 2nd power */ + d__1 = work[j] / work[*n + j]; + temp2 = temp * (d__1 * d__1); + if (temp2 <= tol3z) { + if (*m - i__ > 0) { + i__3 = *m - i__; + work[j] = _starpu_dnrm2_(&i__3, &a[i__ + 1 + j * a_dim1], + &c__1); + work[*n + j] = work[j]; + } else { + work[j] = 0.; + work[*n + j] = 0.; + } + } else { + work[j] *= sqrt(temp); + } + } +/* L30: */ + } + +/* L40: */ + } + } + return 0; + +/* End of DGEQPF */ + +} /* _starpu_dgeqpf_ */ diff --git a/min-dgels/base/SRC/dgeqr2.c b/min-dgels/base/SRC/dgeqr2.c new file mode 100644 index 0000000..feb9504 --- /dev/null +++ b/min-dgels/base/SRC/dgeqr2.c @@ -0,0 +1,161 @@ +/* dgeqr2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQR2 computes a QR factorization of a real m by n matrix A: */ +/* A = Q * R. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(m,n) by n upper trapezoidal matrix R (R is */ +/* upper triangular if m >= n); the elements below the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1] +, &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* _starpu_dgeqr2_ */ diff --git a/min-dgels/base/SRC/dgeqrf.c b/min-dgels/base/SRC/dgeqrf.c new file mode 100644 index 0000000..cdd83e5 --- /dev/null +++ b/min-dgels/base/SRC/dgeqrf.c @@ -0,0 +1,252 @@ +/* dgeqrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* A = Q * R. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. 
M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ +/* upper triangular if m >= n); the elements below the diagonal, */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of min(m,n) elementary reflectors (see Further */ +/* Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. 
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + _starpu_dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + _starpu_dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] +, &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* _starpu_dgeqrf_ */ diff --git a/min-dgels/base/SRC/dgerfs.c b/min-dgels/base/SRC/dgerfs.c new file mode 100644 index 0000000..cad3944 --- /dev/null +++ b/min-dgels/base/SRC/dgerfs.c @@ -0,0 +1,424 @@ +/* dgerfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b15 = -1.; +static doublereal c_b17 = 1.; + +/* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s, xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dgetrs_( + char *, integer *, integer *, doublereal 
*, integer *, integer *, + doublereal *, integer *, integer *); + logical notran; + char transt[1]; + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGERFS improves the computed solution to a system of linear */ +/* equations and provides error bounds and backward error estimates for */ +/* the solution. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The original N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factors L and U from the factorization A = P*L*U */ +/* as computed by DGETRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
*/ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DGETRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement (hard-coded as 5 in the stopping test, count <= 5). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldaf < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldx < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGERFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. 
*/ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], & + c__1, &c_b17, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. 
<= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + + 1], n, info); + _starpu_daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + _starpu_dgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). 
*/
+
+                i__2 = *n;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    work[*n + i__] = work[i__] * work[*n + i__];
+/* L120: */
+                }
+                _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
+                        work[*n + 1], n, info);
+            }
+            goto L100;
+        }
+
+/*        Normalize error. */
+
+        lstres = 0.;
+        i__2 = *n;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+            d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
+            lstres = max(d__2,d__3);
+/* L130: */
+        }
+        if (lstres != 0.) {
+            ferr[j] /= lstres;
+        }
+
+/* L140: */
+    }
+
+    return 0;
+
+/*     End of DGERFS */
+
+} /* _starpu_dgerfs_ */
diff --git a/min-dgels/base/SRC/dgerfsx.c b/min-dgels/base/SRC/dgerfsx.c
new file mode 100644
index 0000000..565fa54
--- /dev/null
+++ b/min-dgels/base/SRC/dgerfsx.c
@@ -0,0 +1,686 @@
+/* dgerfsx.f -- translated by f2c (version 20061008).
+   You must link the resulting object file with libf2c:
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+                http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__0 = 0;
+static integer c__1 = 1;
+
+/* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer *
+        nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf,
+        integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b,
+        integer *ldb, doublereal *x, integer *ldx, doublereal *rcond,
+        doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__,
+        doublereal *err_bnds_comp__, integer *nparams, doublereal *params,
+        doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1,
+            x_offset, err_bnds_norm_dim1, err_bnds_norm_offset,
+            err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__;
+    integer ref_type__;
+    extern integer _starpu_ilatrans_(char *);
+    integer j;
+    doublereal rcond_tmp__;
+    integer prec_type__, trans_type__;
+    extern doublereal _starpu_dla_gercond__(char *, integer *, doublereal *, integer *
+            , doublereal *, integer *, integer *, integer *, doublereal *,
+            integer *, doublereal *, integer *, ftnlen);
+    doublereal cwise_wrong__;
+    extern /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *, integer *,
+            integer *, integer *, doublereal *, integer *, doublereal *,
+            integer *, integer *, logical *, doublereal *, doublereal *,
+            integer *, doublereal *, integer *, doublereal *, integer *,
+            doublereal *, doublereal *, doublereal *, doublereal *,
+            doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+            doublereal *, logical *, integer *);
+    char norm[1];
+    logical ignore_cwise__;
+    extern logical _starpu_lsame_(char *, char *);
+    doublereal anorm;
+    extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *,
+            integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int _starpu_dgecon_(char *, integer *, doublereal *,
+            integer *, doublereal *, doublereal *, doublereal *, integer *,
+            integer *), _starpu_xerbla_(char *, integer *);
+    logical colequ, notran, rowequ;
+    extern integer _starpu_ilaprec_(char *);
+    integer ithresh, n_norms__;
+    doublereal rthresh;
+
+
+/*     -- LAPACK routine (version 3.2.1)                                 -- */
+/*     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
+/*     -- Jason Riedy of Univ. of California Berkeley.                 -- */
+/*     -- April 2009                                                   -- */
+
+/*     -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/*     -- Univ. of California Berkeley and NAG Ltd.                    -- */
+
+/*     Local changes relative to the f2c translation of LAPACK 3.2.1: */
+/*     - the initialisation loops for ERR_BNDS_NORM / ERR_BNDS_COMP test */
+/*       each N_ERR_BNDS level independently instead of through an */
+/*       else-if chain, so columns 2 and 3 are initialised as well; */
+/*     - PARAMS(3) is only read when NPARAMS >= 3, as documented for */
+/*       NPARAMS below. */
+
+/*     .. */
+/*     .. Scalar Arguments .. */
+/*     .. */
+/*     .. Array Arguments .. */
+/*     .. */
+
+/*     Purpose */
+/*     ======= */
+
+/*     DGERFSX improves the computed solution to a system of linear */
+/*     equations and provides error bounds and backward error estimates */
+/*     for the solution.  In addition to normwise error bound, the code */
+/*     provides maximum componentwise error bound if possible.  See */
+/*     comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */
+/*     error bounds. */
+
+/*     The original system of linear equations may have been equilibrated */
+/*     before calling this routine, as described by arguments EQUED, R */
+/*     and C below. In this case, the solution and error bounds returned */
+/*     are for the original unequilibrated system. */
+
+/*     Arguments */
+/*     ========= */
+
+/*     Some optional parameters are bundled in the PARAMS array.  These */
+/*     settings determine how refinement is performed, but often the */
+/*     defaults are acceptable.  If the defaults are acceptable, users */
+/*     can pass NPARAMS = 0 which prevents the source code from accessing */
+/*     the PARAMS argument. */
+
+/*     TRANS   (input) CHARACTER*1 */
+/*     Specifies the form of the system of equations: */
+/*       = 'N':  A * X = B     (No transpose) */
+/*       = 'T':  A**T * X = B  (Transpose) */
+/*       = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+
+/*     EQUED   (input) CHARACTER*1 */
+/*     Specifies the form of equilibration that was done to A */
+/*     before calling this routine. This is needed to compute */
+/*     the solution and error bounds correctly. */
+/*       = 'N':  No equilibration */
+/*       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/*               diag(R). */
+/*       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/*               by diag(C). */
+/*       = 'B':  Both row and column equilibration, i.e., A has been */
+/*               replaced by diag(R) * A * diag(C). */
+/*               The right hand side B has been changed accordingly. */
+
+/*     N       (input) INTEGER */
+/*     The order of the matrix A.  N >= 0. */
+
+/*     NRHS    (input) INTEGER */
+/*     The number of right hand sides, i.e., the number of columns */
+/*     of the matrices B and X.  NRHS >= 0. */
+
+/*     A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
+/*     The original N-by-N matrix A. */
+
+/*     LDA     (input) INTEGER */
+/*     The leading dimension of the array A.  LDA >= max(1,N). */
+
+/*     AF      (input) DOUBLE PRECISION array, dimension (LDAF,N) */
+/*     The factors L and U from the factorization A = P*L*U */
+/*     as computed by DGETRF. */
+
+/*     LDAF    (input) INTEGER */
+/*     The leading dimension of the array AF.  LDAF >= max(1,N). */
+
+/*     IPIV    (input) INTEGER array, dimension (N) */
+/*     The pivot indices from DGETRF; for 1<=i<=N, row i of the */
+/*     matrix was interchanged with row IPIV(i). */
+
+/*     R       (input or output) DOUBLE PRECISION array, dimension (N) */
+/*     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/*     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/*     is not accessed.  R is an input argument if FACT = 'F'; */
+/*     otherwise, R is an output argument.  If FACT = 'F' and */
+/*     EQUED = 'R' or 'B', each element of R must be positive. */
+/*     If R is output, each element of R is a power of the radix. */
+/*     If R is input, each element of R should be a power of the radix */
+/*     to ensure a reliable solution and error estimates. Scaling by */
+/*     powers of the radix does not cause rounding errors unless the */
+/*     result underflows or overflows. Rounding errors during scaling */
+/*     lead to refining with a matrix that is not equivalent to the */
+/*     input matrix, producing error estimates that may not be */
+/*     reliable. */
+
+/*     C       (input or output) DOUBLE PRECISION array, dimension (N) */
+/*     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/*     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/*     is not accessed.  C is an input argument if FACT = 'F'; */
+/*     otherwise, C is an output argument.  If FACT = 'F' and */
+/*     EQUED = 'C' or 'B', each element of C must be positive. */
+/*     If C is output, each element of C is a power of the radix. */
+/*     If C is input, each element of C should be a power of the radix */
+/*     to ensure a reliable solution and error estimates. Scaling by */
+/*     powers of the radix does not cause rounding errors unless the */
+/*     result underflows or overflows. Rounding errors during scaling */
+/*     lead to refining with a matrix that is not equivalent to the */
+/*     input matrix, producing error estimates that may not be */
+/*     reliable. */
+
+/*     B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/*     The right hand side matrix B. */
+
+/*     LDB     (input) INTEGER */
+/*     The leading dimension of the array B.  LDB >= max(1,N). */
+
+/*     X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/*     On entry, the solution matrix X, as computed by DGETRS. */
+/*     On exit, the improved solution matrix X. */
+
+/*     LDX     (input) INTEGER */
+/*     The leading dimension of the array X.  LDX >= max(1,N). */
+
+/*     RCOND   (output) DOUBLE PRECISION */
+/*     Reciprocal scaled condition number.  This is an estimate of the */
+/*     reciprocal Skeel condition number of the matrix A after */
+/*     equilibration (if done).  If this is less than the machine */
+/*     precision (in particular, if it is zero), the matrix is singular */
+/*     to working precision.  Note that the error may still be small even */
+/*     if this number is very small and the matrix appears ill- */
+/*     conditioned. */
+
+/*     BERR    (output) DOUBLE PRECISION array, dimension (NRHS) */
+/*     Componentwise relative backward error.  This is the */
+/*     componentwise relative backward error of each solution vector X(j) */
+/*     (i.e., the smallest relative change in any element of A or B that */
+/*     makes X(j) an exact solution). */
+
+/*     N_ERR_BNDS (input) INTEGER */
+/*     Number of error bounds to return for each right hand side */
+/*     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/*     ERR_BNDS_COMP below. */
+
+/*     ERR_BNDS_NORM  (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/*     For each right-hand side, this array contains information about */
+/*     various error bounds and condition numbers corresponding to the */
+/*     normwise relative error, which is defined as follows: */
+
+/*     Normwise relative error in the ith solution vector: */
+/*             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/*            ------------------------------ */
+/*                  max_j abs(X(j,i)) */
+
+/*     The array is indexed by the type of error information as described */
+/*     below. There currently are up to three pieces of information */
+/*     returned. */
+
+/*     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/*     right-hand side. */
+
+/*     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/*     three fields: */
+/*     err = 1 "Trust/don't trust" boolean. Set to 0.0 (don't trust) when */
+/*              the reciprocal condition number is less than the threshold */
+/*              N * dlamch('Epsilon') applied by this code. */
+
+/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/*              almost certainly within a factor of 10 of the true error */
+/*              so long as the next entry is greater than the threshold */
+/*              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/*              be trusted if the previous boolean is true. */
+
+/*     err = 3  Reciprocal condition number: Estimated normwise */
+/*              reciprocal condition number.  Compared with the threshold */
+/*              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/*              estimate is "guaranteed". These reciprocal condition */
+/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/*              appropriately scaled matrix Z. */
+/*              Let Z = S*A, where S scales each row by a power of the */
+/*              radix so all absolute row sums of Z are approximately 1. */
+
+/*     See Lapack Working Note 165 for further details and extra */
+/*     cautions. */
+
+/*     ERR_BNDS_COMP  (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/*     For each right-hand side, this array contains information about */
+/*     various error bounds and condition numbers corresponding to the */
+/*     componentwise relative error, which is defined as follows: */
+
+/*     Componentwise relative error in the ith solution vector: */
+/*                    abs(XTRUE(j,i) - X(j,i)) */
+/*             max_j ---------------------- */
+/*                         abs(X(j,i)) */
+
+/*     The array is indexed by the right-hand side i (on which the */
+/*     componentwise relative error depends), and the type of error */
+/*     information as described below. There currently are up to three */
+/*     pieces of information returned for each right-hand side. If */
+/*     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/*     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most */
+/*     the first (:,N_ERR_BNDS) entries are returned. */
+
+/*     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/*     right-hand side. */
+
+/*     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/*     three fields: */
+/*     err = 1 "Trust/don't trust" boolean. Set to 0.0 (don't trust) when */
+/*              the reciprocal condition number is less than the threshold */
+/*              N * dlamch('Epsilon') applied by this code. */
+
+/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/*              almost certainly within a factor of 10 of the true error */
+/*              so long as the next entry is greater than the threshold */
+/*              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/*              be trusted if the previous boolean is true. */
+
+/*     err = 3  Reciprocal condition number: Estimated componentwise */
+/*              reciprocal condition number.  Compared with the threshold */
+/*              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/*              estimate is "guaranteed". These reciprocal condition */
+/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/*              appropriately scaled matrix Z. */
+/*              Let Z = S*(A*diag(x)), where x is the solution for the */
+/*              current right-hand side and S scales each row of */
+/*              A*diag(x) by a power of the radix so all absolute row */
+/*              sums of Z are approximately 1. */
+
+/*     See Lapack Working Note 165 for further details and extra */
+/*     cautions. */
+
+/*     NPARAMS (input) INTEGER */
+/*     Specifies the number of parameters set in PARAMS.  If .LE. 0, the */
+/*     PARAMS array is never referenced and default values are used. */
+
+/*     PARAMS  (input / output) DOUBLE PRECISION array, dimension NPARAMS */
+/*     Specifies algorithm parameters.  If an entry is .LT. 0.0, then */
+/*     that entry will be filled with default value used for that */
+/*     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/*     are used for higher-numbered parameters. */
+
+/*       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/*            refinement or not. */
+/*         Default: 1.0D+0 */
+/*            = 0.0 : No refinement is performed, and no error bounds are */
+/*                    computed. */
+/*            = 1.0 : Use the double-precision refinement algorithm, */
+/*                    possibly with doubled-single computations if the */
+/*                    compilation environment does not support DOUBLE */
+/*                    PRECISION. */
+/*              (other values are reserved for future use) */
+
+/*       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/*            computations allowed for refinement. */
+/*         Default: 10 */
+/*         Aggressive: Set to 100 to permit convergence using approximate */
+/*                     factorizations or factorizations other than LU. If */
+/*                     the factorization uses a technique other than */
+/*                     Gaussian elimination, the guarantees in */
+/*                     err_bnds_norm and err_bnds_comp may no longer be */
+/*                     trustworthy. */
+
+/*       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/*            will attempt to find a solution with small componentwise */
+/*            relative error in the double-precision algorithm.  Positive */
+/*            is true, 0.0 is false. */
+/*         Default: 1.0 (attempt componentwise convergence) */
+
+/*     WORK    (workspace) DOUBLE PRECISION array, dimension (4*N) */
+
+/*     IWORK   (workspace) INTEGER array, dimension (N) */
+
+/*     INFO    (output) INTEGER */
+/*       = 0:  Successful exit. The solution to every right-hand side is */
+/*         guaranteed. */
+/*       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/*       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/*         has been completed, but the factor U is exactly singular, so */
+/*         the solution and error bounds could not be computed. RCOND = 0 */
+/*         is returned. */
+/*       = N+J: The solution corresponding to the Jth right-hand side is */
+/*         not guaranteed. The solutions corresponding to other right- */
+/*         hand sides K with K > J may not be guaranteed as well, but */
+/*         only the first such right-hand side is reported. If a small */
+/*         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/*         the Jth right-hand side is the first with a normwise error */
+/*         bound that is not guaranteed (the smallest J such */
+/*         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/*         the Jth right-hand side is the first with either a normwise or */
+/*         componentwise error bound that is not guaranteed (the smallest */
+/*         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/*         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/*         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/*         about all of the right-hand sides check ERR_BNDS_NORM or */
+/*         ERR_BNDS_COMP. */
+
+/*     ================================================================== */
+
+/*     .. Parameters .. */
+/*     .. */
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. External Subroutines .. */
+/*     .. */
+/*     .. Intrinsic Functions .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+/*     Check the input parameters. */
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    trans_type__ = _starpu_ilatrans_(trans);
+    ref_type__ = 1;
+    if (*nparams >= 1) {
+        if (params[1] < 0.) {
+            params[1] = 1.;
+        } else {
+            ref_type__ = (integer) params[1];
+        }
+    }
+
+/*     Set default parameters. */
+
+/*     NOTE: the ill-conditioning threshold used here is N * eps, while */
+/*     the err = 2 / err = 3 descriptions above quote sqrt(n) * eps. */
+    illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon");
+    ithresh = 10;
+    rthresh = .5;
+    unstable_thresh__ = .25;
+    ignore_cwise__ = FALSE_;
+
+    if (*nparams >= 2) {
+        if (params[2] < 0.) {
+            params[2] = (doublereal) ithresh;
+        } else {
+            ithresh = (integer) params[2];
+        }
+    }
+    if (*nparams >= 3) {
+        if (params[3] < 0.) {
+            if (ignore_cwise__) {
+                params[3] = 0.;
+            } else {
+                params[3] = 1.;
+            }
+        } else {
+            ignore_cwise__ = params[3] == 0.;
+        }
+    }
+    if (ref_type__ == 0 || *n_err_bnds__ == 0) {
+        n_norms__ = 0;
+    } else if (ignore_cwise__) {
+        n_norms__ = 1;
+    } else {
+        n_norms__ = 2;
+    }
+
+    notran = _starpu_lsame_(trans, "N");
+    rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B");
+    colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B");
+
+/*     Test input parameters. */
+
+    if (trans_type__ == -1) {
+        *info = -1;
+    } else if (! rowequ && ! colequ && ! _starpu_lsame_(equed, "N")) {
+        *info = -2;
+    } else if (*n < 0) {
+        *info = -3;
+    } else if (*nrhs < 0) {
+        *info = -4;
+    } else if (*lda < max(1,*n)) {
+        *info = -6;
+    } else if (*ldaf < max(1,*n)) {
+        *info = -8;
+    } else if (*ldb < max(1,*n)) {
+        *info = -13;
+    } else if (*ldx < max(1,*n)) {
+        *info = -15;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        _starpu_xerbla_("DGERFSX", &i__1);
+        return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0 || *nrhs == 0) {
+        *rcond = 1.;
+        i__1 = *nrhs;
+        for (j = 1; j <= i__1; ++j) {
+            berr[j] = 0.;
+/*           Fill every column that N_ERR_BNDS says is present. */
+            if (*n_err_bnds__ >= 1) {
+                err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+                err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+            }
+            if (*n_err_bnds__ >= 2) {
+                err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.;
+                err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.;
+            }
+            if (*n_err_bnds__ >= 3) {
+                err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.;
+                err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.;
+            }
+        }
+        return 0;
+    }
+
+/*     Default to failure. */
+
+    *rcond = 0.;
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+        berr[j] = 1.;
+/*        Fill every column that N_ERR_BNDS says is present. */
+        if (*n_err_bnds__ >= 1) {
+            err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+            err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+        }
+        if (*n_err_bnds__ >= 2) {
+            err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+            err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+        }
+        if (*n_err_bnds__ >= 3) {
+            err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.;
+            err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.;
+        }
+    }
+
+/*     Compute the norm of A and the reciprocal of the condition */
+/*     number of A. */
+
+    if (notran) {
+        *(unsigned char *)norm = 'I';
+    } else {
+        *(unsigned char *)norm = '1';
+    }
+    anorm = _starpu_dlange_(norm, n, n, &a[a_offset], lda, &work[1]);
+    _starpu_dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1],
+            info);
+
+/*     Perform refinement on each right-hand side */
+
+    if (ref_type__ != 0) {
+        prec_type__ = _starpu_ilaprec_("E");
+        if (notran) {
+            _starpu_dla_gerfsx_extended__(&prec_type__, &trans_type__, n, nrhs, &a[
+                    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, &
+                    c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+                    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+                    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+                    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh,
+                    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+        } else {
+            _starpu_dla_gerfsx_extended__(&prec_type__, &trans_type__, n, nrhs, &a[
+                    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, &
+                    r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+                    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+                    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+                    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh,
+                    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+        }
+    }
+/* Computing MAX */
+    d__1 = 10., d__2 = sqrt((doublereal) (*n));
+    err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon");
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 1) {
+
+/*     Compute scaled normwise condition number cond(A*C). */
+
+        if (colequ && notran) {
+            rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[
+                    af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1]
+                    , &iwork[1], (ftnlen)1);
+        } else if (rowequ && ! notran) {
+            rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[
+                    af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1]
+                    , &iwork[1], (ftnlen)1);
+        } else {
+            rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[
+                    af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1]
+                    , &iwork[1], (ftnlen)1);
+        }
+        i__1 = *nrhs;
+        for (j = 1; j <= i__1; ++j) {
+
+/*     Cap the error at 1.0. */
+
+            if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1
+                    << 1)] > 1.) {
+                err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+            }
+
+/*     Threshold the error (see LAWN). */
+
+/*     NOTE(review): the err = 2 column is read and written below (and */
+/*     in the componentwise section) even when N_ERR_BNDS = 1 -- confirm */
+/*     that callers always provide at least two columns. */
+            if (rcond_tmp__ < illrcond_thresh__) {
+                err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+                err_bnds_norm__[j + err_bnds_norm_dim1] = 0.;
+                if (*info <= *n) {
+                    *info = *n + j;
+                }
+            } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] <
+                    err_lbnd__) {
+                err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__;
+                err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+            }
+
+/*     Save the condition number. */
+
+            if (*n_err_bnds__ >= 3) {
+                err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__;
+            }
+        }
+    }
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 2) {
+
+/*     Compute componentwise condition number cond(A*diag(Y(:,J))) for */
+/*     each right-hand side using the current solution as an estimate of */
+/*     the true solution.  If the componentwise error estimate is too */
+/*     large, then the solution is a lousy estimate of truth and the */
+/*     estimated RCOND may be too optimistic.  To avoid misleading users, */
+/*     the inverse condition number is set to 0.0 when the estimated */
+/*     cwise error is at least CWISE_WRONG. */
+
+        cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon"));
+        i__1 = *nrhs;
+        for (j = 1; j <= i__1; ++j) {
+            if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] <
+                    cwise_wrong__) {
+                rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[
+                        af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1],
+                        info, &work[1], &iwork[1], (ftnlen)1);
+            } else {
+                rcond_tmp__ = 0.;
+            }
+
+/*     Cap the error at 1.0. */
+
+            if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1
+                    << 1)] > 1.) {
+                err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+            }
+
+/*     Threshold the error (see LAWN). */
+
+            if (rcond_tmp__ < illrcond_thresh__) {
+                err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+                err_bnds_comp__[j + err_bnds_comp_dim1] = 0.;
+/*              PARAMS(3) keeps its default (1.0) when NPARAMS < 3 and */
+/*              must not be read in that case. */
+                if ((*nparams < 3 || params[3] == 1.) && *info < *n + j) {
+                    *info = *n + j;
+                }
+            } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] <
+                    err_lbnd__) {
+                err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__;
+                err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+            }
+
+/*     Save the condition number. */
+
+            if (*n_err_bnds__ >= 3) {
+                err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__;
+            }
+        }
+    }
+
+    return 0;
+
+/*     End of DGERFSX */
+
+} /* _starpu_dgerfsx_ */
diff --git a/min-dgels/base/SRC/dgerq2.c b/min-dgels/base/SRC/dgerq2.c
new file mode 100644
index 0000000..11c2538
--- /dev/null
+++ b/min-dgels/base/SRC/dgerq2.c
@@ -0,0 +1,155 @@
+/* dgerq2.f -- translated by f2c (version 20061008).
+   You must link the resulting object file with libf2c:
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+                http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/*
+ * DGERQ2 (LAPACK routine, version 3.2, November 2006; Univ. of Tennessee,
+ * Univ. of California Berkeley and NAG Ltd.), translated by f2c.
+ *
+ * Purpose
+ * =======
+ *
+ * Computes an RQ factorization of a real m by n matrix A:  A = R * Q.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input) INTEGER
+ *         The number of rows of the matrix A.  M >= 0.
+ *
+ * N       (input) INTEGER
+ *         The number of columns of the matrix A.  N >= 0.
+ *
+ * A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+ *         On entry, the m by n matrix A.
+ *         On exit, if m <= n, the upper triangle of the subarray
+ *         A(1:m,n-m+1:n) contains the m by m upper triangular matrix R;
+ *         if m >= n, the elements on and above the (m-n)-th subdiagonal
+ *         contain the m by n upper trapezoidal matrix R; the remaining
+ *         elements, with the array TAU, represent the orthogonal matrix
+ *         Q as a product of elementary reflectors (see Further Details).
+ *
+ * LDA     (input) INTEGER
+ *         The leading dimension of the array A.  LDA >= max(1,M).
+ *
+ * TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+ *         The scalar factors of the elementary reflectors (see Further
+ *         Details).
+ *
+ * WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+ *
+ * INFO    (output) INTEGER
+ *         = 0: successful exit
+ *         < 0: if INFO = -i, the i-th argument had an illegal value; the
+ *              error is also reported through _starpu_xerbla_.
+ *
+ * The function value itself is always 0.
+ *
+ * Further Details
+ * ===============
+ *
+ * The matrix Q is represented as a product of elementary reflectors
+ *
+ *    Q = H(1) H(2) . . . H(k), where k = min(m,n).
+ *
+ * Each H(i) has the form
+ *
+ *    H(i) = I - tau * v * v'
+ *
+ * where tau is a real scalar, and v is a real vector with
+ * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+ * A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+ *
+ * Implementation notes
+ * ====================
+ *
+ * The f2c calling convention is kept: every argument is passed by
+ * reference, and the array pointers are shifted on entry so that the
+ * 1-based subscripts of the description above can be used unchanged in
+ * the code.  The reflectors are generated with _starpu_dlarfp_ and
+ * applied from the right with _starpu_dlarf_.
+ */
+/* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer *
+        lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* LAPACK helpers defined in other translation units */
+    extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *,
+            doublereal *, integer *, doublereal *, doublereal *, integer *,
+            doublereal *);
+    extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *,
+            doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
+
+    /* Column stride of A and the shift giving a[] Fortran subscripts */
+    integer ld, shift;
+
+    /* Reflector index/count and the row and column of A it pivots on */
+    integer step, nrefl, row, col;
+
+    /* By-reference scratch arguments for the callees */
+    integer argno, nrows, ncols;
+
+    /* A(row,col) saved while it temporarily holds the unit entry of v */
+    doublereal pivot;
+
+    /* Re-base the arrays so that 1-based Fortran subscripts apply */
+    ld = *lda;
+    shift = 1 + ld;
+    a -= shift;
+    --tau;
+    --work;
+
+    /* Validate the arguments; only the first offender is reported */
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1,*m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        argno = -(*info);
+        _starpu_xerbla_("DGERQ2", &argno);
+        return 0;
+    }
+
+    /* Number of elementary reflectors */
+    nrefl = min(*m,*n);
+
+    /* Reflectors are generated from the last one down to the first */
+    for (step = nrefl; step >= 1; --step) {
+        row = *m - nrefl + step;
+        col = *n - nrefl + step;
+
+        /* Generate elementary reflector H(step) to annihilate */
+        /* A(row,1:col-1) */
+        ncols = col;
+        _starpu_dlarfp_(&ncols, &a[row + col * ld], &a[row + ld], lda,
+                &tau[step]);
+
+        /* Apply H(step) to A(1:row-1,1:col) from the right */
+        pivot = a[row + col * ld];
+        a[row + col * ld] = 1.;
+        nrows = row - 1;
+        ncols = col;
+        _starpu_dlarf_("Right", &nrows, &ncols, &a[row + ld], lda, &tau[step],
+                &a[shift], lda, &work[1]);
+        a[row + col * ld] = pivot;
+    }
+    return 0;
+
+/*     End of DGERQ2 */
+
+} /* _starpu_dgerq2_ */
diff --git a/min-dgels/base/SRC/dgerqf.c b/min-dgels/base/SRC/dgerqf.c
new file mode 100644
index 0000000..9ae3a44
--- /dev/null
+++ b/min-dgels/base/SRC/dgerqf.c
@@ -0,0 +1,269 @@
+/* dgerqf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (passed by address to the routines called below) */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, ki, kk, mu, nu, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dgerq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGERQF computes an RQ factorization of a real M-by-N matrix A: */ +/* A = R * Q. 
*/ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if m <= n, the upper triangle of the subarray */ +/* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */ +/* if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* contain the M-by-N upper trapezoidal matrix R; */ +/* the remaining elements, with the array TAU, represent the */ +/* orthogonal matrix Q as a product of min(m,n) elementary */ +/* reflectors (see Further Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK (set to 1 when min(M,N) = 0 and to M*NB for a workspace query; after a factorization the code stores IWS there, see the end of the routine). */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). 
*/ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: bias the pointers so that a[i + j*a_dim1], tau[i] and work[i] are addressed with 1-based indices below */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; /* LWORK == -1 marks a workspace query */ + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = min(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); + lwkopt = *m * nb; + } + work[1] = (doublereal) lwkopt; /* publish the optimal workspace size in WORK(1) */ + + if (*lwork < max(1,*m) && ! lquery) { + *info = -7; /* LWORK is too small and this is not a query */ + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGERQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *m; /* default value reported in WORK(1) on exit: M */ + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. 
*/ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. 
*/ + + nb = *lwork / ldwork; /* largest block size that fits in the supplied workspace */ +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk rows are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = min(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the RQ factorization of the current block */ +/* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */ + + i__3 = *n - k + i__ + ib - 1; + _starpu_dgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (*m - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *n - k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ + + i__3 = *m - k + i__ - 1; + i__4 = *n - k + i__ + ib - 1; + _starpu_dlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3, + &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1], + &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; /* MU-by-NU leading part still to be factored by the unblocked code below */ + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + _starpu_dgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (doublereal) iws; /* overwrite WORK(1) with IWS: M, or M*NB as set in the blocked-path sizing above */ + return 0; + +/* End of DGERQF */ + +} /* _starpu_dgerqf_ */ diff 
--git a/min-dgels/base/SRC/dgesc2.c b/min-dgels/base/SRC/dgesc2.c new file mode 100644 index 0000000..f4d7b76 --- /dev/null +++ b/min-dgels/base/SRC/dgesc2.c @@ -0,0 +1,176 @@ +/* dgesc2.f -- translated by 
f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (passed by address to the routines called below) */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, + doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal eps, temp; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + doublereal smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGESC2 solves a system of linear equations */ + +/* A * X = scale* RHS */ + +/* with a general N-by-N matrix A using the LU factorization with */ +/* complete pivoting computed by DGETC2. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. 
*/ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the LU part of the factorization of the n-by-n */ +/* matrix A computed by DGETC2: A = P * L * U * Q */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1, N). */ + +/* RHS (input/output) DOUBLE PRECISION array, dimension (N). */ +/* On entry, the right hand side vector b. */ +/* On exit, the solution vector X. */ + +/* IPIV (input) INTEGER array, dimension (N). */ +/* The pivot indices; for 1 <= i <= N, row i of the */ +/* matrix has been interchanged with row IPIV(i). */ + +/* JPIV (input) INTEGER array, dimension (N). */ +/* The pivot indices; for 1 <= j <= N, column j of the */ +/* matrix has been interchanged with column JPIV(j). */ + +/* SCALE (output) DOUBLE PRECISION */ +/* On exit, SCALE contains the scale factor. SCALE is chosen */ +/* 0 <= SCALE <= 1 to prevent overflow in the solution. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Set constant to control overflow */ + + /* Parameter adjustments: bias the pointers so that a[i + j*a_dim1], rhs[i], ipiv[i] and jpiv[i] are addressed with 1-based indices below */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --rhs; + --ipiv; + --jpiv; + + /* Function Body */ + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + bignum = 1. 
/ smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Apply permutations IPIV to RHS */ + + i__1 = *n - 1; + _starpu_dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); + +/* Solve for L part (forward substitution; no division is performed, so the diagonal of L is taken as 1) */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + rhs[j] -= a[j + i__ * a_dim1] * rhs[i__]; +/* L10: */ + } +/* L20: */ + } + +/* Solve for U part */ + + *scale = 1.; + +/* Check for scaling: with i the index returned by IDAMAX, if 2*SMLNUM*|RHS(i)| > |A(N,N)| then RHS is passed to DSCAL with the factor 0.5/|RHS(i)|, which is also folded into SCALE */ + + i__ = _starpu_idamax_(n, &rhs[1], &c__1); + if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * + a_dim1], abs(d__2))) { + temp = .5 / (d__1 = rhs[i__], abs(d__1)); + _starpu_dscal_(n, &temp, &rhs[1], &c__1); + *scale *= temp; + } + + for (i__ = *n; i__ >= 1; --i__) { /* back substitution from row N up to row 1, multiplying by the reciprocal of each diagonal entry */ + temp = 1. / a[i__ + i__ * a_dim1]; + rhs[i__] *= temp; + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp); +/* L30: */ + } +/* L40: */ + } + +/* Apply permutations JPIV to the solution (RHS) */ + + i__1 = *n - 1; + _starpu_dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); + return 0; + +/* End of DGESC2 */ + +} /* _starpu_dgesc2_ */ diff --git a/min-dgels/base/SRC/dgesdd.c b/min-dgels/base/SRC/dgesdd.c new file mode 100644 index 0000000..f8251bc --- /dev/null +++ b/min-dgels/base/SRC/dgesdd.c @@ -0,0 +1,1609 @@ +/* dgesdd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b227 = 0.; +static doublereal c_b248 = 1.; + +/* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, ie, il, ir, iu, blk; + doublereal dum[1], eps; + integer ivt, iscl; + doublereal anrm; + integer idum[1], ierr, itau; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int _starpu_dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal 
_starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer bdspac; + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + doublereal smlnum; + logical wntqas, lquery; + + +/* -- LAPACK driver routine (version 3.2.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* March 2009 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGESDD computes the singular value decomposition (SVD) of a real */ +/* M-by-N matrix A, optionally computing the left and right singular */ +/* vectors. If singular vectors are desired, it uses a */ +/* divide-and-conquer algorithm. */ + +/* The SVD is written */ + +/* A = U * SIGMA * transpose(V) */ + +/* where SIGMA is an M-by-N matrix which is zero except for its */ +/* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* are the singular values of A; they are real and non-negative, and */ +/* are returned in descending order. The first min(m,n) columns of */ +/* U and V are the left and right singular vectors of A. */ + +/* Note that the routine returns VT = V**T, not V. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* Specifies options for computing all or part of the matrix U: */ +/* = 'A': all M columns of U and all N rows of V**T are */ +/* returned in the arrays U and VT; */ +/* = 'S': the first min(M,N) columns of U and the first */ +/* min(M,N) rows of V**T are returned in the arrays U */ +/* and VT; */ +/* = 'O': If M >= N, the first N columns of U are overwritten */ +/* on the array A and all rows of V**T are returned in */ +/* the array VT; */ +/* otherwise, all columns of U are returned in the */ +/* array U and the first M rows of V**T are overwritten */ +/* in the array A; */ +/* = 'N': no columns of U or rows of V**T are computed. 
*/ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if JOBZ = 'O', A is overwritten with the first N columns */ +/* of U (the left singular vectors, stored */ +/* columnwise) if M >= N; */ +/* A is overwritten with the first M rows */ +/* of V**T (the right singular vectors, stored */ +/* rowwise) otherwise. */ +/* if JOBZ .ne. 'O', the contents of A are destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The singular values of A, sorted so that S(i) >= S(i+1). */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */ +/* UCOL = min(M,N) if JOBZ = 'S'. */ +/* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */ +/* orthogonal matrix U; */ +/* if JOBZ = 'S', U contains the first min(M,N) columns of U */ +/* (the left singular vectors, stored columnwise); */ +/* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= 1; if */ +/* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */ + +/* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ +/* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */ +/* N-by-N orthogonal matrix V**T; */ +/* if JOBZ = 'S', VT contains the first min(M,N) rows of */ +/* V**T (the right singular vectors, stored rowwise); */ +/* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= 1; if */ +/* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */ +/* if JOBZ = 'S', LDVT >= min(M,N). 
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 1. */ +/* If JOBZ = 'N', */ +/* LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)). */ +/* If JOBZ = 'O', */ +/* LWORK >= 3*min(M,N) + */ +/* max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). */ +/* If JOBZ = 'S' or 'A' */ +/* LWORK >= 3*min(M,N) + */ +/* max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). */ +/* For good performance, LWORK should generally be larger. */ +/* If LWORK = -1 but other input arguments are legal, WORK(1) */ +/* returns the optimal LWORK. */ + +/* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: DBDSDC did not converge, updating process failed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + wntqa = _starpu_lsame_(jobz, "A"); + wntqs = _starpu_lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = _starpu_lsame_(jobz, "O"); + wntqn = _starpu_lsame_(jobz, "N"); + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSDC */ + + mnthr = (integer) (minmn * 11. 
/ 6.); + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", n, n, n, &c_n1); + wrkbl = 
max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + 
max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSDC */ + + mnthr = (integer) (minmn * 11. / 6.); + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 
= *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", m, m, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", m, m, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", m, m, m, &c_n1); 
+ wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", m, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", m, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "QLN", m, m, n, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" +, "PRT", n, n, m, &c_n1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = sqrt(_starpu_dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("L", &i__1, &i__2, &c_b227, &c_b227, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* Perform bidiagonal SVD, computing singular values only */ +/* (Workspace: need N+BDSPAC) */ + + _starpu_dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, 
&work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ = 'O') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("L", &i__1, &i__2, &c_b227, &c_b227, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+N*N+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R */ +/* and VT by right singular vectors of R */ +/* (Workspace: need 2*N*N+3*N, prefer 
2*N*N+2*N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in WORK(IR) and copying to A */ +/* (Workspace: need 2*N*N, prefer N*N+M*N) */ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + _starpu_dgemm_("N", "N", &chunk, n, n, &c_b248, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b227, &work[ir], &ldwrkr); + _starpu_dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__1, &c_b227, &c_b227, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - 
nwork + 1; + _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagoal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (Workspace: need N*N) */ + + _starpu_dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + _starpu_dgemm_("N", "N", m, n, n, &c_b248, &a[a_offset], lda, &work[ + ir], &ldwrkr, &c_b227, &u[u_offset], ldu); + + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* Generate Q in U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + i__2 = *lwork - nwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], 
&work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__1, &c_b227, &c_b227, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+N*N+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b248, &u[u_offset], ldu, &work[ + iu], &ldwrku, &c_b227, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* M .LT. 
MNTHR */ + +/* Path 5 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Perform bidiagonal SVD, only computing singular values */ +/* (Workspace: need N+BDSPAC) */ + + _starpu_dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+N*N+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* Overwrite VT by right singular vectors of A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* Overwrite WORK(IU) by left singular vectors of A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], 
&ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + _starpu_dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by left singular vectors of */ +/* bidiagonal matrix in WORK(IU), storing result in */ +/* WORK(IR) and copying to A */ +/* (Workspace: need 2*N*N, prefer N*N+M*N) */ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + _starpu_dgemm_("N", "N", &chunk, n, n, &c_b248, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b227, & + work[ir], &ldwrkr); + _starpu_dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+BDSPAC) */ + + _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &u[u_offset], ldu); + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and 
computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* (Workspace: need N+BDSPAC) */ + + _starpu_dlaset_("F", m, m, &c_b227, &c_b227, &u[u_offset], ldu); + _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + _starpu_dlaset_("F", &i__1, &i__2, &c_b227, &c_b248, &u[*n + 1 + ( + *n + 1) * u_dim1], ldu); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* A has more columns than rows. 
If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b227, &c_b227, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* Perform bidiagonal SVD, computing singular values only */ +/* (Workspace: need M+BDSPAC) */ + + _starpu_dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 
1; + _starpu_dlaset_("U", &i__1, &i__2, &c_b227, &c_b227, &work[il + + ldwrkl], &ldwrkl); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U, and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* (Workspace: need M+M*M+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by Q */ +/* in A, storing result in WORK(IL) and copying to A */ +/* (Workspace: need 2*M*M, prefer M*M+M*N) */ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + _starpu_dgemm_("N", "N", m, &blk, m, &c_b248, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b227, &work[il], & + ldwrkl); + _starpu_dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__1, &c_b227, &c_b227, &work[il + + ldwrkl], &ldwrkl); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* (Workspace: need M+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and VT 
*/ +/* by right singular vectors of L */ +/* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IL) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + _starpu_dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + _starpu_dgemm_("N", "N", m, n, m, &c_b248, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b227, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__1, &c_b227, &c_b227, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, 
&ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* (Workspace: need M+M*M+BDSPAC) */ + + _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] +, info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b248, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b227, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* N .LT. 
MNTHR */ + +/* Path 5t (N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Perform bidiagonal SVD, only computing singular values */ +/* (Workspace: need M+BDSPAC) */ + + _starpu_dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] +, info); + +/* Overwrite U by left singular vectors of A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* Overwrite WORK(IVT) by left singular vectors of A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy 
right singular vectors of A from WORK(IVT) to A */ + + _starpu_dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* Generate P**T in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - nwork + 1; + _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by right singular vectors of */ +/* bidiagonal matrix in WORK(IVT), storing result in */ +/* WORK(IL) and copying to A */ +/* (Workspace: need 2*M*M, prefer M*M+M*N) */ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + _starpu_dgemm_("N", "N", m, &blk, m, &c_b248, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b227, & + work[il], m); + _starpu_dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* (Workspace: need M+BDSPAC) */ + + _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &vt[vt_offset], ldvt); + _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* (Workspace: need 3*M, prefer 2*M+M*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal 
matrix in VT */ +/* (Workspace: need M+BDSPAC) */ + + _starpu_dlaset_("F", n, n, &c_b227, &c_b227, &vt[vt_offset], ldvt); + _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + _starpu_dlaset_("F", &i__1, &i__2, &c_b227, &c_b248, &vt[*m + 1 + + (*m + 1) * vt_dim1], ldvt); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* (Workspace: need 2*M+N, prefer 2*M+N*NB) */ + + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + _starpu_dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESDD */ + +} /* _starpu_dgesdd_ */ diff --git a/min-dgels/base/SRC/dgesv.c b/min-dgels/base/SRC/dgesv.c new file mode 100644 index 0000000..105e7e1 --- /dev/null +++ b/min-dgels/base/SRC/dgesv.c @@ -0,0 +1,138 @@ +/* dgesv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine: solves A*X = B by LU factorization followed by a triangular solve; always returns 0, the outcome is reported through *info */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals (f2c): leading dimensions and index offsets of a and b, plus a scratch integer holding the error position handed to _starpu_xerbla_ */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables: only prototypes of the external routines called below */ + extern /* Subroutine */ int _starpu_dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGESV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ + +/* The LU decomposition with partial pivoting and row interchanges is */ +/* used to factor A as */ +/* A = P * L * U, */ +/* where P is a permutation matrix, L is unit lower triangular, and U is */ +/* upper triangular. The factored form of A is then used to solve the */ +/* system of equations A * X = B. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. 
*/ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N coefficient matrix A. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* The pivot indices that define the permutation matrix P; */ +/* row i of the matrix was interchanged with row IPIV(i). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, so the solution could not be computed. */ + +/* ===================================================================== */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: a, ipiv and b are shifted back so that 1-based, Fortran-style subscripts can be used (f2c idiom); &a[a_offset], &ipiv[1] and &b[b_offset] below are again the addresses the caller passed in */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body: validate the arguments first. *info becomes minus the position of the first illegal argument in the parameter list (n is 1st, nrhs 2nd, lda 4th, ldb 7th); on such an error the positive position is passed to _starpu_xerbla_ and the routine returns immediately */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. 
_starpu_dgetrf_ reports its status through info, which gates the solve below. */ + + _starpu_dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. Only reached when the factorization left *info at 0. */ + + _starpu_dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of DGESV */ + +} /* _starpu_dgesv_ */ diff --git a/min-dgels/base/SRC/dgesvd.c b/min-dgels/base/SRC/dgesvd.c new file mode 100644 index 0000000..3612df7 --- /dev/null +++ b/min-dgels/base/SRC/dgesvd.c @@ -0,0 +1,4050 @@ +/* dgesvd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c__0 = 0; +static integer c__2 = 2; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b421 = 0.; +static doublereal c_b443 = 1.; + +/* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * + ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + double sqrt(doublereal); + + /* Local variables */ + integer i__, ie, ir, iu, blk, ncu; + doublereal dum[1], eps; + integer nru, iscl; + doublereal anrm; + integer ierr, itau, ncvt, nrvt; + extern /* Subroutine */ int _starpu_dgemm_(char *, 
char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork; + logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; + extern /* Subroutine */ int _starpu_dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer bdspac; + extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dbdsqr_(char *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dorgbr_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, 
integer *), _starpu_dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + doublereal smlnum; + logical lquery, wntuas, wntvas; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGESVD computes the singular value decomposition (SVD) of a real */ +/* M-by-N matrix A, optionally computing the left and/or right singular */ +/* vectors. The SVD is written */ + +/* A = U * SIGMA * transpose(V) */ + +/* where SIGMA is an M-by-N matrix which is zero except for its */ +/* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* are the singular values of A; they are real and non-negative, and */ +/* are returned in descending order. The first min(m,n) columns of */ +/* U and V are the left and right singular vectors of A. */ + +/* Note that the routine returns V**T, not V. */ + +/* Arguments */ +/* ========= */ + +/* JOBU (input) CHARACTER*1 */ +/* Specifies options for computing all or part of the matrix U: */ +/* = 'A': all M columns of U are returned in array U: */ +/* = 'S': the first min(m,n) columns of U (the left singular */ +/* vectors) are returned in the array U; */ +/* = 'O': the first min(m,n) columns of U (the left singular */ +/* vectors) are overwritten on the array A; */ +/* = 'N': no columns of U (no left singular vectors) are */ +/* computed. 
*/ + +/* JOBVT (input) CHARACTER*1 */ +/* Specifies options for computing all or part of the matrix */ +/* V**T: */ +/* = 'A': all N rows of V**T are returned in the array VT; */ +/* = 'S': the first min(m,n) rows of V**T (the right singular */ +/* vectors) are returned in the array VT; */ +/* = 'O': the first min(m,n) rows of V**T (the right singular */ +/* vectors) are overwritten on the array A; */ +/* = 'N': no rows of V**T (no right singular vectors) are */ +/* computed. */ + +/* JOBVT and JOBU cannot both be 'O'. */ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if JOBU = 'O', A is overwritten with the first min(m,n) */ +/* columns of U (the left singular vectors, */ +/* stored columnwise); */ +/* if JOBVT = 'O', A is overwritten with the first min(m,n) */ +/* rows of V**T (the right singular vectors, */ +/* stored rowwise); */ +/* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ +/* are destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The singular values of A, sorted so that S(i) >= S(i+1). */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. */ +/* If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */ +/* if JOBU = 'S', U contains the first min(m,n) columns of U */ +/* (the left singular vectors, stored columnwise); */ +/* if JOBU = 'N' or 'O', U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= 1; if */ +/* JOBU = 'S' or 'A', LDU >= M. 
*/ + +/* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ +/* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */ +/* V**T; */ +/* if JOBVT = 'S', VT contains the first min(m,n) rows of */ +/* V**T (the right singular vectors, stored rowwise); */ +/* if JOBVT = 'N' or 'O', VT is not referenced. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= 1; if */ +/* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */ +/* superdiagonal elements of an upper bidiagonal matrix B */ +/* whose diagonal is in S (not necessarily sorted). B */ +/* satisfies A = U * B * VT, so it has the same singular values */ +/* as A, and singular vectors related by U and VT. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). */ +/* For good performance, LWORK should generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if DBDSQR did not converge, INFO specifies how many */ +/* superdiagonals of an intermediate bidiagonal form B */ +/* did not converge to zero. See the description of WORK */ +/* above for details. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + wntua = _starpu_lsame_(jobu, "A"); + wntus = _starpu_lsame_(jobu, "S"); + wntuas = wntua || wntus; + wntuo = _starpu_lsame_(jobu, "O"); + wntun = _starpu_lsame_(jobu, "N"); + wntva = _starpu_lsame_(jobvt, "A"); + wntvs = _starpu_lsame_(jobvt, "S"); + wntvas = wntva || wntvs; + wntvo = _starpu_lsame_(jobvt, "O"); + wntvn = _starpu_lsame_(jobvt, "N"); + lquery = *lwork == -1; + + if (! (wntua || wntus || wntuo || wntun)) { + *info = -1; + } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*ldu < 1 || wntuas && *ldu < *m) { + *info = -9; + } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) 
*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = _starpu_ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0); + bdspac = *n * 5; + if (*m >= mnthr) { + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ + + maxwrk = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + maxwrk = max(i__2,i__3); + if (wntvo || wntvas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(& + c__1, "DORGBR", "P", n, n, n, &c_n1); + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n << 2; + minwrk = max(i__2,bdspac); + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 
= *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * 
_starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntua && wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } 
else if (wntua && wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntua && wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } + } else { + +/* Path 10 (M at least N, but not much larger) */ + + maxwrk = *n * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1); + if (wntus || wntuo) { +/* Computing MAX */ + i__2 = 
maxwrk, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORG" + "BR", "Q", m, n, n, &c_n1); + maxwrk = max(i__2,i__3); + } + if (wntua) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" + "BR", "Q", m, m, n, &c_n1); + maxwrk = max(i__2,i__3); + } + if (! wntvn) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", n, n, n, &c_n1); + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = _starpu_ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0); + bdspac = *m * 5; + if (*n >= mnthr) { + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ + + maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + maxwrk = max(i__2,i__3); + if (wntuo || wntuas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, + "DORGBR", "Q", m, m, m, &c_n1); + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m << 2; + minwrk = max(i__2,bdspac); + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * 
_starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='O') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvs && wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = 
max(i__2,bdspac); + } else if (wntvs && wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvs && wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + 
i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ + + wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + 
"DORGBR", "P", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" +, "Q", m, m, m, &c_n1); + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } + } else { + +/* Path 10t(N greater than M, but not much larger) */ + + maxwrk = *m * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1); + if (wntvs || wntvo) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" + "BR", "P", m, n, m, &c_n1); + maxwrk = max(i__2,i__3); + } + if (wntva) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + *n * _starpu_ilaenv_(&c__1, "DORG" + "BR", "P", n, n, m, &c_n1); + maxwrk = max(i__2,i__3); + } + if (! wntun) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, + "DORGBR", "Q", m, m, m, &c_n1); + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } + } + maxwrk = max(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__2 = -(*info); + _starpu_xerbla_("DGESVD", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = sqrt(_starpu_dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. 
&& anrm < smlnum) { + iscl = 1; + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ +/* No left singular vectors to be computed */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out below R */ + + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + ncvt = 0; + if (wntvo || wntvas) { + +/* If right singular vectors desired, generate P'. 
*/ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__2, &ierr); + ncvt = *n; + } + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], + info); + +/* If right singular vectors desired in VT, copy them there */ + + if (wntvas) { + _starpu_dlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + } + + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ +/* N left singular vectors to be overwritten on A and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= max(i__2,i__3) + *lda * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= max(i__2,i__3) + *n * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__2, &ierr); + +/* Copy R to WORK(IR) and zero out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, 
&c_b421, &work[ir + 1] +, &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate left vectors bidiagonalizing R */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & + c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] +, info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ + + i__2 = *m; + i__3 = ldwrku; + for (i__ = 1; i__3 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = min(i__4,ldwrku); + _starpu_dgemm_("N", "N", &chunk, n, n, &c_b443, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b421, & + work[iu], &ldwrku); + _starpu_dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing A */ +/* (Workspace: need 4*N, prefer 3*N+N*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + + } + + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__3 = *n << 2; + if (*lwork >= *n * *n + max(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= max(i__3,i__2) + *lda * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= max(i__3,i__2) + *n * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr 
= *n; + } else { + +/* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__3, &ierr); + +/* Copy R to VT, zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__3 = *n - 1; + i__2 = *n - 1; + _starpu_dlaset_("L", &i__3, &i__2, &c_b421, &c_b421, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__3, & + ierr); + _starpu_dlacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & + ldwrkr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) and computing right */ +/* singular vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, n, &c__0, 
&s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ + + i__3 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = min(i__4,ldwrku); + _starpu_dgemm_("N", "N", &chunk, n, n, &c_b443, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b421, & + work[iu], &ldwrku); + _starpu_dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Multiply Q in A by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & + work[itauq], &a[a_offset], lda, &work[iwork], & + i__2, &ierr); 
+ +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntus) { + + if (wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ +/* N left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[ir + + 1], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[ir], 
&ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] +, &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, + &work[ir], &ldwrkr, &c_b421, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ + a_dim1 + 2], lda); + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + 
i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; 
+ +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N+4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] +, &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N+4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] +, &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, + &work[iu], &ldwrku, &c_b421, &u[u_offset], + ldu); + +/* Copy right singular vectors of R to A */ +/* (Workspace: need N*N) */ + + _starpu_dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + 
+/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ + a_dim1 + 2], lda); + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right vectors bidiagonalizing R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ +/* or 'A') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu 
+ ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] +, &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N+4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* 
WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, + &work[iu], &ldwrku, &c_b421, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT 
*/ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntua) { + + if (wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ +/* M left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy R to WORK(IR), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[ir + + 1], &ldwrkr); + +/* Generate Q in U */ +/* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, 
&work[ir], &ldwrkr, &work[itauq] +, &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IR), storing result in A */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, + &work[ir], &ldwrkr, &c_b421, &a[a_offset], + lda); + +/* Copy left singular vectors of A from A to U */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ + a_dim1 + 2], lda); + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, 
&work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup 
+ *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N+4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] +, &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N+4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] +, &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, + &work[iu], &ldwrku, &c_b421, &a[a_offset], + lda); + +/* Copy left singular vectors of A from A to U */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy right singular vectors of R from WORK(IR) to A */ + + _starpu_dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, 
&work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ + a_dim1 + 2], lda); + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ +/* or 'A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + 
iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] +, &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N+4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular 
vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, + &work[iu], &ldwrku, &c_b421, &a[a_offset], + lda); + +/* Copy left singular vectors of A from A to U */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R from A to VT, zeroing out below it */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* 
Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 10 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */ + + _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + if (wntus) { + ncu = *n; + } + if (wntua) { + ncu = *m; + } + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A 
*/ +/* (Workspace: need 4*N, prefer 3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *n; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } else { + +/* A has more columns than rows. 
If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ +/* No right singular vectors to be computed */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuo || wntuas) { + +/* If left singular vectors desired, generate Q */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__2, &ierr); + } + iwork = ie + *m; + nru = 0; + if (wntuo || wntuas) { + nru = *m; + } + +/* Perform bidiagonal QR iteration, computing left singular */ +/* vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + +/* If left singular vectors desired in U, copy them there */ + + if (wntuas) { + _starpu_dlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* 
Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= max(i__2,i__3) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= max(i__2,i__3) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__2, &ierr); + +/* Copy L to WORK(IR) and zero out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate right vectors bidiagonalizing L */ +/* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular 
vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] +, info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M+2*M, prefer M*M+M*N+M) */ + + i__2 = *n; + i__3 = chunk; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = min(i__4,chunk); + _starpu_dgemm_("N", "N", m, &blk, m, &c_b443, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b421, & + work[iu], &ldwrku); + _starpu_dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L30: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[ + iwork], info); + + } + + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__3 = *m << 2; + if (*lwork >= *m * *m + max(i__3,bdspac)) { + +/* Sufficient workspace for a fast 
algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= max(i__3,i__2) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= max(i__3,i__2) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__3, &ierr); + +/* Copy L to U, zeroing about above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__3 = *m - 1; + i__2 = *m - 1; + _starpu_dlaset_("U", &i__3, &i__2, &c_b421, &c_b421, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U, copying result to WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + _starpu_dlacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); + +/* Generate right vectors bidiagonalizing L in WORK(IR) */ +/* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__3, &ierr); + +/* Generate left vectors 
bidiagonalizing L in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__3 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U, and computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], + &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */ + + i__3 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = min(i__4,chunk); + _starpu_dgemm_("N", "N", m, &blk, m, &c_b443, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b421, & + work[iu], &ldwrku); + _starpu_dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L40: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] +, &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 
3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in A */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[ + itaup], &a[a_offset], lda, &work[iwork], &i__2, & + ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvs) { + + if (wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IR), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + + ldwrkr], &ldwrkr); + +/* 
Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing L in */ +/* WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] +, &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[ir], &ldwrkr, + &a[a_offset], lda, &c_b421, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy result to VT */ + + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ 
+ + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( + a_dim1 << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out 
below it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M+4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] +, &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] +, &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b421, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of L to 
A */ +/* (Workspace: need M*M) */ + + _starpu_dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( + a_dim1 << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors of L in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, compute left */ +/* singular vectors of A in A and compute right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else 
if (wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is LDA by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M+4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] +, &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + 
_starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b421, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[( + u_dim1 << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, 
&u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntva) { + + if (wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy L to WORK(IR), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in VT */ +/* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 
itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need M*M+4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] +, &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[ir], &ldwrkr, + &vt[vt_offset], ldvt, &c_b421, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, 
&c_b421, &c_b421, &a[( + a_dim1 << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], 
lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M+4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] +, &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] +, &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, 
&c_b421, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of A from WORK(IR) to A */ + + _starpu_dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( + a_dim1 << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in 
VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else if (wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by M */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is M by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* 
(Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] +, &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M+BDSPAC) */ + + _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b421, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[( + u_dim1 << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + 
*m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* N .LT. 
MNTHR */ + +/* Path 10t(N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ + + _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */ + + _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + if (wntva) { + nrvt = *n; + } + if (wntvs) { + nrvt = *m; + } + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *m; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; 
+ } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } + +/* If DBDSQR failed to converge, copy unconverged superdiagonals */ +/* to WORK( 2:MINMN ) */ + + if (*info != 0) { + if (ie > 2) { + i__2 = minmn - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L50: */ + } + } + if (ie < 2) { + for (i__ = minmn - 1; i__ >= 1; --i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L60: */ + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__2 = minmn - 1; + _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + if (anrm < smlnum) { + _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__2 = minmn - 1; + 
_starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESVD */ + +} /* _starpu_dgesvd_ */ diff --git a/min-dgels/base/SRC/dgesvj.c b/min-dgels/base/SRC/dgesvj.c new file mode 100644 index 0000000..e33ab6c --- /dev/null +++ b/min-dgels/base/SRC/dgesvj.c @@ -0,0 +1,1796 @@ +/* dgesvj.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__1 = 1; +static integer c__0 = 0; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal bigtheta; + integer pskipped, i__, p, q; + doublereal t; + integer n2, n4; + doublereal rootsfmin; + integer n34; + doublereal cs, sn; + integer ir1, jbc; + doublereal big; + integer kbl, igl, ibr, jgl, nbl; + doublereal tol; + integer mvl; + doublereal aapp, aapq, aaqq; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + 
doublereal ctol; + integer ierr; + doublereal aapp0; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp1; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale, large, apoaq, aqoap; + extern logical _starpu_lsame_(char *, char *); + doublereal theta, small, sfmin; + logical lsvec; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal fastr[5]; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical applv, rsvec; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical uctol; + extern /* Subroutine */ int _starpu_drotm_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *); + logical lower, upper, rotok; + extern /* Subroutine */ int _starpu_dgsvj0_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_dgsvj1_( + char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int 
_starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + doublereal thsign, mxsinj; + integer emptsw, notrot, iswrot, lkahead; + logical goscale, noscale; + doublereal rootbig, epsilon, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ +/* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ +/* SIGMA is a library of algorithms for highly accurate algorithms for */ +/* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ +/* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ + +/* -#- Scalar Arguments -#- */ + + +/* -#- Array Arguments -#- */ + +/* .. */ + +/* Purpose */ +/* ~~~~~~~ */ +/* DGESVJ computes the singular value decomposition (SVD) of a real */ +/* M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* [++] [xx] [x0] [xx] */ +/* A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] */ +/* [++] [xx] */ +/* where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ +/* of SIGMA are the singular values of A. The columns of U and V are the */ +/* left and the right singular vectors of A, respectively. */ + +/* Further Details */ +/* ~~~~~~~~~~~~~~~ */ +/* The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */ +/* rotations. The rotations are implemented as fast scaled rotations of */ +/* Anda and Park [1]. In the case of underflow of the Jacobi angle, a */ +/* modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */ +/* column interchanges of de Rijk [2]. 
The relative accuracy of the computed */ +/* singular values and the accuracy of the computed singular vectors (in */ +/* angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */ +/* The condition number that determines the accuracy in the full rank case */ +/* is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */ +/* spectral condition number. The best performance of this Jacobi SVD */ +/* procedure is achieved if used in an accelerated version of Drmac and */ +/* Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */ +/* Some tuning parameters (marked with [TP]) are available for the */ +/* implementer. */ +/* The computational range for the nonzero singular values is the machine */ +/* number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */ +/* denormalized singular values can be computed with the corresponding */ +/* gradual loss of accurate digits. */ + +/* Contributors */ +/* ~~~~~~~~~~~~ */ +/* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* References */ +/* ~~~~~~~~~~ */ +/* [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. */ +/* SIAM J. Matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. */ +/* [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */ +/* singular value decomposition on a vector computer. */ +/* SIAM J. Sci. Stat. Comp., Vol. 10 (1989), pp. 359-371. */ +/* [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */ +/* [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */ +/* value computation in floating point arithmetic. */ +/* SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */ +/* [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* LAPACK Working note 169. */ +/* [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* SIAM J. Matrix Anal. Appl. Vol.
35, No. 2 (2008), pp. 1343-1362. */ +/* LAPACK Working note 170. */ +/* [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSVD, */ +/* QSVD, (H,K)-SVD computations. */ +/* Department of Mathematics, University of Zagreb, 2008. */ + +/* Bugs, Examples and Comments */ +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* Please report all bugs and send interesting test examples and comments to */ +/* drmac@math.hr. Thank you. */ + +/* Arguments */ +/* ~~~~~~~~~ */ + +/* JOBA (input) CHARACTER*1 */ +/* Specifies the structure of A. */ +/* = 'L': The input matrix A is lower triangular; */ +/* = 'U': The input matrix A is upper triangular; */ +/* = 'G': The input matrix A is a general M-by-N matrix, M >= N. */ + +/* JOBU (input) CHARACTER*1 */ +/* Specifies whether to compute the left singular vectors */ +/* (columns of U): */ + +/* = 'U': The left singular vectors corresponding to the nonzero */ +/* singular values are computed and returned in the leading */ +/* columns of A. See more details in the description of A. */ +/* The default numerical orthogonality threshold is set to */ +/* approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). */ +/* = 'C': Analogous to JOBU='U', except that the user can control the */ +/* level of numerical orthogonality of the computed left */ +/* singular vectors. TOL can be set to TOL = CTOL*EPS, where */ +/* CTOL is given on input in the array WORK. */ +/* No CTOL smaller than ONE is allowed. CTOL greater */ +/* than 1 / EPS is meaningless. The option 'C' */ +/* can be used if M*EPS is satisfactory orthogonality */ +/* of the computed left singular vectors, so CTOL=M could */ +/* save a few sweeps of Jacobi rotations. */ +/* See the descriptions of A and WORK(1). */ +/* = 'N': The matrix U is not computed. However, see the */ +/* description of A.
*/ + +/* JOBV (input) CHARACTER*1 */ +/* Specifies whether to compute the right singular vectors, that */ +/* is, the matrix V: */ +/* = 'V' : the matrix V is computed and returned in the array V */ +/* = 'A' : the Jacobi rotations are applied to the MV-by-N */ +/* array V. In other words, the right singular vector */ +/* matrix V is not computed explicitly; instead it is */ +/* applied to an MV-by-N matrix initially stored in the */ +/* first MV rows of V. */ +/* = 'N' : the matrix V is not computed and the array V is not */ +/* referenced */ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. */ +/* M >= N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': */ +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* If INFO .EQ. 0, */ +/* ~~~~~~~~~~~~~~~ */ +/* RANKA orthonormal columns of U are returned in the */ +/* leading RANKA columns of the array A. Here RANKA <= N */ +/* is the number of computed singular values of A that are */ +/* above the underflow threshold DLAMCH('S'). The singular */ +/* vectors corresponding to underflowed or zero singular */ +/* values are not computed. The value of RANKA is returned */ +/* in the array WORK as RANKA=NINT(WORK(2)). Also see the */ +/* descriptions of SVA and WORK. The computed columns of U */ +/* are mutually numerically orthogonal up to approximately */ +/* TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), */ +/* see the description of JOBU. */ +/* If INFO .GT. 0, */ +/* ~~~~~~~~~~~~~~~ */ +/* the procedure DGESVJ did not converge in the given number */ +/* of iterations (sweeps). In that case, the computed */ +/* columns of U may not be orthogonal up to TOL.
The output */ +/* U (stored in A), SIGMA (given by the computed singular */ +/* values in SVA(1:N)) and V is still a decomposition of the */ +/* input matrix A in the sense that the residual */ +/* ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */ + +/* If JOBU .EQ. 'N': */ +/* ~~~~~~~~~~~~~~~~~ */ +/* If INFO .EQ. 0 */ +/* ~~~~~~~~~~~~~~ */ +/* Note that the left singular vectors are 'for free' in the */ +/* one-sided Jacobi SVD algorithm. However, if only the */ +/* singular values are needed, the level of numerical */ +/* orthogonality of U is not an issue and iterations are */ +/* stopped when the columns of the iterated matrix are */ +/* numerically orthogonal up to approximately M*EPS. Thus, */ +/* on exit, A contains the columns of U scaled with the */ +/* corresponding singular values. */ +/* If INFO .GT. 0, */ +/* ~~~~~~~~~~~~~~~ */ +/* the procedure DGESVJ did not converge in the given number */ +/* of iterations (sweeps). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* SVA (workspace/output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, */ +/* If INFO .EQ. 0, */ +/* ~~~~~~~~~~~~~~~ */ +/* depending on the value SCALE = WORK(1), we have: */ +/* If SCALE .EQ. ONE: */ +/* ~~~~~~~~~~~~~~~~~~ */ +/* SVA(1:N) contains the computed singular values of A. */ +/* During the computation SVA contains the Euclidean column */ +/* norms of the iterated matrices in the array A. */ +/* If SCALE .NE. ONE: */ +/* ~~~~~~~~~~~~~~~~~~ */ +/* The singular values of A are SCALE*SVA(1:N), and this */ +/* factored representation is due to the fact that some of the */ +/* singular values of A might underflow or overflow. */ + +/* If INFO .GT. 0, */ +/* ~~~~~~~~~~~~~~~ */ +/* the procedure DGESVJ did not converge in the given number of */ +/* iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */ + +/* MV (input) INTEGER */ +/* If JOBV .EQ.
'A', then the product of Jacobi rotations in DGESVJ */ +/* is applied to the first MV rows of V. See the description of JOBV. */ + +/* V (input/output) DOUBLE PRECISION array, dimension (LDV,N) */ +/* If JOBV = 'V', then V contains on exit the N-by-N matrix of */ +/* the right singular vectors; */ +/* If JOBV = 'A', then V contains the product of the computed right */ +/* singular vector matrix and the initial matrix in */ +/* the array V. */ +/* If JOBV = 'N', then V is not referenced. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V, LDV .GE. 1. */ +/* If JOBV .EQ. 'V', then LDV .GE. max(1,N). */ +/* If JOBV .EQ. 'A', then LDV .GE. max(1,MV). */ + +/* WORK (input/workspace/output) DOUBLE PRECISION array, dimension max(6,M+N). */ +/* On entry, */ +/* If JOBU .EQ. 'C', */ +/* ~~~~~~~~~~~~~~~~~ */ +/* WORK(1) = CTOL, where CTOL defines the threshold for convergence. */ +/* The process stops if all columns of A are mutually */ +/* orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */ +/* It is required that CTOL >= ONE, i.e. it is not */ +/* allowed to force the routine to obtain orthogonality */ +/* below EPSILON. */ +/* On exit, */ +/* WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */ +/* are the computed singular values of A. */ +/* (See description of SVA().) */ +/* WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */ +/* singular values. */ +/* WORK(3) = NINT(WORK(3)) is the number of the computed singular */ +/* values that are larger than the underflow threshold. */ +/* WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */ +/* rotations needed for numerical convergence. */ +/* WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */ +/* This is useful information in cases when DGESVJ did */ +/* not converge, as it can be used to estimate whether */ +/* the output is still useful and for post festum analysis. */ +/* WORK(6) = the largest absolute value over all sines of the */ +/* Jacobi rotation angles in the last sweep.
It can be */ +/* useful for a post festum analysis. */ + +/* LWORK (input) INTEGER. Length of WORK, LWORK >= MAX(6,M+N) */ + +/* INFO (output) INTEGER */ +/* = 0 : successful exit. */ +/* < 0 : if INFO = -i, then the i-th argument had an illegal value */ +/* > 0 : DGESVJ did not converge in the maximal allowed number (30) */ +/* of sweeps. The output may still be useful. See the */ +/* description of WORK. */ + +/* Local Parameters */ + + +/* Local Scalars */ + + +/* Local Arrays */ + + +/* Intrinsic Functions */ + + +/* External Functions */ +/* .. from BLAS */ +/* .. from LAPACK */ + +/* External Subroutines */ +/* .. from BLAS */ +/* .. from LAPACK */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --work; + + /* Function Body */ + lsvec = _starpu_lsame_(jobu, "U"); + uctol = _starpu_lsame_(jobu, "C"); + rsvec = _starpu_lsame_(jobv, "V"); + applv = _starpu_lsame_(jobv, "A"); + upper = _starpu_lsame_(joba, "U"); + lower = _starpu_lsame_(joba, "L"); + + if (! (upper || lower || _starpu_lsame_(joba, "G"))) { + *info = -1; + } else if (! (lsvec || uctol || _starpu_lsame_(jobu, "N"))) + { + *info = -2; + } else if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) + { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0 || *n > *m) { + *info = -5; + } else if (*lda < *m) { + *info = -7; + } else if (*mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (uctol && work[1] <= 1.)
{ + *info = -12; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m + *n; + if (*lwork < max(i__1,6)) { + *info = -13; + } else { + *info = 0; + } + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGESVJ", &i__1); + return 0; + } + +/* #:) Quick return for void matrix */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set numerical parameters */ +/* The stopping criterion for Jacobi rotations is */ + +/* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */ + +/* where EPS is the round-off and CTOL is defined as follows: */ + + if (uctol) { +/* ... user controlled */ + ctol = work[1]; + } else { +/* ... default */ + if (lsvec || rsvec || applv) { + ctol = sqrt((doublereal) (*m)); + } else { + ctol = (doublereal) (*m); + } + } +/* ... and the machine dependent parameters are */ +/* [!] (Make sure that DLAMCH() works properly on the target machine.) */ + + epsilon = _starpu_dlamch_("Epsilon"); + rooteps = sqrt(epsilon); + sfmin = _starpu_dlamch_("SafeMinimum"); + rootsfmin = sqrt(sfmin); + small = sfmin / epsilon; + big = _starpu_dlamch_("Overflow"); +/* BIG = ONE / SFMIN */ + rootbig = 1. / rootsfmin; + large = big / sqrt((doublereal) (*m * *n)); + bigtheta = 1. / rooteps; + + tol = ctol * epsilon; + roottol = sqrt(tol); + + if ((doublereal) (*m) * epsilon >= 1.) { + *info = -5; + i__1 = -(*info); + _starpu_xerbla_("DGESVJ", &i__1); + return 0; + } + +/* Initialize the right singular vector matrix. */ + + if (rsvec) { + mvl = *n; + _starpu_dlaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv); + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + +/* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */ +/* (!) If necessary, scale A to protect the largest singular value */ +/* from overflow. It is possible that saving the largest singular */ +/* value destroys the information about the small ones. 
*/ +/* This initial scaling is almost minimal in the sense that the */ +/* goal is to make sure that no column norm overflows, and that */ +/* DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */ +/* in A are detected, the procedure returns with INFO=-6. */ + + scale = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); + noscale = TRUE_; + goscale = TRUE_; + + if (lower) { +/* the input matrix is M-by-N lower triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 0.; + i__2 = *m - p + 1; + _starpu_dlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + _starpu_xerbla_("DGESVJ", &i__2); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * scale); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= scale; +/* L1873: */ + } + } + } +/* L1874: */ + } + } else if (upper) { +/* the input matrix is M-by-N upper triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 0.; + _starpu_dlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + _starpu_xerbla_("DGESVJ", &i__2); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * scale); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= scale; +/* L2873: */ + } + } + } +/* L2874: */ + } + } else { +/* the input matrix is M-by-N general dense */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + _starpu_xerbla_("DGESVJ", &i__2); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] 
= aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * scale); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= scale; +/* L3873: */ + } + } + } +/* L3874: */ + } + } + + if (noscale) { + scale = 1.; + } + +/* Move the smaller part of the spectrum from the underflow threshold */ +/* (!) Start by determining the position of the nonzero entries of the */ +/* array SVA() relative to ( SFMIN, BIG ). */ + + aapp = 0.; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] != 0.) { +/* Computing MIN */ + d__1 = aaqq, d__2 = sva[p]; + aaqq = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = aapp, d__2 = sva[p]; + aapp = max(d__1,d__2); +/* L4781: */ + } + +/* #:) Quick return for zero matrix */ + + if (aapp == 0.) { + if (lsvec) { + _starpu_dlaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda); + } + work[1] = 1.; + work[2] = 0.; + work[3] = 0.; + work[4] = 0.; + work[5] = 0.; + work[6] = 0.; + return 0; + } + +/* #:) Quick return for one-column matrix */ + + if (*n == 1) { + if (lsvec) { + _starpu_dlascl_("G", &c__0, &c__0, &sva[1], &scale, m, &c__1, &a[a_dim1 + + 1], lda, &ierr); + } + work[1] = 1. / scale; + if (sva[1] >= sfmin) { + work[2] = 1.; + } else { + work[2] = 0.; + } + work[3] = 0.; + work[4] = 0.; + work[5] = 0.; + work[6] = 0.; + return 0; + } + +/* Protect small singular values from underflow, and try to */ +/* avoid underflows/overflows in computing Jacobi rotations. 
*/ + + sn = sqrt(sfmin / epsilon); + temp1 = sqrt(big / (doublereal) (*n)); + if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) { +/* Computing MIN */ + d__1 = big, d__2 = temp1 / aapp; + temp1 = min(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp <= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n))); + temp1 = min(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq >= sn && aapp >= temp1) { +/* Computing MAX */ + d__1 = sn / aaqq, d__2 = temp1 / aapp; + temp1 = max(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp >= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp); + temp1 = min(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else { + temp1 = 1.; + } + +/* Scale, if necessary */ + + if (temp1 != 1.) { + _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, & + ierr); + } + scale = temp1 * scale; + if (scale != 1.) { + _starpu_dlascl_(joba, &c__0, &c__0, &c_b18, &scale, m, n, &a[a_offset], lda, & + ierr); + scale = 1. / scale; + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + fastr[0] = 0.; + +/* A is represented in factored form A = A * diag(WORK), where diag(WORK) */ +/* is initialized to identity. WORK is updated during fast scaled */ +/* rotations. */ + + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + work[q] = 1.; +/* L1868: */ + } + + + swband = 3; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if DGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. 
*/ + + kbl = min(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = min(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + +/* Computing MAX */ + i__1 = 64, i__2 = kbl << 2; + if ((lower || upper) && *n > max(i__1,i__2)) { +/* [TP] The number of partition levels and the actual partition are */ +/* tuning parameters. */ + n4 = *n / 4; + n2 = *n / 2; + n34 = n4 * 3; + if (applv) { + q = 0; + } else { + q = 1; + } + + if (lower) { + +/* This works very well on lower triangular matrices, in particular */ +/* in the framework of the preconditioned Jacobi SVD (xGEJSV). */ +/* The idea is simple: */ +/* [+ 0 0 0] Note that Jacobi transformations of [0 0] */ +/* [+ + 0 0] [0 0] */ +/* [+ + x 0] actually work on [x 0] [x 0] */ +/* [+ + x x] [x x]. 
[x x] */ + + i__1 = *m - n34; + i__2 = *n - n34; + i__3 = *lwork - *n; + _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, + &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + ( + n34 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = n34 - n2; + i__3 = *lwork - *n; + _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, & + work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) + * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, &work[*n + + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = *n - n2; + i__3 = *lwork - *n; + _starpu_dgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], + lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + ( + n2 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n4; + i__2 = n2 - n4; + i__3 = *lwork - *n; + _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, & + work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) + * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + + 1], &i__3, &ierr); + + i__1 = *lwork - *n; + _starpu_dgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, + &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, &work[* + n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + _starpu_dgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + + } else if (upper) { + + + i__1 = *lwork - *n; + _starpu_dgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__2, & + work[*n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + _starpu_dgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 + + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * + v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1] +, &i__1, &ierr); + + i__1 = *lwork - *n; + 
_starpu_dgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], + &mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + i__1 = n2 + n4; + i__2 = *lwork - *n; + _starpu_dgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[ + n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * + v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1] +, &i__2, &ierr); + } + + } + +/* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ + + for (i__ = 1; i__ <= 30; ++i__) { + +/* .. go go go ... */ + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__1 = nbl; + for (ibr = 1; ibr <= i__1; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__3 = lkahead, i__4 = nbl - ibr; + i__2 = min(i__3,i__4); + for (ir1 = 0; ir1 <= i__2; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__4 = igl + kbl - 1, i__5 = *n - 1; + i__3 = min(i__4,i__5); + for (p = igl; p <= i__3; ++p) { + +/* .. de Rijk's pivoting */ + + i__4 = *n - p + 1; + q = _starpu_idamax_(&i__4, &sva[p], &c__1) + p - 1; + if (p != q) { + _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. 
*/ +/* Caveat: */ +/* Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) */ +/* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */ +/* Hence, DNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented DNRM2 is available, the IF-THEN-ELSE */ +/* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * + work[p]; + } else { + temp1 = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp) * work[p]; + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = min(i__5,*n); + for (q = p + 1; q <= i__4; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.) { + + aapp0 = aapp; + if (aaqq >= 1.) 
{ + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = max(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + +/* .. rotate */ +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + + if (abs(theta) > bigtheta) { + + t = .5 / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + _starpu_drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + aapp *= sqrt(1. - t * aqoap * + aapq); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = max(d__1,d__2); + + } else { + +/* .. 
choose correct signum for THETA and rotate */ + + thsign = -d_sign(&c_b18, &aapq); + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; + +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = max(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((max(d__1,d__2))); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.) { + if (work[q] >= 1.) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + } + } else { + if (work[q] >= 1.) 
{ + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + } else { + if (work[p] >= work[q]) { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { +/* .. 
have to use modified Gram-Schmidt like transformation */ + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + c_b18, m, &c__1, &work[*n + 1] +, lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + _starpu_daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b18, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * aapq; + sva[q] = aaqq * sqrt((max(d__1,d__2))) + ; + mxsinj = max(mxsinj,sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.) 
{ +/* Computing MIN */ + i__4 = igl + kbl - 1; + notrot = notrot + min(i__4,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__2 = nbl; + for (jbc = ibr + 1; jbc <= i__2; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = min(i__4,*n); + for (p = igl; p <= i__3; ++p) { + + aapp = sva[p]; + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = jgl + kbl - 1; + i__4 = min(i__5,*n); + for (q = jgl; q <= i__4; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + +/* -#- M x 2 Jacobi SVD -#- */ + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + 
mxaapq = max(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + _starpu_drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((max(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = max(d__1,d__2); + } else { + +/* .. choose correct signum for THETA and rotate */ + + thsign = -d_sign(&c_b18, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = max(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + aapp *= sqrt(1. - t * aqoap * + aapq); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.) { + + if (work[q] >= 1.) 
{ + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + work[p] *= cs; + work[q] /= cs; + } + } else { + if (work[q] >= 1.) { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + work[p] /= cs; + work[q] *= cs; + } else { + if (work[p] >= work[q]) { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + 
_starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + if (aapp > aaqq) { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + _starpu_daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b18, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,sfmin); + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[q] / work[p]; + _starpu_daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[p * a_dim1 + 1], + &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b18, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[p] = aapp * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* .. 
recompute SVA(q) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.) { +/* Computing MIN */ + i__4 = jgl + kbl - 1; + notrot = notrot + min(i__4,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__3 = igl + kbl - 1; + i__2 = min(i__3,*n); + for (p = igl; p <= i__2; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + +/* .. 
update SVA(N) */ + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * work[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && ( + doublereal) (*n) * mxaapq * mxsinj < tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = 29; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the singular values and find how many are above */ +/* the underflow threshold. */ + + n2 = 0; + n4 = 0; + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } + if (sva[p] != 0.) { + ++n4; + if (sva[p] * scale > sfmin) { + ++n2; + } + } +/* L5991: */ + } + if (sva[*n] != 0.) { + ++n4; + if (sva[*n] * scale > sfmin) { + ++n2; + } + } + +/* Normalize the left singular vectors. */ + + if (lsvec || uctol) { + i__1 = n2; + for (p = 1; p <= i__1; ++p) { + d__1 = work[p] / sva[p]; + _starpu_dscal_(m, &d__1, &a[p * a_dim1 + 1], &c__1); +/* L1998: */ + } + } + +/* Scale the product of Jacobi rotations (assemble the fast rotations). 
*/ + + if (rsvec) { + if (applv) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + _starpu_dscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1); +/* L2398: */ + } + } else { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = 1. / _starpu_dnrm2_(&mvl, &v[p * v_dim1 + 1], &c__1); + _starpu_dscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1); +/* L2399: */ + } + } + } + +/* Undo scaling, if necessary (and possible). */ + if (scale > 1. && sva[1] < big / scale || scale < 1. && sva[n2] > sfmin / + scale) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sva[p] = scale * sva[p]; +/* L2400: */ + } + scale = 1.; + } + + work[1] = scale; +/* The singular values of A are SCALE*SVA(1:N). If SCALE.NE.ONE */ +/* then some of the singular values may overflow or underflow and */ +/* the spectrum is given in this factored representation. */ + + work[2] = (doublereal) n4; +/* N4 is the number of computed nonzero singular values of A. */ + + work[3] = (doublereal) n2; +/* N2 is the number of singular values of A greater than SFMIN. */ +/* If N2= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* not 'N', then A must have been equilibrated by the scaling */ +/* factors in R and/or C. A is not modified if FACT = 'F' or */ +/* 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ + +/* On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* EQUED = 'R': A := diag(R) * A */ +/* EQUED = 'C': A := A * diag(C) */ +/* EQUED = 'B': A := diag(R) * A * diag(C). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ +/* AF is the factored form of the equilibrated matrix A. */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then AF is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the equilibrated matrix A (see the description of A for */ +/* the form of the equilibrated matrix). */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* as computed by DGETRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the equilibrated matrix A. */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). */ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. 
*/ + +/* R (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* is not accessed. R is an input argument if FACT = 'F'; */ +/* otherwise, R is an output argument. If FACT = 'F' and */ +/* EQUED = 'R' or 'B', each element of R must be positive. */ + +/* C (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* is not accessed. C is an input argument if FACT = 'F'; */ +/* otherwise, C is an output argument. If FACT = 'F' and */ +/* EQUED = 'C' or 'B', each element of C must be positive. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* diag(R)*B; */ +/* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* overwritten by diag(C)*B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* to the original system of equations. Note that A and B are */ +/* modified on exit if EQUED .ne. 'N', and the solution to the */ +/* equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* and EQUED = 'R' or 'B'. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A after equilibration (if done). 
If RCOND is less than the */ +/* machine precision (in particular, if RCOND = 0), the matrix */ +/* is singular to working precision. This condition is */ +/* indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (4*N) */ +/* On exit, WORK(1) contains the reciprocal pivot growth */ +/* factor norm(A)/norm(U). The "max absolute element" norm is */ +/* used. If WORK(1) is much less than 1, then the stability */ +/* of the LU factorization of the (equilibrated) matrix A */ +/* could be poor. This also means that the solution X, condition */ +/* estimator RCOND, and forward error bound FERR could be */ +/* unreliable. If factorization fails with 0 < INFO <= N, then */ +/* WORK(1) contains the reciprocal pivot growth factor for the */ +/* leading INFO columns of A. */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: U(i,i) is exactly zero. The factorization has */ +/* been completed, but the factor U is exactly */ +/* singular, so the solution and error bounds */ +/* could not be computed. RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. 
Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + notran = _starpu_lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (_starpu_lsame_(fact, "F") && ! 
(rowequ || colequ + || _starpu_lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = max(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -14; + } else if (*ldx < max(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGESVX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, & + amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. 
*/ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + _starpu_dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + rpvgrw = _starpu_dlantr_("M", "U", "N", info, info, &af[af_offset], ldaf, + &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = _starpu_dlange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw; + } + work[1] = rpvgrw; + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = _starpu_dlange_(norm, n, n, &a[a_offset], lda, &work[1]); + rpvgrw = _starpu_dlantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]) / + rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Compute the solution matrix X. 
*/ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[ + 1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; +/* L70: */ + } +/* L80: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L90: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L120: */ + } + } + + work[1] = rpvgrw; + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + return 0; + +/* End of DGESVX */ + +} /* _starpu_dgesvx_ */ diff --git a/min-dgels/base/SRC/dgesvxx.c b/min-dgels/base/SRC/dgesvxx.c new file mode 100644 index 0000000..f6bce7c --- /dev/null +++ b/min-dgels/base/SRC/dgesvxx.c @@ -0,0 +1,713 @@ +/* dgesvxx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer j; + extern doublereal _starpu_dla_rpvgrw__(integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal amax; + extern logical _starpu_lsame_(char *, char *); + doublereal rcmin, rcmax; + logical equil; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlaqge_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, char *); + doublereal colcnd; + logical nofact; + extern /* Subroutine */ int _starpu_dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + 
doublereal bignum; + integer infequ; + logical colequ; + extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + extern /* Subroutine */ int _starpu_dlascl2_(integer *, integer *, doublereal *, + doublereal *, integer *), _starpu_dgeequb_(integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_dgerfsx_(char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGESVXX uses the LU factorization to compute the solution to a */ +/* double precision system of linear equations A * X = B, where A is an */ +/* N-by-N matrix and X and B are N-by-NRHS matrices. */ + +/* If requested, both normwise and maximum componentwise error bounds */ +/* are returned. DGESVXX will return a solution with a tiny */ +/* guaranteed error (O(eps) where eps is the working machine */ +/* precision) unless the matrix is very ill-conditioned, in which */ +/* case a warning is returned. Relevant condition numbers also are */ +/* calculated and returned. 
*/ + +/* DGESVXX accepts user-provided factorizations and equilibration */ +/* factors; see the definitions of the FACT and EQUED options. */ +/* Solving with refinement and using a factorization from a previous */ +/* DGESVXX call will also produce a solution with either O(eps) */ +/* errors or warnings, but we cannot make that claim for general */ +/* user-provided factorizations and equilibration factors if they */ +/* differ from what DGESVXX would itself produce. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* the system: */ + +/* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ + +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* or diag(C)*B (if TRANS = 'T' or 'C'). */ + +/* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* the matrix A (after equilibration if FACT = 'E') as */ + +/* A = P * L * U, */ + +/* where P is a permutation matrix, L is a unit lower triangular */ +/* matrix, and U is upper triangular. */ + +/* 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* routine returns with INFO = i. Otherwise, the factored form of A */ +/* is used to estimate the condition number of the matrix A (see */ +/* argument RCOND). If the reciprocal of the condition number is less */ +/* than machine precision, the routine still goes on to solve for X */ +/* and compute error bounds as described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. 
By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* the routine will use iterative refinement to try to get a small */ +/* error and error bounds. Refinement calculates the residual to at */ +/* least twice the working precision. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* that it solves the original system before equilibration. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* If EQUED is not 'N', the matrix A has been */ +/* equilibrated with scaling factors given by R and C. */ +/* A, AF, and IPIV are not modified. */ +/* = 'N': The matrix A will be copied to AF and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AF and factored. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. 
If FACT = 'F' and EQUED is */ +/* not 'N', then A must have been equilibrated by the scaling */ +/* factors in R and/or C. A is not modified if FACT = 'F' or */ +/* 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ + +/* On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* EQUED = 'R': A := diag(R) * A */ +/* EQUED = 'C': A := A * diag(C) */ +/* EQUED = 'B': A := diag(R) * A * diag(C). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ +/* AF is the factored form of the equilibrated matrix A. */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then AF is an output argument and on exit */ +/* returns the factors L and U from the factorization A = P*L*U */ +/* of the equilibrated matrix A (see the description of A for */ +/* the form of the equilibrated matrix). */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* as computed by DGETRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the original matrix A. */ + +/* If FACT = 'E', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the factorization A = P*L*U */ +/* of the equilibrated matrix A. 
*/ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). */ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* R (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* is not accessed. R is an input argument if FACT = 'F'; */ +/* otherwise, R is an output argument. If FACT = 'F' and */ +/* EQUED = 'R' or 'B', each element of R must be positive. */ +/* If R is output, each element of R is a power of the radix. */ +/* If R is input, each element of R should be a power of the radix */ +/* to ensure a reliable solution and error estimates. Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* C (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* is not accessed. C is an input argument if FACT = 'F'; */ +/* otherwise, C is an output argument. If FACT = 'F' and */ +/* EQUED = 'C' or 'B', each element of C must be positive. */ +/* If C is output, each element of C is a power of the radix. */ +/* If C is input, each element of C should be a power of the radix */ +/* to ensure a reliable solution and error estimates. 
Scaling by */ +/* powers of the radix does not cause rounding errors unless the */ +/* result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* diag(R)*B; */ +/* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* overwritten by diag(C)*B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* system of equations. Note that A and B are modified on exit */ +/* if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* RPVGRW (output) DOUBLE PRECISION */ +/* Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* pivot growth factor norm(A)/norm(U). The "max absolute element" */ +/* norm is used. 
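If this is much less than 1, then the stability of */ +/* the LU factorization of the (equilibrated) matrix A could be poor. */ +/* This also means that the solution X, estimated condition numbers, */ +/* and error bounds could be unreliable. If factorization fails with */ +/* 0 < INFO <= N, then this contains the reciprocal pivot growth factor */ +/* for the leading INFO columns of A. In DGESVX, this quantity is */ +/* returned in WORK(1). */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error. This is the */ +/* componentwise relative backward error of each solution vector X(j) */ +/* (i.e., the smallest relative change in any element of A or B that */ +/* makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error. The first index in ERR_BNDS_NORM(i,:) */ +/* corresponds to the ith right-hand side. The second index in */ +/* ERR_BNDS_NORM(:,err) selects one of up to three fields: */ +/* err = 1 "Trust/don't trust" flag; 0.0 marks an error bound that */ +/* is not guaranteed (see INFO below). */ +/* err = 2 "Guaranteed" error bound: the estimated forward error. */ +/* err = 3 Reciprocal condition number: estimated normwise */ +/* reciprocal condition number. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error. It is indexed in the same way as */ +/* ERR_BNDS_NORM, with err = 3 holding the estimated componentwise */ +/* reciprocal condition number. */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. Only positions up to NPARAMS are */ +/* accessed; defaults are used for higher-numbered parameters. */ +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. Default: 1.0; 0.0 means no refinement. */ +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. Default: 10. */ +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error. Positive is true, 0.0 is false. Default: 1.0. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 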
*/ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + notran = _starpu_lsame_(trans, "N"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in DGERFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DGERFSX. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (_starpu_lsame_(fact, "F") && ! 
(rowequ || colequ + || _starpu_lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = max(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = max(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = max(rcmin,smlnum) / min(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -14; + } else if (*ldx < max(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGESVXX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, + &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, + "B"); + colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.; + } + } + } + +/* Scale the right-hand side. 
*/ + + if (notran) { + if (rowequ) { + _starpu_dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + _starpu_dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + _starpu_dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + *rpvgrw = _starpu_dla_rpvgrw__(n, info, &a[a_offset], lda, &af[af_offset], + ldaf); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = _starpu_dla_rpvgrw__(n, n, &a[a_offset], lda, &af[af_offset], ldaf); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, + rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], + nparams, ¶ms[1], &work[1], &iwork[1], info); + +/* Scale solutions. */ + + if (colequ && notran) { + _starpu_dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! 
notran) { + _starpu_dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DGESVXX */ +} /* _starpu_dgesvxx_ */ diff --git a/min-dgels/base/SRC/dgetc2.c b/min-dgels/base/SRC/dgetc2.c new file mode 100644 index 0000000..2f63d49 --- /dev/null +++ b/min-dgels/base/SRC/dgetc2.c @@ -0,0 +1,199 @@ +/* dgetc2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b10 = -1.; + +/* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer + *ipiv, integer *jpiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, ip, jp; + doublereal eps; + integer ipv, jpv; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal smin, xmax; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGETC2 computes an LU factorization with complete pivoting of the */ +/* n-by-n matrix A. The factorization has the form A = P * L * U * Q, */ +/* where P and Q are permutation matrices, L is lower triangular with */ +/* unit diagonal elements and U is upper triangular. */ + +/* This is the Level 2 BLAS algorithm. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the n-by-n matrix A to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U*Q; the unit diagonal elements of L are not stored. */ +/* If U(k, k) appears to be less than SMIN, U(k, k) is given the */ +/* value of SMIN, i.e., giving a nonsingular perturbed system. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension(N). */ +/* The pivot indices; for 1 <= i <= N, row i of the */ +/* matrix has been interchanged with row IPIV(i). */ + +/* JPIV (output) INTEGER array, dimension(N). */ +/* The pivot indices; for 1 <= j <= N, column j of the */ +/* matrix has been interchanged with column JPIV(j). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: if INFO = k, U(k, k) is likely to produce overflow if */ +/* we try to solve for x in Ax = b. So U is perturbed to */ +/* avoid the overflow. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Set constants to control overflow */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --jpiv; + + /* Function Body */ + *info = 0; + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Factorize A using complete pivoting. */ +/* Set pivots less than SMIN to SMIN. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Find max element in matrix A */ + + xmax = 0.; + i__2 = *n; + for (ip = i__; ip <= i__2; ++ip) { + i__3 = *n; + for (jp = i__; jp <= i__3; ++jp) { + if ((d__1 = a[ip + jp * a_dim1], abs(d__1)) >= xmax) { + xmax = (d__1 = a[ip + jp * a_dim1], abs(d__1)); + ipv = ip; + jpv = jp; + } +/* L10: */ + } +/* L20: */ + } + if (i__ == 1) { +/* Computing MAX */ + d__1 = eps * xmax; + smin = max(d__1,smlnum); + } + +/* Swap rows */ + + if (ipv != i__) { + _starpu_dswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda); + } + ipiv[i__] = ipv; + +/* Swap columns */ + + if (jpv != i__) { + _starpu_dswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + } + jpiv[i__] = jpv; + +/* Check for singularity */ + + if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) < smin) { + *info = i__; + a[i__ + i__ * a_dim1] = smin; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1]; +/* L30: */ + } + i__2 = *n - i__; + i__3 = *n - i__; + _starpu_dger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); +/* L40: */ + } + + if ((d__1 = a[*n + *n * a_dim1], abs(d__1)) < smin) { + *info = *n; + a[*n + *n * a_dim1] = smin; + } + + return 0; + +/* End of DGETC2 */ + +} /* _starpu_dgetc2_ */ diff --git a/min-dgels/base/SRC/dgetf2.c b/min-dgels/base/SRC/dgetf2.c new file mode 100644 index 0000000..011acf9 --- /dev/null +++ b/min-dgels/base/SRC/dgetf2.c @@ -0,0 +1,193 @@ +/* dgetf2.f -- 
translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = -1.; + +/* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, jp; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer + *); + doublereal sfmin; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGETF2 computes an LU factorization of a general m-by-n matrix A */ +/* using partial pivoting with row interchanges. 
*/ + +/* The factorization has the form */ +/* A = P * L * U */ +/* where P is a permutation matrix, L is lower triangular with unit */ +/* diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* triangular (upper trapezoidal if m < n). */ + +/* This is the right-looking Level 2 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n matrix to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = _starpu_dlamch_("S"); + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + _starpu_idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + _starpu_dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + _starpu_dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + ( + j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* _starpu_dgetf2_ */ diff --git a/min-dgels/base/SRC/dgetrf.c b/min-dgels/base/SRC/dgetrf.c new file mode 100644 index 0000000..f78852d --- /dev/null +++ b/min-dgels/base/SRC/dgetrf.c @@ -0,0 +1,219 @@ +/* dgetrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b16 = 1.; +static doublereal c_b19 = -1.; + +/* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, jb, nb; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iinfo; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* using partial pivoting with row interchanges. */ + +/* The factorization has the form */ +/* A = P * L * U */ +/* where P is a permutation matrix, L is lower triangular with unit */ +/* diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* triangular (upper trapezoidal if m < n). */ + +/* This is the right-looking Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix to be factored. */ +/* On exit, the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__3 = *m - j + 1; + _starpu_dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + _starpu_dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + _starpu_dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. 
*/ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + _starpu_dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* _starpu_dgetrf_ */ diff --git a/min-dgels/base/SRC/dgetri.c b/min-dgels/base/SRC/dgetri.c new file mode 100644 index 0000000..0b2b414 --- /dev/null +++ b/min-dgels/base/SRC/dgetri.c @@ -0,0 +1,264 @@ +/* dgetri.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b20 = -1.; +static doublereal c_b22 = 1.; + +/* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer + *ipiv, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, jb, nb, jj, jp, nn, iws; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + integer nbmin; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), 
_starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork; + extern /* Subroutine */ int _starpu_dtrtri_(char *, char *, integer *, doublereal + *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGETRI computes the inverse of a matrix using the LU factorization */ +/* computed by DGETRF. */ + +/* This method inverts U and then computes inv(A) by solving the system */ +/* inv(A)*L = inv(U) for inv(A). */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. */ +/* On exit, if INFO = 0, the inverse of the original matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimal performance LWORK >= N*NB, where NB is */ +/* the optimal blocksize returned by ILAENV. 
*/ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ +/* singular and its inverse could not be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DGETRI", " ", n, &c_n1, &c_n1, &c_n1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1,*n)) { + *info = -3; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRI", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, */ +/* and the inverse is not computed. 
*/ + + _starpu_dtrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { +/* Computing MAX */ + i__1 = ldwork * nb; + iws = max(i__1,1); + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGETRI", " ", n, &c_n1, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = *n; + } + +/* Solve the equation inv(A)*L = inv(U) for inv(A). */ + + if (nb < nbmin || nb >= *n) { + +/* Use unblocked code. */ + + for (j = *n; j >= 1; --j) { + +/* Copy current column of L to WORK and replace with zeros. */ + + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + work[i__] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + +/* Compute current column of inv(A). */ + + if (j < *n) { + i__1 = *n - j; + _starpu_dgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + + 1], &c__1); + } +/* L20: */ + } + } else { + +/* Use blocked code. */ + + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *n - j + 1; + jb = min(i__2,i__3); + +/* Copy current block column of L to WORK and replace with */ +/* zeros. */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; + a[i__ + jj * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Compute current block column of inv(A). 
*/ + + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + _starpu_dgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & + ldwork, &c_b22, &a[j * a_dim1 + 1], lda); + } + _starpu_dtrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, & + work[j], &ldwork, &a[j * a_dim1 + 1], lda); +/* L50: */ + } + } + +/* Apply column interchanges. */ + + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + _starpu_dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } +/* L60: */ + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGETRI */ + +} /* _starpu_dgetri_ */ diff --git a/min-dgels/base/SRC/dgetrs.c b/min-dgels/base/SRC/dgetrs.c new file mode 100644 index 0000000..9062d5e --- /dev/null +++ b/min-dgels/base/SRC/dgetrs.c @@ -0,0 +1,186 @@ +/* dgetrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = 1.; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + 
char *, integer *), _starpu_dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGETRS solves a system of linear equations */ +/* A * X = B or A' * X = B */ +/* with a general N-by-N matrix A using the LU factorization computed */ +/* by DGETRF. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A'* X = B (Transpose) */ +/* = 'C': A'* X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The factors L and U from the factorization A = P*L*U */ +/* as computed by DGETRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. 
External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* Solve A * X = B. */ + +/* Apply row interchanges to the right hand sides. */ + + _starpu_dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* Solve A' * X = B. */ + +/* Solve U'*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. 
*/ + + _starpu_dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* _starpu_dgetrs_ */ diff --git a/min-dgels/base/SRC/dggbak.c b/min-dgels/base/SRC/dggbak.c new file mode 100644 index 0000000..4a928cf --- /dev/null +++ b/min-dgels/base/SRC/dggbak.c @@ -0,0 +1,276 @@ +/* dggbak.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublereal *v, integer *ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical leftv; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical rightv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGBAK forms the right or left eigenvectors of a real generalized */ +/* eigenvalue problem A*x = lambda*B*x, by backward transformation on */ +/* the computed eigenvectors of the balanced pair of matrices output by */ +/* DGGBAL. 
*/ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies the type of backward transformation required: */ +/* = 'N': do nothing, return immediately; */ +/* = 'P': do backward transformation for permutation only; */ +/* = 'S': do backward transformation for scaling only; */ +/* = 'B': do backward transformations for both permutation and */ +/* scaling. */ +/* JOB must be the same as the argument JOB supplied to DGGBAL. */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'R': V contains right eigenvectors; */ +/* = 'L': V contains left eigenvectors. */ + +/* N (input) INTEGER */ +/* The number of rows of the matrix V. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* The integers ILO and IHI determined by DGGBAL. */ +/* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ + +/* LSCALE (input) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and/or scaling factors applied */ +/* to the left side of A and B, as returned by DGGBAL. */ + +/* RSCALE (input) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and/or scaling factors applied */ +/* to the right side of A and B, as returned by DGGBAL. */ + +/* M (input) INTEGER */ +/* The number of columns of the matrix V. M >= 0. */ + +/* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) */ +/* On entry, the matrix of right or left eigenvectors to be */ +/* transformed, as returned by DTGEVC. */ +/* On exit, V is overwritten by the transformed eigenvectors. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the matrix V. LDV >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* See R.C. Ward, Balancing the generalized eigenvalue problem, */ +/* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. 
*/ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + --lscale; + --rscale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = _starpu_lsame_(side, "R"); + leftv = _starpu_lsame_(side, "L"); + + *info = 0; + if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") + && ! _starpu_lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*n == 0 && *ihi == 0 && *ilo != 1) { + *info = -4; + } else if (*n > 0 && (*ihi < *ilo || *ihi > max(1,*n))) { + *info = -5; + } else if (*n == 0 && *ilo == 1 && *ihi != 0) { + *info = -5; + } else if (*m < 0) { + *info = -8; + } else if (*ldv < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (_starpu_lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (_starpu_lsame_(job, "S") || _starpu_lsame_(job, "B")) { + +/* Backward transformation on right eigenvectors */ + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + _starpu_dscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + +/* Backward transformation on left eigenvectors */ + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + _starpu_dscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + } + +/* Backward permutation */ + +L30: + if (_starpu_lsame_(job, "P") || _starpu_lsame_(job, "B")) { + 
+/* Backward permutation on right eigenvectors */ + + if (rightv) { + if (*ilo == 1) { + goto L50; + } + + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = (integer) rscale[i__]; + if (k == i__) { + goto L40; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + +L50: + if (*ihi == *n) { + goto L70; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = (integer) rscale[i__]; + if (k == i__) { + goto L60; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L60: + ; + } + } + +/* Backward permutation on left eigenvectors */ + +L70: + if (leftv) { + if (*ilo == 1) { + goto L90; + } + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = (integer) lscale[i__]; + if (k == i__) { + goto L80; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L80: + ; + } + +L90: + if (*ihi == *n) { + goto L110; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = (integer) lscale[i__]; + if (k == i__) { + goto L100; + } + _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L100: + ; + } + } + } + +L110: + + return 0; + +/* End of DGGBAK */ + +} /* _starpu_dggbak_ */ diff --git a/min-dgels/base/SRC/dggbal.c b/min-dgels/base/SRC/dggbal.c new file mode 100644 index 0000000..b8d1386 --- /dev/null +++ b/min-dgels/base/SRC/dggbal.c @@ -0,0 +1,627 @@ +/* dggbal.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b35 = 10.; +static doublereal c_b71 = .5; + +/* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * + lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double d_lg10(doublereal *), d_sign(doublereal *, doublereal *), pow_di( + doublereal *, integer *); + + /* Local variables */ + integer i__, j, k, l, m; + doublereal t; + integer jc; + doublereal ta, tb, tc; + integer ir; + doublereal ew; + integer it, nr, ip1, jp1, lm1; + doublereal cab, rab, ewc, cor, sum; + integer nrp2, icab, lcab; + doublereal beta, coef; + integer irab, lrab; + doublereal basl, cmax; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal coef2, coef5, gamma, alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal sfmin, sfmax; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer iflow; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer kount; + extern doublereal
_starpu_dlamch_(char *); + doublereal pgamma; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer lsfmin, lsfmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGBAL balances a pair of general real matrices (A,B). This */ +/* involves, first, permuting A and B by similarity transformations to */ +/* isolate eigenvalues in the first 1 to ILO-1 and last IHI+1 to N */ +/* elements on the diagonal; and second, applying a diagonal similarity */ +/* transformation to rows and columns ILO to IHI to make the rows */ +/* and columns as close in norm as possible. Both steps are optional. */ + +/* Balancing may reduce the 1-norm of the matrices, and improve the */ +/* accuracy of the computed eigenvalues and/or eigenvectors in the */ +/* generalized eigenvalue problem A*x = lambda*B*x. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies the operations to be performed on A and B: */ +/* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */ +/* and RSCALE(I) = 1.0 for i = 1,...,N. */ +/* = 'P': permute only; */ +/* = 'S': scale only; */ +/* = 'B': both permute and scale. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the input matrix A. */ +/* On exit, A is overwritten by the balanced matrix. */ +/* If JOB = 'N', A is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the input matrix B. */ +/* On exit, B is overwritten by the balanced matrix. */ +/* If JOB = 'N', B is not referenced.
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* ILO (output) INTEGER */ +/* IHI (output) INTEGER */ +/* ILO and IHI are set to integers such that on exit */ +/* A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ + +/* LSCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied */ +/* to the left side of A and B. If P(j) is the index of the */ +/* row interchanged with row j, and D(j) */ +/* is the scaling factor applied to row j, then */ +/* LSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* = D(j) for J = ILO,...,IHI */ +/* = P(j) for J = IHI+1,...,N. */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* RSCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied */ +/* to the right side of A and B. If P(j) is the index of the */ +/* column interchanged with column j, and D(j) */ +/* is the scaling factor applied to column j, then */ +/* RSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* = D(j) for J = ILO,...,IHI */ +/* = P(j) for J = IHI+1,...,N. */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (lwork) */ +/* lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and */ +/* at least 1 when JOB = 'N' or 'P'. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* See R.C. WARD, Balancing the generalized eigenvalue problem, */ +/* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* ..
External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments: shift the base pointers so that 1-based subscripts such as a[i + j * a_dim1] and lscale[i] address the caller's arrays */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --lscale; + --rscale; + --work; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") + && ! _starpu_lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGBAL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *ilo = 1; + *ihi = *n; + return 0; + } + + if (*n == 1) { + *ilo = 1; + *ihi = *n; + lscale[1] = 1.; + rscale[1] = 1.; + return 0; + } + + if (_starpu_lsame_(job, "N")) { + *ilo = 1; + *ihi = *n; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + lscale[i__] = 1.; + rscale[i__] = 1.; +/* L10: */ + } + return 0; + } + + k = 1; + l = *n; + if (_starpu_lsame_(job, "S")) { + goto L190; + } + + goto L30; + +/* Permute the matrices A and B to isolate the eigenvalues. */ + +/* Find row with one nonzero in columns 1 through L */ + +L20: + l = lm1; + if (l != 1) { + goto L30; + } + + rscale[1] = 1.; + lscale[1] = 1.; + goto L190; + +L30: + lm1 = l - 1; + for (i__ = l; i__ >= 1; --i__) { + i__1 = lm1; + for (j = 1; j <= i__1; ++j) { + jp1 = j + 1; + if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { + goto L50; + } +/* L40: */ + } + j = l; + goto L70; + +L50: + i__1 = l; + for (j = jp1; j <= i__1; ++j) { + if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.)
{ + goto L80; + } +/* L60: */ + } + j = jp1 - 1; + +L70: + m = l; + iflow = 1; + goto L160; +L80: + ; + } + goto L100; + +/* Find column with one nonzero in rows K through N */ + +L90: + ++k; + +L100: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = lm1; + for (i__ = k; i__ <= i__2; ++i__) { + ip1 = i__ + 1; + if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { + goto L120; + } +/* L110: */ + } + i__ = l; + goto L140; +L120: + i__2 = l; + for (i__ = ip1; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { + goto L150; + } +/* L130: */ + } + i__ = ip1 - 1; +L140: + m = k; + iflow = 2; + goto L160; +L150: + ; + } + goto L190; + +/* Permute rows M and I; the row index I is recorded in LSCALE(M) */ + +L160: + lscale[m] = (doublereal) i__; + if (i__ == m) { + goto L170; + } + i__1 = *n - k + 1; + _starpu_dswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda); + i__1 = *n - k + 1; + _starpu_dswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb); + +/* Permute columns M and J; the column index J is recorded in RSCALE(M) */ + +L170: + rscale[m] = (doublereal) j; + if (j == m) { + goto L180; + } + _starpu_dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + _starpu_dswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1); + +L180: /* IFLOW = 1 resumes the row search at L20, IFLOW = 2 the column search at L90 */ + switch (iflow) { + case 1: goto L20; + case 2: goto L90; + } + +L190: + *ilo = k; + *ihi = l; + + if (_starpu_lsame_(job, "P")) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + lscale[i__] = 1.; + rscale[i__] = 1.; +/* L195: */ + } + return 0; + } + + if (*ilo == *ihi) { + return 0; + } + +/* Balance the submatrix in rows ILO to IHI.
*/ + + nr = *ihi - *ilo + 1; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + rscale[i__] = 0.; + lscale[i__] = 0.; + /* WORK is addressed as six length-N segments at offsets 0, N, 2N, 3N, 4N and 5N */ + work[i__] = 0.; + work[i__ + *n] = 0.; + work[i__ + (*n << 1)] = 0.; + work[i__ + *n * 3] = 0.; + work[i__ + (*n << 2)] = 0.; + work[i__ + *n * 5] = 0.; +/* L200: */ + } + +/* Compute right side vector in resulting linear equations */ + + basl = d_lg10(&c_b35); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + tb = b[i__ + j * b_dim1]; + ta = a[i__ + j * a_dim1]; + if (ta == 0.) { + goto L210; + } + d__1 = abs(ta); + ta = d_lg10(&d__1) / basl; +L210: + if (tb == 0.) { + goto L220; + } + d__1 = abs(tb); + tb = d_lg10(&d__1) / basl; +L220: + work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; + work[j + *n * 5] = work[j + *n * 5] - ta - tb; +/* L230: */ + } +/* L240: */ + } + + coef = 1. / (doublereal) (nr << 1); + coef2 = coef * coef; + coef5 = coef2 * .5; + nrp2 = nr + 2; + beta = 0.; + it = 1; + +/* Start generalized conjugate gradient iteration (at most NR+2 passes; it also ends early when GAMMA is zero or the largest correction is below 0.5) */ + +L250: + + gamma = _starpu_ddot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] +, &c__1) + _starpu_ddot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * + n * 5], &c__1); + + ew = 0.; + ewc = 0.; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + ew += work[i__ + (*n << 2)]; + ewc += work[i__ + *n * 5]; +/* L260: */ + } + +/* Computing 2nd power */ + d__1 = ew; +/* Computing 2nd power */ + d__2 = ewc; +/* Computing 2nd power */ + d__3 = ew - ewc; + gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * ( + d__3 * d__3); + if (gamma == 0.)
{ + goto L350; + } + if (it != 1) { /* PGAMMA is only assigned at the end of a pass, so it is not read on the first one */ + beta = gamma / pgamma; + } + t = coef5 * (ewc - ew * 3.); + tc = coef5 * (ew - ewc * 3.); + + _starpu_dscal_(&nr, &beta, &work[*ilo], &c__1); + _starpu_dscal_(&nr, &beta, &work[*ilo + *n], &c__1); + + _starpu_daxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & + c__1); + _starpu_daxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + work[i__] += tc; + work[i__ + *n] += t; +/* L270: */ + } + +/* Apply matrix to vector */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + kount = 0; + sum = 0.; + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + if (a[i__ + j * a_dim1] == 0.) { + goto L280; + } + ++kount; + sum += work[j]; +L280: + if (b[i__ + j * b_dim1] == 0.) { + goto L290; + } + ++kount; + sum += work[j]; +L290: + ; + } + work[i__ + (*n << 1)] = (doublereal) kount * work[i__ + *n] + sum; +/* L300: */ + } + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + kount = 0; + sum = 0.; + i__2 = *ihi; + for (i__ = *ilo; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] == 0.) { + goto L310; + } + ++kount; + sum += work[i__ + *n]; +L310: + if (b[i__ + j * b_dim1] == 0.)
{ + goto L320; + } + ++kount; + sum += work[i__ + *n]; +L320: + ; + } + work[j + *n * 3] = (doublereal) kount * work[j] + sum; +/* L330: */ + } + + sum = _starpu_ddot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) + + _starpu_ddot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); + alpha = gamma / sum; + +/* Determine correction to current iteration */ + + cmax = 0.; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + cor = alpha * work[i__ + *n]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + lscale[i__] += cor; + cor = alpha * work[i__]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + rscale[i__] += cor; +/* L340: */ + } + if (cmax < .5) { + goto L350; + } + + d__1 = -alpha; + _starpu_daxpy_(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] +, &c__1); + d__1 = -alpha; + _starpu_daxpy_(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & + c__1); + + pgamma = gamma; + ++it; + if (it <= nrp2) { + goto L250; + } + +/* End generalized conjugate gradient iteration */ + +L350: + sfmin = _starpu_dlamch_("S"); + sfmax = 1.
/ sfmin; + lsfmin = (integer) (d_lg10(&sfmin) / basl + 1.); + lsfmax = (integer) (d_lg10(&sfmax) / basl); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + irab = _starpu_idamax_(&i__2, &a[i__ + *ilo * a_dim1], lda); + rab = (d__1 = a[i__ + (irab + *ilo - 1) * a_dim1], abs(d__1)); + i__2 = *n - *ilo + 1; + irab = _starpu_idamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb); +/* Computing MAX */ + d__2 = rab, d__3 = (d__1 = b[i__ + (irab + *ilo - 1) * b_dim1], abs( + d__1)); + rab = max(d__2,d__3); + d__1 = rab + sfmin; + lrab = (integer) (d_lg10(&d__1) / basl + 1.); + ir = (integer) (lscale[i__] + d_sign(&c_b71, &lscale[i__])); +/* Computing MIN */ + i__2 = max(ir,lsfmin), i__2 = min(i__2,lsfmax), i__3 = lsfmax - lrab; + ir = min(i__2,i__3); + lscale[i__] = pow_di(&c_b35, &ir); + icab = _starpu_idamax_(ihi, &a[i__ * a_dim1 + 1], &c__1); + cab = (d__1 = a[icab + i__ * a_dim1], abs(d__1)); + icab = _starpu_idamax_(ihi, &b[i__ * b_dim1 + 1], &c__1); +/* Computing MAX */ + d__2 = cab, d__3 = (d__1 = b[icab + i__ * b_dim1], abs(d__1)); + cab = max(d__2,d__3); + d__1 = cab + sfmin; + lcab = (integer) (d_lg10(&d__1) / basl + 1.); + jc = (integer) (rscale[i__] + d_sign(&c_b71, &rscale[i__])); +/* Computing MIN */ + i__2 = max(jc,lsfmin), i__2 = min(i__2,lsfmax), i__3 = lsfmax - lcab; + jc = min(i__2,i__3); + rscale[i__] = pow_di(&c_b35, &jc); +/* L360: */ + } + +/* Row scaling of matrices A and B */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + _starpu_dscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda); + i__2 = *n - *ilo + 1; + _starpu_dscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb); +/* L370: */ + } + +/* Column scaling of matrices A and B */ + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + _starpu_dscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1); + _starpu_dscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1); +/* L380: */ + } + + return 0; + +/* End of DGGBAL */ + +} /* _starpu_dggbal_
*/ diff --git a/min-dgels/base/SRC/dgges.c b/min-dgels/base/SRC/dgges.c new file mode 100644 index 0000000..dd13acb --- /dev/null +++ b/min-dgels/base/SRC/dgges.c @@ -0,0 +1,692 @@ +/* dgges.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +static doublereal c_b38 = 0.; +static doublereal c_b39 = 1.; + +/* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, + doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, + integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, ip; + doublereal dif[2]; + integer ihi, ilo; + doublereal eps, anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + doublereal pvsl, pvsr; + extern logical _starpu_lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irows; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( + char *, char *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char 
*, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *); + logical lst2sl; + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmax; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), _starpu_dtgsen_(integer *, logical *, + logical *, logical *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *); + integer ijobvl, iright; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int 
_starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal anrmto, bnrmto; + logical lastsl; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical wantst, lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* .. Function Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), */ +/* the generalized eigenvalues, the generalized real Schur form (S,T), */ +/* optionally, the left and/or right matrices of Schur vectors (VSL and */ +/* VSR). This gives the generalized Schur factorization */ + +/* (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */ + +/* Optionally, it also orders the eigenvalues so that a selected cluster */ +/* of eigenvalues appears in the leading diagonal blocks of the upper */ +/* quasi-triangular matrix S and the upper triangular matrix T.The */ +/* leading columns of VSL and VSR then form an orthonormal basis for the */ +/* corresponding left and right eigenspaces (deflating subspaces). */ + +/* (If only the generalized eigenvalues are needed, use the driver */ +/* DGGEV instead, which is faster.) */ + +/* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* usually represented as the pair (alpha,beta), as there is a */ +/* reasonable interpretation for beta=0 or both being zero. 
*/ + +/* A pair of matrices (S,T) is in generalized real Schur form if T is */ +/* upper triangular with non-negative diagonal and S is block upper */ +/* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ +/* to real generalized eigenvalues, while 2-by-2 blocks of S will be */ +/* "standardized" by making the corresponding elements of T have the */ +/* form: */ +/* [ a 0 ] */ +/* [ 0 b ] */ + +/* and the pair of corresponding 2-by-2 blocks in S and T will have a */ +/* complex conjugate pair of generalized eigenvalues. */ + + +/* Arguments */ +/* ========= */ + +/* JOBVSL (input) CHARACTER*1 */ +/* = 'N': do not compute the left Schur vectors; */ +/* = 'V': compute the left Schur vectors. */ + +/* JOBVSR (input) CHARACTER*1 */ +/* = 'N': do not compute the right Schur vectors; */ +/* = 'V': compute the right Schur vectors. */ + +/* SORT (input) CHARACTER*1 */ +/* Specifies whether or not to order the eigenvalues on the */ +/* diagonal of the generalized Schur form. */ +/* = 'N': Eigenvalues are not ordered; */ +/* = 'S': Eigenvalues are ordered (see SELCTG); */ + +/* SELCTG (external procedure) LOGICAL FUNCTION of three DOUBLE PRECISION arguments */ +/* SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* If SORT = 'N', SELCTG is not referenced. */ +/* If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* to the top left of the Schur form. */ +/* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ +/* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ +/* one of a complex conjugate pair of eigenvalues is selected, */ +/* then both complex eigenvalues are selected. */ + +/* Note that in the ill-conditioned case, a selected complex */ +/* eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */ +/* BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */ +/* in this case. */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VSL, and VSR. N >= 0. 
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the first of the pair of matrices. */ +/* On exit, A has been overwritten by its generalized Schur */ +/* form S. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the second of the pair of matrices. */ +/* On exit, B has been overwritten by its generalized Schur */ +/* form T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N). */ + +/* SDIM (output) INTEGER */ +/* If SORT = 'N', SDIM = 0. */ +/* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* for which SELCTG is true. (Complex conjugate pairs for which */ +/* SELCTG is true for either eigenvalue count as 2.) */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, */ +/* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* the real Schur form of (A,B) were further reduced to */ +/* triangular form using 2-by-2 complex unitary transformations. */ +/* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* positive, then the j-th and (j+1)-st eigenvalues are a */ +/* complex conjugate pair, with ALPHAI(j+1) negative. */ + +/* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* may easily over- or underflow, and BETA(j) may even be zero. */ +/* Thus, the user should avoid naively computing the ratio. */ +/* However, ALPHAR and ALPHAI will be always less than and */ +/* usually comparable with norm(A) in magnitude, and BETA always */ +/* less than and usually comparable with norm(B). 
*/ + +/* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ +/* If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* Not referenced if JOBVSL = 'N'. */ + +/* LDVSL (input) INTEGER */ +/* The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* if JOBVSL = 'V', LDVSL >= N. */ + +/* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ +/* If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* Not referenced if JOBVSR = 'N'. */ + +/* LDVSR (input) INTEGER */ +/* The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* if JOBVSR = 'V', LDVSR >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N = 0, LWORK >= 1, else LWORK >= 8*N+16. */ +/* For good performance , LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* BWORK (workspace) LOGICAL array, dimension (N) */ +/* Not referenced if SORT = 'N'. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. (A,B) are not in Schur */ +/* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* be correct for j=INFO+1,...,N. */ +/* > N: =N+1: other than QZ iteration failed in DHGEQZ. */ +/* =N+2: after reordering, roundoff changed values of */ +/* some complex eigenvalues so that leading */ +/* eigenvalues in the Generalized Schur form no */ +/* longer satisfy SELCTG=.TRUE. This could also */ +/* be caused due to scaling. */ +/* =N+3: reordering failed in DTGSEN. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1; + vsr -= vsr_offset; + --work; + --bwork; + + /* Function Body */ + if (_starpu_lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (_starpu_lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (_starpu_lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (_starpu_lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = _starpu_lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! _starpu_lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -15; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -17; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. 
*/ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { +/* Computing MAX */ + i__1 = *n << 3, i__2 = *n * 6 + 16; + minwrk = max(i__1,i__2); + maxwrk = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & + c__1, n, &c__0); +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", + " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DOR" + "GQR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGES ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + safmin = _starpu_dlamch_("S"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + smlnum = sqrt(safmin) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0. 
&& bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need 6*N + 2*N space for storing balancing factors) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + _starpu_dlaset_("Full", n, n, &c_b38, &c_b39, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + _starpu_dlaset_("Full", n, n, &c_b38, &c_b39, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + 
+/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] +, ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L50; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ +/* (Workspace: need 4*N+16 ) */ + + *sdim = 0; + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], + n, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], + n, &ierr); + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + _starpu_dtgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, & + pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &ierr); + } + +/* Check if unscaling would cause over/underflow, if so, rescale */ +/* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ +/* B(I,I) and ALPHAR(I) and 
ALPHAI(I) are on the order of A(I,I) */ + + if (ilascl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.) { + if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ + i__] > anrm / anrmto) { + work[1] = (d__1 = a[i__ + i__ * a_dim1] / alphar[i__], + abs(d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } else if (alphai[i__] / safmax > anrmto / anrm || safmin / + alphai[i__] > anrm / anrmto) { + work[1] = (d__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ + i__], abs(d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L20: */ + } + } + + if (ilbscl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.) { + if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] + > bnrm / bnrmto) { + work[1] = (d__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( + d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L30: */ + } + } + +/* Undo scaling */ + + if (ilascl) { + _starpu_dlascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + _starpu_dlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); + if (alphai[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! 
lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L40: */ + } + + } + +L50: + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGGES */ + +} /* _starpu_dgges_ */ diff --git a/min-dgels/base/SRC/dggesx.c b/min-dgels/base/SRC/dggesx.c new file mode 100644 index 0000000..9028c9a --- /dev/null +++ b/min-dgels/base/SRC/dggesx.c @@ -0,0 +1,818 @@ +/* dggesx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +static doublereal c_b42 = 0.; +static doublereal c_b43 = 1.; + +/* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublereal *a, integer *lda, + doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, + doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, doublereal *work, integer *lwork, integer *iwork, integer * + liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + doublereal d__1; + + /* Builtin 
functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, ip; + doublereal pl, pr, dif[2]; + integer ihi, ilo; + doublereal eps; + integer ijob; + doublereal anrm, bnrm; + integer ierr, itau, iwrk, lwrk; + extern logical _starpu_lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irows; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( + char *, char *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *); + logical lst2sl; + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmax; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, 
doublereal *, integer *, doublereal *, integer *, + integer *); + integer ijobvl, iright; + extern /* Subroutine */ int _starpu_dtgsen_(integer *, logical *, logical *, + logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ijobvr; + logical wantsb; + integer liwmin; + logical wantse, lastsl; + doublereal anrmto, bnrmto; + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + integer minwrk, maxwrk; + logical wantsn; + doublereal smlnum; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical wantst, lquery, wantsv; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* .. Function Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGESX computes for a pair of N-by-N real nonsymmetric matrices */ +/* (A,B), the generalized eigenvalues, the real Schur form (S,T), and, */ +/* optionally, the left and/or right matrices of Schur vectors (VSL and */ +/* VSR). 
This gives the generalized Schur factorization */ + +/* (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) */ + +/* Optionally, it also orders the eigenvalues so that a selected cluster */ +/* of eigenvalues appears in the leading diagonal blocks of the upper */ +/* quasi-triangular matrix S and the upper triangular matrix T; computes */ +/* a reciprocal condition number for the average of the selected */ +/* eigenvalues (RCONDE); and computes a reciprocal condition number for */ +/* the right and left deflating subspaces corresponding to the selected */ +/* eigenvalues (RCONDV). The leading columns of VSL and VSR then form */ +/* an orthonormal basis for the corresponding left and right eigenspaces */ +/* (deflating subspaces). */ + +/* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* usually represented as the pair (alpha,beta), as there is a */ +/* reasonable interpretation for beta=0 or for both being zero. */ + +/* A pair of matrices (S,T) is in generalized real Schur form if T is */ +/* upper triangular with non-negative diagonal and S is block upper */ +/* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ +/* to real generalized eigenvalues, while 2-by-2 blocks of S will be */ +/* "standardized" by making the corresponding elements of T have the */ +/* form: */ +/* [ a 0 ] */ +/* [ 0 b ] */ + +/* and the pair of corresponding 2-by-2 blocks in S and T will have a */ +/* complex conjugate pair of generalized eigenvalues. */ + + +/* Arguments */ +/* ========= */ + +/* JOBVSL (input) CHARACTER*1 */ +/* = 'N': do not compute the left Schur vectors; */ +/* = 'V': compute the left Schur vectors. */ + +/* JOBVSR (input) CHARACTER*1 */ +/* = 'N': do not compute the right Schur vectors; */ +/* = 'V': compute the right Schur vectors. 
*/ + +/* SORT (input) CHARACTER*1 */ +/* Specifies whether or not to order the eigenvalues on the */ +/* diagonal of the generalized Schur form. */ +/* = 'N': Eigenvalues are not ordered; */ +/* = 'S': Eigenvalues are ordered (see SELCTG). */ + +/* SELCTG (external procedure) LOGICAL FUNCTION of three DOUBLE PRECISION arguments */ +/* SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* If SORT = 'N', SELCTG is not referenced. */ +/* If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* to the top left of the Schur form. */ +/* An eigenvalue (ALPHAR(j)+ALPHAI(j)*i)/BETA(j) is selected if */ +/* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ +/* one of a complex conjugate pair of eigenvalues is selected, */ +/* then both complex eigenvalues are selected. */ +/* Note that a selected complex eigenvalue may no longer satisfy */ +/* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, */ +/* since ordering may change the value of complex eigenvalues */ +/* (especially if the eigenvalue is ill-conditioned), in this */ +/* case INFO is set to N+2 (see INFO below). */ + +/* SENSE (input) CHARACTER*1 */ +/* Determines which reciprocal condition numbers are computed. */ +/* = 'N' : None are computed; */ +/* = 'E' : Computed for average of selected eigenvalues only; */ +/* = 'V' : Computed for selected deflating subspaces only; */ +/* = 'B' : Computed for both. */ +/* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VSL, and VSR. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the first of the pair of matrices. */ +/* On exit, A has been overwritten by its generalized Schur */ +/* form S. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the second of the pair of matrices.
*/ +/* On exit, B has been overwritten by its generalized Schur */ +/* form T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N). */ + +/* SDIM (output) INTEGER */ +/* If SORT = 'N', SDIM = 0. */ +/* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* for which SELCTG is true. (Complex conjugate pairs for which */ +/* SELCTG is true for either eigenvalue count as 2.) */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i */ +/* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* the real Schur form of (A,B) were further reduced to */ +/* triangular form using 2-by-2 complex unitary transformations. */ +/* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* positive, then the j-th and (j+1)-st eigenvalues are a */ +/* complex conjugate pair, with ALPHAI(j+1) negative. */ + +/* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* may easily over- or underflow, and BETA(j) may even be zero. */ +/* Thus, the user should avoid naively computing the ratio. */ +/* However, ALPHAR and ALPHAI will be always less than and */ +/* usually comparable with norm(A) in magnitude, and BETA always */ +/* less than and usually comparable with norm(B). */ + +/* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ +/* If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* Not referenced if JOBVSL = 'N'. */ + +/* LDVSL (input) INTEGER */ +/* The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* if JOBVSL = 'V', LDVSL >= N. */ + +/* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ +/* If JOBVSR = 'V', VSR will contain the right Schur vectors. 
*/ +/* Not referenced if JOBVSR = 'N'. */ + +/* LDVSR (input) INTEGER */ +/* The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* if JOBVSR = 'V', LDVSR >= N. */ + +/* RCONDE (output) DOUBLE PRECISION array, dimension ( 2 ) */ +/* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */ +/* reciprocal condition numbers for the average of the selected */ +/* eigenvalues. */ +/* Not referenced if SENSE = 'N' or 'V'. */ + +/* RCONDV (output) DOUBLE PRECISION array, dimension ( 2 ) */ +/* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */ +/* reciprocal condition numbers for the selected deflating */ +/* subspaces. */ +/* Not referenced if SENSE = 'N' or 'E'. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */ +/* LWORK >= max( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else */ +/* LWORK >= max( 8*N, 6*N+16 ). */ +/* Note that 2*SDIM*(N-SDIM) <= N*N/2. */ +/* Note also that an error is only returned if */ +/* LWORK < max( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' */ +/* this may not be large enough. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the bound on the optimal size of the WORK */ +/* array and the minimum size of the IWORK array, returns these */ +/* values as the first entries of the WORK and IWORK arrays, and */ +/* no error message related to LWORK or LIWORK is issued by */ +/* XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */ +/* LIWORK >= N+6. 
*/ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the bound on the optimal size of the */ +/* WORK array and the minimum size of the IWORK array, returns */ +/* these values as the first entries of the WORK and IWORK */ +/* arrays, and no error message related to LWORK or LIWORK is */ +/* issued by XERBLA. */ + +/* BWORK (workspace) LOGICAL array, dimension (N) */ +/* Not referenced if SORT = 'N'. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. (A,B) are not in Schur */ +/* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* be correct for j=INFO+1,...,N. */ +/* > N: =N+1: something other than the QZ iteration failed in DHGEQZ */ +/* =N+2: after reordering, roundoff changed values of */ +/* some complex eigenvalues so that leading */ +/* eigenvalues in the Generalized Schur form no */ +/* longer satisfy SELCTG=.TRUE. This could also */ +/* be caused by scaling. */ +/* =N+3: reordering failed in DTGSEN. */ + +/* Further details */ +/* =============== */ + +/* An approximate (asymptotic) bound on the average absolute error of */ +/* the selected eigenvalues is */ + +/* EPS * norm((A, B)) / RCONDE( 1 ). */ + +/* An approximate (asymptotic) bound on the maximum angular error in */ +/* the computed deflating subspaces is */ + +/* EPS * norm((A, B)) / RCONDV( 2 ). */ + +/* See LAPACK User's Guide, section 4.11 for more information. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1; + vsr -= vsr_offset; + --rconde; + --rcondv; + --work; + --iwork; + --bwork; + + /* Function Body */ + if (_starpu_lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (_starpu_lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (_starpu_lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (_starpu_lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = _starpu_lsame_(sort, "S"); + wantsn = _starpu_lsame_(sense, "N"); + wantse = _starpu_lsame_(sense, "E"); + wantsv = _starpu_lsame_(sense, "V"); + wantsb = _starpu_lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + if (wantsn) { + ijob = 0; + } else if (wantse) { + ijob = 1; + } else if (wantsv) { + ijob = 2; + } else if (wantsb) { + ijob = 4; + } + +/* Test the input arguments */ + + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! _starpu_lsame_(sort, "N")) { + *info = -3; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < max(1,*n)) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -16; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -18; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. 
*/ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { +/* Computing MAX */ + i__1 = *n << 3, i__2 = *n * 6 + 16; + minwrk = max(i__1,i__2); + maxwrk = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & + c__1, n, &c__0); +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", + " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DOR" + "GQR", " ", n, &c__1, n, &c_n1); + maxwrk = max(i__1,i__2); + } + lwrk = maxwrk; + if (ijob >= 1) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n * *n / 2; + lwrk = max(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + lwrk = 1; + } + work[1] = (doublereal) lwrk; + if (wantsn || *n == 0) { + liwmin = 1; + } else { + liwmin = *n + 6; + } + iwork[1] = liwmin; + + if (*lwork < minwrk && ! lquery) { + *info = -22; + } else if (*liwork < liwmin && ! lquery) { + *info = -24; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGESX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + safmin = _starpu_dlamch_("S"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + smlnum = sqrt(safmin) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need 6*N + 2*N for permutation parameters) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + _starpu_dlaset_("Full", n, n, &c_b42, &c_b43, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + 
work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + _starpu_dlaset_("Full", n, n, &c_b42, &c_b43, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] +, ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L60; + } + +/* Sort eigenvalues ALPHA/BETA and compute the reciprocal of */ +/* condition number(s) */ +/* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) */ +/* otherwise, need 8*(N+1) ) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], + n, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], + n, &ierr); + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Generalized Schur vectors, and */ +/* compute reciprocal condition numbers */ + + i__1 = *lwork - iwrk + 1; + _starpu_dtgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, 
sdim, &pl, &pr, + dif, &work[iwrk], &i__1, &iwork[1], liwork, &ierr); + + if (ijob >= 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); + maxwrk = max(i__1,i__2); + } + if (ierr == -22) { + +/* not enough real workspace */ + + *info = -22; + } else { + if (ijob == 1 || ijob == 4) { + rconde[1] = pl; + rconde[2] = pr; + } + if (ijob == 2 || ijob == 4) { + rcondv[1] = dif[0]; + rcondv[2] = dif[1]; + } + if (ierr == 1) { + *info = *n + 3; + } + } + + } + +/* Apply permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &ierr); + } + +/* Check if unscaling would cause over/underflow, if so, rescale */ +/* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ +/* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ + + if (ilascl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.) { + if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ + i__] > anrm / anrmto) { + work[1] = (d__1 = a[i__ + i__ * a_dim1] / alphar[i__], + abs(d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } else if (alphai[i__] / safmax > anrmto / anrm || safmin / + alphai[i__] > anrm / anrmto) { + work[1] = (d__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ + i__], abs(d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L20: */ + } + } + + if (ilbscl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.) 
{ + if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] + > bnrm / bnrmto) { + work[1] = (d__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( + d__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L30: */ + } + } + +/* Undo scaling */ + + if (ilascl) { + _starpu_dlascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + _starpu_dlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); + if (alphai[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L50: */ + } + + } + +L60: + + work[1] = (doublereal) maxwrk; + iwork[1] = liwmin; + + return 0; + +/* End of DGGESX */ + +} /* _starpu_dggesx_ */ diff --git a/min-dgels/base/SRC/dggev.c b/min-dgels/base/SRC/dggev.c new file mode 100644 index 0000000..8d2a8cf --- /dev/null +++ b/min-dgels/base/SRC/dggev.c @@ -0,0 +1,641 @@ +/* dggev.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (passed by address to the routines called below) */ + +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +static doublereal c_b36 = 0.; +static doublereal c_b37 = 1.; + +/* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, + doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, + doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer jc, in, jr, ihi, ilo; + doublereal eps; + logical ilv; + doublereal anrm, bnrm; + integer ierr, itau; + doublereal temp; + logical ilvl, ilvr; + integer iwrk; + extern logical _starpu_lsame_(char *, char *); + integer ileft, icols, irows; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( + char *, char *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int
_starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dtgevc_(char *, char *, logical *, integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *); + logical ldumma[1]; + char chtemp[1]; + doublereal bignum; + extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ijobvl, iright, ijobvr; + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal anrmto, bnrmto; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ.
of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ +/* the generalized eigenvalues, and optionally, the left and/or right */ +/* generalized eigenvectors. */ + +/* A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* singular. It is usually represented as the pair (alpha,beta), as */ +/* there is a reasonable interpretation for beta=0, and even for both */ +/* being zero. */ + +/* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* of (A,B) satisfies */ + +/* A * v(j) = lambda(j) * B * v(j). */ + +/* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* of (A,B) satisfies */ + +/* u(j)**H * A = lambda(j) * u(j)**H * B . */ + +/* where u(j)**H is the conjugate-transpose of u(j). */ + + +/* Arguments */ +/* ========= */ + +/* JOBVL (input) CHARACTER*1 */ +/* = 'N': do not compute the left generalized eigenvectors; */ +/* = 'V': compute the left generalized eigenvectors. */ + +/* JOBVR (input) CHARACTER*1 */ +/* = 'N': do not compute the right generalized eigenvectors; */ +/* = 'V': compute the right generalized eigenvectors. */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VL, and VR. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the matrix A in the pair (A,B). */ +/* On exit, A has been overwritten. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the matrix B in the pair (A,B). */ +/* On exit, B has been overwritten. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N).
*/ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* be the generalized eigenvalues. If ALPHAI(j) is zero, then */ +/* the j-th eigenvalue is real; if positive, then the j-th and */ +/* (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* ALPHAI(j+1) negative. */ + +/* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* may easily over- or underflow, and BETA(j) may even be zero. */ +/* Thus, the user should avoid naively computing the ratio */ +/* alpha/beta. However, ALPHAR and ALPHAI will be always less */ +/* than and usually comparable with norm(A) in magnitude, and */ +/* BETA always less than and usually comparable with norm(B). */ + +/* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ +/* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* after another in the columns of VL, in the same order as */ +/* their eigenvalues. If the j-th eigenvalue is real, then */ +/* u(j) = VL(:,j), the j-th column of VL. If the j-th and */ +/* (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ +/* Each eigenvector is scaled so the largest component has */ +/* abs(real part)+abs(imag. part)=1. */ +/* Not referenced if JOBVL = 'N'. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the matrix VL. LDVL >= 1, and */ +/* if JOBVL = 'V', LDVL >= N. */ + +/* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ +/* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* after another in the columns of VR, in the same order as */ +/* their eigenvalues. If the j-th eigenvalue is real, then */ +/* v(j) = VR(:,j), the j-th column of VR.
If the j-th and */ +/* (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ +/* Each eigenvector is scaled so the largest component has */ +/* abs(real part)+abs(imag. part)=1. */ +/* Not referenced if JOBVR = 'N'. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the matrix VR. LDVR >= 1, and */ +/* if JOBVR = 'V', LDVR >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,8*N). */ +/* For good performance, LWORK must generally be larger. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. No eigenvectors have been */ +/* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* should be correct for j=INFO+1,...,N. */ +/* > N: =N+1: DHGEQZ failed for a reason other than the QZ iteration. */ +/* =N+2: error return from DTGEVC. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + +/* Decode the input arguments */ + + /* Parameter adjustments: bias each array pointer so the code below can use 1-based (Fortran-style) indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + if (_starpu_lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (_starpu_lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (_starpu_lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (_starpu_lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -12; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -14; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.)
*/ + + if (*info == 0) { +/* Computing MAX(1, 8*N) */ + i__1 = 1, i__2 = *n << 3; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * (_starpu_ilaenv_(&c__1, "DGEQRF", " ", n, &c__1, n, & + c__0) + 7); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (_starpu_ilaenv_(&c__1, "DORMQR", " ", n, &c__1, n, + &c__0) + 7); + maxwrk = max(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (_starpu_ilaenv_(&c__1, "DORGQR", " ", n, & + c__1, n, &c_n1) + 7); + maxwrk = max(i__1,i__2); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -16; /* LWORK (argument 16) is below the minimum and this is not a workspace query */ + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.
&& bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ +/* (Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; /* number of WORK entries left from WORK(iwrk) onward */ + _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vl[vl_offset], ldvl) + ; + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + _starpu_dorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix.
*/ + + _starpu_dgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Workspace: need N) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; /* DHGEQZ codes N+1..2*N are folded back into 1..N */ + } else { + *info = *n + 1; + } + goto L110; /* error exit: skips the eigenvector computation and the undo-scaling step */ + } + +/* Compute Eigenvectors */ +/* (Workspace: need 6*N) */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L110; /* error exit: skips normalization and the undo-scaling step */ + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.) 
{ + goto L50; /* second column of a complex pair: handled together with column jc-1 */ + } + temp = 0.; + if (alphai[jc] == 0.)
{ + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], + abs(d__1)); + temp = max(d__2,d__3); +/* L10: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], + abs(d__1)) + (d__2 = vl[jr + (jc + 1) * + vl_dim1], abs(d__2)); + temp = max(d__3,d__4); +/* L20: */ + } + } + if (temp < smlnum) { + goto L50; /* largest entry is below smlnum: leave this column unnormalized */ + } + temp = 1. / temp; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L40: */ + } + } +L50: + ; + } + } + if (ilvr) { + _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.) { + goto L100; /* second column of a complex pair: handled together with column jc-1 */ + } + temp = 0.; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], + abs(d__1)); + temp = max(d__2,d__3); +/* L60: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], + abs(d__1)) + (d__2 = vr[jr + (jc + 1) * + vr_dim1], abs(d__2)); + temp = max(d__3,d__4); +/* L70: */ + } + } + if (temp < smlnum) { + goto L100; /* largest entry is below smlnum: leave this column unnormalized */ + } + temp = 1. / temp; + if (alphai[jc] == 0.)
{ + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L90: */ + } + } +L100: + ; + } + } + +/* End of eigenvector calculation */ + + } + +/* Undo scaling if necessary (NOTE(review): the error exits above jump straight to L110 and therefore skip this step) */ + + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + +L110: + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGGEV */ + +} /* _starpu_dggev_ */ diff --git a/min-dgels/base/SRC/dggevx.c b/min-dgels/base/SRC/dggevx.c new file mode 100644 index 0000000..26d454b --- /dev/null +++ b/min-dgels/base/SRC/dggevx.c @@ -0,0 +1,885 @@ +/* dggevx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b59 = 0.; +static doublereal c_b60 = 1.; + +/* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm,
doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublereal *work, integer *lwork, integer *iwork, logical * + bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, m, jc, in, mm, jr; + doublereal eps; + logical ilv, pair; + doublereal anrm, bnrm; + integer ierr, itau; + doublereal temp; + logical ilvl, ilvr; + integer iwrk, iwrk1; + extern logical _starpu_lsame_(char *, char *); + integer icols; + logical noscl; + integer irows; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( + char *, char *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ldumma[1]; + char chtemp[1]; + doublereal bignum; + extern /* 
Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), _starpu_dtgevc_(char *, char *, + logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + integer ijobvl; + extern /* Subroutine */ int _starpu_dtgsna_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *, integer + *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ijobvr; + logical wantsb; + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal anrmto; + logical wantse; + doublereal bnrmto; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + logical wantsn; + doublereal smlnum; + logical lquery, wantsv; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ +/* the generalized eigenvalues, and optionally, the left and/or right */ +/* generalized eigenvectors. 
*/ + +/* Optionally also, it computes a balancing transformation to improve */ +/* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */ +/* the eigenvalues (RCONDE), and reciprocal condition numbers for the */ +/* right eigenvectors (RCONDV). */ + +/* A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* singular. It is usually represented as the pair (alpha,beta), as */ +/* there is a reasonable interpretation for beta=0, and even for both */ +/* being zero. */ + +/* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* of (A,B) satisfies */ + +/* A * v(j) = lambda(j) * B * v(j) . */ + +/* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* of (A,B) satisfies */ + +/* u(j)**H * A = lambda(j) * u(j)**H * B. */ + +/* where u(j)**H is the conjugate-transpose of u(j). */ + + +/* Arguments */ +/* ========= */ + +/* BALANC (input) CHARACTER*1 */ +/* Specifies the balance option to be performed. */ +/* = 'N': do not diagonally scale or permute; */ +/* = 'P': permute only; */ +/* = 'S': scale only; */ +/* = 'B': both permute and scale. */ +/* Computed reciprocal condition numbers will be for the */ +/* matrices after permuting and/or balancing. Permuting does */ +/* not change condition numbers (in exact arithmetic), but */ +/* balancing does. */ + +/* JOBVL (input) CHARACTER*1 */ +/* = 'N': do not compute the left generalized eigenvectors; */ +/* = 'V': compute the left generalized eigenvectors. */ + +/* JOBVR (input) CHARACTER*1 */ +/* = 'N': do not compute the right generalized eigenvectors; */ +/* = 'V': compute the right generalized eigenvectors. */ + +/* SENSE (input) CHARACTER*1 */ +/* Determines which reciprocal condition numbers are computed. 
*/ +/* = 'N': none are computed; */ +/* = 'E': computed for eigenvalues only; */ +/* = 'V': computed for eigenvectors only; */ +/* = 'B': computed for eigenvalues and eigenvectors. */ + +/* N (input) INTEGER */ +/* The order of the matrices A, B, VL, and VR. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the matrix A in the pair (A,B). */ +/* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* or both, then A contains the first part of the real Schur */ +/* form of the "balanced" versions of the input A and B. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the matrix B in the pair (A,B). */ +/* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* or both, then B contains the second part of the real Schur */ +/* form of the "balanced" versions of the input A and B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB >= max(1,N). */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* be the generalized eigenvalues. If ALPHAI(j) is zero, then */ +/* the j-th eigenvalue is real; if positive, then the j-th and */ +/* (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* ALPHAI(j+1) negative. */ + +/* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* may easily over- or underflow, and BETA(j) may even be zero. */ +/* Thus, the user should avoid naively computing the ratio */ +/* ALPHA/BETA. However, ALPHAR and ALPHAI will be always less */ +/* than and usually comparable with norm(A) in magnitude, and */ +/* BETA always less than and usually comparable with norm(B). 
*/ +/* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ +/* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* after another in the columns of VL, in the same order as */ +/* their eigenvalues. If the j-th eigenvalue is real, then */ +/* u(j) = VL(:,j), the j-th column of VL. If the j-th and */ +/* (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ +/* Each eigenvector will be scaled so the largest component has */ +/* abs(real part) + abs(imag. part) = 1. */ +/* Not referenced if JOBVL = 'N'. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the matrix VL. LDVL >= 1, and */ +/* if JOBVL = 'V', LDVL >= N. */ + +/* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ +/* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* after another in the columns of VR, in the same order as */ +/* their eigenvalues. If the j-th eigenvalue is real, then */ +/* v(j) = VR(:,j), the j-th column of VR. If the j-th and */ +/* (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ +/* Each eigenvector will be scaled so the largest component has */ +/* abs(real part) + abs(imag. part) = 1. */ +/* Not referenced if JOBVR = 'N'. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the matrix VR. LDVR >= 1, and */ +/* if JOBVR = 'V', LDVR >= N. */ + +/* ILO (output) INTEGER */ +/* IHI (output) INTEGER */ +/* ILO and IHI are integer values such that on exit */ +/* A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */ + +/* LSCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied */ +/* to the left side of A and B.
If PL(j) is the index of the */ +/* row interchanged with row j, and DL(j) is the scaling */ +/* factor applied to row j, then */ +/* LSCALE(j) = PL(j) for j = 1,...,ILO-1 */ +/* = DL(j) for j = ILO,...,IHI */ +/* = PL(j) for j = IHI+1,...,N. */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* RSCALE (output) DOUBLE PRECISION array, dimension (N) */ +/* Details of the permutations and scaling factors applied */ +/* to the right side of A and B. If PR(j) is the index of the */ +/* column interchanged with column j, and DR(j) is the scaling */ +/* factor applied to column j, then */ +/* RSCALE(j) = PR(j) for j = 1,...,ILO-1 */ +/* = DR(j) for j = ILO,...,IHI */ +/* = PR(j) for j = IHI+1,...,N */ +/* The order in which the interchanges are made is N to IHI+1, */ +/* then 1 to ILO-1. */ + +/* ABNRM (output) DOUBLE PRECISION */ +/* The one-norm of the balanced matrix A. */ + +/* BBNRM (output) DOUBLE PRECISION */ +/* The one-norm of the balanced matrix B. */ + +/* RCONDE (output) DOUBLE PRECISION array, dimension (N) */ +/* If SENSE = 'E' or 'B', the reciprocal condition numbers of */ +/* the eigenvalues, stored in consecutive elements of the array. */ +/* For a complex conjugate pair of eigenvalues two consecutive */ +/* elements of RCONDE are set to the same value. Thus RCONDE(j), */ +/* RCONDV(j), and the j-th columns of VL and VR all correspond */ +/* to the j-th eigenpair. */ +/* If SENSE = 'N' or 'V', RCONDE is not referenced. */ + +/* RCONDV (output) DOUBLE PRECISION array, dimension (N) */ +/* If SENSE = 'V' or 'B', the estimated reciprocal condition */ +/* numbers of the eigenvectors, stored in consecutive elements */ +/* of the array. For a complex eigenvector two consecutive */ +/* elements of RCONDV are set to the same value. If the */ +/* eigenvalues cannot be reordered to compute RCONDV(j), */ +/* RCONDV(j) is set to 0; this can only occur when the true */ +/* value would be very small anyway.
*/ +/* If SENSE = 'N' or 'E', RCONDV is not referenced. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,2*N). */ +/* If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', */ +/* LWORK >= max(1,6*N). */ +/* If SENSE = 'E' or 'B', LWORK >= max(1,10*N). */ +/* If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (N+6) */ +/* If SENSE = 'E', IWORK is not referenced. */ + +/* BWORK (workspace) LOGICAL array, dimension (N) */ +/* If SENSE = 'N', BWORK is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1,...,N: */ +/* The QZ iteration failed. No eigenvectors have been */ +/* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* should be correct for j=INFO+1,...,N. */ +/* > N: =N+1: other than QZ iteration failed in DHGEQZ. */ +/* =N+2: error return from DTGEVC. */ + +/* Further Details */ +/* =============== */ + +/* Balancing a matrix pair (A,B) includes, first, permuting rows and */ +/* columns to isolate eigenvalues, second, applying diagonal similarity */ +/* transformation to the rows and columns to make the rows and columns */ +/* as close in norm as possible. The computed reciprocal condition */ +/* numbers correspond to the balanced matrix. Permuting rows and columns */ +/* will not change the condition numbers (in exact arithmetic) but */ +/* diagonal scaling will. For further explanation of balancing, see */ +/* section 4.11.1.2 of LAPACK Users' Guide. 
*/ + +/* An approximate error bound on the chordal distance between the i-th */ +/* computed generalized eigenvalue w and the corresponding exact */ +/* eigenvalue lambda is */ + +/* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */ + +/* An approximate error bound for the angle between the i-th computed */ +/* eigenvector VL(i) or VR(i) is given by */ + +/* EPS * norm(ABNRM, BBNRM) / DIF(i). */ + +/* For further explanation of the reciprocal condition numbers RCONDE */ +/* and RCONDV, see section 4.11 of LAPACK User's Guide. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --lscale; + --rscale; + --rconde; + --rcondv; + --work; + --iwork; + --bwork; + + /* Function Body */ + if (_starpu_lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (_starpu_lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (_starpu_lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (_starpu_lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + + noscl = _starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "P"); + wantsn = _starpu_lsame_(sense, "N"); + wantse = _starpu_lsame_(sense, "E"); + wantsv = _starpu_lsame_(sense, "V"); + wantsb = _starpu_lsame_(sense, "B"); + +/* Test the input 
arguments */ + + *info = 0; + lquery = *lwork == -1; + if (! (_starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "S") || _starpu_lsame_(balanc, "P") + || _starpu_lsame_(balanc, "B"))) { + *info = -1; + } else if (ijobvl <= 0) { + *info = -2; + } else if (ijobvr <= 0) { + *info = -3; + } else if (! (wantsn || wantse || wantsb || wantsv)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -14; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -16; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + if (noscl && ! ilv) { + minwrk = *n << 1; + } else { + minwrk = *n * 6; + } + if (wantse || wantsb) { + minwrk = *n * 10; + } + if (wantsv || wantsb) { +/* Computing MAX */ + i__1 = minwrk, i__2 = (*n << 1) * (*n + 4) + 16; + minwrk = max(i__1,i__2); + } + maxwrk = minwrk; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & + c__1, n, &c__0); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", " ", n, & + c__1, n, &c__0); + maxwrk = max(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", + " ", n, &c__1, n, &c__0); + maxwrk = max(i__1,i__2); + } + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! 
lquery) { + *info = -26; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGEVX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0. 
&& bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute and/or balance the matrix pair (A,B) */ +/* (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */ + + _starpu_dggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, & + lscale[1], &rscale[1], &work[1], &ierr); + +/* Compute ABNRM and BBNRM */ + + *abnrm = _starpu_dlange_("1", n, n, &a[a_offset], lda, &work[1]); + if (ilascl) { + work[1] = *abnrm; + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &work[1], & + c__1, &ierr); + *abnrm = work[1]; + } + + *bbnrm = _starpu_dlange_("1", n, n, &b[b_offset], ldb, &work[1]); + if (ilbscl) { + work[1] = *bbnrm; + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &work[1], & + c__1, &ierr); + *bbnrm = work[1]; + } + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB ) */ + + irows = *ihi + 1 - *ilo; + if (ilv || ! 
wantsn) { + icols = *n + 1 - *ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + _starpu_dgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, & + work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL and/or VR */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + _starpu_dlaset_("Full", n, n, &c_b59, &c_b60, &vl[vl_offset], ldvl) + ; + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + _starpu_dlacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[ + *ilo + 1 + *ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + _starpu_dorgqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + + if (ilvr) { + _starpu_dlaset_("Full", n, n, &c_b59, &c_b60, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv || ! wantsn) { + +/* Eigenvectors requested -- work on whole matrix. */ + + _starpu_dgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], + lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Workspace: need N) */ + + if (ilv || ! 
wantsn) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + + _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset] +, ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, & + vr[vr_offset], ldvr, &work[1], lwork, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L130; + } + +/* Compute Eigenvectors and estimate condition numbers if desired */ +/* (Workspace: DTGEVC: need 6*N */ +/* DTGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', */ +/* need N otherwise ) */ + + if (ilv || ! wantsn) { + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, & + work[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L130; + } + } + + if (! wantsn) { + +/* compute eigenvectors (DTGEVC) and estimate condition */ +/* numbers (DTGSNA). Note that the definition of the condition */ +/* number is not invariant under transformation (u,v) to */ +/* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */ +/* Schur form (S,T), Q and Z are orthogonal matrices. In order */ +/* to avoid using extra 2*N*N workspace, we have to recalculate */ +/* eigenvectors and estimate one condition numbers at a time. */ + + pair = FALSE_; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + if (pair) { + pair = FALSE_; + goto L20; + } + mm = 1; + if (i__ < *n) { + if (a[i__ + 1 + i__ * a_dim1] != 0.) 
{ + pair = TRUE_; + mm = 2; + } + } + + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + bwork[j] = FALSE_; +/* L10: */ + } + if (mm == 1) { + bwork[i__] = TRUE_; + } else if (mm == 2) { + bwork[i__] = TRUE_; + bwork[i__ + 1] = TRUE_; + } + + iwrk = mm * *n + 1; + iwrk1 = iwrk + mm * *n; + +/* Compute a pair of left and right eigenvectors. */ +/* (compute workspace: need up to 4*N + 6*N) */ + + if (wantse || wantsb) { + _starpu_dtgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &mm, + &m, &work[iwrk1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L130; + } + } + + i__2 = *lwork - iwrk1 + 1; + _starpu_dtgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[ + i__], &rcondv[i__], &mm, &m, &work[iwrk1], &i__2, & + iwork[1], &ierr); + +L20: + ; + } + } + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + _starpu_dggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[ + vl_offset], ldvl, &ierr); + + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.) { + goto L70; + } + temp = 0.; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], abs( + d__1)); + temp = max(d__2,d__3); +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], abs( + d__1)) + (d__2 = vl[jr + (jc + 1) * vl_dim1], abs( + d__2)); + temp = max(d__3,d__4); +/* L40: */ + } + } + if (temp < smlnum) { + goto L70; + } + temp = 1. / temp; + if (alphai[jc] == 0.) 
{ + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L50: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L60: */ + } + } +L70: + ; + } + } + if (ilvr) { + _starpu_dggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[ + vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.) { + goto L120; + } + temp = 0.; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], abs( + d__1)); + temp = max(d__2,d__3); +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], abs( + d__1)) + (d__2 = vr[jr + (jc + 1) * vr_dim1], abs( + d__2)); + temp = max(d__3,d__4); +/* L90: */ + } + } + if (temp < smlnum) { + goto L120; + } + temp = 1. / temp; + if (alphai[jc] == 0.) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L100: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L110: */ + } + } +L120: + ; + } + } + +/* Undo scaling if necessary */ + + if (ilascl) { + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + +L130: + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGGEVX */ + +} /* _starpu_dggevx_ */ diff --git a/min-dgels/base/SRC/dggglm.c b/min-dgels/base/SRC/dggglm.c new file mode 100644 index 0000000..281eaf6 --- /dev/null +++ b/min-dgels/base/SRC/dggglm.c @@ -0,0 +1,331 @@ +/* dggglm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b32 = -1.; +static doublereal c_b34 = 1.; + +/* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, + doublereal *x, doublereal *y, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, nb, np, nb1, nb2, nb3, nb4, lopt; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dggqrf_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer lwkmin; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dormrq_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + 
integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGGLM solves a general Gauss-Markov linear model (GLM) problem: */ + +/* minimize || y ||_2 subject to d = A*x + B*y */ +/* x */ + +/* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */ +/* given N-vector. It is assumed that M <= N <= M+P, and */ + +/* rank(A) = M and rank( A B ) = N. */ + +/* Under these assumptions, the constrained equation is always */ +/* consistent, and there is a unique solution x and a minimal 2-norm */ +/* solution y, which is obtained using a generalized QR factorization */ +/* of the matrices (A, B) given by */ + +/* A = Q*(R), B = Q*T*Z. */ +/* (0) */ + +/* In particular, if matrix B is square nonsingular, then the problem */ +/* GLM is equivalent to the following weighted linear least squares */ +/* problem */ + +/* minimize || inv(B)*(d-A*x) ||_2 */ +/* x */ + +/* where inv(B) denotes the inverse of B. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of rows of the matrices A and B. N >= 0. */ + +/* M (input) INTEGER */ +/* The number of columns of the matrix A. 0 <= M <= N. */ + +/* P (input) INTEGER */ +/* The number of columns of the matrix B. P >= N-M. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) */ +/* On entry, the N-by-M matrix A. */ +/* On exit, the upper triangular part of the array A contains */ +/* the M-by-M upper triangular matrix R. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) */ +/* On entry, the N-by-P matrix B. */ +/* On exit, if N <= P, the upper triangle of the subarray */ +/* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* if N > P, the elements on and above the (N-P)th subdiagonal */ +/* contain the N-by-P upper trapezoidal matrix T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D is the left hand side of the GLM equation. */ +/* On exit, D is destroyed. */ + +/* X (output) DOUBLE PRECISION array, dimension (M) */ +/* Y (output) DOUBLE PRECISION array, dimension (P) */ +/* On exit, X and Y are the solutions of the GLM problem. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N+M+P). */ +/* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, */ +/* where NB is an upper bound for the optimal blocksizes for */ +/* DGEQRF, DGERQF, DORMQR and DORMRQ. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1: the upper triangular factor R associated with A in the */ +/* generalized QR factorization of the pair (A, B) is */ +/* singular, so that rank(A) < M; the least squares */ +/* solution could not be computed. 
*/ +/* = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal */ +/* factor T associated with B in the generalized QR */ +/* factorization of the pair (A, B) is singular, so that */ +/* rank( A B ) < N; the least squares solution could not */ +/* be computed. */ + +/* =================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --d__; + --x; + --y; + --work; + + /* Function Body */ + *info = 0; + np = min(*n,*p); + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0 || *m > *n) { + *info = -2; + } else if (*p < 0 || *p < *n - *m) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, m, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", n, m, &c_n1, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, m, p, &c_n1); + nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", n, m, p, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); + nb = max(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *m + np + max(*n,*p) * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGGLM", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the GQR factorization of matrices A and B: */ + +/* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M */ +/* ( 0 ) N-M ( 0 T22 ) N-M */ +/* M M+P-N N-M */ + +/* where R11 and T22 are upper triangular, and Q and Z are */ +/* orthogonal. */ + + i__1 = *lwork - *m - np; + _starpu_dggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + + 1], &work[*m + np + 1], &i__1, info); + lopt = (integer) work[*m + np + 1]; + +/* Update left-hand-side vector d = Q'*d = ( d1 ) M */ +/* ( d2 ) N-M */ + + i__1 = max(1,*n); + i__2 = *lwork - *m - np; + _starpu_dormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], & + d__[1], &i__1, &work[*m + np + 1], &i__2, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*m + np + 1]; + lopt = max(i__1,i__2); + +/* Solve T22*y2 = d2 for y2 */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + _starpu_dtrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 + + (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, + info); + + if (*info > 0) { + *info = 1; + return 0; + } + + i__1 = *n - *m; + _starpu_dcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); + } + +/* Set y1 = 0 */ + + i__1 = *m + *p - *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + +/* Update d1 = d1 - T12*y2 */ + + i__1 = *n - *m; + _starpu_dgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 + + 1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1); + +/* Solve triangular system: R11*x = d1 */ + + if (*m > 0) { + _starpu_dtrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], + lda, &d__[1], m, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Copy D to X */ + + _starpu_dcopy_(m, &d__[1], 
&c__1, &x[1], &c__1); + } + +/* Backward transformation y = Z'*y */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n - *p + 1; + i__3 = max(1,*p); + i__4 = *lwork - *m - np; + _starpu_dormrq_("Left", "Transpose", p, &c__1, &np, &b[max(i__1, i__2)+ b_dim1], + ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*m + np + 1]; + work[1] = (doublereal) (*m + np + max(i__1,i__2)); + + return 0; + +/* End of DGGGLM */ + +} /* _starpu_dggglm_ */ diff --git a/min-dgels/base/SRC/dgghrd.c b/min-dgels/base/SRC/dgghrd.c new file mode 100644 index 0000000..00722f5 --- /dev/null +++ b/min-dgels/base/SRC/dgghrd.c @@ -0,0 +1,329 @@ +/* dgghrd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b10 = 0.; +static doublereal c_b11 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * + ldz, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3; + + /* Local variables */ + doublereal c__, s; + logical ilq, ilz; + integer jcol; + doublereal temp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer jrow; + extern logical 
_starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_xerbla_(char *, integer *); + integer icompq, icompz; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGHRD reduces a pair of real matrices (A,B) to generalized upper */ +/* Hessenberg form using orthogonal transformations, where A is a */ +/* general matrix and B is upper triangular. The form of the */ +/* generalized eigenvalue problem is */ +/* A*x = lambda*B*x, */ +/* and B is typically made upper triangular by computing its QR */ +/* factorization and moving the orthogonal matrix Q to the left side */ +/* of the equation. */ + +/* This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* Q**T*A*Z = H */ +/* and transforms B to another upper triangular matrix T: */ +/* Q**T*B*Z = T */ +/* in order to reduce the problem to its standard form */ +/* H*y = lambda*T*y */ +/* where y = Z**T*x. */ + +/* The orthogonal matrices Q and Z are determined as products of Givens */ +/* rotations. They may either be formed explicitly, or they may be */ +/* postmultiplied into input matrices Q1 and Z1, so that */ + +/* Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */ + +/* Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */ + +/* If Q1 is the orthogonal matrix from the QR factorization of B in the */ +/* original equation A*x = lambda*B*x, then DGGHRD reduces the original */ +/* problem to generalized Hessenberg form. 
*/ + +/* Arguments */ +/* ========= */ + +/* COMPQ (input) CHARACTER*1 */ +/* = 'N': do not compute Q; */ +/* = 'I': Q is initialized to the unit matrix, and the */ +/* orthogonal matrix Q is returned; */ +/* = 'V': Q must contain an orthogonal matrix Q1 on entry, */ +/* and the product Q1*Q is returned. */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': do not compute Z; */ +/* = 'I': Z is initialized to the unit matrix, and the */ +/* orthogonal matrix Z is returned; */ +/* = 'V': Z must contain an orthogonal matrix Z1 on entry, */ +/* and the product Z1*Z is returned. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* ILO and IHI mark the rows and columns of A which are to be */ +/* reduced. It is assumed that A is already upper triangular */ +/* in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* normally set by a previous call to DGGBAL; otherwise they */ +/* should be set to 1 and N respectively. */ +/* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the N-by-N general matrix to be reduced. */ +/* On exit, the upper triangle and the first subdiagonal of A */ +/* are overwritten with the upper Hessenberg matrix H, and the */ +/* rest is set to zero. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the N-by-N upper triangular matrix B. */ +/* On exit, the upper triangular matrix T = Q**T B Z. The */ +/* elements below the diagonal are set to zero. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* On entry, if COMPQ = 'V', the orthogonal matrix Q1, */ +/* typically from the QR factorization of B. 
*/ +/* On exit, if COMPQ='I', the orthogonal matrix Q, and if */ +/* COMPQ = 'V', the product Q1*Q. */ +/* Not referenced if COMPQ='N'. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. */ +/* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* On entry, if COMPZ = 'V', the orthogonal matrix Z1. */ +/* On exit, if COMPZ='I', the orthogonal matrix Z, and if */ +/* COMPZ = 'V', the product Z1*Z. */ +/* Not referenced if COMPZ='N'. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. */ +/* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* This routine reduces A to Hessenberg and B to triangular form by */ +/* an unblocked reduction, as described in _Matrix_Computations_, */ +/* by Golub and Van Loan (Johns Hopkins Press.) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode COMPQ */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + if (_starpu_lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (_starpu_lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (_starpu_lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + +/* Decode COMPZ */ + + if (_starpu_lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (_starpu_lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (_starpu_lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Test the input parameters. */ + + *info = 0; + if (icompq <= 0) { + *info = -1; + } else if (icompz <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (ilq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (ilz && *ldz < *n || *ldz < 1) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGHRD", &i__1); + return 0; + } + +/* Initialize Q and Z if desired. 
*/ + + if (icompq == 3) { + _starpu_dlaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq); + } + if (icompz == 3) { + _starpu_dlaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz); + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Zero out lower triangle of B */ + + i__1 = *n - 1; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = jcol + 1; jrow <= i__2; ++jrow) { + b[jrow + jcol * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Reduce A and B */ + + i__1 = *ihi - 2; + for (jcol = *ilo; jcol <= i__1; ++jcol) { + + i__2 = jcol + 2; + for (jrow = *ihi; jrow >= i__2; --jrow) { + +/* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ + + temp = a[jrow - 1 + jcol * a_dim1]; + _starpu_dlartg_(&temp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + + jcol * a_dim1]); + a[jrow + jcol * a_dim1] = 0.; + i__3 = *n - jcol; + _starpu_drot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + ( + jcol + 1) * a_dim1], lda, &c__, &s); + i__3 = *n + 2 - jrow; + _starpu_drot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + ( + jrow - 1) * b_dim1], ldb, &c__, &s); + if (ilq) { + _starpu_drot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 + + 1], &c__1, &c__, &s); + } + +/* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ + + temp = b[jrow + jrow * b_dim1]; + _starpu_dlartg_(&temp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow + + jrow * b_dim1]); + b[jrow + (jrow - 1) * b_dim1] = 0.; + _starpu_drot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + + 1], &c__1, &c__, &s); + i__3 = jrow - 1; + _starpu_drot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 + + 1], &c__1, &c__, &s); + if (ilz) { + _starpu_drot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } +/* L30: */ + } +/* L40: */ + } + + return 0; + +/* End of DGGHRD */ + +} /* _starpu_dgghrd_ */ diff --git a/min-dgels/base/SRC/dgglse.c 
b/min-dgels/base/SRC/dgglse.c new file mode 100644 index 0000000..9efc8c6 --- /dev/null +++ b/min-dgels/base/SRC/dgglse.c @@ -0,0 +1,340 @@ +/* dgglse.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b31 = -1.; +static doublereal c_b33 = 1.; + +/* Subroutine */ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * + a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, + doublereal *d__, doublereal *x, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer nb, mn, nr, nb1, nb2, nb3, nb4, lopt; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer + *, doublereal *, integer *), _starpu_dggrqf_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer 
lwkmin; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dormrq_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGLSE solves the linear equality-constrained least squares (LSE) */ +/* problem: */ + +/* minimize || c - A*x ||_2 subject to B*x = d */ + +/* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */ +/* M-vector, and d is a given P-vector. It is assumed that */ +/* P <= N <= M+P, and */ + +/* rank(B) = P and rank( (A) ) = N. */ +/* ( (B) ) */ + +/* These conditions ensure that the LSE problem has a unique solution, */ +/* which is obtained using a generalized RQ factorization of the */ +/* matrices (B, A) given by */ + +/* B = (0 R)*Q, A = Z*T*Q. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrices A and B. N >= 0. */ + +/* P (input) INTEGER */ +/* The number of rows of the matrix B. 0 <= P <= N <= M+P. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(M,N)-by-N upper trapezoidal matrix T. 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the P-by-N matrix B. */ +/* On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */ +/* contains the P-by-P upper triangular matrix R. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,P). */ + +/* C (input/output) DOUBLE PRECISION array, dimension (M) */ +/* On entry, C contains the right hand side vector for the */ +/* least squares part of the LSE problem. */ +/* On exit, the residual sum of squares for the solution */ +/* is given by the sum of squares of elements N-P+1 to M of */ +/* vector C. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (P) */ +/* On entry, D contains the right hand side vector for the */ +/* constrained equation. */ +/* On exit, D is destroyed. */ + +/* X (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, X is the solution of the LSE problem. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M+N+P). */ +/* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, */ +/* where NB is an upper bound for the optimal blocksizes for */ +/* DGEQRF, DGERQF, DORMQR and DORMRQ. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value.
*/ +/* = 1: the upper triangular factor R associated with B in the */ +/* generalized RQ factorization of the pair (B, A) is */ +/* singular, so that rank(B) < P; the least squares */ +/* solution could not be computed. */ +/* = 2: the (N-P) by (N-P) part of the upper trapezoidal factor */ +/* T associated with A in the generalized RQ factorization */ +/* of the pair (B, A) is singular, so that */ +/* rank( (A) ) < N; the least squares solution could not */ +/* ( (B) ) */ +/* be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --c__; + --d__; + --x; + --work; + + /* Function Body */ + *info = 0; + mn = min(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*p < 0 || *p > *n || *p < *n - *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,*p)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", m, n, p, &c_n1); + nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, p, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); + nb = max(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *p + mn + max(*m,*n) * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGLSE", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the GRQ factorization of matrices B and A: */ + +/* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P */ +/* N-P P ( 0 R22 ) M+P-N */ +/* N-P P */ + +/* where T12 and R11 are upper triangular, and Q and Z are */ +/* orthogonal. */ + + i__1 = *lwork - *p - mn; + _starpu_dggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + + 1], &work[*p + mn + 1], &i__1, info); + lopt = (integer) work[*p + mn + 1]; + +/* Update c = Z'*c = ( c1 ) N-P */ +/* ( c2 ) M+P-N */ + + i__1 = max(1,*m); + i__2 = *lwork - *p - mn; + _starpu_dormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda, &work[*p + + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; + lopt = max(i__1,i__2); + +/* Solve T12*x2 = d for x2 */ + + if (*p > 0) { + _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + + 1) * b_dim1 + 1], ldb, &d__[1], p, info); + + if (*info > 0) { + *info = 1; + return 0; + } + +/* Put the solution in X */ + + _starpu_dcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); + +/* Update c1 */ + + i__1 = *n - *p; + _starpu_dgemv_("No transpose", &i__1, p, &c_b31, &a[(*n - *p + 1) * a_dim1 + + 1], lda, &d__[1], &c__1, &c_b33, &c__[1], &c__1); + } + +/* Solve R11*x1 = c1 for x1 */ + + if (*n > *p) { + i__1 = *n - *p; + i__2 = *n - *p; + _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[ + a_offset], lda, &c__[1], &i__2, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Put the solutions in X */ + + i__1 = *n - *p; + _starpu_dcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); + } + +/* Compute the residual vector: */ + + if (*m < *n) { + nr = *m + *p - *n; + if (nr > 0) { + i__1 = *n - *m; + _starpu_dgemv_("No 
transpose", &nr, &i__1, &c_b31, &a[*n - *p + 1 + (*m + + 1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b33, &c__[*n - + *p + 1], &c__1); + } + } else { + nr = *p; + } + if (nr > 0) { + _starpu_dtrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n + - *p + 1) * a_dim1], lda, &d__[1], &c__1); + _starpu_daxpy_(&nr, &c_b31, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); + } + +/* Backward transformation x = Q'*x */ + + i__1 = *lwork - *p - mn; + _starpu_dormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1], &x[ + 1], n, &work[*p + mn + 1], &i__1, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; + work[1] = (doublereal) (*p + mn + max(i__1,i__2)); + + return 0; + +/* End of DGGLSE */ + +} /* _starpu_dgglse_ */ diff --git a/min-dgels/base/SRC/dggqrf.c b/min-dgels/base/SRC/dggqrf.c new file mode 100644 index 0000000..509c66c --- /dev/null +++ b/min-dgels/base/SRC/dggqrf.c @@ -0,0 +1,267 @@ +/* dggqrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * + a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer nb, nb1, nb2, nb3, lopt; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dgerqf_(integer *, integer *, doublereal *, integer *, doublereal + *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGGQRF computes a generalized QR factorization of an N-by-M matrix A */ +/* and an N-by-P matrix B: */ + +/* A = Q*R, B = Q*T*Z, */ + +/* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ +/* matrix, and R and T assume one of the forms: */ + +/* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, */ +/* ( 0 ) N-M N M-N */ +/* M */ + +/* where R11 is upper triangular, and */ + +/* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, */ +/* P-N N ( T21 ) P */ +/* P */ + +/* where T12 or T21 is upper triangular. */ + +/* In particular, if B is square and nonsingular, the GQR factorization */ +/* of A and B implicitly gives the QR factorization of inv(B)*A: */ + +/* inv(B)*A = Z'*(inv(T)*R) */ + +/* where inv(B) denotes the inverse of the matrix B, and Z' denotes the */ +/* transpose of the matrix Z. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of rows of the matrices A and B. N >= 0. */ + +/* M (input) INTEGER */ +/* The number of columns of the matrix A. M >= 0. */ + +/* P (input) INTEGER */ +/* The number of columns of the matrix B. P >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) */ +/* On entry, the N-by-M matrix A. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(N,M)-by-M upper trapezoidal matrix R (R is */ +/* upper triangular if N >= M); the elements below the diagonal, */ +/* with the array TAUA, represent the orthogonal matrix Q as a */ +/* product of min(N,M) elementary reflectors (see Further */ +/* Details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* TAUA (output) DOUBLE PRECISION array, dimension (min(N,M)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Q (see Further Details). 
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) */ +/* On entry, the N-by-P matrix B. */ +/* On exit, if N <= P, the upper triangle of the subarray */ +/* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* if N > P, the elements on and above the (N-P)-th subdiagonal */ +/* contain the N-by-P upper trapezoidal matrix T; the remaining */ +/* elements, with the array TAUB, represent the orthogonal */ +/* matrix Z as a product of elementary reflectors (see Further */ +/* Details). */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* TAUB (output) DOUBLE PRECISION array, dimension (min(N,P)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Z (see Further Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N,M,P). */ +/* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), */ +/* where NB1 is the optimal blocksize for the QR factorization */ +/* of an N-by-M matrix, NB2 is the optimal blocksize for the */ +/* RQ factorization of an N-by-P matrix, and NB3 is the optimal */ +/* blocksize for a call of DORMQR. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(n,m). 
*/ + +/* Each H(i) has the form */ + +/* H(i) = I - taua * v * v' */ + +/* where taua is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* and taua in TAUA(i). */ +/* To form Q explicitly, use LAPACK subroutine DORGQR. */ +/* To use Q to update another matrix, use LAPACK subroutine DORMQR. */ + +/* The matrix Z is represented as a product of elementary reflectors */ + +/* Z = H(1) H(2) . . . H(k), where k = min(n,p). */ + +/* Each H(i) has the form */ + +/* H(i) = I - taub * v * v' */ + +/* where taub is a real scalar, and v is a real vector with */ +/* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */ +/* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */ +/* To form Z explicitly, use LAPACK subroutine DORGRQ. */ +/* To use Z to update another matrix, use LAPACK subroutine DORMRQ. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, m, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", n, p, &c_n1, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, m, p, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2); + nb = max(i__1,nb3); +/* Computing MAX */ + i__1 = max(*n,*m); + lwkopt = max(i__1,*p) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*p < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*n), i__1 = max(i__1,*m); + if (*lwork < max(i__1,*p) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* QR factorization of N-by-M matrix A: A = Q*R */ + + _starpu_dgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = (integer) work[1]; + +/* Update B := Q'*B. */ + + i__1 = min(*n,*m); + _starpu_dormqr_("Left", "Transpose", n, p, &i__1, &a[a_offset], lda, &taua[1], &b[ + b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + lopt = max(i__1,i__2); + +/* RQ factorization of N-by-P matrix B: B = T*Z. 
*/ + + _starpu_dgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + work[1] = (doublereal) max(i__1,i__2); + + return 0; + +/* End of DGGQRF */ + +} /* _starpu_dggqrf_ */ diff --git a/min-dgels/base/SRC/dggrqf.c b/min-dgels/base/SRC/dggrqf.c new file mode 100644 index 0000000..daab61c --- /dev/null +++ b/min-dgels/base/SRC/dggrqf.c @@ -0,0 +1,268 @@ +/* dggrqf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * + a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, + doublereal *taub, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer nb, nb1, nb2, nb3, lopt; + extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dgerqf_(integer *, integer *, doublereal *, integer *, doublereal + *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormrq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, 
integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGRQF computes a generalized RQ factorization of an M-by-N matrix A */ +/* and a P-by-N matrix B: */ + +/* A = R*Q, B = Z*T*Q, */ + +/* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ +/* matrix, and R and T assume one of the forms: */ + +/* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, */ +/* N-M M ( R21 ) N */ +/* N */ + +/* where R12 or R21 is upper triangular, and */ + +/* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, */ +/* ( 0 ) P-N P N-P */ +/* N */ + +/* where T11 is upper triangular. */ + +/* In particular, if B is square and nonsingular, the GRQ factorization */ +/* of A and B implicitly gives the RQ factorization of A*inv(B): */ + +/* A*inv(B) = (R*inv(T))*Z' */ + +/* where inv(B) denotes the inverse of the matrix B, and Z' denotes the */ +/* transpose of the matrix Z. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* P (input) INTEGER */ +/* The number of rows of the matrix B. P >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, if M <= N, the upper triangle of the subarray */ +/* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */ +/* if M > N, the elements on and above the (M-N)-th subdiagonal */ +/* contain the M-by-N upper trapezoidal matrix R; the remaining */ +/* elements, with the array TAUA, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors (see Further */ +/* Details). 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAUA (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Q (see Further Details). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the P-by-N matrix B. */ +/* On exit, the elements on and above the diagonal of the array */ +/* contain the min(P,N)-by-N upper trapezoidal matrix T (T is */ +/* upper triangular if P >= N); the elements below the diagonal, */ +/* with the array TAUB, represent the orthogonal matrix Z as a */ +/* product of elementary reflectors (see Further Details). */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,P). */ + +/* TAUB (output) DOUBLE PRECISION array, dimension (min(P,N)) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Z (see Further Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N,M,P). */ +/* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), */ +/* where NB1 is the optimal blocksize for the RQ factorization */ +/* of an M-by-N matrix, NB2 is the optimal blocksize for the */ +/* QR factorization of a P-by-N matrix, and NB3 is the optimal */ +/* blocksize for a call of DORMRQ. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value.
*/ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - taua * v * v' */ + +/* where taua is a real scalar, and v is a real vector with */ +/* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */ +/* To form Q explicitly, use LAPACK subroutine DORGRQ. */ +/* To use Q to update another matrix, use LAPACK subroutine DORMRQ. */ + +/* The matrix Z is represented as a product of elementary reflectors */ + +/* Z = H(1) H(2) . . . H(k), where k = min(p,n). */ + +/* Each H(i) has the form */ + +/* H(i) = I - taub * v * v' */ + +/* where taub is a real scalar, and v is a real vector with */ +/* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */ +/* and taub in TAUB(i). */ +/* To form Z explicitly, use LAPACK subroutine DORGQR. */ +/* To use Z to update another matrix, use LAPACK subroutine DORMQR. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); + nb2 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", p, n, &c_n1, &c_n1); + nb3 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, p, &c_n1); +/* Computing MAX */ + i__1 = max(nb1,nb2); + nb = max(i__1,nb3); +/* Computing MAX */ + i__1 = max(*n,*m); + lwkopt = max(i__1,*p) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*p < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,*p)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m), i__1 = max(i__1,*p); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGRQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* RQ factorization of M-by-N matrix A: A = R*Q */ + + _starpu_dgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = (integer) work[1]; + +/* Update B := B*Q' */ + + i__1 = min(*m,*n); +/* Computing MAX */ + i__2 = 1, i__3 = *m - *n + 1; + _starpu_dormrq_("Right", "Transpose", p, n, &i__1, &a[max(i__2, i__3)+ a_dim1], + lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + lopt = max(i__1,i__2); + +/* QR factorization of P-by-N matrix B: B = Z*T */ + + _starpu_dgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + work[1] = (doublereal) max(i__1,i__2); + + return 0; + +/* End of DGGRQF */ + +} /* _starpu_dggrqf_ */ diff --git a/min-dgels/base/SRC/dggsvd.c 
b/min-dgels/base/SRC/dggsvd.c new file mode 100644 index 0000000..62cad68 --- /dev/null +++ b/min-dgels/base/SRC/dggsvd.c @@ -0,0 +1,405 @@ +/* dggsvd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer + *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal ulp; + integer ibnd; + doublereal tola; + integer isub; + doublereal tolb, unfl, temp, smax; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm, bnorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical wantq, wantu, wantv; + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dtgsja_(char *, char *, char *, integer *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer 
*, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + integer ncycle; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dggsvp_( + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGSVD computes the generalized singular value decomposition (GSVD) */ +/* of an M-by-N real matrix A and P-by-N real matrix B: */ + +/* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) */ + +/* where U, V and Q are orthogonal matrices, and Z' is the transpose */ +/* of Z. Let K+L = the effective numerical rank of the matrix (A',B')', */ +/* then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and */ +/* D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the */ +/* following structures, respectively: */ + +/* If M-K-L >= 0, */ + +/* K L */ +/* D1 = K ( I 0 ) */ +/* L ( 0 C ) */ +/* M-K-L ( 0 0 ) */ + +/* K L */ +/* D2 = L ( 0 S ) */ +/* P-L ( 0 0 ) */ + +/* N-K-L K L */ +/* ( 0 R ) = K ( 0 R11 R12 ) */ +/* L ( 0 0 R22 ) */ + +/* where */ + +/* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ +/* S = diag( BETA(K+1), ... , BETA(K+L) ), */ +/* C**2 + S**2 = I. */ + +/* R is stored in A(1:K+L,N-K-L+1:N) on exit. 
*/ + +/* If M-K-L < 0, */ + +/* K M-K K+L-M */ +/* D1 = K ( I 0 0 ) */ +/* M-K ( 0 C 0 ) */ + +/* K M-K K+L-M */ +/* D2 = M-K ( 0 S 0 ) */ +/* K+L-M ( 0 0 I ) */ +/* P-L ( 0 0 0 ) */ + +/* N-K-L K M-K K+L-M */ +/* ( 0 R ) = K ( 0 R11 R12 R13 ) */ +/* M-K ( 0 0 R22 R23 ) */ +/* K+L-M ( 0 0 0 R33 ) */ + +/* where */ + +/* C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ +/* S = diag( BETA(K+1), ... , BETA(M) ), */ +/* C**2 + S**2 = I. */ + +/* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */ +/* ( 0 R22 R23 ) */ +/* in B(M-K+1:L,N+M-K-L+1:N) on exit. */ + +/* The routine computes C, S, R, and optionally the orthogonal */ +/* transformation matrices U, V and Q. */ + +/* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */ +/* A and B implicitly gives the SVD of A*inv(B): */ +/* A*inv(B) = U*(D1*inv(D2))*V'. */ +/* If ( A',B')' has orthonormal columns, then the GSVD of A and B is */ +/* also equal to the CS decomposition of A and B. Furthermore, the GSVD */ +/* can be used to derive the solution of the eigenvalue problem: */ +/* A'*A x = lambda* B'*B x. */ +/* In some literature, the GSVD of A and B is presented in the form */ +/* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) */ +/* where U and V are orthogonal and X is nonsingular, D1 and D2 are */ +/* ``diagonal''. The former GSVD form can be converted to the latter */ +/* form by taking the nonsingular matrix X as */ + +/* X = Q*( I 0 ) */ +/* ( 0 inv(R) ). */ + +/* Arguments */ +/* ========= */ + +/* JOBU (input) CHARACTER*1 */ +/* = 'U': Orthogonal matrix U is computed; */ +/* = 'N': U is not computed. */ + +/* JOBV (input) CHARACTER*1 */ +/* = 'V': Orthogonal matrix V is computed; */ +/* = 'N': V is not computed. */ + +/* JOBQ (input) CHARACTER*1 */ +/* = 'Q': Orthogonal matrix Q is computed; */ +/* = 'N': Q is not computed. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrices A and B. N >= 0. 
*/ + +/* P (input) INTEGER */ +/* The number of rows of the matrix B. P >= 0. */ + +/* K (output) INTEGER */ +/* L (output) INTEGER */ +/* On exit, K and L specify the dimension of the subblocks */ +/* described in the Purpose section. */ +/* K + L = effective numerical rank of (A',B')'. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A contains the triangular matrix R, or part of R. */ +/* See Purpose for details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the P-by-N matrix B. */ +/* On exit, B contains the triangular matrix R if M-K-L < 0. */ +/* See Purpose for details. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,P). */ + +/* ALPHA (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, ALPHA and BETA contain the generalized singular */ +/* value pairs of A and B; */ +/* ALPHA(1:K) = 1, */ +/* BETA(1:K) = 0, */ +/* and if M-K-L >= 0, */ +/* ALPHA(K+1:K+L) = C, */ +/* BETA(K+1:K+L) = S, */ +/* or if M-K-L < 0, */ +/* ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */ +/* BETA(K+1:M) =S, BETA(M+1:K+L) =1 */ +/* and */ +/* ALPHA(K+L+1:N) = 0 */ +/* BETA(K+L+1:N) = 0 */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU,M) */ +/* If JOBU = 'U', U contains the M-by-M orthogonal matrix U. */ +/* If JOBU = 'N', U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= max(1,M) if */ +/* JOBU = 'U'; LDU >= 1 otherwise. */ + +/* V (output) DOUBLE PRECISION array, dimension (LDV,P) */ +/* If JOBV = 'V', V contains the P-by-P orthogonal matrix V. */ +/* If JOBV = 'N', V is not referenced. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. LDV >= max(1,P) if */ +/* JOBV = 'V'; LDV >= 1 otherwise. 
*/ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. */ +/* If JOBQ = 'N', Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N) if */ +/* JOBQ = 'Q'; LDQ >= 1 otherwise. */ + +/* WORK (workspace) DOUBLE PRECISION array, */ +/* dimension (max(3*N,M,P)+N) */ + +/* IWORK (workspace/output) INTEGER array, dimension (N) */ +/* On exit, IWORK stores the sorting information. More */ +/* precisely, the following loop will sort ALPHA */ +/* for I = K+1, min(M,K+L) */ +/* swap ALPHA(I) and ALPHA(IWORK(I)) */ +/* endfor */ +/* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, the Jacobi-type procedure failed to */ +/* converge. For further details, see subroutine DTGSJA. */ + +/* Internal Parameters */ +/* =================== */ + +/* TOLA DOUBLE PRECISION */ +/* TOLB DOUBLE PRECISION */ +/* TOLA and TOLB are the thresholds to determine the effective */ +/* rank of (A',B')'. Generally, they are set to */ +/* TOLA = MAX(M,N)*norm(A)*MAZHEPS, */ +/* TOLB = MAX(P,N)*norm(B)*MAZHEPS. */ +/* The size of TOLA and TOLB may affect the size of backward */ +/* errors of the decomposition. */ + +/* Further Details */ +/* =============== */ + +/* 2-96 Based on modifications by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alpha; + --beta; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + --iwork; + + /* Function Body */ + wantu = _starpu_lsame_(jobu, "U"); + wantv = _starpu_lsame_(jobv, "V"); + wantq = _starpu_lsame_(jobq, "Q"); + + *info = 0; + if (! (wantu || _starpu_lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || _starpu_lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || _starpu_lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*p < 0) { + *info = -6; + } else if (*lda < max(1,*m)) { + *info = -10; + } else if (*ldb < max(1,*p)) { + *info = -12; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGSVD", &i__1); + return 0; + } + +/* Compute the Frobenius norm of matrices A and B */ + + anorm = _starpu_dlange_("1", m, n, &a[a_offset], lda, &work[1]); + bnorm = _starpu_dlange_("1", p, n, &b[b_offset], ldb, &work[1]); + +/* Get machine precision and set up threshold for determining */ +/* the effective numerical rank of the matrices A and B. 
*/ + + ulp = _starpu_dlamch_("Precision"); + unfl = _starpu_dlamch_("Safe Minimum"); + tola = max(*m,*n) * max(anorm,unfl) * ulp; + tolb = max(*p,*n) * max(bnorm,unfl) * ulp; + +/* Preprocessing */ + + _starpu_dggsvp_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, & + tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[ + q_offset], ldq, &iwork[1], &work[1], &work[*n + 1], info); + +/* Compute the GSVD of two upper "triangular" matrices */ + + _starpu_dtgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[ + v_offset], ldv, &q[q_offset], ldq, &work[1], &ncycle, info); + +/* Sort the singular values and store the pivot indices in IWORK */ +/* Copy ALPHA to WORK, then sort ALPHA in WORK */ + + _starpu_dcopy_(n, &alpha[1], &c__1, &work[1], &c__1); +/* Computing MIN */ + i__1 = *l, i__2 = *m - *k; + ibnd = min(i__1,i__2); + i__1 = ibnd; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for largest ALPHA(K+I) */ + + isub = i__; + smax = work[*k + i__]; + i__2 = ibnd; + for (j = i__ + 1; j <= i__2; ++j) { + temp = work[*k + j]; + if (temp > smax) { + isub = j; + smax = temp; + } +/* L10: */ + } + if (isub != i__) { + work[*k + isub] = work[*k + i__]; + work[*k + i__] = smax; + iwork[*k + i__] = *k + isub; + } else { + iwork[*k + i__] = *k + i__; + } +/* L20: */ + } + + return 0; + +/* End of DGGSVD */ + +} /* _starpu_dggsvd_ */ diff --git a/min-dgels/base/SRC/dggsvp.c b/min-dgels/base/SRC/dggsvp.c new file mode 100644 index 0000000..b1d1ca4 --- /dev/null +++ b/min-dgels/base/SRC/dggsvp.c @@ -0,0 +1,512 @@ +/* dggsvp.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (0. and 1., passed by address to DLASET below) */ + +static doublereal c_b12 = 0.; +static doublereal c_b22 = 1.; + +/* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer + *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, + doublereal *q, integer *ldq, integer *iwork, doublereal *tau, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j; + extern logical _starpu_lsame_(char *, char *); + logical wantq, wantu, wantv; + extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), _starpu_dgerq2_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dormr2_(char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgeqpf_(integer *, integer *, doublereal *, + integer 
*, doublereal *, doublereal *, integer *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dlapmt_(logical *, + integer *, integer *, doublereal *, integer *, integer *); + logical forwrd; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGGSVP computes orthogonal matrices U, V and Q such that */ + +/* N-K-L K L */ +/* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */ +/* L ( 0 0 A23 ) */ +/* M-K-L ( 0 0 0 ) */ + +/* N-K-L K L */ +/* = K ( 0 A12 A13 ) if M-K-L < 0; */ +/* M-K ( 0 0 A23 ) */ + +/* N-K-L K L */ +/* V'*B*Q = L ( 0 0 B13 ) */ +/* P-L ( 0 0 0 ) */ + +/* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ +/* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ +/* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */ +/* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the */ +/* transpose of Z. */ + +/* This decomposition is the preprocessing step for computing the */ +/* Generalized Singular Value Decomposition (GSVD), see subroutine */ +/* DGGSVD. */ + +/* Arguments */ +/* ========= */ + +/* JOBU (input) CHARACTER*1 */ +/* = 'U': Orthogonal matrix U is computed; */ +/* = 'N': U is not computed. */ + +/* JOBV (input) CHARACTER*1 */ +/* = 'V': Orthogonal matrix V is computed; */ +/* = 'N': V is not computed. */ + +/* JOBQ (input) CHARACTER*1 */ +/* = 'Q': Orthogonal matrix Q is computed; */ +/* = 'N': Q is not computed. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* P (input) INTEGER */ +/* The number of rows of the matrix B. P >= 0. 
*/ + +/* N (input) INTEGER */ +/* The number of columns of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A contains the triangular (or trapezoidal) matrix */ +/* described in the Purpose section. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the P-by-N matrix B. */ +/* On exit, B contains the triangular matrix described in */ +/* the Purpose section. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,P). */ + +/* TOLA (input) DOUBLE PRECISION */ +/* TOLB (input) DOUBLE PRECISION */ +/* TOLA and TOLB are the thresholds to determine the effective */ +/* numerical rank of matrix B and a subblock of A. Generally, */ +/* they are set to */ +/* TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* The size of TOLA and TOLB may affect the size of backward */ +/* errors of the decomposition. */ + +/* K (output) INTEGER */ +/* L (output) INTEGER */ +/* On exit, K and L specify the dimension of the subblocks */ +/* described in Purpose. */ +/* K + L = effective numerical rank of (A',B')'. */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU,M) */ +/* If JOBU = 'U', U contains the orthogonal matrix U. */ +/* If JOBU = 'N', U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= max(1,M) if */ +/* JOBU = 'U'; LDU >= 1 otherwise. */ + +/* V (output) DOUBLE PRECISION array, dimension (LDV,P) */ +/* If JOBV = 'V', V contains the orthogonal matrix V. */ +/* If JOBV = 'N', V is not referenced. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. LDV >= max(1,P) if */ +/* JOBV = 'V'; LDV >= 1 otherwise. 
*/ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* If JOBQ = 'Q', Q contains the orthogonal matrix Q. */ +/* If JOBQ = 'N', Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N) if */ +/* JOBQ = 'Q'; LDQ >= 1 otherwise. */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* TAU (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P)) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value; XERBLA is called and the routine returns at once. */ + + +/* Further Details */ +/* =============== */ + +/* The subroutine uses LAPACK subroutine DGEQPF for the QR factorization */ +/* with column pivoting to detect the effective numerical rank of */ +/* a matrix. It may be replaced by a better rank determination strategy. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments: bias each pointer so the Fortran-style 1-based indices used below (e.g. a[i + j * a_dim1]) address the caller's arrays */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --iwork; + --tau; + --work; + + /* Function Body */ + wantu = _starpu_lsame_(jobu, "U"); + wantv = _starpu_lsame_(jobv, "V"); + wantq = _starpu_lsame_(jobq, "Q"); + forwrd = TRUE_; + + *info = 0; + if (! (wantu || _starpu_lsame_(jobu, "N"))) { + *info = -1; + } else if (! 
(wantv || _starpu_lsame_(jobv, "N"))) { + *info = -2; + } else if (! 
(wantq || _starpu_lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*p < 0) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < max(1,*m)) { + *info = -8; + } else if (*ldb < max(1,*p)) { + *info = -10; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGGSVP", &i__1); + return 0; + } + +/* QR with column pivoting of B: B*P = V*( S11 S12 ) */ +/* ( 0 0 ) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L10: */ + } + _starpu_dgeqpf_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], info); + +/* Update A := A*P */ + + _starpu_dlapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); + +/* Determine the effective rank of matrix B: L counts the diagonal entries with |B(i,i)| > TOLB, i = 1:min(P,N). */ + + *l = 0; + i__1 = min(*p,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = b[i__ + i__ * b_dim1], abs(d__1)) > *tolb) { + ++(*l); + } +/* L20: */ + } + + if (wantv) { + +/* Copy the details of V, and form V. 
*/ + + _starpu_dlaset_("Full", p, p, &c_b12, &c_b12, &v[v_offset], ldv); + if (*p > 1) { + i__1 = *p - 1; + _starpu_dlacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], + ldv); + } + i__1 = min(*p,*n); + _starpu_dorg2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); + } + +/* Clean up B: set the strictly lower triangular part of B(1:L, 1:L) = 0, and B( L+1:P, 1:N ) = 0 when P > L. */ + + i__1 = *l - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + if (*p > *l) { + i__1 = *p - *l; + _starpu_dlaset_("Full", &i__1, n, &c_b12, &c_b12, &b[*l + 1 + b_dim1], ldb); + } + + if (wantq) { + +/* Set Q = I and Update Q := Q*P */ + + _starpu_dlaset_("Full", n, n, &c_b12, &c_b22, &q[q_offset], ldq); + _starpu_dlapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); + } + + if (*p >= *l && *n != *l) { + +/* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z */ + + _starpu_dgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); + +/* Update A := A*Z' */ + + _starpu_dormr2_("Right", "Transpose", m, n, l, &b[b_offset], ldb, &tau[1], &a[ + a_offset], lda, &work[1], info); + + if (wantq) { + +/* Update Q := Q*Z' */ + + _starpu_dormr2_("Right", "Transpose", n, n, l, &b[b_offset], ldb, &tau[1], + &q[q_offset], ldq, &work[1], info); + } + +/* Clean up B: set B( 1:L, 1:N-L ) = 0, and the entries below the diagonal of the trailing block B( 1:L, N-L+1:N ) = 0. 
*/ + + i__1 = *n - *l; + _starpu_dlaset_("Full", l, &i__1, &c_b12, &c_b12, &b[b_offset], ldb); + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + + } + +/* Let N-L L */ +/* A = ( A11 A12 ) M, */ + +/* then the following does the complete QR decomposition of A11: */ + +/* A11 = U*( 0 T12 )*P1' */ +/* ( 0 0 ) */ + + i__1 = *n - *l; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L70: */ + } + i__1 = *n - *l; + _starpu_dgeqpf_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], info); + +/* Determine the effective rank of A11: K counts the diagonal entries with |A(i,i)| > TOLA, i = 1:min(M,N-L). */ + + *k = 0; +/* Computing MIN */ 
+ i__2 = *m, i__3 = *n - *l; + i__1 = min(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) > *tola) { + ++(*k); + } +/* L80: */ + } + +/* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) */ + +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = min(i__2,i__3); + _starpu_dorm2r_("Left", "Transpose", m, l, &i__1, &a[a_offset], lda, &tau[1], &a[( + *n - *l + 1) * a_dim1 + 1], lda, &work[1], info); + + if (wantu) { + +/* Copy the details of U, and form U */ + + _starpu_dlaset_("Full", m, m, &c_b12, &c_b12, &u[u_offset], ldu); + if (*m > 1) { + i__1 = *m - 1; + i__2 = *n - *l; + _starpu_dlacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2] +, ldu); + } +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = min(i__2,i__3); + _starpu_dorg2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); + } + + if (wantq) { + +/* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ + + i__1 = *n - *l; + _starpu_dlapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); + } + +/* Clean up A: set the strictly lower triangular part of */ +/* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. 
*/ + + i__1 = *k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L90: */ + } +/* L100: */ + } + if (*m > *k) { + i__1 = *m - *k; + i__2 = *n - *l; + _starpu_dlaset_("Full", &i__1, &i__2, &c_b12, &c_b12, &a[*k + 1 + a_dim1], + lda); + } + + if (*n - *l > *k) { + +/* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ + + i__1 = *n - *l; + _starpu_dgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); + + if (wantq) { + +/* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' */ + + i__1 = *n - *l; + _starpu_dormr2_("Right", "Transpose", n, &i__1, k, &a[a_offset], lda, & + tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up A: set A( 1:K, 1:N-L-K ) = 0, and the entries below the diagonal of A( 1:K, N-L-K+1:N-L ) = 0. */ + + i__1 = *n - *l - *k; + _starpu_dlaset_("Full", k, &i__1, &c_b12, &c_b12, &a[a_offset], lda); + i__1 = *n - *l; + for (j = *n - *l - *k + 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L110: */ + } +/* L120: */ + } + + } + + if (*m > *k) { + +/* QR factorization of A( K+1:M,N-L+1:N ) */ + + i__1 = *m - *k; + _starpu_dgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], & + work[1], info); + + if (wantu) { + +/* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ + + i__1 = *m - *k; +/* Computing MIN */ + i__3 = *m - *k; + i__2 = min(i__3,*l); + _starpu_dorm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n + - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + + 1], ldu, &work[1], info); + } + +/* Clean up: set the entries below the diagonal of A( K+1:M,N-L+1:N ) = 0. 
*/ + + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L130: */ + } +/* L140: */ + } + + } + + return 0; + +/* End of DGGSVP */ + +} /* _starpu_dggsvp_ */ diff --git a/min-dgels/base/SRC/dgsvj0.c b/min-dgels/base/SRC/dgsvj0.c new file mode 100644 index 0000000..e4f20c3 --- /dev/null +++ b/min-dgels/base/SRC/dgsvj0.c @@ -0,0 +1,1159 @@ +/* dgsvj0.f -- 
translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b42 = 1.; + +/* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, + doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal bigtheta; + integer pskipped, i__, p, q; + doublereal t, rootsfmin, cs, sn; + integer ir1, jbc; + doublereal big; + integer kbl, igl, ibr, jgl, nbl, mvl; + doublereal aapp, aapq, aaqq; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer ierr; + doublereal aapp0; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp1, apoaq, aqoap; + extern logical _starpu_lsame_(char *, char *); + doublereal theta, small; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal fastr[5]; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical applv, rsvec; + extern /* Subroutine */ int 
_starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_drotm_(integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *); + logical rotok; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + doublereal thsign, mxsinj; + integer emptsw, notrot, iswrot, lkahead; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ +/* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ +/* SIGMA is a library of algorithms for highly accurate algorithms for */ +/* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ +/* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ + +/* Scalar Arguments */ + + +/* Array Arguments */ + +/* .. */ + +/* Purpose */ +/* ~~~~~~~ */ +/* DGSVJ0 is called from DGESVJ as a pre-processor and that is its main */ +/* purpose. It applies Jacobi rotations in the same way as DGESVJ does, but */ +/* it does not check convergence (stopping criterion). Few tuning */ +/* parameters (marked by [TP]) are available for the implementer. 
*/ + +/* Further details */ +/* ~~~~~~~~~~~~~~~ */ +/* DGSVJ0 is used just to enable SGESVJ to call a simplified version of */ +/* itself to work on a submatrix of the original matrix. */ + +/* Contributors */ +/* ~~~~~~~~~~~~ */ +/* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* Bugs, Examples and Comments */ +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* Please report all bugs and send interesting test examples and comments to */ +/* drmac@math.hr. Thank you. */ + +/* Arguments */ +/* ~~~~~~~~~ */ + +/* JOBV (input) CHARACTER*1 */ +/* Specifies whether the output from this procedure is used */ +/* to compute the matrix V: */ +/* = 'V': the product of the Jacobi rotations is accumulated */ +/* by postmulyiplying the N-by-N array V. */ +/* (See the description of V.) */ +/* = 'A': the product of the Jacobi rotations is accumulated */ +/* by postmulyiplying the MV-by-N array V. */ +/* (See the descriptions of MV and V.) */ +/* = 'N': the Jacobi rotations are not accumulated. */ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. */ +/* M >= N >= 0. */ + +/* A (input/output) REAL array, dimension (LDA,N) */ +/* On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* the input matrix. */ +/* On exit, */ +/* A_onexit * D_onexit represents the input matrix A*diag(D) */ +/* post-multiplied by a sequence of Jacobi rotations, where the */ +/* rotation threshold and the total number of sweeps are given in */ +/* TOL and NSWEEP, respectively. */ +/* (See the descriptions of D, TOL and NSWEEP.) */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* D (input/workspace/output) REAL array, dimension (N) */ +/* The array D accumulates the scaling factors from the fast scaled */ +/* Jacobi rotations. */ +/* On entry, A*diag(D) represents the input matrix. 
*/ +/* On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* post-multiplied by a sequence of Jacobi rotations, where the */ +/* rotation threshold and the total number of sweeps are given in */ +/* TOL and NSWEEP, respectively. */ +/* (See the descriptions of A, TOL and NSWEEP.) */ + +/* SVA (input/workspace/output) REAL array, dimension (N) */ +/* On entry, SVA contains the Euclidean norms of the columns of */ +/* the matrix A*diag(D). */ +/* On exit, SVA contains the Euclidean norms of the columns of */ +/* the matrix onexit*diag(D_onexit). */ + +/* MV (input) INTEGER */ +/* If JOBV .EQ. 'A', then MV rows of V are post-multipled by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV = 'N', then MV is not referenced. */ + +/* V (input/output) REAL array, dimension (LDV,N) */ +/* If JOBV .EQ. 'V' then N rows of V are post-multipled by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV .EQ. 'A' then MV rows of V are post-multipled by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV = 'N', then V is not referenced. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V, LDV >= 1. */ +/* If JOBV = 'V', LDV .GE. N. */ +/* If JOBV = 'A', LDV .GE. MV. */ + +/* EPS (input) INTEGER */ +/* EPS = SLAMCH('Epsilon') */ + +/* SFMIN (input) INTEGER */ +/* SFMIN = SLAMCH('Safe Minimum') */ + +/* TOL (input) REAL */ +/* TOL is the threshold for Jacobi rotations. For a pair */ +/* A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. */ + +/* NSWEEP (input) INTEGER */ +/* NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* performed. */ + +/* WORK (workspace) REAL array, dimension LWORK. */ + +/* LWORK (input) INTEGER */ +/* LWORK is the dimension of WORK. LWORK .GE. M. */ + +/* INFO (output) INTEGER */ +/* = 0 : successful exit. 
*/ +/* < 0 : if INFO = -i, then the i-th argument had an illegal value */ + +/* Local Parameters */ +/* Local Scalars */ +/* Local Arrays */ + + +/* Intrinsic Functions */ + + +/* External Functions */ + + +/* External Subroutines */ + + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --work; + + /* Function Body */ + applv = _starpu_lsame_(jobv, "A"); + rsvec = _starpu_lsame_(jobv, "V"); + if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*lda < *m) { + *info = -5; + } else if (*mv < 0) { + *info = -8; + } else if (*ldv < *m) { + *info = -10; + } else if (*tol <= *eps) { + *info = -13; + } else if (*nsweep < 0) { + *info = -14; + } else if (*lwork < *m) { + *info = -16; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGSVJ0", &i__1); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1. / *sfmin; + rootbig = 1. / rootsfmin; + bigtheta = 1. / rooteps; + roottol = sqrt(*tol); + + +/* -#- Row-cyclic Jacobi SVD algorithm with column pivoting -#- */ + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + fastr[0] = 0.; + +/* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ + + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if SGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */ +/* ...... */ + kbl = min(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. 
In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = min(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + swband = 0; + pskipped = 0; + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { +/* .. go go go ... */ + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + + i__2 = nbl; + for (ibr = 1; ibr <= i__2; ++ibr) { + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__4 = lkahead, i__5 = nbl - ibr; + i__3 = min(i__4,i__5); + for (ir1 = 0; ir1 <= i__3; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__5 = igl + kbl - 1, i__6 = *n - 1; + i__4 = min(i__5,i__6); + for (p = igl; p <= i__4; ++p) { +/* .. de Rijk's pivoting */ + i__5 = *n - p + 1; + q = _starpu_idamax_(&i__5, &sva[p], &c__1) + p - 1; + if (p != q) { + _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Some BLAS implementations compute DNRM2(M,A(1,p),1) */ +/* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may result in */ +/* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and */ +/* undeflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */ +/* Hence, DNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. 
*/ +/* If properly implemented DNRM2 is available, the IF-THEN-ELSE */ +/* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * D(p)". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * + d__[p]; + } else { + temp1 = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp) * d__[p]; + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = igl + kbl - 1; + i__5 = min(i__6,*n); + for (q = p + 1; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + + aapp0 = aapp; + if (aaqq >= 1.) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = max(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > *tol) { + +/* .. 
rotate */ +/* ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + + if (abs(theta) > bigtheta) { + + t = .5 / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + _starpu_drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + aapp *= sqrt(1. - t * aqoap * + aapq); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = max(d__1,d__2); + + } else { + +/* .. choose correct signum for THETA and rotate */ + + thsign = -d_sign(&c_b42, &aapq); + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; + +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = max(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((max(d__1,d__2))); + + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.) { + if (d__[q] >= 1.) 
{ + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + } + } else { + if (d__[q] >= 1.) { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + } else { + if (d__[p] >= d__[q]) { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, 
&d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { +/* .. have to use modified Gram-Schmidt like transformation */ + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + c_b42, m, &c__1, &work[1], + lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b42, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * aapq; + sva[q] = aaqq * sqrt((max(d__1,d__2))) + ; + mxsinj = max(mxsinj,*sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). 
*/ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + sva[p] = aapp; + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.) { +/* Computing MIN */ + i__5 = igl + kbl - 1; + notrot = notrot + min(i__5,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ........................................................ */ +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__3 = nbl; + for (jbc = ibr + 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = min(i__5,*n); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = min(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.) 
{ + aapp0 = aapp; + +/* -#- M x 2 Jacobi SVD -#- */ + +/* -#- Safe Gram matrix computation -#- */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = max(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... 
*/ + + if (abs(aapq) > *tol) { + notrot = 0; +/* ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + _starpu_drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((max(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = max(d__1,d__2); + } else { + +/* .. choose correct signum for THETA and rotate */ + + thsign = -d_sign(&c_b42, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = max(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + aapp *= sqrt(1. - t * aqoap * + aapq); + + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.) { + + if (d__[q] >= 1.) 
{ + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + d__[p] *= cs; + d__[q] /= cs; + } + } else { + if (d__[q] >= 1.) { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + d__[p] /= cs; + d__[q] *= cs; + } else { + if (d__[p] >= d__[q]) { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, 
&d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + if (aapp > aaqq) { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &work[1] +, lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + _starpu_daxpy_(m, &temp1, &work[1], &c__1, + &a[q * a_dim1 + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b42, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,*sfmin); + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &work[1] +, lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[q] / d__[p]; + _starpu_daxpy_(m, &temp1, &work[1], &c__1, + &a[p * a_dim1 + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b42, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[p] = aapp * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* .. 
recompute SVA(q) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + if (aapp == 0.) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + min(i__5,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + } +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = min(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } + +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + +/* .. 
update SVA(N) */ + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * d__[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < (doublereal) (*n) * *tol && ( + doublereal) (*n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } +/* L1993: */ + } +/* end i=1:NSWEEP loop */ +/* #:) Reaching this point means that the procedure has comleted the given */ +/* number of iterations. */ + *info = *nsweep - 1; + goto L1995; +L1994: +/* #:) Reaching this point means that during the i-th sweep all pivots were */ +/* below the given tolerance, causing early exit. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector D. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +/* .. */ +/* .. END OF DGSVJ0 */ +/* .. */ +} /* _starpu_dgsvj0_ */ diff --git a/min-dgels/base/SRC/dgsvj1.c b/min-dgels/base/SRC/dgsvj1.c new file mode 100644 index 0000000..7e5bf98 --- /dev/null +++ b/min-dgels/base/SRC/dgsvj1.c @@ -0,0 +1,798 @@ +/* dgsvj1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b35 = 1.; + +/* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + doublereal *a, integer *lda, doublereal *d__, doublereal *sva, + integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal + *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal bigtheta; + integer pskipped, i__, p, q; + doublereal t, rootsfmin, cs, sn; + integer jbc; + doublereal big; + integer kbl, igl, ibr, jgl, mvl, nblc; + doublereal aapp, aapq, aaqq; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer nblr, ierr; + doublereal aapp0; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp1, large, apoaq, aqoap; + extern logical _starpu_lsame_(char *, char *); + doublereal theta, small; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal fastr[5]; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical applv, rsvec; + extern /* Subroutine */ int 
_starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_drotm_(integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *); + logical rotok; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + doublereal thsign, mxsinj; + integer emptsw, notrot, iswrot; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ +/* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ +/* SIGMA is a library of algorithms for highly accurate algorithms for */ +/* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ +/* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ + +/* -#- Scalar Arguments -#- */ + + +/* -#- Array Arguments -#- */ + +/* .. */ + +/* Purpose */ +/* ~~~~~~~ */ +/* DGSVJ1 is called from SGESVJ as a pre-processor and that is its main */ +/* purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */ +/* it targets only particular pivots and it does not check convergence */ +/* (stopping criterion). Few tunning parameters (marked by [TP]) are */ +/* available for the implementer. 
*/ + +/* Further details */ +/* ~~~~~~~~~~~~~~~ */ +/* DGSVJ1 applies a few sweeps of Jacobi rotations in the column space of */ +/* the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */ +/* off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */ +/* block-entries (tiles) of the (1,2) off-diagonal block are marked by the */ +/* [x]'s in the following scheme: */ + +/* | * * * [x] [x] [x]| */ +/* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ + +/* In terms of the columns of A, the first N1 columns are rotated 'against' */ +/* the remaining N-N1 columns, trying to increase the angle between the */ +/* corresponding subspaces. The off-diagonal block is N1-by-(N-N1) and it is */ +/* tiled using square tiles of side KBL. Here, KBL is a tuning parameter. */ +/* The number of sweeps is given in NSWEEP and the orthogonality threshold */ +/* is given in TOL. */ + +/* Contributors */ +/* ~~~~~~~~~~~~ */ +/* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* Arguments */ +/* ~~~~~~~~~ */ + +/* JOBV (input) CHARACTER*1 */ +/* Specifies whether the output from this procedure is used */ +/* to compute the matrix V: */ +/* = 'V': the product of the Jacobi rotations is accumulated */ +/* by postmultiplying the N-by-N array V. */ +/* (See the description of V.) */ +/* = 'A': the product of the Jacobi rotations is accumulated */ +/* by postmultiplying the MV-by-N array V. */ +/* (See the descriptions of MV and V.) */ +/* = 'N': the Jacobi rotations are not accumulated. */ + +/* M (input) INTEGER */ +/* The number of rows of the input matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the input matrix A. */ +/* M >= N >= 0. 
*/ + +/* N1 (input) INTEGER */ +/* N1 specifies the 2 x 2 block partition, the first N1 columns are */ +/* rotated 'against' the remaining N-N1 columns of A. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* the input matrix. */ +/* On exit, */ +/* A_onexit * diag(D_onexit) represents the input matrix A*diag(D) */ +/* post-multiplied by a sequence of Jacobi rotations, where the */ +/* rotation threshold and the total number of sweeps are given in */ +/* TOL and NSWEEP, respectively. */ +/* (See the descriptions of N1, D, TOL and NSWEEP.) */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* D (input/workspace/output) DOUBLE PRECISION array, dimension (N) */ +/* The array D accumulates the scaling factors from the fast scaled */ +/* Jacobi rotations. */ +/* On entry, A*diag(D) represents the input matrix. */ +/* On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* post-multiplied by a sequence of Jacobi rotations, where the */ +/* rotation threshold and the total number of sweeps are given in */ +/* TOL and NSWEEP, respectively. */ +/* (See the descriptions of N1, A, TOL and NSWEEP.) */ + +/* SVA (input/workspace/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, SVA contains the Euclidean norms of the columns of */ +/* the matrix A*diag(D). */ +/* On exit, SVA contains the Euclidean norms of the columns of */ +/* the matrix A_onexit*diag(D_onexit). */ + +/* MV (input) INTEGER */ +/* If JOBV .EQ. 'A', then MV rows of V are post-multiplied by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV = 'N', then MV is not referenced. */ + +/* V (input/output) DOUBLE PRECISION array, dimension (LDV,N) */ +/* If JOBV .EQ. 'V' then N rows of V are post-multiplied by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV .EQ. 'A' then MV rows of V are post-multiplied by a */ +/* sequence of Jacobi rotations. */ +/* If JOBV = 'N', then V is not referenced. 
*/ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V, LDV >= 1. */ +/* If JOBV = 'V', LDV .GE. N. */ +/* If JOBV = 'A', LDV .GE. MV. */ + +/* EPS (input) INTEGER */ +/* EPS = SLAMCH('Epsilon') */ + +/* SFMIN (input) INTEGER */ +/* SFMIN = SLAMCH('Safe Minimum') */ + +/* TOL (input) REAL */ +/* TOL is the threshold for Jacobi rotations. For a pair */ +/* A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. */ + +/* NSWEEP (input) INTEGER */ +/* NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* performed. */ + +/* WORK (workspace) REAL array, dimension LWORK. */ + +/* LWORK (input) INTEGER */ +/* LWORK is the dimension of WORK. LWORK .GE. M. */ + +/* INFO (output) INTEGER */ +/* = 0 : successful exit. */ +/* < 0 : if INFO = -i, then the i-th argument had an illegal value */ + +/* -#- Local Parameters -#- */ + +/* -#- Local Scalars -#- */ + + +/* Local Arrays */ + + +/* Intrinsic Functions */ + + +/* External Functions */ + + +/* External Subroutines */ + + + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --work; + + /* Function Body */ + applv = _starpu_lsame_(jobv, "A"); + rsvec = _starpu_lsame_(jobv, "V"); + if (! 
(rsvec || applv || _starpu_lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*n1 < 0) { + *info = -4; + } else if (*lda < *m) { + *info = -6; + } else if (*mv < 0) { + *info = -9; + } else if (*ldv < *m) { + *info = -11; + } else if (*tol <= *eps) { + *info = -14; + } else if (*nsweep < 0) { + *info = -15; + } else if (*lwork < *m) { + *info = -17; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGSVJ1", &i__1); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1. / *sfmin; + rootbig = 1. / rootsfmin; + large = big / sqrt((doublereal) (*m * *n)); + bigtheta = 1. / rooteps; + roottol = sqrt(*tol); + +/* -#- Initialize the right singular vector matrix -#- */ + +/* RSVEC = LSAME( JOBV, 'Y' ) */ + + emptsw = *n1 * (*n - *n1); + notrot = 0; + fastr[0] = 0.; + +/* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ + + kbl = min(8,*n); + nblr = *n1 / kbl; + if (nblr * kbl != *n1) { + ++nblr; + } +/* .. the tiling is nblr-by-nblc [tiles] */ + nblc = (*n - *n1) / kbl; + if (nblc * kbl != *n - *n1) { + ++nblc; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = min(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if SGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm SGESVJ. */ + + +/* | * * * [x] [x] [x]| */ +/* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. 
*/ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { +/* .. go go go ... */ + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + + i__2 = nblr; + for (ibr = 1; ibr <= i__2; ++ibr) { + igl = (ibr - 1) * kbl + 1; + + +/* ........................................................ */ +/* ... go to the off diagonal blocks */ + igl = (ibr - 1) * kbl + 1; + i__3 = nblc; + for (jbc = 1; jbc <= i__3; ++jbc) { + jgl = *n1 + (jbc - 1) * kbl + 1; +/* doing the block at ( ibr, jbc ) */ + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = min(i__5,*n1); + for (p = igl; p <= i__4; ++p) { + aapp = sva[p]; + if (aapp > 0.) { + pskipped = 0; +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = min(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + +/* -#- M x 2 Jacobi SVD -#- */ + +/* -#- Safe Gram matrix computation -#- */ + + if (aaqq >= 1.) 
{ + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = max(d__1,d__2); +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > *tol) { + notrot = 0; +/* ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + if (abs(theta) > bigtheta) { + t = .5 / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + _starpu_drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. 
- t * aqoap * + aapq; + aapp *= sqrt((max(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = max(d__1,d__2); + } else { + +/* .. choose correct signum for THETA and rotate */ + + thsign = -d_sign(&c_b35, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = max(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + aapp *= sqrt(1. - t * aqoap * + aapq); + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.) { + + if (d__[q] >= 1.) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + d__[p] *= cs; + d__[q] /= cs; + } + } else { + if (d__[q] >= 1.) 
{ + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + d__[p] /= cs; + d__[q] *= cs; + } else { + if (d__[p] >= d__[q]) { + d__1 = -t * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + } else { + if (aapp > aaqq) { + _starpu_dcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b35, m, &c__1, &work[1] +, lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b35, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + _starpu_daxpy_(m, &temp1, &work[1], &c__1, + &a[q * a_dim1 + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b35, + &aaqq, 
m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[q] = aaqq * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,*sfmin); + } else { + _starpu_dcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &aaqq, + &c_b35, m, &c__1, &work[1] +, lda, &ierr); + _starpu_dlascl_("G", &c__0, &c__0, &aapp, + &c_b35, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[q] / d__[p]; + _starpu_daxpy_(m, &temp1, &work[1], &c__1, + &a[p * a_dim1 + 1], & + c__1); + _starpu_dlascl_("G", &c__0, &c__0, &c_b35, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[p] = aapp * sqrt((max(d__1, + d__2))); + mxsinj = max(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* .. recompute SVA(q) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.; + aaqq = 0.; + _starpu_dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } +/* IF ( NOTROT .GE. 
EMPTSW ) GO TO 2011 */ + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + sva[p] = aapp; + + } else { + if (aapp == 0.) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + min(i__5,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } +/* ** IF ( NOTROT .GE. EMPTSW ) GO TO 2011 */ + } +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = min(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** IF ( NOTROT .GE. EMPTSW ) GO TO 1994 */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + +/* .. update SVA(N) */ + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; + } else { + t = 0.; + aapp = 0.; + _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * d__[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + if (i__ > swband + 1 && mxaapq < (doublereal) (*n) * *tol && ( + doublereal) (*n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } +/* L1993: */ + } +/* end i=1:NSWEEP loop */ +/* #:) Reaching this point means that the procedure has completed the given */ +/* number of sweeps. */ + *info = *nsweep - 1; + goto L1995; +L1994: +/* #:) Reaching this point means that during the i-th sweep all pivots were */ +/* below the given threshold, causing early exit. */ + *info = 0; +/* #:) INFO = 0 confirms successful iterations. 
*/ +L1995: + +/* Sort the vector D */ + + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +/* .. */ +/* .. END OF DGSVJ1 */ +/* .. */ +} /* _starpu_dgsvj1_ */ diff --git a/min-dgels/base/SRC/dgtcon.c b/min-dgels/base/SRC/dgtcon.c new file mode 100644 index 0000000..e4e3275 --- /dev/null +++ b/min-dgels/base/SRC/dgtcon.c @@ -0,0 +1,209 @@ +/* dgtcon.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, kase, kase1; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal ainvnm; + logical onenrm; + extern /* Subroutine */ int 
_starpu_dgttrs_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTCON estimates the reciprocal of the condition number of a real */ +/* tridiagonal matrix A using the LU factorization as computed by */ +/* DGTTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) multipliers that define the matrix L from the */ +/* LU factorization of A as computed by DGTTRF. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the upper triangular matrix U from */ +/* the LU factorization of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) elements of the first superdiagonal of U. */ + +/* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ +/* The (n-2) elements of the second superdiagonal of U. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* interchanged with row IPIV(i). IPIV(i) will always be either */ +/* i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* required. 
*/ + +/* ANORM (input) DOUBLE PRECISION */ +/* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* If NORM = 'I', the infinity-norm of the original matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments */ + --iwork; + --work; + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGTCON", &i__1); + return 0; + } + +/* Quick return if possible: RCOND = 1 when N = 0, RCOND = 0 when ANORM = 0 */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + +/* Check that D(1:N) is non-zero; an exact zero returns with RCOND = 0. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] == 0.) { + return 0; + } +/* L10: */ + } + + ainvnm = 0.; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L20: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(U)*inv(L). 
*/ + + _starpu_dgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1] +, &ipiv[1], &work[1], n, info); + } else { + +/* Multiply by inv(L')*inv(U'). */ + + _starpu_dgttrs_("Transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1], & + ipiv[1], &work[1], n, info); + } + goto L20; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of DGTCON */ + +} /* _starpu_dgtcon_ */ diff --git a/min-dgels/base/SRC/dgtrfs.c b/min-dgels/base/SRC/dgtrfs.c new file mode 100644 index 0000000..3e87ebf --- /dev/null +++ b/min-dgels/base/SRC/dgtrfs.c @@ -0,0 +1,451 @@ +/* dgtrfs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b18 = -1.; +static doublereal c_b19 = 1.; + +/* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, + doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer i__, j; + doublereal s; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer 
isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlagtm_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + char transn[1]; + extern /* Subroutine */ int _starpu_dgttrs_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + char transt[1]; + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is tridiagonal, and provides */ +/* error bounds and backward error estimates for the solution. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. 
*/ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of A. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) superdiagonal elements of A. */ + +/* DLF (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) multipliers that define the matrix L from the */ +/* LU factorization of A as computed by DGTTRF. */ + +/* DF (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the upper triangular matrix U from */ +/* the LU factorization of A. */ + +/* DUF (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) elements of the first superdiagonal of U. */ + +/* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ +/* The (n-2) elements of the second superdiagonal of U. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* interchanged with row IPIV(i). IPIV(i) will always be either */ +/* i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* required. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DGTTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). 
*/ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T") && ! 
_starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -13; + } else if (*ldx < max(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGTRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transn = 'N'; + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transn = 'T'; + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = 4; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dlagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * + x_dim1 + 1], ldx, &c_b19, &work[*n + 1], n); + +/* Compute abs(op(A))*abs(x) + abs(b) for use in the backward */ +/* error bound. 
*/ + + if (notran) { + if (*n == 1) { + work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(d__2)); + } else { + work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(d__2)) + (d__3 = du[1] * + x[j * x_dim1 + 2], abs(d__3)); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)) + ( + d__2 = dl[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( + d__2)) + (d__3 = d__[i__] * x[i__ + j * x_dim1], + abs(d__3)) + (d__4 = du[i__] * x[i__ + 1 + j * + x_dim1], abs(d__4)); +/* L30: */ + } + work[*n] = (d__1 = b[*n + j * b_dim1], abs(d__1)) + (d__2 = + dl[*n - 1] * x[*n - 1 + j * x_dim1], abs(d__2)) + ( + d__3 = d__[*n] * x[*n + j * x_dim1], abs(d__3)); + } + } else { + if (*n == 1) { + work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(d__2)); + } else { + work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(d__2)) + (d__3 = dl[1] * + x[j * x_dim1 + 2], abs(d__3)); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)) + ( + d__2 = du[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( + d__2)) + (d__3 = d__[i__] * x[i__ + j * x_dim1], + abs(d__3)) + (d__4 = dl[i__] * x[i__ + 1 + j * + x_dim1], abs(d__4)); +/* L40: */ + } + work[*n] = (d__1 = b[*n + j * b_dim1], abs(d__1)) + (d__2 = + du[*n - 1] * x[*n - 1 + j * x_dim1], abs(d__2)) + ( + d__3 = d__[*n] * x[*n + j * x_dim1], abs(d__3)); + } + } + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. 
*/ + + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L50: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried (the literal 5 below is ITMAX). */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ + 1], &work[*n + 1], n, info); + _starpu_daxpy_(n, &c_b19, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. 
*/ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L60: */ + } + + kase = 0; +L70: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + _starpu_dgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L80: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L90: */ + } + _starpu_dgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[*n + 1], n, info); + } + goto L70; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L100: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L110: */ + } + + return 0; + +/* End of DGTRFS */ + +} /* _starpu_dgtrfs_ */ diff --git a/min-dgels/base/SRC/dgtsv.c b/min-dgels/base/SRC/dgtsv.c new file mode 100644 index 0000000..a3002a7 --- /dev/null +++ b/min-dgels/base/SRC/dgtsv.c @@ -0,0 +1,315 @@ +/* dgtsv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, + doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer + *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal fact, temp; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTSV solves the equation */ + +/* A*X = B, */ + +/* where A is an n by n tridiagonal matrix, by Gaussian elimination with */ +/* partial pivoting. */ + +/* Note that the equation A'*X = B may be solved by interchanging the */ +/* order of the arguments DU and DL. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* DL (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* A. */ + +/* On exit, DL is overwritten by the (n-2) elements of the */ +/* second super-diagonal of the upper triangular matrix U from */ +/* the LU factorization of A, in DL(1), ..., DL(n-2). 
*/ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D must contain the diagonal elements of A. */ + +/* On exit, D is overwritten by the n diagonal elements of U. */ + +/* DU (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, DU must contain the (n-1) super-diagonal elements */ +/* of A. */ + +/* On exit, DU is overwritten by the (n-1) elements of the first */ +/* super-diagonal of U. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N by NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N by NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) is exactly zero, and the solution */ +/* has not been computed. The factorization has not been */ +/* completed unless i = N. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGTSV ", &i__1); + return 0; + } + + if (*n == 0) { + return 0; + } + + if (*nrhs == 1) { + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + +/* No row interchange required */ + + if (d__[i__] != 0.) 
{ + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; + } else { + *info = i__; + return 0; + } + dl[i__] = 0.; + } else { + +/* Interchange rows I and I+1 */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + dl[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * dl[i__]; + du[i__] = temp; + temp = b[i__ + b_dim1]; + b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; + b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; + } +/* L10: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + if (d__[i__] != 0.) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; + } else { + *info = i__; + return 0; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + du[i__] = temp; + temp = b[i__ + b_dim1]; + b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; + b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; + } + } + if (d__[*n] == 0.) { + *info = *n; + return 0; + } + } else { + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + +/* No row interchange required */ + + if (d__[i__] != 0.) 
{ + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; +/* L20: */ + } + } else { + *info = i__; + return 0; + } + dl[i__] = 0.; + } else { + +/* Interchange rows I and I+1 */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + dl[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * dl[i__]; + du[i__] = temp; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * + b_dim1]; +/* L30: */ + } + } +/* L40: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + if (d__[i__] != 0.) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; +/* L50: */ + } + } else { + *info = i__; + return 0; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + du[i__] = temp; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * + b_dim1]; +/* L60: */ + } + } + } + if (d__[*n] == 0.) { + *info = *n; + return 0; + } + } + +/* Back solve with the matrix U from the factorization. 
*/ + + if (*nrhs <= 2) { + j = 1; +L70: + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[ + *n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 + + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[ + i__]; +/* L80: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] + * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + + 1 + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) + / d__[i__]; +/* L90: */ + } +/* L100: */ + } + } + + return 0; + +/* End of DGTSV */ + +} /* _starpu_dgtsv_ */ diff --git a/min-dgels/base/SRC/dgtsvx.c b/min-dgels/base/SRC/dgtsvx.c new file mode 100644 index 0000000..6b9ad20 --- /dev/null +++ b/min-dgels/base/SRC/dgtsvx.c @@ -0,0 +1,349 @@ +/* dgtsvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * + dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + char norm[1]; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlangt_(char *, integer *, + doublereal *, doublereal *, doublereal *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dgtcon_(char *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dgtrfs_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *), 
_starpu_dgttrf_(integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + logical notran; + extern /* Subroutine */ int _starpu_dgttrs_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTSVX uses the LU factorization to compute the solution to a real */ +/* system of linear equations A * X = B or A**T * X = B, */ +/* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */ +/* matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */ +/* as A = L * U, where L is a product of permutation and unit lower */ +/* bidiagonal matrices and U is upper triangular with nonzeros in */ +/* only the main diagonal and first two superdiagonals. */ + +/* 2. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* returns with INFO = i. Otherwise, the factored form of A is used */ +/* to estimate the condition number of the matrix A. If the */ +/* reciprocal of the condition number is less than machine precision, */ +/* INFO = N+1 is returned as a warning, but the routine still goes on */ +/* to solve for X and compute error bounds as described below. */ + +/* 3. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 4. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. 
*/ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of A has been */ +/* supplied on entry. */ +/* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored */ +/* form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV */ +/* will not be modified. */ +/* = 'N': The matrix will be copied to DLF, DF, and DUF */ +/* and factored. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of A. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) superdiagonal elements of A. */ + +/* DLF (input or output) DOUBLE PRECISION array, dimension (N-1) */ +/* If FACT = 'F', then DLF is an input argument and on entry */ +/* contains the (n-1) multipliers that define the matrix L from */ +/* the LU factorization of A as computed by DGTTRF. */ + +/* If FACT = 'N', then DLF is an output argument and on exit */ +/* contains the (n-1) multipliers that define the matrix L from */ +/* the LU factorization of A. */ + +/* DF (input or output) DOUBLE PRECISION array, dimension (N) */ +/* If FACT = 'F', then DF is an input argument and on entry */ +/* contains the n diagonal elements of the upper triangular */ +/* matrix U from the LU factorization of A. */ + +/* If FACT = 'N', then DF is an output argument and on exit */ +/* contains the n diagonal elements of the upper triangular */ +/* matrix U from the LU factorization of A. 
*/ + +/* DUF (input or output) DOUBLE PRECISION array, dimension (N-1) */ +/* If FACT = 'F', then DUF is an input argument and on entry */ +/* contains the (n-1) elements of the first superdiagonal of U. */ + +/* If FACT = 'N', then DUF is an output argument and on exit */ +/* contains the (n-1) elements of the first superdiagonal of U. */ + +/* DU2 (input or output) DOUBLE PRECISION array, dimension (N-2) */ +/* If FACT = 'F', then DU2 is an input argument and on entry */ +/* contains the (n-2) elements of the second superdiagonal of */ +/* U. */ + +/* If FACT = 'N', then DU2 is an output argument and on exit */ +/* contains the (n-2) elements of the second superdiagonal of */ +/* U. */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains the pivot indices from the LU factorization of A as */ +/* computed by DGTTRF. */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains the pivot indices from the LU factorization of A; */ +/* row i of the matrix was interchanged with row IPIV(i). */ +/* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */ +/* a row interchange was not required. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A. If RCOND is less than the machine precision (in */ +/* particular, if RCOND = 0), the matrix is singular to working */ +/* precision. This condition is indicated by a return code of */ +/* INFO > 0. 
*/ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: U(i,i) is exactly zero. The factorization */ +/* has not been completed unless i = N, but the */ +/* factor U is exactly singular, so the solution */ +/* and error bounds could not be computed. */ +/* RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + notran = _starpu_lsame_(trans, "N"); + if (! nofact && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -14; + } else if (*ldx < max(1,*n)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGTSVX", &i__1); + return 0; + } + + if (nofact) { + +/* Compute the LU factorization of A. */ + + _starpu_dcopy_(n, &d__[1], &c__1, &df[1], &c__1); + if (*n > 1) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &du[1], &c__1, &duf[1], &c__1); + } + _starpu_dgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = _starpu_dlangt_(norm, n, &dl[1], &d__[1], &du[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, + rcond, &work[1], &iwork[1], info); + +/* Compute the solution vectors X. 
*/ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + _starpu_dgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1], + &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1] +, &berr[1], &work[1], &iwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DGTSVX */ + +} /* _starpu_dgtsvx_ */ diff --git a/min-dgels/base/SRC/dgttrf.c b/min-dgels/base/SRC/dgttrf.c new file mode 100644 index 0000000..1d96768 --- /dev/null +++ b/min-dgels/base/SRC/dgttrf.c @@ -0,0 +1,203 @@ +/* dgttrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, + doublereal *du, doublereal *du2, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__; + doublereal fact, temp; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DGTTRF computes an LU factorization of a real tridiagonal matrix A */ +/* using elimination with partial pivoting and row interchanges. */ + +/* The factorization has the form */ +/* A = L * U */ +/* where L is a product of permutation and unit lower bidiagonal */ +/* matrices and U is upper triangular with nonzeros in only the main */ +/* diagonal and first two superdiagonals. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* DL (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* A. */ + +/* On exit, DL is overwritten by the (n-1) multipliers that */ +/* define the matrix L from the LU factorization of A. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D must contain the diagonal elements of A. */ + +/* On exit, D is overwritten by the n diagonal elements of the */ +/* upper triangular matrix U from the LU factorization of A. */ + +/* DU (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, DU must contain the (n-1) super-diagonal elements */ +/* of A. */ + +/* On exit, DU is overwritten by the (n-1) elements of the first */ +/* super-diagonal of U. */ + +/* DU2 (output) DOUBLE PRECISION array, dimension (N-2) */ +/* On exit, DU2 is overwritten by the (n-2) elements of the */ +/* second super-diagonal of U. */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* interchanged with row IPIV(i). IPIV(i) will always be either */ +/* i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* required. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, U(k,k) is exactly zero. 
The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + _starpu_xerbla_("DGTTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize IPIV(i) = i and DU2(I) = 0 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ipiv[i__] = i__; +/* L10: */ + } + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + du2[i__] = 0.; +/* L20: */ + } + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + +/* No row interchange required, eliminate DL(I) */ + + if (d__[i__] != 0.) { + fact = dl[i__] / d__[i__]; + dl[i__] = fact; + d__[i__ + 1] -= fact * du[i__]; + } + } else { + +/* Interchange rows I and I+1, eliminate DL(I) */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + dl[i__] = fact; + temp = du[i__]; + du[i__] = d__[i__ + 1]; + d__[i__ + 1] = temp - fact * d__[i__ + 1]; + du2[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * du[i__ + 1]; + ipiv[i__] = i__ + 1; + } +/* L30: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { + if (d__[i__] != 0.) 
{ + fact = dl[i__] / d__[i__]; + dl[i__] = fact; + d__[i__ + 1] -= fact * du[i__]; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + dl[i__] = fact; + temp = du[i__]; + du[i__] = d__[i__ + 1]; + d__[i__ + 1] = temp - fact * d__[i__ + 1]; + ipiv[i__] = i__ + 1; + } + } + +/* Check for a zero on the diagonal of U. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] == 0.) { + *info = i__; + goto L50; + } +/* L40: */ + } +L50: + + return 0; + +/* End of DGTTRF */ + +} /* _starpu_dgttrf_ */ diff --git a/min-dgels/base/SRC/dgttrs.c b/min-dgels/base/SRC/dgttrs.c new file mode 100644 index 0000000..07dd149 --- /dev/null +++ b/min-dgels/base/SRC/dgttrs.c @@ -0,0 +1,189 @@ +/* dgttrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int _starpu_dgtts2_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer itrans; + logical 
notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTTRS solves one of the systems of equations */ +/* A*X = B or A'*X = B, */ +/* with a tridiagonal matrix A using the LU factorization computed */ +/* by DGTTRF. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations. */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A'* X = B (Transpose) */ +/* = 'C': A'* X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) multipliers that define the matrix L from the */ +/* LU factorization of A. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the upper triangular matrix U from */ +/* the LU factorization of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) elements of the first super-diagonal of U. */ + +/* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ +/* The (n-2) elements of the second super-diagonal of U. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* interchanged with row IPIV(i). IPIV(i) will always be either */ +/* i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* required. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the matrix of right hand side vectors B. */ +/* On exit, B is overwritten by the solution vectors X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n'; + if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *) + trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned + char *)trans == 'c')) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(*n,1)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DGTTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* Decode TRANS */ + + if (notran) { + itrans = 0; + } else { + itrans = 1; + } + +/* Determine the number of right-hand sides to solve at a time. */ + + if (*nrhs == 1) { + nb = 1; + } else { +/* Computing MAX */ + i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DGTTRS", trans, n, nrhs, &c_n1, & + c_n1); + nb = max(i__1,i__2); + } + + if (nb >= *nrhs) { + _starpu_dgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], + &b[b_offset], ldb); + } else { + i__1 = *nrhs; + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nrhs - j + 1; + jb = min(i__3,nb); + _starpu_dgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[ + 1], &b[j * b_dim1 + 1], ldb); +/* L10: */ + } + } + +/* End of DGTTRS */ + + return 0; +} /* _starpu_dgttrs_ */ diff --git a/min-dgels/base/SRC/dgtts2.c b/min-dgels/base/SRC/dgtts2.c new file mode 100644 index 0000000..53d5617 --- /dev/null +++ b/min-dgels/base/SRC/dgtts2.c @@ -0,0 +1,261 @@ +/* dgtts2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, + doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, + integer *ipiv, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ip; + doublereal temp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGTTS2 solves one of the systems of equations */ +/* A*X = B or A'*X = B, */ +/* with a tridiagonal matrix A using the LU factorization computed */ +/* by DGTTRF. */ + +/* Arguments */ +/* ========= */ + +/* ITRANS (input) INTEGER */ +/* Specifies the form of the system of equations. 
*/ +/* = 0: A * X = B (No transpose) */ +/* = 1: A'* X = B (Transpose) */ +/* = 2: A'* X = B (Conjugate transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) multipliers that define the matrix L from the */ +/* LU factorization of A. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the upper triangular matrix U from */ +/* the LU factorization of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) elements of the first super-diagonal of U. */ + +/* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ +/* The (n-2) elements of the second super-diagonal of U. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* interchanged with row IPIV(i). IPIV(i) will always be either */ +/* i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* required. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the matrix of right hand side vectors B. */ +/* On exit, B is overwritten by the solution vectors X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (*itrans == 0) { + +/* Solve A*X = B using the LU factorization of A, */ +/* overwriting each right hand side vector with its solution. 
*/ + + if (*nrhs <= 1) { + j = 1; +L10: + +/* Solve L*x = b. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + ip = ipiv[i__]; + temp = b[i__ + 1 - ip + i__ + j * b_dim1] - dl[i__] * b[ip + + j * b_dim1]; + b[i__ + j * b_dim1] = b[ip + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp; +/* L20: */ + } + +/* Solve U*x = b. */ + + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] + * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * b_dim1] + ) / d__[i__]; +/* L30: */ + } + if (j < *nrhs) { + ++j; + goto L10; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L*x = b. */ + + i__2 = *n - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (ipiv[i__] == i__) { + b[i__ + 1 + j * b_dim1] -= dl[i__] * b[i__ + j * + b_dim1]; + } else { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - dl[i__] * b[i__ + j * + b_dim1]; + } +/* L40: */ + } + +/* Solve U*x = b. */ + + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n + - 1] * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[ + i__ + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * + b_dim1]) / d__[i__]; +/* L50: */ + } +/* L60: */ + } + } + } else { + +/* Solve A' * X = B. */ + + if (*nrhs <= 1) { + +/* Solve U'*x = b. */ + + j = 1; +L70: + b[j * b_dim1 + 1] /= d__[1]; + if (*n > 1) { + b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * b_dim1 + + 1]) / d__[2]; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * b[ + i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 2 + j * + b_dim1]) / d__[i__]; +/* L80: */ + } + +/* Solve L'*x = b. 
*/ + + for (i__ = *n - 1; i__ >= 1; --i__) { + ip = ipiv[i__]; + temp = b[i__ + j * b_dim1] - dl[i__] * b[i__ + 1 + j * b_dim1] + ; + b[i__ + j * b_dim1] = b[ip + j * b_dim1]; + b[ip + j * b_dim1] = temp; +/* L90: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U'*x = b. */ + + b[j * b_dim1 + 1] /= d__[1]; + if (*n > 1) { + b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * + b_dim1 + 1]) / d__[2]; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * + b[i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - + 2 + j * b_dim1]) / d__[i__]; +/* L100: */ + } + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + b[i__ + j * b_dim1] -= dl[i__] * b[i__ + 1 + j * + b_dim1]; + } else { + temp = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - dl[ + i__] * temp; + b[i__ + j * b_dim1] = temp; + } +/* L110: */ + } +/* L120: */ + } + } + } + +/* End of DGTTS2 */ + + return 0; +} /* _starpu_dgtts2_ */ diff --git a/min-dgels/base/SRC/dhgeqz.c b/min-dgels/base/SRC/dhgeqz.c new file mode 100644 index 0000000..83497a8 --- /dev/null +++ b/min-dgels/base/SRC/dhgeqz.c @@ -0,0 +1,1498 @@ +/* dhgeqz.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b12 = 0.; +static doublereal c_b13 = 1.; +static integer c__1 = 1; +static integer c__3 = 3; + +/* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *t, integer *ldt, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, q_dim1, q_offset, t_dim1, t_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer j; + doublereal s, v[3], s1, s2, t1, u1, u2, a11, a12, a21, a22, b11, b22, c12, + c21; + integer jc; + doublereal an, bn, cl, cq, cr; + integer in; + doublereal u12, w11, w12, w21; + integer jr; + doublereal cz, w22, sl, wi, sr, vs, wr, b1a, b2a, a1i, a2i, b1i, b2i, a1r, + a2r, b1r, b2r, wr2, ad11, ad12, ad21, ad22, c11i, c22i; + integer jch; + doublereal c11r, c22r; + logical ilq; + doublereal u12l, tau, sqi; + logical ilz; + doublereal ulp, sqr, szi, szr, ad11l, ad12l, ad21l, ad22l, ad32l, wabs, + atol, btol, temp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dlag2_( + doublereal *, integer *, doublereal *, integer *, doublereal *, + 
doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + doublereal temp2, s1inv, scale; + extern logical _starpu_lsame_(char *, char *); + integer iiter, ilast, jiter; + doublereal anorm, bnorm; + integer maxit; + doublereal tempi, tempr; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlapy3_(doublereal + *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + logical ilazr2; + doublereal ascale, bscale; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern doublereal _starpu_dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal safmax; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal eshift; + logical ilschr; + integer icompq, ilastm, ischur; + logical ilazro; + integer icompz, ifirst, ifrstm, istart; + logical ilpivt, lquery; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* -- April 2009 -- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DHGEQZ computes the eigenvalues of a real matrix pair (H,T), */ +/* where H is an upper Hessenberg matrix and T is upper triangular, */ +/* using the double-shift QZ method. 
*/ +/* Matrix pairs of this type are produced by the reduction to */ +/* generalized upper Hessenberg form of a real matrix pair (A,B): */ + +/* A = Q1*H*Z1**T, B = Q1*T*Z1**T, */ + +/* as computed by DGGHRD. */ + +/* If JOB='S', then the Hessenberg-triangular pair (H,T) is */ +/* also reduced to generalized Schur form, */ + +/* H = Q*S*Z**T, T = Q*P*Z**T, */ + +/* where Q and Z are orthogonal matrices, P is an upper triangular */ +/* matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 */ +/* diagonal blocks. */ + +/* The 1-by-1 blocks correspond to real eigenvalues of the matrix pair */ +/* (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of */ +/* eigenvalues. */ + +/* Additionally, the 2-by-2 upper triangular diagonal blocks of P */ +/* corresponding to 2-by-2 blocks of S are reduced to positive diagonal */ +/* form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, */ +/* P(j,j) > 0, and P(j+1,j+1) > 0. */ + +/* Optionally, the orthogonal matrix Q from the generalized Schur */ +/* factorization may be postmultiplied into an input matrix Q1, and the */ +/* orthogonal matrix Z may be postmultiplied into an input matrix Z1. */ +/* If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced */ +/* the matrix pair (A,B) to generalized upper Hessenberg form, then the */ +/* output matrices Q1*Q and Z1*Z are the orthogonal factors from the */ +/* generalized Schur factorization of (A,B): */ + +/* A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. */ + +/* To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, */ +/* of (A,B)) are computed as a pair of values (alpha,beta), where alpha is */ +/* complex and beta real. */ +/* If beta is nonzero, lambda = alpha / beta is an eigenvalue of the */ +/* generalized nonsymmetric eigenvalue problem (GNEP) */ +/* A*x = lambda*B*x */ +/* and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the */ +/* alternate form of the GNEP */ +/* mu*A*y = B*y. 
*/ +/* Real eigenvalues can be read directly from the generalized Schur */ +/* form: */ +/* alpha = S(i,i), beta = P(i,i). */ + +/* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix */ +/* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), */ +/* pp. 241--256. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* = 'E': Compute eigenvalues only; */ +/* = 'S': Compute eigenvalues and the Schur form. */ + +/* COMPQ (input) CHARACTER*1 */ +/* = 'N': Left Schur vectors (Q) are not computed; */ +/* = 'I': Q is initialized to the unit matrix and the matrix Q */ +/* of left Schur vectors of (H,T) is returned; */ +/* = 'V': Q must contain an orthogonal matrix Q1 on entry and */ +/* the product Q1*Q is returned. */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': Right Schur vectors (Z) are not computed; */ +/* = 'I': Z is initialized to the unit matrix and the matrix Z */ +/* of right Schur vectors of (H,T) is returned; */ +/* = 'V': Z must contain an orthogonal matrix Z1 on entry and */ +/* the product Z1*Z is returned. */ + +/* N (input) INTEGER */ +/* The order of the matrices H, T, Q, and Z. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* ILO and IHI mark the rows and columns of H which are in */ +/* Hessenberg form. It is assumed that A is already upper */ +/* triangular in rows and columns 1:ILO-1 and IHI+1:N. */ +/* If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH, N) */ +/* On entry, the N-by-N upper Hessenberg matrix H. */ +/* On exit, if JOB = 'S', H contains the upper quasi-triangular */ +/* matrix S from the generalized Schur factorization; */ +/* 2-by-2 diagonal blocks (corresponding to complex conjugate */ +/* pairs of eigenvalues) are returned in standard form, with */ +/* H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. */ +/* If JOB = 'E', the diagonal blocks of H match those of S, but */ +/* the rest of H is unspecified. 
*/ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH >= max( 1, N ). */ + +/* T (input/output) DOUBLE PRECISION array, dimension (LDT, N) */ +/* On entry, the N-by-N upper triangular matrix T. */ +/* On exit, if JOB = 'S', T contains the upper triangular */ +/* matrix P from the generalized Schur factorization; */ +/* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S */ +/* are reduced to positive diagonal form, i.e., if H(j+1,j) is */ +/* non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and */ +/* T(j+1,j+1) > 0. */ +/* If JOB = 'E', the diagonal blocks of T match those of P, but */ +/* the rest of T is unspecified. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max( 1, N ). */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* The real parts of each scalar alpha defining an eigenvalue */ +/* of GNEP. */ + +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* The imaginary parts of each scalar alpha defining an */ +/* eigenvalue of GNEP. */ +/* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* positive, then the j-th and (j+1)-st eigenvalues are a */ +/* complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). */ + +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* The scalars beta that define the eigenvalues of GNEP. */ +/* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ +/* beta = BETA(j) represent the j-th eigenvalue of the matrix */ +/* pair (A,B), in one of the forms lambda = alpha/beta or */ +/* mu = beta/alpha. Since either lambda or mu may overflow, */ +/* they should not, in general, be computed. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* On entry, if COMPQ = 'V', the orthogonal matrix Q1 used in */ +/* the reduction of (A,B) to generalized Hessenberg form. 
*/ +/* On exit, if COMPQ = 'I', the orthogonal matrix of left Schur */ +/* vectors of (H,T), and if COMPQ = 'V', the orthogonal matrix */ +/* of left Schur vectors of (A,B). */ +/* Not referenced if COMPQ = 'N'. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= 1. */ +/* If COMPQ='V' or 'I', then LDQ >= N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in */ +/* the reduction of (A,B) to generalized Hessenberg form. */ +/* On exit, if COMPZ = 'I', the orthogonal matrix of */ +/* right Schur vectors of (H,T), and if COMPZ = 'V', the */ +/* orthogonal matrix of right Schur vectors of (A,B). */ +/* Not referenced if COMPZ = 'N'. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1. */ +/* If COMPZ='V' or 'I', then LDZ >= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* = 1,...,N: the QZ iteration did not converge. (H,T) is not */ +/* in Schur form, but ALPHAR(i), ALPHAI(i), and */ +/* BETA(i), i=INFO+1,...,N should be correct. */ +/* = N+1,...,2*N: the shift calculation failed. (H,T) is not */ +/* in Schur form, but ALPHAR(i), ALPHAI(i), and */ +/* BETA(i), i=INFO-N+1,...,N should be correct. */ + +/* Further Details */ +/* =============== */ + +/* Iteration counters: */ + +/* JITER -- counts iterations. 
*/ +/* IITER -- counts iterations run since ILAST was last */ +/* changed. This is therefore reset only when a 1-by-1 or */ +/* 2-by-2 block deflates off the bottom. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* $ SAFETY = 1.0E+0 ) */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode JOB, COMPQ, COMPZ */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + --alphar; + --alphai; + --beta; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + if (_starpu_lsame_(job, "E")) { + ilschr = FALSE_; + ischur = 1; + } else if (_starpu_lsame_(job, "S")) { + ilschr = TRUE_; + ischur = 2; + } else { + ischur = 0; + } + + if (_starpu_lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (_starpu_lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (_starpu_lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + + if (_starpu_lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (_starpu_lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (_starpu_lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Check Argument Values */ + + *info = 0; + work[1] = (doublereal) max(1,*n); + lquery = *lwork == -1; + if (ischur == 0) { + *info = -1; + } else if (icompq == 0) { + *info = -2; + } else if (icompz == 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1) { + *info = -5; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -6; + } else if (*ldh < *n) { + *info = -8; + } else if (*ldt < *n) { 
+ *info = -10; + } else if (*ldq < 1 || ilq && *ldq < *n) { + *info = -15; + } else if (*ldz < 1 || ilz && *ldz < *n) { + *info = -17; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DHGEQZ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + +/* Initialize Q and Z */ + + if (icompq == 3) { + _starpu_dlaset_("Full", n, n, &c_b12, &c_b13, &q[q_offset], ldq); + } + if (icompz == 3) { + _starpu_dlaset_("Full", n, n, &c_b12, &c_b13, &z__[z_offset], ldz); + } + +/* Machine Constants */ + + in = *ihi + 1 - *ilo; + safmin = _starpu_dlamch_("S"); + safmax = 1. / safmin; + ulp = _starpu_dlamch_("E") * _starpu_dlamch_("B"); + anorm = _starpu_dlanhs_("F", &in, &h__[*ilo + *ilo * h_dim1], ldh, &work[1]); + bnorm = _starpu_dlanhs_("F", &in, &t[*ilo + *ilo * t_dim1], ldt, &work[1]); +/* Computing MAX */ + d__1 = safmin, d__2 = ulp * anorm; + atol = max(d__1,d__2); +/* Computing MAX */ + d__1 = safmin, d__2 = ulp * bnorm; + btol = max(d__1,d__2); + ascale = 1. / max(safmin,anorm); + bscale = 1. / max(safmin,bnorm); + +/* Set Eigenvalues IHI+1:N */ + + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + if (t[j + j * t_dim1] < 0.) 
{ + if (ilschr) { + i__2 = j; + for (jr = 1; jr <= i__2; ++jr) { + h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; + t[jr + j * t_dim1] = -t[jr + j * t_dim1]; +/* L10: */ + } + } else { + h__[j + j * h_dim1] = -h__[j + j * h_dim1]; + t[j + j * t_dim1] = -t[j + j * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; +/* L20: */ + } + } + } + alphar[j] = h__[j + j * h_dim1]; + alphai[j] = 0.; + beta[j] = t[j + j * t_dim1]; +/* L30: */ + } + +/* If IHI < ILO, skip QZ steps */ + + if (*ihi < *ilo) { + goto L380; + } + +/* MAIN QZ ITERATION LOOP */ + +/* Initialize dynamic indices */ + +/* Eigenvalues ILAST+1:N have been found. */ +/* Column operations modify rows IFRSTM:whatever. */ +/* Row operations modify columns whatever:ILASTM. */ + +/* If only eigenvalues are being computed, then */ +/* IFRSTM is the row of the last splitting row above row ILAST; */ +/* this is always at least ILO. */ +/* IITER counts iterations since the last eigenvalue was found, */ +/* to tell when to use an extraordinary shift. */ +/* MAXIT is the maximum number of QZ sweeps allowed. */ + + ilast = *ihi; + if (ilschr) { + ifrstm = 1; + ilastm = *n; + } else { + ifrstm = *ilo; + ilastm = *ihi; + } + iiter = 0; + eshift = 0.; + maxit = (*ihi - *ilo + 1) * 30; + + i__1 = maxit; + for (jiter = 1; jiter <= i__1; ++jiter) { + +/* Split the matrix if possible. 
*/ + +/* Two tests: */ +/* 1: H(j,j-1)=0 or j=ILO */ +/* 2: T(j,j)=0 */ + + if (ilast == *ilo) { + +/* Special case: j=ILAST */ + + goto L80; + } else { + if ((d__1 = h__[ilast + (ilast - 1) * h_dim1], abs(d__1)) <= atol) + { + h__[ilast + (ilast - 1) * h_dim1] = 0.; + goto L80; + } + } + + if ((d__1 = t[ilast + ilast * t_dim1], abs(d__1)) <= btol) { + t[ilast + ilast * t_dim1] = 0.; + goto L70; + } + +/* General case: j= i__2; --j) { + +/* Test 1: for H(j,j-1)=0 or j=ILO */ + + if (j == *ilo) { + ilazro = TRUE_; + } else { + if ((d__1 = h__[j + (j - 1) * h_dim1], abs(d__1)) <= atol) { + h__[j + (j - 1) * h_dim1] = 0.; + ilazro = TRUE_; + } else { + ilazro = FALSE_; + } + } + +/* Test 2: for T(j,j)=0 */ + + if ((d__1 = t[j + j * t_dim1], abs(d__1)) < btol) { + t[j + j * t_dim1] = 0.; + +/* Test 1a: Check for 2 consecutive small subdiagonals in A */ + + ilazr2 = FALSE_; + if (! ilazro) { + temp = (d__1 = h__[j + (j - 1) * h_dim1], abs(d__1)); + temp2 = (d__1 = h__[j + j * h_dim1], abs(d__1)); + tempr = max(temp,temp2); + if (tempr < 1. && tempr != 0.) { + temp /= tempr; + temp2 /= tempr; + } + if (temp * (ascale * (d__1 = h__[j + 1 + j * h_dim1], abs( + d__1))) <= temp2 * (ascale * atol)) { + ilazr2 = TRUE_; + } + } + +/* If both tests pass (1 & 2), i.e., the leading diagonal */ +/* element of B in the block is zero, split a 1x1 block off */ +/* at the top. (I.e., at the J-th row/column) The leading */ +/* diagonal element of the remainder can also be zero, so */ +/* this may have to be done repeatedly. 
*/ + + if (ilazro || ilazr2) { + i__3 = ilast - 1; + for (jch = j; jch <= i__3; ++jch) { + temp = h__[jch + jch * h_dim1]; + _starpu_dlartg_(&temp, &h__[jch + 1 + jch * h_dim1], &c__, &s, + &h__[jch + jch * h_dim1]); + h__[jch + 1 + jch * h_dim1] = 0.; + i__4 = ilastm - jch; + _starpu_drot_(&i__4, &h__[jch + (jch + 1) * h_dim1], ldh, & + h__[jch + 1 + (jch + 1) * h_dim1], ldh, &c__, + &s); + i__4 = ilastm - jch; + _starpu_drot_(&i__4, &t[jch + (jch + 1) * t_dim1], ldt, &t[ + jch + 1 + (jch + 1) * t_dim1], ldt, &c__, &s); + if (ilq) { + _starpu_drot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) + * q_dim1 + 1], &c__1, &c__, &s); + } + if (ilazr2) { + h__[jch + (jch - 1) * h_dim1] *= c__; + } + ilazr2 = FALSE_; + if ((d__1 = t[jch + 1 + (jch + 1) * t_dim1], abs(d__1) + ) >= btol) { + if (jch + 1 >= ilast) { + goto L80; + } else { + ifirst = jch + 1; + goto L110; + } + } + t[jch + 1 + (jch + 1) * t_dim1] = 0.; +/* L40: */ + } + goto L70; + } else { + +/* Only test 2 passed -- chase the zero to T(ILAST,ILAST) */ +/* Then process as in the case T(ILAST,ILAST)=0 */ + + i__3 = ilast - 1; + for (jch = j; jch <= i__3; ++jch) { + temp = t[jch + (jch + 1) * t_dim1]; + _starpu_dlartg_(&temp, &t[jch + 1 + (jch + 1) * t_dim1], &c__, + &s, &t[jch + (jch + 1) * t_dim1]); + t[jch + 1 + (jch + 1) * t_dim1] = 0.; + if (jch < ilastm - 1) { + i__4 = ilastm - jch - 1; + _starpu_drot_(&i__4, &t[jch + (jch + 2) * t_dim1], ldt, & + t[jch + 1 + (jch + 2) * t_dim1], ldt, & + c__, &s); + } + i__4 = ilastm - jch + 2; + _starpu_drot_(&i__4, &h__[jch + (jch - 1) * h_dim1], ldh, & + h__[jch + 1 + (jch - 1) * h_dim1], ldh, &c__, + &s); + if (ilq) { + _starpu_drot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) + * q_dim1 + 1], &c__1, &c__, &s); + } + temp = h__[jch + 1 + jch * h_dim1]; + _starpu_dlartg_(&temp, &h__[jch + 1 + (jch - 1) * h_dim1], & + c__, &s, &h__[jch + 1 + jch * h_dim1]); + h__[jch + 1 + (jch - 1) * h_dim1] = 0.; + i__4 = jch + 1 - ifrstm; + _starpu_drot_(&i__4, &h__[ifrstm + jch * 
h_dim1], &c__1, &h__[ + ifrstm + (jch - 1) * h_dim1], &c__1, &c__, &s) + ; + i__4 = jch - ifrstm; + _starpu_drot_(&i__4, &t[ifrstm + jch * t_dim1], &c__1, &t[ + ifrstm + (jch - 1) * t_dim1], &c__1, &c__, &s) + ; + if (ilz) { + _starpu_drot_(n, &z__[jch * z_dim1 + 1], &c__1, &z__[(jch + - 1) * z_dim1 + 1], &c__1, &c__, &s); + } +/* L50: */ + } + goto L70; + } + } else if (ilazro) { + +/* Only test 1 passed -- work on J:ILAST */ + + ifirst = j; + goto L110; + } + +/* Neither test passed -- try next J */ + +/* L60: */ + } + +/* (Drop-through is "impossible") */ + + *info = *n + 1; + goto L420; + +/* T(ILAST,ILAST)=0 -- clear H(ILAST,ILAST-1) to split off a */ +/* 1x1 block. */ + +L70: + temp = h__[ilast + ilast * h_dim1]; + _starpu_dlartg_(&temp, &h__[ilast + (ilast - 1) * h_dim1], &c__, &s, &h__[ + ilast + ilast * h_dim1]); + h__[ilast + (ilast - 1) * h_dim1] = 0.; + i__2 = ilast - ifrstm; + _starpu_drot_(&i__2, &h__[ifrstm + ilast * h_dim1], &c__1, &h__[ifrstm + ( + ilast - 1) * h_dim1], &c__1, &c__, &s); + i__2 = ilast - ifrstm; + _starpu_drot_(&i__2, &t[ifrstm + ilast * t_dim1], &c__1, &t[ifrstm + (ilast - + 1) * t_dim1], &c__1, &c__, &s); + if (ilz) { + _starpu_drot_(n, &z__[ilast * z_dim1 + 1], &c__1, &z__[(ilast - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } + +/* H(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI, */ +/* and BETA */ + +L80: + if (t[ilast + ilast * t_dim1] < 0.) 
{ + if (ilschr) { + i__2 = ilast; + for (j = ifrstm; j <= i__2; ++j) { + h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; + t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; +/* L90: */ + } + } else { + h__[ilast + ilast * h_dim1] = -h__[ilast + ilast * h_dim1]; + t[ilast + ilast * t_dim1] = -t[ilast + ilast * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; +/* L100: */ + } + } + } + alphar[ilast] = h__[ilast + ilast * h_dim1]; + alphai[ilast] = 0.; + beta[ilast] = t[ilast + ilast * t_dim1]; + +/* Go to next block -- exit if finished. */ + + --ilast; + if (ilast < *ilo) { + goto L380; + } + +/* Reset counters */ + + iiter = 0; + eshift = 0.; + if (! ilschr) { + ilastm = ilast; + if (ifrstm > ilast) { + ifrstm = *ilo; + } + } + goto L350; + +/* QZ step */ + +/* This iteration only involves rows/columns IFIRST:ILAST. We */ +/* assume IFIRST < ILAST, and that the diagonal of B is non-zero. */ + +L110: + ++iiter; + if (! ilschr) { + ifrstm = ifirst; + } + +/* Compute single shifts. */ + +/* At this point, IFIRST < ILAST, and the diagonal elements of */ +/* T(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in */ +/* magnitude) */ + + if (iiter / 10 * 10 == iiter) { + +/* Exceptional shift. Chosen for no particularly good reason. */ +/* (Single shift only.) */ + + if ((doublereal) maxit * safmin * (d__1 = h__[ilast - 1 + ilast * + h_dim1], abs(d__1)) < (d__2 = t[ilast - 1 + (ilast - 1) * + t_dim1], abs(d__2))) { + eshift += h__[ilast - 1 + ilast * h_dim1] / t[ilast - 1 + ( + ilast - 1) * t_dim1]; + } else { + eshift += 1. / (safmin * (doublereal) maxit); + } + s1 = 1.; + wr = eshift; + + } else { + +/* Shifts based on the generalized eigenvalues of the */ +/* bottom-right 2x2 block of A and B. 
The first eigenvalue */ +/* returned by DLAG2 is the Wilkinson shift (AEP p.512), */ + + d__1 = safmin * 100.; + _starpu_dlag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + + (ilast - 1) * t_dim1], ldt, &d__1, &s1, &s2, &wr, &wr2, + &wi); + +/* Computing MAX */ +/* Computing MAX */ + d__3 = 1., d__4 = abs(wr), d__3 = max(d__3,d__4), d__4 = abs(wi); + d__1 = s1, d__2 = safmin * max(d__3,d__4); + temp = max(d__1,d__2); + if (wi != 0.) { + goto L200; + } + } + +/* Fiddle with shift to avoid overflow */ + + temp = min(ascale,1.) * (safmax * .5); + if (s1 > temp) { + scale = temp / s1; + } else { + scale = 1.; + } + + temp = min(bscale,1.) * (safmax * .5); + if (abs(wr) > temp) { +/* Computing MIN */ + d__1 = scale, d__2 = temp / abs(wr); + scale = min(d__1,d__2); + } + s1 = scale * s1; + wr = scale * wr; + +/* Now check for two consecutive small subdiagonals. */ + + i__2 = ifirst + 1; + for (j = ilast - 1; j >= i__2; --j) { + istart = j; + temp = (d__1 = s1 * h__[j + (j - 1) * h_dim1], abs(d__1)); + temp2 = (d__1 = s1 * h__[j + j * h_dim1] - wr * t[j + j * t_dim1], + abs(d__1)); + tempr = max(temp,temp2); + if (tempr < 1. && tempr != 0.) { + temp /= tempr; + temp2 /= tempr; + } + if ((d__1 = ascale * h__[j + 1 + j * h_dim1] * temp, abs(d__1)) <= + ascale * atol * temp2) { + goto L130; + } +/* L120: */ + } + + istart = ifirst; +L130: + +/* Do an implicit single-shift QZ sweep. 
*/ + +/* Initial Q */ + + temp = s1 * h__[istart + istart * h_dim1] - wr * t[istart + istart * + t_dim1]; + temp2 = s1 * h__[istart + 1 + istart * h_dim1]; + _starpu_dlartg_(&temp, &temp2, &c__, &s, &tempr); + +/* Sweep */ + + i__2 = ilast - 1; + for (j = istart; j <= i__2; ++j) { + if (j > istart) { + temp = h__[j + (j - 1) * h_dim1]; + _starpu_dlartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[ + j + (j - 1) * h_dim1]); + h__[j + 1 + (j - 1) * h_dim1] = 0.; + } + + i__3 = ilastm; + for (jc = j; jc <= i__3; ++jc) { + temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * + h_dim1]; + h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * + h__[j + 1 + jc * h_dim1]; + h__[j + jc * h_dim1] = temp; + temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; + t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + + 1 + jc * t_dim1]; + t[j + jc * t_dim1] = temp2; +/* L140: */ + } + if (ilq) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * + q_dim1]; + q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * + q[jr + (j + 1) * q_dim1]; + q[jr + j * q_dim1] = temp; +/* L150: */ + } + } + + temp = t[j + 1 + (j + 1) * t_dim1]; + _starpu_dlartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + + 1) * t_dim1]); + t[j + 1 + j * t_dim1] = 0.; + +/* Computing MIN */ + i__4 = j + 2; + i__3 = min(i__4,ilast); + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * + h_dim1]; + h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * + h__[jr + j * h_dim1]; + h__[jr + (j + 1) * h_dim1] = temp; +/* L160: */ + } + i__3 = j; + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] + ; + t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ + jr + j * t_dim1]; + t[jr + (j + 1) * t_dim1] = temp; +/* L170: */ + } + if (ilz) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = 
c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * + z_dim1]; + z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + + c__ * z__[jr + j * z_dim1]; + z__[jr + (j + 1) * z_dim1] = temp; +/* L180: */ + } + } +/* L190: */ + } + + goto L350; + +/* Use Francis double-shift */ + +/* Note: the Francis double-shift should work with real shifts, */ +/* but only if the block is at least 3x3. */ +/* This code may break if this point is reached with */ +/* a 2x2 block with real eigenvalues. */ + +L200: + if (ifirst + 1 == ilast) { + +/* Special case -- 2x2 block with complex eigenvectors */ + +/* Step 1: Standardize, that is, rotate so that */ + +/* ( B11 0 ) */ +/* B = ( ) with B11 non-negative. */ +/* ( 0 B22 ) */ + + _starpu_dlasv2_(&t[ilast - 1 + (ilast - 1) * t_dim1], &t[ilast - 1 + + ilast * t_dim1], &t[ilast + ilast * t_dim1], &b22, &b11, & + sr, &cr, &sl, &cl); + + if (b11 < 0.) { + cr = -cr; + sr = -sr; + b11 = -b11; + b22 = -b22; + } + + i__2 = ilastm + 1 - ifirst; + _starpu_drot_(&i__2, &h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &h__[ + ilast + (ilast - 1) * h_dim1], ldh, &cl, &sl); + i__2 = ilast + 1 - ifrstm; + _starpu_drot_(&i__2, &h__[ifrstm + (ilast - 1) * h_dim1], &c__1, &h__[ + ifrstm + ilast * h_dim1], &c__1, &cr, &sr); + + if (ilast < ilastm) { + i__2 = ilastm - ilast; + _starpu_drot_(&i__2, &t[ilast - 1 + (ilast + 1) * t_dim1], ldt, &t[ + ilast + (ilast + 1) * t_dim1], ldt, &cl, &sl); + } + if (ifrstm < ilast - 1) { + i__2 = ifirst - ifrstm; + _starpu_drot_(&i__2, &t[ifrstm + (ilast - 1) * t_dim1], &c__1, &t[ + ifrstm + ilast * t_dim1], &c__1, &cr, &sr); + } + + if (ilq) { + _starpu_drot_(n, &q[(ilast - 1) * q_dim1 + 1], &c__1, &q[ilast * + q_dim1 + 1], &c__1, &cl, &sl); + } + if (ilz) { + _starpu_drot_(n, &z__[(ilast - 1) * z_dim1 + 1], &c__1, &z__[ilast * + z_dim1 + 1], &c__1, &cr, &sr); + } + + t[ilast - 1 + (ilast - 1) * t_dim1] = b11; + t[ilast - 1 + ilast * t_dim1] = 0.; + t[ilast + (ilast - 1) * t_dim1] = 0.; + t[ilast + ilast * t_dim1] = b22; 
+ +/* If B22 is negative, negate column ILAST */ + + if (b22 < 0.) { + i__2 = ilast; + for (j = ifrstm; j <= i__2; ++j) { + h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; + t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; +/* L210: */ + } + + if (ilz) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; +/* L220: */ + } + } + } + +/* Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.) */ + +/* Recompute shift */ + + d__1 = safmin * 100.; + _starpu_dlag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + + (ilast - 1) * t_dim1], ldt, &d__1, &s1, &temp, &wr, & + temp2, &wi); + +/* If standardization has perturbed the shift onto real line, */ +/* do another (real single-shift) QR step. */ + + if (wi == 0.) { + goto L350; + } + s1inv = 1. / s1; + +/* Do EISPACK (QZVAL) computation of alpha and beta */ + + a11 = h__[ilast - 1 + (ilast - 1) * h_dim1]; + a21 = h__[ilast + (ilast - 1) * h_dim1]; + a12 = h__[ilast - 1 + ilast * h_dim1]; + a22 = h__[ilast + ilast * h_dim1]; + +/* Compute complex Givens rotation on right */ +/* (Assume some element of C = (sA - wB) > unfl ) */ +/* __ */ +/* (sA - wB) ( CZ -SZ ) */ +/* ( SZ CZ ) */ + + c11r = s1 * a11 - wr * b11; + c11i = -wi * b11; + c12 = s1 * a12; + c21 = s1 * a21; + c22r = s1 * a22 - wr * b22; + c22i = -wi * b22; + + if (abs(c11r) + abs(c11i) + abs(c12) > abs(c21) + abs(c22r) + abs( + c22i)) { + t1 = _starpu_dlapy3_(&c12, &c11r, &c11i); + cz = c12 / t1; + szr = -c11r / t1; + szi = -c11i / t1; + } else { + cz = _starpu_dlapy2_(&c22r, &c22i); + if (cz <= safmin) { + cz = 0.; + szr = 1.; + szi = 0.; + } else { + tempr = c22r / cz; + tempi = c22i / cz; + t1 = _starpu_dlapy2_(&cz, &c21); + cz /= t1; + szr = -c21 * tempr / t1; + szi = c21 * tempi / t1; + } + } + +/* Compute Givens rotation on left */ + +/* ( CQ SQ ) */ +/* ( __ ) A or B */ +/* ( -SQ CQ ) */ + + an = abs(a11) + abs(a12) + abs(a21) + abs(a22); + bn = abs(b11) + abs(b22); + wabs = abs(wr) + abs(wi); + 
if (s1 * an > wabs * bn) { + cq = cz * b11; + sqr = szr * b22; + sqi = -szi * b22; + } else { + a1r = cz * a11 + szr * a12; + a1i = szi * a12; + a2r = cz * a21 + szr * a22; + a2i = szi * a22; + cq = _starpu_dlapy2_(&a1r, &a1i); + if (cq <= safmin) { + cq = 0.; + sqr = 1.; + sqi = 0.; + } else { + tempr = a1r / cq; + tempi = a1i / cq; + sqr = tempr * a2r + tempi * a2i; + sqi = tempi * a2r - tempr * a2i; + } + } + t1 = _starpu_dlapy3_(&cq, &sqr, &sqi); + cq /= t1; + sqr /= t1; + sqi /= t1; + +/* Compute diagonal elements of QBZ */ + + tempr = sqr * szr - sqi * szi; + tempi = sqr * szi + sqi * szr; + b1r = cq * cz * b11 + tempr * b22; + b1i = tempi * b22; + b1a = _starpu_dlapy2_(&b1r, &b1i); + b2r = cq * cz * b22 + tempr * b11; + b2i = -tempi * b11; + b2a = _starpu_dlapy2_(&b2r, &b2i); + +/* Normalize so beta > 0, and Im( alpha1 ) > 0 */ + + beta[ilast - 1] = b1a; + beta[ilast] = b2a; + alphar[ilast - 1] = wr * b1a * s1inv; + alphai[ilast - 1] = wi * b1a * s1inv; + alphar[ilast] = wr * b2a * s1inv; + alphai[ilast] = -(wi * b2a) * s1inv; + +/* Step 3: Go to next block -- exit if finished. */ + + ilast = ifirst - 1; + if (ilast < *ilo) { + goto L380; + } + +/* Reset counters */ + + iiter = 0; + eshift = 0.; + if (! 
ilschr) { + ilastm = ilast; + if (ifrstm > ilast) { + ifrstm = *ilo; + } + } + goto L350; + } else { + +/* Usual case: 3x3 or larger block, using Francis implicit */ +/* double-shift */ + +/* 2 */ +/* Eigenvalue equation is w - c w + d = 0, */ + +/* -1 2 -1 */ +/* so compute 1st column of (A B ) - c A B + d */ +/* using the formula in QZIT (from EISPACK) */ + +/* We assume that the block is at least 3x3 */ + + ad11 = ascale * h__[ilast - 1 + (ilast - 1) * h_dim1] / (bscale * + t[ilast - 1 + (ilast - 1) * t_dim1]); + ad21 = ascale * h__[ilast + (ilast - 1) * h_dim1] / (bscale * t[ + ilast - 1 + (ilast - 1) * t_dim1]); + ad12 = ascale * h__[ilast - 1 + ilast * h_dim1] / (bscale * t[ + ilast + ilast * t_dim1]); + ad22 = ascale * h__[ilast + ilast * h_dim1] / (bscale * t[ilast + + ilast * t_dim1]); + u12 = t[ilast - 1 + ilast * t_dim1] / t[ilast + ilast * t_dim1]; + ad11l = ascale * h__[ifirst + ifirst * h_dim1] / (bscale * t[ + ifirst + ifirst * t_dim1]); + ad21l = ascale * h__[ifirst + 1 + ifirst * h_dim1] / (bscale * t[ + ifirst + ifirst * t_dim1]); + ad12l = ascale * h__[ifirst + (ifirst + 1) * h_dim1] / (bscale * + t[ifirst + 1 + (ifirst + 1) * t_dim1]); + ad22l = ascale * h__[ifirst + 1 + (ifirst + 1) * h_dim1] / ( + bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); + ad32l = ascale * h__[ifirst + 2 + (ifirst + 1) * h_dim1] / ( + bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); + u12l = t[ifirst + (ifirst + 1) * t_dim1] / t[ifirst + 1 + (ifirst + + 1) * t_dim1]; + + v[0] = (ad11 - ad11l) * (ad22 - ad11l) - ad12 * ad21 + ad21 * u12 + * ad11l + (ad12l - ad11l * u12l) * ad21l; + v[1] = (ad22l - ad11l - ad21l * u12l - (ad11 - ad11l) - (ad22 - + ad11l) + ad21 * u12) * ad21l; + v[2] = ad32l * ad21l; + + istart = ifirst; + + _starpu_dlarfg_(&c__3, v, &v[1], &c__1, &tau); + v[0] = 1.; + +/* Sweep */ + + i__2 = ilast - 2; + for (j = istart; j <= i__2; ++j) { + +/* All but last elements: use 3x3 Householder transforms. 
*/ + +/* Zero (j-1)st column of A */ + + if (j > istart) { + v[0] = h__[j + (j - 1) * h_dim1]; + v[1] = h__[j + 1 + (j - 1) * h_dim1]; + v[2] = h__[j + 2 + (j - 1) * h_dim1]; + + _starpu_dlarfg_(&c__3, &h__[j + (j - 1) * h_dim1], &v[1], &c__1, & + tau); + v[0] = 1.; + h__[j + 1 + (j - 1) * h_dim1] = 0.; + h__[j + 2 + (j - 1) * h_dim1] = 0.; + } + + i__3 = ilastm; + for (jc = j; jc <= i__3; ++jc) { + temp = tau * (h__[j + jc * h_dim1] + v[1] * h__[j + 1 + + jc * h_dim1] + v[2] * h__[j + 2 + jc * h_dim1]); + h__[j + jc * h_dim1] -= temp; + h__[j + 1 + jc * h_dim1] -= temp * v[1]; + h__[j + 2 + jc * h_dim1] -= temp * v[2]; + temp2 = tau * (t[j + jc * t_dim1] + v[1] * t[j + 1 + jc * + t_dim1] + v[2] * t[j + 2 + jc * t_dim1]); + t[j + jc * t_dim1] -= temp2; + t[j + 1 + jc * t_dim1] -= temp2 * v[1]; + t[j + 2 + jc * t_dim1] -= temp2 * v[2]; +/* L230: */ + } + if (ilq) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = tau * (q[jr + j * q_dim1] + v[1] * q[jr + (j + + 1) * q_dim1] + v[2] * q[jr + (j + 2) * q_dim1] + ); + q[jr + j * q_dim1] -= temp; + q[jr + (j + 1) * q_dim1] -= temp * v[1]; + q[jr + (j + 2) * q_dim1] -= temp * v[2]; +/* L240: */ + } + } + +/* Zero j-th column of B (see DLAGBC for details) */ + +/* Swap rows to pivot */ + + ilpivt = FALSE_; +/* Computing MAX */ + d__3 = (d__1 = t[j + 1 + (j + 1) * t_dim1], abs(d__1)), d__4 = + (d__2 = t[j + 1 + (j + 2) * t_dim1], abs(d__2)); + temp = max(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = t[j + 2 + (j + 1) * t_dim1], abs(d__1)), d__4 = + (d__2 = t[j + 2 + (j + 2) * t_dim1], abs(d__2)); + temp2 = max(d__3,d__4); + if (max(temp,temp2) < safmin) { + scale = 0.; + u1 = 1.; + u2 = 0.; + goto L250; + } else if (temp >= temp2) { + w11 = t[j + 1 + (j + 1) * t_dim1]; + w21 = t[j + 2 + (j + 1) * t_dim1]; + w12 = t[j + 1 + (j + 2) * t_dim1]; + w22 = t[j + 2 + (j + 2) * t_dim1]; + u1 = t[j + 1 + j * t_dim1]; + u2 = t[j + 2 + j * t_dim1]; + } else { + w21 = t[j + 1 + (j + 1) * t_dim1]; + w11 = t[j + 2 + (j + 1) * 
t_dim1]; + w22 = t[j + 1 + (j + 2) * t_dim1]; + w12 = t[j + 2 + (j + 2) * t_dim1]; + u2 = t[j + 1 + j * t_dim1]; + u1 = t[j + 2 + j * t_dim1]; + } + +/* Swap columns if nec. */ + + if (abs(w12) > abs(w11)) { + ilpivt = TRUE_; + temp = w12; + temp2 = w22; + w12 = w11; + w22 = w21; + w11 = temp; + w21 = temp2; + } + +/* LU-factor */ + + temp = w21 / w11; + u2 -= temp * u1; + w22 -= temp * w12; + w21 = 0.; + +/* Compute SCALE */ + + scale = 1.; + if (abs(w22) < safmin) { + scale = 0.; + u2 = 1.; + u1 = -w12 / w11; + goto L250; + } + if (abs(w22) < abs(u2)) { + scale = (d__1 = w22 / u2, abs(d__1)); + } + if (abs(w11) < abs(u1)) { +/* Computing MIN */ + d__2 = scale, d__3 = (d__1 = w11 / u1, abs(d__1)); + scale = min(d__2,d__3); + } + +/* Solve */ + + u2 = scale * u2 / w22; + u1 = (scale * u1 - w12 * u2) / w11; + +L250: + if (ilpivt) { + temp = u2; + u2 = u1; + u1 = temp; + } + +/* Compute Householder Vector */ + +/* Computing 2nd power */ + d__1 = scale; +/* Computing 2nd power */ + d__2 = u1; +/* Computing 2nd power */ + d__3 = u2; + t1 = sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + tau = scale / t1 + 1.; + vs = -1. / (scale + t1); + v[0] = 1.; + v[1] = vs * u1; + v[2] = vs * u2; + +/* Apply transformations from the right. 
*/ + +/* Computing MIN */ + i__4 = j + 3; + i__3 = min(i__4,ilast); + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = tau * (h__[jr + j * h_dim1] + v[1] * h__[jr + (j + + 1) * h_dim1] + v[2] * h__[jr + (j + 2) * h_dim1]); + h__[jr + j * h_dim1] -= temp; + h__[jr + (j + 1) * h_dim1] -= temp * v[1]; + h__[jr + (j + 2) * h_dim1] -= temp * v[2]; +/* L260: */ + } + i__3 = j + 2; + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = tau * (t[jr + j * t_dim1] + v[1] * t[jr + (j + 1) * + t_dim1] + v[2] * t[jr + (j + 2) * t_dim1]); + t[jr + j * t_dim1] -= temp; + t[jr + (j + 1) * t_dim1] -= temp * v[1]; + t[jr + (j + 2) * t_dim1] -= temp * v[2]; +/* L270: */ + } + if (ilz) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = tau * (z__[jr + j * z_dim1] + v[1] * z__[jr + ( + j + 1) * z_dim1] + v[2] * z__[jr + (j + 2) * + z_dim1]); + z__[jr + j * z_dim1] -= temp; + z__[jr + (j + 1) * z_dim1] -= temp * v[1]; + z__[jr + (j + 2) * z_dim1] -= temp * v[2]; +/* L280: */ + } + } + t[j + 1 + j * t_dim1] = 0.; + t[j + 2 + j * t_dim1] = 0.; +/* L290: */ + } + +/* Last elements: Use Givens rotations */ + +/* Rotations from the left */ + + j = ilast - 1; + temp = h__[j + (j - 1) * h_dim1]; + _starpu_dlartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[j + + (j - 1) * h_dim1]); + h__[j + 1 + (j - 1) * h_dim1] = 0.; + + i__2 = ilastm; + for (jc = j; jc <= i__2; ++jc) { + temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * + h_dim1]; + h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * + h__[j + 1 + jc * h_dim1]; + h__[j + jc * h_dim1] = temp; + temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; + t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + + 1 + jc * t_dim1]; + t[j + jc * t_dim1] = temp2; +/* L300: */ + } + if (ilq) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * + q_dim1]; + q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * + q[jr + (j + 1) * q_dim1]; + q[jr + j * 
q_dim1] = temp; +/* L310: */ + } + } + +/* Rotations from the right. */ + + temp = t[j + 1 + (j + 1) * t_dim1]; + _starpu_dlartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + + 1) * t_dim1]); + t[j + 1 + j * t_dim1] = 0.; + + i__2 = ilast; + for (jr = ifrstm; jr <= i__2; ++jr) { + temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * + h_dim1]; + h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * + h__[jr + j * h_dim1]; + h__[jr + (j + 1) * h_dim1] = temp; +/* L320: */ + } + i__2 = ilast - 1; + for (jr = ifrstm; jr <= i__2; ++jr) { + temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] + ; + t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ + jr + j * t_dim1]; + t[jr + (j + 1) * t_dim1] = temp; +/* L330: */ + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * + z_dim1]; + z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + + c__ * z__[jr + j * z_dim1]; + z__[jr + (j + 1) * z_dim1] = temp; +/* L340: */ + } + } + +/* End of Double-Shift code */ + + } + + goto L350; + +/* End of iteration loop */ + +L350: +/* L360: */ + ; + } + +/* Drop-through = non-convergence */ + + *info = ilast; + goto L420; + +/* Successful completion of all QZ steps */ + +L380: + +/* Set Eigenvalues 1:ILO-1 */ + + i__1 = *ilo - 1; + for (j = 1; j <= i__1; ++j) { + if (t[j + j * t_dim1] < 0.) 
{ + if (ilschr) { + i__2 = j; + for (jr = 1; jr <= i__2; ++jr) { + h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; + t[jr + j * t_dim1] = -t[jr + j * t_dim1]; +/* L390: */ + } + } else { + h__[j + j * h_dim1] = -h__[j + j * h_dim1]; + t[j + j * t_dim1] = -t[j + j * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; +/* L400: */ + } + } + } + alphar[j] = h__[j + j * h_dim1]; + alphai[j] = 0.; + beta[j] = t[j + j * t_dim1]; +/* L410: */ + } + +/* Normal Termination */ + + *info = 0; + +/* Exit (other than argument error) -- return optimal workspace size */ + +L420: + work[1] = (doublereal) (*n); + return 0; + +/* End of DHGEQZ */ + +} /* _starpu_dhgeqz_ */ diff --git a/min-dgels/base/SRC/dhsein.c b/min-dgels/base/SRC/dhsein.c new file mode 100644 index 0000000..cf6c4a7 --- /dev/null +++ b/min-dgels/base/SRC/dhsein.c @@ -0,0 +1,491 @@ +/* dhsein.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static logical c_false = FALSE_; +static logical c_true = TRUE_; + +/* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *mm, integer *m, doublereal *work, integer * + ifaill, integer *ifailr, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2; + doublereal d__1, 
d__2; + + /* Local variables */ + integer i__, k, kl, kr, kln, ksi; + doublereal wki; + integer ksr; + doublereal ulp, wkr, eps3; + logical pair; + doublereal unfl; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical leftv, bothv; + doublereal hnorm; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlaein_(logical *, logical *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * +, doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical noinit; + integer ldwork; + logical rightv, fromqr; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DHSEIN uses inverse iteration to find specified right and/or left */ +/* eigenvectors of a real upper Hessenberg matrix H. */ + +/* The right eigenvector x and the left eigenvector y of the matrix H */ +/* corresponding to an eigenvalue w are defined by: */ + +/* H * x = w * x, y**h * H = w * y**h */ + +/* where y**h denotes the conjugate transpose of the vector y. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'R': compute right eigenvectors only; */ +/* = 'L': compute left eigenvectors only; */ +/* = 'B': compute both right and left eigenvectors. 
*/ + +/* EIGSRC (input) CHARACTER*1 */ +/* Specifies the source of eigenvalues supplied in (WR,WI): */ +/* = 'Q': the eigenvalues were found using DHSEQR; thus, if */ +/* H has zero subdiagonal elements, and so is */ +/* block-triangular, then the j-th eigenvalue can be */ +/* assumed to be an eigenvalue of the block containing */ +/* the j-th row/column. This property allows DHSEIN to */ +/* perform inverse iteration on just one diagonal block. */ +/* = 'N': no assumptions are made on the correspondence */ +/* between eigenvalues and diagonal blocks. In this */ +/* case, DHSEIN must always perform inverse iteration */ +/* using the whole matrix H. */ + +/* INITV (input) CHARACTER*1 */ +/* = 'N': no initial vectors are supplied; */ +/* = 'U': user-supplied initial vectors are stored in the arrays */ +/* VL and/or VR. */ + +/* SELECT (input/output) LOGICAL array, dimension (N) */ +/* Specifies the eigenvectors to be computed. To select the */ +/* real eigenvector corresponding to a real eigenvalue WR(j), */ +/* SELECT(j) must be set to .TRUE.. To select the complex */ +/* eigenvector corresponding to a complex eigenvalue */ +/* (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), */ +/* either SELECT(j) or SELECT(j+1) or both must be set to */ +/* .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is */ +/* .FALSE.. */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N >= 0. */ + +/* H (input) DOUBLE PRECISION array, dimension (LDH,N) */ +/* The upper Hessenberg matrix H. */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH >= max(1,N). */ + +/* WR (input/output) DOUBLE PRECISION array, dimension (N) */ +/* WI (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the real and imaginary parts of the eigenvalues of */ +/* H; a complex conjugate pair of eigenvalues must be stored in */ +/* consecutive elements of WR and WI. 
*/ +/* On exit, WR may have been altered since close eigenvalues */ +/* are perturbed slightly in searching for independent */ +/* eigenvectors. */ + +/* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ +/* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must */ +/* contain starting vectors for the inverse iteration for the */ +/* left eigenvectors; the starting vector for each eigenvector */ +/* must be in the same column(s) in which the eigenvector will */ +/* be stored. */ +/* On exit, if SIDE = 'L' or 'B', the left eigenvectors */ +/* specified by SELECT will be stored consecutively in the */ +/* columns of VL, in the same order as their eigenvalues. A */ +/* complex eigenvector corresponding to a complex eigenvalue is */ +/* stored in two consecutive columns, the first holding the real */ +/* part and the second the imaginary part. */ +/* If SIDE = 'R', VL is not referenced. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. */ +/* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. */ + +/* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ +/* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must */ +/* contain starting vectors for the inverse iteration for the */ +/* right eigenvectors; the starting vector for each eigenvector */ +/* must be in the same column(s) in which the eigenvector will */ +/* be stored. */ +/* On exit, if SIDE = 'R' or 'B', the right eigenvectors */ +/* specified by SELECT will be stored consecutively in the */ +/* columns of VR, in the same order as their eigenvalues. A */ +/* complex eigenvector corresponding to a complex eigenvalue is */ +/* stored in two consecutive columns, the first holding the real */ +/* part and the second the imaginary part. */ +/* If SIDE = 'L', VR is not referenced. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. */ +/* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. 
*/ + +/* MM (input) INTEGER */ +/* The number of columns in the arrays VL and/or VR. MM >= M. */ + +/* M (output) INTEGER */ +/* The number of columns in the arrays VL and/or VR required to */ +/* store the eigenvectors; each selected real eigenvector */ +/* occupies one column and each selected complex eigenvector */ +/* occupies two columns. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension ((N+2)*N) */ + +/* IFAILL (output) INTEGER array, dimension (MM) */ +/* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left */ +/* eigenvector in the i-th column of VL (corresponding to the */ +/* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the */ +/* eigenvector converged satisfactorily. If the i-th and (i+1)th */ +/* columns of VL hold a complex eigenvector, then IFAILL(i) and */ +/* IFAILL(i+1) are set to the same value. */ +/* If SIDE = 'R', IFAILL is not referenced. */ + +/* IFAILR (output) INTEGER array, dimension (MM) */ +/* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right */ +/* eigenvector in the i-th column of VR (corresponding to the */ +/* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the */ +/* eigenvector converged satisfactorily. If the i-th and (i+1)th */ +/* columns of VR hold a complex eigenvector, then IFAILR(i) and */ +/* IFAILR(i+1) are set to the same value. */ +/* If SIDE = 'L', IFAILR is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, i is the number of eigenvectors which */ +/* failed to converge; see IFAILL and IFAILR for further */ +/* details. */ + +/* Further Details */ +/* =============== */ + +/* Each eigenvector is normalized so that the element of largest */ +/* magnitude has magnitude 1; here the magnitude of a complex number */ +/* (x,y) is taken to be |x|+|y|. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. 
Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input parameters. */ + + /* Parameter adjustments */ + --select; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --ifaill; + --ifailr; /* every array pointer is now biased so the 1-based Fortran subscripts used below index it directly */ + + /* Function Body */ + bothv = _starpu_lsame_(side, "B"); + rightv = _starpu_lsame_(side, "R") || bothv; + leftv = _starpu_lsame_(side, "L") || bothv; + + fromqr = _starpu_lsame_(eigsrc, "Q"); + + noinit = _starpu_lsame_(initv, "N"); + +/* Set M to the number of columns required to store the selected */ +/* eigenvectors, and standardize the array SELECT. */ + + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + select[k] = FALSE_; + } else { + if (wi[k] == 0.) { + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; /* NOTE(review): the test below reads select[k + 1]; when k == *n (a nonzero WI(N) with no partner) that is one element past the documented N entries -- confirm callers always supply complete conjugate pairs */ + if (select[k] || select[k + 1]) { + select[k] = TRUE_; + *m += 2; + } + } + } +/* L10: */ + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! fromqr && ! _starpu_lsame_(eigsrc, "N")) { + *info = -2; + } else if (! noinit && ! 
_starpu_lsame_(initv, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -13; + } else if (*mm < *m) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DHSEIN", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set machine-dependent constants. */ + + unfl = _starpu_dlamch_("Safe minimum"); + ulp = _starpu_dlamch_("Precision"); + smlnum = unfl * (*n / ulp); + bignum = (1. 
- ulp) / smlnum; + + ldwork = *n + 1; /* handed to DLAEIN together with work[1]; the second DLAEIN workspace pointer used below is work[N*N+N+1], i.e. it begins after (N+1)*N entries */ + + kl = 1; + kln = 0; + if (fromqr) { + kr = 0; + } else { + kr = *n; + } + ksr = 1; + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (select[k]) { + +/* Compute eigenvector(s) corresponding to W(K). */ + + if (fromqr) { + +/* If affiliation of eigenvalues is known, check whether */ +/* the matrix splits. */ + +/* Determine KL and KR such that 1 <= KL <= K <= KR <= N */ +/* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or */ +/* KR = N). */ + +/* Then inverse iteration can be performed with the */ +/* submatrix H(KL:N,KL:N) for a left eigenvector, and with */ +/* the submatrix H(1:KR,1:KR) for a right eigenvector. */ + + i__2 = kl + 1; + for (i__ = k; i__ >= i__2; --i__) { + if (h__[i__ + (i__ - 1) * h_dim1] == 0.) { + goto L30; + } +/* L20: */ + } +L30: + kl = i__; + if (k > kr) { + i__2 = *n - 1; + for (i__ = k; i__ <= i__2; ++i__) { + if (h__[i__ + 1 + i__ * h_dim1] == 0.) { + goto L50; + } +/* L40: */ + } +L50: + kr = i__; + } + } + + if (kl != kln) { + kln = kl; /* remember which KL the cached HNORM / EPS3 were computed for */ + +/* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it */ +/* has not been computed before. */ + + i__2 = kr - kl + 1; + hnorm = _starpu_dlanhs_("I", &i__2, &h__[kl + kl * h_dim1], ldh, & + work[1]); + if (hnorm > 0.) { + eps3 = hnorm * ulp; + } else { + eps3 = smlnum; + } + } + +/* Perturb eigenvalue if it is close to any previous */ +/* selected eigenvalues affiliated to the submatrix */ +/* H(KL:KR,KL:KR). Close roots are modified by EPS3. 
*/ + + wkr = wr[k]; + wki = wi[k]; +L60: + i__2 = kl; + for (i__ = k - 1; i__ >= i__2; --i__) { + if (select[i__] && (d__1 = wr[i__] - wkr, abs(d__1)) + (d__2 = + wi[i__] - wki, abs(d__2)) < eps3) { + wkr += eps3; + goto L60; /* rescan from K-1 with the shifted real part */ + } +/* L70: */ + } + wr[k] = wkr; + + pair = wki != 0.; + if (pair) { + ksi = ksr + 1; + } else { + ksi = ksr; + } + if (leftv) { + +/* Compute left eigenvector. 
*/ + + i__2 = *n - kl + 1; /* order of the trailing submatrix H(KL:N,KL:N) */ + _starpu_dlaein_(&c_false, &noinit, &i__2, &h__[kl + kl * h_dim1], ldh, + &wkr, &wki, &vl[kl + ksr * vl_dim1], &vl[kl + ksi * + vl_dim1], &work[1], &ldwork, &work[*n * *n + *n + 1], + &eps3, &smlnum, &bignum, &iinfo); + if (iinfo > 0) { + if (pair) { + *info += 2; /* a failed complex pair is counted as two eigenvectors */ + } else { + ++(*info); + } + ifaill[ksr] = k; + ifaill[ksi] = k; + } else { + ifaill[ksr] = 0; + ifaill[ksi] = 0; + } + i__2 = kl - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + vl[i__ + ksr * vl_dim1] = 0.; +/* L80: */ + } + if (pair) { + i__2 = kl - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + vl[i__ + ksi * vl_dim1] = 0.; +/* L90: */ + } + } + } + if (rightv) { + +/* Compute right eigenvector. */ + + _starpu_dlaein_(&c_true, &noinit, &kr, &h__[h_offset], ldh, &wkr, & + wki, &vr[ksr * vr_dim1 + 1], &vr[ksi * vr_dim1 + 1], & + work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, & + smlnum, &bignum, &iinfo); + if (iinfo > 0) { + if (pair) { + *info += 2; + } else { + ++(*info); + } + ifailr[ksr] = k; + ifailr[ksi] = k; + } else { + ifailr[ksr] = 0; + ifailr[ksi] = 0; + } + i__2 = *n; + for (i__ = kr + 1; i__ <= i__2; ++i__) { + vr[i__ + ksr * vr_dim1] = 0.; +/* L100: */ + } + if (pair) { + i__2 = *n; + for (i__ = kr + 1; i__ <= i__2; ++i__) { + vr[i__ + ksi * vr_dim1] = 0.; +/* L110: */ + } + } + } + + if (pair) { + ksr += 2; + } else { + ++ksr; + } + } +/* L120: */ + } + + return 0; + +/* End of DHSEIN */ + +} /* _starpu_dhsein_ */ diff --git a/min-dgels/base/SRC/dhseqr.c b/min-dgels/base/SRC/dhseqr.c new file mode 100644 index 0000000..b41aa9f --- /dev/null +++ b/min-dgels/base/SRC/dhseqr.c @@ -0,0 +1,487 @@ +/* dhseqr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__12 = 12; +static integer c__2 = 2; +static integer c__49 = 49; + +/* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal hl[2401] /* was [49][49] */; + integer kbot, nmin; + extern logical _starpu_lsame_(char *, char *); + logical initz; + doublereal workl[49]; + logical wantt, wantz; + extern /* Subroutine */ int _starpu_dlaqr0_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), _starpu_dlahqr_(logical *, logical *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, 
integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* Purpose */ +/* ======= */ + +/* DHSEQR computes the eigenvalues of a Hessenberg matrix H */ +/* and, optionally, the matrices T and Z from the Schur decomposition */ +/* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ +/* Schur form), and Z is the orthogonal matrix of Schur vectors. */ + +/* Optionally Z may be postmultiplied into an input orthogonal */ +/* matrix Q so that this routine can give the Schur factorization */ +/* of a matrix A which has been reduced to the Hessenberg form H */ +/* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* = 'E': compute eigenvalues only; */ +/* = 'S': compute eigenvalues and the Schur form T. */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': no Schur vectors are computed; */ +/* = 'I': Z is initialized to the unit matrix and the matrix Z */ +/* of Schur vectors of H is returned; */ +/* = 'V': Z must contain an orthogonal matrix Q on entry, and */ +/* the product Q*Z is returned. */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N .GE. 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper triangular in rows */ +/* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* set by a previous call to DGEBAL, and then passed to DGEHRD */ +/* when the matrix output by DGEBAL is reduced to Hessenberg */ +/* form. Otherwise ILO and IHI should be set to 1 and N */ +/* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. 
*/ +/* If N = 0, then ILO = 1 and IHI = 0. */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On entry, the upper Hessenberg matrix H. */ +/* On exit, if INFO = 0 and JOB = 'S', then H contains the */ +/* upper quasi-triangular matrix T from the Schur decomposition */ +/* (the Schur form); 2-by-2 diagonal blocks (corresponding to */ +/* complex conjugate pairs of eigenvalues) are returned in */ +/* standard form, with H(i,i) = H(i+1,i+1) and */ +/* H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the */ +/* contents of H are unspecified on exit. (The output value of */ +/* H when INFO.GT.0 is given under the description of INFO */ +/* below.) */ + +/* Unlike earlier versions of DHSEQR, this subroutine may */ +/* explicitly set H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 */ +/* or j = IHI+1, IHI+2, ... N. */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH .GE. max(1,N). */ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* The real and imaginary parts, respectively, of the computed */ +/* eigenvalues. If two eigenvalues are computed as a complex */ +/* conjugate pair, they are stored in consecutive elements of */ +/* WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and */ +/* WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in */ +/* the same order as on the diagonal of the Schur form returned */ +/* in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */ +/* diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ +/* WI(i+1) = -WI(i). */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* If COMPZ = 'N', Z is not referenced. */ +/* If COMPZ = 'I', on entry Z need not be set and on exit, */ +/* if INFO = 0, Z contains the orthogonal matrix Z of the Schur */ +/* vectors of H. 
If COMPZ = 'V', on entry Z must contain an */ +/* N-by-N matrix Q, which is assumed to be equal to the unit */ +/* matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */ +/* if INFO = 0, Z contains Q*Z. */ +/* Normally Q is the orthogonal matrix generated by DORGHR */ +/* after the call to DGEHRD which formed the Hessenberg matrix */ +/* H. (The output value of Z when INFO.GT.0 is given under */ +/* the description of INFO below.) */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. If COMPZ = 'I' or */ +/* COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwise, LDZ.GE.1. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns an estimate of */ +/* the optimal value for LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK .GE. max(1,N) */ +/* is sufficient and delivers very good and sometimes */ +/* optimal performance. However, LWORK as large as 11*N */ +/* may be required for optimal performance. A workspace */ +/* query is recommended to determine the optimal workspace */ +/* size. */ + +/* If LWORK = -1, then DHSEQR does a workspace query. */ +/* In this case, DHSEQR checks the input parameters and */ +/* estimates the optimal workspace size for the given */ +/* values of N, ILO and IHI. The estimate is returned */ +/* in WORK(1). No error message related to LWORK is */ +/* issued by XERBLA. Neither H nor Z are accessed. */ + + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* .LT. 0: if INFO = -i, the i-th argument had an illegal */ +/* value */ +/* .GT. 0: if INFO = i, DHSEQR failed to compute all of */ +/* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ +/* and WI contain those eigenvalues which have been */ +/* successfully computed. (Failures are rare.) */ + +/* If INFO .GT. 
0 and JOB = 'E', then on exit, the */ +/* remaining unconverged eigenvalues are the eigen- */ +/* values of the upper Hessenberg matrix rows and */ +/* columns ILO through INFO of the final, output */ +/* value of H. */ + +/* If INFO .GT. 0 and JOB = 'S', then on exit */ + +/* (*) (initial value of H)*U = U*(final value of H) */ + +/* where U is an orthogonal matrix. The final */ +/* value of H is upper Hessenberg and quasi-triangular */ +/* in rows and columns INFO+1 through IHI. */ + +/* If INFO .GT. 0 and COMPZ = 'V', then on exit */ + +/* (final value of Z) = (initial value of Z)*U */ + +/* where U is the orthogonal matrix in (*) (regard- */ +/* less of the value of JOB.) */ + +/* If INFO .GT. 0 and COMPZ = 'I', then on exit */ +/* (final value of Z) = U */ +/* where U is the orthogonal matrix in (*) (regard- */ +/* less of the value of JOB.) */ + +/* If INFO .GT. 0 and COMPZ = 'N', then Z is not */ +/* accessed. */ + +/* ================================================================ */ +/* Default values supplied by */ +/* ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */ +/* It is suggested that these defaults be adjusted in order */ +/* to attain best performance in each particular */ +/* computational environment. */ + +/* ISPEC=12: The DLAHQR vs DLAQR0 crossover point. */ +/* Default: 75. (Must be at least 11.) */ + +/* ISPEC=13: Recommended deflation window size. */ +/* This depends on ILO, IHI and NS. NS is the */ +/* number of simultaneous shifts returned */ +/* by ILAENV(ISPEC=15). (See ISPEC=15 below.) */ +/* The default for (IHI-ILO+1).LE.500 is NS. */ +/* The default for (IHI-ILO+1).GT.500 is 3*NS/2. */ + +/* ISPEC=14: Nibble crossover point. (See IPARMQ for */ +/* details.) Default: 14% of deflation window */ +/* size. */ + +/* ISPEC=15: Number of simultaneous shifts in a multishift */ +/* QR iteration. */ + +/* If IHI-ILO+1 is ... */ + +/* greater than ...but less ... the */ +/* or equal to ... 
than default is */ + +/* 1 30 NS = 2(+) */ +/* 30 60 NS = 4(+) */ +/* 60 150 NS = 10(+) */ +/* 150 590 NS = ** */ +/* 590 3000 NS = 64 */ +/* 3000 6000 NS = 128 */ +/* 6000 infinity NS = 256 */ + +/* (+) By default some or all matrices of this order */ +/* are passed to the implicit double shift routine */ +/* DLAHQR and this parameter is ignored. See */ +/* ISPEC=12 above and comments in IPARMQ for */ +/* details. */ + +/* (**) The asterisks (**) indicate an ad-hoc */ +/* function of N increasing from 10 to 64. */ + +/* ISPEC=16: Select structured matrix multiply. */ +/* If the number of simultaneous shifts (specified */ +/* by ISPEC=15) is less than 14, then the default */ +/* for ISPEC=16 is 0. Otherwise the default for */ +/* ISPEC=16 is 2. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* References: */ +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ +/* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ +/* 929--947, 2002. */ + +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ +/* of Matrix Analysis, volume 23, pages 948--973, 2002. */ + +/* ================================================================ */ +/* .. Parameters .. */ + +/* ==== Matrices of order NTINY or smaller must be processed by */ +/* . DLAHQR because of insufficient subdiagonal scratch space. */ +/* . (This is a hard limit.) ==== */ + +/* ==== NL allocates some local workspace to help small matrices */ +/* . through a rare DLAHQR failure. NL .GT. NTINY = 11 is */ +/* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- */ +/* . mended. (The default value of NMIN is 75.) 
Using NL = 49 */ +/* . allows up to six simultaneous shifts and a 16-by-16 */ +/* . deflation window. ==== */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* ==== Decode and check the input parameters. ==== */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; /* every array pointer is now biased so the 1-based Fortran subscripts used below index it directly */ + + /* Function Body */ + wantt = _starpu_lsame_(job, "S"); + initz = _starpu_lsame_(compz, "I"); + wantz = initz || _starpu_lsame_(compz, "V"); + work[1] = (doublereal) max(1,*n); + lquery = *lwork == -1; /* LWORK = -1 is the workspace-query request described above */ + + *info = 0; + if (! _starpu_lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! _starpu_lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -13; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + _starpu_xerbla_("DHSEQR", &i__1); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* ==== Ensure reported workspace size is backward-compatible with */ +/* . 
previous LAPACK versions. 
==== */ +/* Computing MAX */ + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); + return 0; + + } else { + +/* ==== copy eigenvalues isolated by DGEBAL ==== */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L20: */ + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + _starpu_dlaset_("A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); /* NOTE(review): ch__1 is char[2] receiving one character each from JOB and COMPZ, so there is no room for a terminating NUL, and the _starpu_ilaenv_ prototype declared above takes no length argument -- confirm the callee never reads past two characters */ + nmin = _starpu_ilaenv_(&c__12, "DHSEQR", ch__1, n, ilo, ihi, lwork); + nmin = max(11,nmin); /* never below NTINY = 11 (see the Parameters note above) */ + +/* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== */ + + if (*n > nmin) { + _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); + } else { + +/* ==== Small matrix ==== */ + + _starpu_dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds */ +/* . when DLAHQR fails. 
==== */ + + kbot = *info; /* DLAHQR's INFO is passed to the DLAQR0 retry below in the argument slot that otherwise receives IHI */ + + if (*n >= 49) { /* 49 is NL, the order of the local scratch matrix hl */ + +/* ==== Larger matrices have enough subdiagonal scratch */ +/* . space to call DLAQR0 directly. ==== */ + + _starpu_dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { + +/* ==== Tiny matrices don't have enough subdiagonal */ +/* . scratch space to benefit from DLAQR0. Hence, */ +/* . tiny matrices must be copied into a larger */ +/* . 
array before calling DLAQR0. ==== */ + + _starpu_dlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.; /* 1-based element HL(N+1,N) of the 49-by-49 column-major scratch matrix (subscript i + j*49 - 50) */ + i__1 = 49 - *n; + _starpu_dlaset_("A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * + 49 - 49], &c__49); + _starpu_dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + _starpu_dlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + _starpu_dlaset_("L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh); + } + +/* ==== Ensure reported workspace size is backward-compatible with */ +/* . previous LAPACK versions. ==== */ + +/* Computing MAX */ + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); + } + +/* ==== End of DHSEQR ==== */ + + return 0; +} /* _starpu_dhseqr_ */ diff --git a/min-dgels/base/SRC/disnan.c b/min-dgels/base/SRC/disnan.c new file mode 100644 index 0000000..77623a6 --- /dev/null +++ b/min-dgels/base/SRC/disnan.c @@ -0,0 +1,52 @@ +/* disnan.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_disnan_(doublereal *din) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + extern logical _starpu_dlaisnan_(doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ +/* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ +/* future. */ + +/* Arguments */ +/* ========= */ + +/* DIN (input) DOUBLE PRECISION */ +/* Input to test for NaN. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + ret_val = _starpu_dlaisnan_(din, din); + return ret_val; +} /* _starpu_disnan_ */ diff --git a/min-dgels/base/SRC/dla_gbamv.c b/min-dgels/base/SRC/dla_gbamv.c new file mode 100644 index 0000000..be8343b --- /dev/null +++ b/min-dgels/base/SRC/dla_gbamv.c @@ -0,0 +1,316 @@ +/* _starpu_dla_gbamv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * + ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, + integer *incy) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + extern integer _starpu_ilatrans_(char *); + integer i__, j; + logical symb_zero__; + integer kd, iy, jx, kx, ky, info; + doublereal temp; + integer lenx, leny; + doublereal safe1; + extern doublereal _starpu_dlamch_(char *); + extern /* 
Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_GBAMV performs one of the matrix-vector operations */ + +/* y := alpha*abs(A)*abs(x) + beta*abs(y), */ +/* or y := alpha*abs(A)'*abs(x) + beta*abs(y), */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n matrix. */ + +/* This function is primarily used in calculating error bounds. */ +/* To protect against underflow during evaluation, components in */ +/* the resulting vector are perturbed away from zero by (N+1) */ +/* times the underflow threshold. To prevent unnecessarily large */ +/* errors for block-structure embedded in general matrices, */ +/* "symbolically" zero components are not perturbed. A zero */ +/* entry is considered "symbolic" if all multiplications involved */ +/* in computing that entry have at least one zero multiplicand. */ + +/* Parameters */ +/* ========== */ + +/* TRANS - INTEGER */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ +/* BLAS_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ +/* BLAS_CONJ_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ + +/* Unchanged on exit. */ + +/* M - INTEGER */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit.
*/ + +/* KL - INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU - INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* ALPHA - DOUBLE PRECISION */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ) */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! (*trans == _starpu_ilatrans_("N") || *trans == _starpu_ilatrans_("T") || *trans == _starpu_ilatrans_("C"))) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*kl < 0) { + info = 4; + } else if (*ku < 0) { + info = 5; + } else if (*ldab < *kl + *ku + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DLA_GBAMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (*trans == _starpu_ilatrans_("N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Set SAFE1 essentially to be the underflow threshold times the */ +/* number of additions in each row. */ + + safe1 = _starpu_dlamch_("Safe minimum"); + safe1 = (*n + 1) * safe1; + +/* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ + +/* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ +/* the inexact flag. Still doesn't help change the iteration order */ +/* to per-column. */ + + kd = *ku + 1; + iy = ky; + if (*incx == 1) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + if (*alpha != 0.) 
{ +/* Computing MAX */ + i__2 = i__ - *ku; +/* Computing MIN */ + i__4 = i__ + *kl; + i__3 = min(i__4,lenx); + for (j = max(i__2,1); j <= i__3; ++j) { + if (*trans == _starpu_ilatrans_("N")) { + temp = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)); + } else { + temp = (d__1 = ab[j + (kd + i__ - j) * ab_dim1], abs( + d__1)); + } + symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); + y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + if (*alpha != 0.) { + jx = kx; +/* Computing MAX */ + i__3 = i__ - *ku; +/* Computing MIN */ + i__4 = i__ + *kl; + i__2 = min(i__4,lenx); + for (j = max(i__3,1); j <= i__2; ++j) { + if (*trans == _starpu_ilatrans_("N")) { + temp = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)); + } else { + temp = (d__1 = ab[j + (kd + i__ - j) * ab_dim1], abs( + d__1)); + } + symb_zero__ = symb_zero__ && (x[jx] == 0. || temp == 0.); + y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + + return 0; + +/* End of DLA_GBAMV */ + +} /* _starpu_dla_gbamv__ */ diff --git a/min-dgels/base/SRC/dla_gbrcond.c b/min-dgels/base/SRC/dla_gbrcond.c new file mode 100644 index 0000000..b42aa8d --- /dev/null +++ b/min-dgels/base/SRC/dla_gbrcond.c @@ -0,0 +1,345 @@ +/* _starpu_dla_gbrcond.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, integer *cmode, doublereal *c__, integer *info, + doublereal *work, integer *iwork, ftnlen trans_len) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1; + + /* Local variables */ + integer i__, j, kd, ke; + doublereal tmp; + integer kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *), _starpu_dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal ainvnm; + logical notrans; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLA_GBRCOND estimates the Skeel condition number of op(A) * op2(C) */ +/* where op2 is determined by CMODE as follows */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ +/* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* is computed by computing scaling factors R such that */ +/* diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* infinity-norm condition number. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. U is stored as an upper triangular */ +/* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* and the multipliers used during the factorization are stored */ +/* in rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1.
*/ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from the factorization A = P*L*U */ +/* as computed by DGBTRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* CMODE (input) INTEGER */ +/* Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector C in the formula op(A) * op2(C). */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* i > 0: The ith argument is invalid. */ + +/* WORK (input) DOUBLE PRECISION array, dimension (5*N). */ +/* Workspace. */ + +/* IWORK (input) INTEGER array, dimension (N). */ +/* Workspace. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + --c__; + --work; + --iwork; + + /* Function Body */ + ret_val = 0.; + + *info = 0; + notrans = _starpu_lsame_(trans, "N"); + if (! notrans && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0 || *kl > *n - 1) { + *info = -3; + } else if (*ku < 0 || *ku > *n - 1) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLA_GBRCOND", &i__1); + return ret_val; + } + if (*n == 0) { + ret_val = 1.; + return ret_val; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. 
*/ + + kd = *ku + 1; + ke = *kl + 1; + if (notrans) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = min(i__4,*n); + for (j = max(i__2,1); j <= i__3; ++j) { + tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1] * c__[j], + abs(d__1)); + } + } else if (*cmode == 0) { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = min(i__4,*n); + for (j = max(i__3,1); j <= i__2; ++j) { + tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); + } + } else { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = min(i__4,*n); + for (j = max(i__2,1); j <= i__3; ++j) { + tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1] / c__[j], + abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = min(i__4,*n); + for (j = max(i__3,1); j <= i__2; ++j) { + tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1] * c__[j], + abs(d__1)); + } + } else if (*cmode == 0) { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = min(i__4,*n); + for (j = max(i__2,1); j <= i__3; ++j) { + tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(d__1) + ); + } + } else { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = min(i__4,*n); + for (j = max(i__3,1); j <= i__2; ++j) { + tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1] / c__[j], + abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } + +/* Estimate the norm of inv(op(A)). */ + + ainvnm = 0.; + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (notrans) { + _starpu_dgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } else { + _starpu_dgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C'). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (notrans) { + _starpu_dgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } else { + _starpu_dgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + ret_val = 1. / ainvnm; + } + + return ret_val; + +} /* _starpu_dla_gbrcond__ */ diff --git a/min-dgels/base/SRC/dla_gbrfsx_extended.c b/min-dgels/base/SRC/dla_gbrfsx_extended.c new file mode 100644 index 0000000..972693d --- /dev/null +++ b/min-dgels/base/SRC/dla_gbrfsx_extended.c @@ -0,0 +1,630 @@ +/* _starpu_dla_gbrfsx_extended.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b6 = -1.; +static doublereal c_b8 = 1.; + +/* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, + doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal + *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + char ch__1[1]; + + /* Local variables */ + doublereal dxratmax, dzratmax; + integer i__, j, m; + extern /* Subroutine */ int _starpu_dla_gbamv__(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *); + logical incr_prec__; + doublereal prev_dz_z__, yk, final_dx_x__; + extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, + doublereal *, doublereal *); + doublereal final_dz_z__, prevnormdx; + 
integer cnt; + doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; + extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * + , doublereal *, doublereal *, doublereal *); + doublereal ymin; + extern /* Subroutine */ int _starpu_blas_dgbmv_x__(integer *, integer *, integer * + , integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + integer y_prec_state__; + extern /* Subroutine */ int blas_dgbmv2_x__(integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), _starpu_dgbmv_(char *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal dxrat, dzrat; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + char trans[1]; + doublereal normx, normy; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal normdx; + extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *); + doublereal hugeval; + integer x_state__, z_state__; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLA_GBRFSX_EXTENDED improves the computed solution to a system of */ +/* linear equations by performing extra-precise iterative refinement */ +/* and provides error bounds and backward error estimates for the solution. */ +/* This subroutine is called by DGBRFSX to perform iterative refinement. */ +/* In addition to normwise error bound, the code provides maximum */ +/* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP for details of the error bounds. Note that this */ +/* subroutine is only responsible for setting the second fields of */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP. */ + +/* Arguments */ +/* ========= */ + +/* PREC_TYPE (input) INTEGER */ +/* Specifies the intermediate precision to be used in refinement. */ +/* The value is defined by ILAPREC(P) where P is a CHARACTER and */ +/* P = 'S': Single */ +/* = 'D': Double */ +/* = 'I': Indigenous */ +/* = 'X', 'E': Extra */ + +/* TRANS_TYPE (input) INTEGER */ +/* Specifies the transposition operation on A. */ +/* The value is defined by ILATRANS(T) where T is a CHARACTER and */ +/* T = 'N': No transpose */ +/* = 'T': Transpose */ +/* = 'C': Conjugate transpose */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0 */ + +/* NRHS (input) INTEGER */ +/* The number of right-hand-sides, i.e., the number of columns of the */ +/* matrix B. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factors L and U from the factorization */ +/* A = P*L*U as computed by DGBTRF.
*/ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from the factorization A = P*L*U */ +/* as computed by DGBTRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* COLEQU (input) LOGICAL */ +/* If .TRUE. then column equilibration was done to A before calling */ +/* this routine. This is needed to compute the solution and error */ +/* bounds correctly. */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If COLEQU = .FALSE., C */ +/* is not accessed. If C is input, each element of C should be a power */ +/* of the radix to ensure a reliable solution and error estimates. */ +/* Scaling by powers of the radix does not cause rounding errors unless */ +/* the result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right-hand-side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Y (input/output) DOUBLE PRECISION array, dimension */ +/* (LDY,NRHS) */ +/* On entry, the solution matrix X, as computed by DGBTRS. */ +/* On exit, the improved solution matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= max(1,N). */ + +/* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* error for right-hand-side j from the formula */ +/* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. This is computed by DLA_LIN_BERR. 
*/ + +/* N_NORMS (input) INTEGER */ +/* Determines which error bounds to return (see ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP). */ +/* If N_NORMS >= 1 return normwise error bounds. */ +/* If N_NORMS >= 2 return componentwise error bounds. */ + +/* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. 
*/ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. 
*/ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* RES (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate residual. */ + +/* AYB (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace. This can be the same workspace passed for Y_TAIL. */ + +/* DY (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate solution. */ + +/* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the trailing bits of the intermediate solution. */ + +/* RCOND (input) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* ITHRESH (input) INTEGER */ +/* The maximum number of residual computations allowed for */ +/* refinement. The default is 10. For 'aggressive' set to 100 to */ +/* permit convergence using approximate factorizations or */ +/* factorizations other than LU. 
If the factorization uses a */ +/* technique other than Gaussian elimination, the guarantees in */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ + +/* RTHRESH (input) DOUBLE PRECISION */ +/* Determines when to stop refinement if the error estimate stops */ +/* decreasing. Refinement will stop when the next solution no longer */ +/* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* for more details. */ + +/* DZ_UB (input) DOUBLE PRECISION */ +/* Determines when to start considering componentwise convergence. */ +/* Componentwise convergence is only considered after each component */ +/* of the solution Y is stable, which we define as the relative */ +/* change in each component being less than DZ_UB. The default value */ +/* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* more details. */ + +/* IGNORE_CWISE (input) LOGICAL */ +/* If .TRUE. then ignore componentwise convergence. Default value */ +/* is .FALSE.. */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument to DGBTRS had an illegal */ +/* value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --ipiv; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__); + *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; + eps = _starpu_dlamch_("Epsilon"); + hugeval = _starpu_dlamch_("Overflow"); +/* Force HUGEVAL to Inf */ + hugeval *= hugeval; +/* Using HUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (doublereal) (*n) * eps; + m = *kl + *ku + 1; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.; + } + } + dxrat = 0.; + dxratmax = 0.; + dzrat = 0.; + dzratmax = 0.; + final_dx_x__ = hugeval; + final_dz_z__ = hugeval; + prevnormdx = hugeval; + prev_dz_z__ = hugeval; + dz_z__ = hugeval; + dx_x__ = hugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). 
*/ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + _starpu_dgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[ + j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + _starpu_blas_dgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ + ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, & + res[1], &c__1, prec_type__); + } else { + blas_dgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ + ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], & + c__1, &c_b8, &res[1], &c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); + _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] +, &dy[1], n, info); + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.; + normy = 0.; + normdx = 0.; + dz_z__ = 0.; + ymin = hugeval; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); + dyk = (d__1 = dy[i__], abs(d__1)); + if (yk != 0.) { +/* Computing MAX */ + d__1 = dz_z__, d__2 = dyk / yk; + dz_z__ = max(d__1,d__2); + } else if (dyk != 0.) { + dz_z__ = hugeval; + } + ymin = min(ymin,yk); + normy = max(normy,yk); + if (*colequ) { +/* Computing MAX */ + d__1 = normx, d__2 = yk * c__[i__]; + normx = max(d__1,d__2); +/* Computing MAX */ + d__1 = normdx, d__2 = dyk * c__[i__]; + normdx = max(d__1,d__2); + } else { + normx = normy; + normdx = max(normdx,dyk); + } + } + if (normx != 0.) { + dx_x__ = normdx / normx; + } else if (normdx == 0.) { + dx_x__ = 0.; + } else { + dx_x__ = hugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria. */ + + if (! 
(*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy + && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.; + final_dz_z__ = hugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + +/* Exit if both normwise and componentwise stopped working, */ +/* but if componentwise is unstable, let it go at least two */ +/* iterations. */ + + if (x_state__ != 1) { + if (*ignore_cwise__) { + goto L666; + } + if (z_state__ == 3 || z_state__ == 2) { + goto L666; + } + if (z_state__ == 0 && cnt > 1) { + goto L666; + } + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update soluton. */ + + if (y_prec_state__ < 2) { + _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. 
*/ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds. */ + + if (*n_norms__ >= 1) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( + 1 - dxratmax); + } + if (*n_norms__ >= 2) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( + 1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + _starpu_dgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j * + y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + _starpu_dla_gbamv__(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, & + y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); + _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS */ + + } + + return 0; +} /* _starpu_dla_gbrfsx_extended__ */ diff --git a/min-dgels/base/SRC/dla_gbrpvgrw.c b/min-dgels/base/SRC/dla_gbrpvgrw.c new file mode 100644 index 0000000..c968db2 --- /dev/null +++ b/min-dgels/base/SRC/dla_gbrpvgrw.c @@ -0,0 +1,136 @@ +/* _starpu_dla_gbrpvgrw.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * + ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2; + + /* Local variables: kd is the row offset KU+1 used to index both band arrays; amax and umax are the largest absolute entries found in column j of AB and AFB; rpvgrw is the running minimum of amax/umax and becomes the return value */ + integer i__, j, kd; + doublereal amax, umax, rpvgrw; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_GBRPVGRW computes the reciprocal pivot growth factor */ +/* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ +/* much less than 1, the stability of the LU factorization of the */ +/* (equilibrated) matrix A could be poor. This also means that the */ +/* solution X, estimated condition numbers, and error bounds could be */ +/* unreliable. The factor is returned as the function value. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. 
*/ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* NCOLS (input) INTEGER */ +/* The number of columns of the matrix A. NCOLS >= 0. Only columns 1..NCOLS are examined by the loop below. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* Details of the LU factorization of the band matrix A, as */ +/* computed by DGBTRF. U is stored as an upper triangular */ +/* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* and the multipliers used during the factorization are stored */ +/* in rows KL+KU+2 to 2*KL+KU+1. */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments: shift ab and afb so that ab[i + j*ab_dim1] and afb[i + j*afb_dim1] can be indexed with 1-based (row, column) pairs as in the Fortran original */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + + /* Function Body: for each of the first NCOLS columns, compare the largest absolute AB entry with the largest absolute AFB entry and keep the smallest ratio; kd = KU+1 is the row offset of the AB(KU+1+i-j,j) = A(i,j) mapping described above */ + rpvgrw = 1.; + kd = *ku + 1; + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + amax = 0.; + umax = 0.; +/* Computing MAX: the first loop starts at row max(j-KU,1) */ + i__2 = j - *ku; +/* Computing MIN: and stops at row min(j+KL,N) */ + i__4 = j + *kl; + i__3 = min(i__4,*n); + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX: amax = largest abs(A(i,j)) seen so far in column j */ + d__2 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); + amax = max(d__2,amax); + } +/* Computing MAX: the second loop covers rows max(j-KU,1)..j only */ + i__3 = j - *ku; + i__2 = j; + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX: umax = largest abs(afb entry) over those rows. NOTE(review): afb is indexed with the same kd = KU+1 offset as ab, while the AFB description above places U in rows 1..KL+KU+1 -- confirm the intended row offset against the DGBTRF storage layout */ + d__2 = (d__1 = afb[kd + i__ - j + j * afb_dim1], abs(d__1)); + umax = max(d__2,umax); + } + if (umax != 0.) { +/* Computing MIN: columns with umax == 0 are skipped, so rpvgrw stays at its initial value 1 if every column is skipped */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + ret_val = rpvgrw; + return ret_val; +} /* _starpu_dla_gbrpvgrw__ */ diff --git a/min-dgels/base/SRC/dla_geamv.c b/min-dgels/base/SRC/dla_geamv.c new file mode 100644 index 0000000..eee6080 --- /dev/null +++ b/min-dgels/base/SRC/dla_geamv.c @@ -0,0 +1,293 @@ +/* _starpu_dla_geamv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *x, + integer *incx, doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables: symb_zero__ records whether the y entry being formed is still a symbolic zero; kx and ky are the start indices in x and y, jx and iy the running indices; safe1 is the perturbation added to entries that are not symbolic zeros */ + extern integer _starpu_ilatrans_(char *); + integer i__, j; + logical symb_zero__; + integer iy, jx, kx, ky, info; + doublereal temp; + integer lenx, leny; + doublereal safe1; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_GEAMV performs one of the matrix-vector operations */ + +/* y := alpha*abs(A)*abs(x) + beta*abs(y), */ +/* or y := alpha*abs(A)'*abs(x) + beta*abs(y), */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n matrix. */ + +/* This function is primarily used in calculating error bounds. 
*/ +/* To protect against underflow during evaluation, components in */ +/* the resulting vector are perturbed away from zero by (N+1) */ +/* times the underflow threshold. To prevent unnecessarily large */ +/* errors for block-structure embedded in general matrices, */ +/* "symbolically" zero components are not perturbed. A zero */ +/* entry is considered "symbolic" if all multiplications involved */ +/* in computing that entry have at least one zero multiplicand. */ + +/* Parameters */ +/* ========== */ + +/* TRANS - INTEGER */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ +/* BLAS_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ +/* BLAS_CONJ_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ + +/* Unchanged on exit. */ + +/* M - INTEGER */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ) */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. 
*/ + +/* INCX - INTEGER */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION */ +/* Array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Level 2 Blas routine. */ + +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift a, x and y so they can be indexed 1-based, a as a[row + col*a_dim1] */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body: validate the arguments; info is set to the (positive) position of the first invalid argument and reported through xerbla, after which the routine returns without touching y */ + info = 0; + if (! (*trans == _starpu_ilatrans_("N") || *trans == _starpu_ilatrans_("T") || *trans == _starpu_ilatrans_("C"))) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + _starpu_xerbla_("DLA_GEAMV ", &info); + return 0; + } + +/* Quick return if possible. Since && binds tighter than ||, the test reads: m == 0, or n == 0, or (alpha == 0 and beta == 1). */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. 
*/ + + if (*trans == _starpu_ilatrans_("N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Set SAFE1 essentially to be the underflow threshold times the */ +/* number of additions in each row. */ + + safe1 = _starpu_dlamch_("Safe minimum"); + safe1 = (*n + 1) * safe1; + +/* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ + +/* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ +/* the inexact flag. Still doesn't help change the iteration order */ +/* to per-column. */ + + iy = ky; + if (*incx == 1) { /* unit stride in x: x[j] is indexed directly */ + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + if (*alpha != 0.) { + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + if (*trans == _starpu_ilatrans_("N")) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); /* stays TRUE only while every product so far has a zero factor */ + y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); /* perturb non-symbolic entries; presumably d_sign (libf2c) yields safe1 with the sign of y[iy] -- confirm */ + } + iy += *incy; + } + } else { /* general stride: x is walked from kx in steps of incx through jx */ + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + if (*alpha != 0.) { + jx = kx; + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + if (*trans == _starpu_ilatrans_("N")) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + symb_zero__ = symb_zero__ && (x[jx] == 0. 
|| temp == 0.); + y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + + return 0; + +/* End of DLA_GEAMV */ + +} /* _starpu_dla_geamv__ */ diff --git a/min-dgels/base/SRC/dla_gercond.c b/min-dgels/base/SRC/dla_gercond.c new file mode 100644 index 0000000..1f8465a --- /dev/null +++ b/min-dgels/base/SRC/dla_gercond.c @@ -0,0 +1,299 @@ +/* _starpu_dla_gercond.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: c__1 is passed by address to dgetrs as its third argument */ + +static integer c__1 = 1; + +doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Local variables */ + integer i__, j; + doublereal tmp; + integer kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + logical notrans; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, 
Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) */ +/* where op2 is determined by CMODE as follows */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ +/* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* is computed by computing scaling factors R such that */ +/* diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* infinity-norm condition number. */ + +/* Arguments */ +/* ========== */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from the factorization A = P*L*U */ +/* as computed by DGETRF; row i of the matrix was interchanged */ +/* with row IPIV(i). 
*/ + +/* CMODE (input) INTEGER */ +/* Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector C in the formula op(A) * op2(C). */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument had an illegal value (the code below sets -1, -2, -4 or -6). */ + +/* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ +/* Workspace. Entries 2N+1..3N receive the row sums computed below; entries N+1..2N and 1..N are the two vectors passed to dlacn2. */ + +/* IWORK (input) INTEGER array, dimension (N). */ +/* Workspace. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift every array so it can be indexed 1-based as in the Fortran original */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --c__; + --work; + --iwork; + + /* Function Body: ret_val starts at 0, which is what is returned when an argument is rejected; N == 0 returns 1 */ + ret_val = 0.; + + *info = 0; + notrans = _starpu_lsame_(trans, "N"); + if (! notrans && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldaf < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLA_GERCOND", &i__1); + return ret_val; + } + if (*n == 0) { + ret_val = 1.; + return ret_val; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. 
*/ + + if (notrans) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } + } else { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; /* work[2n+i] = sum over j of abs(A(i,j) scaled by op2(C)(j)) */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } else { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; /* same sum, reading A(j,i) instead of A(i,j) for the transposed case */ + } + } + +/* Estimate the norm of inv(op(A)). Loop at L10: dlacn2 is called again after every transformation of work[1..n] until it returns kase == 0; kase == 2 selects the first branch below, any other nonzero kase the second. */ + + ainvnm = 0.; + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (notrans) { + _starpu_dgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ + 1], &work[1], n, info); + } else { + _starpu_dgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], + &work[1], n, info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C'). 
*/ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (notrans) { + _starpu_dgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], + &work[1], n, info); + } else { + _starpu_dgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ + 1], &work[1], n, info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number; ret_val keeps its initial value 0 when ainvnm is 0. */ + + if (ainvnm != 0.) { + ret_val = 1. / ainvnm; + } + + return ret_val; + +} /* _starpu_dla_gercond__ */ diff --git a/min-dgels/base/SRC/dla_gerfsx_extended.c b/min-dgels/base/SRC/dla_gerfsx_extended.c new file mode 100644 index 0000000..72b07e0 --- /dev/null +++ b/min-dgels/base/SRC/dla_gerfsx_extended.c @@ -0,0 +1,622 @@ +/* _starpu_dla_gerfsx_extended.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b6 = -1.; +static doublereal c_b8 = 1.; + +/* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, + doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * + ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, + doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * + dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, + doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, + y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, + i__1, i__2, i__3; + doublereal d__1, d__2; + char ch__1[1]; + + /* Local variables */ + doublereal dxratmax, dzratmax; + integer i__, j; + extern /* Subroutine */ int _starpu_dla_geamv__(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + logical incr_prec__; + doublereal prev_dz_z__, yk, final_dx_x__; + extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, + doublereal *, doublereal *); + doublereal final_dz_z__, prevnormdx; + integer cnt; + doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; + extern /* Subroutine */ int 
_starpu_dla_lin_berr__(integer *, integer *, integer * + , doublereal *, doublereal *, doublereal *); + doublereal ymin; + extern /* Subroutine */ int _starpu_blas_dgemv_x__(integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + integer y_prec_state__; + extern /* Subroutine */ int blas_dgemv2_x__(integer *, integer *, integer + *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal dxrat, dzrat; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + char trans[1]; + doublereal normx, normy; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal normdx; + extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *); + doublereal hugeval; + integer x_state__, z_state__; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLA_GERFSX_EXTENDED improves the computed solution to a system of */ +/* linear equations by performing extra-precise iterative refinement */ +/* and provides error bounds and backward error estimates for the solution. */ +/* This subroutine is called by DGERFSX to perform iterative refinement. */ +/* In addition to normwise error bound, the code provides maximum */ +/* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP for details of the error bounds. Note that this */ +/* subroutine is only responsible for setting the second fields of */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP. */ + +/* Arguments */ +/* ========= */ + +/* PREC_TYPE (input) INTEGER */ +/* Specifies the intermediate precision to be used in refinement. */ +/* The value is defined by ILAPREC(P) where P is a CHARACTER and */ +/* P = 'S': Single */ +/* = 'D': Double */ +/* = 'I': Indigenous */ +/* = 'X', 'E': Extra */ + +/* TRANS_TYPE (input) INTEGER */ +/* Specifies the transposition operation on A. */ +/* The value is defined by ILATRANS(T) where T is a CHARACTER and */ +/* T = 'N': No transpose */ +/* = 'T': Transpose */ +/* = 'C': Conjugate transpose */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right-hand-sides, i.e., the number of columns of the */ +/* matrix B. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). 
*/ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* The pivot indices from the factorization A = P*L*U */ +/* as computed by DGETRF; row i of the matrix was interchanged */ +/* with row IPIV(i). */ + +/* COLEQU (input) LOGICAL */ +/* If .TRUE. then column equilibration was done to A before calling */ +/* this routine. This is needed to compute the solution and error */ +/* bounds correctly. */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If COLEQU = .FALSE., C */ +/* is not accessed. If C is input, each element of C should be a power */ +/* of the radix to ensure a reliable solution and error estimates. */ +/* Scaling by powers of the radix does not cause rounding errors unless */ +/* the result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right-hand-side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Y (input/output) DOUBLE PRECISION array, dimension */ +/* (LDY,NRHS) */ +/* On entry, the solution matrix X, as computed by DGETRS. */ +/* On exit, the improved solution matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= max(1,N). */ + +/* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* error for right-hand-side j from the formula */ +/* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. This is computed by DLA_LIN_BERR. */ + +/* N_NORMS (input) INTEGER */ +/* Determines which error bounds to return (see ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP). 
*/ +/* If N_NORMS >= 1 return normwise error bounds. */ +/* If N_NORMS >= 2 return componentwise error bounds. */ + +/* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. 
*/ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". 
These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* RES (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate residual. */ + +/* AYB (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace. This can be the same workspace passed for Y_TAIL. */ + +/* DY (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate solution. */ + +/* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the trailing bits of the intermediate solution. */ + +/* RCOND (input) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* ITHRESH (input) INTEGER */ +/* The maximum number of residual computations allowed for */ +/* refinement. The default is 10. For 'aggressive' set to 100 to */ +/* permit convergence using approximate factorizations or */ +/* factorizations other than LU. If the factorization uses a */ +/* technique other than Gaussian elimination, the guarantees in */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. 
*/ + +/* RTHRESH (input) DOUBLE PRECISION */ +/* Determines when to stop refinement if the error estimate stops */ +/* decreasing. Refinement will stop when the next solution no longer */ +/* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* for more details. */ + +/* DZ_UB (input) DOUBLE PRECISION */ +/* Determines when to start considering componentwise convergence. */ +/* Componentwise convergence is only considered after each component */ +/* of the solution Y is stable, which we define as the relative */ +/* change in each component being less than DZ_UB. The default value */ +/* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* more details. */ + +/* IGNORE_CWISE (input) LOGICAL */ +/* If .TRUE. then ignore componentwise convergence. Default value */ +/* is .FALSE.. */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument to DGETRS had an illegal */ +/* value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + errs_c_dim1 = *nrhs; + errs_c_offset = 1 + errs_c_dim1; + errs_c__ -= errs_c_offset; + errs_n_dim1 = *nrhs; + errs_n_offset = 1 + errs_n_dim1; + errs_n__ -= errs_n_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__); + *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; + eps = _starpu_dlamch_("Epsilon"); + hugeval = _starpu_dlamch_("Overflow"); +/* Force HUGEVAL to Inf */ + hugeval *= hugeval; +/* Using HUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (doublereal) (*n) * eps; + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.; + } + } + dxrat = 0.; + dxratmax = 0.; + dzrat = 0.; + dzratmax = 0.; + final_dx_x__ = hugeval; + final_dz_z__ = hugeval; + prevnormdx = hugeval; + prev_dz_z__ = hugeval; + dz_z__ = hugeval; + dx_x__ = hugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). 
*/ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + + 1], &c__1, &c_b8, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + _starpu_blas_dgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, & + y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, + prec_type__); + } else { + blas_dgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, + &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[ + 1], &c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); + _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], + n, info); + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.; + normy = 0.; + normdx = 0.; + dz_z__ = 0.; + ymin = hugeval; + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); + dyk = (d__1 = dy[i__], abs(d__1)); + if (yk != 0.) { +/* Computing MAX */ + d__1 = dz_z__, d__2 = dyk / yk; + dz_z__ = max(d__1,d__2); + } else if (dyk != 0.) { + dz_z__ = hugeval; + } + ymin = min(ymin,yk); + normy = max(normy,yk); + if (*colequ) { +/* Computing MAX */ + d__1 = normx, d__2 = yk * c__[i__]; + normx = max(d__1,d__2); +/* Computing MAX */ + d__1 = normdx, d__2 = dyk * c__[i__]; + normdx = max(d__1,d__2); + } else { + normx = normy; + normdx = max(normdx,dyk); + } + } + if (normx != 0.) { + dx_x__ = normdx / normx; + } else if (normdx == 0.) { + dx_x__ = 0.; + } else { + dx_x__ = hugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria */ + + if (! 
(*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy + && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.; + final_dz_z__ = hugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + +/* Exit if both normwise and componentwise stopped working, */ +/* but if componentwise is unstable, let it go at least two */ +/* iterations. */ + + if (x_state__ != 1) { + if (*ignore_cwise__) { + goto L666; + } + if (z_state__ == 3 || z_state__ == 2) { + goto L666; + } + if (z_state__ == 0 && cnt > 1) { + goto L666; + } + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update soluton. */ + + if (y_prec_state__ < 2) { + _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. 
*/ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds */ + + if (*n_norms__ >= 1) { + errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax); + } + if (*n_norms__ >= 2) { + errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], & + c__1, &c_b8, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + _starpu_dla_geamv__(trans_type__, n, n, &c_b8, &a[a_offset], lda, &y[j * + y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); + _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS. */ + + } + + return 0; +} /* _starpu_dla_gerfsx_extended__ */ diff --git a/min-dgels/base/SRC/dla_lin_berr.c b/min-dgels/base/SRC/dla_lin_berr.c new file mode 100644 index 0000000..c9db6de --- /dev/null +++ b/min-dgels/base/SRC/dla_lin_berr.c @@ -0,0 +1,124 @@ +/* _starpu_dla_lin_berr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, + doublereal *res, doublereal *ayb, doublereal *berr) +{ + /* System generated locals */ + integer ayb_dim1, ayb_offset, res_dim1, res_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j; + doublereal tmp, safe1; + extern doublereal _starpu_dlamch_(char *); + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_LIN_BERR computes componentwise relative backward error from */ +/* the formula */ +/* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Arguments */ +/* ========== */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NZ (input) INTEGER */ +/* We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to */ +/* guard against spuriously zero residuals. Default value is N. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices AYB, RES, and BERR. 
NRHS >= 0. */ + +/* RES (input) DOUBLE PRECISION array, dimension (N,NRHS) */ +/* The residual matrix, i.e., the matrix R in the relative backward */ +/* error formula above. */ + +/* AYB (input) DOUBLE PRECISION array, dimension (N, NRHS) */ +/* The denominator in the relative backward error formula above, i.e., */ +/* the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B */ +/* are from iterative refinement (see _starpu_dla_gerfsx_extended.f). */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error from the formula above. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Adding SAFE1 to the numerator guards against spuriously zero */ +/* residuals. A similar safeguard is in the SLA_yyAMV routine used */ +/* to compute AYB. */ + + /* Parameter adjustments */ + --berr; + ayb_dim1 = *n; + ayb_offset = 1 + ayb_dim1; + ayb -= ayb_offset; + res_dim1 = *n; + res_offset = 1 + res_dim1; + res -= res_offset; + + /* Function Body */ + safe1 = _starpu_dlamch_("Safe minimum"); + safe1 = (*nz + 1) * safe1; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (ayb[i__ + j * ayb_dim1] != 0.) { + tmp = (safe1 + (d__1 = res[i__ + j * res_dim1], abs(d__1))) / + ayb[i__ + j * ayb_dim1]; +/* Computing MAX */ + d__1 = berr[j]; + berr[j] = max(d__1,tmp); + } + +/* If AYB is exactly 0.0 (and if computed by SLA_yyAMV), then we know */ +/* the true residual also must be exactly 0.0. 
*/ + + } + } + return 0; +} /* _starpu_dla_lin_berr__ */ diff --git a/min-dgels/base/SRC/dla_porcond.c b/min-dgels/base/SRC/dla_porcond.c new file mode 100644 index 0000000..ee3bec5 --- /dev/null +++ b/min-dgels/base/SRC/dla_porcond.c @@ -0,0 +1,309 @@ +/* _starpu_dla_porcond.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, + integer *info, doublereal *work, integer *iwork, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Local variables */ + integer i__, j; + logical up; + doublereal tmp; + integer kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. 
of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_PORCOND Estimates the Skeel condition number of op(A) * op2(C) */ +/* where op2 is determined by CMODE as follows */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ +/* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* is computed by computing scaling factors R such that */ +/* diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* infinity-norm condition number. */ + +/* Arguments */ +/* ========== */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* CMODE (input) INTEGER */ +/* Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector C in the formula op(A) * op2(C). */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument had an illegal value. */ + +/* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ +/* Workspace. */ + +/* IWORK (input) INTEGER array, dimension (N). */ +/* Workspace. */ + +/* ===================================================================== */ + +/* .. 
Local Scalars .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --c__; + --work; + --iwork; + + /* Function Body */ + ret_val = 0.; + + *info = 0; + if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLA_PORCOND", &i__1); + return ret_val; + } + if (*n == 0) { + ret_val = 1.; + return ret_val; + } + up = FALSE_; + if (_starpu_lsame_(uplo, "U")) { + up = TRUE_; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. */ + + if (up) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; + if (*cmode == 1) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + 
i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } + +/* Estimate the norm of inv(op(A)). */ + + ainvnm = 0.; + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (up) { + _starpu_dpotrs_("Upper", n, &c__1, &af[af_offset], ldaf, &work[1], n, + info); + } else { + _starpu_dpotrs_("Lower", n, &c__1, &af[af_offset], ldaf, &work[1], n, + info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C'). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (up) { + _starpu_dpotrs_("Upper", n, &c__1, &af[af_offset], ldaf, &work[1], n, + info); + } else { + _starpu_dpotrs_("Lower", n, &c__1, &af[af_offset], ldaf, &work[1], n, + info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + ret_val = 1. 
/ ainvnm; + } + + return ret_val; + +} /* _starpu_dla_porcond__ */ diff --git a/min-dgels/base/SRC/dla_porfsx_extended.c b/min-dgels/base/SRC/dla_porfsx_extended.c new file mode 100644 index 0000000..0969d11 --- /dev/null +++ b/min-dgels/base/SRC/dla_porfsx_extended.c @@ -0,0 +1,602 @@ +/* _starpu_dla_porfsx_extended.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b9 = -1.; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, + integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, + integer *n_norms__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, + doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal + *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, + y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + doublereal dxratmax, dzratmax; + integer i__, j; + logical incr_prec__; + extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal * + , doublereal *, integer 
*, doublereal *, integer *, doublereal *, + doublereal *, integer *); + doublereal prev_dz_z__, yk, final_dx_x__; + extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, + doublereal *, doublereal *); + doublereal final_dz_z__, prevnormdx; + integer cnt; + doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; + extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * + , doublereal *, doublereal *, doublereal *); + doublereal ymin; + integer y_prec_state__; + extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + integer uplo2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer * +); + doublereal dxrat, dzrat; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal normx, normy; + extern doublereal _starpu_dlamch_(char *); + doublereal normdx; + extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + doublereal hugeval; + extern integer _starpu_ilauplo_(char *); + integer x_state__, z_state__; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. 
-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_PORFSX_EXTENDED improves the computed solution to a system of */ +/* linear equations by performing extra-precise iterative refinement */ +/* and provides error bounds and backward error estimates for the solution. */ +/* This subroutine is called by DPORFSX to perform iterative refinement. */ +/* In addition to normwise error bound, the code provides maximum */ +/* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP for details of the error bounds. Note that this */ +/* subroutine is only responsible for setting the second fields of */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP. */ + +/* Arguments */ +/* ========= */ + +/* PREC_TYPE (input) INTEGER */ +/* Specifies the intermediate precision to be used in refinement. */ +/* The value is defined by ILAPREC(P) where P is a CHARACTER and */ +/* P = 'S': Single */ +/* = 'D': Double */ +/* = 'I': Indigenous */ +/* = 'X', 'E': Extra */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right-hand-sides, i.e., the number of columns of the */ +/* matrix B. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* COLEQU (input) LOGICAL */ +/* If .TRUE. 
then column equilibration was done to A before calling */ +/* this routine. This is needed to compute the solution and error */ +/* bounds correctly. */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If COLEQU = .FALSE., C */ +/* is not accessed. If C is input, each element of C should be a power */ +/* of the radix to ensure a reliable solution and error estimates. */ +/* Scaling by powers of the radix does not cause rounding errors unless */ +/* the result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right-hand-side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Y (input/output) DOUBLE PRECISION array, dimension */ +/* (LDY,NRHS) */ +/* On entry, the solution matrix X, as computed by DPOTRS. */ +/* On exit, the improved solution matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= max(1,N). */ + +/* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* error for right-hand-side j from the formula */ +/* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. This is computed by DLA_LIN_BERR. */ + +/* N_NORMS (input) INTEGER */ +/* Determines which error bounds to return (see ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP). */ +/* If N_NORMS >= 1 return normwise error bounds. */ +/* If N_NORMS >= 2 return componentwise error bounds. 
*/ + +/* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. 
*/ + +/* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. 
*/ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* RES (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate residual. */ + +/* AYB (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace. This can be the same workspace passed for Y_TAIL. */ + +/* DY (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate solution. */ + +/* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the trailing bits of the intermediate solution. */ + +/* RCOND (input) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* ITHRESH (input) INTEGER */ +/* The maximum number of residual computations allowed for */ +/* refinement. The default is 10. For 'aggressive' set to 100 to */ +/* permit convergence using approximate factorizations or */ +/* factorizations other than LU. If the factorization uses a */ +/* technique other than Gaussian elimination, the guarantees in */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ + +/* RTHRESH (input) DOUBLE PRECISION */ +/* Determines when to stop refinement if the error estimate stops */ +/* decreasing. 
Refinement will stop when the next solution no longer */ +/* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* for more details. */ + +/* DZ_UB (input) DOUBLE PRECISION */ +/* Determines when to start considering componentwise convergence. */ +/* Componentwise convergence is only considered after each component */ +/* of the solution Y is stable, which we define as the relative */ +/* change in each component being less than DZ_UB. The default value */ +/* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* more details. */ + +/* IGNORE_CWISE (input) LOGICAL */ +/* If .TRUE. then ignore componentwise convergence. Default value */ +/* is .FALSE.. */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument to DPOTRS had an illegal */ +/* value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + eps = _starpu_dlamch_("Epsilon"); + hugeval = _starpu_dlamch_("Overflow"); +/* Force HUGEVAL to Inf */ + hugeval *= hugeval; +/* Using HUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (doublereal) (*n) * eps; + if (_starpu_lsame_(uplo, "L")) { + uplo2 = _starpu_ilauplo_("L"); + } else { + uplo2 = _starpu_ilauplo_("U"); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.; + } + } + dxrat = 0.; + dxratmax = 0.; + dzrat = 0.; + dzratmax = 0.; + final_dx_x__ = hugeval; + final_dz_z__ = hugeval; + prevnormdx = hugeval; + prev_dz_z__ = hugeval; + dz_z__ = hugeval; + dx_x__ = hugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). 
*/ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], + &c__1, &c_b11, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * + y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1, + prec_type__); + } else { + blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * + y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], & + c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); + _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &dy[1], n, info); + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.; + normy = 0.; + normdx = 0.; + dz_z__ = 0.; + ymin = hugeval; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); + dyk = (d__1 = dy[i__], abs(d__1)); + if (yk != 0.) { +/* Computing MAX */ + d__1 = dz_z__, d__2 = dyk / yk; + dz_z__ = max(d__1,d__2); + } else if (dyk != 0.) { + dz_z__ = hugeval; + } + ymin = min(ymin,yk); + normy = max(normy,yk); + if (*colequ) { +/* Computing MAX */ + d__1 = normx, d__2 = yk * c__[i__]; + normx = max(d__1,d__2); +/* Computing MAX */ + d__1 = normdx, d__2 = dyk * c__[i__]; + normdx = max(d__1,d__2); + } else { + normx = normy; + normdx = max(normdx,dyk); + } + } + if (normx != 0.) { + dx_x__ = normdx / normx; + } else if (normdx == 0.) { + dx_x__ = 0.; + } else { + dx_x__ = hugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria. 
*/ + + if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.; + final_dz_z__ = hugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) { + goto L666; + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update solution. */ + + if (y_prec_state__ < 2) { + _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. */ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds. 
*/ + + if (*n_norms__ >= 1) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( + 1 - dxratmax); + } + if (*n_norms__ >= 2) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( + 1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, & + c_b11, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1], + &c__1, &c_b11, &ayb[1], &c__1); + _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS. */ + + } + + return 0; +} /* _starpu_dla_porfsx_extended__ */ diff --git a/min-dgels/base/SRC/dla_porpvgrw.c b/min-dgels/base/SRC/dla_porpvgrw.c new file mode 100644 index 0000000..c03f791 --- /dev/null +++ b/min-dgels/base/SRC/dla_porpvgrw.c @@ -0,0 +1,197 @@ +/* _starpu_dla_porpvgrw.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal amax, umax; + extern logical _starpu_lsame_(char *, char *); + logical upper; + doublereal rpvgrw; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_PORPVGRW computes the reciprocal pivot growth factor */ +/* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ +/* much less than 1, the stability of the LU factorization of the */ +/* (equilibrated) matrix A could be poor. This also means that the */ +/* solution X, estimated condition numbers, and error bounds could be */ +/* unreliable. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* NCOLS (input) INTEGER */ +/* The number of columns of the matrix A. NCOLS >= 0. 
*/ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* WORK (input) DOUBLE PRECISION array, dimension (2*N) */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --work; + + /* Function Body */ + upper = _starpu_lsame_("Upper", uplo); + +/* DPOTRF will have factored only the NCOLSxNCOLS leading minor, so */ +/* we restrict the growth search to that minor and use only the first */ +/* 2*NCOLS workspace entries. */ + + rpvgrw = 1.; + i__1 = *ncols << 1; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + +/* Find the max magnitude entry of each column. */ + + if (upper) { + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + ncols + j]; + work[*ncols + j] = max(d__2,d__3); + } + } + } else { + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + i__2 = *ncols; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + ncols + j]; + work[*ncols + j] = max(d__2,d__3); + } + } + } + +/* Now find the max magnitude entry of each column of the factor in */ +/* AF. No pivoting, so no permutations. 
*/ + + if (_starpu_lsame_("Upper", uplo)) { + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)), d__3 = work[ + j]; + work[j] = max(d__2,d__3); + } + } + } else { + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + i__2 = *ncols; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)), d__3 = work[ + j]; + work[j] = max(d__2,d__3); + } + } + } + +/* Compute the *inverse* of the max element growth factor. Dividing */ +/* by zero would imply the largest entry of the factor's column is */ +/* zero. Than can happen when either the column of A is zero or */ +/* massive pivots made the factor underflow to zero. Neither counts */ +/* as growth in itself, so simply ignore terms with zero */ +/* denominators. */ + + if (_starpu_lsame_("Upper", uplo)) { + i__1 = *ncols; + for (i__ = 1; i__ <= i__1; ++i__) { + umax = work[i__]; + amax = work[*ncols + i__]; + if (umax != 0.) { +/* Computing MIN */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + } else { + i__1 = *ncols; + for (i__ = 1; i__ <= i__1; ++i__) { + umax = work[i__]; + amax = work[*ncols + i__]; + if (umax != 0.) { +/* Computing MIN */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + } + ret_val = rpvgrw; + return ret_val; +} /* _starpu_dla_porpvgrw__ */ diff --git a/min-dgels/base/SRC/dla_rpvgrw.c b/min-dgels/base/SRC/dla_rpvgrw.c new file mode 100644 index 0000000..0f31e8b --- /dev/null +++ b/min-dgels/base/SRC/dla_rpvgrw.c @@ -0,0 +1,117 @@ +/* _starpu_dla_rpvgrw.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * + lda, doublereal *af, integer *ldaf) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal amax, umax, rpvgrw; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_RPVGRW computes the reciprocal pivot growth factor */ +/* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ +/* much less than 1, the stability of the LU factorization of the */ +/* (equilibrated) matrix A could be poor. This also means that the */ +/* solution X, estimated condition numbers, and error bounds could be */ +/* unreliable. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NCOLS (input) INTEGER */ +/* The number of columns of the matrix A. NCOLS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factors L and U from the factorization */ +/* A = P*L*U as computed by DGETRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + + /* Function Body */ + rpvgrw = 1.; + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + amax = 0.; + umax = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + amax = max(d__2,amax); + } + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)); + umax = max(d__2,umax); + } + if (umax != 0.) { +/* Computing MIN */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + ret_val = rpvgrw; + return ret_val; +} /* _starpu_dla_rpvgrw__ */ diff --git a/min-dgels/base/SRC/dla_syamv.c b/min-dgels/base/SRC/dla_syamv.c new file mode 100644 index 0000000..cf3085f --- /dev/null +++ b/min-dgels/base/SRC/dla_syamv.c @@ -0,0 +1,299 @@ +/* _starpu_dla_syamv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j; + logical symb_zero__; + integer iy, jx, kx, ky, info; + doublereal temp, safe1; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilauplo_(char *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_SYAMV performs the matrix-vector operation */ + +/* y := alpha*abs(A)*abs(x) + beta*abs(y), */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* n by n symmetric matrix. */ + +/* This function is primarily used in calculating error bounds. 
*/ +/* To protect against underflow during evaluation, components in */ +/* the resulting vector are perturbed away from zero by (N+1) */ +/* times the underflow threshold. To prevent unnecessarily large */ +/* errors for block-structure embedded in general matrices, */ +/* "symbolically" zero components are not perturbed. A zero */ +/* entry is considered "symbolic" if all multiplications involved */ +/* in computing that entry have at least one zero multiplicand. */ + +/* Parameters */ +/* ========== */ + +/* UPLO - INTEGER */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array A is to be referenced as */ +/* follows: */ + +/* UPLO = BLAS_UPPER Only the upper triangular part of A */ +/* is to be referenced. */ + +/* UPLO = BLAS_LOWER Only the lower triangular part of A */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, n ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION . */ +/* On entry, BETA specifies the scalar beta. 
When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ +/* -- Modified for the absolute-value product, April 2006 */ +/* Jason Riedy, UC Berkeley */ + +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (*uplo != _starpu_ilauplo_("U") && *uplo != _starpu_ilauplo_("L") + ) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + _starpu_xerbla_("DLA_SYAMV", &info); /* report this routine's own name, not DSYMV */ + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Set SAFE1 essentially to be the underflow threshold times the */ +/* number of additions in each row. 
*/ + + safe1 = _starpu_dlamch_("Safe minimum"); + safe1 = (*n + 1) * safe1; + +/* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ + +/* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to */ +/* the inexact flag. Still doesn't help change the iteration order */ +/* to per-column. */ + + iy = ky; + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + if (*alpha != 0.) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + if (*uplo == _starpu_ilauplo_("U")) { + if (i__ <= j) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } else { + if (i__ >= j) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } + symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); + y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.) { + symb_zero__ = TRUE_; + y[iy] = 0.; + } else if (y[iy] == 0.) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (d__1 = y[iy], abs(d__1)); + } + jx = kx; + if (*alpha != 0.) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + if (*uplo == _starpu_ilauplo_("U")) { + if (i__ <= j) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } else { + if (i__ >= j) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } else { + temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } + symb_zero__ = symb_zero__ && (x[jx] == 0. || temp == 0.); /* test the strided element x[jx] that is multiplied on the next line */ + y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; + jx += *incx; + } + } + if (! 
symb_zero__) { + y[iy] += d_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + + return 0; + +/* End of DLA_SYAMV */ + +} /* _starpu_dla_syamv__ */ diff --git a/min-dgels/base/SRC/dla_syrcond.c b/min-dgels/base/SRC/dla_syrcond.c new file mode 100644 index 0000000..54f7466 --- /dev/null +++ b/min-dgels/base/SRC/dla_syrcond.c @@ -0,0 +1,322 @@ +/* _starpu_dla_syrcond.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, + doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, + doublereal *c__, integer *info, doublereal *work, integer *iwork, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Local variables */ + integer i__, j; + logical up; + doublereal tmp; + integer kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + char normin[1]; + doublereal smlnum; + extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, 
Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_SYRCOND estimates the Skeel condition number of op(A) * op2(C) */ +/* where op2 is determined by CMODE as follows */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ +/* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* is computed by computing scaling factors R such that */ +/* diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* infinity-norm condition number. */ + +/* Arguments */ +/* ========== */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSYTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. 
*/ + +/* CMODE (input) INTEGER */ +/* Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* CMODE = 1 op2(C) = C */ +/* CMODE = 0 op2(C) = I */ +/* CMODE = -1 op2(C) = inv(C) */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector C in the formula op(A) * op2(C). */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* i > 0: The ith argument is invalid. */ + +/* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ +/* Workspace. */ + +/* IWORK (input) INTEGER array, dimension (N). */ +/* Workspace. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --c__; + --work; + --iwork; + + /* Function Body */ + ret_val = 0.; + + *info = 0; + if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLA_SYRCOND", &i__1); + return ret_val; + } + if (*n == 0) { + ret_val = 1.; + return ret_val; + } + up = FALSE_; + if (_starpu_lsame_(uplo, "U")) { + up = TRUE_; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. 
*/ + + if (up) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; /* absolute row sum for row i, read from the upper triangle of a */ + if (*cmode == 1) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; /* row sum i is kept in work[2*n + i] */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.; /* absolute row sum for row i, read from the lower triangle of a */ + if (*cmode == 1) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); + } + } else if (*cmode == 0) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); + } + } + work[(*n << 1) + i__] = tmp; /* row sum i is kept in work[2*n + i] */ + } + } + +/* Estimate the norm of inv(op(A)). */ + + smlnum = _starpu_dlamch_("Safe minimum"); /* NOTE(review): smlnum is assigned here but not read afterwards in this routine */ + ainvnm = 0.; + *(unsigned char *)normin = 'N'; /* NOTE(review): normin is likewise written but not read */ + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (up) { + _starpu_dsytrs_("U", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } else { + _starpu_dsytrs_("L", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C'). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (up) { + _starpu_dsytrs_("U", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } else { + _starpu_dsytrs_("L", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + ret_val = 1. / ainvnm; + } + + return ret_val; + +} /* _starpu_dla_syrcond__ */ diff --git a/min-dgels/base/SRC/dla_syrfsx_extended.c b/min-dgels/base/SRC/dla_syrfsx_extended.c new file mode 100644 index 0000000..51eabc8 --- /dev/null +++ b/min-dgels/base/SRC/dla_syrfsx_extended.c @@ -0,0 +1,608 @@ +/* _starpu_dla_syrfsx_extended.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b9 = -1.; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, + integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * + af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, + doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * + berr_out__, integer *n_norms__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, doublereal *res, doublereal *ayb, + doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer * + ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * + ignore_cwise__, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, + y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + doublereal dxratmax, dzratmax; + integer i__, j; + logical incr_prec__; + extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal * + , doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *); + doublereal prev_dz_z__, yk, final_dx_x__; + extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, + doublereal *, doublereal *); + doublereal final_dz_z__, prevnormdx; + integer cnt; + doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; + extern /* 
Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * + , doublereal *, doublereal *, doublereal *); + doublereal ymin; + integer y_prec_state__; + extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + integer uplo2; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer * +); + doublereal dxrat, dzrat; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal normx, normy; + extern doublereal _starpu_dlamch_(char *); + doublereal normdx; + extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal hugeval; + extern integer _starpu_ilauplo_(char *); + integer x_state__, z_state__; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLA_SYRFSX_EXTENDED improves the computed solution to a system of */ +/* linear equations by performing extra-precise iterative refinement */ +/* and provides error bounds and backward error estimates for the solution. */ +/* This subroutine is called by DSYRFSX to perform iterative refinement. */ +/* In addition to normwise error bound, the code provides maximum */ +/* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP for details of the error bounds. Note that this */ +/* subroutine is only responsible for setting the second fields of */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP. */ + +/* Arguments */ +/* ========= */ + +/* PREC_TYPE (input) INTEGER */ +/* Specifies the intermediate precision to be used in refinement. */ +/* The value is defined by ILAPREC(P) where P is a CHARACTER and */ +/* P = 'S': Single */ +/* = 'D': Double */ +/* = 'I': Indigenous */ +/* = 'X', 'E': Extra */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right-hand-sides, i.e., the number of columns of the */ +/* matrix B. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSYTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* COLEQU (input) LOGICAL */ +/* If .TRUE. 
then column equilibration was done to A before calling */ +/* this routine. This is needed to compute the solution and error */ +/* bounds correctly. */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. If COLEQU = .FALSE., C */ +/* is not accessed. If C is input, each element of C should be a power */ +/* of the radix to ensure a reliable solution and error estimates. */ +/* Scaling by powers of the radix does not cause rounding errors unless */ +/* the result underflows or overflows. Rounding errors during scaling */ +/* lead to refining with a matrix that is not equivalent to the */ +/* input matrix, producing error estimates that may not be */ +/* reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right-hand-side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Y (input/output) DOUBLE PRECISION array, dimension */ +/* (LDY,NRHS) */ +/* On entry, the solution matrix X, as computed by DSYTRS. */ +/* On exit, the improved solution matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= max(1,N). */ + +/* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* error for right-hand-side j from the formula */ +/* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. This is computed by DLA_LIN_BERR. */ + +/* N_NORMS (input) INTEGER */ +/* Determines which error bounds to return (see ERR_BNDS_NORM */ +/* and ERR_BNDS_COMP). */ +/* If N_NORMS >= 1 return normwise error bounds. */ +/* If N_NORMS >= 2 return componentwise error bounds. 
*/ + +/* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. 
*/ + +/* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ +/* (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * slamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * slamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. 
*/ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* This subroutine is only responsible for setting the second field */ +/* above. */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* RES (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate residual. */ + +/* AYB (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace. This can be the same workspace passed for Y_TAIL. */ + +/* DY (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the intermediate solution. */ + +/* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ +/* Workspace to hold the trailing bits of the intermediate solution. */ + +/* RCOND (input) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* ITHRESH (input) INTEGER */ +/* The maximum number of residual computations allowed for */ +/* refinement. The default is 10. For 'aggressive' set to 100 to */ +/* permit convergence using approximate factorizations or */ +/* factorizations other than LU. If the factorization uses a */ +/* technique other than Gaussian elimination, the guarantees in */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ + +/* RTHRESH (input) DOUBLE PRECISION */ +/* Determines when to stop refinement if the error estimate stops */ +/* decreasing. 
Refinement will stop when the next solution no longer */ +/* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* for more details. */ + +/* DZ_UB (input) DOUBLE PRECISION */ +/* Determines when to start considering componentwise convergence. */ +/* Componentwise convergence is only considered after each component */ +/* of the solution Y is stable, which we define as the relative */ +/* change in each component being less than DZ_UB. The default value */ +/* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* more details. */ + +/* IGNORE_CWISE (input) LOGICAL */ +/* If .TRUE. then ignore componentwise convergence. Default value */ +/* is .FALSE.. */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. */ +/* < 0: if INFO = -i, the ith argument to DSYTRS had an illegal */ +/* value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + eps = _starpu_dlamch_("Epsilon"); + hugeval = _starpu_dlamch_("Overflow"); +/* Force HUGEVAL to Inf */ + hugeval *= hugeval; +/* Using HUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (doublereal) (*n) * eps; + if (_starpu_lsame_(uplo, "L")) { + uplo2 = _starpu_ilauplo_("L"); + } else { + uplo2 = _starpu_ilauplo_("U"); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.; + } + } + dxrat = 0.; + dxratmax = 0.; + dzrat = 0.; + dzratmax = 0.; + final_dx_x__ = hugeval; + final_dz_z__ = hugeval; + prevnormdx = hugeval; + prev_dz_z__ = hugeval; + dz_z__ = hugeval; + dx_x__ = hugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). 
*/ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], + &c__1, &c_b11, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * + y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1, + prec_type__); + } else { + blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * + y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], & + c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); + _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], n, + info); /* dy is one length-N vector: solve for a single right-hand side, not nrhs of them */ + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.; + normy = 0.; + normdx = 0.; + dz_z__ = 0.; + ymin = hugeval; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); + dyk = (d__1 = dy[i__], abs(d__1)); + if (yk != 0.) { +/* Computing MAX */ + d__1 = dz_z__, d__2 = dyk / yk; + dz_z__ = max(d__1,d__2); + } else if (dyk != 0.) { + dz_z__ = hugeval; + } + ymin = min(ymin,yk); + normy = max(normy,yk); + if (*colequ) { +/* Computing MAX */ + d__1 = normx, d__2 = yk * c__[i__]; + normx = max(d__1,d__2); +/* Computing MAX */ + d__1 = normdx, d__2 = dyk * c__[i__]; + normdx = max(d__1,d__2); + } else { + normx = normy; + normdx = max(normdx,dyk); + } + } + if (normx != 0.) { + dx_x__ = normdx / normx; + } else if (normdx == 0.) { + dx_x__ = 0.; + } else { + dx_x__ = hugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria. 
*/ + + if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.; + final_dz_z__ = hugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) { + goto L666; + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update solution. */ + + if (y_prec_state__ < 2) { + _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. */ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds. 
*/ + + if (*n_norms__ >= 1) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( + 1 - dxratmax); + } + if (*n_norms__ >= 2) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( + 1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, & + c_b11, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1], + &c__1, &c_b11, &ayb[1], &c__1); + _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS. */ + + } + + return 0; +} /* _starpu_dla_syrfsx_extended__ */ diff --git a/min-dgels/base/SRC/dla_syrpvgrw.c b/min-dgels/base/SRC/dla_syrpvgrw.c new file mode 100644 index 0000000..e54d4e8 --- /dev/null +++ b/min-dgels/base/SRC/dla_syrpvgrw.c @@ -0,0 +1,330 @@ +/* _starpu_dla_syrpvgrw.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * + a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, + doublereal *work, ftnlen uplo_len) +{ /* NOTE(review): uplo_len is never referenced in this body; presumably the f2c hidden string length for UPLO -- confirm. */ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k, kp; + doublereal tmp, amax, umax; + extern logical _starpu_lsame_(char *, char *); + integer ncols; + logical upper; + doublereal rpvgrw; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_SYRPVGRW computes the reciprocal pivot growth factor */ +/* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ +/* much less than 1, the stability of the LU factorization of the */ +/* (equilibrated) matrix A could be poor. This also means that the */ +/* solution X, estimated condition numbers, and error bounds could be */ +/* unreliable. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored.
*/ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* INFO (input) INTEGER */ +/* The value of INFO returned from DSYTRF, i.e., the pivot in */ +/* column INFO is exactly 0. */ + +/* NCOLS (local scalar, not an argument of this routine) INTEGER */ +/* Set below: INFO when INFO is nonzero, otherwise 1 (upper) or N (lower). */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSYTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* Zeroed below; WORK(1:N) collects max magnitudes taken from AF, WORK(N+1:2N) those taken from A. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --work; + + /* Function Body */ + upper = _starpu_lsame_("Upper", uplo); + if (*info == 0) { + if (upper) { + ncols = 1; + } else { + ncols = *n; + } + } else { + ncols = *info; + } + rpvgrw = 1.; + i__1 = *n << 1; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + +/* Find the max magnitude entry of each column of A. Compute the max */ +/* for all N columns so we can apply the pivot permutation while */ +/* looping below. Assume a full factorization is the common case.
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + n + i__]; + work[*n + i__] = max(d__2,d__3); +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + n + j]; + work[*n + j] = max(d__2,d__3); + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + n + i__]; + work[*n + i__] = max(d__2,d__3); +/* Computing MAX */ + d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* + n + j]; + work[*n + j] = max(d__2,d__3); + } + } + } + +/* Now find the max magnitude entry of each column of U or L. Also */ +/* permute the magnitudes of A above so they're in the same order as */ +/* the factor. */ + +/* The iteration orders and permutations were copied from dsytrs. */ +/* Calls to SSWAP would be severe overkill.
*/ /* NOTE(review): in the upper branch k starts at *n, so `k < ncols` is already false whenever ncols <= *n and the first while loop is then skipped entirely; confirm against the reference DLA_SYRPVGRW before relying on WORK(1:N) here. */ + + if (upper) { + k = *n; + while(k < ncols && k > 0) { + if (ipiv[k] > 0) { +/* 1x1 pivot */ + kp = ipiv[k]; + if (kp != k) { + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + } + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = + work[k]; + work[k] = max(d__2,d__3); + } + --k; + } else { +/* 2x2 pivot */ + kp = -ipiv[k]; + tmp = work[*n + k - 1]; + work[*n + k - 1] = work[*n + kp]; + work[*n + kp] = tmp; + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = + work[k]; + work[k] = max(d__2,d__3); +/* Computing MAX */ + d__2 = (d__1 = af[i__ + (k - 1) * af_dim1], abs(d__1)), + d__3 = work[k - 1]; + work[k - 1] = max(d__2,d__3); + } +/* Computing MAX */ + d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k] + ; + work[k] = max(d__2,d__3); + k += -2; + } + } + k = ncols; + while(k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + } + ++k; + } else { + kp = -ipiv[k]; + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + k += 2; + } + } + } else { + k = 1; + while(k <= ncols) { + if (ipiv[k] > 0) { +/* 1x1 pivot */ + kp = ipiv[k]; + if (kp != k) { + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + } + i__1 = *n; + for (i__ = k; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = + work[k]; + work[k] = max(d__2,d__3); + } + ++k; + } else { +/* 2x2 pivot */ + kp = -ipiv[k]; + tmp = work[*n + k + 1]; + work[*n + k + 1] = work[*n + kp]; + work[*n + kp] = tmp; + i__1 = *n; + for (i__ = k + 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = + work[k]; + work[k] = max(d__2,d__3); +/* Computing MAX */ + d__2 = (d__1 = af[i__ + (k + 1)
* af_dim1], abs(d__1)), + d__3 = work[k + 1]; + work[k + 1] = max(d__2,d__3); + } +/* Computing MAX */ + d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k] + ; + work[k] = max(d__2,d__3); + k += 2; + } + } + k = ncols; + while(k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + } + --k; + } else { + kp = -ipiv[k]; + tmp = work[*n + k]; + work[*n + k] = work[*n + kp]; + work[*n + kp] = tmp; + k += -2; + } + } + } + +/* Compute the *inverse* of the max element growth factor. Dividing */ +/* by zero would imply the largest entry of the factor's column is */ +/* zero. That can happen when either the column of A is zero or */ +/* massive pivots made the factor underflow to zero. Neither counts */ +/* as growth in itself, so simply ignore terms with zero */ +/* denominators. */ + + if (upper) { + i__1 = *n; + for (i__ = ncols; i__ <= i__1; ++i__) { + umax = work[i__]; + amax = work[*n + i__]; + if (umax != 0.) { +/* Computing MIN */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + } else { + i__1 = ncols; + for (i__ = 1; i__ <= i__1; ++i__) { + umax = work[i__]; + amax = work[*n + i__]; + if (umax != 0.) { +/* Computing MIN */ + d__1 = amax / umax; + rpvgrw = min(d__1,rpvgrw); + } + } + } + ret_val = rpvgrw; + return ret_val; +} /* _starpu_dla_syrpvgrw__ */ diff --git a/min-dgels/base/SRC/dla_wwaddw.c b/min-dgels/base/SRC/dla_wwaddw.c new file mode 100644 index 0000000..fc83155 --- /dev/null +++ b/min-dgels/base/SRC/dla_wwaddw.c @@ -0,0 +1,80 @@ +/* _starpu_dla_wwaddw.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, + doublereal *w) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__; + doublereal s; + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLA_WWADDW adds a vector W into a doubled-single vector (X, Y). */ + +/* This works for all extant IBM's hex and binary floating point */ +/* arithmetics, but not for decimal. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The length of vectors X, Y, and W. */ + +/* X, Y (input/output) DOUBLE PRECISION array, length N */ +/* The doubled-single accumulation vector. */ + +/* W (input) DOUBLE PRECISION array, length N */ +/* The vector to be added. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements ..
*/ + + /* Parameter adjustments: shift the pointers so w, y and x are indexed from 1 in the loop below */ + --w; + --y; + --x; + + /* Function Body */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s = x[i__] + w[i__]; + s = s + s - s; /* NOTE(review): algebraically a no-op; presumably forces S to be rounded to working precision -- confirm before simplifying */ + y[i__] = x[i__] - s + w[i__] + y[i__]; /* Y absorbs x - s + w, using the old X; X is overwritten with s on the next statement */ + x[i__] = s; +/* L10: */ + } + return 0; +} /* _starpu_dla_wwaddw__ */ diff --git a/min-dgels/base/SRC/dlabad.c b/min-dgels/base/SRC/dlabad.c new file mode 100644 index 0000000..dda6e97 --- /dev/null +++ b/min-dgels/base/SRC/dlabad.c @@ -0,0 +1,72 @@ +/* dlabad.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large) +{ + /* Builtin functions */ + double d_lg10(doublereal *), sqrt(doublereal); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLABAD takes as input the values computed by DLAMCH for underflow and */ +/* overflow, and returns the square root of each of these values if the */ +/* log of LARGE is sufficiently large. This subroutine is intended to */ +/* identify machines with a large exponent range, such as the Crays, and */ +/* redefine the underflow and overflow limits to be the square roots of */ +/* the values computed by DLAMCH. This subroutine is needed because */ +/* DLAMCH does not compensate for poor arithmetic in the upper half of */ +/* the exponent range, as is found on a Cray.
*/ + +/* Arguments */ +/* ========= */ + +/* SMALL (input/output) DOUBLE PRECISION */ +/* On entry, the underflow threshold as computed by DLAMCH. */ +/* On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* root of SMALL, otherwise unchanged. */ + +/* LARGE (input/output) DOUBLE PRECISION */ +/* On entry, the overflow threshold as computed by DLAMCH. */ +/* On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* root of LARGE, otherwise unchanged. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* If it looks like we're on a Cray, take the square root of */ +/* SMALL and LARGE to avoid overflow and underflow problems. */ + + if (d_lg10(large) > 2e3) { /* "sufficiently large" is concretely d_lg10(large) > 2000; both values are left untouched otherwise */ + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* _starpu_dlabad_ */ diff --git a/min-dgels/base/SRC/dlabrd.c b/min-dgels/base/SRC/dlabrd.c new file mode 100644 index 0000000..45c7138 --- /dev/null +++ b/min-dgels/base/SRC/dlabrd.c @@ -0,0 +1,434 @@ +/* dlabrd.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b16 = 0.; + +/* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLABRD reduces the first NB rows and columns of a real general */ +/* m by n matrix A to upper or lower bidiagonal form by an orthogonal */ +/* transformation Q' * A * P, and returns the matrices X and Y which */ +/* are needed to apply the transformation to the unreduced part of A. */ + +/* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower */ +/* bidiagonal form.
*/ + +/* This is an auxiliary routine called by DGEBRD */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows in the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns in the matrix A. */ + +/* NB (input) INTEGER */ +/* The number of leading rows and columns of A to be reduced. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the m by n general matrix to be reduced. */ +/* On exit, the first NB rows and columns of the matrix are */ +/* overwritten; the rest of the array is unchanged. */ +/* If m >= n, elements on and below the diagonal in the first NB */ +/* columns, with the array TAUQ, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors; and */ +/* elements above the diagonal in the first NB rows, with the */ +/* array TAUP, represent the orthogonal matrix P as a product */ +/* of elementary reflectors. */ +/* If m < n, elements below the diagonal in the first NB */ +/* columns, with the array TAUQ, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors, and */ +/* elements on and above the diagonal in the first NB rows, */ +/* with the array TAUP, represent the orthogonal matrix P as */ +/* a product of elementary reflectors. */ +/* See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* D (output) DOUBLE PRECISION array, dimension (NB) */ +/* The diagonal elements of the first NB rows and columns of */ +/* the reduced matrix. D(i) = A(i,i). */ + +/* E (output) DOUBLE PRECISION array, dimension (NB) */ +/* The off-diagonal elements of the first NB rows and columns of */ +/* the reduced matrix. */ + +/* TAUQ (output) DOUBLE PRECISION array, dimension (NB) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix Q. See Further Details.
*/ + +/* TAUP (output) DOUBLE PRECISION array, dimension (NB) */ +/* The scalar factors of the elementary reflectors which */ +/* represent the orthogonal matrix P. See Further Details. */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NB) */ +/* The m-by-nb matrix X required to update the unreduced part */ +/* of A. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= M. */ + +/* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ +/* The n-by-nb matrix Y required to update the unreduced part */ +/* of A. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= N. */ + +/* Further Details */ +/* =============== */ + +/* The matrices Q and P are represented as products of elementary */ +/* reflectors: */ + +/* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) */ + +/* Each H(i) and G(i) has the form: */ + +/* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ + +/* where tauq and taup are real scalars, and v and u are real vectors. */ + +/* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in */ +/* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in */ +/* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ + +/* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in */ +/* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in */ +/* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ + +/* The elements of the vectors v and u together form the m-by-nb matrix */ +/* V and the nb-by-n matrix U' which are needed, with X and Y, to apply */ +/* the transformation to the unreduced part of the matrix, using a block */ +/* update of the form: A := A - V*Y' - X*U'.
*/ + +/* The contents of A on exit are illustrated by the following examples */ +/* with nb = 2: */ + +/* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ + +/* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) */ +/* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) */ +/* ( v1 v2 a a a ) ( v1 1 a a a a ) */ +/* ( v1 v2 a a a ) ( v1 v2 a a a a ) */ +/* ( v1 v2 a a a ) ( v1 v2 a a a a ) */ +/* ( v1 v2 a a a ) */ + +/* where a denotes an element of the original matrix which is unchanged, */ +/* vi denotes an element of the vector defining H(i), and ui an element */ +/* of the vector defining G(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body: the quick return announced above happens here, when either dimension is non-positive */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ <
*n) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, & + y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + _starpu_dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + ( + i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3, *n)* a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ +
(i__ + 1) * a_dim1], + lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b16, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + _starpu_dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b5, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + i__ * a_dim1], + lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * + a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; +
i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + _starpu_dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m)+ + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; +
_starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + _starpu_dgemv_("Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + _starpu_dgemv_("Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + _starpu_dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of DLABRD */ + +} /* _starpu_dlabrd_ */ diff --git a/min-dgels/base/SRC/dlacn2.c b/min-dgels/base/SRC/dlacn2.c new file mode 100644 index 0000000..6103198 --- /dev/null +++ b/min-dgels/base/SRC/dlacn2.c @@ -0,0 +1,267 @@ +/* dlacn2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase, integer *isave) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + integer i_dnnt(doublereal *); + + /* Local variables */ + integer i__; + doublereal temp; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int _starpu_dcopy_(integer *,
doublereal *, integer *, + doublereal *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal altsgn, estold; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLACN2 estimates the 1-norm of a square, real matrix A. */ +/* Reverse communication is used for evaluating matrix-vector products. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 1. */ + +/* V (workspace) DOUBLE PRECISION array, dimension (N) */ +/* On the final return, V = A*W, where EST = norm(V)/norm(W) */ +/* (W is not returned). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On an intermediate return, X should be overwritten by */ +/* A * X, if KASE=1, */ +/* A' * X, if KASE=2, */ +/* and DLACN2 must be re-called with all the other parameters */ +/* unchanged. */ + +/* ISGN (workspace) INTEGER array, dimension (N) */ + +/* EST (input/output) DOUBLE PRECISION */ +/* On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be */ +/* unchanged from the previous call to DLACN2. */ +/* On exit, EST is an estimate (a lower bound) for norm(A). */ + +/* KASE (input/output) INTEGER */ +/* On the initial call to DLACN2, KASE should be 0. */ +/* On an intermediate return, KASE will be 1 or 2, indicating */ +/* whether X should be overwritten by A * X or A' * X. */ +/* On the final return from DLACN2, KASE will again be 0. */ + +/* ISAVE (input/output) INTEGER array, dimension (3) */ +/* ISAVE is used to save variables between calls to DLACN2 */ + +/* Further Details */ +/* ======= ======= */ + +/* Contributed by Nick Higham, University of Manchester. */ +/* Originally named SONEST, dated March 16, 1988. */ + +/* Reference: N.J.
Higham, "FORTRAN codes for estimating the one-norm of */ +/* a real or complex matrix, with applications to condition estimation", */ +/* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. */ + +/* This is a thread safe version of DLACON, which uses the array ISAVE */ +/* in place of a SAVE statement, as follows: */ + +/* DLACON DLACN2 */ +/* JUMP ISAVE(1) */ +/* J ISAVE(2) */ +/* ITER ISAVE(3) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --isave; + --isgn; + --x; + --v; + + /* Function Body */ + if (*kase == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 1. / (doublereal) (*n); +/* L10: */ + } + *kase = 1; + isave[1] = 1; + return 0; + } + + switch (isave[1]) { + case 1: goto L20; + case 2: goto L40; + case 3: goto L70; + case 4: goto L110; + case 5: goto L140; + } /* NOTE(review): there is no default case, so any ISAVE(1) outside 1..5 falls through into the L20 code below */ + +/* ................ ENTRY (ISAVE( 1 ) = 1) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ + +L20: + if (*n == 1) { + v[1] = x[1]; + *est = abs(v[1]); +/* ... QUIT */ + goto L150; + } + *est = _starpu_dasum_(n, &x[1], &c__1); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = d_sign(&c_b11, &x[i__]); + isgn[i__] = i_dnnt(&x[i__]); +/* L30: */ + } + *kase = 2; + isave[1] = 2; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 2) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L40: + isave[2] = _starpu_idamax_(n, &x[1], &c__1); + isave[3] = 2; + +/* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ + +L50: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 0.; +/* L60: */ + } + x[isave[2]] = 1.; + *kase = 1; + isave[1] = 3; + return 0; + +/* ................
ENTRY (ISAVE( 1 ) = 3) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L70: + _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); + estold = *est; + *est = _starpu_dasum_(n, &v[1], &c__1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = d_sign(&c_b11, &x[i__]); + if (i_dnnt(&d__1) != isgn[i__]) { + goto L90; + } +/* L80: */ + } +/* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ + goto L120; + +L90: +/* TEST FOR CYCLING. */ + if (*est <= estold) { + goto L120; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = d_sign(&c_b11, &x[i__]); + isgn[i__] = i_dnnt(&x[i__]); +/* L100: */ + } + *kase = 2; + isave[1] = 4; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 4) */ +/* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L110: + jlast = isave[2]; + isave[2] = _starpu_idamax_(n, &x[1], &c__1); + if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) { /* another pass through L50 is taken only while the iteration counter ISAVE(3) is below 5 */ + ++isave[3]; + goto L50; + } + +/* ITERATION COMPLETE. FINAL STAGE. */ + +L120: + altsgn = 1.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = altsgn * ((doublereal) (i__ - 1) / (doublereal) (*n - 1) + + 1.); + altsgn = -altsgn; +/* L130: */ + } + *kase = 1; + isave[1] = 5; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 5) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L140: + temp = _starpu_dasum_(n, &x[1], &c__1) / (doublereal) (*n * 3) * 2.; + if (temp > *est) { + _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); + *est = temp; + } + +L150: + *kase = 0; + return 0; + +/* End of DLACN2 */ + +} /* _starpu_dlacn2_ */ diff --git a/min-dgels/base/SRC/dlacon.c b/min-dgels/base/SRC/dlacon.c new file mode 100644 index 0000000..0f4ec58 --- /dev/null +++ b/min-dgels/base/SRC/dlacon.c @@ -0,0 +1,258 @@ +/* dlacon.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + integer i_dnnt(doublereal *); + + /* Local variables */ + static integer i__, j, iter; + static doublereal temp; + static integer jump; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + static integer jlast; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + static doublereal altsgn, estold; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLACON estimates the 1-norm of a square, real matrix A. */ +/* Reverse communication is used for evaluating matrix-vector products. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 1. */ + +/* V (workspace) DOUBLE PRECISION array, dimension (N) */ +/* On the final return, V = A*W, where EST = norm(V)/norm(W) */ +/* (W is not returned). 
*/ + +/* X (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On an intermediate return, X should be overwritten by */ +/* A * X, if KASE=1, */ +/* A' * X, if KASE=2, */ +/* and DLACON must be re-called with all the other parameters */ +/* unchanged. */ + +/* ISGN (workspace) INTEGER array, dimension (N) */ + +/* EST (input/output) DOUBLE PRECISION */ +/* On entry with KASE = 1 or 2 and JUMP = 3, EST should be */ +/* unchanged from the previous call to DLACON. */ +/* On exit, EST is an estimate (a lower bound) for norm(A). */ + +/* KASE (input/output) INTEGER */ +/* On the initial call to DLACON, KASE should be 0. */ +/* On an intermediate return, KASE will be 1 or 2, indicating */ +/* whether X should be overwritten by A * X or A' * X. */ +/* On the final return from DLACON, KASE will again be 0. */ + +/* Further Details */ +/* ======= ======= */ + +/* Contributed by Nick Higham, University of Manchester. */ +/* Originally named SONEST, dated March 16, 1988. */ + +/* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of */ +/* a real or complex matrix, with applications to condition estimation", */ +/* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --isgn; + --x; + --v; + + /* Function Body */ + if (*kase == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 1. / (doublereal) (*n); +/* L10: */ + } + *kase = 1; + jump = 1; + return 0; + } + + switch (jump) { + case 1: goto L20; + case 2: goto L40; + case 3: goto L70; + case 4: goto L110; + case 5: goto L140; + } + +/* ................ 
ENTRY (JUMP = 1) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ + +L20: + if (*n == 1) { + v[1] = x[1]; + *est = abs(v[1]); +/* ... QUIT */ + goto L150; + } + *est = _starpu_dasum_(n, &x[1], &c__1); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = d_sign(&c_b11, &x[i__]); + isgn[i__] = i_dnnt(&x[i__]); +/* L30: */ + } + *kase = 2; + jump = 2; + return 0; + +/* ................ ENTRY (JUMP = 2) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L40: + j = _starpu_idamax_(n, &x[1], &c__1); + iter = 2; + +/* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ + +L50: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 0.; +/* L60: */ + } + x[j] = 1.; + *kase = 1; + jump = 3; + return 0; + +/* ................ ENTRY (JUMP = 3) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L70: + _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); + estold = *est; + *est = _starpu_dasum_(n, &v[1], &c__1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = d_sign(&c_b11, &x[i__]); + if (i_dnnt(&d__1) != isgn[i__]) { + goto L90; + } +/* L80: */ + } +/* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ + goto L120; + +L90: +/* TEST FOR CYCLING. */ + if (*est <= estold) { + goto L120; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = d_sign(&c_b11, &x[i__]); + isgn[i__] = i_dnnt(&x[i__]); +/* L100: */ + } + *kase = 2; + jump = 4; + return 0; + +/* ................ ENTRY (JUMP = 4) */ +/* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L110: + jlast = j; + j = _starpu_idamax_(n, &x[1], &c__1); + if (x[jlast] != (d__1 = x[j], abs(d__1)) && iter < 5) { + ++iter; + goto L50; + } + +/* ITERATION COMPLETE. FINAL STAGE. */ + +L120: + altsgn = 1.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = altsgn * ((doublereal) (i__ - 1) / (doublereal) (*n - 1) + + 1.); + altsgn = -altsgn; +/* L130: */ + } + *kase = 1; + jump = 5; + return 0; + +/* ................ 
ENTRY (JUMP = 5) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L140: + temp = _starpu_dasum_(n, &x[1], &c__1) / (doublereal) (*n * 3) * 2.; + if (temp > *est) { + _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); + *est = temp; + } + +L150: + *kase = 0; + return 0; + +/* End of DLACON */ + +} /* _starpu_dlacon_ */ diff --git a/min-dgels/base/SRC/dlacpy.c b/min-dgels/base/SRC/dlacpy.c new file mode 100644 index 0000000..04faa62 --- /dev/null +++ b/min-dgels/base/SRC/dlacpy.c @@ -0,0 +1,125 @@ +/* dlacpy.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLACPY copies all or part of a two-dimensional matrix A to another */ +/* matrix B. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies the part of the matrix A to be copied to B. */ +/* = 'U': Upper triangular part */ +/* = 'L': Lower triangular part */ +/* Otherwise: All of the matrix A */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. 
*/ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. If UPLO = 'U', only the upper triangle */ +/* or trapezoid is accessed; if UPLO = 'L', only the lower */ +/* triangle or trapezoid is accessed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On exit, B = A in the locations specified by UPLO. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (_starpu_lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of DLACPY */ + +} /* _starpu_dlacpy_ */ diff --git a/min-dgels/base/SRC/dladiv.c b/min-dgels/base/SRC/dladiv.c new file mode 100644 index 0000000..383ccb2 --- /dev/null +++ b/min-dgels/base/SRC/dladiv.c @@ -0,0 +1,78 @@ +/* dladiv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) +{ + doublereal e, f; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLADIV performs complex division in real arithmetic */ + +/* a + i*b */ +/* p + i*q = --------- */ +/* c + i*d */ + +/* The algorithm is due to Robert L. Smith and can be found */ +/* in D. Knuth, The art of Computer Programming, Vol.2, p.195 */ + +/* Arguments */ +/* ========= */ + +/* A (input) DOUBLE PRECISION */ +/* B (input) DOUBLE PRECISION */ +/* C (input) DOUBLE PRECISION */ +/* D (input) DOUBLE PRECISION */ +/* The scalars a, b, c, and d in the above expression. */ + +/* P (output) DOUBLE PRECISION */ +/* Q (output) DOUBLE PRECISION */ +/* The scalars p and q in the above expression. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + if (abs(*d__) < abs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of DLADIV */ + +} /* _starpu_dladiv_ */ diff --git a/min-dgels/base/SRC/dlae2.c b/min-dgels/base/SRC/dlae2.c new file mode 100644 index 0000000..565bd97 --- /dev/null +++ b/min-dgels/base/SRC/dlae2.c @@ -0,0 +1,142 @@ +/* dlae2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix */ +/* [ A B ] */ +/* [ B C ]. */ +/* On return, RT1 is the eigenvalue of larger absolute value, and RT2 */ +/* is the eigenvalue of smaller absolute value. */ + +/* Arguments */ +/* ========= */ + +/* A (input) DOUBLE PRECISION */ +/* The (1,1) element of the 2-by-2 matrix. */ + +/* B (input) DOUBLE PRECISION */ +/* The (1,2) and (2,1) elements of the 2-by-2 matrix. 
*/ + +/* C (input) DOUBLE PRECISION */ +/* The (2,2) element of the 2-by-2 matrix. */ + +/* RT1 (output) DOUBLE PRECISION */ +/* The eigenvalue of larger absolute value. */ + +/* RT2 (output) DOUBLE PRECISION */ +/* The eigenvalue of smaller absolute value. */ + +/* Further Details */ +/* =============== */ + +/* RT1 is accurate to a few ulps barring over/underflow. */ + +/* RT2 may be inaccurate if there is massive cancellation in the */ +/* determinant A*C-B*B; higher precision or correctly rounded or */ +/* correctly truncated arithmetic would be needed to compute RT2 */ +/* accurately in all cases. */ + +/* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ +/* Underflow is harmless if the input data is 0 or exceeds */ +/* underflow_threshold / macheps. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. 
*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + +/* End of DLAE2 */ + +} /* _starpu_dlae2_ */ diff --git a/min-dgels/base/SRC/dlaebz.c b/min-dgels/base/SRC/dlaebz.c new file mode 100644 index 0000000..14aa027 --- /dev/null +++ b/min-dgels/base/SRC/dlaebz.c @@ -0,0 +1,640 @@ +/* dlaebz.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, + integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, + doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * + e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, + integer *mout, integer *nab, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, + i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer j, kf, ji, kl, jp, jit; + doublereal tmp1, tmp2; + integer itmp1, itmp2, kfnew, klnew; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAEBZ contains the iteration loops which compute and use the */ +/* function N(w), which is the count of eigenvalues of a symmetric */ +/* tridiagonal matrix T less than or equal to its argument w. It */ +/* performs a choice of two types of loops: */ + +/* IJOB=1, followed by */ +/* IJOB=2: It takes as input a list of intervals and returns a list of */ +/* sufficiently small intervals whose union contains the same */ +/* eigenvalues as the union of the original intervals. */ +/* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */ +/* The output interval (AB(j,1),AB(j,2)] will contain */ +/* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */ + +/* IJOB=3: It performs a binary search in each input interval */ +/* (AB(j,1),AB(j,2)] for a point w(j) such that */ +/* N(w(j))=NVAL(j), and uses C(j) as the starting point of */ +/* the search. If such a w(j) is found, then on output */ +/* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output */ +/* (AB(j,1),AB(j,2)] will be a small interval containing the */ +/* point where N(w) jumps through NVAL(j), unless that point */ +/* lies outside the initial interval. */ + +/* Note that the intervals are in all cases half-open intervals, */ +/* i.e., of the form (a,b] , which includes b but not a . */ + +/* To avoid underflow, the matrix should be scaled so that its largest */ +/* element is no greater than overflow**(1/2) * underflow**(1/4) */ +/* in absolute value. To assure the most accurate computation */ +/* of small eigenvalues, the matrix should be scaled to be */ +/* not much smaller than that, either. */ + +/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ +/* Matrix", Report CS41, Computer Science Dept., Stanford */ +/* University, July 21, 1966 */ + +/* Note: the arguments are, in general, *not* checked for unreasonable */ +/* values. 
*/ + +/* Arguments */ +/* ========= */ + +/* IJOB (input) INTEGER */ +/* Specifies what is to be done: */ +/* = 1: Compute NAB for the initial intervals. */ +/* = 2: Perform bisection iteration to find eigenvalues of T. */ +/* = 3: Perform bisection iteration to invert N(w), i.e., */ +/* to find a point which has a specified number of */ +/* eigenvalues of T to its left. */ +/* Other values will cause DLAEBZ to return with INFO=-1. */ + +/* NITMAX (input) INTEGER */ +/* The maximum number of "levels" of bisection to be */ +/* performed, i.e., an interval of width W will not be made */ +/* smaller than 2^(-NITMAX) * W. If not all intervals */ +/* have converged after NITMAX iterations, then INFO is set */ +/* to the number of non-converged intervals. */ + +/* N (input) INTEGER */ +/* The dimension n of the tridiagonal matrix T. It must be at */ +/* least 1. */ + +/* MMAX (input) INTEGER */ +/* The maximum number of intervals. If more than MMAX intervals */ +/* are generated, then DLAEBZ will quit with INFO=MMAX+1. */ + +/* MINP (input) INTEGER */ +/* The initial number of intervals. It may not be greater than */ +/* MMAX. */ + +/* NBMIN (input) INTEGER */ +/* The smallest number of intervals that should be processed */ +/* using a vector loop. If zero, then only the scalar loop */ +/* will be used. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The minimum (absolute) width of an interval. When an */ +/* interval is narrower than ABSTOL, or than RELTOL times the */ +/* larger (in magnitude) endpoint, then it is considered to be */ +/* sufficiently small, i.e., converged. This must be at least */ +/* zero. */ + +/* RELTOL (input) DOUBLE PRECISION */ +/* The minimum relative width of an interval. When an interval */ +/* is narrower than ABSTOL, or than RELTOL times the larger (in */ +/* magnitude) endpoint, then it is considered to be */ +/* sufficiently small, i.e., converged. Note: this should */ +/* always be at least radix*machine epsilon. 
*/ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum absolute value of a "pivot" in the Sturm */ +/* sequence loop. This *must* be at least max |e(j)**2| * */ +/* safe_min and at least safe_min, where safe_min is at least */ +/* the smallest number that can divide one without overflow. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of the tridiagonal matrix T. */ + +/* E (input) DOUBLE PRECISION array, dimension (N) */ +/* The offdiagonal elements of the tridiagonal matrix T in */ +/* positions 1 through N-1. E(N) is arbitrary. */ + +/* E2 (input) DOUBLE PRECISION array, dimension (N) */ +/* The squares of the offdiagonal elements of the tridiagonal */ +/* matrix T. E2(N) is ignored. */ + +/* NVAL (input/output) INTEGER array, dimension (MINP) */ +/* If IJOB=1 or 2, not referenced. */ +/* If IJOB=3, the desired values of N(w). The elements of NVAL */ +/* will be reordered to correspond with the intervals in AB. */ +/* Thus, NVAL(j) on output will not, in general be the same as */ +/* NVAL(j) on input, but it will correspond with the interval */ +/* (AB(j,1),AB(j,2)] on output. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */ +/* The endpoints of the intervals. AB(j,1) is a(j), the left */ +/* endpoint of the j-th interval, and AB(j,2) is b(j), the */ +/* right endpoint of the j-th interval. The input intervals */ +/* will, in general, be modified, split, and reordered by the */ +/* calculation. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (MMAX) */ +/* If IJOB=1, ignored. */ +/* If IJOB=2, workspace. */ +/* If IJOB=3, then on input C(j) should be initialized to the */ +/* first search point in the binary search. */ + +/* MOUT (output) INTEGER */ +/* If IJOB=1, the number of eigenvalues in the intervals. */ +/* If IJOB=2 or 3, the number of intervals output. */ +/* If IJOB=3, MOUT will equal MINP. 
*/ + +/* NAB (input/output) INTEGER array, dimension (MMAX,2) */ +/* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */ +/* If IJOB=2, then on input, NAB(i,j) should be set. It must */ +/* satisfy the condition: */ +/* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */ +/* which means that in interval i only eigenvalues */ +/* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, */ +/* NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with */ +/* IJOB=1. */ +/* On output, NAB(i,j) will contain */ +/* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */ +/* the input interval that the output interval */ +/* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */ +/* the input values of NAB(k,1) and NAB(k,2). */ +/* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */ +/* unless N(w) > NVAL(i) for all search points w , in which */ +/* case NAB(i,1) will not be modified, i.e., the output */ +/* value will be the same as the input value (modulo */ +/* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */ +/* for all search points w , in which case NAB(i,2) will */ +/* not be modified. Normally, NAB should be set to some */ +/* distinctive value(s) before DLAEBZ is called. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MMAX) */ +/* Workspace. */ + +/* IWORK (workspace) INTEGER array, dimension (MMAX) */ +/* Workspace. */ + +/* INFO (output) INTEGER */ +/* = 0: All intervals converged. */ +/* = 1--MMAX: The last INFO intervals did not converge. */ +/* = MMAX+1: More than MMAX intervals were generated. */ + +/* Further Details */ +/* =============== */ + +/* This routine is intended to be called only by other LAPACK */ +/* routines, thus the interface is less user-friendly. It is intended */ +/* for two purposes: */ + +/* (a) finding eigenvalues. In this case, DLAEBZ should have one or */ +/* more initial intervals set up in AB, and DLAEBZ should be called */ +/* with IJOB=1. 
This sets up NAB, and also counts the eigenvalues. */ +/* Intervals with no eigenvalues would usually be thrown out at */ +/* this point. Also, if not all the eigenvalues in an interval i */ +/* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */ +/* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */ +/* eigenvalue. DLAEBZ is then called with IJOB=2 and MMAX */ +/* no smaller than the value of MOUT returned by the call with */ +/* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */ +/* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */ +/* tolerance specified by ABSTOL and RELTOL. */ + +/* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */ +/* In this case, start with a Gershgorin interval (a,b). Set up */ +/* AB to contain 2 search intervals, both initially (a,b). One */ +/* NVAL element should contain f-1 and the other should contain l */ +/* , while C should contain a and b, resp. NAB(i,1) should be -1 */ +/* and NAB(i,2) should be N+1, to flag an error if the desired */ +/* interval does not lie in (a,b). DLAEBZ is then called with */ +/* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- */ +/* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */ +/* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */ +/* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and */ +/* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and */ +/* w(l-r)=...=w(l+k) are handled similarly. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Check for Errors */ + + /* Parameter adjustments */ + nab_dim1 = *mmax; + nab_offset = 1 + nab_dim1; + nab -= nab_offset; + ab_dim1 = *mmax; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --d__; + --e; + --e2; + --nval; + --c__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + if (*ijob < 1 || *ijob > 3) { + *info = -1; + return 0; + } + +/* Initialize NAB */ + + if (*ijob == 1) { + +/* Compute the number of eigenvalues in the initial intervals. */ + + *mout = 0; +/* DIR$ NOVECTOR */ + i__1 = *minp; + for (ji = 1; ji <= i__1; ++ji) { + for (jp = 1; jp <= 2; ++jp) { + tmp1 = d__[1] - ab[ji + jp * ab_dim1]; + if (abs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + nab[ji + jp * nab_dim1] = 0; + if (tmp1 <= 0.) { + nab[ji + jp * nab_dim1] = 1; + } + + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1]; + if (abs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) { + ++nab[ji + jp * nab_dim1]; + } +/* L10: */ + } +/* L20: */ + } + *mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1]; +/* L30: */ + } + return 0; + } + +/* Initialize for loop */ + +/* KF and KL have the following meaning: */ +/* Intervals 1,...,KF-1 have converged. */ +/* Intervals KF,...,KL still need to be refined. */ + + kf = 1; + kl = *minp; + +/* If IJOB=2, initialize C. */ +/* If IJOB=3, use the user-supplied starting point. 
*/ + + if (*ijob == 2) { + i__1 = *minp; + for (ji = 1; ji <= i__1; ++ji) { + c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; +/* L40: */ + } + } + +/* Iteration loop */ + + i__1 = *nitmax; + for (jit = 1; jit <= i__1; ++jit) { + +/* Loop over intervals */ + + if (kl - kf + 1 >= *nbmin && *nbmin > 0) { + +/* Begin of Parallel Version of the loop */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + +/* Compute N(c), the number of eigenvalues less than c */ + + work[ji] = d__[1] - c__[ji]; + iwork[ji] = 0; + if (work[ji] <= *pivmin) { + iwork[ji] = 1; +/* Computing MIN */ + d__1 = work[ji], d__2 = -(*pivmin); + work[ji] = min(d__1,d__2); + } + + i__3 = *n; + for (j = 2; j <= i__3; ++j) { + work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji]; + if (work[ji] <= *pivmin) { + ++iwork[ji]; +/* Computing MIN */ + d__1 = work[ji], d__2 = -(*pivmin); + work[ji] = min(d__1,d__2); + } +/* L50: */ + } +/* L60: */ + } + + if (*ijob <= 2) { + +/* IJOB=2: Choose all intervals containing eigenvalues. */ + + klnew = kl; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + +/* Insure that N(w) is monotone */ + +/* Computing MIN */ +/* Computing MAX */ + i__5 = nab[ji + nab_dim1], i__6 = iwork[ji]; + i__3 = nab[ji + (nab_dim1 << 1)], i__4 = max(i__5,i__6); + iwork[ji] = min(i__3,i__4); + +/* Update the Queue -- add intervals if both halves */ +/* contain eigenvalues. */ + + if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) { + +/* No eigenvalue in the upper interval: */ +/* just use the lower interval. */ + + ab[ji + (ab_dim1 << 1)] = c__[ji]; + + } else if (iwork[ji] == nab[ji + nab_dim1]) { + +/* No eigenvalue in the lower interval: */ +/* just use the upper interval. */ + + ab[ji + ab_dim1] = c__[ji]; + } else { + ++klnew; + if (klnew <= *mmax) { + +/* Eigenvalue in both intervals -- add upper to */ +/* queue. 
*/ + + ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << + 1)]; + nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 + << 1)]; + ab[klnew + ab_dim1] = c__[ji]; + nab[klnew + nab_dim1] = iwork[ji]; + ab[ji + (ab_dim1 << 1)] = c__[ji]; + nab[ji + (nab_dim1 << 1)] = iwork[ji]; + } else { + *info = *mmax + 1; + } + } +/* L70: */ + } + if (*info != 0) { + return 0; + } + kl = klnew; + } else { + +/* IJOB=3: Binary search. Keep only the interval containing */ +/* w s.t. N(w) = NVAL */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + if (iwork[ji] <= nval[ji]) { + ab[ji + ab_dim1] = c__[ji]; + nab[ji + nab_dim1] = iwork[ji]; + } + if (iwork[ji] >= nval[ji]) { + ab[ji + (ab_dim1 << 1)] = c__[ji]; + nab[ji + (nab_dim1 << 1)] = iwork[ji]; + } +/* L80: */ + } + } + + } else { + +/* End of Parallel Version of the loop */ + +/* Begin of Serial Version of the loop */ + + klnew = kl; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + +/* Compute N(w), the number of eigenvalues less than w */ + + tmp1 = c__[ji]; + tmp2 = d__[1] - tmp1; + itmp1 = 0; + if (tmp2 <= *pivmin) { + itmp1 = 1; +/* Computing MIN */ + d__1 = tmp2, d__2 = -(*pivmin); + tmp2 = min(d__1,d__2); + } + +/* A series of compiler directives to defeat vectorization */ +/* for the next loop */ + +/* $PL$ CMCHAR=' ' */ +/* DIR$ NEXTSCALAR */ +/* $DIR SCALAR */ +/* DIR$ NEXT SCALAR */ +/* VD$L NOVECTOR */ +/* DEC$ NOVECTOR */ +/* VD$ NOVECTOR */ +/* VDIR NOVECTOR */ +/* VOCL LOOP,SCALAR */ +/* IBM PREFER SCALAR */ +/* $PL$ CMCHAR='*' */ + + i__3 = *n; + for (j = 2; j <= i__3; ++j) { + tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1; + if (tmp2 <= *pivmin) { + ++itmp1; +/* Computing MIN */ + d__1 = tmp2, d__2 = -(*pivmin); + tmp2 = min(d__1,d__2); + } +/* L90: */ + } + + if (*ijob <= 2) { + +/* IJOB=2: Choose all intervals containing eigenvalues. 
*/ + +/* Insure that N(w) is monotone */ + +/* Computing MIN */ +/* Computing MAX */ + i__5 = nab[ji + nab_dim1]; + i__3 = nab[ji + (nab_dim1 << 1)], i__4 = max(i__5,itmp1); + itmp1 = min(i__3,i__4); + +/* Update the Queue -- add intervals if both halves */ +/* contain eigenvalues. */ + + if (itmp1 == nab[ji + (nab_dim1 << 1)]) { + +/* No eigenvalue in the upper interval: */ +/* just use the lower interval. */ + + ab[ji + (ab_dim1 << 1)] = tmp1; + + } else if (itmp1 == nab[ji + nab_dim1]) { + +/* No eigenvalue in the lower interval: */ +/* just use the upper interval. */ + + ab[ji + ab_dim1] = tmp1; + } else if (klnew < *mmax) { + +/* Eigenvalue in both intervals -- add upper to queue. */ + + ++klnew; + ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)]; + nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << + 1)]; + ab[klnew + ab_dim1] = tmp1; + nab[klnew + nab_dim1] = itmp1; + ab[ji + (ab_dim1 << 1)] = tmp1; + nab[ji + (nab_dim1 << 1)] = itmp1; + } else { + *info = *mmax + 1; + return 0; + } + } else { + +/* IJOB=3: Binary search. Keep only the interval */ +/* containing w s.t. 
N(w) = NVAL */ + + if (itmp1 <= nval[ji]) { + ab[ji + ab_dim1] = tmp1; + nab[ji + nab_dim1] = itmp1; + } + if (itmp1 >= nval[ji]) { + ab[ji + (ab_dim1 << 1)] = tmp1; + nab[ji + (nab_dim1 << 1)] = itmp1; + } + } +/* L100: */ + } + kl = klnew; + +/* End of Serial Version of the loop */ + + } + +/* Check for convergence */ + + kfnew = kf; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], abs( + d__1)); +/* Computing MAX */ + d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], abs(d__1)), d__4 = (d__2 = + ab[ji + ab_dim1], abs(d__2)); + tmp2 = max(d__3,d__4); +/* Computing MAX */ + d__1 = max(*abstol,*pivmin), d__2 = *reltol * tmp2; + if (tmp1 < max(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + ( + nab_dim1 << 1)]) { + +/* Converged -- Swap with position KFNEW, */ +/* then increment KFNEW */ + + if (ji > kfnew) { + tmp1 = ab[ji + ab_dim1]; + tmp2 = ab[ji + (ab_dim1 << 1)]; + itmp1 = nab[ji + nab_dim1]; + itmp2 = nab[ji + (nab_dim1 << 1)]; + ab[ji + ab_dim1] = ab[kfnew + ab_dim1]; + ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)]; + nab[ji + nab_dim1] = nab[kfnew + nab_dim1]; + nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)]; + ab[kfnew + ab_dim1] = tmp1; + ab[kfnew + (ab_dim1 << 1)] = tmp2; + nab[kfnew + nab_dim1] = itmp1; + nab[kfnew + (nab_dim1 << 1)] = itmp2; + if (*ijob == 3) { + itmp1 = nval[ji]; + nval[ji] = nval[kfnew]; + nval[kfnew] = itmp1; + } + } + ++kfnew; + } +/* L110: */ + } + kf = kfnew; + +/* Choose Midpoints */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; +/* L120: */ + } + +/* If no more intervals to refine, quit. 
*/ + + if (kf > kl) { + goto L140; + } +/* L130: */ + } + +/* Converged */ + +L140: +/* Computing MAX */ + i__1 = kl + 1 - kf; + *info = max(i__1,0); + *mout = kl; + + return 0; + +/* End of DLAEBZ */ + +} /* _starpu_dlaebz_ */ diff --git a/min-dgels/base/SRC/dlaed0.c b/min-dgels/base/SRC/dlaed0.c new file mode 100644 index 0000000..b2559c3 --- /dev/null +++ b/min-dgels/base/SRC/dlaed0.c @@ -0,0 +1,440 @@ +/* dlaed0.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static doublereal c_b23 = 1.; +static doublereal c_b24 = 0.; +static integer c__1 = 1; + +/* Subroutine: DLAED0, divide-and-conquer eigensolver driver for a symmetric tridiagonal matrix (full contract in the Purpose/Arguments comments below) */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + doublereal temp; + integer curr; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iperm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, 
doublereal *, integer *); + integer indxq, iwrem; + extern /* Subroutine */ int _starpu_dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + integer iqptr; + extern /* Subroutine */ int _starpu_dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + integer tlvls; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + integer igivcl; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer curlvl, matsiz, iprmpt, smlsiz; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED0 computes all eigenvalues and corresponding eigenvectors of a */ +/* symmetric tridiagonal matrix using the divide and conquer method. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* = 0: Compute eigenvalues only. */ +/* = 1: Compute eigenvectors of original dense symmetric matrix */ +/* also. On entry, Q contains the orthogonal matrix used */ +/* to reduce the original matrix to tridiagonal form. */ +/* = 2: Compute eigenvalues and eigenvectors of tridiagonal */ +/* matrix. 
*/ + +/* QSIZ (input) INTEGER */ +/* The dimension of the orthogonal matrix used to reduce */ +/* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the main diagonal of the tridiagonal matrix. */ +/* On exit, its eigenvalues. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The off-diagonal elements of the tridiagonal matrix. */ +/* On exit, E has been destroyed. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* On entry, Q must contain an N-by-N orthogonal matrix. */ +/* If ICOMPQ = 0 Q is not referenced. */ +/* If ICOMPQ = 1 On entry, Q is a subset of the columns of the */ +/* orthogonal matrix used to reduce the full */ +/* matrix to tridiagonal form corresponding to */ +/* the subset of the full matrix which is being */ +/* decomposed at this time. */ +/* If ICOMPQ = 2 On entry, Q will be the identity matrix. */ +/* On exit, Q contains the eigenvectors of the */ +/* tridiagonal matrix. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. If eigenvectors are */ +/* desired, then LDQ >= max(1,N). In any case, LDQ >= 1. */ + +/* QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) */ +/* Referenced only when ICOMPQ = 1. Used to store parts of */ +/* the eigenvector matrix when the updating matrix multiplies */ +/* take place. */ + +/* LDQS (input) INTEGER */ +/* The leading dimension of the array QSTORE. If ICOMPQ = 1, */ +/* then LDQS >= max(1,N). In any case, LDQS >= 1. */ + +/* WORK (workspace) DOUBLE PRECISION array, */ +/* If ICOMPQ = 0 or 1, the dimension of WORK must be at least */ +/* 1 + 3*N + 2*N*lg N + 2*N**2 */ +/* ( lg( N ) = smallest integer k */ +/* such that 2^k >= N ) */ +/* If ICOMPQ = 2, the dimension of WORK must be at least */ +/* 4*N + N**2. 
*/ + +/* IWORK (workspace) INTEGER array, */ +/* If ICOMPQ = 0 or 1, the dimension of IWORK must be at least */ +/* 6 + 6*N + 5*N*lg N. */ +/* ( lg( N ) = smallest integer k */ +/* such that 2^k >= N ) */ +/* If ICOMPQ = 2, the dimension of IWORK must be at least */ +/* 3 + 5*N. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: The algorithm failed to compute an eigenvalue while */ +/* working on the submatrix lying in rows and columns */ +/* INFO/(N+1) through mod(INFO,N+1). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: every array pointer is shifted back so the code below can use 1-based indices, with q and qstore addressed as q[row+col*q_dim1] */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; /* enforced for every ICOMPQ value, although Q is documented as unreferenced when ICOMPQ = 0 */ + } else if (*ldqs < max(1,*n)) { + *info = -9; /* likewise enforced even when ICOMPQ != 1 */ + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = _starpu_ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0); + +/* Determine the size and placement of the submatrices, and save in */ +/* the leading elements of IWORK. 
*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: size iwork[j] is split into iwork[2j-1] = size/2 and iwork[2j] = (size+1)/2 (integer division); j runs downward so entries not yet split are not overwritten */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: running sum, so iwork[j] now holds the last row of submatrix j rather than its size */ + } + +/* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 */ +/* using rank-1 modifications (cuts). */ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; /* 4*N+3: base offset into IWORK of the index array passed to DLAED1/DLAED7 and read back below as the final ordering */ + if (*icompq != 2) { + +/* Set up workspaces for eigenvalues only/accumulate new vectors */ +/* routine */ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { /* NOTE(review): identical test repeated; presumably a guard for the log()-based estimate of lg N coming out too small -- confirm against reference LAPACK */ + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* Solve each submatrix eigenproblem at the bottom of the divide and */ +/* conquer tree. 
*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + _starpu_dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b24, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: iwork[indxq+j] starts as the local position 1..matsiz within this submatrix */ + } +/* L70: */ + } + +/* Successively merge eigensystems of adjacent submatrices */ +/* into eigensystem for the corresponding larger matrix. */ + +/* while ( SUBPBS > 1 ) */ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) */ +/* into an eigensystem of size MATSIZ. */ +/* DLAED1 is used only for the full eigensystem of a tridiagonal */ +/* matrix. */ +/* DLAED7 handles the cases in which eigenvalues only or eigenvalues */ +/* and eigenvectors of a full symmetric matrix (which was reduced to */ +/* tridiagonal form) are desired. 
*/ + + if (*icompq == 2) { + _starpu_dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + _starpu_dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] +, &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: the end row of the merged pair was just stored as entry i/2+1, compacting the table for the next level */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* end while */ + +/* Re-merge the eigenvalues/vectors which were deflated at the final */ +/* merge step. */ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + _starpu_dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + _starpu_dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + _starpu_dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; /* packs first row submat and last row submat+matsiz-1 of the failing block as first*(N+1)+last, matching the INFO > 0 description */ + +L140: + return 0; + +/* End of DLAED0 */ + +} /* _starpu_dlaed0_ */ diff --git a/min-dgels/base/SRC/dlaed1.c b/min-dgels/base/SRC/dlaed1.c new file mode 100644 index 0000000..903774f --- /dev/null +++ b/min-dgels/base/SRC/dlaed1.c @@ -0,0 +1,249 @@ +/* dlaed1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine: DLAED1, merges two adjacent eigensystems after a rank-one update (full contract in the Purpose/Arguments comments below) */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer indxp; + extern /* Subroutine */ int _starpu_dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), _starpu_dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + integer idlmda; + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); + integer coltyp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAED1 computes the updated eigensystem of a diagonal */ +/* matrix after modification by a rank-one symmetric matrix. This */ +/* routine is used only for the eigenproblem which requires all */ +/* eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles */ +/* the case in which eigenvalues only or eigenvalues and eigenvectors */ +/* of a full symmetric matrix (which was reduced to tridiagonal form) */ +/* are desired. */ + +/* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */ + +/* where Z = Q'u, u is a vector of length N with ones in the */ +/* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ + +/* The eigenvectors of the original matrix are stored in Q, and the */ +/* eigenvalues are in D. The algorithm consists of three stages: */ + +/* The first stage consists of deflating the size of the problem */ +/* when there are multiple eigenvalues or if there is a zero in */ +/* the Z vector. For each such occurrence the dimension of the */ +/* secular equation problem is reduced by one. This stage is */ +/* performed by the routine DLAED2. */ + +/* The second stage consists of calculating the updated */ +/* eigenvalues. This is done by finding the roots of the secular */ +/* equation via the routine DLAED4 (as called by DLAED3). */ +/* This routine also calculates the eigenvectors of the current */ +/* problem. */ + +/* The final stage consists of computing the updated eigenvectors */ +/* directly using the updated eigenvalues. The eigenvectors for */ +/* the current problem are multiplied with the eigenvectors from */ +/* the overall problem. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the eigenvalues of the rank-1-perturbed matrix. */ +/* On exit, the eigenvalues of the repaired matrix. 
*/ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, the eigenvectors of the rank-1-perturbed matrix. */ +/* On exit, the eigenvectors of the repaired tridiagonal matrix. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* INDXQ (input/output) INTEGER array, dimension (N) */ +/* On entry, the permutation which separately sorts the two */ +/* subproblems in D into ascending order. */ +/* On exit, the permutation which will reintegrate the */ +/* subproblems back into sorted order, */ +/* i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The subdiagonal entry used to create the rank-1 modification. */ + +/* CUTPNT (input) INTEGER */ +/* The location of the last eigenvalue in the leading sub-matrix. */ +/* min(1,N) <= CUTPNT <= N/2. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) */ + +/* IWORK (workspace) INTEGER array, dimension (4*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, an eigenvalue did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ +/* Modified by Francoise Tisseur, University of Tennessee. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: pointers are shifted back so the arrays can be indexed from 1, with q addressed as q[row+col*q_dim1] */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition): CUTPNT must lie in min(1,N/2)..N/2 */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* The following values are integer pointers which indicate */ +/* the portion of the workspace */ +/* used by a particular array in DLAED2 and DLAED3. */ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* Form the z-vector which consists of the last row of Q_1 and the */ +/* first row of Q_2. */ + + _starpu_dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + _starpu_dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + _starpu_dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; /* WORK offset handed to DLAED3 just before INFO: iq2 plus a size built from the counts in iwork[coltyp..coltyp+2] */ + _starpu_dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. 
*/ + + n1 = k; + n2 = *n - k; + _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); /* DLAMRG receives run lengths K and N-K with stride arguments c__1 (1) and c_n1 (-1); INDXQ is passed as its last argument */ + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: K == 0, DLAED3 was skipped, so INDXQ is set to the identity permutation */ + } + } + +L20: + return 0; + +/* End of DLAED1 */ + +} /* _starpu_dlaed1_ */ diff --git a/min-dgels/base/SRC/dlaed2.c b/min-dgels/base/SRC/dlaed2.c new file mode 100644 index 0000000..a6a9b2f --- /dev/null +++ b/min-dgels/base/SRC/dlaed2.c @@ -0,0 +1,532 @@ +/* dlaed2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b3 = -1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer i__, j; + doublereal s, t; + integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + doublereal eps, tau, tol; + integer psm[4], imax, jmax; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer ctot[4]; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dcopy_(integer *, 
doublereal *, integer *, doublereal + *, integer *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED2 merges the two sets of eigenvalues together into a single */ +/* sorted set. Then it tries to deflate the size of the problem. */ +/* There are two ways in which deflation can occur: when two or more */ +/* eigenvalues are close together or if there is a tiny entry in the */ +/* Z vector. For each such occurrence the order of the related secular */ +/* equation problem is reduced by one. */ + +/* Arguments */ +/* ========= */ + +/* K (output) INTEGER */ +/* The number of non-deflated eigenvalues, and the order of the */ +/* related secular equation. 0 <= K <=N. */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* N1 (input) INTEGER */ +/* The location of the last eigenvalue in the leading sub-matrix. */ +/* min(1,N) <= N1 <= N/2. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D contains the eigenvalues of the two submatrices to */ +/* be combined. */ +/* On exit, D contains the trailing (N-K) updated eigenvalues */ +/* (those which were deflated) sorted into increasing order. 
*/ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* On entry, Q contains the eigenvectors of two submatrices in */ +/* the two square blocks with corners at (1,1), (N1,N1) */ +/* and (N1+1, N1+1), (N,N). */ +/* On exit, Q contains the trailing (N-K) updated eigenvectors */ +/* (those which were deflated) in its last N-K columns. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* INDXQ (input/output) INTEGER array, dimension (N) */ +/* The permutation which separately sorts the two sub-problems */ +/* in D into ascending order. Note that elements in the second */ +/* half of this permutation must first have N1 added to their */ +/* values. Destroyed on exit. */ + +/* RHO (input/output) DOUBLE PRECISION */ +/* On entry, the off-diagonal element associated with the rank-1 */ +/* cut which originally split the two submatrices which are now */ +/* being recombined. */ +/* On exit, RHO has been modified to the value required by */ +/* DLAED3. */ + +/* Z (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, Z contains the updating vector (the last */ +/* row of the first sub-eigenvector matrix and the first row of */ +/* the second sub-eigenvector matrix). */ +/* On exit, the contents of Z have been destroyed by the updating */ +/* process. */ + +/* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ +/* A copy of the first K eigenvalues which will be used by */ +/* DLAED3 to form the secular equation. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first k values of the final deflation-altered z-vector */ +/* which will be passed to DLAED3. */ + +/* Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */ +/* A copy of the first K eigenvectors which will be used by */ +/* DLAED3 in a matrix multiply (DGEMM) to solve for the new */ +/* eigenvectors. 
*/ + +/* INDX (workspace) INTEGER array, dimension (N) */ +/* The permutation used to sort the contents of DLAMDA into */ +/* ascending order. */ + +/* INDXC (output) INTEGER array, dimension (N) */ +/* The permutation used to arrange the columns of the deflated */ +/* Q matrix into three groups: the first group contains non-zero */ +/* elements only at and above N1, the second contains */ +/* non-zero elements only below N1, and the third is dense. */ + +/* INDXP (workspace) INTEGER array, dimension (N) */ +/* The permutation used to place deflated values of D at the end */ +/* of the array. INDXP(1:K) points to the nondeflated D-values */ +/* and INDXP(K+1:N) points to the deflated eigenvalues. */ + +/* COLTYP (workspace/output) INTEGER array, dimension (N) */ +/* During execution, a label which will indicate which of the */ +/* following types a column in the Q2 matrix is: */ +/* 1 : non-zero in the upper half only; */ +/* 2 : dense; */ +/* 3 : non-zero in the lower half only; */ +/* 4 : deflated. */ +/* On exit, COLTYP(i) is the number of columns of type i, */ +/* for i=1 to 4 only. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ +/* Modified by Francoise Tisseur, University of Tennessee. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.) { + _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1. Since z is the concatenation of */ +/* two normalized vectors, norm2(z) = sqrt(2). */ + + t = 1. / sqrt(2.); + _starpu_dscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + _starpu_dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = _starpu_idamax_(n, &z__[1], &c__1); + jmax = _starpu_idamax_(n, &d__[1], &c__1); + eps = _starpu_dlamch_("Epsilon"); +/* Computing MAX */ + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); + +/* If the rank-1 modifier is small enough, no more needs to be done */ +/* except to reorganize Q so that its columns correspond with the */ +/* elements in D. 
*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + _starpu_dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + _starpu_dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + _starpu_dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* If there are multiple eigenvalues then the problem deflates. Here */ +/* the number of equal eigenvalues are found. As each equal */ +/* eigenvalue is found, an elementary reflector is computed to rotate */ +/* the corresponding eigensubspace so that the corresponding */ +/* components of Z are zero in this new basis. */ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = _starpu_dlapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[nj] = tau; + z__[pj] = 0.; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + _starpu_drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + d__1 = c__; +/* Computing 2nd power */ + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); +/* Computing 2nd power */ + d__1 = s; +/* Computing 2nd power */ + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* Count up the total number of the various types of columns, then */ +/* form a permutation which positions the four column types into */ +/* four uniform groups (although one or more of these groups may be */ +/* empty). */ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* Fill out the INDXC array so that the permutation which it induces */ +/* will place all type-1 columns first, all type-2 columns next, */ +/* then all type-3's, and finally all type-4's. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ +/* and Q2 respectively. The eigenvalues/vectors which were not */ +/* deflated go into the first K slots of DLAMDA and Q2 respectively, */ +/* while those which were deflated go into the last N - K slots. */ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + _starpu_dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* The deflated eigenvalues and their corresponding vectors go back */ +/* into the last N - K slots of D and Q respectively. */ + + _starpu_dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + _starpu_dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in DLAED3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of DLAED2 */ + +} /* _starpu_dlaed2_ */ diff --git a/min-dgels/base/SRC/dlaed3.c b/min-dgels/base/SRC/dlaed3.c new file mode 100644 index 0000000..4c47299 --- /dev/null +++ b/min-dgels/base/SRC/dlaed3.c @@ -0,0 +1,338 @@ +/* dlaed3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values (every argument is passed by address, so the literals 1, 1.0 and 0.0 need storage) */ + +static integer c__1 = 1; +static doublereal c_b22 = 1.; +static doublereal c_b23 = 0.; + +/* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions (d_sign is the libf2c counterpart of the Fortran SIGN intrinsic) */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j, n2, n12, ii, n23, iq2; + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), _starpu_dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal
_starpu_dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED3 finds the roots of the secular equation, as defined by the */ +/* values in DLAMDA, W, and RHO (D receives the roots), between 1 and K. It makes the */ +/* appropriate calls to DLAED4 and then updates the eigenvectors by */ +/* multiplying the matrix of eigenvectors of the pair of eigensystems */ +/* being combined by the matrix of eigenvectors of the K-by-K system */ +/* which is solved here. */ + +/* This code makes very mild assumptions about floating point */ +/* arithmetic. It will work on machines with a guard digit in */ +/* add/subtract, or on those binary machines without guard digits */ +/* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* K (input) INTEGER */ +/* The number of terms in the rational function to be solved by */ +/* DLAED4. K >= 0. */ + +/* N (input) INTEGER */ +/* The number of rows and columns in the Q matrix. */ +/* N >= K (deflation may result in N>K). */ + +/* N1 (input) INTEGER */ +/* The location of the last eigenvalue in the leading submatrix. */ +/* min(1,N) <= N1 <= N/2. */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* D(I) contains the updated eigenvalues for */ +/* 1 <= I <= K. */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* Initially the first K columns are used as workspace.
*/ +/* On output the columns 1 to K contain */ +/* the updated eigenvectors. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* RHO (input) DOUBLE PRECISION */ +/* The value of the parameter in the rank one update equation. */ +/* RHO >= 0 required. */ + +/* DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) */ +/* The first K elements of this array contain the old roots */ +/* of the deflated updating problem. These are the poles */ +/* of the secular equation. May be changed on output by */ +/* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */ +/* Cray-2, or Cray C-90, as described above. */ + +/* Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) */ +/* The first K columns of this matrix contain the non-deflated */ +/* eigenvectors for the split problem. NOTE: no LDQ2 argument is passed; the code reads Q2 as packed storage, an N1-by-N12 block at Q2(1) followed by an N2-by-N23 block at Q2(N1*N12+1). */ + +/* INDX (input) INTEGER array, dimension (N) */ +/* The permutation used to arrange the columns of the deflated */ +/* Q matrix into three groups (see DLAED2). */ +/* The rows of the eigenvectors found by DLAED4 must be likewise */ +/* permuted before the matrix multiply can take place. */ + +/* CTOT (input) INTEGER array, dimension (4) */ +/* A count of the total number of the various types of columns */ +/* in Q, as described in INDX. The fourth column type is any */ +/* column which has been deflated. */ + +/* W (input/output) DOUBLE PRECISION array, dimension (K) */ +/* The first K elements of this array contain the components */ +/* of the deflation-adjusted updating vector. Destroyed on */ +/* output. */ + +/* S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K */ +/* Will contain the eigenvectors of the repaired matrix which */ +/* will be multiplied by the previously accumulated eigenvectors */ +/* to update the system. */ + +/* LDS (input) INTEGER */ +/* The leading dimension of S. LDS >= max(1,K). NOTE: LDS is not in the argument list of this routine; S is addressed with leading dimension N23 and then N12. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit.
*/ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, an eigenvalue did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ +/* Modified by Francoise Tisseur, University of Tennessee. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters (only K, N and LDQ are checked; a failure is reported through XERBLA with the argument position). */ + + /* Parameter adjustments: shift each pointer so that the 1-based Fortran subscripts used below (column-major Q(i,j) as q[i + j*q_dim1]) address the C arrays */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED3", &i__1); + return 0; + } + +/* Quick return if possible (K = 0: D and Q are not written) */ + + if (*k == 0) { + return 0; + } + +/* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DLAMDA(I) if it is 1; this makes the subsequent */ +/* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DLAMDA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DLAMDA(I).
It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMDA(I) (the DLAMC3 call below) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = _starpu_dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated: control jumps to L120 and returns with the INFO value set by DLAED4. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W (general case K > 2; K = 1 and K = 2 have already branched to L110, the K = 2 case after permuting the two rows of each column of Q through INDX). The incoming W is saved in S first, because only its signs are reused when W is rebuilt at the end of this step. */ + + _starpu_dcopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) (a copy with stride LDQ+1 walks the diagonal of Q), then scale W(I) by Q(I,J)/(DLAMDA(I)-DLAMDA(J)) for every J different from I, and finally set W(I) to sqrt(-W(I)) carrying the sign of the saved S(I). */ + + i__1 = *ldq + 1; + _starpu_dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. For each column J: S(I) = W(I)/Q(I,J), then Q(I,J) = S(INDX(I)) divided by the 2-norm of S. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = _starpu_dnrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. Q is rebuilt in two block rows; for each one the needed rows of Q are first copied into S and the product is written back into Q. Rows N1+1..N: the N2-by-N23 block of Q2 starting at Q2(N1*N12+1) times rows CTOT(1)+1..CTOT(1)+N23 of Q. Rows 1..N1: the N1-by-N12 block at Q2(1) times rows 1..N12 of Q. When N23 (resp. N12) is zero the block row is zero-filled with DLASET. Here N2 = N-N1, N12 = CTOT(1)+CTOT(2), N23 = CTOT(2)+CTOT(3).
*/ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + _starpu_dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + _starpu_dgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & + c_b23, &q[*n1 + 1 + q_dim1], ldq); + } else { + _starpu_dlaset_("A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq); + } + + _starpu_dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + _starpu_dgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, + &q[q_offset], ldq); + } else { + _starpu_dlaset_("A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of DLAED3 */ + +} /* _starpu_dlaed3_ */ diff --git a/min-dgels/base/SRC/dlaed4.c b/min-dgels/base/SRC/dlaed4.c new file mode 100644 index 0000000..7eaa921 --- /dev/null +++ b/min-dgels/base/SRC/dlaed4.c @@ -0,0 +1,954 @@ +/* dlaed4.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal a, b, c__; + integer j; + doublereal w; + integer ii; + doublereal dw, zz[3]; + integer ip1; + doublereal del, eta, phi, eps, tau, psi; + integer iim1, iip1; + doublereal dphi, dpsi; + integer iter; + doublereal
temp, prew, temp1, dltlb, dltub, midpt; + integer niter; + logical swtch; + extern /* Subroutine */ int _starpu_dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + logical swtch3; + extern doublereal _starpu_dlamch_(char *); + logical orgati; + doublereal erretm, rhoinv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine computes the I-th updated eigenvalue of a symmetric */ +/* rank-one modification to a diagonal matrix whose elements are */ +/* given in the array d, and that */ + +/* D(i) < D(j) for i < j */ + +/* and that RHO > 0. This is arranged by the calling routine, and is */ +/* no loss in generality. The rank-one modified system is thus */ + +/* diag( D ) + RHO * Z * Z_transpose. */ + +/* where we assume the Euclidean norm of Z is 1. */ + +/* The method consists of approximating the rational functions in the */ +/* secular equation by simpler interpolating rational functions. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The length of all arrays. */ + +/* I (input) INTEGER */ +/* The index of the eigenvalue to be computed. 1 <= I <= N. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The original eigenvalues. It is assumed that they are in */ +/* order, D(I) < D(J) for I < J. */ + +/* Z (input) DOUBLE PRECISION array, dimension (N) */ +/* The components of the updating vector. */ + +/* DELTA (output) DOUBLE PRECISION array, dimension (N) */ +/* If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th */ +/* component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 */ +/* for detail. 
The vector DELTA contains the information necessary */ +/* to construct the eigenvectors by DLAED3 and DLAED9. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The scalar in the symmetric updating formula. */ + +/* DLAM (output) DOUBLE PRECISION */ +/* The computed lambda_I, the I-th updated eigenvalue. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: if INFO = 1, the updating process failed. */ + +/* Internal Parameters */ +/* =================== */ + +/* Logical variable ORGATI (origin-at-i?) is used for distinguishing */ +/* whether D(i) or D(i+1) is treated as the origin. */ + +/* ORGATI = .true. origin at i */ +/* ORGATI = .false. origin at i+1 */ + +/* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ +/* if we are working with THREE poles! */ + +/* MAXIT is the maximum number of iterations allowed for each */ +/* eigenvalue. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ren-Cang Li, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Since this routine is called in an inner loop, we do no argument */ +/* checking. */ + +/* Quick return for N=1 and 2. */ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.; + return 0; + } + if (*n == 2) { + _starpu_dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = _starpu_dlamch_("Epsilon"); + rhoinv = 1. 
/ *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.; + +/* If ||Z||_2 is not one, then TEMP should be set to */ +/* RHO * ||Z||_2^2 / TWO */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* It can be proved that */ +/* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); + } + +/* It can be proved that */ +/* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */ + + dltlb = 0.; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { +/* ETA = B/A */ +/* ETA = RHO - TAU */ + eta = dltub - tau; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.) { + +/* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 */ + +/* We choose d(i) as origin. */ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + dltlb = 0.; + dltub = midpt; + } else { + +/* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) */ + +/* We choose d(i+1) as origin. 
*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + dltlb = -midpt; + dltub = 0.; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* W is the value of the secular function with */ +/* its ii-th element removed. */ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! 
swtch3) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + prew = w; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (d__1 * d__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of DLAED4 */ + +} /* _starpu_dlaed4_ */ diff --git a/min-dgels/base/SRC/dlaed5.c b/min-dgels/base/SRC/dlaed5.c new file mode 100644 index 0000000..671a461 --- /dev/null +++ b/min-dgels/base/SRC/dlaed5.c @@ -0,0 +1,148 @@ +/* dlaed5.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal b, c__, w, del, tau, temp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine computes the I-th eigenvalue of a symmetric rank-one */ +/* modification of a 2-by-2 diagonal matrix */ + +/* diag( D ) + RHO * Z * transpose(Z) . */ + +/* The diagonal elements in the array D are assumed to satisfy */ + +/* D(i) < D(j) for i < j . */ + +/* We also assume RHO > 0 and that the Euclidean norm of the vector */ +/* Z is one. */ + +/* Arguments */ +/* ========= */ + +/* I (input) INTEGER */ +/* The index of the eigenvalue to be computed. I = 1 or I = 2. */ + +/* D (input) DOUBLE PRECISION array, dimension (2) */ +/* The original eigenvalues. We assume D(1) < D(2). */ + +/* Z (input) DOUBLE PRECISION array, dimension (2) */ +/* The components of the updating vector. */ + +/* DELTA (output) DOUBLE PRECISION array, dimension (2) */ +/* The vector DELTA contains the information necessary */ +/* to construct the eigenvectors. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The scalar in the symmetric updating formula. 
*/ + +/* DLAM (output) DOUBLE PRECISION */ +/* The computed lambda_I, the I-th updated eigenvalue. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ren-Cang Li, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. 
/ (-b + sqrt(b * b + c__ * 4.)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF DLAED5 */ + +} /* _starpu_dlaed5_ */ diff --git a/min-dgels/base/SRC/dlaed6.c b/min-dgels/base/SRC/dlaed6.c new file mode 100644 index 0000000..ae0aebc --- /dev/null +++ b/min-dgels/base/SRC/dlaed6.c @@ -0,0 +1,374 @@ +/* dlaed6.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + doublereal a, b, c__, f; + integer i__; + doublereal fc, df, ddf, lbd, eta, ubd, eps, base; + integer iter; + doublereal temp, temp1, temp2, temp3, temp4; + logical scale; + integer niter; + doublereal small1, small2, sminv1, sminv2; + extern doublereal _starpu_dlamch_(char *); + doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* February 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAED6 computes the positive or negative root (closest to the origin) */ +/* of */ +/* z(1) z(2) z(3) */ +/* f(x) = rho + --------- + ---------- + --------- */ +/* d(1)-x d(2)-x d(3)-x */ + +/* It is assumed that */ + +/* if ORGATI = .true. the root is between d(2) and d(3); */ +/* otherwise it is between d(1) and d(2) */ + +/* This routine will be called by DLAED4 when necessary. In most cases, */ +/* the root sought is the smallest in magnitude, though it might not be */ +/* in some extremely rare situations. */ + +/* Arguments */ +/* ========= */ + +/* KNITER (input) INTEGER */ +/* Refer to DLAED4 for its significance. */ + +/* ORGATI (input) LOGICAL */ +/* If ORGATI is true, the needed root is between d(2) and */ +/* d(3); otherwise it is between d(1) and d(2). See */ +/* DLAED4 for further details. */ + +/* RHO (input) DOUBLE PRECISION */ +/* Refer to the equation f(x) above. */ + +/* D (input) DOUBLE PRECISION array, dimension (3) */ +/* D satisfies d(1) < d(2) < d(3). */ + +/* Z (input) DOUBLE PRECISION array, dimension (3) */ +/* Each of the elements in z must be positive. */ + +/* FINIT (input) DOUBLE PRECISION */ +/* The value of f at 0. It is more accurate than the one */ +/* evaluated inside this routine (if someone wants to do */ +/* so). */ + +/* TAU (output) DOUBLE PRECISION */ +/* The root of the equation f(x). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: if INFO = 1, failure to converge */ + +/* Further Details */ +/* =============== */ + +/* 30/06/99: Based on contributions by */ +/* Ren-Cang Li, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* 10/02/03: This version has a few statements commented out for thread */ +/* safety (machine parameters are computed on each entry). SJH. */ + +/* 05/10/06: Modified from a new version of Ren-Cang Li, use */ +/* Gragg-Thornton-Warner cubic convergent scheme for better stability. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + *info = 0; + + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.) { + lbd = 0.; + } else { + ubd = 0.; + } + + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) { + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) + )); + } + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { + *tau = 0.; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.) 
{ + lbd = *tau; + } else { + ubd = *tau; + } + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } + } + } + +/* get machine parameters for possible scaling to avoid overflow */ + +/* modified by Sven: parameters SMALL1, SMINV1, SMALL2, */ +/* SMINV2, EPS are not SAVEd anymore between one call to the */ +/* others but recomputed at each call */ + + eps = _starpu_dlamch_("Epsilon"); + base = _starpu_dlamch_("Base"); + i__1 = (integer) (log(_starpu_dlamch_("SafMin")) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + +/* Determine if scaling of inputs necessary to avoid overflow */ +/* when computing 1/TEMP**3 */ + + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. 
/ (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (abs(f) <= 0.) { + goto L60; + } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } + +/* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */ +/* scheme */ + +/* It is not hard to see that */ + +/* 1) Iterations will go up monotonically */ +/* if FINIT < 0; */ + +/* 2) Iterations will go down monotonically */ +/* if FINIT > 0. */ + + iter = niter + 1; + + for (niter = iter; niter <= 40; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) { + eta = -f / df; + } + + *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; + if (abs(f) <= eps * erretm) { + goto L60; + } + if (f <= 0.) 
{ + lbd = *tau; + } else { + ubd = *tau; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of DLAED6 */ + +} /* _starpu_dlaed6_ */ diff --git a/min-dgels/base/SRC/dlaed7.c b/min-dgels/base/SRC/dlaed7.c new file mode 100644 index 0000000..3a63941 --- /dev/null +++ b/min-dgels/base/SRC/dlaed7.c @@ -0,0 +1,354 @@ +/* dlaed7.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static doublereal c_b10 = 1.; +static doublereal c_b11 = 0.; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer indxc, indxp; + extern /* Subroutine */ int _starpu_dlaed8_(integer 
*, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), _starpu_dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), _starpu_dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + integer idlmda; + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); + integer coltyp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED7 computes the updated eigensystem of a diagonal */ +/* matrix after modification by a rank-one symmetric matrix. This */ +/* routine is used only for the eigenproblem which requires all */ +/* eigenvalues and optionally eigenvectors of a dense symmetric matrix */ +/* that has been reduced to tridiagonal form. DLAED1 handles */ +/* the case in which all eigenvalues and eigenvectors of a symmetric */ +/* tridiagonal matrix are desired. */ + +/* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */ + +/* where Z = Q'u, u is a vector of length N with ones in the */ +/* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ + +/* The eigenvectors of the original matrix are stored in Q, and the */ +/* eigenvalues are in D. 
The algorithm consists of three stages: */ + +/* The first stage consists of deflating the size of the problem */ +/* when there are multiple eigenvalues or if there is a zero in */ +/* the Z vector. For each such occurence the dimension of the */ +/* secular equation problem is reduced by one. This stage is */ +/* performed by the routine DLAED8. */ + +/* The second stage consists of calculating the updated */ +/* eigenvalues. This is done by finding the roots of the secular */ +/* equation via the routine DLAED4 (as called by DLAED9). */ +/* This routine also calculates the eigenvectors of the current */ +/* problem. */ + +/* The final stage consists of computing the updated eigenvectors */ +/* directly using the updated eigenvalues. The eigenvectors for */ +/* the current problem are multiplied with the eigenvectors from */ +/* the overall problem. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* = 0: Compute eigenvalues only. */ +/* = 1: Compute eigenvectors of original dense symmetric matrix */ +/* also. On entry, Q contains the orthogonal matrix used */ +/* to reduce the original matrix to tridiagonal form. */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* QSIZ (input) INTEGER */ +/* The dimension of the orthogonal matrix used to reduce */ +/* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ + +/* TLVLS (input) INTEGER */ +/* The total number of merging levels in the overall divide and */ +/* conquer tree. */ + +/* CURLVL (input) INTEGER */ +/* The current level in the overall merge routine, */ +/* 0 <= CURLVL <= TLVLS. */ + +/* CURPBM (input) INTEGER */ +/* The current problem in the current level in the overall */ +/* merge routine (counting from upper left to lower right). */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the eigenvalues of the rank-1-perturbed matrix. */ +/* On exit, the eigenvalues of the repaired matrix. 
*/ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* On entry, the eigenvectors of the rank-1-perturbed matrix. */ +/* On exit, the eigenvectors of the repaired tridiagonal matrix. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* INDXQ (output) INTEGER array, dimension (N) */ +/* The permutation which will reintegrate the subproblem just */ +/* solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) */ +/* will be in ascending order. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The subdiagonal element used to create the rank-1 */ +/* modification. */ + +/* CUTPNT (input) INTEGER */ +/* Contains the location of the last eigenvalue in the leading */ +/* sub-matrix. min(1,N) <= CUTPNT <= N. */ + +/* QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) */ +/* Stores eigenvectors of submatrices encountered during */ +/* divide and conquer, packed together. QPTR points to */ +/* beginning of the submatrices. */ + +/* QPTR (input/output) INTEGER array, dimension (N+2) */ +/* List of indices pointing to beginning of submatrices stored */ +/* in QSTORE. The submatrices are numbered starting at the */ +/* bottom left of the divide and conquer tree, from left to */ +/* right and bottom to top. */ + +/* PRMPTR (input) INTEGER array, dimension (N lg N) */ +/* Contains a list of pointers which indicate where in PERM a */ +/* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ +/* indicates the size of the permutation and also the size of */ +/* the full, non-deflated problem. */ + +/* PERM (input) INTEGER array, dimension (N lg N) */ +/* Contains the permutations (from deflation and sorting) to be */ +/* applied to each eigenblock. */ + +/* GIVPTR (input) INTEGER array, dimension (N lg N) */ +/* Contains a list of pointers which indicate where in GIVCOL a */ +/* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ +/* indicates the number of Givens rotations. 
*/ + +/* GIVCOL (input) INTEGER array, dimension (2, N lg N) */ +/* Each pair of numbers indicates a pair of columns to take place */ +/* in a Givens rotation. */ + +/* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) */ +/* Each number indicates the S value to be used in the */ +/* corresponding Givens rotation. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) */ + +/* IWORK (workspace) INTEGER array, dimension (4*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, an eigenvalue did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* The following values are for bookkeeping purposes only. 
They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLAED8 and DLAED9. */ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* Form the z-vector which consists of the last row of Q_1 and the */ +/* first row of Q_2. */ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + _starpu_dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* When solving the final problem, we no longer need the stored data, */ +/* so we will overwrite the data from this level onto the previously */ +/* used storage space. */ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + _starpu_dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. 
*/ + + if (k != 0) { + _starpu_dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + _starpu_dgemm_("N", "N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b11, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of DLAED7 */ + +} /* _starpu_dlaed7_ */ diff --git a/min-dgels/base/SRC/dlaed8.c b/min-dgels/base/SRC/dlaed8.c new file mode 100644 index 0000000..3fcf075 --- /dev/null +++ b/min-dgels/base/SRC/dlaed8.c @@ -0,0 +1,475 @@ +/* dlaed8.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b3 = -1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, 
q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer i__, j; + doublereal s, t; + integer k2, n1, n2, jp, n1p1; + doublereal eps, tau, tol; + integer jlam, imax, jmax; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dscal_( + integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED8 merges the two sets of eigenvalues together into a single */ +/* sorted set. Then it tries to deflate the size of the problem. */ +/* There are two ways in which deflation can occur: when two or more */ +/* eigenvalues are close together or if there is a tiny element in the */ +/* Z vector. For each such occurrence the order of the related secular */ +/* equation problem is reduced by one. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* = 0: Compute eigenvalues only. */ +/* = 1: Compute eigenvectors of original dense symmetric matrix */ +/* also. On entry, Q contains the orthogonal matrix used */ +/* to reduce the original matrix to tridiagonal form. 
*/ + +/* K (output) INTEGER */ +/* The number of non-deflated eigenvalues, and the order of the */ +/* related secular equation. */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* QSIZ (input) INTEGER */ +/* The dimension of the orthogonal matrix used to reduce */ +/* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the eigenvalues of the two submatrices to be */ +/* combined. On exit, the trailing (N-K) updated eigenvalues */ +/* (those which were deflated) sorted into increasing order. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* If ICOMPQ = 0, Q is not referenced. Otherwise, */ +/* on entry, Q contains the eigenvectors of the partially solved */ +/* system which has been previously updated in matrix */ +/* multiplies with other partially solved eigensystems. */ +/* On exit, Q contains the trailing (N-K) updated eigenvectors */ +/* (those which were deflated) in its last N-K columns. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* INDXQ (input) INTEGER array, dimension (N) */ +/* The permutation which separately sorts the two sub-problems */ +/* in D into ascending order. Note that elements in the second */ +/* half of this permutation must first have CUTPNT added to */ +/* their values in order to be accurate. */ + +/* RHO (input/output) DOUBLE PRECISION */ +/* On entry, the off-diagonal element associated with the rank-1 */ +/* cut which originally split the two submatrices which are now */ +/* being recombined. */ +/* On exit, RHO has been modified to the value required by */ +/* DLAED3. */ + +/* CUTPNT (input) INTEGER */ +/* The location of the last eigenvalue in the leading */ +/* sub-matrix. min(1,N) <= CUTPNT <= N. 
*/ + +/* Z (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, Z contains the updating vector (the last row of */ +/* the first sub-eigenvector matrix and the first row of the */ +/* second sub-eigenvector matrix). */ +/* On exit, the contents of Z are destroyed by the updating */ +/* process. */ + +/* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ +/* A copy of the first K eigenvalues which will be used by */ +/* DLAED3 to form the secular equation. */ + +/* Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) */ +/* If ICOMPQ = 0, Q2 is not referenced. Otherwise, */ +/* a copy of the first K eigenvectors which will be used by */ +/* DLAED7 in a matrix multiply (DGEMM) to update the new */ +/* eigenvectors. */ + +/* LDQ2 (input) INTEGER */ +/* The leading dimension of the array Q2. LDQ2 >= max(1,N). */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first k values of the final deflation-altered z-vector and */ +/* will be passed to DLAED3. */ + +/* PERM (output) INTEGER array, dimension (N) */ +/* The permutations (from deflation and sorting) to be applied */ +/* to each eigenblock. */ + +/* GIVPTR (output) INTEGER */ +/* The number of Givens rotations which took place in this */ +/* subproblem. */ + +/* GIVCOL (output) INTEGER array, dimension (2, N) */ +/* Each pair of numbers indicates a pair of columns to take place */ +/* in a Givens rotation. */ + +/* GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) */ +/* Each number indicates the S value to be used in the */ +/* corresponding Givens rotation. */ + +/* INDXP (workspace) INTEGER array, dimension (N) */ +/* The permutation used to place deflated values of D at the end */ +/* of the array. INDXP(1:K) points to the nondeflated D-values */ +/* and INDXP(K+1:N) points to the deflated eigenvalues. */ + +/* INDX (workspace) INTEGER array, dimension (N) */ +/* The permutation used to sort the contents of D into ascending */ +/* order. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. 
/ sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + _starpu_dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + _starpu_dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = _starpu_idamax_(n, &z__[1], &c__1); + jmax = _starpu_idamax_(n, &d__[1], &c__1); + eps = _starpu_dlamch_("Epsilon"); + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* If the rank-1 modifier is small enough, no more needs to be done */ +/* except to reorganize Q so that its columns correspond with the */ +/* elements in D. */ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + _starpu_dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* If there are multiple eigenvalues then the problem deflates. Here */ +/* the number of equal eigenvalues are found. As each equal */ +/* eigenvalue is found, an elementary reflector is computed to rotate */ +/* the corresponding eigensubspace so that the corresponding */ +/* components of Z are zero in this new basis.
*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = _starpu_dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + _starpu_drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ +/* and Q2 respectively. 
The eigenvalues/vectors which were not */ +/* deflated go into the first K slots of DLAMDA and Q2 respectively, */ +/* while those which were deflated go into the last N - K slots. */ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] +, &c__1); +/* L130: */ + } + } + +/* The deflated eigenvalues and their corresponding vectors go back */ +/* into the last N - K slots of D and Q respectively. */ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + _starpu_dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of DLAED8 */ + +} /* _starpu_dlaed8_ */ diff --git a/min-dgels/base/SRC/dlaed9.c b/min-dgels/base/SRC/dlaed9.c new file mode 100644 index 0000000..fa731eb --- /dev/null +++ b/min-dgels/base/SRC/dlaed9.c @@ -0,0 +1,274 @@ +/* dlaed9.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j; + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAED9 finds the roots of the secular equation, as defined by the */ +/* values in D, Z, and RHO, between KSTART and KSTOP. 
It makes the */ +/* appropriate calls to DLAED4 and then stores the new matrix of */ +/* eigenvectors for use in calculating the next level of Z vectors. */ + +/* Arguments */ +/* ========= */ + +/* K (input) INTEGER */ +/* The number of terms in the rational function to be solved by */ +/* DLAED4. K >= 0. */ + +/* KSTART (input) INTEGER */ +/* KSTOP (input) INTEGER */ +/* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP */ +/* are to be computed. 1 <= KSTART <= KSTOP <= K. */ + +/* N (input) INTEGER */ +/* The number of rows and columns in the Q matrix. */ +/* N >= K (deflation may result in N > K). */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* D(I) contains the updated eigenvalues */ +/* for KSTART <= I <= KSTOP. */ + +/* Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max( 1, N ). */ + +/* RHO (input) DOUBLE PRECISION */ +/* The value of the parameter in the rank one update equation. */ +/* RHO >= 0 required. */ + +/* DLAMDA (input) DOUBLE PRECISION array, dimension (K) */ +/* The first K elements of this array contain the old roots */ +/* of the deflated updating problem. These are the poles */ +/* of the secular equation. */ + +/* W (input) DOUBLE PRECISION array, dimension (K) */ +/* The first K elements of this array contain the components */ +/* of the deflation-adjusted updating vector. */ + +/* S (output) DOUBLE PRECISION array, dimension (LDS, K) */ +/* Will contain the eigenvectors of the repaired matrix which */ +/* will be stored for subsequent Z vector calculation and */ +/* multiplied by the previously accumulated eigenvectors */ +/* to update the system. */ + +/* LDS (input) INTEGER */ +/* The leading dimension of S. LDS >= max( 1, K ). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value.
*/ +/* > 0: if INFO = 1, an eigenvalue did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DLAMDA(I) if it is 1; this makes the subsequent */ +/* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DLAMDA(I) at all. 
On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DLAMDA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMDA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = _starpu_dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + _starpu_dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + _starpu_dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + _starpu_dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification.
*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = _starpu_dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of DLAED9 */ + +} /* _starpu_dlaed9_ */ diff --git a/min-dgels/base/SRC/dlaeda.c b/min-dgels/base/SRC/dlaeda.c new file mode 100644 index 0000000..ec4cc22 --- /dev/null +++ b/min-dgels/base/SRC/dlaeda.c @@ -0,0 +1,287 @@ +/* dlaeda.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static doublereal c_b24 = 1.; +static doublereal c_b26 = 0.; + +/* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + integer i__, k, mid, ptr; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int 
_starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAEDA computes the Z vector corresponding to the merge step in the */ +/* CURLVLth step of the merge process with TLVLS steps for the CURPBMth */ +/* problem. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* TLVLS (input) INTEGER */ +/* The total number of merging levels in the overall divide and */ +/* conquer tree. */ + +/* CURLVL (input) INTEGER */ +/* The current level in the overall merge routine, */ +/* 0 <= curlvl <= tlvls. */ + +/* CURPBM (input) INTEGER */ +/* The current problem in the current level in the overall */ +/* merge routine (counting from upper left to lower right). */ + +/* PRMPTR (input) INTEGER array, dimension (N lg N) */ +/* Contains a list of pointers which indicate where in PERM a */ +/* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ +/* indicates the size of the permutation and incidentally the */ +/* size of the full, non-deflated problem. */ + +/* PERM (input) INTEGER array, dimension (N lg N) */ +/* Contains the permutations (from deflation and sorting) to be */ +/* applied to each eigenblock. */ + +/* GIVPTR (input) INTEGER array, dimension (N lg N) */ +/* Contains a list of pointers which indicate where in GIVCOL a */ +/* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ +/* indicates the number of Givens rotations. 
*/ + +/* GIVCOL (input) INTEGER array, dimension (2, N lg N) */ +/* Each pair of numbers indicates a pair of columns to take place */ +/* in a Givens rotation. */ + +/* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) */ +/* Each number indicates the S value to be used in the */ +/* corresponding Givens rotation. */ + +/* Q (input) DOUBLE PRECISION array, dimension (N**2) */ +/* Contains the square eigenblocks from previous levels, the */ +/* starting positions for blocks are given by QPTR. */ + +/* QPTR (input) INTEGER array, dimension (N+2) */ +/* Contains a list of pointers which indicate where in Q an */ +/* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates */ +/* the size of the block. */ + +/* Z (output) DOUBLE PRECISION array, dimension (N) */ +/* On output this vector contains the updating vector (the last */ +/* row of the first sub-eigenvector matrix and the first row of */ +/* the second sub-eigenvector matrix). */ + +/* ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* Determine location of lowest level subproblem in the full storage */ +/* scheme */ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* Determine size of these matrices. We add HALF to the value of */ +/* the SQRT in case the machine underestimates one of these square */ +/* roots. */ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + _starpu_dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + _starpu_dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } + +/* Loop thru remaining levels 1 -> CURLVL applying the Givens */ +/* rotations and permutation and then multiplying the center matrices */ +/* against the current Z. 
*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + _starpu_drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + _starpu_drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* Multiply Blocks at CURR and CURR+1 */ + +/* Determine size of these matrices. We add HALF to the value of */ +/* the SQRT in case the machine underestimates one of these */ +/* square roots. 
*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + _starpu_dgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + _starpu_dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + _starpu_dgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + _starpu_dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of DLAEDA */ + +} /* _starpu_dlaeda_ */ diff --git a/min-dgels/base/SRC/dlaein.c b/min-dgels/base/SRC/dlaein.c new file mode 100644 index 0000000..4422b44 --- /dev/null +++ b/min-dgels/base/SRC/dlaein.c @@ -0,0 +1,677 @@ +/* dlaein.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlaein_(logical *rightv, logical *noinit, integer *n, + doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, + doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, + doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * + bignum, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, h_dim1, h_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal w, x, y; + integer i1, i2, i3; + doublereal w1, ei, ej, xi, xr, rec; + integer its, ierr; + doublereal temp, norm, vmax; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + char trans[1]; + doublereal vcrit, rootn, vnorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + doublereal absbii, absbjj; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlatrs_( + char *, char *, char *, char *, integer *, doublereal *, integer * +, doublereal *, doublereal *, doublereal *, integer *); + char normin[1]; + doublereal nrmsml, 
growto; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAEIN uses inverse iteration to find a right or left eigenvector */ +/* corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg */ +/* matrix H. */ + +/* Arguments */ +/* ========= */ + +/* RIGHTV (input) LOGICAL */ +/* = .TRUE. : compute right eigenvector; */ +/* = .FALSE.: compute left eigenvector. */ + +/* NOINIT (input) LOGICAL */ +/* = .TRUE. : no initial vector supplied in (VR,VI). */ +/* = .FALSE.: initial vector supplied in (VR,VI). */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N >= 0. */ + +/* H (input) DOUBLE PRECISION array, dimension (LDH,N) */ +/* The upper Hessenberg matrix H. */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH >= max(1,N). */ + +/* WR (input) DOUBLE PRECISION */ +/* WI (input) DOUBLE PRECISION */ +/* The real and imaginary parts of the eigenvalue of H whose */ +/* corresponding right or left eigenvector is to be computed. */ + +/* VR (input/output) DOUBLE PRECISION array, dimension (N) */ +/* VI (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain */ +/* a real starting vector for inverse iteration using the real */ +/* eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI */ +/* must contain the real and imaginary parts of a complex */ +/* starting vector for inverse iteration using the complex */ +/* eigenvalue (WR,WI); otherwise VR and VI need not be set. */ +/* On exit, if WI = 0.0 (real eigenvalue), VR contains the */ +/* computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), */ +/* VR and VI contain the real and imaginary parts of the */ +/* computed complex eigenvector. 
The eigenvector is normalized */ +/* so that the component of largest magnitude has magnitude 1; */ +/* here the magnitude of a complex number (x,y) is taken to be */ +/* |x| + |y|. */ +/* VI is not referenced if WI = 0.0. */ + +/* B (workspace) DOUBLE PRECISION array, dimension (LDB,N) */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= N+1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* EPS3 (input) DOUBLE PRECISION */ +/* A small machine-dependent value which is used to perturb */ +/* close eigenvalues, and to replace zero pivots. */ + +/* SMLNUM (input) DOUBLE PRECISION */ +/* A machine-dependent value close to the underflow threshold. */ + +/* BIGNUM (input) DOUBLE PRECISION */ +/* A machine-dependent value close to the overflow threshold. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* = 1: inverse iteration did not converge; VR is set to the */ +/* last iterate, and so is VI if WI.ne.0.0. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --vr; + --vi; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + +/* GROWTO is the threshold used in the acceptance test for an */ +/* eigenvector. */ + + rootn = sqrt((doublereal) (*n)); + growto = .1 / rootn; +/* Computing MAX */ + d__1 = 1., d__2 = *eps3 * rootn; + nrmsml = max(d__1,d__2) * *smlnum; + +/* Form B = H - (WR,WI)*I (except that the subdiagonal elements and */ +/* the imaginary parts of the diagonal elements are not stored). 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = h__[i__ + j * h_dim1]; +/* L10: */ + } + b[j + j * b_dim1] = h__[j + j * h_dim1] - *wr; +/* L20: */ + } + + if (*wi == 0.) { + +/* Real eigenvalue. */ + + if (*noinit) { + +/* Set initial vector. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + vr[i__] = *eps3; +/* L30: */ + } + } else { + +/* Scale supplied initial vector. */ + + vnorm = _starpu_dnrm2_(n, &vr[1], &c__1); + d__1 = *eps3 * rootn / max(vnorm,nrmsml); + _starpu_dscal_(n, &d__1, &vr[1], &c__1); + } + + if (*rightv) { + +/* LU decomposition with partial pivoting of B, replacing zero */ +/* pivots by EPS3. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + ei = h__[i__ + 1 + i__ * h_dim1]; + if ((d__1 = b[i__ + i__ * b_dim1], abs(d__1)) < abs(ei)) { + +/* Interchange rows and eliminate. */ + + x = b[i__ + i__ * b_dim1] / ei; + b[i__ + i__ * b_dim1] = ei; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + temp = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - x * + temp; + b[i__ + j * b_dim1] = temp; +/* L40: */ + } + } else { + +/* Eliminate without interchange. */ + + if (b[i__ + i__ * b_dim1] == 0.) { + b[i__ + i__ * b_dim1] = *eps3; + } + x = ei / b[i__ + i__ * b_dim1]; + if (x != 0.) { + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + b[i__ + 1 + j * b_dim1] -= x * b[i__ + j * b_dim1] + ; +/* L50: */ + } + } + } +/* L60: */ + } + if (b[*n + *n * b_dim1] == 0.) { + b[*n + *n * b_dim1] = *eps3; + } + + *(unsigned char *)trans = 'N'; + + } else { + +/* UL decomposition with partial pivoting of B, replacing zero */ +/* pivots by EPS3. */ + + for (j = *n; j >= 2; --j) { + ej = h__[j + (j - 1) * h_dim1]; + if ((d__1 = b[j + j * b_dim1], abs(d__1)) < abs(ej)) { + +/* Interchange columns and eliminate. 
*/ + + x = b[j + j * b_dim1] / ej; + b[j + j * b_dim1] = ej; + i__1 = j - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = b[i__ + (j - 1) * b_dim1]; + b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - x * + temp; + b[i__ + j * b_dim1] = temp; +/* L70: */ + } + } else { + +/* Eliminate without interchange. */ + + if (b[j + j * b_dim1] == 0.) { + b[j + j * b_dim1] = *eps3; + } + x = ej / b[j + j * b_dim1]; + if (x != 0.) { + i__1 = j - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + (j - 1) * b_dim1] -= x * b[i__ + j * + b_dim1]; +/* L80: */ + } + } + } +/* L90: */ + } + if (b[b_dim1 + 1] == 0.) { + b[b_dim1 + 1] = *eps3; + } + + *(unsigned char *)trans = 'T'; + + } + + *(unsigned char *)normin = 'N'; + i__1 = *n; + for (its = 1; its <= i__1; ++its) { + +/* Solve U*x = scale*v for a right eigenvector */ +/* or U'*x = scale*v for a left eigenvector, */ +/* overwriting x on v. */ + + _starpu_dlatrs_("Upper", trans, "Nonunit", normin, n, &b[b_offset], ldb, & + vr[1], &scale, &work[1], &ierr); + *(unsigned char *)normin = 'Y'; + +/* Test for sufficient growth in the norm of v. */ + + vnorm = _starpu_dasum_(n, &vr[1], &c__1); + if (vnorm >= growto * scale) { + goto L120; + } + +/* Choose new orthogonal starting vector and try again. */ + + temp = *eps3 / (rootn + 1.); + vr[1] = *eps3; + i__2 = *n; + for (i__ = 2; i__ <= i__2; ++i__) { + vr[i__] = temp; +/* L100: */ + } + vr[*n - its + 1] -= *eps3 * rootn; +/* L110: */ + } + +/* Failure to find eigenvector in N iterations. */ + + *info = 1; + +L120: + +/* Normalize eigenvector. */ + + i__ = _starpu_idamax_(n, &vr[1], &c__1); + d__2 = 1. / (d__1 = vr[i__], abs(d__1)); + _starpu_dscal_(n, &d__2, &vr[1], &c__1); + } else { + +/* Complex eigenvalue. */ + + if (*noinit) { + +/* Set initial vector. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + vr[i__] = *eps3; + vi[i__] = 0.; +/* L130: */ + } + } else { + +/* Scale supplied initial vector. 
*/ + + d__1 = _starpu_dnrm2_(n, &vr[1], &c__1); + d__2 = _starpu_dnrm2_(n, &vi[1], &c__1); + norm = _starpu_dlapy2_(&d__1, &d__2); + rec = *eps3 * rootn / max(norm,nrmsml); + _starpu_dscal_(n, &rec, &vr[1], &c__1); + _starpu_dscal_(n, &rec, &vi[1], &c__1); + } + + if (*rightv) { + +/* LU decomposition with partial pivoting of B, replacing zero */ +/* pivots by EPS3. */ + +/* The imaginary part of the (i,j)-th element of U is stored in */ +/* B(j+1,i). */ + + b[b_dim1 + 2] = -(*wi); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + b[i__ + 1 + b_dim1] = 0.; +/* L140: */ + } + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + absbii = _starpu_dlapy2_(&b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * + b_dim1]); + ei = h__[i__ + 1 + i__ * h_dim1]; + if (absbii < abs(ei)) { + +/* Interchange rows and eliminate. */ + + xr = b[i__ + i__ * b_dim1] / ei; + xi = b[i__ + 1 + i__ * b_dim1] / ei; + b[i__ + i__ * b_dim1] = ei; + b[i__ + 1 + i__ * b_dim1] = 0.; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + temp = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - xr * + temp; + b[j + 1 + (i__ + 1) * b_dim1] = b[j + 1 + i__ * + b_dim1] - xi * temp; + b[i__ + j * b_dim1] = temp; + b[j + 1 + i__ * b_dim1] = 0.; +/* L150: */ + } + b[i__ + 2 + i__ * b_dim1] = -(*wi); + b[i__ + 1 + (i__ + 1) * b_dim1] -= xi * *wi; + b[i__ + 2 + (i__ + 1) * b_dim1] += xr * *wi; + } else { + +/* Eliminate without interchanging rows. */ + + if (absbii == 0.) 
{ + b[i__ + i__ * b_dim1] = *eps3; + b[i__ + 1 + i__ * b_dim1] = 0.; + absbii = *eps3; + } + ei = ei / absbii / absbii; + xr = b[i__ + i__ * b_dim1] * ei; + xi = -b[i__ + 1 + i__ * b_dim1] * ei; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + b[i__ + 1 + j * b_dim1] = b[i__ + 1 + j * b_dim1] - + xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ + * b_dim1]; + b[j + 1 + (i__ + 1) * b_dim1] = -xr * b[j + 1 + i__ * + b_dim1] - xi * b[i__ + j * b_dim1]; +/* L160: */ + } + b[i__ + 2 + (i__ + 1) * b_dim1] -= *wi; + } + +/* Compute 1-norm of offdiagonal elements of i-th row. */ + + i__2 = *n - i__; + i__3 = *n - i__; + work[i__] = _starpu_dasum_(&i__2, &b[i__ + (i__ + 1) * b_dim1], ldb) + + _starpu_dasum_(&i__3, &b[i__ + 2 + i__ * b_dim1], &c__1); +/* L170: */ + } + if (b[*n + *n * b_dim1] == 0. && b[*n + 1 + *n * b_dim1] == 0.) { + b[*n + *n * b_dim1] = *eps3; + } + work[*n] = 0.; + + i1 = *n; + i2 = 1; + i3 = -1; + } else { + +/* UL decomposition with partial pivoting of conjg(B), */ +/* replacing zero pivots by EPS3. */ + +/* The imaginary part of the (i,j)-th element of U is stored in */ +/* B(j+1,i). 
*/ + + b[*n + 1 + *n * b_dim1] = *wi; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + b[*n + 1 + j * b_dim1] = 0.; +/* L180: */ + } + + for (j = *n; j >= 2; --j) { + ej = h__[j + (j - 1) * h_dim1]; + absbjj = _starpu_dlapy2_(&b[j + j * b_dim1], &b[j + 1 + j * b_dim1]); + if (absbjj < abs(ej)) { + +/* Interchange columns and eliminate */ + + xr = b[j + j * b_dim1] / ej; + xi = b[j + 1 + j * b_dim1] / ej; + b[j + j * b_dim1] = ej; + b[j + 1 + j * b_dim1] = 0.; + i__1 = j - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = b[i__ + (j - 1) * b_dim1]; + b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - xr * + temp; + b[j + i__ * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * + temp; + b[i__ + j * b_dim1] = temp; + b[j + 1 + i__ * b_dim1] = 0.; +/* L190: */ + } + b[j + 1 + (j - 1) * b_dim1] = *wi; + b[j - 1 + (j - 1) * b_dim1] += xi * *wi; + b[j + (j - 1) * b_dim1] -= xr * *wi; + } else { + +/* Eliminate without interchange. */ + + if (absbjj == 0.) { + b[j + j * b_dim1] = *eps3; + b[j + 1 + j * b_dim1] = 0.; + absbjj = *eps3; + } + ej = ej / absbjj / absbjj; + xr = b[j + j * b_dim1] * ej; + xi = -b[j + 1 + j * b_dim1] * ej; + i__1 = j - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + (j - 1) * b_dim1] = b[i__ + (j - 1) * b_dim1] + - xr * b[i__ + j * b_dim1] + xi * b[j + 1 + + i__ * b_dim1]; + b[j + i__ * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - + xi * b[i__ + j * b_dim1]; +/* L200: */ + } + b[j + (j - 1) * b_dim1] += *wi; + } + +/* Compute 1-norm of offdiagonal elements of j-th column. */ + + i__1 = j - 1; + i__2 = j - 1; + work[j] = _starpu_dasum_(&i__1, &b[j * b_dim1 + 1], &c__1) + _starpu_dasum_(& + i__2, &b[j + 1 + b_dim1], ldb); +/* L210: */ + } + if (b[b_dim1 + 1] == 0. && b[b_dim1 + 2] == 0.) 
{ + b[b_dim1 + 1] = *eps3; + } + work[1] = 0.; + + i1 = 1; + i2 = *n; + i3 = 1; + } + + i__1 = *n; + for (its = 1; its <= i__1; ++its) { + scale = 1.; + vmax = 1.; + vcrit = *bignum; + +/* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, */ +/* or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector, */ +/* overwriting (xr,xi) on (vr,vi). */ + + i__2 = i2; + i__3 = i3; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + + if (work[i__] > vcrit) { + rec = 1. / vmax; + _starpu_dscal_(n, &rec, &vr[1], &c__1); + _starpu_dscal_(n, &rec, &vi[1], &c__1); + scale *= rec; + vmax = 1.; + vcrit = *bignum; + } + + xr = vr[i__]; + xi = vi[i__]; + if (*rightv) { + i__4 = *n; + for (j = i__ + 1; j <= i__4; ++j) { + xr = xr - b[i__ + j * b_dim1] * vr[j] + b[j + 1 + i__ + * b_dim1] * vi[j]; + xi = xi - b[i__ + j * b_dim1] * vi[j] - b[j + 1 + i__ + * b_dim1] * vr[j]; +/* L220: */ + } + } else { + i__4 = i__ - 1; + for (j = 1; j <= i__4; ++j) { + xr = xr - b[j + i__ * b_dim1] * vr[j] + b[i__ + 1 + j + * b_dim1] * vi[j]; + xi = xi - b[j + i__ * b_dim1] * vi[j] - b[i__ + 1 + j + * b_dim1] * vr[j]; +/* L230: */ + } + } + + w = (d__1 = b[i__ + i__ * b_dim1], abs(d__1)) + (d__2 = b[i__ + + 1 + i__ * b_dim1], abs(d__2)); + if (w > *smlnum) { + if (w < 1.) { + w1 = abs(xr) + abs(xi); + if (w1 > w * *bignum) { + rec = 1. / w1; + _starpu_dscal_(n, &rec, &vr[1], &c__1); + _starpu_dscal_(n, &rec, &vi[1], &c__1); + xr = vr[i__]; + xi = vi[i__]; + scale *= rec; + vmax *= rec; + } + } + +/* Divide by diagonal element of B. 
*/ + + _starpu_dladiv_(&xr, &xi, &b[i__ + i__ * b_dim1], &b[i__ + 1 + + i__ * b_dim1], &vr[i__], &vi[i__]); +/* Computing MAX */ + d__3 = (d__1 = vr[i__], abs(d__1)) + (d__2 = vi[i__], abs( + d__2)); + vmax = max(d__3,vmax); + vcrit = *bignum / vmax; + } else { + i__4 = *n; + for (j = 1; j <= i__4; ++j) { + vr[j] = 0.; + vi[j] = 0.; +/* L240: */ + } + vr[i__] = 1.; + vi[i__] = 1.; + scale = 0.; + vmax = 1.; + vcrit = *bignum; + } +/* L250: */ + } + +/* Test for sufficient growth in the norm of (VR,VI). */ + + vnorm = _starpu_dasum_(n, &vr[1], &c__1) + _starpu_dasum_(n, &vi[1], &c__1); + if (vnorm >= growto * scale) { + goto L280; + } + +/* Choose a new orthogonal starting vector and try again. */ + + y = *eps3 / (rootn + 1.); + vr[1] = *eps3; + vi[1] = 0.; + + i__3 = *n; + for (i__ = 2; i__ <= i__3; ++i__) { + vr[i__] = y; + vi[i__] = 0.; +/* L260: */ + } + vr[*n - its + 1] -= *eps3 * rootn; +/* L270: */ + } + +/* Failure to find eigenvector in N iterations */ + + *info = 1; + +L280: + +/* Normalize eigenvector. */ + + vnorm = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__3 = vnorm, d__4 = (d__1 = vr[i__], abs(d__1)) + (d__2 = vi[i__] + , abs(d__2)); + vnorm = max(d__3,d__4); +/* L290: */ + } + d__1 = 1. / vnorm; + _starpu_dscal_(n, &d__1, &vr[1], &c__1); + d__1 = 1. / vnorm; + _starpu_dscal_(n, &d__1, &vi[1], &c__1); + + } + + return 0; + +/* End of DLAEIN */ + +} /* _starpu_dlaein_ */ diff --git a/min-dgels/base/SRC/dlaev2.c b/min-dgels/base/SRC/dlaev2.c new file mode 100644 index 0000000..a65b771 --- /dev/null +++ b/min-dgels/base/SRC/dlaev2.c @@ -0,0 +1,188 @@ +/* dlaev2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + integer sgn1, sgn2; + doublereal acmn, acmx; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */ +/* [ A B ] */ +/* [ B C ]. */ +/* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */ +/* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */ +/* eigenvector for RT1, giving the decomposition */ + +/* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] */ +/* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. */ + +/* Arguments */ +/* ========= */ + +/* A (input) DOUBLE PRECISION */ +/* The (1,1) element of the 2-by-2 matrix. */ + +/* B (input) DOUBLE PRECISION */ +/* The (1,2) element and the conjugate of the (2,1) element of */ +/* the 2-by-2 matrix. */ + +/* C (input) DOUBLE PRECISION */ +/* The (2,2) element of the 2-by-2 matrix. */ + +/* RT1 (output) DOUBLE PRECISION */ +/* The eigenvalue of larger absolute value. */ + +/* RT2 (output) DOUBLE PRECISION */ +/* The eigenvalue of smaller absolute value. 
*/ + +/* CS1 (output) DOUBLE PRECISION */ +/* SN1 (output) DOUBLE PRECISION */ +/* The vector (CS1, SN1) is a unit right eigenvector for RT1. */ + +/* Further Details */ +/* =============== */ + +/* RT1 is accurate to a few ulps barring over/underflow. */ + +/* RT2 may be inaccurate if there is massive cancellation in the */ +/* determinant A*C-B*B; higher precision or correctly rounded or */ +/* correctly truncated arithmetic would be needed to compute RT2 */ +/* accurately in all cases. */ + +/* CS1 and SN1 are accurate to a few ulps barring over/underflow. */ + +/* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ +/* Underflow is harmless if the input data is 0 or exceeds */ +/* underflow_threshold / macheps. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + sgn1 = -1; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. 
*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of DLAEV2 */ + +} /* _starpu_dlaev2_ */ diff --git a/min-dgels/base/SRC/dlaexc.c b/min-dgels/base/SRC/dlaexc.c new file mode 100644 index 0000000..9793426 --- /dev/null +++ b/min-dgels/base/SRC/dlaexc.c @@ -0,0 +1,459 @@ +/* dlaexc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__3 = 3; + +/* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, + integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, + integer *n2, doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + doublereal d__[16] /* was [4][4] */; + integer k; + doublereal u[3], x[4] /* was [2][2] */; + 
integer j2, j3, j4; + doublereal u1[3], u2[3]; + integer nd; + doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, + tau2; + integer ierr; + doublereal temp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + doublereal scale, dnorm, xnorm; + extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlasy2_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_dlarfx_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *); + doublereal thresh, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in */ +/* an upper quasi-triangular matrix T by an orthogonal similarity */ +/* transformation. 
*/ + +/* T must be in Schur canonical form, that is, block upper triangular */ +/* with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block */ +/* has its diagonal elements equal and its off-diagonal elements of */ +/* opposite sign. */ + +/* Arguments */ +/* ========= */ + +/* WANTQ (input) LOGICAL */ +/* = .TRUE. : accumulate the transformation in the matrix Q; */ +/* = .FALSE.: do not accumulate the transformation. */ + +/* N (input) INTEGER */ +/* The order of the matrix T. N >= 0. */ + +/* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ +/* On entry, the upper quasi-triangular matrix T, in Schur */ +/* canonical form. */ +/* On exit, the updated matrix T, again in Schur canonical form. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if WANTQ is .TRUE., the orthogonal matrix Q. */ +/* On exit, if WANTQ is .TRUE., the updated matrix Q. */ +/* If WANTQ is .FALSE., Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. */ +/* LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. */ + +/* J1 (input) INTEGER */ +/* The index of the first row of the first block T11. */ + +/* N1 (input) INTEGER */ +/* The order of the first block T11. N1 = 0, 1 or 2. */ + +/* N2 (input) INTEGER */ +/* The order of the second block T22. N2 = 0, 1 or 2. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* = 1: the transformed matrix T would be too far from Schur */ +/* form; the blocks are not swapped and T and Q are */ +/* unchanged. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + + if (*n1 == 1 && *n2 == 1) { + +/* Swap two 1-by-1 blocks. */ + + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + +/* Determine the transformation to perform the interchange. */ + + d__1 = t22 - t11; + _starpu_dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + _starpu_drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], + ldt, &cs, &sn); + } + i__1 = *j1 - 1; + _starpu_drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, + &cs, &sn); + + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, + &cs, &sn); + } + + } else { + +/* Swapping involves at least one 2-by-2 block. */ + +/* Copy the diagonal block of order N1+N2 to the local array D */ +/* and compute its norm. */ + + nd = *n1 + *n2; + _starpu_dlacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); + dnorm = _starpu_dlange_("Max", &nd, &nd, d__, &c__4, &work[1]); + +/* Compute machine-dependent threshold for test for accepting */ +/* swap. */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; +/* Computing MAX */ + d__1 = eps * 10. * dnorm; + thresh = max(d__1,smlnum); + +/* Solve T11*X - X*T22 = scale*T12 for X. 
*/ + + _starpu_dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & + scale, x, &c__2, &xnorm, &ierr); + +/* Swap the adjacent diagonal blocks. */ + + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + } + +L10: + +/* N1 = 1, N2 = 2: generate elementary reflector H so that: */ + +/* ( scale, X11, X12 ) H = ( 0, 0, * ) */ + + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + _starpu_dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + _starpu_dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + _starpu_dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* Test whether to reject swap. */ + +/* Computing MAX */ + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + _starpu_dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + _starpu_dlarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + _starpu_dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L20: + +/* N1 = 2, N2 = 1: generate elementary reflector H so that: */ + +/* H ( -X11 ) = ( * ) */ +/* ( -X21 ) = ( 0 ) */ +/* ( scale ) = ( 0 ) */ + + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + _starpu_dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. 
*/ + + _starpu_dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + _starpu_dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* Test whether to reject swap. */ + +/* Computing MAX */ + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + _starpu_dlarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + i__1 = *n - *j1; + _starpu_dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ + 1]); + + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + _starpu_dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L30: + +/* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so */ +/* that: */ + +/* H(2) H(1) ( -X11 -X12 ) = ( * * ) */ +/* ( -X21 -X22 ) ( 0 * ) */ +/* ( scale 0 ) ( 0 0 ) */ +/* ( 0 scale ) ( 0 0 ) */ + + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + _starpu_dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + _starpu_dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + +/* Perform swap provisionally on diagonal block in D. */ + + _starpu_dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) + ; + _starpu_dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) + ; + _starpu_dlarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); + _starpu_dlarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); + +/* Test whether to reject swap. 
*/ + +/* Computing MAX */ + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1,d__2), d__2 = + abs(d__[3]), d__1 = max(d__1,d__2), d__2 = abs(d__[7]); + if (max(d__1,d__2) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + _starpu_dlarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + _starpu_dlarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ + 1]); + i__1 = *n - *j1 + 1; + _starpu_dlarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & + work[1]); + _starpu_dlarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] +); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + _starpu_dlarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & + work[1]); + _starpu_dlarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ + 1]); + } + +L40: + + if (*n2 == 2) { + +/* Standardize new 2-by-2 block T11 */ + + _starpu_dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * + j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & + wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + _starpu_drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) + * t_dim1], ldt, &cs, &sn); + i__1 = *j1 - 1; + _starpu_drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + if (*n1 == 2) { + +/* Standardize new 2-by-2 block T22 */ + + j3 = *j1 + *n2; + j4 = j3 + 1; + _starpu_dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * + t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & + cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + _starpu_drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) + * t_dim1], ldt, &cs, &sn); 
+ } + i__1 = j3 - 1; + _starpu_drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + _starpu_drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + } + return 0; + +/* Exit with INFO = 1 if swap was rejected. */ + +L50: + *info = 1; + return 0; + +/* End of DLAEXC */ + +} /* _starpu_dlaexc_ */ diff --git a/min-dgels/base/SRC/dlag2.c b/min-dgels/base/SRC/dlag2.c new file mode 100644 index 0000000..a7b2315 --- /dev/null +++ b/min-dgels/base/SRC/dlag2.c @@ -0,0 +1,356 @@ +/* dlag2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * + scale2, doublereal *wr1, doublereal *wr2, doublereal *wi) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal r__, c1, c2, c3, c4, c5, s1, s2, a11, a12, a21, a22, b11, b12, + b22, pp, qq, ss, as11, as12, as22, sum, abi22, diff, bmin, wbig, + wabs, wdet, binv11, binv22, discr, anorm, bnorm, bsize, shift, + rtmin, rtmax, wsize, ascale, bscale, wscale, safmax, wsmall; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue */ +/* problem A - w B, with scaling as necessary to avoid over-/underflow. */ + +/* The scaling factor "s" results in a modified eigenvalue equation */ + +/* s A - w B */ + +/* where s is a non-negative scaling factor chosen so that w, w B, */ +/* and s A do not overflow and, if possible, do not underflow, either. */ + +/* Arguments */ +/* ========= */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA, 2) */ +/* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm */ +/* is less than 1/SAFMIN. Entries less than */ +/* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= 2. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB, 2) */ +/* On entry, the 2 x 2 upper triangular matrix B. It is */ +/* assumed that the one-norm of B is less than 1/SAFMIN. The */ +/* diagonals should be at least sqrt(SAFMIN) times the largest */ +/* element of B (in absolute value); if a diagonal is smaller */ +/* than that, then +/- sqrt(SAFMIN) will be used instead of */ +/* that diagonal. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= 2. */ + +/* SAFMIN (input) DOUBLE PRECISION */ +/* The smallest positive number s.t. 1/SAFMIN does not */ +/* overflow. (This should always be DLAMCH('S') -- it is an */ +/* argument in order to avoid having to call DLAMCH frequently.) */ + +/* SCALE1 (output) DOUBLE PRECISION */ +/* A scaling factor used to avoid over-/underflow in the */ +/* eigenvalue equation which defines the first eigenvalue. If */ +/* the eigenvalues are complex, then the eigenvalues are */ +/* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the */ +/* exponent range of the machine), SCALE1=SCALE2, and SCALE1 */ +/* will always be positive. 
If the eigenvalues are real, then */ +/* the first (real) eigenvalue is WR1 / SCALE1 , but this may */ +/* overflow or underflow, and in fact, SCALE1 may be zero or */ +/* less than the underflow threshold if the exact eigenvalue */ +/* is sufficiently large. */ + +/* SCALE2 (output) DOUBLE PRECISION */ +/* A scaling factor used to avoid over-/underflow in the */ +/* eigenvalue equation which defines the second eigenvalue. If */ +/* the eigenvalues are complex, then SCALE2=SCALE1. If the */ +/* eigenvalues are real, then the second (real) eigenvalue is */ +/* WR2 / SCALE2 , but this may overflow or underflow, and in */ +/* fact, SCALE2 may be zero or less than the underflow */ +/* threshold if the exact eigenvalue is sufficiently large. */ + +/* WR1 (output) DOUBLE PRECISION */ +/* If the eigenvalue is real, then WR1 is SCALE1 times the */ +/* eigenvalue closest to the (2,2) element of A B**(-1). If the */ +/* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real */ +/* part of the eigenvalues. */ + +/* WR2 (output) DOUBLE PRECISION */ +/* If the eigenvalue is real, then WR2 is SCALE2 times the */ +/* other eigenvalue. If the eigenvalue is complex, then */ +/* WR1=WR2 is SCALE1 times the real part of the eigenvalues. */ + +/* WI (output) DOUBLE PRECISION */ +/* If the eigenvalue is real, then WI is zero. If the */ +/* eigenvalue is complex, then WI is SCALE1 times the imaginary */ +/* part of the eigenvalues. WI will always be non-negative. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + rtmin = sqrt(*safmin); + rtmax = 1. / rtmin; + safmax = 1.
/ *safmin; + +/* Scale A */ + +/* Computing MAX */ + d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[a_dim1 + 2], abs( + d__2)), d__6 = (d__3 = a[(a_dim1 << 1) + 1], abs(d__3)) + (d__4 = + a[(a_dim1 << 1) + 2], abs(d__4)), d__5 = max(d__5,d__6); + anorm = max(d__5,*safmin); + ascale = 1. / anorm; + a11 = ascale * a[a_dim1 + 1]; + a21 = ascale * a[a_dim1 + 2]; + a12 = ascale * a[(a_dim1 << 1) + 1]; + a22 = ascale * a[(a_dim1 << 1) + 2]; + +/* Perturb B if necessary to insure non-singularity */ + + b11 = b[b_dim1 + 1]; + b12 = b[(b_dim1 << 1) + 1]; + b22 = b[(b_dim1 << 1) + 2]; +/* Computing MAX */ + d__1 = abs(b11), d__2 = abs(b12), d__1 = max(d__1,d__2), d__2 = abs(b22), + d__1 = max(d__1,d__2); + bmin = rtmin * max(d__1,rtmin); + if (abs(b11) < bmin) { + b11 = d_sign(&bmin, &b11); + } + if (abs(b22) < bmin) { + b22 = d_sign(&bmin, &b22); + } + +/* Scale B */ + +/* Computing MAX */ + d__1 = abs(b11), d__2 = abs(b12) + abs(b22), d__1 = max(d__1,d__2); + bnorm = max(d__1,*safmin); +/* Computing MAX */ + d__1 = abs(b11), d__2 = abs(b22); + bsize = max(d__1,d__2); + bscale = 1. / bsize; + b11 *= bscale; + b12 *= bscale; + b22 *= bscale; + +/* Compute larger eigenvalue by method described by C. van Loan */ + +/* ( AS is A shifted by -SHIFT*B ) */ + + binv11 = 1. / b11; + binv22 = 1. / b22; + s1 = a11 * binv11; + s2 = a22 * binv22; + if (abs(s1) <= abs(s2)) { + as12 = a12 - s1 * b12; + as22 = a22 - s1 * b22; + ss = a21 * (binv11 * binv22); + abi22 = as22 * binv22 - ss * b12; + pp = abi22 * .5; + shift = s1; + } else { + as12 = a12 - s2 * b12; + as11 = a11 - s2 * b11; + ss = a21 * (binv11 * binv22); + abi22 = -ss * b12; + pp = (as11 * binv11 + abi22) * .5; + shift = s2; + } + qq = ss * as12; + if ((d__1 = pp * rtmin, abs(d__1)) >= 1.) 
{ +/* Computing 2nd power */ + d__1 = rtmin * pp; + discr = d__1 * d__1 + qq * *safmin; + r__ = sqrt((abs(discr))) * rtmax; + } else { +/* Computing 2nd power */ + d__1 = pp; + if (d__1 * d__1 + abs(qq) <= *safmin) { +/* Computing 2nd power */ + d__1 = rtmax * pp; + discr = d__1 * d__1 + qq * safmax; + r__ = sqrt((abs(discr))) * rtmin; + } else { +/* Computing 2nd power */ + d__1 = pp; + discr = d__1 * d__1 + qq; + r__ = sqrt((abs(discr))); + } + } + +/* Note: the test of R in the following IF is to cover the case when */ +/* DISCR is small and negative and is flushed to zero during */ +/* the calculation of R. On machines which have a consistent */ +/* flush-to-zero threshold and handle numbers above that */ +/* threshold correctly, it would not be necessary. */ + + if (discr >= 0. || r__ == 0.) { + sum = pp + d_sign(&r__, &pp); + diff = pp - d_sign(&r__, &pp); + wbig = shift + sum; + +/* Compute smaller eigenvalue */ + + wsmall = shift + diff; +/* Computing MAX */ + d__1 = abs(wsmall); + if (abs(wbig) * .5 > max(d__1,*safmin)) { + wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22); + wsmall = wdet / wbig; + } + +/* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) */ +/* for WR1. */ + + if (pp > abi22) { + *wr1 = min(wbig,wsmall); + *wr2 = max(wbig,wsmall); + } else { + *wr1 = max(wbig,wsmall); + *wr2 = min(wbig,wsmall); + } + *wi = 0.; + } else { + +/* Complex eigenvalues */ + + *wr1 = shift + pp; + *wr2 = *wr1; + *wi = r__; + } + +/* Further scaling to avoid underflow and overflow in computing */ +/* SCALE1 and overflow in computing w*B. */ + +/* This scale factor (WSCALE) is bounded from above using C1 and C2, */ +/* and from below using C3 and C4. */ +/* C1 implements the condition s A must never overflow. */ +/* C2 implements the condition w B must never overflow. */ +/* C3, with C2, */ +/* implement the condition that s A - w B must never overflow. */ +/* C4 implements the condition s should not underflow.
*/ +/* C5 implements the condition max(s,|w|) should be at least 2. */ + + c1 = bsize * (*safmin * max(1.,ascale)); + c2 = *safmin * max(1.,bnorm); + c3 = bsize * *safmin; + if (ascale <= 1. && bsize <= 1.) { +/* Computing MIN */ + d__1 = 1., d__2 = ascale / *safmin * bsize; + c4 = min(d__1,d__2); + } else { + c4 = 1.; + } + if (ascale <= 1. || bsize <= 1.) { +/* Computing MIN */ + d__1 = 1., d__2 = ascale * bsize; + c5 = min(d__1,d__2); + } else { + c5 = 1.; + } + +/* Scale first eigenvalue */ + + wabs = abs(*wr1) + abs(*wi); +/* Computing MAX */ +/* Computing MIN */ + d__3 = c4, d__4 = max(wabs,c5) * .5; + d__1 = max(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001, + d__1 = max(d__1,d__2), d__2 = min(d__3,d__4); + wsize = max(d__1,d__2); + if (wsize != 1.) { + wscale = 1. / wsize; + if (wsize > 1.) { + *scale1 = max(ascale,bsize) * wscale * min(ascale,bsize); + } else { + *scale1 = min(ascale,bsize) * wscale * max(ascale,bsize); + } + *wr1 *= wscale; + if (*wi != 0.) { + *wi *= wscale; + *wr2 = *wr1; + *scale2 = *scale1; + } + } else { + *scale1 = ascale * bsize; + *scale2 = *scale1; + } + +/* Scale second eigenvalue (if real) */ + + if (*wi == 0.) { +/* Computing MAX */ +/* Computing MIN */ +/* Computing MAX */ + d__5 = abs(*wr2); + d__3 = c4, d__4 = max(d__5,c5) * .5; + d__1 = max(*safmin,c1), d__2 = (abs(*wr2) * c2 + c3) * + 1.0000100000000001, d__1 = max(d__1,d__2), d__2 = min(d__3, + d__4); + wsize = max(d__1,d__2); + if (wsize != 1.) { + wscale = 1. / wsize; + if (wsize > 1.) 
{ + *scale2 = max(ascale,bsize) * wscale * min(ascale,bsize); + } else { + *scale2 = min(ascale,bsize) * wscale * max(ascale,bsize); + } + *wr2 *= wscale; + } else { + *scale2 = ascale * bsize; + } + } + +/* End of DLAG2 */ + + return 0; +} /* _starpu_dlag2_ */ diff --git a/min-dgels/base/SRC/dlag2s.c b/min-dgels/base/SRC/dlag2s.c new file mode 100644 index 0000000..3918e61 --- /dev/null +++ b/min-dgels/base/SRC/dlag2s.c @@ -0,0 +1,115 @@ +/* dlag2s.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info) +{ + /* System generated locals */ + integer sa_dim1, sa_offset, a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal rmax; + extern doublereal _starpu_slamch_(char *); + + +/* -- LAPACK PROTOTYPE auxiliary routine (version 3.1.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* August 2007 */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAG2S converts a DOUBLE PRECISION matrix, A, to a SINGLE */ +/* PRECISION matrix, SA. */ + +/* RMAX is the overflow for the SINGLE PRECISION arithmetic */ +/* DLAG2S checks that all the entries of A are between -RMAX and */ +/* RMAX. If not, the conversion is aborted and a flag is raised. */ + +/* This is an auxiliary routine so there is no argument checking.
*/ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N coefficient matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* SA (output) REAL array, dimension (LDSA,N) */ +/* On exit, if INFO=0, the M-by-N coefficient matrix SA; if */ +/* INFO>0, the content of SA is unspecified. */ + +/* LDSA (input) INTEGER */ +/* The leading dimension of the array SA. LDSA >= max(1,M). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* = 1: an entry of the matrix A is greater than the SINGLE */ +/* PRECISION overflow threshold, in this case, the content */ +/* of SA on exit is unspecified. */ + +/* ========= */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + sa_dim1 = *ldsa; + sa_offset = 1 + sa_dim1; + sa -= sa_offset; + + /* Function Body */ + rmax = _starpu_slamch_("O"); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) { + *info = 1; + goto L30; + } + sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + *info = 0; +L30: + return 0; + +/* End of DLAG2S */ + +} /* _starpu_dlag2s_ */ diff --git a/min-dgels/base/SRC/dlags2.c b/min-dgels/base/SRC/dlags2.c new file mode 100644 index 0000000..a538714 --- /dev/null +++ b/min-dgels/base/SRC/dlags2.c @@ -0,0 +1,292 @@ +/* dlags2.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, + doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, + doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, + doublereal *csq, doublereal *snq) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + doublereal a, b, c__, d__, r__, s1, s2, ua11, ua12, ua21, ua22, vb11, + vb12, vb21, vb22, csl, csr, snl, snr, aua11, aua12, aua21, aua22, + avb11, avb12, avb21, avb22, ua11r, ua22r, vb11r, vb22r; + extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_dlartg_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such */ +/* that if ( UPPER ) then */ + +/* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) */ +/* ( 0 A3 ) ( x x ) */ +/* and */ +/* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) */ +/* ( 0 B3 ) ( x x ) */ + +/* or if ( .NOT.UPPER ) then */ + +/* U'*A*Q = U'*( A1 0 )*Q = ( x x ) */ +/* ( A2 A3 ) ( 0 x ) */ +/* and */ +/* V'*B*Q = V'*( B1 0 )*Q = ( x x ) */ +/* ( B2 B3 ) ( 0 x ) */ + +/* The rows of the transformed A and B are parallel, where */ + +/* U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) */ +/* ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) */ + +/* Z' denotes the transpose of Z. */ + + +/* Arguments */ +/* ========= */ + +/* UPPER (input) LOGICAL */ +/* = .TRUE.: the input matrices A and B are upper triangular. */ +/* = .FALSE.: the input matrices A and B are lower triangular. */ + +/* A1 (input) DOUBLE PRECISION */ +/* A2 (input) DOUBLE PRECISION */ +/* A3 (input) DOUBLE PRECISION */ +/* On entry, A1, A2 and A3 are elements of the input 2-by-2 */ +/* upper (lower) triangular matrix A. */ + +/* B1 (input) DOUBLE PRECISION */ +/* B2 (input) DOUBLE PRECISION */ +/* B3 (input) DOUBLE PRECISION */ +/* On entry, B1, B2 and B3 are elements of the input 2-by-2 */ +/* upper (lower) triangular matrix B. */ + +/* CSU (output) DOUBLE PRECISION */ +/* SNU (output) DOUBLE PRECISION */ +/* The desired orthogonal matrix U. */ + +/* CSV (output) DOUBLE PRECISION */ +/* SNV (output) DOUBLE PRECISION */ +/* The desired orthogonal matrix V. */ + +/* CSQ (output) DOUBLE PRECISION */ +/* SNQ (output) DOUBLE PRECISION */ +/* The desired orthogonal matrix Q. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + if (*upper) { + +/* Input matrices A and B are upper triangular matrices */ + +/* Form matrix C = A*adj(B) = ( a b ) */ +/* ( 0 d ) */ + + a = *a1 * *b3; + d__ = *a3 * *b1; + b = *a2 * *b1 - *a1 * *b2; + +/* The SVD of real 2-by-2 triangular C */ + +/* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) */ +/* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) */ + + _starpu_dlasv2_(&a, &b, &d__, &s1, &s2, &snr, &csr, &snl, &csl); + + if (abs(csl) >= abs(snl) || abs(csr) >= abs(snr)) { + +/* Compute the (1,1) and (1,2) elements of U'*A and V'*B, */ +/* and (1,2) element of |U|'*|A| and |V|'*|B|. */ + + ua11r = csl * *a1; + ua12 = csl * *a2 + snl * *a3; + + vb11r = csr * *b1; + vb12 = csr * *b2 + snr * *b3; + + aua12 = abs(csl) * abs(*a2) + abs(snl) * abs(*a3); + avb12 = abs(csr) * abs(*b2) + abs(snr) * abs(*b3); + +/* zero (1,2) elements of U'*A and V'*B */ + + if (abs(ua11r) + abs(ua12) != 0.) { + if (aua12 / (abs(ua11r) + abs(ua12)) <= avb12 / (abs(vb11r) + + abs(vb12))) { + d__1 = -ua11r; + _starpu_dlartg_(&d__1, &ua12, csq, snq, &r__); + } else { + d__1 = -vb11r; + _starpu_dlartg_(&d__1, &vb12, csq, snq, &r__); + } + } else { + d__1 = -vb11r; + _starpu_dlartg_(&d__1, &vb12, csq, snq, &r__); + } + + *csu = csl; + *snu = -snl; + *csv = csr; + *snv = -snr; + + } else { + +/* Compute the (2,1) and (2,2) elements of U'*A and V'*B, */ +/* and (2,2) element of |U|'*|A| and |V|'*|B|. */ + + ua21 = -snl * *a1; + ua22 = -snl * *a2 + csl * *a3; + + vb21 = -snr * *b1; + vb22 = -snr * *b2 + csr * *b3; + + aua22 = abs(snl) * abs(*a2) + abs(csl) * abs(*a3); + avb22 = abs(snr) * abs(*b2) + abs(csr) * abs(*b3); + +/* zero (2,2) elements of U'*A and V'*B, and then swap. */ + + if (abs(ua21) + abs(ua22) != 0.) 
{ + if (aua22 / (abs(ua21) + abs(ua22)) <= avb22 / (abs(vb21) + + abs(vb22))) { + d__1 = -ua21; + _starpu_dlartg_(&d__1, &ua22, csq, snq, &r__); + } else { + d__1 = -vb21; + _starpu_dlartg_(&d__1, &vb22, csq, snq, &r__); + } + } else { + d__1 = -vb21; + _starpu_dlartg_(&d__1, &vb22, csq, snq, &r__); + } + + *csu = snl; + *snu = csl; + *csv = snr; + *snv = csr; + + } + + } else { + +/* Input matrices A and B are lower triangular matrices */ + +/* Form matrix C = A*adj(B) = ( a 0 ) */ +/* ( c d ) */ + + a = *a1 * *b3; + d__ = *a3 * *b1; + c__ = *a2 * *b3 - *a3 * *b2; + +/* The SVD of real 2-by-2 triangular C */ + +/* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) */ +/* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) */ + + _starpu_dlasv2_(&a, &c__, &d__, &s1, &s2, &snr, &csr, &snl, &csl); + + if (abs(csr) >= abs(snr) || abs(csl) >= abs(snl)) { + +/* Compute the (2,1) and (2,2) elements of U'*A and V'*B, */ +/* and (2,1) element of |U|'*|A| and |V|'*|B|. */ + + ua21 = -snr * *a1 + csr * *a2; + ua22r = csr * *a3; + + vb21 = -snl * *b1 + csl * *b2; + vb22r = csl * *b3; + + aua21 = abs(snr) * abs(*a1) + abs(csr) * abs(*a2); + avb21 = abs(snl) * abs(*b1) + abs(csl) * abs(*b2); + +/* zero (2,1) elements of U'*A and V'*B. */ + + if (abs(ua21) + abs(ua22r) != 0.) { + if (aua21 / (abs(ua21) + abs(ua22r)) <= avb21 / (abs(vb21) + + abs(vb22r))) { + _starpu_dlartg_(&ua22r, &ua21, csq, snq, &r__); + } else { + _starpu_dlartg_(&vb22r, &vb21, csq, snq, &r__); + } + } else { + _starpu_dlartg_(&vb22r, &vb21, csq, snq, &r__); + } + + *csu = csr; + *snu = -snr; + *csv = csl; + *snv = -snl; + + } else { + +/* Compute the (1,1) and (1,2) elements of U'*A and V'*B, */ +/* and (1,1) element of |U|'*|A| and |V|'*|B|. */ + + ua11 = csr * *a1 + snr * *a2; + ua12 = snr * *a3; + + vb11 = csl * *b1 + snl * *b2; + vb12 = snl * *b3; + + aua11 = abs(csr) * abs(*a1) + abs(snr) * abs(*a2); + avb11 = abs(csl) * abs(*b1) + abs(snl) * abs(*b2); + +/* zero (1,1) elements of U'*A and V'*B, and then swap. 
*/ + + if (abs(ua11) + abs(ua12) != 0.) { + if (aua11 / (abs(ua11) + abs(ua12)) <= avb11 / (abs(vb11) + + abs(vb12))) { + _starpu_dlartg_(&ua12, &ua11, csq, snq, &r__); + } else { + _starpu_dlartg_(&vb12, &vb11, csq, snq, &r__); + } + } else { + _starpu_dlartg_(&vb12, &vb11, csq, snq, &r__); + } + + *csu = snr; + *snu = csr; + *csv = snl; + *snv = csl; + + } + + } + + return 0; + +/* End of DLAGS2 */ + +} /* _starpu_dlags2_ */ diff --git a/min-dgels/base/SRC/dlagtf.c b/min-dgels/base/SRC/dlagtf.c new file mode 100644 index 0000000..70e6733 --- /dev/null +++ b/min-dgels/base/SRC/dlagtf.c @@ -0,0 +1,224 @@ +/* dlagtf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, + doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, + integer *in, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer k; + doublereal tl, eps, piv1, piv2, temp, mult, scale1, scale2; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n */ +/* tridiagonal matrix and lambda is a scalar, as */ + +/* T - lambda*I = PLU, */ + +/* where P is a permutation matrix, L is a unit lower tridiagonal matrix */ +/* with at most one non-zero sub-diagonal element per column and U is */ +/* an upper triangular matrix with at most two non-zero super-diagonal */ +/* elements per column. */ + +/* The factorization is obtained by Gaussian elimination with partial */ +/* pivoting and implicit row scaling. */ + +/* The parameter LAMBDA is included in the routine so that DLAGTF may */ +/* be used, in conjunction with DLAGTS, to obtain eigenvectors of T by */ +/* inverse iteration. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix T. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, A must contain the diagonal elements of T. */ + +/* On exit, A is overwritten by the n diagonal elements of the */ +/* upper triangular matrix U of the factorization of T. */ + +/* LAMBDA (input) DOUBLE PRECISION */ +/* On entry, the scalar lambda. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, B must contain the (n-1) super-diagonal elements of */ +/* T. */ + +/* On exit, B is overwritten by the (n-1) super-diagonal */ +/* elements of the matrix U of the factorization of T. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, C must contain the (n-1) sub-diagonal elements of */ +/* T. */ + +/* On exit, C is overwritten by the (n-1) sub-diagonal elements */ +/* of the matrix L of the factorization of T. */ + +/* TOL (input) DOUBLE PRECISION */ +/* On entry, a relative tolerance used to indicate whether or */ +/* not the matrix (T - lambda*I) is nearly singular. TOL should */ +/* normally be chosen as approximately the largest relative error */ +/* in the elements of T.
For example, if the elements of T are */ +/* correct to about 4 significant figures, then TOL should be */ +/* set to about 5*10**(-4). If TOL is supplied as less than eps, */ +/* where eps is the relative machine precision, then the value */ +/* eps is used in place of TOL. */ + +/* D (output) DOUBLE PRECISION array, dimension (N-2) */ +/* On exit, D is overwritten by the (n-2) second super-diagonal */ +/* elements of the matrix U of the factorization of T. */ + +/* IN (output) INTEGER array, dimension (N) */ +/* On exit, IN contains details of the permutation matrix P. If */ +/* an interchange occurred at the kth step of the elimination, */ +/* then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) */ +/* returns the smallest positive integer j such that */ + +/* abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, */ + +/* where norm( A(j) ) denotes the sum of the absolute values of */ +/* the jth row of the matrix A. If no such j exists then IN(n) */ +/* is returned as zero. If IN(n) is returned as positive, then a */ +/* diagonal element of U is small, indicating that */ +/* (T - lambda*I) is singular or nearly singular, */ + +/* INFO (output) INTEGER */ +/* = 0 : successful exit */ +/* .lt. 0: if INFO = -k, the kth argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --in; + --d__; + --c__; + --b; + --a; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + _starpu_xerbla_("DLAGTF", &i__1); + return 0; + } + + if (*n == 0) { + return 0; + } + + a[1] -= *lambda; + in[*n] = 0; + if (*n == 1) { + if (a[1] == 0.) 
{ + in[1] = 1; + } + return 0; + } + + eps = _starpu_dlamch_("Epsilon"); + + tl = max(*tol,eps); + scale1 = abs(a[1]) + abs(b[1]); + i__1 = *n - 1; + for (k = 1; k <= i__1; ++k) { + a[k + 1] -= *lambda; + scale2 = (d__1 = c__[k], abs(d__1)) + (d__2 = a[k + 1], abs(d__2)); + if (k < *n - 1) { + scale2 += (d__1 = b[k + 1], abs(d__1)); + } + if (a[k] == 0.) { + piv1 = 0.; + } else { + piv1 = (d__1 = a[k], abs(d__1)) / scale1; + } + if (c__[k] == 0.) { + in[k] = 0; + piv2 = 0.; + scale1 = scale2; + if (k < *n - 1) { + d__[k] = 0.; + } + } else { + piv2 = (d__1 = c__[k], abs(d__1)) / scale2; + if (piv2 <= piv1) { + in[k] = 0; + scale1 = scale2; + c__[k] /= a[k]; + a[k + 1] -= c__[k] * b[k]; + if (k < *n - 1) { + d__[k] = 0.; + } + } else { + in[k] = 1; + mult = a[k] / c__[k]; + a[k] = c__[k]; + temp = a[k + 1]; + a[k + 1] = b[k] - mult * temp; + if (k < *n - 1) { + d__[k] = b[k + 1]; + b[k + 1] = -mult * d__[k]; + } + b[k] = temp; + c__[k] = mult; + } + } + if (max(piv1,piv2) <= tl && in[*n] == 0) { + in[*n] = k; + } +/* L10: */ + } + if ((d__1 = a[*n], abs(d__1)) <= scale1 * tl && in[*n] == 0) { + in[*n] = *n; + } + + return 0; + +/* End of DLAGTF */ + +} /* _starpu_dlagtf_ */ diff --git a/min-dgels/base/SRC/dlagtm.c b/min-dgels/base/SRC/dlagtm.c new file mode 100644 index 0000000..21ed280 --- /dev/null +++ b/min-dgels/base/SRC/dlagtm.c @@ -0,0 +1,254 @@ +/* dlagtm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, + doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, + doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer + *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAGTM performs a matrix-vector product of the form */ + +/* B := alpha * A * X + beta * B */ + +/* where A is a tridiagonal matrix of order N, B and X are N by NRHS */ +/* matrices, and alpha and beta are real scalars, each of which may be */ +/* 0., 1., or -1. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the operation applied to A. */ +/* = 'N': No transpose, B := alpha * A * X + beta * B */ +/* = 'T': Transpose, B := alpha * A'* X + beta * B */ +/* = 'C': Conjugate transpose = Transpose */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices X and B. */ + +/* ALPHA (input) DOUBLE PRECISION */ +/* The scalar alpha. 
ALPHA must be 0., 1., or -1.; otherwise, */ +/* it is assumed to be 0. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) sub-diagonal elements of T. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of T. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) super-diagonal elements of T. */ + +/* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* The N by NRHS matrix X. */ +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(N,1). */ + +/* BETA (input) DOUBLE PRECISION */ +/* The scalar beta. BETA must be 0., 1., or -1.; otherwise, */ +/* it is assumed to be 1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N by NRHS matrix B. */ +/* On exit, B is overwritten by the matrix expression */ +/* B := alpha * A * X + beta * B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(N,1). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + +/* Multiply B by BETA if BETA.NE.1. */ + + if (*beta == 0.) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else if (*beta == -1.) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = -b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + + if (*alpha == 1.) 
{ + if (_starpu_lsame_(trans, "N")) { + +/* Compute B := B + A*X */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (*n == 1) { + b[j * b_dim1 + 1] += d__[1] * x[j * x_dim1 + 1]; + } else { + b[j * b_dim1 + 1] = b[j * b_dim1 + 1] + d__[1] * x[j * + x_dim1 + 1] + du[1] * x[j * x_dim1 + 2]; + b[*n + j * b_dim1] = b[*n + j * b_dim1] + dl[*n - 1] * x[* + n - 1 + j * x_dim1] + d__[*n] * x[*n + j * x_dim1] + ; + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = b[i__ + j * b_dim1] + dl[i__ - + 1] * x[i__ - 1 + j * x_dim1] + d__[i__] * x[ + i__ + j * x_dim1] + du[i__] * x[i__ + 1 + j * + x_dim1]; +/* L50: */ + } + } +/* L60: */ + } + } else { + +/* Compute B := B + A'*X */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (*n == 1) { + b[j * b_dim1 + 1] += d__[1] * x[j * x_dim1 + 1]; + } else { + b[j * b_dim1 + 1] = b[j * b_dim1 + 1] + d__[1] * x[j * + x_dim1 + 1] + dl[1] * x[j * x_dim1 + 2]; + b[*n + j * b_dim1] = b[*n + j * b_dim1] + du[*n - 1] * x[* + n - 1 + j * x_dim1] + d__[*n] * x[*n + j * x_dim1] + ; + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = b[i__ + j * b_dim1] + du[i__ - + 1] * x[i__ - 1 + j * x_dim1] + d__[i__] * x[ + i__ + j * x_dim1] + dl[i__] * x[i__ + 1 + j * + x_dim1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (*alpha == -1.) 
{ + if (_starpu_lsame_(trans, "N")) { + +/* Compute B := B - A*X */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (*n == 1) { + b[j * b_dim1 + 1] -= d__[1] * x[j * x_dim1 + 1]; + } else { + b[j * b_dim1 + 1] = b[j * b_dim1 + 1] - d__[1] * x[j * + x_dim1 + 1] - du[1] * x[j * x_dim1 + 2]; + b[*n + j * b_dim1] = b[*n + j * b_dim1] - dl[*n - 1] * x[* + n - 1 + j * x_dim1] - d__[*n] * x[*n + j * x_dim1] + ; + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = b[i__ + j * b_dim1] - dl[i__ - + 1] * x[i__ - 1 + j * x_dim1] - d__[i__] * x[ + i__ + j * x_dim1] - du[i__] * x[i__ + 1 + j * + x_dim1]; +/* L90: */ + } + } +/* L100: */ + } + } else { + +/* Compute B := B - A'*X */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (*n == 1) { + b[j * b_dim1 + 1] -= d__[1] * x[j * x_dim1 + 1]; + } else { + b[j * b_dim1 + 1] = b[j * b_dim1 + 1] - d__[1] * x[j * + x_dim1 + 1] - dl[1] * x[j * x_dim1 + 2]; + b[*n + j * b_dim1] = b[*n + j * b_dim1] - du[*n - 1] * x[* + n - 1 + j * x_dim1] - d__[*n] * x[*n + j * x_dim1] + ; + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = b[i__ + j * b_dim1] - du[i__ - + 1] * x[i__ - 1 + j * x_dim1] - d__[i__] * x[ + i__ + j * x_dim1] - dl[i__] * x[i__ + 1 + j * + x_dim1]; +/* L110: */ + } + } +/* L120: */ + } + } + } + return 0; + +/* End of DLAGTM */ + +} /* _starpu_dlagtm_ */ diff --git a/min-dgels/base/SRC/dlagts.c b/min-dgels/base/SRC/dlagts.c new file mode 100644 index 0000000..931623c --- /dev/null +++ b/min-dgels/base/SRC/dlagts.c @@ -0,0 +1,351 @@ +/* dlagts.f -- translated by f2c (version 20061008). 
+	You must link the resulting object file with libf2c:
+	on Microsoft Windows system, link with libf2c.lib;
+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+	or, if you install libf2c.a in a standard place, with -lf2c -lm
+	-- in that order, at the end of the command line, as in
+		cc *.o -lf2c -lm
+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+		http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a,
+        doublereal *b, doublereal *c__, doublereal *d__, integer *in,
+        doublereal *y, doublereal *tol, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2, d__3, d__4, d__5;
+
+    /* Builtin functions */
+    double d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    integer k;
+    doublereal ak, eps, temp, pert, absak, sfmin;
+    extern doublereal _starpu_dlamch_(char *);
+    extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
+    doublereal bignum;
+/* Note: every return statement in this routine returns 0; the outcome is reported through *info. */
+
+/*  -- LAPACK auxiliary routine (version 3.2) -- */
+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/*     November 2006 */
+
+/*     .. Scalar Arguments .. */
+/*     .. */
+/*     .. Array Arguments .. */
+/*     .. */
+
+/*  Purpose */
+/*  ======= */
+
+/*  DLAGTS may be used to solve one of the systems of equations */
+
+/*     (T - lambda*I)*x = y   or   (T - lambda*I)'*x = y, */
+
+/*  where T is an n by n tridiagonal matrix, for x, following the */
+/*  factorization of (T - lambda*I) as */
+
+/*     (T - lambda*I) = P*L*U , */
+
+/*  by routine DLAGTF. The choice of equation to be solved is */
+/*  controlled by the argument JOB, and in each case there is an option */
+/*  to perturb zero or very small diagonal elements of U, this option */
+/*  being intended for use in applications such as inverse iteration. */
+
+/*  Arguments */
+/*  ========= */
+
+/*  JOB     (input) INTEGER */
+/*          Specifies the job to be performed by DLAGTS as follows: */
+/*          =  1: The equations  (T - lambda*I)x = y  are to be solved, */
+/*                but diagonal elements of U are not to be perturbed. */
+/*          = -1: The equations  (T - lambda*I)x = y  are to be solved */
+/*                and, if overflow would otherwise occur, the diagonal */
+/*                elements of U are to be perturbed. See argument TOL */
+/*                below. */
+/*          =  2: The equations  (T - lambda*I)'x = y  are to be solved, */
+/*                but diagonal elements of U are not to be perturbed. */
+/*          = -2: The equations  (T - lambda*I)'x = y  are to be solved */
+/*                and, if overflow would otherwise occur, the diagonal */
+/*                elements of U are to be perturbed. See argument TOL */
+/*                below. */
+
+/*  N       (input) INTEGER */
+/*          The order of the matrix T. */
+
+/*  A       (input) DOUBLE PRECISION array, dimension (N) */
+/*          On entry, A must contain the diagonal elements of U as */
+/*          returned from DLAGTF. */
+
+/*  B       (input) DOUBLE PRECISION array, dimension (N-1) */
+/*          On entry, B must contain the first super-diagonal elements of */
+/*          U as returned from DLAGTF. */
+
+/*  C       (input) DOUBLE PRECISION array, dimension (N-1) */
+/*          On entry, C must contain the sub-diagonal elements of L as */
+/*          returned from DLAGTF. */
+
+/*  D       (input) DOUBLE PRECISION array, dimension (N-2) */
+/*          On entry, D must contain the second super-diagonal elements */
+/*          of U as returned from DLAGTF. */
+
+/*  IN      (input) INTEGER array, dimension (N) */
+/*          On entry, IN must contain details of the matrix P as returned */
+/*          from DLAGTF. */
+
+/*  Y       (input/output) DOUBLE PRECISION array, dimension (N) */
+/*          On entry, the right hand side vector y. */
+/*          On exit, Y is overwritten by the solution vector x. */
+
+/*  TOL     (input/output) DOUBLE PRECISION */
+/*          On entry, with  JOB .lt. 0, TOL should be the minimum */
+/*          perturbation to be made to very small diagonal elements of U. */
+/*          TOL should normally be chosen as about eps*norm(U), where eps */
+/*          is the relative machine precision, but if TOL is supplied as */
+/*          non-positive, then it is reset to eps*max( abs( u(i,j) ) ). */
+/*          If  JOB .gt. 0  then TOL is not referenced. */
+
+/*          On exit, TOL is changed as described above, only if TOL is */
+/*          non-positive on entry. Otherwise TOL is unchanged. */
+
+/*  INFO    (output) INTEGER */
+/*          = 0   : successful exit */
+/*          .lt. 0: if INFO = -i, the i-th argument had an illegal value */
+/*          .gt. 0: overflow would occur when computing the INFO(th) */
+/*                  element of the solution vector x. This can only occur */
+/*                  when JOB is supplied as positive and either means */
+/*                  that a diagonal element of U is very small, or that */
+/*                  the elements of the right-hand side vector y are very */
+/*                  large. */
+
+/*  ===================================================================== */
+
+/*     .. Parameters .. */
+/*     .. */
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. Intrinsic Functions .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. External Subroutines .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+    /* Parameter adjustments: shift every array pointer so the body can use 1-based indices */
+    --y;
+    --in;
+    --d__;
+    --c__;
+    --b;
+    --a;
+
+    /* Function Body */
+    *info = 0;
+    if (abs(*job) > 2 || *job == 0) {   /* JOB must be one of 1, -1, 2, -2 */
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);   /* xerbla receives the positive argument position */
+        _starpu_xerbla_("DLAGTS", &i__1);
+        return 0;
+    }
+
+    if (*n == 0) {   /* nothing to solve */
+        return 0;
+    }
+
+    eps = _starpu_dlamch_("Epsilon");
+    sfmin = _starpu_dlamch_("Safe minimum");
+    bignum = 1. / sfmin;   /* temp and ak are both multiplied by this when |ak| < sfmin */
+
+    if (*job < 0) {
+        if (*tol <= 0.) {   /* non-positive TOL on entry: derive it from the entries of A, B and D */
+            *tol = abs(a[1]);
+            if (*n > 1) {
+/* Computing MAX */
+                d__1 = *tol, d__2 = abs(a[2]), d__1 = max(d__1,d__2), d__2 =
+                        abs(b[1]);
+                *tol = max(d__1,d__2);
+            }
+            i__1 = *n;
+            for (k = 3; k <= i__1; ++k) {   /* running max over a(k), b(k-1), d(k-2) */
+/* Computing MAX */
+                d__4 = *tol, d__5 = (d__1 = a[k], abs(d__1)), d__4 = max(d__4,
+                        d__5), d__5 = (d__2 = b[k - 1], abs(d__2)), d__4 =
+                        max(d__4,d__5), d__5 = (d__3 = d__[k - 2], abs(d__3));
+                *tol = max(d__4,d__5);
+/* L10: */
+            }
+            *tol *= eps;
+            if (*tol == 0.) {   /* the scaled maximum came out as zero: fall back to eps */
+                *tol = eps;
+            }
+        }
+    }
+
+    if (abs(*job) == 1) {   /* JOB = 1 or -1 */
+        i__1 = *n;
+        for (k = 2; k <= i__1; ++k) {   /* first sweep over y using the multipliers in C */
+            if (in[k - 1] == 0) {
+                y[k] -= c__[k - 1] * y[k - 1];
+            } else {   /* nonzero IN(k-1): y(k-1) and y(k) are exchanged while eliminating */
+                temp = y[k - 1];
+                y[k - 1] = y[k];
+                y[k] = temp - c__[k - 1] * y[k];
+            }
+/* L20: */
+        }
+        if (*job == 1) {   /* no perturbation allowed: an unsafe division ends the routine with INFO = k */
+            for (k = *n; k >= 1; --k) {   /* second sweep, from the last unknown up, using A, B and D */
+                if (k <= *n - 2) {
+                    temp = y[k] - b[k] * y[k + 1] - d__[k] * y[k + 2];
+                } else if (k == *n - 1) {
+                    temp = y[k] - b[k] * y[k + 1];
+                } else {
+                    temp = y[k];
+                }
+                ak = a[k];
+                absak = abs(ak);
+                if (absak < 1.) {   /* only a divisor below 1 in magnitude is checked */
+                    if (absak < sfmin) {
+                        if (absak == 0. || abs(temp) * sfmin > absak) {
+                            *info = k;
+                            return 0;
+                        } else {   /* scale numerator and divisor together; the quotient is unchanged */
+                            temp *= bignum;
+                            ak *= bignum;
+                        }
+                    } else if (abs(temp) > absak * bignum) {   /* |temp / ak| would exceed bignum */
+                        *info = k;
+                        return 0;
+                    }
+                }
+                y[k] = temp / ak;
+/* L30: */
+            }
+        } else {   /* JOB = -1: same sweep, but ak is perturbed until the division passes the checks */
+            for (k = *n; k >= 1; --k) {
+                if (k <= *n - 2) {
+                    temp = y[k] - b[k] * y[k + 1] - d__[k] * y[k + 2];
+                } else if (k == *n - 1) {
+                    temp = y[k] - b[k] * y[k + 1];
+                } else {
+                    temp = y[k];
+                }
+                ak = a[k];
+                pert = d_sign(tol, &ak);   /* first perturbation, built from TOL and ak by libf2c's d_sign */
+L40:            /* re-entered after each perturbation of ak */
+                absak = abs(ak);
+                if (absak < 1.) {
+                    if (absak < sfmin) {
+                        if (absak == 0. || abs(temp) * sfmin > absak) {
+                            ak += pert;
+                            pert *= 2;   /* the next retry uses twice the perturbation */
+                            goto L40;
+                        } else {
+                            temp *= bignum;
+                            ak *= bignum;
+                        }
+                    } else if (abs(temp) > absak * bignum) {
+                        ak += pert;
+                        pert *= 2;
+                        goto L40;
+                    }
+                }
+                y[k] = temp / ak;
+/* L50: */
+            }
+        }
+    } else {
+
+/*        Come to here if  JOB = 2 or -2 */
+
+        if (*job == 2) {   /* no perturbation allowed: an unsafe division ends the routine with INFO = k */
+            i__1 = *n;
+            for (k = 1; k <= i__1; ++k) {   /* first sweep, from the first unknown down, using A, B and D */
+                if (k >= 3) {
+                    temp = y[k] - b[k - 1] * y[k - 1] - d__[k - 2] * y[k - 2];
+                } else if (k == 2) {
+                    temp = y[k] - b[k - 1] * y[k - 1];
+                } else {
+                    temp = y[k];
+                }
+                ak = a[k];
+                absak = abs(ak);
+                if (absak < 1.) {   /* same safety checks as in the JOB = 1 sweep */
+                    if (absak < sfmin) {
+                        if (absak == 0. || abs(temp) * sfmin > absak) {
+                            *info = k;
+                            return 0;
+                        } else {
+                            temp *= bignum;
+                            ak *= bignum;
+                        }
+                    } else if (abs(temp) > absak * bignum) {
+                        *info = k;
+                        return 0;
+                    }
+                }
+                y[k] = temp / ak;
+/* L60: */
+            }
+        } else {   /* JOB = -2: same sweep, but ak is perturbed until the division passes the checks */
+            i__1 = *n;
+            for (k = 1; k <= i__1; ++k) {
+                if (k >= 3) {
+                    temp = y[k] - b[k - 1] * y[k - 1] - d__[k - 2] * y[k - 2];
+                } else if (k == 2) {
+                    temp = y[k] - b[k - 1] * y[k - 1];
+                } else {
+                    temp = y[k];
+                }
+                ak = a[k];
+                pert = d_sign(tol, &ak);   /* first perturbation, built from TOL and ak by libf2c's d_sign */
+L70:            /* re-entered after each perturbation of ak */
+                absak = abs(ak);
+                if (absak < 1.) {
+                    if (absak < sfmin) {
+                        if (absak == 0. || abs(temp) * sfmin > absak) {
+                            ak += pert;
+                            pert *= 2;   /* the next retry uses twice the perturbation */
+                            goto L70;
+                        } else {
+                            temp *= bignum;
+                            ak *= bignum;
+                        }
+                    } else if (abs(temp) > absak * bignum) {
+                        ak += pert;
+                        pert *= 2;
+                        goto L70;
+                    }
+                }
+                y[k] = temp / ak;
+/* L80: */
+            }
+        }
+
+        for (k = *n; k >= 2; --k) {   /* second sweep, from the bottom up, using the multipliers in C */
+            if (in[k - 1] == 0) {
+                y[k - 1] -= c__[k - 1] * y[k];
+            } else {   /* nonzero IN(k-1): y(k-1) and y(k) are exchanged while eliminating */
+                temp = y[k - 1];
+                y[k - 1] = y[k];
+                y[k] = temp - c__[k - 1] * y[k];
+            }
+/* L90: */
+        }
+    }
+
+/*     End of DLAGTS */
+
+    return 0;
+} /* _starpu_dlagts_ */
diff --git a/min-dgels/base/SRC/dlagv2.c b/min-dgels/base/SRC/dlagv2.c
new file mode 100644
index 0000000..e200286
--- /dev/null
+++ b/min-dgels/base/SRC/dlagv2.c
@@ -0,0 +1,351 @@
+/* dlagv2.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * + snr) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + doublereal r__, t, h1, h2, h3, wi, qq, rr, wr1, wr2, ulp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dlag2_( + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + doublereal anorm, bnorm, scale1, scale2; + extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + doublereal ascale, bscale; + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 */ +/* matrix pencil (A,B) where B is upper triangular. This routine */ +/* computes orthogonal (rotation) matrices given by CSL, SNL and CSR, */ +/* SNR such that */ + +/* 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 */ +/* types), then */ + +/* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] */ +/* [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] */ + +/* [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] */ +/* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], */ + +/* 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, */ +/* then */ + +/* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] */ +/* [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] */ + +/* [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] */ +/* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] */ + +/* where b11 >= b22 > 0. */ + + +/* Arguments */ +/* ========= */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, 2) */ +/* On entry, the 2 x 2 matrix A. */ +/* On exit, A is overwritten by the ``A-part'' of the */ +/* generalized Schur form. */ + +/* LDA (input) INTEGER */ +/* THe leading dimension of the array A. LDA >= 2. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, 2) */ +/* On entry, the upper triangular 2 x 2 matrix B. */ +/* On exit, B is overwritten by the ``B-part'' of the */ +/* generalized Schur form. */ + +/* LDB (input) INTEGER */ +/* THe leading dimension of the array B. LDB >= 2. */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (2) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (2) */ +/* BETA (output) DOUBLE PRECISION array, dimension (2) */ +/* (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the */ +/* pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may */ +/* be zero. 
*/ + +/* CSL (output) DOUBLE PRECISION */ +/* The cosine of the left rotation matrix. */ + +/* SNL (output) DOUBLE PRECISION */ +/* The sine of the left rotation matrix. */ + +/* CSR (output) DOUBLE PRECISION */ +/* The cosine of the right rotation matrix. */ + +/* SNR (output) DOUBLE PRECISION */ +/* The sine of the right rotation matrix. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + + /* Function Body */ + safmin = _starpu_dlamch_("S"); + ulp = _starpu_dlamch_("P"); + +/* Scale A */ + +/* Computing MAX */ + d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[a_dim1 + 2], abs( + d__2)), d__6 = (d__3 = a[(a_dim1 << 1) + 1], abs(d__3)) + (d__4 = + a[(a_dim1 << 1) + 2], abs(d__4)), d__5 = max(d__5,d__6); + anorm = max(d__5,safmin); + ascale = 1. / anorm; + a[a_dim1 + 1] = ascale * a[a_dim1 + 1]; + a[(a_dim1 << 1) + 1] = ascale * a[(a_dim1 << 1) + 1]; + a[a_dim1 + 2] = ascale * a[a_dim1 + 2]; + a[(a_dim1 << 1) + 2] = ascale * a[(a_dim1 << 1) + 2]; + +/* Scale B */ + +/* Computing MAX */ + d__4 = (d__3 = b[b_dim1 + 1], abs(d__3)), d__5 = (d__1 = b[(b_dim1 << 1) + + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 2], abs(d__2)), d__4 + = max(d__4,d__5); + bnorm = max(d__4,safmin); + bscale = 1. 
/ bnorm; + b[b_dim1 + 1] = bscale * b[b_dim1 + 1]; + b[(b_dim1 << 1) + 1] = bscale * b[(b_dim1 << 1) + 1]; + b[(b_dim1 << 1) + 2] = bscale * b[(b_dim1 << 1) + 2]; + +/* Check if A can be deflated */ + + if ((d__1 = a[a_dim1 + 2], abs(d__1)) <= ulp) { + *csl = 1.; + *snl = 0.; + *csr = 1.; + *snr = 0.; + a[a_dim1 + 2] = 0.; + b[b_dim1 + 2] = 0.; + +/* Check if B is singular */ + + } else if ((d__1 = b[b_dim1 + 1], abs(d__1)) <= ulp) { + _starpu_dlartg_(&a[a_dim1 + 1], &a[a_dim1 + 2], csl, snl, &r__); + *csr = 1.; + *snr = 0.; + _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); + _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); + a[a_dim1 + 2] = 0.; + b[b_dim1 + 1] = 0.; + b[b_dim1 + 2] = 0.; + + } else if ((d__1 = b[(b_dim1 << 1) + 2], abs(d__1)) <= ulp) { + _starpu_dlartg_(&a[(a_dim1 << 1) + 2], &a[a_dim1 + 2], csr, snr, &t); + *snr = -(*snr); + _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, csr, + snr); + _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, csr, + snr); + *csl = 1.; + *snl = 0.; + a[a_dim1 + 2] = 0.; + b[b_dim1 + 2] = 0.; + b[(b_dim1 << 1) + 2] = 0.; + + } else { + +/* B is nonsingular, first compute the eigenvalues of (A,B) */ + + _starpu_dlag2_(&a[a_offset], lda, &b[b_offset], ldb, &safmin, &scale1, & + scale2, &wr1, &wr2, &wi); + + if (wi == 0.) 
{ + +/* two real eigenvalues, compute s*A-w*B */ + + h1 = scale1 * a[a_dim1 + 1] - wr1 * b[b_dim1 + 1]; + h2 = scale1 * a[(a_dim1 << 1) + 1] - wr1 * b[(b_dim1 << 1) + 1]; + h3 = scale1 * a[(a_dim1 << 1) + 2] - wr1 * b[(b_dim1 << 1) + 2]; + + rr = _starpu_dlapy2_(&h1, &h2); + d__1 = scale1 * a[a_dim1 + 2]; + qq = _starpu_dlapy2_(&d__1, &h3); + + if (rr > qq) { + +/* find right rotation matrix to zero 1,1 element of */ +/* (sA - wB) */ + + _starpu_dlartg_(&h2, &h1, csr, snr, &t); + + } else { + +/* find right rotation matrix to zero 2,1 element of */ +/* (sA - wB) */ + + d__1 = scale1 * a[a_dim1 + 2]; + _starpu_dlartg_(&h3, &d__1, csr, snr, &t); + + } + + *snr = -(*snr); + _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, + csr, snr); + _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, + csr, snr); + +/* compute inf norms of A and B */ + +/* Computing MAX */ + d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[(a_dim1 << 1) + + 1], abs(d__2)), d__6 = (d__3 = a[a_dim1 + 2], abs(d__3) + ) + (d__4 = a[(a_dim1 << 1) + 2], abs(d__4)); + h1 = max(d__5,d__6); +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], abs(d__3) + ) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + h2 = max(d__5,d__6); + + if (scale1 * h1 >= abs(wr1) * h2) { + +/* find left rotation matrix Q to zero out B(2,1) */ + + _starpu_dlartg_(&b[b_dim1 + 1], &b[b_dim1 + 2], csl, snl, &r__); + + } else { + +/* find left rotation matrix Q to zero out A(2,1) */ + + _starpu_dlartg_(&a[a_dim1 + 1], &a[a_dim1 + 2], csl, snl, &r__); + + } + + _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); + _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); + + a[a_dim1 + 2] = 0.; + b[b_dim1 + 2] = 0.; + + } else { + +/* a pair of complex conjugate eigenvalues */ +/* first compute the SVD of the matrix B */ + + _starpu_dlasv2_(&b[b_dim1 + 1], 
&b[(b_dim1 << 1) + 1], &b[(b_dim1 << 1) + + 2], &r__, &t, snr, csr, snl, csl); + +/* Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and */ +/* Z is right rotation matrix computed from DLASV2 */ + + _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); + _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); + _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, + csr, snr); + _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, + csr, snr); + + b[b_dim1 + 2] = 0.; + b[(b_dim1 << 1) + 1] = 0.; + + } + + } + +/* Unscaling */ + + a[a_dim1 + 1] = anorm * a[a_dim1 + 1]; + a[a_dim1 + 2] = anorm * a[a_dim1 + 2]; + a[(a_dim1 << 1) + 1] = anorm * a[(a_dim1 << 1) + 1]; + a[(a_dim1 << 1) + 2] = anorm * a[(a_dim1 << 1) + 2]; + b[b_dim1 + 1] = bnorm * b[b_dim1 + 1]; + b[b_dim1 + 2] = bnorm * b[b_dim1 + 2]; + b[(b_dim1 << 1) + 1] = bnorm * b[(b_dim1 << 1) + 1]; + b[(b_dim1 << 1) + 2] = bnorm * b[(b_dim1 << 1) + 2]; + + if (wi == 0.) { + alphar[1] = a[a_dim1 + 1]; + alphar[2] = a[(a_dim1 << 1) + 2]; + alphai[1] = 0.; + alphai[2] = 0.; + beta[1] = b[b_dim1 + 1]; + beta[2] = b[(b_dim1 << 1) + 2]; + } else { + alphar[1] = anorm * wr1 / scale1 / bnorm; + alphai[1] = anorm * wi / scale1 / bnorm; + alphar[2] = alphar[1]; + alphai[2] = -alphai[1]; + beta[1] = 1.; + beta[2] = 1.; + } + + return 0; + +/* End of DLAGV2 */ + +} /* _starpu_dlagv2_ */ diff --git a/min-dgels/base/SRC/dlahqr.c b/min-dgels/base/SRC/dlahqr.c new file mode 100644 index 0000000..244642f --- /dev/null +++ b/min-dgels/base/SRC/dlahqr.c @@ -0,0 +1,631 @@ +/* dlahqr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, l, m; + doublereal s, v[3]; + integer i1, i2; + doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; + integer nh; + doublereal sn; + integer nr; + doublereal tr; + integer nz; + doublereal det, h21s; + integer its; + doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + doublereal safmin, safmax, rtdisc, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. 
of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAHQR is an auxiliary routine called by DHSEQR to update the */ +/* eigenvalues and Schur decomposition already computed by DHSEQR, by */ +/* dealing with the Hessenberg submatrix in rows and columns ILO to */ +/* IHI. */ + +/* Arguments */ +/* ========= */ + +/* WANTT (input) LOGICAL */ +/* = .TRUE. : the full Schur form T is required; */ +/* = .FALSE.: only eigenvalues are required. */ + +/* WANTZ (input) LOGICAL */ +/* = .TRUE. : the matrix of Schur vectors Z is required; */ +/* = .FALSE.: Schur vectors are not required. */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper quasi-triangular in */ +/* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless */ +/* ILO = 1). DLAHQR works primarily with the Hessenberg */ +/* submatrix in rows and columns ILO to IHI, but applies */ +/* transformations to all of H if WANTT is .TRUE.. */ +/* 1 <= ILO <= max(1,IHI); IHI <= N. */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On entry, the upper Hessenberg matrix H. */ +/* On exit, if INFO is zero and if WANTT is .TRUE., H is upper */ +/* quasi-triangular in rows and columns ILO:IHI, with any */ +/* 2-by-2 diagonal blocks in standard form. If INFO is zero */ +/* and WANTT is .FALSE., the contents of H are unspecified on */ +/* exit. The output state of H if INFO is nonzero is given */ +/* below under the description of INFO. */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH >= max(1,N). 
*/ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* The real and imaginary parts, respectively, of the computed */ +/* eigenvalues ILO to IHI are stored in the corresponding */ +/* elements of WR and WI. If two eigenvalues are computed as a */ +/* complex conjugate pair, they are stored in consecutive */ +/* elements of WR and WI, say the i-th and (i+1)th, with */ +/* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the */ +/* eigenvalues are stored in the same order as on the diagonal */ +/* of the Schur form returned in H, with WR(i) = H(i,i), and, if */ +/* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, */ +/* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. */ +/* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* If WANTZ is .TRUE., on entry Z must contain the current */ +/* matrix Z of transformations accumulated by DHSEQR, and on */ +/* exit Z has been updated; transformations are applied only to */ +/* the submatrix Z(ILOZ:IHIZ,ILO:IHI). */ +/* If WANTZ is .FALSE., Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* .GT. 0: If INFO = i, DLAHQR failed to compute all the */ +/* eigenvalues ILO to IHI in a total of 30 iterations */ +/* per eigenvalue; elements i+1:ihi of WR and WI */ +/* contain those eigenvalues which have been */ +/* successfully computed. */ + +/* If INFO .GT. 0 and WANTT is .FALSE., then on exit, */ +/* the remaining unconverged eigenvalues are the */ +/* eigenvalues of the upper Hessenberg matrix rows */ +/* and columns ILO thorugh INFO of the final, output */ +/* value of H. */ + +/* If INFO .GT. 
0 and WANTT is .TRUE., then on exit */ +/* (*) (initial value of H)*U = U*(final value of H) */ +/* where U is an orthognal matrix. The final */ +/* value of H is upper Hessenberg and triangular in */ +/* rows and columns INFO+1 through IHI. */ + +/* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ +/* (final value of Z) = (initial value of Z)*U */ +/* where U is the orthogonal matrix in (*) */ +/* (regardless of the value of WANTT.) */ + +/* Further Details */ +/* =============== */ + +/* 02-96 Based on modifications by */ +/* David Day, Sandia National Laboratory, USA */ + +/* 12-04 Further modifications by */ +/* Ralph Byers, University of Kansas, USA */ +/* This is a modified version of DLAHQR from LAPACK version 3.0. */ +/* It is (1) more robust against overflow and underflow and */ +/* (2) adopts the more conservative Ahues & Tisseur stopping */ +/* criterion (LAWN 122, 1997). */ + +/* ========================================================= */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* Set machine-dependent constants for the stopping criterion. 
*/ + + safmin = _starpu_dlamch_("SAFE MINIMUM"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + ulp = _starpu_dlamch_("PRECISION"); + smlnum = safmin * ((doublereal) nh / ulp); + +/* I1 and I2 are the indices of the first row and last column of H */ +/* to which transformations must be applied. If eigenvalues only are */ +/* being computed, I1 and I2 are set inside the main loop. */ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* The main loop begins here. I is the loop index and decreases from */ +/* IHI to ILO in steps of 1 or 2. Each iteration of the loop works */ +/* with the active submatrix in rows and columns L to I. */ +/* Eigenvalues I+1 to IHI have already converged. Either L = ILO or */ +/* H(L,L-1) is negligible so that the matrix splits. */ + + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + +/* Perform QR iterations on rows and columns ILO to I until a */ +/* submatrix of order 1 or 2 splits off at the bottom because a */ +/* subdiagonal element has become negligible. */ + + for (its = 0; its <= 30; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) { + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } + } +/* ==== The following is a conservative small subdiagonal */ +/* . deflation criterion due to Ahues & Tisseur (LAWN 122, */ +/* . 1997). It has better mathematical foundation and */ +/* . improves accuracy in some cases. 
==== */ + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { +/* Computing MAX */ + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + aa = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + bb = min(d__3,d__4); + s = aa + ab; +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1,d__2)) { + goto L40; + } + } +/* L30: */ + } +L40: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L150; + } + +/* Now the active submatrix is in rows and columns L to I. If */ +/* eigenvalues only are being computed, only the active submatrix */ +/* need be transformed. */ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10) { + +/* Exceptional shift. */ + + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + (d__2 = h__[l + + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { + +/* Exceptional shift. */ + + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else { + +/* Prepare to use Francis' double shift */ +/* (i.e. 
2nd degree generalized Rayleigh quotient) */ + + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) { + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) { + +/* ==== complex conjugate shifts ==== */ + + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + +/* ==== real shifts (use only one of them) ==== */ + + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs( + d__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.; + rt2i = 0.; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__1 = l; + for (m = i__ - 2; m >= i__1; --m) { +/* Determine the effect of starting the double-shift QR */ +/* iteration at row M, and see if this would make H(M,M-1) */ +/* negligible. (The following uses scaling to avoid */ +/* overflows and most underflows.) 
*/ + + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - + rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i + / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] + - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + + abs(v[2])) <= ulp * abs(v[0]) * ((d__2 = h__[m - 1 + (m - + 1) * h_dim1], abs(d__2)) + (d__3 = h__[m + m * h_dim1], + abs(d__3)) + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs( + d__4)))) { + goto L60; + } +/* L50: */ + } +L60: + +/* Double-shift QR step */ + + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { + +/* The first iteration of this loop determines a reflection G */ +/* from the vector V and applies it from left and right to H, */ +/* thus creating a nonzero bulge below the subdiagonal. */ + +/* Each subsequent iteration determines a reflection G to */ +/* restore the Hessenberg form in the (K-1)th column, and thus */ +/* chases the bulge one step toward the bottom of the active */ +/* submatrix. NR is the order of G. */ + +/* Computing MIN */ + i__2 = 3, i__3 = i__ - k + 1; + nr = min(i__2,i__3); + if (k > m) { + _starpu_dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + _starpu_dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { +/* ==== Use the following instead of */ +/* . H( K, K-1 ) = -H( K, K-1 ) to */ +/* . avoid a bug when v(2) and v(3) */ +/* . underflow. ==== */ + h__[k + (k - 1) * h_dim1] *= 1. 
- t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* Apply G from the left to transform the rows of the matrix */ +/* in columns K to I2. */ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L70: */ + } + +/* Apply G from the right to transform the columns of the */ +/* matrix in rows I1 to min(K+3,I). */ + +/* Computing MIN */ + i__3 = k + 3; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L80: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L90: */ + } + } + } else if (nr == 2) { + +/* Apply G from the left to transform the rows of the matrix */ +/* in columns K to I2. */ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L100: */ + } + +/* Apply G from the right to transform the columns of the */ +/* matrix in rows I1 to min(K+3,I). 
*/ + + i__2 = i__; + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L110: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L120: */ + } + } + } +/* L130: */ + } + +/* L140: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L150: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + +/* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */ + +/* Transform the 2-by-2 submatrix to standard Schur form, */ +/* and compute and store the eigenvalues. */ + + _starpu_dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + _starpu_drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + _starpu_drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + _starpu_drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* return to start of the main loop with new value of I. 
*/ + + i__ = l - 1; + goto L20; + +L160: + return 0; + +/* End of DLAHQR */ + +} /* _starpu_dlahqr_ */ diff --git a/min-dgels/base/SRC/dlahr2.c b/min-dgels/base/SRC/dlahr2.c new file mode 100644 index 0000000..edd3768 --- /dev/null +++ b/min-dgels/base/SRC/dlahr2.c @@ -0,0 +1,315 @@ +/* dlahr2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b38 = 0.; + +/* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; + + /* Local variables */ + integer i__; + doublereal ei; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dgemv_( + char *, integer *, integer *, doublereal *, doublereal *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dtrmm_(char *, char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), _starpu_daxpy_(integer *, 
+ doublereal *, doublereal *, integer *, doublereal *, integer *), + _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlarfg_( + integer *, doublereal *, doublereal *, integer *, doublereal *), + _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) */ +/* matrix A so that elements below the k-th subdiagonal are zero. The */ +/* reduction is performed by an orthogonal similarity transformation */ +/* Q' * A * Q. The routine returns the matrices V and T which determine */ +/* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */ + +/* This is an auxiliary routine called by DGEHRD. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* K (input) INTEGER */ +/* The offset for the reduction. Elements below the k-th */ +/* subdiagonal in the first NB columns are reduced to zero. */ +/* K < N. */ + +/* NB (input) INTEGER */ +/* The number of columns to be reduced. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) */ +/* On entry, the n-by-(n-k+1) general matrix A. */ +/* On exit, the elements on and above the k-th subdiagonal in */ +/* the first NB columns are overwritten with the corresponding */ +/* elements of the reduced matrix; the elements below the k-th */ +/* subdiagonal, with the array TAU, represent the matrix Q as a */ +/* product of elementary reflectors. The other columns of A are */ +/* unchanged. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* TAU (output) DOUBLE PRECISION array, dimension (NB) */ +/* The scalar factors of the elementary reflectors. See Further */ +/* Details. */ + +/* T (output) DOUBLE PRECISION array, dimension (LDT,NB) */ +/* The upper triangular matrix T. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= NB. */ + +/* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ +/* The n-by-nb matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= N. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of nb elementary reflectors */ + +/* Q = H(1) H(2) . . . H(nb). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */ +/* A(i+k+1:n,i), and tau in TAU(i). */ + +/* The elements of the vectors v together form the (n-k+1)-by-nb matrix */ +/* V which is needed, with T and Y, to apply the transformation to the */ +/* unreduced part of the matrix, using an update of the form: */ +/* A := (I - V*T*V') * (A - Y*V'). */ + +/* The contents of A on exit are illustrated by the following example */ +/* with n = 7, k = 3 and nb = 2: */ + +/* ( a a a a a ) */ +/* ( a a a a a ) */ +/* ( a a a a a ) */ +/* ( h h a a a ) */ +/* ( v1 h a a a ) */ +/* ( v1 v2 a a a ) */ +/* ( v1 v2 a a a ) */ + +/* where a denotes an element of the original matrix A, h denotes a */ +/* modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* element of the vector defining H(i). */ + +/* This file is a slight modification of LAPACK-3.0's DLAHRD */ +/* incorporating improvements proposed by Quintana-Orti and Van de */ +/* Gejin. Note that the entries of A(1:K,2:NB) differ from those */ +/* returned by the original LAPACK routine. This function is */ +/* not backward compatible with LAPACK3.0. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* Update A(K+1:N,I) */ + +/* Update I-th column of A - Y * V' */ + + i__2 = *n - *k; + i__3 = i__ - 1; + _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + + i__ * a_dim1], &c__1); + +/* Apply I - V * T' * V' to this column (call it b) from the */ +/* left, using the last column of T as workspace */ + +/* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) */ +/* ( V2 ) ( b2 ) */ + +/* where V1 is unit lower triangular */ + +/* w := V1' * b1 */ + + i__2 = i__ - 1; + _starpu_dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + _starpu_dtrmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, 
&a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] +, lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + _starpu_daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* Generate the elementary reflector H(I) to annihilate */ +/* A(K+I+1:N,I) */ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + _starpu_dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + +/* Compute Y(K+1:N,I) */ + + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[* + k + 1 + i__ * y_dim1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, & + a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, + &t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], + &c__1); + i__2 = *n - *k; + _starpu_dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + +/* Compute T(1:I,I) */ + + i__2 = i__ - 1; + d__1 = -tau[i__]; + _starpu_dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + +/* Compute Y(1:K,1:NB) */ + + _starpu_dlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + _starpu_dtrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b5, &a[*k + 1 + + a_dim1], lda, 
&y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + _starpu_dgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, &c_b5, + &y[y_offset], ldy); + } + _starpu_dtrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b5, &t[ + t_offset], ldt, &y[y_offset], ldy); + + return 0; + +/* End of DLAHR2 */ + +} /* _starpu_dlahr2_ */ diff --git a/min-dgels/base/SRC/dlahrd.c b/min-dgels/base/SRC/dlahrd.c new file mode 100644 index 0000000..faa414b --- /dev/null +++ b/min-dgels/base/SRC/dlahrd.c @@ -0,0 +1,285 @@ +/* dlahrd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b38 = 0.; + +/* Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; + + /* Local variables */ + integer i__; + doublereal ei; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *), 
_starpu_daxpy_(integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmv_(char + *, char *, char *, integer *, doublereal *, integer *, doublereal + *, integer *), _starpu_dlarfg_(integer *, + doublereal *, doublereal *, integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) */ +/* matrix A so that elements below the k-th subdiagonal are zero. The */ +/* reduction is performed by an orthogonal similarity transformation */ +/* Q' * A * Q. The routine returns the matrices V and T which determine */ +/* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */ + +/* This is an OBSOLETE auxiliary routine. */ +/* This routine will be 'deprecated' in a future release. */ +/* Please use the new routine DLAHR2 instead. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* K (input) INTEGER */ +/* The offset for the reduction. Elements below the k-th */ +/* subdiagonal in the first NB columns are reduced to zero. */ + +/* NB (input) INTEGER */ +/* The number of columns to be reduced. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) */ +/* On entry, the n-by-(n-k+1) general matrix A. */ +/* On exit, the elements on and above the k-th subdiagonal in */ +/* the first NB columns are overwritten with the corresponding */ +/* elements of the reduced matrix; the elements below the k-th */ +/* subdiagonal, with the array TAU, represent the matrix Q as a */ +/* product of elementary reflectors. The other columns of A are */ +/* unchanged. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* TAU (output) DOUBLE PRECISION array, dimension (NB) */ +/* The scalar factors of the elementary reflectors. See Further */ +/* Details. */ + +/* T (output) DOUBLE PRECISION array, dimension (LDT,NB) */ +/* The upper triangular matrix T. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= NB. */ + +/* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ +/* The n-by-nb matrix Y. */ + +/* LDY (input) INTEGER */ +/* The leading dimension of the array Y. LDY >= N. */ + +/* Further Details */ +/* =============== */ + +/* The matrix Q is represented as a product of nb elementary reflectors */ + +/* Q = H(1) H(2) . . . H(nb). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */ +/* A(i+k+1:n,i), and tau in TAU(i). */ + +/* The elements of the vectors v together form the (n-k+1)-by-nb matrix */ +/* V which is needed, with T and Y, to apply the transformation to the */ +/* unreduced part of the matrix, using an update of the form: */ +/* A := (I - V*T*V') * (A - Y*V'). */ + +/* The contents of A on exit are illustrated by the following example */ +/* with n = 7, k = 3 and nb = 2: */ + +/* ( a h a a a ) */ +/* ( a h a a a ) */ +/* ( a h a a a ) */ +/* ( h h a a a ) */ +/* ( v1 h a a a ) */ +/* ( v1 v2 a a a ) */ +/* ( v1 v2 a a a ) */ + +/* where a denotes an element of the original matrix A, h denotes a */ +/* modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* element of the vector defining H(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* Update A(1:n,i) */ + +/* Compute i-th column of A - Y * V' */ + + i__2 = i__ - 1; + _starpu_dgemv_("No transpose", n, &i__2, &c_b4, &y[y_offset], ldy, &a[*k + + i__ - 1 + a_dim1], lda, &c_b5, &a[i__ * a_dim1 + 1], & + c__1); + +/* Apply I - V * T' * V' to this column (call it b) from the */ +/* left, using the last column of T as workspace */ + +/* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) */ +/* ( V2 ) ( b2 ) */ + +/* where V1 is unit lower triangular */ + +/* w := V1' * b1 */ + + i__2 = i__ - 1; + _starpu_dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + _starpu_dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] +, lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + _starpu_daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], 
&c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* Generate the elementary reflector H(i) to annihilate */ +/* A(k+i+1:n,i) */ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + _starpu_dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + _starpu_dgemv_("No transpose", n, &i__2, &c_b5, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, & + a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + _starpu_dgemv_("No transpose", n, &i__2, &c_b4, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b5, &y[i__ * y_dim1 + 1], &c__1); + _starpu_dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + d__1 = -tau[i__]; + _starpu_dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + + return 0; + +/* End of DLAHRD */ + +} /* _starpu_dlahrd_ */ diff --git a/min-dgels/base/SRC/dlaic1.c b/min-dgels/base/SRC/dlaic1.c new file mode 100644 index 0000000..983259b --- /dev/null +++ b/min-dgels/base/SRC/dlaic1.c @@ -0,0 +1,326 @@ +/* dlaic1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b5 = 1.; + +/* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, + doublereal *sest, doublereal *w, doublereal *gamma, doublereal * + sestpr, doublereal *s, doublereal *c__) +{ + /* System generated locals */ + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal b, t, s1, s2, eps, tmp; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal sine, test, zeta1, zeta2, alpha, norma; + extern doublereal _starpu_dlamch_(char *); + doublereal absgam, absalp, cosine, absest; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAIC1 applies one step of incremental condition estimation in */ +/* its simplest version: */ + +/* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j */ +/* lower triangular matrix L, such that */ +/* twonorm(L*x) = sest */ +/* Then DLAIC1 computes sestpr, s, c such that */ +/* the vector */ +/* [ s*x ] */ +/* xhat = [ c ] */ +/* is an approximate singular vector of */ +/* [ L 0 ] */ +/* Lhat = [ w' gamma ] */ +/* in the sense that */ +/* twonorm(Lhat*xhat) = sestpr. 
*/ + +/* Depending on JOB, an estimate for the largest or smallest singular */ +/* value is computed. */ + +/* Note that [s c]' and sestpr**2 is an eigenpair of the system */ + +/* diag(sest*sest, 0) + [alpha gamma] * [ alpha ] */ +/* [ gamma ] */ + +/* where alpha = x'*w. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) INTEGER */ +/* = 1: an estimate for the largest singular value is computed. */ +/* = 2: an estimate for the smallest singular value is computed. */ + +/* J (input) INTEGER */ +/* Length of X and W */ + +/* X (input) DOUBLE PRECISION array, dimension (J) */ +/* The j-vector x. */ + +/* SEST (input) DOUBLE PRECISION */ +/* Estimated singular value of j by j matrix L */ + +/* W (input) DOUBLE PRECISION array, dimension (J) */ +/* The j-vector w. */ + +/* GAMMA (input) DOUBLE PRECISION */ +/* The diagonal element gamma. */ + +/* SESTPR (output) DOUBLE PRECISION */ +/* Estimated singular value of (j+1) by (j+1) matrix Lhat. */ + +/* S (output) DOUBLE PRECISION */ +/* Sine needed in forming xhat. */ + +/* C (output) DOUBLE PRECISION */ +/* Cosine needed in forming xhat. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --w; + --x; + + /* Function Body */ + eps = _starpu_dlamch_("Epsilon"); + alpha = _starpu_ddot_(j, &x[1], &c__1, &w[1], &c__1); + + absalp = abs(alpha); + absgam = abs(*gamma); + absest = abs(*sest); + + if (*job == 1) { + +/* Estimating largest singular value */ + +/* special cases */ + + if (*sest == 0.) { + s1 = max(absgam,absalp); + if (s1 == 0.) 
{ + *s = 0.; + *c__ = 1.; + *sestpr = 0.; + } else { + *s = alpha / s1; + *c__ = *gamma / s1; + tmp = sqrt(*s * *s + *c__ * *c__); + *s /= tmp; + *c__ /= tmp; + *sestpr = s1 * tmp; + } + return 0; + } else if (absgam <= eps * absest) { + *s = 1.; + *c__ = 0.; + tmp = max(absest,absalp); + s1 = absest / tmp; + s2 = absalp / tmp; + *sestpr = tmp * sqrt(s1 * s1 + s2 * s2); + return 0; + } else if (absalp <= eps * absest) { + s1 = absgam; + s2 = absest; + if (s1 <= s2) { + *s = 1.; + *c__ = 0.; + *sestpr = s2; + } else { + *s = 0.; + *c__ = 1.; + *sestpr = s1; + } + return 0; + } else if (absest <= eps * absalp || absest <= eps * absgam) { + s1 = absgam; + s2 = absalp; + if (s1 <= s2) { + tmp = s1 / s2; + *s = sqrt(tmp * tmp + 1.); + *sestpr = s2 * *s; + *c__ = *gamma / s2 / *s; + *s = d_sign(&c_b5, &alpha) / *s; + } else { + tmp = s2 / s1; + *c__ = sqrt(tmp * tmp + 1.); + *sestpr = s1 * *c__; + *s = alpha / s1 / *c__; + *c__ = d_sign(&c_b5, gamma) / *c__; + } + return 0; + } else { + +/* normal case */ + + zeta1 = alpha / absest; + zeta2 = *gamma / absest; + + b = (1. - zeta1 * zeta1 - zeta2 * zeta2) * .5; + *c__ = zeta1 * zeta1; + if (b > 0.) { + t = *c__ / (b + sqrt(b * b + *c__)); + } else { + t = sqrt(b * b + *c__) - b; + } + + sine = -zeta1 / t; + cosine = -zeta2 / (t + 1.); + tmp = sqrt(sine * sine + cosine * cosine); + *s = sine / tmp; + *c__ = cosine / tmp; + *sestpr = sqrt(t + 1.) * absest; + return 0; + } + + } else if (*job == 2) { + +/* Estimating smallest singular value */ + +/* special cases */ + + if (*sest == 0.) { + *sestpr = 0.; + if (max(absgam,absalp) == 0.) 
{ + sine = 1.; + cosine = 0.; + } else { + sine = -(*gamma); + cosine = alpha; + } +/* Computing MAX */ + d__1 = abs(sine), d__2 = abs(cosine); + s1 = max(d__1,d__2); + *s = sine / s1; + *c__ = cosine / s1; + tmp = sqrt(*s * *s + *c__ * *c__); + *s /= tmp; + *c__ /= tmp; + return 0; + } else if (absgam <= eps * absest) { + *s = 0.; + *c__ = 1.; + *sestpr = absgam; + return 0; + } else if (absalp <= eps * absest) { + s1 = absgam; + s2 = absest; + if (s1 <= s2) { + *s = 0.; + *c__ = 1.; + *sestpr = s1; + } else { + *s = 1.; + *c__ = 0.; + *sestpr = s2; + } + return 0; + } else if (absest <= eps * absalp || absest <= eps * absgam) { + s1 = absgam; + s2 = absalp; + if (s1 <= s2) { + tmp = s1 / s2; + *c__ = sqrt(tmp * tmp + 1.); + *sestpr = absest * (tmp / *c__); + *s = -(*gamma / s2) / *c__; + *c__ = d_sign(&c_b5, &alpha) / *c__; + } else { + tmp = s2 / s1; + *s = sqrt(tmp * tmp + 1.); + *sestpr = absest / *s; + *c__ = alpha / s1 / *s; + *s = -d_sign(&c_b5, gamma) / *s; + } + return 0; + } else { + +/* normal case */ + + zeta1 = alpha / absest; + zeta2 = *gamma / absest; + +/* Computing MAX */ + d__3 = zeta1 * zeta1 + 1. + (d__1 = zeta1 * zeta2, abs(d__1)), + d__4 = (d__2 = zeta1 * zeta2, abs(d__2)) + zeta2 * zeta2; + norma = max(d__3,d__4); + +/* See if root is closer to zero or to ONE */ + + test = (zeta1 - zeta2) * 2. * (zeta1 + zeta2) + 1.; + if (test >= 0.) { + +/* root is close to zero, compute directly */ + + b = (zeta1 * zeta1 + zeta2 * zeta2 + 1.) * .5; + *c__ = zeta2 * zeta2; + t = *c__ / (b + sqrt((d__1 = b * b - *c__, abs(d__1)))); + sine = zeta1 / (1. - t); + cosine = -zeta2 / t; + *sestpr = sqrt(t + eps * 4. * eps * norma) * absest; + } else { + +/* root is closer to ONE, shift by that amount */ + + b = (zeta2 * zeta2 + zeta1 * zeta1 - 1.) * .5; + *c__ = zeta1 * zeta1; + if (b >= 0.) { + t = -(*c__) / (b + sqrt(b * b + *c__)); + } else { + t = b - sqrt(b * b + *c__); + } + sine = -zeta1 / t; + cosine = -zeta2 / (t + 1.); + *sestpr = sqrt(t + 1. + eps * 4. 
* eps * norma) * absest; + } + tmp = sqrt(sine * sine + cosine * cosine); + *s = sine / tmp; + *c__ = cosine / tmp; + return 0; + + } + } + return 0; + +/* End of DLAIC1 */ + +} /* _starpu_dlaic1_ */ diff --git a/min-dgels/base/SRC/dlaisnan.c b/min-dgels/base/SRC/dlaisnan.c new file mode 100644 index 0000000..fd59014 --- /dev/null +++ b/min-dgels/base/SRC/dlaisnan.c @@ -0,0 +1,58 @@ +/* dlaisnan.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2) +{ + /* System generated locals */ + logical ret_val; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is not for general use. It exists solely to avoid */ +/* over-optimization in DISNAN. */ + +/* DLAISNAN checks for NaNs by comparing its two arguments for */ +/* inequality. NaN is the only floating-point value where NaN != NaN */ +/* returns .TRUE. To check for NaNs, pass the same variable as both */ +/* arguments. */ + +/* A compiler must assume that the two arguments are */ +/* not the same variable, and the test will not be optimized away. */ +/* Interprocedural or whole-program optimization may delete this */ +/* test. The ISNAN functions will be replaced by the correct */ +/* Fortran 03 intrinsic once the intrinsic is widely available. 
*/ + +/* Arguments */ +/* ========= */ + +/* DIN1 (input) DOUBLE PRECISION */ +/* DIN2 (input) DOUBLE PRECISION */ +/* Two numbers to compare for inequality. */ + +/* ===================================================================== */ + +/* .. Executable Statements .. */ + ret_val = *din1 != *din2; + return ret_val; +} /* _starpu_dlaisnan_ */ diff --git a/min-dgels/base/SRC/dlaln2.c b/min-dgels/base/SRC/dlaln2.c new file mode 100644 index 0000000..08ed12f --- /dev/null +++ b/min-dgels/base/SRC/dlaln2.c @@ -0,0 +1,575 @@ +/* dlaln2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + + /* Local variables */ + integer j; +#define ci (equiv_0) +#define cr (equiv_1) + doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, cr22, + li21, csi, ui11, lr21, ui12, ui22; 
+#define civ (equiv_0) + doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + integer icmax; + doublereal bnorm, cnorm, smini; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLALN2 solves a system of the form (ca A - w D ) X = s B */ +/* or (ca A' - w D) X = s B with possible scaling ("s") and */ +/* perturbation of A. (A' means A-transpose.) */ + +/* A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA */ +/* real diagonal matrix, w is a real or complex value, and X and B are */ +/* NA x 1 matrices -- real if w is real, complex if w is complex. NA */ +/* may be 1 or 2. */ + +/* If w is complex, X and B are represented as NA x 2 matrices, */ +/* the first column of each being the real part and the second */ +/* being the imaginary part. */ + +/* "s" is a scaling factor (.LE. 1), computed by DLALN2, which is */ +/* so chosen that X can be computed without overflow. X is further */ +/* scaled if necessary to assure that norm(ca A - w D)*norm(X) is less */ +/* than overflow. */ + +/* If both singular values of (ca A - w D) are less than SMIN, */ +/* SMIN*identity will be used instead of (ca A - w D). If only one */ +/* singular value is less than SMIN, one element of (ca A - w D) will be */ +/* perturbed enough to make the smallest singular value roughly SMIN. */ +/* If both singular values are at least SMIN, (ca A - w D) will not be */ +/* perturbed. In any case, the perturbation will be at most some small */ +/* multiple of max( SMIN, ulp*norm(ca A - w D) ). 
The singular values */ +/* are computed by infinity-norm approximations, and thus will only be */ +/* correct to a factor of 2 or so. */ + +/* Note: all input quantities are assumed to be smaller than overflow */ +/* by a reasonable factor. (See BIGNUM.) */ + +/* Arguments */ +/* ========== */ + +/* LTRANS (input) LOGICAL */ +/* =.TRUE.: A-transpose will be used. */ +/* =.FALSE.: A will be used (not transposed.) */ + +/* NA (input) INTEGER */ +/* The size of the matrix A. It may (only) be 1 or 2. */ + +/* NW (input) INTEGER */ +/* 1 if "w" is real, 2 if "w" is complex. It may only be 1 */ +/* or 2. */ + +/* SMIN (input) DOUBLE PRECISION */ +/* The desired lower bound on the singular values of A. This */ +/* should be a safe distance away from underflow or overflow, */ +/* say, between (underflow/machine precision) and (machine */ +/* precision * overflow ). (See BIGNUM and ULP.) */ + +/* CA (input) DOUBLE PRECISION */ +/* The coefficient c, which A is multiplied by. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,NA) */ +/* The NA x NA matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of A. It must be at least NA. */ + +/* D1 (input) DOUBLE PRECISION */ +/* The 1,1 element in the diagonal matrix D. */ + +/* D2 (input) DOUBLE PRECISION */ +/* The 2,2 element in the diagonal matrix D. Not used if NW=1. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NW) */ +/* The NA x NW matrix B (right-hand side). If NW=2 ("w" is */ +/* complex), column 1 contains the real part of B and column 2 */ +/* contains the imaginary part. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. It must be at least NA. */ + +/* WR (input) DOUBLE PRECISION */ +/* The real part of the scalar "w". */ + +/* WI (input) DOUBLE PRECISION */ +/* The imaginary part of the scalar "w". Not used if NW=1. */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NW) */ +/* The NA x NW matrix X (unknowns), as computed by DLALN2. 
*/ +/* If NW=2 ("w" is complex), on exit, column 1 will contain */ +/* the real part of X and column 2 will contain the imaginary */ +/* part. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of X. It must be at least NA. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* The scale factor that B must be multiplied by to insure */ +/* that overflow does not occur when computing X. Thus, */ +/* (ca A - w D) X will be SCALE*B, not B (ignoring */ +/* perturbations of A.) It will be at most 1. */ + +/* XNORM (output) DOUBLE PRECISION */ +/* The infinity-norm of X, when X is regarded as an NA x NW */ +/* real matrix. */ + +/* INFO (output) INTEGER */ +/* An error flag. It will be set to zero if no error occurs, */ +/* a negative number if an argument is in error, or a positive */ +/* number if ca A - w D had to be perturbed. */ +/* The possible values are: */ +/* = 0: No error occurred, and (ca A - w D) did not have to be */ +/* perturbed. */ +/* = 1: (ca A - w D) had to be perturbed to make its smallest */ +/* (or only) singular value greater than SMIN. */ +/* NOTE: In the interests of speed, this routine does not */ +/* check the inputs for errors. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Equivalences .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ +/* .. */ +/* .. Executable Statements .. */ + +/* Compute BIGNUM */ + + smlnum = 2. * _starpu_dlamch_("Safe minimum"); + bignum = 1. 
/ smlnum; + smini = max(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* Real 1x1 system. */ + +/* C = ca A - w D */ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + +/* Complex 1x1 system (w is complex) */ + +/* C = ca A - w D */ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << + 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. 
/ bnorm; + } + } + +/* Compute X */ + + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + _starpu_dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << + 1) + 1], abs(d__2)); + } + + } else { + +/* 2x2 System */ + +/* Compute the real part of C = ca A - w D (or ca A' - w D ) */ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* Real 2x2 system (w is real) */ + +/* Find the largest element in C */ + + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ + b_dim1 + 2], abs(d__2)); + bnorm = max(d__3,d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. 
/ ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2,d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* Complex 2x2 system (w is complex) */ + +/* Find the largest element in C */ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( + d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] + , abs(d__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 + << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], + abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5,d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. 
/ bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1. 
/ ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) + ), d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1,d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + _starpu_dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of DLALN2 */ + +} /* _starpu_dlaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci diff --git a/min-dgels/base/SRC/dlals0.c b/min-dgels/base/SRC/dlals0.c new file mode 100644 index 0000000..8fe9535 --- /dev/null +++ b/min-dgels/base/SRC/dlals0.c @@ -0,0 +1,473 @@ +/* dlals0.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b5 = -1.; +static integer c__1 = 1; +static doublereal c_b11 = 1.; +static doublereal c_b13 = 0.; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer 
i__, j, m, n; + doublereal dj; + integer nlp1; + doublereal temp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal dsigjp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLALS0 applies back the multiplying factors of either the left or the */ +/* right singular vector matrix of a diagonal matrix appended by a row */ +/* to the right hand side matrix B in solving the least squares problem */ +/* using the divide-and-conquer SVD approach. */ + +/* For the left singular vector matrix, three types of orthogonal */ +/* matrices are involved: */ + +/* (1L) Givens rotations: the number of such rotations is GIVPTR; the */ +/* pairs of columns/rows they were applied to are stored in GIVCOL; */ +/* and the C- and S-values of these rotations are stored in GIVNUM. */ + +/* (2L) Permutation. 
The (NL+1)-st row of B is to be moved to the first */ +/* row, and for J=2:N, PERM(J)-th row of B is to be moved to the */ +/* J-th row. */ + +/* (3L) The left singular vector matrix of the remaining matrix. */ + +/* For the right singular vector matrix, four types of orthogonal */ +/* matrices are involved: */ + +/* (1R) The right singular vector matrix of the remaining matrix. */ + +/* (2R) If SQRE = 1, one extra Givens rotation to generate the right */ +/* null space. */ + +/* (3R) The inverse transformation of (2L). */ + +/* (4R) The inverse transformation of (1L). */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether singular vectors are to be computed in */ +/* factored form: */ +/* = 0: Left singular vector matrix. */ +/* = 1: Right singular vector matrix. */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has row dimension N = NL + NR + 1, */ +/* and column dimension M = N + SQRE. */ + +/* NRHS (input) INTEGER */ +/* The number of columns of B and BX. NRHS must be at least 1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ +/* On input, B contains the right hand sides of the least */ +/* squares problem in rows 1 through M. On output, B contains */ +/* the solution X in rows 1 through N. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B. LDB must be at least */ +/* max(1,MAX( M, N ) ). */ + +/* BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ + +/* LDBX (input) INTEGER */ +/* The leading dimension of BX. */ + +/* PERM (input) INTEGER array, dimension ( N ) */ +/* The permutations (from deflation and sorting) applied */ +/* to the two blocks. 
*/ + +/* GIVPTR (input) INTEGER */ +/* The number of Givens rotations which took place in this */ +/* subproblem. */ + +/* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */ +/* Each pair of numbers indicates a pair of rows/columns */ +/* involved in a Givens rotation. */ + +/* LDGCOL (input) INTEGER */ +/* The leading dimension of GIVCOL, must be at least N. */ + +/* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* Each number indicates the C or S value used in the */ +/* corresponding Givens rotation. */ + +/* LDGNUM (input) INTEGER */ +/* The leading dimension of arrays DIFR, POLES and */ +/* GIVNUM, must be at least K. */ + +/* POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* On entry, POLES(1:K, 1) contains the new singular */ +/* values obtained from solving the secular equation, and */ +/* POLES(1:K, 2) is an array containing the poles in the secular */ +/* equation. */ + +/* DIFL (input) DOUBLE PRECISION array, dimension ( K ). */ +/* On entry, DIFL(I) is the distance between I-th updated */ +/* (undeflated) singular value and the I-th (undeflated) old */ +/* singular value. */ + +/* DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). */ +/* On entry, DIFR(I, 1) contains the distances between I-th */ +/* updated (undeflated) singular value and the I+1-th */ +/* (undeflated) old singular value. And DIFR(I, 2) is the */ +/* normalizing factor for the I-th right singular vector. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( K ) */ +/* Contain the components of the deflation-adjusted updating row */ +/* vector. */ + +/* K (input) INTEGER */ +/* Contains the dimension of the non-deflated matrix, */ +/* This is the order of the related secular equation. 1 <= K <=N. */ + +/* C (input) DOUBLE PRECISION */ +/* C contains garbage if SQRE =0 and the C-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. 
*/ + +/* S (input) DOUBLE PRECISION */ +/* S contains garbage if SQRE =0 and the S-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension ( K ) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* California at Berkeley, USA */ +/* Osni Marques, LBNL/NERSC, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if 
(*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* Apply back orthogonal transformations from the left. */ + +/* Step (1L): apply back the Givens rotations performed. */ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + _starpu_dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + _starpu_dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* Step (3L): apply the inverse of the left singular vector */ +/* matrix to BX. */ + + if (*k == 1) { + _starpu_dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + _starpu_dscal_(nrhs, &c_b5, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (_starpu_dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) 
{ + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (_starpu_dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = _starpu_dnrm2_(k, &work[1], &c__1); + _starpu_dgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b13, &b[j + b_dim1], ldb); + _starpu_dlascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + _starpu_dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* Apply back the right orthogonal transformations. */ + +/* Step (1R): apply back the new right singular vector matrix */ +/* to B. */ + + if (*k == 1) { + _starpu_dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (_starpu_dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) 
{ + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (_starpu_dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + _starpu_dgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & + c__1, &c_b13, &bx[j + bx_dim1], ldbx); +/* L80: */ + } + } + +/* Step (2R): if SQRE = 1, apply back the rotation that is */ +/* related to the right null space of the subproblem. */ + + if (*sqre == 1) { + _starpu_dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + _starpu_drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + _starpu_dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + _starpu_dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + _starpu_dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + _starpu_dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. */ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + _starpu_drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } + + return 0; + +/* End of DLALS0 */ + +} /* _starpu_dlals0_ */ diff --git a/min-dgels/base/SRC/dlalsa.c b/min-dgels/base/SRC/dlalsa.c new file mode 100644 index 0000000..78630eb --- /dev/null +++ b/min-dgels/base/SRC/dlalsa.c @@ -0,0 +1,456 @@ +/* dlalsa.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = 1.; +static doublereal c_b8 = 0.; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, ndb1, + nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer inode, ndiml, ndimr; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *,
integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), _starpu_dlasdt_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLALSA is an intermediate step in solving the least squares problem */ +/* by computing the SVD of the coefficient matrix in compact form (The */ +/* singular vectors are computed as products of simple orthogonal */ +/* matrices.). */ + +/* If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector */ +/* matrix of an upper bidiagonal matrix to the right hand side; and if */ +/* ICOMPQ = 1, DLALSA applies the right singular vector matrix to the */ +/* right hand side. The singular vector matrices were generated in */ +/* compact form by DLASDA. */ + +/* Arguments */ +/* ========= */ + + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether the left or the right singular vector */ +/* matrix is involved. */ +/* = 0: Left singular vector matrix */ +/* = 1: Right singular vector matrix */ + +/* SMLSIZ (input) INTEGER */ +/* The maximum size of the subproblems at the bottom of the */ +/* computation tree. */ + +/* N (input) INTEGER */ +/* The row and column dimensions of the upper bidiagonal matrix. */ + +/* NRHS (input) INTEGER */ +/* The number of columns of B and BX. NRHS must be at least 1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ +/* On input, B contains the right hand sides of the least */ +/* squares problem in rows 1 through M. */ +/* On output, B contains the solution X in rows 1 through N.
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of B in the calling subprogram. */ +/* LDB must be at least max(1,MAX( M, N ) ). */ + +/* BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ +/* On exit, the result of applying the left or right singular */ +/* vector matrix to B. */ + +/* LDBX (input) INTEGER */ +/* The leading dimension of BX. */ + +/* U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). */ +/* On entry, U contains the left singular vector matrices of all */ +/* subproblems at the bottom level. */ + +/* LDU (input) INTEGER, LDU >= N. */ +/* The leading dimension of arrays U, VT, DIFL, DIFR, */ +/* POLES, GIVNUM, and Z. */ + +/* VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). */ +/* On entry, VT' contains the right singular vector matrices of */ +/* all subproblems at the bottom level. */ + +/* K (input) INTEGER array, dimension ( N ). */ + +/* DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ +/* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. */ + +/* DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record */ +/* distances between singular values on the I-th level and */ +/* singular values on the (I -1)-th level, and DIFR(*, 2 * I) */ +/* record the normalizing factors of the right singular vectors */ +/* matrices of subproblems on I-th level. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ +/* On entry, Z(1, I) contains the components of the deflation- */ +/* adjusted updating row vector for subproblems on the I-th */ +/* level. */ + +/* POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old */ +/* singular values involved in the secular equations on the I-th */ +/* level. */ + +/* GIVPTR (input) INTEGER array, dimension ( N ).
*/ +/* On entry, GIVPTR( I ) records the number of Givens */ +/* rotations performed on the I-th problem on the computation */ +/* tree. */ + +/* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). */ +/* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the */ +/* locations of Givens rotations performed on the I-th level on */ +/* the computation tree. */ + +/* LDGCOL (input) INTEGER, LDGCOL >= N. */ +/* The leading dimension of arrays GIVCOL and PERM. */ + +/* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). */ +/* On entry, PERM(*, I) records permutations done on the I-th */ +/* level of the computation tree. */ + +/* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- */ +/* values of Givens rotations performed on the I-th level on the */ +/* computation tree. */ + +/* C (input) DOUBLE PRECISION array, dimension ( N ). */ +/* On entry, if the I-th subproblem is not square, */ +/* C( I ) contains the C-value of a Givens rotation related to */ +/* the right null space of the I-th subproblem. */ + +/* S (input) DOUBLE PRECISION array, dimension ( N ). */ +/* On entry, if the I-th subproblem is not square, */ +/* S( I ) contains the S-value of a Givens rotation related to */ +/* the right null space of the I-th subproblem. */ + +/* WORK (workspace) DOUBLE PRECISION array. */ +/* The dimension must be at least N. */ + +/* IWORK (workspace) INTEGER array. */ +/* The dimension must be at least 3 * N */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* California at Berkeley, USA */ +/* Osni Marques, LBNL/NERSC, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* ..
*/ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset each array pointer so that the 1-based (Fortran-style) subscripts used below index the caller's storage */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body: validate arguments first; on failure INFO = -(argument position) is reported through XERBLA and the routine returns */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. IWORK is used as three consecutive N-long segments starting at INODE, NDIML and NDIMR; they are handed to DLASDT and read back below as IC, NL and NR. 
*/ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* The following code applies back the left singular vector factors. */ +/* For applying back the right singular vector factors, go to 50. */ + + if (*icompq == 1) { + goto L50; + } + +/* The nodes on the bottom level of the tree were solved */ +/* by DLASDQ.
The corresponding left and right singular vector */ +/* matrices are in explicit form. First apply back the left */ +/* singular vector matrices. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* IC : center row of each node */ +/* NL : number of rows of left subproblem */ +/* NR : number of rows of right subproblem */ +/* NLF: starting row of the left subproblem */ +/* NRF: starting row of the right subproblem */ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + _starpu_dgemm_("T", "N", &nl, nrhs, &nl, &c_b7, &u[nlf + u_dim1], ldu, &b[nlf + + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx); + _starpu_dgemm_("T", "N", &nr, nrhs, &nr, &c_b7, &u[nrf + u_dim1], ldu, &b[nrf + + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } + +/* Next copy the rows of B that correspond to unchanged rows */ +/* in the bidiagonal matrix to BX. */ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + _starpu_dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* Finally go through the left singular vector matrices of all */ +/* the other subproblems bottom-up on the tree.
*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* find the first node LF and last node LL on */ +/* the current level LVL: LF = 2**(LVL-1), LL = 2*LF - 1 */ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + _starpu_dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* First now go through the right singular vector matrices of all */ +/* the tree nodes top-down. */ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* Find the first node LF and last node LL on */ +/* the current level LVL.
*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + _starpu_dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* The nodes on the bottom level of the tree were solved */ +/* by DLASDQ. The corresponding right singular vector */ +/* matrices are in explicit form. Apply them back. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + _starpu_dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b7, &vt[nlf + vt_dim1], ldu, & + b[nlf + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx); + _starpu_dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b7, &vt[nrf + vt_dim1], ldu, & + b[nrf + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx); +/* L80: */ + } + +L90: + + return 0; + +/* End of DLALSA */ + +} /* _starpu_dlalsa_ */ diff --git a/min-dgels/base/SRC/dlalsd.c b/min-dgels/base/SRC/dlalsd.c new file mode 100644 index 0000000..4fde5f4 --- /dev/null +++ b/min-dgels/base/SRC/dlalsd.c @@ -0,0 +1,529 @@ +/* dlalsd.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b6 = 0.; +static integer c__0 = 0; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer c__, i__, j, k; + doublereal r__; + integer s, u, z__; + doublereal cs; + integer bx; + doublereal sn; + integer st, vt, nm1, st1; + doublereal eps; + integer iwk; + doublereal tol; + integer difl, difr; + doublereal rcnd; + integer perm, nsub; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer nlvl, sqre, bxst; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *,
integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + integer givcol; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, + integer *); + doublereal orgnrm; + integer givnum, givptr, smlszp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DLALSD uses the singular value decomposition of A to solve the least */ +/* squares problem of finding X to minimize the Euclidean norm of each */ +/* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B */ +/* are N-by-NRHS. The solution X overwrites B. */ + +/* The singular values of A smaller than RCOND times the largest */ +/* singular value are treated as zero in solving the least squares */ +/* problem; in this case a minimum norm solution is returned. */ +/* The actual singular values are returned in D in ascending order. NOTE(review): the code finishes with DLASRT("D", ...), which presumably sorts in decreasing order -- confirm which order callers should expect. */ + +/* This code makes very mild assumptions about floating point */ +/* arithmetic. It will work on machines with a guard digit in */ +/* add/subtract, or on those binary machines without guard digits */ +/* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ +/* It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': D and E define an upper bidiagonal matrix. */ +/* = 'L': D and E define a lower bidiagonal matrix. */ + +/* SMLSIZ (input) INTEGER */ +/* The maximum size of the subproblems at the bottom of the */ +/* computation tree. */ + +/* N (input) INTEGER */ +/* The dimension of the bidiagonal matrix. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of columns of B. NRHS must be at least 1. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry D contains the main diagonal of the bidiagonal */ +/* matrix. On exit, if INFO = 0, D contains its singular values. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* Contains the super-diagonal entries of the bidiagonal matrix. */ +/* On exit, E has been destroyed. 
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On input, B contains the right hand sides of the least */ +/* squares problem.
On output, B contains the solution X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of B in the calling subprogram. */ +/* LDB must be at least max(1,N). */ + +/* RCOND (input) DOUBLE PRECISION */ +/* The singular values of A less than or equal to RCOND times */ +/* the largest singular value are treated as zero in solving */ +/* the least squares problem. If RCOND <= 0 or RCOND >= 1, */ +/* machine precision is used instead. */ +/* For example, if diag(S)*X=B were the least squares problem, */ +/* where diag(S) is a diagonal matrix of singular values, the */ +/* solution would be X(i) = B(i) / S(i) if S(i) is greater than */ +/* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to */ +/* RCOND*max(S). */ + +/* RANK (output) INTEGER */ +/* The number of singular values of A greater than RCOND times */ +/* the largest singular value. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension at least */ +/* (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), */ +/* where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). */ + +/* IWORK (workspace) INTEGER array, dimension at least */ +/* (3*N*NLVL + 11*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: The algorithm failed to compute a singular value while */ +/* working on the submatrix lying in rows and columns */ +/* INFO/(N+1) through MOD(INFO,N+1). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* California at Berkeley, USA */ +/* Osni Marques, LBNL/NERSC, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset each array pointer so that the 1-based (Fortran-style) subscripts used below index the caller's storage */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLALSD", &i__1); + return 0; + } + + eps = _starpu_dlamch_("Epsilon"); + +/* Set up the tolerance: an RCOND outside the open interval (0,1) falls back to EPS. */ + + if (*rcond <= 0. || *rcond >= 1.) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); + } else { + *rank = 1; + _starpu_dlascl_("G", &c__0, &c__0, &d__[1], &c_b11, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); /* NOTE(review): abs is applied to a doublereal here; presumably the type-generic macro from f2c.h rather than the integer abs() -- confirm */ + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. When NRHS > 1 the rotation pairs are saved in WORK(2*I-1), WORK(2*I) and applied to B column by column afterwards. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + _starpu_drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + _starpu_drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.)
{ + _starpu_dlaset_("A", n, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); + return 0; + } + + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, &c__1, &d__[1], n, info); + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, &nm1, &c__1, &e[1], &nm1, + info); + +/* If N is no larger than the minimum divide size SMLSIZ, then solve */ +/* the problem with another solver. */ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + _starpu_dlaset_("A", n, n, &c_b6, &c_b11, &work[1], n); + _starpu_dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + tol = rcnd * (d__1 = d__[_starpu_idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[i__ + b_dim1], ldb); + } else { + _starpu_dlascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + _starpu_dgemm_("T", "N", n, nrhs, n, &c_b11, &work[1], n, &b[b_offset], ldb, & + c_b6, &work[nwork], n); + _starpu_dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + +/* Unscale. */ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, + info); + _starpu_dlasrt_("D", n, &d__[1], info); + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants.
*/ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + /* 1-based offsets of the sub-arrays carved out of WORK */ + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + /* 1-based offsets of the sub-arrays carved out of IWORK; IWORK(1..NSUB) itself holds the start row of each subproblem and IWORK(SIZEI..) its size */ + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + /* Diagonal entries with |D(i)| < EPS are replaced by d_sign(EPS, D(i)) -- presumably EPS carrying the sign of D(i) */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* Subproblem found. First determine its size and then */ +/* apply divide and conquer on it. */ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* A subproblem with E(NM1) small. This implies a */ +/* 1-by-1 subproblem at D(N), which is not solved */ +/* explicitly. 
*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + _starpu_dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* This is a 1-by-1 subproblem and is not solved */ +/* explicitly. */ + + _starpu_dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ.
*/ + + _starpu_dlaset_("A", &nsize, &nsize, &c_b6, &c_b11, &work[vt + st1], + n); + _starpu_dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + _starpu_dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer: DLASDA is called with ICMPQ1 = 1, then DLALSA with ICMPQ2 = 0 takes B as input and writes into the BX workspace. */ + + _starpu_dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + _starpu_dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (d__1 = d__[_starpu_idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Some of the elements in D can be negative because 1-by-1 */ +/* subproblems were not solved explicitly.
*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &work[bx + i__ - 1], n); + } else { + ++(*rank); + _starpu_dlascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &work[ + bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. ICMPQ2 = 1 selects the right-vector path of DLALSA; this pass reads from the BX workspace and writes the result into B. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + _starpu_dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + _starpu_dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b11, &work[vt + st1], n, + &work[bxst], n, &c_b6, &b[st + b_dim1], ldb); + } else { + _starpu_dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info); + _starpu_dlasrt_("D", n, &d__[1], info); + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of DLALSD */ + +} /* _starpu_dlalsd_ */ diff --git a/min-dgels/base/SRC/dlamrg.c b/min-dgels/base/SRC/dlamrg.c new file mode 100644 index 0000000..e308648 --- /dev/null +++ b/min-dgels/base/SRC/dlamrg.c @@ -0,0 +1,131 @@ +/* dlamrg.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ind1, ind2, n1sv, n2sv; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAMRG will create a permutation list which will merge the elements */ +/* of A (which is composed of two independently sorted sets) into a */ +/* single set which is sorted in ascending order. */ + +/* Arguments */ +/* ========= */ + +/* N1 (input) INTEGER */ +/* N2 (input) INTEGER */ +/* These arguments contain the respective lengths of the two */ +/* sorted lists to be merged. */ + +/* A (input) DOUBLE PRECISION array, dimension (N1+N2) */ +/* The first N1 elements of A contain a list of numbers which */ +/* are sorted in either ascending or descending order. Likewise */ +/* for the final N2 elements. */ + +/* DTRD1 (input) INTEGER */ +/* DTRD2 (input) INTEGER */ +/* These are the strides to be taken through the array A. */ +/* Allowable strides are 1 and -1. They indicate whether a */ +/* subset of A is sorted in ascending (DTRDx = 1) or descending */ +/* (DTRDx = -1) order.
*/ + +/* INDEX (output) INTEGER array, dimension (N1+N2) */ +/* On exit this array will contain a permutation such that */ +/* if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be */ +/* sorted in ascending order. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift both pointers so that the 1-based subscripts used below (and stored into INDEX) address the caller's arrays */ + --index; + --a; + + /* Function Body: N1SV and N2SV count the elements still to be taken from each list; IND1 and IND2 start at the first element of a list when its stride is positive and at its last element otherwise */ + n1sv = *n1; + n2sv = *n2; + if (*dtrd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*dtrd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ): record the index of the smaller current element (ties take the first list) and step that list by its stride */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; + --n2sv; + } + goto L10; + } +/* end while: one list is exhausted, append the remaining indices of the other. The exhausted list's counter (N1SV or N2SV) is reused as the loop variable. */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 
0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; +/* L30: */ + } + } + + return 0; + +/* End of DLAMRG */ + +} /* _starpu_dlamrg_ */ diff --git a/min-dgels/base/SRC/dlaneg.c b/min-dgels/base/SRC/dlaneg.c new file mode 100644 index 0000000..b03e191 --- /dev/null +++ b/min-dgels/base/SRC/dlaneg.c @@ -0,0 +1,218 @@ +/* dlaneg.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * + sigma, doublereal *pivmin, integer *r__) +{ + /* System generated locals */ + integer ret_val, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer j; + doublereal p, t; + integer bj; + doublereal tmp; + integer neg1, neg2; + doublereal bsav, gamma, dplus; + extern logical _starpu_disnan_(doublereal *); + integer negcnt; + logical sawnan; + doublereal dminus; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANEG computes the Sturm count, the number of negative pivots */ +/* encountered while factoring tridiagonal T - sigma I = L D L^T. */ +/* This implementation works directly on the factors without forming */ +/* the tridiagonal matrix T. The Sturm count is also the number of */ +/* eigenvalues of T less than sigma. */ + +/* This routine is called from DLARRB. */ + +/* The current routine does not use the PIVMIN parameter but rather */ +/* requires IEEE-754 propagation of Infinities and NaNs. This */ +/* routine also has no input range restrictions but does require */ +/* default exception handling such that x/0 produces Inf when x is */ +/* non-zero, and Inf/Inf produces NaN.
For more information, see: */ + +/* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */ +/* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */ +/* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */ +/* (Tech report version in LAWN 172 with the same title.) */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of the diagonal matrix D. */ + +/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (N-1) elements L(i)*L(i)*D(i). */ + +/* SIGMA (input) DOUBLE PRECISION */ +/* Shift amount in T - sigma I = L D L^T. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence. May be used */ +/* when zero pivots are encountered on non-IEEE-754 */ +/* architectures. */ + +/* R (input) INTEGER */ +/* The twist index for the twisted factorization that is used */ +/* for the negcount. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ +/* Jason Riedy, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* Some architectures propagate Infinities and NaNs very slowly, so */ +/* the code computes counts in BLKLEN chunks. Then a NaN can */ +/* propagate at most BLKLEN columns before being detected. This is */ +/* not a general tuning parameter; it needs only to be just large */ +/* enough that the overhead is tiny in common cases. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + /* Parameter adjustments */ + --lld; + --d__; + + /* Function Body */ + negcnt = 0; +/* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T, swept in blocks of at most 128 indices (presumably the BLKLEN described above) with a single NaN test on T after each block */ + t = -(*sigma); + i__1 = *r__ - 1; + for (bj = 1; bj <= i__1; bj += 128) { + neg1 = 0; + bsav = t; +/* Computing MIN */ + i__3 = bj + 127, i__4 = *r__ - 1; + i__2 = min(i__3,i__4); + for (j = bj; j <= i__2; ++j) { + dplus = d__[j] + t; + if (dplus < 0.) { + ++neg1; + } + tmp = t / dplus; + t = tmp * lld[j] - *sigma; +/* L21: */ + } + sawnan = _starpu_disnan_(&t); +/* Run a slower version of the above loop if a NaN is detected. */ +/* A NaN should occur only with a zero pivot after an infinite */ +/* pivot. In that case, substituting 1 for T/DPLUS is the */ +/* correct limit. */ + if (sawnan) { + neg1 = 0; + t = bsav; +/* Computing MIN */ + i__3 = bj + 127, i__4 = *r__ - 1; + i__2 = min(i__3,i__4); + for (j = bj; j <= i__2; ++j) { + dplus = d__[j] + t; + if (dplus < 0.) { + ++neg1; + } + tmp = t / dplus; + if (_starpu_disnan_(&tmp)) { + tmp = 1.; + } + t = tmp * lld[j] - *sigma; +/* L22: */ + } + } + negcnt += neg1; +/* L210: */ + } + +/* II) lower part: L D L^T - SIGMA I = U- D- U-^T */ + p = d__[*n] - *sigma; + i__1 = *r__; + for (bj = *n - 1; bj >= i__1; bj += -128) { + neg2 = 0; + bsav = p; +/* Computing MAX */ + i__3 = bj - 127; + i__2 = max(i__3,*r__); + for (j = bj; j >= i__2; --j) { + dminus = lld[j] + p; + if (dminus < 0.) { + ++neg2; + } + tmp = p / dminus; + p = tmp * d__[j] - *sigma; +/* L23: */ + } + sawnan = _starpu_disnan_(&p); +/* As above, run a slower version that substitutes 1 for Inf/Inf. */ + + if (sawnan) { + neg2 = 0; + p = bsav; +/* Computing MAX */ + i__3 = bj - 127; + i__2 = max(i__3,*r__); + for (j = bj; j >= i__2; --j) { + dminus = lld[j] + p; + if (dminus < 0.) 
{ + ++neg2; + } + tmp = p / dminus; + if (_starpu_disnan_(&tmp)) { + tmp = 1.; + } + p = tmp * d__[j] - *sigma; +/* L24: */ + } + } + negcnt += neg2; +/* L230: */ + } + +/* III) Twist index */ +/* T was shifted by SIGMA initially.
*/ + gamma = t + *sigma + p; + if (gamma < 0.) { + ++negcnt; + } + ret_val = negcnt; + return ret_val; +} /* _starpu_dlaneg_ */ diff --git a/min-dgels/base/SRC/dlangb.c b/min-dgels/base/SRC/dlangb.c new file mode 100644 index 0000000..6d229fc --- /dev/null +++ b/min-dgels/base/SRC/dlangb.c @@ -0,0 +1,226 @@ +/* dlangb.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *work) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, l; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANGB returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of an */ +/* n by n band matrix A, with kl sub-diagonals and ku super-diagonals.
*/ + +/* Description */ +/* =========== */ + +/* DLANGB returns the value */ + +/* DLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANGB as described */ +/* above. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANGB is */ +/* set to zero. */ + +/* KL (input) INTEGER */ +/* The number of sub-diagonals of the matrix A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of super-diagonals of the matrix A. KU >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The band matrix A, stored in rows 1 to KL+KU+1. The j-th */ +/* column of A is stored in the j-th column of the array AB as */ +/* follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *ku + 2 - j; +/* Computing MIN */ + i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; + i__3 = min(i__4,i__5); + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)) + ; + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; +/* Computing MAX */ + i__3 = *ku + 2 - j; +/* Computing MIN */ + i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { + sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + k = *ku + 1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *ku; +/* Computing MIN */ + i__5 = *n, i__6 = j + *kl; + i__4 = min(i__5,i__6); + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + work[i__] += (d__1 = ab[k + i__ + j * ab_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__4 = 1, i__2 = j - *ku; + l = max(i__4,i__2); + k = *ku + 1 - j + l; +/* Computing MIN */ + i__2 = *n, i__3 = j + *kl; + i__4 = min(i__2,i__3) - l + 1; + _starpu_dlassq_(&i__4, &ab[k + j * ab_dim1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGB */ + +} /* _starpu_dlangb_ */ diff --git a/min-dgels/base/SRC/dlange.c b/min-dgels/base/SRC/dlange.c new file mode 100644 index 0000000..1a7c787 --- /dev/null +++ b/min-dgels/base/SRC/dlange.c @@ -0,0 +1,199 @@ +/* dlange.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DLANGE returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real matrix A. */ + +/* Description */ +/* =========== */ + +/* DLANGE returns the value */ + +/* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANGE as described */ +/* above. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. When M = 0, */ +/* DLANGE is set to zero. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. When N = 0, */ +/* DLANGE is set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(M,1). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body. NOTE(review): value is assigned only inside the NORM branches below; if NORM matches none of them, value is returned without having been set. */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* _starpu_dlange_ */ diff --git a/min-dgels/base/SRC/dlangt.c b/min-dgels/base/SRC/dlangt.c new file mode 100644 index 0000000..7222765 --- /dev/null +++ b/min-dgels/base/SRC/dlangt.c @@ -0,0 +1,195 @@ +/* dlangt.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, + doublereal *du) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANGT returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real tridiagonal matrix A. 
*/ + +/* Description */ +/* =========== */ + +/* DLANGT returns the value */ + +/* DLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares).
Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANGT as described */ +/* above. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANGT is */ +/* set to zero. */ + +/* DL (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) sub-diagonal elements of A. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of A. */ + +/* DU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) super-diagonal elements of A. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: decrement the three pointers so that du[], d__[] and dl[] can be addressed starting from 1 in the code below */ + --du; + --d__; + --dl; + + /* Function Body. NOTE(review): anorm is assigned only inside the NORM branches below; if NORM matches none of them, anorm is returned without having been set. */ + if (*n <= 0) { + anorm = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = dl[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = du[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* L10: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A).
*/ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(dl[1]), d__4 = (d__1 = d__[*n], abs(d__1) + ) + (d__2 = du[*n - 1], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = + dl[i__], abs(d__2)) + (d__3 = du[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L20: */ + } + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(du[1]), d__4 = (d__1 = d__[*n], abs(d__1) + ) + (d__2 = dl[*n - 1], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = + du[i__], abs(d__2)) + (d__3 = dl[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L30: */ + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + _starpu_dlassq_(n, &d__[1], &c__1, &scale, &sum); + if (*n > 1) { + i__1 = *n - 1; + _starpu_dlassq_(&i__1, &dl[1], &c__1, &scale, &sum); + i__1 = *n - 1; + _starpu_dlassq_(&i__1, &du[1], &c__1, &scale, &sum); + } + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANGT */ + +} /* _starpu_dlangt_ */ diff --git a/min-dgels/base/SRC/dlanhs.c b/min-dgels/base/SRC/dlanhs.c new file mode 100644 index 0000000..4a3285d --- /dev/null +++ b/min-dgels/base/SRC/dlanhs.c @@ -0,0 +1,205 @@ +/* dlanhs.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANHS returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* Hessenberg matrix A.
*/ + +/* Description */ +/* =========== */ + +/* DLANHS returns the value */ + +/* DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANHS as described */ +/* above. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANHS is */ +/* set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The n by n upper Hessenberg matrix A; the part of A below the */ +/* first sub-diagonal is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(N,1). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body. NOTE(review): value is assigned only inside the NORM branches below; if NORM matches none of them, value is returned without having been set. */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))).
*/ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANHS */ + +} /* _starpu_dlanhs_ */ diff --git a/min-dgels/base/SRC/dlansb.c b/min-dgels/base/SRC/dlansb.c new file mode 100644 index 0000000..85bb920 --- /dev/null +++ b/min-dgels/base/SRC/dlansb.c @@ -0,0 +1,263 @@ +/* dlansb.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal + *ab, integer *ldab, doublereal *work) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, l; + doublereal sum, absa, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANSB returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of an */ +/* n by n symmetric band matrix A, with k super-diagonals. 
*/ + +/* Description */ +/* =========== */ + +/* DLANSB returns the value */ + +/* DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANSB as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* band matrix A is supplied. */ +/* = 'U': Upper triangular part is supplied */ +/* = 'L': Lower triangular part is supplied */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANSB is */ +/* set to zero. */ + +/* K (input) INTEGER */ +/* The number of super-diagonals or sub-diagonals of the */ +/* band matrix A. K >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangle of the symmetric band matrix A, */ +/* stored in the first K+1 rows of AB. The j-th column of A is */ +/* stored in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= K+1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* WORK is not referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. 
*/ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *k + 2 - j; + i__3 = *k + 1; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *n + 1 - j, i__4 = *k + 1; + i__3 = min(i__2,i__4); + for (i__ = 1; i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + l = *k + 1 - j; +/* Computing MAX */ + i__3 = 1, i__2 = j - *k; + i__4 = j - 1; + for (i__ = max(i__3,i__2); i__ <= i__4; ++i__) { + absa = (d__1 = ab[l + i__ + j * ab_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = ab[*k + 1 + j * ab_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = ab[j * ab_dim1 + 1], abs(d__1)); + l = 1 - j; +/* Computing MIN */ + i__3 = *n, i__2 = j + *k; + i__4 = min(i__3,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + absa = (d__1 = ab[l + i__ + j * ab_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.; + sum = 1.; + if (*k > 0) { + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__4 = min(i__3,*k); +/* Computing MAX */ + i__2 = *k + 2 - j; + _starpu_dlassq_(&i__4, &ab[max(i__2, 1)+ j * ab_dim1], &c__1, & + scale, &sum); +/* L110: */ + } + l = *k + 1; + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n - j; + i__4 = min(i__3,*k); + _starpu_dlassq_(&i__4, &ab[j * ab_dim1 + 2], &c__1, &scale, &sum); +/* L120: */ + } + l = 1; + } + sum *= 2; + } else { + l = 1; + } + _starpu_dlassq_(n, &ab[l + ab_dim1], ldab, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSB */ + +} /* _starpu_dlansb_ */ diff --git a/min-dgels/base/SRC/dlansf.c b/min-dgels/base/SRC/dlansf.c new file mode 100644 index 0000000..b61bcd2 --- /dev/null +++ b/min-dgels/base/SRC/dlansf.c @@ -0,0 +1,1012 @@ +/* dlansf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, + doublereal *a, doublereal *work) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, l; + doublereal s; + integer n1; + doublereal aa; + integer lda, ifm, noe, ilu; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANSF returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real symmetric matrix A in RFP format. 
*/ + +/* Description */ +/* =========== */ + +/* DLANSF returns the value */ + +/* DLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER */ +/* Specifies the value to be returned in DLANSF as described */ +/* above. */ + +/* TRANSR (input) CHARACTER */ +/* Specifies whether the RFP format of A is normal or */ +/* transposed format. */ +/* = 'N': RFP format is Normal; */ +/* = 'T': RFP format is Transpose. */ + +/* UPLO (input) CHARACTER */ +/* On entry, UPLO specifies whether the RFP matrix A came from */ +/* an upper or lower triangular matrix as follows: */ +/* = 'U': RFP A came from an upper triangular matrix; */ +/* = 'L': RFP A came from a lower triangular matrix. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANSF is */ +/* set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); */ +/* On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') */ +/* part of the symmetric matrix A stored in RFP format. See the */ +/* "Notes" below for more details. */ +/* Unchanged on exit. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* WORK is not referenced. */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. 
*/ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We then consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. 
*/ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* Reference */ +/* ========= */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (*n == 0) { + ret_val = 0.; + return ret_val; + } + +/* set noe = 1 if n is odd. if n is even set noe=0 */ + + noe = 1; + if (*n % 2 == 0) { + noe = 0; + } + +/* set ifm = 0 when transr = 'T' or 't' and 1 otherwise */ + + ifm = 1; + if (_starpu_lsame_(transr, "T")) { + ifm = 0; + } + +/* set ilu = 0 when uplo = 'U' or 'u' and 1 otherwise */ + + ilu = 1; + if (_starpu_lsame_(uplo, "U")) { + ilu = 0; + } + +/* set lda = (n+1)/2 when ifm = 0 */ +/* set lda = n when ifm = 1 and noe = 1 */ +/* set lda = n+1 when ifm = 1 and noe = 0 */ + + if (ifm == 1) { + if (noe == 1) { + lda = *n; + } else { +/* noe=0 */ + lda = *n + 1; + } + } else { +/* ifm=0 */ + lda = (*n + 1) / 2; + } + + if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + k = (*n + 1) / 2; + value = 0.; + if (noe == 1) { +/* n is odd */ + if (ifm == 1) { +/* A is n by k */ + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = 0; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( + d__1)); + value = max(d__2,d__3); + } + } + } else { +/* xpose case; A is k by n */ + i__1 = *n - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( + d__1)); + value = max(d__2,d__3); + } + } + } + } else { +/* n is even */ + if (ifm == 1) { +/* A is n+1 by k */ + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 0; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( + d__1)); + value = max(d__2,d__3); + } + } + } else { +/* xpose case; A is k by n+1 */ + i__1 = *n; + for (j = 0; j <= i__1; ++j) { + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( + d__1)); + value = max(d__2,d__3); + } + } + } + } + } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ + + if (ifm == 1) { + k = *n / 2; + if (noe == 1) { +/* n is odd */ + if (ilu == 0) { + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + i__1 = k; + for (j = 0; j <= i__1; ++j) { + s = 0.; + i__2 = k + j - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(i,j+k) */ + s += aa; + work[i__] += aa; + } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,j+k) */ + work[j + k] = s + aa; + if (i__ == k + k) { + goto L10; + } + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j,j) */ + work[j] += aa; + s = 0.; + i__2 = k - 1; + for (l = j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(l,j) */ + s += aa; + work[l] += aa; + } + work[j] += s; + } +L10: + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } else { +/* ilu = 1 */ + ++k; +/* k=(n+1)/2 for n odd and ilu=1 */ + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + for (j = k - 1; j >= 0; --j) { + s = 0.; + i__1 = j - 2; + for (i__ = 0; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,i+k) */ + s += aa; + work[i__ + k] += aa; + } + if (j > 0) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,j+k) */ + s += aa; + work[i__ + k] += s; +/* i=j */ + ++i__; + } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j,j) */ + work[j] = aa; + s = 0.; + i__1 = *n - 1; + for (l = j + 1; l <= i__1; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(l,j) */ + s += aa; + work[l] += aa; + } + work[j] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } + } else { +/* n is even */ + if (ilu == 0) { + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + s = 0.; + i__2 = k + j - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(i,j+k) */ + s += aa; + work[i__] += aa; 
+ } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,j+k) */ + work[j + k] = s + aa; + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j,j) */ + work[j] += aa; + s = 0.; + i__2 = k - 1; + for (l = j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(l,j) */ + s += aa; + work[l] += aa; + } + work[j] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } else { +/* ilu = 1 */ + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + for (j = k - 1; j >= 0; --j) { + s = 0.; + i__1 = j - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,i+k) */ + s += aa; + work[i__ + k] += aa; + } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j+k,j+k) */ + s += aa; + work[i__ + k] += s; +/* i=j */ + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(j,j) */ + work[j] = aa; + s = 0.; + i__1 = *n - 1; + for (l = j + 1; l <= i__1; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* -> A(l,j) */ + s += aa; + work[l] += aa; + } + work[j] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } + } + } else { +/* ifm=0 */ + k = *n / 2; + if (noe == 1) { +/* n is odd */ + if (ilu == 0) { + n1 = k; +/* n/2 */ + ++k; +/* k is the row size and lda */ + i__1 = *n - 1; + for (i__ = n1; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + s = 0.; + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,n1+i) */ + work[i__ + n1] += aa; + s += aa; + } + work[j] = s; + } +/* j=n1=k-1 is special */ + s = (d__1 = a[j * lda], abs(d__1)); +/* A(k-1,k-1) */ + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k-1,i+n1) */ + work[i__ + n1] += aa; + s += aa; + } + work[j] += s; + i__1 = *n - 1; + for (j = k; j <= i__1; ++j) { + s = 0.; + i__2 = j - k - 1; + for 
(i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(i,j-k) */ + work[i__] += aa; + s += aa; + } +/* i=j-k */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j-k,j-k) */ + s += aa; + work[j - k] += s; + ++i__; + s = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,j) */ + i__2 = *n - 1; + for (l = j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,l) */ + work[l] += aa; + s += aa; + } + work[j] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } else { +/* ilu=1 */ + ++k; +/* k=(n+1)/2 for n odd and ilu=1 */ + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { +/* process */ + s = 0.; + i__2 = j - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,i) */ + work[i__] += aa; + s += aa; + } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* i=j so process of A(j,j) */ + s += aa; + work[j] = s; +/* is initialised here */ + ++i__; +/* i=j process A(j+k,j+k) */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); + s = aa; + i__2 = *n - 1; + for (l = k + j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(l,k+j) */ + s += aa; + work[l] += aa; + } + work[k + j] += s; + } +/* j=k-1 is special :process col A(k-1,0:k-1) */ + s = 0.; + i__1 = k - 2; + for (i__ = 0; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k,i) */ + work[i__] += aa; + s += aa; + } +/* i=k-1 */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k-1,k-1) */ + s += aa; + work[i__] = s; +/* done with col j=k+1 */ + i__1 = *n - 1; + for (j = k; j <= i__1; ++j) { +/* process col j of A = A(j,0:k-1) */ + s = 0.; + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,i) */ + work[i__] += aa; + s += aa; + } + work[j] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } + } else { +/* 
n is even */ + if (ilu == 0) { + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + s = 0.; + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,i+k) */ + work[i__ + k] += aa; + s += aa; + } + work[j] = s; + } +/* j=k */ + aa = (d__1 = a[j * lda], abs(d__1)); +/* A(k,k) */ + s = aa; + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k,k+i) */ + work[i__ + k] += aa; + s += aa; + } + work[j] += s; + i__1 = *n - 1; + for (j = k + 1; j <= i__1; ++j) { + s = 0.; + i__2 = j - 2 - k; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(i,j-k-1) */ + work[i__] += aa; + s += aa; + } +/* i=j-1-k */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j-k-1,j-k-1) */ + s += aa; + work[j - k - 1] += s; + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,j) */ + s = aa; + i__2 = *n - 1; + for (l = j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j,l) */ + work[l] += aa; + s += aa; + } + work[j] += s; + } +/* j=n */ + s = 0.; + i__1 = k - 2; + for (i__ = 0; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(i,k-1) */ + work[i__] += aa; + s += aa; + } +/* i=k-1 */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k-1,k-1) */ + s += aa; + work[i__] += s; + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } else { +/* ilu=1 */ + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + work[i__] = 0.; + } +/* j=0 is special :process col A(k:n-1,k) */ + s = abs(a[0]); +/* A(k,k) */ + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__], abs(d__1)); +/* A(k+i,k) */ + work[i__ + k] += aa; + s += aa; + } + work[k] += s; + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { +/* process */ + s = 0.; + i__2 = j - 2; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * 
lda], abs(d__1)); +/* A(j-1,i) */ + work[i__] += aa; + s += aa; + } + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* i=j-1 so process of A(j-1,j-1) */ + s += aa; + work[j - 1] = s; +/* is initialised here */ + ++i__; +/* i=j process A(j+k,j+k) */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); + s = aa; + i__2 = *n - 1; + for (l = k + j + 1; l <= i__2; ++l) { + ++i__; + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(l,k+j) */ + s += aa; + work[l] += aa; + } + work[k + j] += s; + } +/* j=k is special :process col A(k,0:k-1) */ + s = 0.; + i__1 = k - 2; + for (i__ = 0; i__ <= i__1; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k,i) */ + work[i__] += aa; + s += aa; + } +/* i=k-1 */ + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(k-1,k-1) */ + s += aa; + work[i__] = s; +/* done with col j=k+1 */ + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { +/* process col j-1 of A = A(j-1,0:k-1) */ + s = 0.; + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + aa = (d__1 = a[i__ + j * lda], abs(d__1)); +/* A(j-1,i) */ + work[i__] += aa; + s += aa; + } + work[j - 1] += s; + } + i__ = _starpu_idamax_(n, work, &c__1); + value = work[i__ - 1]; + } + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + k = (*n + 1) / 2; + scale = 0.; + s = 1.; + if (noe == 1) { +/* n is odd */ + if (ifm == 1) { +/* A is normal */ + if (ilu == 0) { +/* A is upper */ + i__1 = k - 3; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 2; + _starpu_dlassq_(&i__2, &a[k + j + 1 + j * lda], &c__1, &scale, + &s); +/* L at A(k,0) */ + } + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = k + j - 1; + _starpu_dlassq_(&i__2, &a[j * lda], &c__1, &scale, &s); +/* trap U at A(0,0) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = k - 1; + i__2 = lda + 1; + _starpu_dlassq_(&i__1, &a[k], &i__2, &scale, &s); +/* tri L at A(k,0) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[k - 1], &i__1, &scale, &s); +/* tri U at A(k-1,0) */ + } else { +/* ilu=1 & A is lower */ + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - j - 1; + _starpu_dlassq_(&i__2, &a[j + 1 + j * lda], &c__1, &scale, &s) + ; +/* trap L at A(0,0) */ + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[(j + 1) * lda], &c__1, &scale, &s); +/* U at A(0,1) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, a, &i__1, &scale, &s); +/* tri L at A(0,0) */ + i__1 = k - 1; + i__2 = lda + 1; + _starpu_dlassq_(&i__1, &a[lda], &i__2, &scale, &s); +/* tri U at A(0,1) */ + } + } else { +/* A is xpose */ + if (ilu == 0) { +/* A' is upper */ + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[(k + j) * lda], &c__1, &scale, &s); +/* U at A(0,k) */ + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); +/* k by k-1 rect. 
at A(0,0) */ + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 1; + _starpu_dlassq_(&i__2, &a[j + 1 + (j + k - 1) * lda], &c__1, & + scale, &s); +/* L at A(0,k-1) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = k - 1; + i__2 = lda + 1; + _starpu_dlassq_(&i__1, &a[k * lda], &i__2, &scale, &s); +/* tri U at A(0,k) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[(k - 1) * lda], &i__1, &scale, &s); +/* tri L at A(0,k-1) */ + } else { +/* A' is lower */ + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[j * lda], &c__1, &scale, &s); +/* U at A(0,0) */ + } + i__1 = *n - 1; + for (j = k; j <= i__1; ++j) { + _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); +/* k by k-1 rect. at A(0,k) */ + } + i__1 = k - 3; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 2; + _starpu_dlassq_(&i__2, &a[j + 2 + j * lda], &c__1, &scale, &s) + ; +/* L at A(1,0) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, a, &i__1, &scale, &s); +/* tri U at A(0,0) */ + i__1 = k - 1; + i__2 = lda + 1; + _starpu_dlassq_(&i__1, &a[1], &i__2, &scale, &s); +/* tri L at A(1,0) */ + } + } + } else { +/* n is even */ + if (ifm == 1) { +/* A is normal */ + if (ilu == 0) { +/* A is upper */ + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 1; + _starpu_dlassq_(&i__2, &a[k + j + 2 + j * lda], &c__1, &scale, + &s); +/* L at A(k+1,0) */ + } + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = k + j; + _starpu_dlassq_(&i__2, &a[j * lda], &c__1, &scale, &s); +/* trap U at A(0,0) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[k + 1], &i__1, &scale, &s); +/* tri L at A(k+1,0) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[k], &i__1, &scale, &s); +/* tri U at A(k,0) */ + } else { +/* ilu=1 & A is lower */ + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - j - 1; + _starpu_dlassq_(&i__2, &a[j + 2 + j * lda], &c__1, 
&scale, &s) + ; +/* trap L at A(1,0) */ + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[j * lda], &c__1, &scale, &s); +/* U at A(0,0) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[1], &i__1, &scale, &s); +/* tri L at A(1,0) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, a, &i__1, &scale, &s); +/* tri U at A(0,0) */ + } + } else { +/* A is xpose */ + if (ilu == 0) { +/* A' is upper */ + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[(k + 1 + j) * lda], &c__1, &scale, &s); +/* U at A(0,k+1) */ + } + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); +/* k by k rect. at A(0,0) */ + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 1; + _starpu_dlassq_(&i__2, &a[j + 1 + (j + k) * lda], &c__1, & + scale, &s); +/* L at A(0,k) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[(k + 1) * lda], &i__1, &scale, &s); +/* tri U at A(0,k+1) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[k * lda], &i__1, &scale, &s); +/* tri L at A(0,k) */ + } else { +/* A' is lower */ + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &a[(j + 1) * lda], &c__1, &scale, &s); +/* U at A(0,1) */ + } + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); +/* k by k rect. 
at A(0,k+1) */ + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = k - j - 1; + _starpu_dlassq_(&i__2, &a[j + 1 + j * lda], &c__1, &scale, &s) + ; +/* L at A(0,0) */ + } + s += s; +/* double s for the off diagonal elements */ + i__1 = lda + 1; + _starpu_dlassq_(&k, &a[lda], &i__1, &scale, &s); +/* tri L at A(0,1) */ + i__1 = lda + 1; + _starpu_dlassq_(&k, a, &i__1, &scale, &s); +/* tri U at A(0,0) */ + } + } + } + value = scale * sqrt(s); + } + + ret_val = value; + return ret_val; + +/* End of DLANSF */ + +} /* _starpu_dlansf_ */ diff --git a/min-dgels/base/SRC/dlansp.c b/min-dgels/base/SRC/dlansp.c new file mode 100644 index 0000000..024d50f --- /dev/null +++ b/min-dgels/base/SRC/dlansp.c @@ -0,0 +1,263 @@ +/* dlansp.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, + doublereal *work) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal sum, absa, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. 
*/ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANSP returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real symmetric matrix A, supplied in packed form. */ + +/* Description */ +/* =========== */ + +/* DLANSP returns the value */ + +/* DLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANSP as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is supplied. */ +/* = 'U': Upper triangular part of A is supplied */ +/* = 'L': Lower triangular part of A is supplied */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANSP is */ +/* set to zero. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangle of the symmetric matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* WORK is not referenced. */ + +/* ===================================================================== */ + +/* .. 
Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --ap; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + k = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + j - 1; + for (i__ = k; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } + k += j; +/* L20: */ + } + } else { + k = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + *n - j; + for (i__ = k; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L30: */ + } + k = k + *n - j + 1; +/* L40: */ + } + } + } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ + + value = 0.; + k = 1; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = ap[k], abs(d__1)); + sum += absa; + work[i__] += absa; + ++k; +/* L50: */ + } + work[j] = sum + (d__1 = ap[k], abs(d__1)); + ++k; +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = ap[k], abs(d__1)); + ++k; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = ap[k], abs(d__1)); + sum += absa; + work[i__] += absa; + ++k; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + k = 2; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); + k += j; +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); + k = k + *n - j + 1; +/* L120: */ + } + } + sum *= 2; + k = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (ap[k] != 0.) 
{ + absa = (d__1 = ap[k], abs(d__1)); + if (scale < absa) { +/* Computing 2nd power */ + d__1 = scale / absa; + sum = sum * (d__1 * d__1) + 1.; + scale = absa; + } else { +/* Computing 2nd power */ + d__1 = absa / scale; + sum += d__1 * d__1; + } + } + if (_starpu_lsame_(uplo, "U")) { + k = k + i__ + 1; + } else { + k = k + *n - i__ + 1; + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSP */ + +} /* _starpu_dlansp_ */ diff --git a/min-dgels/base/SRC/dlanst.c b/min-dgels/base/SRC/dlanst.c new file mode 100644 index 0000000..1cdf24e --- /dev/null +++ b/min-dgels/base/SRC/dlanst.c @@ -0,0 +1,166 @@ +/* dlanst.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal sum, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLANST returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real symmetric tridiagonal matrix A. */ + +/* Description */ +/* =========== */ + +/* DLANST returns the value */ + +/* DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANST as described */ +/* above. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANST is */ +/* set to zero. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of A. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) sub-diagonal or super-diagonal elements of A. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* L10: */ + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1' || _starpu_lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( + d__1)) + (d__2 = d__[*n], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L20: */ + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + _starpu_dlassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + _starpu_dlassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANST */ + +} /* _starpu_dlanst_ */ diff --git a/min-dgels/base/SRC/dlansy.c b/min-dgels/base/SRC/dlansy.c new file mode 100644 index 0000000..608cb77 --- /dev/null +++ b/min-dgels/base/SRC/dlansy.c @@ -0,0 +1,239 @@ +/* dlansy.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, absa, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANSY returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* real symmetric matrix A. 
*/ + +/* Description */ +/* =========== */ + +/* DLANSY returns the value */ + +/* DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANSY as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is to be referenced. */ +/* = 'U': Upper triangular part of A is referenced */ +/* = 'L': Lower triangular part of A is referenced */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANSY is */ +/* set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading n by n */ +/* upper triangular part of A contains the upper triangular part */ +/* of the matrix A, and the strictly lower triangular part of A */ +/* is not referenced. If UPLO = 'L', the leading n by n lower */ +/* triangular part of A contains the lower triangular part of */ +/* the matrix A, and the strictly upper triangular part of A is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(N,1). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* WORK is not referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + _starpu_dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + _starpu_dlassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSY */ + +} /* _starpu_dlansy_ */ diff --git a/min-dgels/base/SRC/dlantb.c b/min-dgels/base/SRC/dlantb.c new file mode 100644 index 0000000..560c282 --- /dev/null +++ b/min-dgels/base/SRC/dlantb.c @@ -0,0 +1,434 @@ +/* dlantb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, + doublereal *ab, integer *ldab, doublereal *work) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, l; + doublereal sum, scale; + logical udiag; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANTB returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of an */ +/* n by n triangular band matrix A, with ( k + 1 ) diagonals. 
*/ + +/* Description */ +/* =========== */ + +/* DLANTB returns the value */ + +/* DLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANTB as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANTB is */ +/* set to zero. */ + +/* K (input) INTEGER */ +/* The number of super-diagonals of the matrix A if UPLO = 'U', */ +/* or the number of sub-diagonals of the matrix A if UPLO = 'L'. */ +/* K >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangular band matrix A, stored in the */ +/* first k+1 rows of AB. The j-th column of A is stored */ +/* in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). */ +/* Note that when DIAG = 'U', the elements of the array AB */ +/* corresponding to the diagonal elements of the matrix A are */ +/* not referenced, but are assumed to be one. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= K+1. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + if (_starpu_lsame_(diag, "U")) { + value = 1.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *k + 2 - j; + i__3 = *k; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], + abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *n + 1 - j, i__4 = *k + 1; + i__3 = min(i__2,i__4); + for (i__ = 2; i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], + abs(d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else { + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = *k + 2 - j; + i__2 = *k + 1; + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], + abs(d__1)); + value = max(d__2,d__3); +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n + 1 - j, i__4 = *k + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 
= (d__1 = ab[i__ + j * ab_dim1], + abs(d__1)); + value = max(d__2,d__3); +/* L70: */ + } +/* L80: */ + } + } + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + udiag = _starpu_lsame_(diag, "U"); + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag) { + sum = 1.; +/* Computing MAX */ + i__2 = *k + 2 - j; + i__3 = *k; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); +/* L90: */ + } + } else { + sum = 0.; +/* Computing MAX */ + i__3 = *k + 2 - j; + i__2 = *k + 1; + for (i__ = max(i__3,1); i__ <= i__2; ++i__) { + sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); +/* L100: */ + } + } + value = max(value,sum); +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag) { + sum = 1.; +/* Computing MIN */ + i__3 = *n + 1 - j, i__4 = *k + 1; + i__2 = min(i__3,i__4); + for (i__ = 2; i__ <= i__2; ++i__) { + sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); +/* L120: */ + } + } else { + sum = 0.; +/* Computing MIN */ + i__3 = *n + 1 - j, i__4 = *k + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); +/* L130: */ + } + } + value = max(value,sum); +/* L140: */ + } + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). 
*/ + + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L150: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + l = *k + 1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( + d__1)); +/* L160: */ + } +/* L170: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L180: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + l = *k + 1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( + d__1)); +/* L190: */ + } +/* L200: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L210: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( + d__1)); +/* L220: */ + } +/* L230: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L240: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( + d__1)); +/* L250: */ + } +/* L260: */ + } + } + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L270: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) (*n); + if (*k > 0) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__4 = j - 1; + i__3 = min(i__4,*k); +/* Computing MAX */ + i__2 = *k + 2 - j; + _starpu_dlassq_(&i__3, &ab[max(i__2, 1)+ j * ab_dim1], &c__1, + &scale, &sum); +/* L280: */ + } + } + } else { + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__4 = j, i__2 = *k + 1; + i__3 = min(i__4,i__2); +/* Computing MAX */ + i__5 = *k + 2 - j; + _starpu_dlassq_(&i__3, &ab[max(i__5, 1)+ j * ab_dim1], &c__1, & + scale, &sum); +/* L290: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) (*n); + if (*k > 0) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__4 = *n - j; + i__3 = min(i__4,*k); + _starpu_dlassq_(&i__3, &ab[j * ab_dim1 + 2], &c__1, &scale, & + sum); +/* L300: */ + } + } + } else { + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__4 = *n - j + 1, i__2 = *k + 1; + i__3 = min(i__4,i__2); + _starpu_dlassq_(&i__3, &ab[j * ab_dim1 + 1], &c__1, &scale, &sum); +/* L310: */ + } + } + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANTB */ + +} /* _starpu_dlantb_ */ diff --git a/min-dgels/base/SRC/dlantp.c b/min-dgels/base/SRC/dlantp.c new file mode 100644 index 0000000..269c280 --- /dev/null +++ b/min-dgels/base/SRC/dlantp.c @@ -0,0 +1,391 @@ +/* dlantp.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal + *ap, doublereal *work) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal sum, scale; + logical udiag; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANTP returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* triangular matrix A, supplied in packed form. 
*/ + +/* Description */ +/* =========== */ + +/* DLANTP returns the value */ + +/* DLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANTP as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. When N = 0, DLANTP is */ +/* set to zero. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangular matrix A, packed columnwise in */ +/* a linear array. The j-th column of A is stored in the array */ +/* AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* Note that when DIAG = 'U', the elements of the array AP */ +/* corresponding to the diagonal elements of the matrix A are */ +/* not referenced, but are assumed to be one. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. 
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --ap; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + k = 1; + if (_starpu_lsame_(diag, "U")) { + value = 1.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + j - 2; + for (i__ = k; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } + k += j; +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + *n - j; + for (i__ = k + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L30: */ + } + k = k + *n - j + 1; +/* L40: */ + } + } + } else { + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + j - 1; + for (i__ = k; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L50: */ + } + k += j; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = k + *n - j; + for (i__ = k; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); + value = max(d__2,d__3); +/* L70: */ + } + k = k + *n - j + 1; +/* L80: */ + } + } + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). 
*/ + + value = 0.; + k = 1; + udiag = _starpu_lsame_(diag, "U"); + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag) { + sum = 1.; + i__2 = k + j - 2; + for (i__ = k; i__ <= i__2; ++i__) { + sum += (d__1 = ap[i__], abs(d__1)); +/* L90: */ + } + } else { + sum = 0.; + i__2 = k + j - 1; + for (i__ = k; i__ <= i__2; ++i__) { + sum += (d__1 = ap[i__], abs(d__1)); +/* L100: */ + } + } + k += j; + value = max(value,sum); +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag) { + sum = 1.; + i__2 = k + *n - j; + for (i__ = k + 1; i__ <= i__2; ++i__) { + sum += (d__1 = ap[i__], abs(d__1)); +/* L120: */ + } + } else { + sum = 0.; + i__2 = k + *n - j; + for (i__ = k; i__ <= i__2; ++i__) { + sum += (d__1 = ap[i__], abs(d__1)); +/* L130: */ + } + } + k = k + *n - j + 1; + value = max(value,sum); +/* L140: */ + } + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). */ + + k = 1; + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L150: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = ap[k], abs(d__1)); + ++k; +/* L160: */ + } + ++k; +/* L170: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L180: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = ap[k], abs(d__1)); + ++k; +/* L190: */ + } +/* L200: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L210: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ++k; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = ap[k], abs(d__1)); + ++k; +/* L220: */ + } +/* L230: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* 
L240: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + work[i__] += (d__1 = ap[k], abs(d__1)); + ++k; +/* L250: */ + } +/* L260: */ + } + } + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L270: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). */ + + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) (*n); + k = 2; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); + k += j; +/* L280: */ + } + } else { + scale = 0.; + sum = 1.; + k = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dlassq_(&j, &ap[k], &c__1, &scale, &sum); + k += j; +/* L290: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) (*n); + k = 2; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); + k = k + *n - j + 1; +/* L300: */ + } + } else { + scale = 0.; + sum = 1.; + k = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); + k = k + *n - j + 1; +/* L310: */ + } + } + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANTP */ + +} /* _starpu_dlantp_ */ diff --git a/min-dgels/base/SRC/dlantr.c b/min-dgels/base/SRC/dlantr.c new file mode 100644 index 0000000..909ab59 --- /dev/null +++ b/min-dgels/base/SRC/dlantr.c @@ -0,0 +1,398 @@ +/* dlantr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, scale; + logical udiag; + extern logical _starpu_lsame_(char *, char *); + doublereal value; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANTR returns the value of the one norm, or the Frobenius norm, or */ +/* the infinity norm, or the element of largest absolute value of a */ +/* trapezoidal or triangular matrix A. 
*/ + +/* Description */ +/* =========== */ + +/* DLANTR returns the value */ + +/* DLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* ( */ +/* ( norm1(A), NORM = '1', 'O' or 'o' */ +/* ( */ +/* ( normI(A), NORM = 'I' or 'i' */ +/* ( */ +/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + +/* where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies the value to be returned in DLANTR as described */ +/* above. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower trapezoidal. */ +/* = 'U': Upper trapezoidal */ +/* = 'L': Lower trapezoidal */ +/* Note that A is triangular instead of trapezoidal if M = N. */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A has unit diagonal. */ +/* = 'N': Non-unit diagonal */ +/* = 'U': Unit diagonal */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0, and if */ +/* UPLO = 'U', M <= N. When M = 0, DLANTR is set to zero. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0, and if */ +/* UPLO = 'L', N <= M. When N = 0, DLANTR is set to zero. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The trapezoidal matrix A (A is triangular if M = N). */ +/* If UPLO = 'U', the leading m by n upper trapezoidal part of */ +/* the array A contains the upper trapezoidal matrix, and the */ +/* strictly lower triangular part of A is not referenced. */ +/* If UPLO = 'L', the leading m by n lower trapezoidal part of */ +/* the array A contains the lower trapezoidal matrix, and the */ +/* strictly upper triangular part of A is not referenced. 
Note */ +/* that when DIAG = 'U', the diagonal elements of A are not */ +/* referenced and are assumed to be one. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(M,1). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ +/* referenced. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (_starpu_lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + if (_starpu_lsame_(diag, "U")) { + value = 1.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *m, i__4 = j - 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else { + value = 0.; + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(*m,j); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing 
MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L70: */ + } +/* L80: */ + } + } + } + } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + udiag = _starpu_lsame_(diag, "U"); + if (_starpu_lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag && j <= *m) { + sum = 1.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L90: */ + } + } else { + sum = 0.; + i__2 = min(*m,j); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L100: */ + } + } + value = max(value,sum); +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (udiag) { + sum = 1.; + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L120: */ + } + } else { + sum = 0.; + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L130: */ + } + } + value = max(value,sum); +/* L140: */ + } + } + } else if (_starpu_lsame_(norm, "I")) { + +/* Find normI(A). 
*/ + + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L150: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *m, i__4 = j - 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L160: */ + } +/* L170: */ + } + } else { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L180: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(*m,j); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L190: */ + } +/* L200: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 1.; +/* L210: */ + } + i__1 = *m; + for (i__ = *n + 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L220: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L230: */ + } +/* L240: */ + } + } else { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L250: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L260: */ + } +/* L270: */ + } + } + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L280: */ + } + } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + if (_starpu_lsame_(uplo, "U")) { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) min(*m,*n); + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *m, i__4 = j - 1; + i__2 = min(i__3,i__4); + _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L290: */ + } + } else { + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(*m,j); + _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L300: */ + } + } + } else { + if (_starpu_lsame_(diag, "U")) { + scale = 1.; + sum = (doublereal) min(*m,*n); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m - j; +/* Computing MIN */ + i__3 = *m, i__4 = j + 1; + _starpu_dlassq_(&i__2, &a[min(i__3, i__4)+ j * a_dim1], &c__1, & + scale, &sum); +/* L310: */ + } + } else { + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m - j + 1; + _starpu_dlassq_(&i__2, &a[j + j * a_dim1], &c__1, &scale, &sum); +/* L320: */ + } + } + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANTR */ + +} /* _starpu_dlantr_ */ diff --git a/min-dgels/base/SRC/dlanv2.c b/min-dgels/base/SRC/dlanv2.c new file mode 100644 index 0000000..e72782b --- /dev/null +++ b/min-dgels/base/SRC/dlanv2.c @@ -0,0 +1,235 @@ +/* dlanv2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = 1.; + +/* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *), sqrt(doublereal); + + /* Local variables */ + doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, + scale, bcmax, bcmis, sigma; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric */ +/* matrix in standard form: */ + +/* [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] */ +/* [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] */ + +/* where either */ +/* 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or */ +/* 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex */ +/* conjugate eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* A (input/output) DOUBLE PRECISION */ +/* B (input/output) DOUBLE PRECISION */ +/* C (input/output) DOUBLE PRECISION */ +/* D (input/output) DOUBLE PRECISION */ +/* On entry, the elements of the input matrix. 
*/ +/* On exit, they are overwritten by the elements of the */ +/* standardised Schur form. */ + +/* RT1R (output) DOUBLE PRECISION */ +/* RT1I (output) DOUBLE PRECISION */ +/* RT2R (output) DOUBLE PRECISION */ +/* RT2I (output) DOUBLE PRECISION */ +/* The real and imaginary parts of the eigenvalues. If the */ +/* eigenvalues are a complex conjugate pair, RT1I > 0. */ + +/* CS (output) DOUBLE PRECISION */ +/* SN (output) DOUBLE PRECISION */ +/* Parameters of the rotation matrix. */ + +/* Further Details */ +/* =============== */ + +/* Modified by V. Sima, Research Institute for Informatics, Bucharest, */ +/* Romania, to reduce the risk of cancellation errors, */ +/* when computing real eigenvalues, and to ensure, if possible, that */ +/* abs(RT1R) >= abs(RT2R). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + eps = _starpu_dlamch_("P"); + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + goto L10; + + } else if (*b == 0.) { + +/* Swap rows and columns */ + + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + goto L10; + } else if (*a - *d__ == 0. && d_sign(&c_b4, b) != d_sign(&c_b4, c__)) { + *cs = 1.; + *sn = 0.; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5; +/* Computing MAX */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1,d__2) * d_sign(&c_b4, b) * d_sign(&c_b4, c__); +/* Computing MAX */ + d__1 = abs(p); + scale = max(d__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* If Z is of the order of the machine accuracy, postpone the */ +/* decision on the nature of eigenvalues */ + + if (z__ >= eps * 4.) { + +/* Real eigenvalues. Compute A and D. 
*/ + + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = _starpu_dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + +/* Complex eigenvalues, or real (almost) equal eigenvalues. */ +/* Make diagonal elements equal. */ + + sigma = *b + *c__; + tau = _starpu_dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_sign(&c_b4, &sigma); + +/* Compute [ AA BB ] = [ A B ] [ CS -SN ] */ +/* [ CC DD ] [ C D ] [ SN CS ] */ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* Compute [ A B ] = [ CS SN ] [ AA BB ] */ +/* [ C D ] [-SN CS ] [ CC DD ] */ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + + if (*c__ != 0.) { + if (*b != 0.) { + if (d_sign(&c_b4, b) == d_sign(&c_b4, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) 
{ + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of DLANV2 */ + +} /* _starpu_dlanv2_ */ diff --git a/min-dgels/base/SRC/dlapll.c b/min-dgels/base/SRC/dlapll.c new file mode 100644 index 0000000..916c42d --- /dev/null +++ b/min-dgels/base/SRC/dlapll.c @@ -0,0 +1,127 @@ +/* dlapll.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *ssmin) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + doublereal c__, a11, a12, a22, tau; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int _starpu_dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + doublereal ssmax; + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Given two column vectors X and Y, let */ + +/* A = ( X Y ). 
*/ + +/* The subroutine first computes the QR factorization of A = Q*R, */ +/* and then computes the SVD of the 2-by-2 upper triangular matrix R. */ +/* The smaller singular value of R is returned in SSMIN, which is used */ +/* as the measurement of the linear dependency of the vectors X and Y. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The length of the vectors X and Y. */ + +/* X (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* On entry, X contains the N-vector X. */ +/* On exit, X is overwritten. */ + +/* INCX (input) INTEGER */ +/* The increment between successive elements of X. INCX > 0. */ + +/* Y (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCY) */ +/* On entry, Y contains the N-vector Y. */ +/* On exit, Y is overwritten. */ + +/* INCY (input) INTEGER */ +/* The increment between successive elements of Y. INCY > 0. */ + +/* SSMIN (output) DOUBLE PRECISION */ +/* The smallest singular value of the N-by-2 matrix A = ( X Y ). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --y; + --x; + + /* Function Body */ + if (*n <= 1) { + *ssmin = 0.; + return 0; + } + +/* Compute the QR factorization of the N-by-2 matrix ( X Y ) */ + + _starpu_dlarfg_(n, &x[1], &x[*incx + 1], incx, &tau); + a11 = x[1]; + x[1] = 1.; + + c__ = -tau * _starpu_ddot_(n, &x[1], incx, &y[1], incy); + _starpu_daxpy_(n, &c__, &x[1], incx, &y[1], incy); + + i__1 = *n - 1; + _starpu_dlarfg_(&i__1, &y[*incy + 1], &y[(*incy << 1) + 1], incy, &tau); + + a12 = y[1]; + a22 = y[*incy + 1]; + +/* Compute the SVD of 2-by-2 Upper triangular matrix. 
*/ + + _starpu_dlas2_(&a11, &a12, &a22, ssmin, &ssmax); + + return 0; + +/* End of DLAPLL */ + +} /* _starpu_dlapll_ */ diff --git a/min-dgels/base/SRC/dlapmt.c b/min-dgels/base/SRC/dlapmt.c new file mode 100644 index 0000000..c2a56e5 --- /dev/null +++ b/min-dgels/base/SRC/dlapmt.c @@ -0,0 +1,178 @@ +/* dlapmt.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlapmt_(logical *forwrd, integer *m, integer *n, + doublereal *x, integer *ldx, integer *k) +{ + /* System generated locals */ + integer x_dim1, x_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ii, in; + doublereal temp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAPMT rearranges the columns of the M by N matrix X as specified */ +/* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. */ +/* If FORWRD = .TRUE., forward permutation: */ + +/* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. */ + +/* If FORWRD = .FALSE., backward permutation: */ + +/* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. */ + +/* Arguments */ +/* ========= */ + +/* FORWRD (input) LOGICAL */ +/* = .TRUE., forward permutation */ +/* = .FALSE., backward permutation */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix X. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix X. N >= 0. 
*/ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ +/* On entry, the M by N matrix X. */ +/* On exit, X contains the permuted matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X, LDX >= MAX(1,M). */ + +/* K (input/output) INTEGER array, dimension (N) */ +/* On entry, K contains the permutation vector. K is used as */ +/* internal workspace, but reset to its original value on */ +/* output. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --k; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + k[i__] = -k[i__]; +/* L10: */ + } + + if (*forwrd) { + +/* Forward permutation */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + if (k[i__] > 0) { + goto L40; + } + + j = i__; + k[j] = -k[j]; + in = k[j]; + +L20: + if (k[in] > 0) { + goto L40; + } + + i__2 = *m; + for (ii = 1; ii <= i__2; ++ii) { + temp = x[ii + j * x_dim1]; + x[ii + j * x_dim1] = x[ii + in * x_dim1]; + x[ii + in * x_dim1] = temp; +/* L30: */ + } + + k[in] = -k[in]; + j = in; + in = k[in]; + goto L20; + +L40: + +/* L50: */ + ; + } + + } else { + +/* Backward permutation */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + if (k[i__] > 0) { + goto L80; + } + + k[i__] = -k[i__]; + j = k[i__]; +L60: + if (j == i__) { + goto L80; + } + + i__2 = *m; + for (ii = 1; ii <= i__2; ++ii) { + temp = x[ii + i__ * x_dim1]; + x[ii + i__ * x_dim1] = x[ii + j * x_dim1]; + x[ii + j * x_dim1] = temp; +/* L70: */ + } + + k[j] = -k[j]; + j = k[j]; + goto L60; + +L80: + +/* L90: */ + ; + } + + } + + return 0; + +/* End of DLAPMT */ + +} /* _starpu_dlapmt_ */ diff --git a/min-dgels/base/SRC/dlapy2.c b/min-dgels/base/SRC/dlapy2.c new file mode 100644 index 0000000..ac89cb8 --- /dev/null +++ 
b/min-dgels/base/SRC/dlapy2.c @@ -0,0 +1,73 @@ +/* dlapy2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal w, z__, xabs, yabs; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */ +/* overflow. */ + +/* Arguments */ +/* ========= */ + +/* X (input) DOUBLE PRECISION */ +/* Y (input) DOUBLE PRECISION */ +/* X and Y specify the values x and y. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0.) 
{ + ret_val = w; + } else { +/* Computing 2nd power */ + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); + } + return ret_val; + +/* End of DLAPY2 */ + +} /* _starpu_dlapy2_ */ diff --git a/min-dgels/base/SRC/dlapy3.c b/min-dgels/base/SRC/dlapy3.c new file mode 100644 index 0000000..9bdeca8 --- /dev/null +++ b/min-dgels/base/SRC/dlapy3.c @@ -0,0 +1,83 @@ +/* dlapy3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal w, xabs, yabs, zabs; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause */ +/* unnecessary overflow. */ + +/* Arguments */ +/* ========= */ + +/* X (input) DOUBLE PRECISION */ +/* Y (input) DOUBLE PRECISION */ +/* Z (input) DOUBLE PRECISION */ +/* X, Y and Z specify the values x, y and z. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + xabs = abs(*x); + yabs = abs(*y); + zabs = abs(*z__); +/* Computing MAX */ + d__1 = max(xabs,yabs); + w = max(d__1,zabs); + if (w == 0.) { +/* W can be zero for max(0,nan,0) */ +/* adding all three entries together will make sure */ +/* NaN will not disappear. */ + ret_val = xabs + yabs + zabs; + } else { +/* Computing 2nd power */ + d__1 = xabs / w; +/* Computing 2nd power */ + d__2 = yabs / w; +/* Computing 2nd power */ + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + } + return ret_val; + +/* End of DLAPY3 */ + +} /* _starpu_dlapy3_ */ diff --git a/min-dgels/base/SRC/dlaqgb.c b/min-dgels/base/SRC/dlaqgb.c new file mode 100644 index 0000000..de131b4 --- /dev/null +++ b/min-dgels/base/SRC/dlaqgb.c @@ -0,0 +1,216 @@ +/* dlaqgb.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + integer i__, j; + doublereal cj, large, small; + extern doublereal _starpu_dlamch_(char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAQGB equilibrates a general M by N band matrix A with KL */ +/* subdiagonals and KU superdiagonals using the row and column scaling factors */ +/* in the vectors R and C. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ + +/* On exit, the equilibrated matrix, in the same storage format */ +/* as A. See EQUED for the form of the equilibrated matrix. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KL+KU+1. */ + +/* R (input) DOUBLE PRECISION array, dimension (M) */ +/* The row scale factors for A. */ + +/* C (input) DOUBLE PRECISION array, dimension (N) */ +/* The column scale factors for A. */ + +/* ROWCND (input) DOUBLE PRECISION */ +/* Ratio of the smallest R(i) to the largest R(i). */ + +/* COLCND (input) DOUBLE PRECISION */ +/* Ratio of the smallest C(i) to the largest C(i). */ + +/* AMAX (input) DOUBLE PRECISION */ +/* Absolute value of largest matrix entry. */ + +/* EQUED (output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration */ +/* = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* diag(R). */ +/* = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* by diag(C). */ +/* = 'B': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(R) * A * diag(C).
*/ + +/* Internal Parameters */ +/* =================== */ + +/* THRESH is a threshold value used to decide if row or column scaling */ +/* should be done based on the ratio of the row or column scaling */ +/* factors. If ROWCND < THRESH, row scaling is done, and if */ +/* COLCND < THRESH, column scaling is done. */ + +/* LARGE and SMALL are threshold values used to decide if row scaling */ +/* should be done based on the absolute size of the largest matrix */ +/* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --r__; + --c__; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + *(unsigned char *)equed = 'N'; + return 0; + } + +/* Initialize LARGE and SMALL. */ + + small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + large = 1. 
/ small; + + if (*rowcnd >= .1 && *amax >= small && *amax <= large) { + +/* No row scaling */ + + if (*colcnd >= .1) { + +/* No column scaling */ + + *(unsigned char *)equed = 'N'; + } else { + +/* Column scaling */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = c__[j]; +/* Computing MAX */ + i__2 = 1, i__3 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__4 = min(i__5,i__6); + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + ab[*ku + 1 + i__ - j + j * ab_dim1] = cj * ab[*ku + 1 + + i__ - j + j * ab_dim1]; +/* L10: */ + } +/* L20: */ + } + *(unsigned char *)equed = 'C'; + } + } else if (*colcnd >= .1) { + +/* Row scaling, no column scaling */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__4 = 1, i__2 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__3 = min(i__5,i__6); + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + ab[*ku + 1 + i__ - j + j * ab_dim1] = r__[i__] * ab[*ku + 1 + + i__ - j + j * ab_dim1]; +/* L30: */ + } +/* L40: */ + } + *(unsigned char *)equed = 'R'; + } else { + +/* Row and column scaling */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = c__[j]; +/* Computing MAX */ + i__3 = 1, i__4 = j - *ku; +/* Computing MIN */ + i__5 = *m, i__6 = j + *kl; + i__2 = min(i__5,i__6); + for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) { + ab[*ku + 1 + i__ - j + j * ab_dim1] = cj * r__[i__] * ab[*ku + + 1 + i__ - j + j * ab_dim1]; +/* L50: */ + } +/* L60: */ + } + *(unsigned char *)equed = 'B'; + } + + return 0; + +/* End of DLAQGB */ + +} /* _starpu_dlaqgb_ */ diff --git a/min-dgels/base/SRC/dlaqge.c b/min-dgels/base/SRC/dlaqge.c new file mode 100644 index 0000000..4f5f87d --- /dev/null +++ b/min-dgels/base/SRC/dlaqge.c @@ -0,0 +1,188 @@ +/* dlaqge.f -- translated by f2c (version 20061008). 
+   You must link the resulting object file with libf2c:
+    on Microsoft Windows system, link with libf2c.lib;
+    on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+    or, if you install libf2c.a in a standard place, with -lf2c -lm
+    -- in that order, at the end of the command line, as in
+        cc *.o -lf2c -lm
+    Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+        http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer *
+    lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal
+    *colcnd, doublereal *amax, char *equed)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, j;
+    doublereal cj, large, small;
+    extern doublereal _starpu_dlamch_(char *);
+
+
+/*  -- LAPACK auxiliary routine (version 3.2) -- */
+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/*     November 2006 */
+
+/*     .. Scalar Arguments .. */
+/*     .. */
+/*     .. Array Arguments .. */
+/*     .. */
+
+/*  Purpose */
+/*  ======= */
+
+/*  DLAQGE equilibrates a general M by N matrix A using the row and */
+/*  column scaling factors in the vectors R and C.  The scaling is */
+/*  done in place and the function always returns 0. */
+
+/*  Arguments */
+/*  ========= */
+
+/*  M       (input) INTEGER */
+/*          The number of rows of the matrix A.  M >= 0. */
+
+/*  N       (input) INTEGER */
+/*          The number of columns of the matrix A.  N >= 0. */
+
+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
+/*          On entry, the M by N matrix A. */
+/*          On exit, the equilibrated matrix.  See EQUED for the form of */
+/*          the equilibrated matrix. */
+
+/*  LDA     (input) INTEGER */
+/*          The leading dimension of the array A.  LDA >= max(M,1). */
+
+/*  R       (input) DOUBLE PRECISION array, dimension (M) */
+/*          The row scale factors for A. */
+
+/*  C       (input) DOUBLE PRECISION array, dimension (N) */
+/*          The column scale factors for A. */
+
+/*  ROWCND  (input) DOUBLE PRECISION */
+/*          Ratio of the smallest R(i) to the largest R(i). */
+
+/*  COLCND  (input) DOUBLE PRECISION */
+/*          Ratio of the smallest C(i) to the largest C(i). */
+
+/*  AMAX    (input) DOUBLE PRECISION */
+/*          Absolute value of largest matrix entry. */
+
+/*  EQUED   (output) CHARACTER*1 */
+/*          Specifies the form of equilibration that was done. */
+/*          Only the first character is written. */
+/*          = 'N':  No equilibration */
+/*          = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/*                  diag(R). */
+/*          = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/*                  by diag(C). */
+/*          = 'B':  Both row and column equilibration, i.e., A has been */
+/*                  replaced by diag(R) * A * diag(C). */
+
+/*  Internal Parameters */
+/*  =================== */
+
+/*  THRESH is a threshold value used to decide if row or column scaling */
+/*  should be done based on the ratio of the row or column scaling */
+/*  factors.  If ROWCND < THRESH, row scaling is done, and if */
+/*  COLCND < THRESH, column scaling is done.  THRESH is the literal .1 below. */
+
+/*  LARGE and SMALL are threshold values used to decide if row scaling */
+/*  should be done based on the absolute size of the largest matrix */
+/*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done. */
+
+/*  ===================================================================== */
+
+/*     .. Parameters .. */
+/*     .. */
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+/*     Quick return if possible (the test follows the pointer shifts) */
+
+    /* Parameter adjustments: shift a, r__ and c__ so that the Fortran */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;        /* 1-based forms a[i + j*a_dim1], r__[i], c__[j] work */
+    --r__;
+    --c__;
+
+    /* Function Body: an empty matrix needs no scaling */
+    if (*m <= 0 || *n <= 0) {
+        *(unsigned char *)equed = 'N';
+        return 0;
+    }
+
+/*     Initialize LARGE and SMALL. */
+
+    small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision");
+    large = 1. / small;
+
+    if (*rowcnd >= .1 && *amax >= small && *amax <= large) {
+
+/*        No row scaling: ROWCND >= THRESH and SMALL <= AMAX <= LARGE */
+
+        if (*colcnd >= .1) {
+
+/*           No column scaling */
+
+            *(unsigned char *)equed = 'N';
+        } else {
+
+/*           Column scaling: A(i,j) := C(j) * A(i,j) */
+
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                cj = c__[j];
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    a[i__ + j * a_dim1] = cj * a[i__ + j * a_dim1];
+/* L10: */
+                }
+/* L20: */
+            }
+            *(unsigned char *)equed = 'C';
+        }
+    } else if (*colcnd >= .1) {
+
+/*        Row scaling, no column scaling: A(i,j) := R(i) * A(i,j) */
+
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            i__2 = *m;
+            for (i__ = 1; i__ <= i__2; ++i__) {
+                a[i__ + j * a_dim1] = r__[i__] * a[i__ + j * a_dim1];
+/* L30: */
+            }
+/* L40: */
+        }
+        *(unsigned char *)equed = 'R';
+    } else {
+
+/*        Row and column scaling: A(i,j) := C(j) * R(i) * A(i,j) */
+
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            cj = c__[j];
+            i__2 = *m;
+            for (i__ = 1; i__ <= i__2; ++i__) {
+                a[i__ + j * a_dim1] = cj * r__[i__] * a[i__ + j * a_dim1];
+/* L50: */
+            }
+/* L60: */
+        }
+        *(unsigned char *)equed = 'B';
+    }
+
+    return 0;
+
+/*     End of DLAQGE */
+
+} /* _starpu_dlaqge_ */
diff --git a/min-dgels/base/SRC/dlaqp2.c b/min-dgels/base/SRC/dlaqp2.c
new file mode 100644
index 0000000..0c83853
--- /dev/null
+++ b/min-dgels/base/SRC/dlaqp2.c
@@ -0,0 +1,237 @@
+/* dlaqp2.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, + doublereal *a, integer *lda, integer *jpvt, doublereal *tau, + doublereal *vn1, doublereal *vn2, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, mn; + doublereal aii; + integer pvt; + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal temp2, tol3z; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + integer offpi, itemp; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQP2 computes a QR factorization with column pivoting of */ +/* the block A(OFFSET+1:M,1:N). 
*/ +/* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* OFFSET (input) INTEGER */ +/* The number of rows of the matrix A that must be pivoted */ +/* but no factorized. OFFSET >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is */ +/* the triangular factor obtained; the elements in block */ +/* A(OFFSET+1:M,1:N) below the diagonal, together with the */ +/* array TAU, represent the orthogonal matrix Q as a product of */ +/* elementary reflectors. Block A(1:OFFSET,1:N) has been */ +/* accordingly pivoted, but no factorized. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* JPVT (input/output) INTEGER array, dimension (N) */ +/* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ +/* to the front of A*P (a leading column); if JPVT(i) = 0, */ +/* the i-th column of A is a free column. */ +/* On exit, if JPVT(i) = k, then the i-th column of A*P */ +/* was the k-th column of A. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ +/* The scalar factors of the elementary reflectors. */ + +/* VN1 (input/output) DOUBLE PRECISION array, dimension (N) */ +/* The vector with the partial column norms. */ + +/* VN2 (input/output) DOUBLE PRECISION array, dimension (N) */ +/* The vector with the exact column norms. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ +/* X. Sun, Computer Science Dept., Duke University, USA */ + +/* Partial column norm updating strategy modified by */ +/* Z. 
Drmac and Z. Bujanovic, Dept. of Mathematics, */ +/* University of Zagreb, Croatia. */ +/* June 2006. */ +/* For more details see LAPACK Working Note 176. */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --jpvt; + --tau; + --vn1; + --vn2; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *m - *offset; + mn = min(i__1,*n); + tol3z = sqrt(_starpu_dlamch_("Epsilon")); + +/* Compute factorization. */ + + i__1 = mn; + for (i__ = 1; i__ <= i__1; ++i__) { + + offpi = *offset + i__; + +/* Determine ith pivot column and swap if necessary. */ + + i__2 = *n - i__ + 1; + pvt = i__ - 1 + _starpu_idamax_(&i__2, &vn1[i__], &c__1); + + if (pvt != i__) { + _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + itemp = jpvt[pvt]; + jpvt[pvt] = jpvt[i__]; + jpvt[i__] = itemp; + vn1[pvt] = vn1[i__]; + vn2[pvt] = vn2[i__]; + } + +/* Generate elementary reflector H(i). */ + + if (offpi < *m) { + i__2 = *m - offpi + 1; + _starpu_dlarfp_(&i__2, &a[offpi + i__ * a_dim1], &a[offpi + 1 + i__ * + a_dim1], &c__1, &tau[i__]); + } else { + _starpu_dlarfp_(&c__1, &a[*m + i__ * a_dim1], &a[*m + i__ * a_dim1], & + c__1, &tau[i__]); + } + + if (i__ <= *n) { + +/* Apply H(i)' to A(offset+i:m,i+1:n) from the left. */ + + aii = a[offpi + i__ * a_dim1]; + a[offpi + i__ * a_dim1] = 1.; + i__2 = *m - offpi + 1; + i__3 = *n - i__; + _starpu_dlarf_("Left", &i__2, &i__3, &a[offpi + i__ * a_dim1], &c__1, & + tau[i__], &a[offpi + (i__ + 1) * a_dim1], lda, &work[1]); + a[offpi + i__ * a_dim1] = aii; + } + +/* Update partial column norms. */ + + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (vn1[j] != 0.) 
{ + +/* NOTE: The following 4 lines follow from the analysis in */ +/* Lapack Working Note 176. */ + +/* Computing 2nd power */ + d__2 = (d__1 = a[offpi + j * a_dim1], abs(d__1)) / vn1[j]; + temp = 1. - d__2 * d__2; + temp = max(temp,0.); +/* Computing 2nd power */ + d__1 = vn1[j] / vn2[j]; + temp2 = temp * (d__1 * d__1); + if (temp2 <= tol3z) { + if (offpi < *m) { + i__3 = *m - offpi; + vn1[j] = _starpu_dnrm2_(&i__3, &a[offpi + 1 + j * a_dim1], & + c__1); + vn2[j] = vn1[j]; + } else { + vn1[j] = 0.; + vn2[j] = 0.; + } + } else { + vn1[j] *= sqrt(temp); + } + } +/* L10: */ + } + +/* L20: */ + } + + return 0; + +/* End of DLAQP2 */ + +} /* _starpu_dlaqp2_ */ diff --git a/min-dgels/base/SRC/dlaqps.c b/min-dgels/base/SRC/dlaqps.c new file mode 100644 index 0000000..0dad8f3 --- /dev/null +++ b/min-dgels/base/SRC/dlaqps.c @@ -0,0 +1,345 @@ +/* dlaqps.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = -1.; +static doublereal c_b9 = 1.; +static doublereal c_b16 = 0.; + +/* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer + *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, + doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, + doublereal *f, integer *ldf) +{ + /* System generated locals */ + integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + integer i_dnnt(doublereal *); + + /* Local variables */ 
+    integer j, k, rk;
+    doublereal akk;
+    integer pvt;
+    doublereal temp;
+    extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *);
+    doublereal temp2, tol3z;
+    extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
+        integer *, doublereal *, doublereal *, integer *, doublereal *,
+        integer *, doublereal *, doublereal *, integer *),
+        _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *,
+        integer *, doublereal *, integer *, doublereal *, doublereal *,
+        integer *);
+    integer itemp;
+    extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *,
+        doublereal *, integer *);
+    extern doublereal _starpu_dlamch_(char *);
+    extern integer _starpu_idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *,
+        integer *, doublereal *);
+    integer lsticc, lastrk;
+
+
+/*  -- LAPACK auxiliary routine (version 3.2) -- */
+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/*     November 2006 */
+
+/*     .. Scalar Arguments .. */
+/*     .. */
+/*     .. Array Arguments .. */
+/*     .. */
+
+/*  Purpose */
+/*  ======= */
+
+/*  DLAQPS computes a step of QR factorization with column pivoting */
+/*  of a real M-by-N matrix A by using Blas-3.  It tries to factorize */
+/*  NB columns from A starting from the row OFFSET+1, and updates all */
+/*  of the matrix with Blas-3 xGEMM. */
+
+/*  In some cases, due to catastrophic cancellations, it cannot */
+/*  factorize NB columns.  Hence, the actual number of factorized */
+/*  columns is returned in KB. */
+
+/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */
+
+/*  Arguments */
+/*  ========= */
+
+/*  M       (input) INTEGER */
+/*          The number of rows of the matrix A. M >= 0. */
+
+/*  N       (input) INTEGER */
+/*          The number of columns of the matrix A. N >= 0. */
+
+/*  OFFSET  (input) INTEGER */
+/*          The number of rows of A that have been factorized in */
+/*          previous steps. */
+
+/*  NB      (input) INTEGER */
+/*          The number of columns to factorize. */
+
+/*  KB      (output) INTEGER */
+/*          The number of columns actually factorized. */
+
+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
+/*          On entry, the M-by-N matrix A. */
+/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
+/*          factor obtained and block A(1:OFFSET,1:N) has been */
+/*          accordingly pivoted, but not factorized. */
+/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
+/*          been updated. */
+
+/*  LDA     (input) INTEGER */
+/*          The leading dimension of the array A. LDA >= max(1,M). */
+
+/*  JPVT    (input/output) INTEGER array, dimension (N) */
+/*          JPVT(I) = K <==> Column K of the full matrix A has been */
+/*          permuted into position I in AP. */
+
+/*  TAU     (output) DOUBLE PRECISION array, dimension (KB) */
+/*          The scalar factors of the elementary reflectors. */
+
+/*  VN1     (input/output) DOUBLE PRECISION array, dimension (N) */
+/*          The vector with the partial column norms. */
+
+/*  VN2     (input/output) DOUBLE PRECISION array, dimension (N) */
+/*          The vector with the exact column norms. */
+
+/*  AUXV    (input/output) DOUBLE PRECISION array, dimension (NB) */
+/*          Auxiliary vector. */
+
+/*  F       (input/output) DOUBLE PRECISION array, dimension (LDF,NB) */
+/*          Matrix F' = L*Y'*A. */
+
+/*  LDF     (input) INTEGER */
+/*          The leading dimension of the array F. LDF >= max(1,N). */
+
+/*  Further Details */
+/*  =============== */
+
+/*  Based on contributions by */
+/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
+/*    X. Sun, Computer Science Dept., Duke University, USA */
+
+/*  Partial column norm updating strategy modified by */
+/*    Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */
+/*    University of Zagreb, Croatia. */
+/*    June 2006. */
+/*  For more details see LAPACK Working Note 176. */
+/*  ===================================================================== */
+
+/*     .. Parameters .. */
+/*     .. */
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. External Subroutines .. */
+/*     .. */
+/*     .. Intrinsic Functions .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+    /* Parameter adjustments: shift pointers for Fortran 1-based indexing */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --jpvt;
+    --tau;
+    --vn1;
+    --vn2;
+    --auxv;
+    f_dim1 = *ldf;
+    f_offset = 1 + f_dim1;
+    f -= f_offset;
+
+    /* Function Body */
+/* Computing MIN */
+    i__1 = *m, i__2 = *n + *offset;
+    lastrk = min(i__1,i__2);
+    lsticc = 0;  /* head of the list of columns whose norm must be recomputed */
+    k = 0;
+    tol3z = sqrt(_starpu_dlamch_("Epsilon"));
+
+/*     Beginning of while loop: runs until NB columns are done or a */
+/*     column has been flagged for norm recomputation (lsticc != 0). */
+L10:
+    if (k < *nb && lsticc == 0) {
+        ++k;
+        rk = *offset + k;
+
+/*        Determine K-th pivot column and swap if necessary */
+
+        i__1 = *n - k + 1;
+        pvt = k - 1 + _starpu_idamax_(&i__1, &vn1[k], &c__1);
+        if (pvt != k) {
+            _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
+            i__1 = k - 1;
+            _starpu_dswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
+            itemp = jpvt[pvt];
+            jpvt[pvt] = jpvt[k];
+            jpvt[k] = itemp;
+            vn1[pvt] = vn1[k];
+            vn2[pvt] = vn2[k];
+        }
+
+/*        Apply previous Householder reflectors to column K: */
+/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */
+
+        if (k > 1) {
+            i__1 = *m - rk + 1;
+            i__2 = k - 1;
+            _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[rk + a_dim1], lda,
+                &f[k + f_dim1], ldf, &c_b9, &a[rk + k * a_dim1], &c__1);
+        }
+
+/*        Generate elementary reflector H(k). */
+
+        if (rk < *m) {
+            i__1 = *m - rk + 1;
+            _starpu_dlarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
+                c__1, &tau[k]);
+        } else {
+            _starpu_dlarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
+                tau[k]);
+        }
+
+        akk = a[rk + k * a_dim1];  /* saved here, restored at the end of the step */
+        a[rk + k * a_dim1] = 1.;
+
+/*        Compute Kth column of F: */
+
+/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */
+
+        if (k < *n) {
+            i__1 = *m - rk + 1;
+            i__2 = *n - k;
+            _starpu_dgemv_("Transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 1) *
+                a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b16, &f[k +
+                1 + k * f_dim1], &c__1);
+        }
+
+/*        Padding F(1:K,K) with zeros. */
+
+        i__1 = k;
+        for (j = 1; j <= i__1; ++j) {
+            f[j + k * f_dim1] = 0.;
+/* L20: */
+        }
+
+/*        Incremental updating of F: */
+/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
+/*                    *A(RK:M,K). */
+
+        if (k > 1) {
+            i__1 = *m - rk + 1;
+            i__2 = k - 1;
+            d__1 = -tau[k];
+            _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &a[rk + a_dim1], lda, &a[
+                rk + k * a_dim1], &c__1, &c_b16, &auxv[1], &c__1);
+
+            i__1 = k - 1;
+            _starpu_dgemv_("No transpose", n, &i__1, &c_b9, &f[f_dim1 + 1], ldf, &
+                auxv[1], &c__1, &c_b9, &f[k * f_dim1 + 1], &c__1);
+        }
+
+/*        Update the current row of A: */
+/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */
+
+        if (k < *n) {
+            i__1 = *n - k;
+            _starpu_dgemv_("No transpose", &i__1, &k, &c_b8, &f[k + 1 + f_dim1], ldf,
+                &a[rk + a_dim1], lda, &c_b9, &a[rk + (k + 1) * a_dim1],
+                lda);
+        }
+
+/*        Update partial column norms. */
+
+        if (rk < lastrk) {
+            i__1 = *n;
+            for (j = k + 1; j <= i__1; ++j) {
+                if (vn1[j] != 0.) {
+
+/*                 NOTE: The following 4 lines follow from the analysis in */
+/*                 Lapack Working Note 176. */
+
+                    temp = (d__1 = a[rk + j * a_dim1], abs(d__1)) / vn1[j];
+/* Computing MAX */
+                    d__1 = 0., d__2 = (temp + 1.) * (1. - temp);
+                    temp = max(d__1,d__2);
+/* Computing 2nd power */
+                    d__1 = vn1[j] / vn2[j];
+                    temp2 = temp * (d__1 * d__1);
+                    if (temp2 <= tol3z) {
+                        vn2[j] = (doublereal) lsticc;  /* link to previous list head */
+                        lsticc = j;                    /* column j becomes the head */
+                    } else {
+                        vn1[j] *= sqrt(temp);
+                    }
+                }
+/* L30: */
+            }
+        }
+
+        a[rk + k * a_dim1] = akk;
+
+/*        End of while loop. */
+
+        goto L10;
+    }
+    *kb = k;
+    rk = *offset + *kb;
+
+/*     Apply the block reflector to the rest of the matrix: */
+/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
+/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */
+
+/* Computing MIN */
+    i__1 = *n, i__2 = *m - *offset;
+    if (*kb < min(i__1,i__2)) {
+        i__1 = *m - rk;
+        i__2 = *n - *kb;
+        _starpu_dgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b8, &a[rk +
+            1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b9, &a[rk + 1
+            + (*kb + 1) * a_dim1], lda);
+    }
+
+/*     Recomputation of difficult columns: walk the list threaded */
+/*     through VN2, starting at lsticc, until the 0 terminator. */
+L40:
+    if (lsticc > 0) {
+        itemp = i_dnnt(&vn2[lsticc]);  /* next list entry, stored as a double */
+        i__1 = *m - rk;
+        vn1[lsticc] = _starpu_dnrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);
+
+/*        NOTE: The computation of VN1( LSTICC ) relies on the fact that */
+/*        DNRM2 does not fail on vectors with norm below the value of */
+/*        SQRT(DLAMCH('S')) */
+
+        vn2[lsticc] = vn1[lsticc];
+        lsticc = itemp;
+        goto L40;
+    }
+
+    return 0;
+
+/*     End of DLAQPS */
+
+} /* _starpu_dlaqps_ */
diff --git a/min-dgels/base/SRC/dlaqr0.c b/min-dgels/base/SRC/dlaqr0.c
new file mode 100644
index 0000000..952f82c
--- /dev/null
+++ b/min-dgels/base/SRC/dlaqr0.c
@@ -0,0 +1,758 @@
+/* dlaqr0.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; + +/* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + doublereal swap; + integer ktop; + doublereal zdum[1] /* was [1][1] */; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr3_( + logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, 
integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), + _starpu_dlaqr4_(logical *, logical *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), _starpu_dlaqr5_(logical *, logical *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *); + integer nibble; + extern /* Subroutine */ int _starpu_dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + char jbcmpz[1]; + integer nwupbd; + logical sorted; + integer lwkopt; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQR0 computes the eigenvalues of a Hessenberg matrix H */ +/* and, optionally, the matrices T and Z from the Schur decomposition */ +/* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ +/* Schur form), and Z is the orthogonal matrix of Schur vectors. 
*/ + +/* Optionally Z may be postmultiplied into an input orthogonal */ +/* matrix Q so that this routine can give the Schur factorization */ +/* of a matrix A which has been reduced to the Hessenberg form H */ +/* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ + +/* Arguments */ +/* ========= */ + +/* WANTT (input) LOGICAL */ +/* = .TRUE. : the full Schur form T is required; */ +/* = .FALSE.: only eigenvalues are required. */ + +/* WANTZ (input) LOGICAL */ +/* = .TRUE. : the matrix of Schur vectors Z is required; */ +/* = .FALSE.: Schur vectors are not required. */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N .GE. 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper triangular in rows */ +/* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, */ +/* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a */ +/* previous call to DGEBAL, and then passed to DGEHRD when the */ +/* matrix output by DGEBAL is reduced to Hessenberg form. */ +/* Otherwise, ILO and IHI should be set to 1 and N, */ +/* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */ +/* If N = 0, then ILO = 1 and IHI = 0. */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On entry, the upper Hessenberg matrix H. */ +/* On exit, if INFO = 0 and WANTT is .TRUE., then H contains */ +/* the upper quasi-triangular matrix T from the Schur */ +/* decomposition (the Schur form); 2-by-2 diagonal blocks */ +/* (corresponding to complex conjugate pairs of eigenvalues) */ +/* are returned in standard form, with H(i,i) = H(i+1,i+1) */ +/* and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is */ +/* .FALSE., then the contents of H are unspecified on exit. */ +/* (The output value of H when INFO.GT.0 is given under the */ +/* description of INFO below.) */ + +/* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and */ +/* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. 
 */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH .GE. max(1,N). */ + +/* WR (output) DOUBLE PRECISION array, dimension (IHI) */ +/* WI (output) DOUBLE PRECISION array, dimension (IHI) */ +/* The real and imaginary parts, respectively, of the computed */ +/* eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) */ +/* and WI(ILO:IHI). If two eigenvalues are computed as a */ +/* complex conjugate pair, they are stored in consecutive */ +/* elements of WR and WI, say the i-th and (i+1)th, with */ +/* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then */ +/* the eigenvalues are stored in the same order as on the */ +/* diagonal of the Schur form returned in H, with */ +/* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal */ +/* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ +/* WI(i+1) = -WI(i). */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. */ +/* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) */ +/* If WANTZ is .FALSE., then Z is not referenced. */ +/* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is */ +/* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the */ +/* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). */ +/* (The output value of Z when INFO.GT.0 is given under */ +/* the description of INFO below.) */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. If WANTZ is .TRUE. */ +/* then LDZ.GE.MAX(1,IHIZ). Otherwise, LDZ.GE.1. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK */ +/* On exit, if LWORK = -1, WORK(1) returns an estimate of */ +/* the optimal value for LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK .GE. max(1,N) */ +/* is sufficient, but LWORK typically as large as 6*N may */ +/* be required for optimal performance. 
A workspace query */ +/* to determine the optimal workspace size is recommended. */ + +/* If LWORK = -1, then DLAQR0 does a workspace query. */ +/* In this case, DLAQR0 checks the input parameters and */ +/* estimates the optimal workspace size for the given */ +/* values of N, ILO and IHI. The estimate is returned */ +/* in WORK(1). No error message related to LWORK is */ +/* issued by XERBLA. Neither H nor Z are accessed. */ + + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* .GT. 0: if INFO = i, DLAQR0 failed to compute all of */ +/* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ +/* and WI contain those eigenvalues which have been */ +/* successfully computed. (Failures are rare.) */ + +/* If INFO .GT. 0 and WANTT is .FALSE., then on exit, */ +/* the remaining unconverged eigenvalues are the eigen- */ +/* values of the upper Hessenberg matrix rows and */ +/* columns ILO through INFO of the final, output */ +/* value of H. */ + +/* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ + +/* (*) (initial value of H)*U = U*(final value of H) */ + +/* where U is an orthogonal matrix. The final */ +/* value of H is upper Hessenberg and quasi-triangular */ +/* in rows and columns INFO+1 through IHI. */ + +/* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ + +/* (final value of Z(ILO:IHI,ILOZ:IHIZ)) */ +/* = (initial value of Z(ILO:IHI,ILOZ:IHIZ))*U */ + +/* where U is the orthogonal matrix in (*) (regard- */ +/* less of the value of WANTT.) */ + +/* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not */ +/* accessed. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* References: */ +/* K. Braman, R. Byers and R. 
Mathias, The Multi-Shift QR */ +/* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ +/* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ +/* 929--947, 2002. */ + +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ +/* of Matrix Analysis, volume 23, pages 948--973, 2002. */ + +/* ================================================================ */ +/* .. Parameters .. */ + +/* ==== Matrices of order NTINY or smaller must be processed by */ +/* . DLAHQR because of insufficient subdiagonal scratch space. */ +/* . (This is a hard limit.) ==== */ + +/* ==== Exceptional deflation windows: try to cure rare */ +/* . slow convergence by varying the size of the */ +/* . deflation window after KEXNW iterations. ==== */ + +/* ==== Exceptional shifts: try to cure rare slow convergence */ +/* . with ad-hoc exceptional shifts every KEXSH iterations. */ +/* . ==== */ + +/* ==== The constants WILK1 and WILK2 are used to form the */ +/* . exceptional shifts. ==== */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + _starpu_dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* ==== Use small bulge multi-shift QR with aggressive early */ +/* . 
deflation on larger-than-tiny matrices. ==== */ + +/* ==== Hope for the best. ==== */ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* ==== NWR = recommended deflation window size. At this */ +/* . point, N .GT. NTINY = 11, so there is enough */ +/* . subdiagonal workspace for NWR.GE.2 as required. */ +/* . (In fact, there is enough subdiagonal space for */ +/* . NWR.GE.3.) ==== */ + + nwr = _starpu_ilaenv_(&c__13, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* ==== NSR = recommended number of simultaneous shifts. */ +/* . At this point N .GT. NTINY = 11, so there is at */ +/* . enough subdiagonal workspace for NSR to be even */ +/* . and greater than or equal to two as required. ==== */ + + nsr = _starpu_ilaenv_(&c__15, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* ==== Estimate optimal workspace ==== */ + +/* ==== Workspace query call to DLAQR3 ==== */ + + i__1 = nwr + 1; + _starpu_dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ==== */ + +/* Computing MAX */ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. 
==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = _starpu_ilaenv_(&c__12, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = _starpu_ilaenv_(&c__14, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); + nibble = max(0,nibble); + +/* ==== Accumulate reflections during ttswp? Use block */ +/* . 2-by-2 structure during matrix-matrix multiply? ==== */ + + kacc22 = _starpu_ilaenv_(&c__16, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* ==== NWMAX = the largest possible deflation window for */ +/* . which there is sufficient workspace. ==== */ + +/* Computing MIN */ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* ==== NSMAX = the Largest number of simultaneous shifts */ +/* . for which there is sufficient workspace. ==== */ + +/* Computing MIN */ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* ==== ITMAX = iteration limit ==== */ + +/* Computing MAX */ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* ==== Select deflation window size: */ +/* . Typical Case: */ +/* . If possible and advisable, nibble the entire */ +/* . active block. If not, use size MIN(NWR,NWMAX) */ +/* . or MIN(NWR+1,NWMAX) depending upon which has */ +/* . 
the smaller corresponding subdiagonal entry */ +/* . (a heuristic). */ +/* . */ +/* . Exceptional Case: */ +/* . If there have been no deflations in KEXNW or */ +/* . more iterations, then vary the deflation window */ +/* . size. At first, because, larger windows are, */ +/* . in general, more powerful than smaller ones, */ +/* . rapidly increase the window to the maximum possible. */ +/* . Then, gradually reduce the window size. ==== */ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* ==== Aggressive early deflation: */ +/* . split workspace under the subdiagonal into */ +/* . - an nw-by-nw work array V in the lower */ +/* . left-hand-corner, */ +/* . - an NW-by-at-least-NW-but-more-is-better */ +/* . (NW-by-NHO) horizontal work array along */ +/* . the bottom edge, */ +/* . - an at-least-NW-but-more-is-better (NHV-by-NW) */ +/* . vertical work array along the left-hand-edge. */ +/* . ==== */ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + _starpu_dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. 
==== */ + + ks = kbot - ls + 1; + +/* ==== Skip an expensive QR sweep if there is a (partly */ +/* . heuristic) reason to expect that many eigenvalues */ +/* . will deflate without it. Here, the QR sweep is */ +/* . skipped if many eigenvalues have just been deflated */ +/* . or if the remaining active block is small. */ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* ==== NS = nominal number of simultaneous shifts. */ +/* . This may be lowered (slightly) if DLAQR3 */ +/* . did not provide that many shifts. ==== */ + +/* Computing MIN */ +/* Computing MAX */ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* ==== If there have been no deflations */ +/* . in a multiple of KEXSH iterations, */ +/* . then try exceptional shifts. */ +/* . Otherwise use shifts provided by */ +/* . DLAQR3 above or from the eigenvalues */ +/* . of a trailing principal submatrix. ==== */ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] +, &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* ==== Got NS/2 or fewer shifts? Use DLAQR4 or */ +/* . DLAHQR on a trailing principal submatrix to */ +/* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, */ +/* . there is enough space below the subdiagonal */ +/* . to fit an NS-by-NS scratch array.) 
==== */ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + _starpu_dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + _starpu_dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + _starpu_dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + +/* ==== In case of a rare QR failure use */ +/* . eigenvalues of the trailing 2-by-2 */ +/* . principal submatrix. ==== */ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) */ +/* . Bubble sort keeps complex conjugate */ +/* . pairs together. ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* ==== Shuffle shifts into pairs of real shifts */ +/* . and pairs of complex conjugate shifts */ +/* . assuming complex conjugate shifts are */ +/* . already adjacent to one another. (Yes, */ +/* . they are.) 
==== */ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* ==== If there are only two shifts and both are */ +/* . real, then use only one. ==== */ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* ==== Use up to NS of the smallest magnitude */ +/* . shifts. If there aren't NS shifts available, */ +/* . then use them all, possibly dropping one to */ +/* . make the number of shifts even. ==== */ + +/* Computing MIN */ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* ==== Small-bulge multi-shift QR sweep: */ +/* . split workspace under the subdiagonal into */ +/* . - a KDU-by-KDU work array U in the lower */ +/* . left-hand-corner, */ +/* . - a KDU-by-at-least-KDU-but-more-is-better */ +/* . (KDU-by-NHO) horizontal work array WH along */ +/* . the bottom edge, */ +/* . - and an at-least-KDU-but-more-is-better-by-KDU */ +/* . (NVE-by-KDU) vertical work array WV along */ +/* . the left-hand-edge. ==== */ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + _starpu_dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it).
==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* ==== End of main loop ==== */ +/* L80: */ + } + +/* ==== Iteration limit exceeded. Set INFO to show where */ +/* . the problem occurred and exit. ==== */ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR0 ==== */ + + return 0; +} /* _starpu_dlaqr0_ */ diff --git a/min-dgels/base/SRC/dlaqr1.c b/min-dgels/base/SRC/dlaqr1.c new file mode 100644 index 0000000..987dbf8 --- /dev/null +++ b/min-dgels/base/SRC/dlaqr1.c @@ -0,0 +1,127 @@ +/* dlaqr1.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, + doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, + doublereal *v) +{ + /* System generated locals */ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + + /* Local variables */ + doublereal s, h21s, h31s; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a */ +/* scalar multiple of the first column of the product */ + +/* (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) */ + +/* scaling to avoid overflows and most underflows.
It */ +/* is assumed that either */ + +/* 1) sr1 = sr2 and si1 = -si2 */ +/* or */ +/* 2) si1 = si2 = 0. */ + +/* This is useful for starting double implicit shift bulges */ +/* in the QR algorithm. */ + + +/* N (input) integer */ +/* Order of the matrix H. N must be either 2 or 3. N is not validated: any value other than 2 takes the 3-by-3 path. */ + +/* H (input) DOUBLE PRECISION array of dimension (LDH,N) */ +/* The 2-by-2 or 3-by-3 matrix H in (*). */ + +/* LDH (input) integer */ +/* The leading dimension of H as declared in */ +/* the calling procedure. LDH.GE.N */ + +/* SR1 (input) DOUBLE PRECISION */ +/* SI1 The shifts in (*): (SR1,SI1) and (SR2,SI2) are the real and imaginary parts of the two shifts. */ +/* SR2 */ +/* SI2 */ + +/* V (output) DOUBLE PRECISION array of dimension N */ +/* A scalar multiple of the first column of the */ +/* matrix K in (*). Set to all zeros when the scaling factor s is zero. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments: offset h__ and v so the 1-based (Fortran-style) indices used below are valid */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body: s = |H(1,1)-sr2| + |si2| + |H(2,1)| (plus |H(3,1)| in the 3-by-3 case) is the scaling factor; s == 0 yields v = 0 */ + if (*n == 2) { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * + ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2); + } + } else { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs( + d__3)); + if (s == 0.)
{ + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) + - *si1 * (*si2 / s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ + h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2) + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * + sr2) + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} /* _starpu_dlaqr1_ */ diff --git a/min-dgels/base/SRC/dlaqr2.c b/min-dgels/base/SRC/dlaqr2.c new file mode 100644 index 0000000..bb14ee5 --- /dev/null +++ b/min-dgels/base/SRC/dlaqr2.c @@ -0,0 +1,698 @@ +/* dlaqr2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b12 = 0.; +static doublereal c_b13 = 1.; +static logical c_true = TRUE_; + +/* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3,
i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2; + doublereal beta; + integer kend, kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dgemm_(char *, char *, integer *, integer * +, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + logical bulge; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer infqr, kwtop; + extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlabad_( + doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmax; + extern /* Subroutine */ int _starpu_dtrexc_(char *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, 
integer *), _starpu_dormhr_(char *, char *, integer + *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *); + logical sorted; + doublereal smlnum; + integer lwkopt; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* -- April 2009 -- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* This subroutine is identical to DLAQR3 except that it avoids */ +/* recursion by calling DLAHQR instead of DLAQR4. */ + + +/* ****************************************************************** */ +/* Aggressive early deflation: */ + +/* This subroutine accepts as input an upper Hessenberg matrix */ +/* H and performs an orthogonal similarity transformation */ +/* designed to detect and deflate fully converged eigenvalues from */ +/* a trailing principal submatrix. On output H has been over- */ +/* written by a new Hessenberg matrix that is a perturbation of */ +/* an orthogonal similarity transformation of H. It is to be */ +/* hoped that the final version of H has many zero subdiagonal */ +/* entries. */ + +/* ****************************************************************** */ +/* WANTT (input) LOGICAL */ +/* If .TRUE., then the Hessenberg matrix H is fully updated */ +/* so that the quasi-triangular Schur factor may be */ +/* computed (in cooperation with the calling subroutine). */ +/* If .FALSE., then only enough of H is updated to preserve */ +/* the eigenvalues. */ + +/* WANTZ (input) LOGICAL */ +/* If .TRUE., then the orthogonal matrix Z is updated */ +/* so that the orthogonal Schur factor may be computed */ +/* (in cooperation with the calling subroutine). */ +/* If .FALSE., then Z is not referenced. */ + +/* N (input) INTEGER */ +/* The order of the matrix H and (if WANTZ is .TRUE.) the */ +/* order of the orthogonal matrix Z.
*/ + +/* KTOP (input) INTEGER */ +/* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. */ +/* KBOT and KTOP together determine an isolated block */ +/* along the diagonal of the Hessenberg matrix. */ + +/* KBOT (input) INTEGER */ +/* It is assumed without a check that either */ +/* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together */ +/* determine an isolated block along the diagonal of the */ +/* Hessenberg matrix. */ + +/* NW (input) INTEGER */ +/* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On input the initial N-by-N section of H stores the */ +/* Hessenberg matrix undergoing aggressive early deflation. */ +/* On output H has been transformed by an orthogonal */ +/* similarity transformation, perturbed, and then returned */ +/* to Hessenberg form that (it is to be hoped) has some */ +/* zero subdiagonal entries. */ + +/* LDH (input) integer */ +/* Leading dimension of H just as declared in the calling */ +/* subroutine. N .LE. LDH */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* If WANTZ is .TRUE., then on output, the orthogonal */ +/* similarity transformation mentioned above has been */ +/* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. */ +/* If WANTZ is .FALSE., then Z is unreferenced. */ + +/* LDZ (input) integer */ +/* The leading dimension of Z just as declared in the */ +/* calling subroutine. 1 .LE. LDZ. */ + +/* NS (output) integer */ +/* The number of unconverged (i.e., approximate) eigenvalues */ +/* returned in SR and SI that may be used as shifts by the */ +/* calling subroutine. */ + +/* ND (output) integer */ +/* The number of converged eigenvalues uncovered by this */ +/* subroutine.
*/ + +/* SR (output) DOUBLE PRECISION array, dimension KBOT */ +/* SI (output) DOUBLE PRECISION array, dimension KBOT */ +/* On output, the real and imaginary parts of approximate */ +/* eigenvalues that may be used for shifts are stored in */ +/* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and */ +/* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. */ +/* The real and imaginary parts of converged eigenvalues */ +/* are stored in SR(KBOT-ND+1) through SR(KBOT) and */ +/* SI(KBOT-ND+1) through SI(KBOT), respectively. */ + +/* V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) */ +/* An NW-by-NW work array. */ + +/* LDV (input) integer scalar */ +/* The leading dimension of V just as declared in the */ +/* calling subroutine. NW .LE. LDV */ + +/* NH (input) integer scalar */ +/* The number of columns of T. NH.GE.NW. */ + +/* T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) */ + +/* LDT (input) integer */ +/* The leading dimension of T just as declared in the */ +/* calling subroutine. NW .LE. LDT */ + +/* NV (input) integer */ +/* The number of rows of work array WV available for */ +/* workspace. NV.GE.NW. */ + +/* WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) */ + +/* LDWV (input) integer */ +/* The leading dimension of WV just as declared in the */ +/* calling subroutine. NW .LE. LDWV */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension LWORK. */ +/* On exit, WORK(1) is set to an estimate of the optimal value */ +/* of LWORK for the given values of N, NW, KTOP and KBOT. */ + +/* LWORK (input) integer */ +/* The dimension of the work array WORK. LWORK = 2*NW */ +/* suffices, but greater efficiency may result from larger */ +/* values of LWORK. */ + +/* If LWORK = -1, then a workspace query is assumed; DLAQR2 */ +/* only estimates the optimal workspace size for the given */ +/* values of N, NW, KTOP and KBOT. The estimate is returned */ +/* in WORK(1). No error message related to LWORK is issued */ +/* by XERBLA.
Neither H nor Z are accessed. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* ==== Estimate optimal workspace. ==== */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + _starpu_dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + _starpu_dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== Nothing to do ... */ +/* ... for an empty active block ... ==== */ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = _starpu_dlamch_("SAFE MINIMUM"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + ulp = _starpu_dlamch_("PRECISION"); + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* ==== Setup deflation window ==== */ + +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* ==== Convert to spike-triangular form. (In case of a */ +/* . rare QR failure, this routine continues to do */ +/* . aggressive early deflation using that part of */ +/* . the deflation window that converged using INFQR */ +/* . here and there to keep track.) 
==== */ + + _starpu_dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + _starpu_dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + _starpu_dlaset_("A", &jw, &jw, &c_b12, &c_b13, &v[v_offset], ldv); + _starpu_dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], + &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* ==== Undeflatable. Move it up out of the way. */ +/* . (DTREXC can not fail in this case.) ==== */ + + ifst = *ns; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) 
{ + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* ==== Undeflatable. Move them up out of the way. */ +/* . Fortunately, DTREXC does the right thing with */ +/* . ILST in case of a rare exchange failure. ==== */ + + ifst = *ns; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* ==== sorting diagonal blocks of T improves accuracy for */ +/* . graded matrices. Bubble sort deals well with */ +/* . exchange failures. ==== */ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + +/* ==== Reflect spike back into lower triangle ==== */ + + _starpu_dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + _starpu_dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + _starpu_dlaset_("L", &i__1, &i__2, &c_b12, &c_b12, &t[t_dim1 + 3], ldt); + + _starpu_dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + _starpu_dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + _starpu_dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + _starpu_dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] +, &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + _starpu_dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] +, ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + _starpu_dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* ==== Accumulate orthogonal matrix in order to update */ +/* . H and Z, if requested. ==== */ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + _starpu_dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ?
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b13, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b12, &wv[wv_offset], + ldwv); + _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + _starpu_dgemm_("C", "N", &jw, &kln, &jw, &c_b13, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b12, &t[t_offset], + ldt); + _starpu_dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b13, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b12, &wv[ + wv_offset], ldwv); + _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* ==== ... and the number of shifts. (Subtracting */ +/* . INFQR from the spike length takes care */ +/* . of the case of a rare QR failure while */ +/* . calculating eigenvalues of the deflation */ +/* . window.) ==== */ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR2 ==== */ + + return 0; +} /* _starpu_dlaqr2_ */ diff --git a/min-dgels/base/SRC/dlaqr3.c b/min-dgels/base/SRC/dlaqr3.c new file mode 100644 index 0000000..60993d3 --- /dev/null +++ b/min-dgels/base/SRC/dlaqr3.c @@ -0,0 +1,715 @@ +/* dlaqr3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static logical c_true = TRUE_; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__12 = 12; + +/* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2, lwk3; + doublereal beta; + integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + 
extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dgemm_(char *, char *, integer *, integer * +, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + logical bulge; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer infqr, kwtop; + extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr4_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), + _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + doublereal safmax; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + _starpu_dormhr_(char *, char *, integer *, integer *, integer *, integer 
+ *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + logical sorted; + doublereal smlnum; + integer lwkopt; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* -- April 2009 -- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ****************************************************************** */ +/* Aggressive early deflation: */ + +/* This subroutine accepts as input an upper Hessenberg matrix */ +/* H and performs an orthogonal similarity transformation */ +/* designed to detect and deflate fully converged eigenvalues from */ +/* a trailing principal submatrix. On output H has been over- */ +/* written by a new Hessenberg matrix that is a perturbation of */ +/* an orthogonal similarity transformation of H. It is to be */ +/* hoped that the final version of H has many zero subdiagonal */ +/* entries. */ + +/* ****************************************************************** */ +/* WANTT (input) LOGICAL */ +/* If .TRUE., then the Hessenberg matrix H is fully updated */ +/* so that the quasi-triangular Schur factor may be */ +/* computed (in cooperation with the calling subroutine). */ +/* If .FALSE., then only enough of H is updated to preserve */ +/* the eigenvalues. */ + +/* WANTZ (input) LOGICAL */ +/* If .TRUE., then the orthogonal matrix Z is updated */ +/* so that the orthogonal Schur factor may be computed */ +/* (in cooperation with the calling subroutine). */ +/* If .FALSE., then Z is not referenced. */ + +/* N (input) INTEGER */ +/* The order of the matrix H and (if WANTZ is .TRUE.) the */ +/* order of the orthogonal matrix Z. */ + +/* KTOP (input) INTEGER */ +/* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. */ +/* KBOT and KTOP together determine an isolated block */ +/* along the diagonal of the Hessenberg matrix. 
*/ + +/* KBOT (input) INTEGER */ +/* It is assumed without a check that either */ +/* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together */ +/* determine an isolated block along the diagonal of the */ +/* Hessenberg matrix. */ + +/* NW (input) INTEGER */ +/* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On input the initial N-by-N section of H stores the */ +/* Hessenberg matrix undergoing aggressive early deflation. */ +/* On output H has been transformed by an orthogonal */ +/* similarity transformation, perturbed, and then returned */ +/* to Hessenberg form that (it is to be hoped) has some */ +/* zero subdiagonal entries. */ + +/* LDH (input) integer */ +/* Leading dimension of H just as declared in the calling */ +/* subroutine. N .LE. LDH */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* IF WANTZ is .TRUE., then on output, the orthogonal */ +/* similarity transformation mentioned above has been */ +/* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. */ +/* If WANTZ is .FALSE., then Z is unreferenced. */ + +/* LDZ (input) integer */ +/* The leading dimension of Z just as declared in the */ +/* calling subroutine. 1 .LE. LDZ. */ + +/* NS (output) integer */ +/* The number of unconverged (ie approximate) eigenvalues */ +/* returned in SR and SI that may be used as shifts by the */ +/* calling subroutine. */ + +/* ND (output) integer */ +/* The number of converged eigenvalues uncovered by this */ +/* subroutine. 
*/ + +/* SR (output) DOUBLE PRECISION array, dimension KBOT */ +/* SI (output) DOUBLE PRECISION array, dimension KBOT */ +/* On output, the real and imaginary parts of approximate */ +/* eigenvalues that may be used for shifts are stored in */ +/* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and */ +/* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. */ +/* The real and imaginary parts of converged eigenvalues */ +/* are stored in SR(KBOT-ND+1) through SR(KBOT) and */ +/* SI(KBOT-ND+1) through SI(KBOT), respectively. */ + +/* V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) */ +/* An NW-by-NW work array. */ + +/* LDV (input) integer scalar */ +/* The leading dimension of V just as declared in the */ +/* calling subroutine. NW .LE. LDV */ + +/* NH (input) integer scalar */ +/* The number of columns of T. NH.GE.NW. */ + +/* T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) */ + +/* LDT (input) integer */ +/* The leading dimension of T just as declared in the */ +/* calling subroutine. NW .LE. LDT */ + +/* NV (input) integer */ +/* The number of rows of work array WV available for */ +/* workspace. NV.GE.NW. */ + +/* WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) */ + +/* LDWV (input) integer */ +/* The leading dimension of WV just as declared in the */ +/* calling subroutine. NW .LE. LDWV */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension LWORK. */ +/* On exit, WORK(1) is set to an estimate of the optimal value */ +/* of LWORK for the given values of N, NW, KTOP and KBOT. */ + +/* LWORK (input) integer */ +/* The dimension of the work array WORK. LWORK = 2*NW */ +/* suffices, but greater efficiency may result from larger */ +/* values of LWORK. */ + +/* If LWORK = -1, then a workspace query is assumed; DLAQR3 */ +/* only estimates the optimal workspace size for the given */ +/* values of N, NW, KTOP and KBOT. The estimate is returned */ +/* in WORK(1). No error message related to LWORK is issued */ +/* by XERBLA. 
Neither H nor Z are accessed. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* ==== Estimate optimal workspace. ==== */ + + /* Parameter adjustments: offset each array pointer so that the 1-based (Fortran-style) subscripts used below index the caller's arrays */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + _starpu_dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + _starpu_dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Workspace query call to DLAQR4 ==== */ + + _starpu_dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], + &si[1], &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, & + infqr); + lwk3 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + +/* Computing MAX */ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== 
Quick return in case of workspace query. 
==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== Nothing to do ... */ +/* ... for an empty active block ... ==== */ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = _starpu_dlamch_("SAFE MINIMUM"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + ulp = _starpu_dlamch_("PRECISION"); + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* ==== Setup deflation window ==== */ + +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* ==== Convert to spike-triangular form. (In case of a */ +/* . rare QR failure, this routine continues to do */ +/* . aggressive early deflation using that part of */ +/* . the deflation window that converged using INFQR */ +/* . here and there to keep track.) 
==== */ + + _starpu_dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + _starpu_dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + _starpu_dlaset_("A", &jw, &jw, &c_b17, &c_b18, &v[v_offset], ldv); + nmin = _starpu_ilaenv_(&c__12, "DLAQR3", "SV", &jw, &c__1, &jw, lwork); + if (jw > nmin) { + _starpu_dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], + lwork, &infqr); + } else { + _starpu_dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* ==== Undeflatable. Move it up out of the way. */ +/* . (DTREXC can not fail in this case.) 
==== */ + + ifst = *ns; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* ==== Undeflatable. Move them up out of the way. */ +/* . Fortunately, DTREXC does the right thing with */ +/* . ILST in case of a rare exchange failure. ==== */ + + ifst = *ns; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* ==== sorting diagonal blocks of T improves accuracy for */ +/* . graded matrices. Bubble sort deals well with */ +/* . exchange failures. ==== */ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + +/* ==== Reflect spike back into lower triangle ==== */ + + _starpu_dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + _starpu_dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + _starpu_dlaset_("L", &i__1, &i__2, &c_b17, &c_b17, &t[t_dim1 + 3], ldt); + + _starpu_dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + _starpu_dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + _starpu_dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + _starpu_dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] +, &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + _starpu_dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] +, ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + _starpu_dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* ==== Accumulate orthogonal matrix in order to update */ +/* . H and Z, if requested. ==== */ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + _starpu_dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b18, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b17, &wv[wv_offset], + ldwv); + _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + _starpu_dgemm_("C", "N", &jw, &kln, &jw, &c_b18, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b17, &t[t_offset], + ldt); + _starpu_dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b18, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b17, &wv[ + wv_offset], ldwv); + _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* ==== ... and the number of shifts. (Subtracting */ +/* . INFQR from the spike length takes care */ +/* . of the case of a rare QR failure while */ +/* . calculating eigenvalues of the deflation */ +/* . window.) ==== */ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR3 ==== */ + + return 0; +} /* _starpu_dlaqr3_ */ diff --git a/min-dgels/base/SRC/dlaqr4.c b/min-dgels/base/SRC/dlaqr4.c new file mode 100644 index 0000000..9dde37a --- /dev/null +++ b/min-dgels/base/SRC/dlaqr4.c @@ -0,0 +1,754 @@ +/* dlaqr4.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; + +/* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + doublereal swap; + integer ktop; + doublereal zdum[1] /* was [1][1] */; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int _starpu_dlaqr2_(logical *, logical *, 
integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr5_( + logical *, logical *, integer *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *); + integer nibble; + extern /* Subroutine */ int _starpu_dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + char jbcmpz[1]; + integer nwupbd; + logical sorted; + integer lwkopt; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* This subroutine implements one level of recursion for DLAQR0. */ +/* It is a complete implementation of the small bulge multi-shift */ +/* QR algorithm. It may be called by DLAQR0 and, for large enough */ +/* deflation window size, it may be called by DLAQR3. This */ +/* subroutine is identical to DLAQR0 except that it calls DLAQR2 */ +/* instead of DLAQR3. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAQR4 computes the eigenvalues of a Hessenberg matrix H */ +/* and, optionally, the matrices T and Z from the Schur decomposition */ +/* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ +/* Schur form), and Z is the orthogonal matrix of Schur vectors. */ + +/* Optionally Z may be postmultiplied into an input orthogonal */ +/* matrix Q so that this routine can give the Schur factorization */ +/* of a matrix A which has been reduced to the Hessenberg form H */ +/* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ + +/* Arguments */ +/* ========= */ + +/* WANTT (input) LOGICAL */ +/* = .TRUE. : the full Schur form T is required; */ +/* = .FALSE.: only eigenvalues are required. */ + +/* WANTZ (input) LOGICAL */ +/* = .TRUE. : the matrix of Schur vectors Z is required; */ +/* = .FALSE.: Schur vectors are not required. */ + +/* N (input) INTEGER */ +/* The order of the matrix H. N .GE. 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper triangular in rows */ +/* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, */ +/* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a */ +/* previous call to DGEBAL, and then passed to DGEHRD when the */ +/* matrix output by DGEBAL is reduced to Hessenberg form. */ +/* Otherwise, ILO and IHI should be set to 1 and N, */ +/* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */ +/* If N = 0, then ILO = 1 and IHI = 0. */ + +/* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ +/* On entry, the upper Hessenberg matrix H. */ +/* On exit, if INFO = 0 and WANTT is .TRUE., then H contains */ +/* the upper quasi-triangular matrix T from the Schur */ +/* decomposition (the Schur form); 2-by-2 diagonal blocks */ +/* (corresponding to complex conjugate pairs of eigenvalues) */ +/* are returned in standard form, with H(i,i) = H(i+1,i+1) */ +/* and H(i+1,i)*H(i,i+1).LT.0. 
If INFO = 0 and WANTT is */ +/* .FALSE., then the contents of H are unspecified on exit. */ +/* (The output value of H when INFO.GT.0 is given under the */ +/* description of INFO below.) */ + +/* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and */ +/* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. */ + +/* LDH (input) INTEGER */ +/* The leading dimension of the array H. LDH .GE. max(1,N). */ + +/* WR (output) DOUBLE PRECISION array, dimension (IHI) */ +/* WI (output) DOUBLE PRECISION array, dimension (IHI) */ +/* The real and imaginary parts, respectively, of the computed */ +/* eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) */ +/* and WI(ILO:IHI). If two eigenvalues are computed as a */ +/* complex conjugate pair, they are stored in consecutive */ +/* elements of WR and WI, say the i-th and (i+1)th, with */ +/* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then */ +/* the eigenvalues are stored in the same order as on the */ +/* diagonal of the Schur form returned in H, with */ +/* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal */ +/* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ +/* WI(i+1) = -WI(i). */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. */ +/* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) */ +/* If WANTZ is .FALSE., then Z is not referenced. */ +/* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is */ +/* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the */ +/* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). */ +/* (The output value of Z when INFO.GT.0 is given under */ +/* the description of INFO below.) */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. if WANTZ is .TRUE. */ +/* then LDZ.GE.MAX(1,IHIZ). Otherwise, LDZ.GE.1. 
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK */ +/* On exit, if LWORK = -1, WORK(1) returns an estimate of */ +/* the optimal value for LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK .GE. max(1,N) */ +/* is sufficient, but LWORK typically as large as 6*N may */ +/* be required for optimal performance. A workspace query */ +/* to determine the optimal workspace size is recommended. */ + +/* If LWORK = -1, then DLAQR4 does a workspace query. */ +/* In this case, DLAQR4 checks the input parameters and */ +/* estimates the optimal workspace size for the given */ +/* values of N, ILO and IHI. The estimate is returned */ +/* in WORK(1). No error message related to LWORK is */ +/* issued by XERBLA. Neither H nor Z are accessed. */ + + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* .GT. 0: if INFO = i, DLAQR4 failed to compute all of */ +/* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ +/* and WI contain those eigenvalues which have been */ +/* successfully computed. (Failures are rare.) */ + +/* If INFO .GT. 0 and WANTT is .FALSE., then on exit, */ +/* the remaining unconverged eigenvalues are the eigen- */ +/* values of the upper Hessenberg matrix rows and */ +/* columns ILO through INFO of the final, output */ +/* value of H. */ + +/* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ + +/* (*) (initial value of H)*U = U*(final value of H) */ + +/* where U is an orthogonal matrix. The final */ +/* value of H is upper Hessenberg and quasi-triangular */ +/* in rows and columns INFO+1 through IHI. */ + +/* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ + +/* (final value of Z(ILO:IHI,ILOZ:IHIZ) */ +/* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U */ + +/* where U is the orthogonal matrix in (*) (regard- */ +/* less of the value of WANTT.) */ + +/* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not */ +/* accessed. 
*/ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* References: */ +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ +/* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ +/* 929--947, 2002. */ + +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ +/* of Matrix Analysis, volume 23, pages 948--973, 2002. */ + +/* ================================================================ */ +/* .. Parameters .. */ + +/* ==== Matrices of order NTINY or smaller must be processed by */ +/* . DLAHQR because of insufficient subdiagonal scratch space. */ +/* . (This is a hard limit.) ==== */ + +/* ==== Exceptional deflation windows: try to cure rare */ +/* . slow convergence by varying the size of the */ +/* . deflation window after KEXNW iterations. ==== */ + +/* ==== Exceptional shifts: try to cure rare slow convergence */ +/* . with ad-hoc exceptional shifts every KEXSH iterations. */ +/* . ==== */ + +/* ==== The constants WILK1 and WILK2 are used to form the */ +/* . exceptional shifts. ==== */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. 
==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + _starpu_dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* ==== Use small bulge multi-shift QR with aggressive early */ +/* . deflation on larger-than-tiny matrices. ==== */ + +/* ==== Hope for the best. ==== */ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* ==== NWR = recommended deflation window size. At this */ +/* . point, N .GT. NTINY = 11, so there is enough */ +/* . subdiagonal workspace for NWR.GE.2 as required. */ +/* . (In fact, there is enough subdiagonal space for */ +/* . NWR.GE.3.) ==== */ + + nwr = _starpu_ilaenv_(&c__13, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* ==== NSR = recommended number of simultaneous shifts. */ +/* . At this point N .GT. NTINY = 11, so there is at */ +/* . enough subdiagonal workspace for NSR to be even */ +/* . and greater than or equal to two as required. 
==== */ + + nsr = _starpu_ilaenv_(&c__15, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* ==== Estimate optimal workspace ==== */ + +/* ==== Workspace query call to DLAQR2 ==== */ + + i__1 = nwr + 1; + _starpu_dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* ==== Optimal workspace = MAX(DLAQR5, DLAQR2) ==== */ + +/* Computing MAX */ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = _starpu_ilaenv_(&c__12, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = _starpu_ilaenv_(&c__14, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); + nibble = max(0,nibble); + +/* ==== Accumulate reflections during ttswp? Use block */ +/* . 2-by-2 structure during matrix-matrix multiply? ==== */ + + kacc22 = _starpu_ilaenv_(&c__16, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* ==== NWMAX = the largest possible deflation window for */ +/* . which there is sufficient workspace. ==== */ + +/* Computing MIN */ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* ==== NSMAX = the Largest number of simultaneous shifts */ +/* . for which there is sufficient workspace. ==== */ + +/* Computing MIN */ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. 
==== */ + + ndfl = 1; + +/* ==== ITMAX = iteration limit ==== */ + +/* Computing MAX */ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* ==== Select deflation window size: */ +/* . Typical Case: */ +/* . If possible and advisable, nibble the entire */ +/* . active block. If not, use size MIN(NWR,NWMAX) */ +/* . or MIN(NWR+1,NWMAX) depending upon which has */ +/* . the smaller corresponding subdiagonal entry */ +/* . (a heuristic). */ +/* . */ +/* . Exceptional Case: */ +/* . If there have been no deflations in KEXNW or */ +/* . more iterations, then vary the deflation window */ +/* . size. At first, because, larger windows are, */ +/* . in general, more powerful than smaller ones, */ +/* . rapidly increase the window to the maximum possible. */ +/* . Then, gradually reduce the window size. ==== */ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* ==== Aggressive early deflation: */ +/* . split workspace under the subdiagonal into */ +/* . - an nw-by-nw work array V in the lower */ +/* . 
left-hand-corner, */ +/* . - an NW-by-at-least-NW-but-more-is-better */ +/* . (NW-by-NHO) horizontal work array along */ +/* . the bottom edge, */ +/* . - an at-least-NW-but-more-is-better (NHV-by-NW) */ +/* . vertical work array along the left-hand-edge. */ +/* . ==== */ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + _starpu_dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* ==== Skip an expensive QR sweep if there is a (partly */ +/* . heuristic) reason to expect that many eigenvalues */ +/* . will deflate without it. Here, the QR sweep is */ +/* . skipped if many eigenvalues have just been deflated */ +/* . or if the remaining active block is small. */ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* ==== NS = nominal number of simultaneous shifts. */ +/* . This may be lowered (slightly) if DLAQR2 */ +/* . did not provide that many shifts. ==== */ + +/* Computing MIN */ +/* Computing MAX */ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* ==== If there have been no deflations */ +/* . in a multiple of KEXSH iterations, */ +/* . then try exceptional shifts. */ +/* . Otherwise use shifts provided by */ +/* . DLAQR2 above or from the eigenvalues */ +/* . of a trailing principal submatrix. 
==== */ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] +, &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* ==== Got NS/2 or fewer shifts? Use DLAHQR */ +/* . on a trailing principal submatrix to */ +/* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, */ +/* . there is enough space below the subdiagonal */ +/* . to fit an NS-by-NS scratch array.) ==== */ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + _starpu_dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + _starpu_dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & + c__1, zdum, &c__1, &inf); + ks += inf; + +/* ==== In case of a rare QR failure use */ +/* . eigenvalues of the trailing 2-by-2 */ +/* . principal submatrix. ==== */ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) */ +/* . Bubble sort keeps complex conjugate */ +/* . pairs together. 
==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* ==== Shuffle shifts into pairs of real shifts */ +/* . and pairs of complex conjugate shifts */ +/* . assuming complex conjugate shifts are */ +/* . already adjacent to one another. (Yes, */ +/* . they are.) ==== */ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* ==== If there are only two shifts and both are */ +/* . real, then use only one. ==== */ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* ==== Use up to NS of the the smallest magnatiude */ +/* . shifts. If there aren't NS shifts available, */ +/* . then use them all, possibly dropping one to */ +/* . make the number of shifts even. ==== */ + +/* Computing MIN */ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* ==== Small-bulge multi-shift QR sweep: */ +/* . split workspace under the subdiagonal into */ +/* . - a KDU-by-KDU work array U in the lower */ +/* . left-hand-corner, */ +/* . - a KDU-by-at-least-KDU-but-more-is-better */ +/* . 
(KDU-by-NHo) horizontal work array WH along */ +/* . the bottom edge, */ +/* . - and an at-least-KDU-but-more-is-better-by-KDU */ +/* . (NVE-by-KDU) vertical work WV arrow along */ +/* . the left-hand-edge. ==== */ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + _starpu_dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* ==== End of main loop ==== */ +/* L80: */ + } + +/* ==== Iteration limit exceeded. Set INFO to show where */ +/* . the problem occurred and exit. ==== */ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR4 ==== */ + + return 0; +} /* _starpu_dlaqr4_ */ diff --git a/min-dgels/base/SRC/dlaqr5.c b/min-dgels/base/SRC/dlaqr5.c new file mode 100644 index 0000000..179ae30 --- /dev/null +++ b/min-dgels/base/SRC/dlaqr5.c @@ -0,0 +1,1025 @@ +/* dlaqr5.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = 0.; +static doublereal c_b8 = 1.; +static integer c__3 = 3; +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal + *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, + integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * + ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Local variables */ + integer i__, j, k, m, i2, j2, i4, j4, k1; + doublereal h11, h12, h21, h22; + integer m22, ns, nu; + doublereal vt[3], scl; + integer kdu, kms; + doublereal ulp; + integer knz, kzs; + doublereal tst1, tst2, beta; + logical blk22, bmp22; + integer mend, jcol, jlen, jbot, mbot; + doublereal swap; + integer jtop, jrow, mtop; + doublereal alpha; + logical accum; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, 
char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaqr1_( + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_dlabad_(doublereal *, + doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal safmax, refsum; + integer mstart; + doublereal smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* This auxiliary subroutine called by DLAQR0 performs a */ +/* single small-bulge multi-shift QR sweep. */ + +/* WANTT (input) logical scalar */ +/* WANTT = .true. if the quasi-triangular Schur factor */ +/* is being computed. WANTT is set to .false. otherwise. */ + +/* WANTZ (input) logical scalar */ +/* WANTZ = .true. if the orthogonal Schur factor is being */ +/* computed. WANTZ is set to .false. otherwise. */ + +/* KACC22 (input) integer with value 0, 1, or 2. */ +/* Specifies the computation mode of far-from-diagonal */ +/* orthogonal updates. */ +/* = 0: DLAQR5 does not accumulate reflections and does not */ +/* use matrix-matrix multiply to update far-from-diagonal */ +/* matrix entries. */ +/* = 1: DLAQR5 accumulates reflections and uses matrix-matrix */ +/* multiply to update the far-from-diagonal matrix entries. 
*/ +/* = 2: DLAQR5 accumulates reflections, uses matrix-matrix */ +/* multiply to update the far-from-diagonal matrix entries, */ +/* and takes advantage of 2-by-2 block structure during */ +/* matrix multiplies. */ + +/* N (input) integer scalar */ +/* N is the order of the Hessenberg matrix H upon which this */ +/* subroutine operates. */ + +/* KTOP (input) integer scalar */ +/* KBOT (input) integer scalar */ +/* These are the first and last rows and columns of an */ +/* isolated diagonal block upon which the QR sweep is to be */ +/* applied. It is assumed without a check that */ +/* either KTOP = 1 or H(KTOP,KTOP-1) = 0 */ +/* and */ +/* either KBOT = N or H(KBOT+1,KBOT) = 0. */ + +/* NSHFTS (input) integer scalar */ +/* NSHFTS gives the number of simultaneous shifts. NSHFTS */ +/* must be positive and even. */ + +/* SR (input/output) DOUBLE PRECISION array of size (NSHFTS) */ +/* SI (input/output) DOUBLE PRECISION array of size (NSHFTS) */ +/* SR contains the real parts and SI contains the imaginary */ +/* parts of the NSHFTS shifts of origin that define the */ +/* multi-shift QR sweep. On output SR and SI may be */ +/* reordered. */ + +/* H (input/output) DOUBLE PRECISION array of size (LDH,N) */ +/* On input H contains a Hessenberg matrix. On output a */ +/* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied */ +/* to the isolated diagonal block in rows and columns KTOP */ +/* through KBOT. */ + +/* LDH (input) integer scalar */ +/* LDH is the leading dimension of H just as declared in the */ +/* calling procedure. LDH.GE.MAX(1,N). */ + +/* ILOZ (input) INTEGER */ +/* IHIZ (input) INTEGER */ +/* Specify the rows of Z to which transformations must be */ +/* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N */ + +/* Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI) */ +/* If WANTZ = .TRUE., then the QR Sweep orthogonal */ +/* similarity transformation is accumulated into */ +/* Z(ILOZ:IHIZ,ILO:IHI) from the right. 
*/ +/* If WANTZ = .FALSE., then Z is unreferenced. */ + +/* LDZ (input) integer scalar */ +/* LDZ is the leading dimension of Z just as declared in */ +/* the calling procedure. LDZ.GE.N. */ + +/* V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2) */ + +/* LDV (input) integer scalar */ +/* LDV is the leading dimension of V as declared in the */ +/* calling procedure. LDV.GE.3. */ + +/* U (workspace) DOUBLE PRECISION array of size */ +/* (LDU,3*NSHFTS-3) */ + +/* LDU (input) integer scalar */ +/* LDU is the leading dimension of U just as declared in the */ +/* calling subroutine. LDU.GE.3*NSHFTS-3. */ + +/* NH (input) integer scalar */ +/* NH is the number of columns in array WH available for */ +/* workspace. NH.GE.1. */ + +/* WH (workspace) DOUBLE PRECISION array of size (LDWH,NH) */ + +/* LDWH (input) integer scalar */ +/* Leading dimension of WH just as declared in the */ +/* calling procedure. LDWH.GE.3*NSHFTS-3. */ + +/* NV (input) integer scalar */ +/* NV is the number of rows in WV available for workspace. */ +/* NV.GE.1. */ + +/* WV (workspace) DOUBLE PRECISION array of size */ +/* (LDWV,3*NSHFTS-3) */ + +/* LDWV (input) integer scalar */ +/* LDWV is the leading dimension of WV as declared in the */ +/* calling subroutine. LDWV.GE.NV. */ + +/* ================================================================ */ +/* Based on contributions by */ +/* Karen Braman and Ralph Byers, Department of Mathematics, */ +/* University of Kansas, USA */ + +/* ================================================================ */ +/* Reference: */ + +/* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* Algorithm Part I: Maintaining Well Focused Shifts, and */ +/* Level 3 Performance, SIAM Journal of Matrix Analysis, */ +/* volume 23, pages 929--947, 2002. */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. 
*/ +/* .. Intrinsic Functions .. */ + +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* ==== If there are no shifts, then there is nothing to do. ==== */ + + /* Parameter adjustments */ + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* ==== If the active block is empty or 1-by-1, then there */ +/* . is nothing to do. ==== */ + + if (*ktop >= *kbot) { + return 0; + } + +/* ==== Shuffle shifts into pairs of real shifts and pairs */ +/* . of complex conjugate shifts assuming complex */ +/* . conjugate shifts are already adjacent to one */ +/* . another. ==== */ + + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } +/* L10: */ + } + +/* ==== NSHFTS is supposed to be even, but if it is odd, */ +/* . then simply reduce it by one. The shuffle above */ +/* . ensures that the dropped shift is real and that */ +/* . the remaining shifts are paired. ==== */ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = _starpu_dlamch_("SAFE MINIMUM"); + safmax = 1. / safmin; + _starpu_dlabad_(&safmin, &safmax); + ulp = _starpu_dlamch_("PRECISION"); + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* ==== Use accumulated reflections to update far-from-diagonal */ +/* . entries ? 
==== */ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + _starpu_dlaset_("ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu); + } + +/* ==== Near-the-diagonal bulge chase. The following loop */ +/* . performs the near-the-diagonal part of a small bulge */ +/* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal */ +/* . chunk extends from column INCOL to column NDCOL */ +/* . (including both column INCOL and column NDCOL). The */ +/* . following loop chases a 3*NBMPS column long chain of */ +/* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL */ +/* . may be less than KTOP and and NDCOL may be greater than */ +/* . KBOT indicating phantom columns from which to chase */ +/* . bulges before they are actually introduced or to which */ +/* . to chase bulges beyond column KBOT.) ==== */ + +/* Computing MIN */ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* ==== Bulges number MTOP to MBOT are active double implicit */ +/* . shift bulges. There may or may not also be small */ +/* . 2-by-2 bulge, if there is room. The inactive bulges */ +/* . (if any) must wait until the active bulges have moved */ +/* . down the diagonal to make room. The phantom matrix */ +/* . paradigm described above helps keep track. 
==== */ + +/* Computing MAX */ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* ==== Generate reflections to chase the chain right */ +/* . one column. (The minimum value of K is KTOP-1.) ==== */ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + _starpu_dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m + << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * + 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + _starpu_dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + _starpu_dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* ==== A Bulge may collapse because of vigilant */ +/* . deflation or destructive underflow. In the */ +/* . underflow case, try the two-small-subdiagonals */ +/* . trick to try to reinflate the bulge. ==== */ + + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * + h_dim1] != 0. || h__[k + 3 + (k + 2) * h_dim1] == + 0.) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* ==== Atypical case: collapsed. Attempt to */ +/* . reintroduce ignoring H(K+1,K) and H(K+2,K). */ +/* . If the fill resulting from the new */ +/* . reflector is too large, then abandon it. */ +/* . Otherwise, use the new one. 
==== */ + + _starpu_dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * + 2], &si[m * 2], vt); + alpha = vt[0]; + _starpu_dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * + h__[k + 2 + k * h_dim1]); + + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], + abs(d__1)) + (d__2 = refsum * vt[2], abs(d__2) + ) > ulp * ((d__3 = h__[k + k * h_dim1], abs( + d__3)) + (d__4 = h__[k + 1 + (k + 1) * h_dim1] + , abs(d__4)) + (d__5 = h__[k + 2 + (k + 2) * + h_dim1], abs(d__5)))) { + +/* ==== Starting a new bulge here would */ +/* . create non-negligible fill. Use */ +/* . the old one with trepidation. ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* ==== Stating a new bulge here would */ +/* . create only negligible fill. */ +/* . Replace the old reflector with */ +/* . the new one. ==== */ + + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } +/* L20: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. 
==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + _starpu_dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( + m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], + &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + _starpu_dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + _starpu_dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ + m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * + v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L30: */ + } +/* L40: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L50: */ + } + } + +/* ==== Multiply H by reflections from the right. */ +/* . Delay filling in the last row until the */ +/* . vigilant deflation check is complete. 
==== */ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * + h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) + * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + + 3]; +/* L60: */ + } + + if (accum) { + +/* ==== Accumulate U. (If necessary, update Z later */ +/* . with with an efficient matrix-matrix */ +/* . multiply.) ==== */ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * + v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L70: */ + } + } else if (*wantz) { + +/* ==== U is not accumulated, so update Z */ +/* . now by multiplying by reflections */ +/* . from the right. ==== */ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ + j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * + v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L80: */ + } + } + } +/* L90: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22 && v[m22 * v_dim1 + 1] != 0.) 
{ +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) + ; + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L100: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L110: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L120: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* ==== The following convergence test requires that */ +/* . the traditional small-compared-to-nearby-diagonals */ +/* . criterion and the Ahues & Tisseur (LAWN 122, 1997) */ +/* . criteria both be satisfied. The latter improves */ +/* . accuracy in some examples. Falling back on an */ +/* . alternate convergence criterion when TST1 or TST2 */ +/* . is zero (as done here) is traditional but probably */ +/* . unnecessary. ==== */ + + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + (d__2 = + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs( + d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], + abs(d__1)); + } + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max( + d__2,d__3)) { +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h12 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h21 = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3,d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) + { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } +/* L130: */ + } + +/* ==== Fill in the last row of each bulge. 
==== */ + +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( + k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L140: */ + } + +/* ==== End of near-the-diagonal bulge chase. ==== */ + +/* L150: */ + } + +/* ==== Use U (if accumulated) to update far-from-diagonal */ +/* . entries in H. If required, use U to update Z as */ +/* . well. ==== */ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* ==== Updates not exploiting the 2-by-2 block */ +/* . structure of U. K1 and NU keep track of */ +/* . the location and size of U in the special */ +/* . cases of introducing bulges and chasing */ +/* . bulges off the bottom. In these special */ +/* . cases and in case the number of shifts */ +/* . is NS = 2, there is no 2-by-2 block */ +/* . structure to exploit. ==== */ + +/* Computing MAX */ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? 
jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + _starpu_dgemm_("C", "N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b7, &wh[wh_offset], ldwh); + _starpu_dlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L160: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + _starpu_dgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b7, &wv[wv_offset], ldwv); + _starpu_dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L170: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + _starpu_dgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &z__[jrow + ( + incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv); + _starpu_dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L180: */ + } + } + } else { + +/* ==== Updates exploiting U's 2-by-2 block structure. */ +/* . (I2, I4, J2, J4 are the last rows and columns */ +/* . of the blocks.) ==== */ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* ==== KZS and KNZ deal with the band of zeros */ +/* . along the diagonal of one of the triangular */ +/* . blocks. ==== */ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? 
jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* ==== Copy bottom of H to top+KZS of scratch ==== */ +/* (The first KZS rows get multiplied by zero.) ==== */ + + _starpu_dlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + _starpu_dlaset_("ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], + ldwh); + _starpu_dtrmm_("L", "U", "C", "N", &knz, &jlen, &c_b8, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] +, ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + _starpu_dgemm_("C", "N", &i2, &jlen, &j2, &c_b8, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + _starpu_dlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] +, ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + _starpu_dtrmm_("L", "L", "C", "N", &j2, &jlen, &c_b8, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + _starpu_dgemm_("C", "N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + ( + i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b8, &wh[i2 + 1 + wh_dim1], + ldwh); + +/* ==== Copy it back ==== */ + + _starpu_dlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L190: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* ==== Copy right of H to scratch (the first KZS */ +/* . 
columns get multiplied by zero) ==== */ + + _starpu_dlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + _starpu_dlaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], + ldwv); + _starpu_dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + _starpu_dgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b8, &wv[wv_offset], ldwv); + +/* ==== Copy left of H to right of scratch ==== */ + + _starpu_dlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + _starpu_dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] +, ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + _starpu_dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &h__[jrow + ( + incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Copy it back ==== */ + + _starpu_dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L200: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* ==== Copy right of Z to left of scratch (first */ +/* . 
KZS columns get multiplied by zero) ==== */ + + _starpu_dlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + _starpu_dlaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[ + wv_offset], ldwv); + _starpu_dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + _starpu_dgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &z__[jrow + ( + incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b8, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + _starpu_dlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + _starpu_dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + _starpu_dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + _starpu_dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L210: */ + } + } + } + } +/* L220: */ + } + +/* ==== End of DLAQR5 ==== */ + + return 0; +} /* _starpu_dlaqr5_ */ diff --git a/min-dgels/base/SRC/dlaqsb.c b/min-dgels/base/SRC/dlaqsb.c new file mode 100644 index 0000000..d19d57d --- /dev/null +++ b/min-dgels/base/SRC/dlaqsb.c @@ -0,0 +1,185 @@ +/* dlaqsb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + char *equed) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j; + doublereal cj, large; + extern logical _starpu_lsame_(char *, char *); + doublereal small; + extern doublereal _starpu_dlamch_(char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQSB equilibrates a symmetric band matrix A using the scaling */ +/* factors in the vector S. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of super-diagonals of the matrix A if UPLO = 'U', */ +/* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. 
The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, if EQUED = 'Y', the equilibrated matrix: */ +/* diag(S) * A * diag(S), in the same storage format as A. */ +/* If EQUED = 'N', AB is unchanged on exit. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* S (input) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A. */ + +/* SCOND (input) DOUBLE PRECISION */ +/* Ratio of the smallest S(i) to the largest S(i). */ + +/* AMAX (input) DOUBLE PRECISION */ +/* Absolute value of largest matrix entry. */ + +/* EQUED (output) CHARACTER*1 */ +/* Specifies whether or not equilibration was done. */ +/* = 'N': No equilibration. */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ + +/* Internal Parameters */ +/* =================== */ + +/* THRESH is a threshold value used to decide if scaling should be done */ +/* based on the ratio of the scaling factors. If SCOND < THRESH, */ +/* scaling is done. */ + +/* LARGE and SMALL are threshold values used to decide if scaling should */ +/* be done based on the absolute size of the largest matrix element. */ +/* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --s; + + /* Function Body */ + if (*n <= 0) { + *(unsigned char *)equed = 'N'; + return 0; + } + +/* Initialize LARGE and SMALL. 
*/ + + small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + large = 1. / small; + + if (*scond >= .1 && *amax >= small && *amax <= large) { + +/* No equilibration */ + + *(unsigned char *)equed = 'N'; + } else { + +/* Replace A by diag(S) * A * diag(S). */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Upper triangle of A is stored in band format. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; +/* Computing MAX */ + i__2 = 1, i__3 = j - *kd; + i__4 = j; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + ab[*kd + 1 + i__ - j + j * ab_dim1] = cj * s[i__] * ab[* + kd + 1 + i__ - j + j * ab_dim1]; +/* L10: */ + } +/* L20: */ + } + } else { + +/* Lower triangle of A is stored. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; +/* Computing MIN */ + i__2 = *n, i__3 = j + *kd; + i__4 = min(i__2,i__3); + for (i__ = j; i__ <= i__4; ++i__) { + ab[i__ + 1 - j + j * ab_dim1] = cj * s[i__] * ab[i__ + 1 + - j + j * ab_dim1]; +/* L30: */ + } +/* L40: */ + } + } + *(unsigned char *)equed = 'Y'; + } + + return 0; + +/* End of DLAQSB */ + +} /* _starpu_dlaqsb_ */ diff --git a/min-dgels/base/SRC/dlaqsp.c b/min-dgels/base/SRC/dlaqsp.c new file mode 100644 index 0000000..6b5878c --- /dev/null +++ b/min-dgels/base/SRC/dlaqsp.c @@ -0,0 +1,169 @@ +/* dlaqsp.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, char *equed) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, jc; + doublereal cj, large; + extern logical _starpu_lsame_(char *, char *); + doublereal small; + extern doublereal _starpu_dlamch_(char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQSP equilibrates a symmetric matrix A using the scaling factors */ +/* in the vector S. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, the equilibrated matrix: diag(S) * A * diag(S), in */ +/* the same storage format as A. 
*/ + +/* S (input) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A. */ + +/* SCOND (input) DOUBLE PRECISION */ +/* Ratio of the smallest S(i) to the largest S(i). */ + +/* AMAX (input) DOUBLE PRECISION */ +/* Absolute value of largest matrix entry. */ + +/* EQUED (output) CHARACTER*1 */ +/* Specifies whether or not equilibration was done. */ +/* = 'N': No equilibration. */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ + +/* Internal Parameters */ +/* =================== */ + +/* THRESH is a threshold value used to decide if scaling should be done */ +/* based on the ratio of the scaling factors. If SCOND < THRESH, */ +/* scaling is done. */ + +/* LARGE and SMALL are threshold values used to decide if scaling should */ +/* be done based on the absolute size of the largest matrix element. */ +/* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --s; + --ap; + + /* Function Body */ + if (*n <= 0) { + *(unsigned char *)equed = 'N'; + return 0; + } + +/* Initialize LARGE and SMALL. */ + + small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + large = 1. / small; + + if (*scond >= .1 && *amax >= small && *amax <= large) { + +/* No equilibration */ + + *(unsigned char *)equed = 'N'; + } else { + +/* Replace A by diag(S) * A * diag(S). */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Upper triangle of A is stored. */ + + jc = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ap[jc + i__ - 1] = cj * s[i__] * ap[jc + i__ - 1]; +/* L10: */ + } + jc += j; +/* L20: */ + } + } else { + +/* Lower triangle of A is stored. 
*/ + + jc = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ap[jc + i__ - j] = cj * s[i__] * ap[jc + i__ - j]; +/* L30: */ + } + jc = jc + *n - j + 1; +/* L40: */ + } + } + *(unsigned char *)equed = 'Y'; + } + + return 0; + +/* End of DLAQSP */ + +} /* _starpu_dlaqsp_ */ diff --git a/min-dgels/base/SRC/dlaqsy.c b/min-dgels/base/SRC/dlaqsy.c new file mode 100644 index 0000000..8be3903 --- /dev/null +++ b/min-dgels/base/SRC/dlaqsy.c @@ -0,0 +1,172 @@ +/* dlaqsy.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal cj, large; + extern logical _starpu_lsame_(char *, char *); + doublereal small; + extern doublereal _starpu_dlamch_(char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQSY equilibrates a symmetric matrix A using the scaling factors */ +/* in the vector S. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. 
*/ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n by n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n by n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if EQUED = 'Y', the equilibrated matrix: */ +/* diag(S) * A * diag(S). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(N,1). */ + +/* S (input) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A. */ + +/* SCOND (input) DOUBLE PRECISION */ +/* Ratio of the smallest S(i) to the largest S(i). */ + +/* AMAX (input) DOUBLE PRECISION */ +/* Absolute value of largest matrix entry. */ + +/* EQUED (output) CHARACTER*1 */ +/* Specifies whether or not equilibration was done. */ +/* = 'N': No equilibration. */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ + +/* Internal Parameters */ +/* =================== */ + +/* THRESH is a threshold value used to decide if scaling should be done */ +/* based on the ratio of the scaling factors. If SCOND < THRESH, */ +/* scaling is done. */ + +/* LARGE and SMALL are threshold values used to decide if scaling should */ +/* be done based on the absolute size of the largest matrix element. */ +/* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + + /* Function Body */ + if (*n <= 0) { + *(unsigned char *)equed = 'N'; + return 0; + } + +/* Initialize LARGE and SMALL. */ + + small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + large = 1. / small; + + if (*scond >= .1 && *amax >= small && *amax <= large) { + +/* No equilibration */ + + *(unsigned char *)equed = 'N'; + } else { + +/* Replace A by diag(S) * A * diag(S). */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Upper triangle of A is stored. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = cj * s[i__] * a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else { + +/* Lower triangle of A is stored. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + cj = s[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = cj * s[i__] * a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } + *(unsigned char *)equed = 'Y'; + } + + return 0; + +/* End of DLAQSY */ + +} /* _starpu_dlaqsy_ */ diff --git a/min-dgels/base/SRC/dlaqtr.c b/min-dgels/base/SRC/dlaqtr.c new file mode 100644 index 0000000..fd62276 --- /dev/null +++ b/min-dgels/base/SRC/dlaqtr.c @@ -0,0 +1,832 @@ +/* dlaqtr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static logical c_false = FALSE_; +static integer c__2 = 2; +static doublereal c_b21 = 1.; +static doublereal c_b25 = 0.; +static logical c_true = TRUE_; + +/* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, + doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal + *scale, doublereal *x, doublereal *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + doublereal d__[4] /* was [2][2] */; + integer i__, j, k; + doublereal v[4] /* was [2][2] */, z__; + integer j1, j2, n1, n2; + doublereal si, xj, sr, rec, eps, tjj, tmp; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer ierr; + doublereal smin, xmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer jnext; + doublereal sminw, xnorm; + extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * +, doublereal *, integer *, doublereal *, doublereal *, integer *); + extern doublereal 
_starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal scaloc; + extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + doublereal bignum; + logical notran; + doublereal smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAQTR solves the real quasi-triangular system */ + +/* op(T)*p = scale*c, if LREAL = .TRUE. */ + +/* or the complex quasi-triangular systems */ + +/* op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. */ + +/* in real arithmetic, where T is upper quasi-triangular. */ +/* If LREAL = .FALSE., then the first diagonal block of T must be */ +/* 1 by 1, B is the specially structured matrix */ + +/* B = [ b(1) b(2) ... b(n) ] */ +/* [ w ] */ +/* [ w ] */ +/* [ . ] */ +/* [ w ] */ + +/* op(A) = A or A', A' denotes the conjugate transpose of */ +/* matrix A. */ + +/* On input, X = [ c ]. On output, X = [ p ]. */ +/* [ d ] [ q ] */ + +/* This subroutine is designed for the condition number estimation */ +/* in routine DTRSNA. */ + +/* Arguments */ +/* ========= */ + +/* LTRAN (input) LOGICAL */ +/* On entry, LTRAN specifies the option of conjugate transpose: */ +/* = .FALSE., op(T+i*B) = T+i*B, */ +/* = .TRUE., op(T+i*B) = (T+i*B)'. */ + +/* LREAL (input) LOGICAL */ +/* On entry, LREAL specifies the input matrix structure: */ +/* = .FALSE., the input is complex */ +/* = .TRUE., the input is real */ + +/* N (input) INTEGER */ +/* On entry, N specifies the order of T+i*B. N >= 0. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ +/* On entry, T contains a matrix in Schur canonical form. 
*/ +/* If LREAL = .FALSE., then the first diagonal block of T must */ +/* be 1 by 1. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the matrix T. LDT >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, B contains the elements to form the matrix */ +/* B as described above. */ +/* If LREAL = .TRUE., B is not referenced. */ + +/* W (input) DOUBLE PRECISION */ +/* On entry, W is the diagonal element of the matrix B. */ +/* If LREAL = .TRUE., W is not referenced. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* On exit, SCALE is the scale factor. */ + +/* X (input/output) DOUBLE PRECISION array, dimension (2*N) */ +/* On entry, X contains the right hand side of the system. */ +/* On exit, X is overwritten by the solution. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* On exit, INFO is set to */ +/* 0: successful exit. */ +/* 1: some diagonal 1 by 1 block has been perturbed by */ +/* a small number SMIN to keep nonsingularity. */ +/* 2: some diagonal 2 by 2 block has been perturbed by */ +/* a small number in DLALN2 to keep nonsingularity. */ +/* NOTE: In the interests of speed, this routine does not */ +/* check the inputs for errors. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Do not test the input parameters for errors */ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + --b; + --x; + --work; + + /* Function Body */ + notran = ! 
(*ltran); + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + bignum = 1. / smlnum; + + xnorm = _starpu_dlange_("M", n, n, &t[t_offset], ldt, d__); + if (! (*lreal)) { +/* Computing MAX */ + d__1 = xnorm, d__2 = abs(*w), d__1 = max(d__1,d__2), d__2 = _starpu_dlange_( + "M", n, &c__1, &b[1], n, d__); + xnorm = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = smlnum, d__2 = eps * xnorm; + smin = max(d__1,d__2); + +/* Compute 1-norm of each column of strictly upper triangular */ +/* part of T to control overflow in triangular solver. */ + + work[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + work[j] = _starpu_dasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L10: */ + } + + if (! (*lreal)) { + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + work[i__] += (d__1 = b[i__], abs(d__1)); +/* L20: */ + } + } + + n2 = *n << 1; + n1 = *n; + if (! (*lreal)) { + n1 = n2; + } + k = _starpu_idamax_(&n1, &x[1], &c__1); + xmax = (d__1 = x[k], abs(d__1)); + *scale = 1.; + + if (xmax > bignum) { + *scale = bignum / xmax; + _starpu_dscal_(&n1, scale, &x[1], &c__1); + xmax = bignum; + } + + if (*lreal) { + + if (notran) { + +/* Solve T*p = scale*c */ + + jnext = *n; + for (j = *n; j >= 1; --j) { + if (j > jnext) { + goto L30; + } + j1 = j; + j2 = j; + jnext = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnext = j - 2; + } + } + + if (j1 == j2) { + +/* Meet 1 by 1 diagonal block */ + +/* Scale to avoid overflow when computing */ +/* x(j) = b(j)/T(j,j) */ + + xj = (d__1 = x[j1], abs(d__1)); + tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)); + tmp = t[j1 + j1 * t_dim1]; + if (tjj < smin) { + tmp = smin; + tjj = smin; + *info = 1; + } + + if (xj == 0.) { + goto L30; + } + + if (tjj < 1.) { + if (xj > bignum * tjj) { + rec = 1. 
/ xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j1] /= tmp; + xj = (d__1 = x[j1], abs(d__1)); + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j1 of T. */ + + if (xj > 1.) { + rec = 1. / xj; + if (work[j1] > (bignum - xmax) * rec) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } + if (j1 > 1) { + i__1 = j1 - 1; + d__1 = -x[j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + i__1 = j1 - 1; + k = _starpu_idamax_(&i__1, &x[1], &c__1); + xmax = (d__1 = x[k], abs(d__1)); + } + + } else { + +/* Meet 2 by 2 diagonal block */ + +/* Call 2 by 2 linear system solve, to take */ +/* care of possible overflow by scaling factor. */ + + d__[0] = x[j1]; + d__[1] = x[j2]; + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 + * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & + c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 2; + } + + if (scaloc != 1.) { + _starpu_dscal_(n, &scaloc, &x[1], &c__1); + *scale *= scaloc; + } + x[j1] = v[0]; + x[j2] = v[1]; + +/* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) */ +/* to avoid overflow in updating right-hand side. */ + +/* Computing MAX */ + d__1 = abs(v[0]), d__2 = abs(v[1]); + xj = max(d__1,d__2); + if (xj > 1.) { + rec = 1. 
/ xj; +/* Computing MAX */ + d__1 = work[j1], d__2 = work[j2]; + if (max(d__1,d__2) > (bignum - xmax) * rec) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } + +/* Update right-hand side */ + + if (j1 > 1) { + i__1 = j1 - 1; + d__1 = -x[j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + i__1 = j1 - 1; + d__1 = -x[j2]; + _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + i__1 = j1 - 1; + k = _starpu_idamax_(&i__1, &x[1], &c__1); + xmax = (d__1 = x[k], abs(d__1)); + } + + } + +L30: + ; + } + + } else { + +/* Solve T'*p = scale*c */ + + jnext = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (j < jnext) { + goto L40; + } + j1 = j; + j2 = j; + jnext = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnext = j + 2; + } + } + + if (j1 == j2) { + +/* 1 by 1 diagonal block */ + +/* Scale if necessary to avoid overflow in forming the */ +/* right-hand side element by inner product. */ + + xj = (d__1 = x[j1], abs(d__1)); + if (xmax > 1.) { + rec = 1. / xmax; + if (work[j1] > (bignum - xj) * rec) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + i__2 = j1 - 1; + x[j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & + c__1); + + xj = (d__1 = x[j1], abs(d__1)); + tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)); + tmp = t[j1 + j1 * t_dim1]; + if (tjj < smin) { + tmp = smin; + tjj = smin; + *info = 1; + } + + if (tjj < 1.) { + if (xj > bignum * tjj) { + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j1] /= tmp; +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = x[j1], abs(d__1)); + xmax = max(d__2,d__3); + + } else { + +/* 2 by 2 diagonal block */ + +/* Scale if necessary to avoid overflow in forming the */ +/* right-hand side elements by inner product. 
*/ + +/* Computing MAX */ + d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], + abs(d__2)); + xj = max(d__3,d__4); + if (xmax > 1.) { + rec = 1. / xmax; +/* Computing MAX */ + d__1 = work[j2], d__2 = work[j1]; + if (max(d__1,d__2) > (bignum - xj) * rec) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + i__2 = j1 - 1; + d__[0] = x[j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, + &x[1], &c__1); + i__2 = j1 - 1; + d__[1] = x[j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, + &x[1], &c__1); + + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 * + t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, + &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 2; + } + + if (scaloc != 1.) { + _starpu_dscal_(n, &scaloc, &x[1], &c__1); + *scale *= scaloc; + } + x[j1] = v[0]; + x[j2] = v[1]; +/* Computing MAX */ + d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], + abs(d__2)), d__3 = max(d__3,d__4); + xmax = max(d__3,xmax); + + } +L40: + ; + } + } + + } else { + +/* Computing MAX */ + d__1 = eps * abs(*w); + sminw = max(d__1,smin); + if (notran) { + +/* Solve (T + iB)*(p+iq) = c+id */ + + jnext = *n; + for (j = *n; j >= 1; --j) { + if (j > jnext) { + goto L70; + } + j1 = j; + j2 = j; + jnext = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnext = j - 2; + } + } + + if (j1 == j2) { + +/* 1 by 1 diagonal block */ + +/* Scale if necessary to avoid overflow in division */ + + z__ = *w; + if (j1 == 1) { + z__ = b[1]; + } + xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( + d__2)); + tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)) + abs(z__); + tmp = t[j1 + j1 * t_dim1]; + if (tjj < sminw) { + tmp = sminw; + tjj = sminw; + *info = 1; + } + + if (xj == 0.) { + goto L70; + } + + if (tjj < 1.) { + if (xj > bignum * tjj) { + rec = 1. 
/ xj; + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + _starpu_dladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si); + x[j1] = sr; + x[*n + j1] = si; + xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( + d__2)); + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j1 of T. */ + + if (xj > 1.) { + rec = 1. / xj; + if (work[j1] > (bignum - xmax) * rec) { + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + } + } + + if (j1 > 1) { + i__1 = j1 - 1; + d__1 = -x[j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + i__1 = j1 - 1; + d__1 = -x[*n + j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[* + n + 1], &c__1); + + x[1] += b[j1] * x[*n + j1]; + x[*n + 1] -= b[j1] * x[j1]; + + xmax = 0.; + i__1 = j1 - 1; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = xmax, d__4 = (d__1 = x[k], abs(d__1)) + ( + d__2 = x[k + *n], abs(d__2)); + xmax = max(d__3,d__4); +/* L50: */ + } + } + + } else { + +/* Meet 2 by 2 diagonal block */ + + d__[0] = x[j1]; + d__[1] = x[j2]; + d__[2] = x[*n + j1]; + d__[3] = x[*n + j2]; + d__1 = -(*w); + _starpu_dlaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t[j1 + + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & + c_b25, &d__1, v, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 2; + } + + if (scaloc != 1.) { + i__1 = *n << 1; + _starpu_dscal_(&i__1, &scaloc, &x[1], &c__1); + *scale = scaloc * *scale; + } + x[j1] = v[0]; + x[j2] = v[1]; + x[*n + j1] = v[2]; + x[*n + j2] = v[3]; + +/* Scale X(J1), .... to avoid overflow in */ +/* updating right hand side. */ + +/* Computing MAX */ + d__1 = abs(v[0]) + abs(v[2]), d__2 = abs(v[1]) + abs(v[3]) + ; + xj = max(d__1,d__2); + if (xj > 1.) { + rec = 1. 
/ xj; +/* Computing MAX */ + d__1 = work[j1], d__2 = work[j2]; + if (max(d__1,d__2) > (bignum - xmax) * rec) { + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + } + } + +/* Update the right-hand side. */ + + if (j1 > 1) { + i__1 = j1 - 1; + d__1 = -x[j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + i__1 = j1 - 1; + d__1 = -x[j2]; + _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] +, &c__1); + + i__1 = j1 - 1; + d__1 = -x[*n + j1]; + _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[* + n + 1], &c__1); + i__1 = j1 - 1; + d__1 = -x[*n + j2]; + _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[* + n + 1], &c__1); + + x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2]; + x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2]; + + xmax = 0.; + i__1 = j1 - 1; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = (d__1 = x[k], abs(d__1)) + (d__2 = x[k + * + n], abs(d__2)); + xmax = max(d__3,xmax); +/* L60: */ + } + } + + } +L70: + ; + } + + } else { + +/* Solve (T + iB)'*(p+iq) = c+id */ + + jnext = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (j < jnext) { + goto L80; + } + j1 = j; + j2 = j; + jnext = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnext = j + 2; + } + } + + if (j1 == j2) { + +/* 1 by 1 diagonal block */ + +/* Scale if necessary to avoid overflow in forming the */ +/* right-hand side element by inner product. */ + + xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( + d__2)); + if (xmax > 1.) { + rec = 1. 
/ xmax; + if (work[j1] > (bignum - xj) * rec) { + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + i__2 = j1 - 1; + x[j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & + c__1); + i__2 = j1 - 1; + x[*n + j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[ + *n + 1], &c__1); + if (j1 > 1) { + x[j1] -= b[j1] * x[*n + 1]; + x[*n + j1] += b[j1] * x[1]; + } + xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( + d__2)); + + z__ = *w; + if (j1 == 1) { + z__ = b[1]; + } + +/* Scale if necessary to avoid overflow in */ +/* complex division */ + + tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)) + abs(z__); + tmp = t[j1 + j1 * t_dim1]; + if (tjj < sminw) { + tmp = sminw; + tjj = sminw; + *info = 1; + } + + if (tjj < 1.) { + if (xj > bignum * tjj) { + rec = 1. / xj; + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + d__1 = -z__; + _starpu_dladiv_(&x[j1], &x[*n + j1], &tmp, &d__1, &sr, &si); + x[j1] = sr; + x[j1 + *n] = si; +/* Computing MAX */ + d__3 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], + abs(d__2)); + xmax = max(d__3,xmax); + + } else { + +/* 2 by 2 diagonal block */ + +/* Scale if necessary to avoid overflow in forming the */ +/* right-hand side element by inner product. */ + +/* Computing MAX */ + d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], + abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( + d__4 = x[*n + j2], abs(d__4)); + xj = max(d__5,d__6); + if (xmax > 1.) { + rec = 1. 
/ xmax; +/* Computing MAX */ + d__1 = work[j1], d__2 = work[j2]; + if (max(d__1,d__2) > (bignum - xj) / xmax) { + _starpu_dscal_(&n2, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + i__2 = j1 - 1; + d__[0] = x[j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, + &x[1], &c__1); + i__2 = j1 - 1; + d__[1] = x[j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, + &x[1], &c__1); + i__2 = j1 - 1; + d__[2] = x[*n + j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], & + c__1, &x[*n + 1], &c__1); + i__2 = j1 - 1; + d__[3] = x[*n + j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], & + c__1, &x[*n + 1], &c__1); + d__[0] -= b[j1] * x[*n + 1]; + d__[1] -= b[j2] * x[*n + 1]; + d__[2] += b[j1] * x[1]; + d__[3] += b[j2] * x[1]; + + _starpu_dlaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 + * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & + c_b25, w, v, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 2; + } + + if (scaloc != 1.) { + _starpu_dscal_(&n2, &scaloc, &x[1], &c__1); + *scale = scaloc * *scale; + } + x[j1] = v[0]; + x[j2] = v[1]; + x[*n + j1] = v[2]; + x[*n + j2] = v[3]; +/* Computing MAX */ + d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], + abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( + d__4 = x[*n + j2], abs(d__4)), d__5 = max(d__5, + d__6); + xmax = max(d__5,xmax); + + } + +L80: + ; + } + + } + + } + + return 0; + +/* End of DLAQTR */ + +} /* _starpu_dlaqtr_ */ diff --git a/min-dgels/base/SRC/dlar1v.c b/min-dgels/base/SRC/dlar1v.c new file mode 100644 index 0000000..6b1a5a5 --- /dev/null +++ b/min-dgels/base/SRC/dlar1v.c @@ -0,0 +1,441 @@ +/* dlar1v.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal + *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * + lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical + *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, + integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, + doublereal *rqcorr, doublereal *work) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal s; + integer r1, r2; + doublereal eps, tmp; + integer neg1, neg2, indp, inds; + doublereal dplus; + extern doublereal _starpu_dlamch_(char *); + extern logical _starpu_disnan_(doublereal *); + integer indlpl, indumn; + doublereal dminus; + logical sawnan1, sawnan2; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAR1V computes the (scaled) r-th column of the inverse of */ +/* the submatrix in rows B1 through BN of the tridiagonal matrix */ +/* L D L^T - sigma I. When sigma is close to an eigenvalue, the */ +/* computed vector is an accurate eigenvector. Usually, r corresponds */ +/* to the index where the eigenvector is largest in magnitude. 
*/ +/* The following steps accomplish this computation : */ +/* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */ +/* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */ +/* (c) Computation of the diagonal elements of the inverse of */ +/* L D L^T - sigma I by combining the above transforms, and choosing */ +/* r as the index where the diagonal of the inverse is (one of the) */ +/* largest in magnitude. */ +/* (d) Computation of the (scaled) r-th column of the inverse using the */ +/* twisted factorization obtained by combining the top part of the */ +/* stationary and the bottom part of the progressive transform. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix L D L^T. */ + +/* B1 (input) INTEGER */ +/* First index of the submatrix of L D L^T. */ + +/* BN (input) INTEGER */ +/* Last index of the submatrix of L D L^T. */ + +/* LAMBDA (input) DOUBLE PRECISION */ +/* The shift. In order to compute an accurate eigenvector, */ +/* LAMBDA should be a good approximation to an eigenvalue */ +/* of L D L^T. */ + +/* L (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the unit bidiagonal matrix */ +/* L, in elements 1 to N-1. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the diagonal matrix D. */ + +/* LD (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The n-1 elements L(i)*D(i). */ + +/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The n-1 elements L(i)*L(i)*D(i). */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence. */ + +/* GAPTOL (input) DOUBLE PRECISION */ +/* Tolerance that indicates when eigenvector entries are negligible */ +/* w.r.t. their contribution to the residual. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On input, all entries of Z must be set to 0. 
*/ +/* On output, Z contains the (scaled) r-th column of the */ +/* inverse. The scaling is such that Z(R) equals 1. */ + +/* WANTNC (input) LOGICAL */ +/* Specifies whether NEGCNT has to be computed. */ + +/* NEGCNT (output) INTEGER */ +/* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */ +/* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */ + +/* ZTZ (output) DOUBLE PRECISION */ +/* The square of the 2-norm of Z. */ + +/* MINGMA (output) DOUBLE PRECISION */ +/* The reciprocal of the largest (in magnitude) diagonal */ +/* element of the inverse of L D L^T - sigma I. */ + +/* R (input/output) INTEGER */ +/* The twist index for the twisted factorization used to */ +/* compute Z. */ +/* On input, 0 <= R <= N. If R is input as 0, R is set to */ +/* the index where (L D L^T - sigma I)^{-1} is largest */ +/* in magnitude. If 1 <= R <= N, R is unchanged. */ +/* On output, R contains the twist index used to compute Z. */ +/* Ideally, R designates the position of the maximum entry in the */ +/* eigenvector. */ + +/* ISUPPZ (output) INTEGER array, dimension (2) */ +/* The support of the vector in Z, i.e., the vector Z is */ +/* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */ + +/* NRMINV (output) DOUBLE PRECISION */ +/* NRMINV = 1/SQRT( ZTZ ) */ + +/* RESID (output) DOUBLE PRECISION */ +/* The residual of the FP vector. */ +/* RESID = ABS( MINGMA )/SQRT( ZTZ ) */ + +/* RQCORR (output) DOUBLE PRECISION */ +/* The Rayleigh Quotient correction to LAMBDA. 
*/ +/* RQCORR = MINGMA*TMP, where TMP = 1/ZTZ */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift each array pointer back by one so the arrays are indexed from 1 */ + --work; + --isuppz; + --z__; + --lld; + --ld; + --l; + --d__; + + /* Function Body */ + eps = _starpu_dlamch_("Precision"); + if (*r__ == 0) { + r1 = *b1; + r2 = *bn; + } else { + r1 = *r__; + r2 = *r__; + } +/* Storage for LPLUS: offset into WORK */ + indlpl = 0; +/* Storage for UMINUS: offset into WORK; INDS and INDP below are further offsets into WORK */ + indumn = *n; + inds = (*n << 1) + 1; + indp = *n * 3 + 1; + if (*b1 == 1) { + work[inds] = 0.; + } else { + work[inds + *b1 - 1] = lld[*b1 - 1]; + } + +/* Compute the stationary transform (using the differential form) */ +/* until the index R2. */ + + sawnan1 = FALSE_; + neg1 = 0; + s = work[inds + *b1 - 1] - *lambda; + i__1 = r1 - 1; + for (i__ = *b1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + work[indlpl + i__] = ld[i__] / dplus; + if (dplus < 0.) 
{ + ++neg1; + } + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + s = work[inds + i__] - *lambda; +/* L50: */ + } + sawnan1 = _starpu_disnan_(&s); + if (sawnan1) { + goto L60; + } + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + work[indlpl + i__] = ld[i__] / dplus; + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + s = work[inds + i__] - *lambda; +/* L51: */ + } + sawnan1 = _starpu_disnan_(&s); + +L60: + if (sawnan1) { +/* Runs a slower version of the above loop if a NaN is detected */ + neg1 = 0; + s = work[inds + *b1 - 1] - *lambda; + i__1 = r1 - 1; + for (i__ = *b1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + if (abs(dplus) < *pivmin) { + dplus = -(*pivmin); + } + work[indlpl + i__] = ld[i__] / dplus; + if (dplus < 0.) { + ++neg1; + } + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + if (work[indlpl + i__] == 0.) { + work[inds + i__] = lld[i__]; + } + s = work[inds + i__] - *lambda; +/* L70: */ + } + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + if (abs(dplus) < *pivmin) { + dplus = -(*pivmin); + } + work[indlpl + i__] = ld[i__] / dplus; + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + if (work[indlpl + i__] == 0.) { + work[inds + i__] = lld[i__]; + } + s = work[inds + i__] - *lambda; +/* L71: */ + } + } + +/* Compute the progressive transform (using the differential form) */ +/* until the index R1 */ + + sawnan2 = FALSE_; + neg2 = 0; + work[indp + *bn - 1] = d__[*bn] - *lambda; + i__1 = r1; + for (i__ = *bn - 1; i__ >= i__1; --i__) { + dminus = lld[i__] + work[indp + i__]; + tmp = d__[i__] / dminus; + if (dminus < 0.) 
{ + ++neg2; + } + work[indumn + i__] = l[i__] * tmp; + work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; +/* L80: */ + } + tmp = work[indp + r1 - 1]; + sawnan2 = _starpu_disnan_(&tmp); + if (sawnan2) { +/* Runs a slower version of the above loop if a NaN is detected */ + neg2 = 0; + i__1 = r1; + for (i__ = *bn - 1; i__ >= i__1; --i__) { + dminus = lld[i__] + work[indp + i__]; + if (abs(dminus) < *pivmin) { + dminus = -(*pivmin); + } + tmp = d__[i__] / dminus; + if (dminus < 0.) { + ++neg2; + } + work[indumn + i__] = l[i__] * tmp; + work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; + if (tmp == 0.) { + work[indp + i__ - 1] = d__[i__] - *lambda; + } +/* L100: */ + } + } + +/* Find the index (from R1 to R2) of the largest (in magnitude) */ +/* diagonal element of the inverse */ + + *mingma = work[inds + r1 - 1] + work[indp + r1 - 1]; + if (*mingma < 0.) { + ++neg1; + } + if (*wantnc) { + *negcnt = neg1 + neg2; + } else { + *negcnt = -1; + } + if (abs(*mingma) == 0.) { + *mingma = eps * work[inds + r1 - 1]; + } + *r__ = r1; + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + tmp = work[inds + i__] + work[indp + i__]; + if (tmp == 0.) { + tmp = eps * work[inds + i__]; + } + if (abs(tmp) <= abs(*mingma)) { + *mingma = tmp; + *r__ = i__ + 1; + } +/* L110: */ + } + +/* Compute the FP vector: solve N^T v = e_r */ + + isuppz[1] = *b1; + isuppz[2] = *bn; + z__[*r__] = 1.; + *ztz = 1.; + +/* Compute the FP vector upwards from R */ + + if (! sawnan1 && ! sawnan2) { + i__1 = *b1; + for (i__ = *r__ - 1; i__ >= i__1; --i__) { + z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); + if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( + d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { + z__[i__] = 0.; + isuppz[1] = i__ + 1; + goto L220; + } + *ztz += z__[i__] * z__[i__]; +/* L210: */ + } +L220: + ; + } else { +/* Run slower loop if NaN occurred. */ + i__1 = *b1; + for (i__ = *r__ - 1; i__ >= i__1; --i__) { + if (z__[i__ + 1] == 0.) 
{ + z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2]; + } else { + z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); + } + if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( + d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { + z__[i__] = 0.; + isuppz[1] = i__ + 1; + goto L240; + } + *ztz += z__[i__] * z__[i__]; +/* L230: */ + } +L240: + ; + } +/* Compute the FP vector downwards from R, one element per iteration (no blocking is done here) */ + if (! sawnan1 && ! sawnan2) { + i__1 = *bn - 1; + for (i__ = *r__; i__ <= i__1; ++i__) { + z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); + if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( + d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { + z__[i__ + 1] = 0.; + isuppz[2] = i__; + goto L260; + } + *ztz += z__[i__ + 1] * z__[i__ + 1]; +/* L250: */ + } +L260: + ; + } else { +/* Run slower loop if NaN occurred. */ + i__1 = *bn - 1; + for (i__ = *r__; i__ <= i__1; ++i__) { + if (z__[i__] == 0.) { + z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1]; + } else { + z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); + } + if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( + d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { + z__[i__ + 1] = 0.; + isuppz[2] = i__; + goto L280; + } + *ztz += z__[i__ + 1] * z__[i__ + 1]; +/* L270: */ + } +L280: + ; + } + +/* Compute quantities for convergence test */ + + tmp = 1. / *ztz; + *nrminv = sqrt(tmp); + *resid = abs(*mingma) * *nrminv; + *rqcorr = *mingma * tmp; + + + return 0; + +/* End of DLAR1V */ + +} /* _starpu_dlar1v_ */ diff --git a/min-dgels/base/SRC/dlar2v.c b/min-dgels/base/SRC/dlar2v.c new file mode 100644 index 0000000..280ac50 --- /dev/null +++ b/min-dgels/base/SRC/dlar2v.c @@ -0,0 +1,121 @@ +/* dlar2v.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, + doublereal *z__, integer *incx, doublereal *c__, doublereal *s, + integer *incc) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__; + doublereal t1, t2, t3, t4, t5, t6; + integer ic; + doublereal ci, si; + integer ix; + doublereal xi, yi, zi; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAR2V applies a vector of real plane rotations from both sides to */ +/* a sequence of 2-by-2 real symmetric matrices, defined by the elements */ +/* of the vectors x, y and z. For i = 1,2,...,n */ + +/* ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) */ +/* ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of plane rotations to be applied. */ + +/* X (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* The vector x. */ + +/* Y (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* The vector y. */ + +/* Z (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* The vector z. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X, Y and Z. INCX > 0. 
*/ + +/* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ +/* The cosines of the plane rotations. */ + +/* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ +/* The sines of the plane rotations. */ + +/* INCC (input) INTEGER */ +/* The increment between elements of C and S. INCC > 0. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift each array pointer back by one so the arrays are indexed from 1 */ + --s; + --c__; + --z__; + --y; + --x; + + /* Function Body: ix indexes x, y and z and advances by *incx; ic indexes c and s and advances by *incc */ + ix = 1; + ic = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + xi = x[ix]; + yi = y[ix]; + zi = z__[ix]; + ci = c__[ic]; + si = s[ic]; + t1 = si * zi; + t2 = ci * zi; + t3 = t2 - si * xi; + t4 = t2 + si * yi; + t5 = ci * xi + t1; + t6 = ci * yi - t1; + x[ix] = ci * t5 + si * t4; + y[ix] = ci * t6 - si * t3; + z__[ix] = ci * t4 - si * t5; + ix += *incx; + ic += *incc; +/* L10: */ + } + +/* End of DLAR2V */ + + return 0; +} /* _starpu_dlar2v_ */ diff --git a/min-dgels/base/SRC/dlarf.c b/min-dgels/base/SRC/dlarf.c new file mode 100644 index 0000000..167887e --- /dev/null +++ b/min-dgels/base/SRC/dlarf.c @@ -0,0 +1,193 @@ +/* dlarf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b4 = 1.; +static doublereal c_b5 = 0.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastc, lastv; + extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), + _starpu_iladlr_(integer *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARF applies a real elementary reflector H to a real m by n matrix */ +/* C, from either the left or the right. H is represented in the form */ + +/* H = I - tau * v * v' */ + +/* where tau is a real scalar and v is a real vector. 
*/ + +/* If tau = 0, then H is taken to be the unit matrix. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': form H * C */ +/* = 'R': form C * H */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* V (input) DOUBLE PRECISION array, dimension */ +/* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* The vector v in the representation of H. V is not used if */ +/* TAU = 0. */ + +/* INCV (input) INTEGER */ +/* The increment between elements of v. INCV <> 0. */ + +/* TAU (input) DOUBLE PRECISION */ +/* The value tau in the representation of H. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* or C * H if SIDE = 'R'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* or (M) if SIDE = 'R' */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift v, c__ and work so they are indexed from 1 (c__ in both dimensions) */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body: lastv and lastc stay 0 when *tau == 0, so nothing is applied below */ + applyleft = _starpu_lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. 
*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V, moving i__ by -INCV for each row dropped. */ + while(lastv > 0 && v[i__] == 0.) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* Note that lastc.eq.0 renders the BLAS operations null; no special */ +/* case is needed at this level. */ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + _starpu_dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, & + v[1], incv, &c_b5, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ + + d__1 = -(*tau); + _starpu_dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + _starpu_dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, + &v[1], incv, &c_b5, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ + + d__1 = -(*tau); + _starpu_dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* _starpu_dlarf_ */ diff --git a/min-dgels/base/SRC/dlarfb.c b/min-dgels/base/SRC/dlarfb.c new file mode 100644 index 0000000..7829f2a --- /dev/null +++ b/min-dgels/base/SRC/dlarfb.c @@ -0,0 +1,774 @@ +/* dlarfb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b14 = 1.; +static doublereal c_b25 = -1.; + +/* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer lastv; + extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), + _starpu_iladlr_(integer *, integer *, doublereal *, integer *); + char transt[1]; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARFB applies a real block reflector H or its transpose H' to a */ +/* real m by n matrix C, from either the left or the right. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply H or H' from the Left */ +/* = 'R': apply H or H' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply H (No transpose) */ +/* = 'T': apply H' (Transpose) */ + +/* DIRECT (input) CHARACTER*1 */ +/* Indicates how H is formed from a product of elementary */ +/* reflectors */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Indicates how the vectors which define the elementary */ +/* reflectors are stored: */ +/* = 'C': Columnwise */ +/* = 'R': Rowwise */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* K (input) INTEGER */ +/* The order of the matrix T (= the number of elementary */ +/* reflectors whose product defines the block reflector). */ + +/* V (input) DOUBLE PRECISION array, dimension */ +/* (LDV,K) if STOREV = 'C' */ +/* (LDV,M) if STOREV = 'R' and SIDE = 'L' */ +/* (LDV,N) if STOREV = 'R' and SIDE = 'R' */ +/* The matrix V. See further details. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ +/* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ +/* if STOREV = 'R', LDV >= K. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The triangular k by k matrix T in the representation of the */ +/* block reflector. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. 
*/ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ + +/* LDWORK (input) INTEGER */ +/* The leading dimension of the array WORK. */ +/* If SIDE = 'L', LDWORK >= max(1,N); */ +/* if SIDE = 'R', LDWORK >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (_starpu_lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (_starpu_lsame_(storev, "C")) { + + if (_starpu_lsame_(direct, "F")) { + +/* Let V = ( V1 ) (first K rows) */ +/* ( V2 ) */ +/* where V1 is unit lower triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ + +/* W := C1' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b25, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b14, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + 
+/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b14, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* Let V = ( V1 ) */ +/* ( V2 ) (last K rows) */ +/* where V2 is unit upper triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ + +/* W := C2' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b25, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = 
(C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C2 (NOTE(review): copied from columns N-K+J here, while the C2 update below writes columns LASTV-K+J; these differ when LASTV < N -- confirm) */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (_starpu_lsame_(storev, "R")) { + + if (_starpu_lsame_(direct, "F")) { + +/* Let V = ( V1 V2 ) (V1: first K columns) */ +/* where V1 is unit upper triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ + +/* W := C1' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b14, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, + &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b14, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored 
in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* Let V = ( V1 V2 ) (V2: last K columns) */ +/* where V2 is unit lower triangular. 
*/ + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ + +/* W := C2' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* Computing MAX */ + i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := 
C * V' = (C1*V1' + C2*V2') (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b14, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* _starpu_dlarfb_ */ diff --git a/min-dgels/base/SRC/dlarfg.c b/min-dgels/base/SRC/dlarfg.c new file mode 100644 index 0000000..09405a4 --- /dev/null +++ b/min-dgels/base/SRC/dlarfg.c @@ -0,0 +1,170 @@ +/* dlarfg.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal xnorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + doublereal safmin, rsafmn; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARFG generates a real elementary reflector H of order n, such */ +/* that */ + +/* H * ( alpha ) = ( beta ), H' * H = I. */ +/* ( x ) ( 0 ) */ + +/* where alpha and beta are scalars, and x is an (n-1)-element real */ +/* vector. H is represented in the form */ + +/* H = I - tau * ( 1 ) * ( 1 v' ) , */ +/* ( v ) */ + +/* where tau is a real scalar and v is a real (n-1)-element */ +/* vector. */ + +/* If the elements of x are all zero, then tau = 0 and H is taken to be */ +/* the unit matrix. */ + +/* Otherwise 1 <= tau <= 2. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the elementary reflector. 
*/ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* On entry, the value alpha. */ +/* On exit, it is overwritten with the value beta. */ + +/* X (input/output) DOUBLE PRECISION array, dimension */ +/* (1+(N-2)*abs(INCX)) */ +/* On entry, the vector x. */ +/* On exit, it is overwritten with the vector v. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* TAU (output) DOUBLE PRECISION */ +/* The value tau. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. 
/ (*alpha - beta); + _starpu_dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy; the loop below multiplies BETA by SAFMIN once per rescaling counted in KNT */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFG */ + +} /* _starpu_dlarfg_ */ diff --git a/min-dgels/base/SRC/dlarfp.c b/min-dgels/base/SRC/dlarfp.c new file mode 100644 index 0000000..5baf1d5 --- /dev/null +++ b/min-dgels/base/SRC/dlarfp.c @@ -0,0 +1,192 @@ +/* dlarfp.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal xnorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + doublereal safmin, rsafmn; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARFP generates a real elementary reflector H of order n, such */ +/* that */ + +/* H * ( alpha ) = ( beta ), H' * H = I. */ +/* ( x ) ( 0 ) */ + +/* where alpha and beta are scalars, beta is non-negative, and x is */ +/* an (n-1)-element real vector. H is represented in the form */ + +/* H = I - tau * ( 1 ) * ( 1 v' ) , */ +/* ( v ) */ + +/* where tau is a real scalar and v is a real (n-1)-element */ +/* vector. */ + +/* If the elements of x are all zero, then tau = 0 and H is taken to be */ +/* the unit matrix. */ + +/* Otherwise 1 <= tau <= 2. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the elementary reflector. */ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* On entry, the value alpha. */ +/* On exit, it is overwritten with the value beta. */ + +/* X (input/output) DOUBLE PRECISION array, dimension */ +/* (1+(N-2)*abs(INCX)) */ +/* On entry, the vector x. */ +/* On exit, it is overwritten with the vector v. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* TAU (output) DOUBLE PRECISION */ +/* The value tau. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 */ + + if (*alpha >= 0.) { +/* When TAU.eq.ZERO, the vector is special-cased to be */ +/* all zeros in the application routines. We do not need */ +/* to clear it. 
*/ + *tau = 0.; + } else { +/* However, the application routines rely on explicit */ +/* zero checks when TAU.ne.ZERO, and we must clear X. */ + *tau = 2.; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + x[(j - 1) * *incx + 1] = 0.; + } + *alpha = -(*alpha); + } + } else { + +/* general case */ + + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = d_sign(&d__1, alpha); + safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); + d__1 = _starpu_dlapy2_(alpha, &xnorm); + beta = d_sign(&d__1, alpha); + } + *alpha += beta; + if (beta < 0.) { + beta = -beta; + *tau = -(*alpha) / beta; + } else { + *alpha = xnorm * (xnorm / *alpha); + *tau = *alpha / beta; + *alpha = -(*alpha); + } + i__1 = *n - 1; + d__1 = 1. / *alpha; + _starpu_dscal_(&i__1, &d__1, &x[1], incx); + +/* If BETA is subnormal, it may lose relative accuracy; the loop below multiplies BETA by SAFMIN once per rescaling counted in KNT */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFP */ + +} /* _starpu_dlarfp_ */ diff --git a/min-dgels/base/SRC/dlarft.c b/min-dgels/base/SRC/dlarft.c new file mode 100644 index 0000000..8fc445f --- /dev/null +++ b/min-dgels/base/SRC/dlarft.c @@ -0,0 +1,325 @@ +/* dlarft.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 0.; + +/* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, prevlastv; + doublereal vii; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastv; + extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARFT forms the triangular factor T of a real block reflector H */ +/* of order n, which is defined as a product of k elementary reflectors. */ + +/* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ + +/* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
*/ + +/* If STOREV = 'C', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th column of the array V, and */ + +/* H = I - V * T * V' */ + +/* If STOREV = 'R', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th row of the array V, and */ + +/* H = I - V' * T * V */ + +/* Arguments */ +/* ========= */ + +/* DIRECT (input) CHARACTER*1 */ +/* Specifies the order in which the elementary reflectors are */ +/* multiplied to form the block reflector: */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Specifies how the vectors which define the elementary */ +/* reflectors are stored (see also Further Details): */ +/* = 'C': columnwise */ +/* = 'R': rowwise */ + +/* N (input) INTEGER */ +/* The order of the block reflector H. N >= 0. */ + +/* K (input) INTEGER */ +/* The order of the triangular factor T (= the number of */ +/* elementary reflectors). K >= 1. */ + +/* V (input/output) DOUBLE PRECISION array, dimension */ +/* (LDV,K) if STOREV = 'C' */ +/* (LDV,N) if STOREV = 'R' */ +/* The matrix V. See further details. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i). */ + +/* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The k by k triangular factor T of the block reflector. */ +/* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* lower triangular. The rest of the array is not used. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. 
*/ + +/* Further Details */ +/* =============== */ + +/* The shape of the matrix V and the storage of the vectors which define */ +/* the H(i) is best illustrated by the following example with n = 5 and */ +/* k = 3. The elements equal to 1 are not stored; the corresponding */ +/* array elements are modified but restored on exit. The rest of the */ +/* array is not used. */ + +/* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ + +/* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* ( v1 v2 v3 ) */ +/* ( v1 v2 v3 ) */ + +/* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ + +/* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* ( 1 v3 ) */ +/* ( 1 ) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (_starpu_lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.; + if (_starpu_lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + break; + } + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + d__1 = -tau[i__]; + _starpu_dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[ + i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + break; + } + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + i__2 = i__ - 1; + i__3 = j - i__ + 1; + d__1 = -tau[i__]; + _starpu_dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b8, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (_starpu_lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + break; + } + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) := */ +/* - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + d__1 = -tau[i__]; + _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + break; + } + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) := */ +/* - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + d__1 = -tau[i__]; + _starpu_dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of DLARFT */ + +} /* _starpu_dlarft_ */ diff --git a/min-dgels/base/SRC/dlarfx.c b/min-dgels/base/SRC/dlarfx.c new file mode 100644 index 0000000..b5f2043 --- /dev/null +++ b/min-dgels/base/SRC/dlarfx.c @@ -0,0 +1,730 @@ +/* dlarfx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + + /* Local variables */ + integer j; + doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, + v8, v9, t10, v10, sum; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARFX applies a real elementary reflector H to a real m by n */ +/* matrix C, from either the left or the right. H is represented in the */ +/* form */ + +/* H = I - tau * v * v' */ + +/* where tau is a real scalar and v is a real vector. */ + +/* If tau = 0, then H is taken to be the unit matrix */ + +/* This version uses inline code if H has order < 11. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': form H * C */ +/* = 'R': form C * H */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. 
*/ + +/* V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' */ +/* or (N) if SIDE = 'R' */ +/* The vector v in the representation of H. */ + +/* TAU (input) DOUBLE PRECISION */ +/* The value tau in the representation of H. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* or C * H if SIDE = 'R'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDA >= (1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* or (M) if SIDE = 'R' */ +/* WORK is not referenced if H has order < 11. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.) { + return 0; + } + if (_starpu_lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* Code for general M */ + + _starpu_dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + 
+/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 
6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 
* c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* Code for general N */ + + _starpu_dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) 
{ + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 
* 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum 
* t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of DLARFX */ + +} /* _starpu_dlarfx_ */ diff --git a/min-dgels/base/SRC/dlargv.c b/min-dgels/base/SRC/dlargv.c new file mode 100644 index 0000000..27b28f0 --- /dev/null +++ b/min-dgels/base/SRC/dlargv.c @@ -0,0 +1,130 @@ +/* dlargv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, integer *incc) +{ + /* System generated locals */ + integer i__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal f, g; + integer i__; + doublereal t; + integer ic, ix, iy; + doublereal tt; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARGV generates a vector of real plane rotations, determined by */ +/* elements of the real vectors x and y. For i = 1,2,...,n */ + +/* ( c(i) s(i) ) ( x(i) ) = ( a(i) ) */ +/* ( -s(i) c(i) ) ( y(i) ) = ( 0 ) */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of plane rotations to be generated. */ + +/* X (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* On entry, the vector x. */ +/* On exit, x(i) is overwritten by a(i), for i = 1,...,n. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* Y (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCY) */ +/* On entry, the vector y. */ +/* On exit, the sines of the plane rotations. */ + +/* INCY (input) INTEGER */ +/* The increment between elements of Y. INCY > 0. 
*/ + +/* C (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ +/* The cosines of the plane rotations. */ + +/* INCC (input) INTEGER */ +/* The increment between elements of C. INCC > 0. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --c__; + --y; + --x; + + /* Function Body */ + ix = 1; + iy = 1; + ic = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + f = x[ix]; + g = y[iy]; + if (g == 0.) { + c__[ic] = 1.; + } else if (f == 0.) { + c__[ic] = 0.; + y[iy] = 1.; + x[ix] = g; + } else if (abs(f) > abs(g)) { + t = g / f; + tt = sqrt(t * t + 1.); + c__[ic] = 1. / tt; + y[iy] = t * c__[ic]; + x[ix] = f * tt; + } else { + t = f / g; + tt = sqrt(t * t + 1.); + y[iy] = 1. / tt; + c__[ic] = t * y[iy]; + x[ix] = g * tt; + } + ic += *incc; + iy += *incy; + ix += *incx; +/* L10: */ + } + return 0; + +/* End of DLARGV */ + +} /* _starpu_dlargv_ */ diff --git a/min-dgels/base/SRC/dlarnv.c b/min-dgels/base/SRC/dlarnv.c new file mode 100644 index 0000000..4028236 --- /dev/null +++ b/min-dgels/base/SRC/dlarnv.c @@ -0,0 +1,146 @@ +/* dlarnv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, + doublereal *x) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + double log(doublereal), sqrt(doublereal), cos(doublereal); + + /* Local variables */ + integer i__; + doublereal u[128]; + integer il, iv, il2; + extern /* Subroutine */ int _starpu_dlaruv_(integer *, integer *, doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARNV returns a vector of n random real numbers from a uniform or */ +/* normal distribution. */ + +/* Arguments */ +/* ========= */ + +/* IDIST (input) INTEGER */ +/* Specifies the distribution of the random numbers: */ +/* = 1: uniform (0,1) */ +/* = 2: uniform (-1,1) */ +/* = 3: normal (0,1) */ + +/* ISEED (input/output) INTEGER array, dimension (4) */ +/* On entry, the seed of the random number generator; the array */ +/* elements must be between 0 and 4095, and ISEED(4) must be */ +/* odd. */ +/* On exit, the seed is updated. */ + +/* N (input) INTEGER */ +/* The number of random numbers to be generated. */ + +/* X (output) DOUBLE PRECISION array, dimension (N) */ +/* The generated random numbers. 
*/ + +/* Further Details */ +/* =============== */ + +/* This routine calls the auxiliary routine DLARUV to generate random */ +/* real numbers from a uniform (0,1) distribution, in batches of up to */ +/* 128 using vectorisable code. The Box-Muller method is used to */ +/* transform numbers from a uniform to a normal distribution. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + --iseed; + + /* Function Body */ + i__1 = *n; + for (iv = 1; iv <= i__1; iv += 64) { +/* Computing MIN */ + i__2 = 64, i__3 = *n - iv + 1; + il = min(i__2,i__3); + if (*idist == 3) { + il2 = il << 1; + } else { + il2 = il; + } + +/* Call DLARUV to generate IL2 numbers from a uniform (0,1) */ +/* distribution (IL2 <= LV) */ + + _starpu_dlaruv_(&iseed[1], &il2, u); + + if (*idist == 1) { + +/* Copy generated numbers */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = u[i__ - 1]; +/* L10: */ + } + } else if (*idist == 2) { + +/* Convert generated numbers to uniform (-1,1) distribution */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.; +/* L20: */ + } + } else if (*idist == 3) { + +/* Convert generated numbers to normal (0,1) distribution */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) 
* cos(u[( + i__ << 1) - 1] * 6.2831853071795864769252867663); +/* L30: */ + } + } +/* L40: */ + } + return 0; + +/* End of DLARNV */ + +} /* _starpu_dlarnv_ */ diff --git a/min-dgels/base/SRC/dlarra.c b/min-dgels/base/SRC/dlarra.c new file mode 100644 index 0000000..ad10980 --- /dev/null +++ b/min-dgels/base/SRC/dlarra.c @@ -0,0 +1,156 @@ +/* dlarra.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, + doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, + integer *isplit, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal tmp1, eabs; /* TMP1 = |SPLTOL| * TNRM (absolute threshold); EABS = |E(I)| */ + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Compute the splitting points with threshold SPLTOL. */ +/* DLARRA sets any "small" off-diagonal elements to zero. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N > 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the N diagonal elements of the tridiagonal */ +/* matrix T. 
*/ + +/* E (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the first (N-1) entries contain the subdiagonal */ +/* elements of the tridiagonal matrix T; E(N) need not be set. */ +/* On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */ +/* are set to zero, the other entries of E are untouched. */ /* Note: the loops below only visit I = 1..N-1, so E(N) = E(ISPLIT(NSPLIT)) is never written. */ + +/* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the first (N-1) entries contain the SQUARES of the */ +/* subdiagonal elements of the tridiagonal matrix T; */ +/* E2(N) need not be set. */ +/* On exit, the entries E2( ISPLIT( I ) ), */ +/* 1 <= I <= NSPLIT, have been set to zero */ /* Note: as for E, E2(N) is never written by the code below. */ + +/* SPLTOL (input) DOUBLE PRECISION */ +/* The threshold for splitting. Two criteria can be used: */ +/* SPLTOL<0 : criterion based on absolute off-diagonal value */ +/* SPLTOL>0 : criterion that preserves relative accuracy */ /* Note: SPLTOL = 0 is handled by the same branch as SPLTOL>0. */ + +/* TNRM (input) DOUBLE PRECISION */ +/* The norm of the matrix. */ /* Only read when SPLTOL<0. */ + +/* NSPLIT (output) INTEGER */ +/* The number of blocks T splits into. 1 <= NSPLIT <= N. */ + +/* ISPLIT (output) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into blocks. */ +/* The first block consists of rows/columns 1 to ISPLIT(1), */ +/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ +/* etc., and the NSPLIT-th consists of rows/columns */ +/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ + + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ /* This is the only value the code below ever stores in INFO. 
*/ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. 
*/ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: step every array pointer back one element so the 1-based Fortran indices used below address the caller's arrays */ + --isplit; + --e2; + --e; + --d__; + + /* Function Body */ + *info = 0; /* never changed afterwards */ +/* Compute splitting points */ + *nsplit = 1; /* NSPLIT also serves as the next free slot of ISPLIT */ + if (*spltol < 0.) { +/* Criterion based on absolute off-diagonal value: E(I) is dropped when |E(I)| <= |SPLTOL| * TNRM */ + tmp1 = abs(*spltol) * *tnrm; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + eabs = (d__1 = e[i__], abs(d__1)); + if (eabs <= tmp1) { + e[i__] = 0.; + e2[i__] = 0.; + isplit[*nsplit] = i__; + ++(*nsplit); + } +/* L9: */ + } + } else { +/* Criterion that guarantees relative accuracy: E(I) is dropped when |E(I)| <= SPLTOL * sqrt(|D(I)|) * sqrt(|D(I+1)|) */ + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + eabs = (d__1 = e[i__], abs(d__1)); + if (eabs <= *spltol * sqrt((d__1 = d__[i__], abs(d__1))) * sqrt(( + d__2 = d__[i__ + 1], abs(d__2)))) { + e[i__] = 0.; + e2[i__] = 0.; + isplit[*nsplit] = i__; + ++(*nsplit); + } +/* L10: */ + } + } + isplit[*nsplit] = *n; /* the last block always ends at row/column N */ + return 0; + +/* End of DLARRA */ + +} /* _starpu_dlarra_ */ diff --git a/min-dgels/base/SRC/dlarrb.c b/min-dgels/base/SRC/dlarrb.c new file mode 100644 index 0000000..32a389e --- /dev/null +++ b/min-dgels/base/SRC/dlarrb.c @@ -0,0 +1,350 @@ +/* dlarrb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal *lld, + integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, + integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, + doublereal *work, integer *iwork, doublereal *pivmin, doublereal * + spdiam, integer *twist, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, k, r__, i1, ii, ip; + doublereal gap, mid, tmp, back, lgap, rgap, left; + integer iter, nint, prev, next; + doublereal cvrgd, right, width; + extern integer _starpu_dlaneg_(integer *, doublereal *, doublereal *, doublereal * +, doublereal *, integer *); + integer negcnt; + doublereal mnwdth; + integer olnint, maxitr; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Given the relatively robust representation(RRR) L D L^T, DLARRB */ +/* does "limited" bisection to refine the eigenvalues of L D L^T, */ +/* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ +/* guesses for these eigenvalues are input in W, the corresponding estimate */ +/* of the error in these guesses and their gaps are input in WERR */ +/* and WGAP, respectively. 
During bisection, intervals */ +/* [left, right] are maintained by storing their mid-points and */ +/* semi-widths in the arrays W and WERR respectively. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of the diagonal matrix D. */ + +/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (N-1) elements L(i)*L(i)*D(i). */ + +/* IFIRST (input) INTEGER */ +/* The index of the first eigenvalue to be computed. */ + +/* ILAST (input) INTEGER */ +/* The index of the last eigenvalue to be computed. */ + +/* RTOL1 (input) DOUBLE PRECISION */ +/* RTOL2 (input) DOUBLE PRECISION */ +/* Tolerance for the convergence of the bisection intervals. */ +/* An interval [LEFT,RIGHT] has converged if */ +/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ +/* where GAP is the (estimated) distance to the nearest */ +/* eigenvalue. */ /* Note: the code below compares the semi-width 0.5*(RIGHT-LEFT) against this bound using <=, and also accepts any semi-width <= 2*PIVMIN. */ + +/* OFFSET (input) INTEGER */ +/* Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */ +/* through ILAST-OFFSET elements of these arrays are to be used. */ + +/* W (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ +/* estimates of the eigenvalues of L D L^T indexed IFIRST through */ +/* ILAST. */ +/* On output, these estimates are refined. */ + +/* WGAP (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On input, the (estimated) gaps between consecutive */ +/* eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */ +/* eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */ +/* then WGAP(IFIRST-OFFSET) must be set to ZERO. */ +/* On output, these gaps are refined. 
*/ + +/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ +/* the errors in the estimates of the corresponding elements in W. 
*/ +/* On output, these errors are refined. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ +/* Workspace. */ /* Holds the interval ends: WORK(2*I-1) = LEFT, WORK(2*I) = RIGHT for eigenvalue index I (not I-OFFSET). */ + +/* IWORK (workspace) INTEGER array, dimension (2*N) */ +/* Workspace. */ /* IWORK(2*I-1) is the list link / convergence mark, IWORK(2*I) the stored negcount of RIGHT. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence. */ + +/* SPDIAM (input) DOUBLE PRECISION */ +/* The spectral diameter of the matrix. */ + +/* TWIST (input) INTEGER */ +/* The twist index for the twisted factorization that is used */ +/* for the negcount. */ +/* TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */ +/* TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */ +/* TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */ /* Values outside 1..N are replaced by N in the code below. */ + +/* INFO (output) INTEGER */ +/* Error flag. */ /* The code below only ever stores 0 here. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ + +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments: step every array pointer back one element so the 1-based Fortran indices used below address the caller's arrays */ + --iwork; + --work; + --werr; + --wgap; + --w; + --lld; + --d__; + + /* Function Body */ + *info = 0; + + maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + + 2; /* i.e. log2((SPDIAM+PIVMIN)/PIVMIN), truncated, plus 2 sweeps */ + mnwdth = *pivmin * 2.; /* semi-widths at or below this are accepted as converged */ + + r__ = *twist; + if (r__ < 1 || r__ > *n) { + r__ = *n; + } + +/* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ +/* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ +/* Count( WORK(2*I) ) is stored in IWORK( 2*I ). 
The integer IWORK( 2*I-1 ) */ +/* for an unconverged interval is set to the index of the next unconverged */ +/* interval, and is -1 or 0 for a converged interval. Thus a linked */ +/* list of unconverged intervals is set up. */ + + i1 = *ifirst; +/* The number of unconverged intervals */ + nint = 0; +/* The last unconverged interval found */ + prev = 0; + rgap = wgap[i1 - *offset]; /* seeds LGAP for I = IFIRST, so the first GAP equals WGAP(IFIRST-OFFSET) */ + i__1 = *ilast; + for (i__ = i1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + left = w[ii] - werr[ii]; + right = w[ii] + werr[ii]; + lgap = rgap; + rgap = wgap[ii]; + gap = min(lgap,rgap); +/* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ +/* Compute negcount from dstqds factorization L+D+L+^T = L D L^T - LEFT */ + +/* Do while( NEGCNT(LEFT).GT.I-1 ) */ + + back = werr[ii]; /* initial step; doubled after every failed test. NOTE(review): if WERR(II) is 0 the step stays 0 and LEFT cannot move - confirm callers never pass a zero error here */ +L20: + negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &left, pivmin, &r__); + if (negcnt > i__ - 1) { + left -= back; + back *= 2.; + goto L20; + } + +/* Do while( NEGCNT(RIGHT).LT.I ) */ +/* Compute negcount from dstqds factorization L+D+L+^T = L D L^T - RIGHT */ + + back = werr[ii]; /* same doubling step, now moving RIGHT upwards */ +L50: + negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &right, pivmin, &r__); + if (negcnt < i__) { + right += back; + back *= 2.; + goto L50; + } + width = (d__1 = left - right, abs(d__1)) * .5; /* semi-width of the bracketing interval */ +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + tmp = max(d__1,d__2); +/* Computing MAX */ + d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; + cvrgd = max(d__1,d__2); + if (width <= cvrgd || width <= mnwdth) { +/* This interval has already converged and does not need refinement. 
*/ +/* (Note that the gaps might change through refining the */ +/* eigenvalues, however, they can only get bigger.) */ +/* Remove it from the list. 
*/ + iwork[k - 1] = -1; /* -1 (not 0): the final update loop skips this entry, so W(II) and WERR(II) keep their input values */ +/* Make sure that I1 always points to the first unconverged interval */ + if (i__ == i1 && i__ < *ilast) { + i1 = i__ + 1; + } + if (prev >= i1 && i__ <= *ilast) { /* I <= ILAST always holds inside this loop; relinks the previous unconverged interval past I */ + iwork[(prev << 1) - 1] = i__ + 1; + } + } else { +/* unconverged interval found */ + prev = i__; + ++nint; + iwork[k - 1] = i__ + 1; + iwork[k] = negcnt; /* negcount returned by the last DLANEG call, i.e. at RIGHT */ + } + work[k - 1] = left; + work[k] = right; +/* L75: */ + } + +/* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ +/* and while (ITER.LT.MAXITR) */ + + iter = 0; +L80: + prev = i1 - 1; + i__ = i1; + olnint = nint; + i__1 = olnint; + for (ip = 1; ip <= i__1; ++ip) { /* IP only counts the OLNINT list entries; I follows the NEXT links */ + k = i__ << 1; + ii = i__ - *offset; + rgap = wgap[ii]; + lgap = rgap; + if (ii > 1) { + lgap = wgap[ii - 1]; + } + gap = min(lgap,rgap); + next = iwork[k - 1]; + left = work[k - 1]; + right = work[k]; + mid = (left + right) * .5; +/* semiwidth of interval */ + width = right - mid; +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + tmp = max(d__1,d__2); +/* Computing MAX */ + d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; + cvrgd = max(d__1,d__2); + if (width <= cvrgd || width <= mnwdth || iter == maxitr) { +/* reduce number of unconverged intervals */ + --nint; +/* Mark interval as converged. 
*/ + iwork[k - 1] = 0; + if (i1 == i__) { + i1 = next; + } else { +/* Prev holds the last unconverged interval previously examined */ + if (prev >= i1) { + iwork[(prev << 1) - 1] = next; + } + } + i__ = next; + goto L100; + } + prev = i__; + +/* Perform one bisection step */ + + negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &mid, pivmin, &r__); + if (negcnt <= i__ - 1) { /* MID still satisfies the left-end condition NEGCNT <= I-1, so it becomes the new LEFT; otherwise the new RIGHT */ + work[k - 1] = mid; + } else { + work[k] = mid; + } + i__ = next; +L100: + ; + } + ++iter; +/* do another loop if there are still unconverged intervals */ +/* However, in the last iteration, all intervals are accepted */ +/* since this is the best we can do. 
*/ + if (nint > 0 && iter <= maxitr) { + goto L80; + } + + +/* At this point, all the intervals have converged */ + i__1 = *ilast; + for (i__ = *ifirst; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; +/* All intervals marked by '0' have been refined. */ + if (iwork[k - 1] == 0) { + w[ii] = (work[k - 1] + work[k]) * .5; /* midpoint of the refined interval */ + werr[ii] = work[k] - w[ii]; /* its semi-width */ + } +/* L110: */ + } + + i__1 = *ilast; + for (i__ = *ifirst + 1; i__ <= i__1; ++i__) { + k = i__ << 1; /* K is not used in this loop */ + ii = i__ - *offset; +/* Computing MAX */ + d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1]; + wgap[ii - 1] = max(d__1,d__2); /* distance between neighbouring refined intervals, clamped at zero */ +/* L111: */ + } + return 0; + +/* End of DLARRB */ + +} /* _starpu_dlarrb_ */ diff --git a/min-dgels/base/SRC/dlarrc.c b/min-dgels/base/SRC/dlarrc.c new file mode 100644 index 0000000..d89733f --- /dev/null +++ b/min-dgels/base/SRC/dlarrc.c @@ -0,0 +1,183 @@ +/* dlarrc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, + doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, + integer *eigcnt, integer *lcnt, integer *rcnt, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + integer i__; + doublereal sl, su, tmp, tmp2; + logical matt; + extern logical _starpu_lsame_(char *, char *); + doublereal lpivot, rpivot; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. 
Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Find the number of eigenvalues of the symmetric tridiagonal matrix T */ +/* that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */ +/* if JOBT = 'L'. */ + +/* Arguments */ +/* ========= */ + +/* JOBT (input) CHARACTER*1 */ +/* = 'T': Compute Sturm count for matrix T. */ +/* = 'L': Compute Sturm count for matrix L D L^T. */ /* Note: the code only tests JOBT against "T"; every other value takes the L D L^T branch. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N > 0. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* The lower and upper bounds for the eigenvalues. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */ +/* JOBT = 'L': The N diagonal elements of the diagonal matrix D. */ + +/* E (input) DOUBLE PRECISION array, dimension (N) */ +/* JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */ +/* JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence for T. */ /* NOTE(review): PIVMIN is not referenced anywhere in the code below. */ + +/* EIGCNT (output) INTEGER */ +/* The number of eigenvalues of the symmetric tridiagonal matrix T */ +/* that are in the interval (VL,VU] */ /* Computed as RCNT - LCNT. */ + +/* LCNT (output) INTEGER */ +/* RCNT (output) INTEGER */ +/* The left and right negcounts of the interval. */ /* LCNT is counted at VL, RCNT at VU. */ + +/* INFO (output) INTEGER */ /* The code below only ever stores 0 here. 
*/ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments: step the array pointers back one element so the 1-based Fortran indices used below address the caller's arrays */ + --e; + --d__; + + /* Function Body */ + *info = 0; + *lcnt = 0; + *rcnt = 0; + *eigcnt = 0; + matt = _starpu_lsame_(jobt, "T"); + if (matt) { +/* Sturm sequence count on T */ /* Counts the non-positive terms of p(1) = D(1) - X, p(i+1) = D(i+1) - X - E(i)^2 / p(i), for X = VL and X = VU */ + lpivot = d__[1] - *vl; + rpivot = d__[1] - *vu; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + tmp = d__1 * d__1; + lpivot = d__[i__ + 1] - *vl - tmp / lpivot; /* the previous pivot is divided by as-is; there is no guard against zero here */ + rpivot = d__[i__ + 1] - *vu - tmp / rpivot; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } +/* L10: */ + } + } else { +/* Sturm sequence count on L D L^T */ /* SL and SU carry the running shifts for VL and VU; they start at -VL and -VU */ + sl = -(*vl); + su = -(*vu); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + lpivot = d__[i__] + sl; + rpivot = d__[i__] + su; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + tmp = e[i__] * d__[i__] * e[i__]; + + tmp2 = tmp / lpivot; + if (tmp2 == 0.) { /* quotient is exactly zero: restart the shift from TMP - VL instead of SL*TMP2 - VL */ + sl = tmp - *vl; + } else { + sl = sl * tmp2 - *vl; + } + + tmp2 = tmp / rpivot; + if (tmp2 == 0.) { /* same special case for the VU side */ + su = tmp - *vu; + } else { + su = su * tmp2 - *vu; + } +/* L20: */ + } + lpivot = d__[*n] + sl; /* last pivot, handled outside the loop because E has only N-1 entries */ + rpivot = d__[*n] + su; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + } + *eigcnt = *rcnt - *lcnt; /* count at VU minus count at VL */ + return 0; + +/* end of DLARRC */ + +} /* _starpu_dlarrc_ */ diff --git a/min-dgels/base/SRC/dlarrd.c b/min-dgels/base/SRC/dlarrd.c new file mode 100644 index 0000000..03659d6 --- /dev/null +++ b/min-dgels/base/SRC/dlarrd.c @@ -0,0 +1,793 @@ +/* dlarrd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, + doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, + doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, + doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, + integer *iblock, integer *indexw, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, j, ib, ie, je, nb; + doublereal gl; + integer im, in; + doublereal gu; + integer iw, jee; + doublereal eps; + integer nwl; + doublereal wlu, wul; + integer nwu; + doublereal tmp1, tmp2; + integer iend, jblk, ioff, iout, itmp1, itmp2, jdisc; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + doublereal atoli; + integer iwoff, itmax; + doublereal wkill, rtoli, uflow, tnorm; + extern doublereal _starpu_dlamch_(char *); + integer ibegin; + extern /* Subroutine */ int _starpu_dlaebz_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer 
*, integer *); + integer irange, idiscl, idumma[1]; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer idiscu; + logical ncnvrg, toofew; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* -- April 2009 -- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARRD computes the eigenvalues of a symmetric tridiagonal */ +/* matrix T to suitable accuracy. This is an auxiliary code to be */ +/* called from DSTEMR. */ +/* The user may ask for all eigenvalues, all eigenvalues */ +/* in the half-open interval (VL, VU], or the IL-th through IU-th */ +/* eigenvalues. */ + +/* To avoid overflow, the matrix must be scaled so that its */ +/* largest element is no greater than overflow**(1/2) * */ +/* underflow**(1/4) in absolute value, and for greatest */ +/* accuracy, it should not be much smaller than that. */ + +/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ +/* Matrix", Report CS41, Computer Science Dept., Stanford */ +/* University, July 21, 1966. */ + +/* Arguments */ +/* ========= */ + +/* RANGE (input) CHARACTER */ +/* = 'A': ("All") all eigenvalues will be found. */ +/* = 'V': ("Value") all eigenvalues in the half-open interval */ +/* (VL, VU] will be found. */ +/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ +/* entire matrix) will be found. */ + +/* ORDER (input) CHARACTER */ +/* = 'B': ("By Block") the eigenvalues will be grouped by */ +/* split-off block (see IBLOCK, ISPLIT) and */ +/* ordered from smallest to largest within */ +/* the block. */ +/* = 'E': ("Entire matrix") */ +/* the eigenvalues for the entire matrix */ +/* will be ordered from smallest to */ +/* largest. 
*/ + +/* N (input) INTEGER */ +/* The order of the tridiagonal matrix T. N >= 0. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. Eigenvalues less than or equal */ +/* to VL, or greater than VU, will not be returned. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ +/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ +/* is (GERS(2*i-1), GERS(2*i)). */ + +/* RELTOL (input) DOUBLE PRECISION */ +/* The minimum relative width of an interval. When an interval */ +/* is narrower than RELTOL times the larger (in */ +/* magnitude) endpoint, then it is considered to be */ +/* sufficiently small, i.e., converged. Note: this should */ +/* always be at least radix*machine epsilon. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix T. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ + +/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot allowed in the Sturm sequence for T. */ + +/* NSPLIT (input) INTEGER */ +/* The number of diagonal blocks in the matrix T. */ +/* 1 <= NSPLIT <= N. */ + +/* ISPLIT (input) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into submatrices. 
*/ +/* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ +/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ +/* etc., and the NSPLIT-th consists of rows/columns */ +/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ +/* (Only the first NSPLIT elements will actually be used, but */ +/* since the user cannot know a priori what value NSPLIT will */ +/* have, N words must be reserved for ISPLIT.) */ + +/* M (output) INTEGER */ +/* The actual number of eigenvalues found. 0 <= M <= N. */ +/* (See also the description of INFO=2,3.) */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, the first M elements of W will contain the */ +/* eigenvalue approximations. DLARRD computes an interval */ +/* I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */ +/* approximation is given as the interval midpoint */ +/* W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */ +/* WERR(j) = abs( a_j - b_j)/2 */ + +/* WERR (output) DOUBLE PRECISION array, dimension (N) */ +/* The error bound on the corresponding eigenvalue approximation */ +/* in W. */ + +/* WL (output) DOUBLE PRECISION */ +/* WU (output) DOUBLE PRECISION */ +/* The interval (WL, WU] contains all the wanted eigenvalues. */ +/* If RANGE='V', then WL=VL and WU=VU. */ +/* If RANGE='A', then WL and WU are the global Gerschgorin bounds */ +/* on the spectrum. */ +/* If RANGE='I', then WL and WU are computed by DLAEBZ from the */ +/* index range specified. */ + +/* IBLOCK (output) INTEGER array, dimension (N) */ +/* At each row/column j where E(j) is zero or small, the */ +/* matrix T is considered to split into a block diagonal */ +/* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ +/* block (from 1 to the number of blocks) the eigenvalue W(i) */ +/* belongs. (DLARRD may use the remaining N-M elements as */ +/* workspace.) 
*/ + +/* INDEXW (output) INTEGER array, dimension (N) */ +/* The indices of the eigenvalues within each block (submatrix); */ +/* for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */ +/* i-th eigenvalue W(i) is the j-th eigenvalue in block k. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: some or all of the eigenvalues failed to converge or */ +/* were not computed: */ +/* =1 or 3: Bisection failed to converge for some */ +/* eigenvalues; these eigenvalues are flagged by a */ +/* negative block number. The effect is that the */ +/* eigenvalues may not be as accurate as the */ +/* absolute and relative tolerances. This is */ +/* generally caused by unexpectedly inaccurate */ +/* arithmetic. */ +/* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ +/* IL:IU were found. */ +/* Effect: M < IU+1-IL */ +/* Cause: non-monotonic arithmetic, causing the */ +/* Sturm sequence to be non-monotonic. */ +/* Cure: recalculate, using RANGE='A', and pick */ +/* out eigenvalues IL:IU. In some cases, */ +/* increasing the PARAMETER "FUDGE" may */ +/* make things work. */ +/* = 4: RANGE='I', and the Gershgorin interval */ +/* initially used was too small. No eigenvalues */ +/* were computed. */ +/* Probable cause: your machine has sloppy */ +/* floating-point arithmetic. */ +/* Cure: Increase the PARAMETER "FUDGE", */ +/* recompile, and try again. */ + +/* Internal Parameters */ +/* =================== */ + +/* FUDGE DOUBLE PRECISION, default = 2 */ +/* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ +/* a value of 1 should work, but on machines with sloppy */ +/* arithmetic, this needs to be larger. The default for */ +/* publicly released versions should be large enough to handle */ +/* the worst machine around. 
Note that this has no effect */ +/* on accuracy of the solution. */ + +/* Based on contributions by */ +/* W. Kahan, University of California, Berkeley, USA */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --indexw; + --iblock; + --werr; + --w; + --isplit; + --e2; + --e; + --d__; + --gers; + + /* Function Body */ + *info = 0; + +/* Decode RANGE */ + + if (_starpu_lsame_(range, "A")) { + irange = 1; + } else if (_starpu_lsame_(range, "V")) { + irange = 2; + } else if (_starpu_lsame_(range, "I")) { + irange = 3; + } else { + irange = 0; + } + +/* Check for Errors */ + + if (irange <= 0) { + *info = -1; + } else if (! 
(_starpu_lsame_(order, "B") || _starpu_lsame_(order, + "E"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (irange == 2) { + if (*vl >= *vu) { + *info = -5; + } + } else if (irange == 3 && (*il < 1 || *il > max(1,*n))) { + *info = -6; + } else if (irange == 3 && (*iu < min(*n,*il) || *iu > *n)) { + *info = -7; + } + + if (*info != 0) { + return 0; + } +/* Initialize error flags */ + *info = 0; + ncnvrg = FALSE_; + toofew = FALSE_; +/* Quick return if possible */ + *m = 0; + if (*n == 0) { + return 0; + } +/* Simplification: */ + if (irange == 3 && *il == 1 && *iu == *n) { + irange = 1; + } +/* Get machine constants */ + eps = _starpu_dlamch_("P"); + uflow = _starpu_dlamch_("U"); +/* Special Case when N=1 */ +/* Treat case of 1x1 matrix for quick return */ + if (*n == 1) { + if (irange == 1 || irange == 2 && d__[1] > *vl && d__[1] <= *vu || + irange == 3 && *il == 1 && *iu == 1) { + *m = 1; + w[1] = d__[1]; +/* The computation error of the eigenvalue is zero */ + werr[1] = 0.; + iblock[1] = 1; + indexw[1] = 1; + } + return 0; + } +/* NB is the minimum vector length for vector bisection, or 0 */ +/* if only scalar is to be done. */ + nb = _starpu_ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); + if (nb <= 1) { + nb = 0; + } +/* Find global spectral radius */ + gl = d__[1]; + gu = d__[1]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ + d__1 = gl, d__2 = gers[(i__ << 1) - 1]; + gl = min(d__1,d__2); +/* Computing MAX */ + d__1 = gu, d__2 = gers[i__ * 2]; + gu = max(d__1,d__2); +/* L5: */ + } +/* Compute global Gerschgorin bounds and spectral diameter */ +/* Computing MAX */ + d__1 = abs(gl), d__2 = abs(gu); + tnorm = max(d__1,d__2); + gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.; + gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.; +/* [JAN/28/2009] remove the line below since SPDIAM variable not use */ +/* SPDIAM = GU - GL */ +/* Input arguments for DLAEBZ: */ +/* The relative tolerance. 
An interval (a,b] lies within */ +/* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), */ + rtoli = *reltol; +/* Set the absolute tolerance for interval convergence to zero to force */ +/* interval convergence based on relative size of the interval. */ +/* This is dangerous because intervals might not converge when RELTOL is */ +/* small. But at least a very small number should be selected so that for */ +/* strongly graded matrices, the code can get relatively accurate */ +/* eigenvalues. */ + atoli = uflow * 4. + *pivmin * 4.; + if (irange == 3) { +/* RANGE='I': Compute an interval containing eigenvalues */ +/* IL through IU. The initial interval [GL,GU] from the global */ +/* Gerschgorin bounds GL and GU is refined by DLAEBZ. */ + itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + + 2; + work[*n + 1] = gl; + work[*n + 2] = gl; + work[*n + 3] = gu; + work[*n + 4] = gu; + work[*n + 5] = gl; + work[*n + 6] = gu; + iwork[1] = -1; + iwork[2] = -1; + iwork[3] = *n + 1; + iwork[4] = *n + 1; + iwork[5] = *il - 1; + iwork[6] = *iu; + + _starpu_dlaebz_(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, & + d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5] +, &iout, &iwork[1], &w[1], &iblock[1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } +/* On exit, output intervals may not be ordered by ascending negcount */ + if (iwork[6] == *iu) { + *wl = work[*n + 1]; + wlu = work[*n + 3]; + nwl = iwork[1]; + *wu = work[*n + 4]; + wul = work[*n + 2]; + nwu = iwork[4]; + } else { + *wl = work[*n + 2]; + wlu = work[*n + 4]; + nwl = iwork[2]; + *wu = work[*n + 3]; + wul = work[*n + 1]; + nwu = iwork[3]; + } +/* On exit, the interval [WL, WLU] contains a value with negcount NWL, */ +/* and [WUL, WU] contains a value with negcount NWU. 
*/ + if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { + *info = 4; + return 0; + } + } else if (irange == 2) { + *wl = *vl; + *wu = *vu; + } else if (irange == 1) { + *wl = gl; + *wu = gu; + } +/* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */ +/* NWL accumulates the number of eigenvalues .le. WL, */ +/* NWU accumulates the number of eigenvalues .le. WU */ + *m = 0; + iend = 0; + *info = 0; + nwl = 0; + nwu = 0; + + i__1 = *nsplit; + for (jblk = 1; jblk <= i__1; ++jblk) { + ioff = iend; + ibegin = ioff + 1; + iend = isplit[jblk]; + in = iend - ioff; + + if (in == 1) { +/* 1x1 block */ + if (*wl >= d__[ibegin] - *pivmin) { + ++nwl; + } + if (*wu >= d__[ibegin] - *pivmin) { + ++nwu; + } + if (irange == 1 || *wl < d__[ibegin] - *pivmin && *wu >= d__[ + ibegin] - *pivmin) { + ++(*m); + w[*m] = d__[ibegin]; + werr[*m] = 0.; +/* The gap for a single block doesn't matter for the later */ +/* algorithm and is assigned an arbitrary large value */ + iblock[*m] = jblk; + indexw[*m] = 1; + } +/* Disabled 2x2 case because of a failure on the following matrix */ +/* RANGE = 'I', IL = IU = 4 */ +/* Original Tridiagonal, d = [ */ +/* -0.150102010615740E+00 */ +/* -0.849897989384260E+00 */ +/* -0.128208148052635E-15 */ +/* 0.128257718286320E-15 */ +/* ]; */ +/* e = [ */ +/* -0.357171383266986E+00 */ +/* -0.180411241501588E-15 */ +/* -0.175152352710251E-15 */ +/* ]; */ + +/* ELSE IF( IN.EQ.2 ) THEN */ +/* * 2x2 block */ +/* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */ +/* TMP1 = HALF*(D(IBEGIN)+D(IEND)) */ +/* L1 = TMP1 - DISC */ +/* IF( WL.GE. L1-PIVMIN ) */ +/* $ NWL = NWL + 1 */ +/* IF( WU.GE. L1-PIVMIN ) */ +/* $ NWU = NWU + 1 */ +/* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. 
*/ +/* $ L1-PIVMIN ) ) THEN */ +/* M = M + 1 */ +/* W( M ) = L1 */ +/* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ +/* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ +/* IBLOCK( M ) = JBLK */ +/* INDEXW( M ) = 1 */ +/* ENDIF */ +/* L2 = TMP1 + DISC */ +/* IF( WL.GE. L2-PIVMIN ) */ +/* $ NWL = NWL + 1 */ +/* IF( WU.GE. L2-PIVMIN ) */ +/* $ NWU = NWU + 1 */ +/* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */ +/* $ L2-PIVMIN ) ) THEN */ +/* M = M + 1 */ +/* W( M ) = L2 */ +/* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ +/* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ +/* IBLOCK( M ) = JBLK */ +/* INDEXW( M ) = 2 */ +/* ENDIF */ + } else { +/* General Case - block of size IN >= 2 */ +/* Compute local Gerschgorin interval and use it as the initial */ +/* interval for DLAEBZ */ + gu = d__[ibegin]; + gl = d__[ibegin]; + tmp1 = 0.; + i__2 = iend; + for (j = ibegin; j <= i__2; ++j) { +/* Computing MIN */ + d__1 = gl, d__2 = gers[(j << 1) - 1]; + gl = min(d__1,d__2); +/* Computing MAX */ + d__1 = gu, d__2 = gers[j * 2]; + gu = max(d__1,d__2); +/* L40: */ + } +/* [JAN/28/2009] */ +/* change SPDIAM by TNORM in lines 2 and 3 thereafter */ +/* line 1: remove computation of SPDIAM (not useful anymore) */ +/* SPDIAM = GU - GL */ +/* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */ +/* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */ + gl = gl - tnorm * 2. * eps * in - *pivmin * 2.; + gu = gu + tnorm * 2. 
* eps * in + *pivmin * 2.; + + if (irange > 1) { + if (gu < *wl) { +/* the local block contains none of the wanted eigenvalues */ + nwl += in; + nwu += in; + goto L70; + } +/* refine search interval if possible, only range (WL,WU] matters */ + gl = max(gl,*wl); + gu = min(gu,*wu); + if (gl >= gu) { + goto L70; + } + } +/* Find negcount of initial interval boundaries GL and GU */ + work[*n + 1] = gl; + work[*n + in + 1] = gu; + _starpu_dlaebz_(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, + pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & + w[*m + 1], &iblock[*m + 1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + + nwl += iwork[1]; + nwu += iwork[in + 1]; + iwoff = *m - iwork[1]; +/* Compute Eigenvalues */ + itmax = (integer) ((log(gu - gl + *pivmin) - log(*pivmin)) / log( + 2.)) + 2; + _starpu_dlaebz_(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, + pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], + &w[*m + 1], &iblock[*m + 1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + +/* Copy eigenvalues into W and IBLOCK */ +/* Use -JBLK for block number for unconverged eigenvalues. */ +/* Loop over the number of output intervals from DLAEBZ */ + i__2 = iout; + for (j = 1; j <= i__2; ++j) { +/* eigenvalue approximation is middle point of interval */ + tmp1 = (work[j + *n] + work[j + in + *n]) * .5; +/* semi length of error interval */ + tmp2 = (d__1 = work[j + *n] - work[j + in + *n], abs(d__1)) * + .5; + if (j > iout - iinfo) { +/* Flag non-convergence. 
*/ + ncnvrg = TRUE_; + ib = -jblk; + } else { + ib = jblk; + } + i__3 = iwork[j + in] + iwoff; + for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { + w[je] = tmp1; + werr[je] = tmp2; + indexw[je] = je - iwoff; + iblock[je] = ib; +/* L50: */ + } +/* L60: */ + } + + *m += im; + } +L70: + ; + } +/* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ +/* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */ + if (irange == 3) { + idiscl = *il - 1 - nwl; + idiscu = nwu - *iu; + + if (idiscl > 0) { + im = 0; + i__1 = *m; + for (je = 1; je <= i__1; ++je) { +/* Remove some of the smallest eigenvalues from the left so that */ +/* at the end IDISCL =0. Move all eigenvalues up to the left. */ + if (w[je] <= wlu && idiscl > 0) { + --idiscl; + } else { + ++im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } +/* L80: */ + } + *m = im; + } + if (idiscu > 0) { +/* Remove some of the largest eigenvalues from the right so that */ +/* at the end IDISCU =0. Move all eigenvalues up to the left. */ + im = *m + 1; + for (je = *m; je >= 1; --je) { + if (w[je] >= wul && idiscu > 0) { + --idiscu; + } else { + --im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } +/* L81: */ + } + jee = 0; + i__1 = *m; + for (je = im; je <= i__1; ++je) { + ++jee; + w[jee] = w[je]; + werr[jee] = werr[je]; + indexw[jee] = indexw[je]; + iblock[jee] = iblock[je]; +/* L82: */ + } + *m = *m - im + 1; + } + if (idiscl > 0 || idiscu > 0) { +/* Code to deal with effects of bad arithmetic. (If N(w) is */ +/* monotone non-decreasing, this should never happen.) 
*/ +/* Some low eigenvalues to be discarded are not in (WL,WLU], */ +/* or high eigenvalues to be discarded are not in (WUL,WU] */ +/* so just kill off the smallest IDISCL/largest IDISCU */ +/* eigenvalues, by marking the corresponding IBLOCK = 0 */ + if (idiscl > 0) { + wkill = *wu; + i__1 = idiscl; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } +/* L90: */ + } + iblock[iw] = 0; +/* L100: */ + } + } + if (idiscu > 0) { + wkill = *wl; + i__1 = idiscu; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } +/* L110: */ + } + iblock[iw] = 0; +/* L120: */ + } + } +/* Now erase all eigenvalues with IBLOCK set to zero */ + im = 0; + i__1 = *m; + for (je = 1; je <= i__1; ++je) { + if (iblock[je] != 0) { + ++im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } +/* L130: */ + } + *m = im; + } + if (idiscl < 0 || idiscu < 0) { + toofew = TRUE_; + } + } + + if (irange == 1 && *m != *n || irange == 3 && *m != *iu - *il + 1) { + toofew = TRUE_; + } +/* If ORDER='B', do nothing the eigenvalues are already sorted by */ +/* block. 
*/ +/* If ORDER='E', sort the eigenvalues from smallest to largest */ + if (_starpu_lsame_(order, "E") && *nsplit > 1) { + i__1 = *m - 1; + for (je = 1; je <= i__1; ++je) { + ie = 0; + tmp1 = w[je]; + i__2 = *m; + for (j = je + 1; j <= i__2; ++j) { + if (w[j] < tmp1) { + ie = j; + tmp1 = w[j]; + } +/* L140: */ + } + if (ie != 0) { + tmp2 = werr[ie]; + itmp1 = iblock[ie]; + itmp2 = indexw[ie]; + w[ie] = w[je]; + werr[ie] = werr[je]; + iblock[ie] = iblock[je]; + indexw[ie] = indexw[je]; + w[je] = tmp1; + werr[je] = tmp2; + iblock[je] = itmp1; + indexw[je] = itmp2; + } +/* L150: */ + } + } + + *info = 0; + if (ncnvrg) { + ++(*info); + } + if (toofew) { + *info += 2; + } + return 0; + +/* End of DLARRD */ + +} /* _starpu_dlarrd_ */ diff --git a/min-dgels/base/SRC/dlarre.c b/min-dgels/base/SRC/dlarre.c new file mode 100644 index 0000000..3968543 --- /dev/null +++ b/min-dgels/base/SRC/dlarre.c @@ -0,0 +1,861 @@ +/* dlarre.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal + *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * + spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, + doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, + doublereal *gers, doublereal *pivmin, doublereal *work, integer * + iwork, integer *info) +{ + /* System 
generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal); + + /* Local variables */ + integer i__, j; + doublereal s1, s2; + integer mb; + doublereal gl; + integer in, mm; + doublereal gu; + integer cnt; + doublereal eps, tau, tmp, rtl; + integer cnt1, cnt2; + doublereal tmp1, eabs; + integer iend, jblk; + doublereal eold; + integer indl; + doublereal dmax__, emax; + integer wend, idum, indu; + doublereal rtol; + integer iseed[4]; + doublereal avgap, sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical norep; + extern /* Subroutine */ int _starpu_dlasq2_(integer *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + integer ibegin; + logical forceb; + integer irange; + doublereal sgndef; + extern /* Subroutine */ int _starpu_dlarra_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *), _starpu_dlarrb_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), _starpu_dlarrc_(char * +, integer *, doublereal *, doublereal *, doublereal *, doublereal + *, doublereal *, integer *, integer *, integer *, integer *); + integer wbegin; + extern /* Subroutine */ int _starpu_dlarrd_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer * +, integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal safmin, spdiam; + extern /* Subroutine */ int _starpu_dlarrk_(integer *, integer *, doublereal *, + 
doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + logical usedqd; + doublereal clwdth, isleft; + extern /* Subroutine */ int _starpu_dlarnv_(integer *, integer *, integer *, + doublereal *); + doublereal isrght, bsrtol, dpivot; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* To find the desired eigenvalues of a given real symmetric */ +/* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */ +/* elements to zero, and for each unreduced block T_i, it finds */ +/* (a) a suitable shift at one end of the block's spectrum, */ +/* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ +/* (c) eigenvalues of each L_i D_i L_i^T. */ +/* The representations and eigenvalues found are then used by */ +/* DSTEMR to compute the eigenvectors of T. */ +/* The accuracy varies depending on whether bisection is used to */ +/* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */ +/* compute all and then discard any unwanted one. */ +/* As an added benefit, DLARRE also outputs the n */ +/* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ + +/* Arguments */ +/* ========= */ + +/* RANGE (input) CHARACTER */ +/* = 'A': ("All") all eigenvalues will be found. */ +/* = 'V': ("Value") all eigenvalues in the half-open interval */ +/* (VL, VU] will be found. */ +/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ +/* entire matrix) will be found. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N > 0. */ + +/* VL (input/output) DOUBLE PRECISION */ +/* VU (input/output) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds for the eigenvalues. */ +/* Eigenvalues less than or equal to VL, or greater than VU, */ +/* will not be returned. 
VL < VU. */ +/* If RANGE='I' or ='A', DLARRE computes bounds on the desired */ +/* part of the spectrum. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the N diagonal elements of the tridiagonal */ +/* matrix T. */ +/* On exit, the N diagonal elements of the diagonal */ +/* matrices D_i. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the first (N-1) entries contain the subdiagonal */ +/* elements of the tridiagonal matrix T; E(N) need not be set. */ +/* On exit, E contains the subdiagonal elements of the unit */ +/* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ +/* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ + +/* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the first (N-1) entries contain the SQUARES of the */ +/* subdiagonal elements of the tridiagonal matrix T; */ +/* E2(N) need not be set. */ +/* On exit, the entries E2( ISPLIT( I ) ), */ +/* 1 <= I <= NSPLIT, have been set to zero */ + +/* RTOL1 (input) DOUBLE PRECISION */ +/* RTOL2 (input) DOUBLE PRECISION */ +/* Parameters for bisection. */ +/* An interval [LEFT,RIGHT] has converged if */ +/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ + +/* SPLTOL (input) DOUBLE PRECISION */ +/* The threshold for splitting. */ + +/* NSPLIT (output) INTEGER */ +/* The number of blocks T splits into. 1 <= NSPLIT <= N. */ + +/* ISPLIT (output) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into blocks. */ +/* The first block consists of rows/columns 1 to ISPLIT(1), */ +/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ +/* etc., and the NSPLIT-th consists of rows/columns */ +/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. 
*/ + +/* M (output) INTEGER */ +/* The total number of eigenvalues (of all L_i D_i L_i^T) */ +/* found. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the eigenvalues. The */ +/* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ +/* sorted in ascending order ( DLARRE may use the */ +/* remaining N-M elements as workspace). */ + +/* WERR (output) DOUBLE PRECISION array, dimension (N) */ +/* The error bound on the corresponding eigenvalue in W. */ + +/* WGAP (output) DOUBLE PRECISION array, dimension (N) */ +/* The separation from the right neighbor eigenvalue in W. */ +/* The gap is only with respect to the eigenvalues of the same block */ +/* as each block has its own representation tree. */ +/* Exception: at the right end of a block we store the left gap */ + +/* IBLOCK (output) INTEGER array, dimension (N) */ +/* The indices of the blocks (submatrices) associated with the */ +/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ +/* W(i) belongs to the first block from the top, =2 if W(i) */ +/* belongs to the second block, etc. */ + +/* INDEXW (output) INTEGER array, dimension (N) */ +/* The indices of the eigenvalues within each block (submatrix); */ +/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ +/* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ + +/* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ +/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ +/* is (GERS(2*i-1), GERS(2*i)). */ + +/* PIVMIN (output) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence for T. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ +/* Workspace. */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ +/* Workspace. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: A problem occurred in DLARRE. */ +/* < 0: One of the called subroutines signaled an internal problem. 
*/ +/* Needs inspection of the corresponding parameter IINFO */ +/* for further information. */ + +/* =-1: Problem in DLARRD or DLARRK. */ +/* = 2: No base representation could be found in MAXTRY iterations. */ +/* Increasing MAXTRY and recompilation might be a remedy. */ +/* =-3: Problem in DLARRB when computing the refined root */ +/* representation for DLASQ2. */ +/* =-4: Problem in DLARRB when performing bisection on the */ +/* desired part of the spectrum. */ +/* =-5: Problem in DLASQ2. */ +/* =-6: Problem in DLASQ2. */ + +/* Further Details */ +/* The base representations are required to suffer very little */ +/* element growth and consequently define all their eigenvalues to */ +/* high relative accuracy. */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + --iwork; + --work; + --gers; + --indexw; + --iblock; + --wgap; + --werr; + --w; + --isplit; + --e2; + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Decode RANGE */ + + if (_starpu_lsame_(range, "A")) { + irange = 1; + } else if (_starpu_lsame_(range, "V")) { + irange = 3; + } else if (_starpu_lsame_(range, "I")) { + irange = 2; + } + *m = 0; +/* Get machine constants */ + safmin = _starpu_dlamch_("S"); + eps = _starpu_dlamch_("P"); +/* Set parameters */ + rtl = sqrt(eps); + bsrtol = sqrt(eps); +/* Treat case of 1x1 matrix for quick return */ + if (*n == 1) { + if (irange == 1 || irange == 3 && d__[1] > *vl && d__[1] <= *vu || + irange == 2 && *il == 1 && *iu == 1) { + *m = 1; + w[1] = d__[1]; +/* The computation error of the eigenvalue is zero */ + werr[1] = 0.; + wgap[1] = 0.; + iblock[1] = 1; + indexw[1] = 1; + gers[1] = d__[1]; + gers[2] = d__[1]; + } +/* store the shift for the initial RRR, which is zero in this case */ + e[1] = 0.; + return 0; + } +/* General case: tridiagonal matrix of order > 1 */ + +/* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ +/* Compute maximum off-diagonal entry and pivmin. */ + gl = d__[1]; + gu = d__[1]; + eold = 0.; + emax = 0.; + e[*n] = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + werr[i__] = 0.; + wgap[i__] = 0.; + eabs = (d__1 = e[i__], abs(d__1)); + if (eabs >= emax) { + emax = eabs; + } + tmp1 = eabs + eold; + gers[(i__ << 1) - 1] = d__[i__] - tmp1; +/* Computing MIN */ + d__1 = gl, d__2 = gers[(i__ << 1) - 1]; + gl = min(d__1,d__2); + gers[i__ * 2] = d__[i__] + tmp1; +/* Computing MAX */ + d__1 = gu, d__2 = gers[i__ * 2]; + gu = max(d__1,d__2); + eold = eabs; +/* L5: */ + } +/* The minimum pivot allowed in the Sturm sequence for T */ +/* Computing MAX */ +/* Computing 2nd power */ + d__3 = emax; + d__1 = 1., d__2 = d__3 * d__3; + *pivmin = safmin * max(d__1,d__2); +/* Compute spectral diameter. 
The Gerschgorin bounds give an */ +/* estimate that is wrong by at most a factor of SQRT(2) */ + spdiam = gu - gl; +/* Compute splitting points */ + _starpu_dlarra_(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & + iinfo); +/* Can force use of bisection instead of faster DQDS. */ +/* Option left in the code for future multisection work. */ + forceb = FALSE_; +/* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ +/* explicitly wants bisection. */ + usedqd = irange == 1 && ! forceb; + if (irange == 1 && ! forceb) { +/* Set interval [VL,VU] that contains all eigenvalues */ + *vl = gl; + *vu = gu; + } else { +/* We call DLARRD to find crude approximations to the eigenvalues */ +/* in the desired range. In case IRANGE = INDRNG, we also obtain the */ +/* interval (VL,VU] that contains all the wanted eigenvalues. */ +/* An interval [LEFT,RIGHT] has converged if */ +/* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ +/* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */ + _starpu_dlarrd_(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ + 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], + vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } +/* Make sure that the entries MM+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ + i__1 = *n; + for (i__ = mm + 1; i__ <= i__1; ++i__) { + w[i__] = 0.; + werr[i__] = 0.; + iblock[i__] = 0; + indexw[i__] = 0; +/* L14: */ + } + } +/* ** */ +/* Loop over unreduced blocks */ + ibegin = 1; + wbegin = 1; + i__1 = *nsplit; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = isplit[jblk]; + in = iend - ibegin + 1; +/* 1 X 1 block */ + if (in == 1) { + if (irange == 1 || irange == 3 && d__[ibegin] > *vl && d__[ibegin] + <= *vu || irange == 2 && iblock[wbegin] == jblk) { + ++(*m); + w[*m] = d__[ibegin]; + werr[*m] = 0.; +/* The gap for a single block doesn't matter for the later */ +/* algorithm and is simply assigned a zero 
value */ + wgap[*m] = 0.; + iblock[*m] = jblk; + indexw[*m] = 1; + ++wbegin; + } +/* E( IEND ) holds the shift for the initial RRR */ + e[iend] = 0.; + ibegin = iend + 1; + goto L170; + } + +/* Blocks of size larger than 1x1 */ + +/* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ + e[iend] = 0.; + +/* Find local outer bounds GL,GU for the block */ + gl = d__[ibegin]; + gu = d__[ibegin]; + i__2 = iend; + for (i__ = ibegin; i__ <= i__2; ++i__) { +/* Computing MIN */ + d__1 = gers[(i__ << 1) - 1]; + gl = min(d__1,gl); +/* Computing MAX */ + d__1 = gers[i__ * 2]; + gu = max(d__1,gu); +/* L15: */ + } + spdiam = gu - gl; + if (! (irange == 1 && ! forceb)) { +/* Count the number of eigenvalues in the current block. */ + mb = 0; + i__2 = mm; + for (i__ = wbegin; i__ <= i__2; ++i__) { + if (iblock[i__] == jblk) { + ++mb; + } else { + goto L21; + } +/* L20: */ + } +L21: + if (mb == 0) { +/* No eigenvalue in the current block lies in the desired range */ +/* E( IEND ) holds the shift for the initial RRR */ + e[iend] = 0.; + ibegin = iend + 1; + goto L170; + } else { +/* Decide whether dqds or bisection is more efficient */ + usedqd = (doublereal) mb > in * .5 && ! forceb; + wend = wbegin + mb - 1; +/* Calculate gaps for the current block */ +/* In later stages, when representations for individual */ +/* eigenvalues are different, we use SIGMA = E( IEND ). */ + sigma = 0.; + i__2 = wend - 1; + for (i__ = wbegin; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + + werr[i__]); + wgap[i__] = max(d__1,d__2); +/* L30: */ + } +/* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); + wgap[wend] = max(d__1,d__2); +/* Find local index of the first and last desired evalue. */ + indl = indexw[wbegin]; + indu = indexw[wend]; + } + } + if (irange == 1 && ! 
forceb || usedqd) { +/* Case of DQDS */ +/* Find approximations to the extremal eigenvalues of the block */ + _starpu_dlarrk_(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & + rtl, &tmp, &tmp1, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } +/* Computing MAX */ + d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, + abs(d__1)); + isleft = max(d__2,d__3); + _starpu_dlarrk_(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & + rtl, &tmp, &tmp1, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } +/* Computing MIN */ + d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, + abs(d__1)); + isrght = min(d__2,d__3); +/* Improve the estimate of the spectral diameter */ + spdiam = isrght - isleft; + } else { +/* Case of bisection */ +/* Find approximations to the wanted extremal eigenvalues */ +/* Computing MAX */ + d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = + w[wbegin] - werr[wbegin], abs(d__1)); + isleft = max(d__2,d__3); +/* Computing MIN */ + d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ + wend] + werr[wend], abs(d__1)); + isrght = min(d__2,d__3); + } +/* Decide whether the base representation for the current block */ +/* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ +/* should be on the left or the right end of the current block. */ +/* The strategy is to shift to the end which is "more populated" */ +/* Furthermore, decide whether to use DQDS for the computation of */ +/* the eigenvalue approximations at the end of DLARRE or bisection. */ +/* dqds is chosen if all eigenvalues are desired or the number of */ +/* eigenvalues to be computed is large compared to the blocksize. */ + if (irange == 1 && ! 
forceb) { +/* If all the eigenvalues have to be computed, we use dqd */ + usedqd = TRUE_; +/* INDL is the local index of the first eigenvalue to compute */ + indl = 1; + indu = in; +/* MB = number of eigenvalues to compute */ + mb = in; + wend = wbegin + mb - 1; +/* Define 1/4 and 3/4 points of the spectrum */ + s1 = isleft + spdiam * .25; + s2 = isrght - spdiam * .25; + } else { +/* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */ +/* approximation. */ +/* choose sigma */ + if (usedqd) { + s1 = isleft + spdiam * .25; + s2 = isrght - spdiam * .25; + } else { + tmp = min(isrght,*vu) - max(isleft,*vl); + s1 = max(isleft,*vl) + tmp * .25; + s2 = min(isrght,*vu) - tmp * .25; + } + } +/* Compute the negcount at the 1/4 and 3/4 points */ + if (mb > 1) { + _starpu_dlarrc_("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & + cnt, &cnt1, &cnt2, &iinfo); + } + if (mb == 1) { + sigma = gl; + sgndef = 1.; + } else if (cnt1 - indl >= indu - cnt2) { + if (irange == 1 && ! forceb) { + sigma = max(isleft,gl); + } else if (usedqd) { +/* use Gerschgorin bound as shift to get pos def matrix */ +/* for dqds */ + sigma = isleft; + } else { +/* use approximation of the first desired eigenvalue of the */ +/* block as shift */ + sigma = max(isleft,*vl); + } + sgndef = 1.; + } else { + if (irange == 1 && ! forceb) { + sigma = min(isrght,gu); + } else if (usedqd) { +/* use Gerschgorin bound as shift to get neg def matrix */ +/* for dqds */ + sigma = isrght; + } else { +/* use approximation of the last desired eigenvalue of the */ +/* block as shift */ + sigma = min(isrght,*vu); + } + sgndef = -1.; + } +/* An initial SIGMA has been chosen that will be used for computing */ +/* T - SIGMA I = L D L^T */ +/* Define the increment TAU of the shift in case the initial shift */ +/* needs to be refined to obtain a factorization with not too much */ +/* element growth. 
*/ + if (usedqd) { +/* The initial SIGMA was to the outer end of the spectrum */ +/* the matrix is definite and we need not retreat. */ + tau = spdiam * eps * *n + *pivmin * 2.; + } else { + if (mb > 1) { + clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; + avgap = (d__1 = clwdth / (doublereal) (wend - wbegin), abs( + d__1)); + if (sgndef == 1.) { +/* Computing MAX */ + d__1 = wgap[wbegin]; + tau = max(d__1,avgap) * .5; +/* Computing MAX */ + d__1 = tau, d__2 = werr[wbegin]; + tau = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = wgap[wend - 1]; + tau = max(d__1,avgap) * .5; +/* Computing MAX */ + d__1 = tau, d__2 = werr[wend]; + tau = max(d__1,d__2); + } + } else { + tau = werr[wbegin]; + } + } + + for (idum = 1; idum <= 6; ++idum) { +/* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ +/* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ +/* pivots in WORK(2*IN+1:3*IN) */ + dpivot = d__[ibegin] - sigma; + work[1] = dpivot; + dmax__ = abs(work[1]); + j = ibegin; + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[(in << 1) + i__] = 1. / work[i__]; + tmp = e[j] * work[(in << 1) + i__]; + work[in + i__] = tmp; + dpivot = d__[j + 1] - sigma - tmp * e[j]; + work[i__ + 1] = dpivot; +/* Computing MAX */ + d__1 = dmax__, d__2 = abs(dpivot); + dmax__ = max(d__1,d__2); + ++j; +/* L70: */ + } +/* check for element growth */ + if (dmax__ > spdiam * 64.) { + norep = TRUE_; + } else { + norep = FALSE_; + } + if (usedqd && ! norep) { +/* Ensure the definiteness of the representation */ +/* All entries of D (of L D L^T) must have the same sign */ + i__2 = in; + for (i__ = 1; i__ <= i__2; ++i__) { + tmp = sgndef * work[i__]; + if (tmp < 0.) { + norep = TRUE_; + } +/* L71: */ + } + } + if (norep) { +/* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ +/* shift which makes the matrix definite. So we should end up */ +/* here really only in the case of IRANGE = VALRNG or INDRNG. 
*/ + if (idum == 5) { + if (sgndef == 1.) { +/* The fudged Gerschgorin shift should succeed */ + sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; + } else { + sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; + } + } else { + sigma -= sgndef * tau; + tau *= 2.; + } + } else { +/* an initial RRR is found */ + goto L83; + } +/* L80: */ + } +/* if the program reaches this point, no base representation could be */ +/* found in MAXTRY iterations. */ + *info = 2; + return 0; +L83: +/* At this point, we have found an initial base representation */ +/* T - SIGMA I = L D L^T with not too much element growth. */ +/* Store the shift. */ + e[iend] = sigma; +/* Store D and L. */ + _starpu_dcopy_(&in, &work[1], &c__1, &d__[ibegin], &c__1); + i__2 = in - 1; + _starpu_dcopy_(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); + if (mb > 1) { + +/* Perturb each entry of the base representation by a small */ +/* (but random) relative amount to overcome difficulties with */ +/* glued matrices. */ + + for (i__ = 1; i__ <= 4; ++i__) { + iseed[i__ - 1] = 1; +/* L122: */ + } + i__2 = (in << 1) - 1; + _starpu_dlarnv_(&c__2, iseed, &i__2, &work[1]); + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; + e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; +/* L125: */ + } + d__[iend] *= eps * 4. * work[in] + 1.; + + } + +/* Don't update the Gerschgorin intervals because keeping track */ +/* of the updates would be too much work in DLARRV. */ +/* We update W instead and use it to locate the proper Gerschgorin */ +/* intervals. */ +/* Compute the required eigenvalues of L D L' by bisection or dqds */ + if (! usedqd) { +/* If DLARRD has been used, shift the eigenvalue approximations */ +/* according to their representation. This is necessary for */ +/* a uniform DLARRV since dqds computes eigenvalues of the */ +/* shifted representation. In DLARRV, W will always hold the */ +/* UNshifted eigenvalue approximation. 
*/ + i__2 = wend; + for (j = wbegin; j <= i__2; ++j) { + w[j] -= sigma; + werr[j] += (d__1 = w[j], abs(d__1)) * eps; +/* L134: */ + } +/* call DLARRB to reduce eigenvalue error of the approximations */ +/* from DLARRD */ + i__2 = iend - 1; + for (i__ = ibegin; i__ <= i__2; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + work[i__] = d__[i__] * (d__1 * d__1); +/* L135: */ + } +/* use bisection to find EV from INDL to INDU */ + i__2 = indl - 1; + _starpu_dlarrb_(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, + rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & + work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & + iinfo); + if (iinfo != 0) { + *info = -4; + return 0; + } +/* DLARRB computes all gaps correctly except for the last one */ +/* Record distance to VU/GU */ +/* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); + wgap[wend] = max(d__1,d__2); + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + iblock[*m] = jblk; + indexw[*m] = i__; +/* L138: */ + } + } else { +/* Call dqds to get all eigs (and then possibly delete unwanted */ +/* eigenvalues). */ +/* Note that dqds finds the eigenvalues of the L D L^T representation */ +/* of T to high relative accuracy. High relative accuracy */ +/* might be lost when the shift of the RRR is subtracted to obtain */ +/* the eigenvalues of T. However, T is not guaranteed to define its */ +/* eigenvalues to high relative accuracy anyway. */ +/* Set RTOL to the order of the tolerance used in DLASQ2 */ +/* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ +/* which is usually too large and requires unnecessary work to be */ +/* done by bisection when computing the eigenvectors */ + rtol = log((doublereal) in) * 4. 
* eps; + j = ibegin; + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[(i__ << 1) - 1] = (d__1 = d__[j], abs(d__1)); + work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; + ++j; +/* L140: */ + } + work[(in << 1) - 1] = (d__1 = d__[iend], abs(d__1)); + work[in * 2] = 0.; + _starpu_dlasq2_(&in, &work[1], &iinfo); + if (iinfo != 0) { +/* If IINFO = -5 then an index is part of a tight cluster */ +/* and should be changed. The index is in IWORK(1) and the */ +/* gap is in WORK(N+1) */ + *info = -5; + return 0; + } else { +/* Test that all eigenvalues are non-negative as expected */ + i__2 = in; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] < 0.) { + *info = -6; + return 0; + } +/* L149: */ + } + } + if (sgndef > 0.) { + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + w[*m] = work[in - i__ + 1]; + iblock[*m] = jblk; + indexw[*m] = i__; +/* L150: */ + } + } else { + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + w[*m] = -work[i__]; + iblock[*m] = jblk; + indexw[*m] = i__; +/* L160: */ + } + } + i__2 = *m; + for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { +/* the value of RTOL below should be the tolerance in DLASQ2 */ + werr[i__] = rtol * (d__1 = w[i__], abs(d__1)); +/* L165: */ + } + i__2 = *m - 1; + for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { +/* compute the right gap between the intervals */ +/* Computing MAX */ + d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ + i__]); + wgap[i__] = max(d__1,d__2); +/* L166: */ + } +/* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); + wgap[*m] = max(d__1,d__2); + } +/* proceed with next block */ + ibegin = iend + 1; + wbegin = wend + 1; +L170: + ; + } + + return 0; + +/* end of DLARRE */ + +} /* _starpu_dlarre_ */ diff --git a/min-dgels/base/SRC/dlarrf.c b/min-dgels/base/SRC/dlarrf.c new file mode 100644 index 0000000..b597a33 --- /dev/null +++ b/min-dgels/base/SRC/dlarrf.c @@ -0,0 +1,423 @@ +/* dlarrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, + doublereal *ld, integer *clstrt, integer *clend, doublereal *w, + doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * + clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, + doublereal *dplus, doublereal *lplus, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, + znm2, growthbound, fail, fact, oldp; + integer indx; + doublereal prod; + integer ktry; + doublereal fail2, avgap, ldmax, rdmax; + integer shift; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical dorrr1; + extern doublereal _starpu_dlamch_(char *); + doublereal ldelta; + logical nofail; + doublereal mingap, lsigma, rdelta; + extern logical _starpu_disnan_(doublereal *); + logical forcer; + doublereal rsigma, clwdth; + logical sawnan1, sawnan2, tryrrr1; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ +/* * */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* Given the initial representation L D L^T and its cluster of close */ +/* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */ +/* W( CLEND ), DLARRF finds a new relatively robust representation */ +/* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */ +/* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix (subblock, if the matrix is split). */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of the diagonal matrix D. */ + +/* L (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (N-1) subdiagonal elements of the unit bidiagonal */ +/* matrix L. */ + +/* LD (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (N-1) elements L(i)*D(i). */ + +/* CLSTRT (input) INTEGER */ +/* The index of the first eigenvalue in the cluster. */ + +/* CLEND (input) INTEGER */ +/* The index of the last eigenvalue in the cluster. */ + +/* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ +/* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */ +/* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */ +/* close eigenvalues. */ + +/* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ +/* The separation from the right neighbor eigenvalue in W. */ + +/* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ +/* WERR contains the semiwidth of the uncertainty */ +/* interval of the corresponding eigenvalue APPROXIMATION in W */ + +/* SPDIAM (input) estimate of the spectral diameter obtained from the */ +/* Gerschgorin intervals */ + +/* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */ +/* Set by the calling routine to protect against shifts too close */ +/* to eigenvalues outside the cluster.
*/ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot allowed in the Sturm sequence. */ + +/* SIGMA (output) DOUBLE PRECISION */ +/* The shift used to form L(+) D(+) L(+)^T. */ + +/* DPLUS (output) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of the diagonal matrix D(+). */ + +/* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The first (N-1) elements of LPLUS contain the subdiagonal */ +/* elements of the unit bidiagonal matrix L(+). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ +/* Workspace. */ /* INFO (output) INTEGER: set to 0 on entry; the INFO = 1 exit is never reached because NOFAIL is always TRUE. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: each array pointer is decremented once so the code below can keep Fortran-style 1-based indices */ + --work; + --lplus; + --dplus; + --werr; + --wgap; + --w; + --ld; + --l; + --d__; + + /* Function Body */ + *info = 0; + fact = 2.; + eps = _starpu_dlamch_("Precision"); + shift = 0; + forcer = FALSE_; +/* Note that we cannot guarantee that for any of the shifts tried, */ +/* the factorization has a small or even moderate element growth. 
*/ +/* There could be Ritz values at both ends of the cluster and despite */ +/* backing off, there are examples where all factorizations tried */ +/* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */ +/* element growth. */ +/* For this reason, we should use PIVMIN in this subroutine so that at */ +/* least the L D L^T factorization exists.
It can be checked afterwards */ +/* whether the element growth caused bad residuals/orthogonality. */ +/* Decide whether the code should accept the best among all */ +/* representations despite large element growth or signal INFO=1 (NOFAIL is hard-wired to TRUE, so the best one is always accepted) */ + nofail = TRUE_; + +/* Compute the average gap length of the cluster (divides by CLEND-CLSTRT, so the cluster must hold at least two eigenvalues) */ + clwdth = (d__1 = w[*clend] - w[*clstrt], abs(d__1)) + werr[*clend] + werr[ + *clstrt]; + avgap = clwdth / (doublereal) (*clend - *clstrt); + mingap = min(*clgapl,*clgapr); +/* Initial values for shifts to both ends of cluster */ +/* Computing MIN */ + d__1 = w[*clstrt], d__2 = w[*clend]; + lsigma = min(d__1,d__2) - werr[*clstrt]; +/* Computing MAX */ + d__1 = w[*clstrt], d__2 = w[*clend]; + rsigma = max(d__1,d__2) + werr[*clend]; +/* Use a small fudge to make sure that we really shift to the outside */ + lsigma -= abs(lsigma) * 4. * eps; + rsigma += abs(rsigma) * 4. * eps; +/* Compute upper bounds for how much to back off the initial shifts */ + ldmax = mingap * .25 + *pivmin * 2.; + rdmax = mingap * .25 + *pivmin * 2.; +/* Computing MAX */ + d__1 = avgap, d__2 = wgap[*clstrt]; + ldelta = max(d__1,d__2) / fact; +/* Computing MAX */ + d__1 = avgap, d__2 = wgap[*clend - 1]; + rdelta = max(d__1,d__2) / fact; + +/* Initialize the record of the best representation found */ + + s = _starpu_dlamch_("S"); + smlgrowth = 1.
/ s; + fail = (doublereal) (*n - 1) * mingap / (*spdiam * eps); + fail2 = (doublereal) (*n - 1) * mingap / (*spdiam * sqrt(eps)); + bestshift = lsigma; + +/* while (KTRY <= KTRYMAX): the retry test at L50 permits exactly one back-off (KTRY < 1) */ + ktry = 0; + growthbound = *spdiam * 8.; +L5: + sawnan1 = FALSE_; + sawnan2 = FALSE_; +/* Ensure that we do not back off too much of the initial shifts */ + ldelta = min(ldmax,ldelta); + rdelta = min(rdmax,rdelta); +/* Compute the element growth when shifting to both ends of the cluster */ +/* accept the shift if there is no element growth at one of the two ends */ +/* Left end: factor directly into DPLUS / LPLUS */ + s = -lsigma; + dplus[1] = d__[1] + s; + if (abs(dplus[1]) < *pivmin) { + dplus[1] = -(*pivmin); +/* Need to set SAWNAN1 because refined RRR test should not be used */ +/* in this case */ + sawnan1 = TRUE_; + } + max1 = abs(dplus[1]); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + lplus[i__] = ld[i__] / dplus[i__]; + s = s * lplus[i__] * l[i__] - lsigma; + dplus[i__ + 1] = d__[i__ + 1] + s; + if ((d__1 = dplus[i__ + 1], abs(d__1)) < *pivmin) { + dplus[i__ + 1] = -(*pivmin); +/* Need to set SAWNAN1 because refined RRR test should not be used */ +/* in this case */ + sawnan1 = TRUE_; + } +/* Computing MAX */ + d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], abs(d__1)); + max1 = max(d__2,d__3); +/* L6: */ + } + sawnan1 = sawnan1 || _starpu_disnan_(&max1); + if (forcer || max1 <= growthbound && !
sawnan1) { + *sigma = lsigma; + shift = 1; + goto L100; + } +/* Right end: factor into WORK(1:N) (diagonal) and WORK(N+1:2N-1) (subdiagonal); copied to DPLUS / LPLUS at L100 if this shift is chosen */ + s = -rsigma; + work[1] = d__[1] + s; + if (abs(work[1]) < *pivmin) { + work[1] = -(*pivmin); +/* Need to set SAWNAN2 because refined RRR test should not be used */ +/* in this case */ + sawnan2 = TRUE_; + } + max2 = abs(work[1]); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + work[*n + i__] = ld[i__] / work[i__]; + s = s * work[*n + i__] * l[i__] - rsigma; + work[i__ + 1] = d__[i__ + 1] + s; + if ((d__1 = work[i__ + 1], abs(d__1)) < *pivmin) { + work[i__ + 1] = -(*pivmin); +/* Need to set SAWNAN2 because refined RRR test should not be used */ +/* in this case */ + sawnan2 = TRUE_; + } +/* Computing MAX */ + d__2 = max2, d__3 = (d__1 = work[i__ + 1], abs(d__1)); + max2 = max(d__2,d__3); +/* L7: */ + } + sawnan2 = sawnan2 || _starpu_disnan_(&max2); + if (forcer || max2 <= growthbound && ! sawnan2) { + *sigma = rsigma; + shift = 2; + goto L100; + } +/* If we are at this point, both shifts led to too much element growth */ +/* Record the better of the two shifts (provided it didn't lead to NaN) */ + if (sawnan1 && sawnan2) { +/* both shifts were flagged: NaN growth or a pivot clamped to -PIVMIN */ + goto L50; + } else { + if (! sawnan1) { + indx = 1; + if (max1 <= smlgrowth) { + smlgrowth = max1; + bestshift = lsigma; + } + } + if (! sawnan2) { + if (sawnan1 || max2 <= max1) { + indx = 2; + } + if (max2 <= smlgrowth) { + smlgrowth = max2; + bestshift = rsigma; + } + } + } +/* If we are here, both the left and the right shift led to */ +/* element growth. If the element growth is moderate, then */ +/* we may still accept the representation, if it passes a */ +/* refined test for RRR. This test supposes that no NaN occurred. */ +/* Moreover, we use the refined RRR test only for isolated clusters. */ + if (clwdth < mingap / 128. && min(max1,max2) < fail2 && ! sawnan1 && !
+ sawnan2) { + dorrr1 = TRUE_; + } else { + dorrr1 = FALSE_; + } + tryrrr1 = TRUE_; /* hard-wired on, so the refined test is gated by DORRR1 alone */ + if (tryrrr1 && dorrr1) { + if (indx == 1) { /* NOTE(review): this branch scales with the right-shift factor WORK(N+I), the INDX == 2 branch with the left-shift factor LPLUS(I); kept exactly as translated - confirm against upstream LAPACK before changing */ + tmp = (d__1 = dplus[*n], abs(d__1)); + znm2 = 1.; + prod = 1.; + oldp = 1.; + for (i__ = *n - 1; i__ >= 1; --i__) { + if (prod <= eps) { + prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] * + work[*n + i__]) * oldp; + } else { + prod *= (d__1 = work[*n + i__], abs(d__1)); + } + oldp = prod; +/* Computing 2nd power */ + d__1 = prod; + znm2 += d__1 * d__1; +/* Computing MAX */ + d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, abs(d__1)); + tmp = max(d__2,d__3); +/* L15: */ + } + rrr1 = tmp / (*spdiam * sqrt(znm2)); + if (rrr1 <= 8.) { + *sigma = lsigma; + shift = 1; + goto L100; + } + } else if (indx == 2) { + tmp = (d__1 = work[*n], abs(d__1)); + znm2 = 1.; + prod = 1.; + oldp = 1.; + for (i__ = *n - 1; i__ >= 1; --i__) { + if (prod <= eps) { + prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * + lplus[i__]) * oldp; + } else { + prod *= (d__1 = lplus[i__], abs(d__1)); + } + oldp = prod; +/* Computing 2nd power */ + d__1 = prod; + znm2 += d__1 * d__1; +/* Computing MAX */ + d__2 = tmp, d__3 = (d__1 = work[i__] * prod, abs(d__1)); + tmp = max(d__2,d__3); +/* L16: */ + } + rrr2 = tmp / (*spdiam * sqrt(znm2)); + if (rrr2 <= 8.) { + *sigma = rsigma; + shift = 2; + goto L100; + } + } + } +L50: + if (ktry < 1) { +/* If we are here, both shifts failed also the RRR test. 
*/ +/* Back off to the outside */ +/* Computing MAX */ + d__1 = lsigma - ldelta, d__2 = lsigma - ldmax; + lsigma = max(d__1,d__2); +/* Computing MIN */ + d__1 = rsigma + rdelta, d__2 = rsigma + rdmax; + rsigma = min(d__1,d__2); + ldelta *= 2.; + rdelta *= 2.; + ++ktry; + goto L5; + } else { +/* None of the representations investigated satisfied our */ +/* criteria. Take the best one we found.
*/ + if (smlgrowth < fail || nofail) { + lsigma = bestshift; + rsigma = bestshift; + forcer = TRUE_; /* the next pass through L5 accepts BESTSHIFT unconditionally via the left-end test */ + goto L5; + } else { + *info = 1; + return 0; + } + } +L100: + if (shift == 1) { /* left shift: DPLUS / LPLUS already hold the factorization */ + } else if (shift == 2) { +/* store new L and D back into DPLUS, LPLUS */ + _starpu_dcopy_(n, &work[1], &c__1, &dplus[1], &c__1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1); + } + return 0; + +/* End of DLARRF */ + +} /* _starpu_dlarrf_ */ diff --git a/min-dgels/base/SRC/dlarrj.c b/min-dgels/base/SRC/dlarrj.c new file mode 100644 index 0000000..0a51871 --- /dev/null +++ b/min-dgels/base/SRC/dlarrj.c @@ -0,0 +1,338 @@ +/* dlarrj.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, + integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, + doublereal *w, doublereal *werr, doublereal *work, integer *iwork, + doublereal *pivmin, doublereal *spdiam, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, j, k, p; + doublereal s; + integer i1, i2, ii; + doublereal fac, mid; + integer cnt; + doublereal tmp, left; + integer iter, nint, prev, next, savi1; + doublereal right, width, dplus; + integer olnint, maxitr; + + +/* -- LAPACK auxiliary routine (version 3.2) -- 
*/ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments ..
*/ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Given the initial eigenvalue approximations of T, DLARRJ */ +/* does bisection to refine the eigenvalues of T, */ +/* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ +/* guesses for these eigenvalues are input in W, the corresponding estimate */ +/* of the error in these guesses in WERR. During bisection, intervals */ +/* [left, right] are maintained by storing their mid-points and */ +/* semi-widths in the arrays W and WERR respectively. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of T. */ + +/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The Squares of the (N-1) subdiagonal elements of T. */ + +/* IFIRST (input) INTEGER */ +/* The index of the first eigenvalue to be computed. */ + +/* ILAST (input) INTEGER */ +/* The index of the last eigenvalue to be computed. */ + +/* RTOL (input) DOUBLE PRECISION */ +/* Tolerance for the convergence of the bisection intervals. */ +/* An interval [LEFT,RIGHT] has converged if */ +/* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). Note: the code below compares the semi-width RIGHT-MID against this bound. */ + +/* OFFSET (input) INTEGER */ +/* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ +/* through ILAST-OFFSET elements of these arrays are to be used. */ + +/* W (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ +/* estimates of the eigenvalues of T indexed IFIRST through */ +/* ILAST. */ +/* On output, these estimates are refined. 
*/ + +/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ +/* the errors in the estimates of the corresponding elements in W. */ +/* On output, these errors are refined.
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ +/* Workspace. */ + +/* IWORK (workspace) INTEGER array, dimension (2*N) */ +/* Workspace. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot in the Sturm sequence for T. In this routine it is only used to derive the iteration cap MAXITR; the Sturm counts below do not clamp pivots to it. */ + +/* SPDIAM (input) DOUBLE PRECISION */ +/* The spectral diameter of T. */ + +/* INFO (output) INTEGER */ +/* Error flag. This routine only ever sets it to 0. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: each array pointer is decremented once so the code below can keep Fortran-style 1-based indices */ + --iwork; + --work; + --werr; + --w; + --e2; + --d__; + + /* Function Body */ + *info = 0; + + /* Cap on bisection sweeps: log2((SPDIAM + PIVMIN) / PIVMIN) truncated to an integer, plus 2 */ maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + + 2; + +/* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ +/* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ +/* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ +/* for an unconverged interval is set to the index of the next unconverged */ +/* interval, and is -1 or 0 for a converged interval. Thus a linked */ +/* list of unconverged intervals is set up.
*/ + + i1 = *ifirst; + i2 = *ilast; +/* The number of unconverged intervals */ + nint = 0; +/* The last unconverged interval found */ + prev = 0; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + left = w[ii] - werr[ii]; + mid = w[ii]; + right = w[ii] + werr[ii]; + width = right - mid; +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + tmp = max(d__1,d__2); +/* The following test prevents the test of converged intervals */ + if (width < *rtol * tmp) { +/* This interval has already converged and does not need refinement. */ +/* (Note that the gaps might change through refining the */ +/* eigenvalues, however, they can only get bigger.) */ +/* Remove it from the list. */ + iwork[k - 1] = -1; +/* Make sure that I1 always points to the first unconverged interval */ + if (i__ == i1 && i__ < i2) { + i1 = i__ + 1; + } + if (prev >= i1 && i__ <= i2) { + iwork[(prev << 1) - 1] = i__ + 1; + } + } else { +/* unconverged interval found */ + prev = i__; +/* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ + +/* Do while( CNT(LEFT).GT.I-1 ): push LEFT down by WERR*FAC, doubling FAC each pass */ + + fac = 1.; +L20: + cnt = 0; + s = left; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) { + ++cnt; + } +/* L30: */ + } + if (cnt > i__ - 1) { + left -= werr[ii] * fac; + fac *= 2.; + goto L20; + } + +/* Do while( CNT(RIGHT).LT.I ): push RIGHT up by WERR*FAC, doubling FAC each pass */ + + fac = 1.; +L50: + cnt = 0; + s = right; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) 
{ + ++cnt; + } +/* L60: */ + } + if (cnt < i__) { + right += werr[ii] * fac; + fac *= 2.; + goto L50; + } + ++nint; + iwork[k - 1] = i__ + 1; + iwork[k] = cnt; + } + work[k - 1] = left; + work[k] = right; +/* L75: */ + } + savi1 = i1; + +/* Do while( NINT.GT.0 ), i.e.
there are still unconverged intervals */ +/* and while (ITER.LT.MAXITR) */ + + iter = 0; +L80: + prev = i1 - 1; + i__ = i1; + olnint = nint; + i__1 = olnint; + for (p = 1; p <= i__1; ++p) { + k = i__ << 1; + ii = i__ - *offset; + next = iwork[k - 1]; + left = work[k - 1]; + right = work[k]; + mid = (left + right) * .5; +/* semiwidth of interval */ + width = right - mid; +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + tmp = max(d__1,d__2); + if (width < *rtol * tmp || iter == maxitr) { +/* reduce number of unconverged intervals */ + --nint; +/* Mark interval as converged. */ + iwork[k - 1] = 0; + if (i1 == i__) { + i1 = next; + } else { +/* Prev holds the last unconverged interval previously examined */ + if (prev >= i1) { + iwork[(prev << 1) - 1] = next; + } + } + i__ = next; + goto L100; + } + prev = i__; + +/* Perform one bisection step: the count of negative pivots at MID decides which endpoint is replaced by MID */ + + cnt = 0; + s = mid; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) { + ++cnt; + } +/* L90: */ + } + if (cnt <= i__ - 1) { + work[k - 1] = mid; + } else { + work[k] = mid; + } + i__ = next; +L100: + ; + } + ++iter; +/* do another loop if there are still unconverged intervals */ +/* However, in the last iteration, all intervals are accepted */ +/* since this is the best we can do. */ + if (nint > 0 && iter <= maxitr) { + goto L80; + } + + +/* At this point, all the intervals have converged */ + i__1 = *ilast; + for (i__ = savi1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; +/* All intervals marked by '0' have been refined.
*/ + if (iwork[k - 1] == 0) { + w[ii] = (work[k - 1] + work[k]) * .5; + werr[ii] = work[k] - w[ii]; + } +/* L110: */ + } + + return 0; + +/* End of DLARRJ */ + +} /* _starpu_dlarrj_ */ diff --git a/min-dgels/base/SRC/dlarrk.c b/min-dgels/base/SRC/dlarrk.c new file mode 100644 index 0000000..68fee74 --- /dev/null +++ b/min-dgels/base/SRC/dlarrk.c @@ -0,0 +1,193 @@ +/* dlarrk.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, + doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, + doublereal *reltol, doublereal *w, doublereal *werr, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, it; + doublereal mid, eps, tmp1, tmp2, left, atoli, right; + integer itmax; + doublereal rtoli, tnorm; + extern doublereal _starpu_dlamch_(char *); + integer negcnt; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARRK computes one eigenvalue of a symmetric tridiagonal */ +/* matrix T to suitable accuracy. This is an auxiliary code to be */ +/* called from DSTEMR.
*/ + +/* To avoid overflow, the matrix must be scaled so that its */ +/* largest element is no greater than overflow**(1/2) * */ +/* underflow**(1/4) in absolute value, and for greatest */ +/* accuracy, it should not be much smaller than that. */ + +/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ +/* Matrix", Report CS41, Computer Science Dept., Stanford */ +/* University, July 21, 1966. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the tridiagonal matrix T. N >= 0. */ + +/* IW (input) INTEGER */ +/* The index of the eigenvalue to be returned. */ + +/* GL (input) DOUBLE PRECISION */ +/* GU (input) DOUBLE PRECISION */ +/* A lower bound (GL) and an upper bound (GU) on the eigenvalue. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix T. */ + +/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ + +/* PIVMIN (input) DOUBLE PRECISION */ +/* The minimum pivot allowed in the Sturm sequence for T. */ + +/* RELTOL (input) DOUBLE PRECISION */ +/* The minimum relative width of an interval. When an interval */ +/* is narrower than RELTOL times the larger (in */ +/* magnitude) endpoint, then it is considered to be */ +/* sufficiently small, i.e., converged. Note: this should */ +/* always be at least radix*machine epsilon. */ + +/* W (output) DOUBLE PRECISION: the midpoint of the final bisection interval [LEFT,RIGHT]. */ + +/* WERR (output) DOUBLE PRECISION */ +/* The error bound on the corresponding eigenvalue approximation */ +/* in W. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: Eigenvalue converged */ +/* = -1: Eigenvalue did NOT converge */ + +/* Internal Parameters */ +/* =================== */ + +/* FUDGE DOUBLE PRECISION, default = 2 */ +/* A "fudge factor" to widen the Gershgorin intervals. It appears below as the literal 2. in the LEFT / RIGHT initialisation. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* ..
*/ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Get machine constants */ + /* Parameter adjustments */ + --e2; + --d__; + + /* Function Body */ + eps = _starpu_dlamch_("P"); +/* Computing MAX */ + d__1 = abs(*gl), d__2 = abs(*gu); + tnorm = max(d__1,d__2); + rtoli = *reltol; + atoli = *pivmin * 4.; + itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2; + *info = -1; /* pessimistic default; reset to 0 only when the interval-width test below succeeds */ + left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.; + right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.; + it = 0; +L10: + +/* Check if interval converged or maximum number of iterations reached */ + + tmp1 = (d__1 = right - left, abs(d__1)); +/* Computing MAX */ + d__1 = abs(right), d__2 = abs(left); + tmp2 = max(d__1,d__2); +/* Computing MAX */ + d__1 = max(atoli,*pivmin), d__2 = rtoli * tmp2; + if (tmp1 < max(d__1,d__2)) { + *info = 0; + goto L30; + } + if (it > itmax) { + goto L30; + } + +/* Count number of negative pivots for mid-point (a pivot smaller than PIVMIN in magnitude is replaced by -PIVMIN) */ + + ++it; + mid = (left + right) * .5; + negcnt = 0; + tmp1 = d__[1] - mid; + if (abs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) { + ++negcnt; + } + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid; + if (abs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) 
{ + ++negcnt; + } +/* L20: */ + } + if (negcnt >= *iw) { + right = mid; + } else { + left = mid; + } + goto L10; +L30: + +/* Converged or maximum number of iterations reached */ + + *w = (left + right) * .5; + *werr = (d__1 = right - left, abs(d__1)) * .5; + return 0; + +/* End of DLARRK */ + +} /* _starpu_dlarrk_ */ diff --git a/min-dgels/base/SRC/dlarrr.c b/min-dgels/base/SRC/dlarrr.c new file mode 100644 index 0000000..5ffe9ab --- /dev/null +++ b/min-dgels/base/SRC/dlarrr.c @@ -0,0 +1,176 @@ +/* dlarrr.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal eps, tmp, tmp2, rmin; + extern doublereal _starpu_dlamch_(char *); + doublereal offdig, safmin; + logical yesrel; + doublereal smlnum, offdig2; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + + +/* Purpose */ +/* ======= */ + +/* Perform tests to decide whether the symmetric tridiagonal matrix T */ +/* warrants expensive computations which guarantee high relative accuracy */ +/* in the eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N > 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The N diagonal elements of the tridiagonal matrix T. */ + +/* E (input) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the first (N-1) entries contain the subdiagonal */ +/* elements of the tridiagonal matrix T. E is only read; E(N) is not used. */ + +/* INFO (output) INTEGER */ +/* INFO = 0 : the matrix warrants computations preserving */ +/* relative accuracy. */ +/* INFO = 1 (default) : the matrix warrants computations guaranteeing */ +/* only absolute accuracy. 
*/ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* As a default, do NOT go for relative-accuracy preserving computations. */ + /* Parameter adjustments (f2c: shift pointers so arrays are indexed from 1) */ + --e; + --d__; + + /* Function Body */ + *info = 1; + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + rmin = sqrt(smlnum); /* any sqrt(|D(i)|) below RMIN fails the test */ +/* Tests for relative accuracy */ + +/* Test for scaled diagonal dominance */ +/* Scale the diagonal entries to one and check whether the sum of the */ +/* off-diagonals is less than one */ + +/* The sdd relative error bounds have a 1/(1- 2*x) factor in them, */ +/* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */ +/* accuracy is promised. In the notation of the code fragment below, */ +/* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */ +/* We don't think it is worth going into "sdd mode" unless the relative */ +/* condition number is reasonable, not 1/macheps. */ +/* The threshold should be compatible with other thresholds used in the */ +/* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */ +/* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */ +/* instead of the current OFFDIG + OFFDIG2 < 1 */ + + yesrel = TRUE_; + offdig = 0.; + tmp = sqrt((abs(d__[1]))); + if (tmp < rmin) { + yesrel = FALSE_; + } + if (! 
yesrel) { + goto L11; + } + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + tmp2 = sqrt((d__1 = d__[i__], abs(d__1))); + if (tmp2 < rmin) { + yesrel = FALSE_; + } + if (! yesrel) { + goto L11; + } + offdig2 = (d__1 = e[i__ - 1], abs(d__1)) / (tmp * tmp2); + if (offdig + offdig2 >= .999) { + yesrel = FALSE_; + } + if (! yesrel) { + goto L11; + } + tmp = tmp2; + offdig = offdig2; +/* L10: */ + } +L11: + if (yesrel) { + *info = 0; + return 0; + } else { /* test failed: INFO keeps its default value 1 */ + } + + +/* *** MORE TO BE IMPLEMENTED *** */ + + +/* Test if the lower bidiagonal matrix L from T = L D L^T */ +/* (zero shift factorization) is well conditioned */ + + +/* Test if the upper bidiagonal matrix U from T = U D U^T */ +/* (zero shift factorization) is well conditioned. */ +/* In this case, the matrix needs to be flipped and, at the end */ +/* of the eigenvector computation, the flip needs to be applied */ +/* to the computed eigenvectors (and the support) */ + + + return 0; + +/* END OF DLARRR */ + +} /* _starpu_dlarrr_ */ diff --git a/min-dgels/base/SRC/dlarrv.c b/min-dgels/base/SRC/dlarrv.c new file mode 100644 index 0000000..8adb8eb --- /dev/null +++ b/min-dgels/base/SRC/dlarrv.c @@ -0,0 +1,988 @@ +/* dlarrv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b5 = 0.; +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, + doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, + integer *m, integer *dol, integer *dou, doublereal *minrgp, + doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, + doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2; + logical L__1; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer minwsize, i__, j, k, p, q, miniwsize, ii; + doublereal gl; + integer im, in; + doublereal gu, gap, eps, tau, tol, tmp; + integer zto; + doublereal ztz; + integer iend, jblk; + doublereal lgap; + integer done; + doublereal rgap, left; + integer wend, iter; + doublereal bstw; + integer itmp1; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + integer indld; + doublereal fudge; + integer idone; + doublereal sigma; + integer iinfo, iindr; + doublereal resid; + logical eskip; + doublereal right; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer nclus, zfrom; + doublereal rqtol; + integer iindc1, iindc2; + extern /* 
Subroutine */ int _starpu_dlar1v_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, logical *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *); + logical stp2ii; + doublereal lambda; + extern doublereal _starpu_dlamch_(char *); + integer ibegin, indeig; + logical needbs; + integer indlld; + doublereal sgndef, mingma; + extern /* Subroutine */ int _starpu_dlarrb_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + integer oldien, oldncl, wbegin; + doublereal spdiam; + integer negcnt; + extern /* Subroutine */ int _starpu_dlarrf_(integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + integer oldcls; + doublereal savgap; + integer ndepth; + doublereal ssigma; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + logical usedbs; + integer iindwk, offset; + doublereal gaptol; + integer newcls, oldfst, indwrk, windex, oldlst; + logical usedrq; + integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; + doublereal bstres; + integer newsiz, zusedu, zusedw; + doublereal nrminv, rqcorr; + logical tryrqc; + integer isupmx; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARRV computes the eigenvectors of the tridiagonal matrix */ +/* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ +/* The input eigenvalues should have been computed by DLARRE. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* Lower and upper bounds of the interval that contains the desired */ +/* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ +/* end of the extremal eigenvalues in the desired RANGE. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the N diagonal elements of the diagonal matrix D. */ +/* On exit, D may be overwritten. */ + +/* L (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the (N-1) subdiagonal elements of the unit */ +/* bidiagonal matrix L are in elements 1 to N-1 of L */ +/* (if the matrix is not splitted.) At the end of each block */ +/* is stored the corresponding shift as given by DLARRE. */ +/* On exit, L is overwritten. */ + +/* PIVMIN (in) DOUBLE PRECISION */ +/* The minimum pivot allowed in the Sturm sequence. */ + +/* ISPLIT (input) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into blocks. */ +/* The first block consists of rows/columns 1 to */ +/* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ +/* through ISPLIT( 2 ), etc. */ + +/* M (input) INTEGER */ +/* The total number of input eigenvalues. 0 <= M <= N. */ + +/* DOL (input) INTEGER */ +/* DOU (input) INTEGER */ +/* If the user wants to compute only selected eigenvectors from all */ +/* the eigenvalues supplied, he can specify an index range DOL:DOU. */ +/* Or else the setting DOL=1, DOU=M should be applied. */ +/* Note that DOL and DOU refer to the order in which the eigenvalues */ +/* are stored in W. 
*/ +/* If the user wants to compute only selected eigenpairs, then */ +/* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ +/* computed eigenvectors. All other columns of Z are set to zero. */ + +/* MINRGP (input) DOUBLE PRECISION */ + +/* RTOL1 (input) DOUBLE PRECISION */ +/* RTOL2 (input) DOUBLE PRECISION */ +/* Parameters for bisection. */ +/* An interval [LEFT,RIGHT] has converged if */ +/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ + +/* W (input/output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements of W contain the APPROXIMATE eigenvalues for */ +/* which eigenvectors are to be computed. The eigenvalues */ +/* should be grouped by split-off block and ordered from */ +/* smallest to largest within the block ( The output array */ +/* W from DLARRE is expected here ). Furthermore, they are with */ +/* respect to the shift of the corresponding root representation */ +/* for their block. On exit, W holds the eigenvalues of the */ +/* UNshifted matrix. */ + +/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the semiwidth of the uncertainty */ +/* interval of the corresponding eigenvalue in W */ + +/* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ +/* The separation from the right neighbor eigenvalue in W. */ + +/* IBLOCK (input) INTEGER array, dimension (N) */ +/* The indices of the blocks (submatrices) associated with the */ +/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ +/* W(i) belongs to the first block from the top, =2 if W(i) */ +/* belongs to the second block, etc. */ + +/* INDEXW (input) INTEGER array, dimension (N) */ +/* The indices of the eigenvalues within each block (submatrix); */ +/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ +/* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. 
*/ + +/* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ +/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ +/* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ +/* be computed from the original UNshifted matrix. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ +/* If INFO = 0, the first M columns of Z contain the */ +/* orthonormal eigenvectors of the matrix T */ +/* corresponding to the input eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ +/* The support of the eigenvectors in Z, i.e., the indices */ +/* indicating the nonzero elements in Z. The I-th eigenvector */ +/* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ +/* ISUPPZ( 2*I ). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ + +/* IWORK (workspace) INTEGER array, dimension (7*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ + +/* > 0: A problem occured in DLARRV. */ +/* < 0: One of the called subroutines signaled an internal problem. */ +/* Needs inspection of the corresponding parameter IINFO */ +/* for further information. */ + +/* =-1: Problem in DLARRB when refining a child's eigenvalues. */ +/* =-2: Problem in DLARRF when computing the RRR of a child. */ +/* When a child is inside a tight cluster, it can be difficult */ +/* to find an RRR. A partial remedy from the user's point of */ +/* view is to make the parameter MINRGP smaller and recompile. */ +/* However, as the orthogonality of the computed vectors is */ +/* proportional to 1/MINRGP, the user should be aware that */ +/* he might be trading in precision when he decreases MINRGP. 
*/ +/* =-3: Problem in DLARRB when refining a single eigenvalue */ +/* after the Rayleigh correction was rejected. */ +/* = 5: The Rayleigh Quotient Iteration failed to converge to */ +/* full accuracy in MAXITR steps. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ +/* .. */ +/* The first N entries of WORK are reserved for the eigenvalues */ + /* Parameter adjustments */ + --d__; + --l; + --isplit; + --w; + --werr; + --wgap; + --iblock; + --indexw; + --gers; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + indld = *n + 1; + indlld = (*n << 1) + 1; + indwrk = *n * 3 + 1; + minwsize = *n * 12; + i__1 = minwsize; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L5: */ + } +/* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ +/* factorization used to compute the FP vector */ + iindr = 0; +/* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ +/* layer and the one above. 
*/ + iindc1 = *n; + iindc2 = *n << 1; + iindwk = *n * 3 + 1; + miniwsize = *n * 7; + i__1 = miniwsize; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L10: */ + } + zusedl = 1; + if (*dol > 1) { +/* Set lower bound for use of Z */ + zusedl = *dol - 1; + } + zusedu = *m; + if (*dou < *m) { +/* Set lower bound for use of Z */ + zusedu = *dou + 1; + } +/* The width of the part of Z that is used */ + zusedw = zusedu - zusedl + 1; + _starpu_dlaset_("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); + eps = _starpu_dlamch_("Precision"); + rqtol = eps * 2.; + +/* Set expert flags for standard code. */ + tryrqc = TRUE_; + if (*dol == 1 && *dou == *m) { + } else { +/* Only selected eigenpairs are computed. Since the other evalues */ +/* are not refined by RQ iteration, bisection has to compute to full */ +/* accuracy. */ + *rtol1 = eps * 4.; + *rtol2 = eps * 4.; + } +/* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ +/* desired eigenvalues. The support of the nonzero eigenvector */ +/* entries is contained in the interval IBEGIN:IEND. */ +/* Remark that if k eigenpairs are desired, then the eigenvectors */ +/* are stored in k contiguous columns of Z. */ +/* DONE is the number of eigenvectors already computed */ + done = 0; + ibegin = 1; + wbegin = 1; + i__1 = iblock[*m]; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = isplit[jblk]; + sigma = l[iend]; +/* Find the eigenvectors of the submatrix indexed IBEGIN */ +/* through IEND. 
*/ + wend = wbegin - 1; +L15: + if (wend < *m) { + if (iblock[wend + 1] == jblk) { + ++wend; + goto L15; + } + } + if (wend < wbegin) { + ibegin = iend + 1; + goto L170; + } else if (wend < *dol || wbegin > *dou) { + ibegin = iend + 1; + wbegin = wend + 1; + goto L170; + } +/* Find local spectral diameter of the block */ + gl = gers[(ibegin << 1) - 1]; + gu = gers[ibegin * 2]; + i__2 = iend; + for (i__ = ibegin + 1; i__ <= i__2; ++i__) { +/* Computing MIN */ + d__1 = gers[(i__ << 1) - 1]; + gl = min(d__1,gl); +/* Computing MAX */ + d__1 = gers[i__ * 2]; + gu = max(d__1,gu); +/* L20: */ + } + spdiam = gu - gl; +/* OLDIEN is the last index of the previous block */ + oldien = ibegin - 1; +/* Calculate the size of the current block */ + in = iend - ibegin + 1; +/* The number of eigenvalues in the current block */ + im = wend - wbegin + 1; +/* This is for a 1x1 block */ + if (ibegin == iend) { + ++done; + z__[ibegin + wbegin * z_dim1] = 1.; + isuppz[(wbegin << 1) - 1] = ibegin; + isuppz[wbegin * 2] = ibegin; + w[wbegin] += sigma; + work[wbegin] = w[wbegin]; + ibegin = iend + 1; + ++wbegin; + goto L170; + } +/* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ +/* Note that these can be approximations, in this case, the corresp. */ +/* entries of WERR give the size of the uncertainty interval. */ +/* The eigenvalue approximations will be refined when necessary as */ +/* high relative accuracy is required for the computation of the */ +/* corresponding eigenvectors. */ + _starpu_dcopy_(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); +/* We store in W the eigenvalue approximations w.r.t. the original */ +/* matrix T. 
*/ + i__2 = im; + for (i__ = 1; i__ <= i__2; ++i__) { + w[wbegin + i__ - 1] += sigma; +/* L30: */ + } +/* NDEPTH is the current depth of the representation tree */ + ndepth = 0; +/* PARITY is either 1 or 0 */ + parity = 1; +/* NCLUS is the number of clusters for the next level of the */ +/* representation tree, we start with NCLUS = 1 for the root */ + nclus = 1; + iwork[iindc1 + 1] = 1; + iwork[iindc1 + 2] = im; +/* IDONE is the number of eigenvectors already computed in the current */ +/* block */ + idone = 0; +/* loop while( IDONE.LT.IM ) */ +/* generate the representation tree for the current block and */ +/* compute the eigenvectors */ +L40: + if (idone < im) { +/* This is a crude protection against infinitely deep trees */ + if (ndepth > *m) { + *info = -2; + return 0; + } +/* breadth first processing of the current level of the representation */ +/* tree: OLDNCL = number of clusters on current level */ + oldncl = nclus; +/* reset NCLUS to count the number of child clusters */ + nclus = 0; + + parity = 1 - parity; + if (parity == 0) { + oldcls = iindc1; + newcls = iindc2; + } else { + oldcls = iindc2; + newcls = iindc1; + } +/* Process the clusters on the current level */ + i__2 = oldncl; + for (i__ = 1; i__ <= i__2; ++i__) { + j = oldcls + (i__ << 1); +/* OLDFST, OLDLST = first, last index of current cluster. 
*/ +/* cluster indices start with 1 and are relative */ +/* to WBEGIN when accessing W, WGAP, WERR, Z */ + oldfst = iwork[j - 1]; + oldlst = iwork[j]; + if (ndepth > 0) { +/* Retrieve relatively robust representation (RRR) of cluster */ +/* that has been computed at the previous level */ +/* The RRR is stored in Z and overwritten once the eigenvectors */ +/* have been computed or when the cluster is refined */ + if (*dol == 1 && *dou == *m) { +/* Get representation from location of the leftmost evalue */ +/* of the cluster */ + j = wbegin + oldfst - 1; + } else { + if (wbegin + oldfst - 1 < *dol) { +/* Get representation from the left end of Z array */ + j = *dol - 1; + } else if (wbegin + oldfst - 1 > *dou) { +/* Get representation from the right end of Z array */ + j = *dou; + } else { + j = wbegin + oldfst - 1; + } + } + _starpu_dcopy_(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] +, &c__1); + i__3 = in - 1; + _starpu_dcopy_(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ + ibegin], &c__1); + sigma = z__[iend + (j + 1) * z_dim1]; +/* Set the corresponding entries in Z to zero */ + _starpu_dlaset_("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j + * z_dim1], ldz); + } +/* Compute DL and DLL of current RRR */ + i__3 = iend - 1; + for (j = ibegin; j <= i__3; ++j) { + tmp = d__[j] * l[j]; + work[indld - 1 + j] = tmp; + work[indlld - 1 + j] = tmp * l[j]; +/* L50: */ + } + if (ndepth > 0) { +/* P and Q are index of the first and last eigenvalue to compute */ +/* within the current block */ + p = indexw[wbegin - 1 + oldfst]; + q = indexw[wbegin - 1 + oldlst]; +/* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ +/* thru' Q-OFFSET elements of these arrays are to be used. */ +/* OFFSET = P-OLDFST */ + offset = indexw[wbegin] - 1; +/* perform limited bisection (if necessary) to get approximate */ +/* eigenvalues to the precision needed. 
*/ + _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, + &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ + wbegin], &werr[wbegin], &work[indwrk], &iwork[ + iindwk], pivmin, &spdiam, &in, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } +/* We also recompute the extremal gaps. W holds all eigenvalues */ +/* of the unshifted matrix and must be used for computation */ +/* of WGAP, the entries of WORK might stem from RRRs with */ +/* different shifts. The gaps from WBEGIN-1+OLDFST to */ +/* WBEGIN-1+OLDLST are correctly computed in DLARRB. */ +/* However, we only allow the gaps to become greater since */ +/* this is what should happen when we decrease WERR */ + if (oldfst > 1) { +/* Computing MAX */ + d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + + oldfst - 1] - werr[wbegin + oldfst - 1] - w[ + wbegin + oldfst - 2] - werr[wbegin + oldfst - + 2]; + wgap[wbegin + oldfst - 2] = max(d__1,d__2); + } + if (wbegin + oldlst - 1 < wend) { +/* Computing MAX */ + d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + + oldlst] - werr[wbegin + oldlst] - w[wbegin + + oldlst - 1] - werr[wbegin + oldlst - 1]; + wgap[wbegin + oldlst - 1] = max(d__1,d__2); + } +/* Each time the eigenvalues in WORK get refined, we store */ +/* the newly found approximation with all shifts applied in W */ + i__3 = oldlst; + for (j = oldfst; j <= i__3; ++j) { + w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; +/* L53: */ + } + } +/* Process the current node. 
*/ + newfst = oldfst; + i__3 = oldlst; + for (j = oldfst; j <= i__3; ++j) { + if (j == oldlst) { +/* we are at the right end of the cluster, this is also the */ +/* boundary of the child cluster */ + newlst = j; + } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ + wbegin + j - 1], abs(d__1))) { +/* the right relative gap is big enough, the child cluster */ +/* (NEWFST,..,NEWLST) is well separated from the following */ + newlst = j; + } else { +/* inside a child cluster, the relative gap is not */ +/* big enough. */ + goto L140; + } +/* Compute size of child cluster found */ + newsiz = newlst - newfst + 1; +/* NEWFTT is the place in Z where the new RRR or the computed */ +/* eigenvector is to be stored */ + if (*dol == 1 && *dou == *m) { +/* Store representation at location of the leftmost evalue */ +/* of the cluster */ + newftt = wbegin + newfst - 1; + } else { + if (wbegin + newfst - 1 < *dol) { +/* Store representation at the left end of Z array */ + newftt = *dol - 1; + } else if (wbegin + newfst - 1 > *dou) { +/* Store representation at the right end of Z array */ + newftt = *dou; + } else { + newftt = wbegin + newfst - 1; + } + } + if (newsiz > 1) { + +/* Current child is not a singleton but a cluster. */ +/* Compute and store new representation of child. */ + + +/* Compute left and right cluster gap. */ + +/* LGAP and RGAP are not computed from WORK because */ +/* the eigenvalue approximations may stem from RRRs */ +/* different shifts. However, W hold all eigenvalues */ +/* of the unshifted matrix. Still, the entries in WGAP */ +/* have to be computed from WORK since the entries */ +/* in W might be of the same order so that gaps are not */ +/* exhibited correctly for very close eigenvalues. 
*/ + if (newfst == 1) { +/* Computing MAX */ + d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; + lgap = max(d__1,d__2); + } else { + lgap = wgap[wbegin + newfst - 2]; + } + rgap = wgap[wbegin + newlst - 1]; + +/* Compute left- and rightmost eigenvalue of child */ +/* to high precision in order to shift as close */ +/* as possible and obtain as large relative gaps */ +/* as possible */ + + for (k = 1; k <= 2; ++k) { + if (k == 1) { + p = indexw[wbegin - 1 + newfst]; + } else { + p = indexw[wbegin - 1 + newlst]; + } + offset = indexw[wbegin] - 1; + _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin + - 1], &p, &p, &rqtol, &rqtol, &offset, & + work[wbegin], &wgap[wbegin], &werr[wbegin] +, &work[indwrk], &iwork[iindwk], pivmin, & + spdiam, &in, &iinfo); +/* L55: */ + } + + if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 + > *dou) { +/* if the cluster contains no desired eigenvalues */ +/* skip the computation of that branch of the rep. tree */ + +/* We could skip before the refinement of the extremal */ +/* eigenvalues of the child, but then the representation */ +/* tree could be different from the one when nothing is */ +/* skipped. For this reason we skip at this place. */ + idone = idone + newlst - newfst + 1; + goto L139; + } + +/* Compute RRR of child cluster. */ +/* Note that the new RRR is stored in Z */ + +/* DLARRF needs LWORK = 2*N */ + _starpu_dlarrf_(&in, &d__[ibegin], &l[ibegin], &work[indld + + ibegin - 1], &newfst, &newlst, &work[wbegin], + &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, + &rgap, pivmin, &tau, &z__[ibegin + newftt * + z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], + &work[indwrk], &iinfo); + if (iinfo == 0) { +/* a new RRR for the cluster was found by DLARRF */ +/* update shift and store it */ + ssigma = sigma + tau; + z__[iend + (newftt + 1) * z_dim1] = ssigma; +/* WORK() are the midpoints and WERR() the semi-width */ +/* Note that the entries in W are unchanged. 
*/ + i__4 = newlst; + for (k = newfst; k <= i__4; ++k) { + fudge = eps * 3. * (d__1 = work[wbegin + k - + 1], abs(d__1)); + work[wbegin + k - 1] -= tau; + fudge += eps * 4. * (d__1 = work[wbegin + k - + 1], abs(d__1)); +/* Fudge errors */ + werr[wbegin + k - 1] += fudge; +/* Gaps are not fudged. Provided that WERR is small */ +/* when eigenvalues are close, a zero gap indicates */ +/* that a new representation is needed for resolving */ +/* the cluster. A fudge could lead to a wrong decision */ +/* of judging eigenvalues 'separated' which in */ +/* reality are not. This could have a negative impact */ +/* on the orthogonality of the computed eigenvectors. */ +/* L116: */ + } + ++nclus; + k = newcls + (nclus << 1); + iwork[k - 1] = newfst; + iwork[k] = newlst; + } else { + *info = -2; + return 0; + } + } else { + +/* Compute eigenvector of singleton */ + + iter = 0; + + tol = log((doublereal) in) * 4. * eps; + + k = newfst; + windex = wbegin + k - 1; +/* Computing MAX */ + i__4 = windex - 1; + windmn = max(i__4,1); +/* Computing MIN */ + i__4 = windex + 1; + windpl = min(i__4,*m); + lambda = work[windex]; + ++done; +/* Check if eigenvector computation is to be skipped */ + if (windex < *dol || windex > *dou) { + eskip = TRUE_; + goto L125; + } else { + eskip = FALSE_; + } + left = work[windex] - werr[windex]; + right = work[windex] + werr[windex]; + indeig = indexw[windex]; +/* Note that since we compute the eigenpairs for a child, */ +/* all eigenvalue approximations are w.r.t the same shift. */ +/* In this case, the entries in WORK should be used for */ +/* computing the gaps since they exhibit even very small */ +/* differences in the eigenvalues, as opposed to the */ +/* entries in W which might "look" the same. 
*/ + if (k == 1) { +/* In the case RANGE='I' and with not much initial */ +/* accuracy in LAMBDA and VL, the formula */ +/* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ +/* can lead to an overestimation of the left gap and */ +/* thus to inadequately early RQI 'convergence'. */ +/* Prevent this by forcing a small left gap. */ +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + lgap = eps * max(d__1,d__2); + } else { + lgap = wgap[windmn]; + } + if (k == im) { +/* In the case RANGE='I' and with not much initial */ +/* accuracy in LAMBDA and VU, the formula */ +/* can lead to an overestimation of the right gap and */ +/* thus to inadequately early RQI 'convergence'. */ +/* Prevent this by forcing a small right gap. */ +/* Computing MAX */ + d__1 = abs(left), d__2 = abs(right); + rgap = eps * max(d__1,d__2); + } else { + rgap = wgap[windex]; + } + gap = min(lgap,rgap); + if (k == 1 || k == im) { +/* The eigenvector support can become wrong */ +/* because significant entries could be cut off due to a */ +/* large GAPTOL parameter in LAR1V. Prevent this. */ + gaptol = 0.; + } else { + gaptol = gap * eps; + } + isupmn = in; + isupmx = 1; +/* Update WGAP so that it holds the minimum gap */ +/* to the left or the right. This is crucial in the */ +/* case where bisection is used to ensure that the */ +/* eigenvalue is refined up to the required precision. */ +/* The correct value is restored afterwards. */ + savgap = wgap[windex]; + wgap[windex] = gap; +/* We want to use the Rayleigh Quotient Correction */ +/* as often as possible since it converges quadratically */ +/* when we are close enough to the desired eigenvalue. */ +/* However, the Rayleigh Quotient can have the wrong sign */ +/* and lead us away from the desired eigenvalue. In this */ +/* case, the best we can do is to use bisection. */ + usedbs = FALSE_; + usedrq = FALSE_; +/* Bisection is initially turned off unless it is forced */ + needbs = ! 
tryrqc; +L120: +/* Check if bisection should be used to refine eigenvalue */ + if (needbs) { +/* Take the bisection as new iterate */ + usedbs = TRUE_; + itmp1 = iwork[iindr + windex]; + offset = indexw[wbegin] - 1; + d__1 = eps * 2.; + _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin + - 1], &indeig, &indeig, &c_b5, &d__1, & + offset, &work[wbegin], &wgap[wbegin], & + werr[wbegin], &work[indwrk], &iwork[ + iindwk], pivmin, &spdiam, &itmp1, &iinfo); + if (iinfo != 0) { + *info = -3; + return 0; + } + lambda = work[windex]; +/* Reset twist index from inaccurate LAMBDA to */ +/* force computation of true MINGMA */ + iwork[iindr + windex] = 0; + } +/* Given LAMBDA, compute the eigenvector. */ + L__1 = ! usedbs; + _starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ + ibegin], &work[indld + ibegin - 1], &work[ + indlld + ibegin - 1], pivmin, &gaptol, &z__[ + ibegin + windex * z_dim1], &L__1, &negcnt, & + ztz, &mingma, &iwork[iindr + windex], &isuppz[ + (windex << 1) - 1], &nrminv, &resid, &rqcorr, + &work[indwrk]); + if (iter == 0) { + bstres = resid; + bstw = lambda; + } else if (resid < bstres) { + bstres = resid; + bstw = lambda; + } +/* Computing MIN */ + i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; + isupmn = min(i__4,i__5); +/* Computing MAX */ + i__4 = isupmx, i__5 = isuppz[windex * 2]; + isupmx = max(i__4,i__5); + ++iter; +/* sin alpha <= |resid|/gap */ +/* Note that both the residual and the gap are */ +/* proportional to the matrix, so ||T|| doesn't play */ +/* a role in the quotient */ + +/* Convergence test for Rayleigh-Quotient iteration */ +/* (omitted when Bisection has been used) */ + + if (resid > tol * gap && abs(rqcorr) > rqtol * abs( + lambda) && ! usedbs) { +/* We need to check that the RQCORR update doesn't */ +/* move the eigenvalue away from the desired one and */ +/* towards a neighbor. 
-> protection with bisection */ + if (indeig <= negcnt) { +/* The wanted eigenvalue lies to the left */ + sgndef = -1.; + } else { +/* The wanted eigenvalue lies to the right */ + sgndef = 1.; + } +/* We only use the RQCORR if it improves the */ +/* the iterate reasonably. */ + if (rqcorr * sgndef >= 0. && lambda + rqcorr <= + right && lambda + rqcorr >= left) { + usedrq = TRUE_; +/* Store new midpoint of bisection interval in WORK */ + if (sgndef == 1.) { +/* The current LAMBDA is on the left of the true */ +/* eigenvalue */ + left = lambda; +/* We prefer to assume that the error estimate */ +/* is correct. We could make the interval not */ +/* as a bracket but to be modified if the RQCORR */ +/* chooses to. In this case, the RIGHT side should */ +/* be modified as follows: */ +/* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ + } else { +/* The current LAMBDA is on the right of the true */ +/* eigenvalue */ + right = lambda; +/* See comment about assuming the error estimate is */ +/* correct above. */ +/* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ + } + work[windex] = (right + left) * .5; +/* Take RQCORR since it has the correct sign and */ +/* improves the iterate reasonably */ + lambda += rqcorr; +/* Update width of error interval */ + werr[windex] = (right - left) * .5; + } else { + needbs = TRUE_; + } + if (right - left < rqtol * abs(lambda)) { +/* The eigenvalue is computed to bisection accuracy */ +/* compute eigenvector and stop */ + usedbs = TRUE_; + goto L120; + } else if (iter < 10) { + goto L120; + } else if (iter == 10) { + needbs = TRUE_; + goto L120; + } else { + *info = 5; + return 0; + } + } else { + stp2ii = FALSE_; + if (usedrq && usedbs && bstres <= resid) { + lambda = bstw; + stp2ii = TRUE_; + } + if (stp2ii) { +/* improve error angle by second step */ + L__1 = ! 
usedbs; + _starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin] +, &l[ibegin], &work[indld + ibegin - + 1], &work[indlld + ibegin - 1], + pivmin, &gaptol, &z__[ibegin + windex + * z_dim1], &L__1, &negcnt, &ztz, & + mingma, &iwork[iindr + windex], & + isuppz[(windex << 1) - 1], &nrminv, & + resid, &rqcorr, &work[indwrk]); + } + work[windex] = lambda; + } + +/* Compute FP-vector support w.r.t. whole matrix */ + + isuppz[(windex << 1) - 1] += oldien; + isuppz[windex * 2] += oldien; + zfrom = isuppz[(windex << 1) - 1]; + zto = isuppz[windex * 2]; + isupmn += oldien; + isupmx += oldien; +/* Ensure vector is ok if support in the RQI has changed */ + if (isupmn < zfrom) { + i__4 = zfrom - 1; + for (ii = isupmn; ii <= i__4; ++ii) { + z__[ii + windex * z_dim1] = 0.; +/* L122: */ + } + } + if (isupmx > zto) { + i__4 = isupmx; + for (ii = zto + 1; ii <= i__4; ++ii) { + z__[ii + windex * z_dim1] = 0.; +/* L123: */ + } + } + i__4 = zto - zfrom + 1; + _starpu_dscal_(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], + &c__1); +L125: +/* Update W */ + w[windex] = lambda + sigma; +/* Recompute the gaps on the left and right */ +/* But only allow them to become larger and not */ +/* smaller (which can only happen through "bad" */ +/* cancellation and doesn't reflect the theory */ +/* where the initial gaps are underestimated due */ +/* to WERR being too crude.) */ + if (! 
eskip) { + if (k > 1) { +/* Computing MAX */ + d__1 = wgap[windmn], d__2 = w[windex] - werr[ + windex] - w[windmn] - werr[windmn]; + wgap[windmn] = max(d__1,d__2); + } + if (windex < wend) { +/* Computing MAX */ + d__1 = savgap, d__2 = w[windpl] - werr[windpl] + - w[windex] - werr[windex]; + wgap[windex] = max(d__1,d__2); + } + } + ++idone; + } +/* here ends the code for the current child */ + +L139: +/* Proceed to any remaining child nodes */ + newfst = j + 1; +L140: + ; + } +/* L150: */ + } + ++ndepth; + goto L40; + } + ibegin = iend + 1; + wbegin = wend + 1; +L170: + ; + } + + return 0; + +/* End of DLARRV */ + +} /* _starpu_dlarrv_ */ diff --git a/min-dgels/base/SRC/dlarscl2.c b/min-dgels/base/SRC/dlarscl2.c new file mode 100644 index 0000000..18d7d99 --- /dev/null +++ b/min-dgels/base/SRC/dlarscl2.c @@ -0,0 +1,90 @@ +/* dlarscl2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx) +{ + /* System generated locals */ + integer x_dim1, x_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. 
*/ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARSCL2 performs a reciprocal diagonal scaling on an M-by-N matrix X: */ +/* x <-- inv(D) * x */ +/* where the diagonal matrix D is stored as a vector, i.e. X(i,j) = X(i,j) / D(i). */ + +/* Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */ +/* standard. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of D and X. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of X. N >= 0. */ + +/* D (input) DOUBLE PRECISION array, length M */ +/* Diagonal matrix D, stored as a vector of length M. */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ +/* On entry, the matrix X to be scaled by D. */ +/* On exit, the scaled matrix. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= M. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift the base pointers so the 1-based, column-major Fortran indices can be used below */ + --d__; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body: divide every entry of row i of X by D(i); zero entries of D are not checked for */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] /= d__[i__]; + } + } + return 0; +} /* _starpu_dlarscl2_ */ diff --git a/min-dgels/base/SRC/dlartg.c b/min-dgels/base/SRC/dlartg.c new file mode 100644 index 0000000..b9e8125 --- /dev/null +++ b/min-dgels/base/SRC/dlartg.c @@ -0,0 +1,190 @@ +/* dlartg.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal f1, g1, eps, scale; + integer count; + doublereal safmn2, safmx2; + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARTG generates a plane rotation so that */ + +/* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. */ +/* [ -SN CS ] [ G ] [ 0 ] */ + +/* This is a slower, more accurate version of the BLAS1 routine DROTG, */ +/* with the following other differences: */ +/* F and G are unchanged on return. */ +/* If G=0, then CS=1 and SN=0. */ +/* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any */ +/* floating point operations (saves work in DBDSQR when */ +/* there are zeros on the diagonal). */ + +/* If F exceeds G in magnitude, CS will be positive. */ + +/* Arguments */ +/* ========= */ + +/* F (input) DOUBLE PRECISION */ +/* The first component of vector to be rotated. */ + +/* G (input) DOUBLE PRECISION */ +/* The second component of vector to be rotated. 
*/ + +/* CS (output) DOUBLE PRECISION */ +/* The cosine of the rotation. */ + +/* SN (output) DOUBLE PRECISION */ +/* The sine of the rotation. */ + +/* R (output) DOUBLE PRECISION */ +/* The nonzero component of the rotated vector. */ + +/* This version has a few statements commented out for thread safety */ +/* (machine parameters are computed on each entry). 10 feb 03, SJH. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* LOGICAL FIRST */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 */ +/* .. */ +/* .. Data statements .. */ +/* DATA FIRST / .TRUE. / */ +/* .. */ +/* .. Executable Statements .. */ + +/* IF( FIRST ) THEN */ + safmin = _starpu_dlamch_("S"); + eps = _starpu_dlamch_("E"); + d__1 = _starpu_dlamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(_starpu_dlamch_("B")) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* FIRST = .FALSE. */ +/* END IF */ + if (*g == 0.) { + *cs = 1.; + *sn = 0.; + *r__ = *f; + } else if (*f == 0.) 
{ + *cs = 0.; + *sn = 1.; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX: SCALE = max(|F1|,|G1|) chooses between the scaled-down, scaled-up and unscaled branches below */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: each pass undoes one of the COUNT multiplications by SAFMN2 applied to F1 and G1 */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: each pass undoes one of the COUNT multiplications by SAFMX2 applied to F1 and G1 */ + } + } else { +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (abs(*f) > abs(*g) && *cs < 0.) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of DLARTG */ + +} /* _starpu_dlartg_ */ diff --git a/min-dgels/base/SRC/dlartv.c b/min-dgels/base/SRC/dlartv.c new file mode 100644 index 0000000..6cff5d1 --- /dev/null +++ b/min-dgels/base/SRC/dlartv.c @@ -0,0 +1,106 @@ +/* dlartv.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, + doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer + *incc) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ic, ix, iy; + doublereal xi, yi; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARTV applies a vector of real plane rotations to elements of the */ +/* real vectors x and y. For i = 1,2,...,n */ + +/* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) */ +/* ( y(i) ) ( -s(i) c(i) ) ( y(i) ) */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of plane rotations to be applied. */ + +/* X (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCX) */ +/* The vector x. */ + +/* INCX (input) INTEGER */ +/* The increment between elements of X. INCX > 0. */ + +/* Y (input/output) DOUBLE PRECISION array, */ +/* dimension (1+(N-1)*INCY) */ +/* The vector y. */ + +/* INCY (input) INTEGER */ +/* The increment between elements of Y. INCY > 0. */ + +/* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ +/* The cosines of the plane rotations. */ + +/* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ +/* The sines of the plane rotations. */ + +/* INCC (input) INTEGER */ +/* The increment between elements of C and S. 
INCC > 0. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift each base pointer by one element so the 1-based Fortran indices can be used below */ + --s; + --c__; + --y; + --x; + + /* Function Body: IX, IY and IC are the 1-based positions of the current elements of X, Y and of C/S; both old values are read into XI, YI before either is overwritten */ + ix = 1; + iy = 1; + ic = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + xi = x[ix]; + yi = y[iy]; + x[ix] = c__[ic] * xi + s[ic] * yi; + y[iy] = c__[ic] * yi - s[ic] * xi; + ix += *incx; + iy += *incy; + ic += *incc; +/* L10: */ + } + return 0; + +/* End of DLARTV */ + +} /* _starpu_dlartv_ */ diff --git a/min-dgels/base/SRC/dlaruv.c b/min-dgels/base/SRC/dlaruv.c new file mode 100644 index 0000000..c96bce9 --- /dev/null +++ b/min-dgels/base/SRC/dlaruv.c @@ -0,0 +1,192 @@ +/* dlaruv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x) +{ + /* Initialized data */ + + static integer mm[512] /* was [128][4] */ = { 494,2637,255,2008,1253, + 3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016, + 154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657, + 3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797, + 1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287, + 2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094, + 
1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119, + 3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090, + 3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364,
1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573, + 1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46, + 3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019, + 1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640, + 2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336, + 1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168, + 1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270, + 2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631, + 1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948, + 1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716, + 1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966, + 758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078, + 3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125, + 2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466, + 4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449, + 1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922, + 2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039, + 1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76, + 3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888, + 1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549, + 1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673, + 541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157, + 1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85, + 3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941, + 929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997, + 1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909, + 2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141, + 249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825, + 157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821, + 3537,517,3017,2141,1537 }; + + /* System generated 
locals */ + integer i__1; + + /* Local variables */ + integer i__, i1, i2, i3, i4, it1, it2, it3, it4; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARUV returns a vector of n random real numbers from a uniform (0,1) */ +/* distribution (n <= 128). */ + +/* This is an auxiliary routine called by DLARNV and ZLARNV. */ + +/* Arguments */ +/* ========= */ + +/* ISEED (input/output) INTEGER array, dimension (4) */ +/* On entry, the seed of the random number generator; the array */ +/* elements must be between 0 and 4095, and ISEED(4) must be */ +/* odd. */ +/* On exit, the seed is updated. */ + +/* N (input) INTEGER */ +/* The number of random numbers to be generated. N <= 128. */ + +/* X (output) DOUBLE PRECISION array, dimension (N) */ +/* The generated random numbers. */ + +/* Further Details */ +/* =============== */ + +/* This routine uses a multiplicative congruential method with modulus */ +/* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ +/* 'Multiplicative congruential random number generators with modulus */ +/* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ +/* b = 48', Math. Comp. 189, pp 331-344, 1990). */ + +/* 48-bit integers are stored in 4 integer array elements with 12 bits */ +/* per element. Hence the routine is portable across machines with */ +/* integers of 32 bits or more. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + --iseed; + --x; + + /* Function Body */ +/* .. */ +/* .. Executable Statements .. 
*/ + if (*n < 1) { return 0; } /* Quick return for N < 1: the loop below would not run, IT1..IT4 would never be assigned, and storing them would overwrite ISEED with indeterminate values */ + i1 = iseed[1]; + i2 = iseed[2]; + i3 = iseed[3]; + i4 = iseed[4]; + + i__1 = min(*n,128); + for (i__ = 1; i__ <= i__1; ++i__) { + +L20: + +/* Multiply the seed by i-th power of the multiplier modulo 2**48 */ + + it4 = i4 * mm[i__ + 383]; + it3 = it4 / 4096; + it4 -= it3 << 12; + it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255]; + it2 = it3 / 4096; + it3 -= it2 << 12; + it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + + 127]; + it1 = it2 / 4096; + it2 -= it1 << 12; + it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + + 127] + i4 * mm[i__ - 1]; + it1 %= 4096; + +/* Convert 48-bit integer to a real number in the interval (0,1) */ + + x[i__] = ((doublereal) it1 + ((doublereal) it2 + ((doublereal) it3 + ( + doublereal) it4 * 2.44140625e-4) * 2.44140625e-4) * + 2.44140625e-4) * 2.44140625e-4; + + if (x[i__] == 1.) { +/* If a real number has n bits of precision, and the first */ +/* n bits of the 48-bit integer above happen to be all 1 (which */ +/* will occur about once every 2**n calls), then X( I ) will */ +/* be rounded to exactly 1.0. */ +/* Since X( I ) is not supposed to return exactly 0.0 or 1.0, */ +/* the statistically correct thing to do in this situation is */ +/* simply to iterate again. */ +/* N.B. the case X( I ) = 0.0 should not be possible. */ + i1 += 2; + i2 += 2; + i3 += 2; + i4 += 2; + goto L20; + } + +/* L10: */ + } + +/* Return final value of seed (IT1..IT4 hold the state that produced the last number; reached only when at least one number was generated) */ + + iseed[1] = it1; + iseed[2] = it2; + iseed[3] = it3; + iseed[4] = it4; + return 0; + +/* End of DLARUV */ + +} /* _starpu_dlaruv_ */ diff --git a/min-dgels/base/SRC/dlarz.c b/min-dgels/base/SRC/dlarz.c new file mode 100644 index 0000000..f31bbaf --- /dev/null +++ b/min-dgels/base/SRC/dlarz.c @@ -0,0 +1,194 @@ +/* dlarz.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b5 = 1.; + +/* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, + doublereal *v, integer *incv, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + ; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARZ applies a real elementary reflector H to a real M-by-N */ +/* matrix C, from either the left or the right. H is represented in the */ +/* form */ + +/* H = I - tau * v * v' */ + +/* where tau is a real scalar and v is a real vector. */ + +/* If tau = 0, then H is taken to be the unit matrix. 
*/ + + +/* H is a product of k elementary reflectors as returned by DTZRZF. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': form H * C */ +/* = 'R': form C * H */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* L (input) INTEGER */ +/* The number of entries of the vector V containing */ +/* the meaningful part of the Householder vectors. */ +/* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ + +/* V (input) DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV)) */ +/* The vector v in the representation of H as returned by */ +/* DTZRZF. V is not used if TAU = 0. */ + +/* INCV (input) INTEGER */ +/* The increment between elements of v. INCV <> 0. */ + +/* TAU (input) DOUBLE PRECISION */ +/* The value tau in the representation of H. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* or C * H if SIDE = 'R'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* or (M) if SIDE = 'R' */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift the base pointers so the 1-based, column-major Fortran indices can be used below */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (_starpu_lsame_(side, "L")) { + +/* Form H * C */ + + if (*tau != 0.) 
{ + +/* w( 1:n ) = C( 1, 1:n ) */ + + _starpu_dcopy_(n, &c__[c_offset], ldc, &work[1], &c__1); + +/* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) */ + + _starpu_dgemv_("Transpose", l, n, &c_b5, &c__[*m - *l + 1 + c_dim1], ldc, + &v[1], incv, &c_b5, &work[1], &c__1); + +/* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */ + + d__1 = -(*tau); + _starpu_daxpy_(n, &d__1, &work[1], &c__1, &c__[c_offset], ldc); + +/* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */ +/* tau * v( 1:l ) * w( 1:n )' */ + + d__1 = -(*tau); + _starpu_dger_(l, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l + 1 + + c_dim1], ldc); + } + + } else { + +/* Form C * H (this branch is taken for any SIDE other than 'L') */ + + if (*tau != 0.) { + +/* w( 1:m ) = C( 1:m, 1 ) */ + + _starpu_dcopy_(m, &c__[c_offset], &c__1, &work[1], &c__1); + +/* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n ) * v( 1:l ) */ + + _starpu_dgemv_("No transpose", m, l, &c_b5, &c__[(*n - *l + 1) * c_dim1 + + 1], ldc, &v[1], incv, &c_b5, &work[1], &c__1); + +/* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */ + + d__1 = -(*tau); + _starpu_daxpy_(m, &d__1, &work[1], &c__1, &c__[c_offset], &c__1); + +/* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */ +/* tau * w( 1:m ) * v( 1:l )' */ + + d__1 = -(*tau); + _starpu_dger_(m, l, &d__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l + + 1) * c_dim1 + 1], ldc); + + } + + } + + return 0; + +/* End of DLARZ */ + +} /* _starpu_dlarz_ */ diff --git a/min-dgels/base/SRC/dlarzb.c b/min-dgels/base/SRC/dlarzb.c new file mode 100644 index 0000000..15ecdeb --- /dev/null +++ b/min-dgels/base/SRC/dlarzb.c @@ -0,0 +1,288 @@ +/* dlarzb.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b13 = 1.; +static doublereal c_b23 = -1.; + +/* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, + integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * + ldc, doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, info; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + char *, integer *); + char transt[1]; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARZB applies a real block reflector H or its transpose H**T to */ +/* a real M-by-N matrix C from the left or the right. 
*/ + +/* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply H or H' from the Left */ +/* = 'R': apply H or H' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply H (No transpose) */ +/* = 'C': apply H' (Transpose) */ + +/* DIRECT (input) CHARACTER*1 */ +/* Indicates how H is formed from a product of elementary */ +/* reflectors */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Indicates how the vectors which define the elementary */ +/* reflectors are stored: */ +/* = 'C': Columnwise (not supported yet) */ +/* = 'R': Rowwise */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* K (input) INTEGER */ +/* The order of the matrix T (= the number of elementary */ +/* reflectors whose product defines the block reflector). */ + +/* L (input) INTEGER */ +/* The number of columns of the matrix V containing the */ +/* meaningful part of the Householder reflectors. */ +/* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ + +/* V (input) DOUBLE PRECISION array, dimension (LDV,NV). */ +/* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The triangular K-by-K matrix T in the representation of the */ +/* block reflector. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. 
LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ + +/* LDWORK (input) INTEGER */ +/* The leading dimension of the array WORK. */ +/* If SIDE = 'L', LDWORK >= max(1,N); */ +/* if SIDE = 'R', LDWORK >= max(1,M). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible (the M/N test itself follows the parameter adjustments) */ + + /* Parameter adjustments: shift the base pointers so the 1-based, column-major Fortran indices can be used below */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + +/* Check for currently supported options; an unsupported DIRECT or STOREV is reported through XERBLA and C is left untouched */ + + info = 0; + if (! _starpu_lsame_(direct, "B")) { + info = -3; + } else if (! _starpu_lsame_(storev, "R")) { + info = -4; + } + if (info != 0) { + i__1 = -info; + _starpu_xerbla_("DLARZB", &i__1); + return 0; + } + + if (_starpu_lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (_starpu_lsame_(side, "L")) { + +/* Form H * C or H' * C */ + +/* W( 1:n, 1:k ) = C( 1:k, 1:n )' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... 
*/ +/* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' */ + + if (*l > 0) { + _starpu_dgemm_("Transpose", "Transpose", n, k, l, &c_b13, &c__[*m - *l + + 1 + c_dim1], ldc, &v[v_offset], ldv, &c_b13, &work[ + work_offset], ldwork); + } + +/* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:n, 1:k ) * T */ + + _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b13, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[j + i__ * work_dim1]; +/* L20: */ + } +/* L30: */ + } + +/* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */ +/* V( 1:k, 1:l )' * W( 1:n, 1:k )' */ + + if (*l > 0) { + _starpu_dgemm_("Transpose", "Transpose", l, n, k, &c_b23, &v[v_offset], + ldv, &work[work_offset], ldwork, &c_b13, &c__[*m - *l + 1 + + c_dim1], ldc); + } + + } else if (_starpu_lsame_(side, "R")) { + +/* Form C * H or C * H' (a SIDE that is neither 'L' nor 'R' matches no branch and leaves C unchanged) */ + +/* W( 1:m, 1:k ) = C( 1:m, 1:k ) */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], & + c__1); +/* L40: */ + } + +/* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */ +/* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' */ + + if (*l > 0) { + _starpu_dgemm_("No transpose", "Transpose", m, k, l, &c_b13, &c__[(*n - * + l + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b13, & + work[work_offset], ldwork); + } + +/* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' */ + + _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b13, &t[t_offset] +, ldt, &work[work_offset], ldwork); + +/* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + +/* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... 
*/ +/* W( 1:m, 1:k ) * V( 1:k, 1:l ) */ + + if (*l > 0) { + _starpu_dgemm_("No transpose", "No transpose", m, l, k, &c_b23, &work[ + work_offset], ldwork, &v[v_offset], ldv, &c_b13, &c__[(*n + - *l + 1) * c_dim1 + 1], ldc); + } + + } + + return 0; + +/* End of DLARZB */ + +} /* _starpu_dlarzb_ */ diff --git a/min-dgels/base/SRC/dlarzt.c b/min-dgels/base/SRC/dlarzt.c new file mode 100644 index 0000000..ff70e05 --- /dev/null +++ b/min-dgels/base/SRC/dlarzt.c @@ -0,0 +1,229 @@ +/* dlarzt.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b8 = 0.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer i__, j, info; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dtrmv_(char *, + char *, char *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLARZT forms the triangular factor T of a real block reflector */ +/* H of order > n, which is defined as a product of k elementary */ +/* reflectors. */ + +/* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ + +/* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ + +/* If STOREV = 'C', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th column of the array V, and */ + +/* H = I - V * T * V' */ + +/* If STOREV = 'R', the vector which defines the elementary reflector */ +/* H(i) is stored in the i-th row of the array V, and */ + +/* H = I - V' * T * V */ + +/* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */ + +/* Arguments */ +/* ========= */ + +/* DIRECT (input) CHARACTER*1 */ +/* Specifies the order in which the elementary reflectors are */ +/* multiplied to form the block reflector: */ +/* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */ +/* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ + +/* STOREV (input) CHARACTER*1 */ +/* Specifies how the vectors which define the elementary */ +/* reflectors are stored (see also Further Details): */ +/* = 'C': columnwise (not supported yet) */ +/* = 'R': rowwise */ + +/* N (input) INTEGER */ +/* The order of the block reflector H. N >= 0. */ + +/* K (input) INTEGER */ +/* The order of the triangular factor T (= the number of */ +/* elementary reflectors). K >= 1. */ + +/* V (input/output) DOUBLE PRECISION array, dimension */ +/* (LDV,K) if STOREV = 'C' */ +/* (LDV,N) if STOREV = 'R' */ +/* The matrix V. See further details. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. */ +/* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i). 
*/ + +/* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ +/* The k by k triangular factor T of the block reflector. */ +/* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* lower triangular. The rest of the array is not used. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= K. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* The shape of the matrix V and the storage of the vectors which define */ +/* the H(i) is best illustrated by the following example with n = 5 and */ +/* k = 3. The elements equal to 1 are not stored; the corresponding */ +/* array elements are modified but restored on exit. The rest of the */ +/* array is not used. */ + +/* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ + +/* ______V_____ */ +/* ( v1 v2 v3 ) / \ */ +/* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) */ +/* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) */ +/* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) */ +/* ( v1 v2 v3 ) */ +/* . . . */ +/* . . . */ +/* 1 . . */ +/* 1 . */ +/* 1 */ + +/* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ + +/* ______V_____ */ +/* 1 / \ */ +/* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) */ +/* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) */ +/* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) */ +/* . . . */ +/* ( v1 v2 v3 ) */ +/* ( v1 v2 v3 ) */ +/* V = ( v1 v2 v3 ) */ +/* ( v1 v2 v3 ) */ +/* ( v1 v2 v3 ) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Check for currently supported options */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + info = 0; + if (! _starpu_lsame_(direct, "B")) { + info = -1; + } else if (! _starpu_lsame_(storev, "R")) { + info = -2; + } + if (info != 0) { + i__1 = -info; + _starpu_xerbla_("DLARZT", &i__1); + return 0; + } + + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + +/* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' */ + + i__1 = *k - i__; + d__1 = -tau[i__]; + _starpu_dgemv_("No transpose", &i__1, n, &d__1, &v[i__ + 1 + v_dim1], + ldv, &v[i__ + v_dim1], ldv, &c_b8, &t[i__ + 1 + i__ * + t_dim1], &c__1); + +/* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1 + + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1] +, &c__1); + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L20: */ + } + return 0; + +/* End of DLARZT */ + +} /* _starpu_dlarzt_ */ diff --git a/min-dgels/base/SRC/dlas2.c b/min-dgels/base/SRC/dlas2.c new file mode 100644 index 0000000..cafe484 --- /dev/null +++ b/min-dgels/base/SRC/dlas2.c @@ -0,0 +1,144 @@ +/* dlas2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAS2 computes the singular values of the 2-by-2 matrix */ +/* [ F G ] */ +/* [ 0 H ]. */ +/* On return, SSMIN is the smaller singular value and SSMAX is the */ +/* larger singular value. */ + +/* Arguments */ +/* ========= */ + +/* F (input) DOUBLE PRECISION */ +/* The (1,1) element of the 2-by-2 matrix. */ + +/* G (input) DOUBLE PRECISION */ +/* The (1,2) element of the 2-by-2 matrix. */ + +/* H (input) DOUBLE PRECISION */ +/* The (2,2) element of the 2-by-2 matrix. */ + +/* SSMIN (output) DOUBLE PRECISION */ +/* The smaller singular value. */ + +/* SSMAX (output) DOUBLE PRECISION */ +/* The larger singular value. */ + +/* Further Details */ +/* =============== */ + +/* Barring over/underflow, all output quantities are correct to within */ +/* a few units in the last place (ulps), even in the absence of a guard */ +/* digit in addition/subtraction. */ + +/* In IEEE arithmetic, the code works correctly if one matrix element is */ +/* infinite. 
*/ + +/* Overflow will not occur unless the largest singular value itself */ +/* overflows, or is within a few ulps of overflow. (On machines with */ +/* partial overflow, like the Cray, overflow may occur if the largest */ +/* singular value is within a factor of 2 of overflow.) */ + +/* Underflow is harmless if underflow is gradual. Otherwise, results */ +/* may correspond to a matrix modified by perturbations of size near */ +/* the underflow threshold. */ + +/* ==================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.) { + +/* Avoid possible harmful underflow if exponent range */ +/* asymmetric (true SSMIN may not underflow even if */ +/* AU underflows) */ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = as * au; +/* Computing 2nd power */ + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) 
+ sqrt(d__2 * d__2 + 1.)); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of DLAS2 */ + +} /* _starpu_dlas2_ */ diff --git a/min-dgels/base/SRC/dlascl.c b/min-dgels/base/SRC/dlascl.c new file mode 100644 index 0000000..c6bf9ae --- /dev/null +++ b/min-dgels/base/SRC/dlascl.c @@ -0,0 +1,354 @@ +/* dlascl.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, k1, k2, k3, k4; + doublereal mul, cto1; + logical done; + doublereal ctoc; + extern logical _starpu_lsame_(char *, char *); + integer itype; + doublereal cfrom1; + extern doublereal _starpu_dlamch_(char *); + doublereal cfromc; + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASCL multiplies the M by N real matrix A by the real scalar */ +/* CTO/CFROM. 
This is done without over/underflow as long as the final */ +/* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ +/* A may be full, upper triangular, lower triangular, upper Hessenberg, */ +/* or banded. */ + +/* Arguments */ +/* ========= */ + +/* TYPE (input) CHARACTER*1 */ +/* TYPE indicates the storage type of the input matrix. */ +/* = 'G': A is a full matrix. */ +/* = 'L': A is a lower triangular matrix. */ +/* = 'U': A is an upper triangular matrix. */ +/* = 'H': A is an upper Hessenberg matrix. */ +/* = 'B': A is a symmetric band matrix with lower bandwidth KL */ +/* and upper bandwidth KU and with only the lower */ +/* half stored. */ +/* = 'Q': A is a symmetric band matrix with lower bandwidth KL */ +/* and upper bandwidth KU and with only the upper */ +/* half stored. */ +/* = 'Z': A is a band matrix with lower bandwidth KL and upper */ +/* bandwidth KU. */ + +/* KL (input) INTEGER */ +/* The lower bandwidth of A. Referenced only if TYPE = 'B', */ +/* 'Q' or 'Z'. */ + +/* KU (input) INTEGER */ +/* The upper bandwidth of A. Referenced only if TYPE = 'B', */ +/* 'Q' or 'Z'. */ + +/* CFROM (input) DOUBLE PRECISION */ +/* CTO (input) DOUBLE PRECISION */ +/* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ +/* without over/underflow if the final result CTO*A(I,J)/CFROM */ +/* can be represented without over/underflow. CFROM must be */ +/* nonzero. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The matrix to be multiplied by CTO/CFROM. See TYPE for the */ +/* storage type. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* INFO (output) INTEGER */ +/* 0 - successful exit */ +/* <0 - if INFO = -i, the i-th argument had an illegal value. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (_starpu_lsame_(type__, "G")) { + itype = 0; + } else if (_starpu_lsame_(type__, "L")) { + itype = 1; + } else if (_starpu_lsame_(type__, "U")) { + itype = 2; + } else if (_starpu_lsame_(type__, "H")) { + itype = 3; + } else if (_starpu_lsame_(type__, "B")) { + itype = 4; + } else if (_starpu_lsame_(type__, "Q")) { + itype = 5; + } else if (_starpu_lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || _starpu_disnan_(cfrom)) { + *info = -4; + } else if (_starpu_disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. 
/ smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* CFROMC is an inf. Multiply by a correctly signed zero for */ +/* finite CTOC, or a NaN if CTOC is infinite. */ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* CTOC is either 0 or an inf. In both cases, CTOC itself */ +/* serves as the correct multiplication factor. */ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + 
+ } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* _starpu_dlascl_ */ diff --git a/min-dgels/base/SRC/dlascl2.c b/min-dgels/base/SRC/dlascl2.c new file mode 100644 index 0000000..d6695c1 --- /dev/null +++ b/min-dgels/base/SRC/dlascl2.c @@ -0,0 +1,90 @@ +/* dlascl2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, + doublereal *x, integer *ldx) +{ + /* System generated locals */ + integer x_dim1, x_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. 
-- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASCL2 performs a diagonal scaling on a vector: */ +/* x <-- D * x */ +/* where the diagonal matrix D is stored as a vector. */ + +/* Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */ +/* standard. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of D and X. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of D and X. N >= 0. */ + +/* D (input) DOUBLE PRECISION array, length M */ +/* Diagonal matrix D, stored as a vector of length M. */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ +/* On entry, the vector X to be scaled by D. */ +/* On exit, the scaled vector. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the vector X. LDX >= 0. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --d__; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] *= d__[i__]; + } + } + return 0; +} /* _starpu_dlascl2_ */ diff --git a/min-dgels/base/SRC/dlasd0.c b/min-dgels/base/SRC/dlasd0.c new file mode 100644 index 0000000..e3f36d9 --- /dev/null +++ b/min-dgels/base/SRC/dlasd0.c @@ -0,0 +1,291 @@ +/* dlasd0.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, + lvl, ndb1, nlp1, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int _starpu_dlasd1_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *), _starpu_dlasdq_(char *, integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), _starpu_xerbla_( + char *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* Using a divide and conquer approach, DLASD0 computes the singular */ +/* value decomposition (SVD) of a real upper bidiagonal N-by-M */ +/* matrix B with diagonal D and offdiagonal E, where M = N + SQRE. */ +/* The algorithm computes orthogonal matrices U and VT such that */ +/* B = U * S * VT. The singular values S are overwritten on D. */ + +/* A related subroutine, DLASDA, computes only the singular values, */ +/* and optionally, the singular vectors in compact form. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* On entry, the row dimension of the upper bidiagonal matrix. */ +/* This is also the dimension of the main diagonal array D. */ + +/* SQRE (input) INTEGER */ +/* Specifies the column dimension of the bidiagonal matrix. */ +/* = 0: The bidiagonal matrix has column dimension M = N; */ +/* = 1: The bidiagonal matrix has column dimension M = N+1; */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry D contains the main diagonal of the bidiagonal */ +/* matrix. */ +/* On exit D, if INFO = 0, contains its singular values. */ + +/* E (input) DOUBLE PRECISION array, dimension (M-1) */ +/* Contains the subdiagonal entries of the bidiagonal matrix. */ +/* On exit, E has been destroyed. */ + +/* U (output) DOUBLE PRECISION array, dimension at least (LDU, N) */ +/* On exit, U contains the left singular vectors. */ + +/* LDU (input) INTEGER */ +/* On entry, leading dimension of U. */ + +/* VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) */ +/* On exit, VT' contains the right singular vectors. */ + +/* LDVT (input) INTEGER */ +/* On entry, leading dimension of VT. */ + +/* SMLSIZ (input) INTEGER */ +/* On entry, maximum size of the subproblems at the */ +/* bottom of the computation tree. */ + +/* IWORK (workspace) INTEGER work array. */ +/* Dimension must be at least (8 * N) */ + +/* WORK (workspace) DOUBLE PRECISION work array. 
*/ +/* Dimension must be at least (3 * M**2 + 2 * M) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, an singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + _starpu_dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* For the nodes on bottom level of the tree, solve */ +/* their subproblems by DLASDQ. 
*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* IC : center row of each node */ +/* NL : number of rows of left subproblem */ +/* NR : number of rows of right subproblem */ +/* NLF: starting row of the left subproblem */ +/* NRF: starting row of the right subproblem */ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* Find the first node LF and last node LL on the */ +/* current level LVL. 
*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + _starpu_dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASD0 */ + +} /* _starpu_dlasd0_ */ diff --git a/min-dgels/base/SRC/dlasd1.c b/min-dgels/base/SRC/dlasd1.c new file mode 100644 index 0000000..5c5c750 --- /dev/null +++ b/min-dgels/base/SRC/dlasd1.c @@ -0,0 +1,288 @@ +/* dlasd1.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, k, 
m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, + idxp, ldvt2; + extern /* Subroutine */ int _starpu_dlasd2_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), _starpu_dlasd3_( + integer *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, + integer *); + integer isigma; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal orgnrm; + integer coltyp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, */ +/* where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. */ + +/* A related subroutine DLASD7 handles the case in which the singular */ +/* values (and the singular vectors in factored form) are desired. */ + +/* DLASD1 computes the SVD as follows: */ + +/* ( D1(in) 0 0 0 ) */ +/* B = U(in) * ( Z1' a Z2' b ) * VT(in) */ +/* ( 0 0 D2(in) 0 ) */ + +/* = U(out) * ( D(out) 0) * VT(out) */ + +/* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */ +/* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */ +/* elsewhere; and the entry b is empty if SQRE = 0. 
*/ + +/* The left singular vectors of the original matrix are stored in U, and */ +/* the transpose of the right singular vectors are stored in VT, and the */ +/* singular values are in D. The algorithm consists of three stages: */ + +/* The first stage consists of deflating the size of the problem */ +/* when there are multiple singular values or when there are zeros in */ +/* the Z vector. For each such occurrence the dimension of the */ +/* secular equation problem is reduced by one. This stage is */ +/* performed by the routine DLASD2. */ + +/* The second stage consists of calculating the updated */ +/* singular values. This is done by finding the square roots of the */ +/* roots of the secular equation via the routine DLASD4 (as called */ +/* by DLASD3). This routine also calculates the singular vectors of */ +/* the current problem. */ + +/* The final stage consists of computing the updated singular vectors */ +/* directly using the updated singular values. The singular vectors */ +/* for the current problem are multiplied with the singular vectors */ +/* from the overall problem. */ + +/* Arguments */ +/* ========= */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has row dimension N = NL + NR + 1, */ +/* and column dimension M = N + SQRE. */ + +/* D (input/output) DOUBLE PRECISION array, */ +/* dimension (N = NL+NR+1). */ +/* On entry D(1:NL) contains the singular values of the */ +/* upper block; and D(NL+2:N) contains the singular values of */ +/* the lower block. On exit D(1:N) contains the singular values */ +/* of the modified matrix. */ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* Contains the diagonal element associated with the added row.
*/ + +/* BETA (input/output) DOUBLE PRECISION */ +/* Contains the off-diagonal element associated with the added */ +/* row. */ + +/* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) */ +/* On entry U(1:NL, 1:NL) contains the left singular vectors of */ +/* the upper block; U(NL+2:N, NL+2:N) contains the left singular */ +/* vectors of the lower block. On exit U contains the left */ +/* singular vectors of the bidiagonal matrix. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= max( 1, N ). */ + +/* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */ +/* where M = N + SQRE. */ +/* On entry VT(1:NL+1, 1:NL+1)' contains the right singular */ +/* vectors of the upper block; VT(NL+2:M, NL+2:M)' contains */ +/* the right singular vectors of the lower block. On exit */ +/* VT' contains the right singular vectors of the */ +/* bidiagonal matrix. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= max( 1, M ). */ + +/* IDXQ (output) INTEGER array, dimension(N) */ +/* This contains the permutation which will reintegrate the */ +/* subproblem just solved back into sorted order, i.e. */ +/* D( IDXQ( I = 1, N ) ) will be in ascending order. */ + +/* IWORK (workspace) INTEGER array, dimension( 4 * N ) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, a singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* ..
Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* The following values are for bookkeeping purposes only. They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLASD2 and DLASD3. */ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* Scale. */ + +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. */ + + _starpu_dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + _starpu_dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. 
*/ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = k; + n2 = n - k; + _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD1 */ + +} /* _starpu_dlasd1_ */ diff --git a/min-dgels/base/SRC/dlasd2.c b/min-dgels/base/SRC/dlasd2.c new file mode 100644 index 0000000..d5932ae --- /dev/null +++ b/min-dgels/base/SRC/dlasd2.c @@ -0,0 +1,609 @@ +/* dlasd2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b30 = 0.; + +/* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal c__; + integer i__, j, m, n; + doublereal s; + integer k2; + doublereal z1; + integer ct, jp; + doublereal eps, tau, tol; + integer psm[4], nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer 
ctot[4], idxjp; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer jprev; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal hlftol; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD2 merges the two sets of singular values together into a single */ +/* sorted set. Then it tries to deflate the size of the problem. */ +/* There are two ways in which deflation can occur: when two or more */ +/* singular values are close together or if there is a tiny entry in the */ +/* Z vector. For each such occurrence the order of the related secular */ +/* equation problem is reduced by one. */ + +/* DLASD2 is called from DLASD1. */ + +/* Arguments */ +/* ========= */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has N = NL + NR + 1 rows and */ +/* M = N + SQRE >= N columns. */ + +/* K (output) INTEGER */ +/* Contains the dimension of the non-deflated matrix, */ +/* This is the order of the related secular equation. 1 <= K <=N. 
*/ + +/* D (input/output) DOUBLE PRECISION array, dimension(N) */ +/* On entry D contains the singular values of the two submatrices */ +/* to be combined. On exit D contains the trailing (N-K) updated */ +/* singular values (those which were deflated) sorted into */ +/* increasing order. */ + +/* Z (output) DOUBLE PRECISION array, dimension(N) */ +/* On exit Z contains the updating row vector in the secular */ +/* equation. */ + +/* ALPHA (input) DOUBLE PRECISION */ +/* Contains the diagonal element associated with the added row. */ + +/* BETA (input) DOUBLE PRECISION */ +/* Contains the off-diagonal element associated with the added */ +/* row. */ + +/* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) */ +/* On entry U contains the left singular vectors of two */ +/* submatrices in the two square blocks with corners at (1,1), */ +/* (NL, NL), and (NL+2, NL+2), (N,N). */ +/* On exit U contains the trailing (N-K) updated left singular */ +/* vectors (those which were deflated) in its last N-K columns. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= N. */ + +/* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */ +/* On entry VT' contains the right singular vectors of two */ +/* submatrices in the two square blocks with corners at (1,1), */ +/* (NL+1, NL+1), and (NL+2, NL+2), (M,M). */ +/* On exit VT' contains the trailing (N-K) updated right singular */ +/* vectors (those which were deflated) in its last N-K columns. */ +/* In case SQRE =1, the last row of VT spans the right null */ +/* space. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= M. */ + +/* DSIGMA (output) DOUBLE PRECISION array, dimension (N) */ +/* Contains a copy of the diagonal elements (K-1 singular values */ +/* and one zero) in the secular equation. 
*/ + +/* U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) */ +/* Contains a copy of the first K-1 left singular vectors which */ +/* will be used by DLASD3 in a matrix multiply (DGEMM) to solve */ +/* for the new left singular vectors. U2 is arranged into four */ +/* blocks. The first block contains a column with 1 at NL+1 and */ +/* zero everywhere else; the second block contains non-zero */ +/* entries only at and above NL; the third contains non-zero */ +/* entries only below NL+1; and the fourth is dense. */ + +/* LDU2 (input) INTEGER */ +/* The leading dimension of the array U2. LDU2 >= N. */ + +/* VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) */ +/* VT2' contains a copy of the first K right singular vectors */ +/* which will be used by DLASD3 in a matrix multiply (DGEMM) to */ +/* solve for the new right singular vectors. VT2 is arranged into */ +/* three blocks. The first block contains a row that corresponds */ +/* to the special 0 diagonal element in SIGMA; the second block */ +/* contains non-zeros only at and before NL +1; the third block */ +/* contains non-zeros only at and after NL +2. */ + +/* LDVT2 (input) INTEGER */ +/* The leading dimension of the array VT2. LDVT2 >= M. */ + +/* IDXP (workspace) INTEGER array dimension(N) */ +/* This will contain the permutation used to place deflated */ +/* values of D at the end of the array. On output IDXP(2:K) */ +/* points to the nondeflated D-values and IDXP(K+1:N) */ +/* points to the deflated singular values. */ + +/* IDX (workspace) INTEGER array dimension(N) */ +/* This will contain the permutation used to sort the contents of */ +/* D into ascending order. */ + +/* IDXC (output) INTEGER array dimension(N) */ +/* This will contain the permutation used to arrange the columns */ +/* of the deflated U matrix into three groups: the first group */ +/* contains non-zero entries only at and above NL, the second */ +/* contains non-zero entries only below NL+2, and the third is */ +/* dense. 
*/ + +/* IDXQ (input/output) INTEGER array dimension(N) */ +/* This contains the permutation which separately sorts the two */ +/* sub-problems in D into ascending order. Note that entries in */ +/* the first half of this permutation must first be moved one */ +/* position backward; and entries in the second half */ +/* must first have NL+1 added to their values. */ + +/* COLTYP (workspace/output) INTEGER array dimension(N) */ +/* As workspace, this will contain a label which will indicate */ +/* which of the following types a column in the U2 matrix or a */ +/* row in the VT2 matrix is: */ +/* 1 : non-zero in the upper half only */ +/* 2 : non-zero in the lower half only */ +/* 3 : dense */ +/* 4 : deflated */ + +/* On exit, it is an array of dimension 4, with COLTYP(I) being */ +/* the dimension of the I-th type columns. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* Generate the first part of the vector Z; and move the singular */ +/* values in the first part of D one position backward. */ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. */ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* DSIGMA, IDXC, IDXC, and the first column of U2 */ +/* are used as storage space. 
*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + _starpu_dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = _starpu_dlamch_("Epsilon"); +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2,tol); + +/* There are 2 kinds of deflation -- first a value in the z-vector */ +/* is small, second two (or more) singular values are very close */ +/* together (their difference is small). */ + +/* If the value in the z-vector is small, we simply permute the */ +/* array so that the corresponding singular value is moved to the */ +/* end. */ + +/* If two values in the D-vector are close, we perform a two-sided */ +/* rotation designed to make one of the corresponding z-vector */ +/* entries zero, and then permute the array so that the deflated */ +/* singular value is moved to the end. */ + +/* If there are multiple singular values then the problem deflates. */ +/* Here the number of equal singular values are found. As each equal */ +/* singular value is found, an elementary reflector is computed to */ +/* rotate the corresponding singular subspace so that the */ +/* corresponding components of Z are zero in this new basis. */ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. 
*/ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = _starpu_dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + +/* Apply back the Givens rotation to the left and right */ +/* singular vector matrices. */ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + _starpu_drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + _starpu_drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. */ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* Count up the total number of the various types of columns, then */ +/* form a permutation which positions the four column types into */ +/* four groups of uniform structure (although one or more of these */ +/* groups may be empty). 
*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* Fill out the IDXC array so that the permutation which it induces */ +/* will place all type-1 columns first, all type-2 columns next, */ +/* then all type-3's, and finally all type-4's, starting from the */ +/* second column. This applies similarly to the rows of VT. */ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* Sort the singular values and corresponding singular vectors into */ +/* DSIGMA, U2, and VT2 respectively. The singular values/vectors */ +/* which were not deflated go into the first K slots of DSIGMA, U2, */ +/* and VT2 respectively, while those which were deflated go into the */ +/* last N - K slots, except that the first column/row will be treated */ +/* separately. */ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + _starpu_dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + _starpu_dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = _starpu_dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. 
*/ + + i__1 = *k - 1; + _starpu_dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* Determine the first column of U2, the first row of VT2 and the */ +/* last row of VT. */ + + _starpu_dlaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + _starpu_dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + _starpu_dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* The deflated singular values and their corresponding vectors go */ +/* into the back of D, U, and V respectively. */ + + if (n > *k) { + i__1 = n - *k; + _starpu_dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + _starpu_dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + _starpu_dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in DLASD3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of DLASD2 */ + +} /* _starpu_dlasd2_ */ diff --git a/min-dgels/base/SRC/dlasd3.c b/min-dgels/base/SRC/dlasd3.c new file mode 100644 index 0000000..743bd25 --- /dev/null +++ b/min-dgels/base/SRC/dlasd3.c @@ -0,0 +1,452 @@ +/* dlasd3.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b13 = 1.; +static doublereal c_b26 = 0.; + +/* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j, m, n, jc; + doublereal rho; + integer nlp1, nlp2, nrp1; + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ctemp; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer ktemp; + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer 
*), _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD3 finds all the square roots of the roots of the secular */ +/* equation, as defined by the values in D and Z. It makes the */ +/* appropriate calls to DLASD4 and then updates the singular */ +/* vectors by matrix multiplication. */ + +/* This code makes very mild assumptions about floating point */ +/* arithmetic. It will work on machines with a guard digit in */ +/* add/subtract, or on those binary machines without guard digits */ +/* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ +/* It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* DLASD3 is called from DLASD1. */ + +/* Arguments */ +/* ========= */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has N = NL + NR + 1 rows and */ +/* M = N + SQRE >= N columns. */ + +/* K (input) INTEGER */ +/* The size of the secular equation, 1 =< K = < N. */ + +/* D (output) DOUBLE PRECISION array, dimension(K) */ +/* On exit the square roots of the roots of the secular equation, */ +/* in ascending order. */ + +/* Q (workspace) DOUBLE PRECISION array, */ +/* dimension at least (LDQ,K). 
*/ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= K. */ + +/* DSIGMA (input) DOUBLE PRECISION array, dimension(K) */ +/* The first K elements of this array contain the old roots */ +/* of the deflated updating problem. These are the poles */ +/* of the secular equation. */ + +/* U (output) DOUBLE PRECISION array, dimension (LDU, N) */ +/* The last N - K columns of this matrix contain the deflated */ +/* left singular vectors. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= N. */ + +/* U2 (input/output) DOUBLE PRECISION array, dimension (LDU2, N) */ +/* The first K columns of this matrix contain the non-deflated */ +/* left singular vectors for the split problem. */ + +/* LDU2 (input) INTEGER */ +/* The leading dimension of the array U2. LDU2 >= N. */ + +/* VT (output) DOUBLE PRECISION array, dimension (LDVT, M) */ +/* The last M - K columns of VT' contain the deflated */ +/* right singular vectors. */ + +/* LDVT (input) INTEGER */ +/* The leading dimension of the array VT. LDVT >= M. */ + +/* VT2 (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) */ +/* The first K columns of VT2' contain the non-deflated */ +/* right singular vectors for the split problem. */ + +/* LDVT2 (input) INTEGER */ +/* The leading dimension of the array VT2. LDVT2 >= M. */ + +/* IDXC (input) INTEGER array, dimension ( N ) */ +/* The permutation used to arrange the columns of U (and rows of */ +/* VT) into three groups: the first group contains non-zero */ +/* entries only at and above (or before) NL +1; the second */ +/* contains non-zero entries only at and below (or after) NL+2; */ +/* and the third is dense. The first column of U and the row of */ +/* VT are treated separately, however. */ + +/* The rows of the singular vectors found by DLASD4 */ +/* must be likewise permuted before the matrix multiplies can */ +/* take place.
*/ + +/* CTOT (input) INTEGER array, dimension ( 4 ) */ +/* A count of the total number of the various types of columns */ +/* in U (or rows in VT), as described in IDXC. The fourth column */ +/* type is any column which has been deflated. */ + +/* Z (input) DOUBLE PRECISION array, dimension (K) */ +/* The first K elements of this array contain the components */ +/* of the deflation-adjusted updating row vector. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, a singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + _starpu_dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) { + _starpu_dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DSIGMA(I) if it is 1; this makes the subsequent */ +/* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ +/* occurs. 
On binary machines with a guard digit (almost all */ +/* machines) it does not change DSIGMA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DSIGMA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DSIGMA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = _starpu_dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + _starpu_dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = _starpu_dnrm2_(k, &z__[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* Compute left singular vectors of the modified diagonal matrix, */ +/* and store related information for the right singular vectors. 
*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = _starpu_dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. */ + + if (*k == 2) { + _starpu_dgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b26, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + _starpu_dgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + _starpu_dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1] +, ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], + ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + _starpu_dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu); + } else { + _starpu_dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + _starpu_dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + _starpu_dgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. 
*/ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = _starpu_dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. */ + + if (*k == 2) { + _starpu_dgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset] +, ldvt2, &c_b26, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + _starpu_dgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + _starpu_dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], + ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + _starpu_dgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + + 1], ldvt); + + return 0; + +/* End of DLASD3 */ + +} /* _starpu_dlasd3_ */ diff --git a/min-dgels/base/SRC/dlasd4.c b/min-dgels/base/SRC/dlasd4.c new file mode 100644 index 0000000..1a4cd08 --- /dev/null +++ b/min-dgels/base/SRC/dlasd4.c @@ -0,0 +1,1010 @@ +/* dlasd4.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal a, b, c__; + integer j; + doublereal w, dd[3]; + integer ii; + doublereal dw, zz[3]; + integer ip1; + doublereal eta, phi, eps, tau, psi; + integer iim1, iip1; + doublereal dphi, dpsi; + integer iter; + doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; + integer niter; + doublereal dtisq; + logical swtch; + doublereal dtnsq; + extern /* Subroutine */ int _starpu_dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , _starpu_dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal delsq2, dtnsq1; + logical swtch3; + extern doublereal _starpu_dlamch_(char *); + logical orgati; + doublereal erretm, dtipsq, rhoinv; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* This subroutine computes the square root of the I-th updated */ +/* eigenvalue of a positive symmetric rank-one modification to */ +/* a positive diagonal matrix whose entries are given as the squares */ +/* of the corresponding entries in the array d, and that */ + +/* 0 <= D(i) < D(j) for i < j */ + +/* and that RHO > 0. This is arranged by the calling routine, and is */ +/* no loss in generality. The rank-one modified system is thus */ + +/* diag( D ) * diag( D ) + RHO * Z * Z_transpose. */ + +/* where we assume the Euclidean norm of Z is 1. */ + +/* The method consists of approximating the rational functions in the */ +/* secular equation by simpler interpolating rational functions. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The length of all arrays. */ + +/* I (input) INTEGER */ +/* The index of the eigenvalue to be computed. 1 <= I <= N. */ + +/* D (input) DOUBLE PRECISION array, dimension ( N ) */ +/* The original eigenvalues. It is assumed that they are in */ +/* order, 0 <= D(I) < D(J) for I < J. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( N ) */ +/* The components of the updating vector. */ + +/* DELTA (output) DOUBLE PRECISION array, dimension ( N ) */ +/* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th */ +/* component. If N = 1, then DELTA(1) = 1. The vector DELTA */ +/* contains the information necessary to construct the */ +/* (singular) eigenvectors. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The scalar in the symmetric updating formula. */ + +/* SIGMA (output) DOUBLE PRECISION */ +/* The computed sigma_I, the square root of the I-th updated eigenvalue. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension ( N ) */ +/* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th */ +/* component. If N = 1, then WORK( 1 ) = 1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: if INFO = 1, the updating process failed.
*/ + +/* Internal Parameters */ +/* =================== */ + +/* Logical variable ORGATI (origin-at-i?) is used for distinguishing */ +/* whether D(i) or D(i+1) is treated as the origin. */ + +/* ORGATI = .true. origin at i */ +/* ORGATI = .false. origin at i+1 */ + +/* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ +/* if we are working with THREE poles! */ + +/* MAXIT is the maximum number of iterations allowed for each */ +/* eigenvalue. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ren-Cang Li, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Since this routine is called in an inner loop, we do no argument */ +/* checking. */ + +/* Quick return for N=1 and 2. */ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.; + work[1] = 1.; + return 0; + } + if (*n == 2) { + _starpu_dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = _starpu_dlamch_("Epsilon"); + rhoinv = 1. 
/ *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.; + +/* If ||Z||_2 is not one, then TEMP should be set to */ +/* RHO * ||Z||_2^2 / TWO */ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* The following TAU is to approximate */ +/* SIGMA_n^2 - D( N )*D( N ) */ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* It can be proved that */ +/* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO */ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* The following TAU is to approximate */ +/* SIGMA_n^2 - D( N )*D( N ) */ + + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); + } + +/* It can be proved that */ +/* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 */ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) 
{ + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.) 
{ + eta /= 2.; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.) { + +/* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 */ + +/* We choose d(i) as origin. */ + + orgati = TRUE_; + sg2lb = 0.; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.) { + tau = b * 2. 
/ (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + +/* TAU now is an estimation of SIGMA^2 - D( I )^2. The */ +/* following, however, is the corresponding estimation of */ +/* SIGMA - D( I ). */ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2 */ + +/* We choose d(i+1) as origin. */ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The */ +/* following, however, is the corresponding estimation of */ +/* SIGMA - D( IP1 ). */ + + eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, + abs(d__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } +
+ w = rhoinv + phi + psi; + +/* W is the value of the secular function with */ +/* its ii-th element removed. */ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) 
{ + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * 
work[ii]; + dd[2] = dtiip; + _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + + if (w <= 0.) 
{ + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of DLASD4 */ + +} /* _starpu_dlasd4_ */ diff --git a/min-dgels/base/SRC/dlasd5.c b/min-dgels/base/SRC/dlasd5.c new file mode 100644 index 0000000..07746eb --- /dev/null +++ b/min-dgels/base/SRC/dlasd5.c @@ -0,0 +1,189 @@ +/* dlasd5.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal b, c__, w, del, tau, delsq; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine computes the square root of the I-th eigenvalue */ +/* of a positive symmetric rank-one modification of a 2-by-2 diagonal */ +/* matrix */ + +/* diag( D ) * diag( D ) + RHO * Z * transpose(Z) . */ + +/* The diagonal entries in the array D are assumed to satisfy */ + +/* 0 <= D(i) < D(j) for i < j . */ + +/* We also assume RHO > 0 and that the Euclidean norm of the vector */ +/* Z is one. 
*/ + +/* Arguments */ +/* ========= */ + +/* I (input) INTEGER */ +/* The index of the eigenvalue to be computed. I = 1 or I = 2. */ + +/* D (input) DOUBLE PRECISION array, dimension ( 2 ) */ +/* The original eigenvalues. We assume 0 <= D(1) < D(2). */ + +/* Z (input) DOUBLE PRECISION array, dimension ( 2 ) */ +/* The components of the updating vector. */ + +/* DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) */ +/* Contains (D(j) - sigma_I) in its j-th component. */ +/* The vector DELTA contains the information necessary */ +/* to construct the eigenvectors. */ + +/* RHO (input) DOUBLE PRECISION */ +/* The scalar in the symmetric updating formula. */ + +/* DSIGMA (output) DOUBLE PRECISION */ +/* The computed sigma_I, the square root of the I-th updated eigenvalue. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) */ +/* WORK contains (D(j) + sigma_I) in its j-th component. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ren-Cang Li, Computer Science Division, University of California */ +/* at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; + if (w > 0.) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* B > ZERO, always */ + +/* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */ + + tau = c__ * 2.
/ (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2. + tau; + work[2] = d__[1] + tau + d__[2]; +/* DELTA( 1 ) = -Z( 1 ) / TAU */ +/* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) */ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ +/* DELTA( 2 ) = -Z( 2 ) / TAU */ + } +/* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ +/* DELTA( 1 ) = DELTA( 1 ) / TEMP */ +/* DELTA( 2 ) = DELTA( 2 ) / TEMP */ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. 
+ tau; +/* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ +/* DELTA( 2 ) = -Z( 2 ) / TAU */ +/* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ +/* DELTA( 1 ) = DELTA( 1 ) / TEMP */ +/* DELTA( 2 ) = DELTA( 2 ) / TEMP */ + } + return 0; + +/* End of DLASD5 */ + +} /* _starpu_dlasd5_ */ diff --git a/min-dgels/base/SRC/dlasd6.c b/min-dgels/base/SRC/dlasd6.c new file mode 100644 index 0000000..6494d1d --- /dev/null +++ b/min-dgels/base/SRC/dlasd6.c @@ -0,0 +1,367 @@ +/* dlasd6.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlasd7_(integer *, 
integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); + integer isigma; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal orgnrm; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD6 computes the SVD of an updated upper bidiagonal matrix B */ +/* obtained by merging two smaller ones by appending a row. This */ +/* routine is used only for the problem which requires all singular */ +/* values and optionally singular vector matrices in factored form. */ +/* B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. */ +/* A related subroutine, DLASD1, handles the case in which all singular */ +/* values and singular vectors of the bidiagonal matrix are desired. */ + +/* DLASD6 computes the SVD as follows: */ + +/* ( D1(in) 0 0 0 ) */ +/* B = U(in) * ( Z1' a Z2' b ) * VT(in) */ +/* ( 0 0 D2(in) 0 ) */ + +/* = U(out) * ( D(out) 0) * VT(out) */ + +/* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */ +/* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */ +/* elsewhere; and the entry b is empty if SQRE = 0. 
*/ + +/* The singular values of B can be computed using D1, D2, the first */ +/* components of all the right singular vectors of the lower block, and */ +/* the last components of all the right singular vectors of the upper */ +/* block. These components are stored and updated in VF and VL, */ +/* respectively, in DLASD6. Hence U and VT are not explicitly */ +/* referenced. */ + +/* The singular values are stored in D. The algorithm consists of two */ +/* stages: */ + +/* The first stage consists of deflating the size of the problem */ +/* when there are multiple singular values or if there is a zero */ +/* in the Z vector. For each such occurrence the dimension of the */ +/* secular equation problem is reduced by one. This stage is */ +/* performed by the routine DLASD7. */ + +/* The second stage consists of calculating the updated */ +/* singular values. This is done by finding the roots of the */ +/* secular equation via the routine DLASD4 (as called by DLASD8). */ +/* This routine also updates VF and VL and computes the distances */ +/* between the updated singular values and the old singular */ +/* values. */ + +/* DLASD6 is called from DLASDA. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether singular vectors are to be computed in */ +/* factored form: */ +/* = 0: Compute singular values only. */ +/* = 1: Compute singular vectors in factored form as well. */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has row dimension N = NL + NR + 1, */ +/* and column dimension M = N + SQRE. */ + +/* D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). 
*/ +/* On entry D(1:NL,1:NL) contains the singular values of the */ +/* upper block, and D(NL+2:N) contains the singular values */ +/* of the lower block. On exit D(1:N) contains the singular */ +/* values of the modified matrix. */ + +/* VF (input/output) DOUBLE PRECISION array, dimension ( M ) */ +/* On entry, VF(1:NL+1) contains the first components of all */ +/* right singular vectors of the upper block; and VF(NL+2:M) */ +/* contains the first components of all right singular vectors */ +/* of the lower block. On exit, VF contains the first components */ +/* of all right singular vectors of the bidiagonal matrix. */ + +/* VL (input/output) DOUBLE PRECISION array, dimension ( M ) */ +/* On entry, VL(1:NL+1) contains the last components of all */ +/* right singular vectors of the upper block; and VL(NL+2:M) */ +/* contains the last components of all right singular vectors of */ +/* the lower block. On exit, VL contains the last components of */ +/* all right singular vectors of the bidiagonal matrix. */ + +/* ALPHA (input/output) DOUBLE PRECISION */ +/* Contains the diagonal element associated with the added row. */ + +/* BETA (input/output) DOUBLE PRECISION */ +/* Contains the off-diagonal element associated with the added */ +/* row. */ + +/* IDXQ (output) INTEGER array, dimension ( N ) */ +/* This contains the permutation which will reintegrate the */ +/* subproblem just solved back into sorted order, i.e. */ +/* D( IDXQ( I = 1, N ) ) will be in ascending order. */ + +/* PERM (output) INTEGER array, dimension ( N ) */ +/* The permutations (from deflation and sorting) to be applied */ +/* to each block. Not referenced if ICOMPQ = 0. */ + +/* GIVPTR (output) INTEGER */ +/* The number of Givens rotations which took place in this */ +/* subproblem. Not referenced if ICOMPQ = 0. */ + +/* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */ +/* Each pair of numbers indicates a pair of columns to take place */ +/* in a Givens rotation. 
Not referenced if ICOMPQ = 0. */ + +/* LDGCOL (input) INTEGER */ +/* leading dimension of GIVCOL, must be at least N. */ + +/* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* Each number indicates the C or S value to be used in the */ +/* corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ + +/* LDGNUM (input) INTEGER */ +/* The leading dimension of GIVNUM and POLES, must be at least N. */ + +/* POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* On exit, POLES(1,*) is an array containing the new singular */ +/* values obtained from solving the secular equation, and */ +/* POLES(2,*) is an array containing the poles in the secular */ +/* equation. Not referenced if ICOMPQ = 0. */ + +/* DIFL (output) DOUBLE PRECISION array, dimension ( N ) */ +/* On exit, DIFL(I) is the distance between I-th updated */ +/* (undeflated) singular value and the I-th (undeflated) old */ +/* singular value. */ + +/* DIFR (output) DOUBLE PRECISION array, */ +/* dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and */ +/* dimension ( N ) if ICOMPQ = 0. */ +/* On exit, DIFR(I, 1) is the distance between I-th updated */ +/* (undeflated) singular value and the I+1-th (undeflated) old */ +/* singular value. */ + +/* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ +/* normalizing factors for the right singular vector matrix. */ + +/* See DLASD8 for details on DIFL and DIFR. */ + +/* Z (output) DOUBLE PRECISION array, dimension ( M ) */ +/* The first elements of this array contain the components */ +/* of the deflation-adjusted updating row vector. */ + +/* K (output) INTEGER */ +/* Contains the dimension of the non-deflated matrix, */ +/* This is the order of the related secular equation. 1 <= K <=N. */ + +/* C (output) DOUBLE PRECISION */ +/* C contains garbage if SQRE =0 and the C-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. 
*/ + +/* S (output) DOUBLE PRECISION */ +/* S contains garbage if SQRE =0 and the S-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) */ + +/* IWORK (workspace) INTEGER array, dimension ( 3 * N ) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, an singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD6", &i__1); + return 0; + } + +/* The following values are for bookkeeping purposes only. They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLASD7 and DLASD8. 
*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* Scale. */ + +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + _starpu_dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + _starpu_dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + _starpu_dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + _starpu_dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD6 */ + +} /* _starpu_dlasd6_ */ diff --git a/min-dgels/base/SRC/dlasd7.c b/min-dgels/base/SRC/dlasd7.c new file mode 100644 index 0000000..2a797ac --- /dev/null +++ b/min-dgels/base/SRC/dlasd7.c @@ -0,0 +1,518 @@ +/* dlasd7.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j, m, n, k2; + doublereal z1; + integer jp; + doublereal eps, tau, tol; + integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer idxjp; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer jprev; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); + doublereal hlftol; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. 
Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD7 merges the two sets of singular values together into a single */ +/* sorted set. Then it tries to deflate the size of the problem. There */ +/* are two ways in which deflation can occur: when two or more singular */ +/* values are close together or if there is a tiny entry in the Z */ +/* vector. For each such occurrence the order of the related */ +/* secular equation problem is reduced by one. */ + +/* DLASD7 is called from DLASD6. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether singular vectors are to be computed */ +/* in compact form, as follows: */ +/* = 0: Compute singular values only. */ +/* = 1: Compute singular vectors of upper */ +/* bidiagonal matrix in compact form. */ + +/* NL (input) INTEGER */ +/* The row dimension of the upper block. NL >= 1. */ + +/* NR (input) INTEGER */ +/* The row dimension of the lower block. NR >= 1. */ + +/* SQRE (input) INTEGER */ +/* = 0: the lower block is an NR-by-NR square matrix. */ +/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ + +/* The bidiagonal matrix has */ +/* N = NL + NR + 1 rows and */ +/* M = N + SQRE >= N columns. */ + +/* K (output) INTEGER */ +/* Contains the dimension of the non-deflated matrix, this is */ +/* the order of the related secular equation. 1 <= K <=N. */ + +/* D (input/output) DOUBLE PRECISION array, dimension ( N ) */ +/* On entry D contains the singular values of the two submatrices */ +/* to be combined. On exit D contains the trailing (N-K) updated */ +/* singular values (those which were deflated) sorted into */ +/* increasing order. */ + +/* Z (output) DOUBLE PRECISION array, dimension ( M ) */ +/* On exit Z contains the updating row vector in the secular */ +/* equation. */ + +/* ZW (workspace) DOUBLE PRECISION array, dimension ( M ) */ +/* Workspace for Z. 
*/ + +/* VF (input/output) DOUBLE PRECISION array, dimension ( M ) */ +/* On entry, VF(1:NL+1) contains the first components of all */ +/* right singular vectors of the upper block; and VF(NL+2:M) */ +/* contains the first components of all right singular vectors */ +/* of the lower block. On exit, VF contains the first components */ +/* of all right singular vectors of the bidiagonal matrix. */ + +/* VFW (workspace) DOUBLE PRECISION array, dimension ( M ) */ +/* Workspace for VF. */ + +/* VL (input/output) DOUBLE PRECISION array, dimension ( M ) */ +/* On entry, VL(1:NL+1) contains the last components of all */ +/* right singular vectors of the upper block; and VL(NL+2:M) */ +/* contains the last components of all right singular vectors */ +/* of the lower block. On exit, VL contains the last components */ +/* of all right singular vectors of the bidiagonal matrix. */ + +/* VLW (workspace) DOUBLE PRECISION array, dimension ( M ) */ +/* Workspace for VL. */ + +/* ALPHA (input) DOUBLE PRECISION */ +/* Contains the diagonal element associated with the added row. */ + +/* BETA (input) DOUBLE PRECISION */ +/* Contains the off-diagonal element associated with the added */ +/* row. */ + +/* DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) */ +/* Contains a copy of the diagonal elements (K-1 singular values */ +/* and one zero) in the secular equation. */ + +/* IDX (workspace) INTEGER array, dimension ( N ) */ +/* This will contain the permutation used to sort the contents of */ +/* D into ascending order. */ + +/* IDXP (workspace) INTEGER array, dimension ( N ) */ +/* This will contain the permutation used to place deflated */ +/* values of D at the end of the array. On output IDXP(2:K) */ +/* points to the nondeflated D-values and IDXP(K+1:N) */ +/* points to the deflated singular values. */ + +/* IDXQ (input) INTEGER array, dimension ( N ) */ +/* This contains the permutation which separately sorts the two */ +/* sub-problems in D into ascending order. 
Note that entries in */ +/* the first half of this permutation must first be moved one */ +/* position backward; and entries in the second half */ +/* must first have NL+1 added to their values. */ + +/* PERM (output) INTEGER array, dimension ( N ) */ +/* The permutations (from deflation and sorting) to be applied */ +/* to each singular block. Not referenced if ICOMPQ = 0. */ + +/* GIVPTR (output) INTEGER */ +/* The number of Givens rotations which took place in this */ +/* subproblem. Not referenced if ICOMPQ = 0. */ + +/* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */ +/* Each pair of numbers indicates a pair of columns to take place */ +/* in a Givens rotation. Not referenced if ICOMPQ = 0. */ + +/* LDGCOL (input) INTEGER */ +/* The leading dimension of GIVCOL, must be at least N. */ + +/* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* Each number indicates the C or S value to be used in the */ +/* corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ + +/* LDGNUM (input) INTEGER */ +/* The leading dimension of GIVNUM, must be at least N. */ + +/* C (output) DOUBLE PRECISION */ +/* C contains garbage if SQRE =0 and the C-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. */ + +/* S (output) DOUBLE PRECISION */ +/* S contains garbage if SQRE =0 and the S-value of a Givens */ +/* rotation related to the right null space if SQRE = 1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* Generate the first part of the vector Z and move the singular */ +/* values in the first part of D one position backward. */ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. 
*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + _starpu_dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = _starpu_dlamch_("Epsilon"); +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); + +/* There are 2 kinds of deflation -- first a value in the z-vector */ +/* is small, second two (or more) singular values are very close */ +/* together (their difference is small). */ + +/* If the value in the z-vector is small, we simply permute the */ +/* array so that the corresponding singular value is moved to the */ +/* end. */ + +/* If two values in the D-vector are close, we perform a two-sided */ +/* rotation designed to make one of the corresponding z-vector */ +/* entries zero, and then permute the array so that the deflated */ +/* singular value is moved to the end. */ + +/* If there are multiple singular values then the problem deflates. */ +/* Here the number of equal singular values are found. As each equal */ +/* singular value is found, an elementary reflector is computed to */ +/* rotate the corresponding singular subspace so that the */ +/* corresponding components of Z are zero in this new basis. */ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. 
*/ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = _starpu_dlapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + _starpu_drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + _starpu_drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* Sort the singular values into DSIGMA. The singular values which */ +/* were not deflated go into the first K slots of DSIGMA, except */ +/* that DSIGMA(1) is treated separately. 
*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* The deflated singular values go back into the last N - K slots of */ +/* D. */ + + i__1 = n - *k; + _starpu_dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and */ +/* VL(M). */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = _starpu_dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.; + *s = 0.; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + _starpu_drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + _starpu_drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. */ + + i__1 = *k - 1; + _starpu_dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + _starpu_dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + _starpu_dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of DLASD7 */ + +} /* _starpu_dlasd7_ */ diff --git a/min-dgels/base/SRC/dlasd8.c b/min-dgels/base/SRC/dlasd8.c new file mode 100644 index 0000000..3135374 --- /dev/null +++ b/min-dgels/base/SRC/dlasd8.c @@ -0,0 +1,326 @@ +/* dlasd8.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b8 = 1.; + +/* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j; + doublereal dj, rho; + integer iwk1, iwk2, iwk3; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + integer iwk2i, iwk3i; + doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal 
dsigjp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASD8 finds the square roots of the roots of the secular equation, */ +/* as defined by the values in DSIGMA and Z. It makes the appropriate */ +/* calls to DLASD4, and stores, for each element in D, the distance */ +/* to its two nearest poles (elements in DSIGMA). It also updates */ +/* the arrays VF and VL, the first and last components of all the */ +/* right singular vectors of the original bidiagonal matrix. */ + +/* DLASD8 is called from DLASD6. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether singular vectors are to be computed in */ +/* factored form in the calling routine: */ +/* = 0: Compute singular values only. */ +/* = 1: Compute singular vectors in factored form as well. */ + +/* K (input) INTEGER */ +/* The number of terms in the rational function to be solved */ +/* by DLASD4. K >= 1. */ + +/* D (output) DOUBLE PRECISION array, dimension ( K ) */ +/* On output, D contains the updated singular values. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension ( K ) */ +/* On entry, the first K elements of this array contain the */ +/* components of the deflation-adjusted updating row vector. */ +/* On exit, Z is updated. */ + +/* VF (input/output) DOUBLE PRECISION array, dimension ( K ) */ +/* On entry, VF contains information passed through DBEDE8. */ +/* On exit, VF contains the first K components of the first */ +/* components of all right singular vectors of the bidiagonal */ +/* matrix. */ + +/* VL (input/output) DOUBLE PRECISION array, dimension ( K ) */ +/* On entry, VL contains information passed through DBEDE8. 
*/ +/* On exit, VL contains the first K components of the last */ +/* components of all right singular vectors of the bidiagonal */ +/* matrix. */ + +/* DIFL (output) DOUBLE PRECISION array, dimension ( K ) */ +/* On exit, DIFL(I) = D(I) - DSIGMA(I). */ + +/* DIFR (output) DOUBLE PRECISION array, */ +/* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */ +/* dimension ( K ) if ICOMPQ = 0. */ +/* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */ +/* defined and will not be referenced. */ + +/* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ +/* normalizing factors for the right singular vector matrix. */ + +/* LDDIFR (input) INTEGER */ +/* The leading dimension of DIFR, must be at least K. */ + +/* DSIGMA (input/output) DOUBLE PRECISION array, dimension ( K ) */ +/* On entry, the first K elements of this array contain the old */ +/* roots of the deflated updating problem. These are the poles */ +/* of the secular equation. */ +/* On exit, the elements of DSIGMA may be very slightly altered */ +/* in value. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, a singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; + } + return 0; + } + +/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DSIGMA(I) if it is 1; this makes the subsequent */ +/* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DSIGMA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DSIGMA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = _starpu_dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. 
*/ + + rho = _starpu_dnrm2_(k, &z__[1], &c__1); + _starpu_dlascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + _starpu_dlaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k); + +/* Compute the updated singular values, the arrays DIFL, DIFR, */ +/* and the updated Z. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + _starpu_dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. 
*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (_starpu_dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (_starpu_dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = _starpu_dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = _starpu_ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = _starpu_ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + _starpu_dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + _starpu_dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of DLASD8 */ + +} /* _starpu_dlasd8_ */ diff --git a/min-dgels/base/SRC/dlasda.c b/min-dgels/base/SRC/dlasda.c new file mode 100644 index 0000000..7083f82 --- /dev/null +++ b/min-dgels/base/SRC/dlasda.c @@ -0,0 +1,488 @@ +/* dlasda.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf, + vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer sqrei; + extern /* Subroutine */ int _starpu_dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, 
integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + integer nwork1, nwork2; + extern /* Subroutine */ int _starpu_dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), _starpu_dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_xerbla_(char *, integer *); + integer smlszp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Using a divide and conquer approach, DLASDA computes the singular */ +/* value decomposition (SVD) of a real upper bidiagonal N-by-M matrix */ +/* B with diagonal D and offdiagonal E, where M = N + SQRE. The */ +/* algorithm computes the singular values in the SVD B = U * S * VT. */ +/* The orthogonal matrices U and VT are optionally computed in */ +/* compact form. */ + +/* A related subroutine, DLASD0, computes the singular values and */ +/* the singular vectors in explicit form. */ + +/* Arguments */ +/* ========= */ + +/* ICOMPQ (input) INTEGER */ +/* Specifies whether singular vectors are to be computed */ +/* in compact form, as follows */ +/* = 0: Compute singular values only. */ +/* = 1: Compute singular vectors of upper bidiagonal */ +/* matrix in compact form. */ + +/* SMLSIZ (input) INTEGER */ +/* The maximum size of the subproblems at the bottom of the */ +/* computation tree. */ + +/* N (input) INTEGER */ +/* The row dimension of the upper bidiagonal matrix. 
This is */ +/* also the dimension of the main diagonal array D. */ + +/* SQRE (input) INTEGER */ +/* Specifies the column dimension of the bidiagonal matrix. */ +/* = 0: The bidiagonal matrix has column dimension M = N; */ +/* = 1: The bidiagonal matrix has column dimension M = N + 1. */ + +/* D (input/output) DOUBLE PRECISION array, dimension ( N ) */ +/* On entry D contains the main diagonal of the bidiagonal */ +/* matrix. On exit D, if INFO = 0, contains its singular values. */ + +/* E (input) DOUBLE PRECISION array, dimension ( M-1 ) */ +/* Contains the offdiagonal entries of the bidiagonal matrix. */ +/* On exit, E has been destroyed. */ + +/* U (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced */ +/* if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left */ +/* singular vector matrices of all subproblems at the bottom */ +/* level. */ + +/* LDU (input) INTEGER, LDU >= N + SQRE. */ +/* The leading dimension of arrays U, VT, DIFL, DIFR, POLES, */ +/* GIVNUM, and Z. */ + +/* VT (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced */ +/* if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right */ +/* singular vector matrices of all subproblems at the bottom */ +/* level. */ + +/* K (output) INTEGER array, */ +/* dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. */ +/* If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th */ +/* secular equation on the computation tree. */ + +/* DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), */ +/* where NLVL = floor(log_2 (N/SMLSIZ)). */ + +/* DIFR (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and */ +/* dimension ( N ) if ICOMPQ = 0.
*/ +/* If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) */ +/* record distances between singular values on the I-th */ +/* level and singular values on the (I - 1)-th level, and */ +/* DIFR(1:N, 2 * I ) contains the normalizing factors for */ +/* the right singular vector matrix. See DLASD8 for details. */ + +/* Z (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, NLVL ) if ICOMPQ = 1 and */ +/* dimension ( N ) if ICOMPQ = 0. */ +/* The first K elements of Z(1, I) contain the components of */ +/* the deflation-adjusted updating row vector for subproblems */ +/* on the I-th level. */ + +/* POLES (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced */ +/* if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and */ +/* POLES(1, 2*I) contain the new and old singular values */ +/* involved in the secular equations on the I-th level. */ + +/* GIVPTR (output) INTEGER array, */ +/* dimension ( N ) if ICOMPQ = 1, and not referenced if */ +/* ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records */ +/* the number of Givens rotations performed on the I-th */ +/* problem on the computation tree. */ + +/* GIVCOL (output) INTEGER array, */ +/* dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not */ +/* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */ +/* GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations */ +/* of Givens rotations performed on the I-th level on the */ +/* computation tree. */ + +/* LDGCOL (input) INTEGER, LDGCOL >= N. */ +/* The leading dimension of arrays GIVCOL and PERM. */ + +/* PERM (output) INTEGER array, */ +/* dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced */ +/* if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records */ +/* permutations done on the I-th level of the computation tree. */ + +/* GIVNUM (output) DOUBLE PRECISION array, */ +/* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not */ +/* referenced if ICOMPQ = 0.
If ICOMPQ = 1, on exit, for each I, */ +/* GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- */ +/* values of Givens rotations performed on the I-th level on */ +/* the computation tree. */ + +/* C (output) DOUBLE PRECISION array, */ +/* dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. */ +/* If ICOMPQ = 1 and the I-th subproblem is not square, on exit, */ +/* C( I ) contains the C-value of a Givens rotation related to */ +/* the right null space of the I-th subproblem. */ + +/* S (output) DOUBLE PRECISION array, dimension ( N ) if */ +/* ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 */ +/* and the I-th subproblem is not square, on exit, S( I ) */ +/* contains the S-value of a Givens rotation related to */ +/* the right null space of the I-th subproblem. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). */ + +/* IWORK (workspace) INTEGER array. */ +/* Dimension must be at least (7 * N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = 1, a singular value did not converge */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + _starpu_dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + _starpu_dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] +, ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. 
*/ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* for the nodes on bottom level of the tree, solve */ +/* their subproblems by DLASDQ. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* IC : center row of each node */ +/* NL : number of rows of left subproblem */ +/* NR : number of rows of right subproblem */ +/* NLF: starting row of the left subproblem */ +/* NRF: starting row of the right subproblem */ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + _starpu_dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp); + _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + _starpu_dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + _starpu_dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + _starpu_dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u[nlf + u_dim1], ldu); + _starpu_dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt[nlf + vt_dim1], + ldu); + _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + _starpu_dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + _starpu_dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + 
if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + _starpu_dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp); + _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + _starpu_dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + _starpu_dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + _starpu_dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u[nrf + u_dim1], ldu); + _starpu_dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt[nrf + vt_dim1], + ldu); + _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + _starpu_dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + _starpu_dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* Find the first node LF and last node LL on */ +/* the current level LVL. 
*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + _starpu_dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + _starpu_dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASDA */ + +} /* _starpu_dlasda_ */ diff --git a/min-dgels/base/SRC/dlasdq.c b/min-dgels/base/SRC/dlasdq.c new file mode 100644 index 0000000..ea2086f --- /dev/null +++ b/min-dgels/base/SRC/dlasdq.c @@ -0,0 +1,380 @@ +/* dlasdq.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + integer i__, j; + doublereal r__, cs, sn; + integer np1, isub; + doublereal smin; + integer sqre1; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer * +, doublereal *, integer *); + integer iuplo; + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, + integer *), _starpu_dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + logical rotate; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLASDQ computes the singular value decomposition (SVD) of a real */ +/* (upper or lower) bidiagonal matrix with diagonal D and offdiagonal */ +/* E, accumulating the transformations if desired. Letting B denote */ +/* the input bidiagonal matrix, the algorithm computes orthogonal */ +/* matrices Q and P such that B = Q * S * P' (P' denotes the transpose */ +/* of P). The singular values S are overwritten on D. */ + +/* The input matrix U is changed to U * Q if desired. */ +/* The input matrix VT is changed to P' * VT if desired. */ +/* The input matrix C is changed to Q' * C if desired. */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices With */ +/* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ +/* LAPACK Working Note #3, for a detailed description of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* On entry, UPLO specifies whether the input bidiagonal matrix */ +/* is upper or lower bidiagonal, and whether it is square or */ +/* not. */ +/* UPLO = 'U' or 'u' B is upper bidiagonal. */ +/* UPLO = 'L' or 'l' B is lower bidiagonal. */ + +/* SQRE (input) INTEGER */ +/* = 0: then the input matrix is N-by-N. */ +/* = 1: then the input matrix is N-by-(N+1) if UPLO = 'U' and */ +/* (N+1)-by-N if UPLO = 'L'. */ + +/* The bidiagonal matrix has */ +/* N = NL + NR + 1 rows and */ +/* M = N + SQRE >= N columns. */ + +/* N (input) INTEGER */ +/* On entry, N specifies the number of rows and columns */ +/* in the matrix. N must be at least 0. */ + +/* NCVT (input) INTEGER */ +/* On entry, NCVT specifies the number of columns of */ +/* the matrix VT. NCVT must be at least 0. */ + +/* NRU (input) INTEGER */ +/* On entry, NRU specifies the number of rows of */ +/* the matrix U. NRU must be at least 0. */ + +/* NCC (input) INTEGER */ +/* On entry, NCC specifies the number of columns of */ +/* the matrix C. NCC must be at least 0.
*/ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D contains the diagonal entries of the */ +/* bidiagonal matrix whose SVD is desired. On normal exit, */ +/* D contains the singular values in ascending order. */ + +/* E (input/output) DOUBLE PRECISION array. */ +/* dimension is (N-1) if SQRE = 0 and N if SQRE = 1. */ +/* On entry, the entries of E contain the offdiagonal entries */ +/* of the bidiagonal matrix whose SVD is desired. On normal */ +/* exit, E will contain 0. If the algorithm does not converge, */ +/* D and E will contain the diagonal and superdiagonal entries */ +/* of a bidiagonal matrix orthogonally equivalent to the one */ +/* given as input. */ + +/* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */ +/* On entry, contains a matrix which on exit has been */ +/* premultiplied by P', dimension N-by-NCVT if SQRE = 0 */ +/* and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). */ + +/* LDVT (input) INTEGER */ +/* On entry, LDVT specifies the leading dimension of VT as */ +/* declared in the calling (sub) program. LDVT must be at */ +/* least 1. If NCVT is nonzero LDVT must also be at least N. */ + +/* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) */ +/* On entry, contains a matrix which on exit has been */ +/* postmultiplied by Q, dimension NRU-by-N if SQRE = 0 */ +/* and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). */ + +/* LDU (input) INTEGER */ +/* On entry, LDU specifies the leading dimension of U as */ +/* declared in the calling (sub) program. LDU must be at */ +/* least max( 1, NRU ) . */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */ +/* On entry, contains an N-by-NCC matrix which on exit */ +/* has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 */ +/* and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). 
*/ + +/* LDC (input) INTEGER */ +/* On entry, LDC specifies the leading dimension of C as */ +/* declared in the calling (sub) program. LDC must be at */ +/* least 1. If NCC is nonzero, LDC must also be at least N. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ +/* Workspace. Only referenced if one of NCVT, NRU, or NCC is */ +/* nonzero, and if N is at least 2. */ + +/* INFO (output) INTEGER */ +/* On exit, a value of 0 indicates a successful exit. */ +/* If INFO < 0, argument number -INFO is illegal. */ +/* If INFO > 0, the algorithm did not converge, and INFO */ +/* specifies how many superdiagonals did not converge. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (_starpu_lsame_(uplo, "U")) { + iuplo = 1; + } + if (_starpu_lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* If matrix non-square upper bidiagonal, rotate to be lower */ +/* bidiagonal. The rotations are on the right. */ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + _starpu_dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. 
*/ + + if (*ncvt > 0) { + _starpu_dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left. */ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* If matrix (N+1)-by-N lower bidiagonal, one additional */ +/* rotation is needed. */ + + if (sqre1 == 1) { + _starpu_dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + _starpu_dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + _starpu_dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + _starpu_dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + _starpu_dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* Call DBDSQR to compute the SVD of the reduced real */ +/* N-by-N upper bidiagonal matrix. */ + + _starpu_dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* Sort the singular values into ascending order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. 
*/ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + _starpu_dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + _starpu_dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] +, &c__1); + } + if (*ncc > 0) { + _starpu_dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of DLASDQ */ + +} /* _starpu_dlasdq_ */ diff --git a/min-dgels/base/SRC/dlasdt.c b/min-dgels/base/SRC/dlasdt.c new file mode 100644 index 0000000..c71d594 --- /dev/null +++ b/min-dgels/base/SRC/dlasdt.c @@ -0,0 +1,136 @@ +/* dlasdt.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, il, ir, maxn; + doublereal temp; + integer nlvl, llst, ncrnt; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASDT creates a tree of subproblems for bidiagonal divide and */ +/* conquer. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* On entry, the number of diagonal elements of the */ +/* bidiagonal matrix. 
*/ + +/* LVL (output) INTEGER */ +/* On exit, the number of levels on the computation tree. */ + +/* ND (output) INTEGER */ +/* On exit, the number of nodes on the tree. */ + +/* INODE (output) INTEGER array, dimension ( N ) */ +/* On exit, centers of subproblems. */ + +/* NDIML (output) INTEGER array, dimension ( N ) */ +/* On exit, row dimensions of left children. */ + +/* NDIMR (output) INTEGER array, dimension ( N ) */ +/* On exit, row dimensions of right children. */ + +/* MSUB (input) INTEGER. */ +/* On entry, the maximum row dimension that each subproblem at */ +/* the bottom of the tree may have. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Ming Gu and Huan Ren, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Find the number of levels on the tree. */ + + /* Parameter adjustments: shift the output arrays so they are indexed from 1 */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + *lvl = (integer) temp + 1; /* LVL = trunc(log2(max(1,N) / (MSUB+1))) + 1 */ + + i__ = *n / 2; + inode[1] = i__ + 1; /* root node: center at N/2 + 1 */ + ndiml[1] = i__; /* N/2 rows go to the root's left child */ + ndimr[1] = *n - i__ - 1; /* the remaining N - N/2 - 1 rows go to its right child */ + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* Constructing the tree at (NLVL+1)-st level. The number of */ +/* nodes created on this level is LLST * 2. 
*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of DLASDT */ + +} /* _starpu_dlasdt_ */ diff --git a/min-dgels/base/SRC/dlaset.c b/min-dgels/base/SRC/dlaset.c new file mode 100644 index 0000000..82e4676 --- /dev/null +++ b/min-dgels/base/SRC/dlaset.c @@ -0,0 +1,152 @@ +/* dlaset.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j; + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASET initializes an m-by-n matrix A to BETA on the diagonal and */ +/* ALPHA on the offdiagonals. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies the part of the matrix A to be set. 
*/ +/* = 'U': Upper triangular part is set; the strictly lower */ +/* triangular part of A is not changed. */ +/* = 'L': Lower triangular part is set; the strictly upper */ +/* triangular part of A is not changed. */ +/* Otherwise: All of the matrix A is set. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* ALPHA (input) DOUBLE PRECISION */ +/* The constant to which the offdiagonal elements are to be set. */ + +/* BETA (input) DOUBLE PRECISION */ +/* The constant to which the diagonal elements are to be set. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On exit, the leading m-by-n submatrix of A is set as follows: */ + +/* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=min(j-1,m), 1<=j<=n, */ +/* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ +/* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ + +/* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: offset A so that a[i + j*a_dim1] addresses A(i,j) with 1-based i and j */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (_starpu_lsame_(uplo, "U")) { + +/* Set the strictly upper triangular or trapezoidal part of the */ +/* array to ALPHA. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { /* column 1 has no strictly upper entries */ +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (_starpu_lsame_(uplo, "L")) { + +/* Set the strictly lower triangular or trapezoidal part of the */ +/* array to ALPHA. 
*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* _starpu_dlaset_ */ diff --git a/min-dgels/base/SRC/dlasq1.c b/min-dgels/base/SRC/dlasq1.c new file mode 100644 index 0000000..8a8f5d1 --- /dev/null +++ b/min-dgels/base/SRC/dlasq1.c @@ -0,0 +1,219 @@ +/* dlasq1.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal eps; + extern /* Subroutine */ int _starpu_dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + doublereal scale; + integer iinfo; + doublereal sigmn; + extern /* Subroutine */ int _starpu_dcopy_(integer *, 
doublereal *, integer *, + doublereal *, integer *); + doublereal sigmx; + extern /* Subroutine */ int _starpu_dlasq2_(integer *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlasrt_( + char *, integer *, doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASQ1 computes the singular values of a real N-by-N bidiagonal */ +/* matrix with diagonal D and off-diagonal E. The singular values */ +/* are computed to high relative accuracy, in the absence of */ +/* denormalization, underflow and overflow. The algorithm was first */ +/* presented in */ + +/* "Accurate singular values and differential qd algorithms" by K. V. */ +/* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, */ +/* 1994, */ + +/* and the present implementation is described in "An implementation of */ +/* the dqds Algorithm (Positive Case)", LAPACK Working Note. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of rows and columns in the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, D contains the diagonal elements of the */ +/* bidiagonal matrix whose SVD is desired. 
On normal exit, */ +/* D contains the singular values in decreasing order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, elements E(1:N-1) contain the off-diagonal elements */ +/* of the bidiagonal matrix whose SVD is desired. */ +/* On exit, E is overwritten. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: the algorithm failed */ +/* = 1, a split was marked by a positive value in E */ +/* = 2, current block of Z not diagonalized after 30*N */ +/* iterations (in inner while loop) */ +/* = 3, termination criterion of outer while loop not met */ +/* (program created more than N unreduced blocks) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments: shift the arrays so they are indexed from 1 */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; /* N is the first argument, so report argument 1 as illegal */ + i__1 = -(*info); + _starpu_xerbla_("DLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = abs(d__[1]); /* 1-by-1 case: the singular value is |D(1)| */ + return 0; + } else if (*n == 2) { + _starpu_dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. */ + + sigmx = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* Computing MAX */ + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); +/* L10: */ + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.) 
{ + _starpu_dlasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); +/* L20: */ + } + +/* Copy D and E into WORK (in the Z format) and scale (squaring the */ +/* input data makes scaling by a power of the radix pointless). */ + + eps = _starpu_dlamch_("Precision"); + safmin = _starpu_dlamch_("Safe minimum"); + scale = sqrt(eps / safmin); + _starpu_dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + _starpu_dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. */ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = work[i__]; + work[i__] = d__1 * d__1; +/* L30: */ + } + work[*n * 2] = 0.; + + _starpu_dlasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + _starpu_dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of DLASQ1 */ + +} /* _starpu_dlasq1_ */ diff --git a/min-dgels/base/SRC/dlasq2.c b/min-dgels/base/SRC/dlasq2.c new file mode 100644 index 0000000..2480ed5 --- /dev/null +++ b/min-dgels/base/SRC/dlasq2.c @@ -0,0 +1,602 @@ +/* dlasq2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__10 = 10; +static integer c__3 = 3; +static integer c__4 = 4; +static integer c__11 = 11; + +/* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal d__, e, g; + integer k; + doublereal s, t; + integer i0, i4, n0; + doublereal dn; + integer pp; + doublereal dn1, dn2, dee, eps, tau, tol; + integer ipn4; + doublereal tol2; + logical ieee; + integer nbig; + doublereal dmin__, emin, emax; + integer kmin, ndiv, iter; + doublereal qmin, temp, qmax, zmax; + integer splt; + doublereal dmin1, dmin2; + integer nfail; + doublereal desig, trace, sigma; + integer iinfo, ttype; + extern /* Subroutine */ int _starpu_dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + doublereal deemin; + integer iwhila, iwhilb; + doublereal oldemn, safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlasrt_(char 
*, integer *, doublereal *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASQ2 computes all the eigenvalues of the symmetric positive */ +/* definite tridiagonal matrix associated with the qd array Z. The */ +/* eigenvalues are computed to high relative accuracy, in the */ +/* absence of denormalization, underflow and overflow. */ + +/* To see the relation of Z to the tridiagonal matrix, let L be a */ +/* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,...) and */ +/* let U be an upper bidiagonal matrix with 1's above and diagonal */ +/* Z(1,3,5,...). The tridiagonal is L*U or, if you prefer, the */ +/* symmetric tridiagonal to which it is similar. */ + +/* Note : DLASQ2 defines a logical variable, IEEE, which is true */ +/* on machines which follow ieee-754 floating-point standard in their */ +/* handling of infinities and NaNs, and false otherwise. This variable */ +/* is passed to DLASQ3. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of rows and columns in the matrix. N >= 0. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */ +/* On entry Z holds the qd array. On exit, entries 1 to N hold */ +/* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */ +/* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */ +/* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */ +/* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */ +/* shifts that failed. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if the i-th argument is a scalar and had an illegal */ +/* value, then INFO = -i, if the i-th argument is an */ +/* array and the j-entry had an illegal value, then */ +/* INFO = -(i*100+j) */ +/* > 0: the algorithm failed */ +/* = 1, a split was marked by a positive value in E */ +/* = 2, current block of Z not diagonalized after 30*N */ +/* iterations (in inner while loop) */ +/* = 3, termination criterion of outer while loop not met */ +/* (program created more than N unreduced blocks) */ + +/* Further Details */ +/* =============== */ +/* Local Variables: I0:N0 defines a current unreduced segment of Z. */ +/* The shifts are accumulated in SIGMA. Iteration count is in ITER. */ +/* Ping-pong is controlled by PP (alternates between 0 and 1). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ +/* (in case DLASQ2 is not called by DLASQ1) */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = _starpu_dlamch_("Precision"); + safmin = _starpu_dlamch_("Safe minimum"); + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + _starpu_xerbla_("DLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + _starpu_xerbla_("DLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0. || z__[3] < 0.) 
{ + *info = -2; + _starpu_xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + _starpu_xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + _starpu_xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); +/* Computing MAX */ + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + _starpu_xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + _starpu_dlasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + +/* Check whether the machine is IEEE conformable. 
*/ + + ieee = _starpu_ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && _starpu_ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, + &c__3, &c__4) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. 
*/ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + +/* Initialise variables to pass to DLASQ3. */ + + ttype = 0; + dmin1 = 0.; + dmin2 = 0.; + dn = 0.; + dn1 = 0.; + dn2 = 0.; + g = 0.; + tau = 0.; + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L170; + } + +/* While array unfinished do */ + +/* E(N0) holds the value of SIGMA when submatrix in I0:N0 */ +/* splits from the rest of the array, but is negated. */ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + +/* Find last unreduced submatrix's top index I0, find QMAX and */ +/* EMIN. Find Gershgorin-type bound if Q's much greater than E's. 
*/ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) { +/* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + pp = 0; + + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } + +/* Put -(initial shift) into DMIN. */ + +/* Computing MAX */ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); + +/* Now I0:N0 is unreduced. */ +/* PP = 0 for ping, PP = 1 for pong. */ +/* PP = 2 indicates that flipping was applied to the Z array */ +/* and that the tests for deflation upon entry in DLASQ3 */ +/* should not be performed. 
*/ + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L150; + } + +/* While submatrix unfinished take a good dqds step. */ + + _starpu_dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); +/* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); + } +/* L130: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L140: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L150: + +/* L160: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L170: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L180: */ + } + +/* Sort and compute sum of eigenvalues. */ + + _starpu_dlasrt_("D", n, &z__[1], &iinfo); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L190: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. 
/ (doublereal) iter; + return 0; + +/* End of DLASQ2 */ + +} /* _starpu_dlasq2_ */ diff --git a/min-dgels/base/SRC/dlasq3.c b/min-dgels/base/SRC/dlasq3.c new file mode 100644 index 0000000..6881a7a --- /dev/null +++ b/min-dgels/base/SRC/dlasq3.c @@ -0,0 +1,350 @@ +/* dlasq3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal s, t; + integer j4, nn; + doublereal eps, tol; + integer n0in, ipn4; + doublereal tol2, temp; + extern /* Subroutine */ int _starpu_dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlasq5_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), _starpu_dlasq6_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + extern 
doublereal _starpu_dlamch_(char *); + extern logical _starpu_disnan_(doublereal *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ +/* In case of failure it changes shifts, and tries again until output */ +/* is positive. */ + +/* Arguments */ +/* ========= */ + +/* I0 (input) INTEGER */ +/* First index. */ + +/* N0 (input) INTEGER */ +/* Last index. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ +/* Z holds the qd array. */ + +/* PP (input/output) INTEGER */ +/* PP=0 for ping, PP=1 for pong. */ +/* PP=2 indicates that flipping was applied to the Z array */ +/* and that the initial tests for deflation should not be */ +/* performed. */ + +/* DMIN (output) DOUBLE PRECISION */ +/* Minimum value of d. */ + +/* SIGMA (output) DOUBLE PRECISION */ +/* Sum of shifts used in current segment. */ + +/* DESIG (input/output) DOUBLE PRECISION */ +/* Lower order part of SIGMA */ + +/* QMAX (input) DOUBLE PRECISION */ +/* Maximum value of q. */ + +/* NFAIL (output) INTEGER */ +/* Number of times shift was too big. */ + +/* ITER (output) INTEGER */ +/* Number of iterations. */ + +/* NDIV (output) INTEGER */ +/* Number of divisions. */ + +/* IEEE (input) LOGICAL */ +/* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */ + +/* TTYPE (input/output) INTEGER */ +/* Shift type. 
*/ + +/* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */ +/* These are passed as arguments in order to save their values */ +/* between calls to DLASQ3. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Function .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + n0in = *n0; + eps = _starpu_dlamch_("Precision"); + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) 
+ 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + if (*pp == 2) { + *pp = 0; + } + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); +/* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; + } + } + +/* Choose a shift. */ + + _starpu_dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); + +/* Call dqds until DMIN > 0. 
*/ + +L70: + + _starpu_dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0. && *dmin1 > 0.) { + +/* Success. */ + + goto L90; + + } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol + * (*sigma + *dn1) && abs(*dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L90; + } else if (*dmin__ < 0.) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (*ttype < -22) { + +/* Failed twice. Play it safe. */ + + *tau = 0.; + } else if (*dmin1 > 0.) { + +/* Late failure. Gives excellent shift. */ + + *tau = (*tau + *dmin__) * (1. - eps * 2.); + *ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + *tau *= .25; + *ttype += -12; + } + goto L70; + } else if (_starpu_disnan_(dmin__)) { + +/* NaN. */ + + if (*tau == 0.) { + goto L80; + } else { + *tau = 0.; + goto L70; + } + } else { + +/* Possible underflow. Play it safe. */ + + goto L80; + } + +/* Risk of underflow. */ + +L80: + _starpu_dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + *tau = 0.; + +L90: + if (*tau < *sigma) { + *desig += *tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of DLASQ3 */ + +} /* _starpu_dlasq3_ */ diff --git a/min-dgels/base/SRC/dlasq4.c b/min-dgels/base/SRC/dlasq4.c new file mode 100644 index 0000000..78dbc0c --- /dev/null +++ b/min-dgels/base/SRC/dlasq4.c @@ -0,0 +1,403 @@ +/* dlasq4.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype, doublereal *g) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal s, a2, b1, b2; + integer i4, nn, np; + doublereal gam, gap1, gap2; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASQ4 computes an approximation TAU to the smallest eigenvalue */ +/* using values of d from the previous transform. */ + +/* I0 (input) INTEGER */ +/* First index. */ + +/* N0 (input) INTEGER */ +/* Last index. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ +/* Z holds the qd array. */ + +/* PP (input) INTEGER */ +/* PP=0 for ping, PP=1 for pong. */ + +/* NOIN (input) INTEGER */ +/* The value of N0 at start of EIGTEST. */ + +/* DMIN (input) DOUBLE PRECISION */ +/* Minimum value of d. 
*/ + +/* DMIN1 (input) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ). */ + +/* DMIN2 (input) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + +/* DN (input) DOUBLE PRECISION */ +/* d(N) */ + +/* DN1 (input) DOUBLE PRECISION */ +/* d(N-1) */ + +/* DN2 (input) DOUBLE PRECISION */ +/* d(N-2) */ + +/* TAU (output) DOUBLE PRECISION */ +/* This is the shift. */ + +/* TTYPE (output) INTEGER */ +/* Shift type. */ + +/* G (input/output) REAL */ +/* G is passed as an argument in order to save its value between */ +/* calls to DLASQ4. */ + +/* Further Details */ +/* =============== */ +/* CNST1 = 9/16 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* A negative DMIN forces the shift to take that absolute value */ +/* TTYPE records the type of shift. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. 
&& gap1 > b1) { +/* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) 
{ + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + *g += (1. - *g) * .333; + } else if (*ttype == -18) { + *g = .083250000000000005; + } else { + *g = .25; + } + s = *g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (max(b1,a2) * 100. < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ + +/* Cases 10 and 11. */ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) 
{ + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of DLASQ4 */ + +} /* _starpu_dlasq4_ */ diff --git a/min-dgels/base/SRC/dlasq5.c b/min-dgels/base/SRC/dlasq5.c new file mode 100644 index 0000000..2f35e29 --- /dev/null +++ b/min-dgels/base/SRC/dlasq5.c @@ -0,0 +1,240 @@ +/* dlasq5.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal d__; + integer j4, j4p2; + doublereal emin, temp; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASQ5 computes one dqds transform in ping-pong form, one */ +/* version for IEEE machines another for non IEEE machines. */ + +/* Arguments */ +/* ========= */ + +/* I0 (input) INTEGER */ +/* First index. */ + +/* N0 (input) INTEGER */ +/* Last index. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ +/* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ +/* an extra argument. */ + +/* PP (input) INTEGER */ +/* PP=0 for ping, PP=1 for pong. */ + +/* TAU (input) DOUBLE PRECISION */ +/* This is the shift. */ + +/* DMIN (output) DOUBLE PRECISION */ +/* Minimum value of d. 
*/ + +/* DMIN1 (output) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ). */ + +/* DMIN2 (output) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + +/* DN (output) DOUBLE PRECISION */ +/* d(N0), the last value of d. */ + +/* DNM1 (output) DOUBLE PRECISION */ +/* d(N0-1). */ + +/* DNM2 (output) DOUBLE PRECISION */ +/* d(N0-2). */ + +/* IEEE (input) LOGICAL */ +/* Flag for IEEE or non IEEE arithmetic. */ + +/* ===================================================================== */ + +/* .. Parameter .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. 
*/ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) 
{ + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ5 */ + +} /* _starpu_dlasq5_ */ diff --git a/min-dgels/base/SRC/dlasq6.c b/min-dgels/base/SRC/dlasq6.c new file mode 100644 index 0000000..605076c --- /dev/null +++ b/min-dgels/base/SRC/dlasq6.c @@ -0,0 +1,212 @@ +/* dlasq6.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal d__; + integer j4, j4p2; + doublereal emin, temp; + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ +/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ +/* -- Berkeley -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLASQ6 computes one dqd (shift equal to zero) transform in */ +/* ping-pong form, with protection against underflow and overflow. */ + +/* Arguments */ +/* ========= */ + +/* I0 (input) INTEGER */ +/* First index. */ + +/* N0 (input) INTEGER */ +/* Last index. */ + +/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ +/* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ +/* an extra argument. */ + +/* PP (input) INTEGER */ +/* PP=0 for ping, PP=1 for pong. */ + +/* DMIN (output) DOUBLE PRECISION */ +/* Minimum value of d. */ + +/* DMIN1 (output) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ). */ + +/* DMIN2 (output) DOUBLE PRECISION */ +/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + +/* DN (output) DOUBLE PRECISION */ +/* d(N0), the last value of d. */ + +/* DNM1 (output) DOUBLE PRECISION */ +/* d(N0-1). */ + +/* DNM2 (output) DOUBLE PRECISION */ +/* d(N0-2). */ + +/* ===================================================================== */ + +/* .. Parameter .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Function .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = _starpu_dlamch_("Safe minimum"); + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) 
{ + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) 
{ + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ6 */ + +} /* _starpu_dlasq6_ */ diff --git a/min-dgels/base/SRC/dlasr.c b/min-dgels/base/SRC/dlasr.c new file mode 100644 index 0000000..1a77894 --- /dev/null +++ b/min-dgels/base/SRC/dlasr.c @@ -0,0 +1,453 @@ +/* dlasr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, info; + doublereal temp; + extern logical _starpu_lsame_(char *, char *); + doublereal ctemp, stemp; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLASR applies a sequence of plane rotations to a real matrix A, */ +/* from either the left or the right. */ + +/* When SIDE = 'L', the transformation takes the form */ + +/* A := P*A */ + +/* and when SIDE = 'R', the transformation takes the form */ + +/* A := A*P**T */ + +/* where P is an orthogonal matrix consisting of a sequence of z plane */ +/* rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', */ +/* and P**T is the transpose of P. */ + +/* When DIRECT = 'F' (Forward sequence), then */ + +/* P = P(z-1) * ... * P(2) * P(1) */ + +/* and when DIRECT = 'B' (Backward sequence), then */ + +/* P = P(1) * P(2) * ... * P(z-1) */ + +/* where P(k) is a plane rotation matrix defined by the 2-by-2 rotation */ + +/* R(k) = ( c(k) s(k) ) */ +/* = ( -s(k) c(k) ). */ + +/* When PIVOT = 'V' (Variable pivot), the rotation is performed */ +/* for the plane (k,k+1), i.e., P(k) has the form */ + +/* P(k) = ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ +/* ( c(k) s(k) ) */ +/* ( -s(k) c(k) ) */ +/* ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ + +/* where R(k) appears as a rank-2 modification to the identity matrix in */ +/* rows and columns k and k+1. */ + +/* When PIVOT = 'T' (Top pivot), the rotation is performed for the */ +/* plane (1,k+1), so P(k) has the form */ + +/* P(k) = ( c(k) s(k) ) */ +/* ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ +/* ( -s(k) c(k) ) */ +/* ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ + +/* where R(k) appears in rows and columns 1 and k+1. */ + +/* Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is */ +/* performed for the plane (k,z), giving P(k) the form */ + +/* P(k) = ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ +/* ( c(k) s(k) ) */ +/* ( 1 ) */ +/* ( ... ) */ +/* ( 1 ) */ +/* ( -s(k) c(k) ) */ + +/* where R(k) appears in rows and columns k and z. The rotations are */ +/* performed without ever forming P(k) explicitly. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* Specifies whether the plane rotation matrix P is applied to */ +/* A on the left or the right. */ +/* = 'L': Left, compute A := P*A */ +/* = 'R': Right, compute A := A*P**T */ + +/* PIVOT (input) CHARACTER*1 */ +/* Specifies the plane for which P(k) is a plane rotation */ +/* matrix. */ +/* = 'V': Variable pivot, the plane (k,k+1) */ +/* = 'T': Top pivot, the plane (1,k+1) */ +/* = 'B': Bottom pivot, the plane (k,z) */ + +/* DIRECT (input) CHARACTER*1 */ +/* Specifies whether P is a forward or backward sequence of */ +/* plane rotations. */ +/* = 'F': Forward, P = P(z-1)*...*P(2)*P(1) */ +/* = 'B': Backward, P = P(1)*P(2)*...*P(z-1) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. If m <= 1, an immediate */ +/* return is effected. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. If n <= 1, an */ +/* immediate return is effected. */ + +/* C (input) DOUBLE PRECISION array, dimension */ +/* (M-1) if SIDE = 'L' */ +/* (N-1) if SIDE = 'R' */ +/* The cosines c(k) of the plane rotations. */ + +/* S (input) DOUBLE PRECISION array, dimension */ +/* (M-1) if SIDE = 'L' */ +/* (N-1) if SIDE = 'R' */ +/* The sines s(k) of the plane rotations. The 2-by-2 plane */ +/* rotation part of the matrix P(k), R(k), has the form */ +/* R(k) = ( c(k) s(k) ) */ +/* ( -s(k) c(k) ). */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The M-by-N matrix A. On exit, A is overwritten by P*A if */ +/* SIDE = 'L' or by A*P**T if SIDE = 'R'. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (_starpu_lsame_(side, "L") || _starpu_lsame_(side, "R"))) { + info = 1; + } else if (! (_starpu_lsame_(pivot, "V") || _starpu_lsame_(pivot, + "T") || _starpu_lsame_(pivot, "B"))) { + info = 2; + } else if (! (_starpu_lsame_(direct, "F") || _starpu_lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + _starpu_xerbla_("DLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (_starpu_lsame_(side, "L")) { + +/* Form P * A */ + + if (_starpu_lsame_(pivot, "V")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (_starpu_lsame_(pivot, "T")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (_starpu_lsame_(pivot, "B")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (_starpu_lsame_(side, "R")) { + +/* Form A * P' */ + + if (_starpu_lsame_(pivot, "V")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (_starpu_lsame_(pivot, "T")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (_starpu_lsame_(pivot, "B")) { + if (_starpu_lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (_starpu_lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of DLASR */ + +} /* _starpu_dlasr_ */ diff --git a/min-dgels/base/SRC/dlasrt.c b/min-dgels/base/SRC/dlasrt.c new file mode 100644 index 0000000..e136b54 --- /dev/null +++ b/min-dgels/base/SRC/dlasrt.c @@ -0,0 +1,286 @@ +/* dlasrt.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal d1, d2, d3; + integer dir; + doublereal tmp; + integer endd; + extern logical _starpu_lsame_(char *, char *); + integer stack[64] /* was [2][32] */; + doublereal dmnmx; + integer start; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer stkpnt; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Sort the numbers in D in increasing order (if ID = 'I') or */ +/* in decreasing order (if ID = 'D' ). */ + +/* Use Quick Sort, reverting to Insertion sort on arrays of */ +/* size <= 20. Dimension of STACK limits N to about 2**32. */ + +/* Arguments */ +/* ========= */ + +/* ID (input) CHARACTER*1 */ +/* = 'I': sort D in increasing order; */ +/* = 'D': sort D in decreasing order. */ + +/* N (input) INTEGER */ +/* The length of the array D. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the array to be sorted. */ +/* On exit, D has been sorted into increasing order */ +/* (D(1) <= ... <= D(N) ) or into decreasing order */ +/* (D(1) >= ... >= D(N) ), depending on ID. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (_starpu_lsame_(id, "D")) { + dir = 0; + } else if (_starpu_lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* Partition D( START:ENDD ) and stack parts, largest one first */ + +/* Choose partition entry as median of 3 */ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = 
d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of DLASRT */ + +} /* _starpu_dlasrt_ */ diff --git a/min-dgels/base/SRC/dlassq.c b/min-dgels/base/SRC/dlassq.c new file mode 100644 index 0000000..34baa3d --- /dev/null +++ b/min-dgels/base/SRC/dlassq.c @@ -0,0 +1,116 @@ +/* dlassq.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer ix; + doublereal absxi; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASSQ returns the values scl and smsq such that */ + +/* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ + +/* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ +/* assumed to be non-negative and scl returns the value */ + +/* scl = max( scale, abs( x( i ) ) ). */ + +/* scale and sumsq must be supplied in SCALE and SUMSQ and */ +/* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ + +/* The routine makes only one pass through the vector x. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of elements to be used from the vector X. */ + +/* X (input) DOUBLE PRECISION array, dimension (N) */ +/* The vector for which a scaled sum of squares is computed. */ +/* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ + +/* INCX (input) INTEGER */ +/* The increment between successive values of the vector X. */ +/* INCX > 0. */ + +/* SCALE (input/output) DOUBLE PRECISION */ +/* On entry, the value scale in the equation above. 
*/ +/* On exit, SCALE is overwritten with scl , the scaling factor */ +/* for the sum of squares. */ + +/* SUMSQ (input/output) DOUBLE PRECISION */ +/* On entry, the value sumsq in the equation above. */ +/* On exit, SUMSQ is overwritten with smsq , the basic sum of */ +/* squares from which scl has been factored out. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* _starpu_dlassq_ */ diff --git a/min-dgels/base/SRC/dlasv2.c b/min-dgels/base/SRC/dlasv2.c new file mode 100644 index 0000000..09d61b4 --- /dev/null +++ b/min-dgels/base/SRC/dlasv2.c @@ -0,0 +1,274 @@ +/* dlasv2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b3 = 2.; +static doublereal c_b4 = 1.; + +/* Subroutine */ int _starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, + crt, slt, srt; + integer pmax; + doublereal temp; + logical swap; + doublereal tsign; + extern doublereal _starpu_dlamch_(char *); + logical gasmal; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASV2 computes the singular value decomposition of a 2-by-2 */ +/* triangular matrix */ +/* [ F G ] */ +/* [ 0 H ]. */ +/* On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the */ +/* smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and */ +/* right singular vectors for abs(SSMAX), giving the decomposition */ + +/* [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] */ +/* [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. */ + +/* Arguments */ +/* ========= */ + +/* F (input) DOUBLE PRECISION */ +/* The (1,1) element of the 2-by-2 matrix. 
*/ + +/* G (input) DOUBLE PRECISION */ +/* The (1,2) element of the 2-by-2 matrix. */ + +/* H (input) DOUBLE PRECISION */ +/* The (2,2) element of the 2-by-2 matrix. */ + +/* SSMIN (output) DOUBLE PRECISION */ +/* abs(SSMIN) is the smaller singular value. */ + +/* SSMAX (output) DOUBLE PRECISION */ +/* abs(SSMAX) is the larger singular value. */ + +/* SNL (output) DOUBLE PRECISION */ +/* CSL (output) DOUBLE PRECISION */ +/* The vector (CSL, SNL) is a unit left singular vector for the */ +/* singular value abs(SSMAX). */ + +/* SNR (output) DOUBLE PRECISION */ +/* CSR (output) DOUBLE PRECISION */ +/* The vector (CSR, SNR) is a unit right singular vector for the */ +/* singular value abs(SSMAX). */ + +/* Further Details */ +/* =============== */ + +/* Any input parameter may be aliased with any output parameter. */ + +/* Barring over/underflow and assuming a guard digit in subtraction, all */ +/* output quantities are correct to within a few units in the last */ +/* place (ulps). */ + +/* In IEEE arithmetic, the code works correctly if one matrix element is */ +/* infinite. */ + +/* Overflow will not occur unless the largest singular value itself */ +/* overflows or is within a few ulps of overflow. (On machines with */ +/* partial overflow, like the Cray, overflow may occur if the largest */ +/* singular value is within a factor of 2 of overflow.) */ + +/* Underflow is harmless if underflow is gradual. Otherwise, results */ +/* may correspond to a matrix modified by perturbations of size near */ +/* the underflow threshold. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + ft = *f; + fa = abs(ft); + ht = *h__; + ha = abs(*h__); + +/* PMAX points to the maximum absolute element of matrix */ +/* PMAX = 1 if F largest in absolute values */ +/* PMAX = 2 if G largest in absolute values */ +/* PMAX = 3 if H largest in absolute values */ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = abs(gt); + if (ga == 0.) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < _starpu_dlamch_("EPS")) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.; + slt = ht / gt; + srt = 1.; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2. - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.) { + r__ = abs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.) { + +/* Note that M is very tiny */ + + if (l == 0.) { + t = d_sign(&c_b3, &ft) * d_sign(&c_b4, >); + } else { + t = gt / d_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + } + l = sqrt(t * t + 4.); + crt = 2. 
/ l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = d_sign(&c_b4, csr) * d_sign(&c_b4, csl) * d_sign(&c_b4, f); + } + if (pmax == 2) { + tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, csl) * d_sign(&c_b4, g); + } + if (pmax == 3) { + tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, snl) * d_sign(&c_b4, h__); + } + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b4, f) * d_sign(&c_b4, h__); + *ssmin = d_sign(ssmin, &d__1); + return 0; + +/* End of DLASV2 */ + +} /* _starpu_dlasv2_ */ diff --git a/min-dgels/base/SRC/dlaswp.c b/min-dgels/base/SRC/dlaswp.c new file mode 100644 index 0000000..4ca9c18 --- /dev/null +++ b/min-dgels/base/SRC/dlaswp.c @@ -0,0 +1,158 @@ +/* dlaswp.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + doublereal temp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLASWP performs a series of row interchanges on the matrix A. */ +/* One row interchange is initiated for each of rows K1 through K2 of A. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the matrix of column dimension N to which the row */ +/* interchanges will be applied. */ +/* On exit, the permuted matrix. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ + +/* K1 (input) INTEGER */ +/* The first element of IPIV for which a row interchange will */ +/* be done. */ + +/* K2 (input) INTEGER */ +/* The last element of IPIV for which a row interchange will */ +/* be done. */ + +/* IPIV (input) INTEGER array, dimension (K2*abs(INCX)) */ +/* The vector of pivot indices. Only the elements in positions */ +/* K1 through K2 of IPIV are accessed. */ +/* IPIV(K) = L implies rows K and L are to be interchanged. */ + +/* INCX (input) INTEGER */ +/* The increment between successive values of IPIV. If INCX */ +/* is negative, the pivots are applied in reverse order. */ + +/* Further Details */ +/* =============== */ + +/* Modified by */ +/* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Interchange row I with row IPIV(I) for each of rows K1 through K2.
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of DLASWP */ + +} /* _starpu_dlaswp_ */ diff --git a/min-dgels/base/SRC/dlasy2.c b/min-dgels/base/SRC/dlasy2.c new file mode 100644 index 0000000..97bd187 --- /dev/null +++ b/min-dgels/base/SRC/dlasy2.c @@ -0,0 +1,478 @@ +/* dlasy2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__4 = 4; +static integer c__1 = 1; +static integer c__16 = 16; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * + tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, + doublereal *x, integer *ldx, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static integer locu12[4] = { 3,4,1,2 }; + static integer locl21[4] = { 2,1,4,3 }; + static integer locu22[4] = { 4,3,2,1 }; + static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + + /* System generated locals */ + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, + x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + + /* Local variables */ + integer i__, j, k; + doublereal x2[2], l21, u11, u12; + integer ip, jp; + doublereal u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, tmp[4], + tau1, btmp[4], smin; + integer ipiv; + doublereal temp; + integer jpiv[4]; + doublereal xmax; + integer ipsv, jpsv; + logical bswap; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical xswap; + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal 
smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in */ + +/* op(TL)*X + ISGN*X*op(TR) = SCALE*B, */ + +/* where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or */ +/* -1. op(T) = T or T', where T' denotes the transpose of T. */ + +/* Arguments */ +/* ========= */ + +/* LTRANL (input) LOGICAL */ +/* On entry, LTRANL specifies the op(TL): */ +/* = .FALSE., op(TL) = TL, */ +/* = .TRUE., op(TL) = TL'. */ + +/* LTRANR (input) LOGICAL */ +/* On entry, LTRANR specifies the op(TR): */ +/* = .FALSE., op(TR) = TR, */ +/* = .TRUE., op(TR) = TR'. */ + +/* ISGN (input) INTEGER */ +/* On entry, ISGN specifies the sign of the equation */ +/* as described before. ISGN may only be 1 or -1. */ + +/* N1 (input) INTEGER */ +/* On entry, N1 specifies the order of matrix TL. */ +/* N1 may only be 0, 1 or 2. */ + +/* N2 (input) INTEGER */ +/* On entry, N2 specifies the order of matrix TR. */ +/* N2 may only be 0, 1 or 2. */ + +/* TL (input) DOUBLE PRECISION array, dimension (LDTL,2) */ +/* On entry, TL contains an N1 by N1 matrix. */ + +/* LDTL (input) INTEGER */ +/* The leading dimension of the matrix TL. LDTL >= max(1,N1). */ + +/* TR (input) DOUBLE PRECISION array, dimension (LDTR,2) */ +/* On entry, TR contains an N2 by N2 matrix. */ + +/* LDTR (input) INTEGER */ +/* The leading dimension of the matrix TR. LDTR >= max(1,N2). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,2) */ +/* On entry, the N1 by N2 matrix B contains the right-hand */ +/* side of the equation. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the matrix B. LDB >= max(1,N1). */ + +/* SCALE (output) DOUBLE PRECISION */ +/* On exit, SCALE contains the scale factor. 
SCALE is chosen */ +/* less than or equal to 1 to prevent the solution overflowing. */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,2) */ +/* On exit, X contains the N1 by N2 solution. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the matrix X. LDX >= max(1,N1). */ + +/* XNORM (output) DOUBLE PRECISION */ +/* On exit, XNORM is the infinity-norm of the solution. */ + +/* INFO (output) INTEGER */ +/* On exit, INFO is set to */ +/* 0: successful exit. */ +/* 1: TL and TR have too close eigenvalues, so TL or */ +/* TR is perturbed to get a nonsingular equation. */ +/* NOTE: In the interests of speed, this routine does not */ +/* check the inputs for errors. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Do not check the input parameters for errors */ + + *info = 0; + +/* Quick return if possible */ + + if (*n1 == 0 || *n2 == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + sgn = (doublereal) (*isgn); + + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + case 4: goto L50; + } + +/* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ + +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. / gam; + } + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; + +/* 1 by 2: */ +/* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] */ +/* [TR21 TR22] */ + +L20: + +/* Computing MAX */ +/* Computing MAX */ + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1] + , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tr[(tr_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tr[ + tr_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; + +/* 2 by 1: */ +/* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] */ +/* [TL21 TL22] [X21] [X21] [B21] */ + +L30: +/* Computing MAX */ +/* Computing MAX */ + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1] + , abs(d__2)), d__7 = 
max(d__7,d__8), d__8 = (d__3 = tl[(tl_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tl[ + tl_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + +/* Solve 2 by 2 system using complete pivoting. */ +/* Set pivots less than SMIN to SMIN. */ + + ipiv = _starpu_idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. 
* abs(btmp[0]) > + abs(u11)) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; +/* Computing MAX */ + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2] + , abs(d__2)); + *xnorm = max(d__3,d__4); + } + return 0; + +/* 2 by 2: */ +/* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] */ +/* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] */ + +/* Solve equivalent 4 by 4 system using complete pivoting. */ +/* Set pivots less than SMIN to SMIN. */ + +L50: +/* Computing MAX */ + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << + 1) + 1], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = tr[ + tr_dim1 + 2], abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = + tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5,d__6); +/* Computing MAX */ + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, + d__6), d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = + max(d__5,d__6), d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = + max(d__5,d__6), d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)) + ; + smin = max(d__5,d__6); +/* Computing MAX */ + d__1 = eps * smin; + smin = max(d__1,smlnum); + btmp[0] = 0.; + _starpu_dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 
1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + +/* Perform elimination */ + + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1)); + ipsv = ip; + jpsv = jp; + } +/* L60: */ + } +/* L70: */ + } + if (ipsv != i__) { + _starpu_dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + _starpu_dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], + &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( + k << 2) - 5]; +/* L80: */ + } +/* L90: */ + } +/* L100: */ + } + if (abs(t16[15]) < smin) { + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) + > abs(t16[5]) || smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || + smlnum * 8. 
* abs(btmp[3]) > abs(t16[15])) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1,d__2), d__2 + = abs(btmp[2]), d__1 = max(d__1,d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; +/* L110: */ + } +/* L120: */ + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } +/* L130: */ + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; +/* Computing MAX */ + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1,d__2); + return 0; + +/* End of DLASY2 */ + +} /* _starpu_dlasy2_ */ diff --git a/min-dgels/base/SRC/dlasyf.c b/min-dgels/base/SRC/dlasyf.c new file mode 100644 index 0000000..c5b20e8 --- /dev/null +++ b/min-dgels/base/SRC/dlasyf.c @@ -0,0 +1,721 @@ +/* dlasyf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = -1.; +static doublereal c_b9 = 1.; + +/* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, + doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * + ldw, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j, k; + doublereal t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer + *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + doublereal absakk; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal colmax, rowmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLASYF computes a partial factorization of a real symmetric matrix A */ +/* using the Bunch-Kaufman diagonal pivoting method. The partial */ +/* factorization has the form: */ + +/* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: */ +/* ( 0 U22 ) ( 0 D ) ( U12' U22' ) */ + +/* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' */ +/* ( L21 I ) ( 0 A22 ) ( 0 I ) */ + +/* where the order of D is at most NB. The actual order is returned in */ +/* the argument KB, and is either NB or NB-1, or N if N <= NB. */ + +/* DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code */ +/* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or */ +/* A22 (if UPLO = 'L'). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NB (input) INTEGER */ +/* The maximum number of columns of the matrix A that should be */ +/* factored. NB should be at least 2 to allow for 2-by-2 pivot */ +/* blocks. */ + +/* KB (output) INTEGER */ +/* The number of columns of A that were actually factored. */ +/* KB is either NB-1 or NB, or N if N <= NB. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n-by-n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. 
If UPLO = 'L', the */ +/* leading n-by-n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ +/* On exit, A contains details of the partial factorization. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D. */ +/* If UPLO = 'U', only the last KB elements of IPIV are set; */ +/* if UPLO = 'L', only the first KB elements are set. */ + +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* W (workspace) DOUBLE PRECISION array, dimension (LDW,NB) */ + +/* LDW (input) INTEGER */ +/* The leading dimension of the array W. LDW >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + *info = 0; + +/* Initialize ALPHA for use in choosing pivot block size. 
*/ + + alpha = (sqrt(17.) + 1.) / 8.; + + if (_starpu_lsame_(uplo, "U")) { + +/* Factorize the trailing columns of A using the upper triangle */ +/* of A and working backwards, and compute the matrix W = U12*D */ +/* for use in updating A11 */ + +/* K is the main loop index, decreasing from N in steps of 1 or 2 */ + +/* KW is the column of W which corresponds to column K of A */ + + k = *n; +L10: + kw = *nb + k - *n; + +/* Exit from loop */ + + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + +/* Copy column K of A to column KW of W and update it */ + + _starpu_dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], + lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw * + w_dim1 + 1], &c__1); + } + + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = w[k + kw * w_dim1], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k > 1) { + i__1 = k - 1; + imax = _starpu_idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0.) 
{ + +/* Column K is zero: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* Copy column IMAX to column KW-1 of W and update it */ + + _starpu_dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * + w_dim1 + 1], &c__1); + i__1 = k - imax; + _starpu_dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + + 1 + (kw - 1) * w_dim1], &c__1); + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * + a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1], + ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1); + } + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = k - imax; + jmax = imax + _starpu_idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = _starpu_idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1], + abs(d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >= + alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + +/* copy column KW-1 of W to column KW */ + + _starpu_dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * + w_dim1 + 1], &c__1); + } else { + +/* interchange rows and columns K-1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k - kstep + 1; + kkw = *nb + kk - *n; + +/* Updated column KP is already stored in column KKW of W */ + + if (kp != kk) { + +/* Copy non-updated column KK to column KP */ + + a[kp + k * a_dim1] = a[kk + k * a_dim1]; + i__1 
= k - 1 - kp; + _starpu_dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + + 1) * a_dim1], lda); + _starpu_dcopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], & + c__1); + +/* Interchange rows KK and KP in last KK columns of A and W */ + + i__1 = *n - kk + 1; + _starpu_dswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1], + lda); + i__1 = *n - kk + 1; + _starpu_dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * + w_dim1], ldw); + } + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column KW of W now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + +/* Store U(k) in column k of A */ + + _starpu_dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], & + c__1); + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + _starpu_dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + +/* 2-by-2 pivot block D(k): columns KW and KW-1 of W now */ +/* hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + + if (k > 2) { + +/* Store U(k) and U(k-1) in columns k and k-1 of A */ + + d21 = w[k - 1 + kw * w_dim1]; + d11 = w[k + kw * w_dim1] / d21; + d22 = w[k - 1 + (kw - 1) * w_dim1] / d21; + t = 1. 
/ (d11 * d22 - 1.); + d21 = t / d21; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1) + * w_dim1] - w[j + kw * w_dim1]); + a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] - + w[j + (kw - 1) * w_dim1]); +/* L20: */ + } + } + +/* Copy D(k) to A */ + + a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1]; + a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1]; + a[k + k * a_dim1] = w[k + kw * w_dim1]; + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + +L30: + +/* Update the upper triangle of A11 (= A(1:k,1:k)) as */ + +/* A11 := A11 - U12*D*U12' = A11 - U12*W' */ + +/* computing blocks of NB columns at a time */ + + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += + i__1) { +/* Computing MIN */ + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2,i__3); + +/* Update the upper triangle of the diagonal block */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + _starpu_dgemv_("No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) * + a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b9, + &a[j + jj * a_dim1], &c__1); +/* L40: */ + } + +/* Update the rectangular superdiagonal block */ + + i__2 = j - 1; + i__3 = *n - k; + _starpu_dgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &a[( + k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw, + &c_b9, &a[j * a_dim1 + 1], lda); +/* L50: */ + } + +/* Put U12 in standard form by partially undoing the interchanges */ +/* in columns k+1:n */ + + j = k + 1; +L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + _starpu_dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j <= *n) { + goto L60; + } + 
+/* Set KB to the number of columns factorized */ + + *kb = *n - k; + + } else { + +/* Factorize the leading columns of A using the lower triangle */ +/* of A and working forwards, and compute the matrix W = L21*D */ +/* for use in updating A22 */ + +/* K is the main loop index, increasing from 1 in steps of 1 or 2 */ + + k = 1; +L70: + +/* Exit from loop */ + + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + +/* Copy column K of A to column K of W and update it */ + + i__1 = *n - k + 1; + _starpu_dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k + + w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1); + + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = w[k + k * w_dim1], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k < *n) { + i__1 = *n - k; + imax = k + _starpu_idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + colmax = (d__1 = w[imax + k * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0.) 
{ + +/* Column K is zero: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* Copy column IMAX to column K+1 of W and update it */ + + i__1 = imax - k; + _starpu_dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * + w_dim1], &c__1); + i__1 = *n - imax + 1; + _starpu_dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], + lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) * + w_dim1], &c__1); + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = imax - k; + jmax = k - 1 + _starpu_idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1) + ; + rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + _starpu_idamax_(&i__1, &w[imax + 1 + (k + 1) * + w_dim1], &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1], + abs(d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >= + alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + +/* copy column K+1 of W to column K */ + + i__1 = *n - k + 1; + _starpu_dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * + w_dim1], &c__1); + } else { + +/* interchange rows and columns K+1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k + kstep - 1; + +/* Updated column KP is already stored in column KK of W */ + + if (kp != kk) { + +/* Copy non-updated column KK to column KP */ + + a[kp + k * a_dim1] = a[kk + k * a_dim1]; + i__1 = kp 
- k - 1; + _starpu_dcopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1) + * a_dim1], lda); + i__1 = *n - kp + 1; + _starpu_dcopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp * + a_dim1], &c__1); + +/* Interchange rows KK and KP in first KK columns of A and W */ + + _starpu_dswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + _starpu_dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k of W now holds */ + +/* W(k) = L(k)*D(k) */ + +/* where L(k) is the k-th column of L */ + +/* Store L(k) in column k of A */ + + i__1 = *n - k + 1; + _starpu_dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], & + c__1); + if (k < *n) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + _starpu_dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + +/* 2-by-2 pivot block D(k): columns k and k+1 of W now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + if (k < *n - 1) { + +/* Store L(k) and L(k+1) in columns k and k+1 of A */ + + d21 = w[k + 1 + k * w_dim1]; + d11 = w[k + 1 + (k + 1) * w_dim1] / d21; + d22 = w[k + k * w_dim1] / d21; + t = 1. 
/ (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] - + w[j + (k + 1) * w_dim1]); + a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) * + w_dim1] - w[j + k * w_dim1]); +/* L80: */ + } + } + +/* Copy D(k) to A */ + + a[k + k * a_dim1] = w[k + k * w_dim1]; + a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1]; + a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1]; + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L70; + +L90: + +/* Update the lower triangle of A22 (= A(k:n,k:n)) as */ + +/* A22 := A22 - L21*D*L21' = A22 - L21*W' */ + +/* computing blocks of NB columns at a time */ + + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + +/* Update the lower triangle of the diagonal block */ + + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + _starpu_dgemv_("No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1], + lda, &w[jj + w_dim1], ldw, &c_b9, &a[jj + jj * a_dim1] +, &c__1); +/* L100: */ + } + +/* Update the rectangular subdiagonal block */ + + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + _starpu_dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8, + &a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b9, + &a[j + jb + j * a_dim1], lda); + } +/* L110: */ + } + +/* Put L21 in standard form by partially undoing the interchanges */ +/* in columns 1:k-1 */ + + j = k - 1; +L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + _starpu_dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j >= 1) { + goto L120; + } + +/* Set KB to the number of columns 
factorized */ + + *kb = k - 1; + + } + return 0; + +/* End of DLASYF */ + +} /* _starpu_dlasyf_ */ diff --git a/min-dgels/base/SRC/dlat2s.c b/min-dgels/base/SRC/dlat2s.c new file mode 100644 index 0000000..2ffdc60 --- /dev/null +++ b/min-dgels/base/SRC/dlat2s.c @@ -0,0 +1,137 @@ +/* dlat2s.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * + lda, real *sa, integer *ldsa, integer *info) +{ + /* System generated locals */ + integer sa_dim1, sa_offset, a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal rmax; + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern doublereal _starpu_slamch_(char *); + + +/* -- LAPACK PROTOTYPE auxiliary routine (version 3.1.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* May 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAT2S converts a DOUBLE PRECISION triangular matrix, A, to a SINGLE */ +/* PRECISION triangular matrix, SA. */ + +/* RMAX is the overflow for the SINGLE PRECISION arithmetic */ +/* DLAT2S checks that all the entries of A are between -RMAX and */ +/* RMAX. If not the conversion is aborted and a flag is raised. */ + +/* This is an auxiliary routine so there is no argument checking. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* N (input) INTEGER */ +/* The number of rows and columns of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the N-by-N triangular coefficient matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* SA (output) REAL array, dimension (LDSA,N) */ +/* Only the UPLO part of SA is referenced. On exit, if INFO=0, */ +/* the N-by-N coefficient matrix SA; if INFO>0, the content of */ +/* the UPLO part of SA is unspecified. */ + +/* LDSA (input) INTEGER */ +/* The leading dimension of the array SA. LDSA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* = 1: an entry of the matrix A is greater than the SINGLE */ +/* PRECISION overflow threshold, in this case, the content */ +/* of the UPLO part of SA on exit is unspecified. */ + +/* ========= */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + sa_dim1 = *ldsa; + sa_offset = 1 + sa_dim1; + sa -= sa_offset; + + /* Function Body */ + rmax = _starpu_slamch_("O"); + upper = _starpu_lsame_(uplo, "U"); + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) + { + *info = 1; + goto L50; + } + sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) + { + *info = 1; + goto L50; + } + sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } +L50: + + return 0; + +/* End of DLAT2S */ + +} /* _starpu_dlat2s_ */ diff --git a/min-dgels/base/SRC/dlatbs.c b/min-dgels/base/SRC/dlatbs.c new file mode 100644 index 0000000..f63f62d --- /dev/null +++ b/min-dgels/base/SRC/dlatbs.c @@ -0,0 +1,850 @@ +/* dlatbs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b36 = .5; + +/* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *kd, doublereal *ab, integer *ldab, + doublereal *x, doublereal *scale, doublereal *cnorm, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal xj, rec, tjj; + integer jinc, jlen; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal xbnd; + integer imax; + doublereal tmax, tjjs, xmax, grow, sumj; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + integer maind; + extern logical _starpu_lsame_(char *, char *); + doublereal tscal, uscal; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + logical upper; + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK auxiliary 
routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATBS solves one of the triangular systems */ + +/* A *x = s*b or A'*x = s*b */ + +/* with scaling to prevent overflow, where A is an upper or lower */ +/* triangular band matrix. Here A' denotes the transpose of A, x and b */ +/* are n-element vectors, and s is a scaling factor, usually less than */ +/* or equal to 1, chosen so that the components of x will be less than */ +/* the overflow threshold. If the unscaled problem will not cause */ +/* overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A */ +/* is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ +/* non-trivial solution to A*x = 0 is returned. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the operation applied to A. */ +/* = 'N': Solve A * x = s*b (No transpose) */ +/* = 'T': Solve A'* x = s*b (Transpose) */ +/* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* NORMIN (input) CHARACTER*1 */ +/* Specifies whether CNORM has been set or not. */ +/* = 'Y': CNORM contains the column norms on entry */ +/* = 'N': CNORM is not set on entry. On exit, the norms will */ +/* be computed and stored in CNORM. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of subdiagonals or superdiagonals in the */ +/* triangular matrix A. KD >= 0. 
*/ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangular band matrix A, stored in the */ +/* first KD+1 rows of the array. The j-th column of A is stored */ +/* in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* X (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the right hand side b of the triangular system. */ +/* On exit, X is overwritten by the solution vector x. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* The scaling factor s for the triangular system */ +/* A * x = s*b or A'* x = s*b. */ +/* If SCALE = 0, the matrix A is singular or badly scaled, and */ +/* the vector x is an exact or approximate solution to A*x = 0. */ + +/* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ + +/* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* contains the norm of the off-diagonal part of the j-th column */ +/* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* must be greater than or equal to the 1-norm. */ + +/* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* returns the 1-norm of the offdiagonal part of the j-th column */ +/* of A. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* Further Details */ +/* ======= ======= */ + +/* A rough bound on x is computed; if that is less than overflow, DTBSV */ +/* is called, otherwise, specific code is used which checks for possible */ +/* overflow or divide-by-zero at every operation. */ + +/* A columnwise scheme is used for solving A*x = b. 
The basic algorithm */ +/* if A is lower triangular is */ + +/* x[1:n] := b[1:n] */ +/* for j = 1, ..., n */ +/* x(j) := x(j) / A(j,j) */ +/* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ +/* end */ + +/* Define bounds on the components of x after j iterations of the loop: */ +/* M(j) = bound on x[1:j] */ +/* G(j) = bound on x[j+1:n] */ +/* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ + +/* Then for iteration j+1 we have */ +/* M(j+1) <= G(j) / | A(j+1,j+1) | */ +/* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ +/* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ + +/* where CNORM(j+1) is greater than or equal to the infinity-norm of */ +/* column j+1 of A, not counting the diagonal. Hence */ + +/* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ +/* 1<=i<=j */ +/* and */ + +/* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ +/* 1<=i< j */ + +/* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the */ +/* reciprocal of the largest M(j), j=1,..,n, is larger than */ +/* max(underflow, 1/overflow). */ + +/* The bound on x(j) is also used to determine when a step in the */ +/* columnwise method can be performed without fear of overflow. If */ +/* the computed bound is greater than a large constant, x is scaled to */ +/* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ +/* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ + +/* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ +/* algorithm for A upper triangular is */ + +/* for j = 1, ..., n */ +/* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ +/* end */ + +/* We simultaneously compute two bounds */ +/* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ +/* M(j) = bound on x(i), 1<=i<=j */ + +/* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ +/* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. 
*/ +/* Then the bound on x(j) is */ + +/* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ + +/* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ +/* 1<=i<=j */ + +/* and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater */ +/* than max(underflow, 1/overflow). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (! _starpu_lsame_(normin, "Y") && ! _starpu_lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*kd < 0) { + *info = -6; + } else if (*ldab < *kd + 1) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLATBS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + bignum = 1. / smlnum; + *scale = 1.; + + if (_starpu_lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kd, i__3 = j - 1; + jlen = min(i__2,i__3); + cnorm[j] = _starpu_dasum_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], & + c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kd, i__3 = *n - j; + jlen = min(i__2,i__3); + if (jlen > 0) { + cnorm[j] = _starpu_dasum_(&jlen, &ab[j * ab_dim1 + 2], &c__1); + } else { + cnorm[j] = 0.; + } +/* L20: */ + } + } + } + +/* Scale the column norms by TSCAL if the maximum element in CNORM is */ +/* greater than BIGNUM. */ + + imax = _starpu_idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum) { + tscal = 1.; + } else { + tscal = 1. / (smlnum * tmax); + _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* Compute a bound on the computed solution vector to see if the */ +/* Level 2 BLAS routine DTBSV can be used. */ + + j = _starpu_idamax_(n, &x[1], &c__1); + xmax = (d__1 = x[j], abs(d__1)); + xbnd = xmax; + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + maind = *kd + 1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + maind = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L50; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, G(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. 
*/ + + if (grow <= smlnum) { + goto L50; + } + +/* M(j) = G(j-1) / abs(A(j,j)) */ + + tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1)); +/* Computing MIN */ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } +/* L30: */ + } + grow = xbnd; + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L40: */ + } + } +L50: + + ; + } else { + +/* Compute the growth in A' * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + maind = *kd + 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + maind = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L80; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, M(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1)); + if (xj > tjj) { + xbnd *= tjj / xj; + } +/* L60: */ + } + grow = min(grow,xbnd); + } else { + +/* A is unit triangular. 
*/ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L70: */ + } + } +L80: + ; + } + + if (grow * tscal > smlnum) { + +/* Use the Level 2 BLAS solve if the reciprocal of the bound on */ +/* elements of X is not too small. */ + + _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum) { + +/* Scale X so that its components are less than or equal to */ +/* BIGNUM in absolute value. */ + + *scale = bignum / xmax; + _starpu_dscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = ab[maind + j * ab_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L100; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ +/* to avoid overflow when dividing by A(j,j). */ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* Scale by 1/CNORM(j) to avoid overflow when */ +/* multiplying x(j) times column j. 
*/ + + rec /= cnorm[j]; + } + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L90: */ + } + x[j] = 1.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L100: + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j of A. */ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + _starpu_dscal_(n, &c_b36, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* Compute the update */ +/* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - */ +/* x(j)* A(max(1,j-kd):j-1,j) */ + +/* Computing MIN */ + i__3 = *kd, i__4 = j - 1; + jlen = min(i__3,i__4); + d__1 = -x[j] * tscal; + _starpu_daxpy_(&jlen, &d__1, &ab[*kd + 1 - jlen + j * ab_dim1] +, &c__1, &x[j - jlen], &c__1); + i__3 = j - 1; + i__ = _starpu_idamax_(&i__3, &x[1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + } else if (j < *n) { + +/* Compute the update */ +/* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - */ +/* x(j) * A(j+1:min(j+kd,n),j) */ + +/* Computing MIN */ + i__3 = *kd, i__4 = *n - j; + jlen = min(i__3,i__4); + if (jlen > 0) { + d__1 = -x[j] * tscal; + _starpu_daxpy_(&jlen, &d__1, &ab[j * ab_dim1 + 2], &c__1, &x[ + j + 1], &c__1); + } + i__3 = *n - j; + i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } +/* L110: */ + } + + } else { + +/* Solve A' * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Compute x(j) = b(j) - sum A(k,j)*x(k). 
*/ +/* k<>j */ + + xj = (d__1 = x[j], abs(d__1)); + uscal = tscal; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + tjjs = ab[maind + j * ab_dim1] * tscal; + } else { + tjjs = tscal; + } + tjj = abs(tjjs); + if (tjj > 1.) { + +/* Divide by A(j,j) when scaling x if A(j,j) > 1. */ + +/* Computing MIN */ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + uscal /= tjjs; + } + if (rec < 1.) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + sumj = 0.; + if (uscal == 1.) { + +/* If the scaling needed for A in the dot product is 1, */ +/* call DDOT to perform the dot product. */ + + if (upper) { +/* Computing MIN */ + i__3 = *kd, i__4 = j - 1; + jlen = min(i__3,i__4); + sumj = _starpu_ddot_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], + &c__1, &x[j - jlen], &c__1); + } else { +/* Computing MIN */ + i__3 = *kd, i__4 = *n - j; + jlen = min(i__3,i__4); + if (jlen > 0) { + sumj = _starpu_ddot_(&jlen, &ab[j * ab_dim1 + 2], &c__1, & + x[j + 1], &c__1); + } + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { +/* Computing MIN */ + i__3 = *kd, i__4 = j - 1; + jlen = min(i__3,i__4); + i__3 = jlen; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += ab[*kd + i__ - jlen + j * ab_dim1] * + uscal * x[j - jlen - 1 + i__]; +/* L120: */ + } + } else { +/* Computing MIN */ + i__3 = *kd, i__4 = *n - j; + jlen = min(i__3,i__4); + i__3 = jlen; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += ab[i__ + 1 + j * ab_dim1] * uscal * x[j + + i__]; +/* L130: */ + } + } + } + + if (uscal == tscal) { + +/* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ +/* was not used to scale the dotproduct. */ + + x[j] -= sumj; + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjjs = ab[maind + j * ab_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) 
{ + goto L150; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A'*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L140: */ + } + x[j] = 1.; + *scale = 0.; + xmax = 0.; + } +L150: + ; + } else { + +/* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ +/* product has already been divided by 1/A(j,j). */ + + x[j] = x[j] / tjjs - sumj; + } +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); + xmax = max(d__2,d__3); +/* L160: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. / tscal; + _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of DLATBS */ + +} /* _starpu_dlatbs_ */ diff --git a/min-dgels/base/SRC/dlatdf.c b/min-dgels/base/SRC/dlatdf.c new file mode 100644 index 0000000..10254d9 --- /dev/null +++ b/min-dgels/base/SRC/dlatdf.c @@ -0,0 +1,303 @@ +/* dlatdf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b23 = 1.; +static doublereal c_b37 = -1.; + +/* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, + integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, + integer *ipiv, integer *jpiv) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal bm, bp, xm[8], xp[8]; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer info; + doublereal temp, work[32]; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + doublereal pmone; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + doublereal sminu; + integer iwork[8]; + doublereal splus; + extern /* Subroutine */ int _starpu_dgesc2_(integer *, doublereal *, integer *, + doublereal *, integer *, integer *, doublereal *), _starpu_dgecon_(char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), _starpu_dlassq_(integer *, + doublereal *, integer *, doublereal *, doublereal *), _starpu_dlaswp_( + integer
*, doublereal *, integer *, integer *, integer *, integer + *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATDF uses the LU factorization of the n-by-n matrix Z computed by */ +/* DGETC2 and computes a contribution to the reciprocal Dif-estimate */ +/* by solving Z * x = b for x, and choosing the r.h.s. b such that */ +/* the norm of x is as large as possible. On entry RHS = b holds the */ +/* contribution from earlier solved sub-systems, and on return RHS = x. */ + +/* The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, */ +/* where P and Q are permutation matrices. L is lower triangular with */ +/* unit diagonal elements and U is upper triangular. */ + +/* Arguments */ +/* ========= */ + +/* IJOB (input) INTEGER */ +/* IJOB = 2: First compute an approximate null-vector e */ +/* of Z using DGECON, e is normalized and solve for */ +/* Zx = +-e - f with the sign giving the greater value */ +/* of 2-norm(x). About 5 times as expensive as Default. */ +/* IJOB .ne. 2: Local look ahead strategy where all entries of */ +/* the r.h.s. b are chosen as either +1 or -1 (Default). */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Z; at most 8 (local array size). */ + +/* Z (input) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* On entry, the LU part of the factorization of the n-by-n */ +/* matrix Z computed by DGETC2: Z = P * L * U * Q */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= max(1, N). */ + +/* RHS (input/output) DOUBLE PRECISION array, dimension N. */ +/* On entry, RHS contains contributions from other subsystems. */ +/* On exit, RHS contains the solution of the subsystem with */ +/* entries according to the value of IJOB (see above).
*/ + +/* RDSUM (input/output) DOUBLE PRECISION */ +/* On entry, the sum of squares of computed contributions to */ +/* the Dif-estimate under computation by DTGSYL, where the */ +/* scaling factor RDSCAL (see below) has been factored out. */ +/* On exit, the corresponding sum of squares updated with the */ +/* contributions from the current sub-system. */ +/* If TRANS = 'T' RDSUM is not touched. */ +/* NOTE: RDSUM only makes sense when DTGSY2 is called by DTGSYL. */ + +/* RDSCAL (input/output) DOUBLE PRECISION */ +/* On entry, scaling factor used to prevent overflow in RDSUM. */ +/* On exit, RDSCAL is updated w.r.t. the current contributions */ +/* in RDSUM. */ +/* If TRANS = 'T', RDSCAL is not touched. */ +/* NOTE: RDSCAL only makes sense when DTGSY2 is called by */ +/* DTGSYL. */ + +/* IPIV (input) INTEGER array, dimension (N). */ +/* The pivot indices; for 1 <= i <= N, row i of the */ +/* matrix has been interchanged with row IPIV(i). */ + +/* JPIV (input) INTEGER array, dimension (N). */ +/* The pivot indices; for 1 <= j <= N, column j of the */ +/* matrix has been interchanged with column JPIV(j). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* This routine is a further developed implementation of algorithm */ +/* BSOLVE in [1] using complete pivoting in the LU factorization. */ + +/* [1] Bo Kagstrom and Lars Westin, */ +/* Generalized Schur Methods with Condition Estimators for */ +/* Solving the Generalized Sylvester Equation, IEEE Transactions */ +/* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. */ + +/* [2] Peter Poromaa, */ +/* On Efficient and Robust Estimators for the Separation */ +/* between two Regular Matrix Pairs with Applications in */ +/* Condition Estimation. Report IMINF-95.05, Department of */ +/* Computing Science, Umea University, S-901 87 Umea, Sweden, 1995.
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --rhs; + --ipiv; + --jpiv; + + /* Function Body */ + if (*ijob != 2) { + +/* Apply permutations IPIV to RHS */ + + i__1 = *n - 1; + _starpu_dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1); + +/* Solve for L-part choosing RHS either to +1 or -1. */ + + pmone = -1.; + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + bp = rhs[j] + 1.; + bm = rhs[j] - 1.; + splus = 1.; + +/* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and */ +/* SMINU computed more efficiently than in BSOLVE [1]. */ + + i__2 = *n - j; + splus += _starpu_ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &z__[j + 1 + + j * z_dim1], &c__1); + i__2 = *n - j; + sminu = _starpu_ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], + &c__1); + splus *= rhs[j]; + if (splus > sminu) { + rhs[j] = bp; + } else if (sminu > splus) { + rhs[j] = bm; + } else { + +/* In this case the updating sums are equal and we can */ +/* choose RHS(J) +1 or -1. The first time this happens */ +/* we choose -1, thereafter +1. This is a simple way to */ +/* get good estimates of matrices like Byers well-known */ +/* example (see [1]). (Not done in BSOLVE.) */ + + rhs[j] += pmone; + pmone = 1.; + } + +/* Compute the remaining r.h.s. */ + + temp = -rhs[j]; + i__2 = *n - j; + _starpu_daxpy_(&i__2, &temp, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], + &c__1); + +/* L10: */ + } + +/* Solve for U-part, look-ahead for RHS(N) = +-1.
This is not done */ +/* in BSOLVE and will hopefully give us a better estimate because */ +/* any ill-conditioning of the original matrix is transferred to U */ +/* and not to L. U(N, N) is an approximation to sigma_min(LU). */ + + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &rhs[1], &c__1, xp, &c__1); + xp[*n - 1] = rhs[*n] + 1.; + rhs[*n] += -1.; + splus = 0.; + sminu = 0.; + for (i__ = *n; i__ >= 1; --i__) { + temp = 1. / z__[i__ + i__ * z_dim1]; + xp[i__ - 1] *= temp; + rhs[i__] *= temp; + i__1 = *n; + for (k = i__ + 1; k <= i__1; ++k) { + xp[i__ - 1] -= xp[k - 1] * (z__[i__ + k * z_dim1] * temp); + rhs[i__] -= rhs[k] * (z__[i__ + k * z_dim1] * temp); +/* L20: */ + } + splus += (d__1 = xp[i__ - 1], abs(d__1)); + sminu += (d__1 = rhs[i__], abs(d__1)); +/* L30: */ + } + if (splus > sminu) { + _starpu_dcopy_(n, xp, &c__1, &rhs[1], &c__1); + } + +/* Apply the permutations JPIV to the computed solution (RHS) */ + + i__1 = *n - 1; + _starpu_dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1); + +/* Compute the sum of squares */ + + _starpu_dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); + + } else { + +/* IJOB = 2, Compute approximate nullvector XM of Z */ + + _starpu_dgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, & + info); + _starpu_dcopy_(n, &work[*n], &c__1, xm, &c__1); + +/* Compute RHS */ + + i__1 = *n - 1; + _starpu_dlaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1); + temp = 1.
/ sqrt(_starpu_ddot_(n, xm, &c__1, xm, &c__1)); + _starpu_dscal_(n, &temp, xm, &c__1); + _starpu_dcopy_(n, xm, &c__1, xp, &c__1); + _starpu_daxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1); + _starpu_daxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1); + _starpu_dgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp); + _starpu_dgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp); + if (_starpu_dasum_(n, xp, &c__1) > _starpu_dasum_(n, &rhs[1], &c__1)) { + _starpu_dcopy_(n, xp, &c__1, &rhs[1], &c__1); + } + +/* Compute the sum of squares */ + + _starpu_dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); + + } + + return 0; + +/* End of DLATDF */ + +} /* _starpu_dlatdf_ */ diff --git a/min-dgels/base/SRC/dlatps.c b/min-dgels/base/SRC/dlatps.c new file mode 100644 index 0000000..7d7f271 --- /dev/null +++ b/min-dgels/base/SRC/dlatps.c @@ -0,0 +1,824 @@ +/* dlatps.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b36 = .5; + +/* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, + doublereal *cnorm, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, ip; + doublereal xj, rec, tjj; + integer jinc, jlen; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal xbnd; + integer imax; + doublereal
tmax, tjjs, xmax, grow, sumj; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal tscal, uscal; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATPS solves one of the triangular systems */ + +/* A *x = s*b or A'*x = s*b */ + +/* with scaling to prevent overflow, where A is an upper or lower */ +/* triangular matrix stored in packed form. Here A' denotes the */ +/* transpose of A, x and b are n-element vectors, and s is a scaling */ +/* factor, usually less than or equal to 1, chosen so that the */ +/* components of x will be less than the overflow threshold. If the */ +/* unscaled problem will not cause overflow, the Level 2 BLAS routine */ +/* DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), */ +/* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. 
*/ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the operation applied to A. */ +/* = 'N': Solve A * x = s*b (No transpose) */ +/* = 'T': Solve A'* x = s*b (Transpose) */ +/* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* NORMIN (input) CHARACTER*1 */ +/* Specifies whether CNORM has been set or not. */ +/* = 'Y': CNORM contains the column norms on entry */ +/* = 'N': CNORM is not set on entry. On exit, the norms will */ +/* be computed and stored in CNORM. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangular matrix A, packed columnwise in */ +/* a linear array. The j-th column of A is stored in the array */ +/* AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* X (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the right hand side b of the triangular system. */ +/* On exit, X is overwritten by the solution vector x. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* The scaling factor s for the triangular system */ +/* A * x = s*b or A'* x = s*b. */ +/* If SCALE = 0, the matrix A is singular or badly scaled, and */ +/* the vector x is an exact or approximate solution to A*x = 0. */ + +/* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ + +/* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* contains the norm of the off-diagonal part of the j-th column */ +/* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* must be greater than or equal to the 1-norm. 
*/ + +/* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* returns the 1-norm of the offdiagonal part of the j-th column */ +/* of A. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* Further Details */ +/* ======= ======= */ + +/* A rough bound on x is computed; if that is less than overflow, DTPSV */ +/* is called, otherwise, specific code is used which checks for possible */ +/* overflow or divide-by-zero at every operation. */ + +/* A columnwise scheme is used for solving A*x = b. The basic algorithm */ +/* if A is lower triangular is */ + +/* x[1:n] := b[1:n] */ +/* for j = 1, ..., n */ +/* x(j) := x(j) / A(j,j) */ +/* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ +/* end */ + +/* Define bounds on the components of x after j iterations of the loop: */ +/* M(j) = bound on x[1:j] */ +/* G(j) = bound on x[j+1:n] */ +/* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ + +/* Then for iteration j+1 we have */ +/* M(j+1) <= G(j) / | A(j+1,j+1) | */ +/* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ +/* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ + +/* where CNORM(j+1) is greater than or equal to the infinity-norm of */ +/* column j+1 of A, not counting the diagonal. Hence */ + +/* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ +/* 1<=i<=j */ +/* and */ + +/* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ +/* 1<=i< j */ + +/* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the */ +/* reciprocal of the largest M(j), j=1,..,n, is larger than */ +/* max(underflow, 1/overflow). */ + +/* The bound on x(j) is also used to determine when a step in the */ +/* columnwise method can be performed without fear of overflow. 
If */ +/* the computed bound is greater than a large constant, x is scaled to */ +/* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ +/* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ + +/* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ +/* algorithm for A upper triangular is */ + +/* for j = 1, ..., n */ +/* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ +/* end */ + +/* We simultaneously compute two bounds */ +/* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ +/* M(j) = bound on x(i), 1<=i<=j */ + +/* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ +/* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ +/* Then the bound on x(j) is */ + +/* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ + +/* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ +/* 1<=i<=j */ + +/* and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater */ +/* than max(underflow, 1/overflow). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --cnorm; + --x; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (! _starpu_lsame_(normin, "Y") && ! 
_starpu_lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLATPS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + bignum = 1. / smlnum; + *scale = 1.; + + if (_starpu_lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + ip = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = _starpu_dasum_(&i__2, &ap[ip], &c__1); + ip += j; +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + ip = 1; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = _starpu_dasum_(&i__2, &ap[ip + 1], &c__1); + ip = ip + *n - j + 1; +/* L20: */ + } + cnorm[*n] = 0.; + } + } + +/* Scale the column norms by TSCAL if the maximum element in CNORM is */ +/* greater than BIGNUM. */ + + imax = _starpu_idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum) { + tscal = 1.; + } else { + tscal = 1. / (smlnum * tmax); + _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* Compute a bound on the computed solution vector to see if the */ +/* Level 2 BLAS routine DTPSV can be used. */ + + j = _starpu_idamax_(n, &x[1], &c__1); + xmax = (d__1 = x[j], abs(d__1)); + xbnd = xmax; + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L50; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, G(0) = max{x(i), i=1,...,n}. */ + + grow = 1. 
/ max(xbnd,smlnum); + xbnd = grow; + ip = jfirst * (jfirst + 1) / 2; + jlen = *n; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* M(j) = G(j-1) / abs(A(j,j)) */ + + tjj = (d__1 = ap[ip], abs(d__1)); +/* Computing MIN */ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } + ip += jinc * jlen; + --jlen; +/* L30: */ + } + grow = xbnd; + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L40: */ + } + } +L50: + + ; + } else { + +/* Compute the growth in A' * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.) { + grow = 0.; + goto L80; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, M(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + ip = jfirst * (jfirst + 1) / 2; + jlen = 1; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. 
*/ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + tjj = (d__1 = ap[ip], abs(d__1)); + if (xj > tjj) { + xbnd *= tjj / xj; + } + ++jlen; + ip += jinc * jlen; +/* L60: */ + } + grow = min(grow,xbnd); + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L70: */ + } + } +L80: + ; + } + + if (grow * tscal > smlnum) { + +/* Use the Level 2 BLAS solve if the reciprocal of the bound on */ +/* elements of X is not too small. */ + + _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum) { + +/* Scale X so that its components are less than or equal to */ +/* BIGNUM in absolute value. */ + + *scale = bignum / xmax; + _starpu_dscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } + + if (notran) { + +/* Solve A * x = b */ + + ip = jfirst * (jfirst + 1) / 2; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = ap[ip] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L100; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. 
/ xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ +/* to avoid overflow when dividing by A(j,j). */ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* Scale by 1/CNORM(j) to avoid overflow when */ +/* multiplying x(j) times column j. */ + + rec /= cnorm[j]; + } + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L90: */ + } + x[j] = 1.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L100: + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j of A. */ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. 
*/ + + _starpu_dscal_(n, &c_b36, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* Compute the update */ +/* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ + + i__3 = j - 1; + d__1 = -x[j] * tscal; + _starpu_daxpy_(&i__3, &d__1, &ap[ip - j + 1], &c__1, &x[1], & + c__1); + i__3 = j - 1; + i__ = _starpu_idamax_(&i__3, &x[1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + ip -= j; + } else { + if (j < *n) { + +/* Compute the update */ +/* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ + + i__3 = *n - j; + d__1 = -x[j] * tscal; + _starpu_daxpy_(&i__3, &d__1, &ap[ip + 1], &c__1, &x[j + 1], & + c__1); + i__3 = *n - j; + i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + ip = ip + *n - j + 1; + } +/* L110: */ + } + + } else { + +/* Solve A' * x = b */ + + ip = jfirst * (jfirst + 1) / 2; + jlen = 1; + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Compute x(j) = b(j) - sum A(k,j)*x(k). */ +/* k<>j */ + + xj = (d__1 = x[j], abs(d__1)); + uscal = tscal; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + tjjs = ap[ip] * tscal; + } else { + tjjs = tscal; + } + tjj = abs(tjjs); + if (tjj > 1.) { + +/* Divide by A(j,j) when scaling x if A(j,j) > 1. */ + +/* Computing MIN */ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + uscal /= tjjs; + } + if (rec < 1.) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + sumj = 0.; + if (uscal == 1.) { + +/* If the scaling needed for A in the dot product is 1, */ +/* call DDOT to perform the dot product. 
*/ + + if (upper) { + i__3 = j - 1; + sumj = _starpu_ddot_(&i__3, &ap[ip - j + 1], &c__1, &x[1], & + c__1); + } else if (j < *n) { + i__3 = *n - j; + sumj = _starpu_ddot_(&i__3, &ap[ip + 1], &c__1, &x[j + 1], & + c__1); + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += ap[ip - j + i__] * uscal * x[i__]; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n - j; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += ap[ip + i__] * uscal * x[j + i__]; +/* L130: */ + } + } + } + + if (uscal == tscal) { + +/* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ +/* was not used to scale the dotproduct. */ + + x[j] -= sumj; + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjjs = ap[ip] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L150; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A'*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L140: */ + } + x[j] = 1.; + *scale = 0.; + xmax = 0.; + } +L150: + ; + } else { + +/* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ +/* product has already been divided by 1/A(j,j). 
*/ + + x[j] = x[j] / tjjs - sumj; + } +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); + xmax = max(d__2,d__3); + ++jlen; + ip += jinc * jlen; +/* L160: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. / tscal; + _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of DLATPS */ + +} /* _starpu_dlatps_ */ diff --git a/min-dgels/base/SRC/dlatrd.c b/min-dgels/base/SRC/dlatrd.c new file mode 100644 index 0000000..801383b --- /dev/null +++ b/min-dgels/base/SRC/dlatrd.c @@ -0,0 +1,355 @@ +/* dlatrd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b5 = -1.; +static doublereal c_b6 = 1.; +static integer c__1 = 1; +static doublereal c_b16 = 0.; + +/* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, iw; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + 
doublereal *, doublereal *, integer *), _starpu_daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATRD reduces NB rows and columns of a real symmetric matrix A to */ +/* symmetric tridiagonal form by an orthogonal similarity */ +/* transformation Q' * A * Q, and returns the matrices V and W which are */ +/* needed to apply the transformation to the unreduced part of A. */ + +/* If UPLO = 'U', DLATRD reduces the last NB rows and columns of a */ +/* matrix, of which the upper triangle is supplied; */ +/* if UPLO = 'L', DLATRD reduces the first NB rows and columns of a */ +/* matrix, of which the lower triangle is supplied. */ + +/* This is an auxiliary routine called by DSYTRD. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. */ + +/* NB (input) INTEGER */ +/* The number of rows and columns to be reduced. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n-by-n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. 
If UPLO = 'L', the */ +/* leading n-by-n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ +/* On exit: */ +/* if UPLO = 'U', the last NB columns have been reduced to */ +/* tridiagonal form, with the diagonal elements overwriting */ +/* the diagonal elements of A; the elements above the diagonal */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors; */ +/* if UPLO = 'L', the first NB columns have been reduced to */ +/* tridiagonal form, with the diagonal elements overwriting */ +/* the diagonal elements of A; the elements below the diagonal */ +/* with the array TAU, represent the orthogonal matrix Q as a */ +/* product of elementary reflectors. */ +/* See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* E (output) DOUBLE PRECISION array, dimension (N-1) */ +/* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */ +/* elements of the last NB columns of the reduced matrix; */ +/* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */ +/* the first NB columns of the reduced matrix. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors, stored in */ +/* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */ +/* See Further Details. */ + +/* W (output) DOUBLE PRECISION array, dimension (LDW,NB) */ +/* The n-by-nb matrix W required to update the unreduced part */ +/* of A. */ + +/* LDW (input) INTEGER */ +/* The leading dimension of the array W. LDW >= max(1,N). */ + +/* Further Details */ +/* =============== */ + +/* If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(n) H(n-1) . . . H(n-nb+1). 
*/ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */ +/* and tau in TAU(i-1). */ + +/* If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(1) H(2) . . . H(nb). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* and tau in TAU(i). */ + +/* The elements of the vectors v together form the n-by-nb matrix V */ +/* which is needed, with W, to apply the transformation to the unreduced */ +/* part of the matrix, using a symmetric rank-2k update of the form: */ +/* A := A - V*W' - W*V'. */ + +/* The contents of A on exit are illustrated by the following examples */ +/* with n = 5 and nb = 2: */ + +/* if UPLO = 'U': if UPLO = 'L': */ + +/* ( a a a v4 v5 ) ( d ) */ +/* ( a a v4 v5 ) ( 1 d ) */ +/* ( a 1 v5 ) ( v1 1 a ) */ +/* ( d 1 ) ( v1 v2 a a ) */ +/* ( d ) ( v1 v2 a a a ) */ + +/* where d denotes a diagonal element of the reduced matrix, a denotes */ +/* an element of the original matrix that is unchanged, and vi denotes */ +/* an element of the vector defining H(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Quick return if possible: the N <= 0 test follows the parameter adjustments */ + + /* Parameter adjustments: shift a, e, tau and w so that the 1-based Fortran subscripts used below (a[i + j*a_dim1], e[i], tau[i], w[i + j*w_dim1]) address the caller's arrays */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body: return at once when N <= 0; NB, LDA and LDW are not validated by this routine */ + if (*n <= 0) { + return 0; + } + + if (_starpu_lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle: i runs from N down to N-NB+1 and IW = i-N+NB is the column of W paired with column i of A */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) := A(1:i,i) - A(1:i,i+1:n)*W(i,iw+1:nb)' - W(1:i,iw+1:nb)*A(i,i+1:n)' (skipped for i = n) */ + + i__2 = *n - i__; + _starpu_dgemv_("No transpose", &i__, &i__2, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b6, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + _starpu_dgemv_("No transpose", &i__, &i__2, &c_b5, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b6, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:i-2,i); E(i-1) receives the value DLARFG leaves in A(i-1,i), which is then overwritten with 1 so that A(1:i-1,i) holds v */ + + i__2 = i__ - 1; + _starpu_dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.; + +/* Compute W(1:i-1,iw), the column of W paired with column i of A: DSYMV forms A(1:i-1,1:i-1)*v, the four DGEMV calls (only for i < n) subtract the contributions of the columns already reduced, using W(i+1:n,iw) as scratch, then the column is scaled by TAU(i-1) and alpha*v is added, where 
alpha = -1/2 * TAU(i-1) * (w'*v) */ + + i__2 = i__ - 1; + _starpu_dsymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b16, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b16, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b16, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("No transpose", 
&i__2, &i__3, &c_b5, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + _starpu_dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5 * _starpu_ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + _starpu_daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle (this branch runs whenever the UPLO test against U above fails; UPLO is not otherwise validated) */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) := A(i:n,i) - A(i:n,1:i-1)*W(i,1:i-1)' - W(i:n,1:i-1)*A(i,1:i-1)' (the column count i-1 is 0 when i = 1) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b6, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b6, &a[i__ + i__ * a_dim1], & + c__1); + if (i__ < *n) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:n,i); E(i) receives the value DLARFG leaves in A(i+1,i), which is then overwritten with 1 */ + + i__2 = *n - i__; +/* Computing MIN: the row index i+2 of the vector passed to DLARFG is clamped to n */ + i__3 = i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute W(i+1:n,i): DSYMV forms A(i+1:n,i+1:n)*v, the four DGEMV calls subtract the contributions of columns 1:i-1, using W(1:i-1,i) as scratch, then the column is scaled by TAU(i) and alpha*v is added, where alpha = -1/2 * TAU(i) * (w'*v) */ + + i__2 = *n - i__; + _starpu_dsymv_("Lower", &i__2, &c_b6, &a[i__ + 1 + (i__ + 1) * 
a_dim1] +, lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[i__ + 1 + w_dim1], + ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, 
&i__3, &c_b6, &a[i__ + 1 + a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + _starpu_dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5 * _starpu_ddot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + _starpu_daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of DLATRD */ + +} /* _starpu_dlatrd_ */ diff --git a/min-dgels/base/SRC/dlatrs.c b/min-dgels/base/SRC/dlatrs.c new file mode 100644 index 0000000..5d2fcd3 --- /dev/null +++ b/min-dgels/base/SRC/dlatrs.c @@ -0,0 +1,815 @@ +/* dlatrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b36 = .5; + +/* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *a, integer *lda, doublereal *x, + doublereal *scale, doublereal *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal xj, rec, tjj; + integer jinc; + extern doublereal _starpu_ddot_(integer *, 
doublereal *, integer *, doublereal *, + integer *); + doublereal xbnd; + integer imax; + doublereal tmax, tjjs, xmax, grow, sumj; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal tscal, uscal; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtrsv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATRS solves one of the triangular systems */ + +/* A *x = s*b or A'*x = s*b */ + +/* with scaling to prevent overflow. Here A is an upper or lower */ +/* triangular matrix, A' denotes the transpose of A, x and b are */ +/* n-element vectors, and s is a scaling factor, usually less than */ +/* or equal to 1, chosen so that the components of x will be less than */ +/* the overflow threshold. If the unscaled problem will not cause */ +/* overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A */ +/* is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ +/* non-trivial solution to A*x = 0 is returned. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. 
*/ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the operation applied to A. */ +/* = 'N': Solve A * x = s*b (No transpose) */ +/* = 'T': Solve A'* x = s*b (Transpose) */ +/* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* NORMIN (input) CHARACTER*1 */ +/* Specifies whether CNORM has been set or not. */ +/* = 'Y': CNORM contains the column norms on entry */ +/* = 'N': CNORM is not set on entry. On exit, the norms will */ +/* be computed and stored in CNORM. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* upper triangular part of the array A contains the upper */ +/* triangular matrix, and the strictly lower triangular part of */ +/* A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* triangular part of the array A contains the lower triangular */ +/* matrix, and the strictly upper triangular part of A is not */ +/* referenced. If DIAG = 'U', the diagonal elements of A are */ +/* also not referenced and are assumed to be 1. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max (1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the right hand side b of the triangular system. */ +/* On exit, X is overwritten by the solution vector x. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* The scaling factor s for the triangular system */ +/* A * x = s*b or A'* x = s*b. */ +/* If SCALE = 0, the matrix A is singular or badly scaled, and */ +/* the vector x is an exact or approximate solution to A*x = 0. 
*/ + +/* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ + +/* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* contains the norm of the off-diagonal part of the j-th column */ +/* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* must be greater than or equal to the 1-norm. */ + +/* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* returns the 1-norm of the offdiagonal part of the j-th column */ +/* of A. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* Further Details */ +/* ======= ======= */ + +/* A rough bound on x is computed; if that is less than overflow, DTRSV */ +/* is called, otherwise, specific code is used which checks for possible */ +/* overflow or divide-by-zero at every operation. */ + +/* A columnwise scheme is used for solving A*x = b. The basic algorithm */ +/* if A is lower triangular is */ + +/* x[1:n] := b[1:n] */ +/* for j = 1, ..., n */ +/* x(j) := x(j) / A(j,j) */ +/* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ +/* end */ + +/* Define bounds on the components of x after j iterations of the loop: */ +/* M(j) = bound on x[1:j] */ +/* G(j) = bound on x[j+1:n] */ +/* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ + +/* Then for iteration j+1 we have */ +/* M(j+1) <= G(j) / | A(j+1,j+1) | */ +/* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ +/* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ + +/* where CNORM(j+1) is greater than or equal to the infinity-norm of */ +/* column j+1 of A, not counting the diagonal. 
Hence */ + +/* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ +/* 1<=i<=j */ +/* and */ + +/* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ +/* 1<=i< j */ + +/* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the */ +/* reciprocal of the largest M(j), j=1,..,n, is larger than */ +/* max(underflow, 1/overflow). */ + +/* The bound on x(j) is also used to determine when a step in the */ +/* columnwise method can be performed without fear of overflow. If */ +/* the computed bound is greater than a large constant, x is scaled to */ +/* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ +/* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ + +/* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ +/* algorithm for A upper triangular is */ + +/* for j = 1, ..., n */ +/* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ +/* end */ + +/* We simultaneously compute two bounds */ +/* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ +/* M(j) = bound on x(i), 1<=i<=j */ + +/* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ +/* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ +/* Then the bound on x(j) is */ + +/* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ + +/* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ +/* 1<=i<=j */ + +/* and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater */ +/* than max(underflow, 1/overflow). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (! _starpu_lsame_(normin, "Y") && ! _starpu_lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLATRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); + bignum = 1. / smlnum; + *scale = 1.; + + if (_starpu_lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = _starpu_dasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = _starpu_dasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.; + } + } + +/* Scale the column norms by TSCAL if the maximum element in CNORM is */ +/* greater than BIGNUM. */ + + imax = _starpu_idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum) { + tscal = 1.; + } else { + tscal = 1. 
/ (smlnum * tmax); + _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* Compute a bound on the computed solution vector to see if the */ +/* Level 2 BLAS routine DTRSV can be used. */ + + j = _starpu_idamax_(n, &x[1], &c__1); + xmax = (d__1 = x[j], abs(d__1)); + xbnd = xmax; + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L50; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, G(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* M(j) = G(j-1) / abs(A(j,j)) */ + + tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); +/* Computing MIN */ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } +/* L30: */ + } + grow = xbnd; + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L40: */ + } + } +L50: + + ; + } else { + +/* Compute the growth in A' * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.) 
{ + grow = 0.; + goto L80; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, M(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); + if (xj > tjj) { + xbnd *= tjj / xj; + } +/* L60: */ + } + grow = min(grow,xbnd); + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L70: */ + } + } +L80: + ; + } + + if (grow * tscal > smlnum) { + +/* Use the Level 2 BLAS solve if the reciprocal of the bound on */ +/* elements of X is not too small. */ + + _starpu_dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum) { + +/* Scale X so that its components are less than or equal to */ +/* BIGNUM in absolute value. */ + + *scale = bignum / xmax; + _starpu_dscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. 
*/ + + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L100; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ +/* to avoid overflow when dividing by A(j,j). */ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* Scale by 1/CNORM(j) to avoid overflow when */ +/* multiplying x(j) times column j. */ + + rec /= cnorm[j]; + } + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L90: */ + } + x[j] = 1.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L100: + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j of A. */ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. 
*/ + + _starpu_dscal_(n, &c_b36, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* Compute the update */ +/* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ + + i__3 = j - 1; + d__1 = -x[j] * tscal; + _starpu_daxpy_(&i__3, &d__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = _starpu_idamax_(&i__3, &x[1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + } else { + if (j < *n) { + +/* Compute the update */ +/* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ + + i__3 = *n - j; + d__1 = -x[j] * tscal; + _starpu_daxpy_(&i__3, &d__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + } +/* L110: */ + } + + } else { + +/* Solve A' * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Compute x(j) = b(j) - sum A(k,j)*x(k). */ +/* k<>j */ + + xj = (d__1 = x[j], abs(d__1)); + uscal = tscal; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + } + tjj = abs(tjjs); + if (tjj > 1.) { + +/* Divide by A(j,j) when scaling x if A(j,j) > 1. */ + +/* Computing MIN */ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + uscal /= tjjs; + } + if (rec < 1.) { + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + sumj = 0.; + if (uscal == 1.) { + +/* If the scaling needed for A in the dot product is 1, */ +/* call DDOT to perform the dot product. */ + + if (upper) { + i__3 = j - 1; + sumj = _starpu_ddot_(&i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + } else if (j < *n) { + i__3 = *n - j; + sumj = _starpu_ddot_(&i__3, &a[j + 1 + j * a_dim1], &c__1, &x[ + j + 1], &c__1); + } + } else { + +/* Otherwise, use in-line code for the dot product. 
*/ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += a[i__ + j * a_dim1] * uscal * x[i__]; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + sumj += a[i__ + j * a_dim1] * uscal * x[i__]; +/* L130: */ + } + } + } + + if (uscal == tscal) { + +/* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ +/* was not used to scale the dotproduct. */ + + x[j] -= sumj; + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L150; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + _starpu_dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A'*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L140: */ + } + x[j] = 1.; + *scale = 0.; + xmax = 0.; + } +L150: + ; + } else { + +/* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ +/* product has already been divided by 1/A(j,j). */ + + x[j] = x[j] / tjjs - sumj; + } +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); + xmax = max(d__2,d__3); +/* L160: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. 
/ tscal; + _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of DLATRS */ + +} /* _starpu_dlatrs_ */ diff --git a/min-dgels/base/SRC/dlatrz.c b/min-dgels/base/SRC/dlatrz.c new file mode 100644 index 0000000..c34fd02 --- /dev/null +++ b/min-dgels/base/SRC/dlatrz.c @@ -0,0 +1,163 @@ +/* dlatrz.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, doublereal * + a, integer *lda, doublereal *tau, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int _starpu_dlarz_(char *, integer *, integer *, integer * +, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_dlarfp_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix */ +/* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means */ +/* of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal */ +/* matrix and, R and A1 are M-by-M upper triangular matrices. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. 
*/ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* L (input) INTEGER */ +/* The number of columns of the matrix A containing the */ +/* meaningful part of the Householder vectors. N-M >= L >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the leading M-by-N upper trapezoidal part of the */ +/* array A must contain the matrix to be factorized. */ +/* On exit, the leading M-by-M upper triangular part of A */ +/* contains the upper triangular matrix R, and elements N-L+1 to */ +/* N of the first M rows of A, with the array TAU, represent the */ +/* orthogonal matrix Z as a product of M elementary reflectors. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (M) */ +/* The scalar factors of the elementary reflectors. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* The factorization is obtained by Householder's method. The kth */ +/* transformation matrix, Z( k ), which is used to introduce zeros into */ +/* the ( m - k + 1 )th row of A, is given in the form */ + +/* Z( k ) = ( I 0 ), */ +/* ( 0 T( k ) ) */ + +/* where */ + +/* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ +/* ( 0 ) */ +/* ( z( k ) ) */ + +/* tau is a scalar and z( k ) is an l element vector. tau and z( k ) */ +/* are chosen to annihilate the elements of the kth row of A2. */ + +/* The scalar tau is returned in the kth element of TAU and the vector */ +/* u( k ) in the kth row of A2, such that the elements of z( k ) are */ +/* in a( k, n - l + 1 ), ..., a( k, n ). The elements of R are returned in */ +/* the upper triangular part of A1. */ + +/* Z is given by */ + +/* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ).
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + if (*m == 0) { + return 0; + } else if (*m == *n) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + return 0; + } + + for (i__ = *m; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* [ A(i,i) A(i,n-l+1:n) ] */ + + i__1 = *l + 1; + _starpu_dlarfp_(&i__1, &a[i__ + i__ * a_dim1], &a[i__ + (*n - *l + 1) * + a_dim1], lda, &tau[i__]); + +/* Apply H(i) to A(1:i-1,i:n) from the right */ + + i__1 = i__ - 1; + i__2 = *n - i__ + 1; + _starpu_dlarz_("Right", &i__1, &i__2, l, &a[i__ + (*n - *l + 1) * a_dim1], + lda, &tau[i__], &a[i__ * a_dim1 + 1], lda, &work[1]); + +/* L20: */ + } + + return 0; + +/* End of DLATRZ */ + +} /* _starpu_dlatrz_ */ diff --git a/min-dgels/base/SRC/dlatzm.c b/min-dgels/base/SRC/dlatzm.c new file mode 100644 index 0000000..b35941b --- /dev/null +++ b/min-dgels/base/SRC/dlatzm.c @@ -0,0 +1,193 @@ +/* dlatzm.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b5 = 1.; + +/* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * + v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, + integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + ; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DORMRZ. */ + +/* DLATZM applies a Householder matrix generated by DTZRQF to a matrix. */ + +/* Let P = I - tau*u*u', u = ( 1 ), */ +/* ( v ) */ +/* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if */ +/* SIDE = 'R'. 
*/ + +/* If SIDE equals 'L', let */ +/* C = [ C1 ] 1 */ +/* [ C2 ] m-1 */ +/* n */ +/* Then C is overwritten by P*C. */ + +/* If SIDE equals 'R', let */ +/* C = [ C1, C2 ] m */ +/* 1 n-1 */ +/* Then C is overwritten by C*P. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': form P * C */ +/* = 'R': form C * P */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. */ + +/* V (input) DOUBLE PRECISION array, dimension */ +/* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* The vector v in the representation of P. V is not used */ +/* if TAU = 0. */ + +/* INCV (input) INTEGER */ +/* The increment between elements of v. INCV <> 0 */ + +/* TAU (input) DOUBLE PRECISION */ +/* The value tau in the representation of P. */ + +/* C1 (input/output) DOUBLE PRECISION array, dimension */ +/* (LDC,N) if SIDE = 'L' */ +/* (M,1) if SIDE = 'R' */ +/* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 */ +/* if SIDE = 'R'. */ + +/* On exit, the first row of P*C if SIDE = 'L', or the first */ +/* column of C*P if SIDE = 'R'. */ + +/* C2 (input/output) DOUBLE PRECISION array, dimension */ +/* (LDC, N) if SIDE = 'L' */ +/* (LDC, N-1) if SIDE = 'R' */ +/* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the */ +/* m x (n - 1) matrix C2 if SIDE = 'R'. */ + +/* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:n of C*P */ +/* if SIDE = 'R'. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the arrays C1 and C2. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* (M) if SIDE = 'R' */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + + /* Parameter adjustments */ + --v; + c2_dim1 = *ldc; + c2_offset = 1 + c2_dim1; + c2 -= c2_offset; + c1_dim1 = *ldc; + c1_offset = 1 + c1_dim1; + c1 -= c1_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0 || *tau == 0.) { + return 0; + } + + if (_starpu_lsame_(side, "L")) { + +/* w := C1 + v' * C2 */ + + _starpu_dcopy_(n, &c1[c1_offset], ldc, &work[1], &c__1); + i__1 = *m - 1; + _starpu_dgemv_("Transpose", &i__1, n, &c_b5, &c2[c2_offset], ldc, &v[1], incv, + &c_b5, &work[1], &c__1); + +/* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' */ +/* [ C2 ] [ C2 ] [ v ] */ + + d__1 = -(*tau); + _starpu_daxpy_(n, &d__1, &work[1], &c__1, &c1[c1_offset], ldc); + i__1 = *m - 1; + d__1 = -(*tau); + _starpu_dger_(&i__1, n, &d__1, &v[1], incv, &work[1], &c__1, &c2[c2_offset], + ldc); + + } else if (_starpu_lsame_(side, "R")) { + +/* w := C1 + C2 * v */ + + _starpu_dcopy_(m, &c1[c1_offset], &c__1, &work[1], &c__1); + i__1 = *n - 1; + _starpu_dgemv_("No transpose", m, &i__1, &c_b5, &c2[c2_offset], ldc, &v[1], + incv, &c_b5, &work[1], &c__1); + +/* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */ + + d__1 = -(*tau); + _starpu_daxpy_(m, &d__1, &work[1], &c__1, &c1[c1_offset], &c__1); + i__1 = *n - 1; + d__1 = -(*tau); + _starpu_dger_(m, &i__1, &d__1, &work[1], &c__1, &v[1], incv, &c2[c2_offset], + ldc); + } + + return 0; + +/* End of DLATZM */ + +} /* _starpu_dlatzm_ */ diff --git a/min-dgels/base/SRC/dlauu2.c b/min-dgels/base/SRC/dlauu2.c new file mode 100644 index 0000000..0330733 --- /dev/null +++ b/min-dgels/base/SRC/dlauu2.c @@ -0,0 +1,183 @@ +/* dlauu2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + doublereal aii; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLAUU2 computes the product U * U' or L' * L, where the triangular */ +/* factor U or L is stored in the upper or lower triangular part of */ +/* the array A. */ + +/* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */ +/* overwriting the factor U in A. */ +/* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */ +/* overwriting the factor L in A. 
*/ + +/* This is the unblocked form of the algorithm, calling Level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the triangular factor stored in the array A */ +/* is upper or lower triangular: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the triangular factor U or L. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular factor U or L. */ +/* On exit, if UPLO = 'U', the upper triangle of A is */ +/* overwritten with the upper triangle of the product U * U'; */ +/* if UPLO = 'L', the lower triangle of A is overwritten with */ +/* the lower triangle of the product L' * L. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = _starpu_ddot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + _starpu_dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = _starpu_ddot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__ + + a_dim1], lda); + } else { + _starpu_dscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of DLAUU2 */ + +} /* _starpu_dlauu2_ */ diff --git a/min-dgels/base/SRC/dlauum.c b/min-dgels/base/SRC/dlauum.c new file mode 100644 index 0000000..ec450d5 --- /dev/null +++ b/min-dgels/base/SRC/dlauum.c @@ -0,0 +1,217 @@ +/* dlauum.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b15 = 1.; + +/* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, ib, nb; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dlauu2_(char *, integer *, + doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DLAUUM computes the product U * U' or L' * L, where the triangular */ +/* factor U or L is stored in the upper or lower triangular part of */ +/* the array A. */ + +/* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */ +/* overwriting the factor U in A. */ +/* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */ +/* overwriting the factor L in A. */ + +/* This is the blocked form of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the triangular factor stored in the array A */ +/* is upper or lower triangular: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the triangular factor U or L. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular factor U or L. */ +/* On exit, if UPLO = 'U', the upper triangle of A is */ +/* overwritten with the upper triangle of the product U * U'; */ +/* if UPLO = 'L', the lower triangle of A is overwritten with */ +/* the lower triangle of the product L' * L. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + _starpu_dlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + _starpu_dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + _starpu_dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + _starpu_dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + _starpu_dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + _starpu_dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + _starpu_dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + _starpu_dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + _starpu_dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of DLAUUM */ + +} /* _starpu_dlauum_ */ diff --git a/min-dgels/base/SRC/dopgtr.c b/min-dgels/base/SRC/dopgtr.c new file mode 100644 index 0000000..2169286 --- /dev/null +++ b/min-dgels/base/SRC/dopgtr.c @@ -0,0 +1,210 @@ +/* dopgtr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, + doublereal *tau, doublereal *q, integer *ldq, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, ij; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical upper; + extern /* Subroutine */ int _starpu_dorg2l_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dorg2r_(integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DOPGTR generates a real orthogonal matrix Q which is defined as the */ +/* product of n-1 elementary reflectors H(i) of order n, as returned by */ +/* DSPTRD using packed storage: */ + +/* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ + +/* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular packed storage used in previous */ +/* call to DSPTRD; */ +/* = 'L': Lower triangular packed storage used in previous */ +/* call to DSPTRD. */ + +/* N (input) INTEGER */ +/* The order of the matrix Q. N >= 0. 
*/ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The vectors which define the elementary reflectors, as */ +/* returned by DSPTRD. */ + +/* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DSPTRD. */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* The N-by-N orthogonal matrix Q. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N-1) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + --ap; + --tau; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DOPGTR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Q was determined by a call to DSPTRD with UPLO = 'U' */ + +/* Unpack the vectors which define the elementary reflectors and */ +/* set the last row and column of Q equal to those of the unit */ +/* matrix */ + + ij = 2; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = ap[ij]; + ++ij; +/* L10: */ + } + ij += 2; + q[*n + j * q_dim1] = 0.; +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + *n * q_dim1] = 0.; +/* L30: */ + } + q[*n + *n * q_dim1] = 1.; + +/* Generate Q(1:n-1,1:n-1) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dorg2l_(&i__1, &i__2, &i__3, &q[q_offset], ldq, &tau[1], &work[1], & + iinfo); + + } else { + +/* Q was determined by a call to DSPTRD with UPLO = 'L'. 
*/ + +/* Unpack the vectors which define the elementary reflectors and */ +/* set the first row and column of Q equal to those of the unit */ +/* matrix */ + + q[q_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + q[i__ + q_dim1] = 0.; +/* L40: */ + } + ij = 3; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + q[j * q_dim1 + 1] = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = ap[ij]; + ++ij; +/* L50: */ + } + ij += 2; +/* L60: */ + } + if (*n > 1) { + +/* Generate Q(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dorg2r_(&i__1, &i__2, &i__3, &q[(q_dim1 << 1) + 2], ldq, &tau[1], + &work[1], &iinfo); + } + } + return 0; + +/* End of DOPGTR */ + +} /* _starpu_dopgtr_ */ diff --git a/min-dgels/base/SRC/dopmtr.c b/min-dgels/base/SRC/dopmtr.c new file mode 100644 index 0000000..f7ec5d5 --- /dev/null +++ b/min-dgels/base/SRC/dopmtr.c @@ -0,0 +1,296 @@ +/* dopmtr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer + *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, ii, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, 
doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran, forwrd; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DOPMTR overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix of order nq, with nq = m if */ +/* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ +/* nq-1 elementary reflectors, as returned by DSPTRD using packed */ +/* storage: */ + +/* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ + +/* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular packed storage used in previous */ +/* call to DSPTRD; */ +/* = 'L': Lower triangular packed storage used in previous */ +/* call to DSPTRD. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension */ +/* (M*(M+1)/2) if SIDE = 'L' */ +/* (N*(N+1)/2) if SIDE = 'R' */ +/* The vectors which define the elementary reflectors, as */ +/* returned by DSPTRD. AP is modified by the routine but */ +/* restored on exit. 
*/ + +/* TAU (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' */ +/* or (N-1) if SIDE = 'R' */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DSPTRD. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L' */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + --ap; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + upper = _starpu_lsame_(uplo, "U"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (! notran && ! 
_starpu_lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*ldc < max(1,*m)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DOPMTR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + if (upper) { + +/* Q was determined by a call to DSPTRD with UPLO = 'U' */ + + forwrd = left && notran || ! left && ! notran; + + if (forwrd) { + i1 = 1; + i2 = nq - 1; + i3 = 1; + ii = 2; + } else { + i1 = nq - 1; + i2 = 1; + i3 = -1; + ii = nq * (nq + 1) / 2 - 1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:i,1:n) */ + + mi = i__; + } else { + +/* H(i) is applied to C(1:m,1:i) */ + + ni = i__; + } + +/* Apply H(i) */ + + aii = ap[ii]; + ap[ii] = 1.; + _starpu_dlarf_(side, &mi, &ni, &ap[ii - i__ + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + ap[ii] = aii; + + if (forwrd) { + ii = ii + i__ + 2; + } else { + ii = ii - i__ - 1; + } +/* L10: */ + } + } else { + +/* Q was determined by a call to DSPTRD with UPLO = 'L'. */ + + forwrd = left && ! notran || ! left && notran; + + if (forwrd) { + i1 = 1; + i2 = nq - 1; + i3 = 1; + ii = 2; + } else { + i1 = nq - 1; + i2 = 1; + i3 = -1; + ii = nq * (nq + 1) / 2 - 1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__2 = i2; + i__1 = i3; + for (i__ = i1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + aii = ap[ii]; + ap[ii] = 1.; + if (left) { + +/* H(i) is applied to C(i+1:m,1:n) */ + + mi = *m - i__; + ic = i__ + 1; + } else { + +/* H(i) is applied to C(1:m,i+1:n) */ + + ni = *n - i__; + jc = i__ + 1; + } + +/* Apply H(i) */ + + _starpu_dlarf_(side, &mi, &ni, &ap[ii], &c__1, &tau[i__], &c__[ic + jc * + c_dim1], ldc, &work[1]); + ap[ii] = aii; + + if (forwrd) { + ii = ii + nq - i__ + 1; + } else { + ii = ii - nq + i__ - 2; + } +/* L20: */ + } + } + return 0; + +/* End of DOPMTR */ + +} /* _starpu_dopmtr_ */ diff --git a/min-dgels/base/SRC/dorg2l.c b/min-dgels/base/SRC/dorg2l.c new file mode 100644 index 0000000..5665ade --- /dev/null +++ b/min-dgels/base/SRC/dorg2l.c @@ -0,0 +1,173 @@ +/* dorg2l.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, l, ii; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORG2L generates an m by n real matrix Q with orthonormal columns, */ +/* which is defined as the last n columns of a product of k elementary */ +/* reflectors of order m */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGEQLF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. M >= N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. N >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the (n-k+i)-th column must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGEQLF in the last k columns of its array */ +/* argument A. */ +/* On exit, the m by n matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQLF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORG2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns 1:n-k to columns of the unit matrix */ + + i__1 = *n - *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[*m - *n + j + j * a_dim1] = 1.; +/* L20: */ + } + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + ii = *n - *k + i__; + +/* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ + + a[*m - *n + ii + ii * a_dim1] = 1.; + i__2 = *m - *n + ii; + i__3 = ii - 1; + _starpu_dlarf_("Left", &i__2, &i__3, &a[ii * a_dim1 + 1], &c__1, &tau[i__], & + a[a_offset], lda, &work[1]); + i__2 = *m - *n + ii - 1; + d__1 = -tau[i__]; + _starpu_dscal_(&i__2, &d__1, &a[ii * a_dim1 + 1], &c__1); + a[*m - *n + ii + ii * a_dim1] = 1. - tau[i__]; + +/* Set A(m-k+i+1:m,n-k+i) to zero */ + + i__2 = *m; + for (l = *m - *n + ii + 1; l <= i__2; ++l) { + a[l + ii * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2L */ + +} /* _starpu_dorg2l_ */ diff --git a/min-dgels/base/SRC/dorg2r.c b/min-dgels/base/SRC/dorg2r.c new file mode 100644 index 0000000..c7a8da7 --- /dev/null +++ b/min-dgels/base/SRC/dorg2r.c @@ -0,0 +1,175 @@ +/* dorg2r.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORG2R generates an m by n real matrix Q with orthonormal columns, */ +/* which is defined as the first n columns of a product of k elementary */ +/* reflectors of order m */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. M >= N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. N >= K >= 0. 
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the i-th column must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGEQRF in the first k columns of its array */ +/* argument A. */ +/* On exit, the m-by-n matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[j + j * a_dim1] = 1.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + _starpu_dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + d__1 = -tau[i__]; + _starpu_dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2R */ + +} /* _starpu_dorg2r_ */ diff --git a/min-dgels/base/SRC/dorgbr.c b/min-dgels/base/SRC/dorgbr.c new file mode 100644 index 0000000..9381ff3 --- /dev/null +++ b/min-dgels/base/SRC/dorgbr.c @@ -0,0 +1,299 @@ +/* dorgbr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nb, mn; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical wantq; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORGBR generates one of the real orthogonal matrices Q or P**T */ +/* determined by DGEBRD when reducing a real matrix A to bidiagonal */ +/* form: A = Q * B * P**T. Q and P**T are defined as products of */ +/* elementary reflectors H(i) or G(i) respectively. 
*/ + +/* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q */ +/* is of order M: */ +/* if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n */ +/* columns of Q, where m >= n >= k; */ +/* if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an */ +/* M-by-M matrix. */ + +/* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T */ +/* is of order N: */ +/* if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m */ +/* rows of P**T, where n >= m >= k; */ +/* if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as */ +/* an N-by-N matrix. */ + +/* Arguments */ +/* ========= */ + +/* VECT (input) CHARACTER*1 */ +/* Specifies whether the matrix Q or the matrix P**T is */ +/* required, as defined in the transformation applied by DGEBRD: */ +/* = 'Q': generate Q; */ +/* = 'P': generate P**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q or P**T to be returned. */ +/* M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q or P**T to be returned. */ +/* N >= 0. */ +/* If VECT = 'Q', M >= N >= min(M,K); */ +/* if VECT = 'P', N >= M >= min(N,K). */ + +/* K (input) INTEGER */ +/* If VECT = 'Q', the number of columns in the original M-by-K */ +/* matrix reduced by DGEBRD. */ +/* If VECT = 'P', the number of rows in the original K-by-N */ +/* matrix reduced by DGEBRD. */ +/* K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the vectors which define the elementary reflectors, */ +/* as returned by DGEBRD. */ +/* On exit, the M-by-N matrix Q or P**T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). 
*/ + +/* TAU (input) DOUBLE PRECISION array, dimension */ +/* (min(M,K)) if VECT = 'Q' */ +/* (min(N,K)) if VECT = 'P' */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i) or G(i), which determines Q or P**T, as */ +/* returned by DGEBRD in its array argument TAUQ or TAUP. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,min(M,N)). */ +/* For optimum performance LWORK >= min(M,N)*NB, where NB */ +/* is the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = _starpu_lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! _starpu_lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1); + } else { + nb = _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1); + } + lwkopt = max(1,mn) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + if (wantq) { + +/* Form Q, determined by a call to DGEBRD to reduce an m-by-k */ +/* matrix */ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + _starpu_dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* If m < k, assume m = n */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first row and column of Q */ +/* to those of the unit matrix */ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + _starpu_dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* Form P', determined by a call to DGEBRD to reduce a k-by-n */ +/* matrix */ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + _starpu_dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* If k >= n, assume m = n */ + +/* Shift the vectors which define the elementary reflectors one */ +/* row downward, and set the first row and column of P' to */ +/* those of the unit matrix */ + + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + 
a_dim1] = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGBR */ + +} /* _starpu_dorgbr_ */ diff --git a/min-dgels/base/SRC/dorghr.c b/min-dgels/base/SRC/dorghr.c new file mode 100644 index 0000000..fc5da42 --- /dev/null +++ b/min-dgels/base/SRC/dorghr.c @@ -0,0 +1,216 @@ +/* dorghr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + integer lwkopt; + logical lquery; + + +/* 
-- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORGHR generates a real orthogonal matrix Q which is defined as the */ +/* product of IHI-ILO elementary reflectors of order N, as returned by */ +/* DGEHRD: */ + +/* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix Q. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* ILO and IHI must have the same values as in the previous call */ +/* of DGEHRD. Q is equal to the unit matrix except in the */ +/* submatrix Q(ilo+1:ihi,ilo+1:ihi). */ +/* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the vectors which define the elementary reflectors, */ +/* as returned by DGEHRD. */ +/* On exit, the N-by-N orthogonal matrix Q. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEHRD. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= IHI-ILO. */ +/* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1); + lwkopt = max(1,nh) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first ilo and the last n-ihi */ +/* rows and columns to those of the unit matrix */ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * 
a_dim1] = 0.; +/* L50: */ + } + a[j + j * a_dim1] = 1.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L70: */ + } + a[j + j * a_dim1] = 1.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + _starpu_dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGHR */ + +} /* _starpu_dorghr_ */ diff --git a/min-dgels/base/SRC/dorgl2.c b/min-dgels/base/SRC/dorgl2.c new file mode 100644 index 0000000..8191cb7 --- /dev/null +++ b/min-dgels/base/SRC/dorgl2.c @@ -0,0 +1,175 @@ +/* dorgl2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. 
*/ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORGL2 generates an m by n real matrix Q with orthonormal rows, */ +/* which is defined as the first m rows of a product of k elementary */ +/* reflectors of order n */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. N >= M. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. M >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the i-th row must contain the vector which defines */ +/* the elementary reflector H(i), for i = 1,2,...,k, as returned */ +/* by DGELQF in the first k rows of its array argument A. */ +/* On exit, the m-by-n matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + _starpu_dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + d__1 = -tau[i__]; + _starpu_dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGL2 */ + +} /* _starpu_dorgl2_ */ diff --git a/min-dgels/base/SRC/dorglq.c b/min-dgels/base/SRC/dorglq.c new file mode 100644 index 0000000..6172896 --- /dev/null +++ b/min-dgels/base/SRC/dorglq.c @@ -0,0 +1,280 @@ +/* dorglq.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DORGLQ generates an M-by-N real matrix Q with orthonormal rows, */ +/* which is defined as the first M rows of a product of K elementary */ +/* reflectors of order N */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. N >= M. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. M >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the i-th row must contain the vector which defines */ +/* the elementary reflector H(i), for i = 1,2,...,k, as returned */ +/* by DGELQF in the first k rows of its array argument A. */ +/* On exit, the M-by-N matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. 
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1); + lwkopt = max(1,*m) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the last block. */ +/* The first kk rows are handled by the block method. */ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. 
*/ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + _starpu_dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__2 = *n - i__ + 1; + _starpu_dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + _starpu_dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + _starpu_dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGLQ */ + +} /* _starpu_dorglq_ */ diff --git a/min-dgels/base/SRC/dorgql.c b/min-dgels/base/SRC/dorgql.c new file mode 100644 index 0000000..8acd1ce --- /dev/null +++ b/min-dgels/base/SRC/dorgql.c @@ -0,0 +1,289 @@ +/* dorgql.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ib, nb, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorg2l_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DORGQL generates an M-by-N real matrix Q with orthonormal columns, */ +/* which is defined as the last N columns of a product of K elementary */ +/* reflectors of order M */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGEQLF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. M >= N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. N >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the (n-k+i)-th column must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGEQLF in the last k columns of its array */ +/* argument A. */ +/* On exit, the M-by-N matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQLF. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. 
*/ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = _starpu_ilaenv_(&c__1, "DORGQL", " ", m, n, k, &c_n1); + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGQL", " ", m, n, k, &c_n1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGQL", " ", m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the first block. */ +/* The last kk columns are handled by the block method. */ + +/* Computing MIN */ + i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; + kk = min(i__1,i__2); + +/* Set A(m-kk+1:m,1:n-kk) to zero. 
*/ + + i__1 = *n - kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the first or only block. */ + + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + _starpu_dorg2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) + ; + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = *k; + i__2 = nb; + for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *k - i__ + 1; + ib = min(i__3,i__4); + if (*n - *k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - *k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - *k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - *k + i__ + ib - 1; + i__4 = *n - *k + i__ - 1; + _starpu_dlarfb_("Left", "No transpose", "Backward", "Columnwise", & + i__3, &i__4, &ib, &a[(*n - *k + i__) * a_dim1 + 1], + lda, &work[1], &ldwork, &a[a_offset], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H to rows 1:m-k+i+ib-1 of current block */ + + i__3 = *m - *k + i__ + ib - 1; + _starpu_dorg2l_(&i__3, &ib, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, & + tau[i__], &work[1], &iinfo); + +/* Set rows m-k+i+ib:m of current block to zero */ + + i__3 = *n - *k + i__ + ib - 1; + for (j = *n - *k + i__; j <= i__3; ++j) { + i__4 = *m; + for (l = *m - *k + i__ + ib; l <= i__4; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQL */ + +} /* _starpu_dorgql_ */ diff --git a/min-dgels/base/SRC/dorgqr.c b/min-dgels/base/SRC/dorgqr.c new file mode 100644 index 0000000..ad7c593 --- /dev/null +++ b/min-dgels/base/SRC/dorgqr.c @@ -0,0 +1,281 @@ +/* 
dorgqr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DORGQR generates an M-by-N real matrix Q with orthonormal columns, */ +/* which is defined as the first N columns of a product of K elementary */ +/* reflectors of order M */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. M >= N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. N >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the i-th column must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGEQRF in the first k columns of its array */ +/* argument A. */ +/* On exit, the M-by-N matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* For optimum performance LWORK >= N*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. 
*/ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1); + lwkopt = max(1,*n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the last block. */ +/* The first kk columns are handled by the block method. */ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. 
*/ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + _starpu_dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__2 = *m - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + _starpu_dlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + _starpu_dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQR */ + +} /* _starpu_dorgqr_ */ diff --git a/min-dgels/base/SRC/dorgr2.c b/min-dgels/base/SRC/dorgr2.c new file mode 100644 index 0000000..fbbd30a --- /dev/null +++ b/min-dgels/base/SRC/dorgr2.c @@ -0,0 +1,174 @@ +/* dorgr2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, l, ii; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORGR2 generates an m by n real matrix Q with orthonormal rows, */ +/* which is defined as the last m rows of a product of k elementary */ +/* reflectors of order n */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGERQF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. N >= M. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. M >= K >= 0.
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the (m-k+i)-th row must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGERQF in the last k rows of its array argument */ +/* A. */ +/* On exit, the m by n matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGERQF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments: move the array pointers back (a by one row plus one column, tau and work by one element) so that the 1-based Fortran subscripts used below, e.g. a[i + j * a_dim1] for A(i,j), address the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: validate the arguments first; the first invalid one is reported as a negative position in *info and through _starpu_xerbla_ before A is touched; the C return value is 0 on every path */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGR2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows 1:m-k to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m - *k; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *n - *m && j <= *n - *k) { + a[*m - *n + j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + ii = *m - *k + i__; + +/* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right: DLARF updates rows 1:m-k+i-1 using row ii (with its unit element set to 1) as the reflector vector, then row ii itself is formed by scaling it with -tau(i) and storing 1 - tau(i) in column n-m+ii */ + + a[ii + (*n - *m + ii) * a_dim1] = 1.; + i__2 = ii - 1; + i__3 = *n - *m + ii; + _starpu_dlarf_("Right", &i__2, &i__3, &a[ii + a_dim1], lda, &tau[i__], &a[ + a_offset], lda, &work[1]); + i__2 = *n - *m + ii - 1; + d__1 = -tau[i__]; + _starpu_dscal_(&i__2, &d__1, &a[ii + a_dim1], lda); + a[ii + (*n - *m + ii) * a_dim1] = 1. 
- tau[i__]; + +/* Set A(m-k+i,n-k+i+1:n) to zero */ + + i__2 = *n; + for (l = *n - *m + ii + 1; l <= i__2; ++l) { + a[ii + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGR2 */ + +} /* _starpu_dorgr2_ */ diff --git a/min-dgels/base/SRC/dorgrq.c b/min-dgels/base/SRC/dorgrq.c new file mode 100644 index 0000000..645197a --- /dev/null +++ b/min-dgels/base/SRC/dorgrq.c @@ -0,0 +1,289 @@ +/* dorgrq.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ib, nb, ii, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorgr2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DORGRQ generates an M-by-N real matrix Q with orthonormal rows, */ +/* which is defined as the last M rows of a product of K elementary */ +/* reflectors of order N */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGERQF. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix Q. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix Q. N >= M. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines the */ +/* matrix Q. M >= K >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the (m-k+i)-th row must contain the vector which */ +/* defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* returned by DGERQF in the last k rows of its array argument */ +/* A. */ +/* On exit, the M-by-N matrix Q. */ + +/* LDA (input) INTEGER */ +/* The first dimension of the array A. LDA >= max(1,M). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGERQF. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument has an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* ..
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: move the array pointers back (a by one row plus one column, tau and work by one element) so that the 1-based Fortran subscripts used below, e.g. a[i + j * a_dim1] for A(i,j), address the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: validate the arguments and compute the optimal workspace size lwkopt = m * nb (1 when m <= 0), which is stored in work[1] even for a workspace query (lwork == -1); errors are reported as a negative argument position in *info and through _starpu_xerbla_; the C return value is 0 on every path */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + + if (*info == 0) { + if (*m <= 0) { + lwkopt = 1; + } else { + nb = _starpu_ilaenv_(&c__1, "DORGRQ", " ", m, n, k, &c_n1); + lwkopt = *m * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGRQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible (nb is assigned only when m > 0, so this test must come before nb is read below) */ + + if (*m <= 0) { + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGRQ", " ", m, n, k, &c_n1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. 
*/ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGRQ", " ", m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the first block. */ +/* The last kk rows are handled by the block method. */ + +/* Computing MIN */ + i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; + kk = min(i__1,i__2); + +/* Set A(1:m-kk,n-kk+1:n) to zero.
*/ + + i__1 = *n; + for (j = *n - kk + 1; j <= i__1; ++j) { + i__2 = *m - kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the first or only block (the status stored in iinfo is not examined). */ + + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + _starpu_dorgr2_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) + ; + + if (kk > 0) { + +/* Use blocked code: walk the last kk reflectors in groups of nb, where ib is the size of the current group and ii is the first row of A that the group occupies */ + + i__1 = *k; + i__2 = nb; + for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *k - i__ + 1; + ib = min(i__3,i__4); + ii = *m - *k + i__; + if (ii > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *n - *k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Rowwise", &i__3, &ib, &a[ii + a_dim1], + lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ + + i__3 = ii - 1; + i__4 = *n - *k + i__ + ib - 1; + _starpu_dlarfb_("Right", "Transpose", "Backward", "Rowwise", &i__3, & + i__4, &ib, &a[ii + a_dim1], lda, &work[1], &ldwork, & + a[a_offset], lda, &work[ib + 1], &ldwork); + } + +/* Apply H' to columns 1:n-k+i+ib-1 of current block */ + + i__3 = *n - *k + i__ + ib - 1; + _starpu_dorgr2_(&ib, &i__3, &ib, &a[ii + a_dim1], lda, &tau[i__], &work[1] +, &iinfo); + +/* Set columns n-k+i+ib:n of current block to zero */ + + i__3 = *n; + for (l = *n - *k + i__ + ib; l <= i__3; ++l) { + i__4 = ii + ib - 1; + for (j = ii; j <= i__4; ++j) { + a[j + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGRQ */ + +} /* _starpu_dorgrq_ */ diff --git a/min-dgels/base/SRC/dorgtr.c b/min-dgels/base/SRC/dorgtr.c new file mode 
100644 index 0000000..5b4a12c --- /dev/null +++ b/min-dgels/base/SRC/dorgtr.c @@ -0,0 +1,250 @@ +/* dorgtr.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nb; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dorgql_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORGTR generates a real orthogonal matrix Q which is defined as the */ +/* product of n-1 elementary reflectors of order N, as returned by */ +/* DSYTRD: */ + +/* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ + +/* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A contains elementary reflectors */ +/* from DSYTRD; */ +/* = 'L': Lower triangle of A contains elementary reflectors */ +/* from DSYTRD. */ + +/* N (input) INTEGER */ +/* The order of the matrix Q. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the vectors which define the elementary reflectors, */ +/* as returned by DSYTRD. */ +/* On exit, the N-by-N orthogonal matrix Q. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DSYTRD. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N-1). */ +/* For optimum performance LWORK >= (N-1)*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments: move the array pointers back (a by one row plus one column, tau and work by one element) so that the 1-based Fortran subscripts used below, e.g. a[i + j * a_dim1] for A(i,j), address the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body: validate the arguments (lwork == -1 requests a workspace query), then store the optimal workspace size lwkopt = max(1,n-1) * nb in work[1]; errors are reported as a negative argument position in *info and through _starpu_xerbla_; the C return value is 0 on every path */ + *info = 0; + lquery = *lwork == -1; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else /* LWORK check: lwork must be at least max(1,n-1) unless this is a workspace query */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + if (*lwork < max(i__1,i__2) && ! 
lquery) { + *info = -7; + } + } + + if (*info == 0) { + if (upper) { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORGQL", " ", &i__1, &i__2, &i__3, &c_n1); + } else { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", &i__1, &i__2, &i__3, &c_n1); + } +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + lwkopt = max(i__1,i__2) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORGTR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the left, and set the last row and column of Q to */ +/* those of the unit matrix */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j + 1) * a_dim1]; +/* L10: */ + } + a[*n + j * a_dim1] = 0.; +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + *n * a_dim1] = 0.; +/* L30: */ + } + a[*n + *n * a_dim1] = 1.; + +/* Generate Q(1:n-1,1:n-1) with DORGQL (the status stored in iinfo is not examined) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dorgql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], + lwork, &iinfo); + + } else { + +/* Q was determined by a call
to DSYTRD with UPLO = 'L'. */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first row and column of Q to */ +/* those of the unit matrix */ + + for (j = *n; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L40: */ + } +/* L50: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Generate Q(2:n,2:n) with DORGQR; a[(a_dim1 << 1) + 2] is A(2,2), the first element of that block, and the status stored in iinfo is not examined */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + _starpu_dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[1], + &work[1], lwork, &iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGTR */ + +} /* _starpu_dorgtr_ */ diff --git a/min-dgels/base/SRC/dorm2l.c b/min-dgels/base/SRC/dorm2l.c new file mode 100644 index 0000000..38e0269 --- /dev/null +++ b/min-dgels/base/SRC/dorm2l.c @@ -0,0 +1,231 @@ +/* dorm2l.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORM2L overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'.
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQLF in the last k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQLF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* ..
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: move the array pointers back (a and c__ by one row plus one column, tau and work by one element) so that the 1-based Fortran subscripts used below address the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body: validate the arguments, choose the order in which the k reflectors are visited (i1 = first index, i2 = last index, i3 = step of +1 or -1), then apply them one at a time with DLARF; errors are reported as a negative argument position in *info and through _starpu_xerbla_; the C return value is 0 on every path */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ?
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n): only the row count mi changes per reflector, ni was fixed to n before the loop */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i): only the column count ni changes per reflector, mi was fixed to m before the loop */ + + ni = *n - *k + i__; + } + +/* Apply H(i): A(nq-k+i,i) is saved in aii and temporarily overwritten with 1 while column i of A is passed to DLARF as the reflector vector, then the saved value is put back */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2L */ + +} /* _starpu_dorm2l_ */ diff --git a/min-dgels/base/SRC/dorm2r.c b/min-dgels/base/SRC/dorm2r.c new file mode 100644 index 0000000..ad37a22 --- /dev/null +++ b/min-dgels/base/SRC/dorm2r.c @@ -0,0 +1,235 @@ +/* dorm2r.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; +
+ +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORM2R overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQRF in the first k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF.
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: move the array pointers back (a and c__ by one row plus one column, tau and work by one element) so that the 1-based Fortran subscripts used below address the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body: validate the arguments, choose the order in which the k reflectors are visited (i1 = first index, i2 = last index, i3 = step of +1 or -1), then apply them one at a time with DLARF; errors are reported as a negative argument position in *info and through _starpu_xerbla_; the C return value is 0 on every path */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! 
_starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || !
left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n): mi rows starting at row ic = i; ni = n and jc = 1 were fixed before the loop */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n): ni columns starting at column jc = i; mi = m and ic = 1 were fixed before the loop */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i): A(i,i) is saved in aii and temporarily overwritten with 1 while the part of column i starting at A(i,i) is passed to DLARF as the reflector vector for the block of C that starts at C(ic,jc), then the saved value is put back */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* _starpu_dorm2r_ */ diff --git a/min-dgels/base/SRC/dormbr.c b/min-dgels/base/SRC/dormbr.c new file mode 100644 index 0000000..04b03eb --- /dev/null +++ b/min-dgels/base/SRC/dormbr.c @@ -0,0 +1,360 @@ +/* dormbr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int
s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, ni, nq, nw; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical notran; + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical applyq; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C */ +/* with */ +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C */ +/* with */ +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': P * C C * P */ +/* TRANS = 'T': P**T * C C * P**T */ + +/* Here Q and P**T are the orthogonal matrices determined by DGEBRD when */ +/* reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and */ +/* P**T are defined as products of elementary reflectors H(i) and G(i) */ +/* respectively. */ + +/* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the */ +/* order of the orthogonal matrix Q or P**T that is applied. */ + +/* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: */ +/* if nq >= k, Q = H(1) H(2) . . . 
H(k); */ +/* if nq < k, Q = H(1) H(2) . . . H(nq-1). */ + +/* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: */ +/* if k < nq, P = G(1) G(2) . . . G(k); */ +/* if k >= nq, P = G(1) G(2) . . . G(nq-1). */ + +/* Arguments */ +/* ========= */ + +/* VECT (input) CHARACTER*1 */ +/* = 'Q': apply Q or Q**T; */ +/* = 'P': apply P or P**T. */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q, Q**T, P or P**T from the Left; */ +/* = 'R': apply Q, Q**T, P or P**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q or P; */ +/* = 'T': Transpose, apply Q**T or P**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* If VECT = 'Q', the number of columns in the original */ +/* matrix reduced by DGEBRD. */ +/* If VECT = 'P', the number of rows in the original */ +/* matrix reduced by DGEBRD. */ +/* K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,min(nq,K)) if VECT = 'Q' */ +/* (LDA,nq) if VECT = 'P' */ +/* The vectors which define the elementary reflectors H(i) and */ +/* G(i), whose products determine the matrices Q and P, as */ +/* returned by DGEBRD. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If VECT = 'Q', LDA >= max(1,nq); */ +/* if VECT = 'P', LDA >= max(1,min(nq,K)). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i) or G(i) which determines Q or P, as returned */ +/* by DGEBRD in the array argument TAUQ or TAUP. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q */ +/* or P*C or P**T*C or C*P or C*P**T. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). 
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = _starpu_lsame_(vect, "Q"); + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! _starpu_lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! _starpu_lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! 
_starpu_lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to DGEBRD with nq >= k */ + + _starpu_dormqr_(side, trans, m, n, k, &a[a_offset], 
lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to DGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + _starpu_dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] +, &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to DGEBRD with nq > k */ + + _starpu_dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to DGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + _starpu_dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMBR */ + +} /* _starpu_dormbr_ */ diff --git a/min-dgels/base/SRC/dormhr.c b/min-dgels/base/SRC/dormhr.c new file mode 100644 index 0000000..2296e0f --- /dev/null +++ b/min-dgels/base/SRC/dormhr.c @@ -0,0 +1,257 @@ +/* dormhr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* Subroutine: applies the orthogonal matrix Q from DGEHRD (or its transpose) to C by validating the arguments and delegating to DORMQR on the affected submatrix */ int _starpu_dormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * + tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, nh, ni, nq, nw; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DORMHR overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix of order nq, with nq = m if */ +/* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ +/* IHI-ILO elementary reflectors, as returned by DGEHRD: */ + +/* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* ILO and IHI must have the same values as in the previous call */ +/* of DGEHRD. Q is equal to the unit matrix except in the */ +/* submatrix Q(ilo+1:ihi,ilo+1:ihi). */ +/* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and */ +/* ILO = 1 and IHI = 0, if M = 0; */ +/* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and */ +/* ILO = 1 and IHI = 0, if N = 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L' */ +/* (LDA,N) if SIDE = 'R' */ +/* The vectors which define the elementary reflectors, as */ +/* returned by DGEHRD. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ + +/* TAU (input) DOUBLE PRECISION array, dimension */ +/* (M-1) if SIDE = 'L' */ +/* (N-1) if SIDE = 'R' */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEHRD. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. 
*/ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: rebase a, tau, c__ and work so that the 1-based Fortran-style subscripts used below reach the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; /* number of elementary reflectors, IHI-ILO */ + left = _starpu_lsame_(side, "L"); + lquery = *lwork == -1; /* LWORK = -1 requests a workspace-size query only */ + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! 
_starpu_lsame_(side, "R")) { + *info = -1; + } else if (! _starpu_lsame_(trans, "N") && ! 
_starpu_lsame_(trans, + "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation: ch__1 receives the two characters SIDE and TRANS, the option string handed to ILAENV */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &nh, n, &nh, &c_n1); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, &nh, &nh, &c_n1); + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + _starpu_xerbla_("DORMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible: C has no rows or columns, or there are no reflectors (nh == 0) */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = nh; /* Q acts on the nh rows of C starting at row ilo+1 */ + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; /* Q acts on the nh columns of C starting at column ilo+1 */ + i1 = 1; + i2 = *ilo + 1; + } + + _starpu_dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMHR */ + +} /* _starpu_dormhr_ */ diff --git a/min-dgels/base/SRC/dorml2.c b/min-dgels/base/SRC/dorml2.c new file mode 100644 index 0000000..a0d5394 --- /dev/null +++ b/min-dgels/base/SRC/dorml2.c @@ -0,0 +1,231 @@ +/* dorml2.f -- 
translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine: applies Q or its transpose (Q from DGELQF) to C one elementary reflector at a time through DLARF */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORML2 overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGELQF in the first k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments: rebase a, tau, c__ and work so that the 1-based Fortran-style subscripts used below reach the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; /* reflectors are applied in the order 1, 2, ..., k */ + } else { + i1 = *k; + i2 = 1; + i3 = -1; /* reflectors are applied in the order k, k-1, ..., 1 */ + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i): A(i,i) is saved, overwritten with 1 for the DLARF call (which reads the vector from A(i,i) with increment lda) and restored afterwards */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* _starpu_dorml2_ */ diff --git a/min-dgels/base/SRC/dormlq.c b/min-dgels/base/SRC/dormlq.c new file mode 100644 index 0000000..870ee89 --- /dev/null +++ b/min-dgels/base/SRC/dormlq.c @@ -0,0 +1,334 @@ +/* dormlq.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine: applies Q or its transpose (Q from DGELQF) to C, using DLARFT and DLARFB on blocks of reflectors or falling back to DORML2 */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork; + 
char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMLQ overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGELQF in the first k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGELQF. 
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments: rebase a, tau, c__ and work so that the 1-based Fortran-style subscripts used below reach the storage passed by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; /* LWORK = -1 requests a workspace-size query only */ + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! 
_starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Determine the block size. NB may be at most NBMAX, where NBMAX */ +/* is used to define the local array T (NBMAX appears as the literal 64 in the min below). */ + +/* Computing MIN */ +/* Writing concatenation: ch__1 receives the two characters SIDE and TRANS, the option string handed to ILAENV */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; /* reduce the block size to what the supplied workspace can hold */ +/* Computing MAX: nbmin becomes the larger of 2 and the value ILAENV returns for query 2 */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! 
left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; /* first reflector index of the last block */ + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; /* DLARFB is called with the opposite of the requested transpose flag */ + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN: ib is the number of reflectors in the current block, at most nb and at most the k-i+1 remaining ones */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__4 = nq - i__ + 1; + _starpu_dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* _starpu_dormlq_ */ diff --git a/min-dgels/base/SRC/dormql.c b/min-dgels/base/SRC/dormql.c new file mode 100644 index 0000000..7f623d0 --- /dev/null +++ b/min-dgels/base/SRC/dormql.c @@ -0,0 +1,327 @@ +/* dormql.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork, lwkopt; + 
logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMQL overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(k) . . . H(2) H(1) */ + +/* as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQLF in the last k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQLF. 
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! 
_starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* Determine the block size. NB may be at most NBMAX, where */ +/* NBMAX is used to define the local array T. */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < nw && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__4 = nq - *k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] +, lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQL */ + +} /* _starpu_dormql_ */ diff --git a/min-dgels/base/SRC/dormqr.c b/min-dgels/base/SRC/dormqr.c new file mode 100644 index 0000000..50d0d0e --- /dev/null +++ b/min-dgels/base/SRC/dormqr.c @@ -0,0 +1,327 @@ +/* dormqr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork, 
lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMQR overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ +/* The i-th column must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGEQRF in the first k columns of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* If SIDE = 'L', LDA >= max(1,M); */ +/* if SIDE = 'R', LDA >= max(1,N). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGEQRF. 
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! 
_starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Determine the block size. NB may be at most NBMAX, where NBMAX */ +/* is used to define the local array T. */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__4 = nq - i__ + 1; + _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* _starpu_dormqr_ */ diff --git a/min-dgels/base/SRC/dormr2.c b/min-dgels/base/SRC/dormr2.c new file mode 100644 index 0000000..ce702fc --- /dev/null +++ b/min-dgels/base/SRC/dormr2.c @@ -0,0 +1,227 @@ +/* dormr2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMR2 overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGERQF in the last k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGERQF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m by n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. 
*/ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMR2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[i__ + (nq - *k + i__) * a_dim1]; + a[i__ + (nq - *k + i__) * a_dim1] = 1.; + _starpu_dlarf_(side, &mi, &ni, &a[i__ + a_dim1], lda, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[i__ + (nq - *k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORMR2 */ + +} /* _starpu_dormr2_ */ diff --git a/min-dgels/base/SRC/dormr3.c b/min-dgels/base/SRC/dormr3.c new file mode 100644 index 0000000..bc02c3a --- /dev/null +++ b/min-dgels/base/SRC/dormr3.c @@ -0,0 +1,241 @@ +/* dormr3.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ja, ic, jc, mi, ni, nq; + logical left; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlarz_(char *, integer *, integer *, integer * +, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMR3 overwrites the general real m by n matrix C with */ + +/* Q * C if SIDE = 'L' and TRANS = 'N', or */ + +/* Q'* C if SIDE = 'L' and TRANS = 'T', or */ + +/* C * Q if SIDE = 'R' and TRANS = 'N', or */ + +/* C * Q' if SIDE = 'R' and TRANS = 'T', */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n */ +/* if SIDE = 'R'. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q' from the Left */ +/* = 'R': apply Q or Q' from the Right */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': apply Q (No transpose) */ +/* = 'T': apply Q' (Transpose) */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* L (input) INTEGER */ +/* The number of columns of the matrix A containing */ +/* the meaningful part of the Householder reflectors. */ +/* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DTZRZF in the last k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DTZRZF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the m-by-n matrix C. */ +/* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension */ +/* (N) if SIDE = 'L', */ +/* (M) if SIDE = 'R' */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*l < 0 || left && *l > *m || ! left && *l > *n) { + *info = -6; + } else if (*lda < max(1,*k)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMR3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + ja = *m - *l + 1; + jc = 1; + } else { + mi = *m; + ja = *n - *l + 1; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + _starpu_dlarz_(side, &mi, &ni, l, &a[i__ + ja * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + +/* L10: */ + } + + return 0; + +/* End of DORMR3 */ + +} /* _starpu_dormr3_ */ diff --git a/min-dgels/base/SRC/dormrq.c b/min-dgels/base/SRC/dormrq.c new file mode 100644 index 0000000..895c20b --- /dev/null +++ b/min-dgels/base/SRC/dormrq.c @@ -0,0 +1,335 @@ +/* dormrq.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + 
integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dormr2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical notran; + integer ldwork; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMRQ overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. 
*/ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DGERQF in the last k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DGERQF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* Determine the block size. NB may be at most NBMAX, where */ +/* NBMAX is used to define the local array T. */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMRQ", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < nw && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMRQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMRQ", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dormr2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . 
H(i+1) H(i) */ + + i__4 = nq - *k + i__ + ib - 1; + _starpu_dlarft_("Backward", "Rowwise", &i__4, &ib, &a[i__ + a_dim1], lda, + &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + _starpu_dlarfb_(side, transt, "Backward", "Rowwise", &mi, &ni, &ib, &a[ + i__ + a_dim1], lda, t, &c__65, &c__[c_offset], ldc, &work[ + 1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMRQ */ + +} /* _starpu_dormrq_ */ diff --git a/min-dgels/base/SRC/dormrz.c b/min-dgels/base/SRC/dormrz.c new file mode 100644 index 0000000..a6c768a --- /dev/null +++ b/min-dgels/base/SRC/dormrz.c @@ -0,0 +1,362 @@ +/* dormrz.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables 
*/ + integer i__; + doublereal t[4160] /* was [65][64] */; + integer i1, i2, i3, ib, ic, ja, jc, nb, mi, ni, nq, nw, iws; + logical left; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + extern /* Subroutine */ int _starpu_dormr3_(char *, char *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlarzb_(char *, char *, char *, char *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlarzt_(char *, char + *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *); + logical notran; + integer ldwork; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMRZ overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix defined as the product of k */ +/* elementary reflectors */ + +/* Q = H(1) H(2) . . . H(k) */ + +/* as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N */ +/* if SIDE = 'R'. */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. 
*/ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. N >= 0. */ + +/* K (input) INTEGER */ +/* The number of elementary reflectors whose product defines */ +/* the matrix Q. */ +/* If SIDE = 'L', M >= K >= 0; */ +/* if SIDE = 'R', N >= K >= 0. */ + +/* L (input) INTEGER */ +/* The number of columns of the matrix A containing */ +/* the meaningful part of the Householder reflectors. */ +/* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L', */ +/* (LDA,N) if SIDE = 'R' */ +/* The i-th row must contain the vector which defines the */ +/* elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* DTZRZF in the last k rows of its array argument A. */ +/* A is modified by the routine but restored on exit. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,K). */ + +/* TAU (input) DOUBLE PRECISION array, dimension (K) */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DTZRZF. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. 
*/ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*l < 0 || left && *l > *m || ! left && *l > *n) { + *info = -6; + } else if (*lda < max(1,*k)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* Determine the block size. 
NB may be at most NBMAX, where */ +/* NBMAX is used to define the local array T. */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMRQ", ch__1, m, n, k, &c_n1); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DORMRZ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMRQ", ch__1, m, n, k, &c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + _starpu_dormr3_(side, trans, m, n, k, l, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + ja = *m - *l + 1; + } else { + mi = *m; + ic = 1; + ja = *n - *l + 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . 
H(i+1) H(i) */ + + _starpu_dlarzt_("Backward", "Rowwise", l, &ib, &a[i__ + ja * a_dim1], lda, + &tau[i__], t, &c__65); + + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + _starpu_dlarzb_(side, transt, "Backward", "Rowwise", &mi, &ni, &ib, l, &a[ + i__ + ja * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1] +, ldc, &work[1], &ldwork); +/* L10: */ + } + + } + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DORMRZ */ + +} /* _starpu_dormrz_ */ diff --git a/min-dgels/base/SRC/dormtr.c b/min-dgels/base/SRC/dormtr.c new file mode 100644 index 0000000..5ca6ae7 --- /dev/null +++ b/min-dgels/base/SRC/dormtr.c @@ -0,0 +1,295 @@ +/* dormtr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, ni, nq, nw; + logical left; + extern logical 
_starpu_lsame_(char *, char *); + integer iinfo; + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + _starpu_dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DORMTR overwrites the general real M-by-N matrix C with */ + +/* SIDE = 'L' SIDE = 'R' */ +/* TRANS = 'N': Q * C C * Q */ +/* TRANS = 'T': Q**T * C C * Q**T */ + +/* where Q is a real orthogonal matrix of order nq, with nq = m if */ +/* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ +/* nq-1 elementary reflectors, as returned by DSYTRD: */ + +/* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ + +/* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'L': apply Q or Q**T from the Left; */ +/* = 'R': apply Q or Q**T from the Right. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A contains elementary reflectors */ +/* from DSYTRD; */ +/* = 'L': Lower triangle of A contains elementary reflectors */ +/* from DSYTRD. */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': No transpose, apply Q; */ +/* = 'T': Transpose, apply Q**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix C. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix C. 
N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension */ +/* (LDA,M) if SIDE = 'L' */ +/* (LDA,N) if SIDE = 'R' */ +/* The vectors which define the elementary reflectors, as */ +/* returned by DSYTRD. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. */ +/* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ + +/* TAU (input) DOUBLE PRECISION array, dimension */ +/* (M-1) if SIDE = 'L' */ +/* (N-1) if SIDE = 'R' */ +/* TAU(i) must contain the scalar factor of the elementary */ +/* reflector H(i), as returned by DSYTRD. */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N matrix C. */ +/* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If SIDE = 'L', LWORK >= max(1,N); */ +/* if SIDE = 'R', LWORK >= max(1,M). */ +/* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = _starpu_lsame_(side, "L"); + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! _starpu_lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, 
m, &i__2, &i__3, &c_n1); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + _starpu_xerbla_("DORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + _starpu_dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + _starpu_dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMTR */ + +} /* _starpu_dormtr_ */ diff --git a/min-dgels/base/SRC/dpbcon.c b/min-dgels/base/SRC/dpbcon.c new file mode 100644 index 0000000..21b1c27 --- /dev/null +++ b/min-dgels/base/SRC/dpbcon.c @@ -0,0 +1,233 @@ +/* dpbcon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal scalel; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *); + doublereal scaleu; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DPBCON estimates the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric positive definite band matrix using the */ +/* Cholesky factorization A = U**T*U or A = L*L**T computed by DPBTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular factor stored in AB; */ +/* = 'L': Lower triangular factor stored in AB. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T of the band matrix A, stored in the */ +/* first KD+1 rows of the array. The j-th column of U or L is */ +/* stored in the j-th column of the array AB as follows: */ +/* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm (or infinity-norm) of the symmetric band matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*ldab < *kd + 1) { + *info = -5; + } else if (*anorm < 0.) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = _starpu_dlamch_("Safe minimum"); + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; + *(unsigned char *)normin = 'N'; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (upper) { + +/* Multiply by inv(U'). */ + + _starpu_dlatbs_("Upper", "Transpose", "Non-unit", normin, n, kd, &ab[ + ab_offset], ldab, &work[1], &scalel, &work[(*n << 1) + 1], + info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(U). */ + + _starpu_dlatbs_("Upper", "No transpose", "Non-unit", normin, n, kd, &ab[ + ab_offset], ldab, &work[1], &scaleu, &work[(*n << 1) + 1], + info); + } else { + +/* Multiply by inv(L). 
*/ + + _starpu_dlatbs_("Lower", "No transpose", "Non-unit", normin, n, kd, &ab[ + ab_offset], ldab, &work[1], &scalel, &work[(*n << 1) + 1], + info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(L'). */ + + _starpu_dlatbs_("Lower", "Transpose", "Non-unit", normin, n, kd, &ab[ + ab_offset], ldab, &work[1], &scaleu, &work[(*n << 1) + 1], + info); + } + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + scale = scalel * scaleu; + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + + return 0; + +/* End of DPBCON */ + +} /* _starpu_dpbcon_ */ diff --git a/min-dgels/base/SRC/dpbequ.c b/min-dgels/base/SRC/dpbequ.c new file mode 100644 index 0000000..29bca0d --- /dev/null +++ b/min-dgels/base/SRC/dpbequ.c @@ -0,0 +1,203 @@ +/* dpbequ.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpbequ_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal smin; + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBEQU computes row and column scalings intended to equilibrate a */ +/* symmetric positive definite band matrix A and reduce its condition */ +/* number (with respect to the two-norm). S contains the scale factors, */ +/* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ +/* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ +/* choice of S puts the condition number of B within a factor N of the */ +/* smallest possible condition number over all possible diagonal */ +/* scalings. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular of A is stored; */ +/* = 'L': Lower triangular of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. 
*/ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangle of the symmetric band matrix A, */ +/* stored in the first KD+1 rows of the array. The j-th column */ +/* of A is stored in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* S (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, S contains the scale factors for A. */ + +/* SCOND (output) DOUBLE PRECISION */ +/* If INFO = 0, SCOND contains the ratio of the smallest S(i) to */ +/* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* large nor too small, it is not worth scaling by S. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --s; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*ldab < *kd + 1) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBEQU", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *scond = 1.; + *amax = 0.; + return 0; + } + + if (upper) { + j = *kd + 1; + } else { + j = 1; + } + +/* Initialize SMIN and AMAX. */ + + s[1] = ab[j + ab_dim1]; + smin = s[1]; + *amax = s[1]; + +/* Find the minimum and maximum diagonal elements. */ + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + s[i__] = ab[j + i__ * ab_dim1]; +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = *amax, d__2 = s[i__]; + *amax = max(d__1,d__2); +/* L10: */ + } + + if (smin <= 0.) { + +/* Find the first non-positive diagonal element and return. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] <= 0.) { + *info = i__; + return 0; + } +/* L20: */ + } + } else { + +/* Set the scale factors to the reciprocals */ +/* of the diagonal elements. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 1. / sqrt(s[i__]); +/* L30: */ + } + +/* Compute SCOND = min(S(I)) / max(S(I)) */ + + *scond = sqrt(smin) / sqrt(*amax); + } + return 0; + +/* End of DPBEQU */ + +} /* _starpu_dpbequ_ */ diff --git a/min-dgels/base/SRC/dpbrfs.c b/min-dgels/base/SRC/dpbrfs.c new file mode 100644 index 0000000..96143da --- /dev/null +++ b/min-dgels/base/SRC/dpbrfs.c @@ -0,0 +1,438 @@ +/* dpbrfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k, l; + doublereal s, xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dsbmv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + ; + integer count; + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbtrs_( + char *, integer *, integer 
*, integer *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric positive definite */ +/* and banded, and provides error bounds and backward error estimates */ +/* for the solution. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangle of the symmetric band matrix A, */ +/* stored in the first KD+1 rows of the array. The j-th column */ +/* of A is stored in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T of the band matrix A as computed by */ +/* DPBTRF, in the same storage format as A (see AB). 
*/ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= KD+1. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DPBTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement (5). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. 
*/ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldafb < *kd + 1) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldx < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + +/* Computing MIN */ + i__1 = *n + 1, i__2 = (*kd << 1) + 2; + nz = min(i__1,i__2); + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. 
*/ + +/* Compute residual R = B - A * X */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dsbmv_(uplo, n, kd, &c_b12, &ab[ab_offset], ldab, &x[j * x_dim1 + 1], + &c__1, &c_b14, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + l = *kd + 1 - k; +/* Computing MAX */ + i__3 = 1, i__4 = k - *kd; + i__5 = k - 1; + for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { + work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) + * xk; + s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L40: */ + } + work[k] = work[k] + (d__1 = ab[*kd + 1 + k * ab_dim1], abs( + d__1)) * xk + s; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + work[k] += (d__1 = ab[k * ab_dim1 + 1], abs(d__1)) * xk; + l = 1 - k; +/* Computing MIN */ + i__3 = *n, i__4 = k + *kd; + i__5 = min(i__3,i__4); + for (i__ = k + 1; i__ <= i__5; ++i__) { + work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) + * xk; + s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], 
abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX (= 5) iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1] +, n, info); + _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. 
*/ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A'). */ + + _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L120: */ + } + _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DPBRFS */ + +} /* _starpu_dpbrfs_ */ diff --git a/min-dgels/base/SRC/dpbstf.c b/min-dgels/base/SRC/dpbstf.c new file mode 100644 index 0000000..5840f50 --- /dev/null +++ b/min-dgels/base/SRC/dpbstf.c @@ -0,0 +1,312 @@ +/* dpbstf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b9 = -1.; + +/* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j, m, km; + doublereal ajj; + integer kld; + extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), _starpu_dscal_( + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBSTF computes a split Cholesky factorization of a real */ +/* symmetric positive definite band matrix A. */ + +/* This routine is designed to be used in conjunction with DSBGST. */ + +/* The factorization has the form A = S**T*S where S is a band matrix */ +/* of the same bandwidth as A and the following structure: */ + +/* S = ( U ) */ +/* ( M L ) */ + +/* where U is upper triangular of order m = (n+kd)/2, and L is lower */ +/* triangular of order n-m. 
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first kd+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, if INFO = 0, the factor S from the split Cholesky */ +/* factorization A = S**T*S. See Further Details. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the factorization could not be completed, */ +/* because the updated element a(i,i) was negative; the */ +/* matrix A is not positive definite. 
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* N = 7, KD = 2: */ + +/* S = ( s11 s12 s13 ) */ +/* ( s22 s23 s24 ) */ +/* ( s33 s34 ) */ +/* ( s44 ) */ +/* ( s53 s54 s55 ) */ +/* ( s64 s65 s66 ) */ +/* ( s75 s76 s77 ) */ + +/* If UPLO = 'U', the array AB holds: */ + +/* on entry: on exit: */ + +/* * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 */ +/* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 */ +/* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 */ + +/* If UPLO = 'L', the array AB holds: */ + +/* on entry: on exit: */ + +/* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 */ +/* a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * */ +/* a31 a42 a53 a64 a75 * * s13 s24 s53 s64 s75 * * */ + +/* Array elements marked * are not used by the routine. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*ldab < *kd + 1) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBSTF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = 1, i__2 = *ldab - 1; + kld = max(i__1,i__2); + +/* Set the splitting point m. */ + + m = (*n + *kd) / 2; + + if (upper) { + +/* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). 
*/ + + i__1 = m + 1; + for (j = *n; j >= i__1; --j) { + +/* Compute s(j,j) and test for non-positive-definiteness. */ + + ajj = ab[*kd + 1 + j * ab_dim1]; + if (ajj <= 0.) { + goto L50; + } + ajj = sqrt(ajj); + ab[*kd + 1 + j * ab_dim1] = ajj; +/* Computing MIN */ + i__2 = j - 1; + km = min(i__2,*kd); + +/* Compute elements j-km:j-1 of the j-th column and update the */ +/* leading submatrix within the band. */ + + d__1 = 1. / ajj; + _starpu_dscal_(&km, &d__1, &ab[*kd + 1 - km + j * ab_dim1], &c__1); + _starpu_dsyr_("Upper", &km, &c_b9, &ab[*kd + 1 - km + j * ab_dim1], &c__1, + &ab[*kd + 1 + (j - km) * ab_dim1], &kld); +/* L10: */ + } + +/* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ + + i__1 = m; + for (j = 1; j <= i__1; ++j) { + +/* Compute s(j,j) and test for non-positive-definiteness. */ + + ajj = ab[*kd + 1 + j * ab_dim1]; + if (ajj <= 0.) { + goto L50; + } + ajj = sqrt(ajj); + ab[*kd + 1 + j * ab_dim1] = ajj; +/* Computing MIN */ + i__2 = *kd, i__3 = m - j; + km = min(i__2,i__3); + +/* Compute elements j+1:j+km of the j-th row and update the */ +/* trailing submatrix within the band. */ + + if (km > 0) { + d__1 = 1. / ajj; + _starpu_dscal_(&km, &d__1, &ab[*kd + (j + 1) * ab_dim1], &kld); + _starpu_dsyr_("Upper", &km, &c_b9, &ab[*kd + (j + 1) * ab_dim1], &kld, + &ab[*kd + 1 + (j + 1) * ab_dim1], &kld); + } +/* L20: */ + } + } else { + +/* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). */ + + i__1 = m + 1; + for (j = *n; j >= i__1; --j) { + +/* Compute s(j,j) and test for non-positive-definiteness. */ + + ajj = ab[j * ab_dim1 + 1]; + if (ajj <= 0.) { + goto L50; + } + ajj = sqrt(ajj); + ab[j * ab_dim1 + 1] = ajj; +/* Computing MIN */ + i__2 = j - 1; + km = min(i__2,*kd); + +/* Compute elements j-km:j-1 of the j-th row and update the */ +/* leading submatrix within the band. */ + + d__1 = 1. 
/ ajj; + _starpu_dscal_(&km, &d__1, &ab[km + 1 + (j - km) * ab_dim1], &kld); + _starpu_dsyr_("Lower", &km, &c_b9, &ab[km + 1 + (j - km) * ab_dim1], &kld, + &ab[(j - km) * ab_dim1 + 1], &kld); +/* L30: */ + } + +/* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ + + i__1 = m; + for (j = 1; j <= i__1; ++j) { + +/* Compute s(j,j) and test for non-positive-definiteness. */ + + ajj = ab[j * ab_dim1 + 1]; + if (ajj <= 0.) { + goto L50; + } + ajj = sqrt(ajj); + ab[j * ab_dim1 + 1] = ajj; +/* Computing MIN */ + i__2 = *kd, i__3 = m - j; + km = min(i__2,i__3); + +/* Compute elements j+1:j+km of the j-th column and update the */ +/* trailing submatrix within the band. */ + + if (km > 0) { + d__1 = 1. / ajj; + _starpu_dscal_(&km, &d__1, &ab[j * ab_dim1 + 2], &c__1); + _starpu_dsyr_("Lower", &km, &c_b9, &ab[j * ab_dim1 + 2], &c__1, &ab[( + j + 1) * ab_dim1 + 1], &kld); + } +/* L40: */ + } + } + return 0; + +L50: + *info = j; + return 0; + +/* End of DPBSTF */ + +} /* _starpu_dpbstf_ */ diff --git a/min-dgels/base/SRC/dpbsv.c b/min-dgels/base/SRC/dpbsv.c new file mode 100644 index 0000000..4a95edc --- /dev/null +++ b/min-dgels/base/SRC/dpbsv.c @@ -0,0 +1,182 @@ +/* dpbsv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbtrf_( + char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dpbtrs_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite band matrix and X */ +/* and B are N-by-NRHS matrices. */ + +/* The Cholesky decomposition is used to factor A as */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular band matrix, and L is a lower */ +/* triangular band matrix, with the same number of superdiagonals or */ +/* subdiagonals as A. The factored form of A is then used to solve the */ +/* system of equations A * X = B. 
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). */ +/* See below for further details. */ + +/* On exit, if INFO = 0, the triangular factor U or L from the */ +/* Cholesky factorization A = U**T*U or A = L*L**T of the band */ +/* matrix A, in the same storage format as A. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i of A is not */ +/* positive definite, so the factorization could not be */ +/* completed, and the solution has not been computed. 
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* N = 6, KD = 2, and UPLO = 'U': */ + +/* On entry: On exit: */ + +/* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ + +/* Similarly, if UPLO = 'L' the format of A is as follows: */ + +/* On entry: On exit: */ + +/* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ +/* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ +/* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ + +/* Array elements marked * are not used by the routine. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBSV ", &i__1); + return 0; + } + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + _starpu_dpbtrf_(uplo, n, kd, &ab[ab_offset], ldab, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X (only reached when DPBTRF returned INFO = 0). 
*/ + + _starpu_dpbtrs_(uplo, n, kd, nrhs, &ab[ab_offset], ldab, &b[b_offset], ldb, + info); + + } + return 0; + +/* End of DPBSV */ + +} /* _starpu_dpbsv_ */ diff --git a/min-dgels/base/SRC/dpbsvx.c b/min-dgels/base/SRC/dpbsvx.c new file mode 100644 index 0000000..0bb575b --- /dev/null +++ b/min-dgels/base/SRC/dpbsvx.c @@ -0,0 +1,515 @@ +/* dpbsvx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, + integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j, j1, j2; + doublereal amax, smin, smax; + extern logical _starpu_lsame_(char *, char *); + doublereal scond, anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical equil, rcequ, upper; + extern doublereal _starpu_dlamch_(char *), _starpu_dlansb_(char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dpbcon_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, 
doublereal *, doublereal *, + integer *, integer *), _starpu_dlaqsb_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dpbequ_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dpbrfs_(char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), _starpu_dpbtrf_(char *, + integer *, integer *, doublereal *, integer *, integer *); + integer infequ; + extern /* Subroutine */ int _starpu_dpbtrs_(char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, integer *); + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ +/* compute the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite band matrix and X */ +/* and B are N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. 
If FACT = 'E', real scaling factors are computed to equilibrate */ +/* the system: */ +/* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ + +/* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ +/* factor the matrix A (after equilibration if FACT = 'E') as */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular band matrix, and L is a lower */ +/* triangular band matrix. */ + +/* 3. If the leading i-by-i principal minor is not positive definite, */ +/* then the routine returns with INFO = i. Otherwise, the factored */ +/* form of A is used to estimate the condition number of the matrix */ +/* A. If the reciprocal of the condition number is less than machine */ +/* precision, INFO = N+1 is returned as a warning, but the routine */ +/* still goes on to solve for X and compute error bounds as */ +/* described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(S) so that it solves the original system before */ +/* equilibration. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AFB contains the factored form of A. */ +/* If EQUED = 'Y', the matrix A has been equilibrated */ +/* with scaling factors given by S. AB and AFB will not */ +/* be modified. 
*/ +/* = 'N': The matrix A will be copied to AFB and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AFB and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right-hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array, except */ +/* if FACT = 'F' and EQUED = 'Y', then A must contain the */ +/* equilibrated matrix diag(S)*A*diag(S). The j-th column of A */ +/* is stored in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). */ +/* See below for further details. */ + +/* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* diag(S)*A*diag(S). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* If FACT = 'F', then AFB is an input argument and on entry */ +/* contains the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the band matrix */ +/* A, in the same storage format as A (see AB). If EQUED = 'Y', */ +/* then AFB is the factored form of the equilibrated matrix A.
*/ + +/* If FACT = 'N', then AFB is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + +/* If FACT = 'E', then AFB is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the equilibrated */ +/* matrix A (see the description of A for the form of the */ +/* equilibrated matrix). */ + +/* LDAFB (input) INTEGER */ +/* The leading dimension of the array AFB. LDAFB >= KD+1. */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A; not accessed if EQUED = 'N'. S is */ +/* an input argument if FACT = 'F'; otherwise, S is an output */ +/* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ +/* must be positive. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ +/* B is overwritten by diag(S) * B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ +/* the original system of equations. Note that if EQUED = 'Y', */ +/* A and B are modified on exit, and the solution to the */ +/* equilibrated system is inv(diag(S))*X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). 
*/ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A after equilibration (if done). If RCOND is less than the */ +/* machine precision (in particular, if RCOND = 0), the matrix */ +/* is singular to working precision. This condition is */ +/* indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: the leading minor of order i of A is */ +/* not positive definite, so the factorization */ +/* could not be completed, and the solution has not */ +/* been computed. RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. 
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* N = 6, KD = 2, and UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 */ +/* a22 a23 a24 */ +/* a33 a34 a35 */ +/* a44 a45 a46 */ +/* a55 a56 */ +/* (aij=conjg(aji)) a66 */ + +/* Band storage of the upper triangle of A: */ + +/* * * a13 a24 a35 a46 */ +/* * a12 a23 a34 a45 a56 */ +/* a11 a22 a33 a44 a55 a66 */ + +/* Similarly, if UPLO = 'L' the format of A is as follows: */ + +/* a11 a22 a33 a44 a55 a66 */ +/* a21 a32 a43 a54 a65 * */ +/* a31 a42 a53 a64 * * */ + +/* Array elements marked * are not used by the routine. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1; + afb -= afb_offset; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + upper = _starpu_lsame_(uplo, "U"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = _starpu_lsame_(equed, "Y"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (*ldafb < *kd + 1) { + *info = -9; + } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( + equed, "N"))) { + *info = -10; + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = max(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -11; + } else if (*n > 0) { + scond = max(smin,smlnum) / min(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -13; + } else if (*ldx < max(1,*n)) { + *info = -15; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBSVX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dpbequ_(uplo, n, kd, &ab[ab_offset], ldab, &s[1], &scond, &amax, & + infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqsb_(uplo, n, kd, &ab[ab_offset], ldab, &s[1], &scond, &amax, + equed); + rcequ = _starpu_lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side. */ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; +/* L20: */ + } +/* L30: */ + } + } + + if (nofact || equil) { + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *kd; + j1 = max(i__2,1); + i__2 = j - j1 + 1; + _starpu_dcopy_(&i__2, &ab[*kd + 1 - j + j1 + j * ab_dim1], &c__1, & + afb[*kd + 1 - j + j1 + j * afb_dim1], &c__1); +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = j + *kd; + j2 = min(i__2,*n); + i__2 = j2 - j + 1; + _starpu_dcopy_(&i__2, &ab[j * ab_dim1 + 1], &c__1, &afb[j * afb_dim1 + + 1], &c__1); +/* L50: */ + } + } + + _starpu_dpbtrf_(uplo, n, kd, &afb[afb_offset], ldafb, info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlansb_("1", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dpbcon_(uplo, n, kd, &afb[afb_offset], ldafb, &anorm, rcond, &work[1], & + iwork[1], info); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpbtrs_(uplo, n, kd, nrhs, &afb[afb_offset], ldafb, &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dpbrfs_(uplo, n, kd, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] +, &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; +/* L60: */ + } +/* L70: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= scond; +/* L80: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. 
*/ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DPBSVX */ + +} /* _starpu_dpbsvx_ */ diff --git a/min-dgels/base/SRC/dpbtf2.c b/min-dgels/base/SRC/dpbtf2.c new file mode 100644 index 0000000..70d9005 --- /dev/null +++ b/min-dgels/base/SRC/dpbtf2.c @@ -0,0 +1,244 @@ +/* dpbtf2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b8 = -1.; /* scalar -1 passed as the third argument of _starpu_dsyr_ */ +static integer c__1 = 1; /* increment of 1 passed to _starpu_dscal_ and _starpu_dsyr_ in the UPLO = 'L' branch */ + +/* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j, kn; + doublereal ajj; + integer kld; + extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), _starpu_dscal_( + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBTF2 computes the Cholesky factorization of a real symmetric */ +/* positive definite band matrix A.
*/ + +/* The factorization has the form */ +/* A = U' * U , if UPLO = 'U', or */ +/* A = L * L', if UPLO = 'L', */ +/* where U is an upper triangular matrix, U' is the transpose of U, and */ +/* L is lower triangular. */ + +/* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of super-diagonals of the matrix A if UPLO = 'U', */ +/* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, if INFO = 0, the triangular factor U or L from the */ +/* Cholesky factorization A = U'*U or A = L*L' of the band */ +/* matrix A, in the same storage format as A. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, the leading minor of order k is not */ +/* positive definite, and the factorization could not be */ +/* completed.
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* N = 6, KD = 2, and UPLO = 'U': */ + +/* On entry: On exit: */ + +/* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ + +/* Similarly, if UPLO = 'L' the format of A is as follows: */ + +/* On entry: On exit: */ + +/* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ +/* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ +/* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ + +/* Array elements marked * are not used by the routine. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*ldab < *kd + 1) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = 1, i__2 = *ldab - 1; + kld = max(i__1,i__2); /* kld = max(1, LDAB-1): per the storage formulas above, consecutive elements of one matrix row lie LDAB-1 array entries apart; clamped to at least 1 */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + ajj = ab[*kd + 1 + j * ab_dim1]; + if (ajj <= 0.)
{ + goto L30; + } + ajj = sqrt(ajj); + ab[*kd + 1 + j * ab_dim1] = ajj; + +/* Compute elements J+1:J+KN of row J and update the */ +/* trailing submatrix within the band. */ + +/* Computing MIN */ + i__2 = *kd, i__3 = *n - j; + kn = min(i__2,i__3); + if (kn > 0) { + d__1 = 1. / ajj; + _starpu_dscal_(&kn, &d__1, &ab[*kd + (j + 1) * ab_dim1], &kld); + _starpu_dsyr_("Upper", &kn, &c_b8, &ab[*kd + (j + 1) * ab_dim1], &kld, + &ab[*kd + 1 + (j + 1) * ab_dim1], &kld); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + ajj = ab[j * ab_dim1 + 1]; + if (ajj <= 0.) { + goto L30; + } + ajj = sqrt(ajj); + ab[j * ab_dim1 + 1] = ajj; + +/* Compute elements J+1:J+KN of column J and update the */ +/* trailing submatrix within the band. */ + +/* Computing MIN */ + i__2 = *kd, i__3 = *n - j; + kn = min(i__2,i__3); + if (kn > 0) { + d__1 = 1. / ajj; + _starpu_dscal_(&kn, &d__1, &ab[j * ab_dim1 + 2], &c__1); + _starpu_dsyr_("Lower", &kn, &c_b8, &ab[j * ab_dim1 + 2], &c__1, &ab[( + j + 1) * ab_dim1 + 1], &kld); + } +/* L20: */ + } + } + return 0; + +L30: /* reached when the diagonal entry ajj <= 0 at column j: report j, the order of the leading minor that is not positive definite */ + *info = j; + return 0; + +/* End of DPBTF2 */ + +} /* _starpu_dpbtf2_ */ diff --git a/min-dgels/base/SRC/dpbtrf.c b/min-dgels/base/SRC/dpbtrf.c new file mode 100644 index 0000000..795cb82 --- /dev/null +++ b/min-dgels/base/SRC/dpbtrf.c @@ -0,0 +1,471 @@ +/* dpbtrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b18 = 1.; /* scalar 1 passed to _starpu_dtrsm_, _starpu_dsyrk_ and _starpu_dgemm_ */ +static doublereal c_b21 = -1.; /* scalar -1 passed to _starpu_dsyrk_ and _starpu_dgemm_ */ +static integer c__33 = 33; /* leading dimension of the local work array, passed wherever work is an operand */ + +/* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * + ab, integer *ldab, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, i2, i3, ib, nb, ii, jj; + doublereal work[1056] /* was [33][32]; addressed below with 1-based (row, column) indices and a column stride of 33 */; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsyrk_( + char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + _starpu_dpbtf2_(char *, integer *, integer *, doublereal *, integer *, + integer *), _starpu_dpotf2_(char *, integer *, doublereal *, + integer *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments ..
*/ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPBTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite band matrix A. */ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, if INFO = 0, the triangular factor U or L from the */ +/* Cholesky factorization A = U**T*U or A = L*L**T of the band */ +/* matrix A, in the same storage format as A. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed.
*/ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* N = 6, KD = 2, and UPLO = 'U': */ + +/* On entry: On exit: */ + +/* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ + +/* Similarly, if UPLO = 'L' the format of A is as follows: */ + +/* On entry: On exit: */ + +/* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ +/* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ +/* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ + +/* Array elements marked * are not used by the routine. */ + +/* Contributed by */ +/* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*ldab < *kd + 1) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment */ + + nb = _starpu_ilaenv_(&c__1, "DPBTRF", uplo, n, kd, &c_n1, &c_n1); + +/* The block size must not exceed the semi-bandwidth KD, and must not */ +/* exceed the limit set by the size of the local array WORK.
*/ + + nb = min(nb,32); /* 32 is the column count of the local work array (declared as [33][32]) */ + + if (nb <= 1 || nb > *kd) { + +/* Use unblocked code */ + + _starpu_dpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info); + } else { + +/* Use blocked code */ + + if (_starpu_lsame_(uplo, "U")) { + +/* Compute the Cholesky factorization of a symmetric band */ +/* matrix, given the upper triangle of the matrix in band */ +/* storage. */ + +/* Zero the upper triangle of the work array. */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__ + j * 33 - 34] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Process the band matrix one diagonal block at a time. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + +/* Factorize the diagonal block */ + + i__3 = *ldab - 1; + _starpu_dpotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii); + if (ii != 0) { + *info = i__ + ii - 1; /* shift the value ii reported for the block starting at column i__ into an index of the whole matrix */ + goto L150; + } + if (i__ + ib <= *n) { + +/* Update the relevant part of the trailing submatrix. */ +/* If A11 denotes the diagonal block which has just been */ +/* factorized, then we need to update the remaining */ +/* blocks in the diagram: */ + +/* A11 A12 A13 */ +/* A22 A23 */ +/* A33 */ + +/* The numbers of rows and columns in the partitioning */ +/* are IB, I2, I3 respectively. The blocks A12, A22 and */ +/* A23 are empty if IB = KD. The upper triangle of A13 */ +/* lies outside the band.
*/ + +/* Computing MIN */ + i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; + i2 = min(i__3,i__4); +/* Computing MIN */ + i__3 = ib, i__4 = *n - i__ - *kd + 1; + i3 = min(i__3,i__4); + + if (i2 > 0) { + +/* Update A12 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, + &i2, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], & + i__3, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1] +, &i__4); + +/* Update A22 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &ab[* + kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, & + c_b18, &ab[*kd + 1 + (i__ + ib) * ab_dim1], & + i__4); + } + + if (i3 > 0) { + +/* Copy the lower triangle of A13 into the work array. */ + + i__3 = i3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = ib; + for (ii = jj; ii <= i__4; ++ii) { + work[ii + jj * 33 - 34] = ab[ii - jj + 1 + ( + jj + i__ + *kd - 1) * ab_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Update A13 (in the work array). */ + + i__3 = *ldab - 1; + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, + &i3, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], & + i__3, work, &c__33); + +/* Update A23 */ + + if (i2 > 0) { + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dgemm_("Transpose", "No Transpose", &i2, &i3, &ib, + &c_b21, &ab[*kd + 1 - ib + (i__ + ib) * + ab_dim1], &i__3, work, &c__33, &c_b18, & + ab[ib + 1 + (i__ + *kd) * ab_dim1], &i__4); + } + +/* Update A33 */ + + i__3 = *ldab - 1; + _starpu_dsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, & + c__33, &c_b18, &ab[*kd + 1 + (i__ + *kd) * + ab_dim1], &i__3); + +/* Copy the lower triangle of A13 back into place.
*/ + + i__3 = i3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = ib; + for (ii = jj; ii <= i__4; ++ii) { + ab[ii - jj + 1 + (jj + i__ + *kd - 1) * + ab_dim1] = work[ii + jj * 33 - 34]; +/* L50: */ + } +/* L60: */ + } + } + } +/* L70: */ + } + } else { + +/* Compute the Cholesky factorization of a symmetric band */ +/* matrix, given the lower triangle of the matrix in band */ +/* storage. */ + +/* Zero the lower triangle of the work array. */ + + i__2 = nb; + for (j = 1; j <= i__2; ++j) { + i__1 = nb; + for (i__ = j + 1; i__ <= i__1; ++i__) { + work[i__ + j * 33 - 34] = 0.; +/* L80: */ + } +/* L90: */ + } + +/* Process the band matrix one diagonal block at a time. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + +/* Factorize the diagonal block */ + + i__3 = *ldab - 1; + _starpu_dpotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii); + if (ii != 0) { + *info = i__ + ii - 1; + goto L150; + } + if (i__ + ib <= *n) { + +/* Update the relevant part of the trailing submatrix. */ +/* If A11 denotes the diagonal block which has just been */ +/* factorized, then we need to update the remaining */ +/* blocks in the diagram: */ + +/* A11 */ +/* A21 A22 */ +/* A31 A32 A33 */ + +/* The numbers of rows and columns in the partitioning */ +/* are IB, I2, I3 respectively. The blocks A21, A22 and */ +/* A32 are empty if IB = KD. The lower triangle of A31 */ +/* lies outside the band.
*/ + +/* Computing MIN */ + i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; + i2 = min(i__3,i__4); +/* Computing MIN */ + i__3 = ib, i__4 = *n - i__ - *kd + 1; + i3 = min(i__3,i__4); + + if (i2 > 0) { + +/* Update A21 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i2, + &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, & + ab[ib + 1 + i__ * ab_dim1], &i__4); + +/* Update A22 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dsyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &ab[ + ib + 1 + i__ * ab_dim1], &i__3, &c_b18, &ab[( + i__ + ib) * ab_dim1 + 1], &i__4); + } + + if (i3 > 0) { + +/* Copy the upper triangle of A31 into the work array. */ + + i__3 = ib; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = min(jj,i3); + for (ii = 1; ii <= i__4; ++ii) { + work[ii + jj * 33 - 34] = ab[*kd + 1 - jj + + ii + (jj + i__ - 1) * ab_dim1]; +/* L100: */ + } +/* L110: */ + } + +/* Update A31 (in the work array). */ + + i__3 = *ldab - 1; + _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i3, + &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, + work, &c__33); + +/* Update A32 */ + + if (i2 > 0) { + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_dgemm_("No transpose", "Transpose", &i3, &i2, &ib, + &c_b21, work, &c__33, &ab[ib + 1 + i__ * + ab_dim1], &i__3, &c_b18, &ab[*kd + 1 - ib + + (i__ + ib) * ab_dim1], &i__4); + } + +/* Update A33 */ + + i__3 = *ldab - 1; + _starpu_dsyrk_("Lower", "No Transpose", &i3, &ib, &c_b21, + work, &c__33, &c_b18, &ab[(i__ + *kd) * + ab_dim1 + 1], &i__3); + +/* Copy the upper triangle of A31 back into place.
*/ + + i__3 = ib; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = min(jj,i3); + for (ii = 1; ii <= i__4; ++ii) { + ab[*kd + 1 - jj + ii + (jj + i__ - 1) * + ab_dim1] = work[ii + jj * 33 - 34]; +/* L120: */ + } +/* L130: */ + } + } + } +/* L140: */ + } + } + } + return 0; + +L150: /* early exit taken when _starpu_dpotf2_ returned a non-zero ii; *info was already set from i__ and ii before the jump */ + return 0; + +/* End of DPBTRF */ + +} /* _starpu_dpbtrf_ */ diff --git a/min-dgels/base/SRC/dpbtrs.c b/min-dgels/base/SRC/dpbtrs.c new file mode 100644 index 0000000..ad1a063 --- /dev/null +++ b/min-dgels/base/SRC/dpbtrs.c @@ -0,0 +1,184 @@ +/* dpbtrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, integer * + nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + integer j; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DPBTRS solves a system of linear equations A*X = B with a symmetric */ +/* positive definite band matrix A using the Cholesky factorization */ +/* A = U**T*U or A = L*L**T computed by DPBTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular factor stored in AB; */ +/* = 'L': Lower triangular factor stored in AB. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T of the band matrix A, stored in the */ +/* first KD+1 rows of the array. The j-th column of U or L is */ +/* stored in the j-th column of the array AB as follows: */ +/* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPBTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B where A = U'*U. */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U'*X = B, overwriting B with X. */ + + _starpu_dtbsv_("Upper", "Transpose", "Non-unit", n, kd, &ab[ab_offset], + ldab, &b[j * b_dim1 + 1], &c__1); + +/* Solve U*X = B, overwriting B with X. */ + + _starpu_dtbsv_("Upper", "No transpose", "Non-unit", n, kd, &ab[ab_offset], + ldab, &b[j * b_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Solve A*X = B where A = L*L'. */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L*X = B, overwriting B with X. */ + + _starpu_dtbsv_("Lower", "No transpose", "Non-unit", n, kd, &ab[ab_offset], + ldab, &b[j * b_dim1 + 1], &c__1); + +/* Solve L'*X = B, overwriting B with X. */ + + _starpu_dtbsv_("Lower", "Transpose", "Non-unit", n, kd, &ab[ab_offset], + ldab, &b[j * b_dim1 + 1], &c__1); +/* L20: */ + } + } + + return 0; + +/* End of DPBTRS */ + +} /* _starpu_dpbtrs_ */ diff --git a/min-dgels/base/SRC/dpftrf.c b/min-dgels/base/SRC/dpftrf.c new file mode 100644 index 0000000..84dddda --- /dev/null +++ b/min-dgels/base/SRC/dpftrf.c @@ -0,0 +1,452 @@ +/* dpftrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b12 = 1.; +static doublereal c_b15 = -1.; + +/* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer k, n1, n2; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsyrk_( + char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + logical nisodd; + extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ + +/* Purpose */ +/* ======= */ + +/* DPFTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A. 
*/ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* This is the block version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': The Normal TRANSR of RFP A is stored; */ +/* = 'T': The Transpose TRANSR of RFP A is stored. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': Upper triangle of RFP A is stored; */ +/* = 'L': Lower triangle of RFP A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); */ +/* On entry, the symmetric matrix A in RFP format. RFP format is */ +/* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ +/* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ +/* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ +/* the transpose of RFP A as defined when */ +/* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ +/* follows: If UPLO = 'U' the RFP A contains the NT elements of */ +/* upper packed A. If UPLO = 'L' the RFP A contains the elements */ +/* of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */ +/* 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */ +/* is odd. See the Note below for more details. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization RFP A = U**T*U or RFP A = L*L**T. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed. */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. 
*/ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. 
*/ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPFTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* If N is odd, set NISODD = .TRUE. */ +/* If N is even, set K = N/2 and NISODD = .FALSE. 
*/ + + if (*n % 2 == 0) { + k = *n / 2; + nisodd = FALSE_; + } else { + nisodd = TRUE_; + } + +/* Set N1 and N2 depending on LOWER */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + +/* start execution: there are eight cases */ + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ +/* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ +/* T1 -> a(0), T2 -> a(n), S -> a(n1) */ + + _starpu_dpotrf_("L", &n1, a, n, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("R", "L", "T", "N", &n2, &n1, &c_b12, a, n, &a[n1], n); + _starpu_dsyrk_("U", "N", &n2, &n1, &c_b15, &a[n1], n, &c_b12, &a[*n], + n); + _starpu_dpotrf_("U", &n2, &a[*n], n, info); + if (*info > 0) { + *info += n1; + } + + } else { + +/* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */ +/* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ +/* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ + + _starpu_dpotrf_("L", &n1, &a[n2], n, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("L", "L", "N", "N", &n1, &n2, &c_b12, &a[n2], n, a, n); + _starpu_dsyrk_("U", "T", &n2, &n1, &c_b15, a, n, &c_b12, &a[n1], n); + _starpu_dpotrf_("U", &n2, &a[n1], n, info); + if (*info > 0) { + *info += n1; + } + + } + + } else { + +/* N is odd and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is odd */ +/* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) */ +/* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 */ + + _starpu_dpotrf_("U", &n1, a, &n1, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("L", "U", "T", "N", &n1, &n2, &c_b12, a, &n1, &a[n1 * + n1], &n1); + _starpu_dsyrk_("L", "T", &n2, &n1, &c_b15, &a[n1 * n1], &n1, &c_b12, & + a[1], &n1); + _starpu_dpotrf_("L", &n2, &a[1], &n1, info); + if (*info > 0) { + *info += n1; + } + + } else { + +/* SRPA for UPPER, TRANSPOSE and N is odd */ +/* T1 -> A(0,n1+1), T2 -> A(0,n1), 
S -> A(0,0) */ +/* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 */ + + _starpu_dpotrf_("U", &n1, &a[n2 * n2], &n2, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("R", "U", "N", "N", &n2, &n1, &c_b12, &a[n2 * n2], &n2, + a, &n2); + _starpu_dsyrk_("L", "N", &n2, &n1, &c_b15, a, &n2, &c_b12, &a[n1 * n2] +, &n2); + _starpu_dpotrf_("L", &n2, &a[n1 * n2], &n2, info); + if (*info > 0) { + *info += n1; + } + + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ +/* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ + + i__1 = *n + 1; + _starpu_dpotrf_("L", &k, &a[1], &i__1, info); + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrsm_("R", "L", "T", "N", &k, &k, &c_b12, &a[1], &i__1, &a[k + + 1], &i__2); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dsyrk_("U", "N", &k, &k, &c_b15, &a[k + 1], &i__1, &c_b12, a, + &i__2); + i__1 = *n + 1; + _starpu_dpotrf_("U", &k, a, &i__1, info); + if (*info > 0) { + *info += k; + } + + } else { + +/* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ +/* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ + + i__1 = *n + 1; + _starpu_dpotrf_("L", &k, &a[k + 1], &i__1, info); + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrsm_("L", "L", "N", "N", &k, &k, &c_b12, &a[k + 1], &i__1, + a, &i__2); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dsyrk_("U", "T", &k, &k, &c_b15, a, &i__1, &c_b12, &a[k], & + i__2); + i__1 = *n + 1; + _starpu_dpotrf_("U", &k, &a[k], &i__1, info); + if (*info > 0) { + *info += k; + } + + } + + } else { + +/* N is even and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ +/* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ + + 
_starpu_dpotrf_("U", &k, &a[k], &k, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("L", "U", "T", "N", &k, &k, &c_b12, &a[k], &n1, &a[k * + (k + 1)], &k); + _starpu_dsyrk_("L", "T", &k, &k, &c_b15, &a[k * (k + 1)], &k, &c_b12, + a, &k); + _starpu_dpotrf_("L", &k, a, &k, info); + if (*info > 0) { + *info += k; + } + + } else { + +/* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ +/* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ + + _starpu_dpotrf_("U", &k, &a[k * (k + 1)], &k, info); + if (*info > 0) { + return 0; + } + _starpu_dtrsm_("R", "U", "N", "N", &k, &k, &c_b12, &a[k * (k + 1)], & + k, a, &k); + _starpu_dsyrk_("L", "N", &k, &k, &c_b15, a, &k, &c_b12, &a[k * k], &k); + _starpu_dpotrf_("L", &k, &a[k * k], &k, info); + if (*info > 0) { + *info += k; + } + + } + + } + + } + + return 0; + +/* End of DPFTRF */ + +} /* _starpu_dpftrf_ */ diff --git a/min-dgels/base/SRC/dpftri.c b/min-dgels/base/SRC/dpftri.c new file mode 100644 index 0000000..beecf53 --- /dev/null +++ b/min-dgels/base/SRC/dpftri.c @@ -0,0 +1,403 @@ +/* dpftri.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal + *a, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer k, n1, n2; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical lower; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + logical nisodd; + extern /* Subroutine */ int _starpu_dlauum_(char *, integer *, doublereal *, + integer *, integer *), _starpu_dtftri_(char *, char *, char *, + integer *, doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPFTRI computes the inverse of a (real) symmetric positive definite */ +/* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ +/* computed by DPFTRF. 
*/ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': The Normal TRANSR of RFP A is stored; */ +/* = 'T': The Transpose TRANSR of RFP A is stored. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ) */ +/* On entry, the symmetric matrix A in RFP format. RFP format is */ +/* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ +/* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ +/* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ +/* the transpose of RFP A as defined when */ +/* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ +/* follows: If UPLO = 'U' the RFP A contains the nt elements of */ +/* upper packed A. If UPLO = 'L' the RFP A contains the elements */ +/* of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */ +/* 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */ +/* is odd. See the Note below for more details. */ + +/* On exit, the symmetric inverse of the original matrix, in the */ +/* same storage format. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the (i,i) element of the factor U or L is */ +/* zero, and the inverse could not be computed. */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. 
RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. 
RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPFTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + _starpu_dtftri_(transr, uplo, "N", n, a, info); + if (*info > 0) { + return 0; + } + +/* If N is odd, set NISODD = .TRUE. */ +/* If N is even, set K = N/2 and NISODD = .FALSE. */ + + if (*n % 2 == 0) { + k = *n / 2; + nisodd = FALSE_; + } else { + nisodd = TRUE_; + } + +/* Set N1 and N2 depending on LOWER */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + +/* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */ +/* inv(L)^C*inv(L). There are eight cases. 
*/ + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */ +/* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */ +/* T1 -> a(0), T2 -> a(n), S -> a(N1) */ + + _starpu_dlauum_("L", &n1, a, n, info); + _starpu_dsyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n); + _starpu_dtrmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1] +, n); + _starpu_dlauum_("U", &n2, &a[*n], n, info); + + } else { + +/* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */ +/* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */ +/* T1 -> a(N2), T2 -> a(N1), S -> a(0) */ + + _starpu_dlauum_("L", &n1, &a[n2], n, info); + _starpu_dsyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n); + _starpu_dtrmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n); + _starpu_dlauum_("U", &n2, &a[n1], n, info); + + } + + } else { + +/* N is odd and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE, and N is odd */ +/* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */ + + _starpu_dlauum_("U", &n1, a, &n1, info); + _starpu_dsyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, + a, &n1); + _starpu_dtrmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[ + n1 * n1], &n1); + _starpu_dlauum_("L", &n2, &a[1], &n1, info); + + } else { + +/* SRPA for UPPER, TRANSPOSE, and N is odd */ +/* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */ + + _starpu_dlauum_("U", &n1, &a[n2 * n2], &n2, info); + _starpu_dsyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2] +, &n2); + _starpu_dtrmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, + a, &n2); + _starpu_dlauum_("L", &n2, &a[n1 * n2], &n2, info); + + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ +/* T1 -> a(1), T2 -> a(0), 
S -> a(k+1) */ + + i__1 = *n + 1; + _starpu_dlauum_("L", &k, &a[1], &i__1, info); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dsyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[ + 1], &i__2); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1] +, &i__2); + i__1 = *n + 1; + _starpu_dlauum_("U", &k, a, &i__1, info); + + } else { + +/* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ +/* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ + + i__1 = *n + 1; + _starpu_dlauum_("L", &k, &a[k + 1], &i__1, info); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dsyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], + &i__2); + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, & + i__2); + i__1 = *n + 1; + _starpu_dlauum_("U", &k, &a[k], &i__1, info); + + } + + } else { + +/* N is even and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE, and N is even (see paper) */ +/* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */ +/* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ + + _starpu_dlauum_("U", &k, &a[k], &k, info); + _starpu_dsyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, + &a[k], &k); + _starpu_dtrmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + + 1)], &k); + _starpu_dlauum_("L", &k, a, &k, info); + + } else { + +/* SRPA for UPPER, TRANSPOSE, and N is even (see paper) */ +/* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), */ +/* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ + + _starpu_dlauum_("U", &k, &a[k * (k + 1)], &k, info); + _starpu_dsyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1) + ], &k); + _starpu_dtrmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, & + k); + _starpu_dlauum_("L", &k, &a[k * k], &k, info); + + } + + } + + } + + return 0; + +/* End of DPFTRI */ + +} /* _starpu_dpftri_ */ diff --git a/min-dgels/base/SRC/dpftrs.c 
b/min-dgels/base/SRC/dpftrs.c new file mode 100644 index 0000000..5ec1aac --- /dev/null +++ b/min-dgels/base/SRC/dpftrs.c @@ -0,0 +1,240 @@ +/* dpftrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b10 = 1.; + +/* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * + nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtfsm_(char *, char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPFTRS solves a system of linear equations A*X = B with a symmetric */ +/* positive definite matrix A using the Cholesky factorization */ +/* A = U**T*U or A = L*L**T computed by DPFTRF. 
*/ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': The Normal TRANSR of RFP A is stored; */ +/* = 'T': The Transpose TRANSR of RFP A is stored. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': Upper triangle of RFP A is stored; */ +/* = 'L': Lower triangle of RFP A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ). */ +/* The triangular factor U or L from the Cholesky factorization */ +/* of RFP A = U**T*U or RFP A = L*L**T, as computed by DPFTRF. */ +/* See note below for more details about RFP A. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. 
*/ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. 
*/ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPFTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* start execution: there are two triangular solves */ + + if (lower) { + _starpu_dtfsm_(transr, "L", uplo, "N", "N", n, nrhs, &c_b10, a, &b[b_offset], + ldb); + _starpu_dtfsm_(transr, "L", uplo, "T", "N", n, nrhs, &c_b10, a, &b[b_offset], + ldb); + } else { + _starpu_dtfsm_(transr, "L", uplo, "T", "N", n, nrhs, &c_b10, a, &b[b_offset], + ldb); + _starpu_dtfsm_(transr, "L", uplo, "N", "N", n, nrhs, &c_b10, a, &b[b_offset], + ldb); + } + + return 0; + +/* End of DPFTRS */ + +} /* _starpu_dpftrs_ */ diff --git a/min-dgels/base/SRC/dpocon.c b/min-dgels/base/SRC/dpocon.c new file mode 100644 index 0000000..8a311cf --- /dev/null +++ b/min-dgels/base/SRC/dpocon.c @@ -0,0 +1,220 @@ +/* dpocon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal scalel; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal scaleu; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DPOCON estimates the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric positive definite matrix using the */ +/* Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm (or infinity-norm) of the symmetric matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = _starpu_dlamch_("Safe minimum"); + +/* Estimate the 1-norm of inv(A). */ + + kase = 0; + *(unsigned char *)normin = 'N'; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (upper) { + +/* Multiply by inv(U'). */ + + _starpu_dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], + lda, &work[1], &scalel, &work[(*n << 1) + 1], info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(U). */ + + _starpu_dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &scaleu, &work[(*n << 1) + 1], + info); + } else { + +/* Multiply by inv(L). */ + + _starpu_dlatrs_("Lower", "No transpose", "Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &scalel, &work[(*n << 1) + 1], + info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(L'). */ + + _starpu_dlatrs_("Lower", "Transpose", "Non-unit", normin, n, &a[a_offset], + lda, &work[1], &scaleu, &work[(*n << 1) + 1], info); + } + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + scale = scalel * scaleu; + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. 
*/ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of DPOCON */ + +} /* _starpu_dpocon_ */ diff --git a/min-dgels/base/SRC/dpoequ.c b/min-dgels/base/SRC/dpoequ.c new file mode 100644 index 0000000..f108ad3 --- /dev/null +++ b/min-dgels/base/SRC/dpoequ.c @@ -0,0 +1,174 @@ +/* dpoequ.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal smin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOEQU computes row and column scalings intended to equilibrate a */ +/* symmetric positive definite matrix A and reduce its condition number */ +/* (with respect to the two-norm). S contains the scale factors, */ +/* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ +/* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. 
This */ +/* choice of S puts the condition number of B within a factor N of the */ +/* smallest possible condition number over all possible diagonal */ +/* scalings. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The N-by-N symmetric positive definite matrix whose scaling */ +/* factors are to be computed. Only the diagonal elements of A */ +/* are referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* S (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, S contains the scale factors for A. */ + +/* SCOND (output) DOUBLE PRECISION */ +/* If INFO = 0, SCOND contains the ratio of the smallest S(i) to */ +/* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* large nor too small, it is not worth scaling by S. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1,*n)) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOEQU", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *scond = 1.; + *amax = 0.; + return 0; + } + +/* Find the minimum and maximum diagonal elements. */ + + s[1] = a[a_dim1 + 1]; + smin = s[1]; + *amax = s[1]; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + s[i__] = a[i__ + i__ * a_dim1]; +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = *amax, d__2 = s[i__]; + *amax = max(d__1,d__2); +/* L10: */ + } + + if (smin <= 0.) { + +/* Find the first non-positive diagonal element and return. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] <= 0.) { + *info = i__; + return 0; + } +/* L20: */ + } + } else { + +/* Set the scale factors to the reciprocals */ +/* of the diagonal elements. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 1. / sqrt(s[i__]); +/* L30: */ + } + +/* Compute SCOND = min(S(I)) / max(S(I)) */ + + *scond = sqrt(smin) / sqrt(*amax); + } + return 0; + +/* End of DPOEQU */ + +} /* _starpu_dpoequ_ */ diff --git a/min-dgels/base/SRC/dpoequb.c b/min-dgels/base/SRC/dpoequb.c new file mode 100644 index 0000000..9aba565 --- /dev/null +++ b/min-dgels/base/SRC/dpoequb.c @@ -0,0 +1,188 @@ +/* dpoequb.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *scond, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal tmp, base, smin; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOEQUB computes row and column scalings intended to equilibrate a */ +/* symmetric positive definite matrix A and reduce its condition number */ +/* (with respect to the two-norm). S contains the scale factors, */ +/* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ +/* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ +/* choice of S puts the condition number of B within a factor N of the */ +/* smallest possible condition number over all possible diagonal */ +/* scalings. 
*/ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The N-by-N symmetric positive definite matrix whose scaling */ +/* factors are to be computed. Only the diagonal elements of A */ +/* are referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* S (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, S contains the scale factors for A. */ + +/* SCOND (output) DOUBLE PRECISION */ +/* If INFO = 0, SCOND contains the ratio of the smallest S(i) to */ +/* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* large nor too small, it is not worth scaling by S. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + +/* Positive definite only performs 1 pass of equilibration. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1,*n)) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOEQUB", &i__1); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*n == 0) { + *scond = 1.; + *amax = 0.; + return 0; + } + base = _starpu_dlamch_("B"); + tmp = -.5 / log(base); + +/* Find the minimum and maximum diagonal elements. */ + + s[1] = a[a_dim1 + 1]; + smin = s[1]; + *amax = s[1]; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + s[i__] = a[i__ + i__ * a_dim1]; +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = *amax, d__2 = s[i__]; + *amax = max(d__1,d__2); +/* L10: */ + } + + if (smin <= 0.) { + +/* Find the first non-positive diagonal element and return. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] <= 0.) { + *info = i__; + return 0; + } +/* L20: */ + } + } else { + +/* Set the scale factors to the reciprocals */ +/* of the diagonal elements. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = (integer) (tmp * log(s[i__])); + s[i__] = pow_di(&base, &i__2); +/* L30: */ + } + +/* Compute SCOND = min(S(I)) / max(S(I)). */ + + *scond = sqrt(smin) / sqrt(*amax); + } + + return 0; + +/* End of DPOEQUB */ + +} /* _starpu_dpoequb_ */ diff --git a/min-dgels/base/SRC/dporfs.c b/min-dgels/base/SRC/dporfs.c new file mode 100644 index 0000000..074f8b4 --- /dev/null +++ b/min-dgels/base/SRC/dporfs.c @@ -0,0 +1,422 @@ +/* dporfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + ferr, doublereal *berr, doublereal *work, integer *iwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s, xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + logical upper; + extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrs_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer 
*, integer *); + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPORFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric positive definite, */ +/* and provides error bounds and backward error estimates for the */ +/* solution. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular part */ +/* of the matrix A, and the strictly lower triangular part of A */ +/* is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of A contains the lower triangular part of */ +/* the matrix A, and the strictly upper triangular part of A is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. 
LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DPOTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldaf < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldx < max(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPORFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dsymv_(uplo, n, &c_b12, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, + &c_b14, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. 
*/ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L40: */ + } + work[k] = work[k] + (d__1 = a[k + k * a_dim1], abs(d__1)) * + xk + s; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + work[k] += (d__1 = a[k + k * a_dim1], abs(d__1)) * xk; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. 
*/ + + _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], n, + info); + _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A'). */ + + _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], + n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], + n, info); + } + goto L100; + } + +/* Normalize error. 
*/ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DPORFS */ + +} /* _starpu_dporfs_ */ diff --git a/min-dgels/base/SRC/dporfsx.c b/min-dgels/base/SRC/dporfsx.c new file mode 100644 index 0000000..ee40dca --- /dev/null +++ b/min-dgels/base/SRC/dporfsx.c @@ -0,0 +1,622 @@ +/* dporfsx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__0 = 0; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; + integer ref_type__, j; + 
doublereal rcond_tmp__; + integer prec_type__; + extern doublereal _starpu_dla_porcond__(char *, integer *, doublereal *, integer * + , doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, ftnlen); + doublereal cwise_wrong__; + extern /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, logical *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + logical *, integer *, ftnlen); + char norm[1]; + logical ignore_cwise__; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + logical rcequ; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpocon_( + char *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern integer _starpu_ilaprec_(char *); + integer ithresh, n_norms__; + doublereal rthresh; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPORFSX improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric positive */ +/* definite, and provides error bounds and backward error estimates */ +/* for the solution. 
In addition to normwise error bound, the code */ +/* provides maximum componentwise error bound if possible. See */ +/* comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ +/* error bounds. */ + +/* The original system of linear equations may have been equilibrated */ +/* before calling this routine, as described by arguments EQUED and S */ +/* below. In this case, the solution and error bounds returned are */ +/* for the original unequilibrated system. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* EQUED (input) CHARACTER*1 */ +/* Specifies the form of equilibration that was done to A */ +/* before calling this routine. This is needed to compute */ +/* the solution and error bounds correctly. */ +/* = 'N': No equilibration */ +/* = 'Y': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(S) * A * diag(S). */ +/* The right hand side B has been changed accordingly. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular part */ +/* of the matrix A, and the strictly lower triangular part of A */ +/* is not referenced. 
If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of A contains the lower triangular part of */ +/* the matrix A, and the strictly upper triangular part of A is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* the left and right by diag(S). S is an input argument if FACT = */ +/* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* = 'Y', each element of S must be positive. If S is output, each */ +/* element of S is a power of the radix. If S is input, each element */ +/* of S should be a power of the radix to ensure a reliable solution */ +/* and error estimates. Scaling by powers of the radix does not cause */ +/* rounding errors unless the result underflows or overflows. */ +/* Rounding errors during scaling lead to refining with a matrix that */ +/* is not equivalent to the input matrix, producing error estimates */ +/* that may not be reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DGETRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. 
This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error. This is the */ +/* componentwise relative backward error of each solution vector X(j) */ +/* (i.e., the smallest relative change in any element of A or B that */ +/* makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). 
*/ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. 
*/ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is less than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ +/* that entry will be filled with default value used for that */ +/* parameter. Only positions up to NPARAMS are accessed; defaults */ +/* are used for higher-numbered parameters. */ + +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. */ +/* Default: 1.0D+0 */ +/* = 0.0 : No refinement is performed, and no error bounds are */ +/* computed. 
*/ +/* = 1.0 : Use the double-precision refinement algorithm, */ +/* possibly with doubled-single computations if the */ +/* compilation environment does not support DOUBLE */ +/* PRECISION. */ +/* (other values are reserved for future use) */ + +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. */ +/* Default: 10 */ +/* Aggressive: Set to 100 to permit convergence using approximate */ +/* factorizations or factorizations other than LU. If */ +/* the factorization uses a technique other than */ +/* Gaussian elimination, the guarantees in */ +/* err_bnds_norm and err_bnds_comp may no longer be */ +/* trustworthy. */ + +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error in the double-precision algorithm. Positive */ +/* is true, 0.0 is false. */ +/* Default: 1.0 (attempt componentwise convergence) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). 
By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.) { + params[1] = 1.; + } else { + ref_type__ = (integer) params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); + ithresh = 10; + rthresh = .5; + unstable_thresh__ = .25; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.) { + params[2] = (doublereal) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.) 
{ + if (ignore_cwise__) { + params[3] = 0.; + } else { + params[3] = 1.; + } + } else { + ignore_cwise__ = params[3] == 0.; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + rcequ = _starpu_lsame_(equed, "Y"); + +/* Test input parameters. */ + + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! rcequ && ! _starpu_lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -11; + } else if (*ldx < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPORFSX", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; + } + } + return 0; + } + +/* Default to failure. 
*/ + + *rcond = 0.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. */ + + *(unsigned char *)norm = 'I'; + anorm = _starpu_dlansy_(norm, uplo, n, &a[a_offset], lda, &work[1]); + _starpu_dpocon_(uplo, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0) { + prec_type__ = _starpu_ilaprec_("E"); + _starpu_dla_porfsx_extended__(&prec_type__, uplo, n, nrhs, &a[a_offset], lda, + &af[af_offset], ldaf, &rcequ, &s[1], &b[b_offset], ldb, &x[ + x_offset], ldx, &berr[1], &n_norms__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], + &work[*n + 1], &work[1], &work[(*n << 1) + 1], &work[1], + rcond, &ithresh, &rthresh, &unstable_thresh__, & + ignore_cwise__, info, (ftnlen)1); + } +/* Computing MAX */ + d__1 = 10., d__2 = sqrt((doublereal) (*n)); + err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (rcequ) { + rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &c_n1, &s[1], info, &work[1], &iwork[1], + (ftnlen)1); + } else { + rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &c__0, &s[1], info, &work[1], &iwork[1], + (ftnlen)1); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. 
*/ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &c__1, &x[j * x_dim1 + 1], info, & + work[1], &iwork[1], (ftnlen)1); + } else { + rcond_tmp__ = 0.; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; + if (params[3] == 1. 
&& *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of DPORFSX */ + +} /* _starpu_dporfsx_ */ diff --git a/min-dgels/base/SRC/dposv.c b/min-dgels/base/SRC/dposv.c new file mode 100644 index 0000000..3db84b0 --- /dev/null +++ b/min-dgels/base/SRC/dposv.c @@ -0,0 +1,151 @@ +/* dposv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrf_( + char *, integer *, doublereal *, integer *, integer *), + _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DPOSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite matrix and X and B */ +/* are N-by-NRHS matrices. */ + +/* The Cholesky decomposition is used to factor A as */ +/* A = U**T* U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is a lower triangular */ +/* matrix. The factored form of A is then used to solve the system of */ +/* equations A * X = B. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i of A is not */ +/* positive definite, so the factorization could not be */ +/* completed, and the solution has not been computed. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOSV ", &i__1); + return 0; + } + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + _starpu_dpotrf_(uplo, n, &a[a_offset], lda, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + _starpu_dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); + + } + return 0; + +/* End of DPOSV */ + +} /* _starpu_dposv_ */ diff --git a/min-dgels/base/SRC/dposvx.c b/min-dgels/base/SRC/dposvx.c new file mode 100644 index 0000000..ae5e484 --- /dev/null +++ b/min-dgels/base/SRC/dposvx.c @@ -0,0 +1,450 @@ +/* dposvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * + berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal amax, smin, smax; + extern logical _starpu_lsame_(char *, char *); + doublereal scond, anorm; + logical equil, rcequ; + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dpocon_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + integer infequ; + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, char *), _starpu_dpoequ_(integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dporfs_( + char *, integer *, integer *, doublereal *, integer 
*, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dpotrf_(char *, integer *, doublereal *, integer *, + integer *); + doublereal smlnum; + extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ +/* compute the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite matrix and X and B */ +/* are N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* the system: */ +/* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ + +/* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ +/* factor the matrix A (after equilibration if FACT = 'E') as */ +/* A = U**T* U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is a lower triangular */ +/* matrix. */ + +/* 3. If the leading i-by-i principal minor is not positive definite, */ +/* then the routine returns with INFO = i. Otherwise, the factored */ +/* form of A is used to estimate the condition number of the matrix */ +/* A. 
If the reciprocal of the condition number is less than machine */ +/* precision, INFO = N+1 is returned as a warning, but the routine */ +/* still goes on to solve for X and compute error bounds as */ +/* described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(S) so that it solves the original system before */ +/* equilibration. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AF contains the factored form of A. */ +/* If EQUED = 'Y', the matrix A has been equilibrated */ +/* with scaling factors given by S. A and AF will not */ +/* be modified. */ +/* = 'N': The matrix A will be copied to AF and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AF and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A, except if FACT = 'F' and */ +/* EQUED = 'Y', then A must contain the equilibrated matrix */ +/* diag(S)*A*diag(S). 
If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. A is not modified if */ +/* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ + +/* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* diag(S)*A*diag(S). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T, in the same storage */ +/* format as A. If EQUED .ne. 'N', then AF is the factored form */ +/* of the equilibrated matrix diag(S)*A*diag(S). */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the original */ +/* matrix A. */ + +/* If FACT = 'E', then AF is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the equilibrated */ +/* matrix A (see the description of A for the form of the */ +/* equilibrated matrix). */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. 
*/ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A; not accessed if EQUED = 'N'. S is */ +/* an input argument if FACT = 'F'; otherwise, S is an output */ +/* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ +/* must be positive. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ +/* B is overwritten by diag(S) * B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ +/* the original system of equations. Note that if EQUED = 'Y', */ +/* A and B are modified on exit, and the solution to the */ +/* equilibrated system is inv(diag(S))*X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A after equilibration (if done). If RCOND is less than the */ +/* machine precision (in particular, if RCOND = 0), the matrix */ +/* is singular to working precision. This condition is */ +/* indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. 
*/ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: the leading minor of order i of A is */ +/* not positive definite, so the factorization */ +/* could not be completed, and the solution has not */ +/* been computed. RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = _starpu_lsame_(equed, "Y"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( + equed, "N"))) { + *info = -9; + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = max(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -10; + } else if (*n > 0) { + scond = max(smin,smlnum) / min(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -12; + } else if (*ldx < max(1,*n)) { + *info = -14; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOSVX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dpoequ_(n, &a[a_offset], lda, &s[1], &scond, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. 
*/ + + _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); + rcequ = _starpu_lsame_(equed, "Y"); + } + } + +/* Scale the right hand side. */ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; +/* L20: */ + } +/* L30: */ + } + } + + if (nofact || equil) { + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + _starpu_dpotrf_(uplo, n, &af[af_offset], ldaf, info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlansy_("1", uplo, n, &a[a_offset], lda, &work[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dpocon_(uplo, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &x[x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dporfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &b[ + b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1], & + iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; +/* L40: */ + } +/* L50: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= scond; +/* L60: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. 
*/ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DPOSVX */ + +} /* _starpu_dposvx_ */ diff --git a/min-dgels/base/SRC/dposvxx.c b/min-dgels/base/SRC/dposvxx.c new file mode 100644 index 0000000..d6a3e51 --- /dev/null +++ b/min-dgels/base/SRC/dposvxx.c @@ -0,0 +1,611 @@ +/* dposvxx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * + berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer j; + doublereal amax, smin, smax; + extern doublereal _starpu_dla_porpvgrw__(char *, integer *, doublereal *, integer + *, doublereal *, integer *, doublereal *, ftnlen); + extern logical _starpu_lsame_(char *, char *); + doublereal scond; + logical equil, rcequ; + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + 
doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + integer infequ; + extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, char *), _starpu_dpotrf_(char *, integer *, doublereal *, integer + *, integer *); + doublereal smlnum; + extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal * +, integer *), _starpu_dpoequb_(integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dporfsx_( + char *, char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T */ +/* to compute the solution to a double precision system of linear equations */ +/* A * X = B, where A is an N-by-N symmetric positive definite matrix */ +/* and X and B are N-by-NRHS matrices. */ + +/* If requested, both normwise and maximum componentwise error bounds */ +/* are returned. 
DPOSVXX will return a solution with a tiny */ +/* guaranteed error (O(eps) where eps is the working machine */ +/* precision) unless the matrix is very ill-conditioned, in which */ +/* case a warning is returned. Relevant condition numbers also are */ +/* calculated and returned. */ + +/* DPOSVXX accepts user-provided factorizations and equilibration */ +/* factors; see the definitions of the FACT and EQUED options. */ +/* Solving with refinement and using a factorization from a previous */ +/* DPOSVXX call will also produce a solution with either O(eps) */ +/* errors or warnings, but we cannot make that claim for general */ +/* user-provided factorizations and equilibration factors if they */ +/* differ from what DPOSVXX would itself produce. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* the system: */ + +/* diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ + +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ + +/* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ +/* factor the matrix A (after equilibration if FACT = 'E') as */ +/* A = U**T* U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is a lower triangular */ +/* matrix. */ + +/* 3. If the leading i-by-i principal minor is not positive definite, */ +/* then the routine returns with INFO = i. Otherwise, the factored */ +/* form of A is used to estimate the condition number of the matrix */ +/* A (see argument RCOND). If the reciprocal of the condition number */ +/* is less than machine precision, the routine still goes on to solve */ +/* for X and compute error bounds as described below. */ + +/* 4. 
The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* the routine will use iterative refinement to try to get a small */ +/* error and error bounds. Refinement calculates the residual to at */ +/* least twice the working precision. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(S) so that it solves the original system before */ +/* equilibration. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AF contains the factored form of A. */ +/* If EQUED is not 'N', the matrix A has been */ +/* equilibrated with scaling factors given by S. */ +/* A and AF are not modified. */ +/* = 'N': The matrix A will be copied to AF and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AF and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. 
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = */ +/* 'Y', then A must contain the equilibrated matrix */ +/* diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper */ +/* triangular part of A contains the upper triangular part of the */ +/* matrix A, and the strictly lower triangular part of A is not */ +/* referenced. If UPLO = 'L', the leading N-by-N lower triangular */ +/* part of A contains the lower triangular part of the matrix A, and */ +/* the strictly upper triangular part of A is not referenced. A is */ +/* not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = */ +/* 'N' on exit. */ + +/* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* diag(S)*A*diag(S). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T, in the same storage */ +/* format as A. If EQUED .ne. 'N', then AF is the factored */ +/* form of the equilibrated matrix diag(S)*A*diag(S). */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the original */ +/* matrix A. */ + +/* If FACT = 'E', then AF is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T of the equilibrated */ +/* matrix A (see the description of A for the form of the */ +/* equilibrated matrix). */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. 
*/ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'Y': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(S) * A * diag(S). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The row scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* the left and right by diag(S). S is an input argument if FACT = */ +/* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* = 'Y', each element of S must be positive. If S is output, each */ +/* element of S is a power of the radix. If S is input, each element */ +/* of S should be a power of the radix to ensure a reliable solution */ +/* and error estimates. Scaling by powers of the radix does not cause */ +/* rounding errors unless the result underflows or overflows. */ +/* Rounding errors during scaling lead to refining with a matrix that */ +/* is not equivalent to the input matrix, producing error estimates */ +/* that may not be reliable. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if EQUED = 'Y', B is overwritten by diag(S)*B; */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* system of equations. Note that A and B are modified on exit if */ +/* EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* inv(diag(S))*X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. 
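This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* RPVGRW (output) DOUBLE PRECISION */ +/* Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* pivot growth factor norm(A)/norm(U). The "max absolute element" */ +/* norm is used. If this is much less than 1, then the stability of */ +/* the Cholesky factorization of the (equilibrated) matrix A could be */ +/* poor. This also means that the solution X, estimated condition */ +/* numbers, and error bounds could be unreliable. If factorization */ +/* fails with 0 < INFO <= N, then this contains the reciprocal pivot */ +/* growth factor for the leading INFO columns of A. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error of each solution vector */ +/* X(j) (i.e., the smallest relative change in any element of A or */ +/* B that makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which for the ith solution vector is */ +/* max_j abs(XTRUE(j,i) - X(j,i)) / max_j abs(X(j,i)). */ +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. The second index in ERR_BNDS_NORM(:,err) selects */ +/* one of up to three fields: */ +/* err = 1 "Trust/don't trust" boolean; 0.0 means the bound is not */ +/* guaranteed. */ +/* err = 2 "Guaranteed" error bound: the estimated forward error, */ +/* to be trusted only if the previous boolean is true. */ +/* err = 3 Reciprocal condition number: estimated normwise */ +/* reciprocal condition number, compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". */ +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* Same layout as ERR_BNDS_NORM, but for the componentwise relative */ +/* error, which for the ith solution vector is */ +/* max_j ( abs(XTRUE(j,i) - X(j,i)) / abs(X(j,i)) ). */ +/* This array is only relevant when a small componentwise error is */ +/* requested (PARAMS(3) nonzero, which is the default). */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ +/* that entry will be filled with default value used for that */ +/* parameter. Only positions up to NPARAMS are accessed; defaults */ +/* are used for higher-numbered parameters. */ +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. Default: 1.0D+0 */ +/* = 0.0 : No refinement is performed, and no error bounds are */ +/* computed. */ +/* = 1.0 : Use the double-precision refinement algorithm. */ +/* (other values are reserved for future use) */ +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. Default: 10 */ +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error in the double-precision algorithm. Positive */ +/* is true, 0.0 is false. Default: 1.0 (attempt componentwise */ +/* convergence) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: The leading minor of order INFO of A is not */ +/* positive definite, so the Cholesky factorization could not be */ +/* completed. RPVGRW is set as described above; X, RCOND, BERR and */ +/* the error bounds are not computed. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1).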
To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = _starpu_lsame_(equed, "Y"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in DPORFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DPORFSX. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (_starpu_lsame_(fact, "F") && ! 
(rcequ || _starpu_lsame_( + equed, "N"))) { + *info = -9; /* with FACT = 'F', EQUED must be 'Y' or 'N' */ + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = max(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -10; /* supplied scale factors must all be positive */ + } else if (*n > 0) { + scond = max(smin,smlnum) / min(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -12; + } else if (*ldx < max(1,*n)) { + *info = -14; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOSVXX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dpoequb_(n, &a[a_offset], lda, &s[1], &scond, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. DLAQSY reports through EQUED whether it actually scaled A. */ + + _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); + rcequ = _starpu_lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side. */ + + if (rcequ) { + _starpu_dlascl2_(n, nrhs, &s[1], &b[b_offset], ldb); + } + + if (nofact || equil) { + +/* Compute the Cholesky factorization A = U**T*U or A = L*L**T. */ + + _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + _starpu_dpotrf_(uplo, n, &af[af_offset], ldaf, info); + +/* Return if INFO is non-zero. */ + + if (*info != 0) { + +/* The leading minor of order INFO is not positive definite. */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading INFO columns of A and return without solving. */ + + *rpvgrw = _starpu_dla_porpvgrw__(uplo, info, &a[a_offset], lda, &af[ + af_offset], ldaf, &work[1], (ftnlen)1); + return 0; + } + } + +/* Compute the reciprocal growth factor RPVGRW. */ + + *rpvgrw = _starpu_dla_porpvgrw__(uplo, n, &a[a_offset], lda, &af[af_offset], ldaf, + &work[1], (ftnlen)1); + +/* Compute the solution matrix X.
*/ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &x[x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. NPARAMS and PARAMS are passed through to DPORFSX unchanged. */ + + _starpu_dporfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, &berr[1], + n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], nparams, & params[1], &work[ + 1], &iwork[1], info); + +/* Scale solutions: multiply X by diag(S) so that it solves the original (unequilibrated) system. */ + + if (rcequ) { + _starpu_dlascl2_(n, nrhs, &s[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DPOSVXX */ + +} /* _starpu_dposvxx_ */ diff --git a/min-dgels/base/SRC/dpotf2.c b/min-dgels/base/SRC/dpotf2.c new file mode 100644 index 0000000..e5c37d6 --- /dev/null +++ b/min-dgels/base/SRC/dpotf2.c @@ -0,0 +1,224 @@ +/* dpotf2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b10 = -1.; +static doublereal c_b12 = 1.; + +/* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j; + doublereal ajj; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *);
+ extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + logical upper; + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOTF2 computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A. */ + +/* The factorization has the form */ +/* A = U' * U , if UPLO = 'U', or */ +/* A = L * L', if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n by n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n by n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U'*U or A = L*L'. 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, the leading minor of order k is not */ +/* positive definite, and the factorization could not be */ +/* completed. A(k,k) is then overwritten with the non-positive or NaN value that stopped the factorization. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: rebase A so that a[i + j * a_dim1] addresses the 1-based element A(i,j) */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness: ajj is A(J,J) minus the dot product of A(1:J-1,J) with itself; a non-positive or NaN ajj is stored back into A(J,J) and the routine leaves through L30. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - _starpu_ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0. || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J: per the BLAS contract, DGEMV subtracts A(1:J-1,J+1:N)**T * A(1:J-1,J) from A(J,J+1:N), then DSCAL divides that row by U(J,J). */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b10, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b12, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = *n - j; + d__1 = 1.
/ ajj; + _starpu_dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness: ajj is A(J,J) minus the dot product of row A(J,1:J-1) with itself (stride LDA); a non-positive or NaN ajj is stored back into A(J,J) and the routine leaves through L30. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - _starpu_ddot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0. || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J: per the BLAS contract, DGEMV subtracts A(J+1:N,1:J-1) * A(J,1:J-1)**T from A(J+1:N,J), then DSCAL divides that column by L(J,J). */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b10, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b12, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: /* failure exit: step j produced a non-positive or NaN diagonal value */ + *info = j; + +L40: /* common exit */ + return 0; + +/* End of DPOTF2 */ + +} /* _starpu_dpotf2_ */ diff --git a/min-dgels/base/SRC/dpotrf.c b/min-dgels/base/SRC/dpotrf.c new file mode 100644 index 0000000..b09a4f0 --- /dev/null +++ b/min-dgels/base/SRC/dpotrf.c @@ -0,0 +1,245 @@ +/* dpotrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b13 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dpotf2_(char *, integer *, + doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DPOTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A. */ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* This is the block version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + _starpu_dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a[j * + a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda); + _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + _starpu_dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + _starpu_dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a[j + + a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda); + _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + _starpu_dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b14, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* _starpu_dpotrf_ */ diff --git a/min-dgels/base/SRC/dpotri.c b/min-dgels/base/SRC/dpotri.c new file mode 100644 index 0000000..581ccae --- /dev/null +++ b/min-dgels/base/SRC/dpotri.c @@ -0,0 +1,125 @@ +/* dpotri.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlauum_( + char *, integer *, doublereal *, integer *, integer *), + _starpu_dtrtri_(char *, char *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOTRI computes the inverse of a real symmetric positive definite */ +/* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ +/* computed by DPOTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T, as computed by */ +/* DPOTRF. */ +/* On exit, the upper or lower triangle of the (symmetric) */ +/* inverse of A, overwriting the input factor U or L. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. 
LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the (i,i) element of the factor U or L is */ +/* zero, and the inverse could not be computed. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + _starpu_dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + _starpu_dlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of DPOTRI */ + +} /* _starpu_dpotri_ */ diff --git a/min-dgels/base/SRC/dpotrs.c b/min-dgels/base/SRC/dpotrs.c new file mode 100644 index 0000000..02f4d91 --- /dev/null +++ b/min-dgels/base/SRC/dpotrs.c @@ -0,0 +1,166 @@ +/* dpotrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b9 = 1.; + +/* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPOTRS solves a system of linear equations A*X = B with a symmetric */ +/* positive definite matrix A using the Cholesky factorization */ +/* A = U**T*U or A = L*L**T computed by DPOTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. 
*/ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B where A = U'*U. */ + +/* Solve U'*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b9, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. 
*/ + + _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b9, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* Solve A*X = B where A = L*L'. */ + +/* Solve L*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b9, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + _starpu_dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b9, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of DPOTRS */ + +} /* _starpu_dpotrs_ */ diff --git a/min-dgels/base/SRC/dppcon.c b/min-dgels/base/SRC/dppcon.c new file mode 100644 index 0000000..63e33fe --- /dev/null +++ b/min-dgels/base/SRC/dppcon.c @@ -0,0 +1,215 @@ +/* dppcon.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, + doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char 
*); + doublereal scalel; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal scaleu; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlatps_( + char *, char *, char *, char *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + doublereal ainvnm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPCON estimates the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric positive definite packed matrix using */ +/* the Cholesky factorization A = U**T*U or A = L*L**T computed by */ +/* DPPTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, packed columnwise in a linear */ +/* array. The j-th column of U or L is stored in the array AP */ +/* as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm (or infinity-norm) of the symmetric matrix A. 
*/ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --iwork; + --work; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = _starpu_dlamch_("Safe minimum"); + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; + *(unsigned char *)normin = 'N'; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (upper) { + +/* Multiply by inv(U'). */ + + _starpu_dlatps_("Upper", "Transpose", "Non-unit", normin, n, &ap[1], & + work[1], &scalel, &work[(*n << 1) + 1], info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(U). 
*/ + + _starpu_dlatps_("Upper", "No transpose", "Non-unit", normin, n, &ap[1], & + work[1], &scaleu, &work[(*n << 1) + 1], info); + } else { + +/* Multiply by inv(L). */ + + _starpu_dlatps_("Lower", "No transpose", "Non-unit", normin, n, &ap[1], & + work[1], &scalel, &work[(*n << 1) + 1], info); + *(unsigned char *)normin = 'Y'; + +/* Multiply by inv(L'). */ + + _starpu_dlatps_("Lower", "Transpose", "Non-unit", normin, n, &ap[1], & + work[1], &scaleu, &work[(*n << 1) + 1], info); + } + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + scale = scalel * scaleu; + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of DPPCON */ + +} /* _starpu_dppcon_ */ diff --git a/min-dgels/base/SRC/dppequ.c b/min-dgels/base/SRC/dppequ.c new file mode 100644 index 0000000..521c783 --- /dev/null +++ b/min-dgels/base/SRC/dppequ.c @@ -0,0 +1,208 @@ +/* dppequ.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, + doublereal *s, doublereal *scond, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, jj; + doublereal smin; + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPEQU computes row and column scalings intended to equilibrate a */ +/* symmetric positive definite matrix A in packed storage and reduce */ +/* its condition number (with respect to the two-norm). S contains the */ +/* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix */ +/* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. */ +/* This choice of S puts the condition number of B within a factor N of */ +/* the smallest possible condition number over all possible diagonal */ +/* scalings. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. 
*/ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangle of the symmetric matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* S (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, S contains the scale factors for A. */ + +/* SCOND (output) DOUBLE PRECISION */ +/* If INFO = 0, SCOND contains the ratio of the smallest S(i) to */ +/* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* large nor too small, it is not worth scaling by S. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --s; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPEQU", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *scond = 1.; + *amax = 0.; + return 0; + } + +/* Initialize SMIN and AMAX.
*/ + + s[1] = ap[1]; + smin = s[1]; + *amax = s[1]; + + if (upper) { + +/* UPLO = 'U': Upper triangle of A is stored. */ +/* Find the minimum and maximum diagonal elements. */ + + jj = 1; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + jj += i__; + s[i__] = ap[jj]; +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = *amax, d__2 = s[i__]; + *amax = max(d__1,d__2); +/* L10: */ + } + + } else { + +/* UPLO = 'L': Lower triangle of A is stored. */ +/* Find the minimum and maximum diagonal elements. */ + + jj = 1; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + jj = jj + *n - i__ + 2; + s[i__] = ap[jj]; +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = *amax, d__2 = s[i__]; + *amax = max(d__1,d__2); +/* L20: */ + } + } + + if (smin <= 0.) { + +/* Find the first non-positive diagonal element and return. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] <= 0.) { + *info = i__; + return 0; + } +/* L30: */ + } + } else { + +/* Set the scale factors to the reciprocals */ +/* of the diagonal elements. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 1. / sqrt(s[i__]); +/* L40: */ + } + +/* Compute SCOND = min(S(I)) / max(S(I)) */ + + *scond = sqrt(smin) / sqrt(*amax); + } + return 0; + +/* End of DPPEQU */ + +} /* _starpu_dppequ_ */ diff --git a/min-dgels/base/SRC/dpprfs.c b/min-dgels/base/SRC/dpprfs.c new file mode 100644 index 0000000..cb0c94b --- /dev/null +++ b/min-dgels/base/SRC/dpprfs.c @@ -0,0 +1,413 @@ +/* dpprfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s; + integer ik, kk; + doublereal xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal lstres; + extern /* Subroutine */ int _starpu_dpptrs_(char *, integer *, integer *, + doublereal *, 
doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric positive definite */ +/* and packed, and provides error bounds and backward error estimates */ +/* for the solution. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangle of the symmetric matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, as computed by DPPTRF/ZPPTRF, */ +/* packed columnwise in a linear array in the same format as A */ +/* (see AP). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DPPTRS. */ +/* On exit, the improved solution matrix X. 
*/ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --afp; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldx < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dspmv_(uplo, n, &c_b12, &ap[1], &x[j * x_dim1 + 1], &c__1, &c_b14, & + work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). 
*/ + + kk = 1; + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + ik = kk; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; + s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * + x_dim1], abs(d__2)); + ++ik; +/* L40: */ + } + work[k] = work[k] + (d__1 = ap[kk + k - 1], abs(d__1)) * xk + + s; + kk += k; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + work[k] += (d__1 = ap[kk], abs(d__1)) * xk; + ik = kk + 1; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; + s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * + x_dim1], abs(d__2)); + ++ik; +/* L60: */ + } + work[k] += s; + kk += *n - k + 1; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); + _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. 
FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A'). */ + + _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) 
{ + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DPPRFS */ + +} /* _starpu_dpprfs_ */ diff --git a/min-dgels/base/SRC/dppsv.c b/min-dgels/base/SRC/dppsv.c new file mode 100644 index 0000000..49ff3a2 --- /dev/null +++ b/min-dgels/base/SRC/dppsv.c @@ -0,0 +1,161 @@ +/* dppsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( + char *, integer *, doublereal *, integer *), _starpu_dpptrs_(char + *, integer *, integer *, doublereal *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite matrix stored in */ +/* packed format and X and B are N-by-NRHS matrices.
*/ + +/* The Cholesky decomposition is used to factor A as */ +/* A = U**T* U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is a lower triangular */ +/* matrix. The factored form of A is then used to solve the system of */ +/* equations A * X = B. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T, in the same storage */ +/* format as A. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i of A is not */ +/* positive definite, so the factorization could not be */ +/* completed, and the solution has not been computed.
*/ + +/* Further Details */ +/* =============== */ + +/* The packed storage scheme is illustrated by the following example */ +/* when N = 4, UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 a14 */ +/* a22 a23 a24 */ +/* a33 a34 (aij = aji) */ +/* a44 */ + +/* Packed storage of the upper triangle of A: */ + +/* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift the base pointers for 1-based Fortran indexing; &ap[1] and &b[b_offset] are the original array starts */ + --ap; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -6; + } + if (*info != 0) { /* an argument was rejected: pass its 1-based position (-INFO) to XERBLA */ + i__1 = -(*info); + _starpu_xerbla_("DPPSV ", &i__1); + return 0; + } + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + _starpu_dpptrf_(uplo, n, &ap[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + _starpu_dpptrs_(uplo, n, nrhs, &ap[1], &b[b_offset], ldb, info); + + } + return 0; + +/* End of DPPSV */ + +} /* _starpu_dppsv_ */ diff --git a/min-dgels/base/SRC/dppsvx.c b/min-dgels/base/SRC/dppsvx.c new file mode 100644 index 0000000..cb8313f --- /dev/null +++ b/min-dgels/base/SRC/dppsvx.c @@ -0,0 +1,455 @@ +/* dppsvx.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j; + doublereal amax, smin, smax; + extern logical _starpu_lsame_(char *, char *); + doublereal scond, anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical equil, rcequ; + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, + doublereal *); + extern /* Subroutine */ int _starpu_dppcon_(char *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaqsp_(char *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, char *); + integer infequ; + extern /* Subroutine */ int _starpu_dppequ_(char *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), +
_starpu_dpprfs_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, integer *), + _starpu_dpptrf_(char *, integer *, doublereal *, integer *); + doublereal smlnum; + extern /* Subroutine */ int _starpu_dpptrs_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ +/* compute the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite matrix stored in */ +/* packed format and X and B are N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* the system: */ +/* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ + +/* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ +/* factor the matrix A (after equilibration if FACT = 'E') as */ +/* A = U**T* U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is a lower triangular */ +/* matrix. */ + +/* 3. If the leading i-by-i principal minor is not positive definite, */ +/* then the routine returns with INFO = i.
Otherwise, the factored */ +/* form of A is used to estimate the condition number of the matrix */ +/* A. If the reciprocal of the condition number is less than machine */ +/* precision, INFO = N+1 is returned as a warning, but the routine */ +/* still goes on to solve for X and compute error bounds as */ +/* described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(S) so that it solves the original system before */ +/* equilibration. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AFP contains the factored form of A. */ +/* If EQUED = 'Y', the matrix A has been equilibrated */ +/* with scaling factors given by S. AP and AFP will not */ +/* be modified. */ +/* = 'N': The matrix A will be copied to AFP and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AFP and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0.
*/ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array, except if FACT = 'F' */ +/* and EQUED = 'Y', then A must contain the equilibrated matrix */ +/* diag(S)*A*diag(S). The j-th column of A is stored in the */ +/* array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. A is not modified if */ +/* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ + +/* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* diag(S)*A*diag(S). */ + +/* AFP (input or output) DOUBLE PRECISION array, dimension */ +/* (N*(N+1)/2) */ +/* If FACT = 'F', then AFP is an input argument and on entry */ +/* contains the triangular factor U or L from the Cholesky */ +/* factorization A = U'*U or A = L*L', in the same storage */ +/* format as A. If EQUED .ne. 'N', then AFP is the factored */ +/* form of the equilibrated matrix A. */ + +/* If FACT = 'N', then AFP is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U'*U or A = L*L' of the original matrix A. */ + +/* If FACT = 'E', then AFP is an output argument and on exit */ +/* returns the triangular factor U or L from the Cholesky */ +/* factorization A = U'*U or A = L*L' of the equilibrated */ +/* matrix A (see the description of AP for the form of the */ +/* equilibrated matrix). */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'Y': Equilibration was done, i.e., A has been replaced by */ +/* diag(S) * A * diag(S). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument.
*/ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A; not accessed if EQUED = 'N'. S is */ +/* an input argument if FACT = 'F'; otherwise, S is an output */ +/* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ +/* must be positive. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ +/* B is overwritten by diag(S) * B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ +/* the original system of equations. Note that if EQUED = 'Y', */ +/* A and B are modified on exit, and the solution to the */ +/* equilibrated system is inv(diag(S))*X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A after equilibration (if done). If RCOND is less than the */ +/* machine precision (in particular, if RCOND = 0), the matrix */ +/* is singular to working precision. This condition is */ +/* indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error.
*/ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: the leading minor of order i of A is */ +/* not positive definite, so the factorization */ +/* could not be completed, and the solution has not */ +/* been computed. RCOND = 0 is returned. */ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* Further Details */ +/* =============== */ + +/* The packed storage scheme is illustrated by the following example */ +/* when N = 4, UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 a14 */ +/* a22 a23 a24 */ +/* a33 a34 (aij = aji) */ +/* a44 */ + +/* Packed storage of the upper triangle of A: */ + +/* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + + /* Parameter adjustments: shift every array pointer so the code below can use 1-based Fortran indexing; &b[b_offset] and &x[x_offset] are the original array starts */ + --ap; + --afp; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { /* smlnum and bignum are assigned only on this path (FACT neither 'N' nor 'E'); the argument checks below read them only when rcequ is true, and rcequ is FALSE_ on the other path */ + rcequ = _starpu_lsame_(equed, "Y"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( + equed, "N"))) { + *info = -7; + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = max(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -8; + } else if (*n > 0) { + scond = max(smin,smlnum) / min(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldx < max(1,*n)) { + *info = -12; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPSVX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. 
*/ + + _starpu_dppequ_(uplo, n, &ap[1], &s[1], &scond, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqsp_(uplo, n, &ap[1], &s[1], &scond, &amax, equed); + rcequ = _starpu_lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side.
*/ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; +/* L20: */ + } +/* L30: */ + } + } + + if (nofact || equil) { + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + i__1 = *n * (*n + 1) / 2; /* number of elements in the packed triangle, N*(N+1)/2 */ + _starpu_dcopy_(&i__1, &ap[1], &c__1, &afp[1], &c__1); + _starpu_dpptrf_(uplo, n, &afp[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlansp_("I", uplo, n, &ap[1], &work[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dppcon_(uplo, n, &afp[1], &anorm, rcond, &work[1], &iwork[1], info); + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpptrs_(uplo, n, nrhs, &afp[1], &x[x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dpprfs_(uplo, n, nrhs, &ap[1], &afp[1], &b[b_offset], ldb, &x[x_offset], + ldx, &ferr[1], &berr[1], &work[1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (rcequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; +/* L40: */ + } +/* L50: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= scond; +/* L60: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision.
*/ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DPPSVX */ + +} /* _starpu_dppsvx_ */ diff --git a/min-dgels/base/SRC/dpptrf.c b/min-dgels/base/SRC/dpptrf.c new file mode 100644 index 0000000..fde3f88 --- /dev/null +++ b/min-dgels/base/SRC/dpptrf.c @@ -0,0 +1,223 @@ +/* dpptrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b16 = -1.; + +/* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j, jc, jj; + doublereal ajj; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DPPTRF computes the Cholesky factorization of a real symmetric */ +/* positive definite matrix A stored in packed format. */ + +/* The factorization has the form */ +/* A = U**T * U, if UPLO = 'U', or */ +/* A = L * L**T, if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. */ + +/* On exit, if INFO = 0, the triangular factor U or L from the */ +/* Cholesky factorization A = U**T*U or A = L*L**T, in the same */ +/* storage format as A. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the factorization could not be */ +/* completed. */ + +/* Further Details */ +/* =============== */ + +/* The packed storage scheme is illustrated by the following example */ +/* when N = 4, UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 a14 */ +/* a22 a23 a24 */ +/* a33 a34 (aij = aji) */ +/* a44 */ + +/* Packed storage of the upper triangle of A: */ + +/* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* ..
*/ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift AP so that ap[1] is its first element (1-based Fortran indexing) */ + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + jj = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jc = jj + 1; + jj += j; + +/* Compute elements 1:J-1 of column J. */ + + if (j > 1) { + i__2 = j - 1; + _starpu_dtpsv_("Upper", "Transpose", "Non-unit", &i__2, &ap[1], &ap[ + jc], &c__1); + } + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = ap[jj] - _starpu_ddot_(&i__2, &ap[jc], &c__1, &ap[jc], &c__1); + if (ajj <= 0.) { + ap[jj] = ajj; + goto L30; + } + ap[jj] = sqrt(ajj); +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + jj = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + ajj = ap[jj]; + if (ajj <= 0.) { + ap[jj] = ajj; + goto L30; + } + ajj = sqrt(ajj); + ap[jj] = ajj; + +/* Compute elements J+1:N of column J and update the trailing */ +/* submatrix. */ + + if (j < *n) { + i__2 = *n - j; + d__1 = 1.
/ ajj; + _starpu_dscal_(&i__2, &d__1, &ap[jj + 1], &c__1); + i__2 = *n - j; + _starpu_dspr_("Lower", &i__2, &c_b16, &ap[jj + 1], &c__1, &ap[jj + *n + - j + 1]); + jj = jj + *n - j + 1; + } +/* L20: */ + } + } + goto L40; + +L30: /* reached from either loop when ajj <= 0 at column j: report j as the order of the failing leading minor */ + *info = j; + +L40: + return 0; + +/* End of DPPTRF */ + +} /* _starpu_dpptrf_ */ diff --git a/min-dgels/base/SRC/dpptri.c b/min-dgels/base/SRC/dpptri.c new file mode 100644 index 0000000..462c7d3 --- /dev/null +++ b/min-dgels/base/SRC/dpptri.c @@ -0,0 +1,173 @@ +/* dpptri.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b8 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer j, jc, jj; + doublereal ajj; + integer jjn; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dtptri_( + char *, char *, integer 
*, doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ.
of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPTRI computes the inverse of a real symmetric positive definite */ +/* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ +/* computed by DPPTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangular factor is stored in AP; */ +/* = 'L': Lower triangular factor is stored in AP. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the triangular factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T, packed columnwise as */ +/* a linear array. The j-th column of U or L is stored in the */ +/* array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ + +/* On exit, the upper or lower triangle of the (symmetric) */ +/* inverse of A, overwriting the input factor U or L. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the (i,i) element of the factor U or L is */ +/* zero, and the inverse could not be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift AP so that ap[1] is its first element (1-based Fortran indexing) */ + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && !
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + _starpu_dtptri_(uplo, "Non-unit", n, &ap[1], info); + if (*info > 0) { /* the triangular inversion reported a failure: stop here and hand its INFO value back to the caller unchanged */ + return 0; + } + + if (upper) { + +/* Compute the product inv(U) * inv(U)'. */ + + jj = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jc = jj + 1; + jj += j; + if (j > 1) { + i__2 = j - 1; + _starpu_dspr_("Upper", &i__2, &c_b8, &ap[jc], &c__1, &ap[1]); + } + ajj = ap[jj]; + _starpu_dscal_(&j, &ajj, &ap[jc], &c__1); +/* L10: */ + } + + } else { + +/* Compute the product inv(L)' * inv(L). */ + + jj = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jjn = jj + *n - j + 1; + i__2 = *n - j + 1; + ap[jj] = _starpu_ddot_(&i__2, &ap[jj], &c__1, &ap[jj], &c__1); + if (j < *n) { + i__2 = *n - j; + _starpu_dtpmv_("Lower", "Transpose", "Non-unit", &i__2, &ap[jjn], &ap[ + jj + 1], &c__1); + } + jj = jjn; +/* L20: */ + } + } + + return 0; + +/* End of DPPTRI */ + +} /* _starpu_dpptri_ */ diff --git a/min-dgels/base/SRC/dpptrs.c b/min-dgels/base/SRC/dpptrs.c new file mode 100644 index 0000000..4b332ad --- /dev/null +++ b/min-dgels/base/SRC/dpptrs.c @@ -0,0 +1,170 @@ +/* dpptrs.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + integer i__; + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPPTRS solves a system of linear equations A*X = B with a symmetric */ +/* positive definite matrix A in packed storage using the Cholesky */ +/* factorization A = U**T*U or A = L*L**T computed by DPPTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. 
*/ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The triangular factor U or L from the Cholesky factorization */ +/* A = U**T*U or A = L*L**T, packed columnwise in a linear */ +/* array. The j-th column of U or L is stored in the array AP */ +/* as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPPTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B where A = U'*U. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U'*X = B, overwriting B with X. */ + + _starpu_dtpsv_("Upper", "Transpose", "Non-unit", n, &ap[1], &b[i__ * + b_dim1 + 1], &c__1); + +/* Solve U*X = B, overwriting B with X. 
*/ + + _starpu_dtpsv_("Upper", "No transpose", "Non-unit", n, &ap[1], &b[i__ * + b_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Solve A*X = B where A = L*L'. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve L*Y = B, overwriting B with X. */ + + _starpu_dtpsv_("Lower", "No transpose", "Non-unit", n, &ap[1], &b[i__ * + b_dim1 + 1], &c__1); + +/* Solve L'*X = Y, overwriting B with X. */ + + _starpu_dtpsv_("Lower", "Transpose", "Non-unit", n, &ap[1], &b[i__ * + b_dim1 + 1], &c__1); +/* L20: */ + } + } + + return 0; + +/* End of DPPTRS */ + +} /* _starpu_dpptrs_ */ diff --git a/min-dgels/base/SRC/dpstf2.c b/min-dgels/base/SRC/dpstf2.c new file mode 100644 index 0000000..0d5990c --- /dev/null +++ b/min-dgels/base/SRC/dpstf2.c @@ -0,0 +1,395 @@ +/* dpstf2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b16 = -1.; +static doublereal c_b18 = 1.; + +/* Subroutine DPSTF2: Cholesky factorization with complete pivoting, level 2 BLAS version; on exit RANK holds the number of completed steps */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, maxlocval; + doublereal ajj; + integer pvt; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /*
Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal dtemp; + integer itemp; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal dstop; + logical upper; + extern doublereal _starpu_dlamch_(char *); + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_dmaxloc_(doublereal *, integer *); + + +/* -- LAPACK PROTOTYPE routine (version 3.2) -- */ +/* Craig Lucas, University of Manchester / NAG Ltd. */ +/* October, 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPSTF2 computes the Cholesky factorization with complete */ +/* pivoting of a real symmetric positive semidefinite matrix A. */ + +/* The factorization has the form */ +/* P' * A * P = U' * U , if UPLO = 'U', */ +/* P' * A * P = L * L', if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular, and */ +/* P is stored as vector PIV. */ + +/* This algorithm does not attempt to check that A is positive */ +/* semidefinite. This version of the algorithm calls level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n by n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced.
If UPLO = 'L', the */ +/* leading n by n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization as above. */ + +/* PIV (output) INTEGER array, dimension (N) */ +/* PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */ + +/* RANK (output) INTEGER */ +/* The rank of A given by the number of steps the algorithm */ +/* completed. */ + +/* TOL (input) DOUBLE PRECISION */ +/* User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) */ +/* will be used. The algorithm terminates at the (K-1)st step */ +/* if the pivot <= TOL. */ + +/* LDA (input) INTEGER -- fourth argument in the call, although it is listed after TOL here */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* WORK DOUBLE PRECISION array, dimension (2*N) */ +/* Work space. */ + +/* INFO (output) INTEGER */ +/* < 0: If INFO = -K, the K-th argument had an illegal value, */ +/* = 0: algorithm completed successfully, and */ +/* > 0: the matrix A is either rank deficient with computed rank */ +/* as returned in RANK, or is indefinite. See Section 7 of */ +/* LAPACK Working Note #161 for further information. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments: offset WORK, PIV and A so that 1-based indices and A(i,j) = a[i + j * a_dim1] can be used below */ + --work; + --piv; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && !
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPSTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize PIV to the identity permutation */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + piv[i__] = i__; +/* L100: */ + } + +/* Find the largest diagonal entry AJJ and its position PVT; if that maximum is zero or DISNAN flags it, return with RANK = 0 and INFO = 1 */ + + pvt = 1; + ajj = a[pvt + pvt * a_dim1]; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (a[i__ + i__ * a_dim1] > ajj) { + pvt = i__; + ajj = a[pvt + pvt * a_dim1]; + } + } + if (ajj == 0. || _starpu_disnan_(&ajj)) { + *rank = 0; + *info = 1; + goto L170; + } + +/* Compute stopping value if not supplied: N * DLAMCH(Epsilon) * AJJ is used when TOL is negative, otherwise TOL itself */ + + if (*tol < 0.) { + dstop = *n * _starpu_dlamch_("Epsilon") * ajj; + } else { + dstop = *tol; + } + +/* Set first half of WORK to zero, holds dot products */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L110: */ + } + + if (upper) { + +/* Compute the Cholesky factorization P' * A * P = U' * U */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Find pivot, test for exit, else swap rows and columns */ +/* Update dot products, compute possible pivots which are */ +/* stored in the second half of WORK */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + + if (j > 1) { +/* Computing 2nd power */ + d__1 = a[j - 1 + i__ * a_dim1]; + work[i__] += d__1 * d__1; + } + work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; + +/* L120: */ + } + + if (j > 1) { /* for j = 1 PVT and AJJ keep the values found by the diagonal scan above */ + maxlocval = (*n << 1) - (*n + j) + 1; /* equals N - J + 1, the length of WORK(N+J) .. WORK(2N) */ + itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocval); + pvt = itemp + j - 1; + ajj = work[*n + pvt]; + if (ajj <= dstop || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L160; + } + } + + if (j != pvt) { + +/* Pivot OK, so can now swap pivot rows and columns */ + + a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; + i__2 = j - 1; + _starpu_dswap_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[pvt * a_dim1 +
1], + &c__1); + if (pvt < *n) { + i__2 = *n - pvt; + _starpu_dswap_(&i__2, &a[j + (pvt + 1) * a_dim1], lda, &a[pvt + ( + pvt + 1) * a_dim1], lda); + } + i__2 = pvt - j - 1; + _starpu_dswap_(&i__2, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 + pvt * + a_dim1], &c__1); + +/* Swap dot products and PIV */ + + dtemp = work[j]; + work[j] = work[pvt]; + work[pvt] = dtemp; + itemp = piv[pvt]; + piv[pvt] = piv[j]; + piv[j] = itemp; + } + + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + _starpu_dgemv_("Trans", &i__2, &i__3, &c_b16, &a[(j + 1) * a_dim1 + 1] +, lda, &a[j * a_dim1 + 1], &c__1, &c_b18, &a[j + (j + + 1) * a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + _starpu_dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } + +/* L130: */ + } + + } else { + +/* Compute the Cholesky factorization P' * A * P = L * L' */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Find pivot, test for exit, else swap rows and columns */ +/* Update dot products, compute possible pivots which are */ +/* stored in the second half of WORK */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + + if (j > 1) { +/* Computing 2nd power */ + d__1 = a[i__ + (j - 1) * a_dim1]; + work[i__] += d__1 * d__1; + } + work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; + +/* L140: */ + } + + if (j > 1) { /* for j = 1 PVT and AJJ keep the values found by the diagonal scan above */ + maxlocval = (*n << 1) - (*n + j) + 1; /* equals N - J + 1, the length of WORK(N+J) .. WORK(2N) */ + itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocval); + pvt = itemp + j - 1; + ajj = work[*n + pvt]; + if (ajj <= dstop || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L160; + } + } + + if (j != pvt) { + +/* Pivot OK, so can now swap pivot rows and columns */ + + a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; + i__2 = j - 1; + _starpu_dswap_(&i__2, &a[j + a_dim1], lda, &a[pvt + a_dim1], lda); + if (pvt < *n) { + i__2 = *n - pvt; + _starpu_dswap_(&i__2, &a[pvt + 1 + j * a_dim1], &c__1, &a[pvt + 1 + + pvt * a_dim1], &c__1); + } + i__2 = pvt - j - 1; +
_starpu_dswap_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &a[pvt + (j + 1) + * a_dim1], lda); + +/* Swap dot products and PIV */ + + dtemp = work[j]; + work[j] = work[pvt]; + work[pvt] = dtemp; + itemp = piv[pvt]; + piv[pvt] = piv[j]; + piv[j] = itemp; + } + + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + _starpu_dgemv_("No Trans", &i__2, &i__3, &c_b16, &a[j + 1 + a_dim1], + lda, &a[j + a_dim1], lda, &c_b18, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } + +/* L150: */ + } + + } + +/* Ran to completion, A has full rank */ + + *rank = *n; + + goto L170; +L160: + +/* Rank is number of steps completed. Set INFO = 1 to signal */ +/* that the factorization cannot be used to solve a system. */ + + *rank = j - 1; + *info = 1; + +L170: + return 0; + +/* End of DPSTF2 */ + +} /* _starpu_dpstf2_ */ diff --git a/min-dgels/base/SRC/dpstrf.c b/min-dgels/base/SRC/dpstrf.c new file mode 100644 index 0000000..adfe0ce --- /dev/null +++ b/min-dgels/base/SRC/dpstrf.c @@ -0,0 +1,471 @@ +/* dpstrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b22 = -1.; +static doublereal c_b24 = 1.; + +/* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, maxlocvar, jb, nb; + doublereal ajj; + integer pvt; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal dtemp; + integer itemp; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal dstop; + logical upper; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dpstf2_(char *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int 
_starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern integer _starpu_dmaxloc_(doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Craig Lucas, University of Manchester / NAG Ltd. */ +/* October, 2008 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPSTRF computes the Cholesky factorization with complete */ +/* pivoting of a real symmetric positive semidefinite matrix A. */ + +/* The factorization has the form */ +/* P' * A * P = U' * U , if UPLO = 'U', */ +/* P' * A * P = L * L', if UPLO = 'L', */ +/* where U is an upper triangular matrix and L is lower triangular, and */ +/* P is stored as vector PIV. */ + +/* This algorithm does not attempt to check that A is positive */ +/* semidefinite. This version of the algorithm calls level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n by n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n by n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* factorization as above. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* PIV (output) INTEGER array, dimension (N) */ +/* PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */ + +/* RANK (output) INTEGER */ +/* The rank of A given by the number of steps the algorithm */ +/* completed. */ + +/* TOL (input) DOUBLE PRECISION */ +/* User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) */ +/* will be used. The algorithm terminates at the (K-1)st step */ +/* if the pivot <= TOL. */ + +/* WORK DOUBLE PRECISION array, dimension (2*N) */ +/* Work space. */ + +/* INFO (output) INTEGER */ +/* < 0: If INFO = -K, the K-th argument had an illegal value, */ +/* = 0: algorithm completed successfully, and */ +/* > 0: the matrix A is either rank deficient with computed rank */ +/* as returned in RANK, or is indefinite. See Section 7 of */ +/* LAPACK Working Note #161 for further information. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --work; + --piv; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPSTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get block size */ + + nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + _starpu_dpstf2_(uplo, n, &a[a_dim1 + 1], lda, &piv[1], rank, tol, &work[1], + info); + goto L200; + + } else { + +/* Initialize PIV */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + piv[i__] = i__; +/* L100: */ + } + +/* Compute stopping value */ + + pvt = 1; + ajj = a[pvt + pvt * a_dim1]; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (a[i__ + i__ * a_dim1] > ajj) { + pvt = i__; + ajj = a[pvt + pvt * a_dim1]; + } + } + if (ajj == 0. || _starpu_disnan_(&ajj)) { + *rank = 0; + *info = 1; + goto L200; + } + +/* Compute stopping value if not supplied */ + + if (*tol < 0.) { + dstop = *n * _starpu_dlamch_("Epsilon") * ajj; + } else { + dstop = *tol; + } + + + if (upper) { + +/* Compute the Cholesky factorization P' * A * P = U' * U */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? 
k >= i__1 : k <= i__1; k += i__2) { + +/* Account for last block not being NB wide */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - k + 1; + jb = min(i__3,i__4); + +/* Set relevant part of first half of WORK to zero, */ +/* holds dot products */ + + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + work[i__] = 0.; +/* L110: */ + } + + i__3 = k + jb - 1; + for (j = k; j <= i__3; ++j) { + +/* Find pivot, test for exit, else swap rows and columns */ +/* Update dot products, compute possible pivots which are */ +/* stored in the second half of WORK */ + + i__4 = *n; + for (i__ = j; i__ <= i__4; ++i__) { + + if (j > k) { +/* Computing 2nd power */ + d__1 = a[j - 1 + i__ * a_dim1]; + work[i__] += d__1 * d__1; + } + work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; + +/* L120: */ + } + + if (j > 1) { + maxlocvar = (*n << 1) - (*n + j) + 1; + itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocvar); + pvt = itemp + j - 1; + ajj = work[*n + pvt]; + if (ajj <= dstop || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L190; + } + } + + if (j != pvt) { + +/* Pivot OK, so can now swap pivot rows and columns */ + + a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; + i__4 = j - 1; + _starpu_dswap_(&i__4, &a[j * a_dim1 + 1], &c__1, &a[pvt * + a_dim1 + 1], &c__1); + if (pvt < *n) { + i__4 = *n - pvt; + _starpu_dswap_(&i__4, &a[j + (pvt + 1) * a_dim1], lda, &a[ + pvt + (pvt + 1) * a_dim1], lda); + } + i__4 = pvt - j - 1; + _starpu_dswap_(&i__4, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 + + pvt * a_dim1], &c__1); + +/* Swap dot products and PIV */ + + dtemp = work[j]; + work[j] = work[pvt]; + work[pvt] = dtemp; + itemp = piv[pvt]; + piv[pvt] = piv[j]; + piv[j] = itemp; + } + + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. 
*/ + + if (j < *n) { + i__4 = j - k; + i__5 = *n - j; + _starpu_dgemv_("Trans", &i__4, &i__5, &c_b22, &a[k + (j + 1) * + a_dim1], lda, &a[k + j * a_dim1], &c__1, & + c_b24, &a[j + (j + 1) * a_dim1], lda); + i__4 = *n - j; + d__1 = 1. / ajj; + _starpu_dscal_(&i__4, &d__1, &a[j + (j + 1) * a_dim1], lda); + } + +/* L130: */ + } + +/* Update trailing matrix, J already incremented */ + + if (k + jb <= *n) { + i__3 = *n - j + 1; + _starpu_dsyrk_("Upper", "Trans", &i__3, &jb, &c_b22, &a[k + j * + a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda); + } + +/* L140: */ + } + + } else { + +/* Compute the Cholesky factorization P' * A * P = L * L' */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { + +/* Account for last block not being NB wide */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - k + 1; + jb = min(i__3,i__4); + +/* Set relevant part of first half of WORK to zero, */ +/* holds dot products */ + + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + work[i__] = 0.; +/* L150: */ + } + + i__3 = k + jb - 1; + for (j = k; j <= i__3; ++j) { + +/* Find pivot, test for exit, else swap rows and columns */ +/* Update dot products, compute possible pivots which are */ +/* stored in the second half of WORK */ + + i__4 = *n; + for (i__ = j; i__ <= i__4; ++i__) { + + if (j > k) { +/* Computing 2nd power */ + d__1 = a[i__ + (j - 1) * a_dim1]; + work[i__] += d__1 * d__1; + } + work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; + +/* L160: */ + } + + if (j > 1) { + maxlocvar = (*n << 1) - (*n + j) + 1; + itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocvar); + pvt = itemp + j - 1; + ajj = work[*n + pvt]; + if (ajj <= dstop || _starpu_disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L190; + } + } + + if (j != pvt) { + +/* Pivot OK, so can now swap pivot rows and columns */ + + a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; + i__4 = j - 1; + _starpu_dswap_(&i__4, &a[j + a_dim1], lda, &a[pvt + a_dim1], + lda); + if (pvt < *n) { + i__4 = *n - pvt; + 
_starpu_dswap_(&i__4, &a[pvt + 1 + j * a_dim1], &c__1, &a[ + pvt + 1 + pvt * a_dim1], &c__1); + } + i__4 = pvt - j - 1; + _starpu_dswap_(&i__4, &a[j + 1 + j * a_dim1], &c__1, &a[pvt + + (j + 1) * a_dim1], lda); + +/* Swap dot products and PIV */ + + dtemp = work[j]; + work[j] = work[pvt]; + work[pvt] = dtemp; + itemp = piv[pvt]; + piv[pvt] = piv[j]; + piv[j] = itemp; + } + + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__4 = *n - j; + i__5 = j - k; + _starpu_dgemv_("No Trans", &i__4, &i__5, &c_b22, &a[j + 1 + k + * a_dim1], lda, &a[j + k * a_dim1], lda, & + c_b24, &a[j + 1 + j * a_dim1], &c__1); + i__4 = *n - j; + d__1 = 1. / ajj; + _starpu_dscal_(&i__4, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } + +/* L170: */ + } + +/* Update trailing matrix, J already incremented */ + + if (k + jb <= *n) { + i__3 = *n - j + 1; + _starpu_dsyrk_("Lower", "No Trans", &i__3, &jb, &c_b22, &a[j + k * + a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda); + } + +/* L180: */ + } + + } + } + +/* Ran to completion, A has full rank */ + + *rank = *n; + + goto L200; +L190: + +/* Rank is the number of steps completed. Set INFO = 1 to signal */ +/* that the factorization cannot be used to solve a system. */ + + *rank = j - 1; + *info = 1; + +L200: + return 0; + +/* End of DPSTRF */ + +} /* _starpu_dpstrf_ */ diff --git a/min-dgels/base/SRC/dptcon.c b/min-dgels/base/SRC/dptcon.c new file mode 100644 index 0000000..0923163 --- /dev/null +++ b/min-dgels/base/SRC/dptcon.c @@ -0,0 +1,184 @@ +/* dptcon.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine DPTCON: reciprocal 1-norm condition number of a symmetric positive definite tridiagonal matrix, computed from its DPTTRF factors D and E */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, + doublereal *anorm, doublereal *rcond, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + integer i__, ix; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTCON computes the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric positive definite tridiagonal matrix */ +/* using the factorization A = L*D*L**T or A = U**T*D*U computed by */ +/* DPTTRF. */ + +/* Norm(inv(A)) is computed by a direct method, and the reciprocal of */ +/* the condition number is computed as */ +/* RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the diagonal matrix D from the */ +/* factorization of A, as computed by DPTTRF.
*/ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) off-diagonal elements of the unit bidiagonal factor */ +/* U or L from the factorization of A, as computed by DPTTRF. */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm of the original matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the */ +/* 1-norm of inv(A) computed in this routine. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The method used is described in Nicholas J. Higham, "Efficient */ +/* Algorithms for Computing the Condition Number of a Tridiagonal */ +/* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments: offset WORK, E and D so they are indexed from 1 below */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*anorm < 0.) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTCON", &i__1); + return 0; + } + +/* Quick return if possible: RCOND = 1 when N = 0, RCOND = 0 when ANORM = 0 */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + +/* Check that D(1:N) is positive; otherwise return with RCOND = 0 and INFO = 0. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= 0.) { + return 0; + } +/* L10: */ + } + +/* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by */ + +/* m(i,j) = abs(A(i,j)), i = j, */ +/* m(i,j) = -abs(A(i,j)), i .ne.
j, */ + +/* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. */ + +/* Solve M(L) * x = e. */ + + work[1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + work[i__] = work[i__ - 1] * (d__1 = e[i__ - 1], abs(d__1)) + 1.; +/* L20: */ + } + +/* Solve D * M(L)' * x = b. */ + + work[*n] /= d__[*n]; + for (i__ = *n - 1; i__ >= 1; --i__) { + work[i__] = work[i__] / d__[i__] + work[i__ + 1] * (d__1 = e[i__], + abs(d__1)); +/* L30: */ + } + +/* Compute AINVNM = max(x(i)), 1<=i<=n. */ + + ix = _starpu_idamax_(n, &work[1], &c__1); + ainvnm = (d__1 = work[ix], abs(d__1)); + +/* Compute the reciprocal condition number; RCOND keeps the value 0 when AINVNM is 0. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of DPTCON */ + +} /* _starpu_dptcon_ */ diff --git a/min-dgels/base/SRC/dpteqr.c b/min-dgels/base/SRC/dpteqr.c new file mode 100644 index 0000000..f082788 --- /dev/null +++ b/min-dgels/base/SRC/dpteqr.c @@ -0,0 +1,244 @@ +/* dpteqr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = 0.; +static doublereal c_b8 = 1.; +static integer c__0 = 0; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__[1] /* was [1][1] */; + integer i__; + doublereal vt[1] /* was [1][1]
*/; + integer nru; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dbdsqr_(char *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *); + integer icompz; + extern /* Subroutine */ int _starpu_dpttrf_(integer *, doublereal *, doublereal *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTEQR computes all eigenvalues and, optionally, eigenvectors of a */ +/* symmetric positive definite tridiagonal matrix by first factoring the */ +/* matrix using DPTTRF, and then calling DBDSQR to compute the singular */ +/* values of the bidiagonal factor. */ + +/* This routine computes the eigenvalues of the positive definite */ +/* tridiagonal matrix to high relative accuracy. This means that if the */ +/* eigenvalues range over many orders of magnitude in size, then the */ +/* small eigenvalues and corresponding eigenvectors will be computed */ +/* more accurately than, for example, with the standard QR method. */ + +/* The eigenvectors of a full or band symmetric positive definite matrix */ +/* can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to */ +/* reduce this matrix to tridiagonal form. (The reduction to tridiagonal */ +/* form, however, may preclude the possibility of obtaining high */ +/* relative accuracy in the small eigenvalues of the original matrix, if */ +/* these eigenvalues range over many orders of magnitude.) */ + +/* Arguments */ +/* ========= */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only. 
*/ +/* = 'V': Compute eigenvectors of original symmetric */ +/* matrix also. Array Z contains the orthogonal */ +/* matrix used to reduce the original matrix to */ +/* tridiagonal form. */ +/* = 'I': Compute eigenvectors of tridiagonal matrix also. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal */ +/* matrix. */ +/* On normal exit, D contains the eigenvalues, in descending */ +/* order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix. */ +/* On exit, E has been destroyed. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* On entry, if COMPZ = 'V', the orthogonal matrix used in the */ +/* reduction to tridiagonal form. */ +/* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the */ +/* original symmetric matrix; */ +/* if COMPZ = 'I', the orthonormal eigenvectors of the */ +/* tridiagonal matrix. */ +/* If INFO > 0 on exit, Z contains the eigenvectors associated */ +/* with only the stored eigenvalues. */ +/* If COMPZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* COMPZ = 'V' or 'I', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, and i is: */ +/* <= N the Cholesky factorization of the matrix could */ +/* not be performed because the i-th principal minor */ +/* was not positive definite. */ +/* > N the SVD algorithm failed to converge; */ +/* if INFO = N+i, i off-diagonal elements of the */ +/* bidiagonal factor did not converge to zero. */ + +/* ===================================================================== */ + +/* .. 
Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (_starpu_lsame_(compz, "N")) { + icompz = 0; + } else if (_starpu_lsame_(compz, "V")) { + icompz = 1; + } else if (_starpu_lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz > 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + if (icompz == 2) { + _starpu_dlaset_("Full", n, n, &c_b7, &c_b8, &z__[z_offset], ldz); + } + +/* Call DPTTRF to factor the matrix. */ + + _starpu_dpttrf_(n, &d__[1], &e[1], info); + if (*info != 0) { + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(d__[i__]); +/* L10: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] *= d__[i__]; +/* L20: */ + } + +/* Call DBDSQR to compute the singular values/vectors of the */ +/* bidiagonal factor. */ + + if (icompz > 0) { + nru = *n; + } else { + nru = 0; + } + _starpu_dbdsqr_("Lower", n, &c__0, &nru, &c__0, &d__[1], &e[1], vt, &c__1, &z__[ + z_offset], ldz, c__, &c__1, &work[1], info); + +/* Square the singular values. 
*/ + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] *= d__[i__]; +/* L30: */ + } + } else { + *info = *n + *info; + } + + return 0; + +/* End of DPTEQR */ + +} /* _starpu_dpteqr_ */ diff --git a/min-dgels/base/SRC/dptrfs.c b/min-dgels/base/SRC/dptrfs.c new file mode 100644 index 0000000..b23cd36 --- /dev/null +++ b/min-dgels/base/SRC/dptrfs.c @@ -0,0 +1,365 @@ +/* dptrfs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer + *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal s, bi, cx, dx, ex; + integer ix, nz; + doublereal eps, safe1, safe2; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer count; + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal lstres; + extern /* Subroutine */ int _starpu_dpttrs_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer 
*, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric positive definite */ +/* and tridiagonal, and provides error bounds and backward error */ +/* estimates for the solution. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix A. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the tridiagonal matrix A. */ + +/* DF (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the diagonal matrix D from the */ +/* factorization computed by DPTTRF. */ + +/* EF (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the unit bidiagonal factor */ +/* L from the factorization computed by DPTTRF. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DPTTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). 
*/ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --d__; + --e; + --df; + --ef; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*ldx < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = 4; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X. Also compute */ +/* abs(A)*abs(x) + abs(b) for use in the backward error bound. 
*/ + + if (*n == 1) { + bi = b[j * b_dim1 + 1]; + dx = d__[1] * x[j * x_dim1 + 1]; + work[*n + 1] = bi - dx; + work[1] = abs(bi) + abs(dx); + } else { + bi = b[j * b_dim1 + 1]; + dx = d__[1] * x[j * x_dim1 + 1]; + ex = e[1] * x[j * x_dim1 + 2]; + work[*n + 1] = bi - dx - ex; + work[1] = abs(bi) + abs(dx) + abs(ex); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + bi = b[i__ + j * b_dim1]; + cx = e[i__ - 1] * x[i__ - 1 + j * x_dim1]; + dx = d__[i__] * x[i__ + j * x_dim1]; + ex = e[i__] * x[i__ + 1 + j * x_dim1]; + work[*n + i__] = bi - cx - dx - ex; + work[i__] = abs(bi) + abs(cx) + abs(dx) + abs(ex); +/* L30: */ + } + bi = b[*n + j * b_dim1]; + cx = e[*n - 1] * x[*n - 1 + j * x_dim1]; + dx = d__[*n] * x[*n + j * x_dim1]; + work[*n + *n] = bi - cx - dx; + work[*n] = abs(bi) + abs(cx) + abs(dx); + } + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L40: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. 
*/ + + _starpu_dpttrs_(n, &c__1, &df[1], &ef[1], &work[*n + 1], n, info); + _starpu_daxpy_(n, &c_b11, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L50: */ + } + ix = _starpu_idamax_(n, &work[1], &c__1); + ferr[j] = work[ix]; + +/* Estimate the norm of inv(A). */ + +/* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by */ + +/* m(i,j) = abs(A(i,j)), i = j, */ +/* m(i,j) = -abs(A(i,j)), i .ne. j, */ + +/* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. */ + +/* Solve M(L) * x = e. */ + + work[1] = 1.; + i__2 = *n; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__] = work[i__ - 1] * (d__1 = ef[i__ - 1], abs(d__1)) + 1.; +/* L60: */ + } + +/* Solve D * M(L)' * x = b. */ + + work[*n] /= df[*n]; + for (i__ = *n - 1; i__ >= 1; --i__) { + work[i__] = work[i__] / df[i__] + work[i__ + 1] * (d__1 = ef[i__], + abs(d__1)); +/* L70: */ + } + +/* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. */ + + ix = _starpu_idamax_(n, &work[1], &c__1); + ferr[j] *= (d__1 = work[ix], abs(d__1)); + +/* Normalize error. 
*/ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L80: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L90: */ + } + + return 0; + +/* End of DPTRFS */ + +} /* _starpu_dptrfs_ */ diff --git a/min-dgels/base/SRC/dptsv.c b/min-dgels/base/SRC/dptsv.c new file mode 100644 index 0000000..0fd5b8c --- /dev/null +++ b/min-dgels/base/SRC/dptsv.c @@ -0,0 +1,130 @@ +/* dptsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpttrf_( + integer *, doublereal *, doublereal *, integer *), _starpu_dpttrs_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTSV computes the solution to a real system of linear equations */ +/* A*X = B, where A is an N-by-N symmetric positive definite tridiagonal */ +/* matrix, and X and B are N-by-NRHS matrices. 
*/ + +/* A is factored as A = L*D*L**T, and the factored form of A is then */ +/* used to solve the system of equations. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. On exit, the n diagonal elements of the diagonal matrix */ +/* D from the factorization A = L*D*L**T. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A. On exit, the (n-1) subdiagonal elements of the */ +/* unit bidiagonal factor L from the L*D*L**T factorization of */ +/* A. (E can also be regarded as the superdiagonal of the unit */ +/* bidiagonal factor U from the U**T*D*U factorization of A.) */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i is not */ +/* positive definite, and the solution has not been */ +/* computed. The factorization has not been completed */ +/* unless i = N. */ + +/* ===================================================================== */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTSV ", &i__1); + return 0; + } + +/* Compute the L*D*L' (or U'*D*U) factorization of A. */ + + _starpu_dpttrf_(n, &d__[1], &e[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + _starpu_dpttrs_(n, nrhs, &d__[1], &e[1], &b[b_offset], ldb, info); + } + return 0; + +/* End of DPTSV */ + +} /* _starpu_dptsv_ */ diff --git a/min-dgels/base/SRC/dptsvx.c b/min-dgels/base/SRC/dptsvx.c new file mode 100644 index 0000000..ceca8e5 --- /dev/null +++ b/min-dgels/base/SRC/dptsvx.c @@ -0,0 +1,283 @@ +/* dptsvx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, + doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + 
doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dptcon_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dptrfs_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *), _starpu_dpttrf_( + integer *, doublereal *, doublereal *, integer *), _starpu_dpttrs_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTSVX uses the factorization A = L*D*L**T to compute the solution */ +/* to a real system of linear equations A*X = B, where A is an N-by-N */ +/* symmetric positive definite tridiagonal matrix and X and B are */ +/* N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L */ +/* is a unit lower bidiagonal matrix and D is diagonal. The */ +/* factorization can also be regarded as having the form */ +/* A = U**T*D*U. */ + +/* 2. If the leading i-by-i principal minor is not positive definite, */ +/* then the routine returns with INFO = i. Otherwise, the factored */ +/* form of A is used to estimate the condition number of the matrix */ +/* A. 
If the reciprocal of the condition number is less than machine */ +/* precision, INFO = N+1 is returned as a warning, but the routine */ +/* still goes on to solve for X and compute error bounds as */ +/* described below. */ + +/* 3. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 4. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of A has been */ +/* supplied on entry. */ +/* = 'F': On entry, DF and EF contain the factored form of A. */ +/* D, E, DF, and EF will not be modified. */ +/* = 'N': The matrix A will be copied to DF and EF and */ +/* factored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix A. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the tridiagonal matrix A. */ + +/* DF (input or output) DOUBLE PRECISION array, dimension (N) */ +/* If FACT = 'F', then DF is an input argument and on entry */ +/* contains the n diagonal elements of the diagonal matrix D */ +/* from the L*D*L**T factorization of A. */ +/* If FACT = 'N', then DF is an output argument and on exit */ +/* contains the n diagonal elements of the diagonal matrix D */ +/* from the L*D*L**T factorization of A. */ + +/* EF (input or output) DOUBLE PRECISION array, dimension (N-1) */ +/* If FACT = 'F', then EF is an input argument and on entry */ +/* contains the (n-1) subdiagonal elements of the unit */ +/* bidiagonal factor L from the L*D*L**T factorization of A. 
*/ +/* If FACT = 'N', then EF is an output argument and on exit */ +/* contains the (n-1) subdiagonal elements of the unit */ +/* bidiagonal factor L from the L*D*L**T factorization of A. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal condition number of the matrix A. If RCOND */ +/* is less than the machine precision (in particular, if */ +/* RCOND = 0), the matrix is singular to working precision. */ +/* This condition is indicated by a return code of INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in any */ +/* element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: the leading minor of order i of A is */ +/* not positive definite, so the factorization */ +/* could not be completed, and the solution has not */ +/* been computed. RCOND = 0 is returned. 
*/ +/* = N+1: U is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --df; + --ef; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + if (! nofact && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldx < max(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTSVX", &i__1); + return 0; + } + + if (nofact) { + +/* Compute the L*D*L' (or U'*D*U) factorization of A. */ + + _starpu_dcopy_(n, &d__[1], &c__1, &df[1], &c__1); + if (*n > 1) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &e[1], &c__1, &ef[1], &c__1); + } + _starpu_dpttrf_(n, &df[1], &ef[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlanst_("1", n, &d__[1], &e[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dptcon_(n, &df[1], &ef[1], &anorm, rcond, &work[1], info); + +/* Compute the solution vectors X. 
*/ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpttrs_(n, nrhs, &df[1], &ef[1], &x[x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + _starpu_dptrfs_(n, nrhs, &d__[1], &e[1], &df[1], &ef[1], &b[b_offset], ldb, &x[ + x_offset], ldx, &ferr[1], &berr[1], &work[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DPTSVX */ + +} /* _starpu_dptsvx_ */ diff --git a/min-dgels/base/SRC/dpttrf.c b/min-dgels/base/SRC/dpttrf.c new file mode 100644 index 0000000..7f15258 --- /dev/null +++ b/min-dgels/base/SRC/dpttrf.c @@ -0,0 +1,181 @@ +/* dpttrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, i4; + doublereal ei; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTTRF computes the L*D*L' factorization of a real symmetric */ +/* positive definite tridiagonal matrix A. 
The factorization may also */ +/* be regarded as having the form A = U'*D*U. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. On exit, the n diagonal elements of the diagonal matrix */ +/* D from the L*D*L' factorization of A. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A. On exit, the (n-1) subdiagonal elements of the */ +/* unit bidiagonal factor L from the L*D*L' factorization of A. */ +/* E can also be regarded as the superdiagonal of the unit */ +/* bidiagonal factor U from the U'*D*U factorization of A. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, the leading minor of order k is not */ +/* positive definite; if k < N, the factorization could not */ +/* be completed, while if k = N, the factorization was */ +/* completed, but D(N) <= 0. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + _starpu_xerbla_("DPTTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the L*D*L' (or U'*D*U) factorization of A. */ + + i4 = (*n - 1) % 4; + i__1 = i4; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= 0.) 
{ + *info = i__; + goto L30; + } + ei = e[i__]; + e[i__] = ei / d__[i__]; + d__[i__ + 1] -= e[i__] * ei; +/* L10: */ + } + + i__1 = *n - 4; + for (i__ = i4 + 1; i__ <= i__1; i__ += 4) { + +/* Drop out of the loop if d(i) <= 0: the matrix is not positive */ +/* definite. */ + + if (d__[i__] <= 0.) { + *info = i__; + goto L30; + } + +/* Solve for e(i) and d(i+1). */ + + ei = e[i__]; + e[i__] = ei / d__[i__]; + d__[i__ + 1] -= e[i__] * ei; + + if (d__[i__ + 1] <= 0.) { + *info = i__ + 1; + goto L30; + } + +/* Solve for e(i+1) and d(i+2). */ + + ei = e[i__ + 1]; + e[i__ + 1] = ei / d__[i__ + 1]; + d__[i__ + 2] -= e[i__ + 1] * ei; + + if (d__[i__ + 2] <= 0.) { + *info = i__ + 2; + goto L30; + } + +/* Solve for e(i+2) and d(i+3). */ + + ei = e[i__ + 2]; + e[i__ + 2] = ei / d__[i__ + 2]; + d__[i__ + 3] -= e[i__ + 2] * ei; + + if (d__[i__ + 3] <= 0.) { + *info = i__ + 3; + goto L30; + } + +/* Solve for e(i+3) and d(i+4). */ + + ei = e[i__ + 3]; + e[i__ + 3] = ei / d__[i__ + 3]; + d__[i__ + 4] -= e[i__ + 3] * ei; +/* L20: */ + } + +/* Check d(n) for positive definiteness. */ + + if (d__[*n] <= 0.) { + *info = *n; + } + +L30: + return 0; + +/* End of DPTTRF */ + +} /* _starpu_dpttrf_ */ diff --git a/min-dgels/base/SRC/dpttrs.c b/min-dgels/base/SRC/dpttrs.c new file mode 100644 index 0000000..9c7842e --- /dev/null +++ b/min-dgels/base/SRC/dpttrs.c @@ -0,0 +1,156 @@ +/* dpttrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int _starpu_dptts2_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DPTTRS solves a tridiagonal system of the form */ +/* A * X = B */ +/* using the L*D*L' factorization of A computed by DPTTRF. D is a */ +/* diagonal matrix specified in the vector D, L is a unit bidiagonal */ +/* matrix whose subdiagonal is specified in the vector E, and X and B */ +/* are N by NRHS matrices. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the tridiagonal matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. 
*/ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the diagonal matrix D from the */ +/* L*D*L' factorization of A. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the unit bidiagonal factor */ +/* L from the L*D*L' factorization of A. E can also be regarded */ +/* as the superdiagonal of the unit bidiagonal factor U from the */ +/* factorization A = U'*D*U. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side vectors B for the system of */ +/* linear equations. */ +/* On exit, the solution vectors, X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DPTTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* Determine the number of right-hand sides to solve at a time. 
*/ + + if (*nrhs == 1) { + nb = 1; + } else { +/* Computing MAX */ + i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DPTTRS", " ", n, nrhs, &c_n1, &c_n1); + nb = max(i__1,i__2); + } + + if (nb >= *nrhs) { + _starpu_dptts2_(n, nrhs, &d__[1], &e[1], &b[b_offset], ldb); + } else { + i__1 = *nrhs; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nrhs - j + 1; + jb = min(i__3,nb); + _starpu_dptts2_(n, &jb, &d__[1], &e[1], &b[j * b_dim1 + 1], ldb); +/* L10: */ + } + } + + return 0; + +/* End of DPTTRS */ + +} /* _starpu_dpttrs_ */ diff --git a/min-dgels/base/SRC/dptts2.c b/min-dgels/base/SRC/dptts2.c new file mode 100644 index 0000000..3f2dc73 --- /dev/null +++ b/min-dgels/base/SRC/dptts2.c @@ -0,0 +1,131 @@ +/* dptts2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, + doublereal *e, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DPTTS2 solves a tridiagonal system of the form */ +/* A * X = B */ +/* using the L*D*L' factorization of A computed by DPTTRF. D is a */ +/* diagonal matrix specified in the vector D, L is a unit bidiagonal */ +/* matrix whose subdiagonal is specified in the vector E, and X and B */ +/* are N by NRHS matrices. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the tridiagonal matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the diagonal matrix D from the */ +/* L*D*L' factorization of A. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the unit bidiagonal factor */ +/* L from the L*D*L' factorization of A. E can also be regarded */ +/* as the superdiagonal of the unit bidiagonal factor U from the */ +/* factorization A = U'*D*U. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side vectors B for the system of */ +/* linear equations. */ +/* On exit, the solution vectors, X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (*n <= 1) { + if (*n == 1) { + d__1 = 1. / d__[1]; + _starpu_dscal_(nrhs, &d__1, &b[b_offset], ldb); + } + return 0; + } + +/* Solve A * X = B using the factorization A = L*D*L', */ +/* overwriting each right hand side vector with its solution. 
*/ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L * x = b. */ + + i__2 = *n; + for (i__ = 2; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[i__ - 1 + j * b_dim1] * e[i__ - 1]; +/* L10: */ + } + +/* Solve D * L' * x = b. */ + + b[*n + j * b_dim1] /= d__[*n]; + for (i__ = *n - 1; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = b[i__ + j * b_dim1] / d__[i__] - b[i__ + 1 + + j * b_dim1] * e[i__]; +/* L20: */ + } +/* L30: */ + } + + return 0; + +/* End of DPTTS2 */ + +} /* _starpu_dptts2_ */ diff --git a/min-dgels/base/SRC/drscl.c b/min-dgels/base/SRC/drscl.c new file mode 100644 index 0000000..88fcf6f --- /dev/null +++ b/min-dgels/base/SRC/drscl.c @@ -0,0 +1,134 @@ +/* drscl.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, + integer *incx) +{ + doublereal mul, cden; + logical done; + doublereal cnum, cden1, cnum1; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DRSCL multiplies an n-element real vector x by the real scalar 1/a. 
*/ +/* This is done without overflow or underflow as long as */ +/* the final result x/a does not overflow or underflow. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of components of the vector x. */ + +/* SA (input) DOUBLE PRECISION */ +/* The scalar a which is used to divide each component of x. */ +/* SA must be >= 0, or the subroutine will divide by zero. */ + +/* SX (input/output) DOUBLE PRECISION array, dimension */ +/* (1+(N-1)*abs(INCX)) */ +/* The n-element vector x. */ + +/* INCX (input) INTEGER */ +/* The increment between successive values of the vector SX. */ +/* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = _starpu_dlamch_("S"); + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + +/* Initialize the denominator to SA and the numerator to 1. */ + + cden = *sa; + cnum = 1.; + +L10: + cden1 = cden * smlnum; + cnum1 = cnum / bignum; + if (abs(cden1) > abs(cnum) && cnum != 0.) { + +/* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. */ + + mul = smlnum; + done = FALSE_; + cden = cden1; + } else if (abs(cnum1) > abs(cden)) { + +/* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. */ + + mul = bignum; + done = FALSE_; + cnum = cnum1; + } else { + +/* Multiply X by CNUM / CDEN and return. */ + + mul = cnum / cden; + done = TRUE_; + } + +/* Scale the vector X by MUL */ + + _starpu_dscal_(n, &mul, &sx[1], incx); + + if (! 
done) { + goto L10; + } + + return 0; + +/* End of DRSCL */ + +} /* _starpu_drscl_ */ diff --git a/min-dgels/base/SRC/dsbev.c b/min-dgels/base/SRC/dsbev.c new file mode 100644 index 0000000..c241a5c --- /dev/null +++ b/min-dgels/base/SRC/dsbev.c @@ -0,0 +1,268 @@ +/* dsbev.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b11 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical lower, wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + 
doublereal bignum; + extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dsterf_( + integer *, doublereal *, doublereal *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBEV computes all the eigenvalues and, optionally, eigenvectors of */ +/* a real symmetric band matrix A. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, AB is overwritten by values generated during the */ +/* reduction to tridiagonal form. 
If UPLO = 'U', the first */ +/* superdiagonal and the diagonal of the tridiagonal matrix T */ +/* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* the diagonal and first subdiagonal of T are returned in the */ +/* first two rows of AB. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD + 1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2)) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of an intermediate tridiagonal */ +/* form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lower = _starpu_lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! 
(lower || _starpu_lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBEV ", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (lower) { + w[1] = ab[ab_dim1 + 1]; + } else { + w[1] = ab[*kd + 1 + ab_dim1]; + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + _starpu_dlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + _starpu_dlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ + + inde = 1; + indwrk = inde + *n; + _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ + z_offset], ldz, &work[indwrk], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ + indwrk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. 
/ sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + + return 0; + +/* End of DSBEV */ + +} /* _starpu_dsbev_ */ diff --git a/min-dgels/base/SRC/dsbevd.c b/min-dgels/base/SRC/dsbevd.c new file mode 100644 index 0000000..99ec9b4 --- /dev/null +++ b/min-dgels/base/SRC/dsbevd.c @@ -0,0 +1,338 @@ +/* dsbevd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b11 = 1.; +static doublereal c_b18 = 0.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm, rmin, rmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo, lwmin; + logical lower, wantz; + integer indwk2, llwrk2; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + 
doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *), _starpu_dlacpy_(char *, integer + *, integer *, doublereal *, integer *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dsterf_( + integer *, doublereal *, doublereal *, integer *); + integer indwrk, liwmin; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBEVD computes all the eigenvalues and, optionally, eigenvectors of */ +/* a real symmetric band matrix A. If eigenvectors are desired, it uses */ +/* a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. 
*/ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, AB is overwritten by values generated during the */ +/* reduction to tridiagonal form. If UPLO = 'U', the first */ +/* superdiagonal and the diagonal of the tridiagonal matrix T */ +/* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* the diagonal and first subdiagonal of T are returned in the */ +/* first two rows of AB. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD + 1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. 
*/ +/* IF N <= 1, LWORK must be at least 1. */ +/* If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. */ +/* If JOBZ = 'V' and N > 2, LWORK must be at least */ +/* ( 1 + 5*N + 2*N**2 ). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array LIWORK. */ +/* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of an intermediate tridiagonal */ +/* form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lower = _starpu_lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = *n << 1; + } + } + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || _starpu_lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*liwork < liwmin && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = ab[ab_dim1 + 1]; + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); + iscale = 0; + if (anrm > 0. 
&& anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + _starpu_dlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + _starpu_dlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ + z_offset], ldz, &work[indwrk], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + _starpu_dgemm_("N", "N", n, n, n, &c_b11, &z__[z_offset], ldz, &work[indwrk], + n, &c_b18, &work[indwk2], n); + _starpu_dlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. / sigma; + _starpu_dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + return 0; + +/* End of DSBEVD */ + +} /* _starpu_dsbevd_ */ diff --git a/min-dgels/base/SRC/dsbevx.c b/min-dgels/base/SRC/dsbevx.c new file mode 100644 index 0000000..c081250 --- /dev/null +++ b/min-dgels/base/SRC/dsbevx.c @@ -0,0 +1,520 @@ +/* dsbevx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b14 = 1.; +static integer c__1 = 1; +static doublereal c_b34 = 0.; + +/* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * + ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *ifail, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, + i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, jj; + doublereal eps, vll, vuu, tmp1; + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, indee; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical lower, wantz; + extern doublereal _starpu_dlamch_(char *); + 
logical alleig, indeig; + integer iscale, indibl; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + logical valeig; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal abstll, bignum; + extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo; + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer nsplit; + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric band matrix A. 
Eigenvalues and eigenvectors can */ +/* be selected by specifying either a range of values or a range of */ +/* indices for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found; */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found; */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ + +/* On exit, AB is overwritten by values generated during the */ +/* reduction to tridiagonal form. If UPLO = 'U', the first */ +/* superdiagonal and the diagonal of the tridiagonal matrix T */ +/* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* the diagonal and first subdiagonal of T are returned in the */ +/* first two rows of AB. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD + 1. */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* If JOBZ = 'V', the N-by-N orthogonal matrix used in the */ +/* reduction to tridiagonal form. */ +/* If JOBZ = 'N', the array Q is not referenced. 
*/ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. If JOBZ = 'V', then */ +/* LDQ >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing AB to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. 
*/ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If an eigenvector fails to converge, then that column of Z */ +/* contains the latest approximation to the eigenvector, and the */ +/* index of the eigenvector is returned in IFAIL. */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (7*N) */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* Their indices are stored in array IFAIL. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: shift each array pointer so the 1-based Fortran subscripts used below address the caller's storage */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + lower = _starpu_lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || _starpu_lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (wantz && *ldq < max(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -12; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBEVX", &i__1); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + *m = 1; + if (lower) { + tmp1 = ab[ab_dim1 + 1]; /* AB(1,1) */ + } else { + tmp1 = ab[*kd + 1 + ab_dim1]; /* AB(KD+1,1) */ + } + if (valeig) { + if (! (*vl < tmp1 && *vu >= tmp1)) { + *m = 0; + } + } + if (*m == 1) { + w[1] = tmp1; + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. 
/ sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. 
*/ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.; + vuu = 0.; + } + anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + _starpu_dlascl_("B", kd, kd, &c_b14, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + _starpu_dlascl_("Q", kd, kd, &c_b14, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indwrk = inde + *n; + _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &work[indd], &work[inde], + &q[q_offset], ldq, &work[indwrk], &iinfo); /* NOTE: iinfo is never examined after this call */ + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or DSTEQR. If this fails for some */ +/* eigenvalue, then try DSTEBZ. */ + + test = FALSE_; /* becomes TRUE_ when RANGE='I' selects indices 1..N, i.e. every eigenvalue */ + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); + indee = indwrk + (*n << 1); /* presumably a scratch copy so work[inde..] stays intact for the DSTEBZ fallback -- TODO confirm */ + if (! 
wantz) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsterf_(n, &w[1], &work[indee], info); + } else { + _starpu_dlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ + indwrk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; /* DSTERF/DSTEQR reported a failure: clear INFO and retry with DSTEBZ below */ + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. 
*/ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwo = indisp + *n; + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ + indwrk], &iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ + indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & + ifail[1], info); + +/* Apply orthogonal matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + _starpu_dgemv_("N", n, n, &c_b14, &q[q_offset], ldq, &work[1], &c__1, & + c_b34, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; /* with INFO != 0 only the first INFO-1 entries of W are rescaled */ + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. 
*/ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; /* selection sort: i__ = index of the smallest W(jj), jj > j, that is below W(j); 0 means W(j) is already in place */ + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of DSBEVX */ + +} /* _starpu_dsbevx_ */ diff --git a/min-dgels/base/SRC/dsbgst.c b/min-dgels/base/SRC/dsbgst.c new file mode 100644 index 0000000..ed4d10f --- /dev/null +++ b/min-dgels/base/SRC/dsbgst.c @@ -0,0 +1,1755 @@ +/* dsbgst.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b8 = 0.; +static doublereal c_b9 = 1.; +static integer c__1 = 1; +static doublereal c_b20 = -1.; + +/* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, x_dim1, x_offset, i__1, + i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + integer i__, j, k, l, m; + doublereal t; + integer i0, i1, i2, j1, j2; + doublereal ra; + 
integer nr, nx, ka1, kb1; + doublereal ra1; + integer j1t, j2t; + doublereal bii; + integer kbt, nrt, inca; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_drot_(integer *, doublereal *, integer *, doublereal * +, integer *, doublereal *, doublereal *), _starpu_dscal_(integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + logical upper, wantx; + extern /* Subroutine */ int _starpu_dlar2v_(integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), _starpu_dlartg_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_( + char *, integer *), _starpu_dlargv_(integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *); + logical update; + extern /* Subroutine */ int _starpu_dlartv_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBGST reduces a real symmetric-definite banded generalized */ +/* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, */ +/* such that C has the same bandwidth as A. */ + +/* B must have been previously factorized as S**T*S by DPBSTF, using a */ +/* split Cholesky factorization. A is overwritten by C = X**T*A*X, where */ +/* X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the */ +/* bandwidth of A. 
*/ + +/* Arguments */ +/* ========= */ + +/* VECT (input) CHARACTER*1 */ +/* = 'N': do not form the transformation matrix X; */ +/* = 'V': form X. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* KA (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ + +/* KB (input) INTEGER */ +/* The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first ka+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ + +/* On exit, the transformed matrix X**T*A*X, stored in the same */ +/* format as A. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KA+1. */ + +/* BB (input) DOUBLE PRECISION array, dimension (LDBB,N) */ +/* The banded factor S from the split Cholesky factorization of */ +/* B, as returned by DPBSTF, stored in the first KB+1 rows of */ +/* the array. */ + +/* LDBB (input) INTEGER */ +/* The leading dimension of the array BB. LDBB >= KB+1. */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,N) */ +/* If VECT = 'V', the n-by-n matrix X. */ +/* If VECT = 'N', the array X is not referenced. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. */ +/* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1; + bb -= bb_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --work; + + /* Function Body */ + wantx = _starpu_lsame_(vect, "V"); + upper = _starpu_lsame_(uplo, "U"); + ka1 = *ka + 1; + kb1 = *kb + 1; + *info = 0; + if (! wantx && ! _starpu_lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldx < 1 || wantx && *ldx < max(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBGST", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + inca = *ldab * ka1; + +/* Initialize X to the unit matrix, if needed */ + + if (wantx) { + _starpu_dlaset_("Full", n, n, &c_b8, &c_b9, &x[x_offset], ldx); + } + +/* Set M to the splitting point m. It must be the same value as is */ +/* used in DPBSTF. The chosen value allows the arrays WORK and RWORK */ +/* to be of dimension (N). 
*/ + + m = (*n + *kb) / 2; + +/* The routine works in two phases, corresponding to the two halves */ +/* of the split Cholesky factorization of B as S**T*S where */ + +/* S = ( U ) */ +/* ( M L ) */ + +/* with U upper triangular of order m, and L lower triangular of */ +/* order n-m. S has the same bandwidth as B. */ + +/* S is treated as a product of elementary matrices: */ + +/* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) */ + +/* where S(i) is determined by the i-th row of S. */ + +/* In phase 1, the index i takes the values n, n-1, ... , m+1; */ +/* in phase 2, it takes the values 1, 2, ... , m. */ + +/* For each value of i, the current matrix A is updated by forming */ +/* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside */ +/* the band of A. The bulge is then pushed down toward the bottom of */ +/* A in phase 1, and up toward the top of A in phase 2, by applying */ +/* plane rotations. */ + +/* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 */ +/* of them are linearly independent, so annihilating a bulge requires */ +/* only 2*kb-1 plane rotations. The rotations are divided into a 1st */ +/* set of kb-1 rotations, and a 2nd set of kb rotations. */ + +/* Wherever possible, rotations are generated and applied in vector */ +/* operations of length NR between the indices J1 and J2 (sometimes */ +/* replaced by modified values NRT, J1T or J2T). */ + +/* The cosines and sines of the rotations are stored in the array */ +/* WORK. The cosines of the 1st set of rotations are stored in */ +/* elements n+2:n+m-kb-1 and the sines of the 1st set in elements */ +/* 2:m-kb-1; the cosines of the 2nd set are stored in elements */ +/* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. 
*/ + +/* The bulges are not formed explicitly; nonzero elements outside the */ +/* band are created only when they are required for generating new */ +/* rotations; they are stored in the array WORK, in positions where */ +/* they are later overwritten by the sines of the rotations which */ +/* annihilate them. */ + +/* **************************** Phase 1 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = N, M + 1, -1 */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M + KA + 1, N - 1 */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. */ + + update = TRUE_; + i__ = *n + 1; +L10: + if (update) { + --i__; +/* Computing MIN */ + i__1 = *kb, i__2 = i__ - 1; + kbt = min(i__1,i__2); + i0 = i__ - 1; +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i1 = min(i__1,i__2); + i2 = i__ - kbt + ka1; + if (i__ < m + 1) { + update = FALSE_; + ++i__; + i0 = m; + if (*ka == 0) { + goto L480; + } + goto L10; + } + } else { + i__ += *ka; + if (i__ > *n - 1) { + goto L480; + } + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**T * A * inv(S(i)) */ + + bii = bb[kb1 + i__ * bb_dim1]; + i__1 = i1; + for (j = i__; j <= i__1; ++j) { + ab[i__ - j + ka1 + j * ab_dim1] /= bii; +/* L20: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__3 = i__; + for (j = max(i__1,i__2); j <= i__3; ++j) { + ab[j - i__ + ka1 + i__ * ab_dim1] /= bii; +/* L30: */ + } + i__3 = i__ - 1; + for (k = i__ - kbt; k <= i__3; ++k) { + i__1 = k; + for (j = i__ - kbt; j <= i__1; ++j) { + ab[j - k + ka1 + k * ab_dim1] = ab[j - k + ka1 + k * + ab_dim1] - bb[j - i__ + kb1 + i__ * bb_dim1] * ab[ + k - i__ + ka1 + i__ * ab_dim1] - bb[k - i__ + kb1 + + i__ * bb_dim1] * ab[j - i__ + 
ka1 + i__ * + ab_dim1] + ab[ka1 + i__ * ab_dim1] * bb[j - i__ + + kb1 + i__ * bb_dim1] * bb[k - i__ + kb1 + i__ * + bb_dim1]; +/* L40: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__4 = i__ - kbt - 1; + for (j = max(i__1,i__2); j <= i__4; ++j) { + ab[j - k + ka1 + k * ab_dim1] -= bb[k - i__ + kb1 + i__ * + bb_dim1] * ab[j - i__ + ka1 + i__ * ab_dim1]; +/* L50: */ + } +/* L60: */ + } + i__3 = i1; + for (j = i__; j <= i__3; ++j) { +/* Computing MAX */ + i__4 = j - *ka, i__1 = i__ - kbt; + i__2 = i__ - 1; + for (k = max(i__4,i__1); k <= i__2; ++k) { + ab[k - j + ka1 + j * ab_dim1] -= bb[k - i__ + kb1 + i__ * + bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1]; +/* L70: */ + } +/* L80: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__3 = *n - m; + d__1 = 1. / bii; + _starpu_dscal_(&i__3, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__3 = *n - m; + _starpu_dger_(&i__3, &kbt, &c_b20, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kb1 - kbt + i__ * bb_dim1], &c__1, &x[m + + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + ra1 = ab[i__ - i1 + ka1 + i1 * ab_dim1]; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i,i-k+ka+1) */ + + _starpu_dlartg_(&ab[k + 1 + (i__ - k + *ka) * ab_dim1], &ra1, & + work[*n + i__ - k + *ka - m], &work[i__ - k + *ka + - m], &ra); + +/* create nonzero element a(i-k,i-k+ka+1) outside the */ +/* band and store it in WORK(i-k) */ + + t = -bb[kb1 - k + i__ * bb_dim1] * ra1; + work[i__ - k] = work[*n + i__ - k + *ka - m] * t - work[ + i__ - k + *ka - m] * ab[(i__ - k + *ka) * ab_dim1 + + 1]; + ab[(i__ - k 
+ *ka) * ab_dim1 + 1] = work[i__ - k + *ka - + m] * t + work[*n + i__ - k + *ka - m] * ab[(i__ - + k + *ka) * ab_dim1 + 1]; + ra1 = ra; + } + } +/* Computing MAX */ + i__2 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + max(i__2,i__4) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__2 = j2, i__4 = i__ + (*ka << 1) - k + 1; + j2t = max(i__2,i__4); + } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__2 = j1; + i__4 = ka1; + for (j = j2t; i__4 < 0 ? j >= i__2 : j <= i__2; j += i__4) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j-m) */ + + work[j - m] *= ab[(j + 1) * ab_dim1 + 1]; + ab[(j + 1) * ab_dim1 + 1] = work[*n + j - m] * ab[(j + 1) * + ab_dim1 + 1]; +/* L90: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + _starpu_dlargv_(&nrt, &ab[j2t * ab_dim1 + 1], &inca, &work[j2t - m], & + ka1, &work[*n + j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + _starpu_dlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2 - + m], &work[j2 - m], &ka1); +/* L100: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, &work[ + *n + j2 - m], &work[j2 - m], &ka1); + + } + +/* start applying rotations in 1st set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + work[*n + j2 - m], &work[j2 - m], &ka1); + } +/* L110: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__4 = 
j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? j >= i__4 : j <= i__4; j += i__2) { + i__1 = *n - m; + _starpu_drot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &work[*n + j - m], &work[j + - m]); +/* L120: */ + } + } +/* L130: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt,i-kbt+ka+1) outside the */ +/* band and store it in WORK(i-kbt) */ + + work[i__ - kbt] = -bb[kb1 - kbt + i__ * bb_dim1] * ra1; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__3,i__2) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__3,i__2) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + (j2 - l + 1) * ab_dim1], &inca, &ab[ + l + 1 + (j2 - l + 1) * ab_dim1], &inca, &work[*n + + j2 - *ka], &work[j2 - *ka], &ka1); + } +/* L140: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__3 = j2; + i__2 = -ka1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { + work[j] = work[j - *ka]; + work[*n + j] = work[*n + j - *ka]; +/* L150: */ + } + i__2 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? 
j >= i__2 : j <= i__2; j += i__3) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j) */ + + work[j] *= ab[(j + 1) * ab_dim1 + 1]; + ab[(j + 1) * ab_dim1 + 1] = work[*n + j] * ab[(j + 1) * + ab_dim1 + 1]; +/* L160: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + work[i__ - k + *ka] = work[i__ - k]; + } + } +/* L170: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__3,i__2) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[j2 * ab_dim1 + 1], &inca, &work[j2], &ka1, & + work[*n + j2], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + _starpu_dlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2], + &work[j2], &ka1); +/* L180: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, &work[ + *n + j2], &work[j2], &ka1); + + } + +/* start applying rotations in 2nd set from the left */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + work[*n + j2], &work[j2], &ka1); + } +/* L190: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? 
j >= i__3 : j <= i__3; j += i__2) { + i__4 = *n - m; + _starpu_drot_(&i__4, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &work[*n + j], &work[j]); +/* L200: */ + } + } +/* L210: */ + } + + i__2 = *kb - 1; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + i__3 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + max(i__3,i__4) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + work[*n + j2 - m], &work[j2 - m], &ka1); + } +/* L220: */ + } +/* L230: */ + } + + if (*kb > 1) { + i__2 = i__ - *kb + (*ka << 1) + 1; + for (j = *n - 1; j >= i__2; --j) { + work[*n + j - m] = work[*n + j - *ka - m]; + work[j - m] = work[j - *ka - m]; +/* L240: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**T * A * inv(S(i)) */ + + bii = bb[i__ * bb_dim1 + 1]; + i__2 = i1; + for (j = i__; j <= i__2; ++j) { + ab[j - i__ + 1 + i__ * ab_dim1] /= bii; +/* L250: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__4 = i__; + for (j = max(i__2,i__3); j <= i__4; ++j) { + ab[i__ - j + 1 + j * ab_dim1] /= bii; +/* L260: */ + } + i__4 = i__ - 1; + for (k = i__ - kbt; k <= i__4; ++k) { + i__2 = k; + for (j = i__ - kbt; j <= i__2; ++j) { + ab[k - j + 1 + j * ab_dim1] = ab[k - j + 1 + j * ab_dim1] + - bb[i__ - j + 1 + j * bb_dim1] * ab[i__ - k + 1 + + k * ab_dim1] - bb[i__ - k + 1 + k * bb_dim1] * + ab[i__ - j + 1 + j * ab_dim1] + ab[i__ * ab_dim1 + + 1] * bb[i__ - j + 1 + j * bb_dim1] * bb[i__ - k + + 1 + k * bb_dim1]; +/* L270: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__1 = i__ - kbt - 1; + for (j = max(i__2,i__3); j <= i__1; ++j) { + ab[k - j + 1 + j * ab_dim1] -= bb[i__ - k + 1 + k * + bb_dim1] * ab[i__ - j + 1 + j * ab_dim1]; +/* L280: */ + } +/* L290: */ + } + i__4 
= i1; + for (j = i__; j <= i__4; ++j) { +/* Computing MAX */ + i__1 = j - *ka, i__2 = i__ - kbt; + i__3 = i__ - 1; + for (k = max(i__1,i__2); k <= i__3; ++k) { + ab[j - k + 1 + k * ab_dim1] -= bb[i__ - k + 1 + k * + bb_dim1] * ab[j - i__ + 1 + i__ * ab_dim1]; +/* L300: */ + } +/* L310: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__4 = *n - m; + d__1 = 1. / bii; + _starpu_dscal_(&i__4, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__4 = *n - m; + i__3 = *ldbb - 1; + _starpu_dger_(&i__4, &kbt, &c_b20, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kbt + 1 + (i__ - kbt) * bb_dim1], &i__3, + &x[m + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + ra1 = ab[i1 - i__ + 1 + i__ * ab_dim1]; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i-k+ka+1,i) */ + + _starpu_dlartg_(&ab[ka1 - k + i__ * ab_dim1], &ra1, &work[*n + + i__ - k + *ka - m], &work[i__ - k + *ka - m], &ra) + ; + +/* create nonzero element a(i-k+ka+1,i-k) outside the */ +/* band and store it in WORK(i-k) */ + + t = -bb[k + 1 + (i__ - k) * bb_dim1] * ra1; + work[i__ - k] = work[*n + i__ - k + *ka - m] * t - work[ + i__ - k + *ka - m] * ab[ka1 + (i__ - k) * ab_dim1] + ; + ab[ka1 + (i__ - k) * ab_dim1] = work[i__ - k + *ka - m] * + t + work[*n + i__ - k + *ka - m] * ab[ka1 + (i__ + - k) * ab_dim1]; + ra1 = ra; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + max(i__3,i__1) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__3 = j2, i__1 = i__ + (*ka << 1) - k + 1; + j2t = max(i__3,i__1); 
+ } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__3 = j1; + i__1 = ka1; + for (j = j2t; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j-m) */ + + work[j - m] *= ab[ka1 + (j - *ka + 1) * ab_dim1]; + ab[ka1 + (j - *ka + 1) * ab_dim1] = work[*n + j - m] * ab[ka1 + + (j - *ka + 1) * ab_dim1]; +/* L320: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + _starpu_dlargv_(&nrt, &ab[ka1 + (j2t - *ka) * ab_dim1], &inca, &work[ + j2t - m], &ka1, &work[*n + j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + _starpu_dlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &work[*n + j2 + - m], &work[j2 - m], &ka1); +/* L330: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &work[*n + j2 - m], + &work[j2 - m], &ka1); + + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + + j2 - m], &work[j2 - m], &ka1); + } +/* L340: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? 
j >= i__1 : j <= i__1; j += i__3) { + i__2 = *n - m; + _starpu_drot_(&i__2, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &work[*n + j - m], &work[j + - m]); +/* L350: */ + } + } +/* L360: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt+ka+1,i-kbt) outside the */ +/* band and store it in WORK(i-kbt) */ + + work[i__ - kbt] = -bb[kbt + 1 + (i__ - kbt) * bb_dim1] * ra1; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j2 - *ka) * ab_dim1], & + inca, &ab[ka1 - l + (j2 - *ka + 1) * ab_dim1], & + inca, &work[*n + j2 - *ka], &work[j2 - *ka], &ka1) + ; + } +/* L370: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__4 = j2; + i__3 = -ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + work[j] = work[j - *ka]; + work[*n + j] = work[*n + j - *ka]; +/* L380: */ + } + i__3 = j1; + i__4 = ka1; + for (j = j2; i__4 < 0 ? 
j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j) */ + + work[j] *= ab[ka1 + (j - *ka + 1) * ab_dim1]; + ab[ka1 + (j - *ka + 1) * ab_dim1] = work[*n + j] * ab[ka1 + ( + j - *ka + 1) * ab_dim1]; +/* L390: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + work[i__ - k + *ka] = work[i__ - k]; + } + } +/* L400: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + max(i__4,i__3) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[ka1 + (j2 - *ka) * ab_dim1], &inca, &work[j2] +, &ka1, &work[*n + j2], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + _starpu_dlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &work[*n + j2] +, &work[j2], &ka1); +/* L410: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &work[*n + j2], & + work[j2], &ka1); + + } + +/* start applying rotations in 2nd set from the right */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + + j2], &work[j2], &ka1); + } +/* L420: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? 
j >= i__4 : j <= i__4; j += i__3) { + i__1 = *n - m; + _starpu_drot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &work[*n + j], &work[j]); +/* L430: */ + } + } +/* L440: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + max(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + + j2 - m], &work[j2 - m], &ka1); + } +/* L450: */ + } +/* L460: */ + } + + if (*kb > 1) { + i__3 = i__ - *kb + (*ka << 1) + 1; + for (j = *n - 1; j >= i__3; --j) { + work[*n + j - m] = work[*n + j - *ka - m]; + work[j - m] = work[j - *ka - m]; +/* L470: */ + } + } + + } + + goto L10; + +L480: + +/* **************************** Phase 2 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = 1, M */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M - KA - 1, 2, -1 */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. 
*/ + + update = TRUE_; + i__ = 0; +L490: + if (update) { + ++i__; +/* Computing MIN */ + i__3 = *kb, i__4 = m - i__; + kbt = min(i__3,i__4); + i0 = i__ + 1; +/* Computing MAX */ + i__3 = 1, i__4 = i__ - *ka; + i1 = max(i__3,i__4); + i2 = i__ + kbt - ka1; + if (i__ > m) { + update = FALSE_; + --i__; + i0 = m + 1; + if (*ka == 0) { + return 0; + } + goto L490; + } + } else { + i__ -= *ka; + if (i__ < 2) { + return 0; + } + } + + if (i__ < m - kbt) { + nx = m; + } else { + nx = *n; + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**T * A * inv(S(i)) */ + + bii = bb[kb1 + i__ * bb_dim1]; + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + ab[j - i__ + ka1 + i__ * ab_dim1] /= bii; +/* L500: */ + } +/* Computing MIN */ + i__4 = *n, i__1 = i__ + *ka; + i__3 = min(i__4,i__1); + for (j = i__; j <= i__3; ++j) { + ab[i__ - j + ka1 + j * ab_dim1] /= bii; +/* L510: */ + } + i__3 = i__ + kbt; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = i__ + kbt; + for (j = k; j <= i__4; ++j) { + ab[k - j + ka1 + j * ab_dim1] = ab[k - j + ka1 + j * + ab_dim1] - bb[i__ - j + kb1 + j * bb_dim1] * ab[ + i__ - k + ka1 + k * ab_dim1] - bb[i__ - k + kb1 + + k * bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1] + + ab[ka1 + i__ * ab_dim1] * bb[i__ - j + kb1 + j * + bb_dim1] * bb[i__ - k + kb1 + k * bb_dim1]; +/* L520: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__4 = min(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__4; ++j) { + ab[k - j + ka1 + j * ab_dim1] -= bb[i__ - k + kb1 + k * + bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1]; +/* L530: */ + } +/* L540: */ + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__4 = min(i__1,i__2); + for (k = i__ + 1; k <= i__4; ++k) { + ab[j - k + ka1 + k * ab_dim1] -= bb[i__ - k + kb1 + k * + bb_dim1] * ab[j - i__ + ka1 + i__ * ab_dim1]; +/* L550: */ + } +/* L560: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + d__1 = 
1. / bii; + _starpu_dscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + i__3 = *ldbb - 1; + _starpu_dger_(&nx, &kbt, &c_b20, &x[i__ * x_dim1 + 1], &c__1, &bb[ + *kb + (i__ + 1) * bb_dim1], &i__3, &x[(i__ + 1) * + x_dim1 + 1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + ra1 = ab[i1 - i__ + ka1 + i__ * ab_dim1]; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i+k-ka-1,i) */ + + _starpu_dlartg_(&ab[k + 1 + i__ * ab_dim1], &ra1, &work[*n + i__ + + k - *ka], &work[i__ + k - *ka], &ra); + +/* create nonzero element a(i+k-ka-1,i+k) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + t = -bb[kb1 - k + (i__ + k) * bb_dim1] * ra1; + work[m - *kb + i__ + k] = work[*n + i__ + k - *ka] * t - + work[i__ + k - *ka] * ab[(i__ + k) * ab_dim1 + 1]; + ab[(i__ + k) * ab_dim1 + 1] = work[i__ + k - *ka] * t + + work[*n + i__ + k - *ka] * ab[(i__ + k) * ab_dim1 + + 1]; + ra1 = ra; + } + } +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - max(i__4,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__4 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = min(i__4,i__1); + } else { + j2t = j2; + } + nrt = (j2t + *ka - 1) / ka1; + i__4 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? 
j >= i__4 : j <= i__4; j += i__1) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(j) */ + + work[j] *= ab[(j + *ka - 1) * ab_dim1 + 1]; + ab[(j + *ka - 1) * ab_dim1 + 1] = work[*n + j] * ab[(j + *ka + - 1) * ab_dim1 + 1]; +/* L570: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + _starpu_dlargv_(&nrt, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[j1], + &ka1, &work[*n + j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + _starpu_dlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &work[*n + + j1], &work[j1], &ka1); +/* L580: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &work[*n + + j1], &work[j1], &ka1); + + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &work[*n + j1t], & + work[j1t], &ka1); + } +/* L590: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? 
j >= i__1 : j <= i__1; j += i__4) { + _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &work[*n + j], &work[j]); +/* L600: */ + } + } +/* L610: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt-ka-1,i+kbt) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + work[m - *kb + i__ + kbt] = -bb[kb1 - kbt + (i__ + kbt) * + bb_dim1] * ra1; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__4 = k + i0 - m; + j2 = i__ + k + 1 - max(i__3,i__4) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - max(i__3,i__4) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + (j1t + *ka) * ab_dim1], &inca, &ab[ + l + 1 + (j1t + *ka - 1) * ab_dim1], &inca, &work[* + n + m - *kb + j1t + *ka], &work[m - *kb + j1t + * + ka], &ka1); + } +/* L620: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + work[m - *kb + j] = work[m - *kb + j + *ka]; + work[*n + m - *kb + j] = work[*n + m - *kb + j + *ka]; +/* L630: */ + } + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? 
j >= i__4 : j <= i__4; j += i__3) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(m-kb+j) */ + + work[m - *kb + j] *= ab[(j + *ka - 1) * ab_dim1 + 1]; + ab[(j + *ka - 1) * ab_dim1 + 1] = work[*n + m - *kb + j] * ab[ + (j + *ka - 1) * ab_dim1 + 1]; +/* L640: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + work[m - *kb + i__ + k - *ka] = work[m - *kb + i__ + k]; + } + } +/* L650: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - max(i__3,i__4) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[m - * + kb + j1], &ka1, &work[*n + m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + _starpu_dlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &work[*n + + m - *kb + j1], &work[m - *kb + j1], &ka1); +/* L660: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &work[*n + + m - *kb + j1], &work[m - *kb + j1], &ka1); + + } + +/* start applying rotations in 2nd set from the right */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &work[*n + m - *kb + + j1t], &work[m - *kb + j1t], &ka1); + } +/* L670: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? 
j >= i__3 : j <= i__3; j += i__4) { + _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &work[*n + m - *kb + j], &work[m - * + kb + j]); +/* L680: */ + } + } +/* L690: */ + } + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - max(i__3,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &work[*n + j1t], & + work[j1t], &ka1); + } +/* L700: */ + } +/* L710: */ + } + + if (*kb > 1) { +/* Computing MIN */ + i__3 = i__ + *kb; + i__4 = min(i__3,m) - (*ka << 1) - 1; + for (j = 2; j <= i__4; ++j) { + work[*n + j] = work[*n + j + *ka]; + work[j] = work[j + *ka]; +/* L720: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**T * A * inv(S(i)) */ + + bii = bb[i__ * bb_dim1 + 1]; + i__4 = i__; + for (j = i1; j <= i__4; ++j) { + ab[i__ - j + 1 + j * ab_dim1] /= bii; +/* L730: */ + } +/* Computing MIN */ + i__3 = *n, i__1 = i__ + *ka; + i__4 = min(i__3,i__1); + for (j = i__; j <= i__4; ++j) { + ab[j - i__ + 1 + i__ * ab_dim1] /= bii; +/* L740: */ + } + i__4 = i__ + kbt; + for (k = i__ + 1; k <= i__4; ++k) { + i__3 = i__ + kbt; + for (j = k; j <= i__3; ++j) { + ab[j - k + 1 + k * ab_dim1] = ab[j - k + 1 + k * ab_dim1] + - bb[j - i__ + 1 + i__ * bb_dim1] * ab[k - i__ + + 1 + i__ * ab_dim1] - bb[k - i__ + 1 + i__ * + bb_dim1] * ab[j - i__ + 1 + i__ * ab_dim1] + ab[ + i__ * ab_dim1 + 1] * bb[j - i__ + 1 + i__ * + bb_dim1] * bb[k - i__ + 1 + i__ * bb_dim1]; +/* L750: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__3 = min(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__3; ++j) { + ab[j - k + 1 + k * ab_dim1] -= bb[k - i__ + 1 + i__ * + bb_dim1] * ab[j - i__ + 1 + i__ * 
ab_dim1]; +/* L760: */ + } +/* L770: */ + } + i__4 = i__; + for (j = i1; j <= i__4; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__3 = min(i__1,i__2); + for (k = i__ + 1; k <= i__3; ++k) { + ab[k - j + 1 + j * ab_dim1] -= bb[k - i__ + 1 + i__ * + bb_dim1] * ab[i__ - j + 1 + j * ab_dim1]; +/* L780: */ + } +/* L790: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + d__1 = 1. / bii; + _starpu_dscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + _starpu_dger_(&nx, &kbt, &c_b20, &x[i__ * x_dim1 + 1], &c__1, &bb[ + i__ * bb_dim1 + 2], &c__1, &x[(i__ + 1) * x_dim1 + + 1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + ra1 = ab[i__ - i1 + 1 + i1 * ab_dim1]; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i,i+k-ka-1) */ + + _starpu_dlartg_(&ab[ka1 - k + (i__ + k - *ka) * ab_dim1], &ra1, & + work[*n + i__ + k - *ka], &work[i__ + k - *ka], & + ra); + +/* create nonzero element a(i+k,i+k-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + t = -bb[k + 1 + i__ * bb_dim1] * ra1; + work[m - *kb + i__ + k] = work[*n + i__ + k - *ka] * t - + work[i__ + k - *ka] * ab[ka1 + (i__ + k - *ka) * + ab_dim1]; + ab[ka1 + (i__ + k - *ka) * ab_dim1] = work[i__ + k - *ka] + * t + work[*n + i__ + k - *ka] * ab[ka1 + (i__ + + k - *ka) * ab_dim1]; + ra1 = ra; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - max(i__3,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__3 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = min(i__3,i__1); + } else { + j2t = j2; + } + nrt 
= (j2t + *ka - 1) / ka1; + i__3 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(j) */ + + work[j] *= ab[ka1 + (j - 1) * ab_dim1]; + ab[ka1 + (j - 1) * ab_dim1] = work[*n + j] * ab[ka1 + (j - 1) + * ab_dim1]; +/* L800: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + _starpu_dlargv_(&nrt, &ab[ka1 + j1 * ab_dim1], &inca, &work[j1], &ka1, + &work[*n + j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + _starpu_dlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &work[*n + j1], & + work[j1], &ka1); +/* L810: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &work[*n + j1] +, &work[j1], &ka1); + + } + +/* start applying rotations in 1st set from the left */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] +, &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &work[*n + j1t], &work[j1t], &ka1); + } +/* L820: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? 
j >= i__1 : j <= i__1; j += i__3) { + _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &work[*n + j], &work[j]); +/* L830: */ + } + } +/* L840: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt,i+kbt-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + work[m - *kb + i__ + kbt] = -bb[kbt + 1 + i__ * bb_dim1] * + ra1; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k + i0 - m; + j2 = i__ + k + 1 - max(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - max(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t + l - 1) * ab_dim1], + &inca, &ab[ka1 - l + (j1t + l - 1) * ab_dim1], & + inca, &work[*n + m - *kb + j1t + *ka], &work[m - * + kb + j1t + *ka], &ka1); + } +/* L850: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + work[m - *kb + j] = work[m - *kb + j + *ka]; + work[*n + m - *kb + j] = work[*n + m - *kb + j + *ka]; +/* L860: */ + } + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? 
j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(m-kb+j) */ + + work[m - *kb + j] *= ab[ka1 + (j - 1) * ab_dim1]; + ab[ka1 + (j - 1) * ab_dim1] = work[*n + m - *kb + j] * ab[ka1 + + (j - 1) * ab_dim1]; +/* L870: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + work[m - *kb + i__ + k - *ka] = work[m - *kb + i__ + k]; + } + } +/* L880: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - max(i__4,i__3) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[ka1 + j1 * ab_dim1], &inca, &work[m - *kb + + j1], &ka1, &work[*n + m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + _starpu_dlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &work[*n + m - *kb + + j1], &work[m - *kb + j1], &ka1); +/* L890: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + _starpu_dlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &work[*n + m + - *kb + j1], &work[m - *kb + j1], &ka1); + + } + +/* start applying rotations in 2nd set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] +, &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &work[*n + m - *kb + j1t], &work[m - *kb + + j1t], &ka1); + } +/* L900: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? 
j >= i__4 : j <= i__4; j += i__3) { + _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &work[*n + m - *kb + j], &work[m - * + kb + j]); +/* L910: */ + } + } +/* L920: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - max(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] +, &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &work[*n + j1t], &work[j1t], &ka1); + } +/* L930: */ + } +/* L940: */ + } + + if (*kb > 1) { +/* Computing MIN */ + i__4 = i__ + *kb; + i__3 = min(i__4,m) - (*ka << 1) - 1; + for (j = 2; j <= i__3; ++j) { + work[*n + j] = work[*n + j + *ka]; + work[j] = work[j + *ka]; +/* L950: */ + } + } + + } + + goto L490; + +/* End of DSBGST */ + +} /* _starpu_dsbgst_ */ diff --git a/min-dgels/base/SRC/dsbgv.c b/min-dgels/base/SRC/dsbgv.c new file mode 100644 index 0000000..b6067df --- /dev/null +++ b/min-dgels/base/SRC/dsbgv.c @@ -0,0 +1,234 @@ +/* dsbgv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical upper, wantz; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbstf_( + char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsbgst_(char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dsterf_(integer *, doublereal *, + doublereal *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSBGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* of a real generalized symmetric-definite banded eigenproblem, of */ +/* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric */ +/* and banded, and B is also positive definite. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* KA (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ + +/* KB (input) INTEGER */ +/* The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0 (KB > KA is rejected with INFO = -5). */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first ka+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ + +/* On exit, the contents of AB are destroyed. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KA+1. */ + +/* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix B, stored in the first kb+1 rows of the array. The */ +/* j-th column of B is stored in the j-th column of the array BB */ +/* as follows: */ +/* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ +/* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb).
*/ + +/* On exit, the factor S from the split Cholesky factorization */ +/* B = S**T*S, as returned by DPBSTF. */ + +/* LDBB (input) INTEGER */ +/* The leading dimension of the array BB. LDBB >= KB+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* eigenvectors, with the i-th column of Z holding the */ +/* eigenvector associated with W(i). The eigenvectors are */ +/* normalized so that Z**T*B*Z = I. */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= N. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N); WORK(1:N) is the vector handed from DSBTRD to DSTERF/DSTEQR, WORK(N+1:) is passed as work space to DSBGST, DSBTRD and DSTEQR */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is: */ +/* <= N: the algorithm failed to converge: */ +/* i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero; */ +/* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF */ +/* returned INFO = i: B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters.
*/ + + /* Parameter adjustments: shift every array pointer so the code below can use 1-based, Fortran-style indices */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBGV ", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B; a nonzero DPBSTF status is reported to the caller as INFO = N + status. */ + + _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. WORK is split at INDWRK = INDE + N; the IINFO status returned by DSBGST is not examined. */ + + inde = 1; + indwrk = inde + *n; + _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[indwrk], &iinfo) + ; + +/* Reduce to tridiagonal form: VECT = 'U' when eigenvectors are wanted, 'N' otherwise; the IINFO status returned by DSBTRD is not examined. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ + z_offset], ldz, &work[indwrk], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call DSTEQR. */ + + if (!
wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ + indwrk], info); + } + return 0; + +/* End of DSBGV */ + +} /* _starpu_dsbgv_ */ diff --git a/min-dgels/base/SRC/dsbgvd.c b/min-dgels/base/SRC/dsbgvd.c new file mode 100644 index 0000000..a0bc93d --- /dev/null +++ b/min-dgels/base/SRC/dsbgvd.c @@ -0,0 +1,327 @@ +/* dsbgvd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b12 = 1.; +static doublereal c_b13 = 0.; + +/* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * + ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer iinfo, lwmin; + logical upper, wantz; + integer indwk2, llwrk2; + extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *), 
_starpu_dlacpy_(char *, integer + *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dpbstf_(char *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsbgst_(char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dsterf_(integer *, doublereal *, + doublereal *, integer *); + integer indwrk, liwmin; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* of a real generalized symmetric-definite banded eigenproblem, of the */ +/* form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and */ +/* banded, and B is also positive definite. If eigenvectors are */ +/* desired, it uses a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. 
*/ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* KA (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ + +/* KB (input) INTEGER */ +/* The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first ka+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ + +/* On exit, the contents of AB are destroyed. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KA+1. */ + +/* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix B, stored in the first kb+1 rows of the array. The */ +/* j-th column of B is stored in the j-th column of the array BB */ +/* as follows: */ +/* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ +/* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). */ + +/* On exit, the factor S from the split Cholesky factorization */ +/* B = S**T*S, as returned by DPBSTF. */ + +/* LDBB (input) INTEGER */ +/* The leading dimension of the array BB. LDBB >= KB+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* eigenvectors, with the i-th column of Z holding the */ +/* eigenvector associated with W(i). The eigenvectors are */ +/* normalized so Z**T*B*Z = I. 
*/ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N <= 1, LWORK >= 1. */ +/* If JOBZ = 'N' and N > 1, LWORK >= 3*N. */ +/* If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ +/* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is: */ +/* <= N: the algorithm failed to converge: */ +/* i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero; */ +/* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF */ +/* returned INFO = i: B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. 
*/ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = *n << 1; + } + + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -14; + } else if (*liwork < liwmin && ! lquery) { + *info = -16; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBGVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. 
*/ + + _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[indwrk], &iinfo) + ; + +/* Reduce to tridiagonal form. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ + z_offset], ldz, &work[indwrk], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + _starpu_dgemm_("N", "N", n, n, n, &c_b12, &z__[z_offset], ldz, &work[indwrk], + n, &c_b13, &work[indwk2], n); + _starpu_dlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSBGVD */ + +} /* _starpu_dsbgvd_ */ diff --git a/min-dgels/base/SRC/dsbgvx.c b/min-dgels/base/SRC/dsbgvx.c new file mode 100644 index 0000000..5ff52a3 --- /dev/null +++ b/min-dgels/base/SRC/dsbgvx.c @@ -0,0 +1,466 @@ +/* dsbgvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b25 = 1.; +static doublereal c_b27 = 0.; + +/* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * + bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, jj; + doublereal tmp1; + integer indd, inde; + char vect[1]; + logical test; + integer itmp1, indee; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical upper, wantz, alleig, indeig; + integer indibl; + logical valeig; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dpbstf_(char 
*, integer *, + integer *, doublereal *, integer *, integer *), _starpu_dsbtrd_( + char *, char *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *); + integer indisp; + extern /* Subroutine */ int _starpu_dsbgst_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *); + integer indiwo; + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *), _starpu_dstebz_(char *, char *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer nsplit; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBGVX computes selected eigenvalues, and optionally, eigenvectors */ +/* of a real generalized symmetric-definite banded eigenproblem, of */ +/* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric */ +/* and banded, and B is also positive definite. Eigenvalues and */ +/* eigenvectors can be selected by specifying either all eigenvalues, */ +/* a range of values or a range of indices for the desired eigenvalues. 
*/ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* KA (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ + +/* KB (input) INTEGER */ +/* The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first ka+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ + +/* On exit, the contents of AB are destroyed. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KA+1. */ + +/* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix B, stored in the first kb+1 rows of the array. The */ +/* j-th column of B is stored in the j-th column of the array BB */ +/* as follows: */ +/* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ +/* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). 
*/ + +/* On exit, the factor S from the split Cholesky factorization */ +/* B = S**T*S, as returned by DPBSTF. */ + +/* LDBB (input) INTEGER */ +/* The leading dimension of the array BB. LDBB >= KB+1. */ + +/* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) */ +/* If JOBZ = 'V', the n-by-n matrix used in the reduction of */ +/* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */ +/* and consequently C to tridiagonal form. */ +/* If JOBZ = 'N', the array Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. If JOBZ = 'N', */ +/* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). 
*/ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* eigenvectors, with the i-th column of Z holding the */ +/* eigenvector associated with W(i). The eigenvectors are */ +/* normalized so Z**T*B*Z = I. */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (7*N) */ + +/* IWORK (workspace/output) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (M) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvalues that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0 : successful exit */ +/* < 0 : if INFO = -i, the i-th argument had an illegal value */ +/* <= N: if INFO = i, then i eigenvectors failed to converge. */ +/* Their indices are stored in IFAIL. */ +/* > N : DPBSTF returned an error code; i.e., */ +/* if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1; + bb -= bb_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ka < 0) { + *info = -5; + } else if (*kb < 0 || *kb > *ka) { + *info = -6; + } else if (*ldab < *ka + 1) { + *info = -8; + } else if (*ldbb < *kb + 1) { + *info = -10; + } else if (*ldq < 1 || wantz && *ldq < *n) { + *info = -12; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -14; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -15; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -16; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBGVX", &i__1); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. 
*/ + + _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &q[q_offset], ldq, &work[1], &iinfo); + +/* Reduce symmetric band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indwrk = inde + *n; + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &work[indd], &work[inde], + &q[q_offset], ldq, &work[indwrk], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or SSTEQR. If this fails for some */ +/* eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); + indee = indwrk + (*n << 1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[indee], info); + } else { + _starpu_dlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ + indwrk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, */ +/* call DSTEIN. 
*/ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwo = indisp + *n; + _starpu_dstebz_(range, order, n, vl, vu, il, iu, abstol, &work[indd], &work[inde], + m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[indwrk], + &iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ + indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & + ifail[1], info); + +/* Apply transformation matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + _starpu_dcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + _starpu_dgemv_("N", n, n, &c_b25, &q[q_offset], ldq, &work[1], &c__1, & + c_b27, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +L30: + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of DSBGVX */ + +} /* _starpu_dsbgvx_ */ diff --git a/min-dgels/base/SRC/dsbtrd.c b/min-dgels/base/SRC/dsbtrd.c new file mode 100644 index 0000000..160423d --- /dev/null +++ b/min-dgels/base/SRC/dsbtrd.c @@ -0,0 +1,713 @@ +/* dsbtrd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b9 = 0.; +static doublereal c_b10 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, integer *kd, + doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, + doublereal *q, integer *ldq, doublereal *work, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, + i__5; + + /* Local variables */ + integer i__, j, k, l, i2, j1, j2, nq, nr, kd1, ibl, iqb, kdn, jin, nrt, + kdm1, inca, jend, lend, jinc, incx, last; + doublereal temp; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer j1end, j1inc, iqend; + extern logical _starpu_lsame_(char *, char *); + logical initq, wantq, upper; + extern /* Subroutine */ int _starpu_dlar2v_(integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *); + integer iqaend; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dlargv_( + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlartv_(integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + + +/* -- LAPACK 
routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBTRD reduces a real symmetric band matrix A to symmetric */ +/* tridiagonal form T by an orthogonal similarity transformation: */ +/* Q**T * A * Q = T. */ + +/* Arguments */ +/* ========= */ + +/* VECT (input) CHARACTER*1 */ +/* = 'N': do not form Q; */ +/* = 'V': form Q; */ +/* = 'U': update a matrix X, by forming X*Q. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the upper or lower triangle of the symmetric band */ +/* matrix A, stored in the first KD+1 rows of the array. The */ +/* j-th column of A is stored in the j-th column of the array AB */ +/* as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ +/* On exit, the diagonal elements of AB are overwritten by the */ +/* diagonal elements of the tridiagonal matrix T; if KD > 0, the */ +/* elements on the first superdiagonal (if UPLO = 'U') or the */ +/* first subdiagonal (if UPLO = 'L') are overwritten by the */ +/* off-diagonal elements of T; the rest of AB is overwritten by */ +/* values generated during the reduction. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of the tridiagonal matrix T. 
*/ + +/* E (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The off-diagonal elements of the tridiagonal matrix T: */ +/* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if VECT = 'U', then Q must contain an N-by-N */ +/* matrix X; if VECT = 'N' or 'V', then Q need not be set. */ + +/* On exit: */ +/* if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; */ +/* if VECT = 'U', Q contains the product X*Q; */ +/* if VECT = 'N', the array Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. */ +/* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* Modified by Linda Kaufman, Bell Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + initq = _starpu_lsame_(vect, "V"); + wantq = initq || _starpu_lsame_(vect, "U"); + upper = _starpu_lsame_(uplo, "U"); + kd1 = *kd + 1; + kdm1 = *kd - 1; + incx = *ldab - 1; + iqend = 1; + + *info = 0; + if (! wantq && ! _starpu_lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < kd1) { + *info = -6; + } else if (*ldq < max(1,*n) && wantq) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSBTRD", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize Q to the unit matrix, if needed */ + + if (initq) { + _starpu_dlaset_("Full", n, n, &c_b9, &c_b10, &q[q_offset], ldq); + } + +/* Wherever possible, plane rotations are generated and applied in */ +/* vector operations of length NR over the index set J1:J2:KD1. */ + +/* The cosines and sines of the plane rotations are stored in the */ +/* arrays D and WORK. */ + + inca = kd1 * *ldab; +/* Computing MIN */ + i__1 = *n - 1; + kdn = min(i__1,*kd); + if (upper) { + + if (*kd > 1) { + +/* Reduce to tridiagonal form, working with upper triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th row of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* DLARTV or DROT is used */ + + if (nr >= (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + _starpu_dlartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], + &inca, &ab[l + j1 * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); +/* L10: */ + } + + } else { + jend = j1 + (nr - 1) * kd1; + i__2 = jend; + i__3 = kd1; + for (jinc = j1; i__3 < 0 ? 
jinc >= i__2 : jinc <= + i__2; jinc += i__3) { + _starpu_drot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], & + c__1, &ab[jinc * ab_dim1 + 1], &c__1, + &d__[jinc], &work[jinc]); +/* L20: */ + } + } + } + + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i,i+k-1) */ +/* within the band */ + + _starpu_dlartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] +, &ab[*kd - k + 2 + (i__ + k - 1) * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] = temp; + +/* apply rotation from the right */ + + i__3 = k - 3; + _starpu_drot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * + ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + + k - 1) * ab_dim1], &c__1, &d__[i__ + k - + 1], &work[i__ + k - 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + _starpu_dlar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + + j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, + &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the left */ + + if (nr > 0) { + if ((*kd << 1) - 1 < nr) { + +/* Dependent on the the number of diagonals either */ +/* DLARTV or DROT is used */ + + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[*kd - l + (j1 + l) * + ab_dim1], &inca, &ab[*kd - l + 1 + + (j1 + l) * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); + } +/* L30: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__3 = j1end; + i__2 = kd1; + for (jin = j1; i__2 < 0 ? 
jin >= i__3 : jin <= + i__3; jin += i__2) { + i__4 = *kd - 1; + _starpu_drot_(&i__4, &ab[*kd - 1 + (jin + 1) * + ab_dim1], &incx, &ab[*kd + (jin + + 1) * ab_dim1], &incx, &d__[jin], & + work[jin]); +/* L40: */ + } + } +/* Computing MIN */ + i__2 = kdm1, i__3 = *n - j2; + lend = min(i__2,i__3); + last = j1end + kd1; + if (lend > 0) { + _starpu_drot_(&lend, &ab[*kd - 1 + (last + 1) * + ab_dim1], &incx, &ab[*kd + (last + 1) + * ab_dim1], &incx, &d__[last], &work[ + last]); + } + } + } + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = max(iqend,j2); +/* Computing MAX */ + i__2 = 0, i__3 = k - 3; + i2 = max(i__2,i__3); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = min(iqaend,iqend); + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = max(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = min(i__4,iqend); + _starpu_drot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &work[j]); +/* L50: */ + } + } else { + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + _starpu_drot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + work[j]); +/* L60: */ + } + } + + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? 
j >= i__2 : j <= i__2; j += i__3) + { + +/* create nonzero element a(j-1,j+kd) outside the band */ +/* and store it in WORK */ + + work[j + *kd] = work[j] * ab[(j + *kd) * ab_dim1 + 1]; + ab[(j + *kd) * ab_dim1 + 1] = d__[j] * ab[(j + *kd) * + ab_dim1 + 1]; +/* L70: */ + } +/* L80: */ + } +/* L90: */ + } + } + + if (*kd > 0) { + +/* copy off-diagonal elements to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = ab[*kd + (i__ + 1) * ab_dim1]; +/* L100: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L110: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[kd1 + i__ * ab_dim1]; +/* L120: */ + } + + } else { + + if (*kd > 1) { + +/* Reduce to tridiagonal form, working with lower triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th column of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + _starpu_dlargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply plane rotations from one side */ + + +/* Dependent on the the number of diagonals either */ +/* DLARTV or DROT is used */ + + if (nr > (*kd << 1) - 1) { + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + _starpu_dlartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * + ab_dim1], &inca, &ab[kd1 - l + 1 + ( + j1 - kd1 + l) * ab_dim1], &inca, &d__[ + j1], &work[j1], &kd1); +/* L130: */ + } + } else { + jend = j1 + kd1 * (nr - 1); + i__3 = jend; + i__2 = kd1; + for (jinc = j1; i__2 < 0 ? 
jinc >= i__3 : jinc <= + i__3; jinc += i__2) { + _starpu_drot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1] +, &incx, &ab[kd1 + (jinc - *kd) * + ab_dim1], &incx, &d__[jinc], &work[ + jinc]); +/* L140: */ + } + } + + } + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i+k-1,i) */ +/* within the band */ + + _starpu_dlartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + ab[k - 1 + i__ * ab_dim1] = temp; + +/* apply rotation from the left */ + + i__2 = k - 3; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + _starpu_drot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], & + i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], & + i__4, &d__[i__ + k - 1], &work[i__ + k - + 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + _starpu_dlar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * + ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], & + inca, &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* DLARTV or DROT is used */ + + if (nr > 0) { + if (nr > (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + _starpu_dlartv_(&nrt, &ab[l + 2 + (j1 - 1) * + ab_dim1], &inca, &ab[l + 1 + j1 * + ab_dim1], &inca, &d__[j1], &work[ + j1], &kd1); + } +/* L150: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__2 = j1end; + i__3 = kd1; + for (j1inc = j1; i__3 < 0 ? 
j1inc >= i__2 : + j1inc <= i__2; j1inc += i__3) { + _starpu_drot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + + 3], &c__1, &ab[j1inc * ab_dim1 + + 2], &c__1, &d__[j1inc], &work[ + j1inc]); +/* L160: */ + } + } +/* Computing MIN */ + i__3 = kdm1, i__2 = *n - j2; + lend = min(i__3,i__2); + last = j1end + kd1; + if (lend > 0) { + _starpu_drot_(&lend, &ab[(last - 1) * ab_dim1 + 3], & + c__1, &ab[last * ab_dim1 + 2], &c__1, + &d__[last], &work[last]); + } + } + } + + + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = max(iqend,j2); +/* Computing MAX */ + i__3 = 0, i__2 = k - 3; + i2 = max(i__3,i__2); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = min(iqaend,iqend); + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = max(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = min(i__4,iqend); + _starpu_drot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &work[j]); +/* L170: */ + } + } else { + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + _starpu_drot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + work[j]); +/* L180: */ + } + } + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? 
j >= i__3 : j <= i__3; j += i__2) + { + +/* create nonzero element a(j+kd,j-1) outside the */ +/* band and store it in WORK */ + + work[j + *kd] = work[j] * ab[kd1 + j * ab_dim1]; + ab[kd1 + j * ab_dim1] = d__[j] * ab[kd1 + j * ab_dim1] + ; +/* L190: */ + } +/* L200: */ + } +/* L210: */ + } + } + + if (*kd > 0) { + +/* copy off-diagonal elements to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = ab[i__ * ab_dim1 + 2]; +/* L220: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L230: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[i__ * ab_dim1 + 1]; +/* L240: */ + } + } + + return 0; + +/* End of DSBTRD */ + +} /* _starpu_dsbtrd_ */ diff --git a/min-dgels/base/SRC/dsfrk.c b/min-dgels/base/SRC/dsfrk.c new file mode 100644 index 0000000..2ef5acf --- /dev/null +++ b/min-dgels/base/SRC/dsfrk.c @@ -0,0 +1,517 @@ +/* dsfrk.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine: symmetric rank-k update of a matrix C held in RFP format */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, + integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *beta, doublereal *c__) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables (info is local: this routine has no INFO argument) */ + integer j, n1, n2, nk, info; + logical normaltransr; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer nrowa; + logical lower; + extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + logical nisodd, notrans; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Julien Langou of the Univ. of Colorado Denver -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Level 3 BLAS like routine for C in RFP Format.
*/ + +/* DSFRK performs one of the symmetric rank-k operations */ + +/* C := alpha*A*A' + beta*C, */ + +/* or */ + +/* C := alpha*A'*A + beta*C, */ + +/* where alpha and beta are real scalars, C is an n-by-n symmetric */ +/* matrix and A is an n-by-k matrix in the first case and a k-by-n */ +/* matrix in the second case. */ + +/* Arguments */ +/* ========== */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': The Normal Form of RFP C is stored; */ +/* = 'T': The Transpose Form of RFP C is stored. */ + +/* UPLO - (input) CHARACTER */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the array C is to be referenced as */ +/* follows: */ + +/* UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* is to be referenced. */ + +/* UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* is to be referenced. */ + +/* Unchanged on exit. */ + +/* TRANS - (input) CHARACTER */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. */ + +/* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. */ + +/* Unchanged on exit. */ + +/* N - (input) INTEGER. */ +/* On entry, N specifies the order of the matrix C. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* K - (input) INTEGER. */ +/* On entry with TRANS = 'N' or 'n', K specifies the number */ +/* of columns of the matrix A, and on entry with TRANS = 'T' */ +/* or 't', K specifies the number of rows of the matrix A. K */ +/* must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - (input) DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - (input) DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where KA */ +/* is K when TRANS = 'N' or 'n', and is N otherwise.
Before */ +/* entry with TRANS = 'N' or 'n', the leading N-by-K part of */ +/* the array A must contain the matrix A, otherwise the leading */ +/* K-by-N part of the array A must contain the matrix A. */ +/* Unchanged on exit. */ + +/* LDA - (input) INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* then LDA must be at least max( 1, n ), otherwise LDA must */ +/* be at least max( 1, k ). */ +/* Unchanged on exit. */ + +/* BETA - (input) DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + + +/* C - (input/output) DOUBLE PRECISION array, dimension ( NT ); */ +/* NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP */ +/* Format. RFP Format is described by TRANSR, UPLO and N. The array is updated in place. */ + +/* Arguments */ +/* ========== */ + +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift a and c__ so that both are indexed from 1 */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --c__; + + /* Function Body */ + info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + notrans = _starpu_lsame_(trans, "N"); + + if (notrans) { /* nrowa = rows A must have; used only by the LDA check below */ + nrowa = *n; + } else { + nrowa = *k; + } + + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + info = -2; + } else if (! notrans && ! 
_starpu_lsame_(trans, "T")) { + info = -3; + } else if (*n < 0) { + info = -4; + } else if (*k < 0) { + info = -5; + } else if (*lda < max(1,nrowa)) { + info = -8; /* LDA is the 8th argument */ + } + if (info != 0) { /* illegal argument: report it through xerbla and return before C is touched */ + i__1 = -info; + _starpu_xerbla_("DSFRK ", &i__1); + return 0; + } + +/* Quick return if possible.
*/ + +/* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not */ +/* done (it is in DSYRK for example) and left in the general case. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { /* N is 0, or beta is 1 with alpha or K equal to 0: return with C unchanged */ + return 0; + } + + if (*alpha == 0. && *beta == 0.) { /* result is all zeros: clear the n(n+1)/2 packed entries */ + i__1 = *n * (*n + 1) / 2; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; + } + return 0; + } + +/* C is N-by-N. */ +/* If N is odd, set NISODD = .TRUE., and N1 and N2. */ +/* If N is even, NISODD = .FALSE., and NK. */ /* nk is assigned only for even N and n1, n2 only for odd N; each is read only in the matching branch below */ + + if (*n % 2 == 0) { + nisodd = FALSE_; + nk = *n / 2; + } else { + nisodd = TRUE_; + if (lower) { /* the larger half is n1 for UPLO = 'L' and n2 for UPLO = 'U' */ + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + } + + if (nisodd) { + +/* N is odd: every case below makes two DSYRK calls and one DGEMM call into c__ */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* N is odd, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */ + + _starpu_dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[1], n); + _starpu_dsyrk_("U", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, + beta, &c__[*n + 1], n); + _starpu_dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], + lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1], n); + + } else { + +/* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */ + + _starpu_dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[1], n); + _starpu_dsyrk_("U", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], + lda, beta, &c__[*n + 1], n) + ; + _starpu_dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 + + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1] +, n); + + } + + } else { + +/* N is odd, TRANSR = 'N', and UPLO = 'U' */ + + if (notrans) { + +/* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */ + + _starpu_dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[n2 + 1], n); + _starpu_dsyrk_("U", "N", &n2, k, alpha, &a[n2 + a_dim1], lda, + beta, &c__[n1 + 1], n); + _starpu_dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, +
&a[n2 + a_dim1], lda, beta, &c__[1], n); + + } else { + +/* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */ + + _starpu_dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[n2 + 1], n); + _starpu_dsyrk_("U", "T", &n2, k, alpha, &a[n2 * a_dim1 + 1], lda, + beta, &c__[n1 + 1], n); + _starpu_dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, + &a[n2 * a_dim1 + 1], lda, beta, &c__[1], n); + + } + + } + + } else { + +/* N is odd, and TRANSR = 'T' */ + + if (lower) { + +/* N is odd, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */ + + _starpu_dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[1], &n1); + _starpu_dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, + beta, &c__[2], &n1); + _starpu_dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, + &a[n1 + 1 + a_dim1], lda, beta, &c__[n1 * n1 + 1], + &n1); + + } else { + +/* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */ + + _starpu_dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[1], &n1); + _starpu_dsyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], + lda, beta, &c__[2], &n1); + _starpu_dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[n1 * + n1 + 1], &n1); + + } + + } else { + +/* N is odd, TRANSR = 'T', and UPLO = 'U' */ + + if (notrans) { + +/* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */ + + _starpu_dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[n2 * n2 + 1], &n2); + _starpu_dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, + beta, &c__[n1 * n2 + 1], &n2); + _starpu_dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], + lda, &a[a_dim1 + 1], lda, beta, &c__[1], &n2); + + } else { + +/* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */ + + _starpu_dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[n2 * n2 + 1], &n2); + _starpu_dsyrk_("L", "T", &n2, k, alpha,
&a[(n1 + 1) * a_dim1 + 1], + lda, beta, &c__[n1 * n2 + 1], &n2); + _starpu_dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 + + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], & + n2); + + } + + } + + } + + } else { + +/* N is even: both DSYRK calls and the DGEMM call in every case below use order nk = N/2 */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* N is even, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */ + + i__1 = *n + 1; + _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[2], &i__1); + i__1 = *n + 1; + _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, + beta, &c__[1], &i__1); + i__1 = *n + 1; + _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], + lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], & + i__1); + + } else { + +/* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */ + + i__1 = *n + 1; + _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[2], &i__1); + i__1 = *n + 1; + _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], + lda, beta, &c__[1], &i__1); + i__1 = *n + 1; + _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 + + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2] +, &i__1); + + } + + } else { + +/* N is even, TRANSR = 'N', and UPLO = 'U' */ + + if (notrans) { + +/* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */ + + i__1 = *n + 1; + _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk + 2], &i__1); + i__1 = *n + 1; + _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, + beta, &c__[nk + 1], &i__1); + i__1 = *n + 1; + _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, + &a[nk + 1 + a_dim1], lda, beta, &c__[1], &i__1); + + } else { + +/* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */ + + i__1 = *n + 1; + _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk + 2], &i__1); + i__1 = *n + 1; + _starpu_dsyrk_("U", "T",
&nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], + lda, beta, &c__[nk + 1], &i__1); + i__1 = *n + 1; + _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, + &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], & + i__1); + + } + + } + + } else { + +/* N is even, and TRANSR = 'T' */ + + if (lower) { + +/* N is even, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */ + + _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk + 1], &nk); + _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, + beta, &c__[1], &nk); + _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, + &a[nk + 1 + a_dim1], lda, beta, &c__[(nk + 1) * + nk + 1], &nk); + + } else { + +/* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */ + + _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk + 1], &nk); + _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], + lda, beta, &c__[1], &nk); + _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, + &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[(nk + + 1) * nk + 1], &nk); + + } + + } else { + +/* N is even, TRANSR = 'T', and UPLO = 'U' */ + + if (notrans) { + +/* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */ + + _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk * (nk + 1) + 1], &nk); + _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, + beta, &c__[nk * nk + 1], &nk); + _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], + lda, &a[a_dim1 + 1], lda, beta, &c__[1], &nk); + + } else { + +/* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */ + + _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, + &c__[nk * (nk + 1) + 1], &nk); + _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], + lda, beta, &c__[nk * nk + 1], &nk); + _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 + + 1], lda,
&a[a_dim1 + 1], lda, beta, &c__[1], & + nk); + + } + + } + + } + + } + + return 0; + +/* End of DSFRK */ + +} /* _starpu_dsfrk_ */ diff --git a/min-dgels/base/SRC/dsgesv.c b/min-dgels/base/SRC/dsgesv.c new file mode 100644 index 0000000..b171728 --- /dev/null +++ b/min-dgels/base/SRC/dsgesv.c @@ -0,0 +1,416 @@ +/* dsgesv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b10 = -1.; +static doublereal c_b11 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, + integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, + x_dim1, x_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal cte, eps, anrm; + integer ptsa; + doublereal rnrm, xnrm; + integer ptsx; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iiter; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dlag2s_(integer *, integer *, + doublereal *, integer *, real *, integer *, integer *), _starpu_slag2d_( + integer *,
integer *, real *, integer *, doublereal *, integer *, + integer *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dgetrf_(integer *, integer *, + doublereal *, integer *, integer *, integer *), _starpu_dgetrs_(char *, + integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *), _starpu_sgetrf_(integer *, + integer *, real *, integer *, integer *, integer *), _starpu_sgetrs_(char + *, integer *, integer *, real *, integer *, integer *, real *, + integer *, integer *); + + +/* -- LAPACK PROTOTYPE driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* February 2007 */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSGESV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ + +/* DSGESV first attempts to factorize the matrix in SINGLE PRECISION */ +/* and use this factorization within an iterative refinement procedure */ +/* to produce a solution with DOUBLE PRECISION normwise backward error */ +/* quality (see below). If the approach fails the method switches to a */ +/* DOUBLE PRECISION factorization and solve. */ + +/* The iterative refinement is not going to be a winning strategy if */ +/* the ratio SINGLE PRECISION performance over DOUBLE PRECISION */ +/* performance is too small. A reasonable strategy should take the */ +/* number of right-hand sides and the size of the matrix into account. */ +/* This might be done with a call to ILAENV in the future. 
Up to now, we */ +/* always try iterative refinement. */ + +/* The iterative refinement process is stopped if */ +/* ITER > ITERMAX */ +/* or for all the RHS we have: */ +/* RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX */ +/* where */ +/* o ITER is the number of the current iteration in the iterative */ +/* refinement process */ +/* o RNRM is the infinity-norm of the residual */ +/* o XNRM is the infinity-norm of the solution */ +/* o ANRM is the infinity-operator-norm of the matrix A */ +/* o EPS is the machine epsilon returned by DLAMCH('Epsilon') */ +/* The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 */ +/* respectively. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input or input/output) DOUBLE PRECISION array, */ +/* dimension (LDA,N) */ +/* On entry, the N-by-N coefficient matrix A. */ +/* On exit, if iterative refinement has been successfully used */ +/* (INFO.EQ.0 and ITER.GE.0, see description below), then A is */ +/* unchanged, if double precision factorization has been used */ +/* (INFO.EQ.0 and ITER.LT.0, see description below), then the */ +/* array A contains the factors L and U from the factorization */ +/* A = P*L*U; the unit diagonal elements of L are not stored. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* The pivot indices that define the permutation matrix P; */ +/* row i of the matrix was interchanged with row IPIV(i). */ +/* Corresponds either to the single precision factorization */ +/* (if INFO.EQ.0 and ITER.GE.0) or the double precision */ +/* factorization (if INFO.EQ.0 and ITER.LT.0).
*/ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N*NRHS) */ +/* This array is used to hold the residual vectors. */ + +/* SWORK (workspace) REAL array, dimension (N*(N+NRHS)) */ +/* This array is used to store the single precision matrix and the */ +/* right-hand sides or solutions in single precision. */ + +/* ITER (output) INTEGER */ +/* < 0: iterative refinement has failed, double precision */ +/* factorization has been performed */ +/* -1 : the routine fell back to full precision for */ +/* implementation- or machine-specific reasons */ +/* -2 : narrowing the precision induced an overflow, */ +/* the routine fell back to full precision */ +/* -3 : failure of SGETRF */ +/* -31: stop the iterative refinement after the 30th */ +/* iteration */ +/* > 0: iterative refinement has been successfully used. */ +/* Returns the number of iterations */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, U(i,i) computed in DOUBLE PRECISION is */ +/* exactly zero. The factorization has been completed, */ +/* but the factor U is exactly singular, so the solution */ +/* could not be computed. */ + +/* ========= */ + +/* .. Parameters .. */ + + + + +/* .. Local Scalars .. */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements ..
*/ + + /* Parameter adjustments */ + work_dim1 = *n; + work_offset = 1 + work_dim1; + work -= work_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --swork; + + /* Function Body */ + *info = 0; + *iter = 0; + +/* Test the input parameters. */ + + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldx < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSGESV", &i__1); + return 0; + } + +/* Quick return if (N.EQ.0). */ + + if (*n == 0) { + return 0; + } + +/* Skip single precision iterative refinement if a priori slower */ +/* than double precision factorization. */ + + if (FALSE_) { + *iter = -1; + goto L40; + } + +/* Compute some constants. */ + + anrm = _starpu_dlange_("I", n, n, &a[a_offset], lda, &work[work_offset]); + eps = _starpu_dlamch_("Epsilon"); + cte = anrm * eps * sqrt((doublereal) (*n)) * 1.; + +/* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */ + + ptsa = 1; + ptsx = ptsa + *n * *n; + +/* Convert B from double precision to single precision and store the */ +/* result in SX. */ + + _starpu_dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Convert A from double precision to single precision and store the */ +/* result in SA. */ + + _starpu_dlag2s_(n, n, &a[a_offset], lda, &swork[ptsa], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Compute the LU factorization of SA. */ + + _starpu_sgetrf_(n, n, &swork[ptsa], n, &ipiv[1], info); + + if (*info != 0) { + *iter = -3; + goto L40; + } + +/* Solve the system SA*SX = SB. 
*/ + + _starpu_sgetrs_("No transpose", n, nrhs, &swork[ptsa], n, &ipiv[1], &swork[ptsx], + n, info); + +/* Convert SX back to double precision */ + + _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info); + +/* Compute R = B - AX (R is WORK). */ + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); + + _starpu_dgemm_("No Transpose", "No Transpose", n, nrhs, n, &c_b10, &a[a_offset], + lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); + +/* Check whether the NRHS normwise backward errors satisfy the */ +/* stopping criterion. If yes, set ITER=0 and return. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1], abs(d__1)); + rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + + i__ * work_dim1], abs(d__1)); + if (rnrm > xnrm * cte) { + goto L10; + } + } + +/* If we are here, the NRHS normwise backward errors satisfy the */ +/* stopping criterion. We are good to exit. */ + + *iter = 0; + return 0; + +L10: + + for (iiter = 1; iiter <= 30; ++iiter) { + +/* Convert R (in WORK) from double precision to single precision */ +/* and store the result in SX. */ + + _starpu_dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Solve the system SA*SX = SR. */ + + _starpu_sgetrs_("No transpose", n, nrhs, &swork[ptsa], n, &ipiv[1], &swork[ + ptsx], n, info); + +/* Convert SX back to double precision and update the current */ +/* iterate. */ + + _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info); + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * + x_dim1 + 1], &c__1); + } + +/* Compute R = B - AX (R is WORK). 
*/ + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); + + _starpu_dgemm_("No Transpose", "No Transpose", n, nrhs, n, &c_b10, &a[ + a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], + n); + +/* Check whether the NRHS normwise backward errors satisfy the */ +/* stopping criterion. If yes, set ITER=IITER>0 and return. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1], abs(d__1)); + rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + + i__ * work_dim1], abs(d__1)); + if (rnrm > xnrm * cte) { + goto L20; + } + } + +/* If we are here, the NRHS normwise backward errors satisfy the */ +/* stopping criterion, we are good to exit. */ + + *iter = iiter; + + return 0; + +L20: + +/* L30: */ + ; + } + +/* If we are at this place of the code, this is because we have */ +/* performed ITER=ITERMAX iterations and never satisified the */ +/* stopping criterion, set up the ITER flag accordingly and follow up */ +/* on double precision routine. */ + + *iter = -31; + +L40: + +/* Single-precision iterative refinement failed to converge to a */ +/* satisfactory solution, so we resort to double precision. */ + + _starpu_dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + + if (*info != 0) { + return 0; + } + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &x[x_offset] +, ldx, info); + + return 0; + +/* End of DSGESV. */ + +} /* _starpu__starpu_dsgesv_ */ diff --git a/min-dgels/base/SRC/dspcon.c b/min-dgels/base/SRC/dspcon.c new file mode 100644 index 0000000..138ee7b --- /dev/null +++ b/min-dgels/base/SRC/dspcon.c @@ -0,0 +1,198 @@ +/* dspcon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer + *iwork, integer *info) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ip, kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dsptrs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPCON estimates the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric packed matrix A using the factorization */ +/* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). 
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSPTRF, stored as a */ +/* packed triangular matrix. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSPTRF. */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm of the original matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --iwork; + --work; + --ipiv; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.) 
{ + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm <= 0.) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + ip = *n * (*n + 1) / 2; + for (i__ = *n; i__ >= 1; --i__) { + if (ipiv[i__] > 0 && ap[ip] == 0.) { + return 0; + } + ip -= i__; +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + ip = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (ipiv[i__] > 0 && ap[ip] == 0.) { + return 0; + } + ip = ip + *n - i__ + 1; +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L') or inv(U*D*U'). */ + + _starpu_dsptrs_(uplo, n, &c__1, &ap[1], &ipiv[1], &work[1], n, info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of DSPCON */ + +} /* _starpu_dspcon_ */ diff --git a/min-dgels/base/SRC/dspev.c b/min-dgels/base/SRC/dspev.c new file mode 100644 index 0000000..a6ff427 --- /dev/null +++ b/min-dgels/base/SRC/dspev.c @@ -0,0 +1,246 @@ +/* dspev.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, + doublereal *); + integer indtau; + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dopgtr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsptrd_(char *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_dsteqr_(char *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- 
*/ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPEV computes all the eigenvalues and, optionally, eigenvectors of a */ +/* real symmetric matrix A in packed storage. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, AP is overwritten by values generated during the */ +/* reduction to tridiagonal form. If UPLO = 'U', the diagonal */ +/* and first superdiagonal of the tridiagonal matrix T overwrite */ +/* the corresponding elements of A, and if UPLO = 'L', the */ +/* diagonal and first subdiagonal of T overwrite the */ +/* corresponding elements of A. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of an intermediate tridiagonal */ +/* form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, + "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -7; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPEV ", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = ap[1]; + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); + iscale = 0; + if (anrm > 0. 
&& anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + i__1 = *n * (*n + 1) / 2; + _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); + } + +/* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + _starpu_dsptrd_(uplo, n, &ap[1], &w[1], &work[inde], &work[indtau], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DOPGTR to generate the orthogonal matrix, then call DSTEQR. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + indwrk = indtau + *n; + _starpu_dopgtr_(uplo, n, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[ + indwrk], &iinfo); + _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ + indtau], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + + return 0; + +/* End of DSPEV */ + +} /* _starpu_dspev_ */ diff --git a/min-dgels/base/SRC/dspevd.c b/min-dgels/base/SRC/dspevd.c new file mode 100644 index 0000000..1c6f3b6 --- /dev/null +++ b/min-dgels/base/SRC/dspevd.c @@ -0,0 +1,314 @@ +/* dspevd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * + ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm, rmin, rmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo, lwmin; + logical wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, + doublereal *); + integer indtau; + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *); + integer indwrk, liwmin; + extern /* Subroutine */ int _starpu_dsptrd_(char *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_dopmtr_(char *, char *, char *, integer *, integer *, doublereal * +, 
doublereal *, doublereal *, integer *, doublereal *, integer *); + integer llwork; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPEVD computes all the eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric matrix A in packed storage. If eigenvectors are */ +/* desired, it uses a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, AP is overwritten by values generated during the */ +/* reduction to tridiagonal form. 
If UPLO = 'U', the diagonal */ +/* and first superdiagonal of the tridiagonal matrix T overwrite */ +/* the corresponding elements of A, and if UPLO = 'L', the */ +/* diagonal and first subdiagonal of T overwrite the */ +/* corresponding elements of A. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the required LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N <= 1, LWORK must be at least 1. */ +/* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. */ +/* If JOBZ = 'V' and N > 1, LWORK must be at least */ +/* 1 + 6*N + N**2. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the required sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the required LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. 
*/ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the required sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of an intermediate tridiagonal */ +/* form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, + "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -7; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + i__1 * i__1; + } else { + liwmin = 1; + lwmin = *n << 1; + } + } + iwork[1] = liwmin; + work[1] = (doublereal) lwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -9; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = ap[1]; + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + i__1 = *n * (*n + 1) / 2; + _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); + } + +/* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + _starpu_dsptrd_(uplo, n, &ap[1], &w[1], &work[inde], &work[indtau], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call DOPMTR to multiply it by the */ +/* Householder transformations represented in AP. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + _starpu_dstedc_("I", n, &w[1], &work[inde], &z__[z_offset], ldz, &work[indwrk] +, &llwork, &iwork[1], liwork, info); + _starpu_dopmtr_("L", uplo, "N", n, n, &ap[1], &work[indtau], &z__[z_offset], + ldz, &work[indwrk], &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. 
/ sigma; + _starpu_dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + return 0; + +/* End of DSPEVD */ + +} /* _starpu_dspevd_ */ diff --git a/min-dgels/base/SRC/dspevx.c b/min-dgels/base/SRC/dspevx.c new file mode 100644 index 0000000..39c5651 --- /dev/null +++ b/min-dgels/base/SRC/dspevx.c @@ -0,0 +1,467 @@ +/* dspevx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, + doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *ifail, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, jj; + doublereal eps, vll, vuu, tmp1; + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, indee; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical 
wantz; + extern doublereal _starpu_dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal abstll, bignum; + extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, + doublereal *); + integer indtau, indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo; + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dopgtr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsptrd_(char *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_dsteqr_(char *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dopmtr_(char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer nsplit; + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric matrix A in packed storage. 
Eigenvalues/vectors */ +/* can be selected by specifying either a range of values or a range of */ +/* indices for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found; */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found; */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, AP is overwritten by values generated during the */ +/* reduction to tridiagonal form. If UPLO = 'U', the diagonal */ +/* and first superdiagonal of the tridiagonal matrix T overwrite */ +/* the corresponding elements of A, and if UPLO = 'L', the */ +/* diagonal and first subdiagonal of T overwrite the */ +/* corresponding elements of A. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. 
*/ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing AP to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the selected eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If an eigenvector fails to converge, then that column of Z */ +/* contains the latest approximation to the eigenvector, and the */ +/* index of the eigenvector is returned in IFAIL. */ +/* If JOBZ = 'N', then Z is not referenced. 
*/ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* Their indices are stored in array IFAIL. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! 
(_starpu_lsame_(uplo, "L") || _starpu_lsame_(uplo, + "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -7; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -8; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -9; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -14; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPEVX", &i__1); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = ap[1]; + } else { + if (*vl < ap[1] && *vu >= ap[1]) { + *m = 1; + w[1] = ap[1]; + } + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.; + vuu = 0.; + } + anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + i__1 = *n * (*n + 1) / 2; + _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. 
*/ + + indtau = 1; + inde = indtau + *n; + indd = inde + *n; + indwrk = indd + *n; + _starpu_dsptrd_(uplo, n, &ap[1], &work[indd], &work[inde], &work[indtau], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or DOPGTR and SSTEQR. If this fails */ +/* for some eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); + indee = indwrk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsterf_(n, &w[1], &work[indee], info); + } else { + _starpu_dopgtr_(uplo, n, &ap[1], &work[indtau], &z__[z_offset], ldz, & + work[indwrk], &iinfo); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ + indwrk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L20; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwo = indisp + *n; + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ + indwrk], &iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ + indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & + ifail[1], info); + +/* Apply orthogonal matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. 
*/ + + _starpu_dopmtr_("L", uplo, "N", n, m, &ap[1], &work[indtau], &z__[z_offset], + ldz, &work[indwrk], &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L20: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L30: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L40: */ + } + } + + return 0; + +/* End of DSPEVX */ + +} /* _starpu_dspevx_ */ diff --git a/min-dgels/base/SRC/dspgst.c b/min-dgels/base/SRC/dspgst.c new file mode 100644 index 0000000..08ef44b --- /dev/null +++ b/min-dgels/base/SRC/dspgst.c @@ -0,0 +1,284 @@ +/* dspgst.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b9 = -1.; +static doublereal c_b11 = 1.; + +/* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, + doublereal *ap, doublereal *bp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer j, k, j1, k1, jj, kk; + doublereal ct, ajj; + integer j1j1; + doublereal akk; + integer k1k1; + doublereal bjj, bkk; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int _starpu_dspr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dspmv_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, + doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. 
*/ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPGST reduces a real symmetric-definite generalized eigenproblem */ +/* to standard form, using packed storage. */ + +/* If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ + +/* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ + +/* B must have been previously factorized as U**T*U or L*L**T by DPPTRF. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ +/* = 2 or 3: compute U*A*U**T or L**T*A*L. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored and B is factored as */ +/* U**T*U; */ +/* = 'L': Lower triangle of A is stored and B is factored as */ +/* L*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, if INFO = 0, the transformed matrix, stored in the */ +/* same format as A. */ + +/* BP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The triangular factor from the Cholesky factorization of B, */ +/* stored in the same format as A, as returned by DPPTRF. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. 
External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --bp; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPGST", &i__1); + return 0; + } + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U')*A*inv(U) */ + +/* J1 and JJ are the indices of A(1,j) and A(j,j) */ + + jj = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + j1 = jj + 1; + jj += j; + +/* Compute the j-th column of the upper triangle of A */ + + bjj = bp[jj]; + _starpu_dtpsv_(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], & + c__1); + i__2 = j - 1; + _starpu_dspmv_(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, & + ap[j1], &c__1); + i__2 = j - 1; + d__1 = 1. / bjj; + _starpu_dscal_(&i__2, &d__1, &ap[j1], &c__1); + i__2 = j - 1; + ap[jj] = (ap[jj] - _starpu_ddot_(&i__2, &ap[j1], &c__1, &bp[j1], & + c__1)) / bjj; +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L') */ + +/* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */ + + kk = 1; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + k1k1 = kk + *n - k + 1; + +/* Update the lower triangle of A(k:n,k:n) */ + + akk = ap[kk]; + bkk = bp[kk]; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + ap[kk] = akk; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. 
/ bkk; + _starpu_dscal_(&i__2, &d__1, &ap[kk + 1], &c__1); + ct = akk * -.5; + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) + ; + i__2 = *n - k; + _starpu_dspr2_(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1] +, &c__1, &ap[k1k1]); + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) + ; + i__2 = *n - k; + _starpu_dtpsv_(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1], + &ap[kk + 1], &c__1); + } + kk = k1k1; +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U' */ + +/* K1 and KK are the indices of A(1,k) and A(k,k) */ + + kk = 0; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + k1 = kk + 1; + kk += k; + +/* Update the upper triangle of A(1:k,1:k) */ + + akk = ap[kk]; + bkk = bp[kk]; + i__2 = k - 1; + _starpu_dtpmv_(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[ + k1], &c__1); + ct = akk * .5; + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); + i__2 = k - 1; + _starpu_dspr2_(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, & + ap[1]); + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); + i__2 = k - 1; + _starpu_dscal_(&i__2, &bkk, &ap[k1], &c__1); +/* Computing 2nd power */ + d__1 = bkk; + ap[kk] = akk * (d__1 * d__1); +/* L30: */ + } + } else { + +/* Compute L'*A*L */ + +/* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */ + + jj = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + j1j1 = jj + *n - j + 1; + +/* Compute the j-th column of the lower triangle of A */ + + ajj = ap[jj]; + bjj = bp[jj]; + i__2 = *n - j; + ap[jj] = ajj * bjj + _starpu_ddot_(&i__2, &ap[jj + 1], &c__1, &bp[jj + + 1], &c__1); + i__2 = *n - j; + _starpu_dscal_(&i__2, &bjj, &ap[jj + 1], &c__1); + i__2 = *n - j; + _starpu_dspmv_(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, & + c_b11, &ap[jj + 1], &c__1); + i__2 = *n - j + 1; + _starpu_dtpmv_(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj], + &c__1); + jj = j1j1; +/* L40: 
*/ + } + } + } + return 0; + +/* End of DSPGST */ + +} /* _starpu_dspgst_ */ diff --git a/min-dgels/base/SRC/dspgv.c b/min-dgels/base/SRC/dspgv.c new file mode 100644 index 0000000..ecf0e3d --- /dev/null +++ b/min-dgels/base/SRC/dspgv.c @@ -0,0 +1,243 @@ +/* dspgv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + + /* Local variables */ + integer j, neig; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dspev_(char *, char *, integer *, doublereal * +, doublereal *, doublereal *, integer *, doublereal *, integer *); + char trans[1]; + logical upper; + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, + doublereal *, integer *); + logical wantz; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( + char *, integer *, doublereal *, integer *), _starpu_dspgst_( + integer *, char *, integer *, doublereal *, doublereal *, integer + *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. 
Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPGV computes all the eigenvalues and, optionally, the eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* Here A and B are assumed to be symmetric, stored in packed format, */ +/* and B is also positive definite. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension */ +/* (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, the contents of AP are destroyed. */ + +/* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* B, packed columnwise in a linear array. The j-th column of B */ +/* is stored in the array BP as follows: */ +/* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ + +/* On exit, the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T, in the same storage */ +/* format as B. 
*/ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* eigenvectors. The eigenvectors are normalized as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPPTRF or DSPEV returned an error code: */ +/* <= N: if INFO = i, DSPEV failed to converge; */ +/* i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero. */ +/* > N: if INFO = n + i, for 1 <= i <= n, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --bp; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! 
(upper || _starpu_lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPGV ", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + _starpu_dpptrf_(uplo, n, &bp[1], info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); + _starpu_dspev_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + i__1 = neig; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L10: */ + } + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + i__1 = neig; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L20: */ + } + } + } + return 0; + +/* End of DSPGV */ + +} /* _starpu_dspgv_ */ diff --git a/min-dgels/base/SRC/dspgvd.c b/min-dgels/base/SRC/dspgvd.c new file mode 100644 index 0000000..a260406 --- /dev/null +++ b/min-dgels/base/SRC/dspgvd.c @@ -0,0 +1,334 @@ +/* dspgvd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer j, neig; + extern logical _starpu_lsame_(char *, char *); + integer lwmin; + char trans[1]; + logical upper; + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, + doublereal *, integer *); + logical wantz; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dspevd_( + char *, char *, integer *, doublereal *, doublereal *, doublereal + *, integer *, doublereal *, integer *, integer *, integer *, + integer *); + integer liwmin; + extern /* Subroutine */ int _starpu_dpptrf_(char *, integer *, doublereal *, + integer *), _starpu_dspgst_(integer *, char *, integer *, + doublereal *, doublereal *, integer *); + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSPGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* B are assumed to be symmetric, stored in packed format, and B is also */ +/* positive definite. */ +/* If eigenvectors are desired, it uses a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, the contents of AP are destroyed. 
*/ + +/* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* B, packed columnwise in a linear array. The j-th column of B */ +/* is stored in the array BP as follows: */ +/* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ + +/* On exit, the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T, in the same storage */ +/* format as B. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* eigenvectors. The eigenvectors are normalized as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the required LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N <= 1, LWORK >= 1. */ +/* If JOBZ = 'N' and N > 1, LWORK >= 2*N. */ +/* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the required sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the required LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. 
*/ +/* If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ +/* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the required sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPPTRF or DSPEVD returned an error code: */ +/* <= N: if INFO = i, DSPEVD failed to converge; */ +/* i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero; */ +/* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --bp; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! 
(upper || _starpu_lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = *n << 1; + } + } + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*liwork < liwmin && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPGVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of BP. */ + + _starpu_dpptrf_(uplo, n, &bp[1], info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); + _starpu_dspevd_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], + lwork, &iwork[1], liwork, info); +/* Computing MAX */ + d__1 = (doublereal) lwmin; + lwmin = (integer) max(d__1,work[1]); +/* Computing MAX */ + d__1 = (doublereal) liwmin, d__2 = (doublereal) iwork[1]; + liwmin = (integer) max(d__1,d__2); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. 
*/ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + i__1 = neig; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L10: */ + } + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + i__1 = neig; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L20: */ + } + } + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSPGVD */ + +} /* _starpu_dspgvd_ */ diff --git a/min-dgels/base/SRC/dspgvx.c b/min-dgels/base/SRC/dspgvx.c new file mode 100644 index 0000000..e8b3da0 --- /dev/null +++ b/min-dgels/base/SRC/dspgvx.c @@ -0,0 +1,341 @@ +/* dspgvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine: f2c translation of the LAPACK driver DSPGVX (see Purpose below). Every exit path returns 0; status is reported through *info. */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, + integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + + /* Local variables */ + integer j; + extern logical _starpu_lsame_(char *, char *); + char trans[1]; + logical upper; + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, + doublereal *, integer *); + logical wantz, alleig, indeig, valeig; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( + char *, integer *, doublereal *, integer *), _starpu_dspgst_( + integer *, char *, integer *, doublereal *, doublereal *, integer + *), _starpu_dspevx_(char *, char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSPGVX computes selected eigenvalues, and optionally, eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. Here A */ +/* and B are assumed to be symmetric, stored in packed storage, and B */ +/* is also positive definite. Eigenvalues and eigenvectors can be */ +/* selected by specifying either a range of values or a range of indices */ +/* for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A and B are stored; */ +/* = 'L': Lower triangle of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix pencil (A,B). N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, the contents of AP are destroyed. */ + +/* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* B, packed columnwise in a linear array. 
The j-th column of B */ +/* is stored in the array BP as follows: */ +/* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ + +/* On exit, the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T, in the same storage */ +/* format as B. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* On normal exit, the first M elements contain the selected */ +/* eigenvalues in ascending order. 
*/ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* The eigenvectors are normalized as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ + +/* If an eigenvector fails to converge, then that column of Z */ +/* contains the latest approximation to the eigenvector, and the */ +/* index of the eigenvector is returned in IFAIL. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPPTRF or DSPEVX returned an error code: */ +/* <= N: if INFO = i, DSPEVX failed to converge; */ +/* i eigenvectors failed to converge. Their indices */ +/* are stored in array IFAIL. */ +/* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. 
*/ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. A failing check sets INFO = -i, where i is the position of the offending argument in the parameter list (here -9 = VU, -10 = IL, -11 = IU, -16 = LDZ). */ + + /* Parameter adjustments: shift each array pointer so the code below can use 1-based (Fortran-style) indices; Z is addressed as z__[row + col * z_dim1]. */ + --ap; + --bp; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + upper = _starpu_lsame_(uplo, "U"); + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! (alleig || valeig || indeig)) { + *info = -3; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -9; + } + } else if (indeig) { + if (*il < 1) { + *info = -10; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -11; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -16; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPGVX", &i__1); + return 0; + } + +/* Quick return if possible. M is reset to 0 first, so an empty problem (N = 0) reports no eigenvalues; note that M is not written on the argument-error return above. 
*/ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. A nonzero status i from DPPTRF is re-encoded as INFO = N + i before returning. */ + + _starpu_dpptrf_(uplo, n, &bp[1], info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. 
*/ + + _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); + _starpu_dspevx_(jobz, range, uplo, n, &ap[1], vl, vu, il, iu, abstol, m, &w[1], & + z__[z_offset], ldz, &work[1], &iwork[1], &ifail[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. If DSPEVX left INFO > 0, M is overwritten with INFO - 1, so the loops below process only the first INFO - 1 columns of Z. */ + + if (*info > 0) { + *m = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L10: */ + } + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + + 1], &c__1); +/* L20: */ + } + } + } + + return 0; + +/* End of DSPGVX */ + +} /* _starpu_dspgvx_ */ diff --git a/min-dgels/base/SRC/dsposv.c b/min-dgels/base/SRC/dsposv.c new file mode 100644 index 0000000..61383f2 --- /dev/null +++ b/min-dgels/base/SRC/dsposv.c @@ -0,0 +1,418 @@ +/* dsposv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: c_b10 (-1) and c_b11 (1) are the two scalars passed to DSYMM when forming R = B - A*X, c_b11 is also the DAXPY multiplier, and c__1 is the unit stride. */ + +static doublereal c_b10 = -1.; +static doublereal c_b11 = 1.; +static integer c__1 = 1; + +/* Subroutine: f2c translation of LAPACK DSPOSV (see Purpose below). Every exit path returns 0; status is reported through *info and *iter. NOTE(review): this symbol carries a doubled _starpu_ prefix, unlike the routines it calls - presumably an artifact of the symbol-renaming step; confirm against callers before changing it. */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + x, integer *ldx, doublereal *work, real *swork, integer *iter, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, + x_dim1, x_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal cte, eps, anrm; + integer ptsa; + doublereal rnrm, xnrm; + integer ptsx; + extern logical _starpu_lsame_(char *, char *); + integer iiter; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dsymm_(char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlag2s_(integer *, integer *, doublereal *, + integer *, real *, integer *, integer *), _starpu_slag2d_(integer *, + integer *, real *, integer *, doublereal *, integer *, integer *), + _starpu_dlat2s_(char *, integer *, doublereal *, integer *, real *, + integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal 
*, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, + integer *, integer *), _starpu_dpotrs_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_spotrf_(char *, integer *, real *, integer *, integer *), _starpu_spotrs_(char *, integer *, integer *, real *, integer *, + real *, integer *, integer *); + + +/* -- LAPACK PROTOTYPE driver routine (version 3.1.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ +/* May 2007 */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPOSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric positive definite matrix and X and B */ +/* are N-by-NRHS matrices. */ + +/* DSPOSV first attempts to factorize the matrix in SINGLE PRECISION */ +/* and use this factorization within an iterative refinement procedure */ +/* to produce a solution with DOUBLE PRECISION normwise backward error */ +/* quality (see below). If the approach fails the method switches to a */ +/* DOUBLE PRECISION factorization and solve. */ + +/* The iterative refinement is not going to be a winning strategy if */ +/* the ratio SINGLE PRECISION performance over DOUBLE PRECISION */ +/* performance is too small. A reasonable strategy should take the */ +/* number of right-hand sides and the size of the matrix into account. */ +/* This might be done with a call to ILAENV in the future. Up to now, we */ +/* always try iterative refinement. 
*/ + +/* The iterative refinement process is stopped if */ +/* ITER > ITERMAX */ +/* or for all the RHS we have: */ +/* RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX */ +/* where */ +/* o ITER is the number of the current iteration in the iterative */ +/* refinement process */ +/* o RNRM is the infinity-norm of the residual */ +/* o XNRM is the infinity-norm of the solution */ +/* o ANRM is the infinity-operator-norm of the matrix A */ +/* o EPS is the machine epsilon returned by DLAMCH('Epsilon') */ +/* The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 */ +/* respectively. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input or input/output) DOUBLE PRECISION array, */ +/* dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ +/* On exit, if iterative refinement has been successfully used */ +/* (INFO.EQ.0 and ITER.GE.0, see description below), then A is */ +/* unchanged, if double precision factorization has been used */ +/* (INFO.EQ.0 and ITER.LT.0, see description below), then the */ +/* array A contains the factor U or L from the Cholesky */ +/* factorization A = U**T*U or A = L*L**T. */ + + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N*NRHS) */ +/* This array is used to hold the residual vectors. */ + +/* SWORK (workspace) REAL array, dimension (N*(N+NRHS)) */ +/* This array is used to store the single precision matrix and the */ +/* right-hand sides or solutions in single precision. */ + +/* ITER (output) INTEGER */ +/* < 0: iterative refinement has failed, double precision */ +/* factorization has been performed */ +/* -1 : the routine fell back to full precision for */ +/* implementation- or machine-specific reasons */ +/* -2 : narrowing the precision induced an overflow, */ +/* the routine fell back to full precision */ +/* -3 : failure of SPOTRF */ +/* -31: stop the iterative refinement after the 30th */ +/* iterations */ +/* > 0: iterative refinement has been successfully used. */ +/* Returns the number of iterations (0 when the initial single precision solve already met the stopping criterion) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the leading minor of order i of (DOUBLE */ +/* PRECISION) A is not positive definite, so the */ +/* factorization could not be completed, and the solution */ +/* has not been computed. */ + +/* ========= */ + +/* .. Parameters .. */ + + + + +/* .. Local Scalars .. */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments: WORK is treated as a matrix with leading dimension N, and every array pointer is shifted so the code below can use 1-based (Fortran-style) indices. */ + work_dim1 = *n; + work_offset = 1 + work_dim1; + work -= work_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --swork; + + /* Function Body */ + *info = 0; + *iter = 0; + +/* Test the input parameters. INFO = -i names the i-th argument in the parameter list (here -5 = LDA, -7 = LDB, -9 = LDX). */ + + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldx < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPOSV", &i__1); + return 0; + } + +/* Quick return if (N.EQ.0). */ + + if (*n == 0) { + return 0; + } + +/* Skip single precision iterative refinement if a priori slower */ +/* than double precision factorization. The guard is the constant FALSE_, so (assuming f2c.h defines it as false - TODO confirm) this branch is never taken, matching the Purpose note that refinement is always tried. */ + + if (FALSE_) { + *iter = -1; + goto L40; + } + +/* Compute some constants: CTE = ANRM*EPS*SQRT(N)*BWDMAX with BWDMAX = 1.0, the factor multiplying XNRM in the stopping test RNRM > XNRM*CTE used below. */ + + anrm = _starpu_dlansy_("I", uplo, n, &a[a_offset], lda, &work[work_offset]); + eps = _starpu_dlamch_("Epsilon"); + cte = anrm * eps * sqrt((doublereal) (*n)) * 1.; + +/* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */ + + ptsa = 1; + ptsx = ptsa + *n * *n; + +/* Convert B from double precision to single precision and store the */ +/* result in SX. 
*/ + + _starpu_dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Convert A from double precision to single precision and store the */ +/* result in SA. */ + + _starpu_dlat2s_(uplo, n, &a[a_offset], lda, &swork[ptsa], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Compute the Cholesky factorization of SA. */ + + _starpu_spotrf_(uplo, n, &swork[ptsa], n, info); + + if (*info != 0) { + *iter = -3; + goto L40; + } + +/* Solve the system SA*SX = SB. 
*/ + + _starpu_spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); + +/* Convert SX back to double precision */ + + _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info); + +/* Compute R = B - AX (R is WORK). */ + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); + + _starpu_dsymm_("Left", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], + ldx, &c_b11, &work[work_offset], n); + +/* Check whether the NRHS normwise backward errors satisfy the */ +/* stopping criterion. If yes, set ITER=0 and return. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1], abs(d__1)); + rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + + i__ * work_dim1], abs(d__1)); + if (rnrm > xnrm * cte) { + goto L10; + } + } + +/* If we are here, the NRHS normwise backward errors satisfy the */ +/* stopping criterion. We are good to exit. */ + + *iter = 0; + return 0; + +L10: + + for (iiter = 1; iiter <= 30; ++iiter) { /* 30 is ITERMAX from the Purpose section */ + +/* Convert R (in WORK) from double precision to single precision */ +/* and store the result in SX. */ + + _starpu_dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info); + + if (*info != 0) { + *iter = -2; + goto L40; + } + +/* Solve the system SA*SX = SR. */ + + _starpu_spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); + +/* Convert SX back to double precision and update the current */ +/* iterate. */ + + _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info); + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * + x_dim1 + 1], &c__1); + } + +/* Compute R = B - AX (R is WORK). 
*/ + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); + + _starpu_dsymm_("L", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], + ldx, &c_b11, &work[work_offset], n); + +/* Check whether the NRHS normwise backward errors satisfy the */ +/* stopping criterion. If yes, set ITER=IITER>0 and return. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1], abs(d__1)); + rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + + i__ * work_dim1], abs(d__1)); + if (rnrm > xnrm * cte) { + goto L20; + } + } + +/* If we are here, the NRHS normwise backward errors satisfy the */ +/* stopping criterion, we are good to exit. */ + + *iter = iiter; + + return 0; + +L20: + +/* L30: */ + ; + } + +/* If we are at this place of the code, this is because we have */ +/* performed ITER=ITERMAX iterations and never satisfied the */ +/* stopping criterion, set up the ITER flag accordingly and follow */ +/* up on double precision routine. */ + + *iter = -31; + +L40: + +/* Single-precision iterative refinement failed to converge to a */ +/* satisfactory solution, so we resort to double precision. */ + + _starpu_dpotrf_(uplo, n, &a[a_offset], lda, info); + + if (*info != 0) { + return 0; + } + + _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &x[x_offset], ldx, info); + + return 0; + +/* End of DSPOSV. */ + +} /* _starpu__starpu_dsposv_ */ diff --git a/min-dgels/base/SRC/dsprfs.c b/min-dgels/base/SRC/dsprfs.c new file mode 100644 index 0000000..216871e --- /dev/null +++ b/min-dgels/base/SRC/dsprfs.c @@ -0,0 +1,421 @@ +/* dsprfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s; + integer ik, kk; + doublereal xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal lstres; + extern /* Subroutine */ int _starpu_dsptrs_(char *, integer *, integer *, + 
doublereal *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric indefinite */ +/* and packed, and provides error bounds and backward error estimates */ +/* for the solution. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangle of the symmetric matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The factored form of the matrix A. AFP contains the block */ +/* diagonal matrix D and the multipliers used to obtain the */ +/* factor U or L from the factorization A = U*D*U**T or */ +/* A = L*D*L**T as computed by DSPTRF, stored as a packed */ +/* triangular matrix. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSPTRF. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DSPTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --ap; + --afp; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*ldx < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dspmv_(uplo, n, &c_b12, &ap[1], &x[j * x_dim1 + 1], &c__1, &c_b14, & + work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). 
*/ + + kk = 1; + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + ik = kk; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; + s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * + x_dim1], abs(d__2)); + ++ik; +/* L40: */ + } + work[k] = work[k] + (d__1 = ap[kk + k - 1], abs(d__1)) * xk + + s; + kk += k; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + work[k] += (d__1 = ap[kk], abs(d__1)) * xk; + ik = kk + 1; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; + s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * + x_dim1], abs(d__2)); + ++ik; +/* L60: */ + } + work[k] += s; + kk += *n - k + 1; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); + _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. 
FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A'). */ + + _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, + info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, + info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) 
{ + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DSPRFS */ + +} /* _starpu_dsprfs_ */ diff --git a/min-dgels/base/SRC/dspsv.c b/min-dgels/base/SRC/dspsv.c new file mode 100644 index 0000000..9563d08 --- /dev/null +++ b/min-dgels/base/SRC/dspsv.c @@ -0,0 +1,176 @@ +/* dspsv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal + *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dsptrf_( + char *, integer *, doublereal *, integer *, integer *), + _starpu_dsptrs_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric matrix stored in packed format and X */ +/* and B are N-by-NRHS matrices. 
*/ + +/* The diagonal pivoting method is used to factor A as */ +/* A = U * D * U**T, if UPLO = 'U', or */ +/* A = L * D * L**T, if UPLO = 'L', */ +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, D is symmetric and block diagonal with 1-by-1 */ +/* and 2-by-2 diagonal blocks. The factored form of A is then used to */ +/* solve the system of equations A * X = B. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. */ + +/* On exit, the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L from the factorization */ +/* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ +/* a packed triangular matrix in the same storage format as A. */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D, as */ +/* determined by DSPTRF. If IPIV(k) > 0, then rows and columns */ +/* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ +/* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ +/* then rows and columns k-1 and -IPIV(k) were interchanged and */ +/* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. 
If UPLO = 'L' and */ +/* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ +/* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ +/* diagonal block. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular, so the solution could not be */ +/* computed. */ + +/* Further Details */ +/* =============== */ + +/* The packed storage scheme is illustrated by the following example */ +/* when N = 4, UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 a14 */ +/* a22 a23 a24 */ +/* a33 a34 (aij = aji) */ +/* a44 */ + +/* Packed storage of the upper triangle of A: */ + +/* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPSV ", &i__1); + return 0; + } + +/* Compute the factorization A = U*D*U' or A = L*D*L'. 
*/ + + _starpu_dsptrf_(uplo, n, &ap[1], &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + _starpu_dsptrs_(uplo, n, nrhs, &ap[1], &ipiv[1], &b[b_offset], ldb, info); + + } + return 0; + +/* End of DSPSV */ + +} /* _starpu_dspsv_ */ diff --git a/min-dgels/base/SRC/dspsvx.c b/min-dgels/base/SRC/dspsvx.c new file mode 100644 index 0000000..2c86545 --- /dev/null +++ b/min-dgels/base/SRC/dspsvx.c @@ -0,0 +1,329 @@ +/* dspsvx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, + doublereal *); + extern /* Subroutine */ int 
_starpu_dspcon_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *), _starpu_dsprfs_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *), _starpu_dsptrf_(char *, integer *, + doublereal *, integer *, integer *), _starpu_dsptrs_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPSVX uses the diagonal pivoting factorization A = U*D*U**T or */ +/* A = L*D*L**T to compute the solution to a real system of linear */ +/* equations A * X = B, where A is an N-by-N symmetric matrix stored */ +/* in packed format and X and B are N-by-NRHS matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as */ +/* A = U * D * U**T, if UPLO = 'U', or */ +/* A = L * D * L**T, if UPLO = 'L', */ +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. */ + +/* 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ +/* returns with INFO = i. Otherwise, the factored form of A is used */ +/* to estimate the condition number of the matrix A. 
If the */ +/* reciprocal of the condition number is less than machine precision, */ +/* INFO = N+1 is returned as a warning, but the routine still goes on */ +/* to solve for X and compute error bounds as described below. */ + +/* 3. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 4. Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of A has been */ +/* supplied on entry. */ +/* = 'F': On entry, AFP and IPIV contain the factored form of */ +/* A. AP, AFP and IPIV will not be modified. */ +/* = 'N': The matrix A will be copied to AFP and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangle of the symmetric matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. 
*/ + +/* AFP (input or output) DOUBLE PRECISION array, dimension */ +/* (N*(N+1)/2) */ +/* If FACT = 'F', then AFP is an input argument and on entry */ +/* contains the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L from the factorization */ +/* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ +/* a packed triangular matrix in the same storage format as A. */ + +/* If FACT = 'N', then AFP is an output argument and on exit */ +/* contains the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L from the factorization */ +/* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ +/* a packed triangular matrix in the same storage format as A. */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains details of the interchanges and the block structure */ +/* of D, as determined by DSPTRF. */ +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains details of the interchanges and the block structure */ +/* of D, as determined by DSPTRF. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. 
*/ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A. If RCOND is less than the machine precision (in */ +/* particular, if RCOND = 0), the matrix is singular to working */ +/* precision. This condition is indicated by a return code of */ +/* INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: D(i,i) is exactly zero. The factorization */ +/* has been completed but the factor D is exactly */ +/* singular, so the solution and error bounds could */ +/* not be computed. RCOND = 0 is returned. */ +/* = N+1: D is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. 
Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* Further Details */ +/* =============== */ + +/* The packed storage scheme is illustrated by the following example */ +/* when N = 4, UPLO = 'U': */ + +/* Two-dimensional storage of the symmetric matrix A: */ + +/* a11 a12 a13 a14 */ +/* a22 a23 a24 */ +/* a33 a34 (aij = aji) */ +/* a44 */ + +/* Packed storage of the upper triangle of A: */ + +/* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --afp; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + if (! nofact && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldx < max(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPSVX", &i__1); + return 0; + } + + if (nofact) { + +/* Compute the factorization A = U*D*U' or A = L*D*L'. */ + + i__1 = *n * (*n + 1) / 2; + _starpu_dcopy_(&i__1, &ap[1], &c__1, &afp[1], &c__1); + _starpu_dsptrf_(uplo, n, &afp[1], &ipiv[1], info); + +/* Return if INFO is non-zero. 
*/ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlansp_("I", uplo, n, &ap[1], &work[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dspcon_(uplo, n, &afp[1], &ipiv[1], &anorm, rcond, &work[1], &iwork[1], + info); + +/* Compute the solution vectors X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dsptrs_(uplo, n, nrhs, &afp[1], &ipiv[1], &x[x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + _starpu_dsprfs_(uplo, n, nrhs, &ap[1], &afp[1], &ipiv[1], &b[b_offset], ldb, &x[ + x_offset], ldx, &ferr[1], &berr[1], &work[1], &iwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of DSPSVX */ + +} /* _starpu_dspsvx_ */ diff --git a/min-dgels/base/SRC/dsptrd.c b/min-dgels/base/SRC/dsptrd.c new file mode 100644 index 0000000..8a09644 --- /dev/null +++ b/min-dgels/base/SRC/dsptrd.c @@ -0,0 +1,277 @@ +/* dsptrd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 0.; +static doublereal c_b14 = -1.; + +/* Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, + doublereal *d__, doublereal *e, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, i1, ii, i1i1; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal taui; + extern /* Subroutine */ int _starpu_dspr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *); + doublereal alpha; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), _starpu_dspmv_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSPTRD reduces a real symmetric matrix A stored in packed form to */ +/* symmetric tridiagonal form T by an orthogonal similarity */ +/* transformation: Q**T * A * Q = T. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ +/* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* of A are overwritten by the corresponding elements of the */ +/* tridiagonal matrix T, and the elements above the first */ +/* superdiagonal, with the array TAU, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors; if UPLO */ +/* = 'L', the diagonal and first subdiagonal of A are over- */ +/* written by the corresponding elements of the tridiagonal */ +/* matrix T, and the elements below the first subdiagonal, with */ +/* the array TAU, represent the orthogonal matrix Q as a product */ +/* of elementary reflectors. See Further Details. */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of the tridiagonal matrix T: */ +/* D(i) = A(i,i). */ + +/* E (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The off-diagonal elements of the tridiagonal matrix T: */ +/* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(n-1) . . . H(2) H(1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, */ +/* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). */ + +/* If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(1) H(2) . . . H(n-1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, */ +/* overwriting A(i+2:n,i), and tau is stored in TAU(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + --tau; + --e; + --d__; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPTRD", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* I1 is the index in AP of A(1,I+1). 
*/ + + i1 = *n * (*n - 1) / 2 + 1; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v' */ +/* to annihilate A(1:i-1,i+1) */ + + _starpu_dlarfg_(&i__, &ap[i1 + i__ - 1], &ap[i1], &c__1, &taui); + e[i__] = ap[i1 + i__ - 1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + ap[i1 + i__ - 1] = 1.; + +/* Compute y := tau * A * v storing y in TAU(1:i) */ + + _starpu_dspmv_(uplo, &i__, &taui, &ap[1], &ap[i1], &c__1, &c_b8, &tau[ + 1], &c__1); + +/* Compute w := y - 1/2 * tau * (y'*v) * v */ + + alpha = taui * -.5 * _starpu_ddot_(&i__, &tau[1], &c__1, &ap[i1], & + c__1); + _starpu_daxpy_(&i__, &alpha, &ap[i1], &c__1, &tau[1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w' - w * v' */ + + _starpu_dspr2_(uplo, &i__, &c_b14, &ap[i1], &c__1, &tau[1], &c__1, & + ap[1]); + + ap[i1 + i__ - 1] = e[i__]; + } + d__[i__ + 1] = ap[i1 + i__]; + tau[i__] = taui; + i1 -= i__; +/* L10: */ + } + d__[1] = ap[1]; + } else { + +/* Reduce the lower triangle of A. II is the index in AP of */ +/* A(i,i) and I1I1 is the index of A(i+1,i+1). */ + + ii = 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i1i1 = ii + *n - i__ + 1; + +/* Generate elementary reflector H(i) = I - tau * v * v' */ +/* to annihilate A(i+2:n,i) */ + + i__2 = *n - i__; + _starpu_dlarfg_(&i__2, &ap[ii + 1], &ap[ii + 2], &c__1, &taui); + e[i__] = ap[ii + 1]; + + if (taui != 0.) 
{ + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + ap[ii + 1] = 1.; + +/* Compute y := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + _starpu_dspmv_(uplo, &i__2, &taui, &ap[i1i1], &ap[ii + 1], &c__1, & + c_b8, &tau[i__], &c__1); + +/* Compute w := y - 1/2 * tau * (y'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * _starpu_ddot_(&i__2, &tau[i__], &c__1, &ap[ii + + 1], &c__1); + i__2 = *n - i__; + _starpu_daxpy_(&i__2, &alpha, &ap[ii + 1], &c__1, &tau[i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w' - w * v' */ + + i__2 = *n - i__; + _starpu_dspr2_(uplo, &i__2, &c_b14, &ap[ii + 1], &c__1, &tau[i__], & + c__1, &ap[i1i1]); + + ap[ii + 1] = e[i__]; + } + d__[i__] = ap[ii]; + tau[i__] = taui; + ii = i1i1; +/* L20: */ + } + d__[*n] = ap[ii]; + } + + return 0; + +/* End of DSPTRD */ + +} /* _starpu_dsptrd_ */ diff --git a/min-dgels/base/SRC/dsptrf.c b/min-dgels/base/SRC/dsptrf.c new file mode 100644 index 0000000..e02969b --- /dev/null +++ b/min-dgels/base/SRC/dsptrf.c @@ -0,0 +1,628 @@ +/* dsptrf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal t, r1, d11, d12, d21, d22; + integer kc, kk, kp; + doublereal wk; + integer kx, knc, kpc, npp; + doublereal wkm1, wkp1; + integer imax, jmax; + extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *); + doublereal alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer kstep; + logical upper; + doublereal absakk; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal colmax, rowmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSPTRF computes the factorization of a real symmetric matrix A stored */ +/* in packed format using the Bunch-Kaufman diagonal pivoting method: */ + +/* A = U*D*U**T or A = L*D*L**T */ + +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangle of the symmetric matrix */ +/* A, packed columnwise in a linear array. The j-th column of A */ +/* is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* On exit, the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L, stored as a packed triangular */ +/* matrix overwriting A (see below for further details). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D. */ +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) is exactly zero. 
The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular, and division by zero will occur if it */ +/* is used to solve a system of equations. */ + +/* Further Details */ +/* =============== */ + +/* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services */ +/* Company */ + +/* If UPLO = 'U', then A = U*D*U', where */ +/* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I v 0 ) k-s */ +/* U(k) = ( 0 I 0 ) s */ +/* ( 0 0 I ) n-k */ +/* k-s s n-k */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ + +/* If UPLO = 'L', then A = L*D*L', where */ +/* L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ +/* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I 0 0 ) k-1 */ +/* L(k) = ( 0 I 0 ) s */ +/* ( 0 v I ) n-k-s+1 */ +/* k-1 s n-k-s+1 */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. 
External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ipiv; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPTRF", &i__1); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.) + 1.) / 8.; + + if (upper) { + +/* Factorize A as U*D*U' using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; + kc = (*n - 1) * *n / 2 + 1; +L10: + knc = kc; + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L110; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = ap[kc + k - 1], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k > 1) { + i__1 = k - 1; + imax = _starpu_idamax_(&i__1, &ap[kc], &c__1); + colmax = (d__1 = ap[kc + imax - 1], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0.) 
{ + +/* Column K is zero: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + rowmax = 0.; + jmax = imax; + kx = imax * (imax + 1) / 2 + imax; + i__1 = k; + for (j = imax + 1; j <= i__1; ++j) { + if ((d__1 = ap[kx], abs(d__1)) > rowmax) { + rowmax = (d__1 = ap[kx], abs(d__1)); + jmax = j; + } + kx += j; +/* L20: */ + } + kpc = (imax - 1) * imax / 2 + 1; + if (imax > 1) { + i__1 = imax - 1; + jmax = _starpu_idamax_(&i__1, &ap[kpc], &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = ap[kpc + jmax - 1], abs( + d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = ap[kpc + imax - 1], abs(d__1)) >= alpha * + rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K-1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k - kstep + 1; + if (kstep == 2) { + knc = knc - k + 1; + } + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the leading */ +/* submatrix A(1:k,1:k) */ + + i__1 = kp - 1; + _starpu_dswap_(&i__1, &ap[knc], &c__1, &ap[kpc], &c__1); + kx = kpc + kp - 1; + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + kx = kx + j - 1; + t = ap[knc + j - 1]; + ap[knc + j - 1] = ap[kx]; + ap[kx] = t; +/* L30: */ + } + t = ap[knc + kk - 1]; + ap[knc + kk - 1] = ap[kpc + kp - 1]; + ap[kpc + kp - 1] = t; + if (kstep == 2) { + t = ap[kc + k - 2]; + ap[kc + k - 2] = ap[kc + kp - 1]; + ap[kc + kp - 1] = t; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = 
U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ + +/* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' */ + + r1 = 1. / ap[kc + k - 1]; + i__1 = k - 1; + d__1 = -r1; + _starpu_dspr_(uplo, &i__1, &d__1, &ap[kc], &c__1, &ap[1]); + +/* Store U(k) in column k */ + + i__1 = k - 1; + _starpu_dscal_(&i__1, &r1, &ap[kc], &c__1); + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' */ +/* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' */ + + if (k > 2) { + + d12 = ap[k - 1 + (k - 1) * k / 2]; + d22 = ap[k - 1 + (k - 2) * (k - 1) / 2] / d12; + d11 = ap[k + (k - 1) * k / 2] / d12; + t = 1. / (d11 * d22 - 1.); + d12 = t / d12; + + for (j = k - 2; j >= 1; --j) { + wkm1 = d12 * (d11 * ap[j + (k - 2) * (k - 1) / 2] - + ap[j + (k - 1) * k / 2]); + wk = d12 * (d22 * ap[j + (k - 1) * k / 2] - ap[j + (k + - 2) * (k - 1) / 2]); + for (i__ = j; i__ >= 1; --i__) { + ap[i__ + (j - 1) * j / 2] = ap[i__ + (j - 1) * j / + 2] - ap[i__ + (k - 1) * k / 2] * wk - ap[ + i__ + (k - 2) * (k - 1) / 2] * wkm1; +/* L40: */ + } + ap[j + (k - 1) * k / 2] = wk; + ap[j + (k - 2) * (k - 1) / 2] = wkm1; +/* L50: */ + } + + } + + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + kc = knc - k; + goto L10; + + } else { + +/* Factorize A as L*D*L' using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; + kc = 1; + npp = *n * (*n + 1) / 2; +L60: + knc = kc; + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L110; + } + kstep = 1; + +/* Determine 
rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = ap[kc], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k < *n) { + i__1 = *n - k; + imax = k + _starpu_idamax_(&i__1, &ap[kc + 1], &c__1); + colmax = (d__1 = ap[kc + imax - k], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0.) { + +/* Column K is zero: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + rowmax = 0.; + kx = kc + imax - k; + i__1 = imax - 1; + for (j = k; j <= i__1; ++j) { + if ((d__1 = ap[kx], abs(d__1)) > rowmax) { + rowmax = (d__1 = ap[kx], abs(d__1)); + jmax = j; + } + kx = kx + *n - j; +/* L70: */ + } + kpc = npp - (*n - imax + 1) * (*n - imax + 2) / 2 + 1; + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + _starpu_idamax_(&i__1, &ap[kpc + 1], &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = ap[kpc + jmax - imax], abs( + d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = ap[kpc], abs(d__1)) >= alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K+1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k + kstep - 1; + if (kstep == 2) { + knc = knc + *n - k + 1; + } + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the trailing */ +/* submatrix A(k:n,k:n) */ + + if (kp < *n) { + i__1 = *n - kp; + _starpu_dswap_(&i__1, &ap[knc + kp - kk + 1], &c__1, &ap[kpc 
+ 1], + &c__1); + } + kx = knc + kp - kk; + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + kx = kx + *n - j + 1; + t = ap[knc + j - kk]; + ap[knc + j - kk] = ap[kx]; + ap[kx] = t; +/* L80: */ + } + t = ap[knc]; + ap[knc] = ap[kpc]; + ap[kpc] = t; + if (kstep == 2) { + t = ap[kc + 1]; + ap[kc + 1] = ap[kc + kp - k]; + ap[kc + kp - k] = t; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = L(k)*D(k) */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ + +/* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' */ + + r1 = 1. / ap[kc]; + i__1 = *n - k; + d__1 = -r1; + _starpu_dspr_(uplo, &i__1, &d__1, &ap[kc + 1], &c__1, &ap[kc + *n + - k + 1]); + +/* Store L(k) in column K */ + + i__1 = *n - k; + _starpu_dscal_(&i__1, &r1, &ap[kc + 1], &c__1); + } + } else { + +/* 2-by-2 pivot block D(k): columns K and K+1 now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + if (k < *n - 1) { + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' */ +/* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' */ + + d21 = ap[k + 1 + (k - 1) * ((*n << 1) - k) / 2]; + d11 = ap[k + 1 + k * ((*n << 1) - k - 1) / 2] / d21; + d22 = ap[k + (k - 1) * ((*n << 1) - k) / 2] / d21; + t = 1. 
/ (d11 * d22 - 1.); + d21 = t / d21; + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + wk = d21 * (d11 * ap[j + (k - 1) * ((*n << 1) - k) / + 2] - ap[j + k * ((*n << 1) - k - 1) / 2]); + wkp1 = d21 * (d22 * ap[j + k * ((*n << 1) - k - 1) / + 2] - ap[j + (k - 1) * ((*n << 1) - k) / 2]); + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ap[i__ + (j - 1) * ((*n << 1) - j) / 2] = ap[i__ + + (j - 1) * ((*n << 1) - j) / 2] - ap[i__ + + (k - 1) * ((*n << 1) - k) / 2] * wk - + ap[i__ + k * ((*n << 1) - k - 1) / 2] * + wkp1; +/* L90: */ + } + + ap[j + (k - 1) * ((*n << 1) - k) / 2] = wk; + ap[j + k * ((*n << 1) - k - 1) / 2] = wkp1; + +/* L100: */ + } + } + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + kc = knc + *n - k + 2; + goto L60; + + } + +L110: + return 0; + +/* End of DSPTRF */ + +} /* _starpu_dsptrf_ */ diff --git a/min-dgels/base/SRC/dsptri.c b/min-dgels/base/SRC/dsptri.c new file mode 100644 index 0000000..4ec56b8 --- /dev/null +++ b/min-dgels/base/SRC/dsptri.c @@ -0,0 +1,411 @@ +/* dsptri.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b11 = -1.; +static doublereal c_b13 = 0.; + +/* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * + ipiv, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + doublereal d__; + integer j, k; + doublereal t, ak; + integer kc, kp, kx, kpc, npp; + doublereal akp1; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp, akkp1; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + integer kstep; + extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer kcnext; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPTRI computes the inverse of a real symmetric indefinite matrix */ +/* A in packed storage using the factorization A = U*D*U**T or */ +/* A = L*D*L**T computed by DSPTRF. 
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the block diagonal matrix D and the multipliers */ +/* used to obtain the factor U or L as computed by DSPTRF, */ +/* stored as a packed triangular matrix. */ + +/* On exit, if INFO = 0, the (symmetric) inverse of the original */ +/* matrix, stored as a packed triangular matrix. The j-th column */ +/* of inv(A) is stored in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', */ +/* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSPTRF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its */ +/* inverse could not be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --work; + --ipiv; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! 
_starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + kp = *n * (*n + 1) / 2; + for (*info = *n; *info >= 1; --(*info)) { + if (ipiv[*info] > 0 && ap[kp] == 0.) { + return 0; + } + kp -= *info; +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + kp = 1; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ipiv[*info] > 0 && ap[kp] == 0.) { + return 0; + } + kp = kp + *n - *info + 1; +/* L20: */ + } + } + *info = 0; + + if (upper) { + +/* Compute inv(A) from the factorization A = U*D*U'. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; + kc = 1; +L30: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L50; + } + + kcnext = kc + k; + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Invert the diagonal block. */ + + ap[kc + k - 1] = 1. / ap[kc + k - 1]; + +/* Compute column K of the inverse. */ + + if (k > 1) { + i__1 = k - 1; + _starpu_dcopy_(&i__1, &ap[kc], &c__1, &work[1], &c__1); + i__1 = k - 1; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & + ap[kc], &c__1); + i__1 = k - 1; + ap[kc + k - 1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc], & + c__1); + } + kstep = 1; + } else { + +/* 2 x 2 diagonal block */ + +/* Invert the diagonal block. 
*/ + + t = (d__1 = ap[kcnext + k - 1], abs(d__1)); + ak = ap[kc + k - 1] / t; + akp1 = ap[kcnext + k] / t; + akkp1 = ap[kcnext + k - 1] / t; + d__ = t * (ak * akp1 - 1.); + ap[kc + k - 1] = akp1 / d__; + ap[kcnext + k] = ak / d__; + ap[kcnext + k - 1] = -akkp1 / d__; + +/* Compute columns K and K+1 of the inverse. */ + + if (k > 1) { + i__1 = k - 1; + _starpu_dcopy_(&i__1, &ap[kc], &c__1, &work[1], &c__1); + i__1 = k - 1; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & + ap[kc], &c__1); + i__1 = k - 1; + ap[kc + k - 1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc], & + c__1); + i__1 = k - 1; + ap[kcnext + k - 1] -= _starpu_ddot_(&i__1, &ap[kc], &c__1, &ap[kcnext] +, &c__1); + i__1 = k - 1; + _starpu_dcopy_(&i__1, &ap[kcnext], &c__1, &work[1], &c__1); + i__1 = k - 1; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & + ap[kcnext], &c__1); + i__1 = k - 1; + ap[kcnext + k] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kcnext], & + c__1); + } + kstep = 2; + kcnext = kcnext + k + 1; + } + + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + +/* Interchange rows and columns K and KP in the leading */ +/* submatrix A(1:k+1,1:k+1) */ + + kpc = (kp - 1) * kp / 2 + 1; + i__1 = kp - 1; + _starpu_dswap_(&i__1, &ap[kc], &c__1, &ap[kpc], &c__1); + kx = kpc + kp - 1; + i__1 = k - 1; + for (j = kp + 1; j <= i__1; ++j) { + kx = kx + j - 1; + temp = ap[kc + j - 1]; + ap[kc + j - 1] = ap[kx]; + ap[kx] = temp; +/* L40: */ + } + temp = ap[kc + k - 1]; + ap[kc + k - 1] = ap[kpc + kp - 1]; + ap[kpc + kp - 1] = temp; + if (kstep == 2) { + temp = ap[kc + k + k - 1]; + ap[kc + k + k - 1] = ap[kc + k + kp - 1]; + ap[kc + k + kp - 1] = temp; + } + } + + k += kstep; + kc = kcnext; + goto L30; +L50: + + ; + } else { + +/* Compute inv(A) from the factorization A = L*D*L'. */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks.
*/ + + npp = *n * (*n + 1) / 2; + k = *n; + kc = npp; +L60: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L80; + } + + kcnext = kc - (*n - k + 2); + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Invert the diagonal block. */ + + ap[kc] = 1. / ap[kc]; + +/* Compute column K of the inverse. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dcopy_(&i__1, &ap[kc + 1], &c__1, &work[1], &c__1); + i__1 = *n - k; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + *n - k + 1], &work[1], & + c__1, &c_b13, &ap[kc + 1], &c__1); + i__1 = *n - k; + ap[kc] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc + 1], &c__1); + } + kstep = 1; + } else { + +/* 2 x 2 diagonal block */ + +/* Invert the diagonal block. */ + + t = (d__1 = ap[kcnext + 1], abs(d__1)); + ak = ap[kcnext] / t; + akp1 = ap[kc] / t; + akkp1 = ap[kcnext + 1] / t; + d__ = t * (ak * akp1 - 1.); + ap[kcnext] = akp1 / d__; + ap[kc] = ak / d__; + ap[kcnext + 1] = -akkp1 / d__; + +/* Compute columns K-1 and K of the inverse. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dcopy_(&i__1, &ap[kc + 1], &c__1, &work[1], &c__1); + i__1 = *n - k; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + (*n - k + 1)], &work[1], + &c__1, &c_b13, &ap[kc + 1], &c__1); + i__1 = *n - k; + ap[kc] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc + 1], &c__1); + i__1 = *n - k; + ap[kcnext + 1] -= _starpu_ddot_(&i__1, &ap[kc + 1], &c__1, &ap[kcnext + + 2], &c__1); + i__1 = *n - k; + _starpu_dcopy_(&i__1, &ap[kcnext + 2], &c__1, &work[1], &c__1); + i__1 = *n - k; + _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + (*n - k + 1)], &work[1], + &c__1, &c_b13, &ap[kcnext + 2], &c__1); + i__1 = *n - k; + ap[kcnext] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kcnext + 2], & + c__1); + } + kstep = 2; + kcnext -= *n - k + 3; + } + + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + +/* Interchange rows and columns K and KP in the trailing */ +/* submatrix A(k-1:n,k-1:n) */ + + kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1; + if (kp < *n) { + 
i__1 = *n - kp; + _starpu_dswap_(&i__1, &ap[kc + kp - k + 1], &c__1, &ap[kpc + 1], & + c__1); + } + kx = kc + kp - k; + i__1 = kp - 1; + for (j = k + 1; j <= i__1; ++j) { + kx = kx + *n - j + 1; + temp = ap[kc + j - k]; + ap[kc + j - k] = ap[kx]; + ap[kx] = temp; +/* L70: */ + } + temp = ap[kc]; + ap[kc] = ap[kpc]; + ap[kpc] = temp; + if (kstep == 2) { + temp = ap[kc - *n + k - 1]; + ap[kc - *n + k - 1] = ap[kc - *n + kp - 1]; + ap[kc - *n + kp - 1] = temp; + } + } + + k -= kstep; + kc = kcnext; + goto L60; +L80: + ; + } + + return 0; + +/* End of DSPTRI */ + +} /* _starpu_dsptri_ */ diff --git a/min-dgels/base/SRC/dsptrs.c b/min-dgels/base/SRC/dsptrs.c new file mode 100644 index 0000000..ce02b48 --- /dev/null +++ b/min-dgels/base/SRC/dsptrs.c @@ -0,0 +1,456 @@ +/* dsptrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b19 = 1.; + +/* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, + doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer j, k; + doublereal ak, bk; + integer kc, kp; + doublereal akm1, bkm1; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal akm1k; + extern /* Subroutine */ int _starpu_dscal_(integer *, 
doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal denom; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, + doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPTRS solves a system of linear equations A*X = B with a real */ +/* symmetric matrix A stored in packed format using the factorization */ +/* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSPTRF, stored as a */ +/* packed triangular matrix. */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSPTRF. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --ap; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSPTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B, where A = U*D*U'. */ + +/* First solve U*D*X = B, overwriting B with X. */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = *n; + kc = *n * (*n + 1) / 2 + 1; +L10: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L30; + } + + kc -= k; + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(U(K)), where U(K) is the transformation */ +/* stored in column K of A. */ + + i__1 = k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b[k + b_dim1], ldb, &b[ + b_dim1 + 1], ldb); + +/* Multiply by the inverse of the diagonal block. */ + + d__1 = 1. 
/ ap[kc + k - 1]; + _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + --k; + } else { + +/* 2 x 2 diagonal block */ + +/* Interchange rows K-1 and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k - 1) { + _starpu_dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(U(K)), where U(K) is the transformation */ +/* stored in columns K-1 and K of A. */ + + i__1 = k - 2; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b[k + b_dim1], ldb, &b[ + b_dim1 + 1], ldb); + i__1 = k - 2; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc - (k - 1)], &c__1, &b[k - 1 + + b_dim1], ldb, &b[b_dim1 + 1], ldb); + +/* Multiply by the inverse of the diagonal block. */ + + akm1k = ap[kc + k - 2]; + akm1 = ap[kc - 1] / akm1k; + ak = ap[kc + k - 1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k - 1 + j * b_dim1] / akm1k; + bk = b[k + j * b_dim1] / akm1k; + b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; +/* L20: */ + } + kc = kc - k + 1; + k += -2; + } + + goto L10; +L30: + +/* Next solve U'*X = B, overwriting B with X. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; + kc = 1; +L40: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L50; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Multiply by inv(U'(K)), where U(K) is the transformation */ +/* stored in column K of A. */ + + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] +, &c__1, &c_b19, &b[k + b_dim1], ldb); + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + kc += k; + ++k; + } else { + +/* 2 x 2 diagonal block */ + +/* Multiply by inv(U'(K+1)), where U(K+1) is the transformation */ +/* stored in columns K and K+1 of A. 
*/ + + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] +, &c__1, &c_b19, &b[k + b_dim1], ldb); + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc + + k], &c__1, &c_b19, &b[k + 1 + b_dim1], ldb); + +/* Interchange rows K and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + kc = kc + (k << 1) + 1; + k += 2; + } + + goto L40; +L50: + + ; + } else { + +/* Solve A*X = B, where A = L*D*L'. */ + +/* First solve L*D*X = B, overwriting B with X. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; + kc = 1; +L60: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L80; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(L(K)), where L(K) is the transformation */ +/* stored in column K of A. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + 1], &c__1, &b[k + b_dim1], + ldb, &b[k + 1 + b_dim1], ldb); + } + +/* Multiply by the inverse of the diagonal block. */ + + d__1 = 1. / ap[kc]; + _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + kc = kc + *n - k + 1; + ++k; + } else { + +/* 2 x 2 diagonal block */ + +/* Interchange rows K+1 and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k + 1) { + _starpu_dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(L(K)), where L(K) is the transformation */ +/* stored in columns K and K+1 of A. 
*/ + + if (k < *n - 1) { + i__1 = *n - k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + 2], &c__1, &b[k + b_dim1], + ldb, &b[k + 2 + b_dim1], ldb); + i__1 = *n - k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + *n - k + 2], &c__1, &b[k + + 1 + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); + } + +/* Multiply by the inverse of the diagonal block. */ + + akm1k = ap[kc + 1]; + akm1 = ap[kc] / akm1k; + ak = ap[kc + *n - k + 1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k + j * b_dim1] / akm1k; + bk = b[k + 1 + j * b_dim1] / akm1k; + b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; +/* L70: */ + } + kc = kc + (*n - k << 1) + 1; + k += 2; + } + + goto L60; +L80: + +/* Next solve L'*X = B, overwriting B with X. */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = *n; + kc = *n * (*n + 1) / 2 + 1; +L90: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L100; + } + + kc -= *n - k + 1; + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Multiply by inv(L'(K)), where L(K) is the transformation */ +/* stored in column K of A. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &ap[kc + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); + } + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + +/* 2 x 2 diagonal block */ + +/* Multiply by inv(L'(K-1)), where L(K-1) is the transformation */ +/* stored in columns K-1 and K of A. 
*/ + + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &ap[kc + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &ap[kc - (*n - k)], &c__1, &c_b19, &b[k - 1 + + b_dim1], ldb); + } + +/* Interchange rows K and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + kc -= *n - k + 2; + k += -2; + } + + goto L90; +L100: + ; + } + + return 0; + +/* End of DSPTRS */ + +} /* _starpu_dsptrs_ */ diff --git a/min-dgels/base/SRC/dstebz.c b/min-dgels/base/SRC/dstebz.c new file mode 100644 index 0000000..3eac2ab --- /dev/null +++ b/min-dgels/base/SRC/dstebz.c @@ -0,0 +1,774 @@ +/* dstebz.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dstebz_(char *range, char *order, integer *n, doublereal + *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, + doublereal *d__, doublereal *e, integer *m, integer *nsplit, + doublereal *w, integer *iblock, integer *isplit, doublereal *work, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal); + + /* Local variables */ + integer 
j, ib, jb, ie, je, nb; + doublereal gl; + integer im, in; + doublereal gu; + integer iw; + doublereal wl, wu; + integer nwl; + doublereal ulp, wlu, wul; + integer nwu; + doublereal tmp1, tmp2; + integer iend, ioff, iout, itmp1, jdisc; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + doublereal atoli; + integer iwoff; + doublereal bnorm; + integer itmax; + doublereal wkill, rtoli, tnorm; + extern doublereal _starpu_dlamch_(char *); + integer ibegin; + extern /* Subroutine */ int _starpu_dlaebz_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer irange, idiscl; + doublereal safemn; + integer idumma[1]; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer idiscu, iorder; + logical ncnvrg; + doublereal pivmin; + logical toofew; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ +/* 8-18-00: Increase FUDGE factor for T3E (eca) */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEBZ computes the eigenvalues of a symmetric tridiagonal */ +/* matrix T. The user may ask for all eigenvalues, all eigenvalues */ +/* in the half-open interval (VL, VU], or the IL-th through IU-th */ +/* eigenvalues. */ + +/* To avoid overflow, the matrix must be scaled so that its */ +/* largest element is no greater than overflow**(1/2) * */ +/* underflow**(1/4) in absolute value, and for greatest */ +/* accuracy, it should not be much smaller than that. */ + +/* See W. 
Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ +/* Matrix", Report CS41, Computer Science Dept., Stanford */ +/* University, July 21, 1966. */ + +/* Arguments */ +/* ========= */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': ("All") all eigenvalues will be found. */ +/* = 'V': ("Value") all eigenvalues in the half-open interval */ +/* (VL, VU] will be found. */ +/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ +/* entire matrix) will be found. */ + +/* ORDER (input) CHARACTER*1 */ +/* = 'B': ("By Block") the eigenvalues will be grouped by */ +/* split-off block (see IBLOCK, ISPLIT) and */ +/* ordered from smallest to largest within */ +/* the block. */ +/* = 'E': ("Entire matrix") */ +/* the eigenvalues for the entire matrix */ +/* will be ordered from smallest to */ +/* largest. */ + +/* N (input) INTEGER */ +/* The order of the tridiagonal matrix T. N >= 0. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. Eigenvalues less than or equal */ +/* to VL, or greater than VU, will not be returned. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute tolerance for the eigenvalues. An eigenvalue */ +/* (or cluster) is considered to be located if it has been */ +/* determined to lie in an interval whose width is ABSTOL or */ +/* less. If ABSTOL is less than or equal to zero, then ULP*|T| */ +/* will be used, where |T| means the 1-norm of T. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. 
*/ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix T. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ + +/* M (output) INTEGER */ +/* The actual number of eigenvalues found. 0 <= M <= N. */ +/* (See also the description of INFO=2,3.) */ + +/* NSPLIT (output) INTEGER */ +/* The number of diagonal blocks in the matrix T. */ +/* 1 <= NSPLIT <= N. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, the first M elements of W will contain the */ +/* eigenvalues. (DSTEBZ may use the remaining N-M elements as */ +/* workspace.) */ + +/* IBLOCK (output) INTEGER array, dimension (N) */ +/* At each row/column j where E(j) is zero or small, the */ +/* matrix T is considered to split into a block diagonal */ +/* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ +/* block (from 1 to the number of blocks) the eigenvalue W(i) */ +/* belongs. (DSTEBZ may use the remaining N-M elements as */ +/* workspace.) */ + +/* ISPLIT (output) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into submatrices. */ +/* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ +/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ +/* etc., and the NSPLIT-th consists of rows/columns */ +/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ +/* (Only the first NSPLIT elements will actually be used, but */ +/* since the user cannot know a priori what value NSPLIT will */ +/* have, N words must be reserved for ISPLIT.) 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: some or all of the eigenvalues failed to converge or */ +/* were not computed: */ +/* =1 or 3: Bisection failed to converge for some */ +/* eigenvalues; these eigenvalues are flagged by a */ +/* negative block number. The effect is that the */ +/* eigenvalues may not be as accurate as the */ +/* absolute and relative tolerances. This is */ +/* generally caused by unexpectedly inaccurate */ +/* arithmetic. */ +/* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ +/* IL:IU were found. */ +/* Effect: M < IU+1-IL */ +/* Cause: non-monotonic arithmetic, causing the */ +/* Sturm sequence to be non-monotonic. */ +/* Cure: recalculate, using RANGE='A', and pick */ +/* out eigenvalues IL:IU. In some cases, */ +/* increasing the PARAMETER "FUDGE" may */ +/* make things work. */ +/* = 4: RANGE='I', and the Gershgorin interval */ +/* initially used was too small. No eigenvalues */ +/* were computed. */ +/* Probable cause: your machine has sloppy */ +/* floating-point arithmetic. */ +/* Cure: Increase the PARAMETER "FUDGE", */ +/* recompile, and try again. */ + +/* Internal Parameters */ +/* =================== */ + +/* RELFAC DOUBLE PRECISION, default = 2.0e0 */ +/* The relative tolerance. An interval (a,b] lies within */ +/* "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), */ +/* where "ulp" is the machine precision (distance from 1 to */ +/* the next larger floating point number.) */ + +/* FUDGE DOUBLE PRECISION, default = 2.1 */ +/* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ +/* a value of 1 should work, but on machines with sloppy */ +/* arithmetic, this needs to be larger. The default for */ +/* publicly released versions should be large enough to handle */ +/* the worst machine around. 
Note that this has no effect */ +/* on accuracy of the solution. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --isplit; + --iblock; + --w; + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Decode RANGE */ + + if (_starpu_lsame_(range, "A")) { + irange = 1; + } else if (_starpu_lsame_(range, "V")) { + irange = 2; + } else if (_starpu_lsame_(range, "I")) { + irange = 3; + } else { + irange = 0; + } + +/* Decode ORDER */ + + if (_starpu_lsame_(order, "B")) { + iorder = 2; + } else if (_starpu_lsame_(order, "E")) { + iorder = 1; + } else { + iorder = 0; + } + +/* Check for Errors */ + + if (irange <= 0) { + *info = -1; + } else if (iorder <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (irange == 2) { + if (*vl >= *vu) { + *info = -5; + } + } else if (irange == 3 && (*il < 1 || *il > max(1,*n))) { + *info = -6; + } else if (irange == 3 && (*iu < min(*n,*il) || *iu > *n)) { + *info = -7; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEBZ", &i__1); + return 0; + } + +/* Initialize error flags */ + + *info = 0; + ncnvrg = FALSE_; + toofew = FALSE_; + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Simplifications: */ + + if (irange == 3 && *il == 1 && *iu == *n) { + irange = 1; + } + +/* Get machine constants */ +/* NB is the minimum vector length for vector bisection, or 0 */ +/* if only scalar is to be done. 
*/ + + safemn = _starpu_dlamch_("S"); + ulp = _starpu_dlamch_("P"); + rtoli = ulp * 2.; + nb = _starpu_ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); + if (nb <= 1) { + nb = 0; + } + +/* Special Case when N=1 */ + + if (*n == 1) { + *nsplit = 1; + isplit[1] = 1; + if (irange == 2 && (*vl >= d__[1] || *vu < d__[1])) { + *m = 0; + } else { + w[1] = d__[1]; + iblock[1] = 1; + *m = 1; + } + return 0; + } + +/* Compute Splitting Points */ + + *nsplit = 1; + work[*n] = 0.; + pivmin = 1.; + +/* DIR$ NOVECTOR */ + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing 2nd power */ + d__1 = e[j - 1]; + tmp1 = d__1 * d__1; +/* Computing 2nd power */ + d__2 = ulp; + if ((d__1 = d__[j] * d__[j - 1], abs(d__1)) * (d__2 * d__2) + safemn + > tmp1) { + isplit[*nsplit] = j - 1; + ++(*nsplit); + work[j - 1] = 0.; + } else { + work[j - 1] = tmp1; + pivmin = max(pivmin,tmp1); + } +/* L10: */ + } + isplit[*nsplit] = *n; + pivmin *= safemn; + +/* Compute Interval and ATOLI */ + + if (irange == 3) { + +/* RANGE='I': Compute the interval containing eigenvalues */ +/* IL through IU. 
*/ + +/* Compute Gershgorin interval for entire (split) matrix */ +/* and use it as the initial interval */ + + gu = d__[1]; + gl = d__[1]; + tmp1 = 0.; + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + tmp2 = sqrt(work[j]); +/* Computing MAX */ + d__1 = gu, d__2 = d__[j] + tmp1 + tmp2; + gu = max(d__1,d__2); +/* Computing MIN */ + d__1 = gl, d__2 = d__[j] - tmp1 - tmp2; + gl = min(d__1,d__2); + tmp1 = tmp2; +/* L20: */ + } + +/* Computing MAX */ + d__1 = gu, d__2 = d__[*n] + tmp1; + gu = max(d__1,d__2); +/* Computing MIN */ + d__1 = gl, d__2 = d__[*n] - tmp1; + gl = min(d__1,d__2); +/* Computing MAX */ + d__1 = abs(gl), d__2 = abs(gu); + tnorm = max(d__1,d__2); + gl = gl - tnorm * 2.1 * ulp * *n - pivmin * 4.2000000000000002; + gu = gu + tnorm * 2.1 * ulp * *n + pivmin * 2.1; + +/* Compute Iteration parameters */ + + itmax = (integer) ((log(tnorm + pivmin) - log(pivmin)) / log(2.)) + 2; + if (*abstol <= 0.) { + atoli = ulp * tnorm; + } else { + atoli = *abstol; + } + + work[*n + 1] = gl; + work[*n + 2] = gl; + work[*n + 3] = gu; + work[*n + 4] = gu; + work[*n + 5] = gl; + work[*n + 6] = gu; + iwork[1] = -1; + iwork[2] = -1; + iwork[3] = *n + 1; + iwork[4] = *n + 1; + iwork[5] = *il - 1; + iwork[6] = *iu; + + _starpu_dlaebz_(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, &pivmin, + &d__[1], &e[1], &work[1], &iwork[5], &work[*n + 1], &work[*n + + 5], &iout, &iwork[1], &w[1], &iblock[1], &iinfo); + + if (iwork[6] == *iu) { + wl = work[*n + 1]; + wlu = work[*n + 3]; + nwl = iwork[1]; + wu = work[*n + 4]; + wul = work[*n + 2]; + nwu = iwork[4]; + } else { + wl = work[*n + 2]; + wlu = work[*n + 4]; + nwl = iwork[2]; + wu = work[*n + 3]; + wul = work[*n + 1]; + nwu = iwork[3]; + } + + if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { + *info = 4; + return 0; + } + } else { + +/* RANGE='A' or 'V' -- Set ATOLI */ + +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = d__[*n], abs(d__1)) + ( + d__2 = e[*n - 1], abs(d__2)); + tnorm = 
max(d__3,d__4); + + i__1 = *n - 1; + for (j = 2; j <= i__1; ++j) { +/* Computing MAX */ + d__4 = tnorm, d__5 = (d__1 = d__[j], abs(d__1)) + (d__2 = e[j - 1] + , abs(d__2)) + (d__3 = e[j], abs(d__3)); + tnorm = max(d__4,d__5); +/* L30: */ + } + + if (*abstol <= 0.) { + atoli = ulp * tnorm; + } else { + atoli = *abstol; + } + + if (irange == 2) { + wl = *vl; + wu = *vu; + } else { + wl = 0.; + wu = 0.; + } + } + +/* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. */ +/* NWL accumulates the number of eigenvalues .le. WL, */ +/* NWU accumulates the number of eigenvalues .le. WU */ + + *m = 0; + iend = 0; + *info = 0; + nwl = 0; + nwu = 0; + + i__1 = *nsplit; + for (jb = 1; jb <= i__1; ++jb) { + ioff = iend; + ibegin = ioff + 1; + iend = isplit[jb]; + in = iend - ioff; + + if (in == 1) { + +/* Special Case -- IN=1 */ + + if (irange == 1 || wl >= d__[ibegin] - pivmin) { + ++nwl; + } + if (irange == 1 || wu >= d__[ibegin] - pivmin) { + ++nwu; + } + if (irange == 1 || wl < d__[ibegin] - pivmin && wu >= d__[ibegin] + - pivmin) { + ++(*m); + w[*m] = d__[ibegin]; + iblock[*m] = jb; + } + } else { + +/* General Case -- IN > 1 */ + +/* Compute Gershgorin Interval */ +/* and use it as the initial interval */ + + gu = d__[ibegin]; + gl = d__[ibegin]; + tmp1 = 0.; + + i__2 = iend - 1; + for (j = ibegin; j <= i__2; ++j) { + tmp2 = (d__1 = e[j], abs(d__1)); +/* Computing MAX */ + d__1 = gu, d__2 = d__[j] + tmp1 + tmp2; + gu = max(d__1,d__2); +/* Computing MIN */ + d__1 = gl, d__2 = d__[j] - tmp1 - tmp2; + gl = min(d__1,d__2); + tmp1 = tmp2; +/* L40: */ + } + +/* Computing MAX */ + d__1 = gu, d__2 = d__[iend] + tmp1; + gu = max(d__1,d__2); +/* Computing MIN */ + d__1 = gl, d__2 = d__[iend] - tmp1; + gl = min(d__1,d__2); +/* Computing MAX */ + d__1 = abs(gl), d__2 = abs(gu); + bnorm = max(d__1,d__2); + gl = gl - bnorm * 2.1 * ulp * in - pivmin * 2.1; + gu = gu + bnorm * 2.1 * ulp * in + pivmin * 2.1; + +/* Compute ATOLI for the current submatrix */ + + if (*abstol <= 
0.) { +/* Computing MAX */ + d__1 = abs(gl), d__2 = abs(gu); + atoli = ulp * max(d__1,d__2); + } else { + atoli = *abstol; + } + + if (irange > 1) { + if (gu < wl) { + nwl += in; + nwu += in; + goto L70; + } + gl = max(gl,wl); + gu = min(gu,wu); + if (gl >= gu) { + goto L70; + } + } + +/* Set Up Initial Interval */ + + work[*n + 1] = gl; + work[*n + in + 1] = gu; + _starpu_dlaebz_(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, & + pivmin, &d__[ibegin], &e[ibegin], &work[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & + w[*m + 1], &iblock[*m + 1], &iinfo); + + nwl += iwork[1]; + nwu += iwork[in + 1]; + iwoff = *m - iwork[1]; + +/* Compute Eigenvalues */ + + itmax = (integer) ((log(gu - gl + pivmin) - log(pivmin)) / log(2.) + ) + 2; + _starpu_dlaebz_(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, & + pivmin, &d__[ibegin], &e[ibegin], &work[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], + &w[*m + 1], &iblock[*m + 1], &iinfo); + +/* Copy Eigenvalues Into W and IBLOCK */ +/* Use -JB for block number for unconverged eigenvalues. */ + + i__2 = iout; + for (j = 1; j <= i__2; ++j) { + tmp1 = (work[j + *n] + work[j + in + *n]) * .5; + +/* Flag non-convergence. */ + + if (j > iout - iinfo) { + ncnvrg = TRUE_; + ib = -jb; + } else { + ib = jb; + } + i__3 = iwork[j + in] + iwoff; + for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { + w[je] = tmp1; + iblock[je] = ib; +/* L50: */ + } +/* L60: */ + } + + *m += im; + } +L70: + ; + } + +/* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ +/* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. 
*/ + + if (irange == 3) { + im = 0; + idiscl = *il - 1 - nwl; + idiscu = nwu - *iu; + + if (idiscl > 0 || idiscu > 0) { + i__1 = *m; + for (je = 1; je <= i__1; ++je) { + if (w[je] <= wlu && idiscl > 0) { + --idiscl; + } else if (w[je] >= wul && idiscu > 0) { + --idiscu; + } else { + ++im; + w[im] = w[je]; + iblock[im] = iblock[je]; + } +/* L80: */ + } + *m = im; + } + if (idiscl > 0 || idiscu > 0) { + +/* Code to deal with effects of bad arithmetic: */ +/* Some low eigenvalues to be discarded are not in (WL,WLU], */ +/* or high eigenvalues to be discarded are not in (WUL,WU] */ +/* so just kill off the smallest IDISCL/largest IDISCU */ +/* eigenvalues, by simply finding the smallest/largest */ +/* eigenvalue(s). */ + +/* (If N(w) is monotone non-decreasing, this should never */ +/* happen.) */ + + if (idiscl > 0) { + wkill = wu; + i__1 = idiscl; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } +/* L90: */ + } + iblock[iw] = 0; +/* L100: */ + } + } + if (idiscu > 0) { + + wkill = wl; + i__1 = idiscu; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] > wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } +/* L110: */ + } + iblock[iw] = 0; +/* L120: */ + } + } + im = 0; + i__1 = *m; + for (je = 1; je <= i__1; ++je) { + if (iblock[je] != 0) { + ++im; + w[im] = w[je]; + iblock[im] = iblock[je]; + } +/* L130: */ + } + *m = im; + } + if (idiscl < 0 || idiscu < 0) { + toofew = TRUE_; + } + } + +/* If ORDER='B', do nothing -- the eigenvalues are already sorted */ +/* by block. 
*/ +/* If ORDER='E', sort the eigenvalues from smallest to largest */ + + if (iorder == 1 && *nsplit > 1) { + i__1 = *m - 1; + for (je = 1; je <= i__1; ++je) { + ie = 0; + tmp1 = w[je]; + i__2 = *m; + for (j = je + 1; j <= i__2; ++j) { + if (w[j] < tmp1) { + ie = j; + tmp1 = w[j]; + } +/* L140: */ + } + + if (ie != 0) { + itmp1 = iblock[ie]; + w[ie] = w[je]; + iblock[ie] = iblock[je]; + w[je] = tmp1; + iblock[je] = itmp1; + } +/* L150: */ + } + } + + *info = 0; + if (ncnvrg) { + ++(*info); + } + if (toofew) { + *info += 2; + } + return 0; + +/* End of DSTEBZ */ + +} /* _starpu_dstebz_ */ diff --git a/min-dgels/base/SRC/dstedc.c b/min-dgels/base/SRC/dstedc.c new file mode 100644 index 0000000..0e8e312 --- /dev/null +++ b/min-dgels/base/SRC/dstedc.c @@ -0,0 +1,488 @@ +/* dstedc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, m; + doublereal p; + 
integer ii, lgn; + doublereal eps, tiny; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + extern /* Subroutine */ int _starpu_dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer start; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer finish; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlasrt_(char *, integer *, doublereal *, integer *); + integer liwmin, icompz; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + doublereal orgnrm; + logical lquery; + integer smlsiz, storez, strtrw; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSTEDC computes all eigenvalues and, optionally, eigenvectors of a */ +/* symmetric tridiagonal matrix using the divide and conquer method. */ +/* The eigenvectors of a full or band real symmetric matrix can also be */ +/* found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this */ +/* matrix to tridiagonal form. */ + +/* This code makes very mild assumptions about floating point */ +/* arithmetic. It will work on machines with a guard digit in */ +/* add/subtract, or on those binary machines without guard digits */ +/* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. See DLAED3 for details. */ + +/* Arguments */ +/* ========= */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only. */ +/* = 'I': Compute eigenvectors of tridiagonal matrix also. */ +/* = 'V': Compute eigenvectors of original dense symmetric */ +/* matrix also. On entry, Z contains the orthogonal */ +/* matrix used to reduce the original matrix to */ +/* tridiagonal form. */ + +/* N (input) INTEGER */ +/* The dimension of the symmetric tridiagonal matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the diagonal elements of the tridiagonal matrix. */ +/* On exit, if INFO = 0, the eigenvalues in ascending order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the subdiagonal elements of the tridiagonal matrix. */ +/* On exit, E has been destroyed. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* On entry, if COMPZ = 'V', then Z contains the orthogonal */ +/* matrix used in the reduction to tridiagonal form. 
*/ +/* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* orthonormal eigenvectors of the original symmetric matrix, */ +/* and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* of the symmetric tridiagonal matrix. */ +/* If COMPZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1. */ +/* If eigenvectors are desired, then LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. */ +/* If COMPZ = 'V' and N > 1 then LWORK must be at least */ +/* ( 1 + 3*N + 2*N*lg N + 3*N**2 ), */ +/* where lg( N ) = smallest integer k such */ +/* that 2**k >= N. */ +/* If COMPZ = 'I' and N > 1 then LWORK must be at least */ +/* ( 1 + 4*N + N**2 ). */ +/* Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* equal to the minimum divide size, usually 25, then LWORK need */ +/* only be max(1,2*(N-1)). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. */ +/* If COMPZ = 'V' and N > 1 then LIWORK must be at least */ +/* ( 6 + 6*N + 5*N*lg N ). */ +/* If COMPZ = 'I' and N > 1 then LIWORK must be at least */ +/* ( 3 + 5*N ). 
*/ +/* Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* equal to the minimum divide size, usually 25, then LIWORK */ +/* need only be 1. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: The algorithm failed to compute an eigenvalue while */ +/* working on the submatrix lying in rows and columns */ +/* INFO/(N+1) through mod(INFO,N+1). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ +/* Modified by Francoise Tisseur, University of Tennessee. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (_starpu_lsame_(compz, "N")) { + icompz = 0; + } else if (_starpu_lsame_(compz, "V")) { + icompz = 1; + } else if (_starpu_lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = _starpu_ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* If the following conditional clause is removed, then the routine */ +/* will use the Divide and Conquer routine to compute only the */ +/* eigenvalues, which requires (3N + 3N**2) real workspace and */ +/* (2 + 5N + 2N lg(N)) integer workspace. 
*/ +/* Since on many architectures DSTERF is much faster than any other */ +/* algorithm for finding eigenvalues only, it is used here */ +/* as the default. If the conditional clause is removed, then */ +/* information on the size of workspace needs to be changed. */ + +/* If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ + + if (icompz == 0) { + _starpu_dsterf_(n, &d__[1], &e[1], info); + goto L50; + } + +/* If N is smaller than the minimum divide size (SMLSIZ+1), then */ +/* solve the problem with another solver. */ + + if (*n <= smlsiz) { + + _starpu_dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + + } else { + +/* If COMPZ = 'V', the Z matrix must be stored elsewhere for later */ +/* use. */ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + _starpu_dlaset_("Full", n, n, &c_b17, &c_b18, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + goto L50; + } + + eps = _starpu_dlamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* Let FINISH be the position of the next subdiagonal entry */ +/* such that E( FINISH ) <= TINY or FINISH = N if no such */ +/* subdiagonal exists. The matrix identified by the elements */ +/* between START and FINISH constitutes an independent */ +/* sub-problem. */ + + finish = start; +L20: + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { + +/* Scale. 
*/ + + orgnrm = _starpu_dlanst_("M", &m, &d__[start], &e[start]); + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + _starpu_dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } + +/* Scale back. */ + + _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + if (icompz == 1) { + +/* Since QR won't update a Z matrix which is larger than */ +/* the length of D, we must solve the sub-problem in a */ +/* workspace and then multiply back into Z. */ + + _starpu_dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info); + _starpu_dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + _starpu_dgemm_("N", "N", n, &m, &m, &c_b18, &work[storez], n, & + work[1], &m, &c_b17, &z__[start * z_dim1 + 1], + ldz); + } else if (icompz == 2) { + _starpu_dsteqr_("I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info); + } else { + _starpu_dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } + } + + start = finish + 1; + goto L10; + } + +/* endwhile */ + +/* If the problem split any number of times, then the eigenvalues */ +/* will not be properly ordered. Here we permute the eigenvalues */ +/* (and the associated eigenvectors) into ascending order. 
*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + _starpu_dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * + z_dim1 + 1], &c__1); + } +/* L40: */ + } + } + } + } + +L50: + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEDC */ + +} /* _starpu_dstedc_ */ diff --git a/min-dgels/base/SRC/dstegr.c b/min-dgels/base/SRC/dstegr.c new file mode 100644 index 0000000..77f822d --- /dev/null +++ b/min-dgels/base/SRC/dstegr.c @@ -0,0 +1,211 @@ +/* dstegr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset; + + /* Local variables */ + extern /* Subroutine */ int _starpu_dstemr_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, doublereal *, 
doublereal *, integer *, integer *, + integer *, logical *, doublereal *, integer *, integer *, integer + *, integer *); + logical tryrac; + + + +/* -- LAPACK computational routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEGR computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ +/* a well defined set of pairwise different real eigenvalues, the corresponding */ +/* real eigenvectors are pairwise orthogonal. */ + +/* The spectrum may be computed either completely or partially by specifying */ +/* either an interval (VL,VU] or a range of indices IL:IU for the desired */ +/* eigenvalues. */ + +/* DSTEGR is a compatibility wrapper around the improved DSTEMR routine. */ +/* See DSTEMR for further details. */ + +/* One important change is that the ABSTOL parameter no longer provides any */ +/* benefit and hence is no longer used. */ + +/* Note : DSTEGR and DSTEMR work only on machines which follow */ +/* IEEE-754 floating-point standard in their handling of infinities and */ +/* NaNs. Normal execution may create these exceptional values and hence */ +/* may abort due to a floating point exception in environments which */ +/* do not conform to the IEEE-754 standard. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0.
*/ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the N diagonal elements of the tridiagonal matrix */ +/* T. On exit, D is overwritten. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the (N-1) subdiagonal elements of the tridiagonal */ +/* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ +/* input, but is used internally as workspace. */ +/* On exit, E is overwritten. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* Unused. Was the absolute error tolerance for the */ +/* eigenvalues/eigenvectors in previous versions. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ +/* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix T */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. 
*/ +/* Supplying N columns is always safe. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', then LDZ >= max(1,N). */ + +/* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) */ +/* The support of the eigenvectors in Z, i.e., the indices */ +/* indicating the nonzero elements in Z. The i-th computed eigenvector */ +/* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ +/* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal */ +/* (and minimal) LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,18*N) */ +/* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ +/* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ +/* if only the eigenvalues are to be computed. */ +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* On exit, INFO */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = 1X, internal error in DLARRE, */ +/* if INFO = 2X, internal error in DLARRV. 
*/ +/* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ +/* the nonzero error code returned by DLARRE or */ +/* DLARRV, respectively. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Inderjit Dhillon, IBM Almaden, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, LBNL/NERSC, USA */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments */ + --d__; + --e; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + *info = 0; + tryrac = FALSE_; + _starpu_dstemr_(jobz, range, n, &d__[1], &e[1], vl, vu, il, iu, m, &w[1], &z__[ + z_offset], ldz, n, &isuppz[1], &tryrac, &work[1], lwork, &iwork[1] +, liwork, info); + +/* End of DSTEGR */ + + return 0; +} /* _starpu_dstegr_ */ diff --git a/min-dgels/base/SRC/dstein.c b/min-dgels/base/SRC/dstein.c new file mode 100644 index 0000000..b75096f --- /dev/null +++ b/min-dgels/base/SRC/dstein.c @@ -0,0 +1,452 @@ +/* dstein.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, + integer *m, doublereal *w, integer *iblock, integer *isplit, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, b1, j1, bn; + doublereal xj, scl, eps, sep, nrm, tol; + integer its; + doublereal xjm, ztr, eps1; + integer jblk, nblk; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer jmax; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + integer iseed[4], gpind, iinfo; + extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + doublereal ortol; + integer indrv1, indrv2, indrv3, indrv4, indrv5; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlagtf_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, 
doublereal *, integer * +, integer *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlagts_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *); + integer nrmchk; + extern /* Subroutine */ int _starpu_dlarnv_(integer *, integer *, integer *, + doublereal *); + integer blksiz; + doublereal onenrm, dtpcrt, pertol; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEIN computes the eigenvectors of a real symmetric tridiagonal */ +/* matrix T corresponding to specified eigenvalues, using inverse */ +/* iteration. */ + +/* The maximum number of iterations allowed for each eigenvector is */ +/* specified by an internal parameter MAXITS (currently set to 5). */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input) DOUBLE PRECISION array, dimension (N) */ +/* The n diagonal elements of the tridiagonal matrix T. */ + +/* E (input) DOUBLE PRECISION array, dimension (N-1) */ +/* The (n-1) subdiagonal elements of the tridiagonal matrix */ +/* T, in elements 1 to N-1. */ + +/* M (input) INTEGER */ +/* The number of eigenvectors to be found. 0 <= M <= N. */ + +/* W (input) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements of W contain the eigenvalues for */ +/* which eigenvectors are to be computed. The eigenvalues */ +/* should be grouped by split-off block and ordered from */ +/* smallest to largest within the block. ( The output array */ +/* W from DSTEBZ with ORDER = 'B' is expected here. 
) */ + +/* IBLOCK (input) INTEGER array, dimension (N) */ +/* The submatrix indices associated with the corresponding */ +/* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to */ +/* the first submatrix from the top, =2 if W(i) belongs to */ +/* the second submatrix, etc. ( The output array IBLOCK */ +/* from DSTEBZ is expected here. ) */ + +/* ISPLIT (input) INTEGER array, dimension (N) */ +/* The splitting points, at which T breaks up into submatrices. */ +/* The first submatrix consists of rows/columns 1 to */ +/* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ +/* through ISPLIT( 2 ), etc. */ +/* ( The output array ISPLIT from DSTEBZ is expected here. ) */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, M) */ +/* The computed eigenvectors. The eigenvector associated */ +/* with the eigenvalue W(i) is stored in the i-th column of */ +/* Z. Any vector which fails to converge is set to its current */ +/* iterate after MAXITS iterations. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* IFAIL (output) INTEGER array, dimension (M) */ +/* On normal exit, all elements of IFAIL are zero. */ +/* If one or more eigenvectors fail to converge after */ +/* MAXITS iterations, then their indices are stored in */ +/* array IFAIL. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, then i eigenvectors failed to converge */ +/* in MAXITS iterations. Their indices are stored in */ +/* array IFAIL. */ + +/* Internal Parameters */ +/* =================== */ + +/* MAXITS INTEGER, default = 5 */ +/* The maximum number of iterations performed. */ + +/* EXTRA INTEGER, default = 2 */ +/* The number of iterations performed after norm growth */ +/* criterion is satisfied, should be at least 1. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --w; + --iblock; + --isplit; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + *info = 0; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + + if (*n < 0) { + *info = -1; + } else if (*m < 0 || *m > *n) { + *info = -4; + } else if (*ldz < max(1,*n)) { + *info = -9; + } else { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + if (iblock[j] < iblock[j - 1]) { + *info = -6; + goto L30; + } + if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) { + *info = -5; + goto L30; + } +/* L20: */ + } +L30: + ; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEIN", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } else if (*n == 1) { + z__[z_dim1 + 1] = 1.; + return 0; + } + +/* Get machine constants. */ + + eps = _starpu_dlamch_("Precision"); + +/* Initialize seed for random number generator DLARNV. */ + + for (i__ = 1; i__ <= 4; ++i__) { + iseed[i__ - 1] = 1; +/* L40: */ + } + +/* Initialize pointers. */ + + indrv1 = 0; + indrv2 = indrv1 + *n; + indrv3 = indrv2 + *n; + indrv4 = indrv3 + *n; + indrv5 = indrv4 + *n; + +/* Compute eigenvectors of matrix blocks. */ + + j1 = 1; + i__1 = iblock[*m]; + for (nblk = 1; nblk <= i__1; ++nblk) { + +/* Find starting and ending indices of block nblk. 
*/ + + if (nblk == 1) { + b1 = 1; + } else { + b1 = isplit[nblk - 1] + 1; + } + bn = isplit[nblk]; + blksiz = bn - b1 + 1; + if (blksiz == 1) { + goto L60; + } + gpind = b1; + +/* Compute reorthogonalization criterion and stopping criterion. */ + + onenrm = (d__1 = d__[b1], abs(d__1)) + (d__2 = e[b1], abs(d__2)); +/* Computing MAX */ + d__3 = onenrm, d__4 = (d__1 = d__[bn], abs(d__1)) + (d__2 = e[bn - 1], + abs(d__2)); + onenrm = max(d__3,d__4); + i__2 = bn - 1; + for (i__ = b1 + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__4 = onenrm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__ - 1], abs(d__2)) + (d__3 = e[i__], abs(d__3)); + onenrm = max(d__4,d__5); +/* L50: */ + } + ortol = onenrm * .001; + + dtpcrt = sqrt(.1 / blksiz); + +/* Loop through eigenvalues of block nblk. */ + +L60: + jblk = 0; + i__2 = *m; + for (j = j1; j <= i__2; ++j) { + if (iblock[j] != nblk) { + j1 = j; + goto L160; + } + ++jblk; + xj = w[j]; + +/* Skip all the work if the block size is one. */ + + if (blksiz == 1) { + work[indrv1 + 1] = 1.; + goto L120; + } + +/* If eigenvalues j and j-1 are too close, add a relatively */ +/* small perturbation. */ + + if (jblk > 1) { + eps1 = (d__1 = eps * xj, abs(d__1)); + pertol = eps1 * 10.; + sep = xj - xjm; + if (sep < pertol) { + xj = xjm + pertol; + } + } + + its = 0; + nrmchk = 0; + +/* Get random starting vector. */ + + _starpu_dlarnv_(&c__2, iseed, &blksiz, &work[indrv1 + 1]); + +/* Copy the matrix T so it won't be destroyed in factorization. */ + + _starpu_dcopy_(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1); + i__3 = blksiz - 1; + _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1); + i__3 = blksiz - 1; + _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1); + +/* Compute LU factors with partial pivoting ( PT = LU ) */ + + tol = 0.; + _starpu_dlagtf_(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[ + indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo); + +/* Update iteration count. 
*/ + +L70: + ++its; + if (its > 5) { + goto L100; + } + +/* Normalize and scale the righthand side vector Pb. */ + +/* Computing MAX */ + d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], abs(d__1)); + scl = blksiz * onenrm * max(d__2,d__3) / _starpu_dasum_(&blksiz, &work[ + indrv1 + 1], &c__1); + _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); + +/* Solve the system LU = Pb. */ + + _starpu_dlagts_(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], & + work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[ + indrv1 + 1], &tol, &iinfo); + +/* Reorthogonalize by modified Gram-Schmidt if eigenvalues are */ +/* close enough. */ + + if (jblk == 1) { + goto L90; + } + if ((d__1 = xj - xjm, abs(d__1)) > ortol) { + gpind = j; + } + if (gpind != j) { + i__3 = j - 1; + for (i__ = gpind; i__ <= i__3; ++i__) { + ztr = -_starpu_ddot_(&blksiz, &work[indrv1 + 1], &c__1, &z__[b1 + + i__ * z_dim1], &c__1); + _starpu_daxpy_(&blksiz, &ztr, &z__[b1 + i__ * z_dim1], &c__1, & + work[indrv1 + 1], &c__1); +/* L80: */ + } + } + +/* Check the infinity norm of the iterate. */ + +L90: + jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1); + nrm = (d__1 = work[indrv1 + jmax], abs(d__1)); + +/* Continue for additional iterations after norm reaches */ +/* stopping criterion. */ + + if (nrm < dtpcrt) { + goto L70; + } + ++nrmchk; + if (nrmchk < 3) { + goto L70; + } + + goto L110; + +/* If stopping criterion was not satisfied, update info and */ +/* store eigenvector number in array ifail. */ + +L100: + ++(*info); + ifail[*info] = j; + +/* Accept iterate as jth eigenvector. */ + +L110: + scl = 1. / _starpu_dnrm2_(&blksiz, &work[indrv1 + 1], &c__1); + jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1); + if (work[indrv1 + jmax] < 0.) 
{ + scl = -scl; + } + _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); +L120: + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + z__[i__ + j * z_dim1] = 0.; +/* L130: */ + } + i__3 = blksiz; + for (i__ = 1; i__ <= i__3; ++i__) { + z__[b1 + i__ - 1 + j * z_dim1] = work[indrv1 + i__]; +/* L140: */ + } + +/* Save the shift to check eigenvalue spacing at next */ +/* iteration. */ + + xjm = xj; + +/* L150: */ + } +L160: + ; + } + + return 0; + +/* End of DSTEIN */ + +} /* _starpu_dstein_ */ diff --git a/min-dgels/base/SRC/dstemr.c b/min-dgels/base/SRC/dstemr.c new file mode 100644 index 0000000..5d73373 --- /dev/null +++ b/min-dgels/base/SRC/dstemr.c @@ -0,0 +1,728 @@ +/* dstemr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b18 = .001; + +/* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, + integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal r1, r2; + integer jj; + doublereal cs; + integer in; + doublereal sn, wl, wu; + integer iil, iiu; + doublereal eps, tmp; + integer 
indd, iend, jblk, wend; + doublereal rmin, rmax; + integer itmp; + doublereal tnrm; + extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + integer inde2, itmp2; + doublereal rtol1, rtol2; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale; + integer indgp; + extern logical _starpu_lsame_(char *, char *); + integer iinfo, iindw, ilast; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + integer lwmin; + logical wantz; + extern /* Subroutine */ int _starpu_dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + extern doublereal _starpu_dlamch_(char *); + logical alleig; + integer ibegin; + logical indeig; + integer iindbl; + logical valeig; + extern /* Subroutine */ int _starpu_dlarrc_(char *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *), _starpu_dlarre_(char *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + integer wbegin; + doublereal safmin; + extern /* Subroutine */ int _starpu_dlarrj_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + doublereal bignum; + integer inderr, iindwk, indgrs, offset; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int 
_starpu_dlarrr_(integer *, doublereal *, doublereal *, + integer *), _starpu_dlarrv_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlasrt_(char *, integer *, doublereal *, + integer *); + doublereal thresh; + integer iinspl, ifirst, indwrk, liwmin, nzcmin; + doublereal pivmin; + integer nsplit; + doublereal smlnum; + logical lquery, zquery; + + +/* -- LAPACK computational routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEMR computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ +/* a well defined set of pairwise different real eigenvalues, the corresponding */ +/* real eigenvectors are pairwise orthogonal. */ + +/* The spectrum may be computed either completely or partially by specifying */ +/* either an interval (VL,VU] or a range of indices IL:IU for the desired */ +/* eigenvalues. */ + +/* Depending on the number of desired eigenvalues, these are computed either */ +/* by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */ +/* computed by the use of various suitable L D L^T factorizations near clusters */ +/* of close eigenvalues (referred to as RRRs, Relatively Robust */ +/* Representations). An informal sketch of the algorithm follows. */ + +/* For each unreduced block (submatrix) of T, */ +/* (a) Compute T - sigma I = L D L^T, so that L and D */ +/* define all the wanted eigenvalues to high relative accuracy. 
*/ +/* This means that small relative changes in the entries of D and L */ +/* cause only small relative changes in the eigenvalues and */ +/* eigenvectors. The standard (unfactored) representation of the */ +/* tridiagonal matrix T does not have this property in general. */ +/* (b) Compute the eigenvalues to suitable accuracy. */ +/* If the eigenvectors are desired, the algorithm attains full */ +/* accuracy of the computed eigenvalues only right before */ +/* the corresponding vectors have to be computed, see steps c) and d). */ +/* (c) For each cluster of close eigenvalues, select a new */ +/* shift close to the cluster, find a new factorization, and refine */ +/* the shifted eigenvalues to suitable accuracy. */ +/* (d) For each eigenvalue with a large enough relative separation compute */ +/* the corresponding eigenvector by forming a rank revealing twisted */ +/* factorization. Go back to (c) for any clusters that remain. */ + +/* For more details, see: */ +/* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* 2004. Also LAPACK Working Note 154. */ +/* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* tridiagonal eigenvalue/eigenvector problem", */ +/* Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* UC Berkeley, May 1997. */ + +/* Notes: */ +/* 1.DSTEMR works only on machines which follow IEEE-754 */ +/* floating-point standard in their handling of infinities and NaNs. */ +/* This permits the use of efficient inner loops avoiding a check for */ +/* zero divisors. 
*/ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the N diagonal elements of the tridiagonal matrix */ +/* T. On exit, D is overwritten. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the (N-1) subdiagonal elements of the tridiagonal */ +/* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ +/* input, but is used internally as workspace. */ +/* On exit, E is overwritten. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. 
*/ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ +/* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix T */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and can be computed with a workspace */ +/* query by setting NZC = -1, see below. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', then LDZ >= max(1,N). */ + +/* NZC (input) INTEGER */ +/* The number of eigenvectors to be held in the array Z. */ +/* If RANGE = 'A', then NZC >= max(1,N). */ +/* If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */ +/* If RANGE = 'I', then NZC >= IU-IL+1. */ +/* If NZC = -1, then a workspace query is assumed; the */ +/* routine calculates the number of columns of the array Z that */ +/* are needed to hold the eigenvectors. */ +/* This value is returned as the first entry of the Z array, and */ +/* no error message related to NZC is issued by XERBLA. */ + +/* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) */ +/* The support of the eigenvectors in Z, i.e., the indices */ +/* indicating the nonzero elements in Z. The i-th computed eigenvector */ +/* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ +/* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ + +/* TRYRAC (input/output) LOGICAL */ +/* If TRYRAC.EQ..TRUE., indicates that the code should check whether */ +/* the tridiagonal matrix defines its eigenvalues to high relative */ +/* accuracy. 
If so, the code uses relative-accuracy preserving */ +/* algorithms that might be (a bit) slower depending on the matrix. */ +/* If the matrix does not define its eigenvalues to high relative */ +/* accuracy, the code can use possibly faster algorithms. */ +/* If TRYRAC.EQ..FALSE., the code is not required to guarantee */ +/* relatively accurate eigenvalues and can use the fastest possible */ +/* techniques. */ +/* On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */ +/* does not define its eigenvalues to high relative accuracy. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal */ +/* (and minimal) LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,18*N) */ +/* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ +/* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ +/* if only the eigenvalues are to be computed. */ +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* On exit, INFO */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = 1X, internal error in DLARRE, */ +/* if INFO = 2X, internal error in DLARRV. 
*/ +/* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ +/* the nonzero error code returned by DLARRE or */ +/* DLARRV, respectively. */ + + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Beresford Parlett, University of California, Berkeley, USA */ +/* Jim Demmel, University of California, Berkeley, USA */ +/* Inderjit Dhillon, University of Texas, Austin, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Christof Voemel, University of California, Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + lquery = *lwork == -1 || *liwork == -1; + zquery = *nzc == -1; +/* DSTEMR needs WORK of size 6*N, IWORK of size 3*N. */ +/* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N. */ +/* Furthermore, DLARRV needs WORK of size 12*N, IWORK of size 7*N. */ + if (wantz) { + lwmin = *n * 18; + liwmin = *n * 10; + } else { +/* need less workspace if only the eigenvalues are wanted */ + lwmin = *n * 12; + liwmin = *n << 3; + } + wl = 0.; + wu = 0.; + iil = 0; + iiu = 0; + if (valeig) { +/* We do not reference VL, VU in the cases RANGE = 'I','A' */ +/* The interval (WL, WU] contains all the wanted eigenvalues. */ +/* It is either given by the user or computed in DLARRE. 
*/ + wl = *vl; + wu = *vu; + } else if (indeig) { +/* We do not reference IL, IU in the cases RANGE = 'V','A' */ + iil = *il; + iiu = *iu; + } + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (valeig && *n > 0 && wu <= wl) { + *info = -7; + } else if (indeig && (iil < 1 || iil > *n)) { + *info = -8; + } else if (indeig && (iiu < iil || iiu > *n)) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -13; + } else if (*lwork < lwmin && ! lquery) { + *info = -17; + } else if (*liwork < liwmin && ! lquery) { + *info = -19; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (wantz && alleig) { + nzcmin = *n; + } else if (wantz && valeig) { + _starpu_dlarrc_("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, & + itmp2, info); + } else if (wantz && indeig) { + nzcmin = iiu - iil + 1; + } else { +/* WANTZ .EQ. FALSE. */ + nzcmin = 0; + } + if (zquery && *info == 0) { + z__[z_dim1 + 1] = (doublereal) nzcmin; + } else if (*nzc < nzcmin && ! zquery) { + *info = -14; + } + } + if (*info != 0) { + + i__1 = -(*info); + _starpu_xerbla_("DSTEMR", &i__1); + + return 0; + } else if (lquery || zquery) { + return 0; + } + +/* Handle N = 0, 1, and 2 cases immediately */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = d__[1]; + } else { + if (wl < d__[1] && wu >= d__[1]) { + *m = 1; + w[1] = d__[1]; + } + } + if (wantz && ! zquery) { + z__[z_dim1 + 1] = 1.; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + + if (*n == 2) { + if (! 
wantz) { + _starpu_dlae2_(&d__[1], &e[1], &d__[2], &r1, &r2); + } else if (wantz && ! zquery) { + _starpu_dlaev2_(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn); + } + if (alleig || valeig && r2 > wl && r2 <= wu || indeig && iil == 1) { + ++(*m); + w[*m] = r2; + if (wantz && ! zquery) { + z__[*m * z_dim1 + 1] = -sn; + z__[*m * z_dim1 + 2] = cs; +/* Note: At most one of SN and CS can be zero. */ + if (sn != 0.) { + if (cs != 0.) { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 2; + } else { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 1; + } + } else { + isuppz[(*m << 1) - 1] = 2; + isuppz[*m * 2] = 2; + } + } + } + if (alleig || valeig && r1 > wl && r1 <= wu || indeig && iiu == 2) { + ++(*m); + w[*m] = r1; + if (wantz && ! zquery) { + z__[*m * z_dim1 + 1] = cs; + z__[*m * z_dim1 + 2] = sn; +/* Note: At most one of SN and CS can be zero. */ + if (sn != 0.) { + if (cs != 0.) { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 2; + } else { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 1; + } + } else { + isuppz[(*m << 1) - 1] = 2; + isuppz[*m * 2] = 2; + } + } + } + return 0; + } +/* Continue with general N */ + indgrs = 1; + inderr = (*n << 1) + 1; + indgp = *n * 3 + 1; + indd = (*n << 2) + 1; + inde2 = *n * 5 + 1; + indwrk = *n * 6 + 1; + + iinspl = 1; + iindbl = *n + 1; + iindw = (*n << 1) + 1; + iindwk = *n * 3 + 1; + +/* Scale matrix to allowable range, if necessary. */ +/* The allowable range is related to the PIVMIN parameter; see the */ +/* comments in DLARRD. The preference for scaling small values */ +/* up is heuristic; we expect users' matrices not to be close to the */ +/* RMAX threshold. */ + + scale = 1.; + tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + scale = rmin / tnrm; + } else if (tnrm > rmax) { + scale = rmax / tnrm; + } + if (scale != 1.) 
{ + _starpu_dscal_(n, &scale, &d__[1], &c__1); + i__1 = *n - 1; + _starpu_dscal_(&i__1, &scale, &e[1], &c__1); + tnrm *= scale; + if (valeig) { +/* If eigenvalues in interval have to be found, */ +/* scale (WL, WU] accordingly */ + wl *= scale; + wu *= scale; + } + } + +/* Compute the desired eigenvalues of the tridiagonal after splitting */ +/* into smaller subblocks if the corresponding off-diagonal elements */ +/* are small */ +/* THRESH is the splitting parameter for DLARRE */ +/* A negative THRESH forces the old splitting criterion based on the */ +/* size of the off-diagonal. A positive THRESH switches to splitting */ +/* which preserves relative accuracy. */ + + if (*tryrac) { +/* Test whether the matrix warrants the more expensive relative approach. */ + _starpu_dlarrr_(n, &d__[1], &e[1], &iinfo); + } else { +/* The user does not care about relative accurately eigenvalues */ + iinfo = -1; + } +/* Set the splitting criterion */ + if (iinfo == 0) { + thresh = eps; + } else { + thresh = -eps; +/* relative accuracy is desired but T does not guarantee it */ + *tryrac = FALSE_; + } + + if (*tryrac) { +/* Copy original diagonal, needed to guarantee relative accuracy */ + _starpu_dcopy_(n, &d__[1], &c__1, &work[indd], &c__1); + } +/* Store the squares of the offdiagonal values of T */ + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing 2nd power */ + d__1 = e[j]; + work[inde2 + j - 1] = d__1 * d__1; +/* L5: */ + } +/* Set the tolerance parameters for bisection */ + if (! wantz) { +/* DLARRE computes the eigenvalues to full precision. */ + rtol1 = eps * 4.; + rtol2 = eps * 4.; + } else { +/* DLARRE computes the eigenvalues to less than full precision. */ +/* DLARRV will refine the eigenvalue approximations, and we can */ +/* need less accurate initial bisection in DLARRE. 
*/ +/* Note: these settings do only affect the subset case and DLARRE */ + rtol1 = sqrt(eps); +/* Computing MAX */ + d__1 = sqrt(eps) * .005, d__2 = eps * 4.; + rtol2 = max(d__1,d__2); + } + _starpu_dlarre_(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], & + rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[ + inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[ + indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo); + if (iinfo != 0) { + *info = abs(iinfo) + 10; + return 0; + } +/* Note that if RANGE .NE. 'V', DLARRE computes bounds on the desired */ +/* part of the spectrum. All desired eigenvalues are contained in */ +/* (WL,WU] */ + if (wantz) { + +/* Compute the desired eigenvectors corresponding to the computed */ +/* eigenvalues */ + + _starpu_dlarrv_(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, & + c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[ + indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[ + z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], & + iinfo); + if (iinfo != 0) { + *info = abs(iinfo) + 20; + return 0; + } + } else { +/* DLARRE computes eigenvalues of the (shifted) root representation */ +/* DLARRV returns the eigenvalues of the unshifted matrix. */ +/* However, if the eigenvectors are not desired by the user, we need */ +/* to apply the corresponding shifts from DLARRE to obtain the */ +/* eigenvalues of the original matrix. */ + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + itmp = iwork[iindbl + j - 1]; + w[j] += e[iwork[iinspl + itmp - 1]]; +/* L20: */ + } + } + + if (*tryrac) { +/* Refine computed eigenvalues so that they are relatively accurate */ +/* with respect to the original matrix T. 
*/ + ibegin = 1; + wbegin = 1; + i__1 = iwork[iindbl + *m - 1]; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = iwork[iinspl + jblk - 1]; + in = iend - ibegin + 1; + wend = wbegin - 1; +/* check if any eigenvalues have to be refined in this block */ +L36: + if (wend < *m) { + if (iwork[iindbl + wend] == jblk) { + ++wend; + goto L36; + } + } + if (wend < wbegin) { + ibegin = iend + 1; + goto L39; + } + offset = iwork[iindw + wbegin - 1] - 1; + ifirst = iwork[iindw + wbegin - 1]; + ilast = iwork[iindw + wend - 1]; + rtol2 = eps * 4.; + _starpu_dlarrj_(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], + &ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[ + inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], & + pivmin, &tnrm, &iinfo); + ibegin = iend + 1; + wbegin = wend + 1; +L39: + ; + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (scale != 1.) { + d__1 = 1. / scale; + _starpu_dscal_(m, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in increasing order, then sort them, */ +/* possibly along with eigenvectors. */ + + if (nsplit > 1) { + if (! 
wantz) { + _starpu_dlasrt_("I", m, &w[1], &iinfo); + if (iinfo != 0) { + *info = 3; + return 0; + } + } else { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp) { + i__ = jj; + tmp = w[jj]; + } +/* L50: */ + } + if (i__ != 0) { + w[i__] = w[j]; + w[j] = tmp; + if (wantz) { + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * + z_dim1 + 1], &c__1); + itmp = isuppz[(i__ << 1) - 1]; + isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1]; + isuppz[(j << 1) - 1] = itmp; + itmp = isuppz[i__ * 2]; + isuppz[i__ * 2] = isuppz[j * 2]; + isuppz[j * 2] = itmp; + } + } +/* L60: */ + } + } + } + + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + return 0; + +/* End of DSTEMR */ + +} /* _starpu_dstemr_ */ diff --git a/min-dgels/base/SRC/dsteqr.c b/min-dgels/base/SRC/dsteqr.c new file mode 100644 index 0000000..46eec0c --- /dev/null +++ b/min-dgels/base/SRC/dsteqr.c @@ -0,0 +1,621 @@ +/* dsteqr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b9 = 0.; +static doublereal c_b10 = 1.; +static integer c__0 = 0; +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine: computes the eigenvalues and, optionally, the eigenvectors of a symmetric tridiagonal matrix with the implicit QL or QR method (see Purpose below) */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal b, c__, f, g; + integer i__, j, k, l, m; + doublereal p, r__, s; + integer l1, ii, mm, lm1, mm1, nm1; + doublereal rt1, rt2, eps; + integer lsv; + doublereal tst, eps2; + integer lend, jtot; + extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + doublereal anorm; + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + integer lendm1, lendp1; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), 
_starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + 
doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal safmax; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, + integer *); + integer lendsv; + doublereal ssfmin; + integer nmaxit, icompz; + doublereal ssfmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEQR computes all eigenvalues and, optionally, eigenvectors of a */ +/* symmetric tridiagonal matrix using the implicit QL or QR method. */ +/* The eigenvectors of a full or band symmetric matrix can also be found */ +/* if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to */ +/* tridiagonal form. */ + +/* Arguments */ +/* ========= */ + +/* COMPZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only. */ +/* = 'V': Compute eigenvalues and eigenvectors of the original */ +/* symmetric matrix. On entry, Z must contain the */ +/* orthogonal matrix used to reduce the original matrix */ +/* to tridiagonal form. */ +/* = 'I': Compute eigenvalues and eigenvectors of the */ +/* tridiagonal matrix. Z is initialized to the identity */ +/* matrix. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the diagonal elements of the tridiagonal matrix. */ +/* On exit, if INFO = 0, the eigenvalues in ascending order. 
*/ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix. */ +/* On exit, E has been destroyed. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* On entry, if COMPZ = 'V', then Z contains the orthogonal */ +/* matrix used in the reduction to tridiagonal form. */ +/* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* orthonormal eigenvectors of the original symmetric matrix, */ +/* and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* of the symmetric tridiagonal matrix. */ +/* If COMPZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* eigenvectors are desired, then LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ +/* If COMPZ = 'N', then WORK is not referenced; otherwise WORK(i) and WORK(N-1+i) hold the saved rotation pair (c, s) that is passed to DLASR. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: the algorithm has failed to find all the eigenvalues in */ +/* a total of 30*N iterations; if INFO = i, then i */ +/* elements of E have not converged to zero; on exit, D */ +/* and E contain the elements of a symmetric tridiagonal */ +/* matrix which is orthogonally similar to the original */ +/* matrix. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: offset the array pointers so that d__, e, work and z__ can be indexed from 1, with z__[i + j * z_dim1] addressing Z(i,j) */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body: icompz encodes COMPZ as 0 = 'N', 1 = 'V', 2 = 'I', -1 = unrecognized */ + *info = 0; + + if (_starpu_lsame_(compz, "N")) { + icompz = 0; + } else if (_starpu_lsame_(compz, "V")) { + icompz = 1; + } else if (_starpu_lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEQR", &i__1); + return 0; + } + +/* Quick return if possible (for N = 1 with COMPZ = 'I', Z is set to the 1-by-1 identity) */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = _starpu_dlamch_("E"); +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = _starpu_dlamch_("S"); + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues and eigenvectors of the tridiagonal */ +/* matrix. */ + + if (icompz == 2) { + _starpu_dlaset_("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* Determine where the matrix splits and choose QL or QR iteration */ +/* for each block, according to whether top or bottom diagonal */ +/* element is smaller. jtot counts the iterations over all blocks and is capped at nmaxit = 30*N. */ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) 
{ + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND; iscale records which threshold (1: ssfmax, 2: ssfmin) was used so the scaling can be undone at L140 */ + + i__1 = lend - l + 1; + anorm = _starpu_dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration: l and lend are swapped when |d(lend)| < |d(l)|, so lend > l below selects QL and otherwise QR is used, with l decreasing toward lend */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* QL Iteration */ + +/* Look for small subdiagonal element. */ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or DLAEV2 */ +/* to compute its eigensystem. 
*/ + + if (m == l + 1) { + if (icompz > 0) { + _starpu_dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + _starpu_dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + _starpu_dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = _starpu_dlapy2_(&g, &c_b10); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop: i runs from m-1 down to l */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + _starpu_dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations (c in work[i], -s in work[n-1+i]). */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + _starpu_dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* QR Iteration */ + +/* Look for small superdiagonal element. 
*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or DLAEV2 */ +/* to compute its eigensystem. */ + + if (m == l - 1) { + if (icompz > 0) { + _starpu_dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + _starpu_dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + _starpu_dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = _starpu_dlapy2_(&g, &c_b10); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop: i runs from m up to l-1 */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + _starpu_dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations (c in work[i], s in work[n-1+i]). */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. 
*/ + + if (icompz > 0) { + mm = l - m + 1; + _starpu_dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. 
*/ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary, over the original block bounds lsv..lendsv saved before the iteration */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* Check for no convergence to an eigenvalue after a total */ +/* of N*MAXIT iterations (nmaxit = 30*N). Once the limit is reached, INFO is incremented for every nonzero entry remaining in E. */ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + _starpu_dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors (each exchange of two d entries is mirrored by swapping the matching columns of Z) */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of DSTEQR */ + +} /* _starpu_dsteqr_ */ diff --git a/min-dgels/base/SRC/dsterf.c b/min-dgels/base/SRC/dsterf.c new file mode 100644 index 0000000..e769a53 --- /dev/null +++ b/min-dgels/base/SRC/dsterf.c @@ -0,0 +1,461 @@ +/* dsterf.f -- 
translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static integer c__1 = 1; +static doublereal c_b32 = 1.; + +/* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal c__; + integer i__, l, m; + doublereal p, r__, s; + integer l1; + doublereal bb, rt1, rt2, eps, rte; + integer lsv; + doublereal eps2, oldc; + integer lend, jtot; + extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + doublereal gamma, alpha, sigma, anorm; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal oldgam, safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal safmax; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, + integer *); + integer lendsv; + doublereal ssfmin; + integer nmaxit; + doublereal ssfmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTERF computes all eigenvalues of a symmetric tridiagonal matrix */ +/* using the Pal-Walker-Kahan variant of the QL or QR algorithm. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix. */ +/* On exit, if INFO = 0, the eigenvalues in ascending order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix. */ +/* On exit, E has been destroyed. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: the algorithm failed to find all of the eigenvalues in */ +/* a total of 30*N iterations; if INFO = i, then i */ +/* elements of E have not converged to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + _starpu_xerbla_("DSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = _starpu_dlamch_("E"); +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = _starpu_dlamch_("S"); + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. 
*/ + + nmaxit = *n * 30; + sigma = 0.; + jtot = 0; + +/* Determine where the matrix splits and choose QL or QR iteration */ +/* for each block, according to whether top or bottom diagonal */ +/* element is smaller. */ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = _starpu_dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + e[i__] = d__1 * d__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* QL Iteration */ + +/* Look for small subdiagonal element. 
*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* If remaining matrix is 2 by 2, use DLAE2 to compute its */ +/* eigenvalues. */ + + if (m == l + 1) { + rte = sqrt(e[l]); + _starpu_dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = _starpu_dlapy2_(&sigma, &c_b32); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* QR Iteration */ + +/* Look for small superdiagonal element. */ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* If remaining matrix is 2 by 2, use DLAE2 to compute its */ +/* eigenvalues. 
*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + _starpu_dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = _starpu_dlapy2_(&sigma, &c_b32); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* Check for no convergence to an eigenvalue after a total */ +/* of N*MAXIT iterations. */ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. 
*/ + +L170: + _starpu_dlasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of DSTERF */ + +} /* _starpu_dsterf_ */ diff --git a/min-dgels/base/SRC/dstev.c b/min-dgels/base/SRC/dstev.c new file mode 100644 index 0000000..3617524 --- /dev/null +++ b/min-dgels/base/SRC/dstev.c @@ -0,0 +1,212 @@ +/* dstev.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer imax; + doublereal rmin, rmax, tnrm; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + logical wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal * +, doublereal *, integer *, doublereal *, integer *); + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- 
*/ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* real symmetric tridiagonal matrix A. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. */ +/* On exit, if INFO = 0, the eigenvalues in ascending order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A, stored in elements 1 to N-1 of E. */ +/* On exit, the contents of E are destroyed. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with D(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ +/* If JOBZ = 'N', WORK is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of E did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. 
External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -6; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEV ", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + iscale = 1; + sigma = rmin / tnrm; + } else if (tnrm > rmax) { + iscale = 1; + sigma = rmax / tnrm; + } + if (iscale == 1) { + _starpu_dscal_(n, &sigma, &d__[1], &c__1); + i__1 = *n - 1; + _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); + } + +/* For eigenvalues only, call DSTERF. For eigenvalues and */ +/* eigenvectors, call DSTEQR. */ + + if (! wantz) { + _starpu_dsterf_(n, &d__[1], &e[1], info); + } else { + _starpu_dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. 
/ sigma; + _starpu_dscal_(&imax, &d__1, &d__[1], &c__1); + } + + return 0; + +/* End of DSTEV */ + +} /* _starpu_dstev_ */ diff --git a/min-dgels/base/SRC/dstevd.c b/min-dgels/base/SRC/dstevd.c new file mode 100644 index 0000000..2caab58 --- /dev/null +++ b/min-dgels/base/SRC/dstevd.c @@ -0,0 +1,273 @@ +/* dstevd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps, rmin, rmax, tnrm; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer lwmin; + logical wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dsterf_(integer *, 
doublereal *, doublereal *, + integer *); + integer liwmin; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* real symmetric tridiagonal matrix. If eigenvectors are desired, it */ +/* uses a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. */ +/* On exit, if INFO = 0, the eigenvalues in ascending order. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A, stored in elements 1 to N-1 of E. */ +/* On exit, the contents of E are destroyed. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ +/* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* eigenvectors of the matrix A, with the i-th column of Z */ +/* holding the eigenvector associated with D(i). */ +/* If JOBZ = 'N', then Z is not referenced. 
*/ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. */ +/* If JOBZ = 'V' and N > 1 then LWORK must be at least */ +/* ( 1 + 4*N + N**2 ). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. */ +/* If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of E did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. 
Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + liwmin = 1; + lwmin = 1; + if (*n > 1 && wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -6; + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + iscale = 1; + sigma = rmin / tnrm; + } else if (tnrm > rmax) { + iscale = 1; + sigma = rmax / tnrm; + } + if (iscale == 1) { + _starpu_dscal_(n, &sigma, &d__[1], &c__1); + i__1 = *n - 1; + _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); + } + +/* For eigenvalues only, call DSTERF. For eigenvalues and */ +/* eigenvectors, call DSTEDC. */ + + if (! 
wantz) { + _starpu_dsterf_(n, &d__[1], &e[1], info); + } else { + _starpu_dstedc_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], lwork, + &iwork[1], liwork, info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. / sigma; + _starpu_dscal_(n, &d__1, &d__[1], &c__1); + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEVD */ + +} /* _starpu_dstevd_ */ diff --git a/min-dgels/base/SRC/dstevr.c b/min-dgels/base/SRC/dstevr.c new file mode 100644 index 0000000..8ff861c --- /dev/null +++ b/min-dgels/base/SRC/dstevr.c @@ -0,0 +1,550 @@ +/* dstevr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__10 = 10; +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__3 = 3; +static integer c__4 = 4; + +/* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, jj; + doublereal eps, vll, vuu, tmp1; + integer imax; + doublereal rmin, rmax; + logical test; + doublereal tnrm; + integer 
itmp1; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + integer lwmin; + logical wantz; + extern doublereal _starpu_dlamch_(char *); + logical alleig, indeig; + integer iscale, ieeeok, indibl, indifl; + logical valeig; + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + integer indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo; + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), + _starpu_dstemr_(char *, char *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, integer *, + logical *, doublereal *, integer *, integer *, integer *, integer + *); + integer liwmin; + logical tryrac; + integer nsplit; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEVR computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric tridiagonal matrix T. Eigenvalues and */ +/* eigenvectors can be selected by specifying either a range of values */ +/* or a range of indices for the desired eigenvalues. */ + +/* Whenever possible, DSTEVR calls DSTEMR to compute the */ +/* eigenspectrum using Relatively Robust Representations. DSTEMR */ +/* computes eigenvalues by the dqds algorithm, while orthogonal */ +/* eigenvectors are computed from various "good" L D L^T representations */ +/* (also known as Relatively Robust Representations). Gram-Schmidt */ +/* orthogonalization is avoided as far as possible. More specifically, */ +/* the various steps of the algorithm are as follows. For the i-th */ +/* unreduced block of T, */ +/* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T */ +/* is a relatively robust representation, */ +/* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high */ +/* relative accuracy by the dqds algorithm, */ +/* (c) If there is a cluster of close eigenvalues, "choose" sigma_i */ +/* close to the cluster, and go to step (a), */ +/* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, */ +/* compute the corresponding eigenvector by forming a */ +/* rank-revealing twisted factorization. */ +/* The desired accuracy of the output can be specified by the input */ +/* parameter ABSTOL. */ + +/* For more details, see "A new O(n^2) algorithm for the symmetric */ +/* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, */ +/* Computer Science Division Technical Report No. UCB//CSD-97-971, */ +/* UC Berkeley, May 1997. */ + + +/* Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested */ +/* on machines which conform to the ieee-754 floating point standard. */ +/* DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and */ +/* when partial spectrum requests are made. 
*/ + +/* Normal execution of DSTEMR may create NaNs and infinities and */ +/* hence may abort due to a floating point exception in environments */ +/* which do not handle NaNs and infinities in the ieee standard default */ +/* manner. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ +/* ********* DSTEIN are called */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. */ +/* On exit, D may be multiplied by a constant factor chosen */ +/* to avoid over/underflow in computing the eigenvalues. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A in elements 1 to N-1 of E. */ +/* On exit, E may be multiplied by a constant factor chosen */ +/* to avoid over/underflow in computing the eigenvalues. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. 
*/ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* If high relative accuracy is important, set ABSTOL to */ +/* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* eigenvalues are computed to high relative accuracy when */ +/* possible in future releases. The current code does not */ +/* make any guarantees about high relative accuracy, but */ +/* future releases will. See J. Barlow and J. Demmel, */ +/* "Computing Accurate Eigensystems of Scaled Diagonally */ +/* Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* of which matrices define their eigenvalues to high relative */ +/* accuracy. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). 
*/ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ +/* The support of the eigenvectors in Z, i.e., the indices */ +/* indicating the nonzero elements in Z. The i-th eigenvector */ +/* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* ISUPPZ( 2*i ). */ +/* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal (and */ +/* minimal) LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,20*N). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal (and */ +/* minimal) LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. LIWORK >= max(1,10*N). */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: Internal error */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Inderjit Dhillon, IBM Almaden, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Ken Stanley, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + ieeeok = _starpu_ilaenv_(&c__10, "DSTEVR", "N", &c__1, &c__2, &c__3, &c__4); + + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + lquery = *lwork == -1 || *liwork == -1; +/* Computing MAX */ + i__1 = 1, i__2 = *n * 20; + lwmin = max(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = max(i__1,i__2); + + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -7; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -8; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -9; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -14; + } + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! 
lquery) { + *info = -17; + } else if (*liwork < liwmin && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEVR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = d__[1]; + } else { + if (*vl < d__[1] && *vu >= d__[1]) { + *m = 1; + w[1] = d__[1]; + } + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + vll = *vl; + vuu = *vu; + + tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + iscale = 1; + sigma = rmin / tnrm; + } else if (tnrm > rmax) { + iscale = 1; + sigma = rmax / tnrm; + } + if (iscale == 1) { + _starpu_dscal_(n, &sigma, &d__[1], &c__1); + i__1 = *n - 1; + _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: These indices are used only */ +/* if DSTERF or DSTEMR fail. */ +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* DSTEIN. This information is discarded; if any fail, the driver */ +/* returns INFO > 0. 
*/ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indisp + *n; + +/* If all eigenvalues are desired, then */ +/* call DSTERF or DSTEMR. If this fails for some eigenvalue, then */ +/* try DSTEBZ. */ + + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && ieeeok == 1) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &e[1], &c__1, &work[1], &c__1); + if (! wantz) { + _starpu_dcopy_(n, &d__[1], &c__1, &w[1], &c__1); + _starpu_dsterf_(n, &w[1], &work[1], info); + } else { + _starpu_dcopy_(n, &d__[1], &c__1, &work[*n + 1], &c__1); + if (*abstol <= *n * 2. * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + i__1 = *lwork - (*n << 1); + _starpu_dstemr_(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, m, + &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, &work[ + (*n << 1) + 1], &i__1, &iwork[1], liwork, info); + + } + if (*info == 0) { + *m = *n; + goto L10; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & + nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[ + indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & + z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl], + info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L10: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. 
*/ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L20: */ + } + + if (i__ != 0) { + itmp1 = iwork[i__]; + w[i__] = w[j]; + iwork[i__] = iwork[j]; + w[j] = tmp1; + iwork[j] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L30: */ + } + } + +/* Causes problems with tests 19 & 20: */ +/* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */ + + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + return 0; + +/* End of DSTEVR */ + +} /* _starpu_dstevr_ */ diff --git a/min-dgels/base/SRC/dstevx.c b/min-dgels/base/SRC/dstevx.c new file mode 100644 index 0000000..211ca25 --- /dev/null +++ b/min-dgels/base/SRC/dstevx.c @@ -0,0 +1,432 @@ +/* dstevx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * + d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, + integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *iwork, + integer *ifail, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, jj; + doublereal eps, vll, vuu, tmp1; + integer imax; + 
doublereal rmin, rmax; + logical test; + doublereal tnrm; + integer itmp1; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical wantz; + extern doublereal _starpu_dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); + integer indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo; + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indwrk; + extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer nsplit; + doublereal smlnum; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSTEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric tridiagonal matrix A. 
Eigenvalues and */ +/* eigenvectors can be selected by specifying either a range of values */ +/* or a range of indices for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* N (input) INTEGER */ +/* The order of the matrix. N >= 0. */ + +/* D (input/output) DOUBLE PRECISION array, dimension (N) */ +/* On entry, the n diagonal elements of the tridiagonal matrix */ +/* A. */ +/* On exit, D may be multiplied by a constant factor chosen */ +/* to avoid over/underflow in computing the eigenvalues. */ + +/* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */ +/* On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* matrix A in elements 1 to N-1 of E. */ +/* On exit, E may be multiplied by a constant factor chosen */ +/* to avoid over/underflow in computing the eigenvalues. */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. 
*/ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less */ +/* than or equal to zero, then EPS*|T| will be used in */ +/* its place, where |T| is the 1-norm of the tridiagonal */ +/* matrix. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If an eigenvector fails to converge (INFO > 0), then that */ +/* column of Z contains the latest approximation to the */ +/* eigenvector, and the index of the eigenvector is returned */ +/* in IFAIL. If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. 
LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* Their indices are stored in array IFAIL. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! 
(alleig || valeig || indeig)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -7; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -8; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -9; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -14; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSTEVX", &i__1); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = d__[1]; + } else { + if (*vl < d__[1] && *vu >= d__[1]) { + *m = 1; + w[1] = d__[1]; + } + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.; + vuu = 0.; + } + tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + iscale = 1; + sigma = rmin / tnrm; + } else if (tnrm > rmax) { + iscale = 1; + sigma = rmax / tnrm; + } + if (iscale == 1) { + _starpu_dscal_(n, &sigma, &d__[1], &c__1); + i__1 = *n - 1; + _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call DSTERF or DSTEQR. If this fails for some eigenvalue, */ +/* then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) 
{ + _starpu_dcopy_(n, &d__[1], &c__1, &w[1], &c__1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &e[1], &c__1, &work[1], &c__1); + indwrk = *n + 1; + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[1], info); + } else { + _starpu_dsteqr_("I", n, &w[1], &work[1], &z__[z_offset], ldz, &work[ + indwrk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L20; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indwrk = 1; + indibl = 1; + indisp = indibl + *n; + indiwo = indisp + *n; + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & + nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[indwrk], & + iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & + z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], &ifail[1], + info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L20: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. 
*/ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L30: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L40: */ + } + } + + return 0; + +/* End of DSTEVX */ + +} /* _starpu_dstevx_ */ diff --git a/min-dgels/base/SRC/dsycon.c b/min-dgels/base/SRC/dsycon.c new file mode 100644 index 0000000..e0cffa7 --- /dev/null +++ b/min-dgels/base/SRC/dsycon.c @@ -0,0 +1,204 @@ +/* dsycon.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + integer i__, kase; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, + integer *); + doublereal 
ainvnm; + extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYCON estimates the reciprocal of the condition number (in the */ +/* 1-norm) of a real symmetric matrix A using the factorization */ +/* A = U*D*U**T or A = L*D*L**T computed by DSYTRF. */ + +/* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSYTRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* ANORM (input) DOUBLE PRECISION */ +/* The 1-norm of the original matrix A. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* estimate of the 1-norm of inv(A) computed in this routine. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm <= 0.) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + if (ipiv[i__] > 0 && a[i__ + i__ * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (ipiv[i__] > 0 && a[i__ + i__ * a_dim1] == 0.) { + return 0; + } +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L') or inv(U*D*U'). 
*/ + + _starpu_dsytrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, + info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of DSYCON */ + +} /* _starpu_dsycon_ */ diff --git a/min-dgels/base/SRC/dsyequb.c b/min-dgels/base/SRC/dsyequb.c new file mode 100644 index 0000000..79a7b59 --- /dev/null +++ b/min-dgels/base/SRC/dsyequb.c @@ -0,0 +1,333 @@ +/* dsyequb.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + doublereal d__; + integer i__, j; + doublereal t, u, c0, c1, c2, si; + logical up; + doublereal avg, std, tol, base; + integer iter; + doublereal smin, smax, scale; + extern logical _starpu_lsame_(char *, char *); + doublereal sumsq; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + doublereal smlnum; + + +/* -- LAPACK 
routine (version 3.2) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYEQUB computes row and column scalings intended to equilibrate a */ +/* symmetric matrix A and reduce its condition number */ +/* (with respect to the two-norm). S contains the scale factors, */ +/* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ +/* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ +/* choice of S puts the condition number of B within a factor N of the */ +/* smallest possible condition number over all possible diagonal */ +/* scalings. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The N-by-N symmetric matrix whose scaling */ +/* factors are to be computed. Only the triangle of A selected */ +/* by UPLO (including the diagonal) is referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* S (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, S contains the scale factors for A. */ + +/* SCOND (output) DOUBLE PRECISION */ +/* If INFO = 0, SCOND contains the ratio of the smallest S(i) to */ +/* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* large nor too small, it is not worth scaling by S. */ + +/* AMAX (output) DOUBLE PRECISION */ +/* Absolute value of largest matrix element. If AMAX is very */ +/* close to overflow or very close to underflow, the matrix */ +/* should be scaled. 
*/ +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ + +/* Further Details */ +/* ======= ======= */ + +/* Reference: Livne, O.E. and Golub, G.H., "Scaling by Binormalization", */ +/* Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. */ +/* DOI 10.1023/B:NUMA.0000016606.32820.69 */ +/* Tech report version: http://ruready.utah.edu/archive/papers/bin.pdf */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, "L"))) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYEQUB", &i__1); + return 0; + } + up = _starpu_lsame_(uplo, "U"); + *amax = 0.; + +/* Quick return if possible. 
*/ + + if (*n == 0) { + *scond = 1.; + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 0.; + } + *amax = 0.; + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = s[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + s[i__] = max(d__2,d__3); +/* Computing MAX */ + d__2 = s[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + s[j] = max(d__2,d__3); +/* Computing MAX */ + d__2 = *amax, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + *amax = max(d__2,d__3); + } +/* Computing MAX */ + d__2 = s[j], d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); + s[j] = max(d__2,d__3); +/* Computing MAX */ + d__2 = *amax, d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); + *amax = max(d__2,d__3); + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + d__2 = s[j], d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); + s[j] = max(d__2,d__3); +/* Computing MAX */ + d__2 = *amax, d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); + *amax = max(d__2,d__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = s[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + s[i__] = max(d__2,d__3); +/* Computing MAX */ + d__2 = s[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + s[j] = max(d__2,d__3); +/* Computing MAX */ + d__2 = *amax, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + *amax = max(d__2,d__3); + } + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + s[j] = 1. / s[j]; + } + tol = 1. 
/ sqrt(*n * 2.); + for (iter = 1; iter <= 100; ++iter) { + scale = 0.; + sumsq = 0.; +/* BETA = |A|S */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; + } + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ + j]; + work[j] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ + i__]; + } + work[j] += (d__1 = a[j + j * a_dim1], abs(d__1)) * s[j]; + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += (d__1 = a[j + j * a_dim1], abs(d__1)) * s[j]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ + j]; + work[j] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ + i__]; + } + } + } +/* avg = s^T beta / n */ + avg = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + avg += s[i__] * work[i__]; + } + avg /= *n; + std = 0.; + i__1 = *n * 3; + for (i__ = (*n << 1) + 1; i__ <= i__1; ++i__) { + work[i__] = s[i__ - (*n << 1)] * work[i__ - (*n << 1)] - avg; + } + _starpu_dlassq_(n, &work[(*n << 1) + 1], &c__1, &scale, &sumsq); + std = scale * sqrt(sumsq / *n); + if (std < tol * avg) { + goto L999; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + t = (d__1 = a[i__ + i__ * a_dim1], abs(d__1)); + si = s[i__]; + c2 = (*n - 1) * t; + c1 = (*n - 2) * (work[i__] - t * si); + c0 = -(t * si) * si + work[i__] * 2 * si - *n * avg; + d__ = c1 * c1 - c0 * 4 * c2; + if (d__ <= 0.) 
{ + *info = -1; + return 0; + } + si = c0 * -2 / (c1 + sqrt(d__)); + d__ = si - s[i__]; + u = 0.; + if (up) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + u += s[j] * t; + work[j] += d__ * t; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + u += s[j] * t; + work[j] += d__ * t; + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + u += s[j] * t; + work[j] += d__ * t; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + t = (d__1 = a[j + i__ * a_dim1], abs(d__1)); + u += s[j] * t; + work[j] += d__ * t; + } + } + avg += (u + work[i__]) * d__ / *n; + s[i__] = si; + } + } +L999: + smlnum = _starpu_dlamch_("SAFEMIN"); + bignum = 1. / smlnum; + smin = bignum; + smax = 0.; + t = 1. / sqrt(avg); + base = _starpu_dlamch_("B"); + u = 1. / log(base); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = (integer) (u * log(s[i__] * t)); + s[i__] = pow_di(&base, &i__2); +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[i__]; + smax = max(d__1,d__2); + } + *scond = max(smin,smlnum) / min(smax,bignum); + + return 0; +} /* _starpu_dsyequb_ */ diff --git a/min-dgels/base/SRC/dsyev.c b/min-dgels/base/SRC/dsyev.c new file mode 100644 index 0000000..58d44cd --- /dev/null +++ b/min-dgels/base/SRC/dsyev.c @@ -0,0 +1,283 @@ +/* dsyev.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b17 = 1.; + +/* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, + integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer nb; + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + logical lower, wantz; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + integer indtau; + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + integer indwrk; + extern /* Subroutine */ int 
_starpu_dorgtr_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + integer llwork; + doublereal smlnum; + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* real symmetric matrix A. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ +/* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* orthonormal eigenvectors of the matrix A. */ +/* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* or the upper triangle (if UPLO='U') of A, including the */ +/* diagonal, is destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= max(1,3*N-1). */ +/* For optimal efficiency, LWORK >= (NB+2)*N, */ +/* where NB is the blocksize for DSYTRD returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the algorithm failed to converge; i */ +/* off-diagonal elements of an intermediate tridiagonal */ +/* form did not converge to zero. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lower = _starpu_lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! 
(lower || _starpu_lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 2) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3 - 1; + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + work[1] = 2.; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + _starpu_dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DORGTR to generate the orthogonal matrix, then call DSTEQR. */ + + if (! 
wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dorgtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo); + _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &a[a_offset], lda, &work[indtau], + info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYEV */ + +} /* _starpu_dsyev_ */ diff --git a/min-dgels/base/SRC/dsyevd.c b/min-dgels/base/SRC/dsyevd.c new file mode 100644 index 0000000..d5ab353 --- /dev/null +++ b/min-dgels/base/SRC/dsyevd.c @@ -0,0 +1,353 @@ +/* dsyevd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b17 = 1.; + +/* Subroutine */ int _starpu_dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm, rmin, rmax; + integer lopt; + extern /* Subroutine */ int _starpu_dscal_(integer 
*, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo, lwmin, liopt; + logical lower, wantz; + integer indwk2, llwrk2; + extern doublereal _starpu_dlamch_(char *); + integer iscale; + extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), _starpu_dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), _starpu_dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + integer indtau; + extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + integer indwrk, liwmin; + extern /* Subroutine */ int _starpu_dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), _starpu_dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + integer llwork; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* real symmetric matrix A. If eigenvectors are desired, it uses a */ +/* divide and conquer algorithm. 
*/ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Because of large use of BLAS of level 3, DSYEVD needs N**2 more */ +/* workspace than DSYEVX. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ +/* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* orthonormal eigenvectors of the matrix A. */ +/* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* or the upper triangle (if UPLO='U') of A, including the */ +/* diagonal, is destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (LWORK) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N <= 1, LWORK must be at least 1. 
*/ +/* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. */ +/* If JOBZ = 'V' and N > 1, LWORK must be at least */ +/* 1 + 6*N + 2*N**2. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If N <= 1, LIWORK must be at least 1. */ +/* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* to converge; i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero; */ +/* if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* to compute an eigenvalue while working on the submatrix */ +/* lying in rows and columns INFO/(N+1) through */ +/* mod(INFO,N+1). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Jeff Rutter, Computer Science Division, University of California */ +/* at Berkeley, USA */ +/* Modified by Francoise Tisseur, University of Tennessee. */ + +/* Modified description of INFO. Sven, 16 Feb 05. 
*/ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + lower = _starpu_lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || _starpu_lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, + &c_n1, &c_n1, &c_n1); + lopt = max(i__1,i__2); + liopt = liwmin; + } + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. 
/ smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + _starpu_dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + lopt = (integer) ((*n << 1) + work[indwrk]); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call DORMTR to multiply it by the */ +/* Householder transformations stored in A. */ + + if (! wantz) { + _starpu_dsterf_(n, &w[1], &work[inde], info); + } else { + _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + _starpu_dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + _starpu_dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* Computing MAX */ +/* Computing 2nd power */ + i__3 = *n; + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. 
/ sigma; + _starpu_dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYEVD */ + +} /* _starpu_dsyevd_ */ diff --git a/min-dgels/base/SRC/dsyevr.c b/min-dgels/base/SRC/dsyevr.c new file mode 100644 index 0000000..ef59c38 --- /dev/null +++ b/min-dgels/base/SRC/dsyevr.c @@ -0,0 +1,652 @@ +/* dsyevr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__10 = 10; +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__3 = 3; +static integer c__4 = 4; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, nb, jj; + doublereal eps, vll, vuu, tmp1; + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + integer inddd, indee; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + char order[1]; + integer 
indwk; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + integer lwmin; + logical lower, wantz; + extern doublereal _starpu_dlamch_(char *); + logical alleig, indeig; + integer iscale, ieeeok, indibl, indifl; + logical valeig; + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo, indwkn; + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), + _starpu_dstemr_(char *, char *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, integer *, + logical *, doublereal *, integer *, integer *, integer *, integer + *); + integer liwmin; + logical tryrac; + extern /* Subroutine */ int _starpu_dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer llwrkn, llwork, nsplit; + doublereal smlnum; + extern /* Subroutine */ int _starpu_dsytrd_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, 
+ integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYEVR computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric matrix A. Eigenvalues and eigenvectors can be */ +/* selected by specifying either a range of values or a range of */ +/* indices for the desired eigenvalues. */ + +/* DSYEVR first reduces the matrix A to tridiagonal form T with a call */ +/* to DSYTRD. Then, whenever possible, DSYEVR calls DSTEMR to compute */ +/* the eigenspectrum using Relatively Robust Representations. DSTEMR */ +/* computes eigenvalues by the dqds algorithm, while orthogonal */ +/* eigenvectors are computed from various "good" L D L^T representations */ +/* (also known as Relatively Robust Representations). Gram-Schmidt */ +/* orthogonalization is avoided as far as possible. More specifically, */ +/* the various steps of the algorithm are as follows. */ + +/* For each unreduced block (submatrix) of T, */ +/* (a) Compute T - sigma I = L D L^T, so that L and D */ +/* define all the wanted eigenvalues to high relative accuracy. */ +/* This means that small relative changes in the entries of D and L */ +/* cause only small relative changes in the eigenvalues and */ +/* eigenvectors. The standard (unfactored) representation of the */ +/* tridiagonal matrix T does not have this property in general. */ +/* (b) Compute the eigenvalues to suitable accuracy. */ +/* If the eigenvectors are desired, the algorithm attains full */ +/* accuracy of the computed eigenvalues only right before */ +/* the corresponding vectors have to be computed, see steps c) and d). 
*/ +/* (c) For each cluster of close eigenvalues, select a new */ +/* shift close to the cluster, find a new factorization, and refine */ +/* the shifted eigenvalues to suitable accuracy. */ +/* (d) For each eigenvalue with a large enough relative separation compute */ +/* the corresponding eigenvector by forming a rank revealing twisted */ +/* factorization. Go back to (c) for any clusters that remain. */ + +/* The desired accuracy of the output can be specified by the input */ +/* parameter ABSTOL. */ + +/* For more details, see DSTEMR's documentation and: */ +/* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* 2004. Also LAPACK Working Note 154. */ +/* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* tridiagonal eigenvalue/eigenvector problem", */ +/* Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* UC Berkeley, May 1997. */ + + +/* Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested */ +/* on machines which conform to the ieee-754 floating point standard. */ +/* DSYEVR calls DSTEBZ and DSTEIN on non-ieee machines and */ +/* when partial spectrum requests are made. */ + +/* Normal execution of DSTEMR may create NaNs and infinities and */ +/* hence may abort due to a floating point exception in environments */ +/* which do not handle NaNs and infinities in the ieee standard default */ +/* manner. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. 
*/ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ +/* ********* DSTEIN are called */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ +/* On exit, the lower triangle (if UPLO='L') or the upper */ +/* triangle (if UPLO='U') of A, including the diagonal, is */ +/* destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. 
If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* If high relative accuracy is important, set ABSTOL to */ +/* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* eigenvalues are computed to high relative accuracy when */ +/* possible in future releases. The current code does not */ +/* make any guarantees about high relative accuracy, but */ +/* future releases will. See J. Barlow and J. Demmel, */ +/* "Computing Accurate Eigensystems of Scaled Diagonally */ +/* Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* of which matrices define their eigenvalues to high relative */ +/* accuracy. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* The first M elements contain the selected eigenvalues in */ +/* ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ +/* Supplying N columns is always safe. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). 
*/ + +/* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ +/* The support of the eigenvectors in Z, i.e., the indices */ +/* indicating the nonzero elements in Z. The i-th eigenvector */ +/* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* ISUPPZ( 2*i ). */ +/* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,26*N). */ +/* For optimal efficiency, LWORK >= (NB+6)*N, */ +/* where NB is the max of the blocksize for DSYTRD and DORMTR */ +/* returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. LIWORK >= max(1,10*N). */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: Internal error */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Inderjit Dhillon, IBM Almaden, USA */ +/* Osni Marques, LBNL/NERSC, USA */ +/* Ken Stanley, Computer Science Division, University of */ +/* California at Berkeley, USA */ +/* Jason Riedy, Computer Science Division, University of */ +/* California at Berkeley, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + ieeeok = _starpu_ilaenv_(&c__10, "DSYEVR", "N", &c__1, &c__2, &c__3, &c__4); + + lower = _starpu_lsame_(uplo, "L"); + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + + lquery = *lwork == -1 || *liwork == -1; + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 26; + lwmin = max(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = max(i__1,i__2); + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! 
(lower || _starpu_lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -9; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } else if (*lwork < lwmin && ! lquery) { + *info = -18; + } else if (*liwork < liwmin && ! lquery) { + *info = -20; + } + } + + if (*info == 0) { + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, & + c_n1); + nb = max(i__1,i__2); +/* Computing MAX */ + i__1 = (nb + 1) * *n; + lwkopt = max(i__1,lwmin); + work[1] = (doublereal) lwkopt; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYEVR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n == 1) { + work[1] = 7.; + if (alleig || indeig) { + *m = 1; + w[1] = a[a_dim1 + 1]; + } else { + if (*vl < a[a_dim1 + 1] && *vu >= a[a_dim1 + 1]) { + *m = 1; + w[1] = a[a_dim1 + 1]; + } + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + vll = *vl; + vuu = *vu; + anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + if (anrm > 0. 
&& anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + _starpu_dscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: The IWORK indices are */ +/* used only if DSTERF or DSTEMR fail. */ +/* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the */ +/* elementary reflectors used in DSYTRD. */ + indtau = 1; +/* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. */ + indd = indtau + *n; +/* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the */ +/* tridiagonal matrix from DSYTRD. */ + inde = indd + *n; +/* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over */ +/* -written by DSTEMR (the DSTERF path copies the diagonal to W). */ + inddd = inde + *n; +/* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over */ +/* -written while computing the eigenvalues in DSTERF and DSTEMR. */ + indee = inddd + *n; +/* INDWK is the starting offset of the left-over workspace, and */ +/* LLWORK is the remaining workspace size. */ + indwk = indee + *n; + llwork = *lwork - indwk + 1; +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* DSTEIN. 
This information is discarded; if any fail, the driver */ +/* returns INFO > 0. */ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indisp + *n; + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ + indtau], &work[indwk], &llwork, &iinfo); + +/* If all eigenvalues are desired */ +/* then call DSTERF or DSTEMR and DORMTR. */ + + if ((alleig || indeig && *il == 1 && *iu == *n) && ieeeok == 1) { + if (! wantz) { + _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsterf_(n, &w[1], &work[indee], info); + } else { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dcopy_(n, &work[indd], &c__1, &work[inddd], &c__1); + + if (*abstol <= *n * 2. * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + _starpu_dstemr_(jobz, "A", n, &work[inddd], &work[indee], vl, vu, il, iu, + m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, & + work[indwk], lwork, &iwork[1], liwork, info); + + + +/* Apply orthogonal matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. */ + + if (wantz && *info == 0) { + indwkn = inde; + llwrkn = *lwork - indwkn + 1; + _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] +, &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + } + + + if (*info == 0) { +/* Everything worked. Skip DSTEBZ/DSTEIN. IWORK(:) are */ +/* undefined. */ + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ +/* Also call DSTEBZ and DSTEIN if DSTEMR fails. 
*/ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ + indwk], &iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ + indisp], &z__[z_offset], ldz, &work[indwk], &iwork[indiwo], & + iwork[indifl], info); + +/* Apply orthogonal matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. */ + + indwkn = inde; + llwrkn = *lwork - indwkn + 1; + _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +/* Jump here if DSTEMR/DSTEIN succeeded. */ +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. */ +/* It may not be initialized (if DSTEMR/DSTEIN succeeded), and we do */ +/* not return this detailed information to the user. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + w[i__] = w[j]; + w[j] = tmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. 
*/ + + work[1] = (doublereal) lwkopt; + iwork[1] = liwmin; + + return 0; + +/* End of DSYEVR */ + +} /* _starpu_dsyevr_ */ diff --git a/min-dgels/base/SRC/dsyevx.c b/min-dgels/base/SRC/dsyevx.c new file mode 100644 index 0000000..a746a6c --- /dev/null +++ b/min-dgels/base/SRC/dsyevx.c @@ -0,0 +1,536 @@ +/* dsyevx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dsyevx_(char *jobz, char *range, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * + il, integer *iu, doublereal *abstol, integer *m, doublereal *w, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, + integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, nb, jj; + doublereal eps, vll, vuu, tmp1; + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, indee; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical _starpu_lsame_(char *, char *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, 
doublereal *, integer *); + logical lower, wantz; + extern doublereal _starpu_dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + doublereal safmin; + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *), + _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); + integer indiwo, indwkn; + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indwrk, lwkmin; + extern /* Subroutine */ int _starpu_dorgtr_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dormtr_(char *, char *, char *, integer *, integer *, doublereal * +, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *); + integer llwrkn, llwork, nsplit; + doublereal smlnum; + extern /* Subroutine */ int _starpu_dsytrd_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* of a real symmetric matrix A. Eigenvalues and eigenvectors can be */ +/* selected by specifying either a range of values or a range of indices */ +/* for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ +/* On exit, the lower triangle (if UPLO='L') or the upper */ +/* triangle (if UPLO='U') of A, including the diagonal, is */ +/* destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. 
*/ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* See "Computing Small Singular Values of Bidiagonal Matrices */ +/* with Guaranteed High Relative Accuracy," by Demmel and */ +/* Kahan, LAPACK Working Note #3. */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* On normal exit, the first M elements contain the selected */ +/* eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). 
*/ +/* If an eigenvector fails to converge, then that column of Z */ +/* contains the latest approximation to the eigenvector, and the */ +/* index of the eigenvector is returned in IFAIL. */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* otherwise 8*N. */ +/* For optimal efficiency, LWORK >= (NB+3)*N, */ +/* where NB is the max of the blocksize for DSYTRD and DORMTR */ +/* returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* Their indices are stored in array IFAIL. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. 
*/ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + lower = _starpu_lsame_(uplo, "L"); + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || _starpu_lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -9; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwkmin = 1; + work[1] = (doublereal) lwkmin; + } else { + lwkmin = *n << 3; + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, + &c_n1); + nb = max(i__1,i__2); +/* Computing MAX */ + i__1 = lwkmin, i__2 = (nb + 3) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + } + + if (*lwork < lwkmin && ! 
lquery) { + *info = -17; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYEVX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = a[a_dim1 + 1]; + } else { + if (*vl < a[a_dim1 + 1] && *vu >= a[a_dim1 + 1]) { + *m = 1; + w[1] = a[a_dim1 + 1]; + } + } + if (wantz) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = _starpu_dlamch_("Safe minimum"); + eps = _starpu_dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = min(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + _starpu_dscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + indtau = 1; + inde = indtau + *n; + indd = inde + *n; + indwrk = indd + *n; + llwork = *lwork - indwrk + 1; + _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ + indtau], &work[indwrk], &llwork, &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call DSTERF or DORGTR and DSTEQR.
If this fails for */ +/* some eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); + indee = indwrk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsterf_(n, &w[1], &work[indee], info); + } else { + _starpu_dlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); + _starpu_dorgtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] +, &llwork, &iinfo); + i__1 = *n - 1; + _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); + _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ + indwrk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L30: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L40; /* NOTE(review): indibl is only assigned further down, after this jump; the sort after L40 indexes iwork[indibl + ...] whenever it swaps two eigenvalues -- confirm no swap can occur on this path. */ + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwo = indisp + *n; + _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ + indwrk], &iwork[indiwo], info); + + if (wantz) { + _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ + indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & + ifail[1], info); + +/* Apply orthogonal matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by DSTEIN. 
*/ + + indwkn = inde; + llwrkn = *lwork - indwkn + 1; + _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately.
*/ + +L40: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + _starpu_dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L50: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L60: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYEVX */ + +} /* _starpu_dsyevx_ */ diff --git a/min-dgels/base/SRC/dsygs2.c b/min-dgels/base/SRC/dsygs2.c new file mode 100644 index 0000000..f56493b --- /dev/null +++ b/min-dgels/base/SRC/dsygs2.c @@ -0,0 +1,299 @@ +/* dsygs2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b6 = -1.; +static integer c__1 = 1; +static doublereal c_b27 = 1.; + +/* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer k; + doublereal ct, akk, bkk; + extern /* Subroutine */ int _starpu_dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dtrsv_(char *, char *, char *, integer *, doublereal *, + integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYGS2 reduces a real symmetric-definite generalized eigenproblem */ +/* to standard form. 
*/ + +/* If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') */ + +/* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* B*A*x = lambda*x, and A is overwritten by U*A*U' or L'*A*L. */ + +/* B must have been previously factorized as U'*U or L*L' by DPOTRF. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); */ +/* = 2 or 3: compute U*A*U' or L'*A*L. */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored, and how B has been factorized. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n by n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n by n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the transformed matrix, stored in the */ +/* same format as A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ +/* The triangular factor from the Cholesky factorization of B, */ +/* as returned by DPOTRF. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* ..
Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYGS2", &i__1); + return 0; + } + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U')*A*inv(U) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(k:n,k:n) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + a[k + k * a_dim1] = akk; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. 
/ bkk; + _starpu_dscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + ct = akk * -.5; + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + _starpu_dsyr2_(uplo, &i__2, &c_b6, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + _starpu_dtrsv_(uplo, "Transpose", "Non-unit", &i__2, &b[k + 1 + ( + k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L') */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(k:n,k:n) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + a[k + k * a_dim1] = akk; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + _starpu_dscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + ct = akk * -.5; + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + _starpu_dsyr2_(uplo, &i__2, &c_b6, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + _starpu_daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + _starpu_dtrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 + + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], + &c__1); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U' */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(1:k,1:k) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; + i__2 = k - 1; + _starpu_dtrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], + ldb, &a[k * a_dim1 + 1], &c__1); + ct = akk * .5; + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &b[k * 
b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + _starpu_dsyr2_(uplo, &i__2, &c_b27, &a[k * a_dim1 + 1], &c__1, &b[k * + b_dim1 + 1], &c__1, &a[a_offset], lda); + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + _starpu_dscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); +/* Computing 2nd power */ + d__1 = bkk; + a[k + k * a_dim1] = akk * (d__1 * d__1); +/* L30: */ + } + } else { + +/* Compute L'*A*L */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(1:k,1:k) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; + i__2 = k - 1; + _starpu_dtrmv_(uplo, "Transpose", "Non-unit", &i__2, &b[b_offset], + ldb, &a[k + a_dim1], lda); + ct = akk * .5; + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + _starpu_dsyr2_(uplo, &i__2, &c_b27, &a[k + a_dim1], lda, &b[k + + b_dim1], ldb, &a[a_offset], lda); + i__2 = k - 1; + _starpu_daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + _starpu_dscal_(&i__2, &bkk, &a[k + a_dim1], lda); +/* Computing 2nd power */ + d__1 = bkk; + a[k + k * a_dim1] = akk * (d__1 * d__1); +/* L40: */ + } + } + } + return 0; + +/* End of DSYGS2 */ + +} /* _starpu_dsygs2_ */ diff --git a/min-dgels/base/SRC/dsygst.c b/min-dgels/base/SRC/dsygst.c new file mode 100644 index 0000000..48020a4 --- /dev/null +++ b/min-dgels/base/SRC/dsygst.c @@ -0,0 +1,347 @@ +/* dsygst.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b14 = 1.; +static doublereal c_b16 = -.5; +static doublereal c_b19 = -1.; +static doublereal c_b52 = .5; + +/* Subroutine: reduces a real symmetric-definite generalized eigenproblem to standard form (see Purpose below) */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer k, kb, nb; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsymm_( + char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dsygs2_( + integer *, char *, integer *, doublereal *, integer *, doublereal + *, integer *, integer *), _starpu_dsyr2k_(char *, char *, integer + *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *) + , _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + +/* -- LAPACK routine (version 
3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYGST reduces a real symmetric-definite generalized eigenproblem */ +/* to standard form. */ + +/* If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T). */ + +/* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ + +/* B must have been previously factorized as U**T*U or L*L**T by DPOTRF. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ +/* = 2 or 3: compute U*A*U**T or L**T*A*L. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored and B is factored as */ +/* U**T*U; */ +/* = 'L': Lower triangle of A is stored and B is factored as */ +/* L*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if INFO = 0, the transformed matrix, stored in the */ +/* same format as A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ +/* The triangular factor from the Cholesky factorization of B, */ +/* as returned by DPOTRF. 
*/ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset a and b so that Fortran-style 1-based indexing a[i + j * a_dim1] addresses element (i,j) */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYGST", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = _starpu_ilaenv_(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1); + + if (nb <= 1 || nb >= *n) { + +/* Block size is at most 1 or covers the whole matrix: use unblocked code */ + + _starpu_dsygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + } else { + +/* Use blocked code */ + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U')*A*inv(U) */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? 
k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the upper triangle of A(k:n,k:n) */ + + _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + _starpu_dtrsm_("Left", uplo, "Transpose", "Non-unit", &kb, & + i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b14, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a[k + + (k + kb) * a_dim1], lda, &b[k + (k + kb) * + b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * + a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b14, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dtrsm_("Right", uplo, "No transpose", "Non-unit", &kb, + &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1] +, ldb, &a[k + (k + kb) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L') */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the lower triangle of A(k:n,k:n) */ + + _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + _starpu_dtrsm_("Right", uplo, "Transpose", "Non-unit", &i__3, + &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b14, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &a[ + k + kb + k * a_dim1], lda, &b[k + kb + k * + b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * + a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b14, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + _starpu_dtrsm_("Left", uplo, "No transpose", "Non-unit", & + i__3, &kb, &c_b14, &b[k + kb + (k + kb) * + b_dim1], ldb, &a[k + kb + k * a_dim1], lda); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U' */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? 
k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + _starpu_dtrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & + kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], + lda) + ; + i__3 = k - 1; + _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + _starpu_dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a[k * + a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, + &a[a_offset], lda); + i__3 = k - 1; + _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + _starpu_dtrmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, + &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + + 1], lda); + _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L30: */ + } + } else { + +/* Compute L'*A*L */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + _starpu_dtrmm_("Right", uplo, "No transpose", "Non-unit", &kb, & + i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], + lda); + i__3 = k - 1; + _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + + a_dim1], lda); + i__3 = k - 1; + _starpu_dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a[k + + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[ + a_offset], lda); + i__3 = k - 1; + _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + + a_dim1], lda); + i__3 = k - 1; + _starpu_dtrmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, + &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], + lda); + _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L40: */ + } + } + } + } + return 0; + +/* End of DSYGST */ + +} /* _starpu_dsygst_ */ diff --git a/min-dgels/base/SRC/dsygv.c b/min-dgels/base/SRC/dsygv.c new file mode 100644 index 0000000..eeed548 --- /dev/null +++ b/min-dgels/base/SRC/dsygv.c @@ -0,0 +1,285 @@ +/* dsygv.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b16 = 1.; + +/* Subroutine: computes all eigenvalues and, optionally, eigenvectors of a real generalized symmetric-definite eigenproblem (see Purpose below) */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer nb, neig; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + char trans[1]; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsyev_(char *, char *, integer *, doublereal * +, integer *, doublereal *, doublereal *, integer *, integer *); + logical wantz; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, + integer *, integer *); + integer lwkmin; + extern /* Subroutine */ int _starpu_dsygst_(integer *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* 
-- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. */ +/* Here A and B are assumed to be symmetric and B is also */ +/* positive definite. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ + +/* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* matrix Z of eigenvectors. The eigenvectors are normalized */ +/* as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* or the lower triangle (if UPLO='L') of A, including the */ +/* diagonal, is destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the symmetric positive definite matrix B. */ +/* If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* contains the upper triangular part of the matrix B. */ +/* If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* contains the lower triangular part of the matrix B. */ + +/* On exit, if INFO <= N, the part of B containing the matrix is */ +/* overwritten by the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= max(1,3*N-1). */ +/* For optimal efficiency, LWORK >= (NB+2)*N, */ +/* where NB is the blocksize for DSYTRD returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPOTRF or DSYEV returned an error code: */ +/* <= N: if INFO = i, DSYEV failed to converge; */ +/* i off-diagonal elements of an intermediate */ +/* tridiagonal form did not converge to zero; */ +/* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. 
*/ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset the array pointers so that Fortran-style 1-based indexing can be used below */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3 - 1; + lwkmin = max(i__1,i__2); + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = lwkmin, i__2 = (nb + 2) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYGV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + _starpu_dsyev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. 
*/ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ + b_offset], ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ + b_offset], ldb, &a[a_offset], lda); + } + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYGV */ + +} /* _starpu_dsygv_ */ diff --git a/min-dgels/base/SRC/dsygvd.c b/min-dgels/base/SRC/dsygvd.c new file mode 100644 index 0000000..a73fdc8 --- /dev/null +++ b/min-dgels/base/SRC/dsygvd.c @@ -0,0 +1,338 @@ +/* dsygvd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b11 = 1.; + +/* Subroutine: computes all eigenvalues and, optionally, eigenvectors of a real generalized symmetric-definite eigenproblem, using divide and conquer when eigenvectors are requested (see Purpose below) */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer lopt; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + char trans[1]; + integer liopt; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper, wantz; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrf_( + char *, integer *, doublereal *, integer *, integer *); + integer liwmin; + extern /* Subroutine */ int _starpu_dsyevd_(char *, char *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *), _starpu_dsygst_(integer *, char *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + logical lquery; + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* B are assumed to be symmetric and B is also positive definite. */ +/* If eigenvectors are desired, it uses a divide and conquer algorithm. */ + +/* The divide and conquer algorithm makes very mild assumptions about */ +/* floating point arithmetic. It will work on machines with a guard */ +/* digit in add/subtract, or on those binary machines without guard */ +/* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* without guard digits, but we know of none. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangles of A and B are stored; */ +/* = 'L': Lower triangles of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ + +/* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* matrix Z of eigenvectors. 
The eigenvectors are normalized */ +/* as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* or the lower triangle (if UPLO='L') of A, including the */ +/* diagonal, is destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the symmetric matrix B. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of B contains the */ +/* upper triangular part of the matrix B. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of B contains */ +/* the lower triangular part of the matrix B. */ + +/* On exit, if INFO <= N, the part of B containing the matrix is */ +/* overwritten by the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* If INFO = 0, the eigenvalues in ascending order. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If N <= 1, LWORK >= 1. */ +/* If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. */ +/* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal sizes of the WORK and IWORK */ +/* arrays, returns these values as the first entries of the WORK */ +/* and IWORK arrays, and no error message related to LWORK or */ +/* LIWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 
*/ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If N <= 1, LIWORK >= 1. */ +/* If JOBZ = 'N' and N > 1, LIWORK >= 1. */ +/* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal sizes of the WORK and */ +/* IWORK arrays, returns these values as the first entries of */ +/* the WORK and IWORK arrays, and no error message related to */ +/* LWORK or LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPOTRF or DSYEVD returned an error code: */ +/* <= N: if INFO = i and JOBZ = 'N', then the algorithm */ +/* failed to converge; i off-diagonal elements of an */ +/* intermediate tridiagonal form did not converge to */ +/* zero; */ +/* if INFO = i and JOBZ = 'V', then the algorithm */ +/* failed to compute an eigenvalue while working on */ +/* the submatrix lying in rows and columns INFO/(N+1) */ +/* through mod(INFO,N+1); */ +/* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* Modified so that no backsubstitution is performed if DSYEVD fails to */ +/* converge (NEIG in old code could be greater than N causing out of */ +/* bounds reference to A - reported by Ralf Meyer). Also corrected the */ +/* description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. 
Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: offset the array pointers so that Fortran-style 1-based indexing can be used below */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = _starpu_lsame_(jobz, "V"); + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } + lopt = lwmin; + liopt = liwmin; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + + if (*info == 0) { + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*liwork < liwmin && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYGVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. 
*/ + + _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + _starpu_dsyevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &iwork[ + 1], liwork, info); +/* Computing MAX */ + d__1 = (doublereal) lopt; + lopt = (integer) max(d__1,work[1]); +/* Computing MAX */ + d__1 = (doublereal) liopt, d__2 = (doublereal) iwork[1]; + liopt = (integer) max(d__1,d__2); + + if (wantz && *info == 0) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] +, ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] +, ldb, &a[a_offset], lda); + } + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYGVD */ + +} /* _starpu_dsygvd_ */ diff --git a/min-dgels/base/SRC/dsygvx.c b/min-dgels/base/SRC/dsygvx.c new file mode 100644 index 0000000..47c4489 --- /dev/null +++ b/min-dgels/base/SRC/dsygvx.c @@ -0,0 +1,396 @@ +/* dsygvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b19 = 1.; + +/* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer + *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, + doublereal *abstol, integer *m, doublereal *w, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *iwork, + integer *ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2; + + /* Local variables */ + integer nb; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + char trans[1]; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical upper, wantz, alleig, indeig, valeig; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, + integer *, integer *); + integer lwkmin; + extern /* Subroutine */ int _starpu_dsygst_(integer *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, 
integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dsyevx_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYGVX computes selected eigenvalues, and optionally, eigenvectors */ +/* of a real generalized symmetric-definite eigenproblem, of the form */ +/* A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. Here A */ +/* and B are assumed to be symmetric and B is also positive definite. */ +/* Eigenvalues and eigenvectors can be selected by specifying either a */ +/* range of values or a range of indices for the desired eigenvalues. */ + +/* Arguments */ +/* ========= */ + +/* ITYPE (input) INTEGER */ +/* Specifies the problem type to be solved: */ +/* = 1: A*x = (lambda)*B*x */ +/* = 2: A*B*x = (lambda)*x */ +/* = 3: B*A*x = (lambda)*x */ + +/* JOBZ (input) CHARACTER*1 */ +/* = 'N': Compute eigenvalues only; */ +/* = 'V': Compute eigenvalues and eigenvectors. */ + +/* RANGE (input) CHARACTER*1 */ +/* = 'A': all eigenvalues will be found. */ +/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* will be found. */ +/* = 'I': the IL-th through IU-th eigenvalues will be found. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A and B are stored; */ +/* = 'L': Lower triangle of A and B are stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix pencil (A,B). N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ +/* On entry, the symmetric matrix A. 
If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of A contains the */ +/* upper triangular part of the matrix A. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of A contains */ +/* the lower triangular part of the matrix A. */ + +/* On exit, the lower triangle (if UPLO='L') or the upper */ +/* triangle (if UPLO='U') of A, including the diagonal, is */ +/* destroyed. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, the symmetric matrix B. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of B contains the */ +/* upper triangular part of the matrix B. If UPLO = 'L', */ +/* the leading N-by-N lower triangular part of B contains */ +/* the lower triangular part of the matrix B. */ + +/* On exit, if INFO <= N, the part of B containing the matrix is */ +/* overwritten by the triangular factor U or L from the Cholesky */ +/* factorization B = U**T*U or B = L*L**T. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION */ +/* VU (input) DOUBLE PRECISION */ +/* If RANGE='V', the lower and upper bounds of the interval to */ +/* be searched for eigenvalues. VL < VU. */ +/* Not referenced if RANGE = 'A' or 'I'. */ + +/* IL (input) INTEGER */ +/* IU (input) INTEGER */ +/* If RANGE='I', the indices (in ascending order) of the */ +/* smallest and largest eigenvalues to be returned. */ +/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* Not referenced if RANGE = 'A' or 'V'. */ + +/* ABSTOL (input) DOUBLE PRECISION */ +/* The absolute error tolerance for the eigenvalues. */ +/* An approximate eigenvalue is accepted as converged */ +/* when it is determined to lie in an interval [a,b] */ +/* of width less than or equal to */ + +/* ABSTOL + EPS * max( |a|,|b| ) , */ + +/* where EPS is the machine precision. 
If ABSTOL is less than */ +/* or equal to zero, then EPS*|T| will be used in its place, */ +/* where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* by reducing A to tridiagonal form. */ + +/* Eigenvalues will be computed most accurately when ABSTOL is */ +/* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* If this routine returns with INFO>0, indicating that some */ +/* eigenvectors did not converge, try setting ABSTOL to */ +/* 2*DLAMCH('S'). */ + +/* M (output) INTEGER */ +/* The total number of eigenvalues found. 0 <= M <= N. */ +/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + +/* W (output) DOUBLE PRECISION array, dimension (N) */ +/* On normal exit, the first M elements contain the selected */ +/* eigenvalues in ascending order. */ + +/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ +/* If JOBZ = 'N', then Z is not referenced. */ +/* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* contain the orthonormal eigenvectors of the matrix A */ +/* corresponding to the selected eigenvalues, with the i-th */ +/* column of Z holding the eigenvector associated with W(i). */ +/* The eigenvectors are normalized as follows: */ +/* if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* if ITYPE = 3, Z**T*inv(B)*Z = I. */ + +/* If an eigenvector fails to converge, then that column of Z */ +/* contains the latest approximation to the eigenvector, and the */ +/* index of the eigenvector is returned in IFAIL. */ +/* Note: the user must ensure that at least max(1,M) columns are */ +/* supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* is not known in advance and an upper bound must be used. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1, and if */ +/* JOBZ = 'V', LDZ >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
*/ + +/* LWORK (input) INTEGER */ +/* The length of the array WORK. LWORK >= max(1,8*N). */ +/* For optimal efficiency, LWORK >= (NB+3)*N, */ +/* where NB is the blocksize for DSYTRD returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (5*N) */ + +/* IFAIL (output) INTEGER array, dimension (N) */ +/* If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* indices of the eigenvectors that failed to converge. */ +/* If JOBZ = 'N', then IFAIL is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: DPOTRF or DSYEVX returned an error code: */ +/* <= N: if INFO = i, DSYEVX failed to converge; */ +/* i eigenvectors failed to converge. Their indices */ +/* are stored in array IFAIL. */ +/* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* minor of order i of B is not positive definite. */ +/* The factorization of B could not be completed and */ +/* no eigenvalues or eigenvectors were computed. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + --ifail; + + /* Function Body */ + upper = _starpu_lsame_(uplo, "U"); + wantz = _starpu_lsame_(jobz, "V"); + alleig = _starpu_lsame_(range, "A"); + valeig = _starpu_lsame_(range, "V"); + indeig = _starpu_lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { + *info = -2; + } else if (! (alleig || valeig || indeig)) { + *info = -3; + } else if (! (upper || _starpu_lsame_(uplo, "L"))) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > max(1,*n)) { + *info = -12; + } else if (*iu < min(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 3; + lwkmin = max(i__1,i__2); + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = lwkmin, i__2 = (nb + 3) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYGVX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. 
*/ + + _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + _starpu_dsyevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, + m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], &ifail[ + 1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*info > 0) { /* INFO > 0 was set by the DSYEVX call above; M is cut to INFO-1 before being passed to DTRSM/DTRMM */ + *m = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] +, ldb, &z__[z_offset], ldz); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U'*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] +, ldb, &z__[z_offset], ldz); + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYGVX */ + +} /* _starpu_dsygvx_ */ diff --git a/min-dgels/base/SRC/dsyrfs.c b/min-dgels/base/SRC/dsyrfs.c new file mode 100644 index 0000000..455e71d --- /dev/null +++ b/min-dgels/base/SRC/dsyrfs.c @@ -0,0 +1,429 @@ +/* dsyrfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = -1.; +static doublereal c_b14 = 1.; + +/* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s, xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + logical upper; + extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal lstres; + extern /* Subroutine */ int _starpu_dsytrs_(char *, integer 
*, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYRFS improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric indefinite, and */ +/* provides error bounds and backward error estimates for the solution. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular part */ +/* of the matrix A, and the strictly lower triangular part of A */ +/* is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of A contains the lower triangular part of */ +/* the matrix A, and the strictly upper triangular part of A is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factored form of the matrix A. AF contains the block */ +/* diagonal matrix D and the multipliers used to obtain the */ +/* factor U or L from the factorization A = U*D*U**T or */ +/* A = L*D*L**T as computed by DSYTRF. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). 
*/ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DSYTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Internal Parameters */ +/* =================== */ + +/* ITMAX is the maximum number of steps of iterative refinement; it appears as the literal 5 in the stopping test (count <= 5). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. 
Local Arrays .. */ +/* .. */ +/* .. 
External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldaf < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldx < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. 
*/ + +/* Compute residual R = B - A * X */ + + _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dsymv_(uplo, n, &c_b12, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, + &c_b14, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L40: */ + } + work[k] = work[k] + (d__1 = a[k + k * a_dim1], abs(d__1)) * + xk + s; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + work[k] += (d__1 = a[k + k * a_dim1], abs(d__1)) * xk; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L80: 
*/ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + + 1], n, info); + _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A'). 
*/ + + _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + *n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + *n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DSYRFS */ + +} /* _starpu_dsyrfs_ */ diff --git a/min-dgels/base/SRC/dsyrfsx.c b/min-dgels/base/SRC/dsyrfsx.c new file mode 100644 index 0000000..c1a5eae --- /dev/null +++ b/min-dgels/base/SRC/dsyrfsx.c @@ -0,0 +1,629 @@ +/* dsyrfsx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__0 = 0; +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *s, doublereal *b, integer *ldb, doublereal + *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; + integer ref_type__, j; + doublereal rcond_tmp__; + integer prec_type__; + extern doublereal _starpu_dla_syrcond__(char *, integer *, doublereal *, integer * + , doublereal *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen); + doublereal cwise_wrong__; + extern /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, logical *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, 
doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, logical *, integer *, ftnlen); + char norm[1]; + logical ignore_cwise__; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + logical rcequ; + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dsycon_(char *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + extern integer _starpu_ilaprec_(char *); + integer ithresh, n_norms__; + doublereal rthresh; + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYRFSX improves the computed solution to a system of linear */ +/* equations when the coefficient matrix is symmetric indefinite, and */ +/* provides error bounds and backward error estimates for the */ +/* solution. In addition to normwise error bound, the code provides */ +/* maximum componentwise error bound if possible. See comments for */ +/* ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. */ + +/* The original system of linear equations may have been equilibrated */ +/* before calling this routine, as described by arguments EQUED and S */ +/* below. In this case, the solution and error bounds returned are */ +/* for the original unequilibrated system. 
*/ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* EQUED (input) CHARACTER*1 */ +/* Specifies the form of equilibration that was done to A */ +/* before calling this routine. This is needed to compute */ +/* the solution and error bounds correctly. */ +/* = 'N': No equilibration */ +/* = 'Y': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(S) * A * diag(S). */ +/* The right hand side B has been changed accordingly. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular */ +/* part of the matrix A, and the strictly lower triangular */ +/* part of A is not referenced. If UPLO = 'L', the leading */ +/* N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* The factored form of the matrix A. AF contains the block */ +/* diagonal matrix D and the multipliers used to obtain the */ +/* factor U or L from the factorization A = U*D*U**T or A = */ +/* L*D*L**T as computed by DSYTRF. 
*/ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* the left and right by diag(S). S is an input argument if FACT = */ +/* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* = 'Y', each element of S must be positive. If S is output, each */ +/* element of S is a power of the radix. If S is input, each element */ +/* of S should be a power of the radix to ensure a reliable solution */ +/* and error estimates. Scaling by powers of the radix does not cause */ +/* rounding errors unless the result underflows or overflows. */ +/* Rounding errors during scaling lead to refining with a matrix that */ +/* is not equivalent to the input matrix, producing error estimates */ +/* that may not be reliable. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* On entry, the solution matrix X, as computed by DSYTRS. */ +/* On exit, the improved solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. 
*/ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* Componentwise relative backward error. This is the */ +/* componentwise relative backward error of each solution vector X(j) */ +/* (i.e., the smallest relative change in any element of A or B that */ +/* makes X(j) an exact solution). */ + +/* N_ERR_BNDS (input) INTEGER */ +/* Number of error bounds to return for each right hand side */ +/* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* ERR_BNDS_COMP below. */ + +/* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* normwise relative error, which is defined as follows: */ + +/* Normwise relative error in the ith solution vector: */ +/* max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* ------------------------------ */ +/* max_j abs(X(j,i)) */ + +/* The array is indexed by the type of error information as described */ +/* below. There currently are up to three pieces of information */ +/* returned. */ + +/* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated normwise */ +/* reciprocal condition number. 
Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*A, where S scales each row by a power of the */ +/* radix so all absolute row sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* For each right-hand side, this array contains information about */ +/* various error bounds and condition numbers corresponding to the */ +/* componentwise relative error, which is defined as follows: */ + +/* Componentwise relative error in the ith solution vector: */ +/* abs(XTRUE(j,i) - X(j,i)) */ +/* max_j ---------------------- */ +/* abs(X(j,i)) */ + +/* The array is indexed by the right-hand side i (on which the */ +/* componentwise relative error depends), and the type of error */ +/* information as described below. There currently are up to three */ +/* pieces of information returned for each right-hand side. If */ +/* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ +/* the first (:,N_ERR_BNDS) entries are returned. */ + +/* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* right-hand side. */ + +/* The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* three fields: */ +/* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* reciprocal condition number is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). */ + +/* err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* almost certainly within a factor of 10 of the true error */ +/* so long as the next entry is greater than the threshold */ +/* sqrt(n) * dlamch('Epsilon'). 
This error bound should only */ +/* be trusted if the previous boolean is true. */ + +/* err = 3 Reciprocal condition number: Estimated componentwise */ +/* reciprocal condition number. Compared with the threshold */ +/* sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* estimate is "guaranteed". These reciprocal condition */ +/* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* appropriately scaled matrix Z. */ +/* Let Z = S*(A*diag(x)), where x is the solution for the */ +/* current right-hand side and S scales each row of */ +/* A*diag(x) by a power of the radix so all absolute row */ +/* sums of Z are approximately 1. */ + +/* See Lapack Working Note 165 for further details and extra */ +/* cautions. */ + +/* NPARAMS (input) INTEGER */ +/* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ +/* PARAMS array is never referenced and default values are used. */ + +/* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ +/* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ +/* that entry will be filled with default value used for that */ +/* parameter. Only positions up to NPARAMS are accessed; defaults */ +/* are used for higher-numbered parameters. */ + +/* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* refinement or not. */ +/* Default: 1.0D+0 */ +/* = 0.0 : No refinement is performed, and no error bounds are */ +/* computed. */ +/* = 1.0 : Use the double-precision refinement algorithm, */ +/* possibly with doubled-single computations if the */ +/* compilation environment does not support DOUBLE */ +/* PRECISION. */ +/* (other values are reserved for future use) */ + +/* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* computations allowed for refinement. */ +/* Default: 10 */ +/* Aggressive: Set to 100 to permit convergence using approximate */ +/* factorizations or factorizations other than LU. 
If */ +/* the factorization uses a technique other than */ +/* Gaussian elimination, the guarantees in */ +/* err_bnds_norm and err_bnds_comp may no longer be */ +/* trustworthy. */ + +/* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* will attempt to find a solution with small componentwise */ +/* relative error in the double-precision algorithm. Positive */ +/* is true, 0.0 is false. */ +/* Default: 1.0 (attempt componentwise convergence) */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. 
*/ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.) { + params[1] = 1.; + } else { + ref_type__ = (integer) params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); + ithresh = 10; + rthresh = .5; + unstable_thresh__ = .25; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.) { + params[2] = (doublereal) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.) { + if (ignore_cwise__) { + params[3] = 0.; + } else { + params[3] = 1.; + } + } else { + ignore_cwise__ = params[3] == 0.; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + rcequ = _starpu_lsame_(equed, "Y"); + +/* Test input parameters. */ + + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! rcequ && ! 
_starpu_lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -11; + } else if (*ldx < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYRFSX", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; + } + } + return 0; + } + +/* Default to failure. */ + + *rcond = 0.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } else if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } else if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. 
*/ + + *(unsigned char *)norm = 'I'; + anorm = _starpu_dlansy_(norm, uplo, n, &a[a_offset], lda, &work[1]); + _starpu_dsycon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], + &iwork[1], info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0) { + prec_type__ = _starpu_ilaprec_("E"); + _starpu_dla_syrfsx_extended__(&prec_type__, uplo, n, nrhs, &a[a_offset], lda, + &af[af_offset], ldaf, &ipiv[1], &rcequ, &s[1], &b[b_offset], + ldb, &x[x_offset], ldx, &berr[1], &n_norms__, & + err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n << + 1) + 1], &work[1], rcond, &ithresh, &rthresh, & + unstable_thresh__, &ignore_cwise__, info, (ftnlen)1); + } +/* Computing MAX */ + d__1 = 10., d__2 = sqrt((doublereal) (*n)); + err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (rcequ) { + rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c_n1, &s[1], info, &work[1], + &iwork[1], (ftnlen)1); + } else { + rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__0, &s[1], info, &work[1], + &iwork[1], (ftnlen)1); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). 
*/ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1], + info, &work[1], &iwork[1], (ftnlen)1); + } else { + rcond_tmp__ = 0.; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; + if (params[3] == 1. 
&& *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of DSYRFSX */ + +} /* _starpu_dsyrfsx_ */ diff --git a/min-dgels/base/SRC/dsysv.c b/min-dgels/base/SRC/dsysv.c new file mode 100644 index 0000000..75fe313 --- /dev/null +++ b/min-dgels/base/SRC/dsysv.c @@ -0,0 +1,215 @@ +/* dsysv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + integer nb; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dsytrf_(char *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dsytrs_(char 
*, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYSV computes the solution to a real system of linear equations */ +/* A * X = B, */ +/* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS */ +/* matrices. */ + +/* The diagonal pivoting method is used to factor A as */ +/* A = U * D * U**T, if UPLO = 'U', or */ +/* A = L * D * L**T, if UPLO = 'L', */ +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then */ +/* used to solve the system of equations A * X = B. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. 
*/ + +/* On exit, if INFO = 0, the block diagonal matrix D and the */ +/* multipliers used to obtain the factor U or L from the */ +/* factorization A = U*D*U**T or A = L*D*L**T as computed by */ +/* DSYTRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D, as */ +/* determined by DSYTRF. If IPIV(k) > 0, then rows and columns */ +/* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ +/* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ +/* then rows and columns k-1 and -IPIV(k) were interchanged and */ +/* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and */ +/* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ +/* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ +/* diagonal block. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of WORK. LWORK >= 1, and for best performance */ +/* LWORK >= max(1,N*NB), where NB is the optimal blocksize for */ +/* DSYTRF. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) is exactly zero. 
The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular, so the solution could not be computed. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*lwork < 1 && ! lquery) { + *info = -10; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYSV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U' or A = L*D*L'. */ + + _starpu_dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + _starpu_dsytrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, + info); + + } + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYSV */ + +} /* _starpu_dsysv_ */ diff --git a/min-dgels/base/SRC/dsysvx.c b/min-dgels/base/SRC/dsysvx.c new file mode 100644 index 0000000..ef1e47f --- /dev/null +++ b/min-dgels/base/SRC/dsysvx.c @@ -0,0 +1,370 @@ +/* dsysvx.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublereal *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + + /* Local variables */ + integer nb; + extern logical _starpu_lsame_(char *, char *); + doublereal anorm; + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dsycon_(char *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *), _starpu_dsyrfs_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, integer *), + 
_starpu_dsytrf_(char *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYSVX uses the diagonal pivoting factorization to compute the */ +/* solution to a real system of linear equations A * X = B, */ +/* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS */ +/* matrices. */ + +/* Error bounds on the solution and a condition estimate are also */ +/* provided. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */ +/* The form of the factorization is */ +/* A = U * D * U**T, if UPLO = 'U', or */ +/* A = L * D * L**T, if UPLO = 'L', */ +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. */ + +/* 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ +/* returns with INFO = i. Otherwise, the factored form of A is used */ +/* to estimate the condition number of the matrix A. If the */ +/* reciprocal of the condition number is less than machine precision, */ +/* INFO = N+1 is returned as a warning, but the routine still goes on */ +/* to solve for X and compute error bounds as described below. */ + +/* 3. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 4. 
Iterative refinement is applied to improve the computed solution */ +/* matrix and calculate error bounds and backward error estimates */ +/* for it. */ + +/* Arguments */ +/* ========= */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of A has been */ +/* supplied on entry. */ +/* = 'F': On entry, AF and IPIV contain the factored form of */ +/* A. AF and IPIV will not be modified. */ +/* = 'N': The matrix A will be copied to AF and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular part */ +/* of the matrix A, and the strictly lower triangular part of A */ +/* is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of A contains the lower triangular part of */ +/* the matrix A, and the strictly upper triangular part of A is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L from the factorization */ +/* A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L from the factorization */ +/* A = U*D*U**T or A = L*D*L**T. 
*/ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains details of the interchanges and the block structure */ +/* of D, as determined by DSYTRF. */ +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains details of the interchanges and the block structure */ +/* of D, as determined by DSYTRF. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The N-by-NRHS right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The estimate of the reciprocal condition number of the matrix */ +/* A. If RCOND is less than the machine precision (in */ +/* particular, if RCOND = 0), the matrix is singular to working */ +/* precision. This condition is indicated by a return code of */ +/* INFO > 0. */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). 
*/ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of WORK. LWORK >= max(1,3*N), and for best */ +/* performance, when FACT = 'N', LWORK >= max(1,3*N,N*NB), where */ +/* NB is the optimal blocksize for DSYTRF. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, and i is */ +/* <= N: D(i,i) is exactly zero. The factorization */ +/* has been completed but the factor D is exactly */ +/* singular, so the solution and error bounds could */ +/* not be computed. RCOND = 0 is returned. */ +/* = N+1: D is nonsingular, but RCOND is less than machine */ +/* precision, meaning that the matrix is singular */ +/* to working precision. 
Nevertheless, the */ +/* solution and error bounds are computed because */ +/* there are a number of situations where the */ +/* computed solution can be more accurate than the */ +/* value of RCOND would suggest. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + lquery = *lwork == -1; + if (! nofact && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -11; + } else if (*ldx < max(1,*n)) { + *info = -13; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + if (*lwork < max(i__1,i__2) && ! 
lquery) { + *info = -18; + } + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + lwkopt = max(i__1,i__2); + if (nofact) { + nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * nb; + lwkopt = max(i__1,i__2); + } + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYSVX", &i__1); + return 0; + } else if (lquery) { + return 0; + } + + if (nofact) { + +/* Compute the factorization A = U*D*U' or A = L*D*L'. */ + + _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + _starpu_dsytrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, + info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = _starpu_dlansy_("I", uplo, n, &a[a_offset], lda, &work[1]); + +/* Compute the reciprocal of the condition number of A. */ + + _starpu_dsycon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], + &iwork[1], info); + +/* Compute the solution vectors X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + _starpu_dsyrfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] +, &iwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. 
*/ + + if (*rcond < _starpu_dlamch_("Epsilon")) { + *info = *n + 1; + } + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYSVX */ + +} /* _starpu_dsysvx_ */ diff --git a/min-dgels/base/SRC/dsysvxx.c b/min-dgels/base/SRC/dsysvxx.c new file mode 100644 index 0000000..20e0f41 --- /dev/null +++ b/min-dgels/base/SRC/dsysvxx.c @@ -0,0 +1,631 @@ +/* dsysvxx.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * + rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * + err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, + doublereal *params, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer j; + doublereal amax, smin, smax; + extern doublereal _starpu_dla_syrpvgrw__(char *, integer *, integer *, doublereal + *, integer *, doublereal *, integer *, integer *, doublereal *, + ftnlen); + extern logical _starpu_lsame_(char *, char *); + doublereal scond; + logical equil, rcequ; + extern doublereal _starpu_dlamch_(char *); + logical nofact; + extern /* 
Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + integer infequ; + extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, char *); + doublereal smlnum; + extern /* Subroutine */ int _starpu_dsytrf_(char *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal *, + integer *), _starpu_dsytrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + _starpu_dsyequb_(char *, integer *, doublereal *, integer *, doublereal * +, doublereal *, doublereal *, doublereal *, integer *), + _starpu_dsyrfsx_(char *, char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2.1) -- */ +/* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ +/* -- Jason Riedy of Univ. of California Berkeley. -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley and NAG Ltd. -- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYSVXX uses the diagonal pivoting factorization to compute the */ +/* solution to a double precision system of linear equations A * X = B, where A */ +/* is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. */ + +/* If requested, both normwise and maximum componentwise error bounds */ +/* are returned. 
DSYSVXX will return a solution with a tiny */ +/* guaranteed error (O(eps) where eps is the working machine */ +/* precision) unless the matrix is very ill-conditioned, in which */ +/* case a warning is returned. Relevant condition numbers also are */ +/* calculated and returned. */ + +/* DSYSVXX accepts user-provided factorizations and equilibration */ +/* factors; see the definitions of the FACT and EQUED options. */ +/* Solving with refinement and using a factorization from a previous */ +/* DSYSVXX call will also produce a solution with either O(eps) */ +/* errors or warnings, but we cannot make that claim for general */ +/* user-provided factorizations and equilibration factors if they */ +/* differ from what DSYSVXX would itself produce. */ + +/* Description */ +/* =========== */ + +/* The following steps are performed: */ + +/* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* the system: */ + +/* diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ + +/* Whether or not the system will be equilibrated depends on the */ +/* scaling of the matrix A, but if equilibration is used, A is */ +/* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ + +/* 2. If FACT = 'N' or 'E', the diagonal pivoting method is used to factor */ +/* the matrix A (after equilibration if FACT = 'E') as */ + +/* A = U * D * U**T, if UPLO = 'U', or */ +/* A = L * D * L**T, if UPLO = 'L', */ + +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. */ + +/* 3. If some D(i,i)=0, so that D is exactly singular, then the */ +/* routine returns with INFO = i. Otherwise, the factored form of A */ +/* is used to estimate the condition number of the matrix A (see */ +/* argument RCOND). 
If the reciprocal of the condition number is */ +/* less than machine precision, the routine still goes on to solve */ +/* for X and compute error bounds as described below. */ + +/* 4. The system of equations is solved for X using the factored form */ +/* of A. */ + +/* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* the routine will use iterative refinement to try to get a small */ +/* error and error bounds. Refinement calculates the residual to at */ +/* least twice the working precision. */ + +/* 6. If equilibration was used, the matrix X is premultiplied by */ +/* diag(S) so that it solves the original system before */ +/* equilibration. */ + +/* Arguments */ +/* ========= */ + +/* Some optional parameters are bundled in the PARAMS array. These */ +/* settings determine how refinement is performed, but often the */ +/* defaults are acceptable. If the defaults are acceptable, users */ +/* can pass NPARAMS = 0 which prevents the source code from accessing */ +/* the PARAMS argument. */ + +/* FACT (input) CHARACTER*1 */ +/* Specifies whether or not the factored form of the matrix A is */ +/* supplied on entry, and if not, whether the matrix A should be */ +/* equilibrated before it is factored. */ +/* = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* If EQUED is not 'N', the matrix A has been */ +/* equilibrated with scaling factors given by S. */ +/* A, AF, and IPIV are not modified. */ +/* = 'N': The matrix A will be copied to AF and factored. */ +/* = 'E': The matrix A will be equilibrated if necessary, then */ +/* copied to AF and factored. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The number of linear equations, i.e., the order of the */ +/* matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. 
*/ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of A contains the upper triangular */ +/* part of the matrix A, and the strictly lower triangular */ +/* part of A is not referenced. If UPLO = 'L', the leading */ +/* N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* diag(S)*A*diag(S). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ +/* If FACT = 'F', then AF is an input argument and on entry */ +/* contains the block diagonal matrix D and the multipliers */ +/* used to obtain the factor U or L from the factorization A = */ +/* U*D*U**T or A = L*D*L**T as computed by DSYTRF. */ + +/* If FACT = 'N', then AF is an output argument and on exit */ +/* returns the block diagonal matrix D and the multipliers */ +/* used to obtain the factor U or L from the factorization A = */ +/* U*D*U**T or A = L*D*L**T. */ + +/* LDAF (input) INTEGER */ +/* The leading dimension of the array AF. LDAF >= max(1,N). */ + +/* IPIV (input or output) INTEGER array, dimension (N) */ +/* If FACT = 'F', then IPIV is an input argument and on entry */ +/* contains details of the interchanges and the block */ +/* structure of D, as determined by DSYTRF. If IPIV(k) > 0, */ +/* then rows and columns k and IPIV(k) were interchanged and */ +/* D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and */ +/* IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and */ +/* -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 */ +/* diagonal block. 
If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, */ +/* then rows and columns k+1 and -IPIV(k) were interchanged */ +/* and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* If FACT = 'N', then IPIV is an output argument and on exit */ +/* contains details of the interchanges and the block */ +/* structure of D, as determined by DSYTRF. */ + +/* EQUED (input or output) CHARACTER*1 */ +/* Specifies the form of equilibration that was done. */ +/* = 'N': No equilibration (always true if FACT = 'N'). */ +/* = 'Y': Both row and column equilibration, i.e., A has been */ +/* replaced by diag(S) * A * diag(S). */ +/* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* output argument. */ + +/* S (input or output) DOUBLE PRECISION array, dimension (N) */ +/* The scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* the left and right by diag(S). S is an input argument if FACT = */ +/* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* = 'Y', each element of S must be positive. If S is output, each */ +/* element of S is a power of the radix. If S is input, each element */ +/* of S should be a power of the radix to ensure a reliable solution */ +/* and error estimates. Scaling by powers of the radix does not cause */ +/* rounding errors unless the result underflows or overflows. */ +/* Rounding errors during scaling lead to refining with a matrix that */ +/* is not equivalent to the input matrix, producing error estimates */ +/* that may not be reliable. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the N-by-NRHS right hand side matrix B. */ +/* On exit, */ +/* if EQUED = 'N', B is not modified; */ +/* if EQUED = 'Y', B is overwritten by diag(S)*B; */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
*/ + +/* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* system of equations. Note that A and B are modified on exit if */ +/* EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* inv(diag(S))*X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* Reciprocal scaled condition number. This is an estimate of the */ +/* reciprocal Skeel condition number of the matrix A after */ +/* equilibration (if done). If this is less than the machine */ +/* precision (in particular, if it is zero), the matrix is singular */ +/* to working precision. Note that the error may still be small even */ +/* if this number is very small and the matrix appears ill- */ +/* conditioned. */ + +/* RPVGRW (output) DOUBLE PRECISION */ +/* Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* pivot growth factor norm(A)/norm(U). The "max absolute element" */ +/* norm is used. If this is much less than 1, then the stability of */ +/* the LU factorization of the (equilibrated) matrix A could be poor. */ +/* This also means that the solution X, estimated condition numbers, */ +/* and error bounds could be unreliable. If factorization fails with */ +/* 0 < INFO <= N, then this contains the reciprocal pivot growth factor */ +/* for the leading INFO columns of A. */ + +/* NOTE: the descriptions of BERR, N_ERR_BNDS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK and IWORK are missing from */ +/* this copy of the source; see the LAPACK DSYSVXX reference. */ + +/* INFO (output) INTEGER */ +/* = 0: Successful exit. The solution to every right-hand side is */ +/* guaranteed. */ +/* < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > 0 and <= N: U(INFO,INFO) is exactly zero. 
The factorization */ +/* has been completed, but the factor U is exactly singular, so */ +/* the solution and error bounds could not be computed. RCOND = 0 */ +/* is returned. */ +/* = N+J: The solution corresponding to the Jth right-hand side is */ +/* not guaranteed. The solutions corresponding to other right- */ +/* hand sides K with K > J may not be guaranteed as well, but */ +/* only the first such right-hand side is reported. 
If a small */ +/* componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* the Jth right-hand side is the first with a normwise error */ +/* bound that is not guaranteed (the smallest J such */ +/* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* the Jth right-hand side is the first with either a normwise or */ +/* componentwise error bound that is not guaranteed (the smallest */ +/* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* about all of the right-hand sides check ERR_BNDS_NORM or */ +/* ERR_BNDS_COMP. */ + +/* ================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1; + af -= af_offset; + --ipiv; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = _starpu_lsame_(fact, "N"); + equil = _starpu_lsame_(fact, "E"); + smlnum = _starpu_dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = _starpu_lsame_(equed, "Y"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. 
Only the */ +/* pivot growth is set here, the rest is initialized in DSYRFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DSYRFSX. */ + + if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { + *info = -1; + } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldaf < max(1,*n)) { + *info = -8; + } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( + equed, "N"))) { + *info = -9; + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = min(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = max(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -10; + } else if (*n > 0) { + scond = max(smin,smlnum) / min(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < max(1,*n)) { + *info = -12; + } else if (*ldx < max(1,*n)) { + *info = -14; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYSVXX", &i__1); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + _starpu_dsyequb_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, &work[1], & + infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); + rcequ = _starpu_lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side. */ + + if (rcequ) { + _starpu_dlascl2_(n, nrhs, &s[1], &b[b_offset], ldb); + } + + if (nofact || equil) { + +/* Compute the diagonal pivoting factorization of A. */ + + _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + i__1 = max(1,*n) * 5; + _starpu_dsytrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], &i__1, + info); + +/* Return if INFO is non-zero. 
*/ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + if (*n > 0) { + *rpvgrw = _starpu_dla_syrpvgrw__(uplo, n, info, &a[a_offset], lda, & + af[af_offset], ldaf, &ipiv[1], &work[1], (ftnlen)1); + } + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + if (*n > 0) { + *rpvgrw = _starpu_dla_syrpvgrw__(uplo, n, info, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &work[1], (ftnlen)1); + } + +/* Compute the solution matrix X. */ + + _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + _starpu_dsyrfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, & + berr[1], n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], nparams, &params[1], &work[ + 1], &iwork[1], info); + +/* Scale solutions. */ + + if (rcequ) { + _starpu_dlascl2_(n, nrhs, &s[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DSYSVXX */ + +} /* _starpu_dsysvxx_ */ diff --git a/min-dgels/base/SRC/dsytd2.c b/min-dgels/base/SRC/dsytd2.c new file mode 100644 index 0000000..b7aec6a --- /dev/null +++ b/min-dgels/base/SRC/dsytd2.c @@ -0,0 +1,306 @@ +/* dsytd2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 0.; +static doublereal c_b14 = -1.; + +/* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal taui; + extern /* Subroutine */ int _starpu_dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal alpha; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer * +); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal */ +/* form T by an orthogonal similarity transformation: Q' * A * Q = T. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n-by-n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n-by-n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ +/* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* of A are overwritten by the corresponding elements of the */ +/* tridiagonal matrix T, and the elements above the first */ +/* superdiagonal, with the array TAU, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors; if UPLO */ +/* = 'L', the diagonal and first subdiagonal of A are over- */ +/* written by the corresponding elements of the tridiagonal */ +/* matrix T, and the elements below the first subdiagonal, with */ +/* the array TAU, represent the orthogonal matrix Q as a product */ +/* of elementary reflectors. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of the tridiagonal matrix T: */ +/* D(i) = A(i,i). 
*/ + +/* E (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The off-diagonal elements of the tridiagonal matrix T: */ +/* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ + +/* Further Details */ +/* =============== */ + +/* If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(n-1) . . . H(2) H(1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* A(1:i-1,i+1), and tau in TAU(i). */ + +/* If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(1) H(2) . . . H(n-1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* and tau in TAU(i). */ + +/* The contents of A on exit are illustrated by the following examples */ +/* with n = 5: */ + +/* if UPLO = 'U': if UPLO = 'L': */ + +/* ( d e v2 v3 v4 ) ( d ) */ +/* ( d e v3 v4 ) ( e d ) */ +/* ( d e v4 ) ( v1 e d ) */ +/* ( d e ) ( v1 v2 e d ) */ +/* ( d ) ( v1 v2 v3 e d ) */ + +/* where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* denotes an element of the vector defining H(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters */ + + /* Parameter adjustments: shift the pointers so that a, d__, e and tau are indexed from 1 */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v' */ +/* to annihilate A(1:i-1,i+1) */ + + _starpu_dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i); A(i,i+1) is temporarily set to 1 (v(i) = 1) and restored from E(i) afterwards */ + + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + _starpu_dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b8, &tau[1], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5 * _starpu_ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + _starpu_daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w' - w * v' */ + + _starpu_dsyr2_(uplo, &i__, &c_b14, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate 
elementary reflector H(i) = I - tau * v * v' */ +/* to annihilate A(i+2:n,i) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 
= i__ + 2; + _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n); A(i+1,i) is temporarily set to 1 (v(i+1) = 1) and restored from E(i) afterwards */ + + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(i:n-1) */ + + i__2 = *n - i__; + _starpu_dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b8, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * _starpu_ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + _starpu_daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w' - w * v' */ + + i__2 = *n - i__; + _starpu_dsyr2_(uplo, &i__2, &c_b14, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of DSYTD2 */ + +} /* _starpu_dsytd2_ */ diff --git a/min-dgels/base/SRC/dsytf2.c b/min-dgels/base/SRC/dsytf2.c new file mode 100644 index 0000000..215cd5c --- /dev/null +++ b/min-dgels/base/SRC/dsytf2.c @@ -0,0 +1,608 @@ +/* dsytf2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal t, r1, d11, d12, d21, d22; + integer kk, kp; + doublereal wk, wkm1, wkp1; + integer imax, jmax; + extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + doublereal alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer kstep; + logical upper; + doublereal absakk; + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern logical _starpu_disnan_(doublereal *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal colmax, rowmax; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSYTF2 computes the factorization of a real symmetric matrix A using */ +/* the Bunch-Kaufman diagonal pivoting method: */ + +/* A = U*D*U' or A = L*D*L' */ + +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, U' is the transpose of U, and D is symmetric and */ +/* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ + +/* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the upper or lower triangular part of the */ +/* symmetric matrix A is stored: */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* n-by-n upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n-by-n lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L (see below for further details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D. */ +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. 
If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular, and division by zero will occur if it */ +/* is used to solve a system of equations. */ + +/* Further Details */ +/* =============== */ + +/* 09-29-06 - patch from */ +/* Bobby Cheng, MathWorks */ + +/* Replace l.204 and l.372 */ +/* IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN */ +/* by */ +/* IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN */ + +/* 01-01-96 - Based on modifications by */ +/* J. Lewis, Boeing Computer Services Company */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services */ +/* Company */ + +/* If UPLO = 'U', then A = U*D*U', where */ +/* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I v 0 ) k-s */ +/* U(k) = ( 0 I 0 ) s */ +/* ( 0 0 I ) n-k */ +/* k-s s n-k */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ + +/* If UPLO = 'L', then A = L*D*L', where */ +/* L = P(1)*L(1)* ... 
*P(k)*L(k)* ..., */ +/* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I 0 0 ) k-1 */ +/* L(k) = ( 0 I 0 ) s */ +/* ( 0 v I ) n-k-s+1 */ +/* k-1 s n-k-s+1 */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTF2", &i__1); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.) + 1.) 
/ 8.; + + if (upper) { + +/* Factorize A as U*D*U' using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L70; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k > 1) { + i__1 = k - 1; + imax = _starpu_idamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0. || _starpu_disnan_(&absakk)) { + +/* Column K is zero or contains a NaN: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = k - imax; + jmax = imax + _starpu_idamax_(&i__1, &a[imax + (imax + 1) * a_dim1], + lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = _starpu_idamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], + abs(d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= + alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K-1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k - kstep + 1; + if (kp != kk) { + +/* 
Interchange rows and columns KK and KP in the leading */ +/* submatrix A(1:k,1:k) */ + + i__1 = kp - 1; + _starpu_dswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], + &c__1); + i__1 = kk - kp - 1; + _starpu_dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k - 1 + k * a_dim1]; + a[k - 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ + +/* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' */ + + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + d__1 = -r1; + _starpu_dsyr_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[ + a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + _starpu_dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' */ +/* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' */ + + if (k > 2) { + + d12 = a[k - 1 + k * a_dim1]; + d22 = a[k - 1 + (k - 1) * a_dim1] / d12; + d11 = a[k + k * a_dim1] / d12; + t = 1. 
/ (d11 * d22 - 1.); + d12 = t / d12; + + for (j = k - 2; j >= 1; --j) { + wkm1 = d12 * (d11 * a[j + (k - 1) * a_dim1] - a[j + k + * a_dim1]); + wk = d12 * (d22 * a[j + k * a_dim1] - a[j + (k - 1) * + a_dim1]); + for (i__ = j; i__ >= 1; --i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + + k * a_dim1] * wk - a[i__ + (k - 1) * + a_dim1] * wkm1; +/* L20: */ + } + a[j + k * a_dim1] = wk; + a[j + (k - 1) * a_dim1] = wkm1; +/* L30: */ + } + + } + + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + + } else { + +/* Factorize A as L*D*L' using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L40: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L70; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value */ + + if (k < *n) { + i__1 = *n - k; + imax = k + _starpu_idamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + + if (max(absakk,colmax) == 0. 
|| _starpu_disnan_(&absakk)) { + +/* Column K is zero or contains a NaN: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = imax - k; + jmax = k - 1 + _starpu_idamax_(&i__1, &a[imax + k * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + _starpu_idamax_(&i__1, &a[imax + 1 + imax * a_dim1], + &c__1); +/* Computing MAX */ + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], + abs(d__1)); + rowmax = max(d__2,d__3); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= + alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K+1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + kk = k + kstep - 1; + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the trailing */ +/* submatrix A(k:n,k:n) */ + + if (kp < *n) { + i__1 = *n - kp; + _starpu_dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + _starpu_dswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k + 1 + k * a_dim1]; + a[k + 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = L(k)*D(k) */ + +/* where L(k) is the k-th column of L */ + + if (k < 
*n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ + +/* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' */ + + d11 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + d__1 = -d11; + _starpu_dsyr_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, & + a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column K */ + + i__1 = *n - k; + _starpu_dscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + +/* 2-by-2 pivot block D(k) */ + + if (k < *n - 1) { + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))' */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th */ +/* columns of L */ + + d21 = a[k + 1 + k * a_dim1]; + d11 = a[k + 1 + (k + 1) * a_dim1] / d21; + d22 = a[k + k * a_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + + wk = d21 * (d11 * a[j + k * a_dim1] - a[j + (k + 1) * + a_dim1]); + wkp1 = d21 * (d22 * a[j + (k + 1) * a_dim1] - a[j + k + * a_dim1]); + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + + k * a_dim1] * wk - a[i__ + (k + 1) * + a_dim1] * wkp1; +/* L50: */ + } + + a[j + k * a_dim1] = wk; + a[j + (k + 1) * a_dim1] = wkp1; + +/* L60: */ + } + } + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L40; + + } + +L70: + + return 0; + +/* End of DSYTF2 */ + +} /* _starpu_dsytf2_ */ diff --git a/min-dgels/base/SRC/dsytrd.c b/min-dgels/base/SRC/dsytrd.c new file mode 100644 index 0000000..e71b6a4 --- /dev/null +++ b/min-dgels/base/SRC/dsytrd.c @@ -0,0 +1,360 @@ +/* dsytrd.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: arguments are passed by address, so the literals handed to ILAENV and DSYR2K below need addressable storage */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static doublereal c_b22 = -1.; +static doublereal c_b23 = 1.; + +/* Subroutine: blocked reduction of a real symmetric matrix to tridiagonal form; see Purpose and Arguments below */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info) +{ + /* System generated locals: a_dim1 and a_offset implement 1-based column-major indexing of A, i__1 to i__3 hold loop bounds and by-reference arguments */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nb, kk, nx, iws; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int _starpu_dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, + integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DSYTRD reduces a real symmetric matrix A to real symmetric */ +/* tridiagonal form T by an orthogonal similarity transformation: */ +/* Q**T * A * Q = T. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ +/* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* of A are overwritten by the corresponding elements of the */ +/* tridiagonal matrix T, and the elements above the first */ +/* superdiagonal, with the array TAU, represent the orthogonal */ +/* matrix Q as a product of elementary reflectors; if UPLO */ +/* = 'L', the diagonal and first subdiagonal of A are over- */ +/* written by the corresponding elements of the tridiagonal */ +/* matrix T, and the elements below the first subdiagonal, with */ +/* the array TAU, represent the orthogonal matrix Q as a product */ +/* of elementary reflectors. See Further Details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* D (output) DOUBLE PRECISION array, dimension (N) */ +/* The diagonal elements of the tridiagonal matrix T: */ +/* D(i) = A(i,i). */ + +/* E (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The off-diagonal elements of the tridiagonal matrix T: */ +/* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
*/ + +/* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ +/* The scalar factors of the elementary reflectors (see Further */ +/* Details). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 1. */ +/* For optimum performance LWORK >= N*NB, where NB is the */ +/* optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(n-1) . . . H(2) H(1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* A(1:i-1,i+1), and tau in TAU(i). */ + +/* If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* reflectors */ + +/* Q = H(1) H(2) . . . H(n-1). */ + +/* Each H(i) has the form */ + +/* H(i) = I - tau * v * v' */ + +/* where tau is a real scalar, and v is a real vector with */ +/* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* and tau in TAU(i).
*/ + +/* The contents of A on exit are illustrated by the following examples */ +/* with n = 5: */ + +/* if UPLO = 'U': if UPLO = 'L': */ + +/* ( d e v2 v3 v4 ) ( d ) */ +/* ( d e v3 v4 ) ( e d ) */ +/* ( d e v4 ) ( v1 e d ) */ +/* ( d e ) ( v1 v2 e d ) */ +/* ( d ) ( v1 v2 v3 e d ) */ + +/* where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* denotes an element of the vector defining H(i). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments: shift each array pointer so that the 1-based Fortran indices used below address the arrays supplied by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size and store the optimal workspace N*NB in WORK(1). */ + + nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTRD", &i__1); + return 0; + } else if (lquery) { /* workspace query: WORK(1) already holds the optimal size */ + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code). */ + +/* Computing MAX */ + i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code.
*/ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code by setting NX = N. */ + +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = _starpu_ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* Columns 1:kk are handled by the unblocked method. */ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { /* i__2 = -nb with nb >= 1 here, so i__ steps downward while i__ >= kk + 1 */ + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = i__ + nb - 1; + _starpu_dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ +/* update of the form: A := A - V*W' - W*V' */ + + i__3 = i__ - 1; + _starpu_dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); + +/* Copy superdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + _starpu_dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ?
i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* i__1 = nb >= 1 here, so i__ steps upward while i__ <= n - nx */ + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = *n - i__ + 1; + _starpu_dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* Update the unreduced submatrix A(i+nb:n,i+nb:n), using */ +/* an update of the form: A := A - V*W' - W*V' */ + + i__3 = *n - i__ - nb + 1; + _starpu_dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy subdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block (i__ still holds the value at which the blocked loop above stopped) */ + + i__1 = *n - i__ + 1; + _starpu_dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (doublereal) lwkopt; /* return the optimal workspace size computed above */ + return 0; + +/* End of DSYTRD */ + +} /* _starpu_dsytrd_ */ diff --git a/min-dgels/base/SRC/dsytrf.c b/min-dgels/base/SRC/dsytrf.c new file mode 100644 index 0000000..8fe4182 --- /dev/null +++ b/min-dgels/base/SRC/dsytrf.c @@ -0,0 +1,341 @@ +/* dsytrf.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: arguments are passed by address, so the literals handed to ILAENV below need addressable storage */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* Subroutine: blocked Bunch-Kaufman factorization driver; see Purpose and Arguments below */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals: a_dim1 and a_offset implement 1-based column-major indexing of A, i__1 and i__2 hold loop bounds and by-reference arguments */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer j, k, kb, nb, iws; + extern logical _starpu_lsame_(char *, char *); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int _starpu_dsytf2_(char *, integer *, doublereal *, + integer *, integer *, integer *), _starpu_xerbla_(char *, integer + *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlasyf_(char *, integer *, integer *, integer + *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYTRF computes the factorization of a real symmetric matrix A using */ +/* the Bunch-Kaufman diagonal pivoting method.
The form of the */ +/* factorization is */ + +/* A = U*D*U**T or A = L*D*L**T */ + +/* where U (or L) is a product of permutation and unit upper (lower) */ +/* triangular matrices, and D is symmetric and block diagonal with */ +/* 1-by-1 and 2-by-2 diagonal blocks. */ + +/* This is the blocked version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* On exit, the block diagonal matrix D and the multipliers used */ +/* to obtain the factor U or L (see below for further details). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (output) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D. */ +/* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The length of WORK. LWORK >= 1. For best performance */ +/* LWORK >= N*NB, where NB is the block size returned by ILAENV. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* has been completed, but the block diagonal matrix D is */ +/* exactly singular, and division by zero will occur if it */ +/* is used to solve a system of equations. */ + +/* Further Details */ +/* =============== */ + +/* If UPLO = 'U', then A = U*D*U', where */ +/* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I v 0 ) k-s */ +/* U(k) = ( 0 I 0 ) s */ +/* ( 0 0 I ) n-k */ +/* k-s s n-k */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ + +/* If UPLO = 'L', then A = L*D*L', where */ +/* L = P(1)*L(1)* ...
*P(k)*L(k)* ..., */ +/* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ + +/* ( I 0 0 ) k-1 */ +/* L(k) = ( 0 I 0 ) s */ +/* ( 0 v I ) n-k-s+1 */ +/* k-1 s n-k-s+1 */ + +/* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments: shift each array pointer so that the 1-based Fortran indices used below address the arrays supplied by the caller */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && !
lquery) { + *info = -7; + } + + if (*info == 0) { + +/* Determine the block size and store the optimal workspace N*NB in WORK(1) */ + + nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTRF", &i__1); + return 0; + } else if (lquery) { /* workspace query: WORK(1) already holds the optimal size */ + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { /* WORK cannot hold N*NB entries: shrink NB to what fits and fetch the minimum useful block size */ +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = max(i__1,1); +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DSYTRF", uplo, n, &c_n1, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } else { + iws = 1; /* iws is not read again in this routine */ + } + if (nb < nbmin) { + nb = *n; /* with NB = N neither k > nb nor k <= n - nb can hold below, so only the unblocked DSYTF2 path runs */ + } + + if (upper) { + +/* Factorize A as U*D*U' using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* KB, where KB is the number of columns factorized by DLASYF; */ +/* KB is either NB or NB-1, or K for the last block */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L40; + } + + if (k > nb) { + +/* Factorize columns k-kb+1:k of A and use blocked code to */ +/* update columns 1:k-kb */ + + _starpu_dlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], + &ldwork, &iinfo); + } else { + +/* Use unblocked code to factorize columns 1:k of A */ + + _starpu_dsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo); + kb = k; + } + +/* Set INFO on the first occurrence of a zero pivot */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kb; + goto L10; + + } else { + +/* Factorize A as L*D*L' using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* KB, where KB is the number of columns factorized by DLASYF; */ +/* KB is either NB or NB-1, or N-K+1 for the last block */ + + k = 1; +L20: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L40; + } + + if (k <= *n -
nb) { + +/* Factorize columns k:k+kb-1 of A and use blocked code to */ +/* update columns k+kb:n */ + + i__1 = *n - k + 1; + _starpu_dlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], + &work[1], &ldwork, &iinfo); + } else { + +/* Use unblocked code to factorize columns k:n of A */ + + i__1 = *n - k + 1; + _starpu_dsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo); + kb = *n - k + 1; + } + +/* Set INFO on the first occurrence of a zero pivot (IINFO is relative to the submatrix that starts at row and column K, hence the K-1 offset) */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + +/* Adjust IPIV: offset the entries written for the submatrix starting at K by K-1 in magnitude, keeping their sign */ + + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } +/* L30: */ + } + +/* Increase K and return to the start of the main loop */ + + k += kb; + goto L20; + + } + +L40: + work[1] = (doublereal) lwkopt; /* return the optimal workspace size computed above */ + return 0; + +/* End of DSYTRF */ + +} /* _starpu_dsytrf_ */ diff --git a/min-dgels/base/SRC/dsytri.c b/min-dgels/base/SRC/dsytri.c new file mode 100644 index 0000000..4b4c76d --- /dev/null +++ b/min-dgels/base/SRC/dsytri.c @@ -0,0 +1,396 @@ +/* dsytri.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b11 = -1.; +static doublereal c_b13 = 0.; + +/* Subroutine */ int _starpu_dsytri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *ipiv, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + doublereal d__; + integer k; + doublereal t, ak; + integer kp; + doublereal akp1; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp, akkp1; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + integer kstep; + logical upper; + extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSYTRI computes the inverse of a real symmetric indefinite matrix */ +/* A using the factorization A = U*D*U**T or A = L*D*L**T computed by */ +/* DSYTRF. 
*/ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the block diagonal matrix D and the multipliers */ +/* used to obtain the factor U or L as computed by DSYTRF. */ + +/* On exit, if INFO = 0, the (symmetric) inverse of the original */ +/* matrix. If UPLO = 'U', the upper triangular part of the */ +/* inverse is formed and the part of A below the diagonal is not */ +/* referenced; if UPLO = 'L' the lower triangular part of the */ +/* inverse is formed and the part of A above the diagonal is */ +/* not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its */ +/* inverse could not be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! 
upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (*info = *n; *info >= 1; --(*info)) { + if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L20: */ + } + } + *info = 0; + + if (upper) { + +/* Compute inv(A) from the factorization A = U*D*U'. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; +L30: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L40; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Invert the diagonal block. */ + + a[k + k * a_dim1] = 1. / a[k + k * a_dim1]; + +/* Compute column K of the inverse. */ + + if (k > 1) { + i__1 = k - 1; + _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & + c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); + i__1 = k - 1; + a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k * + a_dim1 + 1], &c__1); + } + kstep = 1; + } else { + +/* 2 x 2 diagonal block */ + +/* Invert the diagonal block. 
*/ + + t = (d__1 = a[k + (k + 1) * a_dim1], abs(d__1)); + ak = a[k + k * a_dim1] / t; + akp1 = a[k + 1 + (k + 1) * a_dim1] / t; + akkp1 = a[k + (k + 1) * a_dim1] / t; + d__ = t * (ak * akp1 - 1.); + a[k + k * a_dim1] = akp1 / d__; + a[k + 1 + (k + 1) * a_dim1] = ak / d__; + a[k + (k + 1) * a_dim1] = -akkp1 / d__; + +/* Compute columns K and K+1 of the inverse. */ + + if (k > 1) { + i__1 = k - 1; + _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & + c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); + i__1 = k - 1; + a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k * + a_dim1 + 1], &c__1); + i__1 = k - 1; + a[k + (k + 1) * a_dim1] -= _starpu_ddot_(&i__1, &a[k * a_dim1 + 1], & + c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + i__1 = k - 1; + _starpu_dcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], & + c__1); + i__1 = k - 1; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & + c__1, &c_b13, &a[(k + 1) * a_dim1 + 1], &c__1); + i__1 = k - 1; + a[k + 1 + (k + 1) * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, & + a[(k + 1) * a_dim1 + 1], &c__1); + } + kstep = 2; + } + + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + +/* Interchange rows and columns K and KP in the leading */ +/* submatrix A(1:k+1,1:k+1) */ + + i__1 = kp - 1; + _starpu_dswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], & + c__1); + i__1 = k - kp - 1; + _starpu_dswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * + a_dim1], lda); + temp = a[k + k * a_dim1]; + a[k + k * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = temp; + if (kstep == 2) { + temp = a[k + (k + 1) * a_dim1]; + a[k + (k + 1) * a_dim1] = a[kp + (k + 1) * a_dim1]; + a[kp + (k + 1) * a_dim1] = temp; + } + } + + k += kstep; + goto L30; +L40: + + ; + } else { + +/* Compute inv(A) from the factorization A = L*D*L'. 
*/ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = *n; +L50: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L60; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Invert the diagonal block. */ + + a[k + k * a_dim1] = 1. / a[k + k * a_dim1]; + +/* Compute column K of the inverse. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, + &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & + c__1); + i__1 = *n - k; + a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k + 1 + + k * a_dim1], &c__1); + } + kstep = 1; + } else { + +/* 2 x 2 diagonal block */ + +/* Invert the diagonal block. */ + + t = (d__1 = a[k + (k - 1) * a_dim1], abs(d__1)); + ak = a[k - 1 + (k - 1) * a_dim1] / t; + akp1 = a[k + k * a_dim1] / t; + akkp1 = a[k + (k - 1) * a_dim1] / t; + d__ = t * (ak * akp1 - 1.); + a[k - 1 + (k - 1) * a_dim1] = akp1 / d__; + a[k + k * a_dim1] = ak / d__; + a[k + (k - 1) * a_dim1] = -akkp1 / d__; + +/* Compute columns K-1 and K of the inverse. 
*/ + + if (k < *n) { + i__1 = *n - k; + _starpu_dcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, + &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & + c__1); + i__1 = *n - k; + a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k + 1 + + k * a_dim1], &c__1); + i__1 = *n - k; + a[k + (k - 1) * a_dim1] -= _starpu_ddot_(&i__1, &a[k + 1 + k * a_dim1] +, &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + i__1 = *n - k; + _starpu_dcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], & + c__1); + i__1 = *n - k; + _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, + &work[1], &c__1, &c_b13, &a[k + 1 + (k - 1) * a_dim1] +, &c__1); + i__1 = *n - k; + a[k - 1 + (k - 1) * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, & + a[k + 1 + (k - 1) * a_dim1], &c__1); + } + kstep = 2; + } + + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + +/* Interchange rows and columns K and KP in the trailing */ +/* submatrix A(k-1:n,k-1:n) */ + + if (kp < *n) { + i__1 = *n - kp; + _starpu_dswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * + a_dim1], &c__1); + } + i__1 = kp - k - 1; + _starpu_dswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * + a_dim1], lda); + temp = a[k + k * a_dim1]; + a[k + k * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = temp; + if (kstep == 2) { + temp = a[k + (k - 1) * a_dim1]; + a[k + (k - 1) * a_dim1] = a[kp + (k - 1) * a_dim1]; + a[kp + (k - 1) * a_dim1] = temp; + } + } + + k -= kstep; + goto L50; +L60: + ; + } + + return 0; + +/* End of DSYTRI */ + +} /* _starpu_dsytri_ */ diff --git a/min-dgels/base/SRC/dsytrs.c b/min-dgels/base/SRC/dsytrs.c new file mode 100644 index 0000000..21fdfc6 --- /dev/null +++ b/min-dgels/base/SRC/dsytrs.c @@ -0,0 +1,453 @@ +/* dsytrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b19 = 1.; + +/* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal akm1k; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + doublereal denom; + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, + doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DSYTRS solves a system of linear equations A*X = B with a real */ +/* symmetric matrix A using the factorization A = U*D*U**T or */ +/* A = L*D*L**T computed by DSYTRF. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the details of the factorization are stored */ +/* as an upper or lower triangular matrix. */ +/* = 'U': Upper triangular, form is A = U*D*U**T; */ +/* = 'L': Lower triangular, form is A = L*D*L**T. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The block diagonal matrix D and the multipliers used to */ +/* obtain the factor U or L as computed by DSYTRF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* IPIV (input) INTEGER array, dimension (N) */ +/* Details of the interchanges and the block structure of D */ +/* as determined by DSYTRF. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DSYTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B, where A = U*D*U'. */ + +/* First solve U*D*X = B, overwriting B with X. */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = *n; +L10: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L30; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(U(K)), where U(K) is the transformation */ +/* stored in column K of A. */ + + i__1 = k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + + b_dim1], ldb, &b[b_dim1 + 1], ldb); + +/* Multiply by the inverse of the diagonal block. */ + + d__1 = 1. / a[k + k * a_dim1]; + _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + --k; + } else { + +/* 2 x 2 diagonal block */ + +/* Interchange rows K-1 and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k - 1) { + _starpu_dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(U(K)), where U(K) is the transformation */ +/* stored in columns K-1 and K of A. 
*/ + + i__1 = k - 2; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + + b_dim1], ldb, &b[b_dim1 + 1], ldb); + i__1 = k - 2; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k - + 1 + b_dim1], ldb, &b[b_dim1 + 1], ldb); + +/* Multiply by the inverse of the diagonal block. */ + + akm1k = a[k - 1 + k * a_dim1]; + akm1 = a[k - 1 + (k - 1) * a_dim1] / akm1k; + ak = a[k + k * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k - 1 + j * b_dim1] / akm1k; + bk = b[k + j * b_dim1] / akm1k; + b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; +/* L20: */ + } + k += -2; + } + + goto L10; +L30: + +/* Next solve U'*X = B, overwriting B with X. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; +L40: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L50; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Multiply by inv(U'(K)), where U(K) is the transformation */ +/* stored in column K of A. */ + + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * + a_dim1 + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + +/* 2 x 2 diagonal block */ + +/* Multiply by inv(U'(K+1)), where U(K+1) is the transformation */ +/* stored in columns K and K+1 of A. */ + + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * + a_dim1 + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); + i__1 = k - 1; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[(k + + 1) * a_dim1 + 1], &c__1, &c_b19, &b[k + 1 + b_dim1], + ldb); + +/* Interchange rows K and -IPIV(K). 
*/ + + kp = -ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + + goto L40; +L50: + + ; + } else { + +/* Solve A*X = B, where A = L*D*L'. */ + +/* First solve L*D*X = B, overwriting B with X. */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = 1; +L60: + +/* If K > N, exit from loop. */ + + if (k > *n) { + goto L80; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(L(K)), where L(K) is the transformation */ +/* stored in column K of A. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 1 + k * a_dim1], &c__1, &b[k + + b_dim1], ldb, &b[k + 1 + b_dim1], ldb); + } + +/* Multiply by the inverse of the diagonal block. */ + + d__1 = 1. / a[k + k * a_dim1]; + _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + ++k; + } else { + +/* 2 x 2 diagonal block */ + +/* Interchange rows K+1 and -IPIV(K). */ + + kp = -ipiv[k]; + if (kp != k + 1) { + _starpu_dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + +/* Multiply by inv(L(K)), where L(K) is the transformation */ +/* stored in columns K and K+1 of A. */ + + if (k < *n - 1) { + i__1 = *n - k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 2 + k * a_dim1], &c__1, &b[k + + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); + i__1 = *n - k - 1; + _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 2 + (k + 1) * a_dim1], &c__1, + &b[k + 1 + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); + } + +/* Multiply by the inverse of the diagonal block. 
*/ + + akm1k = a[k + 1 + k * a_dim1]; + akm1 = a[k + k * a_dim1] / akm1k; + ak = a[k + 1 + (k + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k + j * b_dim1] / akm1k; + bk = b[k + 1 + j * b_dim1] / akm1k; + b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; +/* L70: */ + } + k += 2; + } + + goto L60; +L80: + +/* Next solve L'*X = B, overwriting B with X. */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2, depending on the size of the diagonal blocks. */ + + k = *n; +L90: + +/* If K < 1, exit from loop. */ + + if (k < 1) { + goto L100; + } + + if (ipiv[k] > 0) { + +/* 1 x 1 diagonal block */ + +/* Multiply by inv(L'(K)), where L(K) is the transformation */ +/* stored in column K of A. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + + b_dim1], ldb); + } + +/* Interchange rows K and IPIV(K). */ + + kp = ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + +/* 2 x 2 diagonal block */ + +/* Multiply by inv(L'(K-1)), where L(K-1) is the transformation */ +/* stored in columns K-1 and K of A. */ + + if (k < *n) { + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + + b_dim1], ldb); + i__1 = *n - k; + _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], + ldb, &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b19, &b[ + k - 1 + b_dim1], ldb); + } + +/* Interchange rows K and -IPIV(K). 
*/ + + kp = -ipiv[k]; + if (kp != k) { + _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + + goto L90; +L100: + ; + } + + return 0; + +/* End of DSYTRS */ + +} /* _starpu_dsytrs_ */ diff --git a/min-dgels/base/SRC/dtbcon.c b/min-dgels/base/SRC/dtbcon.c new file mode 100644 index 0000000..beae61c --- /dev/null +++ b/min-dgels/base/SRC/dtbcon.c @@ -0,0 +1,247 @@ +/* dtbcon.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, + integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase, kase1; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + doublereal anorm; + logical upper; + doublereal xnorm; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern doublereal _starpu_dlantb_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, 
char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + doublereal ainvnm; + logical onenrm; + char normin[1]; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTBCON estimates the reciprocal of the condition number of a */ +/* triangular band matrix A, in either the 1-norm or the infinity-norm. */ + +/* The norm of A is computed and an estimate is obtained for */ +/* norm(inv(A)), then the reciprocal of the condition number is */ +/* computed as */ +/* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals or subdiagonals of the */ +/* triangular band matrix A. KD >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangular band matrix A, stored in the */ +/* first kd+1 rows of the array. The j-th column of A is stored */ +/* in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). 
*/ +/* If DIAG = 'U', the diagonal elements of A are not referenced */ +/* and are assumed to be 1. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + nounit = _starpu_lsame_(diag, "N"); + + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTBCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *rcond = 1.; + return 0; + } + + *rcond = 0.; + smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); + +/* Compute the norm of the triangular matrix A. 
*/ + + anorm = _starpu_dlantb_(norm, uplo, diag, n, kd, &ab[ab_offset], ldab, &work[1]); + +/* Continue only if ANORM > 0. */ + + if (anorm > 0.) { + +/* Estimate the norm of the inverse of A. */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(A). */ + + _starpu_dlatbs_(uplo, "No transpose", diag, normin, n, kd, &ab[ + ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + + 1], info) + ; + } else { + +/* Multiply by inv(A'). */ + + _starpu_dlatbs_(uplo, "Transpose", diag, normin, n, kd, &ab[ab_offset] +, ldab, &work[1], &scale, &work[(*n << 1) + 1], info); + } + *(unsigned char *)normin = 'Y'; + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + xnorm = (d__1 = work[ix], abs(d__1)); + if (scale < xnorm * smlnum || scale == 0.) { + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / anorm / ainvnm; + } + } + +L20: + return 0; + +/* End of DTBCON */ + +} /* _starpu_dtbcon_ */ diff --git a/min-dgels/base/SRC/dtbrfs.c b/min-dgels/base/SRC/dtbrfs.c new file mode 100644 index 0000000..d543269 --- /dev/null +++ b/min-dgels/base/SRC/dtbrfs.c @@ -0,0 +1,519 @@ +/* dtbrfs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b19 = -1.; + +/* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, + i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s, xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dtbmv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer * +, doublereal *, integer *), _starpu_dtbsv_(char *, char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_daxpy_(integer *, doublereal * +, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + char transt[1]; + logical nounit; + 
doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTBRFS provides error bounds and backward error estimates for the */ +/* solution to a system of linear equations with a triangular band */ +/* coefficient matrix. */ + +/* The solution matrix X must be computed by DTBTRS or some other */ +/* means before entering this routine. DTBRFS does not do iterative */ +/* refinement because doing so cannot improve the backward error. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals or subdiagonals of the */ +/* triangular band matrix A. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangular band matrix A, stored in the */ +/* first kd+1 rows of the array. The j-th column of A is stored */ +/* in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). 
*/ +/* If DIAG = 'U', the diagonal elements of A are not referenced */ +/* and are assumed to be 1. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* The solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kd + 1) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -10; + } else if (*ldx < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTBRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *kd + 2; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A or A', depending on TRANS. 
*/ + + _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dtbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[*n + 1], + &c__1); + _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L20: */ + } + + if (notran) { + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MAX */ + i__3 = 1, i__4 = k - *kd; + i__5 = k; + for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { + work[i__] += (d__1 = ab[*kd + 1 + i__ - k + k * + ab_dim1], abs(d__1)) * xk; +/* L30: */ + } +/* L40: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MAX */ + i__5 = 1, i__3 = k - *kd; + i__4 = k - 1; + for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { + work[i__] += (d__1 = ab[*kd + 1 + i__ - k + k * + ab_dim1], abs(d__1)) * xk; +/* L50: */ + } + work[k] += xk; +/* L60: */ + } + } + } else { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MIN */ + i__5 = *n, i__3 = k + *kd; + i__4 = min(i__5,i__3); + for (i__ = k; i__ <= i__4; ++i__) { + work[i__] += (d__1 = ab[i__ + 1 - k + k * ab_dim1] + , abs(d__1)) * xk; +/* L70: */ + } +/* L80: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MIN */ + i__5 = *n, i__3 = k 
+ *kd; + i__4 = min(i__5,i__3); + for (i__ = k + 1; i__ <= i__4; ++i__) { + work[i__] += (d__1 = ab[i__ + 1 - k + k * ab_dim1] + , abs(d__1)) * xk; +/* L90: */ + } + work[k] += xk; +/* L100: */ + } + } + } + } else { + +/* Compute abs(A')*abs(X) + abs(B). */ + + if (upper) { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; +/* Computing MAX */ + i__4 = 1, i__5 = k - *kd; + i__3 = k; + for (i__ = max(i__4,i__5); i__ <= i__3; ++i__) { + s += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], + abs(d__1)) * (d__2 = x[i__ + j * x_dim1], + abs(d__2)); +/* L110: */ + } + work[k] += s; +/* L120: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MAX */ + i__3 = 1, i__4 = k - *kd; + i__5 = k - 1; + for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { + s += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], + abs(d__1)) * (d__2 = x[i__ + j * x_dim1], + abs(d__2)); +/* L130: */ + } + work[k] += s; +/* L140: */ + } + } + } else { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; +/* Computing MIN */ + i__3 = *n, i__4 = k + *kd; + i__5 = min(i__3,i__4); + for (i__ = k; i__ <= i__5; ++i__) { + s += (d__1 = ab[i__ + 1 - k + k * ab_dim1], abs( + d__1)) * (d__2 = x[i__ + j * x_dim1], abs( + d__2)); +/* L150: */ + } + work[k] += s; +/* L160: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MIN */ + i__3 = *n, i__4 = k + *kd; + i__5 = min(i__3,i__4); + for (i__ = k + 1; i__ <= i__5; ++i__) { + s += (d__1 = ab[i__ + 1 - k + k * ab_dim1], abs( + d__1)) * (d__2 = x[i__ + j * x_dim1], abs( + d__2)); +/* L170: */ + } + work[k] += s; +/* L180: */ + } + } + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = 
work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L190: */ + } + berr[j] = s; + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L200: */ + } + + kase = 0; +L210: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)'). */ + + _starpu_dtbsv_(uplo, transt, diag, n, kd, &ab[ab_offset], ldab, &work[ + *n + 1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L220: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L230: */ + } + _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[* + n + 1], &c__1); + } + goto L210; + } + +/* Normalize error. 
*/ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L240: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L250: */ + } + + return 0; + +/* End of DTBRFS */ + +} /* _starpu_dtbrfs_ */ diff --git a/min-dgels/base/SRC/dtbtrs.c b/min-dgels/base/SRC/dtbtrs.c new file mode 100644 index 0000000..0a5b1a0 --- /dev/null +++ b/min-dgels/base/SRC/dtbtrs.c @@ -0,0 +1,204 @@ +/* dtbtrs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal + *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + integer j; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DTBTRS solves a triangular system of the form */ + +/* A * X = B or A**T * X = B, */ + +/* where A is a triangular band matrix of order N, and B is an */ +/* N-by NRHS matrix. A check is made to verify that A is nonsingular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* KD (input) INTEGER */ +/* The number of superdiagonals or subdiagonals of the */ +/* triangular band matrix A. KD >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* The upper or lower triangular band matrix A, stored in the */ +/* first kd+1 rows of AB. The j-th column of A is stored */ +/* in the j-th column of the array AB as follows: */ +/* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ +/* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ +/* If DIAG = 'U', the diagonal elements of A are not referenced */ +/* and are assumed to be 1. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= KD+1. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, if INFO = 0, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of A is zero, */ +/* indicating that the matrix is singular and the */ +/* solutions X have not been computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + nounit = _starpu_lsame_(diag, "N"); + upper = _starpu_lsame_(uplo, "U"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kd + 1) { + *info = -8; + } else if (*ldb < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTBTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity. */ + + if (nounit) { + if (upper) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ab[*kd + 1 + *info * ab_dim1] == 0.) { + return 0; + } +/* L10: */ + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ab[*info * ab_dim1 + 1] == 0.) { + return 0; + } +/* L20: */ + } + } + } + *info = 0; + +/* Solve A * X = B or A' * X = B. 
*/ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + + 1], &c__1); +/* L30: */ + } + + return 0; + +/* End of DTBTRS */ + +} /* _starpu_dtbtrs_ */ diff --git a/min-dgels/base/SRC/dtfsm.c b/min-dgels/base/SRC/dtfsm.c new file mode 100644 index 0000000..d43d7e2 --- /dev/null +++ b/min-dgels/base/SRC/dtfsm.c @@ -0,0 +1,976 @@ +/* dtfsm.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b23 = -1.; +static doublereal c_b27 = 1.; + +/* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, + char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, + doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, k, m1, m2, n1, n2, info; + logical normaltransr; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + logical lside; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + char *, integer *); + logical misodd, nisodd, notrans; + + +/* -- LAPACK routine (version 3.2.1) -- */ + +/* -- Contributed by Fred Gustavson of the 
IBM Watson Research Center -- */ +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* Level 3 BLAS like routine for A in RFP Format. */ + +/* DTFSM solves the matrix equation */ + +/* op( A )*X = alpha*B or X*op( A ) = alpha*B */ + +/* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ +/* non-unit, upper or lower triangular matrix and op( A ) is one of */ + +/* op( A ) = A or op( A ) = A'. */ + +/* A is in Rectangular Full Packed (RFP) Format. */ + +/* The matrix X is overwritten on B. */ + +/* Arguments */ +/* ========== */ + +/* TRANSR - (input) CHARACTER */ +/* = 'N': The Normal Form of RFP A is stored; */ +/* = 'T': The Transpose Form of RFP A is stored. */ + +/* SIDE - (input) CHARACTER */ +/* On entry, SIDE specifies whether op( A ) appears on the left */ +/* or right of X as follows: */ + +/* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ + +/* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ + +/* Unchanged on exit. */ + +/* UPLO - (input) CHARACTER */ +/* On entry, UPLO specifies whether the RFP matrix A came from */ +/* an upper or lower triangular matrix as follows: */ +/* UPLO = 'U' or 'u' RFP A came from an upper triangular matrix */ +/* UPLO = 'L' or 'l' RFP A came from a lower triangular matrix */ + +/* Unchanged on exit. */ + +/* TRANS - (input) CHARACTER */ +/* On entry, TRANS specifies the form of op( A ) to be used */ +/* in the matrix multiplication as follows: */ + +/* TRANS = 'N' or 'n' op( A ) = A. */ + +/* TRANS = 'T' or 't' op( A ) = A'. */ + +/* Unchanged on exit. */ + +/* DIAG - (input) CHARACTER */ +/* On entry, DIAG specifies whether or not RFP A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. 
*/ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* M - (input) INTEGER. */ +/* On entry, M specifies the number of rows of B. M must be at */ +/* least zero. */ +/* Unchanged on exit. */ + +/* N - (input) INTEGER. */ +/* On entry, N specifies the number of columns of B. N must be */ +/* at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - (input) DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* zero then A is not referenced and B need not be set before */ +/* entry. */ +/* Unchanged on exit. */ + +/* A - (input) DOUBLE PRECISION array, dimension (NT); */ +/* NT = N*(N+1)/2. On entry, the matrix A in RFP Format. */ +/* RFP Format is described by TRANSR, UPLO and N as follows: */ +/* If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; */ +/* K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If */ +/* TRANSR = 'T' then RFP is the transpose of RFP A as */ +/* defined when TRANSR = 'N'. The contents of RFP A are defined */ +/* by UPLO as follows: If UPLO = 'U' the RFP A contains the NT */ +/* elements of upper packed A either in normal or */ +/* transpose Format. If UPLO = 'L' the RFP A contains */ +/* the NT elements of lower packed A either in normal or */ +/* transpose Format. The LDA of RFP A is (N+1)/2 when */ +/* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */ +/* even and is N when N is odd. */ +/* See the Note below for more details. Unchanged on exit. */ + +/* B - (input/output) DOUBLE PRECISION array, DIMENSION (LDB,N) */ +/* Before entry, the leading m by n part of the array B must */ +/* contain the right-hand side matrix B, and on exit is */ +/* overwritten by the solution matrix X. */ + +/* LDB - (input) INTEGER. */ +/* On entry, LDB specifies the first dimension of B as declared */ +/* in the calling (sub) program. LDB must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. 
*/ + +/* Further Details */ +/* =============== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. 
*/ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* Reference */ +/* ========= */ + +/* ===================================================================== */ + +/* .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + b_dim1 = *ldb - 1 - 0 + 1; + b_offset = 0 + b_dim1 * 0; + b -= b_offset; + + /* Function Body */ + info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lside = _starpu_lsame_(side, "L"); + lower = _starpu_lsame_(uplo, "L"); + notrans = _starpu_lsame_(trans, "N"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + info = -1; + } else if (! lside && ! _starpu_lsame_(side, "R")) { + info = -2; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + info = -3; + } else if (! notrans && ! _starpu_lsame_(trans, "T")) { + info = -4; + } else if (! _starpu_lsame_(diag, "N") && ! 
_starpu_lsame_(diag, + "U")) { + info = -5; + } else if (*m < 0) { + info = -6; + } else if (*n < 0) { + info = -7; + } else if (*ldb < max(1,*m)) { + info = -11; + } + if (info != 0) { + i__1 = -info; + _starpu_xerbla_("DTFSM ", &i__1); + return 0; + } + +/* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Quick return when ALPHA.EQ.(0D+0) */ + + if (*alpha == 0.) { + i__1 = *n - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *m - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + + if (lside) { + +/* SIDE = 'L' */ + +/* A is M-by-M. */ +/* If M is odd, set NISODD = .TRUE., and M1 and M2. */ +/* If M is even, NISODD = .FALSE., and M. */ + + if (*m % 2 == 0) { + misodd = FALSE_; + k = *m / 2; + } else { + misodd = TRUE_; + if (lower) { + m2 = *m / 2; + m1 = *m - m2; + } else { + m1 = *m / 2; + m2 = *m - m1; + } + } + + + if (misodd) { + +/* SIDE = 'L' and N is odd */ + + if (normaltransr) { + +/* SIDE = 'L', N is odd, and TRANSR = 'N' */ + + if (lower) { + +/* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ +/* TRANS = 'N' */ + + if (*m == 1) { + _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & + b[b_offset], ldb); + } else { + _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & + b[b_offset], ldb); + _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, &a[m1], m, & + b[b_offset], ldb, alpha, &b[m1], ldb); + _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[*m] +, m, &b[m1], ldb); + } + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ +/* TRANS = 'T' */ + + if (*m == 1) { + _starpu_dtrsm_("L", "L", "T", diag, &m1, n, alpha, a, m, & + b[b_offset], ldb); + } else { + _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[*m], + m, &b[m1], ldb); + _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, &a[m1], m, & + b[m1], ldb, alpha, 
&b[b_offset], ldb); + _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, a, m, + &b[b_offset], ldb); + } + + } + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' */ + + if (! notrans) { + +/* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, &a[m2], m, + &b[b_offset], ldb); + _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, a, m, &b[ + b_offset], ldb, alpha, &b[m1], ldb); + _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[m1], m, + &b[m1], ldb); + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[m1], m, + &b[m1], ldb); + _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, a, m, &b[m1], + ldb, alpha, &b[b_offset], ldb); + _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, &a[m2], m, + &b[b_offset], ldb); + + } + + } + + } else { + +/* SIDE = 'L', N is odd, and TRANSR = 'T' */ + + if (lower) { + +/* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ +/* TRANS = 'N' */ + + if (*m == 1) { + _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, + &b[b_offset], ldb); + } else { + _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, + &b[b_offset], ldb); + _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, &a[m1 * m1], + &m1, &b[b_offset], ldb, alpha, &b[m1], + ldb); + _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[1], + &m1, &b[m1], ldb); + } + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ +/* TRANS = 'T' */ + + if (*m == 1) { + _starpu_dtrsm_("L", "U", "N", diag, &m1, n, alpha, a, &m1, + &b[b_offset], ldb); + } else { + _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[1], + &m1, &b[m1], ldb); + _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, &a[m1 * m1], + &m1, &b[m1], ldb, alpha, &b[b_offset], + ldb); + _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, a, 
& + m1, &b[b_offset], ldb); + } + + } + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' */ + + if (! notrans) { + +/* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, &a[m2 * m2] +, &m2, &b[b_offset], ldb); + _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, a, &m2, &b[ + b_offset], ldb, alpha, &b[m1], ldb); + _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[m1 * + m2], &m2, &b[m1], ldb); + + } else { + +/* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[m1 * m2] +, &m2, &b[m1], ldb); + _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, a, &m2, &b[m1], + ldb, alpha, &b[b_offset], ldb); + _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, &a[m2 * + m2], &m2, &b[b_offset], ldb); + + } + + } + + } + + } else { + +/* SIDE = 'L' and N is even */ + + if (normaltransr) { + +/* SIDE = 'L', N is even, and TRANSR = 'N' */ + + if (lower) { + +/* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ +/* and TRANS = 'N' */ + + i__1 = *m + 1; + _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[1], & + i__1, &b[b_offset], ldb); + i__1 = *m + 1; + _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, + &b[b_offset], ldb, alpha, &b[k], ldb); + i__1 = *m + 1; + _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, a, &i__1, & + b[k], ldb); + + } else { + +/* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ +/* and TRANS = 'T' */ + + i__1 = *m + 1; + _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, a, &i__1, & + b[k], ldb); + i__1 = *m + 1; + _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, + &b[k], ldb, alpha, &b[b_offset], ldb); + i__1 = *m + 1; + _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[1], & + i__1, &b[b_offset], ldb); + + } + + } else { + +/* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' 
*/ + + if (! notrans) { + +/* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ +/* and TRANS = 'N' */ + + i__1 = *m + 1; + _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[k + 1], & + i__1, &b[b_offset], ldb); + i__1 = *m + 1; + _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &i__1, &b[ + b_offset], ldb, alpha, &b[k], ldb); + i__1 = *m + 1; + _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, &a[k], & + i__1, &b[k], ldb); + + } else { + +/* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ +/* and TRANS = 'T' */ + i__1 = *m + 1; + _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, &a[k], & + i__1, &b[k], ldb); + i__1 = *m + 1; + _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &i__1, &b[k], + ldb, alpha, &b[b_offset], ldb); + i__1 = *m + 1; + _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[k + 1], + &i__1, &b[b_offset], ldb); + + } + + } + + } else { + +/* SIDE = 'L', N is even, and TRANSR = 'T' */ + + if (lower) { + +/* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ +/* and TRANS = 'N' */ + + _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k], &k, & + b[b_offset], ldb); + _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & + k, &b[b_offset], ldb, alpha, &b[k], ldb); + _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, a, &k, &b[ + k], ldb); + + } else { + +/* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ +/* and TRANS = 'T' */ + + _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, a, &k, &b[k] +, ldb); + _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & + k, &b[k], ldb, alpha, &b[b_offset], ldb); + _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k], &k, + &b[b_offset], ldb); + + } + + } else { + +/* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' */ + + if (! 
notrans) { + +/* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ +/* and TRANS = 'N' */ + + _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k * (k + + 1)], &k, &b[b_offset], ldb); + _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &k, &b[ + b_offset], ldb, alpha, &b[k], ldb); + _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, &a[k * k], + &k, &b[k], ldb); + + } else { + +/* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ +/* and TRANS = 'T' */ + + _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, &a[k * k], & + k, &b[k], ldb); + _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &k, &b[k], ldb, + alpha, &b[b_offset], ldb); + _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k * (k + + 1)], &k, &b[b_offset], ldb); + + } + + } + + } + + } + + } else { + +/* SIDE = 'R' */ + +/* A is N-by-N. */ +/* If N is odd, set NISODD = .TRUE., and N1 and N2. */ +/* If N is even, NISODD = .FALSE., and K. */ + + if (*n % 2 == 0) { + nisodd = FALSE_; + k = *n / 2; + } else { + nisodd = TRUE_; + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + } + + if (nisodd) { + +/* SIDE = 'R' and N is odd */ + + if (normaltransr) { + +/* SIDE = 'R', N is odd, and TRANSR = 'N' */ + + if (lower) { + +/* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[*n], n, + &b[n1 * b_dim1], ldb); + _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], + ldb, &a[n1], n, alpha, b, ldb); + _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, a, n, b, + ldb); + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, a, n, b, + ldb); + _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, &a[n1], + n, alpha, &b[n1 * b_dim1], ldb); + _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[*n], n, + &b[n1 * 
b_dim1], ldb); + + } + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' */ + + if (notrans) { + +/* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, &a[n2], n, + b, ldb); + _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, a, n, + alpha, &b[n1 * b_dim1], ldb); + _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[n1], n, + &b[n1 * b_dim1], ldb); + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[n1], n, + &b[n1 * b_dim1], ldb); + _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], + ldb, a, n, alpha, b, ldb); + _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, &a[n2], n, + b, ldb); + + } + + } + + } else { + +/* SIDE = 'R', N is odd, and TRANSR = 'T' */ + + if (lower) { + +/* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[1], &n1, + &b[n1 * b_dim1], ldb); + _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], + ldb, &a[n1 * n1], &n1, alpha, b, ldb); + _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, a, &n1, b, + ldb); + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, a, &n1, b, + ldb); + _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, &a[n1 * + n1], &n1, alpha, &b[n1 * b_dim1], ldb); + _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[1], & + n1, &b[n1 * b_dim1], ldb); + + } + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' */ + + if (notrans) { + +/* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ +/* TRANS = 'N' */ + + _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, &a[n2 * n2] +, &n2, b, ldb); + _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, 
ldb, a, &n2, + alpha, &b[n1 * b_dim1], ldb); + _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[n1 * + n2], &n2, &b[n1 * b_dim1], ldb); + + } else { + +/* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ +/* TRANS = 'T' */ + + _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[n1 * n2] +, &n2, &b[n1 * b_dim1], ldb); + _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], + ldb, a, &n2, alpha, b, ldb); + _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, &a[n2 * + n2], &n2, b, ldb); + + } + + } + + } + + } else { + +/* SIDE = 'R' and N is even */ + + if (normaltransr) { + +/* SIDE = 'R', N is even, and TRANSR = 'N' */ + + if (lower) { + +/* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ +/* and TRANS = 'N' */ + + i__1 = *n + 1; + _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, a, &i__1, & + b[k * b_dim1], ldb); + i__1 = *n + 1; + _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], + ldb, &a[k + 1], &i__1, alpha, b, ldb); + i__1 = *n + 1; + _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[1], & + i__1, b, ldb); + + } else { + +/* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ +/* and TRANS = 'T' */ + + i__1 = *n + 1; + _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[1], & + i__1, b, ldb); + i__1 = *n + 1; + _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, &a[k + 1], + &i__1, alpha, &b[k * b_dim1], ldb); + i__1 = *n + 1; + _starpu_dtrsm_("R", "U", "N", diag, m, &k, &c_b27, a, &i__1, & + b[k * b_dim1], ldb); + + } + + } else { + +/* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' */ + + if (notrans) { + +/* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ +/* and TRANS = 'N' */ + + i__1 = *n + 1; + _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[k + 1], & + i__1, b, ldb); + i__1 = *n + 1; + _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, a, &i__1, + alpha, &b[k * b_dim1], ldb); + i__1 = *n + 1; + _starpu_dtrsm_("R", "U", 
"N", diag, m, &k, &c_b27, &a[k], & + i__1, &b[k * b_dim1], ldb); + + } else { + +/* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ +/* and TRANS = 'T' */ + + i__1 = *n + 1; + _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, &a[k], & + i__1, &b[k * b_dim1], ldb); + i__1 = *n + 1; + _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], + ldb, a, &i__1, alpha, b, ldb); + i__1 = *n + 1; + _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[k + 1], + &i__1, b, ldb); + + } + + } + + } else { + +/* SIDE = 'R', N is even, and TRANSR = 'T' */ + + if (lower) { + +/* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' */ + + if (notrans) { + +/* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ +/* and TRANS = 'N' */ + + _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, a, &k, &b[k + * b_dim1], ldb); + _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], + ldb, &a[(k + 1) * k], &k, alpha, b, ldb); + _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[k], &k, + b, ldb); + + } else { + +/* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ +/* and TRANS = 'T' */ + + _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[k], &k, + b, ldb); + _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, &a[(k + 1) + * k], &k, alpha, &b[k * b_dim1], ldb); + _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, a, &k, &b[ + k * b_dim1], ldb); + + } + + } else { + +/* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' */ + + if (notrans) { + +/* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ +/* and TRANS = 'N' */ + + _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[(k + 1) * + k], &k, b, ldb); + _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, a, &k, + alpha, &b[k * b_dim1], ldb); + _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, &a[k * k], + &k, &b[k * b_dim1], ldb); + + } else { + +/* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ +/* and TRANS = 'T' */ + + _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, &a[k * k], & + k, &b[k * b_dim1], ldb); + 
_starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], + ldb, a, &k, alpha, b, ldb); + _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[(k + 1) + * k], &k, b, ldb); + + } + + } + + } + + } + } + + return 0; + +/* End of DTFSM */ + +} /* _starpu_dtfsm_ */ diff --git a/min-dgels/base/SRC/dtftri.c b/min-dgels/base/SRC/dtftri.c new file mode 100644 index 0000000..d54c54e --- /dev/null +++ b/min-dgels/base/SRC/dtftri.c @@ -0,0 +1,474 @@ +/* dtftri.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: scalar factors passed to _starpu_dtrmm_ (c_b13 in the first product of each case, c_b18 in the second) */ + +static doublereal c_b13 = -1.; +static doublereal c_b18 = 1.; + +/* _starpu_dtftri_: inverts, in place, a triangular matrix stored in RFP format. The arguments are validated first; each of the eight TRANSR/UPLO/parity cases then calls _starpu_dtrtri_ on the two triangular sub-blocks and _starpu_dtrmm_ twice on the remaining block. The function value is always 0; the outcome is reported through *info. */ /* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, + doublereal *a, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer k, n1, n2; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nisodd; + extern /* Subroutine */ int _starpu_dtrtri_(char *, char *, integer *, doublereal + *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTFTRI computes the inverse of a triangular matrix A stored in RFP */ +/* format. */ + +/* This is a Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': The Normal TRANSR of RFP A is stored; */ +/* = 'T': The Transpose TRANSR of RFP A is stored. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* DIAG (input) CHARACTER */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (0:nt-1); */ +/* nt=N*(N+1)/2. On entry, the triangular factor of a Hermitian */ +/* Positive Definite matrix A in RFP format. RFP format is */ +/* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ +/* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ +/* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ +/* the transpose of RFP A as defined when */ +/* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ +/* follows: If UPLO = 'U' the RFP A contains the nt elements of */ +/* upper packed A; If UPLO = 'L' the RFP A contains the nt */ +/* elements of lower packed A. The LDA of RFP A is (N+1)/2 when */ +/* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */ +/* even and N when N is odd. See the Note below for more details. */ + +/* On exit, the (triangular) inverse of the original matrix, in */ +/* the same storage format. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ +/* matrix is singular and its inverse can not be computed. 
*/ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. 
*/ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters: *info becomes minus the position of the first invalid argument, and that position is passed to _starpu_xerbla_. */ + + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (! _starpu_lsame_(diag, "N") && ! _starpu_lsame_(diag, + "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTFTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* If N is odd, set NISODD = .TRUE. */ +/* If N is even, set K = N/2 and NISODD = .FALSE. 
*/ /* K is assigned only when N is even and is read only in the N-even branch below. */ + + if (*n % 2 == 0) { + k = *n / 2; + nisodd = FALSE_; + } else { + nisodd = TRUE_; + } + +/* Set N1 and N2 depending on LOWER */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + + +/* start execution: there are eight cases */ + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ +/* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ +/* T1 -> a(0), T2 -> a(n), S -> a(n1) */ + + _starpu_dtrtri_("L", diag, &n1, a, n, info); + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "L", "N", diag, &n2, &n1, &c_b13, a, n, &a[n1], n); + _starpu_dtrtri_("U", diag, &n2, &a[*n], n, info) + ; + /* a positive index reported for the second triangle is shifted by n1 before it is returned in *info */ if (*info > 0) { + *info += n1; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "U", "T", diag, &n2, &n1, &c_b18, &a[*n], n, &a[ + n1], n); + + } else { + +/* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) ) */ +/* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ +/* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ + + _starpu_dtrtri_("L", diag, &n1, &a[n2], n, info) + ; + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "L", "T", diag, &n1, &n2, &c_b13, &a[n2], n, a, n); + _starpu_dtrtri_("U", diag, &n2, &a[n1], n, info) + ; + if (*info > 0) { + *info += n1; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "U", "N", diag, &n1, &n2, &c_b18, &a[n1], n, a, n); + + } + + } else { + +/* N is odd and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is odd */ +/* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) */ + + _starpu_dtrtri_("U", diag, &n1, a, &n1, info); + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "U", "N", diag, &n1, &n2, &c_b13, a, &n1, &a[n1 * + n1], &n1); + _starpu_dtrtri_("L", diag, &n2, &a[1], &n1, info); + if (*info > 0) { + *info += n1; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "L", "T", diag, &n1, &n2, &c_b18, &a[1], &n1, &a[ + n1 * n1], 
&n1); + + } else { + +/* SRPA for UPPER, TRANSPOSE and N is odd */ +/* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) */ + + _starpu_dtrtri_("U", diag, &n1, &a[n2 * n2], &n2, info); + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "U", "T", diag, &n2, &n1, &c_b13, &a[n2 * n2], & + n2, a, &n2); + _starpu_dtrtri_("L", diag, &n2, &a[n1 * n2], &n2, info); + if (*info > 0) { + *info += n1; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "L", "N", diag, &n2, &n1, &c_b18, &a[n1 * n2], & + n2, a, &n2); + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ +/* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ + + i__1 = *n + 1; + _starpu_dtrtri_("L", diag, &k, &a[1], &i__1, info); + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("R", "L", "N", diag, &k, &k, &c_b13, &a[1], &i__1, &a[ + k + 1], &i__2); + i__1 = *n + 1; + _starpu_dtrtri_("U", diag, &k, a, &i__1, info); + if (*info > 0) { + *info += k; + } + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("L", "U", "T", diag, &k, &k, &c_b18, a, &i__1, &a[k + + 1], &i__2) + ; + + } else { + +/* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ +/* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ + + i__1 = *n + 1; + _starpu_dtrtri_("L", diag, &k, &a[k + 1], &i__1, info); + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("L", "L", "T", diag, &k, &k, &c_b13, &a[k + 1], &i__1, + a, &i__2); + i__1 = *n + 1; + _starpu_dtrtri_("U", diag, &k, &a[k], &i__1, info); + if (*info > 0) { + *info += k; + } + if (*info > 0) { + return 0; + } + i__1 = *n + 1; + i__2 = *n + 1; + _starpu_dtrmm_("R", "U", "N", diag, &k, &k, &c_b18, &a[k], &i__1, a, & + i__2); + } + } else { + +/* N is even and TRANSR = 'T' */ 
+ + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ +/* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ + + _starpu_dtrtri_("U", diag, &k, &a[k], &k, info); + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "U", "N", diag, &k, &k, &c_b13, &a[k], &k, &a[k * + (k + 1)], &k); + _starpu_dtrtri_("L", diag, &k, a, &k, info); + if (*info > 0) { + *info += k; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "L", "T", diag, &k, &k, &c_b18, a, &k, &a[k * (k + + 1)], &k) + ; + } else { + +/* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ +/* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ + + _starpu_dtrtri_("U", diag, &k, &a[k * (k + 1)], &k, info); + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("R", "U", "T", diag, &k, &k, &c_b13, &a[k * (k + 1)], & + k, a, &k); + _starpu_dtrtri_("L", diag, &k, &a[k * k], &k, info); + if (*info > 0) { + *info += k; + } + if (*info > 0) { + return 0; + } + _starpu_dtrmm_("L", "L", "N", diag, &k, &k, &c_b18, &a[k * k], &k, a, + &k); + } + } + } + + return 0; + +/* End of DTFTRI */ + +} /* _starpu_dtftri_ */ diff --git a/min-dgels/base/SRC/dtfttp.c b/min-dgels/base/SRC/dtfttp.c new file mode 100644 index 0000000..58470a6 --- /dev/null +++ b/min-dgels/base/SRC/dtfttp.c @@ -0,0 +1,514 @@ +/* dtfttp.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* _starpu_dtfttp_: copies the triangular matrix held in arf (RFP format) into ap (standard packed format), element by element. The arguments are validated first; an invalid one is reported through _starpu_xerbla_ with *info set to minus its position. The function value is always 0. */ /* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal + *arf, doublereal *ap, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables (ijp: next write position in ap; ij: current read position in arf) */ + integer i__, j, k, n1, n2, ij, jp, js, nt, lda, ijp; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nisodd; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTFTTP copies a triangular matrix A from rectangular full packed */ +/* format (TF) to standard packed format (TP). */ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': ARF is in Normal format; */ +/* = 'T': ARF is in Transpose format; */ + +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* ARF (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ +/* On entry, the upper or lower triangular matrix A stored in */ +/* RFP format. For a further discussion see Notes below. 
*/ + +/* AP (output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ +/* On exit, the upper or lower triangular matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. 
*/ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! 
_starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTFTTP", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + /* N = 1: both TRANSR branches perform the same single-element copy */ if (*n == 1) { + if (normaltransr) { + ap[0] = arf[0]; + } else { + ap[0] = arf[0]; + } + return 0; + } + +/* Size of array ARF(0:NT-1); nt is assigned here but not read again in this routine */ + + nt = *n * (*n + 1) / 2; + +/* Set N1 and N2 depending on LOWER */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + +/* If N is odd, set NISODD = .TRUE. */ +/* If N is even, set K = N/2 and NISODD = .FALSE. */ + +/* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) */ +/* where noe = 0 if n is even, noe = 1 if n is odd */ + + if (*n % 2 == 0) { + k = *n / 2; + nisodd = FALSE_; + lda = *n + 1; + } else { + nisodd = TRUE_; + lda = *n; + } + +/* ARF^C has lda rows and n+1-noe cols */ + + if (! normaltransr) { + lda = (*n + 1) / 2; + } + +/* start execution: there are eight cases */ + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ +/* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ +/* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n */ + + ijp = 0; + jp = 0; + i__1 = n2; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + ij = i__ + jp; + ap[ijp] = arf[ij]; + ++ijp; + } + jp += lda; + } + i__1 = n2 - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = n2; + for (j = i__ + 1; j <= i__2; ++j) { + ij = i__ + j * lda; + ap[ijp] = arf[ij]; + ++ijp; + } + } + + } else { + +/* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) ) */ +/* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ +/* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ + + ijp = 0; + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + ij = n2 + j; + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + ap[ijp] = arf[ij]; + ++ijp; + ij += lda; + } 
+ } + js = 0; + i__1 = *n - 1; + for (j = n1; j <= i__1; ++j) { + ij = js; + i__2 = js + j; + for (ij = js; ij <= i__2; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js += lda; + } + + } + + } else { + +/* N is odd and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is odd */ +/* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) */ +/* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 */ + + ijp = 0; + i__1 = n2; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = *n * lda - 1; + i__3 = lda; /* loop stride; lda is (N+1)/2 with N >= 2 at this point, hence positive, so the ij <= i__2 form of the test below is the one taken */ + for (ij = i__ * (lda + 1); i__3 < 0 ? ij >= i__2 : ij <= + i__2; ij += i__3) { + ap[ijp] = arf[ij]; + ++ijp; + } + } + js = 1; + i__1 = n2 - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + n2 - j - 1; + for (ij = js; ij <= i__3; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js = js + lda + 1; + } + + } else { + +/* SRPA for UPPER, TRANSPOSE and N is odd */ +/* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) */ +/* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 */ + + ijp = 0; + js = n2 * lda; + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + j; + for (ij = js; ij <= i__3; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js += lda; + } + i__1 = n1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__3 = i__ + (n1 + i__) * lda; + i__2 = lda; + for (ij = i__; i__2 < 0 ? 
ij >= i__3 : ij <= i__3; ij += + i__2) { + ap[ijp] = arf[ij]; + ++ijp; + } + } + + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ +/* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ + + ijp = 0; + jp = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + ij = i__ + 1 + jp; + ap[ijp] = arf[ij]; + ++ijp; + } + jp += lda; + } + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = k - 1; + for (j = i__; j <= i__2; ++j) { + ij = i__ + j * lda; + ap[ijp] = arf[ij]; + ++ijp; + } + } + + } else { + +/* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ +/* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ +/* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ + + ijp = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + ij = k + 1 + j; + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + ap[ijp] = arf[ij]; + ++ijp; + ij += lda; + } + } + js = 0; + i__1 = *n - 1; + for (j = k; j <= i__1; ++j) { + ij = js; + i__2 = js + j; + for (ij = js; ij <= i__2; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js += lda; + } + + } + + } else { + +/* N is even and TRANSR = 'T' */ + + if (lower) { + +/* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ +/* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ + + ijp = 0; + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = (*n + 1) * lda - 1; + i__3 = lda; + for (ij = i__ + (i__ + 1) * lda; i__3 < 0 ? 
ij >= i__2 : + ij <= i__2; ij += i__3) { + ap[ijp] = arf[ij]; + ++ijp; + } + } + js = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + k - j - 1; + for (ij = js; ij <= i__3; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js = js + lda + 1; + } + + } else { + +/* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ +/* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ +/* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ + + ijp = 0; + js = (k + 1) * lda; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + j; + for (ij = js; ij <= i__3; ++ij) { + ap[ijp] = arf[ij]; + ++ijp; + } + js += lda; + } + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__3 = i__ + (k + i__) * lda; + i__2 = lda; + for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += + i__2) { + ap[ijp] = arf[ij]; + ++ijp; + } + } + + } + + } + + } + + return 0; + +/* End of DTFTTP */ + +} /* _starpu_dtfttp_ */ diff --git a/min-dgels/base/SRC/dtfttr.c b/min-dgels/base/SRC/dtfttr.c new file mode 100644 index 0000000..c57e836 --- /dev/null +++ b/min-dgels/base/SRC/dtfttr.c @@ -0,0 +1,491 @@ +/* dtfttr.f -- translated by f2c (version 20061008). 
+   You must link the resulting object file with libf2c:
+	on Microsoft Windows system, link with libf2c.lib;
+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+	or, if you install libf2c.a in a standard place, with -lf2c -lm
+	-- in that order, at the end of the command line, as in
+		cc *.o -lf2c -lm
+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+		http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/*
+ * DTFTTR copies a triangular matrix A from rectangular full packed
+ * format (TF, "RFP") to standard full format (TR).
+ *
+ * -- LAPACK routine (version 3.2), contributed by Fred Gustavson of the
+ *    IBM Watson Research Center, November 2008 --
+ *
+ * Arguments
+ * =========
+ *   transr (input)  = 'N': ARF is in Normal format;
+ *                   = 'T': ARF is in Transpose format.
+ *   uplo   (input)  = 'U': A is upper triangular;
+ *                   = 'L': A is lower triangular.
+ *   n      (input)  The order of the matrices ARF and A.  N >= 0.
+ *   arf    (input)  Array of dimension N*(N+1)/2 holding the upper
+ *                   (UPLO = 'U') or lower (UPLO = 'L') triangle in RFP
+ *                   format, addressed from index 0.
+ *   a      (output) Array of dimension (LDA,N), column-major, addressed
+ *                   from element (0,0).  On exit the triangle selected by
+ *                   UPLO holds the matrix; the opposite strict triangle is
+ *                   not referenced.
+ *   lda    (input)  The leading dimension of A.  LDA >= max(1,N).
+ *   info   (output) = 0: successful exit;
+ *                   < 0: if INFO = -i, the i-th argument had an illegal
+ *                        value (xerbla is called with "DTFTTR" and i).
+ *
+ * The function value is always 0.
+ *
+ * RFP layout
+ * ==========
+ * Two-digit entries "ij" below denote element (i,j) of the triangle.
+ *
+ * N even, example N = 6:
+ *
+ *        AP is Upper              AP is Lower
+ *
+ *     00 01 02 03 04 05        00
+ *        11 12 13 14 15        10 11
+ *           22 23 24 25        20 21 22
+ *              33 34 35        30 31 32 33
+ *                 44 45        40 41 42 43 44
+ *                    55        50 51 52 53 54 55
+ *
+ *   TRANSR = 'N': the RFP array is (N+1)-by-(N/2), stored by columns.
+ *
+ *     UPLO = 'U'     UPLO = 'L'
+ *
+ *      03 04 05       33 43 53
+ *      13 14 15       00 44 54
+ *      23 24 25       10 11 55
+ *      33 34 35       20 21 22
+ *      00 44 45       30 31 32
+ *      01 11 55       40 41 42
+ *      02 12 22       50 51 52
+ *
+ *   TRANSR = 'T': the RFP array is the transpose of the one above.
+ *
+ *          UPLO = 'U'                  UPLO = 'L'
+ *
+ *     03 13 23 33 00 01 02        33 00 10 20 30 40 50
+ *     04 14 24 34 44 11 12        43 44 11 21 31 41 51
+ *     05 15 25 35 45 55 22        53 54 55 22 32 42 52
+ *
+ * N odd, example N = 5:
+ *
+ *       AP is Upper            AP is Lower
+ *
+ *     00 01 02 03 04        00
+ *        11 12 13 14        10 11
+ *           22 23 24        20 21 22
+ *              33 34        30 31 32 33
+ *                 44        40 41 42 43 44
+ *
+ *   TRANSR = 'N': the RFP array is N-by-((N+1)/2), stored by columns.
+ *
+ *     UPLO = 'U'     UPLO = 'L'
+ *
+ *      02 03 04       00 33 43
+ *      12 13 14       10 11 44
+ *      22 23 24       20 21 22
+ *      00 33 34       30 31 32
+ *      01 11 44       40 41 42
+ *
+ *   TRANSR = 'T': the RFP array is the transpose of the one above.
+ *
+ *       UPLO = 'U'           UPLO = 'L'
+ *
+ *     02 12 22 00 01      00 10 20 30 40
+ *     03 13 23 33 11      33 11 21 31 41
+ *     04 14 24 34 44      43 44 22 32 42
+ */
+/* Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal
+	*arf, doublereal *a, integer *lda, integer *info)
+{
+    /* Bits combined into the key that selects one of the eight variants. */
+    enum { RFP_LOWER = 1, RFP_NORMAL = 2, RFP_ODD = 4 };
+
+    extern logical _starpu_lsame_(char *, char *);
+    extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
+
+    integer ld, order, half, n1, n2, total, outer, inner, pos, argno;
+    logical normal, lower;
+    int variant;
+
+    /* A is addressed as a[row + col * ld], rows and columns counted from 0. */
+    ld = *lda;
+
+    /* Check the arguments; only the first offending one is reported. */
+    *info = 0;
+    normal = _starpu_lsame_(transr, "N");
+    lower = _starpu_lsame_(uplo, "L");
+    if (! (normal || _starpu_lsame_(transr, "T"))) {
+	*info = -1;
+    } else if (! (lower || _starpu_lsame_(uplo, "U"))) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	argno = -(*info);
+	_starpu_xerbla_("DTFTTR", &argno);
+	return 0;
+    }
+
+    /* Orders 0 and 1 need no unpacking logic. */
+    order = *n;
+    if (order == 1) {
+	a[0] = arf[0];
+    }
+    if (order <= 1) {
+	return 0;
+    }
+
+    /* Number of packed elements: ARF is indexed 0 .. total-1. */
+    total = order * (order + 1) / 2;
+
+    /* Split the order into n1 + n2; for even order both equal half. */
+    half = order / 2;
+    if (lower) {
+	n2 = half;
+	n1 = order - n2;
+    } else {
+	n1 = half;
+	n2 = order - n1;
+    }
+
+    variant = (order % 2 != 0 ? RFP_ODD : 0)
+	    | (normal ? RFP_NORMAL : 0)
+	    | (lower ? RFP_LOWER : 0);
+
+    /* In every variant `pos` is the running read index into ARF, so the */
+    /* order of the copy loops below must not be changed. */
+    switch (variant) {
+
+    case RFP_ODD | RFP_NORMAL | RFP_LOWER:
+	/* N is odd, TRANSR = 'N', and UPLO = 'L' */
+	pos = 0;
+	for (outer = 0; outer <= n2; ++outer) {
+	    /* piece of row n2+outer of A, columns n1 .. n2+outer */
+	    for (inner = n1; inner <= n2 + outer; ++inner) {
+		a[n2 + outer + inner * ld] = arf[pos++];
+	    }
+	    /* column `outer` of A, from the diagonal downwards */
+	    for (inner = outer; inner < order; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	}
+	break;
+
+    case RFP_ODD | RFP_NORMAL:
+	/* N is odd, TRANSR = 'N', and UPLO = 'U' */
+	pos = total - order;
+	for (outer = order - 1; outer >= n1; --outer) {
+	    /* column `outer` of A, from the top down to the diagonal */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	    /* piece of row outer-n1 of A, columns outer-n1 .. n1-1 */
+	    for (inner = outer - n1; inner < n1; ++inner) {
+		a[outer - n1 + inner * ld] = arf[pos++];
+	    }
+	    /* N elements were read; rewind 2*N to reach the previous */
+	    /* column of the packed array */
+	    pos -= order + order;
+	}
+	break;
+
+    case RFP_ODD | RFP_LOWER:
+	/* N is odd, TRANSR = 'T', and UPLO = 'L' */
+	pos = 0;
+	for (outer = 0; outer < n2; ++outer) {
+	    /* row `outer` of A, columns 0 .. outer */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	    /* column n1+outer of A, from the diagonal downwards */
+	    for (inner = n1 + outer; inner < order; ++inner) {
+		a[inner + (n1 + outer) * ld] = arf[pos++];
+	    }
+	}
+	/* remaining rows n2 .. N-1 of A, columns 0 .. n1-1 */
+	for (outer = n2; outer < order; ++outer) {
+	    for (inner = 0; inner < n1; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	}
+	break;
+
+    case RFP_ODD:
+	/* N is odd, TRANSR = 'T', and UPLO = 'U' */
+	pos = 0;
+	/* rows 0 .. n1 of A, columns n1 .. N-1 */
+	for (outer = 0; outer <= n1; ++outer) {
+	    for (inner = n1; inner < order; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	}
+	for (outer = 0; outer < n1; ++outer) {
+	    /* column `outer` of A, from the top down to the diagonal */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	    /* row n2+outer of A, from the diagonal to the last column */
+	    for (inner = n2 + outer; inner < order; ++inner) {
+		a[n2 + outer + inner * ld] = arf[pos++];
+	    }
+	}
+	break;
+
+    case RFP_NORMAL | RFP_LOWER:
+	/* N is even, TRANSR = 'N', and UPLO = 'L' */
+	pos = 0;
+	for (outer = 0; outer < half; ++outer) {
+	    /* piece of row half+outer of A, columns half .. half+outer */
+	    for (inner = half; inner <= half + outer; ++inner) {
+		a[half + outer + inner * ld] = arf[pos++];
+	    }
+	    /* column `outer` of A, from the diagonal downwards */
+	    for (inner = outer; inner < order; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	}
+	break;
+
+    case RFP_NORMAL:
+	/* N is even, TRANSR = 'N', and UPLO = 'U' */
+	pos = total - order - 1;
+	for (outer = order - 1; outer >= half; --outer) {
+	    /* column `outer` of A, from the top down to the diagonal */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	    /* piece of row outer-half of A, columns outer-half .. half-1 */
+	    for (inner = outer - half; inner < half; ++inner) {
+		a[outer - half + inner * ld] = arf[pos++];
+	    }
+	    /* N+1 elements were read; rewind 2*N+2 to reach the previous */
+	    /* column of the packed array */
+	    pos -= order + order + 2;
+	}
+	break;
+
+    case RFP_LOWER:
+	/* N is even, TRANSR = 'T', and UPLO = 'L' */
+	pos = 0;
+	/* column half of A, from the diagonal downwards */
+	for (inner = half; inner < order; ++inner) {
+	    a[inner + half * ld] = arf[pos++];
+	}
+	for (outer = 0; outer <= half - 2; ++outer) {
+	    /* row `outer` of A, columns 0 .. outer */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	    /* column half+1+outer of A, from the diagonal downwards */
+	    for (inner = half + 1 + outer; inner < order; ++inner) {
+		a[inner + (half + 1 + outer) * ld] = arf[pos++];
+	    }
+	}
+	/* remaining rows half-1 .. N-1 of A, columns 0 .. half-1 */
+	for (outer = half - 1; outer < order; ++outer) {
+	    for (inner = 0; inner < half; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	}
+	break;
+
+    case 0:
+	/* N is even, TRANSR = 'T', and UPLO = 'U' */
+	pos = 0;
+	/* rows 0 .. half of A, columns half .. N-1 */
+	for (outer = 0; outer <= half; ++outer) {
+	    for (inner = half; inner < order; ++inner) {
+		a[outer + inner * ld] = arf[pos++];
+	    }
+	}
+	for (outer = 0; outer <= half - 2; ++outer) {
+	    /* column `outer` of A, from the top down to the diagonal */
+	    for (inner = 0; inner <= outer; ++inner) {
+		a[inner + outer * ld] = arf[pos++];
+	    }
+	    /* row half+1+outer of A, from the diagonal to the last column */
+	    for (inner = half + 1 + outer; inner < order; ++inner) {
+		a[half + 1 + outer + inner * ld] = arf[pos++];
+	    }
+	}
+	/* last packed piece: column half-1 of A, top down to the diagonal */
+	for (inner = 0; inner <= half - 1; ++inner) {
+	    a[inner + (half - 1) * ld] = arf[pos++];
+	}
+	break;
+
+    default:
+	/* not reachable: the key is built from three independent bits */
+	break;
+    }
+
+    return 0;
+
+/*     End of DTFTTR */
+
+} /* _starpu_dtfttr_ */
diff --git 
a/min-dgels/base/SRC/dtgevc.c b/min-dgels/base/SRC/dtgevc.c new file mode 100644 index 0000000..bd35adb --- /dev/null +++ b/min-dgels/base/SRC/dtgevc.c @@ -0,0 +1,1418 @@ +/* dtgevc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static logical c_true = TRUE_; +static integer c__2 = 2; +static doublereal c_b34 = 1.; +static integer c__1 = 1; +static doublereal c_b36 = 0.; +static logical c_false = FALSE_; + +/* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer + *mm, integer *m, doublereal *work, integer *info) +{ + /* System generated locals */ + integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + integer i__, j, ja, jc, je, na, im, jr, jw, nw; + doublereal big; + logical lsa, lsb; + doublereal ulp, sum[4] /* was [2][2] */; + integer ibeg, ieig, iend; + doublereal dmin__, temp, xmax, sump[4] /* was [2][2] */, sums[4] + /* was [2][2] */; + extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + doublereal cim2a, cim2b, cre2a, cre2b, temp2, bdiag[2], acoef, scale; + logical ilall; + integer iside; + doublereal sbeta; + extern logical _starpu_lsame_(char 
*, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + logical il2by2; + integer iinfo; + doublereal small; + logical compl; + doublereal anorm, bnorm; + logical compr; + extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * +, doublereal *, integer *, doublereal *, doublereal *, integer *); + doublereal temp2i; + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); + doublereal temp2r; + logical ilabad, ilbbad; + doublereal acoefa, bcoefa, cimaga, cimagb; + logical ilback; + doublereal bcoefi, ascale, bscale, creala, crealb; + extern doublereal _starpu_dlamch_(char *); + doublereal bcoefr, salfar, safmin; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + doublereal xscale, bignum; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical ilcomp, ilcplx; + integer ihwmny; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + + +/* Purpose */ +/* ======= */ + +/* DTGEVC computes some or all of the right and/or left eigenvectors of */ +/* a pair of real matrices (S,P), where S is a quasi-triangular matrix */ +/* and P is upper triangular. Matrix pairs of this type are produced by */ +/* the generalized Schur factorization of a matrix pair (A,B): */ + +/* A = Q*S*Z**T, B = Q*P*Z**T */ + +/* as computed by DGGHRD + DHGEQZ. 
*/ + +/* The right eigenvector x and the left eigenvector y of (S,P) */ +/* corresponding to an eigenvalue w are defined by: */ + +/* S*x = w*P*x, (y**H)*S = w*(y**H)*P, */ + +/* where y**H denotes the conjugate transpose of y. */ +/* The eigenvalues are not input to this routine, but are computed */ +/* directly from the diagonal blocks of S and P. */ + +/* This routine returns the matrices X and/or Y of right and left */ +/* eigenvectors of (S,P), or the products Z*X and/or Q*Y, */ +/* where Z and Q are input matrices. */ +/* If Q and Z are the orthogonal factors from the generalized Schur */ +/* factorization of a matrix pair (A,B), then Z*X and Q*Y */ +/* are the matrices of right and left eigenvectors of (A,B). */ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'R': compute right eigenvectors only; */ +/* = 'L': compute left eigenvectors only; */ +/* = 'B': compute both right and left eigenvectors. */ + +/* HOWMNY (input) CHARACTER*1 */ +/* = 'A': compute all right and/or left eigenvectors; */ +/* = 'B': compute all right and/or left eigenvectors, */ +/* backtransformed by the matrices in VR and/or VL; */ +/* = 'S': compute selected right and/or left eigenvectors, */ +/* specified by the logical array SELECT. */ + +/* SELECT (input) LOGICAL array, dimension (N) */ +/* If HOWMNY='S', SELECT specifies the eigenvectors to be */ +/* computed. If w(j) is a real eigenvalue, the corresponding */ +/* real eigenvector is computed if SELECT(j) is .TRUE.. */ +/* If w(j) and w(j+1) are the real and imaginary parts of a */ +/* complex eigenvalue, the corresponding complex eigenvector */ +/* is computed if either SELECT(j) or SELECT(j+1) is .TRUE., */ +/* and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is */ +/* set to .FALSE.. */ +/* Not referenced if HOWMNY = 'A' or 'B'. */ + +/* N (input) INTEGER */ +/* The order of the matrices S and P. N >= 0. 
*/ + +/* S (input) DOUBLE PRECISION array, dimension (LDS,N) */ +/* The upper quasi-triangular matrix S from a generalized Schur */ +/* factorization, as computed by DHGEQZ. */ + +/* LDS (input) INTEGER */ +/* The leading dimension of array S. LDS >= max(1,N). */ + +/* P (input) DOUBLE PRECISION array, dimension (LDP,N) */ +/* The upper triangular matrix P from a generalized Schur */ +/* factorization, as computed by DHGEQZ. */ +/* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks */ +/* of S must be in positive diagonal form. */ + +/* LDP (input) INTEGER */ +/* The leading dimension of array P. LDP >= max(1,N). */ + +/* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ +/* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */ +/* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ +/* of left Schur vectors returned by DHGEQZ). */ +/* On exit, if SIDE = 'L' or 'B', VL contains: */ +/* if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); */ +/* if HOWMNY = 'B', the matrix Q*Y; */ +/* if HOWMNY = 'S', the left eigenvectors of (S,P) specified by */ +/* SELECT, stored consecutively in the columns of */ +/* VL, in the same order as their eigenvalues. */ + +/* A complex eigenvector corresponding to a complex eigenvalue */ +/* is stored in two consecutive columns, the first holding the */ +/* real part, and the second the imaginary part. */ + +/* Not referenced if SIDE = 'R'. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of array VL. LDVL >= 1, and if */ +/* SIDE = 'L' or 'B', LDVL >= N. */ + +/* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ +/* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */ +/* contain an N-by-N matrix Z (usually the orthogonal matrix Z */ +/* of right Schur vectors returned by DHGEQZ). 
*/ + +/* On exit, if SIDE = 'R' or 'B', VR contains: */ +/* if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); */ +/* if HOWMNY = 'B' or 'b', the matrix Z*X; */ +/* if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) */ +/* specified by SELECT, stored consecutively in the */ +/* columns of VR, in the same order as their */ +/* eigenvalues. */ + +/* A complex eigenvector corresponding to a complex eigenvalue */ +/* is stored in two consecutive columns, the first holding the */ +/* real part and the second the imaginary part. */ + +/* Not referenced if SIDE = 'L'. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. LDVR >= 1, and if */ +/* SIDE = 'R' or 'B', LDVR >= N. */ + +/* MM (input) INTEGER */ +/* The number of columns in the arrays VL and/or VR. MM >= M. */ + +/* M (output) INTEGER */ +/* The number of columns in the arrays VL and/or VR actually */ +/* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M */ +/* is set to N. Each selected real eigenvector occupies one */ +/* column and each selected complex eigenvector occupies two */ +/* columns. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit. */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex */ +/* eigenvalue. */ + +/* Further Details */ +/* =============== */ + +/* Allocation of workspace: */ +/* ---------- -- --------- */ + +/* WORK( j ) = 1-norm of j-th column of A, above the diagonal */ +/* WORK( N+j ) = 1-norm of j-th column of B, above the diagonal */ +/* WORK( 2*N+1:3*N ) = real part of eigenvector */ +/* WORK( 3*N+1:4*N ) = imaginary part of eigenvector */ +/* WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector */ +/* WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector */ + +/* Rowwise vs. 
columnwise solution methods: */ +/* ------- -- ---------- -------- ------- */ + +/* Finding a generalized eigenvector consists basically of solving the */ +/* singular triangular system */ + +/* (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) */ + +/* Consider finding the i-th right eigenvector (assume all eigenvalues */ +/* are real). The equation to be solved is: */ +/* n i */ +/* 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 */ +/* k=j k=j */ + +/* where C = (A - w B) (The components v(i+1:n) are 0.) */ + +/* The "rowwise" method is: */ + +/* (1) v(i) := 1 */ +/* for j = i-1,. . .,1: */ +/* i */ +/* (2) compute s = - sum C(j,k) v(k) and */ +/* k=j+1 */ + +/* (3) v(j) := s / C(j,j) */ + +/* Step 2 is sometimes called the "dot product" step, since it is an */ +/* inner product between the j-th row and the portion of the eigenvector */ +/* that has been computed so far. */ + +/* The "columnwise" method consists basically in doing the sums */ +/* for all the rows in parallel. As each v(j) is computed, the */ +/* contribution of v(j) times the j-th column of C is added to the */ +/* partial sums. Since FORTRAN arrays are stored columnwise, this has */ +/* the advantage that at each step, the elements of C that are accessed */ +/* are adjacent to one another, whereas with the rowwise method, the */ +/* elements accessed at a step are spaced LDS (and LDP) words apart. */ + +/* When finding left eigenvectors, the matrix in question is the */ +/* transpose of the one in storage, so the rowwise method then */ +/* actually accesses columns of A and B at each step, and so is the */ +/* preferred method. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode and Test the input parameters */ + + /* Parameter adjustments */ + --select; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + p_dim1 = *ldp; + p_offset = 1 + p_dim1; + p -= p_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + if (_starpu_lsame_(howmny, "A")) { + ihwmny = 1; + ilall = TRUE_; + ilback = FALSE_; + } else if (_starpu_lsame_(howmny, "S")) { + ihwmny = 2; + ilall = FALSE_; + ilback = FALSE_; + } else if (_starpu_lsame_(howmny, "B")) { + ihwmny = 3; + ilall = TRUE_; + ilback = TRUE_; + } else { + ihwmny = -1; + ilall = TRUE_; + } + + if (_starpu_lsame_(side, "R")) { + iside = 1; + compl = FALSE_; + compr = TRUE_; + } else if (_starpu_lsame_(side, "L")) { + iside = 2; + compl = TRUE_; + compr = FALSE_; + } else if (_starpu_lsame_(side, "B")) { + iside = 3; + compl = TRUE_; + compr = TRUE_; + } else { + iside = -1; + } + + *info = 0; + if (iside < 0) { + *info = -1; + } else if (ihwmny < 0) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*lds < max(1,*n)) { + *info = -6; + } else if (*ldp < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGEVC", &i__1); + return 0; + } + +/* Count the number of eigenvectors to be computed */ + + if (! ilall) { + im = 0; + ilcplx = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (ilcplx) { + ilcplx = FALSE_; + goto L10; + } + if (j < *n) { + if (s[j + 1 + j * s_dim1] != 0.) { + ilcplx = TRUE_; + } + } + if (ilcplx) { + if (select[j] || select[j + 1]) { + im += 2; + } + } else { + if (select[j]) { + ++im; + } + } +L10: + ; + } + } else { + im = *n; + } + +/* Check 2-by-2 diagonal blocks of A, B */ + + ilabad = FALSE_; + ilbbad = FALSE_; + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + if (s[j + 1 + j * s_dim1] != 0.) { + if (p[j + j * p_dim1] == 0. || p[j + 1 + (j + 1) * p_dim1] == 0. 
+ || p[j + (j + 1) * p_dim1] != 0.) { + ilbbad = TRUE_; + } + if (j < *n - 1) { + if (s[j + 2 + (j + 1) * s_dim1] != 0.) { + ilabad = TRUE_; + } + } + } +/* L20: */ + } + + if (ilabad) { + *info = -5; + } else if (ilbbad) { + *info = -7; + } else if (compl && *ldvl < *n || *ldvl < 1) { + *info = -10; + } else if (compr && *ldvr < *n || *ldvr < 1) { + *info = -12; + } else if (*mm < im) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGEVC", &i__1); + return 0; + } + +/* Quick return if possible */ + + *m = im; + if (*n == 0) { + return 0; + } + +/* Machine Constants */ + + safmin = _starpu_dlamch_("Safe minimum"); + big = 1. / safmin; + _starpu_dlabad_(&safmin, &big); + ulp = _starpu_dlamch_("Epsilon") * _starpu_dlamch_("Base"); + small = safmin * *n / ulp; + big = 1. / small; + bignum = 1. / (safmin * *n); + +/* Compute the 1-norm of each column of the strictly upper triangular */ +/* part (i.e., excluding all elements belonging to the diagonal */ +/* blocks) of A and B to check for possible overflow in the */ +/* triangular solver. */ + + anorm = (d__1 = s[s_dim1 + 1], abs(d__1)); + if (*n > 1) { + anorm += (d__1 = s[s_dim1 + 2], abs(d__1)); + } + bnorm = (d__1 = p[p_dim1 + 1], abs(d__1)); + work[1] = 0.; + work[*n + 1] = 0.; + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + temp = 0.; + temp2 = 0.; + if (s[j + (j - 1) * s_dim1] == 0.) { + iend = j - 1; + } else { + iend = j - 2; + } + i__2 = iend; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += (d__1 = s[i__ + j * s_dim1], abs(d__1)); + temp2 += (d__1 = p[i__ + j * p_dim1], abs(d__1)); +/* L30: */ + } + work[j] = temp; + work[*n + j] = temp2; +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*n); + for (i__ = iend + 1; i__ <= i__2; ++i__) { + temp += (d__1 = s[i__ + j * s_dim1], abs(d__1)); + temp2 += (d__1 = p[i__ + j * p_dim1], abs(d__1)); +/* L40: */ + } + anorm = max(anorm,temp); + bnorm = max(bnorm,temp2); +/* L50: */ + } + + ascale = 1. / max(anorm,safmin); + bscale = 1. 
/ max(bnorm,safmin); + +/* Left eigenvectors */ + + if (compl) { + ieig = 0; + +/* Main loop over eigenvalues */ + + ilcplx = FALSE_; + i__1 = *n; + for (je = 1; je <= i__1; ++je) { + +/* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */ +/* (b) this would be the second of a complex pair. */ +/* Check for complex eigenvalue, so as to be sure of which */ +/* entry(-ies) of SELECT to look at. */ + + if (ilcplx) { + ilcplx = FALSE_; + goto L220; + } + nw = 1; + if (je < *n) { + if (s[je + 1 + je * s_dim1] != 0.) { + ilcplx = TRUE_; + nw = 2; + } + } + if (ilall) { + ilcomp = TRUE_; + } else if (ilcplx) { + ilcomp = select[je] || select[je + 1]; + } else { + ilcomp = select[je]; + } + if (! ilcomp) { + goto L220; + } + +/* Decide if (a) singular pencil, (b) real eigenvalue, or */ +/* (c) complex eigenvalue. */ + + if (! ilcplx) { + if ((d__1 = s[je + je * s_dim1], abs(d__1)) <= safmin && ( + d__2 = p[je + je * p_dim1], abs(d__2)) <= safmin) { + +/* Singular matrix pencil -- return unit eigenvector */ + + ++ieig; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + ieig * vl_dim1] = 0.; +/* L60: */ + } + vl[ieig + ieig * vl_dim1] = 1.; + goto L220; + } + } + +/* Clear vector */ + + i__2 = nw * *n; + for (jr = 1; jr <= i__2; ++jr) { + work[(*n << 1) + jr] = 0.; +/* L70: */ + } +/* T */ +/* Compute coefficients in ( a A - b B ) y = 0 */ +/* a is ACOEF */ +/* b is BCOEFR + i*BCOEFI */ + + if (! ilcplx) { + +/* Real eigenvalue */ + +/* Computing MAX */ + d__3 = (d__1 = s[je + je * s_dim1], abs(d__1)) * ascale, d__4 + = (d__2 = p[je + je * p_dim1], abs(d__2)) * bscale, + d__3 = max(d__3,d__4); + temp = 1. 
/ max(d__3,safmin); + salfar = temp * s[je + je * s_dim1] * ascale; + sbeta = temp * p[je + je * p_dim1] * bscale; + acoef = sbeta * ascale; + bcoefr = salfar * bscale; + bcoefi = 0.; + +/* Scale to avoid underflow */ + + scale = 1.; + lsa = abs(sbeta) >= safmin && abs(acoef) < small; + lsb = abs(salfar) >= safmin && abs(bcoefr) < small; + if (lsa) { + scale = small / abs(sbeta) * min(anorm,big); + } + if (lsb) { +/* Computing MAX */ + d__1 = scale, d__2 = small / abs(salfar) * min(bnorm,big); + scale = max(d__1,d__2); + } + if (lsa || lsb) { +/* Computing MIN */ +/* Computing MAX */ + d__3 = 1., d__4 = abs(acoef), d__3 = max(d__3,d__4), d__4 + = abs(bcoefr); + d__1 = scale, d__2 = 1. / (safmin * max(d__3,d__4)); + scale = min(d__1,d__2); + if (lsa) { + acoef = ascale * (scale * sbeta); + } else { + acoef = scale * acoef; + } + if (lsb) { + bcoefr = bscale * (scale * salfar); + } else { + bcoefr = scale * bcoefr; + } + } + acoefa = abs(acoef); + bcoefa = abs(bcoefr); + +/* First component is 1 */ + + work[(*n << 1) + je] = 1.; + xmax = 1.; + } else { + +/* Complex eigenvalue */ + + d__1 = safmin * 100.; + _starpu_dlag2_(&s[je + je * s_dim1], lds, &p[je + je * p_dim1], ldp, & + d__1, &acoef, &temp, &bcoefr, &temp2, &bcoefi); + bcoefi = -bcoefi; + if (bcoefi == 0.) { + *info = je; + return 0; + } + +/* Scale to avoid over/underflow */ + + acoefa = abs(acoef); + bcoefa = abs(bcoefr) + abs(bcoefi); + scale = 1.; + if (acoefa * ulp < safmin && acoefa >= safmin) { + scale = safmin / ulp / acoefa; + } + if (bcoefa * ulp < safmin && bcoefa >= safmin) { +/* Computing MAX */ + d__1 = scale, d__2 = safmin / ulp / bcoefa; + scale = max(d__1,d__2); + } + if (safmin * acoefa > ascale) { + scale = ascale / (safmin * acoefa); + } + if (safmin * bcoefa > bscale) { +/* Computing MIN */ + d__1 = scale, d__2 = bscale / (safmin * bcoefa); + scale = min(d__1,d__2); + } + if (scale != 1.) 
{ + acoef = scale * acoef; + acoefa = abs(acoef); + bcoefr = scale * bcoefr; + bcoefi = scale * bcoefi; + bcoefa = abs(bcoefr) + abs(bcoefi); + } + +/* Compute first two components of eigenvector */ + + temp = acoef * s[je + 1 + je * s_dim1]; + temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * + p_dim1]; + temp2i = -bcoefi * p[je + je * p_dim1]; + if (abs(temp) > abs(temp2r) + abs(temp2i)) { + work[(*n << 1) + je] = 1.; + work[*n * 3 + je] = 0.; + work[(*n << 1) + je + 1] = -temp2r / temp; + work[*n * 3 + je + 1] = -temp2i / temp; + } else { + work[(*n << 1) + je + 1] = 1.; + work[*n * 3 + je + 1] = 0.; + temp = acoef * s[je + (je + 1) * s_dim1]; + work[(*n << 1) + je] = (bcoefr * p[je + 1 + (je + 1) * + p_dim1] - acoef * s[je + 1 + (je + 1) * s_dim1]) / + temp; + work[*n * 3 + je] = bcoefi * p[je + 1 + (je + 1) * p_dim1] + / temp; + } +/* Computing MAX */ + d__5 = (d__1 = work[(*n << 1) + je], abs(d__1)) + (d__2 = + work[*n * 3 + je], abs(d__2)), d__6 = (d__3 = work[(* + n << 1) + je + 1], abs(d__3)) + (d__4 = work[*n * 3 + + je + 1], abs(d__4)); + xmax = max(d__5,d__6); + } + +/* Computing MAX */ + d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, d__1 = + max(d__1,d__2); + dmin__ = max(d__1,safmin); + +/* T */ +/* Triangular solve of (a A - b B) y = 0 */ + +/* T */ +/* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) */ + + il2by2 = FALSE_; + + i__2 = *n; + for (j = je + nw; j <= i__2; ++j) { + if (il2by2) { + il2by2 = FALSE_; + goto L160; + } + + na = 1; + bdiag[0] = p[j + j * p_dim1]; + if (j < *n) { + if (s[j + 1 + j * s_dim1] != 0.) { + il2by2 = TRUE_; + bdiag[1] = p[j + 1 + (j + 1) * p_dim1]; + na = 2; + } + } + +/* Check whether scaling is necessary for dot products */ + + xscale = 1. 
/ max(1.,xmax); +/* Computing MAX */ + d__1 = work[j], d__2 = work[*n + j], d__1 = max(d__1,d__2), + d__2 = acoefa * work[j] + bcoefa * work[*n + j]; + temp = max(d__1,d__2); + if (il2by2) { +/* Computing MAX */ + d__1 = temp, d__2 = work[j + 1], d__1 = max(d__1,d__2), + d__2 = work[*n + j + 1], d__1 = max(d__1,d__2), + d__2 = acoefa * work[j + 1] + bcoefa * work[*n + + j + 1]; + temp = max(d__1,d__2); + } + if (temp > bignum * xscale) { + i__3 = nw - 1; + for (jw = 0; jw <= i__3; ++jw) { + i__4 = j - 1; + for (jr = je; jr <= i__4; ++jr) { + work[(jw + 2) * *n + jr] = xscale * work[(jw + 2) + * *n + jr]; +/* L80: */ + } +/* L90: */ + } + xmax *= xscale; + } + +/* Compute dot products */ + +/* j-1 */ +/* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) */ +/* k=je */ + +/* To reduce the op count, this is done as */ + +/* _ j-1 _ j-1 */ +/* a*conjg( sum S(k,j)*x(k) ) - b*conjg( sum P(k,j)*x(k) ) */ +/* k=je k=je */ + +/* which may cause underflow problems if A or B are close */ +/* to underflow. (E.g., less than SMALL.) 
*/ + + +/* A series of compiler directives to defeat vectorization */ +/* for the next loop */ + +/* $PL$ CMCHAR=' ' */ +/* DIR$ NEXTSCALAR */ +/* $DIR SCALAR */ +/* DIR$ NEXT SCALAR */ +/* VD$L NOVECTOR */ +/* DEC$ NOVECTOR */ +/* VD$ NOVECTOR */ +/* VDIR NOVECTOR */ +/* VOCL LOOP,SCALAR */ +/* IBM PREFER SCALAR */ +/* $PL$ CMCHAR='*' */ + + i__3 = nw; + for (jw = 1; jw <= i__3; ++jw) { + +/* $PL$ CMCHAR=' ' */ +/* DIR$ NEXTSCALAR */ +/* $DIR SCALAR */ +/* DIR$ NEXT SCALAR */ +/* VD$L NOVECTOR */ +/* DEC$ NOVECTOR */ +/* VD$ NOVECTOR */ +/* VDIR NOVECTOR */ +/* VOCL LOOP,SCALAR */ +/* IBM PREFER SCALAR */ +/* $PL$ CMCHAR='*' */ + + i__4 = na; + for (ja = 1; ja <= i__4; ++ja) { + sums[ja + (jw << 1) - 3] = 0.; + sump[ja + (jw << 1) - 3] = 0.; + + i__5 = j - 1; + for (jr = je; jr <= i__5; ++jr) { + sums[ja + (jw << 1) - 3] += s[jr + (j + ja - 1) * + s_dim1] * work[(jw + 1) * *n + jr]; + sump[ja + (jw << 1) - 3] += p[jr + (j + ja - 1) * + p_dim1] * work[(jw + 1) * *n + jr]; +/* L100: */ + } +/* L110: */ + } +/* L120: */ + } + +/* $PL$ CMCHAR=' ' */ +/* DIR$ NEXTSCALAR */ +/* $DIR SCALAR */ +/* DIR$ NEXT SCALAR */ +/* VD$L NOVECTOR */ +/* DEC$ NOVECTOR */ +/* VD$ NOVECTOR */ +/* VDIR NOVECTOR */ +/* VOCL LOOP,SCALAR */ +/* IBM PREFER SCALAR */ +/* $PL$ CMCHAR='*' */ + + i__3 = na; + for (ja = 1; ja <= i__3; ++ja) { + if (ilcplx) { + sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[ + ja - 1] - bcoefi * sump[ja + 1]; + sum[ja + 1] = -acoef * sums[ja + 1] + bcoefr * sump[ + ja + 1] + bcoefi * sump[ja - 1]; + } else { + sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[ + ja - 1]; + } +/* L130: */ + } + +/* T */ +/* Solve ( a A - b B ) y = SUM(,) */ +/* with scaling and perturbation of the denominator */ + + _starpu_dlaln2_(&c_true, &na, &nw, &dmin__, &acoef, &s[j + j * s_dim1] +, lds, bdiag, &bdiag[1], sum, &c__2, &bcoefr, &bcoefi, + &work[(*n << 1) + j], n, &scale, &temp, &iinfo); + if (scale < 1.) 
{ + i__3 = nw - 1; + for (jw = 0; jw <= i__3; ++jw) { + i__4 = j - 1; + for (jr = je; jr <= i__4; ++jr) { + work[(jw + 2) * *n + jr] = scale * work[(jw + 2) * + *n + jr]; +/* L140: */ + } +/* L150: */ + } + xmax = scale * xmax; + } + xmax = max(xmax,temp); +L160: + ; + } + +/* Copy eigenvector to VL, back transforming if */ +/* HOWMNY='B'. */ + + ++ieig; + if (ilback) { + i__2 = nw - 1; + for (jw = 0; jw <= i__2; ++jw) { + i__3 = *n + 1 - je; + _starpu_dgemv_("N", n, &i__3, &c_b34, &vl[je * vl_dim1 + 1], ldvl, + &work[(jw + 2) * *n + je], &c__1, &c_b36, &work[( + jw + 4) * *n + 1], &c__1); +/* L170: */ + } + _starpu_dlacpy_(" ", n, &nw, &work[(*n << 2) + 1], n, &vl[je * + vl_dim1 + 1], ldvl); + ibeg = 1; + } else { + _starpu_dlacpy_(" ", n, &nw, &work[(*n << 1) + 1], n, &vl[ieig * + vl_dim1 + 1], ldvl); + ibeg = je; + } + +/* Scale eigenvector */ + + xmax = 0.; + if (ilcplx) { + i__2 = *n; + for (j = ibeg; j <= i__2; ++j) { +/* Computing MAX */ + d__3 = xmax, d__4 = (d__1 = vl[j + ieig * vl_dim1], abs( + d__1)) + (d__2 = vl[j + (ieig + 1) * vl_dim1], + abs(d__2)); + xmax = max(d__3,d__4); +/* L180: */ + } + } else { + i__2 = *n; + for (j = ibeg; j <= i__2; ++j) { +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = vl[j + ieig * vl_dim1], abs( + d__1)); + xmax = max(d__2,d__3); +/* L190: */ + } + } + + if (xmax > safmin) { + xscale = 1. / xmax; + + i__2 = nw - 1; + for (jw = 0; jw <= i__2; ++jw) { + i__3 = *n; + for (jr = ibeg; jr <= i__3; ++jr) { + vl[jr + (ieig + jw) * vl_dim1] = xscale * vl[jr + ( + ieig + jw) * vl_dim1]; +/* L200: */ + } +/* L210: */ + } + } + ieig = ieig + nw - 1; + +L220: + ; + } + } + +/* Right eigenvectors */ + + if (compr) { + ieig = im + 1; + +/* Main loop over eigenvalues */ + + ilcplx = FALSE_; + for (je = *n; je >= 1; --je) { + +/* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */ +/* (b) this would be the second of a complex pair. 
*/ +/* Check for complex eigenvalue, so as to be sure of which */ +/* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) */ +/* or SELECT(JE-1). */ +/* If this is a complex pair, the 2-by-2 diagonal block */ +/* corresponding to the eigenvalue is in rows/columns JE-1:JE */ + + if (ilcplx) { + ilcplx = FALSE_; + goto L500; + } + nw = 1; + if (je > 1) { + if (s[je + (je - 1) * s_dim1] != 0.) { + ilcplx = TRUE_; + nw = 2; + } + } + if (ilall) { + ilcomp = TRUE_; + } else if (ilcplx) { + ilcomp = select[je] || select[je - 1]; + } else { + ilcomp = select[je]; + } + if (! ilcomp) { + goto L500; + } + +/* Decide if (a) singular pencil, (b) real eigenvalue, or */ +/* (c) complex eigenvalue. */ + + if (! ilcplx) { + if ((d__1 = s[je + je * s_dim1], abs(d__1)) <= safmin && ( + d__2 = p[je + je * p_dim1], abs(d__2)) <= safmin) { + +/* Singular matrix pencil -- unit eigenvector */ + + --ieig; + i__1 = *n; + for (jr = 1; jr <= i__1; ++jr) { + vr[jr + ieig * vr_dim1] = 0.; +/* L230: */ + } + vr[ieig + ieig * vr_dim1] = 1.; + goto L500; + } + } + +/* Clear vector */ + + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + work[(jw + 2) * *n + jr] = 0.; +/* L240: */ + } +/* L250: */ + } + +/* Compute coefficients in ( a A - b B ) x = 0 */ +/* a is ACOEF */ +/* b is BCOEFR + i*BCOEFI */ + + if (! ilcplx) { + +/* Real eigenvalue */ + +/* Computing MAX */ + d__3 = (d__1 = s[je + je * s_dim1], abs(d__1)) * ascale, d__4 + = (d__2 = p[je + je * p_dim1], abs(d__2)) * bscale, + d__3 = max(d__3,d__4); + temp = 1. 
/ max(d__3,safmin); + salfar = temp * s[je + je * s_dim1] * ascale; + sbeta = temp * p[je + je * p_dim1] * bscale; + acoef = sbeta * ascale; + bcoefr = salfar * bscale; + bcoefi = 0.; + +/* Scale to avoid underflow */ + + scale = 1.; + lsa = abs(sbeta) >= safmin && abs(acoef) < small; + lsb = abs(salfar) >= safmin && abs(bcoefr) < small; + if (lsa) { + scale = small / abs(sbeta) * min(anorm,big); + } + if (lsb) { +/* Computing MAX */ + d__1 = scale, d__2 = small / abs(salfar) * min(bnorm,big); + scale = max(d__1,d__2); + } + if (lsa || lsb) { +/* Computing MIN */ +/* Computing MAX */ + d__3 = 1., d__4 = abs(acoef), d__3 = max(d__3,d__4), d__4 + = abs(bcoefr); + d__1 = scale, d__2 = 1. / (safmin * max(d__3,d__4)); + scale = min(d__1,d__2); + if (lsa) { + acoef = ascale * (scale * sbeta); + } else { + acoef = scale * acoef; + } + if (lsb) { + bcoefr = bscale * (scale * salfar); + } else { + bcoefr = scale * bcoefr; + } + } + acoefa = abs(acoef); + bcoefa = abs(bcoefr); + +/* First component is 1 */ + + work[(*n << 1) + je] = 1.; + xmax = 1.; + +/* Compute contribution from column JE of A and B to sum */ +/* (See "Further Details", above.) */ + + i__1 = je - 1; + for (jr = 1; jr <= i__1; ++jr) { + work[(*n << 1) + jr] = bcoefr * p[jr + je * p_dim1] - + acoef * s[jr + je * s_dim1]; +/* L260: */ + } + } else { + +/* Complex eigenvalue */ + + d__1 = safmin * 100.; + _starpu_dlag2_(&s[je - 1 + (je - 1) * s_dim1], lds, &p[je - 1 + (je - + 1) * p_dim1], ldp, &d__1, &acoef, &temp, &bcoefr, & + temp2, &bcoefi); + if (bcoefi == 0.) 
{ + *info = je - 1; + return 0; + } + +/* Scale to avoid over/underflow */ + + acoefa = abs(acoef); + bcoefa = abs(bcoefr) + abs(bcoefi); + scale = 1.; + if (acoefa * ulp < safmin && acoefa >= safmin) { + scale = safmin / ulp / acoefa; + } + if (bcoefa * ulp < safmin && bcoefa >= safmin) { +/* Computing MAX */ + d__1 = scale, d__2 = safmin / ulp / bcoefa; + scale = max(d__1,d__2); + } + if (safmin * acoefa > ascale) { + scale = ascale / (safmin * acoefa); + } + if (safmin * bcoefa > bscale) { +/* Computing MIN */ + d__1 = scale, d__2 = bscale / (safmin * bcoefa); + scale = min(d__1,d__2); + } + if (scale != 1.) { + acoef = scale * acoef; + acoefa = abs(acoef); + bcoefr = scale * bcoefr; + bcoefi = scale * bcoefi; + bcoefa = abs(bcoefr) + abs(bcoefi); + } + +/* Compute first two components of eigenvector */ +/* and contribution to sums */ + + temp = acoef * s[je + (je - 1) * s_dim1]; + temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * + p_dim1]; + temp2i = -bcoefi * p[je + je * p_dim1]; + if (abs(temp) >= abs(temp2r) + abs(temp2i)) { + work[(*n << 1) + je] = 1.; + work[*n * 3 + je] = 0.; + work[(*n << 1) + je - 1] = -temp2r / temp; + work[*n * 3 + je - 1] = -temp2i / temp; + } else { + work[(*n << 1) + je - 1] = 1.; + work[*n * 3 + je - 1] = 0.; + temp = acoef * s[je - 1 + je * s_dim1]; + work[(*n << 1) + je] = (bcoefr * p[je - 1 + (je - 1) * + p_dim1] - acoef * s[je - 1 + (je - 1) * s_dim1]) / + temp; + work[*n * 3 + je] = bcoefi * p[je - 1 + (je - 1) * p_dim1] + / temp; + } + +/* Computing MAX */ + d__5 = (d__1 = work[(*n << 1) + je], abs(d__1)) + (d__2 = + work[*n * 3 + je], abs(d__2)), d__6 = (d__3 = work[(* + n << 1) + je - 1], abs(d__3)) + (d__4 = work[*n * 3 + + je - 1], abs(d__4)); + xmax = max(d__5,d__6); + +/* Compute contribution from columns JE and JE-1 */ +/* of A and B to the sums. 
*/ + + creala = acoef * work[(*n << 1) + je - 1]; + cimaga = acoef * work[*n * 3 + je - 1]; + crealb = bcoefr * work[(*n << 1) + je - 1] - bcoefi * work[*n + * 3 + je - 1]; + cimagb = bcoefi * work[(*n << 1) + je - 1] + bcoefr * work[*n + * 3 + je - 1]; + cre2a = acoef * work[(*n << 1) + je]; + cim2a = acoef * work[*n * 3 + je]; + cre2b = bcoefr * work[(*n << 1) + je] - bcoefi * work[*n * 3 + + je]; + cim2b = bcoefi * work[(*n << 1) + je] + bcoefr * work[*n * 3 + + je]; + i__1 = je - 2; + for (jr = 1; jr <= i__1; ++jr) { + work[(*n << 1) + jr] = -creala * s[jr + (je - 1) * s_dim1] + + crealb * p[jr + (je - 1) * p_dim1] - cre2a * s[ + jr + je * s_dim1] + cre2b * p[jr + je * p_dim1]; + work[*n * 3 + jr] = -cimaga * s[jr + (je - 1) * s_dim1] + + cimagb * p[jr + (je - 1) * p_dim1] - cim2a * s[jr + + je * s_dim1] + cim2b * p[jr + je * p_dim1]; +/* L270: */ + } + } + +/* Computing MAX */ + d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, d__1 = + max(d__1,d__2); + dmin__ = max(d__1,safmin); + +/* Columnwise triangular solve of (a A - b B) x = 0 */ + + il2by2 = FALSE_; + for (j = je - nw; j >= 1; --j) { + +/* If a 2-by-2 block, is in position j-1:j, wait until */ +/* next iteration to process it (when it will be j:j+1) */ + + if (! il2by2 && j > 1) { + if (s[j + (j - 1) * s_dim1] != 0.) { + il2by2 = TRUE_; + goto L370; + } + } + bdiag[0] = p[j + j * p_dim1]; + if (il2by2) { + na = 2; + bdiag[1] = p[j + 1 + (j + 1) * p_dim1]; + } else { + na = 1; + } + +/* Compute x(j) (and x(j+1), if 2-by-2 block) */ + + _starpu_dlaln2_(&c_false, &na, &nw, &dmin__, &acoef, &s[j + j * + s_dim1], lds, bdiag, &bdiag[1], &work[(*n << 1) + j], + n, &bcoefr, &bcoefi, sum, &c__2, &scale, &temp, & + iinfo); + if (scale < 1.) 
{ + + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = je; + for (jr = 1; jr <= i__2; ++jr) { + work[(jw + 2) * *n + jr] = scale * work[(jw + 2) * + *n + jr]; +/* L280: */ + } +/* L290: */ + } + } +/* Computing MAX */ + d__1 = scale * xmax; + xmax = max(d__1,temp); + + i__1 = nw; + for (jw = 1; jw <= i__1; ++jw) { + i__2 = na; + for (ja = 1; ja <= i__2; ++ja) { + work[(jw + 1) * *n + j + ja - 1] = sum[ja + (jw << 1) + - 3]; +/* L300: */ + } +/* L310: */ + } + +/* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */ + + if (j > 1) { + +/* Check whether scaling is necessary for sum. */ + + xscale = 1. / max(1.,xmax); + temp = acoefa * work[j] + bcoefa * work[*n + j]; + if (il2by2) { +/* Computing MAX */ + d__1 = temp, d__2 = acoefa * work[j + 1] + bcoefa * + work[*n + j + 1]; + temp = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = max(temp,acoefa); + temp = max(d__1,bcoefa); + if (temp > bignum * xscale) { + + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = je; + for (jr = 1; jr <= i__2; ++jr) { + work[(jw + 2) * *n + jr] = xscale * work[(jw + + 2) * *n + jr]; +/* L320: */ + } +/* L330: */ + } + xmax *= xscale; + } + +/* Compute the contributions of the off-diagonals of */ +/* column j (and j+1, if 2-by-2 block) of A and B to the */ +/* sums. 
*/ + + + i__1 = na; + for (ja = 1; ja <= i__1; ++ja) { + if (ilcplx) { + creala = acoef * work[(*n << 1) + j + ja - 1]; + cimaga = acoef * work[*n * 3 + j + ja - 1]; + crealb = bcoefr * work[(*n << 1) + j + ja - 1] - + bcoefi * work[*n * 3 + j + ja - 1]; + cimagb = bcoefi * work[(*n << 1) + j + ja - 1] + + bcoefr * work[*n * 3 + j + ja - 1]; + i__2 = j - 1; + for (jr = 1; jr <= i__2; ++jr) { + work[(*n << 1) + jr] = work[(*n << 1) + jr] - + creala * s[jr + (j + ja - 1) * s_dim1] + + crealb * p[jr + (j + ja - 1) * + p_dim1]; + work[*n * 3 + jr] = work[*n * 3 + jr] - + cimaga * s[jr + (j + ja - 1) * s_dim1] + + cimagb * p[jr + (j + ja - 1) * + p_dim1]; +/* L340: */ + } + } else { + creala = acoef * work[(*n << 1) + j + ja - 1]; + crealb = bcoefr * work[(*n << 1) + j + ja - 1]; + i__2 = j - 1; + for (jr = 1; jr <= i__2; ++jr) { + work[(*n << 1) + jr] = work[(*n << 1) + jr] - + creala * s[jr + (j + ja - 1) * s_dim1] + + crealb * p[jr + (j + ja - 1) * + p_dim1]; +/* L350: */ + } + } +/* L360: */ + } + } + + il2by2 = FALSE_; +L370: + ; + } + +/* Copy eigenvector to VR, back transforming if */ +/* HOWMNY='B'. 
*/ + + ieig -= nw; + if (ilback) { + + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + work[(jw + 4) * *n + jr] = work[(jw + 2) * *n + 1] * + vr[jr + vr_dim1]; +/* L380: */ + } + +/* A series of compiler directives to defeat */ +/* vectorization for the next loop */ + + + i__2 = je; + for (jc = 2; jc <= i__2; ++jc) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + work[(jw + 4) * *n + jr] += work[(jw + 2) * *n + + jc] * vr[jr + jc * vr_dim1]; +/* L390: */ + } +/* L400: */ + } +/* L410: */ + } + + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 4) * *n + + jr]; +/* L420: */ + } +/* L430: */ + } + + iend = *n; + } else { + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 2) * *n + + jr]; +/* L440: */ + } +/* L450: */ + } + + iend = je; + } + +/* Scale eigenvector */ + + xmax = 0.; + if (ilcplx) { + i__1 = iend; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + d__3 = xmax, d__4 = (d__1 = vr[j + ieig * vr_dim1], abs( + d__1)) + (d__2 = vr[j + (ieig + 1) * vr_dim1], + abs(d__2)); + xmax = max(d__3,d__4); +/* L460: */ + } + } else { + i__1 = iend; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = vr[j + ieig * vr_dim1], abs( + d__1)); + xmax = max(d__2,d__3); +/* L470: */ + } + } + + if (xmax > safmin) { + xscale = 1. 
/ xmax; + i__1 = nw - 1; + for (jw = 0; jw <= i__1; ++jw) { + i__2 = iend; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + (ieig + jw) * vr_dim1] = xscale * vr[jr + ( + ieig + jw) * vr_dim1]; +/* L480: */ + } +/* L490: */ + } + } +L500: + ; + } + } + + return 0; + +/* End of DTGEVC */ + +} /* _starpu_dtgevc_ */ diff --git a/min-dgels/base/SRC/dtgex2.c b/min-dgels/base/SRC/dtgex2.c new file mode 100644 index 0000000..a5280b0 --- /dev/null +++ b/min-dgels/base/SRC/dtgex2.c @@ -0,0 +1,711 @@ +/* dtgex2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__4 = 4; +static doublereal c_b5 = 0.; +static integer c__1 = 1; +static integer c__2 = 2; +static doublereal c_b42 = 1.; +static doublereal c_b48 = -1.; +static integer c__0 = 0; + +/* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * + n1, integer *n2, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal f, g; + integer i__, m; + doublereal s[16] /* was [4][4] */, t[16] /* was [4][4] */, be[2], ai[2] + , ar[2], sa, sb, li[16] /* was [4][4] */, ir[16] /* + was [4][4] */, ss, ws, eps; + logical weak; + doublereal ddum; + 
integer idum; + doublereal taul[4], dsum; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + doublereal taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* + was [4][4] */; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale, bqra21, brqa21; + extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + doublereal licop[16] /* was [4][4] */; + integer linfo; + doublereal ircop[16] /* was [4][4] */, dnorm; + integer iwork[4]; + extern /* Subroutine */ int _starpu_dlagv2_(doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal * +, doublereal *, doublereal *, doublereal *), _starpu_dgeqr2_(integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *), _starpu_dgerq2_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dorg2r_(integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), _starpu_dorgr2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dormr2_(char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtgsy2_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *); + extern doublereal 
_starpu_dlamch_(char *); + doublereal dscale; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_dlassq_(integer * +, doublereal *, integer *, doublereal *, doublereal *); + logical dtrong; + doublereal thresh, smlnum; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) */ +/* of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair */ +/* (A, B) by an orthogonal equivalence transformation. */ + +/* (A, B) must be in generalized real Schur canonical form (as returned */ +/* by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 */ +/* diagonal blocks. B is upper triangular. */ + +/* Optionally, the matrices Q and Z of generalized Schur vectors are */ +/* updated. */ + +/* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */ +/* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */ + + +/* Arguments */ +/* ========= */ + +/* WANTQ (input) LOGICAL */ +/* .TRUE. : update the left transformation matrix Q; */ +/* .FALSE.: do not update Q. */ + +/* WANTZ (input) LOGICAL */ +/* .TRUE. : update the right transformation matrix Z; */ +/* .FALSE.: do not update Z. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION arrays, dimensions (LDA,N) */ +/* On entry, the matrix A in the pair (A, B). */ +/* On exit, the updated matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). 
*/ + +/* B (input/output) DOUBLE PRECISION arrays, dimensions (LDB,N) */ +/* On entry, the matrix B in the pair (A, B). */ +/* On exit, the updated matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */ +/* On exit, the updated matrix Q. */ +/* Not referenced if WANTQ = .FALSE.. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= 1. */ +/* If WANTQ = .TRUE., LDQ >= N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* On entry, if WANTZ =.TRUE., the orthogonal matrix Z. */ +/* On exit, the updated matrix Z. */ +/* Not referenced if WANTZ = .FALSE.. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1. */ +/* If WANTZ = .TRUE., LDZ >= N. */ + +/* J1 (input) INTEGER */ +/* The index to the first block (A11, B11). 1 <= J1 <= N. */ + +/* N1 (input) INTEGER */ +/* The order of the first block (A11, B11). N1 = 0, 1 or 2. */ + +/* N2 (input) INTEGER */ +/* The order of the second block (A22, B22). N2 = 0, 1 or 2. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)). */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= MAX( 1, N*(N2+N1), (N2+N1)*(N2+N1)*2 ) */ + +/* INFO (output) INTEGER */ +/* =0: Successful exit */ +/* >0: If INFO = 1, the transformed matrix (A, B) would be */ +/* too far from generalized Schur form; the blocks are */ +/* not swapped and (A, B) and (Q, Z) are unchanged. */ +/* The problem of swapping is too ill-conditioned. */ +/* <0: If INFO = -16: LWORK is too small. Appropriate value */ +/* for LWORK is returned in WORK(1). */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. 
*/ + +/* In the current code both weak and strong stability tests are */ +/* performed. The user can omit the strong stability test by changing */ +/* the internal logical parameter WANDS to .FALSE.. See ref. [2] for */ +/* details. */ + +/* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ +/* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ +/* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ +/* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ + +/* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ +/* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ +/* Estimation: Theory, Algorithms and Software, */ +/* Report UMINF - 94.04, Department of Computing Science, Umea */ +/* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ +/* Note 87. To appear in Numerical Algorithms, 1996. */ + +/* ===================================================================== */ +/* Replaced various illegal calls to DCOPY by calls to DLASET, or by DO */ +/* loops. Sven Hammarling, 1/5/02. */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { + return 0; + } + if (*n1 > *n || *j1 + *n1 > *n) { + return 0; + } + m = *n1 + *n2; +/* Computing MAX */ + i__1 = 1, i__2 = *n * m, i__1 = max(i__1,i__2), i__2 = m * m << 1; + if (*lwork < max(i__1,i__2)) { + *info = -16; +/* Computing MAX */ + i__1 = 1, i__2 = *n * m, i__1 = max(i__1,i__2), i__2 = m * m << 1; + work[1] = (doublereal) max(i__1,i__2); + return 0; + } + + weak = FALSE_; + dtrong = FALSE_; + +/* Make a local copy of selected block */ + + _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, li, &c__4); + _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, ir, &c__4); + _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, s, &c__4); + _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, t, &c__4); + +/* Compute threshold for testing acceptance of swapping. */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + dscale = 0.; + dsum = 1.; + _starpu_dlacpy_("Full", &m, &m, s, &c__4, &work[1], &m); + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[1], &c__1, &dscale, &dsum); + _starpu_dlacpy_("Full", &m, &m, t, &c__4, &work[1], &m); + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[1], &c__1, &dscale, &dsum); + dnorm = dscale * sqrt(dsum); +/* Computing MAX */ + d__1 = eps * 10. * dnorm; + thresh = max(d__1,smlnum); + + if (m == 2) { + +/* CASE 1: Swap 1-by-1 and 1-by-1 blocks. */ + +/* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks */ +/* using Givens rotations and perform the swap tentatively. 
*/ + + f = s[5] * t[0] - t[5] * s[0]; + g = s[5] * t[4] - t[5] * s[4]; + sb = abs(t[5]); + sa = abs(s[5]); + _starpu_dlartg_(&f, &g, &ir[4], ir, &ddum); + ir[1] = -ir[4]; + ir[5] = ir[0]; + _starpu_drot_(&c__2, s, &c__1, &s[4], &c__1, ir, &ir[1]); + _starpu_drot_(&c__2, t, &c__1, &t[4], &c__1, ir, &ir[1]); + if (sa >= sb) { + _starpu_dlartg_(s, &s[1], li, &li[1], &ddum); + } else { + _starpu_dlartg_(t, &t[1], li, &li[1], &ddum); + } + _starpu_drot_(&c__2, s, &c__4, &s[1], &c__4, li, &li[1]); + _starpu_drot_(&c__2, t, &c__4, &t[1], &c__4, li, &li[1]); + li[5] = li[0]; + li[4] = -li[1]; + +/* Weak stability test: */ +/* |S21| + |T21| <= O(EPS * F-norm((S, T))) */ + + ws = abs(s[1]) + abs(t[1]); + weak = ws <= thresh; + if (! weak) { + goto L70; + } + + if (TRUE_) { + +/* Strong stability test: */ +/* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ + + _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + + 1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & + c_b42, &work[m * m + 1], &m); + dscale = 0.; + dsum = 1.; + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); + + _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + + 1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & + c_b42, &work[m * m + 1], &m); + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); + ss = dscale * sqrt(dsum); + dtrong = ss <= thresh; + if (! dtrong) { + goto L70; + } + } + +/* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ +/* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). 
*/ + + i__1 = *j1 + 1; + _starpu_drot_(&i__1, &a[*j1 * a_dim1 + 1], &c__1, &a[(*j1 + 1) * a_dim1 + 1], + &c__1, ir, &ir[1]); + i__1 = *j1 + 1; + _starpu_drot_(&i__1, &b[*j1 * b_dim1 + 1], &c__1, &b[(*j1 + 1) * b_dim1 + 1], + &c__1, ir, &ir[1]); + i__1 = *n - *j1 + 1; + _starpu_drot_(&i__1, &a[*j1 + *j1 * a_dim1], lda, &a[*j1 + 1 + *j1 * a_dim1], + lda, li, &li[1]); + i__1 = *n - *j1 + 1; + _starpu_drot_(&i__1, &b[*j1 + *j1 * b_dim1], ldb, &b[*j1 + 1 + *j1 * b_dim1], + ldb, li, &li[1]); + +/* Set N1-by-N2 (2,1) - blocks to ZERO. */ + + a[*j1 + 1 + *j1 * a_dim1] = 0.; + b[*j1 + 1 + *j1 * b_dim1] = 0.; + +/* Accumulate transformations into Q and Z if requested. */ + + if (*wantz) { + _starpu_drot_(n, &z__[*j1 * z_dim1 + 1], &c__1, &z__[(*j1 + 1) * z_dim1 + + 1], &c__1, ir, &ir[1]); + } + if (*wantq) { + _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[(*j1 + 1) * q_dim1 + 1], + &c__1, li, &li[1]); + } + +/* Exit with INFO = 0 if swap was successfully performed. */ + + return 0; + + } else { + +/* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 */ +/* and 2-by-2 blocks. */ + +/* Solve the generalized Sylvester equation */ +/* S11 * R - L * S22 = SCALE * S12 */ +/* T11 * R - L * T22 = SCALE * T12 */ +/* for R and L. Solutions in LI and IR. 
*/ + + _starpu_dlacpy_("Full", n1, n2, &t[(*n1 + 1 << 2) - 4], &c__4, li, &c__4); + _starpu_dlacpy_("Full", n1, n2, &s[(*n1 + 1 << 2) - 4], &c__4, &ir[*n2 + 1 + ( + *n1 + 1 << 2) - 5], &c__4); + _starpu_dtgsy2_("N", &c__0, n1, n2, s, &c__4, &s[*n1 + 1 + (*n1 + 1 << 2) - 5] +, &c__4, &ir[*n2 + 1 + (*n1 + 1 << 2) - 5], &c__4, t, &c__4, & + t[*n1 + 1 + (*n1 + 1 << 2) - 5], &c__4, li, &c__4, &scale, & + dsum, &dscale, iwork, &idum, &linfo); + +/* Compute orthogonal matrix QL: */ + +/* QL' * LI = [ TL ] */ +/* [ 0 ] */ +/* where */ +/* LI = [ -L ] */ +/* [ SCALE * identity(N2) ] */ + + i__1 = *n2; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dscal_(n1, &c_b48, &li[(i__ << 2) - 4], &c__1); + li[*n1 + i__ + (i__ << 2) - 5] = scale; +/* L10: */ + } + _starpu_dgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + _starpu_dorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + +/* Compute orthogonal matrix RQ: */ + +/* IR * RQ' = [ 0 TR], */ + +/* where IR = [ SCALE * identity(N1), R ] */ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + ir[*n2 + i__ + (i__ << 2) - 5] = scale; +/* L20: */ + } + _starpu_dgerq2_(n1, &m, &ir[*n2], &c__4, taur, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + _starpu_dorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + +/* Perform the swapping tentatively: */ + + _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, + s, &c__4); + _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, + t, &c__4); + _starpu_dlacpy_("F", &m, &m, s, &c__4, scpy, &c__4); + _starpu_dlacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); + _starpu_dlacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); + _starpu_dlacpy_("F", &m, 
&m, li, &c__4, licop, &c__4); + +/* Triangularize the B-part by an RQ factorization. */ +/* Apply transformation (from left) to A-part, giving S. */ + + _starpu_dgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + _starpu_dormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & + linfo); + if (linfo != 0) { + goto L70; + } + _starpu_dormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & + linfo); + if (linfo != 0) { + goto L70; + } + +/* Compute F-norm(S21) in BRQA21. (T21 is 0.) */ + + dscale = 0.; + dsum = 1.; + i__1 = *n2; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlassq_(n1, &s[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, &dsum); +/* L30: */ + } + brqa21 = dscale * sqrt(dsum); + +/* Triangularize the B-part by a QR factorization. */ +/* Apply transformation (from right) to A-part, giving S. */ + + _starpu_dgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); + if (linfo != 0) { + goto L70; + } + _starpu_dorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] +, info); + _starpu_dorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ + 1], info); + if (linfo != 0) { + goto L70; + } + +/* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ + + dscale = 0.; + dsum = 1.; + i__1 = *n2; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlassq_(n1, &scpy[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, & + dsum); +/* L40: */ + } + bqra21 = dscale * sqrt(dsum); + +/* Decide which method to use. 
*/ +/* Weak stability test: */ +/* F-norm(S21) <= O(EPS * F-norm((S, T))) */ + + if (bqra21 <= brqa21 && bqra21 <= thresh) { + _starpu_dlacpy_("F", &m, &m, scpy, &c__4, s, &c__4); + _starpu_dlacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); + _starpu_dlacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); + _starpu_dlacpy_("F", &m, &m, licop, &c__4, li, &c__4); + } else if (brqa21 >= thresh) { + goto L70; + } + +/* Set lower triangle of B-part to zero */ + + i__1 = m - 1; + i__2 = m - 1; + _starpu_dlaset_("Lower", &i__1, &i__2, &c_b5, &c_b5, &t[1], &c__4); + + if (TRUE_) { + +/* Strong stability test: */ +/* F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ + + _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + + 1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & + c_b42, &work[m * m + 1], &m); + dscale = 0.; + dsum = 1.; + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); + + _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + + 1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & + work[1], &m); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & + c_b42, &work[m * m + 1], &m); + i__1 = m * m; + _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); + ss = dscale * sqrt(dsum); + dtrong = ss <= thresh; + if (! dtrong) { + goto L70; + } + + } + +/* If the swap is accepted ("weakly" and "strongly"), apply the */ +/* transformations and set N1-by-N2 (2,1)-block to zero. 
*/ + + _starpu_dlaset_("Full", n1, n2, &c_b5, &c_b5, &s[*n2], &c__4); + +/* copy back M-by-M diagonal block starting at index J1 of (A, B) */ + + _starpu_dlacpy_("F", &m, &m, s, &c__4, &a[*j1 + *j1 * a_dim1], lda) + ; + _starpu_dlacpy_("F", &m, &m, t, &c__4, &b[*j1 + *j1 * b_dim1], ldb) + ; + _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, t, &c__4); + +/* Standardize existing 2-by-2 blocks. */ + + i__1 = m * m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + work[1] = 1.; + t[0] = 1.; + idum = *lwork - m * m - 2; + if (*n2 > 1) { + _starpu_dlagv2_(&a[*j1 + *j1 * a_dim1], lda, &b[*j1 + *j1 * b_dim1], ldb, + ar, ai, be, &work[1], &work[2], t, &t[1]); + work[m + 1] = -work[2]; + work[m + 2] = work[1]; + t[*n2 + (*n2 << 2) - 5] = t[0]; + t[4] = -t[1]; + } + work[m * m] = 1.; + t[m + (m << 2) - 5] = 1.; + + if (*n1 > 1) { + _starpu_dlagv2_(&a[*j1 + *n2 + (*j1 + *n2) * a_dim1], lda, &b[*j1 + *n2 + + (*j1 + *n2) * b_dim1], ldb, taur, taul, &work[m * m + 1], + &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t[* + n2 + 1 + (*n2 + 1 << 2) - 5], &t[m + (m - 1 << 2) - 5]); + work[m * m] = work[*n2 * m + *n2 + 1]; + work[m * m - 1] = -work[*n2 * m + *n2 + 2]; + t[m + (m << 2) - 5] = t[*n2 + 1 + (*n2 + 1 << 2) - 5]; + t[m - 1 + (m << 2) - 5] = -t[m + (m - 1 << 2) - 5]; + } + _starpu_dgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &a[*j1 + (*j1 + * + n2) * a_dim1], lda, &c_b5, &work[m * m + 1], n2); + _starpu_dlacpy_("Full", n2, n1, &work[m * m + 1], n2, &a[*j1 + (*j1 + *n2) * + a_dim1], lda); + _starpu_dgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &b[*j1 + (*j1 + * + n2) * b_dim1], ldb, &c_b5, &work[m * m + 1], n2); + _starpu_dlacpy_("Full", n2, n1, &work[m * m + 1], n2, &b[*j1 + (*j1 + *n2) * + b_dim1], ldb); + _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, &work[1], &m, &c_b5, & + work[m * m + 1], &m); + _starpu_dlacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); + _starpu_dgemm_("N", "N", n2, n1, n1, &c_b42, &a[*j1 + 
(*j1 + *n2) * a_dim1], + lda, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], + n2); + _starpu_dlacpy_("Full", n2, n1, &work[1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], + lda); + _starpu_dgemm_("N", "N", n2, n1, n1, &c_b42, &b[*j1 + (*j1 + *n2) * b_dim1], + ldb, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], + n2); + _starpu_dlacpy_("Full", n2, n1, &work[1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], + ldb); + _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, ir, &c__4, t, &c__4, &c_b5, & + work[1], &m); + _starpu_dlacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); + +/* Accumulate transformations into Q and Z if requested. */ + + if (*wantq) { + _starpu_dgemm_("N", "N", n, &m, &m, &c_b42, &q[*j1 * q_dim1 + 1], ldq, li, + &c__4, &c_b5, &work[1], n); + _starpu_dlacpy_("Full", n, &m, &work[1], n, &q[*j1 * q_dim1 + 1], ldq); + + } + + if (*wantz) { + _starpu_dgemm_("N", "N", n, &m, &m, &c_b42, &z__[*j1 * z_dim1 + 1], ldz, + ir, &c__4, &c_b5, &work[1], n); + _starpu_dlacpy_("Full", n, &m, &work[1], n, &z__[*j1 * z_dim1 + 1], ldz); + + } + +/* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ +/* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). 
*/ + + i__ = *j1 + m; + if (i__ <= *n) { + i__1 = *n - i__ + 1; + _starpu_dgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &a[*j1 + i__ * + a_dim1], lda, &c_b5, &work[1], &m); + i__1 = *n - i__ + 1; + _starpu_dlacpy_("Full", &m, &i__1, &work[1], &m, &a[*j1 + i__ * a_dim1], + lda); + i__1 = *n - i__ + 1; + _starpu_dgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &b[*j1 + i__ * + b_dim1], lda, &c_b5, &work[1], &m); /* NOTE(review): B is passed with lda here while the DLACPY on the same block just below uses ldb -- confirm against upstream DTGEX2 */ + i__1 = *n - i__ + 1; + _starpu_dlacpy_("Full", &m, &i__1, &work[1], &m, &b[*j1 + i__ * b_dim1], + ldb); + } + i__ = *j1 - 1; + if (i__ > 0) { + _starpu_dgemm_("N", "N", &i__, &m, &m, &c_b42, &a[*j1 * a_dim1 + 1], lda, + ir, &c__4, &c_b5, &work[1], &i__); + _starpu_dlacpy_("Full", &i__, &m, &work[1], &i__, &a[*j1 * a_dim1 + 1], + lda); + _starpu_dgemm_("N", "N", &i__, &m, &m, &c_b42, &b[*j1 * b_dim1 + 1], ldb, + ir, &c__4, &c_b5, &work[1], &i__); + _starpu_dlacpy_("Full", &i__, &m, &work[1], &i__, &b[*j1 * b_dim1 + 1], + ldb); + } + +/* Exit with INFO = 0 if swap was successfully performed. */ + + return 0; + + } + +/* Exit with INFO = 1 if swap was rejected. */ + +L70: + + *info = 1; + return 0; + +/* End of DTGEX2 */ + +} /* _starpu_dtgex2_ */ diff --git a/min-dgels/base/SRC/dtgexc.c b/min-dgels/base/SRC/dtgexc.c new file mode 100644 index 0000000..957e444 --- /dev/null +++ b/min-dgels/base/SRC/dtgexc.c @@ -0,0 +1,514 @@ +/* dtgexc.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, + integer *ilst, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1; + + /* Local variables */ + integer nbf, nbl, here, lwmin; + extern /* Subroutine */ int _starpu_dtgex2_(logical *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); + integer nbnext; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGEXC reorders the generalized real Schur decomposition of a real */ +/* matrix pair (A,B) using an orthogonal equivalence transformation */ + +/* (A, B) = Q * (A, B) * Z', */ + +/* so that the diagonal block of (A, B) with row index IFST is moved */ +/* to row ILST. */ + +/* (A, B) must be in generalized real Schur canonical form (as returned */ +/* by DGGES), i.e. 
A is block upper triangular with 1-by-1 and 2-by-2 */ +/* diagonal blocks. B is upper triangular. */ + +/* Optionally, the matrices Q and Z of generalized Schur vectors are */ +/* updated. */ + +/* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */ +/* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */ + + +/* Arguments */ +/* ========= */ + +/* WANTQ (input) LOGICAL */ +/* .TRUE. : update the left transformation matrix Q; */ +/* .FALSE.: do not update Q. */ + +/* WANTZ (input) LOGICAL */ +/* .TRUE. : update the right transformation matrix Z; */ +/* .FALSE.: do not update Z. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the matrix A in generalized real Schur canonical */ +/* form. */ +/* On exit, the updated matrix A, again in generalized */ +/* real Schur canonical form. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the matrix B in generalized real Schur canonical */ +/* form (A,B). */ +/* On exit, the updated matrix B, again in generalized */ +/* real Schur canonical form (A,B). */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */ +/* On exit, the updated matrix Q. */ +/* If WANTQ = .FALSE., Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= 1. */ +/* If WANTQ = .TRUE., LDQ >= N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* On entry, if WANTZ = .TRUE., the orthogonal matrix Z. */ +/* On exit, the updated matrix Z. */ +/* If WANTZ = .FALSE., Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1. 
*/ +/* If WANTZ = .TRUE., LDZ >= N. */ + +/* IFST (input/output) INTEGER */ +/* ILST (input/output) INTEGER */ +/* Specify the reordering of the diagonal blocks of (A, B). */ +/* The block with row index IFST is moved to row ILST, by a */ +/* sequence of swapping between adjacent blocks. */ +/* On exit, if IFST pointed on entry to the second row of */ +/* a 2-by-2 block, it is changed to point to the first row; */ +/* ILST always points to the first row of the block in its */ +/* final position (which may differ from its input value by */ +/* +1 or -1). 1 <= IFST, ILST <= N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= 1 when N <= 1, otherwise LWORK >= 4*N + 16. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* =0: successful exit. */ +/* <0: if INFO = -i, the i-th argument had an illegal value. */ +/* =1: The transformed matrix pair (A, B) would be too far */ +/* from generalized Schur form; the problem is ill- */ +/* conditioned. (A, B) may have been partially reordered, */ +/* and ILST points to the first row of the current */ +/* position of the block being moved. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ +/* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ +/* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ +/* Real-Time Applications, Kluwer Academic Publ. 
1993, pp 195-218. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test input arguments. */ + + /* Parameter adjustments: offset the array pointers so that the 1-based, column-major indexing (i + j*dim1) used below works */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } else if (*ldq < 1 || *wantq && *ldq < max(1,*n)) { + *info = -9; + } else if (*ldz < 1 || *wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*ifst < 1 || *ifst > *n) { + *info = -12; + } else if (*ilst < 1 || *ilst > *n) { + *info = -13; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + } else { + lwmin = (*n << 2) + 16; + } + work[1] = (doublereal) lwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGEXC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Determine the first row of the specified block and find out */ +/* if it is 1-by-1 or 2-by-2. */ + + if (*ifst > 1) { + if (a[*ifst + (*ifst - 1) * a_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (a[*ifst + 1 + *ifst * a_dim1] != 0.) { + nbf = 2; + } + } + +/* Determine the first row of the final block */ +/* and find out if it is 1-by-1 or 2-by-2. */ + + if (*ilst > 1) { + if (a[*ilst + (*ilst - 1) * a_dim1] != 0.) 
{ + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (a[*ilst + 1 + *ilst * a_dim1] != 0.) { + nbl = 2; + } + } + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST when the moved block and the final block differ in size. */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap with next one below. */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1-by-1 or 2-by-2. */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (a[here + nbf + 1 + (here + nbf) * a_dim1] != 0.) { + nbnext = 2; + } + } + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ + q_offset], ldq, &z__[z_offset], ldz, &here, &nbf, &nbnext, + &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2-by-2 block breaks into two 1-by-1 blocks (flagged by NBF = 3). */ + + if (nbf == 2) { + if (a[here + 1 + here * a_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* Current block consists of two 1-by-1 blocks, each of which */ +/* must be swapped individually. */ + + nbnext = 1; + if (here + 3 <= *n) { + if (a[here + 3 + (here + 2) * a_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here + 1; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ + q_offset], ldq, &z__[z_offset], ldz, &i__1, &c__1, & + nbnext, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1-by-1 blocks. */ + + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, + &q[q_offset], ldq, &z__[z_offset], ldz, &here, &c__1, + &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + ++here; + + } else { + +/* Recompute NBNEXT in case of 2-by-2 split. */ + + if (a[here + 2 + (here + 1) * a_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2-by-2 block did not split. 
*/ + + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + here, &c__1, &nbnext, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2-by-2 block did split. */ + + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + here, &c__1, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + ++here; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + here, &c__1, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + ++here; + } + + } + } + if (here < *ilst) { + goto L10; + } + } else { + here = *ifst; + +L20: + +/* Swap with next one above. */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1-by-1 or 2-by-2. */ + + nbnext = 1; + if (here >= 3) { + if (a[here - 1 + (here - 2) * a_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ + q_offset], ldq, &z__[z_offset], ldz, &i__1, &nbnext, &nbf, + &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2-by-2 block breaks into two 1-by-1 blocks (flagged by NBF = 3). */ + + if (nbf == 2) { + if (a[here + 1 + here * a_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* Current block consists of two 1-by-1 blocks, each of which */ +/* must be swapped individually. */ + + nbnext = 1; + if (here >= 3) { + if (a[here - 1 + (here - 2) * a_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ + q_offset], ldq, &z__[z_offset], ldz, &i__1, &nbnext, & + c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1-by-1 blocks. 
*/ + + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, + &q[q_offset], ldq, &z__[z_offset], ldz, &here, & + nbnext, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + --here; + } else { + +/* Recompute NBNEXT in case of 2-by-2 split. */ + + if (a[here + (here - 1) * a_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2-by-2 block did not split. */ + + i__1 = here - 1; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + i__1, &c__2, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2-by-2 block did split. */ + + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + here, &c__1, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + --here; + _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & + here, &c__1, &c__1, &work[1], lwork, info); + if (*info != 0) { + *ilst = here; + return 0; + } + --here; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + work[1] = (doublereal) lwmin; + return 0; + +/* End of DTGEXC */ + +} /* _starpu_dtgexc_ */ diff --git a/min-dgels/base/SRC/dtgsen.c b/min-dgels/base/SRC/dtgsen.c new file mode 100644 index 0000000..53078b4 --- /dev/null +++ b/min-dgels/base/SRC/dtgsen.c @@ -0,0 +1,836 @@ +/* dtgsen.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; +static doublereal c_b28 = 1.; + +/* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, + logical *select, integer *n, doublereal *a, integer *lda, doublereal * + b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * + beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, + integer *m, doublereal *pl, doublereal *pr, doublereal *dif, + doublereal *work, integer *lwork, integer *iwork, integer *liwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, k, n1, n2, kk, ks, mn2, ijb; + doublereal eps; + integer kase; + logical pair; + integer ierr; + doublereal dsum; + logical swap; + extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + integer isave[3]; + logical wantd; + integer lwmin; + logical wantp; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical wantd1, wantd2; + extern doublereal _starpu_dlamch_(char *); + doublereal dscale, rdscal; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, 
integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dtgexc_(logical *, logical *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, integer *), _starpu_dlassq_(integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer liwmin; + extern /* Subroutine */ int _starpu_dtgsyl_(char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *); + doublereal smlnum; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2007 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGSEN reorders the generalized real Schur decomposition of a real */ +/* matrix pair (A, B) (in terms of an orthonormal equivalence trans- */ +/* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues */ +/* appears in the leading diagonal blocks of the upper quasi-triangular */ +/* matrix A and the upper triangular B. The leading columns of Q and */ +/* Z form orthonormal bases of the corresponding left and right eigen- */ +/* spaces (deflating subspaces). (A, B) must be in generalized real */ +/* Schur canonical form (as returned by DGGES), i.e. A is block upper */ +/* triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper */ +/* triangular. 
*/ + +/* DTGSEN also computes the generalized eigenvalues */ + +/* w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) */ + +/* of the reordered matrix pair (A, B). */ + +/* Optionally, DTGSEN computes the estimates of reciprocal condition */ +/* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), */ +/* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) */ +/* between the matrix pairs (A11, B11) and (A22,B22) that correspond to */ +/* the selected cluster and the eigenvalues outside the cluster, resp., */ +/* and norms of "projections" onto left and right eigenspaces w.r.t. */ +/* the selected cluster in the (1,1)-block. */ + +/* Arguments */ +/* ========= */ + +/* IJOB (input) INTEGER */ +/* Specifies whether condition numbers are required for the */ +/* cluster of eigenvalues (PL and PR) or the deflating subspaces */ +/* (Difu and Difl): */ +/* =0: Only reorder w.r.t. SELECT. No extras. */ +/* =1: Reciprocal of norms of "projections" onto left and right */ +/* eigenspaces w.r.t. the selected cluster (PL and PR). */ +/* =2: Upper bounds on Difu and Difl. F-norm-based estimate */ +/* (DIF(1:2)). */ +/* =3: Estimate of Difu and Difl. 1-norm-based estimate */ +/* (DIF(1:2)). */ +/* About 5 times as expensive as IJOB = 2. */ +/* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic */ +/* version to get it all. */ +/* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) */ + +/* WANTQ (input) LOGICAL */ +/* .TRUE. : update the left transformation matrix Q; */ +/* .FALSE.: do not update Q. */ + +/* WANTZ (input) LOGICAL */ +/* .TRUE. : update the right transformation matrix Z; */ +/* .FALSE.: do not update Z. */ + +/* SELECT (input) LOGICAL array, dimension (N) */ +/* SELECT specifies the eigenvalues in the selected cluster. */ +/* To select a real eigenvalue w(j), SELECT(j) must be set to */ +/* .TRUE.. 
To select a complex conjugate pair of eigenvalues */ +/* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, */ +/* either SELECT(j) or SELECT(j+1) or both must be set to */ +/* .TRUE.; a complex conjugate pair of eigenvalues must be */ +/* either both included in the cluster or both excluded. */ + +/* N (input) INTEGER */ +/* The order of the matrices A and B. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension(LDA,N) */ +/* On entry, the upper quasi-triangular matrix A, with (A, B) in */ +/* generalized real Schur canonical form. */ +/* On exit, A is overwritten by the reordered matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension(LDB,N) */ +/* On entry, the upper triangular matrix B, with (A, B) in */ +/* generalized real Schur canonical form. */ +/* On exit, B is overwritten by the reordered matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ +/* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i */ +/* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* the real generalized Schur form of (A,B) were further reduced */ +/* to triangular form using complex unitary transformations. */ +/* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* positive, then the j-th and (j+1)-st eigenvalues are a */ +/* complex conjugate pair, with ALPHAI(j+1) negative. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. 
*/ +/* On exit, Q has been postmultiplied by the left orthogonal */ +/* transformation matrix which reorders (A, B); The leading M */ +/* columns of Q form orthonormal bases for the specified pair of */ +/* left eigenspaces (deflating subspaces). */ +/* If WANTQ = .FALSE., Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= 1; */ +/* and if WANTQ = .TRUE., LDQ >= N. */ + +/* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ +/* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. */ +/* On exit, Z has been postmultiplied by the right orthogonal */ +/* transformation matrix which reorders (A, B); The leading M */ +/* columns of Z form orthonormal bases for the specified pair of */ +/* right eigenspaces (deflating subspaces). */ +/* If WANTZ = .FALSE., Z is not referenced. */ + +/* LDZ (input) INTEGER */ +/* The leading dimension of the array Z. LDZ >= 1; */ +/* If WANTZ = .TRUE., LDZ >= N. */ + +/* M (output) INTEGER */ +/* The dimension of the specified pair of left and right eigen- */ +/* spaces (deflating subspaces). 0 <= M <= N. */ + +/* PL (output) DOUBLE PRECISION */ +/* PR (output) DOUBLE PRECISION */ +/* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the */ +/* reciprocal of the norm of "projections" onto left and right */ +/* eigenspaces with respect to the selected cluster. */ +/* 0 < PL, PR <= 1. */ +/* If M = 0 or M = N, PL = PR = 1. */ +/* If IJOB = 0, 2 or 3, PL and PR are not referenced. */ + +/* DIF (output) DOUBLE PRECISION array, dimension (2). */ +/* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. */ +/* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on */ +/* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based */ +/* estimates of Difu and Difl. */ +/* If M = 0 or N, DIF(1:2) = F-norm([A, B]). */ +/* If IJOB = 0 or 1, DIF is not referenced. 
*/ + +/* WORK (workspace/output) DOUBLE PRECISION array, */ +/* dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 4*N+16. */ +/* If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). */ +/* If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* IF IJOB = 0, IWORK is not referenced. Otherwise, */ +/* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. LIWORK >= 1. */ +/* If IJOB = 1, 2 or 4, LIWORK >= N+6. */ +/* If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). */ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* =0: Successful exit. */ +/* <0: If INFO = -i, the i-th argument had an illegal value. */ +/* =1: Reordering of (A, B) failed because the transformed */ +/* matrix pair (A, B) would be too far from generalized */ +/* Schur form; the problem is very ill-conditioned. */ +/* (A, B) may have been partially reordered. */ +/* If requested, 0 is returned in DIF(*), PL and PR. */ + +/* Further Details */ +/* =============== */ + +/* DTGSEN first collects the selected eigenvalues by computing */ +/* orthogonal U and W that move them to the top left corner of (A, B). 
*/ +/* In other words, the selected eigenvalues are the eigenvalues of */ +/* (A11, B11) in: */ + +/* U'*(A, B)*W = (A11 A12) (B11 B12) n1 */ +/* ( 0 A22),( 0 B22) n2 */ +/* n1 n2 n1 n2 */ + +/* where N = n1+n2 and U' means the transpose of U. The first n1 columns */ +/* of U and W span the specified pair of left and right eigenspaces */ +/* (deflating subspaces) of (A, B). */ + +/* If (A, B) has been obtained from the generalized real Schur */ +/* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the */ +/* reordered generalized real Schur form of (C, D) is given by */ + +/* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', */ + +/* and the first n1 columns of Q*U and Z*W span the corresponding */ +/* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). */ + +/* Note that if the selected eigenvalue is sufficiently ill-conditioned, */ +/* then its value may differ significantly from its value before */ +/* reordering. */ + +/* The reciprocal condition numbers of the left and right eigenspaces */ +/* spanned by the first n1 columns of U and W (or Q*U and Z*W) may */ +/* be returned in DIF(1:2), corresponding to Difu and Difl, resp. */ + +/* The Difu and Difl are defined as: */ + +/* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) */ +/* and */ +/* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], */ + +/* where sigma-min(Zu) is the smallest singular value of the */ +/* (2*n1*n2)-by-(2*n1*n2) matrix */ + +/* Zu = [ kron(In2, A11) -kron(A22', In1) ] */ +/* [ kron(In2, B11) -kron(B22', In1) ]. */ + +/* Here, Inx is the identity matrix of size nx and A22' is the */ +/* transpose of A22. kron(X, Y) is the Kronecker product between */ +/* the matrices X and Y. */ + +/* When DIF(2) is small, small changes in (A, B) can cause large changes */ +/* in the deflating subspace. 
An approximate (asymptotic) bound on the */ +/* maximum angular error in the computed deflating subspaces is */ + +/* EPS * norm((A, B)) / DIF(2), */ + +/* where EPS is the machine precision. */ + +/* The reciprocal norm of the projectors on the left and right */ +/* eigenspaces associated with (A11, B11) may be returned in PL and PR. */ +/* They are computed as follows. First we compute L and R so that */ +/* P*(A, B)*Q is block diagonal, where */ + +/* P = ( I -L ) n1 Q = ( I R ) n1 */ +/* ( 0 I ) n2 and ( 0 I ) n2 */ +/* n1 n2 n1 n2 */ + +/* and (L, R) is the solution to the generalized Sylvester equation */ + +/* A11*R - L*A22 = -A12 */ +/* B11*R - L*B22 = -B12 */ + +/* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). */ +/* An approximate (asymptotic) bound on the average absolute error of */ +/* the selected eigenvalues is */ + +/* EPS * norm((A, B)) / PL. */ + +/* There are also global error bounds which are valid for perturbations up */ +/* to a certain restriction: A lower bound (x) on the smallest */ +/* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and */ +/* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), */ +/* (i.e. (A + E, B + F)), is */ + +/* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). */ + +/* An approximate bound on x can be computed from DIF(1:2), PL and PR. */ + +/* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed */ +/* (L', R') and unperturbed (L, R) left and right deflating subspaces */ +/* associated with the selected cluster in the (1,1)-blocks can be */ +/* bounded as */ + +/* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))) */ +/* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))) */ + +/* See LAPACK User's Guide section 4.11 or the following references */ +/* for more information. 
*/ + +/* Note that if the default method for computing the Frobenius-norm- */ +/* based estimate DIF is not wanted (see DLATDF), then the parameter */ +/* IDIFJB (see below) should be changed from 3 to 4 (routine DLATDF */ +/* (IJOB = 2 will be used)). See DTGSYL for more details. */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* References */ +/* ========== */ + +/* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ +/* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ +/* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ +/* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ + +/* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ +/* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ +/* Estimation: Theory, Algorithms and Software, */ +/* Report UMINF - 94.04, Department of Computing Science, Umea */ +/* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ +/* Note 87. To appear in Numerical Algorithms, 1996. */ + +/* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ +/* for Solving the Generalized Sylvester Equation and Estimating the */ +/* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ +/* Department of Computing Science, Umea University, S-901 87 Umea, */ +/* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ +/* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, */ +/* 1996. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + --select; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alphar; + --alphai; + --beta; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --dif; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (*ijob < 0 || *ijob > 5) { + *info = -1; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldq < 1 || *wantq && *ldq < *n) { + *info = -14; + } else if (*ldz < 1 || *wantz && *ldz < *n) { + *info = -16; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSEN", &i__1); + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + ierr = 0; + + wantp = *ijob == 1 || *ijob >= 4; + wantd1 = *ijob == 2 || *ijob == 4; + wantd2 = *ijob == 3 || *ijob == 5; + wantd = wantd1 || wantd2; + +/* Set M to the dimension of the specified pair of deflating */ +/* subspaces. */ + + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + if (k < *n) { + if (a[k + 1 + k * a_dim1] == 0.) 
{ + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[k] || select[k + 1]) { + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + + if (*ijob == 1 || *ijob == 2 || *ijob == 4) { +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << + 1) * (*n - *m); + lwmin = max(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n + 6; + liwmin = max(i__1,i__2); + } else if (*ijob == 3 || *ijob == 5) { +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << + 2) * (*n - *m); + lwmin = max(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*m << 1) * (*n - *m), i__1 = max(i__1,i__2), i__2 = + *n + 6; + liwmin = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 2) + 16; + lwmin = max(i__1,i__2); + liwmin = 1; + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -22; + } else if (*liwork < liwmin && ! lquery) { + *info = -24; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSEN", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. */ + + if (*m == *n || *m == 0) { + if (wantp) { + *pl = 1.; + *pr = 1.; + } + if (wantd) { + dscale = 0.; + dsum = 1.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + _starpu_dlassq_(n, &a[i__ * a_dim1 + 1], &c__1, &dscale, &dsum); + _starpu_dlassq_(n, &b[i__ * b_dim1 + 1], &c__1, &dscale, &dsum); +/* L20: */ + } + dif[1] = dscale * sqrt(dsum); + dif[2] = dif[1]; + } + goto L60; + } + +/* Collect the selected blocks at the top-left corner of (A, B). */ + + ks = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + + swap = select[k]; + if (k < *n) { + if (a[k + 1 + k * a_dim1] != 0.) { + pair = TRUE_; + swap = swap || select[k + 1]; + } + } + + if (swap) { + ++ks; + +/* Swap the K-th block to position KS. 
*/ +/* Perform the reordering of diagonal blocks in (A, B) */ +/* by orthogonal transformation matrices and update */ +/* Q and Z accordingly (if requested): */ + + kk = k; + if (k != ks) { + _starpu_dtgexc_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], + ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &kk, + &ks, &work[1], lwork, &ierr); + } + + if (ierr > 0) { + +/* Swap is rejected: exit. */ + + *info = 1; + if (wantp) { + *pl = 0.; + *pr = 0.; + } + if (wantd) { + dif[1] = 0.; + dif[2] = 0.; + } + goto L60; + } + + if (pair) { + ++ks; + } + } + } +/* L30: */ + } + if (wantp) { + +/* Solve generalized Sylvester equation for R and L */ +/* and compute PL and PR. */ + + n1 = *m; + n2 = *n - *m; + i__ = n1 + 1; + ijb = 0; + _starpu_dlacpy_("Full", &n1, &n2, &a[i__ * a_dim1 + 1], lda, &work[1], &n1); + _starpu_dlacpy_("Full", &n1, &n2, &b[i__ * b_dim1 + 1], ldb, &work[n1 * n2 + + 1], &n1); + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * a_dim1] +, lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + i__ * + b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], & + work[(n1 * n2 << 1) + 1], &i__1, &iwork[1], &ierr); + +/* Estimate the reciprocal of norms of "projections" onto left */ +/* and right eigenspaces. */ + + rdscal = 0.; + dsum = 1.; + i__1 = n1 * n2; + _starpu_dlassq_(&i__1, &work[1], &c__1, &rdscal, &dsum); + *pl = rdscal * sqrt(dsum); + if (*pl == 0.) { + *pl = 1.; + } else { + *pl = dscale / (sqrt(dscale * dscale / *pl + *pl) * sqrt(*pl)); + } + rdscal = 0.; + dsum = 1.; + i__1 = n1 * n2; + _starpu_dlassq_(&i__1, &work[n1 * n2 + 1], &c__1, &rdscal, &dsum); + *pr = rdscal * sqrt(dsum); + if (*pr == 0.) { + *pr = 1.; + } else { + *pr = dscale / (sqrt(dscale * dscale / *pr + *pr) * sqrt(*pr)); + } + } + + if (wantd) { + +/* Compute estimates of Difu and Difl. */ + + if (wantd1) { + n1 = *m; + n2 = *n - *m; + i__ = n1 + 1; + ijb = 3; + +/* Frobenius norm-based Difu-estimate. 
*/ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * + a_dim1], lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + + i__ * b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, & + dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], & + ierr); + +/* Frobenius norm-based Difl-estimate. */ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("N", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, &a[ + a_offset], lda, &work[1], &n2, &b[i__ + i__ * b_dim1], + ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &dscale, + &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], & + ierr); + } else { + + +/* Compute 1-norm-based estimates of Difu and Difl using */ +/* reversed communication with DLACN2. In each step a */ +/* generalized Sylvester equation or a transposed variant */ +/* is solved. */ + + kase = 0; + n1 = *m; + n2 = *n - *m; + i__ = n1 + 1; + ijb = 0; + mn2 = (n1 << 1) * n2; + +/* 1-norm-based estimate of Difu. */ + +L40: + _starpu_dlacn2_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[1], &kase, + isave); + if (kase != 0) { + if (kase == 1) { + +/* Solve generalized Sylvester equation. */ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + + i__ * a_dim1], lda, &work[1], &n1, &b[b_offset], + ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + + 1], &n1, &dscale, &dif[1], &work[(n1 << 1) * n2 + + 1], &i__1, &iwork[1], &ierr); + } else { + +/* Solve the transposed variant. */ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("T", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + + i__ * a_dim1], lda, &work[1], &n1, &b[b_offset], + ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + + 1], &n1, &dscale, &dif[1], &work[(n1 << 1) * n2 + + 1], &i__1, &iwork[1], &ierr); + } + goto L40; + } + dif[1] = dscale / dif[1]; + +/* 1-norm-based estimate of Difl. 
*/ + +L50: + _starpu_dlacn2_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[2], &kase, + isave); + if (kase != 0) { + if (kase == 1) { + +/* Solve generalized Sylvester equation. */ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("N", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, + &a[a_offset], lda, &work[1], &n2, &b[i__ + i__ * + b_dim1], ldb, &b[b_offset], ldb, &work[n1 * n2 + + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + + 1], &i__1, &iwork[1], &ierr); + } else { + +/* Solve the transposed variant. */ + + i__1 = *lwork - (n1 << 1) * n2; + _starpu_dtgsyl_("T", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, + &a[a_offset], lda, &work[1], &n2, &b[i__ + i__ * + b_dim1], ldb, &b[b_offset], ldb, &work[n1 * n2 + + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + + 1], &i__1, &iwork[1], &ierr); + } + goto L50; + } + dif[2] = dscale / dif[2]; + + } + } + +L60: + +/* Compute generalized eigenvalues of reordered pair (A, B) and */ +/* normalize the generalized Schur form. */ + + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + + if (k < *n) { + if (a[k + 1 + k * a_dim1] != 0.) { + pair = TRUE_; + } + } + + if (pair) { + +/* Compute the eigenvalue(s) at position K. */ + + work[1] = a[k + k * a_dim1]; + work[2] = a[k + 1 + k * a_dim1]; + work[3] = a[k + (k + 1) * a_dim1]; + work[4] = a[k + 1 + (k + 1) * a_dim1]; + work[5] = b[k + k * b_dim1]; + work[6] = b[k + 1 + k * b_dim1]; + work[7] = b[k + (k + 1) * b_dim1]; + work[8] = b[k + 1 + (k + 1) * b_dim1]; + d__1 = smlnum * eps; + _starpu_dlag2_(&work[1], &c__2, &work[5], &c__2, &d__1, &beta[k], & + beta[k + 1], &alphar[k], &alphar[k + 1], &alphai[k]); + alphai[k + 1] = -alphai[k]; + + } else { + + if (d_sign(&c_b28, &b[k + k * b_dim1]) < 0.) 
{ + +/* If B(K,K) is negative, make it positive */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[k + i__ * a_dim1] = -a[k + i__ * a_dim1]; + b[k + i__ * b_dim1] = -b[k + i__ * b_dim1]; + if (*wantq) { + q[i__ + k * q_dim1] = -q[i__ + k * q_dim1]; + } +/* L70: */ + } + } + + alphar[k] = a[k + k * a_dim1]; + alphai[k] = 0.; + beta[k] = b[k + k * b_dim1]; + + } + } +/* L80: */ + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DTGSEN */ + +} /* _starpu_dtgsen_ */ diff --git a/min-dgels/base/SRC/dtgsja.c b/min-dgels/base/SRC/dtgsja.c new file mode 100644 index 0000000..ae942e6 --- /dev/null +++ b/min-dgels/base/SRC/dtgsja.c @@ -0,0 +1,625 @@ +/* dtgsja.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values: literals that are passed by address to the routines called below */ + +static doublereal c_b13 = 0.; +static doublereal c_b14 = 1.; +static integer c__1 = 1; +static doublereal c_b43 = -1.; + +/* Subroutine: computes the generalized singular value decomposition of the upper triangular (or trapezoidal) pair (A, B); see Purpose below. Always returns 0 -- the status is reported through INFO. */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, integer *k, integer *l, doublereal *a, + integer *lda, doublereal *b, integer *ldb, doublereal *tola, + doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * + ldq, doublereal *work, integer *ncycle, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + integer i__, j;
+ doublereal a1, a2, a3, b1, b2, b3, csq, csu, csv, snq, rwk, snu, snv; + extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + doublereal gamma; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical initq, initu, initv, wantq, upper; + doublereal error, ssmin; + logical wantu, wantv; + extern /* Subroutine */ int _starpu_dlags2_(logical *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), _starpu_dlapll_(integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *); + integer kcycle; + extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), _starpu_dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGSJA computes the generalized singular value decomposition (GSVD) */ +/* of two real upper triangular (or trapezoidal) matrices A and B.
*/ + +/* On entry, it is assumed that matrices A and B have the following */ +/* forms, which may be obtained by the preprocessing subroutine DGGSVP */ +/* from a general M-by-N matrix A and P-by-N matrix B: */ + +/* N-K-L K L */ +/* A = K ( 0 A12 A13 ) if M-K-L >= 0; */ +/* L ( 0 0 A23 ) */ +/* M-K-L ( 0 0 0 ) */ + +/* N-K-L K L */ +/* A = K ( 0 A12 A13 ) if M-K-L < 0; */ +/* M-K ( 0 0 A23 ) */ + +/* N-K-L K L */ +/* B = L ( 0 0 B13 ) */ +/* P-L ( 0 0 0 ) */ + +/* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ +/* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ +/* otherwise A23 is (M-K)-by-L upper trapezoidal. */ + +/* On exit, */ + +/* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), */ + +/* where U, V and Q are orthogonal matrices, Z' denotes the transpose */ +/* of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are */ +/* ``diagonal'' matrices, which are of the following structures: */ + +/* If M-K-L >= 0, */ + +/* K L */ +/* D1 = K ( I 0 ) */ +/* L ( 0 C ) */ +/* M-K-L ( 0 0 ) */ + +/* K L */ +/* D2 = L ( 0 S ) */ +/* P-L ( 0 0 ) */ + +/* N-K-L K L */ +/* ( 0 R ) = K ( 0 R11 R12 ) K */ +/* L ( 0 0 R22 ) L */ + +/* where */ + +/* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ +/* S = diag( BETA(K+1), ... , BETA(K+L) ), */ +/* C**2 + S**2 = I. */ + +/* R is stored in A(1:K+L,N-K-L+1:N) on exit. */ + +/* If M-K-L < 0, */ + +/* K M-K K+L-M */ +/* D1 = K ( I 0 0 ) */ +/* M-K ( 0 C 0 ) */ + +/* K M-K K+L-M */ +/* D2 = M-K ( 0 S 0 ) */ +/* K+L-M ( 0 0 I ) */ +/* P-L ( 0 0 0 ) */ + +/* N-K-L K M-K K+L-M */ +/* ( 0 R ) = K ( 0 R11 R12 R13 ) */ +/* M-K ( 0 0 R22 R23 ) */ +/* K+L-M ( 0 0 0 R33 ) */ + +/* where */ +/* C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ +/* S = diag( BETA(K+1), ... , BETA(M) ), */ +/* C**2 + S**2 = I. */ + +/* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored */ +/* ( 0 R22 R23 ) */ +/* in B(M-K+1:L,N+M-K-L+1:N) on exit.
*/ + +/* The computation of the orthogonal transformation matrices U, V or Q */ +/* is optional. These matrices may either be formed explicitly, or they */ +/* may be postmultiplied into input matrices U1, V1, or Q1. */ + +/* Arguments */ +/* ========= */ + +/* JOBU (input) CHARACTER*1 */ +/* = 'U': U must contain an orthogonal matrix U1 on entry, and */ +/* the product U1*U is returned; */ +/* = 'I': U is initialized to the unit matrix, and the */ +/* orthogonal matrix U is returned; */ +/* = 'N': U is not computed. */ + +/* JOBV (input) CHARACTER*1 */ +/* = 'V': V must contain an orthogonal matrix V1 on entry, and */ +/* the product V1*V is returned; */ +/* = 'I': V is initialized to the unit matrix, and the */ +/* orthogonal matrix V is returned; */ +/* = 'N': V is not computed. */ + +/* JOBQ (input) CHARACTER*1 */ +/* = 'Q': Q must contain an orthogonal matrix Q1 on entry, and */ +/* the product Q1*Q is returned; */ +/* = 'I': Q is initialized to the unit matrix, and the */ +/* orthogonal matrix Q is returned; */ +/* = 'N': Q is not computed. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* P (input) INTEGER */ +/* The number of rows of the matrix B. P >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrices A and B. N >= 0. */ + +/* K (input) INTEGER */ +/* L (input) INTEGER */ +/* K and L specify the subblocks in the input matrices A and B: */ +/* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) */ +/* of A and B, whose GSVD is going to be computed by DTGSJA. */ +/* See Further details. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular */ +/* matrix R or part of R. See Purpose for details. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M).
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ +/* On entry, the P-by-N matrix B. */ +/* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains */ +/* a part of R. See Purpose for details. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,P). */ + +/* TOLA (input) DOUBLE PRECISION */ +/* TOLB (input) DOUBLE PRECISION */ +/* TOLA and TOLB are the convergence criteria for the Jacobi- */ +/* Kogbetliantz iteration procedure. Generally, they are the */ +/* same as used in the preprocessing step, say */ +/* TOLA = max(M,N)*norm(A)*MACHEPS, */ +/* TOLB = max(P,N)*norm(B)*MACHEPS. */ + +/* ALPHA (output) DOUBLE PRECISION array, dimension (N) */ +/* BETA (output) DOUBLE PRECISION array, dimension (N) */ +/* On exit, ALPHA and BETA contain the generalized singular */ +/* value pairs of A and B; */ +/* ALPHA(1:K) = 1, */ +/* BETA(1:K) = 0, */ +/* and if M-K-L >= 0, */ +/* ALPHA(K+1:K+L) = diag(C), */ +/* BETA(K+1:K+L) = diag(S), */ +/* or if M-K-L < 0, */ +/* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 */ +/* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. */ +/* Furthermore, if K+L < N, */ +/* ALPHA(K+L+1:N) = 0 and */ +/* BETA(K+L+1:N) = 0. */ + +/* U (input/output) DOUBLE PRECISION array, dimension (LDU,M) */ +/* On entry, if JOBU = 'U', U must contain a matrix U1 (usually */ +/* the orthogonal matrix returned by DGGSVP). */ +/* On exit, */ +/* if JOBU = 'I', U contains the orthogonal matrix U; */ +/* if JOBU = 'U', U contains the product U1*U. */ +/* If JOBU = 'N', U is not referenced. */ + +/* LDU (input) INTEGER */ +/* The leading dimension of the array U. LDU >= max(1,M) if */ +/* JOBU = 'U' or 'I' (the argument check below covers both); LDU >= 1 otherwise. */ + +/* V (input/output) DOUBLE PRECISION array, dimension (LDV,P) */ +/* On entry, if JOBV = 'V', V must contain a matrix V1 (usually */ +/* the orthogonal matrix returned by DGGSVP). */ +/* On exit, */ +/* if JOBV = 'I', V contains the orthogonal matrix V; */ +/* if JOBV = 'V', V contains the product V1*V.
*/ +/* If JOBV = 'N', V is not referenced. */ + +/* LDV (input) INTEGER */ +/* The leading dimension of the array V. LDV >= max(1,P) if */ +/* JOBV = 'V' or 'I' (the argument check below covers both); LDV >= 1 otherwise. */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually */ +/* the orthogonal matrix returned by DGGSVP). */ +/* On exit, */ +/* if JOBQ = 'I', Q contains the orthogonal matrix Q; */ +/* if JOBQ = 'Q', Q contains the product Q1*Q. */ +/* If JOBQ = 'N', Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= max(1,N) if */ +/* JOBQ = 'Q' or 'I' (the argument check below covers both); LDQ >= 1 otherwise. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + +/* NCYCLE (output) INTEGER */ +/* The number of cycles required for convergence; MAXIT+1 (= 41) when INFO = 1, and not set when INFO < 0. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* = 1: the procedure does not converge after MAXIT cycles. */ + +/* Internal Parameters */ +/* =================== */ + +/* MAXIT INTEGER */ +/* MAXIT specifies the total loops that the iterative procedure */ +/* may take. If after MAXIT cycles, the routine fails to */ +/* converge, we return INFO = 1. MAXIT is hard-coded as 40 in the cycle loop below. */ + +/* Further Details */ +/* =============== */ + +/* DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce */ +/* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L */ +/* matrix B13 to the form: */ + +/* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, */ + +/* where U1, V1 and Q1 are orthogonal matrices, and Z' is the transpose */ +/* of Z. C1 and S1 are diagonal matrices satisfying */ + +/* C1**2 + S1**2 = I, */ + +/* and R1 is an L-by-L nonsingular upper triangular matrix. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* ..
*/ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input parameters (K, L, TOLA and TOLB are not validated here) */ + + /* Parameter adjustments: shift the array pointers so the code below can use 1-based, Fortran-style indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --alpha; + --beta; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + initu = _starpu_lsame_(jobu, "I"); + wantu = initu || _starpu_lsame_(jobu, "U"); + + initv = _starpu_lsame_(jobv, "I"); + wantv = initv || _starpu_lsame_(jobv, "V"); + + initq = _starpu_lsame_(jobq, "I"); + wantq = initq || _starpu_lsame_(jobq, "Q"); + + *info = 0; + if (! (initu || wantu || _starpu_lsame_(jobu, "N"))) { + *info = -1; + } else if (! (initv || wantv || _starpu_lsame_(jobv, "N"))) + { + *info = -2; + } else if (! (initq || wantq || _starpu_lsame_(jobq, "N"))) + { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*p < 0) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < max(1,*m)) { + *info = -10; + } else if (*ldb < max(1,*p)) { + *info = -12; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -18; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -20; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -22; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSJA", &i__1); + return 0; + } + +/* Initialize U, V and Q, if necessary (i.e. when the matching JOBU/JOBV/JOBQ is 'I') */ + + if (initu) { + _starpu_dlaset_("Full", m, m, &c_b13, &c_b14, &u[u_offset], ldu); + } + if (initv) { + _starpu_dlaset_("Full", p, p, &c_b13, &c_b14, &v[v_offset], ldv); + } + if (initq) { + _starpu_dlaset_("Full", n, n, &c_b13, &c_b14, &q[q_offset], ldq); + } + +/* Loop until convergence, for at most MAXIT = 40 cycles */ + + upper = FALSE_; + for (kcycle = 1; kcycle <= 40; ++kcycle) { + + upper = !
upper; /* flips every cycle (TRUE on odd cycles); passed to DLAGS2 and selects whether the (I,J) or the (J,I) entry is read as A2/B2 and then zeroed */ + + i__1 = *l - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *l; + for (j = i__ + 1; j <= i__2; ++j) { + + a1 = 0.; + a2 = 0.; + a3 = 0.; + if (*k + i__ <= *m) { + a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; + } + if (*k + j <= *m) { + a3 = a[*k + j + (*n - *l + j) * a_dim1]; + } + + b1 = b[i__ + (*n - *l + i__) * b_dim1]; + b3 = b[j + (*n - *l + j) * b_dim1]; + + if (upper) { + if (*k + i__ <= *m) { + a2 = a[*k + i__ + (*n - *l + j) * a_dim1]; + } + b2 = b[i__ + (*n - *l + j) * b_dim1]; + } else { + if (*k + j <= *m) { + a2 = a[*k + j + (*n - *l + i__) * a_dim1]; + } + b2 = b[j + (*n - *l + i__) * b_dim1]; + } + + _starpu_dlags2_(&upper, &a1, &a2, &a3, &b1, &b2, &b3, &csu, &snu, & + csv, &snv, &csq, &snq); + +/* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A */ + + if (*k + j <= *m) { + _starpu_drot_(l, &a[*k + j + (*n - *l + 1) * a_dim1], lda, &a[*k + + i__ + (*n - *l + 1) * a_dim1], lda, &csu, &snu); + } + +/* Update I-th and J-th rows of matrix B: V'*B */ + + _starpu_drot_(l, &b[j + (*n - *l + 1) * b_dim1], ldb, &b[i__ + (*n - * + l + 1) * b_dim1], ldb, &csv, &snv); + +/* Update (N-L+I)-th and (N-L+J)-th columns of matrices */ +/* A and B: A*Q and B*Q */ + +/* Computing MIN */ + i__4 = *k + *l; + i__3 = min(i__4,*m); + _starpu_drot_(&i__3, &a[(*n - *l + j) * a_dim1 + 1], &c__1, &a[(*n - * + l + i__) * a_dim1 + 1], &c__1, &csq, &snq); + + _starpu_drot_(l, &b[(*n - *l + j) * b_dim1 + 1], &c__1, &b[(*n - *l + + i__) * b_dim1 + 1], &c__1, &csq, &snq); + + if (upper) { + if (*k + i__ <= *m) { + a[*k + i__ + (*n - *l + j) * a_dim1] = 0.; + } + b[i__ + (*n - *l + j) * b_dim1] = 0.; + } else { + if (*k + j <= *m) { + a[*k + j + (*n - *l + i__) * a_dim1] = 0.; + } + b[j + (*n - *l + i__) * b_dim1] = 0.; + } + +/* Update orthogonal matrices U, V, Q, if desired.
*/ + + if (wantu && *k + j <= *m) { + _starpu_drot_(m, &u[(*k + j) * u_dim1 + 1], &c__1, &u[(*k + i__) * + u_dim1 + 1], &c__1, &csu, &snu); + } + + if (wantv) { + _starpu_drot_(p, &v[j * v_dim1 + 1], &c__1, &v[i__ * v_dim1 + 1], + &c__1, &csv, &snv); + } + + if (wantq) { + _starpu_drot_(n, &q[(*n - *l + j) * q_dim1 + 1], &c__1, &q[(*n - * + l + i__) * q_dim1 + 1], &c__1, &csq, &snq); + } + +/* L10: */ + } +/* L20: */ + } + + if (! upper) { + +/* The matrices A13 and B13 were lower triangular at the start */ +/* of the cycle, and are now upper triangular. */ + +/* Convergence test: test the parallelism of the corresponding */ +/* rows of A and B (reached only when UPPER is false, i.e. every second cycle; ERROR is the largest SSMIN obtained from DLAPLL). */ + + error = 0.; +/* Computing MIN */ + i__2 = *l, i__3 = *m - *k; + i__1 = min(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *l - i__ + 1; + _starpu_dcopy_(&i__2, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda, & + work[1], &c__1); + i__2 = *l - i__ + 1; + _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &work[* + l + 1], &c__1); + i__2 = *l - i__ + 1; + _starpu_dlapll_(&i__2, &work[1], &c__1, &work[*l + 1], &c__1, &ssmin); + error = max(error,ssmin); +/* L30: */ + } + + if (abs(error) <= min(*tola,*tolb)) { + goto L50; + } + } + +/* End of cycle loop */ + +/* L40: */ + } + +/* The algorithm has not converged after MAXIT cycles. */ + + *info = 1; + goto L100; + +L50: + +/* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. */ +/* Compute the generalized singular value pairs (ALPHA, BETA), and */ +/* set the triangular matrix R to array A. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + alpha[i__] = 1.; + beta[i__] = 0.; +/* L60: */ + } + +/* Computing MIN */ + i__2 = *l, i__3 = *m - *k; + i__1 = min(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + + a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; + b1 = b[i__ + (*n - *l + i__) * b_dim1]; + + if (a1 != 0.) { + gamma = b1 / a1; + +/* change sign if necessary: when GAMMA < 0, scale row I of B (and column I of V, if wanted) by -1 */ + + if (gamma < 0.)
{ + i__2 = *l - i__ + 1; + _starpu_dscal_(&i__2, &c_b43, &b[i__ + (*n - *l + i__) * b_dim1], ldb) + ; + if (wantv) { + _starpu_dscal_(p, &c_b43, &v[i__ * v_dim1 + 1], &c__1); + } + } + + d__1 = abs(gamma); + _starpu_dlartg_(&d__1, &c_b14, &beta[*k + i__], &alpha[*k + i__], &rwk); + + if (alpha[*k + i__] >= beta[*k + i__]) { + i__2 = *l - i__ + 1; + d__1 = 1. / alpha[*k + i__]; + _starpu_dscal_(&i__2, &d__1, &a[*k + i__ + (*n - *l + i__) * a_dim1], + lda); + } else { + i__2 = *l - i__ + 1; + d__1 = 1. / beta[*k + i__]; + _starpu_dscal_(&i__2, &d__1, &b[i__ + (*n - *l + i__) * b_dim1], ldb); + i__2 = *l - i__ + 1; + _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + + i__ + (*n - *l + i__) * a_dim1], lda); + } + + } else { + + alpha[*k + i__] = 0.; + beta[*k + i__] = 1.; + i__2 = *l - i__ + 1; + _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + + i__ + (*n - *l + i__) * a_dim1], lda); + + } + +/* L70: */ + } + +/* Post-assignment: ALPHA = 0 and BETA = 1 for indices M+1..K+L, then ALPHA = BETA = 0 for indices K+L+1..N */ + + i__1 = *k + *l; + for (i__ = *m + 1; i__ <= i__1; ++i__) { + alpha[i__] = 0.; + beta[i__] = 1.; +/* L80: */ + } + + if (*k + *l < *n) { + i__1 = *n; + for (i__ = *k + *l + 1; i__ <= i__1; ++i__) { + alpha[i__] = 0.; + beta[i__] = 0.; +/* L90: */ + } + } + +L100: + *ncycle = kcycle; + return 0; + +/* End of DTGSJA */ + +} /* _starpu_dtgsja_ */ diff --git a/min-dgels/base/SRC/dtgsna.c b/min-dgels/base/SRC/dtgsna.c new file mode 100644 index 0000000..c56373a --- /dev/null +++ b/min-dgels/base/SRC/dtgsna.c @@ -0,0 +1,695 @@ +/* dtgsna.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b19 = 1.; +static doublereal c_b21 = 0.; +static integer c__2 = 2; +static logical c_false = FALSE_; +static integer c__3 = 3; + +/* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * + work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, k; + doublereal c1, c2; + integer n1, n2, ks, iz; + doublereal eps, beta, cond; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + logical pair; + integer ierr; + doublereal uhav, uhbv; + integer ifst; + doublereal lnrm; + integer ilst; + doublereal rnrm; + extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal root1, root2, scale; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + 
doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal uhavi, uhbvi, tmpii; + integer lwmin; + logical wants; + doublereal tmpir, tmpri, dummy[1], tmprr; + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + doublereal dummy1[1]; + extern doublereal _starpu_dlamch_(char *); + doublereal alphai, alphar; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *), _starpu_dtgexc_(logical *, logical *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, integer *); + logical wantbh, wantdf, somcon; + doublereal alprqt; + extern /* Subroutine */ int _starpu_dtgsyl_(char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *); + doublereal smlnum; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGSNA estimates reciprocal condition numbers for specified */ +/* eigenvalues and/or eigenvectors of a matrix pair (A, B) in */ +/* generalized real Schur canonical form (or of any matrix pair */ +/* (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where */ +/* Z' denotes the transpose of Z. */ + +/* (A, B) must be in generalized real Schur form (as returned by DGGES), */ +/* i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal */ +/* blocks. B is upper triangular. 
*/ + + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies whether condition numbers are required for */ +/* eigenvalues (S) or eigenvectors (DIF): */ +/* = 'E': for eigenvalues only (S); */ +/* = 'V': for eigenvectors only (DIF); */ +/* = 'B': for both eigenvalues and eigenvectors (S and DIF). */ + +/* HOWMNY (input) CHARACTER*1 */ +/* = 'A': compute condition numbers for all eigenpairs; */ +/* = 'S': compute condition numbers for selected eigenpairs */ +/* specified by the array SELECT. */ + +/* SELECT (input) LOGICAL array, dimension (N) */ +/* If HOWMNY = 'S', SELECT specifies the eigenpairs for which */ +/* condition numbers are required. To select condition numbers */ +/* for the eigenpair corresponding to a real eigenvalue w(j), */ +/* SELECT(j) must be set to .TRUE.. To select condition numbers */ +/* corresponding to a complex conjugate pair of eigenvalues w(j) */ +/* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be */ +/* set to .TRUE.. */ +/* If HOWMNY = 'A', SELECT is not referenced. */ + +/* N (input) INTEGER */ +/* The order of the square matrix pair (A, B). N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The upper quasi-triangular matrix A in the pair (A,B). */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ +/* The upper triangular matrix B in the pair (A,B). */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) */ +/* If JOB = 'E' or 'B', VL must contain left eigenvectors of */ +/* (A, B), corresponding to the eigenpairs specified by HOWMNY */ +/* and SELECT. The eigenvectors must be stored in consecutive */ +/* columns of VL, as returned by DTGEVC. */ +/* If JOB = 'V', VL is not referenced. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. LDVL >= 1. 
*/ +/* If JOB = 'E' or 'B', LDVL >= N. */ + +/* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) */ +/* If JOB = 'E' or 'B', VR must contain right eigenvectors of */ +/* (A, B), corresponding to the eigenpairs specified by HOWMNY */ +/* and SELECT. The eigenvectors must be stored in consecutive */ +/* columns of VR, as returned by DTGEVC. */ +/* If JOB = 'V', VR is not referenced. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. LDVR >= 1. */ +/* If JOB = 'E' or 'B', LDVR >= N. */ + +/* S (output) DOUBLE PRECISION array, dimension (MM) */ +/* If JOB = 'E' or 'B', the reciprocal condition numbers of the */ +/* selected eigenvalues, stored in consecutive elements of the */ +/* array. For a complex conjugate pair of eigenvalues two */ +/* consecutive elements of S are set to the same value. Thus */ +/* S(j), DIF(j), and the j-th columns of VL and VR all */ +/* correspond to the same eigenpair (but not in general the */ +/* j-th eigenpair, unless all eigenpairs are selected). */ +/* If JOB = 'V', S is not referenced. */ + +/* DIF (output) DOUBLE PRECISION array, dimension (MM) */ +/* If JOB = 'V' or 'B', the estimated reciprocal condition */ +/* numbers of the selected eigenvectors, stored in consecutive */ +/* elements of the array. For a complex eigenvector two */ +/* consecutive elements of DIF are set to the same value. If */ +/* the eigenvalues cannot be reordered to compute DIF(j), DIF(j) */ +/* is set to 0; this can only occur when the true value would be */ +/* very small anyway. */ +/* If JOB = 'E', DIF is not referenced. */ + +/* MM (input) INTEGER */ +/* The number of elements in the arrays S and DIF. MM >= M. */ + +/* M (output) INTEGER */ +/* The number of elements of the arrays S and DIF used to store */ +/* the specified condition numbers; for each selected real */ +/* eigenvalue one element is used, and for each selected complex */ +/* conjugate pair of eigenvalues, two elements are used.
*/ +/* If HOWMNY = 'A', M is set to N. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,N). */ +/* If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (N + 6) */ +/* If JOB = 'E', IWORK is not referenced. */ + +/* INFO (output) INTEGER */ +/* =0: Successful exit */ +/* <0: If INFO = -i, the i-th argument had an illegal value */ + + +/* Further Details */ +/* =============== */ + +/* The reciprocal of the condition number of a generalized eigenvalue */ +/* w = (a, b) is defined as */ + +/* S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v)) */ + +/* where u and v are the left and right eigenvectors of (A, B) */ +/* corresponding to w; |z| denotes the absolute value of the complex */ +/* number, and norm(u) denotes the 2-norm of the vector u. */ +/* The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv) */ +/* of the matrix pair (A, B). If both a and b equal zero, then (A B) is */ +/* singular and S(I) = -1 is returned. */ + +/* An approximate error bound on the chordal distance between the i-th */ +/* computed generalized eigenvalue w and the corresponding exact */ +/* eigenvalue lambda is */ + +/* chord(w, lambda) <= EPS * norm(A, B) / S(I) */ + +/* where EPS is the machine precision. 
*/ + +/* The reciprocal of the condition number DIF(i) of right eigenvector u */ +/* and left eigenvector v corresponding to the generalized eigenvalue w */ +/* is defined as follows: */ + +/* a) If the i-th eigenvalue w = (a,b) is real */ + +/* Suppose U and V are orthogonal transformations such that */ + +/* U'*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 */ +/* ( 0 S22 ),( 0 T22 ) n-1 */ +/* 1 n-1 1 n-1 */ + +/* Then the reciprocal condition number DIF(i) is */ + +/* Difl((a, b), (S22, T22)) = sigma-min( Zl ), */ + +/* where sigma-min(Zl) denotes the smallest singular value of the */ +/* 2(n-1)-by-2(n-1) matrix */ + +/* Zl = [ kron(a, In-1) -kron(1, S22) ] */ +/* [ kron(b, In-1) -kron(1, T22) ] . */ + +/* Here In-1 is the identity matrix of size n-1. kron(X, Y) is the */ +/* Kronecker product between the matrices X and Y. */ + +/* Note that if the default method for computing DIF(i) is wanted */ +/* (see DLATDF), then the parameter DIFDRI (see below) should be */ +/* changed from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). */ +/* See DTGSYL for more details. */ + +/* b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, */ + +/* Suppose U and V are orthogonal transformations such that */ + +/* U'*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 */ +/* ( 0 S22 ),( 0 T22) n-2 */ +/* 2 n-2 2 n-2 */ + +/* and (S11, T11) corresponds to the complex conjugate eigenvalue */ +/* pair (w, conjg(w)). There exist unitary matrices U1 and V1 such */ +/* that */ + +/* U1'*S11*V1 = ( s11 s12 ) and U1'*T11*V1 = ( t11 t12 ) */ +/* ( 0 s22 ) ( 0 t22 ) */ + +/* where the generalized eigenvalues w = s11/t11 and */ +/* conjg(w) = s22/t22. 
*/ + +/* Then the reciprocal condition number DIF(i) is bounded by */ + +/* min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) */ + +/* where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where */ +/* Z1 is the complex 2-by-2 matrix */ + +/* Z1 = [ s11 -s22 ] */ +/* [ t11 -t22 ], */ + +/* This is done by computing (using real arithmetic) the */ +/* roots of the characteristic polynomial det(Z1' * Z1 - lambda I), */ +/* where Z1' denotes the conjugate transpose of Z1 and det(X) denotes */ +/* the determinant of X. */ + +/* and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an */ +/* upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) */ + +/* Z2 = [ kron(S11', In-2) -kron(I2, S22) ] */ +/* [ kron(T11', In-2) -kron(I2, T22) ] */ + +/* Note that if the default method for computing DIF is wanted (see */ +/* DLATDF), then the parameter DIFDRI (see below) should be changed */ +/* from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). See DTGSYL */ +/* for more details. */ + +/* For each eigenvalue/vector specified by SELECT, DIF stores a */ +/* Frobenius norm-based estimate of Difl. */ + +/* An approximate error bound for the i-th computed eigenvector VL(i) or */ +/* VR(i) is given by */ + +/* EPS * norm(A, B) / DIF(i). */ + +/* See ref. [2-3] for more details and further references. */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* References */ +/* ========== */ + +/* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ +/* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ +/* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ +/* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ + +/* [2] B. Kagstrom and P.
Poromaa; Computing Eigenspaces with Specified */ +/* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ +/* Estimation: Theory, Algorithms and Software, */ +/* Report UMINF - 94.04, Department of Computing Science, Umea */ +/* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ +/* Note 87. To appear in Numerical Algorithms, 1996. */ + +/* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ +/* for Solving the Generalized Sylvester Equation and Estimating the */ +/* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ +/* Department of Computing Science, Umea University, S-901 87 Umea, */ +/* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ +/* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, */ +/* No 1, 1996. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + --select; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --s; + --dif; + --work; + --iwork; + + /* Function Body */ + wantbh = _starpu_lsame_(job, "B"); + wants = _starpu_lsame_(job, "E") || wantbh; + wantdf = _starpu_lsame_(job, "V") || wantbh; + + somcon = _starpu_lsame_(howmny, "S"); + + *info = 0; + lquery = *lwork == -1; + + if (! wants && ! wantdf) { + *info = -1; + } else if (! _starpu_lsame_(howmny, "A") && ! 
somcon) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (wants && *ldvl < *n) { + *info = -10; + } else if (wants && *ldvr < *n) { + *info = -12; + } else { + +/* Set M to the number of eigenpairs for which condition numbers */ +/* are required, and test MM. */ + + if (somcon) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + if (k < *n) { + if (a[k + 1 + k * a_dim1] == 0.) { + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[k] || select[k + 1]) { + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*n == 0) { + lwmin = 1; + } else if (_starpu_lsame_(job, "V") || _starpu_lsame_(job, + "B")) { + lwmin = (*n << 1) * (*n + 2) + 16; + } else { + lwmin = *n; + } + work[1] = (doublereal) lwmin; + + if (*mm < *m) { + *info = -15; + } else if (*lwork < lwmin && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSNA", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + ks = 0; + pair = FALSE_; + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. */ + + if (pair) { + pair = FALSE_; + goto L20; + } else { + if (k < *n) { + pair = a[k + 1 + k * a_dim1] != 0.; + } + } + +/* Determine whether condition numbers are required for the k-th */ +/* eigenpair. */ + + if (somcon) { + if (pair) { + if (! select[k] && ! select[k + 1]) { + goto L20; + } + } else { + if (! select[k]) { + goto L20; + } + } + } + + ++ks; + + if (wants) { + +/* Compute the reciprocal condition number of the k-th */ +/* eigenvalue. 
*/ + + if (pair) { + +/* Complex eigenvalue pair. */ + + d__1 = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1); + rnrm = _starpu_dlapy2_(&d__1, &d__2); + d__1 = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1); + lnrm = _starpu_dlapy2_(&d__1, &d__2); + _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 + + 1], &c__1, &c_b21, &work[1], &c__1); + tmprr = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & + c__1); + tmpri = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], + &c__1); + _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[(ks + 1) * + vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); + tmpii = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], + &c__1); + tmpir = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & + c__1); + uhav = tmprr + tmpii; + uhavi = tmpir - tmpri; + _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 + + 1], &c__1, &c_b21, &work[1], &c__1); + tmprr = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & + c__1); + tmpri = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], + &c__1); + _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[(ks + 1) * + vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); + tmpii = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], + &c__1); + tmpir = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & + c__1); + uhbv = tmprr + tmpii; + uhbvi = tmpir - tmpri; + uhav = _starpu_dlapy2_(&uhav, &uhavi); + uhbv = _starpu_dlapy2_(&uhbv, &uhbvi); + cond = _starpu_dlapy2_(&uhav, &uhbv); + s[ks] = cond / (rnrm * lnrm); + s[ks + 1] = s[ks]; + + } else { + +/* Real eigenvalue. 
*/ + + rnrm = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); + lnrm = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); + _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 + + 1], &c__1, &c_b21, &work[1], &c__1); + uhav = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1) + ; + _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 + + 1], &c__1, &c_b21, &work[1], &c__1); + uhbv = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1) + ; + cond = _starpu_dlapy2_(&uhav, &uhbv); + if (cond == 0.) { + s[ks] = -1.; + } else { + s[ks] = cond / (rnrm * lnrm); + } + } + } + + if (wantdf) { + if (*n == 1) { + dif[ks] = _starpu_dlapy2_(&a[a_dim1 + 1], &b[b_dim1 + 1]); + goto L20; + } + +/* Estimate the reciprocal condition number of the k-th */ +/* eigenvectors. */ + if (pair) { + +/* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). */ +/* Compute the eigenvalue(s) at position K. */ + + work[1] = a[k + k * a_dim1]; + work[2] = a[k + 1 + k * a_dim1]; + work[3] = a[k + (k + 1) * a_dim1]; + work[4] = a[k + 1 + (k + 1) * a_dim1]; + work[5] = b[k + k * b_dim1]; + work[6] = b[k + 1 + k * b_dim1]; + work[7] = b[k + (k + 1) * b_dim1]; + work[8] = b[k + 1 + (k + 1) * b_dim1]; + d__1 = smlnum * eps; + _starpu_dlag2_(&work[1], &c__2, &work[5], &c__2, &d__1, &beta, dummy1, + &alphar, dummy, &alphai); + alprqt = 1.; + c1 = (alphar * alphar + alphai * alphai + beta * beta) * 2.; + c2 = beta * 4. * beta * alphai * alphai; + root1 = c1 + sqrt(c1 * c1 - c2 * 4.); + root2 = c2 / root1; + root1 /= 2.; +/* Computing MIN */ + d__1 = sqrt(root1), d__2 = sqrt(root2); + cond = min(d__1,d__2); + } + +/* Copy the matrix (A, B) to the array WORK and swap the */ +/* diagonal block beginning at A(k,k) to the (1,1) position. 
*/ + + _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &work[1], n); + _starpu_dlacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n); + ifst = k; + ilst = 1; + + i__2 = *lwork - (*n << 1) * *n; + _starpu_dtgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n, + dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &work[(*n * * + n << 1) + 1], &i__2, &ierr); + + if (ierr > 0) { + +/* Ill-conditioned problem - swap rejected. */ + + dif[ks] = 0.; + } else { + +/* Reordering successful, solve generalized Sylvester */ +/* equation for R and L, */ +/* A22 * R - L * A11 = A12 */ +/* B22 * R - L * B11 = B12, */ +/* and compute estimate of Difl((A11,B11), (A22, B22)). */ + + n1 = 1; + if (work[2] != 0.) { + n1 = 2; + } + n2 = *n - n1; + if (n2 == 0) { + dif[ks] = cond; + } else { + i__ = *n * *n + 1; + iz = (*n << 1) * *n + 1; + i__2 = *lwork - (*n << 1) * *n; + _starpu_dtgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, + &work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 + + i__], n, &work[i__], n, &work[n1 + i__], n, & + scale, &dif[ks], &work[iz + 1], &i__2, &iwork[1], + &ierr); + + if (pair) { +/* Computing MIN */ + d__1 = max(1.,alprqt) * dif[ks]; + dif[ks] = min(d__1,cond); + } + } + } + if (pair) { + dif[ks + 1] = dif[ks]; + } + } + if (pair) { + ++ks; + } + +L20: + ; + } + work[1] = (doublereal) lwmin; + return 0; + +/* End of DTGSNA */ + +} /* _starpu_dtgsna_ */ diff --git a/min-dgels/base/SRC/dtgsy2.c b/min-dgels/base/SRC/dtgsy2.c new file mode 100644 index 0000000..993b9d2 --- /dev/null +++ b/min-dgels/base/SRC/dtgsy2.c @@ -0,0 +1,1113 @@ +/* dtgsy2.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__8 = 8; +static integer c__1 = 1; +static doublereal c_b27 = -1.; +static doublereal c_b42 = 1.; +static doublereal c_b56 = 0.; + +/* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer + *pq, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, + d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, p, q; + doublereal z__[64] /* was [8][8] */; + integer ie, je, mb, nb, ii, jj, is, js; + doublereal rhs[8]; + integer isp1, jsp1; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer ierr, zdim, ipiv[8], jpiv[8]; + doublereal alpha; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + 
doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , _starpu_dgesc2_(integer *, doublereal *, integer *, doublereal *, + integer *, integer *, doublereal *), _starpu_dgetc2_(integer *, + doublereal *, integer *, integer *, integer *, integer *), + _starpu_dlatdf_(integer *, integer *, doublereal *, integer *, doublereal + *, doublereal *, doublereal *, integer *, integer *); + doublereal scaloc; + extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + logical notran; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGSY2 solves the generalized Sylvester equation: */ + +/* A * R - L * B = scale * C (1) */ +/* D * R - L * E = scale * F, */ + +/* using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, */ +/* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, */ +/* N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) */ +/* must be in generalized Schur canonical form, i.e. A, B are upper */ +/* quasi triangular and D, E are upper triangular. The solution (R, L) */ +/* overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor */ +/* chosen to avoid overflow. */ + +/* In matrix notation solving equation (1) corresponds to solve */ +/* Z*x = scale*b, where Z is defined as */ + +/* Z = [ kron(In, A) -kron(B', Im) ] (2) */ +/* [ kron(In, D) -kron(E', Im) ], */ + +/* Ik is the identity matrix of size k and X' is the transpose of X. 
*/ +/* kron(X, Y) is the Kronecker product between the matrices X and Y. */ +/* In the process of solving (1), we solve a number of such systems */ +/* where Dim(In), Dim(Im) = 1 or 2. */ + +/* If TRANS = 'T', solve the transposed system Z'*y = scale*b for y, */ +/* which is equivalent to solve for R and L in */ + +/* A' * R + D' * L = scale * C (3) */ +/* R * B' + L * E' = scale * -F */ + +/* This case is used to compute an estimate of Dif[(A, D), (B, E)] = */ +/* sigma_min(Z) using reverse communication with DLACON. */ + +/* DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL */ +/* of an upper bound on the separation between two matrix pairs. Then */ +/* the input (A, D), (B, E) are sub-pencils of the matrix pair in */ +/* DTGSYL. See DTGSYL for details. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': solve the generalized Sylvester equation (1). */ +/* = 'T': solve the 'transposed' system (3). */ + +/* IJOB (input) INTEGER */ +/* Specifies what kind of functionality to be performed. */ +/* = 0: solve (1) only. */ +/* = 1: A contribution from this subsystem to a Frobenius */ +/* norm-based estimate of the separation between two matrix */ +/* pairs is computed. (look ahead strategy is used). */ +/* = 2: A contribution from this subsystem to a Frobenius */ +/* norm-based estimate of the separation between two matrix */ +/* pairs is computed. (DGECON on sub-systems is used.) */ +/* Not referenced if TRANS = 'T'. */ + +/* M (input) INTEGER */ +/* On entry, M specifies the order of A and D, and the row */ +/* dimension of C, F, R and L. */ + +/* N (input) INTEGER */ +/* On entry, N specifies the order of B and E, and the column */ +/* dimension of C, F, R and L. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA, M) */ +/* On entry, A contains an upper quasi triangular matrix. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the matrix A. LDA >= max(1, M).
*/ + +/* B (input) DOUBLE PRECISION array, dimension (LDB, N) */ +/* On entry, B contains an upper quasi triangular matrix. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the matrix B. LDB >= max(1, N). */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) */ +/* On entry, C contains the right-hand-side of the first matrix */ +/* equation in (1). */ +/* On exit, if IJOB = 0, C has been overwritten by the */ +/* solution R. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the matrix C. LDC >= max(1, M). */ + +/* D (input) DOUBLE PRECISION array, dimension (LDD, M) */ +/* On entry, D contains an upper triangular matrix. */ + +/* LDD (input) INTEGER */ +/* The leading dimension of the matrix D. LDD >= max(1, M). */ + +/* E (input) DOUBLE PRECISION array, dimension (LDE, N) */ +/* On entry, E contains an upper triangular matrix. */ + +/* LDE (input) INTEGER */ +/* The leading dimension of the matrix E. LDE >= max(1, N). */ + +/* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) */ +/* On entry, F contains the right-hand-side of the second matrix */ +/* equation in (1). */ +/* On exit, if IJOB = 0, F has been overwritten by the */ +/* solution L. */ + +/* LDF (input) INTEGER */ +/* The leading dimension of the matrix F. LDF >= max(1, M). */ + +/* SCALE (output) DOUBLE PRECISION */ +/* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions */ +/* R and L (C and F on entry) will hold the solutions to a */ +/* slightly perturbed system but the input matrices A, B, D and */ +/* E have not been changed. If SCALE = 0, R and L will hold the */ +/* solutions to the homogeneous system with C = F = 0. Normally, */ +/* SCALE = 1. */ + +/* RDSUM (input/output) DOUBLE PRECISION */ +/* On entry, the sum of squares of computed contributions to */ +/* the Dif-estimate under computation by DTGSYL, where the */ +/* scaling factor RDSCAL (see below) has been factored out. 
*/ +/* On exit, the corresponding sum of squares updated with the */ +/* contributions from the current sub-system. */ +/* If TRANS = 'T' RDSUM is not touched. */ +/* NOTE: RDSUM only makes sense when DTGSY2 is called by DTGSYL. */ + +/* RDSCAL (input/output) DOUBLE PRECISION */ +/* On entry, scaling factor used to prevent overflow in RDSUM. */ +/* On exit, RDSCAL is updated w.r.t. the current contributions */ +/* in RDSUM. */ +/* If TRANS = 'T', RDSCAL is not touched. */ +/* NOTE: RDSCAL only makes sense when DTGSY2 is called by */ +/* DTGSYL. */ + +/* IWORK (workspace) INTEGER array, dimension (M+N+2) */ + +/* PQ (output) INTEGER */ +/* On exit, the number of subsystems (of size 2-by-2, 4-by-4 and */ +/* 8-by-8) solved by this routine. */ + +/* INFO (output) INTEGER */ +/* On exit, if INFO is set to */ +/* =0: Successful exit */ +/* <0: If INFO = -i, the i-th argument had an illegal value. */ +/* >0: The matrix pairs (A, D) and (B, E) have common or very */ +/* close eigenvalues. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Replaced various illegal calls to DCOPY by calls to DLASET. */ +/* Sven Hammarling, 27/5/02. */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode and test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + d_dim1 = *ldd; + d_offset = 1 + d_dim1; + d__ -= d_offset; + e_dim1 = *lde; + e_offset = 1 + e_dim1; + e -= e_offset; + f_dim1 = *ldf; + f_offset = 1 + f_dim1; + f -= f_offset; + --iwork; + + /* Function Body */ + *info = 0; + ierr = 0; + notran = _starpu_lsame_(trans, "N"); + if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -1; + } else if (notran) { + if (*ijob < 0 || *ijob > 2) { + *info = -2; + } + } + if (*info == 0) { + if (*m <= 0) { + *info = -3; + } else if (*n <= 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*ldd < max(1,*m)) { + *info = -12; + } else if (*lde < max(1,*n)) { + *info = -14; + } else if (*ldf < max(1,*m)) { + *info = -16; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSY2", &i__1); + return 0; + } + +/* Determine block structure of A */ + + *pq = 0; + p = 0; + i__ = 1; +L10: + if (i__ > *m) { + goto L20; + } + ++p; + iwork[p] = i__; + if (i__ == *m) { + goto L20; + } + if (a[i__ + 1 + i__ * a_dim1] != 0.) { + i__ += 2; + } else { + ++i__; + } + goto L10; +L20: + iwork[p + 1] = *m + 1; + +/* Determine block structure of B */ + + q = p + 1; + j = 1; +L30: + if (j > *n) { + goto L40; + } + ++q; + iwork[q] = j; + if (j == *n) { + goto L40; + } + if (b[j + 1 + j * b_dim1] != 0.) 
{ + j += 2; + } else { + ++j; + } + goto L30; +L40: + iwork[q + 1] = *n + 1; + *pq = p * (q - p - 1); + + if (notran) { + +/* Solve (I, J) - subsystem */ +/* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */ +/* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */ +/* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q */ + + *scale = 1.; + scaloc = 1.; + i__1 = q; + for (j = p + 2; j <= i__1; ++j) { + js = iwork[j]; + jsp1 = js + 1; + je = iwork[j + 1] - 1; + nb = je - js + 1; + for (i__ = p; i__ >= 1; --i__) { + + is = iwork[i__]; + isp1 = is + 1; + ie = iwork[i__ + 1] - 1; + mb = ie - is + 1; + zdim = mb * nb << 1; + + if (mb == 1 && nb == 1) { + +/* Build a 2-by-2 system Z * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = d__[is + is * d_dim1]; + z__[8] = -b[js + js * b_dim1]; + z__[9] = -e[js + js * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = f[is + js * f_dim1]; + +/* Solve Z * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + + if (*ijob == 0) { + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & + c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L50: */ + } + *scale *= scaloc; + } + } else { + _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, + ipiv, jpiv); + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + f[is + js * f_dim1] = rhs[1]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. 
*/ + + if (i__ > 1) { + alpha = -rhs[0]; + i__2 = is - 1; + _starpu_daxpy_(&i__2, &alpha, &a[is * a_dim1 + 1], &c__1, & + c__[js * c_dim1 + 1], &c__1); + i__2 = is - 1; + _starpu_daxpy_(&i__2, &alpha, &d__[is * d_dim1 + 1], &c__1, & + f[js * f_dim1 + 1], &c__1); + } + if (j < q) { + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[1], &b[js + (je + 1) * b_dim1], + ldb, &c__[is + (je + 1) * c_dim1], ldc); + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[1], &e[js + (je + 1) * e_dim1], + lde, &f[is + (je + 1) * f_dim1], ldf); + } + + } else if (mb == 1 && nb == 2) { + +/* Build a 4-by-4 system Z * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = 0.; + z__[2] = d__[is + is * d_dim1]; + z__[3] = 0.; + + z__[8] = 0.; + z__[9] = a[is + is * a_dim1]; + z__[10] = 0.; + z__[11] = d__[is + is * d_dim1]; + + z__[16] = -b[js + js * b_dim1]; + z__[17] = -b[js + jsp1 * b_dim1]; + z__[18] = -e[js + js * e_dim1]; + z__[19] = -e[js + jsp1 * e_dim1]; + + z__[24] = -b[jsp1 + js * b_dim1]; + z__[25] = -b[jsp1 + jsp1 * b_dim1]; + z__[26] = 0.; + z__[27] = -e[jsp1 + jsp1 * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = c__[is + jsp1 * c_dim1]; + rhs[2] = f[is + js * f_dim1]; + rhs[3] = f[is + jsp1 * f_dim1]; + +/* Solve Z * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + + if (*ijob == 0) { + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) 
{ + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & + c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L60: */ + } + *scale *= scaloc; + } + } else { + _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, + ipiv, jpiv); + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + c__[is + jsp1 * c_dim1] = rhs[1]; + f[is + js * f_dim1] = rhs[2]; + f[is + jsp1 * f_dim1] = rhs[3]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. */ + + if (i__ > 1) { + i__2 = is - 1; + _starpu_dger_(&i__2, &nb, &c_b27, &a[is * a_dim1 + 1], &c__1, + rhs, &c__1, &c__[js * c_dim1 + 1], ldc); + i__2 = is - 1; + _starpu_dger_(&i__2, &nb, &c_b27, &d__[is * d_dim1 + 1], & + c__1, rhs, &c__1, &f[js * f_dim1 + 1], ldf); + } + if (j < q) { + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[2], &b[js + (je + 1) * b_dim1], + ldb, &c__[is + (je + 1) * c_dim1], ldc); + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[2], &e[js + (je + 1) * e_dim1], + lde, &f[is + (je + 1) * f_dim1], ldf); + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[3], &b[jsp1 + (je + 1) * b_dim1], + ldb, &c__[is + (je + 1) * c_dim1], ldc); + i__2 = *n - je; + _starpu_daxpy_(&i__2, &rhs[3], &e[jsp1 + (je + 1) * e_dim1], + lde, &f[is + (je + 1) * f_dim1], ldf); + } + + } else if (mb == 2 && nb == 1) { + +/* Build a 4-by-4 system Z * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = a[isp1 + is * a_dim1]; + z__[2] = d__[is + is * d_dim1]; + z__[3] = 0.; + + z__[8] = a[is + isp1 * a_dim1]; + z__[9] = a[isp1 + isp1 * a_dim1]; + z__[10] = d__[is + isp1 * d_dim1]; + z__[11] = d__[isp1 + isp1 * d_dim1]; + + z__[16] = -b[js + js * b_dim1]; + z__[17] = 0.; + z__[18] = -e[js + js * e_dim1]; + z__[19] = 0.; + + z__[24] = 0.; + z__[25] = -b[js + js * b_dim1]; + z__[26] = 0.; + z__[27] = -e[js + js * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = c__[isp1 + js * c_dim1]; + rhs[2] = f[is + js 
* f_dim1]; + rhs[3] = f[isp1 + js * f_dim1]; + +/* Solve Z * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + if (*ijob == 0) { + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & + c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L70: */ + } + *scale *= scaloc; + } + } else { + _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, + ipiv, jpiv); + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + c__[isp1 + js * c_dim1] = rhs[1]; + f[is + js * f_dim1] = rhs[2]; + f[isp1 + js * f_dim1] = rhs[3]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. */ + + if (i__ > 1) { + i__2 = is - 1; + _starpu_dgemv_("N", &i__2, &mb, &c_b27, &a[is * a_dim1 + 1], + lda, rhs, &c__1, &c_b42, &c__[js * c_dim1 + 1] +, &c__1); + i__2 = is - 1; + _starpu_dgemv_("N", &i__2, &mb, &c_b27, &d__[is * d_dim1 + 1], + ldd, rhs, &c__1, &c_b42, &f[js * f_dim1 + 1], + &c__1); + } + if (j < q) { + i__2 = *n - je; + _starpu_dger_(&mb, &i__2, &c_b42, &rhs[2], &c__1, &b[js + (je + + 1) * b_dim1], ldb, &c__[is + (je + 1) * + c_dim1], ldc); + i__2 = *n - je; + _starpu_dger_(&mb, &i__2, &c_b42, &rhs[2], &c__1, &e[js + (je + + 1) * e_dim1], lde, &f[is + (je + 1) * + f_dim1], ldf); + } + + } else if (mb == 2 && nb == 2) { + +/* Build an 8-by-8 system Z * x = RHS */ + + _starpu_dlaset_("F", &c__8, &c__8, &c_b56, &c_b56, z__, &c__8); + + z__[0] = a[is + is * a_dim1]; + z__[1] = a[isp1 + is * a_dim1]; + z__[4] = d__[is + is * d_dim1]; + + z__[8] = a[is + isp1 * a_dim1]; + z__[9] = a[isp1 + isp1 * a_dim1]; + z__[12] = d__[is + isp1 * d_dim1]; + z__[13] = d__[isp1 + isp1 * d_dim1]; + + z__[18] = a[is + is * a_dim1]; + z__[19] = a[isp1 + is * a_dim1]; + z__[22] = d__[is + is * d_dim1]; + + z__[26] = a[is + isp1 * a_dim1]; + z__[27] = a[isp1 + isp1 * 
a_dim1]; + z__[30] = d__[is + isp1 * d_dim1]; + z__[31] = d__[isp1 + isp1 * d_dim1]; + + z__[32] = -b[js + js * b_dim1]; + z__[34] = -b[js + jsp1 * b_dim1]; + z__[36] = -e[js + js * e_dim1]; + z__[38] = -e[js + jsp1 * e_dim1]; + + z__[41] = -b[js + js * b_dim1]; + z__[43] = -b[js + jsp1 * b_dim1]; + z__[45] = -e[js + js * e_dim1]; + z__[47] = -e[js + jsp1 * e_dim1]; + + z__[48] = -b[jsp1 + js * b_dim1]; + z__[50] = -b[jsp1 + jsp1 * b_dim1]; + z__[54] = -e[jsp1 + jsp1 * e_dim1]; + + z__[57] = -b[jsp1 + js * b_dim1]; + z__[59] = -b[jsp1 + jsp1 * b_dim1]; + z__[63] = -e[jsp1 + jsp1 * e_dim1]; + +/* Set up right hand side(s) */ + + k = 1; + ii = mb * nb + 1; + i__2 = nb - 1; + for (jj = 0; jj <= i__2; ++jj) { + _starpu_dcopy_(&mb, &c__[is + (js + jj) * c_dim1], &c__1, & + rhs[k - 1], &c__1); + _starpu_dcopy_(&mb, &f[is + (js + jj) * f_dim1], &c__1, &rhs[ + ii - 1], &c__1); + k += mb; + ii += mb; +/* L80: */ + } + +/* Solve Z * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + if (*ijob == 0) { + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & + c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L90: */ + } + *scale *= scaloc; + } + } else { + _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, + ipiv, jpiv); + } + +/* Unpack solution vector(s) */ + + k = 1; + ii = mb * nb + 1; + i__2 = nb - 1; + for (jj = 0; jj <= i__2; ++jj) { + _starpu_dcopy_(&mb, &rhs[k - 1], &c__1, &c__[is + (js + jj) * + c_dim1], &c__1); + _starpu_dcopy_(&mb, &rhs[ii - 1], &c__1, &f[is + (js + jj) * + f_dim1], &c__1); + k += mb; + ii += mb; +/* L100: */ + } + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. 
*/ + + if (i__ > 1) { + i__2 = is - 1; + _starpu_dgemm_("N", "N", &i__2, &nb, &mb, &c_b27, &a[is * + a_dim1 + 1], lda, rhs, &mb, &c_b42, &c__[js * + c_dim1 + 1], ldc); + i__2 = is - 1; + _starpu_dgemm_("N", "N", &i__2, &nb, &mb, &c_b27, &d__[is * + d_dim1 + 1], ldd, rhs, &mb, &c_b42, &f[js * + f_dim1 + 1], ldf); + } + if (j < q) { + k = mb * nb + 1; + i__2 = *n - je; + _starpu_dgemm_("N", "N", &mb, &i__2, &nb, &c_b42, &rhs[k - 1], + &mb, &b[js + (je + 1) * b_dim1], ldb, &c_b42, + &c__[is + (je + 1) * c_dim1], ldc); + i__2 = *n - je; + _starpu_dgemm_("N", "N", &mb, &i__2, &nb, &c_b42, &rhs[k - 1], + &mb, &e[js + (je + 1) * e_dim1], lde, &c_b42, + &f[is + (je + 1) * f_dim1], ldf); + } + + } + +/* L110: */ + } +/* L120: */ + } + } else { + +/* Solve (I, J) - subsystem */ +/* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) */ +/* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) */ +/* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 */ + + *scale = 1.; + scaloc = 1.; + i__1 = p; + for (i__ = 1; i__ <= i__1; ++i__) { + + is = iwork[i__]; + isp1 = is + 1; + ie = i__; + mb = ie - is + 1; + i__2 = p + 2; + for (j = q; j >= i__2; --j) { + + js = iwork[j]; + jsp1 = js + 1; + je = iwork[j + 1] - 1; + nb = je - js + 1; + zdim = mb * nb << 1; + if (mb == 1 && nb == 1) { + +/* Build a 2-by-2 system Z' * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = -b[js + js * b_dim1]; + z__[8] = d__[is + is * d_dim1]; + z__[9] = -e[js + js * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = f[is + js * f_dim1]; + +/* Solve Z' * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) 
{ + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L130: */ + } + *scale *= scaloc; + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + f[is + js * f_dim1] = rhs[1]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. */ + + if (j > p + 2) { + alpha = rhs[0]; + i__3 = js - 1; + _starpu_daxpy_(&i__3, &alpha, &b[js * b_dim1 + 1], &c__1, &f[ + is + f_dim1], ldf); + alpha = rhs[1]; + i__3 = js - 1; + _starpu_daxpy_(&i__3, &alpha, &e[js * e_dim1 + 1], &c__1, &f[ + is + f_dim1], ldf); + } + if (i__ < p) { + alpha = -rhs[0]; + i__3 = *m - ie; + _starpu_daxpy_(&i__3, &alpha, &a[is + (ie + 1) * a_dim1], lda, + &c__[ie + 1 + js * c_dim1], &c__1); + alpha = -rhs[1]; + i__3 = *m - ie; + _starpu_daxpy_(&i__3, &alpha, &d__[is + (ie + 1) * d_dim1], + ldd, &c__[ie + 1 + js * c_dim1], &c__1); + } + + } else if (mb == 1 && nb == 2) { + +/* Build a 4-by-4 system Z' * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = 0.; + z__[2] = -b[js + js * b_dim1]; + z__[3] = -b[jsp1 + js * b_dim1]; + + z__[8] = 0.; + z__[9] = a[is + is * a_dim1]; + z__[10] = -b[js + jsp1 * b_dim1]; + z__[11] = -b[jsp1 + jsp1 * b_dim1]; + + z__[16] = d__[is + is * d_dim1]; + z__[17] = 0.; + z__[18] = -e[js + js * e_dim1]; + z__[19] = 0.; + + z__[24] = 0.; + z__[25] = d__[is + is * d_dim1]; + z__[26] = -e[js + jsp1 * e_dim1]; + z__[27] = -e[jsp1 + jsp1 * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = c__[is + jsp1 * c_dim1]; + rhs[2] = f[is + js * f_dim1]; + rhs[3] = f[is + jsp1 * f_dim1]; + +/* Solve Z' * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) 
{ + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L140: */ + } + *scale *= scaloc; + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + c__[is + jsp1 * c_dim1] = rhs[1]; + f[is + js * f_dim1] = rhs[2]; + f[is + jsp1 * f_dim1] = rhs[3]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. */ + + if (j > p + 2) { + i__3 = js - 1; + _starpu_daxpy_(&i__3, rhs, &b[js * b_dim1 + 1], &c__1, &f[is + + f_dim1], ldf); + i__3 = js - 1; + _starpu_daxpy_(&i__3, &rhs[1], &b[jsp1 * b_dim1 + 1], &c__1, & + f[is + f_dim1], ldf); + i__3 = js - 1; + _starpu_daxpy_(&i__3, &rhs[2], &e[js * e_dim1 + 1], &c__1, &f[ + is + f_dim1], ldf); + i__3 = js - 1; + _starpu_daxpy_(&i__3, &rhs[3], &e[jsp1 * e_dim1 + 1], &c__1, & + f[is + f_dim1], ldf); + } + if (i__ < p) { + i__3 = *m - ie; + _starpu_dger_(&i__3, &nb, &c_b27, &a[is + (ie + 1) * a_dim1], + lda, rhs, &c__1, &c__[ie + 1 + js * c_dim1], + ldc); + i__3 = *m - ie; + _starpu_dger_(&i__3, &nb, &c_b27, &d__[is + (ie + 1) * d_dim1] +, ldd, &rhs[2], &c__1, &c__[ie + 1 + js * + c_dim1], ldc); + } + + } else if (mb == 2 && nb == 1) { + +/* Build a 4-by-4 system Z' * x = RHS */ + + z__[0] = a[is + is * a_dim1]; + z__[1] = a[is + isp1 * a_dim1]; + z__[2] = -b[js + js * b_dim1]; + z__[3] = 0.; + + z__[8] = a[isp1 + is * a_dim1]; + z__[9] = a[isp1 + isp1 * a_dim1]; + z__[10] = 0.; + z__[11] = -b[js + js * b_dim1]; + + z__[16] = d__[is + is * d_dim1]; + z__[17] = d__[is + isp1 * d_dim1]; + z__[18] = -e[js + js * e_dim1]; + z__[19] = 0.; + + z__[24] = 0.; + z__[25] = d__[isp1 + isp1 * d_dim1]; + z__[26] = 0.; + z__[27] = -e[js + js * e_dim1]; + +/* Set up right hand side(s) */ + + rhs[0] = c__[is + js * c_dim1]; + rhs[1] = c__[isp1 + js * c_dim1]; + rhs[2] = f[is + js * f_dim1]; + rhs[3] = f[isp1 + js * f_dim1]; + +/* Solve Z' * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); 
+ if (ierr > 0) { + *info = ierr; + } + + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) { + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L150: */ + } + *scale *= scaloc; + } + +/* Unpack solution vector(s) */ + + c__[is + js * c_dim1] = rhs[0]; + c__[isp1 + js * c_dim1] = rhs[1]; + f[is + js * f_dim1] = rhs[2]; + f[isp1 + js * f_dim1] = rhs[3]; + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. */ + + if (j > p + 2) { + i__3 = js - 1; + _starpu_dger_(&mb, &i__3, &c_b42, rhs, &c__1, &b[js * b_dim1 + + 1], &c__1, &f[is + f_dim1], ldf); + i__3 = js - 1; + _starpu_dger_(&mb, &i__3, &c_b42, &rhs[2], &c__1, &e[js * + e_dim1 + 1], &c__1, &f[is + f_dim1], ldf); + } + if (i__ < p) { + i__3 = *m - ie; + _starpu_dgemv_("T", &mb, &i__3, &c_b27, &a[is + (ie + 1) * + a_dim1], lda, rhs, &c__1, &c_b42, &c__[ie + 1 + + js * c_dim1], &c__1); + i__3 = *m - ie; + _starpu_dgemv_("T", &mb, &i__3, &c_b27, &d__[is + (ie + 1) * + d_dim1], ldd, &rhs[2], &c__1, &c_b42, &c__[ie + + 1 + js * c_dim1], &c__1); + } + + } else if (mb == 2 && nb == 2) { + +/* Build an 8-by-8 system Z' * x = RHS */ + + _starpu_dlaset_("F", &c__8, &c__8, &c_b56, &c_b56, z__, &c__8); + + z__[0] = a[is + is * a_dim1]; + z__[1] = a[is + isp1 * a_dim1]; + z__[4] = -b[js + js * b_dim1]; + z__[6] = -b[jsp1 + js * b_dim1]; + + z__[8] = a[isp1 + is * a_dim1]; + z__[9] = a[isp1 + isp1 * a_dim1]; + z__[13] = -b[js + js * b_dim1]; + z__[15] = -b[jsp1 + js * b_dim1]; + + z__[18] = a[is + is * a_dim1]; + z__[19] = a[is + isp1 * a_dim1]; + z__[20] = -b[js + jsp1 * b_dim1]; + z__[22] = -b[jsp1 + jsp1 * b_dim1]; + + z__[26] = a[isp1 + is * a_dim1]; + z__[27] = a[isp1 + isp1 * a_dim1]; + z__[29] = -b[js + jsp1 * b_dim1]; + z__[31] = -b[jsp1 + jsp1 * b_dim1]; + + z__[32] = d__[is + is * d_dim1]; + z__[33] = d__[is + isp1 * d_dim1]; + z__[36] = -e[js + js * e_dim1]; 
+ + z__[41] = d__[isp1 + isp1 * d_dim1]; + z__[45] = -e[js + js * e_dim1]; + + z__[50] = d__[is + is * d_dim1]; + z__[51] = d__[is + isp1 * d_dim1]; + z__[52] = -e[js + jsp1 * e_dim1]; + z__[54] = -e[jsp1 + jsp1 * e_dim1]; + + z__[59] = d__[isp1 + isp1 * d_dim1]; + z__[61] = -e[js + jsp1 * e_dim1]; + z__[63] = -e[jsp1 + jsp1 * e_dim1]; + +/* Set up right hand side(s) */ + + k = 1; + ii = mb * nb + 1; + i__3 = nb - 1; + for (jj = 0; jj <= i__3; ++jj) { + _starpu_dcopy_(&mb, &c__[is + (js + jj) * c_dim1], &c__1, & + rhs[k - 1], &c__1); + _starpu_dcopy_(&mb, &f[is + (js + jj) * f_dim1], &c__1, &rhs[ + ii - 1], &c__1); + k += mb; + ii += mb; +/* L160: */ + } + + +/* Solve Z' * x = RHS */ + + _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); + if (ierr > 0) { + *info = ierr; + } + + _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); + if (scaloc != 1.) { + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L170: */ + } + *scale *= scaloc; + } + +/* Unpack solution vector(s) */ + + k = 1; + ii = mb * nb + 1; + i__3 = nb - 1; + for (jj = 0; jj <= i__3; ++jj) { + _starpu_dcopy_(&mb, &rhs[k - 1], &c__1, &c__[is + (js + jj) * + c_dim1], &c__1); + _starpu_dcopy_(&mb, &rhs[ii - 1], &c__1, &f[is + (js + jj) * + f_dim1], &c__1); + k += mb; + ii += mb; +/* L180: */ + } + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. 
*/ + + if (j > p + 2) { + i__3 = js - 1; + _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b42, &c__[is + + js * c_dim1], ldc, &b[js * b_dim1 + 1], ldb, & + c_b42, &f[is + f_dim1], ldf); + i__3 = js - 1; + _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b42, &f[is + js * + f_dim1], ldf, &e[js * e_dim1 + 1], lde, & + c_b42, &f[is + f_dim1], ldf); + } + if (i__ < p) { + i__3 = *m - ie; + _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b27, &a[is + (ie + + 1) * a_dim1], lda, &c__[is + js * c_dim1], + ldc, &c_b42, &c__[ie + 1 + js * c_dim1], ldc); + i__3 = *m - ie; + _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b27, &d__[is + ( + ie + 1) * d_dim1], ldd, &f[is + js * f_dim1], + ldf, &c_b42, &c__[ie + 1 + js * c_dim1], ldc); + } + + } + +/* L190: */ + } +/* L200: */ + } + + } + return 0; + +/* End of DTGSY2 */ + +} /* _starpu_dtgsy2_ */ diff --git a/min-dgels/base/SRC/dtgsyl.c b/min-dgels/base/SRC/dtgsyl.c new file mode 100644 index 0000000..9491814 --- /dev/null +++ b/min-dgels/base/SRC/dtgsyl.c @@ -0,0 +1,692 @@ +/* dtgsyl.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c_n1 = -1; +static integer c__5 = 5; +static doublereal c_b14 = 0.; +static integer c__1 = 1; +static doublereal c_b51 = -1.; +static doublereal c_b52 = 1.; + +/* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, + doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * + scale, doublereal *dif, doublereal *work, integer *lwork, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, + d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, + i__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, p, q, ie, je, mb, nb, is, js, pq; + doublereal dsum; + integer ppqq; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + extern logical _starpu_lsame_(char *, char *); + integer ifunc, linfo, lwmin; + doublereal scale2; + extern /* Subroutine */ int _starpu_dtgsy2_(char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, 
doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *); + doublereal dscale, scaloc; + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer iround; + logical notran; + integer isolve; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTGSYL solves the generalized Sylvester equation: */ + +/* A * R - L * B = scale * C (1) */ +/* D * R - L * E = scale * F */ + +/* where R and L are unknown m-by-n matrices, (A, D), (B, E) and */ +/* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, */ +/* respectively, with real entries. (A, D) and (B, E) must be in */ +/* generalized (real) Schur canonical form, i.e. A, B are upper quasi */ +/* triangular and D, E are upper triangular. */ + +/* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output */ +/* scaling factor chosen to avoid overflow. */ + +/* In matrix notation (1) is equivalent to solve Zx = scale b, where */ +/* Z is defined as */ + +/* Z = [ kron(In, A) -kron(B', Im) ] (2) */ +/* [ kron(In, D) -kron(E', Im) ]. */ + +/* Here Ik is the identity matrix of size k and X' is the transpose of */ +/* X. kron(X, Y) is the Kronecker product between the matrices X and Y. 
*/ + +/* If TRANS = 'T', DTGSYL solves the transposed system Z'*y = scale*b, */ +/* which is equivalent to solving for R and L in */ + +/* A' * R + D' * L = scale * C (3) */ +/* R * B' + L * E' = scale * (-F) */ + +/* This case (TRANS = 'T') is used to compute a one-norm-based estimate */ +/* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) */ +/* and (B,E), using DLACON. */ + +/* If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate */ +/* of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the */ +/* reciprocal of the smallest singular value of Z. See [1-2] for more */ +/* information. */ + +/* This is a level 3 BLAS algorithm. */ + +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N', solve the generalized Sylvester equation (1). */ +/* = 'T', solve the 'transposed' system (3). */ + +/* IJOB (input) INTEGER */ +/* Specifies what kind of functionality is to be performed. */ +/* =0: solve (1) only. */ +/* =1: The functionality of 0 and 3. */ +/* =2: The functionality of 0 and 4. */ +/* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. */ +/* (look ahead strategy IJOB = 1 is used). */ +/* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. */ +/* ( DGECON on sub-systems is used ). */ +/* Not referenced if TRANS = 'T'. */ + +/* M (input) INTEGER */ +/* The order of the matrices A and D, and the row dimension of */ +/* the matrices C, F, R and L. */ + +/* N (input) INTEGER */ +/* The order of the matrices B and E, and the column dimension */ +/* of the matrices C, F, R and L. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA, M) */ +/* The upper quasi triangular matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1, M). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB, N) */ +/* The upper quasi triangular matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1, N). 
*/ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) */ +/* On entry, C contains the right-hand-side of the first matrix */ +/* equation in (1) or (3). */ +/* On exit, if IJOB = 0, 1 or 2, C has been overwritten by */ +/* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, */ +/* the solution achieved during the computation of the */ +/* Dif-estimate. */ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1, M). */ + +/* D (input) DOUBLE PRECISION array, dimension (LDD, M) */ +/* The upper triangular matrix D. */ + +/* LDD (input) INTEGER */ +/* The leading dimension of the array D. LDD >= max(1, M). */ + +/* E (input) DOUBLE PRECISION array, dimension (LDE, N) */ +/* The upper triangular matrix E. */ + +/* LDE (input) INTEGER */ +/* The leading dimension of the array E. LDE >= max(1, N). */ + +/* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) */ +/* On entry, F contains the right-hand-side of the second matrix */ +/* equation in (1) or (3). */ +/* On exit, if IJOB = 0, 1 or 2, F has been overwritten by */ +/* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, */ +/* the solution achieved during the computation of the */ +/* Dif-estimate. */ + +/* LDF (input) INTEGER */ +/* The leading dimension of the array F. LDF >= max(1, M). */ + +/* DIF (output) DOUBLE PRECISION */ +/* On exit DIF is the reciprocal of a lower bound of the */ +/* reciprocal of the Dif-function, i.e. DIF is an upper bound of */ +/* Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). */ +/* IF IJOB = 0 or TRANS = 'T', DIF is not touched. */ + +/* SCALE (output) DOUBLE PRECISION */ +/* On exit SCALE is the scaling factor in (1) or (3). */ +/* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., */ +/* to a slightly perturbed system but the input matrices A, B, D */ +/* and E have not been changed. 
If SCALE = 0, C and F hold the */ +/* solutions R and L, respectively, to the homogeneous system */ +/* with C = F = 0. Normally, SCALE = 1. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= 1. */ +/* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (M+N+6) */ + +/* INFO (output) INTEGER */ +/* =0: successful exit */ +/* <0: If INFO = -i, the i-th argument had an illegal value. */ +/* >0: (A, D) and (B, E) have common or close eigenvalues. */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* Umea University, S-901 87 Umea, Sweden. */ + +/* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ +/* for Solving the Generalized Sylvester Equation and Estimating the */ +/* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ +/* Department of Computing Science, Umea University, S-901 87 Umea, */ +/* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ +/* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, */ +/* No 1, 1996. */ + +/* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester */ +/* Equation (AR - LB, DR - LE) = (C, F), SIAM J. Matrix Anal. */ +/* Appl., 15(4):1045-1060, 1994. */ + +/* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with */ +/* Condition Estimators for Solving the Generalized Sylvester */ +/* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 
7, */ +/* July 1989, pp 745-751. */ + +/* ===================================================================== */ +/* Replaced various illegal calls to DCOPY by calls to DLASET. */ +/* Sven Hammarling, 1/5/02. */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + d_dim1 = *ldd; + d_offset = 1 + d_dim1; + d__ -= d_offset; + e_dim1 = *lde; + e_offset = 1 + e_dim1; + e -= e_offset; + f_dim1 = *ldf; + f_offset = 1 + f_dim1; + f -= f_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = _starpu_lsame_(trans, "N"); + lquery = *lwork == -1; + + if (! notran && ! _starpu_lsame_(trans, "T")) { + *info = -1; + } else if (notran) { + if (*ijob < 0 || *ijob > 4) { + *info = -2; + } + } + if (*info == 0) { + if (*m <= 0) { + *info = -3; + } else if (*n <= 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*ldd < max(1,*m)) { + *info = -12; + } else if (*lde < max(1,*n)) { + *info = -14; + } else if (*ldf < max(1,*m)) { + *info = -16; + } + } + + if (*info == 0) { + if (notran) { + if (*ijob == 1 || *ijob == 2) { +/* Computing MAX */ + i__1 = 1, i__2 = (*m << 1) * *n; + lwmin = max(i__1,i__2); + } else { + lwmin = 1; + } + } else { + lwmin = 1; + } + work[1] = (doublereal) lwmin; + + if (*lwork < lwmin && ! 
lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTGSYL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *scale = 1.; + if (notran) { + if (*ijob != 0) { + *dif = 0.; + } + } + return 0; + } + +/* Determine optimal block sizes MB and NB */ + + mb = _starpu_ilaenv_(&c__2, "DTGSYL", trans, m, n, &c_n1, &c_n1); + nb = _starpu_ilaenv_(&c__5, "DTGSYL", trans, m, n, &c_n1, &c_n1); + + isolve = 1; + ifunc = 0; + if (notran) { + if (*ijob >= 3) { + ifunc = *ijob - 2; + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc) + ; + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); + } else if (*ijob >= 1) { + isolve = 2; + } + } + + if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) { + + i__1 = isolve; + for (iround = 1; iround <= i__1; ++iround) { + +/* Use unblocked Level 2 solver */ + + dscale = 0.; + dsum = 1.; + pq = 0; + _starpu_dtgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb, + &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset], + lde, &f[f_offset], ldf, scale, &dsum, &dscale, &iwork[1], + &pq, info); + if (dscale != 0.) 
{ + if (*ijob == 1 || *ijob == 3) { + *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale * + sqrt(dsum)); + } else { + *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum)); + } + } + + if (isolve == 2 && iround == 1) { + if (notran) { + ifunc = *ijob; + } + scale2 = *scale; + _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); + _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc); + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); + } else if (isolve == 2 && iround == 2) { + _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); + _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); + *scale = scale2; + } +/* L30: */ + } + + return 0; + } + +/* Determine block structure of A */ + + p = 0; + i__ = 1; +L40: + if (i__ > *m) { + goto L50; + } + ++p; + iwork[p] = i__; + i__ += mb; + if (i__ >= *m) { + goto L50; + } + if (a[i__ + (i__ - 1) * a_dim1] != 0.) { + ++i__; + } + goto L40; +L50: + + iwork[p + 1] = *m + 1; + if (iwork[p] == iwork[p + 1]) { + --p; + } + +/* Determine block structure of B */ + + q = p + 1; + j = 1; +L60: + if (j > *n) { + goto L70; + } + ++q; + iwork[q] = j; + j += nb; + if (j >= *n) { + goto L70; + } + if (b[j + (j - 1) * b_dim1] != 0.) 
{ + ++j; + } + goto L60; +L70: + + iwork[q + 1] = *n + 1; + if (iwork[q] == iwork[q + 1]) { + --q; + } + + if (notran) { + + i__1 = isolve; + for (iround = 1; iround <= i__1; ++iround) { + +/* Solve (I, J)-subsystem */ +/* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */ +/* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */ +/* for I = P, P - 1,..., 1; J = 1, 2,..., Q */ + + dscale = 0.; + dsum = 1.; + pq = 0; + *scale = 1.; + i__2 = q; + for (j = p + 2; j <= i__2; ++j) { + js = iwork[j]; + je = iwork[j + 1] - 1; + nb = je - js + 1; + for (i__ = p; i__ >= 1; --i__) { + is = iwork[i__]; + ie = iwork[i__ + 1] - 1; + mb = ie - is + 1; + ppqq = 0; + _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], + lda, &b[js + js * b_dim1], ldb, &c__[is + js * + c_dim1], ldc, &d__[is + is * d_dim1], ldd, &e[js + + js * e_dim1], lde, &f[is + js * f_dim1], ldf, & + scaloc, &dsum, &dscale, &iwork[q + 2], &ppqq, & + linfo); + if (linfo > 0) { + *info = linfo; + } + + pq += ppqq; + if (scaloc != 1.) { + i__3 = js - 1; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L80: */ + } + i__3 = je; + for (k = js; k <= i__3; ++k) { + i__4 = is - 1; + _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], & + c__1); + i__4 = is - 1; + _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L90: */ + } + i__3 = je; + for (k = js; k <= i__3; ++k) { + i__4 = *m - ie; + _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], + &c__1); + i__4 = *m - ie; + _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], & + c__1); +/* L100: */ + } + i__3 = *n; + for (k = je + 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L110: */ + } + *scale *= scaloc; + } + +/* Substitute R(I, J) and L(I, J) into remaining */ +/* equation. 
*/ + + if (i__ > 1) { + i__3 = is - 1; + _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &a[is * + a_dim1 + 1], lda, &c__[is + js * c_dim1], ldc, + &c_b52, &c__[js * c_dim1 + 1], ldc); + i__3 = is - 1; + _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &d__[is * + d_dim1 + 1], ldd, &c__[is + js * c_dim1], ldc, + &c_b52, &f[js * f_dim1 + 1], ldf); + } + if (j < q) { + i__3 = *n - je; + _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js * + f_dim1], ldf, &b[js + (je + 1) * b_dim1], + ldb, &c_b52, &c__[is + (je + 1) * c_dim1], + ldc); + i__3 = *n - je; + _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js * + f_dim1], ldf, &e[js + (je + 1) * e_dim1], + lde, &c_b52, &f[is + (je + 1) * f_dim1], ldf); + } +/* L120: */ + } +/* L130: */ + } + if (dscale != 0.) { + if (*ijob == 1 || *ijob == 3) { + *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale * + sqrt(dsum)); + } else { + *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum)); + } + } + if (isolve == 2 && iround == 1) { + if (notran) { + ifunc = *ijob; + } + scale2 = *scale; + _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); + _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc); + _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); + } else if (isolve == 2 && iround == 2) { + _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); + _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); + *scale = scale2; + } +/* L150: */ + } + + } else { + +/* Solve transposed (I, J)-subsystem */ +/* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) */ +/* R(I, J) * B(J, J)' + L(I, J) * E(J, J)' = -F(I, J) */ +/* for I = 1,2,..., P; J = Q, Q-1,..., 1 */ + + *scale = 1.; + i__1 = p; + for (i__ = 1; i__ <= i__1; ++i__) { + is = iwork[i__]; + ie = iwork[i__ + 1] - 1; + mb = ie - is + 1; + i__2 = p + 2; + for (j = q; j >= i__2; --j) { + js = iwork[j]; + je = iwork[j + 1] - 1; + nb = je - 
js + 1; + _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], lda, & + b[js + js * b_dim1], ldb, &c__[is + js * c_dim1], ldc, + &d__[is + is * d_dim1], ldd, &e[js + js * e_dim1], + lde, &f[is + js * f_dim1], ldf, &scaloc, &dsum, & + dscale, &iwork[q + 2], &ppqq, &linfo); + if (linfo > 0) { + *info = linfo; + } + if (scaloc != 1.) { + i__3 = js - 1; + for (k = 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L160: */ + } + i__3 = je; + for (k = js; k <= i__3; ++k) { + i__4 = is - 1; + _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &c__1); + i__4 = is - 1; + _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L170: */ + } + i__3 = je; + for (k = js; k <= i__3; ++k) { + i__4 = *m - ie; + _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], & + c__1); + i__4 = *m - ie; + _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &c__1) + ; +/* L180: */ + } + i__3 = *n; + for (k = je + 1; k <= i__3; ++k) { + _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); + _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); +/* L190: */ + } + *scale *= scaloc; + } + +/* Substitute R(I, J) and L(I, J) into remaining equation. 
*/ + + if (j > p + 2) { + i__3 = js - 1; + _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &c__[is + js * + c_dim1], ldc, &b[js * b_dim1 + 1], ldb, &c_b52, & + f[is + f_dim1], ldf); + i__3 = js - 1; + _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &f[is + js * + f_dim1], ldf, &e[js * e_dim1 + 1], lde, &c_b52, & + f[is + f_dim1], ldf); + } + if (i__ < p) { + i__3 = *m - ie; + _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &a[is + (ie + 1) + * a_dim1], lda, &c__[is + js * c_dim1], ldc, & + c_b52, &c__[ie + 1 + js * c_dim1], ldc); + i__3 = *m - ie; + _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &d__[is + (ie + + 1) * d_dim1], ldd, &f[is + js * f_dim1], ldf, & + c_b52, &c__[ie + 1 + js * c_dim1], ldc); + } +/* L200: */ + } +/* L210: */ + } + + } + + work[1] = (doublereal) lwmin; + + return 0; + +/* End of DTGSYL */ + +} /* _starpu_dtgsyl_ */ diff --git a/min-dgels/base/SRC/dtpcon.c b/min-dgels/base/SRC/dtpcon.c new file mode 100644 index 0000000..17f3444 --- /dev/null +++ b/min-dgels/base/SRC/dtpcon.c @@ -0,0 +1,233 @@ +/* dtpcon.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase, kase1; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + doublereal anorm; + logical upper; + doublereal xnorm; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlantp_(char *, char *, char *, integer *, doublereal *, + doublereal *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dlatps_(char *, char *, char *, char *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *); + logical onenrm; + char normin[1]; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. 
Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPCON estimates the reciprocal of the condition number of a packed */ +/* triangular matrix A, in either the 1-norm or the infinity-norm. */ + +/* The norm of A is computed and an estimate is obtained for */ +/* norm(inv(A)), then the reciprocal of the condition number is */ +/* computed as */ +/* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangular matrix A, packed columnwise in */ +/* a linear array. The j-th column of A is stored in the array */ +/* AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* If DIAG = 'U', the diagonal elements of A are not referenced */ +/* and are assumed to be 1. */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. 
External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --iwork; + --work; + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + nounit = _starpu_lsame_(diag, "N"); + + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *rcond = 1.; + return 0; + } + + *rcond = 0.; + smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); + +/* Compute the norm of the triangular matrix A. */ + + anorm = _starpu_dlantp_(norm, uplo, diag, n, &ap[1], &work[1]); + +/* Continue only if ANORM > 0. */ + + if (anorm > 0.) { + +/* Estimate the norm of the inverse of A. */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(A). */ + + _starpu_dlatps_(uplo, "No transpose", diag, normin, n, &ap[1], &work[ + 1], &scale, &work[(*n << 1) + 1], info); + } else { + +/* Multiply by inv(A'). */ + + _starpu_dlatps_(uplo, "Transpose", diag, normin, n, &ap[1], &work[1], + &scale, &work[(*n << 1) + 1], info); + } + *(unsigned char *)normin = 'Y'; + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + xnorm = (d__1 = work[ix], abs(d__1)); + if (scale < xnorm * smlnum || scale == 0.) 
{ + goto L20; + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / anorm / ainvnm; + } + } + +L20: + return 0; + +/* End of DTPCON */ + +} /* _starpu_dtpcon_ */ diff --git a/min-dgels/base/SRC/dtprfs.c b/min-dgels/base/SRC/dtprfs.c new file mode 100644 index 0000000..42a7bed --- /dev/null +++ b/min-dgels/base/SRC/dtprfs.c @@ -0,0 +1,496 @@ +/* dtprfs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b19 = -1.; + +/* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s; + integer kc; + doublereal xk; + integer nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), _starpu_dtpmv_(char *, + char *, char *, integer *, doublereal *, doublereal *, integer 
*); + logical upper; + extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + char transt[1]; + logical nounit; + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPRFS provides error bounds and backward error estimates for the */ +/* solution to a system of linear equations with a triangular packed */ +/* coefficient matrix. */ + +/* The solution matrix X must be computed by DTPTRS or some other */ +/* means before entering this routine. DTPRFS does not do iterative */ +/* refinement because doing so cannot improve the backward error. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. 
*/ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangular matrix A, packed columnwise in */ +/* a linear array. The j-th column of A is stored in the array */ +/* AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ +/* If DIAG = 'U', the diagonal elements of A are not referenced */ +/* and are assumed to be 1. */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* The solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). */ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. 
*/ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } else if (*ldx < max(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A or A', depending on TRANS. 
*/ + + _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dtpmv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1); + _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L20: */ + } + + if (notran) { + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + kc = 1; + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[kc + i__ - 1], abs(d__1)) + * xk; +/* L30: */ + } + kc += k; +/* L40: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[kc + i__ - 1], abs(d__1)) + * xk; +/* L50: */ + } + work[k] += xk; + kc += k; +/* L60: */ + } + } + } else { + kc = 1; + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[kc + i__ - k], abs(d__1)) + * xk; +/* L70: */ + } + kc = kc + *n - k + 1; +/* L80: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = ap[kc + i__ - k], abs(d__1)) + * xk; +/* L90: */ + } + work[k] += xk; + kc = kc + *n - k + 1; +/* L100: */ + } + } + } + } else { + +/* Compute 
abs(A')*abs(X) + abs(B). */ + + if (upper) { + kc = 1; + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = k; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = ap[kc + i__ - 1], abs(d__1)) * (d__2 + = x[i__ + j * x_dim1], abs(d__2)); +/* L110: */ + } + work[k] += s; + kc += k; +/* L120: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = ap[kc + i__ - 1], abs(d__1)) * (d__2 + = x[i__ + j * x_dim1], abs(d__2)); +/* L130: */ + } + work[k] += s; + kc += k; +/* L140: */ + } + } + } else { + kc = 1; + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + s += (d__1 = ap[kc + i__ - k], abs(d__1)) * (d__2 + = x[i__ + j * x_dim1], abs(d__2)); +/* L150: */ + } + work[k] += s; + kc = kc + *n - k + 1; +/* L160: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + s += (d__1 = ap[kc + i__ - k], abs(d__1)) * (d__2 + = x[i__ + j * x_dim1], abs(d__2)); +/* L170: */ + } + work[k] += s; + kc = kc + *n - k + 1; +/* L180: */ + } + } + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L190: */ + } + berr[j] = s; + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. 
FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L200: */ + } + + kase = 0; +L210: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)'). */ + + _starpu_dtpsv_(uplo, transt, diag, n, &ap[1], &work[*n + 1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L220: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L230: */ + } + _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1); + } + goto L210; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L240: */ + } + if (lstres != 0.) 
{ + ferr[j] /= lstres; + } + +/* L250: */ + } + + return 0; + +/* End of DTPRFS */ + +} /* _starpu_dtprfs_ */ diff --git a/min-dgels/base/SRC/dtptri.c b/min-dgels/base/SRC/dtptri.c new file mode 100644 index 0000000..05b4edd --- /dev/null +++ b/min-dgels/base/SRC/dtptri.c @@ -0,0 +1,219 @@ +/* dtptri.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * + ap, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer j, jc, jj; + doublereal ajj; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + integer jclast; + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPTRI computes the inverse of a real upper or lower triangular */ +/* matrix A stored in packed format. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On entry, the upper or lower triangular matrix A, stored */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. */ +/* See below for further details. */ +/* On exit, the (triangular) inverse of the original matrix, in */ +/* the same packed storage format. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ +/* matrix is singular and its inverse can not be computed. */ + +/* Further Details */ +/* =============== */ + +/* A triangular matrix A can be transferred to packed storage using one */ +/* of the following program segments: */ + +/* UPLO = 'U': UPLO = 'L': */ + +/* JC = 1 JC = 1 */ +/* DO 2 J = 1, N DO 2 J = 1, N */ +/* DO 1 I = 1, J DO 1 I = J, N */ +/* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) */ +/* 1 CONTINUE 1 CONTINUE */ +/* JC = JC + J JC = JC + N - J + 1 */ +/* 2 CONTINUE 2 CONTINUE */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + nounit = _starpu_lsame_(diag, "N"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! 
_starpu_lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPTRI", &i__1); + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + if (upper) { + jj = 0; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + jj += *info; + if (ap[jj] == 0.) { + return 0; + } +/* L10: */ + } + } else { + jj = 1; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ap[jj] == 0.) { + return 0; + } + jj = jj + *n - *info + 1; +/* L20: */ + } + } + *info = 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + jc = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + ap[jc + j - 1] = 1. / ap[jc + j - 1]; + ajj = -ap[jc + j - 1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + _starpu_dtpmv_("Upper", "No transpose", diag, &i__2, &ap[1], &ap[jc], & + c__1); + i__2 = j - 1; + _starpu_dscal_(&i__2, &ajj, &ap[jc], &c__1); + jc += j; +/* L30: */ + } + + } else { + +/* Compute inverse of lower triangular matrix. */ + + jc = *n * (*n + 1) / 2; + for (j = *n; j >= 1; --j) { + if (nounit) { + ap[jc] = 1. / ap[jc]; + ajj = -ap[jc]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + _starpu_dtpmv_("Lower", "No transpose", diag, &i__1, &ap[jclast], &ap[ + jc + 1], &c__1); + i__1 = *n - j; + _starpu_dscal_(&i__1, &ajj, &ap[jc + 1], &c__1); + } + jclast = jc; + jc = jc - *n + j - 2; +/* L40: */ + } + } + + return 0; + +/* End of DTPTRI */ + +} /* _starpu_dtptri_ */ diff --git a/min-dgels/base/SRC/dtptrs.c b/min-dgels/base/SRC/dtptrs.c new file mode 100644 index 0000000..b424c53 --- /dev/null +++ b/min-dgels/base/SRC/dtptrs.c @@ -0,0 +1,193 @@ +/* dtptrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1; + + /* Local variables */ + integer j, jc; + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPTRS solves a triangular system of the form */ + +/* A * X = B or A**T * X = B, */ + +/* where A is a triangular matrix of order N stored in packed format, */ +/* and B is an N-by-NRHS matrix. A check is made to verify that A is */ +/* nonsingular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* The upper or lower triangular matrix A, packed columnwise in */ +/* a linear array. The j-th column of A is stored in the array */ +/* AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, if INFO = 0, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of A is zero, */ +/* indicating that the matrix is singular and the */ +/* solutions X have not been computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + --ap; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + nounit = _starpu_lsame_(diag, "N"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity. */ + + if (nounit) { + if (upper) { + jc = 1; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ap[jc + *info - 1] == 0.) { + return 0; + } + jc += *info; +/* L10: */ + } + } else { + jc = 1; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (ap[jc] == 0.) { + return 0; + } + jc = jc + *n - *info + 1; +/* L20: */ + } + } + } + *info = 0; + +/* Solve A * x = b or A' * x = b. */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &b[j * b_dim1 + 1], &c__1); +/* L30: */ + } + + return 0; + +/* End of DTPTRS */ + +} /* _starpu_dtptrs_ */ diff --git a/min-dgels/base/SRC/dtpttf.c b/min-dgels/base/SRC/dtpttf.c new file mode 100644 index 0000000..e373ff9 --- /dev/null +++ b/min-dgels/base/SRC/dtpttf.c @@ -0,0 +1,499 @@ +/* dtpttf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ /* Copies the triangle held in AP (standard packed storage) into ARF (rectangular full packed storage). Invalid arguments are reported through *info and _starpu_xerbla_; every return statement in this function returns 0. */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal + *ap, doublereal *arf, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, n1, n2, ij, jp, js, nt, lda, ijp; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nisodd; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. */ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ + +/* Purpose */ +/* ======= */ + +/* DTPTTF copies a triangular matrix A from standard packed format (TP) */ +/* to rectangular full packed format (TF). */ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': ARF in Normal format is wanted; */ +/* = 'T': ARF in Conjugate-transpose format is wanted. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ +/* On entry, the upper or lower triangular matrix A, packed */ +/* columnwise in a linear array. 
The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* ARF (output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ +/* On exit, the upper or lower triangular matrix A stored in */ +/* RFP format. For a further discussion see Notes below. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. 
We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 50 */ +/* 03 13 23 33 11 33 11 21 31 41 51 */ +/* 04 14 24 34 44 43 44 22 32 42 52 */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ /* Note: ap and arf are not pointer-shifted in this routine, so every index below is 0-based. Error codes set below: -1 bad transr, -2 bad uplo, -3 negative n. */ + + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! 
_starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPTTF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { /* both arms below perform the same single-element copy */ + if (normaltransr) { + arf[0] = ap[0]; + } else { + arf[0] = ap[0]; + } + return 0; + } + +/* Size of array ARF(0:NT-1) */ + + nt = *n * (*n + 1) / 2; /* NOTE(review): nt is not read anywhere else in this function */ + +/* Set N1 and N2 depending on LOWER */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + +/* If N is odd, set NISODD = .TRUE. */ +/* If N is even, set K = N/2 and NISODD = .FALSE. */ + +/* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) */ +/* where noe = 0 if n is even, noe = 1 if n is odd */ + + if (*n % 2 == 0) { + k = *n / 2; /* k is assigned only here and is read only on the even-N paths */ + nisodd = FALSE_; + lda = *n + 1; + } else { + nisodd = TRUE_; + lda = *n; + } + +/* ARF^C has lda rows and n+1-noe cols */ + + if (! 
normaltransr) { + lda = (*n + 1) / 2; + } + +/* start execution: there are eight cases */ + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* N is odd, TRANSR = 'N', and UPLO = 'L' */ + + ijp = 0; /* in every case ijp walks ap sequentially while ij is the destination offset in arf */ + jp = 0; + i__1 = n2; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + ij = i__ + jp; + arf[ij] = ap[ijp]; + ++ijp; + } + jp += lda; + } + i__1 = n2 - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = n2; + for (j = i__ + 1; j <= i__2; ++j) { + ij = i__ + j * lda; + arf[ij] = ap[ijp]; + ++ijp; + } + } + + } else { + +/* N is odd, TRANSR = 'N', and UPLO = 'U' */ + + ijp = 0; + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + ij = n2 + j; + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = ap[ijp]; + ++ijp; + ij += lda; + } + } + js = 0; + i__1 = *n - 1; + for (j = n1; j <= i__1; ++j) { + ij = js; /* redundant: the for-initialiser on the following statement assigns ij again */ + i__2 = js + j; + for (ij = js; ij <= i__2; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js += lda; + } + + } + + } else { + +/* N is odd and TRANSR = 
'T' */ + + if (lower) { + +/* N is odd, TRANSR = 'T', and UPLO = 'L' */ + + ijp = 0; + i__1 = n2; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = *n * lda - 1; + i__3 = lda; /* loop step; the sign test in the for-condition picks the matching termination comparison */ + for (ij = i__ * (lda + 1); i__3 < 0 ? ij >= i__2 : ij <= + i__2; ij += i__3) { + arf[ij] = ap[ijp]; + ++ijp; + } + } + js = 1; + i__1 = n2 - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + n2 - j - 1; + for (ij = js; ij <= i__3; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js = js + lda + 1; + } + + } else { + +/* N is odd, TRANSR = 'T', and UPLO = 'U' */ + + ijp = 0; + js = n2 * lda; + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + j; + for (ij = js; ij <= i__3; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js += lda; + } + i__1 = n1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__3 = i__ + (n1 + i__) * lda; + i__2 = lda; + for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += + i__2) { + arf[ij] = ap[ijp]; + ++ijp; + } + } + + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* N is even, TRANSR = 'N', and UPLO = 'L' */ + + ijp = 0; + jp = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + ij = i__ + 1 + jp; + arf[ij] = ap[ijp]; + ++ijp; + } + jp += lda; + } + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = k - 1; + for (j = i__; j <= i__2; ++j) { + ij = i__ + j * lda; + arf[ij] = ap[ijp]; + ++ijp; + } + } + + } else { + +/* N is even, TRANSR = 'N', and UPLO = 'U' */ + + ijp = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + ij = k + 1 + j; + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = ap[ijp]; + ++ijp; + ij += lda; + } + } + js = 0; + i__1 = *n - 1; + for (j = k; j <= i__1; ++j) { + ij = js; /* redundant, as in the odd-N case: the for-initialiser assigns ij again */ + i__2 = js + j; + for (ij = js; ij <= i__2; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js += lda; + } + + } + + } else { + +/* N is even and TRANSR = 'T' */ + + if (lower) { + +/* N is even, TRANSR = 'T', and UPLO = 'L' */ + + ijp = 
0; + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__2 = (*n + 1) * lda - 1; + i__3 = lda; + for (ij = i__ + (i__ + 1) * lda; i__3 < 0 ? ij >= i__2 : + ij <= i__2; ij += i__3) { + arf[ij] = ap[ijp]; + ++ijp; + } + } + js = 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + k - j - 1; + for (ij = js; ij <= i__3; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js = js + lda + 1; + } + + } else { + +/* N is even, TRANSR = 'T', and UPLO = 'U' */ + + ijp = 0; + js = (k + 1) * lda; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__3 = js + j; + for (ij = js; ij <= i__3; ++ij) { + arf[ij] = ap[ijp]; + ++ijp; + } + js += lda; + } + i__1 = k - 1; + for (i__ = 0; i__ <= i__1; ++i__) { + i__3 = i__ + (k + i__) * lda; + i__2 = lda; + for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += + i__2) { + arf[ij] = ap[ijp]; + ++ijp; + } + } + + } + + } + + } + + return 0; + +/* End of DTPTTF */ + +} /* _starpu_dtpttf_ */ diff --git a/min-dgels/base/SRC/dtpttr.c b/min-dgels/base/SRC/dtpttr.c new file mode 100644 index 0000000..fc4e967 --- /dev/null +++ b/min-dgels/base/SRC/dtpttr.c @@ -0,0 +1,144 @@ +/* dtpttr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ /* Unpacks the triangle stored column by column in AP into the two-dimensional array A; only the triangle selected by UPLO is written. Invalid arguments are reported through *info and _starpu_xerbla_; every return statement in this function returns 0. */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, k; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Julien Langou of the Univ. of Colorado Denver -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTPTTR copies a triangular matrix A from standard packed format (TP) */ +/* to standard full format (TR). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular. */ +/* = 'L': A is lower triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* AP (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ +/* On entry, the upper or lower triangular matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. 
*/ + +/* A (output) DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* On exit, the triangular matrix A. If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* after these shifts ap[k] and a[i__ + j * a_dim1] are addressed with 1-based indices */ + + /* Function Body */ + *info = 0; + lower = _starpu_lsame_(uplo, "L"); + if (! lower && ! 
_starpu_lsame_(uplo, "U")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTPTTR", &i__1); + return 0; + } + + if (lower) { /* there is no separate N == 0 test: with n == 0 neither loop nest executes */ + k = 0; /* k is the running 1-based position in ap */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ++k; + a[i__ + j * a_dim1] = ap[k]; + } + } + } else { + k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ++k; + a[i__ + j * a_dim1] = ap[k]; + } + } + } + + + return 0; + +/* End of DTPTTR */ + +} /* _starpu_dtpttr_ */ diff --git a/min-dgels/base/SRC/dtrcon.c b/min-dgels/base/SRC/dtrcon.c new file mode 100644 index 0000000..46cdbf2 --- /dev/null +++ b/min-dgels/base/SRC/dtrcon.c @@ -0,0 +1,241 @@ +/* dtrcon.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ /* Estimates the reciprocal condition number of the triangular matrix A and stores it in *rcond. Invalid arguments are reported through *info and _starpu_xerbla_; every return statement in this function returns 0. NOTE(review): info is also handed to _starpu_dlatrs_ below, so that callee may modify *info - confirm against its documentation. */ int _starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *rcond, doublereal *work, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer ix, kase, kase1; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, + integer *); + doublereal anorm; + logical upper; + doublereal xnorm; + extern /* Subroutine */ int 
_starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + extern doublereal _starpu_dlantr_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + doublereal ainvnm; + extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + logical onenrm; + char normin[1]; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRCON estimates the reciprocal of the condition number of a */ +/* triangular matrix A, in either the 1-norm or the infinity-norm. */ + +/* The norm of A is computed and an estimate is obtained for */ +/* norm(inv(A)), then the reciprocal of the condition number is */ +/* computed as */ +/* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ + +/* Arguments */ +/* ========= */ + +/* NORM (input) CHARACTER*1 */ +/* Specifies whether the 1-norm condition number or the */ +/* infinity-norm condition number is required: */ +/* = '1' or 'O': 1-norm; */ +/* = 'I': Infinity-norm. */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular matrix A. 
If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of the array A contains the upper */ +/* triangular matrix, and the strictly lower triangular part of */ +/* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of the array A contains the lower triangular */ +/* matrix, and the strictly upper triangular part of A is not */ +/* referenced. If DIAG = 'U', the diagonal elements of A are */ +/* also not referenced and are assumed to be 1. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* RCOND (output) DOUBLE PRECISION */ +/* The reciprocal of the condition number of the matrix A, */ +/* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); + nounit = _starpu_lsame_(diag, "N"); + + if (! onenrm && ! _starpu_lsame_(norm, "I")) { + *info = -1; + } else if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -2; + } else if (! nounit && ! 
_starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRCON", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *rcond = 1.; + return 0; + } + + *rcond = 0.; /* default result: it is only overwritten at the end, when anorm > 0, the loop finished normally and ainvnm != 0 */ + smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); + +/* Compute the norm of the triangular matrix A. */ + + anorm = _starpu_dlantr_(norm, uplo, diag, n, n, &a[a_offset], lda, &work[1]); + +/* Continue only if ANORM > 0. */ + + if (anorm > 0.) { + +/* Estimate the norm of the inverse of A. */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: /* loop head: repeated (via goto L10 below) until _starpu_dlacn2_ hands back kase == 0 */ + _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(A). */ + + _starpu_dlatrs_(uplo, "No transpose", diag, normin, n, &a[a_offset], + lda, &work[1], &scale, &work[(*n << 1) + 1], info); + } else { + +/* Multiply by inv(A'). */ + + _starpu_dlatrs_(uplo, "Transpose", diag, normin, n, &a[a_offset], lda, + &work[1], &scale, &work[(*n << 1) + 1], info); + } + *(unsigned char *)normin = 'Y'; /* every later _starpu_dlatrs_ call in this loop receives 'Y' here instead of 'N' */ + +/* Multiply by 1/SCALE if doing so will not cause overflow. */ + + if (scale != 1.) { + ix = _starpu_idamax_(n, &work[1], &c__1); + xnorm = (d__1 = work[ix], abs(d__1)); + if (scale < xnorm * smlnum || scale == 0.) { + goto L20; /* give up on the estimate: *rcond still holds 0 */ + } + _starpu_drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / anorm / ainvnm; + } + } + +L20: + return 0; + +/* End of DTRCON */ + +} /* _starpu_dtrcon_ */ diff --git a/min-dgels/base/SRC/dtrevc.c b/min-dgels/base/SRC/dtrevc.c new file mode 100644 index 0000000..785a352 --- /dev/null +++ b/min-dgels/base/SRC/dtrevc.c @@ -0,0 +1,1228 @@ +/* dtrevc.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static logical c_false = FALSE_; +static integer c__1 = 1; +static doublereal c_b22 = 1.; +static doublereal c_b25 = 0.; +static integer c__2 = 2; +static logical c_true = TRUE_; + +/* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k; + doublereal x[4] /* was [2][2] */; + integer j1, j2, n2, ii, ki, ip, is; + doublereal wi, wr, rec, ulp, beta, emax; + logical pair; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + logical allv; + integer ierr; + doublereal unfl, ovfl, smin; + logical over; + doublereal vmax; + integer jnxt; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + doublereal remax; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, 
integer *); + logical leftv, bothv; + extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + doublereal vcrit; + logical somev; + doublereal xnorm; + extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * +, doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern integer _starpu_idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical rightv; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTREVC computes some or all of the right and/or left eigenvectors of */ +/* a real upper quasi-triangular matrix T. */ +/* Matrices of this type are produced by the Schur factorization of */ +/* a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. */ + +/* The right eigenvector x and the left eigenvector y of T corresponding */ +/* to an eigenvalue w are defined by: */ + +/* T*x = w*x, (y**H)*T = w*(y**H) */ + +/* where y**H denotes the conjugate transpose of y. */ +/* The eigenvalues are not input to this routine, but are read directly */ +/* from the diagonal blocks of T. */ + +/* This routine returns the matrices X and/or Y of right and left */ +/* eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an */ +/* input matrix. If Q is the orthogonal factor that reduces a matrix */ +/* A to Schur form T, then Q*X and Q*Y are the matrices of right and */ +/* left eigenvectors of A. 
*/ + +/* Arguments */ +/* ========= */ + +/* SIDE (input) CHARACTER*1 */ +/* = 'R': compute right eigenvectors only; */ +/* = 'L': compute left eigenvectors only; */ +/* = 'B': compute both right and left eigenvectors. */ + +/* HOWMNY (input) CHARACTER*1 */ +/* = 'A': compute all right and/or left eigenvectors; */ +/* = 'B': compute all right and/or left eigenvectors, */ +/* backtransformed by the matrices in VR and/or VL; */ +/* = 'S': compute selected right and/or left eigenvectors, */ +/* as indicated by the logical array SELECT. */ + +/* SELECT (input/output) LOGICAL array, dimension (N) */ +/* If HOWMNY = 'S', SELECT specifies the eigenvectors to be */ +/* computed. */ +/* If w(j) is a real eigenvalue, the corresponding real */ +/* eigenvector is computed if SELECT(j) is .TRUE.. */ +/* If w(j) and w(j+1) are the real and imaginary parts of a */ +/* complex eigenvalue, the corresponding complex eigenvector is */ +/* computed if either SELECT(j) or SELECT(j+1) is .TRUE., and */ +/* on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to */ +/* .FALSE.. */ +/* Not referenced if HOWMNY = 'A' or 'B'. */ + +/* N (input) INTEGER */ +/* The order of the matrix T. N >= 0. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ +/* The upper quasi-triangular matrix T in Schur canonical form. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max(1,N). */ + +/* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ +/* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */ +/* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ +/* of Schur vectors returned by DHSEQR). */ +/* On exit, if SIDE = 'L' or 'B', VL contains: */ +/* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; */ +/* if HOWMNY = 'B', the matrix Q*Y; */ +/* if HOWMNY = 'S', the left eigenvectors of T specified by */ +/* SELECT, stored consecutively in the columns */ +/* of VL, in the same order as their */ +/* eigenvalues. 
*/ +/* A complex eigenvector corresponding to a complex eigenvalue */ +/* is stored in two consecutive columns, the first holding the */ +/* real part, and the second the imaginary part. */ +/* Not referenced if SIDE = 'R'. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. LDVL >= 1, and if */ +/* SIDE = 'L' or 'B', LDVL >= N. */ + +/* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ +/* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */ +/* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ +/* of Schur vectors returned by DHSEQR). */ +/* On exit, if SIDE = 'R' or 'B', VR contains: */ +/* if HOWMNY = 'A', the matrix X of right eigenvectors of T; */ +/* if HOWMNY = 'B', the matrix Q*X; */ +/* if HOWMNY = 'S', the right eigenvectors of T specified by */ +/* SELECT, stored consecutively in the columns */ +/* of VR, in the same order as their */ +/* eigenvalues. */ +/* A complex eigenvector corresponding to a complex eigenvalue */ +/* is stored in two consecutive columns, the first holding the */ +/* real part and the second the imaginary part. */ +/* Not referenced if SIDE = 'L'. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. LDVR >= 1, and if */ +/* SIDE = 'R' or 'B', LDVR >= N. */ + +/* MM (input) INTEGER */ +/* The number of columns in the arrays VL and/or VR. MM >= M. */ + +/* M (output) INTEGER */ +/* The number of columns in the arrays VL and/or VR actually */ +/* used to store the eigenvectors. */ +/* If HOWMNY = 'A' or 'B', M is set to N. */ +/* Each selected real eigenvector occupies one column and each */ +/* selected complex eigenvector occupies two columns. 
*/ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The algorithm used in this program is basically backward (forward) */ +/* substitution, with scaling to make the code robust against */ +/* possible overflow. */ + +/* Each eigenvector is normalized so that the element of largest */ +/* magnitude has magnitude 1; here the magnitude of a complex number */ +/* (x,y) is taken to be |x| + |y|. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = _starpu_lsame_(side, "B"); + rightv = _starpu_lsame_(side, "R") || bothv; + leftv = _starpu_lsame_(side, "L") || bothv; + + allv = _starpu_lsame_(howmny, "A"); + over = _starpu_lsame_(howmny, "B"); + somev = _starpu_lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* Set M to the number of columns required to store the selected */ +/* eigenvectors, standardize the array SELECT if necessary, and */ +/* test MM. 
*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = _starpu_dlamch_("Safe minimum"); + ovfl = 1. / unfl; + _starpu_dlabad_(&unfl, &ovfl); + ulp = _starpu_dlamch_("Precision"); + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + +/* Compute 1-norm of each column of strictly upper triangular */ +/* part of T to control overflow in triangular solver. */ + + work[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); +/* L20: */ + } +/* L30: */ + } + +/* Index IP is used to specify the real or complex eigenvalue: */ +/* IP = 0, real eigenvalue, */ +/* 1, first of conjugate complex pair: (wr,wi) */ +/* -1, second of conjugate complex pair: (wr,wi) */ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. */ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). 
*/ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* Solve the upper quasi-triangular system: */ +/* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. */ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + _starpu_dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale X(1,1) to avoid overflow when updating */ +/* the right-hand side. */ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b22, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, & + work[j - 1 + *n], n, &wr, &c_b25, x, &c__2, & + scale, &xnorm, &ierr); + +/* Scale X(1,1) and X(2,1) to avoid overflow when */ +/* updating the right-hand side. */ + + if (xnorm > 1.) 
{ +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + _starpu_dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = _starpu_idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + _starpu_dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = _starpu_idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + _starpu_dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* Complex right eigenvector. */ + +/* Initial solve */ +/* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. 
*/ +/* [ (T(KI,KI-1) T(KI,KI) ) ] */ + + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ + ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + *n] = 1.; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.; + } + work[ki + *n] = 0.; + work[ki - 1 + n2] = 0.; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* Solve upper quasi-triangular system: */ +/* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) */ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + _starpu_dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale X(1,1) and X(1,2) to avoid overflow when */ +/* updating the right-hand side. */ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) 
{ + _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); + _starpu_dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + d__1 = -x[2]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + _starpu_dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b22, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); + +/* Scale X to avoid overflow when updating */ +/* the right-hand side. */ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); + _starpu_dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[2]; + _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + d__1 = -x[3]; + _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! 
over) { + _starpu_dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + _starpu_dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L100: */ + } + + remax = 1. / emax; + _starpu_dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + _starpu_dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + _starpu_dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + _starpu_dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L120: */ + } + remax = 1. / emax; + _starpu_dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + _starpu_dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.) 
{ + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* Solve the quasi-triangular system: */ +/* (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK */ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + +/* Scale if necessary to avoid overflow when forming */ +/* the right-hand side. */ + + if (work[j] > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + _starpu_dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + d__2 = (d__1 = work[j + *n], abs(d__1)); + vmax = max(d__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* 2-by-2 diagonal block */ + +/* Scale if necessary to avoid overflow when forming */ +/* the right-hand side. 
*/ + +/* Computing MAX */ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve */ +/* [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) */ +/* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) */ + + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + 1 + *n], abs(d__2)), d__3 = max( + d__3,d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + _starpu_dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = _starpu_idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__2 = *n - ki + 1; + _starpu_dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = _starpu_idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. 
/ (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + _starpu_dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* Complex left eigenvector. */ + +/* Initial solve: */ +/* ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. */ +/* ((T(KI+1,KI) T(KI+1,KI+1)) ) */ + + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = + t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.; + } else { + work[ki + *n] = 1.; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.; + work[ki + n2] = 0.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* Solve complex quasi-triangular system: */ +/* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 */ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + +/* Scale if necessary to avoid overflow when */ +/* forming the right-hand side elements. */ + + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + d__1 = -wi; + _starpu_dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + n2], abs(d__2)), d__3 = max(d__3, + d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* 2-by-2 diagonal block */ + +/* Scale if necessary to avoid overflow when forming */ +/* the right-hand side elements. */ + +/* Computing MAX */ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. 
/ vmax; + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve 2-by-2 complex linear equation */ +/* ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B */ +/* ([T(j+1,j) T(j+1,j+1)] ) */ + + d__1 = -wi; + _starpu_dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b22, &t[j + + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + _starpu_dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, + d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) + , d__2 = abs(x[3]), d__1 = max(d__1,d__2); + vmax = max(d__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! 
over) { + i__2 = *n - ki + 1; + _starpu_dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + _starpu_dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L220: */ + } + remax = 1. / emax; + i__2 = *n - ki + 1; + _starpu_dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + _starpu_dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + _starpu_dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + _starpu_dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L240: */ + } + remax = 1. 
/ emax; + _starpu_dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + _starpu_dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of DTREVC */ + +} /* _starpu_dtrevc_ */ diff --git a/min-dgels/base/SRC/dtrexc.c b/min-dgels/base/SRC/dtrexc.c new file mode 100644 index 0000000..75fd6f4 --- /dev/null +++ b/min-dgels/base/SRC/dtrexc.c @@ -0,0 +1,403 @@ +/* dtrexc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * + ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + + /* Local variables */ + integer nbf, nbl, here; + extern logical _starpu_lsame_(char *, char *); + logical wantq; + extern /* Subroutine */ int _starpu_dlaexc_(logical *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + integer nbnext; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* ..
*/ + +/* Purpose */ +/* ======= */ + +/* DTREXC reorders the real Schur factorization of a real matrix */ +/* A = Q*T*Q**T, so that the diagonal block of T with row index IFST is */ +/* moved to row ILST. */ + +/* The real Schur form T is reordered by an orthogonal similarity */ +/* transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors */ +/* is updated by postmultiplying it with Z. */ + +/* T must be in Schur canonical form (as returned by DHSEQR), that is, */ +/* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ +/* 2-by-2 diagonal block has its diagonal elements equal and its */ +/* off-diagonal elements of opposite sign. */ + +/* Arguments */ +/* ========= */ + +/* COMPQ (input) CHARACTER*1 */ +/* = 'V': update the matrix Q of Schur vectors; */ +/* = 'N': do not update Q. */ + +/* N (input) INTEGER */ +/* The order of the matrix T. N >= 0. */ + +/* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ +/* On entry, the upper quasi-triangular matrix T, in */ +/* Schur canonical form. */ +/* On exit, the reordered upper quasi-triangular matrix, again */ +/* in Schur canonical form. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. */ +/* On exit, if COMPQ = 'V', Q has been postmultiplied by the */ +/* orthogonal transformation matrix Z which reorders T. */ +/* If COMPQ = 'N', Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. LDQ >= 1; if COMPQ = 'V', LDQ >= max(1,N). */ + +/* IFST (input/output) INTEGER */ +/* ILST (input/output) INTEGER */ +/* Specify the reordering of the diagonal blocks of T. */ +/* The block with row index IFST is moved to row ILST, by a */ +/* sequence of transpositions between adjacent blocks.
*/ +/* On exit, if IFST pointed on entry to the second row of a */ +/* 2-by-2 block, it is changed to point to the first row; ILST */ +/* always points to the first row of the block in its final */ +/* position (which may differ from its input value by +1 or -1). */ +/* 1 <= IFST <= N; 1 <= ILST <= N (not checked when N = 0). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* = 1: two adjacent blocks were too close to swap (the problem */ +/* is very ill-conditioned); T may have been partially */ +/* reordered, and ILST points to the first row of the */ +/* current position of the block being moved. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input arguments; IFST and ILST are range-checked only when N > 0, so that a call with N = 0 reaches the quick return instead of being rejected. */ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + wantq = _starpu_lsame_(compq, "V"); + if (! wantq && ! _starpu_lsame_(compq, "N")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || (wantq && *ldq < max(1,*n))) { + *info = -6; + } else if ((*ifst < 1 || *ifst > *n) && *n > 0) { + *info = -7; + } else if ((*ilst < 1 || *ilst > *n) && *n > 0) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Determine the first row of specified block */ +/* and find out whether it is 1 by 1 or 2 by 2. */ + + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.)
{ + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + +/* Determine the first row of the final block */ +/* and find out whether it is 1 by 1 or 2 by 2. */ + + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap block with next one below */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & + nbf, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks (NBF = 3 marks that split state) */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* Current block consists of two 1 by 1 blocks each of which */ +/* must be swapped individually */ + + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here + 1; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + ++here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + 2 + (here + 1) * t_dim1] == 0.)
{ + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2 by 2 Block did split */ + + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here + 1; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + + } else { + + here = *ifst; +L20: + +/* Swap block with next one above */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &nbf, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks (NBF = 3 marks that split state) */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* Current block consists of two 1 by 1 blocks each of which */ +/* must be swapped individually */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &nbnext, &c__1, &work[1], info); + --here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + (here - 1) * t_dim1] == 0.)
{ + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + i__1 = here - 1; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__2, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2 by 2 Block did split */ + + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here - 1; + _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + + return 0; + +/* End of DTREXC */ + +} /* _starpu_dtrexc_ */ diff --git a/min-dgels/base/SRC/dtrrfs.c b/min-dgels/base/SRC/dtrrfs.c new file mode 100644 index 0000000..98202af --- /dev/null +++ b/min-dgels/base/SRC/dtrrfs.c @@ -0,0 +1,493 @@ +/* dtrrfs.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b19 = -1.; + +/* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, + i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k; + doublereal s, xk; + integer
nz; + doublereal eps; + integer kase; + doublereal safe1, safe2; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), _starpu_dtrsv_(char *, char *, char *, integer *, doublereal *, + integer *, doublereal *, integer *), + _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + extern doublereal _starpu_dlamch_(char *); + doublereal safmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical notran; + char transt[1]; + logical nounit; + doublereal lstres; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRRFS provides error bounds and backward error estimates for the */ +/* solution to a system of linear equations with a triangular */ +/* coefficient matrix. */ + +/* The solution matrix X must be computed by DTRTRS or some other */ +/* means before entering this routine. DTRRFS does not do iterative */ +/* refinement because doing so cannot improve the backward error. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrices B and X. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of the array A contains the upper */ +/* triangular matrix, and the strictly lower triangular part of */ +/* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of the array A contains the lower triangular */ +/* matrix, and the strictly upper triangular part of A is not */ +/* referenced. If DIAG = 'U', the diagonal elements of A are */ +/* also not referenced and are assumed to be 1. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* The right hand side matrix B. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* The solution matrix X. */ + +/* LDX (input) INTEGER */ +/* The leading dimension of the array X. LDX >= max(1,N). */ + +/* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The estimated forward error bound for each solution vector */ +/* X(j) (the j-th column of the solution matrix X). 
*/ +/* If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* is an estimated upper bound for the magnitude of the largest */ +/* element in (X(j) - XTRUE) divided by the magnitude of the */ +/* largest element in X(j). The estimate is as reliable as */ +/* the estimate for RCOND, and is almost always a slight */ +/* overestimate of the true error. */ + +/* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ +/* The componentwise relative backward error of each solution */ +/* vector X(j) (i.e., the smallest relative change in */ +/* any element of A or B that makes X(j) an exact solution). */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ + +/* IWORK (workspace) INTEGER array, dimension (N) */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + notran = _starpu_lsame_(trans, "N"); + nounit = _starpu_lsame_(diag, "N"); + + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! _starpu_lsame_(trans, "T") && ! + _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! 
_starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldx < max(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRRFS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = _starpu_dlamch_("Epsilon"); + safmin = _starpu_dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A or A', depending on TRANS. */ + + _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); + _starpu_dtrmv_(uplo, trans, diag, n, &a[a_offset], lda, &work[*n + 1], &c__1); + _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L20: */ + } + + if (notran) { + +/* Compute abs(A)*abs(X) + abs(B). 
*/ + + if (upper) { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs( + d__1)) * xk; +/* L30: */ + } +/* L40: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs( + d__1)) * xk; +/* L50: */ + } + work[k] += xk; +/* L60: */ + } + } + } else { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs( + d__1)) * xk; +/* L70: */ + } +/* L80: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs( + d__1)) * xk; +/* L90: */ + } + work[k] += xk; +/* L100: */ + } + } + } + } else { + +/* Compute abs(A')*abs(X) + abs(B). 
*/ + + if (upper) { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = k; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L110: */ + } + work[k] += s; +/* L120: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L130: */ + } + work[k] += s; +/* L140: */ + } + } + } else { + if (nounit) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = *n; + for (i__ = k; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L150: */ + } + work[k] += s; +/* L160: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L170: */ + } + work[k] += s; +/* L180: */ + } + } + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = max(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = max(d__2,d__3); + } +/* L190: */ + } + berr[j] = s; + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. 
FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L200: */ + } + + kase = 0; +L210: + _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)'). */ + + _starpu_dtrsv_(uplo, transt, diag, n, &a[a_offset], lda, &work[*n + 1] +, &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L220: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L230: */ + } + _starpu_dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &work[*n + 1], + &c__1); + } + goto L210; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = max(d__2,d__3); +/* L240: */ + } + if (lstres != 0.) 
{ + ferr[j] /= lstres; + } + +/* L250: */ + } + + return 0; + +/* End of DTRRFS */ + +} /* _starpu_dtrrfs_ */ diff --git a/min-dgels/base/SRC/dtrsen.c b/min-dgels/base/SRC/dtrsen.c new file mode 100644 index 0000000..d487d14 --- /dev/null +++ b/min-dgels/base/SRC/dtrsen.c @@ -0,0 +1,530 @@ +/* dtrsen.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer + *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal + *sep, doublereal *work, integer *lwork, integer *iwork, integer * + liwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer k, n1, n2, kk, nn, ks; + doublereal est; + integer kase; + logical pair; + integer ierr; + logical swap; + doublereal scale; + extern logical _starpu_lsame_(char *, char *); + integer isave[3], lwmin; + logical wantq, wants; + doublereal rnorm; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, 
integer *), + _starpu_xerbla_(char *, integer *); + logical wantbh; + extern /* Subroutine */ int _starpu_dtrexc_(char *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *); + integer liwmin; + logical wantsp, lquery; + extern /* Subroutine */ int _starpu_dtrsyl_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRSEN reorders the real Schur factorization of a real matrix */ +/* A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in */ +/* the leading diagonal blocks of the upper quasi-triangular matrix T, */ +/* and the leading columns of Q form an orthonormal basis of the */ +/* corresponding right invariant subspace. */ + +/* Optionally the routine computes the reciprocal condition numbers of */ +/* the cluster of eigenvalues and/or the invariant subspace. */ + +/* T must be in Schur canonical form (as returned by DHSEQR), that is, */ +/* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ +/* 2-by-2 diagonal block has its diagonal elements equal and its */ +/* off-diagonal elements of opposite sign. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies whether condition numbers are required for the */ +/* cluster of eigenvalues (S) or the invariant subspace (SEP): */ +/* = 'N': none; */ +/* = 'E': for eigenvalues only (S); */ +/* = 'V': for invariant subspace only (SEP); */ +/* = 'B': for both eigenvalues and invariant subspace (S and */ +/* SEP). */ + +/* COMPQ (input) CHARACTER*1 */ +/* = 'V': update the matrix Q of Schur vectors; */ +/* = 'N': do not update Q. 
*/ + +/* SELECT (input) LOGICAL array, dimension (N) */ +/* SELECT specifies the eigenvalues in the selected cluster. To */ +/* select a real eigenvalue w(j), SELECT(j) must be set to */ +/* .TRUE.. To select a complex conjugate pair of eigenvalues */ +/* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, */ +/* either SELECT(j) or SELECT(j+1) or both must be set to */ +/* .TRUE.; a complex conjugate pair of eigenvalues must be */ +/* either both included in the cluster or both excluded. */ + +/* N (input) INTEGER */ +/* The order of the matrix T. N >= 0. */ + +/* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ +/* On entry, the upper quasi-triangular matrix T, in Schur */ +/* canonical form. */ +/* On exit, T is overwritten by the reordered matrix T, again in */ +/* Schur canonical form, with the selected eigenvalues in the */ +/* leading diagonal blocks. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max(1,N). */ + +/* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ +/* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. */ +/* On exit, if COMPQ = 'V', Q has been postmultiplied by the */ +/* orthogonal transformation matrix which reorders T; the */ +/* leading M columns of Q form an orthonormal basis for the */ +/* specified invariant subspace. */ +/* If COMPQ = 'N', Q is not referenced. */ + +/* LDQ (input) INTEGER */ +/* The leading dimension of the array Q. */ +/* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. */ + +/* WR (output) DOUBLE PRECISION array, dimension (N) */ +/* WI (output) DOUBLE PRECISION array, dimension (N) */ +/* The real and imaginary parts, respectively, of the reordered */ +/* eigenvalues of T. The eigenvalues are stored in the same */ +/* order as on the diagonal of T, with WR(i) = T(i,i) and, if */ +/* T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and */ +/* WI(i+1) = -WI(i). 
Note that if a complex eigenvalue is */ +/* sufficiently ill-conditioned, then its value may differ */ +/* significantly from its value before reordering. */ + +/* M (output) INTEGER */ +/* The dimension of the specified invariant subspace. */ +/* 0 <= M <= N. */ + +/* S (output) DOUBLE PRECISION */ +/* If JOB = 'E' or 'B', S is a lower bound on the reciprocal */ +/* condition number for the selected cluster of eigenvalues. */ +/* S cannot underestimate the true reciprocal condition number */ +/* by more than a factor of sqrt(N). If M = 0 or N, S = 1. */ +/* If JOB = 'N' or 'V', S is not referenced. */ + +/* SEP (output) DOUBLE PRECISION */ +/* If JOB = 'V' or 'B', SEP is the estimated reciprocal */ +/* condition number of the specified invariant subspace. If */ +/* M = 0 or N, SEP = norm(T). */ +/* If JOB = 'N' or 'E', SEP is not referenced. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* If JOB = 'N', LWORK >= max(1,N); */ +/* if JOB = 'E', LWORK >= max(1,M*(N-M)); */ +/* if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. */ + +/* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ +/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + +/* LIWORK (input) INTEGER */ +/* The dimension of the array IWORK. */ +/* If JOB = 'N' or 'E', LIWORK >= 1; */ +/* if JOB = 'V' or 'B', LIWORK >= max(1,M*(N-M)). 
*/ + +/* If LIWORK = -1, then a workspace query is assumed; the */ +/* routine only calculates the optimal size of the IWORK array, */ +/* returns this value as the first entry of the IWORK array, and */ +/* no error message related to LIWORK is issued by XERBLA. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* = 1: reordering of T failed because some eigenvalues are too */ +/* close to separate (the problem is very ill-conditioned); */ +/* T may have been partially reordered, and WR and WI */ +/* contain the eigenvalues in the same order as in T; S and */ +/* SEP (if requested) are set to zero. */ + +/* Further Details */ +/* =============== */ + +/* DTRSEN first collects the selected eigenvalues by computing an */ +/* orthogonal transformation Z to move them to the top left corner of T. */ +/* In other words, the selected eigenvalues are the eigenvalues of T11 */ +/* in: */ + +/* Z'*T*Z = ( T11 T12 ) n1 */ +/* ( 0 T22 ) n2 */ +/* n1 n2 */ + +/* where N = n1+n2 and Z' means the transpose of Z. The first n1 columns */ +/* of Z span the specified invariant subspace of T. */ + +/* If T has been obtained from the real Schur factorization of a matrix */ +/* A = Q*T*Q', then the reordered real Schur factorization of A is given */ +/* by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span */ +/* the corresponding invariant subspace of A. */ + +/* The reciprocal condition number of the average of the eigenvalues of */ +/* T11 may be returned in S. S lies between 0 (very badly conditioned) */ +/* and 1 (very well conditioned). It is computed as follows. First we */ +/* compute R so that */ + +/* P = ( I R ) n1 */ +/* ( 0 0 ) n2 */ +/* n1 n2 */ + +/* is the projector on the invariant subspace associated with T11. */ +/* R is the solution of the Sylvester equation: */ + +/* T11*R - R*T22 = T12. 
*/ + +/* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote */ +/* the two-norm of M. Then S is computed as the lower bound */ + +/* (1 + F-norm(R)**2)**(-1/2) */ + +/* on the reciprocal of 2-norm(P), the true reciprocal condition number. */ +/* S cannot underestimate 1 / 2-norm(P) by more than a factor of */ +/* sqrt(N). */ + +/* An approximate error bound for the computed average of the */ +/* eigenvalues of T11 is */ + +/* EPS * norm(T) / S */ + +/* where EPS is the machine precision. */ + +/* The reciprocal condition number of the right invariant subspace */ +/* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. */ +/* SEP is defined as the separation of T11 and T22: */ + +/* sep( T11, T22 ) = sigma-min( C ) */ + +/* where sigma-min(C) is the smallest singular value of the */ +/* n1*n2-by-n1*n2 matrix */ + +/* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) */ + +/* I(m) is an m by m identity matrix, and kprod denotes the Kronecker */ +/* product. We estimate sigma-min(C) by the reciprocal of an estimate of */ +/* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) */ +/* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). */ + +/* When SEP is small, small changes in T can cause large changes in */ +/* the invariant subspace. An approximate bound on the maximum angular */ +/* error in the computed right invariant subspace is */ + +/* EPS * norm(T) / SEP */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. 
*/ + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --wr; + --wi; + --work; + --iwork; + + /* Function Body */ + wantbh = _starpu_lsame_(job, "B"); + wants = _starpu_lsame_(job, "E") || wantbh; + wantsp = _starpu_lsame_(job, "V") || wantbh; + wantq = _starpu_lsame_(compq, "V"); + + *info = 0; + lquery = *lwork == -1 || *liwork == -1; /* LWORK = -1 or LIWORK = -1 requests a workspace query, as documented above */ + if (! _starpu_lsame_(job, "N") && ! wants && ! wantsp) { + *info = -1; + } else if (! _starpu_lsame_(compq, "N") && ! wantq) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -8; + } else { + +/* Set M to the dimension of the specified invariant subspace, */ +/* and test LWORK and LIWORK. */ + + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + if (k < *n) { + if (t[k + 1 + k * t_dim1] == 0.) { + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[k] || select[k + 1]) { + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + + n1 = *m; + n2 = *n - *m; + nn = n1 * n2; + + if (wantsp) { +/* Computing MAX */ + i__1 = 1, i__2 = nn << 1; + lwmin = max(i__1,i__2); + liwmin = max(1,nn); + } else if (_starpu_lsame_(job, "N")) { + lwmin = max(1,*n); + liwmin = 1; + } else if (_starpu_lsame_(job, "E")) { + lwmin = max(1,nn); + liwmin = 1; + } + + if (*lwork < lwmin && ! lquery) { + *info = -15; + } else if (*liwork < liwmin && ! lquery) { + *info = -17; + } + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRSEN", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. 
*/ + + if (*m == *n || *m == 0) { + if (wants) { + *s = 1.; + } + if (wantsp) { + *sep = _starpu_dlange_("1", n, n, &t[t_offset], ldt, &work[1]); + } + goto L40; + } + +/* Collect the selected blocks at the top-left corner of T. */ + + ks = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + swap = select[k]; + if (k < *n) { + if (t[k + 1 + k * t_dim1] != 0.) { + pair = TRUE_; + swap = swap || select[k + 1]; + } + } + if (swap) { + ++ks; + +/* Swap the K-th block to position KS. */ + + ierr = 0; + kk = k; + if (k != ks) { + _starpu_dtrexc_(compq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + kk, &ks, &work[1], &ierr); + } + if (ierr == 1 || ierr == 2) { + +/* Blocks too close to swap: exit. */ + + *info = 1; + if (wants) { + *s = 0.; + } + if (wantsp) { + *sep = 0.; + } + goto L40; + } + if (pair) { + ++ks; + } + } + } +/* L20: */ + } + + if (wants) { + +/* Solve Sylvester equation for R: */ + +/* T11*R - R*T22 = scale*T12 */ + + _starpu_dlacpy_("F", &n1, &n2, &t[(n1 + 1) * t_dim1 + 1], ldt, &work[1], &n1); + _starpu_dtrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + 1 + (n1 + + 1) * t_dim1], ldt, &work[1], &n1, &scale, &ierr); + +/* Estimate the reciprocal of the condition number of the cluster */ +/* of eigenvalues. */ + + rnorm = _starpu_dlange_("F", &n1, &n2, &work[1], &n1, &work[1]); + if (rnorm == 0.) { + *s = 1.; + } else { + *s = scale / (sqrt(scale * scale / rnorm + rnorm) * sqrt(rnorm)); + } + } + + if (wantsp) { + +/* Estimate sep(T11,T22). */ + + est = 0.; + kase = 0; +L30: + _starpu_dlacn2_(&nn, &work[nn + 1], &work[1], &iwork[1], &est, &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Solve T11*R - R*T22 = scale*X. */ + + _starpu_dtrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + + 1 + (n1 + 1) * t_dim1], ldt, &work[1], &n1, &scale, & + ierr); + } else { + +/* Solve T11'*R - R*T22' = scale*X. 
*/ + + _starpu_dtrsyl_("T", "T", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + + 1 + (n1 + 1) * t_dim1], ldt, &work[1], &n1, &scale, & + ierr); + } + goto L30; + } + + *sep = scale / est; + } + +L40: + +/* Store the output eigenvalues in WR and WI. */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + wr[k] = t[k + k * t_dim1]; + wi[k] = 0.; +/* L50: */ + } + i__1 = *n - 1; + for (k = 1; k <= i__1; ++k) { + if (t[k + 1 + k * t_dim1] != 0.) { + wi[k] = sqrt((d__1 = t[k + (k + 1) * t_dim1], abs(d__1))) * sqrt(( + d__2 = t[k + 1 + k * t_dim1], abs(d__2))); + wi[k + 1] = -wi[k]; + } +/* L60: */ + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DTRSEN */ + +} /* _starpu_dtrsen_ */ diff --git a/min-dgels/base/SRC/dtrsna.c b/min-dgels/base/SRC/dtrsna.c new file mode 100644 index 0000000..695b8c6 --- /dev/null +++ b/min-dgels/base/SRC/dtrsna.c @@ -0,0 +1,606 @@ +/* dtrsna.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static logical c_true = TRUE_; +static logical c_false = FALSE_; + +/* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, + integer *mm, integer *m, doublereal *work, integer *ldwork, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, + work_dim1, work_offset, i__1, i__2; + 
doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, n2; + doublereal cs; + integer nn, ks; + doublereal sn, mu, eps, est; + integer kase; + doublereal cond; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + logical pair; + integer ierr; + doublereal dumm, prod; + integer ifst; + doublereal lnrm; + integer ilst; + doublereal rnrm; + extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); + doublereal prod1, prod2, scale, delta; + extern logical _starpu_lsame_(char *, char *); + integer isave[3]; + logical wants; + doublereal dummy[1]; + extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *); + extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical wantbh; + extern /* Subroutine */ int _starpu_dlaqtr_(logical *, logical *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), _starpu_dtrexc_(char *, integer * +, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical somcon; + doublereal smlnum; + logical wantsp; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DTRSNA estimates reciprocal condition numbers for specified */ +/* eigenvalues and/or right eigenvectors of a real upper */ +/* quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q */ +/* orthogonal). */ + +/* T must be in Schur canonical form (as returned by DHSEQR), that is, */ +/* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ +/* 2-by-2 diagonal block has its diagonal elements equal and its */ +/* off-diagonal elements of opposite sign. */ + +/* Arguments */ +/* ========= */ + +/* JOB (input) CHARACTER*1 */ +/* Specifies whether condition numbers are required for */ +/* eigenvalues (S) or eigenvectors (SEP): */ +/* = 'E': for eigenvalues only (S); */ +/* = 'V': for eigenvectors only (SEP); */ +/* = 'B': for both eigenvalues and eigenvectors (S and SEP). */ + +/* HOWMNY (input) CHARACTER*1 */ +/* = 'A': compute condition numbers for all eigenpairs; */ +/* = 'S': compute condition numbers for selected eigenpairs */ +/* specified by the array SELECT. */ + +/* SELECT (input) LOGICAL array, dimension (N) */ +/* If HOWMNY = 'S', SELECT specifies the eigenpairs for which */ +/* condition numbers are required. To select condition numbers */ +/* for the eigenpair corresponding to a real eigenvalue w(j), */ +/* SELECT(j) must be set to .TRUE.. To select condition numbers */ +/* corresponding to a complex conjugate pair of eigenvalues w(j) */ +/* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be */ +/* set to .TRUE.. */ +/* If HOWMNY = 'A', SELECT is not referenced. */ + +/* N (input) INTEGER */ +/* The order of the matrix T. N >= 0. */ + +/* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ +/* The upper quasi-triangular matrix T, in Schur canonical form. */ + +/* LDT (input) INTEGER */ +/* The leading dimension of the array T. LDT >= max(1,N). 
*/ + +/* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) */ +/* If JOB = 'E' or 'B', VL must contain left eigenvectors of T */ +/* (or of any Q*T*Q**T with Q orthogonal), corresponding to the */ +/* eigenpairs specified by HOWMNY and SELECT. The eigenvectors */ +/* must be stored in consecutive columns of VL, as returned by */ +/* DHSEIN or DTREVC. */ +/* If JOB = 'V', VL is not referenced. */ + +/* LDVL (input) INTEGER */ +/* The leading dimension of the array VL. */ +/* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. */ + +/* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) */ +/* If JOB = 'E' or 'B', VR must contain right eigenvectors of T */ +/* (or of any Q*T*Q**T with Q orthogonal), corresponding to the */ +/* eigenpairs specified by HOWMNY and SELECT. The eigenvectors */ +/* must be stored in consecutive columns of VR, as returned by */ +/* DHSEIN or DTREVC. */ +/* If JOB = 'V', VR is not referenced. */ + +/* LDVR (input) INTEGER */ +/* The leading dimension of the array VR. */ +/* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. */ + +/* S (output) DOUBLE PRECISION array, dimension (MM) */ +/* If JOB = 'E' or 'B', the reciprocal condition numbers of the */ +/* selected eigenvalues, stored in consecutive elements of the */ +/* array. For a complex conjugate pair of eigenvalues two */ +/* consecutive elements of S are set to the same value. Thus */ +/* S(j), SEP(j), and the j-th columns of VL and VR all */ +/* correspond to the same eigenpair (but not in general the */ +/* j-th eigenpair, unless all eigenpairs are selected). */ +/* If JOB = 'V', S is not referenced. */ + +/* SEP (output) DOUBLE PRECISION array, dimension (MM) */ +/* If JOB = 'V' or 'B', the estimated reciprocal condition */ +/* numbers of the selected eigenvectors, stored in consecutive */ +/* elements of the array. For a complex eigenvector two */ +/* consecutive elements of SEP are set to the same value. 
If */ +/* the eigenvalues cannot be reordered to compute SEP(j), SEP(j) */ +/* is set to 0; this can only occur when the true value would be */ +/* very small anyway. */ +/* If JOB = 'E', SEP is not referenced. */ + +/* MM (input) INTEGER */ +/* The number of elements in the arrays S (if JOB = 'E' or 'B') */ +/* and/or SEP (if JOB = 'V' or 'B'). MM >= M. */ + +/* M (output) INTEGER */ +/* The number of elements of the arrays S and/or SEP actually */ +/* used to store the estimated condition numbers. */ +/* If HOWMNY = 'A', M is set to N. */ + +/* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+6) */ +/* If JOB = 'E', WORK is not referenced. */ + +/* LDWORK (input) INTEGER */ +/* The leading dimension of the array WORK. */ +/* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. */ + +/* IWORK (workspace) INTEGER array, dimension (2*(N-1)) */ +/* If JOB = 'E', IWORK is not referenced. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The reciprocal of the condition number of an eigenvalue lambda is */ +/* defined as */ + +/* S(lambda) = |v'*u| / (norm(u)*norm(v)) */ + +/* where u and v are the right and left eigenvectors of T corresponding */ +/* to lambda; v' denotes the conjugate-transpose of v, and norm(u) */ +/* denotes the Euclidean norm. These reciprocal condition numbers always */ +/* lie between zero (very badly conditioned) and one (very well */ +/* conditioned). If n = 1, S(lambda) is defined to be 1. */ + +/* An approximate error bound for a computed eigenvalue W(i) is given by */ + +/* EPS * norm(T) / S(i) */ + +/* where EPS is the machine precision. */ + +/* The reciprocal of the condition number of the right eigenvector u */ +/* corresponding to lambda is defined as follows. 
Suppose */ + +/* T = ( lambda c ) */ +/* ( 0 T22 ) */ + +/* Then the reciprocal condition number is */ + +/* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) */ + +/* where sigma-min denotes the smallest singular value. We approximate */ +/* the smallest singular value by the reciprocal of an estimate of the */ +/* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is */ +/* defined to be abs(T(1,1)). */ + +/* An approximate error bound for a computed right eigenvector VR(i) */ +/* is given by */ + +/* EPS * norm(T) / SEP(i) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --s; + --sep; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + --iwork; + + /* Function Body */ + wantbh = _starpu_lsame_(job, "B"); + wants = _starpu_lsame_(job, "E") || wantbh; + wantsp = _starpu_lsame_(job, "V") || wantbh; + + somcon = _starpu_lsame_(howmny, "S"); + + *info = 0; + if (! wants && ! wantsp) { + *info = -1; + } else if (! _starpu_lsame_(howmny, "A") && ! somcon) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || wants && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wants && *ldvr < *n) { + *info = -10; + } else { + +/* Set M to the number of eigenpairs for which condition numbers */ +/* are required, and test MM. 
*/ + + if (somcon) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + } else { + if (k < *n) { + if (t[k + 1 + k * t_dim1] == 0.) { + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[k] || select[k + 1]) { + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -13; + } else if (*ldwork < 1 || wantsp && *ldwork < *n) { + *info = -16; + } + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRSNA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (somcon) { + if (! select[1]) { + return 0; + } + } + if (wants) { + s[1] = 1.; + } + if (wantsp) { + sep[1] = (d__1 = t[t_dim1 + 1], abs(d__1)); + } + return 0; + } + +/* Get machine constants */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S") / eps; + bignum = 1. / smlnum; + _starpu_dlabad_(&smlnum, &bignum); + + ks = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. */ + + if (pair) { + pair = FALSE_; + goto L60; + } else { + if (k < *n) { + pair = t[k + 1 + k * t_dim1] != 0.; + } + } + +/* Determine whether condition numbers are required for the k-th */ +/* eigenpair. */ + + if (somcon) { + if (pair) { + if (! select[k] && ! select[k + 1]) { + goto L60; + } + } else { + if (! select[k]) { + goto L60; + } + } + } + + ++ks; + + if (wants) { + +/* Compute the reciprocal condition number of the k-th */ +/* eigenvalue. */ + + if (! pair) { + +/* Real eigenvalue. */ + + prod = _starpu_ddot_(n, &vr[ks * vr_dim1 + 1], &c__1, &vl[ks * + vl_dim1 + 1], &c__1); + rnrm = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); + lnrm = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); + s[ks] = abs(prod) / (rnrm * lnrm); + } else { + +/* Complex eigenvalue. 
*/ + + prod1 = _starpu_ddot_(n, &vr[ks * vr_dim1 + 1], &c__1, &vl[ks * + vl_dim1 + 1], &c__1); + prod1 += _starpu_ddot_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1, &vl[(ks + + 1) * vl_dim1 + 1], &c__1); + prod2 = _starpu_ddot_(n, &vl[ks * vl_dim1 + 1], &c__1, &vr[(ks + 1) * + vr_dim1 + 1], &c__1); + prod2 -= _starpu_ddot_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1, &vr[ks * + vr_dim1 + 1], &c__1); + d__1 = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1); + rnrm = _starpu_dlapy2_(&d__1, &d__2); + d__1 = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); + d__2 = _starpu_dnrm2_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1); + lnrm = _starpu_dlapy2_(&d__1, &d__2); + cond = _starpu_dlapy2_(&prod1, &prod2) / (rnrm * lnrm); + s[ks] = cond; + s[ks + 1] = cond; + } + } + + if (wantsp) { + +/* Estimate the reciprocal condition number of the k-th */ +/* eigenvector. */ + +/* Copy the matrix T to the array WORK and swap the diagonal */ +/* block beginning at T(k,k) to the (1,1) position. */ + + _starpu_dlacpy_("Full", n, n, &t[t_offset], ldt, &work[work_offset], + ldwork); + ifst = k; + ilst = 1; + _starpu_dtrexc_("No Q", n, &work[work_offset], ldwork, dummy, &c__1, & + ifst, &ilst, &work[(*n + 1) * work_dim1 + 1], &ierr); + + if (ierr == 1 || ierr == 2) { + +/* Could not swap because blocks not well separated */ + + scale = 1.; + est = bignum; + } else { + +/* Reordering successful */ + + if (work[work_dim1 + 2] == 0.) { + +/* Form C = T22 - lambda*I in WORK(2:N,2:N). */ + + i__2 = *n; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__ + i__ * work_dim1] -= work[work_dim1 + 1]; +/* L20: */ + } + n2 = 1; + nn = *n - 1; + } else { + +/* Triangularize the 2 by 2 block by unitary */ +/* transformation U = [ cs i*ss ] */ +/* [ i*ss cs ]. */ +/* such that the (1,1) position of WORK is complex */ +/* eigenvalue lambda with positive imaginary part. 
(2,2) */ +/* position of WORK is the complex eigenvalue lambda */ +/* with negative imaginary part. */ + + mu = sqrt((d__1 = work[(work_dim1 << 1) + 1], abs(d__1))) + * sqrt((d__2 = work[work_dim1 + 2], abs(d__2))); + delta = _starpu_dlapy2_(&mu, &work[work_dim1 + 2]); + cs = mu / delta; + sn = -work[work_dim1 + 2] / delta; + +/* Form */ + +/* C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] */ +/* [ mu ] */ +/* [ .. ] */ +/* [ .. ] */ +/* [ mu ] */ +/* where C' is conjugate transpose of complex matrix C, */ +/* and RWORK is stored starting in the N+1-st column of */ +/* WORK. */ + + i__2 = *n; + for (j = 3; j <= i__2; ++j) { + work[j * work_dim1 + 2] = cs * work[j * work_dim1 + 2] + ; + work[j + j * work_dim1] -= work[work_dim1 + 1]; +/* L30: */ + } + work[(work_dim1 << 1) + 2] = 0.; + + work[(*n + 1) * work_dim1 + 1] = mu * 2.; + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__ + (*n + 1) * work_dim1] = sn * work[(i__ + 1) + * work_dim1 + 1]; +/* L40: */ + } + n2 = 2; + nn = *n - 1 << 1; + } + +/* Estimate norm(inv(C')) */ + + est = 0.; + kase = 0; +L50: + _starpu_dlacn2_(&nn, &work[(*n + 2) * work_dim1 + 1], &work[(*n + 4) * + work_dim1 + 1], &iwork[1], &est, &kase, isave); + if (kase != 0) { + if (kase == 1) { + if (n2 == 1) { + +/* Real eigenvalue: solve C'*x = scale*c. */ + + i__2 = *n - 1; + _starpu_dlaqtr_(&c_true, &c_true, &i__2, &work[(work_dim1 + << 1) + 2], ldwork, dummy, &dumm, &scale, + &work[(*n + 4) * work_dim1 + 1], &work[(* + n + 6) * work_dim1 + 1], &ierr); + } else { + +/* Complex eigenvalue: solve */ +/* C'*(p+iq) = scale*(c+id) in real arithmetic. */ + + i__2 = *n - 1; + _starpu_dlaqtr_(&c_true, &c_false, &i__2, &work[( + work_dim1 << 1) + 2], ldwork, &work[(*n + + 1) * work_dim1 + 1], &mu, &scale, &work[(* + n + 4) * work_dim1 + 1], &work[(*n + 6) * + work_dim1 + 1], &ierr); + } + } else { + if (n2 == 1) { + +/* Real eigenvalue: solve C*x = scale*c. 
*/ + + i__2 = *n - 1; + _starpu_dlaqtr_(&c_false, &c_true, &i__2, &work[( + work_dim1 << 1) + 2], ldwork, dummy, & + dumm, &scale, &work[(*n + 4) * work_dim1 + + 1], &work[(*n + 6) * work_dim1 + 1], & + ierr); + } else { + +/* Complex eigenvalue: solve */ +/* C*(p+iq) = scale*(c+id) in real arithmetic. */ + + i__2 = *n - 1; + _starpu_dlaqtr_(&c_false, &c_false, &i__2, &work[( + work_dim1 << 1) + 2], ldwork, &work[(*n + + 1) * work_dim1 + 1], &mu, &scale, &work[(* + n + 4) * work_dim1 + 1], &work[(*n + 6) * + work_dim1 + 1], &ierr); + + } + } + + goto L50; + } + } + + sep[ks] = scale / max(est,smlnum); + if (pair) { + sep[ks + 1] = sep[ks]; + } + } + + if (pair) { + ++ks; + } + +L60: + ; + } + return 0; + +/* End of DTRSNA */ + +} /* _starpu_dtrsna_ */ diff --git a/min-dgels/base/SRC/dtrsyl.c b/min-dgels/base/SRC/dtrsyl.c new file mode 100644 index 0000000..ddb5b07 --- /dev/null +++ b/min-dgels/base/SRC/dtrsyl.c @@ -0,0 +1,1319 @@ +/* dtrsyl.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static logical c_false = FALSE_; +static integer c__2 = 2; +static doublereal c_b26 = 1.; +static doublereal c_b30 = 0.; +static logical c_true = TRUE_; + +/* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer + *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * + ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, 
c_offset, i__1, i__2, + i__3, i__4; + doublereal d__1, d__2; + + /* Local variables */ + integer j, k, l; + doublereal x[4] /* was [2][2] */; + integer k1, k2, l1, l2; + doublereal a11, db, da11, vec[4] /* was [2][2] */, dum[1], eps, sgn; + extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer ierr; + doublereal smin, suml, sumr; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + integer knext, lnext; + doublereal xnorm; + extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * +, doublereal *, integer *, doublereal *, doublereal *, integer *), + _starpu_dlasy2_(logical *, logical *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), _starpu_dlabad_(doublereal *, doublereal *); + extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + doublereal scaloc; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + doublereal bignum; + logical notrna, notrnb; + doublereal smlnum; + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRSYL solves the real Sylvester matrix equation: */ + +/* op(A)*X + X*op(B) = scale*C or */ +/* op(A)*X - X*op(B) = scale*C, */ + +/* where op(A) = A or A**T, and A and B are both upper quasi- */ +/* triangular. 
A is M-by-M and B is N-by-N; the right hand side C and */ +/* the solution X are M-by-N; and scale is an output scale factor, set */ +/* <= 1 to avoid overflow in X. */ + +/* A and B must be in Schur canonical form (as returned by DHSEQR), that */ +/* is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ +/* each 2-by-2 diagonal block has its diagonal elements equal and its */ +/* off-diagonal elements of opposite sign. */ + +/* Arguments */ +/* ========= */ + +/* TRANA (input) CHARACTER*1 */ +/* Specifies the option op(A): */ +/* = 'N': op(A) = A (No transpose) */ +/* = 'T': op(A) = A**T (Transpose) */ +/* = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ + +/* TRANB (input) CHARACTER*1 */ +/* Specifies the option op(B): */ +/* = 'N': op(B) = B (No transpose) */ +/* = 'T': op(B) = B**T (Transpose) */ +/* = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ + +/* ISGN (input) INTEGER */ +/* Specifies the sign in the equation: */ +/* = +1: solve op(A)*X + X*op(B) = scale*C */ +/* = -1: solve op(A)*X - X*op(B) = scale*C */ + +/* M (input) INTEGER */ +/* The order of the matrix A, and the number of rows in the */ +/* matrices X and C. M >= 0. */ + +/* N (input) INTEGER */ +/* The order of the matrix B, and the number of columns in the */ +/* matrices X and C. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,M) */ +/* The upper quasi-triangular matrix A, in Schur canonical form. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ +/* The upper quasi-triangular matrix B, in Schur canonical form. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ +/* On entry, the M-by-N right hand side matrix C. */ +/* On exit, C is overwritten by the solution matrix X. 
*/ + +/* LDC (input) INTEGER */ +/* The leading dimension of the array C. LDC >= max(1,M) */ + +/* SCALE (output) DOUBLE PRECISION */ +/* The scale factor, scale, set <= 1 to avoid overflow in X. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* = 1: A and B have common or very close eigenvalues; perturbed */ +/* values were used to solve the equation (but the matrices */ +/* A and B are unchanged). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + notrna = _starpu_lsame_(trana, "N"); + notrnb = _starpu_lsame_(tranb, "N"); + + *info = 0; + if (! notrna && ! _starpu_lsame_(trana, "T") && ! _starpu_lsame_( + trana, "C")) { + *info = -1; + } else if (! notrnb && ! _starpu_lsame_(tranb, "T") && ! + _starpu_lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*m)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } else if (*ldc < max(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRSYL", &i__1); + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = _starpu_dlamch_("P"); + smlnum = _starpu_dlamch_("S"); + bignum = 1. 
/ smlnum; + _starpu_dlabad_(&smlnum, &bignum); + smlnum = smlnum * (doublereal) (*m * *n) / eps; + bignum = 1. / smlnum; + +/* Computing MAX */ + d__1 = smlnum, d__2 = eps * _starpu_dlange_("M", m, m, &a[a_offset], lda, dum), d__1 = max(d__1,d__2), d__2 = eps * _starpu_dlange_("M", n, n, + &b[b_offset], ldb, dum); + smin = max(d__1,d__2); + + sgn = (doublereal) (*isgn); + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ +/* I=K+1 J=1 */ + +/* Start column loop (index = L) */ +/* L1 (L2) : column index of the first (first) row of X(K,L). */ + + lnext = 1; + i__1 = *n; + for (l = 1; l <= i__1; ++l) { + if (l < lnext) { + goto L60; + } + if (l == *n) { + l1 = l; + l2 = l; + } else { + if (b[l + 1 + l * b_dim1] != 0.) { + l1 = l; + l2 = l + 1; + lnext = l + 2; + } else { + l1 = l; + l2 = l; + lnext = l + 1; + } + } + +/* Start row loop (index = K) */ +/* K1 (K2): row index of the first (last) row of X(K,L). */ + + knext = *m; + for (k = *m; k >= 1; --k) { + if (k > knext) { + goto L50; + } + if (k == 1) { + k1 = k; + k2 = k; + } else { + if (a[k + (k - 1) * a_dim1] != 0.) 
{ + k1 = k - 1; + k2 = k; + knext = k - 2; + } else { + k1 = k; + k2 = k; + knext = k - 1; + } + } + + if (l1 == l2 && k1 == k2) { + i__2 = *m - k1; +/* Computing MIN */ + i__3 = k1 + 1; +/* Computing MIN */ + i__4 = k1 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + scaloc = 1.; + + a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; + da11 = abs(a11); + if (da11 <= smin) { + a11 = smin; + da11 = smin; + *info = 1; + } + db = abs(vec[0]); + if (da11 < 1. && db > 1.) { + if (db > bignum * da11) { + scaloc = 1. / db; + } + } + x[0] = vec[0] * scaloc / a11; + + if (scaloc != 1.) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L10: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + + } else if (l1 == l2 && k1 != k2) { + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + d__1 = -sgn * b[l1 + l1 * b_dim1]; + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 + * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 
1; + } + + if (scaloc != 1.) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L20: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k2 + l1 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 == k2) { + + i__2 = *m - k1; +/* Computing MIN */ + i__3 = k1 + 1; +/* Computing MIN */ + i__4 = k1 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * + sumr)); + + i__2 = *m - k1; +/* Computing MIN */ + i__3 = k1 + 1; +/* Computing MIN */ + i__4 = k1 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l2 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * + sumr)); + + d__1 = -sgn * a[k1 + k1 * a_dim1]; + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * + b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L30: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 != k2) { + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l2 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l1 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = *m - k2; +/* Computing MIN */ + i__3 = k2 + 1; +/* Computing MIN */ + i__4 = k2 + 1; + suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & + c__[min(i__4, *m)+ l2 * c_dim1], &c__1); + i__2 = l1 - 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); + + _starpu_dlasy2_(&c_false, &c_false, isgn, &c__2, &c__2, &a[k1 + + k1 * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, + &c__2, &scaloc, x, &c__2, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L40: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[2]; + c__[k2 + l1 * c_dim1] = x[1]; + c__[k2 + l2 * c_dim1] = x[3]; + } + +L50: + ; + } + +L60: + ; + } + + } else if (! notrna && notrnb) { + +/* Solve A' *X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start column loop (index = L) */ +/* L1 (L2): column index of the first (last) row of X(K,L) */ + + lnext = 1; + i__1 = *n; + for (l = 1; l <= i__1; ++l) { + if (l < lnext) { + goto L120; + } + if (l == *n) { + l1 = l; + l2 = l; + } else { + if (b[l + 1 + l * b_dim1] != 0.) { + l1 = l; + l2 = l + 1; + lnext = l + 2; + } else { + l1 = l; + l2 = l; + lnext = l + 1; + } + } + +/* Start row loop (index = K) */ +/* K1 (K2): row index of the first (last) row of X(K,L) */ + + knext = 1; + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (k < knext) { + goto L110; + } + if (k == *m) { + k1 = k; + k2 = k; + } else { + if (a[k + 1 + k * a_dim1] != 0.) { + k1 = k; + k2 = k + 1; + knext = k + 2; + } else { + k1 = k; + k2 = k; + knext = k + 1; + } + } + + if (l1 == l2 && k1 == k2) { + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + scaloc = 1.; + + a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; + da11 = abs(a11); + if (da11 <= smin) { + a11 = smin; + da11 = smin; + *info = 1; + } + db = abs(vec[0]); + if (da11 < 1. && db > 1.) { + if (db > bignum * da11) { + scaloc = 1. 
/ db; + } + } + x[0] = vec[0] * scaloc / a11; + + if (scaloc != 1.) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L70: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + + } else if (l1 == l2 && k1 != k2) { + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + d__1 = -sgn * b[l1 + l1 * b_dim1]; + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * + a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L80: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k2 + l1 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 == k2) { + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * + sumr)); + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * + sumr)); + + d__1 = -sgn * a[k1 + k1 * a_dim1]; + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * + b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L90: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 != k2) { + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l1 * + b_dim1 + 1], &c__1); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + i__3 = k1 - 1; + suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__3 = l1 - 1; + sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l2 * + b_dim1 + 1], &c__1); + vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); + + _starpu_dlasy2_(&c_true, &c_false, isgn, &c__2, &c__2, &a[k1 + k1 + * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & + c__2, &scaloc, x, &c__2, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L100: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[2]; + c__[k2 + l1 * c_dim1] = x[1]; + c__[k2 + l2 * c_dim1] = x[3]; + } + +L110: + ; + } +L120: + ; + } + + } else if (! notrna && ! notrnb) { + +/* Solve A'*X + ISGN*X*B' = scale*C. 
*/ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. */ +/* I=1 J=L+1 */ + +/* Start column loop (index = L) */ +/* L1 (L2): column index of the first (last) row of X(K,L) */ + + lnext = *n; + for (l = *n; l >= 1; --l) { + if (l > lnext) { + goto L180; + } + if (l == 1) { + l1 = l; + l2 = l; + } else { + if (b[l + (l - 1) * b_dim1] != 0.) { + l1 = l - 1; + l2 = l; + lnext = l - 2; + } else { + l1 = l; + l2 = l; + lnext = l - 1; + } + } + +/* Start row loop (index = K) */ +/* K1 (K2): row index of the first (last) row of X(K,L) */ + + knext = 1; + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + if (k < knext) { + goto L170; + } + if (k == *m) { + k1 = k; + k2 = k; + } else { + if (a[k + 1 + k * a_dim1] != 0.) { + k1 = k; + k2 = k + 1; + knext = k + 2; + } else { + k1 = k; + k2 = k; + knext = k + 1; + } + } + + if (l1 == l2 && k1 == k2) { + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l1; +/* Computing MIN */ + i__3 = l1 + 1; +/* Computing MIN */ + i__4 = l1 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + scaloc = 1.; + + a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; + da11 = abs(a11); + if (da11 <= smin) { + a11 = smin; + da11 = smin; + *info = 1; + } + db = abs(vec[0]); + if (da11 < 1. && db > 1.) { + if (db > bignum * da11) { + scaloc = 1. / db; + } + } + x[0] = vec[0] * scaloc / a11; + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L130: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + + } else if (l1 == l2 && k1 != k2) { + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + d__1 = -sgn * b[l1 + l1 * b_dim1]; + _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * + a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L140: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k2 + l1 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 == k2) { + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * + sumr)); + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l2 + min(i__4, *n)* b_dim1], ldb); + vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * + sumr)); + + d__1 = -sgn * a[k1 + k1 * a_dim1]; + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 + * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L150: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 != k2) { + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, + &b[l2 + min(i__4, *n)* b_dim1], ldb); + vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, + &b[l1 + min(i__4, *n)* b_dim1], ldb); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + i__2 = k1 - 1; + suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l2 * + c_dim1 + 1], &c__1); + i__2 = *n - l2; +/* Computing MIN */ + i__3 = l2 + 1; +/* Computing MIN */ + i__4 = l2 + 1; + sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, + &b[l2 + min(i__4, *n)* b_dim1], ldb); + vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); + + _starpu_dlasy2_(&c_true, &c_true, isgn, &c__2, &c__2, &a[k1 + k1 * + a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & + c__2, &scaloc, x, &c__2, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L160: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[2]; + c__[k2 + l1 * c_dim1] = x[1]; + c__[k2 + l2 * c_dim1] = x[3]; + } + +L170: + ; + } +L180: + ; + } + + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B' = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. */ +/* I=K+1 J=L+1 */ + +/* Start column loop (index = L) */ +/* L1 (L2): column index of the first (last) row of X(K,L) */ + + lnext = *n; + for (l = *n; l >= 1; --l) { + if (l > lnext) { + goto L240; + } + if (l == 1) { + l1 = l; + l2 = l; + } else { + if (b[l + (l - 1) * b_dim1] != 0.) { + l1 = l - 1; + l2 = l; + lnext = l - 2; + } else { + l1 = l; + l2 = l; + lnext = l - 1; + } + } + +/* Start row loop (index = K) */ +/* K1 (K2): row index of the first (last) row of X(K,L) */ + + knext = *m; + for (k = *m; k >= 1; --k) { + if (k > knext) { + goto L230; + } + if (k == 1) { + k1 = k; + k2 = k; + } else { + if (a[k + (k - 1) * a_dim1] != 0.) 
{ + k1 = k - 1; + k2 = k; + knext = k - 2; + } else { + k1 = k; + k2 = k; + knext = k - 1; + } + } + + if (l1 == l2 && k1 == k2) { + i__1 = *m - k1; +/* Computing MIN */ + i__2 = k1 + 1; +/* Computing MIN */ + i__3 = k1 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l1; +/* Computing MIN */ + i__2 = l1 + 1; +/* Computing MIN */ + i__3 = l1 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + scaloc = 1.; + + a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; + da11 = abs(a11); + if (da11 <= smin) { + a11 = smin; + da11 = smin; + *info = 1; + } + db = abs(vec[0]); + if (da11 < 1. && db > 1.) { + if (db > bignum * da11) { + scaloc = 1. / db; + } + } + x[0] = vec[0] * scaloc / a11; + + if (scaloc != 1.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L190: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + + } else if (l1 == l2 && k1 != k2) { + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k2 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* 
b_dim1], ldb); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + d__1 = -sgn * b[l1 + l1 * b_dim1]; + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 + * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L200: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k2 + l1 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 == k2) { + + i__1 = *m - k1; +/* Computing MIN */ + i__2 = k1 + 1; +/* Computing MIN */ + i__3 = k1 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* b_dim1], ldb); + vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * + sumr)); + + i__1 = *m - k1; +/* Computing MIN */ + i__2 = k1 + 1; +/* Computing MIN */ + i__3 = k1 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l2 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l2 + min(i__3, *n)* b_dim1], ldb); + vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * + sumr)); + + d__1 = -sgn * a[k1 + k1 * a_dim1]; + _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 + * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, + &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L210: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[1]; + + } else if (l1 != l2 && k1 != k2) { + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* b_dim1], ldb); + vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l2 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, + &b[l2 + min(i__3, *n)* b_dim1], ldb); + vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l1 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k2 + min(i__2, *n)* c_dim1], ldc, + &b[l1 + min(i__3, *n)* b_dim1], ldb); + vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); + + i__1 = *m - k2; +/* Computing MIN */ + i__2 = k2 + 1; +/* Computing MIN */ + i__3 = k2 + 1; + suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & + c__[min(i__3, *m)+ l2 * c_dim1], &c__1); + i__1 = *n - l2; +/* Computing MIN */ + i__2 = l2 + 1; +/* Computing MIN */ + i__3 = l2 + 1; + sumr = _starpu_ddot_(&i__1, &c__[k2 + 
min(i__2, *n)* c_dim1], ldc, + &b[l2 + min(i__3, *n)* b_dim1], ldb); + vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); + + _starpu_dlasy2_(&c_false, &c_true, isgn, &c__2, &c__2, &a[k1 + k1 + * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & + c__2, &scaloc, x, &c__2, &xnorm, &ierr); + if (ierr != 0) { + *info = 1; + } + + if (scaloc != 1.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); +/* L220: */ + } + *scale *= scaloc; + } + c__[k1 + l1 * c_dim1] = x[0]; + c__[k1 + l2 * c_dim1] = x[2]; + c__[k2 + l1 * c_dim1] = x[1]; + c__[k2 + l2 * c_dim1] = x[3]; + } + +L230: + ; + } +L240: + ; + } + + } + + return 0; + +/* End of DTRSYL */ + +} /* _starpu_dtrsyl_ */ diff --git a/min-dgels/base/SRC/dtrti2.c b/min-dgels/base/SRC/dtrti2.c new file mode 100644 index 0000000..7054787 --- /dev/null +++ b/min-dgels/base/SRC/dtrti2.c @@ -0,0 +1,183 @@ +/* dtrti2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer j; + doublereal ajj; + extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical _starpu_lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, + doublereal *, 
integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTI2 computes the inverse of a real upper or lower triangular */ +/* matrix. */ + +/* This is the Level 2 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* Specifies whether the matrix A is upper or lower triangular. */ +/* = 'U': Upper triangular */ +/* = 'L': Lower triangular */ + +/* DIAG (input) CHARACTER*1 */ +/* Specifies whether or not the matrix A is unit triangular. */ +/* = 'N': Non-unit triangular */ +/* = 'U': Unit triangular */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular matrix A. If UPLO = 'U', the */ +/* leading n by n upper triangular part of the array A contains */ +/* the upper triangular matrix, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading n by n lower triangular part of the array A contains */ +/* the lower triangular matrix, and the strictly upper */ +/* triangular part of A is not referenced. If DIAG = 'U', the */ +/* diagonal elements of A are also not referenced and are */ +/* assumed to be 1. */ + +/* On exit, the (triangular) inverse of the original matrix, in */ +/* the same storage format. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -k, the k-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. 
External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + nounit = _starpu_lsame_(diag, "N"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + _starpu_dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + _starpu_dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. 
*/ + + i__1 = *n - j; + _starpu_dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + _starpu_dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of DTRTI2 */ + +} /* _starpu_dtrti2_ */ diff --git a/min-dgels/base/SRC/dtrtri.c b/min-dgels/base/SRC/dtrtri.c new file mode 100644 index 0000000..e10db4d --- /dev/null +++ b/min-dgels/base/SRC/dtrtri.c @@ -0,0 +1,242 @@ +/* dtrtri.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b18 = 1.; +static doublereal c_b22 = -1.; + +/* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer j, jb, nb, nn; + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_dtrsm_( + char *, char *, char *, char *, integer *, integer *, doublereal * +, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int 
_starpu_dtrti2_(char *, char *, integer *, doublereal + *, integer *, integer *), _starpu_xerbla_(char *, integer + *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTRI computes the inverse of a real upper or lower triangular */ +/* matrix A. */ + +/* This is the Level 3 BLAS version of the algorithm. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular matrix A. If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of the array A contains */ +/* the upper triangular matrix, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of the array A contains */ +/* the lower triangular matrix, and the strictly upper */ +/* triangular part of A is not referenced. If DIAG = 'U', the */ +/* diagonal elements of A are also not referenced and are */ +/* assumed to be 1. */ +/* On exit, the (triangular) inverse of the original matrix, in */ +/* the same storage format. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, A(i,i) is exactly zero. 
The triangular */ +/* matrix is singular and its inverse can not be computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = _starpu_lsame_(uplo, "U"); + nounit = _starpu_lsame_(diag, "N"); + if (! upper && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* Determine the block size for this environment. */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = _starpu_ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + _starpu_dtrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? 
j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + _starpu_dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b18, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + _starpu_dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b22, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + _starpu_dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + _starpu_dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b18, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + _starpu_dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b22, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + _starpu_dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of DTRTRI */ + +} /* _starpu_dtrtri_ */ diff --git a/min-dgels/base/SRC/dtrtrs.c b/min-dgels/base/SRC/dtrtrs.c new file mode 100644 index 0000000..6b41d70 --- /dev/null +++ b/min-dgels/base/SRC/dtrtrs.c @@ -0,0 +1,183 @@ +/* dtrtrs.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static doublereal c_b12 = 1.; + +/* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, + integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), _starpu_xerbla_( + char *, integer *); + logical nounit; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTRS solves a triangular system of the form */ + +/* A * X = B or A**T * X = B, */ + +/* where A is a triangular matrix of order N, and B is an N-by-NRHS */ +/* matrix. A check is made to verify that A is nonsingular. */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER*1 */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ + +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': A * X = B (No transpose) */ +/* = 'T': A**T * X = B (Transpose) */ +/* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ + +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of columns */ +/* of the matrix B. NRHS >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ +/* upper triangular part of the array A contains the upper */ +/* triangular matrix, and the strictly lower triangular part of */ +/* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* triangular part of the array A contains the lower triangular */ +/* matrix, and the strictly upper triangular part of A is not */ +/* referenced. If DIAG = 'U', the diagonal elements of A are */ +/* also not referenced and are assumed to be 1. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the right hand side matrix B. */ +/* On exit, if INFO = 0, the solution matrix X. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= max(1,N). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of A is zero, */ +/* indicating that the matrix is singular and the solutions */ +/* X have not been computed. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. 
*/ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + nounit = _starpu_lsame_(diag, "N"); + if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { + *info = -1; + } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, + "T") && ! _starpu_lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! _starpu_lsame_(diag, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } else if (*ldb < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + } + *info = 0; + +/* Solve A * x = b or A' * x = b. */ + + _starpu_dtrsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ + b_offset], ldb); + + return 0; + +/* End of DTRTRS */ + +} /* _starpu_dtrtrs_ */ diff --git a/min-dgels/base/SRC/dtrttf.c b/min-dgels/base/SRC/dtrttf.c new file mode 100644 index 0000000..2aacca0 --- /dev/null +++ b/min-dgels/base/SRC/dtrttf.c @@ -0,0 +1,489 @@ +/* dtrttf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal + *a, integer *lda, doublereal *arf, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, k, l, n1, n2, ij, nt, nx2, np1x2; + logical normaltransr; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + logical nisodd; + + +/* -- LAPACK routine (version 3.2) -- */ + +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTTF copies a triangular matrix A from standard full format (TR) */ +/* to rectangular full packed format (TF) . */ + +/* Arguments */ +/* ========= */ + +/* TRANSR (input) CHARACTER */ +/* = 'N': ARF in Normal form is wanted; */ +/* = 'T': ARF in Transpose form is wanted. */ + +/* UPLO (input) CHARACTER */ +/* = 'U': Upper triangle of A is stored; */ +/* = 'L': Lower triangle of A is stored. */ + +/* N (input) INTEGER */ +/* The order of the matrix A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N). */ +/* On entry, the triangular matrix A. 
If UPLO = 'U', the */ +/* leading N-by-N upper triangular part of the array A contains */ +/* the upper triangular matrix, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of the array A contains */ +/* the lower triangular matrix, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the matrix A. LDA >= max(1,N). */ + +/* ARF (output) DOUBLE PRECISION array, dimension (NT). */ +/* NT=N*(N+1)/2. On exit, the triangular matrix A in RFP format. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Notes */ +/* ===== */ + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* even. We give an example where N = 6. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 05 00 */ +/* 11 12 13 14 15 10 11 */ +/* 22 23 24 25 20 21 22 */ +/* 33 34 35 30 31 32 33 */ +/* 44 45 40 41 42 43 44 */ +/* 55 50 51 52 53 54 55 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ +/* the transpose of the first three columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ +/* the transpose of the last three columns of AP lower. */ +/* This covers the case N even and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 03 04 05 33 43 53 */ +/* 13 14 15 00 44 54 */ +/* 23 24 25 10 11 55 */ +/* 33 34 35 20 21 22 */ +/* 00 44 45 30 31 32 */ +/* 01 11 55 40 41 42 */ +/* 02 12 22 50 51 52 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. 
One therefore gets: */ + + +/* RFP A RFP A */ + +/* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ +/* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ +/* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ + + +/* We first consider Rectangular Full Packed (RFP) Format when N is */ +/* odd. We give an example where N = 5. */ + +/* AP is Upper AP is Lower */ + +/* 00 01 02 03 04 00 */ +/* 11 12 13 14 10 11 */ +/* 22 23 24 20 21 22 */ +/* 33 34 30 31 32 33 */ +/* 44 40 41 42 43 44 */ + + +/* Let TRANSR = 'N'. RFP holds AP as follows: */ +/* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ +/* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ +/* the transpose of the first two columns of AP upper. */ +/* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ +/* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ +/* the transpose of the last two columns of AP lower. */ +/* This covers the case N odd and TRANSR = 'N'. */ + +/* RFP A RFP A */ + +/* 02 03 04 00 33 43 */ +/* 12 13 14 10 11 44 */ +/* 22 23 24 20 21 22 */ +/* 00 33 34 30 31 32 */ +/* 01 11 44 40 41 42 */ + +/* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ +/* transpose of RFP A above. One therefore gets: */ + +/* RFP A RFP A */ + +/* 02 12 22 00 01 00 10 20 30 40 */ +/* 03 13 23 33 11 33 11 21 31 41 */ +/* 04 14 24 34 44 43 44 22 32 42 */ + +/* Reference */ +/* ========= */ + +/* ===================================================================== */ + +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda - 1 - 0 + 1; + a_offset = 0 + a_dim1 * 0; + a -= a_offset; + + /* Function Body */ + *info = 0; + normaltransr = _starpu_lsame_(transr, "N"); + lower = _starpu_lsame_(uplo, "L"); + if (! normaltransr && ! _starpu_lsame_(transr, "T")) { + *info = -1; + } else if (! lower && ! _starpu_lsame_(uplo, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTTF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + if (*n == 1) { + arf[0] = a[0]; + } + return 0; + } + +/* Size of array ARF(0:nt-1) */ + + nt = *n * (*n + 1) / 2; + +/* Set N1 and N2 depending on LOWER: for N even N1=N2=K */ + + if (lower) { + n2 = *n / 2; + n1 = *n - n2; + } else { + n1 = *n / 2; + n2 = *n - n1; + } + +/* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. */ +/* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is */ +/* N--by--(N+1)/2. */ + + if (*n % 2 == 0) { + k = *n / 2; + nisodd = FALSE_; + if (! lower) { + np1x2 = *n + *n + 2; + } + } else { + nisodd = TRUE_; + if (! 
lower) { + nx2 = *n + *n; + } + } + + if (nisodd) { + +/* N is odd */ + + if (normaltransr) { + +/* N is odd and TRANSR = 'N' */ + + if (lower) { + +/* N is odd, TRANSR = 'N', and UPLO = 'L' */ + + ij = 0; + i__1 = n2; + for (j = 0; j <= i__1; ++j) { + i__2 = n2 + j; + for (i__ = n1; i__ <= i__2; ++i__) { + arf[ij] = a[n2 + j + i__ * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + } + + } else { + +/* N is odd, TRANSR = 'N', and UPLO = 'U' */ + + ij = nt - *n; + i__1 = n1; + for (j = *n - 1; j >= i__1; --j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + i__2 = n1 - 1; + for (l = j - n1; l <= i__2; ++l) { + arf[ij] = a[j - n1 + l * a_dim1]; + ++ij; + } + ij -= nx2; + } + + } + + } else { + +/* N is odd and TRANSR = 'T' */ + + if (lower) { + +/* N is odd, TRANSR = 'T', and UPLO = 'L' */ + + ij = 0; + i__1 = n2 - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (i__ = n1 + j; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + (n1 + j) * a_dim1]; + ++ij; + } + } + i__1 = *n - 1; + for (j = n2; j <= i__1; ++j) { + i__2 = n1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + } + + } else { + +/* N is odd, TRANSR = 'T', and UPLO = 'U' */ + + ij = 0; + i__1 = n1; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = n1; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + } + i__1 = n1 - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (l = n2 + j; l <= i__2; ++l) { + arf[ij] = a[n2 + j + l * a_dim1]; + ++ij; + } + } + + } + + } + + } else { + +/* N is even */ + + if (normaltransr) { + +/* N is even and TRANSR = 'N' */ + + if (lower) { + +/* N is even, TRANSR = 'N', and UPLO = 'L' */ + + ij 
= 0; + i__1 = k - 1; + for (j = 0; j <= i__1; ++j) { + i__2 = k + j; + for (i__ = k; i__ <= i__2; ++i__) { + arf[ij] = a[k + j + i__ * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (i__ = j; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + } + + } else { + +/* N is even, TRANSR = 'N', and UPLO = 'U' */ + + ij = nt - *n - 1; + i__1 = k; + for (j = *n - 1; j >= i__1; --j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + i__2 = k - 1; + for (l = j - k; l <= i__2; ++l) { + arf[ij] = a[j - k + l * a_dim1]; + ++ij; + } + ij -= np1x2; + } + + } + + } else { + +/* N is even and TRANSR = 'T' */ + + if (lower) { + +/* N is even, TRANSR = 'T', and UPLO = 'L' */ + + ij = 0; + j = k; + i__1 = *n - 1; + for (i__ = k; i__ <= i__1; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (i__ = k + 1 + j; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + (k + 1 + j) * a_dim1]; + ++ij; + } + } + i__1 = *n - 1; + for (j = k - 1; j <= i__1; ++j) { + i__2 = k - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + } + + } else { + +/* N is even, TRANSR = 'T', and UPLO = 'U' */ + + ij = 0; + i__1 = k; + for (j = 0; j <= i__1; ++j) { + i__2 = *n - 1; + for (i__ = k; i__ <= i__2; ++i__) { + arf[ij] = a[j + i__ * a_dim1]; + ++ij; + } + } + i__1 = k - 2; + for (j = 0; j <= i__1; ++j) { + i__2 = j; + for (i__ = 0; i__ <= i__2; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + i__2 = *n - 1; + for (l = k + 1 + j; l <= i__2; ++l) { + arf[ij] = a[k + 1 + j + l * a_dim1]; + ++ij; + } + } +/* Note that here, on exit of the loop, J = K-1 */ + i__1 = j; + for (i__ = 0; i__ <= i__1; ++i__) { + arf[ij] = a[i__ + j * a_dim1]; + ++ij; + } + + } + + } + + } + + return 0; + +/* End of DTRTTF */ + +} /* _starpu_dtrttf_ */ diff --git 
a/min-dgels/base/SRC/dtrttp.c b/min-dgels/base/SRC/dtrttp.c new file mode 100644 index 0000000..70e389b --- /dev/null +++ b/min-dgels/base/SRC/dtrttp.c @@ -0,0 +1,144 @@ +/* dtrttp.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *ap, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, k; + extern logical _starpu_lsame_(char *, char *); + logical lower; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ +/* -- and Julien Langou of the Univ. of Colorado Denver -- */ +/* -- November 2008 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTRTTP copies a triangular matrix A from full format (TR) to standard */ +/* packed format (TP). */ + +/* Arguments */ +/* ========= */ + +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular. */ +/* = 'L': A is lower triangular. */ + +/* N (input) INTEGER */ +/* The order of the matrices AP and A. N >= 0. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the triangular matrix A. 
If UPLO = 'U', the leading */ +/* N-by-N upper triangular part of A contains the upper */ +/* triangular part of the matrix A, and the strictly lower */ +/* triangular part of A is not referenced. If UPLO = 'L', the */ +/* leading N-by-N lower triangular part of A contains the lower */ +/* triangular part of the matrix A, and the strictly upper */ +/* triangular part of A is not referenced. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,N). */ + +/* AP (output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ +/* On exit, the upper or lower triangular matrix A, packed */ +/* columnwise in a linear array. The j-th column of A is stored */ +/* in the array AP as follows: */ +/* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ap; + + /* Function Body */ + *info = 0; + lower = _starpu_lsame_(uplo, "L"); + if (! lower && ! 
_starpu_lsame_(uplo, "U")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTRTTP", &i__1); + return 0; + } + + if (lower) { + k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + ++k; + ap[k] = a[i__ + j * a_dim1]; + } + } + } else { + k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ++k; + ap[k] = a[i__ + j * a_dim1]; + } + } + } + + + return 0; + +/* End of DTRTTP */ + +} /* _starpu_dtrttp_ */ diff --git a/min-dgels/base/SRC/dtzrqf.c b/min-dgels/base/SRC/dtzrqf.c new file mode 100644 index 0000000..3191c13 --- /dev/null +++ b/min-dgels/base/SRC/dtzrqf.c @@ -0,0 +1,221 @@ +/* dtzrqf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 1.; + +/* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, k, m1; + extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), 
_starpu_dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfp_( + integer *, doublereal *, doublereal *, integer *, doublereal *), + _starpu_xerbla_(char *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This routine is deprecated and has been replaced by routine DTZRZF. */ + +/* DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A */ +/* to upper triangular form by means of orthogonal transformations. */ + +/* The upper trapezoidal matrix A is factored as */ + +/* A = ( R 0 ) * Z, */ + +/* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper */ +/* triangular matrix. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= M. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the leading M-by-N upper trapezoidal part of the */ +/* array A must contain the matrix to be factorized. */ +/* On exit, the leading M-by-M upper triangular part of A */ +/* contains the upper triangular matrix R, and elements M+1 to */ +/* N of the first M rows of A, with the array TAU, represent the */ +/* orthogonal matrix Z as a product of M elementary reflectors. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (M) */ +/* The scalar factors of the elementary reflectors. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* The factorization is obtained by Householder's method. The kth */ +/* transformation matrix, Z( k ), which is used to introduce zeros into */ +/* the ( m - k + 1 )th row of A, is given in the form */ + +/* Z( k ) = ( I 0 ), */ +/* ( 0 T( k ) ) */ + +/* where */ + +/* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ +/* ( 0 ) */ +/* ( z( k ) ) */ + +/* tau is a scalar and z( k ) is an ( n - m ) element vector. */ +/* tau and z( k ) are chosen to annihilate the elements of the kth row */ +/* of X. */ + +/* The scalar tau is returned in the kth element of TAU and the vector */ +/* u( k ) in the kth row of A, such that the elements of z( k ) are */ +/* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in */ +/* the upper triangular part of A. */ + +/* Z is given by */ + +/* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTZRQF", &i__1); + return 0; + } + +/* Perform the factorization. 
*/ + + if (*m == 0) { + return 0; + } + if (*m == *n) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + } else { +/* Computing MIN */ + i__1 = *m + 1; + m1 = min(i__1,*n); + for (k = *m; k >= 1; --k) { + +/* Use a Householder reflection to zero the kth row of A. */ +/* First set up the reflection. */ + + i__1 = *n - *m + 1; + _starpu_dlarfp_(&i__1, &a[k + k * a_dim1], &a[k + m1 * a_dim1], lda, &tau[ + k]); + + if (tau[k] != 0. && k > 1) { + +/* We now perform the operation A := A*P( k ). */ + +/* Use the first ( k - 1 ) elements of TAU to store a( k ), */ +/* where a( k ) consists of the first ( k - 1 ) elements of */ +/* the kth column of A. Also let B denote the first */ +/* ( k - 1 ) rows of the last ( n - m ) columns of A. */ + + i__1 = k - 1; + _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &tau[1], &c__1); + +/* Form w = a( k ) + B*z( k ) in TAU. */ + + i__1 = k - 1; + i__2 = *n - *m; + _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[m1 * a_dim1 + + 1], lda, &a[k + m1 * a_dim1], lda, &c_b8, &tau[1], & + c__1); + +/* Now form a( k ) := a( k ) - tau*w */ +/* and B := B - tau*w*z( k )'. */ + + i__1 = k - 1; + d__1 = -tau[k]; + _starpu_daxpy_(&i__1, &d__1, &tau[1], &c__1, &a[k * a_dim1 + 1], & + c__1); + i__1 = k - 1; + i__2 = *n - *m; + d__1 = -tau[k]; + _starpu_dger_(&i__1, &i__2, &d__1, &tau[1], &c__1, &a[k + m1 * a_dim1] +, lda, &a[m1 * a_dim1 + 1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of DTZRQF */ + +} /* _starpu_dtzrqf_ */ diff --git a/min-dgels/base/SRC/dtzrzf.c b/min-dgels/base/SRC/dtzrzf.c new file mode 100644 index 0000000..38cfe49 --- /dev/null +++ b/min-dgels/base/SRC/dtzrzf.c @@ -0,0 +1,308 @@ +/* dtzrzf.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, m1, ib, nb, ki, kk, mu, nx, iws, nbmin; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlarzb_( + char *, char *, char *, char *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *); + extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int _starpu_dlarzt_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlatrz_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A */ +/* to upper triangular form by means of orthogonal transformations. */ + +/* The upper trapezoidal matrix A is factored as */ + +/* A = ( R 0 ) * Z, */ + +/* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper */ +/* triangular matrix. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= M. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the leading M-by-N upper trapezoidal part of the */ +/* array A must contain the matrix to be factorized. */ +/* On exit, the leading M-by-M upper triangular part of A */ +/* contains the upper triangular matrix R, and elements M+1 to */ +/* N of the first M rows of A, with the array TAU, represent the */ +/* orthogonal matrix Z as a product of M elementary reflectors. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* TAU (output) DOUBLE PRECISION array, dimension (M) */ +/* The scalar factors of the elementary reflectors. */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. LWORK >= max(1,M). */ +/* For optimum performance LWORK >= M*NB, where NB is */ +/* the optimal blocksize. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ + +/* Further Details */ +/* =============== */ + +/* Based on contributions by */ +/* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ + +/* The factorization is obtained by Householder's method. The kth */ +/* transformation matrix, Z( k ), which is used to introduce zeros into */ +/* the ( m - k + 1 )th row of A, is given in the form */ + +/* Z( k ) = ( I 0 ), */ +/* ( 0 T( k ) ) */ + +/* where */ + +/* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ +/* ( 0 ) */ +/* ( z( k ) ) */ + +/* tau is a scalar and z( k ) is an ( n - m ) element vector. */ +/* tau and z( k ) are chosen to annihilate the elements of the kth row */ +/* of X. */ + +/* The scalar tau is returned in the kth element of TAU and the vector */ +/* u( k ) in the kth row of A, such that the elements of z( k ) are */ +/* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in */ +/* the upper triangular part of A. */ + +/* Z is given by */ + +/* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + + if (*info == 0) { + if (*m == 0 || *m == *n) { + lwkopt = 1; + } else { + +/* Determine the block size. 
*/ + + nb = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); + lwkopt = *m * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + _starpu_xerbla_("DTZRZF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0) { + return 0; + } else if (*m == *n) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + return 0; + } + + nbmin = 2; + nx = 1; + iws = *m; + if (nb > 1 && nb < *m) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1); + nx = max(i__1,i__2); + if (nx < *m) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, & + c_n1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *m && nx < *m) { + +/* Use blocked code initially. */ +/* The last kk rows are handled by the block method. */ + +/* Computing MIN */ + i__1 = *m + 1; + m1 = min(i__1,*n); + ki = (*m - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *m, i__2 = ki + nb; + kk = min(i__1,i__2); + + i__1 = *m - kk + 1; + i__2 = -nb; + for (i__ = *m - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; + i__ += i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + ib = min(i__3,nb); + +/* Compute the TZ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + i__4 = *n - *m; + _starpu_dlatrz_(&ib, &i__3, &i__4, &a[i__ + i__ * a_dim1], lda, &tau[i__], + &work[1]); + if (i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . 
H(i+1) H(i) */ + + i__3 = *n - *m; + _starpu_dlarzt_("Backward", "Rowwise", &i__3, &ib, &a[i__ + m1 * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:i-1,i:n) from the right */ + + i__3 = i__ - 1; + i__4 = *n - i__ + 1; + i__5 = *n - *m; + _starpu_dlarzb_("Right", "No transpose", "Backward", "Rowwise", &i__3, + &i__4, &ib, &i__5, &a[i__ + m1 * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ * a_dim1 + 1], lda, &work[ib + 1], + &ldwork) + ; + } +/* L20: */ + } + mu = i__ + nb - 1; + } else { + mu = *m; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0) { + i__2 = *n - *m; + _starpu_dlatrz_(&mu, n, &i__2, &a[a_offset], lda, &tau[1], &work[1]); + } + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DTZRZF */ + +} /* _starpu_dtzrzf_ */ diff --git a/min-dgels/base/SRC/dzsum1.c b/min-dgels/base/SRC/dzsum1.c new file mode 100644 index 0000000..085de9e --- /dev/null +++ b/min-dgels/base/SRC/dzsum1.c @@ -0,0 +1,114 @@ +/* dzsum1.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val; + + /* Builtin functions */ + double z_abs(doublecomplex *); + + /* Local variables */ + integer i__, nincx; + doublereal stemp; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* DZSUM1 takes the sum of the absolute values of a complex */ +/* vector and returns a double precision result. */ + +/* Based on DZASUM from the Level 1 BLAS. */ +/* The change is to use the 'genuine' absolute value. */ + +/* Contributed by Nick Higham for use with ZLACON. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of elements in the vector CX. */ + +/* CX (input) COMPLEX*16 array, dimension (N) */ +/* The vector whose elements will be summed. */ + +/* INCX (input) INTEGER */ +/* The spacing between successive values of CX. INCX > 0. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* CODE FOR INCREMENT NOT EQUAL TO 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* NEXT LINE MODIFIED. */ + + stemp += z_abs(&cx[i__]); +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* CODE FOR INCREMENT EQUAL TO 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + +/* NEXT LINE MODIFIED. */ + + stemp += z_abs(&cx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; + +/* End of DZSUM1 */ + +} /* _starpu_dzsum1_ */ diff --git a/min-dgels/base/SRC/icmax1.c b/min-dgels/base/SRC/icmax1.c new file mode 100644 index 0000000..d54623b --- /dev/null +++ b/min-dgels/base/SRC/icmax1.c @@ -0,0 +1,127 @@ +/* icmax1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_icmax1_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Builtin functions */ + double c_abs(complex *); + + /* Local variables */ + integer i__, ix; + real smax; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ICMAX1 finds the index of the first vector element of maximum */ +/* absolute value. */ + +/* Based on ICAMAX from Level 1 BLAS. */ +/* The change is to use the 'genuine' absolute value. */ + +/* Contributed by Nick Higham for use with CLACON. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of elements in the vector CX. */ + +/* CX (input) COMPLEX array, dimension (N) */ +/* The vector whose element of maximum absolute value is sought. */ + +/* INCX (input) INTEGER */ +/* The spacing between successive values of CX. INCX >= 1. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Statement Functions .. */ +/* .. */ +/* .. Statement Function definitions .. */ + +/* NEXT LINE IS THE ONLY MODIFICATION. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0; + if (*n < 1) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L30; + } + +/* CODE FOR INCREMENT NOT EQUAL TO 1 */ + + ix = 1; + smax = c_abs(&cx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (c_abs(&cx[ix]) <= smax) { + goto L10; + } + ret_val = i__; + smax = c_abs(&cx[ix]); +L10: + ix += *incx; +/* L20: */ + } + return ret_val; + +/* CODE FOR INCREMENT EQUAL TO 1 */ + +L30: + smax = c_abs(&cx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (c_abs(&cx[i__]) <= smax) { + goto L40; + } + ret_val = i__; + smax = c_abs(&cx[i__]); +L40: + ; + } + return ret_val; + +/* End of ICMAX1 */ + +} /* _starpu_icmax1_ */ diff --git a/min-dgels/base/SRC/ieeeck.c b/min-dgels/base/SRC/ieeeck.c new file mode 100644 index 0000000..7172d72 --- /dev/null +++ b/min-dgels/base/SRC/ieeeck.c @@ -0,0 +1,166 @@ +/* ieeeck.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* IEEECK is called from ILAENV to verify that Infinity and */ +/* possibly NaN arithmetic is safe (i.e. will not trap). */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) INTEGER */ +/* Specifies whether to test just for infinity arithmetic */ +/* or whether to test for infinity and NaN arithmetic. */ +/* = 0: Verify infinity arithmetic only. */ +/* = 1: Verify infinity and NaN arithmetic. */ + +/* ZERO (input) REAL */ +/* Must contain the value 0.0 */ +/* This is passed to prevent the compiler from optimizing */ +/* away this code. */ + +/* ONE (input) REAL */ +/* Must contain the value 1.0 */ +/* This is passed to prevent the compiler from optimizing */ +/* away this code. */ + +/* RETURN VALUE: INTEGER */ +/* = 0: Arithmetic failed to produce the correct answers */ +/* = 1: Arithmetic produced the correct answers */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + 
ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* _starpu_ieeeck_ */ diff --git a/min-dgels/base/SRC/ilaclc.c b/min-dgels/base/SRC/ilaclc.c new file mode 100644 index 0000000..947e825 --- /dev/null +++ b/min-dgels/base/SRC/ilaclc.c @@ -0,0 +1,94 @@ +/* ilaclc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILACLC scans A for its last non-zero column. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) COMPLEX array, dimension (LDA,N) */ +/* The m by n matrix A. 
*/ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* _starpu_ilaclc_ */ diff --git a/min-dgels/base/SRC/ilaclr.c b/min-dgels/base/SRC/ilaclr.c new file mode 100644 index 0000000..959e025 --- /dev/null +++ b/min-dgels/base/SRC/ilaclr.c @@ -0,0 +1,96 @@ +/* ilaclr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILACLR scans A for its last non-zero row. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) COMPLEX array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. 
*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + break; + } + } + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* _starpu_ilaclr_ */ diff --git a/min-dgels/base/SRC/iladiag.c b/min-dgels/base/SRC/iladiag.c new file mode 100644 index 0000000..932698a --- /dev/null +++ b/min-dgels/base/SRC/iladiag.c @@ -0,0 +1,65 @@ +/* iladiag.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iladiag_(char *diag) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2008 */ +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine translated from a character string specifying if a */ +/* matrix has unit diagonal or not to the relevant BLAST-specified */ +/* integer constant. */ + +/* ILADIAG returns an INTEGER. 
If ILADIAG < 0, then the input is not a */ +/* character indicating a unit or non-unit diagonal. Otherwise ILADIAG */ +/* returns the constant value corresponding to DIAG. */ + +/* Arguments */ +/* ========= */ +/* DIAG (input) CHARACTER*1 */ +/* = 'N': A is non-unit triangular; */ +/* = 'U': A is unit triangular. */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (_starpu_lsame_(diag, "N")) { + ret_val = 131; + } else if (_starpu_lsame_(diag, "U")) { + ret_val = 132; + } else { + ret_val = -1; + } + return ret_val; + +/* End of ILADIAG */ + +} /* _starpu_iladiag_ */ diff --git a/min-dgels/base/SRC/iladlc.c b/min-dgels/base/SRC/iladlc.c new file mode 100644 index 0000000..55c605f --- /dev/null +++ b/min-dgels/base/SRC/iladlc.c @@ -0,0 +1,88 @@ +/* iladlc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* ILADLC scans A for its last non-zero column. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.) { + return ret_val; + } + } + } + } + return ret_val; +} /* _starpu_iladlc_ */ diff --git a/min-dgels/base/SRC/iladlr.c b/min-dgels/base/SRC/iladlr.c new file mode 100644 index 0000000..a0fc23e --- /dev/null +++ b/min-dgels/base/SRC/iladlr.c @@ -0,0 +1,90 @@ +/* iladlr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILADLR scans A for its last non-zero row. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. 
*/ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.) { + break; + } + } + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* _starpu_iladlr_ */ diff --git a/min-dgels/base/SRC/ilaenv.c b/min-dgels/base/SRC/ilaenv.c new file mode 100644 index 0000000..9d84120 --- /dev/null +++ b/min-dgels/base/SRC/ilaenv.c @@ -0,0 +1,654 @@ +/* ilaenv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "string.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__0 = 0; + +integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + integer i__; + char c1[1], c2[1], c3[1], c4[1]; + integer ic, nb, iz, nx; + logical cname; + integer nbmin; + logical sname; + extern integer _starpu_ieeeck_(integer *, real *, real *); + char subnam[1]; + extern integer _starpu_iparmq_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + + ftnlen name_len, opts_len; + + name_len = strlen (name__); + opts_len = strlen (opts); + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. 
Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILAENV is called from the LAPACK routines to choose problem-dependent */ +/* parameters for the local environment. See ISPEC for a description of */ +/* the parameters. */ + +/* ILAENV returns an INTEGER */ +/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ +/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ + +/* This version provides a set of parameters which should give good, */ +/* but not optimal, performance on many of the currently available */ +/* computers. Users are encouraged to modify this subroutine to set */ +/* the tuning parameters for their particular machine using the option */ +/* and problem size information in the arguments. */ + +/* This routine will not function correctly if it is converted to all */ +/* lower case. Converting it to all upper case is allowed. */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) INTEGER */ +/* Specifies the parameter to be returned as the value of */ +/* ILAENV. */ +/* = 1: the optimal blocksize; if this value is 1, an unblocked */ +/* algorithm will give the best performance. */ +/* = 2: the minimum block size for which the block routine */ +/* should be used; if the usable block size is less than */ +/* this value, an unblocked routine should be used. */ +/* = 3: the crossover point (in a block routine, for N less */ +/* than this value, an unblocked routine should be used) */ +/* = 4: the number of shifts, used in the nonsymmetric */ +/* eigenvalue routines (DEPRECATED) */ +/* = 5: the minimum column dimension for blocking to be used; */ +/* rectangular blocks must have dimension at least k by m, */ +/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) 
*/ +/* = 6: the crossover point for the SVD (when reducing an m by n */ +/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ +/* this value, a QR factorization is used first to reduce */ +/* the matrix to a triangular form.) */ +/* = 7: the number of processors */ +/* = 8: the crossover point for the multishift QR method */ +/* for nonsymmetric eigenvalue problems (DEPRECATED) */ +/* = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ +/* =10: ieee NaN arithmetic can be trusted not to trap */ +/* =11: infinity arithmetic can be trusted not to trap */ +/* 12 <= ISPEC <= 16: */ +/* xHSEQR or one of its subroutines, */ +/* see IPARMQ for detailed explanation */ + +/* NAME (input) CHARACTER*(*) */ +/* The name of the calling subroutine, in either upper case or */ +/* lower case. */ + +/* OPTS (input) CHARACTER*(*) */ +/* The character options to the subroutine NAME, concatenated */ +/* into a single character string. For example, UPLO = 'U', */ +/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ +/* be specified as OPTS = 'UTN'. */ + +/* N1 (input) INTEGER */ +/* N2 (input) INTEGER */ +/* N3 (input) INTEGER */ +/* N4 (input) INTEGER */ +/* Problem dimensions for the subroutine NAME; these may not all */ +/* be required. */ + +/* Further Details */ +/* =============== */ + +/* The following conventions have been used when calling ILAENV from the */ +/* LAPACK routines: */ +/* 1) OPTS is a concatenation of all of the character options to */ +/* subroutine NAME, in the same order that they appear in the */ +/* argument list for NAME, even if they are not used in determining */ +/* the value of the parameter specified by ISPEC. */ +/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ +/* that they appear in the argument list for NAME. 
N1 is used */ +/* first, N2 second, and so on, and unused problem dimensions are */ +/* passed a value of -1. */ +/* 3) The parameter value returned by ILAENV is checked for validity in */ +/* the calling subroutine. For example, ILAENV is used to retrieve */ +/* the optimal blocksize for STRTRI as follows: */ + +/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ +/* IF( NB.LE.1 ) NB = MAX( 1, N ) */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + switch (*ispec) { + case 1: goto L10; + case 2: goto L10; + case 3: goto L10; + case 4: goto L80; + case 5: goto L90; + case 6: goto L100; + case 7: goto L110; + case 8: goto L120; + case 9: goto L130; + case 10: goto L140; + case 11: goto L150; + case 12: goto L160; + case 13: goto L160; + case 14: goto L160; + case 15: goto L160; + case 16: goto L160; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L10: + +/* Convert NAME to upper case if the first character is lower case. 
*/ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)1, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L20: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L30: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L40: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2); + + switch (*ispec) { + case 1: goto L50; + case 2: goto L60; + case 3: goto L70; + } + +L50: + +/* ISPEC = 1: block size */ + +/* In these examples, separate code is provided for setting NB for */ +/* real and complex. We assume that NB will take the same value in */ +/* single or double precision. 
*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb 
= 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)1, 
(ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L60: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + 
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L70: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) 
{ + if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)1, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L80: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L90: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L100: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L110: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L120: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L130: + +/* ISPEC = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ + + ret_val = 25; + return ret_val; + +L140: + +/* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = _starpu_ieeeck_(&c__1, &c_b163, &c_b164); 
+ } + return ret_val; + +L150: + +/* ISPEC = 11: infinity arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = _starpu_ieeeck_(&c__0, &c_b163, &c_b164); + } + return ret_val; + +L160: + +/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ + + ret_val = _starpu_iparmq_(ispec, name__, opts, n1, n2, n3, n4) + ; + return ret_val; + +/* End of ILAENV */ + +} /* _starpu_ilaenv_ */ diff --git a/min-dgels/base/SRC/ilaprec.c b/min-dgels/base/SRC/ilaprec.c new file mode 100644 index 0000000..9c72485 --- /dev/null +++ b/min-dgels/base/SRC/ilaprec.c @@ -0,0 +1,72 @@ +/* ilaprec.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilaprec_(char *prec) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2008 */ +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine translated from a character string specifying an */ +/* intermediate precision to the relevant BLAST-specified integer */ +/* constant. */ + +/* ILAPREC returns an INTEGER. If ILAPREC < 0, then the input is not a */ +/* character indicating a supported intermediate precision. Otherwise */ +/* ILAPREC returns the constant value corresponding to PREC. 
*/ + +/* Arguments */ +/* ========= */ +/* PREC (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'S': Single */ +/* = 'D': Double */ +/* = 'I': Indigenous */ +/* = 'X', 'E': Extra */ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (_starpu_lsame_(prec, "S")) { + ret_val = 211; + } else if (_starpu_lsame_(prec, "D")) { + ret_val = 212; + } else if (_starpu_lsame_(prec, "I")) { + ret_val = 213; + } else if (_starpu_lsame_(prec, "X") || _starpu_lsame_(prec, "E")) { + ret_val = 214; + } else { + ret_val = -1; + } + return ret_val; + +/* End of ILAPREC */ + +} /* _starpu_ilaprec_ */ diff --git a/min-dgels/base/SRC/ilaslc.c b/min-dgels/base/SRC/ilaslc.c new file mode 100644 index 0000000..48feb40 --- /dev/null +++ b/min-dgels/base/SRC/ilaslc.c @@ -0,0 +1,88 @@ +/* ilaslc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* ILASLC scans A for its last non-zero column. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) REAL array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.f) { + return ret_val; + } + } + } + } + return ret_val; +} /* _starpu_ilaslc_ */ diff --git a/min-dgels/base/SRC/ilaslr.c b/min-dgels/base/SRC/ilaslr.c new file mode 100644 index 0000000..3f47d1c --- /dev/null +++ b/min-dgels/base/SRC/ilaslr.c @@ -0,0 +1,90 @@ +/* ilaslr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILASLR scans A for its last non-zero row. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) REAL array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. 
*/ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.f) { + break; + } + } + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* _starpu_ilaslr_ */ diff --git a/min-dgels/base/SRC/ilatrans.c b/min-dgels/base/SRC/ilatrans.c new file mode 100644 index 0000000..b41a09e --- /dev/null +++ b/min-dgels/base/SRC/ilatrans.c @@ -0,0 +1,69 @@ +/* ilatrans.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilatrans_(char *trans) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2008 */ +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine translates from a character string specifying a */ +/* transposition operation to the relevant BLAST-specified integer */ +/* constant. */ + +/* ILATRANS returns an INTEGER. If ILATRANS < 0, then the input is not */ +/* a character indicating a transposition operator. Otherwise ILATRANS */ +/* returns the constant value corresponding to TRANS. */ + +/* Arguments */ +/* ========= */ +/* TRANS (input) CHARACTER*1 */ +/* Specifies the form of the system of equations: */ +/* = 'N': No transpose */ +/* = 'T': Transpose */ +/* = 'C': Conjugate transpose */ +/* ===================================================================== */ + +/* .. Parameters .. 
*/ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (_starpu_lsame_(trans, "N")) { + ret_val = 111; + } else if (_starpu_lsame_(trans, "T")) { + ret_val = 112; + } else if (_starpu_lsame_(trans, "C")) { + ret_val = 113; + } else { + ret_val = -1; + } + return ret_val; + +/* End of ILATRANS */ + +} /* _starpu_ilatrans_ */ diff --git a/min-dgels/base/SRC/ilauplo.c b/min-dgels/base/SRC/ilauplo.c new file mode 100644 index 0000000..c5f0df4 --- /dev/null +++ b/min-dgels/base/SRC/ilauplo.c @@ -0,0 +1,65 @@ +/* ilauplo.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilauplo_(char *uplo) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + extern logical _starpu_lsame_(char *, char *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* October 2008 */ +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* This subroutine translates from a character string specifying an */ +/* upper- or lower-triangular matrix to the relevant BLAST-specified */ +/* integer constant. */ + +/* ILAUPLO returns an INTEGER. If ILAUPLO < 0, then the input is not */ +/* a character indicating an upper- or lower-triangular matrix. */ +/* Otherwise ILAUPLO returns the constant value corresponding to UPLO. */ + +/* Arguments */ +/* ========= */ +/* UPLO (input) CHARACTER */ +/* = 'U': A is upper triangular; */ +/* = 'L': A is lower triangular. 
*/ +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (_starpu_lsame_(uplo, "U")) { + ret_val = 121; + } else if (_starpu_lsame_(uplo, "L")) { + ret_val = 122; + } else { + ret_val = -1; + } + return ret_val; + +/* End of ILAUPLO */ + +} /* _starpu_ilauplo_ */ diff --git a/min-dgels/base/SRC/ilaver.c b/min-dgels/base/SRC/ilaver.c new file mode 100644 index 0000000..f9ed67d --- /dev/null +++ b/min-dgels/base/SRC/ilaver.c @@ -0,0 +1,47 @@ +/* ilaver.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, + integer *vers_patch__) +{ + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* This subroutine returns the LAPACK version */ + +/* Arguments */ +/* ========= */ +/* VERS_MAJOR (output) INTEGER */ +/* returns the LAPACK major version */ +/* VERS_MINOR (output) INTEGER */ +/* returns the LAPACK minor version from the major version */ +/* VERS_PATCH (output) INTEGER */ +/* returns the LAPACK patch version from the minor version */ +/* ===================================================================== */ + +/* ===================================================================== */ + *vers_major__ = 3; + *vers_minor__ = 1; + *vers_patch__ = 1; +/* ===================================================================== */ + + return 0; +} /* _starpu_ilaver_ */ diff --git a/min-dgels/base/SRC/ilazlc.c b/min-dgels/base/SRC/ilazlc.c new file mode 100644 index 0000000..5fde832 --- /dev/null +++ b/min-dgels/base/SRC/ilazlc.c @@ -0,0 +1,94 @@ +/* ilazlc.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. 
*/ + +/* Purpose */ +/* ======= */ + +/* ILAZLC scans A for its last non-zero column. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) COMPLEX*16 array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* _starpu_ilazlc_ */ diff --git a/min-dgels/base/SRC/ilazlr.c b/min-dgels/base/SRC/ilazlr.c new file mode 100644 index 0000000..4d24f9d --- /dev/null +++ b/min-dgels/base/SRC/ilazlr.c @@ -0,0 +1,96 @@ +/* ilazlr.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ + +/* -- April 2009 -- */ + +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILAZLR scans A for its last non-zero row. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. */ + +/* A (input) COMPLEX*16 array, dimension (LDA,N) */ +/* The m by n matrix A. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. 
|| a[i__2] + .i != 0.)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + break; + } + } + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* _starpu_ilazlr_ */ diff --git a/min-dgels/base/SRC/iparmq.c b/min-dgels/base/SRC/iparmq.c new file mode 100644 index 0000000..356c8e3 --- /dev/null +++ b/min-dgels/base/SRC/iparmq.c @@ -0,0 +1,282 @@ +/* iparmq.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer i_nint(real *); + + /* Local variables */ + integer nh, ns; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ + +/* Purpose */ +/* ======= */ + +/* This program sets problem and machine dependent parameters */ +/* useful for xHSEQR and its subroutines. It is called whenever */ +/* ILAENV is called with 12 <= ISPEC <= 16 */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) integer scalar */ +/* ISPEC specifies which tunable parameter IPARMQ should */ +/* return. 
*/ + +/* ISPEC=12: (INMIN) Matrices of order nmin or less */ +/* are sent directly to xLAHQR, the implicit */ +/* double shift QR algorithm. NMIN must be */ +/* at least 11. */ + +/* ISPEC=13: (INWIN) Size of the deflation window. */ +/* This is best set greater than or equal to */ +/* the number of simultaneous shifts NS. */ +/* Larger matrices benefit from larger deflation */ +/* windows. */ + +/* ISPEC=14: (INIBL) Determines when to stop nibbling and */ +/* invest in an (expensive) multi-shift QR sweep. */ +/* If the aggressive early deflation subroutine */ +/* finds LD converged eigenvalues from an order */ +/* NW deflation window and LD.GT.(NW*NIBBLE)/100, */ +/* then the next QR sweep is skipped and early */ +/* deflation is applied immediately to the */ +/* remaining active diagonal block. Setting */ +/* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */ +/* multi-shift QR sweep whenever early deflation */ +/* finds a converged eigenvalue. Setting */ +/* IPARMQ(ISPEC=14) greater than or equal to 100 */ +/* prevents TTQRE from skipping a multi-shift */ +/* QR sweep. */ + +/* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */ +/* a multi-shift QR iteration. */ + +/* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */ +/* following meanings. */ +/* 0: During the multi-shift QR sweep, */ +/* xLAQR5 does not accumulate reflections and */ +/* does not use matrix-matrix multiply to */ +/* update the far-from-diagonal matrix */ +/* entries. */ +/* 1: During the multi-shift QR sweep, */ +/* xLAQR5 and/or xLAQRaccumulates reflections and uses */ +/* matrix-matrix multiply to update the */ +/* far-from-diagonal matrix entries. */ +/* 2: During the multi-shift QR sweep. */ +/* xLAQR5 accumulates reflections and takes */ +/* advantage of 2-by-2 block structure during */ +/* matrix-matrix multiplies. 
*/ +/* (If xTRMM is slower than xGEMM, then */ +/* IPARMQ(ISPEC=16)=1 may be more efficient than */ +/* IPARMQ(ISPEC=16)=2 despite the greater level of */ +/* arithmetic work implied by the latter choice.) */ + +/* NAME (input) character string */ +/* Name of the calling subroutine */ + +/* OPTS (input) character string */ +/* This is a concatenation of the string arguments to */ +/* TTQRE. */ + +/* N (input) integer scalar */ +/* N is the order of the Hessenberg matrix H. */ + +/* ILO (input) INTEGER */ +/* IHI (input) INTEGER */ +/* It is assumed that H is already upper triangular */ +/* in rows and columns 1:ILO-1 and IHI+1:N. */ + +/* LWORK (input) integer scalar */ +/* The amount of workspace available. */ + +/* Further Details */ +/* =============== */ + +/* Little is known about how best to choose these parameters. */ +/* It is possible to use different values of the parameters */ +/* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */ + +/* It is probably best to choose different parameters for */ +/* different matrices and different parameters at different */ +/* times during the iteration, but this has not been */ +/* implemented --- yet. */ + + +/* The best choices of most of the parameters depend */ +/* in an ill-understood way on the relative execution */ +/* rate of xLAQR3 and xLAQR5 and on the nature of each */ +/* particular eigenvalue problem. Experiment may be the */ +/* only practical way to determine which choices are most */ +/* effective. */ + +/* Following is a list of default values supplied by IPARMQ. */ +/* These defaults may be adjusted in order to attain better */ +/* performance in any particular computational environment. */ + +/* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */ +/* Default: 75. (Must be at least 11.) */ + +/* IPARMQ(ISPEC=13) Recommended deflation window size. */ +/* This depends on ILO, IHI and NS, the */ +/* number of simultaneous shifts returned */ +/* by IPARMQ(ISPEC=15). 
The default for */ +/* (IHI-ILO+1).LE.500 is NS. The default */ +/* for (IHI-ILO+1).GT.500 is 3*NS/2. */ + +/* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */ + +/* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in */ +/* a multi-shift QR iteration. */ + +/* If IHI-ILO+1 is ... */ + +/* greater than ...but less ... the */ +/* or equal to ... than default is */ + +/* 0 30 NS = 2+ */ +/* 30 60 NS = 4+ */ +/* 60 150 NS = 10 */ +/* 150 590 NS = ** */ +/* 590 3000 NS = 64 */ +/* 3000 6000 NS = 128 */ +/* 6000 infinity NS = 256 */ + +/* (+) By default matrices of this order are */ +/* passed to the implicit double shift routine */ +/* xLAHQR. See IPARMQ(ISPEC=12) above. These */ +/* values of NS are used only in case of a rare */ +/* xLAHQR failure. */ + +/* (**) The asterisks (**) indicate an ad-hoc */ +/* function increasing from 10 to 64. */ + +/* IPARMQ(ISPEC=16) Select structured matrix multiply. */ +/* (See ISPEC=16 above for details.) */ +/* Default: 0 if NS < 14, otherwise 2. */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (*ispec == 15 || *ispec == 13 || *ispec == 16) { + +/* ==== Set the number of simultaneous shifts ==== */ + + nh = *ihi - *ilo + 1; + ns = 2; + if (nh >= 30) { + ns = 4; + } + if (nh >= 60) { + ns = 10; + } + if (nh >= 150) { +/* Computing MAX */ + r__1 = log((real) nh) / log(2.f); + i__1 = 10, i__2 = nh / i_nint(&r__1); + ns = max(i__1,i__2); + } + if (nh >= 590) { + ns = 64; + } + if (nh >= 3000) { + ns = 128; + } + if (nh >= 6000) { + ns = 256; + } +/* Computing MAX */ + i__1 = 2, i__2 = ns - ns % 2; + ns = max(i__1,i__2); + } + + if (*ispec == 12) { + + +/* ===== Matrices of order smaller than NMIN get sent */ +/* . to xLAHQR, the classic double shift algorithm. */ +/* . This must be at least 11. 
==== */ + + ret_val = 75; + + } else if (*ispec == 14) { + +/* ==== INIBL: skip a multi-shift qr iteration and */ +/* . whenever aggressive early deflation finds */ +/* . at least (NIBBLE*(window size)/100) deflations. ==== */ + + ret_val = 14; + + } else if (*ispec == 15) { + +/* ==== NSHFTS: The number of simultaneous shifts ===== */ + + ret_val = ns; + + } else if (*ispec == 13) { + +/* ==== NW: deflation window size. ==== */ + + if (nh <= 500) { + ret_val = ns; + } else { + ret_val = ns * 3 / 2; + } + + } else if (*ispec == 16) { + +/* ==== IACC22: Whether to accumulate reflections */ +/* . before updating the far-from-diagonal elements */ +/* . and whether to use 2-by-2 block structure while */ +/* . doing it. A small amount of work could be saved */ +/* . by making this choice dependent also upon the */ +/* . NH=IHI-ILO+1. */ + + ret_val = 0; + if (ns >= 14) { + ret_val = 1; + } + if (ns >= 14) { + ret_val = 2; + } + + } else { +/* ===== invalid value of ispec ===== */ + ret_val = -1; + + } + +/* ==== End of IPARMQ ==== */ + + return ret_val; +} /* _starpu_iparmq_ */ diff --git a/min-dgels/base/SRC/izmax1.c b/min-dgels/base/SRC/izmax1.c new file mode 100644 index 0000000..3b5e6aa --- /dev/null +++ b/min-dgels/base/SRC/izmax1.c @@ -0,0 +1,127 @@ +/* izmax1.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Builtin functions */ + double z_abs(doublecomplex *); + + /* Local variables */ + integer i__, ix; + doublereal smax; + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* IZMAX1 finds the index of the first element of CX whose absolute */ +/* value, as computed by z_abs, is maximal. */ + +/* Based on IZAMAX from Level 1 BLAS. */ +/* The change is to use the 'genuine' absolute value. */ + +/* Contributed by Nick Higham for use with ZLACON. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of elements in the vector CX. */ + +/* CX (input) COMPLEX*16 array, dimension (N) */ +/* The vector whose elements will be searched. */ + +/* INCX (input) INTEGER */ +/* The spacing between successive values of CX. INCX >= 1. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Statement Functions .. */ +/* .. */ +/* .. Statement Function definitions .. */ + +/* NEXT LINE IS THE ONLY MODIFICATION. */ +/* .. */ +/* .. Executable Statements .. 
*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0; + if (*n < 1) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L30; + } + +/* CODE FOR INCREMENT NOT EQUAL TO 1 */ + + ix = 1; + smax = z_abs(&cx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (z_abs(&cx[ix]) <= smax) { + goto L10; + } + ret_val = i__; + smax = z_abs(&cx[ix]); +L10: + ix += *incx; +/* L20: */ + } + return ret_val; + +/* CODE FOR INCREMENT EQUAL TO 1 */ + +L30: + smax = z_abs(&cx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (z_abs(&cx[i__]) <= smax) { + goto L40; + } + ret_val = i__; + smax = z_abs(&cx[i__]); +L40: + ; + } + return ret_val; + +/* End of IZMAX1 */ + +} /* _starpu_izmax1_ */ diff --git a/min-dgels/base/SRC/lsamen.c b/min-dgels/base/SRC/lsamen.c new file mode 100644 index 0000000..e593be2 --- /dev/null +++ b/min-dgels/base/SRC/lsamen.c @@ -0,0 +1,98 @@ +/* lsamen.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "string.h" + +logical _starpu_lsamen_(integer *n, char *ca, char *cb) +{ + /* System generated locals */ + integer i__1; + logical ret_val; + + /* Builtin functions */ + integer i_len(char *, ftnlen); + + /* Local variables */ + integer i__; + extern logical _starpu_lsame_(char *, char *); + + ftnlen ca_len, cb_len; + + ca_len = strlen (ca); + cb_len = strlen (cb); + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
*/ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* LSAMEN tests if the first N letters of CA are the same as the */ +/* first N letters of CB, regardless of case. */ +/* LSAMEN returns .TRUE. if CA and CB are equivalent except for case */ +/* and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) */ +/* or LEN( CB ) is less than N. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* The number of characters in CA and CB to be compared. */ + +/* CA (input) CHARACTER*(*) */ +/* CB (input) CHARACTER*(*) */ +/* CA and CB specify two character strings of length at least N. */ +/* Only the first N characters of each string will be accessed. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + ret_val = FALSE_; + if (i_len(ca, ca_len) < *n || i_len(cb, cb_len) < *n) { + goto L20; + } + +/* Do for each character in the two strings. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Test if the characters are equal using LSAME. */ + + if (! _starpu_lsame_(ca + (i__ - 1), cb + (i__ - 1))) { + goto L20; + } + +/* L10: */ + } + ret_val = TRUE_; + +L20: + return ret_val; + +/* End of LSAMEN */ + +} /* _starpu_lsamen_ */ diff --git a/min-dgels/base/SRC/maxloc.c b/min-dgels/base/SRC/maxloc.c new file mode 100644 index 0000000..ac64926 --- /dev/null +++ b/min-dgels/base/SRC/maxloc.c @@ -0,0 +1,71 @@ +/* maxloc.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" + +/* ********************************************************************************** */ +integer _starpu_smaxloc_(real *a, integer *dimm) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + integer i__; + real smax; + + + + /* Parameter adjustments */ + --a; + + /* Function Body */ + ret_val = 1; + smax = a[1]; + i__1 = *dimm; + for (i__ = 2; i__ <= i__1; ++i__) { + if (smax < a[i__]) { + smax = a[i__]; + ret_val = i__; + } +/* L10: */ + } + return ret_val; +} /* _starpu_smaxloc_ */ + +/* ********************************************************************************** */ +integer _starpu_dmaxloc_(doublereal *a, integer *dimm) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + integer i__; + doublereal dmax__; + + + + /* Parameter adjustments */ + --a; + + /* Function Body */ + ret_val = 1; + dmax__ = a[1]; + i__1 = *dimm; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dmax__ < a[i__]) { + dmax__ = a[i__]; + ret_val = i__; + } +/* L20: */ + } + return ret_val; +} /* _starpu_dmaxloc_ */ diff --git a/min-dgels/base/SRC/xerbla.c b/min-dgels/base/SRC/xerbla.c new file mode 100644 index 0000000..e6dbc08 --- /dev/null +++ b/min-dgels/base/SRC/xerbla.c @@ -0,0 +1,65 @@ +/* xerbla.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" +#include "stdio.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) +{ + + +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* XERBLA is an error handler for the LAPACK routines. */ +/* It is called by an LAPACK routine if an input parameter has an */ +/* invalid value. A message is printed to stdout and 0 is returned. */ + +/* Unlike the Fortran original, this translation has no STOP statement: */ +/* control returns to the caller after the message is printed. */ + +/* Arguments */ +/* ========= */ + +/* SRNAME (input) CHARACTER*(*) */ +/* NUL-terminated name of the routine which called XERBLA. */ + +/* INFO (input) INTEGER */ +/* The position of the invalid parameter in the parameter list */ +/* of the calling routine. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + printf("** On entry to %s, parameter number %ld had an illegal value\n", + srname, *info); + + +/* End of XERBLA */ + + return 0; +} /* _starpu_xerbla_ */ diff --git a/min-dgels/base/SRC/xerbla_array.c b/min-dgels/base/SRC/xerbla_array.c new file mode 100644 index 0000000..3469039 --- /dev/null +++ b/min-dgels/base/SRC/xerbla_array.c @@ -0,0 +1,102 @@ +/* _starpu_xerbla_array.f -- translated by f2c (version 20061008). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * + srname_len__, integer *info, ftnlen srname_array_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer i_len(char *, ftnlen); + + /* Local variables */ + integer i__; + extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); + char srname[32]; + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* September 19, 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK */ +/* and BLAS error handler. Rather than taking a Fortran string argument */ +/* as the function's name, XERBLA_ARRAY takes an array of single */ +/* characters along with the array's length. XERBLA_ARRAY then copies */ +/* up to 32 characters of that array into a Fortran string and passes */ +/* that to XERBLA. If called with a non-positive SRNAME_LEN, */ +/* XERBLA_ARRAY will call XERBLA with a string of all blank characters. */ + +/* Say some macro or other device makes XERBLA_ARRAY available to C99 */ +/* by a name lapack_xerbla and with a common Fortran calling convention. 
*/ +/* Then a C99 program could invoke XERBLA via: */ +/* { */ +/* int flen = strlen(__func__); */ +/* lapack_xerbla(__func__, &flen, &info); */ +/* } */ + +/* Providing XERBLA_ARRAY is not necessary for intercepting LAPACK */ +/* errors. XERBLA_ARRAY calls XERBLA. */ + +/* Arguments */ +/* ========= */ + +/* SRNAME_ARRAY (input) CHARACTER(1) array, dimension (SRNAME_LEN) */ +/* The name of the routine which called XERBLA_ARRAY. */ + +/* SRNAME_LEN (input) INTEGER */ +/* The length of the name in SRNAME_ARRAY. */ + +/* INFO (input) INTEGER */ +/* The position of the invalid parameter in the parameter list */ +/* of the calling routine. */ + +/* ===================================================================== */ + +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments */ + --srname_array__; + + /* Function Body */ + s_copy(srname, "", (ftnlen)32, (ftnlen)0); +/* Computing MIN */ + i__2 = *srname_len__, i__3 = i_len(srname, (ftnlen)32); + i__1 = min(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + *(unsigned char *)&srname[i__ - 1] = *(unsigned char *)& + srname_array__[i__]; + } + _starpu_xerbla_(srname, info); + return 0; +} /* _starpu_xerbla_array__ */ diff --git a/min-dgels/base/make.inc b/min-dgels/base/make.inc new file mode 100644 index 0000000..b321423 --- /dev/null +++ b/min-dgels/base/make.inc @@ -0,0 +1,60 @@ +# -*- Makefile -*- +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.2.1 # +# June 2009 # +#################################################################### +# +# See the INSTALL/ directory for more examples. 
+# +SHELL = /bin/sh +# +# The machine (platform) identifier to append to the library names +# +PLAT = _LINUX +# +# Modify the FORTRAN and OPTS definitions to refer to the +# compiler and desired compiler options for your machine. NOOPT +# refers to the compiler options desired when NO OPTIMIZATION is +# selected. Define LOADER and LOADOPTS to refer to the loader +# and desired load options for your machine. +# +####################################################### +# This is used to compile C library +#CC = gcc +# if no wrapping of the blas library is needed, uncomment next line +#CC = gcc -DNO_BLAS_WRAP +CFLAGS = -O3 -I$(TOPDIR)/INCLUDE -fPIC +#LOADER = gcc +LOADER = $(CC) +LOADOPTS = +NOOPT = -O0 -I$(TOPDIR)/INCLUDE +DRVCFLAGS = $(CFLAGS) +F2CCFLAGS = $(CFLAGS) +####################################################################### + +# +# Timer for the SECOND and DSECND routines +# +# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME +# TIMER = EXT_ETIME +# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ +# TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME +# TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) +# SECOND and DSECND will use a call to the Fortran standard INTERNAL FUNCTION CPU_TIME +TIMER = INT_CPU_TIME +# If neither of these works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 +# TIMER = NONE +# +# The archiver and the flag(s) to use when building archive (library) +# If your system has no ranlib, set RANLIB = echo.
+# + +ARCH = ar +ARCHFLAGS= cr +RANLIB = ranlib + +BLASLIB=../../../build/minlibblas.a +F2CLIB=../../../build/minlibf2c.a diff --git a/mpi/GNUmakefile.in b/mpi/GNUmakefile.in new file mode 100644 index 0000000..9451378 --- /dev/null +++ b/mpi/GNUmakefile.in @@ -0,0 +1,48 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +SUBDIRS= + +@STARPU_BUILD_EXAMPLES_TRUE@SUBDIRS += examples +@STARPU_BUILD_TESTS_TRUE@SUBDIRS += tests + +all: + +@STARPU_SIMGRID_FALSE@check: check-recursive +@STARPU_SIMGRID_FALSE@ : + +# divide by 4 the number of jobs to run in parallel, since mpirun will start 4 +# processes in the tests and examples +@STARPU_SIMGRID_FALSE@check-recursive: +@STARPU_SIMGRID_FALSE@ RET=0 ; \ +@STARPU_SIMGRID_FALSE@ NJOBS=`printf %s "$(MAKEFLAGS)" | sed -ne 's/.*-j \?\([0-9]\+\).*/\1/p'` ; \ +@STARPU_SIMGRID_FALSE@ JOBS="" ; \ +@STARPU_SIMGRID_FALSE@ if [ -n "$$NJOBS" ] ; then \ +@STARPU_SIMGRID_FALSE@ if [ "$$NJOBS" -ge 4 ] ; then \ +@STARPU_SIMGRID_FALSE@ JOBS="-j$$(($$NJOBS / 4))" ; \ +@STARPU_SIMGRID_FALSE@ else \ +@STARPU_SIMGRID_FALSE@ JOBS="-j1" ; \ +@STARPU_SIMGRID_FALSE@ fi ; \ +@STARPU_SIMGRID_FALSE@ fi ; \ +@STARPU_SIMGRID_FALSE@ for i in $(SUBDIRS) ; do \ +@STARPU_SIMGRID_FALSE@ $(MAKE) check -C $$i MAKEFLAGS="$(MAKEFLAGS) $$JOBS" || RET=1; \ +@STARPU_SIMGRID_FALSE@ done ; \ +@STARPU_SIMGRID_FALSE@ exit $$RET + +%: force + 
@$(MAKE) -f Makefile $@ + +force: ; diff --git a/mpi/Makefile.am b/mpi/Makefile.am new file mode 100644 index 0000000..298b38e --- /dev/null +++ b/mpi/Makefile.am @@ -0,0 +1,39 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS=src tools + +if STARPU_BUILD_EXAMPLES +SUBDIRS += examples +endif +if STARPU_BUILD_TESTS +SUBDIRS += tests +endif + +EXTRA_DIST = \ + dev/starpu_mpi_comm_check.sh + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpumpi.pc packages/starpumpi-1.0.pc packages/starpumpi-1.1.pc packages/starpumpi-1.2.pc packages/starpumpi-1.3.pc packages/starpumpi-1.4.pc + +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpu_mpi.h \ + include/starpu_mpi_ft.h \ + include/starpu_mpi_lb.h \ + include/fstarpu_mpi_mod.f90 diff --git a/mpi/Makefile.in b/mpi/Makefile.in new file mode 100644 index 0000000..7c1961f --- /dev/null +++ b/mpi/Makefile.in @@ -0,0 +1,986 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_EXAMPLES_TRUE@am__append_1 = examples +@STARPU_BUILD_TESTS_TRUE@am__append_2 = tests +subdir = mpi +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + 
$(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = GNUmakefile +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 
+am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(versincludedir)" +DATA = $(pkgconfig_DATA) +HEADERS = $(versinclude_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = src tools examples tests +am__DIST_COMMON = $(srcdir)/GNUmakefile.in $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = 
@DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = 
@LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = 
@PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = 
@STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = 
@datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src tools $(am__append_1) $(am__append_2) +EXTRA_DIST = \ + dev/starpu_mpi_comm_check.sh + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpumpi.pc packages/starpumpi-1.0.pc packages/starpumpi-1.1.pc packages/starpumpi-1.2.pc packages/starpumpi-1.3.pc packages/starpumpi-1.4.pc +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpu_mpi.h \ + include/starpu_mpi_ft.h \ + include/starpu_mpi_lb.h \ + include/fstarpu_mpi_mod.f90 + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case 
'$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign mpi/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +GNUmakefile: $(top_builddir)/config.status $(srcdir)/GNUmakefile.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + 
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-versincludeHEADERS: $(versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-pkgconfigDATA install-versincludeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-pkgconfigDATA uninstall-versincludeHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic 
distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkgconfigDATA install-ps \ + install-ps-am install-strip install-versincludeHEADERS \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-pkgconfigDATA \ + uninstall-versincludeHEADERS + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mpi/dev/starpu_mpi_comm_check.sh b/mpi/dev/starpu_mpi_comm_check.sh new file mode 100755 index 0000000..8e5d6ad --- /dev/null +++ b/mpi/dev/starpu_mpi_comm_check.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Script to check MPI communications are done properly +# The application should be launched with STARPU_MPI_COMM=1 +# e.g +# $ export STARPU_MPI_COMM=1 +# $ mpirun --output-filename starpu_mpi.log appli parameters +# and then the script can be launched with the output files +# $ starpu_mpi_comm_check.sh starpu_mpi.log.* + +if test -z "$1" +then + echo Syntax error: parameter missing + exit 1 +fi + +# Get the nodes identifiers +nodes=$(for f in $* + do + grep starpu_mpi $f | grep '\[' | awk '{print $1}'| sed 's/\[\(.*\)\]\[starpu_mpi\]/\1/' | grep "^[[:digit:]]*$" + done |sort|uniq + ) +echo nodes $nodes + +DIR=/tmp + +# for each node, extract send and receive communications +for node in $nodes +do + for f in $* + do + grep starpu_mpi $f |grep "\[$node" + done > $DIR/starpu_mpi_node$node.log + grep -- "-->" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_send.log + grep -- "<--" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_recv.log +done + +# count the number of traced lines +#for node in $nodes +#do +# wc -l $DIR/starpu_mpi_node${node}_recv.log +# lines=$(grep :42:42 $DIR/starpu_mpi_node${node}_recv.log | wc -l) +# lines2=$(( lines + lines )) +# echo $lines2 +# lines3=$(( lines2 + lines )) +# echo $lines3 +#done + +# for each pair of nodes, check tags are sent and received in the same order +for src in $nodes +do + for dst in $nodes + do + if test $src != $dst + then + grep ":$dst:42:" $DIR/starpu_mpi_node${src}_send.log| awk -F':' '{print $6}' > $DIR/node${src}_send_to_${dst}.log + grep ":$src:42:" $DIR/starpu_mpi_node${dst}_recv.log|awk -F ':' '{print $6}'> $DIR/node${dst}_recv_from_${src}.log + diff --side-by-side --suppress-common-lines $DIR/node${src}_send_to_${dst}.log $DIR/node${dst}_recv_from_${src}.log > $DIR/check_$$ + if test -s $DIR/check_$$ + then + echo $src $dst + less $DIR/check_$$ + fi + fi + done +done + +# check each envelope reception is followed by the appropriate data reception +# first line: MPI_Recv of the 
envelope +# second line: display envelope information +# third line: MPI_Recv of the data +for node in $nodes +do + echo processing $DIR/starpu_mpi_node${node}_recv.log + ( + while read line + do + read line2 + read line3 + #echo processing + tag2=$(echo $line2 | awk -F ':' '{print $6}') + tag3=$(echo $line3 | awk -F ':' '{print $6}') + if test "$tag2" != "$tag3" + then + echo erreur + echo $tag2 $tag3 + echo $line + echo $line2 + echo $line3 + fi + done + ) < $DIR/starpu_mpi_node${node}_recv.log +done + diff --git a/mpi/examples/Makefile.am b/mpi/examples/Makefile.am new file mode 100644 index 0000000..c2eaf10 --- /dev/null +++ b/mpi/examples/Makefile.am @@ -0,0 +1,630 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Thibaut Lambert +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk +# Compile and link the C and Fortran examples with the MPI compiler wrappers +CC=$(MPICC) +CCLD=$(MPICC) +FC=$(MPIFORT) +FCLD=$(MPIFORT) + +include $(top_srcdir)/make/starpu-loader.mk + +LAUNCHER = $(STARPU_MPIEXEC) +LAUNCHER_ENV = $(MPI_RUN_ENV) + +if STARPU_SIMGRID +LOADER_BIN = $(LAUNCHER) +endif +# The examples are only registered as 'make check' tests (TESTS) when STARPU_MPI_CHECK is set +if STARPU_MPI_CHECK +TESTS = $(starpu_mpi_EXAMPLES) +endif + +check_PROGRAMS = $(LOADER) $(starpu_mpi_EXAMPLES) +starpu_mpi_EXAMPLES = + +BUILT_SOURCES = +# fstarpu_mod.f90 and fstarpu_mpi_mod.f90 are links created under native_fortran/ by rules further down in this file +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log *.mod native_fortran/fstarpu_mod.f90 native_fortran/fstarpu_mpi_mod.f90 + +EXTRA_DIST = \ + mpi_lu/mpi_lu-float.h \ + mpi_lu/mpi_lu-double.h \ + mpi_lu/plu_example.c \ + mpi_lu/plu_implicit_example.c \ + mpi_lu/plu_outofcore_example.c \ + mpi_lu/plu_solve.c \ + mpi_lu/pxlu.h \ + mpi_lu/pxlu.c \ + mpi_lu/pxlu_implicit.c \ + mpi_lu/pxlu_kernels.h \ + mpi_lu/pxlu_kernels.c \ + matrix_mult/mm_2dbc.c \ + native_fortran/nf_mm_2dbc.f90 \ + matrix_decomposition/mpi_cholesky.h \ + matrix_decomposition/mpi_cholesky_codelets.h \ + matrix_decomposition/mpi_cholesky_kernels.h \ + matrix_decomposition/mpi_cholesky_models.h \ + matrix_decomposition/mpi_decomposition_params.h \ + matrix_decomposition/mpi_decomposition_matrix.h \ + user_datatype/my_interface.h \ + benchs/abstract_sendrecv_bench.h\ + benchs/bench_helper.h \ + benchs/gemm_helper.h \ + benchs/burst_helper.h \ + helper.h \ + perf.sh +# examplebin_PROGRAMS are installed in $(libdir)/starpu/mpi +examplebindir = $(libdir)/starpu/mpi + +examplebin_PROGRAMS = + +AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS += $(MAGMA_CFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS += $(MAGMA_CFLAGS) $(APP_FFLAGS) +AM_FCFLAGS += $(MAGMA_CFLAGS) $(APP_FCFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_CUDA_LDFLAGS) +LIBS += -lm $(MAGMA_LIBS) +
+################### +# Stencil example # +################### +examplebin_PROGRAMS += \ + stencil/stencil5 +starpu_mpi_EXAMPLES += \ + stencil/stencil5 + +if STARPU_USE_MPI_MPI +examplebin_PROGRAMS += \ + stencil/stencil5_lb +starpu_mpi_EXAMPLES += \ + stencil/stencil5_lb +endif + +################## +# Cache examples # +################## +examplebin_PROGRAMS += \ + cache/cache \ + cache/cache_disable +starpu_mpi_EXAMPLES += \ + cache/cache \ + cache/cache_disable + + +################## +# MPI LU example # +################## + +if !STARPU_NO_BLAS_LIB + +examplebin_PROGRAMS += \ + mpi_lu/plu_example_float \ + mpi_lu/plu_example_double \ + mpi_lu/plu_implicit_example_float \ + mpi_lu/plu_implicit_example_double \ + mpi_lu/plu_outofcore_example_float \ + mpi_lu/plu_outofcore_example_double + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + mpi_lu/plu_example_float \ + mpi_lu/plu_example_double \ + mpi_lu/plu_implicit_example_float \ + mpi_lu/plu_implicit_example_double \ + mpi_lu/plu_outofcore_example_float \ + mpi_lu/plu_outofcore_example_double +endif + +mpi_lu_plu_example_float_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_example_float_SOURCES = \ + mpi_lu/plu_example_float.c \ + mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c \ + mpi_lu/pslu.c \ + ../../examples/common/blas.c + +mpi_lu_plu_example_double_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_example_double_SOURCES = \ + mpi_lu/plu_example_double.c \ + mpi_lu/plu_solve_double.c \ + mpi_lu/pdlu_kernels.c \ + mpi_lu/pdlu.c \ + ../../examples/common/blas.c + +mpi_lu_plu_implicit_example_float_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_implicit_example_float_SOURCES = \ + mpi_lu/plu_implicit_example_float.c \ + mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c \ + mpi_lu/pslu_implicit.c \ + ../../examples/common/blas.c + +mpi_lu_plu_implicit_example_double_LDADD = \ +
$(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_implicit_example_double_SOURCES = \ + mpi_lu/plu_implicit_example_double.c \ + mpi_lu/plu_solve_double.c \ + mpi_lu/pdlu_kernels.c \ + mpi_lu/pdlu_implicit.c \ + ../../examples/common/blas.c + +mpi_lu_plu_outofcore_example_float_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_outofcore_example_float_SOURCES = \ + mpi_lu/plu_outofcore_example_float.c \ + mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c \ + mpi_lu/pslu_implicit.c \ + ../../examples/common/blas.c + +mpi_lu_plu_outofcore_example_double_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +mpi_lu_plu_outofcore_example_double_SOURCES = \ + mpi_lu/plu_outofcore_example_double.c \ + mpi_lu/plu_solve_double.c \ + mpi_lu/pdlu_kernels.c \ + mpi_lu/pdlu_implicit.c \ + ../../examples/common/blas.c +endif + +######################## +# MPI Cholesky example # +######################## + +if !STARPU_NO_BLAS_LIB +examplebin_PROGRAMS += \ + matrix_decomposition/mpi_cholesky \ + matrix_decomposition/mpi_cholesky_distributed + +matrix_decomposition_mpi_cholesky_SOURCES = \ + matrix_decomposition/mpi_cholesky.c \ + matrix_decomposition/mpi_cholesky_models.c \ + matrix_decomposition/mpi_cholesky_kernels.c \ + matrix_decomposition/mpi_cholesky_codelets.c \ + matrix_decomposition/mpi_decomposition_params.c \ + matrix_decomposition/mpi_decomposition_matrix.c \ + ../../examples/common/blas.c + +matrix_decomposition_mpi_cholesky_LDADD = \ + $(STARPU_BLAS_LDFLAGS) -lm + +matrix_decomposition_mpi_cholesky_distributed_SOURCES = \ + matrix_decomposition/mpi_cholesky_distributed.c \ + matrix_decomposition/mpi_cholesky_models.c \ + matrix_decomposition/mpi_cholesky_kernels.c \ + matrix_decomposition/mpi_cholesky_codelets.c \ + matrix_decomposition/mpi_decomposition_params.c \ + matrix_decomposition/mpi_decomposition_matrix.c \ + ../../examples/common/blas.c + 
+matrix_decomposition_mpi_cholesky_distributed_LDADD = \ + $(STARPU_BLAS_LDFLAGS) -lm +# Not wrapped in 'if !STARPU_SIMGRID', unlike the MPI LU examples above +starpu_mpi_EXAMPLES += \ + matrix_decomposition/mpi_cholesky \ + matrix_decomposition/mpi_cholesky_distributed +endif + +############## +# CG example # +############## +# cg/cg is only added to examplebin_PROGRAMS: nothing in this file adds it to starpu_mpi_EXAMPLES +if !STARPU_SIMGRID +if !STARPU_NO_BLAS_LIB +examplebin_PROGRAMS += cg/cg + +cg_cg_SOURCES = \ + cg/cg.c \ + ../../examples/common/blas.c + +cg_cg_LDADD = \ + $(STARPU_BLAS_LDFLAGS) +endif +endif + +############################ +# MPI Matrix mult examples # +############################ + +examplebin_PROGRAMS += \ + matrix_mult/mm + +matrix_mult_mm_LDADD = \ + -lm + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + matrix_mult/mm +endif + +if !STARPU_NO_BLAS_LIB +examplebin_PROGRAMS += \ + matrix_mult/mm_2dbc + +matrix_mult_mm_2dbc_SOURCES = \ + matrix_mult/mm_2dbc.c \ + ../../examples/common/blas.c + +matrix_mult_mm_2dbc_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + matrix_mult/mm_2dbc +endif +endif + +######################## +# MPI STARPU_MPI_REDUX # +######################## + +examplebin_PROGRAMS += \ + mpi_redux/mpi_redux \ + mpi_redux/mpi_redux_autowrapup \ + mpi_redux/mpi_redux_tree + +mpi_redux_mpi_redux_LDADD = \ + -lm +mpi_redux_mpi_redux_autowrapup_LDADD = \ + -lm +mpi_redux_mpi_redux_tree_LDADD = \ + -lm +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + mpi_redux/mpi_redux \ + mpi_redux/mpi_redux_autowrapup \ + mpi_redux/mpi_redux_tree +endif + +########################################## +# Native Fortran MPI Matrix mult example # +########################################## + +if STARPU_HAVE_MPIFORT +if !STARPU_SANITIZE +examplebin_PROGRAMS += \ + native_fortran/nf_mm \ + native_fortran/nf_mm_task_build \ + native_fortran/nf_basic_ring + +native_fortran_nf_mm_SOURCES = \ + native_fortran/nf_mm_cl.f90 \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mm.f90 + +native_fortran_nf_mm_LDADD
= \ + -lm + +native_fortran_nf_mm_task_build_SOURCES = \ + native_fortran/nf_mm_cl.f90 \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mm_task_build.f90 + +native_fortran_nf_mm_task_build_LDADD = \ + -lm + +native_fortran_nf_basic_ring_SOURCES = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_basic_ring.f90 + +native_fortran_nf_basic_ring_LDADD = \ + -lm + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + native_fortran/nf_mm \ + native_fortran/nf_mm_task_build \ + native_fortran/nf_basic_ring +endif + +if !STARPU_NO_BLAS_LIB +if STARPU_HAVE_LIBLAPACK +examplebin_PROGRAMS += \ + native_fortran/nf_mm_2dbc + +native_fortran_nf_mm_2dbc_SOURCES = \ + native_fortran/nf_mm_cl_blas.f90 \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mm_2dbc.f90 + +native_fortran_nf_mm_2dbc_LDADD = \ + $(STARPU_LIBNUMA_LDFLAGS) \ + $(STARPU_BLAS_LDFLAGS) -lm -llapack + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + native_fortran/nf_mm_2dbc +endif +endif +endif + +endif +endif + +######################################## +# Native Fortran MPI STARPU_REDUX test # +######################################## + +if STARPU_HAVE_MPIFORT +if !STARPU_SANITIZE +examplebin_PROGRAMS += \ + native_fortran/nf_mpi_redux \ + native_fortran/nf_mpi_redux_tree + +native_fortran_nf_mpi_redux_SOURCES = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mpi_redux.f90 + +native_fortran_nf_mpi_redux_LDADD = \ + -lm + +native_fortran_nf_mpi_redux_tree_SOURCES = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mpi_redux_tree.f90 + +native_fortran_nf_mpi_redux_tree_LDADD = \ + -lm + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + native_fortran/nf_mpi_redux \ + native_fortran/nf_mpi_redux_tree +endif +endif +endif + +######################################## +# Native Fortran MPI
STARPU_REDUX test (nf_redux_test) # +######################################## + +if STARPU_HAVE_MPIFORT +if !STARPU_SANITIZE +examplebin_PROGRAMS += \ + native_fortran/nf_redux_test + +native_fortran_nf_redux_test_SOURCES = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_redux_test.f90 + +native_fortran_nf_redux_test_LDADD = \ + -lm + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + native_fortran/nf_redux_test +endif +endif +endif + + +################### +# complex example # +################### + +examplebin_PROGRAMS += \ + complex/mpi_complex + +complex_mpi_complex_SOURCES = \ + complex/mpi_complex.c \ + ../../examples/interface/complex_interface.c + +starpu_mpi_EXAMPLES += \ + complex/mpi_complex + +######################### +# user_datatype example # +######################### + +examplebin_PROGRAMS += \ + user_datatype/user_datatype \ + user_datatype/user_datatype2 \ + user_datatype/user_datatype_early \ + user_datatype/user_datatype_interface + +user_datatype_user_datatype_SOURCES = \ + user_datatype/user_datatype.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype2_SOURCES = \ + user_datatype/user_datatype2.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype_early_SOURCES = \ + user_datatype/user_datatype_early.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype_interface_SOURCES = \ + user_datatype/user_datatype_interface.c \ + user_datatype/my_interface.c + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + user_datatype/user_datatype2 \ + user_datatype/user_datatype_early \ + user_datatype/user_datatype \ + user_datatype/user_datatype_interface +endif + +################### +# comm example # +################### + +examplebin_PROGRAMS += \ + comm/comm \ + comm/mix_comm \ + comm/group + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + comm/comm \ + comm/mix_comm \ + comm/group +endif + +################## +# filter example # +################## + +examplebin_PROGRAMS += \ +
filters/filter + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + filters/filter +endif + +# Native Fortran example + +# - link over source file to build our own object +native_fortran/fstarpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ +native_fortran/fstarpu_mpi_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/mpi/include/$(notdir $@) $@ + +if STARPU_HAVE_MPIFORT +if !STARPU_SANITIZE +# - express the creation of .mod along .o +fstarpu_mod.mod: native_fortran/fstarpu_mod.o +fstarpu_mpi_mod.mod: native_fortran/fstarpu_mpi_mod.o +nf_mm_cl.mod: native_fortran/nf_mm_cl.o +nf_mm_cl_blas.mod: native_fortran/nf_mm_cl_blas.o + +# - list explicit dependences to control proper module files dependencies +native_fortran/fstarpu_mpi_mod.o: fstarpu_mod.mod +native_fortran/nf_mm_cl.o: fstarpu_mod.mod fstarpu_mpi_mod.mod +native_fortran/nf_mm_cl_blas.o: fstarpu_mod.mod fstarpu_mpi_mod.mod +native_fortran/nf_mm.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_mm_2dbc.o: nf_mm_cl.mod nf_mm_cl_blas.mod fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_mm_task_build.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_basic_ring.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_redux_test.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_mpi_redux.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +native_fortran/nf_mpi_redux_tree.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +endif +endif + + +########## +# benchs # +########## + +examplebin_PROGRAMS += \ + benchs/sendrecv_bench \ + benchs/burst + +if !STARPU_USE_MPI_MPI +examplebin_PROGRAMS += \ + benchs/sendrecv_parallel_tasks_bench +endif + +if !STARPU_NO_BLAS_LIB +examplebin_PROGRAMS += \ + benchs/sendrecv_gemm_bench \ + benchs/burst_gemm +endif + +if !STARPU_SIMGRID +starpu_mpi_EXAMPLES += \ + benchs/sendrecv_bench \ + benchs/burst + +if STARPU_MPI_SYNC_CLOCKS +examplebin_PROGRAMS += \ + benchs/bcast_bench \ +
benchs/recv_wait_finalize_bench + +starpu_mpi_EXAMPLES += \ + benchs/bcast_bench \ + benchs/recv_wait_finalize_bench +endif + +if !STARPU_USE_MPI_MPI +starpu_mpi_EXAMPLES += \ + benchs/sendrecv_parallel_tasks_bench +endif + +if !STARPU_NO_BLAS_LIB +starpu_mpi_EXAMPLES += \ + benchs/sendrecv_gemm_bench \ + benchs/burst_gemm +endif +endif + +benchs_sendrecv_bench_SOURCES = benchs/sendrecv_bench.c +benchs_sendrecv_bench_SOURCES += benchs/bench_helper.c +benchs_sendrecv_bench_SOURCES += benchs/abstract_sendrecv_bench.c + +benchs_bcast_bench_SOURCES = benchs/bcast_bench.c +benchs_bcast_bench_SOURCES += benchs/bench_helper.c +benchs_bcast_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) +benchs_bcast_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) + +benchs_recv_wait_finalize_bench_SOURCES = benchs/recv_wait_finalize_bench.c +benchs_recv_wait_finalize_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) +benchs_recv_wait_finalize_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) + +benchs_sendrecv_parallel_tasks_bench_SOURCES = benchs/sendrecv_parallel_tasks_bench.c +benchs_sendrecv_parallel_tasks_bench_SOURCES += benchs/bench_helper.c + +benchs_burst_SOURCES = benchs/burst.c +benchs_burst_SOURCES += benchs/burst_helper.c + +if !STARPU_NO_BLAS_LIB +benchs_sendrecv_gemm_bench_SOURCES = benchs/sendrecv_gemm_bench.c +benchs_sendrecv_gemm_bench_SOURCES += benchs/bench_helper.c +benchs_sendrecv_gemm_bench_SOURCES += benchs/gemm_helper.c +benchs_sendrecv_gemm_bench_SOURCES += benchs/abstract_sendrecv_bench.c +benchs_sendrecv_gemm_bench_SOURCES += ../../examples/common/blas.c + +benchs_sendrecv_gemm_bench_LDADD = $(STARPU_BLAS_LDFLAGS) + +benchs_burst_gemm_SOURCES = benchs/burst_gemm.c +benchs_burst_gemm_SOURCES += benchs/gemm_helper.c +benchs_burst_gemm_SOURCES += benchs/burst_helper.c +benchs_burst_gemm_SOURCES += ../../examples/common/blas.c + +benchs_burst_gemm_LDADD = $(STARPU_BLAS_LDFLAGS) +endif diff --git a/mpi/examples/Makefile.in b/mpi/examples/Makefile.in new file mode 100644 index 0000000..c06256f ---
/dev/null +++ b/mpi/examples/Makefile.in @@ -0,0 +1,3407 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_23) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_MPI_CHECK_TRUE@TESTS = $(am__EXEEXT_14) +check_PROGRAMS = $(am__EXEEXT_14) +examplebin_PROGRAMS = stencil/stencil5$(EXEEXT) $(am__EXEEXT_1) \ + cache/cache$(EXEEXT) cache/cache_disable$(EXEEXT) \ + $(am__EXEEXT_15) $(am__EXEEXT_16) matrix_mult/mm$(EXEEXT) \ + $(am__EXEEXT_17) mpi_redux/mpi_redux$(EXEEXT) \ + mpi_redux/mpi_redux_autowrapup$(EXEEXT) \ + mpi_redux/mpi_redux_tree$(EXEEXT) $(am__EXEEXT_18) \ + $(am__EXEEXT_19) $(am__EXEEXT_20) complex/mpi_complex$(EXEEXT) \ + user_datatype/user_datatype$(EXEEXT) \ + user_datatype/user_datatype2$(EXEEXT) \ + user_datatype/user_datatype_early$(EXEEXT) \ + user_datatype/user_datatype_interface$(EXEEXT) \ + comm/comm$(EXEEXT) comm/mix_comm$(EXEEXT) comm/group$(EXEEXT) \ + filters/filter$(EXEEXT) benchs/sendrecv_bench$(EXEEXT) \ + benchs/burst$(EXEEXT) $(am__EXEEXT_21) $(am__EXEEXT_22) \ + $(am__EXEEXT_11) +@STARPU_USE_MPI_MPI_TRUE@am__append_8 = \ +@STARPU_USE_MPI_MPI_TRUE@ stencil/stencil5_lb + +@STARPU_USE_MPI_MPI_TRUE@am__append_9 = \ +@STARPU_USE_MPI_MPI_TRUE@ stencil/stencil5_lb + + +################## +# MPI LU example # +################## + +######################## +# MPI Cholesky example # +######################## +@STARPU_NO_BLAS_LIB_FALSE@am__append_10 = mpi_lu/plu_example_float \ 
+@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_11 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_double \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_float \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_double + +@STARPU_NO_BLAS_LIB_FALSE@am__append_12 = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed + + +############## +# CG example # +############## +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_13 = cg/cg +@STARPU_SIMGRID_FALSE@am__append_14 = \ +@STARPU_SIMGRID_FALSE@ matrix_mult/mm + +@STARPU_NO_BLAS_LIB_FALSE@am__append_15 = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_16 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ matrix_mult/mm_2dbc + +@STARPU_SIMGRID_FALSE@am__append_17 = \ +@STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux \ +@STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_autowrapup \ +@STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_tree + + +########################################## +# Native Fortran MPI Matrix mult example # +########################################## 
+@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__append_18 = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_19 = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_task_build \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_basic_ring + +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am__append_20 = \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc + +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_21 = \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_2dbc + + +######################################## +# Native Fortran MPI STARPU_REDUX test # +######################################## + +######################################## +# Native Fortran MPI STARPU_REDUX test # +######################################## +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__append_22 = native_fortran/nf_mpi_redux \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_23 = native_fortran/nf_mpi_redux \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ 
native_fortran/nf_mpi_redux_tree \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_redux_test +@STARPU_SIMGRID_FALSE@am__append_24 = user_datatype/user_datatype2 \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_early \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_interface \ +@STARPU_SIMGRID_FALSE@ comm/comm comm/mix_comm comm/group \ +@STARPU_SIMGRID_FALSE@ filters/filter benchs/sendrecv_bench \ +@STARPU_SIMGRID_FALSE@ benchs/burst +@STARPU_USE_MPI_MPI_FALSE@am__append_25 = \ +@STARPU_USE_MPI_MPI_FALSE@ benchs/sendrecv_parallel_tasks_bench + +@STARPU_NO_BLAS_LIB_FALSE@am__append_26 = \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm + +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__append_27 = \ +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/bcast_bench \ +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench + +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__append_28 = \ +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/bcast_bench \ +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench + +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@am__append_29 = \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@ benchs/sendrecv_parallel_tasks_bench + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_30 = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/sendrecv_gemm_bench \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/burst_gemm + +subdir = mpi/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 
$(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_USE_MPI_MPI_TRUE@am__EXEEXT_1 = stencil/stencil5_lb$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_2 = mpi_lu/plu_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_double$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_3 = matrix_decomposition/mpi_cholesky$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) +@STARPU_SIMGRID_FALSE@am__EXEEXT_4 = matrix_mult/mm$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = matrix_mult/mm_2dbc$(EXEEXT) +@STARPU_SIMGRID_FALSE@am__EXEEXT_6 = mpi_redux/mpi_redux$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_autowrapup$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_tree$(EXEEXT) +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_7 = native_fortran/nf_mm$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_task_build$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ 
native_fortran/nf_basic_ring$(EXEEXT) +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_8 = native_fortran/nf_mm_2dbc$(EXEEXT) +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_9 = native_fortran/nf_mpi_redux$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mpi_redux_tree$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_redux_test$(EXEEXT) +@STARPU_SIMGRID_FALSE@am__EXEEXT_10 = \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype2$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_early$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ comm/comm$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ comm/mix_comm$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ comm/group$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ filters/filter$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ benchs/sendrecv_bench$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ benchs/burst$(EXEEXT) +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_11 = benchs/bcast_bench$(EXEEXT) \ +@STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench$(EXEEXT) +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@am__EXEEXT_12 = benchs/sendrecv_parallel_tasks_bench$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_13 = benchs/sendrecv_gemm_bench$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/burst_gemm$(EXEEXT) +am__EXEEXT_14 = stencil/stencil5$(EXEEXT) $(am__EXEEXT_1) \ + cache/cache$(EXEEXT) cache/cache_disable$(EXEEXT) \ + $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_5) $(am__EXEEXT_6) $(am__EXEEXT_7) \ + $(am__EXEEXT_8) $(am__EXEEXT_9) complex/mpi_complex$(EXEEXT) \ + $(am__EXEEXT_10) $(am__EXEEXT_11) $(am__EXEEXT_12) \ + 
$(am__EXEEXT_13) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_15 = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_16 = cg/cg$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_17 = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc$(EXEEXT) +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__EXEEXT_18 = native_fortran/nf_mm$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring$(EXEEXT) +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am__EXEEXT_19 = native_fortran/nf_mm_2dbc$(EXEEXT) +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__EXEEXT_20 = native_fortran/nf_mpi_redux$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree$(EXEEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test$(EXEEXT) +@STARPU_USE_MPI_MPI_FALSE@am__EXEEXT_21 = benchs/sendrecv_parallel_tasks_bench$(EXEEXT) +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_22 = \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench$(EXEEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_23 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +am__dirstamp = 
$(am__leading_dot)dirstamp +am_benchs_bcast_bench_OBJECTS = \ + benchs/bcast_bench-bcast_bench.$(OBJEXT) \ + benchs/bcast_bench-bench_helper.$(OBJEXT) +benchs_bcast_bench_OBJECTS = $(am_benchs_bcast_bench_OBJECTS) +am__DEPENDENCIES_1 = +benchs_bcast_bench_DEPENDENCIES = $(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +benchs_bcast_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(benchs_bcast_bench_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_benchs_burst_OBJECTS = benchs/burst.$(OBJEXT) \ + benchs/burst_helper.$(OBJEXT) +benchs_burst_OBJECTS = $(am_benchs_burst_OBJECTS) +benchs_burst_LDADD = $(LDADD) +am__benchs_burst_gemm_SOURCES_DIST = benchs/burst_gemm.c \ + benchs/gemm_helper.c benchs/burst_helper.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_benchs_burst_gemm_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_helper.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +benchs_burst_gemm_OBJECTS = $(am_benchs_burst_gemm_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am_benchs_recv_wait_finalize_bench_OBJECTS = benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.$(OBJEXT) +benchs_recv_wait_finalize_bench_OBJECTS = \ + $(am_benchs_recv_wait_finalize_bench_OBJECTS) +benchs_recv_wait_finalize_bench_DEPENDENCIES = $(am__DEPENDENCIES_1) +benchs_recv_wait_finalize_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_benchs_sendrecv_bench_OBJECTS = benchs/sendrecv_bench.$(OBJEXT) \ + benchs/bench_helper.$(OBJEXT) \ + 
benchs/abstract_sendrecv_bench.$(OBJEXT) +benchs_sendrecv_bench_OBJECTS = $(am_benchs_sendrecv_bench_OBJECTS) +benchs_sendrecv_bench_LDADD = $(LDADD) +am__benchs_sendrecv_gemm_bench_SOURCES_DIST = \ + benchs/sendrecv_gemm_bench.c benchs/bench_helper.c \ + benchs/gemm_helper.c benchs/abstract_sendrecv_bench.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_benchs_sendrecv_gemm_bench_OBJECTS = benchs/sendrecv_gemm_bench.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/bench_helper.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/abstract_sendrecv_bench.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +benchs_sendrecv_gemm_bench_OBJECTS = \ + $(am_benchs_sendrecv_gemm_bench_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am_benchs_sendrecv_parallel_tasks_bench_OBJECTS = \ + benchs/sendrecv_parallel_tasks_bench.$(OBJEXT) \ + benchs/bench_helper.$(OBJEXT) +benchs_sendrecv_parallel_tasks_bench_OBJECTS = \ + $(am_benchs_sendrecv_parallel_tasks_bench_OBJECTS) +benchs_sendrecv_parallel_tasks_bench_LDADD = $(LDADD) +cache_cache_SOURCES = cache/cache.c +cache_cache_OBJECTS = cache/cache.$(OBJEXT) +cache_cache_LDADD = $(LDADD) +cache_cache_disable_SOURCES = cache/cache_disable.c +cache_cache_disable_OBJECTS = cache/cache_disable.$(OBJEXT) +cache_cache_disable_LDADD = $(LDADD) +am__cg_cg_SOURCES_DIST = cg/cg.c ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am_cg_cg_OBJECTS = cg/cg.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ ../../examples/common/blas.$(OBJEXT) +cg_cg_OBJECTS = $(am_cg_cg_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_DEPENDENCIES = $(am__DEPENDENCIES_1) +comm_comm_SOURCES = comm/comm.c +comm_comm_OBJECTS = comm/comm.$(OBJEXT) +comm_comm_LDADD = $(LDADD) +comm_group_SOURCES = comm/group.c +comm_group_OBJECTS = 
comm/group.$(OBJEXT) +comm_group_LDADD = $(LDADD) +comm_mix_comm_SOURCES = comm/mix_comm.c +comm_mix_comm_OBJECTS = comm/mix_comm.$(OBJEXT) +comm_mix_comm_LDADD = $(LDADD) +am_complex_mpi_complex_OBJECTS = complex/mpi_complex.$(OBJEXT) \ + ../../examples/interface/complex_interface.$(OBJEXT) +complex_mpi_complex_OBJECTS = $(am_complex_mpi_complex_OBJECTS) +complex_mpi_complex_LDADD = $(LDADD) +filters_filter_SOURCES = filters/filter.c +filters_filter_OBJECTS = filters/filter.$(OBJEXT) +filters_filter_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +am__matrix_decomposition_mpi_cholesky_SOURCES_DIST = \ + matrix_decomposition/mpi_cholesky.c \ + matrix_decomposition/mpi_cholesky_models.c \ + matrix_decomposition/mpi_cholesky_kernels.c \ + matrix_decomposition/mpi_cholesky_codelets.c \ + matrix_decomposition/mpi_decomposition_params.c \ + matrix_decomposition/mpi_decomposition_matrix.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_matrix_decomposition_mpi_cholesky_OBJECTS = matrix_decomposition/mpi_cholesky.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +matrix_decomposition_mpi_cholesky_OBJECTS = \ + $(am_matrix_decomposition_mpi_cholesky_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__matrix_decomposition_mpi_cholesky_distributed_SOURCES_DIST = \ + matrix_decomposition/mpi_cholesky_distributed.c \ + matrix_decomposition/mpi_cholesky_models.c \ + 
matrix_decomposition/mpi_cholesky_kernels.c \ + matrix_decomposition/mpi_cholesky_codelets.c \ + matrix_decomposition/mpi_decomposition_params.c \ + matrix_decomposition/mpi_decomposition_matrix.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_matrix_decomposition_mpi_cholesky_distributed_OBJECTS = matrix_decomposition/mpi_cholesky_distributed.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +matrix_decomposition_mpi_cholesky_distributed_OBJECTS = \ + $(am_matrix_decomposition_mpi_cholesky_distributed_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +matrix_mult_mm_SOURCES = matrix_mult/mm.c +matrix_mult_mm_OBJECTS = matrix_mult/mm.$(OBJEXT) +matrix_mult_mm_DEPENDENCIES = +am__matrix_mult_mm_2dbc_SOURCES_DIST = matrix_mult/mm_2dbc.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_matrix_mult_mm_2dbc_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +matrix_mult_mm_2dbc_OBJECTS = $(am_matrix_mult_mm_2dbc_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_example_double_SOURCES_DIST = \ + mpi_lu/plu_example_double.c mpi_lu/plu_solve_double.c \ + mpi_lu/pdlu_kernels.c mpi_lu/pdlu.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_example_double_OBJECTS = 
\ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_example_double_OBJECTS = \ + $(am_mpi_lu_plu_example_double_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_example_float_SOURCES_DIST = \ + mpi_lu/plu_example_float.c mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c mpi_lu/pslu.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_example_float_OBJECTS = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_example_float_OBJECTS = \ + $(am_mpi_lu_plu_example_float_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_implicit_example_double_SOURCES_DIST = \ + mpi_lu/plu_implicit_example_double.c mpi_lu/plu_solve_double.c \ + mpi_lu/pdlu_kernels.c mpi_lu/pdlu_implicit.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_implicit_example_double_OBJECTS = mpi_lu/plu_implicit_example_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_implicit_example_double_OBJECTS = \ + 
$(am_mpi_lu_plu_implicit_example_double_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_implicit_example_float_SOURCES_DIST = \ + mpi_lu/plu_implicit_example_float.c mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c mpi_lu/pslu_implicit.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_implicit_example_float_OBJECTS = mpi_lu/plu_implicit_example_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_implicit_example_float_OBJECTS = \ + $(am_mpi_lu_plu_implicit_example_float_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_outofcore_example_double_SOURCES_DIST = \ + mpi_lu/plu_outofcore_example_double.c \ + mpi_lu/plu_solve_double.c mpi_lu/pdlu_kernels.c \ + mpi_lu/pdlu_implicit.c ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_outofcore_example_double_OBJECTS = mpi_lu/plu_outofcore_example_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_outofcore_example_double_OBJECTS = \ + $(am_mpi_lu_plu_outofcore_example_double_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +am__mpi_lu_plu_outofcore_example_float_SOURCES_DIST = \ + 
mpi_lu/plu_outofcore_example_float.c mpi_lu/plu_solve_float.c \ + mpi_lu/pslu_kernels.c mpi_lu/pslu_implicit.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_outofcore_example_float_OBJECTS = mpi_lu/plu_outofcore_example_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +mpi_lu_plu_outofcore_example_float_OBJECTS = \ + $(am_mpi_lu_plu_outofcore_example_float_OBJECTS) +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_DEPENDENCIES = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +mpi_redux_mpi_redux_SOURCES = mpi_redux/mpi_redux.c +mpi_redux_mpi_redux_OBJECTS = mpi_redux/mpi_redux.$(OBJEXT) +mpi_redux_mpi_redux_DEPENDENCIES = +mpi_redux_mpi_redux_autowrapup_SOURCES = \ + mpi_redux/mpi_redux_autowrapup.c +mpi_redux_mpi_redux_autowrapup_OBJECTS = \ + mpi_redux/mpi_redux_autowrapup.$(OBJEXT) +mpi_redux_mpi_redux_autowrapup_DEPENDENCIES = +mpi_redux_mpi_redux_tree_SOURCES = mpi_redux/mpi_redux_tree.c +mpi_redux_mpi_redux_tree_OBJECTS = mpi_redux/mpi_redux_tree.$(OBJEXT) +mpi_redux_mpi_redux_tree_DEPENDENCIES = +am__native_fortran_nf_basic_ring_SOURCES_DIST = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_basic_ring.f90 +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_basic_ring_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring.$(OBJEXT) +native_fortran_nf_basic_ring_OBJECTS = \ + $(am_native_fortran_nf_basic_ring_OBJECTS) +native_fortran_nf_basic_ring_DEPENDENCIES = +am__native_fortran_nf_mm_SOURCES_DIST = 
native_fortran/nf_mm_cl.f90 \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_mm.f90 +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_OBJECTS = native_fortran/nf_mm_cl.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm.$(OBJEXT) +native_fortran_nf_mm_OBJECTS = $(am_native_fortran_nf_mm_OBJECTS) +native_fortran_nf_mm_DEPENDENCIES = +am__native_fortran_nf_mm_2dbc_SOURCES_DIST = \ + native_fortran/nf_mm_cl_blas.f90 \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_mm_2dbc.f90 +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_2dbc_OBJECTS = native_fortran/nf_mm_cl_blas.$(OBJEXT) \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc.$(OBJEXT) +native_fortran_nf_mm_2dbc_OBJECTS = \ + $(am_native_fortran_nf_mm_2dbc_OBJECTS) +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_DEPENDENCIES = $(am__DEPENDENCIES_1) \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(am__DEPENDENCIES_1) +am__native_fortran_nf_mm_task_build_SOURCES_DIST = \ + native_fortran/nf_mm_cl.f90 native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mm_task_build.f90 
+@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_task_build_OBJECTS = native_fortran/nf_mm_cl.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build.$(OBJEXT) +native_fortran_nf_mm_task_build_OBJECTS = \ + $(am_native_fortran_nf_mm_task_build_OBJECTS) +native_fortran_nf_mm_task_build_DEPENDENCIES = +am__native_fortran_nf_mpi_redux_SOURCES_DIST = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 native_fortran/nf_mpi_redux.f90 +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mpi_redux_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux.$(OBJEXT) +native_fortran_nf_mpi_redux_OBJECTS = \ + $(am_native_fortran_nf_mpi_redux_OBJECTS) +native_fortran_nf_mpi_redux_DEPENDENCIES = +am__native_fortran_nf_mpi_redux_tree_SOURCES_DIST = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_mpi_redux_tree.f90 +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mpi_redux_tree_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree.$(OBJEXT) +native_fortran_nf_mpi_redux_tree_OBJECTS = \ + $(am_native_fortran_nf_mpi_redux_tree_OBJECTS) +native_fortran_nf_mpi_redux_tree_DEPENDENCIES = +am__native_fortran_nf_redux_test_SOURCES_DIST = \ + native_fortran/fstarpu_mpi_mod.f90 \ + native_fortran/fstarpu_mod.f90 \ + native_fortran/nf_redux_test.f90 
+@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_redux_test_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test.$(OBJEXT) +native_fortran_nf_redux_test_OBJECTS = \ + $(am_native_fortran_nf_redux_test_OBJECTS) +native_fortran_nf_redux_test_DEPENDENCIES = +stencil_stencil5_SOURCES = stencil/stencil5.c +stencil_stencil5_OBJECTS = stencil/stencil5.$(OBJEXT) +stencil_stencil5_LDADD = $(LDADD) +stencil_stencil5_lb_SOURCES = stencil/stencil5_lb.c +stencil_stencil5_lb_OBJECTS = stencil/stencil5_lb.$(OBJEXT) +stencil_stencil5_lb_LDADD = $(LDADD) +am_user_datatype_user_datatype_OBJECTS = \ + user_datatype/user_datatype.$(OBJEXT) \ + user_datatype/my_interface.$(OBJEXT) +user_datatype_user_datatype_OBJECTS = \ + $(am_user_datatype_user_datatype_OBJECTS) +user_datatype_user_datatype_LDADD = $(LDADD) +am_user_datatype_user_datatype2_OBJECTS = \ + user_datatype/user_datatype2.$(OBJEXT) \ + user_datatype/my_interface.$(OBJEXT) +user_datatype_user_datatype2_OBJECTS = \ + $(am_user_datatype_user_datatype2_OBJECTS) +user_datatype_user_datatype2_LDADD = $(LDADD) +am_user_datatype_user_datatype_early_OBJECTS = \ + user_datatype/user_datatype_early.$(OBJEXT) \ + user_datatype/my_interface.$(OBJEXT) +user_datatype_user_datatype_early_OBJECTS = \ + $(am_user_datatype_user_datatype_early_OBJECTS) +user_datatype_user_datatype_early_LDADD = $(LDADD) +am_user_datatype_user_datatype_interface_OBJECTS = \ + user_datatype/user_datatype_interface.$(OBJEXT) \ + user_datatype/my_interface.$(OBJEXT) +user_datatype_user_datatype_interface_OBJECTS = \ + $(am_user_datatype_user_datatype_interface_OBJECTS) +user_datatype_user_datatype_interface_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = 
$(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ../../examples/common/$(DEPDIR)/blas.Po \ + ../../examples/interface/$(DEPDIR)/complex_interface.Po \ + ./$(DEPDIR)/loader-loader.Po \ + benchs/$(DEPDIR)/abstract_sendrecv_bench.Po \ + benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po \ + benchs/$(DEPDIR)/bcast_bench-bench_helper.Po \ + benchs/$(DEPDIR)/bench_helper.Po benchs/$(DEPDIR)/burst.Po \ + benchs/$(DEPDIR)/burst_gemm.Po \ + benchs/$(DEPDIR)/burst_helper.Po \ + benchs/$(DEPDIR)/gemm_helper.Po \ + benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po \ + benchs/$(DEPDIR)/sendrecv_bench.Po \ + benchs/$(DEPDIR)/sendrecv_gemm_bench.Po \ + benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po \ + cache/$(DEPDIR)/cache.Po cache/$(DEPDIR)/cache_disable.Po \ + cg/$(DEPDIR)/cg.Po comm/$(DEPDIR)/comm.Po \ + comm/$(DEPDIR)/group.Po comm/$(DEPDIR)/mix_comm.Po \ + complex/$(DEPDIR)/mpi_complex.Po filters/$(DEPDIR)/filter.Po \ + matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po \ + matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po \ + matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po \ + matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po \ + matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po \ + matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po \ + matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po \ + matrix_mult/$(DEPDIR)/mm.Po matrix_mult/$(DEPDIR)/mm_2dbc.Po \ + mpi_lu/$(DEPDIR)/pdlu.Po mpi_lu/$(DEPDIR)/pdlu_implicit.Po \ + mpi_lu/$(DEPDIR)/pdlu_kernels.Po \ + mpi_lu/$(DEPDIR)/plu_example_double.Po \ + mpi_lu/$(DEPDIR)/plu_example_float.Po \ + 
mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po \ + mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po \ + mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po \ + mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po \ + mpi_lu/$(DEPDIR)/plu_solve_double.Po \ + mpi_lu/$(DEPDIR)/plu_solve_float.Po mpi_lu/$(DEPDIR)/pslu.Po \ + mpi_lu/$(DEPDIR)/pslu_implicit.Po \ + mpi_lu/$(DEPDIR)/pslu_kernels.Po \ + mpi_redux/$(DEPDIR)/mpi_redux.Po \ + mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po \ + mpi_redux/$(DEPDIR)/mpi_redux_tree.Po \ + stencil/$(DEPDIR)/stencil5.Po stencil/$(DEPDIR)/stencil5_lb.Po \ + user_datatype/$(DEPDIR)/my_interface.Po \ + user_datatype/$(DEPDIR)/user_datatype.Po \ + user_datatype/$(DEPDIR)/user_datatype2.Po \ + user_datatype/$(DEPDIR)/user_datatype_early.Po \ + user_datatype/$(DEPDIR)/user_datatype_interface.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) +LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) +AM_V_FC = $(am__v_FC_@AM_V@) +am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) +am__v_FC_0 = @echo " FC " $@; +am__v_FC_1 = +FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ 
+AM_V_FCLD = $(am__v_FCLD_@AM_V@) +am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) +am__v_FCLD_0 = @echo " FCLD " $@; +am__v_FCLD_1 = +SOURCES = $(benchs_bcast_bench_SOURCES) $(benchs_burst_SOURCES) \ + $(benchs_burst_gemm_SOURCES) \ + $(benchs_recv_wait_finalize_bench_SOURCES) \ + $(benchs_sendrecv_bench_SOURCES) \ + $(benchs_sendrecv_gemm_bench_SOURCES) \ + $(benchs_sendrecv_parallel_tasks_bench_SOURCES) cache/cache.c \ + cache/cache_disable.c $(cg_cg_SOURCES) comm/comm.c \ + comm/group.c comm/mix_comm.c $(complex_mpi_complex_SOURCES) \ + filters/filter.c loader.c \ + $(matrix_decomposition_mpi_cholesky_SOURCES) \ + $(matrix_decomposition_mpi_cholesky_distributed_SOURCES) \ + matrix_mult/mm.c $(matrix_mult_mm_2dbc_SOURCES) \ + $(mpi_lu_plu_example_double_SOURCES) \ + $(mpi_lu_plu_example_float_SOURCES) \ + $(mpi_lu_plu_implicit_example_double_SOURCES) \ + $(mpi_lu_plu_implicit_example_float_SOURCES) \ + $(mpi_lu_plu_outofcore_example_double_SOURCES) \ + $(mpi_lu_plu_outofcore_example_float_SOURCES) \ + mpi_redux/mpi_redux.c mpi_redux/mpi_redux_autowrapup.c \ + mpi_redux/mpi_redux_tree.c \ + $(native_fortran_nf_basic_ring_SOURCES) \ + $(native_fortran_nf_mm_SOURCES) \ + $(native_fortran_nf_mm_2dbc_SOURCES) \ + $(native_fortran_nf_mm_task_build_SOURCES) \ + $(native_fortran_nf_mpi_redux_SOURCES) \ + $(native_fortran_nf_mpi_redux_tree_SOURCES) \ + $(native_fortran_nf_redux_test_SOURCES) stencil/stencil5.c \ + stencil/stencil5_lb.c $(user_datatype_user_datatype_SOURCES) \ + $(user_datatype_user_datatype2_SOURCES) \ + $(user_datatype_user_datatype_early_SOURCES) \ + $(user_datatype_user_datatype_interface_SOURCES) +DIST_SOURCES = $(benchs_bcast_bench_SOURCES) $(benchs_burst_SOURCES) \ + $(am__benchs_burst_gemm_SOURCES_DIST) \ + $(benchs_recv_wait_finalize_bench_SOURCES) \ + $(benchs_sendrecv_bench_SOURCES) \ + $(am__benchs_sendrecv_gemm_bench_SOURCES_DIST) \ + $(benchs_sendrecv_parallel_tasks_bench_SOURCES) cache/cache.c \ + cache/cache_disable.c $(am__cg_cg_SOURCES_DIST) 
comm/comm.c \ + comm/group.c comm/mix_comm.c $(complex_mpi_complex_SOURCES) \ + filters/filter.c loader.c \ + $(am__matrix_decomposition_mpi_cholesky_SOURCES_DIST) \ + $(am__matrix_decomposition_mpi_cholesky_distributed_SOURCES_DIST) \ + matrix_mult/mm.c $(am__matrix_mult_mm_2dbc_SOURCES_DIST) \ + $(am__mpi_lu_plu_example_double_SOURCES_DIST) \ + $(am__mpi_lu_plu_example_float_SOURCES_DIST) \ + $(am__mpi_lu_plu_implicit_example_double_SOURCES_DIST) \ + $(am__mpi_lu_plu_implicit_example_float_SOURCES_DIST) \ + $(am__mpi_lu_plu_outofcore_example_double_SOURCES_DIST) \ + $(am__mpi_lu_plu_outofcore_example_float_SOURCES_DIST) \ + mpi_redux/mpi_redux.c mpi_redux/mpi_redux_autowrapup.c \ + mpi_redux/mpi_redux_tree.c \ + $(am__native_fortran_nf_basic_ring_SOURCES_DIST) \ + $(am__native_fortran_nf_mm_SOURCES_DIST) \ + $(am__native_fortran_nf_mm_2dbc_SOURCES_DIST) \ + $(am__native_fortran_nf_mm_task_build_SOURCES_DIST) \ + $(am__native_fortran_nf_mpi_redux_SOURCES_DIST) \ + $(am__native_fortran_nf_mpi_redux_tree_SOURCES_DIST) \ + $(am__native_fortran_nf_redux_test_SOURCES_DIST) \ + stencil/stencil5.c stencil/stencil5_lb.c \ + $(user_datatype_user_datatype_SOURCES) \ + $(user_datatype_user_datatype2_SOURCES) \ + $(user_datatype_user_datatype_early_SOURCES) \ + $(user_datatype_user_datatype_interface_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Thibaut Lambert +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +CC = $(MPICC) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = $(MPIFORT) +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = 
@HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ + $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) -lm \ + $(MAGMA_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ 
+LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ 
+STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ 
+STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = 
@pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(MPI_RUN_ENV) +LAUNCHER = $(STARPU_MPIEXEC) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(MAGMA_CFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(MAGMA_CFLAGS) $(APP_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(MAGMA_CFLAGS) $(APP_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile +CCLD = $(MPICC) +FCLD = $(MPIFORT) + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +starpu_mpi_EXAMPLES = stencil/stencil5 $(am__append_9) cache/cache \ + cache/cache_disable $(am__append_11) $(am__append_12) \ + $(am__append_14) $(am__append_16) $(am__append_17) \ + $(am__append_19) $(am__append_21) $(am__append_23) \ + complex/mpi_complex $(am__append_24) $(am__append_28) \ + $(am__append_29) $(am__append_30) +BUILT_SOURCES = +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log *.mod native_fortran/fstarpu_mod.f90 native_fortran/fstarpu_mpi_mod.f90 +EXTRA_DIST = \ + mpi_lu/mpi_lu-float.h \ + mpi_lu/mpi_lu-double.h \ + mpi_lu/plu_example.c \ + mpi_lu/plu_implicit_example.c \ + mpi_lu/plu_outofcore_example.c \ + mpi_lu/plu_solve.c \ + mpi_lu/pxlu.h \ + mpi_lu/pxlu.c \ + mpi_lu/pxlu_implicit.c \ + mpi_lu/pxlu_kernels.h \ + mpi_lu/pxlu_kernels.c \ + matrix_mult/mm_2dbc.c \ + native_fortran/nf_mm_2dbc.f90 \ + matrix_decomposition/mpi_cholesky.h \ + matrix_decomposition/mpi_cholesky_codelets.h \ + matrix_decomposition/mpi_cholesky_kernels.h \ + matrix_decomposition/mpi_cholesky_models.h \ + 
matrix_decomposition/mpi_decomposition_params.h \ + matrix_decomposition/mpi_decomposition_matrix.h \ + user_datatype/my_interface.h \ + benchs/abstract_sendrecv_bench.h\ + benchs/bench_helper.h \ + benchs/gemm_helper.h \ + benchs/burst_helper.h \ + helper.h \ + perf.sh + +examplebindir = $(libdir)/starpu/mpi +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.c \ 
+@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.c \ 
+@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.c \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg.c \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ $(STARPU_BLAS_LDFLAGS) + +matrix_mult_mm_LDADD = \ + -lm + +@STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm + +mpi_redux_mpi_redux_LDADD = \ + -lm + +mpi_redux_mpi_redux_autowrapup_LDADD = \ + -lm + 
+mpi_redux_mpi_redux_tree_LDADD = \ + -lm + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_task_build_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_task_build_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_basic_ring_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_basic_ring_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_SOURCES = \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl_blas.f90 \ 
+@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc.f90 + +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_LDADD = \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ +@STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm -llapack + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_tree_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_tree_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_redux_test_SOURCES = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ 
native_fortran/fstarpu_mpi_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test.f90 + +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_redux_test_LDADD = \ +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm + +complex_mpi_complex_SOURCES = \ + complex/mpi_complex.c \ + ../../examples/interface/complex_interface.c + +user_datatype_user_datatype_SOURCES = \ + user_datatype/user_datatype.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype2_SOURCES = \ + user_datatype/user_datatype2.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype_early_SOURCES = \ + user_datatype/user_datatype_early.c \ + user_datatype/my_interface.c + +user_datatype_user_datatype_interface_SOURCES = \ + user_datatype/user_datatype_interface.c \ + user_datatype/my_interface.c + +benchs_sendrecv_bench_SOURCES = benchs/sendrecv_bench.c \ + benchs/bench_helper.c benchs/abstract_sendrecv_bench.c +benchs_bcast_bench_SOURCES = benchs/bcast_bench.c \ + benchs/bench_helper.c +benchs_bcast_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) +benchs_bcast_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) +benchs_recv_wait_finalize_bench_SOURCES = benchs/recv_wait_finalize_bench.c +benchs_recv_wait_finalize_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) +benchs_recv_wait_finalize_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) +benchs_sendrecv_parallel_tasks_bench_SOURCES = \ + benchs/sendrecv_parallel_tasks_bench.c benchs/bench_helper.c +benchs_burst_SOURCES = benchs/burst.c benchs/burst_helper.c +@STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench.c \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/bench_helper.c \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.c \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/abstract_sendrecv_bench.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c 
+@STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_LDADD = $(STARPU_BLAS_LDFLAGS) +@STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm.c \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.c \ +@STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_helper.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_LDADD = $(STARPU_BLAS_LDFLAGS) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .f90 .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign mpi/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir 
files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +benchs/$(am__dirstamp): + @$(MKDIR_P) benchs + @: > benchs/$(am__dirstamp) +benchs/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) benchs/$(DEPDIR) + @: > benchs/$(DEPDIR)/$(am__dirstamp) +benchs/bcast_bench-bcast_bench.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +benchs/bcast_bench-bench_helper.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/bcast_bench$(EXEEXT): $(benchs_bcast_bench_OBJECTS) $(benchs_bcast_bench_DEPENDENCIES) $(EXTRA_benchs_bcast_bench_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f 
benchs/bcast_bench$(EXEEXT) + $(AM_V_CCLD)$(benchs_bcast_bench_LINK) $(benchs_bcast_bench_OBJECTS) $(benchs_bcast_bench_LDADD) $(LIBS) +benchs/burst.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +benchs/burst_helper.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/burst$(EXEEXT): $(benchs_burst_OBJECTS) $(benchs_burst_DEPENDENCIES) $(EXTRA_benchs_burst_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/burst$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(benchs_burst_OBJECTS) $(benchs_burst_LDADD) $(LIBS) +benchs/burst_gemm.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +benchs/gemm_helper.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +../../examples/common/$(am__dirstamp): + @$(MKDIR_P) ../../examples/common + @: > ../../examples/common/$(am__dirstamp) +../../examples/common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ../../examples/common/$(DEPDIR) + @: > ../../examples/common/$(DEPDIR)/$(am__dirstamp) +../../examples/common/blas.$(OBJEXT): \ + ../../examples/common/$(am__dirstamp) \ + ../../examples/common/$(DEPDIR)/$(am__dirstamp) + +benchs/burst_gemm$(EXEEXT): $(benchs_burst_gemm_OBJECTS) $(benchs_burst_gemm_DEPENDENCIES) $(EXTRA_benchs_burst_gemm_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/burst_gemm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(benchs_burst_gemm_OBJECTS) $(benchs_burst_gemm_LDADD) $(LIBS) +benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.$(OBJEXT): \ + benchs/$(am__dirstamp) benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/recv_wait_finalize_bench$(EXEEXT): $(benchs_recv_wait_finalize_bench_OBJECTS) $(benchs_recv_wait_finalize_bench_DEPENDENCIES) $(EXTRA_benchs_recv_wait_finalize_bench_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/recv_wait_finalize_bench$(EXEEXT) + $(AM_V_CCLD)$(benchs_recv_wait_finalize_bench_LINK) $(benchs_recv_wait_finalize_bench_OBJECTS) $(benchs_recv_wait_finalize_bench_LDADD) $(LIBS) +benchs/sendrecv_bench.$(OBJEXT): 
benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +benchs/bench_helper.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) +benchs/abstract_sendrecv_bench.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/sendrecv_bench$(EXEEXT): $(benchs_sendrecv_bench_OBJECTS) $(benchs_sendrecv_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_bench_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/sendrecv_bench$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_bench_OBJECTS) $(benchs_sendrecv_bench_LDADD) $(LIBS) +benchs/sendrecv_gemm_bench.$(OBJEXT): benchs/$(am__dirstamp) \ + benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/sendrecv_gemm_bench$(EXEEXT): $(benchs_sendrecv_gemm_bench_OBJECTS) $(benchs_sendrecv_gemm_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_gemm_bench_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/sendrecv_gemm_bench$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_gemm_bench_OBJECTS) $(benchs_sendrecv_gemm_bench_LDADD) $(LIBS) +benchs/sendrecv_parallel_tasks_bench.$(OBJEXT): \ + benchs/$(am__dirstamp) benchs/$(DEPDIR)/$(am__dirstamp) + +benchs/sendrecv_parallel_tasks_bench$(EXEEXT): $(benchs_sendrecv_parallel_tasks_bench_OBJECTS) $(benchs_sendrecv_parallel_tasks_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_parallel_tasks_bench_DEPENDENCIES) benchs/$(am__dirstamp) + @rm -f benchs/sendrecv_parallel_tasks_bench$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_parallel_tasks_bench_OBJECTS) $(benchs_sendrecv_parallel_tasks_bench_LDADD) $(LIBS) +cache/$(am__dirstamp): + @$(MKDIR_P) cache + @: > cache/$(am__dirstamp) +cache/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cache/$(DEPDIR) + @: > cache/$(DEPDIR)/$(am__dirstamp) +cache/cache.$(OBJEXT): cache/$(am__dirstamp) \ + cache/$(DEPDIR)/$(am__dirstamp) + +cache/cache$(EXEEXT): $(cache_cache_OBJECTS) $(cache_cache_DEPENDENCIES) $(EXTRA_cache_cache_DEPENDENCIES) cache/$(am__dirstamp) + @rm -f cache/cache$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cache_cache_OBJECTS) 
$(cache_cache_LDADD) $(LIBS) +cache/cache_disable.$(OBJEXT): cache/$(am__dirstamp) \ + cache/$(DEPDIR)/$(am__dirstamp) + +cache/cache_disable$(EXEEXT): $(cache_cache_disable_OBJECTS) $(cache_cache_disable_DEPENDENCIES) $(EXTRA_cache_cache_disable_DEPENDENCIES) cache/$(am__dirstamp) + @rm -f cache/cache_disable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cache_cache_disable_OBJECTS) $(cache_cache_disable_LDADD) $(LIBS) +cg/$(am__dirstamp): + @$(MKDIR_P) cg + @: > cg/$(am__dirstamp) +cg/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cg/$(DEPDIR) + @: > cg/$(DEPDIR)/$(am__dirstamp) +cg/cg.$(OBJEXT): cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp) + +cg/cg$(EXEEXT): $(cg_cg_OBJECTS) $(cg_cg_DEPENDENCIES) $(EXTRA_cg_cg_DEPENDENCIES) cg/$(am__dirstamp) + @rm -f cg/cg$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cg_cg_OBJECTS) $(cg_cg_LDADD) $(LIBS) +comm/$(am__dirstamp): + @$(MKDIR_P) comm + @: > comm/$(am__dirstamp) +comm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) comm/$(DEPDIR) + @: > comm/$(DEPDIR)/$(am__dirstamp) +comm/comm.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) + +comm/comm$(EXEEXT): $(comm_comm_OBJECTS) $(comm_comm_DEPENDENCIES) $(EXTRA_comm_comm_DEPENDENCIES) comm/$(am__dirstamp) + @rm -f comm/comm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(comm_comm_OBJECTS) $(comm_comm_LDADD) $(LIBS) +comm/group.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) + +comm/group$(EXEEXT): $(comm_group_OBJECTS) $(comm_group_DEPENDENCIES) $(EXTRA_comm_group_DEPENDENCIES) comm/$(am__dirstamp) + @rm -f comm/group$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(comm_group_OBJECTS) $(comm_group_LDADD) $(LIBS) +comm/mix_comm.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) + +comm/mix_comm$(EXEEXT): $(comm_mix_comm_OBJECTS) $(comm_mix_comm_DEPENDENCIES) $(EXTRA_comm_mix_comm_DEPENDENCIES) comm/$(am__dirstamp) + @rm -f comm/mix_comm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(comm_mix_comm_OBJECTS) $(comm_mix_comm_LDADD) $(LIBS) +complex/$(am__dirstamp): + @$(MKDIR_P) complex + @: > 
complex/$(am__dirstamp) +complex/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) complex/$(DEPDIR) + @: > complex/$(DEPDIR)/$(am__dirstamp) +complex/mpi_complex.$(OBJEXT): complex/$(am__dirstamp) \ + complex/$(DEPDIR)/$(am__dirstamp) +../../examples/interface/$(am__dirstamp): + @$(MKDIR_P) ../../examples/interface + @: > ../../examples/interface/$(am__dirstamp) +../../examples/interface/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ../../examples/interface/$(DEPDIR) + @: > ../../examples/interface/$(DEPDIR)/$(am__dirstamp) +../../examples/interface/complex_interface.$(OBJEXT): \ + ../../examples/interface/$(am__dirstamp) \ + ../../examples/interface/$(DEPDIR)/$(am__dirstamp) + +complex/mpi_complex$(EXEEXT): $(complex_mpi_complex_OBJECTS) $(complex_mpi_complex_DEPENDENCIES) $(EXTRA_complex_mpi_complex_DEPENDENCIES) complex/$(am__dirstamp) + @rm -f complex/mpi_complex$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(complex_mpi_complex_OBJECTS) $(complex_mpi_complex_LDADD) $(LIBS) +filters/$(am__dirstamp): + @$(MKDIR_P) filters + @: > filters/$(am__dirstamp) +filters/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) filters/$(DEPDIR) + @: > filters/$(DEPDIR)/$(am__dirstamp) +filters/filter.$(OBJEXT): filters/$(am__dirstamp) \ + filters/$(DEPDIR)/$(am__dirstamp) + +filters/filter$(EXEEXT): $(filters_filter_OBJECTS) $(filters_filter_DEPENDENCIES) $(EXTRA_filters_filter_DEPENDENCIES) filters/$(am__dirstamp) + @rm -f filters/filter$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(filters_filter_OBJECTS) $(filters_filter_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) +matrix_decomposition/$(am__dirstamp): + @$(MKDIR_P) matrix_decomposition + @: > matrix_decomposition/$(am__dirstamp) +matrix_decomposition/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matrix_decomposition/$(DEPDIR) + @: > matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_cholesky.$(OBJEXT): \ + 
matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_cholesky_models.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_decomposition_params.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) +matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) + +matrix_decomposition/mpi_cholesky$(EXEEXT): $(matrix_decomposition_mpi_cholesky_OBJECTS) $(matrix_decomposition_mpi_cholesky_DEPENDENCIES) $(EXTRA_matrix_decomposition_mpi_cholesky_DEPENDENCIES) matrix_decomposition/$(am__dirstamp) + @rm -f matrix_decomposition/mpi_cholesky$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_decomposition_mpi_cholesky_OBJECTS) $(matrix_decomposition_mpi_cholesky_LDADD) $(LIBS) +matrix_decomposition/mpi_cholesky_distributed.$(OBJEXT): \ + matrix_decomposition/$(am__dirstamp) \ + matrix_decomposition/$(DEPDIR)/$(am__dirstamp) + +matrix_decomposition/mpi_cholesky_distributed$(EXEEXT): $(matrix_decomposition_mpi_cholesky_distributed_OBJECTS) $(matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES) $(EXTRA_matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES) matrix_decomposition/$(am__dirstamp) + @rm -f matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_decomposition_mpi_cholesky_distributed_OBJECTS) $(matrix_decomposition_mpi_cholesky_distributed_LDADD) $(LIBS) +matrix_mult/$(am__dirstamp): + @$(MKDIR_P) matrix_mult + @: > matrix_mult/$(am__dirstamp) 
+matrix_mult/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matrix_mult/$(DEPDIR) + @: > matrix_mult/$(DEPDIR)/$(am__dirstamp) +matrix_mult/mm.$(OBJEXT): matrix_mult/$(am__dirstamp) \ + matrix_mult/$(DEPDIR)/$(am__dirstamp) + +matrix_mult/mm$(EXEEXT): $(matrix_mult_mm_OBJECTS) $(matrix_mult_mm_DEPENDENCIES) $(EXTRA_matrix_mult_mm_DEPENDENCIES) matrix_mult/$(am__dirstamp) + @rm -f matrix_mult/mm$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_mult_mm_OBJECTS) $(matrix_mult_mm_LDADD) $(LIBS) +matrix_mult/mm_2dbc.$(OBJEXT): matrix_mult/$(am__dirstamp) \ + matrix_mult/$(DEPDIR)/$(am__dirstamp) + +matrix_mult/mm_2dbc$(EXEEXT): $(matrix_mult_mm_2dbc_OBJECTS) $(matrix_mult_mm_2dbc_DEPENDENCIES) $(EXTRA_matrix_mult_mm_2dbc_DEPENDENCIES) matrix_mult/$(am__dirstamp) + @rm -f matrix_mult/mm_2dbc$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_mult_mm_2dbc_OBJECTS) $(matrix_mult_mm_2dbc_LDADD) $(LIBS) +mpi_lu/$(am__dirstamp): + @$(MKDIR_P) mpi_lu + @: > mpi_lu/$(am__dirstamp) +mpi_lu/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mpi_lu/$(DEPDIR) + @: > mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/plu_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/plu_solve_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pdlu_kernels.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pdlu.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_example_double$(EXEEXT): $(mpi_lu_plu_example_double_OBJECTS) $(mpi_lu_plu_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) + @rm -f mpi_lu/plu_example_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_example_double_OBJECTS) $(mpi_lu_plu_example_double_LDADD) $(LIBS) +mpi_lu/plu_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/plu_solve_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pslu_kernels.$(OBJEXT): 
mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pslu.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_example_float$(EXEEXT): $(mpi_lu_plu_example_float_OBJECTS) $(mpi_lu_plu_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) + @rm -f mpi_lu/plu_example_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_example_float_OBJECTS) $(mpi_lu_plu_example_float_LDADD) $(LIBS) +mpi_lu/plu_implicit_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pdlu_implicit.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_implicit_example_double$(EXEEXT): $(mpi_lu_plu_implicit_example_double_OBJECTS) $(mpi_lu_plu_implicit_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_implicit_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) + @rm -f mpi_lu/plu_implicit_example_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_implicit_example_double_OBJECTS) $(mpi_lu_plu_implicit_example_double_LDADD) $(LIBS) +mpi_lu/plu_implicit_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) +mpi_lu/pslu_implicit.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_implicit_example_float$(EXEEXT): $(mpi_lu_plu_implicit_example_float_OBJECTS) $(mpi_lu_plu_implicit_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_implicit_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) + @rm -f mpi_lu/plu_implicit_example_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_implicit_example_float_OBJECTS) $(mpi_lu_plu_implicit_example_float_LDADD) $(LIBS) +mpi_lu/plu_outofcore_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_outofcore_example_double$(EXEEXT): $(mpi_lu_plu_outofcore_example_double_OBJECTS) $(mpi_lu_plu_outofcore_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_outofcore_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) + 
@rm -f mpi_lu/plu_outofcore_example_double$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_outofcore_example_double_OBJECTS) $(mpi_lu_plu_outofcore_example_double_LDADD) $(LIBS) +mpi_lu/plu_outofcore_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ + mpi_lu/$(DEPDIR)/$(am__dirstamp) + +mpi_lu/plu_outofcore_example_float$(EXEEXT): $(mpi_lu_plu_outofcore_example_float_OBJECTS) $(mpi_lu_plu_outofcore_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_outofcore_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) + @rm -f mpi_lu/plu_outofcore_example_float$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_outofcore_example_float_OBJECTS) $(mpi_lu_plu_outofcore_example_float_LDADD) $(LIBS) +mpi_redux/$(am__dirstamp): + @$(MKDIR_P) mpi_redux + @: > mpi_redux/$(am__dirstamp) +mpi_redux/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mpi_redux/$(DEPDIR) + @: > mpi_redux/$(DEPDIR)/$(am__dirstamp) +mpi_redux/mpi_redux.$(OBJEXT): mpi_redux/$(am__dirstamp) \ + mpi_redux/$(DEPDIR)/$(am__dirstamp) + +mpi_redux/mpi_redux$(EXEEXT): $(mpi_redux_mpi_redux_OBJECTS) $(mpi_redux_mpi_redux_DEPENDENCIES) $(EXTRA_mpi_redux_mpi_redux_DEPENDENCIES) mpi_redux/$(am__dirstamp) + @rm -f mpi_redux/mpi_redux$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_OBJECTS) $(mpi_redux_mpi_redux_LDADD) $(LIBS) +mpi_redux/mpi_redux_autowrapup.$(OBJEXT): mpi_redux/$(am__dirstamp) \ + mpi_redux/$(DEPDIR)/$(am__dirstamp) + +mpi_redux/mpi_redux_autowrapup$(EXEEXT): $(mpi_redux_mpi_redux_autowrapup_OBJECTS) $(mpi_redux_mpi_redux_autowrapup_DEPENDENCIES) $(EXTRA_mpi_redux_mpi_redux_autowrapup_DEPENDENCIES) mpi_redux/$(am__dirstamp) + @rm -f mpi_redux/mpi_redux_autowrapup$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_autowrapup_OBJECTS) $(mpi_redux_mpi_redux_autowrapup_LDADD) $(LIBS) +mpi_redux/mpi_redux_tree.$(OBJEXT): mpi_redux/$(am__dirstamp) \ + mpi_redux/$(DEPDIR)/$(am__dirstamp) + +mpi_redux/mpi_redux_tree$(EXEEXT): $(mpi_redux_mpi_redux_tree_OBJECTS) $(mpi_redux_mpi_redux_tree_DEPENDENCIES) 
$(EXTRA_mpi_redux_mpi_redux_tree_DEPENDENCIES) mpi_redux/$(am__dirstamp) + @rm -f mpi_redux/mpi_redux_tree$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_tree_OBJECTS) $(mpi_redux_mpi_redux_tree_LDADD) $(LIBS) +native_fortran/$(am__dirstamp): + @$(MKDIR_P) native_fortran + @: > native_fortran/$(am__dirstamp) +native_fortran/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) native_fortran/$(DEPDIR) + @: > native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/fstarpu_mpi_mod.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/fstarpu_mod.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_basic_ring.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_basic_ring$(EXEEXT): $(native_fortran_nf_basic_ring_OBJECTS) $(native_fortran_nf_basic_ring_DEPENDENCIES) $(EXTRA_native_fortran_nf_basic_ring_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_basic_ring$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_basic_ring_OBJECTS) $(native_fortran_nf_basic_ring_LDADD) $(LIBS) +native_fortran/nf_mm_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_mm.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_mm$(EXEEXT): $(native_fortran_nf_mm_OBJECTS) $(native_fortran_nf_mm_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_mm$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_OBJECTS) $(native_fortran_nf_mm_LDADD) $(LIBS) +native_fortran/nf_mm_cl_blas.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) +native_fortran/nf_mm_2dbc.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_mm_2dbc$(EXEEXT): 
$(native_fortran_nf_mm_2dbc_OBJECTS) $(native_fortran_nf_mm_2dbc_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_2dbc_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_mm_2dbc$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_2dbc_OBJECTS) $(native_fortran_nf_mm_2dbc_LDADD) $(LIBS) +native_fortran/nf_mm_task_build.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_mm_task_build$(EXEEXT): $(native_fortran_nf_mm_task_build_OBJECTS) $(native_fortran_nf_mm_task_build_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_task_build_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_mm_task_build$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_task_build_OBJECTS) $(native_fortran_nf_mm_task_build_LDADD) $(LIBS) +native_fortran/nf_mpi_redux.$(OBJEXT): native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_mpi_redux$(EXEEXT): $(native_fortran_nf_mpi_redux_OBJECTS) $(native_fortran_nf_mpi_redux_DEPENDENCIES) $(EXTRA_native_fortran_nf_mpi_redux_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_mpi_redux$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mpi_redux_OBJECTS) $(native_fortran_nf_mpi_redux_LDADD) $(LIBS) +native_fortran/nf_mpi_redux_tree.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_mpi_redux_tree$(EXEEXT): $(native_fortran_nf_mpi_redux_tree_OBJECTS) $(native_fortran_nf_mpi_redux_tree_DEPENDENCIES) $(EXTRA_native_fortran_nf_mpi_redux_tree_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_mpi_redux_tree$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mpi_redux_tree_OBJECTS) $(native_fortran_nf_mpi_redux_tree_LDADD) $(LIBS) +native_fortran/nf_redux_test.$(OBJEXT): \ + native_fortran/$(am__dirstamp) \ + native_fortran/$(DEPDIR)/$(am__dirstamp) + +native_fortran/nf_redux_test$(EXEEXT): 
$(native_fortran_nf_redux_test_OBJECTS) $(native_fortran_nf_redux_test_DEPENDENCIES) $(EXTRA_native_fortran_nf_redux_test_DEPENDENCIES) native_fortran/$(am__dirstamp) + @rm -f native_fortran/nf_redux_test$(EXEEXT) + $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_redux_test_OBJECTS) $(native_fortran_nf_redux_test_LDADD) $(LIBS) +stencil/$(am__dirstamp): + @$(MKDIR_P) stencil + @: > stencil/$(am__dirstamp) +stencil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) stencil/$(DEPDIR) + @: > stencil/$(DEPDIR)/$(am__dirstamp) +stencil/stencil5.$(OBJEXT): stencil/$(am__dirstamp) \ + stencil/$(DEPDIR)/$(am__dirstamp) + +stencil/stencil5$(EXEEXT): $(stencil_stencil5_OBJECTS) $(stencil_stencil5_DEPENDENCIES) $(EXTRA_stencil_stencil5_DEPENDENCIES) stencil/$(am__dirstamp) + @rm -f stencil/stencil5$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(stencil_stencil5_OBJECTS) $(stencil_stencil5_LDADD) $(LIBS) +stencil/stencil5_lb.$(OBJEXT): stencil/$(am__dirstamp) \ + stencil/$(DEPDIR)/$(am__dirstamp) + +stencil/stencil5_lb$(EXEEXT): $(stencil_stencil5_lb_OBJECTS) $(stencil_stencil5_lb_DEPENDENCIES) $(EXTRA_stencil_stencil5_lb_DEPENDENCIES) stencil/$(am__dirstamp) + @rm -f stencil/stencil5_lb$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(stencil_stencil5_lb_OBJECTS) $(stencil_stencil5_lb_LDADD) $(LIBS) +user_datatype/$(am__dirstamp): + @$(MKDIR_P) user_datatype + @: > user_datatype/$(am__dirstamp) +user_datatype/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) user_datatype/$(DEPDIR) + @: > user_datatype/$(DEPDIR)/$(am__dirstamp) +user_datatype/user_datatype.$(OBJEXT): user_datatype/$(am__dirstamp) \ + user_datatype/$(DEPDIR)/$(am__dirstamp) +user_datatype/my_interface.$(OBJEXT): user_datatype/$(am__dirstamp) \ + user_datatype/$(DEPDIR)/$(am__dirstamp) + +user_datatype/user_datatype$(EXEEXT): $(user_datatype_user_datatype_OBJECTS) $(user_datatype_user_datatype_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_DEPENDENCIES) user_datatype/$(am__dirstamp) + @rm -f user_datatype/user_datatype$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(user_datatype_user_datatype_OBJECTS) $(user_datatype_user_datatype_LDADD) $(LIBS) +user_datatype/user_datatype2.$(OBJEXT): user_datatype/$(am__dirstamp) \ + user_datatype/$(DEPDIR)/$(am__dirstamp) + +user_datatype/user_datatype2$(EXEEXT): $(user_datatype_user_datatype2_OBJECTS) $(user_datatype_user_datatype2_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype2_DEPENDENCIES) user_datatype/$(am__dirstamp) + @rm -f user_datatype/user_datatype2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype2_OBJECTS) $(user_datatype_user_datatype2_LDADD) $(LIBS) +user_datatype/user_datatype_early.$(OBJEXT): \ + user_datatype/$(am__dirstamp) \ + user_datatype/$(DEPDIR)/$(am__dirstamp) + +user_datatype/user_datatype_early$(EXEEXT): $(user_datatype_user_datatype_early_OBJECTS) $(user_datatype_user_datatype_early_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_early_DEPENDENCIES) user_datatype/$(am__dirstamp) + @rm -f user_datatype/user_datatype_early$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype_early_OBJECTS) $(user_datatype_user_datatype_early_LDADD) $(LIBS) +user_datatype/user_datatype_interface.$(OBJEXT): \ + user_datatype/$(am__dirstamp) \ + user_datatype/$(DEPDIR)/$(am__dirstamp) + +user_datatype/user_datatype_interface$(EXEEXT): $(user_datatype_user_datatype_interface_OBJECTS) $(user_datatype_user_datatype_interface_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_interface_DEPENDENCIES) user_datatype/$(am__dirstamp) + @rm -f user_datatype/user_datatype_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype_interface_OBJECTS) $(user_datatype_user_datatype_interface_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f ../../examples/common/*.$(OBJEXT) + -rm -f ../../examples/interface/*.$(OBJEXT) + -rm -f benchs/*.$(OBJEXT) + -rm -f cache/*.$(OBJEXT) + -rm -f cg/*.$(OBJEXT) + -rm -f comm/*.$(OBJEXT) + -rm -f complex/*.$(OBJEXT) + -rm -f filters/*.$(OBJEXT) + -rm -f matrix_decomposition/*.$(OBJEXT) + -rm -f 
matrix_mult/*.$(OBJEXT) + -rm -f mpi_lu/*.$(OBJEXT) + -rm -f mpi_redux/*.$(OBJEXT) + -rm -f native_fortran/*.$(OBJEXT) + -rm -f stencil/*.$(OBJEXT) + -rm -f user_datatype/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@../../examples/common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@../../examples/interface/$(DEPDIR)/complex_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/abstract_sendrecv_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bcast_bench-bench_helper.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bench_helper.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst_gemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst_helper.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/gemm_helper.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_gemm_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cache/$(DEPDIR)/cache.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@cache/$(DEPDIR)/cache_disable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cg/$(DEPDIR)/cg.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/comm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/group.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/mix_comm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@complex/$(DEPDIR)/mpi_complex.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/filter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_mult/$(DEPDIR)/mm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matrix_mult/$(DEPDIR)/mm_2dbc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pdlu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pdlu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@mpi_lu/$(DEPDIR)/pdlu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_example_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_example_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_solve_double.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_solve_float.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux_tree.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stencil/$(DEPDIR)/stencil5.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stencil/$(DEPDIR)/stencil5_lb.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/my_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@user_datatype/$(DEPDIR)/user_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype_early.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype_interface.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +benchs/bcast_bench-bcast_bench.o: benchs/bcast_bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bcast_bench.o -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo -c -o benchs/bcast_bench-bcast_bench.o `test -f 'benchs/bcast_bench.c' || echo '$(srcdir)/'`benchs/bcast_bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bcast_bench.c' object='benchs/bcast_bench-bcast_bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bcast_bench.o `test -f 'benchs/bcast_bench.c' || echo '$(srcdir)/'`benchs/bcast_bench.c + +benchs/bcast_bench-bcast_bench.obj: benchs/bcast_bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bcast_bench.obj -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo -c -o benchs/bcast_bench-bcast_bench.obj `if test -f 'benchs/bcast_bench.c'; then $(CYGPATH_W) 'benchs/bcast_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bcast_bench.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bcast_bench.c' object='benchs/bcast_bench-bcast_bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bcast_bench.obj `if test -f 'benchs/bcast_bench.c'; then $(CYGPATH_W) 'benchs/bcast_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bcast_bench.c'; fi` + +benchs/bcast_bench-bench_helper.o: benchs/bench_helper.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bench_helper.o -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo -c -o benchs/bcast_bench-bench_helper.o `test -f 'benchs/bench_helper.c' || echo '$(srcdir)/'`benchs/bench_helper.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo benchs/$(DEPDIR)/bcast_bench-bench_helper.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bench_helper.c' object='benchs/bcast_bench-bench_helper.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bench_helper.o `test -f 'benchs/bench_helper.c' || echo '$(srcdir)/'`benchs/bench_helper.c + +benchs/bcast_bench-bench_helper.obj: benchs/bench_helper.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bench_helper.obj -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo -c -o benchs/bcast_bench-bench_helper.obj `if test -f 'benchs/bench_helper.c'; then $(CYGPATH_W) 'benchs/bench_helper.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bench_helper.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo 
benchs/$(DEPDIR)/bcast_bench-bench_helper.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bench_helper.c' object='benchs/bcast_bench-bench_helper.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bench_helper.obj `if test -f 'benchs/bench_helper.c'; then $(CYGPATH_W) 'benchs/bench_helper.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bench_helper.c'; fi` + +benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o: benchs/recv_wait_finalize_bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -MT benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o -MD -MP -MF benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o `test -f 'benchs/recv_wait_finalize_bench.c' || echo '$(srcdir)/'`benchs/recv_wait_finalize_bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/recv_wait_finalize_bench.c' object='benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o `test -f 'benchs/recv_wait_finalize_bench.c' || echo '$(srcdir)/'`benchs/recv_wait_finalize_bench.c + 
+benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj: benchs/recv_wait_finalize_bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -MT benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj -MD -MP -MF benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj `if test -f 'benchs/recv_wait_finalize_bench.c'; then $(CYGPATH_W) 'benchs/recv_wait_finalize_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/recv_wait_finalize_bench.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/recv_wait_finalize_bench.c' object='benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj `if test -f 'benchs/recv_wait_finalize_bench.c'; then $(CYGPATH_W) 'benchs/recv_wait_finalize_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/recv_wait_finalize_bench.c'; fi` + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +.f90.o: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< + +.f90.obj: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.f90.lo: + $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf benchs/.libs benchs/_libs + -rm -rf cache/.libs cache/_libs + -rm -rf cg/.libs cg/_libs + -rm -rf comm/.libs comm/_libs + -rm -rf complex/.libs complex/_libs + -rm -rf filters/.libs filters/_libs + -rm -rf matrix_decomposition/.libs matrix_decomposition/_libs + -rm -rf matrix_mult/.libs matrix_mult/_libs + -rm -rf mpi_lu/.libs mpi_lu/_libs + -rm -rf mpi_redux/.libs mpi_redux/_libs + -rm -rf native_fortran/.libs native_fortran/_libs + -rm -rf stencil/.libs stencil/_libs + -rm -rf 
user_datatype/.libs user_datatype/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +stencil/stencil5.log: stencil/stencil5$(EXEEXT) + @p='stencil/stencil5$(EXEEXT)'; \ + b='stencil/stencil5'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +stencil/stencil5_lb.log: stencil/stencil5_lb$(EXEEXT) + @p='stencil/stencil5_lb$(EXEEXT)'; \ + b='stencil/stencil5_lb'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cache/cache.log: cache/cache$(EXEEXT) + @p='cache/cache$(EXEEXT)'; \ + b='cache/cache'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +cache/cache_disable.log: cache/cache_disable$(EXEEXT) + @p='cache/cache_disable$(EXEEXT)'; \ + b='cache/cache_disable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_example_float.log: mpi_lu/plu_example_float$(EXEEXT) + @p='mpi_lu/plu_example_float$(EXEEXT)'; \ + b='mpi_lu/plu_example_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_example_double.log: mpi_lu/plu_example_double$(EXEEXT) + @p='mpi_lu/plu_example_double$(EXEEXT)'; \ + b='mpi_lu/plu_example_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_implicit_example_float.log: mpi_lu/plu_implicit_example_float$(EXEEXT) + @p='mpi_lu/plu_implicit_example_float$(EXEEXT)'; \ + b='mpi_lu/plu_implicit_example_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_implicit_example_double.log: mpi_lu/plu_implicit_example_double$(EXEEXT) + @p='mpi_lu/plu_implicit_example_double$(EXEEXT)'; \ + b='mpi_lu/plu_implicit_example_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_outofcore_example_float.log: mpi_lu/plu_outofcore_example_float$(EXEEXT) + @p='mpi_lu/plu_outofcore_example_float$(EXEEXT)'; \ + b='mpi_lu/plu_outofcore_example_float'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +mpi_lu/plu_outofcore_example_double.log: mpi_lu/plu_outofcore_example_double$(EXEEXT) + @p='mpi_lu/plu_outofcore_example_double$(EXEEXT)'; \ + b='mpi_lu/plu_outofcore_example_double'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix_decomposition/mpi_cholesky.log: matrix_decomposition/mpi_cholesky$(EXEEXT) + @p='matrix_decomposition/mpi_cholesky$(EXEEXT)'; \ + b='matrix_decomposition/mpi_cholesky'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix_decomposition/mpi_cholesky_distributed.log: matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) + @p='matrix_decomposition/mpi_cholesky_distributed$(EXEEXT)'; \ + b='matrix_decomposition/mpi_cholesky_distributed'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix_mult/mm.log: matrix_mult/mm$(EXEEXT) + @p='matrix_mult/mm$(EXEEXT)'; \ + b='matrix_mult/mm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix_mult/mm_2dbc.log: matrix_mult/mm_2dbc$(EXEEXT) + @p='matrix_mult/mm_2dbc$(EXEEXT)'; \ + b='matrix_mult/mm_2dbc'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_redux/mpi_redux.log: 
mpi_redux/mpi_redux$(EXEEXT) + @p='mpi_redux/mpi_redux$(EXEEXT)'; \ + b='mpi_redux/mpi_redux'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_redux/mpi_redux_autowrapup.log: mpi_redux/mpi_redux_autowrapup$(EXEEXT) + @p='mpi_redux/mpi_redux_autowrapup$(EXEEXT)'; \ + b='mpi_redux/mpi_redux_autowrapup'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_redux/mpi_redux_tree.log: mpi_redux/mpi_redux_tree$(EXEEXT) + @p='mpi_redux/mpi_redux_tree$(EXEEXT)'; \ + b='mpi_redux/mpi_redux_tree'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_mm.log: native_fortran/nf_mm$(EXEEXT) + @p='native_fortran/nf_mm$(EXEEXT)'; \ + b='native_fortran/nf_mm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_mm_task_build.log: native_fortran/nf_mm_task_build$(EXEEXT) + @p='native_fortran/nf_mm_task_build$(EXEEXT)'; \ + b='native_fortran/nf_mm_task_build'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_basic_ring.log: native_fortran/nf_basic_ring$(EXEEXT) + @p='native_fortran/nf_basic_ring$(EXEEXT)'; \ + b='native_fortran/nf_basic_ring'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_mm_2dbc.log: native_fortran/nf_mm_2dbc$(EXEEXT) + @p='native_fortran/nf_mm_2dbc$(EXEEXT)'; \ + b='native_fortran/nf_mm_2dbc'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_mpi_redux.log: native_fortran/nf_mpi_redux$(EXEEXT) + @p='native_fortran/nf_mpi_redux$(EXEEXT)'; \ + b='native_fortran/nf_mpi_redux'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_mpi_redux_tree.log: native_fortran/nf_mpi_redux_tree$(EXEEXT) + @p='native_fortran/nf_mpi_redux_tree$(EXEEXT)'; \ + b='native_fortran/nf_mpi_redux_tree'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +native_fortran/nf_redux_test.log: native_fortran/nf_redux_test$(EXEEXT) + @p='native_fortran/nf_redux_test$(EXEEXT)'; \ + b='native_fortran/nf_redux_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +complex/mpi_complex.log: complex/mpi_complex$(EXEEXT) + @p='complex/mpi_complex$(EXEEXT)'; \ + b='complex/mpi_complex'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +user_datatype/user_datatype2.log: user_datatype/user_datatype2$(EXEEXT) + @p='user_datatype/user_datatype2$(EXEEXT)'; \ + b='user_datatype/user_datatype2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +user_datatype/user_datatype_early.log: user_datatype/user_datatype_early$(EXEEXT) + @p='user_datatype/user_datatype_early$(EXEEXT)'; \ + b='user_datatype/user_datatype_early'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +user_datatype/user_datatype.log: user_datatype/user_datatype$(EXEEXT) + @p='user_datatype/user_datatype$(EXEEXT)'; \ + b='user_datatype/user_datatype'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +user_datatype/user_datatype_interface.log: user_datatype/user_datatype_interface$(EXEEXT) + @p='user_datatype/user_datatype_interface$(EXEEXT)'; \ + b='user_datatype/user_datatype_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +comm/comm.log: comm/comm$(EXEEXT) + @p='comm/comm$(EXEEXT)'; \ + b='comm/comm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +comm/mix_comm.log: comm/mix_comm$(EXEEXT) + 
@p='comm/mix_comm$(EXEEXT)'; \ + b='comm/mix_comm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +comm/group.log: comm/group$(EXEEXT) + @p='comm/group$(EXEEXT)'; \ + b='comm/group'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +filters/filter.log: filters/filter$(EXEEXT) + @p='filters/filter$(EXEEXT)'; \ + b='filters/filter'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/sendrecv_bench.log: benchs/sendrecv_bench$(EXEEXT) + @p='benchs/sendrecv_bench$(EXEEXT)'; \ + b='benchs/sendrecv_bench'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/burst.log: benchs/burst$(EXEEXT) + @p='benchs/burst$(EXEEXT)'; \ + b='benchs/burst'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/bcast_bench.log: benchs/bcast_bench$(EXEEXT) + @p='benchs/bcast_bench$(EXEEXT)'; \ + b='benchs/bcast_bench'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/recv_wait_finalize_bench.log: benchs/recv_wait_finalize_bench$(EXEEXT) + 
@p='benchs/recv_wait_finalize_bench$(EXEEXT)'; \ + b='benchs/recv_wait_finalize_bench'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/sendrecv_parallel_tasks_bench.log: benchs/sendrecv_parallel_tasks_bench$(EXEEXT) + @p='benchs/sendrecv_parallel_tasks_bench$(EXEEXT)'; \ + b='benchs/sendrecv_parallel_tasks_bench'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/sendrecv_gemm_bench.log: benchs/sendrecv_gemm_bench$(EXEEXT) + @p='benchs/sendrecv_gemm_bench$(EXEEXT)'; \ + b='benchs/sendrecv_gemm_bench'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +benchs/burst_gemm.log: benchs/burst_gemm$(EXEEXT) + @p='benchs/burst_gemm$(EXEEXT)'; \ + b='benchs/burst_gemm'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) 
$(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f ../../examples/common/$(DEPDIR)/$(am__dirstamp) + -rm -f ../../examples/common/$(am__dirstamp) + -rm -f ../../examples/interface/$(DEPDIR)/$(am__dirstamp) + -rm -f ../../examples/interface/$(am__dirstamp) + -rm -f benchs/$(DEPDIR)/$(am__dirstamp) + -rm -f benchs/$(am__dirstamp) + -rm -f cache/$(DEPDIR)/$(am__dirstamp) + -rm -f cache/$(am__dirstamp) + -rm -f cg/$(DEPDIR)/$(am__dirstamp) + -rm -f cg/$(am__dirstamp) + -rm -f comm/$(DEPDIR)/$(am__dirstamp) + -rm -f comm/$(am__dirstamp) + -rm -f complex/$(DEPDIR)/$(am__dirstamp) + -rm -f complex/$(am__dirstamp) + -rm -f filters/$(DEPDIR)/$(am__dirstamp) + -rm -f filters/$(am__dirstamp) + -rm -f matrix_decomposition/$(DEPDIR)/$(am__dirstamp) + -rm -f matrix_decomposition/$(am__dirstamp) + -rm -f matrix_mult/$(DEPDIR)/$(am__dirstamp) + -rm -f matrix_mult/$(am__dirstamp) + -rm -f mpi_lu/$(DEPDIR)/$(am__dirstamp) + -rm -f mpi_lu/$(am__dirstamp) + -rm -f mpi_redux/$(DEPDIR)/$(am__dirstamp) + -rm -f mpi_redux/$(am__dirstamp) + -rm -f native_fortran/$(DEPDIR)/$(am__dirstamp) + -rm -f native_fortran/$(am__dirstamp) + -rm -f stencil/$(DEPDIR)/$(am__dirstamp) + -rm -f stencil/$(am__dirstamp) + -rm -f user_datatype/$(DEPDIR)/$(am__dirstamp) + -rm -f user_datatype/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ../../examples/common/$(DEPDIR)/blas.Po + -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f benchs/$(DEPDIR)/abstract_sendrecv_bench.Po + -rm -f benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po + -rm -f benchs/$(DEPDIR)/bcast_bench-bench_helper.Po + -rm -f benchs/$(DEPDIR)/bench_helper.Po + -rm -f benchs/$(DEPDIR)/burst.Po + -rm -f benchs/$(DEPDIR)/burst_gemm.Po + -rm -f benchs/$(DEPDIR)/burst_helper.Po + -rm -f benchs/$(DEPDIR)/gemm_helper.Po + -rm -f benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_gemm_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po + -rm -f cache/$(DEPDIR)/cache.Po + -rm -f cache/$(DEPDIR)/cache_disable.Po + -rm -f cg/$(DEPDIR)/cg.Po + -rm -f comm/$(DEPDIR)/comm.Po + -rm -f comm/$(DEPDIR)/group.Po + -rm -f comm/$(DEPDIR)/mix_comm.Po + -rm -f complex/$(DEPDIR)/mpi_complex.Po + -rm -f filters/$(DEPDIR)/filter.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po + -rm -f matrix_mult/$(DEPDIR)/mm.Po + -rm -f matrix_mult/$(DEPDIR)/mm_2dbc.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu_implicit.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu_kernels.Po + -rm -f mpi_lu/$(DEPDIR)/plu_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_example_float.Po + 
-rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po + -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po + -rm -f mpi_lu/$(DEPDIR)/plu_solve_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_solve_float.Po + -rm -f mpi_lu/$(DEPDIR)/pslu.Po + -rm -f mpi_lu/$(DEPDIR)/pslu_implicit.Po + -rm -f mpi_lu/$(DEPDIR)/pslu_kernels.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux_tree.Po + -rm -f stencil/$(DEPDIR)/stencil5.Po + -rm -f stencil/$(DEPDIR)/stencil5_lb.Po + -rm -f user_datatype/$(DEPDIR)/my_interface.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype2.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype_early.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype_interface.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ../../examples/common/$(DEPDIR)/blas.Po + -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f benchs/$(DEPDIR)/abstract_sendrecv_bench.Po + -rm -f benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po + -rm -f benchs/$(DEPDIR)/bcast_bench-bench_helper.Po + -rm -f benchs/$(DEPDIR)/bench_helper.Po + -rm -f benchs/$(DEPDIR)/burst.Po + -rm -f benchs/$(DEPDIR)/burst_gemm.Po + -rm -f benchs/$(DEPDIR)/burst_helper.Po + -rm -f 
benchs/$(DEPDIR)/gemm_helper.Po + -rm -f benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_gemm_bench.Po + -rm -f benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po + -rm -f cache/$(DEPDIR)/cache.Po + -rm -f cache/$(DEPDIR)/cache_disable.Po + -rm -f cg/$(DEPDIR)/cg.Po + -rm -f comm/$(DEPDIR)/comm.Po + -rm -f comm/$(DEPDIR)/group.Po + -rm -f comm/$(DEPDIR)/mix_comm.Po + -rm -f complex/$(DEPDIR)/mpi_complex.Po + -rm -f filters/$(DEPDIR)/filter.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po + -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po + -rm -f matrix_mult/$(DEPDIR)/mm.Po + -rm -f matrix_mult/$(DEPDIR)/mm_2dbc.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu_implicit.Po + -rm -f mpi_lu/$(DEPDIR)/pdlu_kernels.Po + -rm -f mpi_lu/$(DEPDIR)/plu_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_example_float.Po + -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po + -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po + -rm -f mpi_lu/$(DEPDIR)/plu_solve_double.Po + -rm -f mpi_lu/$(DEPDIR)/plu_solve_float.Po + -rm -f mpi_lu/$(DEPDIR)/pslu.Po + -rm -f mpi_lu/$(DEPDIR)/pslu_implicit.Po + -rm -f mpi_lu/$(DEPDIR)/pslu_kernels.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po + -rm -f mpi_redux/$(DEPDIR)/mpi_redux_tree.Po + -rm -f stencil/$(DEPDIR)/stencil5.Po + -rm -f stencil/$(DEPDIR)/stencil5_lb.Po + -rm -f 
user_datatype/$(DEPDIR)/my_interface.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype2.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype_early.Po + -rm -f user_datatype/$(DEPDIR)/user_datatype_interface.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ 
$(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 +# Print every test log that contains a line starting with "FAIL ", then do the same in each sub-directory. Sub-makes are started through the MAKE variable so that they use the same make program and inherit its flags and jobserver. +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET +# Fail if a test log has a line starting with "FAIL " or a sanitizer / runtime-error message (the matches are printed), then check each sub-directory. +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET +# Dump all test logs, fail if one of them contains a sanitizer / runtime-error message, then recurse into each sub-directory. +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET +# Same checks as showcheck, applied to the test suite summary log. +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @!
grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it.
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +# Native Fortran example + +# - link over source file to build our own object +native_fortran/fstarpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ +native_fortran/fstarpu_mpi_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/mpi/include/$(notdir $@) $@ + +# - express the creation of .mod along .o +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@fstarpu_mod.mod: native_fortran/fstarpu_mod.o +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@fstarpu_mpi_mod.mod: native_fortran/fstarpu_mpi_mod.o +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@nf_mm_cl.mod: native_fortran/nf_mm_cl.o +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@nf_mm_cl_blas.mod: native_fortran/nf_mm_cl_blas.o + +# - list explicit dependences to control proper module files dependencies +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/fstarpu_mpi_mod.o: fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_cl.o: fstarpu_mod.mod fstarpu_mpi_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_cl_blas.o: fstarpu_mod.mod fstarpu_mpi_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_2dbc.o: nf_mm_cl.mod nf_mm_cl_blas.mod fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_task_build.o: nf_mm_cl.mod 
fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_basic_ring.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_redux_test.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mpi_redux.o: fstarpu_mpi_mod.mod fstarpu_mod.mod +@STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mpi_redux_tree.o: fstarpu_mpi_mod.mod fstarpu_mod.mod + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mpi/examples/benchs/abstract_sendrecv_bench.c b/mpi/examples/benchs/abstract_sendrecv_bench.c new file mode 100644 index 0000000..35ac531 --- /dev/null +++ b/mpi/examples/benchs/abstract_sendrecv_bench.c @@ -0,0 +1,221 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "bench_helper.h" +#include "abstract_sendrecv_bench.h" + +/* + * Memset + */ + +#ifdef STARPU_USE_CUDA +static void cuda_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(buf, 0, length, starpu_cuda_get_local_stream()); +} +#endif + +void cpu_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + memset(buf, 0, length * sizeof(*buf)); +} + +static struct starpu_codelet memset_cl = +{ + .cpu_funcs = {cpu_memset_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_memset_codelet}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_memset_codelet"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +int sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir, int mem_node) +{ + uint64_t iterations = LOOPS_DEFAULT; + uint64_t s; + uint64_t j; + uint64_t k; + + if (mpi_rank >= 2) + { + starpu_pause(); + if (thread_barrier != NULL) + { + STARPU_PTHREAD_BARRIER_WAIT(thread_barrier); + } + + for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) + { + iterations = bench_nb_iterations(iterations, s); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + for (j = 0; j < iterations; j++) + { + starpu_mpi_barrier(MPI_COMM_WORLD); + } + } + starpu_resume(); + + return 0; + } + + if (mpi_rank == 0) + { + printf("Times in us\n"); + printf("# size (Bytes)\t| latency \t| 10^6 B/s \t| MB/s \t| d1 \t|median \t| avg \t| d9 \t| max\n"); + } + + starpu_data_handle_t handle_send, handle_recv; + float* vector_send = NULL; + float* vector_recv = NULL; + double t1, t2, global_tstart, global_tend; + double* lats = malloc(sizeof(double) * LOOPS_DEFAULT); + starpu_mpi_req send_req, recv_req; + int ret; + + if (thread_barrier != NULL) + { + STARPU_PTHREAD_BARRIER_WAIT(thread_barrier); + } + + 
global_tstart = starpu_timing_now(); + for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) + { + vector_send = (void *)starpu_malloc_on_node_flags(mem_node, s, STARPU_MALLOC_PINNED); + vector_recv = (void *)starpu_malloc_on_node_flags(mem_node, s, STARPU_MALLOC_PINNED); + + starpu_vector_data_register(&handle_send, mem_node, (uintptr_t) vector_send, s, 1); + starpu_vector_data_register(&handle_recv, mem_node, (uintptr_t) vector_recv, s, 1); + + ret = starpu_task_insert(&memset_cl, STARPU_W, handle_send, 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&memset_cl, STARPU_W, handle_recv, 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + iterations = bench_nb_iterations(iterations, s); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + for (j = 0; j < iterations; j++) + { + if (mpi_rank == 0) + { + t1 = starpu_timing_now(); + if (bidir) + { + ret = starpu_mpi_isend(handle_send, &send_req, 1, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_irecv(handle_recv, &recv_req, 1, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + ret = starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else + { + ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + t2 = starpu_timing_now(); + + const double t = (t2 - t1) / 2; + + lats[j] = t; + } + else + { + if (bidir) + { + ret = starpu_mpi_irecv(handle_recv, &recv_req, 0, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_isend(handle_send, &send_req, 0, 1, 
MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + ret = starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else + { + ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + } + + starpu_mpi_barrier(MPI_COMM_WORLD); + } + + if (mpi_rank == 0) + { + qsort(lats, iterations, sizeof(double), &comp_double); + + const double min_lat = lats[0]; + const double max_lat = lats[iterations - 1]; + const double med_lat = lats[(iterations - 1) / 2]; + const double d1_lat = lats[(iterations - 1) / 10]; + const double d9_lat = lats[9 * (iterations - 1) / 10]; + double avg_lat = 0.0; + + for(k = 0; k < iterations; k++) + { + avg_lat += lats[k]; + } + + avg_lat /= iterations; + const double bw_million_byte = s / min_lat; + const double bw_mbyte = bw_million_byte / 1.048576; + + printf("%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\n", + (long long)s, min_lat, bw_million_byte, bw_mbyte, d1_lat, med_lat, avg_lat, d9_lat, max_lat); + fflush(stdout); + } + starpu_data_unregister(handle_recv); + starpu_data_unregister(handle_send); + + starpu_free_on_node_flags(mem_node, (uintptr_t)vector_send, s, STARPU_MALLOC_PINNED); + starpu_free_on_node_flags(mem_node, (uintptr_t)vector_recv, s, STARPU_MALLOC_PINNED); + } + global_tend = starpu_timing_now(); + + if (mpi_rank == 0) + { + printf("Comm bench took %9.3lf ms\n", (global_tend - global_tstart) / 1000); + } + + free(lats); + return 0; +} diff --git a/mpi/examples/benchs/abstract_sendrecv_bench.h b/mpi/examples/benchs/abstract_sendrecv_bench.h new file mode 100644 index 0000000..c54a588 --- /dev/null +++ 
b/mpi/examples/benchs/abstract_sendrecv_bench.h @@ -0,0 +1,19 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +int sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir, int memnode); diff --git a/mpi/examples/benchs/bcast_bench.c b/mpi/examples/benchs/bcast_bench.c new file mode 100644 index 0000000..bf4ccee --- /dev/null +++ b/mpi/examples/benchs/bcast_bench.c @@ -0,0 +1,351 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Basic broadcast benchmark with synchronized clocks. 
+ * Inspired a lot from NewMadeleine examples/bench-coll/nm_bench_coll_mcast.c + * + * Synchronized clocks (mpi_sync_clocks) are available here: + * https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks + * and are detected during StarPU's configure. + */ + +#include +#include +#include "helper.h" +#include "bench_helper.h" + +#define SERVER_PRINTF(fmt, ...) do { if(rank == 0) { printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) + +#undef MULT_DEFAULT +#undef LOOPS_DEFAULT + +#ifdef STARPU_QUICK_CHECK +#define MIN_DEFAULT 1 +#define MAX_DEFAULT 1024 +#define LOOPS_DEFAULT 2 +#define INCR_DEFAULT 2 +#define MULT_DEFAULT 2 +#else +#define MIN_DEFAULT 1 +#define MAX_DEFAULT (16*1024*1024) +#define LOOPS_DEFAULT 50 +#define INCR_DEFAULT 1 +#define MULT_DEFAULT 1.4 +#endif + +#define NODE_INCREMENT 1 + +static starpu_data_handle_t data_handle, data_handle_in, data_handle_out; +static int use_tasks = 0; + +static void writer_cpu_func(void *descr[], void *args) +{ + (void) descr; + (void) args; +} + +static struct starpu_codelet writer_cl = +{ + .cpu_funcs = { writer_cpu_func }, + .cpu_funcs_name = { "writer_task" }, + .nbuffers = 1, + .modes = { STARPU_W } +}; + +static void reader_cpu_func(void* descr[], void* args) +{ + (void) descr; + (void) args; +} + +static struct starpu_codelet reader_cl = +{ + .cpu_funcs = { reader_cpu_func }, + .cpu_funcs_name = { "reader_task" }, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_W } +}; + +static void usage(void) +{ + fprintf(stderr, "-N iterations - iterations per length [%d]\n", LOOPS_DEFAULT); + fprintf(stderr, "--tasks - triggers coop through task dependency instead of StarPU's MPI interface\n"); + fprintf(stderr, "-P incr - number of nodes increment [%d]\n", NODE_INCREMENT); +} + +static inline uint64_t _next(uint64_t len, double multiplier, uint64_t increment) +{ + uint64_t next = len * multiplier + increment; + if (next <= len) + next++; + return next; +} + +static void bcast(MPI_Comm subcomm, int rank, int 
nb_dests) +{ + int i = 0, ret; + + if (use_tasks) + { + starpu_mpi_task_insert(subcomm, &writer_cl, STARPU_W, data_handle_in, 0); + for (i = 1; i <= nb_dests; i++) + { + starpu_mpi_data_register(data_handle_out, i, i); + starpu_mpi_task_insert(subcomm, &reader_cl, STARPU_R, data_handle_in, STARPU_W, data_handle_out, 0); + } + /* Resume StarPU's workers only after submitting tasks, to make + * sure the coop will be correctly detected. */ + starpu_resume(); + starpu_task_wait_for_all(); + + starpu_pause(); + } + else + { + if (rank == 0) + { + /* We explicitly tell StarPU this send will be a broadcast with n recipients. */ + starpu_mpi_coop_sends_data_handle_nb_sends(data_handle, nb_dests); + for (i = 1; i <= nb_dests; i++) + { + ret = starpu_mpi_isend_detached(data_handle, i , 0x42, subcomm, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(data_handle, 0, 0x42, subcomm, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + } +} + + +int main(int argc, char**argv) +{ + const uint64_t start_len = MIN_DEFAULT; + const uint64_t end_len = MAX_DEFAULT; + const double multiplier = MULT_DEFAULT; + const uint64_t increment = INCR_DEFAULT; + int iterations = LOOPS_DEFAULT; + int node_increment = NODE_INCREMENT; + int i, ret, rank, worldsize, subcomm_rank, thread_support; + MPI_Group world_group; + + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-N") == 0) + { + iterations = atoi(argv[++i]); + } + else if (strcmp(argv[i], "--tasks") == 0) + { + use_tasks = 1; + } + else if (strcmp(argv[i], "-P") == 0) + { + node_increment = atoi(argv[++i]); + } + else + { + fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); + usage(); + exit(1); + } + } + + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &thread_support) != MPI_SUCCESS) + { + FPRINTF(stderr, "MPI_Init_thread failed\n"); + return EXIT_FAILURE; + } + + if (thread_support < MPI_THREAD_MULTIPLE) + { + /* We 
need MPI_THREAD_MULTIPLE for the StarPU's MPI thread and + * the main thread calling functions from mpi_sync_clocks. */ + FPRINTF(stderr, "This benchmark requires MPI_THREAD_MULTIPLE support.\n"); + MPI_Finalize(); + return STARPU_TEST_SKIPPED; + } + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + if (worldsize < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + + return STARPU_TEST_SKIPPED; + } + + MPI_Comm_group(MPI_COMM_WORLD, &world_group); + + /* Pause workers for this bench, to avoid any impact on performances from polling workers */ + starpu_pause(); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + SERVER_PRINTF("# start_len = %lu B\n", start_len); + SERVER_PRINTF("# end_len = %lu B\n", end_len); + SERVER_PRINTF("# increment = %lu\n", increment); + SERVER_PRINTF("# multiplier = %f\n", multiplier); + SERVER_PRINTF("# iterations = %d\n", iterations); + SERVER_PRINTF("# coop = %s\n", starpu_mpi_coop_sends_get_use() ? "on" : "off"); + SERVER_PRINTF("# n.nodes length n.iter min.lat. median average max.lat. 
\n"); + + int nb_nodes; + for (nb_nodes = 2; nb_nodes <= worldsize; nb_nodes += node_increment) + { + SERVER_PRINTF("# starting %d nb_nodes...\n", nb_nodes); + + if (rank >= nb_nodes) + { + continue; + } + + int* group_ranks = malloc(nb_nodes * sizeof(int)); + for (i = 0; i < nb_nodes; i++) + { + group_ranks[i] = i; + } + + MPI_Group sub_group; + MPI_Group_incl(world_group, nb_nodes, group_ranks, &sub_group); + + MPI_Comm sub_comm; + MPI_Comm_create_group(MPI_COMM_WORLD, sub_group, 0, &sub_comm); + + MPI_Comm_rank(sub_comm, &subcomm_rank); + + uint64_t len; + for (len = start_len; len < end_len; len = _next(len, multiplier, increment)) + { + char* buf1 = malloc(len); + char* buf2 = malloc(len); + /* Precise the buffer where the data will be received, to take benefit from the rcache. */ + if (use_tasks) + { + starpu_vector_data_register(&data_handle_in, STARPU_MAIN_RAM, (uintptr_t) buf1, len, 1); + starpu_vector_data_register(&data_handle_out, STARPU_MAIN_RAM, (uintptr_t) buf2, len, 1); + starpu_mpi_data_register(data_handle_in, 0, 0); + } + else + { + starpu_vector_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) buf1, len, 1); + } + mpi_sync_clocks_t clocks = mpi_sync_clocks_init(sub_comm); + double* lats = (subcomm_rank == 0) ? malloc(iterations * sizeof(double)) : NULL; + int k; + for (k = 0; k < iterations; k++) + { + int* rc_all = (subcomm_rank == 0) ? 
malloc(nb_nodes * sizeof(int)) : NULL; + double local_lat = -1.0; + int rc = 0; + do + { + const double b = mpi_sync_clocks_barrier(clocks, NULL); + rc = (b < 0.0); + + const double t_begin = mpi_sync_clocks_get_time_usec(clocks); + bcast(sub_comm, subcomm_rank, nb_nodes-1); + const double t_end = mpi_sync_clocks_get_time_usec(clocks); + + local_lat = t_end - t_begin; + + /* collect sync barrier success */ + MPI_Gather(&rc, 1, MPI_INT, rc_all, 1, MPI_INT, 0, sub_comm); + if (subcomm_rank == 0) + { + int i; + for (i = 0; i < nb_nodes; i++) + { + rc |= rc_all[i]; + } + } + MPI_Bcast(&rc, 1, MPI_INT, 0, sub_comm); + } while(rc != 0); + + /* find maximum latency across nb_nodes */ + double* lat_all = (subcomm_rank == 0) ? malloc(nb_nodes * sizeof(double)) : NULL; + MPI_Gather(&local_lat, 1, MPI_DOUBLE, lat_all, 1, MPI_DOUBLE, 0, sub_comm); + + if (subcomm_rank == 0) + { + int i; + double max_lat = 0.0; + for (i = 0; i < nb_nodes; i++) + { + if (lat_all[i] > max_lat) + { + max_lat = lat_all[i]; + } + } + lats[k] = max_lat; + free(rc_all); + free(lat_all); + } + } + + /* compute time stats across iterations */ + if (subcomm_rank == 0) + { + qsort(lats, iterations, sizeof(double), &comp_double); + const double min_lat = lats[0]; + const double max_lat = lats[iterations - 1]; + const double med_lat = lats[(iterations - 1) / 2]; + double avg_lat = 0.0; + for (k = 0; k < iterations; k++) + { + avg_lat += lats[k]; + } + avg_lat /= iterations; + printf("%4d\t%9lu\t%7d\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf \n", nb_nodes, len, iterations, min_lat, med_lat, avg_lat, max_lat); + fflush(stdout); + free(lats); + } + if (use_tasks) + { + starpu_data_unregister(data_handle_in); + starpu_data_unregister(data_handle_out); + } + else + { + starpu_data_unregister(data_handle); + } + free(buf1); + free(buf2); + mpi_sync_clocks_shutdown(clocks); + clocks = NULL; + } + } + + SERVER_PRINTF("# bench end\n"); + + MPI_Group_free(&world_group); + starpu_resume(); + starpu_mpi_shutdown(); + 
MPI_Finalize(); + + return 0; +} diff --git a/mpi/examples/benchs/bench_helper.c b/mpi/examples/benchs/bench_helper.c new file mode 100644 index 0000000..aedee66 --- /dev/null +++ b/mpi/examples/benchs/bench_helper.c @@ -0,0 +1,62 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "bench_helper.h" + + +int comp_double(const void*_a, const void*_b) +{ + const double* a = _a; + const double* b = _b; + + if(*a < *b) + return -1; + else if(*a > *b) + return 1; + else + return 0; +} + + +uint64_t bench_next_size(uint64_t len) +{ + uint64_t next = len * MULT_DEFAULT; + + if(next <= len) + next++; + + return next; +} + + +uint64_t bench_nb_iterations(int iterations, uint64_t len) +{ + const uint64_t max_data = NX_MAX; + + if(len == 0) + len = 1; + + uint64_t data_size = ((uint64_t)iterations * (uint64_t)len); + + if(data_size > max_data) + { + iterations = (max_data / (uint64_t)len); + if(iterations < 2) + iterations = 2; + } + + return iterations; +} diff --git a/mpi/examples/benchs/bench_helper.h b/mpi/examples/benchs/bench_helper.h new file mode 100644 index 0000000..f1ab6f2 --- /dev/null +++ b/mpi/examples/benchs/bench_helper.h @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +#define NX_MIN 1 + +#ifdef STARPU_QUICK_CHECK + #define MULT_DEFAULT 4 + #define LOOPS_DEFAULT 50 + #define NX_MAX (64 * 1) // kB +#elif !defined(STARPU_LONG_CHECK) + #define MULT_DEFAULT 4 + #define LOOPS_DEFAULT 10000 + #define NX_MAX (128 * 1024 * 1024) // kB +#else + #define MULT_DEFAULT 2 + #define LOOPS_DEFAULT 100000 + #define NX_MAX (512 * 1024 * 1024) // kB +#endif + +int comp_double(const void*_a, const void*_b); +uint64_t bench_next_size(uint64_t len); +uint64_t bench_nb_iterations(int iterations, uint64_t len); diff --git a/mpi/examples/benchs/burst.c b/mpi/examples/benchs/burst.c new file mode 100644 index 0000000..b0e763c --- /dev/null +++ b/mpi/examples/benchs/burst.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This test sends simultaneously many communications, with various configurations. + * + * Global purpose is to run with trace recording, to watch the behaviour of communications. + */ + +#include +#include "helper.h" +#include "burst_helper.h" + +void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-nreqs") == 0) + { + burst_nb_requests = atoi(argv[++i]); + } + else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) + { + fprintf(stderr,"Usage: %s [-nreqs nreqs]\n", argv[0]); + fprintf(stderr,"Currently selected: %d requests in each burst\n", burst_nb_requests); + exit(EXIT_SUCCESS); + } + else + { + fprintf(stderr,"Unrecognized option %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) +{ + int ret, rank; + + parse_args(argc, argv); + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + burst_init_data(rank); + + burst_all(rank); + + /* Clear up */ + burst_free_data(rank); + + starpu_mpi_shutdown(); + + return 0; +} diff --git a/mpi/examples/benchs/burst_gemm.c b/mpi/examples/benchs/burst_gemm.c new file mode 100644 index 0000000..d150751 --- /dev/null +++ b/mpi/examples/benchs/burst_gemm.c @@ -0,0 +1,211 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Program to be executed with trace recording to watch the impact of + * computations (or task polling) on communications. + */ +#include +#include +#include +#include +#include +#include + +#include "helper.h" +#include "gemm_helper.h" +#include "burst_helper.h" + +static int gemm_warmup = 1; +static int gemm_warmup_wait = 0; + +void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nslices = strtol(argv[++i], &argptr, 10); + matrix_dim = 320 * nslices; + } + else if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + unsigned matrix_dim_tmp = strtol(argv[++i], &argptr, 10); + if (matrix_dim_tmp % 320 != 0) + { + fprintf(stderr, "Matrix size has to be a multiple of 320\n"); + } + else + { + matrix_dim = matrix_dim_tmp; + nslices = matrix_dim / 320; + } + } + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + else if (strcmp(argv[i], "-nreqs") == 0) + { + burst_nb_requests = atoi(argv[++i]); + } + else if (strcmp(argv[i], "-no-gemm-warmup") == 0) + { + gemm_warmup = 0; + } + else if (strcmp(argv[i], "-gemm-warmup-wait") == 0) + { + /* All warmup GEMMs will start at the same moment */ + gemm_warmup_wait = 1; + } + else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) + { + fprintf(stderr,"Usage: %s [-nblocks n] [-size size] [-check] [-nreqs nreqs] [-no-gemm-warmup] [-gemm-warmup-wait]\n", argv[0]); + fprintf(stderr,"Currently selected: matrix size: %u - %u blocks - %d requests in each burst - gemm warmup: %d -gemm-warmup-wait: %d\n", matrix_dim, nslices, burst_nb_requests, gemm_warmup, gemm_warmup_wait); + exit(EXIT_SUCCESS); + } + 
else + { + fprintf(stderr,"Unrecognized option %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) +{ + int ret, worldsize, mpi_rank; + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + matrix_dim = 16; +#endif + + parse_args(argc, argv); + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + if (worldsize < 2) + { + if (mpi_rank == 0) + FPRINTF(stderr, "We need 2 processes.\n"); + + starpu_mpi_shutdown(); + + return STARPU_TEST_SKIPPED; + } + + gemm_alloc_data(); + if (gemm_init_data() == -ENODEV) + goto enodev; + + /* GEMM warmup, to really load the BLAS library */ + if (gemm_warmup) + { + if (gemm_warmup_wait) + { + starpu_task_wait_for_all(); + starpu_pause(); + } + + if(gemm_submit_tasks() == -ENODEV) + goto enodev; + + if (gemm_warmup_wait) + { + starpu_resume(); + } + } + + burst_init_data(mpi_rank); + + /* Wait for everything and everybody: */ + starpu_task_wait_for_all(); + starpu_mpi_barrier(MPI_COMM_WORLD); + + FPRINTF(stderr, "** Burst warmup **\n"); + burst_all(mpi_rank); + + starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces + + FPRINTF(stderr, "** Burst while there is no task available, but workers are polling **\n"); + burst_all(mpi_rank); + + starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces + + FPRINTF(stderr, "** Burst while there is no task available, workers are paused **\n"); + starpu_pause(); + burst_all(mpi_rank); + + starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces + + FPRINTF(stderr, "** Burst while workers are really working **\n"); + if(gemm_submit_tasks() == -ENODEV) + goto enodev; + starpu_resume(); + + burst_all(mpi_rank); + + FPRINTF(stderr, "Burst done, now waiting for computing tasks to 
finish\n"); + + /* Wait for everything and everybody: */ + starpu_task_wait_for_all(); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_sleep(0.3); // sleep to easily distinguish different parts in traces + + FPRINTF(stderr, "** Workers are computing, without communications **\n"); + starpu_pause(); + if(gemm_submit_tasks() == -ENODEV) + goto enodev; + starpu_resume(); + + /* Wait for everything and everybody: */ + starpu_task_wait_for_all(); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_sleep(0.3); // sleep to easily distinguish different parts in traces + + FPRINTF(stderr, "** Burst while workers are computing, but polling a moment between each task **\n"); + starpu_pause(); + gemm_add_polling_dependencies(); + if(gemm_submit_tasks_with_tags(/* enable task tags */ 1) == -ENODEV) + goto enodev; + starpu_resume(); + + burst_all(mpi_rank); + + /* Wait for everything and everybody: */ + starpu_task_wait_for_all(); + starpu_mpi_barrier(MPI_COMM_WORLD); + +enodev: + gemm_release(); + burst_free_data(mpi_rank); + + starpu_mpi_shutdown(); + + return ret; +} diff --git a/mpi/examples/benchs/burst_helper.c b/mpi/examples/benchs/burst_helper.c new file mode 100644 index 0000000..c1c3261 --- /dev/null +++ b/mpi/examples/benchs/burst_helper.c @@ -0,0 +1,250 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+#include
+
+#include "helper.h"
+#include "burst_helper.h"
+
+#if defined(STARPU_SIMGRID) || defined(STARPU_QUICK_CHECK)
+#define NB_REQUESTS 10
+#else
+#define NB_REQUESTS 50
+#endif
+#define NX_ARRAY (320 * 320)
+
+static starpu_data_handle_t* recv_handles;
+static starpu_data_handle_t* send_handles;
+static float** recv_buffers;
+static float** send_buffers;
+static starpu_mpi_req* recv_reqs;
+static starpu_mpi_req* send_reqs;
+
+/* Number of floats in each exchanged vector. It is chosen by
+ * burst_init_data() and reused by burst_free_data(), so that the sizes given
+ * to starpu_malloc() and starpu_free_noflag() always match. */
+static unsigned burst_nx = NX_ARRAY;
+
+int burst_nb_requests = NB_REQUESTS;
+
+/* Wait for the request pointed to by req if it is still pending (non-NULL),
+ * and check the return value of that very wait. Each request gets its own
+ * check, so a failing receive cannot be hidden by a later successful send. */
+static void burst_wait_req(starpu_mpi_req *req)
+{
+	if (*req)
+	{
+		int ret = starpu_mpi_wait(req, MPI_STATUS_IGNORE);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait");
+	}
+}
+
+/* Allocate, zero and register the send and receive vectors of ranks 0 and 1
+ * (other ranks allocate nothing). When running on Valgrind, both the vector
+ * size and the number of requests are reduced. */
+void burst_init_data(int rank)
+{
+	int i, ret;
+
+	burst_nx = NX_ARRAY;
+#ifdef STARPU_HAVE_VALGRIND_H
+	if (RUNNING_ON_VALGRIND)
+	{
+		burst_nx = 4*4;
+		burst_nb_requests = 4;
+	}
+#endif
+
+	if (rank == 0 || rank == 1)
+	{
+		recv_handles = malloc(burst_nb_requests * sizeof(*recv_handles));
+		send_handles = malloc(burst_nb_requests * sizeof(*send_handles));
+		recv_buffers = malloc(burst_nb_requests * sizeof(*recv_buffers));
+		send_buffers = malloc(burst_nb_requests * sizeof(*send_buffers));
+		recv_reqs = malloc(burst_nb_requests * sizeof(*recv_reqs));
+		send_reqs = malloc(burst_nb_requests * sizeof(*send_reqs));
+		STARPU_ASSERT(recv_handles && send_handles && recv_buffers && send_buffers && recv_reqs && send_reqs);
+
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			ret = starpu_malloc((void **)&send_buffers[i], burst_nx * sizeof(float));
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+			memset(send_buffers[i], 0, burst_nx * sizeof(float));
+			starpu_vector_data_register(&send_handles[i], STARPU_MAIN_RAM, (uintptr_t) send_buffers[i], burst_nx, sizeof(float));
+
+			ret = starpu_malloc((void **)&recv_buffers[i], burst_nx * sizeof(float));
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+			memset(recv_buffers[i], 0, burst_nx * sizeof(float));
+			starpu_vector_data_register(&recv_handles[i], STARPU_MAIN_RAM, (uintptr_t) recv_buffers[i], burst_nx, sizeof(float));
+		}
+	}
+}
+
+/* Unregister and release everything allocated by burst_init_data(). */
+void burst_free_data(int rank)
+{
+	if (rank == 0 || rank == 1)
+	{
+		int i = 0;
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			starpu_data_unregister(send_handles[i]);
+			starpu_free_noflag(send_buffers[i], burst_nx * sizeof(float));
+
+			starpu_data_unregister(recv_handles[i]);
+			starpu_free_noflag(recv_buffers[i], burst_nx * sizeof(float));
+		}
+
+		free(recv_handles);
+		free(send_handles);
+		free(recv_buffers);
+		free(send_buffers);
+		free(recv_reqs);
+		free(send_reqs);
+
+		/* Do not keep dangling pointers around after release */
+		recv_handles = NULL;
+		send_handles = NULL;
+		recv_buffers = NULL;
+		send_buffers = NULL;
+		recv_reqs = NULL;
+		send_reqs = NULL;
+	}
+}
+
+/* Burst simultaneous from both nodes: 0 and 1 post all the recvs, synchronise, and then post all the sends */
+void burst_bidir(int rank)
+{
+	int other_rank = (rank == 0) ? 1 : 0;
+	int i, ret;
+
+	FPRINTF(stderr, "Simultaneous....start (rank %d)\n", rank);
+
+	if (rank == 0 || rank == 1)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			recv_reqs[i] = NULL;
+			ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv");
+		}
+	}
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	if (rank == 0 || rank == 1)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			send_reqs[i] = NULL;
+			ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio");
+		}
+
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			burst_wait_req(&recv_reqs[i]);
+			burst_wait_req(&send_reqs[i]);
+		}
+	}
+
+	FPRINTF(stderr, "Simultaneous....end (rank %d)\n", rank);
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+}
+
+/* Burst in a single direction: the receiver posts all the recvs, all ranks
+ * synchronise, then the sender posts all the sends. */
+void burst_unidir(int sender, int receiver, int rank)
+{
+	FPRINTF(stderr, "%d -> %d... start (rank %d)\n", sender, receiver, rank);
+	int i, ret;
+
+	if (rank == receiver)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			recv_reqs[i] = NULL;
+			ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], sender, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv");
+		}
+	}
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	if (rank == sender)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			send_reqs[i] = NULL;
+			ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], receiver, i, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio");
+		}
+	}
+
+	if (rank == sender || rank == receiver)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			/* The sender only waits for its sends, the receiver only for its recvs */
+			if (rank == sender)
+				burst_wait_req(&send_reqs[i]);
+			else
+				burst_wait_req(&recv_reqs[i]);
+		}
+	}
+
+	FPRINTF(stderr, "%d -> %d... end (rank %d)\n", sender, receiver, rank);
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+}
+
+/* Half burst from both nodes, second half burst is triggered after some requests finished. */
+void burst_bidir_half_postponed(int rank)
+{
+	int other_rank = (rank == 0) ? 1 : 0;
+	int i, ret;
+
+	FPRINTF(stderr, "Half/half burst...start (rank %d)\n", rank);
+
+	if (rank == 0 || rank == 1)
+	{
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			recv_reqs[i] = NULL;
+			ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv");
+		}
+	}
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	if (rank == 0 || rank == 1)
+	{
+		/* First half of the sends */
+		for (i = 0; i < (burst_nb_requests / 2); i++)
+		{
+			send_reqs[i] = NULL;
+			ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio");
+		}
+
+		/* Let one reception of the first half complete before posting the rest */
+		burst_wait_req(&recv_reqs[burst_nb_requests / 4]);
+
+		/* Second half of the sends */
+		for (i = (burst_nb_requests / 2); i < burst_nb_requests; i++)
+		{
+			send_reqs[i] = NULL;
+			ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio");
+		}
+
+		for (i = 0; i < burst_nb_requests; i++)
+		{
+			burst_wait_req(&recv_reqs[i]);
+			burst_wait_req(&send_reqs[i]);
+		}
+	}
+
+	FPRINTF(stderr, "Half/half burst...done (rank %d)\n", rank);
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+}
+
+/* Run the four burst patterns one after the other and print the elapsed time. */
+void burst_all(int rank)
+{
+	double start, end;
+	start = starpu_timing_now();
+
+	/* Burst simultaneous from both nodes: 0 and 1 post all the recvs, synchronise, and then post all the sends */
+	burst_bidir(rank);
+
+	/* Burst from 0 to 1 : rank 1 posts all the recvs, barrier, then rank 0 posts all the sends */
+	burst_unidir(0, 1, rank);
+
+	/* Burst from 1 to 0 : rank 0 posts all the recvs, barrier, then rank 1 posts all the sends */
+	burst_unidir(1, 0, rank);
+
+	/* Half burst from both nodes, second half burst is triggered after some requests finished. */
+	burst_bidir_half_postponed(rank);
+
+	end = starpu_timing_now();
+	FPRINTF(stderr, "All bursts took %.0f ms\n", (end - start) / 1000.0);
+}
diff --git a/mpi/examples/benchs/burst_helper.h b/mpi/examples/benchs/burst_helper.h
new file mode 100644
index 0000000..4f2788c
--- /dev/null
+++ b/mpi/examples/benchs/burst_helper.h
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __MPI_TESTS_BURST_HELPER__
+#define __MPI_TESTS_BURST_HELPER__
+
+extern int burst_nb_requests;
+
+void burst_init_data(int rank);
+void burst_free_data(int rank);
+void burst_bidir(int rank);
+void burst_unidir(int sender, int receiver, int rank);
+void burst_bidir_half_postponed(int rank);
+void burst_all(int rank);
+
+#endif /* __MPI_TESTS_BURST_HELPER__ */
diff --git a/mpi/examples/benchs/gemm_helper.c b/mpi/examples/benchs/gemm_helper.c
new file mode 100644
index 0000000..f6045e3
--- /dev/null
+++ b/mpi/examples/benchs/gemm_helper.c
@@ -0,0 +1,336 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "../../examples/mult/simple.h"
+#include "helper.h"
+#include "gemm_helper.h"
+
+
+/* Make the calling function return -ENODEV when the submission reported that
+ * no device can run the task; any other error is fatal. */
+#define CHECK_TASK_SUBMIT(ret) do {				\
+	if ((ret) == -ENODEV)					\
+	{							\
+		return -ENODEV;					\
+	}							\
+	STARPU_CHECK_RETURN_VALUE((ret), "starpu_task_submit");	\
+} while(0)
+
+
+unsigned nslices = 4;
+#if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID)
+unsigned matrix_dim = 256;
+#else
+unsigned matrix_dim = 320 * 4;
+#endif
+unsigned check = 0;
+int comm_thread_cpuid = -1;
+
+static TYPE *A, *B, *C;
+static starpu_data_handle_t A_handle, B_handle, C_handle;
+
+/* Verify the product on the CPU: subtract A*B from C and report whether the
+ * sum of the absolute values of what remains is small enough. */
+static void check_output(void)
+{
+	/* compute C = C - AB */
+	CPU_GEMM("N", "N", matrix_dim, matrix_dim, matrix_dim, (TYPE)-1.0f, A, matrix_dim, B, matrix_dim, (TYPE)1.0f, C, matrix_dim);
+
+	/* make sure C = 0 */
+	TYPE err;
+	err = CPU_ASUM(matrix_dim*matrix_dim, C, 1);
+
+	if (err < matrix_dim*matrix_dim*0.001)
+	{
+		FPRINTF(stderr, "Results are OK\n");
+	}
+	else
+	{
+		int max;
+		max = CPU_IAMAX(matrix_dim*matrix_dim, C, 1);
+
+		FPRINTF(stderr, "There were errors ... err = %f\n", err);
+		FPRINTF(stderr, "Max error : %e\n", C[max]);
+	}
+}
+
+
+/* Register A, B and C as matrix_dim x matrix_dim matrices and cut them in
+ * nslices parts: B with the vertical filter, A with the block filter, and C
+ * with both (nslices x nslices blocks). */
+static void partition_mult_data(void)
+{
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
+		matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE));
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
+		matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE));
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
+		matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE));
+
+	struct starpu_data_filter vert;
+	memset(&vert, 0, sizeof(vert));
+	vert.filter_func = starpu_matrix_filter_vertical_block;
+	vert.nchildren = nslices;
+
+	struct starpu_data_filter horiz;
+	memset(&horiz, 0, sizeof(horiz));
+	horiz.filter_func = starpu_matrix_filter_block;
+	horiz.nchildren = nslices;
+
+	starpu_data_partition(B_handle, &vert);
+	starpu_data_partition(A_handle, &horiz);
+
+	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
+}
+
+
+/* Fill both sub-matrices (a slice of A and a slice of B) with random values.
+ * Each sub-matrix is walked with its own nx, ny and leading dimension: the
+ * slices are views inside the full matrices, so rows are ld elements apart,
+ * not nx, exactly as cpu_mult() assumes when passing ld to GEMM. */
+static void cpu_init_matrix_random(void *descr[], void *arg)
+{
+	(void)arg;
+	unsigned buf;
+
+	for (buf = 0; buf < 2; buf++)
+	{
+		TYPE *sub = (TYPE *)STARPU_MATRIX_GET_PTR(descr[buf]);
+		unsigned nx = STARPU_MATRIX_GET_NX(descr[buf]);
+		unsigned ny = STARPU_MATRIX_GET_NY(descr[buf]);
+		unsigned ld = STARPU_MATRIX_GET_LD(descr[buf]);
+		unsigned i, j;
+
+		for (j = 0; j < ny; j++)
+		{
+			for (i = 0; i < nx; i++)
+			{
+				sub[(size_t)j*ld + i] = (TYPE) (starpu_drand48());
+			}
+		}
+	}
+}
+
+
+/* Zero one block of C, honouring its leading dimension so that only the
+ * elements of this block are written. */
+static void cpu_init_matrix_zero(void *descr[], void *arg)
+{
+	(void)arg;
+	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
+	unsigned nx = STARPU_MATRIX_GET_NX(descr[0]);
+	unsigned ny = STARPU_MATRIX_GET_NY(descr[0]);
+	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
+	unsigned i, j;
+
+	for (j = 0; j < ny; j++)
+	{
+		for (i = 0; i < nx; i++)
+		{
+			subA[(size_t)j*ld + i] = (TYPE) (0);
+		}
+	}
+}
+
+
+/* GEMM kernel: subC = subA * subB. When run as a parallel (combined worker)
+ * task, each member of the combined worker computes its own share of the
+ * columns of subC. */
+static void cpu_mult(void *descr[], void *arg)
+{
+	(void)arg;
+	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
+	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
+	TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);
+
+	unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]);
+	unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]);
+	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);
+
+	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
+	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
+	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);
+
+	int worker_size = starpu_combined_worker_get_size();
+
+	if (worker_size == 1)
+	{
+		/* Sequential CPU task */
+		CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
+	}
+	else
+	{
+		/* Parallel CPU task */
+		unsigned rank = starpu_combined_worker_get_rank();
+
+		unsigned block_size = (nyC + worker_size - 1)/worker_size;
+		unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank;
+
+		STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1]));
+
+		TYPE *new_subB = &subB[block_size*rank];
+		TYPE *new_subC = &subC[block_size*rank];
+
+		CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC);
+	}
+}
+
+static struct starpu_perfmodel starpu_gemm_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = STARPU_GEMM_STR(gemm)
+};
+
+static struct starpu_codelet cl =
+{
+	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {cpu_mult},
+	.cpu_funcs_name = {"cpu_mult"},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_RW},
+	.model = &starpu_gemm_model
+};
+
+static struct starpu_codelet cl_init_matrix_random =
+{
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {cpu_init_matrix_random},
+	.cpu_funcs_name = {"cpu_init_matrix_random"},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W},
+	.name = "init_matrix_random",
+	.color = 0xffa500 // orange
+};
+
+static struct starpu_codelet cl_init_matrix_zero =
+{
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {cpu_init_matrix_zero},
+	.cpu_funcs_name = {"cpu_init_matrix_zero"},
+	.nbuffers = 1,
+	.modes = {STARPU_W},
+	.name = "init_matrix_zero",
+	.color = 0x808000 // olive
+};
+
+/* Allocate and partition buffers */
+void gemm_alloc_data(void)
+{
+	int ret;
+
+	ret = starpu_malloc_flags((void **)&A, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc_flags");
+	ret = starpu_malloc_flags((void **)&B, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc_flags");
+	ret = starpu_malloc_flags((void **)&C, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc_flags");
+	partition_mult_data();
+}
+
+/* Submit tasks to initialize matrices: fill them with zeros or random numbers.
+ * Returns 0, or -ENODEV if a task could not be submitted for lack of device.
+ * Nothing is submitted when built for SimGrid. */
+int gemm_init_data(void)
+{
+#ifndef STARPU_SIMGRID
+	int ret;
+	unsigned x, y;
+
+	for (x = 0; x < nslices; x++)
+	{
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &cl_init_matrix_random;
+		task->handles[0] = starpu_data_get_sub_data(A_handle, 1, x);
+		task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x);
+		ret = starpu_task_submit(task);
+		CHECK_TASK_SUBMIT(ret);
+
+		for (y = 0; y < nslices; y++)
+		{
+			task = starpu_task_create();
+			task->cl = &cl_init_matrix_zero;
+			task->handles[0] = starpu_data_get_sub_data(C_handle, 2, x, y);
+			ret = starpu_task_submit(task);
+			CHECK_TASK_SUBMIT(ret);
+		}
+	}
+#endif
+	return 0;
+}
+
+/* Submit tasks to compute the GEMM */
+int gemm_submit_tasks(void)
+{
+	return gemm_submit_tasks_with_tags(/* by default, disable task tags */ 0);
+}
+
+/* Submit one GEMM task per block of C. When with_tags is non-zero, tasks are
+ * given the tags 1, 2, 3... in submission order, which are the tags used by
+ * gemm_add_polling_dependencies().
+ * Returns 0, or -ENODEV if a task could not be submitted for lack of device. */
+int gemm_submit_tasks_with_tags(int with_tags)
+{
+	int ret;
+	unsigned x, y;
+	starpu_tag_t task_tag = 0;
+
+	for (x = 0; x < nslices; x++)
+	for (y = 0; y < nslices; y++)
+	{
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &cl;
+		task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y);
+		task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x);
+		task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y);
+		task->flops = 2ULL * (matrix_dim/nslices) * (matrix_dim/nslices) * matrix_dim;
+
+		if (with_tags)
+		{
+			task->use_tag = 1;
+			task->tag_id = ++task_tag;
+		}
+
+		ret = starpu_task_submit(task);
+		CHECK_TASK_SUBMIT(ret);
+		starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y));
+	}
+
+	return 0;
+}
+
+/* Add dependencies between GEMM tasks to see the impact of polling workers which will at the end get a task.
+ * The new dependency graph has the following shape:
+ * - the same number of GEMMs as the number of workers are executed in parallel on all workers ("a column of tasks")
+ * - then a GEMM waits all tasks of the previous column of tasks, and is executed on a worker
+ * - the next column of tasks waits for the previous GEMM
+ * - and so on...
+ *
+ * worker 0 | 1 | 4 | 5 | 8 |  9 |
+ * worker 1 | 2 |   | 6 |   | 10 | ...
+ * worker 2 | 3 |   | 7 |   | 11 |
+ *
+ * This function has to be called before gemm_submit_tasks_with_tags(1).
+ */
+void gemm_add_polling_dependencies(void)
+{
+	starpu_tag_t nb_tasks = (starpu_tag_t) nslices * (starpu_tag_t) nslices;
+	unsigned nb_workers = starpu_worker_get_count();
+	starpu_tag_t synchro_tag;
+	starpu_tag_t previous_tag;
+	starpu_tag_t next_tag;
+
+	for (synchro_tag = nb_workers+1; synchro_tag <= nb_tasks; synchro_tag += (nb_workers+1))
+	{
+		// this synchro tag depends on tasks of previous column of tasks:
+		for (previous_tag = synchro_tag - nb_workers; previous_tag < synchro_tag; previous_tag++)
+		{
+			starpu_tag_declare_deps(synchro_tag, 1, previous_tag);
+		}
+
+		// tasks of the next column of tasks depend on this synchro tag:
+		// this actually allows workers to poll for new tasks, while no task is available
+		for (next_tag = synchro_tag+1; next_tag < (synchro_tag + nb_workers + 1) && next_tag <= nb_tasks; next_tag++)
+		{
+			starpu_tag_declare_deps(next_tag, 1, synchro_tag);
+		}
+	}
+
+}
+
+/* Gather the partitioned data back to main memory, unregister the matrices,
+ * optionally check the result (when the global "check" is set), and free the
+ * buffers allocated by gemm_alloc_data(). */
+void gemm_release(void)
+{
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(A_handle);
+	starpu_data_unregister(B_handle);
+	starpu_data_unregister(C_handle);
+
+	if (check)
+		check_output();
+
+	starpu_free_flags(A, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	starpu_free_flags(B, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+	starpu_free_flags(C, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
+}
+
+
diff --git a/mpi/examples/benchs/gemm_helper.h b/mpi/examples/benchs/gemm_helper.h
new file mode 100644
index 0000000..199a4b6
--- /dev/null
+++ b/mpi/examples/benchs/gemm_helper.h
@@ -0,0 +1,35 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __MPI_TESTS_GEMM_HELPER__
+#define __MPI_TESTS_GEMM_HELPER__
+
+#include
+
+extern unsigned nslices;
+extern unsigned matrix_dim;
+extern unsigned check;
+extern int comm_thread_cpuid;
+
+
+void gemm_alloc_data(void);
+int gemm_init_data(void);
+int gemm_submit_tasks(void);
+void gemm_release(void);
+void gemm_add_polling_dependencies(void);
+int gemm_submit_tasks_with_tags(int with_tags);
+
+#endif /* __MPI_TESTS_GEMM_HELPER__ */
diff --git a/mpi/examples/benchs/recv_wait_finalize_bench.c b/mpi/examples/benchs/recv_wait_finalize_bench.c
new file mode 100644
index 0000000..6bf01b5
--- /dev/null
+++ b/mpi/examples/benchs/recv_wait_finalize_bench.c
@@ -0,0 +1,294 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This benchmark measures the impact of the STARPU_MPI_RECV_FINALIZE env var:
+ * when set to 0, StarPU can use received buffers for tasks only reading these
+ * buffers, while the communication library still holds a reference on this
+ * buffer (to continue the tree broadcast, for instance).
+ * Inspired a lot from NewMadeleine examples/mcast/nm_mcast_prio.c
+ *
+ * Synchronized clocks (mpi_sync_clocks) are available here:
+ * https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks
+ * and are detected during StarPU's configure.
+ */
+
+#include
+#include
+#include "helper.h"
+
+#define SERVER_PRINTF(fmt, ...) do { if(rank == 0) { printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0)
+
+#define DEFAULT_ARRAY_SIZE 1
+#ifdef STARPU_QUICK_CHECK
+	#define DEFAULT_ROUND 5
+#else
+	#define DEFAULT_ROUND 200
+#endif
+
+static starpu_data_handle_t data_handle;
+static int rank;
+static double received_time, finalized_time;
+static mpi_sync_clocks_t clocks;
+static int* prios;
+
+// Codelet executed just to block start of the broadcast and be sure the broadcast will be correctly detected:
+static void trigger_coop_cpu_func(void *descr[], void *args)
+{
+	(void) descr;
+	(void) args;
+}
+
+static struct starpu_codelet trigger_coop_cl =
+{
+	.cpu_funcs = { trigger_coop_cpu_func },
+	.cpu_funcs_name = { "trigger_coop_task" },
+	.name = "trigger_coop",
+	.nbuffers = 1,
+	.modes = { STARPU_W }
+};
+
+// Codelet executed when data just arrived, but communication library has still a reference on it
+static void received_cpu_func(void *descr[], void *args)
+{
+	(void) descr;
+	(void) args;
+
+	received_time = mpi_sync_clocks_get_time_usec(clocks);
+}
+
+static struct starpu_codelet received_cl =
+{
+	.cpu_funcs = { received_cpu_func },
+	.cpu_funcs_name = { "received_task" },
+	.name = "received",
+	.nbuffers = 1,
+	.modes = { STARPU_R }
+};
+
+// Codelet executed when data is released by communication library
+static void finalized_cpu_func(void *descr[], void *args)
+{
+	(void) descr;
+	(void) args;
+
+	finalized_time = mpi_sync_clocks_get_time_usec(clocks);
+}
+
+static struct starpu_codelet finalized_cl =
+{
+	.cpu_funcs = { finalized_cpu_func },
+	.cpu_funcs_name = { "finalized_task" },
+	.name = "finalized",
+	.nbuffers = 1,
+	.modes = { STARPU_W }
+};
+
+
+static void usage(void)
+{
+	fprintf(stderr, "-s array size - number of bytes to broadcast [%d]\n", DEFAULT_ARRAY_SIZE);
+	fprintf(stderr, "-rounds rounds - number of iterations [%d]\n", DEFAULT_ROUND);
+}
+
+
+/* Run one broadcast round from node 0 to nodes 1..nb_dests-1.
+ * On return, *time_to_receive and *time_to_finalize hold the delays between
+ * the start of the round and the timestamps recorded on this node by the
+ * "received" and "finalized" tasks. */
+static void bcast(int nb_dests, double* time_to_receive, double* time_to_finalize)
+{
+	int i = 0;
+
+	starpu_mpi_data_set_rank(data_handle, 0);
+
+	/* This first task is just to retain communications, and be sure they
+	 * will be detected as a broadcast, if there are enough nodes. */
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &trigger_coop_cl, STARPU_W, data_handle, 0);
+
+	for (i = 1; i < nb_dests; i++)
+	{
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &received_cl,
+				       STARPU_R, data_handle,
+				       STARPU_EXECUTE_ON_NODE, i,
+				       STARPU_PRIORITY, prios[i-1],
+				       0);
+	}
+	for (i = 1; i < nb_dests; i++)
+	{
+		/* Little bit hacky here: we change the owner of the handle to
+		 * be the node on which we are just about to submit a task to
+		 * be executed on that node, with this handle. This is done to
+		 * avoid additional communications we don't want in this bench.
+		 * In real applications, the coherency of the data will
+		 * probably be broken, but for this bench we don't care. */
+		starpu_mpi_data_set_rank(data_handle, i);
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &finalized_cl, STARPU_W, data_handle, 0);
+	}
+
+	mpi_sync_clocks_barrier(clocks, NULL);
+
+	const double t_begin = mpi_sync_clocks_get_time_usec(clocks);
+
+	/* Resume StarPU's workers only after submitting tasks, to make
+	 * sure the coop will be correctly detected. */
+	starpu_resume();
+	starpu_task_wait_for_all();
+
+	starpu_pause();
+
+	*time_to_receive = received_time - t_begin;
+	*time_to_finalize = finalized_time - t_begin;
+}
+
+
+/* Parse the options, run the requested number of broadcast rounds and let
+ * rank 0 print, for every destination node, the average delays. */
+int main(int argc, char**argv)
+{
+	int i, ret, worldsize, rounds = DEFAULT_ROUND, thread_support;
+	long long int s = DEFAULT_ARRAY_SIZE;
+	double time_to_receive, time_to_finalize;
+	double total_time_to_receive = 0.0, total_time_to_finalize = 0.0;
+
+	for (i = 1; i < argc; i++)
+	{
+		/* An option is only accepted when its value is present: an
+		 * option given last without value is reported as illegal
+		 * instead of reading past the end of argv. */
+		if (strcmp(argv[i], "-s") == 0 && i + 1 < argc)
+		{
+			s = (int long long) atoi(argv[++i]);
+			continue;
+		}
+		if (strcmp(argv[i], "-rounds") == 0 && i + 1 < argc)
+		{
+			rounds = atoi(argv[++i]);
+			continue;
+		}
+
+		fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]);
+		usage();
+		exit(1);
+	}
+
+	if (rounds <= 0)
+	{
+		FPRINTF(stderr, "The number of iterations has to be greater than 0.\n");
+		return EXIT_FAILURE;
+	}
+
+	if (s <= 0)
+	{
+		/* The size is given to malloc() and memset() below */
+		FPRINTF(stderr, "The array size has to be greater than 0.\n");
+		return EXIT_FAILURE;
+	}
+
+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &thread_support) != MPI_SUCCESS)
+	{
+		FPRINTF(stderr, "MPI_Init_thread failed\n");
+		return EXIT_FAILURE;
+	}
+
+	if (thread_support < MPI_THREAD_MULTIPLE)
+	{
+		/* We need MPI_THREAD_MULTIPLE for the StarPU's MPI thread and
+		 * the main thread calling functions from mpi_sync_clocks. */
+		FPRINTF(stderr, "This benchmark requires MPI_THREAD_MULTIPLE support.\n");
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
+
+	if (worldsize < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		/* MPI was initialized by this program, not by StarPU, so it
+		 * has to be finalized here, as on the normal exit path. */
+		MPI_Finalize();
+
+		return STARPU_TEST_SKIPPED;
+	}
+
+	/* Pause workers for this bench, to avoid any impact on performances
+	 * from polling workers, and to detect correctly coop */
+	starpu_pause();
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	SERVER_PRINTF("# message size = %lld B\n", s);
+	SERVER_PRINTF("# iterations = %d\n", rounds);
+	SERVER_PRINTF("# coop = %s\n", starpu_mpi_coop_sends_get_use() ? "on" : "off");
+	SERVER_PRINTF("# node ; prio ; delay data (usec.); finalized (usec.)\n");
+
+	clocks = mpi_sync_clocks_init(MPI_COMM_WORLD);
+
+	prios = malloc((worldsize-1) * sizeof(int));
+	STARPU_ASSERT(prios);
+	for (i = 0; i < worldsize-1; i++)
+	{
+		prios[i] = i;
+	}
+
+	char* buffer = malloc(s);
+	STARPU_ASSERT(buffer);
+	memset(buffer, 0, s);
+
+	/* To keep the same buffer and get good performances with rcache, we
+	 * provide the buffer for sender and receivers. If we let StarPU manage
+	 * the buffer, it can change it between iterations.
+	 * The original owner of the data (the sender) is defined with
+	 * starpu_mpi_data_set_rank() in bcast(). */
+	starpu_vector_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) buffer, s, sizeof(char));
+	starpu_mpi_data_set_tag(data_handle, 0xee);
+
+	for (i = 0; i < rounds; i++)
+	{
+		bcast(worldsize, &time_to_receive, &time_to_finalize);
+		total_time_to_receive += time_to_receive;
+		total_time_to_finalize += time_to_finalize;
+	}
+
+	total_time_to_receive /= rounds;
+	total_time_to_finalize /= rounds;
+
+	if (rank == 0)
+	{
+		double* totals_time_to_receive = malloc(sizeof(double) * worldsize);
+		double* totals_time_to_finalize = malloc(sizeof(double) * worldsize);
+		STARPU_ASSERT(totals_time_to_receive && totals_time_to_finalize);
+
+		MPI_Gather(&total_time_to_receive, 1, MPI_DOUBLE, totals_time_to_receive, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+		MPI_Gather(&total_time_to_finalize, 1, MPI_DOUBLE, totals_time_to_finalize, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+		for (i = 1; i < worldsize; i++)
+		{
+			printf("%d \t %d \t %g \t %g\n", i, prios[i-1], totals_time_to_receive[i], totals_time_to_finalize[i]);
+		}
+
+		free(totals_time_to_receive);
+		free(totals_time_to_finalize);
+	}
+	else
+	{
+		MPI_Gather(&total_time_to_receive, 1, MPI_DOUBLE, NULL, 0, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+		MPI_Gather(&total_time_to_finalize, 1, MPI_DOUBLE, NULL, 0, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+	}
+
+	starpu_data_unregister(data_handle);
+	free(buffer);
+
+	free(prios);
+
+	mpi_sync_clocks_shutdown(clocks);
+
+	SERVER_PRINTF("# bench end\n");
+
+	starpu_resume();
+	starpu_mpi_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/examples/benchs/sendrecv_bench.c b/mpi/examples/benchs/sendrecv_bench.c
new file mode 100644
index 0000000..87922f6
--- /dev/null
+++ b/mpi/examples/benchs/sendrecv_bench.c
@@ -0,0 +1,126 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/*
+ * Basic send receive benchmark.
+ * Inspired a lot from NewMadeleine examples/benchmarks/nm_bench_sendrecv.c
+ *
+ * The option --bidir is available to do full-duplex communications.
+ */
+
+#include
+#include "helper.h"
+#include "abstract_sendrecv_bench.h"
+
+/* Print the list of supported options on stderr. */
+static void print_options(void)
+{
+	fprintf(stderr, "Options:\n");
+	fprintf(stderr, "\t-h --help display this help\n");
+	fprintf(stderr, "\t-p pause workers during benchmark\n");
+	fprintf(stderr, "\t--bidir full-duplex communications\n");
+	fprintf(stderr, "\t--memnode-cuda allocate message buffers on first cuda device\n");
+}
+
+/* Print the help and terminate the program successfully. */
+static inline void man(void)
+{
+	print_options();
+	exit(EXIT_SUCCESS);
+}
+
+/* Parse the options, then run the send/receive benchmark.
+ * Returns 0 on success, STARPU_TEST_SKIPPED when the benchmark cannot run on
+ * this configuration, EXIT_FAILURE on an unrecognized option. */
+int main(int argc, char **argv)
+{
+	int ret, rank, worldsize;
+	int pause_workers = 0;
+	int i;
+	int bidir = 0;
+	int mem_node = STARPU_MAIN_RAM;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-p") == 0)
+		{
+			pause_workers = 1;
+			printf("Workers will be paused during benchmark.\n");
+		}
+		else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
+		{
+			/* StarPU-MPI is already initialized: shut it down
+			 * before leaving. */
+			starpu_mpi_shutdown();
+			man();
+		}
+		else if (strcmp(argv[i], "--bidir") == 0)
+		{
+			bidir = 1;
+			printf("Communications will be full-duplex.\n");
+		}
+		else if (strcmp(argv[i], "--memnode-cuda") == 0)
+		{
+			int worker_id = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
+			if(worker_id == -1)
+			{
+				fprintf(stderr,"Error: asked for CUDA memory node allocation, but no cuda worker found.\n");
+				starpu_mpi_shutdown();
+				return STARPU_TEST_SKIPPED;
+			}
+			else
+			{
+				mem_node = starpu_worker_get_memory_node(worker_id);
+				fprintf(stderr,"Memory will be allocated on the first CUDA worker.\n");
+			}
+		}
+		else
+		{
+			/* A bad command line is an error, not a success */
+			fprintf(stderr,"Unrecognized option %s\n", argv[i]);
+			print_options();
+			starpu_mpi_shutdown();
+			return EXIT_FAILURE;
+		}
+	}
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
+
+	if (worldsize < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need 2 processes.\n");
+
+		starpu_mpi_shutdown();
+
+		return STARPU_TEST_SKIPPED;
+	}
+
+#if !defined(STARPU_LONG_CHECK)
+	if (rank == 0)
+	{
+		printf("To have a more precise benchmark, configure StarPU with --enable-long-check\n");
+	}
+#endif
+
+	if (pause_workers)
+	{
+		/* Pause workers for this bench: all workers polling for tasks has a strong impact on performances */
+		starpu_pause();
+	}
+
+	ret = sendrecv_bench(rank, NULL, bidir, mem_node);
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "No device available\n");
+	}
+
+	if (pause_workers)
+	{
+		starpu_resume();
+	}
+
+	starpu_mpi_shutdown();
+
+	/* Report a run that could not happen as skipped rather than passed */
+	return (ret == -ENODEV) ? STARPU_TEST_SKIPPED : 0;
+}
diff --git a/mpi/examples/benchs/sendrecv_gemm_bench.c b/mpi/examples/benchs/sendrecv_gemm_bench.c
new file mode 100644
index 0000000..a66ca1f
--- /dev/null
+++ b/mpi/examples/benchs/sendrecv_gemm_bench.c
@@ -0,0 +1,203 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Simple *not distributed* parallel GEMM implementation and sendrecv bench at the same time.
+ *
+ * This bench is a merge of mpi/tests/sendrecv_bench and examples/mult/sgemm
+ *
+ * A *non-distributed* GEMM is computed on each node, while a sendrecv bench is running,
+ * completely independently. The goal is to measure the impact of worker computations on
+ * communications.
+ *
+ * Use the -nblocks parameter to define the matrix size (matrix size = nblocks * 320), such as
+ * the GEMM finishes after the sendrecv bench.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "helper.h"
+#include "abstract_sendrecv_bench.h"
+#include "gemm_helper.h"
+
+static int mpi_rank;
+static starpu_pthread_barrier_t thread_barrier;
+
+/* Body of the communication thread: bind it to a core (the one given with
+ * -comm-thread-cpuid, or the next one proposed by StarPU) and run the
+ * send/receive benchmark. */
+static void* comm_thread_func(void* arg)
+{
+	(void) arg;
+
+	if (comm_thread_cpuid < 0)
+	{
+		comm_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);
+	}
+
+	if (starpu_bind_thread_on(comm_thread_cpuid, 0, "Comm") < 0)
+	{
+		char hostname[65];
+		gethostname(hostname, sizeof(hostname));
+		/* gethostname() may leave a truncated name unterminated */
+		hostname[sizeof(hostname) - 1] = '\0';
+		fprintf(stderr, "[%s] No core was available for the comm thread. You should increase STARPU_RESERVE_NCPU or decrease STARPU_NCPU\n", hostname);
+	}
+
+	int ret = sendrecv_bench(mpi_rank, &thread_barrier, /* half-duplex communications */ 0, /* allocate MPI buffers on CPU */ STARPU_MAIN_RAM);
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "No device available\n");
+	}
+
+	return NULL;
+}
+
+/* Return the value following the option at argv[*i] and advance *i to it.
+ * Terminates the program when the option is the last argument. */
+static const char *option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc)
+	{
+		fprintf(stderr,"Option %s requires a value\n", argv[*i]);
+		exit(EXIT_FAILURE);
+	}
+	return argv[++(*i)];
+}
+
+/* Parse the command line and update the globals nslices, matrix_dim, check
+ * and comm_thread_cpuid accordingly. Terminates the program on -help, on an
+ * unrecognized option, or on a missing or invalid option value. */
+void parse_args(int argc, char **argv)
+{
+	int i;
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
+			char *argptr;
+			const char *value = option_value(argc, argv, &i);
+			long nblocks = strtol(value, &argptr, 10);
+			/* The number of blocks is later used as a divisor */
+			if (argptr == value || *argptr != '\0' || nblocks <= 0)
+			{
+				fprintf(stderr, "Invalid number of blocks %s\n", value);
+				exit(EXIT_FAILURE);
+			}
+			nslices = nblocks;
+			matrix_dim = 320 * nslices;
+		}
+
+		else if (strcmp(argv[i], "-size") == 0)
+		{
+			char *argptr;
+			const char *value = option_value(argc, argv, &i);
+			long matrix_dim_tmp = strtol(value, &argptr, 10);
+			if (argptr == value || *argptr != '\0' || matrix_dim_tmp <= 0 || matrix_dim_tmp % 320 != 0)
+			{
+				/* Not fatal: the current size is kept */
+				fprintf(stderr, "Matrix size has to be a positive multiple of 320\n");
+			}
+			else
+			{
+				matrix_dim = matrix_dim_tmp;
+				nslices = matrix_dim / 320;
+			}
+		}
+
+		else if (strcmp(argv[i], "-check") == 0)
+		{
+			check = 1;
+		}
+
+		else if (strcmp(argv[i], "-comm-thread-cpuid") == 0)
+		{
+			comm_thread_cpuid = atoi(option_value(argc, argv, &i));
+		}
+
+		else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
+		{
+			fprintf(stderr,"Usage: %s [-nblocks n] [-size size] [-check] [-comm-thread-cpuid cpuid]\n", argv[0]);
+			fprintf(stderr,"Currently selected: matrix size: %u - %u blocks\n", matrix_dim, nslices);
+			fprintf(stderr, "Use -comm-thread-cpuid to specify where to bind the comm benchmarking thread\n");
+			exit(EXIT_SUCCESS);
+		}
+
+		else
+		{
+			fprintf(stderr,"Unrecognized option %s\n", argv[i]);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+/* Run a local GEMM on the workers while a dedicated thread runs the
+ * send/receive benchmark, then print the GEMM timing of this node.
+ * Returns 0 on success and 77 when no device can execute the tasks. */
+int main(int argc, char **argv)
+{
+	double start, end;
+	int ret, worldsize;
+	starpu_pthread_t comm_thread;
+
+	char hostname[255];
+	gethostname(hostname, sizeof(hostname));
+	/* gethostname() may leave a truncated name unterminated */
+	hostname[sizeof(hostname) - 1] = '\0';
+
+	parse_args(argc, argv);
+
+	starpu_fxt_autostart_profiling(0);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
+
+	if (worldsize < 2)
+	{
+		if (mpi_rank == 0)
+			FPRINTF(stderr, "We need 2 processes.\n");
+
+		starpu_mpi_shutdown();
+
+		return STARPU_TEST_SKIPPED;
+	}
+
+	STARPU_PTHREAD_BARRIER_INIT(&thread_barrier, NULL, 2);
+
+	// Start comm thread, benchmarking sendrecv:
+	STARPU_PTHREAD_CREATE(&comm_thread, NULL, comm_thread_func, NULL);
+
+	// Main thread will submit GEMM tasks:
+	gemm_alloc_data();
+
+	if (mpi_rank == 0)
+	{
+		printf("# node\tx\ty\tz\tms\tGFlops\n");
+	}
+
+	starpu_pause();
+
+	if(gemm_init_data() == -ENODEV || gemm_submit_tasks() == -ENODEV)
+	{
+		/* Still take part in both barriers, so that neither the
+		 * other nodes nor the comm thread are left waiting. */
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+		STARPU_PTHREAD_BARRIER_WAIT(&thread_barrier);
+		ret = 77;
+		goto enodev;
+	}
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+	starpu_fxt_start_profiling();
+
+	STARPU_PTHREAD_BARRIER_WAIT(&thread_barrier);
+
+	start = starpu_timing_now();
+	starpu_resume();
+	starpu_task_wait_for_all();
+	end = starpu_timing_now();
+
+	double timing = end - start;
+	double flops = 2.0*((unsigned long long)matrix_dim) * ((unsigned long long)matrix_dim)*((unsigned long long)matrix_dim);
+
+	printf("%s\t%u\t%u\t%u\t%.0f\t%.1f\n", hostname, matrix_dim, matrix_dim, matrix_dim, timing/1000.0, flops/timing/1000.0);
+
+
+enodev:
+	gemm_release();
+
+	// Wait comm thread:
+	STARPU_PTHREAD_JOIN(comm_thread, NULL);
+	STARPU_PTHREAD_BARRIER_DESTROY(&thread_barrier);
+
+	starpu_fxt_stop_profiling();
+
+	/* On the error path the workers are still paused */
+	if (ret)
+		starpu_resume();
+	starpu_mpi_shutdown();
+
+	return ret;
+}
diff --git a/mpi/examples/benchs/sendrecv_parallel_tasks_bench.c b/mpi/examples/benchs/sendrecv_parallel_tasks_bench.c
new file mode 100644
index 0000000..20210e8
--- /dev/null
+++ b/mpi/examples/benchs/sendrecv_parallel_tasks_bench.c
@@ -0,0 +1,238 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * sendrecv benchmark from different tasks, executed simultaneously on several + * workers. + * Heavily inspired by NewMadeleine examples/piom/nm_piom_pingpong.c + * + * The goal is to measure the impact of calls to starpu_mpi_* from different threads. + * + * Use STARPU_NCPU to set the number of parallel ping-pongs + * + * + * Note: This currently cannot work with the MPI backend with more than 1 CPU, + * since with big sizes, the MPI_Wait call in the MPI thread may block waiting + * for the peer to call MPI_Recv+Wait, and there is no guarantee that the peer + * will call MPI_Recv+Wait for the same data since tasks can proceed in any + * order. 
+ */ + +#include +#include "helper.h" +#include "bench_helper.h" + +#define NB_WARMUP_PINGPONGS 10 + +/* We reduce NX_MAX, since some NICs don't support exchanging simultaneously such amount of memory */ +#undef NX_MAX +#ifdef STARPU_QUICK_CHECK +#define NX_MAX (1024) +#else +#define NX_MAX (64 * 1024 * 1024) +#endif + + +void cpu_task(void* descr[], void* args) +{ + int mpi_rank; + uint64_t iterations = +#ifdef STARPU_QUICK_CHECK + 10; +#else + LOOPS_DEFAULT / 100; +#endif + uint64_t s; + starpu_data_handle_t handle_send, handle_recv; + double t1, t2; + int asked_worker; + int current_worker = starpu_worker_get_id(); + uint64_t j; + uint64_t k; + int ret; + + starpu_codelet_unpack_args(args, &mpi_rank, &asked_worker, &s, &handle_send, &handle_recv); + + STARPU_ASSERT(asked_worker == current_worker); + + iterations = bench_nb_iterations(iterations, s); + double* lats = malloc(sizeof(double) * iterations); + + for (j = 0; j < NB_WARMUP_PINGPONGS; j++) + { + if (mpi_rank == 0) + { + ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + else + { + ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + } + + for (j = 0; j < iterations; j++) + { + if (mpi_rank == 0) + { + t1 = starpu_timing_now(); + ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + t2 = starpu_timing_now(); + + lats[j] = (t2 - t1) / 2; + } + else + { + ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + } + + if (mpi_rank == 0) + { + qsort(lats, iterations, sizeof(double), &comp_double); + + const double min_lat = lats[0]; + const double max_lat = lats[iterations - 1]; + const double med_lat = lats[(iterations - 1) / 2]; + const double d1_lat = lats[(iterations - 1) / 10]; + const double d9_lat = lats[9 * (iterations - 1) / 10]; + double avg_lat = 0.0; + + for(k = 0; k < iterations; k++) + { + avg_lat += lats[k]; + } + + avg_lat /= iterations; + const double bw_million_byte = s / min_lat; + const double bw_mbyte = bw_million_byte / 1.048576; + + printf("%2d\t\t%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\n", + current_worker, (long long) s, min_lat, bw_million_byte, bw_mbyte, d1_lat, med_lat, avg_lat, d9_lat, max_lat); + fflush(stdout); + } + + free(lats); +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { cpu_task }, + .cpu_funcs_name = { "cpu_task" }, + .nbuffers = 0 +}; + +int main(int argc, char **argv) +{ + int ret, rank, worldsize; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + if (worldsize < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need 2 processes.\n"); + + starpu_mpi_shutdown(); + + return STARPU_TEST_SKIPPED; + } + + if (rank == 0) + { + printf("Times in us\n"); + printf("# worker | size (Bytes)\t| latency \t| 10^6 B/s \t| MB/s \t| d1 \t|median \t| avg \t| d9 \t| max\n"); + } + else if (rank >= 2) + { + starpu_mpi_shutdown(); + + return 0; + } + + + unsigned cpu_count = starpu_cpu_worker_get_count(); + uint64_t s; + unsigned i; + + int* workers = malloc(cpu_count * sizeof(int)); + float** vectors_send = malloc(cpu_count * sizeof(float*)); + float** vectors_recv = 
malloc(cpu_count * sizeof(float*)); + starpu_data_handle_t* handles_send = malloc(cpu_count * sizeof(starpu_data_handle_t)); + starpu_data_handle_t* handles_recv = malloc(cpu_count * sizeof(starpu_data_handle_t)); + + for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) + { + starpu_pause(); + + for (i = 0; i < cpu_count; i++) + { + workers[i] = i; + vectors_send[i] = malloc(s); + vectors_recv[i] = malloc(s); + memset(vectors_send[i], 0, s); + memset(vectors_recv[i], 0, s); + + starpu_vector_data_register(&handles_send[i], STARPU_MAIN_RAM, (uintptr_t) vectors_send[i], s, 1); + starpu_vector_data_register(&handles_recv[i], STARPU_MAIN_RAM, (uintptr_t) vectors_recv[i], s, 1); + + ret = starpu_task_insert(&cl, + STARPU_EXECUTE_ON_WORKER, workers[i], + STARPU_VALUE, &rank, sizeof(int), + STARPU_VALUE, workers + i, sizeof(int), + STARPU_VALUE, &s, sizeof(uint64_t), + STARPU_VALUE, &handles_send[i], sizeof(starpu_data_handle_t), + STARPU_VALUE, &handles_recv[i], sizeof(starpu_data_handle_t), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_resume(); + starpu_task_wait_for_all(); + + for (i = 0; i < cpu_count; i++) + { + starpu_data_unregister(handles_send[i]); + starpu_data_unregister(handles_recv[i]); + free(vectors_send[i]); + free(vectors_recv[i]); + } + } + + free(workers); + free(vectors_send); + free(vectors_recv); + free(handles_send); + free(handles_recv); + + starpu_mpi_shutdown(); + + return 0; +} diff --git a/mpi/examples/cache/cache.c b/mpi/examples/cache/cache.c new file mode 100644 index 0000000..9816a9f --- /dev/null +++ b/mpi/examples/cache/cache.c @@ -0,0 +1,112 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +struct starpu_codelet mycodelet_r = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +struct starpu_codelet mycodelet_w = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &starpu_perfmodel_nop, +}; + +struct starpu_codelet mycodelet_rw = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, starpu_data_handle_t data, int rank, int in_cache) +{ + int cache; + int ret; + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + cache = starpu_mpi_cached_receive(data); + + if (rank == 1) + { + if (in_cache) + { + STARPU_ASSERT_MSG(cache == 1, "Data should be in cache\n"); + } + else + { + STARPU_ASSERT_MSG(cache == 0, "Data should NOT be in cache\n"); + } + } +} + +int main(int argc, char **argv) +{ + int rank; + int ret; + unsigned val = 42; + starpu_data_handle_t data; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + 
starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + if (starpu_mpi_cache_is_enabled() == 0) + goto skip; + + if (rank == 0) + starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)&val, sizeof(unsigned)); + else + starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data, 42, 0); + FPRINTF_MPI(stderr, "Registering data %p with tag %d and node %d\n", data, 42, 0); + + // We use the same data with different access modes and we check if it is + // available or not in the cache + test(&mycodelet_r, STARPU_R, data, rank, 1); + test(&mycodelet_rw, STARPU_RW, data, rank, 0); + test(&mycodelet_r, STARPU_R, data, rank, 1); + test(&mycodelet_r, STARPU_R, data, rank, 1); + test(&mycodelet_w, STARPU_W, data, rank, 0); + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + starpu_data_unregister(data); + +skip: + starpu_mpi_shutdown(); + + return starpu_mpi_cache_is_enabled() == 0 ? STARPU_TEST_SKIPPED : 0; +} diff --git a/mpi/examples/cache/cache_disable.c b/mpi/examples/cache/cache_disable.c new file mode 100644 index 0000000..dfe7543 --- /dev/null +++ b/mpi/examples/cache/cache_disable.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +struct starpu_codelet mycodelet_r = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int rank; + int ret; + unsigned *val; + starpu_data_handle_t data; + int in_cache; + int cache; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + cache = starpu_mpi_cache_is_enabled(); + if (cache == 0) + goto skip; + + val = malloc(sizeof(*val)); + *val = 12; + + if (rank == 0) + starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)val, sizeof(unsigned)); + else + starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data, 42, 0); + FPRINTF_MPI(stderr, "Registering data %p with tag %d and node %d\n", data, 42, 0); + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + in_cache = starpu_mpi_cached_receive(data); + if (rank == 1) + { + STARPU_ASSERT_MSG(in_cache == 1, "Data should be in cache\n"); + } + + // We clean the cache + starpu_mpi_cache_set(0); + + // We check the data is no longer in the cache + in_cache = starpu_mpi_cached_receive(data); + if (rank == 1) + { + STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n"); + } + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + in_cache = starpu_mpi_cached_receive(data); + if (rank == 1) + { + STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n"); + } + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + starpu_data_unregister(data); + 
free(val); + +skip: + starpu_mpi_shutdown(); + + return cache == 0 ? STARPU_TEST_SKIPPED : 0; +} diff --git a/mpi/examples/cg/cg.c b/mpi/examples/cg/cg.c new file mode 100644 index 0000000..6f999a1 --- /dev/null +++ b/mpi/examples/cg/cg.c @@ -0,0 +1,424 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +/* + * Distributed version of Conjugate Gradient implemented in examples/cg/cg.c + * + * Use -display-result option and compare with the non-distributed version: the + * x vector should be the same. + */ + +#include "../../../examples/cg/cg.h" + +static int copy_handle(starpu_data_handle_t* dst, starpu_data_handle_t* src, unsigned nblocks); + +#define HANDLE_TYPE_VECTOR starpu_data_handle_t* +#define HANDLE_TYPE_MATRIX starpu_data_handle_t** +#define TASK_INSERT(cl, ...) 
starpu_mpi_task_insert(MPI_COMM_WORLD, cl, ##__VA_ARGS__) +#define GET_VECTOR_BLOCK(v, i) v[i] +#define GET_MATRIX_BLOCK(m, i, j) m[i][j] +#define BARRIER() starpu_mpi_barrier(MPI_COMM_WORLD); +#define GET_DATA_HANDLE(handle) starpu_mpi_get_data_on_all_nodes_detached(MPI_COMM_WORLD, handle) + +static unsigned block_size; + +static int rank; +static int nodes_p = 2; +static int nodes_q; + +static TYPE ***A; +static TYPE **x; +static TYPE **b; + +static TYPE **r; +static TYPE **d; +static TYPE **q; + +#define FPRINTF_SERVER(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT") && rank == 0) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#include "../../../examples/cg/cg_kernels.c" + +static int my_distrib(const int yy, const int xx) +{ + return (yy%nodes_q)*nodes_p + (xx%nodes_p); +} + +static int copy_handle(starpu_data_handle_t* dst, starpu_data_handle_t* src, unsigned nb) +{ + unsigned block; + + for (block = 0; block < nb; block++) + { + if (rank == my_distrib(block, 0)) + { + starpu_data_cpy(dst[block], src[block], /* asynchronous */ 1, /* without callback */ NULL, NULL); + } + } + + return 0; +} + +/* + * Generate Input data + */ +static void generate_random_problem(void) +{ + unsigned ii, jj, j, i; + int mpi_rank; + + A = malloc(nblocks * sizeof(TYPE **)); + x = malloc(nblocks * sizeof(TYPE *)); + b = malloc(nblocks * sizeof(TYPE *)); + + r = malloc(nblocks * sizeof(TYPE *)); + d = malloc(nblocks * sizeof(TYPE *)); + q = malloc(nblocks * sizeof(TYPE *)); + + for (j = 0; j < nblocks; j++) + { + A[j] = malloc(nblocks * sizeof(TYPE*)); + + mpi_rank = my_distrib(j, 0); + + if (mpi_rank == rank || display_result) + { + starpu_malloc((void**) &x[j], block_size*sizeof(TYPE)); + } + + if (mpi_rank == rank) + { + starpu_malloc((void**) &b[j], block_size*sizeof(TYPE)); + starpu_malloc((void**) &r[j], block_size*sizeof(TYPE)); + starpu_malloc((void**) &d[j], block_size*sizeof(TYPE)); + starpu_malloc((void**) &q[j], block_size*sizeof(TYPE)); + + for (jj = 0; jj < 
/*
 * Register every block of the problem with StarPU and StarPU-MPI.
 *
 * Allocates the handle arrays (A_handle, x_handle, b_handle, r_handle,
 * d_handle, q_handle), registers one vector handle per block of x, b, r, d
 * and q, one matrix handle per block of A, and the two scalar handles
 * dtq_handle and rtr_handle.
 *
 * MPI tags are taken from a single counter that is incremented before each
 * starpu_mpi_data_register() call, so the tag of a handle is defined by the
 * order of the calls below.
 */
static void register_data(void)
{
	unsigned j, i;
	int mpi_rank;
	starpu_mpi_tag_t mpi_tag = 0;

	A_handle = malloc(nblocks*sizeof(starpu_data_handle_t*));
	x_handle = malloc(nblocks*sizeof(starpu_data_handle_t));
	b_handle = malloc(nblocks*sizeof(starpu_data_handle_t));
	r_handle = malloc(nblocks*sizeof(starpu_data_handle_t));
	d_handle = malloc(nblocks*sizeof(starpu_data_handle_t));
	q_handle = malloc(nblocks*sizeof(starpu_data_handle_t));

	for (j = 0; j < nblocks; j++)
	{
		/* Owner of the j-th block of the vectors: column 0 of the distribution */
		mpi_rank = my_distrib(j, 0);
		A_handle[j] = malloc(nblocks*sizeof(starpu_data_handle_t));

		/* x[j] is registered from main memory on its owner, and on every
		 * process when display_result is set */
		if (mpi_rank == rank || display_result)
		{
			starpu_vector_data_register(&x_handle[j], STARPU_MAIN_RAM, (uintptr_t) x[j], block_size, sizeof(TYPE));
		}
		else if (!display_result)
		{
			/* Not the owner: register with home node -1 and no buffer */
			assert(mpi_rank != rank);
			starpu_vector_data_register(&x_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE));
		}

		if (mpi_rank == rank)
		{
			starpu_vector_data_register(&b_handle[j], STARPU_MAIN_RAM, (uintptr_t) b[j], block_size, sizeof(TYPE));
			starpu_vector_data_register(&r_handle[j], STARPU_MAIN_RAM, (uintptr_t) r[j], block_size, sizeof(TYPE));
			starpu_vector_data_register(&d_handle[j], STARPU_MAIN_RAM, (uintptr_t) d[j], block_size, sizeof(TYPE));
			starpu_vector_data_register(&q_handle[j], STARPU_MAIN_RAM, (uintptr_t) q[j], block_size, sizeof(TYPE));
		}
		else
		{
			/* Blocks owned by another process: home node -1 and no buffer */
			starpu_vector_data_register(&b_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE));
			starpu_vector_data_register(&r_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE));
			starpu_vector_data_register(&d_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE));
			starpu_vector_data_register(&q_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE));
		}

		/* Attach the block index as coordinate and declare the MPI tag
		 * and owner of each vector block: x, b, r, d, q, in this order */
		starpu_data_set_coordinates(x_handle[j], 1, j);
		starpu_mpi_data_register(x_handle[j], ++mpi_tag, mpi_rank);
		starpu_data_set_coordinates(b_handle[j], 1, j);
		starpu_mpi_data_register(b_handle[j], ++mpi_tag, mpi_rank);
		starpu_data_set_coordinates(r_handle[j], 1, j);
		starpu_mpi_data_register(r_handle[j], ++mpi_tag, mpi_rank);
		starpu_data_set_coordinates(d_handle[j], 1, j);
		starpu_mpi_data_register(d_handle[j], ++mpi_tag, mpi_rank);
		starpu_data_set_coordinates(q_handle[j], 1, j);
		starpu_mpi_data_register(q_handle[j], ++mpi_tag, mpi_rank);

		if (use_reduction)
		{
			/* Only q and r get vector reduction methods */
			starpu_data_set_reduction_methods(q_handle[j], &accumulate_vector_cl, &bzero_vector_cl);
			starpu_data_set_reduction_methods(r_handle[j], &accumulate_vector_cl, &bzero_vector_cl);
		}

		for (i = 0; i < nblocks; i++)
		{
			/* Owner of the matrix block (j, i) */
			mpi_rank = my_distrib(j, i);

			if (mpi_rank == rank)
			{
				starpu_matrix_data_register(&A_handle[j][i], STARPU_MAIN_RAM, (uintptr_t) A[j][i], block_size, block_size, block_size, sizeof(TYPE));
			}
			else
			{
				starpu_matrix_data_register(&A_handle[j][i], -1, (uintptr_t) NULL, block_size, block_size, block_size, sizeof(TYPE));
			}

			starpu_data_set_coordinates(A_handle[j][i], 2, i, j);
			starpu_mpi_data_register(A_handle[j][i], ++mpi_tag, mpi_rank);
		}
	}

	/* The two scalars are registered from local variables on every process;
	 * for MPI they are declared as owned by node 0 (rtr gets its tag first) */
	starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE));
	starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE));
	starpu_mpi_data_register(rtr_handle, ++mpi_tag, 0);
	starpu_mpi_data_register(dtq_handle, ++mpi_tag, 0);

	if (use_reduction)
	{
		starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl);
		starpu_data_set_reduction_methods(rtr_handle, &accumulate_variable_cl, &bzero_variable_cl);
	}
}
} + starpu_data_release(x_handle[j]); + } + } +} + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-p") == 0) + { + nodes_p = atoi(argv[++i]); + continue; + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) + { + FPRINTF_SERVER(stderr, "usage: %s [-h] [-nblocks #blocks] [-display-result] [-p node_grid_width] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]); + exit(-1); + } + } + + parse_common_args(argc, argv); +} + +int main(int argc, char **argv) +{ + int worldsize, ret; + double start, end; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return 77; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + parse_args(argc, argv); + + if (worldsize % nodes_p != 0) + { + FPRINTF_SERVER(stderr, "Node grid (%d) width must divide the number of nodes (%d).\n", nodes_p, worldsize); + starpu_mpi_shutdown(); + return 1; + } + nodes_q = worldsize / nodes_p; + + if (n % nblocks != 0) + { + FPRINTF_SERVER(stderr, "The number of blocks (%u) must divide the matrix size (%lld).\n", nblocks, n); + starpu_mpi_shutdown(); + return 1; + } + block_size = n / nblocks; + + starpu_cublas_init(); + + FPRINTF_SERVER(stderr, "************** PARAMETERS ***************\n"); + FPRINTF_SERVER(stderr, "%d nodes (%dx%d)\n", worldsize, nodes_p, nodes_q); + FPRINTF_SERVER(stderr, "Problem size (-n): %lld\n", n); + FPRINTF_SERVER(stderr, "Maximum number of iterations (-maxiter): %d\n", i_max); + FPRINTF_SERVER(stderr, "Number of blocks (-nblocks): %u\n", nblocks); + FPRINTF_SERVER(stderr, "Reduction (-no-reduction): %s\n", use_reduction ? 
"enabled" : "disabled"); + + starpu_mpi_barrier(MPI_COMM_WORLD); + start = starpu_timing_now(); + generate_random_problem(); + register_data(); + starpu_mpi_barrier(MPI_COMM_WORLD); + end = starpu_timing_now(); + + FPRINTF_SERVER(stderr, "Problem initialization timing : %2.2f seconds\n", (end-start)/1e6); + + ret = cg(); + if (ret == -ENODEV) + { + ret = 77; + goto enodev; + } + + starpu_task_wait_for_all(); + + if (display_result) + { + display_x_result(); + } + +enodev: + unregister_data(); + free_data(); + starpu_cublas_shutdown(); + starpu_mpi_shutdown(); + return ret; +} diff --git a/mpi/examples/comm/comm.c b/mpi/examples/comm/comm.c new file mode 100644 index 0000000..f0844d2 --- /dev/null +++ b/mpi/examples/comm/comm.c @@ -0,0 +1,158 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * This example splits the whole set of communicators in subgroups, + * all communications take place within each subgroups + */ + +#include +#include "../helper.h" + +#define DATA0_TAG 12 +#define DATA1_TAG 22 + +MPI_Comm newcomm; + +void func_cpu(void *descr[], void *_args) +{ + int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + int rank; + + starpu_codelet_unpack_args(_args, &rank); + FPRINTF_MPI_COMM(stderr, newcomm, "Executing codelet with value %d and rank %d\n", *value, rank); + STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int size, x=789; + int color; + int rank, newrank; + int ret; + starpu_data_handle_t data[2]; + int thread_support; + + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) + { + fprintf(stderr,"MPI_Init_thread failed\n"); + exit(1); + } + if (thread_support == MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); + if (thread_support < MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI does not have thread support!\n"); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size < 4) + { + FPRINTF(stderr, "We need at least 4 processes.\n"); + MPI_Finalize(); + return STARPU_TEST_SKIPPED; + } + + color = rank%2; + MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); + MPI_Comm_rank(newcomm, &newrank); + FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); + + if (newrank == 0) + { + FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); + MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); + } + else if (newrank == 1) + { + MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); + FPRINTF(stderr, "[%d][%d] received 
%d\n", rank, newrank, x); + } + + ret = starpu_mpi_init_conf(NULL, NULL, 0, newcomm, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + if (newrank == 0) + { + starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); + starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); + starpu_mpi_data_register_comm(data[1], DATA1_TAG, 0, newcomm); + } + else + starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register_comm(data[0], DATA0_TAG, 0, newcomm); + + if (newrank == 0) + { + starpu_mpi_req req[2]; + ret = starpu_mpi_issend(data[1], &req[0], 1, DATA1_TAG, newcomm); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_isend(data[0], &req[1], 1, DATA0_TAG, newcomm); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + ret = starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else if (newrank == 1) + { + int *xx; + + ret = starpu_mpi_recv(data[0], 0, DATA0_TAG, newcomm, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); + STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + + starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register_comm(data[1], DATA1_TAG, 0, newcomm); + ret = starpu_mpi_recv(data[0], 0, DATA1_TAG, newcomm, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); + 
STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + } + + if (newrank == 0 || newrank == 1) + { + starpu_mpi_task_insert(newcomm, &mycodelet, + STARPU_RW, data[0], + STARPU_VALUE, &x, sizeof(x), + STARPU_EXECUTE_ON_NODE, 1, + 0); + + starpu_task_wait_for_all(); + starpu_data_unregister(data[0]); + starpu_data_unregister(data[1]); + } + + starpu_mpi_shutdown_comm(newcomm); + MPI_Comm_free(&newcomm); + MPI_Finalize(); + return 0; +} diff --git a/mpi/examples/comm/group.c b/mpi/examples/comm/group.c new file mode 100644 index 0000000..77f62bc --- /dev/null +++ b/mpi/examples/comm/group.c @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +int main(int argc, char **argv) +{ +#ifdef STARPU_HAVE_MPI_COMM_CREATE_GROUP + int thread_support; + + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) + { + fprintf(stderr,"MPI_Init_thread failed\n"); + exit(1); + } + if (thread_support == MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); + if (thread_support < MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI does not have thread support!\n"); + + int world_rank, world_size; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + if (world_size < 4) + { + FPRINTF(stderr, "We need at least 4 processes.\n"); + MPI_Finalize(); + return (world_rank==0) ? STARPU_TEST_SKIPPED : 0; + } + + // create a new communicator with the even ranks processes + int ranks[world_size/2]; + int pos,n; + for(pos=0,n=0 ; pos +#include "../helper.h" + +MPI_Comm newcomm; + +void func_cpu(void *descr[], void *_args) +{ + int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + int rank; + + starpu_codelet_unpack_args(_args, &rank); + FPRINTF_MPI_COMM(stderr, newcomm, "Executing codelet with value %d and rank %d\n", *value, rank); + STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int size, x; + int color; + int rank, newrank; + int ret; + starpu_data_handle_t data[3]; + int value = 90; + int thread_support; + if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) + { + fprintf(stderr,"MPI_Init_thread failed\n"); + exit(1); + } + if (thread_support == MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will 
work\n"); + if (thread_support < MPI_THREAD_FUNNELED) + fprintf(stderr,"Warning: MPI does not have thread support!\n"); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size < 4) + { + FPRINTF(stderr, "We need at least 4 processes.\n"); + MPI_Finalize(); + return STARPU_TEST_SKIPPED; + } + + color = rank%2; + MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); + MPI_Comm_rank(newcomm, &newrank); + FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); + + if (newrank == 0) + { + FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); + MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); + } + else if (newrank == 1) + { + MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); + FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); + } + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + ret = starpu_mpi_comm_register(newcomm); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_comm_register"); + + if (rank == 0) + { + starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int)); + } + else + starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD); + + if (newrank == 0) + { + starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); + starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); + starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); + } + else + starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register_comm(data[0], 12, 0, newcomm); + + if (newrank == 0) + { + starpu_mpi_req req[2]; + ret = starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_mpi_isend"); + ret = starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + ret = starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else if (newrank == 1) + { + int *xx; + + ret = starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); + STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + + starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); + ret = starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); + STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + } + + if (rank == 0) + { + starpu_data_acquire(data[2], STARPU_R); + int rvalue = *((int *)starpu_variable_get_local_ptr(data[2])); + starpu_data_release(data[2]); + FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1); + ret = starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[2], STARPU_R); + int *xx = (int *)starpu_variable_get_local_ptr(data[2]); + FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "Value back is %d\n", *xx); + STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect 
(should be %d)\n", *xx, rvalue + (2*(size-1))); + starpu_data_release(data[2]); + } + else + { + int next = (rank == size-1) ? 0 : rank+1; + ret = starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[2], STARPU_RW); + int *xx = (int *)starpu_variable_get_local_ptr(data[2]); + FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next); + *xx = *xx + 2; + starpu_data_release(data[2]); + ret = starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + + if (newrank == 0 || newrank == 1) + { + starpu_mpi_task_insert(newcomm, &mycodelet, + STARPU_RW, data[0], + STARPU_VALUE, &x, sizeof(x), + STARPU_EXECUTE_ON_NODE, 1, + 0); + + starpu_task_wait_for_all(); + starpu_data_unregister(data[0]); + starpu_data_unregister(data[1]); + } + starpu_data_unregister(data[2]); + + starpu_mpi_shutdown(); + MPI_Comm_free(&newcomm); + MPI_Finalize(); + return 0; +} diff --git a/mpi/examples/complex/mpi_complex.c b/mpi/examples/complex/mpi_complex.c new file mode 100644 index 0000000..55a97d8 --- /dev/null +++ b/mpi/examples/complex/mpi_complex.c @@ -0,0 +1,142 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void display_foo_codelet(void *descr[], void *_args) +{ + (void)_args; + int *foo = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + FPRINTF(stderr, "foo = %d\n", *foo); +} + +struct starpu_codelet foo_display = +{ + .cpu_funcs = {display_foo_codelet}, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int rank, nodes; + int ret; + int compare=0; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + starpu_data_handle_t handle; + starpu_data_handle_t handle2; + + double real[2] = {4.0, 2.0}; + double imaginary[2] = {7.0, 9.0}; + + double real2[2] = {14.0, 12.0}; + double imaginary2[2] = {17.0, 19.0}; + + if (rank == 1) + { + real[0] = 0.0; + real[1] = 0.0; + imaginary[0] = 0.0; + imaginary[1] = 0.0; + } + + starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); + starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2); + + // Ping-pong + if (rank == 0) + { + int *compare_ptr = &compare; + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + ret = starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL); + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + else if (rank == 1) + { + ret = starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + // Ping + if (rank == 0) + { + starpu_data_handle_t xhandle; + double xreal = 4.0; + double ximaginary = 8.0; + starpu_complex_data_register(&xhandle, STARPU_MAIN_RAM, &xreal, &ximaginary, 1); + ret = starpu_mpi_send(xhandle, 1, 30, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + starpu_data_unregister(xhandle); + } + else if (rank == 1) + { + MPI_Status status; + starpu_data_handle_t xhandle; + double xreal = 14.0; + double ximaginary = 18.0; + starpu_complex_data_register(&xhandle, STARPU_MAIN_RAM, &xreal, &ximaginary, 1); + ret = starpu_mpi_recv(xhandle, 0, 30, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_unregister(xhandle); + FPRINTF(stderr, "[received] real %f imaginary %f\n", xreal, ximaginary); + STARPU_ASSERT_MSG(xreal == 4 && ximaginary == 8, "Incorrect received value\n"); + } + + starpu_task_wait_for_all(); + + starpu_data_unregister(handle); + starpu_data_unregister(handle2); + + starpu_mpi_shutdown(); + + return (rank == 0) ? 
!compare : 0; +} diff --git a/mpi/examples/filters/filter.c b/mpi/examples/filters/filter.c new file mode 100644 index 0000000..daa5e14 --- /dev/null +++ b/mpi/examples/filters/filter.c @@ -0,0 +1,171 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This exemplifies how to declare a new filter function. + */ + +#include + +#define NX 20 + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +void cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + int factor; + int rank; + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + fprintf(stderr, "computing on rank %d\n", rank); + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + starpu_codelet_unpack_args(cl_arg, &factor); + + for (i = 0; i < n; i++) + val[i] *= factor; +} + +struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_func}, + .cpu_funcs_name = {"cpu_func"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "vector_scal" +}; + +void vector_filter(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; + struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; + + uint32_t nx = vector_father->nx; + size_t elemsize = vector_father->elemsize; + + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + STARPU_ASSERT(nchunks == 2); + STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%u is not a multiple of nchunks %u\n", nx, nchunks); + + vector_child->id = vector_father->id; + vector_child->nx = nx/2; + vector_child->elemsize = elemsize; + vector_child->allocsize = vector_child->nx * elemsize; + + if (vector_father->dev_handle) + { + size_t offset = (id *(nx/nchunks)) * elemsize; + if (vector_father->ptr) vector_child->ptr = vector_father->ptr + offset; + vector_child->dev_handle = vector_father->dev_handle; + vector_child->offset = vector_father->offset + offset; + } +} + +int main(int argc, char **argv) +{ + int i, rank, nodes; + int vector[NX]; + int vector_check[NX]; + starpu_data_handle_t vhandle; + starpu_data_handle_t handles[2]; + int factor[2] = {2, 3}; + int ret; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + for(i=0 ; i +#include + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#ifdef STARPU_HAVE_HELGRIND_H +#include +#endif + +#define STARPU_TEST_SKIPPED 77 + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ + int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \ + fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ + fflush(ofile); }} while(0); +#define FPRINTF_MPI_COMM(ofile, comm, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ + int _disp_rank; starpu_mpi_comm_rank(comm, &_disp_rank); \ + fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ + fflush(ofile); }} while(0); + diff --git a/mpi/examples/loader.c b/mpi/examples/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/mpi/examples/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +/* Replacement for gettimeofday() on native Windows builds: when tv is non-NULL, fill it with the time elapsed since the Unix epoch (microsecond resolution). tz is ignored. Always returns 0. */ +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } + /* The function is declared int: report success like gettimeofday() does */ + return 0; +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", 
"starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. */ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + /* Append each space-separated launcher argument. Only non-NULL tokens are stored: a NULL left in the middle of launcher_argv would end the argument list before test_name is reached. */ + char *tok; + for (tok = strtok(launcher_args, " "); tok != NULL; tok = strtok(NULL, " ")) + { + launcher_argv[i++] = tok; + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky.c 
b/mpi/examples/matrix_decomposition/mpi_cholesky.c new file mode 100644 index 0000000..fba88ae --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky.c @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_cholesky.h" +#include "helper.h" + +int main(int argc, char **argv) +{ + /* create a simple definite positive symmetric matrix example + * + * Hilbert matrix : h(i,j) = 1/(i+j+1) + * */ + + float ***bmat; + int rank, nodes, ret; + double timing, flops; +#ifndef STARPU_SIMGRID + int correctness; +#endif + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + starpu_cublas_init(); + + if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) + { + if (rank == 0) + { + FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n"); + } + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + + parse_args(argc, argv, nodes); + + if (checkpoint_enabled) + starpu_mpi_checkpoint_init(); + + matrix_init(&bmat, rank, nodes, 1); + matrix_display(bmat, rank, nodes); + + dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops); + +#ifndef STARPU_SIMGRID + 
matrix_display(bmat, rank, nodes); + + if (check && rank == 0) + dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001); +#endif + + matrix_free(&bmat, rank, nodes, 1); + + starpu_cublas_shutdown(); + if (checkpoint_enabled) + starpu_mpi_checkpoint_shutdown(); + starpu_mpi_shutdown(); + +#ifndef STARPU_SIMGRID + if (check && rank == 0) + assert(correctness); +#endif + + if (rank == 0) + { + FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000); + FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f)); + } + + return 0; +} diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky.h b/mpi/examples/matrix_decomposition/mpi_cholesky.h new file mode 100644 index 0000000..88e9afe --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky.h @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __MPI_CHOLESKY_H__ +#define __MPI_CHOLESKY_H__ + +#include +#include +#include "mpi_cholesky_codelets.h" +#include "mpi_cholesky_kernels.h" +#include "mpi_cholesky_models.h" +#include "mpi_decomposition_matrix.h" +#include "mpi_decomposition_params.h" + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#endif // __MPI_CHOLESKY_H__ diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c new file mode 100644 index 0000000..c6b49ab --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c @@ -0,0 +1,758 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_cholesky.h" +#include +#include +#include +#include + +/* This is from magma + + -- Innovative Computing Laboratory + -- Electrical Engineering and Computer Science Department + -- University of Tennessee + -- (C) Copyright 2009 + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the University of Tennessee, Knoxville nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) + +#define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) + +#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +#define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) +#define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? 
FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) + +#define FMULS_TRSM FMULS_TRMM +#define FADDS_TRSM FADDS_TRMM /* TRSM additions follow the TRMM addition count, not the multiplication count */ + +#define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) + + +#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) + +#define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) + + + +#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) +#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) + +#define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) + +/* End of magma code */ + +int _nodes; +starpu_mpi_checkpoint_template_t* checkpoint_p; + +int backup_function(int rank) +{ + return (rank/dblockx)*dblockx +(rank+1)%dblockx; +// return (rank+1)%_nodes; +} + +/* + * Create the codelets + */ + +static struct starpu_codelet cl_potrf = +{ + .cpu_funcs = {chol_cpu_codelet_update_potrf}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_potrf}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &chol_model_potrf, + .color = 0xffff00, +}; + +static struct starpu_codelet cl_trsm = +{ + .cpu_funcs = {chol_cpu_codelet_update_trsm}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_trsm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &chol_model_trsm, + .color = 0x8080ff, +}; + +static struct starpu_codelet cl_syrk = +{ + .cpu_funcs = {chol_cpu_codelet_update_syrk}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_syrk}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + 
.modes = {STARPU_R, STARPU_RW | STARPU_COMMUTE}, + .model = &chol_model_syrk, + .color = 0x00ff00, +}; + +static struct starpu_codelet cl_gemm = +{ + .cpu_funcs = {chol_cpu_codelet_update_gemm}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_gemm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE}, + .model = &chol_model_gemm, + .color = 0x00c000, +}; + +static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int nodes) +{ + unsigned k, m, n; + unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; + unsigned nn = size/nblocks; + + if (checkpoint_enabled) + { + starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &k, sizeof(k), nblocks*nblocks+10, backup_function); + starpu_mpi_checkpoint_template_freeze(checkpoint_p); + } + +#ifdef STARPU_DEVEL +#warning Add pruning +#endif + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_potrf, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, + STARPU_RW, data_handles[k][k], + STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), + 0); + + for (m = k+1; m= nblocks) + /* Skip first item when even number of tiles */ + continue; + + /* Accumulate updates from TRSMs */ + for (k = 0; k < n; k++) + { + if (m == n) + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_syrk, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[n][k], + STARPU_RW | STARPU_COMMUTE, data_handles[m][n], + STARPU_FLOPS, (double) FLOPS_SSYRK(nn, nn), + 0); + else + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_gemm, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? 
(int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[n][k], + STARPU_R, data_handles[m][k], + STARPU_RW | STARPU_COMMUTE, data_handles[m][n], + STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn), + 0); + + if (m == nblocks-1) + { + /* Nobody else will need it */ + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]); + starpu_data_wont_use(data_handles[n][k]); + } + } + /* non-diagonal block, solve */ + k = n; + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_trsm, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[k][k], + STARPU_RW, data_handles[m][k], + STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn), + 0); + + if (m == nblocks - 1) + { + /* We do not need the potrf result any more */ + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]); + starpu_data_wont_use(data_handles[n][n]); + } + } + + if (checkpoint_enabled) + { + if (a%checkpoint_period==checkpoint_period-1) + starpu_mpi_checkpoint_template_submit(*checkpoint_p, (int)(2*nblocks -4*a)); + } + + starpu_iteration_pop(); + } +} + +/* TODO: generate from compiler polyhedral analysis of classical algorithm */ +static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank STARPU_ATTRIBUTE_UNUSED, int nodes STARPU_ATTRIBUTE_UNUSED) +{ + unsigned a; + int k, m, n; + unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; + unsigned nn = size/nblocks; + + /* + * This is basically similar to above, except that we shift k according to the priorities set in the algorithm, so that gemm prio ~= 2*nblocks - a + * double-antidiagonal number: + * - a=0 contains (0,0) plus (1,0) + * - a=1 contains (2,0), (1,1) plus (3,0), (2, 1) + * - etc. 
+ */ + + if (checkpoint_enabled) + { + starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &a, sizeof(a), nblocks*nblocks+10, backup_function); + starpu_mpi_checkpoint_template_freeze(checkpoint_p); + } + + for (a = 0; a < 4*nblocks; a++) + { + starpu_iteration_push(a); + + for (k = 0; k < (int) nblocks; k++) + { + n = k; + /* Should be m = a-k-n; for potrf and trsm to respect + priorities, but needs to be this for dependencies */ + m = a-2*k-n; + + if (m == n) + { + /* diagonal block, factorize */ + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_potrf, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, + STARPU_RW, data_handles[k][k], + STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), + 0); + } + else if (m >= n && m < (int) nblocks) + { + /* non-diagonal block, solve */ + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_trsm, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[k][k], + STARPU_RW, data_handles[m][k], + STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn), + 0); + } + + if (m == (int) nblocks - 1) + { + /* We do not need the potrf result any more */ + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]); + starpu_data_wont_use(data_handles[n][n]); + } + + /* column within antidiagonal for a */ + for (n = k + 1; n < (int) nblocks; n++) + { + /* row */ + m = a-2*k-n; + + if (m >= n && m < (int) nblocks) + { + /* Update */ + if (m == n) + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_syrk, + STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[n][k], + STARPU_RW | STARPU_COMMUTE, data_handles[m][n], + STARPU_FLOPS, (double) FLOPS_SSYRK(nn, nn), + 0); + else + starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_gemm, + STARPU_PRIORITY, noprio ? 
STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, + STARPU_R, data_handles[n][k], + STARPU_R, data_handles[m][k], + STARPU_RW | STARPU_COMMUTE, data_handles[m][n], + STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn), + 0); + if (m == (int) nblocks - 1) + { + /* Nobody else will need it */ + starpu_data_wont_use(data_handles[n][k]); + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]); + } + } + } + + } + + if (checkpoint_enabled) + { + if (a%(4*checkpoint_period)==(4*checkpoint_period)-1) + starpu_mpi_checkpoint_template_submit(*checkpoint_p, (int)(2*nblocks - a)); + } + + starpu_iteration_pop(); + } +} + +/* + * code to bootstrap the factorization + * and construct the DAG + */ +void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops) +{ + double start; + double end; + starpu_data_handle_t **data_handles; + unsigned m, n; + + /* create all the DAG nodes */ + + if (checkpoint_enabled) + { + _nodes = nodes; + starpu_malloc((void**)&checkpoint_p, sizeof(starpu_mpi_checkpoint_template_t)); + starpu_mpi_checkpoint_template_create(checkpoint_p, 13, 0); + } + + data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *)); + for(m=0 ; m=n) + starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_R, data_handles[m][n], backup_function(mpi_rank)); + } + } + } + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + start = starpu_timing_now(); + + switch (submission) + { + case TRIANGLES: run_cholesky(data_handles, rank, nodes); break; + case COLUMNS: run_cholesky_column(data_handles, rank, nodes); break; + case ANTIDIAGONALS: run_cholesky_antidiagonal(data_handles, rank, nodes); break; + case PRIOS: run_cholesky_prio(data_handles, rank, nodes); break; + default: STARPU_ABORT(); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + end = starpu_timing_now(); + + for (m = 0; 
m < nblocks; m++) + { + for(n = 0; n < nblocks ; n++) + { + /* Get back data on node 0 for the check */ + if (check && data_handles[m][n]) + starpu_mpi_get_data_on_node(MPI_COMM_WORLD, data_handles[m][n], 0); + + if (data_handles[m][n]) + starpu_data_unregister(data_handles[m][n]); + } + free(data_handles[m]); + } + free(data_handles); + + if (rank == 0) + { + *timing = end - start; + *flops = FLOPS_SPOTRF(size); + } +} + +void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops, double epsilon) +{ + unsigned nn,mm,n,m; + float *rmat = malloc(size*size*sizeof(float)); + + for(n=0 ; n mm) + { + rmat[mm+nn*size] = 0.0f; // debug + } + } + } + float *test_mat = malloc(size*size*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size, size, 1.0f, + rmat, size, 0.0f, test_mat, size); + + FPRINTF(stderr, "[%d] comparing results ...\n", rank); + if (display) + { + for (mm = 0; mm < size; mm++) + { + for (nn = 0; nn < size; nn++) + { + if (nn <= mm) + { + printf("%2.2f\t", test_mat[mm +nn*size]); + } + else + { + printf(".\t"); + } + } + printf("\n"); + } + } + + *correctness = 1; + for(n = 0; n < nblocks ; n++) + { + for (m = 0; m < nblocks; m++) + { + for (nn = BLOCKSIZE*n ; nn < BLOCKSIZE*(n+1); nn++) + { + for (mm = BLOCKSIZE*m ; mm < BLOCKSIZE*(m+1); mm++) + { + if (nn <= mm) + { + float orig = (1.0f/(1.0f+nn+mm)) + ((nn == mm)?1.0f*size:0.0f); + float err = fabsf(test_mat[mm +nn*size] - orig) / orig; + if (err > epsilon) + { + FPRINTF(stderr, "[%d] Error[%u, %u] --> %2.20f != %2.20f (err %2.20f)\n", rank, nn, mm, test_mat[mm +nn*size], orig, err); + *correctness = 0; + *flops = 0; + break; + } + } + } + } + } + } + free(rmat); + free(test_mat); +} diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.h b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.h new file mode 100644 index 0000000..0019224 --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.h @@ -0,0 
+1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __MPI_CHOLESKY_CODELETS_H__ +#define __MPI_CHOLESKY_CODELETS_H__ + + +/* + * code to bootstrap the factorization + * and construct the DAG + */ +void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops); + +void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops, double epsilon); + +#endif /* __MPI_CHOLESKY_CODELETS_H__ */ diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c b/mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c new file mode 100644 index 0000000..76c708b --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_cholesky.h" + +/* This is the same as matrix_decomposition, but the matrix is not allocated in + * totality on all nodes, thus allowing much bigger matrices, but doesn't allow + * trivial checks */ + +int main(int argc, char **argv) +{ + /* create a simple definite positive symmetric matrix example + * + * Hilbert matrix : h(i,j) = 1/(i+j+1) + * */ + + float ***bmat; + int rank, nodes, ret; + double timing, flops; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + starpu_cublas_init(); + + parse_args(argc, argv, nodes); + + if (checkpoint_enabled) + starpu_mpi_checkpoint_init(); + + if (check) + { + fprintf(stderr,"can't check in distributed mode\n"); + check = 0; + } + + matrix_init(&bmat, rank, nodes, 0); + + dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops); + + matrix_free(&bmat, rank, nodes, 0); + + starpu_cublas_shutdown(); + if (checkpoint_enabled) + starpu_mpi_checkpoint_shutdown(); + starpu_mpi_shutdown(); + + if (rank == 0) + { + FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000); + FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f)); + } + + return 0; +} diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c new file mode 100644 index 0000000..41cc41c --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c @@ -0,0 +1,338 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_cholesky.h" +#include +#include "common/blas.h" +#ifdef STARPU_USE_CUDA +#include +#include +#include +#ifdef STARPU_HAVE_MAGMA +#include "magma.h" +#include "magma_lapack.h" +#endif +#endif + +/* + * GEMM + */ + +#if defined(STARPU_USE_CUDA) +static const float p1 = 1.0; +static const float m1 = -1.0; +#endif + +static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("gemm\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + + switch (s) + { + case 0: + /* CPU kernel */ + STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, + right, ld12, 1.0f, center, ld22); + break; +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, + &m1, left, ld21, right, ld12, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + 
STARPU_CUBLAS_REPORT_ERROR(status); + + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * SYRK + */ + +static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("syrk\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); + + switch (s) + { + case 0: + { + /* CPU kernel */ + STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, + 1.0f, center, ld22); + break; + } +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), + CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, + &m1, left, ld21, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * TRSM + */ + +static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("trsm\n"); */ + float *sub11; + float *sub21; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + 
unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); + unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = cublasStrsm(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, + nx21, ny21, &p1, sub11, ld11, sub21, ld21); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 1, _args); +} +#endif + +/* + * POTRF + */ + +static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("potrf\n"); */ + float *sub11; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + + unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); + unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned z; + + switch (s) + { + case 0: + +#ifdef STARPU_MKL + STARPU_SPOTRF("L", nx, sub11, ld); +#else + /* + * - alpha 11 <- lambda 11 = sqrt(alpha11) + * - alpha 21 <- l 21 = alpha 21 / lambda 11 + * - A22 <- A22 - l21 trans(l21) + */ + + for (z = 0; z < nx; z++) + { + float lambda11; + lambda11 = sqrt(sub11[z+z*ld]); + sub11[z+z*ld] = lambda11; + + STARPU_ASSERT(lambda11 != 0.0f); + + STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); + + STARPU_SSYR("L", nx - z - 1, -1.0f, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + } +#endif + break; +#ifdef STARPU_USE_CUDA + case 1: +#ifdef STARPU_HAVE_MAGMA + { + int ret; + int 
info; +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream(stream); + magmablasSetKernelStream(stream); +#else + starpu_cublas_set_stream(); +#endif + ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); + if (ret != MAGMA_SUCCESS) + { + fprintf(stderr, "Error in Magma: %d\n", ret); + STARPU_ABORT(); + } +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + cudaError_t cures = cudaStreamSynchronize(stream); +#else + cudaError_t cures = cudaDeviceSynchronize(); +#endif + STARPU_ASSERT(!cures); + } +#else + { + + float *lambda11; + cublasStatus_t status; + cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasHandle_t handle = starpu_cublas_get_local_handle(); + cudaHostAlloc((void **)&lambda11, sizeof(float), 0); + + for (z = 0; z < nx; z++) + { + cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + STARPU_ASSERT(*lambda11 != 0.0f); + + *lambda11 = sqrt(*lambda11); + +/* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ + cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); + float scal = 1.0f/(*lambda11); + + status = cublasSscal(handle, + nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + status = cublasSsyr(handle, + CUBLAS_FILL_MODE_UPPER, + nx - z - 1, &m1, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + + cudaStreamSynchronize(stream); + cudaFreeHost(lambda11); + } +#endif + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + + +void chol_cpu_codelet_update_potrf(void *descr[], void *_args) +{ + chol_common_codelet_update_potrf(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA 
+void chol_cublas_codelet_update_potrf(void *descr[], void *_args) +{ + chol_common_codelet_update_potrf(descr, 1, _args); +} +#endif/* STARPU_USE_CUDA */ diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.h b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.h new file mode 100644 index 0000000..233d517 --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.h @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __MPI_CHOLESKY_KERNELS_H__ +#define __MPI_CHOLESKY_KERNELS_H__ + +#include + +void chol_cpu_codelet_update_potrf(void **, void *); +void chol_cpu_codelet_update_trsm(void **, void *); +void chol_cpu_codelet_update_syrk(void **, void *); +void chol_cpu_codelet_update_gemm(void **, void *); + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_potrf(void *descr[], void *_args); +void chol_cublas_codelet_update_trsm(void *descr[], void *_args); +void chol_cublas_codelet_update_syrk(void *descr[], void *_args); +void chol_cublas_codelet_update_gemm(void *descr[], void *_args); +#endif + +#endif // __MPI_CHOLESKY_KERNELS_H__ diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_models.c b/mpi/examples/matrix_decomposition/mpi_cholesky_models.c new file mode 100644 index 0000000..d073f19 --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_models.c @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "mpi_cholesky.h" + +/* + * History-based performance models for the Cholesky kernels + */ + +struct starpu_perfmodel chol_model_potrf = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "chol_model_potrf" +}; + +struct starpu_perfmodel chol_model_trsm = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "chol_model_trsm" +}; + +struct starpu_perfmodel chol_model_syrk = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "chol_model_syrk" +}; + +struct starpu_perfmodel chol_model_gemm = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "chol_model_gemm" +}; diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_models.h b/mpi/examples/matrix_decomposition/mpi_cholesky_models.h new file mode 100644 index 0000000..c51025c --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_models.h @@ -0,0 +1,25 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DW_CHOLESKY_MODELS_H__ +#define __DW_CHOLESKY_MODELS_H__ + +extern struct starpu_perfmodel chol_model_potrf; +extern struct starpu_perfmodel chol_model_trsm; +extern struct starpu_perfmodel chol_model_syrk; +extern struct starpu_perfmodel chol_model_gemm; + +#endif // __DW_CHOLESKY_MODELS_H__ diff --git a/mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c b/mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c new file mode 100644 index 0000000..27577df --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c @@ -0,0 +1,130 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "mpi_cholesky.h" + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int y, int x, int nb_nodes) +{ + (void)nb_nodes; + //return (x+y) % nb_nodes; + return (x%dblockx)+(y%dblocky)*dblockx; +} + + +void matrix_display(float ***bmat, int rank, int nodes) +{ + int n; + + if (!display) + return; + + starpu_mpi_barrier(MPI_COMM_WORLD); + + for (n = 0; n < rank; n++) + starpu_mpi_barrier(MPI_COMM_WORLD); + + unsigned y; + printf("[%d] Input :\n", rank); + + for(y=0 ; y +#include +#include +#include + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#ifdef STARPU_QUICK_CHECK +unsigned size = 2*320; +unsigned nblocks = 2; +unsigned nbigblocks = 2; +#elif !defined(STARPU_LONG_CHECK) +unsigned size = 4*320; +unsigned nblocks = 4; +unsigned nbigblocks = 2; +#else +unsigned size = 16*320; +unsigned nblocks = 16; +unsigned nbigblocks = 2; +#endif +unsigned noprio = 0; +unsigned check = 0; +unsigned display = 0; +int dblockx = -1; +int dblocky = -1; +enum submission submission = TRIANGLES; +unsigned long checkpoint_period = 1; +#ifdef STARPU_USE_MPI_FT +int checkpoint_enabled = 1; +#else +int checkpoint_enabled = 0; +#endif + +void parse_args(int argc, char **argv, int nodes) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + size = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-dblockx") == 0) + { + char *argptr; + dblockx = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-dblocky") == 0) + { + char *argptr; + dblocky = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nblocks = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-nbigblocks") == 0) + { + char *argptr; + nbigblocks = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-columns") == 0) + { + submission = COLUMNS; + } + + else if (strcmp(argv[i], "-antidiagonals") == 0) + { + submission = 
ANTIDIAGONALS; + } + + else if (strcmp(argv[i], "-prios") == 0) + { + submission = PRIOS; + } + + else if (strcmp(argv[i], "-no-prio") == 0) + { + noprio = 1; + } + + else if (strcmp(argv[i], "-checkpoint-period") == 0) + { + char *argptr; + checkpoint_period = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-checkpoint-enabled") == 0) + { + char *argptr; + checkpoint_enabled = strtol(argv[++i], &argptr, 10); + } + + else if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + else if (strcmp(argv[i], "-display") == 0) + { + display = 1; + } + + else + /* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) */ + { + printf("usage : %s [-size size] [-nblocks nblocks] [-columns] [-antidiagonals] [-prios] [-no-prio] [-display] [-check] [-checkpoint-period period] [-checkpoint-enabled 0/1]\n", argv[0]); + fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks checkpoint enabled %d with period %lu\n", size, size, nblocks, nblocks, checkpoint_enabled, checkpoint_period); + exit(0); + } + } + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + size = 16; +#endif + + if (nblocks > size) + nblocks = size; + + if (dblockx == -1 || dblocky == -1) + { + int factor; + dblockx = nodes; + dblocky = 1; + for(factor=sqrt(nodes) ; factor>1 ; factor--) + { + if (nodes % factor == 0) + { + dblockx = nodes/factor; + dblocky = factor; + break; + } + } + } + FPRINTF(stdout, "size: %u - nblocks: %u - dblocksx: %d - dblocksy: %d\n", size, nblocks, dblockx, dblocky); +} + diff --git a/mpi/examples/matrix_decomposition/mpi_decomposition_params.h b/mpi/examples/matrix_decomposition/mpi_decomposition_params.h new file mode 100644 index 0000000..acdc9bb --- /dev/null +++ b/mpi/examples/matrix_decomposition/mpi_decomposition_params.h @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __MPI_CHOLESKY_PARAMS_H__ +#define __MPI_CHOLESKY_PARAMS_H__ + +#define BLOCKSIZE (size/nblocks) + +extern unsigned size; +extern unsigned nblocks; +extern unsigned nbigblocks; +extern unsigned noprio; +extern unsigned check; +extern unsigned display; +extern int dblockx; +extern int dblocky; +extern unsigned long checkpoint_period; +extern int checkpoint_enabled; + +enum submission +{ + TRIANGLES, + COLUMNS, + ANTIDIAGONALS, + PRIOS, +}; +extern enum submission submission; + +void parse_args(int argc, char **argv, int nodes); + +#endif // __MPI_CHOLESKY_PARAMS_H__ + diff --git a/mpi/examples/matrix_mult/mm.c b/mpi/examples/matrix_mult/mm.c new file mode 100644 index 0000000..18ff040 --- /dev/null +++ b/mpi/examples/matrix_mult/mm.c @@ -0,0 +1,385 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example illustrates how to distribute a pre-existing data structure to + * a set of computing nodes using StarPU-MPI routines. + */ + +#include +#include +#include +#include +#include +#include +#include "helper.h" + +#define VERBOSE 0 + +static int N = 16; /* Matrix size */ +static int BS = 4; /* Block size */ + +#define NB ((N)/(BS)) /* Number of blocks */ + +/* Matrices. Will be allocated as regular, linearized C arrays */ +static double *A = NULL; /* A will be partitioned as BS rows x N cols blocks */ +static double *B = NULL; /* B will be partitioned as N rows x BS cols blocks */ +static double *C = NULL; /* C will be partitioned as BS rows x BS cols blocks */ + +/* Arrays of data handles for managing matrix blocks */ +static starpu_data_handle_t *A_h; +static starpu_data_handle_t *B_h; +static starpu_data_handle_t *C_h; + +static int comm_rank; /* mpi rank of the process */ +static int comm_size; /* size of the mpi session */ + +static void alloc_matrices(void) +{ + /* Regular 'malloc' can also be used instead, however, starpu_malloc make sure that + * the area is allocated in suitably pinned memory to improve data transfers, especially + * with CUDA */ + starpu_malloc((void **)&A, N*N*sizeof(double)); + starpu_malloc((void **)&B, N*N*sizeof(double)); + starpu_malloc((void **)&C, N*N*sizeof(double)); +} + +static void free_matrices(void) +{ + starpu_free_noflag(A, N*N*sizeof(double)); + starpu_free_noflag(B, N*N*sizeof(double)); + starpu_free_noflag(C, N*N*sizeof(double)); +} + +static void init_matrices(void) +{ + int row,col; + for (row = 0; row < N; row++) + { + for (col = 0; col < N; col++) + { + A[row*N+col] = (row==col)?2:0; + B[row*N+col] = row*N+col; + 
C[row*N+col] = 0; + } + } +} + +#if VERBOSE +static void disp_matrix(double *m) +{ + int row,col; + for (row = 0; row < N; row++) + { + for (col = 0; col < N; col++) + { + printf("\t%.2lf", m[row*N+col]); + } + printf("\n"); + } +} +#endif + +static void check_result(void) +{ + int row,col; + for (row = 0; row < N; row++) + { + for (col = 0; col < N; col++) + { + if (fabs(C[row*N+col] - 2*(row*N+col)) > 1.0) + { + fprintf(stderr, "check failed\n"); + exit(1); + } + } + } +#if VERBOSE + printf("success\n"); +#endif +} + + +/* Register the matrix blocks to StarPU and to StarPU-MPI */ +static void register_matrices() +{ + A_h = calloc(NB, sizeof(starpu_data_handle_t)); + B_h = calloc(NB, sizeof(starpu_data_handle_t)); + C_h = calloc(NB*NB, sizeof(starpu_data_handle_t)); + + /* Memory region, where the data being registered resides. + * In this example, all blocks are allocated by node 0, thus + * - node 0 specifies STARPU_MAIN_RAM to indicate that it owns the block in its main memory + * - nodes !0 specify -1 to indicate that they don't have a copy of the block initially + */ + int mr = (comm_rank == 0) ? STARPU_MAIN_RAM : -1; + + /* mpi tag used for the block */ + starpu_mpi_tag_t tag = 0; + + int b_row,b_col; + + for (b_row = 0; b_row < NB; b_row++) + { + /* Register a block to StarPU */ + starpu_matrix_data_register(&A_h[b_row], + mr, + (comm_rank == 0)?(uintptr_t)(A+b_row*BS*N):0, N, N, BS, + sizeof(double)); + + /* Register a block to StarPU-MPI, specifying the mpi tag to use for transferring the block + * and the rank of the owner node. + * + * Note: StarPU-MPI is an autonomous layer built on top of StarPU, hence the two separate + * registration steps. 
+ */ + starpu_data_set_coordinates(A_h[b_row], 2, 0, b_row); + starpu_mpi_data_register(A_h[b_row], tag++, 0); + } + + for (b_col = 0; b_col < NB; b_col++) + { + starpu_matrix_data_register(&B_h[b_col], + mr, + (comm_rank == 0)?(uintptr_t)(B+b_col*BS):0, N, BS, N, + sizeof(double)); + starpu_data_set_coordinates(B_h[b_col], 2, b_col, 0); + starpu_mpi_data_register(B_h[b_col], tag++, 0); + } + + for (b_row = 0; b_row < NB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + starpu_matrix_data_register(&C_h[b_row*NB+b_col], + mr, + (comm_rank == 0)?(uintptr_t)(C+b_row*BS*N+b_col*BS):0, N, BS, BS, + sizeof(double)); + starpu_data_set_coordinates(C_h[b_row*NB+b_col], 2, b_col, b_row); + starpu_mpi_data_register(C_h[b_row*NB+b_col], tag++, 0); + } + } +} + +/* Transfer ownership of the C matrix blocks following some user-defined distribution over the nodes. + * Note: since C will be Write-accessed, it will implicitly define which node perform the task + * associated to a given block. */ +static void distribute_matrix_C(void) +{ + int b_row,b_col; + for (b_row = 0; b_row < NB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + starpu_data_handle_t h = C_h[b_row*NB+b_col]; + + /* Select the node where the block should be computed. */ + int target_rank = (b_row+b_col)%comm_size; + + /* Move the block on to its new owner. */ + starpu_mpi_data_migrate(MPI_COMM_WORLD, h, target_rank); + } + } +} + +/* Transfer ownership of the C matrix blocks back to node 0, for display purpose. This is not mandatory. */ +static void undistribute_matrix_C(void) +{ + int b_row,b_col; + for (b_row = 0; b_row < NB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + starpu_data_handle_t h = C_h[b_row*NB+b_col]; + starpu_mpi_data_migrate(MPI_COMM_WORLD, h, 0); + } + } +} + +/* Unregister matrices from the StarPU management. 
*/ +static void unregister_matrices() +{ + int b_row,b_col; + + for (b_row = 0; b_row < NB; b_row++) + { + starpu_data_unregister(A_h[b_row]); + } + + for (b_col = 0; b_col < NB; b_col++) + { + starpu_data_unregister(B_h[b_col]); + } + + for (b_row = 0; b_row < NB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + starpu_data_unregister(C_h[b_row*NB+b_col]); + } + } + + free(A_h); + free(B_h); + free(C_h); +} + +/* Perform the actual computation. In a real-life case, this would rather call a BLAS 'gemm' routine + * instead. */ +static void cpu_mult(void *handles[], void *arg) +{ + (void)arg; + double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); + double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); + double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); + + unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + unsigned n_col_B = STARPU_MATRIX_GET_NX(handles[1]); + unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); + + unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); + unsigned n_row_B = STARPU_MATRIX_GET_NY(handles[1]); + unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); + + unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); + unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); + unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); + + /* Sanity check, not needed in real life case */ + assert(n_col_C == n_col_B); + assert(n_row_C == n_row_A); + assert(n_col_A == n_row_B); + + unsigned i,j,k; + for (k = 0; k < n_row_C; k++) + { + for (j = 0; j < n_col_C; j++) + { + for (i = 0; i < n_col_A; i++) + { + block_C[k*ld_C+j] += block_A[k*ld_A+i] * block_B[i*ld_B+j]; + } + +#if VERBOSE + /* For illustration purpose, shows which node computed + * the block in the decimal part of the cell */ + block_C[k*ld_C+j] += comm_rank / 100.0; +#endif + } + } +} + +/* Define a StarPU 'codelet' structure for the matrix multiply kernel above. 
+ * This structure enable specifying multiple implementations for the kernel (such as CUDA or OpenCL versions) + */ +static struct starpu_codelet gemm_cl = +{ + .cpu_funcs = {cpu_mult}, /* cpu implementation(s) of the routine */ + .nbuffers = 3, /* number of data handles referenced by this routine */ + .modes = {STARPU_R, STARPU_R, STARPU_RW}, /* access modes for each data handle */ + .name = "gemm" /* to display task name in traces */ +}; + +int main(int argc, char *argv[]) +{ + /* Initializes STarPU and the StarPU-MPI layer */ + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + + /* Parse the matrix size and block size optional args */ + if (argc > 1) + { + N = atoi(argv[1]); + if (N < 1) + { + fprintf(stderr, "invalid matrix size\n"); + exit(1); + } + if (argc > 2) + { + BS = atoi(argv[2]); + } + if (BS < 1 || N % BS != 0) + { + fprintf(stderr, "invalid block size\n"); + exit(1); + } + } + + /* Get the process rank and session size */ + starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + + if (comm_rank == 0) + { +#if VERBOSE + printf("N = %d\n", N); + printf("BS = %d\n", BS); + printf("NB = %d\n", NB); + printf("comm_size = %d\n", comm_size); +#endif + /* In this example, node rank 0 performs all the memory allocations and initializations, + * and the blocks are later distributed on the other nodes. + * This is not mandatory however, and blocks could be allocated on other nodes right + * from the beginning, depending on the application needs (in particular for the case + * where the session wide data footprint is larger than a single node available memory. 
*/ + alloc_matrices(); + init_matrices(); + } + + /* Register matrices to StarPU and StarPU-MPI */ + register_matrices(); + /* Distribute C blocks */ + distribute_matrix_C(); + + int b_row,b_col; + + for (b_row = 0; b_row < NB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl, + STARPU_R, A_h[b_row], + STARPU_R, B_h[b_col], + STARPU_RW, C_h[b_row*NB+b_col], + 0); + } + } + + starpu_task_wait_for_all(); + + undistribute_matrix_C(); + unregister_matrices(); + + if (comm_rank == 0) + { +#if VERBOSE + disp_matrix(C); +#endif + check_result(); + free_matrices(); + } + + starpu_mpi_shutdown(); + return 0; +} diff --git a/mpi/examples/matrix_mult/mm_2dbc.c b/mpi/examples/matrix_mult/mm_2dbc.c new file mode 100644 index 0000000..131d7ed --- /dev/null +++ b/mpi/examples/matrix_mult/mm_2dbc.c @@ -0,0 +1,401 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example illustrates the computation of general matrices with originally + * distributed A, B and C matrices to a set of computing nodes. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "helper.h" +#include + +#define VERBOSE 0 + + +static int M = 1024; /* Matrix size */ +static int N = 1024; /* Matrix size */ +static int K = 1024; /* Matrix size */ +static int BS = 512; /* Block size */ +static int P = 2; /* height of the grid */ +static int Q = 2; /* width of the grid */ +static int T = 1; /* number of runs */ +static int trace = 0; /* whether to trace */ + +#define MB ((M)/(BS)) /* Number of blocks */ +#define NB ((N)/(BS)) /* Number of blocks */ +#define KB ((K)/(BS)) /* Number of blocks */ + + +/* Arrays of data handles for managing matrix blocks */ +static starpu_data_handle_t *A_h; +static starpu_data_handle_t *B_h; +static starpu_data_handle_t *C_h; + +static int comm_rank; /* mpi rank of the process */ +static int comm_size; /* size of the mpi session */ + +struct block +{ + double* c; + int owner; +}; + +struct matrix +{ + int mb, nb, b; + struct block* blocks; +}; + +/* Matrices. 
Will be allocated as regular, linearized C arrays */ +static struct matrix *A = NULL; /* A will be partitioned as MB x KB blocks */ +static struct matrix *B = NULL; /* B will be partitioned as KB x NB blocks */ +static struct matrix *C = NULL; /* C will be partitioned as MB x NB blocks */ + +struct matrix* alloc_matrix(int mb, int nb) +{ + struct matrix* X; + X = malloc(sizeof(struct matrix)); + X->blocks = malloc(mb*nb*sizeof(struct block)); + int i,j; + for (i = 0; iblocks[i*nb+j].owner = (i%P)*Q + (j%Q); + if (X->blocks[i*nb+j].owner == comm_rank) + X->blocks[i*nb+j].c = malloc(BS*BS*sizeof(double)); + } + } + X->mb = mb; + X->nb = nb; + X->b = BS; + return X; +} +static void alloc_matrices(void) +{ + if (VERBOSE) printf("Allocating matrices\n"); + A = alloc_matrix(MB,KB); + B = alloc_matrix(KB,NB); + C = alloc_matrix(MB,NB); +} + +static void free_matrix(struct matrix* X, int mb, int nb) +{ + int i,j; + for (i = 0; iblocks[i*nb+j].owner == comm_rank) + free(X->blocks[i*nb+j].c); + } + } + free(X->blocks); + free(X); +} + +static void free_matrices(void) +{ + if (VERBOSE) printf("Freeing matrices\n"); + free_matrix(A,MB,KB); + free_matrix(B,KB,NB); + free_matrix(C,MB,NB); +} + +static void register_matrix(struct matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb) +{ + int b_row, b_col; + for (b_row = 0; b_row < mb; b_row++) + { + for (b_col = 0; b_col < nb; b_col++) + { + if (X->blocks[b_row*nb+b_col].owner == comm_rank) + { + starpu_matrix_data_register(&X_h[b_row*nb+b_col], + STARPU_MAIN_RAM, + (uintptr_t) X->blocks[b_row*nb+b_col].c, BS, BS, BS, + sizeof(double)); + } + else + { + starpu_matrix_data_register(&X_h[b_row*nb+b_col], + -1, (uintptr_t) NULL, BS, BS, BS, + sizeof(double)); + } +// printf("tag:%d\n",*tag); + starpu_mpi_data_register(X_h[b_row*nb+b_col], (*tag)++, X->blocks[b_row*nb+b_col].owner); + } + } +} + +starpu_mpi_tag_t tag = 0; +/* Register the matrix blocks to StarPU and to StarPU-MPI */ +static void 
register_matrices() +{ + if (VERBOSE) printf("Registering matrices\n"); + A_h = calloc(MB*KB, sizeof(starpu_data_handle_t)); + B_h = calloc(KB*NB, sizeof(starpu_data_handle_t)); + C_h = calloc(MB*NB, sizeof(starpu_data_handle_t)); + + /* mpi tag used for the block */ + register_matrix(A,A_h,&tag,MB,KB); + register_matrix(B,B_h,&tag,KB,NB); + register_matrix(C,C_h,&tag,MB,NB); +} + +static void unregister_matrix(struct matrix* X, starpu_data_handle_t* X_h, int mb, int nb) +{ + int b_row,b_col; + for (b_row = 0; b_row < mb; b_row++) + { + for (b_col = 0; b_col < nb; b_col++) + { + starpu_data_unregister(X_h[b_row*nb+b_col]); + } + } + free(X_h); +} + +/* Unregister matrices from the StarPU management. */ +static void unregister_matrices() +{ + if (VERBOSE) printf("Unregistering matrices\n"); + unregister_matrix(A,A_h,MB,KB); + unregister_matrix(B,B_h,KB,NB); + unregister_matrix(C,C_h,MB,NB); +} + +static void cpu_mult(void *handles[], void *arg) +{ + (void)arg; + double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); + double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); + double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); + + unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); + unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); + + unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); + unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); + unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); + + if (VERBOSE) printf("gemm_task\n"); + STARPU_DGEMM("N", "N", n_row_C,n_col_C,n_col_A, + 1.0, block_A, ld_A, block_B, ld_B, + 1.0, block_C, ld_C); +} + +static void cpu_fill(void *handles[], void *arg) +{ + (void)arg; + double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); + + unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); + + unsigned i,j; + if (VERBOSE) printf("fill_task\n"); + for (i=0;iblocks[row*nb+col].owner == comm_rank) 
+ { + starpu_mpi_task_insert(MPI_COMM_WORLD, &fill_cl, + STARPU_W, X_h[row*nb+col], 0); + } + } + } +} + +static void init_matrices(void) +{ + if (VERBOSE) printf("Initializing matrices\n"); + // I own all the blocks + init_matrix(A,A_h,MB,KB); + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + init_matrix(B,B_h,KB,NB); + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + init_matrix(C,C_h,MB,NB); + starpu_mpi_wait_for_all(MPI_COMM_WORLD); +} + + +int main(int argc, char *argv[]) +{ + /* Initializes StarPU and the StarPU-MPI layer */ + starpu_fxt_autostart_profiling(0); + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); + + /* Get the process rank and session size */ + starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + + if (comm_rank == 0) printf("Launching with %d arguments\n",argc); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? STARPU_TEST_SKIPPED : 0; + } + + /* Parse the matrix size and block size optional args */ + // M, N, K, B, P, Q + if (argc < 8) + { + if (comm_rank == 0) fprintf(stderr, "using default sizes for arguments\n"); + } + else + { + M = atoi(argv[1]); + N = atoi(argv[2]); + K = atoi(argv[3]); + BS = atoi(argv[4]); + P = atoi(argv[5]); + Q = atoi(argv[6]); + T = atoi(argv[7]); + } + + if (BS < 1 || M % BS != 0) + { + if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? 1 : 0; + } + if (BS < 1 || N % BS != 0) + { + if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? 1 : 0; + } + if (BS < 1 || K % BS != 0) + { + if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? 
1 : 0; + } + if (argc > 9) + { + if (comm_rank == 0) fprintf(stderr, "invalid argument size (reuqire 8 arguments, 9 if tracing ; given %d)\n",argc); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? 1 : 0; + } + else if (argc == 9) + { + trace = 1; + } + if (P < 1 || Q < 1 || P*Q != comm_size) + { + fprintf(stderr, "invalid grid size\n"); + starpu_mpi_shutdown(); + return (comm_rank == 0) ? 1 : 0; + } + + if (comm_rank == 0) + { + printf("MxNxK = %dx%dx%d\n", M, N, K); + printf("BS = %d\n", BS); + printf("MxNxKb = %dx%dx%d\n", MB,NB,KB); + printf("comm_size = %d\n", comm_size); + printf("PxQ = %dx%d\n", P, Q); + } + int trial; + double start, stop; + if (trace) starpu_fxt_start_profiling(); + for (trial =0; trial < T; trial++) + { + alloc_matrices(); + register_matrices(); + + init_matrices(); + starpu_mpi_barrier(MPI_COMM_WORLD); + start = starpu_timing_now(); + + int b_row,b_col,b_aisle; + for (b_row = 0; b_row < MB; b_row++) + { + for (b_col = 0; b_col < NB; b_col++) + { + for (b_aisle=0;b_aisle took %f s | %f Gflop/s\n", comm_rank, timing/1000/1000, 2.0*M*N*K/(timing*1000)); + + starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD); + unregister_matrices(); + free_matrices(); + } + + if (trace) starpu_fxt_stop_profiling(); + starpu_mpi_shutdown(); + return 0; +} diff --git a/mpi/examples/mpi_lu/mpi_lu-double.h b/mpi/examples/mpi_lu/mpi_lu-double.h new file mode 100644 index 0000000..2d30a7f --- /dev/null +++ b/mpi/examples/mpi_lu/mpi_lu-double.h @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define TYPE double +#define MPI_TYPE MPI_DOUBLE +#define CUBLAS_TYPE TYPE + +#define STARPU_PLU(name) starpu_pdlu_##name + +#define CUBLAS_GEMM cublasDgemm +#define CUBLAS_TRSM cublasDtrsm +#define CUBLAS_SCAL cublasDscal +#define CUBLAS_GER cublasDger +#define CUBLAS_SWAP cublasDswap +#define CUBLAS_IAMAX cublasIdamax + +#define CPU_GEMM STARPU_DGEMM +#define CPU_GEMV STARPU_DGEMV +#define CPU_TRSM STARPU_DTRSM +#define CPU_SCAL STARPU_DSCAL +#define CPU_GER STARPU_DGER +#define CPU_SWAP STARPU_DSWAP + +#define CPU_TRMM STARPU_DTRMM +#define CPU_AXPY STARPU_DAXPY +#define CPU_ASUM STARPU_DASUM +#define CPU_IAMAX STARPU_IDAMAX + +#define PIVOT_THRESHHOLD 10e-10 + +#define ISZERO(f) (fpclassify(f) == FP_ZERO) diff --git a/mpi/examples/mpi_lu/mpi_lu-float.h b/mpi/examples/mpi_lu/mpi_lu-float.h new file mode 100644 index 0000000..1a8b4ba --- /dev/null +++ b/mpi/examples/mpi_lu/mpi_lu-float.h @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#define TYPE float +#define MPI_TYPE MPI_FLOAT +#define CUBLAS_TYPE TYPE + +#define STARPU_PLU(name) starpu_pslu_##name + +#define CUBLAS_GEMM cublasSgemm +#define CUBLAS_TRSM cublasStrsm +#define CUBLAS_SCAL cublasSscal +#define CUBLAS_GER cublasSger +#define CUBLAS_SWAP cublasSswap +#define CUBLAS_IAMAX cublasIsamax + +#define CPU_GEMM STARPU_SGEMM +#define CPU_GEMV STARPU_SGEMV +#define CPU_TRSM STARPU_STRSM +#define CPU_SCAL STARPU_SSCAL +#define CPU_GER STARPU_SGER +#define CPU_SWAP STARPU_SSWAP + +#define CPU_TRMM STARPU_STRMM +#define CPU_AXPY STARPU_SAXPY +#define CPU_ASUM STARPU_SASUM +#define CPU_IAMAX STARPU_ISAMAX + +#define PIVOT_THRESHHOLD 10e-5 + +#define ISZERO(f) (fpclassify(f) == FP_ZERO) diff --git a/mpi/examples/mpi_lu/pdlu.c b/mpi/examples/mpi_lu/pdlu.c new file mode 100644 index 0000000..5b70f1c --- /dev/null +++ b/mpi/examples/mpi_lu/pdlu.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-double.h" +#include "pxlu.c" diff --git a/mpi/examples/mpi_lu/pdlu_implicit.c b/mpi/examples/mpi_lu/pdlu_implicit.c new file mode 100644 index 0000000..3de55b3 --- /dev/null +++ b/mpi/examples/mpi_lu/pdlu_implicit.c @@ -0,0 +1,19 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-double.h" +#include "pxlu_implicit.c" diff --git a/mpi/examples/mpi_lu/pdlu_kernels.c b/mpi/examples/mpi_lu/pdlu_kernels.c new file mode 100644 index 0000000..c6004b6 --- /dev/null +++ b/mpi/examples/mpi_lu/pdlu_kernels.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-double.h" +#include "pxlu_kernels.c" diff --git a/mpi/examples/mpi_lu/plu_example.c b/mpi/examples/mpi_lu/plu_example.c new file mode 100644 index 0000000..e996dfa --- /dev/null +++ b/mpi/examples/mpi_lu/plu_example.c @@ -0,0 +1,697 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "helper.h" +#include +#include +#include +#include +#include +#include + +#include "pxlu.h" +//#include "pxlu_kernels.h" + +#ifdef STARPU_HAVE_LIBNUMA +#include +#endif + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +static unsigned long size = 4096; +static unsigned nblocks = 16; +static unsigned check = 0; +static int p = -1; +static int q = -1; +static unsigned display = 0; +static unsigned no_prio = 0; + +#ifdef STARPU_HAVE_LIBNUMA +static unsigned numa = 0; +#endif + +static size_t allocated_memory = 0; +static size_t allocated_memory_extra = 0; + +static starpu_data_handle_t *dataA_handles; +static TYPE **dataA; + +/* In order to implement the distributed LU decomposition, we allocate + * temporary buffers */ +#ifdef SINGLE_TMP11 +static starpu_data_handle_t tmp_11_block_handle; +static TYPE *tmp_11_block; +#else +static starpu_data_handle_t *tmp_11_block_handles; +static TYPE **tmp_11_block; +#endif +#ifdef SINGLE_TMP1221 +static starpu_data_handle_t *tmp_12_block_handles; +static TYPE **tmp_12_block; +static starpu_data_handle_t *tmp_21_block_handles; +static TYPE **tmp_21_block; +#else +static starpu_data_handle_t *(tmp_12_block_handles[2]); +static TYPE **(tmp_12_block[2]); +static starpu_data_handle_t *(tmp_21_block_handles[2]); +static TYPE **(tmp_21_block[2]); +#endif + 
+/* Return the word following option argv[*i] and advance *i onto it.
+ * Exits with an error message when the option is the last word of the
+ * command line, instead of reading argv[argc] (NULL). */
+static char *next_option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc)
+	{
+		fprintf(stderr, "Missing value for option %s\n", argv[*i]);
+		exit(1);
+	}
+	return argv[++(*i)];
+}
+
+/* Parse the command line into the file-scope settings size, nblocks, check,
+ * display, numa, p and q.
+ * rank is only read to let a single process print the "libnuma is not
+ * available" warning.
+ * -h, -help and --help print the usage and exit.
+ * When running under Valgrind, size and nblocks are both forced to 4. */
+static void parse_args(int rank, int argc, char **argv)
+{
+	(void)rank;
+	int i;
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
+			size = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
+			nblocks = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-check") == 0)
+		{
+			check = 1;
+		}
+
+		if (strcmp(argv[i], "-display") == 0)
+		{
+			display = 1;
+		}
+
+		if (strcmp(argv[i], "-numa") == 0)
+		{
+#ifdef STARPU_HAVE_LIBNUMA
+			numa = 1;
+#else
+			if (rank == 0)
+				fprintf(stderr, "Warning: libnuma is not available\n");
+#endif
+		}
+
+		if (strcmp(argv[i], "-p") == 0)
+		{
+			p = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-q") == 0)
+		{
+			q = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0)
+		{
+			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
+			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
+			exit(0);
+		}
+	}
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if (RUNNING_ON_VALGRIND)
+	{
+		size = 4;
+		nblocks = 4;
+	}
+#endif
+}
+
+/* Tell whether -display was given on the command line. */
+unsigned STARPU_PLU(display_flag)(void)
+{
+	return display;
+}
+
+/* Fill a square block of (psize/pnblocks) x (psize/pnblocks) elements with
+ * values drawn from starpu_drand48(). */
+static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
+{
+	const unsigned block_size = (psize/pnblocks);
+
+	unsigned i, j;
+	for (i = 0; i < block_size; i++)
+	for (j = 0; j < block_size; j++)
+	{
+		blockptr[j+i*block_size] = (TYPE)starpu_drand48();
+	}
+}
+
+/* Handle of the temporary "11" block: a single one with SINGLE_TMP11,
+ * otherwise one per index k. */
+#ifdef SINGLE_TMP11
+starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void)
+{
+	return tmp_11_block_handle;
+}
+#else
+starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k)
+{
+	return tmp_11_block_handles[k];
+}
+#endif
+
+#ifdef SINGLE_TMP1221
+/* Handle of the temporary "12" block of index j (single-buffer variant). */
+starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j)
+{
+	return tmp_12_block_handles[j];
+}
+
+/* Handle of the temporary "21" block of index i (single-buffer variant). */
+starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i)
+{
+	return tmp_21_block_handles[i];
+}
+#else
+/* Handle of the temporary "12" block of index j; two sets of buffers exist
+ * and k%2 selects which one is used. */
+starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k)
+{
+	return tmp_12_block_handles[k%2][j];
+}
+
+/* Handle of the temporary "21" block of index i; two sets of buffers exist
+ * and k%2 selects which one is used. */
+starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k)
+{
+	return tmp_21_block_handles[k%2][i];
+}
+#endif
+
+/* Every process allocates every temporary "11" block: always returns 1. */
+static unsigned tmp_11_block_is_needed(int rank, unsigned pnblocks, unsigned k)
+{
+	(void)rank;
+	(void)pnblocks;
+	(void)k;
+	return 1;
+}
+
+/* Return 1 when at least one block (i, j) with 1 <= i < pnblocks is mapped on
+ * process rank by get_block_rank(), 0 otherwise. */
+static unsigned tmp_12_block_is_needed(int rank, unsigned pnblocks, unsigned j)
+{
+	unsigned i;
+	for (i = 1; i < pnblocks; i++)
+	{
+		if (get_block_rank(i, j) == rank)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Return 1 when at least one block (i, j) with 1 <= j < pnblocks is mapped on
+ * process rank by get_block_rank(), 0 otherwise. */
+static unsigned tmp_21_block_is_needed(int rank, unsigned pnblocks, unsigned i)
+{
+	unsigned j;
+	for (j = 1; j < pnblocks; j++)
+	{
+		if (get_block_rank(i, j) == rank)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Allocate, fill and register the blocks of the matrix owned by process rank,
+ * then allocate and register the temporary 11, 12 and 21 buffers.
+ * Blocks owned by another process get STARPU_POISON_PTR as data pointer and
+ * as handle.  The byte counters allocated_memory and allocated_memory_extra
+ * are updated along the way. */
+static void init_matrix(int rank)
+{
+#ifdef STARPU_HAVE_LIBNUMA
+	if (numa)
+	{
+		fprintf(stderr, "Using INTERLEAVE policy\n");
+		unsigned long nodemask = ((1<<0)|(1<<1));
+		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
+		if (ret)
+			perror("set_mempolicy failed");
+	}
+#endif
+
+	/* Allocate a grid of data handles, not all of them have to be allocated later on */
+	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
+	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
+	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+
+	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
+
+	/* Allocate all the blocks that belong to this mpi node */
+	unsigned long i,j;
+	for (j = 0; j < nblocks; j++)
+	{
+		for (i = 0; i < nblocks; i++)
+		{
+			TYPE **blockptr = &dataA[j+i*nblocks];
+			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
+
+			if (get_block_rank(i, j) == rank)
+			{
+				/* This block should be treated by the current MPI process */
+				/* Allocate and fill it */
+				starpu_malloc((void **)blockptr, blocksize);
+				allocated_memory += blocksize;
+
+				fill_block_with_random(*blockptr, size, nblocks);
+				if (i == j)
+				{
+					/* Boost the diagonal of the diagonal blocks */
+					unsigned tmp;
+					for (tmp = 0; tmp < size/nblocks; tmp++)
+					{
+						(*blockptr)[tmp*((size/nblocks)+1)] += 1;
+						(*blockptr)[tmp*((size/nblocks)+1)] *= 100;
+					}
+				}
+
+				/* Register it to StarPU */
+				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
+					(uintptr_t)*blockptr, size/nblocks,
+					size/nblocks, size/nblocks, sizeof(TYPE));
+				starpu_data_set_coordinates(*handleptr, 2, j, i);
+			}
+			else
+			{
+				*blockptr = STARPU_POISON_PTR;
+				*handleptr = STARPU_POISON_PTR;
+			}
+		}
+	}
+
+	/* Allocate the temporary buffers required for the distributed algorithm */
+
+	unsigned k;
+
+	/* tmp buffer 11 */
+#ifdef SINGLE_TMP11
+	starpu_malloc((void **)&tmp_11_block, blocksize);
+	allocated_memory_extra += blocksize;
+	starpu_matrix_data_register(&tmp_11_block_handle, STARPU_MAIN_RAM, (uintptr_t)tmp_11_block,
+			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+#else
+	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
+	tmp_11_block = calloc(nblocks, sizeof(TYPE *));
+	allocated_memory_extra += nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+
+	for (k = 0; k < nblocks; k++)
+	{
+		if (tmp_11_block_is_needed(rank, nblocks, k))
+		{
+			starpu_malloc((void **)&tmp_11_block[k], blocksize);
+			allocated_memory_extra += blocksize;
+			STARPU_ASSERT(tmp_11_block[k]);
+
+			starpu_matrix_data_register(&tmp_11_block_handles[k], STARPU_MAIN_RAM,
+				(uintptr_t)tmp_11_block[k],
+				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+		}
+	}
+#endif
+
+	/* tmp buffers 12 and 21 */
+#ifdef SINGLE_TMP1221
+	tmp_12_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
+	tmp_21_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
+	tmp_12_block = calloc(nblocks, sizeof(TYPE *));
+	tmp_21_block = calloc(nblocks, sizeof(TYPE *));
+
+	allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+#else
+	for (i = 0; i < 2; i++)
+	{
+		tmp_12_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
+		tmp_21_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
+		tmp_12_block[i] = calloc(nblocks, sizeof(TYPE *));
+		tmp_21_block[i] = calloc(nblocks, sizeof(TYPE *));
+
+		allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+	}
+#endif
+
+	for (k = 0; k < nblocks; k++)
+	{
+#ifdef SINGLE_TMP1221
+		if (tmp_12_block_is_needed(rank, nblocks, k))
+		{
+			starpu_malloc((void **)&tmp_12_block[k], blocksize);
+			allocated_memory_extra += blocksize;
+			STARPU_ASSERT(tmp_12_block[k]);
+
+			starpu_matrix_data_register(&tmp_12_block_handles[k], STARPU_MAIN_RAM,
+				(uintptr_t)tmp_12_block[k],
+				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+		}
+
+		if (tmp_21_block_is_needed(rank, nblocks, k))
+		{
+			starpu_malloc((void **)&tmp_21_block[k], blocksize);
+			allocated_memory_extra += blocksize;
+			STARPU_ASSERT(tmp_21_block[k]);
+
+			starpu_matrix_data_register(&tmp_21_block_handles[k], STARPU_MAIN_RAM,
+				(uintptr_t)tmp_21_block[k],
+				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+		}
+#else
+		for (i = 0; i < 2; i++)
+		{
+			if (tmp_12_block_is_needed(rank, nblocks, k))
+			{
+				starpu_malloc((void **)&tmp_12_block[i][k], blocksize);
+				allocated_memory_extra += blocksize;
+				STARPU_ASSERT(tmp_12_block[i][k]);
+
+				starpu_matrix_data_register(&tmp_12_block_handles[i][k], STARPU_MAIN_RAM,
+					(uintptr_t)tmp_12_block[i][k],
+					size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+			}
+
+			if (tmp_21_block_is_needed(rank, nblocks, k))
+			{
+				starpu_malloc((void **)&tmp_21_block[i][k], blocksize);
+				allocated_memory_extra += blocksize;
+				STARPU_ASSERT(tmp_21_block[i][k]);
+
+				starpu_matrix_data_register(&tmp_21_block_handles[i][k], STARPU_MAIN_RAM,
+					(uintptr_t)tmp_21_block[i][k],
+					size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+			}
+		}
+#endif
+	}
+}
+
+/* Data pointer of block (i, j); STARPU_POISON_PTR for blocks owned by another
+ * process. */
+TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
+{
+	return dataA[j+i*nblocks];
+}
+
+/* Rank of the process owning block (i, j). */
+int get_block_rank(unsigned i, unsigned j)
+{
+	/* Take a 2D block cyclic distribution */
+	/* NB: p (resp. q) is for "direction" i (resp. j) */
+	return (j % q) * p + (i % p);
+}
+
+/* StarPU handle of block (i, j); STARPU_POISON_PTR for blocks owned by
+ * another process. */
+starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
+{
+	return dataA_handles[j+i*nblocks];
+}
+
+/* When -display was given, print for each block its owner rank, data pointer
+ * and handle on stderr. */
+static void display_grid(int rank, unsigned pnblocks)
+{
+	if (!display)
+		return;
+
+	//if (rank == 0)
+	{
+		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
+
+		unsigned i, j;
+		for (j = 0; j < pnblocks; j++)
+		{
+			for (i = 0; i < pnblocks; i++)
+			{
+				TYPE *blockptr = STARPU_PLU(get_block)(i, j);
+				starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
+
+				/* %p requires void * arguments */
+				fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), (void *)blockptr, (void *)handle);
+			}
+			fprintf(stderr, "\n");
+		}
+	}
+}
+
+/* Distributed LU example driver: initialize MPI and StarPU-MPI, build the
+ * local part of the matrix, run STARPU_PLU(plu_main), report the min/max/avg
+ * time on rank 0, optionally check the result (-check), then unregister and
+ * free everything. */
+int main(int argc, char **argv)
+{
+	int rank;
+	int world_size;
+	int ret;
+	unsigned i, j, k;
+
+	/*
+	 *	Initialization
+	 */
+	int thread_support;
+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
+	{
+		fprintf(stderr,"MPI_Init_thread failed\n");
+		exit(1);
+	}
+	if (thread_support == MPI_THREAD_FUNNELED)
+		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
+	if (thread_support < MPI_THREAD_FUNNELED)
+		fprintf(stderr,"Warning: MPI does not have thread support!\n");
+
+	/* parse_args() reads rank, so it has to be known before the call */
+	if (MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS)
+	{
+		fprintf(stderr,"MPI_Comm_rank failed\n");
+		exit(1);
+	}
+
+	starpu_srand48((long int)time(NULL));
+
+	parse_args(rank, argc, argv);
+
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
+
+	/* We disable sequential consistency in this example */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	if (p == -1 && q==-1)
+	{
+		fprintf(stderr, "Setting default values for p and q\n");
+		/* Use two rows of processes when their number is even; q is
+		 * still unset (-1) at this point and cannot be tested. */
+		p = (world_size % 2 == 0) ? 2 : 1;
+		q = world_size / p;
+
+	}
+	STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size);
+
+	starpu_cublas_init();
+
+	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
+
+	/*
+	 * 	Problem Init
+	 */
+
+	init_matrix(rank);
+
+	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
+			(int)(allocated_memory/(1024*1024)),
+			(int)(allocated_memory_extra/(1024*1024)),
+			(int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
+
+	display_grid(rank, nblocks);
+
+	TYPE *a_r = NULL;
+//	STARPU_PLU(display_data_content)(a_r, size);
+
+	if (check)
+	{
+		TYPE *x, *y;
+
+		x = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(x);
+
+		y = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y);
+
+		if (rank == 0)
+		{
+			unsigned ind;
+			for (ind = 0; ind < size; ind++)
+				x[ind] = (TYPE)starpu_drand48();
+		}
+
+		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
+
+		if (rank == 0)
+			STARPU_PLU(display_data_content)(a_r, size);
+
+//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
+
+		free(x);
+		free(y);
+	}
+
+	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
+
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
+
+	/*
+	 * 	Report performance
+	 */
+
+	int reduce_ret;
+	double min_timing = timing;
+	double max_timing = timing;
+	double sum_timing = timing;
+
+	reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
+
+	reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
+
+	reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
+
+	if (rank == 0)
+	{
+		fprintf(stderr, "Computation took: %f ms\n", max_timing/1000);
+		fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000);
+		fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000);
+		fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
+
+		unsigned n = size;
+		double flop = (2.0f*n*n*n)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0f));
+	}
+
+	/*
+	 * 	Test Result Correctness
+	 */
+
+	if (check)
+	{
+		/*
+		 *	Compute || A - LU ||
+		 */
+
+		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
+
+#if 0
+		/*
+		 *	Compute || Ax - LUx ||
+		 */
+
+		unsigned ind;
+
+		y2 = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y);
+
+		if (rank == 0)
+		{
+			for (ind = 0; ind < size; ind++)
+			{
+				y2[ind] = (TYPE)0.0;
+			}
+		}
+
+		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
+
+		/* Compute y2 = y2 - y */
+		CPU_AXPY(size, -1.0, y, 1, y2, 1);
+
+		TYPE err = CPU_ASUM(size, y2, 1);
+		int max = CPU_IAMAX(size, y2, 1);
+
+		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
+		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
+#endif
+	}
+
+	/*
+	 * 	Termination
+	 */
+	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
+	for (j = 0; j < nblocks; j++)
+	{
+		for (i = 0; i < nblocks; i++)
+		{
+			starpu_data_handle_t handle = dataA_handles[j+nblocks*i];
+			if (handle != STARPU_POISON_PTR)
+				starpu_data_unregister(handle);
+			TYPE *blockptr = dataA[j+i*nblocks];
+			if (blockptr != STARPU_POISON_PTR)
+				starpu_free_noflag(blockptr, blocksize);
+		}
+	}
+	free(dataA_handles);
+	free(dataA);
+
+#ifdef SINGLE_TMP11
+	starpu_data_unregister(tmp_11_block_handle);
+	starpu_free_noflag(tmp_11_block, blocksize);
+#else
+	for (k = 0; k < nblocks; k++)
+	{
+		if (tmp_11_block_is_needed(rank, nblocks, k))
+		{
+			starpu_data_unregister(tmp_11_block_handles[k]);
+			starpu_free_noflag(tmp_11_block[k], blocksize);
+		}
+	}
+	free(tmp_11_block_handles);
+	free(tmp_11_block);
+#endif
+
+	for (k = 0; k < nblocks; k++)
+	{
+#ifdef SINGLE_TMP1221
+		if (tmp_12_block_is_needed(rank, nblocks, k))
+		{
+			/* Unregister the k-th handle, not the array of handles */
+			starpu_data_unregister(tmp_12_block_handles[k]);
+			starpu_free_noflag(tmp_12_block[k], blocksize);
+		}
+
+		if (tmp_21_block_is_needed(rank, nblocks, k))
+		{
+			starpu_data_unregister(tmp_21_block_handles[k]);
+			starpu_free_noflag(tmp_21_block[k], blocksize);
+		}
+#else
+		for (i = 0; i < 2; i++)
+		{
+			if (tmp_12_block_is_needed(rank, nblocks, k))
+			{
+				starpu_data_unregister(tmp_12_block_handles[i][k]);
+				starpu_free_noflag(tmp_12_block[i][k], blocksize);
+			}
+
+			if (tmp_21_block_is_needed(rank, nblocks, k))
+			{
+				starpu_data_unregister(tmp_21_block_handles[i][k]);
+				starpu_free_noflag(tmp_21_block[i][k], blocksize);
+			}
+		}
+#endif
+	}
+
+#ifdef SINGLE_TMP1221
+	free(tmp_12_block_handles);
+	free(tmp_21_block_handles);
+	free(tmp_12_block);
+	free(tmp_21_block);
+#else
+	for (i = 0; i < 2; i++)
+	{
+		free(tmp_12_block_handles[i]);
+		free(tmp_21_block_handles[i]);
+		free(tmp_12_block[i]);
+		free(tmp_21_block[i]);
+	}
+#endif
+
+	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
+
+	starpu_cublas_shutdown();
+	starpu_mpi_shutdown();
+
+	MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/examples/mpi_lu/plu_example_double.c b/mpi/examples/mpi_lu/plu_example_double.c
new file mode 100644
index 0000000..2daef17
--- /dev/null
+++ b/mpi/examples/mpi_lu/plu_example_double.c
@@ -0,0 +1,18 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-double.h" +#include "plu_example.c" diff --git a/mpi/examples/mpi_lu/plu_example_float.c b/mpi/examples/mpi_lu/plu_example_float.c new file mode 100644 index 0000000..9b77329 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_example_float.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "plu_example.c" diff --git a/mpi/examples/mpi_lu/plu_implicit_example.c b/mpi/examples/mpi_lu/plu_implicit_example.c new file mode 100644 index 0000000..17c3335 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_implicit_example.c @@ -0,0 +1,400 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "helper.h"
+/* NOTE(review): the names of the system headers were missing from this copy;
+ * they are restored here from the symbols used below -- confirm against the
+ * upstream file. */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <starpu.h>
+
+#include "pxlu.h"
+//#include "pxlu_kernels.h"
+
+#ifdef STARPU_HAVE_LIBNUMA
+#include <numaif.h>
+#endif
+
+#ifdef STARPU_HAVE_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+static unsigned long size = 4096;
+static unsigned nblocks = 16;
+static unsigned check = 0;
+static int p = -1;
+static int q = -1;
+static unsigned display = 0;
+static unsigned no_prio = 0;
+
+#ifdef STARPU_HAVE_LIBNUMA
+static unsigned numa = 0;
+#endif
+
+static size_t allocated_memory = 0;
+static size_t allocated_memory_extra = 0;
+
+static starpu_data_handle_t *dataA_handles;
+static TYPE **dataA;
+
+int get_block_rank(unsigned i, unsigned j);
+
+/* Return the word following option argv[*i] and advance *i onto it.
+ * Exits with an error message when the option is the last word of the
+ * command line, instead of reading argv[argc] (NULL). */
+static char *next_option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc)
+	{
+		fprintf(stderr, "Missing value for option %s\n", argv[*i]);
+		exit(1);
+	}
+	return argv[++(*i)];
+}
+
+/* Parse the command line into the file-scope settings size, nblocks, check,
+ * display, numa, p and q.
+ * -h, -help and --help print the usage and exit.
+ * When running under Valgrind, size and nblocks are both forced to 4. */
+static void parse_args(int argc, char **argv)
+{
+	int i;
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
+			size = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
+			nblocks = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-check") == 0)
+		{
+			check = 1;
+		}
+
+		if (strcmp(argv[i], "-display") == 0)
+		{
+			display = 1;
+		}
+
+		if (strcmp(argv[i], "-numa") == 0)
+		{
+#ifdef STARPU_HAVE_LIBNUMA
+			numa = 1;
+#else
+			fprintf(stderr, "Warning: libnuma is not available\n");
+#endif
+		}
+
+		if (strcmp(argv[i], "-p") == 0)
+		{
+			p = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-q") == 0)
+		{
+			q = strtol(next_option_value(argc, argv, &i), NULL, 10);
+		}
+
+		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0)
+		{
+			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
+			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
+			exit(0);
+		}
+	}
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if (RUNNING_ON_VALGRIND)
+	{
+		size = 4;
+		nblocks = 4;
+	}
+#endif
+}
+
+/* Tell whether -display was given on the command line. */
+unsigned STARPU_PLU(display_flag)(void)
+{
+	return display;
+}
+
+/* Fill a square block of (psize/pnblocks) x (psize/pnblocks) elements with
+ * values drawn from starpu_drand48(). */
+static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
+{
+	const unsigned block_size = (psize/pnblocks);
+
+	unsigned i, j;
+	for (i = 0; i < block_size; i++)
+	for (j = 0; j < block_size; j++)
+	{
+		blockptr[j+i*block_size] = (TYPE)starpu_drand48();
+	}
+}
+
+/* Register one StarPU matrix handle per block of the matrix.
+ * Blocks owned by process rank are allocated, filled with random values and
+ * registered in main RAM; the other blocks are registered with home node -1
+ * and get STARPU_POISON_PTR as data pointer.  Every handle is given its
+ * coordinates and is registered to StarPU-MPI with tag j+i*nblocks and its
+ * owner rank. */
+static void init_matrix(int rank)
+{
+#ifdef STARPU_HAVE_LIBNUMA
+	if (numa)
+	{
+		fprintf(stderr, "Using INTERLEAVE policy\n");
+		unsigned long nodemask = ((1<<0)|(1<<1));
+		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
+		if (ret)
+			perror("set_mempolicy failed");
+	}
+#endif
+
+	/* Allocate a grid of data handles, not all of them have to be allocated later on */
+	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
+	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
+	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+
+	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
+
+	/* Allocate all the blocks that belong to this mpi node */
+	unsigned long i,j;
+	for (j = 0; j < nblocks; j++)
+	{
+		for (i = 0; i < nblocks; i++)
+		{
+			int block_rank = get_block_rank(i, j);
+			TYPE **blockptr = &dataA[j+i*nblocks];
+			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
+
+			if (block_rank == rank)
+			{
+				/* This block should be treated by the current MPI process */
+				/* Allocate and fill it */
+				starpu_malloc((void **)blockptr, blocksize);
+				allocated_memory += blocksize;
+
+				fill_block_with_random(*blockptr, size, nblocks);
+				if (i == j)
+				{
+					/* Boost the diagonal of the diagonal blocks */
+					unsigned tmp;
+					for (tmp = 0; tmp < size/nblocks; tmp++)
+					{
+						(*blockptr)[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks;
+					}
+				}
+
+				/* Register it to StarPU */
+				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
+					(uintptr_t)*blockptr, size/nblocks,
+					size/nblocks, size/nblocks, sizeof(TYPE));
+			}
+			else
+			{
+				starpu_matrix_data_register(handleptr, -1,
+					0, size/nblocks,
+					size/nblocks, size/nblocks, sizeof(TYPE));
+				*blockptr = STARPU_POISON_PTR;
+			}
+			starpu_data_set_coordinates(*handleptr, 2, j, i);
+			starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank);
+		}
+	}
+}
+
+/* Data pointer of block (i, j); STARPU_POISON_PTR for blocks owned by another
+ * process. */
+TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
+{
+	return dataA[j+i*nblocks];
+}
+
+/* Rank of the process owning block (i, j). */
+int get_block_rank(unsigned i, unsigned j)
+{
+	/* Take a 2D block cyclic distribution */
+	/* NB: p (resp. q) is for "direction" i (resp. j) */
+	return (j % q) * p + (i % p);
+}
+
+/* StarPU handle of block (i, j). */
+starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
+{
+	return dataA_handles[j+i*nblocks];
+}
+
+/* When -display was given, print for each block its owner rank, data pointer
+ * and handle on stderr. */
+static void display_grid(int rank, unsigned pnblocks)
+{
+	if (!display)
+		return;
+
+	//if (rank == 0)
+	{
+		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
+
+		unsigned i, j;
+		for (j = 0; j < pnblocks; j++)
+		{
+			for (i = 0; i < pnblocks; i++)
+			{
+				TYPE *blockptr = STARPU_PLU(get_block)(i, j);
+				starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
+
+				/* %p requires void * arguments */
+				fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), (void *)blockptr, (void *)handle);
+			}
+			fprintf(stderr, "\n");
+		}
+	}
+}
+
+/* Implicit-communication LU example driver: initialize StarPU-MPI, register
+ * the matrix, run STARPU_PLU(plu_main), report the time on rank 0,
+ * optionally check the result (-check), then unregister and free
+ * everything. */
+int main(int argc, char **argv)
+{
+	int rank;
+	int world_size;
+	int ret;
+	unsigned i, j;
+	starpu_srand48((long int)time(NULL));
+
+	parse_args(argc, argv);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
+
+	if (p == -1 && q==-1)
+	{
+		fprintf(stderr, "Setting default values for p and q\n");
+		/* Use two rows of processes when their number is even; q is
+		 * still unset (-1) at this point and cannot be tested. */
+		p = (world_size % 2 == 0) ? 2 : 1;
+		q = world_size / p;
+
+	}
+	STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size);
+
+	starpu_cublas_init();
+
+	/*
+	 * 	Problem Init
+	 */
+
+	init_matrix(rank);
+
+	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
+			(int)(allocated_memory/(1024*1024)),
+			(int)(allocated_memory_extra/(1024*1024)),
+			(int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
+
+	display_grid(rank, nblocks);
+
+	TYPE *a_r = NULL;
+//	STARPU_PLU(display_data_content)(a_r, size);
+
+	if (check)
+	{
+		TYPE *x, *y;
+
+		x = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(x);
+
+		y = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y);
+
+		if (rank == 0)
+		{
+			unsigned ind;
+			for (ind = 0; ind < size; ind++)
+				x[ind] = (TYPE)starpu_drand48();
+		}
+
+		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
+
+		if (rank == 0)
+			STARPU_PLU(display_data_content)(a_r, size);
+
+//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
+
+		free(x);
+		free(y);
+	}
+
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
+
+	/*
+	 * 	Report performance
+	 */
+
+	if (rank == 0)
+	{
+		fprintf(stderr, "Computation took: %f ms\n", timing/1000);
+
+		unsigned n = size;
+		double flop = (2.0f*n*n*n)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
+
+	/*
+	 * 	Test Result Correctness
+	 */
+
+	if (check)
+	{
+		/*
+		 *	Compute || A - LU ||
+		 */
+
+		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
+
+#if 0
+		/*
+		 *	Compute || Ax - LUx ||
+		 */
+
+		unsigned ind;
+
+		y2 = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y);
+
+		if (rank == 0)
+		{
+			for (ind = 0; ind < size; ind++)
+			{
+				y2[ind] = (TYPE)0.0;
+			}
+		}
+
+		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
+
+		/* Compute y2 = y2 - y */
+		CPU_AXPY(size, -1.0, y, 1, y2, 1);
+
+		TYPE err = CPU_ASUM(size, y2, 1);
+		int max = CPU_IAMAX(size, y2, 1);
+
+		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
+		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
+#endif
+	}
+
+	/*
+	 * 	Termination
+	 */
+	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
+	for (j = 0; j < nblocks; j++)
+	{
+		for (i = 0; i < nblocks; i++)
+		{
+			starpu_data_unregister(dataA_handles[j+nblocks*i]);
+			TYPE *blockptr = dataA[j+i*nblocks];
+			if (blockptr != STARPU_POISON_PTR)
+				starpu_free_noflag(blockptr, blocksize);
+		}
+	}
+	free(dataA_handles);
+	free(dataA);
+
+	starpu_cublas_shutdown();
+	starpu_mpi_shutdown();
+
+	return 0;
+}
diff --git a/mpi/examples/mpi_lu/plu_implicit_example_double.c b/mpi/examples/mpi_lu/plu_implicit_example_double.c
new file mode 100644
index 0000000..08e8031
--- /dev/null
+++ b/mpi/examples/mpi_lu/plu_implicit_example_double.c
@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "mpi_lu-double.h"
+#include "plu_implicit_example.c"
diff --git a/mpi/examples/mpi_lu/plu_implicit_example_float.c b/mpi/examples/mpi_lu/plu_implicit_example_float.c
new file mode 100644
index 0000000..b2a8dc1
--- /dev/null
+++ b/mpi/examples/mpi_lu/plu_implicit_example_float.c
@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "plu_implicit_example.c" diff --git a/mpi/examples/mpi_lu/plu_outofcore_example.c b/mpi/examples/mpi_lu/plu_outofcore_example.c new file mode 100644 index 0000000..dc900b1 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_outofcore_example.c @@ -0,0 +1,515 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "helper.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pxlu.h" +//#include "pxlu_kernels.h" + +#ifdef STARPU_HAVE_LIBNUMA +#include +#endif + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +static unsigned long size = 4096; +static unsigned nblocks = 16; +static size_t blocksize; +static unsigned check = 0; +static int p = -1; +static int q = -1; +static unsigned display = 0; +static unsigned no_prio = 0; + +#ifdef STARPU_HAVE_LIBNUMA +static unsigned numa = 0; +#endif + +unsigned bound = 0; +unsigned bounddeps = 0; +unsigned boundprio = 0; + +static size_t allocated_memory = 0; + +static starpu_data_handle_t *dataA_handles; +static void **disk_objs; + +static int disk_node; + +int get_block_rank(unsigned i, unsigned j); + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + size = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-check") == 0) + { + check = 1; + } + + if (strcmp(argv[i], "-display") == 0) + { + display = 1; + } + + if (strcmp(argv[i], "-numa") == 0) + { +#ifdef STARPU_HAVE_LIBNUMA + numa = 1; +#else + fprintf(stderr, "Warning: libnuma is not available\n"); +#endif + } + + if (strcmp(argv[i], "-p") == 0) + { + char *argptr; + p = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-q") == 0) + { + char *argptr; + q = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-path") == 0) + { + path = argv[++i]; + } + + if (strcmp(argv[i], "-bound") == 0) + { + bound = 1; + } + if (strcmp(argv[i], "-bounddeps") == 0) + { + bound = 1; + bounddeps = 1; + } + if (strcmp(argv[i], "-bounddepsprio") == 0) + { + bound = 1; + bounddeps = 1; + boundprio = 1; + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], 
"--help") == 0) + { + fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q] [-path PATH] [-bound] [-bounddeps] [-bounddepsprio]\n", argv[0]); + fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n"); + exit(0); + } + } + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + { + size = 4; + nblocks = 4; + } +#endif +} + +unsigned STARPU_PLU(display_flag)(void) +{ + return display; +} + +static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks) +{ + const unsigned block_size = (psize/pnblocks); + + unsigned i, j; + for (i = 0; i < block_size; i++) + for (j = 0; j < block_size; j++) + { + blockptr[j+i*block_size] = (TYPE)starpu_drand48(); + } +} + +static void create_matrix() +{ + TYPE *blockptr = malloc(blocksize); + int fd; + char *filename; + unsigned filename_length = strlen(path) + 1 + sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1; + + filename = malloc(filename_length); + + allocated_memory += nblocks*nblocks*blocksize; + + /* Create the whole matrix on the disk */ + unsigned i,j; + for (j = 0; j < nblocks; j++) + { + for (i = 0; i < nblocks; i++) + { + fill_block_with_random(blockptr, size, nblocks); + if (i == j) + { + unsigned tmp; + for (tmp = 0; tmp < size/nblocks; tmp++) + { + blockptr[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks; + } + } + snprintf(filename, filename_length, "%s/%u,%u", path, i, j); + fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0777); + if (fd < 0) + { + perror("open"); + exit(1); + } + if (write(fd, blockptr, blocksize) != (starpu_ssize_t) blocksize) + { + fprintf(stderr,"short write"); + exit(1); + } + if (close(fd) < 0) + { + perror("close"); + exit(1); + } + } + } + + free(blockptr); + free(filename); +} + +static void init_matrix(int rank) +{ + /* Allocate a grid of data handles, not all of them have to be allocated later on */ + dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t)); + disk_objs = calloc(nblocks*nblocks, 
sizeof(*disk_objs)); + + disk_node = starpu_disk_register(&starpu_disk_unistd_ops, path, STARPU_MAX(16*1024*1024, size*size*sizeof(TYPE))); + assert(disk_node >= 0); + + char filename[sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1]; + + /* Allocate all the blocks that belong to this mpi node */ + unsigned i,j; + for (j = 0; j < nblocks; j++) + { + for (i = 0; i < nblocks; i++) + { + int block_rank = get_block_rank(i, j); +// starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; + starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; + + if (block_rank == rank) + { + snprintf(filename, sizeof(filename), "%u,%u", i, j); + /* Register it to StarPU */ + disk_objs[j+nblocks*i] = starpu_disk_open(disk_node, filename, blocksize); + if (!disk_objs[j+nblocks*i]) + { + fprintf(stderr,"could not open %s\n", filename); + exit(1); + } + starpu_matrix_data_register(handleptr, disk_node, + (uintptr_t) disk_objs[j+nblocks*i], size/nblocks, + size/nblocks, size/nblocks, sizeof(TYPE)); + starpu_data_acquire_on_node(*handleptr, STARPU_MAIN_RAM, STARPU_W); + void *interface = starpu_data_get_interface_on_node(*handleptr, STARPU_MAIN_RAM); + TYPE *data = (void*) STARPU_MATRIX_GET_PTR(interface); + fill_block_with_random(data, size, nblocks); + if (i == j) + { + unsigned tmp; + for (tmp = 0; tmp < size/nblocks; tmp++) + { + data[tmp*((size/nblocks)+1)] += 1; + data[tmp*((size/nblocks)+1)] *= 100; + } + } + starpu_data_release_on_node(*handleptr, STARPU_MAIN_RAM); + } + else + { + disk_objs[j+nblocks*i] = NULL; + starpu_matrix_data_register(handleptr, -1, + 0, size/nblocks, + size/nblocks, size/nblocks, sizeof(TYPE)); + } + starpu_data_set_coordinates(*handleptr, 2, j, i); + starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank); + } + } + + //display_all_blocks(nblocks, size/nblocks); +} + +static void destroy_matrix(int rank) +{ + char *filename; + unsigned filename_length = strlen(path) + 1 + sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1; + unsigned i,j; + + 
+ filename = malloc(filename_length); + + for (j = 0; j < nblocks; j++) + { + for (i = 0; i < nblocks; i++) + { + int block_rank = get_block_rank(i, j); + if (block_rank == rank) + { + snprintf(filename, filename_length, "%s/%u,%u", path, i, j); + unlink(filename); + } + } + } + + free(filename); + + rmdir(path); +} + +TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j) +{ + (void)i; + (void)j; + /* This does not really make sense in out of core */ + assert(0); + /* Still return a value so that NDEBUG builds do not fall off the end of a non-void function */ + return NULL; +} + +int get_block_rank(unsigned i, unsigned j) +{ + /* Take a 2D block cyclic distribution */ + /* NB: p (resp. q) is for "direction" i (resp. j) */ + return (j % q) * p + (i % p); +} + +starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j) +{ + return dataA_handles[j+i*nblocks]; +} + +#if STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else +int main(int argc, char **argv) +{ + int rank; + int world_size; + int ret; + unsigned i, j; + + starpu_srand48((long int)time(NULL)); + + parse_args(argc, argv); + + blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); + + ret = mkdir(path, 0777); + if (ret != 0 && errno != EEXIST) + { + fprintf(stderr,"%s does not exist\n", path); + exit(1); + } + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); + + if (p == -1 && q==-1) + { + fprintf(stderr, "Setting default values for p and q\n"); + /* q is still -1 at this point: derive the grid shape from the number of MPI nodes */ + p = (world_size % 2 == 0) ? 
2 : 1; + q = world_size / p; + + } + STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size); + + starpu_cublas_init(); + + /* + * Problem Init + */ + + if (rank == 0) + create_matrix(); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + init_matrix(rank); + + if (rank == 0) + fprintf(stderr, "%dMB on disk\n", (int)(allocated_memory/(1024*1024))); + + TYPE *a_r = NULL; +// STARPU_PLU(display_data_content)(a_r, size); + + if (check) + { + TYPE *x, *y; + + x = calloc(size, sizeof(TYPE)); + STARPU_ASSERT(x); + + y = calloc(size, sizeof(TYPE)); + STARPU_ASSERT(y); + + if (rank == 0) + { + unsigned ind; + for (ind = 0; ind < size; ind++) + x[ind] = (TYPE)starpu_drand48(); + } + + a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); + + if (rank == 0) + STARPU_PLU(display_data_content)(a_r, size); + +// STARPU_PLU(compute_ax)(size, x, y, nblocks, rank); + + free(x); + free(y); + } + + if (bound) + starpu_bound_start(bounddeps, boundprio); + + double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio); + + if (bound) + starpu_bound_stop(); + + /* + * Report performance + */ + + if (rank == 0) + { + fprintf(stderr, "Computation took: %f ms\n", timing/1000); + + unsigned n = size; + double flop = (2.0f*n*n*n)/3.0f; + printf("# size\tms\tGFlops"); fflush(stdout); + if (bound) + printf("\tTms\tTGFlops"); + printf("\n"); + printf("%u\t%.0f\t%2.2f", n, timing/1000, (flop/timing/1000.0f)); + if (bound) + { + double min; + starpu_bound_compute(&min, NULL, 0); + printf("\t%.0f\t%.1f", min, flop/min/1000000.0f); + } + printf("\n"); + } + + /* + * Test Result Correctness + */ + + if (check) + { + /* + * Compute || A - LU || + */ + + STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r); + +#if 0 + /* + * Compute || Ax - LUx || + */ + + unsigned ind; + + y2 = calloc(size, sizeof(TYPE)); + STARPU_ASSERT(y); + + if (rank == 0) + { + for (ind = 0; ind < size; ind++) + { + y2[ind] = (TYPE)0.0; + } + } + + STARPU_PLU(compute_lux)(size, x, y2, nblocks, 
rank); + + /* Compute y2 = y2 - y */ + CPU_AXPY(size, -1.0, y, 1, y2, 1); + + TYPE err = CPU_ASUM(size, y2, 1); + int max = CPU_IAMAX(size, y2, 1); + + fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size)); + fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]); +#endif + } + + /* + * Termination + */ + for (j = 0; j < nblocks; j++) + { + for (i = 0; i < nblocks; i++) + { + starpu_data_unregister(dataA_handles[j+nblocks*i]); + if (disk_objs[j+nblocks*i]) + starpu_disk_close(disk_node, disk_objs[j+nblocks*i], blocksize); + } + } + free(dataA_handles); + free(disk_objs); + + destroy_matrix(rank); + + starpu_cublas_shutdown(); + starpu_mpi_shutdown(); + + return 0; +} +#endif diff --git a/mpi/examples/mpi_lu/plu_outofcore_example_double.c b/mpi/examples/mpi_lu/plu_outofcore_example_double.c new file mode 100644 index 0000000..2e403c7 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_outofcore_example_double.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +static char *path = "./starpu-ooc-files-double"; +#include "mpi_lu-double.h" +#include "plu_outofcore_example.c" diff --git a/mpi/examples/mpi_lu/plu_outofcore_example_float.c b/mpi/examples/mpi_lu/plu_outofcore_example_float.c new file mode 100644 index 0000000..bbc7708 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_outofcore_example_float.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +static char *path = "./starpu-ooc-files-float"; +#include "mpi_lu-float.h" +#include "plu_outofcore_example.c" diff --git a/mpi/examples/mpi_lu/plu_solve.c b/mpi/examples/mpi_lu/plu_solve.c new file mode 100644 index 0000000..cbcf64b --- /dev/null +++ b/mpi/examples/mpi_lu/plu_solve.c @@ -0,0 +1,396 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "pxlu.h" + +/* + * Various useful functions + */ + +/* Frobenius norm of the n x n matrix v (leading dimension n) */ +static double frobenius_norm(TYPE *v, unsigned n) +{ + double sum2 = 0.0; + + /* compute sqrt(Sum(|x|^2)) */ + + unsigned i,j; + for (j = 0; j < n; j++) + for (i = 0; i < n; i++) + { + /* index in size_t: n*j would wrap in unsigned arithmetic for large n */ + double a = fabs((double)v[i+(size_t)n*j]); + sum2 += a*a; + } + + return sqrt(sum2); +} + +void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize) +{ + if (!STARPU_PLU(display_flag)()) + return; + + fprintf(stderr, "DISPLAY BLOCK\n"); + + unsigned i, j; + for (j = 0; j < blocksize; j++) + { + for (i = 0; i < blocksize; i++) + { + fprintf(stderr, "%f ", data[j+i*blocksize]); + } + fprintf(stderr, "\n"); + } + + fprintf(stderr, "****\n"); +} + +void STARPU_PLU(extract_upper)(unsigned block_size, TYPE *inblock, TYPE *outblock) +{ + unsigned li, lj; + for (lj = 0; lj < block_size; lj++) + { + /* Upper block diag is 1 */ + outblock[lj*(block_size + 1)] = (TYPE)1.0; + + for (li = lj + 1; li < block_size; li++) + { + outblock[lj + li*block_size] = inblock[lj + li*block_size]; + } + } +} + +void STARPU_PLU(extract_lower)(unsigned block_size, TYPE *inblock, TYPE *outblock) +{ + unsigned li, lj; + for (lj = 0; lj < block_size; lj++) + { + for (li = 0; li <= lj; li++) + { + outblock[lj + li*block_size] = inblock[lj + li*block_size]; + } + } +} + +/* + * Compute Ax = y + */ + +static void STARPU_PLU(compute_ax_block)(unsigned block_size, TYPE *block_data, TYPE *sub_x, TYPE *sub_y) +{ + /* %p requires void * arguments */ + fprintf(stderr, "block data %p sub x %p sub y %p\n", (void *)block_data, (void *)sub_x, (void *)sub_y); + CPU_GEMV("N", block_size, block_size, 1.0, block_data, block_size, sub_x, 1, 1.0, sub_y, 1); +} + +static void STARPU_PLU(compute_ax_block_upper)(unsigned size, unsigned 
nblocks, + TYPE *block_data, TYPE *sub_x, TYPE *sub_y) +{ + unsigned block_size = size/nblocks; + + /* Take a copy of the upper part of the diagonal block */ + TYPE *upper_block_copy = calloc((block_size)*(block_size), sizeof(TYPE)); + STARPU_PLU(extract_upper)(block_size, block_data, upper_block_copy); + + STARPU_PLU(compute_ax_block)(block_size, upper_block_copy, sub_x, sub_y); + + free(upper_block_copy); +} + +static void STARPU_PLU(compute_ax_block_lower)(unsigned size, unsigned nblocks, + TYPE *block_data, TYPE *sub_x, TYPE *sub_y) +{ + unsigned block_size = size/nblocks; + + /* Take a copy of the upper part of the diagonal block */ + TYPE *lower_block_copy = calloc((block_size)*(block_size), sizeof(TYPE)); + STARPU_PLU(extract_lower)(block_size, block_data, lower_block_copy); + + STARPU_PLU(compute_ax_block)(size/nblocks, lower_block_copy, sub_x, sub_y); + + free(lower_block_copy); +} + +void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank) +{ + /* Create temporary buffers where all MPI processes are going to + * compute Ui x = yi where Ai is the matrix containing the blocks of U + * affected to process i, and 0 everywhere else. We then have y as the + * sum of all yi. 
+ */ + TYPE *yi = calloc(size, sizeof(TYPE)); + + fprintf(stderr, "Compute LU\n"); + + unsigned block_size = size/nblocks; + + /* Compute UiX = Yi */ + unsigned long i,j; + for (j = 0; j < nblocks; j++) + { + if (get_block_rank(j, j) == rank) + { + TYPE *block_data = STARPU_PLU(get_block)(j, j); + TYPE *sub_x = &x[j*(block_size)]; + TYPE *sub_yi = &yi[j*(block_size)]; + + STARPU_PLU(compute_ax_block_upper)(size, nblocks, block_data, sub_x, sub_yi); + } + + for (i = j + 1; i < nblocks; i++) + { + if (get_block_rank(i, j) == rank) + { + /* That block belongs to the current MPI process */ + TYPE *block_data = STARPU_PLU(get_block)(i, j); + TYPE *sub_x = &x[i*(block_size)]; + TYPE *sub_yi = &yi[j*(block_size)]; + + STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi); + } + } + } + + /* Grab Sum Yi in X */ + MPI_Reduce(yi, x, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); + memset(yi, 0, size*sizeof(TYPE)); + +// unsigned ind; +// if (rank == 0) +// { +// fprintf(stderr, "INTERMEDIATE\n"); +// for (ind = 0; ind < STARPU_MIN(10, size); ind++) +// { +// fprintf(stderr, "x[%d] = %f\n", ind, (float)x[ind]); +// } +// fprintf(stderr, "****\n"); +// } + + /* Everyone needs x: broadcast the buffer itself, &x would send/overwrite + * the memory around the local pointer variable */ + int bcst_ret; + bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD); + STARPU_ASSERT(bcst_ret == MPI_SUCCESS); + + /* Compute LiX = Yi (with X = UX) */ + for (j = 0; j < nblocks; j++) + { + if (j > 0) + for (i = 0; i < j; i++) + { + if (get_block_rank(i, j) == rank) + { + /* That block belongs to the current MPI process */ + TYPE *block_data = STARPU_PLU(get_block)(i, j); + TYPE *sub_x = &x[i*(block_size)]; + TYPE *sub_yi = &yi[j*(block_size)]; + + STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi); + } + } + + if (get_block_rank(j, j) == rank) + { + TYPE *block_data = STARPU_PLU(get_block)(j, j); + TYPE *sub_x = &x[j*(block_size)]; + TYPE *sub_yi = &yi[j*(block_size)]; + + STARPU_PLU(compute_ax_block_lower)(size, nblocks, block_data, sub_x, sub_yi); + } + } +
+ /* Grab Sum Yi in Y */ + MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); + + free(yi); +} + + + +/* + * Allocate a contiguous matrix on node 0 and fill it with the whole + * content of the matrix distributed across all nodes. + */ + +TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks) +{ +// fprintf(stderr, "RECONSTRUCT MATRIX size %d nblocks %d\n", size, nblocks); + + /* element count in size_t: size*size would wrap in unsigned arithmetic */ + TYPE *bigmatrix = calloc((size_t)size*size, sizeof(TYPE)); + + unsigned block_size = size/nblocks; + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + unsigned bi, bj; + for (bj = 0; bj < nblocks; bj++) + for (bi = 0; bi < nblocks; bi++) + { + TYPE *block = NULL; + + int block_rank = get_block_rank(bi, bj); + + if (block_rank == 0) + { + block = STARPU_PLU(get_block)(bi, bj); + } + else + { + MPI_Status status; + + if (rank == 0) + { + block = calloc(block_size*block_size, sizeof(TYPE)); + + int ret = MPI_Recv(block, block_size*block_size, MPI_TYPE, block_rank, 0, MPI_COMM_WORLD, &status); + STARPU_ASSERT(ret == MPI_SUCCESS); + } + else if (rank == block_rank) + { + block = STARPU_PLU(get_block)(bi, bj); + int ret = MPI_Send(block, block_size*block_size, MPI_TYPE, 0, 0, MPI_COMM_WORLD); + STARPU_ASSERT(ret == MPI_SUCCESS); + } + } + + if (rank == 0) + { + unsigned j, i; + for (j = 0; j < block_size; j++) + for (i = 0; i < block_size; i++) + { + bigmatrix[(j + bj*block_size)+(size_t)(i+bi*block_size)*size] = + block[j+i*block_size]; + } + + if (get_block_rank(bi, bj) != 0) + free(block); + } + } + + return bigmatrix; +} + +/* x and y must be valid (at least) on 0 */ +void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank) +{ + unsigned block_size = size/nblocks; + + /* Send x to everyone: broadcast the buffer itself, &x would send/overwrite + * the memory around the local pointer variable */ + int bcst_ret; + bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD); + STARPU_ASSERT(bcst_ret == MPI_SUCCESS); + + /* Create temporary buffers where all MPI processes are going to + * compute Ai x = yi where Ai is the matrix containing the 
blocks of A + * affected to process i, and 0 everywhere else. We then have y as the + * sum of all yi. */ + TYPE *yi = calloc(size, sizeof(TYPE)); + + /* Compute Aix = yi */ + unsigned long i,j; + for (j = 0; j < nblocks; j++) + { + for (i = 0; i < nblocks; i++) + { + if (get_block_rank(i, j) == rank) + { + /* That block belongs to the current MPI process */ + TYPE *block_data = STARPU_PLU(get_block)(i, j); + TYPE *sub_x = &x[i*block_size]; + TYPE *sub_yi = &yi[j*block_size]; + + STARPU_PLU(compute_ax_block)(block_size, block_data, sub_x, sub_yi); + } + } + } + + /* Compute the Sum of all yi = y */ + MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); + + fprintf(stderr, "RANK %d - FOO 1 y[0] %f\n", rank, y[0]); + + free(yi); +} + +/* Gather the factorized matrix, split it into L and U on rank 0, and print + * error figures for L*U against the saved input matrix Asaved. */ +void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved) +{ + TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); + + unsigned display = STARPU_PLU(display_flag)(); + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + if (rank == 0) + { + /* zero-initialized; element count in size_t so size*size cannot wrap */ + TYPE *L = calloc((size_t)size*size, sizeof(TYPE)); + TYPE *U = calloc((size_t)size*size, sizeof(TYPE)); + + STARPU_ASSERT(L); + STARPU_ASSERT(U); + + /* only keep the lower part */ + unsigned i, j; + for (j = 0; j < size; j++) + { + for (i = 0; i < j; i++) + { + L[j+i*size] = all_r[j+i*size]; + } + + /* diag i = j */ + L[j+j*size] = all_r[j+j*size]; + U[j+j*size] = 1.0; + + for (i = j+1; i < size; i++) + { + U[j+i*size] = all_r[j+i*size]; + } + } + + STARPU_PLU(display_data_content)(L, size); + STARPU_PLU(display_data_content)(U, size); + + /* now A_err = L, compute L*U */ + CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); + + if (display) + fprintf(stderr, "\nLU\n"); + + STARPU_PLU(display_data_content)(L, size); + + /* compute "LU - A" in L*/ + CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1); + + TYPE err = CPU_ASUM(size*size, L, 1); + int max = CPU_IAMAX(size*size, L, 1); + + if (display) + 
fprintf(stderr, "DISPLAY ERROR\n"); + + STARPU_PLU(display_data_content)(L, size); + + fprintf(stderr, "(A - LU) Avg error : %e\n", err/((double)size*size)); + fprintf(stderr, "(A - LU) Max error : %e\n", L[max]); + + double residual = frobenius_norm(L, size); + double matnorm = frobenius_norm(Asaved, size); + + fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size)); + + /* L and U are only needed for the checks above */ + free(L); + free(U); + } + + free(all_r); +} diff --git a/mpi/examples/mpi_lu/plu_solve_double.c b/mpi/examples/mpi_lu/plu_solve_double.c new file mode 100644 index 0000000..42d880f --- /dev/null +++ b/mpi/examples/mpi_lu/plu_solve_double.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-double.h" +#include "plu_solve.c" diff --git a/mpi/examples/mpi_lu/plu_solve_float.c b/mpi/examples/mpi_lu/plu_solve_float.c new file mode 100644 index 0000000..5e0c965 --- /dev/null +++ b/mpi/examples/mpi_lu/plu_solve_float.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "plu_solve.c" diff --git a/mpi/examples/mpi_lu/pslu.c b/mpi/examples/mpi_lu/pslu.c new file mode 100644 index 0000000..c0dad2a --- /dev/null +++ b/mpi/examples/mpi_lu/pslu.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "pxlu.c" diff --git a/mpi/examples/mpi_lu/pslu_implicit.c b/mpi/examples/mpi_lu/pslu_implicit.c new file mode 100644 index 0000000..5617ef2 --- /dev/null +++ b/mpi/examples/mpi_lu/pslu_implicit.c @@ -0,0 +1,19 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "pxlu_implicit.c" diff --git a/mpi/examples/mpi_lu/pslu_kernels.c b/mpi/examples/mpi_lu/pslu_kernels.c new file mode 100644 index 0000000..bdb81a3 --- /dev/null +++ b/mpi/examples/mpi_lu/pslu_kernels.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "mpi_lu-float.h" +#include "pxlu_kernels.c" diff --git a/mpi/examples/mpi_lu/pxlu.c b/mpi/examples/mpi_lu/pxlu.c new file mode 100644 index 0000000..5db210f --- /dev/null +++ b/mpi/examples/mpi_lu/pxlu.c @@ -0,0 +1,919 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "pxlu.h" +#include "pxlu_kernels.h" +#include + +#define MPI_TAG_GETRF(k) ((1U << 16) | (k)) +#define MPI_TAG_TRSM_LL(k, j) ((2U << 16) | (k)<<8 | (j)) +#define MPI_TAG_TRSM_RU(k, i) ((3U << 16) | (i)<<8 | (k)) + +// GETRF TRSM_RU +// TRSM_LL GEMM + +#define TAG_GETRF(k) ((starpu_tag_t)((1ULL<<50) | (unsigned long long)(k))) +#define TAG_TRSM_LL(k,j) ((starpu_tag_t)(((2ULL<<50) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_TRSM_RU(k,i) ((starpu_tag_t)(((3ULL<<50) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(i)))) +#define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<50) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) +#define TAG_GETRF_SAVE(k) ((starpu_tag_t)((5ULL<<50) | (unsigned long long)(k))) +#define TAG_TRSM_LL_SAVE(k,j) ((starpu_tag_t)(((6ULL<<50) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_TRSM_RU_SAVE(k,i) ((starpu_tag_t)(((7ULL<<50) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(i)))) + +#define TAG_GETRF_SAVE_PARTIAL(k) ((starpu_tag_t)((8ULL<<50) | (unsigned long long)(k))) +#define TAG_TRSM_LL_SAVE_PARTIAL(k,j) ((starpu_tag_t)(((9ULL<<50) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_TRSM_RU_SAVE_PARTIAL(k,i) ((starpu_tag_t)(((10ULL<<50) | 
(((unsigned long long)(k))<<32) \ + | (unsigned long long)(i)))) + +#define STARPU_TAG_INIT ((starpu_tag_t)(11ULL<<50)) + +//#define VERBOSE_INIT 1 + +//#define DEBUG 1 + +static unsigned no_prio = 0; + +static unsigned nblocks = 0; +static int rank = -1; +static int world_size = -1; + +struct callback_arg +{ + unsigned i, j, k; +}; + +/* + * Various + */ + +static struct debug_info *create_debug_info(unsigned i, unsigned j, unsigned k) +{ + struct debug_info *info = malloc(sizeof(struct debug_info)); + + info->i = i; + info->j = j; + info->k = k; + + return info; +} + +static struct starpu_task *create_task(starpu_tag_t id) +{ + struct starpu_task *task = starpu_task_create(); + task->cl_arg = NULL; + + task->use_tag = 1; + task->tag_id = id; + + return task; +} + +/* Send handle to every node appearing in the mask, and unlock tag once the + * transfers are done. */ +static void send_data_to_mask(starpu_data_handle_t handle, int *rank_mask, starpu_mpi_tag_t mpi_tag, starpu_tag_t tag) +{ + unsigned cnt = 0; + + STARPU_ASSERT(handle != STARPU_POISON_PTR); + + int rank_array[world_size]; + MPI_Comm comm_array[world_size]; + starpu_mpi_tag_t mpi_tag_array[world_size]; + starpu_data_handle_t handle_array[world_size]; + + int r; + for (r = 0; r < world_size; r++) + { + if (rank_mask[r]) + { + rank_array[cnt] = r; + + comm_array[cnt] = MPI_COMM_WORLD; + mpi_tag_array[cnt] = mpi_tag; + handle_array[cnt] = handle; + cnt++; + } + } + + if (cnt == 0) + { + /* In case there is no message to send, we release the tag at + * once */ + starpu_tag_notify_from_apps(tag); + } + else + { + int ret = starpu_mpi_isend_array_detached_unlock_tag(cnt, handle_array, + rank_array, mpi_tag_array, comm_array, tag); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_array_detached_unlock_tag"); + } +} + +/* Initiate a receive request once all dependencies are fulfilled and unlock + * tag 'unlocked_tag' once it's done. 
*/ + +struct recv_when_done_callback_arg +{ + int source; + starpu_mpi_tag_t mpi_tag; + starpu_data_handle_t handle; + starpu_tag_t unlocked_tag; +}; + +static void callback_receive_when_done(void *_arg) +{ + struct recv_when_done_callback_arg *arg = _arg; + + int ret = starpu_mpi_irecv_detached_unlock_tag(arg->handle, arg->source, + arg->mpi_tag, MPI_COMM_WORLD, arg->unlocked_tag); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached_unlock_tag"); + + free(arg); +} + +static void receive_when_deps_are_done(unsigned ndeps, starpu_tag_t *deps_tags, + int source, starpu_mpi_tag_t mpi_tag, + starpu_data_handle_t handle, + starpu_tag_t partial_tag, + starpu_tag_t unlocked_tag) +{ + STARPU_ASSERT(handle != STARPU_POISON_PTR); + + struct recv_when_done_callback_arg *arg = malloc(sizeof(struct recv_when_done_callback_arg)); + + arg->source = source; + arg->mpi_tag = mpi_tag; + arg->handle = handle; + arg->unlocked_tag = unlocked_tag; + + if (ndeps == 0) + { + callback_receive_when_done(arg); + return; + } + + starpu_create_sync_task(partial_tag, ndeps, deps_tags, callback_receive_when_done, arg); +} + +/* + * Task GETRF (diagonal factorization) + */ + +static void create_task_getrf_recv(unsigned k) +{ + /* The current node is not computing that task, so we receive the block + * with MPI */ + + /* We don't issue a MPI receive request until everyone using the + * temporary buffer is done : 11_(k-1) can be used by 12_(k-1)j and + * 21(k-1)i with i,j >= k */ + unsigned ndeps = 0; + starpu_tag_t tag_array[2*nblocks]; + +#ifdef SINGLE_TMP11 + if (k > 0) + { + unsigned i; + for (i = (k-1)+1; i < nblocks; i++) + { + if (rank == get_block_rank(i, k-1)) + tag_array[ndeps++] = TAG_TRSM_RU(k-1, i); + } + + unsigned j; + for (j = (k-1)+1; j < nblocks; j++) + { + if (rank == get_block_rank(k-1, j)) + tag_array[ndeps++] = TAG_TRSM_LL(k-1, j); + } + } +#endif + + int source = get_block_rank(k, k); +#ifdef SINGLE_TMP11 + starpu_data_handle_t block_handle = 
STARPU_PLU(get_tmp_11_block_handle)(); +#else + starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)(k); +#endif + starpu_mpi_tag_t mpi_tag = MPI_TAG_GETRF(k); + starpu_tag_t partial_tag = TAG_GETRF_SAVE_PARTIAL(k); + starpu_tag_t unlocked_tag = TAG_GETRF_SAVE(k); + +// fprintf(stderr, "NODE %d - 11 (%d) - recv when done ndeps %d - tag array %lx\n", rank, k, ndeps, tag_array[0]); + receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); +} + +static void find_nodes_using_11(unsigned k, int *rank_mask) +{ + memset(rank_mask, 0, world_size*sizeof(int)); + + /* Block 11_k is used to compute 12_kj + 12ki with i,j > k */ + unsigned i; + for (i = k+1; i < nblocks; i++) + { + int r = get_block_rank(i, k); + rank_mask[r] = 1; + } + + unsigned j; + for (j = k+1; j < nblocks; j++) + { + int r = get_block_rank(k, j); + rank_mask[r] = 1; + } +} + +static void callback_task_getrf_real(void *_arg) +{ + struct callback_arg *arg = _arg; + + unsigned k = arg->k; + + /* Find all the nodes potentially requiring this block */ + int rank_mask[world_size]; + find_nodes_using_11(k, rank_mask); + rank_mask[rank] = 0; + + /* Send the block to those nodes */ + starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(k, k); + starpu_tag_t tag = TAG_GETRF_SAVE(k); + starpu_mpi_tag_t mpi_tag = MPI_TAG_GETRF(k); + send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); + + free(arg); +} + +static void create_task_getrf_real(unsigned k) +{ + struct starpu_task *task = create_task(TAG_GETRF(k)); + + task->cl = &STARPU_PLU(cl_getrf); + task->color = 0xffff00; + + task->cl_arg = create_debug_info(k, k, k); + task->cl_arg_free = 1; + + /* which sub-data is manipulated ? 
*/ + task->handles[0] = STARPU_PLU(get_block_handle)(k, k); + + struct callback_arg *arg = malloc(sizeof(struct callback_arg)); + arg->k = k; + + task->callback_func = callback_task_getrf_real; + task->callback_arg = arg; + + /* this is an important task */ + if (!no_prio) + task->priority = 3*nblocks - 3*k; + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); + } + else + { + starpu_tag_declare_deps(TAG_GETRF(k), 1, STARPU_TAG_INIT); + } + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static void create_task_getrf(unsigned k) +{ + if (get_block_rank(k, k) == rank) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "CREATE real task 11(%u) (TAG_GETRF_SAVE(%u) = %llux) on node %d\n", k, k, (unsigned long long) TAG_GETRF_SAVE(k), rank); +#endif + create_task_getrf_real(k); + } + else + { + /* We don't handle the task, but perhaps we have to generate MPI transfers. */ + int rank_mask[world_size]; + find_nodes_using_11(k, rank_mask); + + if (rank_mask[rank]) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "create RECV task 11(%u) on node %d\n", k, rank); +#endif + create_task_getrf_recv(k); + } + else + { +#ifdef VERBOSE_INIT + fprintf(stderr, "Node %d needs not 11(%u)\n", rank, k); +#endif + } + } +} + + + +/* + * Task TRSM_LL + */ + +static void create_task_trsm_ll_recv(unsigned k, unsigned j) +{ + /* The current node is not computing that task, so we receive the block + * with MPI */ + + /* We don't issue a MPI receive request until everyone using the + * temporary buffer is done : 12_(k-1)j can be used by 22_(k-1)ij with + * i >= k */ + unsigned ndeps = 0; + starpu_tag_t tag_array[nblocks]; + + unsigned start; + unsigned bound; + +#ifdef SINGLE_TMP1221 + bound = 0; + start = (k-1)+1; +#else + bound = 1; + start = (k-2)+1; +#endif + + if (k > bound) + { + unsigned i; + for (i = start; i < nblocks; i++) + { + if (rank == get_block_rank(i, j)) +#ifdef 
SINGLE_TMP1221 + tag_array[ndeps++] = TAG_GEMM(k-1, i, j); +#else + tag_array[ndeps++] = TAG_GEMM(k-2, i, j); +#endif + } + } + + int source = get_block_rank(k, j); +#ifdef SINGLE_TMP1221 + starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j); +#else + starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j,k); +#endif + starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_LL(k, j); + starpu_tag_t partial_tag = TAG_TRSM_LL_SAVE_PARTIAL(k, j); + starpu_tag_t unlocked_tag = TAG_TRSM_LL_SAVE(k, j); + + receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); +} + +static void find_nodes_using_12(unsigned k, unsigned j, int *rank_mask) +{ + memset(rank_mask, 0, world_size*sizeof(int)); + + /* Block 12_kj is used to compute 22_kij with i > k */ + unsigned i; + for (i = k+1; i < nblocks; i++) + { + int r = get_block_rank(i, j); + rank_mask[r] = 1; + } +} + +static void callback_task_trsm_ll_real(void *_arg) +{ + struct callback_arg *arg = _arg; + + unsigned k = arg->k; + unsigned j = arg->j; + + /* Find all the nodes potentially requiring this block */ + int rank_mask[world_size]; + find_nodes_using_12(k, j, rank_mask); + rank_mask[rank] = 0; + + /* Send the block to those nodes */ + starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(k, j); + starpu_tag_t tag = TAG_TRSM_LL_SAVE(k, j); + starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_LL(k, j); + send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); + + free(arg); +} + +static void create_task_trsm_ll_real(unsigned k, unsigned j) +{ + struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); + +#ifdef STARPU_DEVEL +#warning temporary fix :/ +#endif +// task->cl = &STARPU_PLU(cl_trsm_ll); + task->cl = &STARPU_PLU(cl_trsm_ru); + task->color = 0x8080ff; + + task->cl_arg = create_debug_info(j, j, k); + task->cl_arg_free = 1; + + unsigned diag_block_is_local = (get_block_rank(k, k) == rank); + + starpu_tag_t tag_11_dep; + + /* which sub-data 
is manipulated ? */ + starpu_data_handle_t diag_block; + if (diag_block_is_local) + { + diag_block = STARPU_PLU(get_block_handle)(k, k); + tag_11_dep = TAG_GETRF(k); + } + else + { +#ifdef SINGLE_TMP11 + diag_block = STARPU_PLU(get_tmp_11_block_handle)(); +#else + diag_block = STARPU_PLU(get_tmp_11_block_handle)(k); +#endif + tag_11_dep = TAG_GETRF_SAVE(k); + } + + task->handles[0] = diag_block; + task->handles[1] = STARPU_PLU(get_block_handle)(k, j); + + STARPU_ASSERT(get_block_rank(k, j) == rank); + + STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); + STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); + + struct callback_arg *arg = malloc(sizeof(struct callback_arg)); + arg->j = j; + arg->k = k; + + task->callback_func = callback_task_trsm_ll_real; + task->callback_arg = arg; + + if (!no_prio) + task->priority = 3*nblocks - (2*k + j); + + /* enforce dependencies ... */ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, tag_11_dep, TAG_GEMM(k-1, k, j)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, tag_11_dep); + } + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static void create_task_trsm_ll(unsigned k, unsigned j) +{ + if (get_block_rank(k, j) == rank) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "CREATE real task 12(k = %u, j = %u) on node %d\n", k, j, rank); +#endif + create_task_trsm_ll_real(k, j); + } + else + { + /* We don't handle the task, but perhaps we have to generate MPI transfers. 
*/ + int rank_mask[world_size]; + find_nodes_using_12(k, j, rank_mask); + + if (rank_mask[rank]) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "create RECV task 12(k = %u, j = %u) on node %d\n", k, j, rank); +#endif + create_task_trsm_ll_recv(k, j); + } + else + { +#ifdef VERBOSE_INIT + fprintf(stderr, "Node %d needs not 12(k=%u, i=%u)\n", rank, k, j); +#endif + } + } +} + +/* + * Task TRSM_RU + */ + +static void create_task_trsm_ru_recv(unsigned k, unsigned i) +{ + /* The current node is not computing that task, so we receive the block + * with MPI */ + + /* We don't issue a MPI receive request until everyone using the + * temporary buffer is done : 21_(k-1)i can be used by 22_(k-1)ij with + * j >= k */ + unsigned ndeps = 0; + starpu_tag_t tag_array[nblocks]; + + unsigned bound; + unsigned start; + +#ifdef SINGLE_TMP1221 + bound = 0; + start = (k-1)+1; +#else + bound = 1; + start = (k-2)+1; +#endif + if (k > bound) + { + unsigned j; + for (j = start; j < nblocks; j++) + { + if (rank == get_block_rank(i, j)) +#ifdef SINGLE_TMP1221 + tag_array[ndeps++] = TAG_GEMM(k-1, i, j); +#else + tag_array[ndeps++] = TAG_GEMM(k-2, i, j); +#endif + } + } + + int source = get_block_rank(i, k); +#ifdef SINGLE_TMP1221 + starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i); +#else + starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i, k); +#endif + starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_RU(k, i); + starpu_tag_t partial_tag = TAG_TRSM_RU_SAVE_PARTIAL(k, i); + starpu_tag_t unlocked_tag = TAG_TRSM_RU_SAVE(k, i); + +// fprintf(stderr, "NODE %d - 21 (%d, %d) - recv when done ndeps %d - tag array %lx\n", rank, k, i, ndeps, tag_array[0]); + receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); +} + +static void find_nodes_using_21(unsigned k, unsigned i, int *rank_mask) +{ + memset(rank_mask, 0, world_size*sizeof(int)); + + /* Block 21_ki is used to compute 22_kij with j > k */ + unsigned j; + for (j 
= k+1; j < nblocks; j++) + { + int r = get_block_rank(i, j); + rank_mask[r] = 1; + } +} + +static void callback_task_trsm_ru_real(void *_arg) +{ + struct callback_arg *arg = _arg; + + unsigned k = arg->k; + unsigned i = arg->i; + + /* Find all the nodes potentially requiring this block */ + int rank_mask[world_size]; + find_nodes_using_21(k, i, rank_mask); + rank_mask[rank] = 0; + + /* Send the block to those nodes */ + starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(i, k); + starpu_tag_t tag = TAG_TRSM_RU_SAVE(k, i); + starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_RU(k, i); + send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); + + free(arg); +} + +static void create_task_trsm_ru_real(unsigned k, unsigned i) +{ + struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); + +#ifdef STARPU_DEVEL +#warning temporary fix +#endif +// task->cl = &STARPU_PLU(cl_trsm_ru); + task->cl = &STARPU_PLU(cl_trsm_ll); + task->color = 0x8080c0; + + task->cl_arg = create_debug_info(i, i, k); + task->cl_arg_free = 1; + + unsigned diag_block_is_local = (get_block_rank(k, k) == rank); + + starpu_tag_t tag_11_dep; + + /* which sub-data is manipulated ? */ + starpu_data_handle_t diag_block; + if (diag_block_is_local) + { + diag_block = STARPU_PLU(get_block_handle)(k, k); + tag_11_dep = TAG_GETRF(k); + } + else + { +#ifdef SINGLE_TMP11 + diag_block = STARPU_PLU(get_tmp_11_block_handle)(); +#else + diag_block = STARPU_PLU(get_tmp_11_block_handle)(k); +#endif + tag_11_dep = TAG_GETRF_SAVE(k); + } + + task->handles[0] = diag_block; + task->handles[1] = STARPU_PLU(get_block_handle)(i, k); + + STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); + STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); + + struct callback_arg *arg = malloc(sizeof(struct callback_arg)); + arg->i = i; + arg->k = k; + + task->callback_func = callback_task_trsm_ru_real; + task->callback_arg = arg; + + if (!no_prio) + task->priority = 3*nblocks - (2*k + i); + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 2, tag_11_dep, TAG_GEMM(k-1, i, k)); + } + else + { + starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, tag_11_dep); + } + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static void create_task_trsm_ru(unsigned k, unsigned i) +{ + if (get_block_rank(i, k) == rank) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "CREATE real task 21(k = %u, i = %u) on node %d\n", k, i, rank); +#endif + create_task_trsm_ru_real(k, i); + } + else + { + /* We don't handle the task, but perhaps we have to generate MPI transfers. */ + int rank_mask[world_size]; + find_nodes_using_21(k, i, rank_mask); + + if (rank_mask[rank]) + { +#ifdef VERBOSE_INIT + fprintf(stderr, "create RECV task 21(k = %u, i = %u) on node %d\n", k, i, rank); +#endif + create_task_trsm_ru_recv(k, i); + } + else + { +#ifdef VERBOSE_INIT + fprintf(stderr, "Node %d needs not 21(k=%u, i=%u)\n", rank, k,i); +#endif + } + } +} + +/* + * Task GEMM + */ + +static void create_task_gemm_real(unsigned k, unsigned i, unsigned j) +{ +// printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); + + struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); + + task->cl = &STARPU_PLU(cl_gemm); + task->color = 0x00ff00; + + task->cl_arg = create_debug_info(i, j, k); + task->cl_arg_free = 1; + + /* which sub-data is manipulated ? 
*/ + + /* produced by TAG_TRSM_RU_SAVE(k, i) */ + unsigned block21_is_local = (get_block_rank(i, k) == rank); + starpu_tag_t tag_21_dep; + + starpu_data_handle_t block21; + if (block21_is_local) + { + block21 = STARPU_PLU(get_block_handle)(i, k); + tag_21_dep = TAG_TRSM_RU(k, i); + } + else + { +#ifdef SINGLE_TMP1221 + block21 = STARPU_PLU(get_tmp_21_block_handle)(i); +#else + block21 = STARPU_PLU(get_tmp_21_block_handle)(i, k); +#endif + tag_21_dep = TAG_TRSM_RU_SAVE(k, i); + } + + /* produced by TAG_TRSM_LL_SAVE(k, j) */ + unsigned block12_is_local = (get_block_rank(k, j) == rank); + starpu_tag_t tag_12_dep; + + starpu_data_handle_t block12; + if (block12_is_local) + { + // block12 = STARPU_PLU(get_block_handle)(j, k); + block12 = STARPU_PLU(get_block_handle)(k, j); + tag_12_dep = TAG_TRSM_LL(k, j); + } + else + { +#ifdef SINGLE_TMP1221 + block12 = STARPU_PLU(get_tmp_12_block_handle)(j); +#else + block12 = STARPU_PLU(get_tmp_12_block_handle)(j, k); +#endif + tag_12_dep = TAG_TRSM_LL_SAVE(k, j); + } + + + +#ifdef STARPU_DEVEL +#warning temporary fix :/ +#endif + //task->handles[0] = block21; + task->handles[0] = block12; + + //task->handles[1] = block12; + task->handles[1] = block21; + + /* produced by TAG_GEMM(k-1, i, j) */ + task->handles[2] = STARPU_PLU(get_block_handle)(i, j); + + STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); + STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); + STARPU_ASSERT(task->handles[2] != STARPU_POISON_PTR); + + if (!no_prio) + task->priority = 3*nblocks - (k + i + j); + + /* enforce dependencies ... 
*/ + if (k > 0) + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), tag_12_dep, tag_21_dep); + } + else + { + starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, tag_12_dep, tag_21_dep); + } + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static void create_task_gemm(unsigned k, unsigned i, unsigned j) +{ + if (get_block_rank(i, j) == rank) + { + // fprintf(stderr, "CREATE real task 22(k = %d, i = %d, j = %d) on node %d\n", k, i, j, rank); + create_task_gemm_real(k, i, j); + } +// else +// { +// fprintf(stderr, "Node %d needs not 22(k=%d, i=%d, j = %d)\n", rank, k,i,j); +// } +} + +static void wait_tag_and_fetch_handle(starpu_tag_t tag, starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle != STARPU_POISON_PTR); + + starpu_tag_wait(tag); +// fprintf(stderr, "Rank %d : tag %lx is done\n", rank, tag); + + starpu_data_acquire(handle, STARPU_R); + starpu_data_release(handle); + +// starpu_data_unregister(handle); +} + +static void wait_termination(void) +{ + unsigned k, i, j; + for (k = 0; k < nblocks; k++) + { + /* Wait task 11k if needed */ + if (get_block_rank(k, k) == rank) + { + starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k); + wait_tag_and_fetch_handle(TAG_GETRF_SAVE(k), diag_block); + } + + + for (i = k + 1; i < nblocks; i++) + { + /* Wait task 21ki if needed */ + if (get_block_rank(i, k) == rank) + { + starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(i, k); + //starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(k, i); + //fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21); + wait_tag_and_fetch_handle(TAG_TRSM_RU_SAVE(k, i), block21); + } + } + + for (j = k + 1; j < nblocks; j++) + { + /* Wait task 12kj if needed */ + if (get_block_rank(k, j) == rank) + { + //starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(j, k); + starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(k, j); + //fprintf(stderr, "BLOCK12 j %d k %d -> 
handle %p\n", j, k, block12); + wait_tag_and_fetch_handle(TAG_TRSM_LL_SAVE(k, j), block12); + } + } + } +} + +/* + * code to bootstrap the factorization + */ + +double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio) +{ + double start; + double end; + + nblocks = _nblocks; + rank = _rank; + world_size = _world_size; + no_prio = _no_prio; + + /* create all the DAG nodes */ + unsigned i,j,k; + + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + + create_task_getrf(k); + + for (i = k+1; i took %f ms\n", rank, timing/1000); + + return timing; +} diff --git a/mpi/examples/mpi_lu/pxlu.h b/mpi/examples/mpi_lu/pxlu.h new file mode 100644 index 0000000..183207c --- /dev/null +++ b/mpi/examples/mpi_lu/pxlu.h @@ -0,0 +1,67 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __PXLU_H__ +#define __PXLU_H__ + +#include +#include +#include +#ifdef STARPU_USE_CUDA +#include +#endif + +#define BLAS3_FLOP(n1,n2,n3) (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) + +//#define SINGLE_TMP11 1 +//#define SINGLE_TMP1221 1 + +struct debug_info +{ + unsigned i; + unsigned j; + unsigned k; +}; + +double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size, unsigned no_prio); + +TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks); +void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved); + +unsigned STARPU_PLU(display_flag)(void); + +void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank); +void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank); +starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j); +TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j); +#ifdef SINGLE_TMP11 +starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void); +#else +starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k); +#endif +#ifdef SINGLE_TMP1221 +starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j); +starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i); +#else +starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k); +starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k); +#endif + +void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize); + +int get_block_rank(unsigned i, unsigned j); + +#endif // __PXLU_H__ diff --git a/mpi/examples/mpi_lu/pxlu_implicit.c b/mpi/examples/mpi_lu/pxlu_implicit.c new file mode 100644 index 0000000..6c50650 --- /dev/null +++ b/mpi/examples/mpi_lu/pxlu_implicit.c @@ -0,0 +1,189 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "pxlu.h" +#include "pxlu_kernels.h" +#include + +//#define VERBOSE_INIT 1 + +//#define DEBUG 1 + +static unsigned no_prio = 0; +static unsigned nblocks = 0; +static int rank = -1; +static int world_size = -1; + +struct callback_arg +{ + unsigned i, j, k; +}; + +/* + * Task GETRF (diagonal factorization) + */ + +static void create_task_getrf(unsigned k) +{ + starpu_mpi_task_insert(MPI_COMM_WORLD, + &STARPU_PLU(cl_getrf), + STARPU_VALUE, &k, sizeof(k), + STARPU_VALUE, &k, sizeof(k), + STARPU_VALUE, &k, sizeof(k), + STARPU_RW, STARPU_PLU(get_block_handle)(k, k), + STARPU_PRIORITY, !no_prio ? + (int) (3*nblocks - 3*k) : STARPU_MIN_PRIO, + 0); +} + +/* + * Task TRSM_LL + */ + +static void create_task_trsm_ll(unsigned k, unsigned j) +{ +#ifdef STARPU_DEVEL +#warning temporary fix +#endif + starpu_mpi_task_insert(MPI_COMM_WORLD, + //&STARPU_PLU(cl_trsm_ll), + &STARPU_PLU(cl_trsm_ru), + STARPU_VALUE, &j, sizeof(j), + STARPU_VALUE, &j, sizeof(j), + STARPU_VALUE, &k, sizeof(k), + STARPU_R, STARPU_PLU(get_block_handle)(k, k), + STARPU_RW, STARPU_PLU(get_block_handle)(k, j), + STARPU_PRIORITY, !no_prio ? 
+ (int) (3*nblocks - (2*k + j)) : STARPU_MIN_PRIO, + 0); +} + +/* + * Task TRSM_RU + */ + +static void create_task_trsm_ru(unsigned k, unsigned i) +{ +#ifdef STARPU_DEVEL +#warning temporary fix +#endif + starpu_mpi_task_insert(MPI_COMM_WORLD, + //&STARPU_PLU(cl_trsm_ru), + &STARPU_PLU(cl_trsm_ll), + STARPU_VALUE, &i, sizeof(i), + STARPU_VALUE, &i, sizeof(i), + STARPU_VALUE, &k, sizeof(k), + STARPU_R, STARPU_PLU(get_block_handle)(k, k), + STARPU_RW, STARPU_PLU(get_block_handle)(i, k), + STARPU_PRIORITY, !no_prio ? + (int) (3*nblocks - (2*k + i)) : STARPU_MIN_PRIO, + 0); +} + +/* + * Task GEMM + */ + +static void create_task_gemm(unsigned k, unsigned i, unsigned j) +{ + starpu_mpi_task_insert(MPI_COMM_WORLD, + &STARPU_PLU(cl_gemm), + STARPU_VALUE, &i, sizeof(i), + STARPU_VALUE, &j, sizeof(j), + STARPU_VALUE, &k, sizeof(k), + STARPU_R, STARPU_PLU(get_block_handle)(k, j), + STARPU_R, STARPU_PLU(get_block_handle)(i, k), + STARPU_RW, STARPU_PLU(get_block_handle)(i, j), + STARPU_PRIORITY, !no_prio ? 
+ (int) (3*nblocks - (k + i + j)) : STARPU_MIN_PRIO, + 0); +} + +/* + * code to bootstrap the factorization + */ + +double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio) +{ + double start; + double end; + int ret; + + nblocks = _nblocks; + rank = _rank; + world_size = _world_size; + no_prio = _no_prio; + + /* create all the DAG nodes */ + unsigned i,j,k; + + ret = starpu_mpi_wait_for_all(MPI_COMM_WORLD); + STARPU_ASSERT(ret == MPI_SUCCESS); + ret = starpu_mpi_barrier(MPI_COMM_WORLD); + STARPU_ASSERT(ret == MPI_SUCCESS); + + start = starpu_timing_now(); + + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + + create_task_getrf(k); + + for (i = k+1; i took %f ms\n", rank, timing/1000); + + return timing; +} diff --git a/mpi/examples/mpi_lu/pxlu_kernels.c b/mpi/examples/mpi_lu/pxlu_kernels.c new file mode 100644 index 0000000..2af7771 --- /dev/null +++ b/mpi/examples/mpi_lu/pxlu_kernels.c @@ -0,0 +1,491 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "pxlu.h" +#include "pxlu_kernels.h" +#include + +///#define VERBOSE_KERNELS 1 + +#ifdef STARPU_USE_CUDA +static const TYPE p1 = 1.0f; +static const TYPE m1 = -1.0f; +#endif + +/* + * GEMM + */ + +static inline void STARPU_PLU(common_gemm)(void *descr[], int s, void *_args) +{ + TYPE *right = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + TYPE *left = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + TYPE *center = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + +#ifdef VERBOSE_KERNELS + struct debug_info *info = _args; + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + fprintf(stderr, "KERNEL GEMM %d - k = %u i = %u j = %u\n", rank, info->k, info->i, info->j); +#else + (void)_args; +#endif + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + CPU_GEMM("N", "N", dy, dx, dz, + (TYPE)-1.0, right, ld21, left, ld12, + (TYPE)1.0, center, ld22); + break; + +#ifdef STARPU_USE_CUDA + case 1: + { + status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_N, dx, dy, dz, + (CUBLAS_TYPE *)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12, + (CUBLAS_TYPE *)&p1, (CUBLAS_TYPE *)center, ld22); + + if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +#ifdef VERBOSE_KERNELS + fprintf(stderr, "KERNEL GEMM %d - k = %u i = %u j = %u done\n", rank, info->k, info->i, info->j); +#endif +} + +static void STARPU_PLU(cpu_gemm)(void *descr[], void *_args) +{ + STARPU_PLU(common_gemm)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +static void STARPU_PLU(cublas_gemm)(void *descr[], void *_args) +{ + 
STARPU_PLU(common_gemm)(descr, 1, _args); +} +#endif// STARPU_USE_CUDA + +static struct starpu_perfmodel STARPU_PLU(model_gemm) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_PLU_STR(lu_model_gemm_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_PLU_STR(lu_model_gemm_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_PLU_STR(lu_model_gemm_openblas) +#else + .symbol = STARPU_PLU_STR(lu_model_gemm) +#endif +}; + +#define STRINGIFY_(x) #x +#define STRINGIFY(x) STRINGIFY_(x) +struct starpu_codelet STARPU_PLU(cl_gemm) = +{ + .cpu_funcs = {STARPU_PLU(cpu_gemm)}, + .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_gemm))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_PLU(cublas_gemm)}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &STARPU_PLU(model_gemm) +}; + +/* + * TRSM_LL + */ + +static inline void STARPU_PLU(common_trsmll)(void *descr[], int s, void *_args) +{ + TYPE *sub11; + TYPE *sub12; + + sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef VERBOSE_KERNELS + struct debug_info *info = _args; + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); +#warning fixed debugging according to other tweak + //fprintf(stderr, "KERNEL TRSM_LL %d - k = %u i %u\n", rank, info->k, info->i); + fprintf(stderr, "KERNEL TRSM_RU %d - k = %u i %u\n", rank, info->k, info->j); + + //fprintf(stderr, "INPUT 12 GETRF\n"); + fprintf(stderr, "INPUT 21 GETRF\n"); + STARPU_PLU(display_data_content)(sub11, nx12); + //fprintf(stderr, "INPUT 12 TRSM_LL\n"); + fprintf(stderr, "INPUT 21 TRSM_RU\n"); + STARPU_PLU(display_data_content)(sub12, nx12); +#else + (void)_args; 
+#endif + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + /* solve L11 U12 = A12 (find U12) */ + switch (s) + { + case 0: + CPU_TRSM("L", "L", "N", "N", nx12, ny12, + (TYPE)1.0, sub11, ld11, sub12, ld12); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, + ny12, nx12, + (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub12, ld12); + + if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } + +#ifdef VERBOSE_KERNELS + //fprintf(stderr, "OUTPUT 12 TRSM_LL\n"); + fprintf(stderr, "OUTPUT 21 TRSM_RU\n"); + STARPU_PLU(display_data_content)(sub12, nx12); +#endif +} + +static void STARPU_PLU(cpu_trsmll)(void *descr[], void *_args) +{ + STARPU_PLU(common_trsmll)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +static void STARPU_PLU(cublas_trsmll)(void *descr[], void *_args) +{ + STARPU_PLU(common_trsmll)(descr, 1, _args); +} +#endif // STARPU_USE_CUDA + +static struct starpu_perfmodel STARPU_PLU(model_trsm_ll) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_PLU_STR(lu_model_trsm_ll_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_PLU_STR(lu_model_trsm_ll_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_PLU_STR(lu_model_trsm_ll_openblas) +#else + .symbol = STARPU_PLU_STR(lu_model_trsm_ll) +#endif +}; + +struct starpu_codelet STARPU_PLU(cl_trsm_ll) = +{ + .cpu_funcs = {STARPU_PLU(cpu_trsmll)}, + .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_trsmll))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_PLU(cublas_trsmll)}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &STARPU_PLU(model_trsm_ll) +}; + +/* + * TRSM_RU + */ + +static inline void STARPU_PLU(common_trsmru)(void 
*descr[], int s, void *_args) +{ + TYPE *sub11; + TYPE *sub21; + + sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + + unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); + unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); + +#ifdef VERBOSE_KERNELS + struct debug_info *info = _args; + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); +#warning fixed debugging according to other tweak + //fprintf(stderr, "KERNEL TRSM_RU %d (k = %u, i = %u)\n", rank, info->k, info->i); + fprintf(stderr, "KERNEL TRSM_LL %d (k = %u, j = %u)\n", rank, info->k, info->j); + + //fprintf(stderr, "INPUT 21 GETRF\n"); + fprintf(stderr, "INPUT 12 GETRF\n"); + STARPU_PLU(display_data_content)(sub11, nx21); + //fprintf(stderr, "INPUT 21 TRSM_RU\n"); + fprintf(stderr, "INPUT 12 TRSM_LL\n"); + STARPU_PLU(display_data_content)(sub21, nx21); +#else + (void)_args; +#endif + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + CPU_TRSM("R", "U", "N", "U", nx21, ny21, + (TYPE)1.0, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, + ny21, nx21, + (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub21, ld21); + + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; +#endif + default: + STARPU_ABORT(); + break; + } + +#ifdef VERBOSE_KERNELS + //fprintf(stderr, "OUTPUT 21 GETRF\n"); + fprintf(stderr, "OUTPUT 12 GETRF\n"); + STARPU_PLU(display_data_content)(sub11, nx21); + //fprintf(stderr, "OUTPUT 21 TRSM_RU\n"); + fprintf(stderr, "OUTPUT 12 TRSM_LL\n"); + STARPU_PLU(display_data_content)(sub21, nx21); +#endif +} + +static void STARPU_PLU(cpu_trsmru)(void *descr[], void *_args) +{ + STARPU_PLU(common_trsmru)(descr, 0, 
_args); +} + +#ifdef STARPU_USE_CUDA +static void STARPU_PLU(cublas_trsmru)(void *descr[], void *_args) +{ + STARPU_PLU(common_trsmru)(descr, 1, _args); +} +#endif + +static struct starpu_perfmodel STARPU_PLU(model_trsm_ru) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_PLU_STR(lu_model_trsm_ru_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_PLU_STR(lu_model_trsm_ru_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_PLU_STR(lu_model_trsm_ru_openblas) +#else + .symbol = STARPU_PLU_STR(lu_model_trsm_ru) +#endif +}; + +struct starpu_codelet STARPU_PLU(cl_trsm_ru) = +{ + .cpu_funcs = {STARPU_PLU(cpu_trsmru)}, + .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_trsmru))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_PLU(cublas_trsmru)}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW}, + .model = &STARPU_PLU(model_trsm_ru) +}; + + +/* + * GETRF + */ + +static inline void STARPU_PLU(common_getrf)(void *descr[], int s, void *_args) +{ + TYPE *sub11; + + sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); + + unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); + unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned long z; + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; + cublasHandle_t handle; + cudaStream_t stream; +#endif + +#ifdef VERBOSE_KERNELS + struct debug_info *info = _args; + + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + fprintf(stderr, "KERNEL 11 %d - k = %u\n", rank, info->k); +#else + (void)_args; +#endif + + switch (s) + { + case 0: + for (z = 0; z < nx; z++) + { + TYPE pivot; + pivot = sub11[z+z*ld]; + STARPU_ASSERT(!ISZERO(pivot)); + + CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld); + + CPU_GER(nx - z - 1, nx - z - 1, -1.0, + &sub11[(z+1)+z*ld], 1, + &sub11[z+(z+1)*ld], ld, + &sub11[(z+1) + (z+1)*ld],ld); + } + break; +#ifdef STARPU_USE_CUDA + case 1: + handle = 
starpu_cublas_get_local_handle(); + stream = starpu_cuda_get_local_stream(); + for (z = 0; z < nx; z++) + { + TYPE pivot; + TYPE inv_pivot; + cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + STARPU_ASSERT(!ISZERO(pivot)); + + inv_pivot = 1.0/pivot; + status = CUBLAS_SCAL(handle, + nx - z - 1, + (CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + status = CUBLAS_GER(handle, + nx - z - 1, nx - z - 1, + (CUBLAS_TYPE*)&m1, + (CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1, + (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld, + (CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + + cudaStreamSynchronize(stream); + + break; +#endif + default: + STARPU_ABORT(); + break; + } +#ifdef VERBOSE_KERNELS + fprintf(stderr, "KERNEL GETRF %d - k = %u\n", rank, info->k); +#endif +} + +static void STARPU_PLU(cpu_getrf)(void *descr[], void *_args) +{ + STARPU_PLU(common_getrf)(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +static void STARPU_PLU(cublas_getrf)(void *descr[], void *_args) +{ + STARPU_PLU(common_getrf)(descr, 1, _args); +} +#endif// STARPU_USE_CUDA + +static struct starpu_perfmodel STARPU_PLU(model_getrf) = +{ + .type = STARPU_HISTORY_BASED, +#ifdef STARPU_ATLAS + .symbol = STARPU_PLU_STR(lu_model_getrf_atlas) +#elif defined(STARPU_GOTO) + .symbol = STARPU_PLU_STR(lu_model_getrf_goto) +#elif defined(STARPU_OPENBLAS) + .symbol = STARPU_PLU_STR(lu_model_getrf_openblas) +#else + .symbol = STARPU_PLU_STR(lu_model_getrf) +#endif +}; + +struct starpu_codelet STARPU_PLU(cl_getrf) = +{ + .cpu_funcs = {STARPU_PLU(cpu_getrf)}, + .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_getrf))}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {STARPU_PLU(cublas_getrf)}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = 
&STARPU_PLU(model_getrf) +}; diff --git a/mpi/examples/mpi_lu/pxlu_kernels.h b/mpi/examples/mpi_lu/pxlu_kernels.h new file mode 100644 index 0000000..d1b1329 --- /dev/null +++ b/mpi/examples/mpi_lu/pxlu_kernels.h @@ -0,0 +1,31 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __PXLU_KERNELS_H__ +#define __PXLU_KERNELS_H__ + +#include + +#define str(s) #s +#define xstr(s) str(s) +#define STARPU_PLU_STR(name) xstr(STARPU_PLU(name)) + +extern struct starpu_codelet STARPU_PLU(cl_getrf); +extern struct starpu_codelet STARPU_PLU(cl_trsm_ll); +extern struct starpu_codelet STARPU_PLU(cl_trsm_ru); +extern struct starpu_codelet STARPU_PLU(cl_gemm); + +#endif // __PXLU_KERNELS_H__ diff --git a/mpi/examples/mpi_redux/mpi_redux.c b/mpi/examples/mpi_redux/mpi_redux.c new file mode 100644 index 0000000..a67385a --- /dev/null +++ b/mpi/examples/mpi_redux/mpi_redux.c @@ -0,0 +1,210 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example illustrates how to use the STARPU_MPI_REDUX mode + * and compare it with the standard STARPU_REDUX. + * + * In order to make this comparison salient, the init codelet is not + * a task that sets the handle to a neutral element but rather depends + * on the working node. + * This is not a proper way to use a reduction pattern; however, it + * can be analogous to the cost/weight of each contribution. + */ + +#include +#include +#include +#include +#include +#include +#include "helper.h" +#include + +static void cl_cpu_work(void *handles[], void*arg) +{ + (void)arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); + *a = 3.0 + *a + *b; + FPRINTF(stderr, "%f\n",*a); +} + +static struct starpu_codelet work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_REDUX, STARPU_R }, + .name = "task_init" +}; + +static struct starpu_codelet mpi_work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, + .name = "task_init-mpi" +}; + +static void cl_cpu_task_init(void *handles[], void*arg) +{ + (void) arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + starpu_sleep(0.005); + FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), +#ifdef STARPU_HAVE_VALGRIND_H + RUNNING_ON_VALGRIND ? 0. 
: +#endif + *a); + *a = starpu_mpi_world_rank(); +} + +static struct starpu_codelet task_init_cl = +{ + .cpu_funcs = { cl_cpu_task_init }, + .nbuffers = 1, + .modes = { STARPU_W }, + .name = "task_init" +}; + +static void cl_cpu_task_red(void *handles[], void*arg) +{ + (void) arg; + double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); + *ad = *ad + *as; +} + +static struct starpu_codelet task_red_cl = +{ + .cpu_funcs = { cl_cpu_task_red }, + .nbuffers = 2, + .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, + .name = "task_red" +}; + +int main(int argc, char *argv[]) +{ + int comm_rank, comm_size; + /* Initializes STarPU and the StarPU-MPI layer */ + starpu_fxt_autostart_profiling(0); + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); + + int nworkers = starpu_cpu_worker_get_count(); + if (nworkers < 2) + { + FPRINTF(stderr, "We need at least 2 CPU worker per node.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + if (comm_size < 2) + { + FPRINTF(stderr, "We need at least 2 nodes.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); + + double a, b[comm_size]; + starpu_data_handle_t a_h, b_h[comm_size]; + double work_coef = 2; + enum starpu_data_access_mode task_mode; + int i,j,work_node; + starpu_mpi_tag_t tag = 0; + for (i = 0 ; i < 2 ; i++) + { + starpu_mpi_barrier(MPI_COMM_WORLD); + if (i==0) + task_mode = STARPU_MPI_REDUX; + else + task_mode = STARPU_REDUX; + if (comm_rank == 0) + { + a = 1.0; + FPRINTF(stderr, "init a = %f\n", a); + starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); + for (j=0;j %f 
expected %f\n", a, + 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp1) + tmp2); + } + starpu_data_unregister(a_h); + for (work_node=0; work_node < comm_size;work_node++) + starpu_data_unregister(b_h[work_node]); + starpu_mpi_barrier(MPI_COMM_WORLD); + } + starpu_mpi_shutdown(); + return 0; +} diff --git a/mpi/examples/mpi_redux/mpi_redux_autowrapup.c b/mpi/examples/mpi_redux/mpi_redux_autowrapup.c new file mode 100644 index 0000000..cff6501 --- /dev/null +++ b/mpi/examples/mpi_redux/mpi_redux_autowrapup.c @@ -0,0 +1,233 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * This example is similar to mpi_redux.c + * + * It iterates over multiple ways to wrap-up reduction patterns : either by + * - waiting for all mpi + tasks + * - calling mpi_redux yourself + * - inserting a reading task on the handle to reduce + */ + +#include +#include +#include +#include +#include +#include +#include "helper.h" +#include + +static void cl_cpu_read(void *handles[], void*arg) +{ + (void) arg; + (void) handles; +} + +static struct starpu_codelet read_cl = +{ + .cpu_funcs = { cl_cpu_read }, + .nbuffers = 1, + .modes = { STARPU_R }, + .name = "task_read" +}; +static void cl_cpu_work(void *handles[], void*arg) +{ + (void)arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); + *a = 3.0 + *a + *b; + FPRINTF(stderr, "%f\n",*a); +} + +static struct starpu_codelet work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_REDUX, STARPU_R }, + .name = "task_init" +}; + +static struct starpu_codelet mpi_work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, + .name = "task_init-mpi" +}; + +static void cl_cpu_task_init(void *handles[], void*arg) +{ + (void) arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + starpu_sleep(0.005); + FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), +#ifdef STARPU_HAVE_VALGRIND_H + RUNNING_ON_VALGRIND ? 0. 
: +#endif + *a); + *a = starpu_mpi_world_rank(); +} + +static struct starpu_codelet task_init_cl = +{ + .cpu_funcs = { cl_cpu_task_init }, + .nbuffers = 1, + .modes = { STARPU_W }, + .name = "task_init" +}; + +static void cl_cpu_task_red(void *handles[], void*arg) +{ + (void) arg; + double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); + *ad = *ad + *as; +} + +static struct starpu_codelet task_red_cl = +{ + .cpu_funcs = { cl_cpu_task_red }, + .nbuffers = 2, + .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, + .name = "task_red" +}; + +int main(int argc, char *argv[]) +{ + int comm_rank, comm_size; + /* Initializes STarPU and the StarPU-MPI layer */ + starpu_fxt_autostart_profiling(0); + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); + + int nworkers = starpu_cpu_worker_get_count(); + if (nworkers < 2) + { + FPRINTF(stderr, "We need at least 2 CPU worker per node.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + FPRINTF(stderr, "there are %d workers\n", nworkers); + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + if (comm_size < 2) + { + FPRINTF(stderr, "We need at least 2 nodes.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); + + double a, b[comm_size]; + starpu_data_handle_t a_h, b_h[comm_size]; + double work_coef = 2; + enum starpu_data_access_mode task_mode; + int wrapup,i,j,work_node; + starpu_mpi_tag_t tag = 0; + for (wrapup = 0; wrapup <= 2; wrapup ++) + { + for (i = 0 ; i < 2 ; i++) + { + starpu_mpi_barrier(MPI_COMM_WORLD); + if (i==0) + task_mode = STARPU_MPI_REDUX; + else + task_mode = STARPU_REDUX; + if (comm_rank == 0) + { + a = 1.0; + FPRINTF(stderr, "init a = 
%f\n", a); + starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); + for (j=0;j %f expected %f\n", a, + 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp1) + tmp2); + } + starpu_data_unregister(a_h); + for (work_node=0; work_node < comm_size;work_node++) + starpu_data_unregister(b_h[work_node]); + starpu_mpi_barrier(MPI_COMM_WORLD); + } + } + starpu_mpi_shutdown(); + return 0; +} diff --git a/mpi/examples/mpi_redux/mpi_redux_tree.c b/mpi/examples/mpi_redux/mpi_redux_tree.c new file mode 100644 index 0000000..f6a052e --- /dev/null +++ b/mpi/examples/mpi_redux/mpi_redux_tree.c @@ -0,0 +1,188 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This example illustrates how to use the STARPU_MPI_REDUX mode + * and compare it with the standard STARPU_REDUX. + * + * In order to make this comparison salient, the init codelet is not + * a task that sets the handle to a neutral element but rather depends + * on the working node. + * This is not a proper way to use a reduction pattern; however, it + * can be analogous to the cost/weight of each contribution. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "helper.h" +#include + +static void cl_cpu_work(void *handles[], void*arg) +{ + (void)arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); + *a = 3.0 + *a + *b; + FPRINTF(stderr, "%f\n",*a); +} + +static struct starpu_codelet work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_REDUX, STARPU_R }, + .name = "task_init" +}; + +static struct starpu_codelet mpi_work_cl = +{ + .cpu_funcs = { cl_cpu_work }, + .nbuffers = 2, + .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, + .name = "task_init-mpi" +}; + +static void cl_cpu_task_init(void *handles[], void*arg) +{ + (void) arg; + double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + starpu_sleep(0.005); + FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), +#ifdef STARPU_HAVE_VALGRIND_H + RUNNING_ON_VALGRIND ? 0. 
: +#endif + *a); + *a = starpu_mpi_world_rank(); +} + +static struct starpu_codelet task_init_cl = +{ + .cpu_funcs = { cl_cpu_task_init }, + .nbuffers = 1, + .modes = { STARPU_W }, + .name = "task_init" +}; + +static void cl_cpu_task_red(void *handles[], void*arg) +{ + (void) arg; + double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); + double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); + starpu_sleep(0.01); + FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); + *ad = *ad + *as; +} + +static struct starpu_codelet task_red_cl = +{ + .cpu_funcs = { cl_cpu_task_red }, + .nbuffers = 2, + .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, + .name = "task_red" +}; + +int main(int argc, char *argv[]) +{ + int comm_rank, comm_size; + /* Initializes STarPU and the StarPU-MPI layer */ + starpu_fxt_autostart_profiling(0); + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); + + int nworkers = starpu_cpu_worker_get_count(); + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + if (comm_size < 2) + { + FPRINTF(stderr, "We need at least 2 nodes.\n"); + starpu_mpi_shutdown(); + return STARPU_TEST_SKIPPED; + } + starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); + + double a, b[comm_size]; + starpu_data_handle_t a_h, b_h[comm_size]; + double work_coef = 2; + enum starpu_data_access_mode task_mode; + int arity,j,work_node; + starpu_mpi_tag_t tag = 0; + for (arity = 2 ; arity < comm_size ; arity++) + { + starpu_mpi_barrier(MPI_COMM_WORLD); + task_mode = STARPU_MPI_REDUX; + if (comm_rank == 0) + { + a = 1.0; + FPRINTF(stderr, "init a = %f\n", a); + starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); + for (j=0;j %f expected %f\n", a, 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp)); + } + starpu_data_unregister(a_h); + for (work_node=0; 
work_node < comm_size;work_node++) + starpu_data_unregister(b_h[work_node]); + starpu_mpi_barrier(MPI_COMM_WORLD); + } + starpu_mpi_shutdown(); + return 0; +} diff --git a/mpi/examples/native_fortran/fstarpu_mod.f90 b/mpi/examples/native_fortran/fstarpu_mod.f90 new file mode 100644 index 0000000..041de99 --- /dev/null +++ b/mpi/examples/native_fortran/fstarpu_mod.f90 @@ -0,0 +1,2697 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +!> @ingroup API_Fortran +!> @brief Fortran API +module fstarpu_mod + use iso_c_binding + implicit none + + ! Note: Constants truly are intptr_t, but are declared as c_ptr to be + ! readily usable in c_ptr arrays to mimic variadic functions. + ! Note: Bitwise or operator is provided by the .ior. 
overloaded operator + type(c_ptr), bind(C) :: FSTARPU_R + type(c_ptr), bind(C) :: FSTARPU_W + type(c_ptr), bind(C) :: FSTARPU_RW + type(c_ptr), bind(C) :: FSTARPU_SCRATCH + type(c_ptr), bind(C) :: FSTARPU_REDUX + type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX + type(c_ptr), bind(C) :: FSTARPU_COMMUTE + type(c_ptr), bind(C) :: FSTARPU_SSEND + type(c_ptr), bind(C) :: FSTARPU_LOCALITY + + type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY + type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS + type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE + type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY + type(c_ptr), bind(C) :: FSTARPU_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG + type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE + type(c_ptr), bind(C) :: FSTARPU_PRIORITY + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER + type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER + type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE + type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG + type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL + type(c_ptr), bind(C) :: FSTARPU_FLOPS + type(c_ptr), bind(C) :: FSTARPU_TAG + type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY + type(c_ptr), bind(C) :: FSTARPU_NAME + type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR + type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS + type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY + type(c_ptr), bind(C) :: FSTARPU_TASK_END_DEP + type(c_ptr), 
bind(C) :: FSTARPU_NODE_SELECTION_POLICY + type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA + + type(c_ptr), bind(C) :: FSTARPU_VALUE + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX + + type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER + type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER + type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER + type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER + + integer(c_int), bind(C) :: FSTARPU_NMAXBUFS + + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT + type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA + + type(c_ptr), bind(C) :: FSTARPU_NOWHERE + type(c_ptr), bind(C) :: FSTARPU_CPU + type(c_ptr), bind(C) :: FSTARPU_CUDA + type(c_ptr), bind(C) :: FSTARPU_OPENCL + + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE + type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT + type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC + type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC + + !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER + !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH + !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON + type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED + type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED + type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED + + type(c_ptr), bind(C) :: FSTARPU_SEQ + type(c_ptr), bind(C) :: FSTARPU_SPMD + type(c_ptr), bind(C) :: FSTARPU_FORKJOIN + + ! 
(some) portable iso_c_binding types + type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE + type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT + type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T + type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR + type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T + + ! (some) native Fortran types + type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER + + type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER + type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 + type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 + type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 + + type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION + + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 + type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 + + integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO + + interface operator (.ior.) + procedure or_cptrs + end interface operator (.ior.) + + interface + ! == starpu.h == + + ! 
void starpu_conf_init(struct starpu_conf *conf); + subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_init + + function fstarpu_conf_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_conf_allocate + end function fstarpu_conf_allocate + + subroutine fstarpu_conf_free (conf) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: conf + end subroutine fstarpu_conf_free + + subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) + use iso_c_binding, only: c_ptr, c_char + type(c_ptr), value, intent(in) :: conf + character(c_char), intent(in) :: policy_name + end subroutine fstarpu_conf_set_sched_policy_name + + subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: min_prio + end subroutine fstarpu_conf_set_min_prio + + subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: max_prio + end subroutine fstarpu_conf_set_max_prio + + subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncpu + end subroutine fstarpu_conf_set_ncpu + + subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: ncuda + end subroutine fstarpu_conf_set_ncuda + + subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: conf + integer(c_int), value, intent(in) :: nopencl + end subroutine fstarpu_conf_set_nopencl + + ! starpu_init: see fstarpu_init + ! 
starpu_initialize: see fstarpu_init + + ! void starpu_pause(void); + subroutine fstarpu_pause() bind(C,name="starpu_pause") + end subroutine fstarpu_pause + + ! void starpu_resume(void); + subroutine fstarpu_resume() bind(C,name="starpu_resume") + end subroutine fstarpu_resume + + ! int starpu_is_paused(void); + function fstarpu_is_paused() bind(C,name="starpu_is_paused") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_is_paused + end function fstarpu_is_paused + + ! void starpu_shutdown(void); + subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") + end subroutine fstarpu_shutdown + + ! starpu_topology_print + subroutine fstarpu_topology_print () bind(C) + end subroutine fstarpu_topology_print + + ! int starpu_asynchronous_copy_disabled(void); + function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_copy_disabled + end function fstarpu_asynchronous_copy_disabled + + ! int starpu_asynchronous_cuda_copy_disabled(void); + function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled + end function fstarpu_asynchronous_cuda_copy_disabled + + ! int starpu_asynchronous_opencl_copy_disabled(void); + function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled + end function fstarpu_asynchronous_opencl_copy_disabled + + ! void starpu_display_stats(); + subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") + end subroutine fstarpu_display_stats + + ! 
void starpu_get_version(int *major, int *minor, int *release); + subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") + use iso_c_binding, only: c_int + integer(c_int), intent(out) :: major,minor,release + end subroutine fstarpu_get_version + + ! == starpu_worker.h == + + ! unsigned starpu_worker_get_count(void); + function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_count + end function fstarpu_worker_get_count + + ! unsigned starpu_combined_worker_get_count(void); + function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_count + end function fstarpu_combined_worker_get_count + + ! unsigned starpu_worker_is_combined_worker(int id); + function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_combined_worker + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_combined_worker + + + ! unsigned starpu_cpu_worker_get_count(void); + function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cpu_worker_get_count + end function fstarpu_cpu_worker_get_count + + ! unsigned starpu_cuda_worker_get_count(void); + function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_cuda_worker_get_count + end function fstarpu_cuda_worker_get_count + + ! unsigned starpu_opencl_worker_get_count(void); + function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_opencl_worker_get_count + end function fstarpu_opencl_worker_get_count + + ! 
int starpu_worker_get_id(void); + function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_id + end function fstarpu_worker_get_id + + ! _starpu_worker_get_id_check + ! starpu_worker_get_id_check + + ! int starpu_worker_get_bindid(int workerid); + function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_bindid + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_bindid + + ! int starpu_combined_worker_get_id(void); + function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_id + end function fstarpu_combined_worker_get_id + + ! int starpu_combined_worker_get_size(void); + function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_size + end function fstarpu_combined_worker_get_size + + ! int starpu_combined_worker_get_rank(void); + function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_combined_worker_get_rank + end function fstarpu_combined_worker_get_rank + + ! enum starpu_worker_archtype starpu_worker_get_type(int id); + function fstarpu_worker_get_type(id) bind(C) + use iso_c_binding, only: c_int, c_ptr + type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t + integer(c_int),value,intent(in) :: id + end function fstarpu_worker_get_type + + ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); + function fstarpu_worker_get_count_by_type(typeid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_count_by_type + type(c_ptr),value,intent(in) :: typeid ! 
c_intptr_t expected by C func + end function fstarpu_worker_get_count_by_type + + ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); + function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_ids_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),intent(out) :: workerids(*) + integer(c_int),value,intent(in) :: maxsize + end function fstarpu_worker_get_ids_by_type + + ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); + function fstarpu_worker_get_by_type(typeid, num) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_type + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: num + end function fstarpu_worker_get_by_type + + ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); + function fstarpu_worker_get_by_devid(typeid, devid) bind(C) + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_worker_get_by_devid + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + integer(c_int),value,intent(in) :: devid + end function fstarpu_worker_get_by_devid + + ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); + subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") + use iso_c_binding, only: c_int, c_char, c_size_t + integer(c_int),value,intent(in) :: id + character(c_char),intent(out) :: dst(*) + integer(c_size_t),value,intent(in) :: maxlen + end subroutine fstarpu_worker_get_name + + + ! 
int starpu_worker_get_devid(int id); + function fstarpu_worker_get_devid(id) bind(C,name="starpu_worker_get_devid") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_devid + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_devid + + ! struct starpu_tree* starpu_workers_get_tree(void); + ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); + + ! unsigned starpu_worker_is_blocked(int workerid); + function fstarpu_worker_is_blocked(id) bind(C,name="starpu_worker_is_blocked") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_blocked + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_blocked + + ! unsigned starpu_worker_is_slave_somewhere(int workerid); + function fstarpu_worker_is_slave_somewhere(id) bind(C,name="starpu_worker_is_slave_somewhere") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_is_slave_somewhere + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_is_slave_somewhere + + ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); + subroutine fstarpu_worker_get_type_as_string(typeid,dst,maxlen) bind(C) + use iso_c_binding, only: c_ptr, c_char, c_size_t + type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func + character(c_char),intent(out) :: dst(*) + integer(c_size_t),value,intent(in) :: maxlen + end subroutine fstarpu_worker_get_type_as_string + + ! int starpu_bindid_get_workerids(int bindid, int **workerids); + + ! == starpu_task.h == + + function fstarpu_task_create_sync (handle, mode) bind(C,name="starpu_task_create_sync") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_task_create_sync + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode + end function fstarpu_task_create_sync + + ! 
void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
+  ! id is a starpu_tag_t, exactly like the elements of tag_array, so it is
+  ! declared integer(c_long_long) as in the other fstarpu_tag_* bindings
+  ! (a c_int here would hand a 32-bit value to a 64-bit C parameter).
+  subroutine fstarpu_tag_declare_deps_array(id,ndeps,tag_array) bind(C,name="starpu_tag_declare_deps_array")
+    use iso_c_binding, only: c_int, c_long_long
+    integer(c_long_long), value, intent(in) :: id
+    integer(c_int), value, intent(in) :: ndeps
+    integer(c_long_long), intent(in) :: tag_array(*)
+  end subroutine fstarpu_tag_declare_deps_array
+
+  ! void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
+  ! NOTE(review): the C function is variadic; this interface fixes the
+  ! variable part to a single task pointer (root_task). Calling a variadic
+  ! C function through a fixed-arity bind(C) interface is not covered by
+  ! Fortran/C interoperability -- prefer fstarpu_task_declare_deps_array
+  ! where portability matters.
+  subroutine fstarpu_task_declare_deps(task,ndeps,root_task) bind(C,name="starpu_task_declare_deps")
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: ndeps
+    type(c_ptr), value, intent(in) :: root_task
+  end subroutine fstarpu_task_declare_deps
+
+  ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
+  ! task_array is an array of task pointers passed by reference.
+  subroutine fstarpu_task_declare_deps_array(task,ndeps,task_array) bind(C,name="starpu_task_declare_deps_array")
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: ndeps
+    type(c_ptr), intent(in) :: task_array(*)
+  end subroutine fstarpu_task_declare_deps_array
+
+  ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
+  subroutine fstarpu_task_end_dep_add(task, nb_deps) &
+      bind(C,name="starpu_task_end_dep_add")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: nb_deps
+  end subroutine fstarpu_task_end_dep_add
+
+  ! void starpu_task_end_dep_release(struct starpu_task *t)
+  subroutine fstarpu_task_end_dep_release(task) &
+      bind(C,name="starpu_task_end_dep_release")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: task
+  end subroutine fstarpu_task_end_dep_release
+
+
+  ! 
int starpu_tag_wait(starpu_tag_t id);
+  ! Tags (starpu_tag_t) are exchanged as integer(c_long_long) by value.
+  function fstarpu_tag_wait(id) bind(C, name="starpu_tag_wait")
+    use iso_c_binding, only: c_long_long, c_int
+    integer(c_long_long), value, intent(in) :: id
+    integer(c_int) :: fstarpu_tag_wait
+  end function fstarpu_tag_wait
+
+  ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id);
+  ! tag_array is passed by reference as an assumed-size array of tags.
+  function fstarpu_tag_wait_array(ntags, tag_array) bind(C, name="starpu_tag_wait_array")
+    use iso_c_binding, only: c_long_long, c_int
+    integer(c_int), value, intent(in) :: ntags
+    integer(c_long_long), intent(in) :: tag_array(*)
+    integer(c_int) :: fstarpu_tag_wait_array
+  end function fstarpu_tag_wait_array
+
+  ! void starpu_tag_notify_from_apps(starpu_tag_t id);
+  subroutine fstarpu_tag_notify_from_apps(id) bind(C, name="starpu_tag_notify_from_apps")
+    use iso_c_binding, only: c_long_long
+    integer(c_long_long), value, intent(in) :: id
+  end subroutine fstarpu_tag_notify_from_apps
+
+  ! void starpu_tag_restart(starpu_tag_t id);
+  subroutine fstarpu_tag_restart(id) bind(C, name="starpu_tag_restart")
+    use iso_c_binding, only: c_long_long
+    integer(c_long_long), value, intent(in) :: id
+  end subroutine fstarpu_tag_restart
+
+  ! void starpu_tag_remove(starpu_tag_t id);
+  subroutine fstarpu_tag_remove(id) bind(C, name="starpu_tag_remove")
+    use iso_c_binding, only: c_long_long
+    integer(c_long_long), value, intent(in) :: id
+  end subroutine fstarpu_tag_remove
+
+  ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id);
+  ! The task pointer comes back as an opaque type(c_ptr).
+  function fstarpu_tag_get_task(id) bind(C, name="starpu_tag_get_task")
+    use iso_c_binding, only: c_long_long, c_ptr
+    integer(c_long_long), value, intent(in) :: id
+    type(c_ptr) :: fstarpu_tag_get_task
+  end function fstarpu_tag_get_task
+
+
+  ! void starpu_task_init(struct starpu_task *task);
+  subroutine fstarpu_task_init(task) bind(C, name="starpu_task_init")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: task
+  end subroutine fstarpu_task_init
+
+  ! 
void starpu_task_clean(struct starpu_task *task);
+  ! Tasks are handled as opaque type(c_ptr) values passed by value.
+  subroutine fstarpu_task_clean(task) bind(C, name="starpu_task_clean")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: task
+  end subroutine fstarpu_task_clean
+
+  ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC;
+  function fstarpu_task_create() bind(C, name="starpu_task_create")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr) :: fstarpu_task_create
+  end function fstarpu_task_create
+
+  ! void starpu_task_destroy(struct starpu_task *task);
+  subroutine fstarpu_task_destroy(task) bind(C, name="starpu_task_destroy")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: task
+  end subroutine fstarpu_task_destroy
+
+  ! void starpu_task_set_destroy(struct starpu_task *task);
+  subroutine fstarpu_task_set_destroy(task) bind(C, name="starpu_task_set_destroy")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: task
+  end subroutine fstarpu_task_set_destroy
+
+  ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+  ! Function interface: the C int result is returned to the caller.
+  function fstarpu_task_submit(task) bind(C, name="starpu_task_submit")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int) :: fstarpu_task_submit
+  end function fstarpu_task_submit
+
+  ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
+  ! The C 'unsigned' context id is passed as integer(c_int).
+  function fstarpu_task_submit_to_ctx(task, sched_ctx_id) bind(C, name="starpu_task_submit_to_ctx")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: sched_ctx_id
+    integer(c_int) :: fstarpu_task_submit_to_ctx
+  end function fstarpu_task_submit_to_ctx
+
+  ! 
int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+  function fstarpu_task_finished (task) bind(C,name="starpu_task_finished")
+    use iso_c_binding, only: c_int,c_ptr
+    integer(c_int) :: fstarpu_task_finished
+    type(c_ptr), value, intent(in) :: task
+  end function fstarpu_task_finished
+
+  ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
+  function fstarpu_task_wait (task) bind(C,name="starpu_task_wait")
+    use iso_c_binding, only: c_int,c_ptr
+    integer(c_int) :: fstarpu_task_wait
+    type(c_ptr), value, intent(in) :: task
+  end function fstarpu_task_wait
+
+  ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT;
+  ! task_array maps the C 'struct starpu_task **tasks': an array of task
+  ! pointers passed by reference, hence assumed-size like the task_array
+  ! argument of fstarpu_task_declare_deps_array. A scalar dummy would make
+  ! the compiler reject an actual array argument.
+  function fstarpu_task_wait_array(task_array,ntasks) bind(C,name="starpu_task_wait_array")
+    use iso_c_binding, only: c_int, c_ptr
+    integer(c_int) :: fstarpu_task_wait_array
+    integer(c_int), value, intent(in) :: ntasks
+    type(c_ptr), intent(in) :: task_array(*)
+  end function fstarpu_task_wait_array
+
+
+  ! int starpu_task_wait_for_all(void);
+  ! Declared as a subroutine: the C int result is not available to callers.
+  subroutine fstarpu_task_wait_for_all () bind(C,name="starpu_task_wait_for_all")
+  end subroutine fstarpu_task_wait_for_all
+
+  ! int starpu_task_wait_for_n_submitted(unsigned n);
+  ! Declared as a subroutine: the C int result is not available to callers.
+  subroutine fstarpu_task_wait_for_n_submitted (n) bind(C,name="starpu_task_wait_for_n_submitted")
+    use iso_c_binding, only: c_int
+    integer(c_int), value, intent(in) :: n
+  end subroutine fstarpu_task_wait_for_n_submitted
+
+  ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
+  ! Declared as a subroutine: the C int result is not available to callers.
+  subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C,name="starpu_task_wait_for_all_in_ctx")
+    use iso_c_binding, only: c_int
+    integer(c_int), value, intent(in) :: ctx
+  end subroutine fstarpu_task_wait_for_all_in_ctx
+
+  ! 
int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n);
+  ! Declared as a subroutine: the C int result is not available to callers.
+  subroutine fstarpu_task_wait_for_n_submitted_in_ctx(ctx, n) bind(C, name="starpu_task_wait_for_n_submitted_in_ctx")
+    use iso_c_binding, only: c_int
+    integer(c_int), value, intent(in) :: ctx
+    integer(c_int), value, intent(in) :: n
+  end subroutine fstarpu_task_wait_for_n_submitted_in_ctx
+
+  ! int starpu_task_wait_for_no_ready(void);
+  function fstarpu_task_wait_for_no_ready() bind(C, name="starpu_task_wait_for_no_ready")
+    use iso_c_binding, only: c_int
+    integer(c_int) :: fstarpu_task_wait_for_no_ready
+  end function fstarpu_task_wait_for_no_ready
+
+  ! int starpu_task_nready(void);
+  function fstarpu_task_nready() bind(C, name="starpu_task_nready")
+    use iso_c_binding, only: c_int
+    integer(c_int) :: fstarpu_task_nready
+  end function fstarpu_task_nready
+
+  ! int starpu_task_nsubmitted(void);
+  function fstarpu_task_nsubmitted() bind(C, name="starpu_task_nsubmitted")
+    use iso_c_binding, only: c_int
+    integer(c_int) :: fstarpu_task_nsubmitted
+  end function fstarpu_task_nsubmitted
+
+  ! void starpu_do_schedule(void);
+  subroutine fstarpu_do_schedule() bind(C, name="starpu_do_schedule")
+  end subroutine fstarpu_do_schedule
+
+  ! starpu_codelet_init
+  ! The codelet is an opaque type(c_ptr) passed by value.
+  subroutine fstarpu_codelet_init(codelet) bind(C, name="starpu_codelet_init")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: codelet
+  end subroutine fstarpu_codelet_init
+
+  ! starpu_codelet_display_stats
+  subroutine fstarpu_codelet_display_stats(codelet) bind(C, name="starpu_codelet_display_stats")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: codelet
+  end subroutine fstarpu_codelet_display_stats
+
+
+  ! struct starpu_task *starpu_task_get_current(void);
+  function fstarpu_task_get_current() bind(C, name="starpu_task_get_current")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr) :: fstarpu_task_get_current
+  end function fstarpu_task_get_current
+
+  ! 
void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
+  ! The Fortran name keeps its historical doubled "_init" suffix so that
+  ! existing callers still compile; the binding label now names the C
+  ! function quoted above (the previous label duplicated the suffix and
+  ! did not match that prototype).
+  subroutine fstarpu_parallel_task_barrier_init_init (task,id) &
+      bind(C,name="starpu_parallel_task_barrier_init")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: id
+  end subroutine fstarpu_parallel_task_barrier_init_init
+
+  ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
+  ! Same situation: historical Fortran name kept, binding label corrected
+  ! to the C function quoted above.
+  subroutine fstarpu_parallel_task_barrier_init_n_init_n (task,sz) &
+      bind(C,name="starpu_parallel_task_barrier_init_n")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: sz
+  end subroutine fstarpu_parallel_task_barrier_init_n_init_n
+
+  ! struct starpu_task *starpu_task_dup(struct starpu_task *task);
+  ! Takes and returns opaque task pointers.
+  function fstarpu_task_dup (task) bind(C,name="starpu_task_dup")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr) :: fstarpu_task_dup
+    type(c_ptr), value, intent(in) :: task
+  end function fstarpu_task_dup
+
+  ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl);
+  ! The C 'unsigned' impl is passed as integer(c_int).
+  subroutine fstarpu_task_set_implementation (task,impl) &
+      bind(C,name="starpu_task_set_implementation")
+    use iso_c_binding, only: c_ptr,c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int), value, intent(in) :: impl
+  end subroutine fstarpu_task_set_implementation
+
+  ! unsigned starpu_task_get_implementation(struct starpu_task *task);
+  ! The C 'unsigned' result is received as integer(c_int).
+  function fstarpu_task_get_implementation (task) &
+      bind(C,name="starpu_task_get_implementation")
+    use iso_c_binding, only: c_ptr,c_int
+    type(c_ptr), value, intent(in) :: task
+    integer(c_int) :: fstarpu_task_get_implementation
+  end function fstarpu_task_get_implementation
+
+  ! 
--
+
+  ! The interfaces below give no name= in bind(C): each one binds to the C
+  ! symbol spelled like its lower-case Fortran name. Codelets and
+  ! performance models are opaque type(c_ptr) values.
+  function fstarpu_codelet_allocate() bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr) :: fstarpu_codelet_allocate
+  end function fstarpu_codelet_allocate
+
+  subroutine fstarpu_codelet_free(cl) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+  end subroutine fstarpu_codelet_free
+
+  ! cl_name is passed by reference as character(c_char).
+  ! NOTE(review): presumably the callee expects a NUL-terminated string --
+  ! confirm against the C implementation.
+  subroutine fstarpu_codelet_set_name(cl, cl_name) bind(C)
+    use iso_c_binding, only: c_char, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    character(c_char), intent(in) :: cl_name
+  end subroutine fstarpu_codelet_set_name
+
+  subroutine fstarpu_codelet_set_color(cl, cl_color) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    integer(c_int), value, intent(in) :: cl_color
+  end subroutine fstarpu_codelet_set_color
+
+  subroutine fstarpu_codelet_set_model(cl, cl_perfmodel) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: cl_perfmodel
+  end subroutine fstarpu_codelet_set_model
+
+  subroutine fstarpu_codelet_set_energy_model(cl, cl_perfmodel) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: cl_perfmodel
+  end subroutine fstarpu_codelet_set_energy_model
+
+  ! f_ptr is a C function pointer (type(c_funptr)) passed by value.
+  subroutine fstarpu_codelet_add_cpu_func(cl, f_ptr) bind(C)
+    use iso_c_binding, only: c_funptr, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_funptr), value, intent(in) :: f_ptr
+  end subroutine fstarpu_codelet_add_cpu_func
+
+  subroutine fstarpu_codelet_add_cuda_func(cl, f_ptr) bind(C)
+    use iso_c_binding, only: c_funptr, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_funptr), value, intent(in) :: f_ptr
+  end subroutine fstarpu_codelet_add_cuda_func
+
+  subroutine fstarpu_codelet_add_cuda_flags (cl, flags) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: flags ! 
C function expects an intptr_t
+  end subroutine fstarpu_codelet_add_cuda_flags
+
+  ! No name= in bind(C) below: each interface binds to the C symbol spelled
+  ! like its lower-case Fortran name.
+  subroutine fstarpu_codelet_add_opencl_func(cl, f_ptr) bind(C)
+    use iso_c_binding, only: c_funptr, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_funptr), value, intent(in) :: f_ptr
+  end subroutine fstarpu_codelet_add_opencl_func
+
+  subroutine fstarpu_codelet_add_opencl_flags(cl, flags) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t
+  end subroutine fstarpu_codelet_add_opencl_flags
+
+  subroutine fstarpu_codelet_add_buffer(cl, mode) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t
+  end subroutine fstarpu_codelet_add_buffer
+
+  ! c_int is imported here although no declaration in this body uses it.
+  subroutine fstarpu_codelet_set_variable_nbuffers(cl) bind(C)
+    use iso_c_binding, only: c_ptr,c_int
+    type(c_ptr), value, intent(in) :: cl
+  end subroutine fstarpu_codelet_set_variable_nbuffers
+
+  subroutine fstarpu_codelet_set_nbuffers(cl, nbuffers) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    integer(c_int), value, intent(in) :: nbuffers
+  end subroutine fstarpu_codelet_set_nbuffers
+
+  subroutine fstarpu_codelet_set_flags(cl, flags) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t
+  end subroutine fstarpu_codelet_set_flags
+
+  subroutine fstarpu_codelet_set_where(cl, where) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t
+  end subroutine fstarpu_codelet_set_where
+
+  subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    type(c_ptr), value, intent(in) :: type_constant ! 
C function expects an intptr_t
+  end subroutine fstarpu_codelet_set_type
+
+  ! Default binding label (lower-case Fortran name) for the interfaces
+  ! below that give no name= in bind(C).
+  subroutine fstarpu_codelet_set_max_parallelism(cl, max_parallelism) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: cl
+    integer(c_int), value, intent(in) :: max_parallelism
+  end subroutine fstarpu_codelet_set_max_parallelism
+
+  ! Returns an opaque pointer to a performance model object.
+  function fstarpu_perfmodel_allocate() bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr) :: fstarpu_perfmodel_allocate
+  end function fstarpu_perfmodel_allocate
+
+  subroutine fstarpu_perfmodel_free(model) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: model
+  end subroutine fstarpu_perfmodel_free
+
+  ! model_symbol is passed by reference as character(c_char).
+  ! NOTE(review): presumably the callee expects a NUL-terminated string --
+  ! confirm against the C implementation.
+  subroutine fstarpu_perfmodel_set_symbol(model, model_symbol) bind(C)
+    use iso_c_binding, only: c_char, c_ptr
+    type(c_ptr), value, intent(in) :: model
+    character(c_char), intent(in) :: model_symbol
+  end subroutine fstarpu_perfmodel_set_symbol
+
+  subroutine fstarpu_perfmodel_set_type(model, type) bind(C)
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), value, intent(in) :: model
+    type(c_ptr), value, intent(in) :: type ! C function expects an intptr_t
+  end subroutine fstarpu_perfmodel_set_type
+
+  ! == starpu_data_interface.h ==
+
+  ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags);
+
+  ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size);
+  ! The uintptr_t result is received as integer(c_intptr_t).
+  function fstarpu_malloc_on_node(node, sz) bind(C, name="starpu_malloc_on_node")
+    use iso_c_binding, only: c_int, c_intptr_t, c_size_t
+    integer(c_int), value, intent(in) :: node
+    integer(c_size_t), value, intent(in) :: sz
+    integer(c_intptr_t) :: fstarpu_malloc_on_node
+  end function fstarpu_malloc_on_node
+
+  ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags);
+
+  ! 
void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size);
+  ! addr is the integer(c_intptr_t) form of the C uintptr_t address.
+  subroutine fstarpu_free_on_node(node, addr, sz) bind(C, name="starpu_free_on_node")
+    use iso_c_binding, only: c_int, c_intptr_t, c_size_t
+    integer(c_int), value, intent(in) :: node
+    integer(c_intptr_t), value, intent(in) :: addr
+    integer(c_size_t), value, intent(in) :: sz
+  end subroutine fstarpu_free_on_node
+
+  ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
+
+  ! int starpu_data_interface_get_next_id(void);
+  ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops);
+
+
+  ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
+  ! NOTE(review): the Fortran name is spelled "fstarpug_" (extra 'g'); it is
+  ! left untouched here because callers reference it by this name.
+  subroutine fstarpug_data_ptr_register(dh, node) bind(C, name="starpu_data_ptr_register")
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+  end subroutine fstarpug_data_ptr_register
+
+  ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
+  ! dh_dst is passed by reference (C takes a pointer to the handle);
+  ! dh_src is passed by value.
+  subroutine fstarpu_data_register_same(dh_dst, dh_src) bind(C, name="starpu_data_register_same")
+    use iso_c_binding, only: c_ptr
+    type(c_ptr), intent(out) :: dh_dst
+    type(c_ptr), value, intent(in) :: dh_src
+  end subroutine fstarpu_data_register_same
+
+  ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
+  function fstarpu_data_handle_to_pointer(dh, node) bind(C, name="starpu_data_handle_to_pointer")
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+    type(c_ptr) :: fstarpu_data_handle_to_pointer
+  end function fstarpu_data_handle_to_pointer
+
+  ! 
void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
+  ! c_int is imported here although no declaration in this body uses it.
+  function fstarpu_data_get_local_ptr(dh) bind(C, name="starpu_data_get_local_ptr")
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr), value, intent(in) :: dh
+    type(c_ptr) :: fstarpu_data_get_local_ptr
+  end function fstarpu_data_get_local_ptr
+
+  ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node);
+
+  ! == starpu_data_interface.h: tensor ==
+
+  ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize);
+  ! dh is passed by reference because C takes a pointer to the handle;
+  ! every other argument is passed by value. The uintptr_t ptr is passed
+  ! as type(c_ptr) and the uint32_t arguments as integer(c_int).
+  subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) &
+      bind(C, name="starpu_tensor_data_register")
+    use iso_c_binding, only: c_int, c_ptr, c_size_t
+    type(c_ptr), intent(out) :: dh
+    integer(c_int), value, intent(in) :: home_node
+    type(c_ptr), value, intent(in) :: ptr
+    integer(c_int), value, intent(in) :: ldy
+    integer(c_int), value, intent(in) :: ldz
+    integer(c_int), value, intent(in) :: ldt
+    integer(c_int), value, intent(in) :: nx
+    integer(c_int), value, intent(in) :: ny
+    integer(c_int), value, intent(in) :: nz
+    integer(c_int), value, intent(in) :: nt
+    integer(c_size_t), value, intent(in) :: elt_size
+  end subroutine fstarpu_tensor_data_register
+
+  ! 
void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt);
+  ! The C function takes the handle itself, not a pointer to it, so dh is
+  ! passed by value. (An intent(out) dummy without VALUE would pass the
+  ! address of the caller's variable instead of the handle.)
+  subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) &
+      bind(C,name="starpu_tensor_ptr_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+    type(c_ptr), value, intent(in) :: ptr
+    type(c_ptr), value, intent(in) :: dev_handle
+    integer(c_size_t), value, intent(in) :: offset
+    integer(c_int), value, intent(in) :: ldy
+    integer(c_int), value, intent(in) :: ldz
+    integer(c_int), value, intent(in) :: ldt
+  end subroutine fstarpu_tensor_ptr_register
+
+  ! Accessors taking (buffers, i). No name= in bind(C), so each binds to
+  ! the C symbol spelled like its lower-case Fortran name.
+  function fstarpu_tensor_get_ptr(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr) :: fstarpu_tensor_get_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_tensor_get_ptr
+
+  function fstarpu_tensor_get_ldy(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_tensor_get_ldy
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_tensor_get_ldy
+
+  function fstarpu_tensor_get_ldz(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_tensor_get_ldz
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_tensor_get_ldz
+
+  function fstarpu_tensor_get_ldt(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_tensor_get_ldt
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_tensor_get_ldt
+
+  function fstarpu_tensor_get_nx(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_tensor_get_nx
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, 
intent(in) :: i
+  end function fstarpu_tensor_get_nx
+
+  ! Accessors taking (buffers, i). No name= in bind(C), so each binds to
+  ! the C symbol spelled like its lower-case Fortran name.
+  function fstarpu_tensor_get_ny(buffers, i) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+    integer(c_int) :: fstarpu_tensor_get_ny
+  end function fstarpu_tensor_get_ny
+
+  function fstarpu_tensor_get_nz(buffers, i) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+    integer(c_int) :: fstarpu_tensor_get_nz
+  end function fstarpu_tensor_get_nz
+
+  function fstarpu_tensor_get_nt(buffers, i) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+    integer(c_int) :: fstarpu_tensor_get_nt
+  end function fstarpu_tensor_get_nt
+
+  ! == starpu_data_interface.h: block ==
+
+  ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize);
+  ! dh is passed by reference because C takes a pointer to the handle;
+  ! every other argument is passed by value.
+  subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) &
+      bind(C, name="starpu_block_data_register")
+    use iso_c_binding, only: c_int, c_ptr, c_size_t
+    type(c_ptr), intent(out) :: dh
+    integer(c_int), value, intent(in) :: home_node
+    type(c_ptr), value, intent(in) :: ptr
+    integer(c_int), value, intent(in) :: ldy
+    integer(c_int), value, intent(in) :: ldz
+    integer(c_int), value, intent(in) :: nx
+    integer(c_int), value, intent(in) :: ny
+    integer(c_int), value, intent(in) :: nz
+    integer(c_size_t), value, intent(in) :: elt_size
+  end subroutine fstarpu_block_data_register
+
+  ! 
void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz);
+  ! The C function takes the handle itself, not a pointer to it, so dh is
+  ! passed by value. (An intent(out) dummy without VALUE would pass the
+  ! address of the caller's variable instead of the handle.)
+  subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) &
+      bind(C,name="starpu_block_ptr_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+    type(c_ptr), value, intent(in) :: ptr
+    type(c_ptr), value, intent(in) :: dev_handle
+    integer(c_size_t), value, intent(in) :: offset
+    integer(c_int), value, intent(in) :: ldy
+    integer(c_int), value, intent(in) :: ldz
+  end subroutine fstarpu_block_ptr_register
+
+  ! Accessors taking (buffers, i). No name= in bind(C), so each binds to
+  ! the C symbol spelled like its lower-case Fortran name.
+  function fstarpu_block_get_ptr(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr) :: fstarpu_block_get_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_ptr
+
+  function fstarpu_block_get_ldy(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_block_get_ldy
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_ldy
+
+  function fstarpu_block_get_ldz(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_block_get_ldz
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_ldz
+
+  function fstarpu_block_get_nx(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_block_get_nx
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_nx
+
+  function fstarpu_block_get_ny(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_block_get_ny
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_ny
+
+  function 
fstarpu_block_get_nz(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_block_get_nz
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_block_get_nz
+
+  ! == starpu_data_interface.h: matrix ==
+
+  ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize);
+  ! dh is passed by reference because C takes a pointer to the handle.
+  subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) &
+      bind(C,name="starpu_matrix_data_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), intent(out) :: dh
+    integer(c_int), value, intent(in) :: home_node
+    type(c_ptr), value, intent(in) :: ptr
+    integer(c_int), value, intent(in) :: ld
+    integer(c_int), value, intent(in) :: nx
+    integer(c_int), value, intent(in) :: ny
+    integer(c_size_t), value, intent(in) :: elt_size
+  end subroutine fstarpu_matrix_data_register
+
+  ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld);
+  ! Unlike the data_register routine above, the C function takes the
+  ! handle itself, so dh is passed by value. (An intent(out) dummy without
+  ! VALUE would pass the address of the caller's variable instead.)
+  subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+      bind(C,name="starpu_matrix_ptr_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+    type(c_ptr), value, intent(in) :: ptr
+    type(c_ptr), value, intent(in) :: dev_handle
+    integer(c_size_t), value, intent(in) :: offset
+    integer(c_int), value, intent(in) :: ld
+  end subroutine fstarpu_matrix_ptr_register
+
+  ! Accessor taking (buffers, i); default binding label (lower-case
+  ! Fortran name).
+  function fstarpu_matrix_get_ptr(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr) :: fstarpu_matrix_get_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_matrix_get_ptr
+
+  function fstarpu_matrix_get_ld(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_matrix_get_ld
+ 
type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_matrix_get_ld
+
+  ! Accessors taking (buffers, i). No name= in bind(C), so each binds to
+  ! the C symbol spelled like its lower-case Fortran name.
+  function fstarpu_matrix_get_nx(buffers, i) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+    integer(c_int) :: fstarpu_matrix_get_nx
+  end function fstarpu_matrix_get_nx
+
+  function fstarpu_matrix_get_ny(buffers, i) bind(C)
+    use iso_c_binding, only: c_int, c_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+    integer(c_int) :: fstarpu_matrix_get_ny
+  end function fstarpu_matrix_get_ny
+
+  ! == starpu_data_interface.h: vector ==
+
+  ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
+  ! dh is passed by reference because C takes a pointer to the handle;
+  ! every other argument is passed by value.
+  subroutine fstarpu_vector_data_register(dh, home_node, ptr, nx, elt_size) &
+      bind(C, name="starpu_vector_data_register")
+    use iso_c_binding, only: c_int, c_ptr, c_size_t
+    type(c_ptr), intent(out) :: dh
+    integer(c_int), value, intent(in) :: home_node
+    type(c_ptr), value, intent(in) :: ptr
+    integer(c_int), value, intent(in) :: nx
+    integer(c_size_t), value, intent(in) :: elt_size
+  end subroutine fstarpu_vector_data_register
+
+  ! 
void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
+  ! The C function takes the handle itself, not a pointer to it, so dh is
+  ! passed by value. (An intent(out) dummy without VALUE would pass the
+  ! address of the caller's variable instead of the handle.)
+  subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
+      bind(C,name="starpu_vector_ptr_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), value, intent(in) :: dh
+    integer(c_int), value, intent(in) :: node
+    type(c_ptr), value, intent(in) :: ptr
+    type(c_ptr), value, intent(in) :: dev_handle
+    integer(c_size_t), value, intent(in) :: offset
+  end subroutine fstarpu_vector_ptr_register
+
+
+  ! Accessors taking (buffers, i). No name= in bind(C), so each binds to
+  ! the C symbol spelled like its lower-case Fortran name.
+  function fstarpu_vector_get_ptr(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    type(c_ptr) :: fstarpu_vector_get_ptr
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_vector_get_ptr
+
+  function fstarpu_vector_get_nx(buffers, i) bind(C)
+    use iso_c_binding, only: c_ptr, c_int
+    integer(c_int) :: fstarpu_vector_get_nx
+    type(c_ptr), value, intent(in) :: buffers
+    integer(c_int), value, intent(in) :: i
+  end function fstarpu_vector_get_nx
+
+  ! == starpu_data_interface.h: variable ==
+
+  ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
+  ! dh is passed by reference because C takes a pointer to the handle.
+  subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) &
+      bind(C,name="starpu_variable_data_register")
+    use iso_c_binding, only: c_ptr, c_int, c_size_t
+    type(c_ptr), intent(out) :: dh
+    integer(c_int), value, intent(in) :: home_node
+    type(c_ptr), value, intent(in) :: ptr
+    integer(c_size_t), value, intent(in) :: elt_size
+  end subroutine fstarpu_variable_data_register
+
+  ! 
void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); + subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & + bind(C,name="starpu_variable_ptr_register") + use iso_c_binding, only: c_ptr, c_int, c_size_t + type(c_ptr), value, intent(in) :: dh ! existing handle, by value as in the C prototype + integer(c_int), value, intent(in) :: node + type(c_ptr), value, intent(in) :: ptr + type(c_ptr), value, intent(in) :: dev_handle + integer(c_size_t), value, intent(in) :: offset + end subroutine fstarpu_variable_ptr_register + + function fstarpu_variable_get_ptr(buffers, i) bind(C) ! no name=: C symbol is the lower-case Fortran name + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_variable_get_ptr + type(c_ptr), value, intent(in) :: buffers + integer(c_int), value, intent(in) :: i + end function fstarpu_variable_get_ptr + + ! == starpu_data_interface.h: void == + + ! void starpu_void_data_register(starpu_data_handle_t *handle); + subroutine fstarpu_void_data_register(dh) & + bind(C,name="starpu_void_data_register") + use iso_c_binding, only: c_ptr, c_int, c_size_t + type(c_ptr), intent(out) :: dh ! by reference (C: starpu_data_handle_t *handle) + end subroutine fstarpu_void_data_register + + ! == starpu_data_filter.h == + + function fstarpu_data_filter_allocate () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_data_filter_allocate + end function fstarpu_data_filter_allocate + + subroutine fstarpu_data_filter_free (filter) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_filter_free + + ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ + ! 
to fit within the Fortran id length limit + function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) ! no name=: C symbol is the lower-case Fortran name + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block + end function fstarpu_df_alloc_bcsr_filter_canonical_block + + function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block + end function fstarpu_df_alloc_csr_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block + end function fstarpu_df_alloc_matrix_filter_block + + function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow + end function fstarpu_df_alloc_matrix_filter_block_shadow + + function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block + end function fstarpu_df_alloc_matrix_filter_vertical_block + + function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow + end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow + + function fstarpu_df_alloc_vector_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block + end function fstarpu_df_alloc_vector_filter_block + + function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow + end function fstarpu_df_alloc_vector_filter_block_shadow + + function fstarpu_df_alloc_vector_filter_list () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_list + end function fstarpu_df_alloc_vector_filter_list 
+ + function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 + end function fstarpu_df_alloc_vector_filter_divide_in_2 + + function fstarpu_df_alloc_block_filter_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block + end function fstarpu_df_alloc_block_filter_block + + function fstarpu_df_alloc_block_filter_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow + end function fstarpu_df_alloc_block_filter_block_shadow + + function fstarpu_df_alloc_block_filter_vertical_block () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block + end function fstarpu_df_alloc_block_filter_vertical_block + + function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow + end function fstarpu_df_alloc_block_filter_vertical_block_shadow + + subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr ! C function pointer, passed by value + end subroutine fstarpu_data_filter_set_filter_func + + subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: nchildren + end subroutine fstarpu_data_filter_set_nchildren + + subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) + use iso_c_binding, only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr ! C function pointer, passed by value + end subroutine fstarpu_data_filter_set_get_nchildren_func + + subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) + use iso_c_binding, 
only: c_ptr, c_funptr + type(c_ptr), value, intent(in) :: filter + type(c_funptr), value, intent(in) :: f_ptr ! C function pointer, passed by value + end subroutine fstarpu_data_filter_set_get_child_ops_func + + subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: filter + integer(c_int), value, intent(in) :: filter_arg ! integer argument; see _arg_ptr for a pointer + end subroutine fstarpu_data_filter_set_filter_arg + + subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: filter_arg_ptr + end subroutine fstarpu_data_filter_set_filter_arg_ptr + + ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); + subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + end subroutine fstarpu_data_partition + + ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); + subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: gathering_node ! C type: unsigned + end subroutine fstarpu_data_unpartition + + ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_plan (dh,filter,children) & + bind(C,name="starpu_data_partition_plan") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: filter + type(c_ptr), intent(in) :: children(*) ! NOTE(review): C pointer is non-const; confirm intent(in) + end subroutine fstarpu_data_partition_plan + + ! 
void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + end subroutine fstarpu_data_partition_submit + + ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + end subroutine fstarpu_data_partition_readonly_submit + + ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & + bind(C,name="starpu_data_partition_readwrite_upgrade_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + end subroutine fstarpu_data_partition_readwrite_upgrade_submit + + ! 
void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + integer(c_int), value, intent(in) :: gathering_node ! C type: int (signed) + end subroutine fstarpu_data_unpartition_submit + + ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); + subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & + bind(C,name="starpu_data_unpartition_readonly_submit") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + integer(c_int), value, intent(in) :: gathering_node ! C type: int (signed) + end subroutine fstarpu_data_unpartition_readonly_submit + + ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); + subroutine fstarpu_data_partition_clean (dh,nparts,children) & + bind(C,name="starpu_data_partition_clean") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: nparts ! C type: unsigned + type(c_ptr), intent(in) :: children(*) ! array of handles, passed by reference + end subroutine fstarpu_data_partition_clean + + ! int starpu_data_get_nb_children(starpu_data_handle_t handle); + function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_get_nb_children + type(c_ptr), value, intent(in) :: dh + end function fstarpu_data_get_nb_children + + ! 
starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); + function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_child + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: i ! C type: unsigned + end function fstarpu_data_get_child + + ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); + ! . see: fstarpu_data_get_sub_data + ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); + ! . see: fstarpu_data_get_sub_data + + ! note: defined in filters.c + function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_get_sub_data + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: depth + integer(c_int), intent(in) :: indices(*) ! array by reference; presumably one index per depth level -- confirm + end function fstarpu_data_get_sub_data + + ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); + ! . see fstarpu_data_map_filters + ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); + ! . see fstarpu_data_map_filters + + ! note: defined in filters.c + subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: root_dh + integer(c_int), value, intent(in) :: nfilters + type(c_ptr), intent(in) :: filters(*) ! array by reference; presumably nfilters entries -- confirm + end subroutine fstarpu_data_map_filters + + ! 
void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block + + ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_block_shadow + + ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block + + ! 
void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_matrix_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_matrix_filter_vertical_block_shadow + + ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block + + ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_block_shadow + + ! 
void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list_long") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list_long + + ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_list") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_filter_list + + ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_vector_filter_divide_in_2") ! Fortran name kept; label now matches the C symbol + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_vector_divide_in_2 + + ! 
void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block + + ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_block_shadow + + ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block + + ! 
void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_vertical_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_vertical_block_shadow + + ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block + + ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); + subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & + bind(C,name="starpu_block_filter_depth_block_shadow") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: father_interface + type(c_ptr), value, intent(in) :: child_interface + type(c_ptr), value, intent(in) :: filter + type(c_ptr), value, intent(in) :: id ! NOTE(review): id/nparts are unsigned in the C prototype + type(c_ptr), value, intent(in) :: nparts + end subroutine fstarpu_block_filter_depth_block_shadow + + + ! == starpu_data.h == + + ! 
void starpu_data_unregister(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value (C: starpu_data_handle_t handle) + end subroutine fstarpu_data_unregister + + ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_unregister_no_coherency + + ! void starpu_data_unregister_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_unregister_submit + + ! void starpu_data_deinitialize(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_deinitialize + + ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_deinitialize_submit + + ! void starpu_data_invalidate(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_invalidate + + ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); + subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! handle, by value + end subroutine fstarpu_data_invalidate_submit + + ! 
void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); + subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") + use iso_c_binding, only: c_ptr,c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: is_important ! C type: unsigned + end subroutine fstarpu_data_advise_as_important + + ! starpu_data_acquire: see fstarpu_data_acquire + subroutine fstarpu_data_acquire (dh, mode) bind(C) ! no name=: C symbol is fstarpu_data_acquire + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t + end subroutine fstarpu_data_acquire + + ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); + ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); + ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); + + ! void starpu_data_release(starpu_data_handle_t handle); + subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_release + + ! 
void starpu_data_release_on_node(starpu_data_handle_t handle, int node); + subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_release_on_node + + ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; + function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_arbiter_create + end function fstarpu_arbiter_create + + ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); + subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh ! existing handle, by value as in the C prototype + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_data_assign_arbiter + + ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); + subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: arbiter + end subroutine fstarpu_arbiter_destroy + + ! void starpu_data_display_memory_stats(); + subroutine fstarpu_display_memory_stats() bind(C,name="starpu_data_display_memory_stats") ! Fortran name kept; label matches the C symbol + end subroutine fstarpu_display_memory_stats + + ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); + subroutine fstarpu_data_request_allocation (dh, node) & + bind(C,name="starpu_data_request_allocation") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_data_request_allocation + + ! 
int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_fetch_on_node (dh, node, async) & + bind(C,name="starpu_data_fetch_on_node") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_fetch_on_node + + ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_prefetch_on_node") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_prefetch_on_node + + ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_prefetch_on_node_prio") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_prefetch_on_node_prio + + ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); + subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & + bind(C,name="starpu_data_idle_prefetch_on_node") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + end subroutine fstarpu_data_idle_prefetch_on_node + + ! 
int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); + subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & + bind(C,name="starpu_data_idle_prefetch_on_node_prio") ! C int result is not exposed by this subroutine + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + integer(c_int), value, intent(in) :: async + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_data_idle_prefetch_on_node_prio + + !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); + function fstarpu_data_is_on_node(dh, node) & + bind(C,name="starpu_data_is_on_node") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_is_on_node ! C return type: unsigned + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end function fstarpu_data_is_on_node + + ! void starpu_data_wont_use(starpu_data_handle_t handle); + subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_data_wont_use + + ! unsigned starpu_worker_get_memory_node(unsigned workerid); + function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_worker_get_memory_node ! C return type: unsigned + integer(c_int), value, intent(in) :: id + end function fstarpu_worker_get_memory_node + + ! unsigned starpu_memory_nodes_get_count(void); + function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_memory_nodes_get_count ! C return type: unsigned + end function fstarpu_memory_nodes_get_count + + ! enum starpu_node_kind starpu_node_get_kind(unsigned node); + ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); + ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); + ! 
unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); + ! unsigned starpu_data_get_default_sequential_consistency_flag(void); + ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); + ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); + + ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); + subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl ! C type: struct starpu_codelet * + type(c_ptr), value, intent(in) :: init_cl ! C type: struct starpu_codelet * + end subroutine fstarpu_data_set_reduction_methods + + ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) + subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & + bind(C,name="starpu_data_set_reduction_methods_with_args") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: redux_cl ! C type: struct starpu_codelet * + type(c_ptr), value, intent(in) :: redux_args ! C type: void * + type(c_ptr), value, intent(in) :: init_cl ! C type: struct starpu_codelet * + type(c_ptr), value, intent(in) :: init_args ! C type: void * + end subroutine fstarpu_data_set_reduction_methods_with_args + + ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); + + ! 
unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); + ! The unsigned result and memory node are declared integer(c_int). + function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node") + use iso_c_binding, only: c_ptr, c_int + integer(c_int) :: fstarpu_data_test_if_allocated_on_node + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: mem_node + end function fstarpu_data_test_if_allocated_on_node + + ! void starpu_memchunk_tidy(unsigned memory_node); + subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: mem_node + end subroutine fstarpu_memchunk_tidy + + ! == starpu_task_util.h == + ! The fstarpu_data_handle_array_* interfaces below use bind(C) without NAME=, + ! so each binding label is the lower-case Fortran procedure name. + + ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb); + ! Returns the array as an opaque C pointer. + function fstarpu_data_handle_array_alloc (nb) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_handle_array_alloc + integer(c_int), value, intent(in) :: nb + end function fstarpu_data_handle_array_alloc + + ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles); + subroutine fstarpu_data_handle_array_free (handles) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: handles + end subroutine fstarpu_data_handle_array_free + + ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle); + ! NOTE(review): i is forwarded unchanged to the C side; presumably 0-based, confirm against the C helper. + subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: handles + integer(c_int), value, intent(in) :: i + type(c_ptr), value, intent(in) :: handle + end subroutine fstarpu_data_handle_array_set + + ! 
struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb); + ! Returns the descriptor array as an opaque C pointer. + function fstarpu_data_descr_array_alloc (nb) bind(C) + use iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: fstarpu_data_descr_array_alloc + integer(c_int), value, intent(in) :: nb + end function fstarpu_data_descr_array_alloc + + ! struct starpu_data_descr *fstarpu_data_descr_alloc(void); + function fstarpu_data_descr_alloc () bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_data_descr_alloc + end function fstarpu_data_descr_alloc + + ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs); + subroutine fstarpu_data_descr_array_free (descrs) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: descrs + end subroutine fstarpu_data_descr_array_free + + ! void fstarpu_data_descr_free(struct starpu_data_descr *descr); + subroutine fstarpu_data_descr_free (descr) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: descr + end subroutine fstarpu_data_descr_free + ! Misspelled historical name, kept so existing sources still compile. + ! It is bound explicitly to the C symbol documented above, because the + ! default binding label (the lower-case Fortran name) would not match it. + subroutine fstarpu_data_descrg_free (descr) bind(C,name="fstarpu_data_descr_free") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: descr + end subroutine fstarpu_data_descrg_free + + ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode); + subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C) + use iso_c_binding, only: c_ptr, c_int, c_intptr_t + type(c_ptr), value, intent(in) :: descrs + integer(c_int), value, intent(in) :: i + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t + end subroutine fstarpu_data_descr_array_set + + ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode); + subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C) + use iso_c_binding, only: c_ptr, c_intptr_t + type(c_ptr), value, intent(in) :: descr + type(c_ptr), value, intent(in) :: handle + type(c_ptr), value, intent(in) :: mode ! 
C func expects c_intptr_t + end subroutine fstarpu_data_descr_set + + + ! arglist is an assumed-size array of C pointers passed by reference. + subroutine fstarpu_task_insert(arglist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_task_insert + ! Alternative Fortran name bound to the same C symbol as fstarpu_task_insert. + subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert") + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_insert_task + + ! cl_arg is passed by value; bufferlist is an assumed-size array of C pointers. + subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: cl_arg + type(c_ptr), dimension(*), intent(in) :: bufferlist + end subroutine fstarpu_unpack_arg + + ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) + ! NOTE(review): sync_tag and deps are both starpu_tag_t in the prototype above, yet + ! sync_tag is declared integer(c_int) while tag_array is integer(c_long_long) - confirm + ! the width of starpu_tag_t before relying on large tag values. + subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) & + bind(C,name="starpu_create_sync_task") + use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr + integer(c_int), value, intent(in) :: sync_tag + integer(c_int), value, intent(in) :: ndeps + integer(c_long_long), intent(in) :: tag_array(*) + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: callback_arg + end subroutine fstarpu_create_sync_task + + ! == starpu_sched_ctx.h == + + ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create + ! ctx_name is passed by reference as a C character; kind=c_char is spelled out because a + ! bare character(c_char) would set the length, not the kind. + ! NOTE(review): presumably the caller must NUL-terminate ctx_name - confirm. + function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C) + use iso_c_binding, only: c_int, c_char, c_ptr + integer(c_int) :: fstarpu_sched_ctx_create + integer(c_int), intent(in) :: workers_array(*) + integer(c_int), value, intent(in) :: nworkers + character(kind=c_char), intent(in) :: ctx_name + type(c_ptr), dimension(*), intent(in) :: arglist + end function fstarpu_sched_ctx_create + + ! 
unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); + ! Both names are passed by reference as C characters; kind=c_char is spelled out because a + ! bare character(c_char) would set the length, not the kind. + ! NOTE(review): presumably both strings must be NUL-terminated by the caller - confirm. + function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, & + min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) & + bind(C,name="starpu_sched_ctx_create_inside_interval") + use iso_c_binding, only: c_int, c_char + integer(c_int) :: fstarpu_sched_ctx_create_inside_interval + character(kind=c_char), intent(in) :: policy_name + character(kind=c_char), intent(in) :: sched_ctx_name + integer(c_int), value, intent(in) :: min_ncpus + integer(c_int), value, intent(in) :: max_ncpus + integer(c_int), value, intent(in) :: min_ngpus + integer(c_int), value, intent(in) :: max_ngpus + integer(c_int), value, intent(in) :: allow_overlap + end function fstarpu_sched_ctx_create_inside_interval + + ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args); + ! close_callback is a C function pointer and args an opaque C pointer, both passed by value. + subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) & + bind(c,name="starpu_sched_ctx_register_close_callback") + use iso_c_binding, only: c_ptr, c_funptr, c_int + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_funptr), value, intent(in) :: close_callback + type(c_ptr), value, intent(in) :: args + end subroutine fstarpu_sched_ctx_register_close_callback + + ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); + ! workerids is an assumed-size integer(c_int) array passed by reference. + subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers") + use iso_c_binding, only: c_int + integer(c_int), intent(in) :: workerids (*) + integer(c_int), value, intent(in) :: nworkers + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_add_workers + + ! 
void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); + ! workerids is an assumed-size integer(c_int) array passed by reference. + subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers") + use iso_c_binding, only: c_int + integer(c_int), intent(in) :: workerids (*) + integer(c_int), value, intent(in) :: nworkers + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_remove_workers + + ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers + ! No NAME= is given, so the binding label is the lower-case Fortran name. + subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C) + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_display_workers + + ! void starpu_sched_ctx_delete(unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + end subroutine fstarpu_sched_ctx_delete + + ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); + ! Both unsigned identifiers are declared integer(c_int). + subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: ctx + integer(c_int), value, intent(in) :: inheritor + end subroutine fstarpu_sched_ctx_set_inheritor + + ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_inheritor + integer(c_int), value, intent(in) :: ctx + end function fstarpu_sched_ctx_get_inheritor + + ! 
unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); + ! The unsigned result and argument are declared integer(c_int). + function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level + integer(c_int), value, intent(in) :: ctx + end function fstarpu_sched_ctx_get_hierarchy_level + + ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); + ! The unsigned* argument is taken as an opaque C pointer passed by value; + ! NOTE(review): callers presumably obtain it with c_loc on an integer(c_int) target - confirm. + subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: ctx_ptr + end subroutine fstarpu_sched_ctx_set_context + + ! unsigned starpu_sched_ctx_get_context(void); + function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_context + end function fstarpu_sched_ctx_get_context + + ! void starpu_sched_ctx_stop_task_submission(void); + ! Takes no argument; the use statement below brings in names this interface does not reference. + subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission") + use iso_c_binding + end subroutine fstarpu_sched_ctx_stop_task_submission + + ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_finished_submit + + ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); + ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); + + ! 
unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); + ! In the interfaces below, unsigned results and arguments are declared integer(c_int) + ! and arguments are passed by value. + function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nworkers") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nworkers + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nworkers + + ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); + function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) & + bind(c,name="starpu_sched_ctx_get_nshared_workers") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: sched_ctx_id2 + end function fstarpu_sched_ctx_get_nshared_workers + + ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); + function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_contains_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_contains_worker + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_contains_worker + + ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); + ! The C enum argument arch is declared integer(c_int). + function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_contains_type_of_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker + integer(c_int), value, intent(in) :: arch + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_contains_type_of_worker + + ! 
unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); + ! The unsigned result and argument are declared integer(c_int). + function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_worker_get_id") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_worker_get_id + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_worker_get_id + + ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); + ! task is an opaque C pointer passed by value. + function fstarpu_sched_ctx_get_ctx_for_task (task) & + bind(c,name="starpu_sched_ctx_get_ctx_for_task") + use iso_c_binding, only: c_int, c_ptr + integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task + type(c_ptr), value, intent(in) :: task + end function fstarpu_sched_ctx_get_ctx_for_task + + ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); + function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) & + bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker + integer(c_int), value, intent(in) :: workerid + end function fstarpu_sched_ctx_overlapping_ctxs_on_worker + + ! int starpu_sched_get_min_priority(void); + ! Takes no argument and returns the C int result. + function fstarpu_sched_get_min_priority () & + bind(c,name="starpu_sched_get_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_get_min_priority + end function fstarpu_sched_get_min_priority + + ! int starpu_sched_get_max_priority(void); + function fstarpu_sched_get_max_priority () & + bind(c,name="starpu_sched_get_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_get_max_priority + end function fstarpu_sched_get_max_priority + + ! 
int starpu_sched_set_min_priority(int min_prio); + ! The setters below are bound as functions, so the C int result is available to the caller. + function fstarpu_sched_set_min_priority (min_prio) & + bind(c,name="starpu_sched_set_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_set_min_priority + integer(c_int), value, intent(in) :: min_prio + end function fstarpu_sched_set_min_priority + + ! int starpu_sched_set_max_priority(int max_prio); + function fstarpu_sched_set_max_priority (max_prio) & + bind(c,name="starpu_sched_set_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_set_max_priority + integer(c_int), value, intent(in) :: max_prio + end function fstarpu_sched_set_max_priority + + ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); + ! Per-context variant: the unsigned context id is declared integer(c_int). + function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_min_priority + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_min_priority + + ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); + function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_max_priority + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_max_priority + + ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); + function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) & + bind(c,name="starpu_sched_ctx_set_min_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_set_min_priority + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: min_prio + end function fstarpu_sched_ctx_set_min_priority + + ! 
int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); + ! Bound as a function, so the C int result is available to the caller. + function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) & + bind(c,name="starpu_sched_ctx_set_max_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_set_max_priority + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: max_prio + end function fstarpu_sched_ctx_set_max_priority + + ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); + ! NOTE(review): presumably a boolean-like int (non-zero when set) - confirm against the C documentation. + function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_min_priority_is_set") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_min_priority_is_set + + ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); + function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_max_priority_is_set") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_max_priority_is_set + + ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); + ! The void* result is returned as an opaque type(c_ptr). + function fstarpu_sched_ctx_get_user_data(sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_user_data") + use iso_c_binding, only: c_int, c_ptr + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_ptr) :: fstarpu_sched_ctx_get_user_data + end function fstarpu_sched_ctx_get_user_data + + ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; + + ! 
void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); + ! The unsigned context id is declared integer(c_int) and passed by value. + subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_delete_worker_collection") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_delete_worker_collection + + ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); + + ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); + ! policy_data is an opaque C pointer passed by value. + subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) & + bind(c,name="starpu_sched_ctx_set_policy_data") + use iso_c_binding, only: c_int, c_ptr + integer(c_int), value, intent(in) :: sched_ctx_id + type(c_ptr), value, intent(in) :: policy_data + end subroutine fstarpu_sched_ctx_set_policy_data + + ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); + ! The void* result is returned as an opaque type(c_ptr). + function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_policy_data") + use iso_c_binding, only: c_int, c_ptr + type(c_ptr) :: fstarpu_sched_ctx_get_policy_data + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_policy_data + + ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id); + ! func is a C function pointer and param an opaque C pointer; the void* result is a type(c_ptr). + function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_exec_parallel_code") + use iso_c_binding, only: c_int, c_funptr, c_ptr + type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code + type(c_funptr), value, intent(in) :: func + type(c_ptr), value, intent(in) :: param + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_exec_parallel_code + + + ! 
int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); + ! The unsigned context id is declared integer(c_int) and passed by value. + function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nready_tasks") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nready_tasks + + ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); + ! The C double result is declared real(c_double). + function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_nready_flops") + use iso_c_binding, only: c_int, c_double + real(c_double) :: fstarpu_sched_ctx_get_nready_flops + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_nready_flops + + ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); + ! Both arguments are integer(c_int) passed by value. + subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_increment") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + end subroutine fstarpu_sched_ctx_list_task_counters_increment + + ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); + subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_decrement") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + + end subroutine fstarpu_sched_ctx_list_task_counters_decrement + + ! 
void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); + ! Both arguments are integer(c_int) passed by value. + subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) & + bind(c,name="starpu_sched_ctx_list_task_counters_reset") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: workerid + + end subroutine fstarpu_sched_ctx_list_task_counters_reset + + ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id); + ! The *_all variants below take the task as an opaque C pointer passed by value. + subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_increment_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_increment_all + + ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all + + ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_list_task_counters_reset_all") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_list_task_counters_reset_all + + ! 
unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); + ! The unsigned result and context id are declared integer(c_int). + function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_priority") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_priority + integer(c_int), value, intent(in) :: worker + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_priority + + ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); + + ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); + subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) & + bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: cpuid + end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid + + ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); + ! workerids is an assumed-size integer(c_int) array passed by reference. + function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) & + bind(c,name="starpu_sched_ctx_book_workers_for_task") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), intent(in) :: workerids(*) + integer(c_int), value, intent(in) :: nworkers + end function fstarpu_sched_ctx_book_workers_for_task + + ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); + subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) & + bind(c,name="starpu_sched_ctx_unbook_workers_for_task") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: sched_ctx_id + integer(c_int), value, intent(in) :: master + end subroutine fstarpu_sched_ctx_unbook_workers_for_task + + ! 
unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); + ! The unsigned result and context id are declared integer(c_int). + function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_worker_is_master_for_child_ctx + + ! unsigned starpu_sched_ctx_master_get_context(int masterid); + function fstarpu_sched_ctx_master_get_context (masterid) & + bind(c,name="starpu_sched_ctx_master_get_context") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_master_get_context + integer(c_int), value, intent(in) :: masterid + end function fstarpu_sched_ctx_master_get_context + + ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops); + ! flops is a real(c_double) passed by value. + subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) & + bind(c,name="starpu_sched_ctx_revert_task_counters") + use iso_c_binding, only: c_int, c_double + integer(c_int), value, intent(in) :: sched_ctx_id + real(c_double), value, intent(in) :: flops + end subroutine fstarpu_sched_ctx_revert_task_counters + + ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex); + ! task is an opaque C pointer; both unsigned arguments are declared integer(c_int). + subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) & + bind(c,name="starpu_sched_ctx_move_task_to_ctx") + use iso_c_binding, only: c_ptr, c_int + type(c_ptr), value, intent(in) :: task + integer(c_int), value, intent(in) :: sched_ctx + integer(c_int), value, intent(in) :: manage_mutex + end subroutine fstarpu_sched_ctx_move_task_to_ctx + + ! 
int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); + ! The unsigned context id is declared integer(c_int) and passed by value. + function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) & + bind(c,name="starpu_sched_ctx_get_worker_rank") + use iso_c_binding, only: c_int + integer(c_int) :: fstarpu_sched_ctx_get_worker_rank + integer(c_int), value, intent(in) :: sched_ctx_id + end function fstarpu_sched_ctx_get_worker_rank + + ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); + + ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); + subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) & + bind(c,name="starpu_sched_ctx_call_pushed_task_cb") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: workerid + integer(c_int), value, intent(in) :: sched_ctx_id + end subroutine fstarpu_sched_ctx_call_pushed_task_cb + + ! == starpu_fxt.h == + + ! void starpu_fxt_options_init(struct starpu_fxt_options *options); + ! The options structure is handled as an opaque C pointer passed by value. + subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: fxt_options + end subroutine fstarpu_fxt_options_init + + ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options); + subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace") + use iso_c_binding, only: c_ptr + type(c_ptr), value, intent(in) :: fxt_options + end subroutine fstarpu_fxt_generate_trace + + ! void starpu_fxt_autostart_profiling(int autostart); + subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling") + use iso_c_binding, only: c_int + integer(c_int), value, intent(in) :: autostart + end subroutine fstarpu_fxt_autostart_profiling + + ! void starpu_fxt_start_profiling(void); + ! Takes no argument; the use statement below brings in names this interface does not reference. + subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling") + use iso_c_binding + end subroutine fstarpu_fxt_start_profiling + + ! 
void starpu_fxt_stop_profiling(void); + ! Takes no argument; the use statement below brings in names this interface does not reference. + subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling") + use iso_c_binding + end subroutine fstarpu_fxt_stop_profiling + + ! void starpu_fxt_write_data_trace(char *filename_in); + ! filename is passed by reference as a C character; kind=c_char is spelled out because a + ! bare character(c_char) would set the length, not the kind. + ! NOTE(review): presumably the caller must NUL-terminate filename - confirm. + subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace") + use iso_c_binding, only: c_char + character(kind=c_char), intent(in) :: filename + end subroutine fstarpu_fxt_write_data_trace + + ! void starpu_fxt_trace_user_event(unsigned long code); + ! NOTE(review): the prototype above is named starpu_fxt_trace_user_event, but the binding + ! label below is starpu_trace_user_event - confirm which symbol the C library exports. + subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event") + use iso_c_binding, only: c_long + integer(c_long), value, intent(in) :: code + end subroutine fstarpu_trace_user_event + + ! double starpu_timing_now(void) + ! The C double result is declared real(c_double). + function fstarpu_timing_now () bind(C,name="starpu_timing_now") + use iso_c_binding, only: c_double + real(c_double) :: fstarpu_timing_now + end function fstarpu_timing_now + + ! == starpu_cuda.h == + + ! cudaStream_t starpu_cuda_get_local_stream(void); + ! The stream is returned as an opaque type(c_ptr). + function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream") + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_cuda_get_local_stream + end function fstarpu_cuda_get_local_stream + + ! == starpu_stdlib.h == + + ! int starpu_malloc(void **A, size_t dim); + ! ptr has no VALUE attribute, so it is passed by reference and matches the void** parameter. + ! NOTE(review): the size_t argument is declared integer(c_long); c_size_t would follow the + ! prototype on every ABI - confirm before changing, since callers pass c_long today. + function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc") + use iso_c_binding, only: c_ptr, c_long, c_int + type(c_ptr), intent(out) :: ptr + integer(c_long), value, intent(in) :: len + integer(c_int) :: fstarpu_malloc + end function fstarpu_malloc + + ! int starpu_free_noflag(void *A, size_t dim); + ! NOTE(review): same c_long/size_t remark as for fstarpu_malloc. + function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag") + use iso_c_binding, only: c_ptr, c_long, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_long), value, intent(in) :: len + integer(c_int) :: fstarpu_free_noflag + end function fstarpu_free_noflag + + ! 
int starpu_memory_pin(void *addr, size_t size); + ! ptr is an opaque C pointer passed by value. + ! NOTE(review): the size_t argument is declared integer(c_long) - confirm this matches size_t on the target ABI. + function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin") + use iso_c_binding, only: c_ptr, c_long, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_long), value, intent(in) :: len + integer(c_int) :: fstarpu_memory_pin + end function fstarpu_memory_pin + + ! int starpu_memory_unpin(void *addr, size_t size); + function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin") + use iso_c_binding, only: c_ptr, c_long, c_int + type(c_ptr), value, intent(in) :: ptr + integer(c_long), value, intent(in) :: len + integer(c_int) :: fstarpu_memory_unpin + end function fstarpu_memory_unpin + + ! int starpu_sleep(float nb_sec); + ! Bound as a subroutine, so no result is returned to Fortran. + subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep") + use iso_c_binding, only: c_float + real(c_float), value, intent(in) :: nb_sec + end subroutine fstarpu_sleep + + ! int starpu_usleep(float nb_sec); + ! Bound as a subroutine, so no result is returned to Fortran. + ! NOTE(review): the function name suggests the duration is in microseconds although the + ! argument is called nb_sec - confirm against the C documentation. + subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep") + use iso_c_binding, only: c_float + real(c_float), value, intent(in) :: nb_sec + end subroutine fstarpu_usleep + + ! void starpu_cublas_init(void); + ! Takes no argument and needs no iso_c_binding names. + subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init") + end subroutine fstarpu_cublas_init + + ! 
void starpu_cublas_shutdown(void); + subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown") + end subroutine fstarpu_cublas_shutdown + + end interface + + contains + function or_cptrs(op1,op2) + type(c_ptr) :: or_cptrs + type(c_ptr),intent(in) :: op1,op2 + integer(c_intptr_t) :: i_op1,i_op2 + i_op1 = transfer(op1,0_c_intptr_t) + i_op2 = transfer(op2,0_c_intptr_t) + or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR) + end function + + function ip_to_p(i) bind(C) + use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR + type(c_ptr) :: ip_to_p + integer(c_intptr_t), value, intent(in) :: i + ip_to_p = transfer(i,C_NULL_PTR) + end function ip_to_p + + function p_to_ip(p) bind(C) + use iso_c_binding, only: c_ptr,c_intptr_t + integer(c_intptr_t) :: p_to_ip + type(c_ptr), value, intent(in) :: p + p_to_ip = transfer(p,0_c_intptr_t) + end function p_to_ip + + function sz_to_p(sz) bind(C) + use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t + type(c_ptr) :: sz_to_p + integer(c_size_t), value, intent(in) :: sz + sz_to_p = ip_to_p(int(sz,kind=c_intptr_t)) + end function sz_to_p + + function fstarpu_init (conf) bind(C) + use iso_c_binding + integer(c_int) :: fstarpu_init + type(c_ptr), value, intent(in) :: conf + + real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy + real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy + character(c_char) :: FSTARPU_SZ_C_CHAR_dummy + integer(c_int) :: FSTARPU_SZ_C_INT_dummy + integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy + type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy + integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy + + character :: FSTARPU_SZ_CHARACTER_dummy + + integer :: FSTARPU_SZ_INTEGER_dummy + integer(4) :: FSTARPU_SZ_INT4_dummy + integer(8) :: FSTARPU_SZ_INT8_dummy + + real :: FSTARPU_SZ_REAL_dummy + real(4) :: FSTARPU_SZ_REAL4_dummy + real(8) :: FSTARPU_SZ_REAL8_dummy + + double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy + + complex :: FSTARPU_SZ_COMPLEX_dummy + complex(4) :: FSTARPU_SZ_COMPLEX4_dummy + complex(8) :: 
FSTARPU_SZ_COMPLEX8_dummy + + ! Note: Referencing global C constants from Fortran has + ! been found unreliable on some architectures, notably + ! on Darwin. The get_integer/get_pointer_constant + ! scheme is a workaround to that issue. + + interface + ! These functions are not exported to the end user + function fstarpu_get_constant(s) bind(C) + use iso_c_binding, only: c_ptr,c_char + type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t + character(kind=c_char) :: s + end function fstarpu_get_constant + + function fstarpu_init_internal (conf) bind(C,name="starpu_init") + use iso_c_binding, only: c_ptr,c_int + integer(c_int) :: fstarpu_init_internal + type(c_ptr), value :: conf + end function fstarpu_init_internal + + end interface + + ! Initialize Fortran constants from C peers + FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR) + FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR) + FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR) + FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR) + FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR) + FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR) + FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR) + FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR) + FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR) + + FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR) + FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR) + FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR) + FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR) + FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR) + FSTARPU_CALLBACK = 
fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR) + FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR) + FSTARPU_CALLBACK_WITH_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR) + FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP_ARG = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR) + FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR) + FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR) + FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR) + FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR) + FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR) + FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR) + FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR) + FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR) + 
FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR) + FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR) + FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR) + FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR) + FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR) + + FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR) + FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR) + FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR) + FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR) + FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR) + FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR) + + FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int) + + FSTARPU_SCHED_CTX_POLICY_NAME = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_STRUCT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_NESTED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_AWAKE_WORKERS = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_POLICY_INIT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR) + FSTARPU_SCHED_CTX_USER_DATA = 
& + fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR) + + FSTARPU_NOWHERE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR) + FSTARPU_CPU = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR) + FSTARPU_CUDA = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR) + FSTARPU_OPENCL = & + fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR) + + FSTARPU_CODELET_SIMGRID_EXECUTE = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR) + FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR) + FSTARPU_CUDA_ASYNC = & + fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR) + FSTARPU_OPENCL_ASYNC = & + fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR) + + !FSTARPU_PER_WORKER = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR) + !FSTARPU_PER_ARCH = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR) + !FSTARPU_PER_COMMON = & + ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR) + FSTARPU_HISTORY_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR) + FSTARPU_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR) + FSTARPU_NL_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR) + FSTARPU_MULTIPLE_REGRESSION_BASED = & + fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR) + + FSTARPU_SEQ = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR) + FSTARPU_SPMD = & + fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR) + FSTARPU_FORKJOIN = & + fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR) + + ! 
Initialize size constants as 'c_ptr': each size is taken with c_sizeof on a dummy variable of the matching type and converted with sz_to_p + FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy)) + FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy)) + FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy)) + FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy)) + FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy)) + FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy)) + FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy)) + + FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy)) + + FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy)) + FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy)) + FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy)) + + FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy)) + FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy)) + FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy)) + + FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy)) + + FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy)) + FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy)) + FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) + + ! The default priority comes back from fstarpu_get_constant as a pointer-sized value and is narrowed to c_int + FSTARPU_DEFAULT_PRIO = int(p_to_ip(& + fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int) + + ! 
Initialize StarPU + if (c_associated(conf)) then + fstarpu_init = fstarpu_init_internal(conf) + else + fstarpu_init = fstarpu_init_internal(C_NULL_PTR) + end if + end function fstarpu_init + + function fstarpu_csizet_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_csizet_to_cptr + integer(c_size_t) :: i + fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_csizet_to_cptr + + function fstarpu_int_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_int_to_cptr + integer(c_int) :: i + fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_int_to_cptr + + function fstarpu_long_to_cptr(i) bind(C) + use iso_c_binding + type(c_ptr) :: fstarpu_long_to_cptr + integer(c_long) :: i + fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) + end function fstarpu_long_to_cptr + + ! Note: do not add binding declarations here in 'CONTAINS' + ! section, because the compiler generates empty functions for + ! them. + ! Instead, put binding declarations in the 'INTERFACE' section + ! above. + +end module fstarpu_mod diff --git a/mpi/examples/native_fortran/fstarpu_mpi_mod.f90 b/mpi/examples/native_fortran/fstarpu_mpi_mod.f90 new file mode 100644 index 0000000..b0a93fa --- /dev/null +++ b/mpi/examples/native_fortran/fstarpu_mpi_mod.f90 @@ -0,0 +1,776 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! 
See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +module fstarpu_mpi_mod + use iso_c_binding + use fstarpu_mod + implicit none + + interface + ! == mpi/include/starpu_mpi.h == + ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mpi_req + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_isend + + ! == mpi/include/starpu_mpi.h == + ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_prio + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mpi_req + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_isend_prio + + ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm); + function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_irecv + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mpi_req + integer(c_int), value, intent(in) :: src + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_irecv + + ! 
int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_send + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_send + + ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_send_prio + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_send_prio + + ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status); + function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: src + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: mpi_status + end function fstarpu_mpi_recv + + ! 
int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_detached + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end function fstarpu_mpi_isend_detached + + ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); + function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_detached_prio + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end function fstarpu_mpi_isend_detached_prio + + ! 
int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv_detached + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: src + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end function fstarpu_mpi_recv_detached + + ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_issend + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mpi_req + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_issend + + ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_issend_prio + type(c_ptr), value, intent(in) :: dh + type(c_ptr), value, intent(in) :: mpi_req + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_issend_prio + + ! 
int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_issend_detached + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end function fstarpu_mpi_issend_detached + + ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); + function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_issend_detached_prio + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end function fstarpu_mpi_issend_detached_prio + + ! int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status); + function fstarpu_mpi_wait(req,st) bind(C,name="starpu_mpi_wait") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_wait + type(c_ptr), value, intent(in) :: req + type(c_ptr), value, intent(in) :: st + end function fstarpu_mpi_wait + + ! 
int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status); + function fstarpu_mpi_test(req,flag,st) bind(C,name="starpu_mpi_test") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_test + type(c_ptr), value, intent(in) :: req + type(c_ptr), value, intent(in) :: flag + type(c_ptr), value, intent(in) :: st + end function fstarpu_mpi_test + + ! int starpu_mpi_barrier(MPI_Comm comm); + function fstarpu_mpi_barrier (mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_barrier + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_barrier + + ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency); + function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) & + bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: src + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + integer(c_int), value, intent(in) :: seq_const + end function fstarpu_mpi_recv_detached_sequential_consistency + + + ! int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm); + ! -> cf fstarpu_mpi_init + ! int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi); + ! -> cf fstarpu_mpi_init + ! int starpu_mpi_initialize(void) STARPU_DEPRECATED; + ! -> cf fstarpu_mpi_init + ! int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED; + ! -> cf fstarpu_mpi_init + + ! 
int starpu_mpi_shutdown(void); + function fstarpu_mpi_shutdown () bind(C,name="starpu_mpi_shutdown") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_shutdown + end function fstarpu_mpi_shutdown + + ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); + function fstarpu_mpi_task_build(arglist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr) :: fstarpu_mpi_task_build + type(c_ptr), dimension(*), intent(in) :: arglist + end function fstarpu_mpi_task_build + + ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); + subroutine fstarpu_mpi_task_post_build(arglist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_mpi_task_post_build + + ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...); + subroutine fstarpu_mpi_task_insert(arglist) bind(C) + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_mpi_task_insert + subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert") + use iso_c_binding, only: c_ptr + type(c_ptr), dimension(*), intent(in) :: arglist + end subroutine fstarpu_mpi_insert_task + + ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node); + subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + end subroutine fstarpu_mpi_get_data_on_node + + ! 
void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg); + subroutine fstarpu_mpi_get_data_on_node_detached(mpi_comm,dh,node,callback,arg) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: node + type(c_funptr), value, intent(in) :: callback + type(c_ptr), value, intent(in) :: arg + end subroutine fstarpu_mpi_get_data_on_node_detached + + ! void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle); + subroutine fstarpu_mpi_redux_data(mpi_comm,dh) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_mpi_redux_data + + ! void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio); + subroutine fstarpu_mpi_redux_data_prio(mpi_comm,dh, prio) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: prio + end subroutine fstarpu_mpi_redux_data_prio + + ! void starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity); + subroutine fstarpu_mpi_redux_data_tree(mpi_comm,dh, arity) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: arity + end subroutine fstarpu_mpi_redux_data_tree + + ! 
void starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity); + subroutine fstarpu_mpi_redux_data_prio_tree(mpi_comm,dh, prio, arity) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: arity + end subroutine fstarpu_mpi_redux_data_prio_tree + + ! int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); + function fstarpu_mpi_scatter_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_scatter_detached + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), value, intent(in) :: cnt + integer(c_int), value, intent(in) :: root + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: scallback + type(c_ptr), value, intent(in) :: sarg + type(c_funptr), value, intent(in) :: rcallback + type(c_ptr), value, intent(in) :: rarg + end function fstarpu_mpi_scatter_detached + + ! 
int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); + function fstarpu_mpi_gather_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_gather_detached + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), value, intent(in) :: cnt + integer(c_int), value, intent(in) :: root + integer(c_int), value, intent(in) :: mpi_comm + type(c_funptr), value, intent(in) :: scallback + type(c_ptr), value, intent(in) :: sarg + type(c_funptr), value, intent(in) :: rcallback + type(c_ptr), value, intent(in) :: rarg + end function fstarpu_mpi_gather_detached + + + ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); + function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_isend_detached_unlock_tag + + ! 
int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag); + function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: dst + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: prio + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_isend_detached_unlock_tag_prio + + ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); + function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: src + integer(c_int64_t), value, intent(in) :: data_tag + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_recv_detached_unlock_tag + + ! 
int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); + function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, data_tags, mpi_comms, starpu_tag) & + bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag + integer(c_int), value, intent(in) :: array_size + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), intent(in) :: dsts(*) + integer(c_int64_t), intent(in) :: data_tags(*) + integer(c_int), intent(in) :: mpi_comms(*) + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_isend_array_detached_unlock_tag + + ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag); + function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, data_tags, prio, mpi_comms, & + starpu_tag) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag_prio + integer(c_int), value, intent(in) :: array_size + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), intent(in) :: dsts(*) + integer(c_int64_t), intent(in) :: data_tags(*) + integer(c_int), intent(in) :: prio(*) + integer(c_int), intent(in) :: mpi_comms(*) + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_isend_array_detached_unlock_tag_prio + + ! 
int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); + function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, data_tags, mpi_comms, starpu_tag) & + bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv_array_detached_unlock_tag + integer(c_int), value, intent(in) :: array_size + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), intent(in) :: srcs(*) + integer(c_int64_t), intent(in) :: data_tags(*) + integer(c_int), intent(in) :: mpi_comms(*) + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_recv_array_detached_unlock_tag + + ! void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); + ! NOTE(review): comm_stats is declared intent(in) although the C prototype takes a non-const size_t* and returns void; presumably the callee writes the array -- confirm and consider intent(out) + subroutine fstarpu_mpi_comm_stats_retrieve (comm_stats) bind(C,name="starpu_mpi_comm_stats_retrieve") + use iso_c_binding + implicit none + integer(c_size_t), intent(in) :: comm_stats(*) + end subroutine fstarpu_mpi_comm_stats_retrieve + + + ! void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); + subroutine fstarpu_mpi_cache_flush(mpi_comm,dh) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_mpi_cache_flush + + ! void starpu_mpi_cache_flush_all_data(MPI_Comm comm); + subroutine fstarpu_mpi_cache_flush_all_data(mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_cache_flush_all_data + + ! int starpu_mpi_comm_size(MPI_Comm comm, int *size); + function fstarpu_mpi_comm_size(mpi_comm,sz) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + integer(c_int), intent(out) :: sz + integer(c_int) :: fstarpu_mpi_comm_size + end function fstarpu_mpi_comm_size + + ! 
int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); + function fstarpu_mpi_comm_rank(mpi_comm,rank) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + integer(c_int), intent(out) :: rank + integer(c_int) :: fstarpu_mpi_comm_rank + end function fstarpu_mpi_comm_rank + + + ! int starpu_mpi_world_rank(void); + function fstarpu_mpi_world_rank() bind(C,name="starpu_mpi_world_rank") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_rank + end function fstarpu_mpi_world_rank + + ! int starpu_mpi_world_size(void); + function fstarpu_mpi_world_size() bind(C,name="starpu_mpi_world_size") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_size + end function fstarpu_mpi_world_size + + ! integer communicator handle for the MPI world (presumably MPI_COMM_WORLD as a Fortran handle -- confirm); bound to the C symbol fstarpu_mpi_world_comm + function fstarpu_mpi_world_comm() bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_comm + end function fstarpu_mpi_world_comm + + ! void starpu_mpi_comm_stats_enable() + subroutine fstarpu_mpi_comm_stats_enable() bind(C) + use iso_c_binding + implicit none + end subroutine fstarpu_mpi_comm_stats_enable + + ! void starpu_mpi_comm_stats_disable() + subroutine fstarpu_mpi_comm_stats_disable() bind(C) + use iso_c_binding + implicit none + end subroutine fstarpu_mpi_comm_stats_disable + + ! int starpu_mpi_get_communication_tag(void); + function fstarpu_mpi_get_communication_tag() bind(C,name="starpu_mpi_get_communication_tag") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_get_communication_tag + end function fstarpu_mpi_get_communication_tag + + ! void starpu_mpi_set_communication_tag(int tag); + ! NOTE(review): the prototype above takes an int and the getter returns c_int, yet this binding passes tag as c_int64_t by value -- confirm which width the C side expects + subroutine fstarpu_mpi_set_communication_tag(tag) bind(C,name="starpu_mpi_set_communication_tag") + use iso_c_binding + implicit none + integer(c_int64_t), value, intent(in) :: tag + end subroutine fstarpu_mpi_set_communication_tag + + ! 
void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm); + subroutine fstarpu_mpi_data_register_comm(dh,tag,rank,mpi_comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + integer(c_int), value, intent(in) :: rank + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_data_register_comm + + ! #define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD) + subroutine fstarpu_mpi_data_register(dh,tag,rank) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_register + + ! void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); + subroutine fstarpu_mpi_data_set_rank_comm(dh,rank,mpi_comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_data_set_rank_comm + + ! #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) + subroutine fstarpu_mpi_data_set_rank(dh,rank) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_set_rank + + ! void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); + subroutine fstarpu_mpi_data_set_tag(dh,tag) bind(C,name="starpu_mpi_data_set_tag") + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + end subroutine fstarpu_mpi_data_set_tag + + ! 
int starpu_mpi_data_get_rank(starpu_data_handle_t handle); + function fstarpu_mpi_data_get_rank(dh) bind(C,name="starpu_mpi_data_get_rank") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_get_rank + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_data_get_rank + + ! starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle); + function fstarpu_mpi_data_get_tag(dh) bind(C,name="starpu_mpi_data_get_tag") + use iso_c_binding + implicit none + integer(c_int64_t) :: fstarpu_mpi_data_get_tag + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_data_get_tag + + ! void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int rank); + subroutine fstarpu_mpi_data_migrate(mpi_comm,dh,rank) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_migrate + + ! #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 + ! #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 + + ! int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); + function fstarpu_mpi_node_selection_register_policy(policy_func) & + bind(C,name="starpu_mpi_node_selection_register_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_node_selection_register_policy + type(c_funptr), value, intent(in) :: policy_func + end function fstarpu_mpi_node_selection_register_policy + + ! int starpu_mpi_node_selection_unregister_policy(int policy); + ! NOTE(review): the prototype above takes an int policy, but this binding declares policy as type(c_ptr) by value -- confirm the intended argument type + function fstarpu_mpi_node_selection_unregister_policy(policy) & + bind(C,name="starpu_mpi_node_selection_unregister_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_node_selection_unregister_policy + type(c_ptr), value, intent(in) :: policy + end function fstarpu_mpi_node_selection_unregister_policy + + ! 
int starpu_mpi_node_selection_get_current_policy(); + ! NOTE(review): the prototype above is named ..._node_selection_..., but the binding below (Fortran name and bind(C) name) uses ..._data_selection_... -- confirm that C symbol exists + function fstarpu_mpi_data_selection_get_current_policy() & + bind(C,name="starpu_mpi_data_selection_get_current_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_selection_get_current_policy + end function fstarpu_mpi_data_selection_get_current_policy + + ! int starpu_mpi_node_selection_set_current_policy(int policy); + ! NOTE(review): same node_selection/data_selection naming difference as above; policy is declared type(c_ptr) although the prototype takes an int -- confirm + function fstarpu_mpi_data_selection_set_current_policy(policy) & + bind(C,name="starpu_mpi_data_selection_set_current_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_selection_set_current_policy + type(c_ptr), value, intent(in) :: policy + end function fstarpu_mpi_data_selection_set_current_policy + + ! int starpu_mpi_cache_is_enabled(); + function fstarpu_mpi_cache_is_enabled() bind(C,name="starpu_mpi_cache_is_enabled") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_cache_is_enabled + end function fstarpu_mpi_cache_is_enabled + + ! int starpu_mpi_cache_set(int enabled); + function fstarpu_mpi_cache_set(enabled) bind(C,name="starpu_mpi_cache_set") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_cache_set + integer(c_int), value, intent(in) :: enabled + end function fstarpu_mpi_cache_set + + ! int starpu_mpi_wait_for_all(MPI_Comm comm); + function fstarpu_mpi_wait_for_all (mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_wait_for_all + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_wait_for_all + + ! 
int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + function fstarpu_mpi_datatype_register(dh, alloc_func, free_func) bind(C,name="starpu_mpi_datatype_register") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_datatype_register + type(c_ptr), value, intent(in) :: dh + type(c_funptr), value, intent(in) :: alloc_func + type(c_funptr), value, intent(in) :: free_func + end function fstarpu_mpi_datatype_register + + ! int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); + function fstarpu_mpi_datatype_unregister(dh) bind(C,name="starpu_mpi_datatype_unregister") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_datatype_unregister + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_datatype_unregister + + + function fstarpu_mpi_req_alloc() bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_req_alloc + end function fstarpu_mpi_req_alloc + + subroutine fstarpu_mpi_req_free(req) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: req + end subroutine fstarpu_mpi_req_free + + function fstarpu_mpi_status_alloc() bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_status_alloc + end function fstarpu_mpi_status_alloc + + subroutine fstarpu_mpi_status_free(st) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: st + end subroutine fstarpu_mpi_status_free + + + + end interface + + contains + ! Fortran-side entry point for StarPU-MPI initialisation: packs the command-line arguments, the initialize_mpi flag and the + ! optional communicator into an opaque argcv object, calls fstarpu_mpi_init_c on it, frees it and returns that call's status. + function fstarpu_mpi_init (initialize_mpi,mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_init + integer(c_int), intent(in) :: initialize_mpi + integer(c_int), optional, intent(in) :: mpi_comm + type(c_ptr) :: argcv + integer(c_int) :: fargc,i,farg_len + character(len=1) :: farg_1 + character(len=:), allocatable :: farg + integer(c_int) :: mpi_comm_present, mpi_comm_or_0 + 
integer(c_int) :: ret + + interface + function fstarpu_mpi_argcv_alloc(argc, initialize_mpi, comm_present, comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_argcv_alloc + integer(c_int),value,intent(in) :: argc + integer(c_int),value,intent(in) :: initialize_mpi + integer(c_int),value,intent(in) :: comm_present + integer(c_int),value,intent(in) :: comm + end function fstarpu_mpi_argcv_alloc + + subroutine fstarpu_mpi_argcv_set_arg(argcv, i, l, s) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: argcv + integer(c_int),value,intent(in) :: i + integer(c_int),value,intent(in) :: l + character(c_char),intent(in) :: s + end subroutine fstarpu_mpi_argcv_set_arg + + subroutine fstarpu_mpi_argcv_free(argcv) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: argcv + end subroutine fstarpu_mpi_argcv_free + + function fstarpu_mpi_init_c(argcv) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_init_c + type(c_ptr),value,intent(in) :: argcv + end function fstarpu_mpi_init_c + end interface + + fargc = command_argument_count() + !write(*,*) "fargc",fargc + + ! the communicator is optional: pass a presence flag plus the value (0 when absent) + if (present(mpi_comm)) then + mpi_comm_present = 1 + mpi_comm_or_0 = mpi_comm + else + mpi_comm_present = 0 + mpi_comm_or_0 = 0 + end if + !write(*,*) "initialize_mpi",initialize_mpi + !write(*,*) "mpi_comm_present",mpi_comm_present + argcv = fstarpu_mpi_argcv_alloc(fargc, initialize_mpi, mpi_comm_present, mpi_comm_or_0) + ! each argument is fetched twice: first into a 1-character buffer to learn its length, then into a buffer of exactly that length + ! NOTE(review): command_argument_count() does not count argument 0 (the command name), so indices 0..fargc-1 forward the command name but not the last argument -- confirm intended + do i=0,fargc-1 + call get_command_argument(i, farg_1, farg_len) + allocate (character(len=farg_len) :: farg) + call get_command_argument(i, farg) + call fstarpu_mpi_argcv_set_arg(argcv, i, farg_len, farg) + deallocate (farg) + end do + ret = fstarpu_mpi_init_c(argcv) + call fstarpu_mpi_argcv_free(argcv) + fstarpu_mpi_init = ret + end function fstarpu_mpi_init + +end module fstarpu_mpi_mod diff --git a/mpi/examples/native_fortran/nf_basic_ring.f90 b/mpi/examples/native_fortran/nf_basic_ring.f90 new
file mode 100644 index 0000000..2fe6448 --- /dev/null +++ b/mpi/examples/native_fortran/nf_basic_ring.f90 @@ -0,0 +1,108 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +! Ring example: an integer token is passed from rank to rank for nloops rounds; each rank that receives it adds 1 before forwarding it. +program nf_basic_ring + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use fstarpu_mpi_mod ! StarPU-MPI interfacing module + implicit none + + integer(c_int) :: ncpu + integer(c_int) :: ret + integer(c_int) :: rank,sz + integer(c_int),target :: token = 42 + integer(c_int) :: nloops = 32 + integer(c_int) :: loop + integer(c_int64_t) :: tag + integer(c_int) :: world + integer(c_int) :: src,dst + type(c_ptr) :: token_dh, st + + ! NOTE(review): -19 is presumably -ENODEV and exit code 77 presumably marks the test as skipped -- confirm + ret = fstarpu_init(C_NULL_PTR) + if (ret == -19) then + stop 77 + else if (ret /= 0) then + stop 1 + end if + + ret = fstarpu_mpi_init(1) + print *,"fstarpu_mpi_init status:", ret + if (ret /= 0) then + stop 1 + end if + + ! 
stop there if no CPU worker available + ncpu = fstarpu_cpu_worker_get_count() + if (ncpu == 0) then + call fstarpu_shutdown() + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + world = fstarpu_mpi_world_comm() + rank = fstarpu_mpi_world_rank() + sz = fstarpu_mpi_world_size() + write(*,*) "rank=", rank,"size=",sz,"world=",world + if (sz < 2) then + call fstarpu_shutdown() + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + call fstarpu_variable_data_register(token_dh, 0, c_loc(token), c_sizeof(token)) + + st = fstarpu_mpi_status_alloc() + do loop=1,nloops + tag = loop*sz+rank + token = 0 + ! rank 0 starts the ring on the first loop; in every other case receive the token from the previous rank and add 1 to it + if (loop == 1.and.rank == 0) then + write(*,*) "rank=", rank,"token=",token + else + src = modulo((rank+sz-1),sz) + write(*,*) "rank=", rank,"recv--> src =", src, "tag =", tag + ret = fstarpu_mpi_recv(token_dh, src, tag, world, st) + if (ret /= 0) then + write(*,*) "fstarpu_mpi_recv failed" + stop 1 + end if + write(*,*) "rank=", rank,"recv<--","token=",token + token = token+1 + end if + ! the last rank ends the ring on the final loop; otherwise forward the token to the next rank with tag+1, the tag the receiver computes for itself + if (loop == nloops.and.rank == (sz-1)) then + call fstarpu_data_acquire(token_dh, FSTARPU_R) + write(*,*) "finished: rank=", rank,"token=",token + call fstarpu_data_release(token_dh) + else + dst = modulo((rank+1),sz) + write(*,*) "rank=", rank,"send--> dst =", dst, "tag =", tag+1 + ret = fstarpu_mpi_send(token_dh, dst, tag+1, world) + if (ret /= 0) then + write(*,*) "fstarpu_mpi_send failed" + stop 1 + end if + write(*,*) "rank=", rank,"send<--" + end if + end do + call fstarpu_mpi_status_free(st) + call fstarpu_data_unregister(token_dh) + call fstarpu_shutdown() + + ret = fstarpu_mpi_shutdown() + print *,"fstarpu_mpi_shutdown status:", ret + if (ret /= 0) then + stop 1 + end if +end program nf_basic_ring + + diff --git a/mpi/examples/native_fortran/nf_mm.f90 b/mpi/examples/native_fortran/nf_mm.f90 new file mode 100644 index 0000000..6ada963 --- /dev/null +++ b/mpi/examples/native_fortran/nf_mm.f90 @@ -0,0 +1,238 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! 
Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +! Blocked matrix product example: N x N matrices held on rank 0 are split into blocks of size BS, the C blocks are spread over the ranks, and one cl_mm task is submitted per C block. +program nf_mm + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use fstarpu_mpi_mod ! StarPU-MPI interfacing module + use nf_mm_cl + implicit none + + logical, parameter :: verbose = .false. + integer(c_int) :: comm_size, comm_rank + integer(c_int), target :: comm_world + integer(c_int) :: N = 16, BS = 4, NB + real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:) + type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:) + type(c_ptr) :: cl_mm + integer(c_int) :: ncpu + integer(c_int) :: ret + integer(c_int) :: row, col + integer(c_int) :: b_row, b_col + integer(c_int) :: mr, rank + integer(c_int64_t) :: tag + + ret = fstarpu_init(C_NULL_PTR) + if (ret == -19) then + stop 77 + else if (ret /= 0) then + stop 1 + end if + + ret = fstarpu_mpi_init(1) + print *,"fstarpu_mpi_init status:", ret + if (ret /= 0) then + stop 1 + end if + + ! stop there if no CPU worker available + ncpu = fstarpu_cpu_worker_get_count() + if (ncpu == 0) then + call fstarpu_shutdown() + ! StarPU-MPI was initialised above: shut it down too, as the comm_size < 2 exit path below does + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + comm_world = fstarpu_mpi_world_comm() + comm_size = fstarpu_mpi_world_size() + comm_rank = fstarpu_mpi_world_rank() + + if (comm_size < 2) then + call fstarpu_shutdown() + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + ! TODO: process app's argc/argv + NB = N/BS + + ! 
allocate and initialize codelet + cl_mm = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char) + call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult)) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) + + ! allocate matrices (rank 0 only) + if (comm_rank == 0) then + allocate(A(N,N)) + allocate(B(N,N)) + allocate(C(N,N)) + end if + + ! init matrices (rank 0 only): A = 2*identity, B(row,col) = row*N+col, C = 0 + if (comm_rank == 0) then + do col=1,N + do row=1,N + if (row == col) then + A(row,col) = 2 + else + A(row,col) = 0 + end if + B(row,col) = row*N+col + C(row,col) = 0 + end do + end do + + if (verbose) then + print *,"A" + call mat_disp(A) + print *,"B" + call mat_disp(B) + print *,"C" + call mat_disp(C) + end if + end if + + ! allocate data handles + allocate(dh_A(NB)) + allocate(dh_B(NB)) + allocate(dh_C(NB,NB)) + + ! register matrices: rank 0 registers its real buffers with mr = 0, every other rank registers a placeholder with mr = -1 and a null pointer + if (comm_rank == 0) then + mr = 0 ! TODO: use STARPU_MAIN_RAM constant + else + mr = -1 + end if + tag = 0 + + ! A is cut into NB bands of BS rows; every handle gets the next tag and is declared as owned by rank 0 + do b_row=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_A(b_row), mr, & + c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1))) + else + call fstarpu_matrix_data_register(dh_A(b_row), mr, & + c_null_ptr, N, BS, N, c_sizeof(A(1,1))) + end if + call fstarpu_mpi_data_register(dh_A(b_row), tag, 0) + tag = tag+1 + end do + + ! B is cut into NB bands of BS columns + do b_col=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_B(b_col), mr, & + c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1))) + else + call fstarpu_matrix_data_register(dh_B(b_col), mr, & + c_null_ptr, N, N, BS, c_sizeof(B(1,1))) + end if + call fstarpu_mpi_data_register(dh_B(b_col), tag, 0) + tag = tag+1 + end do + + ! C is cut into NB x NB blocks of BS x BS elements + do b_col=1,NB + do b_row=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & + c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1))) + else + call
fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & + c_null_ptr, N, BS, BS, c_sizeof(C(1,1))) + end if + call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0) + tag = tag+1 + end do + end do + + ! distribute matrix C: block (b_row,b_col) is migrated to rank modulo(b_row+b_col, comm_size) + do b_col=1,NB + do b_row=1,NB + rank = modulo(b_row+b_col, comm_size) + call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank) + end do + end do + + ! submit one cl_mm task per C block, reading row band dh_A(b_row) and column band dh_B(b_col) + do b_col=1,NB + do b_row=1,NB + call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, & + FSTARPU_R, dh_A(b_row), & + FSTARPU_R, dh_B(b_col), & + FSTARPU_RW, dh_C(b_row,b_col), & + C_NULL_PTR /)) + end do + end do + + call fstarpu_task_wait_for_all() + + ! undistribute matrix C: migrate every block back to rank 0 + do b_col=1,NB + do b_row=1,NB + call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0) + end do + end do + + ! unregister matrices + do b_row=1,NB + call fstarpu_data_unregister(dh_A(b_row)) + end do + + do b_col=1,NB + call fstarpu_data_unregister(dh_B(b_col)) + end do + + do b_col=1,NB + do b_row=1,NB + call fstarpu_data_unregister(dh_C(b_row,b_col)) + end do + end do + + ! check result: A was 2*identity and C started at 0, so C is compared against 2*B = 2*(row*N+col) + if (comm_rank == 0) then + if (verbose) then + print *,"final C" + call mat_disp(C) + end if + + do col=1,N + do row=1,N + if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then + print *, "check failed" + stop 1 + end if + end do + end do + end if + + ! free handles + deallocate(dh_A) + deallocate(dh_B) + deallocate(dh_C) + + ! free matrices + if (comm_rank == 0) then + deallocate(A) + deallocate(B) + deallocate(C) + end if + call fstarpu_codelet_free(cl_mm) + call fstarpu_shutdown() + + ret = fstarpu_mpi_shutdown() + print *,"fstarpu_mpi_shutdown status:", ret + if (ret /= 0) then + stop 1 + end if +end program nf_mm diff --git a/mpi/examples/native_fortran/nf_mm_2dbc.f90 b/mpi/examples/native_fortran/nf_mm_2dbc.f90 new file mode 100644 index 0000000..0de69c6 --- /dev/null +++ b/mpi/examples/native_fortran/nf_mm_2dbc.f90 @@ -0,0 +1,310 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! 
Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +! Blocked GEMM example on matrices stored as grids of bs x bs blocks, each block owned by one rank of a p x q process grid. +program nf_mm_2dbc + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use fstarpu_mpi_mod ! StarPU-MPI interfacing module + use nf_mm_cl + use nf_mm_cl_blas + implicit none + + ! one block of a distributed matrix + type block_type + real(kind=c_double), allocatable :: c(:,:) ! block values, allocated on the owning rank only + type(c_ptr) :: h ! StarPU data handle of the block + integer :: owner ! rank that owns the block + end type block_type + + ! distributed matrix: m x n elements stored as a grid of blocks of size b + type dsmat_type + integer :: m, n, b + type(block_type), allocatable :: blocks(:,:) + end type dsmat_type + + + logical, parameter :: verbose = .false. + logical :: trace = .false. + integer(c_int) :: comm_size, comm_rank + integer(c_int), target :: comm_world + + integer :: bs + integer(c_int) :: m, mb + integer(c_int) :: n, nb + integer(c_int) :: k, kb + character(len=20) :: str + + type(dsmat_type),target :: A, B, C + real(kind=c_double), target :: alpha, beta, zbeta + type(c_ptr) :: cl_mm, cl_fill + integer(c_int) :: ncpu + integer(c_int) :: ret + integer :: i, j, l, p , q, trial, t + integer :: te, ts, tr + real :: tf, gflops + + ret = fstarpu_init(C_NULL_PTR) + if (ret == -19) then + stop 77 + else if (ret /= 0) then + stop 1 + end if + + ret = fstarpu_mpi_init(1) + if (ret /= 0) then + write(*,'("fstarpu_mpi_init status:",i4)') ret + stop 1 + end if + + ! 
stop there if no CPU worker available + ncpu = fstarpu_cpu_worker_get_count() + if (ncpu == 0) then + call fstarpu_shutdown() + ! StarPU-MPI was initialised above: shut it down too, as the comm_size < 2 exit path below does + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + comm_world = fstarpu_mpi_world_comm() + comm_size = fstarpu_mpi_world_size() + comm_rank = fstarpu_mpi_world_rank() + + if (comm_size < 2) then + call fstarpu_shutdown() + ret = fstarpu_mpi_shutdown() + stop 77 + end if + + ! optional positional arguments: m n k bs p q t [trace]; each one falls back to its default when absent + if (command_argument_count() >= 1) then + call get_command_argument(1, value=str, length=i) + read(str(1:i),*) m + else + m = 10 + end if + if (command_argument_count() >= 2) then + call get_command_argument(2, value=str, length=i) + read(str(1:i),*) n + else + n = 10 + end if + if (command_argument_count() >= 3) then + call get_command_argument(3, value=str, length=i) + read(str(1:i),*) k + else + k = 10 + end if + if (command_argument_count() >= 4) then + call get_command_argument(4, value=str, length=i) + read(str(1:i),*) bs + else + bs = 1 + end if + if (command_argument_count() >= 5) then + call get_command_argument(5, value=str, length=i) + read(str(1:i),*) p + else + p = 1 + end if + if (command_argument_count() >= 6) then + call get_command_argument(6, value=str, length=i) + read(str(1:i),*) q + else + q = 1 + end if + ! the trial count is argument 7, so it must be read as soon as 7 arguments are present + if (command_argument_count() >= 7) then + call get_command_argument(7, value=str, length=i) + read(str(1:i),*) t + else + t = 1 + end if + ! an 8th argument, whatever its value, turns trace on + if (command_argument_count() == 8) then + trace = .true. + end if + + ! every dimension must be a multiple of the block size + if (mod(m,bs).ne.0) stop 75 + if (mod(n,bs).ne.0) stop 75 + if (mod(k,bs).ne.0) stop 75 + mb = m/bs + nb = n/bs + kb = k/bs + if (comm_rank.eq.0) then + write(*,'("========================================")') + write(*,'("mxnxk = ",i5,"x",i5,"x",i5)') m, n, k + write(*,'("mbxnbxkb = ",i5,"x",i5,"x",i5)') mb, nb, kb + write(*,'("B = ",i5)') bs + write(*,'("PxQ = ",i3,"x",i3)') p,q + write(*,'("trace = ",l)') trace + write(*,'("========================================")') + end if + ret = fstarpu_mpi_barrier(comm_world) + + ! 
initialize codelets + call initialize_codelets() + alpha = 0.42 + beta = 3.14 + + do trial=1,t + ! allocate matrices + call initialize_matrix(a,mb,kb,"A") + call initialize_matrix(b,kb,nb,"B") + call initialize_matrix(c,mb,nb,"C") + ret = fstarpu_mpi_barrier(comm_world) + + ! fill A, B and C in turn, waiting for all tasks and synchronising the ranks after each one + call fill_matrix(A, mb,kb,"A") + ret = fstarpu_mpi_wait_for_all(comm_world) + ret = fstarpu_mpi_barrier(comm_world) + + call fill_matrix(B, kb,nb,"B") + ret = fstarpu_mpi_wait_for_all(comm_world) + ret = fstarpu_mpi_barrier(comm_world) + + call fill_matrix(C, mb,nb,"C") + ret = fstarpu_mpi_wait_for_all(comm_world) + ret = fstarpu_mpi_barrier(comm_world) + + call system_clock(ts) + ! submit matrix multiplication + do i=1,mb + do j=1,nb + do l=1,kb + ! if (comm_rank.eq.0) write(*,*) "GEMM", b_col,b_row,b_aisle + ! beta scales C only on the first inner step (l == 1); later steps pass 1 so they accumulate into C + if (l.eq.1) then; zbeta = beta; else; zbeta = 1.0d0; end if + call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, & + FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, & + FSTARPU_VALUE, c_loc(zbeta), FSTARPU_SZ_REAL8, & + FSTARPU_R, A%blocks(i,l)%h, & + FSTARPU_R, B%blocks(l,j)%h, & + FSTARPU_RW, C%blocks(i,j)%h, & + c_null_ptr /)) + end do + end do + end do + + ret = fstarpu_mpi_wait_for_all(comm_world) + ret = fstarpu_mpi_barrier(comm_world) + call system_clock(te,tr) + ! elapsed time in seconds (clock ticks over tick rate, kept strictly positive), then 2*m*n*k operations over that time + tf = max(real(te-ts)/real(tr),1e-20) + gflops = 2.0*m*n*k/(tf*10**9) + if (comm_rank.eq.0) write(*,'("RANK ",i3," -> took ",e15.8," s | ", e15.8,"Gflop/s")') & + comm_rank, tf, gflops + + ! 
unregister matrices + call unregister_matrix(A,mb,kb) + call unregister_matrix(B,kb,nb) + call unregister_matrix(C,mb,nb) + end do + + + call fstarpu_codelet_free(cl_mm) + call fstarpu_codelet_free(cl_fill) + call fstarpu_shutdown() + + ret = fstarpu_mpi_shutdown() + if (ret /= 0) then + write(*,'("fstarpu_mpi_shutdown status:",i4)') ret + stop 1 + end if + +contains + + ! Allocate the two codelets of the program: cl_mm (cl_cpu_gemm, buffers R, R, RW) and cl_fill (cl_cpu_fill, one W buffer). + subroutine initialize_codelets() + implicit none + cl_mm = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(cl_mm, c_char_"nf_gemm_cl"//c_null_char) + call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_gemm)) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) + cl_fill = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(cl_fill, c_char_"nf_fill_cl"//c_null_char) + call fstarpu_codelet_add_cpu_func(cl_fill, C_FUNLOC(cl_cpu_fill)) + call fstarpu_codelet_add_buffer(cl_fill, FSTARPU_W) + end subroutine initialize_codelets + + ! Set up x as an mb x nb grid of bs x bs blocks: pick each block's owner, register it with StarPU (real storage on the owner, + ! placeholder elsewhere) and give it the next MPI tag. cname is only referenced by the commented-out debug output. + subroutine initialize_matrix(X,mb,nb,cname) + implicit none + type(dsmat_type), target :: x + integer :: mb, nb + character :: cname + + integer :: i, j + type(block_type), pointer :: xij + ! saved across calls, so tags keep increasing from one matrix (and one trial) to the next + integer(c_int64_t), save :: tag = 1 + + x%m = mb*bs + x%n = nb*bs + x%b = bs + allocate(x%blocks(mb,nb)) + do i=1,mb + do j=1,nb + xij => x%blocks(i,j) + ! owner rank: block rows cycle over p and block columns over q + xij%owner = mod(i-1,p)*q + mod(j-1,q) + if (comm_rank.eq.xij%owner) then + ! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I register it with tag",tag + allocate(xij%c(bs,bs)) + call fstarpu_matrix_data_register( xij%h, 0, c_loc( xij%c(1,1) ), & + bs, bs, bs, c_sizeof(xij%c(1,1)) ) + else + ! 
write(*,*) comm_rank,"] ",xij%owner," owns ",cname,"_",i,j,"so it registers it with tag",tag + call fstarpu_matrix_data_register( xij%h, -1, c_null_ptr, & + bs, bs, bs, c_sizeof(alpha) ) + end if + call fstarpu_mpi_data_register(xij%h, tag, xij%owner) + tag = tag + 1 + end do + end do + end subroutine initialize_matrix + + ! Submit one cl_fill task for every block of x that this rank owns; blocks owned by other ranks are skipped here. + ! cname is only referenced by the commented-out debug output. + subroutine fill_matrix(x,mb,nb,cname) + implicit none + type(dsmat_type), target :: x + integer :: mb, nb + character :: cname + + integer :: i, j + type(block_type), pointer :: xij + + do i=1,mb + do j=1,nb + xij => x%blocks(i,j) + if (comm_rank.eq.xij%owner) then + ! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I fill it" + call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_fill, & + FSTARPU_W, xij%h, & + c_null_ptr /)) + else + !write(*,*) comm_rank,"] ",xij%owner,"owns ",cname,"_",i,j,"so it fills it" + end if + end do + end do + end subroutine fill_matrix + + ! Unregister every block handle of x, free the block storage on the owning rank, then free the block grid itself. + subroutine unregister_matrix(x,mb,nb) + implicit none + integer :: mb, nb + type(block_type), pointer :: xij + type(dsmat_type), target :: x + + integer :: i, j + + do i=1,mb + do j=1,nb + xij => x%blocks(i,j) + call fstarpu_data_unregister(xij%h) + if (comm_rank.eq.xij%owner) then + deallocate(xij%c) + end if + end do + end do + deallocate(x%blocks) + end subroutine unregister_matrix + +end program diff --git a/mpi/examples/native_fortran/nf_mm_cl.f90 b/mpi/examples/native_fortran/nf_mm_cl.f90 new file mode 100644 index 0000000..aae5d7b --- /dev/null +++ b/mpi/examples/native_fortran/nf_mm_cl.f90 @@ -0,0 +1,90 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! 
StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +module nf_mm_cl +contains +! Print matrix m to standard output, one row per line between "|" delimiters, each value formatted as F6.1, followed by a blank line. +subroutine mat_disp (m) + ! declared here so it can be used both for the + ! program and for debugging codelet routines + + use iso_c_binding ! C interfacing module + implicit none + real(kind=c_double) :: m(:,:) + integer i,j + + do i=lbound(m,1),ubound(m,1) + write(*, fmt="(A2) ",advance="no") "| " + do j=lbound(m,2),ubound(m,2) + write(*, fmt="(F6.1,A1) ", advance="no") m(i,j)," " + end do + write(*,*) "|" + end do + write(*,*) + +end subroutine + +! CPU codelet: accumulates the product of buffer 0 (A) and buffer 1 (B) into buffer 2 (C), i.e. C = C + A*B. +! Stops the program with code 1 if the block dimensions are not compatible. +recursive subroutine cl_cpu_mult (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused + real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:) + integer :: ld_A,nx_A,ny_A + integer :: ld_B,nx_B,ny_B + integer :: ld_C,nx_C,ny_C + integer :: i,j,k + + ld_A = fstarpu_matrix_get_ld(buffers, 0) + ld_B = fstarpu_matrix_get_ld(buffers, 1) + ld_C = fstarpu_matrix_get_ld(buffers, 2) + + nx_A = fstarpu_matrix_get_nx(buffers, 0) + nx_B = fstarpu_matrix_get_nx(buffers, 1) + nx_C = fstarpu_matrix_get_nx(buffers, 2) + + ny_A = fstarpu_matrix_get_ny(buffers, 0) + ny_B = fstarpu_matrix_get_ny(buffers, 1) + ny_C = fstarpu_matrix_get_ny(buffers, 2) + + ! as the messages below spell out, nx is used as the row count and ny as the column count of each block + if (ny_C /= ny_B) then + write(*,*) "C -- B column mismatch" + stop 1 + end if + + if (nx_C /= nx_A) then + write(*,*) "C -- A row mismatch" + stop 1 + end if + + if (ny_A /= nx_B) then + write(*,*) "A -- B col/row mismatch" + stop 1 + end if + + ! view each buffer as an ld x ny array, so that indexing follows the leading dimension of the underlying storage + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A]) + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B]) + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), 
C, shape=[ld_C,ny_C]) + + do k = 1, ny_C + do j = 1, nx_C + do i = 1, nx_B + C(j,k) = C(j,k) + A(j,i) * B(i,k) + end do + end do + end do + +end subroutine cl_cpu_mult +end module nf_mm_cl diff --git a/mpi/examples/native_fortran/nf_mm_cl_blas.f90 b/mpi/examples/native_fortran/nf_mm_cl_blas.f90 new file mode 100644 index 0000000..c52f03d --- /dev/null +++ b/mpi/examples/native_fortran/nf_mm_cl_blas.f90 @@ -0,0 +1,91 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +module nf_mm_cl_blas +contains + +recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! 
cl_args holds the packed alpha and beta scalars + real(kind=c_double), target :: alpha, beta + real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:) + integer :: ld_A,nx_A,ny_A + integer :: ld_B,nx_B,ny_B + integer :: ld_C,nx_C,ny_C + ! alpha and beta are handed to dgemm, whose scalar arguments are double precision, hence the explicit c_double kind + + write(*,*) "gemm task" + ! retrieve the two scalars packed with the task, in packing order + call fstarpu_unpack_arg( cl_args, (/ c_loc(alpha), c_loc(beta) /)) + + ld_A = fstarpu_matrix_get_ld(buffers, 0) + ld_B = fstarpu_matrix_get_ld(buffers, 1) + ld_C = fstarpu_matrix_get_ld(buffers, 2) + + nx_A = fstarpu_matrix_get_nx(buffers, 0) + nx_B = fstarpu_matrix_get_nx(buffers, 1) + nx_C = fstarpu_matrix_get_nx(buffers, 2) + + ny_A = fstarpu_matrix_get_ny(buffers, 0) + ny_B = fstarpu_matrix_get_ny(buffers, 1) + ny_C = fstarpu_matrix_get_ny(buffers, 2) + + ! view every raw buffer as a Fortran array with ld rows and ny columns + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A]) + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B]) + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C]) + ! C = alpha*A*B + beta*C, with an nx_C x ny_C result and inner dimension nx_B + call dgemm('n','n',nx_C,ny_C,nx_B, alpha, A(1,1), ld_A, B(1,1), ld_B, & + beta, C(1,1), ld_C) + write(*,*) "end gemm task" + return + +end subroutine cl_cpu_gemm + +! CPU codelet filling the m x n matrix held in buffer 0, one column at a time, with LAPACK's dlarnv. +recursive subroutine cl_cpu_fill (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use fstarpu_mpi_mod + implicit none + + type(c_ptr), value, intent(in) :: cl_args + type(c_ptr), value, intent(in) :: buffers + + real(kind=c_double), pointer :: x(:,:) + integer :: m, n, ld + integer :: j + integer :: iseed(4) = (/1,1,1,1/) ! initialised in its declaration, hence implicitly SAVEd and shared by all invocations + + integer :: comm_rank + + comm_rank = fstarpu_mpi_world_rank() + + m = fstarpu_matrix_get_nx(buffers, 0) + n = fstarpu_matrix_get_ny(buffers, 0) + ld = fstarpu_matrix_get_ld(buffers, 0) + write(*,*) comm_rank,"] fill", m, n, ld + + call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), x, shape=(/ld,n/)) + + ! 
copied from qrm_dsmat_fill_task a few lines up + do j=1,n + call dlarnv(2, iseed(1), m, x(1, j)) + end do + write(*,*) comm_rank,"]end fill task" + return + +end subroutine cl_cpu_fill + +end module nf_mm_cl_blas diff --git a/mpi/examples/native_fortran/nf_mm_task_build.f90 b/mpi/examples/native_fortran/nf_mm_task_build.f90 new file mode 100644 index 0000000..248d401 --- /dev/null +++ b/mpi/examples/native_fortran/nf_mm_task_build.f90 @@ -0,0 +1,247 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +program nf_mm + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + use fstarpu_mpi_mod ! StarPU-MPI interfacing module + use nf_mm_cl + implicit none + + logical, parameter :: verbose = .false. 
+ integer(c_int) :: comm_size, comm_rank + integer(c_int), target :: comm_world + integer(c_int) :: N = 16, BS = 4, NB + real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:) + type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:) + type(c_ptr) :: cl_mm + type(c_ptr) :: task + integer(c_int) :: ncpu + integer(c_int) :: ret + integer(c_int) :: row, col + integer(c_int) :: b_row, b_col + integer(c_int) :: mr, rank + integer(c_int64_t) :: tag + + ! exit status 77 reports the test as skipped, status 1 as failed + ret = fstarpu_init(C_NULL_PTR) + if (ret == -19) then + stop 77 + else if (ret /= 0) then + stop 1 + end if + + ret = fstarpu_mpi_init(1) + print *,"fstarpu_mpi_init status:", ret + if (ret /= 0) then + stop 1 + end if + + ! stop there if no CPU worker available + ! (StarPU-MPI is already initialised at this point, so it is shut down first, then StarPU) + ncpu = fstarpu_cpu_worker_get_count() + if (ncpu == 0) then + ret = fstarpu_mpi_shutdown() + call fstarpu_shutdown() + stop 77 + end if + + comm_world = fstarpu_mpi_world_comm() + comm_size = fstarpu_mpi_world_size() + comm_rank = fstarpu_mpi_world_rank() + + ! the example needs at least two MPI ranks; skip otherwise + if (comm_size < 2) then + ret = fstarpu_mpi_shutdown() + call fstarpu_shutdown() + stop 77 + end if + + ! TODO: process app's argc/argv + NB = N/BS + + ! allocate and initialize codelet + cl_mm = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char) + call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult)) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) + call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) + + ! allocate matrices + if (comm_rank == 0) then + allocate(A(N,N)) + allocate(B(N,N)) + allocate(C(N,N)) + end if + + ! init matrices + ! A = 2 * identity, B(row,col) = row*N+col, C = 0, so the expected product is C = 2*B + if (comm_rank == 0) then + do col=1,N + do row=1,N + if (row == col) then + A(row,col) = 2 + else + A(row,col) = 0 + end if + B(row,col) = row*N+col + C(row,col) = 0 + end do + end do + + if (verbose) then + print *,"A" + call mat_disp(A) + print *,"B" + call mat_disp(B) + print *,"C" + call mat_disp(C) + end if + end if + + ! 
allocate data handles + allocate(dh_A(NB)) + allocate(dh_B(NB)) + allocate(dh_C(NB,NB)) + + ! register matrices + if (comm_rank == 0) then + mr = 0 ! TODO: use STARPU_MAIN_RAM constant + else + mr = -1 + end if + tag = 0 + + do b_row=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_A(b_row), mr, & + c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1))) + else + call fstarpu_matrix_data_register(dh_A(b_row), mr, & + c_null_ptr, N, BS, N, c_sizeof(A(1,1))) + end if + call fstarpu_mpi_data_register(dh_A(b_row), tag, 0) + tag = tag+1 + end do + + do b_col=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_B(b_col), mr, & + c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1))) + else + call fstarpu_matrix_data_register(dh_B(b_col), mr, & + c_null_ptr, N, N, BS, c_sizeof(B(1,1))) + end if + call fstarpu_mpi_data_register(dh_B(b_col), tag, 0) + tag = tag+1 + end do + + do b_col=1,NB + do b_row=1,NB + if (comm_rank == 0) then + call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & + c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1))) + else + call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & + c_null_ptr, N, BS, BS, c_sizeof(C(1,1))) + end if + call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0) + tag = tag+1 + end do + end do + + ! 
distribute matrix C + do b_col=1,NB + do b_row=1,NB + rank = modulo(b_row+b_col, comm_size) + call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank) + end do + end do + + ! one multiplication task per block of C; task_build returns a null pointer on the ranks + ! that do not execute the task, and post_build is called on every rank with the same arguments + do b_col=1,NB + do b_row=1,NB + task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, & + FSTARPU_R, dh_A(b_row), & + FSTARPU_R, dh_B(b_col), & + FSTARPU_RW, dh_C(b_row,b_col), & + C_NULL_PTR /)) + if (c_associated(task)) then + ret = fstarpu_task_submit(task) + endif + call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, & + FSTARPU_R, dh_A(b_row), & + FSTARPU_R, dh_B(b_col), & + FSTARPU_RW, dh_C(b_row,b_col), & + C_NULL_PTR /)) + end do + end do + + call fstarpu_task_wait_for_all() + + ! undistribute matrix C + do b_col=1,NB + do b_row=1,NB + call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0) + end do + end do + + ! unregister matrices + do b_row=1,NB + call fstarpu_data_unregister(dh_A(b_row)) + end do + + do b_col=1,NB + call fstarpu_data_unregister(dh_B(b_col)) + end do + + do b_col=1,NB + do b_row=1,NB + call fstarpu_data_unregister(dh_C(b_row,b_col)) + end do + end do + + ! check result + if (comm_rank == 0) then + if (verbose) then + print *,"final C" + call mat_disp(C) + end if + + do col=1,N + do row=1,N + if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then + print *, "check failed" + stop 1 + end if + end do + end do + end if + + ! free handles + deallocate(dh_A) + deallocate(dh_B) + deallocate(dh_C) + + ! free matrices + if (comm_rank == 0) then + deallocate(A) + deallocate(B) + deallocate(C) + end if + call fstarpu_codelet_free(cl_mm) + ! StarPU-MPI has to be shut down before StarPU itself + ret = fstarpu_mpi_shutdown() + print *,"fstarpu_mpi_shutdown status:", ret + call fstarpu_shutdown() + if (ret /= 0) then + stop 1 + end if +end program nf_mm diff --git a/mpi/examples/native_fortran/nf_mpi_redux.f90 b/mpi/examples/native_fortran/nf_mpi_redux.f90 new file mode 100644 index 0000000..a8fb2fd --- /dev/null +++ b/mpi/examples/native_fortran/nf_mpi_redux.f90 @@ -0,0 +1,240 @@ +! 
StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +program nf_mpi_redux + use iso_c_binding + use fstarpu_mod + use fstarpu_mpi_mod + + implicit none + + integer, target :: ret, np, i, j, trial + type(c_ptr) :: work_cl, task_rw_cl,task_red_cl, task_ini_cl + character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR + real(kind(1.d0)), target :: a,tmp + real(kind(1.d0)), target, allocatable :: b(:) + integer(kind=8) :: tag, err + type(c_ptr) :: ahdl + type(c_ptr), target, allocatable :: bhdl(:) + type(c_ptr) :: task_mode, codelet_mode + integer, target :: comm_world,comm_w_rank, comm_size + integer(c_int), target :: w_node, nworkers, work_coef + + call fstarpu_fxt_autostart_profiling(0) + ret = fstarpu_init(c_null_ptr) + ret = fstarpu_mpi_init(1) + + comm_world = fstarpu_mpi_world_comm() + comm_w_rank = fstarpu_mpi_world_rank() + comm_size = fstarpu_mpi_world_size() + if (comm_size.lt.2) then + write(*,'(" ")') + write(*,'("This application is meant to run with at least two nodes (found ",i4," ; i am ",i4,").")') comm_size, comm_w_rank + stop 2 + end if + allocate(b(comm_size-1), bhdl(comm_size-1)) + nworkers = fstarpu_worker_get_count() + if (nworkers.lt.1) then + 
write(*,'(" ")') + write(*,'("This application is meant to run with at least one worker per node.")') + stop 2 + end if + + ! allocate the reduction and initialization codelets + ! reduction: buffer 0 is updated in place (RW, commutative) from the read-only buffer 1 + task_red_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_red_cl, namered) + call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) + + ! initialization: its single buffer is write-only + task_ini_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_ini_cl, nameini) + call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) + call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) + + ! every node 1..comm_size-1 is given work_coef*nworkers tasks + work_coef=2 + + ! the experiment is run twice: trial 1 with STARPU_REDUX, trial 2 with STARPU_MPI_REDUX + do trial=1,2 + + if (trial.eq.2) then + write(*,*) "Using STARPU_MPI_REDUX" + codelet_mode = FSTARPU_RW.ior.FSTARPU_COMMUTE + task_mode = FSTARPU_MPI_REDUX + else if (trial.eq.1) then + write(*,*) "Using STARPU_REDUX" + codelet_mode = FSTARPU_REDUX + task_mode = FSTARPU_REDUX + end if + ! 
allocate and fill codelet structs + work_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(work_cl, name) + call fstarpu_codelet_add_cpu_func(work_cl, C_FUNLOC(cl_cpu_task)) + call fstarpu_codelet_add_buffer(work_cl, codelet_mode) + call fstarpu_codelet_add_buffer(work_cl, FSTARPU_R) + err = fstarpu_mpi_barrier(comm_world) + + if(comm_w_rank.eq.0) then + write(*,'(" ")') + a = 1.0 + write(*,*) "init a = ", a + else + b(comm_w_rank) = 1.0 / (comm_w_rank + 1.0) + write(*,*) "init b_",comm_w_rank,"=", b(comm_w_rank) + end if + + err = fstarpu_mpi_barrier(comm_world) + + tag = 0 + if(comm_w_rank.eq.0) then + call fstarpu_variable_data_register(ahdl, 0, c_loc(a),c_sizeof(a)) + do i=1,comm_size-1 + call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) + end do + else + call fstarpu_variable_data_register(ahdl, -1, c_null_ptr,c_sizeof(a)) + do i=1,comm_size-1 + if (i.eq.comm_w_rank) then + call fstarpu_variable_data_register(bhdl(i), 0, c_loc(b(i)),c_sizeof(b(i))) + else + call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) + end if + end do + end if + call fstarpu_mpi_data_register(ahdl, tag, 0) + do i=1,comm_size-1 + call fstarpu_mpi_data_register(bhdl(i), tag+i,i) + end do + + tag = tag + comm_size + + call fstarpu_data_set_reduction_methods(ahdl,task_red_cl,task_ini_cl) + + err = fstarpu_mpi_barrier(comm_world) + + + call fstarpu_fxt_start_profiling() + do w_node=1,comm_size-1 + do i=1,work_coef*nworkers + call fstarpu_mpi_task_insert( (/ c_loc(comm_world), & + work_cl, & + task_mode, ahdl, & + FSTARPU_R, bhdl(w_node), & + FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & + C_NULL_PTR /)) + end do + end do + call fstarpu_mpi_redux_data(comm_world, ahdl) + err = fstarpu_mpi_wait_for_all(comm_world) + + if(comm_w_rank.eq.0) then + tmp = 0 + do w_node=1,comm_size-1 + tmp = tmp + 1.0 / (w_node+1.0) + end do + write(*,*) 'computed result ---> ',a, "expected =",& + 1.0 + (comm_size-1.0)*(comm_size)/2.0 + 
work_coef*nworkers*((comm_size-1.0)*3.0 + tmp) + end if + err = fstarpu_mpi_barrier(comm_world) + call fstarpu_data_unregister(ahdl) + do w_node=1,comm_size-1 + call fstarpu_data_unregister(bhdl(w_node)) + end do + call fstarpu_codelet_free(work_cl) + + end do + + call fstarpu_fxt_stop_profiling() + call fstarpu_codelet_free(task_red_cl) + call fstarpu_codelet_free(task_ini_cl) + + + err = fstarpu_mpi_shutdown() + call fstarpu_shutdown() + deallocate(b, bhdl) + stop 0 + +contains + + recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused + integer(c_int) :: ret, worker_id + integer :: comm_rank + integer, target :: i + real(kind(1.d0)), pointer :: a, b + real(kind(1.d0)) :: old_a + + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) + call fstarpu_sleep(real(0.01, c_float)) + old_a = a + a = old_a + 3.0 + b + write(*,*) "task (c_w_rank:",comm_rank," worker_id:",worker_id,") from ",old_a,"to",a + + return + end subroutine cl_cpu_task + + recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! 
cl_args is unused + integer(c_int) :: ret, worker_id + integer, target :: comm_rank + real(kind(1.d0)), pointer :: as, ad + real(kind(1.d0)) :: old_ad + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) + old_ad = ad + ad = ad + as + call fstarpu_sleep(real(0.01, c_float)) + write(*,*) "red_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,")",as, old_ad, ' ---> ',ad + + return + end subroutine cl_cpu_task_red + + recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args + ! cl_args is unused + integer(c_int) :: ret, worker_id + integer, target :: comm_rank + real(kind(1.d0)), pointer :: a + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call fstarpu_sleep(real(0.005, c_float)) + ! As this codelet is run by each worker in the REDUX mode case + ! this initialization makes salient the number of copies spawned + write(*,*) "ini_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,") set to", comm_rank + a = comm_rank + return + end subroutine cl_cpu_task_ini + +end program diff --git a/mpi/examples/native_fortran/nf_mpi_redux_tree.f90 b/mpi/examples/native_fortran/nf_mpi_redux_tree.f90 new file mode 100644 index 0000000..a021449 --- /dev/null +++ b/mpi/examples/native_fortran/nf_mpi_redux_tree.f90 @@ -0,0 +1,228 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! 
the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +program nf_mpi_redux + use iso_c_binding + use fstarpu_mod + use fstarpu_mpi_mod + + implicit none + + integer, target :: ret, np, i, j, arity + type(c_ptr) :: work_cl, task_rw_cl,task_red_cl, task_ini_cl + character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR + real(kind(1.d0)), target :: a,tmp + real(kind(1.d0)), target, allocatable :: b(:) + integer(kind=8) :: tag, err + type(c_ptr), target :: ahdl + type(c_ptr), target, allocatable :: bhdl(:) + type(c_ptr) :: task_mode, codelet_mode + integer, target :: comm_world,comm_w_rank, comm_size + integer(c_int), target :: w_node, nworkers, work_coef + + !call fstarpu_fxt_autostart_profiling(0) + ret = fstarpu_init(c_null_ptr) + ret = fstarpu_mpi_init(1) + + comm_world = fstarpu_mpi_world_comm() + comm_w_rank = fstarpu_mpi_world_rank() + comm_size = fstarpu_mpi_world_size() + allocate(b(comm_size-1), bhdl(comm_size-1)) + nworkers = fstarpu_worker_get_count() + if (nworkers.lt.1) then + write(*,'(" ")') + write(*,'("This application is meant to run with at least one worker per node.")') + stop 2 + end if + + ! 
allocate and reduction codelets + task_red_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_red_cl, namered) + call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) + + task_ini_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_ini_cl, nameini) + call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) + call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) + + work_coef=2 + + codelet_mode = FSTARPU_RW.ior.FSTARPU_COMMUTE + task_mode = FSTARPU_MPI_REDUX + ! allocate and fill codelet structs + work_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(work_cl, name) + call fstarpu_codelet_add_cpu_func(work_cl, C_FUNLOC(cl_cpu_task)) + call fstarpu_codelet_add_buffer(work_cl, codelet_mode) + call fstarpu_codelet_add_buffer(work_cl, FSTARPU_R) + err = fstarpu_mpi_barrier(comm_world) + + do arity=2,comm_size + + if(comm_w_rank.eq.0) then + write(*,'(" ")') + a = 1.0 + write(*,*) "init a = ", a + else + b(comm_w_rank) = 1.0 / (comm_w_rank + 1.0) + write(*,*) "init b_",comm_w_rank,"=", b(comm_w_rank) + end if + + err = fstarpu_mpi_barrier(comm_world) + + tag = 0 + if(comm_w_rank.eq.0) then + call fstarpu_variable_data_register(ahdl, 0, c_loc(a),c_sizeof(a)) + do i=1,comm_size-1 + call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) + end do + else + call fstarpu_variable_data_register(ahdl, -1, c_null_ptr,c_sizeof(a)) + do i=1,comm_size-1 + if (i.eq.comm_w_rank) then + call fstarpu_variable_data_register(bhdl(i), 0, c_loc(b(i)),c_sizeof(b(i))) + else + call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) + end if + end do + end if + call fstarpu_mpi_data_register(ahdl, tag, 0) + do i=1,comm_size-1 + call fstarpu_mpi_data_register(bhdl(i), tag+i,i) + end do + + tag = tag + comm_size + + call 
fstarpu_data_set_reduction_methods(ahdl,task_red_cl,task_ini_cl) + + err = fstarpu_mpi_barrier(comm_world) + + + call fstarpu_fxt_start_profiling() + do w_node=1,comm_size-1 + do i=1,work_coef*nworkers + call fstarpu_mpi_task_insert( (/ c_loc(comm_world), & + work_cl, & + task_mode, ahdl, & + FSTARPU_R, bhdl(w_node), & + FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & + C_NULL_PTR /)) + end do + end do + call fstarpu_mpi_redux_data_tree(comm_world, ahdl, arity) + err = fstarpu_mpi_wait_for_all(comm_world) + + if(comm_w_rank.eq.0) then + tmp = 0 + do w_node=1,comm_size-1 + tmp = tmp + 1.0 / (w_node+1.0) + end do + write(*,*) 'computed result ---> ',a, "expected =",& + 1.0 + (comm_size-1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1.0)*3.0 + tmp) + end if + err = fstarpu_mpi_barrier(comm_world) + call fstarpu_data_unregister(ahdl) + do w_node=1,comm_size-1 + call fstarpu_data_unregister(bhdl(w_node)) + end do + + call fstarpu_fxt_stop_profiling() + end do + + call fstarpu_codelet_free(work_cl) + call fstarpu_codelet_free(task_red_cl) + call fstarpu_codelet_free(task_ini_cl) + + + err = fstarpu_mpi_shutdown() + call fstarpu_shutdown() + deallocate(b, bhdl) + stop 0 + +contains + + recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! 
cl_args is unused + integer(c_int) :: ret, worker_id + integer :: comm_rank + integer, target :: i + real(kind(1.d0)), pointer :: a, b + real(kind(1.d0)) :: old_a + + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) + call fstarpu_sleep(real(0.01, c_float)) + old_a = a + a = old_a + 3.0 + b + write(*,*) "task (c_w_rank:",comm_rank," worker_id:",worker_id,") from ",old_a,"to",a + + return + end subroutine cl_cpu_task + + recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused + integer(c_int) :: ret, worker_id + integer, target :: comm_rank + real(kind(1.d0)), pointer :: as, ad + real(kind(1.d0)) :: old_ad + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) + old_ad = ad + ad = ad + as + call fstarpu_sleep(real(0.01, c_float)) + write(*,*) "red_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,")",as, old_ad, ' ---> ',ad + + return + end subroutine cl_cpu_task_red + + recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args + ! cl_args is unused + integer(c_int) :: ret, worker_id + integer, target :: comm_rank + real(kind(1.d0)), pointer :: a + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call fstarpu_sleep(real(0.005, c_float)) + ! As this codelet is run by each worker in the REDUX mode case + ! 
this initialization makes salient the number of copies spawned + write(*,*) "ini_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,") set to", comm_rank + a = comm_rank + return + end subroutine cl_cpu_task_ini + +end program diff --git a/mpi/examples/native_fortran/nf_redux_test.f90 b/mpi/examples/native_fortran/nf_redux_test.f90 new file mode 100644 index 0000000..36d5eab --- /dev/null +++ b/mpi/examples/native_fortran/nf_redux_test.f90 @@ -0,0 +1,226 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! 
+program main + use iso_c_binding + use fstarpu_mod + use fstarpu_mpi_mod + + implicit none + + integer, target :: ret, np, i, j + type(c_ptr) :: task_cl, task_rw_cl, task_red_cl, task_ini_cl + character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR + character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR + real(kind(1.d0)), target :: a1, a2, b1, b2 + integer(kind=8) :: tag, err + type(c_ptr) :: a1hdl, a2hdl, b1hdl, b2hdl + integer, target :: comm, comm_world, comm_w_rank, comm_size + integer(c_int), target :: w_node + + call fstarpu_fxt_autostart_profiling(0) + ret = fstarpu_init(c_null_ptr) + ret = fstarpu_mpi_init(1) + + comm_world = fstarpu_mpi_world_comm() + comm_w_rank = fstarpu_mpi_world_rank() + comm_size = fstarpu_mpi_world_size() + if (comm_size.ne.4) then + write(*,'(" ")') + write(*,'("This application is meant to run with 4 MPI")') + stop 1 + end if + err = fstarpu_mpi_barrier(comm_world) + + if(comm_w_rank.eq.0) then + write(*,'(" ")') + a1 = 1.0 + write(*,*) "init_a1", a1 + b1 = 0.5 + write(*,*) "init b1", b1 + end if + if(comm_w_rank.eq.1) then + write(*,'(" ")') + a2 = 2.0 + write(*,*) "init_a2", a2 + b2 = 0.8 + write(*,*) "init b2", b2 + end if + + ! allocate and fill codelet structs + task_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_cl, name) + call fstarpu_codelet_add_cpu_func(task_cl, C_FUNLOC(cl_cpu_task)) + call fstarpu_codelet_add_buffer(task_cl, FSTARPU_REDUX) + call fstarpu_codelet_add_buffer(task_cl, FSTARPU_R) + + ! 
allocate and reduction codelets + task_red_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_red_cl, namered) + call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) + call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) + + task_ini_cl = fstarpu_codelet_allocate() + call fstarpu_codelet_set_name(task_ini_cl, nameini) + call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) + call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) + + err = fstarpu_mpi_barrier(comm_world) + + tag = 0 + if(comm_w_rank.eq.0) then + call fstarpu_variable_data_register(a1hdl, 0, c_loc(a1),c_sizeof(a1)) + call fstarpu_variable_data_register(b1hdl, 0, c_loc(b1),c_sizeof(b1)) + else + call fstarpu_variable_data_register(a1hdl, -1, c_null_ptr,c_sizeof(a1)) + call fstarpu_variable_data_register(b1hdl, -1, c_null_ptr,c_sizeof(b1)) + end if + call fstarpu_mpi_data_register(a1hdl,tag,0) + call fstarpu_mpi_data_register(b1hdl, tag+1,0) + + tag = tag + 2 + if(comm_w_rank.eq.1) then + call fstarpu_variable_data_register(a2hdl, 0, c_loc(a2),c_sizeof(a2)) + call fstarpu_variable_data_register(b2hdl, 0, c_loc(b2),c_sizeof(b2)) + else + call fstarpu_variable_data_register(a2hdl, -1, c_null_ptr,c_sizeof(a2)) + call fstarpu_variable_data_register(b2hdl, -1, c_null_ptr,c_sizeof(b2)) + end if + call fstarpu_mpi_data_register(a2hdl,tag,1) + call fstarpu_mpi_data_register(b2hdl, tag+1, 1) + tag = tag + 2 + + call fstarpu_data_set_reduction_methods(a1hdl, task_red_cl,task_ini_cl) + call fstarpu_data_set_reduction_methods(a2hdl, task_red_cl,task_ini_cl) + + err = fstarpu_mpi_barrier(comm_world) + + call fstarpu_fxt_start_profiling() + + w_node = 3 + comm = comm_world + call fstarpu_mpi_task_insert( (/ c_loc(comm), & + task_cl, & + FSTARPU_REDUX, a1hdl, & + FSTARPU_R, b1hdl, & + FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & + C_NULL_PTR /)) + w_node = 2 + comm = comm_world + 
call fstarpu_mpi_task_insert( (/ c_loc(comm), & + task_cl, & + FSTARPU_REDUX, a2hdl, & + FSTARPU_R, b2hdl, & + FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & + C_NULL_PTR /)) + + call fstarpu_mpi_redux_data(comm_world, a1hdl) + call fstarpu_mpi_redux_data(comm_world, a2hdl) + ! write(*,*) "waiting all tasks ..." + err = fstarpu_mpi_wait_for_all(comm_world) + + if(comm_w_rank.eq.0) then + write(*,*) 'computed result ---> ',a1, "expected =",4.5 + end if + if(comm_w_rank.eq.1) then + write(*,*) 'computed result ---> ',a2, "expected=",5.8 + end if + call fstarpu_data_unregister(a1hdl) + call fstarpu_data_unregister(a2hdl) + call fstarpu_data_unregister(b1hdl) + call fstarpu_data_unregister(b2hdl) + + call fstarpu_fxt_stop_profiling() + call fstarpu_codelet_free(task_cl) + call fstarpu_codelet_free(task_red_cl) + call fstarpu_codelet_free(task_ini_cl) + + + err = fstarpu_mpi_shutdown() + call fstarpu_shutdown() + + stop + +contains + + recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused + integer(c_int) :: ret, worker_id + integer :: comm_rank + integer, target :: i + real(kind(1.d0)), pointer :: a, b + real(kind(1.d0)) :: old_a + + worker_id = fstarpu_worker_get_id() + comm_rank = fstarpu_mpi_world_rank() + + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) + call fstarpu_sleep(real(0.01, c_float)) + old_a = a + a = 3.0 + b + write(*,*) "task (c_w_rank:",comm_rank,") from ",old_a,"to",a + + return + end subroutine cl_cpu_task + + recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args ! 
cl_args is unused + integer(c_int) :: ret + integer, target :: comm_rank + real(kind(1.d0)), pointer :: as, ad + real(kind(1.d0)) :: old_ad + + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) + old_ad = ad + ad = ad + as + call fstarpu_sleep(real(0.01, c_float)) + write(*,*) "red_cl (c_w_rank:",comm_rank,")",as, old_ad, ' ---> ',ad + + return + end subroutine cl_cpu_task_red + + recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) + use iso_c_binding ! C interfacing module + use fstarpu_mod ! StarPU interfacing module + implicit none + + type(c_ptr), value, intent(in) :: buffers, cl_args + ! cl_args is unused + integer(c_int) :: ret + integer, target :: comm_rank + real(kind(1.d0)), pointer :: a + + comm_rank = fstarpu_mpi_world_rank() + call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) + call fstarpu_sleep(real(0.005, c_float)) + a = 0.0 + write(*,*) "ini_cl (c_w_rank:",comm_rank,")" + return + end subroutine cl_cpu_task_ini + +end program main diff --git a/mpi/examples/perf.sh b/mpi/examples/perf.sh new file mode 100755 index 0000000..d4424ed --- /dev/null +++ b/mpi/examples/perf.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# 4G x np = 4 * (k*1K) ^ 2 +# A G * np = 4 * k^2 * 1M +# A * 250 * np = k^2 +# A = 6 +# k = sqrt(1500*np) +# np = 1 => k = 32 +# np = 2 => k = 48 +# np = 3 => k = 64 +# np = 4 => k = 64 + +# Problem size +NBLOCKS=16 +BLOCKSIZE=1024 +SIZE=$(($NBLOCKS*$BLOCKSIZE)) + +echo "JOB ID ${PBS_JOBID}" + +nnodes=$(cat machinefile.${PBS_JOBID}|wc -l) +echo "got $nnodes mpi nodes" + +# Calibrate +ncalibrate=0 +for i in `seq 1 $ncalibrate` +do +echo "STARPU_CALIBRATE $i/$ncalibrate" +STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes $MS_LAUNCHER $STARPU_LAUNCH ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa +done + +func() +{ +ngpus=$1 +np=$2 +p=$3 +q=$4 +nblocks=$5 + +echo "*******************************************"> log +echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************">> log +echo "*******************************************">> log +cat log +cat log >> log.all + +STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np $MS_LAUNCHER $STARPU_LAUNCH ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err +cat log.out > log +cat log.err >> log +cat log +cat log >> log.all +} + +rm -f log.all + +#how many time do we repeat each experiment ? +nloops=3 + +per_node_max_memory=7000 + +for np in 1 2 4 +do + for nblocks in 16 32 48 64 80 + do + for ngpus_per_node in 1 2 3 4 + do + for loop in `seq 1 $nloops` + do + # Compute p and q from np + case $np in + 1) p=1; q=1;; + 2) p=2; q=1;; + 4) p=2; q=2;; + *) echo -n "does not support $np nodes yet";; + esac + + # Does the problem fit into memory ? 
+ matrix_size=$(($nblocks * $BLOCKSIZE)) + per_node_memory=$(($((4*$matrix_size*$matrix_size/(1024*1024))) / $np)) + + echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks" + + if test $per_node_memory -ge $per_node_max_memory; then + echo "Problem is too large !" + else + func $ngpus_per_node $np $p $q $nblocks + echo "go !" + fi + done + done + done +done diff --git a/mpi/examples/stencil/stencil5.c b/mpi/examples/stencil/stencil5.c new file mode 100644 index 0000000..db7ef04 --- /dev/null +++ b/mpi/examples/stencil/stencil5.c @@ -0,0 +1,264 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +void stencil5_cpu(void *descr[], void *_args) +{ + (void)_args; + float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]); + float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]); + float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]); + +// fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); + *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; +// fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); +} + +struct starpu_codelet stencil5_cl = +{ + .cpu_funcs = {stencil5_cpu}, + .nbuffers = 5, + .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +#ifdef STARPU_QUICK_CHECK +# define NITER_DEF 5 +# define X 4 +# define Y 4 +#elif !defined(STARPU_LONG_CHECK) +# define NITER_DEF 10 +# define X 5 +# define Y 5 +#else +# define NITER_DEF 100 +# define X 20 +# define Y 20 +#endif + +int display = 0; +int niter = NITER_DEF; + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int y, int nb_nodes) +{ + /* Block distrib */ + return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; +} + +/* Shifted distribution, for migration example */ +int my_distrib2(int x, int y, int nb_nodes) +{ + return (my_distrib(x, y, nb_nodes) + 1) % nb_nodes; +} + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + niter = strtol(argv[++i], &argptr, 10); + } + if (strcmp(argv[i], "-display") == 0) + { + display = 1; + } + } +} + +int main(int argc, char **argv) +{ + int my_rank, size, x, y, loop; + float mean=0; + float matrix[X][Y]; + starpu_data_handle_t data_handles[X][Y]; + int ret; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, 
MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (my_rank == 0) return 77; else return 0; + } + + parse_args(argc, argv); + + /* Initial data values */ + starpu_srand48((long int)time(NULL)); + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + matrix[x][y] = (float)starpu_drand48(); + mean += matrix[x][y]; + } + } + mean /= (X*Y); + + if (display) + { + FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); + for(x = 0; x < X; x++) + { + fprintf(stdout, "[%d] ", my_rank); + for (y = 0; y < Y; y++) + { + fprintf(stdout, "%2.2f ", matrix[x][y]); + } + fprintf(stdout, "\n"); + } + } + + /* Initial distribution */ + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + int mpi_rank = my_distrib(x, y, size); + if (mpi_rank == my_rank) + { + //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); + starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); + } + else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) + || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) + { + /* I don't own this index, but will need it for my computations */ + //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); + starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); + } + else + { + /* I know it's useless to allocate anything for this */ + data_handles[x][y] = NULL; + } + if (data_handles[x][y]) + { + starpu_data_set_coordinates(data_handles[x][y], 2, x, y); + starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); + } + } + } + + /* First computation with initial distribution */ + for(loop=0 ; loop +#include +#include +#include + +void stencil5_cpu(void *descr[], void 
*_args) +{ + (void)_args; + float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]); + float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]); + float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]); + +// fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); + *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; +// fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); +} + +struct starpu_codelet stencil5_cl = +{ + .cpu_funcs = {stencil5_cpu}, + .nbuffers = 5, + .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +#ifdef STARPU_QUICK_CHECK +# define NITER_DEF 5 +# define X 4 +# define Y 4 +#elif !defined(STARPU_LONG_CHECK) +# define NITER_DEF 10 +# define X 5 +# define Y 5 +#else +# define NITER_DEF 100 +# define X 20 +# define Y 20 +#endif + +int display = 0; +int niter = NITER_DEF; + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int y, int nb_nodes) +{ + /* Block distrib */ + return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; +} + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-iter") == 0) + { + char *argptr; + niter = strtol(argv[++i], &argptr, 10); + } + if (strcmp(argv[i], "-display") == 0) + { + display = 1; + } + } +} + +void get_neighbors(int **neighbor_ids, int *nneighbors) +{ + int rank, size; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size <= 2) + { + *nneighbors = 1; + *neighbor_ids = malloc(sizeof(int)); + *neighbor_ids[0] = rank==size-1?0:rank+1; + fprintf(stderr, "rank %d has neighbor %d\n", rank, *neighbor_ids[0]); + } + else + { + *nneighbors = 2; + *neighbor_ids = malloc(2*sizeof(int)); + 
(*neighbor_ids)[0] = rank==size-1?0:rank+1; + (*neighbor_ids)[1] = rank==0?size-1:rank-1; + fprintf(stderr, "rank %d has neighbor %d and %d\n", rank, (*neighbor_ids)[0], (*neighbor_ids)[1]); + } +} + +struct data_node +{ + starpu_data_handle_t data_handle; + int node; +}; + +struct data_node data_nodes[X][Y]; + +void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node) +{ + int rank, x, y; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + fprintf(stderr, "Looking to move data from %d to %d\n", rank, dst_node); + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + if (data_nodes[x][y].node == rank) + { + *handle_unit = malloc(sizeof(starpu_data_handle_t)); + *handle_unit[0] = data_nodes[x][y].data_handle; + *nhandles = 1; + data_nodes[x][y].node = dst_node; + return; + } + } + } + *nhandles = 0; +} + +int main(int argc, char **argv) +{ + int my_rank, size, x, y, loop; + float mean=0; + float matrix[X][Y]; + struct starpu_mpi_lb_conf itf; + int ret; + + itf.get_neighbors = get_neighbors; + itf.get_data_unit_to_migrate = get_data_unit_to_migrate; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size > 2) + { + FPRINTF(stderr, "Only works with 2 nodes\n"); + starpu_mpi_shutdown(); + if (my_rank == 0) return 77; else return 0; + } + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (my_rank == 0) return 77; else return 0; + } + + { + char sleep_thr[10]; + snprintf(sleep_thr, 10, "%d", Y); + setenv("LB_HEAT_SLEEP_THRESHOLD", sleep_thr, 1); + } + starpu_mpi_lb_init("heat", &itf); + + parse_args(argc, argv); + + /* Initial data values */ + starpu_srand48((long int)time(NULL)); + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + matrix[x][y] = 
(float)starpu_drand48(); + mean += matrix[x][y]; + } + } + mean /= (X*Y); + + if (display) + { + FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); + for(x = 0; x < X; x++) + { + fprintf(stdout, "[%d] ", my_rank); + for (y = 0; y < Y; y++) + { + fprintf(stdout, "%2.2f ", matrix[x][y]); + } + fprintf(stdout, "\n"); + } + } + + /* Initial distribution */ + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + data_nodes[x][y].node = my_distrib(x, y, size); + if (data_nodes[x][y].node == my_rank) + { + //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); + starpu_variable_data_register(&data_nodes[x][y].data_handle, 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); + } + else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) + || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) + { + /* I don't own this index, but will need it for my computations */ + //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); + starpu_variable_data_register(&data_nodes[x][y].data_handle, -1, (uintptr_t)NULL, sizeof(float)); + } + else + { + /* I know it's useless to allocate anything for this */ + data_nodes[x][y].data_handle = NULL; + } + if (data_nodes[x][y].data_handle) + { + starpu_data_set_coordinates(data_nodes[x][y].data_handle, 2, x, y); + starpu_mpi_data_register(data_nodes[x][y].data_handle, (y*X)+x, data_nodes[x][y].node); + } + } + } + + /* First computation with initial distribution */ + for(loop=0 ; loop +#include + +#include "my_interface.h" + +void starpu_my_data_display_codelet_cpu(void *descr[], void *_args) +{ + char c = STARPU_MY_DATA_GET_CHAR(descr[0]); + int d = STARPU_MY_DATA_GET_INT(descr[0]); + char msg[100]=""; + + if (_args) + starpu_codelet_unpack_args(_args, &msg); + + fprintf(stderr, "[%s] My value = '%c' %d\n", msg, c, d); +} + +void starpu_my_data_compare_codelet_cpu(void *descr[], void *_args) +{ + int *compare; + + starpu_codelet_unpack_args(_args, &compare); + + int d0 = 
STARPU_MY_DATA_GET_INT(descr[0]); + char c0 = STARPU_MY_DATA_GET_CHAR(descr[0]); + int d1 = STARPU_MY_DATA_GET_INT(descr[1]); + char c1 = STARPU_MY_DATA_GET_CHAR(descr[1]); + + *compare = (d0 == d1 && c0 == c1); +} + +void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype) +{ + int ret; + int blocklengths[2] = {1, 1}; + MPI_Aint displacements[2]; + MPI_Datatype types[2] = {MPI_INT, MPI_CHAR}; + struct starpu_my_data *myinterface; + myinterface = malloc(sizeof(struct starpu_my_data)); + + MPI_Get_address(myinterface, displacements); + MPI_Get_address(&myinterface[0].c, displacements+1); + displacements[1] -= displacements[0]; + displacements[0] = 0; + + ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + + ret = MPI_Type_commit(mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + free(myinterface); +} + +int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) +{ + (void)handle; + _starpu_my_data_datatype_allocate(node, mpi_datatype); + return 0; +} + +void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype) +{ + int ret = MPI_Type_free(mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); +} + +int starpu_my_data2_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) +{ + (void)handle; + (void)mpi_datatype; + return -1; +} + +void starpu_my_data2_datatype_free(MPI_Datatype *mpi_datatype) +{ + STARPU_ASSERT_MSG(0, "should not be called\n"); +} + +char starpu_my_data_interface_get_char(void *interface) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) interface; + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + return data->c; +} + +int starpu_my_data_interface_get_int(void *interface) +{ + struct starpu_my_data_interface *my_data = 
(struct starpu_my_data_interface *) interface; + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + return data->d; +} + +int starpu_my_data_get_int(starpu_data_handle_t handle) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + return data->d; +} + +char starpu_my_data_get_char(starpu_data_handle_t handle) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + return data->c; +} + +static void data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_my_data_interface *local_interface = + (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = my_data_interface->ptr; + local_interface->dev_handle = my_data_interface->dev_handle; + local_interface->offset = my_data_interface->offset; + } + else + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + } + } +} + +static starpu_ssize_t data_allocate_data_on_node(void *data_interface, unsigned node) +{ + uintptr_t addr = 0, handle; + + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; + + starpu_ssize_t allocated_memory = sizeof(int)+sizeof(char); + handle = starpu_malloc_on_node(node, allocated_memory); + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) + addr = handle; + + /* update the data properly in consequence */ + 
my_data_interface->ptr = addr; + my_data_interface->dev_handle = handle; + my_data_interface->offset = 0; + + return allocated_memory; +} + +static void data_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; + starpu_free_on_node(node, my_data_interface->dev_handle, sizeof(int)+sizeof(char)); + my_data_interface->ptr = 0; + my_data_interface->dev_handle = 0; +} + +static size_t data_get_size(starpu_data_handle_t handle) +{ + (void)handle; + return sizeof(int) + sizeof(char); +} + +static size_t data_get_alloc_size(starpu_data_handle_t handle) +{ + (void)handle; + return sizeof(int) + sizeof(char); +} + +static uint32_t data_footprint(starpu_data_handle_t handle) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return starpu_hash_crc32c_be(my_data->ptr, 0); +} + +static int data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + (void)handle; + (void)node; + (void)ptr; + (void)count; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the pack_data function should not happen\n"); + return 0; +} + +static int data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)handle; + (void)node; + (void)ptr; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); + return 0; +} + +static int data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)handle; + (void)node; + (void)ptr; + (void)count; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). 
Calling the unpack_data function should not happen\n"); + return 0; +} + +static int data_pack_data2(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + *count = sizeof(int) + sizeof(char); + if (ptr != NULL) + { + int d = starpu_my_data_get_int(handle); + char c = starpu_my_data_get_char(handle); + + *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); + memcpy(*ptr, &d, sizeof(int)); + char *x = *ptr; + x += sizeof(int); + memcpy(x, &c, sizeof(char)); + } + + return 0; +} + +static int data_peek_data2(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)count; + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + STARPU_ASSERT(count == sizeof(int)+sizeof(char)); + + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + memcpy(&data->d, ptr, sizeof(int)); + char *x = ptr; + x += sizeof(int); + memcpy(&data->c, x, sizeof(char)); + return 0; +} + +static int data_unpack_data2(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + data_peek_data2(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + return 0; +} + +static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) data_interface; + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + if (data) + return snprintf(buf, size, "Data%d-%c", data->d, data->c); + else + return snprintf(buf, size, "DataUNKNOWN"); +} + +static void *data_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_my_data_interface *my_data_interface = data_interface; + + return (void*) my_data_interface->ptr; +} + +static int 
copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + void *async_data) +{ + struct starpu_my_data_interface *src = src_interface; + struct starpu_my_data_interface *dst = dst_interface; + int ret = 0; + + fprintf(stderr, "copying data src=%p to dst=%p\n", (void*) src->ptr, (void*) dst->ptr); + + if (starpu_interface_copy(src->dev_handle, src->offset, src_node, + dst->dev_handle, dst->offset, dst_node, + sizeof(int) + sizeof(char), async_data)) + ret = -EAGAIN; + return ret; +} + +static const struct starpu_data_copy_methods data_copy_methods = +{ + .any_to_any = copy_any_to_any +}; + +static struct starpu_data_interface_ops interface_data_ops = +{ + .register_data_handle = data_register_data_handle, + .allocate_data_on_node = data_allocate_data_on_node, + .free_data_on_node = data_free_data_on_node, + .copy_methods = &data_copy_methods, + .get_size = data_get_size, + .get_alloc_size = data_get_alloc_size, + .footprint = data_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_my_data_interface), + .to_pointer = data_to_pointer, + .pack_data = data_pack_data, + .peek_data = data_peek_data, + .unpack_data = data_unpack_data, + .describe = data_describe +}; + +void starpu_my_data_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) +{ + if (interface_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) + { + interface_data_ops.interfaceid = starpu_data_interface_get_next_id(); + starpu_mpi_interface_datatype_node_register(interface_data_ops.interfaceid, starpu_my_data_datatype_allocate, starpu_my_data_datatype_free); + } + + struct starpu_my_data_interface data = + { + .id = interface_data_ops.interfaceid, + .ptr = (uintptr_t) xc, + .dev_handle = (uintptr_t) xc, + .offset = 0, + }; + + starpu_data_register(handleptr, home_node, &data, &interface_data_ops); +} + +void starpu_my_data_shutdown(void) +{ + 
starpu_mpi_interface_datatype_unregister(interface_data_ops.interfaceid); + +} + +static struct starpu_data_interface_ops interface_data2_ops = +{ + .register_data_handle = data_register_data_handle, + .allocate_data_on_node = data_allocate_data_on_node, + .free_data_on_node = data_free_data_on_node, + .copy_methods = &data_copy_methods, + .get_size = data_get_size, + .get_alloc_size = data_get_alloc_size, + .footprint = data_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_my_data_interface), + .to_pointer = data_to_pointer, + .pack_data = data_pack_data2, + .peek_data = data_peek_data2, + .unpack_data = data_unpack_data2, + .describe = data_describe +}; + +void starpu_my_data2_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) +{ + if (interface_data2_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) + { + interface_data2_ops.interfaceid = starpu_data_interface_get_next_id(); + starpu_mpi_interface_datatype_node_register(interface_data2_ops.interfaceid, starpu_my_data2_datatype_allocate, starpu_my_data2_datatype_free); + } + + struct starpu_my_data_interface data = + { + .id = interface_data_ops.interfaceid, + .ptr = (uintptr_t) xc, + .dev_handle = (uintptr_t) xc, + .offset = 0, + }; + + starpu_data_register(handleptr, home_node, &data, &interface_data2_ops); +} + +void starpu_my_data2_shutdown(void) +{ + starpu_mpi_interface_datatype_unregister(interface_data2_ops.interfaceid); + +} diff --git a/mpi/examples/user_datatype/my_interface.h b/mpi/examples/user_datatype/my_interface.h new file mode 100644 index 0000000..2e796b1 --- /dev/null +++ b/mpi/examples/user_datatype/my_interface.h @@ -0,0 +1,82 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#ifndef __DATA_INTERFACE_H +#define __DATA_INTERFACE_H + +struct starpu_my_data_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the data */ + uintptr_t dev_handle; /**< device handle of the data. */ + size_t offset; /**< offset in the data */ +}; + +struct starpu_my_data +{ + int d; + char c; +}; + +void starpu_my_data_register(starpu_data_handle_t *handle, unsigned home_node, struct starpu_my_data *xc); +void starpu_my_data2_register(starpu_data_handle_t *handle, unsigned home_node, struct starpu_my_data *xc); + +char starpu_my_data_get_char(starpu_data_handle_t handle); +int starpu_my_data_get_int(starpu_data_handle_t handle); + +char starpu_my_data_interface_get_char(void *interface); +int starpu_my_data_interface_get_int(void *interface); + +#define STARPU_MY_DATA_GET_CHAR(interface) starpu_my_data_interface_get_char(interface) +#define STARPU_MY_DATA_GET_INT(interface) starpu_my_data_interface_get_int(interface) + +void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype); +int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype); +void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype); +int starpu_my_data2_datatype_allocate(starpu_data_handle_t handle, unsigned 
node, MPI_Datatype *mpi_datatype); +void starpu_my_data2_datatype_free(MPI_Datatype *mpi_datatype); + +void starpu_my_data_display_codelet_cpu(void *descr[], void *_args); +void starpu_my_data_compare_codelet_cpu(void *descr[], void *_args); + +static struct starpu_codelet starpu_my_data_display_codelet = +{ + .cpu_funcs = {starpu_my_data_display_codelet_cpu}, + .cpu_funcs_name = {"starpu_my_data_display_codelet_cpu"}, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &starpu_perfmodel_nop, + .name = "starpu_my_data_display_codelet" +}; + +static struct starpu_codelet starpu_my_data_compare_codelet = +{ + .cpu_funcs = {starpu_my_data_compare_codelet_cpu}, + .cpu_funcs_name = {"starpu_my_data_compare_codelet_cpu"}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R}, + .model = &starpu_perfmodel_nop, + .name = "starpu_my_data_compare_codelet" +}; + +void starpu_my_data_shutdown(void); +void starpu_my_data2_shutdown(void); + +#endif /* __MY_INTERFACE_H */ diff --git a/mpi/examples/user_datatype/user_datatype.c b/mpi/examples/user_datatype/user_datatype.c new file mode 100644 index 0000000..1b8e39d --- /dev/null +++ b/mpi/examples/user_datatype/user_datatype.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "my_interface.h" + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(int argc, char **argv) +{ + int rank, nodes; + int ret=0; + int compare=0; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; + struct starpu_my_data my1 = {.d = 98 , .c = 'z'}; + + starpu_data_handle_t handle0; + starpu_data_handle_t handle1; + + if (rank == 1) + { + my0.d = 0; + my0.c = 'z'; + } + + starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); + starpu_my_data_register(&handle1, -1, &my1); + + // Send data directly with MPI + if (rank == 0) + { + MPI_Datatype mpi_datatype; + _starpu_my_data_datatype_allocate(STARPU_MAIN_RAM, &mpi_datatype); + MPI_Send(&my0, 1, mpi_datatype, 1, 42, MPI_COMM_WORLD); + starpu_my_data_datatype_free(&mpi_datatype); + } + else if (rank == 1) + { + MPI_Datatype mpi_datatype; + MPI_Status status; + struct starpu_my_data myx; + _starpu_my_data_datatype_allocate(STARPU_MAIN_RAM, &mpi_datatype); + MPI_Recv(&myx, 1, mpi_datatype, 0, 42, MPI_COMM_WORLD, &status); + FPRINTF(stderr, "[mpi] Received value: '%c' %d\n", myx.c, myx.d); + starpu_my_data_datatype_free(&mpi_datatype); + STARPU_ASSERT_MSG(myx.d == 42 && myx.c == 'n', "Incorrect received value\n"); + } + + if (rank == 0) + { + struct starpu_my_data myx = {.d = 98 , .c = 'z'}; + starpu_data_handle_t handlex; + starpu_my_data_register(&handlex, STARPU_MAIN_RAM, &myx); + ret = starpu_mpi_send(handlex, 1, 10, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_send"); + 
starpu_data_unregister(handlex); + } + else if (rank == 1) + { + MPI_Status status; + struct starpu_my_data myx = {.d = 11 , .c = 'a'}; + starpu_data_handle_t handlex; + starpu_my_data_register(&handlex, STARPU_MAIN_RAM, &myx); + ret = starpu_mpi_recv(handlex, 0, 10, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_unregister(handlex); + FPRINTF(stderr, "[starpu mpi] myx.d=%d myx.c=%c\n", myx.d, myx.c); + STARPU_ASSERT_MSG(myx.d == 98 && myx.c == 'z', "Incorrect received value\n"); + } + + if (rank == 0) + { + int *compare_ptr = &compare; + + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle0, 1, 20, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + ret = starpu_mpi_irecv_detached(handle1, 1, 30, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&starpu_my_data_compare_codelet, STARPU_R, handle0, STARPU_R, handle1, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + else if (rank == 1) + { + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_irecv_detached(handle0, 0, 20, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 received value", strlen("node1 
received value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle0, 0, 30, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_data_unregister(handle0); + starpu_data_unregister(handle1); + + starpu_my_data_shutdown(); + starpu_mpi_shutdown(); + + if (rank == 0) + { + FPRINTF(stderr, "[node 0] %s\n", compare==1?"SUCCESS":"FAILURE"); + } + + return (rank == 0) ? !compare : 0; +} diff --git a/mpi/examples/user_datatype/user_datatype2.c b/mpi/examples/user_datatype/user_datatype2.c new file mode 100644 index 0000000..8fbb79c --- /dev/null +++ b/mpi/examples/user_datatype/user_datatype2.c @@ -0,0 +1,107 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "my_interface.h" + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(int argc, char **argv) +{ + int rank, nodes; + int ret=0; + int compare=0; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; + struct starpu_my_data my1 = {.d = 98 , .c = 'z'}; + + starpu_data_handle_t handle0; + starpu_data_handle_t handle1; + + if (rank == 1) + { + my0.d = 0; + my0.c = 'z'; + } + + starpu_my_data2_register(&handle0, STARPU_MAIN_RAM, &my0); + starpu_my_data2_register(&handle1, -1, &my1); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + if (rank == 0) + { + int *compare_ptr = &compare; + + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle0, 1, 10, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + ret = starpu_mpi_irecv_detached(handle1, 1, 20, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&starpu_my_data_compare_codelet, STARPU_R, handle0, STARPU_R, handle1, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); 
+ } + else if (rank == 1) + { + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_irecv_detached(handle0, 0, 10, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(handle0, 0, 20, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_mpi_datatype_unregister(handle0); + starpu_data_unregister(handle0); + starpu_data_unregister(handle1); + + starpu_my_data2_shutdown(); + starpu_mpi_shutdown(); + + if (rank == 0) + { + FPRINTF(stderr, "[node 0] %s\n", compare==1?"SUCCESS":"FAILURE"); + } + + return (rank == 0) ? !compare : 0; +} diff --git a/mpi/examples/user_datatype/user_datatype_early.c b/mpi/examples/user_datatype/user_datatype_early.c new file mode 100644 index 0000000..79f5c24 --- /dev/null +++ b/mpi/examples/user_datatype/user_datatype_early.c @@ -0,0 +1,100 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "my_interface.h" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(int argc, char **argv) +{ + int rank, nodes; + int ret=0; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; + struct starpu_my_data my1 = {.d = 11 , .c = 'a'}; + + if (rank == 1) + { + my0.d *= 2; + my0.c += 1; + my1.d *= 2; + my1.c += 1; + } + + starpu_data_handle_t handle0; + starpu_data_handle_t handle1; + starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); + starpu_my_data_register(&handle1, STARPU_MAIN_RAM, &my1); + + if (rank == 0) + { + ret = starpu_mpi_send(handle0, 1, 10, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(handle1, 1, 20, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 1) + { + // We want handle0 to be received as early_data and as starpu_mpi_data_register() has not be called, it will be received as raw memory, and then unpacked with MPI_Unpack() + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle0 init value", strlen("node1 handle0 init value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle1 init value", strlen("node1 handle1 init value")+1, STARPU_R, handle1, 
0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_recv(handle1, 0, 20, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv1"); + ret = starpu_mpi_recv(handle0, 0, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv2"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle0 received value", strlen("node1 handle0 received value")+1, STARPU_R, handle0, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle1 received value", strlen("node1 handle1 received value")+1, STARPU_R, handle1, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_data_unregister(handle0); + starpu_data_unregister(handle1); + + if (rank == 1) + { + STARPU_ASSERT_MSG(my0.d == 42 && my0.c == 'n' && my1.d == 11 && my1.c == 'a', "Incorrect received values"); + } + + starpu_my_data_shutdown(); + starpu_mpi_shutdown(); + + return 0; +} diff --git a/mpi/examples/user_datatype/user_datatype_interface.c b/mpi/examples/user_datatype/user_datatype_interface.c new file mode 100644 index 0000000..2a40534 --- /dev/null +++ b/mpi/examples/user_datatype/user_datatype_interface.c @@ -0,0 +1,105 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "my_interface.h" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main(int argc, char **argv) +{ + int rank, nodes; + int ret=0; + struct starpu_my_data my_data; + struct starpu_my_data my_data2 = {.d = 77, .c = 'x'}; + starpu_data_handle_t my_handle1; + starpu_data_handle_t my_handle2; + starpu_data_handle_t my_handle3; + + ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + if (rank == 0) + { + my_data.d = 42; + my_data.c = 'n'; + } + else + { + my_data.d = 0; + my_data.c = 'z'; + } + + starpu_my_data_register(&my_handle1, STARPU_MAIN_RAM, &my_data2); + starpu_my_data_register(&my_handle2, STARPU_MAIN_RAM, &my_data2); + starpu_my_data_register(&my_handle3, STARPU_MAIN_RAM, &my_data); + starpu_mpi_barrier(MPI_COMM_WORLD); + + if (rank == 0) + { + ret = starpu_mpi_send(my_handle1, 1, 10, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(my_handle2, 1, 12, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(my_handle3, 1, 14, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 1) + { + starpu_mpi_req req; + + ret = 
starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, my_handle3, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_irecv(my_handle3, &req, 0, 14, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_recv(my_handle2, 0, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_recv(my_handle1, 0, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_wait(&req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 rceived value", strlen("node1 rceived value")+1, STARPU_R, my_handle3, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_task_wait_for_all(); + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_data_unregister(my_handle1); + starpu_data_unregister(my_handle2); + starpu_data_unregister(my_handle3); + + starpu_my_data_shutdown(); + starpu_mpi_shutdown(); + + return 0; +} diff --git a/mpi/include/fstarpu_mpi_mod.f90 b/mpi/include/fstarpu_mpi_mod.f90 new file mode 100644 index 0000000..b0a93fa --- /dev/null +++ b/mpi/include/fstarpu_mpi_mod.f90 @@ -0,0 +1,776 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+module fstarpu_mpi_mod
+        use iso_c_binding
+        use fstarpu_mod
+        implicit none
+        ! C-interoperable interface bodies for the StarPU-MPI API; each one follows a comment giving the C prototype it corresponds to.
+        interface
+                ! == mpi/include/starpu_mpi.h ==
+                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_isend"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend ! int result, as in the prototype
+                        type(c_ptr), value, intent(in) :: dh ! data_handle in the prototype
+                        type(c_ptr), value, intent(in) :: mpi_req ! starpu_mpi_req *req in the prototype
+                        integer(c_int), value, intent(in) :: dst ! dest in the prototype
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int -- presumably a Fortran communicator handle; confirm in the C wrapper
+                end function fstarpu_mpi_isend
+
+                ! == mpi/include/starpu_mpi.h ==
+                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) ! binds to the C symbol "fstarpu_mpi_isend_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        type(c_ptr), value, intent(in) :: mpi_req
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: prio ! same dummies as fstarpu_mpi_isend plus this priority
+                        integer(c_int), value, intent(in) :: mpi_comm
+                end function fstarpu_mpi_isend_prio
+
+                !
int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_irecv"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_irecv
+                        type(c_ptr), value, intent(in) :: dh
+                        type(c_ptr), value, intent(in) :: mpi_req ! starpu_mpi_req *req in the prototype
+                        integer(c_int), value, intent(in) :: src
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                end function fstarpu_mpi_irecv
+
+                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C) ! binds to the C symbol "fstarpu_mpi_send"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_send
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: mpi_comm
+                end function fstarpu_mpi_send
+
+                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C) ! binds to the C symbol "fstarpu_mpi_send_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_send_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: prio ! same dummies as fstarpu_mpi_send plus this priority
+                        integer(c_int), value, intent(in) :: mpi_comm
+                end function fstarpu_mpi_send_prio
+
+                !
int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status);
+                function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_recv"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_recv
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: src
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_ptr), value, intent(in) :: mpi_status ! MPI_Status *status in the prototype, passed as an opaque address
+                end function fstarpu_mpi_recv
+
+                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) ! binds to the C symbol "fstarpu_mpi_isend_detached"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_detached
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_funptr), value, intent(in) :: callback ! void (*callback)(void *) in the prototype
+                        type(c_ptr), value, intent(in) :: arg ! void *arg in the prototype
+                end function fstarpu_mpi_isend_detached
+
+                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) ! binds to the C symbol "fstarpu_mpi_isend_detached_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_detached_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: prio
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_funptr), value, intent(in) :: callback
+                        type(c_ptr), value, intent(in) :: arg
+                end function fstarpu_mpi_isend_detached_prio
+
+                !
int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C) ! binds to the C symbol "fstarpu_mpi_recv_detached" (named recv, while the prototype above is irecv)
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_recv_detached
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: src
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_funptr), value, intent(in) :: callback ! void (*callback)(void *) in the prototype
+                        type(c_ptr), value, intent(in) :: arg ! void *arg in the prototype
+                end function fstarpu_mpi_recv_detached
+
+                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) ! binds to the C symbol "fstarpu_mpi_issend"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_issend
+                        type(c_ptr), value, intent(in) :: dh
+                        type(c_ptr), value, intent(in) :: mpi_req
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: mpi_comm
+                end function fstarpu_mpi_issend
+
+                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) ! binds to the C symbol "fstarpu_mpi_issend_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_issend_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        type(c_ptr), value, intent(in) :: mpi_req
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: prio
+                        integer(c_int), value, intent(in) :: mpi_comm
+                end function fstarpu_mpi_issend_prio
+
+                !
int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_issend_detached"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_issend_detached
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_funptr), value, intent(in) :: callback ! void (*callback)(void *) in the prototype
+                        type(c_ptr), value, intent(in) :: arg ! void *arg in the prototype
+                end function fstarpu_mpi_issend_detached
+
+                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) ! binds to the C symbol "fstarpu_mpi_issend_detached_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_issend_detached_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: prio
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_funptr), value, intent(in) :: callback
+                        type(c_ptr), value, intent(in) :: arg
+                end function fstarpu_mpi_issend_detached_prio
+
+                ! int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
+                function fstarpu_mpi_wait(req,st) bind(C,name="starpu_mpi_wait") ! explicit name=: binds directly to starpu_mpi_wait
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_wait
+                        type(c_ptr), value, intent(in) :: req ! starpu_mpi_req *req in the prototype
+                        type(c_ptr), value, intent(in) :: st ! MPI_Status *status in the prototype
+                end function fstarpu_mpi_wait
+
+                !
int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
+                function fstarpu_mpi_test(req,flag,st) bind(C,name="starpu_mpi_test") ! explicit name=: binds directly to starpu_mpi_test
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_test
+                        type(c_ptr), value, intent(in) :: req ! starpu_mpi_req *req in the prototype
+                        type(c_ptr), value, intent(in) :: flag ! int *flag in the prototype: the caller passes the address of a C int
+                        type(c_ptr), value, intent(in) :: st ! MPI_Status *status in the prototype
+                end function fstarpu_mpi_test
+
+                ! int starpu_mpi_barrier(MPI_Comm comm);
+                function fstarpu_mpi_barrier (mpi_comm) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_barrier"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_barrier
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                end function fstarpu_mpi_barrier
+
+                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+                function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) &
+                        bind(C) ! binds to the C symbol "fstarpu_mpi_recv_detached_sequential_consistency" (named recv, while the prototype above is irecv)
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: src
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_funptr), value, intent(in) :: callback
+                        type(c_ptr), value, intent(in) :: arg
+                        integer(c_int), value, intent(in) :: seq_const ! int sequential_consistency in the prototype
+                end function fstarpu_mpi_recv_detached_sequential_consistency
+
+
+                ! int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
+                ! -> cf fstarpu_mpi_init
+                ! int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
+                ! -> cf fstarpu_mpi_init
+                ! int starpu_mpi_initialize(void) STARPU_DEPRECATED;
+                ! -> cf fstarpu_mpi_init
+                ! int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
+                ! -> cf fstarpu_mpi_init
+
+                !
int starpu_mpi_shutdown(void);
+                function fstarpu_mpi_shutdown () bind(C,name="starpu_mpi_shutdown") ! explicit name=: binds directly to starpu_mpi_shutdown
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_shutdown
+                end function fstarpu_mpi_shutdown
+
+                ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+                function fstarpu_mpi_task_build(arglist) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_task_build"
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr) :: fstarpu_mpi_task_build ! struct starpu_task * in the prototype, returned as an opaque address
+                        type(c_ptr), dimension(*), intent(in) :: arglist ! one array of addresses replaces the C variadic list -- element layout is defined by the C wrapper (not visible here)
+                end function fstarpu_mpi_task_build
+
+                ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+                subroutine fstarpu_mpi_task_post_build(arglist) bind(C) ! declared as a subroutine: the int result of the prototype is not exposed
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_post_build
+
+                ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+                subroutine fstarpu_mpi_task_insert(arglist) bind(C) ! declared as a subroutine: the int result of the prototype is not exposed
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_insert
+                subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert") ! second Fortran name for the same C symbol as fstarpu_mpi_task_insert
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_insert_task
+
+                ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
+                subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C) ! binds to the C symbol "fstarpu_mpi_get_data_on_node"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: node
+                end subroutine fstarpu_mpi_get_data_on_node
+
+                !
void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
+                subroutine fstarpu_mpi_get_data_on_node_detached(mpi_comm,dh,node,callback,arg) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_get_data_on_node_detached"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_ptr), value, intent(in) :: dh ! data_handle in the prototype
+                        integer(c_int), value, intent(in) :: node
+                        type(c_funptr), value, intent(in) :: callback ! void (*callback)(void*) in the prototype
+                        type(c_ptr), value, intent(in) :: arg ! void *arg in the prototype
+                end subroutine fstarpu_mpi_get_data_on_node_detached
+
+                ! void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
+                subroutine fstarpu_mpi_redux_data(mpi_comm,dh) bind(C) ! binds to the C symbol "fstarpu_mpi_redux_data"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_ptr), value, intent(in) :: dh
+                end subroutine fstarpu_mpi_redux_data
+
+                ! void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio);
+                subroutine fstarpu_mpi_redux_data_prio(mpi_comm,dh, prio) bind(C) ! binds to the C symbol "fstarpu_mpi_redux_data_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: prio
+                end subroutine fstarpu_mpi_redux_data_prio
+
+                ! void starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity);
+                subroutine fstarpu_mpi_redux_data_tree(mpi_comm,dh, arity) bind(C) ! binds to the C symbol "fstarpu_mpi_redux_data_tree"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: arity
+                end subroutine fstarpu_mpi_redux_data_tree
+
+                !
void starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity);
+                subroutine fstarpu_mpi_redux_data_prio_tree(mpi_comm,dh, prio, arity) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_redux_data_prio_tree"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_ptr), value, intent(in) :: dh ! data_handle in the prototype
+                        integer(c_int), value, intent(in) :: prio
+                        integer(c_int), value, intent(in) :: arity
+                end subroutine fstarpu_mpi_redux_data_prio_tree
+
+                ! int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
+                function fstarpu_mpi_scatter_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) ! binds to the C symbol "fstarpu_mpi_scatter_detached"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_scatter_detached
+                        type(c_ptr), intent(in) :: dhs(*) ! starpu_data_handle_t *data_handles: array of handles, passed by reference
+                        integer(c_int), value, intent(in) :: cnt ! int count in the prototype
+                        integer(c_int), value, intent(in) :: root
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_funptr), value, intent(in) :: scallback ! scallback/sarg and rcallback/rarg are the two callback/argument pairs of the prototype
+                        type(c_ptr), value, intent(in) :: sarg
+                        type(c_funptr), value, intent(in) :: rcallback
+                        type(c_ptr), value, intent(in) :: rarg
+                end function fstarpu_mpi_scatter_detached
+
+                !
int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
+                function fstarpu_mpi_gather_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_gather_detached"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_gather_detached
+                        type(c_ptr), intent(in) :: dhs(*) ! starpu_data_handle_t *data_handles: array of handles, passed by reference
+                        integer(c_int), value, intent(in) :: cnt ! int count in the prototype
+                        integer(c_int), value, intent(in) :: root
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_funptr), value, intent(in) :: scallback ! scallback/sarg and rcallback/rarg are the two callback/argument pairs of the prototype
+                        type(c_ptr), value, intent(in) :: sarg
+                        type(c_funptr), value, intent(in) :: rcallback
+                        type(c_ptr), value, intent(in) :: rarg
+                end function fstarpu_mpi_gather_detached
+
+
+                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C) ! binds to the C symbol "fstarpu_mpi_isend_detached_unlock_tag"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_ptr), value, intent(in) :: starpu_tag ! starpu_tag_t tag in the prototype, declared here as c_ptr -- NOTE(review): confirm how the C wrapper interprets it
+                end function fstarpu_mpi_isend_detached_unlock_tag
+
+                !
int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_isend_detached_unlock_tag_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: dst
+                        integer(c_int64_t), value, intent(in) :: data_tag ! starpu_mpi_tag_t in the prototype, carried as a 64-bit integer
+                        integer(c_int), value, intent(in) :: prio
+                        integer(c_int), value, intent(in) :: mpi_comm ! MPI_Comm in the prototype, carried as a C int
+                        type(c_ptr), value, intent(in) :: starpu_tag ! starpu_tag_t tag in the prototype, declared here as c_ptr -- NOTE(review): confirm how the C wrapper interprets it
+                end function fstarpu_mpi_isend_detached_unlock_tag_prio
+
+                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C) ! binds to the C symbol "fstarpu_mpi_recv_detached_unlock_tag" (named recv, while the prototype above is irecv)
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: src
+                        integer(c_int64_t), value, intent(in) :: data_tag
+                        integer(c_int), value, intent(in) :: mpi_comm
+                        type(c_ptr), value, intent(in) :: starpu_tag
+                end function fstarpu_mpi_recv_detached_unlock_tag
+
+                !
int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, data_tags, mpi_comms, starpu_tag) &
+                        bind(C) ! no name=: binds to the C symbol "fstarpu_mpi_isend_array_detached_unlock_tag"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag
+                        integer(c_int), value, intent(in) :: array_size ! unsigned array_size in the prototype, declared here as a signed C int
+                        type(c_ptr), intent(in) :: dhs(*) ! the (*) dummies are arrays passed by reference
+                        integer(c_int), intent(in) :: dsts(*)
+                        integer(c_int64_t), intent(in) :: data_tags(*) ! 64-bit tags here although the prototype comment says int *data_tag -- NOTE(review): confirm which is current
+                        integer(c_int), intent(in) :: mpi_comms(*) ! MPI_Comm *comm in the prototype, carried as an array of C ints
+                        type(c_ptr), value, intent(in) :: starpu_tag ! starpu_tag_t tag in the prototype, declared here as c_ptr
+                end function fstarpu_mpi_isend_array_detached_unlock_tag
+
+                ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, data_tags, prio, mpi_comms, &
+                        starpu_tag) bind(C) ! binds to the C symbol "fstarpu_mpi_isend_array_detached_unlock_tag_prio"
+                        use iso_c_binding
+                        implicit none
+                        integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag_prio
+                        integer(c_int), value, intent(in) :: array_size
+                        type(c_ptr), intent(in) :: dhs(*)
+                        integer(c_int), intent(in) :: dsts(*)
+                        integer(c_int64_t), intent(in) :: data_tags(*) ! same int / 64-bit discrepancy with the prototype comment as above
+                        integer(c_int), intent(in) :: prio(*) ! int *prio in the prototype
+                        integer(c_int), intent(in) :: mpi_comms(*)
+                        type(c_ptr), value, intent(in) :: starpu_tag
+                end function fstarpu_mpi_isend_array_detached_unlock_tag_prio
+
+                !
int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); + function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, data_tags, mpi_comms, starpu_tag) & + bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_recv_array_detached_unlock_tag + integer(c_int), value, intent(in) :: array_size + type(c_ptr), intent(in) :: dhs(*) + integer(c_int), intent(in) :: srcs(*) + integer(c_int64_t), intent(in) :: data_tags(*) + integer(c_int), intent(in) :: mpi_comms(*) + type(c_ptr), value, intent(in) :: starpu_tag + end function fstarpu_mpi_recv_array_detached_unlock_tag + + ! void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); NOTE(review): the C parameter is a non-const size_t *, yet the dummy below is intent(in) -- confirm whether C writes into comm_stats + subroutine fstarpu_mpi_comm_stats_retrieve (comm_stats) bind(C,name="starpu_mpi_comm_stats_retrieve") + use iso_c_binding + implicit none + integer(c_size_t), intent(in) :: comm_stats(*) + end subroutine fstarpu_mpi_comm_stats_retrieve + + + ! void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); + subroutine fstarpu_mpi_cache_flush(mpi_comm,dh) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + end subroutine fstarpu_mpi_cache_flush + + ! void starpu_mpi_cache_flush_all_data(MPI_Comm comm); + subroutine fstarpu_mpi_cache_flush_all_data(mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_cache_flush_all_data + + ! int starpu_mpi_comm_size(MPI_Comm comm, int *size); + function fstarpu_mpi_comm_size(mpi_comm,sz) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + integer(c_int), intent(out) :: sz + integer(c_int) :: fstarpu_mpi_comm_size + end function fstarpu_mpi_comm_size + + ! 
int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); + function fstarpu_mpi_comm_rank(mpi_comm,rank) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + integer(c_int), intent(out) :: rank + integer(c_int) :: fstarpu_mpi_comm_rank + end function fstarpu_mpi_comm_rank + + + ! int starpu_mpi_world_rank(void); + function fstarpu_mpi_world_rank() bind(C,name="starpu_mpi_world_rank") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_rank + end function fstarpu_mpi_world_rank + + ! int starpu_mpi_world_size(void); + function fstarpu_mpi_world_size() bind(C,name="starpu_mpi_world_size") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_size + end function fstarpu_mpi_world_size + + ! bound to the C symbol fstarpu_mpi_world_comm (no name= given); returns a communicator handle as integer(c_int), presumably the one for MPI_COMM_WORLD -- confirm + function fstarpu_mpi_world_comm() bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_world_comm + end function fstarpu_mpi_world_comm + + ! void starpu_mpi_comm_stats_enable() + subroutine fstarpu_mpi_comm_stats_enable() bind(C) + use iso_c_binding + implicit none + end subroutine fstarpu_mpi_comm_stats_enable + + ! void starpu_mpi_comm_stats_disable() + subroutine fstarpu_mpi_comm_stats_disable() bind(C) + use iso_c_binding + implicit none + end subroutine fstarpu_mpi_comm_stats_disable + + ! int starpu_mpi_get_communication_tag(void); + function fstarpu_mpi_get_communication_tag() bind(C,name="starpu_mpi_get_communication_tag") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_get_communication_tag + end function fstarpu_mpi_get_communication_tag + + ! void starpu_mpi_set_communication_tag(int tag); NOTE(review): the C prototype takes int, but tag is declared integer(c_int64_t) below (the getter uses c_int) -- confirm + subroutine fstarpu_mpi_set_communication_tag(tag) bind(C,name="starpu_mpi_set_communication_tag") + use iso_c_binding + implicit none + integer(c_int64_t), value, intent(in) :: tag + end subroutine fstarpu_mpi_set_communication_tag + + ! 
void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm); + subroutine fstarpu_mpi_data_register_comm(dh,tag,rank,mpi_comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + integer(c_int), value, intent(in) :: rank + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_data_register_comm + + ! #define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD) + subroutine fstarpu_mpi_data_register(dh,tag,rank) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_register + + ! void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); + subroutine fstarpu_mpi_data_set_rank_comm(dh,rank,mpi_comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + integer(c_int), value, intent(in) :: mpi_comm + end subroutine fstarpu_mpi_data_set_rank_comm + + ! #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) + subroutine fstarpu_mpi_data_set_rank(dh,rank) bind(C) + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_set_rank + + ! void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); + subroutine fstarpu_mpi_data_set_tag(dh,tag) bind(C,name="starpu_mpi_data_set_tag") + use iso_c_binding + implicit none + type(c_ptr), value, intent(in) :: dh + integer(c_int64_t), value, intent(in) :: tag + end subroutine fstarpu_mpi_data_set_tag + + ! 
int starpu_mpi_data_get_rank(starpu_data_handle_t handle); + function fstarpu_mpi_data_get_rank(dh) bind(C,name="starpu_mpi_data_get_rank") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_get_rank + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_data_get_rank + + ! starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle); + function fstarpu_mpi_data_get_tag(dh) bind(C,name="starpu_mpi_data_get_tag") + use iso_c_binding + implicit none + integer(c_int64_t) :: fstarpu_mpi_data_get_tag + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_data_get_tag + + ! void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int rank); + subroutine fstarpu_mpi_data_migrate(mpi_comm,dh,rank) bind(C) + use iso_c_binding + implicit none + integer(c_int), value, intent(in) :: mpi_comm + type(c_ptr), value, intent(in) :: dh + integer(c_int), value, intent(in) :: rank + end subroutine fstarpu_mpi_data_migrate + + ! #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 + ! #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 + + ! int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); + function fstarpu_mpi_node_selection_register_policy(policy_func) & + bind(C,name="starpu_mpi_node_selection_register_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_node_selection_register_policy + type(c_funptr), value, intent(in) :: policy_func + end function fstarpu_mpi_node_selection_register_policy + + ! int starpu_mpi_node_selection_unregister_policy(int policy); NOTE(review): the C prototype takes int policy, but the dummy below is declared type(c_ptr) -- confirm + function fstarpu_mpi_node_selection_unregister_policy(policy) & + bind(C,name="starpu_mpi_node_selection_unregister_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_node_selection_unregister_policy + type(c_ptr), value, intent(in) :: policy + end function fstarpu_mpi_node_selection_unregister_policy + + ! 
int starpu_mpi_node_selection_get_current_policy(); NOTE(review): the bind name below says data_selection while this prototype says node_selection -- confirm the C symbol exists + function fstarpu_mpi_data_selection_get_current_policy() & + bind(C,name="starpu_mpi_data_selection_get_current_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_selection_get_current_policy + end function fstarpu_mpi_data_selection_get_current_policy + + ! int starpu_mpi_node_selection_set_current_policy(int policy); NOTE(review): bind name says data_selection, and int policy is declared type(c_ptr) below -- confirm both + function fstarpu_mpi_data_selection_set_current_policy(policy) & + bind(C,name="starpu_mpi_data_selection_set_current_policy") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_data_selection_set_current_policy + type(c_ptr), value, intent(in) :: policy + end function fstarpu_mpi_data_selection_set_current_policy + + ! int starpu_mpi_cache_is_enabled(); + function fstarpu_mpi_cache_is_enabled() bind(C,name="starpu_mpi_cache_is_enabled") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_cache_is_enabled + end function fstarpu_mpi_cache_is_enabled + + ! int starpu_mpi_cache_set(int enabled); + function fstarpu_mpi_cache_set(enabled) bind(C,name="starpu_mpi_cache_set") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_cache_set + integer(c_int), value, intent(in) :: enabled + end function fstarpu_mpi_cache_set + + ! int starpu_mpi_wait_for_all(MPI_Comm comm); + function fstarpu_mpi_wait_for_all (mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_wait_for_all + integer(c_int), value, intent(in) :: mpi_comm + end function fstarpu_mpi_wait_for_all + + ! 
int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + function fstarpu_mpi_datatype_register(dh, alloc_func, free_func) bind(C,name="starpu_mpi_datatype_register") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_datatype_register + type(c_ptr), value, intent(in) :: dh + type(c_funptr), value, intent(in) :: alloc_func + type(c_funptr), value, intent(in) :: free_func + end function fstarpu_mpi_datatype_register + + ! int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); + function fstarpu_mpi_datatype_unregister(dh) bind(C,name="starpu_mpi_datatype_unregister") + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_datatype_unregister + type(c_ptr), value, intent(in) :: dh + end function fstarpu_mpi_datatype_unregister + + + function fstarpu_mpi_req_alloc() bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_req_alloc + end function fstarpu_mpi_req_alloc + + subroutine fstarpu_mpi_req_free(req) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: req + end subroutine fstarpu_mpi_req_free + + function fstarpu_mpi_status_alloc() bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_status_alloc + end function fstarpu_mpi_status_alloc + + subroutine fstarpu_mpi_status_free(st) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: st + end subroutine fstarpu_mpi_status_free + + + + end interface + + contains + function fstarpu_mpi_init (initialize_mpi,mpi_comm) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_init + integer(c_int), intent(in) :: initialize_mpi + integer(c_int), optional, intent(in) :: mpi_comm + type(c_ptr) :: argcv + integer(c_int) :: fargc,i,farg_len + character(len=1) :: farg_1 + character(len=:), allocatable :: farg + integer(c_int) :: mpi_comm_present, mpi_comm_or_0 + 
integer(c_int) :: ret + + interface + function fstarpu_mpi_argcv_alloc(argc, initialize_mpi, comm_present, comm) bind(C) + use iso_c_binding + implicit none + type(c_ptr) :: fstarpu_mpi_argcv_alloc + integer(c_int),value,intent(in) :: argc + integer(c_int),value,intent(in) :: initialize_mpi + integer(c_int),value,intent(in) :: comm_present + integer(c_int),value,intent(in) :: comm + end function fstarpu_mpi_argcv_alloc + + subroutine fstarpu_mpi_argcv_set_arg(argcv, i, l, s) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: argcv + integer(c_int),value,intent(in) :: i + integer(c_int),value,intent(in) :: l + character(c_char),intent(in) :: s + end subroutine fstarpu_mpi_argcv_set_arg + + subroutine fstarpu_mpi_argcv_free(argcv) bind(C) + use iso_c_binding + implicit none + type(c_ptr),value,intent(in) :: argcv + end subroutine fstarpu_mpi_argcv_free + + function fstarpu_mpi_init_c(argcv) bind(C) + use iso_c_binding + implicit none + integer(c_int) :: fstarpu_mpi_init_c + type(c_ptr),value,intent(in) :: argcv + end function fstarpu_mpi_init_c + end interface + + fargc = command_argument_count() + !write(*,*) "fargc",fargc + + if (present(mpi_comm)) then + mpi_comm_present = 1 + mpi_comm_or_0 = mpi_comm + else + mpi_comm_present = 0 + mpi_comm_or_0 = 0 + end if + !write(*,*) "initialize_mpi",initialize_mpi + !write(*,*) "mpi_comm_present",mpi_comm_present + argcv = fstarpu_mpi_argcv_alloc(fargc, initialize_mpi, mpi_comm_present, mpi_comm_or_0) + do i=0,fargc-1 + call get_command_argument(i, farg_1, farg_len) + allocate (character(len=farg_len) :: farg) + call get_command_argument(i, farg) + call fstarpu_mpi_argcv_set_arg(argcv, i, farg_len, farg) + deallocate (farg) + end do + ret = fstarpu_mpi_init_c(argcv) + call fstarpu_mpi_argcv_free(argcv) + fstarpu_mpi_init = ret + end function fstarpu_mpi_init + +end module fstarpu_mpi_mod diff --git a/mpi/include/starpu_mpi.h b/mpi/include/starpu_mpi.h new file mode 100644 index 0000000..386135b 
--- /dev/null +++ b/mpi/include/starpu_mpi.h @@ -0,0 +1,1026 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_H__ +#define __STARPU_MPI_H__ + +#include + +#if defined(STARPU_USE_MPI) + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_MPI_Support MPI Support + @{ +*/ + +/** + @name Initialisation + @{ +*/ + +/** + Initialize the StarPU library with the given \p conf, and + initialize the StarPU-MPI library with the given MPI communicator + \p comm. \p initialize_mpi indicates if MPI should be initialized + or not by StarPU. StarPU-MPI takes the opportunity to modify \p + conf to either reserve a core for its MPI thread (by default), or + execute MPI calls on the CPU driver 0 between tasks. +*/ +int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf); + +/** + Same as starpu_mpi_init_conf(), except that this does not initialize the + StarPU library. The caller thus has to call starpu_init() before this, and it + can not reserve a core for the MPI communications. 
+*/ +int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm); + +/** + Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD. +*/ +int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi); + +/** + @deprecated + This function has been made deprecated. One should use instead the + function starpu_mpi_init(). This function does not call \c + MPI_Init(), which has to be called beforehand. +*/ +int starpu_mpi_initialize(void) STARPU_DEPRECATED; + +/** + @deprecated + This function has been made deprecated. One should use instead the + function starpu_mpi_init(). MPI will be initialized by StarPU-MPI by + calling MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, + ...). +*/ +int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED; + +/** + Call starpu_mpi_shutdown_comm() with the MPI communicator \c MPI_COMM_WORLD +*/ +int starpu_mpi_shutdown(void); + +/** + Clean the StarPU-MPI library. This must be called after calling any + \c starpu_mpi functions and before the call to starpu_shutdown(), + if any. \c MPI_Finalize() will be called if StarPU-MPI has been + initialized by starpu_mpi_init(). +*/ +int starpu_mpi_shutdown_comm(MPI_Comm comm); + +/** + Register \p comm. The function is automatically called for the + communicator given to starpu_mpi_init_comm(). +*/ +int starpu_mpi_comm_register(MPI_Comm comm); + +/** + Return in \p size the size of the communicator \p comm. The + function will fail if starpu_mpi_comm_register() has not been + previously called with the given communicator. +*/ +int starpu_mpi_comm_size(MPI_Comm comm, int *size); + +/** + Return in \p rank the rank of the calling process in the + communicator \p comm. The function will fail if + starpu_mpi_comm_register() has not been previously called with the + given communicator. 
+*/ +int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); + +/** + Return the rank of the calling process in the communicator \c + MPI_COMM_WORLD +*/ +int starpu_mpi_world_rank(void); + +/** + Return the size of the communicator \c MPI_COMM_WORLD +*/ +int starpu_mpi_world_size(void); + +/** + When given to the function starpu_mpi_comm_get_attr(), retrieve the + value for the upper bound for tag value. +*/ +#define STARPU_MPI_TAG_UB MPI_TAG_UB + +/** + Retrieve an attribute value by key, similarly to the MPI function + \c MPI_Comm_get_attr(), except that the value is a pointer to + int64_t instead of int. If an attribute is attached on \p comm to + \p keyval, then the call returns \p flag equal to \c 1, and the + attribute value in \p attribute_val. Otherwise, \p flag is set to + \c 0. +*/ +int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag); + +/** + Get the logical index of the core where the MPI thread is bound. +*/ +int starpu_mpi_get_thread_cpuid(void); + +/** + Get the tag used for MPI communications submitted by StarPU. +*/ +int starpu_mpi_get_communication_tag(void); + +/** + Set the tag used for MPI communications submitted by StarPU. +*/ +void starpu_mpi_set_communication_tag(int tag); + +/** @} */ + +/** + @name Communication + \anchor MPIPtpCommunication + @{ +*/ + +/** + Opaque type for communication request +*/ +typedef void *starpu_mpi_req; + +/** + Type of the message tag. +*/ +typedef int64_t starpu_mpi_tag_t; + +/** + Post a standard-mode, non blocking send of \p data_handle to the + node \p dest using the message tag \p data_tag within the + communicator \p comm. After the call, the pointer to the request \p + req can be used to test or to wait for the completion of the + communication. +*/ +int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + +/** + Similar to starpu_mpi_isend(), but take a priority \p prio. 
+*/ +int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + +/** + Post a nonblocking receive in \p data_handle from the node \p + source using the message tag \p data_tag within the communicator \p + comm. After the call, the pointer to the request \p req can be used + to test or to wait for the completion of the communication. +*/ +int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm); + +/** + Perform a standard-mode, blocking send of \p data_handle to the + node \p dest using the message tag \p data_tag within the + communicator \p comm. +*/ +int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + +/** + Similar to starpu_mpi_send(), but take a priority \p prio. +*/ +int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + +/** + Perform a standard-mode, blocking receive in \p data_handle from + the node \p source using the message tag \p data_tag within the + communicator \p comm. + The value of \p status cannot be NULL, use the predefined value + MPI_STATUS_IGNORE to ignore the status. +*/ +int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status); + +/** + Similar to starpu_mpi_recv(), but take a priority \p prio +*/ +int starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status); + +/** + Post a standard-mode, non blocking send of \p data_handle to the + node \p dest using the message tag \p data_tag within the + communicator \p comm. On completion, the \p callback function is + called with the argument \p arg. 
+ Similarly to the pthread detached functionality, when a detached + communication completes, its resources are automatically released + back to the system, there is no need to test or to wait for the + completion of the request. +*/ +int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Similar to starpu_mpi_isend_detached(), but take a priority \p prio. +*/ +int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Post a nonblocking receive in \p data_handle from the node \p + source using the message tag \p data_tag within the communicator \p + comm. On completion, the \p callback function is called with the + argument \p arg. + Similarly to the pthread detached functionality, when a detached + communication completes, its resources are automatically released + back to the system, there is no need to test or to wait for the + completion of the request. +*/ +int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Similar to starpu_mpi_irecv_detached(), but take a priority \p prio. +*/ +int starpu_mpi_irecv_detached_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Post a nonblocking receive in \p data_handle from the node \p + source using the message tag \p data_tag within the communicator \p + comm. On completion, the \p callback function is called with the + argument \p arg. 
+ The parameter \p sequential_consistency makes it possible to enable or disable + the sequential consistency for \p data_handle (sequential + consistency will be enabled or disabled based on the value of the + parameter \p sequential_consistency and the value of the sequential + consistency defined for \p data_handle). + Similarly to the pthread detached functionality, when a detached + communication completes, its resources are automatically released + back to the system, there is no need to test or to wait for the + completion of the request. +*/ +int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency); + +/** + Perform a synchronous-mode, non-blocking send of \p data_handle to + the node \p dest using the message tag \p data_tag within the + communicator \p comm. +*/ +int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); + +/** + Similar to starpu_mpi_issend(), but take a priority \p prio. +*/ +int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); + +/** + Perform a synchronous-mode, non-blocking send of \p data_handle to + the node \p dest using the message tag \p data_tag within the + communicator \p comm. On completion, the \p callback function is + called with the argument \p arg. + Similarly to the pthread detached functionality, when a detached + communication completes, its resources are automatically released + back to the system, there is no need to test or to wait for the + completion of the request. +*/ +int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Similar to starpu_mpi_issend_detached(), but take a priority \p prio. 
+*/ +int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); + +/** + Return when the operation identified by request \p req is complete. + The value of \p status cannot be NULL, use the predefined value + MPI_STATUS_IGNORE to ignore the status. +*/ +int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status); + +/** + If the operation identified by \p req is complete, set \p flag to + 1. The \p status object is set to contain information on the + completed operation. +*/ +int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status); + +/** + Block the caller until all group members of the communicator \p + comm have called it. +*/ +int starpu_mpi_barrier(MPI_Comm comm); + +/** + Wait until all StarPU tasks and communications for the given + communicator are completed. +*/ +int starpu_mpi_wait_for_all(MPI_Comm comm); + +/** + Post a standard-mode, non blocking send of \p data_handle to the + node \p dest using the message tag \p data_tag within the + communicator \p comm. On completion, \p tag is unlocked. +*/ +int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); + +/** + Similar to starpu_mpi_isend_detached_unlock_tag(), but take a + priority \p prio. +*/ +int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag); + +/** + Post a nonblocking receive in \p data_handle from the node \p + source using the message tag \p data_tag within the communicator \p + comm. On completion, \p tag is unlocked. +*/ +int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); + +/** + Post \p array_size standard-mode, non blocking send. 
Each post + sends the n-th data of the array \p data_handle to the n-th node of + the array \p dest using the n-th message tag of the array \p + data_tag within the n-th communicator of the array \p comm. On + completion of all the requests, \p tag is unlocked. +*/ +int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); + +/** + Similar to starpu_mpi_isend_array_detached_unlock_tag(), but take a + priority \p prio. +*/ +int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag); + +/** + Post \p array_size nonblocking receives. Each post receives in the + n-th data of the array \p data_handle from the n-th node of the + array \p source using the n-th message tag of the array \p data_tag + within the n-th communicator of the array \p comm. On completion of + all the requests, \p tag is unlocked. +*/ +int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); + +typedef int (*starpu_mpi_datatype_allocate_func_t)(starpu_data_handle_t, MPI_Datatype *); +typedef int (*starpu_mpi_datatype_node_allocate_func_t)(starpu_data_handle_t, unsigned node, MPI_Datatype *); +typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *); + +/** + Register functions to create and free a MPI datatype for the given + handle. + Similar to starpu_mpi_interface_datatype_register(). + It is important that the function is called before any + communication can take place for a data with the given handle. See + \ref ExchangingUserDefinedDataInterface for an example. 
+*/ +int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + +/** + Register functions to create and free a MPI datatype for the given + interface id. + Similar to starpu_mpi_datatype_register(). + It is important that the function is called before any + communication can take place for a data with the given interface id. See + \ref ExchangingUserDefinedDataInterface for an example. +*/ +int starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + +/** + Register functions to create and free a MPI datatype for the given + handle. + Similar to starpu_mpi_datatype_register(), but the allocation function also takes a \c node argument. + It is important that the function is called before any + communication can take place for a data with the given handle. See + \ref ExchangingUserDefinedDataInterface for an example. +*/ +int starpu_mpi_datatype_node_register(starpu_data_handle_t handle, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + +/** + Register functions to create and free a MPI datatype for the given + interface id. + Similar to starpu_mpi_interface_datatype_register(), but the allocation function also takes a \c node argument. + It is important that the function is called before any + communication can take place for a data with the given interface id. See + \ref ExchangingUserDefinedDataInterface for an example. +*/ +int starpu_mpi_interface_datatype_node_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); + +/** + Unregister the MPI datatype functions stored for the interface of + the given handle. 
+*/ +int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); + +/** + Unregister the MPI datatype functions stored for the interface of + the given interface id. Similar to starpu_mpi_datatype_unregister(). +*/ +int starpu_mpi_interface_datatype_unregister(enum starpu_data_interface_id id); + +/** @} */ + +/** + @name Communication Cache + @{ +*/ + +/** + Return 1 if the communication cache is enabled, 0 otherwise +*/ +int starpu_mpi_cache_is_enabled(void); + +/** + If \p enabled is 1, enable the communication cache. Otherwise, + clean the cache if it was enabled and disable it. +*/ +int starpu_mpi_cache_set(int enabled); + +/** + Clear the send and receive communication cache for the data \p + data_handle and invalidate the value. The function has to be called + at the same point of task graph submission by all the MPI nodes on + which the handle was registered. The function does nothing if the + cache mechanism is disabled (see \ref STARPU_MPI_CACHE). +*/ +void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); + +/** + Clear the send and receive communication cache for all data and + invalidate their values. The function has to be called at the same + point of task graph submission by all the MPI nodes. The function + does nothing if the cache mechanism is disabled (see \ref + STARPU_MPI_CACHE). +*/ +void starpu_mpi_cache_flush_all_data(MPI_Comm comm); + +/** + Test whether \p data_handle is cached for reception, i.e. the value + was previously received from the owner node, and not flushed since + then. 
+*/ +int starpu_mpi_cached_receive(starpu_data_handle_t data_handle); + +/** + * If \p data is already available in the reception cache, return 1 + * If \p data is NOT available in the reception cache, add it to the + * cache and return 0 + * Return 0 if the communication cache is not enabled + */ +int starpu_mpi_cached_receive_set(starpu_data_handle_t data); +int starpu_mpi_cached_cp_receive_set(starpu_data_handle_t data_handle); + +/** + * Remove \p data from the reception cache + */ +void starpu_mpi_cached_receive_clear(starpu_data_handle_t data); + +/** + Test whether \p data_handle is cached for emission to node \p dest, + i.e. the value was previously sent to \p dest, and not flushed + since then. +*/ +int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest); + +/** + * If \p data is already available in the emission cache for node + * \p dest, return 1 + * If \p data is NOT available in the emission cache for node \p dest, + * add it to the cache and return 0 + * Return 0 if the communication cache is not enabled + */ +int starpu_mpi_cached_send_set(starpu_data_handle_t data, int dest); + +/** + * Remove \p data from the emission cache + */ +void starpu_mpi_cached_send_clear(starpu_data_handle_t data); + +/** @} */ + +/** + @name MPI Insert Task + \anchor MPIInsertTask + @{ +*/ + +/** + Can be used as rank when calling starpu_mpi_data_register() and + alike, to specify that the data is per-node: each node will have + its own value. Tasks writing to such data will be replicated on all + nodes (and all parameters then have to be per-node). Tasks not + writing to such data will just take the node-local value without + any MPI communication. +*/ +#define STARPU_MPI_PER_NODE -2 + +/** + Register to MPI a StarPU data handle with the given tag, rank and + MPI communicator. It also automatically clears the MPI + communication cache when unregistering the data. 
+*/ +void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm); + +/** + Register to MPI a StarPU data handle with the given tag, rank and + the MPI communicator \c MPI_COMM_WORLD. + It also automatically clears the MPI communication cache when + unregistering the data. +*/ +#define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD) + +/** + Register to MPI a StarPU data handle with the given tag. No rank + will be defined. + It also automatically clears the MPI communication cache when + unregistering the data. +*/ +void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); + +/** + Symbol kept for backward compatibility. Call function starpu_mpi_data_set_tag() +*/ +#define starpu_data_set_tag starpu_mpi_data_set_tag + +/** + Register to MPI a StarPU data handle with the given rank and given + communicator. No tag will be defined. + It also automatically clears the MPI communication cache when + unregistering the data. +*/ +void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); + +/** + Register to MPI a StarPU data handle with the given rank and the + MPI communicator \c MPI_COMM_WORLD. No tag will be defined. + It also automatically clears the MPI communication cache when + unregistering the data. +*/ +#define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) + +/** + Symbol kept for backward compatibility. Call function starpu_mpi_data_set_rank() +*/ +#define starpu_data_set_rank starpu_mpi_data_set_rank + +/** + Return the rank of the given data. +*/ +int starpu_mpi_data_get_rank(starpu_data_handle_t handle); + +/** + Symbol kept for backward compatibility. Call function starpu_mpi_data_get_rank() +*/ +#define starpu_data_get_rank starpu_mpi_data_get_rank + +/** + Return the tag of the given data. 
+*/ +starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle); +/** + Return the redux map of the given data. +*/ +char *starpu_mpi_data_get_redux_map(starpu_data_handle_t handle); + +/** + Symbol kept for backward compatibility. Call function starpu_mpi_data_get_tag() +*/ +#define starpu_data_get_tag starpu_mpi_data_get_tag + +/** + Create and submit a task corresponding to codelet with the + following arguments. The argument list must be zero-terminated. + The arguments following the codelet are the same types as for the + function starpu_task_insert(). + Access modes for data can also be + set with ::STARPU_SSEND to specify the data has to be sent using a + synchronous and non-blocking mode (see starpu_mpi_issend()). + The extra argument ::STARPU_EXECUTE_ON_NODE followed by an integer + allows to specify the MPI node to execute the codelet. It is also + possible to specify that the node owning a specific data will + execute the codelet, by using ::STARPU_EXECUTE_ON_DATA followed by + a data handle. + + The internal algorithm is as follows: +
      +
    1. + Find out which MPI node is going to execute the codelet. +
        +
      • + If there is only one node owning data in ::STARPU_W mode, it + will be selected; +
      • + If there are several nodes owning data in ::STARPU_W mode, a + node will be selected according to a given node selection + policy (see ::STARPU_NODE_SELECTION_POLICY or + starpu_mpi_node_selection_set_current_policy())
      • + The argument ::STARPU_EXECUTE_ON_NODE followed by an integer + can be used to specify the node; it is ignored if the node value is -1.
      • + The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
      +
    2. +
    3. + Send and receive data as requested. Nodes owning data which need to + be read by the task are sending them to the MPI node which will + execute it. The latter receives them. +
    4. +
    5. + Execute the codelet. This is done by the MPI node selected in the + 1st step of the algorithm. +
    6. +
    7. + If several MPI nodes own data to be written to, send written data + back to their owners. +
    8. +
    + + The algorithm also includes a communication cache mechanism that + allows not to send data twice to the same MPI node, unless the data + has been modified. The cache can be disabled (see \ref + STARPU_MPI_CACHE). +*/ +int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...); +#ifdef STARPU_USE_FXT +#define starpu_mpi_task_insert(comm, cl, ...) \ + starpu_mpi_task_insert(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Identical to starpu_mpi_task_insert(). Symbol kept for backward compatibility. +*/ +int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...); +#ifdef STARPU_USE_FXT +#define starpu_mpi_insert_task(comm, cl, ...) \ + starpu_mpi_insert_task(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Create a task corresponding to \p codelet with the following given + arguments. The argument list must be zero-terminated. The function + performs the first two steps of the function + starpu_mpi_task_insert(), i.e. submitting the MPI communications + needed before the execution of the task, and the creation of the + task on one node. Only the MPI node selected in the first step of + the algorithm will return a valid task structure which can then be + submitted, others will return NULL. The function + starpu_mpi_task_post_build() MUST be called after that on all + nodes, and after the submission of the task on the node which + creates it, with the SAME list of arguments. +*/ +struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); +#ifdef STARPU_USE_FXT +#define starpu_mpi_task_build(comm, cl, ...) \ + starpu_mpi_task_build(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) +#endif + +/** + Offer a va_list variant of starpu_mpi_task_build. 
+ */ +struct starpu_task *starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list); + +/** + MUST be called after a call to starpu_mpi_task_build(), + with the SAME list of arguments. Perform the fourth -- last -- step of + the algorithm described in starpu_mpi_task_insert(). +*/ +int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); + +/** + Offer a va_list variant of starpu_mpi_task_post_build. + */ +int starpu_mpi_task_post_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list); + +/** + Structure used to pass data from + starpu_mpi_task_exchange_data_before_execution() to + starpu_mpi_task_exchange_data_after_execution() + */ +struct starpu_mpi_task_exchange_params +{ + int do_execute; /**< is the caller going to execute the task */ + int xrank; /**< node executing the task */ + int priority; /**< priority of the task being executed */ +}; + +/** + Perform all necessary communications needed before the execution of + the given task. The field \c priority of \p params will be set with + the rank of the node which is selected to submit \p task. + After calling this function, and the submission of the task for the + selected node, all nodes MUST call the function + starpu_mpi_task_exchange_data_after_execution() with the parameters + \p descrs and \p params. + */ +int starpu_mpi_task_exchange_data_before_execution(MPI_Comm comm, struct starpu_task *task, struct starpu_data_descr *descrs, struct starpu_mpi_task_exchange_params *params); + +/** + MUST be called after a call to + starpu_mpi_task_exchange_data_before_execution() with the same + arguments \p descrs and \p params. + \p nb_data is the number of data in \p descrs. + Perform all the necessary communications needed after the execution + of the task, i.e the fourth -- last -- step of the algorithm + described in starpu_mpi_task_insert(). 
+*/ +int starpu_mpi_task_exchange_data_after_execution(MPI_Comm comm, struct starpu_data_descr *descrs, unsigned nb_data, struct starpu_mpi_task_exchange_params params); + +/** + Transfer data \p data_handle to MPI node \p node, sending it from + its owner if needed. At least the target node and the owner have to + call the function. + This waits for the transfer to be over. +*/ +int starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node); + +/** + Transfer data \p data_handle to MPI node \p node, sending it from + its owner if needed. At least the target node and the owner have to + call the function. On reception, the \p callback function is called + with the argument \p arg. +*/ +int starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg); + +/** + Transfer data \p data_handle to all MPI nodes, sending it from its + owner if needed. All nodes have to call the function. +*/ +void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle); + +/** + Submit migration of the data onto the \p new_rank MPI node. This + means both submitting the transfer of the data to node \p new_rank + if it hasn't been submitted already, and setting the home node of + the data to the new node. Further data transfers submitted by + starpu_mpi_task_insert() will be done from that new node. This + function thus needs to be called on all nodes which have registered + the data at the same point of tasks submissions. This also flushes + the cache for this data to avoid incoherencies. 
+*/ +void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank); + +/** @} */ + +/** + @name Node Selection Policy + \anchor MPINodeSelectionPolicy + @{ +*/ + +/** + Define the current policy + */ +#define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 +/** + Define the policy in which the selected node is the one having the + most data in ::STARPU_R mode +*/ +#define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 + +typedef int (*starpu_mpi_select_node_policy_func_t)(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data); + +/** + Register a new policy which can then be used when there is several + nodes owning data in ::STARPU_W mode. + Here an example of function defining a node selection policy. + The codelet will be executed on the node owing the first data with + a size bigger than 1M, or on the node 0 if no data fits the given + size. + \code{.c} + int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) + { + // me is the current MPI rank + // nb_nodes is the number of MPI nodes + // descr is the description of the data specified when calling starpu_mpi_task_insert + // nb_data is the number of data in descr + int i; + for(i= 0 ; i 1024*1024) return rank; + } + } + return 0; + } + \endcode +*/ +int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); + +/** + Unregister a previously registered policy. +*/ +int starpu_mpi_node_selection_unregister_policy(int policy); + +/** + Return the current policy used to select the node which will + execute the codelet +*/ +int starpu_mpi_node_selection_get_current_policy(void); + +/** + Set the current policy used to select the node which will execute + the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA + selects the node having the most data in ::STARPU_R mode so as to + minimize the amount of data to be transferred. 
+*/ +int starpu_mpi_node_selection_set_current_policy(int policy); + +/** @} */ + +/** + @name Collective Operations + \anchor MPICollectiveOperations + @{ +*/ + +/** + Perform a reduction on the given data \p handle. All nodes send the + data to its owner node which will perform a reduction. +*/ +int starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle); + +/** + Similar to starpu_mpi_redux_data(), but take a priority \p prio. +*/ +int starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio); + +/** + Perform a reduction on the given data \p handle. + Nodes perform the reduction through in a tree-based fashion. + The tree use is an \p arity - ary tree. +*/ +int starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity); + +/** + Similar to starpu_mpi_redux_data_tree(), but take a priority \p prio. +*/ +int starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity); + +/** + Scatter data among processes of the communicator based on the + ownership of the data. For each data of the array \p data_handles, + the process \p root sends the data to the process owning this data. + Processes receiving data must have valid data handles to receive + them. On completion of the collective communication, the \p + scallback function is called with the argument \p sarg on the + process \p root, the \p rcallback function is called with the + argument \p rarg on any other process. +*/ +int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); + +/** + Gather data from the different processes of the communicator onto + the process \p root. Each process owning data handle in the array + \p data_handles will send them to the process \p root. The process + \p root must have valid data handles to receive the data. 
On + completion of the collective communication, the \p rcallback + function is called with the argument \p rarg on the process root, + the \p scallback function is called with the argument \p sarg on + any other process. +*/ +int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); + +/** @} */ + +/** + @name Dynamic Broadcasts + \anchor MPIDynamicBroadcasts + @{ +*/ + +/** + Enable or disable coop sends.
    + Used for benchmark, not recommended for production: can cause problems + if there are still communications while disabling, or when shutting down + StarPU.
    + This function must be called after the initialization of StarPU. +*/ +void starpu_mpi_coop_sends_set_use(int use_coop_sends); + +/** + Return whether coop sends are enabled or not. +*/ +int starpu_mpi_coop_sends_get_use(void); + +/** + Explicit the number of different sends of the \p data_handle. When the number + of sends is reached, a collective operation is triggered. If this function isn't + called, StarPU will trigger a collective operation containing only posted sends + while the data wasn't available. +*/ +void starpu_mpi_coop_sends_data_handle_nb_sends(starpu_data_handle_t data_handle, int nb_sends); + +/** @} */ + +/** + @name Statistics + \anchor MPIStats + @{ +*/ + +/** + Disable the aggregation of communications statistics. +*/ +void starpu_mpi_comm_stats_disable(void); + +/** + Enable the aggregation of communications statistics. +*/ +void starpu_mpi_comm_stats_enable(void); + +/** + Retrieve the current communications statistics from the current node + in the array \p comm_stats which must have a size greater or + equal to the world size. Communications statistics must have been enabled, + either through the function starpu_mpi_comm_stats_enable() or + through the environment variable \ref STARPU_MPI_STATS. +*/ +void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); + +/** @} */ + +/** + @name Miscellaneous + \anchor MPIMisc + @{ +*/ + +int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *)); +int starpu_mpi_pre_submit_hook_unregister(void); + +/** + Copy the content of \p src_handle into \p dst_handle. If both data + are on the same node, the function starpu_data_cpy() is called, + otherwise a MPI transfer is initiated between both nodes. + The parameter \p asynchronous indicates whether the function should + block or not. 
+ If \p callback_func is not NULL, this callback function + is executed on the owner node of the data \p dst_handle after the + handle has been received, and it is given the pointer \p + callback_arg as argument. + See \ref MPITaskUtility for more details. +*/ +int starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg); + +/** + Similar to starpu_mpi_data_cpy(), but take a priority \p prio. +*/ +int starpu_mpi_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority); + +/** @} */ + +/** + @name Data Tags Management + \anchor MPITags + @{ +*/ + +/** + Book a range of unique tags of size \p nbtags to be used to register + StarPU data handles. + This function returns the minimal tag value available \c mintag to + allow the registration of data with tags in the continuous range [[ + \c mintag, \c mintag + \p nbtags ]] + + Note that this function must be called by all MPI processes + involved in the computations with the same parameters and in the + exact same order to make sure the tags are identical from one node + to another. +*/ +int64_t starpu_mpi_tags_allocate(int64_t nbtags); + +/** + Release the range of tags starting by the given \p mintag value. The + mintag value must be a value obtained through a call to + starpu_mpi_tags_allocate(). + + Note that this function must be called by all MPI processes + involved in the computations with the same parameters and in the + exact same order to make sure the tags are identical from one node + to another as for starpu_mpi_tags_allocate(). 
+*/ +void starpu_mpi_tags_free(int64_t mintag); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI +#endif // __STARPU_MPI_H__ diff --git a/mpi/include/starpu_mpi_ft.h b/mpi/include/starpu_mpi_ft.h new file mode 100644 index 0000000..4d8e43a --- /dev/null +++ b/mpi/include/starpu_mpi_ft.h @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_FT_H__ +#define __STARPU_MPI_FT_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct _starpu_mpi_checkpoint_template; +typedef struct _starpu_mpi_checkpoint_template *starpu_mpi_checkpoint_template_t; + +#if defined(STARPU_USE_MPI_FT) +/** + @defgroup API_MPI_FT_Support MPI Fault Tolerance Support + @{ +*/ + +/** + Initialise the checkpoint mechanism +*/ +int starpu_mpi_checkpoint_init(void); + +/** + Shutdown the checkpoint mechanism +*/ +int starpu_mpi_checkpoint_shutdown(void); + +/** + * Wrapped function to register a checkpoint template \p cp_template with the given arguments. + * It is then ready to use with ::starpu_mpi_checkpoint_template_submit() during the program execution. + * This command executes ::starpu_mpi_checkpoint_template_create(), adds the given checkpoint entry and freezes the + * checkpoint, and therefore can no longer be modified. 
+ * A unique checkpoint id \p cp_id is requested from the user in order to create several templates and to + * match with a corresponding ::starpu_mpi_init_from_checkpoint() (not implemented yet). + * + * The arguments following the \p cp_template and the \p cp_id can be of the following types: + *
      + *
    • ::STARPU_R followed by a data handle and the backup rank; + *
    • ::STARPU_DATA_ARRAY followed by an array of data handles, + * its number of elements and a backup rank (non functional); + *
    • ::STARPU_VALUE followed by a pointer to the unregistered value, + * its size in bytes, a unique tag (as the ones given for data handle registering) + * and the function giving the backup rank of the rank argument: int(backup_of)(int). + *
    • The argument list must be ended by the value 0. + *
    + */ +int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain, ...); + +/** + * Create a new checkpoint template. A unique checkpoint id \p cp_id is requested from + * the user in order to create several templates and to + * match with a corresponding ::starpu_mpi_init_from_checkpoint() (not implemented yet). + * Note a template must be frozen with ::starpu_mpi_checkpoint_template_freeze() in order to use it + * with ::starpu_mpi_checkpoint_template_submit(). +*/ +int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain); + +/** + * Add a single entry to a checkpoint template previously created with ::starpu_mpi_checkpoint_template_create(). + * As many entries can be added to a template with as many argument to a single function call, or with as many + * calls to this function. + * Once all the entry added, the + * template must be frozen before using ::starpu_mpi_checkpoint_template_submit(). + * + * The arguments following the \p cp_template can be of the following types: + *
      + *
    • ::STARPU_R followed by a data handle and the backup rank; + *
    • ::STARPU_DATA_ARRAY followed by an array of data handles, + * its number of elements and a backup rank (non functional); + *
    • ::STARPU_VALUE followed by a pointer to the unregistered value, + * its size in bytes, a unique tag (as the ones given for data handle registering) + * and the function giving the backup rank of the rank argument: int(backup_of)(int). + *
    • The argument list must be ended by the value 0. + *
    + */ +int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t *cp_template, ...); + +/** + * Freeze the given template. + * A frozen template can no longer be modified with ::starpu_mpi_checkpoint_template_add_entry(). + * A template must be frozen before using ::starpu_mpi_checkpoint_template_submit(). + */ +int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t *cp_template); + +/** + * Submit the checkpoint to StarPU, and can be seen as a cut in the task graph. StarPU will save the data as currently + * described in the submission. Note that the data external to StarPu (::STARPU_VALUE) will be saved with the current value + * at submission time (when ::starpu_mpi_checkpoint_template_submit() is called). + * The data internal to StarPU (aka handles given with ::STARPU_R) will be saved with their value at + * execution time (when the task submitted before the ::starpu_mpi_checkpoint_template_submit() have been executed, + * and before this data is modified by the tasks submitted after the ::starpu_mpi_checkpoint_template_submit()) + */ +int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template, int prio); + +int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template); + +#else // !STARPU_USE_MPI_FT +static inline int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, int cp_id STARPU_ATTRIBUTE_UNUSED, int cp_domain STARPU_ATTRIBUTE_UNUSED, ...) { return 0; } +static inline int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, int cp_id STARPU_ATTRIBUTE_UNUSED, int cp_domain STARPU_ATTRIBUTE_UNUSED) { return 0; } +static inline int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, ...) 
{ return 0; } +static inline int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED) { return 0; } +static inline int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED) { return 0; } +static inline int starpu_mpi_ft_turn_on(void) { return 0; } +static inline int starpu_mpi_ft_turn_off(void) { return 0; } +static inline int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template STARPU_ATTRIBUTE_UNUSED) { return 0; } +static inline int starpu_mpi_checkpoint_init(void) { return 0; } +static inline int starpu_mpi_checkpoint_shutdown(void) { return 0; } + +/** @} */ + +#endif // STARPU_USE_MPI_FT + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_FT_H__ diff --git a/mpi/include/starpu_mpi_lb.h b/mpi/include/starpu_mpi_lb.h new file mode 100644 index 0000000..d8f0773 --- /dev/null +++ b/mpi/include/starpu_mpi_lb.h @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_LOAD_BALANCER_H__ +#define __STARPU_MPI_LOAD_BALANCER_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + todo +*/ +struct starpu_mpi_lb_conf +{ + void (*get_neighbors)(int **neighbor_ids, int *nneighbors); + void (*get_data_unit_to_migrate)(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node); +}; + +/** + Initialize the load balancer's environment with the load policy provided by the + user +*/ +void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *); +void starpu_mpi_lb_shutdown(void); + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_LOAD_BALANCER_H__ diff --git a/mpi/packages/libstarpumpi.pc.in b/mpi/packages/libstarpumpi.pc.in new file mode 100644 index 0000000..2c262c8 --- /dev/null +++ b/mpi/packages/libstarpumpi.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ -DSTARPU_USE_DEPRECATED_API +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: libstarpu +Requires.private: diff --git a/mpi/packages/starpumpi-1.0.pc.in b/mpi/packages/starpumpi-1.0.pc.in new file mode 100644 index 0000000..c0b0766 --- /dev/null +++ b/mpi/packages/starpumpi-1.0.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: starpu-1.0 +Requires.private: diff --git a/mpi/packages/starpumpi-1.1.pc.in b/mpi/packages/starpumpi-1.1.pc.in new file mode 100644 index 0000000..7d8731a --- /dev/null +++ b/mpi/packages/starpumpi-1.1.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: starpu-1.1 +Requires.private: diff --git a/mpi/packages/starpumpi-1.2.pc.in b/mpi/packages/starpumpi-1.2.pc.in new file mode 100644 index 0000000..a06fd91 --- /dev/null +++ b/mpi/packages/starpumpi-1.2.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: starpu-1.2 +Requires.private: diff --git a/mpi/packages/starpumpi-1.3.pc.in b/mpi/packages/starpumpi-1.3.pc.in new file mode 100644 index 0000000..6248d00 --- /dev/null +++ b/mpi/packages/starpumpi-1.3.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: starpu-1.3 +Requires.private: diff --git a/mpi/packages/starpumpi-1.4.pc.in b/mpi/packages/starpumpi-1.4.pc.in new file mode 100644 index 0000000..6011fc2 --- /dev/null +++ b/mpi/packages/starpumpi-1.4.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpumpi +Description: offers MPI support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ +Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ +Requires: starpu-1.4 +Requires.private: diff --git a/mpi/src/Makefile.am b/mpi/src/Makefile.am new file mode 100644 index 0000000..635d270 --- /dev/null +++ b/mpi/src/Makefile.am @@ -0,0 +1,148 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-notests.mk + +CC=$(MPICC) +CCLD=$(MPICC) + +BUILT_SOURCES = + +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +AM_CFLAGS += $(FXT_CFLAGS) $(NMAD_CFLAGS) $(MPI_SYNC_CLOCKS_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src $(STARPU_H_CPPFLAGS) -DBUILDING_STARPU +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(FXT_LDFLAGS) $(FXT_LIBS) +LIBS += $(NMAD_LDFLAGS) $(NMAD_LIBS) +LIBS += $(MPICC_LDFLAGS) +LIBS += $(MPI_SYNC_CLOCKS_LIBS) + +ldflags = + +if STARPU_HAVE_WINDOWS + +LC_MESSAGES=C +export LC_MESSAGES + +ldflags += -Xlinker --output-def -Xlinker .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def + +if STARPU_HAVE_MS_LIB +.libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib: libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la dolib + ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpumpi_so_version) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib +all-local: .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib +endif STARPU_HAVE_MS_LIB + +install-exec-hook: + $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) +if STARPU_HAVE_MS_LIB + $(INSTALL) 
.libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir) + $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir) +endif STARPU_HAVE_MS_LIB + +endif STARPU_HAVE_WINDOWS + +lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la + +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) +noinst_HEADERS = \ + starpu_mpi_private.h \ + starpu_mpi_fxt.h \ + starpu_mpi_stats.h \ + starpu_mpi_datatype.h \ + starpu_mpi_cache.h \ + starpu_mpi_select_node.h \ + starpu_mpi_cache_stats.h \ + starpu_mpi_task_insert.h \ + starpu_mpi_init.h \ + nmad/starpu_mpi_nmad_coop.h \ + mpi/starpu_mpi_mpi.h \ + mpi/starpu_mpi_early_data.h \ + mpi/starpu_mpi_early_request.h \ + mpi/starpu_mpi_sync_data.h \ + mpi/starpu_mpi_comm.h \ + mpi/starpu_mpi_tag.h \ + mpi/starpu_mpi_driver.h \ + mpi/starpu_mpi_mpi_backend.h \ + nmad/starpu_mpi_nmad_backend.h \ + nmad/starpu_mpi_nmad_unknown_datatype.h \ + nmad/starpu_mpi_nmad.h \ + load_balancer/policy/data_movements_interface.h \ + load_balancer/policy/load_data_interface.h \ + load_balancer/policy/load_balancer_policy.h + +if STARPU_USE_MPI_FT +noinst_HEADERS += \ + mpi_failure_tolerance/starpu_mpi_ft.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ + mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ + mpi_failure_tolerance/starpu_mpi_ft_stats.h +endif STARPU_USE_MPI_FT + +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + starpu_mpi.c \ + starpu_mpi_req.c \ + starpu_mpi_coop_sends.c \ + starpu_mpi_helper.c \ + starpu_mpi_datatype.c \ + starpu_mpi_task_insert.c \ + starpu_mpi_collective.c \ + starpu_mpi_stats.c \ + starpu_mpi_private.c \ + starpu_mpi_cache.c \ + 
starpu_mpi_select_node.c \ + starpu_mpi_cache_stats.c \ + starpu_mpi_fortran.c \ + starpu_mpi_task_insert_fortran.c \ + starpu_mpi_init.c \ + starpu_mpi_tags.c \ + nmad/starpu_mpi_nmad_coop.c \ + nmad/starpu_mpi_nmad_unknown_datatype.c \ + nmad/starpu_mpi_nmad.c \ + nmad/starpu_mpi_nmad_backend.c \ + mpi/starpu_mpi_mpi.c \ + mpi/starpu_mpi_mpi_backend.c \ + mpi/starpu_mpi_early_data.c \ + mpi/starpu_mpi_early_request.c \ + mpi/starpu_mpi_sync_data.c \ + mpi/starpu_mpi_comm.c \ + mpi/starpu_mpi_tag.c \ + load_balancer/policy/data_movements_interface.c \ + load_balancer/policy/load_data_interface.c \ + load_balancer/policy/load_heat_propagation.c \ + load_balancer/load_balancer.c + +if STARPU_USE_MPI_FT +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ + mpi_failure_tolerance/starpu_mpi_ft.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ + mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ + mpi_failure_tolerance/starpu_mpi_ft_stats.c +endif STARPU_USE_MPI_FT + +if STARPU_USE_FXT +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ + starpu_mpi_fxt.c +endif diff --git a/mpi/src/Makefile.in b/mpi/src/Makefile.in new file mode 100644 index 0000000..69ec09e --- /dev/null +++ b/mpi/src/Makefile.in @@ -0,0 +1,1511 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) 
+NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +@STARPU_HAVE_WINDOWS_TRUE@am__append_3 = -Xlinker --output-def -Xlinker .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def +@STARPU_USE_MPI_FT_TRUE@am__append_4 = \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.h + +@STARPU_USE_MPI_FT_TRUE@am__append_5 = \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.c + +@STARPU_USE_FXT_TRUE@am__append_6 = \ +@STARPU_USE_FXT_TRUE@ starpu_mpi_fxt.c + +subdir = mpi/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = 
$(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +am__libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ + starpu_mpi.c starpu_mpi_req.c starpu_mpi_coop_sends.c \ + starpu_mpi_helper.c starpu_mpi_datatype.c \ + starpu_mpi_task_insert.c starpu_mpi_collective.c \ + starpu_mpi_stats.c starpu_mpi_private.c starpu_mpi_cache.c \ + starpu_mpi_select_node.c starpu_mpi_cache_stats.c \ + starpu_mpi_fortran.c starpu_mpi_task_insert_fortran.c \ + starpu_mpi_init.c starpu_mpi_tags.c \ + nmad/starpu_mpi_nmad_coop.c \ + nmad/starpu_mpi_nmad_unknown_datatype.c nmad/starpu_mpi_nmad.c \ + nmad/starpu_mpi_nmad_backend.c mpi/starpu_mpi_mpi.c \ + mpi/starpu_mpi_mpi_backend.c mpi/starpu_mpi_early_data.c \ + mpi/starpu_mpi_early_request.c mpi/starpu_mpi_sync_data.c \ + mpi/starpu_mpi_comm.c mpi/starpu_mpi_tag.c \ + load_balancer/policy/data_movements_interface.c \ + load_balancer/policy/load_data_interface.c \ + load_balancer/policy/load_heat_propagation.c \ + load_balancer/load_balancer.c \ + mpi_failure_tolerance/starpu_mpi_ft.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ + mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ + mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ + mpi_failure_tolerance/starpu_mpi_ft_stats.c starpu_mpi_fxt.c +am__dirstamp = $(am__leading_dot)dirstamp +@STARPU_USE_MPI_FT_TRUE@am__objects_1 = mpi_failure_tolerance/starpu_mpi_ft.lo \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.lo \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.lo \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.lo \ +@STARPU_USE_MPI_FT_TRUE@ 
mpi_failure_tolerance/starpu_mpi_checkpoint_package.lo \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.lo \ +@STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.lo +@STARPU_USE_FXT_TRUE@am__objects_2 = starpu_mpi_fxt.lo +am_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpu_mpi.lo \ + starpu_mpi_req.lo starpu_mpi_coop_sends.lo \ + starpu_mpi_helper.lo starpu_mpi_datatype.lo \ + starpu_mpi_task_insert.lo starpu_mpi_collective.lo \ + starpu_mpi_stats.lo starpu_mpi_private.lo starpu_mpi_cache.lo \ + starpu_mpi_select_node.lo starpu_mpi_cache_stats.lo \ + starpu_mpi_fortran.lo starpu_mpi_task_insert_fortran.lo \ + starpu_mpi_init.lo starpu_mpi_tags.lo \ + nmad/starpu_mpi_nmad_coop.lo \ + nmad/starpu_mpi_nmad_unknown_datatype.lo \ + nmad/starpu_mpi_nmad.lo nmad/starpu_mpi_nmad_backend.lo \ + mpi/starpu_mpi_mpi.lo mpi/starpu_mpi_mpi_backend.lo \ + mpi/starpu_mpi_early_data.lo mpi/starpu_mpi_early_request.lo \ + mpi/starpu_mpi_sync_data.lo mpi/starpu_mpi_comm.lo \ + mpi/starpu_mpi_tag.lo \ + load_balancer/policy/data_movements_interface.lo \ + load_balancer/policy/load_data_interface.lo \ + load_balancer/policy/load_heat_propagation.lo \ + load_balancer/load_balancer.lo $(am__objects_1) \ + $(am__objects_2) +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) 
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/starpu_mpi.Plo \ + ./$(DEPDIR)/starpu_mpi_cache.Plo \ + ./$(DEPDIR)/starpu_mpi_cache_stats.Plo \ + ./$(DEPDIR)/starpu_mpi_collective.Plo \ + ./$(DEPDIR)/starpu_mpi_coop_sends.Plo \ + ./$(DEPDIR)/starpu_mpi_datatype.Plo \ + ./$(DEPDIR)/starpu_mpi_fortran.Plo \ + ./$(DEPDIR)/starpu_mpi_fxt.Plo \ + ./$(DEPDIR)/starpu_mpi_helper.Plo \ + ./$(DEPDIR)/starpu_mpi_init.Plo \ + ./$(DEPDIR)/starpu_mpi_private.Plo \ + ./$(DEPDIR)/starpu_mpi_req.Plo \ + ./$(DEPDIR)/starpu_mpi_select_node.Plo \ + ./$(DEPDIR)/starpu_mpi_stats.Plo \ + ./$(DEPDIR)/starpu_mpi_tags.Plo \ + ./$(DEPDIR)/starpu_mpi_task_insert.Plo \ + ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo \ + load_balancer/$(DEPDIR)/load_balancer.Plo \ + load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo \ + load_balancer/policy/$(DEPDIR)/load_data_interface.Plo \ + load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo \ + mpi/$(DEPDIR)/starpu_mpi_comm.Plo \ + mpi/$(DEPDIR)/starpu_mpi_early_data.Plo \ + mpi/$(DEPDIR)/starpu_mpi_early_request.Plo \ + mpi/$(DEPDIR)/starpu_mpi_mpi.Plo \ + mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo \ + mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo \ + mpi/$(DEPDIR)/starpu_mpi_tag.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo \ + mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo \ + nmad/$(DEPDIR)/starpu_mpi_nmad.Plo \ + 
nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo \ + nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo \ + nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = $(am__libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = starpu_mpi_private.h starpu_mpi_fxt.h \ + starpu_mpi_stats.h starpu_mpi_datatype.h starpu_mpi_cache.h \ + starpu_mpi_select_node.h starpu_mpi_cache_stats.h \ + starpu_mpi_task_insert.h starpu_mpi_init.h \ + nmad/starpu_mpi_nmad_coop.h mpi/starpu_mpi_mpi.h \ + mpi/starpu_mpi_early_data.h mpi/starpu_mpi_early_request.h \ + mpi/starpu_mpi_sync_data.h mpi/starpu_mpi_comm.h \ 
+ mpi/starpu_mpi_tag.h mpi/starpu_mpi_driver.h \ + mpi/starpu_mpi_mpi_backend.h nmad/starpu_mpi_nmad_backend.h \ + nmad/starpu_mpi_nmad_unknown_datatype.h nmad/starpu_mpi_nmad.h \ + load_balancer/policy/data_movements_interface.h \ + load_balancer/policy/load_data_interface.h \ + load_balancer/policy/load_balancer_policy.h \ + mpi_failure_tolerance/starpu_mpi_ft.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ + mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ + mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ + mpi_failure_tolerance/starpu_mpi_ft_stats.h +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = $(MPICC) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ 
+DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(FXT_LDFLAGS) $(FXT_LIBS) \ + $(NMAD_LDFLAGS) $(NMAD_LIBS) $(MPICC_LDFLAGS) \ + $(MPI_SYNC_CLOCKS_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = 
@LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ 
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ 
+STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = 
@am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FXT_CFLAGS) $(NMAD_CFLAGS) \ + $(MPI_SYNC_CLOCKS_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of 
+# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +CCLD = $(MPICC) +BUILT_SOURCES = +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src $(STARPU_H_CPPFLAGS) -DBUILDING_STARPU +ldflags = $(am__append_3) +@STARPU_HAVE_WINDOWS_TRUE@LC_MESSAGES = C +lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) + +noinst_HEADERS = starpu_mpi_private.h starpu_mpi_fxt.h \ + starpu_mpi_stats.h starpu_mpi_datatype.h starpu_mpi_cache.h \ + starpu_mpi_select_node.h starpu_mpi_cache_stats.h \ + starpu_mpi_task_insert.h starpu_mpi_init.h \ + nmad/starpu_mpi_nmad_coop.h mpi/starpu_mpi_mpi.h \ + mpi/starpu_mpi_early_data.h mpi/starpu_mpi_early_request.h \ + mpi/starpu_mpi_sync_data.h mpi/starpu_mpi_comm.h \ + mpi/starpu_mpi_tag.h 
mpi/starpu_mpi_driver.h \ + mpi/starpu_mpi_mpi_backend.h nmad/starpu_mpi_nmad_backend.h \ + nmad/starpu_mpi_nmad_unknown_datatype.h nmad/starpu_mpi_nmad.h \ + load_balancer/policy/data_movements_interface.h \ + load_balancer/policy/load_data_interface.h \ + load_balancer/policy/load_balancer_policy.h $(am__append_4) +libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpu_mpi.c \ + starpu_mpi_req.c starpu_mpi_coop_sends.c starpu_mpi_helper.c \ + starpu_mpi_datatype.c starpu_mpi_task_insert.c \ + starpu_mpi_collective.c starpu_mpi_stats.c \ + starpu_mpi_private.c starpu_mpi_cache.c \ + starpu_mpi_select_node.c starpu_mpi_cache_stats.c \ + starpu_mpi_fortran.c starpu_mpi_task_insert_fortran.c \ + starpu_mpi_init.c starpu_mpi_tags.c \ + nmad/starpu_mpi_nmad_coop.c \ + nmad/starpu_mpi_nmad_unknown_datatype.c nmad/starpu_mpi_nmad.c \ + nmad/starpu_mpi_nmad_backend.c mpi/starpu_mpi_mpi.c \ + mpi/starpu_mpi_mpi_backend.c mpi/starpu_mpi_early_data.c \ + mpi/starpu_mpi_early_request.c mpi/starpu_mpi_sync_data.c \ + mpi/starpu_mpi_comm.c mpi/starpu_mpi_tag.c \ + load_balancer/policy/data_movements_interface.c \ + load_balancer/policy/load_data_interface.c \ + load_balancer/policy/load_heat_propagation.c \ + load_balancer/load_balancer.c $(am__append_5) $(am__append_6) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign mpi/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort 
-u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +nmad/$(am__dirstamp): + @$(MKDIR_P) nmad + @: > nmad/$(am__dirstamp) +nmad/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) nmad/$(DEPDIR) + @: > nmad/$(DEPDIR)/$(am__dirstamp) +nmad/starpu_mpi_nmad_coop.lo: nmad/$(am__dirstamp) \ + nmad/$(DEPDIR)/$(am__dirstamp) +nmad/starpu_mpi_nmad_unknown_datatype.lo: nmad/$(am__dirstamp) \ + nmad/$(DEPDIR)/$(am__dirstamp) +nmad/starpu_mpi_nmad.lo: nmad/$(am__dirstamp) \ + nmad/$(DEPDIR)/$(am__dirstamp) +nmad/starpu_mpi_nmad_backend.lo: nmad/$(am__dirstamp) \ + nmad/$(DEPDIR)/$(am__dirstamp) +mpi/$(am__dirstamp): + @$(MKDIR_P) mpi + @: > mpi/$(am__dirstamp) +mpi/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mpi/$(DEPDIR) + @: > mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_mpi.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_mpi_backend.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_early_data.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_early_request.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_sync_data.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_comm.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +mpi/starpu_mpi_tag.lo: mpi/$(am__dirstamp) \ + mpi/$(DEPDIR)/$(am__dirstamp) +load_balancer/policy/$(am__dirstamp): + @$(MKDIR_P) load_balancer/policy + @: > load_balancer/policy/$(am__dirstamp) +load_balancer/policy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) load_balancer/policy/$(DEPDIR) + @: > load_balancer/policy/$(DEPDIR)/$(am__dirstamp) +load_balancer/policy/data_movements_interface.lo: \ + load_balancer/policy/$(am__dirstamp) \ + load_balancer/policy/$(DEPDIR)/$(am__dirstamp) +load_balancer/policy/load_data_interface.lo: \ + load_balancer/policy/$(am__dirstamp) \ + load_balancer/policy/$(DEPDIR)/$(am__dirstamp) +load_balancer/policy/load_heat_propagation.lo: \ + load_balancer/policy/$(am__dirstamp) \ + 
load_balancer/policy/$(DEPDIR)/$(am__dirstamp) +load_balancer/$(am__dirstamp): + @$(MKDIR_P) load_balancer + @: > load_balancer/$(am__dirstamp) +load_balancer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) load_balancer/$(DEPDIR) + @: > load_balancer/$(DEPDIR)/$(am__dirstamp) +load_balancer/load_balancer.lo: load_balancer/$(am__dirstamp) \ + load_balancer/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/$(am__dirstamp): + @$(MKDIR_P) mpi_failure_tolerance + @: > mpi_failure_tolerance/$(am__dirstamp) +mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mpi_failure_tolerance/$(DEPDIR) + @: > mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_ft.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_checkpoint.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_checkpoint_template.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_ft_service_comms.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_checkpoint_package.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) +mpi_failure_tolerance/starpu_mpi_ft_stats.lo: \ + mpi_failure_tolerance/$(am__dirstamp) \ + mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) + +libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) 
$(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f load_balancer/*.$(OBJEXT) + -rm -f load_balancer/*.lo + -rm -f load_balancer/policy/*.$(OBJEXT) + -rm -f load_balancer/policy/*.lo + -rm -f mpi/*.$(OBJEXT) + -rm -f mpi/*.lo + -rm -f mpi_failure_tolerance/*.$(OBJEXT) + -rm -f mpi_failure_tolerance/*.lo + -rm -f nmad/*.$(OBJEXT) + -rm -f nmad/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_cache.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_cache_stats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_collective.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_coop_sends.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_datatype.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_fortran.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_fxt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_helper.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_private.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_req.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_select_node.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_stats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/starpu_mpi_tags.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_task_insert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@load_balancer/$(DEPDIR)/load_balancer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/load_data_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_comm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_early_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_early_request.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_mpi.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_tag.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf load_balancer/.libs load_balancer/_libs + -rm -rf load_balancer/policy/.libs load_balancer/policy/_libs + -rm -rf mpi/.libs mpi/_libs + -rm -rf mpi_failure_tolerance/.libs mpi_failure_tolerance/_libs + -rm -rf nmad/.libs nmad/_libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-recursive +@STARPU_HAVE_MS_LIB_FALSE@all-local: +@STARPU_HAVE_WINDOWS_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-recursive +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + 
+installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f load_balancer/$(DEPDIR)/$(am__dirstamp) + -rm -f load_balancer/$(am__dirstamp) + -rm -f load_balancer/policy/$(DEPDIR)/$(am__dirstamp) + -rm -f load_balancer/policy/$(am__dirstamp) + -rm -f mpi/$(DEPDIR)/$(am__dirstamp) + -rm -f mpi/$(am__dirstamp) + -rm -f mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) + -rm -f mpi_failure_tolerance/$(am__dirstamp) + -rm -f nmad/$(DEPDIR)/$(am__dirstamp) + -rm -f nmad/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +@STARPU_HAVE_WINDOWS_FALSE@install-exec-hook: +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/starpu_mpi.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_cache.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_cache_stats.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_collective.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_coop_sends.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_datatype.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_fortran.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_fxt.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_helper.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_init.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_private.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_req.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_select_node.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_stats.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_tags.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_task_insert.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo + -rm -f load_balancer/$(DEPDIR)/load_balancer.Plo + -rm -f load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo + -rm -f load_balancer/policy/$(DEPDIR)/load_data_interface.Plo + -rm -f load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_comm.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_early_data.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_early_request.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_tag.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo + 
-rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/starpu_mpi.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_cache.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_cache_stats.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_collective.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_coop_sends.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_datatype.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_fortran.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_fxt.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_helper.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_init.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_private.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_req.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_select_node.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_stats.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_tags.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_task_insert.Plo + -rm -f ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo + -rm -f load_balancer/$(DEPDIR)/load_balancer.Plo + -rm -f load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo + -rm -f load_balancer/policy/$(DEPDIR)/load_data_interface.Plo + -rm -f load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo + -rm -f 
mpi/$(DEPDIR)/starpu_mpi_comm.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_early_data.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_early_request.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo + -rm -f mpi/$(DEPDIR)/starpu_mpi_tag.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo + -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo + -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: $(am__recursive_targets) all check install install-am \ + install-exec install-exec-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-exec-hook install-html install-html-am \ + install-info install-info-am install-libLTLIBRARIES \ + install-man 
install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null +@STARPU_HAVE_WINDOWS_TRUE@export LC_MESSAGES + +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@.libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib: libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la dolib +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpumpi_so_version) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@all-local: .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib + +@STARPU_HAVE_WINDOWS_TRUE@install-exec-hook: +@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) 
+@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@	$(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir)
+@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@	$(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir)
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpi/src/load_balancer/load_balancer.c b/mpi/src/load_balancer/load_balancer.c
new file mode 100644
index 0000000..a65c22a
--- /dev/null
+++ b/mpi/src/load_balancer/load_balancer.c
@@ -0,0 +1,160 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header names of the #include lines below are missing in
+ * this copy of the patch; restore them from upstream before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "policy/load_balancer_policy.h"
+
+#if defined(STARPU_USE_MPI_MPI)
+
+/** Policy selected by starpu_mpi_lb_init(); NULL while load balancing is disabled. */
+static struct load_balancer_policy *defined_policy = NULL;
+typedef void (*_post_exec_hook_func_t)(struct starpu_task *task, unsigned sched_ctx_id);
+/** For each scheduling context, the post_exec_hook found before the wrapper
+ * was installed (NULL when the scheduler had none). */
+static _post_exec_hook_func_t saved_post_exec_hook[STARPU_NMAX_SCHED_CTXS];
+
+/** Replacement post_exec_hook: notifies the load balancing policy that a task
+ * finished, then chains to the hook the scheduler originally had, if any. */
+static void post_exec_hook_wrapper(struct starpu_task *task, unsigned sched_ctx_id)
+{
+	if (defined_policy && defined_policy->finished_task_entry_point)
+		defined_policy->finished_task_entry_point();
+	if (saved_post_exec_hook[sched_ctx_id])
+		saved_post_exec_hook[sched_ctx_id](task, sched_ctx_id);
+}
+
+/** NULL-terminated list of the built-in load balancing policies. */
+static struct load_balancer_policy *predefined_policies[] =
+{
+	&load_heat_propagation_policy,
+	NULL
+};
+
+/** Select and start a load balancing policy.
+ *
+ * The policy name is taken from the STARPU_MPI_LB environment variable, or
+ * from \p lb_policy_name when the variable is not set. Without a name, or
+ * with the name "help", the available policies are listed and load balancing
+ * stays disabled. \p itf is handed to the init() method of the policy.
+ *
+ * On any failure (unknown name, init() returning non-zero) a message is
+ * printed and load balancing stays disabled: no hook is installed and
+ * starpu_mpi_lb_shutdown() becomes a no-op.
+ */
+void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *itf)
+{
+	int ret;
+	struct load_balancer_policy **policy;
+
+	const char *policy_name = starpu_getenv("STARPU_MPI_LB");
+	if (!policy_name)
+		policy_name = lb_policy_name;
+
+	if (!policy_name || (strcmp(policy_name, "help") == 0))
+	{
+		_STARPU_MSG("Warning : load balancing is disabled for this run.\n");
+		_STARPU_MSG("Use the STARPU_MPI_LB=<name> environment variable to use a load balancer.\n");
+		_STARPU_MSG("Available load balancers :\n");
+		for(policy=predefined_policies ; *policy!=NULL ; policy++)
+		{
+			struct load_balancer_policy *p = *policy;
+			fprintf(stderr," - %s\n", p->policy_name);
+		}
+		return;
+	}
+
+	/* policy_name is known to be non-NULL from here on */
+	for(policy=predefined_policies ; *policy!=NULL ; policy++)
+	{
+		struct load_balancer_policy *p = *policy;
+		if (p->policy_name && strcmp(policy_name, p->policy_name) == 0)
+		{
+			/* we found a policy with the requested name */
+			defined_policy = p;
+			break;
+		}
+	}
+
+	if (!defined_policy)
+	{
+		_STARPU_MSG("Error : no load balancer with the name %s. Load balancing will be disabled for this run.\n", policy_name);
+		return;
+	}
+
+	ret = defined_policy->init(itf);
+	if (ret != 0)
+	{
+		_STARPU_MSG("Error (%d) in %s->init: invalid starpu_mpi_lb_conf. Load balancing will be disabled for this run.\n", ret, defined_policy->policy_name);
+		/* Really disable the policy: otherwise starpu_mpi_lb_shutdown()
+		 * would call deinit() on a policy whose init() failed. */
+		defined_policy = NULL;
+		return;
+	}
+
+	/* Have the policy notified of every task submission */
+	if (defined_policy->submitted_task_entry_point)
+		starpu_mpi_pre_submit_hook_register(defined_policy->submitted_task_entry_point);
+
+	/* Have the policy notified of every task completion, by wrapping the
+	 * post_exec_hook of the scheduler of each scheduling context */
+	if (defined_policy->finished_task_entry_point)
+	{
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		{
+			struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
+			if (sched_policy)
+			{
+				_STARPU_DEBUG("Setting post_exec_hook for scheduling context %d %s (%d)\n", i, sched_policy->policy_name, STARPU_NMAX_SCHED_CTXS);
+				saved_post_exec_hook[i] = sched_policy->post_exec_hook;
+				sched_policy->post_exec_hook = post_exec_hook_wrapper;
+			}
+			else
+				saved_post_exec_hook[i] = NULL;
+		}
+	}
+
+	return;
+}
+
+/** Stop the load balancing policy started by starpu_mpi_lb_init().
+ *
+ * Does nothing when no policy is active. A failing deinit() is reported but
+ * does not prevent the hooks from being removed, so that the runtime never
+ * keeps calling into a policy that has been torn down.
+ */
+void starpu_mpi_lb_shutdown(void)
+{
+	if (!defined_policy)
+		return;
+
+	int ret = defined_policy->deinit();
+	if (ret != 0)
+		_STARPU_MSG("Error (%d) in %s->deinit\n", ret, defined_policy->policy_name);
+
+	/* Stop notifying the policy of task submissions */
+	if (defined_policy->submitted_task_entry_point)
+		starpu_mpi_pre_submit_hook_unregister();
+
+	/* Put back the original post_exec_hook of every scheduler we wrapped */
+	if (defined_policy->finished_task_entry_point)
+	{
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		{
+			struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
+			/* Test for the wrapper itself rather than for a non-NULL saved
+			 * hook: a scheduler that had no post_exec_hook must get its
+			 * NULL hook back too. */
+			if (sched_policy && sched_policy->post_exec_hook == post_exec_hook_wrapper)
+				sched_policy->post_exec_hook = saved_post_exec_hook[i];
+			saved_post_exec_hook[i] = NULL;
+		}
+	}
+	defined_policy = NULL;
+}
+
+#endif /* STARPU_USE_MPI_MPI */
diff --git a/mpi/src/load_balancer/policy/data_movements_interface.c b/mpi/src/load_balancer/policy/data_movements_interface.c
new file mode 100644
index 0000000..0b89f1e
--- /dev/null
+++ b/mpi/src/load_balancer/policy/data_movements_interface.c
@@ -0,0 +1,308 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header names of the #include lines below are missing in
+ * this copy of the patch; restore them from upstream before building. */
+#include
+#include
+#include
+#include
+
+#include "data_movements_interface.h"
+
+#if defined(STARPU_USE_MPI_MPI)
+
+/** Return the address of the tags table pointer of the STARPU_MAIN_RAM
+ * interface of \p handle, or NULL when no tags table is allocated. */
+starpu_mpi_tag_t **data_movements_get_ref_tags_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	if (dm_interface->tags)
+		return &dm_interface->tags;
+	else
+		return NULL;
+}
+
+/** Return the address of the ranks table pointer of the STARPU_MAIN_RAM
+ * interface of \p handle, or NULL when no ranks table is allocated. */
+int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	if (dm_interface->ranks)
+		return &dm_interface->ranks;
+	else
+		return NULL;
+}
+
+/** Return the tags table of the STARPU_MAIN_RAM interface (may be NULL). */
+starpu_mpi_tag_t *data_movements_get_tags_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->tags;
+}
+
+/** Return the ranks table of the STARPU_MAIN_RAM interface (may be NULL). */
+int *data_movements_get_ranks_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->ranks;
+}
+
+/** Return the number of entries of the tables of the STARPU_MAIN_RAM interface. */
+int data_movements_get_size_tables(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->size;
+}
+
+static void data_movements_free_data_on_node(void *data_interface, unsigned node);
+static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node);
+
+/** Drop the current tables of \p dm_interface (if any) and allocate fresh,
+ * uninitialised ones of \p size entries on \p node. With a \p size of 0 the
+ * interface is simply left without tables. Always returns 0; an allocation
+ * failure trips an assertion. */
+int data_movements_reallocate_tables_interface(struct data_movements_interface *dm_interface, unsigned node, int size)
+{
+	if (dm_interface->tags)
+	{
+		/* must happen before ->size changes: the sizes to free are derived from it */
+		data_movements_free_data_on_node(dm_interface, node);
+		dm_interface->tags = NULL;
+		dm_interface->ranks = NULL;
+	}
+	else
+	{
+		STARPU_ASSERT(!dm_interface->tags);
+		STARPU_ASSERT(!dm_interface->ranks);
+	}
+
+	dm_interface->size = size;
+
+	if (dm_interface->size)
+	{
+		starpu_ssize_t resize = data_movements_allocate_data_on_node(dm_interface, node);
+		STARPU_ASSERT(resize > 0);
+	}
+
+	return 0 ;
+}
+
+/** Same as data_movements_reallocate_tables_interface(), applied to the
+ * interface of \p handle on \p node. */
+int data_movements_reallocate_tables(starpu_data_handle_t handle, unsigned node, int size)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, node);
+	return data_movements_reallocate_tables_interface(dm_interface, node, size);
+}
+
+/** register_data_handle method: initialise the interface of every memory
+ * node from \p data_interface. Only \p home_node receives the table pointers,
+ * the other nodes start without tables. */
+static void data_movements_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+
+	int node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct data_movements_interface *local_interface = (struct data_movements_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		local_interface->size = dm_interface->size;
+		if (node == home_node)
+		{
+			local_interface->tags = dm_interface->tags;
+			local_interface->ranks = dm_interface->ranks;
+		}
+		else
+		{
+			local_interface->tags = NULL;
+			local_interface->ranks = NULL;
+		}
+	}
+}
+
+/** allocate_data_on_node method: allocate both tables for ->size entries on
+ * \p node. Returns the number of bytes allocated (0 when ->size is 0), or
+ * -ENOMEM, in which case nothing is left allocated. */
+static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+
+	if (!dm_interface->size)
+	{
+		dm_interface->tags = NULL;
+		dm_interface->ranks = NULL;
+		return 0;
+	}
+
+	starpu_mpi_tag_t *addr_tags;
+	int *addr_ranks;
+	starpu_ssize_t requested_memory_tags = dm_interface->size * sizeof(starpu_mpi_tag_t);
+	starpu_ssize_t requested_memory_ranks = dm_interface->size * sizeof(int);
+
+	addr_tags = (starpu_mpi_tag_t*) starpu_malloc_on_node(node, requested_memory_tags);
+	if (!addr_tags)
+		goto fail_tags;
+	addr_ranks = (int*) starpu_malloc_on_node(node, requested_memory_ranks);
+	if (!addr_ranks)
+		goto fail_ranks;
+
+	/* update the data properly in consequence */
+	dm_interface->tags = addr_tags;
+	dm_interface->ranks = addr_ranks;
+
+	return requested_memory_tags+requested_memory_ranks;
+
+fail_ranks:
+	starpu_free_on_node(node, (uintptr_t) addr_tags, requested_memory_tags);
+fail_tags:
+	return -ENOMEM;
+}
+
+/** free_data_on_node method: release both tables, whose byte sizes are
+ * derived from the current ->size. Does nothing without a tags table. */
+static void data_movements_free_data_on_node(void *data_interface, unsigned node)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+
+	if (! dm_interface->tags)
+		return;
+
+	starpu_ssize_t requested_memory_tags = dm_interface->size * sizeof(starpu_mpi_tag_t);
+	starpu_ssize_t requested_memory_ranks = dm_interface->size * sizeof(int);
+
+	starpu_free_on_node(node, (uintptr_t) dm_interface->tags, requested_memory_tags);
+	dm_interface->tags = NULL;
+	starpu_free_on_node(node, (uintptr_t) dm_interface->ranks, requested_memory_ranks);
+	dm_interface->ranks = NULL;
+}
+
+/** get_size method: size in bytes of the packed form, i.e. an int entry
+ * count followed by the tags table and then the ranks table. */
+static size_t data_movements_get_size(starpu_data_handle_t handle)
+{
+	size_t size;
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	size = (dm_interface->size * sizeof(starpu_mpi_tag_t)) + (dm_interface->size * sizeof(int)) + sizeof(int);
+	return size;
+}
+
+/** footprint method: hash of the packed size. */
+static uint32_t data_movements_footprint(starpu_data_handle_t handle)
+{
+	return starpu_hash_crc32c_be(data_movements_get_size(handle), 0);
+}
+
+/** pack_data method: store the packed size in \p count and, when \p ptr is
+ * not NULL, allocate a buffer on \p node holding
+ * [int size][size tags][size ranks] and return it through \p ptr. */
+static int data_movements_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = data_movements_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data = (void*) starpu_malloc_on_node_flags(node, *count, 0);
+		assert(data);
+		*ptr = data;
+		memcpy(data, &dm_interface->size, sizeof(int));
+		if (dm_interface->size)
+		{
+			memcpy(data+sizeof(int), dm_interface->tags, (dm_interface->size*sizeof(starpu_mpi_tag_t)));
+			memcpy(data+sizeof(int)+(dm_interface->size*sizeof(starpu_mpi_tag_t)), dm_interface->ranks, dm_interface->size*sizeof(int));
+		}
+	}
+
+	return 0;
+}
+
+/** peek_data method: rebuild the tables of \p handle on \p node from a buffer
+ * produced by data_movements_pack_data(), without releasing the buffer. */
+static int data_movements_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	int size = 0;
+	memcpy(&size, data, sizeof(int));
+	/* Must mirror the layout written by data_movements_pack_data(): the tags
+	 * are starpu_mpi_tag_t entries, not int entries. */
+	STARPU_ASSERT(count == sizeof(int) + (size_t) size * (sizeof(starpu_mpi_tag_t) + sizeof(int)));
+
+	data_movements_reallocate_tables(handle, node, size);
+
+	if (dm_interface->size)
+	{
+		memcpy(dm_interface->tags, data+sizeof(int), dm_interface->size*sizeof(starpu_mpi_tag_t));
+		memcpy(dm_interface->ranks, data+sizeof(int)+(dm_interface->size*sizeof(starpu_mpi_tag_t)), dm_interface->size*sizeof(int));
+	}
+
+	return 0;
+}
+
+/** unpack_data method: peek_data, then release the packed buffer. */
+static int data_movements_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	data_movements_peek_data(handle, node, ptr, count);
+	starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0);
+
+	return 0;
+}
+
+/** any_to_any copy method: resize the destination tables to the source size,
+ * then copy both tables. Returns 0, or -EAGAIN when starpu_interface_copy()
+ * returned non-zero for at least one of the two tables. */
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	struct data_movements_interface *src_data_movements = src_interface;
+	struct data_movements_interface *dst_data_movements = dst_interface;
+	int ret = 0;
+
+	data_movements_reallocate_tables_interface(dst_data_movements, dst_node, src_data_movements->size);
+
+	/* Nothing to transfer: the destination tables are NULL at this point, do
+	 * not hand them to starpu_interface_copy(). */
+	if (!src_data_movements->size)
+		return 0;
+
+	if (starpu_interface_copy((uintptr_t) src_data_movements->tags, 0, src_node,
+				  (uintptr_t) dst_data_movements->tags, 0, dst_node,
+				  src_data_movements->size*sizeof(starpu_mpi_tag_t),
+				  async_data))
+		ret = -EAGAIN;
+	if (starpu_interface_copy((uintptr_t) src_data_movements->ranks, 0, src_node,
+				  (uintptr_t) dst_data_movements->ranks, 0, dst_node,
+				  src_data_movements->size*sizeof(int),
+				  async_data))
+		ret = -EAGAIN;
+	return ret;
+}
+
+static const struct starpu_data_copy_methods data_movements_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+
+/** Methods of the data_movements data interface. */
+static struct starpu_data_interface_ops interface_data_movements_ops =
+{
+	.register_data_handle = data_movements_register_data_handle,
+	.allocate_data_on_node = data_movements_allocate_data_on_node,
+	.free_data_on_node = data_movements_free_data_on_node,
+	.copy_methods = &data_movements_copy_methods,
+	.get_size = data_movements_get_size,
+	.footprint = data_movements_footprint,
+	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
+	.interface_size = sizeof(struct data_movements_interface),
+	.to_pointer = NULL,
+	.pack_data = data_movements_pack_data,
+	.peek_data = data_movements_peek_data,
+	.unpack_data = data_movements_unpack_data,
+	.describe = NULL
+};
+
+/** Register a data_movements piece of data whose home is \p home_node, made
+ * of the \p ranks and \p tags tables of \p size entries each. */
+void data_movements_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *ranks, starpu_mpi_tag_t *tags, int size)
+{
+	struct data_movements_interface data_movements =
+	{
+		.tags = tags,
+		.ranks = ranks,
+		.size = size
+	};
+
+	starpu_data_register(handleptr, home_node, &data_movements, &interface_data_movements_ops);
+}
+
+#endif
diff --git a/mpi/src/load_balancer/policy/data_movements_interface.h b/mpi/src/load_balancer/policy/data_movements_interface.h
new file mode 100644
index 0000000..c684427
--- /dev/null
+++ b/mpi/src/load_balancer/policy/data_movements_interface.h
@@ -0,0 +1,49 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header name of the #include line below is missing in
+ * this copy of the patch; restore it from upstream before building. */
+#include
+
+/** @file */
+
+#ifndef __DATA_MOVEMENTS_INTERFACE_H
+#define __DATA_MOVEMENTS_INTERFACE_H
+
+/** Data interface describing a set of data movements as two parallel tables:
+ * entry i says that the data identified by tags[i] goes to ranks[i]. */
+struct data_movements_interface
+{
+	/** Table of the tags of the data to move */
+	starpu_mpi_tag_t *tags;
+	/** Table of the destination ranks, one per entry of the tags table */
+	int *ranks;
+	/** Number of entries of each table */
+	int size;
+};
+
+/** Field accessors working directly on an interface pointer */
+#define DATA_MOVEMENTS_GET_TAGS_TABLE(interface)	(((struct data_movements_interface *)(interface))->tags)
+#define DATA_MOVEMENTS_GET_RANKS_TABLE(interface)	(((struct data_movements_interface *)(interface))->ranks)
+#define DATA_MOVEMENTS_GET_SIZE_TABLES(interface)	(((struct data_movements_interface *)(interface))->size)
+
+/** Register a data_movements handle built from the given tables */
+void data_movements_data_register(starpu_data_handle_t *handle, unsigned home_node, int *ranks, starpu_mpi_tag_t *tags, int size);
+
+/** Replace the tables of \p handle on \p node with new ones of \p size entries */
+int data_movements_reallocate_tables(starpu_data_handle_t handle, unsigned node, int size);
+
+/** Accessors to the tables and their size in main memory */
+starpu_mpi_tag_t *data_movements_get_tags_table(starpu_data_handle_t handle);
+int *data_movements_get_ranks_table(starpu_data_handle_t handle);
+int data_movements_get_size_tables(starpu_data_handle_t handle);
+
+/** Addresses of the table pointers in main memory, or NULL when the
+ * corresponding table is not allocated */
+starpu_mpi_tag_t **data_movements_get_ref_tags_table(starpu_data_handle_t handle);
+int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle);
+
+#endif /* __DATA_MOVEMENTS_INTERFACE_H */
diff --git a/mpi/src/load_balancer/policy/load_balancer_policy.h b/mpi/src/load_balancer/policy/load_balancer_policy.h
new file mode 100644
index 0000000..d9b4a0e
--- /dev/null
+++ b/mpi/src/load_balancer/policy/load_balancer_policy.h
@@ -0,0 +1,54 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __LOAD_BALANCER_POLICY_H__
+#define __LOAD_BALANCER_POLICY_H__
+
+/* NOTE(review): the header name of the #include line below is missing in
+ * this copy of the patch; restore it from upstream before building. */
+#include
+
+/** @file */
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** A load balancer consists in a collection of operations on a data
+ * representing the load of the application (in terms of computation, memory,
+ * whatever). StarPU allows several entry points for the user. The load
+ * balancer allows the user to give its load balancing methods to be used on
+ * these entry points of the runtime system. */
+struct load_balancer_policy
+{
+	/** Start the policy with the given configuration; returns 0 on success. */
+	int (*init)(struct starpu_mpi_lb_conf *);
+	/** Stop the policy; returns 0 on success. Declared with an explicit
+	 * (void) so that C compilers check the call like C++ ones do. */
+	int (*deinit)(void);
+	/** Optional: called for each task being submitted. */
+	void (*submitted_task_entry_point)(struct starpu_task *task);
+	/** Optional: called each time a task has finished executing. */
+	void (*finished_task_entry_point)(void);
+
+	/** Name of the load balancing policy. The selection of the load balancer is
+	 * performed through the use of the STARPU_MPI_LB=name environment
+	 * variable.
+	 */
+	const char *policy_name;
+};
+
+extern struct load_balancer_policy load_heat_propagation_policy;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __LOAD_BALANCER_POLICY_H__
diff --git a/mpi/src/load_balancer/policy/load_data_interface.c b/mpi/src/load_balancer/policy/load_data_interface.c
new file mode 100644
index 0000000..d9ed377
--- /dev/null
+++ b/mpi/src/load_balancer/policy/load_data_interface.c
@@ -0,0 +1,276 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header names of the #include lines below are missing in
+ * this copy of the patch; restore them from upstream before building. */
+#include
+#include
+#include
+
+#include "load_data_interface.h"
+
+#if defined(STARPU_USE_MPI_MPI)
+
+/* All the accessors below work on the STARPU_MAIN_RAM interface of the handle. */
+
+/** Return the task threshold at which the submission thread goes to sleep. */
+int load_data_get_sleep_threshold(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->sleep_task_threshold;
+}
+
+/** Return the task threshold at which the submission thread is woken up. */
+int load_data_get_wakeup_threshold(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->wakeup_task_threshold;
+}
+
+/** Return the current phase number. */
+int load_data_get_current_phase(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->phase;
+}
+
+/** Return the number of submitted tasks. */
+int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->nsubmitted_tasks;
+}
+
+/** Return the number of finished tasks. */
+int load_data_get_nfinished_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->nfinished_tasks;
+}
+
+/** Count one more submitted task. Always returns 0. */
+int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	(ld_interface->nsubmitted_tasks)++;
+
+	return 0;
+}
+
+/** Count one more finished task. Always returns 0. */
+int load_data_inc_nfinished_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	(ld_interface->nfinished_tasks)++;
+
+	return 0;
+}
+
+/** Move on to the next phase. Always returns 0. */
+int load_data_next_phase(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	ld_interface->phase++;
+
+	return 0;
+}
+
+/** Set the elapsed time to the time spent since ->start. Always returns 0. */
+int load_data_update_elapsed_time(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	ld_interface->elapsed_time = starpu_timing_now() - ld_interface->start;
+
+	return 0;
+}
+
+/** Return the elapsed time stored by load_data_update_elapsed_time(). */
+double load_data_get_elapsed_time(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->elapsed_time;
+}
+
+/** Raise the wake-up threshold by wakeup_ratio times the number of tasks
+ * submitted beyond the previous threshold. Always returns 0. */
+int load_data_update_wakeup_cond(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	int previous_threshold = ld_interface->wakeup_task_threshold;
+	ld_interface->wakeup_task_threshold += (ld_interface->nsubmitted_tasks - previous_threshold) * ld_interface->wakeup_ratio;
+
+	return 0;
+}
+
+/** Return non-zero when a wake-up threshold is set and the number of
+ * finished tasks is exactly equal to it. */
+int load_data_wakeup_cond(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return (ld_interface->wakeup_task_threshold > 0) && (ld_interface->nfinished_tasks == ld_interface->wakeup_task_threshold);
+}
+
+/** register_data_handle method: copy every field of \p data_interface into
+ * the interface of each memory node. */
+static void load_data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface)
+{
+	(void) home_node;
+	struct load_data_interface *ld_interface = (struct load_data_interface *) data_interface;
+
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct load_data_interface *local_interface = (struct load_data_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		local_interface->start = ld_interface->start;
+		local_interface->elapsed_time = ld_interface->elapsed_time;
+		local_interface->phase = ld_interface->phase;
+		local_interface->nsubmitted_tasks = ld_interface->nsubmitted_tasks;
+		/* each counter is copied from its own field */
+		local_interface->nfinished_tasks = ld_interface->nfinished_tasks;
+		local_interface->wakeup_task_threshold = ld_interface->wakeup_task_threshold;
+		local_interface->wakeup_ratio = ld_interface->wakeup_ratio;
+		local_interface->sleep_task_threshold = ld_interface->sleep_task_threshold;
+	}
+}
+
+/** allocate_data_on_node method: the interface owns no separate buffer, so
+ * there is nothing to allocate. */
+static starpu_ssize_t load_data_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	(void) data_interface;
+	(void) node;
+
+	return 0;
+}
+
+/** free_data_on_node method: nothing to release. */
+static void load_data_free_data_on_node(void *data_interface, unsigned node)
+{
+	(void) data_interface;
+	(void) node;
+}
+
+/** get_size method: the packed form is the interface structure itself. */
+static size_t load_data_get_size(starpu_data_handle_t handle)
+{
+	(void) handle;
+	return sizeof(struct load_data_interface);
+}
+
+/** footprint method: chained hash of the start time, the elapsed time, the
+ * number of submitted tasks and the two thresholds. */
+static uint32_t load_data_footprint(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	return starpu_hash_crc32c_be(ld_interface->start,
+				     starpu_hash_crc32c_be(ld_interface->elapsed_time,
+							   starpu_hash_crc32c_be(ld_interface->nsubmitted_tasks,
+										 starpu_hash_crc32c_be(ld_interface->sleep_task_threshold, ld_interface->wakeup_task_threshold))));
+}
+
+/** pack_data method: store the packed size in \p count and, when \p ptr is
+ * not NULL, return through it a buffer allocated on \p node holding a copy
+ * of the interface structure. */
+static int load_data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct load_data_interface *ld_interface = (struct load_data_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = load_data_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data = (void*) starpu_malloc_on_node_flags(node, *count, 0);
+		/* do not memcpy() into a failed allocation */
+		STARPU_ASSERT(data);
+		*ptr = data;
+		memcpy(data, ld_interface, *count);
+	}
+
+	return 0;
+}
+
+/** peek_data method: overwrite the interface of \p handle on \p node with
+ * the content of a buffer produced by load_data_pack_data(), without
+ * releasing the buffer. */
+static int load_data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct load_data_interface *ld_interface = (struct load_data_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	STARPU_ASSERT(count == sizeof(struct load_data_interface));
+	memcpy(ld_interface, data, count);
+
+	return 0;
+}
+
+/** unpack_data method: peek_data, then release the packed buffer. */
+static int load_data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	load_data_peek_data(handle, node, ptr, count);
+	starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0);
+
+	return 0;
+}
+
+/** any_to_any copy method: deliberately transfers nothing and reports success. */
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	(void) src_interface;
+	(void) dst_interface;
+	(void) src_node;
+	(void) dst_node;
+	(void) async_data;
+
+	return 0;
+}
+
+static const struct starpu_data_copy_methods load_data_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+
+/** Methods of the load_data data interface. */
+static struct starpu_data_interface_ops interface_load_data_ops =
+{
+	.register_data_handle = load_data_register_data_handle,
+	.allocate_data_on_node = load_data_allocate_data_on_node,
+	.free_data_on_node = load_data_free_data_on_node,
+	.copy_methods = &load_data_copy_methods,
+	.get_size = load_data_get_size,
+	.footprint = load_data_footprint,
+	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
+	.interface_size = sizeof(struct load_data_interface),
+	.to_pointer = NULL,
+	.pack_data = load_data_pack_data,
+	.peek_data = load_data_peek_data,
+	.unpack_data = load_data_unpack_data,
+	.describe = NULL
+};
+
+/** Register a load_data piece of data whose home is \p home_node. The start
+ * time is taken now, counters, phase and wake-up threshold start at 0. */
+void load_data_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int sleep_task_threshold, double wakeup_ratio)
+{
+	struct load_data_interface load_data =
+	{
+		.start = starpu_timing_now(),
+		.elapsed_time = 0,
+		.phase = 0,
+		.nsubmitted_tasks = 0,
+		.nfinished_tasks = 0,
+		.sleep_task_threshold = sleep_task_threshold,
+		.wakeup_task_threshold = 0,
+		.wakeup_ratio = wakeup_ratio
+	};
+
+	starpu_data_register(handleptr, home_node, &load_data, &interface_load_data_ops);
+}
+
+#endif
diff --git a/mpi/src/load_balancer/policy/load_data_interface.h b/mpi/src/load_balancer/policy/load_data_interface.h
new file mode 100644
index 0000000..ca201a9
--- /dev/null
+++ b/mpi/src/load_balancer/policy/load_data_interface.h
@@ -0,0 +1,71 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header name of the #include line below is missing in
+ * this copy of the patch; restore it from upstream before building. */
+#include
+
+/** @file */
+
+#ifndef __LOAD_DATA_INTERFACE_H
+#define __LOAD_DATA_INTERFACE_H
+
+/** interface for load_data */
+struct load_data_interface
+{
+	/** Starting time of the execution */
+	double start;
+	/** Time elapsed between the start time and the moment the event "launch
+	 * a load balancing phase" is triggered */
+	double elapsed_time;
+	/** Current submission phase, i.e. how many balancing steps have already
+	 * happened so far. */
+	int phase;
+	/** Number of currently submitted tasks */
+	int nsubmitted_tasks;
+	/** Number of currently finished tasks */
+	int nfinished_tasks;
+	/** Task threshold to sleep the submission thread */
+	int sleep_task_threshold;
+	/** Task threshold to wake-up the submission thread */
+	int wakeup_task_threshold;
+	/** Ratio of submitted tasks to wait for completion before waking up the
+	 * submission thread */
+	double wakeup_ratio;
+};
+
+void load_data_data_register(starpu_data_handle_t *handle, unsigned home_node, int sleep_task_threshold, double wakeup_ratio);
+
+int load_data_get_sleep_threshold(starpu_data_handle_t handle);
+int load_data_get_wakeup_threshold(starpu_data_handle_t handle);
+int load_data_get_current_phase(starpu_data_handle_t handle);
+int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle);
+int load_data_get_nfinished_tasks(starpu_data_handle_t handle);
+
+int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle);
+int load_data_inc_nfinished_tasks(starpu_data_handle_t handle);
+
+int load_data_next_phase(starpu_data_handle_t handle);
+
+int load_data_update_elapsed_time(starpu_data_handle_t handle);
+double load_data_get_elapsed_time(starpu_data_handle_t handle);
+
+int load_data_update_wakeup_cond(starpu_data_handle_t handle);
+int load_data_wakeup_cond(starpu_data_handle_t handle);
+
+#define LOAD_DATA_GET_NSUBMITTED_TASKS(interface)	(((struct load_data_interface *)(interface))->nsubmitted_tasks)
+#define LOAD_DATA_GET_SLEEP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->sleep_task_threshold)
+#define LOAD_DATA_GET_WAKEUP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->wakeup_task_threshold)
+
+#endif /* __LOAD_DATA_INTERFACE_H */
diff --git a/mpi/src/load_balancer/policy/load_heat_propagation.c b/mpi/src/load_balancer/policy/load_heat_propagation.c
new file mode 100644
index 0000000..4ce7b1d
--- /dev/null
+++ b/mpi/src/load_balancer/policy/load_heat_propagation.c
@@ -0,0 +1,666
@@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include "load_balancer_policy.h" +#include "data_movements_interface.h" +#include "load_data_interface.h" +#include + +#if defined(STARPU_USE_MPI_MPI) + +static starpu_mpi_tag_t TAG_LOAD(int n) +{ + return ((starpu_mpi_tag_t) n+1) << 24; +} + +static starpu_mpi_tag_t TAG_MOV(int n) +{ + return ((starpu_mpi_tag_t) n+1) << 20; +} + +/* Hash table of local pieces of data that has been moved out of the local MPI + * node by the load balancer. All of these pieces of data must be migrated back + * to the local node at the end of the execution. */ +struct moved_data_entry +{ + UT_hash_handle hh; + starpu_data_handle_t handle; +}; + +static struct moved_data_entry *mdh = NULL; + +static starpu_pthread_mutex_t load_data_mutex; +static starpu_pthread_cond_t load_data_cond; + +/* MPI infos */ +static int my_rank; +static int world_size; + +/* Number of neighbours of the local MPI node and their IDs. These are given by + * the get_neighbors() method, and thus can be easily changed. 
*/ +static int *neighbor_ids = NULL; +static int nneighbors = 0; + +/* Local load data */ +static starpu_data_handle_t *load_data_handle = NULL; +static starpu_data_handle_t *load_data_handle_cpy = NULL; +/* Load data of neighbours */ +static starpu_data_handle_t *neighbor_load_data_handles = NULL; + +/* Table which contains a data_movements_handle for each MPI node of + * MPI_COMM_WORLD. Since all the MPI nodes must be advised of any data + * movement, this table will be used to perform communications of data + * movements handles following an all-to-all model. */ +static starpu_data_handle_t *data_movements_handles = NULL; + +/* Load balancer interface which contains the application-specific methods for + * the load balancer to use. */ +static struct starpu_mpi_lb_conf *user_itf = NULL; + +static double time_threshold = 20000; + +/****************************************************************************** + * Balancing * + *****************************************************************************/ + + +/* Decides which data has to move where, and fills the + * data_movements_handles[my_rank] data handle from that. 
+ * In data : + * - local load_data_handle + * - nneighbors + * - neighbor_ids[nneighbors] + * - neighbor_load_data_handles[nneighbors] + * Out data : + * - data_movements_handles[my_rank] + */ + +static void balance(starpu_data_handle_t load_data_cpy) +{ + int less_loaded = -1; + int n; + double ref_elapsed_time; + double my_elapsed_time = load_data_get_elapsed_time(load_data_cpy); + + /* Search for the less loaded neighbor */ + ref_elapsed_time = my_elapsed_time; + for (n = 0; n < nneighbors; n++) + { + double elapsed_time = load_data_get_elapsed_time(neighbor_load_data_handles[n]); + if (ref_elapsed_time > elapsed_time) + { + //fprintf(stderr,"Node%d: ref local time %lf vs neighbour%d time %lf\n", my_rank, ref_elapsed_time, neighbor_ids[n], elapsed_time); + less_loaded = neighbor_ids[n]; + ref_elapsed_time = elapsed_time; + } + } + + starpu_data_acquire_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM, STARPU_RW); + /* We found it */ + if (less_loaded >= 0) + { + _STARPU_DEBUG("Less loaded found on node %d : %d\n", my_rank, less_loaded); + double diff_time = my_elapsed_time - ref_elapsed_time; + /* If the difference is higher than a time threshold, we move + * one data to the less loaded neighbour. */ + /* TODO: How to decide the time threshold ? 
*/ + if ((time_threshold > 0) && (diff_time >= time_threshold)) + { + starpu_data_handle_t *handles = NULL; + int nhandles = 0; + user_itf->get_data_unit_to_migrate(&handles, &nhandles, less_loaded); + + data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, nhandles); + + if (nhandles) + { + starpu_mpi_tag_t *tags = data_movements_get_tags_table(data_movements_handles[my_rank]); + int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]); + + for (n = 0; n < nhandles; n++) + { + tags[n] = starpu_mpi_data_get_tag(handles[n]); + ranks[n] = less_loaded; + } + + free(handles); + } + } + else + data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); + } + else + data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); + starpu_data_release_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM); +} + +static void exchange_load_data_infos(starpu_data_handle_t load_data_cpy) +{ + int i; + + /* Allocate all requests and status for point-to-point communications */ + starpu_mpi_req load_send_req[nneighbors]; + starpu_mpi_req load_recv_req[nneighbors]; + + MPI_Status load_send_status[nneighbors]; + MPI_Status load_recv_status[nneighbors]; + + int flag, ret; + + /* Send the local load data to neighbour nodes, and receive the remote load + * data from neighbour nodes */ + for (i = 0; i < nneighbors; i++) + { + //_STARPU_DEBUG("[node %d] sending and receiving with %i-th neighbor %i\n", my_rank, i, neighbor_ids[i]); + ret = starpu_mpi_isend(load_data_cpy, &load_send_req[i], neighbor_ids[i], TAG_LOAD(my_rank), MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + + ret = starpu_mpi_irecv(neighbor_load_data_handles[i], &load_recv_req[i], neighbor_ids[i], TAG_LOAD(neighbor_ids[i]), MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + + /* Wait for completion of all send requests */ + for (i = 0; i < nneighbors; i++) + { + flag = 0; 
+ while (!flag) + { + ret = starpu_mpi_test(&load_send_req[i], &flag, &load_send_status[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + } + } + + /* Wait for completion of all receive requests */ + for (i = 0; i < nneighbors; i++) + { + flag = 0; + while (!flag) + { + ret = starpu_mpi_test(&load_recv_req[i], &flag, &load_recv_status[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + } + } +} + +static void exchange_data_movements_infos() +{ + int i; + + /* Allocate all requests and status for point-to-point communications */ + starpu_mpi_req data_movements_send_req[world_size]; + starpu_mpi_req data_movements_recv_req[world_size]; + + MPI_Status data_movements_send_status[world_size]; + MPI_Status data_movements_recv_status[world_size]; + + int flag, ret; + + /* Send the new ranks of local data to all other nodes, and receive the new + * ranks of all remote data from all other nodes */ + for (i = 0; i < world_size; i++) + { + if (i != my_rank) + { + //_STARPU_DEBUG("[node %d] Send and receive data movement with %d\n", my_rank, i); + ret = starpu_mpi_isend(data_movements_handles[my_rank], &data_movements_send_req[i], i, TAG_MOV(my_rank), MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_irecv(data_movements_handles[i], &data_movements_recv_req[i], i, TAG_MOV(i), MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + } + + /* Wait for completion of all send requests */ + for (i = 0; i < world_size; i++) + { + if (i != my_rank) + { + //fprintf(stderr,"Wait for sending data movement of %d to %d\n", my_rank, i); + flag = 0; + while (!flag) + { + ret = starpu_mpi_test(&data_movements_send_req[i], &flag, &data_movements_send_status[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + } + } + } + + /* Wait for completion of all receive requests */ + for (i = 0; i < world_size; i++) + { + if (i != my_rank) + { + //fprintf(stderr,"Wait for receiving data movement from %d on %d\n", i, 
my_rank); + flag = 0; + while (!flag) + { + ret = starpu_mpi_test(&data_movements_recv_req[i], &flag, &data_movements_recv_status[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + } + } + } +} + +static void update_data_ranks() +{ + int i,j; + + /* Update the new ranks for all concerned data */ + for (i = 0; i < world_size; i++) + { + int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]); + if (ndata_to_update) + { + //fprintf(stderr,"Update %d data from table %d on node %d\n", ndata_to_update, i, my_rank); + + for (j = 0; j < ndata_to_update; j++) + { + starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]); + STARPU_ASSERT(handle); + int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j]; + + /* Save the fact that the data has been moved out of this node */ + if (i == my_rank) + { + struct moved_data_entry *md; + _STARPU_MPI_MALLOC(md, sizeof(struct moved_data_entry)); + md->handle = handle; + HASH_ADD_PTR(mdh, handle, md); + } + else if (dst_rank == my_rank) + { + /* The data has been moved out, and now is moved back, so + * update the state of the moved_data hash table to reflect + * this change */ + struct moved_data_entry *md = NULL; + HASH_FIND_PTR(mdh, &handle, md); + if (md) + { + HASH_DEL(mdh, md); + free(md); + } + } + + //if (i == my_rank) + //{ + // if (dst_rank != my_rank) + // fprintf(stderr,"Move data %p (tag %d) from node %d to node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], my_rank, dst_rank); + // else + // fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank); + //} + + _STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%"PRIi64",%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank); + + /* Migrate the data handle */ + int ret = 
starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + + _STARPU_DEBUG("New rank (%d) of data %"PRIi64" upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank); + starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD); + } + } + } +} + +static void clean_balance() +{ + int i; + starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy); + for (i = 0; i < nneighbors; i++) + starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]); + for (i = 0; i < world_size; i++) + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]); +} + +/* Core function of the load balancer. Computes from the load_data_cpy handle a + * load balancing of the work to come (if needed), perform the necessary data + * communications and negotiate with the other nodes the rebalancing. */ +static void heat_balance(starpu_data_handle_t load_data_cpy) +{ + /* Exchange load data handles with neighboring nodes */ + exchange_load_data_infos(load_data_cpy); + + /* Determine if this node should sent data to other nodes : + * which ones, how much data */ + balance(load_data_cpy); + + /* Exchange data movements with neighboring nodes */ + exchange_data_movements_infos(); + + /* Perform data movements */ + update_data_ranks(); + + /* Clean the data handles to properly launch the next balance phase */ + clean_balance(); +} + +/****************************************************************************** + * Heat Load Balancer Entry Points * + *****************************************************************************/ + +static void submitted_task_heat(struct starpu_task *task) +{ + load_data_inc_nsubmitted_tasks(*load_data_handle); + //if (load_data_get_nsubmitted_tasks(*load_data_handle) > task->tag_id) + //{ + // fprintf(stderr,"Error : nsubmitted_tasks (%d) > tag_id (%lld) ! 
\n", load_data_get_nsubmitted_tasks(*load_data_handle), (long long int)task->tag_id); + // STARPU_ASSERT(0); + //} + + int phase = load_data_get_current_phase(*load_data_handle); + /* Numbering of tasks in StarPU-MPI should be given by the application with + * the STARPU_TAG_ONLY insert task option for now. */ + /* TODO: Properly implement a solution for numbering tasks in StarPU-MPI */ + if (((int)task->tag_id / load_data_get_sleep_threshold(*load_data_handle)) > phase) + { + STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex); + load_data_update_wakeup_cond(*load_data_handle); + //fprintf(stderr,"Node %d sleep on tag %lld\n", my_rank, (long long int)task->tag_id); + //if (load_data_get_nsubmitted_tasks(*load_data_handle) < load_data_get_wakeup_threshold(*load_data_handle)) + //{ + // fprintf(stderr,"Error : nsubmitted_tasks (%d) lower than wakeup_threshold (%d) !\n", load_data_get_nsubmitted_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle)); + // STARPU_ASSERT(0); + //} + + if (load_data_get_wakeup_threshold(*load_data_handle) > load_data_get_nfinished_tasks(*load_data_handle)) + STARPU_PTHREAD_COND_WAIT(&load_data_cond, &load_data_mutex); + + load_data_next_phase(*load_data_handle); + + /* Register a copy of the load data at this moment, to allow to compute + * the heat balance while not locking the load data during the whole + * balance step, which could cause all the workers to wait on the lock + * to update the data. 
*/ + struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle); + void* itf_src = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM); + void* itf_dst = starpu_data_get_interface_on_node(*load_data_handle_cpy, STARPU_MAIN_RAM); + memcpy(itf_dst, itf_src, itf_load_data->interface_size); + + _STARPU_DEBUG("[node %d] Balance phase %d\n", my_rank, load_data_get_current_phase(*load_data_handle)); + STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); + + heat_balance(*load_data_handle_cpy); + } +} + +static void finished_task_heat(void) +{ + //fprintf(stderr,"Try to decrement nsubmitted_tasks..."); + STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex); + + load_data_inc_nfinished_tasks(*load_data_handle); + //fprintf(stderr,"Decrement nsubmitted_tasks, now %d\n", load_data_get_nsubmitted_tasks(*load_data_handle)); + if (load_data_wakeup_cond(*load_data_handle)) + { + //fprintf(stderr,"Wakeup ! nfinished_tasks = %d, wakeup_threshold = %d\n", load_data_get_nfinished_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle)); + load_data_update_elapsed_time(*load_data_handle); + STARPU_PTHREAD_COND_SIGNAL(&load_data_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); + } + else + STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); +} + +/****************************************************************************** + * Initialization / Deinitialization * + *****************************************************************************/ + +static int init_heat(struct starpu_mpi_lb_conf *itf) +{ + int i; + int sleep_task_threshold; + double wakeup_ratio; + + starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); + + /* Immediately return if the starpu_mpi_lb_conf is invalid. 
*/ + if (!(itf && itf->get_neighbors && itf->get_data_unit_to_migrate)) + { + _STARPU_MSG("Error: struct starpu_mpi_lb_conf %p invalid\n", itf); + return 1; + } + + _STARPU_MPI_MALLOC(user_itf, sizeof(struct starpu_mpi_lb_conf)); + memcpy(user_itf, itf, sizeof(struct starpu_mpi_lb_conf)); + + /* Get the neighbors of the local MPI node */ + user_itf->get_neighbors(&neighbor_ids, &nneighbors); + if (nneighbors == 0) + { + _STARPU_MSG("Error: Function get_neighbors returning 0 neighbor\n"); + free(user_itf); + user_itf = NULL; + return 2; + } + + /* The sleep threshold is deducted from the numbering of tasks by the + * application. For example, with this threshold, the submission thread + * will stop when a task for which the numbering is 2000 or above will be + * submitted to StarPU-MPI. However, much less tasks can be really + * submitted to the local MPI node: the sleeping of the submission threads + * checks the numbering of the tasks, not how many tasks have been + * submitted to the local MPI node, which are two different things. */ + char *sleep_env = starpu_getenv("LB_HEAT_SLEEP_THRESHOLD"); + if (sleep_env) + sleep_task_threshold = atoi(sleep_env); + else + sleep_task_threshold = 2000; + + char *wakeup_env = starpu_getenv("LB_HEAT_WAKEUP_RATIO"); + if (wakeup_env) + wakeup_ratio = atof(wakeup_env); + else + wakeup_ratio = 0.5; + + char *time_env = starpu_getenv("LB_HEAT_TIME_THRESHOLD"); + if (time_env) + time_threshold = atoi(time_env); + else + time_threshold = 2000; + + STARPU_PTHREAD_MUTEX_INIT(&load_data_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&load_data_cond, NULL); + + /* Allocate, initialize and register all the data handles that will be + * needed for the load balancer, to not reallocate them at each balance + * step. 
*/ + + /* Local load data */ + _STARPU_MPI_CALLOC(load_data_handle, 1, sizeof(starpu_data_handle_t)); + load_data_data_register(load_data_handle, STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio); + + /* Copy of the local load data to enable parallel update of the load data + * with communications to neighbor nodes */ + _STARPU_MPI_CALLOC(load_data_handle_cpy, 1, sizeof(starpu_data_handle_t)); + void *local_interface = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM); + struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle); + starpu_data_register(load_data_handle_cpy, STARPU_MAIN_RAM, local_interface, itf_load_data); + starpu_mpi_data_register(*load_data_handle_cpy, TAG_LOAD(my_rank), my_rank); + + /* Remote load data */ + _STARPU_MPI_CALLOC(neighbor_load_data_handles, nneighbors, sizeof(starpu_data_handle_t)); + for (i = 0; i < nneighbors; i++) + { + load_data_data_register(&neighbor_load_data_handles[i], STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio); + starpu_mpi_data_register(neighbor_load_data_handles[i], TAG_LOAD(neighbor_ids[i]), neighbor_ids[i]); + } + + /* Data movements handles */ + _STARPU_MPI_MALLOC(data_movements_handles, world_size*sizeof(starpu_data_handle_t)); + for (i = 0; i < world_size; i++) + { + data_movements_data_register(&data_movements_handles[i], STARPU_MAIN_RAM, NULL, NULL, 0); + starpu_mpi_data_register(data_movements_handles[i], TAG_MOV(i), i); + } + + /* Hash table of moved data that will be brought back on the node at + * termination time */ + mdh = NULL; + + return 0; +} + +/* Move back all the data that has been migrated out of this node at + * denitialization time of the load balancer, to ensure the consistency with + * the ranks of data originally registered by the application. 
*/ +static void move_back_data() +{ + int i,j; + + /* Update the new ranks for all concerned data */ + for (i = 0; i < world_size; i++) + { + /* In this case, each data_movements_handles contains the handles to move back on the specific node */ + int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]); + if (ndata_to_update) + { + _STARPU_DEBUG("Move back %d data from table %d on node %d\n", ndata_to_update, i, my_rank); + + for (j = 0; j < ndata_to_update; j++) + { + starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]); + STARPU_ASSERT(handle); + + int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j]; + STARPU_ASSERT(i == dst_rank); + + if (i == my_rank) + { + /* The data is moved back, so update the state of the + * moved_data hash table to reflect this change */ + struct moved_data_entry *md = NULL; + HASH_FIND_PTR(mdh, &handle, md); + if (md) + { + HASH_DEL(mdh, md); + free(md); + } + } + + //fprintf(stderr,"Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank); + + /* Migrate the data handle */ + starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL); + + //fprintf(stderr,"New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank); + starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD); + } + } + } +} + +static int deinit_heat() +{ + int i; + + if ((!user_itf) || (nneighbors == 0)) + return 1; + + _STARPU_DEBUG("Shutting down heat lb policy\n"); + + unsigned int ndata_to_move_back = HASH_COUNT(mdh); + + starpu_data_acquire_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM, STARPU_RW); + if (ndata_to_move_back) + { + _STARPU_DEBUG("Move back %u data on node %d ..\n", ndata_to_move_back, my_rank); + data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 
ndata_to_move_back); + + starpu_mpi_tag_t *tags = data_movements_get_tags_table(data_movements_handles[my_rank]); + int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]); + + int n = 0; + struct moved_data_entry *md=NULL, *tmp=NULL; + HASH_ITER(hh, mdh, md, tmp) + { + tags[n] = starpu_mpi_data_get_tag(md->handle); + ranks[n] = my_rank; + n++; + } + } + else + data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); + starpu_data_release_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM); + + exchange_data_movements_infos(); + move_back_data(); + + /* This assert ensures that all nodes have properly gotten back all the + * data that has been moven out of the node. */ + STARPU_ASSERT(HASH_COUNT(mdh) == 0); + free(mdh); + mdh = NULL; + + starpu_data_unregister(*load_data_handle); + free(load_data_handle); + load_data_handle = NULL; + + starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy); + starpu_data_unregister(*load_data_handle_cpy); + free(load_data_handle_cpy); + load_data_handle_cpy = NULL; + + for (i = 0; i < nneighbors; i++) + { + starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]); + starpu_data_unregister(neighbor_load_data_handles[i]); + } + free(neighbor_load_data_handles); + neighbor_load_data_handles = NULL; + + nneighbors = 0; + free(neighbor_ids); + neighbor_ids = NULL; + + for (i = 0; i < world_size; i++) + { + starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]); + starpu_data_acquire_on_node(data_movements_handles[i], STARPU_MAIN_RAM, STARPU_W); + data_movements_reallocate_tables(data_movements_handles[i], STARPU_MAIN_RAM, 0); + starpu_data_release_on_node(data_movements_handles[i], STARPU_MAIN_RAM); + starpu_data_unregister(data_movements_handles[i]); + } + free(data_movements_handles); + data_movements_handles = NULL; + + STARPU_PTHREAD_MUTEX_DESTROY(&load_data_mutex); + STARPU_PTHREAD_COND_DESTROY(&load_data_cond); + free(user_itf); + user_itf = 
NULL; + + return 0; +} + +/****************************************************************************** + * Policy * + *****************************************************************************/ + +struct load_balancer_policy load_heat_propagation_policy = +{ + .init = init_heat, + .deinit = deinit_heat, + .submitted_task_entry_point = submitted_task_heat, + .finished_task_entry_point = finished_task_heat, + .policy_name = "heat" +}; + +#endif diff --git a/mpi/src/mpi/starpu_mpi_comm.c b/mpi/src/mpi/starpu_mpi_comm.c new file mode 100644 index 0000000..d588edf --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_comm.c @@ -0,0 +1,234 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Guillaume Beauchamp + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +#ifdef STARPU_USE_MPI_MPI + +struct _starpu_mpi_comm +{ + MPI_Comm comm; + struct _starpu_mpi_envelope *envelope; + MPI_Request request; + int posted; + +#ifdef STARPU_SIMGRID + MPI_Status status; + starpu_pthread_queue_t queue; + unsigned done; +#endif +}; +struct _starpu_mpi_comm_hashtable +{ + UT_hash_handle hh; + MPI_Comm comm; +}; + +/* Protect between comm addition from submitting tasks and MPI thread */ +static starpu_pthread_rwlock_t _starpu_mpi_comms_mutex; + +struct _starpu_mpi_comm_hashtable *_starpu_mpi_comms_cache; +struct _starpu_mpi_comm **_starpu_mpi_comms; +int _starpu_mpi_comm_nb; +int _starpu_mpi_comm_allocated; +int _starpu_mpi_comm_tested; + +void _starpu_mpi_comm_init(MPI_Comm comm) +{ + _STARPU_MPI_DEBUG(10, "allocating for %d communicators\n", _starpu_mpi_comm_allocated); + _starpu_mpi_comm_allocated=10; + _STARPU_MPI_CALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated, sizeof(struct _starpu_mpi_comm *)); + _starpu_mpi_comm_nb=0; + _starpu_mpi_comm_tested=0; + _starpu_mpi_comms_cache = NULL; + STARPU_PTHREAD_RWLOCK_INIT(&_starpu_mpi_comms_mutex, NULL); + + _starpu_mpi_comm_register(comm); +} + +void _starpu_mpi_comm_shutdown() +{ + int i; + for(i=0 ; i<_starpu_mpi_comm_nb ; i++) + { + struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; + free(_comm->envelope); +#ifdef STARPU_SIMGRID + starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &_comm->queue); + starpu_pthread_queue_destroy(&_comm->queue); +#endif + free(_comm); + } + free(_starpu_mpi_comms); + + struct _starpu_mpi_comm_hashtable *entry=NULL, *tmp=NULL; + HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp) + { + HASH_DEL(_starpu_mpi_comms_cache, entry); + free(entry); + } + + STARPU_PTHREAD_RWLOCK_DESTROY(&_starpu_mpi_comms_mutex); +} + +void _starpu_mpi_comm_register(MPI_Comm comm) +{ + struct _starpu_mpi_comm_hashtable *found; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); + 
HASH_FIND(hh, _starpu_mpi_comms_cache, &comm, sizeof(MPI_Comm), found); + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); + if (found) + { + _STARPU_MPI_DEBUG(10, "comm %ld (%ld) already registered\n", (long int)comm, (long int)MPI_COMM_WORLD); + return; + } + + STARPU_PTHREAD_RWLOCK_WRLOCK(&_starpu_mpi_comms_mutex); + HASH_FIND(hh, _starpu_mpi_comms_cache, &comm, sizeof(MPI_Comm), found); + if (found) + { + _STARPU_MPI_DEBUG(10, "comm %ld (%ld) already registered in between\n", (long int)comm, (long int)MPI_COMM_WORLD); + } + else + { + if (_starpu_mpi_comm_nb == _starpu_mpi_comm_allocated) + { + _starpu_mpi_comm_allocated *= 2; + _STARPU_MPI_DEBUG(10, "reallocating for %d communicators\n", _starpu_mpi_comm_allocated); + _STARPU_MPI_REALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated * sizeof(struct _starpu_mpi_comm *)); + } + _STARPU_MPI_DEBUG(10, "registering comm %ld (%ld) number %d\n", (long int)comm, (long int)MPI_COMM_WORLD, _starpu_mpi_comm_nb); + struct _starpu_mpi_comm *_comm; + _STARPU_MPI_CALLOC(_comm, 1, sizeof(struct _starpu_mpi_comm)); + _comm->comm = comm; + _STARPU_MPI_CALLOC(_comm->envelope, 1,sizeof(struct _starpu_mpi_envelope)); + _comm->posted = 0; + _starpu_mpi_comms[_starpu_mpi_comm_nb] = _comm; + _starpu_mpi_comm_nb++; + struct _starpu_mpi_comm_hashtable *entry; + _STARPU_MPI_MALLOC(entry, sizeof(*entry)); + entry->comm = comm; + HASH_ADD(hh, _starpu_mpi_comms_cache, comm, sizeof(entry->comm), entry); + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_init(&_comm->queue); + starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &_comm->queue); + _comm->done = 0; +#endif + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); +} + +void _starpu_mpi_comm_post_recv() +{ + int i; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); + for(i=0 ; i<_starpu_mpi_comm_nb ; i++) + { + struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; + if (_comm->posted == 0) + { + _STARPU_MPI_DEBUG(3, "Posting a receive 
to get a data envelop on comm %d %ld\n", i, (long int)_comm->comm); + _STARPU_MPI_COMM_FROM_DEBUG(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, (int64_t)_STARPU_MPI_TAG_ENVELOPE, _comm->comm); + MPI_Irecv(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm, &_comm->request); +#ifdef STARPU_SIMGRID + _starpu_mpi_simgrid_wait_req(&_comm->request, &_comm->status, &_comm->queue, &_comm->done); +#endif + _comm->posted = 1; + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); +} + +int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm) +{ + int i=_starpu_mpi_comm_tested; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); + while (1) + { + struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; + + if (_comm->posted) + { + int flag, res; + /* test whether an envelope has arrived. 
*/ +#ifdef STARPU_SIMGRID + res = _starpu_mpi_simgrid_mpi_test(&_comm->done, &flag); + memcpy(status, &_comm->status, sizeof(*status)); +#else + res = MPI_Test(&_comm->request, &flag, status); +#endif + STARPU_ASSERT(res == MPI_SUCCESS); + if (flag) + { + _comm->posted = 0; + _starpu_mpi_comm_tested++; + if (_starpu_mpi_comm_tested == _starpu_mpi_comm_nb) + _starpu_mpi_comm_tested = 0; + *envelope = _comm->envelope; + *comm = _comm->comm; + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); + return 1; + } + } + i++; + if (i == _starpu_mpi_comm_nb) + { + i=0; + } + if (i == _starpu_mpi_comm_tested) + { + // We have tested all the requests, none has completed + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); + return 0; + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); + return 0; +} + +void _starpu_mpi_comm_cancel_recv() +{ + int i; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); + for(i=0 ; i<_starpu_mpi_comm_nb ; i++) + { + struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; + if (_comm->posted == 1) + { + MPI_Cancel(&_comm->request); +#ifndef STARPU_SIMGRID + { + MPI_Status status; + MPI_Wait(&_comm->request, &status); + } +#endif + _comm->posted = 0; + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); +} + +#endif /* STARPU_USE_MPI_MPI */ diff --git a/mpi/src/mpi/starpu_mpi_comm.h b/mpi/src/mpi/starpu_mpi_comm.h new file mode 100644 index 0000000..630ec2b --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_comm.h @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_COMM_H__ +#define __STARPU_MPI_COMM_H__ + +#include +#include +#include + +#ifdef STARPU_USE_MPI_MPI + +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_comm_init(MPI_Comm comm); +void _starpu_mpi_comm_shutdown(); +void _starpu_mpi_comm_register(MPI_Comm comm); +void _starpu_mpi_comm_post_recv(); +int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm); +void _starpu_mpi_comm_cancel_recv(); + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI_MPI +#endif // __STARPU_MPI_COMM_H__ diff --git a/mpi/src/mpi/starpu_mpi_driver.h b/mpi/src/mpi/starpu_mpi_driver.h new file mode 100644 index 0000000..adad452 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_driver.h @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_DRIVER_H__ +#define __STARPU_MPI_DRIVER_H__ + +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_MPI + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_driver_init(struct starpu_conf *conf); +void _starpu_mpi_driver_shutdown(); + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI_MPI +#endif // __STARPU_MPI_DRIVER_H__ diff --git a/mpi/src/mpi/starpu_mpi_early_data.c b/mpi/src/mpi/starpu_mpi_early_data.c new file mode 100644 index 0000000..89568cb --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_early_data.c @@ -0,0 +1,191 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef STARPU_USE_MPI_MPI
+
+/** the hashlist is on 2 levels, the first top level is indexed on (comm, rank), the second lower level is indexed on the data tag */
+
+struct _starpu_mpi_early_data_handle_hashlist
+{
+	/** second level: one entry per data tag received from this (comm, rank) */
+	struct _starpu_mpi_early_data_handle_tag_hashlist *datahash;
+	UT_hash_handle hh;
+	/** hash key of the first level */
+	struct _starpu_mpi_node node;
+};
+
+/** stores data which have been received by MPI but have not been requested by the application */
+static starpu_pthread_mutex_t _starpu_mpi_early_data_handle_mutex;
+static struct _starpu_mpi_early_data_handle_hashlist *_starpu_mpi_early_data_handle_hashmap = NULL;
+/** number of early data handles currently queued in the hashmap */
+static int _starpu_mpi_early_data_handle_hashmap_count = 0;
+
+/** Reset the hashmap and its counter, and initialize the mutex protecting them. */
+void _starpu_mpi_early_data_init(void)
+{
+	_starpu_mpi_early_data_handle_hashmap = NULL;
+	_starpu_mpi_early_data_handle_hashmap_count = 0;
+	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_data_handle_mutex, NULL);
+}
+
+/**
+ * Check that no early data is left queued. When some is left, report the
+ * (comm, source, tag) of each pending bucket and abort through
+ * STARPU_ASSERT_MSG.
+ */
+void _starpu_mpi_early_data_check_termination(void)
+{
+	if (_starpu_mpi_early_data_handle_hashmap_count != 0)
+	{
+		struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL;
+		HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
+		{
+			struct _starpu_mpi_early_data_handle_tag_hashlist *tag_current=NULL, *tag_tmp=NULL;
+			HASH_ITER(hh, current->datahash, tag_current, tag_tmp)
+			{
+				/* _starpu_mpi_early_data_find() leaves emptied buckets in the hash:
+				 * only buckets that still hold data are unexpected messages */
+				if (_starpu_mpi_early_data_handle_list_empty(&tag_current->list))
+					continue;
+				/* the tag is printed through long long so that the format is valid whatever the width of long */
+				_STARPU_MSG("Unexpected message with comm %ld source %d tag %lld\n", (long int)current->node.comm, current->node.rank, (long long)tag_current->data_tag);
+			}
+		}
+		STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not 0 (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count);
+	}
+}
+
+/**
+ * Free both levels of the hashmap and destroy the mutex.
+ * Every per-tag list is asserted to be empty.
+ */
+void _starpu_mpi_early_data_shutdown(void)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL;
+	HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
+	{
+		_STARPU_MPI_DEBUG(600, "Hash early_data with comm %ld source %d\n", (long int) current->node.comm, current->node.rank);
+		struct _starpu_mpi_early_data_handle_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL;
+		HASH_ITER(hh, current->datahash, tag_entry, tag_tmp)
+		{
+			_STARPU_MPI_DEBUG(600, "Hash 2nd level with tag %lld\n", (long long)tag_entry->data_tag);
+			STARPU_ASSERT(_starpu_mpi_early_data_handle_list_empty(&tag_entry->list));
+			HASH_DEL(current->datahash, tag_entry);
+			free(tag_entry);
+		}
+		HASH_DEL(_starpu_mpi_early_data_handle_hashmap, current);
+		free(current);
+	}
+	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_data_handle_mutex);
+}
+
+/**
+ * Allocate a zeroed early data handle for the given envelope, source rank and
+ * communicator, with its request mutex and condition initialized.
+ * The result is to be released with _starpu_mpi_early_data_delete().
+ */
+struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm)
+{
+	struct _starpu_mpi_early_data_handle* early_data_handle;
+	_STARPU_MPI_CALLOC(early_data_handle, 1, sizeof(struct _starpu_mpi_early_data_handle));
+	STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL);
+	early_data_handle->node_tag.node.comm = comm;
+	early_data_handle->node_tag.node.rank = source;
+	early_data_handle->node_tag.data_tag = envelope->data_tag;
+	return early_data_handle;
+}
+
+/**
+ * Release a handle obtained from _starpu_mpi_early_data_create().
+ * The mutex and condition set up at creation are destroyed before the memory
+ * is freed. A NULL handle is accepted and ignored, as free() does.
+ */
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	if (early_data_handle == NULL)
+		return;
+	STARPU_PTHREAD_MUTEX_DESTROY(&early_data_handle->req_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&early_data_handle->req_cond);
+	free(early_data_handle);
+}
+
+/**
+ * Remove and return the oldest early data handle queued for
+ * (comm, rank, tag) of \p node_tag, or NULL when there is none.
+ * The per-tag bucket is left in the hash even when it becomes empty.
+ */
+struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
+	struct _starpu_mpi_early_data_handle *early_data_handle;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
+	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %lld\n", (long int)node_tag->node.comm, node_tag->node.rank, (long long)node_tag->data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist == NULL)
+	{
+		_STARPU_MPI_DEBUG(600, "No entry for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank);
+		early_data_handle = NULL;
+	}
+	else
+	{
+		struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist;
+		HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist == NULL)
+		{
+			_STARPU_MPI_DEBUG(600, "No entry for tag %lld\n", (long long)node_tag->data_tag);
+			early_data_handle = NULL;
+		}
+		else if (_starpu_mpi_early_data_handle_list_empty(&tag_hashlist->list))
+		{
+			_STARPU_MPI_DEBUG(600, "List empty for tag %lld\n", (long long)node_tag->data_tag);
+			early_data_handle = NULL;
+		}
+		else
+		{
+			_starpu_mpi_early_data_handle_hashmap_count --;
+			early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&tag_hashlist->list);
+		}
+	}
+	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %lld\n", early_data_handle, (long int)node_tag->node.comm, node_tag->node.rank, (long long)node_tag->data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
+	return early_data_handle;
+}
+
+/**
+ * Detach from the hashmap the whole per-tag bucket matching \p node_tag and
+ * return it, or return NULL when there is none. The global counter is
+ * decreased by the number of handles in the bucket. The bucket is no longer
+ * referenced by the hashmap after the call.
+ */
+struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
+	struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist = NULL;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
+	_STARPU_MPI_DEBUG(60, "Looking for hashlist for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank);
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist)
+	{
+		_STARPU_MPI_DEBUG(60, "Looking for hashlist for (tag %lld)\n", (long long)node_tag->data_tag);
+		HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist)
+		{
+			_starpu_mpi_early_data_handle_hashmap_count -= _starpu_mpi_early_data_handle_list_size(&tag_hashlist->list);
+			HASH_DEL(hashlist->datahash, tag_hashlist);
+		}
+	}
+	_STARPU_MPI_DEBUG(60, "Found hashlist %p for (comm %ld, source %d) and (tag %lld)\n", tag_hashlist, (long int)node_tag->node.comm, node_tag->node.rank, (long long)node_tag->data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
+	return tag_hashlist;
+}
+
+/**
+ * Queue \p early_data_handle at the back of the bucket matching its
+ * (comm, rank, tag), creating the first-level and per-tag entries on demand.
+ */
+void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
+	_STARPU_MPI_DEBUG(60, "Adding early_data_handle %p with comm %ld source %d tag %lld (%p)\n", early_data_handle, (long int)early_data_handle->node_tag.node.comm, early_data_handle->node_tag.node.rank, (long long)early_data_handle->node_tag.data_tag, &early_data_handle->node_tag.node);
+
+	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &early_data_handle->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_data_handle_hashlist));
+		hashlist->node = early_data_handle->node_tag.node;
+		hashlist->datahash = NULL;
+		HASH_ADD(hh, _starpu_mpi_early_data_handle_hashmap, node, sizeof(hashlist->node), hashlist);
+	}
+
+	struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist;
+	HASH_FIND(hh, hashlist->datahash, &early_data_handle->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+	if (tag_hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_data_handle_tag_hashlist));
+		tag_hashlist->data_tag = early_data_handle->node_tag.data_tag;
+		HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist);
+		_starpu_mpi_early_data_handle_list_init(&tag_hashlist->list);
+	}
+
+	_starpu_mpi_early_data_handle_list_push_back(&tag_hashlist->list, early_data_handle);
+	_starpu_mpi_early_data_handle_hashmap_count ++;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
+}
+
+#endif // STARPU_USE_MPI_MPI
diff --git
a/mpi/src/mpi/starpu_mpi_early_data.h b/mpi/src/mpi/starpu_mpi_early_data.h new file mode 100644 index 0000000..473ba26 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_early_data.h @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_EARLY_DATA_H__ +#define __STARPU_MPI_EARLY_DATA_H__ + +#include +#include +#include +#include +#include +#include +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_MPI + +#ifdef __cplusplus +extern "C" +{ +#endif + +LIST_TYPE(_starpu_mpi_early_data_handle, + starpu_data_handle_t handle; + struct _starpu_mpi_req *req; + void *buffer; + size_t size; + unsigned buffer_node; + struct _starpu_mpi_node_tag node_tag; + starpu_pthread_mutex_t req_mutex; + starpu_pthread_cond_t req_cond; +); + +struct _starpu_mpi_early_data_handle_tag_hashlist +{ + struct _starpu_mpi_early_data_handle_list list; + UT_hash_handle hh; + starpu_mpi_tag_t data_tag; +}; + +struct _starpu_mpi_envelope; + +void _starpu_mpi_early_data_init(void); +void _starpu_mpi_early_data_check_termination(void); +void _starpu_mpi_early_data_shutdown(void); + +struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC; +struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag 
*node_tag); +void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle); +void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle); + +// Not used now but needed for fault tolerance +struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag); + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_MPI_MPI */ +#endif /* __STARPU_MPI_EARLY_DATA_H__ */ diff --git a/mpi/src/mpi/starpu_mpi_early_request.c b/mpi/src/mpi/starpu_mpi_early_request.c new file mode 100644 index 0000000..59f8957 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_early_request.c @@ -0,0 +1,173 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef STARPU_USE_MPI_MPI
+
+/** stores application requests for which data have not been received yet */
+/** the hashlist is on 2 levels, the first top level is indexed on (comm, rank), the second lower level is indexed on the data tag */
+struct _starpu_mpi_early_request_hashlist
+{
+	/** second level: one entry per data tag requested from this (comm, rank) */
+	struct _starpu_mpi_early_request_tag_hashlist *datahash;
+	UT_hash_handle hh;
+	/** hash key of the first level */
+	struct _starpu_mpi_node node;
+};
+
+static starpu_pthread_mutex_t _starpu_mpi_early_request_mutex;
+/* NOTE(review): the two variables below have external linkage although no
+ * use outside this file is visible here; confirm before making them static */
+struct _starpu_mpi_early_request_hashlist *_starpu_mpi_early_request_hash;
+int _starpu_mpi_early_request_hash_count;
+
+/** Reset the hashmap and its counter, and initialize the mutex protecting them. */
+void _starpu_mpi_early_request_init(void)
+{
+	_starpu_mpi_early_request_hash = NULL;
+	_starpu_mpi_early_request_hash_count = 0;
+	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_request_mutex, NULL);
+}
+
+/**
+ * Free both levels of the hashmap and destroy the mutex.
+ * Every per-tag request list is asserted to be empty.
+ */
+void _starpu_mpi_early_request_shutdown(void)
+{
+	struct _starpu_mpi_early_request_hashlist *entry=NULL, *tmp=NULL;
+	HASH_ITER(hh, _starpu_mpi_early_request_hash, entry, tmp)
+	{
+		struct _starpu_mpi_early_request_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL;
+		HASH_ITER(hh, entry->datahash, tag_entry, tag_tmp)
+		{
+			STARPU_ASSERT(_starpu_mpi_req_list_empty(&tag_entry->list));
+			HASH_DEL(entry->datahash, tag_entry);
+			free(tag_entry);
+		}
+
+		HASH_DEL(_starpu_mpi_early_request_hash, entry);
+		free(entry);
+	}
+	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_request_mutex);
+}
+
+/** Return the number of early requests currently queued (read without taking the mutex). */
+int _starpu_mpi_early_request_count(void)
+{
+	return _starpu_mpi_early_request_hash_count;
+}
+
+/** Abort through STARPU_ASSERT_MSG when early requests are still queued. */
+void _starpu_mpi_early_request_check_termination(void)
+{
+	STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of early requests left is not zero");
+}
+
+/**
+ * Remove and return the oldest early request queued for
+ * (\p comm, \p source, \p data_tag), or NULL when there is none.
+ * The per-tag bucket is left in the hash even when it becomes empty.
+ */
+struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
+{
+	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_req *found;
+	struct _starpu_mpi_early_request_hashlist *hashlist;
+
+	/* the node part is handed to HASH_FIND as a sizeof()-long key: presumably
+	 * zeroed first so that padding bytes cannot differ -- TODO confirm */
+	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
+	node_tag.data_tag = data_tag;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
+	/* the tag is printed through long long so that the format is valid whatever the width of long */
+	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %lld\n", (long int)node_tag.node.comm, node_tag.node.rank, (long long)node_tag.data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist == NULL)
+	{
+		found = NULL;
+	}
+	else
+	{
+		struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist;
+		HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist == NULL)
+		{
+			found = NULL;
+		}
+		else if (_starpu_mpi_req_list_empty(&tag_hashlist->list))
+		{
+			found = NULL;
+		}
+		else
+		{
+			found = _starpu_mpi_req_list_pop_front(&tag_hashlist->list);
+			_starpu_mpi_early_request_hash_count --;
+		}
+	}
+	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %lld\n", found, (long int)node_tag.node.comm, node_tag.node.rank, (long long)node_tag.data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
+	return found;
+}
+
+/**
+ * Detach from the hashmap the whole per-tag bucket matching
+ * (\p comm, \p source, \p data_tag) and return it, or return NULL when there
+ * is none. The global counter is decreased by the number of requests in the
+ * bucket. The bucket is no longer referenced by the hashmap after the call.
+ */
+struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
+{
+	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_early_request_hashlist *hashlist;
+	struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist = NULL;
+
+	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
+	node_tag.data_tag = data_tag;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
+	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %lld\n", (long int)node_tag.node.comm, node_tag.node.rank, (long long)node_tag.data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist)
+	{
+		HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist)
+		{
+			_starpu_mpi_early_request_hash_count -= _starpu_mpi_req_list_size(&tag_hashlist->list);
+			HASH_DEL(hashlist->datahash, tag_hashlist);
+		}
+	}
+	/* report the bucket that is actually returned, not the first-level entry */
+	_STARPU_MPI_DEBUG(100, "Found hashlist %p with comm %ld source %d tag %lld\n", tag_hashlist, (long int)node_tag.node.comm, node_tag.node.rank, (long long)node_tag.data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
+	return tag_hashlist;
+}
+
+/**
+ * Queue \p req at the back of the bucket matching its (comm, rank, tag),
+ * creating the first-level and per-tag entries on demand.
+ */
+void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
+	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %lld in the application request hashmap\n", req, (long int)req->node_tag.node.comm, req->node_tag.node.rank, (long long)req->node_tag.data_tag);
+
+	struct _starpu_mpi_early_request_hashlist *hashlist;
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_request_hashlist));
+		hashlist->node = req->node_tag.node;
+		hashlist->datahash = NULL;
+		HASH_ADD(hh, _starpu_mpi_early_request_hash, node, sizeof(hashlist->node), hashlist);
+	}
+
+	struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist;
+	HASH_FIND(hh, hashlist->datahash, &req->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+	if (tag_hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_request_tag_hashlist));
+		tag_hashlist->data_tag = req->node_tag.data_tag;
+		HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist);
+		_starpu_mpi_req_list_init(&tag_hashlist->list);
+	}
+
+	_starpu_mpi_req_list_push_back(&tag_hashlist->list, req);
+	_starpu_mpi_early_request_hash_count ++;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
+}
+
+#endif // STARPU_USE_MPI_MPI
diff --git
a/mpi/src/mpi/starpu_mpi_early_request.h b/mpi/src/mpi/starpu_mpi_early_request.h new file mode 100644 index 0000000..1898393 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_early_request.h @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_EARLY_REQUEST_H__ +#define __STARPU_MPI_EARLY_REQUEST_H__ + +#include +#include +#include +#include +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_MPI + +#ifdef __cplusplus +extern "C" +{ +#endif + +struct _starpu_mpi_early_request_tag_hashlist +{ + struct _starpu_mpi_req_list list; + UT_hash_handle hh; + starpu_mpi_tag_t data_tag; +}; + +void _starpu_mpi_early_request_init(void); +void _starpu_mpi_early_request_shutdown(void); +int _starpu_mpi_early_request_count(void); +void _starpu_mpi_early_request_check_termination(void); + +void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req); +struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm); + +// Not used now but needed for fault tolerance +struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm); + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_MPI_MPI */ +#endif /* __STARPU_MPI_EARLY_REQUEST_H__ */ diff --git a/mpi/src/mpi/starpu_mpi_mpi.c 
b/mpi/src/mpi/starpu_mpi_mpi.c new file mode 100644 index 0000000..c31cd4a --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_mpi.c @@ -0,0 +1,1787 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Guillaume Beauchamp + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_USE_MPI_FT +#include +#endif // STARPU_USE_MPI_FT + +#ifdef STARPU_USE_FXT +#include +#endif + +#ifdef STARPU_USE_MPI_MPI + +/* Number of ready requests to process before polling for completed requests */ +static unsigned nready_process; + +/* Force allocation of early data */ +static int early_data_force_allocate; + +static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req); +static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); +static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req); +static void _starpu_mpi_early_data_cb(void* arg); + +/* The list of ready requests */ +static struct _starpu_mpi_req_list ready_recv_requests; +static struct _starpu_mpi_req_prio_list 
ready_send_requests; + +/* The list of detached requests that have already been submitted to MPI */ +static struct _starpu_mpi_req_list detached_requests; + +/* Number of send requests to submit to MPI at the same time */ +static unsigned ndetached_send_requests_max; +static unsigned ndetached_send_requests = 0; + +/* Condition to wake up progression thread */ +static starpu_pthread_cond_t progress_cond; +static starpu_pthread_mutex_t progress_mutex; +/* Condition to wake up waiting for all current MPI requests to finish */ +static starpu_pthread_cond_t barrier_cond; +#ifndef STARPU_SIMGRID +static starpu_pthread_t progress_thread; +#endif +static int running = 0; + +/* Provides synchronization between an early request, a sync request, and an early data handle: + * we keep it held while checking and posting one to prevent the other. + * This is to be taken always before the progress_mutex. */ +static starpu_pthread_mutex_t early_data_mutex; + +/* Driver taken by StarPU-MPI to process tasks when there is no requests to + * handle instead of polling endlessly */ +static struct starpu_driver *mpi_driver = NULL; +static int mpi_driver_call_freq = 0; +static int mpi_driver_task_freq = 0; + +#ifdef STARPU_SIMGRID +static int wait_counter; +static starpu_pthread_cond_t wait_counter_cond; +static starpu_pthread_mutex_t wait_counter_mutex; +starpu_pthread_wait_t _starpu_mpi_thread_wait; +starpu_pthread_queue_t _starpu_mpi_thread_dontsleep; +#endif + +/* Count requests posted by the application and not yet submitted to MPI */ +static starpu_pthread_mutex_t posted_requests_mutex; +static int posted_requests = 0; +static int newer_requests; +static int mpi_wait_for_all_running = 0; + +#define _STARPU_MPI_INC_POSTED_REQUESTS(req, value) { \ + STARPU_PTHREAD_MUTEX_LOCK(&posted_requests_mutex); \ + posted_requests += value; \ + _STARPU_MPI_DEBUG(0, "posted_requests : %d with req %p srcdst %d tag %"PRIi64" and type %s %d\n", posted_requests, req, req->node_tag.node.rank, 
req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req); \ + STARPU_PTHREAD_MUTEX_UNLOCK(&posted_requests_mutex); \ +} + +#ifdef STARPU_SIMGRID +#pragma weak smpi_simulated_main_ +extern int smpi_simulated_main_(int argc, char *argv[]); + +static +#if SIMGRID_VERSION >= 32600 +void +#else +int +#endif +_starpu_smpi_simulated_main(int argc, char *argv[]) +{ +#if SIMGRID_VERSION < 32600 + return +#endif + smpi_simulated_main_(argc, argv); +} + +#pragma weak smpi_process_set_user_data +#if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_set_user_data) +extern void smpi_process_set_user_data(void *); +#endif +#endif + +#ifdef STARPU_USE_FXT +static int trace_loop = 0; +#endif + +/********************************************************/ +/* */ +/* Send/Receive functionalities */ +/* */ +/********************************************************/ + +struct _starpu_mpi_early_data_cb_args +{ + starpu_data_handle_t data_handle; + starpu_data_handle_t early_handle; + struct _starpu_mpi_req *req; + void *buffer; + size_t size; + unsigned buffer_node; +}; + +#if 0 +void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends) +{ + (void)coop_sends; + /* TODO: turn them into redirects & forwards */ +} +#endif + +void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data) +{ + (void)submit_control; + unsigned i, n = coop_sends->n; + + /* Note: coop_sends might disappear very very soon after last request is submitted */ + for (i = 0; i < n; i++) + { + if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data) + { + _STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.node.rank); + _starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]); + } + /* TODO: handle redirect requests */ + } +} + +void _starpu_mpi_submit_ready_request(void *arg) +{ + _STARPU_MPI_LOG_IN(); + struct 
_starpu_mpi_req *req = arg; + + if (req->reserved_size) + { + /* The core will have really allocated the reception buffer now, release our reservation */ + starpu_memory_deallocate(req->node, req->reserved_size); + req->reserved_size = 0; + } + + _STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64" and type %s %d\n", req, req->node_tag.node.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req); + + if (req->request_type == RECV_REQ) + { + /* Case : the request is the internal receive request submitted + * by StarPU-MPI to receive incoming data without a matching + * early_request from the application. We immediately allocate the + * pointer associated to the data_handle, and push it into the + * ready_requests list, so as the real MPI request can be submitted + * before the next submission of the envelope-catching request. */ + if (req->backend->is_internal_req) + { + _starpu_mpi_datatype_allocate(req->data_handle, req); + if (req->registered_datatype == 1) + { + req->count = 1; + req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); + } + else + { + STARPU_ASSERT(req->count); + req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); + } + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + _STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, + req->datatype_name, (int)req->count, req->registered_datatype); + _starpu_mpi_req_list_push_front(&ready_recv_requests, req); + + /* inform the starpu mpi thread that the request has been pushed in the ready_requests list */ + req->posted = 1; + STARPU_PTHREAD_COND_BROADCAST(&req->backend->posted_cond); + } + else + { + STARPU_PTHREAD_MUTEX_LOCK(&early_data_mutex); + /* test whether some data with the 
given tag and source have already been received by StarPU-MPI*/ + struct _starpu_mpi_early_data_handle *early_data_handle = _starpu_mpi_early_data_find(&req->node_tag); + + if (early_data_handle) + { + /* Got the early_data_handle */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + + /* Case: a receive request for a data with the given tag and source has already been + * posted to MPI by StarPU. Asynchronously requests a Read permission over the temporary handle , + * so as when the internal receive is completed, the _starpu_mpi_early_data_cb function + * will be called to bring the data back to the original data handle associated to the request.*/ + _STARPU_MPI_DEBUG(3, "The RECV request %p with tag %"PRIi64" has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag); + STARPU_ASSERT(req->data_handle != early_data_handle->handle); + + req->backend->internal_req = early_data_handle->req; + req->backend->early_data_handle = early_data_handle; + + struct _starpu_mpi_early_data_cb_args *cb_args; + _STARPU_MPI_MALLOC(cb_args, sizeof(struct _starpu_mpi_early_data_cb_args)); + cb_args->data_handle = req->data_handle; + cb_args->early_handle = early_data_handle->handle; + cb_args->buffer = early_data_handle->buffer; + cb_args->size = early_data_handle->size; + cb_args->buffer_node = early_data_handle->buffer_node; + cb_args->req = req; + + _STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n"); + // FIXME: when buffer == NULL, do not hardcode acquiring on early_data_handle->buffer_node, to just acquire where the data happens to have been stored by MPI + starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(early_data_handle->handle,early_data_handle->buffer_node,STARPU_R,NULL,_starpu_mpi_early_data_cb,(void*) cb_args, 1, 0, NULL, NULL, req->prio); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + else + { + struct _starpu_mpi_req *sync_req = 
_starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.node.rank, req->node_tag.node.comm); + _STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64" and src %d = %p\n", req->node_tag.data_tag, req->node_tag.node.rank, sync_req); + if (sync_req) + { + /* Got the sync req */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + /* Case: we already received the send envelope, we can proceed with the receive */ + req->sync = 1; + _starpu_mpi_datatype_allocate(req->data_handle, req); + if (req->registered_datatype == 1) + { + req->count = 1; + req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); + } + else + { + req->count = sync_req->count; + STARPU_ASSERT(req->count); + req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); + } + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + _starpu_mpi_req_list_push_front(&ready_recv_requests, req); + /* Throw away the dumb request that was only used to know that we got the envelope */ + _starpu_mpi_request_destroy(sync_req); + } + else + { + /* Case: no matching data has been received. Store the receive request as an early_request. 
*/ + _STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64") into the request hashmap\n", req, req->node_tag.node.rank, req->node_tag.data_tag); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + _starpu_mpi_early_request_enqueue(req); + /* We have queued our early request, we can let the progression thread look at it */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + } + } + } + } + else + { + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + if (req->request_type == SEND_REQ) + { + if (_starpu_mpi_thread_multiple_send && (ndetached_send_requests_max == 0 || ndetached_send_requests < ndetached_send_requests_max)) + { + /* Directly send from this thread */ + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_handle_ready_request(req); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + else + /* Defer to MPI thread */ + _starpu_mpi_req_prio_list_push_front(&ready_send_requests, req); + } + else + _starpu_mpi_req_list_push_front(&ready_recv_requests, req); + _STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, + req->datatype_name, (int)req->count, req->registered_datatype); + } + + newer_requests = 1; + STARPU_PTHREAD_COND_BROADCAST(&progress_cond); +#ifdef STARPU_SIMGRID + starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req) +{ + (void)req; + _STARPU_MPI_INC_POSTED_REQUESTS(req, 1); +} + +#ifdef STARPU_SIMGRID +int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag) +{ + *flag = 0; + if (*done) + { + starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); + *flag = 1; + } + return MPI_SUCCESS; +} + +static void _starpu_mpi_simgrid_wait_req_func(void* arg) 
+{ + struct _starpu_simgrid_mpi_req *sim_req = arg; + int ret; + + starpu_pthread_detach(starpu_pthread_self()); + + STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); + wait_counter++; + STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); + + ret = MPI_Wait(sim_req->request, sim_req->status); + + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret)); + _STARPU_MPI_DEBUG(0, "request %p finished\n", sim_req->request); + + *(sim_req->done) = 1; + starpu_pthread_queue_broadcast(sim_req->queue); + + free(sim_req); + + STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); + if (--wait_counter == 0) + STARPU_PTHREAD_COND_SIGNAL(&wait_counter_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); +} + +void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done) +{ + struct _starpu_simgrid_mpi_req *sim_req; + _STARPU_MPI_CALLOC(sim_req, 1, sizeof(struct _starpu_simgrid_mpi_req)); + sim_req->request = request; + sim_req->status = status; + sim_req->queue = queue; + sim_req->done = done; + *done = 0; + + _STARPU_MPI_DEBUG(0, "will wait for request %p to finish\n", sim_req->request); + starpu_pthread_attr_t attr; + starpu_pthread_attr_init(&attr); + starpu_pthread_attr_setstacksize(&attr, 32786); + _starpu_simgrid_xbt_thread_create("wait for mpi transfer", &attr, _starpu_mpi_simgrid_wait_req_func, sim_req); +} +#endif + +/********************************************************/ +/* */ +/* Send functionalities */ +/* */ +/********************************************************/ + +static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64" dst %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, 
starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); + + _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); + + _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); + + if (req->sync == 0) + { + _STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm); + req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request); + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + } + else + { + _STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm); + req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request); + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Issend returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + } + +#ifdef STARPU_SIMGRID + _starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done); +#endif + + // this trace event is the start of the communication link: + _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); + + /* somebody is perhaps waiting for the MPI request to be posted */ + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + req->submitted = 1; + STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + + _starpu_mpi_handle_detached_request(req); + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req) +{ + 
_starpu_mpi_datatype_allocate(req->data_handle, req); + + _STARPU_MPI_CALLOC(req->backend->envelope, 1,sizeof(struct _starpu_mpi_envelope)); + req->backend->envelope->mode = _STARPU_MPI_ENVELOPE_DATA; + req->backend->envelope->data_tag = req->node_tag.data_tag; + req->backend->envelope->sync = req->sync; + + if (req->registered_datatype == 1) + { + int size, ret; + req->count = 1; + req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); + + MPI_Type_size(req->datatype, &size); + req->backend->envelope->size = (starpu_ssize_t)req->count * size; + _STARPU_MPI_DEBUG(20, "Post MPI isend count (%ld) datatype_size %ld request to %d\n",req->count,starpu_data_get_size(req->data_handle), req->node_tag.node.rank); + _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm); + ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending envelope, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); + } + else + { + int ret; + + // Do not pack the data, just try to find out the size + starpu_data_pack_node(req->data_handle, req->node, NULL, &(req->backend->envelope->size)); + + if (req->backend->envelope->size != -1) + { + // We already know the size of the data, let's send it to overlap with the packing of the data + _STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank); + req->count = req->backend->envelope->size; + _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, 
req->node_tag.node.comm); + ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); + } + + // Pack the data + starpu_data_pack_node(req->data_handle, req->node, &req->ptr, &req->count); + if (req->backend->envelope->size == -1) + { + // We know the size now: store it in the envelope (it is still -1 from the first call to pack) so that the receiver gets the real size, then send it + req->backend->envelope->size = req->count; + _STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank); + _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm); + ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); + } + else + { + // We check the size returned with the 2 calls to pack is the same + STARPU_MPI_ASSERT_MSG(req->count == req->backend->envelope->size, "Calls to pack_data returned different sizes %ld != %ld", req->count, req->backend->envelope->size); + } + // We can send the data now + } + + if (req->sync) + { + // If the data is to be sent in synchronous mode, we need to wait for the receiver ready message + _starpu_mpi_sync_data_add(req); + } + else + { + // Otherwise we can send the data + _starpu_mpi_isend_data_func(req); + } +} + +/********************************************************/ +/* */ +/* receive functionalities */ +/* */ +/********************************************************/ + +void _starpu_mpi_irecv_size_func(struct
_starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + + if (req->sync) + { + struct _starpu_mpi_envelope *_envelope; + _STARPU_MPI_CALLOC(_envelope, 1, sizeof(struct _starpu_mpi_envelope)); + _envelope->mode = _STARPU_MPI_ENVELOPE_SYNC_READY; + _envelope->data_tag = req->node_tag.data_tag; + _STARPU_MPI_DEBUG(20, "Telling node %d it can send the data and waiting for the data back ...\n", req->node_tag.node.rank); + _STARPU_MPI_COMM_TO_DEBUG(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, _envelope->data_tag, req->node_tag.node.comm); + req->ret = MPI_Send(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm); + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Send returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + free(_envelope); + _envelope = NULL; + } + + if (req->sync) + { + _STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm); + req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request); + } + else + { + _STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm); + req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, 
req->node_tag.node.comm, &req->backend->data_request); + } + /* Check the result of MPI_Irecv before handing the MPI request to anything else, in the same order as _starpu_mpi_isend_data_func() does for sends */ + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); +#ifdef STARPU_SIMGRID + _starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done); +#endif + + _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); + + /* somebody is perhaps waiting for the MPI request to be posted */ + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + req->submitted = 1; + STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + + _starpu_mpi_handle_detached_request(req); + + _STARPU_MPI_LOG_OUT(); +} + +/********************************************************/ +/* */ +/* Wait functionalities */ +/* */ +/********************************************************/ + +#ifndef STARPU_SIMGRID +void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req) +{ + _STARPU_MPI_LOG_IN(); + /* Which is the mpi request we are waiting for ?
*/ + struct _starpu_mpi_req *req = waiting_req->backend->other_request; + + _STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + if (req->backend->data_request != MPI_REQUEST_NULL) + { + req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status); + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + } + _STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag); + + _starpu_mpi_handle_request_termination(req); + + _STARPU_MPI_LOG_OUT(); +} +#endif + +int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) +{ + int ret; + struct _starpu_mpi_req *req = *public_req; + + _STARPU_MPI_LOG_IN(); + +#ifdef STARPU_SIMGRID + _STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + starpu_pthread_wait_t wait; + starpu_pthread_wait_init(&wait); + starpu_pthread_queue_register(&wait, &req->queue); + while (1) + { + starpu_pthread_wait_reset(&wait); + if (req->done) + break; + starpu_pthread_wait_wait(&wait); + } + starpu_pthread_queue_unregister(&wait, &req->queue); + starpu_pthread_wait_destroy(&wait); + _STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag); + + if (status) + *status = req->status_store; + _starpu_mpi_handle_request_termination(req); +#else + struct _starpu_mpi_req *waiting_req; + /* We cannot try to complete a MPI request that was not actually posted + * to MPI yet. 
*/ + STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex)); + while (!(req->submitted)) + STARPU_PTHREAD_COND_WAIT(&(req->backend->req_cond), &(req->backend->req_mutex)); + STARPU_PTHREAD_MUTEX_UNLOCK(&(req->backend->req_mutex)); + + /* Initialize the request structure */ + _starpu_mpi_request_init(&waiting_req); + waiting_req->prio = INT_MAX; + waiting_req->status = status; + waiting_req->backend->other_request = req; + waiting_req->func = _starpu_mpi_wait_func; + waiting_req->request_type = WAIT_REQ; + + _STARPU_MPI_INC_POSTED_REQUESTS(waiting_req, 1); + _starpu_mpi_submit_ready_request(waiting_req); + + /* We wait for the MPI request to finish */ + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + while (!req->completed) + STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + + /* The internal request structure was automatically allocated */ + _STARPU_MPI_INC_POSTED_REQUESTS(waiting_req, -1); + _starpu_mpi_request_destroy(waiting_req); +#endif + + *public_req = NULL; + if (req->backend->internal_req) + { + _starpu_mpi_request_destroy(req->backend->internal_req); + } + ret = req->ret; + _starpu_mpi_request_destroy(req); + + _STARPU_MPI_LOG_OUT(); +#ifdef STARPU_DEVEL +#warning see if we can return ret instead of 0 +#endif + (void)ret; + return 0; +} + +/********************************************************/ +/* */ +/* Test functionalities */ +/* */ +/********************************************************/ + +#ifndef STARPU_SIMGRID +void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req) +{ + _STARPU_MPI_LOG_IN(); + /* Which is the mpi request we are testing for ? 
*/ + struct _starpu_mpi_req *req = testing_req->backend->other_request; + + _STARPU_MPI_DEBUG(0, "Test request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, + req->datatype_name, (int)req->count, req->registered_datatype); + + _STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + + req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status); + + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + + _STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag); + + if (*testing_req->flag) + { + testing_req->ret = req->ret; + _starpu_mpi_handle_request_termination(req); + } + + STARPU_PTHREAD_MUTEX_LOCK(&testing_req->backend->req_mutex); + testing_req->completed = 1; + STARPU_PTHREAD_COND_SIGNAL(&testing_req->backend->req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex); + _STARPU_MPI_LOG_OUT(); +} +#endif + +int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) +{ + _STARPU_MPI_LOG_IN(); + int ret = 0; + + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req"); + + struct _starpu_mpi_req *req = *public_req; + + STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request"); + + STARPU_VALGRIND_YIELD(); + +#ifdef STARPU_SIMGRID + ret = req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, flag); + if (*flag) + { + if (status) + *status = req->status_store; + _starpu_mpi_handle_request_termination(req); + } +#else + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + unsigned submitted = req->submitted; + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + + if (submitted) + { + struct _starpu_mpi_req *testing_req; + + /* Initialize the request 
structure */ + _starpu_mpi_request_init(&testing_req); + testing_req->prio = INT_MAX; + testing_req->flag = flag; + testing_req->status = status; + testing_req->backend->other_request = req; + testing_req->func = _starpu_mpi_test_func; + testing_req->completed = 0; + testing_req->request_type = TEST_REQ; + + _STARPU_MPI_INC_POSTED_REQUESTS(testing_req, 1); + _starpu_mpi_submit_ready_request(testing_req); + + /* We wait for the test request to finish */ + STARPU_PTHREAD_MUTEX_LOCK(&(testing_req->backend->req_mutex)); + while (!(testing_req->completed)) + STARPU_PTHREAD_COND_WAIT(&(testing_req->backend->req_cond), &(testing_req->backend->req_mutex)); + STARPU_PTHREAD_MUTEX_UNLOCK(&(testing_req->backend->req_mutex)); + + ret = testing_req->ret; + + _STARPU_MPI_INC_POSTED_REQUESTS(testing_req, -1); + _starpu_mpi_request_destroy(testing_req); + } + else + { + *flag = 0; + } +#endif + + if (*flag) + { + /* The request was completed so we free the internal + * request structure which was automatically allocated + * */ + *public_req = NULL; + if (req->backend->internal_req) + { + _starpu_mpi_request_destroy(req->backend->internal_req); + } + _starpu_mpi_request_destroy(req); + } + + _STARPU_MPI_LOG_OUT(); +#ifdef STARPU_DEVEL +#warning see if we can return ret instead of 0 +#endif + (void)ret; + return 0; +} + +/********************************************************/ +/* */ +/* Barrier functionalities */ +/* */ +/********************************************************/ + +static void _starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req) +{ + _STARPU_MPI_LOG_IN(); + + /* FIXME: rather use MPI_Ibarrier and make it a detached request. + * We'd then be able to introduce starpu_mpi_ibarrier, and make + * starpu_mpi_barrier just call starpu_mpi_ibarrier(); starpu_mpi_wait(); + * That'll solve locking issue when intermixing starpu_mpi_barrier with + * other communications. 
+ */ + barrier_req->ret = MPI_Barrier(barrier_req->node_tag.node.comm); + STARPU_MPI_ASSERT_MSG(barrier_req->ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(barrier_req->ret)); + + _starpu_mpi_handle_request_termination(barrier_req); + _STARPU_MPI_LOG_OUT(); +} + +/* Build a BARRIER_REQ request for comm, submit it with _starpu_mpi_submit_ready_request() and block until it is marked completed. Always returns 0. */ +int _starpu_mpi_barrier(MPI_Comm comm) +{ + struct _starpu_mpi_req *barrier_req; + + /* Initialize the request structure */ + _starpu_mpi_request_init(&barrier_req); + barrier_req->prio = INT_MAX; + barrier_req->func = _starpu_mpi_barrier_func; + barrier_req->request_type = BARRIER_REQ; + barrier_req->node_tag.node.comm = comm; + + _STARPU_MPI_INC_POSTED_REQUESTS(barrier_req, 1); + _starpu_mpi_submit_ready_request(barrier_req); + + /* We wait for the MPI request to finish */ + STARPU_PTHREAD_MUTEX_LOCK(&barrier_req->backend->req_mutex); + while (!barrier_req->completed) + STARPU_PTHREAD_COND_WAIT(&barrier_req->backend->req_cond, &barrier_req->backend->req_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&barrier_req->backend->req_mutex); + + // posted_requests will be decremented in _starpu_mpi_handle_request_termination() called by _starpu_mpi_barrier_func() + _starpu_mpi_request_destroy(barrier_req); + _STARPU_MPI_LOG_OUT(); + + return 0; +} + +int _starpu_mpi_wait_for_all(MPI_Comm comm) +{ + (void) comm; + _STARPU_MPI_LOG_IN(); + + /* First wait for *both* all tasks and MPI requests to finish, in case + * some tasks generate MPI requests, MPI requests generate tasks, etc.
+ */ + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + STARPU_MPI_ASSERT_MSG(!mpi_wait_for_all_running, "Concurrent starpu_mpi_wait_for_all is not implemented, even on different communicators"); + mpi_wait_for_all_running = 1; + do + { + while (posted_requests)// || !_starpu_mpi_req_list_empty(&ready_recv_requests) || !_starpu_mpi_req_prio_list_empty(&ready_send_requests) || !_starpu_mpi_req_list_empty(&detached_requests) || _starpu_mpi_early_request_count() != 0 || _starpu_mpi_sync_data_count() != 0) + /* Wait for all current MPI requests to finish */ + STARPU_PTHREAD_COND_WAIT(&barrier_cond, &progress_mutex); + /* No current request, clear flag */ + newer_requests = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + /* Now wait for all tasks */ + starpu_task_wait_for_all(); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + /* Check newer_requests again, in case some MPI requests + * triggered by tasks completed and triggered tasks between + * wait_for_all finished and we take the lock */ + } while (posted_requests || newer_requests);// || !_starpu_mpi_req_list_empty(&ready_recv_requests) || !_starpu_mpi_req_prio_list_empty(&ready_send_requests) || !_starpu_mpi_req_list_empty(&detached_requests) || _starpu_mpi_early_request_count() != 0 || _starpu_mpi_sync_data_count() != 0 ); + mpi_wait_for_all_running = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + return 0; +} + +/********************************************************/ +/* */ +/* Progression */ +/* */ +/********************************************************/ + +static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, + req->datatype_name, (int)req->count, 
req->registered_datatype, req->backend->internal_req); + + if (req->backend->internal_req) + { + _starpu_mpi_early_data_delete(req->backend->early_data_handle); + } + else + { + if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) + { + if (req->request_type == SEND_REQ) + { + // We need to make sure the communication for sending the size + // has completed, as MPI can re-order messages, let's call + // MPI_Wait to make sure data have been sent + int ret; + ret = MPI_Wait(&req->backend->size_req, MPI_STATUS_IGNORE); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret)); + } + if (req->registered_datatype == 0) + { + if (req->request_type == SEND_REQ) + { + starpu_free_on_node_flags(req->node, (uintptr_t)req->ptr, req->count, 0); + req->ptr = NULL; + } + else if (req->request_type == RECV_REQ) + { + if (starpu_data_get_interface_ops(req->data_handle)->peek_data) + { + starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); + starpu_free_on_node_flags(req->node, (uintptr_t)req->ptr, req->count, 0); + } + else + { + starpu_data_unpack_node(req->data_handle, req->node, req->ptr, req->count); + } + starpu_memory_deallocate(req->node, req->count); + } + } + else + { + _starpu_mpi_datatype_free(req->data_handle, &req->datatype); + } + } + // for recv requests, this event is the end of the communication link: + _STARPU_MPI_TRACE_TERMINATED(req); + } + + _starpu_mpi_release_req_data(req); + + if (req->backend->envelope) + { + free(req->backend->envelope); + req->backend->envelope = NULL; + } + + /* Execute the specified callback, if any */ + if (req->callback) + req->callback(req->callback_arg); + + _STARPU_MPI_INC_POSTED_REQUESTS(req, -1); + + /* tell anyone potentially waiting on the request that it is + * terminated now */ + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + req->completed = 1; + STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); + 
STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + _STARPU_MPI_LOG_OUT(); +} + +/* This is called when the data is now received in the early data handle, we can + * now copy it over to the real handle. */ +static void _starpu_mpi_early_data_cb(void* arg) +{ + struct _starpu_mpi_early_data_cb_args *args = arg; + + if (args->buffer) + { + /* Data has been received as a raw memory, it has to be unpacked */ + struct starpu_data_interface_ops *itf_src = starpu_data_get_interface_ops(args->early_handle); + struct starpu_data_interface_ops *itf_dst = starpu_data_get_interface_ops(args->data_handle); + MPI_Datatype datatype = _starpu_mpi_datatype_get_user_defined_datatype(args->data_handle, args->req->node); + + if (datatype) + { + int position=0; + void *ptr = starpu_data_handle_to_pointer(args->data_handle, args->req->node); + MPI_Unpack(args->buffer, itf_src->get_size(args->early_handle), &position, ptr, 1, datatype, args->req->node_tag.node.comm); + starpu_free_on_node_flags(args->buffer_node, (uintptr_t) args->buffer, args->size, 0); + args->buffer = NULL; + _starpu_mpi_datatype_free(args->data_handle, &datatype); + } + else + { + STARPU_MPI_ASSERT_MSG(itf_dst->peek_data || itf_dst->unpack_data , "The data interface does not define an unpack function\n"); + // FIXME: Actually we may not want unpack_data to free the buffer, for the case when we are participating to a collective send + if (itf_dst->peek_data) + { + itf_dst->peek_data(args->data_handle, args->req->node, args->buffer, itf_src->get_size(args->early_handle)); + starpu_free_on_node_flags(args->buffer_node, (uintptr_t) args->buffer, itf_src->get_size(args->early_handle), 0); + } + else + itf_dst->unpack_data(args->data_handle, args->req->node, args->buffer, itf_src->get_size(args->early_handle)); + args->buffer = NULL; + } + } + else + { + struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->early_handle); + void* itf_src = starpu_data_get_interface_on_node(args->early_handle, 
args->buffer_node); + void* itf_dst = starpu_data_get_interface_on_node(args->data_handle, args->req->node); + + if (!itf->copy_methods->ram_to_ram) + { + _STARPU_MPI_DEBUG(3, "Initiating any_to_any copy..\n"); + itf->copy_methods->any_to_any(itf_src, args->buffer_node, itf_dst, args->req->node, NULL); + } + else + { + _STARPU_MPI_DEBUG(3, "Initiating ram_to_ram copy..\n"); + itf->copy_methods->ram_to_ram(itf_src, args->buffer_node, itf_dst, args->req->node); + } + } + + _STARPU_MPI_DEBUG(3, "Done, handling release of early_handle..\n"); + starpu_data_release_on_node(args->early_handle, args->buffer_node); + + _STARPU_MPI_DEBUG(3, "Done, handling unregister of early_handle..\n"); + /* XXX: note that we have already freed the registered buffer above. In + * principle that's unsafe. As of now it is fine because StarPU has no + reason to access it. */ + starpu_data_unregister_submit(args->early_handle); + + _STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req); + // If the request is detached, we need to call _starpu_mpi_handle_request_termination + // as it will not be called automatically as the request is not in the list detached_requests + if (args->req->detached) + { + /* have the internal request destroyed now or when completed */ + STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->internal_req->backend->req_mutex); + if (args->req->backend->internal_req->backend->to_destroy) + { + /* The request completed first, can now destroy it */ + STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex); + _starpu_mpi_request_destroy(args->req->backend->internal_req); + } + else + { + /* The request didn't complete yet, tell it to destroy it when it completes */ + args->req->backend->internal_req->backend->to_destroy = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex); + } + _starpu_mpi_handle_request_termination(args->req); + 
_starpu_mpi_request_destroy(args->req); + } + else + { + // else: If the request is not detached its termination will + // be handled when calling starpu_mpi_wait + // We store in the application request the internal MPI + // request so that it can be used by starpu_mpi_wait + args->req->backend->data_request = args->req->backend->internal_req->backend->data_request; + STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->req_mutex); + args->req->submitted = 1; + STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex); +#ifdef STARPU_SIMGRID + args->req->done = 1; +#endif + } + + free(args); + args = NULL; +} + +// We suppose progress_mutex is locked +static void _starpu_mpi_test_detached_requests(void) +{ + //_STARPU_MPI_LOG_IN(); + int flag; + struct _starpu_mpi_req *req; + + if (_starpu_mpi_req_list_empty(&detached_requests)) + { + //_STARPU_MPI_LOG_OUT(); + return; + } + + _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN(); + req = _starpu_mpi_req_list_begin(&detached_requests); + while (req != _starpu_mpi_req_list_end(&detached_requests)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + _STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + //_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.node.rank); +#ifdef STARPU_SIMGRID + req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag); +#else + STARPU_MPI_ASSERT_MSG(req->backend->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL"); + req->ret = MPI_Test(&req->backend->data_request, &flag, MPI_STATUS_IGNORE); +#endif + + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + _STARPU_MPI_TRACE_TEST_END(req->node_tag.node.rank, req->node_tag.data_tag); + + if (!flag) + { + req = 
_starpu_mpi_req_list_next(req); + } + else + { + _STARPU_MPI_TRACE_POLLING_END(); + struct _starpu_mpi_req *next_req; + next_req = _starpu_mpi_req_list_next(req); + + _STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); + + _starpu_mpi_handle_request_termination(req); + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + if (req->request_type == SEND_REQ && ndetached_send_requests_max > 0) + // if ndetached_send_requests_max == 0, we don't limit the number of concurrent MPI send requests + ndetached_send_requests--; + _starpu_mpi_req_list_erase(&detached_requests, req); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + _STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); + + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + /* We don't want to free internal non-detached + requests, we need to get their MPI request before + destroying them */ + if (req->backend->is_internal_req && !req->backend->to_destroy) + { + /* We have completed the request, let the application request destroy it */ + req->backend->to_destroy = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + } + else + { + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + _starpu_mpi_request_destroy(req); + } + + req = next_req; + _STARPU_MPI_TRACE_POLLING_BEGIN(); + } + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + _STARPU_MPI_TRACE_TESTING_DETACHED_END(); + + //_STARPU_MPI_LOG_OUT(); +} + +static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req) +{ + if (req->detached) + { + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + + if (req->request_type == SEND_REQ && ndetached_send_requests_max > 0) + // if ndetached_send_requests_max == 0, we don't limit the number of concurrent MPI send requests + ndetached_send_requests++; + + /* put the submitted request into the list of pending requests + * so that it can be handled by the progression mechanisms */ + 
_starpu_mpi_req_list_push_back(&detached_requests, req); + + STARPU_PTHREAD_COND_SIGNAL(&progress_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + } +} + +static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(req, "Invalid request"); + + /* submit the request to MPI */ + _STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, + req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + /* Set GPU device for current request if GPU Direct is supported */ + if (_starpu_mpi_has_cuda) + { + int mem_node = req->node; + if (mem_node >= 0) + { + enum starpu_node_kind node_kind = starpu_node_get_kind (mem_node); + switch (node_kind) + { +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_RAM: + if (_starpu_mpi_cuda_devid == -1 && starpu_cuda_worker_get_count() > 1) + cudaSetDevice(starpu_memory_node_get_devid(mem_node)); + break; +#endif + default: + break; + } + } + } + + req->func(req); + + _STARPU_MPI_LOG_OUT(); +} + +static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope, MPI_Status status, MPI_Comm comm) +{ + _STARPU_MPI_DEBUG(20, "Request with tag %"PRIi64" and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE); + _STARPU_MPI_DEBUG(20, "Request sync %d\n", envelope->sync); + + struct _starpu_mpi_early_data_handle* early_data_handle = _starpu_mpi_early_data_create(envelope, status.MPI_SOURCE, comm); + _starpu_mpi_early_data_add(early_data_handle); + + starpu_data_handle_t data_handle; + data_handle = _starpu_mpi_tag_get_data_handle_from_tag(envelope->data_tag); + + // TODO: rather select some memory node next to the NIC + unsigned buffer_node = STARPU_MAIN_RAM; + if (data_handle && 
starpu_data_get_interface_id(data_handle) < STARPU_MAX_INTERFACE_ID && !early_data_force_allocate) + { + /* We know which data will receive it and we won't have to unpack, use just the same kind of data. */ + early_data_handle->buffer = NULL; + early_data_handle->buffer_node = buffer_node; + starpu_data_register_same(&early_data_handle->handle, data_handle); + //_starpu_mpi_early_data_add(early_data_handle); + } + else + { + /* The application has not registered yet a data with the tag, + * we are going to receive the data as a raw memory, and give it + * to the application when it post a receive for this tag + */ + _STARPU_MPI_DEBUG(3, "Posting a receive for a data of size %d which has not yet been registered\n", (int)envelope->size); + early_data_handle->buffer = (void *)starpu_malloc_on_node_flags(buffer_node, envelope->size, 0); + early_data_handle->size = envelope->size; + early_data_handle->buffer_node = buffer_node; + starpu_variable_data_register(&early_data_handle->handle, buffer_node, (uintptr_t) early_data_handle->buffer, envelope->size); + //_starpu_mpi_early_data_add(early_data_handle); + } + + _STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %"PRIi64" from comm %ld src %d ..\n", + early_data_handle->node_tag.data_tag, (long int)comm, status.MPI_SOURCE); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + early_data_handle->req = _starpu_mpi_irecv_common(early_data_handle->handle, status.MPI_SOURCE, + early_data_handle->node_tag.data_tag, comm, 1, 0, + NULL, NULL, 1, 1, envelope->size, STARPU_DEFAULT_PRIO); + /* The early data handle is ready, we can let _starpu_mpi_submit_ready_request + * proceed with acquiring it */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + // We wait until the request is pushed in the + // ready_request list + while (!(early_data_handle->req->posted)) + STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req->backend->posted_cond), 
&progress_mutex); + + // Handle the request immediately to make sure the mpi_irecv is + // posted before receiving an other envelope + _starpu_mpi_req_list_erase(&ready_recv_requests, early_data_handle->req); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_handle_ready_request(early_data_handle->req); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); +} + +static void *_starpu_mpi_progress_thread_func(void *arg) +{ + struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; + + starpu_pthread_setname("MPI"); + + _starpu_mpi_env_init(); + +#ifndef STARPU_SIMGRID + if (_starpu_mpi_thread_cpuid < 0) + { + _starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); + } + + if (!_starpu_mpi_nobind && starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0) + { + char hostname[65]; + gethostname(hostname, sizeof(hostname)); + _STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname); + } + _starpu_mpi_do_initialize(argc_argv); + if (!_starpu_mpi_nobind && _starpu_mpi_thread_cpuid >= 0) + /* In case MPI changed the binding */ + starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI"); +#else + /* Now that MPI is set up, let the rest of simgrid get initialized */ + char **argv_cpy; + _STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*)); + int i; + for (i = 0; i < *(argc_argv->argc); i++) + argv_cpy[i] = strdup((*(argc_argv->argv))[i]); + void **tsd; + _STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*)); +#if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_SET_DATA) + _starpu_simgrid_actor_create("main", _starpu_smpi_simulated_main, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); + /* And set TSD for us */ +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(sg_actor_self(), tsd); +#else + 
sg_actor_data_set(sg_actor_self(), tsd); +#endif +#else + MSG_process_create_with_arguments("main", _starpu_smpi_simulated_main, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); + /* And set TSD for us */ + if (!smpi_process_set_user_data) + { + _STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n"); + } + smpi_process_set_user_data(tsd); +#endif + /* And wait for StarPU to get initialized, to come back to the same + * situation as native execution where that's always the case. */ + starpu_wait_initialized(); +#endif + + _starpu_mpi_comm_amounts_init(argc_argv->comm); + _starpu_mpi_cache_init(argc_argv->comm); + _starpu_mpi_select_node_init(); + _starpu_mpi_tag_init(); + _starpu_mpi_comm_init(argc_argv->comm); + _starpu_mpi_tags_init(); + + _starpu_mpi_early_request_init(); + _starpu_mpi_early_data_init(); + _starpu_mpi_sync_data_init(); + _starpu_mpi_datatype_init(); + + if (mpi_driver) + starpu_driver_init(mpi_driver); + +#ifdef STARPU_SIMGRID + starpu_pthread_wait_init(&_starpu_mpi_thread_wait); + starpu_pthread_queue_init(&_starpu_mpi_thread_dontsleep); + starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &_starpu_mpi_thread_dontsleep); +#endif + +#ifdef STARPU_USE_FXT + _starpu_mpi_fxt_init(argc_argv); +#endif + + /* notify the main thread that the progression thread is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + running = 1; + STARPU_PTHREAD_COND_SIGNAL(&progress_cond); + + int envelope_request_submitted = 0; + int mpi_driver_loop_counter = 0; + int mpi_driver_task_counter = 0; + _STARPU_MPI_TRACE_POLLING_BEGIN(); + + while (running || posted_requests || !(_starpu_mpi_req_list_empty(&ready_recv_requests)) || !(_starpu_mpi_req_prio_list_empty(&ready_send_requests)) || !(_starpu_mpi_req_list_empty(&detached_requests))) + { +#ifdef STARPU_SIMGRID + starpu_pthread_wait_reset(&_starpu_mpi_thread_wait); +#endif + /* shall we block ? 
*/ + unsigned block = _starpu_mpi_req_list_empty(&ready_recv_requests) && _starpu_mpi_req_prio_list_empty(&ready_send_requests) && _starpu_mpi_early_request_count() == 0 && _starpu_mpi_sync_data_count() == 0 && _starpu_mpi_req_list_empty(&detached_requests); + + if (block) + { + //_STARPU_MPI_DEBUG(3, "NO MORE REQUESTS TO HANDLE\n"); + _STARPU_MPI_TRACE_SLEEP_BEGIN(); + + /* Notify mpi_barrier */ + STARPU_PTHREAD_COND_SIGNAL(&barrier_cond); + } +#ifdef STARPU_USE_MPI_FT + block = block && !starpu_mpi_ft_busy(); +#endif // STARPU_USE_MPI_FT + if (block) + { + //_STARPU_MPI_DEBUG(3, "NO MORE REQUESTS TO HANDLE\n"); + _STARPU_MPI_TRACE_SLEEP_BEGIN(); + + STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); + + _STARPU_MPI_TRACE_SLEEP_END(); + } + + /* get one recv request */ + unsigned n = 0; + while (!_starpu_mpi_req_list_empty(&ready_recv_requests)) + { + _STARPU_MPI_TRACE_POLLING_END(); + struct _starpu_mpi_req *req; + + if (n++ == nready_process) + /* Already spent some time on submitting ready recv requests, poll before processing more ready recv requests */ + break; + + req = _starpu_mpi_req_list_pop_back(&ready_recv_requests); + + /* handling a request is likely to block for a while + * (on a sync_data_with_mem call), we want to let the + * application submit requests in the meantime, so we + * release the lock. 
*/ + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_handle_ready_request(req); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + + /* get one send request */ + n = 0; + while (!_starpu_mpi_req_prio_list_empty(&ready_send_requests) && (ndetached_send_requests_max == 0 || ndetached_send_requests < ndetached_send_requests_max)) + { + struct _starpu_mpi_req *req; + + if (n++ == nready_process) + /* Already spent some time on submitting ready send requests, poll before processing more ready send requests */ + break; + + req = _starpu_mpi_req_prio_list_pop_back_highest(&ready_send_requests); + + /* handling a request is likely to block for a while + * (on a sync_data_with_mem call), we want to let the + * application submit requests in the meantime, so we + * release the lock. */ + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_handle_ready_request(req); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + + _STARPU_MPI_TRACE_POLLING_BEGIN(); + + /* If there is no currently submitted envelope_request submitted to + * catch envelopes from senders, and there is some pending + * receive requests on our side, we resubmit a header request. */ + if (((_starpu_mpi_early_request_count() > 0) || (_starpu_mpi_sync_data_count() > 0)) && (envelope_request_submitted == 0))// && (HASH_COUNT(_starpu_mpi_early_data_handle_hashmap) == 0)) + { + _starpu_mpi_comm_post_recv(); + envelope_request_submitted = 1; + } + + /* test whether there are some terminated "detached request" */ + _starpu_mpi_test_detached_requests(); + + if (envelope_request_submitted == 1) + { + int flag; + struct _starpu_mpi_envelope *envelope; + MPI_Status envelope_status; + MPI_Comm envelope_comm; + + /* test whether an envelope has arrived. 
*/ + flag = _starpu_mpi_comm_test_recv(&envelope_status, &envelope, &envelope_comm); + + if (flag) + { + _STARPU_MPI_TRACE_POLLING_END(); + _STARPU_MPI_COMM_FROM_DEBUG(envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, envelope_status.MPI_SOURCE, _STARPU_MPI_TAG_ENVELOPE, envelope->data_tag, envelope_comm); + _STARPU_MPI_DEBUG(4, "Envelope received with mode %d\n", envelope->mode); + if (envelope->mode == _STARPU_MPI_ENVELOPE_SYNC_READY) + { + struct _starpu_mpi_req *_sync_req = _starpu_mpi_sync_data_find(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm); + _STARPU_MPI_DEBUG(20, "Sending data with tag %"PRIi64" to node %d\n", _sync_req->node_tag.data_tag, envelope_status.MPI_SOURCE); + STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %"PRIi64" != req %"PRIi64")\n", + envelope->data_tag, _sync_req->node_tag.data_tag); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_isend_data_func(_sync_req); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + else + { + _STARPU_MPI_DEBUG(3, "Searching for application request with tag %"PRIi64" and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&early_data_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + struct _starpu_mpi_req *early_request = _starpu_mpi_early_request_dequeue(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm); + + /* Case: a data will arrive before a matching receive is + * posted by the application. 
Create a temporary handle to + * store the incoming data, submit a starpu_mpi_irecv_detached + * on this handle, and store it as an early_data + */ + if (early_request == NULL) + { + if (envelope->sync) + { + _STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %"PRIi64"\n", envelope->data_tag); + struct _starpu_mpi_req *new_req; +#ifdef STARPU_DEVEL +#warning creating a request is not really useful. +#endif + /* Initialize the request structure */ + _starpu_mpi_request_init(&new_req); + new_req->request_type = RECV_REQ; + new_req->data_handle = NULL; + new_req->node_tag.node.rank = envelope_status.MPI_SOURCE; + new_req->node_tag.data_tag = envelope->data_tag; + new_req->node_tag.node.comm = envelope_comm; + new_req->detached = 1; + new_req->sync = 1; + new_req->callback = NULL; + new_req->callback_arg = NULL; + new_req->func = _starpu_mpi_irecv_size_func; + new_req->sequential_consistency = 1; + new_req->backend->is_internal_req = 0; // ???? + new_req->count = envelope->size; + _starpu_mpi_sync_data_add(new_req); + /* We have queued our sync request, we can let _starpu_mpi_submit_ready_request find it */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + } + else + { + /* This will release early_data_mutex when appropriate */ + _starpu_mpi_receive_early_data(envelope, envelope_status, envelope_comm); + } + } + /* Case: a matching application request has been found for + * the incoming data, we handle the correct allocation + * of the pointer associated to the data handle, then + * submit the corresponding receive with + * _starpu_mpi_handle_ready_request. 
*/ + else + { + /* Got the early request */ + STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); + _STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %"PRIi64"\n", envelope->data_tag); + _STARPU_MPI_DEBUG(2000, "Request sync %d\n", envelope->sync); + + early_request->sync = envelope->sync; + _starpu_mpi_datatype_allocate(early_request->data_handle, early_request); + if (early_request->registered_datatype == 1) + { + early_request->count = 1; + early_request->ptr = starpu_data_handle_to_pointer(early_request->data_handle, early_request->node); + } + else + { + early_request->count = envelope->size; + early_request->ptr = (void *)starpu_malloc_on_node_flags(early_request->node, early_request->count, 0); + starpu_memory_allocate(early_request->node, early_request->count, STARPU_MEMORY_OVERFLOW); + + STARPU_MPI_ASSERT_MSG(early_request->ptr, "cannot allocate message of size %ld\n", early_request->count); + } + + _STARPU_MPI_DEBUG(3, "Handling new request... \n"); + /* handling a request is likely to block for a while + * (on a sync_data_with_mem call), we want to let the + * application submit requests in the meantime, so we + * release the lock. */ + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _starpu_mpi_handle_ready_request(early_request); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + } + envelope_request_submitted = 0; + _STARPU_MPI_TRACE_POLLING_BEGIN(); + } + else + { + /* A call is made to driver_run_once only when + * the progression thread have gone through the + * communication progression loop + * mpi_driver_call_freq times. It is + * interesting to tune the + * STARPU_MPI_DRIVER_CALL_FREQUENCY + * depending on whether the user wants + * reactivity or computing power from the MPI + * progression thread. 
*/ + if (mpi_driver && (++mpi_driver_loop_counter == mpi_driver_call_freq)) + { + mpi_driver_loop_counter = 0; + mpi_driver_task_counter = 0; + while (mpi_driver_task_counter++ < mpi_driver_task_freq) + { + _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN(); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + _STARPU_MPI_DEBUG(4, "running once mpi driver\n"); + starpu_driver_run_once(mpi_driver); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + _STARPU_MPI_TRACE_DRIVER_RUN_END(); + } + } + + //_STARPU_MPI_DEBUG(4, "Nothing received, continue ..\n"); + } + } +#ifdef STARPU_USE_MPI_FT + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + starpu_mpi_ft_progress(); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); +#endif // STARPU_USE_MPI_FT +#ifdef STARPU_SIMGRID + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + starpu_pthread_wait_wait(&_starpu_mpi_thread_wait); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); +#endif + // We release the lock to avoid monopolizing it while polling for terminations + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + STARPU_VALGRIND_YIELD(); + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + } + + _STARPU_MPI_TRACE_POLLING_END(); + if (envelope_request_submitted) + { + _starpu_mpi_comm_cancel_recv(); + envelope_request_submitted = 0; + } + + +#ifdef STARPU_SIMGRID + STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); + while (wait_counter != 0) + STARPU_PTHREAD_COND_WAIT(&wait_counter_cond, &wait_counter_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); + + STARPU_PTHREAD_MUTEX_DESTROY(&wait_counter_mutex); + STARPU_PTHREAD_COND_DESTROY(&wait_counter_cond); + + starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &_starpu_mpi_thread_dontsleep); + starpu_pthread_queue_destroy(&_starpu_mpi_thread_dontsleep); + starpu_pthread_wait_destroy(&_starpu_mpi_thread_wait); +#endif + + STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(&detached_requests), "List of detached requests not empty"); + STARPU_MPI_ASSERT_MSG(ndetached_send_requests == 0, "Number of detached 
send requests not 0"); + STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(&ready_recv_requests), "List of ready requests not empty"); + STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_prio_list_empty(&ready_send_requests), "List of ready requests not empty"); + STARPU_MPI_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero"); + _starpu_mpi_early_request_check_termination(); + _starpu_mpi_early_data_check_termination(); + _starpu_mpi_sync_data_check_termination(); + _starpu_mpi_req_prio_list_deinit(&ready_send_requests); + +#ifdef STARPU_USE_FXT + _starpu_mpi_fxt_shutdown(); +#endif + + if (argc_argv->initialize_mpi) + { + _STARPU_MPI_DEBUG(0, "Calling MPI_Finalize()\n"); + MPI_Finalize(); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + _starpu_mpi_sync_data_shutdown(); + _starpu_mpi_early_data_shutdown(); + _starpu_mpi_early_request_shutdown(); + _starpu_mpi_datatype_shutdown(); + free(argc_argv); + + return NULL; +} + +int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv) +{ + STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL); + STARPU_PTHREAD_MUTEX_INIT(&early_data_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&progress_cond, NULL); + STARPU_PTHREAD_COND_INIT(&barrier_cond, NULL); + _starpu_mpi_req_list_init(&ready_recv_requests); + _starpu_mpi_req_prio_list_init(&ready_send_requests); + + _starpu_mpi_req_list_init(&detached_requests); + + STARPU_PTHREAD_MUTEX_INIT(&posted_requests_mutex, NULL); + + nready_process = starpu_getenv_number_default("STARPU_MPI_NREADY_PROCESS", 10); + ndetached_send_requests_max = starpu_getenv_number_default("STARPU_MPI_NDETACHED_SEND", 10); + early_data_force_allocate = starpu_getenv_number_default("STARPU_MPI_EARLYDATA_ALLOCATE", 0); + +#ifdef STARPU_SIMGRID + STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&wait_counter_cond, NULL); +#endif + +#ifdef STARPU_SIMGRID + _starpu_mpi_progress_thread_func(argc_argv); + return 0; +#else + 
STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv); + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + while (!running) + STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + return 0; +#endif +} + +#ifdef STARPU_SIMGRID +void _starpu_mpi_wait_for_initialization() +{ + /* Wait for MPI initialization to finish */ + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + while (!running) + STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); +} +#endif + +void _starpu_mpi_progress_shutdown(void **value) +{ + if (!running) + { + _STARPU_ERROR("The progress thread was not launched. Was StarPU successfully initialized?\n"); + } + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + running = 0; + STARPU_PTHREAD_COND_BROADCAST(&progress_cond); + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + +#ifdef STARPU_SIMGRID + /* FIXME: should rather properly wait for _starpu_mpi_progress_thread_func to finish */ + (void) value; + starpu_sleep(1); +#else + STARPU_PTHREAD_JOIN(progress_thread, value); +#endif + + STARPU_PTHREAD_MUTEX_DESTROY(&posted_requests_mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&progress_mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&early_data_mutex); + STARPU_PTHREAD_COND_DESTROY(&barrier_cond); +} + +static int64_t _starpu_mpi_tag_max = INT64_MAX; + +int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag) +{ + (void) comm; + if (keyval == STARPU_MPI_TAG_UB) + { + *flag = 1; + *(int64_t **)attribute_val = &_starpu_mpi_tag_max; + } + else + { + *flag = 0; + } + return 0; +} + +void _starpu_mpi_driver_init(struct starpu_conf *conf) +{ + /* We only initialize the driver if the environment variable + * STARPU_MPI_DRIVER_CALL_FREQUENCY is defined by the user. 
If this environment + * variable is not defined or defined at a value lower than or equal to zero, + * StarPU-MPI will not use a driver. */ + int driver_env = starpu_getenv_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0); + if (driver_env > 0) + { +#ifdef STARPU_SIMGRID + _STARPU_DISP("Warning: MPI driver is not supported with simgrid, this will be disabled\n"); + return; +#endif + mpi_driver_call_freq = driver_env; + + _STARPU_MALLOC(mpi_driver, sizeof(struct starpu_driver)); + mpi_driver->type = STARPU_CPU_WORKER; + mpi_driver->id.cpu_id = 0; + + conf->not_launched_drivers = mpi_driver; + conf->n_not_launched_drivers = 1; + + int tasks_freq_env = starpu_getenv_number_default("STARPU_MPI_DRIVER_TASK_FREQUENCY", 0); + if (tasks_freq_env > 0) + mpi_driver_task_freq = tasks_freq_env; + } +} + +void _starpu_mpi_wake_up_progress_thread() +{ + STARPU_PTHREAD_COND_SIGNAL(&progress_cond); +} + +void _starpu_mpi_driver_shutdown() +{ + if (mpi_driver) + { + starpu_driver_deinit(mpi_driver); + free(mpi_driver); + mpi_driver = NULL; + } +} + +#endif /* STARPU_USE_MPI_MPI */ diff --git a/mpi/src/mpi/starpu_mpi_mpi.h b/mpi/src/mpi/starpu_mpi_mpi.h new file mode 100644 index 0000000..71176e5 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_mpi.h @@ -0,0 +1,57 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_MPI_H__ +#define __STARPU_MPI_MPI_H__ + +#include +#include +#include +#include +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_MPI + +#ifdef __cplusplus +extern "C" +{ +#endif + +int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv); +void _starpu_mpi_progress_shutdown(void **value); + +#ifdef STARPU_SIMGRID +void _starpu_mpi_wait_for_initialization(); +#endif + +int _starpu_mpi_barrier(MPI_Comm comm); +int _starpu_mpi_wait_for_all(MPI_Comm comm); +int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status); +int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status); + +void _starpu_mpi_wake_up_progress_thread(); + +void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req); +void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req); + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_MPI_MPI */ +#endif /* __STARPU_MPI_MPI_H__ */ diff --git a/mpi/src/mpi/starpu_mpi_mpi_backend.c b/mpi/src/mpi/starpu_mpi_mpi_backend.c new file mode 100644 index 0000000..b0ca1e2 --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_mpi_backend.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +#ifdef STARPU_USE_MPI_MPI + +#include +#include +#include +#include +#include +#include +#include + +static void starpu_mpi_mpi_backend_constructor(void) __attribute__((constructor)); +static void starpu_mpi_mpi_backend_constructor(void) +{ +#ifdef HAVE_PIOMAN + /* We don't want progression in both PIOman and StarPU */ + setenv("PIOM_ENABLE_PROGRESSION", "0", 0); +#endif +} + +void _starpu_mpi_mpi_backend_init(struct starpu_conf *conf) +{ + _starpu_mpi_driver_init(conf); +} + +void _starpu_mpi_mpi_backend_shutdown(void) +{ + _starpu_mpi_tag_shutdown(); + _starpu_mpi_comm_shutdown(); + _starpu_mpi_driver_shutdown(); +} + +int _starpu_mpi_mpi_backend_reserve_core(void) +{ + return (starpu_getenv_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0) <= 0); +} + +void _starpu_mpi_mpi_backend_request_init(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend)); + + //req->backend->data_request = 0; + + STARPU_PTHREAD_MUTEX_INIT0(&req->backend->req_mutex, NULL); + STARPU_PTHREAD_COND_INIT0(&req->backend->req_cond, NULL); + STARPU_PTHREAD_COND_INIT0(&req->backend->posted_cond, NULL); + + //req->backend->other_request = NULL; + + //req->backend->size_req = 0; + //req->backend->internal_req = NULL; + //req->backend->is_internal_req = 0; + req->backend->to_destroy = 1; + //req->backend->early_data_handle = NULL; + //req->backend->envelope = NULL; +} + +void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, int is_internal_req) +{ + _starpu_mpi_comm_register(req->node_tag.node.comm); + req->backend->is_internal_req = is_internal_req; + /* For internal requests, we wait for both the request completion and the matching application request completion */ + req->backend->to_destroy = !is_internal_req; +} + +void _starpu_mpi_mpi_backend_request_destroy(struct _starpu_mpi_req *req) +{ + STARPU_PTHREAD_MUTEX_DESTROY(&req->backend->req_mutex); + 
STARPU_PTHREAD_COND_DESTROY(&req->backend->req_cond); + STARPU_PTHREAD_COND_DESTROY(&req->backend->posted_cond); + free(req->backend); + req->backend = NULL; +} + +void _starpu_mpi_mpi_backend_data_clear(starpu_data_handle_t data_handle) +{ + _starpu_mpi_tag_data_release(data_handle); +} + +void _starpu_mpi_mpi_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag) +{ + _starpu_mpi_tag_data_register(data_handle, data_tag); +} + +void _starpu_mpi_mpi_backend_comm_register(MPI_Comm comm) +{ + _starpu_mpi_comm_register(comm); +} + +struct _starpu_mpi_backend _mpi_backend = +{ + ._starpu_mpi_backend_init = _starpu_mpi_mpi_backend_init, + ._starpu_mpi_backend_shutdown = _starpu_mpi_mpi_backend_shutdown, + ._starpu_mpi_backend_reserve_core = _starpu_mpi_mpi_backend_reserve_core, + ._starpu_mpi_backend_request_init = _starpu_mpi_mpi_backend_request_init, + ._starpu_mpi_backend_request_fill = _starpu_mpi_mpi_backend_request_fill, + ._starpu_mpi_backend_request_destroy = _starpu_mpi_mpi_backend_request_destroy, + ._starpu_mpi_backend_data_clear = _starpu_mpi_mpi_backend_data_clear, + ._starpu_mpi_backend_data_register = _starpu_mpi_mpi_backend_data_register, + ._starpu_mpi_backend_comm_register = _starpu_mpi_mpi_backend_comm_register, + + ._starpu_mpi_backend_progress_init = _starpu_mpi_progress_init, + ._starpu_mpi_backend_progress_shutdown = _starpu_mpi_progress_shutdown, +#ifdef STARPU_SIMGRID + ._starpu_mpi_backend_wait_for_initialization = _starpu_mpi_wait_for_initialization, +#endif + + ._starpu_mpi_backend_barrier = _starpu_mpi_barrier, + ._starpu_mpi_backend_wait_for_all = _starpu_mpi_wait_for_all, + ._starpu_mpi_backend_wait = _starpu_mpi_wait, + ._starpu_mpi_backend_test = _starpu_mpi_test, + + ._starpu_mpi_backend_isend_size_func = _starpu_mpi_isend_size_func, + ._starpu_mpi_backend_irecv_size_func = _starpu_mpi_irecv_size_func, +}; + +#endif /* STARPU_USE_MPI_MPI*/ diff --git a/mpi/src/mpi/starpu_mpi_mpi_backend.h 
/* Base MPI tag used by StarPU-MPI; the tags below are consecutive offsets
 * from it. Each expansion is parenthesised so that the macros can be used
 * safely inside larger expressions. */
extern int _starpu_mpi_tag;
#define _STARPU_MPI_TAG_ENVELOPE  (_starpu_mpi_tag)
#define _STARPU_MPI_TAG_DATA      (_starpu_mpi_tag+1)
#define _STARPU_MPI_TAG_SYNC_DATA (_starpu_mpi_tag+2)

#ifdef STARPU_USE_MPI_FT
#define _STARPU_MPI_TAG_CP_ACK    (_starpu_mpi_tag+3)
#define _STARPU_MPI_TAG_CP_RCVRY  (_starpu_mpi_tag+4)
#define _STARPU_MPI_TAG_EXT_DATA  (_starpu_mpi_tag+5)
#define _STARPU_MPI_TAG_CP_INFO   (_starpu_mpi_tag+6)
#endif // STARPU_USE_MPI_FT
completion of another request */ + struct _starpu_mpi_req *other_request; + + MPI_Request size_req; + + struct _starpu_mpi_envelope* envelope; + + unsigned is_internal_req:1; + unsigned to_destroy:1; + struct _starpu_mpi_req *internal_req; + struct _starpu_mpi_early_data_handle *early_data_handle; + UT_hash_handle hh; +}; + +#endif // STARPU_USE_MPI_MPI + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_MPI_BACKEND_H__ diff --git a/mpi/src/mpi/starpu_mpi_sync_data.c b/mpi/src/mpi/starpu_mpi_sync_data.c new file mode 100644 index 0000000..26016bd --- /dev/null +++ b/mpi/src/mpi/starpu_mpi_sync_data.c @@ -0,0 +1,153 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef STARPU_USE_MPI_MPI
+
+/* One hashmap bucket: the requests queued under the same node_tag key. */
+struct _starpu_mpi_sync_data_handle_hashlist
+{
+	struct _starpu_mpi_req_list list;
+	UT_hash_handle hh;
+	struct _starpu_mpi_node_tag node_tag;
+};
+
+/** stores data which have been received by MPI but have not been requested by the application */
+static starpu_pthread_mutex_t _starpu_mpi_sync_data_handle_mutex;
+static struct _starpu_mpi_sync_data_handle_hashlist *_starpu_mpi_sync_data_handle_hashmap = NULL;
+static int _starpu_mpi_sync_data_handle_hashmap_count = 0;
+
+/* Reset the hashmap and its request counter, and create the mutex protecting them. */
+void _starpu_mpi_sync_data_init(void)
+{
+	_starpu_mpi_sync_data_handle_hashmap = NULL;
+	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_sync_data_handle_mutex, NULL);
+	_starpu_mpi_sync_data_handle_hashmap_count = 0;
+}
+
+/* Free every bucket of the hashmap (each one must already be empty) and destroy the mutex. */
+void _starpu_mpi_sync_data_shutdown(void)
+{
+	struct _starpu_mpi_sync_data_handle_hashlist *current=NULL, *tmp=NULL;
+	HASH_ITER(hh, _starpu_mpi_sync_data_handle_hashmap, current, tmp)
+	{
+		STARPU_ASSERT(_starpu_mpi_req_list_empty(&current->list));
+		HASH_DEL(_starpu_mpi_sync_data_handle_hashmap, current);
+		free(current);
+	}
+	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_sync_data_handle_mutex);
+}
+
+#ifdef STARPU_VERBOSE
+/* Debug helper: print the requests queued under node_tag.
+ * It does not lock by itself: the caller must hold _starpu_mpi_sync_data_handle_mutex. */
+static
+void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node_tag)
+{
+	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
+	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+
+	if (hashlist == NULL)
+	{
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld does not exist\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
+	}
+	else if (_starpu_mpi_req_list_empty(&hashlist->list))
+	{
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld is empty\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
+	}
+	else
+	{
+		struct _starpu_mpi_req *cur;
+		for (cur = _starpu_mpi_req_list_begin(&hashlist->list) ;
+		     cur != _starpu_mpi_req_list_end(&hashlist->list);
+		     cur = _starpu_mpi_req_list_next(cur))
+		{
+			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %ld: %p\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag, cur);
+		}
+	}
+}
+#endif
+
+/* Assert that no request is left in the hashmap. */
+void _starpu_mpi_sync_data_check_termination(void)
+{
+	STARPU_ASSERT_MSG(_starpu_mpi_sync_data_handle_hashmap_count == 0, "Number of sync received messages left is not zero, did you forget to post a receive corresponding to a send?");
+}
+
+/* Return the number of requests currently stored (read without taking the mutex). */
+int _starpu_mpi_sync_data_count(void)
+{
+	return _starpu_mpi_sync_data_handle_hashmap_count;
+}
+
+/* Pop and return the first request queued for (comm, source, data_tag),
+ * or NULL when there is none. */
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
+{
+	struct _starpu_mpi_req *req;
+	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_sync_data_handle_hashlist *found;
+
+	/* The key is hashed and compared over the whole structure, so clear it entirely first */
+	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
+	node_tag.data_tag = data_tag;
+
+	_STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %ld in the hashmap\n", (long int)comm, source, data_tag);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
+	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &node_tag, sizeof(struct _starpu_mpi_node_tag), found);
+	if (found == NULL)
+	{
+		req = NULL;
+	}
+	else
+	{
+		if (_starpu_mpi_req_list_empty(&found->list))
+		{
+			req = NULL;
+		}
+		else
+		{
+			req = _starpu_mpi_req_list_pop_front(&found->list);
+			_starpu_mpi_sync_data_handle_hashmap_count --;
+		}
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex);
+	_STARPU_MPI_DEBUG(60, "Found sync_data_handle %p with comm %ld source %d tag %ld in the hashmap\n", req, (long int)comm, source, data_tag);
+	return req;
+}
+
+/* Queue sync_req at the back of the bucket matching its node_tag,
+ * creating the bucket when it does not exist yet. */
+void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *sync_req)
+{
+	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
+
+	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %ld in the hashmap\n", sync_req, (long int)sync_req->node_tag.node.comm, sync_req->node_tag.node.rank, sync_req->node_tag.data_tag);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
+	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &sync_req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+	if (hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_sync_data_handle_hashlist));
+		_starpu_mpi_req_list_init(&hashlist->list);
+		hashlist->node_tag = sync_req->node_tag;
+		HASH_ADD(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(hashlist->node_tag), hashlist);
+	}
+	_starpu_mpi_req_list_push_back(&hashlist->list, sync_req);
+	_starpu_mpi_sync_data_handle_hashmap_count ++;
+#ifdef STARPU_VERBOSE
+	/* Dump while still holding the mutex: once it is released another thread
+	 * may pop sync_req or modify the hashmap being walked. */
+	_starpu_mpi_sync_data_handle_display_hash(&sync_req->node_tag);
+#endif
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex);
+}
+
+#endif // STARPU_USE_MPI_MPI
diff --git a/mpi/src/mpi/starpu_mpi_sync_data.h b/mpi/src/mpi/starpu_mpi_sync_data.h
new file mode 100644
index 0000000..66d8fd2
--- /dev/null
+++ b/mpi/src/mpi/starpu_mpi_sync_data.h
@@ -0,0 +1,48 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_SYNC_DATA_H__
+#define __STARPU_MPI_SYNC_DATA_H__
+
+#include
+#include
+#include
+#include
+#include
+
+/** @file */
+
+#ifdef STARPU_USE_MPI_MPI
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+void _starpu_mpi_sync_data_init(void);
+void _starpu_mpi_sync_data_check_termination(void);
+void _starpu_mpi_sync_data_shutdown(void);
+
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
+void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *req);
+/* (void), not (): in C an empty parameter list leaves the arguments unchecked */
+int _starpu_mpi_sync_data_count(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* STARPU_USE_MPI_MPI */
+#endif /* __STARPU_MPI_SYNC_DATA_H__ */
diff --git a/mpi/src/mpi/starpu_mpi_tag.c b/mpi/src/mpi/starpu_mpi_tag.c
new file mode 100644
index 0000000..560b9c5
--- /dev/null
+++ b/mpi/src/mpi/starpu_mpi_tag.c
@@ -0,0 +1,128 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef STARPU_USE_MPI_MPI
+
+/* Entry in the `registered_tag_handles' hash table.  */
+struct handle_tag_entry
+{
+	UT_hash_handle hh;
+	starpu_mpi_tag_t data_tag;
+	starpu_data_handle_t handle;
+};
+
+/* Hash table mapping host tags to data handles.
*/
+static struct handle_tag_entry *registered_tag_handles;
+static struct _starpu_spinlock    registered_tag_handles_lock;
+
+/* Create the spinlock protecting the tag -> handle table. */
+void _starpu_mpi_tag_init(void)
+{
+	_starpu_spin_init(&registered_tag_handles_lock);
+}
+
+/* Destroy the spinlock and free every entry still present in the table. */
+void _starpu_mpi_tag_shutdown(void)
+{
+	struct handle_tag_entry *tag_entry=NULL, *tag_tmp=NULL;
+
+	_starpu_spin_destroy(&registered_tag_handles_lock);
+
+	HASH_ITER(hh, registered_tag_handles, tag_entry, tag_tmp)
+	{
+		HASH_DEL(registered_tag_handles, tag_entry);
+		free(tag_entry);
+	}
+
+	registered_tag_handles = NULL;
+}
+
+/* Return the data handle registered with data_tag, or NULL when there is none. */
+starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag)
+{
+	struct handle_tag_entry *ret;
+	starpu_data_handle_t handle = NULL;
+
+	_starpu_spin_lock(&registered_tag_handles_lock);
+	HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(ret->data_tag), ret);
+	if (ret)
+	{
+		/* Read the entry while the lock is held: _starpu_mpi_tag_data_release()
+		 * may free it as soon as the lock is dropped. */
+		handle = ret->handle;
+	}
+	_starpu_spin_unlock(&registered_tag_handles_lock);
+
+	return handle;
+}
+
+/* Record that data_tag designates handle. Nothing is recorded when data_tag is -1.
+ * Registering a tag which is already in the table is an assertion failure. */
+void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
+{
+	if (data_tag == -1)
+	{
+		/* No tag for this data, probably a temporary data not to be communicated */
+		return;
+	}
+
+	struct handle_tag_entry *entry;
+	_STARPU_MPI_MALLOC(entry, sizeof(*entry));
+
+	STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(data_tag)),
+			  "There is already a data handle %p registered with the tag %"PRIi64"\n", _starpu_mpi_tag_get_data_handle_from_tag(data_tag), data_tag);
+
+	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %"PRIi64" in hashtable\n", handle, data_tag);
+
+	entry->handle = handle;
+	entry->data_tag = data_tag;
+
+	_starpu_spin_lock(&registered_tag_handles_lock);
+#ifndef STARPU_NO_ASSERT
+	/* Check again under the lock: the check above is not atomic with the insertion */
+	struct handle_tag_entry *old;
+	HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(entry->data_tag), old);
+	STARPU_ASSERT_MSG(!old, "tag %"PRIi64" being registered for data %p, but is already used by data %p!\n", data_tag, handle, old?old->handle:NULL);
+#endif
+	HASH_ADD(hh, registered_tag_handles, data_tag, sizeof(entry->data_tag), entry);
+	_starpu_spin_unlock(&registered_tag_handles_lock);
+}
+
+/* Remove the table entry of handle's tag and free it. Nothing is done when the tag is -1.
+ * Always returns 0. */
+int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
+{
+	starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(handle);
+
+	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %"PRIi64" from hashtable\n", handle, data_tag);
+
+	if (data_tag != -1)
+	{
+		struct handle_tag_entry *tag_entry;
+
+		_starpu_spin_lock(&registered_tag_handles_lock);
+		HASH_FIND(hh, registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), sizeof(tag_entry->data_tag), tag_entry);
+		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %"PRIi64" isn't in the hashmap !", handle, data_tag);
+
+		/* With assertions compiled out a missing entry reaches this point:
+		 * do not let HASH_DEL dereference NULL. */
+		if (tag_entry != NULL)
+		{
+			HASH_DEL(registered_tag_handles, tag_entry);
+		}
+
+		_starpu_spin_unlock(&registered_tag_handles_lock);
+
+		free(tag_entry);
+	}
+	return 0;
+}
+
+#endif // STARPU_USE_MPI_MPI
diff --git a/mpi/src/mpi/starpu_mpi_tag.h b/mpi/src/mpi/starpu_mpi_tag.h
new file mode 100644
index 0000000..a80636c
--- /dev/null
+++ b/mpi/src/mpi/starpu_mpi_tag.h
@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#ifndef __STARPU_MPI_TAG_H__ +#define __STARPU_MPI_TAG_H__ + +#include +#include +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_MPI + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_tag_init(void); +void _starpu_mpi_tag_shutdown(void); + +void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); +int _starpu_mpi_tag_data_release(starpu_data_handle_t handle); +starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag); + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI_MPI +#endif // __STARPU_MPI_TAG_H__ diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c new file mode 100644 index 0000000..a376029 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c @@ -0,0 +1,276 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include // Should be deduced at preprocessing (Nmad vs MPI)
+#include "starpu_mpi_cache.h"
+
+#define MAX_CP_TEMPLATE_NUMBER 32 // Arbitrary limit
+
+starpu_pthread_mutex_t           cp_lib_mutex;
+
+/* Completion callback of an ack message send: only accounts for it in the stats.
+ * _args is the struct _starpu_mpi_cp_ack_arg_cb of the message; it is not freed here. */
+void _ack_msg_send_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	_STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg));
+	_STARPU_MPI_DEBUG(3, "Ack send succeeded cpid:%d, cpinst:%d, dest:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank);
+	//free(arg);
+}
+
+/* Completion callback of an ack message reception: hands the (id, instance) pair
+ * to the template layer and aborts when that layer reports -1. */
+void _ack_msg_recv_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	int ret;
+	_STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg));
+	_STARPU_MPI_DEBUG(3, "ack msg recved id:%d inst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
+	ret = _checkpoint_template_digest_ack_reception(arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
+	if (ret == 0)
+	{
+		//free(arg);
+	}
+	else if (ret == -1)
+	{
+		STARPU_ABORT_MSG("Could not find CP template, cpid:%d - cpinst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
+	}
+}
+
+/* Store the received copy in the checkpoint package, then send the ack back to arg->rank. */
+void _starpu_mpi_store_data_and_send_ack_cb(struct _starpu_mpi_cp_ack_arg_cb* arg)
+{
+	checkpoint_package_data_add(arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank, arg->tag, arg->type, arg->copy_handle, arg->count);
+	_STARPU_MPI_DEBUG(3,"Send ack msg to %d: id=%d inst=%d\n", arg->rank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
+	_starpu_mpi_ft_service_post_send((void *) &arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank,
+					 _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _ack_msg_send_cb, arg);
+}
+
+/* Post the special reception for the ack message tag. */
+void _starpu_mpi_push_cp_ack_recv_cb(struct _starpu_mpi_cp_ack_arg_cb* arg)
+{
+	_STARPU_MPI_DEBUG(3, "Posting ack recv cb from %d\n", arg->rank);
+	_starpu_mpi_ft_service_post_special_recv(_STARPU_MPI_TAG_CP_ACK);
+//	_ft_service_msg_irecv_cb((void *) &arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank,
+//	                         _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _ack_msg_recv_cb, arg);
+}
+
+/* Called once the read-only duplicate has been acquired: release it, store it and ack. */
+void _recv_internal_dup_ro_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	starpu_data_release(arg->copy_handle);
+	_starpu_mpi_store_data_and_send_ack_cb(arg);
+}
+
+/* Completion callback of the reception of a STARPU_VALUE item. */
+void _recv_cp_external_data_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	_STARPU_MPI_FT_STATS_RECV_CP_DATA(starpu_data_get_size(arg->handle));
+	// an handle has specifically been created, Let's get the value back, and unregister the handle
+	arg->copy_handle = starpu_data_handle_to_pointer(arg->handle, STARPU_MAIN_RAM);
+	starpu_data_unregister_submit(arg->handle);
+	_starpu_mpi_store_data_and_send_ack_cb(arg);
+}
+
+/* Completion callback of the send of a STARPU_VALUE item: free the temporary copy
+ * and its handle, then wait for the ack. */
+void _send_cp_external_data_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	_STARPU_MPI_FT_STATS_SEND_CP_DATA(starpu_data_get_size(arg->handle));
+	free(starpu_data_handle_to_pointer(arg->handle, STARPU_MAIN_RAM));
+	starpu_data_unregister_submit(arg->handle);
+	_starpu_mpi_push_cp_ack_recv_cb(arg);
+}
+
+/* Completion callback of the send of a STARPU_R item: wait for the ack and, for a send
+ * that was not served from the cache, flag the first message of the checkpoint instance. */
+void _send_cp_internal_data_cb(void* _args)
+{
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+	_starpu_mpi_push_cp_ack_recv_cb(_args);
+	if (!arg->cache_flag)
+	{
+		//TODO: check cp_domain!
+		struct _starpu_mpi_checkpoint_tracker* tracker = _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(0, arg->checkpoint_instance_hint);
+		if(!tracker->first_msg_sent_flag)
+		{
+			tracker->first_msg_sent_flag = 1;
+			_STARPU_MPI_TRACE_CHECKPOINT_BEGIN(arg->checkpoint_instance_hint,0);
+		}
+	}
+}
+
+/* Account the size of arg->handle as a cached or as an actual checkpoint send. */
+void _send_internal_data_stats(struct _starpu_mpi_cp_ack_arg_cb* arg)
+{
+	if (arg->cache_flag)
+	{
+		_STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(starpu_data_get_size(arg->handle));
+	}
+	else
+	{
+		_STARPU_MPI_FT_STATS_SEND_CP_DATA(starpu_data_get_size(arg->handle));
+	}
+}
+
+/* Start a new instance of the checkpoint described by cp_template: walk its items and
+ * post, for each of them, the send (data backed up by another node) or the receive
+ * (data this node backs up). prio is the priority given to the sends.
+ * Returns 0. */
+int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template, int prio)
+{
+	starpu_data_handle_t handle;
+	struct _starpu_mpi_data* mpi_data;
+	struct _starpu_mpi_cp_ack_arg_cb* arg;
+	void* cpy_ptr;
+	struct _starpu_mpi_checkpoint_template_item* item;
+	int current_instance;
+
+	current_instance = increment_current_instance();
+	_starpu_mpi_checkpoint_post_cp_discard_recv(cp_template);
+	_starpu_mpi_checkpoint_template_create_instance_tracker(cp_template, cp_template->cp_id, cp_template->checkpoint_domain, current_instance);
+	//TODO check what happens when all the ack msg are received when we arrive here.
+	item = _starpu_mpi_checkpoint_template_get_first_data(cp_template);
+	while (item != _starpu_mpi_checkpoint_template_end(cp_template))
+	{
+		switch (item->type)
+		{
+			case STARPU_VALUE:
+				// TODO: Maybe do not pass via starpu handles for external data, and need to reimplement mpi comm layer for
+				_STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb));
+				arg->tag = item->tag;
+				arg->type = STARPU_VALUE;
+				arg->count = item->count;
+				arg->cache_flag = 0;
+				if (item->backupped_by != -1)
+				{
+					int ret;
+					_STARPU_MALLOC(cpy_ptr, item->count);
+					memcpy(cpy_ptr, item->ptr, item->count);
+					starpu_variable_data_register(&arg->handle, STARPU_MAIN_RAM, (uintptr_t)cpy_ptr, item->count);
+					arg->rank = item->backupped_by;
+					_STARPU_MPI_DEBUG(0, "Submit CP: sending external data:%d, tag:%ld, to :%d\n", (int)(*(int*)cpy_ptr), arg->tag, arg->rank);
+					ret = starpu_mpi_isend_detached_prio(arg->handle, arg->rank, arg->tag, prio, MPI_COMM_WORLD,
+									     &_send_cp_external_data_cb, (void*)arg);
+					/* Checked like the matching receive below */
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_prio");
+					// The callback needs to free the handle specially created for the send, and post ack recv
+				}
+				else if (item->backup_of != -1)
+				{
+					int ret;
+					arg->msg.checkpoint_id = cp_template->cp_id;
+					arg->msg.checkpoint_instance = current_instance;
+					_STARPU_MALLOC(cpy_ptr, item->count);
+					starpu_variable_data_register(&arg->handle, STARPU_MAIN_RAM, (uintptr_t)cpy_ptr, item->count);
+					arg->rank = item->backup_of;
+					_STARPU_MPI_DEBUG(0, "Submit CP: receiving external data tag:%ld, from :%d\n", arg->tag, arg->rank);
+					ret = starpu_mpi_irecv_detached(arg->handle, arg->rank, arg->tag, MPI_COMM_WORLD,
+									&_recv_cp_external_data_cb, (void*)arg);
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached");
+					// The callback needs to store the received data and post ack send
+				}
+				else
+				{
+					/* Neither sent nor received by this node: no callback will ever see arg */
+					free(arg);
+				}
+				break;
+			case STARPU_R:
+				handle = (starpu_data_handle_t)item->ptr;
+				mpi_data = _starpu_mpi_data_get(handle);
+				if (starpu_mpi_data_get_rank(handle)==_my_rank)
+				{
+					if (!mpi_data->modified)
+					{
+						_starpu_mpi_checkpoint_tracker_update(cp_template, cp_template->cp_id, cp_template->checkpoint_domain, current_instance);
+						//TODO: check if the data are all acknowledged
+						_STARPU_MPI_DEBUG(0, "Submit CP: skip send starPU data to %d (tag %d)\n", item->backupped_by, (int)starpu_mpi_data_get_tag(handle));
+						_STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(starpu_data_get_size(handle));
+						break; // We don't want to CP a data that is still at initial state.
+					}
+					_STARPU_MPI_DEBUG(0, "Submit CP: sending starPU data to %d (tag %d)\n", item->backupped_by, (int)starpu_mpi_data_get_tag(handle));
+					_STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb));
+					arg->rank = item->backupped_by;
+					arg->handle = handle;
+					arg->tag = starpu_mpi_data_get_tag(handle);
+					arg->type = STARPU_R;
+					arg->count = item->count;
+					arg->checkpoint_instance_hint = current_instance;
+					_starpu_mpi_isend_cache_aware(handle, item->backupped_by, starpu_mpi_data_get_tag(handle), MPI_COMM_WORLD, 1, 0, prio,
+								      &_send_cp_internal_data_cb, (void*)arg, 1, &arg->cache_flag);
+					// the callbacks need to post ack recv. The cache one needs to release the handle.
+					_send_internal_data_stats(arg);
+				}
+				else if (item->backup_of == starpu_mpi_data_get_rank(handle))
+				{
+					if (!mpi_data->modified)
+					{
+						/* Receive side: the peer is the owner, i.e. item->backup_of */
+						_STARPU_MPI_DEBUG(0, "Submit CP: skip recv starPU data from %d (tag %d)\n", item->backup_of, (int)starpu_mpi_data_get_tag(handle));
+						_STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(handle));
+						break; // We don't want to CP a data that is still at initial state.
+					}
+					_STARPU_MPI_DEBUG(0, "Submit CP: receiving starPU data from %d (tag %d)\n", starpu_mpi_data_get_rank(handle), (int)starpu_mpi_data_get_tag(handle));
+					_STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb));
+					arg->rank = item->backup_of;
+					arg->handle = handle;
+					arg->tag = starpu_mpi_data_get_tag(handle);
+					arg->type = STARPU_R;
+					arg->count = item->count;
+					arg->msg.checkpoint_id = cp_template->cp_id;
+					arg->msg.checkpoint_instance = current_instance;
+					_starpu_mpi_irecv_cache_aware(handle, starpu_mpi_data_get_rank(handle), starpu_mpi_data_get_tag(handle), MPI_COMM_WORLD, 1, 0,
+								      NULL, NULL, 1, 0, 1, &arg->cache_flag);
+					// The callback needs to do nothing. The cached one must release the handle.
+					// _recv_internal_data_stats(arg); // Now done in data_cache_set
+					starpu_data_dup_ro(&arg->copy_handle, arg->handle, 1);
+					starpu_data_acquire_cb(arg->copy_handle, STARPU_R, _recv_internal_dup_ro_cb, arg);
+					// The callback need to store the data and post ack send.
+				}
+				break;
+			default:
+				/* Other item types need no communication */
+				break;
+		}
+
+		item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item);
+	}
+
+	return 0;
+}
+
+//
+///**
+// * receives param of type starpu_mpi_checkpoint_template_t
+// * @param args
+// * @return
+// */
+//void _starpu_mpi_checkpoint_ack_send_cb(void* args)
+//{
+//	starpu_mpi_checkpoint_template_t cp_template = (starpu_mpi_checkpoint_template_t) args;
+//	starpu_pthread_mutex_lock(&cp_template->mutex);
+//	cp_template->remaining_ack_awaited--;
+//	starpu_pthread_mutex_unlock(&cp_template->mutex);
+//}
+
+
+//
+//void _starpu_checkpoint_cached_data_send_copy_and_ack(void* _arg)
+//{
+//	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _arg;
+//	starpu_data_register_same(&arg->copy_handle, arg->handle);
+//	starpu_data_cpy_priority(arg->copy_handle, arg->handle, 1, _starpu_mpi_push_cp_ack_recv_cb, _arg, STARPU_MAX_PRIO);
+//	starpu_data_release(arg->handle);
+//}
+//
+//void _starpu_checkpoint_data_send_copy_and_ack(void* _args)
+//{
+//	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
+//	starpu_data_register_same(&arg->copy_handle, arg->handle);
+//	starpu_data_cpy_priority(arg->copy_handle, arg->handle, 1, _starpu_mpi_push_cp_ack_recv_cb, _args, STARPU_MAX_PRIO);
+//}
+//
+//void _starpu_mpi_treat_cache_ack_no_lock_cb(void* _args)
+//{
+//	starpu_mpi_checkpoint_template_t cp_template = (starpu_mpi_checkpoint_template_t)_args;
+//	cp_template->remaining_ack_awaited--;
+//}
+
diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.h
new file mode 100644
index 0000000..107a50d
--- /dev/null
+++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.h
@@ -0,0 +1,70 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef FT_STARPU_STARPU_MPI_CHECKPOINT_H
+#define FT_STARPU_STARPU_MPI_CHECKPOINT_H
+
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+extern int _my_rank;
+
+/* Payload of an acknowledgement message: the checkpoint it refers to. */
+struct _starpu_mpi_cp_ack_msg
+{
+	int checkpoint_id;
+	int checkpoint_instance;
+};
+
+/* Payload of a checkpoint information message. */
+struct _starpu_mpi_cp_info_msg
+{
+	int checkpoint_id;
+	int checkpoint_instance;
+	/* Unsigned on purpose: whether a plain `int' bit-field is signed is
+	 * implementation-defined, and a signed 1-bit field cannot hold the value 1. */
+	unsigned validation:1;
+	unsigned discard:1;
+};
+
+/* Argument carried through the checkpoint data/ack callbacks. */
+struct _starpu_mpi_cp_ack_arg_cb
+{
+	int rank;
+	starpu_data_handle_t handle;
+	starpu_data_handle_t copy_handle;
+	int type;
+	int count;
+	starpu_mpi_tag_t tag;
+	struct _starpu_mpi_cp_ack_msg msg;
+	int checkpoint_instance_hint;
+	int cache_flag;
+};
+
+/* Argument carried through the checkpoint discard callbacks. */
+struct _starpu_mpi_cp_discard_arg_cb
+{
+	int rank;
+	struct _starpu_mpi_cp_info_msg msg;
+};
+
+void _ack_msg_recv_cb(void* _args);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //FT_STARPU_STARPU_MPI_CHECKPOINT_H
diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c
new file mode 100644
index 0000000..905e280
--- /dev/null
+++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c
@@ -0,0 +1,177 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+struct _starpu_mpi_checkpoint_data_list* checkpoint_data_list;
+starpu_pthread_mutex_t package_package_mutex;
+
+int _checkpoint_package_data_delete_all(void);
+
+/* Create the mutex and the (empty) list of stored checkpoint data. Returns 0. */
+int checkpoint_package_init(void)
+{
+	STARPU_PTHREAD_MUTEX_INIT(&package_package_mutex, NULL);
+	checkpoint_data_list = _starpu_mpi_checkpoint_data_list_new();
+	_starpu_mpi_checkpoint_data_list_init(checkpoint_data_list);
+	return 0;
+}
+
+/* Drop every stored checkpoint data, then release the list and the mutex. Returns 0. */
+int checkpoint_package_shutdown(void)
+{
+	_checkpoint_package_data_delete_all();
+	/* The list head allocated by checkpoint_package_init() is empty by now: free it too */
+	_starpu_mpi_checkpoint_data_list_delete(checkpoint_data_list);
+	checkpoint_data_list = NULL;
+	STARPU_PTHREAD_MUTEX_DESTROY(&package_package_mutex);
+	return 0;
+}
+
+#ifdef STARPU_USE_MPI_FT_STATS
+/* Account new_checkpoint_data as stored, unless an entry with the same tag and
+ * pointer is already in the list. */
+void _stats_store_checkpoint_data(struct _starpu_mpi_checkpoint_data* new_checkpoint_data)
+{
+	struct _starpu_mpi_checkpoint_data* next_checkpoint_data;
+	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list);
+	while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list))
+	{
+		next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data);
+		if (checkpoint_data->tag == new_checkpoint_data->tag && checkpoint_data->ptr == new_checkpoint_data->ptr)
+		{
+			// The data is already in the CP data list,don't count it as a new data
+			return;
+		}
+		checkpoint_data = next_checkpoint_data;
+	}
+	_STARPU_MPI_FT_STATS_STORE_CP_DATA(new_checkpoint_data->type==STARPU_VALUE?new_checkpoint_data->count:new_checkpoint_data->type==STARPU_R?starpu_data_get_size((starpu_data_handle_t) new_checkpoint_data->ptr):-1);
+}
+#else
+void _stats_store_checkpoint_data(STARPU_ATTRIBUTE_UNUSED struct _starpu_mpi_checkpoint_data* new_checkpoint_data)
+{
+	return;
+}
+#endif
+
+#ifdef STARPU_USE_MPI_FT_STATS
+/* Account new_checkpoint_data as discarded, unless an entry with the same tag and
+ * pointer is still in the list. */
+void _stats_discard_checkpoint_data(struct _starpu_mpi_checkpoint_data* new_checkpoint_data)
+{
+	struct _starpu_mpi_checkpoint_data* next_checkpoint_data;
+	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list);
+	while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list))
+	{
+		next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data);
+		if (checkpoint_data->tag == new_checkpoint_data->tag && checkpoint_data->ptr == new_checkpoint_data->ptr)
+		{
+			// The data is still in the CP data list, don't count it as a discard
+			return;
+		}
+		checkpoint_data = next_checkpoint_data;
+	}
+	_STARPU_MPI_FT_STATS_DISCARD_CP_DATA(new_checkpoint_data->type==STARPU_VALUE?new_checkpoint_data->count:new_checkpoint_data->type==STARPU_R?starpu_data_get_size((starpu_data_handle_t) new_checkpoint_data->ptr):-1);
+}
+#else
+void _stats_discard_checkpoint_data(STARPU_ATTRIBUTE_UNUSED struct _starpu_mpi_checkpoint_data* new_checkpoint_data)
+{
+	return;
+}
+#endif
+
+/* Append a new entry describing one piece of checkpointed data to the list.
+ * ptr is freed (STARPU_VALUE) or unregistered as a data handle (STARPU_R) when the
+ * entry is deleted. Returns 0. */
+int checkpoint_package_data_add(int cp_id, int cp_inst, int rank, starpu_mpi_tag_t tag, int type, void* ptr, int count)
+{
+	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_new();
+	checkpoint_data->cp_id = cp_id;
+	checkpoint_data->cp_inst = cp_inst;
+	checkpoint_data->rank = rank;
+	checkpoint_data->tag = tag;
+	checkpoint_data->type = type;
+	checkpoint_data->ptr = ptr;
+	checkpoint_data->count = count;
+	STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex);
+	_stats_store_checkpoint_data(checkpoint_data);
+	_starpu_mpi_checkpoint_data_list_push_back(checkpoint_data_list, checkpoint_data);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex);
+	_STARPU_MPI_DEBUG(8, "CP data (%p) added - cpid:%d - cpinst:%d - rank:%d - tag:%ld\n", checkpoint_data->ptr, checkpoint_data->cp_id, checkpoint_data->cp_inst, checkpoint_data->rank, checkpoint_data->tag);
+	return 0;
+}
+
+/* Remove checkpoint_data from the list and release what it holds.
+ * Does not lock: both callers in this file hold package_package_mutex.
+ * Returns the size in bytes of the released data. */
+int _checkpoint_package_data_delete(struct _starpu_mpi_checkpoint_data* checkpoint_data)
+{
+	size_t size = 0;
+	_starpu_mpi_checkpoint_data_list_erase(checkpoint_data_list, checkpoint_data);
+	_stats_discard_checkpoint_data(checkpoint_data);
+	if (checkpoint_data->type==STARPU_R)
+	{
+		starpu_data_handle_t handle = checkpoint_data->ptr;
+		size = starpu_data_get_size(handle);
+		_STARPU_MPI_DEBUG(8, "Clearing handle %p entry\n", handle);
+		starpu_data_unregister_submit(handle);
+	}
+	else if (checkpoint_data->type==STARPU_VALUE)
+	{
+		size = checkpoint_data->count;
+		_STARPU_MPI_DEBUG(8, "Clearing external data entry\n");
+		free(checkpoint_data->ptr);
+	}
+	else
+	{
+		STARPU_ABORT_MSG("Unrecognized data type: %d\n", checkpoint_data->type);
+	}
+	free(checkpoint_data);
+	return size;
+}
+
+/* Delete the entries stored for rank whose checkpoint instance is older than cp_inst.
+ * cp_id is currently unused. Returns the number of deleted entries. */
+int checkpoint_package_data_del(int cp_id, int cp_inst, int rank)
+{
+	(void)cp_id;
+	int done = 0;
+	size_t size = 0;
+	struct _starpu_mpi_checkpoint_data* next_checkpoint_data = NULL;
+	STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex);
+	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list);
+	while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list))
+	{
+		/* Fetch the successor first: the current entry may be freed below */
+		next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data);
+		// I delete all the old data (i.e. the cp inst is strictly lower than the one of the just validated CP) only for
+		// the rank that initiated the CP
+		if (checkpoint_data->cp_inst<cp_inst && checkpoint_data->rank==rank)
+		{
+			size += _checkpoint_package_data_delete(checkpoint_data);
+			done++;
+		}
+		checkpoint_data = next_checkpoint_data;
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex);
+	/* size is a size_t: convert it to the type %ld expects */
+	_STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, (long)size);
+
+	return done;
+}
+
+/* Delete every entry of the list. Returns the number of deleted entries. */
+int _checkpoint_package_data_delete_all(void)
+{
+	int done = 0;
+	size_t size = 0;
+	struct _starpu_mpi_checkpoint_data* next_checkpoint_data = NULL;
+	STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex);
+	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list);
+	while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list))
+	{
+		next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data);
+		// I delete all the data
+		size += _checkpoint_package_data_delete(checkpoint_data);
+		done++;
+		checkpoint_data = next_checkpoint_data;
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex);
+	/* size is a size_t: convert it to the type %ld expects */
+	_STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, (long)size);
+
+	return done;
+}
diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.h
new file mode 100644
index 0000000..3899844
--- /dev/null
+++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.h
@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef _STARPU_MPI_CHECKPOINT_PACKAGE_H
+#define _STARPU_MPI_CHECKPOINT_PACKAGE_H
+
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*TODO: This structure should be a hashtable accessible with these keys:
+ * CPid > CPinstance > Rank > tag */
+
+LIST_TYPE(_starpu_mpi_checkpoint_data,
+	  int cp_id;
+	  int cp_inst;
+	  int rank;
+	  starpu_mpi_tag_t tag;
+	  int type;
+	  void* ptr;
+	  int count;
+);
+
+/* (void), not (): in C an empty parameter list leaves the arguments unchecked */
+int checkpoint_package_init(void);
+int checkpoint_package_shutdown(void);
+int checkpoint_package_data_add(int cp_id, int cp_inst, int rank, starpu_mpi_tag_t tag, int type, void* ptr, int count);
+int checkpoint_package_data_del(int cp_id, int cp_inst, int rank);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_STARPU_MPI_CHECKPOINT_PACKAGE_H
diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c
new file mode 100644
index 0000000..a519b90
--- /dev/null
+++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c
@@ -0,0 +1,554 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +starpu_pthread_mutex_t cp_template_mutex; +starpu_pthread_mutex_t current_instance_mutex; +starpu_mpi_checkpoint_template_t cp_template_array[MAX_CP_TEMPLATE_NUMBER]; +int cp_template_array_size = 0; +static int my_rank; +static int comm_size; +static int current_instance; + +typedef int (*backup_of_fn)(int); + +int increment_current_instance() +{ + int _inst; + STARPU_PTHREAD_MUTEX_LOCK(¤t_instance_mutex); + _inst = ++current_instance; + STARPU_PTHREAD_MUTEX_UNLOCK(¤t_instance_mutex); + return _inst; +} + +int get_current_instance() +{ + int _inst; + STARPU_PTHREAD_MUTEX_LOCK(¤t_instance_mutex); + _inst = current_instance; + STARPU_PTHREAD_MUTEX_UNLOCK(¤t_instance_mutex); + return _inst; +} + +void checkpoint_template_lib_init(void) +{ + STARPU_PTHREAD_MUTEX_INIT(¤t_instance_mutex, NULL); + STARPU_PTHREAD_MUTEX_INIT(&cp_template_mutex, NULL); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &_my_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); + current_instance = 0; +#ifdef STARPU_MPI_VERBOSE + _starpu_mpi_set_debug_level_max(1000); +#endif +} + +void checkpoint_template_lib_quit(void) +{ + int i; + for (i=0 ; imutex); + STARPU_ASSERT_MSG(!cp_template->frozen, "It is not possible to modify registered checkpoint template.\n"); + struct _starpu_mpi_checkpoint_template_item* item; + item = _starpu_mpi_checkpoint_template_item_create(type, ptr, count, backupped_by, backup_of, tag); + _starpu_mpi_checkpoint_template_item_list_push_back(&cp_template->list, item); + _checkpoint_template_add_to_backup_arrays(cp_template, backupped_by, backup_of); + _STARPU_MPI_DEBUG(5, "New checkpoint data entry %p (data:%p) has been added to cp_template with id:%d. (%s)\n", item, item->ptr, cp_template->cp_id, backupped_by == -1 ? 
"BACKUP_OF" : "BACKUPPED_BY"); + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); + return 0; +} + +int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, int cp_domain) +{ + *cp_template = _starpu_mpi_checkpoint_template_new(cp_id, cp_domain); + return 0; +} + +int _starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t cp_template, int arg_type, va_list varg_list) +{ + void* ptr; + int count; + int backupped_by; + int data_rank; + starpu_mpi_tag_t tag; + backup_of_fn _backup_of; + int i; + + arg_type = arg_type & ~STARPU_COMMUTE; + + switch(arg_type) + { + case STARPU_R: + ptr = va_arg(varg_list, void*); + count = 1; + backupped_by = va_arg(varg_list, int); + data_rank = starpu_mpi_data_get_rank((starpu_data_handle_t)ptr); + if (_my_rank==data_rank) + { + return _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, backupped_by, -1, -1); + } + else if(_my_rank == backupped_by) + { + return _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, -1, data_rank, -1); + } + else + { + /* Since this data does not concern me (i.e. 
it is nor my data neither a data which I'm the back up) + * it is considered unnecessary to register in the CP */ + return 0; + } + break; + case STARPU_VALUE: + ptr = va_arg(varg_list, void*); + count = va_arg(varg_list, int); + tag = va_arg(varg_list, starpu_mpi_tag_t); + _backup_of = va_arg(varg_list, backup_of_fn); + /* I register the backup that will save this data */ + _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, _backup_of(_my_rank), -1, tag); + for (i=0 ; i<_my_rank ; i++) + { + if (_backup_of(i) == _my_rank) + { + /* I'm the back up of someone else for this data, I have to remember it */ + _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, -1, i, tag); + } + } + for (i=_my_rank+1 ; irank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance); + checkpoint_package_data_del(arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank); + // TODO free _args +} + + +int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template) +{ + /* A new CP is submitted. We must post matching recv for the message warning the future checkpoint integrity (so + * I can discard old data from deprecated checkpoint). + * I will receive a msg if I have old CP data. + * TODO: For the message logging discard, I will receive message from the people I exchanged with since the last checkpoint. 
+ * */ + struct _starpu_mpi_cp_discard_arg_cb* arg; + int i; + + for (i=0 ; ibackup_of_array_used_size ; i++) + { + _STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); + arg->rank = cp_template->backup_of_array[i]; + _STARPU_MPI_DEBUG(10, "Post DISCARD msg reception from %d\n", arg->rank); + + _starpu_mpi_ft_service_post_special_recv(_STARPU_MPI_TAG_CP_INFO); +// _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, +// MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void *) arg); + } + return i; +} + +void _cp_discard_message_send_cb(void* _args) +{ + _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg)); + free(_args); +} + +int _starpu_mpi_checkpoint_post_cp_discard_send(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_instance) +{ + /* The CP data replication has succeeded. I must send the message warning the checkpoint integrity (so + * they can discard old data from deprecated checkpoint). + * I will send to the ones if it has old CP data from me. + * TODO: For the message logging discard, I will send message to the people I exchanged with since the last checkpoint. 
+ * */ + struct _starpu_mpi_cp_discard_arg_cb* arg; + int i; + + for (i=0 ; i < cp_template->backupped_by_array_used_size ; i++) + { + _STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); + arg->rank = cp_template->backupped_by_array[i]; + _STARPU_MPI_DEBUG(10, "Post CP DISCARD msg sending to %d\n", arg->rank); + arg->msg.discard=1; + arg->msg.validation=0; + arg->msg.checkpoint_id = cp_id; + arg->msg.checkpoint_instance = cp_instance; + _starpu_mpi_ft_service_post_send(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, + _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void *) arg); + } + + return 0; +} + +starpu_mpi_checkpoint_template_t _starpu_mpi_get_checkpoint_template_by_id(int checkpoint_id) +{ + int i; + STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); + for (i=0 ; i < cp_template_array_size ; i++) + { +// STARPU_PTHREAD_MUTEX_LOCK(&cp_template_array[i]->mutex); + if (cp_template_array[i]->cp_id == checkpoint_id) + { +// STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_array[i]->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); + return cp_template_array[i]; + } +// STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_array[i]->mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); + return NULL; +} + +//int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template) +//{ +// /* A new CP is submitted. We must post matching recv for the message warning the future checkpoint integrity (so +// * I can tag the data as CP validated, and discard old data from deprecated checkpoint). +// * I will receive a msg if I have old CP data, or if I am the back up for a node into the upcoming Checkpoint. +// * * Here the union of the different list is processed to post message reception only once. +// * TODO: For the message logging discard, I will receive message from the people I exchanged with since the last checkpoint. 
+// * */ +// struct _starpu_mpi_cp_discard_arg_cb* arg; +// int i, j, flag; +// starpu_mpi_checkpoint_template_t old_template; +// for (i=0 ; ibackup_of_array_used_size ; i++) +// { +// STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); +// arg->rank = cp_template->backup_of_array[i]; +// _STARPU_MPI_DEBUG(10, "Posting DISCARD msg reception from %d\n", arg->rank); +// _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void*)arg); +// } +// if (last_valid_checkpoint.checkpoint_id == -1) +// { +// return -1; +// } +// else if (last_valid_checkpoint.checkpoint_id!=cp_template->cp_id) +// { +// old_template = _starpu_mpi_get_checkpoint_template_by_id(last_valid_checkpoint.checkpoint_id); +// for (i=0 ; ibackup_of_array_used_size ; i++) +// { +// flag=0; +// for(j=0 ; jbackup_of_array_used_size ; j++) +// { +// if (cp_template->backup_of_array[j] == old_template->backup_of_array[i]) +// { +// flag = 1; +// break; +// } +// } +// if (flag==0) +// { +// STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); +// arg->rank = old_template->backup_of_array[i]; +// _STARPU_MPI_DEBUG(10, "Posting DISCARD msg reception from %d - LAST VALIDATED CP\n", arg->rank); +// _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void*)arg); +// } +// } +// } +// return 0; +//} + +//int _starpu_mpi_checkpoint_post_cp_discard_send(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_instance) +//{ +// /* The CP data replication has succeeded. I must send the message warning the future checkpoint integrity (so +// * they can tag the data as CP validated, and discard old data from deprecated checkpoint). +// * I will send to one if it has old CP data from me, or if it is my backup for a data into the just succeeded Checkpoint. 
+// * * Here the union of the different list is processed to send message only once. +// * TODO: For the message logging discard, I will send message to the people I exchanged with since the last checkpoint. +// * */ +// struct _starpu_mpi_cp_discard_arg_cb* arg; +// int i, j, flag; +// starpu_mpi_checkpoint_template_t old_template; +// for (i=0 ; ibackupped_by_array_used_size ; i++) +// { +// STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); +// arg->rank = cp_template->backupped_by_array[i]; +// _STARPU_MPI_DEBUG(10, "Sending DISCARD msg reception to %d\n", arg->rank); +// arg->msg.checkpoint_id = cp_id; +// arg->msg.checkpoint_instance = cp_instance; +// _ft_service_msg_isend_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void*)arg); +// } +// if (last_valid_checkpoint.checkpoint_id == -1) +// { +// return -1; +// } +// else if (last_valid_checkpoint.checkpoint_id!=cp_template->cp_id) +// { +// old_template = _starpu_mpi_get_checkpoint_template_by_id(last_valid_checkpoint.checkpoint_id); +// for (i=0 ; ibackupped_by_array_used_size ; i++) +// { +// flag=0; +// for(j=0 ; jbackupped_by_array_used_size ; j++) +// { +// if (cp_template->backupped_by_array[j] == old_template->backupped_by_array[i]) +// { +// flag = 1; +// break; +// } +// } +// if (flag==0) +// { +// STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); +// arg->rank = old_template->backupped_by_array[i]; +// _STARPU_MPI_DEBUG(10, "Sending DISCARD msg to %d - OLD CP\n", arg->rank); +// arg->msg.checkpoint_id = cp_id; +// arg->msg.checkpoint_instance = cp_instance; +// _ft_service_msg_isend_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void*)arg); +// } +// } +// } +// return 0; +//} + +int _starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t cp_template) +{ +// char 
str[256]; + int i; + STARPU_PTHREAD_MUTEX_LOCK(&cp_template->mutex); + _STARPU_MPI_DEBUG(2, "Start freezing checkpoint id:%d\n", cp_template->cp_id); + cp_template->frozen = 1; + cp_template->message_to_send_number = 0; + cp_template->size = _starpu_mpi_checkpoint_template_item_list_size(&cp_template->list); + + struct _starpu_mpi_checkpoint_template_item* item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); + + while (item != _starpu_mpi_checkpoint_template_end(cp_template)) + { + if (item->backup_of==-1 && item->backupped_by!=-1) + { + cp_template->message_to_send_number++; + } + item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); + } +// sprintf(str, "backupped by Array maxsize:%d - currentsize:%d - ", cp_template->backupped_by_array_max_size, cp_template->backupped_by_array_used_size); +// for (int i=0 ; ibackupped_by_array_used_size ; i++) +// { +// sprintf(str,"%s%d ", str, cp_template->backupped_by_array[i]); +// } +// fprintf(stderr, "%s\n", str); +// +// sprintf(str,"backup of Array maxsize:%d - currentsize:%d - ", cp_template->backup_of_array_max_size, cp_template->backup_of_array_used_size); +// for (int i=0 ; ibackup_of_array_used_size ; i++) +// { +// sprintf(str,"%s%d ", str, cp_template->backup_of_array[i]); +// } +// fprintf(stderr, "%s\n", str); + + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); + + STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); + for (i=0 ; i < cp_template_array_size ; i++) + { + STARPU_ASSERT_MSG(cp_template_array[i]->cp_id != cp_template->cp_id, "A checkpoint with id %d has already been registered.\n", cp_template->cp_id); + } + cp_template_array[cp_template_array_size] = cp_template; + cp_template_array_size++; + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); + + _STARPU_MPI_DEBUG(2, "Checkpoint id:%d is frozen and registered.\n", cp_template->cp_id); + return cp_template->size; +} + +int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, 
int cp_domain, va_list varg_list) +{ + int arg_type; + + starpu_mpi_checkpoint_template_t _cp_template = _starpu_mpi_checkpoint_template_new(cp_id, cp_domain); + + va_list varg_list_copy; + va_copy(varg_list_copy, varg_list); + + while ((arg_type = va_arg(varg_list_copy, int)) != 0) + { + _starpu_mpi_checkpoint_template_add_entry(_cp_template, arg_type, varg_list_copy); + } + va_end(varg_list_copy); + + _starpu_mpi_checkpoint_template_freeze(_cp_template); + + *cp_template = _cp_template; + + return 0; +} + +int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t* cp_template) +{ + return _starpu_mpi_checkpoint_template_freeze(*cp_template); +} + +int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, int cp_domain, ...) +{ + va_list varg_list; + va_start(varg_list, cp_domain); + int ret = _starpu_mpi_checkpoint_template_register(cp_template, cp_id, cp_domain, varg_list); + va_end(varg_list); + return ret; +} + +int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t* cp_template, ...) 
+{ + va_list varg_list; + int arg_type; + int ret; + va_start(varg_list, cp_template); + arg_type = va_arg(varg_list, int); + STARPU_ASSERT_MSG(arg_type!=STARPU_NONE, "Unhandled arg_type: STARPU_NONE(0).\n"); + ret = _starpu_mpi_checkpoint_template_add_entry(*cp_template, arg_type, varg_list); + va_end(varg_list); + return ret; +} + +int _checkpoint_template_digest_ack_reception(int checkpoint_id, int checkpoint_instance) +{ + int remaining_ack_messages; + struct _starpu_mpi_checkpoint_tracker* tracker, *tracker1; + starpu_mpi_checkpoint_template_t cp_template = _starpu_mpi_get_checkpoint_template_by_id(checkpoint_id); + STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); + _STARPU_MPI_DEBUG(20, "Digesting ack recv: id=%d, inst=%d\n", checkpoint_id, checkpoint_instance); + + tracker = _starpu_mpi_checkpoint_tracker_update(cp_template, checkpoint_id, cp_template->checkpoint_domain, checkpoint_instance); + remaining_ack_messages = _starpu_mpi_checkpoint_check_tracker(tracker); + + if (remaining_ack_messages>0) + { + _STARPU_MPI_DEBUG(20, "The CP (id:%d - inst:%d) found, remaining ack msg awaited:%d.\n", checkpoint_id, + checkpoint_instance, remaining_ack_messages); + } + else if (remaining_ack_messages==0) + { + _STARPU_MPI_DEBUG(0, "The CP (id:%d - inst:%d) has been successfully saved and acknowledged.\n", checkpoint_id, checkpoint_instance); + tracker = _starpu_mpi_checkpoint_tracker_validate_instance(tracker); + _STARPU_MPI_TRACE_CHECKPOINT_END(checkpoint_instance, cp_template->checkpoint_domain); + if (tracker==NULL) + { + // TODO:should warn some people, because the msg logging is not implemented(this precise nodes to contact) + _STARPU_MPI_DEBUG(0, "No previous checkpoint to discard\n"); + } + else + { + if (tracker->old) + { + tracker1 = _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(tracker->cp_domain); + _starpu_mpi_checkpoint_post_cp_discard_send(tracker->cp_template, tracker1->cp_id, tracker1->cp_inst); + } + else + { + 
_starpu_mpi_checkpoint_post_cp_discard_send(tracker->cp_template, checkpoint_id, checkpoint_instance); + } + } + } + else if (remaining_ack_messages==-1) + { + STARPU_ABORT_MSG("Inst (id:%d - inst:%d) is already valid. should not have received an ack msg.\n", checkpoint_id, checkpoint_instance); + } + else + { + STARPU_ABORT_MSG("Critical error, can not identify %d as remaining messages\n", remaining_ack_messages); + } + + _STARPU_MPI_DEBUG(20, "Digested\n"); + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); + return 0; +} + +void _checkpoint_template_digest_ack_reception_cb(void* _arg) +{ + struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _arg; + _checkpoint_template_digest_ack_reception(arg->msg.checkpoint_id, arg->msg.checkpoint_instance); +} + +// For test purpose +int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template) +{ +// int val; + int i = 0; + struct _starpu_mpi_checkpoint_template_item* item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); + + while (item != _starpu_mpi_checkpoint_template_end(cp_template)) + { + fprintf(stderr,"Item %2d: ", i); + if (item->type == STARPU_VALUE) + { +// fprintf(stderr, "STARPU_VALUE - Value=%d - backupof:%d - backupedby:%d\n", (*(int *)(item->ptr)), item->backup_of, item->backupped_by); + fprintf(stderr, "STARPU_VALUE - pointer:%p - backupof:%d - backupedby:%d\n", item->ptr, item->backup_of, item->backupped_by); + } + else if (item->type == STARPU_R) + { +// val = *(int*)starpu_data_handle_to_pointer(*(starpu_data_handle_t*)(item->ptr), 0); +// fprintf(stderr, "STARPU_R - Value=%d - backupof:%d - backupedby:%d\n", val, item->backup_of, item->backupped_by); + fprintf(stderr, "STARPU_R - pointer:%p - backupof:%d - backupedby:%d\n", item->ptr, item->backup_of, item->backupped_by); + } + else if (item->type == STARPU_DATA_ARRAY) + { +// fprintf(stderr, "STARPU_DATA_ARRAY - Multiple values: %d", 
*(int*)starpu_data_handle_to_pointer(((starpu_data_handle_t)item->ptr), 0)); +// +// for (int j=1 ; jcount, 5) ; j++) +// { +// fprintf(stderr, ", %d", *(int*)starpu_data_handle_to_pointer(((starpu_data_handle_t*)item->ptr)[j], 0)); //j*sizeof(starpu_data_handle_t) +// } +// fprintf(stderr, "...\n"); + } + else + { + printf("Unrecognized type.\n"); + } + + item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); + i++; + } + return 0; +} diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h new file mode 100644 index 0000000..c3e97fb --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h @@ -0,0 +1,227 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef _STARPU_MPI_CHECKPOINT_TEMPLATE_H +#define _STARPU_MPI_CHECKPOINT_TEMPLATE_H + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define MAX_CP_TEMPLATE_NUMBER 32 // Arbitrary limit + +#define _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE 2 + +extern starpu_pthread_mutex_t cp_template_mutex; +extern int cp_template_array_size; + +extern starpu_mpi_checkpoint_template_t cp_template_array[MAX_CP_TEMPLATE_NUMBER]; + +int increment_current_instance(); +int get_current_instance(); + +void checkpoint_template_lib_init(void); + +void checkpoint_template_lib_quit(void); + +int _checkpoint_template_digest_ack_reception(int checkpoint_id, int checkpoint_instance); +void _checkpoint_template_digest_ack_reception_cb(void* _arg); +void _cp_discard_message_recv_cb(void* _args); + +starpu_mpi_checkpoint_template_t _starpu_mpi_get_checkpoint_template_by_id(int checkpoint_id); +int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template); + +int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain, va_list varg_list); + +LIST_TYPE(_starpu_mpi_checkpoint_template_tracking_inst, + int cp_id; + int cp_inst; + int cp_domain; + starpu_mpi_checkpoint_template_t cp_template; + int ack_msg_count; + int valid:1; +) + +LIST_TYPE(_starpu_mpi_checkpoint_template_item, + int type; + void *ptr; + size_t count; + int backupped_by; + int backup_of; + starpu_mpi_tag_t tag; +) + +struct _starpu_mpi_checkpoint_template +{ + struct _starpu_mpi_checkpoint_template_item_list list; + int size; + int cp_id; + int checkpoint_domain; + int message_to_send_number; + int frozen; + starpu_pthread_mutex_t mutex; + int *backup_of_array; + int backup_of_array_max_size; + int backup_of_array_used_size; + int *backupped_by_array; + int backupped_by_array_max_size; + int backupped_by_array_used_size; + +}; + +static inline int 
checkpoint_template_array_realloc(int** array, int* max_size, int growth_factor) +{ + // fprintf(stderr, "old array %p - first elem %d\n", *array, *array[0]); + // fprintf(stderr, "Newsize=%d\n", growth_factor*(*max_size)); + _STARPU_MPI_REALLOC(*array, growth_factor*(*max_size)*sizeof(int)); + // fprintf(stderr, "Newarray=%p\n", *array); + *max_size = growth_factor*(*max_size); + return *max_size; +} + +static inline int checkpoint_template_backup_of_array_realloc_double(struct _starpu_mpi_checkpoint_template* checkpoint_template) +{ + return checkpoint_template_array_realloc(&checkpoint_template->backup_of_array, &checkpoint_template->backup_of_array_max_size, 2); +} + +static inline int checkpoint_template_backupped_by_array_realloc_double(struct _starpu_mpi_checkpoint_template* checkpoint_template) +{ + return checkpoint_template_array_realloc(&checkpoint_template->backupped_by_array, &checkpoint_template->backupped_by_array_max_size, 2); +} + +static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_item_create(int type, void* ptr, int count, int backupped_by, int backup_of, starpu_mpi_tag_t tag) +{ + struct _starpu_mpi_checkpoint_template_item* item; + _STARPU_MPI_CALLOC(item, 1, sizeof(struct _starpu_mpi_checkpoint_template_item)); + item->type = type; + item->ptr = ptr; + item->count = count; + item->backupped_by = backupped_by; + item->backup_of = backup_of; + item->tag = tag; + + return item; +} + +static inline starpu_mpi_checkpoint_template_t _starpu_mpi_checkpoint_template_new(int cp_id, int cp_domain) +{ + starpu_mpi_checkpoint_template_t _cp_template; + _STARPU_MPI_CALLOC(_cp_template, 1, sizeof(struct _starpu_mpi_checkpoint_template)); + _cp_template->cp_id = cp_id; + _cp_template->checkpoint_domain = cp_domain; + _cp_template->backup_of_array_max_size = _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE; + _STARPU_MPI_MALLOC(_cp_template->backup_of_array, _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE); + 
_cp_template->backup_of_array[0] = -1; + _cp_template->backup_of_array_used_size = 0; + _cp_template->backupped_by_array_max_size = _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE; + _STARPU_MPI_MALLOC(_cp_template->backupped_by_array, _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE); + _cp_template->backupped_by_array[0] = -1; + _cp_template->backupped_by_array_used_size = 0; + STARPU_PTHREAD_MUTEX_INIT(&_cp_template->mutex, NULL); + return _cp_template; +} + +static inline int _checkpoint_template_add_to_backup_arrays(starpu_mpi_checkpoint_template_t cp_template, int backupped_by, int backup_of) +{ + int i; + if (backup_of == -1) + { + for (i = 0; i < cp_template->backupped_by_array_used_size; i++) + { + if (backupped_by == cp_template->backupped_by_array[i]) + { + return 0; + } + } + if (cp_template->backupped_by_array_used_size + 1 == cp_template->backupped_by_array_max_size) + { + checkpoint_template_backupped_by_array_realloc_double(cp_template); + } + cp_template->backupped_by_array[cp_template->backupped_by_array_used_size] = backupped_by; + cp_template->backupped_by_array_used_size++; + cp_template->backupped_by_array[cp_template->backupped_by_array_used_size] = -1; + return backupped_by; + } + else if (backupped_by == -1) + { + for (i = 0; i < cp_template->backup_of_array_used_size; i++) + { + if (backup_of == cp_template->backup_of_array[i]) + { + return 0; + } + } + if (cp_template->backup_of_array_used_size + 1 == cp_template->backup_of_array_max_size) + { + checkpoint_template_backup_of_array_realloc_double(cp_template); + } + cp_template->backup_of_array[cp_template->backup_of_array_used_size] = backup_of; + cp_template->backup_of_array_used_size++; + cp_template->backup_of_array[cp_template->backup_of_array_used_size] = -1; + return backup_of; + } + else + { + _STARPU_DISP("[warning] Checkpoint template item does not refer any backup information. 
This should not happen.\n"); + } + return -1; +} + +static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_first_data(starpu_mpi_checkpoint_template_t template) +{ + return _starpu_mpi_checkpoint_template_item_list_front(&template->list); +} + +static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_next_data(starpu_mpi_checkpoint_template_t template STARPU_ATTRIBUTE_UNUSED, struct _starpu_mpi_checkpoint_template_item* ref_data) +{ + return _starpu_mpi_checkpoint_template_item_list_next(ref_data); +} + +static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_end(starpu_mpi_checkpoint_template_t template STARPU_ATTRIBUTE_UNUSED) +{ + return NULL; +} + +static inline int _starpu_checkpoint_template_free(starpu_mpi_checkpoint_template_t cp_template) +{ + struct _starpu_mpi_checkpoint_template_item* item; + struct _starpu_mpi_checkpoint_template_item* next_item; + STARPU_PTHREAD_MUTEX_LOCK(&cp_template->mutex); + item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); + while (item != _starpu_mpi_checkpoint_template_end(cp_template)) + { + next_item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); + free(item); + item = next_item; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&cp_template->mutex); + free(cp_template); + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif //_STARPU_MPI_CHECKPOINT_TEMPLATE_H diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c new file mode 100644 index 0000000..c9cfee9 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c @@ -0,0 +1,250 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include "starpu_mpi_checkpoint_template.h" + +struct _starpu_mpi_checkpoint_domain_tracker_index_list* domain_tracker_list; +starpu_pthread_mutex_t tracker_mutex; + +struct _starpu_mpi_checkpoint_domain_tracker_entry +{ + UT_hash_handle hh; + int instance; + struct _starpu_mpi_checkpoint_tracker tracker; +}; + +LIST_TYPE(_starpu_mpi_checkpoint_domain_tracker_index, + int domain; + struct _starpu_mpi_checkpoint_tracker* last_valid_instance; + struct _starpu_mpi_checkpoint_domain_tracker_entry* tracked_inst_hash_table; +) + +static inline void _starpu_mpi_checkpoint_domain_tracker_index_init(struct _starpu_mpi_checkpoint_domain_tracker_index* index) +{ + index->domain = -1; + index->tracked_inst_hash_table = NULL; + index->last_valid_instance = NULL; +} + +static inline void _starpu_mpi_checkpoint_domain_tracker_entry_init(struct _starpu_mpi_checkpoint_domain_tracker_entry* entry) +{ + entry->instance = -1; + entry->tracker.cp_id = -1; + entry->tracker.cp_inst = -1; + entry->tracker.cp_domain = -1; + entry->tracker.cp_template = NULL; + entry->tracker.ack_msg_count = 0; + entry->tracker.first_msg_sent_flag = 0; + entry->tracker.valid = 0; + entry->tracker.old = 0; +} + +static inline struct _starpu_mpi_checkpoint_domain_tracker_index* get_domain_tracker_index(int domain) +{ + struct 
_starpu_mpi_checkpoint_domain_tracker_index* index; + for (index = _starpu_mpi_checkpoint_domain_tracker_index_list_begin(domain_tracker_list) ; + index != _starpu_mpi_checkpoint_domain_tracker_index_list_end(domain_tracker_list) ; + index = _starpu_mpi_checkpoint_domain_tracker_index_list_next(index)) + { + if (index->domain == domain) + { + return index; + } + } + return NULL; +} + +static inline struct _starpu_mpi_checkpoint_domain_tracker_index* add_domain_tracker_index(int domain) +{ + struct _starpu_mpi_checkpoint_domain_tracker_index* index; + _STARPU_MPI_MALLOC(index, sizeof(struct _starpu_mpi_checkpoint_domain_tracker_index)); + _starpu_mpi_checkpoint_domain_tracker_index_init(index); + index->domain = domain; + _starpu_mpi_checkpoint_domain_tracker_index_list_push_back(domain_tracker_list, index); + return index; +} + +static inline struct _starpu_mpi_checkpoint_domain_tracker_entry* get_tracker_entry(struct _starpu_mpi_checkpoint_domain_tracker_index* index, int instance) +{ + struct _starpu_mpi_checkpoint_domain_tracker_entry* entry = NULL; + if (index->tracked_inst_hash_table) + { + HASH_FIND_INT(index->tracked_inst_hash_table, &instance, entry); + } + return entry; +} + +static inline struct _starpu_mpi_checkpoint_domain_tracker_entry* add_tracker_entry(struct _starpu_mpi_checkpoint_domain_tracker_index* index, int cp_id, int cp_inst, int cp_domain, starpu_mpi_checkpoint_template_t cp_template) +{ + struct _starpu_mpi_checkpoint_domain_tracker_entry* entry; + _STARPU_MPI_MALLOC(entry, sizeof(struct _starpu_mpi_checkpoint_domain_tracker_entry)); + _starpu_mpi_checkpoint_domain_tracker_entry_init(entry); + entry->instance = cp_inst; + entry->tracker.cp_id = cp_id; + entry->tracker.cp_inst = cp_inst; + entry->tracker.cp_domain = cp_domain; + entry->tracker.cp_template = cp_template; + entry->tracker.ack_msg_count = cp_template->message_to_send_number; + HASH_ADD_INT(index->tracked_inst_hash_table, instance, entry); + return entry; +} + +static inline int 
_clear_domain_tracker_index(struct _starpu_mpi_checkpoint_domain_tracker_index* index) +{ + struct _starpu_mpi_checkpoint_domain_tracker_entry* entry, *tmp; + HASH_ITER(hh, index->tracked_inst_hash_table, entry, tmp) + { + HASH_DEL(index->tracked_inst_hash_table, entry); + free(entry); + } + return 0; +} + +static inline int _domain_tracker_delete_all() +{ + struct _starpu_mpi_checkpoint_domain_tracker_index* temp_index; + struct _starpu_mpi_checkpoint_domain_tracker_index* index = _starpu_mpi_checkpoint_domain_tracker_index_list_begin(domain_tracker_list) ; + while (index != _starpu_mpi_checkpoint_domain_tracker_index_list_end(domain_tracker_list)) + { + temp_index = _starpu_mpi_checkpoint_domain_tracker_index_list_next(index); + _clear_domain_tracker_index(index); + _starpu_mpi_checkpoint_domain_tracker_index_list_erase(domain_tracker_list, index); + free(index); + index = temp_index; + } + return 0; +} + +int _starpu_mpi_checkpoint_tracker_init() +{ + domain_tracker_list = _starpu_mpi_checkpoint_domain_tracker_index_list_new(); + STARPU_PTHREAD_MUTEX_INIT(&tracker_mutex, NULL); + return 0; +} + +int _starpu_mpi_checkpoint_tracker_shutdown() +{ + _domain_tracker_delete_all(); + STARPU_PTHREAD_MUTEX_DESTROY(&tracker_mutex); + free(domain_tracker_list); + return 0; +} + +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(int cp_domain, int cp_inst) +{ + STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); + struct _starpu_mpi_checkpoint_domain_tracker_index *index = get_domain_tracker_index(cp_domain); + if (NULL == index) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return NULL; + } + struct _starpu_mpi_checkpoint_domain_tracker_entry *entry = get_tracker_entry(index, cp_inst); + if (NULL == entry) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return NULL; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return &entry->tracker; +} + +struct _starpu_mpi_checkpoint_tracker* 
_starpu_mpi_checkpoint_template_create_instance_tracker(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_inst) +{ + STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); + struct _starpu_mpi_checkpoint_domain_tracker_entry *entry; + struct _starpu_mpi_checkpoint_domain_tracker_index *index = get_domain_tracker_index(cp_domain); + if (NULL == index) + index = add_domain_tracker_index(cp_domain); + entry = get_tracker_entry(index, cp_inst); + if (NULL == entry) + entry = add_tracker_entry(index, cp_id, cp_inst, cp_domain, cp_template); + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return &entry->tracker; +} + +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_update(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_instance) +{ + STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); + struct _starpu_mpi_checkpoint_domain_tracker_entry* entry; + struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(cp_domain); + if (NULL == index) + index = add_domain_tracker_index(cp_domain); + entry = get_tracker_entry(index, cp_instance); + if (NULL == entry) + { + STARPU_ASSERT_MSG(cp_template!=NULL, "Couldn't find a CP template with the cpid:%d\n", cp_id); + entry = add_tracker_entry(index, cp_id, cp_instance, cp_domain, cp_template); + } + STARPU_ASSERT_MSG(entry->tracker.ack_msg_count>0, "Error. Trying to count ack message while all have already been received. 
id:%d, inst:%d, remaining_ack_messages:%d\n", entry->tracker.cp_id, entry->instance, entry->tracker.ack_msg_count); + entry->tracker.ack_msg_count--; + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return &entry->tracker; +} + +int _starpu_mpi_checkpoint_check_tracker(struct _starpu_mpi_checkpoint_tracker* tracker) +{ + if (tracker->valid) + { + return -1; + } + return tracker->ack_msg_count; +} + +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_validate_instance(struct _starpu_mpi_checkpoint_tracker* tracker) +{ + STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); + // Here we validate a checkpoint and return the old cp info that must be discarded + struct _starpu_mpi_checkpoint_tracker* temp_tracker; + struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(tracker->cp_domain); + if (NULL == index->last_valid_instance || tracker->cp_inst > index->last_valid_instance->cp_inst) + { + _STARPU_MPI_DEBUG(0, "The CP (id:%d - dom:%d - inst:%d) has been fully acknowledged, and is now the latest valid CP for the domain.\n", tracker->cp_id, tracker->cp_domain, tracker->cp_inst); + // The checkpoint to validate is the newest of the domain. 
Update the latest CP and return the old "latest" + temp_tracker = index->last_valid_instance; + index->last_valid_instance = tracker; + tracker->valid = 1; + if (STARPU_LIKELY(temp_tracker!=NULL)) + { + temp_tracker->old = 1; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return temp_tracker; + } + else + { + _STARPU_MPI_DEBUG(0, "The CP (id:%d - dom:%d - inst:%d) has been fully acknowledged, while a more recent one (id:%d - dom:%d - inst:%d) is already validated.\n", + tracker->cp_id, tracker->cp_domain, tracker->cp_inst, + index->last_valid_instance->cp_id, index->last_valid_instance->cp_domain, index->last_valid_instance->cp_inst); + // The checkpoint to validate is older than the latest validated, just return it to discard it + tracker->valid = 1; + tracker->old =1; + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return tracker; + } +} + +/* Return the tracker recorded as last valid instance for `domain`, or NULL when no tracker index exists for that domain. */ +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(int domain) +{ + struct _starpu_mpi_checkpoint_tracker* last_valid = NULL; + STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); + struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(domain); + /* get_domain_tracker_index() returns NULL for a domain that has no index: do not dereference it */ + if (index != NULL) + { + /* read under tracker_mutex: _starpu_mpi_checkpoint_tracker_validate_instance() updates this field under the same lock */ + last_valid = index->last_valid_instance; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); + return last_valid; +} diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h new file mode 100644 index 0000000..4bb2120 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h @@ -0,0 +1,50 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H +#define FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +struct _starpu_mpi_checkpoint_tracker /* acknowledgement state of one checkpoint instance */ +{ + int cp_id; + int cp_inst; + int cp_domain; + starpu_mpi_checkpoint_template_t cp_template; + int ack_msg_count; /* starts at cp_template->message_to_send_number, decremented by _starpu_mpi_checkpoint_tracker_update() */ + int first_msg_sent_flag; + unsigned int old:1; /* set once a more recent instance of the domain is validated; unsigned because a signed 1-bit field cannot hold 1 and plain-int bit-field signedness is implementation-defined */ + unsigned int valid: 1; /* set by _starpu_mpi_checkpoint_tracker_validate_instance() */ +}; + +int _starpu_mpi_checkpoint_tracker_init(); +int _starpu_mpi_checkpoint_tracker_shutdown(); +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(int cp_domain, int cp_inst); +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_create_instance_tracker(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_inst); +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_update(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_instance); +int _starpu_mpi_checkpoint_check_tracker(struct _starpu_mpi_checkpoint_tracker* tracker); +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_validate_instance(struct _starpu_mpi_checkpoint_tracker* tracker); +struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(int domain); + +#ifdef __cplusplus +} +#endif + +#endif //FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c new file mode 100644 index 0000000..bc91a99 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c @@ -0,0 +1,57 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +starpu_pthread_mutex_t ft_mutex; +int _my_rank; + +int starpu_mpi_checkpoint_init(void) +{ + STARPU_PTHREAD_MUTEX_INIT(&ft_mutex, NULL); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &_my_rank); //TODO: check compatibility with several Comms behaviour + starpu_mpi_ft_service_lib_init(_ack_msg_recv_cb, _cp_discard_message_recv_cb); + checkpoint_template_lib_init(); + _starpu_mpi_checkpoint_tracker_init(); + checkpoint_package_init(); + _STARPU_MPI_FT_STATS_INIT(); + return 0; +} + +int starpu_mpi_checkpoint_shutdown(void) +{ + checkpoint_template_lib_quit(); + checkpoint_package_shutdown(); + _starpu_mpi_checkpoint_tracker_shutdown(); + STARPU_PTHREAD_MUTEX_DESTROY(&ft_mutex); + _STARPU_MPI_FT_STATS_WRITE_TO_FD(stderr); + _STARPU_MPI_FT_STATS_SHUTDOWN(); + return 0; +} + +void starpu_mpi_ft_progress(void) +{ + starpu_mpi_ft_service_progress(); +} + +int starpu_mpi_ft_busy() +{ + return starpu_mpi_ft_service_lib_busy(); +} diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h new file mode 100644 index 0000000..37cec7d --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef _STARPU_MPI_FT_H +#define _STARPU_MPI_FT_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +void starpu_mpi_ft_progress(void); +int starpu_mpi_ft_busy(); + +#ifdef __cplusplus +} +#endif + +#endif //_STARPU_MPI_FT_H diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.c new file mode 100644 index 0000000..48d8108 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.c @@ -0,0 +1,371 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include // Should be deduced at preprocessing (Nmad vs MPI) +#include +#include "starpu_mpi_cache.h" + +#define SIMULTANEOUS_ACK_MSG_RECV_MAX 2 +#define SIMULTANEOUS_CP_INFO_RECV_MAX 2 +#define SIMULTANEOUS_PENDING_SEND_MAX 40 + +static struct _starpu_mpi_req_list detached_ft_service_requests; +static struct _starpu_mpi_req_list ready_send_ft_service_requests; +static unsigned detached_send_n_ft_service_requests; +static starpu_pthread_mutex_t detached_ft_service_requests_mutex; +static starpu_pthread_mutex_t ft_service_requests_mutex; + +int ready_ack_msgs_recv; +int pending_ack_msgs_recv; +int ready_cp_info_msgs_recv; +int pending_cp_info_msgs_recv; +int ready_send_ft_service_msg; +int pending_send_ft_service_msg; + +typedef void (*cb_fn_type)(void*); +cb_fn_type ack_msg_recv_cb; +cb_fn_type cp_info_recv_cb; + +int _starpu_mpi_ft_service_submit_rdy() +{ + int i; + struct _starpu_mpi_req* req; + int max_loop; + + STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); + max_loop = MIN(SIMULTANEOUS_ACK_MSG_RECV_MAX-pending_ack_msgs_recv, ready_ack_msgs_recv); + for (i=0 ; imsg)); + req->ptr = (void*)&arg->msg; + req->datatype = MPI_BYTE; + _STARPU_MALLOC(req->status, sizeof(MPI_Status)); + + STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); + MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, + req->node_tag.node.comm, &req->backend->data_request); + _STARPU_MPI_DEBUG(5, "Posting MPI_Irecv ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); + _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); + pending_ack_msgs_recv++; + ready_ack_msgs_recv--; + req->submitted = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + } + + max_loop = 
MIN(SIMULTANEOUS_CP_INFO_RECV_MAX-pending_cp_info_msgs_recv, ready_cp_info_msgs_recv); + for (i=0 ; imsg)); + req->ptr = (void*)&arg->msg; + req->datatype = MPI_BYTE; + _STARPU_MALLOC(req->status, sizeof(MPI_Status)); + + STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); + MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, + req->node_tag.node.comm, &req->backend->data_request); + _STARPU_MPI_DEBUG(5, "Posting MPI_Irecv ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); + _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); + pending_cp_info_msgs_recv++; + ready_cp_info_msgs_recv--; + req->submitted = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + } + + max_loop = MIN(SIMULTANEOUS_PENDING_SEND_MAX-pending_send_ft_service_msg, ready_send_ft_service_msg); + for (i=0 ; iptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, + req->node_tag.node.comm, &req->backend->data_request); + + _STARPU_MPI_DEBUG(5, "Posting MPI_Isend ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); + _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); + pending_send_ft_service_msg++; + ready_send_ft_service_msg--; + req->submitted = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); + /* the function is declared int: return a defined value instead of falling off the end */ + return 0; +} + +/* Account one more receive to post for service tag `tag` (_STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO, anything else aborts), then wake up the progress thread. Returns 0. */ +int _starpu_mpi_ft_service_post_special_recv(int tag) +{ + /* tag is an int: widen it explicitly to match the PRIi64 conversion */ + _STARPU_MPI_DEBUG(5, "Pushing ft service msg: %s tag %"PRIi64" ANYSOURCE\n", _starpu_mpi_request_type(RECV_REQ), (int64_t)tag); + + if (tag==_STARPU_MPI_TAG_CP_ACK) + { + STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); + ready_ack_msgs_recv++; + STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); + } +
else if (tag==_STARPU_MPI_TAG_CP_INFO) + { + STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); + ready_cp_info_msgs_recv++; + STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); + } + else + { + STARPU_ABORT_MSG("Only _STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO are service msgs.\n"); + } + _starpu_mpi_wake_up_progress_thread(); + return 0; +} + +/* Build a SEND_REQ of `count` bytes (MPI_BYTE) from `msg` for `rank` on `comm`, append it to ready_send_ft_service_requests and wake up the progress thread. `tag` must be _STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO. `callback` and `arg` are handed to the request. Returns 0. */ +int _starpu_mpi_ft_service_post_send(void* msg, int count, int rank, int tag, MPI_Comm comm, void (*callback)(void *), void* arg) +{ + struct _starpu_mpi_req* req; + + /* Check if the tag is a service message */ + STARPU_ASSERT_MSG(tag==_STARPU_MPI_TAG_CP_ACK || tag == _STARPU_MPI_TAG_CP_INFO, "Only _STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO are service msgs."); + + /* Initialize the request structure */ + req = _starpu_mpi_request_fill(NULL, rank, tag, comm, 1, 0, 0, callback, arg, SEND_REQ, NULL, 1, 0, count); +// TODO: Check compatibility with prio + req->ptr = msg; + req->datatype = MPI_BYTE; + _STARPU_MALLOC(req->status, sizeof(MPI_Status)); + + /* tag is an int: widen it explicitly to match the PRIi64 conversion */ + _STARPU_MPI_DEBUG(5, "Pushing ft service msg: %s req %p tag %"PRIi64" src %d ptr %p\n", _starpu_mpi_request_type(SEND_REQ), req, (int64_t)tag, rank, msg); + + STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); + ready_send_ft_service_msg++; + _starpu_mpi_req_list_push_back(&ready_send_ft_service_requests, req); + STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); + + _starpu_mpi_wake_up_progress_thread(); + + return 0; +} + +static void _starpu_mpi_handle_ft_request_termination(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + _STARPU_MPI_DEBUG(2, + "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, + req->data_handle, req->ptr, + req->datatype_name, (int) req->count, req->registered_datatype, req->backend->internal_req); + + if (req->backend->internal_req) + { +// 
free(req->backend->early_data_handle); +// req->backend->early_data_handle = NULL; + } + else + { + if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) + { + if (req->registered_datatype == 0) + { + if (req->request_type == SEND_REQ) + { + // We need to make sure the communication for sending the size + // has completed, as MPI can re-order messages, let's call + // MPI_Wait to make sure data have been sent + starpu_free_on_node_flags(STARPU_MAIN_RAM, (uintptr_t) req->ptr, req->count, 0); + req->ptr = NULL; + } + else if (req->request_type == RECV_REQ) + { + // req->ptr is freed by starpu_data_unpack + starpu_data_unpack(req->data_handle, req->ptr, req->count); + starpu_memory_deallocate(STARPU_MAIN_RAM, req->count); + } + } + else + { + //_starpu_mpi_datatype_free(req->data_handle, &req->datatype); + } + } + //_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.node.rank, req->node_tag.data_tag); + } + + _starpu_mpi_release_req_data(req); + + if (req->backend->envelope) + { + free(req->backend->envelope); + req->backend->envelope = NULL; + } + + /* Execute the specified callback, if any */ + if (req->callback) + { + if (req->request_type == RECV_REQ) + { + if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_ACK) + { + struct _starpu_mpi_cp_ack_arg_cb* tmp = (struct _starpu_mpi_cp_ack_arg_cb *) req->callback_arg; + tmp->rank = req->status->MPI_SOURCE; + } + else if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_INFO) + { + struct _starpu_mpi_cp_discard_arg_cb* tmp = (struct _starpu_mpi_cp_discard_arg_cb *) req->callback_arg; + tmp->rank = req->status->MPI_SOURCE; + } + } + req->callback(req->callback_arg); + } + /* tell anyone potentially waiting on the request that it is + * terminated now */ + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + req->completed = 1; + STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + _STARPU_MPI_LOG_OUT(); +} + +void 
starpu_mpi_test_ft_detached_service_requests(void) +{ + //_STARPU_MPI_LOG_IN(); + int flag; + struct _starpu_mpi_req *req; + + STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); + + if (_starpu_mpi_req_list_empty(&detached_ft_service_requests)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + //_STARPU_MPI_LOG_OUT(); + return; + } + + //_STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN(); + req = _starpu_mpi_req_list_begin(&detached_ft_service_requests); + while (req != _starpu_mpi_req_list_end(&detached_ft_service_requests)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + + //_STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + //_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.node.rank); +#ifdef STARPU_SIMGRID + req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag); +#else + STARPU_MPI_ASSERT_MSG(req->backend->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL"); + req->ret = MPI_Test(&req->backend->data_request, &flag, req->status); +#endif + + STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); + //_STARPU_MPI_TRACE_TEST_END(req->node_tag.node.rank, req->node_tag.data_tag); + + if (!flag) + { + req = _starpu_mpi_req_list_next(req); + } + else + { + //_STARPU_MPI_TRACE_POLLING_END(); + struct _starpu_mpi_req *next_req; + next_req = _starpu_mpi_req_list_next(req); + + //_STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); + + STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); + if (req->request_type == SEND_REQ) + pending_send_ft_service_msg--; + if (req->request_type == RECV_REQ) + { + if (req->node_tag.data_tag == 
_STARPU_MPI_TAG_CP_ACK) + pending_ack_msgs_recv--; + else if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_INFO) + pending_cp_info_msgs_recv--; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); + _starpu_mpi_req_list_erase(&detached_ft_service_requests, req); + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + _starpu_mpi_handle_ft_request_termination(req); + + //_STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); + + STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); + /* We don't want to free internal non-detached + requests, we need to get their MPI request before + destroying them */ + if (req->backend->is_internal_req && !req->backend->to_destroy) + { + /* We have completed the request, let the application request destroy it */ + req->backend->to_destroy = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + } + else + { + STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); + _starpu_mpi_request_destroy(req); + } + + req = next_req; + //_STARPU_MPI_TRACE_POLLING_BEGIN(); + } + + STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); + } + //_STARPU_MPI_TRACE_TESTING_DETACHED_END(); + + STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); + //_STARPU_MPI_LOG_OUT(); +} + +int starpu_mpi_ft_service_progress() +{ + starpu_mpi_test_ft_detached_service_requests(); + _starpu_mpi_ft_service_submit_rdy(); + return 0; +} + +int starpu_mpi_ft_service_lib_init(void(*_ack_msg_recv_cb)(void*), void(*_cp_info_recv_cb)(void*)) +{ + _starpu_mpi_req_list_init(&detached_ft_service_requests); + _starpu_mpi_req_list_init(&ready_send_ft_service_requests); + STARPU_PTHREAD_MUTEX_INIT(&detached_ft_service_requests_mutex, NULL); + STARPU_PTHREAD_MUTEX_INIT(&ft_service_requests_mutex, NULL); + ready_ack_msgs_recv = 0; + pending_ack_msgs_recv = 0; + ready_cp_info_msgs_recv = 0; + pending_cp_info_msgs_recv = 0; + ready_send_ft_service_msg = 0; + pending_send_ft_service_msg = 
0; + + ack_msg_recv_cb = _ack_msg_recv_cb; + cp_info_recv_cb = _cp_info_recv_cb; + + return 0; +} + +int starpu_mpi_ft_service_lib_busy() +{ + return !_starpu_mpi_req_list_empty(&detached_ft_service_requests); +} diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.h new file mode 100644 index 0000000..9524b4f --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.h @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H +#define FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +int _starpu_mpi_ft_service_post_special_recv(int tag); +int _starpu_mpi_ft_service_post_send(void* msg, int count, int rank, int tag, MPI_Comm comm, void (*callback)(void *), void* arg); + +void starpu_mpi_test_ft_detached_service_requests(void); +int starpu_mpi_ft_service_progress(); +int starpu_mpi_ft_service_lib_init(void(*_ack_msg_recv_cb)(void*), void(*cp_info_recv_cb)(void*)); +int starpu_mpi_ft_service_lib_busy(); + +#ifdef __cplusplus +} +#endif + +#endif //FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c new file mode 100644 index 0000000..ad9ff75 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +starpu_pthread_mutex_t _ft_stats_mutex; + +int cp_data_msgs_sent_count; +size_t cp_data_msgs_sent_total_size; +int cp_data_msgs_received_count; +size_t cp_data_msgs_received_total_size; + +int cp_data_msgs_sent_cached_count; +size_t cp_data_msgs_sent_cached_total_size; +int cp_data_msgs_received_cached_count; +size_t cp_data_msgs_received_cached_total_size; +int cp_data_msgs_received_cp_cached_count; +size_t cp_data_msgs_received_cp_cached_total_size; + +int ft_service_msgs_sent_count; +size_t ft_service_msgs_sent_total_size; +int ft_service_msgs_received_count; +size_t ft_service_msgs_received_total_size; + +struct size_sample_list cp_data_in_memory_list; //over time +size_t cp_data_in_memory_size_max_at_t; +size_t cp_data_in_memory_size_total; + diff --git a/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h new file mode 100644 index 0000000..1231791 --- /dev/null +++ b/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h @@ -0,0 +1,288 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef FT_STARPU_STARPU_MPI_FT_STATS_H +#define FT_STARPU_STARPU_MPI_FT_STATS_H + +#include +#include + +#ifdef STARPU_USE_MPI_FT + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern starpu_pthread_mutex_t _ft_stats_mutex; + +extern int cp_data_msgs_sent_count; +extern size_t cp_data_msgs_sent_total_size; +extern int cp_data_msgs_received_count; +extern size_t cp_data_msgs_received_total_size; + +extern int cp_data_msgs_sent_cached_count; +extern size_t cp_data_msgs_sent_cached_total_size; +extern int cp_data_msgs_received_cached_count; +extern size_t cp_data_msgs_received_cached_total_size; +extern int cp_data_msgs_received_cp_cached_count; +extern size_t cp_data_msgs_received_cp_cached_total_size; + +extern int ft_service_msgs_sent_count; +extern size_t ft_service_msgs_sent_total_size; +extern int ft_service_msgs_received_count; +extern size_t ft_service_msgs_received_total_size; + +extern struct size_sample_list cp_data_in_memory_list; //over time +extern size_t cp_data_in_memory_size_total; +extern size_t cp_data_in_memory_size_max_at_t; + +static inline void stat_init(); +static inline void _starpu_ft_stats_shutdown(); +static inline void _starpu_ft_stats_write_to_fd(); +static inline void _starpu_ft_stats_send_data(size_t size); +static inline void _starpu_ft_stats_send_data_cached(size_t size);; +static inline void _starpu_ft_stats_recv_data(size_t size); +static inline void _starpu_ft_stats_recv_data_cached(size_t size); +static inline void _starpu_ft_stats_recv_data_cp_cached(size_t size); +static inline void _starpu_ft_stats_service_msg_send(size_t size); +static inline void _starpu_ft_stats_service_msg_recv(size_t size); +static inline void _starpu_ft_stats_add_cp_data_in_memory(size_t size); +static inline void _starpu_ft_stats_free_cp_data_in_memory(size_t size); + +#ifdef STARPU_USE_MPI_FT_STATS +#define _STARPU_MPI_FT_STATS_INIT() do{ stat_init(); }while(0) +#define _STARPU_MPI_FT_STATS_SHUTDOWN() do{ _starpu_ft_stats_shutdown(); }while(0) 
+#define _STARPU_MPI_FT_STATS_WRITE_TO_FD(fd) do{ _starpu_ft_stats_write_to_fd(fd); }while(0) +#define _STARPU_MPI_FT_STATS_SEND_CP_DATA(size) do{ _starpu_ft_stats_send_data(size); }while(0) +#define _STARPU_MPI_FT_STATS_CANCEL_SEND_CP_DATA(size) do{ _starpu_ft_stats_cancel_send_data(size); }while(0) +#define _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(size) do{ _starpu_ft_stats_send_data_cached(size); }while(0) +#define _STARPU_MPI_FT_STATS_RECV_CP_DATA(size) do{ _starpu_ft_stats_recv_data(size); }while(0) +#define _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(size) do{ _starpu_ft_stats_cancel_recv_data(size); }while(0) +#define _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(size) do{ _starpu_ft_stats_recv_data_cached(size); }while(0) +#define _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(size) do{ _starpu_ft_stats_recv_data_cp_cached(size); }while(0) +#define _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(size) do{ _starpu_ft_stats_service_msg_send(size); }while(0) +#define _STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(size) do{ _starpu_ft_stats_service_msg_recv(size); }while(0) +#define _STARPU_MPI_FT_STATS_STORE_CP_DATA(size) do{ _starpu_ft_stats_add_cp_data_in_memory(size); }while(0) +#define _STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size) do{ _starpu_ft_stats_free_cp_data_in_memory(size); }while(0) + +#else //_STARPU_MPI_FT_STATS +#define _STARPU_MPI_FT_STATS_INIT() do{}while(0) +#define _STARPU_MPI_FT_STATS_SHUTDOWN() do{}while(0) +#define _STARPU_MPI_FT_STATS_WRITE_TO_FD(fd) do{}while(0) +#define _STARPU_MPI_FT_STATS_SEND_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_CANCEL_SEND_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_RECV_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(size) do{}while(0) +#define 
_STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_STORE_CP_DATA(size) do{}while(0) +#define _STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size) do{}while(0) + +#endif //_STARPU_MPI_FT_STATS + +LIST_TYPE(size_sample, \ + size_t size; +) + +static inline void stat_init() +{ + STARPU_PTHREAD_MUTEX_INIT(&_ft_stats_mutex, NULL); + size_sample_list_init(&cp_data_in_memory_list); + cp_data_msgs_sent_count = 0; + cp_data_msgs_sent_total_size = 0; + cp_data_msgs_received_count = 0; + cp_data_msgs_received_total_size = 0; + + cp_data_msgs_sent_cached_count = 0; + cp_data_msgs_sent_cached_total_size = 0; + cp_data_msgs_received_cached_count = 0; + cp_data_msgs_received_cached_total_size = 0; + cp_data_msgs_received_cp_cached_count = 0; + cp_data_msgs_received_cp_cached_total_size = 0; + + ft_service_msgs_sent_count = 0; + ft_service_msgs_sent_total_size = 0; + ft_service_msgs_received_count = 0; + ft_service_msgs_received_total_size = 0; + + cp_data_in_memory_size_total = 0; + cp_data_in_memory_size_max_at_t = 0; +} + +static inline void _starpu_ft_stats_send_data(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_sent_count++; + cp_data_msgs_sent_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_cancel_send_data(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_sent_count--; + cp_data_msgs_sent_total_size-=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_send_data_cached(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. 
An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_sent_cached_count++; + cp_data_msgs_sent_cached_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_recv_data(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_received_count++; + cp_data_msgs_received_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_cancel_recv_data(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_received_count--; + cp_data_msgs_received_total_size-=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_recv_data_cached(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_received_cached_count++; + cp_data_msgs_received_cached_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_recv_data_cp_cached(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_msgs_received_cp_cached_count++; + cp_data_msgs_received_cp_cached_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_service_msg_send(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. 
An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + ft_service_msgs_sent_count++; + ft_service_msgs_sent_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_service_msg_recv(size_t size) +{ + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + ft_service_msgs_received_count++; + ft_service_msgs_received_total_size+=size; + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_add_cp_data_in_memory(size_t size) +{ + size_t tmp; + struct size_sample *tmp_sample, *sample = malloc(sizeof(struct size_sample)); + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + cp_data_in_memory_size_total+=size; + tmp_sample = size_sample_list_back(&cp_data_in_memory_list); + tmp = (NULL==tmp_sample?0:tmp_sample->size); + tmp+=size; + if (tmp>cp_data_in_memory_size_max_at_t) + { + cp_data_in_memory_size_max_at_t = tmp; + } + sample->size = tmp; + size_sample_list_push_back(&cp_data_in_memory_list, sample); + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _starpu_ft_stats_free_cp_data_in_memory(size_t size) +{ + size_t tmp; + struct size_sample* sample = malloc(sizeof(struct size_sample)); + STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. 
An error has occurred.\n"); + STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); + tmp = size_sample_list_back(&cp_data_in_memory_list)->size; + tmp-=size; + sample->size = tmp; + size_sample_list_push_back(&cp_data_in_memory_list, sample); + STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); +} + +static inline void _ft_stats_free_cp_data_in_memory_list() +{ + struct size_sample *next, *sample = size_sample_list_begin(&cp_data_in_memory_list); + while (sample != size_sample_list_end(&cp_data_in_memory_list)) + { + next = size_sample_list_next(sample); + size_sample_list_erase(&cp_data_in_memory_list, sample); + free(sample); + sample = next; + } +} + +static inline void _starpu_ft_stats_write_to_fd(FILE* fd) +{ + // HEADER + fprintf(fd, "TYPE\tCP_DATA_NORMAL_COUNT\tCP_DATA_NORMAL_TOTAL_SIZE\tCP_DATA_CACHED_COUNT\tCP_DATA_CACHED_SIZE\tFT_SERVICE_MSGS_COUNT\tFT_SERVICE_MSGS_TOTAL_SIZE\n"); + // DATA + fprintf(fd, "SEND\t%d\t" "%ld\t" "%d\t" "%ld\t" "%d\t" "%ld\n", + cp_data_msgs_sent_count, cp_data_msgs_sent_total_size, cp_data_msgs_sent_cached_count, cp_data_msgs_sent_cached_total_size, ft_service_msgs_sent_count, ft_service_msgs_sent_total_size); + fprintf(fd, "RECV\t%d\t" "%ld\t" "%d\t" "%ld\t" "%d\t" "%ld\n", + cp_data_msgs_received_count, cp_data_msgs_received_total_size, cp_data_msgs_received_cached_count, cp_data_msgs_received_cached_total_size+cp_data_msgs_received_cp_cached_total_size, ft_service_msgs_received_count, ft_service_msgs_received_total_size); + fprintf(fd, "\n"); + fprintf(fd, "IN_MEM_CP_DATA_TOTAL:%lu\n", cp_data_in_memory_size_total); + fprintf(fd, "\n"); + fprintf(fd, "IN_MEM_CP_DATA_MAX_AT_T:%lu\n", cp_data_in_memory_size_max_at_t); + fprintf(fd, "\n"); +// fprintf(fd, "IN_MEM_CP_DATA_TRACKING\n"); +// struct size_sample *sample = size_sample_list_begin(&cp_data_in_memory_list); +// while (sample != size_sample_list_end(&cp_data_in_memory_list)) +// { +// fprintf(fd, "%ld\n", sample->size); +// sample = size_sample_list_next(sample); +// } +// 
fprintf(fd, "\n"); +} + +static inline void _starpu_ft_stats_shutdown() +{ + _ft_stats_free_cp_data_in_memory_list(); +} + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI_FT +#endif //FT_STARPU_STARPU_MPI_FT_STATS_H diff --git a/mpi/src/nmad/starpu_mpi_nmad.c b/mpi/src/nmad/starpu_mpi_nmad.c new file mode 100644 index 0000000..bc1b1ba --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad.c @@ -0,0 +1,857 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Guillaume Beauchamp + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_USE_FXT +#include +#endif + +#ifdef STARPU_USE_MPI_NMAD +#include +#include +#include +#include + +#include "starpu_mpi_nmad_coop.h" +#include "starpu_mpi_nmad_backend.h" +#include "starpu_mpi_nmad_unknown_datatype.h" + +void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); +void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req); +static inline void _starpu_mpi_request_end(struct _starpu_mpi_req* req, int post_callback_sem); +static inline void _starpu_mpi_request_try_end(struct _starpu_mpi_req* req, int post_callback_sem); + + +/* Condition to wake up waiting for all current MPI requests to finish */ +static starpu_pthread_t progress_thread; +static starpu_pthread_cond_t progress_cond; +static starpu_pthread_mutex_t progress_mutex; +static volatile int running = 0; + +static starpu_pthread_cond_t mpi_wait_for_all_running_cond; +static int mpi_wait_for_all_running = 0; +static starpu_pthread_mutex_t mpi_wait_for_all_running_mutex; + +/* Count running requests: this counter is incremented just before StarPU + * submits a MPI request, and decremented when a MPI request finishes. 
*/ +static volatile int nb_pending_requests = 0; + +#define REQ_FINALIZED 0x1 + +PUK_LFSTACK_TYPE(callback, struct _starpu_mpi_req *req;); +static callback_lfstack_t callback_stack; + +static starpu_sem_t callback_sem; + +static int nmad_mcast_started = 0; + + +/********************************************************/ +/* */ +/* Send/Receive functionalities */ +/* */ +/********************************************************/ + +void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED) +{ + int new_nb = STARPU_ATOMIC_ADD(&nb_pending_requests, 1); + (void)new_nb; +} + +/********************************************************/ +/* */ +/* Send functionalities */ +/* */ +/********************************************************/ + +static void _starpu_mpi_isend_known_datatype(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + STARPU_ASSERT_MSG(req->registered_datatype == 1, "Datatype is not registered, it cannot be sent through this way !"); + + _STARPU_MPI_DEBUG(30, "post NM isend request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); + + _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); + + _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); + + struct nm_data_s data; + nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count); + nm_sr_send_init(req->backend->session, &(req->backend->data_request)); + nm_sr_send_pack_data(req->backend->session, &(req->backend->data_request), &data); + nm_sr_send_set_priority(req->backend->session, &req->backend->data_request, req->prio); + + // this trace event is the start of 
the communication link: + _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); + + if (req->sync == 0) + { + req->ret = nm_sr_send_isend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag); + STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Isend returning %d", req->ret); + } + else + { + req->ret = nm_sr_send_issend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag); + STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Issend returning %d", req->ret); + } + + _starpu_mpi_handle_pending_request(req); + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_isend_func(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_datatype_allocate(req->data_handle, req); + + if (req->registered_datatype == 1) + { + req->count = 1; + req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); + + _starpu_mpi_isend_known_datatype(req); + } + else + { + _starpu_mpi_isend_unknown_datatype(req); + } + + _STARPU_MPI_LOG_OUT(); +} + +/********************************************************/ +/* */ +/* Receive functionalities */ +/* */ +/********************************************************/ + +static void _starpu_mpi_irecv_known_datatype(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + STARPU_ASSERT_MSG(req->registered_datatype == 1, "Datatype is not registered, it cannot be received through this way !"); + + _STARPU_MPI_DEBUG(20, "post NM irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + + struct nm_data_s data; + nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count); + 
nm_sr_recv_init(req->backend->session, &(req->backend->data_request)); + nm_sr_recv_unpack_data(req->backend->session, &(req->backend->data_request), &data); + nm_sr_recv_irecv(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL); + + _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); + + _starpu_mpi_handle_pending_request(req); + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_datatype_allocate(req->data_handle, req); + if (req->registered_datatype == 1) + { + req->count = 1; + req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); + _starpu_mpi_irecv_known_datatype(req); + } + else + { + _starpu_mpi_irecv_unknown_datatype(req); + } + + _STARPU_MPI_LOG_OUT(); +} + +/********************************************************/ +/* */ +/* Wait functionalities */ +/* */ +/********************************************************/ + +#define _starpu_mpi_req_status(PUBLIC_REQ,STATUS) do { \ + STATUS->MPI_SOURCE=PUBLIC_REQ->node_tag.node.rank; /**< field name mandatory by spec */ \ + STATUS->MPI_TAG=PUBLIC_REQ->node_tag.data_tag; /**< field name mandatory by spec */ \ + STATUS->MPI_ERROR=PUBLIC_REQ->ret; /**< field name mandatory by spec */ \ + STATUS->size=PUBLIC_REQ->count; /**< size of data received */ \ + STATUS->cancelled=0; /**< whether request was cancelled */ \ +} while(0) + +int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_wait needs a valid starpu_mpi_req"); + struct _starpu_mpi_req *req = *public_req; + STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Wait cannot be called on a detached request"); + + /* we must do a test_locked to avoid race condition : + * without req_cond could still be used and couldn't be freed)*/ + while (!req->completed || ! 
piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED)) + { + piom_cond_wait(&(req->backend->req_cond),REQ_FINALIZED); + } + + if (status!=MPI_STATUS_IGNORE) + _starpu_mpi_req_status(req,status); + + _starpu_mpi_request_try_end(req, 1); + *public_req = NULL; + + _STARPU_MPI_LOG_OUT(); + return MPI_SUCCESS; +} + +/********************************************************/ +/* */ +/* Test functionalities */ +/* */ +/********************************************************/ + +int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req"); + struct _starpu_mpi_req *req = *public_req; + STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request"); + _STARPU_MPI_DEBUG(2, "Test request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + STARPU_VALGRIND_YIELD(); + + _STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + + /* we must do a test_locked to avoid race condition : + * without req_cond could still be used and couldn't be freed)*/ + *flag = req->completed && piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED); + if (*flag && status!=MPI_STATUS_IGNORE) + _starpu_mpi_req_status(req,status); + + _STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag); + + if(*flag) + { + _starpu_mpi_request_try_end(req, 1); + *public_req = NULL; + } + _STARPU_MPI_LOG_OUT(); + return MPI_SUCCESS; +} + +/********************************************************/ +/* */ +/* Barrier functionalities */ +/* */ +/********************************************************/ + +int _starpu_mpi_barrier(MPI_Comm comm) +{ + 
_STARPU_MPI_LOG_IN(); + + int ret = MPI_Barrier(comm); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %d", ret); + + _STARPU_MPI_LOG_OUT(); + return ret; +} + +int _starpu_mpi_wait_for_all(MPI_Comm comm) +{ + (void) comm; + _STARPU_MPI_LOG_IN(); + + STARPU_PTHREAD_MUTEX_LOCK(&mpi_wait_for_all_running_mutex); + STARPU_MPI_ASSERT_MSG(!mpi_wait_for_all_running, "Concurrent starpu_mpi_wait_for_all is not implemented, even on different communicators"); + mpi_wait_for_all_running = 1; + do + { + while (nb_pending_requests) + STARPU_PTHREAD_COND_WAIT(&mpi_wait_for_all_running_cond, &mpi_wait_for_all_running_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mpi_wait_for_all_running_mutex); + + starpu_task_wait_for_all(); + + STARPU_PTHREAD_MUTEX_LOCK(&mpi_wait_for_all_running_mutex); + } while (nb_pending_requests); + mpi_wait_for_all_running = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&mpi_wait_for_all_running_mutex); + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +/********************************************************/ +/* */ +/* Progression */ +/* */ +/********************************************************/ + +/* Completely finalize a request: destroy it and decrement the number of pending requests */ +static inline void _starpu_mpi_request_end(struct _starpu_mpi_req* req, int post_callback_sem) +{ + /* Destroying a request and decrementing the number of pending requests + * should be done together, so let's wrap these two things in a + * function. This means instead of calling _starpu_mpi_request_destroy(), + * you should call this function. 
*/ + + /* If request went through _starpu_mpi_handle_received_data(), finalized has to be true: */ + assert((req->backend->has_received_data && req->backend->finalized) || !req->backend->has_received_data); + + _starpu_mpi_request_destroy(req); + + int pending_remaining = STARPU_ATOMIC_ADD(&nb_pending_requests, -1); + assert(pending_remaining >= 0); + if (!pending_remaining) + { + STARPU_PTHREAD_COND_BROADCAST(&mpi_wait_for_all_running_cond); + if (post_callback_sem && !running) + { + starpu_sem_post(&callback_sem); + } + } +} + +/* Check if the caller has to completely finalize a request and try to do it */ +static inline void _starpu_mpi_request_try_end(struct _starpu_mpi_req* req, int post_callback_sem) +{ + _starpu_spin_lock(&req->backend->finalized_to_destroy_lock); + if (!req->backend->has_received_data || req->backend->finalized) + { + _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); + _starpu_mpi_request_end(req, post_callback_sem); + } + else + { + /* Request isn't finalized yet (NewMadeleine still needs it), since + * this function should have destroyed the request, tell + * _starpu_mpi_handle_request_termination() to destroy it when + * NewMadeleine won't need it anymore. */ + req->backend->to_destroy = 1; + _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); + } +} + +/* Do required actions when a request is completed (but maybe not finalized!) */ +static inline void _starpu_mpi_handle_post_actions(struct _starpu_mpi_req* req) +{ + if (req->callback) + { + /* Callbacks are executed outside of this function, later by the + * progression thread. + * Indeed, this current function is executed by a NewMadeleine handler, + * and possibly inside of a PIOman ltask. In such context, some locking + * or system calls can be forbidden to avoid any deadlock, thus + * callbacks are deported outside of this handler. 
*/ + struct callback_lfstack_cell_s* c = padico_malloc(sizeof(struct callback_lfstack_cell_s)); + c->req = req; + callback_lfstack_push(&callback_stack, c); + + /* The main thread can exit without waiting + * the end of the detached request. Callback thread + * must then be kept alive if they have a callback.*/ + starpu_sem_post(&callback_sem); + } + else if(!req->detached) + { + /* tell anyone potentially waiting on the request that it is + * terminated now (should be done after the callback)*/ + req->completed = 1; + piom_cond_signal(&req->backend->req_cond, REQ_FINALIZED); + } +} + +/* Function called when data arrived, but NewMadeleine still holds a reference + * on it (to make progress a broadcast for instance). Application can thus read + * the data, but not yet write it. */ +void _starpu_mpi_handle_received_data(struct _starpu_mpi_req* req) +{ + _STARPU_MPI_LOG_IN(); + + assert(req->request_type == RECV_REQ); + assert(!_starpu_mpi_recv_wait_finalize); + assert(!req->backend->has_received_data); + assert(!req->backend->finalized); + + req->backend->has_received_data = 1; + + if (req->registered_datatype == 0) + { + /* Without peek_data, we can't unpack data for StarPU's use and keep + * the buffer alive for NewMadeleine, so calling + * _starpu_mpi_handle_received_data() makes no sense. 
*/ + assert(starpu_data_get_interface_ops(req->data_handle)->peek_data); + starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); + } + + // Release write acquire on the handle: can unlock tasks waiting to read the handle: + starpu_data_release_to(req->data_handle, STARPU_R); + + _starpu_mpi_handle_post_actions(req); + + _STARPU_MPI_LOG_OUT(); +} + +/* Function called when nmad completely finished a request */ +void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req* req) +{ + _STARPU_MPI_LOG_IN(); + + _STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + assert(!req->backend->finalized); + + if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) + { + if (req->registered_datatype == 0) + { + if (req->request_type == RECV_REQ) + { + if (starpu_data_get_interface_ops(req->data_handle)->peek_data) + { + if (!req->backend->has_received_data) + { + starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); + } + starpu_free_on_node_flags(req->node, (uintptr_t) req->ptr, req->count, 0); + } + else + { + // req->ptr is freed by starpu_data_unpack + starpu_data_unpack_node(req->data_handle, req->node, req->ptr, req->count); + } + } + else + starpu_free_on_node_flags(req->node, (uintptr_t) req->ptr, req->count, 0); + } + else if (req->backend->posted) // with coop, only one request is really used to do the broadcast, so only posted request really allocates memory for the data: + { + nm_mpi_nmad_data_release(req->datatype); + _starpu_mpi_datatype_free(req->data_handle, &req->datatype); + } + } + + // for recv requests, this event is the end of the communication link: + _STARPU_MPI_TRACE_TERMINATED(req); + + _starpu_mpi_release_req_data(req); + 
+ if (req->backend->has_received_data) + { + assert(req->request_type == RECV_REQ); + + /* Callback, test or wait were unlocked by + * _starpu_mpi_handle_received_data(), maybe they were already + * executed and since the request wasn't finalized yet, they didn't + * destroy the request, and we have to do it now: */ + _starpu_spin_lock(&req->backend->finalized_to_destroy_lock); + req->backend->finalized = 1; + if (req->backend->to_destroy || req->detached) + { + _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); + _starpu_mpi_request_end(req, 1); + } + else + { + _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); + } + } + else if (!req->callback && req->detached) + { + /* This request has no callback and is detached: we have to end it now: */ + _starpu_mpi_request_end(req, 1); + } + else + { + _starpu_mpi_handle_post_actions(req); + } + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_handle_request_termination_callback(nm_sr_event_t event STARPU_ATTRIBUTE_UNUSED, const nm_sr_event_info_t* event_info STARPU_ATTRIBUTE_UNUSED, void* ref) +{ + assert(ref != NULL); + + struct _starpu_mpi_req* req = (struct _starpu_mpi_req*) ref; + req->backend->posted = 1; // a network event was triggered for this request, so it was really posted + + if (event & NM_SR_EVENT_FINALIZED) + { + _starpu_mpi_handle_request_termination(req); + } + else if (event & NM_SR_EVENT_RECV_COMPLETED && req->request_type == RECV_REQ && !_starpu_mpi_recv_wait_finalize && req->sequential_consistency) + { + /* About required sequential consistency: + * "If it is 0, user can launch tasks writing in the handle, which will + * mix data manipulated by nmad and data manipulated by tasks, this + * could break some expected behaviours." 
(sthibault) */ + + /* Unknown datatype case is in starpu_mpi_nmad_unknown_datatype.c */ + assert(req->registered_datatype == 1); + + _starpu_mpi_handle_received_data(req); + } +} + +void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req) +{ + assert(req != NULL); + nm_sr_request_set_ref(&req->backend->data_request, req); + int ret = nm_sr_request_monitor(req->backend->session, &req->backend->data_request, + NM_SR_EVENT_FINALIZED | NM_SR_EVENT_RECV_COMPLETED, + _starpu_mpi_handle_request_termination_callback); + assert(ret == NM_ESUCCESS); +} + +void _starpu_mpi_submit_ready_request(void *arg) +{ + _STARPU_MPI_LOG_IN(); + struct _starpu_mpi_req *req = arg; + STARPU_ASSERT_MSG(req, "Invalid request"); + + if (req->reserved_size) + { + /* The core will have really allocated the reception buffer now, release our reservation */ + starpu_memory_deallocate(req->node, req->reserved_size); + req->reserved_size = 0; + } + + /* submit the request to MPI directly from submitter */ + _STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", + req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + req->func(req); + + _STARPU_MPI_LOG_OUT(); +} + +static void *_starpu_mpi_progress_thread_func(void *arg) +{ + struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; + +#ifndef STARPU_SIMGRID + if (!_starpu_mpi_nobind && starpu_bind_thread_on(_starpu_mpi_thread_cpuid, 0, "MPI") < 0) + { + char hostname[65]; + gethostname(hostname, sizeof(hostname)); + _STARPU_DISP("[%s] No core was available for the MPI thread. 
You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname); + } +#endif + +#ifdef STARPU_SIMGRID + /* Now that MPI is set up, let the rest of simgrid get initialized */ + char **argv_cpy; + _STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*)); + int i; + for (i = 0; i < *(argc_argv->argc); i++) + argv_cpy[i] = strdup((*(argc_argv->argv))[i]); +#if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_SET_DATA) + _starpu_simgrid_actor_create("main", smpi_simulated_main_, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); +#else + MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); + /* And set TSD for us */ + void **tsd; + _STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*)); + if (!smpi_process_set_user_data) + { + _STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n"); + } + smpi_process_set_user_data(tsd); +#endif +#endif + + _starpu_mpi_comm_amounts_init(argc_argv->comm); + _starpu_mpi_cache_init(argc_argv->comm); + _starpu_mpi_select_node_init(); + _starpu_mpi_datatype_init(); + _starpu_mpi_tags_init(); + +#ifdef STARPU_USE_FXT + _starpu_mpi_fxt_init(argc_argv); + +#ifdef HAVE_NM_TRACE_ADD_SYNCHRO_POINT + starpu_fxt_trace_user_meta_string("Clock_synchronize"); + nm_trace_add_synchro_point(); +#endif +#endif + + + if (_starpu_mpi_use_coop_sends) + { + if (argc_argv->world_size > 2) + { + _starpu_mpi_nmad_coop_init(); + nmad_mcast_started = 1; // to shutdown mcast + } + else + { + _starpu_mpi_use_coop_sends = 0; + } + } + + /* notify the main thread that the progression thread is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + running = 1; + STARPU_PTHREAD_COND_SIGNAL(&progress_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + while (1) + { + struct callback_lfstack_cell_s* c = 
callback_lfstack_pop(&callback_stack); + int err=0; + + if(running || nb_pending_requests>0) + { + /* shall we block ? */ + err = starpu_sem_wait(&callback_sem); + //running nb_pending_requests can change while waiting + } + if(c==NULL) + { + c = callback_lfstack_pop(&callback_stack); + if (c == NULL) + { + if(running && nb_pending_requests>0) + { + STARPU_ASSERT_MSG(c!=NULL, "Callback thread awakened without callback ready with error %d.",err); + } + else + { + if (nb_pending_requests==0) + break; + } + continue; + } + } + + c->req->callback(c->req->callback_arg); + if (c->req->detached) + { + _starpu_mpi_request_try_end(c->req, 0); + } + else + { + c->req->completed=1; + piom_cond_signal(&(c->req->backend->req_cond), REQ_FINALIZED); + } + + free(c); + } + + + /** Now, shutting down MPI **/ + + + STARPU_ASSERT_MSG(callback_lfstack_pop(&callback_stack)==NULL, "List of callback not empty."); + STARPU_ASSERT_MSG(nb_pending_requests==0, "Request still pending."); + + /* We cannot rely on _starpu_mpi_use_coop_sends to shutdown mcast: + * coops can be disabled with starpu_mpi_coop_sends_set_use() after + * initialization of mcast. 
+ */ + if (nmad_mcast_started) + { + _starpu_mpi_nmad_coop_shutdown(); + } + +#ifdef STARPU_USE_FXT + _starpu_mpi_fxt_shutdown(); +#endif + + if (argc_argv->initialize_mpi) + { + _STARPU_MPI_DEBUG(3, "Calling MPI_Finalize()\n"); + MPI_Finalize(); + } + + starpu_sem_destroy(&callback_sem); + free(argc_argv); + return NULL; +} + +/********************************************************/ +/* */ +/* (De)Initialization methods */ +/* */ +/********************************************************/ + +// #ifdef STARPU_MPI_ACTIVITY +// static int hookid = - 1; +// #endif /* STARPU_MPI_ACTIVITY */ + +/* Set up the NewMadeleine backend: initialize the mutexes, conditions and callback semaphore, call _starpu_mpi_do_initialize(), pick a core for the MPI thread when binding is enabled and none was chosen, bind pioman's progression thread to it, register pioman polling hooks selected by STARPU_MPI_NMAD_PROG_HOOKS / STARPU_MPI_NMAD_IDLE_HOOKS (FORCED, SINGLE or HOOK; anything else disables the hook), then start the progression thread and block until it has set running. Always returns 0. */ +int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv) +{ + STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&progress_cond, NULL); + + STARPU_PTHREAD_MUTEX_INIT(&mpi_wait_for_all_running_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&mpi_wait_for_all_running_cond, NULL); + + starpu_sem_init(&callback_sem, 0, 0); + running = 0; + + _starpu_mpi_env_init(); + + /* This function calls MPI_Init_thread if needed, and it initializes internal NMAD/Pioman variables, + * required for piom_ltask_set_bound_thread_indexes() */ + _starpu_mpi_do_initialize(argc_argv); + + if (!_starpu_mpi_nobind && _starpu_mpi_thread_cpuid < 0) + { + _starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); + } + + callback_lfstack_init(&callback_stack); + + /* Tell pioman to use a bound thread for communication progression: + * share the same core as StarPU's MPI thread, the MPI thread has very low activity with NMAD backend */ +#ifdef HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES + /* We prefer to give the OS index of the core, because StarPU can have + * a different vision of the topology, especially if STARPU_WORKERS_GETBIND + * is enabled */ + int indexes[1] = { starpu_get_pu_os_index((unsigned) _starpu_mpi_thread_cpuid) }; + if (!_starpu_mpi_nobind) + piom_ltask_set_bound_thread_os_indexes(HWLOC_OBJ_PU, indexes, 1); +#else + int indexes[1] = { 
_starpu_mpi_thread_cpuid }; + if (!_starpu_mpi_nobind) + piom_ltask_set_bound_thread_indexes(HWLOC_OBJ_PU, indexes, 1); +#endif + + /* Register some hooks for communication progress if needed */ + /* Static storage: the addresses of these two variables are given to the hook registration calls below and remain registered after this function has returned, so they must not live on its stack */ + static int polling_point_prog, polling_point_idle; + char *s_prog_hooks = starpu_getenv("STARPU_MPI_NMAD_PROG_HOOKS"); + char *s_idle_hooks = starpu_getenv("STARPU_MPI_NMAD_IDLE_HOOKS"); + + if(!s_prog_hooks) + { + polling_point_prog = 0; + } + else + { + polling_point_prog = + (strcmp(s_prog_hooks, "FORCED") == 0) ? PIOM_POLL_POINT_FORCED : + (strcmp(s_prog_hooks, "SINGLE") == 0) ? PIOM_POLL_POINT_SINGLE : + (strcmp(s_prog_hooks, "HOOK") == 0) ? PIOM_POLL_POINT_HOOK : + 0; + } + + if(!s_idle_hooks) + { + polling_point_idle = 0; + } + else + { + polling_point_idle = + (strcmp(s_idle_hooks, "FORCED") == 0) ? PIOM_POLL_POINT_FORCED : + (strcmp(s_idle_hooks, "SINGLE") == 0) ? PIOM_POLL_POINT_SINGLE : + (strcmp(s_idle_hooks, "HOOK") == 0) ? PIOM_POLL_POINT_HOOK : + 0; + } + + if(polling_point_prog) + { + starpu_progression_hook_register((void *)&piom_ltask_schedule, (void *)&polling_point_prog); + } + + if(polling_point_idle) + { + starpu_idle_hook_register((void *)&piom_ltask_schedule, (void *)&polling_point_idle); + } + + /* Launch thread used for nmad callbacks */ + STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv); + + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + while (!running) + STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + return 0; +} + +void _starpu_mpi_progress_shutdown(void **value) +{ + if (!running) + { + _STARPU_ERROR("The progress thread was not launched. 
Was StarPU successfully initialized?\n"); + } + + /* kill the progression thread */ + STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); + running = 0; + STARPU_PTHREAD_COND_BROADCAST(&progress_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); + + starpu_sem_post(&callback_sem); + + STARPU_PTHREAD_JOIN(progress_thread, value); + + callback_lfstack_destroy(&callback_stack); + + STARPU_PTHREAD_MUTEX_DESTROY(&progress_mutex); + STARPU_PTHREAD_COND_DESTROY(&progress_cond); + + STARPU_PTHREAD_MUTEX_DESTROY(&mpi_wait_for_all_running_mutex); + STARPU_PTHREAD_COND_DESTROY(&mpi_wait_for_all_running_cond); +} + +static int64_t _starpu_mpi_tag_max = INT64_MAX; + +int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag) +{ + (void) comm; + if (keyval == STARPU_MPI_TAG_UB) + { + if ((uint64_t) _starpu_mpi_tag_max > NM_TAG_MAX) + _starpu_mpi_tag_max = NM_TAG_MAX; + /* manage case where nmad max tag causes overflow if represented as starpu tag */ + *(int64_t **)attribute_val = &_starpu_mpi_tag_max; + *flag = 1; + } + else + { + *flag = 0; + } + return 0; +} + +#endif /* STARPU_USE_MPI_NMAD*/ diff --git a/mpi/src/nmad/starpu_mpi_nmad.h b/mpi/src/nmad/starpu_mpi_nmad.h new file mode 100644 index 0000000..9ca822a --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad.h @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_NMAD_H__ +#define __STARPU_MPI_NMAD_H__ + +#include +#include +#include +#include +#include + +/** @file */ + +#ifdef STARPU_USE_MPI_NMAD + +#ifdef __cplusplus +extern "C" +{ +#endif + +int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv); +void _starpu_mpi_progress_shutdown(void **value); + +//#ifdef STARPU_SIMGRID +//void _starpu_mpi_wait_for_initialization(); +//#endif + +int _starpu_mpi_barrier(MPI_Comm comm); +int _starpu_mpi_wait_for_all(MPI_Comm comm); +int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status); +int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status); + +void _starpu_mpi_isend_func(struct _starpu_mpi_req *req); +void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req); +void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); +void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req); +void _starpu_mpi_handle_received_data(struct _starpu_mpi_req* req); + +#ifdef __cplusplus +} +#endif + +#endif /* STARPU_USE_MPI_NMAD */ +#endif /* __STARPU_MPI_NMAD_H__ */ diff --git a/mpi/src/nmad/starpu_mpi_nmad_backend.c b/mpi/src/nmad/starpu_mpi_nmad_backend.c new file mode 100644 index 0000000..2d7e77c --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_backend.c @@ -0,0 +1,122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "starpu_mpi_nmad_backend.h" +#include +#include "starpu_mpi_nmad.h" + +#ifdef STARPU_USE_MPI_NMAD + +#include + +static void starpu_mpi_nmad_backend_constructor(void) __attribute__((constructor)); +static void starpu_mpi_nmad_backend_constructor(void) +{ + /* strat_prio is preferred for StarPU instead of default strat_aggreg */ + setenv("NMAD_STRATEGY", "prio", 0 /* do not overwrite user-supplied value, if set */); + /* prefer rcache on ibverbs */ + setenv("NMAD_IBVERBS_RCACHE", "1", 0); + /* use pioman dedicated thread */ + setenv("PIOM_DEDICATED", "1", 0); + /* pioman waits for starpu to place its dedicated thread */ + setenv("PIOM_DEDICATED_WAIT", "1", 0); +} + +void _starpu_mpi_nmad_backend_init(struct starpu_conf *conf) +{ + (void)conf; + nm_abi_config_check(); +} + +void _starpu_mpi_nmad_backend_shutdown(void) +{ +} + +int _starpu_mpi_nmad_backend_reserve_core(void) +{ + return 1; +} + +void _starpu_mpi_nmad_backend_request_init(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend)); + piom_cond_init(&req->backend->req_cond, 0); + req->backend->data_request = NM_SR_REQUEST_NULL; + req->backend->posted = 0; + req->backend->has_received_data = 0; + req->backend->finalized = 0; + req->backend->to_destroy = 0; + _starpu_spin_init(&req->backend->finalized_to_destroy_lock); +} + +void _starpu_mpi_nmad_backend_request_fill(struct _starpu_mpi_req *req, int is_internal_req STARPU_ATTRIBUTE_UNUSED) +{ + /* this function gives session and gate: */ + nm_mpi_nmad_dest(&req->backend->session, &req->backend->gate, req->node_tag.node.comm, req->node_tag.node.rank); +} + +void _starpu_mpi_nmad_backend_request_destroy(struct 
_starpu_mpi_req *req) +{ + piom_cond_destroy(&(req->backend->req_cond)); + _starpu_spin_destroy(&req->backend->finalized_to_destroy_lock); + free(req->backend); +} + +void _starpu_mpi_nmad_backend_data_clear(starpu_data_handle_t data_handle) +{ + (void)data_handle; +} + +void _starpu_mpi_nmad_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag) +{ + (void)data_handle; + (void)data_tag; +} + +void _starpu_mpi_nmad_backend_comm_register(MPI_Comm comm) +{ + (void)comm; +} + +struct _starpu_mpi_backend _mpi_backend = +{ + ._starpu_mpi_backend_init = _starpu_mpi_nmad_backend_init, + ._starpu_mpi_backend_shutdown = _starpu_mpi_nmad_backend_shutdown, + ._starpu_mpi_backend_reserve_core = _starpu_mpi_nmad_backend_reserve_core, + ._starpu_mpi_backend_request_init = _starpu_mpi_nmad_backend_request_init, + ._starpu_mpi_backend_request_fill = _starpu_mpi_nmad_backend_request_fill, + ._starpu_mpi_backend_request_destroy = _starpu_mpi_nmad_backend_request_destroy, + ._starpu_mpi_backend_data_clear = _starpu_mpi_nmad_backend_data_clear, + ._starpu_mpi_backend_data_register = _starpu_mpi_nmad_backend_data_register, + ._starpu_mpi_backend_comm_register = _starpu_mpi_nmad_backend_comm_register, + + ._starpu_mpi_backend_progress_init = _starpu_mpi_progress_init, + ._starpu_mpi_backend_progress_shutdown = _starpu_mpi_progress_shutdown, +//#ifdef STARPU_SIMGRID +// ._starpu_mpi_backend_wait_for_initialization = _starpu_mpi_wait_for_initialization, +//#endif + + ._starpu_mpi_backend_barrier = _starpu_mpi_barrier, + ._starpu_mpi_backend_wait_for_all = _starpu_mpi_wait_for_all, + ._starpu_mpi_backend_wait = _starpu_mpi_wait, + ._starpu_mpi_backend_test = _starpu_mpi_test, + + ._starpu_mpi_backend_isend_size_func = _starpu_mpi_isend_func, + ._starpu_mpi_backend_irecv_size_func = _starpu_mpi_irecv_func, +}; + +#endif /* STARPU_USE_MPI_NMAD*/ diff --git a/mpi/src/nmad/starpu_mpi_nmad_backend.h b/mpi/src/nmad/starpu_mpi_nmad_backend.h new file mode 100644 index 
0000000..e3ac27d --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_backend.h @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_NMAD_BACKEND_H__ +#define __STARPU_MPI_NMAD_BACKEND_H__ + +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef STARPU_USE_MPI_NMAD + +#include +#include +#include + +struct _starpu_mpi_req_backend +{ + nm_gate_t gate; + nm_session_t session; + nm_sr_request_t data_request; + piom_cond_t req_cond; + + int posted; // with coop, only one request is really posted, we need to know if the request was really posted to possibly free data + int has_received_data; // tell if request went through _starpu_mpi_handle_received_data() to release write lock + int finalized; // tell if _starpu_mpi_handle_request_termination() was called, so starpu_mpi_test() and starpu_mpi_wait() have to free the request + int to_destroy; // tell if starpu_mpi_wait() or starpu_mpi_test() was called before _starpu_mpi_handle_request_termination() and thus this last function will have to free the request + struct _starpu_spinlock finalized_to_destroy_lock; + + /** When datatype is unknown */ + struct nm_data_s unknown_datatype_data; // will contain size of the datatype and data itself + struct iovec unknown_datatype_v[2]; +}; + +#endif // STARPU_USE_MPI_NMAD + 
+#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_NMAD_BACKEND_H__ diff --git a/mpi/src/nmad/starpu_mpi_nmad_coop.c b/mpi/src/nmad/starpu_mpi_nmad_coop.c new file mode 100644 index 0000000..b4722d3 --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_coop.c @@ -0,0 +1,177 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include + +#ifdef STARPU_USE_MPI_NMAD +#include +#include +#include +#include +#include +#include +#include "starpu_mpi_nmad_coop.h" +#include "starpu_mpi_nmad_backend.h" +#include "starpu_mpi_nmad_unknown_datatype.h" + +extern void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); + +struct mcast_send +{ + nm_mcast_t mcast; + int* dests; + int* prios; + struct _starpu_mpi_req* req; + struct nm_data_s data; +}; + +static nm_mcast_service_t mcast_service; + +void _starpu_mpi_nmad_coop_init(void) +{ + mcast_service = nm_mcast_init(nm_mpi_comm(MPI_COMM_WORLD)); +} + +void _starpu_mpi_nmad_end_coop_callback(void* arg) +{ + /* Callback called by the root node of the broadcast, when its job is done; + * not by receivers. 
*/ + struct mcast_send* mcast = (struct mcast_send*) arg; + + mcast->req->backend->posted = 1; + + _starpu_mpi_handle_request_termination(mcast->req); + + nm_mcast_send_destroy(&mcast->mcast); + free(mcast->dests); + if (_starpu_mpi_use_prio) + { + free(mcast->prios); + } + free(mcast); +} + +void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control STARPU_ATTRIBUTE_UNUSED, int submit_data) +{ + if (!submit_data) + { + return; + } + + _STARPU_MPI_LOG_IN(); + + unsigned n = coop_sends->n; + assert(n >= 2); + +#if 0 // sure, a tree does not worth it for only two recipients, but if the user wants a broadcast with a chain routing, he really wants only one request to be sent from this node. + if (n == 2) // a broadcast tree does not worth it for only two recipients + { + assert(coop_sends->reqs_array[0]->request_type == SEND_REQ); + _starpu_mpi_submit_ready_request(coop_sends->reqs_array[0]); + + assert(coop_sends->reqs_array[1]->request_type == SEND_REQ); + _starpu_mpi_submit_ready_request(coop_sends->reqs_array[1]); + } + else +#endif + { + starpu_fxt_trace_user_event_string("collective send"); + + unsigned i = 0; + struct _starpu_mpi_req *starpu_req; + + struct mcast_send* mcast = malloc(sizeof(struct mcast_send)); + mcast->dests = malloc(n * sizeof(int)); + if (_starpu_mpi_use_prio) + { + mcast->prios = malloc(n * sizeof(int)); + } + else + { + mcast->prios = NULL; + } + + /* We don't increase the amount of communicated data, because we don't + * know which tree type will be executed to do the broadcast, so we + * don't know how many data will actually be sent from this node. 
*/ + _starpu_mpi_nb_coop_inc(n); + + for (i = 0; i < n; i++) + { + starpu_req = coop_sends->reqs_array[i]; + + assert(starpu_req->request_type == SEND_REQ); + assert(starpu_req->coop_sends_head != NULL); + mcast->dests[i] = starpu_req->node_tag.node.rank; + if (_starpu_mpi_use_prio) + { + mcast->prios[i] = starpu_req->prio; + } + + // this trace event is the start of the communication link: + _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_COLLECTIVE_SEND, starpu_req, starpu_req->prio); + + // Keep the first request to do the mcast, but consider other as finished: + if (i > 0) + { + _starpu_mpi_handle_request_termination(starpu_req); + } + } + + starpu_req = coop_sends->reqs_array[0]; + + _starpu_mpi_datatype_allocate(starpu_req->data_handle, starpu_req); + + nm_len_t header_len = 0; + + if (starpu_req->registered_datatype == 1) + { + starpu_req->count = 1; + starpu_req->ptr = starpu_data_handle_to_pointer(starpu_req->data_handle, STARPU_MAIN_RAM); + nm_mpi_nmad_data_get(&mcast->data, (void*)starpu_req->ptr, starpu_req->datatype, starpu_req->count); + } + else + { + _starpu_mpi_isend_prepare_unknown_datatype(starpu_req, &mcast->data); + header_len = sizeof(starpu_ssize_t); // we send the size of the data as a header + } + + mcast->req = starpu_req; + + nm_comm_t comm = nm_comm_get_by_session(starpu_req->backend->session); + assert(comm != NULL); + + nm_mcast_send_init(mcast_service, &mcast->mcast); + nm_mcast_send_set_notifier(&mcast->mcast, _starpu_mpi_nmad_end_coop_callback, mcast); + nm_mcast_isend(&mcast->mcast, comm, mcast->dests, mcast->prios, n, starpu_req->node_tag.data_tag, &mcast->data, header_len, NM_COLL_TREE_DEFAULT); + } + + _STARPU_MPI_LOG_OUT(); +} + +void _starpu_mpi_nmad_coop_shutdown(void) +{ + nm_mcast_finalize(mcast_service); +} + +void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends STARPU_ATTRIBUTE_UNUSED) +{ + /* The NMAD implementation doesn't use this function. 
*/ +} + +#endif /* STARPU_USE_MPI_NMAD*/ diff --git a/mpi/src/nmad/starpu_mpi_nmad_coop.h b/mpi/src/nmad/starpu_mpi_nmad_coop.h new file mode 100644 index 0000000..ac6a581 --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_coop.h @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_NMAD_COOP_H__ +#define __STARPU_MPI_NMAD_COOP_H__ + +#include + +#ifdef STARPU_USE_MPI_NMAD + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include +#include + +void _starpu_mpi_nmad_coop_init(void); +void _starpu_mpi_nmad_coop_shutdown(void); +void _starpu_mpi_nmad_end_coop_callback(void* arg); + +#ifdef __cplusplus +} +#endif + +#endif // STARPU_USE_MPI_NMAD +#endif // __STARPU_MPI_NMAD_COOP_H__ diff --git a/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.c b/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.c new file mode 100644 index 0000000..f78dc53 --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.c @@ -0,0 +1,156 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + + +#include + +#ifdef STARPU_USE_MPI_NMAD +#include +#include +#include +#include +#include +#include "starpu_mpi_nmad.h" +#include "starpu_mpi_nmad_backend.h" +#include "starpu_mpi_nmad_unknown_datatype.h" + + + +/********************************************** +* Send +**********************************************/ +void _starpu_mpi_isend_prepare_unknown_datatype(struct _starpu_mpi_req* req, struct nm_data_s* data) +{ + STARPU_ASSERT_MSG(req->registered_datatype != 1, "Datatype is registered, no need to send it through this way !"); + + starpu_data_pack_node(req->data_handle, req->node, &req->ptr, &req->count); + + req->backend->unknown_datatype_v[0].iov_base = &req->count; + req->backend->unknown_datatype_v[0].iov_len = sizeof(starpu_ssize_t); + req->backend->unknown_datatype_v[1].iov_base = req->ptr; + req->backend->unknown_datatype_v[1].iov_len = req->count; + nm_data_iov_build(data, req->backend->unknown_datatype_v, 2); +} + +void _starpu_mpi_isend_unknown_datatype(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + _STARPU_MPI_DEBUG(30, "post NM isend (unknown datatype) request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, 
starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); + + _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); + + _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); + + _starpu_mpi_isend_prepare_unknown_datatype(req, &req->backend->unknown_datatype_data); + + nm_sr_send_init(req->backend->session, &req->backend->data_request); + nm_sr_send_pack_data(req->backend->session, &req->backend->data_request, &req->backend->unknown_datatype_data); + nm_sr_send_set_priority(req->backend->session, &req->backend->data_request, req->prio); + nm_sr_send_header(req->backend->session, &req->backend->data_request, sizeof(starpu_ssize_t)); + + // this trace event is the start of the communication link: + _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); + + if (req->sync == 0) + { + req->ret = nm_sr_send_isend(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag); + STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "nm_sr_send_isend returning %d", req->ret); + } + else + { + req->ret = nm_sr_send_issend(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag); + STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "nm_sr_send_issend returning %d", req->ret); + } + + _starpu_mpi_handle_pending_request(req); + + _STARPU_MPI_LOG_OUT(); +} + + +/********************************************** + * Receive + **********************************************/ + +static void _starpu_mpi_unknown_datatype_recv_callback(nm_sr_event_t event, const nm_sr_event_info_t* p_info STARPU_ATTRIBUTE_UNUSED, void* ref) +{ + STARPU_ASSERT_MSG(!((event & NM_SR_EVENT_FINALIZED) && (event & NM_SR_EVENT_RECV_DATA)), "Both events can't be triggered at the same time !"); + + struct _starpu_mpi_req* req = (struct _starpu_mpi_req*) ref; + 
assert(req->request_type == RECV_REQ); + assert(req->registered_datatype != 1); + + req->backend->posted = 1; // a network event was triggered for this request, so it was really posted + + if (event & NM_SR_EVENT_RECV_DATA) + { + // Header arrived, so get the size of the datatype and store it in req->count: + struct nm_data_s data_header; + nm_data_contiguous_build(&data_header, &req->count, sizeof(starpu_ssize_t)); + nm_sr_recv_peek(req->backend->session, &req->backend->data_request, &data_header); + + // Now we know the size, allocate the buffer: + req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); + STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld", req->count); + + /* Last step: give this buffer to NewMadeleine to receive data + * We need to use an iov to easily take into account the offset used + * during the peek. */ + req->backend->unknown_datatype_v[0].iov_base = &req->count; + req->backend->unknown_datatype_v[0].iov_len = sizeof(starpu_ssize_t); + req->backend->unknown_datatype_v[1].iov_base = req->ptr; + req->backend->unknown_datatype_v[1].iov_len = req->count; + nm_data_iov_build(&req->backend->unknown_datatype_data, req->backend->unknown_datatype_v, 2); + nm_sr_recv_offset(req->backend->session, &req->backend->data_request, sizeof(starpu_ssize_t)); + nm_sr_recv_unpack_data(req->backend->session, &req->backend->data_request, &req->backend->unknown_datatype_data); + } + else if (event & NM_SR_EVENT_FINALIZED) + { + _starpu_mpi_handle_request_termination(req); + } + else if (event & NM_SR_EVENT_RECV_COMPLETED && !_starpu_mpi_recv_wait_finalize && req->sequential_consistency && starpu_data_get_interface_ops(req->data_handle)->peek_data) + { + _starpu_mpi_handle_received_data(req); + } +} + +void _starpu_mpi_irecv_unknown_datatype(struct _starpu_mpi_req *req) +{ + _STARPU_MPI_LOG_IN(); + + STARPU_ASSERT_MSG(req->registered_datatype != 1, "Datatype is registered, no need to receive it through this way !"); + + 
_STARPU_MPI_DEBUG(20, "post NM irecv (datatype unknown) request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); + + _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); + + /* we post a recv without giving a buffer because we don't know the required size of this buffer, + * the buffer will be allocated and provided to nmad when the header of data will be received, + * in _starpu_mpi_unknown_datatype_recv_callback() */ + nm_sr_recv_init(req->backend->session, &req->backend->data_request); + nm_sr_request_set_ref(&req->backend->data_request, req); + nm_sr_request_monitor(req->backend->session, &req->backend->data_request, + NM_SR_EVENT_FINALIZED | NM_SR_EVENT_RECV_DATA | NM_SR_EVENT_RECV_COMPLETED, + &_starpu_mpi_unknown_datatype_recv_callback); + nm_sr_recv_irecv(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL); + + _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); + + _STARPU_MPI_LOG_OUT(); +} + +#endif // STARPU_USE_MPI_NMAD diff --git a/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h b/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h new file mode 100644 index 0000000..9c0d395 --- /dev/null +++ b/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ +#define __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ + +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef STARPU_USE_MPI_NMAD + +#include +#include + + +void _starpu_mpi_isend_prepare_unknown_datatype(struct _starpu_mpi_req* req, struct nm_data_s* data); +void _starpu_mpi_isend_unknown_datatype(struct _starpu_mpi_req *req); +void _starpu_mpi_irecv_unknown_datatype(struct _starpu_mpi_req *req); + + +#endif // STARPU_USE_MPI_NMAD + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ diff --git a/mpi/src/starpu_mpi.c b/mpi/src/starpu_mpi.c new file mode 100644 index 0000000..e604a87 --- /dev/null +++ b/mpi/src/starpu_mpi.c @@ -0,0 +1,742 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2019,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int _starpu_mpi_choose_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + if (mode & STARPU_W) + { + /* Receiving */ + + /* TODO: lookup NIC location */ + /* Where to receive the data? */ + if (handle->home_node >= 0 && starpu_node_get_kind(handle->home_node) == STARPU_CPU_RAM) + /* For now, better use the home node to avoid duplicates */ + return handle->home_node; + + /* Several potential places */ + unsigned i; + if (_starpu_mpi_has_cuda) + for (i = 0; i < STARPU_MAXNODES; i++) + { + /* Note: We take as a hint that it's allocated on the GPU as + * a clue that we want to push directly to the GPU */ + if (starpu_node_get_kind(i) == STARPU_CUDA_RAM + && handle->per_node[i].allocated + && (_starpu_mpi_cuda_devid == -1 || _starpu_mpi_cuda_devid == starpu_memory_node_get_devid(i))) + /* This node already has allocated buffers, let's just use it */ + return i; + } + + for (i = 0; i < STARPU_MAXNODES; i++) + { + /* Note: We take as a hint that it's allocated on a NUMA node as + * a clue that we want to push directly to that NUMA node */ + if (starpu_node_get_kind(i) == STARPU_CPU_RAM + && handle->per_node[i].allocated) + /* This node already has allocated buffers, let's just use it */ + return i; + } + + /* No luck, take the least loaded node */ + starpu_ssize_t maximum = 0; + starpu_ssize_t needed = starpu_data_get_alloc_size(handle); + unsigned node = STARPU_MAIN_RAM; + + for (i = 0; i < STARPU_MAXNODES; i++) + { + if (starpu_node_get_kind(i) == STARPU_CPU_RAM + || (_starpu_mpi_has_cuda && starpu_node_get_kind(i) == STARPU_CUDA_RAM)) + { + starpu_ssize_t size = starpu_memory_get_available(i); + if (size >= needed && size > maximum) + { + node = i; + maximum = size; + } + } + } + return node; + } + else + { + /* Sending */ + + /* Several potential 
places */ + unsigned i; + for (i = 0; i < STARPU_MAXNODES; i++) + { + if ((starpu_node_get_kind(i) == STARPU_CPU_RAM || + (starpu_node_get_kind(i) == STARPU_CUDA_RAM && _starpu_mpi_has_cuda + && (_starpu_mpi_cuda_devid == -1 || _starpu_mpi_cuda_devid == starpu_memory_node_get_devid(i)))) + && handle->per_node[i].state != STARPU_INVALID) + /* This node already has the value, let's just use it */ + /* TODO: rather pick up place next to NIC */ + return i; + } + + /* No luck, take the least loaded node, to transfer from e.g. GPU */ + starpu_ssize_t maximum = 0; + starpu_ssize_t needed = starpu_data_get_alloc_size(handle); + unsigned node = STARPU_MAIN_RAM; + + for (i = 0; i < STARPU_MAXNODES; i++) + { + if (starpu_node_get_kind(i) == STARPU_CPU_RAM) + { + starpu_ssize_t size = starpu_memory_get_available(i); + if (size >= needed && size > maximum) + { + node = i; + maximum = size; + } + } + } + return node; + } +} + +static void _starpu_mpi_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode) +{ + struct _starpu_mpi_req *req = arg; + int node = *nodep; + + /* The data was acquired in terms of dependencies, we can now look the + * current state of the handle and decide which node we prefer for the data + * fetch */ + + if (node < 0) + node = _starpu_mpi_choose_node(req->data_handle, mode); + + req->node = *nodep = node; +} + +void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency) +{ + int node = -1; + + /* Asynchronously request StarPU to fetch the data in main memory: when + * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and + * the request is actually submitted */ + + if (_starpu_mpi_mem_throttle && mode & STARPU_W && !req->data_handle->initialized) + { + /* We will trigger allocation, pre-reserve for it */ + size_t size = starpu_data_get_size(req->data_handle); + if (size) + { + /* FIXME: rather take the less-loaded NUMA node */ + node = 
STARPU_MAIN_RAM; + + /* This will potentially block */ + starpu_memory_allocate(node, size, STARPU_MEMORY_WAIT); + req->reserved_size = size; + /* This also decides where we will store the data */ + req->node = node; + } + } + + if (sequential_consistency) + { + starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, node, mode, _starpu_mpi_acquired_callback, _starpu_mpi_submit_ready_request, (void *)req, 1 /*sequential consistency*/, 1, &req->pre_sync_jobid, &req->post_sync_jobid, req->prio); + } + else + { + /* post_sync_job_id has already been filled */ + starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, node, mode, _starpu_mpi_acquired_callback, _starpu_mpi_submit_ready_request, (void *)req, 0 /*sequential consistency*/, 1, &req->pre_sync_jobid, NULL, req->prio); + } +} + +struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency) +{ + if (STARPU_UNLIKELY(_starpu_mpi_fake_world_size != -1)) + { + /* Don't actually do the communication */ + return NULL; + } + +#ifdef STARPU_MPI_PEDANTIC_ISEND + enum starpu_data_access_mode mode = STARPU_RW; +#else + enum starpu_data_access_mode mode = STARPU_R; +#endif + + struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _mpi_backend._starpu_mpi_backend_isend_size_func, sequential_consistency, 0, 0); + _starpu_mpi_req_willpost(req); + + if (_starpu_mpi_use_coop_sends && detached == 1 && sync == 0 && callback == NULL) + { + /* It's a send & forget send, we can perhaps optimize its distribution over several nodes */ + _starpu_mpi_coop_send(data_handle, req, mode, sequential_consistency); + return req; + } + + /* Post normally */ + _starpu_mpi_isend_irecv_common(req, mode, sequential_consistency); + 
return req; +} + +int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req"); + + struct _starpu_mpi_req *req; + _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, 0); + req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 0, 0, prio, NULL, NULL, 1); + _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, 0); + + STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common"); + *public_req = req; + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm) +{ + return starpu_mpi_isend_prio(data_handle, public_req, dest, data_tag, 0, comm); +} + +int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + _STARPU_MPI_LOG_IN(); + _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 0, prio, callback, arg, 1); + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg); +} + +int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) +{ + starpu_mpi_req req; + MPI_Status status; + int ret; + + _STARPU_MPI_LOG_IN(); + ret = starpu_mpi_isend_prio(data_handle, &req, dest, data_tag, prio, comm); + if (ret) + return ret; + + memset(&status, 0, sizeof(MPI_Status)); + ret = starpu_mpi_wait(&req, &status); + + _STARPU_MPI_LOG_OUT(); + return ret; +} + +int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, 
starpu_mpi_tag_t data_tag, MPI_Comm comm) +{ + return starpu_mpi_send_prio(data_handle, dest, data_tag, 0, comm); +} + +int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_issend needs a valid starpu_mpi_req"); + + struct _starpu_mpi_req *req; + req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 0, 1, prio, NULL, NULL, 1); + + STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common"); + *public_req = req; + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm) +{ + return starpu_mpi_issend_prio(data_handle, public_req, dest, data_tag, 0, comm); +} + +int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 1, prio, callback, arg, 1); + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_issend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg); +} + +struct _starpu_mpi_req* _starpu_mpi_isend_cache_aware(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *_arg, int sequential_consistency, int* cache_flag) +{ + struct _starpu_mpi_req* req = NULL; + int already_sent = starpu_mpi_cached_send_set(data_handle, dest); + if (already_sent == 0) + { + *cache_flag = 0; + if (data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using 
starpu_mpi_data_register\n"); + _STARPU_MPI_DEBUG(1, "Send data %p to %d\n", data_handle, dest); + req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached, sync, prio, callback, _arg, sequential_consistency); + } + else + { + _STARPU_MPI_DEBUG(1, "STARPU CACHE: Data already sent\n"); + *cache_flag = 1; + if (callback) + callback(_arg); + } + return req; +} + +struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int prio) +{ + if (_starpu_mpi_fake_world_size != -1) + { + /* Don't actually do the communication */ + return NULL; + } + + struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, source, data_tag, comm, detached, sync, prio, callback, arg, RECV_REQ, _mpi_backend._starpu_mpi_backend_irecv_size_func, sequential_consistency, is_internal_req, count); + _starpu_mpi_req_willpost(req); + + if (sequential_consistency == 0) + { + /* Synchronization task jobid from redux is used */ + _starpu_mpi_redux_fill_post_sync_jobid(arg, &(req->post_sync_jobid)); + } + + _starpu_mpi_isend_irecv_common(req, STARPU_W, sequential_consistency); + return req; +} + +int _starpu_mpi_irecv_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) +{ + _STARPU_MPI_LOG_IN(); + STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req"); + + struct _starpu_mpi_req *req; + _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(source, data_tag); + req = _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 0, 0, NULL, NULL, 1, 0, 0, prio); + _STARPU_MPI_TRACE_IRECV_COMPLETE_END(source, data_tag); + + STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common"); + *public_req = req; + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int 
starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm) +{ + return _starpu_mpi_irecv_prio(data_handle, public_req, source, data_tag, STARPU_DEFAULT_PRIO, comm); +} + +int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, 1, 0, 0, STARPU_DEFAULT_PRIO); + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_irecv_detached_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, 1, 0, 0, prio); + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency) +{ + _STARPU_MPI_LOG_IN(); + + _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, sequential_consistency, 0, 0, STARPU_DEFAULT_PRIO); + + _STARPU_MPI_LOG_OUT(); + return 0; +} + +int _starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status) +{ + STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); + + starpu_mpi_req req; + int ret; + + _STARPU_MPI_LOG_IN(); + + ret = _starpu_mpi_irecv_prio(data_handle, &req, source, data_tag, prio, comm); + if (ret) + return ret; + ret = starpu_mpi_wait(&req, status); + + _STARPU_MPI_LOG_OUT(); + return ret; +} + +int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, 
MPI_Status *status) +{ + return _starpu_mpi_recv_prio(data_handle, source, data_tag, STARPU_DEFAULT_PRIO, comm, status); +} + +int starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status) +{ + return _starpu_mpi_recv_prio(data_handle, source, data_tag, prio, comm, status); +} + +struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *_arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int* cache_flag) +{ + struct _starpu_mpi_req* req = NULL; + int already_received = starpu_mpi_cached_cp_receive_set(data_handle); + if (already_received == 0) + { + if (data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, source); + req = _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, detached, sync, callback, _arg, sequential_consistency, is_internal_req, count, STARPU_DEFAULT_PRIO); //TODO: Allow to pass prio in args + *cache_flag = 0; + } + else + { + _STARPU_MPI_DEBUG(1, "STARPU CACHE: Data already received\n"); + *cache_flag =1; + if (callback) + callback(_arg); + } + return req; +} + +int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) +{ + STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); + return _mpi_backend._starpu_mpi_backend_wait(public_req, status); +} + +int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) +{ + STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); + return _mpi_backend._starpu_mpi_backend_test(public_req, flag, status); +} + +int 
starpu_mpi_barrier(MPI_Comm comm) +{ + return _mpi_backend._starpu_mpi_backend_barrier(comm); +} + +void _starpu_mpi_data_clear(starpu_data_handle_t data_handle) +{ + struct _starpu_mpi_data *data = data_handle->mpi_data; + _mpi_backend._starpu_mpi_backend_data_clear(data_handle); + _starpu_mpi_cache_data_clear(data_handle); + _starpu_spin_destroy(&data->coop_lock); + free(data->redux_map); + data->redux_map = NULL; + free(data); +} + +struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle) +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + if (mpi_data) + { + STARPU_ASSERT(mpi_data->magic == 42); + } + else + { + _STARPU_CALLOC(mpi_data, 1, sizeof(struct _starpu_mpi_data)); + mpi_data->magic = 42; + mpi_data->node_tag.data_tag = -1; + mpi_data->node_tag.node.rank = -1; + mpi_data->node_tag.node.comm = MPI_COMM_WORLD; + mpi_data->nb_future_sends = 0; + _starpu_spin_init(&mpi_data->coop_lock); + data_handle->mpi_data = mpi_data; + _starpu_mpi_cache_data_init(data_handle); + _starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear); + } + return mpi_data; +} + +void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm) +{ + struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); + + if (data_tag != -1) + { + _mpi_backend._starpu_mpi_backend_data_register(data_handle, data_tag); + mpi_data->node_tag.data_tag = data_tag; + _STARPU_MPI_TRACE_DATA_SET_TAG(data_handle, data_tag); + } + if (rank != -1) + { + _STARPU_MPI_TRACE_DATA_SET_RANK(data_handle, rank); + mpi_data->node_tag.node.rank = rank; + mpi_data->node_tag.node.comm = comm; + } +} + +void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm) +{ + starpu_mpi_data_register_comm(handle, -1, rank, comm); +} + +void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag) +{ + starpu_mpi_data_register_comm(handle, data_tag, -1, 
MPI_COMM_WORLD); +} + +int starpu_mpi_data_get_rank(starpu_data_handle_t data) +{ + STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); + return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.node.rank; +} + +starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t data) +{ + STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); + return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.data_tag; +} + +char* starpu_mpi_data_get_redux_map(starpu_data_handle_t data) +{ + STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); + return ((struct _starpu_mpi_data *)(data->mpi_data))->redux_map; +} + +int starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg) +{ + int me, rank; + starpu_mpi_tag_t data_tag; + + rank = starpu_mpi_data_get_rank(data_handle); + if (rank == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register() or starpu_mpi_data_register_comm()\n"); + } + + starpu_mpi_comm_rank(comm, &me); + if (node == rank) + return 0; + + data_tag = starpu_mpi_data_get_tag(data_handle); + if (data_tag == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register() or starpu_mpi_data_register_comm()\n"); + } + + if (me == node) + { + _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); + int already_received = starpu_mpi_cached_receive_set(data_handle); + if (already_received == 0) + { + _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank); + return starpu_mpi_irecv_detached(data_handle, rank, data_tag, comm, callback, arg); + } + } + else if (me == rank) + { + _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); + int already_sent = 
starpu_mpi_cached_send_set(data_handle, node); + if (already_sent == 0) + { + _STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node); + return starpu_mpi_isend_detached(data_handle, node, data_tag, comm, NULL, NULL); + } + } + return 0; +} + +int starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node) +{ + int me, rank; + starpu_mpi_tag_t data_tag; + + rank = starpu_mpi_data_get_rank(data_handle); + if (rank == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); + } + + starpu_mpi_comm_rank(comm, &me); + if (node == rank) + return 0; + + data_tag = starpu_mpi_data_get_tag(data_handle); + if (data_tag == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + } + + if (me == node) + { + MPI_Status status; + _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); + int already_received = starpu_mpi_cached_receive_set(data_handle); + if (already_received == 0) + { + _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank); + return starpu_mpi_recv(data_handle, rank, data_tag, comm, &status); + } + } + else if (me == rank) + { + _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); + int already_sent = starpu_mpi_cached_send_set(data_handle, node); + if (already_sent == 0) + { + _STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node); + return starpu_mpi_send(data_handle, node, data_tag, comm); + } + } + return 0; +} + +void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle) +{ + int size, i; + starpu_mpi_comm_size(comm, &size); + for (i = 0; i < size; i++) + starpu_mpi_get_data_on_node_detached(comm, data_handle, i, NULL, NULL); +} + +void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_rank) +{ + int old_rank = starpu_mpi_data_get_rank(data); + if (new_rank == 
old_rank) + /* Already there */ + return; + + /* First submit data migration if it's not already on destination */ + starpu_mpi_get_data_on_node_detached(comm, data, new_rank, NULL, NULL); + + /* And note new owner */ + starpu_mpi_data_set_rank_comm(data, new_rank, comm); + + /* Flush cache in all other nodes */ + /* TODO: Ideally we'd transmit the knowledge of who owns it */ + /* TODO: or at least remember that the previous owner has the data, that's an easy case to support */ + starpu_mpi_cache_flush(comm, data); + return; +} + +int starpu_mpi_wait_for_all(MPI_Comm comm) +{ + /* If the user forgets to call mpi_redux_data or insert R tasks on the reduced handles */ + /* then, we wrap reduction patterns for them. This is typical of benchmarks */ + _starpu_mpi_redux_wrapup_data_all(); + return _mpi_backend._starpu_mpi_backend_wait_for_all(comm); +} + +void starpu_mpi_comm_stats_disable() +{ + _starpu_mpi_comm_stats_disable(); +} + +void starpu_mpi_comm_stats_enable() +{ + _starpu_mpi_comm_stats_enable(); +} + +int _starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority) +{ + int src, dst; + int ret; + + if (dst_handle == src_handle) + { + if (callback_func) + callback_func(callback_arg); + return 0; + } + + ret = 0; + src = starpu_mpi_data_get_rank(src_handle); + dst = starpu_mpi_data_get_rank(dst_handle); + + if (src == dst) + // Both data are on the same node, no need to transfer data + ret = starpu_data_cpy_priority(dst_handle, src_handle, asynchronous, callback_func, callback_arg, priority); + else + { + // We need to transfer data + int rank; + starpu_mpi_tag_t tag; + + tag = starpu_mpi_data_get_tag(dst_handle); + starpu_mpi_comm_rank(comm, &rank); + + if (rank == src) + { + if (asynchronous == 1) + ret = starpu_mpi_isend_detached_prio(src_handle, dst, tag, priority, comm, NULL, NULL); + else + ret = starpu_mpi_send_prio(src_handle, 
dst, tag, priority, comm); + } + else if (rank == dst) + { + if (asynchronous == 1) + ret = starpu_mpi_irecv_detached_prio(dst_handle, src, tag, priority, comm, callback_func, callback_arg); + else + { + ret = starpu_mpi_recv_prio(dst_handle, src, tag, priority, comm, MPI_STATUS_IGNORE); + if (callback_func) + callback_func(callback_arg); + } + return ret; + + } + } + starpu_mpi_cache_flush(comm, dst_handle); + return ret; +} + +int starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg) +{ + return _starpu_mpi_data_cpy(dst_handle, src_handle, comm, asynchronous, callback_func, callback_arg, STARPU_DEFAULT_PRIO); +} + +int starpu_mpi_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority) +{ + return _starpu_mpi_data_cpy(dst_handle, src_handle, comm, asynchronous, callback_func, callback_arg, priority); +} diff --git a/mpi/src/starpu_mpi_cache.c b/mpi/src/starpu_mpi_cache.c new file mode 100644 index 0000000..31ddb4d --- /dev/null +++ b/mpi/src/starpu_mpi_cache.c @@ -0,0 +1,432 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include + +/* Whether we are allowed to keep copies of remote data. */ +struct _starpu_data_entry +{ + UT_hash_handle hh; + starpu_data_handle_t data_handle; +}; + +static starpu_pthread_mutex_t _cache_mutex; +static struct _starpu_data_entry *_cache_data = NULL; +int _starpu_cache_enabled=1; +static MPI_Comm _starpu_cache_comm; +static int _starpu_cache_comm_size; + +static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle); + +int starpu_mpi_cache_is_enabled() +{ + return _starpu_cache_enabled==1; +} + +int starpu_mpi_cache_set(int enabled) +{ + if (enabled == 1) + { + _starpu_cache_enabled = 1; + } + else + { + if (_starpu_cache_enabled) + { + // We need to clean the cache + starpu_mpi_cache_flush_all_data(_starpu_cache_comm); + _starpu_mpi_cache_shutdown(); + } + _starpu_cache_enabled = 0; + } + return 0; +} + +void _starpu_mpi_cache_init(MPI_Comm comm) +{ + _starpu_cache_enabled = starpu_getenv_number("STARPU_MPI_CACHE"); + if (_starpu_cache_enabled == -1) + { + _starpu_cache_enabled = 1; + } + + if (_starpu_cache_enabled == 0) + { + _STARPU_DISP("Warning: StarPU MPI Communication cache is disabled\n"); + return; + } + + _starpu_cache_comm = comm; + starpu_mpi_comm_size(comm, &_starpu_cache_comm_size); + _starpu_mpi_cache_stats_init(); + STARPU_PTHREAD_MUTEX_INIT(&_cache_mutex, NULL); +} + +void _starpu_mpi_cache_shutdown(void) +{ + if (_starpu_cache_enabled == 0) + return; + + struct _starpu_data_entry *entry=NULL, *tmp=NULL; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + HASH_ITER(hh, _cache_data, entry, tmp) + { + HASH_DEL(_cache_data, entry); + free(entry); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&_cache_mutex); + _starpu_mpi_cache_stats_shutdown(); +} + +void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle) +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 1) + { + 
struct _starpu_data_entry *entry; + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + _starpu_mpi_cache_flush_nolock(data_handle); + HASH_FIND_PTR(_cache_data, &data_handle, entry); + if (entry != NULL) + { + HASH_DEL(_cache_data, entry); + free(entry); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + } + + free(mpi_data->cache_sent); +} + +void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle) +{ + int i; + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + mpi_data->cache_received = 0; + mpi_data->ft_induced_cache_received = 0; + mpi_data->ft_induced_cache_received_count = 0; + _STARPU_MALLOC(mpi_data->cache_sent, _starpu_cache_comm_size*sizeof(mpi_data->cache_sent[0])); + for(i=0 ; i<_starpu_cache_comm_size ; i++) + { + mpi_data->cache_sent[i] = 0; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); +} + +static void _starpu_mpi_cache_data_add_nolock(starpu_data_handle_t data_handle) +{ + struct _starpu_data_entry *entry; + + if (_starpu_cache_enabled == 0) + return; + + HASH_FIND_PTR(_cache_data, &data_handle, entry); + if (entry == NULL) + { + _STARPU_MPI_MALLOC(entry, sizeof(*entry)); + entry->data_handle = data_handle; + HASH_ADD_PTR(_cache_data, data_handle, entry); + } +} + +static void _starpu_mpi_cache_data_remove_nolock(starpu_data_handle_t data_handle) +{ + struct _starpu_data_entry *entry; + + if (_starpu_cache_enabled == 0) + return; + + HASH_FIND_PTR(_cache_data, &data_handle, entry); + if (entry) + { + HASH_DEL(_cache_data, entry); + free(entry); + } +} + +/************************************** + * Received cache + **************************************/ +void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle) +{ + int mpi_rank = starpu_mpi_data_get_rank(data_handle); + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + 
STARPU_ASSERT(mpi_data->magic == 42); + STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size); + + if (mpi_data->cache_received == 1) + { +#ifdef STARPU_DEVEL +# warning TODO: Somebody else will write to the data, so discard our cached copy if any. starpu_mpi could just remember itself. +#endif + _STARPU_MPI_DEBUG(2, "Clearing receive cache for data %p\n", data_handle); + mpi_data->cache_received = 0; + mpi_data->ft_induced_cache_received = 0; + mpi_data->ft_induced_cache_received_count = 0; + starpu_data_invalidate_submit(data_handle); + _starpu_mpi_cache_data_remove_nolock(data_handle); + _starpu_mpi_cache_stats_dec(mpi_rank, data_handle); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); +} + + +int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle) +{ + int mpi_rank = starpu_mpi_data_get_rank(data_handle); + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return 0; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + STARPU_ASSERT(mpi_data->magic == 42); + STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. 
Max node is %d\n", mpi_rank, _starpu_cache_comm_size); + + int already_received = mpi_data->cache_received; + if (already_received == 0) + { + _STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank); + mpi_data->cache_received = 1; + _starpu_mpi_cache_data_add_nolock(data_handle); + _starpu_mpi_cache_stats_inc(mpi_rank, data_handle); + } + else + { +#ifdef STARPU_USE_MPI_FT_STATS + if (mpi_data->ft_induced_cache_received == 1 && mpi_data->ft_induced_cache_received_count == 0) + { + _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle)); + _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(starpu_data_get_size(data_handle)); + mpi_data->ft_induced_cache_received_count = 1; + } +#endif //STARPU_USE_MPI_FT_STATS + _STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + return already_received; +} + +int starpu_mpi_cached_cp_receive_set(starpu_data_handle_t data_handle) +{ + int mpi_rank = starpu_mpi_data_get_rank(data_handle); + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return 0; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + STARPU_ASSERT(mpi_data->magic == 42); + STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. 
Max node is %d\n", mpi_rank, _starpu_cache_comm_size); + + int already_received = mpi_data->cache_received; + if (already_received == 0) + { + _STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank); + mpi_data->cache_received = 1; + mpi_data->ft_induced_cache_received = 1; +#ifdef STARPU_USE_MPI_FT_STATS + _STARPU_MPI_FT_STATS_RECV_CP_DATA(starpu_data_get_size(data_handle)); +#endif + _starpu_mpi_cache_data_add_nolock(data_handle); + _starpu_mpi_cache_stats_inc(mpi_rank, data_handle); + } + else + { +#ifdef STARPU_USE_MPI_FT_STATS + if (mpi_data->ft_induced_cache_received == 1) + _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(starpu_data_get_size(data_handle)); + else + _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle)); +#endif + _STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + return already_received; +} + +int starpu_mpi_cached_receive(starpu_data_handle_t data_handle) /* Read-only query: returns the cache_received flag (0 when the cache is disabled) */ +{ + int already_received; + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return 0; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + STARPU_ASSERT(mpi_data->magic == 42); + already_received = mpi_data->cache_received; + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + return already_received; +} + +/************************************** + * Send cache + **************************************/ +void starpu_mpi_cached_send_clear(starpu_data_handle_t data_handle) /* Reset every "already sent" flag of data_handle; no-op when the cache is disabled */ +{ + int n, size; + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &size); /* size = number of ranks in the communicator the data is registered on */ + for(n=0 ; n<size ; n++) + { + if (mpi_data->cache_sent[n] == 1) + { + _STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle); + mpi_data->cache_sent[n] = 0; +
_starpu_mpi_cache_data_remove_nolock(data_handle); /* drop the handle from the _cache_data hash (harmless if already removed) */ + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); +} + +int starpu_mpi_cached_send_set(starpu_data_handle_t data_handle, int dest) /* Mark data_handle as sent to dest; returns the previous flag (0 = caller still has to send) */ +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + + if (_starpu_cache_enabled == 0) + return 0; + + STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size); + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + int already_sent = mpi_data->cache_sent[dest]; + if (mpi_data->cache_sent[dest] == 0) + { + mpi_data->cache_sent[dest] = 1; + _starpu_mpi_cache_data_add_nolock(data_handle); + _STARPU_MPI_DEBUG(2, "Noting that data %p has already been sent to %d\n", data_handle, dest); + } + else + { + _STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data_handle, dest); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + return already_sent; +} + +int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest) /* Read-only query of the "already sent to dest" flag */ +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + int already_sent; + + if (_starpu_cache_enabled == 0) + return 0; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. 
Max node is %d\n", dest, _starpu_cache_comm_size); + already_sent = mpi_data->cache_sent[dest]; + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); + return already_sent; +} + +static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle) /* Clear all sent/received cache flags of data_handle; takes no lock itself, the callers in this file hold _cache_mutex */ +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + int i, nb_nodes; + + if (_starpu_cache_enabled == 0) + return; + + starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &nb_nodes); /* nb_nodes = number of ranks in the communicator the data is registered on */ + for(i=0 ; i<nb_nodes ; i++) + { + if (mpi_data->cache_sent[i] == 1) + { + _STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle); + mpi_data->cache_sent[i] = 0; + _starpu_mpi_cache_stats_dec(i, data_handle); + } + } + + if (mpi_data->cache_received == 1) + { + int mpi_rank = starpu_mpi_data_get_rank(data_handle); + _STARPU_MPI_DEBUG(2, "Clearing received cache for data %p\n", data_handle); + mpi_data->cache_received = 0; + mpi_data->ft_induced_cache_received = 0; + mpi_data->ft_induced_cache_received_count = 0; + _starpu_mpi_cache_stats_dec(mpi_rank, data_handle); + } +} + +static void _starpu_mpi_cache_flush_and_invalidate_nolock(MPI_Comm comm, starpu_data_handle_t data_handle) /* Flush the cache flags, then invalidate the local copy unless this rank owns the data or the owner is unset (-1) */ +{ + int my_rank, mpi_rank; + + _starpu_mpi_cache_flush_nolock(data_handle); + + starpu_mpi_comm_rank(comm, &my_rank); + mpi_rank = starpu_mpi_data_get_rank(data_handle); + if (mpi_rank != my_rank && mpi_rank != -1) + // Clean the memory on nodes which do not own the data + starpu_data_invalidate_submit(data_handle); +} + +void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle) /* _starpu_mpi_data_flush() runs even when the cache is disabled; the cache part is skipped in that case */ +{ + _starpu_mpi_data_flush(data_handle); + + if (_starpu_cache_enabled == 0) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + _starpu_mpi_cache_flush_and_invalidate_nolock(comm, data_handle); + _starpu_mpi_cache_data_remove_nolock(data_handle); + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); +} + +void starpu_mpi_cache_flush_all_data(MPI_Comm comm) +{ + struct _starpu_data_entry *entry=NULL, *tmp=NULL; + + if (_starpu_cache_enabled == 0) + return; + +
STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); + HASH_ITER(hh, _cache_data, entry, tmp) + { + _starpu_mpi_cache_flush_and_invalidate_nolock(comm, entry->data_handle); + HASH_DEL(_cache_data, entry); + free(entry); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); +} diff --git a/mpi/src/starpu_mpi_cache.h b/mpi/src/starpu_mpi_cache.h new file mode 100644 index 0000000..8686eda --- /dev/null +++ b/mpi/src/starpu_mpi_cache.h @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_CACHE_H__ +#define __STARPU_MPI_CACHE_H__ + +#include +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern int _starpu_cache_enabled; +void _starpu_mpi_cache_init(MPI_Comm comm); +void _starpu_mpi_cache_shutdown(void); +void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle); +void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle); + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_CACHE_H__ diff --git a/mpi/src/starpu_mpi_cache_stats.c b/mpi/src/starpu_mpi_cache_stats.c new file mode 100644 index 0000000..6ae5692 --- /dev/null +++ b/mpi/src/starpu_mpi_cache_stats.c @@ -0,0 +1,61 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static int stats_enabled=0; + +void _starpu_mpi_cache_stats_init() +{ + stats_enabled = starpu_getenv_number("STARPU_MPI_CACHE_STATS"); + if (stats_enabled == -1) + { + stats_enabled = 0; + } + if (stats_enabled == 0) + return; + + _STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n"); + +} + +void _starpu_mpi_cache_stats_shutdown() +{ + if (stats_enabled == 0) + return; +} + +void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count) +{ + size_t size; + + if (stats_enabled == 0) + return; + + size = starpu_data_get_size(data_handle); + + if (count == 1) + { + _STARPU_MPI_MSG("[communication cache] + %10ld to %u\n", (long)size, dst); + } + else // count == -1 + { + _STARPU_MPI_MSG("[communication cache] - %10ld from %u\n", (long)size, dst); + } +} diff --git a/mpi/src/starpu_mpi_cache_stats.h b/mpi/src/starpu_mpi_cache_stats.h new file mode 100644 index 0000000..389ce3d --- /dev/null +++ b/mpi/src/starpu_mpi_cache_stats.h @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_CACHE_STATS_H__ +#define __STARPU_MPI_CACHE_STATS_H__ + +#include +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_cache_stats_init(); +void _starpu_mpi_cache_stats_shutdown(); + +void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count); + +#define _starpu_mpi_cache_stats_inc(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, +1) +#define _starpu_mpi_cache_stats_dec(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, -1) + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_CACHE_STATS_H__ diff --git a/mpi/src/starpu_mpi_collective.c b/mpi/src/starpu_mpi_collective.c new file mode 100644 index 0000000..56e5c23 --- /dev/null +++ b/mpi/src/starpu_mpi_collective.c @@ -0,0 +1,169 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +struct _callback_arg +{ + void (*callback)(void *); + void *arg; + int nb; + int count; +}; + +static +void _callback_collective(void *arg) +{ + struct _callback_arg *callback_arg = arg; + callback_arg->nb ++; + if (callback_arg->nb == callback_arg->count) + { + callback_arg->callback(callback_arg->arg); + free(callback_arg); + } +} + +static +int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int root, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg, void (**callback_func)(void *), struct _callback_arg **callback_arg) +{ + void (*callback)(void *); + + callback = (rank == root) ? scallback : rcallback; + if (*callback) + { + int x; + + *callback_func = _callback_collective; + + _STARPU_MPI_MALLOC(*callback_arg, sizeof(struct _callback_arg)); + (*callback_arg)->count = 0; + (*callback_arg)->nb = 0; + (*callback_arg)->callback = (rank == root) ? scallback : rcallback; + (*callback_arg)->arg = (rank == root) ? 
sarg : rarg; + + for(x = 0; x < count ; x++) + { + if (data_handles[x]) + { + int owner = starpu_mpi_data_get_rank(data_handles[x]); + starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); + STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); + if ((rank == root) && (owner != root)) + { + (*callback_arg)->count ++; + } + if ((rank != root) && (owner == rank)) + { + (*callback_arg)->count ++; + } + } + } + + if (!(*callback_arg)->count) + { + free(*callback_arg); + return 1; + } + } + + return 0; +} + +int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) +{ + int rank; + int x; + struct _callback_arg *callback_arg = NULL; + void (*callback_func)(void *) = NULL; + + starpu_mpi_comm_rank(comm, &rank); + + x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg); + if (x == 1) + return 0; + + for(x = 0; x < count ; x++) + { + if (data_handles[x]) + { + int ret; + int owner = starpu_mpi_data_get_rank(data_handles[x]); + starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); + STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); + if ((rank == root) && (owner != root)) + { + //fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, owner); + ret = starpu_mpi_isend_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg); + if (ret) + return ret; + + } + if ((rank != root) && (owner == rank)) + { + //fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, root); + ret = starpu_mpi_irecv_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg); + if (ret) + return ret; + } + } + } + return 0; +} + +int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) +{ + int 
rank; + int x; + struct _callback_arg *callback_arg = NULL; + void (*callback_func)(void *) = NULL; + + starpu_mpi_comm_rank(comm, &rank); + + x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg); + if (x == 1) + return 0; + + for(x = 0; x < count ; x++) + { + if (data_handles[x]) + { + int ret; + int owner = starpu_mpi_data_get_rank(data_handles[x]); + starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); + STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); + if ((rank == root) && (owner != root)) + { + //fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, owner); + ret = starpu_mpi_irecv_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg); + if (ret) + return ret; + + } + if ((rank != root) && (owner == rank)) + { + //fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, root); + ret = starpu_mpi_isend_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg); + if (ret) + return ret; + } + } + } + return 0; +} diff --git a/mpi/src/starpu_mpi_coop_sends.c b/mpi/src/starpu_mpi_coop_sends.c new file mode 100644 index 0000000..8ba6fc6 --- /dev/null +++ b/mpi/src/starpu_mpi_coop_sends.c @@ -0,0 +1,369 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +/* + * One node sends the same data to several nodes. Gather them into a + * "coop_sends", which then has a global view of all the required sends, and can + * establish a diffusion tree by telling receiving nodes to retransmit what they + * received (forwards) to others, and to others that they will receive from the + * former (redirects). + */ + +/* This is called after a request is finished processing, to release the data */ +void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req) +{ + if (!req->data_handle) + return; + + if (_starpu_mpi_req_multilist_queued_coop_sends(req)) + { + struct _starpu_mpi_coop_sends *coop_sends = req->coop_sends_head; + assert(coop_sends != NULL); + + struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data; + int last; + _starpu_spin_lock(&mpi_data->coop_lock); + /* Part of a cooperative send, dequeue ourself from others */ + _starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req); + last = _starpu_mpi_req_multilist_empty_coop_sends(&coop_sends->reqs); + _starpu_spin_unlock(&mpi_data->coop_lock); + if (last) + { + /* We were last, release data */ + free(coop_sends->reqs_array); + free(coop_sends); + starpu_data_release_on_node(req->data_handle, req->node); + } + } + else + { + /* Trivial request */ + starpu_data_release_on_node(req->data_handle, req->node); + } +} + +/* The data was acquired in terms of dependencies, we can now look the + * current state of the handle and decide which node we prefer for the data + * fetch */ +static void _starpu_mpi_coop_send_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode) +{ + struct _starpu_mpi_coop_sends *coop_sends = arg; + int node = *nodep; + + if (node < 0) + node = _starpu_mpi_choose_node(coop_sends->data_handle, mode); + + /* Record the node in the first req */ + 
_starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs)->node = node; + + *nodep = node; +} + +/* Comparison function for getting qsort to put requests with high priority first */ +static int _starpu_mpi_reqs_prio_compare(const void *a, const void *b) +{ + const struct _starpu_mpi_req * const *ra = a; + const struct _starpu_mpi_req * const *rb = b; + if ((*rb)->prio < (*ra)->prio) + return -1; + else if ((*rb)->prio == (*ra)->prio) + return 0; + else + return 1; +} + +/* Sort the requests by priority and build a diffusion tree. Actually does something only once per coop_sends bag. */ +static void _starpu_mpi_coop_sends_optimize(struct _starpu_mpi_coop_sends *coop_sends) +{ + STARPU_ASSERT(coop_sends->n > 1); + + _starpu_spin_lock(&coop_sends->lock); + if (!coop_sends->reqs_array) + { + unsigned n = coop_sends->n, i; + struct _starpu_mpi_req *cur; + struct _starpu_mpi_req **reqs; + + _STARPU_MPI_DEBUG(0, "handling cooperative sends %p for %u neighbours\n", coop_sends, n); + + /* Store them in an array */ + _STARPU_CALLOC(reqs, n, sizeof(*reqs)); + for (cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs), i = 0; + cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); + cur = _starpu_mpi_req_multilist_next_coop_sends(cur), i++) + reqs[i] = cur; + coop_sends->reqs_array = reqs; + + /* Sort them */ + qsort(reqs, n, sizeof(*reqs), _starpu_mpi_reqs_prio_compare); + +#if 0 + /* And build the diffusion tree */ + _starpu_mpi_coop_sends_build_tree(coop_sends); +#endif + } + _starpu_spin_unlock(&coop_sends->lock); +} + +/* This is called on completion of acquisition of data for a cooperative send */ +static void _starpu_mpi_coop_sends_data_ready(void *arg) +{ + _STARPU_MPI_LOG_IN(); + struct _starpu_mpi_coop_sends *coop_sends = arg; + struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data; + struct _starpu_mpi_req *cur; + unsigned node; + + /* Take the cooperative send bag out from more submissions */ + if (mpi_data->coop_sends == 
coop_sends) + { + _starpu_spin_lock(&mpi_data->coop_lock); + if (mpi_data->coop_sends == coop_sends) + mpi_data->coop_sends = NULL; + _starpu_spin_unlock(&mpi_data->coop_lock); + } + + /* Copy over the memory node number */ + cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); + node = cur->node; + + for (; + cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); + cur = _starpu_mpi_req_multilist_next_coop_sends(cur)) + { + cur->node = node; + cur->pre_sync_jobid = coop_sends->pre_sync_jobid; // for tracing purposes + } + + if (coop_sends->n == 1) + { + /* Trivial case, just submit it */ + _starpu_mpi_submit_ready_request(_starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs)); + } + else + { + /* Build diffusion tree */ + _starpu_mpi_coop_sends_optimize(coop_sends); + + /* And submit them */ + if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0) + { + mpi_data->nb_future_sends = 0; + _starpu_mpi_submit_coop_sends(coop_sends, 1, 1); + } + else + _starpu_mpi_submit_coop_sends(coop_sends, 0, 1); + } + _STARPU_MPI_LOG_OUT(); +} + +/* This is called when we want to stop including new members in a cooperative send, + * either because we know there won't be any other members due to the algorithm + * or because the value has changed. 
*/ +static void _starpu_mpi_coop_send_flush(struct _starpu_mpi_coop_sends *coop_sends) +{ + if (!coop_sends || coop_sends->n == 1) + return; + + /* Build diffusion tree */ + _starpu_mpi_coop_sends_optimize(coop_sends); + + /* And submit them */ + if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0) + _starpu_mpi_submit_coop_sends(coop_sends, 1, 0); +} + +/* This is called when a write to the data was just submitted, which means we + * can't make future sends cooperate with past sends since it's not the same value + */ +void _starpu_mpi_data_flush(starpu_data_handle_t data_handle) +{ + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + struct _starpu_mpi_coop_sends *coop_sends; + if (!mpi_data) + return; + + _starpu_spin_lock(&mpi_data->coop_lock); + coop_sends = mpi_data->coop_sends; + if (coop_sends) + mpi_data->coop_sends = NULL; + _starpu_spin_unlock(&mpi_data->coop_lock); + if (coop_sends) + { + _STARPU_MPI_DEBUG(0, "%p: data written to, flush cooperative sends %p\n", data_handle, coop_sends); + _starpu_mpi_coop_send_flush(coop_sends); + } +} + +/* Test whether a request is compatible with a cooperative send */ +static int _starpu_mpi_coop_send_compatible(struct _starpu_mpi_req *req, struct _starpu_mpi_coop_sends *coop_sends) +{ + if (!_starpu_cache_enabled) + { + /* If MPI cache isn't enabled, duplicates can appear in the list + * of recipients. + * Presence of duplicates can lead to deadlocks, so if adding + * this req request to the coop_sends will introduce + * duplicates, we consider this req as incompatible. + * + * This a requirement coming from the NewMadeleine + * implementation. If one day, there is a MPI implementation, + * this constraint might move to the NewMadeleine backend. + * + * See mpi/tests/coop_cache.c for a test case. 
+ */ + int inserting_dest = req->node_tag.node.rank; + struct _starpu_mpi_req* cur = NULL; + for (cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); + cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); + cur = _starpu_mpi_req_multilist_next_coop_sends(cur)) + { + if (cur->node_tag.node.rank == inserting_dest) + { + return 0; + } + } + } + + struct _starpu_mpi_req *prevreq = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); + return /* we can cope with tag being different */ + prevreq->node_tag.node.comm == req->node_tag.node.comm + && prevreq->sequential_consistency == req->sequential_consistency; +} + + +void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency) +{ + struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); + struct _starpu_mpi_coop_sends *coop_sends = NULL, *tofree = NULL; + int done = 0, queue, first = 1; + + /* Try to add ourself to something existing, otherwise create one. */ + while (!done) + { + _starpu_spin_lock(&mpi_data->coop_lock); + if (mpi_data->coop_sends) + { + /* Already something, check we are coherent with it */ + queue = _starpu_mpi_coop_send_compatible(req, mpi_data->coop_sends); + if (queue) + { + /* Yes, queue ourself there */ + if (coop_sends) + { + /* Remove ourself from what we created for ourself first */ + + /* Note 2022-09-21: according to code coverage(see + * https://files.inria.fr/starpu/testing/master/coverage/mpi/src/starpu_mpi_coop_sends.c.gcov.html), + * this block is dead code. 
*/ + _starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req); + tofree = coop_sends; + } + coop_sends = mpi_data->coop_sends; + _STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.node.rank); + + /* Get the pre_sync_jobid of the first send request, to build a coherent DAG in the traces: */ + struct _starpu_mpi_req *firstreq; + firstreq = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); + req->pre_sync_jobid = firstreq->pre_sync_jobid; + + _starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req); + coop_sends->n++; + req->coop_sends_head = coop_sends; + first = 0; + done = 1; + } + else + { + /* Nope, incompatible, send it as a regular point-to-point communication + * + * TODO: this could be improved by having several coop_sends "bags" available + * simultaneously, which will trigger different broadcasts. */ + _starpu_spin_unlock(&mpi_data->coop_lock); + + _starpu_mpi_isend_irecv_common(req, mode, sequential_consistency); + return; + } + } + else if (coop_sends) + { + /* Nobody else and we have allocated one, we're first! 
*/ + _STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p for tag %"PRIi64", dest %d\n", data_handle, coop_sends, req->node_tag.data_tag, req->node_tag.node.rank); + mpi_data->coop_sends = coop_sends; + first = 1; + done = 1; + } + _starpu_spin_unlock(&mpi_data->coop_lock); + + if (!done && !coop_sends) + { + /* Didn't find something to join, create one out of critical section */ + _STARPU_MPI_CALLOC(coop_sends, 1, sizeof(*coop_sends)); + coop_sends->data_handle = data_handle; + coop_sends->redirects_sent = 0; + coop_sends->n = 1; + _starpu_mpi_req_multilist_head_init_coop_sends(&coop_sends->reqs); + _starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req); + _starpu_spin_init(&coop_sends->lock); + req->coop_sends_head = coop_sends; + coop_sends->mpi_data = mpi_data; + } + /* We at worse do two iteration */ + STARPU_ASSERT(done || coop_sends); + } + + STARPU_ASSERT(coop_sends); + + /* In case we created one for nothing after all */ + free(tofree); + + if ((mpi_data->nb_future_sends != 0 && mpi_data->nb_future_sends == coop_sends->n) || (mpi_data->nb_future_sends == 0 && first)) + /* We were first, we are responsible for acquiring the data for everybody */ + starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, -1, mode, _starpu_mpi_coop_send_acquired_callback, _starpu_mpi_coop_sends_data_ready, coop_sends, sequential_consistency, 0, &coop_sends->pre_sync_jobid, NULL, req->prio); + else + req->pre_sync_jobid = coop_sends->pre_sync_jobid; +} + +void starpu_mpi_coop_sends_data_handle_nb_sends(starpu_data_handle_t data_handle, int nb_sends) +{ + struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); + + /* Has no effect is coops are disabled: this attribute is used only in + * _starpu_mpi_coop_send() that is called only if coops are enabled */ + mpi_data->nb_future_sends = nb_sends; +} + +void starpu_mpi_coop_sends_set_use(int use_coop_sends) +{ + if (starpu_mpi_world_size() <= 2) + { + _STARPU_DISP("Not enough 
MPI processes to use coop_sends\n"); + return; + } + + _starpu_mpi_use_coop_sends = use_coop_sends; +} + +int starpu_mpi_coop_sends_get_use(void) +{ + return _starpu_mpi_use_coop_sends; +} diff --git a/mpi/src/starpu_mpi_datatype.c b/mpi/src/starpu_mpi_datatype.c new file mode 100644 index 0000000..8d333fd --- /dev/null +++ b/mpi/src/starpu_mpi_datatype.c @@ -0,0 +1,482 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +struct _starpu_mpi_datatype_funcs +{ + enum starpu_data_interface_id id; + starpu_mpi_datatype_allocate_func_t allocate_datatype_func; + starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func; + starpu_mpi_datatype_free_func_t free_datatype_func; + UT_hash_handle hh; +}; + +/* We want to allow applications calling starpu_mpi_interface_datatype_register/unregister as constructor/destructor */ +static starpu_pthread_mutex_t _starpu_mpi_datatype_funcs_table_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static struct _starpu_mpi_datatype_funcs *_starpu_mpi_datatype_funcs_table = NULL; + +void _starpu_mpi_datatype_init(void) +{ +} + +void _starpu_mpi_datatype_shutdown(void) +{ +} + +/* + * Matrix + */ + +static int handle_to_datatype_matrix(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + unsigned nx = STARPU_MATRIX_GET_NX(matrix_interface); + unsigned ny = STARPU_MATRIX_GET_NY(matrix_interface); + unsigned ld = STARPU_MATRIX_GET_LD(matrix_interface); + size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(matrix_interface); + + ret = MPI_Type_vector(ny, nx*elemsize, ld*elemsize, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + return 0; +} + +/* + * Block + */ + +static int handle_to_datatype_block(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + unsigned nx = STARPU_BLOCK_GET_NX(block_interface); + unsigned ny = STARPU_BLOCK_GET_NY(block_interface); + unsigned nz = STARPU_BLOCK_GET_NZ(block_interface); + unsigned ldy = STARPU_BLOCK_GET_LDY(block_interface); + unsigned ldz = 
STARPU_BLOCK_GET_LDZ(block_interface); + size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(block_interface); + + MPI_Datatype datatype_2dlayer; + ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); + + ret = MPI_Type_create_hvector(nz, 1, ldz*elemsize, datatype_2dlayer, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + ret = MPI_Type_free(&datatype_2dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); + + return 0; +} + +/* + * Tensor + */ + +static int handle_to_datatype_tensor(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_tensor_interface *tensor_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + unsigned nx = STARPU_TENSOR_GET_NX(tensor_interface); + unsigned ny = STARPU_TENSOR_GET_NY(tensor_interface); + unsigned nz = STARPU_TENSOR_GET_NZ(tensor_interface); + unsigned nt = STARPU_TENSOR_GET_NT(tensor_interface); + unsigned ldy = STARPU_TENSOR_GET_LDY(tensor_interface); + unsigned ldz = STARPU_TENSOR_GET_LDZ(tensor_interface); + unsigned ldt = STARPU_TENSOR_GET_LDT(tensor_interface); + size_t elemsize = STARPU_TENSOR_GET_ELEMSIZE(tensor_interface); + + MPI_Datatype datatype_3dlayer; + ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_3dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); + + MPI_Datatype datatype_2dlayer; + ret = MPI_Type_create_hvector(nz, 1, ldz*elemsize, datatype_3dlayer, &datatype_2dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); + + ret = MPI_Type_create_hvector(nt, 1, ldt*elemsize, datatype_2dlayer, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == 
MPI_SUCCESS, "MPI_Type_commit failed"); + + ret = MPI_Type_free(&datatype_3dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); + + ret = MPI_Type_free(&datatype_2dlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); + + return 0; +} + +/* + * Ndim + */ + +static int handle_to_datatype_ndim(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_ndim_interface *ndim_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + unsigned *nn = STARPU_NDIM_GET_NN(ndim_interface); + unsigned *ldn = STARPU_NDIM_GET_LDN(ndim_interface); + size_t ndim = STARPU_NDIM_GET_NDIM(ndim_interface); + size_t elemsize = STARPU_NDIM_GET_ELEMSIZE(ndim_interface); + + if (ndim > 1) + { + MPI_Datatype datatype_ndlayer; + ret = MPI_Type_vector(nn[1], nn[0]*elemsize, ldn[1]*elemsize, MPI_BYTE, &datatype_ndlayer); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); + + MPI_Datatype oldtype = datatype_ndlayer, newtype; + unsigned i; + for (i = 2; i < ndim; i++) + { + ret = MPI_Type_create_hvector(nn[i], 1, ldn[i]*elemsize, oldtype, &newtype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); + + ret = MPI_Type_free(&oldtype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); + + oldtype = newtype; + } + *datatype = oldtype; + } + else if (ndim == 1) + { + ret = MPI_Type_contiguous(nn[0]*elemsize, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + } + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + return 0; +} + +/* + * Vector + */ + +static int handle_to_datatype_vector(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + unsigned nx = STARPU_VECTOR_GET_NX(vector_interface); + size_t elemsize = 
STARPU_VECTOR_GET_ELEMSIZE(vector_interface); + + ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + return 0; +} + +/* + * Variable + */ + +static int handle_to_datatype_variable(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(data_handle, node); + + int ret; + + size_t elemsize = STARPU_VARIABLE_GET_ELEMSIZE(variable_interface); + + ret = MPI_Type_contiguous(elemsize, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + return 0; +} + +/* + * Void + */ + +static int handle_to_datatype_void(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) +{ + int ret; + (void)data_handle; + (void)node; + + ret = MPI_Type_contiguous(0, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + + ret = MPI_Type_commit(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + return 0; +} + +/* + * Generic + */ + +static starpu_mpi_datatype_node_allocate_func_t handle_to_datatype_funcs[STARPU_MAX_INTERFACE_ID] = +{ +//#define DYNAMIC_MATRICES +#ifndef DYNAMIC_MATRICES + [STARPU_MATRIX_INTERFACE_ID] = handle_to_datatype_matrix, +#endif + [STARPU_BLOCK_INTERFACE_ID] = handle_to_datatype_block, + [STARPU_TENSOR_INTERFACE_ID] = handle_to_datatype_tensor, + [STARPU_NDIM_INTERFACE_ID] = handle_to_datatype_ndim, + [STARPU_VECTOR_INTERFACE_ID] = handle_to_datatype_vector, + [STARPU_CSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ + [STARPU_BCSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ + [STARPU_VARIABLE_INTERFACE_ID] = 
handle_to_datatype_variable, + [STARPU_VOID_INTERFACE_ID] = handle_to_datatype_void, + [STARPU_MULTIFORMAT_INTERFACE_ID] = NULL, +}; + +MPI_Datatype _starpu_mpi_datatype_get_user_defined_datatype(starpu_data_handle_t data_handle, unsigned node) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); + if (id < STARPU_MAX_INTERFACE_ID) return 0; + + struct _starpu_mpi_datatype_funcs *table; + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); + HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); + if (table && (table->allocate_datatype_node_func || table->allocate_datatype_func)) + { + MPI_Datatype datatype; + int ret; + if (table->allocate_datatype_node_func) + ret = table->allocate_datatype_node_func(data_handle, node, &datatype); + else + ret = table->allocate_datatype_func(data_handle, &datatype); + if (ret == 0) + return datatype; + else + return 0; + } + return 0; +} + +void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); + + if (id < STARPU_MAX_INTERFACE_ID) + { + starpu_mpi_datatype_node_allocate_func_t func = handle_to_datatype_funcs[id]; + if (func) + { + func(data_handle, req->node, &req->datatype); + req->registered_datatype = 1; + } + else + { + /* The datatype is predefined by StarPU but it will be sent as a memory area */ + req->datatype = MPI_BYTE; + req->registered_datatype = 0; + } + } + else + { + struct _starpu_mpi_datatype_funcs *table; + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); + HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); + if (table) + { + STARPU_ASSERT_MSG(table->allocate_datatype_node_func || table->allocate_datatype_func, "Handle To Datatype Function not defined for StarPU data 
interface %d", id); + int ret; + if (table->allocate_datatype_node_func) + ret = table->allocate_datatype_node_func(data_handle, req->node, &req->datatype); + else + ret = table->allocate_datatype_func(data_handle, &req->datatype); + if (ret == 0) + req->registered_datatype = 1; + else + { + /* Couldn't register, probably complex data which needs packing. */ + req->datatype = MPI_BYTE; + req->registered_datatype = 0; + } + } + else + { + /* The datatype is not predefined by StarPU */ + req->datatype = MPI_BYTE; + req->registered_datatype = 0; + } + } +#ifdef STARPU_VERBOSE + { + char datatype_name[MPI_MAX_OBJECT_NAME]; + int datatype_name_len; + MPI_Type_get_name(req->datatype, datatype_name, &datatype_name_len); + if (datatype_name_len == 0) + req->datatype_name = strdup("User defined datatype"); + else + req->datatype_name = strdup(datatype_name); + } +#endif +} + +static void _starpu_mpi_handle_free_simple_datatype(MPI_Datatype *datatype) +{ + int ret = MPI_Type_free(datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); +} + +static starpu_mpi_datatype_free_func_t handle_free_datatype_funcs[STARPU_MAX_INTERFACE_ID] = +{ +#ifndef DYNAMIC_MATRICES + [STARPU_MATRIX_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, +#endif + [STARPU_BLOCK_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_TENSOR_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_VECTOR_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_NDIM_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_CSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ + [STARPU_BCSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ + [STARPU_VARIABLE_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_VOID_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, + [STARPU_MULTIFORMAT_INTERFACE_ID] = NULL, +}; + +void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype 
*datatype) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); + + if (id < STARPU_MAX_INTERFACE_ID) + { + starpu_mpi_datatype_free_func_t func = handle_free_datatype_funcs[id]; + if (func) + func(datatype); + } + else + { + struct _starpu_mpi_datatype_funcs *table; + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); + HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); + if (table) + { + STARPU_ASSERT_MSG(table->free_datatype_func, "Free Datatype Function not defined for StarPU data interface %d", id); + if (*datatype != MPI_BYTE) + table->free_datatype_func(datatype); + } + + } + /* else the datatype is not predefined by StarPU */ +} + +int _starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) +{ + struct _starpu_mpi_datatype_funcs *table; + + STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype"); + + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); + HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); + if (table) + { + table->allocate_datatype_node_func = allocate_datatype_node_func; + table->allocate_datatype_func = allocate_datatype_func; + table->free_datatype_func = free_datatype_func; + } + else + { + _STARPU_MPI_MALLOC(table, sizeof(struct _starpu_mpi_datatype_funcs)); + table->id = id; + table->allocate_datatype_node_func = allocate_datatype_node_func; + table->allocate_datatype_func = allocate_datatype_func; + table->free_datatype_func = free_datatype_func; + HASH_ADD_INT(_starpu_mpi_datatype_funcs_table, id, table); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); + return 0; +} + +int 
starpu_mpi_interface_datatype_node_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_free_func_t free_datatype_func) +{ + return _starpu_mpi_interface_datatype_register(id, allocate_datatype_node_func, NULL, free_datatype_func); +} + +int starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) +{ + return _starpu_mpi_interface_datatype_register(id, NULL, allocate_datatype_func, free_datatype_func); +} + +int starpu_mpi_datatype_node_register(starpu_data_handle_t handle, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_free_func_t free_datatype_func) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); + int ret; + ret = starpu_mpi_interface_datatype_node_register(id, allocate_datatype_node_func, free_datatype_func); + STARPU_ASSERT_MSG(handle->ops->handle_to_pointer || handle->ops->to_pointer, "The data interface must define the operation 'to_pointer'\n"); + return ret; +} + +int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); + int ret; + ret = starpu_mpi_interface_datatype_register(id, allocate_datatype_func, free_datatype_func); + STARPU_ASSERT_MSG(handle->ops->handle_to_pointer || handle->ops->to_pointer, "The data interface must define the operation 'to_pointer'\n"); + return ret; +} + +int starpu_mpi_interface_datatype_unregister(enum starpu_data_interface_id id) +{ + struct _starpu_mpi_datatype_funcs *table; + + STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot unregister the MPI datatype of a predefined StarPU datatype"); /* only user-defined interface ids live in the hash table */ +
STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); + HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); + if (table) + { + HASH_DEL(_starpu_mpi_datatype_funcs_table, table); + free(table); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); + return 0; +} + +int starpu_mpi_datatype_unregister(starpu_data_handle_t handle) +{ + enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); + return starpu_mpi_interface_datatype_unregister(id); +} diff --git a/mpi/src/starpu_mpi_datatype.h b/mpi/src/starpu_mpi_datatype.h new file mode 100644 index 0000000..3aa82db --- /dev/null +++ b/mpi/src/starpu_mpi_datatype.h @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_DATATYPE_H__ +#define __STARPU_MPI_DATATYPE_H__ + +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_datatype_init(void); +void _starpu_mpi_datatype_shutdown(void); + +void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req); +void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype *datatype); + +MPI_Datatype _starpu_mpi_datatype_get_user_defined_datatype(starpu_data_handle_t data_handle, unsigned node); + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_DATATYPE_H__ diff --git a/mpi/src/starpu_mpi_fortran.c b/mpi/src/starpu_mpi_fortran.c new file mode 100644 index 0000000..07d3abc --- /dev/null +++ b/mpi/src/starpu_mpi_fortran.c @@ -0,0 +1,326 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include "starpu_mpi_private.h" + +#ifdef HAVE_MPI_COMM_F2C +/* Fortran related functions */ +struct _starpu_mpi_argc_argv *fstarpu_mpi_argcv_alloc(int argc, int initialize_mpi, int comm_present, MPI_Fint comm) +{ + struct _starpu_mpi_argc_argv *argcv; + _STARPU_MPI_CALLOC(argcv, 1,sizeof(*argcv)); + argcv->initialize_mpi = initialize_mpi; + if (comm_present) + { + argcv->comm = MPI_Comm_f2c(comm); + } + else + { + argcv->comm = MPI_COMM_WORLD; + } + argcv->fargc = argc; + argcv->argc = &argcv->fargc; + _STARPU_MPI_CALLOC(argcv->fargv, argc, sizeof(char *)); + argcv->argv = &argcv->fargv; + return argcv; +} + +void fstarpu_mpi_argcv_set_arg(struct _starpu_mpi_argc_argv *argcv, int i, int len, char *_s) +{ + STARPU_ASSERT(len >= 0); + STARPU_ASSERT(i >= 0 && i < argcv->fargc); + char *s; + _STARPU_MPI_MALLOC(s, len+1); + memcpy(s, _s, len); + s[len] = '\0'; + argcv->fargv[i] = s; +} + +void fstarpu_mpi_argcv_free(struct _starpu_mpi_argc_argv *argcv) +{ + if (argcv->fargv != NULL) + { + int i; + for (i=0; i<argcv->fargc; i++) /* release each argument copy stored by fstarpu_mpi_argcv_set_arg */ + { + free(argcv->fargv[i]); + } + free(argcv->fargv); + } + free(argcv); +} + +starpu_mpi_req *fstarpu_mpi_req_alloc(void) +{ + void *ptr; + _STARPU_MPI_CALLOC(ptr, 1, sizeof(starpu_mpi_req)); + return ptr; +} + +void fstarpu_mpi_req_free(starpu_mpi_req *req) +{ + free(req); +} + +MPI_Status *fstarpu_mpi_status_alloc(void) +{ + void *ptr; + _STARPU_MPI_CALLOC(ptr, 1, sizeof(MPI_Status)); + return ptr; +} + +void fstarpu_mpi_status_free(MPI_Status *status) +{ + free(status); +} + +int fstarpu_mpi_barrier(MPI_Fint comm) +{ + return starpu_mpi_barrier(MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const) +{ + return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg, seq_const); +} + +int
fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv) +{ + return starpu_mpi_init_comm(argcv->argc, argcv->argv, argcv->initialize_mpi, argcv->comm); +} + +int fstarpu_mpi_get_data_on_node(MPI_Fint comm, starpu_data_handle_t data_handle, int node) +{ + return starpu_mpi_get_data_on_node(MPI_Comm_f2c(comm), data_handle, node); +} + +int fstarpu_mpi_get_data_on_node_detached(MPI_Fint comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg) +{ + return starpu_mpi_get_data_on_node_detached(MPI_Comm_f2c(comm), data_handle, node, callback, arg); +} + +int fstarpu_mpi_redux_data(MPI_Fint comm, starpu_data_handle_t data_handle) +{ + return starpu_mpi_redux_data(MPI_Comm_f2c(comm), data_handle); +} + +int fstarpu_mpi_redux_data_prio(MPI_Fint comm, starpu_data_handle_t data_handle, int prio) +{ + return starpu_mpi_redux_data_prio(MPI_Comm_f2c(comm), data_handle, prio); +} + +int fstarpu_mpi_redux_data_tree(MPI_Fint comm, starpu_data_handle_t data_handle, int arity) +{ + return starpu_mpi_redux_data_tree(MPI_Comm_f2c(comm), data_handle, arity); +} + +int fstarpu_mpi_redux_data_prio_tree(MPI_Fint comm, starpu_data_handle_t data_handle, int prio, int arity) +{ + return starpu_mpi_redux_data_prio_tree(MPI_Comm_f2c(comm), data_handle, prio, arity); +} + +/* scatter/gather */ +int fstarpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) +{ + return starpu_mpi_scatter_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg); +} + +int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) +{ + return starpu_mpi_gather_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg); +} + +/* isend/irecv detached unlock tag */ +int 
fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag) +{ + return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, data_tag, MPI_Comm_f2c(comm), *starpu_tag); +} +int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag) +{ + return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), *starpu_tag); +} + +int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag) +{ + return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, data_tag, MPI_Comm_f2c(comm), *starpu_tag); +} + +/* isend/irecv array detached unlock tag */ +int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag) +{ + MPI_Comm comms[array_size]; + int i; + for (i = 0; i < array_size; i++) + { + comms[i] = MPI_Comm_f2c(_comms[i]); + } + int ret = starpu_mpi_isend_array_detached_unlock_tag_prio((unsigned)array_size, data_handles, dsts, data_tags, prio, comms, *starpu_tag); + return ret; +} + +int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag) +{ + return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, data_tags, NULL, _comms, starpu_tag); +} + +int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag) +{ + MPI_Comm comms[array_size]; + int i; + for (i = 0; i < array_size; i++) + { + comms[i] = MPI_Comm_f2c(_comms[i]); + } + int ret = 
starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, data_tags, comms, *starpu_tag); + return ret; +} + +/* isend/irecv */ +int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) +{ + return starpu_mpi_isend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) +{ + return starpu_mpi_isend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm) +{ + return starpu_mpi_irecv(data_handle, req, src, data_tag, MPI_Comm_f2c(comm)); +} + +/* send/recv */ +int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) +{ + return starpu_mpi_send(data_handle, dst, data_tag, MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) +{ + return starpu_mpi_send_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, MPI_Status *status) +{ + return starpu_mpi_recv(data_handle, src, data_tag, MPI_Comm_f2c(comm), status); +} + +/* isend/irecv detached */ +int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_isend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg); +} + +int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_isend_detached_prio(data_handle, dst, data_tag, 
prio, MPI_Comm_f2c(comm), callback, arg); +} + +int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_irecv_detached(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg); +} + +/* issend / issend detached */ +int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) +{ + return starpu_mpi_issend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm)); +} +int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) +{ + return starpu_mpi_issend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_issend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg); +} + +int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg) +{ + return starpu_mpi_issend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg); +} + +/* cache */ +void fstarpu_mpi_cache_flush(MPI_Fint comm, starpu_data_handle_t data_handle) +{ + return starpu_mpi_cache_flush(MPI_Comm_f2c(comm), data_handle); +} + +void fstarpu_mpi_cache_flush_all_data(MPI_Fint comm) +{ + return starpu_mpi_cache_flush_all_data(MPI_Comm_f2c(comm)); +} + +int fstarpu_mpi_comm_size(MPI_Fint comm, int *size) +{ + return starpu_mpi_comm_size(MPI_Comm_f2c(comm), size); +} + +int fstarpu_mpi_comm_rank(MPI_Fint comm, int *rank) +{ + return starpu_mpi_comm_rank(MPI_Comm_f2c(comm), rank); +} + +MPI_Fint fstarpu_mpi_world_comm() +{ + return MPI_Comm_c2f(MPI_COMM_WORLD); +} + +void 
fstarpu_mpi_comm_stats_disable(void) +{ + starpu_mpi_comm_stats_disable(); +} +void fstarpu_mpi_comm_stats_enable(void) +{ + starpu_mpi_comm_stats_enable(); +} + +void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank, MPI_Fint comm) +{ + starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_Comm_f2c(comm)); +} + +void fstarpu_mpi_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank) +{ + starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_COMM_WORLD); +} + +void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm) +{ + starpu_mpi_data_set_rank_comm(handle, rank, MPI_Comm_f2c(comm)); +} + +void fstarpu_mpi_data_set_rank(starpu_data_handle_t handle, int rank) +{ + starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD); +} + +void fstarpu_mpi_data_migrate(MPI_Fint comm, starpu_data_handle_t handle, int rank) +{ + starpu_mpi_data_migrate(MPI_Comm_f2c(comm), handle, rank); +} + +int fstarpu_mpi_wait_for_all(MPI_Fint comm) +{ + return starpu_mpi_wait_for_all(MPI_Comm_f2c(comm)); +} +#endif diff --git a/mpi/src/starpu_mpi_fxt.c b/mpi/src/starpu_mpi_fxt.c new file mode 100644 index 0000000..9ee90e7 --- /dev/null +++ b/mpi/src/starpu_mpi_fxt.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#ifdef STARPU_HAVE_MPI_SYNC_CLOCKS +#include + +static mpi_sync_clocks_t mpi_sync_clock; +#endif + +static int fxt_random_number = -1; + +#if defined(STARPU_HAVE_MPI_SYNC_CLOCKS) && !defined(STARPU_SIMGRID) +/* Use the same clock as the one used by mpi_sync_clocks */ +uint64_t fut_getstamp(void) +{ + sync_clocks_generic_tick_t tick; + sync_clocks_generic_get_tick(tick); + return (uint64_t) (sync_clocks_generic_tick2usec(tick)*1000.); +} +#endif + +static void _starpu_mpi_add_sync_point_in_fxt(void) +{ + int rank, worldsize, ret; + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + STARPU_ASSERT(worldsize > 1); + + ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(ret)); + + if (fxt_random_number == -1) // only for the first sync point + { + /* We generate a "unique" key so that we can make sure that different + * FxT traces come from the same MPI run. */ + if (rank == 0) + fxt_random_number = time(NULL); + + ret = MPI_Bcast(&fxt_random_number, 1, MPI_INT, 0, MPI_COMM_WORLD); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Bcast returning %s", _starpu_mpi_get_mpi_error_code(ret)); + + _STARPU_MPI_DEBUG(3, "unique key %x\n", fxt_random_number); /* traced after the broadcast so that every rank reports the shared key, not the -1 placeholder */ + } + +#ifdef STARPU_HAVE_MPI_SYNC_CLOCKS + if (starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) + { + mpi_sync_clocks_synchronize(mpi_sync_clock); + double local_sync_time; + mpi_sync_clocks_barrier(mpi_sync_clock, &local_sync_time); + /* Even though, with this synchronized barrier, all nodes are supposed to leave + * the barrier at exactly the same time, we can't be sure that the + * following event will be recorded at the same time on each MPI process, + * because this thread can be preempted between the end of the barrier and + * the event record.
That's why we need to store the local time when the + * barrier was unlocked as an additional information of the event, we can't + * rely on the timestamp of the event. */ + _STARPU_MPI_TRACE_BARRIER(rank, worldsize, fxt_random_number, (mpi_sync_clocks_get_time_origin_usec(mpi_sync_clock) + local_sync_time) * 1000.); + } + else /* mpi_sync_synchronize() can be long (several seconds), one can prefer to use a less precise but faster method: */ +#endif + { + ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(ret)); + + _STARPU_MPI_TRACE_BARRIER(rank, worldsize, fxt_random_number, 0); + } +} + +void _starpu_mpi_fxt_init(void* arg) +{ + struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; + + if (_starpu_fxt_wait_initialisation()) + { +#ifdef STARPU_HAVE_MPI_SYNC_CLOCKS + if (argc_argv->world_size > 1 && starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) + { + mpi_sync_clock = mpi_sync_clocks_init(MPI_COMM_WORLD); + } +#endif + + /* We need to record our ID in the trace before the main thread makes any MPI call */ + _STARPU_MPI_TRACE_START(argc_argv->rank, argc_argv->world_size); + starpu_profiling_set_id(argc_argv->rank); + _starpu_profiling_set_mpi_worldsize(argc_argv->world_size); + + if (argc_argv->world_size > 1) + { + _starpu_mpi_add_sync_point_in_fxt(); + } + } +} + +void _starpu_mpi_fxt_shutdown() +{ + if (starpu_fxt_is_enabled()) + { + int worldsize; + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + if (worldsize > 1) + { + /* We add a synchronization point at the end of the trace, + * to be able to interpolate times, in order to correct + * time drift. 
+ */ + _starpu_mpi_add_sync_point_in_fxt(); + +#ifdef STARPU_HAVE_MPI_SYNC_CLOCKS + if (starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) + { + mpi_sync_clocks_shutdown(mpi_sync_clock); + } +#endif + } + } +} diff --git a/mpi/src/starpu_mpi_fxt.h b/mpi/src/starpu_mpi_fxt.h new file mode 100644 index 0000000..d76f418 --- /dev/null +++ b/mpi/src/starpu_mpi_fxt.h @@ -0,0 +1,208 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2019-2019 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_FXT_H__ +#define __STARPU_MPI_FXT_H__ + +#include +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define _STARPU_MPI_FUT_POINT_TO_POINT_SEND 0x100 +#define _STARPU_MPI_FUT_COLLECTIVE_SEND 0x101 + +#define _STARPU_MPI_FUT_START 0x5201 +#define _STARPU_MPI_FUT_STOP 0x5202 +#define _STARPU_MPI_FUT_BARRIER 0x5203 +#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN 0x5204 +#define _STARPU_MPI_FUT_ISEND_SUBMIT_END 0x5205 +#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN 0x5206 +#define _STARPU_MPI_FUT_IRECV_SUBMIT_END 0x5207 +#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN 0x5208 +#define _STARPU_MPI_FUT_ISEND_COMPLETE_END 0x5209 +#define _STARPU_MPI_FUT_DATA_SET_RANK 0x521a +#define _STARPU_MPI_FUT_IRECV_TERMINATED 0x521b +#define _STARPU_MPI_FUT_ISEND_TERMINATED 0x521c +#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN 0x521d +#define _STARPU_MPI_FUT_TESTING_DETACHED_END 0x521e +#define _STARPU_MPI_FUT_TEST_BEGIN 0x521f +#define _STARPU_MPI_FUT_TEST_END 0x5220 +#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN 0x520a +#define _STARPU_MPI_FUT_IRECV_COMPLETE_END 0x520b +#define _STARPU_MPI_FUT_SLEEP_BEGIN 0x520c +#define _STARPU_MPI_FUT_SLEEP_END 0x520d +#define _STARPU_MPI_FUT_DTESTING_BEGIN 0x520e +#define _STARPU_MPI_FUT_DTESTING_END 0x520f +#define _STARPU_MPI_FUT_UTESTING_BEGIN 0x5210 +#define _STARPU_MPI_FUT_UTESTING_END 0x5211 +#define _STARPU_MPI_FUT_UWAIT_BEGIN 0x5212 +#define _STARPU_MPI_FUT_UWAIT_END 0x5213 +#define _STARPU_MPI_FUT_POLLING_BEGIN 0x5214 +#define _STARPU_MPI_FUT_POLLING_END 0x5215 +#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN 0x5216 +#define _STARPU_MPI_FUT_DRIVER_RUN_END 0x5217 +#define _STARPU_MPI_FUT_DATA_SET_TAG 0x5218 +#define _STARPU_MPI_FUT_IRECV_NUMA_NODE 0x5219 +#define _STARPU_MPI_FUT_ISEND_NUMA_NODE 0x5221 +#define _STARPU_MPI_FUT_CHECKPOINT_BEGIN 0x5222 +#define _STARPU_MPI_FUT_CHECKPOINT_END 0x5223 + +#ifdef STARPU_USE_FXT + +#define _STARPU_MPI_TRACE_START(rank, worldsize) \ + 
FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid()); +#define _STARPU_MPI_TRACE_STOP(rank, worldsize) \ + FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid()); +#define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key, local_time) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE5(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), (local_time), _starpu_gettid()); \ +} while (0) +#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, data_tag, size) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); +#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(type, req, prio) \ + FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_END, (type), (req)->node_tag.node.rank, (req)->node_tag.data_tag, starpu_data_get_size((req)->data_handle), (req)->pre_sync_jobid, (req)->data_handle, (prio), _starpu_gettid()); \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_ISEND_NUMA_NODE, (req)->node_tag.node.rank, (req)->pre_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); +#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, size) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); +#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, data_tag) \ + if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), 
(data_tag), 0); } +#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, size) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (data_tag), (size), _starpu_gettid()); +#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, data_tag) \ + if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (data_tag), 0); } +#define _STARPU_MPI_TRACE_TERMINATED(req) \ + if ((req)->request_type == RECV_REQ) { \ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, (req)->post_sync_jobid, _starpu_gettid(), (req)->data_handle); \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_IRECV_NUMA_NODE, (req)->node_tag.node.rank, (req)->post_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \ + } else \ + if ((req)->request_type == SEND_REQ) FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, _starpu_gettid()); +#define _STARPU_MPI_TRACE_SLEEP_BEGIN() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid()); +#define _STARPU_MPI_TRACE_SLEEP_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_END, _starpu_gettid()); +#define _STARPU_MPI_TRACE_DTESTING_BEGIN() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_BEGIN, _starpu_gettid()); +#define _STARPU_MPI_TRACE_DTESTING_END() \ 
+ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_END, _starpu_gettid()); +#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_BEGIN, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_UTESTING_END(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_END, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_BEGIN, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_UWAIT_END(src, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid()); +#define _STARPU_MPI_TRACE_DATA_SET_TAG(handle, data_tag) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_TAG, (handle), (data_tag), _starpu_gettid()); +#if 0 +/* This is very expensive in the trace, only enable for debugging */ +#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() \ + FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid()); +#define _STARPU_MPI_TRACE_TESTING_DETACHED_END() \ + FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid()); +#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) \ + FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (data_tag), _starpu_gettid()); +#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) \ + FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (data_tag), _starpu_gettid()); +#else +#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() do {} while(0) +#define _STARPU_MPI_TRACE_TESTING_DETACHED_END() do {} while(0) +#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) do {} while(0) +#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) do {} while(0) +#endif +#define 
_STARPU_MPI_TRACE_POLLING_BEGIN() \ + if(!trace_loop) { \ + trace_loop = 1; \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_BEGIN, _starpu_gettid()); \ + } +#define _STARPU_MPI_TRACE_POLLING_END() \ + if(trace_loop) { \ + trace_loop = 0; \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_END, _starpu_gettid()); \ + } +#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_BEGIN, _starpu_gettid()); +#define _STARPU_MPI_TRACE_DRIVER_RUN_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid()); +#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_BEGIN, (cp_instance), (cp_domain), _starpu_gettid()); +#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_END, (cp_instance), (cp_domain), _starpu_gettid()); +#define TRACE +#else +#define _STARPU_MPI_TRACE_START(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_STOP(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_BARRIER(a, b, c, d) do {} while(0); +#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_COMPLETE_END(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_TERMINATED(a) do {} while(0); +#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(a, b, c) do {} while(0); +#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(a, b) do {} while(0); +#define 
_STARPU_MPI_TRACE_SLEEP_BEGIN() do {} while(0); +#define _STARPU_MPI_TRACE_SLEEP_END() do {} while(0); +#define _STARPU_MPI_TRACE_DTESTING_BEGIN() do {} while(0); +#define _STARPU_MPI_TRACE_DTESTING_END() do {} while(0); +#define _STARPU_MPI_TRACE_UTESTING_BEGIN(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_UTESTING_END(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_UWAIT_END(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_DATA_SET_TAG(a, b) do {} while(0); +#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() do {} while(0) +#define _STARPU_MPI_TRACE_TESTING_DETACHED_END() do {} while(0) +#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) do {} while(0) +#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) do {} while(0) +#define _STARPU_MPI_TRACE_POLLING_BEGIN() do {} while(0); +#define _STARPU_MPI_TRACE_POLLING_END() do {} while(0); +#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() do {} while(0); +#define _STARPU_MPI_TRACE_DRIVER_RUN_END() do {} while(0); +#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) do {} while(0) +#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) do {} while(0) +#endif + +void _starpu_mpi_fxt_init(void* arg); +void _starpu_mpi_fxt_shutdown(); + +#ifdef __cplusplus +} +#endif + + +#endif // __STARPU_MPI_FXT_H__ diff --git a/mpi/src/starpu_mpi_helper.c b/mpi/src/starpu_mpi_helper.c new file mode 100644 index 0000000..fc799b5 --- /dev/null +++ b/mpi/src/starpu_mpi_helper.c @@ -0,0 +1,123 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +static void starpu_mpi_unlock_tag_callback(void *arg) +{ + starpu_tag_t *tagptr = arg; + + starpu_tag_notify_from_apps(*tagptr); + + free(tagptr); +} + +int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag) +{ + starpu_tag_t *tagptr; + _STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t)); + *tagptr = tag; + + return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, prio, comm, starpu_mpi_unlock_tag_callback, tagptr); +} + +int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag) +{ + return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dest, data_tag, 0, comm, tag); +} + +int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag) +{ + starpu_tag_t *tagptr; + _STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t)); + *tagptr = tag; + + return starpu_mpi_irecv_detached(data_handle, source, data_tag, comm, starpu_mpi_unlock_tag_callback, tagptr); +} + +struct arg_array +{ + int array_size; + starpu_tag_t tag; +}; + +static void starpu_mpi_array_unlock_callback(void *_arg) +{ + struct arg_array *arg = _arg; + + int remaining = 
STARPU_ATOMIC_ADD(&arg->array_size, -1); + + if (remaining == 0) + { + starpu_tag_notify_from_apps(arg->tag); + free(arg); + } +} + +int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag) +{ + int ret; + + if (!array_size) + return 0; + struct arg_array *arg; + _STARPU_MPI_MALLOC(arg, sizeof(struct arg_array)); + + arg->array_size = array_size; + arg->tag = tag; + + unsigned elem; + for (elem = 0; elem < array_size; elem++) + { + int p = 0; + if (prio) + p = prio[elem]; + ret = starpu_mpi_isend_detached_prio(data_handle[elem], dest[elem], data_tag[elem], p, comm[elem], starpu_mpi_array_unlock_callback, arg); + if (ret) + return ret; + } + + return 0; +} + +int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag) +{ + return starpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handle, dest, data_tag, NULL, comm, tag); +} + +int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag) +{ + if (!array_size) + return 0; + + int ret; + struct arg_array *arg; + _STARPU_MPI_MALLOC(arg, sizeof(struct arg_array)); + + arg->array_size = array_size; + arg->tag = tag; + + unsigned elem; + for (elem = 0; elem < array_size; elem++) + { + ret = starpu_mpi_irecv_detached(data_handle[elem], source[elem], data_tag[elem], comm[elem], starpu_mpi_array_unlock_callback, arg); + if (ret) + return ret; + } + + return 0; +} diff --git a/mpi/src/starpu_mpi_init.c b/mpi/src/starpu_mpi_init.c new file mode 100644 index 0000000..5423bb8 --- /dev/null +++ b/mpi/src/starpu_mpi_init.c @@ -0,0 +1,456 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_MPI_EXT +#include +#endif + +#ifdef STARPU_SIMGRID +static int _mpi_world_size; +static int _mpi_world_rank; +#endif +static int _mpi_initialized_starpu; +static int _starpu_mpi_gpudirect; /* Whether GPU direct was explicitly requested (1) or disabled (0), or should be enabled if available (-1) */ +int _starpu_mpi_has_cuda; /* Whether GPU direct is available */ +int _starpu_mpi_psm2; /* Whether MPI has PSM2 or not. 
Useful when using old intel compilers, for which psm2 detection is buggy */ +int _starpu_mpi_cuda_devid = -1; /* Which device GPU direct is enabled for (-1 = all) */ + +static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg) +{ + const char *level = NULL; + switch (thread_level) + { + case MPI_THREAD_SERIALIZED: + { + level = "MPI_THREAD_SERIALIZED"; + _STARPU_DISP("MPI%s %s; Multiple threads may make MPI calls, but only one at a time.\n", msg, level); + break; + } + case MPI_THREAD_FUNNELED: + { + level = "MPI_THREAD_FUNNELED"; + _STARPU_DISP("MPI%s %s; The application can safely make calls to StarPU-MPI functions, but should not call directly MPI communication functions.\n", msg, level); + break; + } + case MPI_THREAD_SINGLE: + { + level = "MPI_THREAD_SINGLE"; + _STARPU_DISP("MPI%s %s; MPI does not have multi-thread support, this might cause problems. The application can make calls to StarPU-MPI functions, but not call directly MPI Communication functions.\n", msg, level); + break; + } + case MPI_THREAD_MULTIPLE: + /* no problem */ + break; + } + if (thread_level != MPI_THREAD_MULTIPLE && _starpu_mpi_thread_multiple_send) + { + _STARPU_DISP("STARPU_MPI_THREAD_MULTIPLE_SEND requested but MPI%s %s, disabling STARPU_MPI_THREAD_MULTIPLE_SEND\n", msg, level); + _starpu_mpi_thread_multiple_send = 0; + } +} + +void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv) +{ +#ifdef STARPU_USE_CUDA + if (_starpu_mpi_gpudirect != 0 && starpu_cuda_worker_get_count() > 0) + { + /* Some GPUDirect implementations (e.g. 
psm2) want cudaSetDevice to be called before MPI_Init */ + int cuda_worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); + STARPU_ASSERT(cuda_worker >= 0); + int devid = starpu_worker_get_devid(cuda_worker); + STARPU_ASSERT(devid >= 0); + cudaSetDevice(devid); + } +#endif + if (argc_argv->initialize_mpi) + { + STARPU_ASSERT_MSG(argc_argv->comm == MPI_COMM_WORLD, "It does not make sense to ask StarPU-MPI to initialize MPI while a non-world communicator was given"); + int thread_support; + _STARPU_DEBUG("Calling MPI_Init_thread\n"); + if (MPI_Init_thread(argc_argv->argc, argc_argv->argv, _starpu_mpi_thread_multiple_send ? MPI_THREAD_MULTIPLE : MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) + { + _STARPU_ERROR("MPI_Init_thread failed\n"); + } + _starpu_mpi_print_thread_level_support(thread_support, "_Init_thread level ="); + } + else + { + int provided; + MPI_Query_thread(&provided); + _starpu_mpi_print_thread_level_support(provided, " has been initialized with"); + } + + // automatically register the given communicator + starpu_mpi_comm_register(argc_argv->comm); + if (argc_argv->comm != MPI_COMM_WORLD) + starpu_mpi_comm_register(MPI_COMM_WORLD); + + MPI_Comm_rank(argc_argv->comm, &argc_argv->rank); + MPI_Comm_size(argc_argv->comm, &argc_argv->world_size); + MPI_Comm_set_errhandler(argc_argv->comm, MPI_ERRORS_RETURN); + +#ifdef STARPU_USE_CUDA +#ifdef MPIX_CUDA_AWARE_SUPPORT + if (MPIX_Query_cuda_support()) + _starpu_mpi_has_cuda = 1; + else if (_starpu_mpi_gpudirect > 0) + _STARPU_DISP("Warning: MPI GPUDirect requested, but MPIX_Query_cuda_support reports that it is not supported.\n"); + _STARPU_DEBUG("MPI has CUDA: %d\n", _starpu_mpi_has_cuda); + if (!_starpu_mpi_gpudirect) + { + _STARPU_DEBUG("But disabled by user\n"); + _starpu_mpi_has_cuda = 0; + } + if (_starpu_mpi_has_cuda && _starpu_mpi_psm2) + { +#pragma weak psm2_init + extern int psm2_init(int *major, int *minor); + if (psm2_init && starpu_cuda_worker_get_count() > 1) + { + int cuda_worker = 
starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); + _starpu_mpi_cuda_devid = starpu_worker_get_devid(cuda_worker); + + _STARPU_DISP("Warning: MPI GPUDirect is enabled using the PSM2 driver, but StarPU will be driving several CUDA GPUs.\n"); + _STARPU_DISP("Since the PSM2 driver only supports one CUDA GPU at a time for GPU Direct (at least as of its version 11.2.185), StarPU-MPI will use GPU Direct only for CUDA%d.\n", _starpu_mpi_cuda_devid); + _STARPU_DISP("To get GPU Direct working with all CUDA GPUs with the PSM2 driver, you will unfortunately have to run one MPI rank per GPU.\n"); + _STARPU_DISP("if you are sure you are not actually using PSM2, you can set STARPU_MPI_PSM2=0 to disable PSM2 detection.\n"); + } + } +#else + if (_starpu_mpi_gpudirect > 0) + _STARPU_DISP("Warning: MPI GPUDirect requested, but the MPIX_Query_cuda_support function is not provided by the MPI Implementation, did you compile it with CUDA support and the Cuda MPI extension?\n"); + _STARPU_DEBUG("No CUDA support in MPI\n"); +#endif +#endif + +#ifdef STARPU_SIMGRID + _mpi_world_size = argc_argv->world_size; + _mpi_world_rank = argc_argv->rank; +#endif +} + +static +void _starpu_mpi_backend_check() +{ + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_init != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_shutdown != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_reserve_core != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_init != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_fill != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_destroy != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_clear != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_register != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_comm_register != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_init != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_shutdown != NULL); +#ifdef 
STARPU_SIMGRID + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_initialization != NULL); +#endif + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_barrier != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_all != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_test != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_isend_size_func != NULL); + STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_irecv_size_func != NULL); +} + +static +int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm) +{ + struct _starpu_mpi_argc_argv *argc_argv; + _STARPU_MALLOC(argc_argv, sizeof(struct _starpu_mpi_argc_argv)); + argc_argv->initialize_mpi = initialize_mpi; + argc_argv->argc = argc; + argc_argv->argv = argv; + argc_argv->comm = comm; + _starpu_implicit_data_deps_write_hook(_starpu_mpi_data_flush); + + _starpu_mpi_backend_check(); + + _starpu_mpi_gpudirect = starpu_getenv_number("STARPU_MPI_GPUDIRECT"); + _starpu_mpi_psm2 = starpu_getenv_number_default("STARPU_MPI_PSM2", 1); +#ifdef STARPU_SIMGRID + /* Call MPI_Init_thread as early as possible, to initialize simgrid + * before working with mutexes etc. */ + _starpu_mpi_do_initialize(argc_argv); +#endif + + int ret = _mpi_backend._starpu_mpi_backend_progress_init(argc_argv); + + if (starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0)) + { + int rank, size, i; + char hostname[65]; + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + gethostname(hostname, sizeof(hostname)); + + /* We make a barrier between each node calling hwloc-ps, to avoid mixing + * outputs in stdout. 
*/ + for (i = 0; i < size; i++) + { + starpu_mpi_barrier(MPI_COMM_WORLD); + if (rank == i) + { + fprintf(stdout, "== Binding for rank %d on node %s ==\n", rank, hostname); + starpu_display_bindings(); + fflush(stdout); + } + } + starpu_mpi_barrier(MPI_COMM_WORLD); + if (rank == 0) + { + fprintf(stdout, "== End of bindings ==\n"); + fflush(stdout); + } + } + + return ret; +} + +#ifdef STARPU_SIMGRID +/* This is called before application's main, to initialize SMPI before we can + * create MSG processes to run application's main */ +int _starpu_mpi_simgrid_init(int argc, char *argv[]) +{ + return _starpu_mpi_initialize(&argc, &argv, 1, MPI_COMM_WORLD); +} +#endif + +int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm) +{ +#ifdef STARPU_SIMGRID + (void)argc; + (void)argv; + (void)initialize_mpi; + (void)comm; + _mpi_backend._starpu_mpi_backend_wait_for_initialization(); + return 0; +#else + return _starpu_mpi_initialize(argc, argv, initialize_mpi, comm); +#endif +} + +int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi) +{ + return starpu_mpi_init_comm(argc, argv, initialize_mpi, MPI_COMM_WORLD); +} + +int starpu_mpi_initialize(void) +{ +#ifdef STARPU_SIMGRID + return 0; +#else + return _starpu_mpi_initialize(NULL, NULL, 0, MPI_COMM_WORLD); +#endif +} + +int starpu_mpi_initialize_extended(int *rank, int *world_size) +{ +#ifdef STARPU_SIMGRID + *world_size = _mpi_world_size; + *rank = _mpi_world_rank; + return 0; +#else + int ret; + + ret = _starpu_mpi_initialize(NULL, NULL, 1, MPI_COMM_WORLD); + if (ret == 0) + { + starpu_mpi_comm_rank(MPI_COMM_WORLD, rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, world_size); + } + return ret; +#endif +} + +int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf) +{ + struct starpu_conf localconf; + if (!conf) + { + starpu_conf_init(&localconf); + conf = &localconf; + } + + _mpi_backend._starpu_mpi_backend_init(conf); + + /* Reserve a core 
only if required by the backend and if STARPU_NCPU isn't provided */ + int mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); + int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); + if (mpi_thread_cpuid < 0 && mpi_thread_coreid < 0 && _mpi_backend._starpu_mpi_backend_reserve_core() && conf->ncpus == -1) + { + /* Reserve a core for our progression thread */ + if (conf->reserve_ncpus == -1) + conf->reserve_ncpus = 1; + else + conf->reserve_ncpus++; + } + + conf->will_use_mpi = 1; + + int ret = starpu_init(conf); + if (ret < 0) + return ret; + _mpi_initialized_starpu = 1; + + return starpu_mpi_init_comm(argc, argv, initialize_mpi, comm); +} + +int starpu_mpi_shutdown(void) +{ + return starpu_mpi_shutdown_comm(MPI_COMM_WORLD); +} + +struct comm_size_entry +{ + UT_hash_handle hh; + MPI_Comm comm; + int size; + int rank; +}; + +static struct comm_size_entry *registered_comms = NULL; + +int starpu_mpi_shutdown_comm(MPI_Comm comm) +{ + void *value; + int rank, world_size; + + /* Make sure we do not have MPI communications pending in the task graph + * before shutting down MPI */ + starpu_mpi_wait_for_all(comm); + + /* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */ + starpu_mpi_comm_rank(comm, &rank); + starpu_mpi_comm_size(comm, &world_size); + + /* kill the progression thread */ + _mpi_backend._starpu_mpi_backend_progress_shutdown(&value); + +#ifdef STARPU_USE_FXT + if (starpu_fxt_is_enabled()) + { + _STARPU_MPI_TRACE_STOP(rank, world_size); + } +#endif // STARPU_USE_FXT + + _starpu_mpi_comm_amounts_display(stderr, rank); + _starpu_mpi_comm_amounts_shutdown(); + _starpu_mpi_cache_shutdown(); + + _mpi_backend._starpu_mpi_backend_shutdown(); + + struct comm_size_entry *entry=NULL, *tmp=NULL; + HASH_ITER(hh, registered_comms, entry, tmp) + { + HASH_DEL(registered_comms, entry); + free(entry); + } + + if (_mpi_initialized_starpu) + starpu_shutdown(); + + return 
0; +} + +int starpu_mpi_comm_register(MPI_Comm comm) +{ + struct comm_size_entry *entry; + + _STARPU_MPI_MALLOC(entry, sizeof(*entry)); + entry->comm = comm; + MPI_Comm_size(entry->comm, &(entry->size)); + MPI_Comm_rank(entry->comm, &(entry->rank)); + HASH_ADD(hh, registered_comms, comm, sizeof(entry->comm), entry); + return 0; +} + +int starpu_mpi_comm_size(MPI_Comm comm, int *size) +{ + if (_starpu_mpi_fake_world_size != -1) + { + *size = _starpu_mpi_fake_world_size; + return 0; + } +#ifdef STARPU_SIMGRID + STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now"); + *size = _mpi_world_size; + return 0; +#else + struct comm_size_entry *entry; + HASH_FIND(hh, registered_comms, &comm, sizeof(entry->comm), entry); + STARPU_ASSERT_MSG(entry, "Communicator %ld has not been registered\n", (long int)comm); + *size = entry->size; + return 0; +#endif +} + +int starpu_mpi_comm_rank(MPI_Comm comm, int *rank) +{ + if (_starpu_mpi_fake_world_rank != -1) + { + *rank = _starpu_mpi_fake_world_rank; + return 0; + } +#ifdef STARPU_SIMGRID + STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now"); + *rank = _mpi_world_rank; + return 0; +#else + struct comm_size_entry *entry; + HASH_FIND(hh, registered_comms, &comm, sizeof(entry->comm), entry); + STARPU_ASSERT_MSG(entry, "Communicator %ld has not been registered\n", (long int)comm); + *rank = entry->rank; + return 0; +#endif +} + +int starpu_mpi_world_size(void) +{ + int size; + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + return size; +} + +int starpu_mpi_world_rank(void) +{ + int rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + return rank; +} + +int starpu_mpi_get_thread_cpuid(void) +{ + return _starpu_mpi_thread_cpuid; +} diff --git a/mpi/src/starpu_mpi_init.h b/mpi/src/starpu_mpi_init.h new file mode 100644 index 0000000..debe57a --- /dev/null +++ b/mpi/src/starpu_mpi_init.h @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */

#ifndef __STARPU_MPI_INIT_H__
#define __STARPU_MPI_INIT_H__

#include
#include

/** @file */

#ifdef __cplusplus
extern "C"
{
#endif

/**
 * MPI-side part of the StarPU-MPI initialization, defined in
 * starpu_mpi_init.c.  Calls MPI_Init_thread() when
 * argc_argv->initialize_mpi is set (otherwise only queries the thread
 * support level), registers argc_argv->comm and MPI_COMM_WORLD, and fills
 * argc_argv->rank and argc_argv->world_size.
 */
void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv);

#ifdef __cplusplus
}
#endif

#endif // __STARPU_MPI_INIT_H__
diff --git a/mpi/src/starpu_mpi_private.c b/mpi/src/starpu_mpi_private.c new file mode 100644 index 0000000..de71291 --- /dev/null +++ b/mpi/src/starpu_mpi_private.c @@ -0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +int _starpu_debug_rank=-1; +int _starpu_debug_level_min=0; +int _starpu_debug_level_max=0; +int _starpu_mpi_tag = 42; +int _starpu_mpi_comm_debug; + +int _starpu_mpi_nobind = -1; +int _starpu_mpi_thread_cpuid = -1; +int _starpu_mpi_thread_multiple_send = 0; +int _starpu_mpi_use_prio = 1; +int _starpu_mpi_fake_world_size = -1; +int _starpu_mpi_fake_world_rank = -1; +int _starpu_mpi_use_coop_sends = 1; +int _starpu_mpi_mem_throttle = 0; +int _starpu_mpi_recv_wait_finalize = 0; + +void _starpu_mpi_set_debug_level_min(int level) +{ + _starpu_debug_level_min = level; +} + +void _starpu_mpi_set_debug_level_max(int level) +{ + _starpu_debug_level_max = level; +} + +int starpu_mpi_get_communication_tag(void) +{ + return _starpu_mpi_tag; +} + +void starpu_mpi_set_communication_tag(int tag) +{ + _starpu_mpi_tag = tag; +} + +char *_starpu_mpi_get_mpi_error_code(int code) +{ + static char str[MPI_MAX_OBJECT_NAME]; + int len; + MPI_Error_string(code, str, &len); + return str; +} + +void _starpu_mpi_env_init(void) +{ + _starpu_mpi_comm_debug = starpu_getenv("STARPU_MPI_COMM") != NULL; + _starpu_mpi_fake_world_size = starpu_getenv_number("STARPU_MPI_FAKE_SIZE"); + _starpu_mpi_fake_world_rank = starpu_getenv_number("STARPU_MPI_FAKE_RANK"); + _starpu_mpi_nobind = starpu_getenv_number_default("STARPU_MPI_NOBIND", 0); + _starpu_mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); + _starpu_mpi_thread_multiple_send = starpu_getenv_number_default("STARPU_MPI_THREAD_MULTIPLE_SEND", 0); + _starpu_mpi_use_prio = starpu_getenv_number_default("STARPU_MPI_PRIORITIES", 1); + _starpu_mpi_use_coop_sends = starpu_getenv_number_default("STARPU_MPI_COOP_SENDS", 1); + _starpu_mpi_mem_throttle = starpu_getenv_number_default("STARPU_MPI_MEM_THROTTLE", 0); + _starpu_debug_level_min = starpu_getenv_number_default("STARPU_MPI_DEBUG_LEVEL_MIN", 0); + _starpu_debug_level_max = starpu_getenv_number_default("STARPU_MPI_DEBUG_LEVEL_MAX", 0); + 
_starpu_mpi_recv_wait_finalize = starpu_getenv_number_default("STARPU_MPI_RECV_WAIT_FINALIZE", _starpu_mpi_recv_wait_finalize); + + int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); + if (_starpu_mpi_thread_cpuid >= 0 && mpi_thread_coreid >= 0) + { + _STARPU_DISP("Warning: STARPU_MPI_THREAD_CPUID and STARPU_MPI_THREAD_COREID cannot be set at the same time. STARPU_MAIN_THREAD_CPUID will be used.\n"); + } + if (_starpu_mpi_thread_cpuid == -1 && mpi_thread_coreid >= 0) + { + _starpu_mpi_thread_cpuid = mpi_thread_coreid * _starpu_get_nhyperthreads(); + } +} + +char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type) +{ + switch (request_type) + { + case SEND_REQ: return "SEND_REQ"; + case RECV_REQ: return "RECV_REQ"; + case WAIT_REQ: return "WAIT_REQ"; + case TEST_REQ: return "TEST_REQ"; + case BARRIER_REQ: return "BARRIER_REQ"; + case UNKNOWN_REQ: return "UNSET_REQ"; + default: return "unknown request type"; + } +} diff --git a/mpi/src/starpu_mpi_private.h b/mpi/src/starpu_mpi_private.h new file mode 100644 index 0000000..6e162e6 --- /dev/null +++ b/mpi/src/starpu_mpi_private.h @@ -0,0 +1,403 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_PRIVATE_H__ +#define __STARPU_MPI_PRIVATE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef STARPU_SIMGRID +extern starpu_pthread_wait_t _starpu_mpi_thread_wait; +extern starpu_pthread_queue_t _starpu_mpi_thread_dontsleep; + +struct _starpu_simgrid_mpi_req +{ + MPI_Request *request; + MPI_Status *status; + starpu_pthread_queue_t *queue; + unsigned *done; +}; + +int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag); +void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done); +#endif + +struct _starpu_mpi_req* _starpu_mpi_isend_cache_aware(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *_arg, int sequential_consistency, int* cache_flag); +struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *_arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int* cache_flag); + +extern int _starpu_debug_rank; +char *_starpu_mpi_get_mpi_error_code(int code); +extern int _starpu_mpi_comm_debug; + +#ifdef STARPU_MPI_VERBOSE +extern int _starpu_debug_level_min; +extern int _starpu_debug_level_max; +void _starpu_mpi_set_debug_level_min(int level); +void _starpu_mpi_set_debug_level_max(int level); +#endif +extern int _starpu_mpi_fake_world_size; +extern int _starpu_mpi_fake_world_rank; +extern int _starpu_mpi_use_prio; +extern int _starpu_mpi_nobind; +extern int _starpu_mpi_thread_cpuid; +extern int _starpu_mpi_thread_multiple_send; +extern int _starpu_mpi_use_coop_sends; +extern int _starpu_mpi_mem_throttle; +extern int _starpu_mpi_recv_wait_finalize; +extern int _starpu_mpi_has_cuda; 
+extern int _starpu_mpi_cuda_devid; +void _starpu_mpi_env_init(void); + +#ifdef STARPU_NO_ASSERT +# define STARPU_MPI_ASSERT_MSG(x, msg, ...) do { if (0) { (void) (x); }} while(0) +#else +# if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS) +int _starpu_debug_rank; +# define STARPU_MPI_ASSERT_MSG(x, msg, ...) \ + do \ + { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; \ + } \ + } while(0) +# else +# define STARPU_MPI_ASSERT_MSG(x, msg, ...) \ + do \ + { \ + if (STARPU_UNLIKELY(!(x))) \ + { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); \ + } \ + assert(x); \ + } while(0) + +# endif +#endif + +#define _STARPU_MPI_MALLOC(ptr, size) do { ptr = malloc(size); STARPU_MPI_ASSERT_MSG(ptr != NULL, "Cannot allocate %ld bytes\n", (long) (size)); } while (0) +#define _STARPU_MPI_CALLOC(ptr, nmemb, size) do { ptr = calloc(nmemb, size); STARPU_MPI_ASSERT_MSG(ptr != NULL, "Cannot allocate %ld bytes\n", (long) ((nmemb)*(size))); } while (0) /* nmemb and size are parenthesised so that expression arguments report the right byte count */ +#define _STARPU_MPI_REALLOC(ptr, size) do { void *_new_ptr = realloc(ptr, size); STARPU_MPI_ASSERT_MSG(_new_ptr != NULL, "Cannot reallocate %ld bytes\n", (long) (size)); ptr = _new_ptr; } while (0) + +#ifdef STARPU_MPI_VERBOSE +# define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) \ + do \ + { \ + if (_starpu_mpi_comm_debug) \ + { \ + int __size; \ + char _comm_name[128]; \ + int _comm_name_len; \ + int _rank; \ + starpu_mpi_comm_rank(comm, &_rank); \ + MPI_Type_size(datatype, &__size); \ + MPI_Comm_get_name(comm, _comm_name, &_comm_name_len); \ + fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%ld:%s:%p:%ld:%d:%s:%d\n",
_rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \ + fflush(stderr); \ + } \ + } while(0) +# define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, dest, tag, utag, comm, "-->") +# define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, source, tag, utag, comm, "<--") +# define _STARPU_MPI_DEBUG(level, fmt, ...) \ + do \ + { \ + if (!_starpu_silent && _starpu_debug_level_min <= level && level <= _starpu_debug_level_max) \ + { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__,## __VA_ARGS__); \ + fflush(stderr); \ + } \ + } while(0) +#else +# define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) do { } while(0) +# define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) do { } while(0) +# define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) do { } while(0) +# define _STARPU_MPI_DEBUG(level, fmt, ...) do { } while(0) +#endif + +#define _STARPU_MPI_DISP(fmt, ...) do { if (!_starpu_silent) { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \ + fflush(stderr); }} while(0) +#define _STARPU_MPI_MSG(fmt, ...)
do { if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "[%d][starpu_mpi][%s:%d] " fmt , _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \ + fflush(stderr); } while(0) + +#ifdef STARPU_MPI_EXTRA_VERBOSE +# define _STARPU_MPI_LOG_IN() do { if (!_starpu_silent) { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] -->\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__); \ + fflush(stderr); }} while(0) +# define _STARPU_MPI_LOG_OUT() do { if (!_starpu_silent) { \ + if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ + fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] <--\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__, __LINE__); \ + fflush(stderr); }} while(0) +#else +# define _STARPU_MPI_LOG_IN() +# define _STARPU_MPI_LOG_OUT() +#endif + +enum _starpu_mpi_request_type +{ + SEND_REQ=0, + RECV_REQ=1, + WAIT_REQ=2, + TEST_REQ=3, + BARRIER_REQ=4, + PROBE_REQ=5, + UNKNOWN_REQ=6, +}; + +struct _starpu_mpi_node +{ + MPI_Comm comm; + int rank; +}; + +struct _starpu_mpi_node_tag +{ + struct _starpu_mpi_node node; + starpu_mpi_tag_t data_tag; +}; + +MULTILIST_CREATE_TYPE(_starpu_mpi_req, coop_sends) +/** One bag of cooperative sends */ +struct _starpu_mpi_coop_sends +{ + starpu_data_handle_t data_handle; + + /** List of send requests */ + struct _starpu_mpi_req_multilist_coop_sends reqs; + struct _starpu_mpi_data *mpi_data; + + struct _starpu_spinlock lock; + /** Array of send requests, after sorting out */ + struct _starpu_mpi_req **reqs_array; + unsigned n; + unsigned redirects_sent; + + /* Used to trace dependencies */ + long pre_sync_jobid; +}; + +/** Initialized in starpu_mpi_data_register_comm */ +struct _starpu_mpi_data +{ + int magic; + struct _starpu_mpi_node_tag node_tag; + char *cache_sent; + unsigned int cache_received;
+ unsigned int ft_induced_cache_received:1; + unsigned int ft_induced_cache_received_count:1; + unsigned int modified:1; // Whether the data has been modified since the registration. + + /** Array used to store the contributing nodes to this data + * when it is accessed in (MPI_)REDUX mode. */ + char* redux_map; + + /** Rendez-vous data for opportunistic cooperative sends, + * Needed to synchronize between submit thread and workers */ + struct _starpu_spinlock coop_lock; + /** Current cooperative send bag */ + struct _starpu_mpi_coop_sends *coop_sends; + + /** When provided, wait the given number of sends to start a coop, instead of just waiting that data are ready */ + unsigned nb_future_sends; +}; + +struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle); + +struct _starpu_mpi_req_backend; +struct _starpu_mpi_req; +LIST_TYPE(_starpu_mpi_req, + /** description of the data at StarPU level */ + starpu_data_handle_t data_handle; + + int prio; + unsigned node; /* Which StarPU memory node this will read from / write to */ + + /** description of the data to be sent/received */ + MPI_Datatype datatype; + char *datatype_name; + void *ptr; + starpu_ssize_t count; + int registered_datatype; // = 0: datatype is not predefined by StarPU; = 1: otherwise; initialized with -1 + + struct _starpu_mpi_req_backend *backend; + + /** who are we talking to ?
*/ + struct _starpu_mpi_node_tag node_tag; + void (*func)(struct _starpu_mpi_req *); + + MPI_Status *status; + struct _starpu_mpi_req_multilist_coop_sends coop_sends; + struct _starpu_mpi_coop_sends *coop_sends_head; + + int *flag; + unsigned sync; + + /** Amount of memory pre-reserved for the reception buffer */ + size_t reserved_size; + + int ret; + + /** Kind of request (send, receive, wait, test, barrier, probe...), see enum _starpu_mpi_request_type */ + enum _starpu_mpi_request_type request_type; + + unsigned submitted; + unsigned completed; + unsigned posted; + + /** in the case of detached requests */ + int detached; + void *callback_arg; + void (*callback)(void *); + + int sequential_consistency; + + long pre_sync_jobid; + long post_sync_jobid; + +#ifdef STARPU_SIMGRID + MPI_Status status_store; + starpu_pthread_queue_t queue; + unsigned done; +#endif +); +PRIO_LIST_TYPE(_starpu_mpi_req, prio) + +MULTILIST_CREATE_INLINES(struct _starpu_mpi_req, _starpu_mpi_req, coop_sends) + +/** To be called before actually queueing a request, so the communication layer knows it has something to look at */ +void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req); +/** To be called to actually submit the request */ +void _starpu_mpi_submit_ready_request(void *arg); +/** To be called when request is completed */ +void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req); + +void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency); + +#if 0 +/** Build a communication tree. Called before _starpu_mpi_coop_send is ever called. coop_sends->lock is held. */ +void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends); +#endif +/** Try to merge with send request with other send requests */ +void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency); + +/** Actually submit the coop_sends bag to MPI. + * At least one of submit_control or submit_data is true.
+ * _starpu_mpi_submit_coop_sends may be called either + * - just once with both parameters being true, + * - or once with submit_control being true (data is not available yet, but we + * can send control messages), and a second time with submit_data being true. Or + * the converse, possibly on different threads, etc. + */ +void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data); + +/* + * Fills post_sync_jobid with the reduction synchronization task jobid + */ +void _starpu_mpi_redux_fill_post_sync_jobid(const void * const redux_data_args, long * const post_sync_jobid); + +void _starpu_mpi_request_init(struct _starpu_mpi_req **req); +struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle, + int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm, + unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, + enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *), + int sequential_consistency, + int is_internal_req, + starpu_ssize_t count); + +void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req); + +char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type); + +struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int prio); + +int _starpu_mpi_choose_node(starpu_data_handle_t data_handle, enum starpu_data_access_mode mode); + +void _starpu_mpi_data_flush(starpu_data_handle_t data_handle); + +/** To be called at initialization to set up the tags upper bound */ +void _starpu_mpi_tags_init(void); + +struct _starpu_mpi_argc_argv +{ + int initialize_mpi; + int *argc; + char ***argv; + MPI_Comm comm; + /** Fortran argc */ + int fargc; + /** Fortran argv */ + char **fargv; + int rank; + int
world_size; +}; + +/** + * Specific functions to backend implementation + */ +struct _starpu_mpi_backend +{ + void (*_starpu_mpi_backend_init)(struct starpu_conf *conf); + void (*_starpu_mpi_backend_shutdown)(void); + int (*_starpu_mpi_backend_reserve_core)(void); + void (*_starpu_mpi_backend_request_init)(struct _starpu_mpi_req *req); + void (*_starpu_mpi_backend_request_fill)(struct _starpu_mpi_req *req, int is_internal_req); + void (*_starpu_mpi_backend_request_destroy)(struct _starpu_mpi_req *req); + void (*_starpu_mpi_backend_data_clear)(starpu_data_handle_t data_handle); + void (*_starpu_mpi_backend_data_register)(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag); + void (*_starpu_mpi_backend_comm_register)(MPI_Comm comm); + + int (*_starpu_mpi_backend_progress_init)(struct _starpu_mpi_argc_argv *argc_argv); + void (*_starpu_mpi_backend_progress_shutdown)(void **value); +#ifdef STARPU_SIMGRID + void (*_starpu_mpi_backend_wait_for_initialization)(); +#endif + + int (*_starpu_mpi_backend_barrier)(MPI_Comm comm); + int (*_starpu_mpi_backend_wait_for_all)(MPI_Comm comm); + int (*_starpu_mpi_backend_wait)(starpu_mpi_req *public_req, MPI_Status *status); + int (*_starpu_mpi_backend_test)(starpu_mpi_req *public_req, int *flag, MPI_Status *status); + + void (*_starpu_mpi_backend_isend_size_func)(struct _starpu_mpi_req *req); + void (*_starpu_mpi_backend_irecv_size_func)(struct _starpu_mpi_req *req); +}; + +extern struct _starpu_mpi_backend _mpi_backend; +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_PRIVATE_H__ diff --git a/mpi/src/starpu_mpi_req.c b/mpi/src/starpu_mpi_req.c new file mode 100644 index 0000000..661db0a --- /dev/null +++ b/mpi/src/starpu_mpi_req.c @@ -0,0 +1,115 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Guillaume Beauchamp + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +void _starpu_mpi_request_init(struct _starpu_mpi_req **req) /* Allocate a zero-filled request into *req, then set the fields whose default is not zero */ +{ + _STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req)); + + /* Do not remove the commented lines, we keep them to make sure we did not forget to initialize a field */ + /* Initialize the request structure */ + //(*req)->data_handle = NULL; + //(*req)->prio = 0; + (*req)->node = (unsigned) -1; + + //(*req)->datatype = 0; + //(*req)->datatype_name = NULL; + //(*req)->ptr = NULL; + (*req)->count = -1; + (*req)->registered_datatype = -1; + + (*req)->node_tag.node.rank = -1; + (*req)->node_tag.data_tag = -1; + //(*req)->node_tag.node.comm = 0; + + //(*req)->func = NULL; + + //(*req)->status = NULL; + //(*req)->flag = NULL; + _starpu_mpi_req_multilist_init_coop_sends(*req); + + (*req)->ret = -1; + + (*req)->request_type = UNKNOWN_REQ; + + //(*req)->submitted = 0; + //(*req)->completed = 0; + //(*req)->posted = 0; + + //(*req)->sync = 0; + (*req)->detached = -1; + //(*req)->callback = NULL; + //(*req)->callback_arg = NULL; + + (*req)->sequential_consistency = 1; + (*req)->pre_sync_jobid = -1; + (*req)->post_sync_jobid = -1; + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_init(&((*req)->queue)); + starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue)); + //(*req)->done = 0; +#endif +
_mpi_backend._starpu_mpi_backend_request_init(*req); /* last step: hand the request to the backend's request_init hook */ +} + +struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handle, + int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm, + unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, + enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *), + int sequential_consistency, + int is_internal_req, + starpu_ssize_t count) /* Create a request with _starpu_mpi_request_init(), copy the arguments into it, call the backend's request_fill hook and return the request */ +{ + struct _starpu_mpi_req *req; + + /* Initialize the request structure */ + _starpu_mpi_request_init(&req); + req->request_type = request_type; + /* prio_list is sorted by increasing values */ + if (_starpu_mpi_use_prio) + req->prio = prio; /* otherwise prio keeps the 0 left by the zero-filled allocation */ + req->data_handle = data_handle; + req->node_tag.node.rank = srcdst; + req->node_tag.data_tag = data_tag; + req->node_tag.node.comm = comm; + req->detached = detached; + req->sync = sync; + req->callback = callback; + req->callback_arg = arg; + req->func = func; + req->sequential_consistency = sequential_consistency; + req->count = count; + + _mpi_backend._starpu_mpi_backend_request_fill(req, is_internal_req); + + return req; +} + +void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req) /* Release a request: backend request_destroy hook first, then datatype_name, the SimGrid queue when built with STARPU_SIMGRID, and finally the request itself */ +{ + _mpi_backend._starpu_mpi_backend_request_destroy(req); + free(req->datatype_name); + req->datatype_name = NULL; +#ifdef STARPU_SIMGRID + starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &req->queue); + starpu_pthread_queue_destroy(&req->queue); +#endif + free(req); +} diff --git a/mpi/src/starpu_mpi_select_node.c b/mpi/src/starpu_mpi_select_node.c new file mode 100644 index 0000000..c45fda0 --- /dev/null +++ b/mpi/src/starpu_mpi_select_node.c @@ -0,0 +1,126 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +static int _current_policy = STARPU_MPI_NODE_SELECTION_MOST_R_DATA; +static int _last_predefined_policy = STARPU_MPI_NODE_SELECTION_MOST_R_DATA; +static starpu_mpi_select_node_policy_func_t _policies[_STARPU_MPI_NODE_SELECTION_MAX_POLICY]; + +int _starpu_mpi_select_node_with_most_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data); + +void _starpu_mpi_select_node_init() /* Install the predefined policy and mark every other slot of _policies[] as unused */ +{ + int i; + + _policies[STARPU_MPI_NODE_SELECTION_MOST_R_DATA] = _starpu_mpi_select_node_with_most_data; + for(i=_last_predefined_policy+1 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY ; i++) + _policies[i] = NULL; +} + +int starpu_mpi_node_selection_get_current_policy() +{ + return _current_policy; +} + +int starpu_mpi_node_selection_set_current_policy(int policy) /* Make policy the current one; it must be the index of a registered slot of _policies[]. Returns 0 */ +{ + STARPU_ASSERT_MSG(policy >= 0 && policy < _STARPU_MPI_NODE_SELECTION_MAX_POLICY && _policies[policy] != NULL, "Policy %d invalid.\n", policy); /* range is checked before _policies[] is indexed */ + _current_policy = policy; + return 0; +} + +int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func) /* Store policy_func in the first unused user slot and return its index */ +{ + int i=_last_predefined_policy+1; + // Look for a unregistered policy + while(i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY) + { + if (_policies[i] == NULL) + break; + i++; + } + STARPU_ASSERT_MSG(i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY, "No unused policy available.
Unregister existing policies before registering a new one."); + _policies[i] = policy_func; + return i; +} + +int starpu_mpi_node_selection_unregister_policy(int policy) /* Free a user-registered slot of _policies[]. Returns 0 */ +{ + STARPU_ASSERT_MSG(policy > _last_predefined_policy && policy < _STARPU_MPI_NODE_SELECTION_MAX_POLICY, "Policy %d invalid. Only user-registered policies can be unregistered\n", policy); /* upper bound added: the store below must stay inside _policies[] */ + _policies[policy] = NULL; + return 0; +} + +int _starpu_mpi_select_node_with_most_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) +{ + size_t *size_on_nodes; + size_t max_size; + int i; + int xrank = 0; + + (void)me; + _STARPU_MPI_CALLOC(size_on_nodes, nb_nodes, sizeof(size_t)); + + for(i= 0 ; iops->get_size(data); + + if (rank == STARPU_MPI_PER_NODE) + /* Each of them has it */ + continue; + + if (mode & STARPU_R) + size_on_nodes[rank] += size; + + if (mode & STARPU_W) + /* Would have to transfer it back */ + size_on_nodes[rank] += size; + } + + max_size = 0; + for(i=0 ; i max_size) + { + max_size = size_on_nodes[i]; + xrank = i; + } + } + + free(size_on_nodes); + return xrank; +} + +int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, int policy) /* Run the requested policy (or the current one when policy is STARPU_MPI_NODE_SELECTION_CURRENT_POLICY) and return the rank it picks */ +{ + int ppolicy = policy == STARPU_MPI_NODE_SELECTION_CURRENT_POLICY ? _current_policy : policy; + STARPU_ASSERT_MSG(ppolicy >= 0 && ppolicy < _STARPU_MPI_NODE_SELECTION_MAX_POLICY, "Invalid policy %d\n", ppolicy); /* a negative index would also read outside _policies[] */ + STARPU_ASSERT_MSG(_policies[ppolicy], "Unregistered policy %d\n", ppolicy); + starpu_mpi_select_node_policy_func_t func = _policies[ppolicy]; + return func(me, nb_nodes, descr, nb_data); +} diff --git a/mpi/src/starpu_mpi_select_node.h b/mpi/src/starpu_mpi_select_node.h new file mode 100644 index 0000000..b399bce --- /dev/null +++ b/mpi/src/starpu_mpi_select_node.h @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_SELECT_NODE_H__ +#define __STARPU_MPI_SELECT_NODE_H__ + +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define _STARPU_MPI_NODE_SELECTION_MAX_POLICY 24 /* number of slots in the policy table of starpu_mpi_select_node.c */ + +void _starpu_mpi_select_node_init(void); /* (void) makes this a real prototype, like the declarations in starpu_mpi_stats.h */ +int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, int policy); + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_SELECT_NODE_H__ diff --git a/mpi/src/starpu_mpi_stats.c b/mpi/src/starpu_mpi_stats.c new file mode 100644 index 0000000..48d9082 --- /dev/null +++ b/mpi/src/starpu_mpi_stats.c @@ -0,0 +1,210 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include +#include +#include + +/* measure the amount of data transfers between each pair of MPI nodes */ +static size_t *comm_amount = NULL; +static size_t comm_amount_memnode[STARPU_MAXNODES]; +static int world_size; +static int stats_enabled = 0; +static int nb_coop; +static int* nb_nodes_per_coop = NULL; +static double time_init; +static MPI_Comm comm_init; +static int nb_sends = 0; +static size_t max_sent_size = 0; +#ifdef STARPU_USE_MPI_NMAD +static struct _starpu_spinlock stats_lock; +#endif + +void _starpu_mpi_comm_amounts_init(MPI_Comm comm) /* Read STARPU_MPI_STATS (or the legacy STARPU_COMM_STATS) and, when statistics are enabled, allocate the per-destination counters for comm */ +{ + if (stats_enabled != 1) + { + time_init = starpu_timing_now(); + comm_init = comm; + stats_enabled = starpu_getenv_number("STARPU_MPI_STATS"); + if (stats_enabled == -1) + { + /* Legacy env var */ + stats_enabled = starpu_getenv_number("STARPU_COMM_STATS"); + } + if (stats_enabled == -1) + { + stats_enabled = 0; + } + } + if (stats_enabled == 0) + return; + + _STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_STATS=1, which slows down a bit\n"); + + starpu_mpi_comm_size(comm, &world_size); + _STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size); + + _STARPU_MPI_CALLOC(comm_amount, world_size, sizeof(size_t)); + + nb_coop = 0; + _STARPU_MPI_CALLOC(nb_nodes_per_coop, world_size, sizeof(int)); + +#ifdef STARPU_USE_MPI_NMAD + _starpu_spin_init(&stats_lock); +#endif +} + +void _starpu_mpi_comm_stats_disable() +{ + stats_enabled = 0; +} + +void _starpu_mpi_comm_stats_enable() +{ + stats_enabled = 1; + if (comm_amount == NULL) + { + _starpu_mpi_comm_amounts_init(comm_init); + } +} + +void _starpu_mpi_comm_amounts_shutdown() +{ + if (comm_amount) + { + free(comm_amount); + free(nb_nodes_per_coop); + comm_amount = NULL; + nb_nodes_per_coop = NULL; + +#ifdef STARPU_USE_MPI_NMAD + _starpu_spin_destroy(&stats_lock); +#endif + } +} + +void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned memnode, unsigned dst, MPI_Datatype datatype, int count) /* Account for count elements of datatype sent to rank dst from memory node memnode; no-op when statistics are disabled */ +{ + int src, size; + + if
(stats_enabled == 0) + return; + + starpu_mpi_comm_rank(comm, &src); + MPI_Type_size(datatype, &size); + + _STARPU_MPI_DEBUG(1, "[%d] adding %d to %d, from node %d\n", src, count*size, dst, memnode); + + STARPU_ASSERT(memnode < starpu_memory_nodes_get_count()); + +#ifdef STARPU_USE_MPI_NMAD + /* With NewMadeleine, the send requests are triggered from the workers, so + * this is a critical section. */ + _starpu_spin_lock(&stats_lock); +#endif + + comm_amount[dst] += (size_t) count*size; /* widen before multiplying: count*size computed as int can overflow for large messages */ + comm_amount_memnode[memnode] += (size_t) count*size; + + if (((size_t) count*size) > max_sent_size) + { + max_sent_size = (size_t) count*size; + } + + nb_sends++; + +#ifdef STARPU_USE_MPI_NMAD + _starpu_spin_unlock(&stats_lock); +#endif +} + +void _starpu_mpi_nb_coop_inc(int nb_nodes_in_coop) /* Count one more cooperative send involving nb_nodes_in_coop nodes; no-op when statistics are disabled */ +{ + if (stats_enabled == 0) + return; + + assert(nb_nodes_in_coop > 0); + assert(nb_nodes_in_coop < world_size); + +#ifdef STARPU_USE_MPI_NMAD + STARPU_ATTRIBUTE_UNUSED size_t dummy = STARPU_ATOMIC_ADD(&nb_coop, 1); + dummy = STARPU_ATOMIC_ADD(&nb_nodes_per_coop[nb_nodes_in_coop-1], 1); +#else + nb_coop++; + nb_nodes_per_coop[nb_nodes_in_coop-1]++; +#endif +} + +void starpu_mpi_comm_stats_retrieve(size_t *comm_stats) /* Copy the world_size per-destination byte counters into comm_stats, if they were allocated */ +{ + if (comm_amount) + memcpy(comm_stats, comm_amount, world_size * sizeof(size_t)); +} + +void _starpu_mpi_comm_amounts_display(FILE *stream, int node) /* Print the accumulated statistics, labelled with rank node, to stream; does nothing when the counters were never allocated */ +{ + int dst; + size_t sum = 0; + + if (comm_amount == NULL) + return; + + double time = starpu_timing_now() - time_init; + + for (dst = 0; dst < world_size; dst++) + { + sum += comm_amount[dst]; + } + + fprintf(stream, "\n[starpu_comm_stats][%d] TOTAL:\t%f B\t%f MB\t %f B/s\t %f MB/s\n", node, (float)sum, (float)sum/1024/1024, (float)sum/(float)time, (float)sum/1024/1024/(float)time); /* MB/s uses the same 1024*1024 divisor as the MB column */ + + fprintf(stream, "[starpu_comm_stats][%d] nb_sends: %d\n", node, nb_sends); + fprintf(stream, "[starpu_comm_stats][%d] max_sent_size: %zu\n", node, max_sent_size); /* size_t is printed with %zu */ + fprintf(stream, "[starpu_comm_stats][%d] average sent size: %zu\n", node, nb_sends ?
sum / nb_sends : 0); + + for (dst = 0; dst < world_size; dst++) + { + if (comm_amount[dst]) + fprintf(stream, "[starpu_comm_stats][%d:%d]\t%f B\t%f MB\t %f B/s\t %f MB/s\n", + node, dst, (float)comm_amount[dst], ((float)comm_amount[dst])/(1024*1024), + (float)comm_amount[dst]/(float)time, ((float)comm_amount[dst])/(1024*1024)/(float)time); + } + + char name[32]; + unsigned xdst; + for (xdst = 0; xdst < starpu_memory_nodes_get_count(); xdst++) + { + if (comm_amount_memnode[xdst]) + { + starpu_memory_node_get_name(xdst, name, sizeof(name)); + fprintf(stream, "[starpu_comm_stats_memnode][%d:%s]\t%f B\t%f MB\t %f B/s\t %f MB/s\n", + node, name, (float)comm_amount_memnode[xdst], ((float)comm_amount_memnode[xdst])/(1024*1024), + (float)comm_amount_memnode[xdst]/(float)time, ((float)comm_amount_memnode[xdst])/(1024*1024)/(float)time); + } + } + + fprintf(stream, "[starpu_comm_stats][%d] NB_COOP: %d\n", node, nb_coop); + for (dst = 0; dst < world_size; dst++) + { + if (nb_nodes_per_coop[dst] != 0) + { + fprintf(stream, "[starpu_comm_stats][%d]\t %d in coop: %d (%f%%)\n", node, dst+1, nb_nodes_per_coop[dst], nb_coop ? (((float) nb_nodes_per_coop[dst]) / nb_coop) * 100. : 0); + } + } +} diff --git a/mpi/src/starpu_mpi_stats.h b/mpi/src/starpu_mpi_stats.h new file mode 100644 index 0000000..159b62a --- /dev/null +++ b/mpi/src/starpu_mpi_stats.h @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_MPI_STATS_H__ +#define __STARPU_MPI_STATS_H__ + +#include +#include +#include + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_mpi_comm_amounts_init(MPI_Comm comm); +void _starpu_mpi_comm_stats_disable(void); +void _starpu_mpi_comm_stats_enable(void); +void _starpu_mpi_comm_amounts_shutdown(void); +void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned memnode, unsigned dst, MPI_Datatype datatype, int count); +void _starpu_mpi_nb_coop_inc(int nb_nodes_in_coop); +void _starpu_mpi_comm_amounts_display(FILE *stream, int node); + +#ifdef __cplusplus +} +#endif + +#endif // __STARPU_MPI_STATS_H__ diff --git a/mpi/src/starpu_mpi_tags.c b/mpi/src/starpu_mpi_tags.c new file mode 100644 index 0000000..ff360c0 --- /dev/null +++ b/mpi/src/starpu_mpi_tags.c @@ -0,0 +1,141 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include +#include + +/** + * @brief Structure to store tags ranges + * + * List structure to manage the set of available tags.
+ */ +struct starpu_tags_range +{ + int64_t min; /**< Minimal value in the range */ + int64_t max; /**< Maximal value in the range */ + struct starpu_tags_range *next; /**< Pointer to the following range */ +}; + +static struct starpu_tags_range *cst_first = NULL; + +/** + * @brief StarPU tag upper bound + */ +static int64_t _starpu_tags_ub = 0; + +void _starpu_mpi_tags_init(void) /* Fetch the STARPU_MPI_TAG_UB attribute once and cache it in _starpu_tags_ub */ +{ + if (!_starpu_tags_ub) + { + int ok = 0; + void *tag_ub_p = NULL; + + starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &tag_ub_p, &ok); + STARPU_ASSERT_MSG(ok, "Failed to get the STARPU_MPI_TAG_UB attribute\n"); /* check the flag before the value is used */ + + _starpu_tags_ub = (int64_t)((intptr_t)tag_ub_p); /* cast to the type of _starpu_tags_ub */ + } +} + +int64_t starpu_mpi_tags_allocate(int64_t nbtags) /* Reserve nbtags consecutive tags in the first gap of the sorted range list that is large enough; returns the first tag of the new range, or -1 when nbtags is not positive */ +{ + struct starpu_tags_range *new; + struct starpu_tags_range *prev = NULL; + struct starpu_tags_range *current = cst_first; + int64_t min = 0; + int64_t max = (current == NULL) ? _starpu_tags_ub : current->min; + + if (nbtags <= 0) /* a negative count would pass the gap test below and insert a range with max < min */ + { + return -1; + } + STARPU_ASSERT(_starpu_tags_ub != 0); /* StarPU tag must be initialized */ + + while (((max - min) < nbtags) && (current != NULL)) + { + min = current->max; + prev = current; + current = current->next; + max = (current == NULL) ?
_starpu_tags_ub : current->min; + } + + if ((max - min) < nbtags) + { + _STARPU_ERROR("No space left in tags.\n" ); + return -1; + } + + _STARPU_MALLOC(new, sizeof(struct starpu_tags_range)); + new->min = min; + new->max = min + nbtags; + new->next = current; + if (prev == NULL) + { + cst_first = new; + } + else + { + STARPU_ASSERT(prev->next == current); + prev->next = new; + } + + _STARPU_MPI_DEBUG(0, "Allocates tag range %lld - %lld\n", (long long) min, (long long) (min + nbtags)); /* int64_t is not long on every platform, so print it as long long */ + + STARPU_ASSERT(cst_first != NULL); + return new->min; +} + +void starpu_mpi_tags_free(int64_t min) /* Release the range previously returned by starpu_mpi_tags_allocate() whose first tag is min */ +{ + struct starpu_tags_range *prev = NULL; + struct starpu_tags_range *current = cst_first; + + STARPU_ASSERT(cst_first != NULL); /* At least one range must be registered */ + + while ((current != NULL) && (current->min < min)) + { + prev = current; + current = current->next; + } + + if (current == NULL) + { + _STARPU_ERROR("Failed to release the tag range starting by %lld", (long long) min); + return; + } + + STARPU_ASSERT(current != NULL); + STARPU_ASSERT(current->min == min); + + if (prev) + { + prev->next = current->next; + } + else + { + STARPU_ASSERT(current == cst_first); + cst_first = current->next; + } + + _STARPU_MPI_DEBUG(0, "Free tag range %lld - %lld\n", (long long) current->min, (long long) current->max); + + free(current); + + return; +} diff --git a/mpi/src/starpu_mpi_task_insert.c b/mpi/src/starpu_mpi_task_insert.c new file mode 100644 index 0000000..ab43918 --- /dev/null +++ b/mpi/src/starpu_mpi_task_insert.c @@ -0,0 +1,1224 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "starpu_mpi_task_insert.h" + +#define _SEND_DATA(data, mode, dest, data_tag, prio, comm, callback, arg) \ + do { \ + if (mode & STARPU_SSEND) \ + return starpu_mpi_issend_detached_prio(data, dest, data_tag, prio, comm, callback, arg); \ + else \ + return starpu_mpi_isend_detached_prio(data, dest, data_tag, prio, comm, callback, arg); \ + } while (0) + +static void (*pre_submit_hook)(struct starpu_task *task) = NULL; + +/* reduction wrap-up */ +// entry in the table +struct _starpu_redux_data_entry +{ + UT_hash_handle hh; + starpu_data_handle_t data_handle; +}; +// the table +static struct _starpu_redux_data_entry *_redux_data = NULL; + +void _starpu_mpi_pre_submit_hook_call(struct starpu_task *task) +{ + if (pre_submit_hook) + pre_submit_hook(task); +} + +int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *)) +{ + if (pre_submit_hook) + _STARPU_MSG("Warning: a pre_submit_hook has already been registered. Please check if you really want to erase the previously registered hook.\n"); + pre_submit_hook = f; + return 0; +} + +int starpu_mpi_pre_submit_hook_unregister() +{ + pre_submit_hook = NULL; + return 0; +} + +int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank) +{ + if (mode & STARPU_W || mode & STARPU_REDUX) + { + if (!data) + { + /* We don't have anything allocated for this. 
+ * The application knows we won't do anything + * about this task */ + /* Yes, the app could actually not call + * task_insert at all itself, this is just a + * safeguard. */ + _STARPU_MPI_DEBUG(3, "oh oh\n"); + _STARPU_MPI_LOG_OUT(); + return -EINVAL; + } + + int mpi_rank = starpu_mpi_data_get_rank(data); + if (mpi_rank == -1) + { + _STARPU_ERROR("Data %p with mode STARPU_W needs to have a valid rank", data); + } + + if (*xrank == -1) + { + // No node has been selected yet + *xrank = mpi_rank; + _STARPU_MPI_DEBUG(100, "Codelet is going to be executed by node %d\n", *xrank); + *do_execute = mpi_rank == STARPU_MPI_PER_NODE || (mpi_rank == me); + } + else if (mpi_rank != *xrank) + { + _STARPU_MPI_DEBUG(100, "Another node %d had already been selected to execute the codelet, can't now set %d\n", *xrank, mpi_rank); + *inconsistent_execute = 1; + if (*xrank == STARPU_MPI_PER_NODE) + _STARPU_ERROR("Data %p has rank %d but we had STARPU_MPI_PER_NODE data before that", data, mpi_rank); + else if (mpi_rank == STARPU_MPI_PER_NODE) + _STARPU_ERROR("Data %p has rank STARPU_MPI_PER_NODE but we had non-STARPU_MPI_PER_NODE data before that (rank %d)", data, *xrank); + } + } + _STARPU_MPI_DEBUG(100, "Executing: inconsistent=%d, do_execute=%d, xrank=%d\n", *inconsistent_execute, *do_execute, *xrank); + return 0; +} + +int _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm) +{ + if (data && xrank == STARPU_MPI_PER_NODE) + { + STARPU_ASSERT_MSG(starpu_mpi_data_get_rank(data) == STARPU_MPI_PER_NODE, "If task is replicated, it has to access only per-node data"); + } + if (data && mode & STARPU_R && !(mode & STARPU_MPI_REDUX)) + { + int mpi_rank = starpu_mpi_data_get_rank(data); + starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data); + if (mpi_rank == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); + } + + 
if (do_execute && mpi_rank != STARPU_MPI_PER_NODE && mpi_rank != me) + { + /* The node is going to execute the codelet, but it does not own the data, it needs to receive the data from the owner node */ + int already_received = starpu_mpi_cached_receive_set(data); + if (already_received == 0) + { + if (data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + _STARPU_MPI_DEBUG(1, "Receiving data %p from %d with prio %d\n", data, mpi_rank, prio); + int ret = starpu_mpi_irecv_detached_prio(data, mpi_rank, data_tag, prio, comm, NULL, NULL); + if (ret) + return ret; + } + // else the node has already received the data + } + + if (!do_execute && mpi_rank == me) + { + /* The node owns the data, but another node is going to execute the codelet, the node needs to send the data to the executee node. */ + int already_sent = starpu_mpi_cached_send_set(data, xrank); + if (already_sent == 0) + { + if (data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + _STARPU_MPI_DEBUG(1, "Sending data %p to %d with prio %d\n", data, xrank, prio); + _SEND_DATA(data, mode, xrank, data_tag, prio, comm, NULL, NULL); + } + // Else the data has already been sent + } + } + return 0; +} + +static +int _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm) +{ + if (mode & STARPU_W && !(mode & STARPU_MPI_REDUX)) + { + int mpi_rank = starpu_mpi_data_get_rank(data); + starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data); + struct _starpu_mpi_data* mpi_data = _starpu_mpi_data_get(data); + if(mpi_rank == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); + } + mpi_data->modified=1; + if (mpi_rank == STARPU_MPI_PER_NODE) + { + mpi_rank = me; + } + if (mpi_rank == me) + { + if (xrank != -1 && (xrank != 
STARPU_MPI_PER_NODE && me != xrank)) + { + _STARPU_MPI_DEBUG(1, "Receive data %p back from the task %d which executed the codelet with prio %d...\n", data, xrank, prio); + if(data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + int ret = starpu_mpi_irecv_detached_prio(data, xrank, data_tag, prio, comm, NULL, NULL); + if (ret) + return ret; + } + } + else if (do_execute) + { + if(data_tag == -1) + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + _STARPU_MPI_DEBUG(1, "Send data %p back to its owner %d with prio %d...\n", data, mpi_rank, prio); + _SEND_DATA(data, mode, mpi_rank, data_tag, prio, comm, NULL, NULL); + } + } + return 0; +} + +static +void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int do_execute) +{ + if (_starpu_cache_enabled) + { + if ((mode & STARPU_W && !(mode & STARPU_MPI_REDUX)) || mode & STARPU_REDUX) + { + /* The data has been modified, it MUST be removed from the cache */ + starpu_mpi_cached_send_clear(data); + starpu_mpi_cached_receive_clear(data); + } + } + else + { + /* We allocated a temporary buffer for the received data, now drop it */ + if ((mode & STARPU_R && !(mode & STARPU_MPI_REDUX)) && do_execute) + { + int mpi_rank = starpu_mpi_data_get_rank(data); + if (mpi_rank == STARPU_MPI_PER_NODE) + { + mpi_rank = me; + } + if (mpi_rank != me && mpi_rank != -1) + { + starpu_data_invalidate_submit(data); + } + } + } +} + +static +int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, va_list varg_list) +{ + /* XXX: _fstarpu_mpi_task_decode_v needs to be updated at the same time */ + va_list varg_list_copy; + int inconsistent_execute = 0; + int arg_type; + int node_selected = 0; + int nb_allocated_data = 16; + struct starpu_data_descr 
*descrs; + int nb_data; + int prio = 0; + int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; + + _STARPU_TRACE_TASK_MPI_DECODE_START(); + + _STARPU_MPI_MALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + nb_data = 0; + *do_execute = -1; + *xrank = -1; + + va_copy(varg_list_copy, varg_list); + while ((arg_type = va_arg(varg_list_copy, int)) != 0) + { + int arg_type_nocommute = arg_type & ~STARPU_COMMUTE; + + if (arg_type==STARPU_EXECUTE_ON_NODE) + { + int rank = va_arg(varg_list_copy, int); + if (rank != -1) + { + *xrank = rank; + if (node_selected == 0) + { + _STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank); + *do_execute = 1; + node_selected = 1; + inconsistent_execute = 0; + } + } + } + else if (arg_type==STARPU_EXECUTE_ON_DATA) + { + starpu_data_handle_t data = va_arg(varg_list_copy, starpu_data_handle_t); + if (node_selected == 0) + { + *xrank = starpu_mpi_data_get_rank(data); + STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()"); + _STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank); + STARPU_ASSERT_MSG(*xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes); + *do_execute = 1; + node_selected = 1; + inconsistent_execute = 0; + } + } + else if (arg_type==STARPU_NONE) + { + (void)va_arg(varg_list_copy, starpu_data_handle_t); + } + else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) + { + starpu_data_handle_t data = va_arg(varg_list_copy, starpu_data_handle_t); + enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type; + if (node_selected == 0) + { + int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank); + if (ret == -EINVAL) + { + free(descrs); + va_end(varg_list_copy); + 
_STARPU_TRACE_TASK_MPI_DECODE_END(); + return ret; + } + } + if (nb_data >= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = data; + descrs[nb_data].mode = mode; + nb_data ++; + } + else if (arg_type == STARPU_DATA_ARRAY) + { + starpu_data_handle_t *data = va_arg(varg_list_copy, starpu_data_handle_t *); + int nb_handles = va_arg(varg_list_copy, int); + int i; + + for(i=0 ; inbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert"); + enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data); + if (node_selected == 0) + { + int ret = _starpu_mpi_find_executee_node(data[i], mode, me, do_execute, &inconsistent_execute, xrank); + if (ret == -EINVAL) + { + free(descrs); + va_end(varg_list_copy); + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return ret; + } + } + if (nb_data >= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = data[i]; + descrs[nb_data].mode = mode; + nb_data ++; + } + } + else if (arg_type == STARPU_DATA_MODE_ARRAY) + { + struct starpu_data_descr *_descrs = va_arg(varg_list_copy, struct starpu_data_descr*); + int nb_handles = va_arg(varg_list_copy, int); + int i; + + for(i=0 ; i= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = _descrs[i].handle; + descrs[nb_data].mode = mode; + nb_data ++; + } + } + else if (arg_type==STARPU_VALUE) + { + (void)va_arg(varg_list_copy, void *); + (void)va_arg(varg_list_copy, size_t); + } + else if (arg_type==STARPU_CL_ARGS) + { + (void)va_arg(varg_list_copy, void *); + (void)va_arg(varg_list_copy, size_t); + } + else if (arg_type==STARPU_CL_ARGS_NFREE) + { + (void)va_arg(varg_list_copy, void *); + 
(void)va_arg(varg_list_copy, size_t); + } + else if (arg_type==STARPU_TASK_DEPS_ARRAY) + { + (void)va_arg(varg_list_copy, unsigned); + (void)va_arg(varg_list_copy, struct starpu_task **); + } + else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) + { + (void)va_arg(varg_list_copy, unsigned); + (void)va_arg(varg_list_copy, struct starpu_task **); + } + else if (arg_type==STARPU_CALLBACK) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_CALLBACK_ARG) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_CALLBACK_ARG_NFREE) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_PRIORITY) + { + prio = va_arg(varg_list_copy, int); + } + /* STARPU_EXECUTE_ON_NODE handled above */ + /* STARPU_EXECUTE_ON_DATA handled above */ + /* STARPU_DATA_ARRAY handled above */ + /* STARPU_DATA_MODE_ARRAY handled above */ + else if (arg_type==STARPU_TAG) + { + (void)va_arg(varg_list_copy, starpu_tag_t); + } + else if (arg_type==STARPU_HYPERVISOR_TAG) + { + (void)va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_FLOPS) + { + (void)va_arg(varg_list_copy, double); + } + else if (arg_type==STARPU_SCHED_CTX) + { + (void)va_arg(varg_list_copy, unsigned); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) + { + (void)va_arg(varg_list_copy, void *); + } + else if 
(arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP) + { + (void)va_arg(varg_list_copy, _starpu_callback_func_t); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_EXECUTE_WHERE) + { + // the flag is decoded and set later when + // calling function _starpu_task_insert_create() + (void)va_arg(varg_list_copy, unsigned long long); + } + else if (arg_type==STARPU_EXECUTE_ON_WORKER) + { + // the flag is decoded and set later when + // calling function _starpu_task_insert_create() + (void)va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_TAG_ONLY) + { + (void)va_arg(varg_list_copy, starpu_tag_t); + } + else if (arg_type==STARPU_NAME) + { + (void)va_arg(varg_list_copy, const char *); + } + else if (arg_type==STARPU_POSSIBLY_PARALLEL) + { + (void)va_arg(varg_list_copy, unsigned); + } + else if (arg_type==STARPU_WORKER_ORDER) + { + // the flag is decoded and set later when + // calling function _starpu_task_insert_create() + (void)va_arg(varg_list_copy, unsigned); + } + else if (arg_type==STARPU_NODE_SELECTION_POLICY) + { + select_node_policy = va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_TASK_COLOR) + { + (void)va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_TASK_SYNCHRONOUS) + { + (void)va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_TRANSACTION) + { + (void)va_arg(varg_list_copy, struct starpu_transaction *); + } + else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) + { + (void)va_arg(varg_list_copy, char *); + } +#ifdef STARPU_BUBBLE + else if (arg_type==STARPU_BUBBLE_FUNC) + { + STARPU_ASSERT_MSG(0, "Bubbles + MPI not supported yet\n"); + (void)va_arg(varg_list,void*); + } + else if (arg_type==STARPU_BUBBLE_FUNC_ARG) + { + 
(void)va_arg(varg_list,void*); + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) + { + (void)va_arg(varg_list,void*); + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) + { + (void)va_arg(varg_list,void*); + } +#endif + else if (arg_type==STARPU_TASK_END_DEP) + { + (void)va_arg(varg_list_copy, int); + } + else if (arg_type==STARPU_TASK_WORKERIDS) + { + (void)va_arg(varg_list_copy, unsigned); + (void)va_arg(varg_list_copy, uint32_t*); + } + else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) + { + (void)va_arg(varg_list_copy, unsigned); + } + else if (arg_type==STARPU_TASK_PROFILING_INFO) + { + (void)va_arg(varg_list_copy, struct starpu_profiling_task_info *); + } + else if (arg_type==STARPU_TASK_NO_SUBMITORDER) + { + (void)va_arg(varg_list_copy, unsigned); + } + else if (arg_type==STARPU_TASK_SCHED_DATA) + { + (void)va_arg(varg_list_copy, void *); + } + else if (arg_type==STARPU_TASK_FILE) + { + (void)va_arg(varg_list_copy, const char *); + } + else if (arg_type==STARPU_TASK_LINE) + { + (void)va_arg(varg_list_copy, int); + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); + } + + } + va_end(varg_list_copy); + + if (inconsistent_execute == 1 || *xrank == -1) + { + // We need to find out which node is going to execute the codelet. + _STARPU_MPI_DEBUG(100, "Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. 
See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); + *xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); + *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); + } + else + { + _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank); + *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); + } + _STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute); + + *descrs_p = descrs; + *nb_data_p = nb_data; + *prio_p = prio; + + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return 0; +} + +static +int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, va_list varg_list) +{ + int me, do_execute, xrank, nb_nodes; + int ret; + int i; + struct starpu_data_descr *descrs = NULL; + int nb_data; + int prio; + + _STARPU_MPI_LOG_IN(); + + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + + /* Find out whether we are to execute the data because we own the data to be written to. 
*/ + ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); + if (ret < 0) + return ret; + + _STARPU_TRACE_TASK_MPI_PRE_START(); + /* Send and receive data as requested */ + for(i=0 ; impi_data) + { + char *redux_map = starpu_mpi_data_get_redux_map(descrs[i].handle); + if (redux_map != NULL && descrs[i].mode & STARPU_R && descrs[i].mode & ~ STARPU_REDUX && descrs[i].mode & ~ STARPU_MPI_REDUX) + { + _starpu_mpi_redux_wrapup_data(descrs[i].handle); + } + } + _starpu_mpi_exchange_data_before_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); + } + + if (xrank_p) + *xrank_p = xrank; + if (nb_data_p) + *nb_data_p = nb_data; + if (prio_p) + *prio_p = prio; + + if (descrs_p) + *descrs_p = descrs; + else + free(descrs); + + if (do_execute == 1) + { + va_list varg_list_copy; + _STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); + + *task = starpu_task_create(); + (*task)->cl_arg_free = 1; + (*task)->callback_arg_free = 1; + (*task)->prologue_callback_arg_free = 1; + (*task)->prologue_callback_pop_arg_free = 1; + + va_copy(varg_list_copy, varg_list); + _starpu_task_insert_create(codelet, *task, varg_list_copy); + va_end(varg_list_copy); + + if ((*task)->cl) + { + /* we suppose the current context is not going to change between now and the execution of the task */ + (*task)->sched_ctx = _starpu_sched_ctx_get_current_context(); + /* Check the type of worker(s) required by the task exist */ + if (STARPU_UNLIKELY(!_starpu_worker_exists(*task))) + { + _STARPU_MPI_DEBUG(0, "There is no worker to execute the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); + return -ENODEV; + } + + /* In case we require that a task should be explicitly + * executed on a specific worker, we make sure that the worker + * is able to execute this task. 
*/ + if (STARPU_UNLIKELY((*task)->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task((*task)->workerid, *task, 0))) + { + _STARPU_MPI_DEBUG(0, "The specified worker %d cannot execute the codelet %p (%s)\n", (*task)->workerid, codelet, codelet?codelet->name:NULL); + return -ENODEV; + } + } + } + + _STARPU_TRACE_TASK_MPI_PRE_END(); + + return do_execute; +} + +int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data, int prio) +{ + int me, i; + + _STARPU_TRACE_TASK_MPI_POST_START(); + starpu_mpi_comm_rank(comm, &me); + + for(i=0 ; impi_data; + int rrank = starpu_mpi_data_get_rank(descrs[i].handle); + int size; + starpu_mpi_comm_size(comm, &size); + if (mpi_data->redux_map == NULL) + { + _STARPU_CALLOC(mpi_data->redux_map, size, sizeof(mpi_data->redux_map[0])); + } + mpi_data->redux_map [xrank] = 1; + mpi_data->redux_map [rrank] = 1; + int outside_owner = 0; + int j; + for (j = 0; j < size; j++) + { + if (mpi_data->redux_map[j] && j != rrank) + { + outside_owner = 1; + break; + } + } + if (outside_owner) + { + struct _starpu_redux_data_entry *entry; + HASH_FIND_PTR(_redux_data, &descrs[i].handle, entry); + if (entry == NULL) + { + _STARPU_MPI_MALLOC(entry, sizeof(*entry)); + starpu_data_handle_t data_handle = descrs[i].handle; + entry->data_handle = data_handle; + HASH_ADD_PTR(_redux_data, data_handle, entry); + } + } + } + _starpu_mpi_exchange_data_after_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); + _starpu_mpi_clear_data_after_execution(descrs[i].handle, descrs[i].mode, me, do_execute); + } + + _STARPU_TRACE_TASK_MPI_POST_END(); + _STARPU_MPI_LOG_OUT(); + return 0; +} + +static +int _starpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) +{ + struct starpu_task *task; + int ret; + int xrank; + int do_execute = 0; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + ret = 
_starpu_mpi_task_build_v(comm, codelet, &task, &xrank, &descrs, &nb_data, &prio, varg_list); + if (ret < 0) + return ret; + + if (ret == 1) + { + do_execute = 1; + ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", + task, task->cl, + (codelet == NULL) ? "none" : + task->cl->name ? task->cl->name : + (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); + + task->destroy = 0; + starpu_task_destroy(task); + free(descrs); + return -ENODEV; + } + } + + int val = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); + free(descrs); + + if (ret == 1) + _starpu_mpi_pre_submit_hook_call(task); + + return val; +} + +#undef starpu_mpi_task_insert +int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...) +{ + va_list varg_list; + int ret; + + va_start(varg_list, codelet); + ret = _starpu_mpi_task_insert_v(comm, codelet, varg_list); + va_end(varg_list); + return ret; +} + +#undef starpu_mpi_insert_task +int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...) +{ + va_list varg_list; + int ret; + + va_start(varg_list, codelet); + ret = _starpu_mpi_task_insert_v(comm, codelet, varg_list); + va_end(varg_list); + return ret; +} + +#undef starpu_mpi_task_build +struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...) +{ + va_list varg_list; + struct starpu_task *task; + int ret; + + va_start(varg_list, codelet); + ret = _starpu_mpi_task_build_v(comm, codelet, &task, NULL, NULL, NULL, NULL, varg_list); + va_end(varg_list); + return (ret == 1 || ret == -ENODEV) ? 
task : NULL; +} + +struct starpu_task *starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) +{ + struct starpu_task *task; + int ret; + + ret = _starpu_mpi_task_build_v(comm, codelet, &task, NULL, NULL, NULL, NULL, varg_list); + return (ret == 1 || ret == -ENODEV) ? task : NULL; +} + +int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...) +{ + int xrank, do_execute; + int ret, me, nb_nodes; + va_list varg_list; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + + va_start(varg_list, codelet); + /* Find out whether we are to execute the data because we own the data to be written to. */ + ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); + va_end(varg_list); + if (ret < 0) + return ret; + + ret = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); + free(descrs); + return ret; +} + +int starpu_mpi_task_post_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) +{ + int xrank, do_execute; + int ret, me, nb_nodes; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + + /* Find out whether we are to execute the data because we own the data to be written to. 
*/ + ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); + if (ret < 0) + return ret; + + ret = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); + free(descrs); + return ret; +} + +int starpu_mpi_task_exchange_data_before_execution(MPI_Comm comm, struct starpu_task *task, struct starpu_data_descr *descrs, struct starpu_mpi_task_exchange_params *params) +{ + int me, nb_nodes, inconsistent_execute; + unsigned i; + int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; + unsigned nb_data; + + nb_data = STARPU_TASK_GET_NBUFFERS(task); + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + params->xrank = -1; + inconsistent_execute = 0; + for(i=0 ; ido_execute), + &inconsistent_execute, &(params->xrank)); + if (ret == -EINVAL) + { + return ret; + } + } + if (inconsistent_execute == 1 || params->xrank == -1) + { + // We need to find out which node is going to execute the codelet. + _STARPU_MPI_DEBUG(100, "Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. 
See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); + params->xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); + params->do_execute = (params->xrank == STARPU_MPI_PER_NODE) || (me == params->xrank); + } + else + { + _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, params->xrank); + params->do_execute = (params->xrank == STARPU_MPI_PER_NODE) || (me == params->xrank); + } + + for(i=0 ; ixrank, + params->do_execute, + task->priority, + comm); + } + + params->priority = task->priority; + return 0; +} + +int starpu_mpi_task_exchange_data_after_execution(MPI_Comm comm, struct starpu_data_descr *descrs, unsigned nb_data, struct starpu_mpi_task_exchange_params params) +{ + return _starpu_mpi_task_postbuild_v(comm, params.xrank, params.do_execute, descrs, nb_data, params.priority); +} + +struct starpu_codelet _starpu_mpi_redux_data_synchro_cl = +{ + .where = STARPU_NOWHERE, + .modes = {STARPU_R, STARPU_W}, + .nbuffers = 2 +}; + +struct _starpu_mpi_redux_data_args +{ + starpu_data_handle_t data_handle; + starpu_data_handle_t new_handle; + starpu_mpi_tag_t data_tag; + int node; + MPI_Comm comm; + struct starpu_task *taskB; + long taskC_jobid; +}; + +void _starpu_mpi_redux_fill_post_sync_jobid(const void * const redux_data_args, long * const post_sync_jobid) +{ + *post_sync_jobid = ((const struct _starpu_mpi_redux_data_args *) redux_data_args)->taskC_jobid; +} + +int starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity) +{ + int me, rank, nb_nodes; + starpu_mpi_tag_t data_tag; + + rank = starpu_mpi_data_get_rank(data_handle); + data_tag = starpu_mpi_data_get_tag(data_handle); + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + if (rank == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); + } + if 
(data_tag == -1) + { + _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); + } + if (mpi_data->redux_map == NULL) + { + _STARPU_MPI_DEBUG(5, "I do not contribute to this reduction\n"); + return 0; + } + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + struct _starpu_redux_data_entry *entry; + HASH_FIND_PTR(_redux_data, &data_handle, entry); + +#ifdef STARPU_MPI_VERBOSE + int current_level=0; +#endif + int nb_contrib, next_nb_contrib; + int i, j, step, node; + char root_in_step, me_in_step; + // https://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer + // https://stackoverflow.com/a/109025 + // see hamming weight + //nb_contrib = std::popcount(mpi_data->redux_map); // most preferable + nb_contrib=0; + for (i=0;iredux_map[%d] = %d\n", i, mpi_data->redux_map[i]); + if (mpi_data->redux_map[i]) nb_contrib++; + } + if (nb_contrib < 2) + { + _STARPU_MPI_DEBUG(5, "Not enough contributors to create a n-ary reduction tree.\n"); + /* duplicated at the end of this function */ + if (entry != NULL) + { + HASH_DEL(_redux_data, entry); + free(entry); + } + free(mpi_data->redux_map); + mpi_data->redux_map = NULL; + return 0; + } + if (arity < 2) + { + arity = nb_contrib; + } + arity = STARPU_MIN(arity,nb_contrib); + _STARPU_MPI_DEBUG(5, "There is %d contributors\n", nb_contrib); + int contributors[nb_contrib]; + int reducing_node; + j=0; + for (i=0;iredux_map[i]); + if (mpi_data->redux_map[i]) + { + contributors[j++] = i; + } + } + for (i=0;iredux_cl, + STARPU_RW|STARPU_COMMUTE, data_handle, + STARPU_R, new_handle, + STARPU_PRIORITY, prio, + STARPU_NAME, "redux_prio_tree_redux_cl", + 0); + if (ret) + return ret; + starpu_data_unregister_submit(new_handle); + } + } + } + else if (me_in_step) + { + _STARPU_MPI_DEBUG(5, "Sending redux handle to %d ...\n", reducing_node); + int ret = starpu_mpi_isend_detached_prio(data_handle, reducing_node, data_tag, prio, comm, NULL, 
NULL); + if (ret) + return ret; + starpu_data_invalidate_submit(data_handle); + } + contributors[step] = reducing_node; + } + nb_contrib = next_nb_contrib; +#ifdef STARPU_MPI_VERBOSE + current_level++; +#endif + } + + /* duplicated when not enough contributors */ + if (entry != NULL) + { + HASH_DEL(_redux_data, entry); + free(entry); + } + free(mpi_data->redux_map); + mpi_data->redux_map = NULL; + return 0; +} + +int starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle) +{ + return starpu_mpi_redux_data_prio(comm, data_handle, 0); +} + +int starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity) +{ + return starpu_mpi_redux_data_prio_tree(comm, data_handle, 0, arity); +} + +int starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio) +{ + int nb_nodes, nb_contrib, i; + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + if (mpi_data->redux_map == NULL) + { + _STARPU_MPI_DEBUG(5, "I do not contribute to this reduction\n"); + return 0; + } + starpu_mpi_comm_size(comm, &nb_nodes); + nb_contrib=0; + for (i=0;iredux_map[i]) + { + nb_contrib++; + } + } + return starpu_mpi_redux_data_prio_tree(comm, data_handle, prio, nb_contrib); +} + +void _starpu_mpi_redux_wrapup_data(starpu_data_handle_t data_handle) +{ + // We could check if the handle makes sense but we do not because it helps the programmer using coherent + // distributed-memory reduction patterns + size_t data_size = starpu_data_get_size(data_handle); + // Small data => flat tree | binary tree + int _starpu_mpi_redux_threshold = starpu_getenv_number_default("STARPU_MPI_REDUX_ARITY_THRESHOLD", 1024); + int _starpu_mpi_redux_tree_size = 2; + if (_starpu_mpi_redux_threshold < 0 || (_starpu_mpi_redux_threshold > 0 && data_size < (size_t) _starpu_mpi_redux_threshold)) + { + _starpu_mpi_redux_tree_size = STARPU_MAXNODES; + } + struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; + struct _starpu_redux_data_entry *entry; + + 
HASH_FIND_PTR(_redux_data, &data_handle, entry); + if (entry != NULL) + { + starpu_mpi_redux_data_tree(mpi_data->node_tag.node.comm,data_handle,_starpu_mpi_redux_tree_size); + } + return; +} + +void _starpu_mpi_redux_wrapup_data_all() +{ + struct _starpu_redux_data_entry *entry = NULL, *tmp = NULL; + HASH_ITER(hh, _redux_data, entry, tmp) + { + _starpu_mpi_redux_wrapup_data(entry->data_handle); + } + return; +} diff --git a/mpi/src/starpu_mpi_task_insert.h b/mpi/src/starpu_mpi_task_insert.h new file mode 100644 index 0000000..1da5065 --- /dev/null +++ b/mpi/src/starpu_mpi_task_insert.h @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_MPI_TASK_INSERT_H__ +#define __STARPU_MPI_TASK_INSERT_H__ + +/** @file */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank); +int _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm); +int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data, int prio); +void _starpu_mpi_redux_wrapup_data_all(); +void _starpu_mpi_redux_wrapup_data(starpu_data_handle_t data_handle); +void _starpu_mpi_pre_submit_hook_call(struct starpu_task *task); + +#ifdef __cplusplus +} +#endif +#endif /* __STARPU_MPI_TASK_INSERT_H__ */ diff --git a/mpi/src/starpu_mpi_task_insert_fortran.c b/mpi/src/starpu_mpi_task_insert_fortran.c new file mode 100644 index 0000000..c32e542 --- /dev/null +++ b/mpi/src/starpu_mpi_task_insert_fortran.c @@ -0,0 +1,638 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_MPI_COMM_F2C +static +int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, void **arglist) +{ + int arg_i = 0; + int inconsistent_execute = 0; + int node_selected = 0; + int nb_allocated_data = 16; + struct starpu_data_descr *descrs; + int nb_data; + int prio = 0; + int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; + + _STARPU_TRACE_TASK_MPI_DECODE_START(); + + _STARPU_MPI_MALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + nb_data = 0; + *do_execute = -1; + *xrank = -1; + + while (arglist[arg_i] != NULL) + { + int arg_type = (int)(intptr_t)arglist[arg_i]; + int arg_type_nocommute = arg_type & ~STARPU_COMMUTE; + + if (arg_type==STARPU_EXECUTE_ON_NODE) + { + arg_i++; + int rank = *(int *)arglist[arg_i]; + if (rank != -1) + { + *xrank = rank; + if (node_selected == 0) + { + _STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank); + *do_execute = 1; + node_selected = 1; + inconsistent_execute = 0; + } + } + } + else if (arg_type==STARPU_EXECUTE_ON_DATA) + { + arg_i++; + starpu_data_handle_t data = arglist[arg_i]; + if (node_selected == 0) + { + *xrank = starpu_mpi_data_get_rank(data); + STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()"); + _STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank); + STARPU_ASSERT_MSG(*xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes); + *do_execute = 1; + node_selected = 1; + inconsistent_execute = 0; + } + } + else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) + { + arg_i++; + 
starpu_data_handle_t data = arglist[arg_i]; + enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type; + if (node_selected == 0) + { + int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank); + if (ret == -EINVAL) + { + free(descrs); + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return ret; + } + } + if (nb_data >= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = data; + descrs[nb_data].mode = mode; + nb_data ++; + } + else if (arg_type == STARPU_DATA_ARRAY) + { + arg_i++; + starpu_data_handle_t *data = arglist[arg_i]; + arg_i++; + int nb_handles = *(int *)arglist[arg_i]; + int i; + + for(i=0 ; inbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert"); + enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data); + if (node_selected == 0) + { + int ret = _starpu_mpi_find_executee_node(data[i], mode, me, do_execute, &inconsistent_execute, xrank); + if (ret == -EINVAL) + { + free(descrs); + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return ret; + } + } + if (nb_data >= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = data[i]; + descrs[nb_data].mode = mode; + nb_data ++; + } + } + else if (arg_type == STARPU_DATA_MODE_ARRAY) + { + arg_i++; + struct starpu_data_descr *_descrs = arglist[arg_i]; + arg_i++; + int nb_handles = *(int *)arglist[arg_i]; + int i; + + for(i=0 ; i= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = _descrs[i].handle; + descrs[nb_data].mode = mode; + nb_data ++; + } + } + else if (arg_type==STARPU_VALUE) + { + arg_i++; + /* void* */ + arg_i++; + /* size_t */ + } + else if 
(arg_type==STARPU_CL_ARGS) + { + arg_i++; + /* void* */ + arg_i++; + /* size_t */ + } + else if (arg_type==STARPU_CL_ARGS_NFREE) + { + arg_i++; + /* void* */ + arg_i++; + /* size_t */ + } + else if (arg_type==STARPU_TASK_DEPS_ARRAY) + { + arg_i++; + /* unsigned */ + arg_i++; + /* struct starpu_task ** */ + } + else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) + { + arg_i++; + /* unsigned */ + arg_i++; + /* struct starpu_task ** */ + } + else if (arg_type==STARPU_CALLBACK) + { + arg_i++; + /* _starpu_callback_func_t */ + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG) + { + arg_i++; + /* _starpu_callback_func_t */ + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) + { + arg_i++; + /* _starpu_callback_func_t */ + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_CALLBACK_ARG) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_CALLBACK_ARG_NFREE) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK) + { + arg_i++; + /* _starpu_callback_func_t */ + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_PRIORITY) + { + arg_i++; + prio = *(int *)arglist[arg_i]; + /* int* */ + } + /* STARPU_EXECUTE_ON_NODE handled above */ + /* STARPU_EXECUTE_ON_DATA handled above */ + /* STARPU_DATA_ARRAY handled above */ + /* STARPU_DATA_MODE_ARRAY handled above */ + else if (arg_type==STARPU_TAG) + { + arg_i++; + /* starpu_tag_t* */ + } + else if (arg_type==STARPU_HYPERVISOR_TAG) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_FLOPS) + { + arg_i++; + /* double* */ + } + else if (arg_type==STARPU_SCHED_CTX) + { + arg_i++; + /* unsigned* */ + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK) + { + arg_i++; + /* _starpu_callback_func_t */ + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) + { + arg_i++; + /* void* */ + } + else if 
(arg_type==STARPU_PROLOGUE_CALLBACK_POP) + { + arg_i++; + /* _starpu_callback_func_t */ + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) + { + arg_i++; + /* void* */ + } + else if (arg_type==STARPU_EXECUTE_WHERE) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_EXECUTE_ON_WORKER) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_TAG_ONLY) + { + arg_i++; + /* starpu_tag_t* */ + } + else if (arg_type==STARPU_NAME) + { + arg_i++; + /* char* */ + } + else if (arg_type==STARPU_POSSIBLY_PARALLEL) + { + arg_i++; + /* unsigned* */ + } + else if (arg_type==STARPU_WORKER_ORDER) + { + arg_i++; + /* unsigned* */ + } + else if (arg_type==STARPU_NODE_SELECTION_POLICY) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_TASK_COLOR) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_TASK_SYNCHRONOUS) + { + arg_i++; + /* int* */ + } + else if (arg_type==STARPU_TRANSACTION) + { + arg_i++; + /* struct starpu_transaction * */ + } + else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) + { + arg_i++; + /* char* */ + } +#ifdef STARPU_BUBBLE + else if (arg_type==STARPU_BUBBLE_FUNC) + { + STARPU_ASSERT_MSG(0, "Bubbles + MPI not supported yet\n"); + arg_i++; + } + else if (arg_type==STARPU_BUBBLE_FUNC_ARG) + { + arg_i++; + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) + { + arg_i++; + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) + { + arg_i++; + } +#endif + else if (arg_type==STARPU_TASK_END_DEP) + { + arg_i++; + /* int */ + } + else if (arg_type==STARPU_TASK_WORKERIDS) + { + arg_i++; + /* unsigned */ + arg_i++; + /* uint32_t* */ + } + else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) + { + arg_i++; + /* unsigned */ + } + else if (arg_type==STARPU_TASK_PROFILING_INFO) + { + arg_i++; + /* struct starpu_profiling_task_info * */ + } + else if (arg_type==STARPU_TASK_NO_SUBMITORDER) + { + arg_i++; + /* unsigned */ + } + else if 
(arg_type==STARPU_TASK_SCHED_DATA) + { + arg_i++; + /* void * */ + } + else if (arg_type==STARPU_TASK_FILE) + { + arg_i++; + /* char* */ + } + else if (arg_type==STARPU_TASK_LINE) + { + arg_i++; + /* int */ + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); + } + + arg_i++; + } + + if (inconsistent_execute == 1 || *xrank == -1) + { + // We need to find out which node is going to execute the codelet. + _STARPU_MPI_DISP("Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); + *xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); + *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); + } + else + { + _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank); + *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); + } + _STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute); + + *descrs_p = descrs; + *nb_data_p = nb_data; + *prio_p = prio; + + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return 0; +} + +static +int _fstarpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, void **arglist) +{ + int me, do_execute, xrank, nb_nodes; + int ret; + int i; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + _STARPU_MPI_LOG_IN(); + + starpu_mpi_comm_rank(comm, &me); + starpu_mpi_comm_size(comm, &nb_nodes); + + /* Find out whether we are to execute the data because we own the data to be written to. 
*/ + ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist); + if (ret < 0) + return ret; + + _STARPU_TRACE_TASK_MPI_PRE_START(); + /* Send and receive data as requested */ + for(i=0 ; impi_data) + { + char *redux_map = starpu_mpi_data_get_redux_map(descrs[i].handle); + if (redux_map != NULL && descrs[i].mode & STARPU_R && descrs[i].mode & ~ STARPU_REDUX && descrs[i].mode & ~ STARPU_MPI_REDUX) + { + _starpu_mpi_redux_wrapup_data(descrs[i].handle); + } + } + _starpu_mpi_exchange_data_before_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); + } + + if (xrank_p) + *xrank_p = xrank; + if (nb_data_p) + *nb_data_p = nb_data; + if (prio_p) + *prio_p = prio; + + if (descrs_p) + *descrs_p = descrs; + else + free(descrs); + + if (do_execute == 1) + { + _STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); + + *task = starpu_task_create(); + (*task)->cl_arg_free = 1; + (*task)->callback_arg_free = 1; + (*task)->prologue_callback_arg_free = 1; + (*task)->prologue_callback_pop_arg_free = 1; + + _fstarpu_task_insert_create(codelet, *task, arglist); + + if ((*task)->cl) + { + /* we suppose the current context is not going to change between now and the execution of the task */ + (*task)->sched_ctx = _starpu_sched_ctx_get_current_context(); + /* Check the type of worker(s) required by the task exist */ + if (STARPU_UNLIKELY(!_starpu_worker_exists(*task))) + { + _STARPU_MPI_DEBUG(0, "There is no worker to execute the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); + return -ENODEV; + } + + /* In case we require that a task should be explicitly + * executed on a specific worker, we make sure that the worker + * is able to execute this task. 
*/ + if (STARPU_UNLIKELY((*task)->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task((*task)->workerid, *task, 0))) + { + _STARPU_MPI_DEBUG(0, "The specified worker %d cannot execute the codelet %p (%s)\n", (*task)->workerid, codelet, codelet?codelet->name:NULL); + return -ENODEV; + } + } + } + + _STARPU_TRACE_TASK_MPI_PRE_END(); + + return do_execute; +} + +static +int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, void **arglist) +{ + struct starpu_task *task; + int ret; + int xrank; + int do_execute = 0; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + ret = _fstarpu_mpi_task_build_v(comm, codelet, &task, &xrank, &descrs, &nb_data, &prio, arglist); + if (ret < 0) + return ret; + + if (ret == 1) + { + do_execute = 1; + ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", + task, task->cl, + (codelet == NULL) ? "none" : + task->cl->name ? 
task->cl->name : + (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); + + task->destroy = 0; + starpu_task_destroy(task); + free(descrs); + return -ENODEV; + } + } + + int val = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); + free(descrs); + + if (ret == 1) + _starpu_mpi_pre_submit_hook_call(task); + + return val; +} + +void fstarpu_mpi_task_insert(void **arglist) +{ + MPI_Fint comm = *((MPI_Fint *)arglist[0]); + struct starpu_codelet *codelet = arglist[1]; + if (codelet == NULL) + { + STARPU_ABORT_MSG("task without codelet"); + } + + int ret; + ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+2); + STARPU_ASSERT(ret >= 0); +} + +/* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */ + +struct starpu_task *fstarpu_mpi_task_build(void **arglist) +{ + MPI_Fint comm = *((MPI_Fint *)arglist[0]); + struct starpu_codelet *codelet = arglist[1]; + if (codelet == NULL) + { + STARPU_ABORT_MSG("task without codelet"); + } + struct starpu_task *task; + int ret; + + ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+2); + return (ret == 1 || ret == -ENODEV) ? task : NULL; +} + +void fstarpu_mpi_task_post_build(void **arglist) +{ + MPI_Fint comm = *((MPI_Fint *)arglist[0]); + struct starpu_codelet *codelet = arglist[1]; + if (codelet == NULL) + { + STARPU_ABORT_MSG("task without codelet"); + } + int xrank, do_execute; + int ret, me, nb_nodes; + struct starpu_data_descr *descrs; + int nb_data; + int prio; + + starpu_mpi_comm_rank(MPI_Comm_f2c(comm), &me); + starpu_mpi_comm_size(MPI_Comm_f2c(comm), &nb_nodes); + + /* Find out whether we are to execute the data because we own the data to be written to. 
*/ + ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist+2); + STARPU_ASSERT(ret >= 0); + + ret = _starpu_mpi_task_postbuild_v(MPI_Comm_f2c(comm), xrank, do_execute, descrs, nb_data, prio); + free(descrs); + STARPU_ASSERT(ret >= 0); +} + +#endif /* HAVE_MPI_COMM_F2C */ diff --git a/mpi/tests/Makefile.am b/mpi/tests/Makefile.am new file mode 100644 index 0000000..c48dce9 --- /dev/null +++ b/mpi/tests/Makefile.am @@ -0,0 +1,301 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Thibaut Lambert +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk + +SUFFIXES = .hip + +CC=$(MPICC) +CCLD=$(MPICC) + +include $(top_srcdir)/make/starpu-loader.mk + +LAUNCHER = $(STARPU_MPIEXEC) +LAUNCHER_ENV = $(MPI_RUN_ENV) + +if STARPU_SIMGRID +LOADER_BIN = $(LAUNCHER) +endif + +if STARPU_MPI_CHECK +TESTS = $(starpu_mpi_TESTS) +endif + +check_PROGRAMS = $(LOADER) $(starpu_mpi_TESTS) + +BUILT_SOURCES = + +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log + +EXTRA_DIST = \ + helper.h \ + user_defined_datatype_value.h + +examplebindir = $(libdir)/starpu/examples/mpi + +examplebin_PROGRAMS = + +AM_CFLAGS += $(APP_CFLAGS) +AM_CXXFLAGS += $(APP_CXXFLAGS) +AM_FFLAGS += $(APP_FFLAGS) +AM_FCFLAGS += $(APP_FCFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) + +######################## +# Unit testcases # +######################## + +starpu_mpi_TESTS = + +starpu_mpi_TESTS += \ + callback \ + driver \ + early_stuff \ + insert_task_block \ + insert_task_can_execute \ + insert_task_tags \ + multiple_send \ + policy_register \ + policy_register_many \ + policy_selection \ + star \ + stats \ + user_defined_datatype \ + wait_for_all + +if !STARPU_SIMGRID +starpu_mpi_TESTS += \ + attr \ + ndim_interface +endif + +if !STARPU_MPI_MINIMAL_TESTS +starpu_mpi_TESTS += \ + broadcast \ + early_request \ + gather \ + gather2 \ + insert_task \ + insert_task_count \ + insert_task_dyn_handles \ + insert_task_node_choice \ + insert_task_owner \ + insert_task_owner2 \ + insert_task_owner_data \ + matrix \ + matrix2 \ + mpi_barrier \ + mpi_detached_tag \ + mpi_earlyrecv \ + mpi_irecv \ + mpi_irecv_detached \ + mpi_isend \ + 
mpi_isend_detached \ + mpi_reduction \ + mpi_redux \ + mpi_scatter_gather \ + mpi_test \ + pingpong \ + policy_selection2 \ + ring \ + ring_async \ + ring_async_implicit \ + ring_sync \ + ring_sync_detached \ + temporary \ + data_cpy \ + mpi_data_cpy +endif + +if !STARPU_MPI_MINIMAL_TESTS +if !STARPU_SIMGRID +# missing support in simgrid +starpu_mpi_TESTS += \ + display_bindings \ + mpi_earlyrecv2 \ + mpi_earlyrecv2_sync \ + block_interface \ + block_interface_pinned \ + ndim_interface \ + insert_task_compute \ + insert_task_sent_cache \ + insert_task_recv_cache \ + insert_task_seq \ + tags_allocate \ + tags_checking \ + sync \ + coop \ + coop_datatype \ + coop_large \ + coop_many \ + coop_acknowledgement \ + coop_recv_not_yet_posted \ + coop_chained_sends \ + coop_wrong_order \ + coop_without_task \ + coop_user_defined_datatype \ + coop_recv_wait_finalize \ + coop_insert_task \ + coop_cache \ + mpi_task_submit + +if STARPU_USE_MPI_MPI +starpu_mpi_TESTS += \ + load_balancer +endif +endif + +# Expected to fail +starpu_mpi_TESTS += \ + policy_register_toomany \ + policy_unregister \ + starpu_redefine +endif + +noinst_PROGRAMS += \ + datatypes \ + pingpong \ + mpi_test \ + mpi_isend \ + mpi_earlyrecv \ + mpi_earlyrecv2 \ + mpi_earlyrecv2_sync \ + mpi_irecv \ + mpi_barrier \ + mpi_isend_detached \ + mpi_irecv_detached \ + mpi_detached_tag \ + mpi_redux \ + ring \ + ring_sync \ + ring_sync_detached \ + ring_async \ + ring_async_implicit \ + temporary \ + data_cpy \ + mpi_data_cpy \ + early_stuff \ + block_interface \ + block_interface_pinned \ + ndim_interface \ + attr \ + broadcast \ + callback \ + matrix \ + matrix2 \ + insert_task \ + insert_task_compute \ + insert_task_sent_cache \ + insert_task_recv_cache \ + insert_task_can_execute \ + insert_task_block \ + insert_task_owner \ + insert_task_owner2 \ + insert_task_owner_data \ + insert_task_node_choice \ + insert_task_count \ + insert_task_dyn_handles \ + insert_task_seq \ + insert_task_tags \ + multiple_send \ + 
mpi_scatter_gather \ + mpi_reduction \ + user_defined_datatype \ + tags_allocate \ + tags_checking \ + star \ + stats \ + sync \ + gather \ + gather2 \ + policy_register \ + policy_register_many \ + policy_register_toomany \ + policy_unregister \ + policy_selection \ + policy_selection2 \ + early_request \ + starpu_redefine \ + load_balancer \ + driver \ + coop \ + coop_datatype \ + coop_large \ + coop_many \ + coop_acknowledgement \ + coop_recv_not_yet_posted \ + coop_chained_sends \ + coop_wrong_order \ + coop_without_task \ + coop_user_defined_datatype \ + coop_recv_wait_finalize \ + coop_insert_task \ + coop_cache \ + nothing \ + display_bindings \ + mpi_task_submit \ + wait_for_all + +if STARPU_USE_MPI_FT +noinst_PROGRAMS += \ + checkpoints +endif STARPU_USE_MPI_FT + +XFAIL_TESTS= \ + policy_register_toomany \ + policy_unregister \ + starpu_redefine \ + nothing + +ring_SOURCES = ring.c +ring_sync_SOURCES = ring_sync.c +ring_sync_detached_SOURCES = ring_sync_detached.c +ring_async_SOURCES = ring_async.c +ring_async_implicit_SOURCES = ring_async_implicit.c +insert_task_count_SOURCES = insert_task_count.c +if STARPU_USE_CUDA +ring_SOURCES += ring_kernel.cu +ring_sync_SOURCES += ring_kernel.cu +ring_sync_detached_SOURCES += ring_kernel.cu +ring_async_SOURCES += ring_kernel.cu +ring_async_implicit_SOURCES += ring_kernel.cu +insert_task_count_SOURCES += ring_kernel.cu +endif +if STARPU_USE_HIP +ring_SOURCES += ring_kernel_hip.hip +ring_sync_SOURCES += ring_kernel_hip.hip +ring_sync_detached_SOURCES += ring_kernel_hip.hip +ring_async_SOURCES += ring_kernel_hip.hip +ring_async_implicit_SOURCES += ring_kernel_hip.hip +insert_task_count_SOURCES += ring_kernel_hip.hip +endif +mpi_reduction_SOURCES = mpi_reduction.c +mpi_reduction_SOURCES += mpi_reduction_kernels.c +user_defined_datatype_SOURCES = user_defined_datatype.c +user_defined_datatype_SOURCES += ../../examples/interface/complex_interface.c + +mpi_earlyrecv2_SOURCES = mpi_earlyrecv2.c +mpi_earlyrecv2_SOURCES += 
../../examples/interface/complex_interface.c +mpi_earlyrecv2_sync_SOURCES = mpi_earlyrecv2_sync.c +mpi_earlyrecv2_sync_SOURCES += ../../examples/interface/complex_interface.c + +coop_user_defined_datatype_SOURCES = coop_user_defined_datatype.c +coop_user_defined_datatype_SOURCES += ../../examples/interface/complex_interface.c + +early_stuff_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) diff --git a/mpi/tests/Makefile.in b/mpi/tests/Makefile.in new file mode 100644 index 0000000..78713a8 --- /dev/null +++ b/mpi/tests/Makefile.in @@ -0,0 +1,3325 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_7) datatypes$(EXEEXT) pingpong$(EXEEXT) \ + mpi_test$(EXEEXT) mpi_isend$(EXEEXT) mpi_earlyrecv$(EXEEXT) \ + mpi_earlyrecv2$(EXEEXT) mpi_earlyrecv2_sync$(EXEEXT) \ + mpi_irecv$(EXEEXT) mpi_barrier$(EXEEXT) \ + mpi_isend_detached$(EXEEXT) mpi_irecv_detached$(EXEEXT) \ + mpi_detached_tag$(EXEEXT) mpi_redux$(EXEEXT) ring$(EXEEXT) \ + ring_sync$(EXEEXT) ring_sync_detached$(EXEEXT) \ + ring_async$(EXEEXT) ring_async_implicit$(EXEEXT) \ + temporary$(EXEEXT) data_cpy$(EXEEXT) mpi_data_cpy$(EXEEXT) \ + early_stuff$(EXEEXT) block_interface$(EXEEXT) \ + block_interface_pinned$(EXEEXT) ndim_interface$(EXEEXT) \ + attr$(EXEEXT) broadcast$(EXEEXT) callback$(EXEEXT) \ + matrix$(EXEEXT) matrix2$(EXEEXT) insert_task$(EXEEXT) \ + insert_task_compute$(EXEEXT) insert_task_sent_cache$(EXEEXT) \ + insert_task_recv_cache$(EXEEXT) \ + insert_task_can_execute$(EXEEXT) insert_task_block$(EXEEXT) \ + insert_task_owner$(EXEEXT) insert_task_owner2$(EXEEXT) \ + insert_task_owner_data$(EXEEXT) \ + insert_task_node_choice$(EXEEXT) insert_task_count$(EXEEXT) \ + insert_task_dyn_handles$(EXEEXT) insert_task_seq$(EXEEXT) \ + insert_task_tags$(EXEEXT) multiple_send$(EXEEXT) \ + mpi_scatter_gather$(EXEEXT) mpi_reduction$(EXEEXT) \ + user_defined_datatype$(EXEEXT) tags_allocate$(EXEEXT) \ + tags_checking$(EXEEXT) star$(EXEEXT) stats$(EXEEXT) \ + sync$(EXEEXT) gather$(EXEEXT) gather2$(EXEEXT) \ + policy_register$(EXEEXT) policy_register_many$(EXEEXT) \ + policy_register_toomany$(EXEEXT) policy_unregister$(EXEEXT) \ + policy_selection$(EXEEXT) policy_selection2$(EXEEXT) \ + early_request$(EXEEXT) starpu_redefine$(EXEEXT) \ + load_balancer$(EXEEXT) driver$(EXEEXT) coop$(EXEEXT) \ + coop_datatype$(EXEEXT) coop_large$(EXEEXT) coop_many$(EXEEXT) \ + coop_acknowledgement$(EXEEXT) \ + coop_recv_not_yet_posted$(EXEEXT) coop_chained_sends$(EXEEXT) \ + 
coop_wrong_order$(EXEEXT) coop_without_task$(EXEEXT) \ + coop_user_defined_datatype$(EXEEXT) \ + coop_recv_wait_finalize$(EXEEXT) coop_insert_task$(EXEEXT) \ + coop_cache$(EXEEXT) nothing$(EXEEXT) display_bindings$(EXEEXT) \ + mpi_task_submit$(EXEEXT) wait_for_all$(EXEEXT) $(am__EXEEXT_8) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_MPI_CHECK_TRUE@TESTS = $(am__EXEEXT_6) +check_PROGRAMS = $(am__EXEEXT_6) +examplebin_PROGRAMS = +@STARPU_SIMGRID_FALSE@am__append_8 = \ +@STARPU_SIMGRID_FALSE@ attr \ +@STARPU_SIMGRID_FALSE@ ndim_interface + +@STARPU_MPI_MINIMAL_TESTS_FALSE@am__append_9 = \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ broadcast \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ early_request \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ gather \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ gather2 \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_count \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_dyn_handles \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_node_choice \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner2 \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner_data \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix2 \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_barrier \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_detached_tag \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_earlyrecv \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ 
mpi_irecv_detached \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend_detached \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_reduction \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_redux \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_scatter_gather \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_test \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ pingpong \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_selection2 \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async_implicit \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync_detached \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ temporary \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ data_cpy \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_data_cpy + + +# missing support in simgrid +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@am__append_10 = \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ display_bindings \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2 \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2_sync \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface_pinned \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ ndim_interface \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_compute \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_sent_cache \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_recv_cache \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_seq \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_allocate \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_checking \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ sync \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop \ 
+@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_datatype \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_large \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_many \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_acknowledgement \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_not_yet_posted \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_chained_sends \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_wrong_order \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_without_task \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_user_defined_datatype \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_wait_finalize \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_insert_task \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_cache \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_task_submit + +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@am__append_11 = \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@ load_balancer + + +# Expected to fail +@STARPU_MPI_MINIMAL_TESTS_FALSE@am__append_12 = \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_register_toomany \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_unregister \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ starpu_redefine + +@STARPU_USE_MPI_FT_TRUE@am__append_13 = \ +@STARPU_USE_MPI_FT_TRUE@ checkpoints + +XFAIL_TESTS = policy_register_toomany$(EXEEXT) \ + policy_unregister$(EXEEXT) starpu_redefine$(EXEEXT) \ + nothing$(EXEEXT) +@STARPU_USE_CUDA_TRUE@am__append_14 = ring_kernel.cu +@STARPU_USE_CUDA_TRUE@am__append_15 = ring_kernel.cu +@STARPU_USE_CUDA_TRUE@am__append_16 = ring_kernel.cu +@STARPU_USE_CUDA_TRUE@am__append_17 = ring_kernel.cu +@STARPU_USE_CUDA_TRUE@am__append_18 = ring_kernel.cu +@STARPU_USE_CUDA_TRUE@am__append_19 = ring_kernel.cu +@STARPU_USE_HIP_TRUE@am__append_20 = 
ring_kernel_hip.hip +@STARPU_USE_HIP_TRUE@am__append_21 = ring_kernel_hip.hip +@STARPU_USE_HIP_TRUE@am__append_22 = ring_kernel_hip.hip +@STARPU_USE_HIP_TRUE@am__append_23 = ring_kernel_hip.hip +@STARPU_USE_HIP_TRUE@am__append_24 = ring_kernel_hip.hip +@STARPU_USE_HIP_TRUE@am__append_25 = ring_kernel_hip.hip +subdir = mpi/tests +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_SIMGRID_FALSE@am__EXEEXT_1 = attr$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ ndim_interface$(EXEEXT) +@STARPU_MPI_MINIMAL_TESTS_FALSE@am__EXEEXT_2 = broadcast$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ early_request$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ gather$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ gather2$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_count$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_dyn_handles$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_node_choice$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner2$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner_data$(EXEEXT) \ 
+@STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix2$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_barrier$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_detached_tag$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_earlyrecv$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv_detached$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend_detached$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_reduction$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_redux$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_scatter_gather$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_test$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ pingpong$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_selection2$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async_implicit$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync_detached$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ temporary$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ data_cpy$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_data_cpy$(EXEEXT) +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_3 = display_bindings$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2_sync$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface_pinned$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ ndim_interface$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_compute$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ 
insert_task_sent_cache$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_recv_cache$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_seq$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_allocate$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_checking$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ sync$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_datatype$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_large$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_many$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_acknowledgement$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_not_yet_posted$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_chained_sends$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_wrong_order$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_without_task$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_user_defined_datatype$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_wait_finalize$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_insert_task$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_cache$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_task_submit$(EXEEXT) +@STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@am__EXEEXT_4 = load_balancer$(EXEEXT) +@STARPU_MPI_MINIMAL_TESTS_FALSE@am__EXEEXT_5 = policy_register_toomany$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_unregister$(EXEEXT) \ +@STARPU_MPI_MINIMAL_TESTS_FALSE@ starpu_redefine$(EXEEXT) +am__EXEEXT_6 = callback$(EXEEXT) driver$(EXEEXT) 
early_stuff$(EXEEXT) \ + insert_task_block$(EXEEXT) insert_task_can_execute$(EXEEXT) \ + insert_task_tags$(EXEEXT) multiple_send$(EXEEXT) \ + policy_register$(EXEEXT) policy_register_many$(EXEEXT) \ + policy_selection$(EXEEXT) star$(EXEEXT) stats$(EXEEXT) \ + user_defined_datatype$(EXEEXT) wait_for_all$(EXEEXT) \ + $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ + $(am__EXEEXT_4) $(am__EXEEXT_5) +am__installdirs = "$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_7 = loader$(EXEEXT) +@STARPU_USE_MPI_FT_TRUE@am__EXEEXT_8 = checkpoints$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +attr_SOURCES = attr.c +attr_OBJECTS = attr.$(OBJEXT) +attr_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +block_interface_SOURCES = block_interface.c +block_interface_OBJECTS = block_interface.$(OBJEXT) +block_interface_LDADD = $(LDADD) +block_interface_pinned_SOURCES = block_interface_pinned.c +block_interface_pinned_OBJECTS = block_interface_pinned.$(OBJEXT) +block_interface_pinned_LDADD = $(LDADD) +broadcast_SOURCES = broadcast.c +broadcast_OBJECTS = broadcast.$(OBJEXT) +broadcast_LDADD = $(LDADD) +callback_SOURCES = callback.c +callback_OBJECTS = callback.$(OBJEXT) +callback_LDADD = $(LDADD) +checkpoints_SOURCES = checkpoints.c +checkpoints_OBJECTS = checkpoints.$(OBJEXT) +checkpoints_LDADD = $(LDADD) +coop_SOURCES = coop.c +coop_OBJECTS = coop.$(OBJEXT) +coop_LDADD = $(LDADD) +coop_acknowledgement_SOURCES = coop_acknowledgement.c +coop_acknowledgement_OBJECTS = coop_acknowledgement.$(OBJEXT) +coop_acknowledgement_LDADD = $(LDADD) +coop_cache_SOURCES = coop_cache.c +coop_cache_OBJECTS = coop_cache.$(OBJEXT) +coop_cache_LDADD = $(LDADD) +coop_chained_sends_SOURCES = coop_chained_sends.c +coop_chained_sends_OBJECTS = coop_chained_sends.$(OBJEXT) +coop_chained_sends_LDADD = $(LDADD) +coop_datatype_SOURCES = coop_datatype.c +coop_datatype_OBJECTS = coop_datatype.$(OBJEXT) 
+coop_datatype_LDADD = $(LDADD) +coop_insert_task_SOURCES = coop_insert_task.c +coop_insert_task_OBJECTS = coop_insert_task.$(OBJEXT) +coop_insert_task_LDADD = $(LDADD) +coop_large_SOURCES = coop_large.c +coop_large_OBJECTS = coop_large.$(OBJEXT) +coop_large_LDADD = $(LDADD) +coop_many_SOURCES = coop_many.c +coop_many_OBJECTS = coop_many.$(OBJEXT) +coop_many_LDADD = $(LDADD) +coop_recv_not_yet_posted_SOURCES = coop_recv_not_yet_posted.c +coop_recv_not_yet_posted_OBJECTS = coop_recv_not_yet_posted.$(OBJEXT) +coop_recv_not_yet_posted_LDADD = $(LDADD) +coop_recv_wait_finalize_SOURCES = coop_recv_wait_finalize.c +coop_recv_wait_finalize_OBJECTS = coop_recv_wait_finalize.$(OBJEXT) +coop_recv_wait_finalize_LDADD = $(LDADD) +am__dirstamp = $(am__leading_dot)dirstamp +am_coop_user_defined_datatype_OBJECTS = \ + coop_user_defined_datatype.$(OBJEXT) \ + ../../examples/interface/complex_interface.$(OBJEXT) +coop_user_defined_datatype_OBJECTS = \ + $(am_coop_user_defined_datatype_OBJECTS) +coop_user_defined_datatype_LDADD = $(LDADD) +coop_without_task_SOURCES = coop_without_task.c +coop_without_task_OBJECTS = coop_without_task.$(OBJEXT) +coop_without_task_LDADD = $(LDADD) +coop_wrong_order_SOURCES = coop_wrong_order.c +coop_wrong_order_OBJECTS = coop_wrong_order.$(OBJEXT) +coop_wrong_order_LDADD = $(LDADD) +data_cpy_SOURCES = data_cpy.c +data_cpy_OBJECTS = data_cpy.$(OBJEXT) +data_cpy_LDADD = $(LDADD) +datatypes_SOURCES = datatypes.c +datatypes_OBJECTS = datatypes.$(OBJEXT) +datatypes_LDADD = $(LDADD) +display_bindings_SOURCES = display_bindings.c +display_bindings_OBJECTS = display_bindings.$(OBJEXT) +display_bindings_LDADD = $(LDADD) +driver_SOURCES = driver.c +driver_OBJECTS = driver.$(OBJEXT) +driver_LDADD = $(LDADD) +early_request_SOURCES = early_request.c +early_request_OBJECTS = early_request.$(OBJEXT) +early_request_LDADD = $(LDADD) +early_stuff_SOURCES = early_stuff.c +early_stuff_OBJECTS = early_stuff-early_stuff.$(OBJEXT) +early_stuff_LDADD = $(LDADD) 
+early_stuff_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(early_stuff_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +gather_SOURCES = gather.c +gather_OBJECTS = gather.$(OBJEXT) +gather_LDADD = $(LDADD) +gather2_SOURCES = gather2.c +gather2_OBJECTS = gather2.$(OBJEXT) +gather2_LDADD = $(LDADD) +insert_task_SOURCES = insert_task.c +insert_task_OBJECTS = insert_task.$(OBJEXT) +insert_task_LDADD = $(LDADD) +insert_task_block_SOURCES = insert_task_block.c +insert_task_block_OBJECTS = insert_task_block.$(OBJEXT) +insert_task_block_LDADD = $(LDADD) +insert_task_can_execute_SOURCES = insert_task_can_execute.c +insert_task_can_execute_OBJECTS = insert_task_can_execute.$(OBJEXT) +insert_task_can_execute_LDADD = $(LDADD) +insert_task_compute_SOURCES = insert_task_compute.c +insert_task_compute_OBJECTS = insert_task_compute.$(OBJEXT) +insert_task_compute_LDADD = $(LDADD) +am__insert_task_count_SOURCES_DIST = insert_task_count.c \ + ring_kernel.cu ring_kernel_hip.hip +@STARPU_USE_CUDA_TRUE@am__objects_1 = ring_kernel.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_2 = ring_kernel_hip.$(OBJEXT) +am_insert_task_count_OBJECTS = insert_task_count.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) +insert_task_count_OBJECTS = $(am_insert_task_count_OBJECTS) +insert_task_count_LDADD = $(LDADD) +insert_task_dyn_handles_SOURCES = insert_task_dyn_handles.c +insert_task_dyn_handles_OBJECTS = insert_task_dyn_handles.$(OBJEXT) +insert_task_dyn_handles_LDADD = $(LDADD) +insert_task_node_choice_SOURCES = insert_task_node_choice.c +insert_task_node_choice_OBJECTS = insert_task_node_choice.$(OBJEXT) +insert_task_node_choice_LDADD = $(LDADD) +insert_task_owner_SOURCES = insert_task_owner.c +insert_task_owner_OBJECTS = insert_task_owner.$(OBJEXT) +insert_task_owner_LDADD = $(LDADD) +insert_task_owner2_SOURCES = insert_task_owner2.c +insert_task_owner2_OBJECTS = insert_task_owner2.$(OBJEXT) +insert_task_owner2_LDADD = $(LDADD) 
+insert_task_owner_data_SOURCES = insert_task_owner_data.c +insert_task_owner_data_OBJECTS = insert_task_owner_data.$(OBJEXT) +insert_task_owner_data_LDADD = $(LDADD) +insert_task_recv_cache_SOURCES = insert_task_recv_cache.c +insert_task_recv_cache_OBJECTS = insert_task_recv_cache.$(OBJEXT) +insert_task_recv_cache_LDADD = $(LDADD) +insert_task_sent_cache_SOURCES = insert_task_sent_cache.c +insert_task_sent_cache_OBJECTS = insert_task_sent_cache.$(OBJEXT) +insert_task_sent_cache_LDADD = $(LDADD) +insert_task_seq_SOURCES = insert_task_seq.c +insert_task_seq_OBJECTS = insert_task_seq.$(OBJEXT) +insert_task_seq_LDADD = $(LDADD) +insert_task_tags_SOURCES = insert_task_tags.c +insert_task_tags_OBJECTS = insert_task_tags.$(OBJEXT) +insert_task_tags_LDADD = $(LDADD) +load_balancer_SOURCES = load_balancer.c +load_balancer_OBJECTS = load_balancer.$(OBJEXT) +load_balancer_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +matrix_SOURCES = matrix.c +matrix_OBJECTS = matrix.$(OBJEXT) +matrix_LDADD = $(LDADD) +matrix2_SOURCES = matrix2.c +matrix2_OBJECTS = matrix2.$(OBJEXT) +matrix2_LDADD = $(LDADD) +mpi_barrier_SOURCES = mpi_barrier.c +mpi_barrier_OBJECTS = mpi_barrier.$(OBJEXT) +mpi_barrier_LDADD = $(LDADD) +mpi_data_cpy_SOURCES = mpi_data_cpy.c +mpi_data_cpy_OBJECTS = mpi_data_cpy.$(OBJEXT) +mpi_data_cpy_LDADD = $(LDADD) +mpi_detached_tag_SOURCES = mpi_detached_tag.c +mpi_detached_tag_OBJECTS = mpi_detached_tag.$(OBJEXT) +mpi_detached_tag_LDADD = $(LDADD) +mpi_earlyrecv_SOURCES = mpi_earlyrecv.c +mpi_earlyrecv_OBJECTS = mpi_earlyrecv.$(OBJEXT) +mpi_earlyrecv_LDADD = $(LDADD) +am_mpi_earlyrecv2_OBJECTS = mpi_earlyrecv2.$(OBJEXT) \ + ../../examples/interface/complex_interface.$(OBJEXT) +mpi_earlyrecv2_OBJECTS = $(am_mpi_earlyrecv2_OBJECTS) +mpi_earlyrecv2_LDADD = $(LDADD) +am_mpi_earlyrecv2_sync_OBJECTS = mpi_earlyrecv2_sync.$(OBJEXT) \ + ../../examples/interface/complex_interface.$(OBJEXT) 
+mpi_earlyrecv2_sync_OBJECTS = $(am_mpi_earlyrecv2_sync_OBJECTS) +mpi_earlyrecv2_sync_LDADD = $(LDADD) +mpi_irecv_SOURCES = mpi_irecv.c +mpi_irecv_OBJECTS = mpi_irecv.$(OBJEXT) +mpi_irecv_LDADD = $(LDADD) +mpi_irecv_detached_SOURCES = mpi_irecv_detached.c +mpi_irecv_detached_OBJECTS = mpi_irecv_detached.$(OBJEXT) +mpi_irecv_detached_LDADD = $(LDADD) +mpi_isend_SOURCES = mpi_isend.c +mpi_isend_OBJECTS = mpi_isend.$(OBJEXT) +mpi_isend_LDADD = $(LDADD) +mpi_isend_detached_SOURCES = mpi_isend_detached.c +mpi_isend_detached_OBJECTS = mpi_isend_detached.$(OBJEXT) +mpi_isend_detached_LDADD = $(LDADD) +am_mpi_reduction_OBJECTS = mpi_reduction.$(OBJEXT) \ + mpi_reduction_kernels.$(OBJEXT) +mpi_reduction_OBJECTS = $(am_mpi_reduction_OBJECTS) +mpi_reduction_LDADD = $(LDADD) +mpi_redux_SOURCES = mpi_redux.c +mpi_redux_OBJECTS = mpi_redux.$(OBJEXT) +mpi_redux_LDADD = $(LDADD) +mpi_scatter_gather_SOURCES = mpi_scatter_gather.c +mpi_scatter_gather_OBJECTS = mpi_scatter_gather.$(OBJEXT) +mpi_scatter_gather_LDADD = $(LDADD) +mpi_task_submit_SOURCES = mpi_task_submit.c +mpi_task_submit_OBJECTS = mpi_task_submit.$(OBJEXT) +mpi_task_submit_LDADD = $(LDADD) +mpi_test_SOURCES = mpi_test.c +mpi_test_OBJECTS = mpi_test.$(OBJEXT) +mpi_test_LDADD = $(LDADD) +multiple_send_SOURCES = multiple_send.c +multiple_send_OBJECTS = multiple_send.$(OBJEXT) +multiple_send_LDADD = $(LDADD) +ndim_interface_SOURCES = ndim_interface.c +ndim_interface_OBJECTS = ndim_interface.$(OBJEXT) +ndim_interface_LDADD = $(LDADD) +nothing_SOURCES = nothing.c +nothing_OBJECTS = nothing.$(OBJEXT) +nothing_LDADD = $(LDADD) +pingpong_SOURCES = pingpong.c +pingpong_OBJECTS = pingpong.$(OBJEXT) +pingpong_LDADD = $(LDADD) +policy_register_SOURCES = policy_register.c +policy_register_OBJECTS = policy_register.$(OBJEXT) +policy_register_LDADD = $(LDADD) +policy_register_many_SOURCES = policy_register_many.c +policy_register_many_OBJECTS = policy_register_many.$(OBJEXT) +policy_register_many_LDADD = $(LDADD) 
+policy_register_toomany_SOURCES = policy_register_toomany.c +policy_register_toomany_OBJECTS = policy_register_toomany.$(OBJEXT) +policy_register_toomany_LDADD = $(LDADD) +policy_selection_SOURCES = policy_selection.c +policy_selection_OBJECTS = policy_selection.$(OBJEXT) +policy_selection_LDADD = $(LDADD) +policy_selection2_SOURCES = policy_selection2.c +policy_selection2_OBJECTS = policy_selection2.$(OBJEXT) +policy_selection2_LDADD = $(LDADD) +policy_unregister_SOURCES = policy_unregister.c +policy_unregister_OBJECTS = policy_unregister.$(OBJEXT) +policy_unregister_LDADD = $(LDADD) +am__ring_SOURCES_DIST = ring.c ring_kernel.cu ring_kernel_hip.hip +am_ring_OBJECTS = ring.$(OBJEXT) $(am__objects_1) $(am__objects_2) +ring_OBJECTS = $(am_ring_OBJECTS) +ring_LDADD = $(LDADD) +am__ring_async_SOURCES_DIST = ring_async.c ring_kernel.cu \ + ring_kernel_hip.hip +am_ring_async_OBJECTS = ring_async.$(OBJEXT) $(am__objects_1) \ + $(am__objects_2) +ring_async_OBJECTS = $(am_ring_async_OBJECTS) +ring_async_LDADD = $(LDADD) +am__ring_async_implicit_SOURCES_DIST = ring_async_implicit.c \ + ring_kernel.cu ring_kernel_hip.hip +am_ring_async_implicit_OBJECTS = ring_async_implicit.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) +ring_async_implicit_OBJECTS = $(am_ring_async_implicit_OBJECTS) +ring_async_implicit_LDADD = $(LDADD) +am__ring_sync_SOURCES_DIST = ring_sync.c ring_kernel.cu \ + ring_kernel_hip.hip +am_ring_sync_OBJECTS = ring_sync.$(OBJEXT) $(am__objects_1) \ + $(am__objects_2) +ring_sync_OBJECTS = $(am_ring_sync_OBJECTS) +ring_sync_LDADD = $(LDADD) +am__ring_sync_detached_SOURCES_DIST = ring_sync_detached.c \ + ring_kernel.cu ring_kernel_hip.hip +am_ring_sync_detached_OBJECTS = ring_sync_detached.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) +ring_sync_detached_OBJECTS = $(am_ring_sync_detached_OBJECTS) +ring_sync_detached_LDADD = $(LDADD) +star_SOURCES = star.c +star_OBJECTS = star.$(OBJEXT) +star_LDADD = $(LDADD) +starpu_redefine_SOURCES = starpu_redefine.c 
+starpu_redefine_OBJECTS = starpu_redefine.$(OBJEXT) +starpu_redefine_LDADD = $(LDADD) +stats_SOURCES = stats.c +stats_OBJECTS = stats.$(OBJEXT) +stats_LDADD = $(LDADD) +sync_SOURCES = sync.c +sync_OBJECTS = sync.$(OBJEXT) +sync_LDADD = $(LDADD) +tags_allocate_SOURCES = tags_allocate.c +tags_allocate_OBJECTS = tags_allocate.$(OBJEXT) +tags_allocate_LDADD = $(LDADD) +tags_checking_SOURCES = tags_checking.c +tags_checking_OBJECTS = tags_checking.$(OBJEXT) +tags_checking_LDADD = $(LDADD) +temporary_SOURCES = temporary.c +temporary_OBJECTS = temporary.$(OBJEXT) +temporary_LDADD = $(LDADD) +am_user_defined_datatype_OBJECTS = user_defined_datatype.$(OBJEXT) \ + ../../examples/interface/complex_interface.$(OBJEXT) +user_defined_datatype_OBJECTS = $(am_user_defined_datatype_OBJECTS) +user_defined_datatype_LDADD = $(LDADD) +wait_for_all_SOURCES = wait_for_all.c +wait_for_all_OBJECTS = wait_for_all.$(OBJEXT) +wait_for_all_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ../../examples/interface/$(DEPDIR)/complex_interface.Po \ + ./$(DEPDIR)/attr.Po ./$(DEPDIR)/block_interface.Po \ + ./$(DEPDIR)/block_interface_pinned.Po ./$(DEPDIR)/broadcast.Po \ + ./$(DEPDIR)/callback.Po ./$(DEPDIR)/checkpoints.Po \ + ./$(DEPDIR)/coop.Po ./$(DEPDIR)/coop_acknowledgement.Po \ + ./$(DEPDIR)/coop_cache.Po ./$(DEPDIR)/coop_chained_sends.Po \ + ./$(DEPDIR)/coop_datatype.Po ./$(DEPDIR)/coop_insert_task.Po \ + ./$(DEPDIR)/coop_large.Po ./$(DEPDIR)/coop_many.Po \ + 
./$(DEPDIR)/coop_recv_not_yet_posted.Po \ + ./$(DEPDIR)/coop_recv_wait_finalize.Po \ + ./$(DEPDIR)/coop_user_defined_datatype.Po \ + ./$(DEPDIR)/coop_without_task.Po \ + ./$(DEPDIR)/coop_wrong_order.Po ./$(DEPDIR)/data_cpy.Po \ + ./$(DEPDIR)/datatypes.Po ./$(DEPDIR)/display_bindings.Po \ + ./$(DEPDIR)/driver.Po ./$(DEPDIR)/early_request.Po \ + ./$(DEPDIR)/early_stuff-early_stuff.Po ./$(DEPDIR)/gather.Po \ + ./$(DEPDIR)/gather2.Po ./$(DEPDIR)/insert_task.Po \ + ./$(DEPDIR)/insert_task_block.Po \ + ./$(DEPDIR)/insert_task_can_execute.Po \ + ./$(DEPDIR)/insert_task_compute.Po \ + ./$(DEPDIR)/insert_task_count.Po \ + ./$(DEPDIR)/insert_task_dyn_handles.Po \ + ./$(DEPDIR)/insert_task_node_choice.Po \ + ./$(DEPDIR)/insert_task_owner.Po \ + ./$(DEPDIR)/insert_task_owner2.Po \ + ./$(DEPDIR)/insert_task_owner_data.Po \ + ./$(DEPDIR)/insert_task_recv_cache.Po \ + ./$(DEPDIR)/insert_task_sent_cache.Po \ + ./$(DEPDIR)/insert_task_seq.Po ./$(DEPDIR)/insert_task_tags.Po \ + ./$(DEPDIR)/load_balancer.Po ./$(DEPDIR)/loader-loader.Po \ + ./$(DEPDIR)/matrix.Po ./$(DEPDIR)/matrix2.Po \ + ./$(DEPDIR)/mpi_barrier.Po ./$(DEPDIR)/mpi_data_cpy.Po \ + ./$(DEPDIR)/mpi_detached_tag.Po ./$(DEPDIR)/mpi_earlyrecv.Po \ + ./$(DEPDIR)/mpi_earlyrecv2.Po \ + ./$(DEPDIR)/mpi_earlyrecv2_sync.Po ./$(DEPDIR)/mpi_irecv.Po \ + ./$(DEPDIR)/mpi_irecv_detached.Po ./$(DEPDIR)/mpi_isend.Po \ + ./$(DEPDIR)/mpi_isend_detached.Po ./$(DEPDIR)/mpi_reduction.Po \ + ./$(DEPDIR)/mpi_reduction_kernels.Po ./$(DEPDIR)/mpi_redux.Po \ + ./$(DEPDIR)/mpi_scatter_gather.Po \ + ./$(DEPDIR)/mpi_task_submit.Po ./$(DEPDIR)/mpi_test.Po \ + ./$(DEPDIR)/multiple_send.Po ./$(DEPDIR)/ndim_interface.Po \ + ./$(DEPDIR)/nothing.Po ./$(DEPDIR)/pingpong.Po \ + ./$(DEPDIR)/policy_register.Po \ + ./$(DEPDIR)/policy_register_many.Po \ + ./$(DEPDIR)/policy_register_toomany.Po \ + ./$(DEPDIR)/policy_selection.Po \ + ./$(DEPDIR)/policy_selection2.Po \ + ./$(DEPDIR)/policy_unregister.Po ./$(DEPDIR)/ring.Po \ + ./$(DEPDIR)/ring_async.Po 
./$(DEPDIR)/ring_async_implicit.Po \ + ./$(DEPDIR)/ring_sync.Po ./$(DEPDIR)/ring_sync_detached.Po \ + ./$(DEPDIR)/star.Po ./$(DEPDIR)/starpu_redefine.Po \ + ./$(DEPDIR)/stats.Po ./$(DEPDIR)/sync.Po \ + ./$(DEPDIR)/tags_allocate.Po ./$(DEPDIR)/tags_checking.Po \ + ./$(DEPDIR)/temporary.Po ./$(DEPDIR)/user_defined_datatype.Po \ + ./$(DEPDIR)/wait_for_all.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = attr.c block_interface.c block_interface_pinned.c \ + broadcast.c callback.c checkpoints.c coop.c \ + coop_acknowledgement.c coop_cache.c coop_chained_sends.c \ + coop_datatype.c coop_insert_task.c coop_large.c coop_many.c \ + coop_recv_not_yet_posted.c coop_recv_wait_finalize.c \ + $(coop_user_defined_datatype_SOURCES) coop_without_task.c \ + coop_wrong_order.c data_cpy.c datatypes.c display_bindings.c \ + driver.c early_request.c early_stuff.c gather.c gather2.c \ + insert_task.c insert_task_block.c insert_task_can_execute.c \ + insert_task_compute.c $(insert_task_count_SOURCES) \ + insert_task_dyn_handles.c insert_task_node_choice.c \ + insert_task_owner.c insert_task_owner2.c \ + insert_task_owner_data.c insert_task_recv_cache.c \ + insert_task_sent_cache.c insert_task_seq.c insert_task_tags.c \ + load_balancer.c loader.c matrix.c matrix2.c mpi_barrier.c \ + mpi_data_cpy.c 
mpi_detached_tag.c mpi_earlyrecv.c \ + $(mpi_earlyrecv2_SOURCES) $(mpi_earlyrecv2_sync_SOURCES) \ + mpi_irecv.c mpi_irecv_detached.c mpi_isend.c \ + mpi_isend_detached.c $(mpi_reduction_SOURCES) mpi_redux.c \ + mpi_scatter_gather.c mpi_task_submit.c mpi_test.c \ + multiple_send.c ndim_interface.c nothing.c pingpong.c \ + policy_register.c policy_register_many.c \ + policy_register_toomany.c policy_selection.c \ + policy_selection2.c policy_unregister.c $(ring_SOURCES) \ + $(ring_async_SOURCES) $(ring_async_implicit_SOURCES) \ + $(ring_sync_SOURCES) $(ring_sync_detached_SOURCES) star.c \ + starpu_redefine.c stats.c sync.c tags_allocate.c \ + tags_checking.c temporary.c $(user_defined_datatype_SOURCES) \ + wait_for_all.c +DIST_SOURCES = attr.c block_interface.c block_interface_pinned.c \ + broadcast.c callback.c checkpoints.c coop.c \ + coop_acknowledgement.c coop_cache.c coop_chained_sends.c \ + coop_datatype.c coop_insert_task.c coop_large.c coop_many.c \ + coop_recv_not_yet_posted.c coop_recv_wait_finalize.c \ + $(coop_user_defined_datatype_SOURCES) coop_without_task.c \ + coop_wrong_order.c data_cpy.c datatypes.c display_bindings.c \ + driver.c early_request.c early_stuff.c gather.c gather2.c \ + insert_task.c insert_task_block.c insert_task_can_execute.c \ + insert_task_compute.c $(am__insert_task_count_SOURCES_DIST) \ + insert_task_dyn_handles.c insert_task_node_choice.c \ + insert_task_owner.c insert_task_owner2.c \ + insert_task_owner_data.c insert_task_recv_cache.c \ + insert_task_sent_cache.c insert_task_seq.c insert_task_tags.c \ + load_balancer.c loader.c matrix.c matrix2.c mpi_barrier.c \ + mpi_data_cpy.c mpi_detached_tag.c mpi_earlyrecv.c \ + $(mpi_earlyrecv2_SOURCES) $(mpi_earlyrecv2_sync_SOURCES) \ + mpi_irecv.c mpi_irecv_detached.c mpi_isend.c \ + mpi_isend_detached.c $(mpi_reduction_SOURCES) mpi_redux.c \ + mpi_scatter_gather.c mpi_task_submit.c mpi_test.c \ + multiple_send.c ndim_interface.c nothing.c pingpong.c \ + policy_register.c 
policy_register_many.c \ + policy_register_toomany.c policy_selection.c \ + policy_selection2.c policy_unregister.c \ + $(am__ring_SOURCES_DIST) $(am__ring_async_SOURCES_DIST) \ + $(am__ring_async_implicit_SOURCES_DIST) \ + $(am__ring_sync_SOURCES_DIST) \ + $(am__ring_sync_detached_SOURCES_DIST) star.c \ + starpu_redefine.c stats.c sync.c tags_allocate.c \ + tags_checking.c temporary.c $(user_defined_datatype_SOURCES) \ + wait_for_all.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = $(MPICC) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ 
+DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) \ + $(top_builddir)/src/@LIBSTARPU_LINK@ \ + ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ + $(STARPU_EXPORTED_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = 
@LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ 
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ 
+STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = 
@am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(MPI_RUN_ENV) +LAUNCHER = $(STARPU_MPIEXEC) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using 
nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Thibaut Lambert +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +SUFFIXES = .hip +CCLD = $(MPICC) + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +BUILT_SOURCES = +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log +EXTRA_DIST = \ + helper.h \ + user_defined_datatype_value.h + +examplebindir = $(libdir)/starpu/examples/mpi +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ + 
+######################## +# Unit testcases # +######################## +starpu_mpi_TESTS = callback driver early_stuff insert_task_block \ + insert_task_can_execute insert_task_tags multiple_send \ + policy_register policy_register_many policy_selection star \ + stats user_defined_datatype wait_for_all $(am__append_8) \ + $(am__append_9) $(am__append_10) $(am__append_11) \ + $(am__append_12) +ring_SOURCES = ring.c $(am__append_14) $(am__append_20) +ring_sync_SOURCES = ring_sync.c $(am__append_15) $(am__append_21) +ring_sync_detached_SOURCES = ring_sync_detached.c $(am__append_16) \ + $(am__append_22) +ring_async_SOURCES = ring_async.c $(am__append_17) $(am__append_23) +ring_async_implicit_SOURCES = ring_async_implicit.c $(am__append_18) \ + $(am__append_24) +insert_task_count_SOURCES = insert_task_count.c $(am__append_19) \ + $(am__append_25) +mpi_reduction_SOURCES = mpi_reduction.c mpi_reduction_kernels.c +user_defined_datatype_SOURCES = user_defined_datatype.c \ + ../../examples/interface/complex_interface.c +mpi_earlyrecv2_SOURCES = mpi_earlyrecv2.c \ + ../../examples/interface/complex_interface.c +mpi_earlyrecv2_sync_SOURCES = mpi_earlyrecv2_sync.c \ + ../../examples/interface/complex_interface.c +coop_user_defined_datatype_SOURCES = coop_user_defined_datatype.c \ + ../../examples/interface/complex_interface.c +early_stuff_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .hip .c .cu .cubin .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/tests/Makefile'; \ 
+ $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign mpi/tests/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) 
files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +attr$(EXEEXT): $(attr_OBJECTS) $(attr_DEPENDENCIES) $(EXTRA_attr_DEPENDENCIES) + @rm -f attr$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(attr_OBJECTS) $(attr_LDADD) $(LIBS) + +block_interface$(EXEEXT): $(block_interface_OBJECTS) $(block_interface_DEPENDENCIES) $(EXTRA_block_interface_DEPENDENCIES) + @rm -f block_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(block_interface_OBJECTS) $(block_interface_LDADD) $(LIBS) + 
+block_interface_pinned$(EXEEXT): $(block_interface_pinned_OBJECTS) $(block_interface_pinned_DEPENDENCIES) $(EXTRA_block_interface_pinned_DEPENDENCIES) + @rm -f block_interface_pinned$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(block_interface_pinned_OBJECTS) $(block_interface_pinned_LDADD) $(LIBS) + +broadcast$(EXEEXT): $(broadcast_OBJECTS) $(broadcast_DEPENDENCIES) $(EXTRA_broadcast_DEPENDENCIES) + @rm -f broadcast$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(broadcast_OBJECTS) $(broadcast_LDADD) $(LIBS) + +callback$(EXEEXT): $(callback_OBJECTS) $(callback_DEPENDENCIES) $(EXTRA_callback_DEPENDENCIES) + @rm -f callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(callback_OBJECTS) $(callback_LDADD) $(LIBS) + +checkpoints$(EXEEXT): $(checkpoints_OBJECTS) $(checkpoints_DEPENDENCIES) $(EXTRA_checkpoints_DEPENDENCIES) + @rm -f checkpoints$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(checkpoints_OBJECTS) $(checkpoints_LDADD) $(LIBS) + +coop$(EXEEXT): $(coop_OBJECTS) $(coop_DEPENDENCIES) $(EXTRA_coop_DEPENDENCIES) + @rm -f coop$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_OBJECTS) $(coop_LDADD) $(LIBS) + +coop_acknowledgement$(EXEEXT): $(coop_acknowledgement_OBJECTS) $(coop_acknowledgement_DEPENDENCIES) $(EXTRA_coop_acknowledgement_DEPENDENCIES) + @rm -f coop_acknowledgement$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_acknowledgement_OBJECTS) $(coop_acknowledgement_LDADD) $(LIBS) + +coop_cache$(EXEEXT): $(coop_cache_OBJECTS) $(coop_cache_DEPENDENCIES) $(EXTRA_coop_cache_DEPENDENCIES) + @rm -f coop_cache$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_cache_OBJECTS) $(coop_cache_LDADD) $(LIBS) + +coop_chained_sends$(EXEEXT): $(coop_chained_sends_OBJECTS) $(coop_chained_sends_DEPENDENCIES) $(EXTRA_coop_chained_sends_DEPENDENCIES) + @rm -f coop_chained_sends$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_chained_sends_OBJECTS) $(coop_chained_sends_LDADD) $(LIBS) + +coop_datatype$(EXEEXT): $(coop_datatype_OBJECTS) $(coop_datatype_DEPENDENCIES) $(EXTRA_coop_datatype_DEPENDENCIES) + @rm -f coop_datatype$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(coop_datatype_OBJECTS) $(coop_datatype_LDADD) $(LIBS) + +coop_insert_task$(EXEEXT): $(coop_insert_task_OBJECTS) $(coop_insert_task_DEPENDENCIES) $(EXTRA_coop_insert_task_DEPENDENCIES) + @rm -f coop_insert_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_insert_task_OBJECTS) $(coop_insert_task_LDADD) $(LIBS) + +coop_large$(EXEEXT): $(coop_large_OBJECTS) $(coop_large_DEPENDENCIES) $(EXTRA_coop_large_DEPENDENCIES) + @rm -f coop_large$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_large_OBJECTS) $(coop_large_LDADD) $(LIBS) + +coop_many$(EXEEXT): $(coop_many_OBJECTS) $(coop_many_DEPENDENCIES) $(EXTRA_coop_many_DEPENDENCIES) + @rm -f coop_many$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_many_OBJECTS) $(coop_many_LDADD) $(LIBS) + +coop_recv_not_yet_posted$(EXEEXT): $(coop_recv_not_yet_posted_OBJECTS) $(coop_recv_not_yet_posted_DEPENDENCIES) $(EXTRA_coop_recv_not_yet_posted_DEPENDENCIES) + @rm -f coop_recv_not_yet_posted$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_recv_not_yet_posted_OBJECTS) $(coop_recv_not_yet_posted_LDADD) $(LIBS) + +coop_recv_wait_finalize$(EXEEXT): $(coop_recv_wait_finalize_OBJECTS) $(coop_recv_wait_finalize_DEPENDENCIES) $(EXTRA_coop_recv_wait_finalize_DEPENDENCIES) + @rm -f coop_recv_wait_finalize$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_recv_wait_finalize_OBJECTS) $(coop_recv_wait_finalize_LDADD) $(LIBS) +../../examples/interface/$(am__dirstamp): + @$(MKDIR_P) ../../examples/interface + @: > ../../examples/interface/$(am__dirstamp) +../../examples/interface/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ../../examples/interface/$(DEPDIR) + @: > ../../examples/interface/$(DEPDIR)/$(am__dirstamp) +../../examples/interface/complex_interface.$(OBJEXT): \ + ../../examples/interface/$(am__dirstamp) \ + ../../examples/interface/$(DEPDIR)/$(am__dirstamp) + +coop_user_defined_datatype$(EXEEXT): $(coop_user_defined_datatype_OBJECTS) $(coop_user_defined_datatype_DEPENDENCIES) $(EXTRA_coop_user_defined_datatype_DEPENDENCIES) + @rm -f coop_user_defined_datatype$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(coop_user_defined_datatype_OBJECTS) $(coop_user_defined_datatype_LDADD) $(LIBS) + +coop_without_task$(EXEEXT): $(coop_without_task_OBJECTS) $(coop_without_task_DEPENDENCIES) $(EXTRA_coop_without_task_DEPENDENCIES) + @rm -f coop_without_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_without_task_OBJECTS) $(coop_without_task_LDADD) $(LIBS) + +coop_wrong_order$(EXEEXT): $(coop_wrong_order_OBJECTS) $(coop_wrong_order_DEPENDENCIES) $(EXTRA_coop_wrong_order_DEPENDENCIES) + @rm -f coop_wrong_order$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(coop_wrong_order_OBJECTS) $(coop_wrong_order_LDADD) $(LIBS) + +data_cpy$(EXEEXT): $(data_cpy_OBJECTS) $(data_cpy_DEPENDENCIES) $(EXTRA_data_cpy_DEPENDENCIES) + @rm -f data_cpy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(data_cpy_OBJECTS) $(data_cpy_LDADD) $(LIBS) + +datatypes$(EXEEXT): $(datatypes_OBJECTS) $(datatypes_DEPENDENCIES) $(EXTRA_datatypes_DEPENDENCIES) + @rm -f datatypes$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datatypes_OBJECTS) $(datatypes_LDADD) $(LIBS) + +display_bindings$(EXEEXT): $(display_bindings_OBJECTS) $(display_bindings_DEPENDENCIES) $(EXTRA_display_bindings_DEPENDENCIES) + @rm -f display_bindings$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(display_bindings_OBJECTS) $(display_bindings_LDADD) $(LIBS) + +driver$(EXEEXT): $(driver_OBJECTS) $(driver_DEPENDENCIES) $(EXTRA_driver_DEPENDENCIES) + @rm -f driver$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(driver_OBJECTS) $(driver_LDADD) $(LIBS) + +early_request$(EXEEXT): $(early_request_OBJECTS) $(early_request_DEPENDENCIES) $(EXTRA_early_request_DEPENDENCIES) + @rm -f early_request$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(early_request_OBJECTS) $(early_request_LDADD) $(LIBS) + +early_stuff$(EXEEXT): $(early_stuff_OBJECTS) $(early_stuff_DEPENDENCIES) $(EXTRA_early_stuff_DEPENDENCIES) + @rm -f early_stuff$(EXEEXT) + $(AM_V_CCLD)$(early_stuff_LINK) $(early_stuff_OBJECTS) $(early_stuff_LDADD) $(LIBS) + +gather$(EXEEXT): $(gather_OBJECTS) $(gather_DEPENDENCIES) $(EXTRA_gather_DEPENDENCIES) + @rm -f gather$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(gather_OBJECTS) $(gather_LDADD) $(LIBS) + +gather2$(EXEEXT): $(gather2_OBJECTS) $(gather2_DEPENDENCIES) $(EXTRA_gather2_DEPENDENCIES) + @rm -f gather2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(gather2_OBJECTS) $(gather2_LDADD) $(LIBS) + +insert_task$(EXEEXT): $(insert_task_OBJECTS) $(insert_task_DEPENDENCIES) $(EXTRA_insert_task_DEPENDENCIES) + @rm -f insert_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_OBJECTS) $(insert_task_LDADD) $(LIBS) + +insert_task_block$(EXEEXT): $(insert_task_block_OBJECTS) $(insert_task_block_DEPENDENCIES) $(EXTRA_insert_task_block_DEPENDENCIES) + @rm -f insert_task_block$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_block_OBJECTS) $(insert_task_block_LDADD) $(LIBS) + +insert_task_can_execute$(EXEEXT): $(insert_task_can_execute_OBJECTS) $(insert_task_can_execute_DEPENDENCIES) $(EXTRA_insert_task_can_execute_DEPENDENCIES) + @rm -f insert_task_can_execute$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_can_execute_OBJECTS) $(insert_task_can_execute_LDADD) $(LIBS) + +insert_task_compute$(EXEEXT): $(insert_task_compute_OBJECTS) $(insert_task_compute_DEPENDENCIES) $(EXTRA_insert_task_compute_DEPENDENCIES) + @rm -f insert_task_compute$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_compute_OBJECTS) $(insert_task_compute_LDADD) $(LIBS) + +insert_task_count$(EXEEXT): $(insert_task_count_OBJECTS) $(insert_task_count_DEPENDENCIES) $(EXTRA_insert_task_count_DEPENDENCIES) + @rm -f insert_task_count$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_count_OBJECTS) $(insert_task_count_LDADD) $(LIBS) + +insert_task_dyn_handles$(EXEEXT): $(insert_task_dyn_handles_OBJECTS) $(insert_task_dyn_handles_DEPENDENCIES) $(EXTRA_insert_task_dyn_handles_DEPENDENCIES) + @rm -f insert_task_dyn_handles$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_dyn_handles_OBJECTS) $(insert_task_dyn_handles_LDADD) $(LIBS) + +insert_task_node_choice$(EXEEXT): $(insert_task_node_choice_OBJECTS) $(insert_task_node_choice_DEPENDENCIES) $(EXTRA_insert_task_node_choice_DEPENDENCIES) + @rm -f 
insert_task_node_choice$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_node_choice_OBJECTS) $(insert_task_node_choice_LDADD) $(LIBS) + +insert_task_owner$(EXEEXT): $(insert_task_owner_OBJECTS) $(insert_task_owner_DEPENDENCIES) $(EXTRA_insert_task_owner_DEPENDENCIES) + @rm -f insert_task_owner$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_owner_OBJECTS) $(insert_task_owner_LDADD) $(LIBS) + +insert_task_owner2$(EXEEXT): $(insert_task_owner2_OBJECTS) $(insert_task_owner2_DEPENDENCIES) $(EXTRA_insert_task_owner2_DEPENDENCIES) + @rm -f insert_task_owner2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_owner2_OBJECTS) $(insert_task_owner2_LDADD) $(LIBS) + +insert_task_owner_data$(EXEEXT): $(insert_task_owner_data_OBJECTS) $(insert_task_owner_data_DEPENDENCIES) $(EXTRA_insert_task_owner_data_DEPENDENCIES) + @rm -f insert_task_owner_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_owner_data_OBJECTS) $(insert_task_owner_data_LDADD) $(LIBS) + +insert_task_recv_cache$(EXEEXT): $(insert_task_recv_cache_OBJECTS) $(insert_task_recv_cache_DEPENDENCIES) $(EXTRA_insert_task_recv_cache_DEPENDENCIES) + @rm -f insert_task_recv_cache$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_recv_cache_OBJECTS) $(insert_task_recv_cache_LDADD) $(LIBS) + +insert_task_sent_cache$(EXEEXT): $(insert_task_sent_cache_OBJECTS) $(insert_task_sent_cache_DEPENDENCIES) $(EXTRA_insert_task_sent_cache_DEPENDENCIES) + @rm -f insert_task_sent_cache$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_sent_cache_OBJECTS) $(insert_task_sent_cache_LDADD) $(LIBS) + +insert_task_seq$(EXEEXT): $(insert_task_seq_OBJECTS) $(insert_task_seq_DEPENDENCIES) $(EXTRA_insert_task_seq_DEPENDENCIES) + @rm -f insert_task_seq$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_seq_OBJECTS) $(insert_task_seq_LDADD) $(LIBS) + +insert_task_tags$(EXEEXT): $(insert_task_tags_OBJECTS) $(insert_task_tags_DEPENDENCIES) $(EXTRA_insert_task_tags_DEPENDENCIES) + @rm -f insert_task_tags$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(insert_task_tags_OBJECTS) 
$(insert_task_tags_LDADD) $(LIBS) + +load_balancer$(EXEEXT): $(load_balancer_OBJECTS) $(load_balancer_DEPENDENCIES) $(EXTRA_load_balancer_DEPENDENCIES) + @rm -f load_balancer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(load_balancer_OBJECTS) $(load_balancer_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) + +matrix$(EXEEXT): $(matrix_OBJECTS) $(matrix_DEPENDENCIES) $(EXTRA_matrix_DEPENDENCIES) + @rm -f matrix$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_OBJECTS) $(matrix_LDADD) $(LIBS) + +matrix2$(EXEEXT): $(matrix2_OBJECTS) $(matrix2_DEPENDENCIES) $(EXTRA_matrix2_DEPENDENCIES) + @rm -f matrix2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix2_OBJECTS) $(matrix2_LDADD) $(LIBS) + +mpi_barrier$(EXEEXT): $(mpi_barrier_OBJECTS) $(mpi_barrier_DEPENDENCIES) $(EXTRA_mpi_barrier_DEPENDENCIES) + @rm -f mpi_barrier$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_barrier_OBJECTS) $(mpi_barrier_LDADD) $(LIBS) + +mpi_data_cpy$(EXEEXT): $(mpi_data_cpy_OBJECTS) $(mpi_data_cpy_DEPENDENCIES) $(EXTRA_mpi_data_cpy_DEPENDENCIES) + @rm -f mpi_data_cpy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_data_cpy_OBJECTS) $(mpi_data_cpy_LDADD) $(LIBS) + +mpi_detached_tag$(EXEEXT): $(mpi_detached_tag_OBJECTS) $(mpi_detached_tag_DEPENDENCIES) $(EXTRA_mpi_detached_tag_DEPENDENCIES) + @rm -f mpi_detached_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_detached_tag_OBJECTS) $(mpi_detached_tag_LDADD) $(LIBS) + +mpi_earlyrecv$(EXEEXT): $(mpi_earlyrecv_OBJECTS) $(mpi_earlyrecv_DEPENDENCIES) $(EXTRA_mpi_earlyrecv_DEPENDENCIES) + @rm -f mpi_earlyrecv$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv_OBJECTS) $(mpi_earlyrecv_LDADD) $(LIBS) + +mpi_earlyrecv2$(EXEEXT): $(mpi_earlyrecv2_OBJECTS) $(mpi_earlyrecv2_DEPENDENCIES) $(EXTRA_mpi_earlyrecv2_DEPENDENCIES) + @rm -f mpi_earlyrecv2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv2_OBJECTS) $(mpi_earlyrecv2_LDADD) $(LIBS) + +mpi_earlyrecv2_sync$(EXEEXT): 
$(mpi_earlyrecv2_sync_OBJECTS) $(mpi_earlyrecv2_sync_DEPENDENCIES) $(EXTRA_mpi_earlyrecv2_sync_DEPENDENCIES) + @rm -f mpi_earlyrecv2_sync$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv2_sync_OBJECTS) $(mpi_earlyrecv2_sync_LDADD) $(LIBS) + +mpi_irecv$(EXEEXT): $(mpi_irecv_OBJECTS) $(mpi_irecv_DEPENDENCIES) $(EXTRA_mpi_irecv_DEPENDENCIES) + @rm -f mpi_irecv$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_irecv_OBJECTS) $(mpi_irecv_LDADD) $(LIBS) + +mpi_irecv_detached$(EXEEXT): $(mpi_irecv_detached_OBJECTS) $(mpi_irecv_detached_DEPENDENCIES) $(EXTRA_mpi_irecv_detached_DEPENDENCIES) + @rm -f mpi_irecv_detached$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_irecv_detached_OBJECTS) $(mpi_irecv_detached_LDADD) $(LIBS) + +mpi_isend$(EXEEXT): $(mpi_isend_OBJECTS) $(mpi_isend_DEPENDENCIES) $(EXTRA_mpi_isend_DEPENDENCIES) + @rm -f mpi_isend$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_isend_OBJECTS) $(mpi_isend_LDADD) $(LIBS) + +mpi_isend_detached$(EXEEXT): $(mpi_isend_detached_OBJECTS) $(mpi_isend_detached_DEPENDENCIES) $(EXTRA_mpi_isend_detached_DEPENDENCIES) + @rm -f mpi_isend_detached$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_isend_detached_OBJECTS) $(mpi_isend_detached_LDADD) $(LIBS) + +mpi_reduction$(EXEEXT): $(mpi_reduction_OBJECTS) $(mpi_reduction_DEPENDENCIES) $(EXTRA_mpi_reduction_DEPENDENCIES) + @rm -f mpi_reduction$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_reduction_OBJECTS) $(mpi_reduction_LDADD) $(LIBS) + +mpi_redux$(EXEEXT): $(mpi_redux_OBJECTS) $(mpi_redux_DEPENDENCIES) $(EXTRA_mpi_redux_DEPENDENCIES) + @rm -f mpi_redux$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_redux_OBJECTS) $(mpi_redux_LDADD) $(LIBS) + +mpi_scatter_gather$(EXEEXT): $(mpi_scatter_gather_OBJECTS) $(mpi_scatter_gather_DEPENDENCIES) $(EXTRA_mpi_scatter_gather_DEPENDENCIES) + @rm -f mpi_scatter_gather$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_scatter_gather_OBJECTS) $(mpi_scatter_gather_LDADD) $(LIBS) + +mpi_task_submit$(EXEEXT): $(mpi_task_submit_OBJECTS) $(mpi_task_submit_DEPENDENCIES) $(EXTRA_mpi_task_submit_DEPENDENCIES) + @rm -f 
mpi_task_submit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_task_submit_OBJECTS) $(mpi_task_submit_LDADD) $(LIBS) + +mpi_test$(EXEEXT): $(mpi_test_OBJECTS) $(mpi_test_DEPENDENCIES) $(EXTRA_mpi_test_DEPENDENCIES) + @rm -f mpi_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mpi_test_OBJECTS) $(mpi_test_LDADD) $(LIBS) + +multiple_send$(EXEEXT): $(multiple_send_OBJECTS) $(multiple_send_DEPENDENCIES) $(EXTRA_multiple_send_DEPENDENCIES) + @rm -f multiple_send$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(multiple_send_OBJECTS) $(multiple_send_LDADD) $(LIBS) + +ndim_interface$(EXEEXT): $(ndim_interface_OBJECTS) $(ndim_interface_DEPENDENCIES) $(EXTRA_ndim_interface_DEPENDENCIES) + @rm -f ndim_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ndim_interface_OBJECTS) $(ndim_interface_LDADD) $(LIBS) + +nothing$(EXEEXT): $(nothing_OBJECTS) $(nothing_DEPENDENCIES) $(EXTRA_nothing_DEPENDENCIES) + @rm -f nothing$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(nothing_OBJECTS) $(nothing_LDADD) $(LIBS) + +pingpong$(EXEEXT): $(pingpong_OBJECTS) $(pingpong_DEPENDENCIES) $(EXTRA_pingpong_DEPENDENCIES) + @rm -f pingpong$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pingpong_OBJECTS) $(pingpong_LDADD) $(LIBS) + +policy_register$(EXEEXT): $(policy_register_OBJECTS) $(policy_register_DEPENDENCIES) $(EXTRA_policy_register_DEPENDENCIES) + @rm -f policy_register$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_register_OBJECTS) $(policy_register_LDADD) $(LIBS) + +policy_register_many$(EXEEXT): $(policy_register_many_OBJECTS) $(policy_register_many_DEPENDENCIES) $(EXTRA_policy_register_many_DEPENDENCIES) + @rm -f policy_register_many$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_register_many_OBJECTS) $(policy_register_many_LDADD) $(LIBS) + +policy_register_toomany$(EXEEXT): $(policy_register_toomany_OBJECTS) $(policy_register_toomany_DEPENDENCIES) $(EXTRA_policy_register_toomany_DEPENDENCIES) + @rm -f policy_register_toomany$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_register_toomany_OBJECTS) $(policy_register_toomany_LDADD) $(LIBS) + +policy_selection$(EXEEXT): 
$(policy_selection_OBJECTS) $(policy_selection_DEPENDENCIES) $(EXTRA_policy_selection_DEPENDENCIES) + @rm -f policy_selection$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_selection_OBJECTS) $(policy_selection_LDADD) $(LIBS) + +policy_selection2$(EXEEXT): $(policy_selection2_OBJECTS) $(policy_selection2_DEPENDENCIES) $(EXTRA_policy_selection2_DEPENDENCIES) + @rm -f policy_selection2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_selection2_OBJECTS) $(policy_selection2_LDADD) $(LIBS) + +policy_unregister$(EXEEXT): $(policy_unregister_OBJECTS) $(policy_unregister_DEPENDENCIES) $(EXTRA_policy_unregister_DEPENDENCIES) + @rm -f policy_unregister$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(policy_unregister_OBJECTS) $(policy_unregister_LDADD) $(LIBS) + +ring$(EXEEXT): $(ring_OBJECTS) $(ring_DEPENDENCIES) $(EXTRA_ring_DEPENDENCIES) + @rm -f ring$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ring_OBJECTS) $(ring_LDADD) $(LIBS) + +ring_async$(EXEEXT): $(ring_async_OBJECTS) $(ring_async_DEPENDENCIES) $(EXTRA_ring_async_DEPENDENCIES) + @rm -f ring_async$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ring_async_OBJECTS) $(ring_async_LDADD) $(LIBS) + +ring_async_implicit$(EXEEXT): $(ring_async_implicit_OBJECTS) $(ring_async_implicit_DEPENDENCIES) $(EXTRA_ring_async_implicit_DEPENDENCIES) + @rm -f ring_async_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ring_async_implicit_OBJECTS) $(ring_async_implicit_LDADD) $(LIBS) + +ring_sync$(EXEEXT): $(ring_sync_OBJECTS) $(ring_sync_DEPENDENCIES) $(EXTRA_ring_sync_DEPENDENCIES) + @rm -f ring_sync$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ring_sync_OBJECTS) $(ring_sync_LDADD) $(LIBS) + +ring_sync_detached$(EXEEXT): $(ring_sync_detached_OBJECTS) $(ring_sync_detached_DEPENDENCIES) $(EXTRA_ring_sync_detached_DEPENDENCIES) + @rm -f ring_sync_detached$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ring_sync_detached_OBJECTS) $(ring_sync_detached_LDADD) $(LIBS) + +star$(EXEEXT): $(star_OBJECTS) $(star_DEPENDENCIES) $(EXTRA_star_DEPENDENCIES) + @rm -f star$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(star_OBJECTS) $(star_LDADD) 
$(LIBS) + +starpu_redefine$(EXEEXT): $(starpu_redefine_OBJECTS) $(starpu_redefine_DEPENDENCIES) $(EXTRA_starpu_redefine_DEPENDENCIES) + @rm -f starpu_redefine$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_redefine_OBJECTS) $(starpu_redefine_LDADD) $(LIBS) + +stats$(EXEEXT): $(stats_OBJECTS) $(stats_DEPENDENCIES) $(EXTRA_stats_DEPENDENCIES) + @rm -f stats$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(stats_OBJECTS) $(stats_LDADD) $(LIBS) + +sync$(EXEEXT): $(sync_OBJECTS) $(sync_DEPENDENCIES) $(EXTRA_sync_DEPENDENCIES) + @rm -f sync$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sync_OBJECTS) $(sync_LDADD) $(LIBS) + +tags_allocate$(EXEEXT): $(tags_allocate_OBJECTS) $(tags_allocate_DEPENDENCIES) $(EXTRA_tags_allocate_DEPENDENCIES) + @rm -f tags_allocate$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tags_allocate_OBJECTS) $(tags_allocate_LDADD) $(LIBS) + +tags_checking$(EXEEXT): $(tags_checking_OBJECTS) $(tags_checking_DEPENDENCIES) $(EXTRA_tags_checking_DEPENDENCIES) + @rm -f tags_checking$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(tags_checking_OBJECTS) $(tags_checking_LDADD) $(LIBS) + +temporary$(EXEEXT): $(temporary_OBJECTS) $(temporary_DEPENDENCIES) $(EXTRA_temporary_DEPENDENCIES) + @rm -f temporary$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(temporary_OBJECTS) $(temporary_LDADD) $(LIBS) + +user_defined_datatype$(EXEEXT): $(user_defined_datatype_OBJECTS) $(user_defined_datatype_DEPENDENCIES) $(EXTRA_user_defined_datatype_DEPENDENCIES) + @rm -f user_defined_datatype$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(user_defined_datatype_OBJECTS) $(user_defined_datatype_LDADD) $(LIBS) + +wait_for_all$(EXEEXT): $(wait_for_all_OBJECTS) $(wait_for_all_DEPENDENCIES) $(EXTRA_wait_for_all_DEPENDENCIES) + @rm -f wait_for_all$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(wait_for_all_OBJECTS) $(wait_for_all_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f ../../examples/interface/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@../../examples/interface/$(DEPDIR)/complex_interface.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/attr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/block_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/block_interface_pinned.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/broadcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/checkpoints.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_acknowledgement.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_cache.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_chained_sends.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_insert_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_large.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_many.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_recv_not_yet_posted.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_recv_wait_finalize.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_user_defined_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_without_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_wrong_order.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/data_cpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/datatypes.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/display_bindings.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/driver.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/early_request.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/early_stuff-early_stuff.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gather.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gather2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_block.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_can_execute.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_compute.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_count.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_dyn_handles.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_node_choice.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_recv_cache.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_sent_cache.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_seq.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_tags.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load_balancer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_barrier.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_data_cpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_detached_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv2_sync.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_irecv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_irecv_detached.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_isend.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_isend_detached.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_reduction.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_reduction_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_redux.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_scatter_gather.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_task_submit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multiple_send.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ndim_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nothing.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pingpong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register_many.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register_toomany.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_selection.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_selection2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_unregister.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_async.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_async_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_sync.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_sync_detached.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/star.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_redefine.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stats.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sync.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tags_allocate.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tags_checking.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/temporary.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/user_defined_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wait_for_all.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo 
$$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +early_stuff-early_stuff.o: early_stuff.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -MT early_stuff-early_stuff.o -MD -MP -MF $(DEPDIR)/early_stuff-early_stuff.Tpo -c -o early_stuff-early_stuff.o `test -f 'early_stuff.c' || echo '$(srcdir)/'`early_stuff.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/early_stuff-early_stuff.Tpo $(DEPDIR)/early_stuff-early_stuff.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='early_stuff.c' object='early_stuff-early_stuff.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -c -o early_stuff-early_stuff.o `test -f 'early_stuff.c' || echo '$(srcdir)/'`early_stuff.c + +early_stuff-early_stuff.obj: early_stuff.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -MT early_stuff-early_stuff.obj -MD -MP -MF $(DEPDIR)/early_stuff-early_stuff.Tpo -c -o early_stuff-early_stuff.obj `if test -f 'early_stuff.c'; then $(CYGPATH_W) 'early_stuff.c'; else $(CYGPATH_W) '$(srcdir)/early_stuff.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/early_stuff-early_stuff.Tpo $(DEPDIR)/early_stuff-early_stuff.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='early_stuff.c' object='early_stuff-early_stuff.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -c -o early_stuff-early_stuff.obj `if test -f 'early_stuff.c'; then $(CYGPATH_W) 'early_stuff.c'; else $(CYGPATH_W) '$(srcdir)/early_stuff.c'; fi` + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) 
'$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +callback.log: callback$(EXEEXT) + @p='callback$(EXEEXT)'; \ + b='callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +driver.log: driver$(EXEEXT) + @p='driver$(EXEEXT)'; \ + b='driver'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +early_stuff.log: early_stuff$(EXEEXT) + @p='early_stuff$(EXEEXT)'; \ + b='early_stuff'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_block.log: insert_task_block$(EXEEXT) + @p='insert_task_block$(EXEEXT)'; \ + b='insert_task_block'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+insert_task_can_execute.log: insert_task_can_execute$(EXEEXT) + @p='insert_task_can_execute$(EXEEXT)'; \ + b='insert_task_can_execute'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_tags.log: insert_task_tags$(EXEEXT) + @p='insert_task_tags$(EXEEXT)'; \ + b='insert_task_tags'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +multiple_send.log: multiple_send$(EXEEXT) + @p='multiple_send$(EXEEXT)'; \ + b='multiple_send'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_register.log: policy_register$(EXEEXT) + @p='policy_register$(EXEEXT)'; \ + b='policy_register'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_register_many.log: policy_register_many$(EXEEXT) + @p='policy_register_many$(EXEEXT)'; \ + b='policy_register_many'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_selection.log: policy_selection$(EXEEXT) + @p='policy_selection$(EXEEXT)'; \ + b='policy_selection'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +star.log: star$(EXEEXT) + @p='star$(EXEEXT)'; \ + b='star'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +stats.log: stats$(EXEEXT) + @p='stats$(EXEEXT)'; \ + b='stats'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +user_defined_datatype.log: user_defined_datatype$(EXEEXT) + @p='user_defined_datatype$(EXEEXT)'; \ + b='user_defined_datatype'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +wait_for_all.log: wait_for_all$(EXEEXT) + @p='wait_for_all$(EXEEXT)'; \ + b='wait_for_all'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +attr.log: attr$(EXEEXT) + @p='attr$(EXEEXT)'; \ + b='attr'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ndim_interface.log: ndim_interface$(EXEEXT) + @p='ndim_interface$(EXEEXT)'; \ + b='ndim_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +broadcast.log: broadcast$(EXEEXT) + @p='broadcast$(EXEEXT)'; \ + b='broadcast'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name 
"$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +early_request.log: early_request$(EXEEXT) + @p='early_request$(EXEEXT)'; \ + b='early_request'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +gather.log: gather$(EXEEXT) + @p='gather$(EXEEXT)'; \ + b='gather'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +gather2.log: gather2$(EXEEXT) + @p='gather2$(EXEEXT)'; \ + b='gather2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task.log: insert_task$(EXEEXT) + @p='insert_task$(EXEEXT)'; \ + b='insert_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_count.log: insert_task_count$(EXEEXT) + @p='insert_task_count$(EXEEXT)'; \ + b='insert_task_count'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_dyn_handles.log: insert_task_dyn_handles$(EXEEXT) + @p='insert_task_dyn_handles$(EXEEXT)'; \ + b='insert_task_dyn_handles'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_node_choice.log: insert_task_node_choice$(EXEEXT) + @p='insert_task_node_choice$(EXEEXT)'; \ + b='insert_task_node_choice'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_owner.log: insert_task_owner$(EXEEXT) + @p='insert_task_owner$(EXEEXT)'; \ + b='insert_task_owner'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_owner2.log: insert_task_owner2$(EXEEXT) + @p='insert_task_owner2$(EXEEXT)'; \ + b='insert_task_owner2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_owner_data.log: insert_task_owner_data$(EXEEXT) + @p='insert_task_owner_data$(EXEEXT)'; \ + b='insert_task_owner_data'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix.log: matrix$(EXEEXT) + @p='matrix$(EXEEXT)'; \ + b='matrix'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matrix2.log: matrix2$(EXEEXT) + @p='matrix2$(EXEEXT)'; \ + b='matrix2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_barrier.log: mpi_barrier$(EXEEXT) + @p='mpi_barrier$(EXEEXT)'; \ + b='mpi_barrier'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_detached_tag.log: mpi_detached_tag$(EXEEXT) + @p='mpi_detached_tag$(EXEEXT)'; \ + b='mpi_detached_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_earlyrecv.log: mpi_earlyrecv$(EXEEXT) + @p='mpi_earlyrecv$(EXEEXT)'; \ + b='mpi_earlyrecv'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_irecv.log: mpi_irecv$(EXEEXT) + @p='mpi_irecv$(EXEEXT)'; \ + b='mpi_irecv'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_irecv_detached.log: mpi_irecv_detached$(EXEEXT) + @p='mpi_irecv_detached$(EXEEXT)'; \ + b='mpi_irecv_detached'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_isend.log: mpi_isend$(EXEEXT) + @p='mpi_isend$(EXEEXT)'; \ + b='mpi_isend'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +mpi_isend_detached.log: mpi_isend_detached$(EXEEXT) + @p='mpi_isend_detached$(EXEEXT)'; \ + b='mpi_isend_detached'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_reduction.log: mpi_reduction$(EXEEXT) + @p='mpi_reduction$(EXEEXT)'; \ + b='mpi_reduction'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_redux.log: mpi_redux$(EXEEXT) + @p='mpi_redux$(EXEEXT)'; \ + b='mpi_redux'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_scatter_gather.log: mpi_scatter_gather$(EXEEXT) + @p='mpi_scatter_gather$(EXEEXT)'; \ + b='mpi_scatter_gather'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_test.log: mpi_test$(EXEEXT) + @p='mpi_test$(EXEEXT)'; \ + b='mpi_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +pingpong.log: pingpong$(EXEEXT) + @p='pingpong$(EXEEXT)'; \ + b='pingpong'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_selection2.log: policy_selection2$(EXEEXT) + 
@p='policy_selection2$(EXEEXT)'; \ + b='policy_selection2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ring.log: ring$(EXEEXT) + @p='ring$(EXEEXT)'; \ + b='ring'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ring_async.log: ring_async$(EXEEXT) + @p='ring_async$(EXEEXT)'; \ + b='ring_async'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ring_async_implicit.log: ring_async_implicit$(EXEEXT) + @p='ring_async_implicit$(EXEEXT)'; \ + b='ring_async_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ring_sync.log: ring_sync$(EXEEXT) + @p='ring_sync$(EXEEXT)'; \ + b='ring_sync'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +ring_sync_detached.log: ring_sync_detached$(EXEEXT) + @p='ring_sync_detached$(EXEEXT)'; \ + b='ring_sync_detached'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +temporary.log: temporary$(EXEEXT) + @p='temporary$(EXEEXT)'; \ + b='temporary'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ 
+ --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +data_cpy.log: data_cpy$(EXEEXT) + @p='data_cpy$(EXEEXT)'; \ + b='data_cpy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_data_cpy.log: mpi_data_cpy$(EXEEXT) + @p='mpi_data_cpy$(EXEEXT)'; \ + b='mpi_data_cpy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +display_bindings.log: display_bindings$(EXEEXT) + @p='display_bindings$(EXEEXT)'; \ + b='display_bindings'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_earlyrecv2.log: mpi_earlyrecv2$(EXEEXT) + @p='mpi_earlyrecv2$(EXEEXT)'; \ + b='mpi_earlyrecv2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_earlyrecv2_sync.log: mpi_earlyrecv2_sync$(EXEEXT) + @p='mpi_earlyrecv2_sync$(EXEEXT)'; \ + b='mpi_earlyrecv2_sync'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +block_interface.log: block_interface$(EXEEXT) + @p='block_interface$(EXEEXT)'; \ + b='block_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +block_interface_pinned.log: block_interface_pinned$(EXEEXT) + @p='block_interface_pinned$(EXEEXT)'; \ + b='block_interface_pinned'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_compute.log: insert_task_compute$(EXEEXT) + @p='insert_task_compute$(EXEEXT)'; \ + b='insert_task_compute'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_sent_cache.log: insert_task_sent_cache$(EXEEXT) + @p='insert_task_sent_cache$(EXEEXT)'; \ + b='insert_task_sent_cache'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_recv_cache.log: insert_task_recv_cache$(EXEEXT) + @p='insert_task_recv_cache$(EXEEXT)'; \ + b='insert_task_recv_cache'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +insert_task_seq.log: insert_task_seq$(EXEEXT) + @p='insert_task_seq$(EXEEXT)'; \ + b='insert_task_seq'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tags_allocate.log: tags_allocate$(EXEEXT) + @p='tags_allocate$(EXEEXT)'; \ + b='tags_allocate'; \ + $(am__check_pre) $(LOG_DRIVER) 
--test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +tags_checking.log: tags_checking$(EXEEXT) + @p='tags_checking$(EXEEXT)'; \ + b='tags_checking'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sync.log: sync$(EXEEXT) + @p='sync$(EXEEXT)'; \ + b='sync'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop.log: coop$(EXEEXT) + @p='coop$(EXEEXT)'; \ + b='coop'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_datatype.log: coop_datatype$(EXEEXT) + @p='coop_datatype$(EXEEXT)'; \ + b='coop_datatype'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_large.log: coop_large$(EXEEXT) + @p='coop_large$(EXEEXT)'; \ + b='coop_large'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_many.log: coop_many$(EXEEXT) + @p='coop_many$(EXEEXT)'; \ + b='coop_many'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +coop_acknowledgement.log: coop_acknowledgement$(EXEEXT) + @p='coop_acknowledgement$(EXEEXT)'; \ + b='coop_acknowledgement'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_recv_not_yet_posted.log: coop_recv_not_yet_posted$(EXEEXT) + @p='coop_recv_not_yet_posted$(EXEEXT)'; \ + b='coop_recv_not_yet_posted'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_chained_sends.log: coop_chained_sends$(EXEEXT) + @p='coop_chained_sends$(EXEEXT)'; \ + b='coop_chained_sends'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_wrong_order.log: coop_wrong_order$(EXEEXT) + @p='coop_wrong_order$(EXEEXT)'; \ + b='coop_wrong_order'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_without_task.log: coop_without_task$(EXEEXT) + @p='coop_without_task$(EXEEXT)'; \ + b='coop_without_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_user_defined_datatype.log: coop_user_defined_datatype$(EXEEXT) + @p='coop_user_defined_datatype$(EXEEXT)'; \ + b='coop_user_defined_datatype'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_recv_wait_finalize.log: coop_recv_wait_finalize$(EXEEXT) + @p='coop_recv_wait_finalize$(EXEEXT)'; \ + b='coop_recv_wait_finalize'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_insert_task.log: coop_insert_task$(EXEEXT) + @p='coop_insert_task$(EXEEXT)'; \ + b='coop_insert_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +coop_cache.log: coop_cache$(EXEEXT) + @p='coop_cache$(EXEEXT)'; \ + b='coop_cache'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mpi_task_submit.log: mpi_task_submit$(EXEEXT) + @p='mpi_task_submit$(EXEEXT)'; \ + b='mpi_task_submit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +load_balancer.log: load_balancer$(EXEEXT) + @p='load_balancer$(EXEEXT)'; \ + b='load_balancer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_register_toomany.log: policy_register_toomany$(EXEEXT) + @p='policy_register_toomany$(EXEEXT)'; \ + b='policy_register_toomany'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file 
$$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +policy_unregister.log: policy_unregister$(EXEEXT) + @p='policy_unregister$(EXEEXT)'; \ + b='policy_unregister'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_redefine.log: starpu_redefine$(EXEEXT) + @p='starpu_redefine$(EXEEXT)'; \ + b='starpu_redefine'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + 
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f ../../examples/interface/$(DEPDIR)/$(am__dirstamp) + -rm -f ../../examples/interface/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po + -rm -f ./$(DEPDIR)/attr.Po + -rm -f ./$(DEPDIR)/block_interface.Po + -rm -f ./$(DEPDIR)/block_interface_pinned.Po + -rm -f ./$(DEPDIR)/broadcast.Po + -rm -f ./$(DEPDIR)/callback.Po + -rm -f ./$(DEPDIR)/checkpoints.Po + -rm -f ./$(DEPDIR)/coop.Po + -rm -f ./$(DEPDIR)/coop_acknowledgement.Po + -rm -f ./$(DEPDIR)/coop_cache.Po + -rm -f ./$(DEPDIR)/coop_chained_sends.Po + -rm -f ./$(DEPDIR)/coop_datatype.Po + -rm -f ./$(DEPDIR)/coop_insert_task.Po + -rm -f ./$(DEPDIR)/coop_large.Po + -rm -f ./$(DEPDIR)/coop_many.Po + -rm -f ./$(DEPDIR)/coop_recv_not_yet_posted.Po + -rm -f ./$(DEPDIR)/coop_recv_wait_finalize.Po + -rm -f ./$(DEPDIR)/coop_user_defined_datatype.Po + -rm -f ./$(DEPDIR)/coop_without_task.Po + -rm -f ./$(DEPDIR)/coop_wrong_order.Po + -rm -f ./$(DEPDIR)/data_cpy.Po + -rm -f ./$(DEPDIR)/datatypes.Po + -rm -f ./$(DEPDIR)/display_bindings.Po + -rm -f ./$(DEPDIR)/driver.Po + -rm -f ./$(DEPDIR)/early_request.Po + -rm -f ./$(DEPDIR)/early_stuff-early_stuff.Po + -rm -f ./$(DEPDIR)/gather.Po + -rm -f ./$(DEPDIR)/gather2.Po + -rm -f ./$(DEPDIR)/insert_task.Po + -rm -f ./$(DEPDIR)/insert_task_block.Po + -rm -f ./$(DEPDIR)/insert_task_can_execute.Po + -rm -f 
./$(DEPDIR)/insert_task_compute.Po + -rm -f ./$(DEPDIR)/insert_task_count.Po + -rm -f ./$(DEPDIR)/insert_task_dyn_handles.Po + -rm -f ./$(DEPDIR)/insert_task_node_choice.Po + -rm -f ./$(DEPDIR)/insert_task_owner.Po + -rm -f ./$(DEPDIR)/insert_task_owner2.Po + -rm -f ./$(DEPDIR)/insert_task_owner_data.Po + -rm -f ./$(DEPDIR)/insert_task_recv_cache.Po + -rm -f ./$(DEPDIR)/insert_task_sent_cache.Po + -rm -f ./$(DEPDIR)/insert_task_seq.Po + -rm -f ./$(DEPDIR)/insert_task_tags.Po + -rm -f ./$(DEPDIR)/load_balancer.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/matrix.Po + -rm -f ./$(DEPDIR)/matrix2.Po + -rm -f ./$(DEPDIR)/mpi_barrier.Po + -rm -f ./$(DEPDIR)/mpi_data_cpy.Po + -rm -f ./$(DEPDIR)/mpi_detached_tag.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv2.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv2_sync.Po + -rm -f ./$(DEPDIR)/mpi_irecv.Po + -rm -f ./$(DEPDIR)/mpi_irecv_detached.Po + -rm -f ./$(DEPDIR)/mpi_isend.Po + -rm -f ./$(DEPDIR)/mpi_isend_detached.Po + -rm -f ./$(DEPDIR)/mpi_reduction.Po + -rm -f ./$(DEPDIR)/mpi_reduction_kernels.Po + -rm -f ./$(DEPDIR)/mpi_redux.Po + -rm -f ./$(DEPDIR)/mpi_scatter_gather.Po + -rm -f ./$(DEPDIR)/mpi_task_submit.Po + -rm -f ./$(DEPDIR)/mpi_test.Po + -rm -f ./$(DEPDIR)/multiple_send.Po + -rm -f ./$(DEPDIR)/ndim_interface.Po + -rm -f ./$(DEPDIR)/nothing.Po + -rm -f ./$(DEPDIR)/pingpong.Po + -rm -f ./$(DEPDIR)/policy_register.Po + -rm -f ./$(DEPDIR)/policy_register_many.Po + -rm -f ./$(DEPDIR)/policy_register_toomany.Po + -rm -f ./$(DEPDIR)/policy_selection.Po + -rm -f ./$(DEPDIR)/policy_selection2.Po + -rm -f ./$(DEPDIR)/policy_unregister.Po + -rm -f ./$(DEPDIR)/ring.Po + -rm -f ./$(DEPDIR)/ring_async.Po + -rm -f ./$(DEPDIR)/ring_async_implicit.Po + -rm -f ./$(DEPDIR)/ring_sync.Po + -rm -f ./$(DEPDIR)/ring_sync_detached.Po + -rm -f ./$(DEPDIR)/star.Po + -rm -f ./$(DEPDIR)/starpu_redefine.Po + -rm -f ./$(DEPDIR)/stats.Po + -rm -f ./$(DEPDIR)/sync.Po + -rm -f ./$(DEPDIR)/tags_allocate.Po + -rm 
-f ./$(DEPDIR)/tags_checking.Po + -rm -f ./$(DEPDIR)/temporary.Po + -rm -f ./$(DEPDIR)/user_defined_datatype.Po + -rm -f ./$(DEPDIR)/wait_for_all.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po + -rm -f ./$(DEPDIR)/attr.Po + -rm -f ./$(DEPDIR)/block_interface.Po + -rm -f ./$(DEPDIR)/block_interface_pinned.Po + -rm -f ./$(DEPDIR)/broadcast.Po + -rm -f ./$(DEPDIR)/callback.Po + -rm -f ./$(DEPDIR)/checkpoints.Po + -rm -f ./$(DEPDIR)/coop.Po + -rm -f ./$(DEPDIR)/coop_acknowledgement.Po + -rm -f ./$(DEPDIR)/coop_cache.Po + -rm -f ./$(DEPDIR)/coop_chained_sends.Po + -rm -f ./$(DEPDIR)/coop_datatype.Po + -rm -f ./$(DEPDIR)/coop_insert_task.Po + -rm -f ./$(DEPDIR)/coop_large.Po + -rm -f ./$(DEPDIR)/coop_many.Po + -rm -f ./$(DEPDIR)/coop_recv_not_yet_posted.Po + -rm -f ./$(DEPDIR)/coop_recv_wait_finalize.Po + -rm -f ./$(DEPDIR)/coop_user_defined_datatype.Po + -rm -f ./$(DEPDIR)/coop_without_task.Po + -rm -f ./$(DEPDIR)/coop_wrong_order.Po + -rm -f ./$(DEPDIR)/data_cpy.Po + -rm -f ./$(DEPDIR)/datatypes.Po + -rm -f ./$(DEPDIR)/display_bindings.Po + -rm -f ./$(DEPDIR)/driver.Po + -rm -f ./$(DEPDIR)/early_request.Po + -rm -f ./$(DEPDIR)/early_stuff-early_stuff.Po + -rm -f ./$(DEPDIR)/gather.Po + -rm -f ./$(DEPDIR)/gather2.Po + -rm -f ./$(DEPDIR)/insert_task.Po + -rm -f ./$(DEPDIR)/insert_task_block.Po + -rm -f ./$(DEPDIR)/insert_task_can_execute.Po + -rm -f ./$(DEPDIR)/insert_task_compute.Po 
+ -rm -f ./$(DEPDIR)/insert_task_count.Po + -rm -f ./$(DEPDIR)/insert_task_dyn_handles.Po + -rm -f ./$(DEPDIR)/insert_task_node_choice.Po + -rm -f ./$(DEPDIR)/insert_task_owner.Po + -rm -f ./$(DEPDIR)/insert_task_owner2.Po + -rm -f ./$(DEPDIR)/insert_task_owner_data.Po + -rm -f ./$(DEPDIR)/insert_task_recv_cache.Po + -rm -f ./$(DEPDIR)/insert_task_sent_cache.Po + -rm -f ./$(DEPDIR)/insert_task_seq.Po + -rm -f ./$(DEPDIR)/insert_task_tags.Po + -rm -f ./$(DEPDIR)/load_balancer.Po + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/matrix.Po + -rm -f ./$(DEPDIR)/matrix2.Po + -rm -f ./$(DEPDIR)/mpi_barrier.Po + -rm -f ./$(DEPDIR)/mpi_data_cpy.Po + -rm -f ./$(DEPDIR)/mpi_detached_tag.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv2.Po + -rm -f ./$(DEPDIR)/mpi_earlyrecv2_sync.Po + -rm -f ./$(DEPDIR)/mpi_irecv.Po + -rm -f ./$(DEPDIR)/mpi_irecv_detached.Po + -rm -f ./$(DEPDIR)/mpi_isend.Po + -rm -f ./$(DEPDIR)/mpi_isend_detached.Po + -rm -f ./$(DEPDIR)/mpi_reduction.Po + -rm -f ./$(DEPDIR)/mpi_reduction_kernels.Po + -rm -f ./$(DEPDIR)/mpi_redux.Po + -rm -f ./$(DEPDIR)/mpi_scatter_gather.Po + -rm -f ./$(DEPDIR)/mpi_task_submit.Po + -rm -f ./$(DEPDIR)/mpi_test.Po + -rm -f ./$(DEPDIR)/multiple_send.Po + -rm -f ./$(DEPDIR)/ndim_interface.Po + -rm -f ./$(DEPDIR)/nothing.Po + -rm -f ./$(DEPDIR)/pingpong.Po + -rm -f ./$(DEPDIR)/policy_register.Po + -rm -f ./$(DEPDIR)/policy_register_many.Po + -rm -f ./$(DEPDIR)/policy_register_toomany.Po + -rm -f ./$(DEPDIR)/policy_selection.Po + -rm -f ./$(DEPDIR)/policy_selection2.Po + -rm -f ./$(DEPDIR)/policy_unregister.Po + -rm -f ./$(DEPDIR)/ring.Po + -rm -f ./$(DEPDIR)/ring_async.Po + -rm -f ./$(DEPDIR)/ring_async_implicit.Po + -rm -f ./$(DEPDIR)/ring_sync.Po + -rm -f ./$(DEPDIR)/ring_sync_detached.Po + -rm -f ./$(DEPDIR)/star.Po + -rm -f ./$(DEPDIR)/starpu_redefine.Po + -rm -f ./$(DEPDIR)/stats.Po + -rm -f ./$(DEPDIR)/sync.Po + -rm -f ./$(DEPDIR)/tags_allocate.Po + -rm -f ./$(DEPDIR)/tags_checking.Po + 
-rm -f ./$(DEPDIR)/temporary.Po + -rm -f ./$(DEPDIR)/user_defined_datatype.Po + -rm -f ./$(DEPDIR)/wait_for_all.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o 
$@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. 
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mpi/tests/attr.c b/mpi/tests/attr.c new file mode 100644 index 0000000..c6fbe2f --- /dev/null +++ b/mpi/tests/attr.c @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" +#include + +int main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED) +{ + int flag; + int64_t *value; + int64_t rvalue; + + starpu_mpi_comm_get_attr(MPI_COMM_WORLD, 42, NULL, &flag); + STARPU_ASSERT_MSG(flag == 0, "starpu_mpi_comm_get_attr was called with invalid argument\n"); + + starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &value, &flag); + STARPU_ASSERT_MSG(flag == 1, "starpu_mpi_comm_get_attr was called with valid argument\n"); + + rvalue = *value; + FPRINTF(stderr, "Value: %"PRIi64"\n", *value); + FPRINTF(stderr, "Value: %"PRIi64"\n", rvalue); + + return 0; +} diff --git a/mpi/tests/block_interface.c b/mpi/tests/block_interface.c new file mode 100644 index 0000000..810c7b2 --- /dev/null +++ b/mpi/tests/block_interface.c @@ -0,0 +1,161 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 4 +#else +# define NITER 2048 +#endif + +#define BIGSIZE 32 +#define SIZE 8 + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return STARPU_TEST_SKIPPED; + } + + /* Node 0 will allocate a big block and only register an inner part of + * it as the block data, Node 1 will allocate a block of small size and + * register it directly. Node 0 and 1 will then exchange the content of + * their blocks. 
*/ + + float *block = NULL; + starpu_data_handle_t block_handle = NULL; + + if (rank == 0) + { + block = calloc(BIGSIZE*BIGSIZE*BIGSIZE, sizeof(float)); + assert(block); + + /* fill the inner block */ + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f; + } + + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, + (uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE, + SIZE, SIZE, SIZE, sizeof(float)); + } + else if (rank == 1) + { + block = calloc(SIZE*SIZE*SIZE, sizeof(float)); + assert(block); + + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, + (uintptr_t)block, SIZE, SIZE*SIZE, + SIZE, SIZE, SIZE, sizeof(float)); + } + + if (rank == 0) + { + MPI_Status status; + + ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + + ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the block */ + ret = starpu_data_acquire(block_handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f); + } + starpu_data_release(block_handle); + + } + else if (rank == 1) + { + MPI_Status status; + + ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the block and modify it */ + ret = starpu_data_acquire(block_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f); + block[i + j*SIZE + k*SIZE*SIZE] = 33.0f; + } + 
starpu_data_release(block_handle); + + ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + + if (rank == 0 || rank == 1) + { + starpu_data_unregister(block_handle); + free(block); + } + + FPRINTF(stdout, "Rank %d is done\n", rank); + fflush(stdout); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/block_interface_pinned.c b/mpi/tests/block_interface_pinned.c new file mode 100644 index 0000000..bb081d2 --- /dev/null +++ b/mpi/tests/block_interface_pinned.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#else +# define NITER 2048 +#endif + +#define BIGSIZE 128 +#define SIZE 64 + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return STARPU_TEST_SKIPPED; + } + + /* Node 0 will allocate a big block and only register an inner part of + * it as the block data, Node 1 will allocate a block of small size and + * register it directly. Node 0 and 1 will then exchange the content of + * their blocks.
+ */ + + float *block = NULL; + starpu_data_handle_t block_handle = NULL; + + if (rank == 0) + { + starpu_malloc((void **)&block, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float)); + memset(block, 0, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float)); + + /* fill the inner block */ + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f; + } + + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, + (uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE, + SIZE, SIZE, SIZE, sizeof(float)); + } + else if (rank == 1) + { + starpu_malloc((void **)&block, + SIZE*SIZE*SIZE*sizeof(float)); + memset(block, 0, SIZE*SIZE*SIZE*sizeof(float)); + + starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, + (uintptr_t)block, SIZE, SIZE*SIZE, + SIZE, SIZE, SIZE, sizeof(float)); + } + + if (rank == 0) + { + MPI_Status status; + + ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + + ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the block */ + ret = starpu_data_acquire(block_handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f); + } + starpu_data_release(block_handle); + + } + else if (rank == 1) + { + MPI_Status status; + + ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the block and modify it */ + ret = starpu_data_acquire(block_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + unsigned i, j, k; + for (k = 0; k < SIZE; k++) + for (j = 0; j < SIZE; j++) + for (i = 0; i < SIZE; i++) + { + assert(block[i + j*SIZE
+ k*SIZE*SIZE] == 1.0f); + block[i + j*SIZE + k*SIZE*SIZE] = 33.0f; + } + starpu_data_release(block_handle); + + ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + + if (rank == 0 || rank == 1) + { + starpu_data_unregister(block_handle); + starpu_free_noflag(block, (rank == 0 ? BIGSIZE*BIGSIZE*BIGSIZE : SIZE*SIZE*SIZE)*sizeof(float)); /* size must match what this rank passed to starpu_malloc() */ + } + + FPRINTF(stdout, "Rank %d is done\n", rank); + fflush(stdout); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/broadcast.c b/mpi/tests/broadcast.c new file mode 100644 index 0000000..f1dee4d --- /dev/null +++ b/mpi/tests/broadcast.c @@ -0,0 +1,113 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "helper.h" + +void wait_CPU(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + starpu_codelet_unpack_args(args, &val); + *var = val; + starpu_sleep(1); +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_CPU }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var=-1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + if (rank == 0) + { + int val, n; + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", n); + ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + val = 43; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", n); + ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, 
&status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + STARPU_ASSERT(var == 42); + starpu_data_release(handle); + + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + STARPU_ASSERT(var == 43); + starpu_data_release(handle); + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/callback.c b/mpi/tests/callback.c new file mode 100644 index 0000000..15db689 --- /dev/null +++ b/mpi/tests/callback.c @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +static +int expected_x=40; +static +int expected_y=12; + +void my_func(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + FPRINTF_MPI(stderr, "i am here\n"); +} + +struct starpu_codelet my_codelet = +{ + .cpu_funcs = {my_func}, + .cuda_funcs = {my_func}, + .opencl_funcs = {my_func}, + .model = &starpu_perfmodel_nop, +}; + +static +void callback(void *ptr) +{ + int *x = (int *)ptr; + FPRINTF_MPI(stderr, "x=%d\n", *x); + STARPU_ASSERT_MSG(*x == expected_x, "%d != %d\n", *x, expected_x); + (*x)++; +} + +static +void prologue_callback(void *ptr) +{ + int *y = (int *)ptr; + FPRINTF_MPI(stderr, "y=%d\n", *y); + STARPU_ASSERT_MSG(*y == expected_y, "%d != %d\n", *y, expected_y); + (*y)++; +} + +int main(int argc, char **argv) +{ + int ret; + int x=40; + int y=12; + int rank, size; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + conf.nopencl = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, + NULL, + STARPU_EXECUTE_ON_NODE, 0, + STARPU_CALLBACK_WITH_ARG_NFREE, callback, &x, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + if (rank == 0) + expected_x ++; + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, + NULL, + STARPU_EXECUTE_ON_NODE, 0, + STARPU_CALLBACK, callback, + STARPU_CALLBACK_ARG_NFREE, &x, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + if (rank == 0) + expected_x ++; + STARPU_ASSERT_MSG(x == expected_x, "x should be equal to %d and not %d\n", expected_x, x); + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, + NULL, + STARPU_EXECUTE_ON_NODE, 0,
+ STARPU_PROLOGUE_CALLBACK, prologue_callback, + STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &y, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + if (rank == 0) + expected_y ++; + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, + &my_codelet, + STARPU_EXECUTE_ON_NODE, 0, + STARPU_PROLOGUE_CALLBACK_POP, prologue_callback, + STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, &y, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + starpu_task_wait_for_all(); + if (rank == 0) + expected_y ++; + STARPU_ASSERT_MSG(y == expected_y, "y should be equal to %d and not %d\n", expected_y, y); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return EXIT_SUCCESS; +} diff --git a/mpi/tests/checkpoints.c b/mpi/tests/checkpoints.c new file mode 100644 index 0000000..24f10af --- /dev/null +++ b/mpi/tests/checkpoints.c @@ -0,0 +1,204 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "helper.h" + +#define ARRAY_SIZE 12 + +int nb_nodes; +int me; + +int backup_of(int _me) +{ + if (_me==0) + return 1; + else + return 0; + return (_me+1)%nb_nodes; +} + +int pseudotest_checkpoint_template_register(int argc, char* argv[]) +{ + int mpi_init; + starpu_data_handle_t h; + starpu_data_handle_t h_array[ARRAY_SIZE]; + starpu_mpi_checkpoint_template_t cp_template1, cp_template2; + int val = 42; + int val2 = 1234; + + int array[ARRAY_SIZE]; + int ret; + struct starpu_conf conf; + + //init array + for (int i=0 ; i +#include "helper.h" + +void task_cpu_func(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + starpu_codelet_unpack_args(args, &val); + *var = val; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { task_cpu_func }, + .cpu_funcs_name = { "task_cpu_func" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var = -1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + if (rank == 0) + { + int val, n; + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the 
data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d with prio %d\n", n, size-n); + ret = starpu_mpi_isend_detached_prio(handle, n, 0, size-n, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_prio"); + } + } + else + { + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + printf("[%d] received data: %d\n", rank, var); + STARPU_ASSERT(var == 42); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/coop_acknowledgement.c b/mpi/tests/coop_acknowledgement.c new file mode 100644 index 0000000..b001d0d --- /dev/null +++ b/mpi/tests/coop_acknowledgement.c @@ -0,0 +1,138 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +void task_cpu_func(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + starpu_codelet_unpack_args(args, &val); + *var = val; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { task_cpu_func }, + .cpu_funcs_name = { "task_cpu_func" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var = -1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + if (rank == 0) + { + int val, i; + + starpu_data_handle_t* ack_handles = malloc((size-1) * sizeof(starpu_data_handle_t)); + int* acks = calloc(size - 1, sizeof(int)); + starpu_mpi_req* ack_reqs = calloc((size-1), sizeof(starpu_mpi_req)); + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(i = 1 ; i < size ; i++) + { + starpu_variable_data_register(&ack_handles[i-1], STARPU_MAIN_RAM, (uintptr_t) &acks[i-1], sizeof(int)); + ret = starpu_mpi_irecv(ack_handles[i-1], &ack_reqs[i-1], i, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_mpi_irecv"); + + FPRINTF_MPI(stderr, "sending data to %d\n", i); + ret = starpu_mpi_isend_detached(handle, i, 0, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + for(i = 0; i < size-1; i++) + { + ret = starpu_mpi_wait(&ack_reqs[i], MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + starpu_data_acquire(ack_handles[i], STARPU_R); + + STARPU_ASSERT(acks[i] == 1); + + starpu_data_release(ack_handles[i]); + starpu_data_unregister(ack_handles[i]); + } + + free(ack_handles); + free(acks); + free(ack_reqs); + } + else + { + starpu_data_handle_t ack_handle; + int ack = 1; + + starpu_variable_data_register(&ack_handle, STARPU_MAIN_RAM, (uintptr_t) &ack, sizeof(ack)); + + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + printf("[%d] received data: %d\n", rank, var); + STARPU_ASSERT(var == 42); + + ret = starpu_mpi_send(ack_handle, 0, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + starpu_data_unregister(ack_handle); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/coop_cache.c b/mpi/tests/coop_cache.c new file mode 100644 index 0000000..b21a84a --- /dev/null +++ b/mpi/tests/coop_cache.c @@ -0,0 +1,158 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This test generates a task graph that would lead to duplicate recipients if + * MPI cache is disabled: output of the "parent" task is required by all + * "child" tasks, each MPI rank executing two "child" tasks. + * + * Duplicates in the list of recipients of a broadcast can lead to a deadlock. + * In the NewMadeleine implementation, the following will happen, when cache + * is disabled: + * - Rank 0 will trigger a broadcast to ranks {1, 2, 3, 1, 2} + * - Ranks 1, 2, and 3 will post *one* recv for the data tag 0 + * - In the binomial routing tree, the rank 2 will forward the data to rank 2 (so, itself) + * - However, on rank 2, the first recv will be finalized only after all + * forwards are done. But the forward to 2 can be finished only when the second + * recv is posted. Posting the second recv will be done only after the first one + * is finalized. Hence the deadlock. + * */ + +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined.
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + + +static void parent_cpu_func(void *descr[], void *args) +{ + starpu_sleep(2); // Give time to submit other tasks and detect coop +} + +static struct starpu_codelet parent_cl = +{ + .cpu_funcs = { parent_cpu_func }, + .cpu_funcs_name = { "parent_task" }, + .nbuffers = 1, + .modes = { STARPU_W } +}; + +static void child_cpu_func(void* descr[], void* args) +{ + // do nothing +} + +static struct starpu_codelet child_cl = +{ + .cpu_funcs = { child_cpu_func }, + .cpu_funcs_name = { "child_task" }, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_W } +}; + +static inline int my_distrib(int x, int nb_nodes) +{ + return x % nb_nodes; +} + +static inline void do_test(starpu_mpi_tag_t *initial_tag, char* cache_enabled) +{ + int ret, rank, worldsize, i; + int* data; + starpu_data_handle_t* handles; + struct starpu_conf conf; + + setenv("STARPU_MPI_CACHE", cache_enabled, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + int nblocks = 2 * worldsize; + int **blocks = malloc(nblocks * sizeof(int*)); + + handles = malloc(nblocks*sizeof(starpu_data_handle_t)); + + for (i = 0; i < nblocks; i++) + { + int mpi_rank = my_distrib(i, worldsize); + if (mpi_rank == rank) + { + blocks[i] = calloc(320*320, sizeof(float)); + starpu_vector_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)blocks[i], 320*320, sizeof(float)); + } + else + { + blocks[i] = NULL; + starpu_vector_data_register(&handles[i], -1, (uintptr_t)NULL, 320*320, sizeof(float)); + } + + STARPU_ASSERT(handles[i] != NULL); + starpu_mpi_data_register(handles[i], *initial_tag+i, mpi_rank); + } + + starpu_mpi_task_insert(MPI_COMM_WORLD, &parent_cl, 
STARPU_W, handles[0], 0); + for (i = 1; i < nblocks-1; i++) + { + starpu_mpi_task_insert(MPI_COMM_WORLD, &child_cl, STARPU_R, handles[0], STARPU_W, handles[i], 0); + } + + starpu_task_wait_for_all(); + + for (i = 0; i < nblocks; i++) + { + starpu_data_unregister(handles[i]); + + if (my_distrib(i, worldsize) == rank) + { + free(blocks[i]); + } + } + + free(handles); + free(blocks); + + *initial_tag += 2*worldsize; + starpu_mpi_shutdown(); +} + +int main(int argc, char **argv) +{ + starpu_mpi_tag_t initial_tag = 0; + + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + + do_test(&initial_tag, /* disable cache */ "0"); + do_test(&initial_tag, /* enable cache */ "1"); + + MPI_Finalize(); + + return 0; +} +#endif diff --git a/mpi/tests/coop_chained_sends.c b/mpi/tests/coop_chained_sends.c new file mode 100644 index 0000000..b1b5833 --- /dev/null +++ b/mpi/tests/coop_chained_sends.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +#define NX 20 + +void scal_cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + struct starpu_vector_interface *vector = buffers[0]; + unsigned n = STARPU_VECTOR_GET_NX(vector); + float *val = (float *) STARPU_VECTOR_GET_PTR(vector); + + /* scale the vector */ + for (i = 0; i < n; i++) + val[i] *= 2; +} + +static struct starpu_codelet cl = +{ + .where = STARPU_CPU, + .cpu_funcs = { scal_cpu_func }, + .cpu_funcs_name = { "scal_cpu_func" }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int mpi_init; + int i = 0, n = 0; + MPI_Status status; + struct starpu_conf conf; + + float* vector = malloc(NX * sizeof(float)); + + for (i = 0; i < NX; i++) + { + vector[i] = 1.0f; + } + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) vector, NX, sizeof(float)); + + if (rank == 0) + { + ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for (n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", n); + ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(handle, 0, 0, 
MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + FPRINTF_MPI(stderr, "received data\n"); + starpu_data_acquire(handle, STARPU_R); + STARPU_ASSERT_MSG(vector[0] == 2, "vector[0] = %f, expected 2\n", vector[0]); + STARPU_ASSERT_MSG(vector[NX-1] == 2, "vector[%d] = %f, expected 2\n", NX-1, vector[NX-1]); + + starpu_data_release(handle); + + if (rank == 1) + { + ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-2); + + for (i = 2; i < size; i++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", i); + ret = starpu_mpi_isend_detached(handle, i, 1, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(handle, 1, 1, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + FPRINTF_MPI(stderr, "received data\n"); + starpu_data_acquire(handle, STARPU_R); + STARPU_ASSERT_MSG(vector[0] == 4, "vector[0] = %f, expected 4\n", vector[0]); + STARPU_ASSERT_MSG(vector[NX-1] == 4, "vector[%d] = %f, expected 4\n", NX-1, vector[NX-1]); + starpu_data_release(handle); + } + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + free(vector); + + return 0; +} diff --git a/mpi/tests/coop_datatype.c b/mpi/tests/coop_datatype.c new file mode 100644 index 0000000..ece3874 --- /dev/null +++ b/mpi/tests/coop_datatype.c @@ -0,0 +1,311 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +struct starpu_my_data_interface +{ + enum starpu_data_interface_id id; /**< Identifier of the interface */ + + uintptr_t ptr; /**< local pointer of the data */ + uintptr_t dev_handle; /**< device handle of the data. */ + size_t offset; /**< offset in the data */ +}; + +struct starpu_my_data +{ + int d; + char c; +}; + +void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype) +{ + int ret; + int blocklengths[2] = {1, 1}; + MPI_Aint displacements[2]; + MPI_Datatype types[2] = {MPI_INT, MPI_CHAR}; + struct starpu_my_data *myinterface; + myinterface = calloc(1, sizeof(struct starpu_my_data)); + + MPI_Get_address(myinterface, displacements); + MPI_Get_address(&myinterface[0].c, displacements+1); + displacements[1] -= displacements[0]; + displacements[0] = 0; + + ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); + + ret = MPI_Type_commit(mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + + free(myinterface); +} + +int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) +{ + (void)handle; + _starpu_my_data_datatype_allocate(node, mpi_datatype); + return 0; +} + +void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype) +{ + int ret; + ret = MPI_Type_free(mpi_datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); +} + +static void data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; 
+ + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_my_data_interface *local_interface = + (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = my_data_interface->ptr; + local_interface->dev_handle = my_data_interface->dev_handle; + local_interface->offset = my_data_interface->offset; + } + else + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + } + } +} + +static starpu_ssize_t data_allocate_data_on_node(void *data_interface, unsigned node) +{ + uintptr_t addr = 0, handle; + + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; + + starpu_ssize_t allocated_memory = sizeof(int)+sizeof(char); + handle = starpu_malloc_on_node(node, allocated_memory); + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) + addr = handle; + + /* update the data properly in consequence */ + my_data_interface->ptr = addr; + my_data_interface->dev_handle = handle; + my_data_interface->offset = 0; + + return allocated_memory; +} + +static void data_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; + starpu_free_on_node(node, my_data_interface->dev_handle, sizeof(int)+sizeof(char)); + my_data_interface->ptr = 0; + my_data_interface->dev_handle = 0; +} + +static size_t data_get_size(starpu_data_handle_t handle) +{ + (void)handle; + return sizeof(int) + sizeof(char); +} + +static size_t data_get_alloc_size(starpu_data_handle_t handle) +{ + (void)handle; + return sizeof(int) + sizeof(char); +} + +static uint32_t data_footprint(starpu_data_handle_t handle) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return 
starpu_hash_crc32c_be(my_data->ptr, 0); +} + +static int data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + (void)handle; + (void)node; + (void)ptr; + (void)count; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the pack_data function should not happen\n"); + return 0; +} + +static int data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)handle; + (void)node; + (void)ptr; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); + return 0; +} + +static int data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)handle; + (void)node; + (void)ptr; + (void)count; + STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); + return 0; +} + +static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) data_interface; + struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; + return snprintf(buf, size, "Data%d-%c", data->d, data->c); +} + +static void *data_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_my_data_interface *my_data_interface = data_interface; + + return (void*) my_data_interface->ptr; +} + +static struct starpu_data_interface_ops interface_data_ops = +{ + .register_data_handle = data_register_data_handle, + .allocate_data_on_node = data_allocate_data_on_node, + .free_data_on_node = data_free_data_on_node, + .get_size = data_get_size, + .get_alloc_size = data_get_alloc_size, + .footprint = data_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_my_data_interface), + .to_pointer = 
data_to_pointer, + .pack_data = data_pack_data, + .peek_data = data_peek_data, + .unpack_data = data_unpack_data, + .describe = data_describe +}; + +void starpu_my_data_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) +{ + if (interface_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) + { + interface_data_ops.interfaceid = starpu_data_interface_get_next_id(); + starpu_mpi_interface_datatype_node_register(interface_data_ops.interfaceid, starpu_my_data_datatype_allocate, starpu_my_data_datatype_free); + } + + struct starpu_my_data_interface data = + { + .id = interface_data_ops.interfaceid, + .ptr = (uintptr_t) xc, + .dev_handle = (uintptr_t) xc, + .offset = 0, + }; + + starpu_data_register(handleptr, home_node, &data, &interface_data_ops); +} + +void starpu_my_data_shutdown(void) +{ + starpu_mpi_interface_datatype_unregister(interface_data_ops.interfaceid); + +} + +int main(int argc, char **argv) +{ + int rank, nodes, mpi_init; + int ret; + const int tag = 12; + int i = 0; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (nodes < 2) + fprintf(stderr, "We need at least 2 processes.\n"); + else + fprintf(stderr, "We need at least 1 CPU.\n"); + } + starpu_mpi_shutdown(); + return 77; + } + + struct starpu_my_data my0; + starpu_data_handle_t handle0; + + starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); + + if (rank == 0) + { + my0.d = 43; + my0.c = 'm'; + + starpu_mpi_coop_sends_data_handle_nb_sends(handle0, nodes-1); + for (i = 1; i 
< nodes; i++) + { + ret = starpu_mpi_isend_detached(handle0, i, tag, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + my0.d = 23; + my0.c = 'd'; + + ret = starpu_mpi_recv(handle0, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle0, STARPU_R); + + printf("[%d] received: %d %c\n", rank, my0.d, my0.c); + + assert(my0.d == 43); + assert(my0.c == 'm'); + + starpu_data_release(handle0); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_data_unregister(handle0); + starpu_my_data_shutdown(); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/coop_insert_task.c b/mpi/tests/coop_insert_task.c new file mode 100644 index 0000000..c7e57ba --- /dev/null +++ b/mpi/tests/coop_insert_task.c @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Test to ensure coop are correctly detected even through the + * starpu_mpi_task_insert() API. + * + * One task put an initial value in a buffer, then each node copies the content + * of this buffer in a local buffer, within a task. Since each node needs the + * initial buffer, this triggers a broadcast. 
*/
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#define TARGET_VALUE 42
+
+
+static void init_cpu_func(void *descr[], void *args)
+{
+	int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]);
+	int val;
+
+	starpu_codelet_unpack_args(args, &val);
+	*var = val;
+
+	starpu_sleep(2); // Give time to submit other tasks and detect coop
+}
+
+static struct starpu_codelet init_cl =
+{
+	.cpu_funcs = { init_cpu_func },
+	.cpu_funcs_name = { "init_task" },
+	.nbuffers = 1,
+	.modes = { STARPU_W }
+};
+
+static void copy_cpu_func(void* descr[], void* args)
+{
+	(void) args;
+
+	int *var_src = (int*) STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *var_target = (int*) STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	*var_target = *var_src;
+}
+
+static struct starpu_codelet copy_cl =
+{
+	.cpu_funcs = { copy_cpu_func },
+	.cpu_funcs_name = { "copy_task" },
+	.nbuffers = 2,
+	.modes = { STARPU_R, STARPU_W }
+};
+
+int main(int argc, char **argv)
+{
+	int ret, rank, size, mpi_init, i;
+	int* data;
+	starpu_data_handle_t* handles;
+	MPI_Status status;
+	struct starpu_conf conf;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	data = malloc(size*sizeof(int));
+	handles = malloc(size*sizeof(starpu_data_handle_t));
+
+	for (i = 0; i < size; i++)
+	{
+		if (i == rank)
+		{
+			starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(int));
+		}
+		else
+		{
+			starpu_variable_data_register(&handles[i], -1, (uintptr_t)NULL, sizeof(int));
+		}
+
+		STARPU_ASSERT(handles[i] != NULL);
+		starpu_mpi_data_register(handles[i], i, i); /* handle i: tag i, owned by rank i */
+	}
+
+	int val = TARGET_VALUE;
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &init_cl, STARPU_W, handles[0], STARPU_VALUE, &val, sizeof(val), 0);
+	for (i = 1; i < size; i++) /* every other rank reads handles[0]: this is what triggers the broadcast */
+	{
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &copy_cl, STARPU_R, handles[0], STARPU_W, handles[i], 0);
+	}
+
+	starpu_data_acquire(handles[rank], STARPU_R); /* each rank checks only the handle it owns */
+	int* handle_ptr = (int*) starpu_variable_get_local_ptr(handles[rank]);
+	printf("[%d] data: %d\n", rank, *handle_ptr);
+	STARPU_ASSERT(*handle_ptr == TARGET_VALUE);
+	starpu_data_release(handles[rank]);
+
+	for (i = 0; i < size; i++)
+	{
+		starpu_data_unregister(handles[i]);
+	}
+
+	free(handles);
+	free(data);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/coop_large.c b/mpi/tests/coop_large.c
new file mode 100644
index 0000000..2595116
--- /dev/null
+++ b/mpi/tests/coop_large.c
@@ -0,0 +1,120 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "helper.h" + +#define NX 4800000 + +void scal_cpu_func(void *buffers[], void *cl_arg) +{ + unsigned i; + float factor; + struct starpu_vector_interface *vector = buffers[0]; + unsigned n = STARPU_VECTOR_GET_NX(vector); + float *val = (float *) STARPU_VECTOR_GET_PTR(vector); + starpu_codelet_unpack_args(cl_arg, &factor); + + /* scale the vector */ + for (i = 0; i < n; i++) + val[i] *= factor; +} + +static struct starpu_codelet cl = +{ + .where = STARPU_CPU, + .cpu_funcs = { scal_cpu_func }, + .cpu_funcs_name = { "scal_cpu_func" }, + .nbuffers = 1, + .modes = { STARPU_RW } +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int mpi_init; + int i = 0; + MPI_Status status; + struct starpu_conf conf; + + float* vector = malloc(NX * sizeof(float)); + + for (i = 0; i < NX; i++) + { + vector[i] = 1.0f; + } + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) vector, NX, sizeof(float)); + + float factor = 3.14; + + if (rank == 0) + { + ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_VALUE, &factor, sizeof(factor), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for (i = 1 ; i < size ; i++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", i); + ret = starpu_mpi_isend_detached(handle, i, 0, MPI_COMM_WORLD, NULL, NULL); 
+ STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + STARPU_ASSERT_MSG(vector[0] == factor, "vector[0] = %f, expected %f\n", vector[0], factor); + STARPU_ASSERT_MSG(vector[NX-1] == factor, "vector[%d] = %f, expected %f\n", NX-1, vector[NX-1], factor); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + free(vector); + + return 0; +} diff --git a/mpi/tests/coop_many.c b/mpi/tests/coop_many.c new file mode 100644 index 0000000..943c179 --- /dev/null +++ b/mpi/tests/coop_many.c @@ -0,0 +1,130 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+/* Every rank asynchronously sends coop and receives from coop several times */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#define NX (256*256)
+#define NB_MCASTS 10
+
+int main(int argc, char **argv)
+{
+	int ret, rank, worldsize;
+	int mpi_init;
+	int i = 0, j = 0;
+	MPI_Status status;
+	struct starpu_conf conf;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
+
+	float **vectors = malloc(NB_MCASTS*worldsize*sizeof(float*));
+	starpu_data_handle_t *handles = malloc(NB_MCASTS*worldsize*sizeof(starpu_data_handle_t));
+	starpu_mpi_req *reqs = malloc(NB_MCASTS*worldsize*sizeof(starpu_mpi_req));
+	for (i = 0; i < NB_MCASTS*worldsize; i++)
+	{
+		vectors[i] = malloc(NX*sizeof(float));
+		for (j = 0; j < NX; j++)
+		{
+			vectors[i][j] = i; /* buffer number i is filled with the value i on every rank */
+		}
+		starpu_vector_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t) vectors[i], NX, sizeof(float));
+	}
+
+	int sender_rank = 0;
+	// Submit all communications:
+	for (sender_rank = 0; sender_rank < worldsize; sender_rank++)
+	{
+		for (i = 0; i < NB_MCASTS; i++)
+		{
+			int tag = sender_rank*NB_MCASTS+i; /* tag doubles as the index of the buffer being exchanged */
+			assert(tag < worldsize*NB_MCASTS);
+
+			if (rank == sender_rank)
+			{
+				starpu_mpi_coop_sends_data_handle_nb_sends(handles[tag], worldsize-1);
+				for (j = 0; j < worldsize; j++)
+				{
+					if (j != sender_rank)
+					{
+						ret = starpu_mpi_isend_detached(handles[tag], j, tag, MPI_COMM_WORLD, NULL, NULL);
+						STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached");
+					}
+				}
+			}
+			else
+			{
+				ret = starpu_mpi_irecv(handles[tag], &reqs[tag], sender_rank, tag, MPI_COMM_WORLD);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv");
+			}
+		}
+	}
+
+	// Wait for all receives:
+	for (sender_rank = 0; sender_rank < worldsize; sender_rank++)
+	{
+		for (i = 0; i < NB_MCASTS; i++)
+		{
+			int tag = sender_rank*NB_MCASTS+i;
+			assert(tag < worldsize*NB_MCASTS);
+
+			if (rank != sender_rank)
+			{
+				ret = starpu_mpi_wait(&reqs[tag], MPI_STATUS_IGNORE);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait");
+
+				starpu_data_acquire(handles[tag], STARPU_R); /* inspect the buffer this request filled: index tag, not i */
+				STARPU_ASSERT_MSG(vectors[tag][0] == tag, "vectors[%d][0] = %f, expected %d\n", tag, vectors[tag][0], tag);
+				STARPU_ASSERT_MSG(vectors[tag][NX-1] == tag, "vector[%d][%d] = %f, expected %d\n", tag, NX-1, vectors[tag][NX-1], tag);
+				starpu_data_release(handles[tag]);
+			}
+		}
+	}
+
+	// This barrier is unblocked after all receives are done, that means all isends are also done, so we can after that unregister handles (there is no implicit wait on the isends)
+	starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	for (i = 0; i < NB_MCASTS*worldsize; i++)
+	{
+		starpu_data_unregister(handles[i]);
+		free(vectors[i]);
+	}
+	free(vectors);
+	free(handles);
+	free(reqs);
+
+	printf("[%d] end\n", rank);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/coop_recv_not_yet_posted.c b/mpi/tests/coop_recv_not_yet_posted.c
new file mode 100644
index 0000000..a9680c4
--- /dev/null
+++ b/mpi/tests/coop_recv_not_yet_posted.c
@@ -0,0 +1,112 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +void task_cpu_func(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + starpu_codelet_unpack_args(args, &val); + *var = val; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { task_cpu_func }, + .cpu_funcs_name = { "task_cpu_func" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var = -1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + if (rank == 0) + { + int val, n; + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d\n", n); + ret = starpu_mpi_isend_detached(handle, n, 15, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + if (rank == 2) + { + sleep(5); + } + printf("[%d] will post recv\n", rank); + fflush(stdout); + ret = 
starpu_mpi_recv(handle, 0, 15, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + printf("[%d] received data: %d\n", rank, var); + STARPU_ASSERT(var == 42); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/coop_recv_wait_finalize.c b/mpi/tests/coop_recv_wait_finalize.c new file mode 100644 index 0000000..41a44d0 --- /dev/null +++ b/mpi/tests/coop_recv_wait_finalize.c @@ -0,0 +1,125 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This test checks if STARPU_MPI_RECV_WAIT_FINALIZE env var doesn't break anything. */ + +#include +#include +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static int rank, worldsize; + +static void task_cpu_func(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + starpu_codelet_unpack_args(args, &val); + *var = val; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { task_cpu_func }, + .cpu_funcs_name = { "task_cpu_func" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +static void test(starpu_mpi_tag_t tag, char* enabled) +{ + int var = -1; + int ret; + starpu_data_handle_t handle; + struct starpu_conf conf; + + setenv("STARPU_MPI_RECV_WAIT_FINALIZE", enabled, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &var, sizeof(var)); + + if (rank == 0) + { + int val, n; + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, worldsize-1); + + for(n = 1 ; n < worldsize ; n++) + { + ret = starpu_mpi_isend_detached(handle, n, tag, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + ret = starpu_mpi_recv(handle, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + FPRINTF_MPI(stderr, "received data\n"); + starpu_data_acquire(handle, STARPU_R); + FPRINTF_MPI(stderr, "acquired data\n"); + printf("[%d] acquired data: %d\n", rank, var); + STARPU_ASSERT(var == 
42); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_mpi_shutdown(); +} + +int main(int argc, char** argv) +{ + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &worldsize); + + test(42, "0"); + test(24, "1"); + + MPI_Finalize(); + return 0; +} +#endif diff --git a/mpi/tests/coop_user_defined_datatype.c b/mpi/tests/coop_user_defined_datatype.c new file mode 100644 index 0000000..10828d5 --- /dev/null +++ b/mpi/tests/coop_user_defined_datatype.c @@ -0,0 +1,184 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Copy of test user_defined_datatype.c, but with coop */ + +#include +#include +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define ELEMENTS 10 +#else +# define ELEMENTS 1000 +#endif + +static int my_rank, worldsize, is_sender; + +void test_handle_recv_send(starpu_data_handle_t *handles, int nb_handles, starpu_mpi_tag_t tag) +{ + int i, j; + int ret; + + if (is_sender) + { + for(i=0 ; i +#include "helper.h" + + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var = -1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (rank == 0) + { + int n; + var = 42; + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + /* This function tells StarPU to wait for size-1 sends of handle before really + * sending the data. There are many sends of the same handle, so a dynamic + * broadcast is triggered. 
*/ + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d with prio %d\n", n, size-n); + ret = starpu_mpi_isend_detached_prio(handle, n, 0, size-n, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_prio"); + } + } + else + { + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + printf("[%d] received data: %d\n", rank, var); + STARPU_ASSERT(var == 42); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "received data\n"); + } + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/coop_wrong_order.c b/mpi/tests/coop_wrong_order.c new file mode 100644 index 0000000..d516558 --- /dev/null +++ b/mpi/tests/coop_wrong_order.c @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +void task_cpu_func(void *descr[], void *args) +{ + int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); + int val; + + printf("running task\n"); + + starpu_codelet_unpack_args(args, &val); + *var = val; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { task_cpu_func }, + .cpu_funcs_name = { "task_cpu_func" }, + .nbuffers = 1, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .modes = { STARPU_W }, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var = -1; + int mpi_init; + MPI_Status status; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &var, sizeof(var)); + + if (rank == 0) + { + int val, n; + + val = 42; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d with tag 1\n", n); + ret = starpu_mpi_isend_detached(handle, n, 1, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + + val = 43; + ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* The task previously inserted should be enough to detect the coop, + * but to be sure, indicate the number of sends requests before really + * sending the data: */ + 
starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); + + for(n = 1 ; n < size ; n++) + { + FPRINTF_MPI(stderr, "sending data to %d with tag 0\n", n); + ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + } + else + { + FPRINTF_MPI(stderr, "waiting for data with tag 0\n"); + ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + FPRINTF_MPI(stderr, "received data: %d\n", var); + STARPU_ASSERT(var == 43); + + starpu_data_release(handle); + + FPRINTF_MPI(stderr, "waiting for data with tag 1\n"); + ret = starpu_mpi_recv(handle, 0, 1, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(handle, STARPU_R); + FPRINTF_MPI(stderr, "received data: %d\n", var); + STARPU_ASSERT(var == 42); + + starpu_data_release(handle); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_data_unregister(handle); + + printf("[%d] end\n", rank); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/data_cpy.c b/mpi/tests/data_cpy.c new file mode 100644 index 0000000..483c9bd --- /dev/null +++ b/mpi/tests/data_cpy.c @@ -0,0 +1,91 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +void callback(void *arg) +{ + FPRINTF_MPI(stderr, "value in callback: %d\n", *((int *)arg)); +} + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + starpu_data_handle_t src_handle, dst_handle; + int value; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + { + value = rank; + starpu_variable_data_register(&src_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + starpu_mpi_data_register(src_handle, 12, 0); + starpu_variable_data_register(&dst_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + starpu_mpi_data_register(dst_handle, 42, 1); + + FPRINTF_MPI(stderr, "value before copy: %d\n", value); + if (rank == 1) STARPU_ASSERT_MSG(value == rank, "before copy value %d should be %d\n", value, rank); + starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 0, callback, &value); + starpu_data_unregister(src_handle); + starpu_data_unregister(dst_handle); + + FPRINTF_MPI(stderr, "value after copy: %d\n", value); + if (rank == 1) STARPU_ASSERT_MSG(value == 0, "after copy value %d should be %d\n", value, 0); + } + + { + value = rank+12; + starpu_variable_data_register(&src_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + starpu_mpi_data_register(src_handle, 12, 0); + starpu_variable_data_register(&dst_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + starpu_mpi_data_register(dst_handle, 42, 1); + + FPRINTF_MPI(stderr, "value 
before copy: %d\n", value); + if (rank == 1) STARPU_ASSERT_MSG(value == rank+12, "before copy value %d should be %d\n", value, rank+12); + starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 1, callback, &value); + starpu_data_unregister(src_handle); + starpu_data_unregister(dst_handle); + + FPRINTF_MPI(stderr, "value after copy: %d\n", value); + if (rank == 1) STARPU_ASSERT_MSG(value == 12, "after copy value %d should be %d\n", value, 12); + } + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/datatypes.c b/mpi/tests/datatypes.c new file mode 100644 index 0000000..a602129 --- /dev/null +++ b/mpi/tests/datatypes.c @@ -0,0 +1,626 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +typedef void (*check_func)(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error); + +void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int tag_s, starpu_data_handle_t handle_r, int tag_r, int *error, check_func func) +{ + int ret; + MPI_Status status; + + if (rank == 0) + { + ret = starpu_mpi_send(handle_s, node, tag_s, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(handle_r, node, tag_r, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + assert(func); + func(handle_s, handle_r, error); + } + else if (rank == 1) + { + ret = starpu_mpi_recv(handle_s, node, tag_s, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(handle_s, node, tag_r, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } +} + +/* + * Void + */ +void check_void(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) +{ + (void)error; + (void)handle_s; + (void)handle_r; + FPRINTF_MPI(stderr, "Success with void value\n"); +} + +void exchange_void(int rank, int *error) +{ + STARPU_SKIP_IF_VALGRIND; + + if (rank == 0) + { + starpu_data_handle_t void_handle[2]; + starpu_void_data_register(&void_handle[0]); + starpu_void_data_register(&void_handle[1]); + + send_recv_and_check(rank, 1, void_handle[0], 0x42, void_handle[1], 0x1337, error, check_void); + + starpu_data_unregister(void_handle[0]); + starpu_data_unregister(void_handle[1]); + } + else if (rank == 1) + { + starpu_data_handle_t void_handle; + starpu_void_data_register(&void_handle); + send_recv_and_check(rank, 0, void_handle, 0x42, NULL, 0x1337, NULL, NULL); + starpu_data_unregister(void_handle); + } +} + +/* + * Variable + */ +void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) +{ + float *v_s, *v_r; + + 
STARPU_ASSERT(starpu_variable_get_elemsize(handle_s) == starpu_variable_get_elemsize(handle_r)); + + starpu_data_acquire(handle_s, STARPU_R); + v_s = (float *)starpu_variable_get_local_ptr(handle_s); + starpu_data_release(handle_s); + starpu_data_acquire(handle_r, STARPU_R); + v_r = (float *)starpu_variable_get_local_ptr(handle_r); + starpu_data_release(handle_r); + + if (*v_s == *v_r) + { + FPRINTF_MPI(stderr, "Success with variable value: %f == %f\n", *v_s, *v_r); + } + else + { + *error = 1; + FPRINTF_MPI(stderr, "Error with variable value: %f != %f\n", *v_s, *v_r); + } +} + +void exchange_variable(int rank, int *error) +{ + if (rank == 0) + { + float v = 42.12; + starpu_data_handle_t variable_handle[2]; + starpu_variable_data_register(&variable_handle[0], STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(v)); + starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v)); + + send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, error, check_variable); + + starpu_data_unregister(variable_handle[0]); + starpu_data_unregister(variable_handle[1]); + } + else if (rank == 1) + { + starpu_data_handle_t variable_handle; + starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float)); + send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL); + starpu_data_unregister(variable_handle); + } +} + +/* + * Vector + */ +void check_vector(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) +{ + int i; + int nx; + int *v_r, *v_s; + + STARPU_ASSERT(starpu_vector_get_elemsize(handle_s) == starpu_vector_get_elemsize(handle_r)); + STARPU_ASSERT(starpu_vector_get_nx(handle_s) == starpu_vector_get_nx(handle_r)); + + nx = starpu_vector_get_nx(handle_r); + v_r = (int *)starpu_vector_get_local_ptr(handle_r); + v_s = (int *)starpu_vector_get_local_ptr(handle_s); + + for(i=0 ; i %d] value: %c == %c\n", x, y, index, matrix_s[index], matrix_r[index]); + } + else + { + *error = 1; + 
FPRINTF_MPI(stderr, "Error with matrix[%d,%d --> %d] value: %c != %c\n", x, y, index, matrix_s[index], matrix_r[index]); + } + } + } +} + +void exchange_matrix(int rank, int *error) +{ + int nx=3; + int ny=2; + + if (rank == 0) + { + char *matrix, n='a'; + int x, y; + starpu_data_handle_t matrix_handle[2]; + + starpu_malloc((void **)&matrix, nx*ny*sizeof(char)); + assert(matrix); + for(y=0 ; y %d] value: %f == %f\n", x, y, z, index, block_s[index], block_r[index]); + } + else + { + *error = 1; + FPRINTF_MPI(stderr, "Error with block[%d,%d,%d --> %d] value: %f != %f\n", x, y, z, index, block_s[index], block_r[index]); + } + } + } + + starpu_data_release(handle_s); + starpu_data_release(handle_r); +} + +void exchange_block(int rank, int *error) +{ + int nx=3; + int ny=2; + int nz=4; + + if (rank == 0) + { + float *block, n=1.0; + int x, y, z; + starpu_data_handle_t block_handle[2]; + + starpu_malloc((void **)&block, nx*ny*nz*sizeof(float)); + assert(block); + for(z=0 ; z +#include +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +int main(int argc, char **argv) +{ + int ret; + setenv("STARPU_DISPLAY_BINDINGS", "1", 1); + + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_shutdown(); + MPI_Finalize(); + + return EXIT_SUCCESS; +} +#endif diff --git a/mpi/tests/driver.c b/mpi/tests/driver.c new file mode 100644 index 0000000..5e1c5dc --- /dev/null +++ b/mpi/tests/driver.c @@ -0,0 +1,160 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +int main(int argc, char **argv) +{ + int ret, rank, size, i; + starpu_data_handle_t tab_handle[4]; + int values[4]; + starpu_mpi_req request[2] = {NULL, NULL}; + int mpi_init; + struct starpu_conf conf; + + setenv("STARPU_MPI_DRIVER_CALL_FREQUENCY", "1", 1); + setenv("STARPU_MPI_DRIVER_TASK_FREQUENCY", "10", 1); + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + FPRINTF_MPI(stderr, "We need a even number of processes.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + for(i=0 ; i<4 ; i++) + { + if (i<3 || rank%2) + { + // all data are registered on all nodes, but the 4th data which is not registered on the receiving node + values[i] = (rank+1) * (i+1); + starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i])); + starpu_mpi_data_register(tab_handle[i], i, rank); + } + } + + int other_rank = rank%2 == 0 ? rank+1 : rank-1; + + FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); + + if (rank%2) + { + FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank); + // this data will be received as an early registered data + ret = starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + // this data will be received as an early UNregistered data + ret = starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + + ret = starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + } + else + { + ret = starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + + // we register the data + starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register(tab_handle[3], 3, rank); + ret = starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + + int finished=0; + while (!finished) + { + for(i=0 ; i<2 ; i++) + { + if (request[i]) + { + int flag; + MPI_Status status; + ret = starpu_mpi_test(&request[i], &flag, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + if (flag) + FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]); + } + } + finished = request[0] == NULL && request[1] == NULL; +#ifdef STARPU_SIMGRID + starpu_sleep(0.001); +#endif + } + + if (rank%2 == 0) + { + void *ptr0; + void *ptr3; + + starpu_data_acquire(tab_handle[0], STARPU_RW); + ptr0 = starpu_data_get_local_ptr(tab_handle[0]); + starpu_data_release(tab_handle[0]); + + starpu_data_acquire(tab_handle[3], STARPU_RW); + ptr3 = starpu_data_get_local_ptr(tab_handle[3]); + starpu_data_release(tab_handle[3]); + + ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4); + ret = !ret; + FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank); + } + + for(i=0 ; i<4 ; i++) + starpu_data_unregister(tab_handle[i]); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} +#endif diff --git a/mpi/tests/early_request.c b/mpi/tests/early_request.c new file mode 100644 index 0000000..da88ba8 --- /dev/null +++ b/mpi/tests/early_request.c @@ -0,0 +1,282 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +#define NUM_EL 5 +#ifdef STARPU_QUICK_CHECK +# define NUM_LOOPS 2 +#else +# define NUM_LOOPS 10 +#endif + +/* + * This testcase written by J-M Couteyen allows to test that several + * early requests for a given source and tag can be posted to StarPU + * by the application before data arrive. + * + * In this test case, multiples processes (called "domains") exchanges + * information between multiple "elements" multiple times, with + * different sizes (in order to catch error more easily). + * The communications are independent between the elements (each one + * as its proper tag), but must occur in the submitted order for an + * element taken independently. +*/ + +struct element +{ + int tag; + int foreign_domain; + + int array_send[100]; + int array_recv[100]; + + starpu_data_handle_t ensure_submitted_order_send; + starpu_data_handle_t ensure_submitted_order_recv; + starpu_data_handle_t send; + starpu_data_handle_t recv; +}; + +/* functions/codelet to fill the bufferss*/ +void fill_tmp_buffer(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); + int nx = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + + for (i=0; itag=size; + el->foreign_domain=foreign_domain; + + int mpi_rank; + starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); + + starpu_vector_data_register(&el->recv, 0, (uintptr_t)el->array_recv, size, sizeof(int)); + starpu_vector_data_register(&el->send, 0, (uintptr_t)el->array_send, size, sizeof(int)); + starpu_void_data_register(&el->ensure_submitted_order_send); + starpu_void_data_register(&el->ensure_submitted_order_recv); +} + +void free_element(struct element *el) +{ + 
starpu_data_unregister(el->recv); + starpu_data_unregister(el->send); + starpu_data_unregister(el->ensure_submitted_order_send); + starpu_data_unregister(el->ensure_submitted_order_recv); +} + +void insert_work_for_one_element(struct element *el) +{ + starpu_data_handle_t tmp_recv; + starpu_data_handle_t tmp_send; + int ret; + + starpu_vector_data_register(&tmp_recv, -1, 0, el->tag, sizeof(int)); + starpu_vector_data_register(&tmp_send, -1, 0, el->tag, sizeof(int)); + + //Emulate the work to fill the send buffer + ret = starpu_task_insert(&fill_tmp_buffer_cl, + STARPU_W,tmp_send, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + //Send operation + ret = starpu_task_insert(&submitted_order_rw, + STARPU_RW,el->ensure_submitted_order_send, + STARPU_RW,tmp_send, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_isend_detached(tmp_send,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + + ret = starpu_task_insert(&submitted_order_rw, + STARPU_RW,el->ensure_submitted_order_send, + STARPU_RW,tmp_send, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + //Recv operation for current element + ret = starpu_task_insert(&submitted_order, + STARPU_RW,el->ensure_submitted_order_recv, + STARPU_W,tmp_recv, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_mpi_irecv_detached(tmp_recv,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + + //Emulate the "reading" of the recv value. 
+ ret = starpu_task_insert(&read_ghost_value_cl, + STARPU_R,tmp_recv, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unregister_submit(tmp_send); + starpu_data_unregister_submit(tmp_recv); +} + +/*main program*/ +int main(int argc, char * argv[]) +{ + /* Init */ + int ret; + int mpi_rank, mpi_size; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size); + + if (starpu_cpu_worker_get_count() == 0) + { + if (mpi_rank == 0) + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return mpi_rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + /*element initialization : domains are connected as a ring for this test*/ + int num_elements=NUM_EL; + struct element * el_left=malloc(num_elements*sizeof(el_left[0])); + struct element * el_right=malloc(num_elements*sizeof(el_right[0])); + int i; + for(i=0;i +#include +#include "helper.h" + +#ifndef STARPU_USE_MPI_MPI +int main(int argc, char **argv) +{ + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} + +#else + +#include +#include +#include + +void early_data() +{ + struct _starpu_mpi_early_data_handle *edh[2]; + struct _starpu_mpi_envelope envelope[2]; + struct _starpu_mpi_node_tag node_tag[2]; + struct _starpu_mpi_early_data_handle *early; + struct _starpu_mpi_early_data_handle_tag_hashlist *hash; + + memset(&node_tag[0], 0, sizeof(struct _starpu_mpi_node_tag)); + node_tag[0].node.rank = 1; + node_tag[0].node.comm = MPI_COMM_WORLD; + node_tag[0].data_tag = 42; + + memset(&node_tag[1], 0, sizeof(struct _starpu_mpi_node_tag)); + node_tag[1].node.rank = 2; + node_tag[1].node.comm = MPI_COMM_WORLD; + node_tag[1].data_tag = 84; + + envelope[0].data_tag = node_tag[0].data_tag; + edh[0] = _starpu_mpi_early_data_create(&envelope[0], node_tag[0].node.rank, node_tag[0].node.comm); + + envelope[1].data_tag = node_tag[1].data_tag; + edh[1] = _starpu_mpi_early_data_create(&envelope[1], node_tag[1].node.rank, node_tag[1].node.comm); + + _starpu_mpi_early_data_add(edh[0]); + _starpu_mpi_early_data_add(edh[1]); + + hash = _starpu_mpi_early_data_extract(&node_tag[0]); + STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1); + early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list); + STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag); + STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0); + 
_starpu_mpi_early_data_delete(early); + free(hash); + + early = _starpu_mpi_early_data_find(&node_tag[1]); + STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag); + _starpu_mpi_early_data_delete(early); +} + +void early_request() +{ + struct _starpu_mpi_req req[2]; + struct _starpu_mpi_req *early; + struct _starpu_mpi_early_request_tag_hashlist *hash; + + memset(&req[0].node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); + req[0].node_tag.node.rank = 1; + req[0].node_tag.node.comm = MPI_COMM_WORLD; + req[0].node_tag.data_tag = 42; + + memset(&req[1].node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); + req[1].node_tag.node.rank = 2; + req[1].node_tag.node.comm = MPI_COMM_WORLD; + req[1].node_tag.data_tag = 84; + + _starpu_mpi_early_request_enqueue(&req[1]); + _starpu_mpi_early_request_enqueue(&req[0]); + + early = _starpu_mpi_early_request_dequeue(req[0].node_tag.data_tag, req[0].node_tag.node.rank, req[0].node_tag.node.comm); + STARPU_ASSERT(early->node_tag.data_tag == req[0].node_tag.data_tag && early->node_tag.node.rank == req[0].node_tag.node.rank && early->node_tag.node.comm == req[0].node_tag.node.comm); + + hash = _starpu_mpi_early_request_extract(req[1].node_tag.data_tag, req[1].node_tag.node.rank, req[1].node_tag.node.comm); + STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 1); + early = _starpu_mpi_req_list_pop_front(&hash->list); + STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0); + STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm); + free(hash); +} + +int main(int argc, char **argv) +{ + int ret; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + 
conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + early_data(); + early_request(); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + return 0; +} + +#endif diff --git a/mpi/tests/gather.c b/mpi/tests/gather.c new file mode 100644 index 0000000..72f03f1 --- /dev/null +++ b/mpi/tests/gather.c @@ -0,0 +1,84 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +int main(int argc, char **argv) +{ + int ret, rank, size; + starpu_data_handle_t handle; + int var; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size<3) + { + FPRINTF(stderr, "We need more than 2 processes.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + if (rank == 0) + { + int n; + for(n=1 ; n from node <%d>\n", var, n); + FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n); + starpu_data_release(handle); + starpu_data_unregister(handle); + } + } + else + { + FPRINTF_MPI(stderr, "sending to node %d\n", 0); + var = rank; + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + ret = starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + starpu_data_unregister(handle); + } + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/gather2.c b/mpi/tests/gather2.c new file mode 100644 index 0000000..ce7e753 --- /dev/null +++ b/mpi/tests/gather2.c @@ -0,0 +1,110 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size<3) + { + FPRINTF(stderr, "We need more than 2 processes.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + if (rank == 0) + { + int n; + for(n=1 ; n from node <%d>\n", var[0], n); + FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n); + starpu_data_release(handle[0]); + + ret = starpu_mpi_recv(handle[0], n, 44, MPI_COMM_WORLD, &status[1]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_recv(handle[1], n, 46, MPI_COMM_WORLD, &status[2]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + for(i=0 ; i<2 ; i++) + starpu_data_acquire(handle[i], STARPU_R); + STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n); + STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[1], n); /* report the value that was actually checked */ + FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n); + for(i=0 ; i<2 ; i++) + starpu_data_release(handle[i]); + for(i=0 ; i<2 ; i++) + starpu_data_unregister(handle[i]); + } + } + else + { + int i, var[3]; + starpu_data_handle_t handle[3]; + + FPRINTF_MPI(stderr, "sending to node %d\n", 0); + var[0] = rank; + var[1] = var[0] * 2; + var[2] = var[0] * 4; + for(i=0 ; i<3 ; i++) + starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); + ret = starpu_mpi_send(handle[0], 0, 42,
MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(handle[1], 0, 44, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(handle[2], 0, 46, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + for(i=0 ; i<3 ; i++) + starpu_data_unregister(handle[i]); + } + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/helper.h b/mpi/tests/helper.h new file mode 100644 index 0000000..102a40c --- /dev/null +++ b/mpi/tests/helper.h @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../../tests/helper.h" + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); fflush(ofile); }} while(0) +#define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) +#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ + int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \ + fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ + fflush(ofile); }} while(0) +#define FPRINTF_MPI_COMM(ofile, comm, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) { \ + int _disp_rank; starpu_mpi_comm_rank(comm, &_disp_rank); \ + fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ + fflush(ofile); }} while(0) + +#define MPI_INIT_THREAD_real(argc, argv, required) do { \ + int thread_support; \ + if (MPI_Init_thread(argc, argv, required, &thread_support) != MPI_SUCCESS) \ + { \ + fprintf(stderr,"MPI_Init_thread failed\n"); \ + exit(1); \ + } \ + if (thread_support == MPI_THREAD_FUNNELED) \ + fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); \ + if (thread_support < MPI_THREAD_FUNNELED) \ + fprintf(stderr,"Warning: MPI does not have thread support!\n"); } while(0) + +#ifdef STARPU_SIMGRID +#define MPI_INIT_THREAD(argc, argv, required, init) do { *(init) = 1 ; } while(0) +#else +#define MPI_INIT_THREAD(argc, argv, required, init) do { \ + *(init) = 0; \ + MPI_INIT_THREAD_real(argc, argv, required); } while(0) +#endif diff --git a/mpi/tests/insert_task.c b/mpi/tests/insert_task.c new file mode 100644 index 0000000..79c51db --- /dev/null +++ b/mpi/tests/insert_task.c @@ -0,0 +1,159 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + unsigned *x = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + + FPRINTF(stdout, "VALUES: %u %u\n", *x, *y); + *x = (*x + *y) / 2; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +#define X 4 +#define Y 5 + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int y, int nb_nodes) +{ + return (x + y) % nb_nodes; +} + + +int main(int argc, char **argv) +{ + int rank, size, x, y; + int value=0, ret; + unsigned matrix[X][Y]; + starpu_data_handle_t data_handles[X][Y]; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + matrix[x][y] = (rank+1)*10 + value; + value++; + } + } +#if 0 + for(x = 0; x < X; x++) + { + FPRINTF(stdout, "[%d] ", rank); + for (y = 0; y < Y; y++) + { + FPRINTF(stdout, "%3d ", matrix[x][y]); + } + FPRINTF(stdout, "\n"); + } +#endif + + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + int mpi_rank = my_distrib(x, y, size); + if (mpi_rank == rank) + { + //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y); + starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned)); + } + else + { + /* I don't own this index, but will need it for my computations */ + //FPRINTF(stderr, "[%d] Neighbour of 
data[%d][%d]\n", rank, x, y); + starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); + } + if (data_handles[x][y]) + { + starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); + } + } + } + + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + +enodev: + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + if (data_handles[x][y]) + starpu_data_unregister(data_handles[x][y]); + } + } + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + +#if 0 + for(x = 0; x < X; x++) + { + FPRINTF(stdout, "[%d] ", rank); + for (y = 0; y < Y; y++) + { + FPRINTF(stdout, "%3d ", matrix[x][y]); + } + FPRINTF(stdout, "\n"); + } +#endif + + return 0; +} diff --git a/mpi/tests/insert_task_block.c b/mpi/tests/insert_task_block.c new file mode 100644 index 0000000..dc4ed91 --- /dev/null +++ b/mpi/tests/insert_task_block.c @@ -0,0 +1,177 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + unsigned *matrix = (unsigned *)STARPU_MATRIX_GET_PTR(descr[0]); + int nx = (int)STARPU_MATRIX_GET_NX(descr[0]); + int ny = (int)STARPU_MATRIX_GET_NY(descr[0]); + int ld = (int)STARPU_MATRIX_GET_LD(descr[0]); + + int i, j; + unsigned sum=0; + + for (i = 0; i < nx; i++) + { + for (j = 0; j < ny; j++) + { + sum += matrix[i+j*ld]; + } + } + for (i = 0; i < nx; i++) + { + for (j = 0; j < ny; j++) + { + matrix[i+j*ld] = sum;///(nx*ny); + } + } +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .modes = {STARPU_RW} +}; + +#define SIZE 6 +#define BLOCKS 3 + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int y, int nb_nodes) +{ + return (x + y) % nb_nodes; +} + + +int main(int argc, char **argv) +{ + int rank, size, x, y; + int ret, value=0; + unsigned matrix[SIZE*SIZE]; + starpu_data_handle_t data_handles[SIZE][SIZE]; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, 
MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + for(x = 0; x < SIZE; x++) + { + for (y = 0; y < SIZE; y++) + { + matrix[x+y*SIZE] = rank*100 + value; + value++; + } + } +#if 1 + for(x = 0; x < SIZE; x++) + { + FPRINTF(stdout, "[%d] ", rank); + for (y = 0; y < SIZE; y++) + { + FPRINTF(stdout, "%3u ", matrix[x+y*SIZE]); + } + FPRINTF(stdout, "\n"); + } +#endif + + for(x = 0; x < BLOCKS ; x++) + { + for (y = 0; y < BLOCKS; y++) + { + int mpi_rank = my_distrib(x, y, size); + if (mpi_rank == rank) + { + //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y); + starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]), + SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned)); + } + else + { + /* I don't own this index, but will need it for my computations */ + //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y); + starpu_matrix_data_register(&data_handles[x][y], -1, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]), + SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned)); + } + if (data_handles[x][y]) + { + starpu_mpi_data_register(data_handles[x][y], (y*BLOCKS)+x, mpi_rank); + } + } + } + + for(x = 0; x < BLOCKS; x++) + { + for (y = 0; y < BLOCKS; y++) + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_RW, data_handles[x][y], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + } + } + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + for(x = 0; x < BLOCKS; x++) + { + for (y = 0; y < BLOCKS; y++) + { + if (data_handles[x][y]) + starpu_data_unregister(data_handles[x][y]); + } + } + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + +#if 1 + for(x = 0; x < SIZE; x++) + { + FPRINTF(stdout, "[%d] ", rank); + for (y = 0; y < SIZE; y++) + { + FPRINTF(stdout, "%3u ", 
matrix[x+y*SIZE]); + } + FPRINTF(stdout, "\n"); + } +#endif + + return 0; +} diff --git a/mpi/tests/insert_task_can_execute.c b/mpi/tests/insert_task_can_execute.c new file mode 100644 index 0000000..1909de1 --- /dev/null +++ b/mpi/tests/insert_task_can_execute.c @@ -0,0 +1,98 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +void cpu_fun(void* buffers[], void* args) +{ + float *ptr = (float*)(STARPU_VECTOR_GET_PTR(buffers[0])); + ptr[0] = 42; +} + +int can_execute(unsigned workerid, struct starpu_task* task, unsigned nimpl) +{ + return 1; +} + +static struct starpu_codelet codelet = +{ + .can_execute = can_execute, + .cpu_funcs = {cpu_fun}, + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &starpu_perfmodel_nop, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +int main(int argc, char** argv) +{ + struct starpu_conf conf; + int mpi_init; + int rank; + int ret; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + // register a vector of one element + float *data = 
malloc(sizeof(float)); + STARPU_ASSERT(data); + data[0] = 55; + starpu_data_handle_t handle; + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) data, 1, sizeof(data[0])); + starpu_mpi_data_register(handle, 0, 0); + + // run the task (the variadic argument list is terminated by an int 0, as in the other tests) + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet, STARPU_W, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + + // gather the result + starpu_data_unregister(handle); + + // check results + ret = 0; + if (rank == 0) + { + if (data[0] == 42) + { + ret = 0; + fprintf(stderr, "Success!\n"); + } + else + { + ret = 1; + fprintf(stderr, "Failure!\n"); + } + } + free(data); + + // shutdown starpu + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return ret; +} diff --git a/mpi/tests/insert_task_compute.c b/mpi/tests/insert_task_compute.c new file mode 100644 index 0000000..1ef90af --- /dev/null +++ b/mpi/tests/insert_task_compute.c @@ -0,0 +1,256 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + int rank; + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); + + starpu_codelet_unpack_args(_args, &rank); + + FPRINTF(stdout, "[%d] VALUES: %d %d\n", rank, *x, *y); + *x = *x * *y; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +int test(int rank, int node, starpu_mpi_tag_t initial_tag, int *before, int *after, int task_insert, int data_array) +{ + int ok, ret, i, x[2]; + starpu_data_handle_t data_handles[2]; + struct starpu_data_descr descrs[2]; + int barrier_ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + if (starpu_cpu_worker_get_count() == 0) + { + // If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication + ret = -ENODEV; + FPRINTF_MPI(stderr, "No CPU is available\n"); + goto nodata; + } + + FPRINTF_MPI(stderr, "Testing with node=%d - task_insert=%d - data_array=%d - \n", node, task_insert, data_array); + + for(i=0 ; i<2 ; i++) + { + if (rank <= 1) + { + x[i] = before[rank*2+i]; + //FPRINTF_MPI(stderr, "before computation x[%d] = %d\n", i, x[i]); + } + else + x[i] = rank*2+i; + if (rank == i) + starpu_variable_data_register(&data_handles[i], 0, (uintptr_t)&x[i], sizeof(int)); + else + starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register(data_handles[i], initial_tag+i, i); + descrs[i].handle = data_handles[i]; + } + descrs[0].mode = STARPU_RW; + descrs[1].mode = STARPU_R; + + switch(task_insert) + { + case 0: + { + struct starpu_task *task = NULL; + switch(data_array) + 
{ + case 0: + { + task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, + STARPU_RW, data_handles[0], STARPU_R, data_handles[1], + STARPU_VALUE, &rank, sizeof(rank), + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + case 1: + { + task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_ARRAY, data_handles, 2, + STARPU_VALUE, &rank, sizeof(rank), + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + case 2: + { + task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_MODE_ARRAY, descrs, 2, + STARPU_VALUE, &rank, sizeof(rank), + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + } + + if (task) + { + ret = starpu_task_submit(task); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + switch(data_array) + { + case 0: + { + starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, + STARPU_RW, data_handles[0], STARPU_R, data_handles[1], + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + case 1: + { + starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_ARRAY, data_handles, 2, + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + case 2: + { + starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_MODE_ARRAY, descrs, 2, + STARPU_EXECUTE_ON_NODE, node, 0); + break; + } + } + + break; + } + case 1: + { + switch(data_array) + { + case 0: + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_RW, data_handles[0], STARPU_R, data_handles[1], + STARPU_VALUE, &rank, sizeof(rank), + STARPU_EXECUTE_ON_NODE, node, 0); + if (ret == -ENODEV) + goto enodev; + break; + } + case 1: + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_ARRAY, data_handles, 2, + STARPU_VALUE, &rank, sizeof(rank), + STARPU_EXECUTE_ON_NODE, node, 0); + if (ret == -ENODEV) + goto enodev; + break; + } + case 2: + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_DATA_MODE_ARRAY, descrs, 2, + STARPU_VALUE, &rank, sizeof(rank), + 
STARPU_EXECUTE_ON_NODE, node, 0); + if (ret == -ENODEV) + goto enodev; + break; + } + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + break; + } + } + + starpu_task_wait_for_all(); + +enodev: + for(i=0; i<2; i++) + { + starpu_data_unregister(data_handles[i]); + } + + ok = 1; +#ifndef STARPU_SIMGRID + if (rank <= 1) + { + for(i=0; i<2; i++) + { + ok = ok && (x[i] == after[rank*2+i]); + if (x[i] != after[rank*2+i]) + FPRINTF_MPI(stderr, "after computation x[%d] = %d, should be %d\n", i, x[i], after[rank*2+i]); + } + FPRINTF_MPI(stderr, "result is %s\n", ok?"CORRECT":"NOT CORRECT"); + } +#endif + +nodata: + barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); + starpu_mpi_shutdown(); + + return ret == -ENODEV ? ret : !ok; +} + +int main(int argc, char **argv) +{ + int rank; + int global_ret, ret; + int before[4] = {10, 20, 11, 22}; + int after_node[2][4] = {{220, 20, 11, 22}, {220, 20, 11, 22}}; + int node, insert_task, data_array; + starpu_mpi_tag_t initial_tag = 0; + + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + global_ret = 0; + for(node=0 ; node<=1 ; node++) + { + for(insert_task=0 ; insert_task<=1 ; insert_task++) + { + for(data_array=0 ; data_array<=2 ; data_array++) + { + ret = test(rank, node, initial_tag, before, after_node[node], insert_task, data_array); + initial_tag += 2; + if (ret == -ENODEV || ret) + global_ret = ret; + } + } + } + + MPI_Finalize(); + if (rank == 0) + return global_ret==-ENODEV?STARPU_TEST_SKIPPED:global_ret; + else + return 0; +} diff --git a/mpi/tests/insert_task_count.c b/mpi/tests/insert_task_count.c new file mode 100644 index 0000000..4ab884f --- /dev/null +++ b/mpi/tests/insert_task_count.c @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 32 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif + +#ifdef STARPU_USE_CUDA +extern void increment_cuda(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_HIP +extern void increment_hip(void *descr[], void *_args); +#endif + +void increment_cpu(void *descr[], void *_args) +{ + (void)_args; + int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + (*tokenptr)++; +} + +static struct starpu_codelet increment_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {increment_hip}, +#endif + .cpu_funcs = {increment_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int ret, rank, size; + int token = 0; + starpu_data_handle_t token_handle; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + /* Skip only when fewer than 2 processes, or when no CPU, CUDA or HIP worker at all can run the codelet */ + if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() +
starpu_hip_worker_get_count() == 0)) + { + if (rank == 0) + { + if (size < 2) + FPRINTF(stderr, "We need at least 2 processes.\n"); + else + FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); + } + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + if (rank == 1) + starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); + else + starpu_vector_data_register(&token_handle, -1, (uintptr_t)NULL, 1, sizeof(token)); + starpu_mpi_data_register(token_handle, 12, 1); + + int nloops = NITER; + int loop; + + FPRINTF_MPI(stderr, "Start with token value %d\n", token); + + for (loop = 0; loop < nloops; loop++) + { + if (loop % 2) + starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl, + STARPU_RW|STARPU_SSEND, token_handle, + STARPU_EXECUTE_ON_NODE, 0, + 0); + else + starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl, + STARPU_RW, token_handle, + STARPU_EXECUTE_ON_NODE, 0, + 0); + } + + starpu_task_wait_for_all(); + starpu_data_unregister(token_handle); + + FPRINTF_MPI(stderr, "Final value for token %d\n", token); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + +#ifndef STARPU_SIMGRID + if (rank == 1) + { + STARPU_ASSERT_MSG(token == nloops, "token==%d != expected_value==%d\n", token, nloops); + } + else + { + STARPU_ASSERT_MSG(token == 0, "token==%d != expected_value==0\n", token); + + } +#endif + + return 0; +} diff --git a/mpi/tests/insert_task_dyn_handles.c b/mpi/tests/insert_task_dyn_handles.c new file mode 100644 index 0000000..4ddd49f --- /dev/null +++ b/mpi/tests/insert_task_dyn_handles.c @@ -0,0 +1,170 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "helper.h" + +#define FFACTOR 42 + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + int num = starpu_task_get_current()->nbuffers; + int *factor = (int *)STARPU_VARIABLE_GET_PTR(descr[num-1]); + int i; + + for (i = 0; i < num-1; i++) + { + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); + + *x = *x + 1**factor; + } +} + +struct starpu_codelet codelet = +{ + .cpu_funcs = {func_cpu}, + .cpu_funcs_name = {"func_cpu"}, + .nbuffers = STARPU_VARIABLE_NBUFFERS, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif +}; + +int main(int argc, char **argv) +{ + int *x; + int i, ret, loop; + int rank; + int factor=0; + +#ifdef STARPU_QUICK_CHECK + int nloops = 4; +#else + int nloops = 16; +#endif + starpu_data_handle_t *data_handles; + starpu_data_handle_t factor_handle; + struct starpu_data_descr *descrs; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + if (starpu_cpu_worker_get_count() == 0) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + /* MPI was initialised by this test (mpi_init == 0), so it must also be finalised on the skip path */ + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ?
STARPU_TEST_SKIPPED : 0; + } + + x = calloc(1, (STARPU_NMAXBUFS+15) * sizeof(int)); + data_handles = malloc((STARPU_NMAXBUFS+15) * sizeof(starpu_data_handle_t)); + descrs = malloc((STARPU_NMAXBUFS+15) * sizeof(struct starpu_data_descr)); + for(i=0 ; i +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + int node; + int rank; + (void)descr; + + starpu_codelet_unpack_args(_args, &node); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank); + + assert(node == rank); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .model = &starpu_perfmodel_nop, + .name = "insert_task_node_choice" +}; + +int main(int argc, char **argv) +{ + int ret, rank, size, err, node; + int x0=32; + long long x1=23; + starpu_data_handle_t data_handlesx0; + starpu_data_handle_t data_handlesx1; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (rank != 0 && rank != 1) + goto end; + + if (rank == 0) + { + starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); + starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); + } + else + { + starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); + starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); + } + starpu_mpi_data_register(data_handlesx0, 100, 0); + starpu_mpi_data_register(data_handlesx1, 200, 1); + + node = 0; + err = 
starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_VALUE, &node, sizeof(node), + STARPU_EXECUTE_ON_NODE, 0, + STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, + 0); + assert(err == 0); + + node = starpu_data_get_rank(data_handlesx1); + err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_VALUE, &node, sizeof(node), + STARPU_EXECUTE_ON_DATA, data_handlesx1, + STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, + 0); + assert(err == 0); + + // Node 1 has a long long data which has a bigger size than a + // int, so it is going to be selected by the node selection + // policy to execute the codelet + err = starpu_mpi_node_selection_set_current_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA); + assert(err == 0); + node = 1; + err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_VALUE, &node, sizeof(node), + STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, + 0); + assert(err == 0); + + FPRINTF_MPI(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + starpu_data_unregister(data_handlesx0); + starpu_data_unregister(data_handlesx1); + +end: + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/insert_task_owner.c b/mpi/tests/insert_task_owner.c new file mode 100644 index 0000000..d75e3f3 --- /dev/null +++ b/mpi/tests/insert_task_owner.c @@ -0,0 +1,199 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+/* Codelet body: unpack the expected node from the task arguments and assert that it is the MPI rank running the task. */
+void func_cpu(void *descr[], void *_args)
+{
+	int node;
+	int rank;
+	(void)descr;
+
+	starpu_codelet_unpack_args(_args, &node);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank);
+
+	assert(node == rank);
+}
+/* Two-buffer codelets that all run func_cpu; they only differ by the access modes of their buffers. */
+struct starpu_codelet mycodelet_r_w =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W},
+	.model = &starpu_perfmodel_nop,
+};
+
+struct starpu_codelet mycodelet_rw_r =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &starpu_perfmodel_nop,
+};
+
+struct starpu_codelet mycodelet_rw_rw =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+
+struct starpu_codelet mycodelet_w_r =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_R},
+	.model = &starpu_perfmodel_nop,
+};
+
+struct starpu_codelet mycodelet_r_r =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_R},
+	.model = &starpu_perfmodel_nop,
+};
+/* Rank 0 provides x0 and rank 1 provides x1; each inserted task carries in `node' the rank that func_cpu expects to run on. */
+int main(int argc, char **argv)
+{
+	int ret, rank, size, err, node;
+	long x0=32;
+	int x1=23;
+	starpu_data_handle_t data_handlesx0 = NULL;
+	starpu_data_handle_t data_handlesx1 = NULL;
+	struct starpu_conf conf;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0; /* only rank 0 reports the skip */
+	}
+
+	if (rank != 0 && rank != 1)
+		goto end; /* only ranks 0 and 1 register data and insert tasks */
+
+	if (rank == 0)
+	{
+		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
+		starpu_mpi_data_register(data_handlesx0, 0, rank);
+		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); /* no local buffer for x1 on rank 0 */
+		starpu_mpi_data_register(data_handlesx1, 1, 1);
+	}
+	else if (rank == 1)
+	{
+		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
+		starpu_mpi_data_register(data_handlesx1, 1, rank);
+		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); /* no local buffer for x0 on rank 1 */
+		starpu_mpi_data_register(data_handlesx0, 0, 0);
+	}
+
+	node = starpu_mpi_data_get_rank(data_handlesx1); /* x1 is the buffer written by the task below */
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	node = starpu_mpi_data_get_rank(data_handlesx0); /* x0 is the buffer written by the task below */
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, /* `node' still holds the rank of x0 */
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	node = 1;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
+				     0);
+	assert(err == 0);
+
+	node = 0;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
+				     0);
+	assert(err == 0);
+
+	node = 0;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
+				     0);
+	assert(err == 0);
+
+	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
+	   going to overwrite the node even though the data model clearly specifies
+	   which node is going to execute the codelet */
+	node = 0;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
+				     0);
+	assert(err == 0);
+
+	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
+	   going to overwrite the node even though the data model clearly specifies
+	   which node is going to execute the codelet */
+	node = 0;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
+				     0);
+	assert(err == 0);
+
+	FPRINTF_MPI(stderr, "Waiting ...\n");
+	starpu_task_wait_for_all();
+	starpu_data_unregister(data_handlesx0);
+	starpu_data_unregister(data_handlesx1);
+
+end:
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/insert_task_owner2.c b/mpi/tests/insert_task_owner2.c
new file mode 100644
index 0000000..29c3f64
--- /dev/null
+++ b/mpi/tests/insert_task_owner2.c
@@ -0,0 +1,150 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+/* Codelet body on four int variables: x2 = y, y = (x0 + x1) * 100, x1 = 12; x0 is only read. */
+void func_cpu(void *descr[], void *_args)
+{
+	(void)_args;
+	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+	int *x2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);
+	int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[3]);
+
+	FPRINTF(stderr, "-------> CODELET VALUES: %d %d nan %d\n", *x0, *x1, *y); /* x2 is accessed in STARPU_W mode, its input value is not printed */
+	*x2 = *y;
+	*y = (*x0 + *x1) * 100; /* uses the input value of x1, before it is overwritten below */
+	*x1 = 12;
+	FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
+}
+/* Buffers in order: x0 (R), x1 (RW), x2 (W), y (RW). */
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 4,
+	.modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+/* Rank 0 provides x[0..2], the other ranks provide y; one task is forced onto node 1 and rank 0 checks the four resulting values. */
+int main(int argc, char **argv)
+{
+	int rank, size, err;
+	int x[3], y=0;
+	int oldx[3]; /* initial values of x[], only filled and read on rank 0 */
+	int i, ret=0;
+	starpu_data_handle_t data_handles[4];
+	struct starpu_conf conf;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0; /* only rank 0 reports the skip */
+	}
+
+	if (rank == 0)
+	{
+		for(i=0 ; i<3 ; i++)
+		{
+			x[i] = 10*(i+1);
+			oldx[i] = 10*(i+1);
+			starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(x[i]));
+		}
+		y = -1;
+		starpu_variable_data_register(&data_handles[3], -1, (uintptr_t)NULL, sizeof(int)); /* no local buffer for y on rank 0 */
+	}
+	else
+	{
+		for(i=0 ; i<3 ; i++)
+		{
+			x[i] = -1;
+			starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int)); /* no local buffer for x[i] here */
+		}
+		y=200;
+		starpu_variable_data_register(&data_handles[3], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(int));
+	}
+	for(i=0 ; i<3 ; i++)
+	{
+		starpu_mpi_data_register(data_handles[i], i, 0);
+	}
+	starpu_mpi_data_register(data_handles[3], 3, 1);
+
+	FPRINTF(stderr, "[%d][init] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
+
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_R, data_handles[0], STARPU_RW, data_handles[1],
+				     STARPU_W, data_handles[2],
+				     STARPU_RW, data_handles[3],
+				     STARPU_EXECUTE_ON_NODE, 1, 0);
+	STARPU_CHECK_RETURN_VALUE(err, "starpu_mpi_task_insert");
+	starpu_task_wait_for_all();
+
+	int *values = malloc(4 * sizeof(int)); /* NOTE(review): allocation result is not checked */
+	for(i=0 ; i<4 ; i++)
+	{
+		starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
+		if (rank == 0)
+		{
+			starpu_data_acquire(data_handles[i], STARPU_R);
+			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
+			starpu_data_release(data_handles[i]);
+		}
+		starpu_data_unregister(data_handles[i]);
+	}
+	if (rank == 0)
+	{
+		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]);
+		if (values[0] != oldx[0] || values[1] != 12 || values[2] != 200 || values[3] != ((oldx[0] + oldx[1]) * 100)) /* 200 is the initial y of the ranks other than 0 */
+		{
+			FPRINTF(stderr, "[%d][error] values[0] %d != x[0] %d && values[1] %d != 12 && values[2] %d != 200 && values[3] %d != ((x[0] %d + x[1] %d) * 100)\n",
+				rank, values[0], oldx[0], values[1], values[2], values[3], oldx[0], oldx[1]);
+			ret = 1;
+		}
+		else
+		{
+			FPRINTF(stderr, "[%d] correct computation\n", rank);
+		}
+	}
+	FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
+
+	free(values);
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return (rank == 0) ? ret : 0; /* only rank 0 checked the values */
+}
diff --git a/mpi/tests/insert_task_owner_data.c b/mpi/tests/insert_task_owner_data.c
new file mode 100644
index 0000000..cb2287c
--- /dev/null
+++ b/mpi/tests/insert_task_owner_data.c
@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+/* Codelet body on two int variables: increment the first one and square the second one. */
+void func_cpu(void *descr[], void *_args)
+{
+	(void)_args;
+	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	*x0 += 1;
+	*x1 *= *x1;
+}
+/* Both buffers are accessed in read-write mode. */
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+/* Rank 0 provides x[0]=11 and rank 1 provides x[1]=12; one task is inserted with STARPU_EXECUTE_ON_DATA and rank 0 checks the results. */
+int main(int argc, char **argv)
+{
+	int rank, size, err;
+	int x[2];
+	int ret, i;
+	starpu_data_handle_t data_handles[2];
+	int values[2]; /* only filled and read on rank 0 */
+	struct starpu_conf conf;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0; /* only rank 0 reports the skip */
+	}
+
+	if (rank == 0)
+	{
+		x[0] = 11;
+		starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0]));
+		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); /* no local buffer for x[1] on rank 0 */
+	}
+	else if (rank == 1)
+	{
+		x[1] = 12;
+		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); /* no local buffer for x[0] on rank 1 */
+		starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1]));
+	}
+	else
+	{
+		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); /* other ranks have no local buffer at all */
+		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
+	}
+
+	starpu_mpi_data_register(data_handles[0], 0, 0);
+	starpu_mpi_data_register(data_handles[1], 1, 1);
+
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_RW, data_handles[0], STARPU_RW, data_handles[1],
+				     STARPU_EXECUTE_ON_DATA, data_handles[1],
+				     0);
+	assert(err == 0);
+	starpu_task_wait_for_all();
+
+	for(i=0 ; i<2 ; i++)
+	{
+		starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
+		if (rank == 0)
+		{
+			starpu_data_acquire(data_handles[i], STARPU_R);
+			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
+			starpu_data_release(data_handles[i]);
+		}
+	}
+	ret = 0;
+	if (rank == 0)
+	{
+		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
+		if (values[0] != 12 || values[1] != 144) /* 11 + 1 and 12 * 12 */
+		{
+			ret = EXIT_FAILURE;
+		}
+	}
+
+	starpu_data_unregister(data_handles[0]);
+	starpu_data_unregister(data_handles[1]);
+
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return ret;
+}
diff --git a/mpi/tests/insert_task_recv_cache.c b/mpi/tests/insert_task_recv_cache.c
new file mode 100644
index 0000000..2a8d3e3
--- /dev/null
+++ b/mpi/tests/insert_task_recv_cache.c
@@ -0,0 +1,187 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +#define NB_ELEMENTS 1000 +#define NB_DATA 2 + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x) +{ + return x; +} + +void test_cache(int rank, starpu_mpi_tag_t initial_tag, char *enabled, size_t *comm_amount) +{ + int i; + int ret; + unsigned *v[NB_DATA]; + starpu_data_handle_t data_handles[NB_DATA]; + struct starpu_conf conf; + + FPRINTF(stderr, "Testing with STARPU_MPI_CACHE=%s\n", enabled); + setenv("STARPU_MPI_CACHE", enabled, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + for(i = 0; i < NB_DATA; i++) + { + int j; + v[i] = calloc(NB_ELEMENTS, sizeof(unsigned)); + for(j=0 ; j +#include +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv 
is not defined. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +#define NB_ELEMENTS 1000 +#define NB_DATA 2 + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x) +{ + return x; +} + +void test_cache(int rank, starpu_mpi_tag_t initial_tag, char *enabled, size_t *comm_amount) +{ + int i; + int ret; + unsigned *v[NB_DATA]; + starpu_data_handle_t data_handles[NB_DATA]; + struct starpu_conf conf; + + setenv("STARPU_MPI_CACHE", enabled, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_coop_sends_set_use(0); // disable coop_sends to avoid having wrong results when cache is disabled + + for(i = 0; i < NB_DATA; i++) + { + int j; + starpu_malloc((void **)&v[i], NB_ELEMENTS * sizeof(unsigned)); + for(j=0 ; j +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void func_cpu(void *descr[], void *_args) +{ + (void) descr; + (void) _args; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; + +struct starpu_codelet mycodelet2 = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +#define X 4 + +/* Returns the MPI node number where data is */ +int my_distrib(int x, int nb_nodes) +{ + return x % nb_nodes; +} + +void dotest(int rank, int size, starpu_mpi_tag_t initial_tag, char *enabled) +{ + int x, i; + int ret; + unsigned values[X]; + starpu_data_handle_t data_handles[X]; + struct starpu_conf conf; + + setenv("STARPU_MPI_CACHE", enabled, 1); + + FPRINTF(stderr, "Testing with cache '%s'\n", enabled); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + for(x = 0; x < X; x++) + { + values[x] = (rank+1)*10; + } + + for(x = 0; x < X; x++) + { + int mpi_rank = my_distrib(x, size); + if (mpi_rank == rank) + { + starpu_variable_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)&(values[x]), sizeof(unsigned)); + } + else + { + /* I don't own this index, but will need it for my computations */ + starpu_variable_data_register(&data_handles[x], -1, (uintptr_t)NULL, sizeof(unsigned)); + } + if (data_handles[x]) + { + starpu_mpi_data_register(data_handles[x], initial_tag+x, mpi_rank); + } + } + + for(i = 0 ; i +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void) _args; + (void) descr; + + FPRINTF_MPI(stderr, "Hello\n"); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = 
&starpu_perfmodel_nop, + .name = "insert_task_tags" +}; + +int main(int argc, char **argv) +{ + int ret, rank, err; + int x=32; + starpu_data_handle_t handle0; + starpu_data_handle_t handle1; + int64_t *value; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + if (rank != 0 && rank != 1) + goto end; + + starpu_variable_data_register(&handle0, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + + starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &value, &err); + assert(err == 1); + + starpu_mpi_data_register(handle0, (*value)-1, 1); + starpu_mpi_data_register(handle1, (*value)-2, 1); + + err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_EXECUTE_ON_NODE, 0, + STARPU_RW, handle0, + 0); + assert(err == 0); + + err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_EXECUTE_ON_NODE, 1, + STARPU_RW, handle1, + 0); + assert(err == 0); + + FPRINTF_MPI(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + starpu_data_unregister(handle0); + starpu_data_unregister(handle1); + +end: + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/load_balancer.c b/mpi/tests/load_balancer.c new file mode 100644 index 0000000..d28fcb0 --- /dev/null +++ b/mpi/tests/load_balancer.c @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+
+#if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_USE_MPI_MPI)
+
+#warning unsetenv is not defined or STARPU_USE_MPI_MPI is not enabled. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+/* Load-balancer callback: report one neighbour, the next rank in a ring (the last rank wraps to 0), in a malloc()ed array. */
+void get_neighbors(int **neighbor_ids, int *nneighbors)
+{
+	int rank, size;
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	*nneighbors = 1;
+	*neighbor_ids = malloc(sizeof(int)); /* NOTE(review): allocation result is not checked */
+	(*neighbor_ids)[0] = rank==size-1?0:rank+1; /* parenthesized: [] binds tighter than unary * */
+}
+/* Load-balancer callback: never proposes any data to migrate, *nhandles is set to 0. */
+void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node)
+{
+	(void)handle_unit;
+	(void)dst_node;
+	*nhandles = 0;
+}
+/* Initialize and shut down the load balancer twice: with (NULL, NULL), then with "heat" and the callbacks above. */
+int main(int argc, char **argv)
+{
+	int ret;
+	struct starpu_mpi_lb_conf itf; /* only the two callbacks below are set */
+	int mpi_init;
+
+	itf.get_neighbors = get_neighbors;
+	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	unsetenv("STARPU_MPI_LB");
+	starpu_mpi_lb_init(NULL, NULL);
+	starpu_mpi_lb_shutdown();
+
+	starpu_mpi_lb_init("heat", &itf);
+	starpu_mpi_lb_shutdown();
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
+
+#endif
diff --git a/mpi/tests/loader.c
b/mpi/tests/loader.c
new file mode 100644
index 0000000..804797d
--- /dev/null
+++ b/mpi/tests/loader.c
@@ -0,0 +1,554 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#include
+#else
+#include
+#endif
+
+#ifdef STARPU_QUICK_CHECK
+/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s
+   add some extra times for tests which run with all schedulers
+*/
+#define DEFAULT_TIMEOUT 100
+#elif !defined(STARPU_LONG_CHECK)
+/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */
+#define DEFAULT_TIMEOUT 300
+#else
+/* Long checks can be very long */
+#define DEFAULT_TIMEOUT 1000
+#endif
+#define AUTOTEST_SKIPPED_TEST 77
+
+static pid_t child_pid = 0;
+static int timeout;
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+/* Windows replacement for gettimeofday(): when tv is not NULL, store the
+ * current time in *tv as seconds and microseconds since the Unix epoch.
+ * tz is ignored. Always returns 0. */
+static int mygettimeofday(struct timeval *tv, void *tz)
+{
+	(void) tz;
+	if (tv)
+	{
+		FILETIME ft;
+		unsigned long long res;
+		GetSystemTimeAsFileTime(&ft);
+		/* 100-nanosecond intervals since January 1, 1601 */
+		res = ft.dwHighDateTime;
+		res <<= 32;
+		res |= ft.dwLowDateTime;
+		res /= 10;
+		/* Now we have microseconds */
+		res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
+		/* Now we are based on epoch */
+		tv->tv_sec = res / 1000000ULL;
+		tv->tv_usec = res % 1000000ULL;
+	}
+	return 0;
+}
+#else
+#define mygettimeofday(tv,tz) gettimeofday(tv,tz)
+#endif
+
+#ifdef STARPU_GDB_PATH
+/* Run gdb in batch mode on the core file `core' of program `exe' to print
+ * the StarPU state and backtraces. gdb is run through Libtool when
+ * $top_builddir is set. Returns -1 when `core' cannot be used or when
+ * fork() fails, 0 otherwise. */
+static int try_launch_gdb(const char *exe, const char *core)
+{
+# define GDB_COMMANDS \
+	"-ex", "py-list", \
+	"-ex", "starpu-tasks", \
+	"-ex", "starpu-workers", \
+	"-ex", "starpu-print-datas-summary", \
+	"-ex", "starpu-memusage", \
+	"-ex", "starpu-print-archs", \
+	"-ex", "starpu-print-registered-models", \
+	"-ex", "bt full", \
+	"-ex", "py-bt", \
+	"-ex", "thread apply all bt full", \
+	"-ex", "thread apply all py-bt", \
+
+	int err;
+	pid_t pid;
+	struct stat st;
+	const char *top_builddir;
+	char *gdb;
+
+	err = stat(core, &st);
+	if (err != 0)
+	{
+		fprintf(stderr, "while looking for core file of %s: %s: %m\n",
+			exe, core);
+		return -1;
+	}
+
+	if (!S_ISREG(st.st_mode))
+	{
+		fprintf(stderr, "%s: not a regular file\n", core);
+		return -1;
+	}
+
+	top_builddir = getenv("top_builddir");
+
+	pid = fork();
+	switch (pid)
+	{
+	case 0:						/* kid */
+		if (top_builddir != NULL)
+		{
+			/* Run gdb with Libtool. */
+			gdb = alloca(strlen(top_builddir)
+				     + sizeof("/libtool") + 1);
+			strcpy(gdb, top_builddir);
+			strcat(gdb, "/libtool");
+			err = execl(gdb, "gdb", "--mode=execute",
+				    STARPU_GDB_PATH, "--batch",
+				    GDB_COMMANDS
+				    exe, core, NULL);
+		}
+		else
+		{
+			/* Run gdb directly */
+			gdb = STARPU_GDB_PATH;
+			err = execl(gdb, "gdb", "--batch",
+				    GDB_COMMANDS
+				    exe, core, NULL);
+		}
+		if (err != 0)
+		{
+			fprintf(stderr, "while launching `%s': %m\n", gdb);
+			exit(EXIT_FAILURE);
+		}
+		exit(EXIT_SUCCESS);
+		break;
+
+	case -1:
+		fprintf(stderr, "fork: %m\n");
+		return -1;
+
+	default:					/* parent */
+	{
+		pid_t who;
+		int status;
+		who = waitpid(pid, &status, 0);
+		if (who != pid)
+			fprintf(stderr, "while waiting for gdb "
+				"process %d: %m\n", pid);
+	}
+	}
+	return 0;
+# undef GDB_COMMANDS
+}
+#endif /* STARPU_GDB_PATH */
+
+/* Try to inspect the core dump of the child with gdb: first core.<child_pid>,
+ * then plain "core". Does nothing when STARPU_GDB_PATH is not defined. */
+static void launch_gdb(const char *exe)
+{
+#ifdef STARPU_GDB_PATH
+	char s[32];
+	snprintf(s, sizeof(s), "core.%d", child_pid);
+	if (try_launch_gdb(exe, s) < 0)
+		try_launch_gdb(exe, "core");
+#endif /* STARPU_GDB_PATH */
+}
+
+static char *test_name;
+
+/* SIGALRM handler, called when the test ran for more than `timeout' seconds:
+ * send SIGQUIT to the process group of the child, wait for the child, try to
+ * inspect its core dump, then raise SIGALRM again (installed with SA_RESETHAND). */
+static void test_cleaner(int sig)
+{
+	pid_t child_gid;
+	int status;
+	(void) sig;
+
+	// send signal to all loader family members
+	fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout);
+	child_gid = getpgid(child_pid);
+	kill(-child_gid, SIGQUIT);
+	waitpid(child_pid, &status, 0);
+	launch_gdb(test_name);
+	raise(SIGALRM);
+	exit(EXIT_FAILURE);
+}
+
+/* Signal handler: forward signal `sig' to the process group of the child. */
+static void forwardsig(int sig)
+{
+	pid_t child_gid;
+	child_gid = getpgid(child_pid);
+	kill(-child_gid, sig);
+}
+
+/* Replace the first occurrence of `motif' in *src by `value'. The result is a
+ * newly allocated string stored in *src; the previous string is not freed.
+ * Returns 1 when a replacement was made, 0 when `motif' was not found. */
+static int _decode(char **src, char *motif, const char *value)
+{
+	char *found;
+
+	found = strstr(*src, motif);
+	if (found == NULL) return 0;
+
+	char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1);
+
+	strncpy(new_src, *src, found - *src);
+	strcat(new_src, value);
+	strcat(new_src, found+strlen(motif));
+
+	*src = new_src;
+	return 1;
+}
+
+/* Replace every occurrence of `motif' in *src by `value'. Does nothing when
+ * *src is NULL; exits with an error when `motif' is present but `value' is NULL. */
+static void decode(char **src, char *motif, const char *value)
+{
+	if (*src)
+	{
+		if (strstr(*src, motif) && value == NULL)
+		{
+			fprintf(stderr, "error: $%s undefined\n", motif);
+			exit(EXIT_FAILURE);
+		}
+		int d = _decode(src, motif, value);
+		while (d)
+			d = _decode(src, motif, value);
+	}
+}
+
+/* Store `arg' at index *n of the array `args' of `size' entries and advance *n.
+ * Exits with an error instead of writing past the end of the array. */
+static void append_arg(char **args, int size, int *n, char *arg)
+{
+	if (*n >= size)
+	{
+		fprintf(stderr, "[error] too many arguments to launch the test\n");
+		exit(EXIT_FAILURE);
+	}
+	args[(*n)++] = arg;
+}
+
+/* Usage: loader [-t timeout] [-p dir] program [args...]
+ * Run `program' in its own process group, optionally through the launcher given
+ * by $STARPU_CHECK_LAUNCHER, kill it after `timeout' seconds, and return its exit code. */
+int main(int argc, char *argv[])
+{
+	int child_exit_status;
+	char *test_args;
+	char *launcher;
+	char *launcher_args;
+	char *libtool;
+	char *cflags;
+	const char *top_builddir = getenv("top_builddir");
+	struct sigaction sa;
+	int ret;
+	struct timeval start;
+	struct timeval end;
+	double timing;
+	int x=1;
+	int asan = 0, lsan = 0, tsan = 0, usan = 0;
+
+	(void) argc;
+	test_args = NULL;
+	timeout = 0;
+
+	launcher=getenv("STARPU_CHECK_LAUNCHER");
+	launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS");
+	cflags = getenv("CFLAGS");
+	if (cflags)
+	{
+		if (strstr(cflags, "-fsanitize=address"))
+			asan = 1;
+		if (strstr(cflags, "-fsanitize=leak"))
+			lsan = 1;
+		if (strstr(cflags, "-fsanitize=thread"))
+			tsan = 1;
+		if (strstr(cflags, "-fsanitize=undefined"))
+			usan = 1;
+	}
+
+	if (argv[x] && strcmp(argv[x], "-t") == 0)
+	{
+		if (!argv[x+1])
+		{
+			fprintf(stderr, "[error] Option -t needs a timeout value\n");
+			exit(EXIT_FAILURE);
+		}
+		timeout = strtol(argv[x+1], NULL, 10);
+		x += 2;
+	}
+	else if (getenv("STARPU_TIMEOUT_ENV"))
+	{
+		/* get user-defined timeout value */
+		timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
+	}
+	else if (timeout <= 0)
+	{
+		timeout = DEFAULT_TIMEOUT;
+		if ((launcher && strstr(launcher, "valgrind")) ||
+		    (launcher && strstr(launcher, "helgrind")) ||
+		    tsan)
+			timeout *= 20;
+		if (asan || usan || lsan ||
+		    (launcher && strstr(launcher, "compute-sanitizer")))
+			timeout *= 5;
+
+		if (timeout > 1750)
+			timeout = 1750;
+	}
+
+#ifdef STARPU_SIMGRID
+#ifdef STARPU_DEBUG
+	timeout *= 20;
+#endif
+#endif
+
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
+	/* compare values between the 2 values of timeout */
+	if (getenv("MPIEXEC_TIMEOUT"))
+	{
+		int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
+		if (mpiexec_timeout != timeout)
+			fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
+	}
+#endif
+
+	if (argv[x] && strcmp(argv[x], "-p") == 0)
+	{
+		if (!argv[x+1] || !argv[x+2])
+		{
+			fprintf(stderr, "[error] Option -p needs a directory and a program name\n");
+			exit(EXIT_FAILURE);
+		}
+		test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1);
+		sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]);
+		x += 3;
+	}
+	else
+	{
+		test_name = argv[x];
+		x += 1;
+	}
+
+	if (!test_name)
+	{
+		fprintf(stderr, "[error] Need name of program to start\n");
+		exit(EXIT_FAILURE);
+	}
+
+	size_t len = strlen(test_name);
+	if (len >= 3 &&
+	    test_name[len-3] == '.' &&
+	    test_name[len-2] == 's' &&
+	    test_name[len-1] == 'h')
+	{
+		/* This is a shell script, don't run ourself on bash, but make
+		 * the script call us for each program invocation */
+
+		char *launch = NULL;
+		if (top_builddir == NULL)
+			// this may fail if .libs is in the directory path
+			setenv("STARPU_LAUNCH", argv[0], 1);
+		else
+		{
+			launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1);
+			strcpy(launch, top_builddir);
+			strcat(launch, "/tests/loader");
+			setenv("STARPU_LAUNCH", launch, 1);
+		}
+
+		execvp(test_name, argv+x-1);
+
+		fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
+		free(launch);
+		exit(EXIT_FAILURE);
+	}
+
+	if (strstr(test_name, "spmv/dw_block_spmv"))
+	{
+		test_args = (char *) calloc(512, sizeof(char));
+		snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR);
+	}
+	else if (strstr(test_name, "starpu_perfmodel_display"))
+	{
+		if (x >= argc)
+			test_args = strdup("-l");
+	}
+	else if (strstr(test_name, "starpu_perfmodel_plot"))
+	{
+		if (x >= argc)
+			test_args = strdup("-l");
+	}
+
+	/* get launcher program */
+	if (launcher_args)
+		launcher_args=strdup(launcher_args);
+
+	if (top_builddir == NULL)
+	{
+		fprintf(stderr,
+			"warning: $top_builddir undefined, "
+			"so $STARPU_CHECK_LAUNCHER ignored\n");
+		launcher = NULL;
+		launcher_args = NULL;
+		libtool = NULL;
+	}
+	else
+	{
+		libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1);
+		strcpy(libtool, top_builddir);
+		strcat(libtool, "/libtool");
+	}
+
+	if (launcher)
+	{
+		const char *top_srcdir = getenv("top_srcdir");
+		decode(&launcher, "@top_srcdir@", top_srcdir);
+		decode(&launcher_args, "@top_srcdir@", top_srcdir);
+	}
+
+	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
+
+	/* set SIGALRM handler */
+	sa.sa_flags = SA_RESETHAND | SA_NODEFER;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_handler = test_cleaner;
+	if (-1 == sigaction(SIGALRM, &sa, NULL))
+		perror("sigaction");
+
+	signal(SIGINT, forwardsig);
+	signal(SIGHUP, forwardsig);
+	signal(SIGPIPE, forwardsig);
+	signal(SIGTERM, forwardsig);
+
+	child_pid = fork();
+	if (child_pid == 0)
+	{
+		char *launcher_argv[100];
+		const int launcher_argc_max = sizeof(launcher_argv)/sizeof(launcher_argv[0]);
+		int i=0;
+
+		setpgid(0, 0);
+
+		/* "Launchers" such as Valgrind need to be inserted
+		 * after the Libtool-generated wrapper scripts, hence
+		 * this special-case. */
+		if (launcher && top_builddir != NULL)
+		{
+			append_arg(launcher_argv, launcher_argc_max, &i, libtool);
+			append_arg(launcher_argv, launcher_argc_max, &i, "--mode=execute");
+			append_arg(launcher_argv, launcher_argc_max, &i, launcher);
+			if (launcher_args)
+			{
+				/* strtok() returns NULL after the last word: do not store
+				 * it, it would terminate argv before the test name */
+				char *word = strtok(launcher_args, " ");
+				while (word)
+				{
+					append_arg(launcher_argv, launcher_argc_max, &i, word);
+					word = strtok(NULL, " ");
+				}
+			}
+		}
+
+		append_arg(launcher_argv, launcher_argc_max, &i, test_name);
+		if (test_args)
+			append_arg(launcher_argv, launcher_argc_max, &i, test_args);
+		else while (argv[x])
+		{
+			append_arg(launcher_argv, launcher_argc_max, &i, argv[x++]);
+		}
+#ifdef STARPU_SIMGRID
+#ifdef STARPU_DEBUG
+		append_arg(launcher_argv, launcher_argc_max, &i, "--cfg=contexts/factory:thread");
+#endif
+#endif
+		append_arg(launcher_argv, launcher_argc_max, &i, NULL);
+		execvp(*launcher_argv, launcher_argv);
+
+		fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
+		exit(EXIT_FAILURE);
+	}
+	if (child_pid == -1)
+	{
+		fprintf(stderr, "[error] fork. test marked as failed\n");
+		exit(EXIT_FAILURE);
+	}
+	free(test_args);
+	free(libtool);
+
+	ret = EXIT_SUCCESS;
+	mygettimeofday(&start, NULL);
+	alarm(timeout);
+	if (child_pid == waitpid(child_pid, &child_exit_status, 0))
+	{
+		if (WIFEXITED(child_exit_status))
+		{
+			int status = WEXITSTATUS(child_exit_status);
+			if (status == EXIT_SUCCESS)
+			{
+				alarm(0);
+			}
+			else
+			{
+				if (status != AUTOTEST_SKIPPED_TEST)
+					fprintf(stdout, "`%s' exited with return code %d\n",
+						test_name, status);
+				ret = status;
+			}
+		}
+		else if (WIFSIGNALED(child_exit_status))
+		{
+			fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n",
+				test_name, WTERMSIG(child_exit_status));
+			launch_gdb(test_name);
+			ret = EXIT_FAILURE;
+		}
+		else
+		{
+			fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n",
+				test_name);
+			ret = EXIT_FAILURE;
+		}
+	}
+
+	mygettimeofday(&end, NULL);
+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+	fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name);
+
+	return ret;
+}
diff --git a/mpi/tests/matrix.c b/mpi/tests/matrix.c
new file mode
100644 index 0000000..cde64cd --- /dev/null +++ b/mpi/tests/matrix.c @@ -0,0 +1,156 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + unsigned *A = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *X = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + unsigned *Y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[2]); + + FPRINTF_MPI(stderr, "VALUES: Y=%3u A=%3u X=%3u\n", *Y, *A, *X); + *Y = *Y + *A * *X; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +#define N 4 + +int main(int argc, char **argv) +{ + int rank, n; + int ret; + unsigned A[N]; + unsigned X[N]; + unsigned Y; + starpu_data_handle_t data_A[N]; + starpu_data_handle_t data_X[N]; + starpu_data_handle_t data_Y; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + + if 
(starpu_cpu_worker_get_count() == 0) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + for(n = 0; n < N; n++) + { + A[n] = (n+1)*10; + X[n] = n+1; + } + Y = 0; + + FPRINTF_MPI(stderr, "A = "); + for(n = 0; n < N; n++) + { + FPRINTF(stderr, "%u ", A[n]); + } + FPRINTF(stderr, "\n"); + FPRINTF_MPI(stderr, "X = "); + for(n = 0; n < N; n++) + { + FPRINTF(stderr, "%u ", X[n]); + } + FPRINTF(stderr, "\n"); + + for(n = 0; n < N; n++) + { + if (rank == n%2) + starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned)); + else + starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data_A[n], n+100, n%2); + FPRINTF_MPI(stderr, "Registering A[%d] to %p with tag %d and node %d\n", n, data_A[n], n+100, n%2); + + if (rank == n%2) + starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned)); + else + starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data_X[n], n+200, n%2); + FPRINTF_MPI(stderr, "Registering X[%d] to %p with tag %d and node %d\n", n, data_X[n], n+200, n%2); + } + if (rank == 0) + starpu_variable_data_register(&data_Y, STARPU_MAIN_RAM, (uintptr_t)&Y, sizeof(unsigned)); + else + starpu_variable_data_register(&data_Y, -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data_Y, 10, 0); + FPRINTF_MPI(stderr, "Registering Y to %p with tag %d and node %d\n", data_Y, 10, 0); + + for(n = 0; n < N; n++) + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_R, data_A[n], + STARPU_R, data_X[n], + STARPU_RW, data_Y, + STARPU_EXECUTE_ON_DATA, data_A[n], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + } + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + for(n = 0; n < N; n++) + { + 
starpu_data_unregister(data_A[n]); + starpu_data_unregister(data_X[n]); + } + starpu_data_unregister(data_Y); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + FPRINTF(stdout, "[%d] Y=%u\n", rank, Y); + +#ifndef STARPU_SIMGRID + if (rank == 0) + { + STARPU_ASSERT_MSG(Y==300, "Error when calculating Y=%u\n", Y); + } +#endif + + return 0; +} diff --git a/mpi/tests/matrix2.c b/mpi/tests/matrix2.c new file mode 100644 index 0000000..f65c5e6 --- /dev/null +++ b/mpi/tests/matrix2.c @@ -0,0 +1,149 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + unsigned *A = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *X = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + unsigned *Y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[2]); + + FPRINTF_MPI(stderr, "VALUES: Y=%3u A=%3u X=%3u\n", *Y, *A, *X); + *Y = *Y + *A * *X; +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +#define N 4 + +int main(int argc, char **argv) +{ + int rank, size; + int n; + int ret; + unsigned A[N]; + unsigned X[N]; + starpu_data_handle_t data_A[N]; + starpu_data_handle_t data_X[N]; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(NULL, NULL, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if ((size < 3) || (starpu_cpu_worker_get_count() == 0)) + { + if (rank == 0) + { + if (size < 3) + FPRINTF(stderr, "We need at least 3 processes.\n"); + else + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + } + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + for(n = 0; n < N; n++) + { + A[n] = (n+1)*10; + X[n] = n+1; + } + + FPRINTF_MPI(stderr, "A = "); + for(n = 0; n < N; n++) + { + FPRINTF(stderr, "%u ", A[n]); + } + FPRINTF(stderr, "\n"); + FPRINTF_MPI(stderr, "X = "); + for(n = 0; n < N; n++) + { + FPRINTF(stderr, "%u ", X[n]); + } + FPRINTF(stderr, "\n"); + + for(n = 0; n < N; n++) + { + if (rank == n%2) + starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned)); + else + starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data_A[n], n+100, n%2); + FPRINTF_MPI(stderr, "Registering A[%d] to %p with tag %d and node %d\n", n,data_A[n], n+100, n%2); + } + + for(n = 0; n < N; n++) + { + if (rank == 2) + starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned)); + else + starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data_X[n], n+200, 2); + FPRINTF_MPI(stderr, "Registering X[%d] to %p with tag %d and node %d\n", n, data_X[n], n+200, 2); + } + + for(n = 0; n < N-1; n++) + { + ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, + STARPU_R, data_A[n], + STARPU_R, data_X[n], + STARPU_RW, data_X[N-1], + STARPU_EXECUTE_ON_DATA, data_A[n], + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); + } + + FPRINTF(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + for(n = 0; n < N; n++) + { + starpu_data_unregister(data_A[n]); + starpu_data_unregister(data_X[n]); + } + + starpu_mpi_shutdown(); + + FPRINTF(stdout, "[%d] X[%d]=%u\n", rank, N-1, X[N-1]); + +#ifndef STARPU_SIMGRID + if (rank == 2) + { + STARPU_ASSERT_MSG(X[N-1]==144, "Error when calculating X[N-1]=%u\n", X[N-1]); + } +#endif + + if (!mpi_init) + MPI_Finalize(); + return 0; +} diff --git a/mpi/tests/mpi_barrier.c b/mpi/tests/mpi_barrier.c new file mode 100644 index 0000000..ca06cbc --- /dev/null +++ 
b/mpi/tests/mpi_barrier.c @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + + +int main(int argc, char **argv) +{ + int ret, mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_data_cpy.c b/mpi/tests/mpi_data_cpy.c new file mode 100644 index 0000000..852db5f --- /dev/null +++ b/mpi/tests/mpi_data_cpy.c @@ -0,0 +1,98 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#define DATA_TAG 666 +#define INC_COUNT 10 + +void func_cpu(void *descr[], void *_args) +{ + int rank; + int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + starpu_codelet_unpack_args(_args, &rank); + FPRINTF(stderr, "[rank %d] value in %d\n", rank, *value); + (*value)++; + FPRINTF(stderr, "[rank %d] value out %d\n", rank, *value); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, + .name = "increment", + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +int main(int argc, char **argv) +{ + int size, rank; + int ret; + int value = 0; + starpu_data_handle_t *data; + int mpi_init; + int i; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + data = (starpu_data_handle_t*)malloc(size*sizeof(starpu_data_handle_t)); + for(i=0; i +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif +#define SIZE 16 + +float *tab; +starpu_data_handle_t tab_handle; + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + starpu_tag_t tag = (starpu_tag_t)loop; + + if ((loop % 2) == (rank%2)) + { + ret = starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_unlock_tag"); + } + else + { + ret= starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached_unlock_tag"); + } + + starpu_tag_wait(tag); + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_earlyrecv.c b/mpi/tests/mpi_earlyrecv.c new file mode 100644 index 0000000..84f0e11 --- /dev/null +++ b/mpi/tests/mpi_earlyrecv.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" +#include + +int main(int argc, char **argv) +{ + int ret, rank, size, i; + starpu_data_handle_t tab_handle[4]; + int values[4]; + starpu_mpi_req request[2] = {NULL, NULL}; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + FPRINTF_MPI(stderr, "We need a even number of processes.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + for(i=0 ; i<4 ; i++) + { + if (i<3 || rank%2) + { + // all data are registered on all nodes, but the 4th data which is not registered on the receiving node + values[i] = (rank+1) * (i+1); + starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i])); + starpu_mpi_data_register(tab_handle[i], i, rank); + } + } + + int other_rank = rank%2 == 0 ? 
rank+1 : rank-1; + + FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); + + if (rank%2) + { + FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank); + // this data will be received as an early registered data + ret = starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + // this data will be received as an early UNregistered data + ret = starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + + ret = starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + else + { + ret = starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + ret = starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + + // we register the data + starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int)); + starpu_mpi_data_register(tab_handle[3], 3, rank); + ret = starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + + int finished=0; + while (!finished) + { + for(i=0 ; i<2 ; i++) + { + if (request[i]) + { + int flag; + MPI_Status status; + ret = starpu_mpi_test(&request[i], &flag, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + if (flag) + FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]); + } + } + finished = request[0] == NULL && request[1] == NULL; +#ifdef 
STARPU_SIMGRID + starpu_sleep(0.001); +#endif + } + + if (rank%2 == 0) + { + void *ptr0; + void *ptr3; + + starpu_data_acquire(tab_handle[0], STARPU_RW); + ptr0 = starpu_data_get_local_ptr(tab_handle[0]); + starpu_data_release(tab_handle[0]); + + starpu_data_acquire(tab_handle[3], STARPU_RW); + ptr3 = starpu_data_get_local_ptr(tab_handle[3]); + starpu_data_release(tab_handle[3]); + + ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4); + ret = !ret; + FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank); + } + + for(i=0 ; i<4 ; i++) + starpu_data_unregister(tab_handle[i]); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return rank == 0 ? ret : 0; +} diff --git a/mpi/tests/mpi_earlyrecv2.c b/mpi/tests/mpi_earlyrecv2.c new file mode 100644 index 0000000..fcbfefd --- /dev/null +++ b/mpi/tests/mpi_earlyrecv2.c @@ -0,0 +1,287 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" +#include +#include + +#define NB 10 + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *received = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *received = *received + 1; + FPRINTF_MPI(stderr, "Requests %u received\n", *received); + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error); + +int exchange(int rank, starpu_data_handle_t *handles, starpu_mpi_tag_t initial_tag, check_func func, int detached) +{ + int other_rank = rank%2 == 0 ? rank+1 : rank-1; + int i; + int ret; + + if (rank%2) + { + ret = starpu_mpi_send(handles[0], other_rank, initial_tag+0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_send(handles[NB-1], other_rank, initial_tag+NB-1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + for(i=1 ; i +#include "helper.h" +#include +#include + +#define NB 6 + +typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error); + +int exchange(int rank, starpu_data_handle_t *handles, starpu_mpi_tag_t initial_tag, check_func func) +{ + int other_rank = rank%2 == 0 ? 
rank+1 : rank-1; + int i; + int ret=0; + starpu_mpi_req req[NB]; + + memset(req, 0, NB*sizeof(starpu_mpi_req)); + + if (rank%2) + { + ret = starpu_mpi_issend(handles[0], &req[0], other_rank, initial_tag+0, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_issend(handles[NB-2], &req[NB-2], other_rank, initial_tag+NB-2, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_isend(handles[NB-1], &req[NB-1], other_rank, initial_tag+NB-1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + + for(i=1 ; i +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#else +# define NITER 2048 +#endif +#define SIZE 16 + +float *tab; +starpu_data_handle_t tab_handle; + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? 
rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + if ((loop % 2) == (rank%2)) + { + ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else + { + MPI_Status status; + starpu_mpi_req req; + ret = starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_wait(&req, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_irecv_detached.c b/mpi/tests/mpi_irecv_detached.c new file mode 100644 index 0000000..9630c01 --- /dev/null +++ b/mpi/tests/mpi_irecv_detached.c @@ -0,0 +1,108 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif +#define SIZE 16 + +float *tab; +starpu_data_handle_t tab_handle; + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *received = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *received = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? 
rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + if ((loop % 2) == (rank%2)) + { + ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else + { + int received = 0; + ret = starpu_mpi_irecv_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &received); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!received) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_isend.c b/mpi/tests/mpi_isend.c new file mode 100644 index 0000000..7d5d194 --- /dev/null +++ b/mpi/tests/mpi_isend.c @@ -0,0 +1,90 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#else +# define NITER 2048 +#endif +#define SIZE 16 + +float *tab; +starpu_data_handle_t tab_handle; + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + if ((loop % 2) == (rank%2)) + { + MPI_Status status; + starpu_mpi_req req; + ret = starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_wait(&req, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else + { + MPI_Status status; + ret = starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_isend_detached.c b/mpi/tests/mpi_isend_detached.c new file mode 100644 index 0000000..29fcc42 --- /dev/null +++ b/mpi/tests/mpi_isend_detached.c @@ -0,0 +1,113 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif +#define SIZE 16 + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *completed = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *completed = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int main(int argc, char **argv) +{ + int ret, rank, size; + float *tab; + starpu_data_handle_t tab_handle; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + if ((loop % 2) == (rank%2)) + { + int sent = 0; + ret = starpu_mpi_isend_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &sent); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!sent) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + else + { + int received = 0; + ret = starpu_mpi_irecv_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &received); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!received) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/mpi_reduction.c b/mpi/tests/mpi_reduction.c new file mode 100644 index 0000000..d74ed4b --- /dev/null +++ b/mpi/tests/mpi_reduction.c @@ -0,0 +1,220 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" + +extern void init_cpu_func(void *descr[], void *cl_arg); +extern void redux_cpu_func(void *descr[], void *cl_arg); +extern void dot_cpu_func(void *descr[], void *cl_arg); +extern void display_cpu_func(void *descr[], void *cl_arg); + +static struct starpu_codelet init_codelet = +{ + .cpu_funcs = {init_cpu_func}, + .nbuffers = 1, + .modes = {STARPU_W}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "init_codelet" +}; + +static struct starpu_codelet redux_codelet = +{ + .cpu_funcs = {redux_cpu_func}, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "redux_codelet" +}; + +static struct starpu_codelet dot_codelet = +{ + .cpu_funcs = {dot_cpu_func}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_REDUX}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "dot_codelet" +}; + +static struct starpu_codelet display_codelet = +{ + .cpu_funcs = {display_cpu_func}, + .nbuffers = 1, + .modes = {STARPU_R}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "display_codelet" +}; + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int nb_nodes) +{ + return x % nb_nodes; +} + +int main(int argc, char **argv) +{ + int my_rank, size, x, y, i; + long int *vector; + long int dot, sum=0; + starpu_data_handle_t *handles; + starpu_data_handle_t dot_handle; + struct starpu_conf conf; + int ret; + + int nb_elements, step, loops; + int mpi_init; + + STARPU_SKIP_IF_VALGRIND_RETURN_SKIP; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return STARPU_TEST_SKIPPED; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + 
conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (starpu_cpu_worker_get_count() == 0) + { + if (my_rank == 0) + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return my_rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + nb_elements = size*8000; + step = 4; + loops = 5; + + starpu_malloc((void **)&vector, nb_elements*sizeof(long int)); + for(x = 0; x < nb_elements; x+=step) + { + int mpi_rank = my_distrib(x/step, size); + if (mpi_rank == my_rank) + { + for(y=0 ; y +#include + +#include "helper.h" + +/* + * Codelet to create a neutral element + */ +void init_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); + *dot = 0; + FPRINTF_MPI(stderr, "Init dot\n"); +} + +/* + * Codelet to perform the reduction of two elements + */ +void redux_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); + long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); + + *dota = *dota + *dotb; + FPRINTF_MPI(stderr, "Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb); +} + +/* + * Dot product codelet + */ +void dot_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); + + //FPRINTF_MPI(stderr, "Before dot=%ld (adding %d elements...)\n", *dot, n); + unsigned i; + for (i = 0; i < n; i++) + { + //FPRINTF_MPI(stderr, "Adding %ld\n", local_x[i]); + *dot += local_x[i]; + } + //FPRINTF_MPI(stderr, "After dot=%ld\n", *dot); +} + +/* + * Display codelet + */ +void 
display_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *local_x = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + FPRINTF_MPI(stderr, "Local=%ld\n", *local_x); +} diff --git a/mpi/tests/mpi_redux.c b/mpi/tests/mpi_redux.c new file mode 100644 index 0000000..95a2db4 --- /dev/null +++ b/mpi/tests/mpi_redux.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This test does a manual reduction: all ranks send a number to the rank 0, + * the rank 0 sums these numbers and sends back the result to all ranks. 
*/ + +#include +#include "helper.h" + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *received = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *received = *received + 1; + FPRINTF_MPI(stderr, "received = %u\n", *received); + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int main(int argc, char **argv) +{ + int ret, rank, size, sum; + int value=0; + starpu_data_handle_t *handles; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + sum = ((size-1) * (size) / 2); + + if (rank == 0) + { + int src; + int received = 1; + + handles = malloc(size * sizeof(starpu_data_handle_t)); + + for(src=1 ; src +#include "helper.h" + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int nb_nodes) +{ + return x % nb_nodes; +} + +void cpu_codelet(void *descr[], void *_args) +{ + int *vector = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + unsigned i; + int rank; + + starpu_codelet_unpack_args(_args, &rank); + for (i = 0; i < nx; i++) + { + //fprintf(stderr,"rank %d v[%d] = %d\n", rank, i, vector[i]); + vector[i] *= rank+2; + } +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_codelet}, + .nbuffers = 1, + .modes = {STARPU_RW}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif +}; + +void scallback(void *arg) +{ + char *msg = arg; + FPRINTF_MPI(stderr, "Sending completed for <%s>\n", msg); +} + +void rcallback(void *arg) +{ + char *msg = arg; + FPRINTF_MPI(stderr, "Reception completed for <%s>\n", msg); +} + +int main(int argc, char **argv) +{ + int 
rank, nodes, ret, x; + int *vector = NULL; + starpu_data_handle_t *data_handles; + int size=10; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); + + if (starpu_cpu_worker_get_count() == 0) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + if (rank == 0) + { + /* Allocate the vector */ + starpu_malloc((void **)&vector, size * sizeof(int)); + for(x=0 ; x +#include "helper.h" + +void func_cpu(void *descr[], void *_args) +{ + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); + + FPRINTF(stdout, "VALUES: %d %d\n", *x, *y); +} + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +int main(int argc, char **argv) +{ + int ret, i, x[2]; + starpu_data_handle_t data_handles[2]; + int barrier_ret; + int rank; + struct starpu_task *task; + struct starpu_mpi_task_exchange_params params; + struct starpu_data_descr descrs[2]; + struct starpu_conf conf; + + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + if (ret == -ENODEV) return rank==0?STARPU_TEST_SKIPPED:0; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + if 
(starpu_cpu_worker_get_count() == 0) + { + // If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication + ret = -ENODEV; + FPRINTF_MPI(stderr, "No CPU is available\n"); + goto nodata; + } + + for(i=0 ; i<2 ; i++) + { + x[i] = rank*2 + (i+1); + starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(int)); + starpu_mpi_data_register(data_handles[i], i, i); + } + + task = starpu_task_create(); + task->cl = &mycodelet; + task->handles[0] = data_handles[0]; + task->handles[1] = data_handles[1]; + + starpu_mpi_task_exchange_data_before_execution(MPI_COMM_WORLD, task, descrs, ¶ms); + if (params.do_execute) + { + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + task->destroy = 0; + starpu_task_destroy(task); + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + else + { + task->destroy = 0; + starpu_task_destroy(task); + } + + starpu_mpi_task_exchange_data_after_execution(MPI_COMM_WORLD, descrs, 2, params); + + starpu_task_wait_for_all(); + +enodev: + for(i=0; i<2; i++) + { + starpu_data_unregister(data_handles[i]); + } + +nodata: + barrier_ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT(barrier_ret == MPI_SUCCESS); + starpu_mpi_shutdown(); + + MPI_Finalize(); + if (rank == 0) + return ret==-ENODEV?STARPU_TEST_SKIPPED:ret; + else + return 0; +} diff --git a/mpi/tests/mpi_test.c b/mpi/tests/mpi_test.c new file mode 100644 index 0000000..2e1f4c9 --- /dev/null +++ b/mpi/tests/mpi_test.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#else +# define NITER 2048 +#endif + +#define SIZE 16 + +int main(int argc, char **argv) +{ + int ret, rank, size; + float *tab; + starpu_data_handle_t tab_handle; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size%2 != 0) + { + if (rank == 0) + FPRINTF(stderr, "We need a even number of processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + tab = calloc(SIZE, sizeof(float)); + + starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); + + int nloops = NITER; + int loop; + int other_rank = rank%2 == 0 ? 
rank+1 : rank-1; + + for (loop = 0; loop < nloops; loop++) + { + starpu_mpi_req req; + + if ((loop % 2) == (rank%2)) + { + ret = starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + } + else + { + ret = starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + + int finished = 0; + do + { + MPI_Status status; + ret = starpu_mpi_test(&req, &finished, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); +#ifdef STARPU_SIMGRID + starpu_sleep(0.001); +#endif + } + while (!finished); + } + + starpu_data_unregister(tab_handle); + free(tab); + + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/multiple_send.c b/mpi/tests/multiple_send.c new file mode 100644 index 0000000..9462aae --- /dev/null +++ b/mpi/tests/multiple_send.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +int main(int argc, char **argv) +{ + int ret, rank, size; + unsigned send[2] = {42, 11}; + unsigned recv[2] = {33, 33}; + starpu_mpi_req req[2]; + starpu_data_handle_t send_handle[2]; + starpu_data_handle_t recv_handle[2]; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + starpu_variable_data_register(&send_handle[0], STARPU_MAIN_RAM, (uintptr_t)&send[0], sizeof(unsigned)); + starpu_variable_data_register(&send_handle[1], STARPU_MAIN_RAM, (uintptr_t)&send[1], sizeof(unsigned)); + starpu_variable_data_register(&recv_handle[0], STARPU_MAIN_RAM, (uintptr_t)&recv[0], sizeof(unsigned)); + starpu_variable_data_register(&recv_handle[1], STARPU_MAIN_RAM, (uintptr_t)&recv[1], sizeof(unsigned)); + + if (rank == 0) + { + ret = starpu_mpi_isend(send_handle[0], &(req[0]), 1, 12, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + ret = starpu_mpi_isend(send_handle[1], &(req[1]), 1, 13, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); + } + else if (rank == 1) + { + ret = starpu_mpi_irecv(recv_handle[0], &(req[0]), 0, 12, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + ret = starpu_mpi_irecv(recv_handle[1], &(req[1]), 0, 13, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); + } + + if (rank == 0 || rank == 1) + { + int nb_req=2; + while 
(nb_req) + { + int r=0; + for(r=0 ; r<2 ; r++) + { + if (req[r]) + { + int finished = 0; + MPI_Status status; + ret = starpu_mpi_test(&req[r], &finished, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); + STARPU_ASSERT(finished != -1); + if (finished) + { + FPRINTF(stderr, "[%d] Request %d finished\n", rank, r); + req[r] = NULL; + nb_req--; + } + } + } +#ifdef STARPU_SIMGRID + starpu_sleep(0.001); +#endif + } + } + FPRINTF(stderr, "[%d] All requests finished\n", rank); + + starpu_data_unregister(send_handle[0]); + starpu_data_unregister(send_handle[1]); + starpu_data_unregister(recv_handle[0]); + starpu_data_unregister(recv_handle[1]); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/ndim_interface.c b/mpi/tests/ndim_interface.c new file mode 100644 index 0000000..c0909c6 --- /dev/null +++ b/mpi/tests/ndim_interface.c @@ -0,0 +1,180 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 16 +#else +# define NITER 2048 +#endif + +#define BIGSIZE 32 +#define SIZE 16 + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + /* Node 0 will allocate a big 4-dim array and only register an inner part of + * it as the 4-dim array, Node 1 will allocate a 4-dim array of small size and + * register it directly. Node 0 and 1 will then exchange the content of + * their arrays. 
*/ + + int *arr4d = NULL; + starpu_data_handle_t arr4d_handle = NULL; + + if (rank == 0) + { + arr4d = calloc(BIGSIZE*BIGSIZE*BIGSIZE*BIGSIZE, sizeof(int)); + assert(arr4d); + + /* fill the inner 4-dim array */ + unsigned i, j, k, l; + int n = 0; + for (l = 0; l < SIZE; l++) + { + for (k = 0; k < SIZE; k++) + { + for (j = 0; j < SIZE; j++) + { + for (i = 0; i < SIZE; i++) + { + arr4d[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE + l*BIGSIZE*BIGSIZE*BIGSIZE] = n++; + } + } + } + } + + unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; + unsigned ldn[4] = {1, BIGSIZE, BIGSIZE*BIGSIZE, BIGSIZE*BIGSIZE*BIGSIZE}; + + starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + } + else if (rank == 1) + { + arr4d = calloc(SIZE*SIZE*SIZE*SIZE, sizeof(int)); + assert(arr4d); + + unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; + unsigned ldn[4] = {1, SIZE, SIZE*SIZE, SIZE*SIZE*SIZE}; + + starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); + } + + if (rank == 0) + { + ret = starpu_mpi_send(arr4d_handle, 1, 0x42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + + MPI_Status status; + ret = starpu_mpi_recv(arr4d_handle, 1, 0x1337, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the 4-dim array */ + ret = starpu_data_acquire(arr4d_handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + int m = 10; + unsigned i, j, k, l; + for (l = 0; l < SIZE; l++) + { + for (k = 0; k < SIZE; k++) + { + for (j = 0; j < SIZE; j++) + { + for (i = 0; i < SIZE; i++) + { + assert(arr4d[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE + l*BIGSIZE*BIGSIZE*BIGSIZE] == m); + m++; + } + } + } + } + + starpu_data_release(arr4d_handle); + } + else if (rank == 1) + { + MPI_Status status; + ret = starpu_mpi_recv(arr4d_handle, 0, 0x42, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + + /* check the content of the 4-dim 
array and modify it */ + ret = starpu_data_acquire(arr4d_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + int n = 0, m = 10; + unsigned i, j, k, l; + for (l = 0; l < SIZE; l++) + { + for (k = 0; k < SIZE; k++) + { + for (j = 0; j < SIZE; j++) + { + for (i = 0; i < SIZE; i++) + { + assert(arr4d[i + j*SIZE + k*SIZE*SIZE + l*SIZE*SIZE*SIZE] == n); + n++; + arr4d[i + j*SIZE + k*SIZE*SIZE + l*SIZE*SIZE*SIZE] = m++; + } + } + } + } + + starpu_data_release(arr4d_handle); + + ret = starpu_mpi_send(arr4d_handle, 0, 0x1337, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + + FPRINTF(stdout, "Rank %d is done\n", rank); + fflush(stdout); + + if (rank == 0 || rank == 1) + { + starpu_data_unregister(arr4d_handle); + free(arr4d); + } + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/nothing.c b/mpi/tests/nothing.c new file mode 100644 index 0000000..ddb626c --- /dev/null +++ b/mpi/tests/nothing.c @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This program does nothing. It waits until it is interrupted by the user. + * Useful to check binding while StarPU is running. 
+ */ + +#include +#include +#include "helper.h" + + +int main(int argc, char **argv) +{ + int ret, rank, worldsize; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_pause(); // our program will only wait, no need to stress cores by polling workers + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); + + starpu_mpi_barrier(MPI_COMM_WORLD); + + char hostname[65]; + gethostname(hostname, sizeof(hostname)); + + printf("[rank %d on %s] ready to wait !\n", rank, hostname); + + if (rank == 0) + { + printf("You can now check if thread binding is correct, for instance.\n"); + } + + fflush(stdout); + + while(1) + { + sleep(1); + } + + // TODO: maybe better handle the user interruption ? + + + starpu_resume(); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/pingpong.c b/mpi/tests/pingpong.c new file mode 100644 index 0000000..63e01b6 --- /dev/null +++ b/mpi/tests/pingpong.c @@ -0,0 +1,198 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include "helper.h"
+
+#ifdef STARPU_QUICK_CHECK
+# define DEFAULT_NITER 16
+#else
+# define DEFAULT_NITER 2048
+#endif
+
+#define DEFAULT_DATA_SIZE 16
+#define DEFAULT_SLEEP_TIME 0
+#define DEFAULT_METHOD 0 // ping pongs
+
+void usage() /* Print the supported command-line options and their defaults on stderr. */
+{
+	fprintf(stderr, "-n [number of iteration] (default: %d)\n", DEFAULT_NITER);
+	fprintf(stderr, "-s [number of floats to exchange] (default: %d)\n", DEFAULT_DATA_SIZE);
+	fprintf(stderr, "-S [time in millisecond of sleep between exchange, less than 1 second] (default: %d)\n", DEFAULT_SLEEP_TIME);
+	fprintf(stderr, "-b : broadcasts instead of simple pair-wise ping-pongs (default: %s)\n", DEFAULT_METHOD ? "broadcast" : "ping pongs");
+}
+
+float *tab; /* buffer of data_size floats exchanged between ranks */
+starpu_data_handle_t tab_handle; /* StarPU vector handle registered on tab */
+
+int main(int argc, char **argv) /* Exchange a float vector niter times, either as pair-wise ping-pongs or as rotating broadcasts, optionally sleeping between exchanges. */
+{
+	int ret, rank, size;
+	int mpi_init;
+	int i;
+
+	int niter = DEFAULT_NITER;
+	int data_size = DEFAULT_DATA_SIZE;
+	int sleep_time = DEFAULT_SLEEP_TIME;
+	int method = DEFAULT_METHOD;
+
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-n") == 0)
+		{
+			niter = (i+1 < argc) ? atoi(argv[i+1]) : 0; /* a missing value yields 0, which is rejected just below */
+			if (niter <= 0)
+			{
+				fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]);
+				usage();
+				exit(0);
+			}
+			i++;
+		}
+		else if (strcmp(argv[i], "-s") == 0)
+		{
+			data_size = (i+1 < argc) ? atoi(argv[i+1]) : 0; /* a missing value yields 0, which is rejected just below */
+			if (data_size <= 0)
+			{
+				fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]);
+				usage();
+				exit(0);
+			}
+			i++;
+		}
+		else if(strcmp(argv[i], "-S") == 0)
+		{
+			sleep_time = (i+1 < argc) ? atoi(argv[i+1]) : 0; /* a missing value yields 0, which is rejected just below */
+			if (sleep_time <= 0 || sleep_time >= 1000)
+			{
+				fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]);
+				usage();
+				exit(0);
+			}
+			i++;
+		}
+		else if(strcmp(argv[i], "-b") == 0)
+		{
+			method = 1; // broadcasts
+		}
+		else
+		{
+			fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]);
+			usage();
+			exit(0);
+		}
+	}
+
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size%2 != 0) /* ranks are paired (even rank with rank+1), so an odd count cannot run */
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need a even number of processes.\n");
+
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	if (rank == 0)
+	{
+		FPRINTF(stdout, "Number of iterations: %d\n", niter);
+		FPRINTF(stdout, "Number of floats to exchange: %d\n", data_size);
+		FPRINTF(stdout, "Sleep time between exchanges: %d milliseconds\n", sleep_time);
+		if (method == 0)
+			FPRINTF(stdout, "Method: ping pongs\n");
+		else
+			FPRINTF(stdout, "Method: broadcasts\n");
+	}
+
+	tab = calloc(data_size, sizeof(float));
+
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, data_size, sizeof(float));
+
+	int loop;
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1; /* partner inside the (even, odd) pair */
+	int sender;
+	int r;
+
+	if (method == 0) // ping pongs
+	{
+		for (loop = 0; loop < niter; loop++)
+		{
+			if ((loop % 2) == (rank%2)) /* sender and receiver roles alternate at every iteration */
+			{
+				//FPRINTF_MPI(stderr, "Sending to %d\n", other_rank);
+				ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
+			}
+			else
+			{
+				MPI_Status status;
+				//FPRINTF_MPI(stderr, "Receiving from %d\n", other_rank);
+				ret = starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
+			}
+
+			starpu_sleep(sleep_time / 1000.f); /* milliseconds to seconds; integer division would always give 0 */
+		}
+	}
+	else // broadcasts
+	{
+		for (loop = 0; loop < niter; loop++)
+		{
+			sender = loop % size; /* the broadcasting rank rotates with the iteration */
+			if (sender == rank)
+			{
+				for (r = 0; r < size; r++)
+				{
+					if (r != rank)
+					{
+						ret = starpu_mpi_send(tab_handle, r, (r * niter) + loop, MPI_COMM_WORLD);
+						STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
+						starpu_sleep(sleep_time / 1000.f); /* milliseconds to seconds; integer division would always give 0 */
+					}
+				}
+			}
+			else
+			{
+				MPI_Status status;
+				ret = starpu_mpi_recv(tab_handle, sender, (rank * niter) + loop, MPI_COMM_WORLD, &status);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
+
+				for (r = 0; r < (size-1); r++) /* one sleep per send performed by the sender */
+					starpu_sleep(sleep_time / 1000.f);
+			}
+		}
+	}
+
+	starpu_data_unregister(tab_handle);
+	free(tab);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_register.c b/mpi/tests/policy_register.c
new file mode 100644
index 0000000..eb77fcc
--- /dev/null
+++ b/mpi/tests/policy_register.c
@@ -0,0 +1,135 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args) /* Empty CPU kernel: both arguments are ignored. */
+{
+	(void)descr;
+	(void)_args;
+}
+
+struct starpu_codelet mycodelet = /* Two-buffer codelet, both buffers accessed in write mode. */
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W},
+	.model = &starpu_perfmodel_nop,
+};
+
+int starpu_mpi_select_node_my_policy_0(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) /* Policy: return the rank recorded for the first data handle. */
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) nb_data;
+
+	starpu_data_handle_t data = descr[0].handle;
+	return starpu_data_get_rank(data);
+}
+
+int starpu_mpi_select_node_my_policy_1(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) /* Policy: return the rank recorded for the second data handle. */
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) nb_data;
+
+	starpu_data_handle_t data = descr[1].handle;
+	return starpu_data_get_rank(data);
+}
+
+int main(int argc, char **argv) /* Register two custom node-selection policies and check which rank gets a task from starpu_mpi_task_build() under each. */
+{
+	int ret;
+	int rank, size;
+	int policy; /* used both as the registered variable storage and as the policy identifier */
+	struct starpu_task *task;
+	starpu_data_handle_t handles[2];
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	if (rank == 0) /* handles[0] is backed by memory on rank 0 only and registered with tag 10, rank 0 */
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	else
+		starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int));
+	starpu_mpi_data_register(handles[0], 10, 0);
+	if (rank == 1) /* handles[1] is backed by memory on rank 1 only and registered with tag 20, rank 1 */
+		starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	else
+		starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int));
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_1);
+	starpu_mpi_node_selection_set_current_policy(policy); /* first scenario: the custom policy is installed as the current one */
+
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1\n");
+		task->destroy = 0;
+		starpu_task_destroy(task); /* the task is only built, never submitted */
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1\n");
+	}
+
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_0);
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_NODE_SELECTION_POLICY, policy, /* second scenario: the policy is passed per task */
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 0\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0\n");
+	}
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_register_many.c b/mpi/tests/policy_register_many.c
new file mode 100644
index 0000000..328875c
--- /dev/null
+++ b/mpi/tests/policy_register_many.c
@@ -0,0 +1,63 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+
+int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) /* Dummy policy: ignores its arguments and always returns 0. */
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) descr;
+	(void) nb_data;
+	return 0;
+}
+
+int main(int argc, char **argv) /* Register policies in a loop, unregister one, and check that the next registration returns the freed identifier. */
+{
+	int ret;
+	int i, policy;
+	struct starpu_conf conf;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY-1 ; i++) /* NOTE(review): presumably fills every slot not taken by a predefined policy - confirm against the library */
+	{
+		policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+		FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	}
+	starpu_mpi_node_selection_unregister_policy(_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2);
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+	FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	STARPU_ASSERT(policy==_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2); /* the identifier just released must be handed out again */
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_register_toomany.c b/mpi/tests/policy_register_toomany.c
new file mode 100644
index
0000000..da3ca5a
--- /dev/null
+++ b/mpi/tests/policy_register_toomany.c
@@ -0,0 +1,66 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper.h"
+
+int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) /* Dummy policy: ignores its arguments and always returns 0. */
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) descr;
+	(void) nb_data;
+	return 0;
+}
+
+int main(int argc, char **argv) /* Try to register _STARPU_MPI_NODE_SELECTION_MAX_POLICY+1 policies. */
+{
+	int ret;
+	int i;
+	struct starpu_conf conf;
+	int mpi_init;
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if (RUNNING_ON_VALGRIND)
+		return STARPU_TEST_SKIPPED;
+#endif
+
+	disable_coredump(); /* NOTE(review): presumably because the over-registration below is expected to abort - confirm */
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY+1 ; i++) /* one registration more than the maximum named by the constant */
+	{
+		int policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+		FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	}
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_selection.c b/mpi/tests/policy_selection.c
new file mode 100644
index 0000000..9de0ed6
--- /dev/null
+++ b/mpi/tests/policy_selection.c
@@ -0,0 +1,183 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args) /* Empty CPU kernel: both arguments are ignored. */
+{
+	(void)descr;
+	(void)_args;
+}
+
+struct starpu_codelet mycodelet_2 = /* Two buffers, both written. */
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W},
+	.model = &starpu_perfmodel_nop,
+};
+struct starpu_codelet mycodelet_3 = /* Three buffers: one read, two written. */
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_W, STARPU_W},
+	.model = &starpu_perfmodel_nop,
+};
+
+int main(int argc, char **argv) /* Build tasks with and without STARPU_EXECUTE_ON_NODE and check on every rank whether a task was created locally. */
+{
+	int ret;
+	int rank, size;
+	int policy = 12; /* only used as storage for the registered variables */
+	struct starpu_task *task;
+	starpu_data_handle_t handles[3];
+	int mpi_init;
+	struct starpu_conf conf;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	(void)mpi_init;
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 3)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 3 processes.\n");
+
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	if (rank == 0) /* handles[k] is backed by memory on rank k only and registered with rank k */
+	{
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+		starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[0], 10, 0);
+
+	if (rank == 1)
+	{
+		starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+		starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	if (rank == 2)
+	{
+		starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+		starpu_variable_data_register(&handles[2], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[2], 30, 2);
+
+	// Force the execution on node 1
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3,
+				     STARPU_R, handles[2],
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_EXECUTE_ON_NODE, 1,
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1");
+		task->destroy = 0;
+		starpu_task_destroy(task); /* the task is only built, never submitted */
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1");
+	}
+
+	// Force the execution on node 1
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_EXECUTE_ON_NODE, 1,
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1");
+	}
+
+	// Let StarPU choose the node
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3,
+				     STARPU_R, handles[2],
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 0");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0"); /* message names the rank this scenario actually asserts */
+	}
+
+	// Let StarPU choose the node
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 0");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0");
+	}
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+	starpu_data_unregister(handles[2]);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_selection2.c b/mpi/tests/policy_selection2.c
new file mode 100644
index 0000000..ca220a5
--- /dev/null
+++ b/mpi/tests/policy_selection2.c
@@ -0,0 +1,132 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args) /* Copy the value of buffer 0 into buffers 1 and 2. */
+{
+	(void)_args;
+
+	int *data0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *data1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+	int *data2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);
+	*data1 = *data0;
+	*data2 = *data0;
+}
+
+struct starpu_codelet mycodelet = /* Three buffers: one read, two written. */
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_W, STARPU_W},
+	.model = &starpu_perfmodel_nop,
+};
+
+int main(int argc, char **argv) /* Insert the copy task twice (node left to StarPU, then forced on node 1) and check the written values. */
+{
+	int ret;
+	int i;
+	int rank, size;
+	int data[3];
+	starpu_data_handle_t handles[3];
+	int mpi_init;
+	struct starpu_conf conf;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	(void)mpi_init;
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
+	{
+		if (rank == 0)
+		{
+			if (size < 3)
+				FPRINTF(stderr, "We need at least 3 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		}
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	data[0] = 42;
+	starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&data[0], sizeof(int));
+	starpu_mpi_data_register(handles[0], 10, 0);
+
+	data[1] = 42;
+	starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&data[1], sizeof(int));
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	data[2] = 12; /* source value: the task copies it into handles[0] and handles[1] */
+	starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&data[2], sizeof(int));
+	starpu_mpi_data_register(handles[2], 30, 2);
+
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, /* no node given here: StarPU chooses it */
+			       STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1],
+			       0);
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
+	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+#ifndef STARPU_SIMGRID
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(data[0] == data[2] && data[1] == data[2], "Computation incorrect. data[%d] (%d) != data[%d] (%d) && data[%d] (%d) != data[%d] (%d)\n",
+				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
+	}
+#endif
+
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_W);
+	for(i=0 ; i<2 ; i++) data[i] = 12; /* NOTE(review): 12 equals the initial data[2], so the second check may pass without the task running - confirm intent */
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+
+	// Force the execution on node 1
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+			       STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1],
+			       STARPU_EXECUTE_ON_NODE, 1,
+			       0);
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
+	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+#ifndef STARPU_SIMGRID
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(data[0] == data[2] && data[1] == data[2], "Computation incorrect. data[%d] (%d) != data[%d] (%d) && data[%d] (%d) != data[%d] (%d)\n",
+				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
+	}
+#endif
+
+	for(i=0 ; i<3 ; i++) starpu_data_unregister(handles[i]);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/policy_unregister.c b/mpi/tests/policy_unregister.c
new file mode 100644
index 0000000..644a143
--- /dev/null
+++ b/mpi/tests/policy_unregister.c
@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+int main(int argc, char **argv) /* Call the unregister function on the STARPU_MPI_NODE_SELECTION_MOST_R_DATA policy. */
+{
+	int ret;
+	struct starpu_conf conf;
+	int mpi_init;
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if (RUNNING_ON_VALGRIND)
+		return STARPU_TEST_SKIPPED;
+#endif
+
+	disable_coredump(); /* NOTE(review): presumably because the unregister call below is expected to abort - confirm */
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_node_selection_unregister_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}
diff --git a/mpi/tests/ring.c b/mpi/tests/ring.c
new file mode 100644
index 0000000..15dccf7
--- /dev/null
+++ b/mpi/tests/ring.c
@@ -0,0 +1,155 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+#ifdef STARPU_QUICK_CHECK
+# define NITER 32
+#elif !defined(STARPU_LONG_CHECK)
+# define NITER 256
+#else
+# define NITER 2048
+#endif
+
+#ifdef STARPU_USE_CUDA
+extern void increment_cuda(void *descr[], void *_args);
+#endif
+
+#ifdef STARPU_USE_HIP
+extern void increment_hip(void *descr[], void *_args);
+#endif
+
+void increment_cpu(void *descr[], void *_args) /* CPU kernel: increment the int at the start of the vector buffer. */
+{
+	(void)_args;
+	int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+	(*tokenptr)++;
+}
+
+static struct starpu_codelet increment_cl = /* One read-write buffer; CUDA and HIP variants are compiled in when enabled. */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda},
+#endif
+#ifdef STARPU_USE_HIP
+	.hip_funcs = {increment_hip},
+#endif
+	.cpu_funcs = {increment_cpu},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+
+void increment_token(starpu_data_handle_t token_handle) /* Submit one synchronous increment task on token_handle. */
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &increment_cl;
+	task->handles[0] = token_handle;
+	task->synchronous = 1;
+
+	int ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+int main(int argc, char **argv) /* Pass a token around the ring of ranks NITER times with blocking send/recv, incrementing it once per rank per loop. */
+{
+	int ret, rank, size;
+	int mpi_init;
+	int token = 42;
+	starpu_data_handle_t token_handle;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_hip_worker_get_count() == 0)) /* skip only when no worker of any supported kind exists, as the message below states */
+	{
+		if (rank == 0)
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n");
+		}
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
+
+	int nloops = NITER;
+	int loop;
+
+	int last_loop = nloops - 1;
+	int last_rank = size - 1;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; /* tag+1 is the tag the next rank (or rank 0 of the next loop) receives with */
+
+		if (loop == 0 && rank == 0)
+		{
+			starpu_data_acquire(token_handle, STARPU_W);
+			token = 0;
+			FPRINTF(stdout, "Start with token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			MPI_Status status;
+			ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
+		}
+
+		increment_token(token_handle);
+
+		if (loop == last_loop && rank == last_rank)
+		{
+			starpu_data_acquire(token_handle, STARPU_R);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			ret = starpu_mpi_send(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
+		}
+	}
+
+	starpu_data_unregister(token_handle);
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+#ifndef STARPU_SIMGRID
+	if (rank == last_rank)
+	{
+		FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size);
+		STARPU_ASSERT(token == nloops*size); /* one increment per rank per loop */
+	}
+#endif
+
+	return 0;
+}
diff --git a/mpi/tests/ring_async.c b/mpi/tests/ring_async.c
new file mode 100644
index 0000000..323d83d
--- /dev/null
+++ b/mpi/tests/ring_async.c
@@ -0,0 +1,162 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+#ifdef STARPU_QUICK_CHECK
+# define NITER 32
+#elif !defined(STARPU_LONG_CHECK)
+# define NITER 256
+#else
+# define NITER 2048
+#endif
+
+#ifdef STARPU_USE_CUDA
+extern void increment_cuda(void *descr[], void *_args);
+#endif
+
+#ifdef STARPU_USE_HIP
+extern void increment_hip(void *descr[], void *_args);
+#endif
+
+void increment_cpu(void *descr[], void *_args) /* CPU kernel: increment the int at the start of the vector buffer. */
+{
+	(void)_args;
+	int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+	(*tokenptr)++;
+}
+
+static struct starpu_codelet increment_cl = /* One read-write buffer; CUDA and HIP variants are compiled in when enabled. */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda},
+#endif
+#ifdef STARPU_USE_HIP
+	.hip_funcs = {increment_hip},
+#endif
+	.cpu_funcs = {increment_cpu},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+
+void increment_token(starpu_data_handle_t token_handle) /* Submit one synchronous increment task on token_handle. */
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &increment_cl;
+	task->handles[0] = token_handle;
+	task->synchronous = 1;
+
+	int ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+int main(int argc, char **argv) /* Same ring as the blocking variant, using isend/irecv followed by starpu_mpi_wait(). */
+{
+	int ret, rank, size;
+	int mpi_init;
+	int token = 42;
+	starpu_data_handle_t token_handle;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_hip_worker_get_count() == 0)) /* skip only when no worker of any supported kind exists, as the message below states */
+	{
+		if (rank == 0)
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n");
+		}
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
+
+	int nloops = NITER;
+	int loop;
+
+	int last_loop = nloops - 1;
+	int last_rank = size - 1;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; /* tag+1 is the tag the next rank (or rank 0 of the next loop) receives with */
+
+		if (loop == 0 && rank == 0)
+		{
+			starpu_data_acquire(token_handle, STARPU_W);
+			token = 0;
+			FPRINTF(stdout, "Start with token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			MPI_Status status;
+			starpu_mpi_req req;
+			ret = starpu_mpi_irecv(token_handle, &req, (rank+size-1)%size, tag, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv");
+			ret = starpu_mpi_wait(&req, &status);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait");
+		}
+
+		increment_token(token_handle);
+
+		if (loop == last_loop && rank == last_rank)
+		{
+			starpu_data_acquire(token_handle, STARPU_R);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			starpu_mpi_req req;
+			MPI_Status status;
+			ret = starpu_mpi_isend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend");
+			ret = starpu_mpi_wait(&req, &status);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait");
+		}
+	}
+
+	starpu_data_unregister(token_handle);
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+#ifndef STARPU_SIMGRID
+	if (rank == last_rank)
+	{
+		FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size);
+		STARPU_ASSERT(token == nloops*size); /* one increment per rank per loop */
+	}
+#endif
+
+	return 0;
+}
diff --git a/mpi/tests/ring_async_implicit.c b/mpi/tests/ring_async_implicit.c
new file mode 100644
index
0000000..b4c31c8
--- /dev/null
+++ b/mpi/tests/ring_async_implicit.c
@@ -0,0 +1,154 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "helper.h"
+
+#ifdef STARPU_QUICK_CHECK
+# define NITER 32
+#elif !defined(STARPU_LONG_CHECK)
+# define NITER 256
+#else
+# define NITER 2048
+#endif
+
+#ifdef STARPU_USE_CUDA
+extern void increment_cuda(void *descr[], void *_args);
+#endif
+#ifdef STARPU_USE_HIP
+extern void increment_hip(void *descr[], void *_args);
+#endif
+
+void increment_cpu(void *descr[], void *_args) /* CPU kernel: increment the int at the start of the vector buffer. */
+{
+	(void)_args;
+	int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+	(*tokenptr)++;
+}
+
+static struct starpu_codelet increment_cl = /* One read-write buffer; CUDA and HIP variants are compiled in when enabled. */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda},
+#endif
+#ifdef STARPU_USE_HIP
+	.hip_funcs = {increment_hip},
+#endif
+	.cpu_funcs = {increment_cpu},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.model = &starpu_perfmodel_nop,
+};
+
+void increment_token(starpu_data_handle_t token_handle) /* Submit one increment task on token_handle; unlike the blocking ring tests it is not marked synchronous. */
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &increment_cl;
+	task->handles[0] = token_handle;
+
+	int ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+int main(int argc, char **argv) /* Same ring as the blocking variant, using detached isend/irecv and a final starpu_task_wait_for_all(). */
+{
+	int ret, rank, size;
+	int token = 42;
+	starpu_data_handle_t token_handle;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_hip_worker_get_count() == 0)) /* skip only when no worker of any supported kind exists, as the message below states */
+	{
+		if (rank == 0)
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n");
+		}
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return rank == 0 ? STARPU_TEST_SKIPPED : 0;
+	}
+
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
+
+	int nloops = NITER;
+	int loop;
+
+	int last_loop = nloops - 1;
+	int last_rank = size - 1;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; /* tag+1 is the tag the next rank (or rank 0 of the next loop) receives with */
+
+		if (loop == 0 && rank == 0)
+		{
+			starpu_data_acquire(token_handle, STARPU_W);
+			token = 0;
+			FPRINTF(stdout, "Start with token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			ret = starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached");
+		}
+
+		increment_token(token_handle);
+
+		if (loop == last_loop && rank == last_rank)
+		{
+			starpu_data_acquire(token_handle, STARPU_R);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
+			starpu_data_release(token_handle);
+		}
+		else
+		{
+			ret = starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached");
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(token_handle);
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+#ifndef STARPU_SIMGRID
+	if (rank == last_rank)
+	{
+		FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size);
+		STARPU_ASSERT(token == nloops*size); /* one increment per rank per loop */
+	}
+#endif
+
+	return 0;
+}
diff --git a/mpi/tests/ring_kernel.cu b/mpi/tests/ring_kernel.cu
new file mode 100644
index 0000000..73b38bc
--- /dev/null
+++ b/mpi/tests/ring_kernel.cu
@@ -0,0 +1,33 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+
+static __global__ void cuda_incrementer(int *token) /* Device kernel: increment the pointed-to int. */
+{
+	(*token)++;
+}
+
+extern "C" void increment_cuda(void *descr[], void *_args) /* CUDA implementation of the increment codelet. */
+{
+	(void) _args;
+	int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	cuda_incrementer<<<1,1, 0, starpu_cuda_get_local_stream()>>>(tokenptr); /* a single thread on StarPU's local stream */
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream()); /* wait for the kernel before returning */
+}
diff --git a/mpi/tests/ring_kernel_hip.hip b/mpi/tests/ring_kernel_hip.hip
new file mode 100644
index 0000000..5a646e1
--- /dev/null
+++ b/mpi/tests/ring_kernel_hip.hip
@@ -0,0 +1,33 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +static __global__ void hip_incrementer(int *token) +{ + (*token)++; +} + +extern "C" void increment_hip(void *descr[], void *_args) +{ + (void) _args; + int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + + hipLaunchKernelGGL(hip_incrementer, 1, 1, 0, starpu_hip_get_local_stream(), tokenptr); + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); + hipStreamSynchronize(starpu_hip_get_local_stream()); +} diff --git a/mpi/tests/ring_sync.c b/mpi/tests/ring_sync.c new file mode 100644 index 0000000..2373ff8 --- /dev/null +++ b/mpi/tests/ring_sync.c @@ -0,0 +1,159 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu_mpi.h> +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 32 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif + +#ifdef STARPU_USE_CUDA +extern void increment_cuda(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_HIP +extern void increment_hip(void *descr[], void *_args); +#endif + +void increment_cpu(void *descr[], void *_args) +{ + (void)_args; + int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + (*tokenptr)++; +} + +static struct starpu_codelet increment_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {increment_hip}, +#endif + .cpu_funcs = {increment_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +void increment_token(starpu_data_handle_t token_handle) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_cl; + task->handles[0] = token_handle; + task->synchronous = 1; + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + int token = 42; + starpu_data_handle_t token_handle; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_hip_worker_get_count() == 0)) /* the codelet has CPU, CUDA and HIP implementations: skip only when none can run */ + { + if (rank == 0) + { + if (size < 2) + FPRINTF(stderr, "We need at least 2 processes.\n"); + else + FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); + } + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ?
STARPU_TEST_SKIPPED : 0; + } + + starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); + + int nloops = NITER; + int loop; + + int last_loop = nloops - 1; + int last_rank = size - 1; + + for (loop = 0; loop < nloops; loop++) + { + starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; + + if (loop == 0 && rank == 0) + { + starpu_data_acquire(token_handle, STARPU_W); + token = 0; + FPRINTF(stdout, "Start with token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + MPI_Status status; + ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + + increment_token(token_handle); + + if (loop == last_loop && rank == last_rank) + { + starpu_data_acquire(token_handle, STARPU_R); + FPRINTF(stdout, "Finished : token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + starpu_mpi_req req; + MPI_Status status; + ret = starpu_mpi_issend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_wait(&req, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + } + + starpu_data_unregister(token_handle); + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + +#ifndef STARPU_SIMGRID + if (rank == last_rank) + { + FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); + STARPU_ASSERT(token == nloops*size); + } +#endif + + return 0; +} diff --git a/mpi/tests/ring_sync_detached.c b/mpi/tests/ring_sync_detached.c new file mode 100644 index 0000000..d66f422 --- /dev/null +++ b/mpi/tests/ring_sync_detached.c @@ -0,0 +1,176 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define NITER 32 +#elif !defined(STARPU_LONG_CHECK) +# define NITER 256 +#else +# define NITER 2048 +#endif + +#ifdef STARPU_USE_CUDA +extern void increment_cuda(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_HIP +extern void increment_hip(void *descr[], void *_args); +#endif + +void increment_cpu(void *descr[], void *_args) +{ + (void)_args; + int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + (*tokenptr)++; +} + +static struct starpu_codelet increment_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {increment_hip}, +#endif + .cpu_funcs = {increment_cpu}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = &starpu_perfmodel_nop, +}; + +void increment_token(starpu_data_handle_t handle) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_cl; + task->handles[0] = handle; + task->synchronous = 1; + + int ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); +} + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *completed = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *completed = 1; + 
STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int main(int argc, char **argv) +{ + int ret, rank, size; + int token = 42; + starpu_data_handle_t token_handle; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_hip_worker_get_count() == 0)) /* skip only when no CPU, CUDA or HIP worker exists at all */ + { + if (rank == 0) + { + if (size < 2) + FPRINTF(stderr, "We need at least 2 processes.\n"); + else + FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); + } + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); + + int nloops = NITER; + int loop; + + int last_loop = nloops - 1; + int last_rank = size - 1; + + for (loop = 0; loop < nloops; loop++) + { + starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; + + if (loop == 0 && rank == 0) + { + starpu_data_acquire(token_handle, STARPU_W); + token = 0; + FPRINTF_MPI(stderr, "Start with token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + MPI_Status status; + ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + + increment_token(token_handle); + + if (loop == last_loop && rank == last_rank) + { + starpu_data_acquire(token_handle, STARPU_R); + FPRINTF_MPI(stderr, "Finished : token value %d\n", token); + starpu_data_release(token_handle); + } + else + { + int sent = 0; + ret = starpu_mpi_issend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, callback, &sent); +
STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend_detached"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!sent) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + } + + starpu_data_unregister(token_handle); + FPRINTF_MPI(stderr, "Final value for token %d\n", token); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + +#ifndef STARPU_SIMGRID + if (rank == last_rank) + { + FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); + STARPU_ASSERT(token == nloops*size); + } +#endif + + + return 0; +} diff --git a/mpi/tests/star.c b/mpi/tests/star.c new file mode 100644 index 0000000..f883e46 --- /dev/null +++ b/mpi/tests/star.c @@ -0,0 +1,109 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "helper.h" + +static void read_func(void *descr[], void *_args) +{ + (void)_args; + int *a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); + + FPRINTF_MPI(stderr, "x = %d\n", *a); +} + +static struct starpu_codelet read_codelet = +{ + .cpu_funcs = {read_func}, + .nbuffers = 1, + .modes = {STARPU_R}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "read_codelet" +}; + +static void write_func(void *descr[], void *_args) +{ + int rank, *a; + + a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); + starpu_codelet_unpack_args(_args, &rank); + + *a = rank+12; + FPRINTF_MPI(stderr, "x = %d rank=%d\n", *a, rank); +} + +static struct starpu_codelet write_codelet = +{ + .cpu_funcs = {write_func}, + .nbuffers = 1, + .modes = {STARPU_W}, +#ifdef STARPU_SIMGRID + .model = &starpu_perfmodel_nop, +#endif + .name = "write_codelet" +}; + +int main(int argc, char **argv) +{ + int ret, rank, size, node; + starpu_data_handle_t handle; + int var=42; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size<3) + { + FPRINTF(stderr, "We need more than 2 processes.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + + if (rank==0) + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + else + starpu_variable_data_register(&handle, -1, (uintptr_t)NULL, sizeof(var)); + starpu_mpi_data_register(handle, 42, 0); + + for(node=1 ; node +#include "helper.h" + +int main(int argc, char **argv) +{ + int ret; + starpu_data_handle_t handle; + int mpi_init; + +#ifdef STARPU_HAVE_VALGRIND_H + if (RUNNING_ON_VALGRIND) + return STARPU_TEST_SKIPPED; +#endif + + disable_coredump(); + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(NULL, NULL, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&ret, 1, sizeof(int)); + starpu_mpi_datatype_register(handle, NULL, NULL); + starpu_data_unregister(handle); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/stats.c b/mpi/tests/stats.c new file mode 100644 index 0000000..26b6b13 --- /dev/null +++ b/mpi/tests/stats.c @@ -0,0 +1,112 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) +#warning unsetenv or setenv are not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + int value; + starpu_data_handle_t handle; + size_t *stats; + + unsetenv("STARPU_MPI_CACHE"); + unsetenv("STARPU_MPI_STATS"); + unsetenv("STARPU_COMM_STATS"); + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + stats = calloc(size, sizeof(stats[0])); + value = rank; + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + if (rank == 0) + { + ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 1) + { + ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + + starpu_mpi_comm_stats_enable(); + + if (rank == 0) + { + ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 1) + { + ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + + starpu_mpi_comm_stats_disable(); + + if (rank == 0) + { + ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 1) + { + ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + + starpu_data_unregister(handle); + + 
starpu_mpi_comm_stats_retrieve(stats); + if (rank == 0) + STARPU_ASSERT_MSG(stats[1] == sizeof(int), "Comm stats are incorrect %ld != %ld\n", (long)stats[1], (long)sizeof(int)); /* report the counter that is actually checked, converted to match %ld */ + + free(stats); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + return 0; +} +#endif diff --git a/mpi/tests/sync.c b/mpi/tests/sync.c new file mode 100644 index 0000000..aee528d --- /dev/null +++ b/mpi/tests/sync.c @@ -0,0 +1,113 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu_mpi.h> +#include "helper.h" + +int main(int argc, char **argv) +{ + int size, x=789; + int rank, other_rank; + int ret; + starpu_data_handle_t data[2]; + int mpi_init; + struct starpu_conf conf; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size % 2) + { + FPRINTF(stderr, "We need a even number of processes.\n"); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + other_rank = rank%2 == 0 ?
rank+1 : rank-1; + FPRINTF(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); + + if (rank % 2) + { + MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD); + FPRINTF(stderr, "[%d] sending %d\n", rank, rank); + } + else + { + MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + FPRINTF(stderr, "[%d] received %d\n", rank, x); + } + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + if (rank % 2) + { + starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); + starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); + starpu_mpi_data_register(data[1], 22, 0); + } + else + starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data[0], 12, 0); + + if (rank % 2) + { + starpu_mpi_req req; + ret = starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); + ret = starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + ret = starpu_mpi_wait(&req, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); + } + else + { + int *xx; + + ret = starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + FPRINTF_MPI(stderr, "received %d\n", *xx); + STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + + starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned)); + starpu_mpi_data_register(data[1], 22, 0); + ret = 
starpu_mpi_recv(data[0], other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + starpu_data_acquire(data[0], STARPU_R); + xx = (int *)starpu_variable_get_local_ptr(data[0]); + STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); + starpu_data_release(data[0]); + } + + starpu_data_unregister(data[0]); + starpu_data_unregister(data[1]); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return 0; +} diff --git a/mpi/tests/tags_allocate.c b/mpi/tests/tags_allocate.c new file mode 100644 index 0000000..6c981bf --- /dev/null +++ b/mpi/tests/tags_allocate.c @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu_mpi.h> +#include "helper.h" + +#define X 3 +#define Y 4 + +int main(int argc, char **argv) +{ + int size, rank, mpi_init; + int ret=0; + int x, y; + struct starpu_conf conf; + int matrix[X][Y]; + starpu_data_handle_t data_handles[X][Y]; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + int64_t mintag = starpu_mpi_tags_allocate(X*Y); + + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + int tag = x*Y + y; /* row-major offset: unique and within [0, X*Y), the range allocated above */ + matrix[x][y] = tag; + starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&matrix[x][y], sizeof(matrix[x][y])); + starpu_mpi_data_register(data_handles[x][y], mintag + tag, 0); + } + } + + // Here we can use the data + + for(x = 0; x < X; x++) + { + for (y = 0; y < Y; y++) + { + starpu_data_unregister(data_handles[x][y]); + } + } + + starpu_mpi_tags_free(mintag); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? ret : 0; +} diff --git a/mpi/tests/tags_checking.c b/mpi/tests/tags_checking.c new file mode 100644 index 0000000..90cbc1b --- /dev/null +++ b/mpi/tests/tags_checking.c @@ -0,0 +1,166 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper.h" + +#define VAL0 12 +#define VAL1 24 + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +void callback(void *arg) +{ + unsigned *received = arg; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + *received = *received + 1; + FPRINTF_MPI(stderr, "Request %u received\n", *received); + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int do_test(int rank, starpu_mpi_tag_t initial_tag, int sdetached, int rdetached) +{ + int ret, i; + int val[2]; + starpu_data_handle_t data[2]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + if (rank == 1) + { + val[0] = VAL0; + val[1] = VAL1; + } + else + { + val[0] = -1; + val[1] = -1; + } + starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); + starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); + starpu_mpi_data_register(data[0], initial_tag+77, 1); + starpu_mpi_data_register(data[1], initial_tag+88, 1); + + if (rank == 1) + { + for(i=1 ; i>=0 ; i--) + { + if (sdetached) + { + ret = starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); + } + else + { + ret = starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + } + } + 
else if (rank == 0) + { + int received = 0; + + for(i=0 ; i<2 ; i++) + FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); + for(i=0 ; i<2 ; i++) + { + if (rdetached) + { + ret = starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + } + else + { + ret = starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); + } + } + + if (rdetached) + { + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (received != 2) + { + FPRINTF_MPI(stderr, "Received %d messages\n", received); + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + + for(i=0 ; i<2 ; i++) + starpu_data_acquire(data[i], STARPU_R); + for(i=0 ; i<2 ; i++) + FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); + for(i=0 ; i<2 ; i++) + starpu_data_release(data[i]); + } + FPRINTF_MPI(stderr, "Waiting ...\n"); + starpu_task_wait_for_all(); + + starpu_data_unregister(data[0]); + starpu_data_unregister(data[1]); + + if (rank == 0) + { + ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; + } + starpu_mpi_shutdown(); + return ret; +} + +int main(int argc, char **argv) +{ + int size; + int rank; + int ret=0; + int sdetached, rdetached; + starpu_mpi_tag_t initial_tag = 0; + + MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + FPRINTF(stderr, "We need at least 2 processes.\n"); + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + for(sdetached=0 ; sdetached<=1 ; sdetached++) + { + for(rdetached=0 ; rdetached<=1 ; rdetached++) + { + ret += do_test(rank, initial_tag, sdetached, rdetached); + initial_tag += 2; + } + } + + MPI_Finalize(); + return rank == 0 ? 
ret : 0; +} diff --git a/mpi/tests/temporary.c b/mpi/tests/temporary.c new file mode 100644 index 0000000..a37d597 --- /dev/null +++ b/mpi/tests/temporary.c @@ -0,0 +1,156 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This tests that one can register temporary data on each MPI node which can mix with common data */ + +#include +#include "helper.h" +/* CPU kernel: stores the sum of the variables behind descr[1] and descr[2] into descr[0] and logs the operation. */ +static void func_add(void *descr[], void *_args) +{ + (void)_args; + int *a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); + const int *b = (void*) STARPU_VARIABLE_GET_PTR(descr[1]); + const int *c = (void*) STARPU_VARIABLE_GET_PTR(descr[2]); + + *a = *b + *c; + FPRINTF_MPI(stderr, "%d + %d = %d\n", *b, *c, *a); +} +/* Codelet around func_add: three buffers, the first written and the other two read. */ +static struct starpu_codelet codelet_add = +{ + .cpu_funcs = {func_add}, + .nbuffers = 3, + .modes = {STARPU_W, STARPU_R, STARPU_R}, + .model = &starpu_perfmodel_nop, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; +/* Chains additions through per-rank temporaries and checks that ranks 0 and 1 both end up with 24. */ +int main(int argc, char **argv) +{ + int rank, size, n; + int ret; + int val0 = 0, val1 = 0; + starpu_data_handle_t data0, data1, tmp0, tmp, tmp2; + struct starpu_conf conf; + int mpi_init; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init,
MPI_COMM_WORLD, &conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + /* The test body is skipped when the MPI cache is disabled */ + if (starpu_mpi_cache_is_enabled() == 0) + goto skip; + + if (rank == 0) + { + val0 = 1; + starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t)&val0, sizeof(val0)); + starpu_variable_data_register(&data1, -1, (uintptr_t)NULL, sizeof(val0)); + starpu_variable_data_register(&tmp0, -1, (uintptr_t)NULL, sizeof(val0)); + starpu_mpi_data_register(tmp0, -1, 0); + } + else if (rank == 1) + { + starpu_variable_data_register(&data0, -1, (uintptr_t)NULL, sizeof(val0)); + starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t)&val1, sizeof(val1)); + tmp0 = NULL; + } + else + { + starpu_variable_data_register(&data0, -1, (uintptr_t)NULL, sizeof(val0)); + starpu_variable_data_register(&data1, -1, (uintptr_t)NULL, sizeof(val0)); + tmp0 = NULL; + } + starpu_variable_data_register(&tmp, -1, (uintptr_t)NULL, sizeof(val0)); + starpu_variable_data_register(&tmp2, -1, (uintptr_t)NULL, sizeof(val0)); + + starpu_mpi_data_register(data0, 42, 0); + starpu_mpi_data_register(data1, 43, 1); + starpu_mpi_data_register(tmp, 44, 0); + starpu_mpi_data_register(tmp2, -1, STARPU_MPI_PER_NODE); + + /* Test temporary data on node 0 only: tmp0 is registered on rank 0 alone. Starting from val0 == 1 the chain gives tmp0 = 2, data0 = 4, tmp = 8 */ + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp0, STARPU_R, data0, STARPU_R, data0, 0); + + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data0, STARPU_R, tmp0, STARPU_R, tmp0, 0); + + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp, STARPU_R, data0, STARPU_R, data0, 0); + + /* Now make some tmp per-node, so that each node replicates the computation */ + for (n = 0; n < size; n++) + if (n != 0) + /* Get
the value on all nodes */ + starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, tmp, n, NULL, NULL); + starpu_mpi_data_set_rank(tmp, STARPU_MPI_PER_NODE); + + /* This task writes to a per-node data, so will be executed by all nodes */ + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp2, STARPU_R, tmp, STARPU_R, tmp, 0); + + /* All MPI nodes have computed the value 8 + 8 = 16 (no MPI communication here!) */ + starpu_data_acquire_on_node(tmp2, STARPU_MAIN_RAM, STARPU_R); + STARPU_ASSERT(*(int*)starpu_data_handle_to_pointer(tmp2, STARPU_MAIN_RAM) == 16); + starpu_data_release_on_node(tmp2, STARPU_MAIN_RAM); + + /* And nodes 0 and 1 do something with it: data0 and data1 both become tmp + tmp2 = 24 */ + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data0, STARPU_R, tmp, STARPU_R, tmp2, 0); + starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data1, STARPU_R, tmp, STARPU_R, tmp2, 0); + + starpu_task_wait_for_all(); + + if (rank == 0) + { + starpu_data_unregister(tmp0); + } + starpu_data_unregister(data0); + starpu_data_unregister(data1); + starpu_data_unregister(tmp); + starpu_data_unregister(tmp2); + + if (rank == 0) + STARPU_ASSERT_MSG(val0 == 24, "[rank 0] %d should be %d\n", val0, 24); + if (rank == 1) + STARPU_ASSERT_MSG(val1 == 24, "[rank 1] %d should be %d\n", val1, 24); + +skip: + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return 0; +} diff --git a/mpi/tests/user_defined_datatype.c b/mpi/tests/user_defined_datatype.c new file mode 100644 index 0000000..62f5b78 --- /dev/null +++ b/mpi/tests/user_defined_datatype.c @@ -0,0 +1,202 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include "helper.h" + +#ifdef STARPU_QUICK_CHECK +# define ELEMENTS 10 +#else +# define ELEMENTS 1000 +#endif + +typedef void (*test_func)(starpu_data_handle_t *, int, int, starpu_mpi_tag_t); + +void test_handle_irecv_isend_detached(starpu_data_handle_t *handles, int nb_handles, int rank, starpu_mpi_tag_t tag) +{ + int i; + (void)rank; + + for(i=0 ; ivalue) +/* Returns the value pointer held by the handle's interface on STARPU_MAIN_RAM. */ +int *starpu_value_get(starpu_data_handle_t handle) +{ + struct starpu_value_interface *value_interface = + (struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return value_interface->value; +} +/* Fills the interface of every memory node: the home node receives the registered value pointer, all others a null pointer. */ +static void value_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_value_interface *value_interface = (struct starpu_value_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_value_interface *local_interface = (struct starpu_value_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + local_interface->value = value_interface->value; + else + local_interface->value = 0; + } +} +/* Allocates one int on node and records its address in the interface; returns the allocated size or -ENOMEM. */ +static starpu_ssize_t value_allocate_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_value_interface *value_interface = (struct starpu_value_interface *) data_interface; + int *addr = 0; + + addr = (int *) starpu_malloc_on_node(node, sizeof(int)); + if (!addr) + return -ENOMEM; + + /* update the data properly in consequence */ + value_interface->value = addr; + + return sizeof(int); +} +/* Releases the int held by the interface on node and resets the pointer. */ +static void value_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpu_value_interface *value_interface
= (struct starpu_value_interface *) data_interface; + + starpu_free_on_node(node, (uintptr_t) value_interface->value, sizeof(int)); + value_interface->value = NULL; +} +/* The payload is a single int, whatever the handle. */ +static size_t value_get_size(starpu_data_handle_t handle) +{ + (void)handle; + return sizeof(int); +} +/* Footprint computed from the data size only. */ +static uint32_t value_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(value_get_size(handle), 0); +} +/* Returns the pointer stored in the interface; node is ignored. */ +static void *value_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_value_interface *value_interface = data_interface; + + return (void*) value_interface->value; +} +/* Reports sizeof(int) in *count and, when ptr is non-NULL, copies the value into a buffer newly allocated on node. */ +static int value_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_value_interface *value_interface = (struct starpu_value_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = sizeof(int); + if (ptr != NULL) + { + *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); + memcpy(*ptr, value_interface->value, sizeof(int)); + } + + return 0; +} +/* Copies the first int of ptr into the value on node, leaving ptr allocated; the assert requires the received value to be 36. */ +static int value_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)count; + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_value_interface *value_interface = (struct starpu_value_interface *) + starpu_data_get_interface_on_node(handle, node); + + value_interface->value[0] = ((int *)ptr)[0]; + + assert(value_interface->value[0] == 36); + + return 0; +} +/* Same as value_peek_data, then frees the packed buffer. */ +static int value_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + value_peek_data(handle, node, ptr, count); + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + + return 0; +} +/* Copies the single int between two nodes through starpu_interface_copy. */ +static int copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + void *async_data) +{ + struct starpu_value_interface *src_value = src_interface; + struct starpu_value_interface
*dst_value = dst_interface; + + return starpu_interface_copy((uintptr_t) src_value->value, 0, src_node, + (uintptr_t) dst_value->value, 0, dst_node, + sizeof(int), + async_data); +} +/* Only the generic any-to-any copy is provided. */ +static const struct starpu_data_copy_methods value_copy_methods = +{ + .any_to_any = copy_any_to_any +}; +/* Operation table of the value interface. */ +static struct starpu_data_interface_ops interface_value_ops = +{ + .register_data_handle = value_register_data_handle, + .allocate_data_on_node = value_allocate_data_on_node, + .free_data_on_node = value_free_data_on_node, + .copy_methods = &value_copy_methods, + .get_size = value_get_size, + .footprint = value_footprint, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpu_value_interface), + .to_pointer = value_to_pointer, + .pack_data = value_pack_data, + .peek_data = value_peek_data, + .unpack_data = value_unpack_data +}; +/* Registers value under a new handle whose home node is home_node, using the value interface. */ +void starpu_value_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *value) +{ + struct starpu_value_interface value_int = + { + .value = value + }; + + starpu_data_register(handleptr, home_node, &value_int, &interface_value_ops); +} + +#endif /* _USER_DEFINED_DATATYPE_VALUE_H */ diff --git a/mpi/tests/wait_for_all.c b/mpi/tests/wait_for_all.c new file mode 100644 index 0000000..7bd20fb --- /dev/null +++ b/mpi/tests/wait_for_all.c @@ -0,0 +1,97 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "helper.h" +/* Completion callback for the detached receive: stores 1 in the int pointed to by arg. */ +void callback(void *arg) +{ + int *completed = arg; + *completed = 1; +} +/* NOTE(review): 000 and 0000 are octal zero literals, so SIZE evaluates to 0 and the vector below has no elements; confirm whether a larger payload was intended. */ +#define SIZE (370*000*0000) +/* Rank 1 sends a vector to rank 0, which posts a detached receive; the test fails on rank 0 if starpu_mpi_wait_for_all() returns before the receive callback has run. */ +int main(int argc, char **argv) +{ + int ret, rank, size; + int mpi_init; + starpu_data_handle_t handle; + char *value; + int comm_completed=42; + + MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); + + ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + + starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); + + if (size < 2) + { + if (rank == 0) + FPRINTF(stderr, "We need at least 2 processes.\n"); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank == 0 ? STARPU_TEST_SKIPPED : 0; + } + + value = calloc(SIZE, sizeof(value[0])); + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)value, SIZE, sizeof(value[0])); + + if (rank == 1) + { + ret = starpu_mpi_send(handle, 0, 1, MPI_COMM_WORLD); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); + } + else if (rank == 0) + { + ret = starpu_mpi_irecv_detached(handle, 1, 1, MPI_COMM_WORLD, callback, &comm_completed); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); + } + + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + if (rank == 0) + { + if (comm_completed == 42) + { + FPRINTF_MPI(stderr, "comm not completed\n"); + ret = 1; + } + else + { + FPRINTF_MPI(stderr, "comm completed\n"); + } + } + starpu_data_unregister(handle); + free(value); + + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + + if (rank == 0 && comm_completed == 42) + { + FPRINTF(stderr, "comm still not completed\n"); + ret = 1; + } + + return (rank == 0) ?
ret : 0; +} diff --git a/mpi/tools/Makefile.am b/mpi/tools/Makefile.am new file mode 100644 index 0000000..aa88c13 --- /dev/null +++ b/mpi/tools/Makefile.am @@ -0,0 +1,39 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2017-2017 Erwan Leria +# Copyright (C) 2013-2013 Thibaut Lambert +# Copyright (C) 2013-2013 Joris Pablo +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +include $(top_srcdir)/make/starpu-notests.mk + +SUBDIRS = + +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/mpi/include -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) + +CC=$(CC_OR_MPICC) +CCLD=$(CC_OR_MPICC) + +starpu_replay.c starpu_replay_sched.c: + $(V_ln) $(LN_S) $(top_srcdir)/tools/$(notdir $@) $@ + +if STARPU_SIMGRID +bin_PROGRAMS = starpu_replay_mpi + +starpu_replay_mpi_SOURCES = \ + starpu_replay.c \ + starpu_replay_sched.c +endif diff --git a/mpi/tools/Makefile.in b/mpi/tools/Makefile.in new file mode 100644 index 0000000..d9a93d9 --- /dev/null +++ b/mpi/tools/Makefile.in @@ -0,0 +1,1091 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +@STARPU_SIMGRID_TRUE@bin_PROGRAMS = starpu_replay_mpi$(EXEEXT) +subdir = mpi/tools +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am__starpu_replay_mpi_SOURCES_DIST = starpu_replay.c \ + starpu_replay_sched.c +@STARPU_SIMGRID_TRUE@am_starpu_replay_mpi_OBJECTS = \ +@STARPU_SIMGRID_TRUE@ starpu_replay.$(OBJEXT) \ +@STARPU_SIMGRID_TRUE@ starpu_replay_sched.$(OBJEXT) +starpu_replay_mpi_OBJECTS = $(am_starpu_replay_mpi_OBJECTS) +starpu_replay_mpi_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : 
+AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/starpu_replay.Po \ + ./$(DEPDIR)/starpu_replay_sched.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(starpu_replay_mpi_SOURCES) +DIST_SOURCES = $(am__starpu_replay_mpi_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ 
+ distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + 
reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = $(CC_OR_MPICC) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ 
+HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ + $(STARPU_EXPORTED_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ 
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = 
@STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = 
@STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = 
@sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2017-2017 Erwan Leria +# Copyright (C) 2013-2013 Thibaut Lambert +# Copyright (C) 2013-2013 Joris Pablo +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +SUBDIRS = +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/mpi/include -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +CCLD = $(CC_OR_MPICC) +@STARPU_SIMGRID_TRUE@starpu_replay_mpi_SOURCES = \ +@STARPU_SIMGRID_TRUE@ starpu_replay.c \ +@STARPU_SIMGRID_TRUE@ starpu_replay_sched.c + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/tools/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign mpi/tools/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files 
"$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +starpu_replay_mpi$(EXEEXT): $(starpu_replay_mpi_OBJECTS) $(starpu_replay_mpi_DEPENDENCIES) $(EXTRA_starpu_replay_mpi_DEPENDENCIES) + @rm -f starpu_replay_mpi$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_replay_mpi_OBJECTS) $(starpu_replay_mpi_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay_sched.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 
's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(PROGRAMS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + 
else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/starpu_replay.Po + -rm -f ./$(DEPDIR)/starpu_replay_sched.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/starpu_replay.Po + -rm -f ./$(DEPDIR)/starpu_replay_sched.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean 
clean-binPROGRAMS \ + clean-generic clean-libtool cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +starpu_replay.c starpu_replay_sched.c: + $(V_ln) $(LN_S) $(top_srcdir)/tools/$(notdir $@) $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/mpi/tools/starpu_replay.c b/mpi/tools/starpu_replay.c new file mode 100644 index 0000000..1e63873 --- /dev/null +++ b/mpi/tools/starpu_replay.c @@ -0,0 +1,1198 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This reads a tasks.rec file and replays the recorded task graph. + * Currently, this version is done to run with simgrid. + * + * For further information, contact erwan.leria@inria.fr + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + + +#define REPLAY_NMAX_DEPENDENCIES 8 + +#define ARRAY_DUP(in, out, n) memcpy(out, in, n * sizeof(*out)) +#define ARRAY_INIT(array, n) memset(array, 0, n * sizeof(*array)) + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Declarations of global variables, structures, pointers, ... 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static int static_workerid; + +/* TODO: move to core header while moving starpu_replay_sched to core */ +extern void schedRecInit(const char * filename); +extern void applySchedRec(struct starpu_task * starpu_task, long submit_order); + +/* Enum for normal and "wontuse" tasks */ +enum task_type {NormalTask, WontUseTask}; + +typedef unsigned long jobid_t; + +enum task_type control; +static char *name = NULL; +static char *model = NULL; +static jobid_t jobid; +static jobid_t *dependson; +static long submitorder = -1; +static starpu_tag_t tag; +static int workerid; +static uint32_t footprint; +static double flops, total_flops = 0.; + +static double startTime; //start time (The instant when the task starts) +static double endTime; //end time (The instant when the task ends) + +static int iteration = -1; + +static starpu_data_handle_t handles[STARPU_NMAXBUFS]; +static enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; +static char normal_reg_signal[STARPU_NMAXBUFS]; + +/* Use the following arrays when the number of data is greater than STARPU_NMAXBUFS */ + +starpu_data_handle_t * handles_ptr; +enum starpu_data_access_mode * modes_ptr; +size_t * sizes_set; + +static size_t dependson_size; +static size_t ndependson; + +static unsigned nb_parameters = 0; /* Number of parameters */ +static int alloc_mode; /* If alloc_mode value is 1, then the handles are stored in dyn_handles, else they are in handles */ + +static int priority = 0; + +char * reg_signal = NULL; /* The register signal (0 or 1 coded on 8 bit) is used to know which handle of the task has to be registered in StarPU (in fact to avoid handle twice)*/ + +/* Record all tasks, hashed by jobid. 
*/ +static struct task +{ + struct starpu_rbtree_node node; + UT_hash_handle hh; + jobid_t jobid; + int iteration; + long submit_order; + jobid_t *deps; + size_t ndependson; + struct starpu_task task; + enum task_type type; + int reg_signal; +} *tasks; + +/* Record handles */ +static struct handle +{ + UT_hash_handle hh; + starpu_data_handle_t mem_ptr; /* This value should be the registered handle */ + starpu_data_handle_t handle; /* The key is the original value of the handle in the file */ +} * handles_hash; + +/* Record models */ + +static struct perfmodel +{ + UT_hash_handle hh; + struct starpu_perfmodel perfmodel; + char * model_name; +} * model_hash; + + + +/* + * Replay data interface + * We don't care about many things anyway, essentially only sizes. + */ + +struct replay_interface +{ + enum starpu_data_interface_id id; + starpu_data_handle_t orig_handle; + size_t size; + size_t alloc_size; + size_t max_size; +}; + +static struct starpu_data_interface_ops replay_interface_ops; +static void register_replay(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + (void) home_node; + struct replay_interface *replay_interface = data_interface; + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct replay_interface *local_interface = + starpu_data_get_interface_on_node(handle, node); + + local_interface->id = replay_interface->id; + local_interface->orig_handle = replay_interface->orig_handle; + local_interface->size = replay_interface->size; + local_interface->alloc_size = replay_interface->alloc_size; + local_interface->max_size = replay_interface->max_size; + } +} + +static void replay_data_register(starpu_data_handle_t *handleptr, starpu_data_handle_t orig_handle, int home_node, size_t size, size_t alloc_size, size_t max_size) +{ + struct replay_interface interface = + { + .id = replay_interface_ops.interfaceid, + .orig_handle = orig_handle, + .size = size, + .alloc_size = alloc_size, + .max_size = max_size, + }; + + 
starpu_data_register(handleptr, home_node, &interface, &replay_interface_ops); +} + +static size_t replay_get_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->size; +} + +static size_t replay_get_alloc_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->alloc_size; +} + +static size_t replay_get_max_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->max_size; +} + +static uint32_t replay_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(replay_get_size(handle), 0); +} + +static int replay_compare(void *data_interface_a, void *data_interface_b) +{ + struct replay_interface *replay_a = data_interface_a; + struct replay_interface *replay_b = data_interface_b; + + /* Two variables are considered compatible if they have the same size */ + return replay_a->size == replay_b->size; +} + +static void display_replay(starpu_data_handle_t handle, FILE *f) +{ + struct replay_interface *replay_interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%lu/%lu/%lu\t", + (unsigned long) replay_interface->size, + (unsigned long) replay_interface->alloc_size, + (unsigned long) replay_interface->max_size); +} + +static starpu_ssize_t describe_replay(void *data_interface, char *buf, size_t size) +{ + struct replay_interface *replay_interface = data_interface; + return snprintf(buf, size, "r%lu/%lu/%lu\t", + (unsigned long) replay_interface->size, + (unsigned long) replay_interface->alloc_size, + (unsigned long) replay_interface->max_size); +} + +static starpu_ssize_t allocate_replay_on_node(void *data_interface, unsigned dst_node) +{ + struct replay_interface *replay_interface = data_interface; + 
starpu_memory_allocate(dst_node, replay_interface->alloc_size, STARPU_MEMORY_OVERFLOW); + return 0; +} + +static void free_replay_on_node(void *data_interface, unsigned dst_node) +{ + struct replay_interface *replay_interface = data_interface; + starpu_memory_deallocate(dst_node, replay_interface->alloc_size); +} + +static int replay_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + (void) dst_interface; + struct replay_interface *src = src_interface; + + /* We don't care about pointers */ + return starpu_interface_copy(1, 0, src_node, 1, 0, dst_node, src->size, async_data); +} + +static const struct starpu_data_copy_methods replay_copy_data_methods = +{ + .any_to_any = replay_copy, +}; + +static struct starpu_data_interface_ops replay_interface_ops = +{ + .register_data_handle = register_replay, + .allocate_data_on_node = allocate_replay_on_node, + .free_data_on_node = free_replay_on_node, + .copy_methods = &replay_copy_data_methods, + .get_size = replay_get_size, + .get_alloc_size = replay_get_alloc_size, + .get_max_size = replay_get_max_size, + .footprint = replay_footprint, + .compare = replay_compare, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct replay_interface), + .display = display_replay, + .pack_data = NULL, + .peek_data = NULL, + .unpack_data = NULL, + .describe = describe_replay, + + /* We want to observe actual allocations/deallocations */ + .dontcache = 1, +}; + + +/* [SUBMITORDER] The tree of the submit order */ + +static struct starpu_rbtree tree = STARPU_RBTREE_INITIALIZER; + +/* the cmp_fn arg for rb_tree_insert() */ +unsigned int diff(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) +{ + long oleft = ((struct task *) left_elm)->submit_order; + long oright = ((struct task *) right_elm)->submit_order; + if (oleft == -1 && oright == -1) + { + if (left_elm < right_elm) + return -1; + else + return 1; + } + return oleft - oright; +} 
+ +/* Settings for the perfmodel */ +struct task_arg +{ + uint32_t footprint; + unsigned narch; + double perf[]; +}; + +uint32_t get_footprint(struct starpu_task * task) +{ + return ((struct task_arg*) (task->cl_arg))->footprint; +} + +double arch_cost_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) +{ + int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + STARPU_ASSERT(device != -1); + (void) nimpl; + + /* Then, get the pointer to the value of the expected time */ + struct task_arg *arg = task->cl_arg; + if (device < (int) arg->narch) + { + double val = arg->perf[device]; + + if (!(val == 0 || isnan(val))) + return val; + } + + fprintf(stderr, "[starpu] Error, expected_time is 0 or lower (replay.c line : %d)", __LINE__- 6); + + return 0.0; +} + +/* End of settings */ + +static unsigned long nexecuted_tasks; +void dumb_kernel(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + nexecuted_tasks++; + if (!(nexecuted_tasks % 1000)) + { + fprintf(stderr, "\rExecuted task %lu...", nexecuted_tasks); + fflush(stdout); + } + + unsigned this_worker = starpu_worker_get_id_check(); + struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(this_worker, STARPU_NMAX_SCHED_CTXS); + + struct starpu_task *task = starpu_task_get_current(); + unsigned impl = starpu_task_get_implementation(task); + + double length = starpu_task_expected_length(task, perf_arch, impl); + + STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), + "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated", + starpu_task_get_name(task)); + + starpu_sleep(length / 1000000); +} + +/* [CODELET] Initialization of an unique codelet for all the tasks*/ +static int can_execute(unsigned worker_id, struct starpu_task *task, unsigned nimpl) +{ + struct starpu_perfmodel_arch * arch = starpu_worker_get_perf_archtype(worker_id, STARPU_NMAX_SCHED_CTXS); 
+ int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if (device == -1) + /* Doesn't exist yet, thus unknown, assuming it can not work there. */ + return 0; + (void) nimpl; + + /* Then, get the pointer to the value of the expected time */ + struct task_arg *arg = task->cl_arg; + if (device < (int) arg->narch) + { + double val = arg->perf[device]; + + if (!(val == 0 || isnan(val))) + return 1; + } + + return 0; +} + +static struct starpu_perfmodel myperfmodel = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = arch_cost_function, + .footprint = get_footprint, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { dumb_kernel }, + .cpu_funcs_name = { "dumb_kernel" }, + .cuda_funcs = { dumb_kernel }, + .opencl_funcs = { dumb_kernel }, + .nbuffers = STARPU_VARIABLE_NBUFFERS, + .can_execute = can_execute, + .model = &myperfmodel, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + + +/* * * * * * * * * * * * * * +* * * * * Functions * * * * * +* * * * * * * * * * * * * * */ + + +/* The following function checks if the program has to use static or dynamic arrays*/ +static int set_alloc_mode(int total_parameters) +{ + return total_parameters <= STARPU_NMAXBUFS; +} + +/* According to the allocation mode, modify handles_ptr and modes_ptr in static or dynamic */ +static void arrays_managing(int mode) +{ + if (mode) + { + handles_ptr = &handles[0]; + modes_ptr = &modes[0]; + reg_signal = &normal_reg_signal[0]; + } + else + { + _STARPU_MALLOC(handles_ptr, sizeof(*handles_ptr) * nb_parameters); + _STARPU_MALLOC(modes_ptr, sizeof(*modes_ptr) * nb_parameters); + _STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char)); + + } +} + +static unsigned count_number_tokens(const char* buffer, const char* delim) +{ + char* dup = strdup(buffer); + int result = 0; + char* token = strtok(dup, delim); + while(token != NULL) + { + ++result; + token = strtok(NULL, delim); + } + free(dup); + return result; +} + +/* Check if a handle hasn't been registered yet */ 
+static void variable_data_register_check(size_t * array_of_size, int nb_handles) +{ + int h, i; + starpu_data_handle_t orig_handles[nb_handles]; + + ARRAY_DUP(handles_ptr, orig_handles, nb_handles); + + for (h = 0 ; h < nb_handles ; h++) + { + if(reg_signal[h]) /* Get the register signal, if it's 1 do ... */ + { + struct handle * handles_cell; + + for (i = 0; i < h; i++) + { + /* Maybe we just registered it in this very h loop */ + if (handles_ptr[h] == orig_handles[i]) + { + handles_ptr[h] = handles_ptr[i]; + break; + } + } + + if (i == h) + { + _STARPU_MALLOC(handles_cell, sizeof(*handles_cell)); + STARPU_ASSERT(handles_cell != NULL); + + handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/ + + replay_data_register(handles_ptr+h, handles_ptr[h], + modes_ptr[h] & STARPU_R ? STARPU_MAIN_RAM : -1, + array_of_size[h], array_of_size[h], array_of_size[h]); + + handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */ + + HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell); + } + } + } +} + +void reset(void) +{ + control = NormalTask; + + if (name != NULL) + { + free(name); + name = NULL; + } + + if (model != NULL) + { + free(model); + model = NULL; + } + + if (sizes_set != NULL) + { + free(sizes_set); + sizes_set = NULL; + } + + if (reg_signal != NULL) + { + if (!alloc_mode) + { + free(reg_signal); + reg_signal = NULL; + } + else + { + ARRAY_INIT(reg_signal, nb_parameters); + } + } + + jobid = 0; + ndependson = 0; + tag = -1; + workerid = -1; + footprint = 0; + startTime = 0.0; + endTime = 0.0; + + if (submitorder != -1) + submitorder = -1; + + iteration = -1; + nb_parameters = 0; + alloc_mode = 1; +} + +void fix_wontuse_handle(struct task * wontuseTask) +{ + STARPU_ASSERT(wontuseTask); + + if (!wontuseTask->reg_signal) + /* Data was already registered when we created this task, so it's already a handle */ + return; + + struct handle 
*handle_tmp; + + /* Data was not registered when we created this task, so this is the application pointer, look it up now */ + HASH_FIND(hh, handles_hash, &wontuseTask->task.handles[0], sizeof(wontuseTask->task.handles[0]), handle_tmp); + + if (handle_tmp) + wontuseTask->task.handles[0] = handle_tmp->mem_ptr; + else + /* This data wasn't actually used, don't care about it */ + wontuseTask->task.handles[0] = NULL; +} + +/* Function that submits all the tasks (used when the program reaches EOF) */ +int submit_tasks(void) +{ + /* Add dependencies */ + + const struct starpu_rbtree * tmptree = &tree; + struct starpu_rbtree_node * currentNode = starpu_rbtree_first(tmptree); + long last_submitorder = 0; + + while (currentNode != NULL) + { + struct task * currentTask = (struct task *) currentNode; + + if (currentTask->type == NormalTask) + { + if (currentTask->submit_order != -1) + { + STARPU_ASSERT(currentTask->submit_order >= last_submitorder + 1); + + while (currentTask->submit_order > last_submitorder + 1) + { + /* Oops, some tasks were not submitted by original application, fake some */ + struct starpu_task *task = starpu_task_create(); + int ret; + task->cl = NULL; + task->name = "fake task for submit order"; + ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); + last_submitorder++; + } + } + + if (currentTask->ndependson > 0) + { + struct starpu_task * taskdeps[currentTask->ndependson]; + unsigned i, j = 0; + + for (i = 0; i < currentTask->ndependson; i++) + { + struct task * taskdep; + + /* Get the ith jobid of deps_jobid */ + HASH_FIND(hh, tasks, ¤tTask->deps[i], sizeof(jobid), taskdep); + + if(taskdep) + { + taskdeps[j] = &taskdep->task; + j ++; + } + } + + starpu_task_declare_deps_array(¤tTask->task, j, taskdeps); + } + + if (!(currentTask->iteration == -1)) + starpu_iteration_push(currentTask->iteration); + + applySchedRec(¤tTask->task, currentTask->submit_order); + if (currentTask->submit_order == -1) + currentTask->task.no_submitorder = 1; + int 
ret_val = starpu_task_submit(¤tTask->task); + + if (!(currentTask->iteration == -1)) + starpu_iteration_pop(); + + if (ret_val != 0) + { + fprintf(stderr, "\nWhile submitting task %ld (%s): return %d\n", + currentTask->submit_order, + currentTask->task.name? currentTask->task.name : "unknown", + ret_val); + return -1; + } + + + //fprintf(stderr, "submitting task %s (%lu, %llu)\n", currentTask->task.name?currentTask->task.name:"anonymous", currentTask->jobid, (unsigned long long) currentTask->task.tag_id); + if (!(currentTask->submit_order % 1000)) + { + fprintf(stderr, "\rSubmitted task order %ld...", currentTask->submit_order); + fflush(stdout); + } + if (currentTask->submit_order != -1) + last_submitorder++; + } + + else + { + fix_wontuse_handle(currentTask); /* Add the handle in the wontuse task */ + if (currentTask->task.handles[0]) + { + starpu_data_wont_use(currentTask->task.handles[0]); + last_submitorder++; + } + } + + currentNode = starpu_rbtree_next(currentNode); + + } + fprintf(stderr, " done.\n"); + + return 1; +} + + +/* * * * * * * * * * * * * * * */ +/* * * * * * MAIN * * * * * * */ +/* * * * * * * * * * * * * * */ + +static void usage(const char *program) +{ + fprintf(stderr,"Usage: %s [--static-workerid] tasks.rec [sched.rec]\n", program); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + FILE *rec; + char *s; + const char *tasks_rec = NULL; + const char *sched_rec = NULL; + unsigned i; + size_t s_allocated = 128; + + unsigned long nread_tasks = 0; + + /* FIXME: we do not support data with sequential consistency disabled */ + + _STARPU_MALLOC(s, s_allocated); + dependson_size = REPLAY_NMAX_DEPENDENCIES; /* Change the value of REPLAY_NMAX_DEPENCIES to modify the number of dependencies */ + _STARPU_MALLOC(dependson, dependson_size * sizeof (* dependson)); + alloc_mode = 1; + + for (i = 1; i < (unsigned) argc; i++) + { + if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + usage(argv[0]); + } + else if (!strcmp(argv[i], 
"--static-workerid")) + { + static_workerid = 1; + } + else + { + if (!tasks_rec) + tasks_rec = argv[i]; + else if (!sched_rec) + sched_rec = argv[i]; + else + usage(argv[0]); + } + } + + if (!tasks_rec) + usage(argv[0]); + + if (sched_rec) + schedRecInit(sched_rec); + + rec = fopen(tasks_rec, "r"); + if (!rec) + { + fprintf(stderr,"unable to open file %s: %s\n", tasks_rec, strerror(errno)); + exit(EXIT_FAILURE); + } + + int ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + + /* Read line by line, and on empty line submit the task with the accumulated information */ + reset(); + + double start = starpu_timing_now(); + int linenum = 0; + + while(1) + { + char *ln; + + if (!fgets(s, s_allocated, rec)) + { + fprintf(stderr, " done.\n"); + int submitted = submit_tasks(); + + if (submitted == -1) + { + goto enodev; + } + + goto eof; + } + + while (!(ln = strchr(s, '\n'))) + { + /* fprintf(stderr,"buffer size %d too small, doubling it\n", s_allocated); */ + _STARPU_REALLOC(s, s_allocated * 2); + + if (!fgets(s + s_allocated-1, s_allocated+1, rec)) + { + fprintf(stderr, "\n"); + int submitted = submit_tasks(); + + if (submitted == -1) + { + goto enodev; + } + + goto eof; + } + + s_allocated *= 2; + } + + linenum++; + + if (ln == s) + { + /* Empty line, do task */ + + struct task * task; + _STARPU_MALLOC(task, sizeof(*task)); + + starpu_task_init(&task->task); + task->deps = NULL; + + task->submit_order = submitorder; + + starpu_rbtree_node_init(&task->node); + starpu_rbtree_insert(&tree, &task->node, diff); + + + task->jobid = jobid; + task->iteration = iteration; + + if (name != NULL) + task->task.name = strdup(name); + + task->type = control; + + if (control == NormalTask) + { + if (workerid >= 0) + { + task->task.priority = priority; + task->task.cl = &cl; + if (static_workerid) + { + task->task.workerid = workerid; + task->task.execute_on_a_specific_worker = 1; + } + + if (alloc_mode) + { + /* Duplicating the handles stored (and registered in the current 
context) into the task */ + + ARRAY_DUP(modes_ptr, task->task.modes, nb_parameters); + ARRAY_DUP(modes_ptr, task->task.cl->modes, nb_parameters); + variable_data_register_check(sizes_set, nb_parameters); + ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); + } + else + { + task->task.dyn_modes = modes_ptr; + _STARPU_MALLOC(task->task.cl->dyn_modes, (sizeof(*task->task.cl->dyn_modes) * nb_parameters)); + ARRAY_DUP(modes_ptr, task->task.cl->dyn_modes, nb_parameters); + variable_data_register_check(sizes_set, nb_parameters); + task->task.dyn_handles = handles_ptr; + } + + task->task.nbuffers = nb_parameters; + + struct perfmodel * realmodel; + + HASH_FIND_STR(model_hash, model, realmodel); + + if (realmodel == NULL) + { + int len = strlen(model); + _STARPU_CALLOC(realmodel, 1, sizeof(struct perfmodel)); + + _STARPU_MALLOC(realmodel->model_name, sizeof(char) * (len+1)); + realmodel->model_name = strcpy(realmodel->model_name, model); + + starpu_perfmodel_init(&realmodel->perfmodel); + + int error = starpu_perfmodel_load_symbol(model, &realmodel->perfmodel); + + if (!error) + { + HASH_ADD_STR(model_hash, model_name, realmodel); + } + else + { + + fprintf(stderr, "[starpu][Warning] Error loading perfmodel symbol %s\n", model); + fprintf(stderr, "[starpu][Warning] Taking only measurements from the given execution, and forcing execution on worker %d\n", workerid); + starpu_perfmodel_unload_model(&realmodel->perfmodel); + free(realmodel->model_name); + free(realmodel); + realmodel = NULL; + } + + } + + struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0); + + unsigned comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); + unsigned narch = starpu_perfmodel_get_narch_combs(); + + struct task_arg *arg; + _STARPU_MALLOC(arg, sizeof(struct task_arg) + sizeof(double) * narch); + arg->footprint = footprint; + arg->narch = narch; + double * perfTime = arg->perf; + + if (realmodel == NULL) + { + /* Erf, do without perfmodel, for 
execution there */ + task->task.workerid = workerid; + task->task.execute_on_a_specific_worker = 1; + for (i = 0; i < narch ; i++) + { + if (i == comb) + perfTime[i] = endTime - startTime; + else + perfTime[i] = NAN; + } + } + else + { + int one = 0; + for (i = 0; i < narch ; i++) + { + arch = starpu_perfmodel_arch_comb_fetch(i); + perfTime[i] = starpu_perfmodel_history_based_expected_perf(&realmodel->perfmodel, arch, footprint); + if (!(perfTime[i] == 0 || isnan(perfTime[i]))) + one = 1; + } + if (!one) + { + fprintf(stderr, "We do not have any performance measurement for symbol '%s' for footprint %x, we can not execute this", model, footprint); + exit(EXIT_FAILURE); + } + } + + task->task.cl_arg = arg; + task->task.flops = flops; + total_flops += flops; + } + + task->task.cl_arg_size = 0; + task->task.tag_id = tag; + task->task.use_tag = 1; + + task->ndependson = ndependson; + if (ndependson > 0) + { + _STARPU_MALLOC(task->deps, ndependson * sizeof (* task->deps)); + ARRAY_DUP(dependson, task->deps, ndependson); + } + } + + else + { + STARPU_ASSERT(nb_parameters == 1); + task->reg_signal = reg_signal[0]; + ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); + } + + /* Add this task to task hash */ + HASH_ADD(hh, tasks, jobid, sizeof(jobid), task); + + nread_tasks++; + if (!(nread_tasks % 1000)) + { + fprintf(stderr, "\rRead task %lu...", nread_tasks); + fflush(stdout); + } + + reset(); + } + + /* Record various information */ +#define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) + + else if(TEST("Control")) + { + char * c = s+9; + + if(!strncmp(c, "WontUse", 7)) + { + control = WontUseTask; + nb_parameters = 1; + alloc_mode = set_alloc_mode(nb_parameters); + arrays_managing(alloc_mode); + } + else + control = NormalTask; + } + else if (TEST("Name")) + { + *ln = 0; + name = strdup(s+6); + } + else if (TEST("Model")) + { + *ln = 0; + model = strdup(s+7); + } + else if (TEST("JobId")) + jobid = atol(s+7); + else if(TEST("SubmitOrder")) + 
submitorder = atoi(s+13); + else if (TEST("DependsOn")) + { + char *c = s + 11; + + for (ndependson = 0; *c != '\n'; ndependson++) + { + if (ndependson >= dependson_size) + { + dependson_size *= 2; + _STARPU_REALLOC(dependson, dependson_size * sizeof(*dependson)); + } + dependson[ndependson] = strtol(c, &c, 10); + } + } + else if (TEST("Tag")) + { + tag = strtol(s+5, NULL, 16); + } + else if (TEST("WorkerId")) + { + workerid = atoi(s+10); + } + else if (TEST("Footprint")) + { + footprint = strtoul(s+11, NULL, 16); + } + else if (TEST("Parameters")) + { + /* Nothing to do */ + } + else if (TEST("Handles")) + { + *ln = 0; + char *buffer = s + 9; + const char *delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + + if(nb_parameters == 0) + { + nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + char* token = strtok(buffer, delim); + for (i = 0 ; i < nb_parameters ; i++) + { + STARPU_ASSERT(token); + struct handle *handles_cell; /* A cell of the hash table for the handles */ + starpu_data_handle_t handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */ + + HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */ + + /* If it wasn't, then add it to the hash table */ + if (handles_cell == NULL) + { + /* Hide the initial handle from the file into the handles array to find it when necessary */ + handles_ptr[i] = handle_value; + reg_signal[i] = 1; + } + else + { + handles_ptr[i] = handles_cell->mem_ptr; + reg_signal[i] = 0; + } + + token = strtok(NULL, delim); + } + } + else if (TEST("Modes")) + { + *ln = 0; + char * buffer = s + 7; + unsigned mode_i = 0; + const char * delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + + if(nb_parameters == 0) + { + 
nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + char* token = strtok(buffer, delim); + + while (token != NULL && mode_i < nb_parameters) + { + /* Subject to the names of starpu modes enumerator are not modified */ + if (!strncmp(token, "RW", 2)) + { + *(modes_ptr+mode_i) = STARPU_RW; + mode_i++; + } + else if (!strncmp(token, "R", 1)) + { + *(modes_ptr+mode_i) = STARPU_R; + mode_i++; + } + else if (!strncmp(token, "W", 1)) + { + *(modes_ptr+mode_i) = STARPU_W; + mode_i++; + } + /* Other cases produce a warning*/ + else + { + fprintf(stderr, "[Warning] A mode is different from R/W (jobid task : %lu)", jobid); + } + token = strtok(NULL, delim); + } + } + else if (TEST("Sizes")) + { + *ln = 0; + char * buffer = s + 7; + const char * delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + unsigned k = 0; + + if(nb_parameters == 0) + { + nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + _STARPU_MALLOC(sizes_set, nb_parameters * sizeof(size_t)); + + char * token = strtok(buffer, delim); + while (token != NULL && k < nb_parameters) + { + sizes_set[k] = strtol(token, NULL, 10); + token = strtok(NULL, delim); + + k++; + } + } + else if (TEST("StartTime")) + { + startTime = strtod(s+11, NULL); + } + else if (TEST("EndTime")) + { + endTime = strtod(s+9, NULL); + } + else if (TEST("GFlop")) + { + flops = 1000000000 * strtod(s+7, NULL); + } + else if (TEST("Iteration")) + { + iteration = (unsigned) strtol(s+11, NULL, 10); + } + else if (TEST("Priority")) + { + priority = strtol(s + 10, NULL, 10); + } + } + +eof: + + starpu_task_wait_for_all(); + fprintf(stderr, " done.\n"); + + printf("%g ms", (starpu_timing_now() - start) / 1000.); + if (total_flops != 0.) 
+ printf("\t%g GF/s", (total_flops / (starpu_timing_now() - start)) / 1000.); + printf("\n"); + + /* FREE allocated memory */ + + free(dependson); + free(s); + + /* End of FREE */ + + struct handle *handle=NULL, *handletmp=NULL; + HASH_ITER(hh, handles_hash, handle, handletmp) + { + starpu_data_unregister(handle->mem_ptr); + HASH_DEL(handles_hash, handle); + free(handle); + } + + struct perfmodel *model_s=NULL, *modeltmp=NULL; + HASH_ITER(hh, model_hash, model_s, modeltmp) + { + starpu_perfmodel_unload_model(&model_s->perfmodel); + HASH_DEL(model_hash, model_s); + free(model_s->model_name); + free(model_s); + } + + struct task *task=NULL, *tasktmp=NULL; + HASH_ITER(hh, tasks, task, tasktmp) + { + free(task->task.cl_arg); + free((char*)task->task.name); + + if (task->task.dyn_handles != NULL) + { + free(task->task.dyn_handles); + free(task->task.dyn_modes); + } + + HASH_DEL(tasks, task); + starpu_task_clean(&task->task); + free(task->deps); + starpu_rbtree_remove(&tree, &task->node); + free(task); + } + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/mpi/tools/starpu_replay_sched.c b/mpi/tools/starpu_replay_sched.c new file mode 100644 index 0000000..3b5c674 --- /dev/null +++ b/mpi/tools/starpu_replay_sched.c @@ -0,0 +1,439 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This reads a sched.rec file and mangles submitted tasks according to the hint + * from that file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +// +// sched.rec files look like this: +// +// SubmitOrder: 1234 +// Priority: 12 +// SpecificWorker: 1 +// Workers: 0 1 2 +// DependsOn: 1235 +// +// Prefetch: 1234 +// DependsOn: 1233 +// MemoryNode: 1 +// Parameters: 1 + +#define CPY(src, dst, n) memcpy(dst, src, n * sizeof(*dst)) + +#if 0 +#define debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#else +#define debug(fmt, ...) (void)0 +#endif + +static unsigned long submitorder; /* Also use as prefetchtag */ +static int priority; +static int eosw; +static unsigned workerorder; +static int memnode; +/* FIXME: MAXs */ +static uint32_t workers[STARPU_NMAXWORKERS/32]; +static unsigned nworkers; +static unsigned dependson[STARPU_NMAXBUFS]; +static unsigned ndependson; +static unsigned params[STARPU_NMAXBUFS]; +static unsigned nparams; + +static enum sched_type +{ + NormalTask, + PrefetchTask, +} sched_type; + +static struct starpu_codelet cl_prefetch = +{ + .where = STARPU_NOWHERE, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +static struct task +{ + UT_hash_handle hh; + + unsigned long submitorder; + int priority; + int memnode; + unsigned dependson[STARPU_NMAXBUFS]; + unsigned ndependson; + struct starpu_task *depends_tasks[STARPU_NMAXBUFS]; + + /* For real tasks */ + int eosw; + unsigned workerorder; + uint32_t workers[STARPU_NMAXWORKERS/32]; + unsigned nworkers; + + /* For prefetch tasks */ + unsigned params[STARPU_NMAXBUFS]; + unsigned nparams; + struct starpu_task *pref_task; /* Actual prefetch task */ +} *mangled_tasks, *prefetch_tasks; + +LIST_TYPE(dep, + struct task *task; + unsigned i; +); + +struct deps +{ + UT_hash_handle hh; + unsigned long submitorder; + struct dep_list list; +} *dependencies = NULL; + +static void 
reset(void) +{ + submitorder = 0; + priority = INT_MIN; + eosw = -1; + memset(&workers, 0, sizeof(workers)); + nworkers = 0; + ndependson = 0; + sched_type = NormalTask; + nparams = 0; + memnode = -1; + workerorder = 0; +} + +/* TODO : respecter l'ordre de soumission des tâches SubmitOrder */ + + +static void checkField(char * s) +{ + /* Record various information */ +#define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) + + if (TEST("SubmitOrder")) + { + s = s + strlen("SubmitOrder: "); + submitorder = strtol(s, NULL, 10); + } + + else if (TEST("Priority")) + { + s = s + strlen("Priority: "); + priority = strtol(s, NULL, 10); + } + + else if (TEST("SpecificWorker")) + { + s = s + strlen("SpecificWorker: "); + eosw = strtol(s, NULL, 10); + } + + else if (TEST("Workers")) + { + s = s + strlen("Workers: "); + char * delim = " "; + char * token = strtok(s, delim); + int i = 0; + + while (token != NULL) + { + int k = strtol(token, NULL, 10); + STARPU_ASSERT_MSG(k < STARPU_NMAXWORKERS, "%d is bigger than maximum %d\n", k, STARPU_NMAXWORKERS); + workers[k/(sizeof(*workers)*8)] |= (1 << (k%(sizeof(*workers)*8))); + i++; + token = strtok(NULL, delim); + } + + nworkers = i; + } + + else if (TEST("DependsOn")) + { + /* NOTE : dependsons (in the sched.rec) should be the submit orders of the dependencies, + otherwise it can occur an undefined behaviour + (contrary to the tasks.rec where dependencies are jobids */ + unsigned i = 0; + char * delim = " "; + char * token = strtok(s+strlen("DependsOn: "), delim); + + while (token != NULL) + { + dependson[i] = strtol(token, NULL, 10); + i++; + token = strtok(NULL, delim); + } + ndependson = i; + } + + else if (TEST("Prefetch")) + { + s = s + strlen("Prefetch: "); + submitorder = strtol(s, NULL, 10); + sched_type = PrefetchTask; + } + + else if (TEST("Parameters")) + { + s = s + strlen("Parameters: "); + char * delim = " "; + char * token = strtok(s, delim); + int i = 0; + + while (token != NULL) + { + params[i] = 
strtol(token, NULL, 10); + i++; + token = strtok(NULL, delim); + } + nparams = i; + } + + else if (TEST("MemoryNode")) + { + s = s + strlen("MemoryNode: "); + memnode = strtol(s, NULL, 10); + } + + else if (TEST("Workerorder")) + { + s = s + strlen("Workerorder: "); + workerorder = strtol(s, NULL, 10); + } +} + + +void schedRecInit(const char * filename) +{ + FILE * f = fopen(filename, "r"); + + if(f == NULL) + { + fprintf(stderr,"unable to open file %s: %s\n", filename, strerror(errno)); + return; + } + + size_t lnsize = 128; + char *s; + _STARPU_MALLOC(s, sizeof(*s) * lnsize); + int eof = 0; + + reset(); + + while(!eof && !feof(f)) + { + char *ln; + + /* Get the line */ + if (!fgets(s, lnsize, f)) + { + eof = 1; + } + while (!(ln = strchr(s, '\n'))) + { + _STARPU_REALLOC(s, lnsize * 2); + if (!fgets(s + lnsize-1, lnsize+1, f)) + { + eof = 1; + break; + } + lnsize *= 2; + } + + if ((ln == s || eof) && submitorder) + { + /* Empty line, doit */ + struct task * task; + unsigned i; + + _STARPU_MALLOC(task, sizeof(*task)); + task->submitorder = submitorder; + task->priority = priority; + task->memnode = memnode; + CPY(dependson, task->dependson, ndependson); + task->ndependson = ndependson; + + /* Also record submitorder of tasks that this one will need to depend on */ + for (i = 0; i < ndependson; i++) + { + struct dep *dep; + struct starpu_task *starpu_task; + _STARPU_MALLOC(dep, sizeof(*dep)); + dep->task = task; + dep->i = i; + + struct deps *deps; + HASH_FIND(hh, dependencies, &task->dependson[i], sizeof(submitorder), deps); + if (!deps) + { + /* No task depends on this one yet, add a cell for it */ + _STARPU_MALLOC(deps, sizeof(*deps)); + dep_list_init(&deps->list); + deps->submitorder = task->dependson[i]; + HASH_ADD(hh, dependencies, submitorder, sizeof(submitorder), deps); + } + dep_list_push_back(&deps->list, dep); + + /* Create the intermediate task */ + starpu_task = dep->task->depends_tasks[i] = starpu_task_create(); + starpu_task->cl = NULL; + 
starpu_task->destroy = 0; + starpu_task->no_submitorder = 1; + } + + switch (sched_type) + { + case NormalTask: + /* A new task to mangle, record what needs to be done */ + task->eosw = eosw; + task->workerorder = workerorder; + CPY(workers, task->workers, STARPU_NMAXWORKERS/32); + task->nworkers = nworkers; + STARPU_ASSERT(nparams == 0); + + debug("adding mangled task %lu\n", submitorder); + HASH_ADD(hh, mangled_tasks, submitorder, sizeof(submitorder), task); + break; + + case PrefetchTask: + STARPU_ASSERT(memnode >= 0); + STARPU_ASSERT(eosw == -1); + STARPU_ASSERT(workerorder == 0); + STARPU_ASSERT(nworkers == 0); + CPY(params, task->params, nparams); + task->nparams = nparams; + /* TODO: more params */ + STARPU_ASSERT_MSG(nparams == 1, "only supports one parameter at a time"); + + debug("adding prefetch task for %lu\n", submitorder); + HASH_ADD(hh, prefetch_tasks, submitorder, sizeof(submitorder), task); + break; + default: + STARPU_ASSERT(0); + break; + } + + reset(); + } + else checkField(s); + } + + fclose(f); + + free(s); +} + +static void do_prefetch(void *arg) +{ + unsigned node = (uintptr_t) arg; + starpu_data_idle_prefetch_on_node(starpu_task_get_current()->handles[0], node, 1); +} + +void applySchedRec(struct starpu_task *starpu_task, unsigned long submit_order) +{ + struct task *task; + struct deps *deps; + int ret; + + HASH_FIND(hh, dependencies, &submit_order, sizeof(submit_order), deps); + if (deps) + { + struct dep *dep; + for (dep = dep_list_begin(&deps->list); + dep != dep_list_end(&deps->list); + dep = dep_list_next(dep)) + { + debug("task %lu is %d-th dep for %lu\n", submit_order, dep->i, dep->task->submitorder); + /* Some task will depend on this one, make the dependency */ + starpu_task_declare_deps_array(dep->task->depends_tasks[dep->i], 1, &starpu_task); + ret = starpu_task_submit(dep->task->depends_tasks[dep->i]); + STARPU_ASSERT(ret == 0); + } + } + + HASH_FIND(hh, prefetch_tasks, &submit_order, sizeof(submit_order), task); + if (task) + 
{ + /* We want to submit a prefetch for this task */ + debug("task %lu has a prefetch for parameter %d to node %d\n", submit_order, task->params[0], task->memnode); + struct starpu_task *pref_task; + pref_task = task->pref_task = starpu_task_create(); + pref_task->cl = &cl_prefetch; + pref_task->destroy = 1; + pref_task->no_submitorder = 1; + pref_task->callback_arg = (void*)(uintptr_t) task->memnode; + pref_task->callback_func = do_prefetch; + + /* TODO: more params */ + pref_task->handles[0] = starpu_task->handles[task->params[0]]; + /* Make it depend on intermediate tasks */ + if (task->ndependson) + { + debug("%u dependencies\n", task->ndependson); + starpu_task_declare_deps_array(pref_task, task->ndependson, task->depends_tasks); + } + ret = starpu_task_submit(pref_task); + STARPU_ASSERT(ret == 0); + } + + HASH_FIND(hh, mangled_tasks, &submit_order, sizeof(submit_order), task); + if (task == NULL) + /* Nothing to do for this */ + return; + + debug("mangling task %lu\n", submit_order); + if (task->eosw >= 0) + { + debug("execute on a specific worker %d\n", task->eosw); + starpu_task->workerid = task->eosw; + starpu_task->execute_on_a_specific_worker = 1; + } + if (task->workerorder > 0) + { + debug("workerorder %d\n", task->workerorder); + starpu_task->workerorder = task->workerorder; + } + if (task->priority != INT_MIN) + { + debug("priority %d\n", task->priority); + starpu_task->priority = task->priority; + } + if (task->nworkers) + { + debug("%u workers %x\n", task->nworkers, task->workers[0]); + starpu_task->workerids_len = sizeof(task->workers) / sizeof(task->workers[0]); + _STARPU_MALLOC(starpu_task->workerids, task->nworkers * sizeof(*starpu_task->workerids)); + CPY(task->workers, starpu_task->workerids, STARPU_NMAXWORKERS/32); + } + + if (task->ndependson) + { + debug("%u dependencies\n", task->ndependson); + starpu_task_declare_deps_array(starpu_task, task->ndependson, task->depends_tasks); + } + + /* And now, let it go! 
*/ +} diff --git a/packages/libstarpu.pc.in b/packages/libstarpu.pc.in new file mode 100644 index 0000000..d6b24fc --- /dev/null +++ b/packages/libstarpu.pc.in @@ -0,0 +1,29 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/packages/starpu-1.0.pc.in b/packages/starpu-1.0.pc.in new file mode 100644 index 0000000..4f7b190 --- /dev/null +++ b/packages/starpu-1.0.pc.in @@ -0,0 +1,29 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/packages/starpu-1.1.pc.in b/packages/starpu-1.1.pc.in new file mode 100644 index 0000000..89dc6eb --- /dev/null +++ b/packages/starpu-1.1.pc.in @@ -0,0 +1,29 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/packages/starpu-1.2.pc.in b/packages/starpu-1.2.pc.in new file mode 100644 index 0000000..89dc6eb --- /dev/null +++ b/packages/starpu-1.2.pc.in @@ -0,0 +1,29 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/packages/starpu-1.3.in b/packages/starpu-1.3.in new file mode 100644 index 0000000..ea282b6 --- /dev/null +++ b/packages/starpu-1.3.in @@ -0,0 +1,55 @@ +#%Module +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+
+# Help text for `module help`; each dollar sign is escaped so the variable name is printed literally, not substituted.
+proc ModulesHelp { } {
+    puts stderr "\t[module-info name] - loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment"
+    puts stderr "\tThe following environment variables are modified:"
+    puts stderr "\t\$PATH"
+    puts stderr "\t\$LD_LIBRARY_PATH"
+    puts stderr "\t\$LIBRARY_PATH"
+    puts stderr "\t\$INCLUDE"
+    puts stderr "\t\$CPATH"
+    puts stderr "\t\$PKG_CONFIG_PATH"
+    puts stderr "\t\$MANPATH"
+    puts stderr "\t\$PYTHONPATH"
+}
+
+set prefix @prefix@
+
+conflict starpu
+
+module-whatis "loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment"
+
+# Refuse to load when the installation prefix is missing; break aborts the evaluation of this modulefile.
+if {![file exists $prefix]} {
+    puts stderr "\t[module-info name] Load Error: $prefix does not exist"
+    break
+}
+
+set exec_prefix @exec_prefix@
+set libdir @libdir@
+set datarootdir @datarootdir@
+
+prepend-path PATH @bindir@
+prepend-path LD_LIBRARY_PATH @libdir@
+prepend-path LIBRARY_PATH @libdir@
+prepend-path INCLUDE @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@
+prepend-path CPATH @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@
+prepend-path PKG_CONFIG_PATH @libdir@/pkgconfig
+prepend-path MANPATH @mandir@
+prepend-path PYTHONPATH @libdir@/python@PYTHON_VERSION@/site-packages
diff --git a/packages/starpu-1.3.pc.in b/packages/starpu-1.3.pc.in
new file mode 100644
index 0000000..89dc6eb
--- /dev/null
+++ b/packages/starpu-1.3.pc.in
@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/packages/starpu-1.4.in b/packages/starpu-1.4.in new file mode 100644 index 0000000..ea282b6 --- /dev/null +++ b/packages/starpu-1.4.in @@ -0,0 +1,55 @@ +#%Module +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+
+# Help text for `module help`; each dollar sign is escaped so the variable name is printed literally, not substituted.
+proc ModulesHelp { } {
+    puts stderr "\t[module-info name] - loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment"
+    puts stderr "\tThe following environment variables are modified:"
+    puts stderr "\t\$PATH"
+    puts stderr "\t\$LD_LIBRARY_PATH"
+    puts stderr "\t\$LIBRARY_PATH"
+    puts stderr "\t\$INCLUDE"
+    puts stderr "\t\$CPATH"
+    puts stderr "\t\$PKG_CONFIG_PATH"
+    puts stderr "\t\$MANPATH"
+    puts stderr "\t\$PYTHONPATH"
+}
+
+set prefix @prefix@
+
+conflict starpu
+
+module-whatis "loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment"
+
+# Refuse to load when the installation prefix is missing; break aborts the evaluation of this modulefile.
+if {![file exists $prefix]} {
+    puts stderr "\t[module-info name] Load Error: $prefix does not exist"
+    break
+}
+
+set exec_prefix @exec_prefix@
+set libdir @libdir@
+set datarootdir @datarootdir@
+
+prepend-path PATH @bindir@
+prepend-path LD_LIBRARY_PATH @libdir@
+prepend-path LIBRARY_PATH @libdir@
+prepend-path INCLUDE @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@
+prepend-path CPATH @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@
+prepend-path PKG_CONFIG_PATH @libdir@/pkgconfig
+prepend-path MANPATH @mandir@
+prepend-path PYTHONPATH @libdir@/python@PYTHON_VERSION@/site-packages
diff --git a/packages/starpu-1.4.pc.in b/packages/starpu-1.4.pc.in
new file mode 100644
index 0000000..89dc6eb
--- /dev/null
+++ b/packages/starpu-1.4.pc.in
@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ +starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ + +Name: starpu +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ +Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ +Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ +Requires: @HWLOC_REQUIRES@ diff --git a/sc_hypervisor/Makefile.am b/sc_hypervisor/Makefile.am new file mode 100644 index 0000000..aa971ac --- /dev/null +++ b/sc_hypervisor/Makefile.am @@ -0,0 +1,27 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = src examples + +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) + +versinclude_HEADERS = include/sc_hypervisor.h \ + include/sc_hypervisor_config.h \ + include/sc_hypervisor_monitoring.h \ + include/sc_hypervisor_policy.h \ + include/sc_hypervisor_lp.h diff --git a/sc_hypervisor/Makefile.in b/sc_hypervisor/Makefile.in new file mode 100644 index 0000000..b103a97 --- /dev/null +++ b/sc_hypervisor/Makefile.in @@ -0,0 +1,952 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = sc_hypervisor +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + 
$(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ 
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(versincludedir)" +HEADERS = $(versinclude_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ 
+DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE 
= @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = 
@STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ 
+doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src examples +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = include/sc_hypervisor.h \ + include/sc_hypervisor_config.h \ + include/sc_hypervisor_monitoring.h \ + include/sc_hypervisor_policy.h \ + include/sc_hypervisor_lp.h + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/Makefile'; \ + $(am__cd) 
$(top_srcdir) && \ + $(AUTOMAKE) --foreign sc_hypervisor/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-versincludeHEADERS: $(versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(versincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-versincludeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-versincludeHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am 
install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip install-versincludeHEADERS installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-versincludeHEADERS + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: # run 'recheck' in each $(SUBDIRS) entry via $(MAKE) so make flags propagate; exit 1 if any sub-make fails + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: # run 'showcheckfailed' in each $(SUBDIRS) entry (recipe echo suppressed); exit 1 if any sub-make fails + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: # run 'showfailed' in each $(SUBDIRS) entry with -s (silent sub-make); exit 1 if any sub-make fails + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: # run 'showcheck' in each $(SUBDIRS) entry; exit 1 if any sub-make fails + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: # run 'showsuite' in each $(SUBDIRS) entry; exit 1 if any sub-make fails + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: diff --git a/sc_hypervisor/examples/Makefile.am b/sc_hypervisor/examples/Makefile.am new file mode 100644 index 0000000..f927b43 --- /dev/null +++ b/sc_hypervisor/examples/Makefile.am @@ -0,0 +1,55 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +include $(top_srcdir)/make/starpu-tests.mk + +AM_CFLAGS += $(MAGMA_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/examples -I$(top_builddir)/include -I$(top_srcdir)/sc_hypervisor/include -I$(top_srcdir)/sc_hypervisor/examples $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_CUDA_LDFLAGS) + +noinst_PROGRAMS = \ + app_driven_test/app_driven_test \ + lp_test/lp_test \ + lp_test/lp_resize_test \ + hierarchical_ctxs/resize_hierarchical_ctxs + +if !STARPU_NO_BLAS_LIB +noinst_PROGRAMS += \ + cholesky/cholesky_implicit + +noinst_HEADERS = \ + cholesky/cholesky.h \ + sched_ctx_utils/sched_ctx_utils.h +endif + +if !STARPU_NO_BLAS_LIB + +cholesky_cholesky_implicit_SOURCES = \ + cholesky/cholesky_implicit.c \ + cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c \ + sched_ctx_utils/sched_ctx_utils.c \ + ../../examples/common/blas.c + +cholesky_cholesky_implicit_LDADD = \ + 
$(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ + $(STARPU_BLAS_LDFLAGS) + +endif + +app_driven_test_app_driven_test_LDADD = \ + $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la diff --git a/sc_hypervisor/examples/Makefile.in b/sc_hypervisor/examples/Makefile.in new file mode 100644 index 0000000..531cdc0 --- /dev/null +++ b/sc_hypervisor/examples/Makefile.in @@ -0,0 +1,1168 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = app_driven_test/app_driven_test$(EXEEXT) \ + lp_test/lp_test$(EXEEXT) lp_test/lp_resize_test$(EXEEXT) \ + hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT) \ + $(am__EXEEXT_1) +@STARPU_NO_BLAS_LIB_FALSE@am__append_3 = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit + +subdir = sc_hypervisor/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) 
+mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_1 = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) +app_driven_test_app_driven_test_SOURCES = \ + app_driven_test/app_driven_test.c +am__dirstamp = $(am__leading_dot)dirstamp +app_driven_test_app_driven_test_OBJECTS = \ + app_driven_test/app_driven_test.$(OBJEXT) +app_driven_test_app_driven_test_DEPENDENCIES = \ + $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +am__cholesky_cholesky_implicit_SOURCES_DIST = \ + cholesky/cholesky_implicit.c cholesky/cholesky_models.c \ + cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ + ../../examples/common/blas.c +@STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_implicit_OBJECTS = cholesky/cholesky_implicit.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) +cholesky_cholesky_implicit_OBJECTS = \ + $(am_cholesky_cholesky_implicit_OBJECTS) +am__DEPENDENCIES_1 = +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_DEPENDENCIES = $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ +@STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) +hierarchical_ctxs_resize_hierarchical_ctxs_SOURCES = \ + hierarchical_ctxs/resize_hierarchical_ctxs.c +hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS = \ + hierarchical_ctxs/resize_hierarchical_ctxs.$(OBJEXT) +hierarchical_ctxs_resize_hierarchical_ctxs_LDADD = $(LDADD) 
+lp_test_lp_resize_test_SOURCES = lp_test/lp_resize_test.c +lp_test_lp_resize_test_OBJECTS = lp_test/lp_resize_test.$(OBJEXT) +lp_test_lp_resize_test_LDADD = $(LDADD) +lp_test_lp_test_SOURCES = lp_test/lp_test.c +lp_test_lp_test_OBJECTS = lp_test/lp_test.$(OBJEXT) +lp_test_lp_test_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ../../examples/common/$(DEPDIR)/blas.Po \ + app_driven_test/$(DEPDIR)/app_driven_test.Po \ + cholesky/$(DEPDIR)/cholesky_implicit.Po \ + cholesky/$(DEPDIR)/cholesky_kernels.Po \ + cholesky/$(DEPDIR)/cholesky_models.Po \ + hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po \ + lp_test/$(DEPDIR)/lp_resize_test.Po \ + lp_test/$(DEPDIR)/lp_test.Po \ + sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 
= +SOURCES = app_driven_test/app_driven_test.c \ + $(cholesky_cholesky_implicit_SOURCES) \ + hierarchical_ctxs/resize_hierarchical_ctxs.c \ + lp_test/lp_resize_test.c lp_test/lp_test.c +DIST_SOURCES = app_driven_test/app_driven_test.c \ + $(am__cholesky_cholesky_implicit_SOURCES_DIST) \ + hierarchical_ctxs/resize_hierarchical_ctxs.c \ + lp_test/lp_resize_test.c lp_test/lp_test.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = cholesky/cholesky.h \ + sched_ctx_utils/sched_ctx_utils.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ 
+GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ + $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ 
+LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ 
+SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = 
@STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = 
@localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = +LAUNCHER = + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/examples -I$(top_builddir)/include -I$(top_srcdir)/sc_hypervisor/include -I$(top_srcdir)/sc_hypervisor/examples $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +@STARPU_NO_BLAS_LIB_FALSE@noinst_HEADERS = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky.h \ +@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.h + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_SOURCES = \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ +@STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ 
+@STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ +@STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c + +@STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_LDADD = \ +@STARPU_NO_BLAS_LIB_FALSE@ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ +@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) + +app_driven_test_app_driven_test_LDADD = \ + $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign sc_hypervisor/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +app_driven_test/$(am__dirstamp): + @$(MKDIR_P) app_driven_test + @: > app_driven_test/$(am__dirstamp) +app_driven_test/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) app_driven_test/$(DEPDIR) + @: > app_driven_test/$(DEPDIR)/$(am__dirstamp) +app_driven_test/app_driven_test.$(OBJEXT): \ + app_driven_test/$(am__dirstamp) \ + app_driven_test/$(DEPDIR)/$(am__dirstamp) + +app_driven_test/app_driven_test$(EXEEXT): $(app_driven_test_app_driven_test_OBJECTS) $(app_driven_test_app_driven_test_DEPENDENCIES) $(EXTRA_app_driven_test_app_driven_test_DEPENDENCIES) app_driven_test/$(am__dirstamp) + @rm -f app_driven_test/app_driven_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(app_driven_test_app_driven_test_OBJECTS) $(app_driven_test_app_driven_test_LDADD) $(LIBS) +cholesky/$(am__dirstamp): + @$(MKDIR_P) cholesky + @: > cholesky/$(am__dirstamp) +cholesky/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cholesky/$(DEPDIR) + @: > cholesky/$(DEPDIR)/$(am__dirstamp) 
+cholesky/cholesky_implicit.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +cholesky/cholesky_models.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +cholesky/cholesky_kernels.$(OBJEXT): cholesky/$(am__dirstamp) \ + cholesky/$(DEPDIR)/$(am__dirstamp) +sched_ctx_utils/$(am__dirstamp): + @$(MKDIR_P) sched_ctx_utils + @: > sched_ctx_utils/$(am__dirstamp) +sched_ctx_utils/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_ctx_utils/$(DEPDIR) + @: > sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) +sched_ctx_utils/sched_ctx_utils.$(OBJEXT): \ + sched_ctx_utils/$(am__dirstamp) \ + sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) +../../examples/common/$(am__dirstamp): + @$(MKDIR_P) ../../examples/common + @: > ../../examples/common/$(am__dirstamp) +../../examples/common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ../../examples/common/$(DEPDIR) + @: > ../../examples/common/$(DEPDIR)/$(am__dirstamp) +../../examples/common/blas.$(OBJEXT): \ + ../../examples/common/$(am__dirstamp) \ + ../../examples/common/$(DEPDIR)/$(am__dirstamp) + +cholesky/cholesky_implicit$(EXEEXT): $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_DEPENDENCIES) $(EXTRA_cholesky_cholesky_implicit_DEPENDENCIES) cholesky/$(am__dirstamp) + @rm -f cholesky/cholesky_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_LDADD) $(LIBS) +hierarchical_ctxs/$(am__dirstamp): + @$(MKDIR_P) hierarchical_ctxs + @: > hierarchical_ctxs/$(am__dirstamp) +hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) hierarchical_ctxs/$(DEPDIR) + @: > hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) +hierarchical_ctxs/resize_hierarchical_ctxs.$(OBJEXT): \ + hierarchical_ctxs/$(am__dirstamp) \ + hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) + +hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT): $(hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS) $(hierarchical_ctxs_resize_hierarchical_ctxs_DEPENDENCIES) 
$(EXTRA_hierarchical_ctxs_resize_hierarchical_ctxs_DEPENDENCIES) hierarchical_ctxs/$(am__dirstamp) + @rm -f hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS) $(hierarchical_ctxs_resize_hierarchical_ctxs_LDADD) $(LIBS) +lp_test/$(am__dirstamp): + @$(MKDIR_P) lp_test + @: > lp_test/$(am__dirstamp) +lp_test/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) lp_test/$(DEPDIR) + @: > lp_test/$(DEPDIR)/$(am__dirstamp) +lp_test/lp_resize_test.$(OBJEXT): lp_test/$(am__dirstamp) \ + lp_test/$(DEPDIR)/$(am__dirstamp) + +lp_test/lp_resize_test$(EXEEXT): $(lp_test_lp_resize_test_OBJECTS) $(lp_test_lp_resize_test_DEPENDENCIES) $(EXTRA_lp_test_lp_resize_test_DEPENDENCIES) lp_test/$(am__dirstamp) + @rm -f lp_test/lp_resize_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lp_test_lp_resize_test_OBJECTS) $(lp_test_lp_resize_test_LDADD) $(LIBS) +lp_test/lp_test.$(OBJEXT): lp_test/$(am__dirstamp) \ + lp_test/$(DEPDIR)/$(am__dirstamp) + +lp_test/lp_test$(EXEEXT): $(lp_test_lp_test_OBJECTS) $(lp_test_lp_test_DEPENDENCIES) $(EXTRA_lp_test_lp_test_DEPENDENCIES) lp_test/$(am__dirstamp) + @rm -f lp_test/lp_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lp_test_lp_test_OBJECTS) $(lp_test_lp_test_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f ../../examples/common/*.$(OBJEXT) + -rm -f app_driven_test/*.$(OBJEXT) + -rm -f cholesky/*.$(OBJEXT) + -rm -f hierarchical_ctxs/*.$(OBJEXT) + -rm -f lp_test/*.$(OBJEXT) + -rm -f sched_ctx_utils/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@../../examples/common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@app_driven_test/$(DEPDIR)/app_driven_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_kernels.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_models.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lp_test/$(DEPDIR)/lp_resize_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@lp_test/$(DEPDIR)/lp_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf app_driven_test/.libs app_driven_test/_libs + -rm -rf cholesky/.libs cholesky/_libs + -rm -rf hierarchical_ctxs/.libs hierarchical_ctxs/_libs + -rm -rf lp_test/.libs lp_test/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f ../../examples/common/$(DEPDIR)/$(am__dirstamp) + -rm -f ../../examples/common/$(am__dirstamp) + -rm -f app_driven_test/$(DEPDIR)/$(am__dirstamp) + -rm -f app_driven_test/$(am__dirstamp) + -rm -f cholesky/$(DEPDIR)/$(am__dirstamp) + -rm -f cholesky/$(am__dirstamp) + -rm -f hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) + -rm -f hierarchical_ctxs/$(am__dirstamp) + -rm -f lp_test/$(DEPDIR)/$(am__dirstamp) + -rm -f lp_test/$(am__dirstamp) + -rm -f sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_ctx_utils/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ../../examples/common/$(DEPDIR)/blas.Po + -rm -f app_driven_test/$(DEPDIR)/app_driven_test.Po + -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po + -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po + -rm -f cholesky/$(DEPDIR)/cholesky_models.Po + -rm -f hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po + -rm -f lp_test/$(DEPDIR)/lp_resize_test.Po + -rm -f lp_test/$(DEPDIR)/lp_test.Po + -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ../../examples/common/$(DEPDIR)/blas.Po + -rm -f 
app_driven_test/$(DEPDIR)/app_driven_test.Po + -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po + -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po + -rm -f cholesky/$(DEPDIR)/cholesky_models.Po + -rm -f hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po + -rm -f lp_test/$(DEPDIR)/lp_resize_test.Po + -rm -f lp_test/$(DEPDIR)/lp_test.Po + -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: 
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/sc_hypervisor/examples/app_driven_test/app_driven_test.c b/sc_hypervisor/examples/app_driven_test/app_driven_test.c new file mode 100644 index 0000000..868d9fe --- /dev/null +++ b/sc_hypervisor/examples/app_driven_test/app_driven_test.c @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Example for the "app_driven" hypervisor policy: two threads each submit
+ * NTASKS tasks into their own scheduling context, and one tagged task asks
+ * the hypervisor to resize the contexts once it has finished executing.
+ */
+
+/* NOTE(review): the header names were missing from this copy of the patch;
+ * the four below are reconstructed from the identifiers used in this file
+ * (fprintf, getenv, starpu_*, sc_hypervisor_*) -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include <sc_hypervisor.h>
+
+/* number of tasks submitted by each thread */
+#define NTASKS 1000
+/* number of counter increments performed by each task */
+#define NINCR 10
+/* fprintf() wrapper that prints nothing when STARPU_SSILENT is set in the environment */
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+/* Argument handed to every task through task->cl_arg */
+struct params
+{
+	unsigned sched_ctx;	/* id of the context the task was submitted to */
+	int task_tag;		/* hypervisor tag of the task, 0 when it is not tagged */
+};
+
+/* One counter and one mutex per context, indexed by (sched_ctx - 1) */
+unsigned val[2];
+starpu_pthread_mutex_t mut[2];
+
+/* Every implementation of a codelet must have this prototype, the first
+ * argument (buffers) describes the buffers/streams that are managed by the
+ * DSM; the second arguments references read-only data that is passed as an
+ * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
+ * are no data input/output managed by the DSM (cl.nbuffers = 0) */
+
+void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg)
+{
+	struct params *params = (struct params *) cl_arg;
+	unsigned ctx_idx = params->sched_ctx - 1;
+	unsigned counter = 0;
+	int i;
+
+	for(i = 0; i < NINCR; i++)
+	{
+		STARPU_PTHREAD_MUTEX_LOCK(&mut[ctx_idx]);
+		/* remember the value we produced, so that it can be printed
+		 * below without reading the shared counter outside the lock */
+		counter = ++val[ctx_idx];
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mut[ctx_idx]);
+	}
+	if(params->task_tag != 0)
+	{
+		FPRINTF(stdout, "Task with tag %d executed in ctx = %u %u counter_tests\n", params->task_tag, params->sched_ctx, counter);
+	}
+}
+
+/* The codelet is shared by both submission threads: it is initialised once
+ * here rather than being rewritten by each thread for every task. */
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_func},
+	.nbuffers = 0
+};
+
+/* the management of the tags is done by the user */
+/* who will take care that the tags will be unique */
+int tag = 1;
+
+/* Thread body: submit NTASKS tasks into the scheduling context whose id is
+ * pointed to by arg (an unsigned), then wait for them. Always returns NULL. */
+void* submit_tasks_thread(void *arg)
+{
+	unsigned sched_ctx = *((unsigned*)arg);
+	starpu_sched_ctx_set_context(&sched_ctx);
+
+	struct starpu_task *task[NTASKS];
+	struct params params[NTASKS];
+	int i;
+	for(i = 0; i < NTASKS; i++)
+	{
+		task[i] = starpu_task_create();
+		task[i]->cl = &cl;
+
+		if(sched_ctx == 1 && i == 5)
+		{
+			/* tag the tasks whose execution will start the resizing process */
+			task[i]->hypervisor_tag = tag;
+			/* indicate particular settings the context should have when the
+			   resizing will be done */
+			sc_hypervisor_ctl(sched_ctx,
+					  SC_HYPERVISOR_TIME_TO_APPLY, tag,
+					  SC_HYPERVISOR_MIN_WORKERS, 2,
+					  SC_HYPERVISOR_MAX_WORKERS, 12,
+					  SC_HYPERVISOR_NULL);
+			printf("require resize for sched_ctx %u at tag %d\n", sched_ctx, tag);
+			/* specify that the contexts should be resized when the task having this
+			   particular tag will finish executing */
+			sc_hypervisor_post_resize_request(sched_ctx, tag);
+		}
+
+		params[i].sched_ctx = sched_ctx;
+		params[i].task_tag = task[i]->hypervisor_tag;
+
+		/* each task gets its own element; the size is that of one
+		 * element, not of the whole array */
+		task[i]->cl_arg = &params[i];
+		task[i]->cl_arg_size = sizeof(params[i]);
+
+		int ret = starpu_task_submit(task[i]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	/* the tasks point into params[], which lives on this stack: do not
+	 * return before they are done */
+	starpu_task_wait_for_all();
+	return NULL;
+}
+
+/* Create two contexts over all the workers, register them with the
+ * hypervisor and run one submission thread per context.
+ * Returns 0 on success and 77 when starpu_init() reports -ENODEV. */
+int main()
+{
+	int ret = starpu_init(NULL);
+
+	/* 77 is the exit status the automake test harness reports as "skipped" */
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	int num_workers = starpu_worker_get_count();
+	int nres1 = num_workers;
+	int nres2 = num_workers;
+	int resources1[nres1];
+	int resources2[nres2];
+	int i;
+	for(i = 0; i < nres1; i++)
+		resources1[i] = i;
+
+	for(i = 0; i < nres2; i++)
+		resources2[i] = i;
+
+	/* create contexts */
+	unsigned sched_ctx1 = starpu_sched_ctx_create(resources1, nres1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
+	unsigned sched_ctx2 = starpu_sched_ctx_create(resources2, nres2, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
+
+	/* initialize the hypervisor */
+	struct sc_hypervisor_policy policy = {0};
+
+	policy.custom = 0;
+	/* indicate which strategy to use
+	   in this particular case we use app_driven which allows the user to resize
+	   the ctxs dynamically at particular moments of the execution of the application */
+	policy.name = "app_driven";
+	void *perf_counters = sc_hypervisor_init(&policy);
+
+	/* let starpu know which performance counters should use
+	   to inform the hypervisor how the application and the resources are executing */
+	starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters);
+	starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters);
+
+	/* register the contexts that should be managed by the hypervisor
+	   and indicate an approximate amount of workload if known;
+	   in this case we don't know it and we put 0 */
+	sc_hypervisor_register_ctx(sched_ctx1, 0.0);
+	sc_hypervisor_register_ctx(sched_ctx2, 0.0);
+
+	starpu_pthread_t tid[2];
+
+	val[0] = 0;
+	val[1] = 0;
+	STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL);
+
+	/* we create two threads to simulate simultaneous submission of tasks */
+	STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1);
+	STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2);
+
+	STARPU_PTHREAD_JOIN(tid[0], NULL);
+	STARPU_PTHREAD_JOIN(tid[1], NULL);
+
+	/* both threads waited for their tasks before returning, so nothing
+	 * uses the mutexes any more */
+	STARPU_PTHREAD_MUTEX_DESTROY(&mut[0]);
+	STARPU_PTHREAD_MUTEX_DESTROY(&mut[1]);
+
+	/* free starpu and hypervisor data */
+	starpu_shutdown();
+	sc_hypervisor_shutdown();
+
+	FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR);
+	FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR);
+	return 0;
+}
diff --git a/sc_hypervisor/examples/cholesky/cholesky.h b/sc_hypervisor/examples/cholesky/cholesky.h
new file mode 100644
index 0000000..cb5e8fa
--- /dev/null
+++ b/sc_hypervisor/examples/cholesky/cholesky.h
@@ -0,0 +1,173 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DW_CHOLESKY_H__ +#define __DW_CHOLESKY_H__ + +#include +#include +#include +#include +#ifdef STARPU_USE_CUDA +#include +#include +#include +#endif + +#include +#include +#include + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) +#define NMAXBLOCKS 32 + +#define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) +#define TAG_TRSM(k,j) ((starpu_tag_t)((3ULL<<60) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +#define TAG_POTRF_AUX(k, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k))) +#define TAG_TRSM_AUX(k,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ + | ((3ULL<<56) |(((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM_AUX(k,i,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ + | ((4ULL<<56) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +#define BLOCKSIZE (size/nblocks) + +#define BLAS3_FLOP(n1,n2,n3) \ + (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) + +/* This is from magma + + -- Innovative Computing Laboratory + -- Electrical Engineering and Computer Science Department + -- University of Tennessee + -- (C) Copyright 2009 + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of Tennessee, Knoxville nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) + +#define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) + +#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +#define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) +#define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? 
FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) + +#define FMULS_TRSM FMULS_TRMM +#define FADDS_TRSM FADDS_TRMM /* additions use the TRMM addition count, not the multiplication count */ + +#define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) + + +#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) + +#define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) + + +#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) +#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) + +#define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) + +/* End of magma code */ + +extern unsigned g_size; +extern unsigned g_nblocks; +extern unsigned g_nbigblocks; +extern unsigned g_pinned; +extern unsigned g_noprio; +extern unsigned g_check; +extern unsigned g_bound; +extern unsigned g_with_ctxs; +extern unsigned g_with_noctxs; +extern unsigned g_chole1; +extern unsigned g_chole2; + +extern struct starpu_perfmodel chol_model_potrf; +extern struct starpu_perfmodel chol_model_trsm; +extern struct starpu_perfmodel chol_model_syrk; +extern struct starpu_perfmodel chol_model_gemm; + +void chol_cpu_codelet_update_potrf(void **, void *); +void chol_cpu_codelet_update_trsm(void **, void *); +void chol_cpu_codelet_update_syrk(void **, void *); +void chol_cpu_codelet_update_gemm(void **, void *); + +extern struct starpu_codelet cl_potrf; +extern struct starpu_codelet cl_trsm; +extern struct starpu_codelet cl_syrk; +extern struct starpu_codelet cl_gemm; + +double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
+double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_potrf(void *descr[], void *_args); +void chol_cublas_codelet_update_trsm(void *descr[], void *_args); +void chol_cublas_codelet_update_syrk(void *descr[], void *_args); +void chol_cublas_codelet_update_gemm(void *descr[], void *_args); + +double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); +#endif + +void initialize_chol_model(struct starpu_perfmodel* model, char* symbol, + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); + +void parse_args(int argc, char **argv); + +#endif /* __DW_CHOLESKY_H__ */ diff --git a/sc_hypervisor/examples/cholesky/cholesky_implicit.c b/sc_hypervisor/examples/cholesky/cholesky_implicit.c new file mode 100644 index 0000000..0779669 --- /dev/null +++ b/sc_hypervisor/examples/cholesky/cholesky_implicit.c @@ -0,0 +1,379 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "cholesky.h" +#include "../sched_ctx_utils/sched_ctx_utils.h" + +/* + * code to bootstrap the factorization + * and construct the DAG + */ + +static void callback_turn_spmd_on(void *arg) +{ + (void)arg; + cl_gemm.type = STARPU_SPMD; +} + +int hypervisor_tag = 1; +static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) +{ + int ret; + double start; + double end; + + unsigned k,m,n; + unsigned long nx = starpu_matrix_get_nx(dataA); + unsigned long nn = nx/nblocks; + + int prio_level = g_noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO; + + if (g_bound) + starpu_bound_start(0, 0); + + start = starpu_timing_now(); + + /* create all the DAG nodes */ + for (k = 0; k < nblocks; k++) + { + starpu_iteration_push(k); + starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k); + if(k == 0 && g_with_ctxs) + { + ret = starpu_task_insert(&cl_potrf, + STARPU_PRIORITY, prio_level, + STARPU_RW, sdatakk, + STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, + STARPU_HYPERVISOR_TAG, hypervisor_tag, + 0); + if (ret == -ENODEV) return 77; + set_hypervisor_conf(START_BENCH, hypervisor_tag++); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + else + { + ret = starpu_task_insert(&cl_potrf, + STARPU_PRIORITY, prio_level, + STARPU_RW, sdatakk, + STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, + STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), + 0); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + for (m = k+1; m m) + { + mat[m+n*size] = 0.0f; /* debug */ + } + } + } + float *test_mat = malloc(size*size*sizeof(float)); + STARPU_ASSERT(test_mat); + + STARPU_SSYRK("L", "N", size, size, 1.0f, + mat, size, 0.0f, 
test_mat, size); + + FPRINTF(stderr, "comparing results ...\n"); +#ifdef PRINT_OUTPUT + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); + } + else + { + FPRINTF(stdout, ".\t"); + } + } + FPRINTF(stdout, "\n"); + } +#endif + + for (m = 0; m < size; m++) + { + for (n = 0; n < size; n++) + { + if (n <= m) + { + float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); + float err = fabsf(test_mat[m +n*size] - orig) / orig; + if (err > 0.0001) + { + FPRINTF(stderr, "Error[%u, %u] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); + assert(0); + } + } + } + } + free(test_mat); + } + starpu_free_noflag(mat, (size_t)size*size*sizeof(float)); +} + +int main(int argc, char **argv) +{ + int ret; + + /* create a simple definite positive symmetric matrix example + * + * Hilbert matrix : h(i,j) = 1/(i+j+1) + * */ + + parse_args(argc, argv); + + if(g_with_ctxs || g_with_noctxs || g_chole1 || g_chole2) + parse_args_ctx(argc, argv); + + ret = starpu_init(NULL); + if (ret == -ENODEV) + return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_CUDA + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); +#else + initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); + initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); + initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); + initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); 
+#endif + + starpu_cublas_init(); + + if(g_with_ctxs) + { + construct_contexts(); + start_2benchs(execute_cholesky); + } + else if(g_with_noctxs) + start_2benchs(execute_cholesky); + else if(g_chole1) + start_1stbench(execute_cholesky); + else if(g_chole2) + start_2ndbench(execute_cholesky); + else + execute_cholesky(NULL, g_size, g_nblocks); + + starpu_cublas_shutdown(); + starpu_shutdown(); + + if(g_with_ctxs) + end_contexts(); + + return 0; +} diff --git a/sc_hypervisor/examples/cholesky/cholesky_kernels.c b/sc_hypervisor/examples/cholesky/cholesky_kernels.c new file mode 100644 index 0000000..2217f10 --- /dev/null +++ b/sc_hypervisor/examples/cholesky/cholesky_kernels.c @@ -0,0 +1,438 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "cholesky.h" +//#include "../common/blas.h" +#ifdef STARPU_USE_CUDA +#include +#include +#ifdef STARPU_HAVE_MAGMA +#include "magma.h" +#include "magma_lapack.h" +#endif +#endif + +/* + * GEMM + */ + +#if defined(STARPU_USE_CUDA) +static const float p1 = 1.0; +static const float m1 = -1.0; +#endif + +static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("gemm\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); + unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + + switch (s) + { + case 0: + { + /* CPU kernel */ + int worker_size = starpu_combined_worker_get_size(); + + if (worker_size == 1) + { + /* Sequential CPU kernel */ + STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, + right, ld12, 1.0f, center, ld22); + } + else + { + /* Parallel CPU kernel */ + int rank = starpu_combined_worker_get_rank(); + + unsigned block_size = (dx + worker_size - 1)/worker_size; + unsigned new_dx = STARPU_MIN(dx, block_size*(rank+1)) - block_size*rank; + + float *new_left = &left[block_size*rank]; + float *new_center = &center[block_size*rank]; + + STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, + right, ld12, 1.0f, new_center, ld22); + } + break; + } +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), + CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, + &m1, left, ld21, right, ld12, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + break; + } +#endif + default: + STARPU_ABORT(); + break;
+ } +} + +void chol_cpu_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_gemm(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_gemm(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * SYRK + */ + +static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) +{ + (void)_args; + /* printf("syrk\n"); */ + float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); + unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); + + switch (s) + { + case 0: + { + /* CPU kernel */ + STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, + 1.0f, center, ld22); + break; + } +#ifdef STARPU_USE_CUDA + case 1: + { + /* CUDA kernel */ + cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), + CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, + &m1, left, ld21, + &p1, center, ld22); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; + } +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_syrk(void *descr[], void *_args) +{ + chol_common_cpu_codelet_update_syrk(descr, 1, _args); +} +#endif /* STARPU_USE_CUDA */ + +/* + * TRSM + */ + +static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("trsm\n"); */ + float *sub11; + float *sub21; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); + + unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); + unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + 
+ unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); + unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); + +#ifdef STARPU_USE_CUDA + cublasStatus_t status; +#endif + + switch (s) + { + case 0: + STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); + break; +#ifdef STARPU_USE_CUDA + case 1: + status = cublasStrsm(starpu_cublas_get_local_handle(), + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, + nx21, ny21, &p1, sub11, ld11, sub21, ld21); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + +void chol_cpu_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_trsm(void *descr[], void *_args) +{ + chol_common_codelet_update_trsm(descr, 1, _args); +} +#endif + +/* + * POTRF + */ + +static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) +{ + (void)_args; +/* printf("potrf\n"); */ + float *sub11; + + sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); + + unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); + unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); + + unsigned z; + + switch (s) + { + case 0: + +#ifdef STARPU_MKL + STARPU_SPOTRF("L", nx, sub11, ld); +#else + /* + * - alpha 11 <- lambda 11 = sqrt(alpha11) + * - alpha 21 <- l 21 = alpha 21 / lambda 11 + * - A22 <- A22 - l21 trans(l21) + */ + + for (z = 0; z < nx; z++) + { + float lambda11; + lambda11 = sqrt(sub11[z+z*ld]); + sub11[z+z*ld] = lambda11; + + STARPU_ASSERT(lambda11 != 0.0f); + + STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); + + STARPU_SSYR("L", nx - z - 1, -1.0f, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + } +#endif + break; +#ifdef STARPU_USE_CUDA + case 1: +#ifdef STARPU_HAVE_MAGMA + { + int ret; + int info; +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + 
cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream(stream); + magmablasSetKernelStream(stream); +#else + starpu_cublas_set_stream(); +#endif + ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); + if (ret != MAGMA_SUCCESS) + { + fprintf(stderr, "Error in Magma: %d\n", ret); + STARPU_ABORT(); + } +#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) + cudaError_t cures = cudaStreamSynchronize(stream); +#else + cudaError_t cures = cudaDeviceSynchronize(); +#endif + STARPU_ASSERT(!cures); + } +#else + { + + float *lambda11; + cublasStatus_t status; + cudaStream_t stream = starpu_cuda_get_local_stream(); + cublasHandle_t handle = starpu_cublas_get_local_handle(); + cudaHostAlloc((void **)&lambda11, sizeof(float), 0); + + for (z = 0; z < nx; z++) + { + cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + STARPU_ASSERT(*lambda11 != 0.0f); + + *lambda11 = sqrt(*lambda11); + +/* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ + cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); + float scal = 1.0f/(*lambda11); + + status = cublasSscal(handle, + nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + + status = cublasSsyr(handle, + CUBLAS_FILL_MODE_UPPER, + nx - z - 1, &m1, + &sub11[(z+1)+z*ld], 1, + &sub11[(z+1)+(z+1)*ld], ld); + if (status != CUBLAS_STATUS_SUCCESS) + STARPU_CUBLAS_REPORT_ERROR(status); + } + + cudaStreamSynchronize(stream); + cudaFreeHost(lambda11); + } +#endif + break; +#endif + default: + STARPU_ABORT(); + break; + } +} + + +void chol_cpu_codelet_update_potrf(void *descr[], void *_args) +{ + chol_common_codelet_update_potrf(descr, 0, _args); +} + +#ifdef STARPU_USE_CUDA +void chol_cublas_codelet_update_potrf(void *descr[], void *_args) +{ + 
chol_common_codelet_update_potrf(descr, 1, _args); +} +#endif/* STARPU_USE_CUDA */ + +struct starpu_perfmodel chol_model_potrf; +struct starpu_perfmodel chol_model_trsm; +struct starpu_perfmodel chol_model_syrk; +struct starpu_perfmodel chol_model_gemm; + +/* + * Create the codelets + */ + +struct starpu_codelet cl_potrf = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_potrf}, + .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_potrf}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .nbuffers = 1, + .modes = { STARPU_RW }, + .model = &chol_model_potrf, + .color = 0xffff00, +}; + +struct starpu_codelet cl_trsm = +{ + .type = STARPU_SEQ, + .cpu_funcs = {chol_cpu_codelet_update_trsm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_trsm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_trsm, + .color = 0x8080ff, +}; + +struct starpu_codelet cl_syrk = +{ + .type = STARPU_SEQ, + .max_parallelism = INT_MAX, + .cpu_funcs = {chol_cpu_codelet_update_syrk}, + .cpu_funcs_name = {"chol_cpu_codelet_update_syrk"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_syrk}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_RW }, + .model = &chol_model_syrk, + .color = 0x00ff00, +}; + +struct starpu_codelet cl_gemm = +{ + .type = STARPU_SEQ, + .max_parallelism = INT_MAX, + .cpu_funcs = {chol_cpu_codelet_update_gemm}, + .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {chol_cublas_codelet_update_gemm}, +#elif defined(STARPU_SIMGRID) + .cuda_funcs = {(void*)1}, +#endif + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers 
= 3, + .modes = { STARPU_R, STARPU_R, STARPU_RW }, + .model = &chol_model_gemm, + .color = 0x00c000, +}; + diff --git a/sc_hypervisor/examples/cholesky/cholesky_models.c b/sc_hypervisor/examples/cholesky/cholesky_models.c new file mode 100644 index 0000000..ba14fbd --- /dev/null +++ b/sc_hypervisor/examples/cholesky/cholesky_models.c @@ -0,0 +1,288 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Example of a cost model for BLAS operations. This is really just an + * example! + */ + +/* + * As a convention, in that file, buffers[0] is represented by A, + * buffers[1] is B ... 
+ */ + +/* + * Number of flops of Gemm + */ + +#include +#include +#include "cholesky.h" + +/* #define USE_PERTURBATION 1 */ + +#ifdef USE_PERTURBATION +#define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) +#else +#define PERTURB(a) (a) +#endif + +double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_potrf_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_potrf_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_trsm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_trsm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) 
+{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760)/2; + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_syrk_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666)/2; + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_syrk_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cpu_chol_task_gemm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + (void)arch; + (void)nimpl; + uint32_t n; + + n = starpu_matrix_get_nx(task->handles[0]); + + double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666); + +#ifdef STARPU_MODEL_DEBUG + FPRINTF(stdout, "cuda_chol_task_gemm_cost n %u cost %e\n", n, cost); +#endif + + return PERTURB(cost); +} + +void initialize_chol_model(struct starpu_perfmodel* model, char * symbol, + double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), + double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) +{ + struct starpu_perfmodel_per_arch *per_arch; + + model->symbol = symbol; + model->type = STARPU_HISTORY_BASED; + + starpu_perfmodel_init(model); + + per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CPU_WORKER, 
0, 1, -1); + per_arch->cost_function = cpu_cost_function; + // We could also call directly: + // starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); + + if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) + { + per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CUDA_WORKER, 0, 1, -1); + per_arch->cost_function = cuda_cost_function; + + } +} + +unsigned g_size = 4*1024; +unsigned g_nblocks = 16; +unsigned g_nbigblocks = 8; +unsigned g_pinned = 0; +unsigned g_noprio = 0; +unsigned g_check = 0; +unsigned g_bound = 0; +unsigned g_with_ctxs = 0; +unsigned g_with_noctxs = 0; +unsigned g_chole1 = 0; +unsigned g_chole2 = 0; + +void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-with_ctxs") == 0) + { + g_with_ctxs = 1; + break; + } + if (strcmp(argv[i], "-with_noctxs") == 0) + { + g_with_noctxs = 1; + break; + } + + if (strcmp(argv[i], "-chole1") == 0) + { + g_chole1 = 1; + break; + } + + if (strcmp(argv[i], "-chole2") == 0) + { + g_chole2 = 1; + break; + } + + if (strcmp(argv[i], "-size") == 0) + { + char *argptr; + g_size = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + g_nblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nbigblocks") == 0) + { + char *argptr; + g_nbigblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-pin") == 0) + { + g_pinned = 1; + } + + if (strcmp(argv[i], "-no-prio") == 0) + { + g_noprio = 1; + } + + if (strcmp(argv[i], "-bound") == 0) + { + g_bound = 1; + } + + if (strcmp(argv[i], "-check") == 0) + { + g_check = 1; + } + + if (strcmp(argv[i], "-h") == 0) + { + printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]); + } + } +} diff --git a/sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c b/sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c new file mode 100644 index 
0000000..1f7d087 --- /dev/null +++ b/sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#define NTASKS 1000 +#define NINCR 10 +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + + +unsigned val[3]; +starpu_pthread_mutex_t mut[3]; + +/* Every implementation of a codelet must have this prototype, the first + * argument (buffers) describes the buffers/streams that are managed by the + * DSM; the second arguments references read-only data that is passed as an + * argument of the codelet (task->cl_arg). 
Here, "buffers" is unused as there + * are no data input/output managed by the DSM (cl.nbuffers = 0) */ +void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) +{ + unsigned sched_ctx = *((unsigned *) cl_arg); + + int i; + for(i = 0; i < NINCR; i++) + { + STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); + val[sched_ctx - 1]++; + STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); + } +} + +struct starpu_codelet cl = {0}; + +void* submit_tasks_thread(void *arg) +{ + unsigned sched_ctx = *((unsigned*)arg); + starpu_sched_ctx_set_context(&sched_ctx); + + struct starpu_task *task[NTASKS]; + int i; + for(i = 0; i < NTASKS; i++) + { + task[i] = starpu_task_create(); + cl.cpu_funcs[0] = cpu_func; + cl.nbuffers = 0; + + task[i]->cl = &cl; + + task[i]->cl_arg = &sched_ctx; + task[i]->cl_arg_size = sizeof(unsigned); + + task[i]->flops = NINCR*1000000000.0; + int ret = starpu_task_submit(task[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + if(i == NTASKS/2) + sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1); + } + + starpu_task_wait_for_all(); + return NULL; +} + +int main() +{ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) + return 77; + + /* create contexts */ + unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 0, 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 1, 0); + unsigned sched_ctx3 = starpu_sched_ctx_create(NULL, 0, "sched_ctx3", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 1, 0); + starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); + starpu_sched_ctx_set_inheritor(sched_ctx3, sched_ctx1); + + /* initialize the hypervisor */ + struct sc_hypervisor_policy policy; + policy.custom = 0; + /* indicate which strategy to use + * in this particular case we use feft_lp which allows the user to resize + * the ctxs dynamically
at particular moments of the execution of the application */ + policy.name = "feft_lp"; + void *perf_counters = sc_hypervisor_init(&policy); + + /* let starpu know which performance counters it should use + * to inform the hypervisor how the application and the resources are executing */ + starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); + starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); + starpu_sched_ctx_set_perf_counters(sched_ctx3, perf_counters); + + double flops1 = NTASKS*NINCR*1000000000.0; + double flops2 = NTASKS*NINCR*1000000000.0; + double flops3 = NTASKS*NINCR*1000000000.0; + /* register the contexts that should be managed by the hypervisor + * and indicate an approximate amount of workload if known; + here each context is registered with NTASKS*NINCR*1e9 flops */ + sc_hypervisor_register_ctx(sched_ctx1, flops1); + sc_hypervisor_register_ctx(sched_ctx2, flops2); + sc_hypervisor_register_ctx(sched_ctx3, flops3); + + unsigned ncpus = starpu_cpu_worker_get_count(); + + sc_hypervisor_ctl(sched_ctx1, + SC_HYPERVISOR_MAX_WORKERS, ncpus, + SC_HYPERVISOR_NULL); + + sc_hypervisor_ctl(sched_ctx2, + SC_HYPERVISOR_MAX_WORKERS, ncpus, + SC_HYPERVISOR_NULL); + + sc_hypervisor_ctl(sched_ctx3, + SC_HYPERVISOR_MAX_WORKERS, ncpus, + SC_HYPERVISOR_NULL); + + /* lp strategy allows sizing the contexts because we know the total number of flops + * to be executed */ + sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); + + starpu_pthread_t tid[3]; + + val[0] = 0; + val[1] = 0; + val[2] = 0; + STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); + STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); + STARPU_PTHREAD_MUTEX_INIT(&mut[2], NULL); + + /* we create three threads to simulate simultaneous submission of tasks */ + STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); + STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); + STARPU_PTHREAD_CREATE(&tid[2], NULL, submit_tasks_thread, (void*)&sched_ctx3); + + STARPU_PTHREAD_JOIN(tid[0], NULL); +
STARPU_PTHREAD_JOIN(tid[1], NULL); + STARPU_PTHREAD_JOIN(tid[2], NULL); + + /* free starpu and hypervisor data */ + starpu_shutdown(); + sc_hypervisor_shutdown(); + + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx3, val[2], NTASKS*NINCR); + return 0; +} diff --git a/sc_hypervisor/examples/lp_test/lp_resize_test.c b/sc_hypervisor/examples/lp_test/lp_resize_test.c new file mode 100644 index 0000000..cb4857f --- /dev/null +++ b/sc_hypervisor/examples/lp_test/lp_resize_test.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#define NTASKS 1000 +#define NINCR 10 +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + + +unsigned val[2]; +starpu_pthread_mutex_t mut[2]; + +/* Every implementation of a codelet must have this prototype, the first +* argument (buffers) describes the buffers/streams that are managed by the + * DSM; the second arguments references read-only data that is passed as an + * argument of the codelet (task->cl_arg). 
Here, "buffers" is unused as there + * are no data input/output managed by the DSM (cl.nbuffers = 0) */ + +void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) +{ + unsigned sched_ctx = *((unsigned *) cl_arg); + + int i; + for(i = 0; i < NINCR; i++) + { + STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); + val[sched_ctx - 1]++; + STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); + } +} + +struct starpu_codelet cl = {0}; + +void* submit_tasks_thread(void *arg) +{ + unsigned sched_ctx = *((unsigned*)arg); + starpu_sched_ctx_set_context(&sched_ctx); + + struct starpu_task *task[NTASKS]; + int i; + for(i = 0; i < NTASKS; i++) + { + task[i] = starpu_task_create(); + cl.cpu_funcs[0] = cpu_func; + cl.nbuffers = 0; + + task[i]->cl = &cl; + + task[i]->cl_arg = &sched_ctx; + task[i]->cl_arg_size = sizeof(unsigned); + + task[i]->flops = NINCR*1000000000.0; + int ret = starpu_task_submit(task[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + if(i == NTASKS/2) + sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1); + } + + starpu_task_wait_for_all(); + return NULL; +} + +int main() +{ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) + return 77; + + /* create contexts */ + unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + + /* initialize the hypervisor */ + struct sc_hypervisor_policy policy; + policy.custom = 0; + /* indicate which strategy to use + in this particular case we use app_driven which allows the user to resize + the ctxs dynamically at particular moments of the execution of the application */ + policy.name = "feft_lp"; + void *perf_counters = sc_hypervisor_init(&policy); + + /* let starpu know which performance counters should use + to inform the hypervisor how the application and the resources are executing */ + starpu_sched_ctx_set_perf_counters(sched_ctx1, 
perf_counters); + starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); + + double flops1 = NTASKS*NINCR*1000000000.0; + double flops2 = NTASKS*NINCR*1000000000.0; + /* register the contexts that should be managed by the hypervisor + and indicate an approximate amount of workload if known; + in this case we don't know it and we put 0 */ + sc_hypervisor_register_ctx(sched_ctx1, flops1); + sc_hypervisor_register_ctx(sched_ctx2, flops2); + /* lp strategy allows sizing the contexts because we know the total number of flops + * to be executed */ + sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); + + starpu_pthread_t tid[2]; + + val[0] = 0; + val[1] = 0; + STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); + STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); + + /* we create two threads to simulate simultaneous submission of tasks */ + STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); + STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); + + STARPU_PTHREAD_JOIN(tid[0], NULL); + STARPU_PTHREAD_JOIN(tid[1], NULL); + + /* free starpu and hypervisor data */ + starpu_shutdown(); + sc_hypervisor_shutdown(); + + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); + return 0; +} diff --git a/sc_hypervisor/examples/lp_test/lp_test.c b/sc_hypervisor/examples/lp_test/lp_test.c new file mode 100644 index 0000000..1bbe317 --- /dev/null +++ b/sc_hypervisor/examples/lp_test/lp_test.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#define NTASKS 1000 +#define NINCR 10 +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + + +unsigned val[2]; +starpu_pthread_mutex_t mut[2]; + +/* Every implementation of a codelet must have this prototype, the first + * argument (buffers) describes the buffers/streams that are managed by the + * DSM; the second arguments references read-only data that is passed as an + * argument of the codelet (task->cl_arg). 
Here, "buffers" is unused as there + * are no data input/output managed by the DSM (cl.nbuffers = 0) */ + +void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) +{ + unsigned sched_ctx = *((unsigned *) cl_arg); + + int i; + for(i = 0; i < NINCR; i++) + { + STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); + val[sched_ctx - 1]++; + STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); + } +} + +struct starpu_codelet cl = {0}; + +void* submit_tasks_thread(void *arg) +{ + unsigned sched_ctx = *((unsigned*)arg); + starpu_sched_ctx_set_context(&sched_ctx); + + struct starpu_task *task[NTASKS]; + int i; + for(i = 0; i < NTASKS; i++) + { + task[i] = starpu_task_create(); + cl.cpu_funcs[0] = cpu_func; + cl.nbuffers = 0; + + task[i]->cl = &cl; + + task[i]->cl_arg = &sched_ctx; + task[i]->cl_arg_size = sizeof(unsigned); + + task[i]->flops = NINCR*1000000000.0; + int ret = starpu_task_submit(task[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + } + + starpu_task_wait_for_all(); + return NULL; +} + +int main() +{ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) + return 77; + + + /* create contexts */ + unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); + + /* initialize the hypervisor */ + struct sc_hypervisor_policy policy; + policy.custom = 0; + /* indicate which strategy to use + in this particular case we use app_driven which allows the user to resize + the ctxs dynamically at particular moments of the execution of the application */ + policy.name = "feft_lp"; + void *perf_counters = sc_hypervisor_init(&policy); + + /* let starpu know which performance counters should use + to inform the hypervisor how the application and the resources are executing */ + starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); + starpu_sched_ctx_set_perf_counters(sched_ctx2, 
perf_counters); + + double flops1 = NTASKS*NINCR*1000000000.0; + double flops2 = NTASKS*NINCR*1000000000.0; + /* register the contexts that should be managed by the hypervisor + and indicate an approximate amount of workload if known; + in this case we don't know it and we put 0 */ + sc_hypervisor_register_ctx(sched_ctx1, flops1); + sc_hypervisor_register_ctx(sched_ctx2, flops2); + /* lp strategy allows sizing the contexts because we know the total number of flops + to be executed */ + sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); + + starpu_pthread_t tid[2]; + + val[0] = 0; + val[1] = 0; + STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); + STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); + + /* we create two threads to simulate simultaneous submission of tasks */ + STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); + STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); + + STARPU_PTHREAD_JOIN(tid[0], NULL); + STARPU_PTHREAD_JOIN(tid[1], NULL); + + /* free starpu and hypervisor data */ + starpu_shutdown(); + sc_hypervisor_shutdown(); + + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); + FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); + return 0; +} diff --git a/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c b/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c new file mode 100644 index 0000000..5f6ea7e --- /dev/null +++ b/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c @@ -0,0 +1,531 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "sched_ctx_utils.h" +#include +#include "sc_hypervisor.h" +#define NSAMPLES 3 + +unsigned size1; +unsigned size2; +unsigned nblocks1; +unsigned nblocks2; +unsigned cpu1; +unsigned cpu2; +unsigned gpu; +unsigned gpu1; +unsigned gpu2; + +typedef struct +{ + unsigned id; + unsigned ctx; + int the_other_ctx; + int *workers; + int nworkers; + void (*bench)(float*, unsigned, unsigned); + unsigned size; + unsigned nblocks; + float *mat[NSAMPLES]; +} params; + +typedef struct +{ + double flops; + double avg_timing; +} retvals; + +int first = 1; +starpu_pthread_mutex_t mut; +retvals rv[2]; +params p1, p2; +int it = 0; +int it2 = 0; + +starpu_pthread_key_t key; + +void init() +{ + size1 = 4*1024; + size2 = 4*1024; + nblocks1 = 16; + nblocks2 = 16; + cpu1 = 0; + cpu2 = 0; + gpu = 0; + gpu1 = 0; + gpu2 = 0; + + rv[0].flops = 0.0; + rv[1].flops = 0.0; + rv[1].avg_timing = 0.0; + rv[1].avg_timing = 0.0; + + p1.ctx = 0; + p2.ctx = 0; + + p1.id = 0; + p2.id = 1; + STARPU_PTHREAD_KEY_CREATE(&key, NULL); +} + +void update_sched_ctx_timing_results(double flops, double avg_timing) +{ + unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); + rv[*id].flops += flops; + rv[*id].avg_timing += avg_timing; +} + +void* start_bench(void *val) +{ + params *p = (params*)val; + int i; + + STARPU_PTHREAD_SETSPECIFIC(key, &p->id); + + if(p->ctx != 0) + 
starpu_sched_ctx_set_context(&p->ctx); + + for(i = 0; i < NSAMPLES; i++) + p->bench(p->mat[i], p->size, p->nblocks); + + /* if(p->ctx != 0) */ + /* { */ + /* STARPU_PTHREAD_MUTEX_LOCK(&mut); */ + /* if(first){ */ + /* sc_hypervisor_unregiser_ctx(p->ctx); */ + /* starpu_sched_ctx_delete(p->ctx, p->the_other_ctx); */ + /* } */ + + /* first = 0; */ + /* STARPU_PTHREAD_MUTEX_UNLOCK(&mut); */ + /* } */ + sc_hypervisor_stop_resize(p->the_other_ctx); + rv[p->id].flops /= NSAMPLES; + rv[p->id].avg_timing /= NSAMPLES; + + return NULL; +} + +float* construct_matrix(unsigned size) +{ + float *mat; + starpu_malloc((void **)&mat, (size_t)size*size*sizeof(float)); + + unsigned i,j; + for (i = 0; i < size; i++) + { + for (j = 0; j < size; j++) + { + mat[j +i*size] = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f); + /* mat[j +i*size] = ((i == j)?1.0f*size:0.0f); */ + } + } + return mat; +} +void start_2benchs(void (*bench)(float*, unsigned, unsigned)) +{ + p1.bench = bench; + p1.size = size1; + p1.nblocks = nblocks1; + + p2.bench = bench; + p2.size = size2; + p2.nblocks = nblocks2; + + int i; + for(i = 0; i < NSAMPLES; i++) + { + p1.mat[i] = construct_matrix(p1.size); + p2.mat[i] = construct_matrix(p2.size); + } + + starpu_pthread_t tid[2]; + STARPU_PTHREAD_MUTEX_INIT(&mut, NULL); + + struct timeval start; + struct timeval end; + + gettimeofday(&start, NULL); + + STARPU_PTHREAD_CREATE(&tid[0], NULL, (void*)start_bench, (void*)&p1); + STARPU_PTHREAD_CREATE(&tid[1], NULL, (void*)start_bench, (void*)&p2); + + STARPU_PTHREAD_JOIN(tid[0], NULL); + STARPU_PTHREAD_JOIN(tid[1], NULL); + + gettimeofday(&end, NULL); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + timing /= 1000000; + + printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops); + printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing); + +} + +void start_1stbench(void (*bench)(float*, unsigned, unsigned)) +{ + p1.bench = 
bench; + p1.size = size1; + p1.nblocks = nblocks1; + + int i; + for(i = 0; i < NSAMPLES; i++) + { + p1.mat[i] = construct_matrix(p1.size); + } + + struct timeval start; + struct timeval end; + + gettimeofday(&start, NULL); + + start_bench((void*)&p1); + + gettimeofday(&end, NULL); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + timing /= 1000000; + + printf("%2.2f ", rv[0].flops); + printf("%2.2f %2.2f\n", rv[0].avg_timing, timing); +} + +void start_2ndbench(void (*bench)(float*, unsigned, unsigned)) +{ + p2.bench = bench; + p2.size = size2; + p2.nblocks = nblocks2; + int i; + for(i = 0; i < NSAMPLES; i++) + { + p2.mat[i] = construct_matrix(p2.size); + } + + struct timeval start; + struct timeval end; + + gettimeofday(&start, NULL); + + start_bench((void*)&p2); + + gettimeofday(&end, NULL); + + STARPU_PTHREAD_MUTEX_DESTROY(&mut); + + double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + timing /= 1000000; + + printf("%2.2f ", rv[1].flops); + printf("%2.2f %2.2f\n", rv[1].avg_timing, timing); +} + +void construct_contexts() +{ + struct sc_hypervisor_policy policy; + policy.custom = 0; + policy.name = "idle"; + void *perf_counters = sc_hypervisor_init(&policy); + int nworkers1 = cpu1 + gpu + gpu1; + int nworkers2 = cpu2 + gpu + gpu2; + /* unsigned n_all_gpus = gpu + gpu1 + gpu2; */ + + + int i; + /* int k = 0; */ + nworkers1 = 12; + p1.workers = (int*)malloc(nworkers1*sizeof(int)); + + /* for(i = 0; i < gpu; i++) */ + /* p1.workers[k++] = i; */ + + /* for(i = gpu; i < gpu + gpu1; i++) */ + /* p1.workers[k++] = i; */ + + + /* for(i = n_all_gpus; i < n_all_gpus + cpu1; i++) */ + /* p1.workers[k++] = i; */ + + + for(i = 0; i < 12; i++) + p1.workers[i] = i; + + p1.ctx = starpu_sched_ctx_create(p1.workers, nworkers1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); + starpu_sched_ctx_set_perf_counters(p1.ctx, perf_counters); + 
p2.the_other_ctx = (int)p1.ctx; + p1.nworkers = nworkers1; + sc_hypervisor_register_ctx(p1.ctx, 0.0); + + /* sc_hypervisor_ctl(p1.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, p1.workers, p1.nworkers, 5000.0, */ + /* SC_HYPERVISOR_MAX_IDLE, p1.workers, gpu+gpu1, 100000.0, */ + /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */ + /* SC_HYPERVISOR_GRANULARITY, 2, */ + /* SC_HYPERVISOR_MIN_TASKS, 1000, */ + /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 100000.0, */ + /* SC_HYPERVISOR_MIN_WORKERS, 6, */ + /* SC_HYPERVISOR_MAX_WORKERS, 12, */ + /* NULL); */ + + sc_hypervisor_ctl(p1.ctx, + SC_HYPERVISOR_GRANULARITY, 2, + SC_HYPERVISOR_MIN_TASKS, 1000, + SC_HYPERVISOR_MIN_WORKERS, 6, + SC_HYPERVISOR_MAX_WORKERS, 12, + NULL); + + /* k = 0; */ + p2.workers = (int*)malloc(nworkers2*sizeof(int)); + + /* for(i = 0; i < gpu; i++) */ + /* p2.workers[k++] = i; */ + + /* for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++) */ + /* p2.workers[k++] = i; */ + + /* for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++) */ + /* p2.workers[k++] = i; */ + + p2.ctx = starpu_sched_ctx_create(p2.workers, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); + starpu_sched_ctx_set_perf_counters(p2.ctx, perf_counters); + p1.the_other_ctx = (int)p2.ctx; + p2.nworkers = 0; + sc_hypervisor_register_ctx(p2.ctx, 0.0); + + /* sc_hypervisor_ctl(p2.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, p2.workers, p2.nworkers, 2000.0, */ + /* SC_HYPERVISOR_MAX_IDLE, p2.workers, gpu+gpu2, 5000.0, */ + /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */ + /* SC_HYPERVISOR_GRANULARITY, 2, */ + /* SC_HYPERVISOR_MIN_TASKS, 500, */ + /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 1000.0, */ + /* SC_HYPERVISOR_MIN_WORKERS, 4, */ + /* SC_HYPERVISOR_MAX_WORKERS, 8, */ + /* NULL); */ + + sc_hypervisor_ctl(p2.ctx, + SC_HYPERVISOR_GRANULARITY, 2, + SC_HYPERVISOR_MIN_TASKS, 500, + SC_HYPERVISOR_MIN_WORKERS, 0, + SC_HYPERVISOR_MAX_WORKERS, 6, + NULL); + +} + +void set_hypervisor_conf(int event, int 
task_tag) +{ + (void)event; + (void)task_tag; +/* unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); */ +/* if(*id == 0) */ +/* { */ +/* if(event == END_BENCH) */ +/* { */ +/* if(it < 2) */ +/* { */ +/* sc_hypervisor_ctl(p2.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 2, */ +/* SC_HYPERVISOR_MAX_WORKERS, 4, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ + +/* printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag); */ +/* sc_hypervisor_ctl(p1.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 6, */ +/* SC_HYPERVISOR_MAX_WORKERS, 8, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ +/* printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag); */ +/* sc_hypervisor_resize(p1.ctx, task_tag); */ +/* } */ +/* if(it == 2) */ +/* { */ +/* sc_hypervisor_ctl(p2.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 12, */ +/* SC_HYPERVISOR_MAX_WORKERS, 12, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ +/* printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag); */ +/* sc_hypervisor_ctl(p1.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 0, */ +/* SC_HYPERVISOR_MAX_WORKERS, 0, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ +/* printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag); */ +/* sc_hypervisor_resize(p1.ctx, task_tag); */ +/* } */ +/* it++; */ + +/* } */ +/* } */ +/* else */ +/* { */ +/* if(event == END_BENCH) */ +/* { */ +/* if(it2 < 3) */ +/* { */ +/* sc_hypervisor_ctl(p1.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 6, */ +/* SC_HYPERVISOR_MAX_WORKERS, 12, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ +/* printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag); */ +/* sc_hypervisor_ctl(p2.ctx, */ +/* SC_HYPERVISOR_MIN_WORKERS, 0, */ +/* SC_HYPERVISOR_MAX_WORKERS, 0, */ +/* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ +/* NULL); */ +/* printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag); */ +/* sc_hypervisor_resize(p2.ctx, task_tag); */ +/* } */ +/* it2++; */ +/* } */ +/* } */ + + /* if(*id == 1) */ + /* { */ + /* 
if(event == START_BENCH) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p1.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 800000.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* NULL); */ + /* } */ + /* else */ + /* { */ + /* if(it2 < 2) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p2.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 500.0, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 200.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* NULL); */ + /* } */ + /* if(it2 == 2) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p2.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1000.0, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 500.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* SC_HYPERVISOR_MAX_WORKERS, 12, */ + /* NULL); */ + /* } */ + /* it2++; */ + /* } */ + + /* } else { */ + /* if(event == START_BENCH) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p1.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1500.0, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 4000.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* NULL); */ + /* } */ + /* if(event == END_BENCH) */ + /* { */ + /* if(it < 2) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p1.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 100.0, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 5000.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* NULL); */ + /* } */ + /* if(it == 2) */ + /* { */ + /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ + /* sc_hypervisor_ctl(p1.ctx, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 5000.0, */ + /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 10000.0, */ + /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ + /* NULL); */ + /* } */ + + /* it++; */ + /* } */ + + /* } */ +} + 
+void end_contexts() +{ + free(p1.workers); + free(p2.workers); + sc_hypervisor_shutdown(); +} + +void parse_args_ctx(int argc, char **argv) +{ + init(); + int i; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-size1") == 0) { + char *argptr; + size1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks1") == 0) { + char *argptr; + nblocks1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-size2") == 0) { + char *argptr; + size2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks2") == 0) { + char *argptr; + nblocks2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-cpu1") == 0) { + char *argptr; + cpu1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-cpu2") == 0) { + char *argptr; + cpu2 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu") == 0) { + char *argptr; + gpu = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu1") == 0) { + char *argptr; + gpu1 = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-gpu2") == 0) { + char *argptr; + gpu2 = strtol(argv[++i], &argptr, 10); + } + } +} diff --git a/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.h b/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.h new file mode 100644 index 0000000..8760682 --- /dev/null +++ b/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.h @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#define START_BENCH 0 +#define END_BENCH 1 + +void parse_args_ctx(int argc, char **argv); +void update_sched_ctx_timing_results(double gflops, double timing); +void construct_contexts(); +void end_contexts(void); +void start_2benchs(void (*bench)(float *mat, unsigned size, unsigned nblocks)); +void start_1stbench(void (*bench)(float *mat, unsigned size, unsigned nblocks)); +void start_2ndbench(void (*bench)(float *mat, unsigned size, unsigned nblocks)); +void set_hypervisor_conf(int event, int task_tag); diff --git a/sc_hypervisor/include/sc_hypervisor.h b/sc_hypervisor/include/sc_hypervisor.h new file mode 100644 index 0000000..11c134d --- /dev/null +++ b/sc_hypervisor/include/sc_hypervisor.h @@ -0,0 +1,295 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SC_HYPERVISOR_H +#define SC_HYPERVISOR_H + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_SC_Hypervisor + Methods to implement a hypervisor resizing policy. 
+*/ +struct sc_hypervisor_policy +{ + /** + Indicate the name of the policy. If the policy is not a + custom one, the policy corresponding to this name will be used + by the hypervisor + */ + const char *name; + + /** + Indicate whether the policy is custom or not + */ + unsigned custom; + + /** + Distribute workers to contexts even at the beginning of the + program + */ + void (*size_ctxs)(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + + /** + Require explicit resizing (takes the same parameters as + sc_hypervisor_resize_ctxs()) + */ + void (*resize_ctxs)(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + + /** + Called whenever the indicated worker executes another idle + cycle in sched_ctx + */ + void (*handle_idle_cycle)(unsigned sched_ctx, int worker); + + /** + Called whenever a task is pushed on the worker’s queue + corresponding to the context sched_ctx + */ + void (*handle_pushed_task)(unsigned sched_ctx, int worker); + + /** + Called whenever a task is popped from the worker’s queue + corresponding to the context sched_ctx + */ + void (*handle_poped_task)(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint); + + /** + Called whenever a task is executed on the indicated worker + and context after a long period of idle time + */ + void (*handle_idle_end)(unsigned sched_ctx, int worker); + + /** + Called whenever a tag task has just been executed. 
The + context and the tag of the task are passed to the hook + */ + void (*handle_post_exec_hook)(unsigned sched_ctx, int task_tag); + + /** + Called when a job is submitted in the context sched_ctx, + so that the hypervisor can take a decision + */ + void (*handle_submitted_job)(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size); + + /** + Called when the context sched_ctx is deleted, so that the + hypervisor can take a decision + */ + void (*end_ctx)(unsigned sched_ctx); + + /** + Called when the context sched_ctx is registered, so that the + hypervisor can take a decision + */ + void (*start_ctx)(unsigned sched_ctx); + + /** + Called so that the hypervisor initializes its values for the + worker workerid in the context sched_ctx + */ + void (*init_worker)(int workerid, unsigned sched_ctx); +}; + +/** + @defgroup API_SC_Hypervisor_usage Scheduling Context Hypervisor - Regular usage + There is a single hypervisor that is in charge of resizing contexts + and the resizing strategy is chosen at the initialization of the + hypervisor. A single resize can be done at a time. + + The Scheduling Context Hypervisor Plugin provides a series of + performance counters to StarPU. By incrementing them, StarPU can + help the hypervisor in the resizing decision making process. + + The function sc_hypervisor_init() initializes the hypervisor to use + the strategy provided as parameter and creates the performance + counters (see starpu_sched_ctx_performance_counters). These + performance counters represent actually some callbacks that will be + used by the contexts to notify the information needed by the + hypervisor. + + Scheduling Contexts that have to be resized by the hypervisor must + be first registered to the hypervisor using the function + sc_hypervisor_register_ctx() + + Note: The Hypervisor is actually a worker that takes this role once + certain conditions trigger the resizing process (there is no + additional thread assigned to the hypervisor). 
+ @{ +*/ + +/** + synchronise the hypervisor when several workers try to update its + information +*/ +extern starpu_pthread_mutex_t act_hypervisor_mutex; + +/** + Start the hypervisor with the given policy +*/ +void *sc_hypervisor_init(struct sc_hypervisor_policy *policy); + +/** + Shutdown the hypervisor. + The hypervisor and all information concerning it is cleaned. There + is no synchronization between this function and starpu_shutdown(). + Thus, this should be called after starpu_shutdown(), because the + performance counters will still need allocated callback functions. +*/ +void sc_hypervisor_shutdown(void); + +/** + Register the context to the hypervisor, and indicate the number of + flops the context will execute (used for Gflops rate based strategy) +*/ +void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops); + +/** + Unregister a context from the hypervisor, and so exclude the + context from the resizing process +*/ +void sc_hypervisor_unregister_ctx(unsigned sched_ctx); + +/** + Require resizing the context \p sched_ctx whenever a task tagged + with the id \p task_tag finished executing +*/ +void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag); + +/** + Require reconsidering the distribution of resources over the + indicated scheduling contexts, i.e reevaluate the distribution of + the resources and eventually resize if needed +*/ +void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + +/** + Do not allow the hypervisor to resize a context. +*/ +void sc_hypervisor_stop_resize(unsigned sched_ctx); + +/** + Allow the hypervisor to resize a context if necessary. 
+*/ +void sc_hypervisor_start_resize(unsigned sched_ctx); + +/** + Return the name of the resizing policy used by the hypervisor +*/ +const char *sc_hypervisor_get_policy(void); + +/** + Ask the hypervisor to add workers to a sched_ctx +*/ +void sc_hypervisor_add_workers_to_sched_ctx(int *workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx); + +/** + Ask the hypervisor to remove workers from a sched_ctx +*/ +void sc_hypervisor_remove_workers_from_sched_ctx(int *workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now); + +/** + Ask the hypervisor to move workers from one context to another +*/ +void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now); + +/** + Ask the hypervisor to choose a distribution of workers in the + required contexts +*/ +void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + +/** + Check if there are pending demands of resizing +*/ +unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int *nsched_ctxs, int **workers, int *nworkers); + +/** + Save a demand of resizing +*/ +void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + +/** + Clear the list of pending demands of resizing +*/ +void sc_hypervisor_free_size_req(void); + +/** + Check out if a context can be resized +*/ +unsigned sc_hypervisor_can_resize(unsigned sched_ctx); + +/** + Indicate the types of tasks a context will execute in order to + better decide the sizing of ctxs +*/ +void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size); + +/** + Change dynamically the total number of flops of a context, move the + deadline of the finishing time of the context +*/ +void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops); + +/** + Change dynamically the number of the 
elapsed flops in a context, + modify the past in order to better compute the speed +*/ +void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops); + +/** + Update the min and max workers needed by each context +*/ +void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_nworkers); + +/** + Return a list of contexts that are on the same level in the + hierarchy of contexts +*/ +void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id); + +/** + Returns the number of levels of ctxs registered to the hyp +*/ +unsigned sc_hypervisor_get_nhierarchy_levels(void); + +/** + Return the leaves ctxs from the list of ctxs +*/ +void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves); + +/** + Return the nready flops of all ctxs below in hierarchy of sched_ctx +*/ +double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx); + +void sc_hypervisor_print_overhead(void); + +void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sc_hypervisor/include/sc_hypervisor_config.h b/sc_hypervisor/include/sc_hypervisor_config.h new file mode 100644 index 0000000..36a21a4 --- /dev/null +++ b/sc_hypervisor/include/sc_hypervisor_config.h @@ -0,0 +1,230 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SC_HYPERVISOR_CONFIG_H +#define SC_HYPERVISOR_CONFIG_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_SC_Hypervisor + @{ +*/ + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 3 arguments: an array of int for the workerids to apply + the condition, an int to indicate the size of the array, and a + double value indicating the maximum idle time allowed for a worker + before the resizing process should be triggered +*/ +#define SC_HYPERVISOR_MAX_IDLE -1 + +#define SC_HYPERVISOR_MIN_WORKING -2 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 3 arguments: an array of int for the workerids to apply + the condition, an int to indicate the size of the array, and an int + value indicating the priority of the workers previously mentioned. + The workers with the smallest priority are moved the first. +*/ +#define SC_HYPERVISOR_PRIORITY -3 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument(int) indicating the minimum number of + workers a context should have, underneath this limit the context + cannot execute. 
+*/ +#define SC_HYPERVISOR_MIN_WORKERS -4 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument(int) indicating the maximum number of + workers a context should have, above this limit the context would + not be able to scale +*/ +#define SC_HYPERVISOR_MAX_WORKERS -5 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument(int) indicating the granularity of the + resizing process (the number of workers should be moved from the + context once it is resized). This parameter is ignored for the Gflops + rate based strategy (see \ref ResizingStrategies), the number of + workers that have to be moved is calculated by the strategy. +*/ +#define SC_HYPERVISOR_GRANULARITY -6 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 2 arguments: an array of int for the workerids to apply + the condition and an int to indicate the size of the array. These + workers are not allowed to be moved from the context. +*/ +#define SC_HYPERVISOR_FIXED_WORKERS -7 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument (int) that indicates the minimum number of + tasks that have to be executed before the context could be resized. + This parameter is ignored for the Application Driven strategy (see + \ref ResizingStrategies) where the user indicates exactly when the + resize should be done. +*/ +#define SC_HYPERVISOR_MIN_TASKS -8 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument, a double value indicating the maximum idle + time allowed for workers that have just been moved from other + contexts in the current context. +*/ +#define SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE -9 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument (int) indicating the tag an executed task + should have such that this configuration should be taken into + account. 
+*/ +#define SC_HYPERVISOR_TIME_TO_APPLY -10 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument + */ +#define SC_HYPERVISOR_NULL -11 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument, a double, that indicates the number of + flops needed to be executed before computing the speed of a worker +*/ +#define SC_HYPERVISOR_ISPEED_W_SAMPLE -12 + +/** + This macro is used when calling sc_hypervisor_ctl() and must be + followed by 1 argument, a double, that indicates the number of + flops needed to be executed before computing the speed of a context +*/ +#define SC_HYPERVISOR_ISPEED_CTX_SAMPLE -13 + +#define SC_HYPERVISOR_TIME_SAMPLE -14 + +#define MAX_IDLE_TIME 5000000000 +#define MIN_WORKING_TIME 500 + +/** + Methods that implement a hypervisor resizing policy. +*/ +struct sc_hypervisor_policy_config +{ + /** + Indicate the minimum number of workers needed by the context + */ + int min_nworkers; + + /** + Indicate the maximum number of workers needed by the context + */ + int max_nworkers; + + /** + Indicate the workers granularity of the context + */ + int granularity; + + /** + Indicate the priority of each worker to stay in the context + the smaller the priority the faster it will be moved to + another context + */ + int priority[STARPU_NMAXWORKERS]; + + /** + Indicate the maximum idle time accepted before a resize is + triggered + above this limit the priority of the worker is reduced + */ + double max_idle[STARPU_NMAXWORKERS]; + + /** + Indicate that underneath this limit the priority of the + worker is reduced + */ + double min_working[STARPU_NMAXWORKERS]; + + /** + Indicate which workers can be moved and which ones are + fixed + */ + int fixed_workers[STARPU_NMAXWORKERS]; + + /** + Indicate the maximum idle time accepted before a resize is + triggered for the workers that just arrived in the new + context + */ + double new_workers_max_idle; + + /** + Indicate the sample 
used to compute the instant speed per + worker + */ + double ispeed_w_sample[STARPU_NMAXWORKERS]; + + /** + Indicate the sample used to compute the instant speed per + ctxs + */ + double ispeed_ctx_sample; + + /** + Indicate the sample used to compute the instant speed per + ctx (in seconds) + */ + double time_sample; +}; + +/** + Specify the configuration for a context +*/ +void sc_hypervisor_set_config(unsigned sched_ctx, void *config); + +/** + Return the configuration of a context +*/ +struct sc_hypervisor_policy_config *sc_hypervisor_get_config(unsigned sched_ctx); + +/** + Specify different parameters for the configuration of a context. + The list must be zero-terminated +*/ +void sc_hypervisor_ctl(unsigned sched_ctx, ...); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sc_hypervisor/include/sc_hypervisor_lp.h b/sc_hypervisor/include/sc_hypervisor_lp.h new file mode 100644 index 0000000..510190a --- /dev/null +++ b/sc_hypervisor/include/sc_hypervisor_lp.h @@ -0,0 +1,126 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef SC_HYPERVISOR_LP_H +#define SC_HYPERVISOR_LP_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_SC_Hypervisor_LP Scheduling Context Hypervisor - Linear Programming + @{ +*/ + +#ifdef STARPU_HAVE_GLPK_H +#include +#endif //STARPU_HAVE_GLPK_H + +struct sc_hypervisor_policy_task_pool; +struct types_of_workers; +/** + return tmax, and compute in table res the nr of workers needed by each context such that the system ends up in the smallest tmax +*/ +double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], + int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs); + +/** + return tmax of the system +*/ +double sc_hypervisor_lp_get_tmax(int nw, int *workers); + +/** + the linear programme determines a rational number of resources for each ctx, we round them depending on the type of resource +*/ +void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw]); + +/** + redistribute the resource in contexts by assigning the first x available resources to each one +*/ +void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw); + +/** + make the first distribution of resource in contexts by assigning the first x available resources to each one +*/ +void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned *sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw); + +/** + make the first distribution of resource in contexts by assigning the first x available resources to each one, share not integer no of workers +*/ +void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned *sched_ctxs, int ns, int nw, double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw); + +/** + place 
resources in contexts depending on whether they already have workers or not +*/ +void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs, int *workers, unsigned do_size, struct types_of_workers *tw); + +/** + not used resources are shared between all contexts +*/ +void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers); + +/** + dichotomy btw t1 & t2 +*/ +double sc_hypervisor_lp_find_tmax(double t1, double t2); + +/** + execute the lp through dichotomy +*/ +unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data, + double tmin, double tmax, double smallest_tmax, + double (*lp_estimated_distrib_func)(int lns, int lnw, double ldraft_w_in_s[ns][nw], + unsigned lis_integer, double ltmax, void *lspecifc_data)); + +#ifdef STARPU_HAVE_GLPK_H +/** + linear program that returns 1/tmax, and computes in table res the + nr of workers needed by each context st the system ends up in the + smallest tmax +*/ +double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double speed[nsched_ctxs][ntypes_of_workers], + double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], + unsigned sched_ctxs[nsched_ctxs], double vmax); + +/** + linear program that simulates a distribution of tasks that + minimises the execution time of the tasks in the pool +*/ +double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], + double times[nw][nt], unsigned is_integer, double tmax, unsigned *in_sched_ctxs, + struct sc_hypervisor_policy_task_pool *tmp_task_pools); + +/** + linear program that simulates a distribution of flops over the + workers on particular sample of the execution of the application + such that the entire sample would finish in a minimum amount of + time +*/ +double 
sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, + double **speed, double flops[ns], double **final_flops_on_w); +#endif // STARPU_HAVE_GLPK_H + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sc_hypervisor/include/sc_hypervisor_monitoring.h b/sc_hypervisor/include/sc_hypervisor_monitoring.h new file mode 100644 index 0000000..70e90e9 --- /dev/null +++ b/sc_hypervisor/include/sc_hypervisor_monitoring.h @@ -0,0 +1,279 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2015-2015 Mathieu Lirzin + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SC_HYPERVISOR_MONITORING_H +#define SC_HYPERVISOR_MONITORING_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @ingroup API_SC_Hypervisor + @{ +*/ + +/** + Structure to check if the workers moved to another context are + actually taken into account in that context. +*/ +struct sc_hypervisor_resize_ack +{ + /** + The context receiving the new workers + */ + int receiver_sched_ctx; + + /** + List of workers required to be moved + */ + int *moved_workers; + + /** + Number of workers required to be moved + */ + int nmoved_workers; + + /** + List of workers that actually got in the receiver ctx. If + the value corresponding to a worker is 1, this worker got + moved in the new context. 
+ */ + int *acked_workers; +}; + +/** + Wrapper of the contexts available in StarPU which contains all + information about a context obtained by incrementing the + performance counters. it is attached to a sched_ctx storing + monitoring information +*/ +struct sc_hypervisor_wrapper +{ + /** + the monitored context + */ + unsigned sched_ctx; + + /** + The corresponding resize configuration + */ + struct sc_hypervisor_policy_config *config; + + /** + the start time of the resizing sample of the workers of + this context + */ + double start_time_w[STARPU_NMAXWORKERS]; + + /** + The idle time counter of each worker of the context + */ + double current_idle_time[STARPU_NMAXWORKERS]; + + /** + The time the workers were idle from the last resize + */ + double idle_time[STARPU_NMAXWORKERS]; + + /** + The moment when the workers started being idle + */ + double idle_start_time[STARPU_NMAXWORKERS]; + + /** + Time during which the worker executed tasks + */ + double exec_time[STARPU_NMAXWORKERS]; + + /** + Time when the worker started executing a task + */ + double exec_start_time[STARPU_NMAXWORKERS]; + + /** + List of workers that will leave the context (lazy resizing + process) + */ + int worker_to_be_removed[STARPU_NMAXWORKERS]; + + /** + Number of tasks pushed on each worker in this context + */ + int pushed_tasks[STARPU_NMAXWORKERS]; + + /** + Number of tasks poped from each worker in this context + */ + int poped_tasks[STARPU_NMAXWORKERS]; + + /** + The total number of flops to execute by the context + */ + double total_flops; + + /** + The number of flops executed by each workers of the context + */ + double total_elapsed_flops[STARPU_NMAXWORKERS]; + + /** + number of flops executed since last resizing + */ + double elapsed_flops[STARPU_NMAXWORKERS]; + + /** + Quantity of data (in bytes) used to execute tasks on each + worker in this context + */ + size_t elapsed_data[STARPU_NMAXWORKERS]; + + /** + Number of tasks executed on each worker in this context + */ + int 
elapsed_tasks[STARPU_NMAXWORKERS]; + + /** + the average speed of the type of workers when they belonged + to this context + 0 - cuda 1 - cpu + */ + double ref_speed[2]; + + /** + Number of flops submitted to this context + */ + double submitted_flops; + + /** + Number of flops that still have to be executed by the + workers in this context + */ + double remaining_flops; + + /** + Start time of the resizing sample of this context + */ + double start_time; + + /** + First time a task was pushed to this context + */ + double real_start_time; + + /** + Start time for sample in which the hypervisor is not allowed to + react because too expensive */ + double hyp_react_start_time; + + /** + Structure confirming the last resize finished and a new one + can be done. + Workers do not leave the current context until the receiver + context does not ack the receive of these workers + */ + struct sc_hypervisor_resize_ack resize_ack; + + /** + Mutex needed to synchronize the acknowledgment of the + workers into the receiver context + */ + starpu_pthread_mutex_t mutex; + + /** + Boolean indicating if the hypervisor can use the flops + corresponding to the entire execution of the context + */ + unsigned total_flops_available; + + /** + boolean indicating that a context is being sized + */ + unsigned to_be_sized; + + /** + Boolean indicating if we add the idle of this worker to the + idle of the context + */ + unsigned compute_idle[STARPU_NMAXWORKERS]; + + /** + Boolean indicating if we add the entire idle of this + worker to the idle of the context or just half + */ + unsigned compute_partial_idle[STARPU_NMAXWORKERS]; + + /** + consider the max in the lp + */ + unsigned consider_max; +}; + +/** + Return the wrapper of the given context + @ingroup API_SC_Hypervisor +*/ +struct sc_hypervisor_wrapper *sc_hypervisor_get_wrapper(unsigned sched_ctx); + +/** + Get the list of registered contexts + @ingroup API_SC_Hypervisor +*/ +unsigned *sc_hypervisor_get_sched_ctxs(void); + +/** + Get the 
number of registered contexts + @ingroup API_SC_Hypervisor +*/ +int sc_hypervisor_get_nsched_ctxs(void); + +/** + Get the number of workers of a certain architecture in a context +*/ +int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch); + +/** + Get the number of flops executed by a context since last resizing + (reset to 0 when a resizing is done) + @ingroup API_SC_Hypervisor +*/ +double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w); + +/** + Get the number of flops executed by a context since the beginning +*/ +double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w); + +/** + Compute an average value of the cpu/cuda speed +*/ +double sc_hypervisorsc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); + +/** + Compute the actual speed of all workers of a specific type of worker +*/ +double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sc_hypervisor/include/sc_hypervisor_policy.h b/sc_hypervisor/include/sc_hypervisor_policy.h new file mode 100644 index 0000000..a2116d0 --- /dev/null +++ b/sc_hypervisor/include/sc_hypervisor_policy.h @@ -0,0 +1,227 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SC_HYPERVISOR_POLICY_H +#define SC_HYPERVISOR_POLICY_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_SC_Hypervisor Scheduling Context Hypervisor - Building a new resizing policy + @{ +*/ + +#define HYPERVISOR_REDIM_SAMPLE 0.02 +#define HYPERVISOR_START_REDIM_SAMPLE 0.1 +#define SC_NOTHING 0 +#define SC_IDLE 1 +#define SC_SPEED 2 + +struct types_of_workers +{ + unsigned ncpus; + unsigned ncuda; + unsigned nw; +}; + +/** + Task wrapper linked list + @ingroup API_SC_Hypervisor +*/ +struct sc_hypervisor_policy_task_pool +{ + /** + Which codelet has been executed + */ + struct starpu_codelet *cl; + + /** + Task footprint key + */ + uint32_t footprint; + + /** + Context the task belongs to + */ + unsigned sched_ctx_id; + + /** + Number of tasks of this kind + */ + unsigned long n; + + /** + The quantity of data(in bytes) needed by the task to execute + */ + size_t data_size; + + /** + Other task kinds + */ + struct sc_hypervisor_policy_task_pool *next; +}; + +/** + add task information to a task wrapper linked list +*/ +void sc_hypervisor_policy_add_task_to_pool(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools, size_t data_size); + +/** + remove task information from a task wrapper linked list +*/ +void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools); + +/** + clone a task wrapper linked list +*/ +struct sc_hypervisor_policy_task_pool *sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp); + +/** + get the execution time of the submitted tasks out of starpu's calibration files +*/ +void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools); + +/** + find the 
context with the lowest priority in order to move some workers +*/ +unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move); + +/** + find the first most idle workers of a context +*/ +int *sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch); + +/** + find the first most idle workers in a list +*/ +int *sc_hypervisor_get_idlest_workers_in_list(int *start, int *workers, int nall_workers, int *nworkers, enum starpu_worker_archtype arch); + +/** + find workers that can be moved from a context (if the constraints of min, max, etc allow this) +*/ +int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_worker_archtype arch); + +/** + compute how many workers should be moved from this context +*/ +int sc_hypervisor_compute_nworkers_to_move(unsigned req_sched_ctx); + +/** + check the policy's constraints in order to resize +*/ +unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now); + +/** + check the policy's constraints in order to resize and find a context willing the resources +*/ +unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now); + +/** + compute the speed of a context +*/ +double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper *sc_w); + +/** + get the time of execution of the slowest context +*/ +double sc_hypervisor_get_slowest_ctx_exec_time(void); + +/** + get the time of execution of the fastest context +*/ +double sc_hypervisor_get_fastest_ctx_exec_time(void); + +/** + compute the speed of a workers in a context +*/ +double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker); + +/** + compute the speed of a type of worker in a context +*/ +double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype 
arch); + +/** + compute the speed of a type of worker in a context depending on its history +*/ +double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); + +/** + compute the average speed of a type of worker in all ctxs from the beginning of application +*/ +double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch); + +/** + verify if we need to consider the max in the lp +*/ +void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw); + +/** + get the list of workers grouped by type +*/ +void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw); + +/** + get what type of worker corresponds to a certain index of types of workers +*/ +enum starpu_worker_archtype sc_hypervisor_get_arch_for_index(unsigned w, struct types_of_workers *tw); + +/** + get the index of types of workers corresponding to the type of workers indicated +*/ +unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, struct types_of_workers *tw); + +/** + check if we trigger resizing or not +*/ +unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker); + +/** + check if worker was idle long enough +*/ +unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker); + +/** + check if there is a speed gap between ctxs +*/ +unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); + +/** + check if there is a speed gap between ctxs on one level +*/ +unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs); + +/** + check what triggers resizing (idle, speed, etc.) 
+*/ +unsigned sc_hypervisor_get_resize_criteria(void); + +/** + load information concerning the type of workers into a types_of_workers struct +*/ +struct types_of_workers *sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sc_hypervisor/src/Makefile.am b/sc_hypervisor/src/Makefile.am new file mode 100644 index 0000000..c724295 --- /dev/null +++ b/sc_hypervisor/src/Makefile.am @@ -0,0 +1,46 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-notests.mk + +AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/sc_hypervisor/include/ -I$(top_srcdir)/sc_hypervisor/src $(STARPU_H_CPPFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) + +lib_LTLIBRARIES = libsc_hypervisor.la + +libsc_hypervisor_la_SOURCES = \ + sc_hypervisor.c \ + sc_config.c \ + policies_utils/policy_tools.c \ + policies_utils/speed.c \ + policies_utils/task_pool.c \ + policies_utils/lp_tools.c \ + policies_utils/lp_programs.c \ + policies_utils/dichotomy.c \ + hypervisor_policies/idle_policy.c \ + hypervisor_policies/app_driven_policy.c \ + hypervisor_policies/gflops_rate_policy.c \ + hypervisor_policies/feft_lp_policy.c \ + hypervisor_policies/teft_lp_policy.c \ + hypervisor_policies/ispeed_policy.c \ + hypervisor_policies/ispeed_lp_policy.c \ + hypervisor_policies/throughput_lp_policy.c \ + hypervisor_policies/hard_coded_policy.c \ + hypervisor_policies/perf_count_policy.c + +noinst_HEADERS = \ + sc_hypervisor_intern.h \ + uthash.h diff --git a/sc_hypervisor/src/Makefile.in b/sc_hypervisor/src/Makefile.in new file mode 100644 index 0000000..5b5472e --- /dev/null +++ b/sc_hypervisor/src/Makefile.in @@ -0,0 +1,1139 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = 
: +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = sc_hypervisor/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = 
"" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libsc_hypervisor_la_LIBADD = +am__dirstamp = $(am__leading_dot)dirstamp +am_libsc_hypervisor_la_OBJECTS = sc_hypervisor.lo sc_config.lo \ + policies_utils/policy_tools.lo policies_utils/speed.lo \ + policies_utils/task_pool.lo policies_utils/lp_tools.lo \ + policies_utils/lp_programs.lo policies_utils/dichotomy.lo \ + hypervisor_policies/idle_policy.lo \ + hypervisor_policies/app_driven_policy.lo \ + hypervisor_policies/gflops_rate_policy.lo \ + hypervisor_policies/feft_lp_policy.lo \ + hypervisor_policies/teft_lp_policy.lo \ + hypervisor_policies/ispeed_policy.lo \ + hypervisor_policies/ispeed_lp_policy.lo \ + hypervisor_policies/throughput_lp_policy.lo \ + hypervisor_policies/hard_coded_policy.lo \ + hypervisor_policies/perf_count_policy.lo +libsc_hypervisor_la_OBJECTS = $(am_libsc_hypervisor_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = 
$(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/sc_config.Plo \ + ./$(DEPDIR)/sc_hypervisor.Plo \ + hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo \ + hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo \ + hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo \ + hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo \ + hypervisor_policies/$(DEPDIR)/idle_policy.Plo \ + hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo \ + hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo \ + hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo \ + hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo \ + hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo \ + policies_utils/$(DEPDIR)/dichotomy.Plo \ + policies_utils/$(DEPDIR)/lp_programs.Plo \ + policies_utils/$(DEPDIR)/lp_tools.Plo \ + policies_utils/$(DEPDIR)/policy_tools.Plo \ + policies_utils/$(DEPDIR)/speed.Plo \ + policies_utils/$(DEPDIR)/task_pool.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libsc_hypervisor_la_SOURCES) +DIST_SOURCES = $(libsc_hypervisor_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) 
$(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = 
@ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ 
+LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ 
+POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ 
+STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = 
@epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/sc_hypervisor/include/ -I$(top_srcdir)/sc_hypervisor/src $(STARPU_H_CPPFLAGS) +lib_LTLIBRARIES = libsc_hypervisor.la +libsc_hypervisor_la_SOURCES = \ + sc_hypervisor.c \ + sc_config.c \ + policies_utils/policy_tools.c \ + policies_utils/speed.c \ + policies_utils/task_pool.c \ + policies_utils/lp_tools.c \ + policies_utils/lp_programs.c \ + policies_utils/dichotomy.c \ + hypervisor_policies/idle_policy.c \ + hypervisor_policies/app_driven_policy.c \ + hypervisor_policies/gflops_rate_policy.c \ + hypervisor_policies/feft_lp_policy.c \ + hypervisor_policies/teft_lp_policy.c \ + hypervisor_policies/ispeed_policy.c \ + hypervisor_policies/ispeed_lp_policy.c \ + hypervisor_policies/throughput_lp_policy.c \ + hypervisor_policies/hard_coded_policy.c \ + hypervisor_policies/perf_count_policy.c + +noinst_HEADERS = \ + sc_hypervisor_intern.h \ + uthash.h + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + 
@for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign sc_hypervisor/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + 
$(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +policies_utils/$(am__dirstamp): + @$(MKDIR_P) policies_utils + @: > policies_utils/$(am__dirstamp) +policies_utils/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) policies_utils/$(DEPDIR) + @: > policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/policy_tools.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/speed.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/task_pool.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/lp_tools.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/lp_programs.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +policies_utils/dichotomy.lo: policies_utils/$(am__dirstamp) \ + policies_utils/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/$(am__dirstamp): + @$(MKDIR_P) hypervisor_policies + @: > hypervisor_policies/$(am__dirstamp) +hypervisor_policies/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) hypervisor_policies/$(DEPDIR) + @: > hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/idle_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/app_driven_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/gflops_rate_policy.lo: \ + 
hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/feft_lp_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/teft_lp_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/ispeed_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/ispeed_lp_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/throughput_lp_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/hard_coded_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) +hypervisor_policies/perf_count_policy.lo: \ + hypervisor_policies/$(am__dirstamp) \ + hypervisor_policies/$(DEPDIR)/$(am__dirstamp) + +libsc_hypervisor.la: $(libsc_hypervisor_la_OBJECTS) $(libsc_hypervisor_la_DEPENDENCIES) $(EXTRA_libsc_hypervisor_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(libdir) $(libsc_hypervisor_la_OBJECTS) $(libsc_hypervisor_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f hypervisor_policies/*.$(OBJEXT) + -rm -f hypervisor_policies/*.lo + -rm -f policies_utils/*.$(OBJEXT) + -rm -f policies_utils/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sc_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sc_hypervisor.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/idle_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/dichotomy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/lp_programs.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/lp_tools.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/policy_tools.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/speed.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/task_pool.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf hypervisor_policies/.libs hypervisor_policies/_libs + -rm -rf policies_utils/.libs policies_utils/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) 
$(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f hypervisor_policies/$(DEPDIR)/$(am__dirstamp) + -rm -f hypervisor_policies/$(am__dirstamp) + -rm -f policies_utils/$(DEPDIR)/$(am__dirstamp) + -rm -f policies_utils/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/sc_config.Plo + -rm -f ./$(DEPDIR)/sc_hypervisor.Plo + -rm -f hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/idle_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo + -rm -f policies_utils/$(DEPDIR)/dichotomy.Plo + -rm -f policies_utils/$(DEPDIR)/lp_programs.Plo + -rm -f policies_utils/$(DEPDIR)/lp_tools.Plo + -rm -f policies_utils/$(DEPDIR)/policy_tools.Plo + -rm -f policies_utils/$(DEPDIR)/speed.Plo + -rm -f policies_utils/$(DEPDIR)/task_pool.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/sc_config.Plo + -rm -f ./$(DEPDIR)/sc_hypervisor.Plo + -rm -f hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo + -rm -f 
hypervisor_policies/$(DEPDIR)/idle_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo + -rm -f hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo + -rm -f policies_utils/$(DEPDIR)/dichotomy.Plo + -rm -f policies_utils/$(DEPDIR)/lp_programs.Plo + -rm -f policies_utils/$(DEPDIR)/lp_tools.Plo + -rm -f policies_utils/$(DEPDIR)/policy_tools.Plo + -rm -f policies_utils/$(DEPDIR)/speed.Plo + -rm -f policies_utils/$(DEPDIR)/task_pool.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" 
*void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/sc_hypervisor/src/hypervisor_policies/app_driven_policy.c b/sc_hypervisor/src/hypervisor_policies/app_driven_policy.c new file mode 100644 index 0000000..23b2781 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/app_driven_policy.c @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include + +static void app_driven_handle_post_exec_hook(unsigned sched_ctx, __attribute__((unused)) int task_tag) +{ + sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 1); +} + +struct sc_hypervisor_policy app_driven_policy = +{ + .size_ctxs = NULL, + .handle_poped_task = NULL, + .handle_pushed_task = NULL, + .handle_idle_cycle = NULL, + .handle_idle_end = NULL, + .handle_post_exec_hook = app_driven_handle_post_exec_hook, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "app_driven" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c b/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c new file mode 100644 index 0000000..481b9ec --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c @@ -0,0 +1,381 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" +#include +#include + +unsigned long resize_no = 0; +#ifdef STARPU_HAVE_GLPK_H +static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) +{ + /* for vite */ + int ns = sched_ctxs == NULL ? 
sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("resize_no = %lu %d ctxs\n", resize_no, ns); +#endif + if(ns <= 0) return; + + unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; + unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers; + + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, curr_nworkers); + int nw = tw->nw; + double nworkers_per_ctx[ns][nw]; + + int total_nw[nw]; + sc_hypervisor_group_workers_by_type(tw, total_nw); + + + struct timeval start_time; + struct timeval end_time; + gettimeofday(&start_time, NULL); + + double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs); + gettimeofday(&end_time, NULL); + + long diff_s = end_time.tv_sec - start_time.tv_sec; + long diff_us = end_time.tv_usec - start_time.tv_usec; + + __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; + + if(vmax != -1.0) + { +/* int nworkers_per_ctx_rounded[ns][nw]; */ +/* sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded); */ +/* // sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw); */ +/* sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw); */ + sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx, workers, curr_nworkers, tw); + + sc_hypervisor_lp_share_remaining_resources(ns, curr_sched_ctxs, curr_nworkers, workers); + } +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("*****finished resize \n"); +#endif + free(tw); + return; +} + +static void _try_resizing_hierarchically(unsigned levels, unsigned current_level, unsigned *sched_ctxs, unsigned nsched_ctxs, int *pus, int npus) +{ + if(levels == 0) + return; + + 
_try_resizing(sched_ctxs, nsched_ctxs, pus, npus); + + unsigned s; + for(s = 0; s < nsched_ctxs; s++) + { + unsigned *sched_ctxs_child; + int nsched_ctxs_child = 0; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, current_level+1, sched_ctxs[s]); + if(nsched_ctxs_child > 0) + { + int *pus_father; + unsigned npus_father = 0; + npus_father = starpu_sched_ctx_get_workers_list(sched_ctxs[s], &pus_father); + + _try_resizing_hierarchically(levels-1, current_level+1, sched_ctxs_child, nsched_ctxs_child, pus_father, npus_father); + + free(pus_father); + free(sched_ctxs_child); + } + } + return; +} + +static unsigned _get_min_level(unsigned *sched_ctxs, int nsched_ctxs) +{ + unsigned min = sc_hypervisor_get_nhierarchy_levels(); + int s; + for(s = 0; s < nsched_ctxs; s++) + { + unsigned level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]); + if(level < min) + min = level; + } + return min; +} + +static unsigned _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *first_level, int *nsched_ctxs_first_level) +{ + unsigned min = _get_min_level(sched_ctxs, nsched_ctxs); + int s; + for(s = 0; s < nsched_ctxs; s++) + if(starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]) == min) + first_level[(*nsched_ctxs_first_level)++] = sched_ctxs[s]; + return min; +} + +static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) +{ + starpu_fxt_trace_user_event(resize_no); + + unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); + if(nhierarchy_levels > 1) + { + if(nsched_ctxs == -1) + { + unsigned *sched_ctxs2; + int nsched_ctxs2; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS); + + if(nsched_ctxs2 > 0) + { + _try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs2, nsched_ctxs2, workers, nworkers); + free(sched_ctxs2); + } + } + else + { + unsigned first_level[nsched_ctxs]; + int nsched_ctxs_first_level = 0; + int min = _get_first_level(sched_ctxs, 
nsched_ctxs, first_level, &nsched_ctxs_first_level); + + _try_resizing_hierarchically(nhierarchy_levels, min, first_level, nsched_ctxs_first_level, workers, nworkers); + } + } + else + _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); + resize_no++; +} + +static void _resize_if_speed_diff(unsigned sched_ctx, int worker) +{ + (void)worker; + unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); + if(nhierarchy_levels > 1) + { + + int current_level = (int)starpu_sched_ctx_get_hierarchy_level(sched_ctx); + if(current_level == 0) + { + _resize(NULL, -1, NULL, -1); + return; + } + + unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); + int level; + int *pus_father_old = NULL; + unsigned npus_father_old = 0; + unsigned *sched_ctxs_old = NULL; + int nsched_ctxs_old = 0; + unsigned is_speed_diff = 0; + + for(level = current_level ; level >= 0; level--) + { + int *pus_father = NULL; + int npus_father = -1; + if(level > 0) + npus_father = starpu_sched_ctx_get_workers_list(father, &pus_father); + + + unsigned *sched_ctxs = NULL; + int nsched_ctxs = 0; + is_speed_diff = sc_hypervisor_check_speed_gap_btw_ctxs_on_level(level, pus_father, npus_father, father, &sched_ctxs, &nsched_ctxs); + if(!is_speed_diff) + { + if(level == current_level) + { + if(pus_father) + free(pus_father); + if(sched_ctxs) + free(sched_ctxs); + pus_father = NULL; + sched_ctxs = NULL; + break; + } + else + { + _resize(sched_ctxs_old, nsched_ctxs_old, pus_father_old, npus_father_old); + + if(pus_father_old) + free(pus_father_old); + if(sched_ctxs_old) + free(sched_ctxs_old); + pus_father_old = NULL; + sched_ctxs_old = NULL; + + if(pus_father) + free(pus_father); + if(nsched_ctxs > 0) + free(sched_ctxs); + pus_father = NULL; + sched_ctxs = NULL; + break; + } + } + if(pus_father_old) + free(pus_father_old); + if(sched_ctxs_old) + free(sched_ctxs_old); + + pus_father_old = pus_father; + sched_ctxs_old = sched_ctxs; + npus_father_old = npus_father; + nsched_ctxs_old = nsched_ctxs; 
+ + father = level > 1 ? starpu_sched_ctx_get_inheritor(father) : STARPU_NMAX_SCHED_CTXS; + } + if(is_speed_diff) + { + if(pus_father_old) + free(pus_father_old); + if(sched_ctxs_old) + free(sched_ctxs_old); + + _resize(NULL, -1, NULL, -1); + } + } + else + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_IDLE) + { + + _resize(NULL, -1, NULL, -1); + } + else + { + if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) + _resize(NULL, -1, NULL, -1); + } + } + return; +} + +static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, + __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) +{ + if(worker == -2) return; + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_SPEED) + { + + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + _resize_if_speed_diff(sched_ctx, worker); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } +} + +static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) +{ + STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + + struct sc_hypervisor_wrapper* sc_w = NULL; + int s = 0; + for(s = 0; s < nsched_ctxs; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + sc_w->to_be_sized = 1; + } + + _resize(sched_ctxs, nsched_ctxs, workers, nworkers); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("finished size ctxs\n"); +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); +} + +static void _resize_leaves(int worker) +{ + unsigned s; + unsigned *sched_ctxs = NULL; + unsigned nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs); + unsigned workers_sched_ctxs[nsched_ctxs]; + unsigned nworkers_sched_ctxs = 0; + + struct sc_hypervisor_wrapper *sc_w = NULL; + for(s = 0; s < nsched_ctxs; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + if(sc_w->sched_ctx != 
STARPU_NMAX_SCHED_CTXS) + { + workers_sched_ctxs[nworkers_sched_ctxs++] = sched_ctxs[s]; + } + } + + free(sched_ctxs); + + unsigned leaves[nsched_ctxs]; + int nleaves = 0; + sc_hypervisor_get_leaves(workers_sched_ctxs, nworkers_sched_ctxs, leaves, &nleaves); + int x; + for(x = 0; x < nleaves; x++) + _resize_if_speed_diff(leaves[x], worker); +} + +static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + (void)sched_ctx; + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING)// && criteria == SC_IDLE) + { + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + _resize_leaves(worker); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } +} + +static void feft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , + int *workers, int nworkers) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + struct sc_hypervisor_wrapper* sc_w = NULL; + int s = 0; + for(s = 0; s < nsched_ctxs; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + + if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + return; + } + } + + _resize(sched_ctxs, nsched_ctxs, workers, nworkers); + + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +struct sc_hypervisor_policy feft_lp_policy = +{ + .size_ctxs = feft_lp_size_ctxs, + .resize_ctxs = feft_lp_resize_ctxs, + .handle_poped_task = feft_lp_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = feft_lp_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "feft_lp" +}; + +#endif /* STARPU_HAVE_GLPK_H */ diff --git a/sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c b/sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c new file mode 100644 index 0000000..95f70c4 --- 
/dev/null +++ b/sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c @@ -0,0 +1,312 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "sc_hypervisor_policy.h" + +static double _get_total_elapsed_flops_per_sched_ctx(unsigned sched_ctx) +{ + struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); + double ret_val = 0.0; + int i; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + ret_val += sc_w->total_elapsed_flops[i]; + return ret_val; +} + +double _get_exp_end(unsigned sched_ctx) +{ + struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx); + double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); + + if(elapsed_flops >= 1.0) + { + double curr_time = starpu_timing_now(); + double elapsed_time = curr_time - sc_w->start_time; + double exp_end = (elapsed_time * sc_w->remaining_flops / elapsed_flops) + curr_time; + return exp_end; + } + return -1.0; +} + +/* computes the instructions left to be executed out of the total instructions to execute */ +double _get_flops_left_pct(unsigned sched_ctx) +{ + struct sc_hypervisor_wrapper *wrapper = sc_hypervisor_get_wrapper(sched_ctx); + double total_elapsed_flops = _get_total_elapsed_flops_per_sched_ctx(sched_ctx); + if(wrapper->total_flops == total_elapsed_flops || total_elapsed_flops > wrapper->total_flops) + return 
0.0; + + return (wrapper->total_flops - total_elapsed_flops)/wrapper->total_flops; +} + +/* select the workers needed to be moved in order to force the sender and the receiver context to finish simultaneously */ +static int* _get_workers_to_move(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *nworkers) +{ + struct sc_hypervisor_wrapper* sender_sc_w = sc_hypervisor_get_wrapper(sender_sched_ctx); + struct sc_hypervisor_wrapper* receiver_sc_w = sc_hypervisor_get_wrapper(receiver_sched_ctx); + int *workers = NULL; + double v_receiver = sc_hypervisor_get_ctx_speed(receiver_sc_w); + double receiver_remainig_flops = receiver_sc_w->remaining_flops; + double sender_exp_end = _get_exp_end(sender_sched_ctx); + double sender_v_cpu = sc_hypervisor_get_speed_per_worker_type(sender_sc_w, STARPU_CPU_WORKER); + double v_for_rctx = (receiver_remainig_flops/(sender_exp_end - starpu_timing_now())) - v_receiver; + + int nworkers_needed = v_for_rctx/sender_v_cpu; +/* printf("%d->%d: v_rec %lf v %lf v_cpu %lf w_needed %d \n", sender_sched_ctx, receiver_sched_ctx, */ +/* v_receiver, v_for_rctx, sender_v_cpu, nworkers_needed); */ + if(nworkers_needed > 0) + { + struct sc_hypervisor_policy_config *sender_config = sc_hypervisor_get_config(sender_sched_ctx); + int potential_moving_cpus = sc_hypervisor_get_movable_nworkers(sender_config, sender_sched_ctx, STARPU_CPU_WORKER); + int potential_moving_gpus = sc_hypervisor_get_movable_nworkers(sender_config, sender_sched_ctx, STARPU_CUDA_WORKER); + int sender_nworkers = (int)starpu_sched_ctx_get_nworkers(sender_sched_ctx); + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(receiver_sched_ctx); + int nworkers_ctx = (int)starpu_sched_ctx_get_nworkers(receiver_sched_ctx); + + if(nworkers_needed < (potential_moving_cpus + 5 * potential_moving_gpus)) + { + if((sender_nworkers - nworkers_needed) >= sender_config->min_nworkers) + { + if((nworkers_ctx + nworkers_needed) > config->max_nworkers) + nworkers_needed = 
nworkers_ctx > config->max_nworkers ? 0 : (config->max_nworkers - nworkers_ctx); + + if(nworkers_needed > 0) + { + int ngpus = nworkers_needed / 5; + int *gpus; + gpus = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &ngpus, STARPU_CUDA_WORKER); + int ncpus = nworkers_needed - ngpus; + int *cpus; + cpus = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &ncpus, STARPU_CPU_WORKER); + workers = (int*)malloc(nworkers_needed*sizeof(int)); + int i; + printf("%d: gpus: ", nworkers_needed); + for(i = 0; i < ngpus; i++) + { + workers[(*nworkers)++] = gpus[i]; + printf("%d ", gpus[i]); + } + printf(" cpus:"); + for(i = 0; i < ncpus; i++) + { + workers[(*nworkers)++] = cpus[i]; + printf("%d ", cpus[i]); + } + printf("\n"); + free(gpus); + free(cpus); + } + } + } + else + { + /*if the needed number of workers is to big we only move the number of workers + corresponding to the granularity set by the user */ + int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(sender_sched_ctx); + + if(sender_nworkers - nworkers_to_move >= sender_config->min_nworkers) + { + int nshared_workers = (int)starpu_sched_ctx_get_nshared_workers(sender_sched_ctx, receiver_sched_ctx); + if((nworkers_ctx + nworkers_to_move - nshared_workers) > config->max_nworkers) + nworkers_to_move = nworkers_ctx > config->max_nworkers ? 
0 : (config->max_nworkers - nworkers_ctx + nshared_workers); + + if(nworkers_to_move > 0) + { + workers = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER); + *nworkers = nworkers_to_move; + } + } + } + } + return workers; +} + +static unsigned _gflops_rate_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize) +{ + int ret = 1; + if(force_resize) + STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + else + ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + int nworkers_to_move = 0; + int *workers_to_move = _get_workers_to_move(sender_sched_ctx, receiver_sched_ctx, &nworkers_to_move); + if(nworkers_to_move > 0) + { + sc_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move, 0); + + struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx); + int i; + for(i = 0; i < nworkers_to_move; i++) + new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle; + + free(workers_to_move); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + return 1; + } + return 0; + +} + +static int _find_fastest_sched_ctx() +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + double first_exp_end = _get_exp_end(sched_ctxs[0]); + int fastest_sched_ctx = first_exp_end == -1.0 ? 
-1 : (int)sched_ctxs[0]; + double curr_exp_end = 0.0; + int i; + for(i = 1; i < nsched_ctxs; i++) + { + curr_exp_end = _get_exp_end(sched_ctxs[i]); + if((curr_exp_end < first_exp_end || first_exp_end == -1.0) && curr_exp_end != -1.0) + { + first_exp_end = curr_exp_end; + fastest_sched_ctx = sched_ctxs[i]; + } + } + + return fastest_sched_ctx; + +} + +static int _find_slowest_sched_ctx() +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + int slowest_sched_ctx = -1; + double curr_exp_end = 0.0; + double last_exp_end = -1.0; + int i; + for(i = 0; i < nsched_ctxs; i++) + { + curr_exp_end = _get_exp_end(sched_ctxs[i]); + /*if it hasn't started bc of no resources give it priority */ + if(curr_exp_end == -1.0) + return sched_ctxs[i]; + if(curr_exp_end > last_exp_end) + { + slowest_sched_ctx = sched_ctxs[i]; + last_exp_end = curr_exp_end; + } + } + + return slowest_sched_ctx; + +} + +static int _find_slowest_available_sched_ctx(unsigned sched_ctx) +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + int slowest_sched_ctx = -1; + double curr_exp_end = 0.0; + double last_exp_end = -1.0; + int i; + for(i = 0; i < nsched_ctxs; i++) + { + if(sched_ctxs[i] != sched_ctx) + { + curr_exp_end = _get_exp_end(sched_ctxs[i]); + /*if it hasn't started bc of no resources give it priority */ + if(curr_exp_end == -1.0) + return sched_ctxs[i]; + if(last_exp_end < curr_exp_end) + { + slowest_sched_ctx = sched_ctxs[i]; + last_exp_end = curr_exp_end; + } + } + } + + return slowest_sched_ctx; + +} + +static void gflops_rate_resize(unsigned sched_ctx) +{ + _get_exp_end(sched_ctx); + double flops_left_pct = _get_flops_left_pct(sched_ctx); + + /* if the context finished all the instructions it had to execute + we move all the resources to the slowest context */ + if(flops_left_pct == 0.0f) + { + int slowest_sched_ctx = _find_slowest_available_sched_ctx(sched_ctx); + 
if(slowest_sched_ctx != -1) + { + double slowest_flops_left_pct = _get_flops_left_pct(slowest_sched_ctx); + if(slowest_flops_left_pct != 0.0f) + { + struct sc_hypervisor_policy_config* config = sc_hypervisor_get_config(sched_ctx); + config->min_nworkers = 0; + config->max_nworkers = 0; + printf("ctx %u finished & gives away the res to %d; slow_left %lf\n", sched_ctx, slowest_sched_ctx, slowest_flops_left_pct); + sc_hypervisor_policy_resize(sched_ctx, slowest_sched_ctx, 1, 1); + sc_hypervisor_stop_resize(slowest_sched_ctx); + } + } + } + + int fastest_sched_ctx = _find_fastest_sched_ctx(); + int slowest_sched_ctx = _find_slowest_sched_ctx(); + + if(fastest_sched_ctx != -1 && slowest_sched_ctx != -1 && fastest_sched_ctx != slowest_sched_ctx) + { + double fastest_exp_end = _get_exp_end(fastest_sched_ctx); + double slowest_exp_end = _get_exp_end(slowest_sched_ctx); + + if((slowest_exp_end == -1.0 && fastest_exp_end != -1.0) || ((fastest_exp_end + (fastest_exp_end*0.5)) < slowest_exp_end)) + { + double fast_flops_left_pct = _get_flops_left_pct(fastest_sched_ctx); + if(fast_flops_left_pct < 0.8) + { + + struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(slowest_sched_ctx); + double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); + if((elapsed_flops/sc_w->total_flops) > 0.1) + _gflops_rate_resize(fastest_sched_ctx, slowest_sched_ctx, 0); + } + } + } +} + +static void gflops_rate_handle_poped_task(unsigned sched_ctx, __attribute__((unused)) int worker, + __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) +{ + gflops_rate_resize(sched_ctx); +} + +struct sc_hypervisor_policy gflops_rate_policy = +{ + .size_ctxs = NULL, + .resize_ctxs = NULL, + .handle_poped_task = gflops_rate_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = NULL, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + 
.name = "gflops_rate" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/hard_coded_policy.c b/sc_hypervisor/src/hypervisor_policies/hard_coded_policy.c new file mode 100644 index 0000000..943f62a --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/hard_coded_policy.c @@ -0,0 +1,122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "sc_hypervisor_policy.h" +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" + +unsigned hard_coded_worker_belong_to_other_sched_ctx(unsigned sched_ctx, int worker) +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + int i; + for(i = 0; i < nsched_ctxs; i++) + if(sched_ctxs[i] != sched_ctx && starpu_sched_ctx_contains_worker(worker, sched_ctxs[i])) + return 1; + return 0; +} + +void hard_coded_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING)// && criteria == SC_SPEED) + { + + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { +// if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) +// if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) + if(sc_hypervisor_check_idle(sched_ctx, worker)) + { + if(hard_coded_worker_belong_to_other_sched_ctx(sched_ctx, worker)) + 
sc_hypervisor_remove_workers_from_sched_ctx(&worker, 1, sched_ctx, 1); + else + { + // sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int ns = sc_hypervisor_get_nsched_ctxs(); + + + int nworkers = (int)starpu_worker_get_count(); + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(NULL, nworkers); + int nw = tw->nw; + double w_in_s[ns][nw]; + w_in_s[0][0] = 1; + w_in_s[0][1] = 3; + + w_in_s[1][0] = 8; + w_in_s[1][1] = 0; + +// sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, NULL, 1, tw); + sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(sched_ctxs, ns, tw->nw, w_in_s, NULL, nworkers, tw); + free(tw); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } +} +static void hard_coded_handle_poped_task(unsigned sched_ctx, __attribute__((unused))int worker, struct starpu_task *task, uint32_t footprint) +{ + (void)task; + (void)footprint; + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_SPEED) + { + + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + + if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) + { + // sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int ns = sc_hypervisor_get_nsched_ctxs(); + + int nworkers = (int)starpu_worker_get_count(); + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(NULL, nworkers); + int nw = tw->nw; + double w_in_s[ns][nw]; + w_in_s[0][0] = 1; + w_in_s[0][1] = 3; + + w_in_s[1][0] = 8; + w_in_s[1][1] = 0; +// sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, NULL, 1, tw); + sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(sched_ctxs, ns, tw->nw, w_in_s, NULL, nworkers, tw); + free(tw); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } +} +struct sc_hypervisor_policy 
hard_coded_policy = +{ + .size_ctxs = NULL, + .handle_poped_task = hard_coded_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = hard_coded_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "hard_coded" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/idle_policy.c b/sc_hypervisor/src/hypervisor_policies/idle_policy.c new file mode 100644 index 0000000..d3049b5 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/idle_policy.c @@ -0,0 +1,55 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "sc_hypervisor_policy.h" + +unsigned worker_belong_to_other_sched_ctx(unsigned sched_ctx, int worker) +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + int i; + for(i = 0; i < nsched_ctxs; i++) + if(sched_ctxs[i] != sched_ctx && starpu_sched_ctx_contains_worker(worker, sched_ctxs[i])) + return 1; + return 0; +} + +void idle_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) + { + if(worker_belong_to_other_sched_ctx(sched_ctx, worker)) + sc_hypervisor_remove_workers_from_sched_ctx(&worker, 1, sched_ctx, 1); + else + sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); + } +} + +struct sc_hypervisor_policy idle_policy = +{ + .size_ctxs = NULL, + .handle_poped_task = NULL, + .handle_pushed_task = NULL, + .handle_idle_cycle = idle_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "idle" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c b/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c new file mode 100644 index 0000000..2c925e2 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c @@ -0,0 +1,258 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" +#include +#include + +struct ispeed_lp_data +{ + double **speed; + double *flops; + double **flops_on_w; + int *workers; +}; + +#ifdef STARPU_HAVE_GLPK_H +static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], + unsigned is_integer, double tmax, void *specific_data) +{ + struct ispeed_lp_data *sd = (struct ispeed_lp_data *)specific_data; + + double **speed = sd->speed; + double *flops = sd->flops; + + double **final_flops_on_w = sd->flops_on_w; + + return sc_hypervisor_lp_simulate_distrib_flops_on_sample(ns, nw, final_w_in_s, is_integer, tmax, speed, flops, final_flops_on_w); +} + +static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double **flops_on_w, unsigned *sched_ctxs, int *workers) +{ + double *flops = (double*)malloc(ns*sizeof(double)); + double **speed = (double **)malloc(ns*sizeof(double*)); + int i; + for(i = 0; i < ns; i++) + speed[i] = (double*)malloc(nw*sizeof(double)); + + int w,s; + + struct sc_hypervisor_wrapper* sc_w = NULL; + for(s = 0; s < ns; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + for(w = 0; w < nw; w++) + { + w_in_s[s][w] = 0.0; + int worker = workers == NULL ? 
w : workers[w]; + + speed[s][w] = sc_hypervisor_get_speed_per_worker(sc_w, worker); + if(speed[s][w] == -1.0) + { + enum starpu_worker_archtype arch = starpu_worker_get_type(worker); + speed[s][w] = sc_hypervisor_get_speed(sc_w, arch); + if(arch == STARPU_CUDA_WORKER) + { + unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); + if(!worker_in_ctx) + { + double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)) / 1000; + speed[s][w] = (speed[s][w] * transfer_speed) / (speed[s][w] + transfer_speed); + } + } + + } + +// printf("v[w%d][s%d] = %lf\n",w, s, speed[s][w]); + } + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]); + flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */ + } + + /* take the exec time of the slowest ctx + as starting point and then try to minimize it + as increasing it a little for the faster ctxs */ + double tmax = sc_hypervisor_get_slowest_ctx_exec_time(); + double smallest_tmax = sc_hypervisor_get_fastest_ctx_exec_time(); //tmax - 0.5*tmax; +// printf("tmax %lf smallest %lf\n", tmax, smallest_tmax); + double tmin = 0.0; + + struct ispeed_lp_data specific_data; + specific_data.speed = speed; + specific_data.flops = flops; + specific_data.flops_on_w = flops_on_w; + specific_data.workers = workers; + + unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, + tmin, tmax, smallest_tmax, _compute_workers_distrib); + + for(i = 0; i < ns; i++) + free(speed[i]); + free(speed); + + return found_sol; +} + +static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; + int nw = nworkers == -1 ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ + unsigned *curr_sched_ctxs = sched_ctxs == NULL ? 
sc_hypervisor_get_sched_ctxs() : sched_ctxs; + + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); + int ntypes_of_workers = tw->nw; + + + double w_in_s[ns][nw]; + + double **flops_on_w = (double**)malloc(ns*sizeof(double*)); + int i; + for(i = 0; i < ns; i++) + flops_on_w[i] = (double*)malloc(nw*sizeof(double)); + + struct timeval start_time; + struct timeval end_time; + gettimeofday(&start_time, NULL); + unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw, w_in_s, flops_on_w, curr_sched_ctxs, workers); + gettimeofday(&end_time, NULL); + + long diff_s = end_time.tv_sec - start_time.tv_sec; + long diff_us = end_time.tv_usec - start_time.tv_usec; + + __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; + + /* if we did find at least one solution redistribute the resources */ + if(found_sol) + { + int w, s; + double nworkers_per_ctx[ns][ntypes_of_workers]; + int nworkers_per_ctx_rounded[ns][ntypes_of_workers]; + for(s = 0; s < ns; s++) + { + for(w = 0; w < ntypes_of_workers; w++) + { + nworkers_per_ctx[s][w] = 0.0; + nworkers_per_ctx_rounded[s][w] = 0; + } + } + + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = starpu_worker_get_type(w); + + int idx = sc_hypervisor_get_index_for_arch(arch, tw); + nworkers_per_ctx[s][idx] += w_in_s[s][w]; + if(arch == STARPU_CUDA_WORKER) + { + if(w_in_s[s][w] >= 0.3) + nworkers_per_ctx_rounded[s][idx]++; + } + else + { + if(w_in_s[s][w] > 0.5) + nworkers_per_ctx_rounded[s][idx]++; + } + } + } +/* for(s = 0; s < ns; s++) */ +/* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */ +/* nworkers_rounded[s][1], nworkers_rounded[s][0]); */ + + sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw); + } + free(tw); + for(i = 0; i < ns; i++) + free(flops_on_w[i]); + free(flops_on_w); +} + 
+static void ispeed_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, + __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_SPEED) + { + if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) + { + _try_resizing(NULL, -1, NULL, -1); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_IDLE) + { + + if(sc_hypervisor_check_idle(sched_ctx, worker)) + { + _try_resizing(NULL, -1, NULL, -1); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void ispeed_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void ispeed_lp_end_ctx(__attribute__((unused))unsigned sched_ctx) +{ +/* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */ +/* int worker; */ +/* for(worker = 0; worker < 12; worker++) */ +/* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */ + + return; +} + +struct sc_hypervisor_policy ispeed_lp_policy = +{ + .size_ctxs = NULL, + .resize_ctxs = ispeed_lp_resize_ctxs, + .handle_poped_task = ispeed_lp_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = ispeed_lp_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + 
.handle_submitted_job = NULL, + .end_ctx = ispeed_lp_end_ctx, + .init_worker = NULL, + .custom = 0, + .name = "ispeed_lp" +}; + +#endif /* STARPU_HAVE_GLPK_H */ diff --git a/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c b/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c new file mode 100644 index 0000000..6a30732 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c @@ -0,0 +1,195 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "sc_hypervisor_policy.h" + +static unsigned _get_fastest_sched_ctx(void) +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + int fastest_sched_ctx = STARPU_NMAX_SCHED_CTXS; + double curr_speed = 0.0; + double biggest_speed = 0.0; + int i; + for(i = 0; i < nsched_ctxs; i++) + { + curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i])); + if(curr_speed > biggest_speed) + { + fastest_sched_ctx = sched_ctxs[i]; + biggest_speed = curr_speed; + } + } + + return fastest_sched_ctx; +} + +static unsigned _get_slowest_sched_ctx(void) +{ + unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + double smallest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[0])); + unsigned slowest_sched_ctx = smallest_speed == -1.0 ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0]; + double curr_speed = 0.0; + int i; + for(i = 1; i < nsched_ctxs; i++) + { + curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i])); + if((curr_speed < smallest_speed || smallest_speed == 0.0) && curr_speed != -1.0) + { + smallest_speed = curr_speed; + slowest_sched_ctx = sched_ctxs[i]; + } + } + + return slowest_sched_ctx; +} + + +/* get first nworkers with the highest idle time in the context */ +static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch) +{ + struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); + + int *curr_workers = (int*)malloc((*nworkers) * sizeof(int)); + int i; + for(i = 0; i < *nworkers; i++) + curr_workers[i] = -1; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + int index; + int worker; + int considered = 0; + + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, 
&it); + for(index = 0; index < *nworkers; index++) + { + while(workers->has_next(workers, &it)) + { + considered = 0; + worker = workers->get_next(workers, &it); + enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); + if(arch == STARPU_ANY_WORKER || curr_arch == arch) + { + + if(!config->fixed_workers[worker]) + { + for(i = 0; i < index; i++) + { + if(curr_workers[i] == worker) + { + considered = 1; + break; + } + } + + if(!considered) + { + double worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, worker); + if(worker_speed != -1.0) + { + /* the first iteration*/ + if(curr_workers[index] < 0) + curr_workers[index] = worker; + /* small priority worker is the first to leave the ctx*/ + else if(config->priority[worker] < + config->priority[curr_workers[index]]) + curr_workers[index] = worker; + /* if we don't consider priorities check for the workers + with the biggest idle time */ + else if(config->priority[worker] == + config->priority[curr_workers[index]]) + { + double curr_worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, curr_workers[index]); +// printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_speed, curr_workers[index], curr_worker_speed); + if(worker_speed < curr_worker_speed && curr_worker_speed != -1.0) + { + curr_workers[index] = worker; + } + } + } + } + } + } + } + + if(curr_workers[index] < 0) + { + *nworkers = index; + break; + } + } + return curr_workers; +} + +static void ispeed_handle_poped_task(unsigned sched_ctx, int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) + { + unsigned fastest_sched_ctx = _get_fastest_sched_ctx(); + unsigned slowest_sched_ctx = _get_slowest_sched_ctx(); + if(fastest_sched_ctx != STARPU_NMAX_SCHED_CTXS && slowest_sched_ctx != STARPU_NMAX_SCHED_CTXS && fastest_sched_ctx != 
slowest_sched_ctx) + { + int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(fastest_sched_ctx); + if(nworkers_to_move > 0) + { + int *workers_to_move = _get_slowest_workers(fastest_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER); + if(nworkers_to_move > 0) + { + double new_speed = 0.0; + int i; + for(i = 0; i < nworkers_to_move; i++) + new_speed += sc_hypervisor_get_speed_per_worker(sc_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]); + double fastest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(fastest_sched_ctx)); + double slowest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(slowest_sched_ctx)); +// printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, +// slowest_speed, workers_to_move[0], new_speed); + if(fastest_speed != -1.0 && slowest_speed != -1.0 && (slowest_speed + new_speed) <= (fastest_speed - new_speed)) + { + sc_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0); + } + } + + free(workers_to_move); + } + + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +struct sc_hypervisor_policy ispeed_policy = { + .size_ctxs = NULL, + .handle_poped_task = ispeed_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = NULL, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "ispeed" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/perf_count_policy.c b/sc_hypervisor/src/hypervisor_policies/perf_count_policy.c new file mode 100644 index 0000000..b2bc8f8 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/perf_count_policy.c @@ -0,0 +1,371 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "sc_hypervisor_policy.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct perf_event_attr pe_instr[STARPU_NMAXWORKERS]; +/* struct perf_event_attr pe_cycles[STARPU_NMAXWORKERS]; */ +/* struct perf_event_attr pe_cache_misses[STARPU_NMAXWORKERS]; */ +/* struct perf_event_attr pe_cache_refs[STARPU_NMAXWORKERS]; */ +/* struct perf_event_attr pe_branch_instr[STARPU_NMAXWORKERS]; */ +struct perf_event_attr pe_fps[STARPU_NMAXWORKERS]; + +int fd_instr[STARPU_NMAXWORKERS]; +/* int fd_cycles[STARPU_NMAXWORKERS]; */ +/* int fd_cache_misses[STARPU_NMAXWORKERS]; */ +/* int fd_cache_refs[STARPU_NMAXWORKERS]; */ +/* int fd_branch_instr[STARPU_NMAXWORKERS]; */ +int fd_fps[STARPU_NMAXWORKERS]; +unsigned perf_event_opened[STARPU_NMAXWORKERS]; + +long long total_instr[STARPU_NMAX_SCHED_CTXS]; +/* long long total_cycles[STARPU_NMAX_SCHED_CTXS]; */ +/* long long total_time[STARPU_NMAX_SCHED_CTXS]; */ + +/* long long total_cache_misses[STARPU_NMAX_SCHED_CTXS]; */ +/* long long total_cache_refs[STARPU_NMAX_SCHED_CTXS]; */ + +/* long long total_branch_instr[STARPU_NMAX_SCHED_CTXS]; */ +long long total_fps[STARPU_NMAX_SCHED_CTXS]; + +struct read_format +{ + uint64_t value; /* The value of the event */ + uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + uint64_t time_running; /* if 
PERF_FORMAT_TOTAL_TIME_RUNNING */ + uint64_t id; /* if PERF_FORMAT_ID */ +}; + +static long perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) +{ + int ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + return ret; +} + +void print_results_for_worker(int workerid, unsigned sched_ctx, struct starpu_task *task) +{ + ssize_t rread; + long long instr, /*cycles, cache_misses, cache_refs, branch_instr,*/ fps; + rread = read(fd_instr[workerid], &instr, sizeof(instr)); + assert(rread == sizeof(instr)); + /* read(fd_cycles[workerid], &cycles, sizeof(long long)); */ + /* read(fd_cache_misses[workerid], &cache_misses, sizeof(long long)); */ + /* read(fd_cache_refs[workerid], &cache_refs, sizeof(long long)); */ + /* read(fd_branch_instr[workerid], &branch_instr, sizeof(long long)); */ + rread = read(fd_fps[workerid], &fps, sizeof(long long)); + assert(rread == sizeof(long long)); + + total_instr[sched_ctx] += instr; + /* total_cycles[sched_ctx] += cycles; */ + /* total_cache_misses[sched_ctx] += cache_misses; */ + /* total_cache_refs[sched_ctx] += cache_refs; */ + /* total_branch_instr[sched_ctx] += branch_instr; */ + total_fps[sched_ctx] += fps; + + printf("Instrs %lf M instr of worker %lf M\n", (double)total_instr[sched_ctx]/1000000, + (double)instr/1000000); + printf("Fps %lf M curr fps %lf M \n", (double)total_fps[sched_ctx]/1000000, + (double)fps/1000000); + + printf("Task Flops %lf k %s \n", task->flops/1000, (task->cl && task->cl->model) ? 
task->cl->model->symbol : "task null"); + printf("-------------------------------------------\n"); + +} + +void print_results_for_ctx(unsigned sched_ctx, struct starpu_task *task) +{ + long long curr_total_instr = 0; + /* long long curr_total_cycles = 0; */ + /* long long curr_total_cache_misses = 0; */ + /* long long curr_total_cache_refs = 0; */ + /* long long curr_total_branch_instr = 0; */ + long long curr_total_fps = 0; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + + struct starpu_sched_ctx_iterator it; + + int workerid; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + ssize_t rread; + + workerid = workers->get_next(workers, &it); + // Read event counter value + struct read_format instr, /*cycles, cache_misses, cache_refs, branch_instr,*/ fps; + rread = read(fd_instr[workerid], &instr, sizeof(struct read_format)); + assert(rread==sizeof(struct read_format)); + /* read(fd_cycles[workerid], &cycles, sizeof(long long)); */ + /* read(fd_cache_misses[workerid], &cache_misses, sizeof(long long)); */ + /* read(fd_cache_refs[workerid], &cache_refs, sizeof(long long)); */ + /* read(fd_branch_instr[workerid], &branch_instr, sizeof(long long)); */ + rread = read(fd_fps[workerid], &fps, sizeof(struct read_format)); + assert(rread == sizeof(struct read_format)); + + curr_total_instr += (instr.time_enabled != 0 && instr.time_running !=0) ? instr.value * instr.time_enabled/instr.time_running : instr.value; + printf("w%d instr time enabled %"PRIu64" time running %"PRIu64" \n", workerid, instr.time_enabled, instr.time_running); + + /* curr_total_cycles += cycles; */ + /* curr_total_cache_misses += cache_misses; */ + /* curr_total_cache_refs += cache_refs; */ + /* curr_total_branch_instr += branch_instr; */ + curr_total_fps += (fps.time_enabled != 0 && fps.time_running !=0) ? 
fps.value * fps.time_enabled/fps.time_running : fps.value; + printf("w%d fps time enabled %lu time running %lu \n", workerid, fps.time_enabled, fps.time_running); + } + + total_instr[sched_ctx] += curr_total_instr; + /* total_cycles[sched_ctx] += curr_total_cycles; */ + /* total_cache_misses[sched_ctx] += curr_total_cache_misses; */ + /* total_cache_refs[sched_ctx] += curr_total_cache_refs; */ + /* total_branch_instr[sched_ctx] += curr_total_branch_instr; */ + total_fps[sched_ctx] += curr_total_fps; + + printf("%u: Instrs %lf k curr instr %lf k\n", sched_ctx, (double)total_instr[sched_ctx]/1000, + (double)curr_total_instr/1000); + printf("%u: Fps %lf k curr fps %lf k\n", sched_ctx, + (double)total_fps[sched_ctx]/1000, + (double)curr_total_fps/1000); + + printf("%u: Task Flops %lf k %s \n", sched_ctx, task->flops/1000, (task->cl && task->cl->model) ? task->cl->model->symbol : "task null"); + printf("-------------------------------------------\n"); +} + +void config_event(struct perf_event_attr *event, unsigned with_time, uint64_t event_type, uint64_t config_type) +{ + memset(event, 0, sizeof(struct perf_event_attr)); + event->type = event_type; + event->size = sizeof(struct perf_event_attr); + event->config = config_type; + event->disabled = 1; // Event is initially disabled + event->exclude_kernel = 1; // excluding events that happen in the kernel space + if(with_time) + { + /* if the PMU is multiplexing several events we measure the time spent to actually measure this event (time_running) + and compare it to the one expected is did, thus we compute the precision of the counter*/ + event->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING; + } + +} + +void open_event(int *file_desc, struct perf_event_attr *event, int group_fd) +{ + *file_desc = perf_event_open(event, 0, -1, group_fd, 0); + if (*file_desc == -1) { + fprintf(stderr, "Error opening leader %llx\n", event->config); + perror("perf_event_open"); + exit(0); + } + +} +void 
config_all_events_for_worker(int workerid) +{ + config_event(&pe_instr[workerid], 1, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); + /* config_event(&pe_cycles[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); */ + /* config_event(&pe_cache_misses[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); */ + /* config_event(&pe_cache_refs[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); */ + /* config_event(&pe_branch_instr[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); */ + config_event(&pe_fps[workerid], 1, PERF_TYPE_RAW, 0x1010); +} + +void open_all_events_for_worker(int curr_workerid) +{ + open_event(&fd_instr[curr_workerid], &pe_instr[curr_workerid], -1); + /* open_event(&fd_cycles[curr_workerid], &pe_cycles[curr_workerid], fd_instr[curr_workerid]); */ + /* open_event(&fd_cache_misses[curr_workerid], &pe_cache_misses[curr_workerid], fd_instr[curr_workerid]); */ + /* open_event(&fd_cache_refs[curr_workerid], &pe_cache_refs[curr_workerid], fd_instr[curr_workerid]); */ + /* open_event(&fd_branch_instr[curr_workerid], &pe_branch_instr[curr_workerid], fd_instr[curr_workerid]); */ + open_event(&fd_fps[curr_workerid], &pe_fps[curr_workerid], fd_instr[curr_workerid]); +} + +void close_all_events_for_worker(int curr_workerid) +{ + close(fd_instr[curr_workerid]); + /* close(fd_cycles[curr_workerid]); */ + /* close(fd_cache_misses[curr_workerid]); */ + /* close(fd_cache_refs[curr_workerid]); */ + /* close(fd_branch_instr[curr_workerid]); */ + close(fd_fps[curr_workerid]); +} + +void start_monitoring_all_events_for_worker(int workerid) +{ + ioctl(fd_instr[workerid], PERF_EVENT_IOC_RESET, 0); + ioctl(fd_instr[workerid], PERF_EVENT_IOC_ENABLE, 0); + + /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_RESET, 0); */ + /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_ENABLE, 0); */ + + /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_RESET, 0); */ + /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_ENABLE, 0); */ + + /* 
ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_RESET, 0); */ + /* ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_ENABLE, 0); */ + + /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_RESET, 0); */ + /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_ENABLE, 0); */ + + ioctl(fd_fps[workerid], PERF_EVENT_IOC_RESET, 0); + ioctl(fd_fps[workerid], PERF_EVENT_IOC_ENABLE, 0); +} + +void stop_monitoring_all_events_for_worker(int workerid) +{ + ioctl(fd_instr[workerid], PERF_EVENT_IOC_DISABLE, 0); + /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_DISABLE, 0); */ + /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_DISABLE, 0); */ + /* ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_DISABLE, 0); */ + /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_DISABLE, 0); */ + ioctl(fd_fps[workerid], PERF_EVENT_IOC_DISABLE, 0); +} + +void perf_count_handle_idle_end(unsigned sched_ctx, int worker) +{ + unsigned has_starpu_scheduler; + unsigned has_awake_workers; + has_starpu_scheduler = starpu_sched_ctx_has_starpu_scheduler(sched_ctx, &has_awake_workers); + + if(!has_starpu_scheduler && !has_awake_workers) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + + struct starpu_sched_ctx_iterator it; + + int workerid; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + if(perf_event_opened[workerid]) + start_monitoring_all_events_for_worker(workerid); + } + } + else + { + if(!perf_event_opened[worker]) + { + config_all_events_for_worker(worker); + open_all_events_for_worker(worker); + perf_event_opened[worker] = 1; + } + start_monitoring_all_events_for_worker(worker); + } +} + +void perf_count_handle_poped_task(unsigned sched_ctx, int worker, + struct starpu_task *task, + __attribute__((unused))uint32_t footprint) +{ + unsigned has_starpu_scheduler; + unsigned has_awake_workers; + has_starpu_scheduler = starpu_sched_ctx_has_starpu_scheduler(sched_ctx, 
&has_awake_workers); + + if(!has_starpu_scheduler && !has_awake_workers) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + + struct starpu_sched_ctx_iterator it; + + int workerid; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + if(perf_event_opened[workerid]) + stop_monitoring_all_events_for_worker(workerid); + } +// printf("worker requesting %d in ctx %d \n", starpu_worker_get_id(), sched_ctx); + print_results_for_ctx(sched_ctx, task); + } + else + { + if(perf_event_opened[worker]) + stop_monitoring_all_events_for_worker(worker); + print_results_for_worker(worker, sched_ctx, task); + } +} + +void perf_count_init_worker(int workerid, unsigned sched_ctx) +{ + (void)sched_ctx; + if(!perf_event_opened[workerid]) + { + open_all_events_for_worker(workerid); + perf_event_opened[workerid] = 1; + } + else + { + close_all_events_for_worker(workerid); + open_all_events_for_worker(workerid); + } +} + +void perf_count_start_ctx(unsigned sched_ctx) +{ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + + struct starpu_sched_ctx_iterator it; + + int workerid; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + config_all_events_for_worker(workerid); + } +} + +void perf_count_end_ctx(unsigned sched_ctx) +{ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + + struct starpu_sched_ctx_iterator it; + + int workerid; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + close_all_events_for_worker(workerid); + } +} + +struct sc_hypervisor_policy perf_count_policy = +{ + .size_ctxs = NULL, + .handle_poped_task = perf_count_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = NULL, + 
.handle_idle_end = perf_count_handle_idle_end, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = perf_count_end_ctx, + .start_ctx = perf_count_start_ctx, + .init_worker = perf_count_init_worker, + .custom = 0, + .name = "perf_count" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c b/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c new file mode 100644 index 0000000..0b03927 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c @@ -0,0 +1,346 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" +#include +#include + +static struct sc_hypervisor_policy_task_pool *task_pools = NULL; + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + +struct teft_lp_data +{ + int nt; + double **tasks; + unsigned *in_sched_ctxs; + int *workers; + struct sc_hypervisor_policy_task_pool *tmp_task_pools; + unsigned size_ctxs; +}; + +static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], + unsigned is_integer, double tmax, void *specific_data) +{ + struct teft_lp_data *sd = (struct teft_lp_data *)specific_data; + + int nt = sd->nt; + double **final_tasks = sd->tasks; + unsigned *in_sched_ctxs = sd->in_sched_ctxs; + int *workers = sd->workers; + struct sc_hypervisor_policy_task_pool *tmp_task_pools = sd->tmp_task_pools; + unsigned size_ctxs = sd->size_ctxs; + + if(tmp_task_pools == NULL) + return 0.0; + + double w_in_s[ns][nw]; + double tasks[nw][nt]; + double times[nw][nt]; + + /* times in ms */ + sc_hypervisor_get_tasks_times(nw, nt, times, workers, size_ctxs, task_pools); + + double res = 0.0; +#ifdef STARPU_HAVE_GLPK_H + res = sc_hypervisor_lp_simulate_distrib_tasks(ns, nw, nt, w_in_s, tasks, times, is_integer, tmax, in_sched_ctxs, tmp_task_pools); +#endif //STARPU_HAVE_GLPK_H + if(res != 0.0) + { + int s, w, t; + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + final_w_in_s[s][w] = w_in_s[s][w]; + + for(w = 0; w < nw; w++) + for(t = 0; t < nt; t++) + final_tasks[w][t] = tasks[w][t]; + } + return res; +} + +static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; + int nw = workers == NULL ? 
(int)starpu_worker_get_count() : nworkers; /* Number of different workers */ + int nt = 0; /* Number of different kinds of tasks */ + + struct sc_hypervisor_policy_task_pool * tp; + for (tp = task_pools; tp; tp = tp->next) + nt++; + + double w_in_s[ns][nw]; + double **tasks=(double**)malloc(nw*sizeof(double*)); + int i; + for(i = 0; i < nw; i++) + tasks[i] = (double*)malloc(nt*sizeof(double)); + + + struct teft_lp_data specific_data; + specific_data.nt = nt; + specific_data.tasks = tasks; + specific_data.in_sched_ctxs = sched_ctxs; + specific_data.workers = workers; + specific_data.tmp_task_pools = task_pools; + specific_data.size_ctxs = 1; + + /* smallest possible tmax, difficult to obtain as we + compute the nr of flops and not the tasks */ + /*lp computes it in s but it's converted to ms just before return */ + double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); + double smallest_tmax = possible_tmax / 3; + double tmax = possible_tmax * ns; + double tmin = 0.0; + unsigned found_sol = 0; + + if(nt > 0 && tmax > 0.0) + { + found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, + tmin, tmax, smallest_tmax, _compute_workers_distrib); + } + + + /* if we did find at least one solution redistribute the resources */ + if(found_sol) + { + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); + sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 1, tw); + free(tw); + } + + for(i = 0; i < nw; i++) + free(tasks[i]); + free(tasks); + +} + +static void size_if_required() +{ + int nsched_ctxs, nworkers; + unsigned *sched_ctxs; + int *workers; + unsigned has_req = sc_hypervisor_get_size_req(&sched_ctxs, &nsched_ctxs, &workers, &nworkers); + + if(has_req) + { + struct sc_hypervisor_wrapper* sc_w = NULL; + unsigned ready_to_size = 1; + int s; + STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + for(s = 0; s < nsched_ctxs; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); +// 
if(sc_w->submitted_flops < sc_w->total_flops) + if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) + ready_to_size = 0; + } + + if(ready_to_size) + { + _size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers); + sc_hypervisor_free_size_req(); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size) +{ + /* count the tasks of the same type */ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + sc_hypervisor_policy_add_task_to_pool(cl, sched_ctx, footprint, &task_pools, data_size); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + size_if_required(); +} + +static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; + if(ns < 2) return; + int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ + + sched_ctxs = sched_ctxs == NULL ? 
sc_hypervisor_get_sched_ctxs() : sched_ctxs; + + int nt = 0; /* Number of different kinds of tasks */ + +// STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + /* we don't take the mutex bc a correct value of the number of tasks is + not required but we do a copy in order to be sure + that the linear progr won't segfault if the list of + submitted task will change during the exec */ + + struct sc_hypervisor_policy_task_pool *tp = NULL; + struct sc_hypervisor_policy_task_pool *tmp_task_pools = sc_hypervisor_policy_clone_task_pool(task_pools); + + for (tp = task_pools; tp; tp = tp->next) + nt++; + + double w_in_s[ns][nw]; + double **tasks_per_worker=(double**)malloc(nw*sizeof(double*)); + int i; + for(i = 0; i < nw; i++) + tasks_per_worker[i] = (double*)malloc(nt*sizeof(double)); + + struct teft_lp_data specific_data; + specific_data.nt = nt; + specific_data.tasks = tasks_per_worker; + specific_data.in_sched_ctxs = NULL; + specific_data.workers = NULL; + specific_data.tmp_task_pools = tmp_task_pools; + specific_data.size_ctxs = 0; + + /* smallest possible tmax, difficult to obtain as we + compute the nr of flops and not the tasks */ + /*lp computes it in s but it's converted to ms just before return */ + double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL); + double smallest_tmax = possible_tmax/2.0; + double tmax = possible_tmax + smallest_tmax; + double tmin = smallest_tmax; + unsigned found_sol = 0; + + if(nt > 0 && tmax > 0.0) + { + struct timeval start_time; + struct timeval end_time; + gettimeofday(&start_time, NULL); + + + found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, + tmin, tmax, smallest_tmax, _compute_workers_distrib); + gettimeofday(&end_time, NULL); + + long diff_s = end_time.tv_sec - start_time.tv_sec; + long diff_us = end_time.tv_usec - start_time.tv_usec; + + __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; + } +// STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + /* if we did find at least 
one solution redistribute the resources */ + if(found_sol) + { + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); + sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 0, tw); + free(tw); + } + + struct sc_hypervisor_policy_task_pool *next = NULL; + struct sc_hypervisor_policy_task_pool *tmp_tp = tmp_task_pools; + while(tmp_task_pools) + { + next = tmp_tp->next; + free(tmp_tp); + tmp_tp = next; + tmp_task_pools = next; + } + for(i = 0; i < nw; i++) + free(tasks_per_worker[i]); + free(tasks_per_worker); +} + +static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint) +{ + (void)sched_ctx; + if(worker > -2) + { + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_SPEED) + { + + if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) + { + _try_resizing(NULL, -1, NULL, -1); + } + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } + /* too expensive to take this mutex and correct value of the number of tasks is not compulsory */ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + +} + +static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + (void)sched_ctx; + (void)worker; + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING)// && criteria == SC_IDLE) + { + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + _try_resizing(NULL, -1, NULL, -1); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + } + return; +} + +static void teft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + sc_hypervisor_save_size_req(sched_ctxs, nsched_ctxs, workers, nworkers); +} + +static void 
teft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + struct sc_hypervisor_wrapper* sc_w = NULL; + int s = 0; + for(s = 0; s < nsched_ctxs; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + + if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + return; + } + } + + + _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +struct sc_hypervisor_policy teft_lp_policy = +{ + .size_ctxs = teft_lp_size_ctxs, + .resize_ctxs = teft_lp_resize_ctxs, + .handle_poped_task = teft_lp_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = teft_lp_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = teft_lp_handle_submitted_job, + .end_ctx = NULL, + .init_worker = NULL, + .custom = 0, + .name = "teft_lp" +}; diff --git a/sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c b/sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c new file mode 100644 index 0000000..752fd27 --- /dev/null +++ b/sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c @@ -0,0 +1,357 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" +#include +#include + +static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer); + + +static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], unsigned *in_sched_ctxs, int *workers) +{ + double speed[ns][nw]; + + unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; + + int w,s; + + struct sc_hypervisor_wrapper* sc_w = NULL; + for(s = 0; s < ns; s++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); + for(w = 0; w < nw; w++) + { + w_in_s[s][w] = 0.0; + int worker = workers == NULL ? w : workers[w]; + + enum starpu_worker_archtype arch = starpu_worker_get_type(worker); + speed[s][w] = sc_hypervisor_get_speed(sc_w, arch); + } + } + + + struct timeval start_time; + struct timeval end_time; + gettimeofday(&start_time, NULL); + + double res = _glp_resolve(ns, nw, speed, w_in_s, 1); + gettimeofday(&end_time, NULL); + + long diff_s = end_time.tv_sec - start_time.tv_sec; + long diff_us = end_time.tv_usec - start_time.tv_usec; + + __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000; + + if(res > 0.0) + return 1; + return 0; +} + +/* + * GNU Linear Programming Kit backend + */ +#ifdef STARPU_HAVE_GLPK_H +#include +static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer) +{ + int w = 0, s = 0; + glp_prob *lp; + + lp = glp_create_prob(); + glp_set_prob_name(lp, "StarPU theoretical bound"); + glp_set_obj_dir(lp, GLP_MAX); + glp_set_obj_name(lp, "total speed"); + + { + int ne = 2 * ns * nw /* worker execution time */ + + 1 + + 1 ; /* glp dumbness */ + int n = 1; + int ia[ne], ja[ne]; + double ar[ne]; + + + /* Variables: x[s][w] + the acknwoledgment that the worker w belongs to the context s */ + glp_add_cols(lp, nw*ns + 1); + + 
for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + { + char name[32]; + snprintf(name, sizeof(name), "w%ds%dn", w, s); + glp_set_col_name(lp, s*nw+w+1, name); + if (integer) + { + glp_set_col_kind(lp, s*nw+w+1, GLP_IV); + glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0, 1); + } + else + glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0.0, 1.0); + + } + + /* vmax should be positif */ + /* Z = vmax structural variable, x[s][w] are auxiliary variables */ + glp_set_col_name(lp, nw*ns+1, "vmax"); + glp_set_col_bnds(lp, nw*ns+1, GLP_LO, 0.0, 0.0); + glp_set_obj_coef(lp, nw*ns+1, 1.); + + + int curr_row_idx = 0; + /* Total worker speed */ + glp_add_rows(lp, 1); + + /*sum(x[s][w]*speed[s][w]) >= vmax */ + char name[32], title[64]; + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "worker %s", name); + glp_set_row_name(lp, curr_row_idx + 1, title); + + for(s = 0; s < ns; s++) + { + for (w = 0; w < nw; w++) + { + /* x[s][w] */ + ia[n] = curr_row_idx + 1; + ja[n] = s*nw+w+1; + ar[n] = speed[s][w]; + n++; + } + } + /* vmax */ + ia[n] = curr_row_idx + 1; + ja[n] = nw*ns+1; + ar[n] = (-1); + n++; + glp_set_row_bnds(lp, curr_row_idx + 1, GLP_LO, 0.0, 0.0); + + curr_row_idx += 1 ; + + /* sum(x[s][w]) = 1 */ + glp_add_rows(lp, nw); + for (w = 0; w < nw; w++) + { + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "w%x", w); + glp_set_row_name(lp, curr_row_idx+w+1, title); + for(s = 0; s < ns; s++) + { + ia[n] = curr_row_idx+w+1; + ja[n] = s*nw+w+1; + ar[n] = 1; + n++; + } + if(integer) + glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1); + else + glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); + } + + if(n != ne) + printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne); + STARPU_ASSERT(n == ne); + + glp_load_matrix(lp, ne-1, ia, ja, ar); + } + + glp_smcp parm; + glp_init_smcp(&parm); + parm.msg_lev = GLP_MSG_OFF; + int ret = glp_simplex(lp, &parm); + if (ret) + { + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + + 
if (integer) + { + glp_iocp iocp; + glp_init_iocp(&iocp); + iocp.msg_lev = GLP_MSG_OFF; + glp_intopt(lp, &iocp); + int stat = glp_mip_status(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + } + + int stat = glp_get_prim_stat(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + glp_delete_prob(lp); + lp = NULL; + printf("No sol!!!\n"); + return 0.0; + } + + double res = glp_get_obj_val(lp); + + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + { + if (integer) + w_in_s[s][w] = (double)glp_mip_col_val(lp, s*nw+w+1); + else + w_in_s[s][w] = glp_get_col_prim(lp, s*nw+w+1); + } + + glp_delete_prob(lp); + return res; +} + + +static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; + int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ + + sched_ctxs = sched_ctxs == NULL ? 
sc_hypervisor_get_sched_ctxs() : sched_ctxs; + + double w_in_s[ns][nw]; + unsigned found_sol = _compute_max_speed(ns, nw, w_in_s, sched_ctxs, workers); + /* if we did find at least one solution redistribute the resources */ + if(found_sol) + { + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); + int w, s; + double nworkers_per_ctx[ns][tw->nw]; + int nworkers_per_ctx_rounded[ns][tw->nw]; + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + { + nworkers_per_ctx[s][w] = 0.0; + nworkers_per_ctx_rounded[s][w] = 0; + } + } + + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = starpu_worker_get_type(w); + int idx = sc_hypervisor_get_index_for_arch(STARPU_CUDA_WORKER, tw); + nworkers_per_ctx[s][idx] += w_in_s[s][w]; + + if(arch == STARPU_CUDA_WORKER) + { + if(w_in_s[s][w] >= 0.3) + nworkers_per_ctx_rounded[s][idx]++; + } + else + { + idx = sc_hypervisor_get_index_for_arch(STARPU_CPU_WORKER, tw); + nworkers_per_ctx[s][idx] += w_in_s[s][w]; + if(w_in_s[s][w] > 0.5) + nworkers_per_ctx_rounded[s][idx]++; + } + } + } +/* for(s = 0; s < ns; s++) */ +/* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */ +/* nworkers_rounded[s][1], nworkers_rounded[s][0]); */ + + + sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, tw->nw, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs, tw); + free(tw); + } +} + +static void throughput_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, + __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_SPEED) + { + if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) + { + _try_resizing(NULL, -1, NULL, -1); + } + } + 
STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void throughput_lp_handle_idle_cycle(unsigned sched_ctx, int worker) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + unsigned criteria = sc_hypervisor_get_resize_criteria(); + if(criteria != SC_NOTHING && criteria == SC_IDLE) + { + + if(sc_hypervisor_check_idle(sched_ctx, worker)) + { + _try_resizing(NULL, -1, NULL, -1); +// sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void throughput_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } +} + +static void throughput_lp_end_ctx(__attribute__((unused))unsigned sched_ctx) +{ +/* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */ +/* int worker; */ +/* for(worker = 0; worker < 12; worker++) */ +/* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */ + + return; +} + +struct sc_hypervisor_policy throughput_lp_policy = +{ + .size_ctxs = NULL, + .resize_ctxs = throughput_lp_resize_ctxs, + .handle_poped_task = throughput_lp_handle_poped_task, + .handle_pushed_task = NULL, + .handle_idle_cycle = throughput_lp_handle_idle_cycle, + .handle_idle_end = NULL, + .handle_post_exec_hook = NULL, + .handle_submitted_job = NULL, + .end_ctx = throughput_lp_end_ctx, + .init_worker = NULL, + .custom = 0, + .name = "throughput_lp" +}; + +#endif /* STARPU_HAVE_GLPK_H */ diff --git a/sc_hypervisor/src/policies_utils/dichotomy.c b/sc_hypervisor/src/policies_utils/dichotomy.c new file mode 100644 index 0000000..1913128 --- /dev/null +++ b/sc_hypervisor/src/policies_utils/dichotomy.c @@ -0,0 +1,124 @@ +/* StarPU --- Runtime system for 
/* Tracing of the search, compiled in only for hypervisor debug builds so
 * that the library stays silent on stdout otherwise */
#ifdef STARPU_SC_HYPERVISOR_DEBUG
#define DICHOTOMY_TRACE(...) printf(__VA_ARGS__)
#else
#define DICHOTOMY_TRACE(...) ((void)0)
#endif

/* executes the function lp_estimated_distrib_func over the interval [tmin, tmax] until it finds the lowest value that
 * still has solutions
 *
 * ns, nw            dimensions of w_in_s, forwarded to the callback
 * w_in_s            table handed to the callback, which fills it
 * solve_lp_integer  forwarded to the callback
 * specific_data     opaque pointer forwarded to the callback
 * tmin, tmax        bounds of the search; tmax is tried first
 * smallest_tmax     unused
 * lp_estimated_distrib_func
 *                   solver: returns a negative value on timeout, 0.0 when
 *                   there is no solution for the given bound, anything
 *                   else when a solution was found
 *
 * The search stops as soon as the next candidate would be closer to the
 * current upper bound than 10% of the initial tmax, when the very first
 * attempt fails, or when the solver times out (in which case earlier
 * solutions are discarded).  When a solution was found the solver is run
 * once more with the best bound so that the tables hold that solution.
 *
 * Returns 1 when a solution is available, 0 otherwise.
 */
unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
					    double tmin, double tmax, double smallest_tmax,
					    double (*lp_estimated_distrib_func)(int lns, int lnw, double ldraft_w_in_s[ns][nw],
									     unsigned lis_integer, double ltmax, void *lspecifc_data))
{
	(void)smallest_tmax;
	const double threshold = tmax*0.1;
	double tmid = tmax;
	double found_tmid = tmax;
	unsigned found_sol = 0;

	/* we fix tmax and we do not treat it as an unknown
	   we just vary by dichotomy its values*/
	while(1)
	{
		DICHOTOMY_TRACE("solving for tmid %lf \n", tmid);
		double res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmid, specific_data);
		if(res < 0.0)
		{
			DICHOTOMY_TRACE("timeouted no point in continuing\n");
			found_sol = 0;
			break;
		}

		if(res != 0.0)
		{
			/* solution found: remember it and look for a smaller bound */
			DICHOTOMY_TRACE("found sol for tmid %lf \n", tmid);
			found_sol = 1;
			found_tmid = tmid;
			tmax = tmid;
		}
		else
		{
			DICHOTOMY_TRACE("failed for tmid %lf \n", tmid);
			if(tmid == tmax)
			{
				/* even the upper bound has no solution */
				DICHOTOMY_TRACE("failed for tmid %lf from the first time\n", tmid);
				break;
			}
			/* no solution: look for a bigger bound */
			tmin = tmid;
		}

		double potential_tmid = tmin + ((tmax-tmin)/2.0);

		/* stop when more precision is pointless, or when the interval
		   cannot be narrowed any further (which also guarantees
		   termination when the threshold is not positive) */
		if((tmax - potential_tmid) < threshold || potential_tmid == tmid)
		{
			DICHOTOMY_TRACE("stop searching for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
			break;
		}
		DICHOTOMY_TRACE("try for potential tmid %lf \n", potential_tmid);

		tmid = potential_tmid;
	}

	if(found_sol)
	{
		/* the last attempt may have failed and is not necessarily the
		   best one: solve again for the best bound */
		DICHOTOMY_TRACE("solve again for tmid %lf \n", found_tmid);
		double res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, found_tmid, specific_data);
		found_sol = (res != 0.0);
	}
	DICHOTOMY_TRACE("found sol %u for tmid %lf\n", found_sol, found_tmid);

	return found_sol;
}
/* Build and solve, with GLPK, the program that decides at the same time
 * how many tasks of each kind every worker executes and which context
 * every worker belongs to, for a given time bound.
 *
 * ns, nw, nt      number of contexts, workers and kinds of tasks
 * w_in_s          out: value of the "worker w belongs to context s" variable
 * tasks           out: number of tasks of kind t executed by worker w
 * times           execution time of a task of kind t on worker w; NaN
 *                 entries are replaced by a huge cost in the matrix
 * is_integer      non-zero to declare the variables integer and run the
 *                 MIP solver after the simplex
 * tmax            time bound used as coefficient of the context variables
 * in_sched_ctxs   contexts, or NULL to use sc_hypervisor_get_sched_ctxs()
 * tmp_task_pools  list of task pools; one "task" row is built per element
 *
 * Returns 0.0 when a kind of task has no time on any worker, when the
 * simplex fails, or when no solution is reported; otherwise the value of
 * glp_get_obj_val().
 *
 * Column layout (1-based, as GLPK requires):
 *   colnum(w, t)        tasks of kind t on worker w
 *   nw*nt + s*nw+w + 1  worker w in context s
 */
double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
					       double times[nw][nt], unsigned is_integer, double tmax, unsigned *in_sched_ctxs,
					       struct sc_hypervisor_policy_task_pool *tmp_task_pools)
{
	struct sc_hypervisor_policy_task_pool * tp;
	int t, w, s;
	glp_prob *lp;


	lp = glp_create_prob();
	glp_set_prob_name(lp, "StarPU theoretical bound");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "total execution time");

	{
		/* Number of matrix entries written below, plus the unused
		   entry 0.  The count only matches (see the assertion before
		   glp_load_matrix) when every element of tmp_task_pools
		   belongs to exactly one of the ns contexts and the list
		   holds nt elements. */
		int ne = nt * nw /* worker execution time */
			+ nw * ns
			+ nw * (nt + ns)
			+ 1; /* glp dumbness */
		int n = 1;
		int ia[ne], ja[ne];
		double ar[ne];

		/* Variables: number of tasks i assigned to worker j, and tmax */
		glp_add_cols(lp, nw*nt+ns*nw);
#define colnum(w, t) ((t)*nw+(w)+1)
		/* objective: sum of the "worker in context" variables */
		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);

		/* task-count variables: lower bound 0, no upper bound */
		for (w = 0; w < nw; w++)
			for (t = 0; t < nt; t++)
			{
				char name[32];
				snprintf(name, sizeof(name), "w%dt%dn", w, t);
				glp_set_col_name(lp, colnum(w, t), name);
				if (is_integer)
				{
					glp_set_col_kind(lp, colnum(w, t), GLP_IV);
					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0);
				}
				else
					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
			}
		/* membership variables: bounded to [0, 1] */
		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
			{
				char name[32];
				snprintf(name, sizeof(name), "w%ds%dn", w, s);
				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
				if (is_integer)
				{
					glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
				}
				else
					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
			}

		unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;

		int curr_row_idx = 0;
		/* Total worker execution time */
		glp_add_rows(lp, nw*ns);
		/* give up when a kind of task has no known time on any worker */
		for (t = 0; t < nt; t++)
		{
			int someone = 0;
			for (w = 0; w < nw; w++)
				if (!isnan(times[w][t]))
					someone = 1;
			if (!someone)
			{
				/* This task does not have any performance model at all, abort */
				printf("NO PERF MODELS\n");
				glp_delete_prob(lp);
				return 0.0;
			}
		}
		/*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
		for(s = 0; s < ns; s++)
		{
			for (w = 0; w < nw; w++)
			{
				char name[32], title[64];
				/* NOTE(review): the loop index is used as worker
				   identifier here; confirm this is intended when
				   the caller works on a subset of the workers */
				starpu_worker_get_name(w, name, sizeof(name));
				snprintf(title, sizeof(title), "worker %s", name);
				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
				/* only the tasks of context s weigh on this row */
				for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
				{
					if(tp->sched_ctx_id == sched_ctxs[s])
					{
						ia[n] = curr_row_idx+s*nw+w+1;
						ja[n] = colnum(w, t);
						if (isnan(times[w][t]))
						{
							printf("had to insert huge val \n");
							ar[n] = 1000000000.;
						}
						else
							ar[n] = times[w][t];
						n++;
					}
				}
				/* x[s][w] = 1 | 0 */
				ia[n] = curr_row_idx+s*nw+w+1;
				ja[n] = nw*nt+s*nw+w+1;
				ar[n] = (-1) * tmax;
				n++;
				if (is_integer)
				{
					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0, 0);
				}
				else
					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
			}
		}

		curr_row_idx += nw*ns;

		/* Total task completion */
		glp_add_rows(lp, nt);
		for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
		{
			char name[32], title[64];
			/* NOTE(review): `w` still holds the value left by the
			   previous loops (nw once they have run) and `name` is
			   not used to build the title of this row; this call
			   looks like a leftover */
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
			glp_set_row_name(lp, curr_row_idx+t+1, title);
			for (w = 0; w < nw; w++)
			{
				ia[n] = curr_row_idx+t+1;
				ja[n] = colnum(w, t);
				ar[n] = 1;
				n++;
			}
			/* all the tp->n tasks of this kind must be executed */
			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
		}

		curr_row_idx += nt;

		/* sum(x[s][i]) = 1 */
		glp_add_rows(lp, nw);
		for (w = 0; w < nw; w++)
		{
			char name[32], title[64];
			/* `name` is fetched but the row title only uses w */
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "w%x", w);
			glp_set_row_name(lp, curr_row_idx+w+1, title);
			for(s = 0; s < ns; s++)
			{
				ia[n] = curr_row_idx+w+1;
				ja[n] = nw*nt+s*nw+w+1;
				ar[n] = 1;
				n++;
			}
			if(is_integer)
				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
			else
				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
		}
		if(n != ne)
			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
		STARPU_ASSERT(n == ne);

		/* entries 1..ne-1 are used, entry 0 is not */
		glp_load_matrix(lp, ne-1, ia, ja, ar);
	}

	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	int ret = glp_simplex(lp, &parm);

	/* char str[50]; */
	/* sprintf(str, "outpu_lp_%g", tmax); */

	/* glp_print_sol(lp, str); */

	if (ret)
	{
		printf("error in simplex\n");
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}

	int stat = glp_get_prim_stat(lp);
	/* if we don't have a solution return */
	if(stat == GLP_NOFEAS)
	{
		glp_delete_prob(lp);
//		printf("no_sol in tmax = %lf\n", tmax);
		lp = NULL;
		return 0.0;
	}


	if (is_integer)
	{
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
//		iocp.tm_lim = 1000;
		/* NOTE(review): the return value of glp_intopt() is ignored
		   and GLP_ETMLIM is compared with a solution status below;
		   check against the GLPK manual whether glp_mip_status() can
		   ever return it */
		glp_intopt(lp, &iocp);
		stat = glp_mip_status(lp);
		/* if we don't have a solution return */
		if(stat == GLP_NOFEAS || stat == GLP_ETMLIM || stat == GLP_UNDEF)
		{
//			printf("no int sol in tmax = %lf\n", tmax);
			if(stat == GLP_ETMLIM || stat == GLP_UNDEF)
				printf("timeout \n");
			glp_delete_prob(lp);
			lp = NULL;
			return 0.0;
		}
	}

	/* NOTE(review): this reads the objective of the simplex solution even
	   when the integer solver was run; confirm this is the wanted value */
	double res = glp_get_obj_val(lp);
	/* copy the solution out: MIP values in integer mode, LP ones otherwise */
	for (w = 0; w < nw; w++)
		for (t = 0; t < nt; t++)
			if (is_integer)
				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t));
			else
				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));

	/* printf("**********************************************\n"); */
	/* printf("for tmax %lf\n", tmax); */
	for(s = 0; s < ns; s++)
		for(w = 0; w < nw; w++)
		{
			if (is_integer)
				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
			else
				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
//			printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]);
		}
	/* printf("\n"); */
	/* printf("**********************************************\n"); */
	glp_delete_prob(lp);
	return res;
}
*/ + /* { */ + /* if(config->max_nworkers == 0) */ + /* glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers); */ + /* else */ + /* glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers); */ + /* } */ + /* else */ + { + if(total_nw[w] == 0) + glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]); + else + glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, total_nw[w]); + } + } + else + { +/* if(sc_w->consider_max) */ +/* { */ +/* if(config->max_nworkers == 0) */ +/* glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0); */ +/* else */ +/* glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0); */ +/* #ifdef STARPU_SC_HYPERVISOR_DEBUG */ +/* printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0); */ +/* #endif */ +/* } */ +/* else */ + { + if(total_nw[w] == 0) + glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0); + else + glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, total_nw[w]*1.0); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%u****************don't consider max %d but total %d in lp\n", sched_ctxs[s], config->max_nworkers, total_nw[w]); +#endif + } + } + n++; + } + } +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("ns = %d nw = %d\n", ns, nw); +#endif + /*1/tmax should belong to the interval [0.0;1.0]*/ + glp_set_col_name(lp, n, "vmax"); +// glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0); + if(last_vmax != -1.0) + glp_set_col_bnds(lp, n, GLP_LO, last_vmax, last_vmax); + else + glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0); + /* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliary variables */ + glp_set_obj_coef(lp, n, 1.0); + + n = 1; + /* one row corresponds to one ctx*/ + glp_add_rows(lp, ns); + + for(s = 0; s < ns; s++) + { + char name[32]; + snprintf(name, sizeof(name), "ctx%d", s); + glp_set_row_name(lp, s+1, name); + glp_set_row_bnds(lp, s+1, GLP_LO, 0., 
0.); + + for(w = 0; w < nw; w++) + { + int s2; + for(s2 = 0; s2 < ns; s2++) + { + if(s2 == s) + { + ia[n] = s+1; + ja[n] = w + nw*s2 + 1; + ar[n] = v[s][w]; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + } + else + { + ia[n] = s+1; + ja[n] = w + nw*s2 + 1; + ar[n] = 0.0; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + } + n++; + } + } + /* 1/tmax */ + ia[n] = s+1; + ja[n] = ns*nw+1; + ar[n] = (-1) * flops[s]; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + n++; + } + + /*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */ + glp_add_rows(lp, nw); + + for(w = 0; w < nw; w++) + { + char name[32]; + snprintf(name, sizeof(name), "w%d", w); + glp_set_row_name(lp, ns+w+1, name); + for(s = 0; s < ns; s++) + { + int w2; + for(w2 = 0; w2 < nw; w2++) + { + if(w2 == w) + { + ia[n] = ns+w+1; + ja[n] = w2+s*nw + 1; + ar[n] = 1.0; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + } + else + { + ia[n] = ns+w+1; + ja[n] = w2+s*nw + 1; + ar[n] = 0.0; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + } + n++; + } + } + /* 1/tmax */ + ia[n] = ns+w+1; + ja[n] = ns*nw+1; + ar[n] = 0.0; +// printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); + n++; + + /*sum(all gpus) = 3*/ + if(w == 0) + glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); + + /*sum(all cpus) = 9*/ + if(w == 1) + glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); + } + + STARPU_ASSERT(n == ne); + + glp_load_matrix(lp, ne-1, ia, ja, ar); + + glp_smcp parm; + glp_init_smcp(&parm); + parm.msg_lev = GLP_MSG_OFF; + int ret = glp_simplex(lp, &parm); + if (ret) + { + printf("error in simplex\n"); + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + + int stat = glp_get_prim_stat(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + glp_delete_prob(lp); + printf("no_sol\n"); 
+ lp = NULL; + return 0.0; + } + + + if (integer) + { + glp_iocp iocp; + glp_init_iocp(&iocp); + iocp.msg_lev = GLP_MSG_OFF; + glp_intopt(lp, &iocp); + stat = glp_mip_status(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + printf("no int sol\n"); + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + } + + double vmax = glp_get_obj_val(lp); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("vmax = %lf \n", vmax); +#endif + n = 1; + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + { + if (integer) + res[s][w] = (double)glp_mip_col_val(lp, n); + else + res[s][w] = glp_get_col_prim(lp, n); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]); +#endif + n++; + } + } + + glp_delete_prob(lp); + return vmax; +} + +double sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, + double **speed, double flops[ns], double **final_flops_on_w) +{ + double w_in_s[ns][nw]; + double flops_on_w[ns][nw]; + + int w, s; + glp_prob *lp; + +// printf("try with tmax %lf\n", tmax); + lp = glp_create_prob(); + glp_set_prob_name(lp, "StarPU theoretical bound"); + glp_set_obj_dir(lp, GLP_MAX); + glp_set_obj_name(lp, "total execution time"); + + { + int ne = 5 * ns * nw /* worker execution time */ + + 1; /* glp dumbness */ + int n = 1; + int ia[ne], ja[ne]; + double ar[ne]; + + + /* Variables: number of flops assigned to worker w in context s, and + the acknwoledgment that the worker w belongs to the context s */ + glp_add_cols(lp, 2*nw*ns); +#define colnum_sample(w, s) ((s)*nw+(w)+1) + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + glp_set_obj_coef(lp, nw*ns+colnum_sample(w,s), 1.); + + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + { + char name[32]; + snprintf(name, sizeof(name), "flopsw%ds%dn", w, s); + glp_set_col_name(lp, colnum_sample(w,s), name); + glp_set_col_bnds(lp, colnum_sample(w,s), GLP_LO, 0., 0.); + 
+ snprintf(name, sizeof(name), "w%ds%dn", w, s); + glp_set_col_name(lp, nw*ns+colnum_sample(w,s), name); + if (is_integer) + { + glp_set_col_kind(lp, nw*ns+colnum_sample(w, s), GLP_IV); + glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0, 1); + } + else + glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0.0, 1.0); + + } + + + int curr_row_idx = 0; + /* Total worker execution time */ + glp_add_rows(lp, nw*ns); + + /*nflops[s][w]/v[s][w] < x[s][w]*tmax */ + for(s = 0; s < ns; s++) + { + for (w = 0; w < nw; w++) + { + char name[32], title[64]; + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "worker %s", name); + glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title); + + /* nflosp[s][w] */ + ia[n] = curr_row_idx+s*nw+w+1; + ja[n] = colnum_sample(w, s); + ar[n] = 1 / speed[s][w]; + + n++; + + /* x[s][w] = 1 | 0 */ + ia[n] = curr_row_idx+s*nw+w+1; + ja[n] = nw*ns+colnum_sample(w,s); + ar[n] = (-1) * tmax; + n++; + glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0); + } + } + + curr_row_idx += nw*ns; + + /* sum(flops[s][w]) = flops[s] */ + glp_add_rows(lp, ns); + for (s = 0; s < ns; s++) + { + char name[32], title[64]; + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s); + glp_set_row_name(lp, curr_row_idx+s+1, title); + for (w = 0; w < nw; w++) + { + ia[n] = curr_row_idx+s+1; + ja[n] = colnum_sample(w, s); + ar[n] = 1; + n++; + } + glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]); + } + + curr_row_idx += ns; + + /* sum(x[s][w]) = 1 */ + glp_add_rows(lp, nw); + for (w = 0; w < nw; w++) + { + char name[32], title[64]; + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "w%x", w); + glp_set_row_name(lp, curr_row_idx+w+1, title); + for(s = 0; s < ns; s++) + { + ia[n] = curr_row_idx+w+1; + ja[n] = nw*ns+colnum_sample(w,s); + ar[n] = 1; + n++; + } + if(is_integer) + glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 
1); + else + glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); + } + + curr_row_idx += nw; + + /* sum(nflops[s][w]) > 0*/ + glp_add_rows(lp, nw); + for (w = 0; w < nw; w++) + { + char name[32], title[64]; + starpu_worker_get_name(w, name, sizeof(name)); + snprintf(title, sizeof(title), "flopsw%x", w); + glp_set_row_name(lp, curr_row_idx+w+1, title); + for(s = 0; s < ns; s++) + { + ia[n] = curr_row_idx+w+1; + ja[n] = colnum_sample(w,s); + ar[n] = 1; + n++; + } + + glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.); + } + + if(n != ne) + printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne); + STARPU_ASSERT(n == ne); + + glp_load_matrix(lp, ne-1, ia, ja, ar); + } + + glp_smcp parm; + glp_init_smcp(&parm); + parm.msg_lev = GLP_MSG_OFF; + int ret = glp_simplex(lp, &parm); + if (ret) + { + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + + if (is_integer) + { + glp_iocp iocp; + glp_init_iocp(&iocp); + iocp.msg_lev = GLP_MSG_OFF; + glp_intopt(lp, &iocp); + int stat = glp_mip_status(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + } + + int stat = glp_get_prim_stat(lp); + /* if we don't have a solution return */ + if(stat == GLP_NOFEAS) + { + glp_delete_prob(lp); + lp = NULL; + return 0.0; + } + + double res = glp_get_obj_val(lp); + + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + { + flops_on_w[s][w] = glp_get_col_prim(lp, colnum_sample(w, s)); + if (is_integer) + w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum_sample(w, s)); + else + w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum_sample(w,s)); +// printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]); + } + + glp_delete_prob(lp); + for(s = 0; s < ns; s++) + for(w = 0; w < nw; w++) + { + final_w_in_s[s][w] = w_in_s[s][w]; + final_flops_on_w[s][w] = flops_on_w[s][w]; + } + + return res; + +} +#endif // STARPU_HAVE_GLPK_H diff --git 
a/sc_hypervisor/src/policies_utils/lp_tools.c b/sc_hypervisor/src/policies_utils/lp_tools.c new file mode 100644 index 0000000..12c9d9b --- /dev/null +++ b/sc_hypervisor/src/policies_utils/lp_tools.c @@ -0,0 +1,936 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "sc_hypervisor_lp.h" +#include "sc_hypervisor_policy.h" +#include "sc_hypervisor_intern.h" +#include + +double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], + int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs) +{ + unsigned *sched_ctxs = in_sched_ctxs == NULL ? 
sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; +#ifdef STARPU_HAVE_GLPK_H + double v[nsched_ctxs][ntypes_of_workers]; + double flops[nsched_ctxs]; + +/* unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); */ +/* if(nhierarchy_levels <= 1) */ + sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs, total_nw[0]); + + int nw = tw->nw; + int i = 0; + struct sc_hypervisor_wrapper* sc_w; + + for(i = 0; i < nsched_ctxs; i++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); + int w; + for(w = 0; w < nw; w++) + v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); + + double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx); + unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); + if(nhierarchy_levels > 1) + ready_flops = sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sc_w->sched_ctx); + +#ifdef STARPU_SC_HYPERVISOR_DEBUG + int nready_tasks = starpu_sched_ctx_get_nready_tasks(sc_w->sched_ctx); +#endif + + if(sc_w->to_be_sized) + { + flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ + sc_w->to_be_sized = 0; + } + else + { + if(nhierarchy_levels > 1) + flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ + else + if(sc_w->remaining_flops < 0.0) + flops[i] = ready_flops/1000000000.0; /* in gflops*/ + else + { + if((ready_flops/1000000000.0) <= 0.000002) + flops[i] = 0.0; + else + flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ + } + } + if(flops[i] < 0.0) + flops[i] = 0.0; +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%u: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", + sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks); +#endif + + } + sc_hypervisor_check_if_consider_max(tw); + int w; + for(w = 0; w < nw; w++) + { + double avg_speed = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw)); + if(avg_speed != -1.0) + { +#ifdef STARPU_SC_HYPERVISOR_DEBUG + 
printf("avg_speed for cpus is %lf \n", avg_speed); +#endif + unsigned consider_max_for_all = 0; + for(i = 0; i < nsched_ctxs; i++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); + + if(!sc_w->consider_max) + { +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("ctx %u: current speed is %lf and compare speed is min %lf max %lf\n", sched_ctxs[i], v[i][w], (0.1*avg_speed), (2*avg_speed)); +#endif + if(v[i][w] < 0.1*avg_speed || v[i][w] > 2*avg_speed) + { + sc_w->consider_max = 1; + consider_max_for_all = 1; + } +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("ctx %u consider max %d \n", sched_ctxs[i], sc_w->consider_max); +#endif + } + + } + if(consider_max_for_all) + { + for(i = 0; i < nsched_ctxs; i++) + { + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); + sc_w->consider_max = 1; +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("ctx %u consider max %d anyway \n", sched_ctxs[i], sc_w->consider_max); +#endif + } + } + + } + } + + if(nsched_ctxs == 1) + { + for(w = 0; w < nw; w++) + res[0][w] = total_nw[w]; + double optimal_v = 0.0; +#ifdef STARPU_USE_CUDA + optimal_v = res[0][0] * v[0][0] + res[0][1]* v[0][1]; +#else + optimal_v = res[0][0] * v[0][0]; +#endif //STARPU_USE_CUDA + _set_optimal_v(sched_ctxs[0], optimal_v); + return 1.0; + } + + + unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS]; + double tmp_flops[STARPU_NMAX_SCHED_CTXS]; + double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; + double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; + int tmp_nsched_ctxs = 0; + for(i = 0; i < nsched_ctxs; i++) + { + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]); + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); + if(config->max_nworkers != 0 || !sc_w->consider_max) + { + tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i]; + tmp_flops[tmp_nsched_ctxs] = flops[i]; + for(w = 0; w < ntypes_of_workers; w++) + tmp_v[tmp_nsched_ctxs][w] = v[i][w]; + tmp_nsched_ctxs++; + } + } + if(tmp_nsched_ctxs == 0) + return -1.0; + double ret = 
sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0); + + int j; + for(i = 0; i < nsched_ctxs; i++) + { + unsigned found = 0; + for(j = 0; j < tmp_nsched_ctxs; j++) + { + if(sched_ctxs[i] == tmp_sched_ctxs[j]) + { + for(w = 0; w < ntypes_of_workers; w++) + res[i][w] = tmp_res[j][w]; + found = 1; + break; + } + } + if(!found) + { + for(w = 0; w < ntypes_of_workers; w++) + res[i][w] = 0.0; + } + } + + double vmax = 0.0; + + if(ret != 0.0) + { + /* redo the lp after cleaning out the contexts that got all the max workers required */ + unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS]; + double selected_flops[STARPU_NMAX_SCHED_CTXS]; + double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; + int nselected = 0; + int available_cpus = total_nw[0]; + int used_cpus = 0; + + for(i = 0; i < nsched_ctxs; i++) + { + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]); + if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001) + { + selected_flops[nselected] = flops[i]; + selected_v[nselected][0] = v[i][0]; + selected_sched_ctxs[nselected++] = sched_ctxs[i]; + } + else + available_cpus -= res[i][0]; + used_cpus += res[i][0]; + } + + if(used_cpus < 0.8 * total_nw[0] && nselected > 1) + { + double old_ret = ret; + + if(nselected <= 0 || nselected == nsched_ctxs) + { + nselected = nsched_ctxs; + for(i = 0; i < nsched_ctxs; i++) + { + selected_flops[i] = flops[i]; + selected_v[i][0] = v[i][0]; + selected_sched_ctxs[i] = sched_ctxs[i]; + } + } + else + total_nw[0] = available_cpus; + + double selected_res[nselected][ntypes_of_workers]; + ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret); + + if(ret != 0) + { + for(i = 0; i < nsched_ctxs; i++) + { + for(j = 0; j < nselected; j++) + { + if(sched_ctxs[i] == selected_sched_ctxs[j]) + 
{ + res[i][0] = selected_res[j][0]; + } + } + } + } + else + ret = old_ret; + } + + } + + /* if the lp could not give any workers to any context + just split the workers btw the contexts */ + if(ret == 0.0) + { + double rand_res[nw]; + for(w = 0; w < nw; w++) + rand_res[w] = total_nw[w]/nsched_ctxs; + int s; + for(s = 0; s < nsched_ctxs; s++) + for(w = 0; w < nw; w++) + res[s][w] = rand_res[w]; + } + + else + /* keep the first speed */ +// if(ret != 0.0) + { + vmax = 1 / ret; + } + double optimal_v = 0.0; + for(i = 0; i < nsched_ctxs; i++) + { +#ifdef STARPU_USE_CUDA + optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1]; +#else + optimal_v = res[i][0] * v[i][0]; +#endif //STARPU_USE_CUDA + unsigned no_workers = 1; + for(w = 0; w < nw; w++) + { + if(res[i][w] != 0.0) + { + no_workers = 0; + break; + } + } + + sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); + +/* if the hypervisor gave 0 workers to a context but the context still + * has some last flops or a ready task that does not even have any flops + * we give a worker (in shared mode) to the context in order to leave him + * finish its work = we give -1.0 value instead of 0.0 and further on in + * the distribution function we take this into account and revert the variable + * to its 0.0 value +*/ +// if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0)) + if(no_workers) + { + for(w = 0; w < nw; w++) + res[i][w] = -1.0; + } + +// if(optimal_v != 0.0) + _set_optimal_v(sched_ctxs[i], optimal_v); + } + + return vmax; +#else//STARPU_HAVE_GLPK_H + return 0.0; +#endif//STARPU_HAVE_GLPK_H +} + +double sc_hypervisor_lp_get_tmax(int nworkers, int *workers) +{ + struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers); + int nw = tw->nw; + + int total_nw[nw]; + sc_hypervisor_group_workers_by_type(tw, total_nw); + + int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); + + double res[nsched_ctxs][nw]; + double ret = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw, 
/* Largest integer not greater than value, for values that fit in an int. */
static int _lp_floor_to_int(double value)
{
	int truncated = (int)value;
	return ((double)truncated > value) ? truncated - 1 : truncated;
}

/*
 * Round the fractional distribution res[ns][nw] to integers, one worker
 * type (column) at a time.
 *
 * For each column the fraction that rounding removed (or added) is carried
 * over to the next context, so that the rounded column keeps adding up to
 * the same total as the fractional one.  A value is rounded up as soon as
 * its fraction plus the carry exceeds 0.5.
 *
 * The carry is now accumulated in every branch; previously it was replaced
 * by the current fraction (or had its sign flipped) in some branches, which
 * could lose whole workers, e.g. five contexts at 0.2 rounded to 0 in total.
 *
 * Values without fractional part (including the -1.0 marker) are copied
 * unchanged and leave the carry untouched.
 */
void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
{
	int s, w;

	if(ns <= 0 || nw <= 0)
		return;

	double left_res[nw];
	for(w = 0; w < nw; w++)
		left_res[w] = 0.0;

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			int x = _lp_floor_to_int(res[s][w]);
			double diff = res[s][w] - (double)x;

			if(diff == 0.0)
			{
				res_rounded[s][w] = x;
				continue;
			}

			/* fraction of this context plus what earlier contexts left behind */
			double pending = diff + left_res[w];
			if(pending > 0.5)
			{
				res_rounded[s][w] = x + 1;
				/* we gave more than asked: the debt is carried as a negative value */
				left_res[w] = pending - 1.0;
			}
			else
			{
				res_rounded[s][w] = x;
				left_res[w] = pending;
			}
		}
	}
}
+ if(target_res < 0.0 && nworkers_to_move > 0) + { + tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0]; + for(i = 1; i < nworkers_to_move; i++) + tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; + } + else + { + for(i = 0; i < nworkers_to_move; i++) + tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; + } + free(workers_to_move); + } + } + else + { + double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0; + if(nworkers_ctx > res[sched_ctx_idx][w]) + { + double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w]; + int x = floor(nworkers_to_move); + double x_double = (double)x; + double diff = nworkers_to_move - x_double; + if(diff == 0.0) + { + int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch); + if(x > 0) + { + int i; + for(i = 0; i < x; i++) + tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; + + } + free(workers_to_move); + } + else + { + x+=1; + int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch); + if(x > 0) + { + int i; + for(i = 0; i < x-1; i++) + tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; + + if(diff > 0.8) + tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1]; + else + if(diff > 0.3) + tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1]; + + } + free(workers_to_move); + } + } + } + } +} + +void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, + int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], + int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS], + int *nw_move, int workers_move[STARPU_NMAXWORKERS], + int *nw_add, int workers_add[STARPU_NMAXWORKERS], + int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw) +{ + int w; + int j = 0, k = 0; + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); + + int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch); + int nw_needed = 
/*
 * Collect, for every worker type, the workers still pending in
 * tmp_workers_move (entries different from -1) into the flat array
 * workers_move, and mark them as consumed (-1) in tmp_workers_move.
 *
 * At most tmp_nw_move[w] workers are taken for type w.  *nw_move is the
 * number of valid entries of workers_move: new workers are appended after
 * the entries already counted and *nw_move is updated to the number of
 * entries actually stored.
 *
 * The write cursor is shared by all the worker types; it used to restart at
 * 0 for each type, so a second type overwrote the workers of the first one
 * while *nw_move kept growing.
 */
void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS])
{
	int w;
	int j = *nw_move;

	for(w = 0; w < nw; w++)
	{
		if(tmp_nw_move[w] <= 0)
			continue;

		int taken = 0;
		int i;
		for(i = 0; i < STARPU_NMAXWORKERS && taken < tmp_nw_move[w] && j < STARPU_NMAXWORKERS; i++)
		{
			if(tmp_workers_move[w][i] == -1)
				continue;

			workers_move[j++] = tmp_workers_move[w][i];
			tmp_workers_move[w][i] = -1;
			taken++;
		}
	}

	*nw_move = j;
}
*/ + _lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s, + tmp_nw_move, tmp_workers_move, + tmp_nw_add, tmp_workers_add, res_rounded, + res, tw); + for(s2 = 0; s2 < ns; s2++) + { + if(sched_ctxs[s2] != sched_ctxs[s]) + { + /* find workers that ctx s2 wants to accept from ctx s + the rest of it will probably accepted by another ctx */ + int workers_move[STARPU_NMAXWORKERS]; + int nw_move = 0; + + int workers_add[STARPU_NMAXWORKERS]; + int nw_add = 0; + + + _lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2, + tmp_nw_move, tmp_workers_move, + tmp_nw_add, tmp_workers_add, + &nw_move, workers_move, + &nw_add, workers_add, + res_rounded, res, tw); + + if(nw_move > 0) + { + sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize())); + nw_move = 0; + } + + if(nw_add > 0) + { + sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]); + nw_add = 0; + } + } + } + + /* if there are workers that weren't accepted by anyone but ctx s wants + to get rid of them just remove them from ctx s */ + int workers_move[STARPU_NMAXWORKERS]; + int nw_move = 0; + + _lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, + &nw_move, workers_move); + + if(nw_move > 0) + sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); + } +} +int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove) +{ + int nw_remove = 0; + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + int worker; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + int i; + unsigned found = 0; + for(i = 0; i < nw_add; i++) + { + if(worker == workers_add[i]) + { + found = 1; + break; + } + } + if(!found) + workers_remove[nw_remove++] = worker; + } + return nw_remove; +} + 
+void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], + double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw) +{ + int s, w; + int start[nw]; + for(w = 0; w < nw; w++) + start[w] = 0; + for(s = 0; s < ns; s++) + { + int workers_add[STARPU_NMAXWORKERS]; + int nw_add = 0; + double target_res = 0.0; + for(w = 0; w < nw; w++) + { + target_res += res[s][w]; + if(res[s][w] == -1.0) res[s][w] = 0.0; + } + + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); + + if(arch == STARPU_CPU_WORKER) + { + int nworkers_to_add = res_rounded[s][w]; + if(target_res < 0.0) + { + nworkers_to_add=1; + int old_start = start[w]; + if(start[w] != 0) + start[w]--; + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); + start[w] = old_start; + int i; + for(i = 0; i < nworkers_to_add; i++) + { + workers_add[nw_add++] = workers_to_add[i]; + } + free(workers_to_add); + } + else + { + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); + int i; + for(i = 0; i < nworkers_to_add; i++) + workers_add[nw_add++] = workers_to_add[i]; + free(workers_to_add); + } + } + else + { + double nworkers_to_add = res[s][w]; + int x = floor(nworkers_to_add); + double x_double = (double)x; + double diff = nworkers_to_add - x_double; + if(diff == 0.0) + { + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); + int i; + for(i = 0; i < x; i++) + workers_add[nw_add++] = workers_to_add[i]; + free(workers_to_add); + } + else + { + x+=1; + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); + int i; + if(diff >= 0.3) + for(i = 0; i < x; i++) + workers_add[nw_add++] = workers_to_add[i]; + else + for(i = 0; i < x-1; i++) + workers_add[nw_add++] = 
workers_to_add[i]; + + free(workers_to_add); + } + } + } +// sc_hypervisor_start_resize(sched_ctxs[s]); + sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]); + int workers_remove[STARPU_NMAXWORKERS]; + int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove); + sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); + } +} + +void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, double res[ns][nw], + int *workers, int nworkers, struct types_of_workers *tw) +{ + int s, w; + int start[nw]; + for(w = 0; w < nw; w++) + start[w] = 0; + for(s = 0; s < ns; s++) + { + int workers_add[STARPU_NMAXWORKERS]; + int nw_add = 0; + double target_res = 0.0; + for(w = 0; w < nw; w++) + { + target_res += res[s][w]; + if(res[s][w] == -1.0) res[s][w] = 0.0; + } + + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); + + if(arch == STARPU_CPU_WORKER) + { + int nworkers_to_add = ceil(res[s][w]); + double ceil_double = (double)nworkers_to_add; + double diff = ceil_double - res[s][w]; + + if(target_res < 0.0) + { + nworkers_to_add=1; + int old_start = start[w]; + if(start[w] != 0) + start[w]--; + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); + start[w] = old_start; + int i; + for(i = 0; i < nworkers_to_add; i++) + { + workers_add[nw_add++] = workers_to_add[i]; + } + free(workers_to_add); + } + else + { + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); + int i; + for(i = 0; i < nworkers_to_add; i++) + workers_add[nw_add++] = workers_to_add[i]; + free(workers_to_add); + } + if(diff != 0.0) + start[w]--; + } + else + { + double nworkers_to_add = res[s][w]; + int x = floor(nworkers_to_add); + double x_double = (double)x; + double 
diff = nworkers_to_add - x_double; + if(diff == 0.0) + { + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); + int i; + for(i = 0; i < x; i++) + workers_add[nw_add++] = workers_to_add[i]; + free(workers_to_add); + } + else + { + x+=1; + int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); + int i; + if(diff >= 0.3) + for(i = 0; i < x; i++) + workers_add[nw_add++] = workers_to_add[i]; + else + for(i = 0; i < x-1; i++) + workers_add[nw_add++] = workers_to_add[i]; + + free(workers_to_add); + } + } + } +// sc_hypervisor_start_resize(sched_ctxs[s]); + sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]); + int workers_remove[STARPU_NMAXWORKERS]; + int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove); + sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); + } +} + +/* nw = all the workers (either in a list or on all machine) */ +void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw) +{ + int w, s; + int ntypes_of_workers = tw->nw; + double nworkers[ns][ntypes_of_workers]; + int nworkers_rounded[ns][ntypes_of_workers]; + for(s = 0; s < ns; s++) + { + for(w = 0; w < ntypes_of_workers; w++) + { + nworkers[s][w] = 0.0; + nworkers_rounded[s][w] = 0; + } + + } + + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + { + enum starpu_worker_archtype arch = starpu_worker_get_type(w); + int idx = sc_hypervisor_get_index_for_arch(arch, tw); + nworkers[s][idx] += w_in_s[s][w]; + + if(arch == STARPU_CUDA_WORKER) + { + if(w_in_s[s][w] >= 0.3) + nworkers_rounded[s][idx]++; + } + else + { + if(w_in_s[s][w] > 0.5) + nworkers_rounded[s][idx]++; + } + } + } + + if(!do_size) + sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 
ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw); + else + { + unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input; + + unsigned has_workers = 0; + for(s = 0; s < ns; s++) + { + int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], + STARPU_ANY_WORKER); + if(nworkers_ctx != 0) + { + has_workers = 1; + break; + } + } + if(has_workers) + sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw); + else + sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw); + } + return; +} + +void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers) +{ + int s, w, worker, nw = 0; + int remaining_workers[nworkers]; + for(w = 0; w < nworkers; w++) + { + worker = workers == NULL ? w : workers[w]; + unsigned found = 0; + for(s = 0; s < ns; s++) + { + if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s])) + { + found = 1; + break; + } + } + if(!found) + { + remaining_workers[nw++] = worker; + } + } + + if(nw > 0) + { + for(s = 0; s < ns; s++) + { + for(w = 0; w < nw; w++) + _sc_hypervisor_allow_compute_idle(sched_ctxs[s], remaining_workers[w], 0); + sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]); + } + } +} + +double sc_hypervisor_lp_find_tmax(double t1, double t2) +{ + return t1 + ((t2 - t1)/2); +} diff --git a/sc_hypervisor/src/policies_utils/policy_tools.c b/sc_hypervisor/src/policies_utils/policy_tools.c new file mode 100644 index 0000000..572b092 --- /dev/null +++ b/sc_hypervisor/src/policies_utils/policy_tools.c @@ -0,0 +1,620 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "sc_hypervisor_policy.h"
+#include "sc_hypervisor_intern.h"
+#include "sc_hypervisor_lp.h"
+
+/* Sum config->priority[] over every worker currently in sched_ctx. */
+static int _compute_priority(unsigned sched_ctx)
+{
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);
+
+	int total_priority = 0;
+
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	workers->init_iterator(workers, &it);
+
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		total_priority += config->priority[worker];
+	}
+
+	return total_priority;
+}
+
+/* Pick a context able to receive nworkers_to_move extra workers.
+ *
+ * Every context known to the hypervisor is examined, except req_sched_ctx and
+ * the slots holding STARPU_NMAX_SCHED_CTXS. A context qualifies when its
+ * current number of workers plus nworkers_to_move does not exceed its
+ * max_nworkers. Among the qualifying contexts, the one with the largest
+ * _compute_priority() value is returned; STARPU_NMAX_SCHED_CTXS is returned
+ * when none qualifies.
+ *
+ * NOTE(review): the function name says "lowest" while the comparison keeps the
+ * largest priority sum -- confirm which one is intended. */
+unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move)
+{
+	int i;
+	int highest_priority = -1;
+	int current_priority = 0;
+	unsigned sched_ctx = STARPU_NMAX_SCHED_CTXS;
+	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
+
+	struct sc_hypervisor_policy_config *config = NULL;
+
+	for(i = 0; i < nsched_ctxs; i++)
+	{
+		if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && sched_ctxs[i] != req_sched_ctx)
+		{
+			int nworkers = (int)starpu_sched_ctx_get_nworkers(sched_ctxs[i]);
+			config = sc_hypervisor_get_config(sched_ctxs[i]);
+			if((nworkers + nworkers_to_move) <= config->max_nworkers)
+			{
+				current_priority = _compute_priority(sched_ctxs[i]);
+				if (highest_priority < current_priority)
+				{
+					highest_priority = current_priority;
+					sched_ctx = sched_ctxs[i];
+				}
+			}
+		}
+	}
+
+	return sched_ctx;
+}
+
+/* Take up to *nworkers workers of type arch from a list.
+ *
+ * workers[] holds nall_workers worker ids; when workers is NULL the ids
+ * 0..nall_workers-1 are used instead. Only positions >= *start are eligible,
+ * and *start is moved just past every position that gets picked, so that
+ * successive calls hand out distinct workers. The selection is purely
+ * positional: no idle time is consulted here.
+ *
+ * On return *nworkers holds the number of ids actually stored. The returned
+ * array is heap-allocated and must be released with free(). If the array
+ * cannot be allocated, NULL is returned and *nworkers is set to 0. */
+int* sc_hypervisor_get_idlest_workers_in_list(int *start, int *workers, int nall_workers, int *nworkers, enum starpu_worker_archtype arch)
+{
+	int *curr_workers = (int*)malloc((*nworkers)*sizeof(int));
+	if(curr_workers == NULL)
+	{
+		/* report "no worker" instead of writing through a NULL pointer */
+		*nworkers = 0;
+		return NULL;
+	}
+
+	int w, worker;
+	int nfound_workers = 0;
+	for(w = 0; w < nall_workers; w++)
+	{
+		if(nfound_workers >= *nworkers)
+			break;
+
+		worker = workers == NULL ? w : workers[w];
+		enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
+		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
+		{
+			if(w >= *start)
+			{
+				curr_workers[nfound_workers++] = worker;
+				*start = w+1;
+			}
+		}
+	}
+	if(nfound_workers < *nworkers)
+		*nworkers = nfound_workers;
+	return curr_workers;
+}
+
+/* Select up to *nworkers movable workers of type arch from sched_ctx.
+ *
+ * Workers flagged in config->fixed_workers[] are never selected. For each
+ * output slot the candidate with the smallest config->priority[] wins; among
+ * equal priorities the one with the largest current_idle_time[] wins. A worker
+ * already stored in an earlier slot is skipped.
+ *
+ * On return *nworkers holds the number of slots filled. The returned array is
+ * heap-allocated and must be released with free(). If the array cannot be
+ * allocated, NULL is returned and *nworkers is set to 0.
+ *
+ * NOTE(review): the iterator is initialised once, before the outer loop;
+ * confirm that the collection's has_next() restarts the traversal after
+ * reporting the end, otherwise only the first slot can ever be filled. */
+int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch)
+{
+	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);
+
+	int *curr_workers = (int*)malloc((*nworkers) * sizeof(int));
+	if(curr_workers == NULL)
+	{
+		/* report "no worker" instead of writing through a NULL pointer */
+		*nworkers = 0;
+		return NULL;
+	}
+
+	int i;
+	for(i = 0; i < *nworkers; i++)
+		curr_workers[i] = -1;
+
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	int index;
+	int worker;
+	int considered = 0;
+
+	struct starpu_sched_ctx_iterator it;
+	workers->init_iterator(workers, &it);
+
+	for(index = 0; index < *nworkers; index++)
+	{
+		while(workers->has_next(workers, &it))
+		{
+			considered = 0;
+			worker = workers->get_next(workers, &it);
+			enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
+			if(arch == STARPU_ANY_WORKER || curr_arch == arch)
+			{
+				if(!config->fixed_workers[worker])
+				{
+					/* skip workers already placed in an earlier slot */
+					for(i = 0; i < index; i++)
+					{
+						if(curr_workers[i] == worker)
+						{
+							considered = 1;
+							break;
+						}
+					}
+
+					if(!considered)
+					{
+						/* the first candidate for this slot */
+						if(curr_workers[index] < 0)
+							curr_workers[index] = worker;
+						/* small priority worker is the first to leave the ctx */
+						else if(config->priority[worker] <
+							config->priority[curr_workers[index]])
+							curr_workers[index] = worker;
+						/* on equal priorities prefer the worker
+						   with the biggest idle time */
+						else if(config->priority[worker] ==
+							config->priority[curr_workers[index]])
+						{
+							double worker_idle_time = sc_w->current_idle_time[worker];
+							double curr_worker_idle_time = sc_w->current_idle_time[curr_workers[index]];
+							if(worker_idle_time > curr_worker_idle_time)
+								curr_workers[index] = worker;
+						}
+					}
+				}
+			}
+		}
+
+		/* no candidate left: shrink the result to what was found */
+		if(curr_workers[index] < 0)
+		{
+			*nworkers = index;
+			break;
+		}
+	}
+
+	return curr_workers;
+}
+
+/* get the number of workers in the context that are allowed to be moved (that are not fixed) */
+int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_worker_archtype arch)
+{
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+
+	int potential_workers = 0;
+	int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	workers->init_iterator(workers, &it);
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
+		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
+		{
+			if(!config->fixed_workers[worker])
+				potential_workers++;
+		}
+	}
+
+	return potential_workers;
+}
+
+/* compute the number of workers that should be moved depending:
+ * - on the min/max number of workers in a context imposed by the user,
+ * - on the resource granularity imposed by the user for the resizing process
+ * Returns 0 when the context has no movable worker. */
+int sc_hypervisor_compute_nworkers_to_move(unsigned req_sched_ctx)
+{
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(req_sched_ctx);
+	int nworkers = (int)starpu_sched_ctx_get_nworkers(req_sched_ctx);
+	int nworkers_to_move = 0;
+
+	int potential_moving_workers = (int)sc_hypervisor_get_movable_nworkers(config, req_sched_ctx, STARPU_ANY_WORKER);
+	if(potential_moving_workers > 0)
+	{
+		if(potential_moving_workers <= config->min_nworkers)
+			/* if we have to give more than min better give it all */
+			/* => empty ctx will block until having the required workers */
+			nworkers_to_move = potential_moving_workers;
+		else if(potential_moving_workers > config->max_nworkers)
+		{
+			if((potential_moving_workers - config->granularity) > config->max_nworkers)
+//				nworkers_to_move = config->granularity;
+				nworkers_to_move = potential_moving_workers;
+			else
+				nworkers_to_move = potential_moving_workers - config->max_nworkers;
+
+		}
+		else if(potential_moving_workers > config->granularity)
+		{
+			if((nworkers - config->granularity) > config->min_nworkers)
+				nworkers_to_move = config->granularity;
+			else
+				nworkers_to_move = potential_moving_workers - config->min_nworkers;
+		}
+		else
+		{
+			int nfixed_workers = nworkers - potential_moving_workers;
+			if(nfixed_workers >= config->min_nworkers)
+				nworkers_to_move = potential_moving_workers;
+			else
+				nworkers_to_move = potential_moving_workers - (config->min_nworkers - nfixed_workers);
+		}
+
+		/* never leave more than max_nworkers behind */
+		if((nworkers - nworkers_to_move) > config->max_nworkers)
+			nworkers_to_move = nworkers - config->max_nworkers;
+	}
+	return nworkers_to_move;
+}
+
+/* Move workers out of sender_sched_ctx.
+ *
+ * act_hypervisor_mutex is taken for the duration of the operation: blocking
+ * when force_resize is set, with a trylock otherwise. When the trylock reports
+ * EBUSY nothing is done and 0 is returned; in every other case 1 is returned,
+ * whether or not workers were actually moved.
+ *
+ * The receiver is receiver_sched_ctx, or, when that is
+ * STARPU_NMAX_SCHED_CTXS, the context chosen by
+ * sc_hypervisor_find_lowest_prio_sched_ctx(). For an explicit receiver the
+ * number of workers is reduced so that its max_nworkers is not exceeded.
+ * For every moved worker whose max_idle in the receiver's config equals
+ * MAX_IDLE_TIME, max_idle is replaced by the receiver's new_workers_max_idle. */
+unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now)
+{
+	int ret = 1;
+	if(force_resize)
+		STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex);
+	else
+		ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(sender_sched_ctx);
+		if(nworkers_to_move > 0)
+		{
+			unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
+			if(receiver_sched_ctx == STARPU_NMAX_SCHED_CTXS)
+			{
+				poor_sched_ctx = sc_hypervisor_find_lowest_prio_sched_ctx(sender_sched_ctx, nworkers_to_move);
+			}
+			else
+			{
+				poor_sched_ctx = receiver_sched_ctx;
+				struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(poor_sched_ctx);
+				int nworkers = (int)starpu_sched_ctx_get_nworkers(poor_sched_ctx);
+				int nshared_workers = (int)starpu_sched_ctx_get_nshared_workers(sender_sched_ctx, poor_sched_ctx);
+				if((nworkers+nworkers_to_move-nshared_workers) > config->max_nworkers)
+					nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers+nshared_workers);
+				if(nworkers_to_move == 0) poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
+			}
+			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
+			{
+				int *workers_to_move = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER);
+				/* NULL means the candidate list could not be allocated */
+				if(workers_to_move != NULL)
+				{
+					sc_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move, now);
+
+					struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(poor_sched_ctx);
+					int i;
+					for(i = 0; i < nworkers_to_move; i++)
+					{
+						int moved = workers_to_move[i];
+						if(new_config->max_idle[moved] == MAX_IDLE_TIME)
+							new_config->max_idle[moved] = new_config->new_workers_max_idle;
+					}
+
+					free(workers_to_move);
+				}
+			}
+		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
+		return 1;
+	}
+	return 0;
+
+}
+
+
+/* Non-forced resize that lets the hypervisor choose the receiving context. */
+unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
+{
+	return sc_hypervisor_policy_resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
+}
+
+/* Largest value, over all contexts, of ispeed_ctx_sample (scaled by 1e-9)
+ * divided by sc_hypervisor_get_ctx_speed(). Starts from 0.0, so contexts whose
+ * quotient is not positive are ignored. */
+double sc_hypervisor_get_slowest_ctx_exec_time(void)
+{
+	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
+
+	double slowest_time = 0.0;
+
+	int s;
+	struct sc_hypervisor_wrapper* sc_w;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+
+		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w);
+		if(elapsed_time > slowest_time)
+			slowest_time = elapsed_time;
+
+	}
+	return slowest_time;
+}
+
+/* Smallest value, over all contexts, of ispeed_ctx_sample (scaled by 1e-9)
+ * divided by sc_hypervisor_get_ctx_speed(). The search starts from
+ * starpu_timing_now(), which is therefore the result when there is no context.
+ *
+ * NOTE(review): the quotient is not filtered, so a non-positive speed yields a
+ * non-positive result here -- confirm callers expect that. */
+double sc_hypervisor_get_fastest_ctx_exec_time(void)
+{
+	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
+
+	double curr_time = starpu_timing_now();
+	double fastest_time = curr_time;
+
+	int s;
+	struct sc_hypervisor_wrapper* sc_w;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+
+		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w);
+
+		if(elapsed_time < fastest_time)
+			fastest_time = elapsed_time;
+
+	}
+
+	return fastest_time;
+}
+
+/* Fill total_nw[0..tw->nw-1] with the number of workers of each type.
+ * CPUs take index 0 when there are any, and CUDA devices the next free index;
+ * this is the same layout as sc_hypervisor_get_index_for_arch(). */
+void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw)
+{
+	unsigned w;
+	for(w = 0; w < tw->nw; w++)
+		total_nw[w] = 0;
+
+	if(tw->ncpus != 0)
+	{
+		total_nw[0] = tw->ncpus;
+		if(tw->ncuda != 0)
+			total_nw[1] = tw->ncuda;
+	}
+	else
+	{
+		if(tw->ncuda != 0)
+			total_nw[0] =tw->ncuda;
+	}
+
+}
+
+/* Worker type stored at index w of a per-type array: index 0 is CPU when
+ * tw->ncpus is non-zero and CUDA otherwise; any other index is CUDA when
+ * tw->ncuda is non-zero and CPU otherwise. */
+enum starpu_worker_archtype sc_hypervisor_get_arch_for_index(unsigned w, struct types_of_workers *tw)
+{
+	if(w == 0)
+	{
+		if(tw->ncpus != 0)
+			return STARPU_CPU_WORKER;
+		else
+			return STARPU_CUDA_WORKER;
+	}
+	else
+		if(tw->ncuda != 0)
+			return STARPU_CUDA_WORKER;
+
+	return STARPU_CPU_WORKER;
+}
+
+/* Index of arch in a per-type array: CUDA is at 1 when CPUs are present;
+ * every other case, unknown types included, maps to 0. */
+unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, struct types_of_workers *tw)
+{
+
+	if(arch == STARPU_CPU_WORKER)
+	{
+		if(tw->ncpus != 0)
+			return 0;
+	}
+	else
+	{
+		if(arch == STARPU_CUDA_WORKER)
+		{
+			if(tw->ncpus != 0)
+				return 1;
+			else
+				return 0;
+		}
+	}
+	return 0;
+}
+
+/* Fill times[w][t] with the expected duration of task pool t on worker w.
+ *
+ * workers[] gives the nw worker ids (ids 0..nw-1 when workers is NULL) and
+ * task_pools is walked in list order, pool number t going to column t. The
+ * duration is the history-based perfmodel estimate divided by 1000; entries
+ * with no estimate are NAN. When size_ctxs is 0 and the worker is not in the
+ * pool's context, a transfer cost (data_size over bandwidth, plus latency,
+ * both divided by 1000) is added for CUDA workers, and for CPU workers when
+ * the pool's context holds no CPU worker at all.
+ *
+ * At most nt pools are read, so a list longer than nt cannot write outside
+ * times[][]. */
+void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools)
+{
+	struct sc_hypervisor_policy_task_pool *tp;
+	int w, t;
+	for(w = 0; w < nw; w++)
+		for(t = 0; t < nt; t++)
+			times[w][t] = NAN;
+	for (w = 0; w < nw; w++)
+	{
+		for (t = 0, tp = task_pools; tp && t < nt; t++, tp = tp->next)
+		{
+			int worker = workers == NULL ? w : workers[w];
+			struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(worker, STARPU_NMAX_SCHED_CTXS);
+			double length = starpu_perfmodel_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
+
+			if (isnan(length))
+				times[w][t] = NAN;
+			else
+			{
+				times[w][t] = (length / 1000.);
+				double transfer_time = 0.0;
+				unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
+				enum starpu_worker_archtype warch = starpu_worker_get_type(worker);
+				if(!worker_in_ctx && !size_ctxs)
+				{
+					if(warch == STARPU_CUDA_WORKER)
+					{
+						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
+						if(transfer_speed > 0.0)
+							transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+
+						double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
+						transfer_time += latency/1000.;
+//						transfer_time *=4;
+					}
+					else if (warch == STARPU_CPU_WORKER)
+					{
+						if(!starpu_sched_ctx_contains_type_of_worker(warch, tp->sched_ctx_id))
+						{
+							double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
+							if(transfer_speed > 0.0)
+								transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+
+							double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
+							transfer_time += latency / 1000.;
+						}
+					}
+				}
+
+//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
+				times[w][t] += transfer_time;
+			}
+//			printf("sc%d w%d task %s nt %d times %lf s\n", tp->sched_ctx_id, w, tp->cl->model->symbol, tp->n, times[w][t]);
+		}
+	}
+}
+
+/* Return 1 when the context has a config and the worker's idle_time exceeds
+ * the max_idle configured for it, 0 otherwise. */
+unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
+{
+	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+	struct sc_hypervisor_policy_config *config = sc_w->config;
+	if(config != NULL)
+	{
+		if(sc_w->idle_time[worker] > config->max_idle[worker])
+		{
+//			printf("w%d/ctx%d: current idle %lf max_idle %lf\n", worker, sched_ctx, sc_w->idle_time[worker], config->max_idle[worker]);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* check if there is a big speed gap between the contexts
+ *
+ * sched_ctxs_in == NULL, ns_in == -1 and nworkers_in == -1 each select the
+ * hypervisor-wide default (all contexts, all workers). Returns 0 while any
+ * context has no measurable speed yet (sc_hypervisor_get_ctx_speed() == -1.0).
+ * Otherwise returns 1 as soon as a context's speed lies outside
+ * [(1-gap)*optimal, (1+gap)*optimal], gap being _get_max_speed_gap(), and 0
+ * if none does. */
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_in, int *workers_in, int nworkers_in)
+{
+	unsigned *sched_ctxs = sched_ctxs_in == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_in;
+	int ns = ns_in == -1 ? sc_hypervisor_get_nsched_ctxs() : ns_in;
+	int *workers = workers_in;
+	int nworkers = nworkers_in == -1 ? (int)starpu_worker_get_count() : nworkers_in;
+	int i = 0, j = 0;
+	struct sc_hypervisor_wrapper* sc_w;
+	struct sc_hypervisor_wrapper* other_sc_w;
+
+	/* nothing to compare; also keeps the arrays below from having size 0 */
+	if(ns <= 0)
+		return 0;
+
+	double optimal_v[ns];
+	unsigned has_opt_v = 1;
+	for(i = 0; i < ns; i++)
+	{
+		optimal_v[i] = _get_optimal_v(sched_ctxs[i]);
+		if(optimal_v[i] == 0.0)
+		{
+			has_opt_v = 0;
+			break;
+		}
+	}
+
+	/* if an optimal speed has not been computed yet do it now */
+	if(!has_opt_v)
+	{
+		struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers);
+		int nw = tw->nw;
+		double nworkers_per_ctx[ns][nw];
+		int total_nw[nw];
+		int w;
+		sc_hypervisor_group_workers_by_type(tw, total_nw);
+
+		/* The linear program that would fill nworkers_per_ctx is disabled
+		   below, so mark every entry as "no value" (-1.0, the sentinel
+		   tested further down) instead of reading indeterminate storage. */
+		for(i = 0; i < ns; i++)
+			for(w = 0; w < nw; w++)
+				nworkers_per_ctx[i][w] = -1.0;
+
+//		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs);
+
+
+//		if(vmax != 0.0)
+		{
+			for(i = 0; i < ns; i++)
+			{
+				sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+				double v[nw];
+				optimal_v[i] = 0.0;
+				for(w = 0; w < nw; w++)
+				{
+					v[w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));
+					optimal_v[i] += nworkers_per_ctx[i][w] == -1.0 ? 0.0 : nworkers_per_ctx[i][w]*v[w];
+				}
+				_set_optimal_v(sched_ctxs[i], optimal_v[i]);
+			}
+			has_opt_v = 1;
+		}
+		free(tw);
+	}
+
+	/* if we have an optimal speed for each type of worker compare the monitored one with the
+	 * theoretical one */
+	if(has_opt_v)
+	{
+		for(i = 0; i < ns; i++)
+		{
+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
+			if(ctx_v == -1.0)
+				return 0;
+		}
+
+		for(i = 0; i < ns; i++)
+		{
+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
+			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
+			double max_vel = _get_max_speed_gap();
+			if(ctx_v != -1.0 && ((ctx_v < (1-max_vel)*optimal_v[i]) || ctx_v > (1+max_vel)*optimal_v[i]))
+			{
+				return 1;
+			}
+		}
+	}
+	else /* if we have not been able to compute a theoretical speed consider the env variable
+		SC_MAX_SPEED_GAP and compare the speed of the contexts, whenever the difference
+		btw them is greater than the max value the function returns true */
+	{
+		/* NOTE(review): has_opt_v is always set above, so this branch is
+		   not reached as the code stands. */
+		for(i = 0; i < ns; i++)
+		{
+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
+			if(ctx_v != -1.0)
+			{
+				for(j = 0; j < ns; j++)
+				{
+					if(sched_ctxs[i] != sched_ctxs[j])
+					{
+						unsigned snworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[j]);
+						if(snworkers == 0)
+							return 1;
+
+						other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]);
+						double other_ctx_v = sc_hypervisor_get_ctx_speed(other_sc_w);
+						if(other_ctx_v != -1.0)
+						{
+							double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
+							double max_vel = _get_max_speed_gap();
+							if(gap > max_vel)
+								return 1;
+						}
+					}
+				}
+			}
+
+		}
+	}
+	return 0;
+}
+
+/* Same check restricted to the contexts that sc_hypervisor_get_ctxs_on_level()
+ * reports for the given level and father context. The list and its length are
+ * handed back through sched_ctxs and nsched_ctxs; 0 is returned when the list
+ * is empty. */
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs)
+{
+	sc_hypervisor_get_ctxs_on_level(sched_ctxs, nsched_ctxs, level, father_sched_ctx_id);
+
+	if(*nsched_ctxs > 0)
+		return sc_hypervisor_check_speed_gap_btw_ctxs(*sched_ctxs, *nsched_ctxs, workers_in, nworkers_in);
+	return 0;
+}
+
+/* Evaluate the configured resize criterion: the idle check for SC_IDLE, the
+ * speed-gap check for any other criterion except SC_NOTHING, which always
+ * gives 0. */
+unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
+{
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING)
+	{
+		if(criteria == SC_IDLE)
+			return sc_hypervisor_check_idle(sched_ctx, worker);
+		else
+			return sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1);
+	}
+	else
+		return 0;
+}
diff --git a/sc_hypervisor/src/policies_utils/speed.c b/sc_hypervisor/src/policies_utils/speed.c
new file mode 100644
index 
0000000..fd332bf
--- /dev/null
+++ b/sc_hypervisor/src/policies_utils/speed.c
@@ -0,0 +1,321 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "sc_hypervisor_policy.h"
+#include "sc_hypervisor_intern.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Speed of a whole context, in Gflop/s, or -1.0 when it cannot be computed.
+ *
+ * The speed is the context's elapsed flops (scaled by 1e-9) divided by the
+ * time since sc_w->start_time. It is only reported once a sampling criterion
+ * is met:
+ * - SC_HYPERVISOR_SAMPLE_CRITERIA=time: elapsed time exceeds config->time_sample;
+ * - otherwise: elapsed flops reach the sample size. That size is
+ *   config->ispeed_ctx_sample, except while the elapsed flops still equal the
+ *   total elapsed flops, where a positive SC_HYPERVISOR_START_RESIZE
+ *   percentage of sc_w->total_flops is used instead.
+ * A non-positive elapsed time also yields -1.0. */
+double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w)
+{
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
+	double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+	double sample = config->ispeed_ctx_sample;
+
+
+	double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
+	double total_flops = sc_w->total_flops;
+
+	char *start_sample_prc_char = getenv("SC_HYPERVISOR_START_RESIZE");
+	double start_sample_prc = start_sample_prc_char ? atof(start_sample_prc_char) : 0.0;
+	double start_sample = start_sample_prc > 0.0 ? (start_sample_prc / 100) * total_flops : sample;
+	double redim_sample = elapsed_flops == total_elapsed_flops ? (start_sample > 0.0 ? start_sample : sample) : sample;
+
+	double curr_time = starpu_timing_now();
+	double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+
+	unsigned can_compute_speed = 0;
+	char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA");
+	if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0))
+		can_compute_speed = elapsed_time > config->time_sample;
+	else
+		can_compute_speed = elapsed_flops >= redim_sample;
+
+	/* a zero or negative duration cannot give a meaningful speed */
+	if(can_compute_speed && elapsed_time > 0.0)
+	{
+		return (elapsed_flops/1000000000.0)/elapsed_time;/* in Gflops/s */
+	}
+	return -1.0;
+}
+
+/* Speed of one worker inside a context, in Gflop/s.
+ *
+ * Returns -1.0 when the worker is not in the context, when it has not yet
+ * executed more than config->ispeed_w_sample[worker] flops, or when the time
+ * left after removing its idle time is not positive. Returns a tiny positive
+ * value (1e-14) when the context has passed its own sample while this worker
+ * executed nothing. Transfer latency is not added to the elapsed time. */
+double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker)
+{
+	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
+		return -1.0;
+
+	double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
+
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
+	double sample = config->ispeed_w_sample[worker] / 1000000000.0; /*in gflops */
+
+	double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+	double ctx_sample = config->ispeed_ctx_sample;
+	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
+		return 0.00000000000001;
+
+	if(elapsed_flops > sample)
+	{
+		double curr_time = starpu_timing_now();
+		double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+		elapsed_time -= sc_w->idle_time[worker];
+
+		/* the idle time may cancel the whole interval: no usable speed then */
+		if(elapsed_time <= 0.0)
+			return -1.0;
+
+		double vel = (elapsed_flops/elapsed_time);/* in Gflops/s */
+		return vel;
+	}
+
+	return -1.0;
+}
+
+/* compute an average value of the cpu/cuda speed
+ *
+ * Average speed, in Gflop/s per worker, of the workers of type arch in the
+ * context that have compute_idle set; -1.0 when it cannot be computed. The
+ * sampling criterion is the same as in sc_hypervisor_get_ctx_speed(), except
+ * that the flop test is strict and always uses config->ispeed_ctx_sample.
+ *
+ * For a worker with a non-zero exec_start_time, the flops of the task in
+ * progress are estimated as its running time multiplied by the reference
+ * speed of its type, when such a reference speed exists.
+ *
+ * Side effect: a computed speed is folded into sc_w->ref_speed[] (slot 0 for
+ * CUDA, slot 1 for every other type): averaged with the previous value when
+ * that value exceeds 0.1, stored as is otherwise. */
+double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
+{
+	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
+
+	double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+	double ctx_sample = config->ispeed_ctx_sample;
+
+	double curr_time = starpu_timing_now();
+	double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+
+	unsigned can_compute_speed = 0;
+	char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA");
+	if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0))
+		can_compute_speed = elapsed_time > config->time_sample;
+	else
+		can_compute_speed = ctx_elapsed_flops > ctx_sample;
+
+	if(can_compute_speed)
+	{
+		if(ctx_elapsed_flops == 0.0) return -1.0;
+
+		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+		int worker;
+
+		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
+
+		double speed = 0.0;
+		unsigned nworkers = 0;
+		double all_workers_flops = 0.0;
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
+			if(arch == req_arch && sc_w->compute_idle[worker])
+			{
+				if(sc_w->exec_start_time[worker] != 0.0)
+				{
+					double current_exec_time = 0.0;
+					if(sc_w->exec_start_time[worker] < sc_w->start_time)
+						current_exec_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+					else
+						current_exec_time = (curr_time - sc_w->exec_start_time[worker]) / 1000000.0; /* in seconds */
+
+					/* the reference speed is -1.0 when unknown: using that
+					   sentinel as a speed would subtract flops */
+					double ref_speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, req_arch);
+					if(ref_speed > 0.0)
+						all_workers_flops += current_exec_time * ref_speed;
+				}
+
+				all_workers_flops += sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
+				nworkers++;
+			}
+		}
+
+		if(nworkers != 0 && all_workers_flops > 0.0001 && elapsed_time > 0.0)
+			speed = (all_workers_flops / elapsed_time) / nworkers;
+		else
+			speed = -1.0;
+
+		if(speed != -1.0)
+		{
+
+			/* if ref_speed started being corrupted bc of the old bad distribution
+			   register only the last frame otherwise make the average with the speed
+			   behavior of the application until now */
+			if(arch == STARPU_CUDA_WORKER)
+				sc_w->ref_speed[0] = (sc_w->ref_speed[0] > 0.1) ? ((sc_w->ref_speed[0] + speed) / 2.0) : speed;
+			else
+				sc_w->ref_speed[1] = (sc_w->ref_speed[1] > 0.1) ? ((sc_w->ref_speed[1] + speed) / 2.0) : speed;
+		}
+		return speed;
+	}
+
+	return -1.0;
+}
+
+/* compute an average value of the cpu/cuda old speed
+ *
+ * Returns the stored reference speed of the type (slot 0 for CUDA, slot 1 for
+ * CPU) when it is positive, and -1.0 otherwise, including for any other type. */
+double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
+{
+	if(arch == STARPU_CUDA_WORKER && sc_w->ref_speed[0] > 0.0)
+		return sc_w->ref_speed[0];
+	else
+		if(arch == STARPU_CPU_WORKER && sc_w->ref_speed[1] > 0.0)
+			return sc_w->ref_speed[1];
+
+	return -1.0;
+}
+
+/* returns the speed necessary for the linear programs (either the monitored one either a default value) */
+double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch)
+{
+	/* monitored speed in the last frame */
+	double speed = sc_hypervisor_get_speed_per_worker_type(sc_w, arch);
+	if(speed == -1.0)
+	{
+		/* avg value of the monitored speed over the entire current execution */
+		speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, arch);
+	}
+	if(speed == -1.0)
+	{
+		/* a default value */
+		speed = arch == STARPU_CPU_WORKER ? SC_HYPERVISOR_DEFAULT_CPU_SPEED : SC_HYPERVISOR_DEFAULT_CUDA_SPEED;
+	}
+
+	return speed;
+}
+
+/* Average speed, in Gflop/s per worker, of all workers of type arch over the
+ * level-0 contexts, or -1.0 when it cannot be computed.
+ *
+ * The value is only produced once the flops executed by these workers exceed
+ * half of the flops estimated for the contexts; it is then the executed flops
+ * divided by the time since the latest real_start_time among the contexts and
+ * by the number of workers of that type. -1.0 is also returned when there is
+ * no such worker or when that time is not positive. */
+double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch)
+{
+	double total_executed_flops = 0.0;
+	double total_estimated_flops = 0.0;
+	struct sc_hypervisor_wrapper *sc_w;
+	double max_real_start_time = 0.0;
+	int s;
+	unsigned nworkers = starpu_worker_get_count_by_type(arch);
+
+	/* initialised so that the free() below is valid whatever the callee stores */
+	unsigned *sched_ctxs = NULL;
+	int nsched_ctxs = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS);
+
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctxs[s]);
+		int worker;
+
+		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
+
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
+			if(arch == req_arch)
+			{
+				total_executed_flops += sc_w->total_elapsed_flops[worker] / 1000000000.0; /*in gflops */
+			}
+		}
+
+		total_estimated_flops += sc_w->total_flops / 1000000000.0; /*in gflops */
+
+		if(max_real_start_time < sc_w->real_start_time)
+			max_real_start_time = sc_w->real_start_time;
+	}
+	free(sched_ctxs);
+
+	double speed = -1.0;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("total_exec_flops %lf total_estimated_flops %lf max_real_start_time %lf nworkers %u \n", total_executed_flops, total_estimated_flops, max_real_start_time, nworkers);
+#endif
+	if(total_executed_flops > 0.5*total_estimated_flops)
+	{
+		double curr_time = starpu_timing_now();
+		double time = (curr_time - max_real_start_time) / 1000000.0; /* in seconds */
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+		printf("time = %lf\n", time);
+#endif
+		/* both divisors must be positive for the average to mean anything */
+		if(time > 0.0 && nworkers != 0)
+			speed = (total_executed_flops / time) / nworkers;
+	}
+
+	return speed;
+}
+
+/* Store consider_max in the wrapper of sched_ctx and, recursively, in the
+ * wrappers of the contexts reported one hierarchy level below it. */
+void _consider_max_for_children(unsigned sched_ctx, unsigned consider_max)
+{
+	struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+	sc_w->consider_max = consider_max;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("ctx %u consider max %d \n", sched_ctx, sc_w->consider_max);
+#endif
+
+	int level = starpu_sched_ctx_get_hierarchy_level(sched_ctx);
+	/* initialised so that the free() below is valid whatever the callee stores */
+	unsigned *sched_ctxs_child = NULL;
+	int nsched_ctxs_child = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctx);
+	int s;
+	for(s = 0; s < nsched_ctxs_child; s++)
+		_consider_max_for_children(sched_ctxs_child[s], consider_max);
+	free(sched_ctxs_child);
+}
+
+/* For every level-0 context, set consider_max (on it and its descendants) to 1
+ * when its speed for a worker type is below half or above one and a half times
+ * the average speed of that type, and to 0 otherwise. Nothing is changed when
+ * the average speed of any type is still unknown (-1.0).
+ *
+ * NOTE(review): the flag is rewritten for each worker type in turn, so the
+ * value left in place is the one of the last type -- confirm this is meant. */
+void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw)
+{
+	/* initialised so that the free() calls below are valid whatever the callee stores */
+	unsigned *sched_ctxs = NULL;
+	int nsched_ctxs = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS);
+
+	int nw = tw->nw;
+	double avg_speed_per_tw[nw];
+	int w;
+	for(w = 0; w < nw; w++)
+	{
+		avg_speed_per_tw[w] = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw));
+		if(avg_speed_per_tw[w] == -1.0)
+		{
+			free(sched_ctxs);
+			return;
+		}
+	}
+
+	int s;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+			double speed = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+			printf("%u: speed %lf avg_speed %lf min %lf max %lf\n", sched_ctxs[s], speed, avg_speed_per_tw[w], (avg_speed_per_tw[w]*0.5), (avg_speed_per_tw[w]*1.5));
+#endif
+			if(speed < avg_speed_per_tw[w]*0.5 || speed > avg_speed_per_tw[w]*1.5)
+				_consider_max_for_children(sched_ctxs[s], 1);
+			else
+				_consider_max_for_children(sched_ctxs[s], 0);
+		}
+	}
+	free(sched_ctxs);
+}
diff --git a/sc_hypervisor/src/policies_utils/task_pool.c b/sc_hypervisor/src/policies_utils/task_pool.c
new file mode 100644
index 0000000..244725c
--- /dev/null
+++ b/sc_hypervisor/src/policies_utils/task_pool.c
@@ -0,0 +1,99 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include "sc_hypervisor_policy.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Allocate one uninitialised pool entry. An allocation failure stops the
+ * program with a message, instead of letting the caller write through NULL. */
+static struct sc_hypervisor_policy_task_pool *_task_pool_alloc(void)
+{
+	struct sc_hypervisor_policy_task_pool *tp = malloc(sizeof(*tp));
+	if (!tp)
+	{
+		fprintf(stderr, "sc_hypervisor: cannot allocate a task pool entry\n");
+		abort();
+	}
+	return tp;
+}
+
+/* Count one more task of kind (cl, footprint, sched_ctx) in *task_pools.
+ *
+ * If the list already has an entry with the same codelet, footprint and
+ * context, its counter n is incremented. Otherwise a new entry with n == 1 and
+ * the given data_size is pushed at the head of the list; data_size is not
+ * used when the entry already exists. */
+void sc_hypervisor_policy_add_task_to_pool(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools, size_t data_size)
+{
+	struct sc_hypervisor_policy_task_pool *tp = NULL;
+
+	for (tp = *task_pools; tp; tp = tp->next)
+	{
+		if (tp->cl == cl && tp->footprint == footprint && tp->sched_ctx_id == sched_ctx)
+			break;
+	}
+
+	if (!tp)
+	{
+		tp = _task_pool_alloc();
+		tp->cl = cl;
+		tp->footprint = footprint;
+		tp->sched_ctx_id = sched_ctx;
+		tp->n = 0;
+		tp->next = *task_pools;
+		tp->data_size = data_size;
+		*task_pools = tp;
+	}
+
+	/* One more task of this kind */
+	tp->n++;
+}
+
+/* Count one task less of kind (task->cl, footprint, task->sched_ctx).
+ *
+ * The first matching entry has its counter n decremented when n is greater
+ * than 1; otherwise the entry is unlinked from *task_pools and freed. Nothing
+ * happens when no entry matches. */
+void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools)
+{
+	/* link is the pointer that leads to the current entry: either the list
+	   head or the next field of the previous entry, so that unlinking is the
+	   same operation in both cases */
+	struct sc_hypervisor_policy_task_pool **link;
+
+	for (link = task_pools; *link; link = &(*link)->next)
+	{
+		struct sc_hypervisor_policy_task_pool *tp = *link;
+
+		if (tp->cl != task->cl || tp->footprint != footprint || tp->sched_ctx_id != task->sched_ctx)
+			continue;
+
+		if (tp->n > 1)
+			tp->n--;
+		else
+		{
+			*link = tp->next;
+			free(tp);
+		}
+		return;
+	}
+}
+
+/* Return a copy of the list starting at tp, entry by entry and in the same
+ * order, or NULL when tp is NULL. Every entry of the copy is heap-allocated;
+ * the fields are copied as they are, so the codelets they point to are
+ * shared with the original list. */
+struct sc_hypervisor_policy_task_pool* sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp)
+{
+	struct sc_hypervisor_policy_task_pool *head = NULL;
+	struct sc_hypervisor_policy_task_pool **tail = &head;
+
+	/* iterate rather than recurse, so that stack use does not grow with
+	   the length of the list */
+	for (; tp; tp = tp->next)
+	{
+		struct sc_hypervisor_policy_task_pool *copy = _task_pool_alloc();
+		*copy = *tp;
+		copy->next = NULL;
+		*tail = copy;
+		tail = &copy->next;
+	}
+
+	return head;
+}
diff --git a/sc_hypervisor/src/sc_config.c b/sc_hypervisor/src/sc_config.c
new file mode 100644
index 0000000..5ee8571
--- /dev/null
+++ b/sc_hypervisor/src/sc_config.c
@@ -0,0 +1,257 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include + +static struct sc_hypervisor_policy_config* _create_config(void) +{ + struct sc_hypervisor_policy_config *config = (struct sc_hypervisor_policy_config *)malloc(sizeof(struct sc_hypervisor_policy_config)); + config->min_nworkers = -1; + config->max_nworkers = -1; + config->new_workers_max_idle = -1.0; + config->ispeed_ctx_sample = 0.0; + config->time_sample = 0.5; + + int i; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + { + config->granularity = -1; + config->priority[i] = -1; + config->fixed_workers[i] = -1; + config->max_idle[i] = -1.0; + config->min_working[i] = -1.0; + config->ispeed_w_sample[i] = 0.0; + } + + return config; +} + +static void _update_config(struct sc_hypervisor_policy_config *old, struct sc_hypervisor_policy_config* new) +{ + old->min_nworkers = new->min_nworkers != -1 ? new->min_nworkers : old->min_nworkers ; + old->max_nworkers = new->max_nworkers != -1 ? new->max_nworkers : old->max_nworkers ; + old->new_workers_max_idle = new->new_workers_max_idle != -1.0 ? new->new_workers_max_idle : old->new_workers_max_idle; + old->granularity = new->granularity != -1 ? new->granularity : old->granularity; + + int i; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + { + old->priority[i] = new->priority[i] != -1 ? new->priority[i] : old->priority[i]; + old->fixed_workers[i] = new->fixed_workers[i] != -1 ? new->fixed_workers[i] : old->fixed_workers[i]; + old->max_idle[i] = new->max_idle[i] != -1.0 ? new->max_idle[i] : old->max_idle[i]; + old->min_working[i] = new->min_working[i] != -1.0 ? 
new->min_working[i] : old->min_working[i]; + } +} + +void sc_hypervisor_set_config(unsigned sched_ctx, void *config) +{ + if(hypervisor.sched_ctx_w[sched_ctx].config != NULL && config != NULL) + { + _update_config(hypervisor.sched_ctx_w[sched_ctx].config, config); + } + else + { + hypervisor.sched_ctx_w[sched_ctx].config = config; + } + + return; +} + +void _add_config(unsigned sched_ctx) +{ + struct sc_hypervisor_policy_config *config = _create_config(); + config->min_nworkers = 0; + config->max_nworkers = starpu_worker_get_count(); + config->new_workers_max_idle = MAX_IDLE_TIME; + + int i; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + { + config->granularity = 1; + config->priority[i] = 0; + config->fixed_workers[i] = 0; + config->max_idle[i] = MAX_IDLE_TIME; + config->min_working[i] = MIN_WORKING_TIME; + } + + sc_hypervisor_set_config(sched_ctx, config); +} + +void _remove_config(unsigned sched_ctx) +{ + sc_hypervisor_set_config(sched_ctx, NULL); +} + +struct sc_hypervisor_policy_config* sc_hypervisor_get_config(unsigned sched_ctx) +{ + return hypervisor.sched_ctx_w[sched_ctx].config; +} + +static struct sc_hypervisor_policy_config* _ctl(unsigned sched_ctx, va_list varg_list, unsigned later) +{ + struct sc_hypervisor_policy_config *config = NULL; + + if(later) + config = _create_config(); + else + config = sc_hypervisor_get_config(sched_ctx); + + assert(config != NULL); + + int arg_type; + int i; + int *workerids; + int nworkers; + + while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL) + { + switch(arg_type) + { + case SC_HYPERVISOR_MAX_IDLE: + workerids = va_arg(varg_list, int*); + nworkers = va_arg(varg_list, int); + double max_idle = va_arg(varg_list, double); + for(i = 0; i < nworkers; i++) + config->max_idle[workerids[i]] = max_idle; + + break; + + case SC_HYPERVISOR_MIN_WORKING: + workerids = va_arg(varg_list, int*); + nworkers = va_arg(varg_list, int); + double min_working = va_arg(varg_list, double); + + for(i = 0; i < nworkers; i++) + 
config->min_working[workerids[i]] = min_working; + + break; + + case SC_HYPERVISOR_PRIORITY: + workerids = va_arg(varg_list, int*); + nworkers = va_arg(varg_list, int); + int priority = va_arg(varg_list, int); + + for(i = 0; i < nworkers; i++) + config->priority[workerids[i]] = priority; + break; + + case SC_HYPERVISOR_MIN_WORKERS: + config->min_nworkers = va_arg(varg_list, unsigned); + break; + + case SC_HYPERVISOR_MAX_WORKERS: + config->max_nworkers = va_arg(varg_list, unsigned); + break; + + case SC_HYPERVISOR_GRANULARITY: + config->granularity = va_arg(varg_list, unsigned); + break; + + case SC_HYPERVISOR_FIXED_WORKERS: + workerids = va_arg(varg_list, int*); + nworkers = va_arg(varg_list, int); + + for(i = 0; i < nworkers; i++) + config->fixed_workers[workerids[i]] = 1; + break; + + case SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE: + config->new_workers_max_idle = va_arg(varg_list, double); + break; + + case SC_HYPERVISOR_ISPEED_W_SAMPLE: + workerids = va_arg(varg_list, int*); + nworkers = va_arg(varg_list, int); + double sample = va_arg(varg_list, double); + + for(i = 0; i < nworkers; i++) + config->ispeed_w_sample[workerids[i]] = sample; + break; + + case SC_HYPERVISOR_ISPEED_CTX_SAMPLE: + config->ispeed_ctx_sample = va_arg(varg_list, double); + break; + + case SC_HYPERVISOR_TIME_SAMPLE: + config->time_sample = va_arg(varg_list, double); + break; + + +/* not important for the strateg, needed just to jump these args in the iteration of the args */ + case SC_HYPERVISOR_TIME_TO_APPLY: + va_arg(varg_list, int); + break; + + case SC_HYPERVISOR_MIN_TASKS: + va_arg(varg_list, int); + break; + + } + } + + return later ? config : NULL; +} + + +void sc_hypervisor_ctl(unsigned sched_ctx, ...) 
+{ + va_list varg_list; + va_start(varg_list, sched_ctx); + + int arg_type; + int stop = 0; + int task_tag = -1; + + while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL) + { + switch(arg_type) + { + case SC_HYPERVISOR_TIME_TO_APPLY: + task_tag = va_arg(varg_list, int); + stop = 1; + break; + + case SC_HYPERVISOR_MIN_TASKS: + hypervisor.min_tasks = va_arg(varg_list, int); + hypervisor.check_min_tasks[sched_ctx] = 1; + break; + + } + if(stop) break; + } + + va_end(varg_list); + va_start(varg_list, sched_ctx); + + /* if config not null => save hypervisor configuration and consider it later */ + struct sc_hypervisor_policy_config *config = _ctl(sched_ctx, varg_list, (task_tag > 0)); + if(config != NULL) + { + struct configuration_entry *entry; + + entry = malloc(sizeof *entry); + STARPU_ASSERT(entry != NULL); + + entry->task_tag = task_tag; + entry->configuration = config; + + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.conf_mut[sched_ctx]); + HASH_ADD_INT(hypervisor.configurations[sched_ctx], task_tag, entry); + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.conf_mut[sched_ctx]); + } + + va_end(varg_list); +} diff --git a/sc_hypervisor/src/sc_hypervisor.c b/sc_hypervisor/src/sc_hypervisor.c new file mode 100644 index 0000000..2321335 --- /dev/null +++ b/sc_hypervisor/src/sc_hypervisor.c @@ -0,0 +1,1712 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +struct sc_hypervisor hypervisor; +starpu_pthread_mutex_t act_hypervisor_mutex; +double hyp_overhead = 0.0; +unsigned imposed_resize = 0; +unsigned type_of_tasks_known = 0; +struct starpu_sched_ctx_performance_counters* perf_counters = NULL; + +static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time); +static void notify_pushed_task(unsigned sched_ctx, int worker); +static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, + int hypervisor_tag, double flops); +static void notify_poped_task(unsigned sched_ctx, int worker); +static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size); +static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task); +static void notify_delete_context(unsigned sched_ctx); + +extern struct sc_hypervisor_policy idle_policy; +extern struct sc_hypervisor_policy app_driven_policy; +extern struct sc_hypervisor_policy gflops_rate_policy; +#ifdef STARPU_HAVE_GLPK_H +extern struct sc_hypervisor_policy feft_lp_policy; +extern struct sc_hypervisor_policy teft_lp_policy; +extern struct sc_hypervisor_policy ispeed_lp_policy; +extern struct sc_hypervisor_policy throughput_lp_policy; +#endif // STARPU_HAVE_GLPK_ +extern struct sc_hypervisor_policy ispeed_policy; +extern struct sc_hypervisor_policy hard_coded_policy; +extern struct sc_hypervisor_policy perf_count_policy; + + +static struct sc_hypervisor_policy *predefined_policies[] = +{ + &idle_policy, + &app_driven_policy, +#ifdef STARPU_HAVE_GLPK_H + &feft_lp_policy, + &teft_lp_policy, + &ispeed_lp_policy, + &throughput_lp_policy, +#endif // STARPU_HAVE_GLPK_H + &gflops_rate_policy, + &ispeed_policy, + &hard_coded_policy, + &perf_count_policy +}; + +static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy) +{ + STARPU_ASSERT(policy); + + 
hypervisor.policy.name = policy->name; + hypervisor.policy.size_ctxs = policy->size_ctxs; + hypervisor.policy.resize_ctxs = policy->resize_ctxs; + hypervisor.policy.handle_poped_task = policy->handle_poped_task; + hypervisor.policy.handle_pushed_task = policy->handle_pushed_task; + hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle; + hypervisor.policy.handle_idle_end = policy->handle_idle_end; + hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook; + hypervisor.policy.handle_submitted_job = policy->handle_submitted_job; + hypervisor.policy.end_ctx = policy->end_ctx; + hypervisor.policy.start_ctx = policy->start_ctx; + hypervisor.policy.init_worker = policy->init_worker; +} + + +static struct sc_hypervisor_policy *_find_hypervisor_policy_from_name(const char *policy_name) +{ + + if (!policy_name) + return NULL; + + unsigned i; + for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++) + { + struct sc_hypervisor_policy *p; + p = predefined_policies[i]; + if (p->name) + { + if (strcmp(policy_name, p->name) == 0) { + /* we found a policy with the requested name */ + return p; + } + } + } + fprintf(stderr, "Warning: hypervisor policy \"%s\" was not found, try \"help\" to get a list\n", policy_name); + + /* nothing was found */ + return NULL; +} + +static void display_sched_help_message(void) +{ + const char* policy_name = getenv("SC_HYPERVISOR_POLICY"); + if (policy_name && (strcmp(policy_name, "help") == 0)) + { + fprintf(stderr, "SC_HYPERVISOR_POLICY can be either of\n"); + /* display the description of all predefined policies */ + unsigned i; + for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++) + { + struct sc_hypervisor_policy *p = predefined_policies[i]; + if (p->name) + { + fprintf(stderr, "%s\n", p->name); + } + } + } +} + +static struct sc_hypervisor_policy *_select_hypervisor_policy(struct sc_hypervisor_policy* hypervisor_policy) +{ + struct sc_hypervisor_policy 
*selected_policy = NULL; + + if(hypervisor_policy && hypervisor_policy->custom) + return hypervisor_policy; + + /* we look if the application specified the name of a policy to load */ + const char *policy_name; + if (hypervisor_policy && hypervisor_policy->name) + { + policy_name = hypervisor_policy->name; + } + else + { + policy_name = getenv("SC_HYPERVISOR_POLICY"); + } + + if (policy_name) + selected_policy = _find_hypervisor_policy_from_name(policy_name); + + /* Perhaps there was no policy that matched the name */ + if (selected_policy) + return selected_policy; + + /* If no policy was specified, we use the idle policy as a default */ + + return &idle_policy; +} + + +/* initializez the performance counters that starpu will use to retrieve hints for resizing */ +void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy) +{ +/* Perhaps we have to display some help */ + display_sched_help_message(); + + hypervisor.min_tasks = 0; + hypervisor.nsched_ctxs = 0; + char* vel_gap = getenv("SC_HYPERVISOR_MAX_SPEED_GAP"); + hypervisor.max_speed_gap = vel_gap ? atof(vel_gap) : SC_SPEED_MAX_GAP_DEFAULT; + char* crit = getenv("SC_HYPERVISOR_TRIGGER_RESIZE"); + hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? 
SC_SPEED : SC_NOTHING); + + STARPU_PTHREAD_MUTEX_INIT(&act_hypervisor_mutex, NULL); +// hypervisor.start_executing_time = starpu_timing_now(); + + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + hypervisor.resize[i] = 0; + hypervisor.allow_remove[i] = 1; + hypervisor.configurations[i] = NULL; + hypervisor.sr = NULL; + hypervisor.check_min_tasks[i] = 1; + hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; + hypervisor.sched_ctx_w[i].sched_ctx = STARPU_NMAX_SCHED_CTXS; + hypervisor.sched_ctx_w[i].config = NULL; + hypervisor.sched_ctx_w[i].total_flops = 0.0; + hypervisor.sched_ctx_w[i].submitted_flops = 0.0; + hypervisor.sched_ctx_w[i].remaining_flops = 0.0; + hypervisor.sched_ctx_w[i].start_time = 0.0; + hypervisor.sched_ctx_w[i].real_start_time = 0.0; + hypervisor.sched_ctx_w[i].hyp_react_start_time = 0.0; + hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1; + hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL; + hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0; + hypervisor.sched_ctx_w[i].resize_ack.acked_workers = NULL; + STARPU_PTHREAD_MUTEX_INIT(&hypervisor.sched_ctx_w[i].mutex, NULL); + hypervisor.optimal_v[i] = 0.0; + + hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0; + hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0; + hypervisor.sched_ctx_w[i].total_flops_available = 0; + hypervisor.sched_ctx_w[i].to_be_sized = 0; + hypervisor.sched_ctx_w[i].consider_max = 0; + int j; + for(j = 0; j < STARPU_NMAXWORKERS; j++) + { + hypervisor.sched_ctx_w[i].start_time_w[i] = 0.0; + hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0; + hypervisor.sched_ctx_w[i].idle_time[j] = 0.0; + hypervisor.sched_ctx_w[i].idle_start_time[j] = 0.0; + hypervisor.sched_ctx_w[i].exec_time[j] = 0.0; + hypervisor.sched_ctx_w[i].exec_start_time[j] = 0.0; + hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0; + hypervisor.sched_ctx_w[i].poped_tasks[j] = 0; + hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0; + hypervisor.sched_ctx_w[i].elapsed_data[j] = 0; + 
hypervisor.sched_ctx_w[i].elapsed_tasks[j] = 0; + hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0; + hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0; + hypervisor.sched_ctx_w[i].compute_idle[j] = 1; + hypervisor.sched_ctx_w[i].compute_partial_idle[j] = 0; + } + } + + struct sc_hypervisor_policy *selected_hypervisor_policy = _select_hypervisor_policy(hypervisor_policy); + _load_hypervisor_policy(selected_hypervisor_policy); + + perf_counters = (struct starpu_sched_ctx_performance_counters*)malloc(sizeof(struct starpu_sched_ctx_performance_counters)); + perf_counters->notify_idle_cycle = notify_idle_cycle; + perf_counters->notify_pushed_task = notify_pushed_task; + perf_counters->notify_poped_task = notify_poped_task; + perf_counters->notify_post_exec_task = notify_post_exec_task; + perf_counters->notify_submitted_job = notify_submitted_job; + perf_counters->notify_empty_ctx = notify_empty_ctx; + perf_counters->notify_delete_context = notify_delete_context; + + starpu_sched_ctx_notify_hypervisor_exists(); + + return (void*)perf_counters; +} + +const char* sc_hypervisor_get_policy() +{ + return hypervisor.policy.name; +} + +/* the user can forbid the resizing process*/ +void sc_hypervisor_stop_resize(unsigned sched_ctx) +{ + imposed_resize = 1; + hypervisor.resize[sched_ctx] = 0; +} + +/* the user can restart the resizing process*/ +void sc_hypervisor_start_resize(unsigned sched_ctx) +{ + imposed_resize = 1; + hypervisor.resize[sched_ctx] = 1; +} + +static void _print_current_time() +{ + char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT"); + int sp = stop_print ? 
atoi(stop_print) : 1; + + if(!sp) + { + if(hypervisor.start_executing_time == 0.0) + { + fprintf(stdout, "Time: %lf\n", -1.0); + return; + } + + double curr_time = starpu_timing_now(); + double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */ + fprintf(stdout, "Time: %lf\n", elapsed_time); + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) + { + struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; + + double cpu_speed = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER); + double cuda_speed = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER); + int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER); + int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER); + fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda); + } + } + } + return; +} + +void sc_hypervisor_shutdown(void) +{ + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && hypervisor.nsched_ctxs > 0) + { + sc_hypervisor_stop_resize(hypervisor.sched_ctxs[i]); + sc_hypervisor_unregister_ctx(hypervisor.sched_ctxs[i]); + STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.sched_ctx_w[i].mutex); + } + } + perf_counters->notify_idle_cycle = NULL; + perf_counters->notify_pushed_task = NULL; + perf_counters->notify_poped_task = NULL; + perf_counters->notify_post_exec_task = NULL; + perf_counters->notify_delete_context = NULL; + + free(perf_counters); + perf_counters = NULL; + + STARPU_PTHREAD_MUTEX_DESTROY(&act_hypervisor_mutex); + +} + +void sc_hypervisor_print_overhead() +{ +// hyp_overhead /= 1000000.0;* + FILE *f; + const char *sched_env = getenv("OVERHEAD_FILE"); + if(!sched_env) + f = fopen("overhead_microsec", "a"); + else + f = fopen(sched_env, "a"); + fprintf(f, "%lf \n", hyp_overhead); + 
fclose(f); + + +} + +/* the hypervisor is in charge only of the contexts registered to it*/ +void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops) +{ + if(hypervisor.policy.start_ctx) + hypervisor.policy.start_ctx(sched_ctx); + + STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + hypervisor.configurations[sched_ctx] = NULL; + hypervisor.resize_requests[sched_ctx] = NULL; + STARPU_PTHREAD_MUTEX_INIT(&hypervisor.conf_mut[sched_ctx], NULL); + STARPU_PTHREAD_MUTEX_INIT(&hypervisor.resize_mut[sched_ctx], NULL); + + _add_config(sched_ctx); + hypervisor.sched_ctx_w[sched_ctx].sched_ctx = sched_ctx; + hypervisor.sched_ctxs[hypervisor.nsched_ctxs++] = sched_ctx; + + hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops; + hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops; + hypervisor.resize[sched_ctx] = 0;//1; + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); +} + +static int _get_first_free_sched_ctx(unsigned *sched_ctxs, int nsched_ctxs) +{ + int i; + for(i = 0; i < nsched_ctxs; i++) + if(sched_ctxs[i] == STARPU_NMAX_SCHED_CTXS) + return i; + + return STARPU_NMAX_SCHED_CTXS; +} + +/* rearange array of sched_ctxs in order not to have {MAXVAL, MAXVAL, 5, MAXVAL, 7} + * and have instead {5, 7, MAXVAL, MAXVAL, MAXVAL} + * it is easier afterwards to iterate the array +*/ +static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs) +{ + int first_free_id = STARPU_NMAX_SCHED_CTXS; + int i; + for(i = 0; i < old_nsched_ctxs; i++) + { + if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) + { + first_free_id = _get_first_free_sched_ctx(sched_ctxs, old_nsched_ctxs); + if(first_free_id != STARPU_NMAX_SCHED_CTXS) + { + sched_ctxs[first_free_id] = sched_ctxs[i]; + sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; + } + } + } +} + +/* unregistered contexts will no longer be resized */ +void sc_hypervisor_unregister_ctx(unsigned sched_ctx) +{ +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("unregister ctx %d with remaining flops %lf \n", 
hypervisor.sched_ctx_w[sched_ctx].sched_ctx, hypervisor.sched_ctx_w[sched_ctx].remaining_flops); +#endif + if(hypervisor.policy.end_ctx) + hypervisor.policy.end_ctx(sched_ctx); + + STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); + int *pus; + unsigned npus = starpu_sched_ctx_get_workers_list(sched_ctx, &pus); + + if(npus) + { + starpu_sched_ctx_set_priority(pus, npus, father, 1); + free(pus); + } + + unsigned i; + for(i = 0; i < hypervisor.nsched_ctxs; i++) + { + if(hypervisor.sched_ctxs[i] == sched_ctx) + { + hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; + break; + } + } + + _rearange_sched_ctxs(hypervisor.sched_ctxs, hypervisor.nsched_ctxs); + hypervisor.nsched_ctxs--; + hypervisor.sched_ctx_w[sched_ctx].sched_ctx = STARPU_NMAX_SCHED_CTXS; + _remove_config(sched_ctx); + + STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.conf_mut[sched_ctx]); + STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.resize_mut[sched_ctx]); + if(hypervisor.nsched_ctxs == 1) + sc_hypervisor_stop_resize(hypervisor.sched_ctxs[0]); + + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); +} + +void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now) +{ + if(now) + hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now(); + starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now()); +} + + +double _get_max_speed_gap() +{ + return hypervisor.max_speed_gap; +} + +unsigned sc_hypervisor_get_resize_criteria() +{ + return hypervisor.resize_criteria; +} + +static int get_ntasks(int *tasks) +{ + int ntasks = 0; + int j; + for(j = 0; j < STARPU_NMAXWORKERS; j++) + { + ntasks += tasks[j]; + } + return ntasks; +} + +int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch) +{ + int nworkers_ctx = 0; + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + int worker; + + struct starpu_sched_ctx_iterator it; + 
workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); + if(curr_arch == arch || arch == STARPU_ANY_WORKER) + nworkers_ctx++; + } + return nworkers_ctx; +} + +static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val) +{ + int i; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + { + hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i] = val; + if(val == 0) + { + hypervisor.sched_ctx_w[sched_ctx].elapsed_data[i] = 0; + hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[i] = 0; + } + } +} + +double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w) +{ + double ret_val = 0.0; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx); + int worker; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + ret_val += sc_w->elapsed_flops[worker]; + } + + return ret_val; +} + +double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w) +{ + double ret_val = 0.0; + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx); + int worker; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + ret_val += sc_w->total_elapsed_flops[worker]; + } + + return ret_val; +} + +double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx) +{ + double ready_flops = starpu_sched_ctx_get_nready_flops(sched_ctx); + unsigned *sched_ctxs; + int nsched_ctxs = 0; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, starpu_sched_ctx_get_hierarchy_level(sched_ctx), sched_ctx); + int s; + for(s = 0; s < nsched_ctxs; s++) + ready_flops 
+= sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sched_ctxs[s]); + //ready_flops += starpu_get_nready_flops_of_sched_ctx(sched_ctxs[s]); + + free(sched_ctxs); + return ready_flops; +} +static void _decrement_elapsed_flops_per_worker(unsigned sched_ctx, int worker, double flops) +{ + if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + { + unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); + hypervisor.sched_ctx_w[father].elapsed_flops[worker] -= flops; + _decrement_elapsed_flops_per_worker(father, worker, flops); + } + + return; +} +void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx) +{ + double start_time = starpu_timing_now(); + if(sender_sched_ctx != STARPU_NMAX_SCHED_CTXS) + { + /* info concerning only the gflops_rate strateg */ + struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx]; + + sender_sc_w->start_time = start_time; + unsigned nworkers = starpu_worker_get_count(); + unsigned i; + for(i = 0; i < nworkers; i++) + { + sender_sc_w->start_time_w[i] = start_time; + sender_sc_w->idle_time[i] = 0.0; + sender_sc_w->idle_start_time[i] = 0.0; + hypervisor.sched_ctx_w[sender_sched_ctx].exec_time[i] = 0.0; +// hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0; + _decrement_elapsed_flops_per_worker(sender_sched_ctx, i, hypervisor.sched_ctx_w[sender_sched_ctx].elapsed_flops[i]); + + } + _set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0); + } + + if(receiver_sched_ctx != STARPU_NMAX_SCHED_CTXS) + { + struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx]; + + receiver_sc_w->start_time = start_time; + + unsigned nworkers = starpu_worker_get_count(); + unsigned i; + for(i = 0; i < nworkers; i++) + { + receiver_sc_w->start_time_w[i] = (receiver_sc_w->start_time_w[i] != 0.0) ? 
starpu_timing_now() : 0.0; + receiver_sc_w->idle_time[i] = 0.0; + receiver_sc_w->idle_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? 0.0 : starpu_timing_now(); +// hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0; + hypervisor.sched_ctx_w[receiver_sched_ctx].exec_time[i] = 0.0; + _decrement_elapsed_flops_per_worker(receiver_sched_ctx, i, hypervisor.sched_ctx_w[receiver_sched_ctx].elapsed_flops[i]); + } + _set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0); + } + return; +} + +/* actually move the workers: the cpus are moved, gpus are only shared */ +/* forbids another resize request before this one is take into account */ +void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now) +{ + if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx]) + { + _print_current_time(); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("resize ctx %u with %u workers", sender_sched_ctx, nworkers_to_move); + unsigned j; + for(j = 0; j < nworkers_to_move; j++) + printf(" %d", workers_to_move[j]); + printf("\n"); +#endif + + hypervisor.allow_remove[receiver_sched_ctx] = 0; + starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx); + + if(now) + { +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("remove now from ctx %u:", sender_sched_ctx); + for(j = 0; j < nworkers_to_move; j++) + printf(" %d", workers_to_move[j]); + printf("\n"); +#endif + starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx); + hypervisor.allow_remove[receiver_sched_ctx] = 1; + _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx); + } + else + { + int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); + if(ret != EBUSY) + { + hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx; + 
hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int)); + hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move; + hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int)); + + + unsigned i; + for(i = 0; i < nworkers_to_move; i++) + { + hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0; + hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i]; + hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0; + } + + hypervisor.resize[sender_sched_ctx] = 0; + if(imposed_resize) imposed_resize = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); + } + } + struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx); + unsigned i; + for(i = 0; i < nworkers_to_move; i++) + new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle; + + } + return; +} + +void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx) +{ + if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx]) + { + _print_current_time(); +#ifdef STARPU_SC_HYPERVISOR_DEBUG + unsigned j; + printf("add to ctx %u:", sched_ctx); + for(j = 0; j < nworkers_to_add; j++) + printf(" %d", workers_to_add[j]); + printf("\n"); +#endif + starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx); + struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx); + unsigned i; + for(i = 0; i < nworkers_to_add; i++) + new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? 
new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle; + _reset_resize_sample_info(STARPU_NMAX_SCHED_CTXS, sched_ctx); + + } + return; +} + +unsigned sc_hypervisor_can_resize(unsigned sched_ctx) +{ + return hypervisor.resize[sched_ctx]; +} + +void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now) +{ + if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx]) + { + _print_current_time(); + unsigned nworkers = 0; + int workers[nworkers_to_remove]; + + if(now) + { +#ifdef STARPU_SC_HYPERVISOR_DEBUG + unsigned j; + printf("remove explicitley now from ctx %u:", sched_ctx); + for(j = 0; j < nworkers_to_remove; j++) + printf(" %d", workers_to_remove[j]); + printf("\n"); +#endif + starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx); + _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS); + } + else + { +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("try to remove from ctx %u: ", sched_ctx); + unsigned j; + for(j = 0; j < nworkers_to_remove; j++) + printf(" %d", workers_to_remove[j]); + printf("\n"); +#endif + int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex); + if(ret != EBUSY) + { + + unsigned i; + for(i = 0; i < nworkers_to_remove; i++) + if(starpu_sched_ctx_contains_worker(workers_to_remove[i], sched_ctx)) + workers[nworkers++] = workers_to_remove[i]; + + hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1; + hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); + hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = (int)nworkers; + hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); + + + for(i = 0; i < nworkers; i++) + { + hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0; + 
hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i]; + hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0; + } + + hypervisor.resize[sched_ctx] = 0; + if(imposed_resize) imposed_resize = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + } + } + } + return; +} + +static unsigned _ack_resize_completed(unsigned sched_ctx, int worker) +{ + if(worker != -1 && !starpu_sched_ctx_contains_worker(worker, sched_ctx)) + return 0; + + struct sc_hypervisor_resize_ack *resize_ack = NULL; + unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS; + + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) + { + struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; + STARPU_PTHREAD_MUTEX_LOCK(&sc_w->mutex); + unsigned only_remove = 0; + if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && + sc_w->resize_ack.nmoved_workers > 0 && starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[i])) + { + int j; + for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++) + if(sc_w->resize_ack.moved_workers[j] == worker) + { + only_remove = 1; + _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS); + break; + } + } + if(only_remove || + (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == (int)sched_ctx)) + { + resize_ack = &sc_w->resize_ack; + sender_sched_ctx = hypervisor.sched_ctxs[i]; + STARPU_PTHREAD_MUTEX_UNLOCK(&sc_w->mutex); + break; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&sc_w->mutex); + } + } + + /* if there is no ctx waiting for its ack return 1*/ + if(resize_ack == NULL) + { + return 1; + } + + int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); + if(ret != EBUSY) + { + int *moved_workers = resize_ack->moved_workers; + int nmoved_workers = resize_ack->nmoved_workers; + int *acked_workers = resize_ack->acked_workers; + + 
if(worker != -1) + { + for(i = 0; i < nmoved_workers; i++) + { + int moved_worker = moved_workers[i]; + if(moved_worker == worker && acked_workers[i] == 0) + { + acked_workers[i] = 1; + } + } + } + + int nacked_workers = 0; + for(i = 0; i < nmoved_workers; i++) + { + nacked_workers += (acked_workers[i] == 1); + } + + unsigned resize_completed = (nacked_workers == nmoved_workers); + int receiver_sched_ctx = sched_ctx; + if(resize_completed) + { + /* if the permission to resize is not allowed by the user don't do it + whatever the application says */ + if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize)) + { +/* int j; */ +/* printf("remove after ack from ctx %d:", sender_sched_ctx); */ +/* for(j = 0; j < nmoved_workers; j++) */ +/* printf(" %d", moved_workers[j]); */ +/* printf("\n"); */ + + starpu_sched_ctx_remove_workers(moved_workers, nmoved_workers, sender_sched_ctx); + + _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx); + + hypervisor.resize[sender_sched_ctx] = 1; + hypervisor.allow_remove[receiver_sched_ctx] = 1; + /* if the user allowed resizing leave the decisions to the application */ + if(imposed_resize) imposed_resize = 0; + + resize_ack->receiver_sched_ctx = -1; + resize_ack->nmoved_workers = 0; + free(resize_ack->moved_workers); + free(resize_ack->acked_workers); + + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); + return resize_completed; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); + } + return 0; +} + +/* Enqueue a resize request for 'sched_ctx', to be executed when the + * 'task_tag' tasks of 'sched_ctx' complete. 
*/ +void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag) +{ + struct resize_request_entry *entry; + + entry = malloc(sizeof *entry); + STARPU_ASSERT(entry != NULL); + + entry->sched_ctx = sched_ctx; + entry->task_tag = task_tag; + + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.resize_mut[sched_ctx]); + HASH_ADD_INT(hypervisor.resize_requests[sched_ctx], task_tag, entry); + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.resize_mut[sched_ctx]); +} + +void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) +{ + if(hypervisor.policy.resize_ctxs) + hypervisor.policy.resize_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers); +} + +void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned allow) +{ + hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker] = allow; +} + + +int _update_max_hierarchically(unsigned *sched_ctxs, int nsched_ctxs) +{ + int s; + unsigned leaves[hypervisor.nsched_ctxs]; + int nleaves = 0; + sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves); + + int max = 0; + + for(s = 0; s < nsched_ctxs; s++) + { + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]); + unsigned found = 0; + int l = 0; + for(l = 0; l < nleaves; l++) + { + if(leaves[l] == sched_ctxs[s]) + { + found = 1; + break; + } + } + if(!found) + { + config->max_nworkers = 0; + int level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]); + unsigned *sched_ctxs_child; + int nsched_ctxs_child = 0; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctxs[s]); + if(nsched_ctxs_child > 0) + { + config->max_nworkers += _update_max_hierarchically(sched_ctxs_child, nsched_ctxs_child); + free(sched_ctxs_child); + int max_possible_workers = starpu_worker_get_count(); + if(config->max_nworkers < 0) + config->max_nworkers = 0; + if(config->max_nworkers > max_possible_workers) + config->max_nworkers = max_possible_workers; + + } 
+#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("ctx %u has max %d \n", sched_ctxs[s], config->max_nworkers); +#endif + } + max += config->max_nworkers; + } + return max; +} +void _update_max_diff_hierarchically(unsigned father, double diff) +{ + int level = starpu_sched_ctx_get_hierarchy_level(father); + unsigned *sched_ctxs_child; + int nsched_ctxs_child = 0; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, father); + if(nsched_ctxs_child > 0) + { + int s; + double total_nflops = 0.0; + for(s = 0; s < nsched_ctxs_child; s++) + { + total_nflops += hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops; + } + + int accumulated_diff = 0; + for(s = 0; s < nsched_ctxs_child; s++) + { + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs_child[s]); + double remaining_flops = hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops; + int current_diff = total_nflops == 0.0 ? 
0.0 : floor((remaining_flops / total_nflops) * diff); + accumulated_diff += current_diff; + if(s == (nsched_ctxs_child - 1) && accumulated_diff < diff) + current_diff += (diff - accumulated_diff); + config->max_nworkers += current_diff; +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%u: redib max_nworkers incr %d diff = %d \n", sched_ctxs_child[s], config->max_nworkers, current_diff); +#endif + _update_max_diff_hierarchically(sched_ctxs_child[s], current_diff); + } + free(sched_ctxs_child); + } + return; +} + +void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_workers) +{ + (void) max_workers; + unsigned leaves[hypervisor.nsched_ctxs]; + int nleaves = 0; + sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves); + int l; + + unsigned sched_ctx; + int total_max_nworkers = 0; +// int max_cpus = starpu_cpu_worker_get_count(); + unsigned configured = 0; + int i; + for(i = 0; i < nsched_ctxs; i++) + { + unsigned found = 0; + for(l = 0; l < nleaves; l++) + { + if(leaves[l] == sched_ctxs[i]) + { + found = 1; + break; + } + } + if(!found) + continue; + + sched_ctx = sched_ctxs[i]; + + if(hypervisor.sched_ctx_w[sched_ctx].to_be_sized) continue; + + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); + int worker; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + + double elapsed_time_worker[STARPU_NMAXWORKERS]; + double norm_idle_time = 0.0; + double end_time = starpu_timing_now(); + while(workers->has_next(workers, &it)) + { + double idle_time = 0.0; + worker = workers->get_next(workers, &it); + if(hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker]) + { + if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) + elapsed_time_worker[worker] = 0.0; + else + elapsed_time_worker[worker] = (end_time - 
hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0; + + if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] == 0.0) + { + idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker]; /* in seconds */ + } + else + { + double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */ + idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] + idle; + } + norm_idle_time += (elapsed_time_worker[worker] == 0.0 ? 0.0 : (idle_time / elapsed_time_worker[worker])); +/* printf("%d/%d: start time %lf elapsed time %lf idle time %lf norm_idle_time %lf \n", */ +/* worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker], elapsed_time_worker[worker], idle_time, norm_idle_time); */ + } + } + + double norm_exec_time = 0.0; + for(worker = 0; worker < STARPU_NMAXWORKERS; worker++) + { + double exec_time = 0.0; + if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) + elapsed_time_worker[worker] = 0.0; + else + elapsed_time_worker[worker] = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0; + + if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] == 0.0) + { + exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker]; + } + else + { + double current_exec_time = 0.0; + if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] < hypervisor.sched_ctx_w[sched_ctx].start_time) + current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */ + else + current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ + + exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time; + } + norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 
0.0 : exec_time / elapsed_time_worker[worker]; + } + +#ifdef STARPU_SC_HYPERVISOR_DEBUG + double curr_time = starpu_timing_now(); + double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */ + int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctx); +#endif +/* if(norm_idle_time >= 0.9) */ +/* { */ +/* config->max_nworkers = lrint(norm_exec_time); */ +/* } */ +/* else */ +/* { */ +/* if(norm_idle_time < 0.1) */ +/* config->max_nworkers = lrint(norm_exec_time) + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */ +/* else */ +/* config->max_nworkers = lrint(norm_exec_time); */ +/* } */ + config->max_nworkers = lrint(norm_exec_time); +// config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; + + /* if(config->max_nworkers < 0) */ +/* config->max_nworkers = 0; */ +/* if(config->max_nworkers > max_workers) */ +/* config->max_nworkers = max_workers; */ + +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%u: ready tasks %d norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n", + sched_ctx, nready_tasks, norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers); +#endif + + total_max_nworkers += config->max_nworkers; + configured = 1; + + } + + unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); + if(nhierarchy_levels > 1 && configured) + { + unsigned *sched_ctxs2; + int nsched_ctxs2; + sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS); + + if(nsched_ctxs2 > 0) + { + _update_max_hierarchically(sched_ctxs2, nsched_ctxs2); + int s; + int current_total_max_nworkers = 0; + double max_nflops = 0.0; + unsigned max_nflops_sched_ctx = sched_ctxs2[0]; + for(s = 0; s < nsched_ctxs2; s++) + { + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs2[s]); + current_total_max_nworkers += config->max_nworkers; + if(max_nflops < 
hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops) + { + max_nflops = hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops; + max_nflops_sched_ctx = sched_ctxs2[s]; + } + } + + int max_possible_workers = starpu_worker_get_count(); + /*if the sum of the max cpus is smaller than the total cpus available + increase the max for the ones having more ready tasks to exec */ + if(current_total_max_nworkers < max_possible_workers) + { + int diff = max_possible_workers - current_total_max_nworkers; + struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nflops_sched_ctx); + config->max_nworkers += diff; +#ifdef STARPU_SC_HYPERVISOR_DEBUG + printf("%u: redib max_nworkers incr %d \n", max_nflops_sched_ctx, config->max_nworkers); +#endif + _update_max_diff_hierarchically(max_nflops_sched_ctx, diff); + } + free(sched_ctxs2); + } + } + + + + /*if the sum of the max cpus is smaller than the total cpus available + increase the max for the ones having more ready tasks to exec */ + /* if(configured && total_max_nworkers < max_workers) */ +/* { */ +/* int diff = max_workers - total_max_nworkers; */ +/* int max_nready = -1; */ +/* unsigned max_nready_sched_ctx = sched_ctxs[0]; */ +/* for(i = 0; i < nsched_ctxs; i++) */ +/* { */ +/* int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctxs[i]); */ +/* if(max_nready < nready_tasks) */ +/* { */ +/* max_nready = nready_tasks; */ +/* max_nready_sched_ctx = sched_ctxs[i]; */ +/* } */ +/* } */ +/* struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx); */ +/* config->max_nworkers += diff; */ +/* printf("%d: redib max_nworkers incr %d \n", max_nready_sched_ctx, config->max_nworkers); */ +/* } */ + +} + +/* notifies the hypervisor that a new task was pushed on the queue of the worker */ +static void notify_pushed_task(unsigned sched_ctx, int worker) +{ + hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++; + if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && 
hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0) + hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now(); + + if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) + { + hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now(); + } + + int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks); + + if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx]) + { + hypervisor.resize[sched_ctx] = 1; + if(imposed_resize) imposed_resize = 0; + hypervisor.check_min_tasks[sched_ctx] = 0; + } + + if(hypervisor.policy.handle_pushed_task) + hypervisor.policy.handle_pushed_task(sched_ctx, worker); +} + +unsigned choose_ctx_to_steal(int worker) +{ + int j; + int ns = hypervisor.nsched_ctxs; + int max_ready_tasks = 0; + unsigned chosen_ctx = STARPU_NMAX_SCHED_CTXS; + for(j = 0; j < ns; j++) + { + unsigned other_ctx = hypervisor.sched_ctxs[j]; + int nready = starpu_sched_ctx_get_nready_tasks(other_ctx); + if(!starpu_sched_ctx_contains_worker(worker, other_ctx) && max_ready_tasks < nready) + { + max_ready_tasks = nready; + chosen_ctx = other_ctx; + } + } + return chosen_ctx; +} + +/* notifies the hypervisor that the worker spent another cycle in idle time */ +static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time) +{ + if(hypervisor.start_executing_time == 0.0) return; + struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx]; + sc_w->current_idle_time[worker] += idle_time; + + if(sc_w->idle_start_time[worker] == 0.0 && sc_w->hyp_react_start_time != 0.0) + sc_w->idle_start_time[worker] = starpu_timing_now(); + + + if(sc_w->idle_start_time[worker] > 0.0) + { + double end_time = starpu_timing_now(); + sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */ + } + + 
hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = starpu_timing_now(); + + if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle) + { + if(sc_w->hyp_react_start_time == 0.0) + sc_hypervisor_reset_react_start_time(sched_ctx, 1); + + double curr_time = starpu_timing_now(); + double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */ + if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample) + { + unsigned idle_everywhere = 0; + unsigned *sched_ctxs = NULL; + unsigned nsched_ctxs = 0; + int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); + if(ret != EBUSY) + { + if(sc_hypervisor_check_idle(sched_ctx, worker)) + { + idle_everywhere = 1; + + nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs); + unsigned s; + for(s = 0; s < nsched_ctxs; s++) + { + if(hypervisor.sched_ctx_w[sched_ctxs[s]].sched_ctx != STARPU_NMAX_SCHED_CTXS) + { + if(!sc_hypervisor_check_idle(sched_ctxs[s], worker)) + idle_everywhere = 0; + } + } + free(sched_ctxs); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + } + + if(idle_everywhere) + { + double hyp_overhead_start = starpu_timing_now(); + if(elapsed_time > (sc_w->config->time_sample*2)) + hypervisor.policy.handle_idle_cycle(sched_ctx, worker); + double hyp_overhead_end = starpu_timing_now(); + hyp_overhead += (hyp_overhead_end - hyp_overhead_start); + if(elapsed_time > (sc_w->config->time_sample*2)) + sc_hypervisor_reset_react_start_time(sched_ctx, 1); + else + sc_hypervisor_reset_react_start_time(sched_ctx, 0); + } + } + } + return; +} + +void _update_real_start_time_hierarchically(unsigned sched_ctx) +{ + hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now(); + if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + { + _update_real_start_time_hierarchically(starpu_sched_ctx_get_inheritor(sched_ctx)); + } + return; +} + +/* notifies the hypervisor that the worker is no longer idle and a new task 
was pushed on its queue */ +static void notify_poped_task(unsigned sched_ctx, int worker) +{ + if(hypervisor.start_executing_time == 0.0) + hypervisor.start_executing_time = starpu_timing_now(); + if(!hypervisor.resize[sched_ctx]) + hypervisor.resize[sched_ctx] = 1; + + if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0) + _update_real_start_time_hierarchically(sched_ctx); + + if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) + { + hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now(); + } + + hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = starpu_timing_now(); + + if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] > 0.0) + { + int ns = hypervisor.nsched_ctxs; + int j; + for(j = 0; j < ns; j++) + { + if(hypervisor.sched_ctxs[j] != sched_ctx) + { + if(hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].idle_start_time[worker] > 0.0) + hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].compute_partial_idle[worker] = 1; + } + } + double end_time = starpu_timing_now(); + double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */ + + if(hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker]) + hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle / 2.0; + else + hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle; + + hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker] = 0; + hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = 0.0; + } + + if(hypervisor.resize[sched_ctx]) + hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0; + + if(hypervisor.policy.handle_idle_end) + hypervisor.policy.handle_idle_end(sched_ctx, worker); +} + + +static void _update_counters_hierarchically(int worker, unsigned sched_ctx, double flops, size_t data_size) +{ + hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++; + 
hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops; + hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ; + hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ; + hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops; + + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + + if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + _update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size); + + return; +} + +/* notifies the hypervisor that a tagged task has just been executed */ +static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops) +{ + unsigned sched_ctx = task->sched_ctx; + int worker = starpu_worker_get_id_check(); + + if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] != 0.0) + { + double current_time = starpu_timing_now(); + double exec_time = (current_time - + hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ + hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] += exec_time; + hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = 0.0; + } + + hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++; + hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops; + hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ; + hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ; + hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops; + + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + + if(_sc_hypervisor_use_lazy_resize()) + _ack_resize_completed(sched_ctx, worker); + + 
if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + { + _update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size); + } + + if(hypervisor.resize[sched_ctx]) + { + if(hypervisor.policy.handle_poped_task) + { + if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0) + sc_hypervisor_reset_react_start_time(sched_ctx, 1); + + double curr_time = starpu_timing_now(); + double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */ + if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample) + { + double hyp_overhead_start = starpu_timing_now(); + if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2)) + hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint); + double hyp_overhead_end = starpu_timing_now(); + hyp_overhead += (hyp_overhead_end - hyp_overhead_start); + if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2)) + sc_hypervisor_reset_react_start_time(sched_ctx, 1); + else + sc_hypervisor_reset_react_start_time(sched_ctx, 0); + } + else + /* no need to consider resizing, just remove the task from the pool if the strategy requires it*/ + hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint); + } + } +/* STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); */ +/* _ack_resize_completed(sched_ctx, worker); */ +/* STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); */ + if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 200 == 0) + _print_current_time(); + + if(task_tag <= 0) + return; + + unsigned conf_sched_ctx; + unsigned i; + unsigned ns = hypervisor.nsched_ctxs; + + for(i = 0; i < ns; i++) + { + struct configuration_entry *entry; + + conf_sched_ctx = hypervisor.sched_ctxs[i]; + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.conf_mut[conf_sched_ctx]); + + 
HASH_FIND_INT(hypervisor.configurations[conf_sched_ctx], &task_tag, entry); + + if (entry != NULL) + { + struct sc_hypervisor_policy_config *config = entry->configuration; + + sc_hypervisor_set_config(conf_sched_ctx, config); + HASH_DEL(hypervisor.configurations[conf_sched_ctx], entry); + free(config); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.conf_mut[conf_sched_ctx]); + } + + if(hypervisor.resize[sched_ctx]) + { + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.resize_mut[sched_ctx]); + + if(hypervisor.policy.handle_post_exec_hook) + { + /* Check whether 'task_tag' is in the 'resize_requests' set. */ + struct resize_request_entry *entry; + HASH_FIND_INT(hypervisor.resize_requests[sched_ctx], &task_tag, entry); + if (entry != NULL) + { + hypervisor.policy.handle_post_exec_hook(sched_ctx, task_tag); + HASH_DEL(hypervisor.resize_requests[sched_ctx], entry); + free(entry); + } + + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.resize_mut[sched_ctx]); + } + return; +} + +static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, size_t data_size) +{ + (void)footprint; + (void)data_size; + unsigned sched_ctx = task->sched_ctx; + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + + /* signaled by the user - no need to wait for them */ + /* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */ + /* hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */ +} + +static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task) +{ + (void)sched_ctx_id; + (void)task; + sc_hypervisor_resize_ctxs(NULL, -1 , NULL, -1); +} + +void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size) +{ + type_of_tasks_known = 1; + if(hypervisor.policy.handle_submitted_job) + 
hypervisor.policy.handle_submitted_job(cl, sched_ctx, footprint, data_size); +} + +static void notify_delete_context(unsigned sched_ctx) +{ + _print_current_time(); + sc_hypervisor_unregister_ctx(sched_ctx); +} + +void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) +{ +// STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); + unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs; + unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs; +// STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); + unsigned s; + for(s = 0; s < curr_nsched_ctxs; s++) + hypervisor.resize[curr_sched_ctxs[s]] = 1; + + if(hypervisor.policy.size_ctxs) + hypervisor.policy.size_ctxs(curr_sched_ctxs, curr_nsched_ctxs, workers, nworkers); +} + +struct sc_hypervisor_wrapper* sc_hypervisor_get_wrapper(unsigned sched_ctx) +{ + return &hypervisor.sched_ctx_w[sched_ctx]; +} + +unsigned* sc_hypervisor_get_sched_ctxs() +{ + return hypervisor.sched_ctxs; +} + +int sc_hypervisor_get_nsched_ctxs() +{ + int ns; + ns = hypervisor.nsched_ctxs; + return ns; +} + +int _sc_hypervisor_use_lazy_resize(void) +{ + char* lazy = getenv("SC_HYPERVISOR_LAZY_RESIZE"); + return lazy ? 
atoi(lazy) : 1; +} + +void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) +{ + hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request)); + hypervisor.sr->sched_ctxs = sched_ctxs; + hypervisor.sr->nsched_ctxs = nsched_ctxs; + hypervisor.sr->workers = workers; + hypervisor.sr->nworkers = nworkers; +} + +unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers) +{ + if(hypervisor.sr != NULL) + { + *sched_ctxs = hypervisor.sr->sched_ctxs; + *nsched_ctxs = hypervisor.sr->nsched_ctxs; + *workers = hypervisor.sr->workers; + *nworkers = hypervisor.sr->nworkers; + return 1; + } + return 0; +} + +void sc_hypervisor_free_size_req(void) +{ + if(hypervisor.sr != NULL) + { + free(hypervisor.sr); + hypervisor.sr = NULL; + } +} + +double _get_optimal_v(unsigned sched_ctx) +{ + return hypervisor.optimal_v[sched_ctx]; +} + +void _set_optimal_v(unsigned sched_ctx, double optimal_v) +{ + hypervisor.optimal_v[sched_ctx] = optimal_v; +} + +static struct types_of_workers* _init_structure_types_of_workers(void) +{ + struct types_of_workers *tw = (struct types_of_workers*)malloc(sizeof(struct types_of_workers)); + tw->ncpus = 0; + tw->ncuda = 0; + tw->nw = 0; + return tw; +} + +struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers) +{ + struct types_of_workers *tw = _init_structure_types_of_workers(); + + unsigned w; + for(w = 0; w < nworkers; w++) + { + enum starpu_worker_archtype arch = workers == NULL ? 
starpu_worker_get_type((int)w) : starpu_worker_get_type(workers[w]); + if(arch == STARPU_CPU_WORKER) + tw->ncpus++; + if(arch == STARPU_CUDA_WORKER) + tw->ncuda++; + } + if(tw->ncpus > 0) tw->nw++; + if(tw->ncuda > 0) tw->nw++; + return tw; +} + +void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops) +{ +// double hyp_overhead_start = starpu_timing_now(); + STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops; + hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops; + STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); +/* double hyp_overhead_end = starpu_timing_now(); */ +/* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */ + if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + sc_hypervisor_update_diff_total_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_total_flops); + return; + +} + +void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops) +{ +// double hyp_overhead_start = starpu_timing_now(); + int workerid = starpu_worker_get_id(); + if(workerid != -1) + { +// STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[workerid] += diff_elapsed_flops; + hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops; +// STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); + } +/* double hyp_overhead_end = starpu_timing_now(); */ +/* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */ + if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) + sc_hypervisor_update_diff_elapsed_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_elapsed_flops); + return; +} + +void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id) +{ + unsigned s; + *nsched_ctxs = 0; + *sched_ctxs = 
(unsigned*)malloc(hypervisor.nsched_ctxs * sizeof(unsigned)); + for(s = 0; s < hypervisor.nsched_ctxs; s++) + { + /* if father == STARPU_NMAX_SCHED_CTXS we take all the ctxs in this level */ + if(starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]) == hierarchy_level && + (starpu_sched_ctx_get_inheritor(hypervisor.sched_ctxs[s]) == father_sched_ctx_id || father_sched_ctx_id == STARPU_NMAX_SCHED_CTXS)) + (*sched_ctxs)[(*nsched_ctxs)++] = hypervisor.sched_ctxs[s]; + } + if(*nsched_ctxs == 0) + { + free(*sched_ctxs); + *sched_ctxs = NULL; + } + return; +} + +unsigned sc_hypervisor_get_nhierarchy_levels(void) +{ + unsigned nlevels = 0; + unsigned level = 0; + unsigned levels[STARPU_NMAX_SCHED_CTXS]; + unsigned s, l; + for(s = 0; s < hypervisor.nsched_ctxs; s++) + { + level = starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]); + unsigned found = 0; + for(l = 0; l < nlevels; l++) + if(levels[l] == level) + found = 1; + if(!found) + levels[nlevels++] = level; + } + return nlevels; +} + +void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves) +{ + int s, s2; + for(s = 0; s < nsched_ctxs; s++) + { + unsigned is_someones_father = 0; + for(s2 = 0; s2 < nsched_ctxs; s2++) + { + unsigned father = starpu_sched_ctx_get_inheritor(sched_ctxs[s2]); + if(sched_ctxs[s] == father) + { + is_someones_father = 1; + break; + } + } + if(!is_someones_father) + leaves[(*nleaves)++] = sched_ctxs[s]; + } + return; +} + + +void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx) +{ + if(hypervisor.policy.init_worker) + hypervisor.policy.init_worker(workerid, sched_ctx); +} diff --git a/sc_hypervisor/src/sc_hypervisor_intern.h b/sc_hypervisor/src/sc_hypervisor_intern.h new file mode 100644 index 0000000..7453f39 --- /dev/null +++ b/sc_hypervisor/src/sc_hypervisor_intern.h @@ -0,0 +1,127 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "uthash.h" + +#define SC_SPEED_MAX_GAP_DEFAULT 50 +#define SC_HYPERVISOR_DEFAULT_CPU_SPEED 5.0 +#define SC_HYPERVISOR_DEFAULT_CUDA_SPEED 100.0 + +struct size_request +{ + int *workers; + int nworkers; + unsigned *sched_ctxs; + int nsched_ctxs; +}; + + +/* Entry in the resize request hash table. */ +struct resize_request_entry +{ + /* Key: the tag of tasks concerned by this resize request. */ + uint32_t task_tag; + + /* Value: identifier of the scheduling context needing to be resized. + * The value doesn't matter since the hash table is used only to test + * membership of a task tag. */ + unsigned sched_ctx; + + /* Bookkeeping. */ + UT_hash_handle hh; +}; + +/* structure to indicate when the moving of workers was actually done + (moved workers can be seen in the new ctx) */ +struct resize_ack +{ + /* receiver context */ + int receiver_sched_ctx; + /* list of workers required to be moved */ + int *moved_workers; + /* number of workers required to be moved */ + int nmoved_workers; + /* list of workers that actually got in the receiver ctx */ + int *acked_workers; +}; + +struct configuration_entry +{ + /* Key: the tag of tasks concerned by this configuration. */ + uint32_t task_tag; + + /* Value: configuration of the scheduling context. */ + struct sc_hypervisor_policy_config *configuration; + + /* Bookkeeping. 
*/ + UT_hash_handle hh; +}; + +struct sc_hypervisor +{ + struct sc_hypervisor_wrapper sched_ctx_w[STARPU_NMAX_SCHED_CTXS]; + unsigned sched_ctxs[STARPU_NMAX_SCHED_CTXS]; + unsigned nsched_ctxs; + unsigned resize[STARPU_NMAX_SCHED_CTXS]; + unsigned allow_remove[STARPU_NMAX_SCHED_CTXS]; + int min_tasks; + struct sc_hypervisor_policy policy; + + struct configuration_entry *configurations[STARPU_NMAX_SCHED_CTXS]; + + /* Set of pending resize requests for any context/tag pair. */ + struct resize_request_entry *resize_requests[STARPU_NMAX_SCHED_CTXS]; + + starpu_pthread_mutex_t conf_mut[STARPU_NMAX_SCHED_CTXS]; + starpu_pthread_mutex_t resize_mut[STARPU_NMAX_SCHED_CTXS]; + struct size_request *sr; + int check_min_tasks[STARPU_NMAX_SCHED_CTXS]; + + /* time when the hypervisor started */ + double start_executing_time; + + /* max speed diff btw ctx before triggering resizing */ + double max_speed_gap; + + /* criteria to trigger resizing */ + unsigned resize_criteria; + + /* value of the speed to compare the speed of the context to */ + double optimal_v[STARPU_NMAX_SCHED_CTXS]; +}; + +struct sc_hypervisor_adjustment +{ + int workerids[STARPU_NMAXWORKERS]; + int nworkers; +}; + +extern struct sc_hypervisor hypervisor; + +void _add_config(unsigned sched_ctx); + +void _remove_config(unsigned sched_ctx); + +double _get_max_speed_gap(); + +double _get_optimal_v(unsigned sched_ctx); + +void _set_optimal_v(unsigned sched_ctx, double optimal_v); + +int _sc_hypervisor_use_lazy_resize(void); + +void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned allow); diff --git a/sc_hypervisor/src/uthash.h b/sc_hypervisor/src/uthash.h new file mode 100644 index 0000000..a2ef9fb --- /dev/null +++ b/sc_hypervisor/src/uthash.h @@ -0,0 +1,1024 @@ +/* +Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. 
*/ +#ifdef _MSC_VER /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while(0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while(0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on win32 */ +#ifdef _MSC_VER +typedef unsigned int uint32_t; +#else +#include /* uint32_t */ +#endif + +#define UTHASH_VERSION 1.9.3 + +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ + +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhe */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_bkt=0,_hf_hashv=0; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN 
(HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0); + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0); + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) + +#ifdef STARPU_DEBUG +/* Check that we don't insert the same key several times */ +#define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) \ +do { \ + __typeof__(out) _out; \ + HASH_FIND(hh,head,keyptr,keylen,_out); \ + STARPU_ASSERT_MSG(!_out,"Cannot insert the same key twice"); \ +} while(0) +#else +#define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) +#endif + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt=0; \ + HASH_CHECK_KEY(hh,head,keyptr,keylen_in,add); \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)keyptr; \ + (add)->hh.keylen = keylen_in; \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. 
+ * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle*)((char*)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add) +#define HASH_FIND_INT(head,findint,out) \ + 
HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %u, actual %u\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? 
(UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6 */ +#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hb_keylen=keylen; \ + char *_hb_key=(char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ + bkt = (hashv) & (num_bkts-1); \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _sx_i; \ + char *_hs_key=(char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + bkt = hashv & (num_bkts-1); \ +} while (0) + +#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _fn_i; \ + char *_hf_key=(char*)(key); \ + hashv = 2166136261UL; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) \ + hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ + bkt = hashv & (num_bkts-1); \ +} while(0); + 
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _ho_i; \ + char *_ho_key=(char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + char *_hj_key=(char*)(key); \ + hashv = 0xfeedbeef; \ + _hj_i = _hj_j = 0x9e3779b9; \ + _hj_k = keylen; \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ + /* FALLTHRU */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ + /* FALLTHRU */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ + /* FALLTHRU */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ + /* FALLTHRU */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ + /* FALLTHRU */ \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ + /* FALLTHRU */ \ + case 5: _hj_j += 
_hj_key[4]; \ + /* FALLTHRU */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ + /* FALLTHRU */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ + /* FALLTHRU */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ + /* FALLTHRU */ \ + case 1: _hj_i += _hj_key[0]; \ + /* FALLTHRU */ \ + default: break; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ +do { \ + char *_sfh_key=(char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + break; \ + default: break; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts-1); \ +} while(0); + +#ifdef 
HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * So MurmurHash comes in two versions, the faster unaligned one and the slower + * aligned one. We only use the faster one on CPU's where we know it's safe. + * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__)) +#define HASH_MUR HASH_MUR_UNALIGNED +#else +#define HASH_MUR HASH_MUR_ALIGNED +#endif + +/* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */ +#define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const unsigned int _mur_m = 0x5bd1e995; \ + const int _mur_r = 24; \ + hashv = 0xcafebabe ^ keylen; \ + char *_mur_key = (char *)(key); \ + uint32_t _mur_tmp, _mur_len = keylen; \ + \ + for (;_mur_len >= 4; _mur_len-=4) { \ + _mur_tmp = *(uint32_t *)_mur_key; \ + _mur_tmp *= _mur_m; \ + _mur_tmp ^= _mur_tmp >> _mur_r; \ + _mur_tmp *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_tmp; \ + _mur_key += 4; \ + } \ + \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: hashv ^= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + /* FALLTHRU */ \ + default: break; \ + }; \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */ +#define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const unsigned int _mur_m = 0x5bd1e995; \ + const int _mur_r = 24; \ + hashv = 0xcafebabe ^ (keylen); \ + char *_mur_key = (char *)(key); \ + uint32_t _mur_len = keylen; \ + int _mur_align = (int)_mur_key & 3; \ + \ + if (_mur_align && 
(_mur_len >= 4)) { \ + unsigned _mur_t = 0, _mur_d = 0; \ + switch(_mur_align) { \ + case 1: _mur_t |= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: _mur_t |= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 3: _mur_t |= _mur_key[0]; \ + /* FALLTHRU */ \ + default: break; \ + } \ + _mur_t <<= (8 * _mur_align); \ + _mur_key += 4-_mur_align; \ + _mur_len -= 4-_mur_align; \ + int _mur_sl = 8 * (4-_mur_align); \ + int _mur_sr = 8 * _mur_align; \ + \ + for (;_mur_len >= 4; _mur_len-=4) { \ + _mur_d = *(unsigned *)_mur_key; \ + _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + unsigned _mur_k = _mur_t; \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_t = _mur_d; \ + _mur_key += 4; \ + } \ + _mur_d = 0; \ + if(_mur_len >= _mur_align) { \ + switch(_mur_align) { \ + case 3: _mur_d |= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: _mur_d |= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 1: _mur_d |= _mur_key[0]; \ + /* FALLTHRU */ \ + default: break; \ + } \ + unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_k += _mur_align; \ + _mur_len -= _mur_align; \ + \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: hashv ^= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + /* FALLTHRU */ \ + default: break; \ + } \ + } else { \ + switch(_mur_len) \ + { \ + case 3: _mur_d ^= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: _mur_d ^= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 1: _mur_d ^= _mur_key[0]; \ + /* FALLTHRU */ \ + case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + hashv *= _mur_m; \ + /* FALLTHRU */ \ + default: break; \ + } \ + } \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + } else { \ + for (;_mur_len >= 4; 
_mur_len-=4) { \ + unsigned _mur_k = *(unsigned*)_mur_key; \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_key += 4; \ + } \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + /* FALLTHRU */ \ + case 2: hashv ^= _mur_key[1] << 8; \ + /* FALLTHRU */ \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + /* FALLTHRU */ \ + default: break; \ + } \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + } \ + bkt = hashv & (num_bkts-1); \ +} while(0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ +do { \ + if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ + else out=NULL; \ + while (out) { \ + if (out->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ + } \ + if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ + else out = NULL; \ + } \ +} while(0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ + && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while(0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the 
effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 
1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ + ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ + _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while(0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail ) { \ + _hs_tail->next = ((_hs_e) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + _hs_e->prev = ((_hs_tail) ? 
\ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + _hs_tail->next = NULL; \ + if ( _hs_nmerges <= 1 ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head) { \ + 
uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while(0) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. 
+ */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time.
*/ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/socl/Makefile.am b/socl/Makefile.am new file mode 100644 index 0000000..05c3812 --- /dev/null +++ b/socl/Makefile.am @@ -0,0 +1,24 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = src examples + +EXTRA_DIST = README + +SOCL_vendorsdir = @datarootdir@/starpu/opencl/vendors +dist_SOCL_vendors_DATA = @SOCL_VENDORS@ diff --git a/socl/Makefile.in b/socl/Makefile.in new file mode 100644 index 0000000..52f9681 --- /dev/null +++ b/socl/Makefile.in @@ -0,0 +1,949 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = socl +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + 
$(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_SOCL_vendors_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = 
\ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(SOCL_vendorsdir)" +DATA = $(dist_SOCL_vendors_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk README +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ 
+DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE 
= @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = 
@STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ 
+doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src examples +EXTRA_DIST = README +SOCL_vendorsdir = @datarootdir@/starpu/opencl/vendors +dist_SOCL_vendors_DATA = @SOCL_VENDORS@ +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign socl/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-dist_SOCL_vendorsDATA: $(dist_SOCL_vendors_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_SOCL_vendors_DATA)'; test -n "$(SOCL_vendorsdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(SOCL_vendorsdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(SOCL_vendorsdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(SOCL_vendorsdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(SOCL_vendorsdir)" || exit $$?; \ + done + +uninstall-dist_SOCL_vendorsDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_SOCL_vendors_DATA)'; test -n "$(SOCL_vendorsdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(SOCL_vendorsdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(SOCL_vendorsdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-dist_SOCL_vendorsDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-dist_SOCL_vendorsDATA + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data 
install-data-am \ + install-dist_SOCL_vendorsDATA install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-dist_SOCL_vendorsDATA + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: diff --git a/socl/README b/socl/README new file mode 100644 index 0000000..d167b61 --- /dev/null +++ b/socl/README @@ -0,0 +1,5 @@ +StarPU's OpenCL interface +========================= + +This directory contains an OpenCL implementation that can +be used as a replacement for StarPU's classic API. diff --git a/socl/examples/Makefile.am b/socl/examples/Makefile.am new file mode 100644 index 0000000..34981d8 --- /dev/null +++ b/socl/examples/Makefile.am @@ -0,0 +1,61 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +AM_CFLAGS += $(MAGMA_CFLAGS) +AM_CPPFLAGS = $(STARPU_H_CPPFLAGS) -DCL_TARGET_OPENCL_VERSION=120 +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/socl/src/libsocl-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) + +SOCL_EXAMPLES = +if !STARPU_SIMGRID +TESTS = $(SOCL_EXAMPLES) +endif + +check_PROGRAMS = $(SOCL_EXAMPLES) + +CLEANFILES = *.gcno *.gcda starpu_idle_microsec.log + +examplebindir = $(libdir)/starpu/examples/socl/ +examplebin_PROGRAMS = + +examplebin_PROGRAMS += \ + basic/basic \ + basicsplit/basicsplit \ + testmap/testmap \ + clinfo/clinfo \ + matmul/matmul \ + mandelbrot/mandelbrot \ + mansched/mansched + + +SOCL_EXAMPLES += \ + basic/basic \ + basicsplit/basicsplit \ + testmap/testmap \ + clinfo/clinfo \ + matmul/matmul \ + mansched/mansched + +matmul_matmul_LDADD = -lm + +#mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) +#if STARPU_HAVE_X11 +#mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) +#mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) +#endif diff --git a/socl/examples/Makefile.in b/socl/examples/Makefile.in new file mode 100644 index 0000000..9c93634 --- /dev/null +++ b/socl/examples/Makefile.in @@ -0,0 +1,1726 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) 
+NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_2) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_SIMGRID_FALSE@TESTS = $(am__EXEEXT_1) +check_PROGRAMS = $(am__EXEEXT_1) +examplebin_PROGRAMS = basic/basic$(EXEEXT) \ + basicsplit/basicsplit$(EXEEXT) testmap/testmap$(EXEEXT) \ + clinfo/clinfo$(EXEEXT) matmul/matmul$(EXEEXT) \ + mandelbrot/mandelbrot$(EXEEXT) mansched/mansched$(EXEEXT) +subdir = socl/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am 
$(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = basic/basic$(EXEEXT) basicsplit/basicsplit$(EXEEXT) \ + testmap/testmap$(EXEEXT) clinfo/clinfo$(EXEEXT) \ + matmul/matmul$(EXEEXT) mansched/mansched$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +basic_basic_SOURCES = basic/basic.c +am__dirstamp = $(am__leading_dot)dirstamp +basic_basic_OBJECTS = basic/basic.$(OBJEXT) +basic_basic_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +basicsplit_basicsplit_SOURCES = basicsplit/basicsplit.c +basicsplit_basicsplit_OBJECTS = basicsplit/basicsplit.$(OBJEXT) +basicsplit_basicsplit_LDADD = $(LDADD) +clinfo_clinfo_SOURCES = clinfo/clinfo.c +clinfo_clinfo_OBJECTS = clinfo/clinfo.$(OBJEXT) +clinfo_clinfo_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c +mandelbrot_mandelbrot_OBJECTS = mandelbrot/mandelbrot.$(OBJEXT) +mandelbrot_mandelbrot_LDADD = $(LDADD) +mansched_mansched_SOURCES = mansched/mansched.c +mansched_mansched_OBJECTS = mansched/mansched.$(OBJEXT) +mansched_mansched_LDADD = $(LDADD) +matmul_matmul_SOURCES = matmul/matmul.c +matmul_matmul_OBJECTS = matmul/matmul.$(OBJEXT) +matmul_matmul_DEPENDENCIES = +testmap_testmap_SOURCES = testmap/testmap.c +testmap_testmap_OBJECTS = testmap/testmap.$(OBJEXT) +testmap_testmap_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = 
$(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + basic/$(DEPDIR)/basic.Po basicsplit/$(DEPDIR)/basicsplit.Po \ + clinfo/$(DEPDIR)/clinfo.Po mandelbrot/$(DEPDIR)/mandelbrot.Po \ + mansched/$(DEPDIR)/mansched.Po matmul/$(DEPDIR)/matmul.Po \ + testmap/$(DEPDIR)/testmap.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = basic/basic.c basicsplit/basicsplit.c clinfo/clinfo.c \ + loader.c mandelbrot/mandelbrot.c mansched/mansched.c \ + matmul/matmul.c testmap/testmap.c +DIST_SOURCES = basic/basic.c basicsplit/basicsplit.c clinfo/clinfo.c \ + loader.c mandelbrot/mandelbrot.c mansched/mansched.c \ + matmul/matmul.c testmap/testmap.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the 
standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' 
+am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# they have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the test scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problems with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'.
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS 
= @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ \ + $(top_builddir)/socl/src/libsocl-@STARPU_EFFECTIVE_VERSION@.la \ + $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ + $(STARPU_OPENCL_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ 
+PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = 
$(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +AM_CPPFLAGS = $(STARPU_H_CPPFLAGS) -DCL_TARGET_OPENCL_VERSION=120 +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +SOCL_EXAMPLES = basic/basic basicsplit/basicsplit testmap/testmap \ + clinfo/clinfo matmul/matmul mansched/mansched +CLEANFILES = *.gcno *.gcda starpu_idle_microsec.log +examplebindir = $(libdir)/starpu/examples/socl/ +matmul_matmul_LDADD = -lm +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do 
\ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign socl/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + 
done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +basic/$(am__dirstamp): + @$(MKDIR_P) basic + @: > basic/$(am__dirstamp) +basic/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) 
basic/$(DEPDIR) + @: > basic/$(DEPDIR)/$(am__dirstamp) +basic/basic.$(OBJEXT): basic/$(am__dirstamp) \ + basic/$(DEPDIR)/$(am__dirstamp) + +basic/basic$(EXEEXT): $(basic_basic_OBJECTS) $(basic_basic_DEPENDENCIES) $(EXTRA_basic_basic_DEPENDENCIES) basic/$(am__dirstamp) + @rm -f basic/basic$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basic_basic_OBJECTS) $(basic_basic_LDADD) $(LIBS) +basicsplit/$(am__dirstamp): + @$(MKDIR_P) basicsplit + @: > basicsplit/$(am__dirstamp) +basicsplit/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) basicsplit/$(DEPDIR) + @: > basicsplit/$(DEPDIR)/$(am__dirstamp) +basicsplit/basicsplit.$(OBJEXT): basicsplit/$(am__dirstamp) \ + basicsplit/$(DEPDIR)/$(am__dirstamp) + +basicsplit/basicsplit$(EXEEXT): $(basicsplit_basicsplit_OBJECTS) $(basicsplit_basicsplit_DEPENDENCIES) $(EXTRA_basicsplit_basicsplit_DEPENDENCIES) basicsplit/$(am__dirstamp) + @rm -f basicsplit/basicsplit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(basicsplit_basicsplit_OBJECTS) $(basicsplit_basicsplit_LDADD) $(LIBS) +clinfo/$(am__dirstamp): + @$(MKDIR_P) clinfo + @: > clinfo/$(am__dirstamp) +clinfo/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) clinfo/$(DEPDIR) + @: > clinfo/$(DEPDIR)/$(am__dirstamp) +clinfo/clinfo.$(OBJEXT): clinfo/$(am__dirstamp) \ + clinfo/$(DEPDIR)/$(am__dirstamp) + +clinfo/clinfo$(EXEEXT): $(clinfo_clinfo_OBJECTS) $(clinfo_clinfo_DEPENDENCIES) $(EXTRA_clinfo_clinfo_DEPENDENCIES) clinfo/$(am__dirstamp) + @rm -f clinfo/clinfo$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(clinfo_clinfo_OBJECTS) $(clinfo_clinfo_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) +mandelbrot/$(am__dirstamp): + @$(MKDIR_P) mandelbrot + @: > mandelbrot/$(am__dirstamp) +mandelbrot/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mandelbrot/$(DEPDIR) + @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) +mandelbrot/mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ + mandelbrot/$(DEPDIR)/$(am__dirstamp) + 
+mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) $(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) + @rm -f mandelbrot/mandelbrot$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) +mansched/$(am__dirstamp): + @$(MKDIR_P) mansched + @: > mansched/$(am__dirstamp) +mansched/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mansched/$(DEPDIR) + @: > mansched/$(DEPDIR)/$(am__dirstamp) +mansched/mansched.$(OBJEXT): mansched/$(am__dirstamp) \ + mansched/$(DEPDIR)/$(am__dirstamp) + +mansched/mansched$(EXEEXT): $(mansched_mansched_OBJECTS) $(mansched_mansched_DEPENDENCIES) $(EXTRA_mansched_mansched_DEPENDENCIES) mansched/$(am__dirstamp) + @rm -f mansched/mansched$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(mansched_mansched_OBJECTS) $(mansched_mansched_LDADD) $(LIBS) +matmul/$(am__dirstamp): + @$(MKDIR_P) matmul + @: > matmul/$(am__dirstamp) +matmul/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matmul/$(DEPDIR) + @: > matmul/$(DEPDIR)/$(am__dirstamp) +matmul/matmul.$(OBJEXT): matmul/$(am__dirstamp) \ + matmul/$(DEPDIR)/$(am__dirstamp) + +matmul/matmul$(EXEEXT): $(matmul_matmul_OBJECTS) $(matmul_matmul_DEPENDENCIES) $(EXTRA_matmul_matmul_DEPENDENCIES) matmul/$(am__dirstamp) + @rm -f matmul/matmul$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matmul_matmul_OBJECTS) $(matmul_matmul_LDADD) $(LIBS) +testmap/$(am__dirstamp): + @$(MKDIR_P) testmap + @: > testmap/$(am__dirstamp) +testmap/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) testmap/$(DEPDIR) + @: > testmap/$(DEPDIR)/$(am__dirstamp) +testmap/testmap.$(OBJEXT): testmap/$(am__dirstamp) \ + testmap/$(DEPDIR)/$(am__dirstamp) + +testmap/testmap$(EXEEXT): $(testmap_testmap_OBJECTS) $(testmap_testmap_DEPENDENCIES) $(EXTRA_testmap_testmap_DEPENDENCIES) testmap/$(am__dirstamp) + @rm -f testmap/testmap$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(testmap_testmap_OBJECTS) $(testmap_testmap_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f 
basic/*.$(OBJEXT) + -rm -f basicsplit/*.$(OBJEXT) + -rm -f clinfo/*.$(OBJEXT) + -rm -f mandelbrot/*.$(OBJEXT) + -rm -f mansched/*.$(OBJEXT) + -rm -f matmul/*.$(OBJEXT) + -rm -f testmap/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/basic.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@basicsplit/$(DEPDIR)/basicsplit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@clinfo/$(DEPDIR)/clinfo.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mansched/$(DEPDIR)/mansched.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matmul/$(DEPDIR)/matmul.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@testmap/$(DEPDIR)/testmap.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' 
object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf basic/.libs basic/_libs + -rm -rf basicsplit/.libs basicsplit/_libs + -rm -rf clinfo/.libs clinfo/_libs + -rm -rf mandelbrot/.libs mandelbrot/_libs + -rm -rf mansched/.libs mansched/_libs + -rm -rf matmul/.libs matmul/_libs + -rm -rf testmap/.libs testmap/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo 
"$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. +am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep 
"^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. 
contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? 
+basic/basic.log: basic/basic$(EXEEXT) + @p='basic/basic$(EXEEXT)'; \ + b='basic/basic'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +basicsplit/basicsplit.log: basicsplit/basicsplit$(EXEEXT) + @p='basicsplit/basicsplit$(EXEEXT)'; \ + b='basicsplit/basicsplit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +testmap/testmap.log: testmap/testmap$(EXEEXT) + @p='testmap/testmap$(EXEEXT)'; \ + b='testmap/testmap'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +clinfo/clinfo.log: clinfo/clinfo$(EXEEXT) + @p='clinfo/clinfo$(EXEEXT)'; \ + b='clinfo/clinfo'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +matmul/matmul.log: matmul/matmul$(EXEEXT) + @p='matmul/matmul$(EXEEXT)'; \ + b='matmul/matmul'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +mansched/mansched.log: mansched/mansched$(EXEEXT) + @p='mansched/mansched$(EXEEXT)'; \ + b='mansched/mansched'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + 
$(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f basic/$(DEPDIR)/$(am__dirstamp) + -rm -f basic/$(am__dirstamp) + -rm -f basicsplit/$(DEPDIR)/$(am__dirstamp) + -rm -f basicsplit/$(am__dirstamp) + -rm -f clinfo/$(DEPDIR)/$(am__dirstamp) + -rm -f clinfo/$(am__dirstamp) + -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) + -rm -f mandelbrot/$(am__dirstamp) + -rm -f mansched/$(DEPDIR)/$(am__dirstamp) + -rm -f mansched/$(am__dirstamp) + -rm -f matmul/$(DEPDIR)/$(am__dirstamp) + -rm -f matmul/$(am__dirstamp) + -rm -f testmap/$(DEPDIR)/$(am__dirstamp) + -rm -f testmap/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f basic/$(DEPDIR)/basic.Po + -rm -f basicsplit/$(DEPDIR)/basicsplit.Po + -rm -f clinfo/$(DEPDIR)/clinfo.Po + -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po + -rm -f mansched/$(DEPDIR)/mansched.Po + -rm -f matmul/$(DEPDIR)/matmul.Po + -rm -f testmap/$(DEPDIR)/testmap.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f basic/$(DEPDIR)/basic.Po + -rm -f basicsplit/$(DEPDIR)/basicsplit.Po + -rm -f clinfo/$(DEPDIR)/clinfo.Po + 
-rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po + -rm -f mansched/$(DEPDIR)/mansched.Po + -rm -f matmul/$(DEPDIR)/matmul.Po + -rm -f testmap/$(DEPDIR)/testmap.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< 
-c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. 
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +#mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) +#if STARPU_HAVE_X11 +#mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) +#mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) +#endif + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/socl/examples/basic/basic.c b/socl/examples/basic/basic.c new file mode 100644 index 0000000..5641b08 --- /dev/null +++ b/socl/examples/basic/basic.c @@ -0,0 +1,253 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + + +#include +#include +#include +#include + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +#define error(...) 
do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) + +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else +# define UNUSED(x) x +#endif + +#define SIZE 1024 +#define TYPE float +#define REALSIZE (SIZE * sizeof(TYPE)) + +const char *kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ + size_t x = get_global_id(0);\n \ + size_t y = get_global_id(1);\n \ + size_t w = get_global_size(0); \n \ + int idx = y*w+x; \n \ +#ifdef SOCL_DEVICE_TYPE_GPU \n \ + d[idx] = s1[idx] + s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_CPU \n \ + d[idx] = s1[idx] + 2* s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ + d[idx] = s1[idx] + 3 * s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ + d[idx] = s1[idx] + 4 * s2[idx];\n \ +#endif \n \ +}"; + +int main(int UNUSED(argc), char** UNUSED(argv)) +{ + cl_platform_id platforms[15]; + cl_uint num_platforms; + cl_device_id devices[15]; + cl_uint num_devices; + cl_context context; + cl_program program; + cl_kernel kernel; + cl_mem s1m, s2m, dm; + cl_command_queue cq; + cl_int err; + unsigned int i; + + TYPE s1[SIZE],s2[SIZE],d[SIZE]; + + { + for (i=0; i +#include +#include +#include + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +#define error(...) 
do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) + +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else +# define UNUSED(x) x +#endif + +#define SIZE 1024 +#define TYPE float +#define REALSIZE (SIZE * sizeof(TYPE)) + +const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ + size_t x = get_global_id(0);\n \ + size_t y = get_global_id(1);\n \ + size_t w = get_global_size(0); \n \ + int idx = y*w+x; \n \ +#ifdef SOCL_DEVICE_TYPE_GPU \n \ + d[idx] = s1[idx] + s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_CPU \n \ + d[idx] = s1[idx] + 2* s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ + d[idx] = s1[idx] + 3 * s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ + d[idx] = s1[idx] + 4 * s2[idx];\n \ +#endif \n \ +}"; + +cl_kernel kernel; +cl_context context; +TYPE s1[SIZE],s2[SIZE],d[SIZE]; + +typedef cl_int (*split_func_t)(cl_command_queue, cl_uint, cl_uint, const size_t *, const size_t *, const size_t *, const cl_event, cl_event *); + +void add(cl_command_queue cq, cl_uint size, TYPE * _s1, TYPE *_s2, TYPE*_d, cl_uint num_events, cl_event * events, cl_event *event) +{ + cl_int err; + + printf("Creating buffers...\n"); + cl_mem s1m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _s1, &err); + check(err, "clCreateBuffer s1"); + cl_mem s2m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _s2, &err); + check(err, "clCreateBuffer s2"); + cl_mem dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _d, &err); + check(err, "clCreateBuffer d"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m); + check(err, "clSetKernelArg 0"); + err = clSetKernelArg(kernel, 
1, sizeof(cl_mem), &s2m); + check(err, "clSetKernelArg 1"); + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm); + check(err, "clSetKernelArg 2"); + + printf("Enqueueing NDRangeKernel...\n"); + size_t local[3] = {16, 1, 1}; + size_t global[3] = {size, 1, 1}; + cl_event eventK; + err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, num_events, events, &eventK); + check(err, "clEnqueueNDRangeKernel"); + + clEnqueueMapBuffer(cq, dm, CL_FALSE, CL_MAP_READ, 0, size * sizeof(TYPE), 1, &eventK, event, &err); + check(err, "clEnqueueMapBuffer"); + + clReleaseMemObject(s1m); + clReleaseMemObject(s2m); + clReleaseMemObject(dm); +} + +cl_int split_func(cl_command_queue cq, cl_uint split_factor, void * UNUSED(data), cl_event before, cl_event * after) +{ + cl_event evs[split_factor]; + + printf("Partition with factor %d\n", split_factor); + + cl_uint size = ((SIZE)/split_factor) - (SIZE/split_factor % 16); + cl_uint i; + for (i=0; i +#include + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +static inline void checkErr(cl_int err, const char * name) +{ + if (err != CL_SUCCESS) + { + fprintf(stderr, "ERROR: %s (%d)\n", name, err); + exit(1); + } +} + +int main(void) +{ + cl_int err; + cl_uint num_platforms; + + // Platform info + err = clGetPlatformIDs(0, NULL, &num_platforms); + if (num_platforms == 0) + { + printf("No OpenCL platform found.\n"); + exit(77); + } + checkErr(err, "Unable to get platform count"); + + cl_platform_id platforms[num_platforms]; + err = clGetPlatformIDs(num_platforms, platforms, NULL); + checkErr(err, "Unable to get platform list"); + + // Iteratate over platforms + printf("Number of platforms:\t\t\t\t %d\n", num_platforms); + + { + unsigned int i; + for (i=0; i +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to 
be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", "starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: 
/* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. */ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/socl/examples/mandelbrot/mandelbrot.c 
b/socl/examples/mandelbrot/mandelbrot.c new file mode 100644 index 0000000..ac05718 --- /dev/null +++ b/socl/examples/mandelbrot/mandelbrot.c @@ -0,0 +1,543 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + + +#include +#include +#include +#include + +/* Uncomment this to activate X11 display */ +//#define USE_X11 + +#define SHORT_LOG 1 +#define ROUND_ROBIN + +#ifdef USE_X11 +#include +#include +int use_x11 = 1; +#else +int use_x11 = 0; +#endif + +int demo = 0; +int frames = -1; + + +#include +#include +#include + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) + +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else +# define UNUSED(x) x +#endif + +const char * kernel_src = "\ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\ +#define TYPE double \n\ +#define MIN(a,b) (((a)<(b))? 
(a) : (b))\n\ + __kernel void mandelbrot_kernel(__global uint * a,\n \ + TYPE leftX, TYPE topY,\n \ + TYPE stepX, TYPE stepY,\n \ + uint maxIt, uint iby, uint block_size)\n \ +{\n \ + TYPE xc = leftX + get_global_id(0) * stepX;\n \ + TYPE yc = iby*block_size*stepY + topY + get_global_id(1) * stepY;\n \ + int it;\n \ + TYPE x,y;\n \ + x = y = (TYPE)0.0;\n \ + for (it=0;it (TYPE)4) break; \n \ + TYPE twoxy = (TYPE)2*x*y;\n \ + x = x2 - y2 + xc;\n \ + y = twoxy + yc;\n \ + }\n \ + uint v = MIN((1024*((float)(it)/(2000))), 256);\n \ + a[get_global_id(0) + get_global_id(1)*get_global_size(0)] = (v<<16|(255-v)<<8); \n \ +}"; + +static cl_uint nblocks = 8; +static cl_uint height = 768; +static cl_uint width = 1024; +static cl_uint maxIt = 20000; + +static cl_uint group_size = 64; + +static double leftX = -0.745; +static double rightX = -0.74375; +static double topY = .15; +static double bottomY = .14875; + +#ifdef USE_X11 +/* X11 data */ +static Display *dpy; +static Window win; +static XImage *bitmap; +static GC gc; +static KeySym Left=-1, Right, Down, Up, Alt ; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +static void exit_x11(void) +{ + XDestroyImage(bitmap); + XDestroyWindow(dpy, win); + XCloseDisplay(dpy); +} + +static void init_x11(int width, int height, cl_uint *buffer) +{ + /* Attempt to open the display */ + dpy = XOpenDisplay(NULL); + + /* Failure */ + if (!dpy) + exit(0); + + unsigned long white = WhitePixel(dpy,DefaultScreen(dpy)); + unsigned long black = BlackPixel(dpy,DefaultScreen(dpy)); + + win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0, + width, height, 0, black, white); + + /* We want to be notified when the window appears */ + XSelectInput(dpy, win, StructureNotifyMask); + + /* Make it appear */ + XMapWindow(dpy, win); + + XTextProperty tp; + char name[128] = "Mandelbrot"; + char *n = name; + Status st = XStringListToTextProperty(&n, 1, &tp); + if (st) + XSetWMName(dpy, win, &tp); + + /* Wait for the MapNotify event */ + 
XFlush(dpy); + + int depth = DefaultDepth(dpy, DefaultScreen(dpy)); + Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); + + /* Make bitmap */ + bitmap = XCreateImage(dpy, visual, depth, + ZPixmap, 0, (char *)buffer, + width, height, 32, 0); + + /* Init GC */ + gc = XCreateGC(dpy, win, 0, NULL); + XSetForeground(dpy, gc, black); + + XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask); + + Atom wmDeleteMessage; + wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False); + XSetWMProtocols(dpy, win, &wmDeleteMessage, 1); + + Left = XStringToKeysym ("Left"); + Right = XStringToKeysym ("Right"); + Up = XStringToKeysym ("Up"); + Down = XStringToKeysym ("Down"); + Alt = XStringToKeysym ("Alt"); +} + +static int handle_events(void) +{ + XEvent event; + XNextEvent(dpy, &event); + + KeySym key; + char text[255]; + + double coef = 0.05; + + if (event.type == KeyPress) + { + XLookupString(&event.xkey,text,255,&key,0); + if (key == Left) + { + double widthX = rightX - leftX; + leftX -= coef*widthX; + rightX -= coef*widthX; + } + else if (key == Right) + { + double widthX = rightX - leftX; + leftX += coef*widthX; + rightX += coef*widthX; + } + else if (key == Down) + { + double heightY = topY - bottomY; + topY += coef*heightY; + bottomY += coef*heightY; + } + else if (key == Up) + { + double heightY = topY - bottomY; + topY -= coef*heightY; + bottomY -= coef*heightY; + } + else + { + double widthX = rightX - leftX; + double heightY = topY - bottomY; + + if (text[0] == '-') + { + /* Zoom out */ + leftX -= (coef/2)*widthX; + rightX += (coef/2)*widthX; + topY += (coef/2)*heightY; + bottomY -= (coef/2)*heightY; + } + else if (text[0] == '+') + { + /* Zoom in */ + leftX += (coef/2)*widthX; + rightX -= (coef/2)*widthX; + topY -= (coef/2)*heightY; + bottomY += (coef/2)*heightY; + } + } + + if (text[0]=='q') + { + return -1; + } + } + + if (event.type==ButtonPress) + { + /* tell where the mouse Button was Pressed */ + printf("You pressed a button at 
(%i,%i)\n", + event.xbutton.x,event.xbutton.y); + } + + return 0; +} +#endif //USE_X11 + +static void parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-h") == 0) + { + fprintf(stderr, "Usage: %s [-h] [ -width 1024] [-height 768] [-nblocks 16] [-group_size 64] [-no-x11] [-demo] [-frames N] [-pos leftx:rightx:bottomy:topy]\n", argv[0]); + exit(-1); + } + + if (strcmp(argv[i], "-width") == 0) + { + char *argptr; + width = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-frames") == 0) + { + char *argptr; + frames = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-height") == 0) + { + char *argptr; + height = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-group_size") == 0) + { + char *argptr; + group_size = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-nblocks") == 0) + { + char *argptr; + nblocks = strtol(argv[++i], &argptr, 10); + } + + if (strcmp(argv[i], "-pos") == 0) + { + int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY); + assert(ret == 4); + } + + if (strcmp(argv[i], "-demo") == 0) + { + demo = 1; + leftX = -50.22749575062760; + rightX = 48.73874621262927; + topY = -49.35016705749115; + bottomY = 49.64891691946615; + + } + + if (strcmp(argv[i], "-no-x11") == 0) + { +#ifdef USE_X11 + use_x11 = 0; +#endif + } + } +} + +int main(int argc, char **argv) +{ +#define MAX_DEVICES 20 + cl_platform_id platforms[15]; + cl_uint num_platforms; + cl_device_id devices[15]; + cl_uint num_devices; + cl_context context; + cl_program program; + cl_kernel kernel; + cl_command_queue cq[MAX_DEVICES]; + cl_int err; + cl_uint i; + + parse_args(argc, argv); + + cl_uint block_size = height/nblocks; + assert((height % nblocks) == 0); + assert((width % group_size) == 0); + + clGetPlatformIDs(0, NULL, &num_platforms); + if (num_platforms == 0) + { + printf("No OpenCL platform found\n"); + exit(0); + } + err = 
clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL); + check(err, "clGetPlatformIDs"); + + unsigned int platform_idx; + for (platform_idx=0; platform_idx +#include +#include +#include + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) + +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else +# define UNUSED(x) x +#endif + +#define SIZE 1024 +#define TYPE float +#define REALSIZE (SIZE * sizeof(TYPE)) + +const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ + size_t x = get_global_id(0); \ + size_t y = get_global_id(1); \ + size_t w = get_global_size(0); \ + int idx = y*w+x; \ + d[idx] = s1[idx] + s2[idx]; \ +}"; + +int main(int UNUSED(argc), char** UNUSED(argv)) +{ + cl_platform_id platforms[15]; + cl_uint num_platforms; + cl_device_id devices[15]; + cl_uint num_devices; + cl_context context; + cl_program program; + cl_kernel kernel; + cl_mem s1m, s2m, dm; + cl_command_queue cq; + unsigned int d; + cl_int err; + + TYPE s1[SIZE],s2[SIZE],dst[SIZE]; + + { + int i; + for (i=0; i +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#define error(...) 
do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(exp) do { err = exp; if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); }} while(0) +#define check2(exp) exp; if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); } +#define check3(exp, err) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); } } while(0) + +// Thread block size +#define BLOCK_SIZE 16 // Kernel thread-block size +#define WORK_SIZE 64 // Kernel global size in lines of A (or C) +#define TYPE float + +// Basic Matrix dimensions +#define WA (128L * BLOCK_SIZE) // Matrix A width +#ifdef STARPU_QUICK_CHECK +#define HA (128L * BLOCK_SIZE) // Matrix A height +#else +#define HA (512L * BLOCK_SIZE) // Matrix A height +#endif +#define WB (128L * BLOCK_SIZE) // Matrix B width +#define HB WA // Matrix B height +#define WC WB // Matrix C width +#define HC HA // Matrix C height +#define BLOCKS (HA / WORK_SIZE) + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void printDiff(TYPE*, TYPE*, int, int, int, TYPE); +void computeReference(TYPE*, const TYPE*, const TYPE*, unsigned int, unsigned int, unsigned int); + +#define str(x) #x + +#define CODE "\ +#define TYPE float\n\ +__kernel void sgemmNN(int wa, int ha, int wb, __global TYPE* A, __global TYPE* B, __global TYPE* C) {\n\ + #define BS 16\n \ + #define BLOCK_SIZE 16\n \ + int bx = get_group_id(0);\n \ + int by = get_group_id(1);\n \ + \n \ + int tx = get_local_id(0);\n \ + int ty = get_local_id(1);\n \ + \n \ + int gx = get_global_id(0);\n \ + int gy = get_global_id(1);\n \ + __local float As[BS][BS+1]; \ + __local float Bs[BS][BS+1]; \ + \n \ + unsigned int block_w = min(wb - bx * BLOCK_SIZE, BLOCK_SIZE);\n \ + unsigned int block_h = min(ha - by * BLOCK_SIZE, BLOCK_SIZE);\n \ + \n \ + int valid = (gx < wb && gy < ha);\n \ 
/*
 * Element type of the matrices.  It is #define'd near the top of this file;
 * the fallback below only takes effect when these helpers are compiled on
 * their own.
 */
#ifndef TYPE
#define TYPE float
#endif

/* Set to 1 by parse_args() when "-check" is present on the command line. */
int check = 0;

/*
 * Scan the command line.
 *   -check   sets the global `check` flag
 *   -h       prints a one-line usage message (the program keeps running)
 * Any other word is ignored.
 */
static void __attribute__((unused)) parse_args(int argc, const char **argv)
{
	int i;
	for (i = 1; i < argc; i++)
	{
		if (strcmp(argv[i], "-check") == 0)
		{
			check = 1;
		}
		else if (strcmp(argv[i], "-h") == 0)
		{
			printf("usage : %s [-check]\n", argv[0]);
		}
	}
}

/*
 * Round global_size up to the next multiple of group_size.
 *
 * @param group_size   the granularity
 * @param global_size  the value to round up
 * @return global_size if it is already a multiple of group_size, otherwise
 *         the smallest larger multiple.  A non-positive group_size cannot be
 *         used as a modulus, so global_size is returned unchanged in that case.
 */
size_t roundUp(int group_size, int global_size)
{
	int r;

	if (group_size <= 0)
	{
		return global_size;
	}

	r = global_size % group_size;
	if (r == 0)
	{
		return global_size;
	}
	return global_size + group_size - r;
}

/*
 * Fill data[0 .. size-1] with pseudo-random values: each element is rand()
 * scaled by 1/RAND_MAX.  The generator is not seeded here.
 */
void fillArray(TYPE* data, int size)
{
	int i;
	const TYPE fScale = (TYPE)(1.0f / (float)RAND_MAX);
	for (i = 0; i < size; ++i)
	{
		data[i] = fScale * rand();
	}
}

/* Print data[0 .. size-1] on stdout, one "index: value" pair per line. */
void printArray(float* data, int size)
{
	int i;
	for (i = 0; i < size; ++i)
	{
		printf("%d: %.3f\n", i, data[i]);
	}
}

/**
 * Compare two float arrays using the relative L2-norm of their difference:
 * the arrays match when ||reference - data|| / ||reference|| < epsilon.
 *
 * The sums of squares are accumulated in double precision: a float
 * accumulator stops absorbing new terms once the running sum is large
 * compared to them, which happens on long arrays.
 *
 * @return 1 if the relative error is below \a epsilon, 0 otherwise.  0 is
 *         also returned when the reference has a (near) zero norm, since the
 *         relative error is then undefined; this includes \a len == 0.
 * @param reference handle to the reference data / gold image
 * @param data      handle to the computed data
 * @param len       number of elements in reference and data
 * @param epsilon   epsilon to use for the comparison, must be >= 0
*/
int shrCompareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon)
{
	assert(epsilon >= 0);

	double sq_err = 0.0; /* sum of squared differences */
	double sq_ref = 0.0; /* sum of squared reference values */

	unsigned int i;
	for (i = 0; i < len; ++i)
	{
		const double r = reference[i];
		const double diff = r - (double)data[i];
		sq_err += diff * diff;
		sq_ref += r * r;
	}

	if (sq_ref < 1e-7)
	{
#ifdef _DEBUG
		fprintf(stderr, "ERROR, reference l2-norm is 0\n");
#endif
		return 0;
	}

	/* ||e|| / ||r|| < eps  <=>  ||e||^2 < eps^2 * ||r||^2, every term being
	 * non-negative; comparing the squares avoids taking two square roots. */
	const double eps = epsilon;
	int result = sq_err < eps * eps * sq_ref;
#ifdef _DEBUG
	if (!result)
	{
		fprintf(stderr, "ERROR, l2-norm error %lf is greater than epsilon %lf \n", sqrt(sq_err / sq_ref), epsilon);
	}
#endif

	return result;
}
/*
 * Element type of the matrices.  It is #define'd near the top of this file;
 * the fallback below only takes effect when this function is compiled on its
 * own.
 */
#ifndef TYPE
#define TYPE float
#endif

/**
 * Compute reference data set
 * C = A * B
 * All matrices are dense and stored row by row.  Each dot product is
 * accumulated in double precision and converted to TYPE when stored.
 * @param C          reference data, computed but preallocated (hA x wB)
 * @param A          matrix A as provided to device (hA x wA)
 * @param B          matrix B as provided to device (wA x wB)
 * @param hA         height of matrix A
 * @param wA         width of matrix A, which is also the height of matrix B
 * @param wB         width of matrix B
*/
void computeReference(TYPE* C, const TYPE* A, const TYPE* B, unsigned int hA, unsigned int wA, unsigned int wB)
{
	/* size_t indices: row * width is then computed in size_t rather than
	 * in unsigned int, so element offsets do not wrap at 2^32. */
	size_t row, col, k;
	for (row = 0; row < hA; ++row)
	{
		for (col = 0; col < wB; ++col)
		{
			double sum = 0;
			for (k = 0; k < wA; ++k)
			{
				double a = A[row * wA + k];
				double b = B[k * wB + col];
				sum += a * b;
			}
			C[row * wB + col] = (TYPE)sum;
		}
	}
}
do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) +#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) + +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else +# define UNUSED(x) x +#endif + +#define SIZE 1024 +#define TYPE float +#define REALSIZE (SIZE * sizeof(TYPE)) + +const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ + size_t x = get_global_id(0);\n \ + size_t y = get_global_id(1);\n \ + size_t w = get_global_size(0); \n \ + int idx = y*w+x; \n \ +#ifdef SOCL_DEVICE_TYPE_GPU \n \ + d[idx] = s1[idx] + s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_CPU \n \ + d[idx] = s1[idx] + 2* s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ + d[idx] = s1[idx] + 3 * s2[idx];\n \ +#endif \n \ +#ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ + d[idx] = s1[idx] + 4 * s2[idx];\n \ +#endif \n \ +}"; + +int main(int UNUSED(argc), char** UNUSED(argv)) +{ + cl_platform_id platforms[15]; + cl_uint num_platforms; + cl_device_id devices[15]; + cl_uint num_devices; + cl_context context; + cl_program program; + cl_kernel kernel; + cl_mem s1m, s2m, dm; + cl_command_queue cq; + cl_int err; + unsigned int i; + + TYPE * s1, *s2, d[SIZE]; + + printf("Querying platform...\n"); + clGetPlatformIDs(0, NULL, &num_platforms); + if (num_platforms == 0) + { + printf("No OpenCL platform found.\n"); + exit(77); + } + + err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, &num_platforms); + check(err, "clGetPlatformIDs"); + + int platform_idx = -1; + for (i=0; i +#else +#include "cl_platform.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * 
cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + + +typedef struct _cl_image_format { + cl_channel_order 
image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; + cl_mem buffer; +} cl_image_desc; + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 
+#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 + +/* OpenCL Version */ +#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1 + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define 
CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +/* 
cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 + +/* cl_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +// reserved (1 << 6) +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define 
CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 
+#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +/* cl_kernel_arg_type_qualifer */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define 
CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + 
+/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL 
+clCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + 
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback( cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const 
cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; + + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY 
cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback( cl_event /* event 
*/, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* size */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* size */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) 
CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + 
const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + 
cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* 
global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (CL_CALLBACK * /*user_func*/)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetPrintfCallback(cl_context /* context */, + void (CL_CALLBACK * /* pfn_notify */)(cl_context /* program */, + cl_uint /*printf_data_len */, + char * /* printf_data_ptr */, + void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + + + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. 
The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, + const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; + + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS +#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + extern CL_API_ENTRY cl_mem CL_API_CALL + clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_mem CL_API_CALL + clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* 
image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_int CL_API_CALL + clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_int CL_API_CALL + clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_int CL_API_CALL + clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_int CL_API_CALL + clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY void * CL_API_CALL + clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_1_APIS */ + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ + diff --git a/socl/src/CL/cl_d3d10.h b/socl/src/CL/cl_d3d10.h new file mode 100644 index 0000000..ebc2b61 --- /dev/null +++ b/socl/src/CL/cl_d3d10.h @@ -0,0 +1,126 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_D3D10_H +#define __OPENCL_CL_D3D10_H + +#include <d3d10.h> +#include "cl.h" +#include "cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d10_sharing */ +#define cl_khr_d3d10_sharing 1 + +typedef cl_uint cl_d3d10_device_source_khr; +typedef cl_uint cl_d3d10_device_set_khr; + +/******************************************************************************/ + +// Error Codes +#define CL_INVALID_D3D10_DEVICE_KHR -1002 +#define CL_INVALID_D3D10_RESOURCE_KHR -1003 +#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 +#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 + +// cl_d3d10_device_source_khr +#define CL_D3D10_DEVICE_KHR 0x4010 +#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 + +// cl_d3d10_device_set_khr +#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 +#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 + +// cl_context_info +#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 +#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C + +// cl_mem_info +#define CL_MEM_D3D10_RESOURCE_KHR 0x4015 + +// cl_image_info +#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 + +// cl_command_type +#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 +#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 + 
+/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif // __OPENCL_CL_D3D10_H + diff --git a/socl/src/CL/cl_d3d11.h b/socl/src/CL/cl_d3d11.h new file mode 100644 index 0000000..00f8ffc --- /dev/null +++ b/socl/src/CL/cl_d3d11.h @@ -0,0 +1,126 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_D3D11_H +#define __OPENCL_CL_D3D11_H + +#include <d3d11.h> +#include "cl.h" +#include "cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d11_sharing */ +#define cl_khr_d3d11_sharing 1 + +typedef cl_uint cl_d3d11_device_source_khr; +typedef cl_uint cl_d3d11_device_set_khr; + +/******************************************************************************/ + +// Error Codes +#define CL_INVALID_D3D11_DEVICE_KHR -1006 +#define CL_INVALID_D3D11_RESOURCE_KHR -1007 +#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 +#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 + +// cl_d3d11_device_source +#define CL_D3D11_DEVICE_KHR 0x4019 +#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A + +// cl_d3d11_device_set +#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B +#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C + +// cl_context_info +#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D +#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D + +// cl_mem_info +#define CL_MEM_D3D11_RESOURCE_KHR 0x401E + +// cl_image_info +#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F + +// cl_command_type +#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 +#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + 
ID3D11Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif // __OPENCL_CL_D3D11_H + diff --git a/socl/src/CL/cl_dx9_media_sharing.h b/socl/src/CL/cl_dx9_media_sharing.h new file mode 100644 index 0000000..002731c --- /dev/null +++ b/socl/src/CL/cl_dx9_media_sharing.h @@ -0,0 +1,127 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H +#define __OPENCL_CL_DX9_MEDIA_SHARING_H + +#include "cl.h" +#include "cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_dx9_media_sharing */ +#define cl_khr_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_media_adapter_type_khr; +typedef cl_uint cl_dx9_media_adapter_set_khr; + +#if defined(_WIN32) +#include <d3d9.h> +typedef struct _cl_dx9_surface_info_khr +{ + IDirect3DSurface9 *resource; + HANDLE shared_handle; +} cl_dx9_surface_info_khr; +#endif + + +/******************************************************************************/ + +// Error Codes +#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 +#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 +#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 +#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 + +// cl_dx9_media_adapter_type_khr +#define CL_ADAPTER_D3D9_KHR 0x2020 +#define CL_ADAPTER_D3D9EX_KHR 0x2021 +#define CL_ADAPTER_DXVA_KHR 0x2022 + +// cl_dx9_media_adapter_set_khr +#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 +#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 + +// cl_context_info +#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 +#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 +#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 + +// cl_mem_info +#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 +#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 + +// cl_image_info +#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A + +// cl_command_type +#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B +#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr * media_adapter_type, + void * media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void * surface_info, + cl_uint plane, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif // __OPENCL_CL_DX9_MEDIA_SHARING_H + diff --git a/socl/src/CL/cl_ext.h b/socl/src/CL/cl_ext.h new file mode 100644 index 0000000..1abfdae --- /dev/null +++ b/socl/src/CL/cl_ext.h @@ -0,0 +1,213 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. 
*/ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include <OpenCL/cl.h> + #include <AvailabilityMacros.h> +#else + #include "cl.h" +#endif + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, + void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + * + * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( + cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define
CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 + + +#ifdef CL_VERSION_1_1 + /*********************************** + * cl_ext_device_fission extension * + ***********************************/ + #define cl_ext_device_fission 1 + + extern CL_API_ENTRY cl_int CL_API_CALL + clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + extern CL_API_ENTRY cl_int CL_API_CALL + clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef cl_ulong cl_device_partition_property_ext; + extern CL_API_ENTRY cl_int CL_API_CALL + clCreateSubDevicesEXT( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + /* cl_device_partition_property_ext */ + #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 + #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 + #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 + #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + + /* clDeviceGetInfo selectors */ + #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 + #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 + 
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 + #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 + #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + + /* error codes */ + #define CL_DEVICE_PARTITION_FAILED_EXT -1057 + #define CL_INVALID_PARTITION_COUNT_EXT -1058 + #define CL_INVALID_PARTITION_NAME_EXT -1059 + + /* CL_AFFINITY_DOMAINs */ + #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 + #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 + #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 + #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 + #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 + #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + + /* cl_device_partition_property_ext list terminators */ + #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + + +#endif /* CL_VERSION_1_1 */ + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/socl/src/CL/cl_gl.h b/socl/src/CL/cl_gl.h new file mode 100644 index 0000000..9dc6e85 --- /dev/null +++ b/socl/src/CL/cl_gl.h @@ -0,0 +1,165 @@ +/********************************************************************************** + * Copyright (c) 2011 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#ifdef __APPLE__ +#include <OpenCL/cl.h> +#else +#include "cl.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* bufobj */, + int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /*
renderbuffer */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type */, + cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS +#ifndef BUILDING_SOCL +#warning CL_USE_DEPRECATED_OPENCL_1_1_APIS is defined. These APIs are unsupported and untested in OpenCL 1.2! 
+#endif + extern CL_API_ENTRY cl_mem CL_API_CALL + clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + + extern CL_API_ENTRY cl_mem CL_API_CALL + clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_1_APIS */ + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/socl/src/CL/cl_gl_ext.h b/socl/src/CL/cl_gl_ext.h new file mode 100644 index 0000000..3970e88 --- /dev/null +++ b/socl/src/CL/cl_gl_ext.h @@ -0,0 +1,69 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The
Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ +/* OpenGL dependencies. */ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include <OpenCL/cl_gl.h> +#else + #include "cl_gl.h" +#endif + +/* + * For each extension, follow this template + * cl_VEN_extname extension */ +/* #define cl_VEN_extname 1 + * ... define new types, if any + * ... define new tokens, if any + * ... define new APIs, if any + * + * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header + * This allows us to avoid having to decide whether to include GL headers or GLES here.
+ */ + +/* + * cl_khr_gl_event extension + * See section 9.9 in the OpenCL 1.1 spec for more information + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context /* context */, + cl_GLsync /* cl_GLsync */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/socl/src/CL/cl_platform.h b/socl/src/CL/cl_platform.h new file mode 100644 index 0000000..983ad61 --- /dev/null +++ b/socl/src/CL/cl_platform.h @@ -0,0 +1,1204 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include <AvailabilityMacros.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +#ifdef __APPLE__ + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK +#else + #define CL_EXTENSION_WEAK_LINK + #define CL_API_SUFFIX__VERSION_1_0 + #define CL_EXT_SUFFIX__VERSION_1_0 + #define CL_API_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16
cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 
0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include <stdint.h> + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define
CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) 
+ #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include <stddef.h> + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required.
*/ + typedef vector unsigned char __cl_uchar16; + typedef vector signed char __cl_char16; + typedef vector unsigned short __cl_ushort8; + typedef vector signed short __cl_short8; + typedef vector unsigned int __cl_uint4; + typedef vector signed int __cl_int4; + typedef vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <xmmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <emmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if
defined( __MMX__ ) + #include <mmintrin.h> + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <immintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include <crtdefs.h> */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some
method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y; }; + __extension__ struct{ cl_char s0, s1; }; + __extension__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w; }; + __extension__ struct{ cl_char s0, s1, s2, s3; }; + __extension__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w; }; + __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y; }; + __extension__ struct{ cl_uchar s0, s1; }; + __extension__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) /* upper-case guard, same spelling as the v2 members of cl_uchar4/8/16 */ + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3; }; + __extension__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if defined( __GNUC__) && !
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y; }; + __extension__ struct{ cl_short s0, s1; }; + __extension__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w; }; + __extension__ struct{ cl_short s0, s1, s2, s3; }; + __extension__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w; }; + __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y; }; + __extension__ struct{ cl_ushort s0, s1; }; + __extension__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3; }; + __extension__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y; }; + __extension__ struct{ cl_int s0, s1; }; + __extension__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w; }; + __extension__ struct{ cl_int s0, s1, s2, s3; }; + __extension__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w; }; + __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y; }; + __extension__ struct{ cl_uint s0, s1; }; + __extension__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w; }; + __extension__ struct{ cl_uint s0, s1, s2, s3; }; + __extension__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w; }; + __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y; }; + __extension__ struct{ cl_long s0, s1; }; + __extension__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w; }; + __extension__ struct{ cl_long s0, s1, s2, s3; }; + __extension__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w; }; + __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y; }; + __extension__ struct{ cl_ulong s0, s1; }; + __extension__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3; }; + __extension__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y; }; + __extension__ struct{ cl_float s0, s1; }; + __extension__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w; }; + __extension__ struct{ cl_float s0, s1, s2, s3; }; + __extension__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w; }; + __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y; }; + __extension__ struct{ cl_double s0, s1; }; + __extension__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w; }; + __extension__ struct{ cl_double s0, s1, s2, s3; }; + __extension__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w; }; + __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/socl/src/CL/opencl.h b/socl/src/CL/opencl.h new file mode 100644 index 0000000..9f4b722 --- /dev/null +++ b/socl/src/CL/opencl.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + +#include <OpenCL/cl.h> +#include <OpenCL/cl_gl.h> +#include <OpenCL/cl_gl_ext.h> +#include <OpenCL/cl_ext.h> + +#else + +#include "cl.h" +#include "cl_gl.h" +#include "cl_gl_ext.h" +#include "cl_ext.h" + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ + diff --git a/socl/src/Makefile.am b/socl/src/Makefile.am new file mode 100644 index 0000000..96d83ce --- /dev/null +++ b/socl/src/Makefile.am @@ -0,0 +1,136 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-notests.mk + +CLEANFILES = *.gcno *.gcda + +AM_CPPFLAGS = -DBUILDING_SOCL -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/socl/src $(STARPU_H_CPPFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) + +SUBDIRS = + +lib_LTLIBRARIES = libsocl-@STARPU_EFFECTIVE_VERSION@.la + +noinst_HEADERS = \ + command.h \ + command_list.h \ + command_queue.h \ + debug.h \ + event.h \ + gc.h \ + getinfo.h \ + mem_objects.h \ + ocl_icd.h \ + socl.h \ + task.h \ + util.h \ + init.h \ + CL/cl_d3d10.h \ + CL/cl_ext.h \ + CL/cl.h \ + CL/cl_d3d11.h \ + CL/cl_gl_ext.h \ + CL/cl_platform.h \ + CL/cl_dx9_media_sharing.h \ + CL/cl_gl.h \ + CL/opencl.h + +libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSOCL_INTERFACE_CURRENT):$(LIBSOCL_INTERFACE_REVISION):$(LIBSOCL_INTERFACE_AGE) + +libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + command.c \ + command_list.c \ + command_queue.c \ + debug.c \ + event.c \ + gc.c \ + init.c \ + mem_objects.c \ + socl.c \ + task.c \ + util.c \ + cl_getplatformids.c \ + cl_getplatforminfo.c \ + cl_getdeviceids.c \ + cl_getdeviceinfo.c \ + cl_releasecontext.c \ + cl_createcontext.c \ + cl_createcontextfromtype.c \ + cl_retaincontext.c \ + cl_getcontextinfo.c \ + 
cl_releasecommandqueue.c \ + cl_createcommandqueue.c \ + cl_retaincommandqueue.c \ + cl_getcommandqueueinfo.c \ + cl_setcommandqueueproperty.c \ + cl_releaseevent.c \ + cl_waitforevents.c \ + cl_geteventinfo.c \ + cl_retainevent.c \ + cl_enqueuemarker.c \ + cl_enqueuewaitforevents.c \ + cl_enqueuebarrier.c \ + cl_flush.c \ + cl_finish.c \ + cl_releasememobject.c \ + cl_createbuffer.c \ + cl_createimage2d.c \ + cl_createimage3d.c \ + cl_retainmemobject.c \ + cl_getsupportedimageformats.c \ + cl_getmemobjectinfo.c \ + cl_getimageinfo.c \ + cl_createsampler.c \ + cl_retainsampler.c \ + cl_releasesampler.c \ + cl_getsamplerinfo.c \ + cl_releaseprogram.c \ + cl_createprogramwithsource.c \ + cl_createprogramwithbinary.c \ + cl_retainprogram.c \ + cl_buildprogram.c \ + cl_unloadcompiler.c \ + cl_getprograminfo.c \ + cl_getprogrambuildinfo.c \ + cl_releasekernel.c \ + cl_createkernel.c \ + cl_createkernelsinprogram.c \ + cl_retainkernel.c \ + cl_setkernelarg.c \ + cl_getkernelinfo.c \ + cl_getkernelworkgroupinfo.c \ + cl_enqueuereadbuffer.c \ + cl_enqueuewritebuffer.c \ + cl_enqueuecopybuffer.c \ + cl_enqueuereadimage.c \ + cl_enqueuewriteimage.c \ + cl_enqueuecopyimage.c \ + cl_enqueuecopyimagetobuffer.c \ + cl_enqueuecopybuffertoimage.c \ + cl_enqueuemapbuffer.c \ + cl_enqueuemapimage.c \ + cl_enqueueunmapmemobject.c \ + cl_enqueuetask.c \ + cl_enqueuendrangekernel.c \ + cl_enqueuenativekernel.c \ + cl_enqueuemarkerwithwaitlist.c \ + cl_enqueuebarrierwithwaitlist.c \ + cl_geteventprofilinginfo.c \ + cl_getextensionfunctionaddress.c \ + cl_icdgetplatformidskhr.c diff --git a/socl/src/Makefile.in b/socl/src/Makefile.in new file mode 100644 index 0000000..5e982f6 --- /dev/null +++ b/socl/src/Makefile.in @@ -0,0 +1,1543 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = socl/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo 
"$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +am_libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = command.lo \ + command_list.lo command_queue.lo debug.lo event.lo gc.lo \ + init.lo mem_objects.lo socl.lo task.lo util.lo \ + cl_getplatformids.lo cl_getplatforminfo.lo cl_getdeviceids.lo \ + cl_getdeviceinfo.lo cl_releasecontext.lo cl_createcontext.lo \ + cl_createcontextfromtype.lo cl_retaincontext.lo \ + cl_getcontextinfo.lo cl_releasecommandqueue.lo \ + cl_createcommandqueue.lo cl_retaincommandqueue.lo \ + cl_getcommandqueueinfo.lo cl_setcommandqueueproperty.lo \ + cl_releaseevent.lo cl_waitforevents.lo cl_geteventinfo.lo \ + cl_retainevent.lo cl_enqueuemarker.lo \ + cl_enqueuewaitforevents.lo cl_enqueuebarrier.lo cl_flush.lo \ + cl_finish.lo cl_releasememobject.lo cl_createbuffer.lo \ + cl_createimage2d.lo cl_createimage3d.lo cl_retainmemobject.lo \ + cl_getsupportedimageformats.lo cl_getmemobjectinfo.lo \ + cl_getimageinfo.lo cl_createsampler.lo cl_retainsampler.lo \ + cl_releasesampler.lo cl_getsamplerinfo.lo cl_releaseprogram.lo \ + cl_createprogramwithsource.lo cl_createprogramwithbinary.lo \ + cl_retainprogram.lo cl_buildprogram.lo cl_unloadcompiler.lo \ + cl_getprograminfo.lo cl_getprogrambuildinfo.lo \ + cl_releasekernel.lo cl_createkernel.lo \ + cl_createkernelsinprogram.lo cl_retainkernel.lo \ + cl_setkernelarg.lo cl_getkernelinfo.lo \ + cl_getkernelworkgroupinfo.lo cl_enqueuereadbuffer.lo \ + cl_enqueuewritebuffer.lo cl_enqueuecopybuffer.lo \ + cl_enqueuereadimage.lo cl_enqueuewriteimage.lo \ + cl_enqueuecopyimage.lo cl_enqueuecopyimagetobuffer.lo \ + cl_enqueuecopybuffertoimage.lo cl_enqueuemapbuffer.lo \ + cl_enqueuemapimage.lo cl_enqueueunmapmemobject.lo \ + cl_enqueuetask.lo cl_enqueuendrangekernel.lo \ + cl_enqueuenativekernel.lo 
cl_enqueuemarkerwithwaitlist.lo \ + cl_enqueuebarrierwithwaitlist.lo cl_geteventprofilinginfo.lo \ + cl_getextensionfunctionaddress.lo cl_icdgetplatformidskhr.lo +libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libsocl_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) $(LDFLAGS) -o \ + $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/cl_buildprogram.Plo \ + ./$(DEPDIR)/cl_createbuffer.Plo \ + ./$(DEPDIR)/cl_createcommandqueue.Plo \ + ./$(DEPDIR)/cl_createcontext.Plo \ + ./$(DEPDIR)/cl_createcontextfromtype.Plo \ + ./$(DEPDIR)/cl_createimage2d.Plo \ + ./$(DEPDIR)/cl_createimage3d.Plo \ + ./$(DEPDIR)/cl_createkernel.Plo \ + ./$(DEPDIR)/cl_createkernelsinprogram.Plo \ + ./$(DEPDIR)/cl_createprogramwithbinary.Plo \ + ./$(DEPDIR)/cl_createprogramwithsource.Plo \ + ./$(DEPDIR)/cl_createsampler.Plo \ + ./$(DEPDIR)/cl_enqueuebarrier.Plo \ + ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo \ + ./$(DEPDIR)/cl_enqueuecopybuffer.Plo \ + ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo \ + ./$(DEPDIR)/cl_enqueuecopyimage.Plo \ + ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo \ + ./$(DEPDIR)/cl_enqueuemapbuffer.Plo \ + ./$(DEPDIR)/cl_enqueuemapimage.Plo \ + 
./$(DEPDIR)/cl_enqueuemarker.Plo \ + ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo \ + ./$(DEPDIR)/cl_enqueuenativekernel.Plo \ + ./$(DEPDIR)/cl_enqueuendrangekernel.Plo \ + ./$(DEPDIR)/cl_enqueuereadbuffer.Plo \ + ./$(DEPDIR)/cl_enqueuereadimage.Plo \ + ./$(DEPDIR)/cl_enqueuetask.Plo \ + ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo \ + ./$(DEPDIR)/cl_enqueuewaitforevents.Plo \ + ./$(DEPDIR)/cl_enqueuewritebuffer.Plo \ + ./$(DEPDIR)/cl_enqueuewriteimage.Plo ./$(DEPDIR)/cl_finish.Plo \ + ./$(DEPDIR)/cl_flush.Plo \ + ./$(DEPDIR)/cl_getcommandqueueinfo.Plo \ + ./$(DEPDIR)/cl_getcontextinfo.Plo \ + ./$(DEPDIR)/cl_getdeviceids.Plo \ + ./$(DEPDIR)/cl_getdeviceinfo.Plo \ + ./$(DEPDIR)/cl_geteventinfo.Plo \ + ./$(DEPDIR)/cl_geteventprofilinginfo.Plo \ + ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo \ + ./$(DEPDIR)/cl_getimageinfo.Plo \ + ./$(DEPDIR)/cl_getkernelinfo.Plo \ + ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo \ + ./$(DEPDIR)/cl_getmemobjectinfo.Plo \ + ./$(DEPDIR)/cl_getplatformids.Plo \ + ./$(DEPDIR)/cl_getplatforminfo.Plo \ + ./$(DEPDIR)/cl_getprogrambuildinfo.Plo \ + ./$(DEPDIR)/cl_getprograminfo.Plo \ + ./$(DEPDIR)/cl_getsamplerinfo.Plo \ + ./$(DEPDIR)/cl_getsupportedimageformats.Plo \ + ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo \ + ./$(DEPDIR)/cl_releasecommandqueue.Plo \ + ./$(DEPDIR)/cl_releasecontext.Plo \ + ./$(DEPDIR)/cl_releaseevent.Plo \ + ./$(DEPDIR)/cl_releasekernel.Plo \ + ./$(DEPDIR)/cl_releasememobject.Plo \ + ./$(DEPDIR)/cl_releaseprogram.Plo \ + ./$(DEPDIR)/cl_releasesampler.Plo \ + ./$(DEPDIR)/cl_retaincommandqueue.Plo \ + ./$(DEPDIR)/cl_retaincontext.Plo \ + ./$(DEPDIR)/cl_retainevent.Plo ./$(DEPDIR)/cl_retainkernel.Plo \ + ./$(DEPDIR)/cl_retainmemobject.Plo \ + ./$(DEPDIR)/cl_retainprogram.Plo \ + ./$(DEPDIR)/cl_retainsampler.Plo \ + ./$(DEPDIR)/cl_setcommandqueueproperty.Plo \ + ./$(DEPDIR)/cl_setkernelarg.Plo \ + ./$(DEPDIR)/cl_unloadcompiler.Plo \ + ./$(DEPDIR)/cl_waitforevents.Plo ./$(DEPDIR)/command.Plo \ + ./$(DEPDIR)/command_list.Plo 
./$(DEPDIR)/command_queue.Plo \ + ./$(DEPDIR)/debug.Plo ./$(DEPDIR)/event.Plo ./$(DEPDIR)/gc.Plo \ + ./$(DEPDIR)/init.Plo ./$(DEPDIR)/mem_objects.Plo \ + ./$(DEPDIR)/socl.Plo ./$(DEPDIR)/task.Plo ./$(DEPDIR)/util.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS 
\ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = 
@AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = 
@LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = 
@MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ 
+STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = 
@X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ 
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +CLEANFILES = *.gcno *.gcda +AM_CPPFLAGS = -DBUILDING_SOCL -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/socl/src $(STARPU_H_CPPFLAGS) +SUBDIRS = +lib_LTLIBRARIES = libsocl-@STARPU_EFFECTIVE_VERSION@.la +noinst_HEADERS = \ + command.h \ + command_list.h \ + command_queue.h \ + debug.h \ + event.h \ + gc.h \ + getinfo.h \ + mem_objects.h \ + ocl_icd.h \ + socl.h \ + task.h \ + util.h \ + init.h \ + CL/cl_d3d10.h \ + CL/cl_ext.h \ + CL/cl.h \ + CL/cl_d3d11.h \ + CL/cl_gl_ext.h \ + CL/cl_platform.h \ + CL/cl_dx9_media_sharing.h \ + CL/cl_gl.h \ + 
CL/opencl.h + +libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSOCL_INTERFACE_CURRENT):$(LIBSOCL_INTERFACE_REVISION):$(LIBSOCL_INTERFACE_AGE) + +libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + command.c \ + command_list.c \ + command_queue.c \ + debug.c \ + event.c \ + gc.c \ + init.c \ + mem_objects.c \ + socl.c \ + task.c \ + util.c \ + cl_getplatformids.c \ + cl_getplatforminfo.c \ + cl_getdeviceids.c \ + cl_getdeviceinfo.c \ + cl_releasecontext.c \ + cl_createcontext.c \ + cl_createcontextfromtype.c \ + cl_retaincontext.c \ + cl_getcontextinfo.c \ + cl_releasecommandqueue.c \ + cl_createcommandqueue.c \ + cl_retaincommandqueue.c \ + cl_getcommandqueueinfo.c \ + cl_setcommandqueueproperty.c \ + cl_releaseevent.c \ + cl_waitforevents.c \ + cl_geteventinfo.c \ + cl_retainevent.c \ + cl_enqueuemarker.c \ + cl_enqueuewaitforevents.c \ + cl_enqueuebarrier.c \ + cl_flush.c \ + cl_finish.c \ + cl_releasememobject.c \ + cl_createbuffer.c \ + cl_createimage2d.c \ + cl_createimage3d.c \ + cl_retainmemobject.c \ + cl_getsupportedimageformats.c \ + cl_getmemobjectinfo.c \ + cl_getimageinfo.c \ + cl_createsampler.c \ + cl_retainsampler.c \ + cl_releasesampler.c \ + cl_getsamplerinfo.c \ + cl_releaseprogram.c \ + cl_createprogramwithsource.c \ + cl_createprogramwithbinary.c \ + cl_retainprogram.c \ + cl_buildprogram.c \ + cl_unloadcompiler.c \ + cl_getprograminfo.c \ + cl_getprogrambuildinfo.c \ + cl_releasekernel.c \ + cl_createkernel.c \ + cl_createkernelsinprogram.c \ + cl_retainkernel.c \ + cl_setkernelarg.c \ + cl_getkernelinfo.c \ + cl_getkernelworkgroupinfo.c \ + cl_enqueuereadbuffer.c \ + cl_enqueuewritebuffer.c \ + cl_enqueuecopybuffer.c \ + cl_enqueuereadimage.c \ + cl_enqueuewriteimage.c \ + cl_enqueuecopyimage.c \ + cl_enqueuecopyimagetobuffer.c \ + cl_enqueuecopybuffertoimage.c \ + cl_enqueuemapbuffer.c \ + cl_enqueuemapimage.c \ + cl_enqueueunmapmemobject.c \ + cl_enqueuetask.c \ + cl_enqueuendrangekernel.c \ + 
cl_enqueuenativekernel.c \ + cl_enqueuemarkerwithwaitlist.c \ + cl_enqueuebarrierwithwaitlist.c \ + cl_geteventprofilinginfo.c \ + cl_getextensionfunctionaddress.c \ + cl_icdgetplatformidskhr.c + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign socl/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libsocl-@STARPU_EFFECTIVE_VERSION@.la: $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libsocl_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_buildprogram.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createbuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcommandqueue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcontext.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcontextfromtype.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/cl_createimage2d.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createimage3d.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createkernel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createkernelsinprogram.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createprogramwithbinary.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createprogramwithsource.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createsampler.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuebarrier.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopybuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopyimage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemapbuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemapimage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemarker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuenativekernel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/cl_enqueuendrangekernel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuereadbuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuereadimage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuetask.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueueunmapmemobject.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewaitforevents.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewritebuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewriteimage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_finish.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_flush.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getcommandqueueinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getcontextinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getdeviceids.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getdeviceinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_geteventinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_geteventprofilinginfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getextensionfunctionaddress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getimageinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getkernelinfo.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getmemobjectinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getplatformids.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getplatforminfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getprogrambuildinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getprograminfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getsamplerinfo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getsupportedimageformats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_icdgetplatformidskhr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasecommandqueue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasecontext.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releaseevent.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasekernel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasememobject.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releaseprogram.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasesampler.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retaincommandqueue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retaincontext.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainevent.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainkernel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainmemobject.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainprogram.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainsampler.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_setcommandqueueproperty.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_setkernelarg.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_unloadcompiler.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_waitforevents.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command_list.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command_queue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem_objects.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/socl.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: 
$(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ 
+ install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/cl_buildprogram.Plo + -rm -f ./$(DEPDIR)/cl_createbuffer.Plo + -rm -f ./$(DEPDIR)/cl_createcommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_createcontext.Plo + -rm -f ./$(DEPDIR)/cl_createcontextfromtype.Plo + -rm -f ./$(DEPDIR)/cl_createimage2d.Plo + -rm -f ./$(DEPDIR)/cl_createimage3d.Plo + -rm -f ./$(DEPDIR)/cl_createkernel.Plo + -rm -f ./$(DEPDIR)/cl_createkernelsinprogram.Plo + -rm -f ./$(DEPDIR)/cl_createprogramwithbinary.Plo + -rm -f ./$(DEPDIR)/cl_createprogramwithsource.Plo + -rm -f ./$(DEPDIR)/cl_createsampler.Plo + -rm -f ./$(DEPDIR)/cl_enqueuebarrier.Plo + -rm -f ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopybuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopyimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemapbuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemapimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemarker.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo + -rm -f ./$(DEPDIR)/cl_enqueuenativekernel.Plo + -rm -f ./$(DEPDIR)/cl_enqueuendrangekernel.Plo + -rm -f ./$(DEPDIR)/cl_enqueuereadbuffer.Plo + -rm -f 
./$(DEPDIR)/cl_enqueuereadimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuetask.Plo + -rm -f ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewaitforevents.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewritebuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewriteimage.Plo + -rm -f ./$(DEPDIR)/cl_finish.Plo + -rm -f ./$(DEPDIR)/cl_flush.Plo + -rm -f ./$(DEPDIR)/cl_getcommandqueueinfo.Plo + -rm -f ./$(DEPDIR)/cl_getcontextinfo.Plo + -rm -f ./$(DEPDIR)/cl_getdeviceids.Plo + -rm -f ./$(DEPDIR)/cl_getdeviceinfo.Plo + -rm -f ./$(DEPDIR)/cl_geteventinfo.Plo + -rm -f ./$(DEPDIR)/cl_geteventprofilinginfo.Plo + -rm -f ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo + -rm -f ./$(DEPDIR)/cl_getimageinfo.Plo + -rm -f ./$(DEPDIR)/cl_getkernelinfo.Plo + -rm -f ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo + -rm -f ./$(DEPDIR)/cl_getmemobjectinfo.Plo + -rm -f ./$(DEPDIR)/cl_getplatformids.Plo + -rm -f ./$(DEPDIR)/cl_getplatforminfo.Plo + -rm -f ./$(DEPDIR)/cl_getprogrambuildinfo.Plo + -rm -f ./$(DEPDIR)/cl_getprograminfo.Plo + -rm -f ./$(DEPDIR)/cl_getsamplerinfo.Plo + -rm -f ./$(DEPDIR)/cl_getsupportedimageformats.Plo + -rm -f ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo + -rm -f ./$(DEPDIR)/cl_releasecommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_releasecontext.Plo + -rm -f ./$(DEPDIR)/cl_releaseevent.Plo + -rm -f ./$(DEPDIR)/cl_releasekernel.Plo + -rm -f ./$(DEPDIR)/cl_releasememobject.Plo + -rm -f ./$(DEPDIR)/cl_releaseprogram.Plo + -rm -f ./$(DEPDIR)/cl_releasesampler.Plo + -rm -f ./$(DEPDIR)/cl_retaincommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_retaincontext.Plo + -rm -f ./$(DEPDIR)/cl_retainevent.Plo + -rm -f ./$(DEPDIR)/cl_retainkernel.Plo + -rm -f ./$(DEPDIR)/cl_retainmemobject.Plo + -rm -f ./$(DEPDIR)/cl_retainprogram.Plo + -rm -f ./$(DEPDIR)/cl_retainsampler.Plo + -rm -f ./$(DEPDIR)/cl_setcommandqueueproperty.Plo + -rm -f ./$(DEPDIR)/cl_setkernelarg.Plo + -rm -f ./$(DEPDIR)/cl_unloadcompiler.Plo + -rm -f ./$(DEPDIR)/cl_waitforevents.Plo + -rm -f ./$(DEPDIR)/command.Plo + -rm -f 
./$(DEPDIR)/command_list.Plo + -rm -f ./$(DEPDIR)/command_queue.Plo + -rm -f ./$(DEPDIR)/debug.Plo + -rm -f ./$(DEPDIR)/event.Plo + -rm -f ./$(DEPDIR)/gc.Plo + -rm -f ./$(DEPDIR)/init.Plo + -rm -f ./$(DEPDIR)/mem_objects.Plo + -rm -f ./$(DEPDIR)/socl.Plo + -rm -f ./$(DEPDIR)/task.Plo + -rm -f ./$(DEPDIR)/util.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/cl_buildprogram.Plo + -rm -f ./$(DEPDIR)/cl_createbuffer.Plo + -rm -f ./$(DEPDIR)/cl_createcommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_createcontext.Plo + -rm -f ./$(DEPDIR)/cl_createcontextfromtype.Plo + -rm -f ./$(DEPDIR)/cl_createimage2d.Plo + -rm -f ./$(DEPDIR)/cl_createimage3d.Plo + -rm -f ./$(DEPDIR)/cl_createkernel.Plo + -rm -f ./$(DEPDIR)/cl_createkernelsinprogram.Plo + -rm -f ./$(DEPDIR)/cl_createprogramwithbinary.Plo + -rm -f ./$(DEPDIR)/cl_createprogramwithsource.Plo + -rm -f ./$(DEPDIR)/cl_createsampler.Plo + -rm -f ./$(DEPDIR)/cl_enqueuebarrier.Plo + -rm -f ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopybuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopyimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemapbuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemapimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemarker.Plo + -rm -f ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo + -rm -f 
./$(DEPDIR)/cl_enqueuenativekernel.Plo + -rm -f ./$(DEPDIR)/cl_enqueuendrangekernel.Plo + -rm -f ./$(DEPDIR)/cl_enqueuereadbuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuereadimage.Plo + -rm -f ./$(DEPDIR)/cl_enqueuetask.Plo + -rm -f ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewaitforevents.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewritebuffer.Plo + -rm -f ./$(DEPDIR)/cl_enqueuewriteimage.Plo + -rm -f ./$(DEPDIR)/cl_finish.Plo + -rm -f ./$(DEPDIR)/cl_flush.Plo + -rm -f ./$(DEPDIR)/cl_getcommandqueueinfo.Plo + -rm -f ./$(DEPDIR)/cl_getcontextinfo.Plo + -rm -f ./$(DEPDIR)/cl_getdeviceids.Plo + -rm -f ./$(DEPDIR)/cl_getdeviceinfo.Plo + -rm -f ./$(DEPDIR)/cl_geteventinfo.Plo + -rm -f ./$(DEPDIR)/cl_geteventprofilinginfo.Plo + -rm -f ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo + -rm -f ./$(DEPDIR)/cl_getimageinfo.Plo + -rm -f ./$(DEPDIR)/cl_getkernelinfo.Plo + -rm -f ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo + -rm -f ./$(DEPDIR)/cl_getmemobjectinfo.Plo + -rm -f ./$(DEPDIR)/cl_getplatformids.Plo + -rm -f ./$(DEPDIR)/cl_getplatforminfo.Plo + -rm -f ./$(DEPDIR)/cl_getprogrambuildinfo.Plo + -rm -f ./$(DEPDIR)/cl_getprograminfo.Plo + -rm -f ./$(DEPDIR)/cl_getsamplerinfo.Plo + -rm -f ./$(DEPDIR)/cl_getsupportedimageformats.Plo + -rm -f ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo + -rm -f ./$(DEPDIR)/cl_releasecommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_releasecontext.Plo + -rm -f ./$(DEPDIR)/cl_releaseevent.Plo + -rm -f ./$(DEPDIR)/cl_releasekernel.Plo + -rm -f ./$(DEPDIR)/cl_releasememobject.Plo + -rm -f ./$(DEPDIR)/cl_releaseprogram.Plo + -rm -f ./$(DEPDIR)/cl_releasesampler.Plo + -rm -f ./$(DEPDIR)/cl_retaincommandqueue.Plo + -rm -f ./$(DEPDIR)/cl_retaincontext.Plo + -rm -f ./$(DEPDIR)/cl_retainevent.Plo + -rm -f ./$(DEPDIR)/cl_retainkernel.Plo + -rm -f ./$(DEPDIR)/cl_retainmemobject.Plo + -rm -f ./$(DEPDIR)/cl_retainprogram.Plo + -rm -f ./$(DEPDIR)/cl_retainsampler.Plo + -rm -f ./$(DEPDIR)/cl_setcommandqueueproperty.Plo + -rm -f ./$(DEPDIR)/cl_setkernelarg.Plo 
+ -rm -f ./$(DEPDIR)/cl_unloadcompiler.Plo + -rm -f ./$(DEPDIR)/cl_waitforevents.Plo + -rm -f ./$(DEPDIR)/command.Plo + -rm -f ./$(DEPDIR)/command_list.Plo + -rm -f ./$(DEPDIR)/command_queue.Plo + -rm -f ./$(DEPDIR)/debug.Plo + -rm -f ./$(DEPDIR)/event.Plo + -rm -f ./$(DEPDIR)/gc.Plo + -rm -f ./$(DEPDIR)/init.Plo + -rm -f ./$(DEPDIR)/mem_objects.Plo + -rm -f ./$(DEPDIR)/socl.Plo + -rm -f ./$(DEPDIR)/task.Plo + -rm -f ./$(DEPDIR)/util.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c 
- -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/socl/src/cl_buildprogram.c b/socl/src/cl_buildprogram.c new file mode 100644 index 0000000..8de8f31 --- /dev/null +++ b/socl/src/cl_buildprogram.c @@ -0,0 +1,123 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include "socl.h"
+
+/* Arguments shared by all the workers running soclBuildProgram_task() */
+struct bp_data
+{
+	cl_program program;			/* program to build */
+	char * options;				/* build options given by the caller, or NULL */
+	const cl_device_id * device_list;	/* devices the program is built for */
+	cl_uint num_devices;			/* number of entries in device_list */
+};
+
+/**
+ * \brief Build the program for the device driven by the calling worker
+ *
+ * Run on every worker selected by soclBuildProgram(). Returns immediately when
+ * the calling worker does not match any device of the list. A build failure is
+ * only reported through DEBUG_CL/ERROR_MSG, together with the build log.
+ *
+ * \param data the struct bp_data describing the build
+ */
+static void soclBuildProgram_task(void *data)
+{
+	struct bp_data *d = data;
+	cl_device_id device;
+	cl_int err;
+	unsigned int i;
+
+	int wid = starpu_worker_get_id_check();
+
+	/* Check if the kernel has to be built for this device */
+	for (i = 0; i < d->num_devices; i++)
+	{
+		if (d->device_list[i]->worker_id == wid)
+			break;
+	}
+	if (i == d->num_devices)
+		return;
+
+	/* range selects this worker's entry in the cl_programs array */
+	int range = starpu_worker_get_range();
+	starpu_opencl_get_device(wid, &device);
+
+	DEBUG_MSG("[Worker %d] Building program...\n", wid);
+
+	/* Stays 0, hence "UNKNOWN", if the query fails */
+	cl_device_type dev_type = 0;
+	clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &dev_type, NULL);
+	const char * dev_type_str = (dev_type == CL_DEVICE_TYPE_CPU ? "CPU" :
+				     dev_type == CL_DEVICE_TYPE_GPU ? "GPU" :
+				     dev_type == CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR" : "UNKNOWN");
+
+	/* Prepend a macro naming the device type to the caller's options;
+	 * snprintf truncates options that do not fit in opts */
+	char opts[4096];
+	snprintf(opts, sizeof(opts), "-DSOCL_DEVICE_TYPE_%s %s",
+		 dev_type_str, (d->options != NULL ? d->options : ""));
+
+	err = clBuildProgram(d->program->cl_programs[range], 1, &device, opts, NULL, NULL);
+	if (err != CL_SUCCESS)
+	{
+		/* Fetch the build log: first its size, then its content */
+		size_t len = 0;
+		clGetProgramBuildInfo(d->program->cl_programs[range], device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
+		/* Zero-filled, so the log is a valid string even if the second query fails */
+		char * buffer = calloc(len+1, 1);
+		if (buffer != NULL)
+			clGetProgramBuildInfo(d->program->cl_programs[range], device, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);
+		DEBUG_CL("clBuildProgram", err);
+		ERROR_MSG("clBuildProgram: %s\n Aborting.\n", buffer != NULL ? buffer : "(build log unavailable)");
+		free(buffer);
+	}
+
+	DEBUG_MSG("[Worker %d] Done building.\n", wid);
+}
+
+/**
+ * \brief Build a program for a list of devices
+ *
+ * The build options are duplicated into \p program, then
+ * soclBuildProgram_task() is executed on the worker of every requested device.
+ * Build failures are reported by the task itself and are not propagated to the
+ * return value.
+ *
+ * \param program program to build
+ * \param num_devices number of entries in \p device_list
+ * \param device_list devices to build for; when NULL, the devices of the
+ *        context associated to the program are used
+ * \param options build options, may be NULL
+ * \param pfn_notify if not NULL, called with \p program and \p user_data once
+ *        the build tasks have been executed
+ * \param user_data passed as is to \p pfn_notify
+ * \return CL_SUCCESS, CL_INVALID_VALUE when there is no device to build for,
+ *         or CL_OUT_OF_HOST_MEMORY when an allocation fails
+ */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclBuildProgram(cl_program program,
+		 cl_uint num_devices,
+		 const cl_device_id * device_list,
+		 const char * options,
+		 void (CL_CALLBACK *pfn_notify)(cl_program program, void * user_data),
+		 void * user_data)
+{
+	struct bp_data *data;
+	char *options_copy = NULL;
+
+	/* If the device list is empty, we compile for every device in the context associated to the program */
+	if (device_list == NULL)
+	{
+		num_devices = program->context->num_devices;
+		device_list = program->context->devices;
+	}
+
+	/* Nothing to build for; this also keeps the workers array below from being zero-sized */
+	if (num_devices == 0)
+		return CL_INVALID_VALUE;
+
+	if (options != NULL)
+	{
+		options_copy = strdup(options);
+		if (options_copy == NULL)
+			return CL_OUT_OF_HOST_MEMORY;
+	}
+
+	data = malloc(sizeof(*data));
+	if (data == NULL)
+	{
+		free(options_copy);
+		return CL_OUT_OF_HOST_MEMORY;
+	}
+
+	/* NOTE(review): a previous program->options value is overwritten without
+	 * being freed -- confirm whether building the same program twice leaks it */
+	program->options = options_copy;
+	program->options_size = options != NULL ? strlen(options)+1 : 0;
+
+	gc_entity_store(&data->program, program);
+	data->options = (char*)options;
+	data->num_devices = num_devices;
+	data->device_list = device_list;
+
+	/*FIXME: starpu_execute_on_specific_workers is synchronous.
+	 * However pfn_notify is useful only because build is supposed to be asynchronous
+	 */
+	unsigned workers[num_devices];
+	unsigned i;
+	for (i = 0; i < num_devices; i++)
+	{
+		workers[i] = device_list[i]->worker_id;
+	}
+	starpu_execute_on_specific_workers(soclBuildProgram_task, data, num_devices, workers, "SOCL_BUILD_PROGRAM");
+
+	if (pfn_notify != NULL)
+		pfn_notify(program, user_data);
+
+	gc_entity_unstore(&data->program);
+	free(data);
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_createbuffer.c b/socl/src/cl_createbuffer.c
new file mode 100644
index 0000000..09b32fb
--- /dev/null
+++ b/socl/src/cl_createbuffer.c
@@ -0,0 +1,150 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/* Release callback registered with gc_entity_alloc() for buffers */
+static void release_callback_memobject(void * e)
+{
+	cl_mem mem = (cl_mem)e;
+
+	/* Release references */
+	gc_entity_unstore(&mem->context);
+
+	//Delete this mem_object from the mem_object list
+	mem_object_release(mem);
+
+	/* Destruct object */
+	starpu_data_unregister_submit(mem->handle);
+
+	/* With CL_MEM_USE_HOST_PTR the memory belongs to the application */
+	if (!(mem->flags & CL_MEM_USE_HOST_PTR))
+		free(mem->ptr);
+}
+
+/**
+ * \brief Create a buffer
+ *
+ * A buffer always has an allocated region in host memory. If CL_MEM_USE_HOST_PTR
+ * is set, we use memory pointed by host_ptr, otherwise some host memory is
+ * allocated.
+ *
+ * If CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR is set, the memory pointed to by
+ * host_ptr is not coherent. To enforce coherency, you have to map the buffer
+ * (clEnqueueMapBuffer).
+ *
+ * If CL_MEM_COPY_HOST_PTR is set, the buffer will be duplicated in host memory. You
+ * should avoid it.
+ *
+ * \param context context the buffer is attached to
+ * \param flags access mode and host pointer flags; conflicting combinations
+ *        are rejected with CL_INVALID_VALUE
+ * \param size size of the buffer in bytes, must not be 0
+ * \param host_ptr host memory, required with CL_MEM_USE_HOST_PTR or
+ *        CL_MEM_COPY_HOST_PTR and forbidden otherwise
+ * \param errcode_ret if not NULL, receives CL_SUCCESS or the reason of the failure
+ * \return the new buffer, or NULL on failure
+ */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_mem CL_API_CALL
+soclCreateBuffer(cl_context context,
+		 cl_mem_flags flags,
+		 size_t size,
+		 void * host_ptr,
+		 cl_int * errcode_ret)
+{
+	cl_mem mem;
+	void *buffer = host_ptr;
+	const int owns_buffer = !(flags & CL_MEM_USE_HOST_PTR);
+
+	if (errcode_ret != NULL)
+		*errcode_ret = CL_SUCCESS;
+
+	//Check flags
+	if (((flags & CL_MEM_READ_ONLY) && (flags & CL_MEM_WRITE_ONLY))
+	    || ((flags & CL_MEM_READ_WRITE) && (flags & CL_MEM_READ_ONLY))
+	    || ((flags & CL_MEM_READ_WRITE) && (flags & CL_MEM_WRITE_ONLY))
+	    || ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_ALLOC_HOST_PTR))
+	    || ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_COPY_HOST_PTR)))
+	{
+		if (errcode_ret != NULL)
+			*errcode_ret = CL_INVALID_VALUE;
+		return NULL;
+	}
+
+	if (size == 0)
+	{
+		if (errcode_ret != NULL)
+			*errcode_ret = CL_INVALID_BUFFER_SIZE;
+		return NULL;
+	}
+
+	if ((host_ptr == NULL && (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
+	    || (host_ptr != NULL && !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))))
+	{
+		if (errcode_ret != NULL)
+			*errcode_ret = CL_INVALID_HOST_PTR;
+		return NULL;
+	}
+
+	//TODO: we shouldn't allocate the buffer ourselves. StarPU allocates it if a NULL pointer is given
+
+	// If not MEM_USE_HOST_PTR, we need to alloc the buffer ourselves.
+	// This is done before the cl_mem is created and registered, so that a
+	// failure here leaves nothing to roll back.
+	if (owns_buffer)
+	{
+		buffer = malloc(size);
+		if (buffer == NULL)
+		{
+			if (errcode_ret != NULL)
+				*errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+			return NULL;
+		}
+	}
+
+	//Alloc cl_mem structure
+	mem = (cl_mem)gc_entity_alloc(sizeof(struct _cl_mem), release_callback_memobject, "buffer");
+	if (mem == NULL)
+	{
+		if (owns_buffer)
+			free(buffer);
+		if (errcode_ret != NULL)
+			*errcode_ret = CL_OUT_OF_HOST_MEMORY;
+		return NULL;
+	}
+
+	mem->ptr = buffer;
+	//A buffer we allocated doesn't contain meaningful data, the application's one may
+	mem->scratch = owns_buffer ? 1 : 0;
+	mem->map_count = 0;
+	gc_entity_store(&mem->context, context);
+	mem->flags = flags;
+	mem->size = size;
+	mem->host_ptr = host_ptr;
+
+#ifdef DEBUG
+	static int id = 0;
+	mem->id = id++;
+#endif
+
+	mem_object_store(mem);
+
+	// Access mode
+	mem->mode = (flags & CL_MEM_READ_ONLY) ? CL_MEM_READ_ONLY :
+		(flags & CL_MEM_WRITE_ONLY) ? CL_MEM_WRITE_ONLY : CL_MEM_READ_WRITE;
+
+	// Perform data copy if necessary
+	if (flags & CL_MEM_COPY_HOST_PTR)
+		memcpy(mem->ptr, host_ptr, size);
+
+	// Create StarPU buffer (on home node? what's this?)
+	starpu_variable_data_register(&mem->handle, STARPU_MAIN_RAM, (uintptr_t)mem->ptr, size);
+
+	DEBUG_MSG("[Buffer %d] Initialized (cl_mem %p handle %p)\n", mem->id, mem, mem->handle);
+
+	return mem;
+}
diff --git a/socl/src/cl_createcommandqueue.c b/socl/src/cl_createcommandqueue.c
new file mode 100644
index 0000000..99da3a0
--- /dev/null
+++ b/socl/src/cl_createcommandqueue.c
@@ -0,0 +1,85 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include "socl.h" + +static void release_callback_command_queue(void * e) +{ + cl_command_queue cq = (cl_command_queue)e; + + //Disable StarPU profiling if necessary + if (cq->properties & CL_QUEUE_PROFILING_ENABLE) + { + profiling_queue_count -= 1; + if (profiling_queue_count == 0) + starpu_profiling_status_set(STARPU_PROFILING_DISABLE); + } + + /* Release references */ + gc_entity_unstore(&cq->context); + + /* Destruct object */ + STARPU_PTHREAD_MUTEX_DESTROY(&cq->mutex); +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_command_queue CL_API_CALL +soclCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) +{ + cl_command_queue cq; + + cq = (cl_command_queue)gc_entity_alloc(sizeof(struct _cl_command_queue), + release_callback_command_queue, "command_queue"); + if (cq == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + cq->properties = properties; + gc_entity_store(&cq->context, context); + + char * fd = getenv("SOCL_FORCE_DYNAMIC"); + int force_dynamic = fd == NULL ? 0 : atoi(fd); + + cq->device = force_dynamic ? NULL : device; + +#ifdef DEBUG + static int id = 0; + cq->id = id++; +#endif + + //Enable StarPU profiling if necessary + if (properties & CL_QUEUE_PROFILING_ENABLE) + { + if (profiling_queue_count == 0) + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + profiling_queue_count += 1; + } + + cq->commands = NULL; + cq->barrier = NULL; + STARPU_PTHREAD_MUTEX_INIT(&cq->mutex, NULL); + + if (errcode_ret != NULL) + *errcode_ret = CL_SUCCESS; + + return cq; +} diff --git a/socl/src/cl_createcontext.c b/socl/src/cl_createcontext.c new file mode 100644 index 0000000..e8dfb18 --- /dev/null +++ b/socl/src/cl_createcontext.c @@ -0,0 +1,159 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +static void release_callback_context(void * e) +{ + cl_context context = (cl_context)e; + + /* Destruct object */ + if (context->properties != NULL) + free(context->properties); + + //FIXME: should we free StarPU contexts? + //starpu_sched_ctx_finished_submit(context->sched_ctx); + + free(context->devices); +} + +static char * defaultScheduler = "dmda"; +static char * defaultName = "default"; + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_context CL_API_CALL +soclCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void * user_data, + cl_int * errcode_ret) +{ + if (pfn_notify == NULL && user_data != NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_VALUE; + return NULL; + } + + //Check properties + if (properties != NULL) + { + const cl_context_properties *p = properties; + int i = 0; + while (p[i] != 0) + { + switch (p[i]) + { + case CL_CONTEXT_PLATFORM: + i++; + if (p[i] != ((cl_context_properties)&socl_platform)) + { + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_PLATFORM; + return NULL; + } + break; + + case CL_CONTEXT_SCHEDULER_SOCL: + case CL_CONTEXT_NAME_SOCL: + i++; + if (p[i] == 0) + { + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_PROPERTY; + return 
NULL; + } + break; + } + i++; + } + } + + cl_context ctx; + ctx = (cl_context)gc_entity_alloc(sizeof(struct _cl_context), release_callback_context, "context"); + if (ctx == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + ctx->num_properties = 0; + ctx->properties = NULL; + + char * sched = getenv("STARPU_SCHED"); + char * scheduler = sched == NULL ? defaultScheduler : sched; + char * name = defaultName; + + // Properties + if (properties != NULL) + { + //Count properties + const cl_context_properties * p = properties; + do + { + ctx->num_properties++; + p++; + } while (*p != 0); + + //Copy properties + ctx->properties = malloc(sizeof(cl_context_properties) * ctx->num_properties); + memcpy(ctx->properties, properties, sizeof(cl_context_properties) * ctx->num_properties); + + //Selected scheduler + cl_uint i = 0; + for (i=0; inum_properties; i++) + { + if (p[i] == CL_CONTEXT_SCHEDULER_SOCL) + { + i++; + scheduler = (char*)p[i]; + } + if (p[i] == CL_CONTEXT_NAME_SOCL) + { + i++; + name = (char*)p[i]; + } + } + } + + ctx->pfn_notify = pfn_notify; + ctx->user_data = user_data; + ctx->num_devices = num_devices; + +#ifdef DEBUG + static int id = 0; + ctx->id = id++; +#endif + + ctx->devices = malloc(sizeof(cl_device_id) * num_devices); + memcpy(ctx->devices, devices, sizeof(cl_device_id)*num_devices); + + // Create context + int workers[num_devices]; + unsigned int i; + for (i=0; idevices[i]->worker_id; + } + ctx->sched_ctx = starpu_sched_ctx_create(workers, num_devices, name, STARPU_SCHED_CTX_POLICY_NAME, scheduler, 0); + + if (errcode_ret != NULL) + *errcode_ret = CL_SUCCESS; + + return ctx; +} diff --git a/socl/src/cl_createcontextfromtype.c b/socl/src/cl_createcontextfromtype.c new file mode 100644 index 0000000..de3368b --- /dev/null +++ b/socl/src/cl_createcontextfromtype.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "init.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_context CL_API_CALL +soclCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void * user_data, + cl_int * errcode_ret) +{ + if (socl_init_starpu() < 0) + return NULL; + + //TODO: appropriate error messages + + cl_uint num_devices; + soclGetDeviceIDs(&socl_platform, device_type, 0, NULL, &num_devices); + + cl_device_id devices[num_devices]; + soclGetDeviceIDs(&socl_platform, device_type, num_devices, devices, NULL); + + return soclCreateContext(properties, num_devices, devices, pfn_notify, user_data, errcode_ret); +} diff --git a/socl/src/cl_createimage2d.c b/socl/src/cl_createimage2d.c new file mode 100644 index 0000000..18f11cd --- /dev/null +++ b/socl/src/cl_createimage2d.c @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_mem CL_API_CALL +soclCreateImage2D(cl_context UNUSED(context), + cl_mem_flags UNUSED(flags), + const cl_image_format * UNUSED(image_format), + size_t UNUSED(image_width), + size_t UNUSED(image_height), + size_t UNUSED(image_row_pitch), + void * UNUSED(host_ptr), + cl_int * errcode_ret) +{ + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_OPERATION; + return NULL; +} diff --git a/socl/src/cl_createimage3d.c b/socl/src/cl_createimage3d.c new file mode 100644 index 0000000..f611a6b --- /dev/null +++ b/socl/src/cl_createimage3d.c @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_mem CL_API_CALL +soclCreateImage3D(cl_context UNUSED(context), + cl_mem_flags UNUSED(flags), + const cl_image_format * UNUSED(image_format), + size_t UNUSED(image_width), + size_t UNUSED(image_height), + size_t UNUSED(image_depth), + size_t UNUSED(image_row_pitch), + size_t UNUSED(image_slice_pitch), + void * UNUSED(host_ptr), + cl_int * errcode_ret) +{ + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_OPERATION; + return NULL; +} diff --git a/socl/src/cl_createkernel.c b/socl/src/cl_createkernel.c new file mode 100644 index 0000000..93c6e16 --- /dev/null +++ b/socl/src/cl_createkernel.c @@ -0,0 +1,211 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +static void soclCreateKernel_task(void *data) +{ + struct _cl_kernel *k = (struct _cl_kernel *)data; + + int range = starpu_worker_get_range(); + cl_int err; + + if (k->program->cl_programs[range] == NULL) + { + k->errcodes[range] = CL_SUCCESS; + DEBUG_MSG("[Device %u] Kernel creation skipped: program has not been built for this device.\n", starpu_worker_get_id_check()); + return; + } + + DEBUG_MSG("[Device %u] Creating kernel...\n", starpu_worker_get_id_check()); + k->cl_kernels[range] = clCreateKernel(k->program->cl_programs[range], k->kernel_name, &err); + if (err != CL_SUCCESS) + { + k->errcodes[range] = err; + ERROR_STOP("[Device %u] Unable to create kernel. Error %d. Aborting.\n", starpu_worker_get_id_check(), err); + return; + } + + /* One worker creates argument structures */ + if (STARPU_BOOL_COMPARE_AND_SWAP(&k->num_args, 0, 666)) + { + unsigned int i; + cl_uint num_args; + + err = clGetKernelInfo(k->cl_kernels[range], CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL); + if (err != CL_SUCCESS) + { + DEBUG_CL("clGetKernelInfo", err); + ERROR_STOP("Unable to get kernel argument count. 
Aborting.\n"); + } + k->num_args = num_args; + DEBUG_MSG("Kernel has %u arguments\n", num_args); + + k->arg_size = (size_t*)malloc(sizeof(size_t) * num_args); + k->arg_value = (void**)malloc(sizeof(void*) * num_args); + k->arg_type = (enum kernel_arg_type*)malloc(sizeof(enum kernel_arg_type) * num_args); + /* Settings default type to NULL */ + for (i=0; iarg_value[i] = NULL; + k->arg_type[i] = Null; + } + } +} + +static void release_callback_kernel(void * e) +{ + cl_kernel kernel = (cl_kernel)e; + + //Free args + unsigned int i; + for (i=0; inum_args; i++) + { + switch (kernel->arg_type[i]) + { + case Null: + case Buffer: + break; + case Immediate: + free(kernel->arg_value[i]); + break; + } + } + if (kernel->arg_size != NULL) + free(kernel->arg_size); + if (kernel->arg_value != NULL) + free(kernel->arg_value); + if (kernel->arg_type != NULL) + free(kernel->arg_type); + + //Release real kernels... + for (i=0; icl_kernels[i] != NULL) + { + cl_int err = clReleaseKernel(kernel->cl_kernels[i]); + if (err != CL_SUCCESS) + DEBUG_CL("clReleaseKernel", err); + } + } + + //Release perfmodel + //FIXME: we cannot release performance models before StarPU shutdown as it + //will use them to store kernel execution times + + //free(kernel->perfmodel); + //free(kernel->kernel_name); + + gc_entity_unstore(&kernel->program); + + free(kernel->cl_kernels); + free(kernel->errcodes); +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_kernel CL_API_CALL +soclCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) +{ + cl_kernel k; + + if (program == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_PROGRAM; + return NULL; + } + + //TODO: check programs (see opencl specs) + + /* Create Kernel structure */ + k = (cl_kernel)gc_entity_alloc(sizeof(struct _cl_kernel), release_callback_kernel, "kernel"); + if (k == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + gc_entity_store(&k->program, program); 
+ k->kernel_name = strdup(kernel_name); + + k->perfmodel = malloc(sizeof(struct starpu_perfmodel)); + memset(k->perfmodel, 0, sizeof(struct starpu_perfmodel)); + k->perfmodel->type = STARPU_HISTORY_BASED; + k->perfmodel->symbol = k->kernel_name; + + k->num_args = 0; + k->arg_value = NULL; + k->arg_size = NULL; + + k->split_func = NULL; + k->split_space = 0; + k->split_data = NULL; + k->split_perfs = NULL; + STARPU_PTHREAD_MUTEX_INIT(&k->split_lock, NULL); + +#ifdef DEBUG + static int id = 0; + k->id = id++; +#endif + + k->cl_kernels = (cl_kernel*)malloc(socl_device_count * sizeof(cl_kernel)); + k->errcodes = (cl_int*)malloc(socl_device_count * sizeof(cl_int)); + + { + unsigned int i; + for (i=0; icl_kernels[i] = NULL; + k->errcodes[i] = -9999; + } + } + + /* Create kernel on each device */ + DEBUG_MSG("[Kernel %d] Create %u kernels (name \"%s\")\n", k->id, socl_device_count, kernel_name); + starpu_execute_on_each_worker_ex(soclCreateKernel_task, k, STARPU_OPENCL, "SOCL_CREATE_KERNEL"); + + if (errcode_ret != NULL) + { + unsigned int i; + *errcode_ret = CL_SUCCESS; + for (i=0; ierrcodes[i]) + { +#define CASE_RET(e) case e: *errcode_ret = e; return k + CASE_RET(CL_INVALID_PROGRAM); + CASE_RET(CL_INVALID_PROGRAM_EXECUTABLE); + CASE_RET(CL_INVALID_KERNEL_NAME); + CASE_RET(CL_INVALID_KERNEL_DEFINITION); + CASE_RET(CL_INVALID_VALUE); + CASE_RET(CL_OUT_OF_RESOURCES); + CASE_RET(CL_OUT_OF_HOST_MEMORY); +#undef CASE_RET + } + } + + if (k->num_args == 666) + { + *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE; + return k; + } + } + + return k; +} diff --git a/socl/src/cl_createkernelsinprogram.c b/socl/src/cl_createkernelsinprogram.c new file mode 100644 index 0000000..76f36fd --- /dev/null +++ b/socl/src/cl_createkernelsinprogram.c @@ -0,0 +1,28 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+/* Unimplemented stub: ignores every argument and always returns CL_INVALID_OPERATION. */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclCreateKernelsInProgram(cl_program UNUSED(program),
+			   cl_uint UNUSED(num_kernels),
+			   cl_kernel * UNUSED(kernels),
+			   cl_uint * UNUSED(num_kernels_ret))
+{
+	//TODO: not implemented yet; no kernel is created and the output arguments are left untouched
+	return CL_INVALID_OPERATION;
+}
diff --git a/socl/src/cl_createprogramwithbinary.c b/socl/src/cl_createprogramwithbinary.c
new file mode 100644
index 0000000..4b2e3ba
--- /dev/null
+++ b/socl/src/cl_createprogramwithbinary.c
@@ -0,0 +1,34 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_program CL_API_CALL +soclCreateProgramWithBinary(cl_context UNUSED(context), + cl_uint UNUSED(num_devices), + const cl_device_id * UNUSED(device_list), + const size_t * UNUSED(lengths), + const unsigned char ** UNUSED(binaries), + cl_int * UNUSED(binary_status), + cl_int * errcode_ret) +{ + //TODO + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_OPERATION; + + return NULL; +} diff --git a/socl/src/cl_createprogramwithsource.c b/socl/src/cl_createprogramwithsource.c new file mode 100644 index 0000000..3386501 --- /dev/null +++ b/socl/src/cl_createprogramwithsource.c @@ -0,0 +1,167 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +struct cpws_data +{ + struct _cl_program *program; + cl_int *errcodes; + cl_uint count; + char **strings; + size_t *lengths; +}; + +static void soclCreateProgramWithSource_task(void *data) +{ + struct cpws_data *d = (struct cpws_data*)data; + + cl_context context; + int wid = starpu_worker_get_id_check(); + DEBUG_MSG("Worker id: %d\n", wid); + + int range = starpu_worker_get_range(); + + starpu_opencl_get_context(wid, &context); + + d->program->cl_programs[range] = clCreateProgramWithSource(context, d->count, (const char**)d->strings, d->lengths, &d->errcodes[range]); +} + +static void release_callback_program(void * e) +{ + cl_program program = (cl_program)e; + + unsigned int i; + for (i=0; icl_programs[i] != NULL) + { + cl_int err = clReleaseProgram(program->cl_programs[i]); + if (err != CL_SUCCESS) + DEBUG_CL("clReleaseProgram", err); + } + } + + /* Release references */ + gc_entity_unstore(&program->context); + + free(program->cl_programs); + + if (program->options != NULL) + free(program->options); +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_program CL_API_CALL +soclCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) +{ + cl_program p; + struct cpws_data *data; + unsigned int i; + + if (errcode_ret != NULL) + *errcode_ret = CL_SUCCESS; + + /* Check arguments */ + if (count == 0 || strings == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_VALUE; + return NULL; + } + + /* Alloc cl_program structure */ + p = (cl_program)gc_entity_alloc(sizeof(struct _cl_program), release_callback_program, "program"); + if (p == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + gc_entity_store(&p->context, context); + p->options = NULL; + +#ifdef DEBUG + static int id = 0; + p->id = id++; +#endif + + p->cl_programs = (cl_program*)malloc(sizeof(cl_program) * socl_device_count); + if (p->cl_programs 
== NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + { + for (i=0; icl_programs[i] = NULL; + } + + /* Construct structure to pass arguments to workers */ + data = (struct cpws_data*)malloc(sizeof(struct cpws_data)); + if (data == NULL) + { + if (errcode_ret != NULL) + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + free(p->cl_programs); + return NULL; + } + data->count = count; + data->program = p; + data->strings = (char**)strings; + data->lengths = (size_t*)lengths; + + data->errcodes = (cl_int*)malloc(sizeof(cl_int) * socl_device_count); + for (i=0; ierrcodes[i] = CL_SUCCESS; + } + + /* Init real cl_program for each OpenCL device */ + unsigned workers[context->num_devices]; + for (i=0; inum_devices; i++) + { + workers[i] = context->devices[i]->worker_id; + } + starpu_execute_on_specific_workers(soclCreateProgramWithSource_task, data, context->num_devices, workers, "SOCL_CREATE_PROGRAM"); + + if (errcode_ret != NULL) + { + *errcode_ret = CL_SUCCESS; + for (i=0; ierrcodes[i] != CL_SUCCESS) + { + DEBUG_MSG("Worker [%u] failed\n", i); + DEBUG_CL("clCreateProgramWithSource", data->errcodes[i]); + *errcode_ret = data->errcodes[i]; + break; + } + } + } + + free(data->errcodes); + free(data); + + return p; +} diff --git a/socl/src/cl_createsampler.c b/socl/src/cl_createsampler.c new file mode 100644 index 0000000..c878be7 --- /dev/null +++ b/socl/src/cl_createsampler.c @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_sampler CL_API_CALL +soclCreateSampler(cl_context UNUSED(context), + cl_bool UNUSED(normalized_coords), + cl_addressing_mode UNUSED(addressing_mode), + cl_filter_mode UNUSED(filter_mode), + cl_int * errcode_ret) +{ + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_OPERATION; + return NULL; +} diff --git a/socl/src/cl_enqueuebarrier.c b/socl/src/cl_enqueuebarrier.c new file mode 100644 index 0000000..5e177ca --- /dev/null +++ b/socl/src/cl_enqueuebarrier.c @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueBarrier(cl_command_queue cq) +{ + command_barrier cmd = command_barrier_create(); + + command_queue_enqueue(cq, cmd, 0, NULL); + + return CL_SUCCESS; +} + +cl_int command_barrier_submit(command_barrier cmd) +{ + struct starpu_task *task; + task = task_create(CL_COMMAND_BARRIER); + + return task_submit(task, cmd); +} diff --git a/socl/src/cl_enqueuebarrierwithwaitlist.c b/socl/src/cl_enqueuebarrierwithwaitlist.c new file mode 100644 index 0000000..0626da5 --- /dev/null +++ b/socl/src/cl_enqueuebarrierwithwaitlist.c @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_2 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueBarrierWithWaitList(cl_command_queue cq, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_barrier cmd = command_barrier_create(); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuecopybuffer.c b/socl/src/cl_enqueuecopybuffer.c new file mode 100644 index 0000000..cd13196 --- /dev/null +++ b/socl/src/cl_enqueuecopybuffer.c @@ -0,0 +1,121 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +static void soclEnqueueCopyBuffer_opencl_task(void *descr[], void *args) +{ + int wid; + cl_command_queue cq; + cl_event ev; + command_copy_buffer cmd = (command_copy_buffer)args; + + cl_event event = command_event_get(cmd); + event->prof_start = _socl_nanotime(); + gc_entity_release(event); + + wid = starpu_worker_get_id_check(); + starpu_opencl_get_queue(wid, &cq); + + cl_mem src = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); + cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[1]); + + clEnqueueCopyBuffer(cq, src,dst, cmd->src_offset, cmd->dst_offset, cmd->cb, 0, NULL, &ev); + clWaitForEvents(1, &ev); + clReleaseEvent(ev); + + gc_entity_release_cmd(cmd); +} + +static void soclEnqueueCopyBuffer_cpu_task(void *descr[], void *args) +{ + command_copy_buffer cmd = (command_copy_buffer)args; + + cl_event ev = command_event_get(cmd); + ev->prof_start = _socl_nanotime(); + gc_entity_release(ev); + + char * src = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); + char * dst = (void*)STARPU_VARIABLE_GET_PTR(descr[1]); + + memcpy(dst+cmd->dst_offset, src+cmd->src_offset, cmd->cb); + + gc_entity_release_cmd(cmd); +} + +static struct starpu_perfmodel copy_buffer_perfmodel = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "SOCL_COPY_BUFFER" +}; + +static struct starpu_codelet codelet_copybuffer = +{ + .where = STARPU_CPU | STARPU_OPENCL, + .model = ©_buffer_perfmodel, + .cpu_funcs = { &soclEnqueueCopyBuffer_cpu_task }, + .opencl_funcs = { &soclEnqueueCopyBuffer_opencl_task }, + .modes = {STARPU_R, STARPU_RW}, + .nbuffers = 2 +}; + +cl_int command_copy_buffer_submit(command_copy_buffer cmd) +{ + struct starpu_task * task = task_create(CL_COMMAND_COPY_BUFFER); + + task->handles[0] = cmd->src_buffer->handle; + task->handles[1] = cmd->dst_buffer->handle; + task->cl = &codelet_copybuffer; + + /* Execute the task on a specific worker? 
*/ + if (cmd->_command.event->cq->device != NULL) + { + task->execute_on_a_specific_worker = 1; + task->workerid = cmd->_command.event->cq->device->worker_id; + } + + gc_entity_store_cmd(&task->cl_arg, cmd); + task->cl_arg_size = sizeof(*cmd); + + cmd->dst_buffer->scratch = 0; + + task_submit(task, cmd); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyBuffer(cl_command_queue cq, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t cb, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuecopybuffertoimage.c b/socl/src/cl_enqueuecopybuffertoimage.c new file mode 100644 index 0000000..490c8ff --- /dev/null +++ b/socl/src/cl_enqueuecopybuffertoimage.c @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include "socl.h"
+/* Unimplemented stub: ignores every argument and always returns CL_INVALID_OPERATION. */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
+			     cl_mem UNUSED(src_buffer),
+			     cl_mem UNUSED(dst_image),
+			     size_t UNUSED(src_offset),
+			     const size_t * UNUSED(dst_origin),
+			     const size_t * UNUSED(region),
+			     cl_uint UNUSED(num_events_in_wait_list),
+			     const cl_event * UNUSED(event_wait_list),
+			     cl_event * UNUSED(event))
+{
+	return CL_INVALID_OPERATION; /* nothing is enqueued and *event is not written */
+}
diff --git a/socl/src/cl_enqueuecopyimage.c b/socl/src/cl_enqueuecopyimage.c
new file mode 100644
index 0000000..5642caa
--- /dev/null
+++ b/socl/src/cl_enqueuecopyimage.c
@@ -0,0 +1,32 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyImage(cl_command_queue UNUSED(command_queue), + cl_mem UNUSED(src_image), + cl_mem UNUSED(dst_image), + const size_t * UNUSED(src_origin), + const size_t * UNUSED(dst_origin), + const size_t * UNUSED(region), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_enqueuecopyimagetobuffer.c b/socl/src/cl_enqueuecopyimagetobuffer.c new file mode 100644 index 0000000..15b8ae2 --- /dev/null +++ b/socl/src/cl_enqueuecopyimagetobuffer.c @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue), + cl_mem UNUSED(src_image), + cl_mem UNUSED(dst_buffer), + const size_t * UNUSED(src_origin), + const size_t * UNUSED(region), + size_t UNUSED(dst_offset), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_enqueuemapbuffer.c b/socl/src/cl_enqueuemapbuffer.c new file mode 100644 index 0000000..187a9e4 --- /dev/null +++ b/socl/src/cl_enqueuemapbuffer.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +static void mapbuffer_task(void *args) +{ + command_map_buffer cmd = (command_map_buffer)args; + + cl_event ev = command_event_get(cmd); + ev->prof_start = _socl_nanotime(); + gc_entity_release(ev); + + enum starpu_data_access_mode mode = (cmd->map_flags == CL_MAP_READ ? 
STARPU_R : STARPU_RW); + + starpu_data_acquire_cb(cmd->buffer->handle, mode, command_completed_task_callback, cmd); +} + +static struct starpu_codelet codelet_mapbuffer = +{ + .name = "SOCL_MAP_BUFFER" +}; + +cl_int command_map_buffer_submit(command_map_buffer cmd) +{ + gc_entity_retain(cmd); + + cpu_task_submit(cmd, mapbuffer_task, cmd, 0, 0, &codelet_mapbuffer, 0, NULL); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY void * CL_API_CALL +soclEnqueueMapBuffer(cl_command_queue cq, + cl_mem buffer, + cl_bool blocking, + cl_map_flags map_flags, + size_t offset, + size_t cb, + cl_uint num_events, + const cl_event * events, + cl_event * event, + cl_int * errcode_ret) +{ + command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + if (errcode_ret != NULL) + *errcode_ret = CL_SUCCESS; + + MAY_BLOCK_THEN_RETURN_EVENT(ev,blocking,event); + + return (void*)(starpu_variable_get_local_ptr(buffer->handle) + offset); +} diff --git a/socl/src/cl_enqueuemapimage.c b/socl/src/cl_enqueuemapimage.c new file mode 100644 index 0000000..5e381d3 --- /dev/null +++ b/socl/src/cl_enqueuemapimage.c @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY void * CL_API_CALL +soclEnqueueMapImage(cl_command_queue UNUSED(command_queue), + cl_mem UNUSED(image), + cl_bool UNUSED(blocking_map), + cl_map_flags UNUSED(map_flags), + const size_t * UNUSED(origin), + const size_t * UNUSED(region), + size_t * UNUSED(image_row_pitch), + size_t * UNUSED(image_slice_pitch), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event), + cl_int * errcode_ret) +{ + if (errcode_ret != NULL) + *errcode_ret = CL_INVALID_OPERATION; + + return NULL; +} diff --git a/socl/src/cl_enqueuemarker.c b/socl/src/cl_enqueuemarker.c new file mode 100644 index 0000000..0ed18cd --- /dev/null +++ b/socl/src/cl_enqueuemarker.c @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueMarker(cl_command_queue cq, + cl_event * event) +{ + if (event == NULL) + return CL_INVALID_VALUE; + + command_marker cmd = command_marker_create(); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, 0, NULL); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} + +cl_int command_marker_submit(command_marker cmd) +{ + struct starpu_task *task; + task = task_create(CL_COMMAND_MARKER); + + return task_submit(task, cmd); +} diff --git a/socl/src/cl_enqueuemarkerwithwaitlist.c b/socl/src/cl_enqueuemarkerwithwaitlist.c new file mode 100644 index 0000000..463840d --- /dev/null +++ b/socl/src/cl_enqueuemarkerwithwaitlist.c @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_2 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueMarkerWithWaitList(cl_command_queue cq, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + if (events == NULL) + return soclEnqueueBarrierWithWaitList(cq, num_events, events, event); + + command_marker cmd = command_marker_create(); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuenativekernel.c b/socl/src/cl_enqueuenativekernel.c new file mode 100644 index 0000000..e36fff9 --- /dev/null +++ b/socl/src/cl_enqueuenativekernel.c @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueNativeKernel(cl_command_queue UNUSED(command_queue), + __attribute__((unused)) void (*user_func)(void *), + void * UNUSED(args), + size_t UNUSED(cb_args), + cl_uint UNUSED(num_mem_objects), + const cl_mem * UNUSED(mem_list), + const void ** UNUSED(args_mem_loc), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_enqueuendrangekernel.c b/socl/src/cl_enqueuendrangekernel.c new file mode 100644 index 0000000..3b68b29 --- /dev/null +++ b/socl/src/cl_enqueuendrangekernel.c @@ -0,0 +1,248 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "event.h" + +void soclEnqueueNDRangeKernel_task(void *descr[], void *args) +{ + command_ndrange_kernel cmd = (command_ndrange_kernel)args; + + cl_command_queue cq; + int wid; + cl_int err; + + cl_event ev = command_event_get(cmd); + ev->prof_start = _socl_nanotime(); + gc_entity_release(ev); + + wid = starpu_worker_get_id_check(); + starpu_opencl_get_queue(wid, &cq); + + DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, cmd->kernel->id); + + int range = starpu_worker_get_range(); + + /* Set arguments */ + { + unsigned int i; + int buf = 0; + for (i=0; inum_args; i++) + { + switch (cmd->arg_types[i]) + { + case Null: + err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], NULL); + break; + case Buffer: + { + cl_mem mem; + mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]); + err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], &mem); + buf++; + } + break; + case Immediate: + err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], cmd->args[i]); + break; + } + if (err != CL_SUCCESS) + { + DEBUG_CL("clSetKernelArg", err); + DEBUG_ERROR("Aborting\n"); + } + } + } + + /* Calling Kernel */ + cl_event event; + err = clEnqueueNDRangeKernel(cq, cmd->kernel->cl_kernels[range], cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size, 0, NULL, &event); + + if (err != CL_SUCCESS) + { + ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err); + DEBUG_CL("clEnqueueNDRangeKernel", err); + DEBUG_MSG("Workdim %u, global_work_offset %p, global_work_size %p, local_work_size %p\n", + cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size); + DEBUG_MSG("Global work size: %ld %ld %ld\n", (long)cmd->global_work_size[0], + (long)(cmd->work_dim > 1 ? cmd->global_work_size[1] : 1), (long)(cmd->work_dim > 2 ? 
cmd->global_work_size[2] : 1)); + if (cmd->local_work_size != NULL) + DEBUG_MSG("Local work size: %ld %ld %ld\n", (long)cmd->local_work_size[0], + (long)(cmd->work_dim > 1 ? cmd->local_work_size[1] : 1), (long)(cmd->work_dim > 2 ? cmd->local_work_size[2] : 1)); + } + else + { + /* Waiting for kernel to terminate */ + clWaitForEvents(1, &event); + clReleaseEvent(event); + } +} + +/** + * Real kernel enqueuing command + */ +cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) +{ + starpu_task task = task_create(CL_COMMAND_NDRANGE_KERNEL); + task->cl = &cmd->codelet; + task->cl->model = cmd->kernel->perfmodel; + task->cl_arg = cmd; + task->cl_arg_size = sizeof(cmd); + + /* Execute the task on a specific worker? */ + if (cmd->_command.event->cq->device != NULL) + { + task->execute_on_a_specific_worker = 1; + task->workerid = cmd->_command.event->cq->device->worker_id; + } + + struct starpu_codelet * codelet = task->cl; + + /* We need to detect which parameters are OpenCL's memory objects and + * we retrieve their corresponding StarPU buffers */ + cmd->num_buffers = 0; + cmd->buffers = malloc(sizeof(cl_mem) * cmd->num_args); + + unsigned int i; + for (i=0; inum_args; i++) + { + if (cmd->arg_types[i] == Buffer) + { + cl_mem buf = *(cl_mem*)cmd->args[i]; + + gc_entity_store(&cmd->buffers[cmd->num_buffers], buf); + task->handles[cmd->num_buffers] = buf->handle; + + /* Determine best StarPU buffer access mode */ + int mode; + if (buf->mode == CL_MEM_READ_ONLY) + mode = STARPU_R; + else if (buf->mode == CL_MEM_WRITE_ONLY) + { + mode = STARPU_W; + buf->scratch = 0; + } + else if (buf->scratch) + { //RW but never accessed in RW or W mode + mode = STARPU_W; + buf->scratch = 0; + } + else + { + mode = STARPU_RW; + buf->scratch = 0; + } + codelet->modes[cmd->num_buffers] = mode; + + cmd->num_buffers += 1; + } + } + codelet->nbuffers = cmd->num_buffers; + + task_submit(task, cmd); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_1 +CL_API_ENTRY cl_int CL_API_CALL 
+soclEnqueueNDRangeKernel(cl_command_queue cq, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + if (kernel->split_func != NULL && !STARPU_PTHREAD_MUTEX_TRYLOCK(&kernel->split_lock)) + { + cl_event beforeEvent, afterEvent, totalEvent; + + totalEvent = event_create(); + gc_entity_store(&totalEvent->cq, cq); + + command_marker cmd = command_marker_create(); + beforeEvent = command_event_get(cmd); + command_queue_enqueue(cq, cmd, num_events, events); + + cl_uint iter = 1; + cl_uint split_min = CL_UINT_MAX; + cl_uint split_min_iter = 1; + while (iter < kernel->split_space && kernel->split_perfs[iter] != 0) + { + if (kernel->split_perfs[iter] < split_min) + { + split_min = kernel->split_perfs[iter]; + split_min_iter = iter; + } + iter++; + } + + if (iter == kernel->split_space) + { + iter = split_min_iter; + } + + cl_int ret = kernel->split_func(cq, iter, kernel->split_data, beforeEvent, &afterEvent); + + if (ret == CL_SUCCESS) + { + //FIXME: blocking call + soclWaitForEvents(1, &afterEvent); + + /* Store perf */ + cl_ulong start,end; + soclGetEventProfilingInfo(beforeEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &start, NULL); + soclGetEventProfilingInfo(afterEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); + soclReleaseEvent(afterEvent); + + kernel->split_perfs[iter] = end-start; + + STARPU_PTHREAD_MUTEX_UNLOCK(&kernel->split_lock); + + event_complete(totalEvent); + + totalEvent->prof_start = start; + totalEvent->prof_submit = start; + totalEvent->prof_queued = start; + totalEvent->prof_end = end; + + RETURN_EVENT(totalEvent,event); + } + else + { + STARPU_PTHREAD_MUTEX_UNLOCK(&kernel->split_lock); + soclReleaseEvent(totalEvent); + } + + return ret; + } + else + { + command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, work_dim, + global_work_offset, global_work_size, 
local_work_size); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuereadbuffer.c b/socl/src/cl_enqueuereadbuffer.c new file mode 100644 index 0000000..9a3562f --- /dev/null +++ b/socl/src/cl_enqueuereadbuffer.c @@ -0,0 +1,124 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +static void soclEnqueueReadBuffer_cpu_task(void *descr[], void *args) +{ + command_read_buffer cmd = (command_read_buffer)args; + + cl_event ev = command_event_get(cmd); + ev->prof_start = _socl_nanotime(); + gc_entity_release(ev); + + char * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); + DEBUG_MSG("[Buffer %d] Reading %ld bytes from %p to %p\n", cmd->buffer->id, (long)cmd->cb, ptr+cmd->offset, cmd->ptr); + + //This fix is for people who use USE_HOST_PTR and still use ReadBuffer to sync the buffer in host mem at host_ptr. + //They should use buffer mapping facilities instead. 
+ if (ptr+cmd->offset != cmd->ptr) + memcpy(cmd->ptr, ptr+cmd->offset, cmd->cb); + + gc_entity_release_cmd(cmd); +} + +static void soclEnqueueReadBuffer_opencl_task(void *descr[], void *args) +{ + command_read_buffer cmd = (command_read_buffer)args; + + cl_event event = command_event_get(cmd); + event->prof_start = _socl_nanotime(); + gc_entity_release(event); + + cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); + + DEBUG_MSG("[Buffer %d] Reading %ld bytes from offset %ld into %p\n", cmd->buffer->id, (long)cmd->cb, (long)cmd->offset, cmd->ptr); + + int wid = starpu_worker_get_id_check(); + cl_command_queue cq; + starpu_opencl_get_queue(wid, &cq); + + cl_event ev; + cl_int ret = clEnqueueReadBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, &ev); + if (ret != CL_SUCCESS) + ERROR_CL("clEnqueueReadBuffer", ret); + + clWaitForEvents(1, &ev); + clReleaseEvent(ev); + + gc_entity_release_cmd(cmd); +} + +static struct starpu_perfmodel read_buffer_perfmodel = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "SOCL_READ_BUFFER" +}; + +static struct starpu_codelet codelet_readbuffer = +{ + .where = STARPU_OPENCL, + .model = &read_buffer_perfmodel, + .cpu_funcs = { &soclEnqueueReadBuffer_cpu_task }, + .opencl_funcs = { &soclEnqueueReadBuffer_opencl_task }, + .modes = {STARPU_R}, + .nbuffers = 1 +}; + +cl_int command_read_buffer_submit(command_read_buffer cmd) +{ + struct starpu_task * task = task_create(CL_COMMAND_READ_BUFFER); + + task->handles[0] = cmd->buffer->handle; + task->cl = &codelet_readbuffer; + + /* Execute the task on a specific worker? 
*/ + if (cmd->_command.event->cq->device != NULL) + { + task->execute_on_a_specific_worker = 1; + task->workerid = cmd->_command.event->cq->device->worker_id; + } + + gc_entity_store_cmd(&task->cl_arg, cmd); + task->cl_arg_size = sizeof(*cmd); + + task_submit(task, cmd); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueReadBuffer(cl_command_queue cq, + cl_mem buffer, + cl_bool blocking, + size_t offset, + size_t cb, + void * ptr, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + MAY_BLOCK_THEN_RETURN_EVENT(ev, blocking, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuereadimage.c b/socl/src/cl_enqueuereadimage.c new file mode 100644 index 0000000..eafe5ee --- /dev/null +++ b/socl/src/cl_enqueuereadimage.c @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueReadImage(cl_command_queue UNUSED(command_queue), + cl_mem UNUSED(image), + cl_bool UNUSED(blocking_read), + const size_t * UNUSED(origin), + const size_t * UNUSED(region), + size_t UNUSED(row_pitch), + size_t UNUSED(slice_pitch), + void * UNUSED(ptr), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_enqueuetask.c b/socl/src/cl_enqueuetask.c new file mode 100644 index 0000000..378e6d9 --- /dev/null +++ b/socl/src/cl_enqueuetask.c @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueTask(cl_command_queue cq, + cl_kernel kernel, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_ndrange_kernel cmd = command_task_create(kernel); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueueunmapmemobject.c b/socl/src/cl_enqueueunmapmemobject.c new file mode 100644 index 0000000..0ec84d8 --- /dev/null +++ b/socl/src/cl_enqueueunmapmemobject.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd) +{ + /* Aliases */ + cl_mem buffer = cmd->buffer; + + static struct starpu_codelet codelet = + { + .name = "SOCL_UNMAP_MEM_OBJECT" + }; + cpu_task_submit(cmd, (void(*)(void*))starpu_data_release, buffer->handle, 0, 1, &codelet, 0, NULL); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueUnmapMemObject(cl_command_queue cq, + cl_mem buffer, + void * ptr, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + RETURN_EVENT(ev, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuewaitforevents.c b/socl/src/cl_enqueuewaitforevents.c new file mode 100644 index 0000000..72e10bb --- /dev/null +++ b/socl/src/cl_enqueuewaitforevents.c @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWaitForEvents(cl_command_queue cq, + cl_uint num_events, + const cl_event * events) +{ + command_marker cmd = command_marker_create(); + + command_queue_enqueue(cq, cmd, num_events, events); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuewritebuffer.c b/socl/src/cl_enqueuewritebuffer.c new file mode 100644 index 0000000..2526553 --- /dev/null +++ b/socl/src/cl_enqueuewritebuffer.c @@ -0,0 +1,149 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +static void soclEnqueueWriteBuffer_cpu_task(void *descr[], void *args) +{ + command_write_buffer cmd = (command_write_buffer)args; + + cl_event ev = command_event_get(cmd); + ev->prof_start = _socl_nanotime(); + gc_entity_release(ev); + + char * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); + DEBUG_MSG("[Buffer %d] Writing %ld bytes from %p to %p\n", cmd->buffer->id, (long)cmd->cb, cmd->ptr, ptr+cmd->offset); + + //FIXME: Fix for people who use USE_HOST_PTR, modify data at host_ptr and use WriteBuffer to commit the change. + // StarPU may have erased host mem at host_ptr (for instance by retrieving current buffer data at host_ptr) + // Buffer mapping facilities should be used instead + // Maybe we should report the bug here... 
for now, we just avoid memcpy crash due to overlapping regions... + if (ptr+cmd->offset != cmd->ptr) + memcpy(ptr+cmd->offset, cmd->ptr, cmd->cb); + + gc_entity_release_cmd(cmd); +} + +static void soclEnqueueWriteBuffer_opencl_task(void *descr[], void *args) +{ + command_write_buffer cmd = (command_write_buffer)args; + + cl_event event = command_event_get(cmd); + event->prof_start = _socl_nanotime(); + gc_entity_release(event); + + cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); + + DEBUG_MSG("[Buffer %d] Writing %ld bytes to offset %ld from %p\n", cmd->buffer->id, (long)cmd->cb, (long)cmd->offset, cmd->ptr); + + int wid = starpu_worker_get_id_check(); + cl_command_queue cq; + starpu_opencl_get_queue(wid, &cq); + + cl_event ev; + + cl_int err = clEnqueueWriteBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, &ev); + if (err != CL_SUCCESS) + ERROR_CL("clEnqueueWriteBuffer", err); + + clWaitForEvents(1, &ev); + clReleaseEvent(ev); + + gc_entity_release_cmd(cmd); +} + +static struct starpu_perfmodel write_buffer_perfmodel = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "SOCL_WRITE_BUFFER" +}; + +static struct starpu_codelet codelet_writebuffer = +{ + .where = STARPU_OPENCL, + .model = &write_buffer_perfmodel, + .cpu_funcs = { &soclEnqueueWriteBuffer_cpu_task }, + .opencl_funcs = { &soclEnqueueWriteBuffer_opencl_task }, + .modes = {STARPU_W}, + .nbuffers = 1 +}; + +static struct starpu_codelet codelet_writebuffer_partial = +{ + .where = STARPU_OPENCL, + .model = &write_buffer_perfmodel, + .cpu_funcs = { &soclEnqueueWriteBuffer_cpu_task }, + .opencl_funcs = { &soclEnqueueWriteBuffer_opencl_task }, + .modes = {STARPU_RW}, + .nbuffers = 1 +}; + +cl_int command_write_buffer_submit(command_write_buffer cmd) +{ + /* Aliases */ + cl_mem buffer = cmd->buffer; + size_t cb = cmd->cb; + + struct starpu_task *task; + task = task_create(CL_COMMAND_WRITE_BUFFER); + + task->handles[0] = buffer->handle; + //If only a subpart of the buffer is written, RW access 
mode is required + if (cb != buffer->size) + task->cl = &codelet_writebuffer_partial; + else + task->cl = &codelet_writebuffer; + + gc_entity_store_cmd(&task->cl_arg, cmd); + task->cl_arg_size = sizeof(*cmd); + + /* Execute the task on a specific worker? */ + if (cmd->_command.event->cq->device != NULL) + { + task->execute_on_a_specific_worker = 1; + task->workerid = cmd->_command.event->cq->device->worker_id; + } + + //The buffer now contains meaningful data + cmd->buffer->scratch = 0; + + task_submit(task, cmd); + + return CL_SUCCESS; +} + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWriteBuffer(cl_command_queue cq, + cl_mem buffer, + cl_bool blocking, + size_t offset, + size_t cb, + const void * ptr, + cl_uint num_events, + const cl_event * events, + cl_event * event) +{ + command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, num_events, events); + + MAY_BLOCK_THEN_RETURN_EVENT(ev, blocking, event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_enqueuewriteimage.c b/socl/src/cl_enqueuewriteimage.c new file mode 100644 index 0000000..398004c --- /dev/null +++ b/socl/src/cl_enqueuewriteimage.c @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWriteImage(cl_command_queue UNUSED(command_queue), + cl_mem UNUSED(image), + cl_bool UNUSED(blocking_write), + const size_t * UNUSED(origin), + const size_t * UNUSED(region), + size_t UNUSED(input_row_pitch), + size_t UNUSED(input_slice_pitch), + const void * UNUSED(ptr), + cl_uint UNUSED(num_events_in_wait_list), + const cl_event * UNUSED(event_wait_list), + cl_event * UNUSED(event)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_finish.c b/socl/src/cl_finish.c new file mode 100644 index 0000000..cc71f63 --- /dev/null +++ b/socl/src/cl_finish.c @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclFinish(cl_command_queue cq) +{ + command_barrier cmd = command_barrier_create(); + + cl_event ev = command_event_get(cmd); + + command_queue_enqueue(cq, cmd, 0, NULL); + + MAY_BLOCK_THEN_RETURN_EVENT(ev, CL_TRUE, (cl_event*)NULL); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_flush.c b/socl/src/cl_flush.c new file mode 100644 index 0000000..4b481e0 --- /dev/null +++ b/socl/src/cl_flush.c @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclFlush(cl_command_queue UNUSED(command_queue)) +{ + return CL_SUCCESS; +} diff --git a/socl/src/cl_getcommandqueueinfo.c b/socl/src/cl_getcommandqueueinfo.c new file mode 100644 index 0000000..cbb1493 --- /dev/null +++ b/socl/src/cl_getcommandqueueinfo.c @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetCommandQueueInfo(cl_command_queue cq, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (cq == NULL) + return CL_INVALID_COMMAND_QUEUE; + + switch (param_name) + { + INFO_CASE(CL_QUEUE_CONTEXT, cq->context); + INFO_CASE(CL_QUEUE_DEVICE, cq->device); + INFO_CASE(CL_QUEUE_REFERENCE_COUNT, cq->_entity.refs); + INFO_CASE(CL_QUEUE_PROPERTIES, cq->properties); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getcontextinfo.c b/socl/src/cl_getcontextinfo.c new file mode 100644 index 0000000..5a5593f --- /dev/null +++ b/socl/src/cl_getcontextinfo.c @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (context == NULL) + return CL_INVALID_CONTEXT; + + switch (param_name) + { + INFO_CASE(CL_CONTEXT_REFERENCE_COUNT, context->_entity.refs); + INFO_CASE_EX(CL_CONTEXT_DEVICES, context->devices, context->num_devices * sizeof(cl_device_id)); + INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_context_properties)); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getdeviceids.c b/socl/src/cl_getdeviceids.c new file mode 100644 index 0000000..27247ba --- /dev/null +++ b/socl/src/cl_getdeviceids.c @@ -0,0 +1,96 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "init.h" + +/** + * \brief Return one device of each kind + * + * \param[in] platform Must be StarPU platform ID or NULL + */ +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) +{ + if (socl_init_starpu() < 0) + { + *num_devices = 0; + return CL_SUCCESS; + } + + if (_starpu_init_failed) + { + *num_devices = 0; + return CL_SUCCESS; + } + + if (platform != NULL && platform != &socl_platform) + return CL_INVALID_PLATFORM; + + if ((devices != NULL && num_entries == 0) + || (devices == NULL && num_devices == NULL)) + return CL_INVALID_VALUE; + + if (!(device_type & (CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT)) + && (device_type != CL_DEVICE_TYPE_ALL)) + return CL_INVALID_DEVICE_TYPE; + + int ndevs = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + + int workers[ndevs]; + starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, ndevs); + + if (socl_devices == NULL) + { + socl_device_count = ndevs; + socl_devices = malloc(sizeof(struct _cl_device_id) * ndevs); + int i; + for (i=0; i < ndevs; i++) + { + int devid = starpu_worker_get_devid(workers[i]); + socl_devices[i].dispatch = &socl_master_dispatch; + socl_devices[i].worker_id = workers[i]; + socl_devices[i].device_id = devid; + } + } + + int i; + unsigned int num = 0; + for (i=0; i < ndevs; i++) + { + int devid = socl_devices[i].device_id; + cl_device_id dev; + starpu_opencl_get_device(devid, &dev); + cl_device_type typ; + clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(typ), &typ, NULL); + if (typ & device_type) + { + if (devices != NULL && num < num_entries) devices[num] = &socl_devices[i]; + num++; + } + } + + if (num_devices != NULL) + *num_devices = num; + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getdeviceinfo.c b/socl/src/cl_getdeviceinfo.c new file mode 
100644 index 0000000..fa05984 --- /dev/null +++ b/socl/src/cl_getdeviceinfo.c @@ -0,0 +1,56 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + //FIXME: we do not check if the device is valid + /* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier) + return CL_INVALID_DEVICE;*/ + + int devid = device->device_id; + + cl_device_id dev; + starpu_opencl_get_device(devid, &dev); + + int ret = CL_SUCCESS; + + switch (param_name) + { + case CL_DEVICE_PLATFORM: + { + cl_platform_id p = &socl_platform; + INFO_CASE_EX2(p); + } + case CL_DEVICE_IMAGE_SUPPORT: + { + cl_bool res = CL_FALSE; + INFO_CASE_EX2(res); + } + default: + ret = clGetDeviceInfo(dev, param_name, param_value_size, param_value, param_value_size_ret); + } + + return ret; +} diff --git a/socl/src/cl_geteventinfo.c b/socl/src/cl_geteventinfo.c new file mode 100644 index 0000000..1f8624d --- /dev/null +++ b/socl/src/cl_geteventinfo.c @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (event == NULL) + return CL_INVALID_EVENT; + +#define STAT_CASE(starpu,opencl) case starpu: \ + status = opencl; \ + break; + + switch (param_name) + { + INFO_CASE(CL_EVENT_COMMAND_QUEUE, event->cq); + INFO_CASE(CL_EVENT_COMMAND_TYPE, event->command->typ); + INFO_CASE(CL_EVENT_COMMAND_EXECUTION_STATUS, event->status); + INFO_CASE(CL_EVENT_REFERENCE_COUNT, event->_entity.refs); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_geteventprofilinginfo.c b/socl/src/cl_geteventprofilinginfo.c new file mode 100644 index 0000000..2a36ee0 --- /dev/null +++ b/socl/src/cl_geteventprofilinginfo.c @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + switch (param_name) + { + INFO_CASE_VALUE(CL_PROFILING_COMMAND_QUEUED, cl_ulong, event->prof_queued); + INFO_CASE_VALUE(CL_PROFILING_COMMAND_SUBMIT, cl_ulong, event->prof_submit); + INFO_CASE_VALUE(CL_PROFILING_COMMAND_START, cl_ulong, event->prof_start); + INFO_CASE_VALUE(CL_PROFILING_COMMAND_END, cl_ulong, event->prof_end); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getextensionfunctionaddress.c b/socl/src/cl_getextensionfunctionaddress.c new file mode 100644 index 0000000..0cdbd48 --- /dev/null +++ b/socl/src/cl_getextensionfunctionaddress.c @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "socl.h" +#include "init.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY void * CL_API_CALL +soclGetExtensionFunctionAddress(const char * func_name) +{ + if (func_name != NULL && strcmp(func_name, "clShutdown") == 0) + { + return (void*)soclShutdown; + } + + return NULL; +} + +CL_API_ENTRY void * CL_API_CALL +soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2 +{ + if (p != &socl_platform) + return NULL; + + return soclGetExtensionFunctionAddress(func_name); +} + +CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 +{ + if(func_name != NULL && strcmp("clIcdGetPlatformIDsKHR", func_name) == 0) + return (void *)soclIcdGetPlatformIDsKHR; + return NULL; +} diff --git a/socl/src/cl_getimageinfo.c b/socl/src/cl_getimageinfo.c new file mode 100644 index 0000000..36ff03d --- /dev/null +++ b/socl/src/cl_getimageinfo.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetImageInfo(cl_mem UNUSED(image), + cl_image_info UNUSED(param_name), + size_t UNUSED(param_value_size), + void * UNUSED(param_value), + size_t * UNUSED(param_value_size_ret)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_getkernelinfo.c b/socl/src/cl_getkernelinfo.c new file mode 100644 index 0000000..a95337d --- /dev/null +++ b/socl/src/cl_getkernelinfo.c @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (kernel == NULL) + return CL_INVALID_KERNEL; + + switch (param_name) + { + INFO_CASE_EX(CL_KERNEL_FUNCTION_NAME, kernel->kernel_name, strlen(kernel->kernel_name)+1); + INFO_CASE(CL_KERNEL_NUM_ARGS, kernel->num_args); + INFO_CASE(CL_KERNEL_REFERENCE_COUNT, kernel->_entity.refs); + INFO_CASE(CL_KERNEL_PROGRAM, kernel->program); + INFO_CASE(CL_KERNEL_CONTEXT, kernel->program->context); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getkernelworkgroupinfo.c b/socl/src/cl_getkernelworkgroupinfo.c new file mode 100644 index 0000000..2387398 --- /dev/null +++ b/socl/src/cl_getkernelworkgroupinfo.c @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + int range = starpu_worker_get_range_by_id(device->worker_id); + cl_device_id dev; + starpu_opencl_get_device(device->device_id, &dev); + + return clGetKernelWorkGroupInfo(kernel->cl_kernels[range], dev, + param_name, param_value_size, param_value, param_value_size_ret); +} diff --git a/socl/src/cl_getmemobjectinfo.c b/socl/src/cl_getmemobjectinfo.c new file mode 100644 index 0000000..54dbd69 --- /dev/null +++ b/socl/src/cl_getmemobjectinfo.c @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetMemObjectInfo(cl_mem mem, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + static cl_mem_object_type mot = CL_MEM_OBJECT_BUFFER; + + switch (param_name) + { + INFO_CASE(CL_MEM_TYPE, mot); + INFO_CASE(CL_MEM_FLAGS, mem->flags); + INFO_CASE(CL_MEM_SIZE, mem->size); + INFO_CASE(CL_MEM_HOST_PTR, mem->host_ptr); + INFO_CASE(CL_MEM_MAP_COUNT, mem->map_count); + INFO_CASE(CL_MEM_REFERENCE_COUNT, mem->_entity.refs); + INFO_CASE(CL_MEM_CONTEXT, mem->context); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getplatformids.c b/socl/src/cl_getplatformids.c new file mode 100644 index 0000000..55da70e --- /dev/null +++ b/socl/src/cl_getplatformids.c @@ -0,0 +1,50 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +extern int _starpu_init_failed; + +/** + * \brief Get StarPU platform ID + */ +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) +{ + if (_starpu_init_failed) + { + if (num_platforms != NULL) + *num_platforms = 0; + return CL_SUCCESS; + } + + if ((num_entries == 0 && platforms != NULL) + || (num_platforms == NULL && platforms == NULL)) + return CL_INVALID_VALUE; + else + { + if (platforms != NULL) + platforms[0] = &socl_platform; + + if (num_platforms != NULL) + *num_platforms = 1; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getplatforminfo.c b/socl/src/cl_getplatforminfo.c new file mode 100644 index 0000000..92d2e83 --- /dev/null +++ b/socl/src/cl_getplatforminfo.c @@ -0,0 +1,50 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +/** + * \brief Get information about StarPU platform + * + * \param[in] platform StarPU platform ID or NULL + */ +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (platform != NULL && platform != &socl_platform) + return CL_INVALID_PLATFORM; + + switch (param_name) + { + INFO_CASE_STRING(CL_PLATFORM_PROFILE, SOCL_PROFILE); + INFO_CASE_STRING(CL_PLATFORM_VERSION, SOCL_VERSION); + INFO_CASE_STRING(CL_PLATFORM_NAME, SOCL_PLATFORM_NAME); + INFO_CASE_STRING(CL_PLATFORM_VENDOR, SOCL_VENDOR); + INFO_CASE_STRING(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS); + INFO_CASE_STRING(CL_PLATFORM_ICD_SUFFIX_KHR, SOCL_PLATFORM_ICD_SUFFIX_KHR); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getprogrambuildinfo.c b/socl/src/cl_getprogrambuildinfo.c new file mode 100644 index 0000000..c7d47db --- /dev/null +++ b/socl/src/cl_getprogrambuildinfo.c @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetProgramBuildInfo(cl_program program, + cl_device_id UNUSED(device), + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (program == NULL) + return CL_INVALID_PROGRAM; + + switch (param_name) + { + //TODO + //INFO_CASE(CL_PROGRAM_BUILD_STATUS, program->build_status); + INFO_CASE_EX(CL_PROGRAM_BUILD_OPTIONS, program->options, program->options_size); + //TODO + //INFO_CASE(CL_PROGRAM_BUILD_LOG, program->build_log); + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getprograminfo.c b/socl/src/cl_getprograminfo.c new file mode 100644 index 0000000..f39cdff --- /dev/null +++ b/socl/src/cl_getprograminfo.c @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if (program == NULL) + return CL_INVALID_PROGRAM; + + switch (param_name) + { + INFO_CASE(CL_PROGRAM_REFERENCE_COUNT, program->_entity.refs); + INFO_CASE(CL_PROGRAM_CONTEXT, program->context); + INFO_CASE(CL_PROGRAM_NUM_DEVICES, program->context->num_devices); + INFO_CASE_EX(CL_PROGRAM_DEVICES, program->context->devices, sizeof(cl_device_id)*program->context->num_devices); + //TODO + /*INFO_CASE(CL_PROGRAM_SOURCE, program->source); + INFO_CASE(CL_PROGRAM_BINARY_SIZE, program->binary_sizes); + INFO_CASE(CL_PROGRAM_BINARIES, program->binaries);*/ + default: + return CL_INVALID_VALUE; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_getsamplerinfo.c b/socl/src/cl_getsamplerinfo.c new file mode 100644 index 0000000..8f4d111 --- /dev/null +++ b/socl/src/cl_getsamplerinfo.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" +#include "getinfo.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetSamplerInfo(cl_sampler UNUSED(sampler), + cl_sampler_info UNUSED(param_name), + size_t UNUSED(param_value_size), + void * UNUSED(param_value), + size_t * UNUSED(param_value_size_ret)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_getsupportedimageformats.c b/socl/src/cl_getsupportedimageformats.c new file mode 100644 index 0000000..3edb363 --- /dev/null +++ b/socl/src/cl_getsupportedimageformats.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclGetSupportedImageFormats(cl_context UNUSED(context), + cl_mem_flags UNUSED(flags), + cl_mem_object_type UNUSED(image_type), + cl_uint UNUSED(num_entries), + cl_image_format * UNUSED(image_formats), + cl_uint * UNUSED(num_image_formats)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_icdgetplatformidskhr.c b/socl/src/cl_icdgetplatformidskhr.c new file mode 100644 index 0000000..acf440f --- /dev/null +++ b/socl/src/cl_icdgetplatformidskhr.c @@ -0,0 +1,40 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +extern int _starpu_init_failed; + +CL_EXT_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id *platforms, + cl_uint *num_platforms) +{ + if ((num_entries == 0 && platforms != NULL) + || (num_platforms == NULL && platforms == NULL)) + return CL_INVALID_VALUE; + else + { + if (platforms != NULL) + platforms[0] = &socl_platform; + + if (num_platforms != NULL) + *num_platforms = 1; + } + + return CL_SUCCESS; +} diff --git a/socl/src/cl_releasecommandqueue.c b/socl/src/cl_releasecommandqueue.c new file mode 100644 index 0000000..bf008b8 --- /dev/null +++ b/socl/src/cl_releasecommandqueue.c @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclReleaseCommandQueue(cl_command_queue cq) +{ + gc_entity_release(cq); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_releasecontext.c b/socl/src/cl_releasecontext.c new file mode 100644 index 0000000..ce39823 --- /dev/null +++ b/socl/src/cl_releasecontext.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclReleaseContext(cl_context context) +{ + if (context == NULL) + return CL_INVALID_CONTEXT; + + gc_entity_release(context); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_releaseevent.c b/socl/src/cl_releaseevent.c new file mode 100644 index 0000000..a41253f --- /dev/null +++ b/socl/src/cl_releaseevent.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclReleaseEvent(cl_event event)
+{
+	if (event == NULL)
+		return CL_INVALID_EVENT;
+
+	gc_entity_release(event);
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_releasekernel.c b/socl/src/cl_releasekernel.c
new file mode 100644
index 0000000..68ff3e6
--- /dev/null
+++ b/socl/src/cl_releasekernel.c
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclReleaseKernel(cl_kernel kernel)
+{
+	if (kernel == NULL)
+		return CL_INVALID_KERNEL;
+
+	gc_entity_release(kernel);
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_releasememobject.c b/socl/src/cl_releasememobject.c
new file mode 100644
index 0000000..87b6cd7
--- /dev/null
+++ b/socl/src/cl_releasememobject.c
@@ -0,0 +1,31 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/* Drop one reference on memory object mem. A NULL handle is rejected with
+ * CL_INVALID_MEM_OBJECT instead of being handed to gc_entity_release. */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclReleaseMemObject(cl_mem mem)
+{
+	if (mem == NULL)
+		return CL_INVALID_MEM_OBJECT;
+
+	gc_entity_release(mem);
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_releaseprogram.c b/socl/src/cl_releaseprogram.c
new file mode 100644
index 0000000..322273a
--- /dev/null
+++ b/socl/src/cl_releaseprogram.c
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclReleaseProgram(cl_program program) +{ + if (program == NULL) + return CL_INVALID_PROGRAM; + + gc_entity_release(program); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_releasesampler.c b/socl/src/cl_releasesampler.c new file mode 100644 index 0000000..a0ca537 --- /dev/null +++ b/socl/src/cl_releasesampler.c @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclReleaseSampler(cl_sampler UNUSED(sampler)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_retaincommandqueue.c b/socl/src/cl_retaincommandqueue.c new file mode 100644 index 0000000..876e579 --- /dev/null +++ b/socl/src/cl_retaincommandqueue.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainCommandQueue(cl_command_queue cq) +{ + if (cq == NULL) + return CL_INVALID_COMMAND_QUEUE; + + gc_entity_retain(cq); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retaincontext.c b/socl/src/cl_retaincontext.c new file mode 100644 index 0000000..b7ec3f0 --- /dev/null +++ b/socl/src/cl_retaincontext.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainContext(cl_context context) +{ + if (context == NULL) + return CL_INVALID_CONTEXT; + + gc_entity_retain(context); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retainevent.c b/socl/src/cl_retainevent.c new file mode 100644 index 0000000..b005072 --- /dev/null +++ b/socl/src/cl_retainevent.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainEvent(cl_event event) +{ + if (event == NULL) + return CL_INVALID_EVENT; + + gc_entity_retain(event); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retainkernel.c b/socl/src/cl_retainkernel.c new file mode 100644 index 0000000..13865ed --- /dev/null +++ b/socl/src/cl_retainkernel.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainKernel(cl_kernel kernel) +{ + if (kernel == NULL) + return CL_INVALID_KERNEL; + + gc_entity_retain(kernel); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retainmemobject.c b/socl/src/cl_retainmemobject.c new file mode 100644 index 0000000..0e4644c --- /dev/null +++ b/socl/src/cl_retainmemobject.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainMemObject(cl_mem mem) +{ + if (mem == NULL) + return CL_INVALID_MEM_OBJECT; + + gc_entity_retain(mem); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retainprogram.c b/socl/src/cl_retainprogram.c new file mode 100644 index 0000000..a4008fc --- /dev/null +++ b/socl/src/cl_retainprogram.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainProgram(cl_program program) +{ + if (program == NULL) + return CL_INVALID_PROGRAM; + + gc_entity_retain(program); + + return CL_SUCCESS; +} diff --git a/socl/src/cl_retainsampler.c b/socl/src/cl_retainsampler.c new file mode 100644 index 0000000..1ea0554 --- /dev/null +++ b/socl/src/cl_retainsampler.c @@ -0,0 +1,24 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +CL_API_SUFFIX__VERSION_1_0 +CL_API_ENTRY cl_int CL_API_CALL +soclRetainSampler(cl_sampler UNUSED(sampler)) +{ + return CL_INVALID_OPERATION; +} diff --git a/socl/src/cl_setcommandqueueproperty.c b/socl/src/cl_setcommandqueueproperty.c new file mode 100644 index 0000000..9086e4a --- /dev/null +++ b/socl/src/cl_setcommandqueueproperty.c @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclSetCommandQueueProperty(cl_command_queue command_queue,
+	cl_command_queue_properties properties,
+	cl_bool enable,
+	cl_command_queue_properties * old_properties)
+{
+	if (command_queue == NULL)
+		return CL_INVALID_COMMAND_QUEUE;
+
+	if (old_properties != NULL)
+		*old_properties = command_queue->properties;
+
+	if (enable)
+	{
+		//Enable StarPU profiling if necessary
+		if (properties & (~command_queue->properties) & CL_QUEUE_PROFILING_ENABLE)
+		{
+			if (profiling_queue_count == 0)
+				starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
+			profiling_queue_count += 1;
+		}
+		//Set new properties
+		command_queue->properties |= properties;
+	}
+	else
+	{
+		//Disable StarPU profiling only if this call clears a profiling bit that is currently set
+		if (properties & command_queue->properties & CL_QUEUE_PROFILING_ENABLE)
+		{
+			profiling_queue_count -= 1;
+			if (profiling_queue_count == 0)
+				starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+		}
+		//Set new properties
+		command_queue->properties &= ~properties;
+	}
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_setkernelarg.c b/socl/src/cl_setkernelarg.c
new file mode 100644
index 0000000..1fd606c
--- /dev/null
+++ b/socl/src/cl_setkernelarg.c
@@ -0,0 +1,123 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/* Set argument arg_index of kernel from the arg_size bytes at arg_value.
+ *
+ * Indices (cl_uint)-1, -2 and -3 are out-of-band slots that set the kernel's
+ * split_func, split_space (with a zeroed split_perfs array) and split_data.
+ * Returns CL_OUT_OF_HOST_MEMORY when a host copy cannot be allocated; the
+ * argument is then left unset (Null).
+ */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclSetKernelArg(cl_kernel kernel,
+	cl_uint arg_index,
+	size_t arg_size,
+	const void * arg_value)
+{
+	if (kernel == NULL)
+		return CL_INVALID_KERNEL;
+
+	if (arg_index == (cl_uint)-1)
+	{
+		kernel->split_func = arg_value;
+		return CL_SUCCESS;
+	}
+	else if (arg_index == (cl_uint)-2)
+	{
+		if (arg_value == NULL)
+			return CL_INVALID_ARG_VALUE;
+
+		kernel->split_space = *(const cl_uint*)arg_value;
+		free(kernel->split_perfs);
+		kernel->split_perfs = calloc(kernel->split_space, sizeof(cl_ulong));
+		if (kernel->split_perfs == NULL && kernel->split_space != 0)
+		{
+			kernel->split_space = 0;
+			return CL_OUT_OF_HOST_MEMORY;
+		}
+		return CL_SUCCESS;
+	}
+	else if (arg_index == (cl_uint)-3)
+	{
+		kernel->split_data = (void *)arg_value;
+		return CL_SUCCESS;
+	}
+
+	if (arg_index >= kernel->num_args)
+		return CL_INVALID_ARG_INDEX;
+
+	//FIXME: we don't return CL_INVALID_ARG_VALUE if "arg_value is NULL for an argument that is not declared with __local qualifier or vice-versa"
+	//FIXME: we don't return CL_INVALID_MEM_OBJECT
+	//FIXME: we don't return CL_INVALID_ARG_SIZE
+
+	/* Free previous argument (set to NULL) */
+	switch (kernel->arg_type[arg_index])
+	{
+	case Null:
+		break;
+	case Buffer:
+		kernel->arg_type[arg_index] = Null;
+		free(kernel->arg_value[arg_index]);
+		kernel->arg_value[arg_index] = NULL;
+		break;
+	case Immediate:
+		free(kernel->arg_value[arg_index]);
+		kernel->arg_type[arg_index] = Null;
+		kernel->arg_value[arg_index] = NULL;
+		break;
+	}
+
+	kernel->arg_type[arg_index] = Null;
+	kernel->arg_size[arg_index] = arg_size;
+
+	DEBUG_MSG("[Kernel %d] Set argument %u: argsize %ld argvalue %p\n", kernel->id, arg_index, (long)arg_size, arg_value);
+
+	/* Argument is not Null */
+	if (arg_value != NULL)
+	{
+		cl_mem buf = NULL;
+		/* Check if argument is a memory object */
+		if ((arg_size == sizeof(cl_mem)) && ((buf = mem_object_fetch(arg_value)) != NULL))
+		{
+			DEBUG_MSG("Found buffer %d \n", buf->id);
+			cl_mem *slot = malloc(sizeof(*slot));
+			if (slot == NULL)
+				return CL_OUT_OF_HOST_MEMORY;
+
+			*slot = buf; //We do not use gc_entity_store here because kernels do not hold reference on buffers (see OpenCL spec)
+			kernel->arg_type[arg_index] = Buffer;
+			kernel->arg_value[arg_index] = slot;
+		}
+		else
+		{
+			/* Argument must be an immediate buffer */
+			DEBUG_MSG("Immediate data\n");
+			void *copy = malloc(arg_size);
+			if (copy == NULL && arg_size != 0)
+				return CL_OUT_OF_HOST_MEMORY;
+
+			if (arg_size != 0)
+				memcpy(copy, arg_value, arg_size);
+			kernel->arg_type[arg_index] = Immediate;
+			kernel->arg_value[arg_index] = copy;
+		}
+	}
+
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_unloadcompiler.c b/socl/src/cl_unloadcompiler.c
new file mode 100644
index 0000000..d836e30
--- /dev/null
+++ b/socl/src/cl_unloadcompiler.c
@@ -0,0 +1,24 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclUnloadCompiler(void)
+{
+	return CL_SUCCESS;
+}
diff --git a/socl/src/cl_waitforevents.c b/socl/src/cl_waitforevents.c
new file mode 100644
index 0000000..02deb64
--- /dev/null
+++ b/socl/src/cl_waitforevents.c
@@ -0,0 +1,42 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/* Wait on each of the num_events events listed in event_list. */
+/* NOTE(review): both loop headers, the "%d%s" format and the starpu_tag_wait call were rebuilt from damaged text -- confirm against upstream */
+CL_API_SUFFIX__VERSION_1_0
+CL_API_ENTRY cl_int CL_API_CALL
+soclWaitForEvents(cl_uint num_events,
+	const cl_event * event_list)
+{
+	unsigned int i;
+
+#ifdef DEBUG
+	DEBUG_MSG("Waiting for events: ");
+	for (i=0; i<num_events; i++)
+	{
+		DEBUG_MSG_NOHEAD("%d%s", event_list[i]->id, i == (num_events-1) ? "" : ", ");
+	}
+	DEBUG_MSG_NOHEAD("\n");
+#endif
+
+	for (i=0; i<num_events; i++)
+		starpu_tag_wait(event_list[i]->id);
+
+	DEBUG_MSG("Stop waiting :)\n");
+	return CL_SUCCESS;
+}
diff --git a/socl/src/command.c b/socl/src/command.c
new file mode 100644
index 0000000..804b27c
--- /dev/null
+++ b/socl/src/command.c
@@ -0,0 +1,353 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+#include <string.h>
+
+/* Forward extern declaration */
+extern void soclEnqueueNDRangeKernel_task(void *descr[], void *args);
+
+cl_event command_event_get_ex(cl_command cmd)
+{
+	cl_event ev = cmd->event;
+	gc_entity_retain(ev);
+	return ev;
+}
+
+static void command_release_callback(void *a)
+{
+	cl_command cmd = (cl_command)a;
+
+	// Call command specific release callback
+	if (cmd->release_callback != NULL)
+		cmd->release_callback(cmd);
+
+	// Generic command destructor
+	cl_uint i;
+	for (i=0; i<cmd->num_events; i++)
+	{
+		gc_entity_unstore(&cmd->events[i]);
+	}
+	cmd->num_events = 0;
+	free(cmd->events);
+
+	/* Remove from command queue */
+	cl_command_queue cq = cmd->event->cq;
+	if (cq != NULL)
+	{
+		/* Lock command queue */
+		STARPU_PTHREAD_MUTEX_LOCK(&cq->mutex);
+
+		/* Remove barrier if applicable */
+		if (cq->barrier == cmd)
+			cq->barrier = NULL;
+
+		/* Remove from the list of out-of-order commands */
+		cq->commands = command_list_remove(cq->commands, cmd);
+
+		/* Unlock command queue */
+		STARPU_PTHREAD_MUTEX_UNLOCK(&cq->mutex);
+	}
+
+	// Events may survive to commands that created them
+	cmd->event->command = NULL;
+	gc_entity_unstore(&cmd->event);
+}
+
+void command_init_ex(cl_command cmd, cl_command_type typ, void (*cb)(void*))
+{
+	gc_entity_init(&cmd->_entity, command_release_callback, "command");
+	cmd->release_callback = cb;
+	cmd->typ = typ;
+	cmd->num_events = 0;
+	cmd->events = NULL;
+	cmd->event = event_create(); // we do not use gc_entity_store here because if nobody requires the event, it should be destroyed with the command
+	cmd->event->command = cmd;
+	cmd->task = NULL;
+	cmd->submitted = 0;
+}
+
+void command_submit_ex(cl_command cmd)
+{
+#define SUBMIT(typ,name) case typ: \
+	name##_submit((name)cmd); \
+	break;
+
+	assert(cmd->submitted == 0);
+
+	switch(cmd->typ)
+	{
+		SUBMIT(CL_COMMAND_NDRANGE_KERNEL, command_ndrange_kernel);
+		SUBMIT(CL_COMMAND_TASK, command_ndrange_kernel);
+		SUBMIT(CL_COMMAND_READ_BUFFER, command_read_buffer);
+		SUBMIT(CL_COMMAND_WRITE_BUFFER, command_write_buffer);
+		SUBMIT(CL_COMMAND_COPY_BUFFER, command_copy_buffer);
+		SUBMIT(CL_COMMAND_MAP_BUFFER, command_map_buffer);
+		SUBMIT(CL_COMMAND_UNMAP_MEM_OBJECT, command_unmap_mem_object);
+		SUBMIT(CL_COMMAND_MARKER, command_marker);
+		SUBMIT(CL_COMMAND_BARRIER, command_barrier);
+	default:
+		ERROR_STOP("Trying to submit unknown command (type %x)", cmd->typ);
+	}
+
+	cmd->submitted = 1;
+#undef SUBMIT
+}
+
+cl_int command_submit_deep_ex(cl_command cmd)
+{
+	/* A dependency event may outlive the command that created it (its
+	 * command pointer is then NULL): nothing is left to submit. */
+	if (cmd == NULL || cmd->submitted == 1)
+		return CL_SUCCESS;
+
+	/* We set this in order to avoid cyclic dependencies */
+	cmd->submitted = 1;
+
+	unsigned int i;
+	for (i=0; i<cmd->num_events; i++)
+		command_submit_deep(cmd->events[i]->command);
+
+	cmd->submitted = 0;
+
+	command_submit_ex(cmd);
+
+	return CL_SUCCESS;
+}
+
+void command_graph_dump_ex(cl_command cmd)
+{
+	/* Dependency events whose command was already released carry a NULL command */
+	if (cmd == NULL)
+		return;
+
+	unsigned int i;
+	for (i=0; i<cmd->num_events; i++)
+		command_graph_dump_ex(cmd->events[i]->command);
+
+	const char * typ_str = (cmd->typ == CL_COMMAND_NDRANGE_KERNEL ? "ndrange_kernel" :
+				cmd->typ == CL_COMMAND_TASK ? "task" :
+				cmd->typ == CL_COMMAND_READ_BUFFER ? "read_buffer" :
+				cmd->typ == CL_COMMAND_WRITE_BUFFER ? "write_buffer" :
+				cmd->typ == CL_COMMAND_COPY_BUFFER ? "copy_buffer" :
+				cmd->typ == CL_COMMAND_MAP_BUFFER ? "map_buffer" :
+				cmd->typ == CL_COMMAND_UNMAP_MEM_OBJECT ? "unmap_mem_object" :
+				cmd->typ == CL_COMMAND_MARKER ? "marker" :
+				cmd->typ == CL_COMMAND_BARRIER ? "barrier" : "unknown");
+
+	printf("CMD %p TYPE %s DEPS", cmd, typ_str);
+	for (i=0; i<cmd->num_events; i++)
+		printf(" %p", cmd->events[i]->command);
+	printf("\n");
+}
+
+#define nullOrDup(name,size) cmd->name = memdup_safe(name,size)
+#define nullOrFree(name) if (cmd->name != NULL) free((void*)cmd->name)
+#define dup(name) cmd->name = name
+
+void command_ndrange_kernel_release(void * arg)
+{
+	command_ndrange_kernel cmd = (command_ndrange_kernel)arg;
+
+	gc_entity_unstore(&cmd->kernel);
+	nullOrFree(global_work_offset);
+	nullOrFree(global_work_size);
+	nullOrFree(local_work_size);
+	free(cmd->arg_sizes);
+	free(cmd->arg_types);
+	unsigned int i;
+	for (i=0; i<cmd->num_args; i++)
+	{
+		free(cmd->args[i]);
+		cmd->args[i] = NULL;
+	}
+	free(cmd->args);
+
+	for (i=0; i<cmd->num_buffers; i++)
+		gc_entity_unstore(&cmd->buffers[i]);
+
+	free(cmd->buffers);
+}
+
+command_ndrange_kernel command_ndrange_kernel_create(cl_kernel kernel,
+	cl_uint work_dim,
+	const size_t * global_work_offset,
+	const size_t * global_work_size,
+	const size_t * local_work_size)
+{
+	command_ndrange_kernel cmd = calloc(1, sizeof(struct command_ndrange_kernel_t));
+	command_init(cmd, CL_COMMAND_NDRANGE_KERNEL, command_ndrange_kernel_release);
+
+	gc_entity_store(&cmd->kernel, kernel);
+
+	dup(work_dim);
+	nullOrDup(global_work_offset, work_dim*sizeof(size_t));
+	nullOrDup(global_work_size, work_dim*sizeof(size_t));
+	nullOrDup(local_work_size, work_dim*sizeof(size_t));
+
+	starpu_codelet_init(&cmd->codelet);
+	cmd->codelet.where = STARPU_OPENCL;
+	cmd->codelet.energy_model = NULL;
+	cmd->codelet.opencl_funcs[0] = &soclEnqueueNDRangeKernel_task;
+
+	/* Kernel is mutable, so we duplicate its parameters... */
+	cmd->num_args = kernel->num_args;
+	cmd->arg_sizes = memdup(kernel->arg_size, sizeof(size_t) * kernel->num_args);
+	cmd->arg_types = memdup(kernel->arg_type, sizeof(enum kernel_arg_type) * kernel->num_args);
+	cmd->args = memdup_deep_varsize_safe(kernel->arg_value, kernel->num_args, kernel->arg_size);
+
+	return cmd;
+}
+
+command_ndrange_kernel command_task_create (cl_kernel kernel)
+{
+	static cl_uint task_work_dim = 3;
+	static const size_t task_global_work_offset[3] = {0,0,0};
+	static const size_t task_global_work_size[3] = {1,1,1};
+	static const size_t * task_local_work_size = NULL;
+
+	command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, task_work_dim, task_global_work_offset,
+		task_global_work_size, task_local_work_size);
+
+	/* This is the only difference with command_ndrange_kernel_create */
+	cmd->_command.typ = CL_COMMAND_TASK;
+
+	return cmd;
+}
+
+command_barrier command_barrier_create ()
+{
+	command_barrier cmd = malloc(sizeof(struct command_barrier_t));
+	command_init(cmd, CL_COMMAND_BARRIER, NULL);
+
+	return cmd;
+}
+
+command_marker command_marker_create ()
+{
+	command_marker cmd = malloc(sizeof(struct command_marker_t));
+	command_init(cmd, CL_COMMAND_MARKER, NULL);
+
+	return cmd;
+}
+
+void command_map_buffer_release(void * UNUSED(arg))
+{
+	/* We DO NOT unstore (release) the buffer as unmap will do it
+	   gc_entity_unstore(&cmd->buffer); */
+}
+
+command_map_buffer command_map_buffer_create(cl_mem buffer,
+	cl_map_flags map_flags,
+	size_t offset,
+	size_t cb)
+{
+	command_map_buffer cmd = malloc(sizeof(struct command_map_buffer_t));
+	command_init(cmd, CL_COMMAND_MAP_BUFFER, command_map_buffer_release);
+
+	gc_entity_store(&cmd->buffer, buffer);
+	dup(map_flags);
+	dup(offset);
+	dup(cb);
+
+	return cmd;
+}
+
+void command_unmap_mem_object_release(void * arg)
+{
+	command_unmap_mem_object cmd = (command_unmap_mem_object)arg;
+
+	/* We release the buffer twice because map buffer command did not */
+	gc_entity_release(cmd->buffer);
+	gc_entity_unstore(&cmd->buffer);
+}
+
+command_unmap_mem_object command_unmap_mem_object_create(cl_mem buffer, void * ptr)
+{
+	command_unmap_mem_object cmd = malloc(sizeof(struct command_unmap_mem_object_t));
+	command_init(cmd, CL_COMMAND_UNMAP_MEM_OBJECT, command_unmap_mem_object_release);
+
+	gc_entity_store(&cmd->buffer, buffer);
+	dup(ptr);
+
+	return cmd;
+}
+
+void command_read_buffer_release(void *arg)
+{
+	command_read_buffer cmd = (command_read_buffer)arg;
+	gc_entity_unstore(&cmd->buffer);
+}
+
+command_read_buffer command_read_buffer_create(cl_mem buffer, size_t offset, size_t cb, void * ptr)
+{
+	command_read_buffer cmd = malloc(sizeof(struct command_read_buffer_t));
+	command_init(cmd, CL_COMMAND_READ_BUFFER, command_read_buffer_release);
+
+	gc_entity_store(&cmd->buffer, buffer);
+	dup(offset);
+	dup(cb);
+	dup(ptr);
+
+	return cmd;
+}
+
+void command_write_buffer_release(void *arg)
+{
+	command_write_buffer cmd = (command_write_buffer)arg;
+	gc_entity_unstore(&cmd->buffer);
+}
+
+command_write_buffer command_write_buffer_create(cl_mem buffer, size_t offset, size_t cb, const void * ptr)
+{
+	command_write_buffer cmd = malloc(sizeof(struct command_write_buffer_t));
+	command_init(cmd, CL_COMMAND_WRITE_BUFFER, command_write_buffer_release);
+
+	gc_entity_store(&cmd->buffer, buffer);
+	dup(offset);
+	dup(cb);
+	dup(ptr);
+
+	return cmd;
+}
+
+void command_copy_buffer_release(void *arg)
+{
+	command_copy_buffer cmd = (command_copy_buffer)arg;
+	gc_entity_unstore(&cmd->src_buffer);
+	gc_entity_unstore(&cmd->dst_buffer);
+}
+
+command_copy_buffer command_copy_buffer_create(cl_mem src_buffer, cl_mem dst_buffer,
+	size_t src_offset, size_t dst_offset, size_t cb)
+{
+	command_copy_buffer cmd = malloc(sizeof(struct command_copy_buffer_t));
+	command_init(cmd, CL_COMMAND_COPY_BUFFER, command_copy_buffer_release);
+
+	gc_entity_store(&cmd->src_buffer, src_buffer);
+	gc_entity_store(&cmd->dst_buffer, dst_buffer);
+	dup(src_offset);
+	dup(dst_offset);
+	dup(cb);
+
+	return cmd;
+}
+
+#undef nullOrDup
+#undef nodeNullOrDup
+#undef dup
+#undef nodeDup
+#undef memdup
diff --git a/socl/src/command.h b/socl/src/command.h
new file mode 100644
index 0000000..c09ec8f
--- /dev/null
+++ b/socl/src/command.h
@@ -0,0 +1,211 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+#ifndef SOCL_COMMANDS_H
+#define SOCL_COMMANDS_H
+
+typedef struct cl_command_t * cl_command;
+
+#define gc_entity_store_cmd(dest,cmd) gc_entity_store(dest, &cmd->_command)
+#define gc_entity_release_cmd(cmd) gc_entity_release(&cmd->_command)
+
+/**
+ * Initialize a command structure
+ *
+ * Command constructors for each kind of command use this method
+ * Implicit and explicit dependencies must be passed as parameters
+ */
+void command_init_ex(cl_command cmd, cl_command_type typ, void (*cb)(void*));
+#define command_init(cmd,typ,cb) \
+	command_init_ex((cl_command)cmd,typ,cb)
+
+void command_release(cl_command cmd);
+
+/** Submit a command for execution */
+void command_submit_ex(cl_command cmd);
+#define command_submit(cmd) \
+	command_submit_ex(&(cmd)->_command)
+
+/** Submit a command and its dependencies */
+cl_int command_submit_deep_ex(cl_command cmd);
+#define command_submit_deep(cmd) (command_submit_deep_ex((cl_command)cmd))
+
+void
command_graph_dump_ex(cl_command cmd); +#define command_graph_dump(cmd) (command_graph_dump_ex((cl_command)cmd)) + +/************************** + * OpenCL Commands + **************************/ +struct cl_command_t +{ + CL_ENTITY; + cl_command_type typ; /* Command type */ + cl_uint num_events; /* Number of dependencies */ + cl_event * events; /* Dependencies */ + cl_event event; /* Event for this command */ + starpu_task task; /* Associated StarPU task, if any */ + char submitted; /* True if the command has been submitted to StarPU */ + void (*release_callback)(void*); /* Command specific destructor */ +}; + +#define command_type_get(cmd) (((cl_command)cmd)->typ) + +cl_event command_event_get_ex(cl_command cmd); +#define command_event_get(cmd) command_event_get_ex(&cmd->_command) + +#define command_num_events_get_ex(cmd) (cmd->num_events) +#define command_num_events_get(cmd) ((cmd)->_command.num_events) +#define command_events_get_ex(cmd) ((cmd)->events) +#define command_events_get(cmd) ((cmd)->_command.events) +#define command_task_get(cmd) ((cmd)->_command.task) +#define command_cq_get(cmd) ((cmd)->_command.cq) + +#define CL_COMMAND struct cl_command_t _command; + +typedef struct command_ndrange_kernel_t +{ + CL_COMMAND + + cl_kernel kernel; + struct starpu_codelet codelet; + cl_uint work_dim; + const size_t * global_work_offset; + const size_t * global_work_size; + const size_t * local_work_size; + cl_uint num_args; + size_t * arg_sizes; + enum kernel_arg_type * arg_types; + void ** args; + cl_uint num_buffers; + cl_mem * buffers; +} * command_ndrange_kernel; + + +typedef struct command_read_buffer_t +{ + CL_COMMAND + + cl_mem buffer; + size_t offset; + size_t cb; + void * ptr; +} * command_read_buffer; + +typedef struct command_write_buffer_t +{ + CL_COMMAND + + cl_mem buffer; + size_t offset; + size_t cb; + const void * ptr; +} * command_write_buffer; + +typedef struct command_copy_buffer_t +{ + CL_COMMAND + + cl_mem src_buffer; + cl_mem dst_buffer; + size_t 
src_offset; + size_t dst_offset; + size_t cb; +} * command_copy_buffer; + +typedef struct command_map_buffer_t +{ + CL_COMMAND + + cl_mem buffer; + cl_map_flags map_flags; + size_t offset; + size_t cb; +} * command_map_buffer; + +typedef struct command_unmap_mem_object_t +{ + CL_COMMAND + + cl_mem buffer; + void * ptr; +} * command_unmap_mem_object; + +typedef struct command_marker_t +{ + CL_COMMAND +} * command_marker; + +typedef struct command_barrier_t +{ + CL_COMMAND +} * command_barrier; + +/************************* + * Constructor functions + *************************/ + +command_ndrange_kernel command_ndrange_kernel_create (cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size); + +command_ndrange_kernel command_task_create (cl_kernel kernel); + +command_barrier command_barrier_create (); + +command_marker command_marker_create (); + +command_map_buffer command_map_buffer_create(cl_mem buffer, + cl_map_flags map_flags, + size_t offset, + size_t cb); + +command_unmap_mem_object command_unmap_mem_object_create(cl_mem buffer, + void * ptr); + +command_read_buffer command_read_buffer_create(cl_mem buffer, + size_t offset, + size_t cb, + void * ptr); + +command_write_buffer command_write_buffer_create(cl_mem buffer, + size_t offset, + size_t cb, + const void * ptr); + +command_copy_buffer command_copy_buffer_create(cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t cb); + +/************************* + * Submit functions + *************************/ +cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd); +cl_int command_read_buffer_submit(command_read_buffer cmd); +cl_int command_write_buffer_submit(command_write_buffer cmd); +cl_int command_copy_buffer_submit(command_copy_buffer cmd); +cl_int command_map_buffer_submit(command_map_buffer cmd); +cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd); +cl_int 
command_marker_submit(command_marker cmd); +cl_int command_barrier_submit(command_barrier cmd); + + +#endif /* SOCL_COMMANDS_H */ diff --git a/socl/src/command_list.c b/socl/src/command_list.c new file mode 100644 index 0000000..f678da7 --- /dev/null +++ b/socl/src/command_list.c @@ -0,0 +1,56 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +command_list command_list_cons(cl_command cmd, command_list ls) +{ + command_list e = malloc(sizeof(struct command_list_t)); + e->cmd = cmd; + e->next = ls; + e->prev = NULL; + if (ls != NULL) + ls->prev = e; + return e; +} + +/** + * Remove every occurrence of cmd in the list l + */ +command_list command_list_remove(command_list l, cl_command cmd) +{ + command_list e = l; + while (e != NULL) + { + if (e->cmd == cmd) + { + if (e->prev != NULL) e->prev->next = e->next; + if (e->next != NULL) e->next->prev = e->prev; + command_list old = e; + if (l == old) + { // list head has been removed + l = old->next; + } + e = old->next; + free(old); + } + else + { + e = e->next; + } + } + return l; +} diff --git a/socl/src/command_list.h b/socl/src/command_list.h new file mode 100644 index 0000000..6672d4a --- /dev/null +++ b/socl/src/command_list.h @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +typedef struct command_list_t * command_list; + +struct command_list_t +{ + cl_command cmd; + command_list next; + command_list prev; +}; + +command_list command_list_cons(cl_command cmd, command_list ls); +command_list command_list_remove(command_list l, cl_command cmd); diff --git a/socl/src/command_queue.c b/socl/src/command_queue.c new file mode 100644 index 0000000..e7787ae --- /dev/null +++ b/socl/src/command_queue.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "task.h" +#include "gc.h" + +/** + * WARNING: command queues do NOT hold references on events. 
Only events hold references
+ * on command queues. This way, event release will automatically remove the event from
+ * its command queue.
+ */
+
+void command_queue_enqueue_ex(cl_command_queue cq, cl_command cmd, cl_uint num_events, const cl_event * events)
+{
+	cl_event ev = command_event_get_ex(cmd);
+	ev->prof_queued = _socl_nanotime();
+	gc_entity_release(ev);
+
+	/* Check if the command is a barrier */
+	int is_barrier = (cmd->typ == CL_COMMAND_BARRIER || !(cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
+
+	/* Add references to the command queue */
+	gc_entity_store(&cmd->event->cq, cq);
+
+	/* Lock command queue */
+	STARPU_PTHREAD_MUTEX_LOCK(&cq->mutex);
+
+	/*** Number of dependencies ***/
+	int ndeps = num_events;
+
+	/* Add dependency to last barrier if applicable */
+	if (cq->barrier != NULL)
+		ndeps++;
+
+	/* Add dependencies to out-of-order events (if any) */
+	if (is_barrier)
+	{
+		command_list cl = cq->commands;
+		while (cl != NULL)
+		{
+			ndeps++;
+			cl = cl->next;
+		}
+	}
+
+	/*** Dependencies ***/
+	cl_event * deps = malloc(ndeps * sizeof(cl_event));
+
+	int n = 0;
+
+	/* Add dependency to last barrier if applicable */
+	if (cq->barrier != NULL)
+		gc_entity_store(&deps[n++], cq->barrier->event);
+
+	/* Add dependencies to out-of-order events (if any) */
+	if (is_barrier)
+	{
+		command_list cl = cq->commands;
+		while (cl != NULL)
+		{
+			gc_entity_store(&deps[n++], cl->cmd->event);
+			cl = cl->next;
+		}
+	}
+
+	/* Add explicit dependencies */
+	unsigned i;
+	for (i=0; i<num_events; i++)
+	{
+		gc_entity_store(&deps[n++], events[i]);
+	}
+
+	/* Make all dependencies explicit for the command */
+	cmd->num_events = ndeps;
+	cmd->events = deps;
+
+	/* Insert command in the queue */
+	if (is_barrier)
+	{
+		/* Remove out-of-order commands */
+		cq->commands = NULL;
+		/* Register the command as the last barrier */
+		cq->barrier = cmd;
+	}
+	else
+	{
+		/* Add command to the list of out-of-order commands */
+		cq->commands = command_list_cons(cmd, cq->commands);
+	}
+
+	/* Submit command
+	 * We need to do it before unlocking because we don't want events to get
+	 * released while we use them to set dependencies
+	 */
+	command_submit_ex(cmd);
+
+	/* Unlock command queue */
+	STARPU_PTHREAD_MUTEX_UNLOCK(&cq->mutex);
+
+	gc_entity_release(cmd);
+}
diff --git a/socl/src/command_queue.h b/socl/src/command_queue.h
new file mode 100644
index 0000000..97bf145
--- /dev/null
+++ b/socl/src/command_queue.h
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef SOCL_COMMAND_QUEUE_H
+#define SOCL_COMMAND_QUEUE_H
+
+void command_queue_enqueue_ex(cl_command_queue cq, /* Command queue */
+			      cl_command cmd, /* Command to enqueue */
+			      cl_uint num_events, /* Number of explicit dependencies */
+			      const cl_event * events /* Explicit dependencies */
+			      );
+
+#define command_queue_enqueue(cq, cmd, num_events, events)\
+	command_queue_enqueue_ex(cq, (cl_command)cmd, num_events, events)
+
+#endif /* SOCL_COMMAND_QUEUE_H */
diff --git a/socl/src/debug.c b/socl/src/debug.c
new file mode 100644
index 0000000..9f89dbf
--- /dev/null
+++ b/socl/src/debug.c
@@ -0,0 +1,74 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +void ERROR_CL(char *s, cl_int err) { +#define ERR_CASE(a) case a: ERROR_MSG("[OpenCL] %s CL error: %s\n", s, #a); break; + switch(err) { + case CL_SUCCESS: + DEBUG_MSG("[OpenCL] %s SUCCESS.\n", s); + break; + ERR_CASE(CL_DEVICE_NOT_FOUND); + ERR_CASE(CL_DEVICE_NOT_AVAILABLE); + ERR_CASE(CL_COMPILER_NOT_AVAILABLE); + ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE); + ERR_CASE(CL_OUT_OF_RESOURCES); + ERR_CASE(CL_OUT_OF_HOST_MEMORY); + ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE); + ERR_CASE(CL_MEM_COPY_OVERLAP); + ERR_CASE(CL_IMAGE_FORMAT_MISMATCH); + ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED); + ERR_CASE(CL_BUILD_PROGRAM_FAILURE); + ERR_CASE(CL_MAP_FAILURE); + ERR_CASE(CL_INVALID_VALUE); + ERR_CASE(CL_INVALID_DEVICE_TYPE); + ERR_CASE(CL_INVALID_PLATFORM); + ERR_CASE(CL_INVALID_DEVICE); + ERR_CASE(CL_INVALID_CONTEXT); + ERR_CASE(CL_INVALID_QUEUE_PROPERTIES); + ERR_CASE(CL_INVALID_COMMAND_QUEUE); + ERR_CASE(CL_INVALID_HOST_PTR); + ERR_CASE(CL_INVALID_MEM_OBJECT); + ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); + ERR_CASE(CL_INVALID_IMAGE_SIZE); + ERR_CASE(CL_INVALID_SAMPLER); + ERR_CASE(CL_INVALID_BINARY); + ERR_CASE(CL_INVALID_BUILD_OPTIONS); + ERR_CASE(CL_INVALID_PROGRAM); + ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE); + ERR_CASE(CL_INVALID_KERNEL_NAME); + ERR_CASE(CL_INVALID_KERNEL_DEFINITION); + ERR_CASE(CL_INVALID_KERNEL); + 
ERR_CASE(CL_INVALID_ARG_INDEX); + ERR_CASE(CL_INVALID_ARG_VALUE); + ERR_CASE(CL_INVALID_ARG_SIZE); + ERR_CASE(CL_INVALID_KERNEL_ARGS); + ERR_CASE(CL_INVALID_WORK_DIMENSION); + ERR_CASE(CL_INVALID_WORK_GROUP_SIZE); + ERR_CASE(CL_INVALID_WORK_ITEM_SIZE); + ERR_CASE(CL_INVALID_GLOBAL_OFFSET); + ERR_CASE(CL_INVALID_EVENT_WAIT_LIST); + ERR_CASE(CL_INVALID_EVENT); + ERR_CASE(CL_INVALID_OPERATION); + ERR_CASE(CL_INVALID_GL_OBJECT); + ERR_CASE(CL_INVALID_BUFFER_SIZE); + ERR_CASE(CL_INVALID_MIP_LEVEL); + ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE); + default: + ERROR_MSG("%s CL error: Error message not supported by ERROR_CL function (%d).\n", s, err); + } +} diff --git a/socl/src/debug.h b/socl/src/debug.h new file mode 100644 index 0000000..1c40464 --- /dev/null +++ b/socl/src/debug.h @@ -0,0 +1,53 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_DEBUG_H +#define SOCL_DEBUG_H + +#include <../src/common/config.h> + +#ifdef STARPU_VERBOSE +#define DEBUG +#include +#define DEBUG_MSG(...) do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, "[SOCL] [%s] ", __starpu_func__); fprintf(stderr, __VA_ARGS__);}} while (0) +#define DEBUG_MSG_NOHEAD(...) do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, __VA_ARGS__);}} while (0); +#define DEBUG_ERROR(...) 
do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, "[SOCL] ERROR: "__VA_ARGS__); } exit(1); } while (0) +#else +#define DEBUG_MSG(...) while(0) +#define DEBUG_MSG_NOHEAD(...) while(0) +#define DEBUG_ERROR(...) while(0) +#endif + +#define ERROR_MSG(...) do { fprintf(stderr, "[SOCL] [%s] ERROR: ", __starpu_func__); fprintf(stderr, __VA_ARGS__); } while (0) +#define ERROR_MSG_NOHEAD(...) fprintf(stderr, __VA_ARGS__) +#define ERROR_STOP(...) do { ERROR_MSG(__VA_ARGS__); exit(1); } while(0) + +void ERROR_CL(char *s, cl_int err); + +#ifdef STARPU_VERBOSE +#define DEBUG_CL(args...) ERROR_CL(args) +#else +#define DEBUG_CL(...) while(0) +#endif + +#ifdef DEBUG +#define DEBUG_PARAM(p) p +#else +#define DEBUG_PARAM(p) UNUSED(p) +#endif + + +#endif /* SOCL_DEBUG_H */ diff --git a/socl/src/event.c b/socl/src/event.c new file mode 100644 index 0000000..426909f --- /dev/null +++ b/socl/src/event.c @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "event.h" +#include "gc.h" + +static void release_callback_event(void * e); + +int event_unique_id() +{ + static int id = 1; + + return STARPU_ATOMIC_ADD(&id,1) - 1; +} + +/** + * Create a new event + * + * Events have one-to-one relation with tag. 
Tag number is event ID + */ +cl_event event_create(void) +{ + cl_event ev; + ev = gc_entity_alloc(sizeof(struct _cl_event), release_callback_event, "event"); + + ev->id = event_unique_id(); + ev->status = CL_SUBMITTED; + ev->command = NULL; + ev->prof_queued = 0L; + ev->prof_submit = 0L; + ev->prof_start = 0L; + ev->prof_end = 0L; + ev->cq = NULL; + + return ev; +} + +void event_complete(cl_event ev) +{ + ev->status = CL_COMPLETE; + + ev->prof_end = _socl_nanotime(); + + /* Trigger the tag associated to the command event */ + DEBUG_MSG("Trigger event %d\n", ev->id); + starpu_tag_notify_from_apps(ev->id); +} + +static void release_callback_event(void * e) +{ + cl_event event = (cl_event)e; + + gc_entity_unstore(&event->cq); + + /* Destruct object */ + //FIXME + //starpu_tag_remove(event->id); +} diff --git a/socl/src/event.h b/socl/src/event.h new file mode 100644 index 0000000..b3dd05c --- /dev/null +++ b/socl/src/event.h @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_EVENT_H +#define SOCL_EVENT_H + +#include "socl.h" + +/** + * Create a new event + * + * Events have one-to-one relation with tag. 
Tag number is event ID + */ +cl_event event_create(void); + +/** + * Generate a unique tag id + */ +int event_unique_id(); + +void event_complete(cl_event ev); + +#endif /* SOCL_EVENT_H */ diff --git a/socl/src/gc.c b/socl/src/gc.c new file mode 100644 index 0000000..8d20399 --- /dev/null +++ b/socl/src/gc.c @@ -0,0 +1,229 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "gc.h" +#include "event.h" +#include "socl.h" + +#include + +/** + * Garbage collection thread + */ + +/* List of entities to be released */ +static volatile entity gc_list = NULL; +static volatile entity entities = NULL; + +/* Mutex and cond for release */ +static starpu_pthread_mutex_t gc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t gc_cond = STARPU_PTHREAD_COND_INITIALIZER; + +/* Set to 1 to stop release thread execution */ +static volatile int gc_stop_required = 0; + +#define GC_LOCK STARPU_PTHREAD_MUTEX_LOCK(&gc_mutex) +#define GC_UNLOCK { STARPU_PTHREAD_COND_SIGNAL(&gc_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&gc_mutex);} +#define GC_UNLOCK_NO_SIGNAL STARPU_PTHREAD_MUTEX_UNLOCK(&gc_mutex) + +/* Thread routine */ +static void * gc_thread_routine(void *UNUSED(arg)) +{ + GC_LOCK; + + do + { + /* Make a copy of the gc_list to allow callbacks to add things into it */ + entity rs = gc_list; + gc_list = NULL; + + GC_UNLOCK_NO_SIGNAL; + + entity r = rs; + while (r != NULL) + { + /* Call entity release callback */ + if (r->release_callback != NULL) + { + r->release_callback(r); + } + + /* Release entity */ + entity next = r->next; + free(r); + + r = next; + } + + GC_LOCK; + + /* Check if new entities have been added */ + if (gc_list != NULL) + continue; + + /* Stop if required */ + if (gc_stop_required) + { + GC_UNLOCK_NO_SIGNAL; + break; + } + + /* Otherwise we sleep */ + STARPU_PTHREAD_COND_WAIT(&gc_cond, &gc_mutex); + } while (1); + + starpu_pthread_exit(NULL); +} + +static starpu_pthread_t gc_thread; + +/* Start garbage collection */ +void gc_start(void) +{ + STARPU_PTHREAD_CREATE(&gc_thread, NULL, gc_thread_routine, NULL); +} + +/* Stop garbage collection */ +void gc_stop(void) +{ + GC_LOCK; + + gc_stop_required = 1; + + GC_UNLOCK; + + STARPU_PTHREAD_JOIN(gc_thread, NULL); +} + +int gc_entity_release_ex(entity e, const char * DEBUG_PARAM(caller)) +{ + DEBUG_MSG("[%s] Decrementing refcount of %s %p to ", caller, e->name, (void 
*)e); + + /* Decrement reference count */ + int refs = STARPU_ATOMIC_ADD(&e->refs, -1); + + DEBUG_MSG_NOHEAD("%d\n", refs); + + assert(refs >= 0); + + if (refs != 0) + return 0; + + DEBUG_MSG("[%s] Releasing %s %p\n", caller, e->name, (void *)e); + + GC_LOCK; + + /* Remove entity from the entities list */ + if (e->prev != NULL) + e->prev->next = e->next; + if (e->next != NULL) + e->next->prev = e->prev; + if (entities == e) + entities = e->next; + + /* Put entity in the release queue */ + e->next = gc_list; + gc_list = e; + + GC_UNLOCK; + + return 1; +} + +/** + * Initialize entity + */ +void gc_entity_init(void *arg, void (*release_callback)(void*), char * name) +{ + DEBUG_MSG("Initializing entity %p (%s)\n", arg, name); + + struct entity * e = (entity)arg; + + e->dispatch = &socl_master_dispatch; + e->refs = 1; + e->release_callback = release_callback; + e->prev = NULL; + e->name = name; + + GC_LOCK; + + e->next = entities; + if (entities != NULL) + entities->prev = e; + entities = e; + + GC_UNLOCK_NO_SIGNAL; +} + +/** + * Allocate and initialize entity + */ +void * gc_entity_alloc(unsigned int size, void (*release_callback)(void*), char * name) +{ + void * e = malloc(size); + gc_entity_init(e, release_callback, name); + return e; +} + +/** Retain entity */ +void gc_entity_retain_ex(void *arg, const char * DEBUG_PARAM(caller)) +{ + struct entity * e = (entity)arg; + +#ifdef DEBUG + int refs = +#else + (void) +#endif + STARPU_ATOMIC_ADD(&e->refs, 1); + + DEBUG_MSG("[%s] Incrementing refcount of %s %p to %d\n", caller, e->name, e, refs); +} + +int gc_active_entity_count(void) +{ + int i = 0; + + entity e = entities; + while (e != NULL) + { + i++; + e = e->next; + } + + return i; +} + +void gc_print_remaining_entities(void) +{ + DEBUG_MSG("Remaining entities:\n"); + + GC_LOCK; + + entity e = entities; + while (e != NULL) + { + DEBUG_MSG(" - %s %p\n", e->name, (void *)e); + e = e->next; + } + + GC_UNLOCK; +} + +#undef GC_LOCK +#undef GC_UNLOCK +#undef 
GC_UNLOCK_NO_SIGNAL diff --git a/socl/src/gc.h b/socl/src/gc.h new file mode 100644 index 0000000..4f237e7 --- /dev/null +++ b/socl/src/gc.h @@ -0,0 +1,55 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_GC_H +#define SOCL_GC_H + +#include "socl.h" + +void gc_start(void); +void gc_stop(void); + +void gc_entity_init(void *arg, void (*release_callback)(void*), char*name); + +void * gc_entity_alloc(unsigned int size, void (*release_callback)(void*), char * name); + +void gc_entity_retain_ex(void *arg, const char *); +#define gc_entity_retain(arg) gc_entity_retain_ex(arg, __starpu_func__) + +/** Decrement reference counter and release entity if applicable */ +int gc_entity_release_ex(entity e, const char*); + +int gc_active_entity_count(void); +void gc_print_remaining_entities(void); + +#define gc_entity_release(a) gc_entity_release_ex(&(a)->_entity, __starpu_func__) + +#define gc_entity_store(dest,e) \ + do {\ + void * _e = e;\ + gc_entity_retain(_e); \ + *dest = _e;\ + } while(0); + +#define gc_entity_unstore(dest) \ + do {\ + gc_entity_release(*dest); \ + *dest = NULL;\ + } while(0); + + + +#endif diff --git a/socl/src/getinfo.h b/socl/src/getinfo.h new file mode 100644 index 0000000..be56f79 --- /dev/null +++ b/socl/src/getinfo.h @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore 
architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_GETINFO_H +#define SOCL_GETINFO_H + +#define INFO_CASE_EX2(var) if (param_value != NULL) { \ + if (param_value_size < sizeof(var)) \ + return CL_INVALID_VALUE; \ + memcpy(param_value, &var, sizeof(var)); \ + } \ + if (param_value_size_ret != NULL) \ + *param_value_size_ret = sizeof(var); \ + break; + +#define INFO_CASE(param, var) case param: \ + INFO_CASE_EX2(var) + +#define INFO_CASE_STRING_EX2(var) if (param_value != NULL) { \ + if (param_value_size < strlen(var)+1) \ + return CL_INVALID_VALUE; \ + strcpy(param_value, var); \ + } \ + if (param_value_size_ret != NULL) \ + *param_value_size_ret = strlen(var)+1; \ + break; + +#define INFO_CASE_STRING(param, var) case param: \ + INFO_CASE_STRING_EX2(var) + +#define INFO_CASE_VALUE(param, type, value) case param: {\ + type tmp = (value);\ + INFO_CASE_EX2(tmp);\ + } + +//warning: var is a reference +#define INFO_CASE_EX(param, var, size) case param: \ + if (param_value != NULL) { \ + if (param_value_size < size) \ + return CL_INVALID_VALUE; \ + memcpy(param_value, var, size); \ + } \ + if (param_value_size_ret != NULL) \ + *param_value_size_ret = size; \ + break; + +#endif /* SOCL_GETINFO_H */ diff --git a/socl/src/init.c b/socl/src/init.c new file mode 100644 index 0000000..5a87869 --- /dev/null +++ b/socl/src/init.c @@ -0,0 +1,144 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../src/common/utils.h" +#include "socl.h" +#include "gc.h" +#include "mem_objects.h" + +int _starpu_init_failed; +static enum initialization _socl_init = UNINITIALIZED; +static starpu_pthread_mutex_t _socl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t _socl_cond = STARPU_PTHREAD_COND_INITIALIZER; +static pthread_t _socl_thread_init; +static struct starpu_conf conf; + +int socl_init_starpu(void) +{ + STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); + if (_socl_init == INITIALIZED) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + return 0; + } + + if (_socl_init == CHANGING) + { + /* Avoid recursion when starpu_init calls hwloc initialization which uses its opencl plugin */ + if (pthread_equal(_socl_thread_init, pthread_self())) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + return -1; + } + + /* Somebody else is initializing already, wait for him */ + while (_socl_init != INITIALIZED) + STARPU_PTHREAD_COND_WAIT(&_socl_cond, &_socl_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + return 0; + } + _socl_init = CHANGING; + _socl_thread_init = pthread_self(); + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + + starpu_conf_init(&conf); + conf.precedence_over_environment_variables = 1; + conf.ncuda = 0; + conf.ncpus = 0; + + 
_starpu_init_failed = starpu_init(&conf); + if (_starpu_init_failed != 0) + { + DEBUG_MSG("Error when calling starpu_init: %d\n", _starpu_init_failed); + } + else + { + if (starpu_opencl_worker_get_count() == 0) + { + DEBUG_MSG("StarPU didn't find any OpenCL device. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n"); + _starpu_init_failed = -ENODEV; + } + } + + /* Disable dataflow implicit dependencies */ + starpu_data_set_default_sequential_consistency_flag(0); + + STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); + _socl_init = INITIALIZED; + STARPU_PTHREAD_COND_BROADCAST(&_socl_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + + return 0; +} +/** + * Initialize SOCL + */ +__attribute__((constructor)) static void socl_init() +{ + mem_object_init(); + + gc_start(); +} + +void soclShutdown() +{ + static int shutdown = 0; + + if (!shutdown) + { + shutdown = 1; + + STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); + if(_socl_init) + starpu_task_wait_for_all(); + + gc_stop(); + + if(_socl_init) + starpu_task_wait_for_all(); + + int active_entities = gc_active_entity_count(); + + if (active_entities != 0) + { + DEBUG_MSG("Unreleased entities: %d\n", active_entities); + gc_print_remaining_entities(); + } + + if(_socl_init && _starpu_init_failed != -ENODEV) + starpu_shutdown(); + STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); + + if (socl_devices != NULL) + { + free(socl_devices); + socl_devices = NULL; + } + } +} + +/** + * Shutdown SOCL + */ +__attribute__((destructor)) static void socl_shutdown() +{ + char * skip_str = getenv("SOCL_SKIP_DESTRUCTOR"); + int skip = (skip_str != NULL ? atoi(skip_str) : 0); + + if (!skip) soclShutdown(); +} diff --git a/socl/src/init.h b/socl/src/init.h new file mode 100644 index 0000000..20729ee --- /dev/null +++ b/socl/src/init.h @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): these includes sit outside the include guard below, so they
+ * are processed again on every inclusion of this header. */
+#include "socl.h"
+#include "gc.h"
+#include "mem_objects.h"
+
+#ifndef SOCL_INIT_H
+#define SOCL_INIT_H
+
+/* Result of the StarPU start-up performed by socl_init_starpu(): 0 on
+ * success, otherwise the starpu_init() error code, or -ENODEV when no OpenCL
+ * worker was found. */
+extern int _starpu_init_failed;
+/* NOTE(review): init.c defines no object named _starpu_init (its state
+ * variable is the file-local _socl_init); confirm this declaration still has
+ * a definition somewhere before relying on it. */
+extern volatile int _starpu_init;
+/**
+ * Initialize StarPU
+ *
+ * Starts StarPU once per process. Returns -1 when re-entered by the thread
+ * that is currently performing the initialization, 0 otherwise; a return of
+ * 0 does not imply success, check _starpu_init_failed.
+ */
+
+int socl_init_starpu(void);
+/* Tear SOCL down; only the first call has an effect. */
+void soclShutdown(void);
+
+#endif /* SOCL_INIT_H */
diff --git a/socl/src/mem_objects.c b/socl/src/mem_objects.c
new file mode 100644
index 0000000..7f65cd6
--- /dev/null
+++ b/socl/src/mem_objects.c
@@ -0,0 +1,101 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+
+/* Number of hash buckets in the memory-object registry */
+#define mem_object_hash_key 257
+
+/* Bucket heads; the objects of a bucket are chained through ->prev / ->next */
+static cl_mem p_mem_objects[mem_object_hash_key] = {NULL};
+/* One spinlock per bucket */
+static starpu_pthread_spinlock_t p_mem_objects_spinlock[mem_object_hash_key];
+
+/* Expression-style macros: the caller supplies the terminating semicolon */
+#define LOCK(i) starpu_pthread_spin_lock(&p_mem_objects_spinlock[i])
+#define UNLOCK(i) starpu_pthread_spin_unlock(&p_mem_objects_spinlock[i])
+
+/**
+ * Initialize the spinlock of every bucket. Must run before any other
+ * function of this file is used.
+ */
+void mem_object_init(void)
+{
+	int i;
+	for (i = 0; i < mem_object_hash_key; i++)
+	{
+		starpu_pthread_spin_init(&p_mem_objects_spinlock[i], 0);
+	}
+}
+
+/**
+ * Map an address to a bucket index in [0, mem_object_hash_key).
+ * The four low-order bits are dropped before the modulo.
+ */
+static int mem_object_hash(const void * addr)
+{
+	uintptr_t t = (uintptr_t)addr;
+	uintptr_t t2 = t >> 4;
+	uintptr_t t3 = t2 % mem_object_hash_key;
+	return (int)t3;
+}
+
+/**
+ * Register memory object m: push it at the head of its bucket's list.
+ */
+void mem_object_store(cl_mem m)
+{
+	int hash = mem_object_hash(m);
+
+	LOCK(hash);
+
+	m->prev = NULL;
+	m->next = p_mem_objects[hash];
+	if (p_mem_objects[hash] != NULL)
+		p_mem_objects[hash]->prev = m;
+	p_mem_objects[hash] = m;
+
+	UNLOCK(hash);
+}
+
+/**
+ * Unregister memory object m: unlink it from its bucket's list.
+ * m's own prev/next fields are left untouched.
+ */
+void mem_object_release(cl_mem m)
+{
+	int hash = mem_object_hash(m);
+
+	LOCK(hash);
+
+	if (m->prev != NULL)
+		m->prev->next = m->next;
+	if (m->next != NULL)
+		m->next->prev = m->prev;
+
+	/* m was the bucket head: its successor becomes the new head */
+	if (p_mem_objects[hash] == m)
+	{
+		p_mem_objects[hash] = m->next;
+	}
+
+	UNLOCK(hash);
+}
+
+/**
+ * Check whether the pointer-sized value stored at addr is a registered
+ * memory object.
+ *
+ * @param addr location from which a cl_mem value is read
+ * @return that cl_mem when it is currently registered, NULL otherwise
+ */
+cl_mem mem_object_fetch(const void * addr)
+{
+	/* Read the candidate once; it is both the hash key and the match target */
+	cl_mem candidate = *(const cl_mem *)addr;
+	int hash = mem_object_hash(candidate);
+	cl_mem found = NULL;
+	cl_mem buf;
+
+	LOCK(hash);
+
+	for (buf = p_mem_objects[hash]; buf != NULL; buf = buf->next)
+	{
+		if (buf == candidate)
+		{
+			found = buf;
+			break;
+		}
+	}
+
+	UNLOCK(hash);
+	return found;
+}
+
+#undef LOCK
+#undef UNLOCK
+#undef mem_object_hash_key
diff --git a/socl/src/mem_objects.h b/socl/src/mem_objects.h
new file mode 100644
index 0000000..14911ae
--- /dev/null
+++ b/socl/src/mem_objects.h
@@ -0,0 +1,25 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef SOCL_MEM_OBJECTS_H
+#define SOCL_MEM_OBJECTS_H
+
+/* Registry of live cl_mem objects: a fixed-size hash table keyed by the
+ * object's address, with one spinlock per bucket (see mem_objects.c). */
+
+/* Set up the registry. NOTE(review): the definition is garbled in this patch
+ * text; it presumably initializes the per-bucket spinlocks - confirm. */
+void mem_object_init(void);
+/* Register m: insert it at the head of its bucket's list. */
+void mem_object_store(cl_mem m);
+/* Unregister m: unlink it from its bucket's list. */
+void mem_object_release(cl_mem m);
+/* Read a cl_mem value from addr and return it if it is currently registered,
+ * NULL otherwise. */
+cl_mem mem_object_fetch(const void * addr);
+
+#endif /* SOCL_MEM_OBJECTS_H */
diff --git a/socl/src/ocl_icd.h b/socl/src/ocl_icd.h
new file mode 100644
index 0000000..9f1e24c
--- /dev/null
+++ b/socl/src/ocl_icd.h
@@ -0,0 +1,905 @@
+/**
+Copyright (c) 2012, Brice Videau
+Copyright (c) 2012, Vincent Danjean
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Do not edit this file. It is automatically generated. + +*/ + +#include "CL/cl.h" +#include "CL/cl_gl.h" +#include "CL/cl_ext.h" + +#define OCL_ICD_API_VERSION 1 +#define OCL_ICD_IDENTIFIED_FUNCTIONS 102 + +struct _cl_icd_dispatch { + CL_API_ENTRY cl_int (CL_API_CALL*clGetPlatformIDs)( + cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL* + clGetPlatformInfo)( + cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetDeviceIDs)( + cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetDeviceInfo)( + cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_context (CL_API_CALL*clCreateContext)( + const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* 
errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_context (CL_API_CALL*clCreateContextFromType)( + const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainContext)( + cl_context /* context */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseContext)( + cl_context /* context */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetContextInfo)( + cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_command_queue (CL_API_CALL*clCreateCommandQueue)( + cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainCommandQueue)( + cl_command_queue /* command_queue */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseCommandQueue)( + cl_command_queue /* command_queue */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetCommandQueueInfo)( + cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clSetCommandQueueProperty)( + cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateBuffer)( + cl_context /* context */, + cl_mem_flags /* 
flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage2D)( + cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage3D)( + cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainMemObject)( + cl_mem /* memobj */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseMemObject)( + cl_mem /* memobj */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetSupportedImageFormats)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetMemObjectInfo)( + cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetImageInfo)( + cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_sampler (CL_API_CALL*clCreateSampler)( + cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* 
addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainSampler)( + cl_sampler /* sampler */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseSampler)( + cl_sampler /* sampler */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetSamplerInfo)( + cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithSource)( + cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithBinary)( + cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainProgram)( + cl_program /* program */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseProgram)( + cl_program /* program */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clBuildProgram)( + cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clUnloadCompiler)( + void + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetProgramInfo)( + cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* 
param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetProgramBuildInfo)( + cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_kernel (CL_API_CALL*clCreateKernel)( + cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clCreateKernelsInProgram)( + cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainKernel)( + cl_kernel /* kernel */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseKernel)( + cl_kernel /* kernel */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clSetKernelArg)( + cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelInfo)( + cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)( + cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clWaitForEvents)( + cl_uint /* num_events */, + const cl_event * /* event_list */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetEventInfo)( + cl_event /* event */, + cl_event_info /* param_name */, + 
size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainEvent)( + cl_event /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseEvent)( + cl_event /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetEventProfilingInfo)( + cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clFlush)( + cl_command_queue /* command_queue */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clFinish)( + cl_command_queue /* command_queue */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* cb */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* cb */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadImage)( + cl_command_queue 
/* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteImage)( + cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyImage)( + cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)( + cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) 
CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY void * (CL_API_CALL*clEnqueueMapBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY void * (CL_API_CALL*clEnqueueMapImage)( + cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueUnmapMemObject)( + cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueNDRangeKernel)( + cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueTask)( + cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueNativeKernel)( + cl_command_queue /* command_queue */, + void 
(*user_func)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMarker)( + cl_command_queue /* command_queue */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWaitForEvents)( + cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueBarrier)( + cl_command_queue /* command_queue */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY void * (CL_API_CALL*clGetExtensionFunctionAddress)( + const char * /* func_name */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLBuffer)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* bufobj */, + int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture2D)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture3D)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* renderbuffer */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetGLObjectInfo)( + cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type */, + cl_GLuint * /* 
gl_object_name */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetGLTextureInfo)( + cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)( + cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)( + cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetGLContextInfoKHR)( + const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown75)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown76)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown77)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown78)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown79)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown80)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL*clSetEventCallback)( + cl_event /* event */, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateSubBuffer)( + cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* 
buffer_create_info */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)( + cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_event (CL_API_CALL*clCreateUserEvent)( + cl_context /* context */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL*clSetUserEventStatus)( + cl_event /* event */, + cl_int /* execution_status */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadBufferRect)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_origin */, + const size_t * /* host_origin */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteBufferRect)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_origin */, + const size_t * /* host_origin */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBufferRect)( + cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* 
src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL* clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL* clRetainDeviceEXT)( + cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL* clReleaseDeviceEXT)( + cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown92)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL*clCreateSubDevices)( + cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clRetainDevice)( + cl_device_id /* device */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clReleaseDevice)( + cl_device_id /* device */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage)( + cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)( + cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clCompileProgram)( + cl_program /* program */, + cl_uint /* num_devices */, + const 
cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_program (CL_API_CALL*clLinkProgram)( + cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clUnloadPlatformCompiler)( + cl_platform_id /* platform */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelArgInfo)( + cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueFillBuffer)( + cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueFillImage)( + cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)( + cl_command_queue /* 
command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)( + cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)( + cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY void * (CL_API_CALL* + clGetExtensionFunctionAddressForPlatform)( + cl_platform_id /* platform */, + const char * /* func_name */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture)( + cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_2; + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown109)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown110)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown111)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown112)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown113)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown114)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown115)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown116)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown117)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown118)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown119)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown120)( + void); + + CL_API_ENTRY cl_int (CL_API_CALL* clUnknown121)( + 
void); + +}; + diff --git a/socl/src/socl.c b/socl/src/socl.c new file mode 100644 index 0000000..4318460 --- /dev/null +++ b/socl/src/socl.c @@ -0,0 +1,163 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2012-2012 Vincent Danjean + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" + +struct _cl_icd_dispatch socl_master_dispatch = +{ + soclGetPlatformIDs, + soclGetPlatformInfo, + soclGetDeviceIDs, + soclGetDeviceInfo, + soclCreateContext, + soclCreateContextFromType, + soclRetainContext, + soclReleaseContext, + soclGetContextInfo, + soclCreateCommandQueue, + soclRetainCommandQueue, + soclReleaseCommandQueue, + soclGetCommandQueueInfo, + soclSetCommandQueueProperty, + soclCreateBuffer, + soclCreateImage2D, + soclCreateImage3D, + soclRetainMemObject, + soclReleaseMemObject, + soclGetSupportedImageFormats, + soclGetMemObjectInfo, + soclGetImageInfo, + soclCreateSampler, + soclRetainSampler, + soclReleaseSampler, + soclGetSamplerInfo, + soclCreateProgramWithSource, + soclCreateProgramWithBinary, + soclRetainProgram, + soclReleaseProgram, + soclBuildProgram, + soclUnloadCompiler, + soclGetProgramInfo, + soclGetProgramBuildInfo, + soclCreateKernel, + soclCreateKernelsInProgram, + soclRetainKernel, + soclReleaseKernel, + soclSetKernelArg, + soclGetKernelInfo, + soclGetKernelWorkGroupInfo, + soclWaitForEvents, + soclGetEventInfo, + 
soclRetainEvent, + soclReleaseEvent, + soclGetEventProfilingInfo, + soclFlush, + soclFinish, + soclEnqueueReadBuffer, + soclEnqueueWriteBuffer, + soclEnqueueCopyBuffer, + soclEnqueueReadImage, + soclEnqueueWriteImage, + soclEnqueueCopyImage, + soclEnqueueCopyImageToBuffer, + soclEnqueueCopyBufferToImage, + soclEnqueueMapBuffer, + soclEnqueueMapImage, + soclEnqueueUnmapMemObject, + soclEnqueueNDRangeKernel, + soclEnqueueTask, + soclEnqueueNativeKernel, + soclEnqueueMarker, + soclEnqueueWaitForEvents, + soclEnqueueBarrier, + soclGetExtensionFunctionAddress, + (void *) NULL, // clCreateFromGLBuffer, + (void *) NULL, // clCreateFromGLTexture2D, + (void *) NULL, // clCreateFromGLTexture3D, + (void *) NULL, // clCreateFromGLRenderbuffer, + (void *) NULL, // clGetGLObjectInfo, + (void *) NULL, // clGetGLTextureInfo, + (void *) NULL, // clEnqueueAcquireGLObjects, + (void *) NULL, // clEnqueueReleaseGLObjects, + (void *) NULL, // clGetGLContextInfoKHR, + (void *) NULL, // + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, // clSetEventCallback, + (void *) NULL, // clCreateSubBuffer, + (void *) NULL, // clSetMemObjectDestructorCallback, + (void *) NULL, // clCreateUserEvent, + (void *) NULL, // clSetUserEventStatus, + (void *) NULL, // clEnqueueReadBufferRect, + (void *) NULL, // clEnqueueWriteBufferRect, + (void *) NULL, // clEnqueueCopyBufferRect, + (void *) NULL, // clCreateSubDevicesEXT, + (void *) NULL, // clRetainDeviceEXT, + (void *) NULL, // clReleaseDeviceEXT, + (void *) NULL, + (void *) NULL, // clCreateSubDevices, + (void *) NULL, // clRetainDevice, + (void *) NULL, // clReleaseDevice, + (void *) NULL, // clCreateImage, + (void *) NULL, // clCreateProgramWithBuiltInKernels, + (void *) NULL, // clCompileProgram, + (void *) NULL, // clLinkProgram, + (void *) NULL, // clUnloadPlatformCompiler, + (void *) NULL, // clGetKernelArgInfo, + (void *) NULL, // clEnqueueFillBuffer, + (void *) NULL, // clEnqueueFillImage, + 
(void *) NULL, // clEnqueueMigrateMemObjects, + soclEnqueueMarkerWithWaitList, // clEnqueueMarkerWithWaitList, + soclEnqueueBarrierWithWaitList, // clEnqueueBarrierWithWaitList, + soclGetExtensionFunctionAddressForPlatform, // clGetExtensionFunctionAddressForPlatform, + (void *) NULL, // clCreateFromGLTexture, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL, + (void *) NULL +}; + +struct _cl_platform_id socl_platform = {&socl_master_dispatch}; + +const char * __attribute__ ((aligned (16))) SOCL_PROFILE = "FULL_PROFILE"; +const char * __attribute__ ((aligned (16))) SOCL_VERSION = "OpenCL 1.0 SOCL Edition (0.1.0)"; +const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_NAME = "SOCL Platform"; +const char * __attribute__ ((aligned (16))) SOCL_VENDOR = "Inria"; +const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_EXTENSIONS = "cl_khr_icd"; +const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_ICD_SUFFIX_KHR ="SOCL"; + + +/* Command queues with profiling enabled + * This allows us to disable StarPU profiling when it + * is equal to 0 + */ +int __attribute__ ((aligned (16))) profiling_queue_count = 0; + +struct _cl_device_id * socl_devices = NULL; +unsigned int socl_device_count = 0; diff --git a/socl/src/socl.h b/socl/src/socl.h new file mode 100644 index 0000000..b269cc1 --- /dev/null +++ b/socl/src/socl.h @@ -0,0 +1,789 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_H +#define SOCL_H + +#define CL_CONTEXT_SCHEDULER_SOCL 0xFF01 +#define CL_CONTEXT_NAME_SOCL 0xFF02 + +#include +#include +#include +#include +#include "CL/cl.h" +#include "ocl_icd.h" + +#include + +typedef struct starpu_task * starpu_task; + +#ifdef UNUSED +#elif defined(__GNUC__) + #define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#else + #define UNUSED(x) x +#endif + + +/** + * Entity that can be managed by the garbage collector + */ +typedef struct entity * entity; + +struct entity +{ + struct _cl_icd_dispatch * dispatch; + /* Reference count */ + size_t refs; + + /* Callback called on release */ + void (*release_callback)(void*entity); + + /* Entity identifier (used for debugging purpose) */ + char * name; + + /* Next entity in garbage collector queue */ + entity prev; + entity next; +}; + +/* OpenCL entities (context, command queues, buffers...) 
must use + * this macro as their first field */ +#define CL_ENTITY struct entity _entity; + +#include "command.h" +#include "command_list.h" +#include "command_queue.h" +#include "debug.h" +#include "event.h" +#include "gc.h" +#include "mem_objects.h" +#include "task.h" +#include "util.h" + +struct _cl_platform_id +{ + struct _cl_icd_dispatch *dispatch; +}; + +struct _cl_device_id +{ + struct _cl_icd_dispatch *dispatch; + int device_id; + int worker_id; +}; + +#define RETURN_EVENT(ev, event) \ + if ((event) != NULL) { \ + *event = ev; \ + } \ + else { \ + gc_entity_release(ev); \ + } + +#define MAY_BLOCK_THEN_RETURN_EVENT(ev,blocking,event) \ + if ((blocking) == CL_TRUE) { \ + soclWaitForEvents(1, &ev); \ + } \ + RETURN_EVENT(ev,event); \ + +/* Constants */ +extern const char * SOCL_PROFILE; +extern const char * SOCL_VERSION; +extern const char * SOCL_PLATFORM_NAME; +extern const char * SOCL_VENDOR; +extern const char * SOCL_PLATFORM_EXTENSIONS; +extern const char * SOCL_PLATFORM_ICD_SUFFIX_KHR; + +struct _cl_context +{ + CL_ENTITY; + + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *); + void *user_data; + + /* Associated devices */ + cl_device_id * devices; + cl_uint num_devices; + + /* Scheduling context */ + unsigned sched_ctx; + + /* Properties */ + cl_context_properties * properties; + cl_uint num_properties; + + /* ID */ +#ifdef DEBUG + int id; +#endif +}; + +struct _cl_command_queue +{ + CL_ENTITY; + + cl_command_queue_properties properties; + cl_device_id device; + cl_context context; + + /* Stored commands */ + command_list commands; + + /* Last enqueued barrier-like event */ + cl_command barrier; + + /* Mutex */ + starpu_pthread_mutex_t mutex; + + /* ID */ +#ifdef DEBUG + int id; +#endif +}; + +struct _cl_event +{ + CL_ENTITY; + + /* Command queue */ + cl_command_queue cq; + + /* Command */ + cl_command command; + + /* Event status */ + cl_int status; + + /* ID + * This ID is used as a tag for StarPU dependencies + */ + int id; + 
+ /* Profiling info */ + cl_ulong prof_queued, prof_submit, prof_start, prof_end; +}; + +struct _cl_mem +{ + CL_ENTITY; + + /* StarPU handle */ + starpu_data_handle_t handle; + + /* Pointer to data in host memory */ + void *ptr; + + /* Buffer size */ + size_t size; + + /* Indicates how many references (mapping, MEM_USE_HOST_PTR...) require + * coherence in host memory. If set to zero, no coherency is maintained + * (this is the most efficient) */ + int map_count; + + /* Creation flags */ + cl_mem_flags flags; + + /* Creation context */ + cl_context context; + + /* Access mode */ + int mode; + + /* Host ptr */ + void * host_ptr; + + /* Fields used to store cl_mems in mem_objects list */ + cl_mem prev; + cl_mem next; + + /* Indicates if a buffer may contain meaningful data. Otherwise + we don't have to transfer it */ + int scratch; + + /* ID */ +#ifdef DEBUG + int id; +#endif +}; + +struct _cl_program +{ + CL_ENTITY; + + /* Real OpenCL Programs + * There is one entry for each device (even non OpenCL ones) + * in order to index this array with dev_id + */ + cl_program *cl_programs; + + /* Context used to create this program */ + cl_context context; + + /* Options */ + char * options; + unsigned int options_size; + + /* ID */ +#ifdef DEBUG + int id; +#endif +}; + +enum kernel_arg_type { Null, Buffer, Immediate }; + +typedef cl_int (*split_func_t)(cl_command_queue, cl_uint, void *, const cl_event, cl_event *); + +struct _cl_kernel +{ + CL_ENTITY; + + /* Associated program */ + cl_program program; + + /* StarPU codelet */ + struct starpu_perfmodel * perfmodel; + + /* Kernel name */ + char * kernel_name; + + /* Real OpenCL kernels */ + cl_kernel *cl_kernels; + + /* clCreateKernel return codes */ + cl_int *errcodes; + + /* Arguments */ + unsigned int num_args; + size_t *arg_size; + enum kernel_arg_type *arg_type; + void **arg_value; + + /* Partition function */ + cl_uint split_space; + split_func_t split_func; + cl_ulong * split_perfs; + void * split_data; + 
starpu_pthread_mutex_t split_lock; + + /* ID */ +#ifdef DEBUG + int id; +#endif +}; + +/* Global vars */ + +/* Command queues with profiling enabled + * This allows us to disable StarPU profiling when it + * is equal to 0 + */ +extern int profiling_queue_count; + +/***************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +soclCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +soclCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /*
errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +soclCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclSetCommandQueueProperty(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +soclCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +soclCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + 
size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +soclCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +soclCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY 
cl_int CL_API_CALL +soclReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +soclCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +soclCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK *pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + 
cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +soclCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern 
CL_API_ENTRY cl_int CL_API_CALL +soclRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* cb */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* cb */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + 
cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * 
CL_API_CALL +soclEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +soclEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (*user_func)(void *), + void * /* args */, + size_t /* 
cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */ + ) CL_API_SUFFIX__VERSION_1_2; + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. 
+ */ +extern CL_API_ENTRY void * CL_API_CALL +soclGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; + +extern void * CL_API_CALL +soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +soclIcdGetPlatformIDsKHR(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_EXT_SUFFIX__VERSION_1_0; + +extern struct _cl_icd_dispatch socl_master_dispatch; +extern struct _cl_platform_id socl_platform; +extern struct _cl_device_id * socl_devices; +extern unsigned int socl_device_count; + +#endif /* SOCL_H */ diff --git a/socl/src/task.c b/socl/src/task.c new file mode 100644 index 0000000..57783b7 --- /dev/null +++ b/socl/src/task.c @@ -0,0 +1,180 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "gc.h" +#include "event.h" + +void command_completed(cl_command cmd) +{ + starpu_task task = cmd->task; + + cl_event ev = command_event_get_ex(cmd); + ev->status = CL_COMPLETE; + + ev->prof_end = _socl_nanotime(); + + /* Commands without codelets (marker, barrier, unmap...) 
take no time */ + if (task->cl == NULL) + ev->prof_start = ev->prof_end; + + /* Trigger the tag associated to the command event */ + DEBUG_MSG("Trigger event %d\n", ev->id); + starpu_tag_notify_from_apps(ev->id); + + gc_entity_release(ev); +} + +void command_completed_task_callback(void *arg) +{ + cl_command cmd = (cl_command)arg; + + command_completed(cmd); + + /* Release the command stored task callback parameter */ + gc_entity_release(cmd); +} + +/* + * Create a StarPU task + */ +starpu_task task_create(cl_command_type typ) +{ + struct starpu_task * task; + + /* Create StarPU task */ + task = starpu_task_create(); + + /* Set task common settings */ + task->destroy = 0; + task->detach = 0; + + task->use_tag = 1; + task->tag_id = event_unique_id(); + + return task; +} + +void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events) +{ + if (num_events != 0) + { + cl_uint i; + + starpu_tag_t * tags = malloc(num_events * sizeof(starpu_tag_t)); + + DEBUG_MSG("Task %p depends on events:", task); + for (i=0; iid; + DEBUG_MSG_NOHEAD(" %d", events[i]->id); + } + DEBUG_MSG_NOHEAD("\n"); + + starpu_tag_declare_deps_array(task->tag_id, num_events, tags); + + free(tags); + } +} + +cl_int task_submit_ex(starpu_task task, cl_command cmd) +{ + /* Associated the task to the command */ + cmd->task = task; + + cl_uint num_events = command_num_events_get_ex(cmd); + cl_event * events = command_events_get_ex(cmd); + + task_depends_on(task, num_events, events); + + task->callback_func = command_completed_task_callback; + gc_entity_store(&task->callback_arg, cmd); + + cl_event ev = command_event_get_ex(cmd); + ev->prof_submit = _socl_nanotime(); + gc_entity_release(ev); + + /* Submit task */ + int ret = (task->cl != NULL && task->where == STARPU_OPENCL ? + starpu_task_submit_to_ctx(task, cmd->event->cq->context->sched_ctx) : + starpu_task_submit(task)); + + if (ret != 0) + DEBUG_ERROR("Unable to submit a task. 
Error %d\n", ret); + + return CL_SUCCESS; +} + + +/********************************* + * CPU task helper + *********************************/ + +struct cputask_arg +{ + void (*callback)(void*); + void * arg; + int free_arg; + cl_command cmd; + int complete_cmd; +}; + +static void cputask_task(void *args) +{ + struct cputask_arg * arg = (struct cputask_arg*)args; + + arg->callback(arg->arg); + + if (arg->complete_cmd) + command_completed(arg->cmd); + + if (arg->free_arg) + { + assert(arg->arg != NULL); + free(arg->arg); + arg->arg = NULL; + } + + gc_entity_unstore(&arg->cmd); + free(arg); +} + +void cpu_task_submit_ex(cl_command cmd, void (*callback)(void*), void *arg, int free_arg, int complete_cmd, struct starpu_codelet * codelet, unsigned num_events, cl_event * events) +{ + struct cputask_arg * a = malloc(sizeof(struct cputask_arg)); + a->callback = callback; + a->arg = arg; + a->free_arg = free_arg; + gc_entity_store(&a->cmd, cmd); + a->complete_cmd = complete_cmd; + + codelet->where = STARPU_OPENCL | STARPU_CPU | STARPU_CUDA; + + starpu_task task = task_create(CL_COMMAND_TASK); + if (num_events != 0) + { + task_depends_on(task, num_events, events); + } + + task->callback_func = cputask_task; + task->callback_arg = a; + + cmd->task = task; + + int ret = starpu_task_submit(task); + if (ret != 0) + DEBUG_ERROR("Unable to submit a task. Error %d\n", ret); +} diff --git a/socl/src/task.h b/socl/src/task.h new file mode 100644 index 0000000..b1fd549 --- /dev/null +++ b/socl/src/task.h @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef SOCL_TASK_H +#define SOCL_TASK_H + +#include "socl.h" + +starpu_task task_create(cl_command_type typ) STARPU_ATTRIBUTE_MALLOC; +void task_dependency_add(starpu_task task, cl_uint num_events, cl_event *events); +void command_completed(cl_command cmd); + +void command_completed_task_callback(void *); + +/* Execute callback(arg) in a CPU task (with no buffer) + * Associate this task to the command cmd (i.e. when this task completes, the command is completed) + * Additional dependencies can be specified (num_events, events). + * The codelet is used to give a fixed name to the task without allocating a + * new codelet structure each time. This function will fill the other fields + * as appropriate */ +void cpu_task_submit_ex(cl_command cmd, void (*callback)(void*), void *arg, int free_arg, int release_cmd, struct starpu_codelet *, unsigned num_events, cl_event * events); + +#define cpu_task_submit(cmd, args...) cpu_task_submit_ex((cl_command)cmd, args) + +/** + * Associate a StarPU task to a command and submit it + * + * When the task terminates, the command is set as terminated too + */ +cl_int task_submit_ex(starpu_task task, cl_command cmd); +#define task_submit(task,cmd) task_submit_ex(task, (cl_command)cmd) + +/** + * Add task dependencies + */ +void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events); + +#endif /* SOCL_TASK_H */ diff --git a/socl/src/util.c b/socl/src/util.c new file mode 100644 index 0000000..24ec364 --- /dev/null +++ b/socl/src/util.c @@ -0,0 +1,69 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "socl.h" +#include "common/timing.h" + +int starpu_worker_get_range_by_id(int id) +{ + int i, oid = 0; + for (i=0; i and not the internal src/ header which contains the +# static inline definition +dist-hook: + failed=0 ; \ + look=""; \ + for i in $$( $(GREP) "static inline" $$(find $(srcdir) -name \*.h) | $(SED) -e 's/.*static inline //g' | $(GREP) -v ENAME\#\# | $(SED) -n -e 's/[^(]* \(\|\*\)\([^ (]*\)(.*/\2/' -e 'p;s/^_*//;p' | $(GREP) -v _starpu_spin_init | $(GREP) -v starpu_sched_ctx_worker_is_master_for_child_ctx) ; do \ + if [ -z "$$look" ] ; then \ + look="$$i" ; \ + else \ + look="$$look\|$$i" ; \ + fi ; \ + done ; \ + echo "$$look" ; \ + for j in $(shell find . -name \*.o) ; do \ + nm $$j | $(GREP) -e "U \($$look\)$$" && { echo $$j ; failed=1 ; } ; \ + done ; \ + [ $$failed == 0 ] + nm -n .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.so | grep -v " [Ua-z] " | grep -v ' W '| grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|main\|smpi_simulated_main_\|_fini\|_edata\|__bss_start\|_end\|fut_getstamp\|__gcov_\|mangle_path\)" | (! grep .) diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..2f3bed5 --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,3206 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Simon Archipoff +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(MPICC_LDFLAGS) +@STARPU_HAVE_WINDOWS_TRUE@am__append_4 = -Xlinker --output-def -Xlinker .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def +@STARPU_HAVE_DARWIN_TRUE@am__append_5 = \ +@STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_starpu_main \ +@STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_smpi_main \ +@STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,__starpu_mpi_simgrid_init \ +@STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_smpi_simulated_main_ \ +@STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_starpu_mpi_world_rank + +@STARPU_HAVE_LEVELDB_TRUE@am__append_6 = core/disk_ops/disk_leveldb.cpp +@STARPU_HAVE_HDF5_TRUE@am__append_7 = core/disk_ops/disk_hdf5.c +@STARPU_USE_HIP_TRUE@am__append_8 = drivers/hip/driver_hip.c +@STARPU_USE_CUDA0_TRUE@am__append_9 = drivers/cuda/driver_cuda0.c +@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_TRUE@am__append_10 = drivers/cuda/driver_cuda1.c 
+@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_TRUE@am__append_11 = drivers/cuda/driver_cuda.c +@STARPU_SIMGRID_TRUE@@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_FALSE@am__append_12 = drivers/cuda/driver_cuda.c +@STARPU_USE_OPENCL_TRUE@am__append_13 = \ +@STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl.c \ +@STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl_utils.c +@STARPU_SIMGRID_TRUE@@STARPU_USE_OPENCL_FALSE@am__append_14 = drivers/opencl/driver_opencl.c +@STARPU_USE_MAX_FPGA_TRUE@am__append_15 = drivers/max/driver_max_fpga.c +@STARPU_LINUX_SYS_TRUE@am__append_16 = core/disk_ops/disk_unistd_o_direct.c +@STARPU_HAVE_HWLOC_TRUE@am__append_17 = \ +@STARPU_HAVE_HWLOC_TRUE@ sched_policies/scheduler_maker.c \ +@STARPU_HAVE_HWLOC_TRUE@ sched_policies/hierarchical_heft.c + +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@am__append_18 = parallel_worker/starpu_parallel_worker_create.c + +######################################### +# # +# Generic MP compilation # +# # +######################################### +@STARPU_USE_MP_TRUE@am__append_19 = drivers/mp_common/mp_common.c \ +@STARPU_USE_MP_TRUE@ drivers/mp_common/source_common.c \ +@STARPU_USE_MP_TRUE@ drivers/mp_common/sink_common.c +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_20 = drivers/mpi/driver_mpi_common.c \ +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_source.c \ +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_sink.c +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_21 = drivers/tcpip/driver_tcpip_common.c \ +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_source.c \ +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_sink.c +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ 
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(xmldir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +am__libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ + common/barrier.c common/barrier_counter.c common/hash.c \ + common/rwlock.c common/starpu_spinlock.c common/timing.c \ + common/fxt.c common/utils.c common/thread.c common/rbtree.c \ + common/graph.c common/inlines.c common/knobs.c core/jobs.c \ + core/task.c core/task_bundle.c core/tree.c core/devices.c \ + core/drivers.c core/workers.c core/combined_workers.c \ + core/topology.c core/disk.c core/debug.c core/errorcheck.c \ + core/progress_hook.c core/idle_hook.c core/dependencies/cg.c \ + core/dependencies/dependencies.c \ + core/dependencies/implicit_data_deps.c \ + core/dependencies/tags.c core/dependencies/task_deps.c \ + core/dependencies/data_concurrency.c \ + core/dependencies/data_arbiter_concurrency.c \ + core/disk_ops/disk_stdio.c core/disk_ops/disk_unistd.c \ + core/disk_ops/unistd/disk_unistd_global.c \ + core/perfmodel/perfmodel_history.c \ + core/perfmodel/energy_model.c core/perfmodel/perfmodel_bus.c \ + core/perfmodel/perfmodel.c core/perfmodel/perfmodel_print.c \ + core/perfmodel/perfmodel_nan.c core/perfmodel/regression.c \ + core/perfmodel/multiple_regression.c core/sched_policy.c \ + core/simgrid.c core/simgrid_cpp.cpp core/sched_ctx.c \ + core/sched_ctx_list.c core/parallel_task.c \ + core/detect_combined_workers.c \ + sched_policies/eager_central_policy.c \ + sched_policies/eager_central_priority_policy.c \ + sched_policies/work_stealing_policy.c \ + sched_policies/deque_modeling_policy_data_aware.c \ + sched_policies/random_policy.c sched_policies/fifo_queues.c \ + sched_policies/parallel_heft.c sched_policies/parallel_eager.c \ + sched_policies/heteroprio.c sched_policies/graph_test_policy.c \ + 
drivers/driver_common/driver_common.c \ + drivers/disk/driver_disk.c datawizard/node_ops.c \ + datawizard/memory_nodes.c datawizard/write_back.c \ + datawizard/coherency.c datawizard/data_request.c \ + datawizard/datawizard.c datawizard/copy_driver.c \ + datawizard/filters.c datawizard/sort_data_handles.c \ + datawizard/malloc.c datawizard/memory_manager.c \ + datawizard/memalloc.c datawizard/memstats.c \ + datawizard/footprint.c datawizard/datastats.c \ + datawizard/user_interactions.c datawizard/reduction.c \ + datawizard/interfaces/data_interface.c \ + datawizard/interfaces/bcsr_interface.c \ + datawizard/interfaces/coo_interface.c \ + datawizard/interfaces/csr_interface.c \ + datawizard/interfaces/vector_filters.c \ + datawizard/interfaces/vector_interface.c \ + datawizard/interfaces/matrix_filters.c \ + datawizard/interfaces/matrix_interface.c \ + datawizard/interfaces/block_filters.c \ + datawizard/interfaces/block_interface.c \ + datawizard/interfaces/tensor_filters.c \ + datawizard/interfaces/tensor_interface.c \ + datawizard/interfaces/ndim_filters.c \ + datawizard/interfaces/ndim_interface.c \ + datawizard/interfaces/bcsr_filters.c \ + datawizard/interfaces/csr_filters.c \ + datawizard/interfaces/variable_interface.c \ + datawizard/interfaces/void_interface.c \ + datawizard/interfaces/multiformat_interface.c \ + util/execute_on_all.c util/starpu_create_sync_task.c \ + util/file.c util/fstarpu.c util/misc.c \ + util/openmp_runtime_support.c \ + util/openmp_runtime_support_environment.c \ + util/openmp_runtime_support_omp_api.c util/starpu_data_cpy.c \ + util/starpu_task_insert.c util/starpu_task_insert_utils.c \ + debug/traces/starpu_fxt.c debug/traces/starpu_fxt_mpi.c \ + debug/traces/starpu_fxt_dag.c debug/traces/starpu_paje.c \ + debug/traces/anim.c debug/latency.c debug/structures_size.c \ + profiling/profiling.c profiling/bound.c \ + profiling/profiling_helpers.c profiling/callbacks.c \ + worker_collection/worker_list.c \ + 
worker_collection/worker_tree.c \ + sched_policies/component_worker.c \ + sched_policies/component_sched.c \ + sched_policies/component_fifo.c sched_policies/prio_deque.c \ + sched_policies/helper_mct.c sched_policies/component_prio.c \ + sched_policies/component_random.c \ + sched_policies/component_eager.c \ + sched_policies/component_eager_prio.c \ + sched_policies/component_eager_calibration.c \ + sched_policies/component_mct.c sched_policies/component_heft.c \ + sched_policies/component_heteroprio.c \ + sched_policies/component_best_implementation.c \ + sched_policies/component_perfmodel_select.c \ + sched_policies/component_composed.c \ + sched_policies/component_work_stealing.c \ + sched_policies/component_stage.c \ + sched_policies/component_userchoice.c \ + sched_policies/modular_eager.c \ + sched_policies/modular_eager_prio.c \ + sched_policies/modular_eager_prefetching.c \ + sched_policies/modular_gemm.c sched_policies/modular_prio.c \ + sched_policies/modular_prio_prefetching.c \ + sched_policies/modular_random.c \ + sched_policies/modular_parallel_random.c \ + sched_policies/modular_random_prefetching.c \ + sched_policies/modular_parallel_heft.c \ + sched_policies/modular_heft.c \ + sched_policies/modular_heft_prio.c \ + sched_policies/modular_heteroprio.c \ + sched_policies/modular_heteroprio_heft.c \ + sched_policies/modular_heft2.c sched_policies/modular_ws.c \ + sched_policies/modular_ez.c core/disk_ops/disk_leveldb.cpp \ + core/disk_ops/disk_hdf5.c drivers/cpu/driver_cpu.c \ + drivers/hip/driver_hip_init.c drivers/cuda/driver_cuda_init.c \ + drivers/hip/driver_hip.c drivers/hip/starpu_hipblas.c \ + drivers/cuda/driver_cuda0.c drivers/cuda/driver_cuda1.c \ + drivers/cuda/driver_cuda.c drivers/cuda/starpu_cublas.c \ + drivers/cuda/starpu_cublas_v2.c drivers/cuda/starpu_cublasLt.c \ + drivers/cuda/starpu_cusparse.c drivers/cuda/starpu_cusolver.c \ + drivers/opencl/driver_opencl_init.c \ + drivers/opencl/driver_opencl.c \ + 
drivers/opencl/driver_opencl_utils.c \ + drivers/max/driver_max_fpga_init.c \ + drivers/max/driver_max_fpga.c \ + core/disk_ops/disk_unistd_o_direct.c \ + sched_policies/scheduler_maker.c \ + sched_policies/hierarchical_heft.c \ + parallel_worker/starpu_parallel_worker_create.c \ + drivers/mp_common/mp_common.c \ + drivers/mp_common/source_common.c \ + drivers/mp_common/sink_common.c drivers/mpi/driver_mpi_init.c \ + drivers/mpi/driver_mpi_common.c \ + drivers/mpi/driver_mpi_source.c drivers/mpi/driver_mpi_sink.c \ + drivers/tcpip/driver_tcpip_init.c \ + drivers/tcpip/driver_tcpip_common.c \ + drivers/tcpip/driver_tcpip_source.c \ + drivers/tcpip/driver_tcpip_sink.c +am__dirstamp = $(am__leading_dot)dirstamp +@STARPU_HAVE_LEVELDB_TRUE@am__objects_1 = \ +@STARPU_HAVE_LEVELDB_TRUE@ core/disk_ops/disk_leveldb.lo +@STARPU_HAVE_HDF5_TRUE@am__objects_2 = core/disk_ops/disk_hdf5.lo +@STARPU_USE_HIP_TRUE@am__objects_3 = drivers/hip/driver_hip.lo +@STARPU_USE_CUDA0_TRUE@am__objects_4 = drivers/cuda/driver_cuda0.lo +@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_TRUE@am__objects_5 = drivers/cuda/driver_cuda1.lo +@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_6 = drivers/cuda/driver_cuda.lo +@STARPU_SIMGRID_TRUE@@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_FALSE@am__objects_7 = drivers/cuda/driver_cuda.lo +@STARPU_USE_OPENCL_TRUE@am__objects_8 = \ +@STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl.lo \ +@STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl_utils.lo +@STARPU_SIMGRID_TRUE@@STARPU_USE_OPENCL_FALSE@am__objects_9 = drivers/opencl/driver_opencl.lo +@STARPU_USE_MAX_FPGA_TRUE@am__objects_10 = \ +@STARPU_USE_MAX_FPGA_TRUE@ drivers/max/driver_max_fpga.lo +@STARPU_LINUX_SYS_TRUE@am__objects_11 = \ +@STARPU_LINUX_SYS_TRUE@ core/disk_ops/disk_unistd_o_direct.lo +@STARPU_HAVE_HWLOC_TRUE@am__objects_12 = \ +@STARPU_HAVE_HWLOC_TRUE@ sched_policies/scheduler_maker.lo \ +@STARPU_HAVE_HWLOC_TRUE@ 
sched_policies/hierarchical_heft.lo +@STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@am__objects_13 = parallel_worker/starpu_parallel_worker_create.lo +@STARPU_USE_MP_TRUE@am__objects_14 = drivers/mp_common/mp_common.lo \ +@STARPU_USE_MP_TRUE@ drivers/mp_common/source_common.lo \ +@STARPU_USE_MP_TRUE@ drivers/mp_common/sink_common.lo +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__objects_15 = drivers/mpi/driver_mpi_common.lo \ +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_source.lo \ +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_sink.lo +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__objects_16 = drivers/tcpip/driver_tcpip_common.lo \ +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_source.lo \ +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_sink.lo +am_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + common/barrier.lo common/barrier_counter.lo common/hash.lo \ + common/rwlock.lo common/starpu_spinlock.lo common/timing.lo \ + common/fxt.lo common/utils.lo common/thread.lo \ + common/rbtree.lo common/graph.lo common/inlines.lo \ + common/knobs.lo core/jobs.lo core/task.lo core/task_bundle.lo \ + core/tree.lo core/devices.lo core/drivers.lo core/workers.lo \ + core/combined_workers.lo core/topology.lo core/disk.lo \ + core/debug.lo core/errorcheck.lo core/progress_hook.lo \ + core/idle_hook.lo core/dependencies/cg.lo \ + core/dependencies/dependencies.lo \ + core/dependencies/implicit_data_deps.lo \ + core/dependencies/tags.lo core/dependencies/task_deps.lo \ + core/dependencies/data_concurrency.lo \ + core/dependencies/data_arbiter_concurrency.lo \ + core/disk_ops/disk_stdio.lo core/disk_ops/disk_unistd.lo \ + core/disk_ops/unistd/disk_unistd_global.lo \ + core/perfmodel/perfmodel_history.lo \ + core/perfmodel/energy_model.lo core/perfmodel/perfmodel_bus.lo \ + core/perfmodel/perfmodel.lo core/perfmodel/perfmodel_print.lo \ + core/perfmodel/perfmodel_nan.lo 
core/perfmodel/regression.lo \ + core/perfmodel/multiple_regression.lo core/sched_policy.lo \ + core/simgrid.lo core/simgrid_cpp.lo core/sched_ctx.lo \ + core/sched_ctx_list.lo core/parallel_task.lo \ + core/detect_combined_workers.lo \ + sched_policies/eager_central_policy.lo \ + sched_policies/eager_central_priority_policy.lo \ + sched_policies/work_stealing_policy.lo \ + sched_policies/deque_modeling_policy_data_aware.lo \ + sched_policies/random_policy.lo sched_policies/fifo_queues.lo \ + sched_policies/parallel_heft.lo \ + sched_policies/parallel_eager.lo sched_policies/heteroprio.lo \ + sched_policies/graph_test_policy.lo \ + drivers/driver_common/driver_common.lo \ + drivers/disk/driver_disk.lo datawizard/node_ops.lo \ + datawizard/memory_nodes.lo datawizard/write_back.lo \ + datawizard/coherency.lo datawizard/data_request.lo \ + datawizard/datawizard.lo datawizard/copy_driver.lo \ + datawizard/filters.lo datawizard/sort_data_handles.lo \ + datawizard/malloc.lo datawizard/memory_manager.lo \ + datawizard/memalloc.lo datawizard/memstats.lo \ + datawizard/footprint.lo datawizard/datastats.lo \ + datawizard/user_interactions.lo datawizard/reduction.lo \ + datawizard/interfaces/data_interface.lo \ + datawizard/interfaces/bcsr_interface.lo \ + datawizard/interfaces/coo_interface.lo \ + datawizard/interfaces/csr_interface.lo \ + datawizard/interfaces/vector_filters.lo \ + datawizard/interfaces/vector_interface.lo \ + datawizard/interfaces/matrix_filters.lo \ + datawizard/interfaces/matrix_interface.lo \ + datawizard/interfaces/block_filters.lo \ + datawizard/interfaces/block_interface.lo \ + datawizard/interfaces/tensor_filters.lo \ + datawizard/interfaces/tensor_interface.lo \ + datawizard/interfaces/ndim_filters.lo \ + datawizard/interfaces/ndim_interface.lo \ + datawizard/interfaces/bcsr_filters.lo \ + datawizard/interfaces/csr_filters.lo \ + datawizard/interfaces/variable_interface.lo \ + datawizard/interfaces/void_interface.lo \ + 
datawizard/interfaces/multiformat_interface.lo \ + util/execute_on_all.lo util/starpu_create_sync_task.lo \ + util/file.lo util/fstarpu.lo util/misc.lo \ + util/openmp_runtime_support.lo \ + util/openmp_runtime_support_environment.lo \ + util/openmp_runtime_support_omp_api.lo util/starpu_data_cpy.lo \ + util/starpu_task_insert.lo util/starpu_task_insert_utils.lo \ + debug/traces/starpu_fxt.lo debug/traces/starpu_fxt_mpi.lo \ + debug/traces/starpu_fxt_dag.lo debug/traces/starpu_paje.lo \ + debug/traces/anim.lo debug/latency.lo debug/structures_size.lo \ + profiling/profiling.lo profiling/bound.lo \ + profiling/profiling_helpers.lo profiling/callbacks.lo \ + worker_collection/worker_list.lo \ + worker_collection/worker_tree.lo \ + sched_policies/component_worker.lo \ + sched_policies/component_sched.lo \ + sched_policies/component_fifo.lo sched_policies/prio_deque.lo \ + sched_policies/helper_mct.lo sched_policies/component_prio.lo \ + sched_policies/component_random.lo \ + sched_policies/component_eager.lo \ + sched_policies/component_eager_prio.lo \ + sched_policies/component_eager_calibration.lo \ + sched_policies/component_mct.lo \ + sched_policies/component_heft.lo \ + sched_policies/component_heteroprio.lo \ + sched_policies/component_best_implementation.lo \ + sched_policies/component_perfmodel_select.lo \ + sched_policies/component_composed.lo \ + sched_policies/component_work_stealing.lo \ + sched_policies/component_stage.lo \ + sched_policies/component_userchoice.lo \ + sched_policies/modular_eager.lo \ + sched_policies/modular_eager_prio.lo \ + sched_policies/modular_eager_prefetching.lo \ + sched_policies/modular_gemm.lo sched_policies/modular_prio.lo \ + sched_policies/modular_prio_prefetching.lo \ + sched_policies/modular_random.lo \ + sched_policies/modular_parallel_random.lo \ + sched_policies/modular_random_prefetching.lo \ + sched_policies/modular_parallel_heft.lo \ + sched_policies/modular_heft.lo \ + sched_policies/modular_heft_prio.lo \ + 
sched_policies/modular_heteroprio.lo \ + sched_policies/modular_heteroprio_heft.lo \ + sched_policies/modular_heft2.lo sched_policies/modular_ws.lo \ + sched_policies/modular_ez.lo $(am__objects_1) $(am__objects_2) \ + drivers/cpu/driver_cpu.lo drivers/hip/driver_hip_init.lo \ + drivers/cuda/driver_cuda_init.lo $(am__objects_3) \ + drivers/hip/starpu_hipblas.lo $(am__objects_4) \ + $(am__objects_5) $(am__objects_6) $(am__objects_7) \ + drivers/cuda/starpu_cublas.lo drivers/cuda/starpu_cublas_v2.lo \ + drivers/cuda/starpu_cublasLt.lo \ + drivers/cuda/starpu_cusparse.lo \ + drivers/cuda/starpu_cusolver.lo \ + drivers/opencl/driver_opencl_init.lo $(am__objects_8) \ + $(am__objects_9) drivers/max/driver_max_fpga_init.lo \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) \ + drivers/mpi/driver_mpi_init.lo $(am__objects_15) \ + drivers/tcpip/driver_tcpip_init.lo $(am__objects_16) +libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) $(LDFLAGS) \ + -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = common/$(DEPDIR)/barrier.Plo \ + 
common/$(DEPDIR)/barrier_counter.Plo common/$(DEPDIR)/fxt.Plo \ + common/$(DEPDIR)/graph.Plo common/$(DEPDIR)/hash.Plo \ + common/$(DEPDIR)/inlines.Plo common/$(DEPDIR)/knobs.Plo \ + common/$(DEPDIR)/rbtree.Plo common/$(DEPDIR)/rwlock.Plo \ + common/$(DEPDIR)/starpu_spinlock.Plo \ + common/$(DEPDIR)/thread.Plo common/$(DEPDIR)/timing.Plo \ + common/$(DEPDIR)/utils.Plo core/$(DEPDIR)/combined_workers.Plo \ + core/$(DEPDIR)/debug.Plo \ + core/$(DEPDIR)/detect_combined_workers.Plo \ + core/$(DEPDIR)/devices.Plo core/$(DEPDIR)/disk.Plo \ + core/$(DEPDIR)/drivers.Plo core/$(DEPDIR)/errorcheck.Plo \ + core/$(DEPDIR)/idle_hook.Plo core/$(DEPDIR)/jobs.Plo \ + core/$(DEPDIR)/parallel_task.Plo \ + core/$(DEPDIR)/progress_hook.Plo core/$(DEPDIR)/sched_ctx.Plo \ + core/$(DEPDIR)/sched_ctx_list.Plo \ + core/$(DEPDIR)/sched_policy.Plo core/$(DEPDIR)/simgrid.Plo \ + core/$(DEPDIR)/simgrid_cpp.Plo core/$(DEPDIR)/task.Plo \ + core/$(DEPDIR)/task_bundle.Plo core/$(DEPDIR)/topology.Plo \ + core/$(DEPDIR)/tree.Plo core/$(DEPDIR)/workers.Plo \ + core/dependencies/$(DEPDIR)/cg.Plo \ + core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo \ + core/dependencies/$(DEPDIR)/data_concurrency.Plo \ + core/dependencies/$(DEPDIR)/dependencies.Plo \ + core/dependencies/$(DEPDIR)/implicit_data_deps.Plo \ + core/dependencies/$(DEPDIR)/tags.Plo \ + core/dependencies/$(DEPDIR)/task_deps.Plo \ + core/disk_ops/$(DEPDIR)/disk_hdf5.Plo \ + core/disk_ops/$(DEPDIR)/disk_leveldb.Plo \ + core/disk_ops/$(DEPDIR)/disk_stdio.Plo \ + core/disk_ops/$(DEPDIR)/disk_unistd.Plo \ + core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo \ + core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo \ + core/perfmodel/$(DEPDIR)/energy_model.Plo \ + core/perfmodel/$(DEPDIR)/multiple_regression.Plo \ + core/perfmodel/$(DEPDIR)/perfmodel.Plo \ + core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo \ + core/perfmodel/$(DEPDIR)/perfmodel_history.Plo \ + core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo \ + 
core/perfmodel/$(DEPDIR)/perfmodel_print.Plo \ + core/perfmodel/$(DEPDIR)/regression.Plo \ + datawizard/$(DEPDIR)/coherency.Plo \ + datawizard/$(DEPDIR)/copy_driver.Plo \ + datawizard/$(DEPDIR)/data_request.Plo \ + datawizard/$(DEPDIR)/datastats.Plo \ + datawizard/$(DEPDIR)/datawizard.Plo \ + datawizard/$(DEPDIR)/filters.Plo \ + datawizard/$(DEPDIR)/footprint.Plo \ + datawizard/$(DEPDIR)/malloc.Plo \ + datawizard/$(DEPDIR)/memalloc.Plo \ + datawizard/$(DEPDIR)/memory_manager.Plo \ + datawizard/$(DEPDIR)/memory_nodes.Plo \ + datawizard/$(DEPDIR)/memstats.Plo \ + datawizard/$(DEPDIR)/node_ops.Plo \ + datawizard/$(DEPDIR)/reduction.Plo \ + datawizard/$(DEPDIR)/sort_data_handles.Plo \ + datawizard/$(DEPDIR)/user_interactions.Plo \ + datawizard/$(DEPDIR)/write_back.Plo \ + datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/block_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/block_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/coo_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/csr_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/csr_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/data_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/variable_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/vector_filters.Plo \ + datawizard/interfaces/$(DEPDIR)/vector_interface.Plo \ + datawizard/interfaces/$(DEPDIR)/void_interface.Plo \ + debug/$(DEPDIR)/latency.Plo \ + debug/$(DEPDIR)/structures_size.Plo \ + debug/traces/$(DEPDIR)/anim.Plo \ + debug/traces/$(DEPDIR)/starpu_fxt.Plo \ + 
debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo \ + debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo \ + debug/traces/$(DEPDIR)/starpu_paje.Plo \ + drivers/cpu/$(DEPDIR)/driver_cpu.Plo \ + drivers/cuda/$(DEPDIR)/driver_cuda.Plo \ + drivers/cuda/$(DEPDIR)/driver_cuda0.Plo \ + drivers/cuda/$(DEPDIR)/driver_cuda1.Plo \ + drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo \ + drivers/cuda/$(DEPDIR)/starpu_cublas.Plo \ + drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo \ + drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo \ + drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo \ + drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo \ + drivers/disk/$(DEPDIR)/driver_disk.Plo \ + drivers/driver_common/$(DEPDIR)/driver_common.Plo \ + drivers/hip/$(DEPDIR)/driver_hip.Plo \ + drivers/hip/$(DEPDIR)/driver_hip_init.Plo \ + drivers/hip/$(DEPDIR)/starpu_hipblas.Plo \ + drivers/max/$(DEPDIR)/driver_max_fpga.Plo \ + drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo \ + drivers/mp_common/$(DEPDIR)/mp_common.Plo \ + drivers/mp_common/$(DEPDIR)/sink_common.Plo \ + drivers/mp_common/$(DEPDIR)/source_common.Plo \ + drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo \ + drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo \ + drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo \ + drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo \ + drivers/opencl/$(DEPDIR)/driver_opencl.Plo \ + drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo \ + drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo \ + drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo \ + drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo \ + drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo \ + drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo \ + parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo \ + profiling/$(DEPDIR)/bound.Plo \ + profiling/$(DEPDIR)/callbacks.Plo \ + profiling/$(DEPDIR)/profiling.Plo \ + profiling/$(DEPDIR)/profiling_helpers.Plo \ + sched_policies/$(DEPDIR)/component_best_implementation.Plo \ + sched_policies/$(DEPDIR)/component_composed.Plo \ + sched_policies/$(DEPDIR)/component_eager.Plo \ + 
sched_policies/$(DEPDIR)/component_eager_calibration.Plo \ + sched_policies/$(DEPDIR)/component_eager_prio.Plo \ + sched_policies/$(DEPDIR)/component_fifo.Plo \ + sched_policies/$(DEPDIR)/component_heft.Plo \ + sched_policies/$(DEPDIR)/component_heteroprio.Plo \ + sched_policies/$(DEPDIR)/component_mct.Plo \ + sched_policies/$(DEPDIR)/component_perfmodel_select.Plo \ + sched_policies/$(DEPDIR)/component_prio.Plo \ + sched_policies/$(DEPDIR)/component_random.Plo \ + sched_policies/$(DEPDIR)/component_sched.Plo \ + sched_policies/$(DEPDIR)/component_stage.Plo \ + sched_policies/$(DEPDIR)/component_userchoice.Plo \ + sched_policies/$(DEPDIR)/component_work_stealing.Plo \ + sched_policies/$(DEPDIR)/component_worker.Plo \ + sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo \ + sched_policies/$(DEPDIR)/eager_central_policy.Plo \ + sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo \ + sched_policies/$(DEPDIR)/fifo_queues.Plo \ + sched_policies/$(DEPDIR)/graph_test_policy.Plo \ + sched_policies/$(DEPDIR)/helper_mct.Plo \ + sched_policies/$(DEPDIR)/heteroprio.Plo \ + sched_policies/$(DEPDIR)/hierarchical_heft.Plo \ + sched_policies/$(DEPDIR)/modular_eager.Plo \ + sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo \ + sched_policies/$(DEPDIR)/modular_eager_prio.Plo \ + sched_policies/$(DEPDIR)/modular_ez.Plo \ + sched_policies/$(DEPDIR)/modular_gemm.Plo \ + sched_policies/$(DEPDIR)/modular_heft.Plo \ + sched_policies/$(DEPDIR)/modular_heft2.Plo \ + sched_policies/$(DEPDIR)/modular_heft_prio.Plo \ + sched_policies/$(DEPDIR)/modular_heteroprio.Plo \ + sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo \ + sched_policies/$(DEPDIR)/modular_parallel_heft.Plo \ + sched_policies/$(DEPDIR)/modular_parallel_random.Plo \ + sched_policies/$(DEPDIR)/modular_prio.Plo \ + sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo \ + sched_policies/$(DEPDIR)/modular_random.Plo \ + sched_policies/$(DEPDIR)/modular_random_prefetching.Plo \ + 
sched_policies/$(DEPDIR)/modular_ws.Plo \ + sched_policies/$(DEPDIR)/parallel_eager.Plo \ + sched_policies/$(DEPDIR)/parallel_heft.Plo \ + sched_policies/$(DEPDIR)/prio_deque.Plo \ + sched_policies/$(DEPDIR)/random_policy.Plo \ + sched_policies/$(DEPDIR)/scheduler_maker.Plo \ + sched_policies/$(DEPDIR)/work_stealing_policy.Plo \ + util/$(DEPDIR)/execute_on_all.Plo util/$(DEPDIR)/file.Plo \ + util/$(DEPDIR)/fstarpu.Plo util/$(DEPDIR)/misc.Plo \ + util/$(DEPDIR)/openmp_runtime_support.Plo \ + util/$(DEPDIR)/openmp_runtime_support_environment.Plo \ + util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo \ + util/$(DEPDIR)/starpu_create_sync_task.Plo \ + util/$(DEPDIR)/starpu_data_cpy.Plo \ + util/$(DEPDIR)/starpu_task_insert.Plo \ + util/$(DEPDIR)/starpu_task_insert_utils.Plo \ + worker_collection/$(DEPDIR)/worker_list.Plo \ + worker_collection/$(DEPDIR)/worker_tree.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = 
$(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = \ + $(am__libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(xml_DATA) +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = 
@CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ -lm $(LIBSTARPU_LDFLAGS) $(OPENMP_CFLAGS) \ + $(am__append_3) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ 
+LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = 
@STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = 
@am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want 
to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU \ + -DSTARPU_DATADIR='"$(datadir)"' $(STARPU_H_CPPFLAGS) \ + $(OPENMP_CFLAGS) $(FXT_CFLAGS) +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +EXTRA_DIST = dolib.c core/perfmodel/starpu-perfmodel.dtd +xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd +xmldir = $(pkgdatadir) +ldflags = $(am__append_4) +libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE) +@STARPU_HAVE_WINDOWS_TRUE@LC_MESSAGES = C +lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la +libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) \ + -no-undefined -version-info $(libstarpu_so_version) \ + $(am__append_5) +noinst_HEADERS = \ + core/dependencies/data_concurrency.h \ + core/dependencies/cg.h \ + core/dependencies/tags.h \ + core/dependencies/implicit_data_deps.h \ + core/disk.h \ + core/disk_ops/unistd/disk_unistd_global.h \ + core/progress_hook.h \ + core/idle_hook.h \ + core/sched_policy.h \ + core/sched_ctx.h \ + core/sched_ctx_list.h \ + 
core/perfmodel/perfmodel.h \ + core/perfmodel/regression.h \ + core/perfmodel/multiple_regression.h \ + core/jobs.h \ + core/devices.h \ + core/task.h \ + core/drivers.h \ + core/workers.h \ + core/topology.h \ + core/debug.h \ + core/errorcheck.h \ + core/combined_workers.h \ + core/simgrid.h \ + core/task_bundle.h \ + core/detect_combined_workers.h \ + sched_policies/helper_mct.h \ + sched_policies/fifo_queues.h \ + sched_policies/heteroprio.h \ + datawizard/node_ops.h \ + datawizard/footprint.h \ + datawizard/datawizard.h \ + datawizard/data_request.h \ + datawizard/filters.h \ + datawizard/write_back.h \ + datawizard/datastats.h \ + datawizard/malloc.h \ + datawizard/memstats.h \ + datawizard/memory_manager.h \ + datawizard/memalloc.h \ + datawizard/copy_driver.h \ + datawizard/coherency.h \ + datawizard/sort_data_handles.h \ + datawizard/memory_nodes.h \ + datawizard/interfaces/data_interface.h \ + common/barrier.h \ + common/timing.h \ + common/list.h \ + common/rwlock.h \ + common/starpu_spinlock.h \ + common/fxt.h \ + common/utils.h \ + common/thread.h \ + common/barrier.h \ + common/uthash.h \ + common/barrier_counter.h \ + common/rbtree.h \ + common/rbtree_i.h \ + common/prio_list.h \ + common/graph.h \ + common/knobs.h \ + drivers/driver_common/driver_common.h \ + drivers/mp_common/mp_common.h \ + drivers/mp_common/source_common.h \ + drivers/mp_common/sink_common.h \ + drivers/cpu/driver_cpu.h \ + drivers/cuda/driver_cuda.h \ + drivers/hip/driver_hip.h \ + drivers/opencl/driver_opencl.h \ + drivers/opencl/driver_opencl_utils.h \ + drivers/max/driver_max_fpga.h \ + debug/starpu_debug_helpers.h \ + drivers/mpi/driver_mpi_common.h \ + drivers/mpi/driver_mpi_source.h \ + drivers/mpi/driver_mpi_sink.h \ + drivers/tcpip/driver_tcpip_common.h \ + drivers/tcpip/driver_tcpip_common_func.h \ + drivers/tcpip/driver_tcpip_source.h \ + drivers/tcpip/driver_tcpip_sink.h \ + drivers/disk/driver_disk.h \ + debug/traces/starpu_fxt.h \ + 
parallel_worker/starpu_parallel_worker_create.h \ + profiling/bound.h \ + profiling/profiling.h \ + profiling/callbacks.h \ + util/openmp_runtime_support.h \ + util/starpu_task_insert_utils.h \ + util/starpu_data_cpy.h \ + sched_policies/prio_deque.h \ + sched_policies/sched_component.h + + +######################################### +# # +# MPI Master/Slave compilation # +# # +######################################### + +######################################### +# # +# TCPIP Master/Slave compilation # +# # +######################################### +libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = common/barrier.c \ + common/barrier_counter.c common/hash.c common/rwlock.c \ + common/starpu_spinlock.c common/timing.c common/fxt.c \ + common/utils.c common/thread.c common/rbtree.c common/graph.c \ + common/inlines.c common/knobs.c core/jobs.c core/task.c \ + core/task_bundle.c core/tree.c core/devices.c core/drivers.c \ + core/workers.c core/combined_workers.c core/topology.c \ + core/disk.c core/debug.c core/errorcheck.c \ + core/progress_hook.c core/idle_hook.c core/dependencies/cg.c \ + core/dependencies/dependencies.c \ + core/dependencies/implicit_data_deps.c \ + core/dependencies/tags.c core/dependencies/task_deps.c \ + core/dependencies/data_concurrency.c \ + core/dependencies/data_arbiter_concurrency.c \ + core/disk_ops/disk_stdio.c core/disk_ops/disk_unistd.c \ + core/disk_ops/unistd/disk_unistd_global.c \ + core/perfmodel/perfmodel_history.c \ + core/perfmodel/energy_model.c core/perfmodel/perfmodel_bus.c \ + core/perfmodel/perfmodel.c core/perfmodel/perfmodel_print.c \ + core/perfmodel/perfmodel_nan.c core/perfmodel/regression.c \ + core/perfmodel/multiple_regression.c core/sched_policy.c \ + core/simgrid.c core/simgrid_cpp.cpp core/sched_ctx.c \ + core/sched_ctx_list.c core/parallel_task.c \ + core/detect_combined_workers.c \ + sched_policies/eager_central_policy.c \ + sched_policies/eager_central_priority_policy.c \ + 
sched_policies/work_stealing_policy.c \ + sched_policies/deque_modeling_policy_data_aware.c \ + sched_policies/random_policy.c sched_policies/fifo_queues.c \ + sched_policies/parallel_heft.c sched_policies/parallel_eager.c \ + sched_policies/heteroprio.c sched_policies/graph_test_policy.c \ + drivers/driver_common/driver_common.c \ + drivers/disk/driver_disk.c datawizard/node_ops.c \ + datawizard/memory_nodes.c datawizard/write_back.c \ + datawizard/coherency.c datawizard/data_request.c \ + datawizard/datawizard.c datawizard/copy_driver.c \ + datawizard/filters.c datawizard/sort_data_handles.c \ + datawizard/malloc.c datawizard/memory_manager.c \ + datawizard/memalloc.c datawizard/memstats.c \ + datawizard/footprint.c datawizard/datastats.c \ + datawizard/user_interactions.c datawizard/reduction.c \ + datawizard/interfaces/data_interface.c \ + datawizard/interfaces/bcsr_interface.c \ + datawizard/interfaces/coo_interface.c \ + datawizard/interfaces/csr_interface.c \ + datawizard/interfaces/vector_filters.c \ + datawizard/interfaces/vector_interface.c \ + datawizard/interfaces/matrix_filters.c \ + datawizard/interfaces/matrix_interface.c \ + datawizard/interfaces/block_filters.c \ + datawizard/interfaces/block_interface.c \ + datawizard/interfaces/tensor_filters.c \ + datawizard/interfaces/tensor_interface.c \ + datawizard/interfaces/ndim_filters.c \ + datawizard/interfaces/ndim_interface.c \ + datawizard/interfaces/bcsr_filters.c \ + datawizard/interfaces/csr_filters.c \ + datawizard/interfaces/variable_interface.c \ + datawizard/interfaces/void_interface.c \ + datawizard/interfaces/multiformat_interface.c \ + util/execute_on_all.c util/starpu_create_sync_task.c \ + util/file.c util/fstarpu.c util/misc.c \ + util/openmp_runtime_support.c \ + util/openmp_runtime_support_environment.c \ + util/openmp_runtime_support_omp_api.c util/starpu_data_cpy.c \ + util/starpu_task_insert.c util/starpu_task_insert_utils.c \ + debug/traces/starpu_fxt.c 
debug/traces/starpu_fxt_mpi.c \ + debug/traces/starpu_fxt_dag.c debug/traces/starpu_paje.c \ + debug/traces/anim.c debug/latency.c debug/structures_size.c \ + profiling/profiling.c profiling/bound.c \ + profiling/profiling_helpers.c profiling/callbacks.c \ + worker_collection/worker_list.c \ + worker_collection/worker_tree.c \ + sched_policies/component_worker.c \ + sched_policies/component_sched.c \ + sched_policies/component_fifo.c sched_policies/prio_deque.c \ + sched_policies/helper_mct.c sched_policies/component_prio.c \ + sched_policies/component_random.c \ + sched_policies/component_eager.c \ + sched_policies/component_eager_prio.c \ + sched_policies/component_eager_calibration.c \ + sched_policies/component_mct.c sched_policies/component_heft.c \ + sched_policies/component_heteroprio.c \ + sched_policies/component_best_implementation.c \ + sched_policies/component_perfmodel_select.c \ + sched_policies/component_composed.c \ + sched_policies/component_work_stealing.c \ + sched_policies/component_stage.c \ + sched_policies/component_userchoice.c \ + sched_policies/modular_eager.c \ + sched_policies/modular_eager_prio.c \ + sched_policies/modular_eager_prefetching.c \ + sched_policies/modular_gemm.c sched_policies/modular_prio.c \ + sched_policies/modular_prio_prefetching.c \ + sched_policies/modular_random.c \ + sched_policies/modular_parallel_random.c \ + sched_policies/modular_random_prefetching.c \ + sched_policies/modular_parallel_heft.c \ + sched_policies/modular_heft.c \ + sched_policies/modular_heft_prio.c \ + sched_policies/modular_heteroprio.c \ + sched_policies/modular_heteroprio_heft.c \ + sched_policies/modular_heft2.c sched_policies/modular_ws.c \ + sched_policies/modular_ez.c $(am__append_6) $(am__append_7) \ + drivers/cpu/driver_cpu.c drivers/hip/driver_hip_init.c \ + drivers/cuda/driver_cuda_init.c $(am__append_8) \ + drivers/hip/starpu_hipblas.c $(am__append_9) $(am__append_10) \ + $(am__append_11) $(am__append_12) 
drivers/cuda/starpu_cublas.c \ + drivers/cuda/starpu_cublas_v2.c drivers/cuda/starpu_cublasLt.c \ + drivers/cuda/starpu_cusparse.c drivers/cuda/starpu_cusolver.c \ + drivers/opencl/driver_opencl_init.c $(am__append_13) \ + $(am__append_14) drivers/max/driver_max_fpga_init.c \ + $(am__append_15) $(am__append_16) $(am__append_17) \ + $(am__append_18) $(am__append_19) \ + drivers/mpi/driver_mpi_init.c $(am__append_20) \ + drivers/tcpip/driver_tcpip_init.c $(am__append_21) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cpp .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort 
-u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +common/$(am__dirstamp): + @$(MKDIR_P) common + @: > common/$(am__dirstamp) +common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) common/$(DEPDIR) + @: > common/$(DEPDIR)/$(am__dirstamp) +common/barrier.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/barrier_counter.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/hash.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/rwlock.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/starpu_spinlock.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/timing.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/fxt.lo: common/$(am__dirstamp) common/$(DEPDIR)/$(am__dirstamp) +common/utils.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/thread.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/rbtree.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/graph.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/inlines.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/knobs.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +core/$(am__dirstamp): + @$(MKDIR_P) core + @: > core/$(am__dirstamp) +core/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/$(DEPDIR) + @: > core/$(DEPDIR)/$(am__dirstamp) +core/jobs.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/task.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/task_bundle.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/tree.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/devices.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/drivers.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/workers.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/combined_workers.lo: 
core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/topology.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/disk.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/debug.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/errorcheck.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/progress_hook.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/idle_hook.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/dependencies/$(am__dirstamp): + @$(MKDIR_P) core/dependencies + @: > core/dependencies/$(am__dirstamp) +core/dependencies/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/dependencies/$(DEPDIR) + @: > core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/cg.lo: core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/dependencies.lo: core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/implicit_data_deps.lo: \ + core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/tags.lo: core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/task_deps.lo: core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/data_concurrency.lo: \ + core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/dependencies/data_arbiter_concurrency.lo: \ + core/dependencies/$(am__dirstamp) \ + core/dependencies/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/$(am__dirstamp): + @$(MKDIR_P) core/disk_ops + @: > core/disk_ops/$(am__dirstamp) +core/disk_ops/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/disk_ops/$(DEPDIR) + @: > core/disk_ops/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/disk_stdio.lo: core/disk_ops/$(am__dirstamp) \ + core/disk_ops/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/disk_unistd.lo: core/disk_ops/$(am__dirstamp) \ + 
core/disk_ops/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/unistd/$(am__dirstamp): + @$(MKDIR_P) core/disk_ops/unistd + @: > core/disk_ops/unistd/$(am__dirstamp) +core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/disk_ops/unistd/$(DEPDIR) + @: > core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/unistd/disk_unistd_global.lo: \ + core/disk_ops/unistd/$(am__dirstamp) \ + core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/$(am__dirstamp): + @$(MKDIR_P) core/perfmodel + @: > core/perfmodel/$(am__dirstamp) +core/perfmodel/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/perfmodel/$(DEPDIR) + @: > core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/perfmodel_history.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/energy_model.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/perfmodel_bus.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/perfmodel.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/perfmodel_print.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/perfmodel_nan.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/regression.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/perfmodel/multiple_regression.lo: core/perfmodel/$(am__dirstamp) \ + core/perfmodel/$(DEPDIR)/$(am__dirstamp) +core/sched_policy.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/simgrid.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/simgrid_cpp.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/sched_ctx.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) +core/sched_ctx_list.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/parallel_task.lo: core/$(am__dirstamp) \ + 
core/$(DEPDIR)/$(am__dirstamp) +core/detect_combined_workers.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +sched_policies/$(am__dirstamp): + @$(MKDIR_P) sched_policies + @: > sched_policies/$(am__dirstamp) +sched_policies/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_policies/$(DEPDIR) + @: > sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/eager_central_policy.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/eager_central_priority_policy.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/work_stealing_policy.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/deque_modeling_policy_data_aware.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/random_policy.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/fifo_queues.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/parallel_heft.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/parallel_eager.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/heteroprio.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/graph_test_policy.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +drivers/driver_common/$(am__dirstamp): + @$(MKDIR_P) drivers/driver_common + @: > drivers/driver_common/$(am__dirstamp) +drivers/driver_common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/driver_common/$(DEPDIR) + @: > drivers/driver_common/$(DEPDIR)/$(am__dirstamp) +drivers/driver_common/driver_common.lo: \ + drivers/driver_common/$(am__dirstamp) \ + drivers/driver_common/$(DEPDIR)/$(am__dirstamp) +drivers/disk/$(am__dirstamp): + @$(MKDIR_P) drivers/disk + @: > 
drivers/disk/$(am__dirstamp) +drivers/disk/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/disk/$(DEPDIR) + @: > drivers/disk/$(DEPDIR)/$(am__dirstamp) +drivers/disk/driver_disk.lo: drivers/disk/$(am__dirstamp) \ + drivers/disk/$(DEPDIR)/$(am__dirstamp) +datawizard/$(am__dirstamp): + @$(MKDIR_P) datawizard + @: > datawizard/$(am__dirstamp) +datawizard/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/$(DEPDIR) + @: > datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/node_ops.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/memory_nodes.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/write_back.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/coherency.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/data_request.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/datawizard.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/copy_driver.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/filters.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/sort_data_handles.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/malloc.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/memory_manager.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/memalloc.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/memstats.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/footprint.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/datastats.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/user_interactions.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) 
+datawizard/reduction.lo: datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces + @: > datawizard/interfaces/$(am__dirstamp) +datawizard/interfaces/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/$(DEPDIR) + @: > datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/data_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/coo_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/block_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/block_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/tensor_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/tensor_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + 
datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr_filters.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/variable_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/void_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat_interface.lo: \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +util/$(am__dirstamp): + @$(MKDIR_P) util + @: > util/$(am__dirstamp) +util/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) util/$(DEPDIR) + @: > util/$(DEPDIR)/$(am__dirstamp) +util/execute_on_all.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/starpu_create_sync_task.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/file.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) +util/fstarpu.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) +util/misc.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) +util/openmp_runtime_support.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/openmp_runtime_support_environment.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/openmp_runtime_support_omp_api.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/starpu_data_cpy.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/starpu_task_insert.lo: util/$(am__dirstamp) \ + 
util/$(DEPDIR)/$(am__dirstamp) +util/starpu_task_insert_utils.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +debug/traces/$(am__dirstamp): + @$(MKDIR_P) debug/traces + @: > debug/traces/$(am__dirstamp) +debug/traces/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) debug/traces/$(DEPDIR) + @: > debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/traces/starpu_fxt.lo: debug/traces/$(am__dirstamp) \ + debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/traces/starpu_fxt_mpi.lo: debug/traces/$(am__dirstamp) \ + debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/traces/starpu_fxt_dag.lo: debug/traces/$(am__dirstamp) \ + debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/traces/starpu_paje.lo: debug/traces/$(am__dirstamp) \ + debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/traces/anim.lo: debug/traces/$(am__dirstamp) \ + debug/traces/$(DEPDIR)/$(am__dirstamp) +debug/$(am__dirstamp): + @$(MKDIR_P) debug + @: > debug/$(am__dirstamp) +debug/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) debug/$(DEPDIR) + @: > debug/$(DEPDIR)/$(am__dirstamp) +debug/latency.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +debug/structures_size.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +profiling/$(am__dirstamp): + @$(MKDIR_P) profiling + @: > profiling/$(am__dirstamp) +profiling/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) profiling/$(DEPDIR) + @: > profiling/$(DEPDIR)/$(am__dirstamp) +profiling/profiling.lo: profiling/$(am__dirstamp) \ + profiling/$(DEPDIR)/$(am__dirstamp) +profiling/bound.lo: profiling/$(am__dirstamp) \ + profiling/$(DEPDIR)/$(am__dirstamp) +profiling/profiling_helpers.lo: profiling/$(am__dirstamp) \ + profiling/$(DEPDIR)/$(am__dirstamp) +profiling/callbacks.lo: profiling/$(am__dirstamp) \ + profiling/$(DEPDIR)/$(am__dirstamp) +worker_collection/$(am__dirstamp): + @$(MKDIR_P) worker_collection + @: > worker_collection/$(am__dirstamp) +worker_collection/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) worker_collection/$(DEPDIR) + @: > 
worker_collection/$(DEPDIR)/$(am__dirstamp) +worker_collection/worker_list.lo: worker_collection/$(am__dirstamp) \ + worker_collection/$(DEPDIR)/$(am__dirstamp) +worker_collection/worker_tree.lo: worker_collection/$(am__dirstamp) \ + worker_collection/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_worker.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_sched.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_fifo.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/prio_deque.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/helper_mct.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_prio.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_random.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_eager.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_eager_prio.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_eager_calibration.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_mct.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_heft.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_heteroprio.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_best_implementation.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_perfmodel_select.lo: \ + sched_policies/$(am__dirstamp) \ + 
sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_composed.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_work_stealing.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_stage.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/component_userchoice.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_eager.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_eager_prio.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_eager_prefetching.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_gemm.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_prio.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_prio_prefetching.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_random.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_parallel_random.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_random_prefetching.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_parallel_heft.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_heft.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_heft_prio.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_heteroprio.lo: sched_policies/$(am__dirstamp) \ + 
sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_heteroprio_heft.lo: \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_heft2.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_ws.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/modular_ez.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/disk_leveldb.lo: core/disk_ops/$(am__dirstamp) \ + core/disk_ops/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/disk_hdf5.lo: core/disk_ops/$(am__dirstamp) \ + core/disk_ops/$(DEPDIR)/$(am__dirstamp) +drivers/cpu/$(am__dirstamp): + @$(MKDIR_P) drivers/cpu + @: > drivers/cpu/$(am__dirstamp) +drivers/cpu/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/cpu/$(DEPDIR) + @: > drivers/cpu/$(DEPDIR)/$(am__dirstamp) +drivers/cpu/driver_cpu.lo: drivers/cpu/$(am__dirstamp) \ + drivers/cpu/$(DEPDIR)/$(am__dirstamp) +drivers/hip/$(am__dirstamp): + @$(MKDIR_P) drivers/hip + @: > drivers/hip/$(am__dirstamp) +drivers/hip/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/hip/$(DEPDIR) + @: > drivers/hip/$(DEPDIR)/$(am__dirstamp) +drivers/hip/driver_hip_init.lo: drivers/hip/$(am__dirstamp) \ + drivers/hip/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/$(am__dirstamp): + @$(MKDIR_P) drivers/cuda + @: > drivers/cuda/$(am__dirstamp) +drivers/cuda/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/cuda/$(DEPDIR) + @: > drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/driver_cuda_init.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/hip/driver_hip.lo: drivers/hip/$(am__dirstamp) \ + drivers/hip/$(DEPDIR)/$(am__dirstamp) +drivers/hip/starpu_hipblas.lo: drivers/hip/$(am__dirstamp) \ + drivers/hip/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/driver_cuda0.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/driver_cuda1.lo: 
drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/driver_cuda.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/starpu_cublas.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/starpu_cublas_v2.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/starpu_cublasLt.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/starpu_cusparse.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/cuda/starpu_cusolver.lo: drivers/cuda/$(am__dirstamp) \ + drivers/cuda/$(DEPDIR)/$(am__dirstamp) +drivers/opencl/$(am__dirstamp): + @$(MKDIR_P) drivers/opencl + @: > drivers/opencl/$(am__dirstamp) +drivers/opencl/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/opencl/$(DEPDIR) + @: > drivers/opencl/$(DEPDIR)/$(am__dirstamp) +drivers/opencl/driver_opencl_init.lo: drivers/opencl/$(am__dirstamp) \ + drivers/opencl/$(DEPDIR)/$(am__dirstamp) +drivers/opencl/driver_opencl.lo: drivers/opencl/$(am__dirstamp) \ + drivers/opencl/$(DEPDIR)/$(am__dirstamp) +drivers/opencl/driver_opencl_utils.lo: drivers/opencl/$(am__dirstamp) \ + drivers/opencl/$(DEPDIR)/$(am__dirstamp) +drivers/max/$(am__dirstamp): + @$(MKDIR_P) drivers/max + @: > drivers/max/$(am__dirstamp) +drivers/max/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/max/$(DEPDIR) + @: > drivers/max/$(DEPDIR)/$(am__dirstamp) +drivers/max/driver_max_fpga_init.lo: drivers/max/$(am__dirstamp) \ + drivers/max/$(DEPDIR)/$(am__dirstamp) +drivers/max/driver_max_fpga.lo: drivers/max/$(am__dirstamp) \ + drivers/max/$(DEPDIR)/$(am__dirstamp) +core/disk_ops/disk_unistd_o_direct.lo: core/disk_ops/$(am__dirstamp) \ + core/disk_ops/$(DEPDIR)/$(am__dirstamp) +sched_policies/scheduler_maker.lo: sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/hierarchical_heft.lo: sched_policies/$(am__dirstamp) \ + 
sched_policies/$(DEPDIR)/$(am__dirstamp) +parallel_worker/$(am__dirstamp): + @$(MKDIR_P) parallel_worker + @: > parallel_worker/$(am__dirstamp) +parallel_worker/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) parallel_worker/$(DEPDIR) + @: > parallel_worker/$(DEPDIR)/$(am__dirstamp) +parallel_worker/starpu_parallel_worker_create.lo: \ + parallel_worker/$(am__dirstamp) \ + parallel_worker/$(DEPDIR)/$(am__dirstamp) +drivers/mp_common/$(am__dirstamp): + @$(MKDIR_P) drivers/mp_common + @: > drivers/mp_common/$(am__dirstamp) +drivers/mp_common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/mp_common/$(DEPDIR) + @: > drivers/mp_common/$(DEPDIR)/$(am__dirstamp) +drivers/mp_common/mp_common.lo: drivers/mp_common/$(am__dirstamp) \ + drivers/mp_common/$(DEPDIR)/$(am__dirstamp) +drivers/mp_common/source_common.lo: drivers/mp_common/$(am__dirstamp) \ + drivers/mp_common/$(DEPDIR)/$(am__dirstamp) +drivers/mp_common/sink_common.lo: drivers/mp_common/$(am__dirstamp) \ + drivers/mp_common/$(DEPDIR)/$(am__dirstamp) +drivers/mpi/$(am__dirstamp): + @$(MKDIR_P) drivers/mpi + @: > drivers/mpi/$(am__dirstamp) +drivers/mpi/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/mpi/$(DEPDIR) + @: > drivers/mpi/$(DEPDIR)/$(am__dirstamp) +drivers/mpi/driver_mpi_init.lo: drivers/mpi/$(am__dirstamp) \ + drivers/mpi/$(DEPDIR)/$(am__dirstamp) +drivers/mpi/driver_mpi_common.lo: drivers/mpi/$(am__dirstamp) \ + drivers/mpi/$(DEPDIR)/$(am__dirstamp) +drivers/mpi/driver_mpi_source.lo: drivers/mpi/$(am__dirstamp) \ + drivers/mpi/$(DEPDIR)/$(am__dirstamp) +drivers/mpi/driver_mpi_sink.lo: drivers/mpi/$(am__dirstamp) \ + drivers/mpi/$(DEPDIR)/$(am__dirstamp) +drivers/tcpip/$(am__dirstamp): + @$(MKDIR_P) drivers/tcpip + @: > drivers/tcpip/$(am__dirstamp) +drivers/tcpip/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) drivers/tcpip/$(DEPDIR) + @: > drivers/tcpip/$(DEPDIR)/$(am__dirstamp) +drivers/tcpip/driver_tcpip_init.lo: drivers/tcpip/$(am__dirstamp) \ + drivers/tcpip/$(DEPDIR)/$(am__dirstamp) 
+drivers/tcpip/driver_tcpip_common.lo: drivers/tcpip/$(am__dirstamp) \ + drivers/tcpip/$(DEPDIR)/$(am__dirstamp) +drivers/tcpip/driver_tcpip_source.lo: drivers/tcpip/$(am__dirstamp) \ + drivers/tcpip/$(DEPDIR)/$(am__dirstamp) +drivers/tcpip/driver_tcpip_sink.lo: drivers/tcpip/$(am__dirstamp) \ + drivers/tcpip/$(DEPDIR)/$(am__dirstamp) + +libstarpu-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CXXLD)$(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f common/*.$(OBJEXT) + -rm -f common/*.lo + -rm -f core/*.$(OBJEXT) + -rm -f core/*.lo + -rm -f core/dependencies/*.$(OBJEXT) + -rm -f core/dependencies/*.lo + -rm -f core/disk_ops/*.$(OBJEXT) + -rm -f core/disk_ops/*.lo + -rm -f core/disk_ops/unistd/*.$(OBJEXT) + -rm -f core/disk_ops/unistd/*.lo + -rm -f core/perfmodel/*.$(OBJEXT) + -rm -f core/perfmodel/*.lo + -rm -f datawizard/*.$(OBJEXT) + -rm -f datawizard/*.lo + -rm -f datawizard/interfaces/*.$(OBJEXT) + -rm -f datawizard/interfaces/*.lo + -rm -f debug/*.$(OBJEXT) + -rm -f debug/*.lo + -rm -f debug/traces/*.$(OBJEXT) + -rm -f debug/traces/*.lo + -rm -f drivers/cpu/*.$(OBJEXT) + -rm -f drivers/cpu/*.lo + -rm -f drivers/cuda/*.$(OBJEXT) + -rm -f drivers/cuda/*.lo + -rm -f drivers/disk/*.$(OBJEXT) + -rm -f drivers/disk/*.lo + -rm -f drivers/driver_common/*.$(OBJEXT) + -rm -f drivers/driver_common/*.lo + -rm -f drivers/hip/*.$(OBJEXT) + -rm -f drivers/hip/*.lo + -rm -f drivers/max/*.$(OBJEXT) + -rm -f drivers/max/*.lo + -rm -f drivers/mp_common/*.$(OBJEXT) + -rm -f drivers/mp_common/*.lo + -rm -f drivers/mpi/*.$(OBJEXT) + -rm -f drivers/mpi/*.lo + -rm -f drivers/opencl/*.$(OBJEXT) + -rm -f drivers/opencl/*.lo + -rm -f 
drivers/tcpip/*.$(OBJEXT) + -rm -f drivers/tcpip/*.lo + -rm -f parallel_worker/*.$(OBJEXT) + -rm -f parallel_worker/*.lo + -rm -f profiling/*.$(OBJEXT) + -rm -f profiling/*.lo + -rm -f sched_policies/*.$(OBJEXT) + -rm -f sched_policies/*.lo + -rm -f util/*.$(OBJEXT) + -rm -f util/*.lo + -rm -f worker_collection/*.$(OBJEXT) + -rm -f worker_collection/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/barrier.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/barrier_counter.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/fxt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/graph.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/hash.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/inlines.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/knobs.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/rbtree.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/rwlock.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/starpu_spinlock.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/thread.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/timing.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/combined_workers.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/detect_combined_workers.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/devices.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/disk.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/drivers.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/errorcheck.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/idle_hook.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/jobs.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/parallel_task.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/progress_hook.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_ctx.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_ctx_list.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/simgrid.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/simgrid_cpp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/task.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/task_bundle.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/topology.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/tree.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/workers.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/cg.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/data_concurrency.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/dependencies.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/implicit_data_deps.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/tags.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/task_deps.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_hdf5.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_leveldb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_stdio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_unistd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/energy_model.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/multiple_regression.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_history.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@core/perfmodel/$(DEPDIR)/perfmodel_print.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/regression.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/coherency.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/copy_driver.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_request.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/datastats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/datawizard.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/footprint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/malloc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memalloc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memory_manager.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memory_nodes.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memstats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/node_ops.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/reduction.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sort_data_handles.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/user_interactions.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/write_back.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/coo_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/data_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/variable_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/interfaces/$(DEPDIR)/vector_filters.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/vector_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/void_interface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/latency.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/structures_size.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/anim.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_paje.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cpu/$(DEPDIR)/driver_cpu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda0.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublas.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/disk/$(DEPDIR)/driver_disk.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/driver_common/$(DEPDIR)/driver_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/driver_hip.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/driver_hip_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/starpu_hipblas.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/max/$(DEPDIR)/driver_max_fpga.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/mp_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/sink_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/source_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/bound.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/callbacks.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling_helpers.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_best_implementation.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_composed.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager_calibration.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager_prio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_fifo.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_heft.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_heteroprio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_mct.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_perfmodel_select.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_prio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_random.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_sched.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_stage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_userchoice.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_work_stealing.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_worker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/eager_central_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/fifo_queues.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/graph_test_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/helper_mct.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/heteroprio.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/hierarchical_heft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager_prio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_ez.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_gemm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft_prio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heteroprio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_parallel_heft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_parallel_random.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_prio.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_random.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_random_prefetching.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_ws.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/parallel_eager.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/parallel_heft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/prio_deque.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/random_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/scheduler_maker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/work_stealing_policy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/execute_on_all.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/file.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/fstarpu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/misc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support_environment.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_create_sync_task.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_data_cpy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_task_insert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_task_insert_utils.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@worker_collection/$(DEPDIR)/worker_list.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@worker_collection/$(DEPDIR)/worker_tree.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 
's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf common/.libs common/_libs + -rm -rf core/.libs core/_libs + -rm -rf core/dependencies/.libs core/dependencies/_libs + -rm -rf core/disk_ops/.libs core/disk_ops/_libs + -rm -rf core/disk_ops/unistd/.libs core/disk_ops/unistd/_libs + -rm -rf core/perfmodel/.libs core/perfmodel/_libs + -rm -rf datawizard/.libs datawizard/_libs + -rm -rf datawizard/interfaces/.libs 
datawizard/interfaces/_libs + -rm -rf debug/.libs debug/_libs + -rm -rf debug/traces/.libs debug/traces/_libs + -rm -rf drivers/cpu/.libs drivers/cpu/_libs + -rm -rf drivers/cuda/.libs drivers/cuda/_libs + -rm -rf drivers/disk/.libs drivers/disk/_libs + -rm -rf drivers/driver_common/.libs drivers/driver_common/_libs + -rm -rf drivers/hip/.libs drivers/hip/_libs + -rm -rf drivers/max/.libs drivers/max/_libs + -rm -rf drivers/mp_common/.libs drivers/mp_common/_libs + -rm -rf drivers/mpi/.libs drivers/mpi/_libs + -rm -rf drivers/opencl/.libs drivers/opencl/_libs + -rm -rf drivers/tcpip/.libs drivers/tcpip/_libs + -rm -rf parallel_worker/.libs parallel_worker/_libs + -rm -rf profiling/.libs profiling/_libs + -rm -rf sched_policies/.libs sched_policies/_libs + -rm -rf util/.libs util/_libs + -rm -rf worker_collection/.libs worker_collection/_libs +install-xmlDATA: $(xml_DATA) + @$(NORMAL_INSTALL) + @list='$(xml_DATA)'; test -n "$(xmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(xmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(xmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(xmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(xmldir)" || exit $$?; \ + done + +uninstall-xmlDATA: + @$(NORMAL_UNINSTALL) + @list='$(xml_DATA)'; test -n "$(xmldir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(xmldir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-hook +check-am: all-am +check: check-recursive +@STARPU_HAVE_MS_LIB_FALSE@all-local: +@STARPU_HAVE_WINDOWS_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(xmldir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: 
installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f common/$(DEPDIR)/$(am__dirstamp) + -rm -f common/$(am__dirstamp) + -rm -f core/$(DEPDIR)/$(am__dirstamp) + -rm -f core/$(am__dirstamp) + -rm -f core/dependencies/$(DEPDIR)/$(am__dirstamp) + -rm -f core/dependencies/$(am__dirstamp) + -rm -f core/disk_ops/$(DEPDIR)/$(am__dirstamp) + -rm -f core/disk_ops/$(am__dirstamp) + -rm -f core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) + -rm -f core/disk_ops/unistd/$(am__dirstamp) + -rm -f core/perfmodel/$(DEPDIR)/$(am__dirstamp) + -rm -f core/perfmodel/$(am__dirstamp) + -rm -f datawizard/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/$(am__dirstamp) + -rm -f datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/$(am__dirstamp) + -rm -f debug/$(DEPDIR)/$(am__dirstamp) + -rm -f debug/$(am__dirstamp) + -rm -f debug/traces/$(DEPDIR)/$(am__dirstamp) + -rm -f debug/traces/$(am__dirstamp) + -rm -f drivers/cpu/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/cpu/$(am__dirstamp) + -rm -f drivers/cuda/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/cuda/$(am__dirstamp) + -rm -f drivers/disk/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/disk/$(am__dirstamp) + -rm -f drivers/driver_common/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/driver_common/$(am__dirstamp) + -rm -f drivers/hip/$(DEPDIR)/$(am__dirstamp) + -rm -f 
drivers/hip/$(am__dirstamp) + -rm -f drivers/max/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/max/$(am__dirstamp) + -rm -f drivers/mp_common/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/mp_common/$(am__dirstamp) + -rm -f drivers/mpi/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/mpi/$(am__dirstamp) + -rm -f drivers/opencl/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/opencl/$(am__dirstamp) + -rm -f drivers/tcpip/$(DEPDIR)/$(am__dirstamp) + -rm -f drivers/tcpip/$(am__dirstamp) + -rm -f parallel_worker/$(DEPDIR)/$(am__dirstamp) + -rm -f parallel_worker/$(am__dirstamp) + -rm -f profiling/$(DEPDIR)/$(am__dirstamp) + -rm -f profiling/$(am__dirstamp) + -rm -f sched_policies/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_policies/$(am__dirstamp) + -rm -f util/$(DEPDIR)/$(am__dirstamp) + -rm -f util/$(am__dirstamp) + -rm -f worker_collection/$(DEPDIR)/$(am__dirstamp) + -rm -f worker_collection/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+@STARPU_HAVE_WINDOWS_FALSE@install-exec-hook: +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f common/$(DEPDIR)/barrier.Plo + -rm -f common/$(DEPDIR)/barrier_counter.Plo + -rm -f common/$(DEPDIR)/fxt.Plo + -rm -f common/$(DEPDIR)/graph.Plo + -rm -f common/$(DEPDIR)/hash.Plo + -rm -f common/$(DEPDIR)/inlines.Plo + -rm -f common/$(DEPDIR)/knobs.Plo + -rm -f common/$(DEPDIR)/rbtree.Plo + -rm -f common/$(DEPDIR)/rwlock.Plo + -rm -f common/$(DEPDIR)/starpu_spinlock.Plo + -rm -f common/$(DEPDIR)/thread.Plo + -rm -f common/$(DEPDIR)/timing.Plo + -rm -f common/$(DEPDIR)/utils.Plo + -rm -f core/$(DEPDIR)/combined_workers.Plo + -rm -f core/$(DEPDIR)/debug.Plo + -rm -f core/$(DEPDIR)/detect_combined_workers.Plo + -rm -f core/$(DEPDIR)/devices.Plo + -rm -f core/$(DEPDIR)/disk.Plo + -rm -f core/$(DEPDIR)/drivers.Plo + -rm -f core/$(DEPDIR)/errorcheck.Plo + -rm -f core/$(DEPDIR)/idle_hook.Plo + -rm -f core/$(DEPDIR)/jobs.Plo + -rm -f core/$(DEPDIR)/parallel_task.Plo + -rm -f core/$(DEPDIR)/progress_hook.Plo + -rm -f core/$(DEPDIR)/sched_ctx.Plo + -rm -f core/$(DEPDIR)/sched_ctx_list.Plo + -rm -f core/$(DEPDIR)/sched_policy.Plo + -rm -f core/$(DEPDIR)/simgrid.Plo + -rm -f core/$(DEPDIR)/simgrid_cpp.Plo + -rm -f core/$(DEPDIR)/task.Plo + -rm -f core/$(DEPDIR)/task_bundle.Plo + -rm -f core/$(DEPDIR)/topology.Plo + -rm -f core/$(DEPDIR)/tree.Plo + -rm -f core/$(DEPDIR)/workers.Plo + -rm -f core/dependencies/$(DEPDIR)/cg.Plo + -rm -f core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo + -rm -f core/dependencies/$(DEPDIR)/data_concurrency.Plo + -rm -f core/dependencies/$(DEPDIR)/dependencies.Plo + -rm -f core/dependencies/$(DEPDIR)/implicit_data_deps.Plo + -rm -f core/dependencies/$(DEPDIR)/tags.Plo + -rm -f core/dependencies/$(DEPDIR)/task_deps.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_hdf5.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_leveldb.Plo + -rm -f 
core/disk_ops/$(DEPDIR)/disk_stdio.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_unistd.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo + -rm -f core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo + -rm -f core/perfmodel/$(DEPDIR)/energy_model.Plo + -rm -f core/perfmodel/$(DEPDIR)/multiple_regression.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_history.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_print.Plo + -rm -f core/perfmodel/$(DEPDIR)/regression.Plo + -rm -f datawizard/$(DEPDIR)/coherency.Plo + -rm -f datawizard/$(DEPDIR)/copy_driver.Plo + -rm -f datawizard/$(DEPDIR)/data_request.Plo + -rm -f datawizard/$(DEPDIR)/datastats.Plo + -rm -f datawizard/$(DEPDIR)/datawizard.Plo + -rm -f datawizard/$(DEPDIR)/filters.Plo + -rm -f datawizard/$(DEPDIR)/footprint.Plo + -rm -f datawizard/$(DEPDIR)/malloc.Plo + -rm -f datawizard/$(DEPDIR)/memalloc.Plo + -rm -f datawizard/$(DEPDIR)/memory_manager.Plo + -rm -f datawizard/$(DEPDIR)/memory_nodes.Plo + -rm -f datawizard/$(DEPDIR)/memstats.Plo + -rm -f datawizard/$(DEPDIR)/node_ops.Plo + -rm -f datawizard/$(DEPDIR)/reduction.Plo + -rm -f datawizard/$(DEPDIR)/sort_data_handles.Plo + -rm -f datawizard/$(DEPDIR)/user_interactions.Plo + -rm -f datawizard/$(DEPDIR)/write_back.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/block_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/block_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/coo_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/csr_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/csr_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/data_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo + 
-rm -f datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/variable_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/vector_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/vector_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/void_interface.Plo + -rm -f debug/$(DEPDIR)/latency.Plo + -rm -f debug/$(DEPDIR)/structures_size.Plo + -rm -f debug/traces/$(DEPDIR)/anim.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_paje.Plo + -rm -f drivers/cpu/$(DEPDIR)/driver_cpu.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda0.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda1.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo + -rm -f drivers/disk/$(DEPDIR)/driver_disk.Plo + -rm -f drivers/driver_common/$(DEPDIR)/driver_common.Plo + -rm -f drivers/hip/$(DEPDIR)/driver_hip.Plo + -rm -f drivers/hip/$(DEPDIR)/driver_hip_init.Plo + -rm -f drivers/hip/$(DEPDIR)/starpu_hipblas.Plo + -rm -f drivers/max/$(DEPDIR)/driver_max_fpga.Plo + -rm -f drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo + -rm -f drivers/mp_common/$(DEPDIR)/mp_common.Plo + -rm -f drivers/mp_common/$(DEPDIR)/sink_common.Plo + -rm -f drivers/mp_common/$(DEPDIR)/source_common.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo 
+ -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo + -rm -f parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo + -rm -f profiling/$(DEPDIR)/bound.Plo + -rm -f profiling/$(DEPDIR)/callbacks.Plo + -rm -f profiling/$(DEPDIR)/profiling.Plo + -rm -f profiling/$(DEPDIR)/profiling_helpers.Plo + -rm -f sched_policies/$(DEPDIR)/component_best_implementation.Plo + -rm -f sched_policies/$(DEPDIR)/component_composed.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager_calibration.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager_prio.Plo + -rm -f sched_policies/$(DEPDIR)/component_fifo.Plo + -rm -f sched_policies/$(DEPDIR)/component_heft.Plo + -rm -f sched_policies/$(DEPDIR)/component_heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/component_mct.Plo + -rm -f sched_policies/$(DEPDIR)/component_perfmodel_select.Plo + -rm -f sched_policies/$(DEPDIR)/component_prio.Plo + -rm -f sched_policies/$(DEPDIR)/component_random.Plo + -rm -f sched_policies/$(DEPDIR)/component_sched.Plo + -rm -f sched_policies/$(DEPDIR)/component_stage.Plo + -rm -f sched_policies/$(DEPDIR)/component_userchoice.Plo + -rm -f sched_policies/$(DEPDIR)/component_work_stealing.Plo + -rm -f sched_policies/$(DEPDIR)/component_worker.Plo + -rm -f sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo + -rm -f sched_policies/$(DEPDIR)/eager_central_policy.Plo + -rm -f sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo + -rm -f sched_policies/$(DEPDIR)/fifo_queues.Plo + -rm -f 
sched_policies/$(DEPDIR)/graph_test_policy.Plo + -rm -f sched_policies/$(DEPDIR)/helper_mct.Plo + -rm -f sched_policies/$(DEPDIR)/heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/hierarchical_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_ez.Plo + -rm -f sched_policies/$(DEPDIR)/modular_gemm.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft2.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_parallel_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_parallel_random.Plo + -rm -f sched_policies/$(DEPDIR)/modular_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_random.Plo + -rm -f sched_policies/$(DEPDIR)/modular_random_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_ws.Plo + -rm -f sched_policies/$(DEPDIR)/parallel_eager.Plo + -rm -f sched_policies/$(DEPDIR)/parallel_heft.Plo + -rm -f sched_policies/$(DEPDIR)/prio_deque.Plo + -rm -f sched_policies/$(DEPDIR)/random_policy.Plo + -rm -f sched_policies/$(DEPDIR)/scheduler_maker.Plo + -rm -f sched_policies/$(DEPDIR)/work_stealing_policy.Plo + -rm -f util/$(DEPDIR)/execute_on_all.Plo + -rm -f util/$(DEPDIR)/file.Plo + -rm -f util/$(DEPDIR)/fstarpu.Plo + -rm -f util/$(DEPDIR)/misc.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support_environment.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo + -rm -f util/$(DEPDIR)/starpu_create_sync_task.Plo + -rm -f util/$(DEPDIR)/starpu_data_cpy.Plo + -rm -f util/$(DEPDIR)/starpu_task_insert.Plo + -rm -f util/$(DEPDIR)/starpu_task_insert_utils.Plo + -rm -f 
worker_collection/$(DEPDIR)/worker_list.Plo + -rm -f worker_collection/$(DEPDIR)/worker_tree.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-xmlDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f common/$(DEPDIR)/barrier.Plo + -rm -f common/$(DEPDIR)/barrier_counter.Plo + -rm -f common/$(DEPDIR)/fxt.Plo + -rm -f common/$(DEPDIR)/graph.Plo + -rm -f common/$(DEPDIR)/hash.Plo + -rm -f common/$(DEPDIR)/inlines.Plo + -rm -f common/$(DEPDIR)/knobs.Plo + -rm -f common/$(DEPDIR)/rbtree.Plo + -rm -f common/$(DEPDIR)/rwlock.Plo + -rm -f common/$(DEPDIR)/starpu_spinlock.Plo + -rm -f common/$(DEPDIR)/thread.Plo + -rm -f common/$(DEPDIR)/timing.Plo + -rm -f common/$(DEPDIR)/utils.Plo + -rm -f core/$(DEPDIR)/combined_workers.Plo + -rm -f core/$(DEPDIR)/debug.Plo + -rm -f core/$(DEPDIR)/detect_combined_workers.Plo + -rm -f core/$(DEPDIR)/devices.Plo + -rm -f core/$(DEPDIR)/disk.Plo + -rm -f core/$(DEPDIR)/drivers.Plo + -rm -f core/$(DEPDIR)/errorcheck.Plo + -rm -f core/$(DEPDIR)/idle_hook.Plo + -rm -f core/$(DEPDIR)/jobs.Plo + -rm -f core/$(DEPDIR)/parallel_task.Plo + -rm -f core/$(DEPDIR)/progress_hook.Plo + -rm -f core/$(DEPDIR)/sched_ctx.Plo + -rm -f core/$(DEPDIR)/sched_ctx_list.Plo + -rm -f core/$(DEPDIR)/sched_policy.Plo + -rm -f core/$(DEPDIR)/simgrid.Plo + -rm -f core/$(DEPDIR)/simgrid_cpp.Plo + -rm -f core/$(DEPDIR)/task.Plo + -rm -f 
core/$(DEPDIR)/task_bundle.Plo + -rm -f core/$(DEPDIR)/topology.Plo + -rm -f core/$(DEPDIR)/tree.Plo + -rm -f core/$(DEPDIR)/workers.Plo + -rm -f core/dependencies/$(DEPDIR)/cg.Plo + -rm -f core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo + -rm -f core/dependencies/$(DEPDIR)/data_concurrency.Plo + -rm -f core/dependencies/$(DEPDIR)/dependencies.Plo + -rm -f core/dependencies/$(DEPDIR)/implicit_data_deps.Plo + -rm -f core/dependencies/$(DEPDIR)/tags.Plo + -rm -f core/dependencies/$(DEPDIR)/task_deps.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_hdf5.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_leveldb.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_stdio.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_unistd.Plo + -rm -f core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo + -rm -f core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo + -rm -f core/perfmodel/$(DEPDIR)/energy_model.Plo + -rm -f core/perfmodel/$(DEPDIR)/multiple_regression.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_history.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo + -rm -f core/perfmodel/$(DEPDIR)/perfmodel_print.Plo + -rm -f core/perfmodel/$(DEPDIR)/regression.Plo + -rm -f datawizard/$(DEPDIR)/coherency.Plo + -rm -f datawizard/$(DEPDIR)/copy_driver.Plo + -rm -f datawizard/$(DEPDIR)/data_request.Plo + -rm -f datawizard/$(DEPDIR)/datastats.Plo + -rm -f datawizard/$(DEPDIR)/datawizard.Plo + -rm -f datawizard/$(DEPDIR)/filters.Plo + -rm -f datawizard/$(DEPDIR)/footprint.Plo + -rm -f datawizard/$(DEPDIR)/malloc.Plo + -rm -f datawizard/$(DEPDIR)/memalloc.Plo + -rm -f datawizard/$(DEPDIR)/memory_manager.Plo + -rm -f datawizard/$(DEPDIR)/memory_nodes.Plo + -rm -f datawizard/$(DEPDIR)/memstats.Plo + -rm -f datawizard/$(DEPDIR)/node_ops.Plo + -rm -f datawizard/$(DEPDIR)/reduction.Plo + -rm -f datawizard/$(DEPDIR)/sort_data_handles.Plo + -rm -f datawizard/$(DEPDIR)/user_interactions.Plo + -rm -f 
datawizard/$(DEPDIR)/write_back.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/block_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/block_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/coo_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/csr_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/csr_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/data_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/variable_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/vector_filters.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/vector_interface.Plo + -rm -f datawizard/interfaces/$(DEPDIR)/void_interface.Plo + -rm -f debug/$(DEPDIR)/latency.Plo + -rm -f debug/$(DEPDIR)/structures_size.Plo + -rm -f debug/traces/$(DEPDIR)/anim.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo + -rm -f debug/traces/$(DEPDIR)/starpu_paje.Plo + -rm -f drivers/cpu/$(DEPDIR)/driver_cpu.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda0.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda1.Plo + -rm -f drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo + -rm -f drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo + -rm -f 
drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo + -rm -f drivers/disk/$(DEPDIR)/driver_disk.Plo + -rm -f drivers/driver_common/$(DEPDIR)/driver_common.Plo + -rm -f drivers/hip/$(DEPDIR)/driver_hip.Plo + -rm -f drivers/hip/$(DEPDIR)/driver_hip_init.Plo + -rm -f drivers/hip/$(DEPDIR)/starpu_hipblas.Plo + -rm -f drivers/max/$(DEPDIR)/driver_max_fpga.Plo + -rm -f drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo + -rm -f drivers/mp_common/$(DEPDIR)/mp_common.Plo + -rm -f drivers/mp_common/$(DEPDIR)/sink_common.Plo + -rm -f drivers/mp_common/$(DEPDIR)/source_common.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo + -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo + -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo + -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo + -rm -f parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo + -rm -f profiling/$(DEPDIR)/bound.Plo + -rm -f profiling/$(DEPDIR)/callbacks.Plo + -rm -f profiling/$(DEPDIR)/profiling.Plo + -rm -f profiling/$(DEPDIR)/profiling_helpers.Plo + -rm -f sched_policies/$(DEPDIR)/component_best_implementation.Plo + -rm -f sched_policies/$(DEPDIR)/component_composed.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager_calibration.Plo + -rm -f sched_policies/$(DEPDIR)/component_eager_prio.Plo + -rm -f sched_policies/$(DEPDIR)/component_fifo.Plo + -rm -f sched_policies/$(DEPDIR)/component_heft.Plo + -rm -f sched_policies/$(DEPDIR)/component_heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/component_mct.Plo + -rm -f sched_policies/$(DEPDIR)/component_perfmodel_select.Plo + -rm 
-f sched_policies/$(DEPDIR)/component_prio.Plo + -rm -f sched_policies/$(DEPDIR)/component_random.Plo + -rm -f sched_policies/$(DEPDIR)/component_sched.Plo + -rm -f sched_policies/$(DEPDIR)/component_stage.Plo + -rm -f sched_policies/$(DEPDIR)/component_userchoice.Plo + -rm -f sched_policies/$(DEPDIR)/component_work_stealing.Plo + -rm -f sched_policies/$(DEPDIR)/component_worker.Plo + -rm -f sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo + -rm -f sched_policies/$(DEPDIR)/eager_central_policy.Plo + -rm -f sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo + -rm -f sched_policies/$(DEPDIR)/fifo_queues.Plo + -rm -f sched_policies/$(DEPDIR)/graph_test_policy.Plo + -rm -f sched_policies/$(DEPDIR)/helper_mct.Plo + -rm -f sched_policies/$(DEPDIR)/heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/hierarchical_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_eager_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_ez.Plo + -rm -f sched_policies/$(DEPDIR)/modular_gemm.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft2.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heft_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heteroprio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_parallel_heft.Plo + -rm -f sched_policies/$(DEPDIR)/modular_parallel_random.Plo + -rm -f sched_policies/$(DEPDIR)/modular_prio.Plo + -rm -f sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_random.Plo + -rm -f sched_policies/$(DEPDIR)/modular_random_prefetching.Plo + -rm -f sched_policies/$(DEPDIR)/modular_ws.Plo + -rm -f sched_policies/$(DEPDIR)/parallel_eager.Plo + -rm -f sched_policies/$(DEPDIR)/parallel_heft.Plo + -rm -f sched_policies/$(DEPDIR)/prio_deque.Plo + -rm -f 
sched_policies/$(DEPDIR)/random_policy.Plo + -rm -f sched_policies/$(DEPDIR)/scheduler_maker.Plo + -rm -f sched_policies/$(DEPDIR)/work_stealing_policy.Plo + -rm -f util/$(DEPDIR)/execute_on_all.Plo + -rm -f util/$(DEPDIR)/file.Plo + -rm -f util/$(DEPDIR)/fstarpu.Plo + -rm -f util/$(DEPDIR)/misc.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support_environment.Plo + -rm -f util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo + -rm -f util/$(DEPDIR)/starpu_create_sync_task.Plo + -rm -f util/$(DEPDIR)/starpu_data_cpy.Plo + -rm -f util/$(DEPDIR)/starpu_task_insert.Plo + -rm -f util/$(DEPDIR)/starpu_task_insert_utils.Plo + -rm -f worker_collection/$(DEPDIR)/worker_list.Plo + -rm -f worker_collection/$(DEPDIR)/worker_tree.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES uninstall-xmlDATA + +.MAKE: $(am__recursive_targets) install-am install-exec-am \ + install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am dist-hook distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-exec-hook install-html \ + install-html-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip install-xmlDATA \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic 
mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES uninstall-xmlDATA + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null +@STARPU_HAVE_WINDOWS_TRUE@export LC_MESSAGES + +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@.libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib: libstarpu-@STARPU_EFFECTIVE_VERSION@.la dolib +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpu_so_version) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@all-local: .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib + +@STARPU_HAVE_WINDOWS_TRUE@install-exec-hook: +@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir) +@STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir) + 
+#########################################
+
+# If some external references appear (U), it means the corresponding .c file has
+# only included <starpu.h> and not the internal src/ header which contains the
+# static inline definition. The final status check uses the POSIX "=" test operator ("==" is a bash extension that other shells such as dash reject).
+dist-hook:
+	failed=0 ; \
+	look=""; \
+	for i in $$( $(GREP) "static inline" $$(find $(srcdir) -name \*.h) | $(SED) -e 's/.*static inline //g' | $(GREP) -v ENAME\#\# | $(SED) -n -e 's/[^(]* \(\|\*\)\([^ (]*\)(.*/\2/' -e 'p;s/^_*//;p' | $(GREP) -v _starpu_spin_init | $(GREP) -v starpu_sched_ctx_worker_is_master_for_child_ctx) ; do \
+		if [ -z "$$look" ] ; then \
+			look="$$i" ; \
+		else \
+			look="$$look\|$$i" ; \
+		fi ; \
+	done ; \
+	echo "$$look" ; \
+	for j in $(shell find . -name \*.o) ; do \
+		nm $$j | $(GREP) -e "U \($$look\)$$" && { echo $$j ; failed=1 ; } ; \
+	done ; \
+	[ $$failed = 0 ]
+	nm -n .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.so | grep -v " [Ua-z] " | grep -v ' W '| grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|main\|smpi_simulated_main_\|_fini\|_edata\|__bss_start\|_end\|fut_getstamp\|__gcov_\|mangle_path\)" | (! grep .)
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/common/barrier.c b/src/common/barrier.c
new file mode 100644
index 0000000..319aef7
--- /dev/null
+++ b/src/common/barrier.c
@@ -0,0 +1,92 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): the header name is missing from this directive; presumably <common/barrier.h> -- confirm against upstream */
+#include /* NOTE(review): header name missing; presumably <common/utils.h> -- confirm */
+#include /* NOTE(review): header name missing; presumably <common/thread.h> -- confirm */
+
+int _starpu_barrier_init(struct _starpu_barrier *barrier, int count) /* Set up barrier for rounds of count participants: zero the counters and create both mutexes and the condition variable; always returns 0. */
+{
+	barrier->count = count; /* int stored into an unsigned field; NOTE(review): a negative count would wrap -- confirm callers never pass one */
+	barrier->reached_start = 0;
+	barrier->reached_exit = 0;
+	barrier->reached_flops = 0.0;
+	STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex, NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex_exit, NULL);
+	STARPU_PTHREAD_COND_INIT(&barrier->cond, NULL);
+	return 0;
+}
+
+static
+int _starpu_barrier_test(struct _starpu_barrier *barrier) /* Returns 0 once reached_exit equals count, EBUSY otherwise; the comparison is made under mutex_exit. */
+{
+	/*
+	 * Check whether any threads are known to be waiting; report
+	 * "BUSY" if so.
+	 */
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
+	if (barrier->reached_exit != barrier->count)
+	{
+		STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+		return EBUSY;
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+	return 0;
+}
+
+int _starpu_barrier_destroy(struct _starpu_barrier *barrier) /* Wait until _starpu_barrier_test() stops reporting EBUSY, then destroy both mutexes and the condition variable; always returns 0. */
+{
+	int ret;
+	do
+	{
+		ret = _starpu_barrier_test(barrier);
+	}
+	while (ret == EBUSY); /* busy loop: no sleep or blocking wait between polls. NOTE(review): reached_exit starts at 0 and is only incremented by _starpu_barrier_wait(), so for count != 0 this appears to spin until a full round has left the barrier -- confirm callers guarantee that */
+	_STARPU_DEBUG("reached_exit %u\n", barrier->reached_exit);
+
+	STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex_exit);
+	STARPU_PTHREAD_COND_DESTROY(&barrier->cond);
+	return 0;
+}
+
+int _starpu_barrier_wait(struct _starpu_barrier *barrier) /* Block until count threads have entered; the thread that completes the round returns STARPU_PTHREAD_BARRIER_SERIAL_THREAD, the others return 0. */
+{
+	int ret=0;
+
+	// Wait until all threads enter the barrier
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	barrier->reached_exit=0; /* reset by every entering thread while holding mutex, whereas the increment below and _starpu_barrier_test() use mutex_exit. NOTE(review): confirm this mixed locking is intended */
+	barrier->reached_start++;
+	if (barrier->reached_start == barrier->count)
+	{
+		barrier->reached_start = 0; /* rearm the entry counter for the next round */
+		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
+		ret = STARPU_PTHREAD_BARRIER_SERIAL_THREAD;
+	}
+	else
+	{
+		STARPU_PTHREAD_COND_WAIT(&barrier->cond,&barrier->mutex); /* NOTE(review): waited once with no predicate re-checked in a loop -- confirm a spurious wakeup cannot release this thread early */
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+
+	// Count number of threads that exit the barrier
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
+	barrier->reached_exit ++;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+
+	return ret;
+}
diff --git a/src/common/barrier.h b/src/common/barrier.h
new file mode 100644
index 0000000..c6ccd93
--- /dev/null
+++ b/src/common/barrier.h
@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __COMMON_BARRIER_H__
+#define __COMMON_BARRIER_H__
+
+#include /* NOTE(review): header name missing from this directive; presumably <starpu_thread.h> (provider of the starpu_pthread_* types below) -- confirm */
+
+#pragma GCC visibility push(hidden)
+
+/** @file */
+
+struct _starpu_barrier /* State shared by _starpu_barrier_*() and, through embedding, by the _starpu_barrier_counter_*() functions. */
+{
+	unsigned count; /* number of arrivals that completes a round */
+	unsigned reached_start; /* arrivals in the current round; the barrier-counter code uses it as the counter value */
+	unsigned reached_exit; /* threads that have left _starpu_barrier_wait() since the last entry reset it */
+	double reached_flops; /* running flops total adjusted by the barrier-counter increment/decrement functions */
+	starpu_pthread_mutex_t mutex; /* held around reached_start updates and waits on cond */
+	starpu_pthread_mutex_t mutex_exit; /* held around the reached_exit increment and the test in destroy */
+	starpu_pthread_cond_t cond; /* broadcast when a round completes */
+};
+
+int _starpu_barrier_init(struct _starpu_barrier *barrier, int count); /* initialise for count participants; returns 0 */
+
+int _starpu_barrier_destroy(struct _starpu_barrier *barrier); /* polls until reached_exit == count, then releases the mutexes and condition variable; returns 0 */
+
+int _starpu_barrier_wait(struct _starpu_barrier *barrier); /* blocks until count threads have entered; returns STARPU_PTHREAD_BARRIER_SERIAL_THREAD in the completing thread, 0 in the others */
+
+#pragma GCC visibility pop
+
+#endif // __COMMON_BARRIER_H__
diff --git a/src/common/barrier_counter.c b/src/common/barrier_counter.c
new file mode 100644
index 0000000..db9b4bd
--- /dev/null
+++ b/src/common/barrier_counter.c
@@ -0,0 +1,192 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <common/barrier_counter.h>
+
+int _starpu_barrier_counter_init(struct _starpu_barrier_counter *barrier_c, unsigned count) /* Initialise the embedded barrier with count, clear both wake-up thresholds and create cond2; always returns 0. */
+{
+	_starpu_barrier_init(&barrier_c->barrier, count);
+	barrier_c->min_threshold = 0; /* 0 means "no threshold registered" */
+	barrier_c->max_threshold = 0; /* 0 means "no threshold registered" */
+	STARPU_PTHREAD_COND_INIT(&barrier_c->cond2, NULL);
+	return 0;
+}
+
+int _starpu_barrier_counter_destroy(struct _starpu_barrier_counter *barrier_c) /* Destroy the embedded barrier, then cond2; always returns 0. */
+{
+	_starpu_barrier_destroy(&barrier_c->barrier);
+	STARPU_PTHREAD_COND_DESTROY(&barrier_c->cond2);
+	return 0;
+}
+
+
+int _starpu_barrier_counter_wait_for_empty_counter(struct _starpu_barrier_counter *barrier_c) /* Block until the counter (reached_start) is 0; returns the value the counter had when the call started. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	ret = barrier->reached_start; /* snapshot taken before waiting */
+	while (barrier->reached_start > 0)
+		STARPU_PTHREAD_COND_WAIT(&barrier->cond, &barrier->mutex);
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+int _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n) /* Block until the counter is <= n; returns 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	while (barrier->reached_start > n)
+	{
+		if (barrier_c->max_threshold < n) /* keep the highest requested level so the decrementing side wakes the first waiter that can proceed */
+			barrier_c->max_threshold = n;
+		STARPU_PTHREAD_COND_WAIT(&barrier->cond, &barrier->mutex);
+	}
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return 0;
+}
+
+int _starpu_barrier_counter_wait_until_counter_reaches_up_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n) /* Block until the counter is >= n; returns 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	while (barrier->reached_start < n)
+	{
+		if (barrier_c->min_threshold == 0 || barrier_c->min_threshold > n) /* keep the lowest requested level; 0 means none is registered yet, so it must be replaced too (a plain "> n" test can never fire on an unsigned 0, leaving the threshold unset and the waiter unsignalled until the counter is full) */
+			barrier_c->min_threshold = n;
+		STARPU_PTHREAD_COND_WAIT(&barrier_c->cond2, &barrier->mutex);
+	}
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return 0;
+}
+
+int _starpu_barrier_counter_wait_for_full_counter(struct _starpu_barrier_counter *barrier_c) /* Block until the counter is >= count; returns 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	while (barrier->reached_start < barrier->count)
+		STARPU_PTHREAD_COND_WAIT(&barrier_c->cond2, &barrier->mutex);
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return 0;
+}
+
+int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier_counter *barrier_c, double flops) /* Subtract flops and one unit from the counter; returns 1 if the counter just reached 0, else 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret = 0;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	barrier->reached_flops -= flops;
+	if (--barrier->reached_start == 0) /* NOTE(review): reached_start is unsigned, so decrementing from 0 would wrap -- presumably every decrement is paired with an earlier increment; confirm */
+	{
+		ret = 1;
+		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
+	}
+	if (barrier_c->max_threshold && barrier->reached_start == barrier_c->max_threshold)
+	{
+		/* have those not happy enough tell us how much again */
+		barrier_c->max_threshold = 0;
+		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
+	}
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_counter *barrier_c, double flops) /* Add flops and one unit to the counter; returns 1 if the counter just reached count, else 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret = 0;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	barrier->reached_flops += flops;
+	if(++barrier->reached_start == barrier->count)
+	{
+		ret = 1;
+		STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
+	}
+	if (barrier_c->min_threshold && barrier->reached_start == barrier_c->min_threshold)
+	{
+		/* have those not happy enough tell us how much again */
+		barrier_c->min_threshold = 0;
+		STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
+	}
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c, double flops) /* Add one unit and flops to the counter and wake every cond2 waiter unconditionally; returns 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	barrier->reached_start++;
+	barrier->reached_flops += flops;
+	STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return 0;
+}
+
+int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c) /* Broadcast cond again if the counter is currently 0; returns 0. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+
+	if(barrier->reached_start == 0)
+		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return 0;
+}
+
+int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c) /* Return the current counter value (unsigned reached_start converted to int), read under barrier->mutex. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_start;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+int _starpu_barrier_counter_get_reached_exit(struct _starpu_barrier_counter *barrier_c) /* Return reached_exit, read under barrier->mutex. NOTE(review): _starpu_barrier_wait() increments this field under mutex_exit instead -- confirm which lock is meant to protect it. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_exit;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c) /* Return the accumulated flops total, read under barrier->mutex. */
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	double ret;
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_flops;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
diff --git a/src/common/barrier_counter.h b/src/common/barrier_counter.h
new file mode 100644
index
0000000..9e69216 --- /dev/null +++ b/src/common/barrier_counter.h @@ -0,0 +1,63 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __BARRIER_COUNTER_H__ +#define __BARRIER_COUNTER_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +struct _starpu_barrier_counter /* a barrier whose reached_start field is used as an up/down counter, with optional wake-up thresholds */ +{ + struct _starpu_barrier barrier; /* underlying barrier: its mutex, cond, count and reached_* fields are used by the counter functions */ + unsigned min_threshold; /* when non-zero: counter value at which _starpu_barrier_counter_increment_until_full_counter() broadcasts cond2 and resets this field to 0 */ + unsigned max_threshold; /* when non-zero: counter value at which _starpu_barrier_counter_decrement_until_empty_counter() broadcasts barrier.cond and resets this field to 0 */ + starpu_pthread_cond_t cond2; /* waited on by the reaches_up_to_n / wait_for_full_counter functions, broadcast by the increment functions */ +}; + +int _starpu_barrier_counter_init(struct _starpu_barrier_counter *barrier_c, unsigned count); + +int _starpu_barrier_counter_destroy(struct _starpu_barrier_counter *barrier_c); + +int _starpu_barrier_counter_wait_for_empty_counter(struct _starpu_barrier_counter *barrier_c); + +int _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n); +int _starpu_barrier_counter_wait_until_counter_reaches_up_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n); /* blocks until reached_start >= n; returns 0 */ + +int _starpu_barrier_counter_wait_for_full_counter(struct _starpu_barrier_counter *barrier_c); /* blocks until reached_start >= count; returns 0 */ + +int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier_counter *barrier_c, double flops); /* returns 1 when the counter reaches 0, 0 otherwise */ + +int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_counter *barrier_c, double flops); /* returns 1 when the counter reaches count, 0 otherwise */ + +int 
_starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c, double flops); + +int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c); + +int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c); + +int _starpu_barrier_counter_get_reached_exit(struct _starpu_barrier_counter *barrier_c); + +double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c); + +#pragma GCC visibility pop + +#endif diff --git a/src/common/config-src-build.h.in b/src/common/config-src-build.h.in new file mode 100644 index 0000000..37bd3fb --- /dev/null +++ b/src/common/config-src-build.h.in @@ -0,0 +1,2 @@ +#undef STARPU_SRC_DIR +#undef STARPU_BUILD_DIR diff --git a/src/common/config.h.in b/src/common/config.h.in new file mode 100644 index 0000000..8c27132 --- /dev/null +++ b/src/common/config.h.in @@ -0,0 +1,1166 @@ +/* src/common/config.h.in. Generated from configure.ac by autoheader. */ + +/* enable FUT traces */ +#undef CONFIG_FUT + +/* Define to 1 if you have the header file. */ +#undef HAVE_AIO_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_AYUDAME_H + +/* Define to 1 if you have the `cblas_sgemv' function. */ +#undef HAVE_CBLAS_SGEMV + +/* Define to 1 if you have the `clEnqueueMarkerWithWaitList' function. */ +#undef HAVE_CLENQUEUEMARKERWITHWAITLIST + +/* Define to 1 if you have the `clGetExtensionFunctionAddressForPlatform' + function. */ +#undef HAVE_CLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM + +/* Define to 1 if you have the `clock_gettime' function. */ +#undef HAVE_CLOCK_GETTIME + +/* Define to 1 if you have the header file. */ +#undef HAVE_CL_CL_EXT_H + +/* Define to 1 if you have the `copy_file_range' function. */ +#undef HAVE_COPY_FILE_RANGE + +/* Define to 1 if you have the header file. */ +#undef HAVE_CUBLASLT_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_CUDA_GL_INTEROP_H + +/* define if the compiler supports basic C++11 syntax */ +#undef HAVE_CXX11 + +/* Define to 1 if you have the declaration of `cusparseSetStream', and to 0 if + you don't. */ +#undef HAVE_DECL_CUSPARSESETSTREAM + +/* Define to 1 if you have the declaration of `enable_fut_flush', and to 0 if + you don't. */ +#undef HAVE_DECL_ENABLE_FUT_FLUSH + +/* Define to 1 if you have the declaration of `fut_setup_flush_callback', and + to 0 if you don't. */ +#undef HAVE_DECL_FUT_SETUP_FLUSH_CALLBACK + +/* Define to 1 if you have the declaration of `fut_set_filename', and to 0 if + you don't. */ +#undef HAVE_DECL_FUT_SET_FILENAME + +/* Define to 1 if you have the declaration of + `hwloc_cuda_get_device_osdev_by_index', and to 0 if you don't. */ +#undef HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX + +/* Define to 1 if you have the declaration of + `hwloc_distances_obj_pair_values', and to 0 if you don't. */ +#undef HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES + +/* Define to 1 if you have the declaration of + `hwloc_hip_get_device_osdev_by_index', and to 0 if you don't. */ +#undef HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX + +/* Define to 1 if you have the declaration of + `nvmlDeviceGetTotalEnergyConsumption', and to 0 if you don't. */ +#undef HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION + +/* Define to 1 if you have the declaration of `smpi_process_set_user_data', + and to 0 if you don't. */ +#undef HAVE_DECL_SMPI_PROCESS_SET_USER_DATA + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the `dlopen' function. */ +#undef HAVE_DLOPEN + +/* Define to 1 if you have the `enable_fut_flush' function. */ +#undef HAVE_ENABLE_FUT_FLUSH + +/* Define to 1 if you have the `fut_setup_flush_callback' function. */ +#undef HAVE_FUT_SETUP_FLUSH_CALLBACK + +/* Define to 1 if you have the `fut_set_filename' function. 
*/ +#undef HAVE_FUT_SET_FILENAME + +/* Define to 1 if you have the `fxt_blockev_leave' function. */ +#undef HAVE_FXT_BLOCKEV_LEAVE + +/* Define to 1 if you have the `fxt_close' function. */ +#undef HAVE_FXT_CLOSE + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the `getrlimit' function. */ +#undef HAVE_GETRLIMIT + +/* Define to 1 if you have the header file. */ +#undef HAVE_GLPK_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HDF5_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HIP_HIP_RUNTIME_API_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HIP_HIP_RUNTIME_H + +/* Define to 1 if you have the `hwloc_cpukinds_get_nr' function. */ +#undef HAVE_HWLOC_CPUKINDS_GET_NR + +/* Define to 1 if you have the `hwloc_get_area_memlocation' function. */ +#undef HAVE_HWLOC_GET_AREA_MEMLOCATION + +/* Define to 1 if you have the header file. */ +#undef HAVE_HWLOC_GLIBC_SCHED_H + +/* Define to 1 if you have the `hwloc_topology_dup' function. */ +#undef HAVE_HWLOC_TOPOLOGY_DUP + +/* Define to 1 if you have the `hwloc_topology_set_components' function. */ +#undef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LEVELDB_DB_H + +/* Define to 1 if you have the `atlas' library (-latlas). */ +#undef HAVE_LIBATLAS + +/* Define to 1 if you have the `blas-openblas' library (-lblas-openblas). */ +#undef HAVE_LIBBLAS_OPENBLAS + +/* Define to 1 if you have the `cblas' library (-lcblas). */ +#undef HAVE_LIBCBLAS + +/* Define to 1 if you have the `dl' library (-ldl). */ +#undef HAVE_LIBDL + +/* Define to 1 if you have the `dlb' library (-ldlb). */ +#undef HAVE_LIBDLB + +/* Define to 1 if you have the `f77blas' library (-lf77blas). */ +#undef HAVE_LIBF77BLAS + +/* Define to 1 if you have the `gfortran' library (-lgfortran). 
*/ +#undef HAVE_LIBGFORTRAN + +/* Define to 1 if you have the `GL' library (-lGL). */ +#undef HAVE_LIBGL + +/* Define to 1 if you have the `glpk' library (-lglpk). */ +#undef HAVE_LIBGLPK + +/* Define to 1 if you have the `GLU' library (-lGLU). */ +#undef HAVE_LIBGLU + +/* Define to 1 if you have the `glut' library (-lglut). */ +#undef HAVE_LIBGLUT + +/* Define to 1 if you have the `goto' library (-lgoto). */ +#undef HAVE_LIBGOTO + +/* Define to 1 if you have the `goto2' library (-lgoto2). */ +#undef HAVE_LIBGOTO2 + +/* Define to 1 if you have the `hdf5' library (-lhdf5). */ +#undef HAVE_LIBHDF5 + +/* Define to 1 if you have the `ifcore' library (-lifcore). */ +#undef HAVE_LIBIFCORE + +/* Define to 1 if you have the `leveldb' library (-lleveldb). */ +#undef HAVE_LIBLEVELDB + +/* Define to 1 if you have the `openblas' library (-lopenblas). */ +#undef HAVE_LIBOPENBLAS + +/* Define to 1 if you have the `rt' library (-lrt). */ +#undef HAVE_LIBRT + +/* Define to 1 if you have the `simgrid' library (-lsimgrid). */ +#undef HAVE_LIBSIMGRID + +/* Define to 1 if you have the `ws2_32' library (-lws2_32). */ +#undef HAVE_LIBWS2_32 + +/* Define to 1 if you have the header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the `memalign' function. */ +#undef HAVE_MEMALIGN + +/* Define to 1 if you have the `mkdtemp' function. */ +#undef HAVE_MKDTEMP + +/* Define to 1 if you have the `mkostemp' function. */ +#undef HAVE_MKOSTEMP + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* Function MPI_Comm_f2c is available */ +#undef HAVE_MPI_COMM_F2C + +/* Define to 1 if you have the `MSG_environment_get_routing_root' function. */ +#undef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT + +/* Define to 1 if you have the `MSG_get_as_by_name' function. */ +#undef HAVE_MSG_GET_AS_BY_NAME + +/* Define to 1 if you have the `MSG_host_get_speed' function. */ +#undef HAVE_MSG_HOST_GET_SPEED + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_MSG_MSG_H + +/* Define to 1 if you have the `MSG_process_attach' function. */ +#undef HAVE_MSG_PROCESS_ATTACH + +/* Define to 1 if you have the `MSG_process_self_name' function. */ +#undef HAVE_MSG_PROCESS_SELF_NAME + +/* Define to 1 if you have the `MSG_process_userdata_init' function. */ +#undef HAVE_MSG_PROCESS_USERDATA_INIT + +/* Define to 1 if you have the `MSG_zone_get_by_name' function. */ +#undef HAVE_MSG_ZONE_GET_BY_NAME + +/* Define to 1 if you have the `MSG_zone_get_hosts' function. */ +#undef HAVE_MSG_ZONE_GET_HOSTS + +/* Define to 1 if you have the `nm_trace_add_synchro_point' function. */ +#undef HAVE_NM_TRACE_ADD_SYNCHRO_POINT + +/* PIOman (from PM2) is available */ +#undef HAVE_PIOMAN + +/* Define to 1 if you have the `piom_ltask_set_bound_thread_os_indexes' + function. */ +#undef HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES + +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + +/* Define to 1 if you have the `poti_init_custom' function. */ +#undef HAVE_POTI_INIT_CUSTOM + +/* Define to 1 if you have the `poti_user_NewEvent' function. */ +#undef HAVE_POTI_USER_NEWEVENT + +/* Define to 1 if you have the `pread' function. */ +#undef HAVE_PREAD + +/* Define to 1 if you have the `pthread_setaffinity_np' function. */ +#undef HAVE_PTHREAD_SETAFFINITY_NP + +/* pthread_spin_lock is available */ +#undef HAVE_PTHREAD_SPIN_LOCK + +/* Define to 1 if you have the `pwrite' function. */ +#undef HAVE_PWRITE + +/* Define to 1 if you have the header file. */ +#undef HAVE_PYTHON_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_ROCBLAS_ROCBLAS_H + +/* Define to 1 if you have the `scandir' function. */ +#undef HAVE_SCANDIR + +/* Define to 1 if you have the `sg_actor_attach' function. */ +#undef HAVE_SG_ACTOR_ATTACH + +/* Define to 1 if you have the `sg_actor_attach_pthread' function. */ +#undef HAVE_SG_ACTOR_ATTACH_PTHREAD + +/* Define to 1 if you have the `sg_actor_data' function. 
*/ +#undef HAVE_SG_ACTOR_DATA + +/* Define to 1 if you have the `sg_actor_execute' function. */ +#undef HAVE_SG_ACTOR_EXECUTE + +/* Define to 1 if you have the `sg_actor_get_data' function. */ +#undef HAVE_SG_ACTOR_GET_DATA + +/* Define to 1 if you have the `sg_actor_init' function. */ +#undef HAVE_SG_ACTOR_INIT + +/* Define to 1 if you have the `sg_actor_on_exit' function. */ +#undef HAVE_SG_ACTOR_ON_EXIT + +/* Define to 1 if you have the `sg_actor_ref' function. */ +#undef HAVE_SG_ACTOR_REF + +/* Define to 1 if you have the `sg_actor_self' function. */ +#undef HAVE_SG_ACTOR_SELF + +/* Define to 1 if you have the `sg_actor_self_execute' function. */ +#undef HAVE_SG_ACTOR_SELF_EXECUTE + +/* Define to 1 if you have the `sg_actor_set_data' function. */ +#undef HAVE_SG_ACTOR_SET_DATA + +/* Define to 1 if you have the `sg_actor_set_stacksize' function. */ +#undef HAVE_SG_ACTOR_SET_STACKSIZE + +/* Define to 1 if you have the `sg_actor_sleep_for' function. */ +#undef HAVE_SG_ACTOR_SLEEP_FOR + +/* Define to 1 if you have the `sg_cfg_set_int' function. */ +#undef HAVE_SG_CFG_SET_INT + +/* Define to 1 if you have the `sg_config_continue_after_help' function. */ +#undef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP + +/* Define to 1 if you have the `sg_host_get_properties' function. */ +#undef HAVE_SG_HOST_GET_PROPERTIES + +/* Define to 1 if you have the `sg_host_get_property_names' function. */ +#undef HAVE_SG_HOST_GET_PROPERTY_NAMES + +/* Define to 1 if you have the `sg_host_get_route' function. */ +#undef HAVE_SG_HOST_GET_ROUTE + +/* Define to 1 if you have the `sg_host_get_route_links' function. */ +#undef HAVE_SG_HOST_GET_ROUTE_LINKS + +/* Define to 1 if you have the `sg_host_get_speed' function. */ +#undef HAVE_SG_HOST_GET_SPEED + +/* Define to 1 if you have the `sg_host_list' function. */ +#undef HAVE_SG_HOST_LIST + +/* Define to 1 if you have the `sg_host_route' function. */ +#undef HAVE_SG_HOST_ROUTE + +/* Define to 1 if you have the `sg_host_self' function. 
*/ +#undef HAVE_SG_HOST_SELF + +/* Define to 1 if you have the `sg_host_sendto' function. */ +#undef HAVE_SG_HOST_SENDTO + +/* Define to 1 if you have the `sg_host_send_to' function. */ +#undef HAVE_SG_HOST_SEND_TO + +/* Define to 1 if you have the `sg_host_speed' function. */ +#undef HAVE_SG_HOST_SPEED + +/* Define to 1 if you have the `sg_link_bandwidth_set' function. */ +#undef HAVE_SG_LINK_BANDWIDTH_SET + +/* Define to 1 if you have the `sg_link_get_name' function. */ +#undef HAVE_SG_LINK_GET_NAME + +/* Define to 1 if you have the `sg_link_name' function. */ +#undef HAVE_SG_LINK_NAME + +/* Define to 1 if you have the `sg_link_set_bandwidth' function. */ +#undef HAVE_SG_LINK_SET_BANDWIDTH + +/* Define to 1 if you have the `sg_zone_get_all_hosts' function. */ +#undef HAVE_SG_ZONE_GET_ALL_HOSTS + +/* Define to 1 if you have the `sg_zone_get_by_name' function. */ +#undef HAVE_SG_ZONE_GET_BY_NAME + +/* Define to 1 if you have the `sg_zone_get_hosts' function. */ +#undef HAVE_SG_ZONE_GET_HOSTS + +/* Define to 1 if you have the `simcall_process_create' function. */ +#undef HAVE_SIMCALL_PROCESS_CREATE + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_ACTOR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_BARRIER_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_COND_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_ENGINE_H + +/* Define to 1 if you have the `simgrid_get_clock' function. */ +#undef HAVE_SIMGRID_GET_CLOCK + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_HOST_H + +/* Define to 1 if you have the `simgrid_init' function. */ +#undef HAVE_SIMGRID_INIT + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_LINK_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_MSG_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_MUTEX_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_SIMGRID_SEMAPHORE_H + +/* Define to 1 if you have the `simgrid_set_maestro' function. */ +#undef HAVE_SIMGRID_SET_MAESTRO + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_SIMDAG_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_VERSION_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIMGRID_ZONE_H + +/* Define to 1 if you have the `smpi_process_set_user_data' function. */ +#undef HAVE_SMPI_PROCESS_SET_USER_DATA + +/* Define to 1 if you have the `SMPI_thread_create' function. */ +#undef HAVE_SMPI_THREAD_CREATE + +/* Define to 1 if the system has the type `smx_actor_t'. */ +#undef HAVE_SMX_ACTOR_T + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDIO_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `sysconf' function. */ +#undef HAVE_SYSCONF + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_VALGRIND_HELGRIND_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_VALGRIND_MEMCHECK_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_VALGRIND_VALGRIND_H + +/* Define to 1 if you have the `xbt_barrier_init' function. */ +#undef HAVE_XBT_BARRIER_INIT + +/* Define to 1 if you have the header file. */ +#undef HAVE_XBT_BASE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_XBT_CONFIG_H + +/* Define to 1 if you have the `xbt_mutex_try_acquire' function. 
*/ +#undef HAVE_XBT_MUTEX_TRY_ACQUIRE + +/* Define to 1 if you have the header file. */ +#undef HAVE_XBT_SYNCHRO_H + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of `void *', as computed by sizeof. */ +#undef SIZEOF_VOID_P + +/* display DLB resource management verbose debug messages */ +#undef STARPURM_DLB_VERBOSE + +/* Define to 1 if dlb support is enabled. */ +#undef STARPURM_HAVE_DLB + +/* Define to 1 if DLB callbacks expect a user argument */ +#undef STARPURM_HAVE_DLB_CALLBACK_ARG + +/* Define to 1 if StarPU has support for worker callbacks. 
*/ +#undef STARPURM_STARPU_HAVE_WORKER_CALLBACKS + +/* display resource management verbose debug messages */ +#undef STARPURM_VERBOSE + +/* use ARMPL library */ +#undef STARPU_ARMPL + +/* use STARPU_ATLAS library */ +#undef STARPU_ATLAS + +/* Define this to enable hierarchical dags support */ +#undef STARPU_BUBBLE + +/* display verbose bubble debug messages */ +#undef STARPU_BUBBLE_VERBOSE + +/* location of StarPU build directory */ +#undef STARPU_BUILD_DIR + +/* use built-in min_dgels */ +#undef STARPU_BUILT_IN_MIN_DGELS + +/* Define to 1 if you are building with coverity */ +#undef STARPU_COVERITY + +/* Define to 1 to enforce data locality */ +#undef STARPU_DATA_LOCALITY_ENFORCE + +/* enable debugging statements */ +#undef STARPU_DEBUG + +/* enable developer warnings */ +#undef STARPU_DEVEL + +/* Define to 1 to disable asynchronous copy between CPU and GPU devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_COPY + +/* Define to 1 to disable asynchronous copy between CPU and CUDA devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY + +/* Define to 1 to disable asynchronous copy between CPU and Maxeler FPGA + devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY + +/* Define to 1 to disable asynchronous copy between MPI Master and MPI Slave + devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY + +/* Define to 1 to disable asynchronous copy between CPU and OpenCL devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY + +/* Define to 1 to disable asynchronous copy between TCP/IP Master and TCP/IP + Slave devices */ +#undef STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY + +/* display verbose debug messages */ +#undef STARPU_EXTRA_VERBOSE + +/* enable additional locking systems FxT traces */ +#undef STARPU_FXT_LOCK_TRACES + +/* how many MPI nodes fxt files can be manipulated when generating traces */ +#undef STARPU_FXT_MAX_FILES + +/* Path to the GNU debugger. 
*/ +#undef STARPU_GDB_PATH + +/* use STARPU_GOTO library */ +#undef STARPU_GOTO + +/* Define to 1 if the target supports __atomic_compare_exchange_n */ +#undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N + +/* Define to 1 if the target supports __atomic_compare_exchange_n_8 */ +#undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 + +/* Define to 1 if the target supports __atomic_exchange_n */ +#undef STARPU_HAVE_ATOMIC_EXCHANGE_N + +/* Define to 1 if the target supports __atomic_exchange_n_8 */ +#undef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 + +/* Define to 1 if the target supports __atomic_fetch_add */ +#undef STARPU_HAVE_ATOMIC_FETCH_ADD + +/* Define to 1 if the target supports __atomic_fetch_add_8 */ +#undef STARPU_HAVE_ATOMIC_FETCH_ADD_8 + +/* Define to 1 if the target supports __atomic_fetch_or */ +#undef STARPU_HAVE_ATOMIC_FETCH_OR + +/* Define to 1 if the target supports __atomic_fetch_or_8 */ +#undef STARPU_HAVE_ATOMIC_FETCH_OR_8 + +/* Define to 1 if the target supports __atomic_test_and_set */ +#undef STARPU_HAVE_ATOMIC_TEST_AND_SET + +/* The blas library is available */ +#undef STARPU_HAVE_BLAS + +/* Define to 1 if CUDA device properties include BusID */ +#undef STARPU_HAVE_BUSID + +/* The blas library has blas.h */ +#undef STARPU_HAVE_CBLAS_H + +/* Define to 1 if CUDA device properties include canMapHostMemory */ +#undef STARPU_HAVE_CUDA_CANMAPHOST + +/* Peer transfers are supported in CUDA */ +#undef STARPU_HAVE_CUDA_MEMCPY_PEER + +/* Define to 1 if CUDA device properties include managedMemory */ +#undef STARPU_HAVE_CUDA_MNGMEM + +/* Define to 1 if CUDA device properties include pageableMemoryAccess */ +#undef STARPU_HAVE_CUDA_PAGEABLEMEM + +/* Define to 1 if CUDA pointer attributes include a type field instead of old + memoryType field */ +#undef STARPU_HAVE_CUDA_POINTER_TYPE + +/* Define to 1 if CUDA device properties include unifiedAddressing */ +#undef STARPU_HAVE_CUDA_UNIFIEDADDR + +/* cufftDoubleComplex is available */ +#undef STARPU_HAVE_CUFFTDOUBLECOMPLEX + +/* 
CURAND is available */ +#undef STARPU_HAVE_CURAND + +/* compiler supports cxx11 */ +#undef STARPU_HAVE_CXX11 + +/* Define this on darwin. */ +#undef STARPU_HAVE_DARWIN + +/* Define to 1 if CUDA device properties include DomainID */ +#undef STARPU_HAVE_DOMAINID + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_F77_H + +/* Define this if a Fortran compiler is available */ +#undef STARPU_HAVE_FC + +/* Define to 1 if you have the libfftw3 library. */ +#undef STARPU_HAVE_FFTW + +/* Define to 1 if you have the libfftw3f library. */ +#undef STARPU_HAVE_FFTWF + +/* Define to 1 if you have the libfftw3l library. */ +#undef STARPU_HAVE_FFTWL + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_GLPK_H + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_HDF5 + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_HELGRIND_H + +/* Peer transfers are supported in HIP */ +#undef STARPU_HAVE_HIP_MEMCPY_PEER + +/* Define to 1 if you have the hwloc library. */ +#undef STARPU_HAVE_HWLOC + +/* Define this if icc is available */ +#undef STARPU_HAVE_ICC + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_LEVELDB + +/* Define to 1 if you have the cublasLt library */ +#undef STARPU_HAVE_LIBCUBLASLT + +/* Define to 1 if you have the cusolver library */ +#undef STARPU_HAVE_LIBCUSOLVER + +/* Define to 1 if you have the cusparse library */ +#undef STARPU_HAVE_LIBCUSPARSE + +/* libnuma is available */ +#undef STARPU_HAVE_LIBNUMA + +/* Define to 1 if you have the MAGMA library. */ +#undef STARPU_HAVE_MAGMA + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_MALLOC_H + +/* Define to 1 if you have the `memalign' function. */ +#undef STARPU_HAVE_MEMALIGN + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_MEMCHECK_H + +/* Define to 1 if the function MPI_Comm_create_group is available. 
*/ +#undef STARPU_HAVE_MPI_COMM_CREATE_GROUP + +/* is available */ +#undef STARPU_HAVE_MPI_EXT + +/* Define to 1 if you have mpi_sync_clocks and it is meant to be used */ +#undef STARPU_HAVE_MPI_SYNC_CLOCKS + +/* Define to 1 if you have msg.h in msg/. */ +#undef STARPU_HAVE_MSG_MSG_H + +/* Define to 1 if the function nearbyintf is available. */ +#undef STARPU_HAVE_NEARBYINTF + +/* Define to 1 if you have the nvml.h header */ +#undef STARPU_HAVE_NVML_H + +/* Define to 1 if you have the `posix_memalign' function. */ +#undef STARPU_HAVE_POSIX_MEMALIGN + +/* Define to 1 if you have libpoti and it is meant to be used */ +#undef STARPU_HAVE_POTI + +/* variable program_invocation_short_name is available */ +#undef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME + +/* pthread_barrier is available */ +#undef STARPU_HAVE_PTHREAD_BARRIER + +/* pthread_setname_np is available */ +#undef STARPU_HAVE_PTHREAD_SETNAME_NP + +/* pthread_spin_lock is available */ +#undef STARPU_HAVE_PTHREAD_SPIN_LOCK + +/* Define to 1 if the function rintf is available. */ +#undef STARPU_HAVE_RINTF + +/* Define this to 1 when s4u::Engine::on_time_advance_cb is available */ +#undef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB + +/* Define to 1 if the function sched_yield is available. */ +#undef STARPU_HAVE_SCHED_YIELD + +/* Define to 1 if the function setenv is available. */ +#undef STARPU_HAVE_SETENV + +/* Define to 1 if you have actor.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_ACTOR_H + +/* Define to 1 if you have barrier.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_BARRIER_H + +/* Define to 1 if you have cond.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_COND_H + +/* Define to 1 if you have engine.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_ENGINE_H + +/* Define to 1 if you have host.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_HOST_H + +/* Define to 1 if you have link.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_LINK_H + +/* Define to 1 if you have msg.h in simgrid/. 
*/ +#undef STARPU_HAVE_SIMGRID_MSG_H + +/* Define to 1 if you have mutex.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_MUTEX_H + +/* Define to 1 if you have semaphore.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_SEMAPHORE_H + +/* Define to 1 if you have simdag.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_SIMDAG_H + +/* Define to 1 if you have version.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_VERSION_H + +/* Define to 1 if you have zone.h in simgrid/. */ +#undef STARPU_HAVE_SIMGRID_ZONE_H + +/* Define to 1 if you have the smx_actor_t type. */ +#undef STARPU_HAVE_SMX_ACTOR_T + +/* statement expressions are available */ +#undef STARPU_HAVE_STATEMENT_EXPRESSIONS + +/* Define to 1 if the function strerror_r is available. */ +#undef STARPU_HAVE_STRERROR_R + +/* struct timespec is defined */ +#undef STARPU_HAVE_STRUCT_TIMESPEC + +/* Define to 1 if the target supports __sync_bool_compare_and_swap */ +#undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP + +/* Define to 1 if the target supports __sync_bool_compare_and_swap_8 */ +#undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 + +/* Define to 1 if the target supports __sync_fetch_and_add */ +#undef STARPU_HAVE_SYNC_FETCH_AND_ADD + +/* Define to 1 if the target supports __sync_fetch_and_add_8 */ +#undef STARPU_HAVE_SYNC_FETCH_AND_ADD_8 + +/* Define to 1 if the target supports __sync_fetch_and_or */ +#undef STARPU_HAVE_SYNC_FETCH_AND_OR + +/* Define to 1 if the target supports __sync_fetch_and_or_8 */ +#undef STARPU_HAVE_SYNC_FETCH_AND_OR_8 + +/* Define to 1 if the target supports __sync_lock_test_and_set */ +#undef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET + +/* Define to 1 if the target supports __sync_synchronize */ +#undef STARPU_HAVE_SYNC_SYNCHRONIZE + +/* Define to 1 if the target supports __sync_val_compare_and_swap */ +#undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP + +/* Define to 1 if the target supports __sync_val_compare_and_swap_8 */ +#undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 + +/* Define to 1 if you have the header file. 
*/ +#undef STARPU_HAVE_UNISTD_H + +/* Define to 1 if the function unsetenv is available. */ +#undef STARPU_HAVE_UNSETENV + +/* Define to 1 if you have the header file. */ +#undef STARPU_HAVE_VALGRIND_H + +/* Define this on windows. */ +#undef STARPU_HAVE_WINDOWS + +/* enable X11 */ +#undef STARPU_HAVE_X11 + +/* Define to 1 if you have base.h in xbt/. */ +#undef STARPU_HAVE_XBT_BASE_H + +/* Define to 1 if you have config.h in xbt/. */ +#undef STARPU_HAVE_XBT_CONFIG_H + +/* Define to 1 if you have synchro.h in xbt/. */ +#undef STARPU_HAVE_XBT_SYNCHRO_H + +/* calibration heuristic value */ +#undef STARPU_HISTORYMAXERROR + +/* Define to 1 on Linux */ +#undef STARPU_LINUX_SYS + +/* enable long check */ +#undef STARPU_LONG_CHECK + +/* Major version number of StarPU. */ +#undef STARPU_MAJOR_VERSION + +/* Maximum number of CPUs supported */ +#undef STARPU_MAXCPUS + +/* maximum number of CUDA devices */ +#undef STARPU_MAXCUDADEVS + +/* maximum number of HIP devices */ +#undef STARPU_MAXHIPDEVS + +/* maximum number of implementations */ +#undef STARPU_MAXIMPLEMENTATIONS + +/* maximum number of Maxeler FPGA devices */ +#undef STARPU_MAXMAXFPGADEVS + +/* maximum number of MPI devices */ +#undef STARPU_MAXMPIDEVS + +/* maximum number of memory nodes */ +#undef STARPU_MAXNODES + +/* maximum number of NUMA nodes */ +#undef STARPU_MAXNUMANODES + +/* maximum number of OPENCL devices */ +#undef STARPU_MAXOPENCLDEVS + +/* maximum number of TCP/IP devices */ +#undef STARPU_MAXTCPIPDEVS + +/* enable memory stats */ +#undef STARPU_MEMORY_STATS + +/* Minor version number of StarPU. 
*/ +#undef STARPU_MINOR_VERSION + +/* use MKL library */ +#undef STARPU_MKL + +/* use user defined library */ +#undef STARPU_MLR_MODEL + +/* enable performance model debug */ +#undef STARPU_MODEL_DEBUG + +/* display MPI verbose debug messages */ +#undef STARPU_MPI_EXTRA_VERBOSE + +/* enable StarPU MPI pedantic isend */ +#undef STARPU_MPI_PEDANTIC_ISEND + +/* display MPI verbose debug messages */ +#undef STARPU_MPI_VERBOSE + +/* Using native windows threads */ +#undef STARPU_NATIVE_WINTHREADS + +/* enable new check */ +#undef STARPU_NEW_CHECK + +/* how many buffers can be manipulated per task */ +#undef STARPU_NMAXBUFS + +/* Maximum number of device per device arch */ +#undef STARPU_NMAXDEVS + +/* Maximum number of workers */ +#undef STARPU_NMAXWORKERS + +/* Maximum number of worker combinations */ +#undef STARPU_NMAX_COMBINEDWORKERS + +/* Maximum number of sched_ctxs supported */ +#undef STARPU_NMAX_SCHED_CTXS + +/* drivers must progress */ +#undef STARPU_NON_BLOCKING_DRIVERS + +/* disable assertions */ +#undef STARPU_NO_ASSERT + +/* Define to 1 if you use the openblas library. 
*/ +#undef STARPU_OPENBLAS + +/* Define to 1 on OpenBSD systems */ +#undef STARPU_OPENBSD_SYS + +/* Define this to enable using an OpenCL simulator */ +#undef STARPU_OPENCL_SIMULATOR + +/* enable OpenGL rendering of some examples */ +#undef STARPU_OPENGL_RENDER + +/* Define this to enable OpenMP runtime support */ +#undef STARPU_OPENMP + +/* Define this to enable LLVM OpenMP runtime support */ +#undef STARPU_OPENMP_LLVM + +/* Define to 1 if you have the libpapi library */ +#undef STARPU_PAPI + +/* Define this to enable parallel worker support */ +#undef STARPU_PARALLEL_WORKER + +/* enable performance debug */ +#undef STARPU_PERF_DEBUG + +/* performance models location */ +#undef STARPU_PERF_MODEL_DIR + +/* Define this to enable profiling tool support */ +#undef STARPU_PROF_TOOL + +/* Define to 1 if `PTHREAD_COND_INITIALIZER' is just zeroes */ +#undef STARPU_PTHREAD_COND_INITIALIZER_ZERO + +/* Define to 1 if `PTHREAD_MUTEX_INITIALIZER' is just zeroes */ +#undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO + +/* Define to 1 if `PTHREAD_RWLOCK_INITIALIZER' is just zeroes */ +#undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO + +/* Python cloudpickle package available */ +#undef STARPU_PYTHON_HAVE_CLOUDPICKLE + +/* Python joblib package available */ +#undef STARPU_PYTHON_HAVE_JOBLIB + +/* Python3 numpy package available */ +#undef STARPU_PYTHON_HAVE_NUMPY + +/* enable quick check */ +#undef STARPU_QUICK_CHECK + +/* Release version number of StarPU. */ +#undef STARPU_RELEASE_VERSION + +/* enable debug sc_hypervisor */ +#undef STARPU_SC_HYPERVISOR_DEBUG + +/* Define this to enable simgrid execution */ +#undef STARPU_SIMGRID + +/* Define to 1 if you have the `simgrid_init' function. */ +#undef STARPU_SIMGRID_HAVE_SIMGRID_INIT + +/* Define to 1 if you have the `xbt_barrier_init' function. 
*/ +#undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT + +/* Define this to enable Model Checker in simgrid execution */ +#undef STARPU_SIMGRID_MC + +/* check spinlock use */ +#undef STARPU_SPINLOCK_CHECK + +/* location of StarPU sources */ +#undef STARPU_SRC_DIR + +/* Only static compilation was made */ +#undef STARPU_STATIC_ONLY + +/* use user defined library */ +#undef STARPU_SYSTEM_BLAS + +/* enable data allocation cache */ +#undef STARPU_USE_ALLOCATION_CACHE + +/* Define to 1 if Ayudame 1 is available and should be used */ +#undef STARPU_USE_AYUDAME1 + +/* Define to 1 if Ayudame 2 is available and should be used */ +#undef STARPU_USE_AYUDAME2 + +/* CPU driver is activated */ +#undef STARPU_USE_CPU + +/* CUDA support is activated */ +#undef STARPU_USE_CUDA + +/* Define to 1 if the CUDA0 driver is to be tested */ +#undef STARPU_USE_CUDA0 + +/* Define to 1 if the CUDA1 driver is to be tested */ +#undef STARPU_USE_CUDA1 + +/* Define to 1 if CUDA Mapped host memory may be used */ +#undef STARPU_USE_CUDA_MAP + +/* Define to 1 if drandr48 is available and should be used */ +#undef STARPU_USE_DRAND48 + +/* Define to 1 if erandr48_r is available */ +#undef STARPU_USE_ERAND48_R + +/* enable FxT traces */ +#undef STARPU_USE_FXT + +/* Define to 1 if the HIP driver is to be tested */ +#undef STARPU_USE_HIP + +/* HIPBLAS support is enabled */ +#undef STARPU_USE_HIPBLAS + +/* Maxeler FPGA support is activated */ +#undef STARPU_USE_MAX_FPGA + +/* Message-passing SINKs support is enabled */ +#undef STARPU_USE_MP + +/* whether the StarPU MPI library is available */ +#undef STARPU_USE_MPI + +/* whether the StarPU MPI failure tolerance mechanisms are requested */ +#undef STARPU_USE_MPI_FT + +/* whether the StarPU MPI failure tolerance mechanisms stats are watched */ +#undef STARPU_USE_MPI_FT_STATS + +/* MPI Master Slave support is enabled */ +#undef STARPU_USE_MPI_MASTER_SLAVE + +/* whether the StarPU MPI library (with a native MPI implementation) is + available */ +#undef 
STARPU_USE_MPI_MPI + +/* whether the StarPU MPI library (with a NewMadeleine implementation) is + available */ +#undef STARPU_USE_MPI_NMAD + +/* OpenCL support is activated */ +#undef STARPU_USE_OPENCL + +/* enable sc_hypervisor lib */ +#undef STARPU_USE_SC_HYPERVISOR + +/* TCPIP Master Slave support is enabled */ +#undef STARPU_USE_TCPIP_MASTER_SLAVE + +/* Define to 1 to disable STARPU_SKIP_IF_VALGRIND when running tests. */ +#undef STARPU_VALGRIND_FULL + +/* display verbose debug messages */ +#undef STARPU_VERBOSE + +/* workers must call callbacks on sleep/wake-up */ +#undef STARPU_WORKER_CALLBACKS + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION + +/* Define to 1 if the X Window System is missing or not being used. */ +#undef X_DISPLAY_MISSING + +/* Define to the equivalent of the C99 'restrict' keyword, or to + nothing if this is not supported. Do not define if restrict is + supported only directly. */ +#undef restrict +/* Work around a bug in older versions of Sun C++, which did not + #define __restrict__ or support _Restrict or __restrict__ + even though the corresponding Sun C compiler ended up with + "#define restrict _Restrict" or "#define restrict __restrict__" + in the previous line. This workaround can be removed once + we assume Oracle Developer Studio 12.5 (2016) or later. */ +#if defined __SUNPRO_CC && !defined __RESTRICT && !defined __restrict__ +# define _Restrict +# define __restrict__ +#endif diff --git a/src/common/fxt.c b/src/common/fxt.c new file mode 100644 index 0000000..7e951ba --- /dev/null +++ b/src/common/fxt.c @@ -0,0 +1,502 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* we need to identify each task to generate the DAG. */ +unsigned long _starpu_job_cnt = 0; + +#ifdef STARPU_USE_FXT +#include +#include +#include + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#ifdef __linux__ +#include /* for SYS_gettid */ +#elif defined(__FreeBSD__) +#include /* for thr_self() */ +#endif + +/* By default, record all events but the VERBOSE_EXTRA ones, which are very costly: */ +#define KEYMASKALL_DEFAULT FUT_KEYMASKALL & (~_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & (~_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA) + +static char _starpu_prof_file_user[1024]; +int _starpu_fxt_started = 0; +int _starpu_fxt_willstart = 1; +starpu_pthread_mutex_t _starpu_fxt_started_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +starpu_pthread_cond_t _starpu_fxt_started_cond = STARPU_PTHREAD_COND_INITIALIZER; + +/* and their submission order. */ +unsigned long _starpu_submit_order = 0; + +static int _starpu_written = 0; + +static int _starpu_id; + +/* If we use several MPI processes, we can't use STARPU_GENERATE_TRACE=1, + * because each MPI process will handle its own trace file, so store the world + * size to warn the user if needed and avoid processing partial traces. */ +static int _starpu_mpi_worldsize = 1; + +/* Event mask used to initialize FxT. 
By default all events are recorded just + * after FxT starts, but this can be changed by calling + * starpu_fxt_autostart_profiling(0) */ +static unsigned int initial_key_mask = KEYMASKALL_DEFAULT; + +/* Event mask used when events are actually recorded, e.g. between + * starpu_fxt_start|stop_profiling() calls if autostart is disabled, or at + * anytime otherwise. Can be changed by the user at runtime, by setting + * STARPU_FXT_EVENTS env var. */ +static unsigned int profiling_key_mask = 0; + +#ifdef STARPU_SIMGRID +/* Give virtual time to FxT */ +uint64_t fut_getstamp(void) +{ + return starpu_timing_now()*1000.; +} +#endif + +long _starpu_gettid(void) +{ + /* TODO: test at configure whether __thread is available, and use that + * to cache the value. + * Don't use the TSD, this is getting called before we would have the + * time to allocate it. */ +#ifdef STARPU_SIMGRID +# ifdef HAVE_SG_ACTOR_SELF + return (uintptr_t) sg_actor_self(); +# else + return (uintptr_t) MSG_process_self(); +# endif +#else +#if defined(__linux__) + return syscall(SYS_gettid); +#elif defined(__FreeBSD__) + long tid; + thr_self(&tid); + return tid; +#elif defined(_WIN32) && !defined(__CYGWIN__) + return (long) GetCurrentThreadId(); +#else + return (long) starpu_pthread_self(); +#endif +#endif +} + +static void _starpu_profile_set_tracefile(void) +{ + char *user; + + char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX"); + if (!fxt_prefix) + fxt_prefix = "/tmp"; + else + _starpu_mkpath_and_check(fxt_prefix, S_IRWXU); + + char suffix[64]; + char *fxt_suffix = starpu_getenv("STARPU_FXT_SUFFIX"); + if (!fxt_suffix) + { + user = starpu_getenv("USER"); + if (!user) + user = ""; + snprintf(suffix, sizeof(suffix), "prof_file_%s_%d", user, _starpu_id); + } + else + { + snprintf(suffix, sizeof(suffix), "%s_%d", fxt_suffix, _starpu_id); + } + + snprintf(_starpu_prof_file_user, sizeof(_starpu_prof_file_user), "%s/%s", fxt_prefix, suffix); +} + +static inline unsigned int 
_starpu_profile_get_user_keymask(void) +{ + if (profiling_key_mask != 0) + return profiling_key_mask; + + char *fxt_events = starpu_getenv("STARPU_FXT_EVENTS"); + if (fxt_events) + { + profiling_key_mask = _STARPU_FUT_KEYMASK_META; // contains mandatory events, even when profiling is disabled + + char delim[] = "|,"; + char* sub = strtok(fxt_events, delim); + for (; sub != NULL; sub = strtok(NULL, delim)) + { + if (!strcasecmp(sub, "USER")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_USER; + else if (!strcasecmp(sub, "TASK")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK; + else if (!strcasecmp(sub, "TASK_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK_VERBOSE; + else if (!strcasecmp(sub, "DATA")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_DATA; + else if (!strcasecmp(sub, "DATA_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_DATA_VERBOSE; + else if (!strcasecmp(sub, "WORKER")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_WORKER; + else if (!strcasecmp(sub, "WORKER_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_WORKER_VERBOSE; + else if (!strcasecmp(sub, "DSM")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_DSM; + else if (!strcasecmp(sub, "DSM_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_DSM_VERBOSE; + else if (!strcasecmp(sub, "SCHED")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_SCHED; + else if (!strcasecmp(sub, "SCHED_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_SCHED_VERBOSE; + else if (!strcasecmp(sub, "LOCK")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_LOCK; + else if (!strcasecmp(sub, "LOCK_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_LOCK_VERBOSE; + else if (!strcasecmp(sub, "EVENT")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_EVENT; + else if (!strcasecmp(sub, "EVENT_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_EVENT_VERBOSE; + else if (!strcasecmp(sub, "MPI")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_MPI; + else if (!strcasecmp(sub, "MPI_VERBOSE")) + profiling_key_mask |= 
_STARPU_FUT_KEYMASK_MPI_VERBOSE; + else if (!strcasecmp(sub, "HYP")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_HYP; + else if (!strcasecmp(sub, "HYP_VERBOSE")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_HYP_VERBOSE; + else if (!strcasecmp(sub, "TASK_VERBOSE_EXTRA")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA; + else if (!strcasecmp(sub, "MPI_VERBOSE_EXTRA")) + profiling_key_mask |= _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA; + /* Added categories here should also be added in the documentation + * 501_environment_variable.doxy. */ + else + _STARPU_MSG("Unknown event type '%s'\n", sub); + } + } + else + { + /* If user doesn't want to filter events, all events are recorded: */ + profiling_key_mask = KEYMASKALL_DEFAULT; + } + + return profiling_key_mask; +} + +void starpu_profiling_set_id(int new_id) +{ + _STARPU_DEBUG("Set id to <%d>\n", new_id); + _starpu_id = new_id; + _starpu_profile_set_tracefile(); + +#ifdef HAVE_FUT_SET_FILENAME + fut_set_filename(_starpu_prof_file_user); +#endif +} + +void _starpu_profiling_set_mpi_worldsize(int worldsize) +{ + STARPU_ASSERT(worldsize >= 1); + _starpu_mpi_worldsize = worldsize; + + int generate_trace = starpu_getenv_number("STARPU_GENERATE_TRACE"); + if (generate_trace == 1 && _starpu_mpi_worldsize > 1) + { + /** TODO: make it work ! + * The problem is that when STARPU_GENERATE_TRACE is used, each MPI + * process will generate the trace corresponding to its own execution + * (which makes no sense in MPI execution with several processes). + * Although letting only one StarPU process generating the trace by + * using the trace files of all MPI processes is not the most + * complicated thing to do, one case is not easy to deal with: what to + * do when each process stored its trace file in the local memory of + * the node (e.g. /tmp/) ? + */ + _STARPU_MSG("You can't use STARPU_GENERATE_TRACE=1 with several MPI processes. 
Use starpu_fxt_tool after application execution.\n"); + } +} + +void starpu_fxt_autostart_profiling(int autostart) +{ + /* By calling this function with autostart = 0 before starpu_init(), + * FxT will record only required event to properly work later (KEYMASK_META), and + * won't record anything else. */ + if (autostart) + initial_key_mask = _starpu_profile_get_user_keymask(); + else + initial_key_mask = _STARPU_FUT_KEYMASK_META; +} + +void starpu_fxt_start_profiling() +{ + unsigned threadid = _starpu_gettid(); + fut_keychange(FUT_ENABLE, _starpu_profile_get_user_keymask(), threadid); + _STARPU_TRACE_META("start_profiling"); +} + +void starpu_fxt_stop_profiling() +{ + unsigned threadid = _starpu_gettid(); + _STARPU_TRACE_META("stop_profiling"); + fut_keychange(FUT_SETMASK, _STARPU_FUT_KEYMASK_META, threadid); +} + +int starpu_fxt_is_enabled() +{ + return starpu_getenv_number_default("STARPU_FXT_TRACE", 0); +} + +#ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK +void _starpu_fxt_flush_callback() +{ + _STARPU_MSG("FxT is flushing trace to disk ! This can impact performance.\n"); + _STARPU_MSG("Maybe you should increase the value of STARPU_TRACE_BUFFER_SIZE ?\n"); + + starpu_fxt_trace_user_event_string("fxt flush"); +} +#endif + +void _starpu_fxt_init_profiling(uint64_t trace_buffer_size) +{ + unsigned threadid; + + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); + if (!(_starpu_fxt_willstart = starpu_fxt_is_enabled())) + { + STARPU_PTHREAD_COND_BROADCAST(&_starpu_fxt_started_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); + return; + } + + STARPU_ASSERT(!_starpu_fxt_started); + + _starpu_fxt_started = 1; + _starpu_written = 0; + _starpu_profile_set_tracefile(); + + STARPU_HG_DISABLE_CHECKING(fut_active); + +#ifdef HAVE_FUT_SET_FILENAME + fut_set_filename(_starpu_prof_file_user); +#endif +#ifdef HAVE_ENABLE_FUT_FLUSH + // when the event buffer is full, fxt stops recording events. + // The trace may thus be incomplete. 
+ // Enable the fut_flush function which is called when the + // fxt event buffer is full to flush the buffer to disk, + // therefore allowing to record the remaining events. + enable_fut_flush(); +#endif + + threadid = _starpu_gettid(); + +#ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK + if (fut_setup_flush_callback(trace_buffer_size / sizeof(unsigned long), initial_key_mask, threadid, &_starpu_fxt_flush_callback) < 0) +#else + if (fut_setup(trace_buffer_size / sizeof(unsigned long), initial_key_mask, threadid) < 0) +#endif + { + perror("fut_setup"); + STARPU_ABORT(); + } + + STARPU_PTHREAD_COND_BROADCAST(&_starpu_fxt_started_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); + + return; +} + +int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) +{ + if (strcmp(option, "-c") == 0) + { + options->per_task_colour = 1; + } + else if (strcmp(option, "-no-events") == 0) + { + options->no_events = 1; + } + else if (strcmp(option, "-no-counter") == 0) + { + options->no_counter = 1; + } + else if (strcmp(option, "-no-bus") == 0) + { + options->no_bus = 1; + } + else if (strcmp(option, "-no-flops") == 0) + { + options->no_flops = 1; + } + else if (strcmp(option, "-no-smooth") == 0) + { + options->no_smooth = 1; + } + else if (strcmp(option, "-no-acquire") == 0) + { + options->no_acquire = 1; + } + else if (strcmp(option, "-memory-states") == 0) + { + options->memory_states = 1; + } + else if (strcmp(option, "-internal") == 0) + { + options->internal = 1; + } + else if (strcmp(option, "-label-deps") == 0) + { + options->label_deps = 1; + } + else if (strcmp(option, "-number-events") == 0) + { + options->number_events_path = strdup("number_events.data"); + } + else if (strcmp(option, "-use-task-color") == 0) + { + options->use_task_color = 1; + } + else + { + return 1; + } + return 0; +} + +static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename, char *dirname) +{ + /* We take default 
options */ + struct starpu_fxt_options options; + starpu_fxt_options_init(&options); + + char *trace_options = starpu_getenv("STARPU_GENERATE_TRACE_OPTIONS"); + if (trace_options) + { + char *option = strtok(trace_options, " "); + while (option) + { + int ret = _starpu_generate_paje_trace_read_option(option, &options); + if (ret == 1) + _STARPU_MSG("Option <%s> is not a valid option for starpu_fxt_tool\n", option); + option = strtok(NULL, " "); + } + } + + options.ninputfiles = 1; + options.filenames[0] = input_fxt_filename; + free(options.out_paje_path); + options.out_paje_path = strdup(output_paje_filename); + options.file_prefix = ""; + options.file_rank = -1; + options.dir = dirname; + + starpu_fxt_generate_trace(&options); + starpu_fxt_options_shutdown(&options); +} + +void _starpu_fxt_dump_file(void) +{ + if (!_starpu_fxt_started) + return; + + char hostname[128]; + gethostname(hostname, 128); + + int ret = fut_endup(_starpu_prof_file_user); + if (ret < 0) + _STARPU_MSG("Problem when writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user); +#ifdef STARPU_VERBOSE + else + _STARPU_MSG("Writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user); +#endif +} + +void _starpu_stop_fxt_profiling(void) +{ + if (!_starpu_fxt_started) + return; + if (!_starpu_written) + { + _starpu_fxt_dump_file(); + + /* Should we generate a Paje trace directly ? */ + int generate_trace = starpu_getenv_number("STARPU_GENERATE_TRACE"); + if (_starpu_mpi_worldsize == 1 && generate_trace == 1) + { + _starpu_set_catch_signals(0); + char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX"); + _starpu_generate_paje_trace(_starpu_prof_file_user, "paje.trace", fxt_prefix); + } + + int ret = fut_done(); + if (ret < 0) + { + /* Something went wrong with the FxT trace (eg. 
there + * was too many events) */ + _STARPU_MSG("Warning: the FxT trace could not be generated properly\n"); + } + + _starpu_written = 1; + _starpu_fxt_started = 0; + } +} + +#else // STARPU_USE_FXT + +void starpu_fxt_autostart_profiling(int autostart STARPU_ATTRIBUTE_UNUSED) +{ +} + +void starpu_fxt_start_profiling() +{ +} + +void starpu_fxt_stop_profiling() +{ +} + +#endif // STARPU_USE_FXT + +void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_TRACE_USER_EVENT(code); +#endif +} + + +void starpu_fxt_trace_user_meta_string(const char *s STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_TRACE_META(s); +#endif +} + +void starpu_fxt_trace_user_event_string(const char *s STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_TRACE_EVENT(s); +#endif +} diff --git a/src/common/fxt.h b/src/common/fxt.h new file mode 100644 index 0000000..c8f7590 --- /dev/null +++ b/src/common/fxt.h @@ -0,0 +1,1575 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018,2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __FXT_H__ +#define __FXT_H__ + + +/** @file */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* ou _BSD_SOURCE ou _SVID_SOURCE */ +#endif + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#ifdef STARPU_USE_FXT +#include +#include +#endif + +#pragma GCC visibility push(hidden) + +/* some key to identify the worker kind */ +#define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100) +#define _STARPU_FUT_KEY_WORKER(key) (key - 0x100) + +#define _STARPU_FUT_WORKER_INIT_START 0x5100 +#define _STARPU_FUT_WORKER_INIT_END 0x5101 + +#define _STARPU_FUT_START_CODELET_BODY 0x5102 +#define _STARPU_FUT_END_CODELET_BODY 0x5103 + +#define _STARPU_FUT_JOB_PUSH 0x5104 +#define _STARPU_FUT_JOB_POP 0x5105 + +#define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 + +#define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 +#define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 +#define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 +#define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 + +#define _STARPU_FUT_TAG 0x5111 +#define _STARPU_FUT_TAG_DEPS 0x5112 + +#define _STARPU_FUT_TASK_DEPS 0x5113 + +#define _STARPU_FUT_DATA_COPY 0x5114 +#define _STARPU_FUT_WORK_STEALING 0x5115 + +#define _STARPU_FUT_WORKER_DEINIT_START 0x5116 +#define _STARPU_FUT_WORKER_DEINIT_END 0x5117 + +#define _STARPU_FUT_WORKER_SLEEP_START 0x5118 +#define _STARPU_FUT_WORKER_SLEEP_END 0x5119 + +#define _STARPU_FUT_TASK_SUBMIT 0x511a +#define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b + +#define _STARPU_FUT_MODEL_NAME 0x511c + +#define _STARPU_FUT_DATA_NAME 0x511d +#define _STARPU_FUT_DATA_COORDINATES 0x511e +#define _STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f + +#define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 + +#define _STARPU_FUT_NEW_MEM_NODE 0x5122 + +#define _STARPU_FUT_START_CALLBACK 0x5123 +#define _STARPU_FUT_END_CALLBACK 0x5124 + +#define _STARPU_FUT_TASK_DONE 0x5125 +#define _STARPU_FUT_TAG_DONE 0x5126 + +#define _STARPU_FUT_START_ALLOC 0x5127 +#define 
_STARPU_FUT_END_ALLOC 0x5128 + +#define _STARPU_FUT_START_ALLOC_REUSE 0x5129 +#define _STARPU_FUT_END_ALLOC_REUSE 0x5130 + +#define _STARPU_FUT_USED_MEM 0x512a + +#define _STARPU_FUT_TASK_NAME 0x512b + +#define _STARPU_FUT_DATA_WONT_USE 0x512c + +#define _STARPU_FUT_TASK_COLOR 0x512d + +#define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e + +#define _STARPU_FUT_TASK_LINE 0x512f + +#define _STARPU_FUT_START_MEMRECLAIM 0x5131 +#define _STARPU_FUT_END_MEMRECLAIM 0x5132 + +#define _STARPU_FUT_START_DRIVER_COPY 0x5133 +#define _STARPU_FUT_END_DRIVER_COPY 0x5134 + +#define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 +#define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 + +#define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 +#define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 + +#define _STARPU_FUT_USER_EVENT 0x5139 + +#define _STARPU_FUT_SET_PROFILING 0x513a + +#define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b + +#define _STARPU_FUT_EVENT 0x513c +#define _STARPU_FUT_THREAD_EVENT 0x513d + +#define _STARPU_FUT_CODELET_DETAILS 0x513e +#define _STARPU_FUT_CODELET_DATA 0x513f + +#define _STARPU_FUT_LOCKING_MUTEX 0x5140 +#define _STARPU_FUT_MUTEX_LOCKED 0x5141 + +#define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 +#define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 + +#define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 + +#define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 +#define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 + +#define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 +#define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 + +#define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 +#define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a + +#define _STARPU_FUT_LOCKING_SPINLOCK 0x514b +#define _STARPU_FUT_SPINLOCK_LOCKED 0x514c + +#define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d +#define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e + +#define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f + +#define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 +#define _STARPU_FUT_COND_WAIT_END 0x5151 + +#define _STARPU_FUT_MEMORY_FULL 0x5152 + +#define _STARPU_FUT_DATA_LOAD 0x5153 + +#define _STARPU_FUT_START_UNPARTITION_ON_TID 
0x5154 +#define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 + +#define _STARPU_FUT_START_FREE 0x5156 +#define _STARPU_FUT_END_FREE 0x5157 + +#define _STARPU_FUT_START_WRITEBACK 0x5158 +#define _STARPU_FUT_END_WRITEBACK 0x5159 + +#define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a +#define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b + +#define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c +#define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d + +#define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 +#define _STARPU_FUT_HYPERVISOR_END 0x5161 + +#define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 +#define _STARPU_FUT_BARRIER_WAIT_END 0x5163 + +#define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 +#define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 +#define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 +#define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 + +#define _STARPU_FUT_START_EXECUTING 0x5168 +#define _STARPU_FUT_END_EXECUTING 0x5169 + +#define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a +#define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b +#define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c +#define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d + +#define _STARPU_FUT_TASK_SUBMIT_START 0x516e +#define _STARPU_FUT_TASK_SUBMIT_END 0x516f + +#define _STARPU_FUT_TASK_BUILD_START 0x5170 +#define _STARPU_FUT_TASK_BUILD_END 0x5171 + +#define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 +#define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 + +#define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 +#define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 + +#define _STARPU_FUT_TASK_MPI_POST_START 0x5176 +#define _STARPU_FUT_TASK_MPI_POST_END 0x5177 + +#define _STARPU_FUT_TASK_WAIT_START 0x5178 +#define _STARPU_FUT_TASK_WAIT_END 0x5179 + +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b + +#define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c + +#define _STARPU_FUT_START_FETCH_INPUT 0x517e +#define _STARPU_FUT_END_FETCH_INPUT 0x517f + +#define _STARPU_FUT_TASK_THROTTLE_START 0x5180 +#define 
_STARPU_FUT_TASK_THROTTLE_END 0x5181 + +#define _STARPU_FUT_DATA_STATE_INVALID 0x5182 +#define _STARPU_FUT_DATA_STATE_OWNER 0x5183 +#define _STARPU_FUT_DATA_STATE_SHARED 0x5184 + +#define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 +#define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 +#define _STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 + +#define _STARPU_FUT_TASK_END_DEP 0x5188 + +#ifdef STARPU_BUBBLE +#define _STARPU_FUT_TASK_BUBBLE 0x5189 +#endif + +#define _STARPU_FUT_START_PARALLEL_SYNC 0x518a +#define _STARPU_FUT_END_PARALLEL_SYNC 0x518b + +/* Predefined FUT key masks */ +#define _STARPU_FUT_KEYMASK_META FUT_KEYMASK0 +#define _STARPU_FUT_KEYMASK_USER FUT_KEYMASK1 +#define _STARPU_FUT_KEYMASK_TASK FUT_KEYMASK2 +#define _STARPU_FUT_KEYMASK_TASK_VERBOSE FUT_KEYMASK3 +#define _STARPU_FUT_KEYMASK_DATA FUT_KEYMASK4 +#define _STARPU_FUT_KEYMASK_DATA_VERBOSE FUT_KEYMASK5 +#define _STARPU_FUT_KEYMASK_WORKER FUT_KEYMASK6 +#define _STARPU_FUT_KEYMASK_WORKER_VERBOSE FUT_KEYMASK7 +#define _STARPU_FUT_KEYMASK_DSM FUT_KEYMASK8 +#define _STARPU_FUT_KEYMASK_DSM_VERBOSE FUT_KEYMASK9 +#define _STARPU_FUT_KEYMASK_SCHED FUT_KEYMASK10 +#define _STARPU_FUT_KEYMASK_SCHED_VERBOSE FUT_KEYMASK11 +#define _STARPU_FUT_KEYMASK_LOCK FUT_KEYMASK12 +#define _STARPU_FUT_KEYMASK_LOCK_VERBOSE FUT_KEYMASK13 +#define _STARPU_FUT_KEYMASK_EVENT FUT_KEYMASK14 +#define _STARPU_FUT_KEYMASK_EVENT_VERBOSE FUT_KEYMASK15 +#define _STARPU_FUT_KEYMASK_MPI FUT_KEYMASK16 +#define _STARPU_FUT_KEYMASK_MPI_VERBOSE FUT_KEYMASK17 +#define _STARPU_FUT_KEYMASK_HYP FUT_KEYMASK18 +#define _STARPU_FUT_KEYMASK_HYP_VERBOSE FUT_KEYMASK19 +#define _STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA FUT_KEYMASK20 +#define _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA FUT_KEYMASK21 +/* When doing modifications to keymasks: + * - also adapt _starpu_profile_get_user_keymask() in src/common/fxt.c + * - adapt KEYMASKALL_DEFAULT in src/common/fxt.c + * - adapt the documentation in 501_environment_variable.doxy and/or + * 380_offline_performance_tools.doxy */ + 
+extern unsigned long _starpu_job_cnt; + +static inline unsigned long _starpu_fxt_get_job_id(void) +{ + unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_job_cnt, 1); + STARPU_ASSERT_MSG(ret != 0, "Oops, job_id wrapped! There are too many tasks for tracking them for profiling"); + return ret; +} + +#ifdef STARPU_USE_FXT + +/* Some versions of FxT do not include the declaration of the function */ +#ifdef HAVE_ENABLE_FUT_FLUSH +#if !HAVE_DECL_ENABLE_FUT_FLUSH +void enable_fut_flush(); +#endif +#endif +#ifdef HAVE_FUT_SET_FILENAME +#if !HAVE_DECL_FUT_SET_FILENAME +void fut_set_filename(char *filename); +#endif +#endif + +extern int _starpu_fxt_started STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern int _starpu_fxt_willstart STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern starpu_pthread_mutex_t _starpu_fxt_started_mutex STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern starpu_pthread_cond_t _starpu_fxt_started_cond STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Wait until FXT is started (or not). Returns if FXT was started */ +static inline int _starpu_fxt_wait_initialisation() +{ + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); + while (_starpu_fxt_willstart && !_starpu_fxt_started) + STARPU_PTHREAD_COND_WAIT(&_starpu_fxt_started_cond, &_starpu_fxt_started_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); + + return _starpu_fxt_started; +} + +extern unsigned long _starpu_submit_order; + +static inline unsigned long _starpu_fxt_get_submit_order(void) +{ + unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_submit_order, 1); + STARPU_ASSERT_MSG(_starpu_submit_order != 0, "Oops, submit_order wrapped! There are too many tasks for tracking them for profiling"); + return ret; +} + +long _starpu_gettid(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Initialize the FxT library. 
*/ +void _starpu_fxt_init_profiling(uint64_t trace_buffer_size); + +/** Stop the FxT library, and generate the trace file. */ +void _starpu_stop_fxt_profiling(void); + +/** In case we use MPI, tell the profiling system how many processes are used. */ +void _starpu_profiling_set_mpi_worldsize(int worldsize) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Generate the trace file. Used when catching signals SIGINT and SIGSEGV */ +void _starpu_fxt_dump_file(void); + +#ifdef FUT_NEEDS_COMMIT +#define _STARPU_FUT_COMMIT(size) fut_commitstampedbuffer(size) +#else +#define _STARPU_FUT_COMMIT(size) do { } while (0) +#endif + +#ifdef FUT_RAW_ALWAYS_PROBE1STR +#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) \ +do { \ + if(STARPU_UNLIKELY(fut_active)) { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... */ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 1 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ + }} while (0) +#endif + +#ifdef FUT_FULL_PROBE1STR +#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) FUT_FULL_PROBE1STR(CODE, P1, str) +#else +/** Sometimes we need something a little more specific than the wrappers from + * FxT: these macro permit to put add an event with 3 (or 4) numbers followed + * by a string. 
*/ +#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE2STR +#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) FUT_RAW_ALWAYS_PROBE2STR(CODE, P1, P2, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... */ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 2 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE2STR +#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) FUT_FULL_PROBE2STR(CODE, P1, P2, str) +#else +#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE3STR +#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) FUT_RAW_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 3)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 3 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE3STR +#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) FUT_FULL_PROBE3STR(CODE, P1, P2, P3, str) +#else +#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE4STR +#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) FUT_RAW_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 4 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE4STR +#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) FUT_FULL_PROBE4STR(CODE, P1, P2, P3, P4, str) +#else +#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE5STR +#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) FUT_RAW_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 5)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 5 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE5STR +#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) FUT_FULL_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) +#else +#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE6STR +#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) FUT_RAW_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 6)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 6 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + *(futargs++) = (unsigned long)(P6); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE6STR +#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) FUT_FULL_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) +#else +#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE7STR +#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_RAW_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 7)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 7 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + *(futargs++) = (unsigned long)(P6); \ + *(futargs++) = (unsigned long)(P7); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE7STR +#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_FULL_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) +#else +#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str); \ + } \ +} while (0) +#endif + +#ifndef FUT_RAW_PROBE7 +#define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + if(STARPU_UNLIKELY(fut_active)) { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(7)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ + _STARPU_FUT_COMMIT(FUT_SIZE(7)); \ + } \ + } while (0) +#endif + +#ifndef FUT_RAW_ALWAYS_PROBE1 +#define FUT_RAW_ALWAYS_PROBE1(CODE,P1) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(1)); \ + *(__args++)=(unsigned long)(P1); \ + fut_commitstampedbuffer(FUT_SIZE(1)); \ + } while (0) +#endif +#define 
FUT_DO_ALWAYS_PROBE1(CODE,P1) do { \ + FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE2 +#define FUT_RAW_ALWAYS_PROBE2(CODE,P1,P2) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(2)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2); \ + fut_commitstampedbuffer(FUT_SIZE(2)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE2(CODE,P1,P2) do { \ + FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE3 +#define FUT_RAW_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(3)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3); \ + fut_commitstampedbuffer(FUT_SIZE(3)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ + FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE4 +#define FUT_RAW_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(4)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4); \ + fut_commitstampedbuffer(FUT_SIZE(4)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ + FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE5 +#define FUT_RAW_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(5)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5); \ + fut_commitstampedbuffer(FUT_SIZE(5)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ + 
FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE6 +#define FUT_RAW_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(6)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6); \ + fut_commitstampedbuffer(FUT_SIZE(6)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ + FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE7 +#define FUT_RAW_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(7)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ + fut_commitstampedbuffer(FUT_SIZE(7)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE8 +#define FUT_RAW_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(8)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8); \ + fut_commitstampedbuffer(FUT_SIZE(8)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ +} while (0) + +#ifndef 
FUT_RAW_ALWAYS_PROBE9 +#define FUT_RAW_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(9)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8);*(__args++)=(unsigned long)(P9); \ + fut_commitstampedbuffer(FUT_SIZE(9)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ +} while (0) + +/* full probes */ +#ifndef FUT_FULL_PROBE0 +#define FUT_FULL_PROBE0(KEYMASK,CODE) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE1 +#define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE2 +#define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE3 +#define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE4 +#define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE5 +#define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ + } \ +} while(0) +#endif + +#ifndef 
FUT_FULL_PROBE6 +#define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE7 +#define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE8 +#define FUT_FULL_PROBE8(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + if(KEYMASK & fut_active) { \ + FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE9 +#define FUT_FULL_PROBE9(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + if(KEYMASK & fut_active) { \ + FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ + } \ +} while(0) +#endif + +#define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid()); \ +} while (0) + +#define _STARPU_TRACE_REGISTER_THREAD(cpuid) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \ +} while (0) + +#define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \ +} while (0) + +#define _STARPU_TRACE_WORKER_INIT_END(__workerid) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (__workerid)); \ +} while (0) + +#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA|_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & fut_active)) { \ + 
FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \ + { \ + if (rank == 0 && (job)->task->cl) \ + { \ + const int __nbuffers = STARPU_TASK_GET_NBUFFERS((job)->task); \ + char __buf[FXT_MAX_PARAMS*sizeof(long)]; \ + int __i; \ + for (__i = 0; __i < __nbuffers; __i++) \ + { \ + starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE((job)->task, __i); \ + void *__interface = _STARPU_TASK_GET_INTERFACES((job)->task)[__i]; \ + if (__handle->ops->describe) \ + { \ + __handle->ops->describe(__interface, __buf, sizeof(__buf)); \ + _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, __buf); \ + } \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i)); \ + /* Regarding the memory location: + * - if the data interface doesn't provide to_pointer operation, NULL will be returned + * and the location will be -1, which is fine; + * - we have to check whether the memory is on an actual NUMA node (and not on GPU + * memory, for instance); + * - looking at memory location before executing the task isn't the best choice: + * the page can be not allocated yet. A solution would be to get the memory + * location at the end of the task, but there is no FxT probe where we iterate over + * handles, after task execution. + * */ \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA, _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS, (job)->job_id, (__i), starpu_worker_get_memory_node_kind(starpu_worker_get_type(workerid)) == STARPU_CPU_RAM && starpu_task_get_current_data_node(__i) >= 0 ? 
starpu_get_memory_location_bitmap(starpu_data_handle_to_pointer(__handle, (unsigned) starpu_task_get_current_data_node(__i)), starpu_data_get_size(__handle)) : -1); \ + } \ + } \ + const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ + const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\ + FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \ + } \ + } \ +} while(0) + +#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ + const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ + const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\ + char _archname[32]=""; \ + if (perf_arch) starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0); \ + _STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \ + } \ +} while(0) + +#define _STARPU_TRACE_START_EXECUTING(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_EXECUTING, _starpu_gettid(), (job)->job_id); + +#define _STARPU_TRACE_END_EXECUTING(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_EXECUTING, _starpu_gettid(), (job)->job_id); + +#define _STARPU_TRACE_START_PARALLEL_SYNC(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); + +#define 
_STARPU_TRACE_END_PARALLEL_SYNC(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); + +#define _STARPU_TRACE_START_CALLBACK(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_CALLBACK, job, _starpu_gettid()); + +#define _STARPU_TRACE_END_CALLBACK(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_CALLBACK, job, _starpu_gettid()); + +#define _STARPU_TRACE_JOB_PUSH(task, prio) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); + +#define _STARPU_TRACE_JOB_POP(task, prio) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); + +#define _STARPU_TRACE_UPDATE_TASK_CNT(counter) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid()) + +#define _STARPU_TRACE_START_FETCH_INPUT(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid()); + +#define _STARPU_TRACE_END_FETCH_INPUT(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid()); + +#define _STARPU_TRACE_START_PUSH_OUTPUT(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); + +#define _STARPU_TRACE_END_PUSH_OUTPUT(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_FETCH_INPUT, job, id); + +#define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_FETCH_INPUT, job, id); + +#define _STARPU_TRACE_TAG(tag, job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, 
_STARPU_FUT_TAG, tag, (job)->job_id) + +#define _STARPU_TRACE_TAG_DEPS(tag_child, tag_father) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DEPS, tag_child, tag_father) + +#define _STARPU_TRACE_TASK_DEPS(job_prev, job_succ) \ + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task") + +#define _STARPU_TRACE_TASK_END_DEP(job_prev, job_succ) \ + FUT_DO_PROBE2(_STARPU_FUT_TASK_END_DEP, (job_prev)->job_id, (job_succ)->job_id) + +#define _STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ) \ + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost") + +#ifdef STARPU_BUBBLE +#define _STARPU_TRACE_BUBBLE_TASK_DEPS(prev_id, job_succ) \ + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "bubble") +#endif + +#define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(job) \ + do { \ + unsigned exclude_from_dag = (job)->exclude_from_dag; \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_EXCLUDE_FROM_DAG, (job)->job_id, (long unsigned)exclude_from_dag); \ +} while(0) + +#define _STARPU_TRACE_TASK_NAME_LINE_COLOR(job) \ + do { \ + _STARPU_TRACE_TASK_COLOR(job); \ + _STARPU_TRACE_TASK_NAME(job); \ + _STARPU_TRACE_TASK_LINE(job); \ + } while(0) + +#define _STARPU_TRACE_TASK_LINE(job) \ + do { \ + if ((job)->task->file) \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_LINE, (job)->job_id, (job)->task->line, (job)->task->file); \ +} while(0) + +#ifdef STARPU_BUBBLE +#define _STARPU_TRACE_BUBBLE(job) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ + unsigned int is_bubble=(job)->is_bubble; \ + unsigned long bubble_parent=(job)->task->bubble_parent; \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_BUBBLE, (job)->job_id, 
is_bubble, bubble_parent); \ + } \ +} while(0) +#endif + +#define _STARPU_TRACE_TASK_NAME(job) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ + const char *model_name = _starpu_job_get_model_name((job)); \ + const char *name = _starpu_job_get_task_name((job)); \ + if (name) \ + { \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), name); \ + } \ + else { \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), "unknown");\ + } \ + if (model_name) \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \ + } \ +} while(0) + +#define _STARPU_TRACE_TASK_COLOR(job) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ + if ((job)->task->color != 0) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \ + else if ((job)->task->cl && (job)->task->cl->color != 0) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \ + } \ +} while(0) + +#define _STARPU_TRACE_TASK_DONE(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid()) + +#define _STARPU_TRACE_TAG_DONE(tag) \ +do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ + struct _starpu_job *job = (tag)->job; \ + const char *model_name = _starpu_job_get_task_name((job)); \ + if (model_name) \ + { \ + _STARPU_FUT_FULL_PROBE3STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \ + } \ + else { \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\ + } \ + } \ +} while(0) + +#define _STARPU_TRACE_DATA_NAME(handle, name) \ + _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_NAME, handle, name) + +#define 
_STARPU_TRACE_DATA_COORDINATES(handle, dim, v) do {\ + switch (dim) { \ + case 1: FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; \ + case 2: FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; \ + case 3: FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; \ + case 4: FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; \ + default: FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; \ + } \ +} while (0) + +#define _STARPU_TRACE_DATA_COPY(src_node, dst_node, size) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_COPY, src_node, dst_node, size) + +#define _STARPU_TRACE_DATA_WONT_USE(handle) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid()) + +#define _STARPU_TRACE_DATA_DOING_WONT_USE(handle) \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_DOING_WONT_USE, handle) + +#define _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle) \ + FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle) + +#define _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch) \ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch) + +#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node) + +#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node) + +#define 
_STARPU_TRACE_WORK_STEALING(empty_q, victim_q) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED_VERBOSE, _STARPU_FUT_WORK_STEALING, empty_q, victim_q) + +#define _STARPU_TRACE_WORKER_DEINIT_START do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid()); \ +} while(0) + +#define _STARPU_TRACE_WORKER_DEINIT_END(workerkind) do {\ + if (_starpu_fxt_started) \ + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \ +} while(0) + +#define _STARPU_TRACE_WORKER_SCHEDULING_START \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_SCHEDULING_END \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_SCHEDULING_PUSH \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_SCHEDULING_POP \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_SLEEP_START \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid()); + +#define _STARPU_TRACE_WORKER_SLEEP_END \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_SUBMIT(job, iter, subiter) \ + FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_SUBMIT_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_SUBMIT_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, 
_STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_THROTTLE_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_THROTTLE_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_BUILD_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_BUILD_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_DECODE_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_DECODE_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_PRE_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_PRE_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_POST_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_MPI_POST_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_WAIT_START(job) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_WAIT_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_END, _starpu_gettid()); + +#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid()); + 
#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid());

/*
 * Memory management trace points. Allocation events use the DSM key mask;
 * reuse, free, writeback, used-memory and reclaim events use the DSM_VERBOSE
 * key mask.
 */
#define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) \
	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch);

#define _STARPU_TRACE_END_ALLOC(memnode, handle, r) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r);

#define _STARPU_TRACE_START_ALLOC_REUSE(memnode, size, handle, is_prefetch) \
	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch);

#define _STARPU_TRACE_END_ALLOC_REUSE(memnode, handle, r) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r);

#define _STARPU_TRACE_START_FREE(memnode, size, handle) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle);

#define _STARPU_TRACE_END_FREE(memnode, handle) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle);

#define _STARPU_TRACE_START_WRITEBACK(memnode, handle) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle);

#define _STARPU_TRACE_END_WRITEBACK(memnode, handle) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle);

#define _STARPU_TRACE_USED_MEM(memnode,used) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid());

#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());

#define _STARPU_TRACE_END_MEMRECLAIM(memnode, is_prefetch) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());

#define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid());

#define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid());

#define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) \
	FUT_DO_PROBE3(_STARPU_FUT_PAPI_TASK_EVENT_VALUE, event_id, _starpu_get_job_associated_to_task(task)->job_id, value)

/* These events are called so often that they can cause FxT to fail and make
 * the overall trace unreadable; they are only recorded when the
 * WORKER_VERBOSE key mask is enabled. */
#define _STARPU_TRACE_START_PROGRESS(memnode) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid());

#define _STARPU_TRACE_END_PROGRESS(memnode) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid());

#define _STARPU_TRACE_USER_EVENT(code) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_EVENT, code, _starpu_gettid());

#define _STARPU_TRACE_META(S) \
	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_EVENT,S)

#define _STARPU_TRACE_SET_PROFILING(status) \
	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_SET_PROFILING, status, _starpu_gettid());

#define _STARPU_TRACE_TASK_WAIT_FOR_ALL \
	FUT_FULL_PROBE0(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_WAIT_FOR_ALL)

/** Record the string event \p S whatever the key mask, as soon as tracing has
 * been started. The probe is a full statement inside braces so that the
 * macro expands to a single well-formed do/while statement. */
#define _STARPU_TRACE_EVENT_ALWAYS(S) do {\
	if (_starpu_fxt_started) { \
		FUT_DO_ALWAYS_PROBESTR(_STARPU_FUT_EVENT,S); \
	} \
} while(0)

#define _STARPU_TRACE_EVENT(S) \
	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT, _STARPU_FUT_EVENT,S)

#define _STARPU_TRACE_EVENT_VERBOSE(S) \
	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT_VERBOSE, _STARPU_FUT_EVENT,S)

+#define _STARPU_TRACE_THREAD_EVENT(S) \ + _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S) + +#define _STARPU_TRACE_HYPERVISOR_BEGIN() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid()); + +#define _STARPU_TRACE_HYPERVISOR_END() \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_END, _starpu_gettid()); + +#ifdef STARPU_FXT_LOCK_TRACES + +#define _STARPU_TRACE_LOCKING_MUTEX() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ +} while (0) + +#define _STARPU_TRACE_MUTEX_LOCKED() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_UNLOCKING_MUTEX() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_MUTEX_UNLOCKED() do {\ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_TRYLOCK_MUTEX() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_RDLOCKING_RWLOCK() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_RWLOCK_RDLOCKED() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; 
\ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_WRLOCKING_RWLOCK() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_RWLOCK_WRLOCKED() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_UNLOCKING_RWLOCK() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_RWLOCK_UNLOCKED() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define STARPU_TRACE_SPINLOCK_CONDITITION (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) + +#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {\ + if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ + const char *xfile; \ + xfile = strrchr(file,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ + } \ +} while(0) + +#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do { \ + if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ + const char *xfile; \ + xfile = strrchr(file,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \ + } \ +} while(0) + +#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do { \ + if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ + const char *xfile; \ + xfile = 
strrchr(file,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ + } \ +} while(0) + +#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do { \ + if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ + const char *xfile; \ + xfile = strrchr(file,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \ + } \ +} while(0) + +#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do { \ + if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ + const char *xfile; \ + xfile = strrchr(file,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \ + } \ +} while(0) + +#define _STARPU_TRACE_COND_WAIT_BEGIN() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_COND_WAIT_END() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#define _STARPU_TRACE_BARRIER_WAIT_END() do { \ + const char *file; \ + file = strrchr(__FILE__,'/') + 1; \ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); \ +} while(0) + +#else // !STARPU_FXT_LOCK_TRACES + +#define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) +#define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) +#define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) +#define _STARPU_TRACE_MUTEX_UNLOCKED() do {} 
while(0) +#define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) +#define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) +#define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) +#define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) +#define _STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) +#define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) +#define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) +#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) +#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void) file; (void)line;} while(0) +#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) +#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void) file; (void)line;} while(0) +#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) +#define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) +#define _STARPU_TRACE_COND_WAIT_END() do {} while(0) +#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) +#define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) + +#endif // STARPU_FXT_LOCK_TRACES + +#define _STARPU_TRACE_MEMORY_FULL(size) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid()); + +#define _STARPU_TRACE_DATA_LOAD(workerid,size) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size); + +#define _STARPU_TRACE_START_UNPARTITION(handle, memnode) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); + +#define _STARPU_TRACE_END_UNPARTITION(handle, memnode) \ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); + +#define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); + +#define 
_STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) \ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); + +#define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) \ + if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name); + +#define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) \ + if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child); + +#define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) \ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio); + +#define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) \ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority); + +#define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do { \ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & fut_active)) { \ + const size_t __data_size = handle->ops->get_size(handle); \ + const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \ + char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \ + void *__interface = handle->per_node[0].data_interface; \ + if (handle->ops->describe) \ + handle->ops->describe(__interface, __buf, sizeof(__buf)); \ + else \ + __buf[0] = 0; \ + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \ + } \ +} while (0) + +#define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) \ + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_UNREGISTER, handle) + +//Coherency Data Traces +#define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_INVALID, handle, node) + +#define 
_STARPU_TRACE_DATA_STATE_OWNER(handle, node) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_OWNER, handle, node) + +#define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) \ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_SHARED, handle, node) + +#define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) \ + FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_pre, req) + + +#else // !STARPU_USE_FXT + +/* Dummy macros in case FxT is disabled */ +#define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {(void)(nodeid);} while(0) +#define _STARPU_TRACE_REGISTER_THREAD(cpuid) do {(void)(cpuid);} while(0) +#define _STARPU_TRACE_WORKER_INIT_START(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) +#define _STARPU_TRACE_WORKER_INIT_END(workerid) do {(void)(workerid);} while(0) +#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) +#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) +#define _STARPU_TRACE_START_EXECUTING(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_END_EXECUTING(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_START_PARALLEL_SYNC(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_END_PARALLEL_SYNC(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_START_CALLBACK(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_END_CALLBACK(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_JOB_PUSH(task, prio) do {(void)(task); (void)(prio);} while(0) +#define _STARPU_TRACE_JOB_POP(task, prio) do {(void)(task); (void)(prio);} while(0) +#define _STARPU_TRACE_UPDATE_TASK_CNT(counter) do {(void)(counter);} while(0) +#define 
_STARPU_TRACE_START_FETCH_INPUT(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_END_FETCH_INPUT(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_START_PUSH_OUTPUT(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_END_PUSH_OUTPUT(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_TAG(tag, job) do {(void)(tag); (void)(job);} while(0) +#define _STARPU_TRACE_TAG_DEPS(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_TASK_END_DEP(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_GHOST_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TASK_NAME_LINE_COLOR(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TASK_NAME(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TASK_LINE(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TASK_COLOR(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TASK_DONE(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_TAG_DONE(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_DATA_NAME(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_DATA_COORDINATES(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) +#define _STARPU_TRACE_DATA_COPY(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) +#define _STARPU_TRACE_DATA_WONT_USE(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_DATA_DOING_WONT_USE(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_START_DRIVER_COPY(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) +#define _STARPU_TRACE_END_DRIVER_COPY(a,b,c,d,e) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e);} while(0) +#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_WORK_STEALING(a, b) do {(void)(a); 
(void)(b);} while(0) +#define _STARPU_TRACE_WORKER_DEINIT_START do {} while(0) +#define _STARPU_TRACE_WORKER_DEINIT_END(a) do {(void)(a);} while(0) +#define _STARPU_TRACE_WORKER_SCHEDULING_START do {} while(0) +#define _STARPU_TRACE_WORKER_SCHEDULING_END do {} while(0) +#define _STARPU_TRACE_WORKER_SCHEDULING_PUSH do {} while(0) +#define _STARPU_TRACE_WORKER_SCHEDULING_POP do {} while(0) +#define _STARPU_TRACE_WORKER_SLEEP_START do {} while(0) +#define _STARPU_TRACE_WORKER_SLEEP_END do {} while(0) +#define _STARPU_TRACE_TASK_SUBMIT(job, a, b) do {(void)(job); (void)(a);(void)(b);} while(0) +#define _STARPU_TRACE_TASK_SUBMIT_START() do {} while(0) +#define _STARPU_TRACE_TASK_SUBMIT_END() do {} while(0) +#define _STARPU_TRACE_TASK_THROTTLE_START() do {} while(0) +#define _STARPU_TRACE_TASK_THROTTLE_END() do {} while(0) +#define _STARPU_TRACE_TASK_BUILD_START() do {} while(0) +#define _STARPU_TRACE_TASK_BUILD_END() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_DECODE_START() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_DECODE_END() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_PRE_START() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_PRE_END() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_POST_START() do {} while(0) +#define _STARPU_TRACE_TASK_MPI_POST_END() do {} while(0) +#define _STARPU_TRACE_TASK_WAIT_START(job) do {(void)(job);} while(0) +#define _STARPU_TRACE_TASK_WAIT_END() do {} while(0) +#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() do {} while(0) +#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() do {} while(0) +#define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) do {(void)(memnode); (void)(size); (void)(handle);} while(0) +#define _STARPU_TRACE_END_ALLOC(memnode, handle, r) do {(void)(memnode); (void)(handle); (void)(r);} while(0) +#define _STARPU_TRACE_START_ALLOC_REUSE(a, size, handle, is_prefetch) do {(void)(a); (void)(size); (void)(handle);} while(0) +#define _STARPU_TRACE_END_ALLOC_REUSE(a, handle, r) do {(void)(a); 
(void)(handle); (void)(r);} while(0) +#define _STARPU_TRACE_START_FREE(memnode, size, handle) do {(void)(memnode); (void)(size); (void)(handle);} while(0) +#define _STARPU_TRACE_END_FREE(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) +#define _STARPU_TRACE_START_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) +#define _STARPU_TRACE_END_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) +#define _STARPU_TRACE_USED_MEM(memnode,used) do {(void)(memnode); (void)(used);} while (0) +#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) do {(void)(memnode); (void)(is_prefetch);} while(0) +#define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch) do {(void)(memnode); (void)(is_prefetch);} while(0) +#define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) +#define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) +#define _STARPU_TRACE_START_PROGRESS(memnode) do {(void)(memnode);} while(0) +#define _STARPU_TRACE_END_PROGRESS(memnode) do {(void)(memnode);} while(0) +#define _STARPU_TRACE_USER_EVENT(code) do {(void)(code);} while(0) +#define _STARPU_TRACE_SET_PROFILING(status) do {(void)(status);} while(0) +#define _STARPU_TRACE_TASK_WAIT_FOR_ALL() do {} while(0) +#define _STARPU_TRACE_EVENT_ALWAYS(S) do {(void)(S);} while(0) +#define _STARPU_TRACE_EVENT(S) do {(void)(S);} while(0) +#define _STARPU_TRACE_EVENT_VERBOSE(S) do {(void)(S);} while(0) +#define _STARPU_TRACE_THREAD_EVENT(S) do {(void)(S);} while(0) +#define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) +#define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) +#define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) +#define _STARPU_TRACE_MUTEX_UNLOCKED() do {} while(0) +#define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) +#define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) +#define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) +#define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) +#define 
_STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) +#define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) +#define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) +#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) +#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void)(file); (void)(line);} while(0) +#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) +#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void)(file); (void)(line);} while(0) +#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) +#define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) +#define _STARPU_TRACE_COND_WAIT_END() do {} while(0) +#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) +#define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) +#define _STARPU_TRACE_MEMORY_FULL(size) do {(void)(size);} while(0) +#define _STARPU_TRACE_DATA_LOAD(workerid,size) do {(void)(workerid); (void)(size);} while(0) +#define _STARPU_TRACE_START_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) +#define _STARPU_TRACE_END_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) +#define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) +#define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) +#define _STARPU_TRACE_HYPERVISOR_BEGIN() do {} while(0) +#define _STARPU_TRACE_HYPERVISOR_END() do {} while(0) +#define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) do {(void)(component);} while (0) +#define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) do {(void)(parent); (void)(child);} while (0) +#define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) do {(void)(from); (void)(to); (void)(task); (void)(prio);} while (0) +#define 
_STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) do {(void)(from); (void)(to); (void)(task);} while (0) +#define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do {(void)(handle);} while (0) +#define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) do {(void)(handle);} while (0) +#define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) +#define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) +#define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) do {(void)(handle); (void)(node);} while(0) +#define _STARPU_TRACE_DATA_STATE_OWNER(handle, node) do {(void)(handle); (void)(node);} while(0) +#define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) do {(void)(handle); (void)(node);} while(0) +#define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) do {(void)(handle); (void)(orig); (void)(dest); (void)(prio); (void)(is_pre); (void)(req); } while(0) +#define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) do {(void)(event_id); (void)(task); (void)(value);} while(0) + +#ifdef STARPU_BUBBLE +#define _STARPU_TRACE_BUBBLE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) +#define _STARPU_TRACE_BUBBLE(a) do {(void)(a);} while(0) +#endif + +#endif // STARPU_USE_FXT + +#pragma GCC visibility pop + +#endif // __FXT_H__ diff --git a/src/common/graph.c b/src/common/graph.c new file mode 100644 index 0000000..58b8a5e --- /dev/null +++ b/src/common/graph.c @@ -0,0 +1,493 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This stores the task graph structure, to be used by the schedulers which need + * it. We do not always enable it since it is costly. To avoid interfering + * too much with execution, it may be a bit outdated, i.e. still contain jobs + * which have completed very recently. + * + * This is because we drop nodes lazily: when a job terminates, we just add the + * node to the dropped list (to avoid having to take the mutex on the whole + * graph). The graph gets updated whenever the graph mutex becomes available. + */ + +#include +#include +#include +#include + +/* Protects the whole task graph except the dropped list */ +static starpu_pthread_rwlock_t graph_lock; + +/* Whether we should enable recording the task graph */ +int _starpu_graph_record; + +/* This list contains all nodes without incoming dependency */ +static struct _starpu_graph_node_multilist_top top; +/* This list contains all nodes without outgoing dependency */ +static struct _starpu_graph_node_multilist_bottom bottom; +/* This list contains all nodes */ +static struct _starpu_graph_node_multilist_all all; + +/* Protects the dropped list, always taken before graph lock */ +static starpu_pthread_mutex_t dropped_lock; +/* This list contains all dropped nodes, i.e. 
the job terminated but the corresponding node is still in the graph */ +static struct _starpu_graph_node_multilist_dropped dropped; + +void _starpu_graph_init(void) +{ + STARPU_PTHREAD_RWLOCK_INIT(&graph_lock, NULL); + _starpu_graph_node_multilist_head_init_top(&top); + _starpu_graph_node_multilist_head_init_bottom(&bottom); + _starpu_graph_node_multilist_head_init_all(&all); + STARPU_PTHREAD_MUTEX_INIT(&dropped_lock, NULL); + _starpu_graph_node_multilist_head_init_dropped(&dropped); +} + +/* LockWR the graph lock */ +void _starpu_graph_wrlock(void) +{ + starpu_worker_relax_on(); + STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock); + starpu_worker_relax_off(); +} + +void _starpu_graph_drop_node(struct _starpu_graph_node *node); + +/* This flushes the list of nodes to be dropped. Both the dropped_lock and + * graph_lock mutexes have to be held on entry, and are released. */ +void _starpu_graph_drop_dropped_nodes(void) +{ + struct _starpu_graph_node_multilist_dropped dropping; + + /* Pick up the list of dropped nodes */ + _starpu_graph_node_multilist_move_dropped(&dropped, &dropping); + STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock); + + /* And now process it if it's not empty. 
*/ + if (!_starpu_graph_node_multilist_empty_dropped(&dropping)) + { + struct _starpu_graph_node *node, *next; + + for (node = _starpu_graph_node_multilist_begin_dropped(&dropping); + node != _starpu_graph_node_multilist_end_dropped(&dropping); + node = next) + { + next = _starpu_graph_node_multilist_next_dropped(node); + _starpu_graph_drop_node(node); + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock); +} + +/* UnlockWR the graph lock */ +void _starpu_graph_wrunlock(void) +{ + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock); + starpu_worker_relax_off(); + _starpu_graph_drop_dropped_nodes(); +} + +/* LockRD the graph lock */ +void _starpu_graph_rdlock(void) +{ + starpu_worker_relax_on(); + STARPU_PTHREAD_RWLOCK_RDLOCK(&graph_lock); + starpu_worker_relax_off(); +} + +/* UnlockRD the graph lock */ +void _starpu_graph_rdunlock(void) +{ + STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock); + /* Take the opportunity to try to take it WR */ + if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0) + /* Good, flush dropped nodes */ + _starpu_graph_wrunlock(); +} + +static void __starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data) +{ + struct _starpu_graph_node *node; + + for (node = _starpu_graph_node_multilist_begin_all(&all); + node != _starpu_graph_node_multilist_end_all(&all); + node = _starpu_graph_node_multilist_next_all(node)) + func(data, node); +} + +/* Add a node to the graph */ +void _starpu_graph_add_job(struct _starpu_job *job) +{ + struct _starpu_graph_node *node; + _STARPU_CALLOC(node, 1, sizeof(*node)); + node->job = job; + job->graph_node = node; + STARPU_PTHREAD_MUTEX_INIT0(&node->mutex, NULL); + + _starpu_graph_wrlock(); + + /* It does not have any dependency yet, add to all lists */ + _starpu_graph_node_multilist_push_back_top(&top, node); + _starpu_graph_node_multilist_push_back_bottom(&bottom, node); + _starpu_graph_node_multilist_push_back_all(&all, node); + + _starpu_graph_wrunlock(); +} + +/* Add 
a node to an array of nodes */ +static unsigned add_node(struct _starpu_graph_node *node, struct _starpu_graph_node ***nodes, unsigned *n_nodes, unsigned *alloc_nodes, unsigned **slot) +{ + unsigned ret; + if (*n_nodes == *alloc_nodes) + { + if (*alloc_nodes) + *alloc_nodes *= 2; + else + *alloc_nodes = 4; + _STARPU_REALLOC(*nodes, *alloc_nodes * sizeof(**nodes)); + if (slot) + { + _STARPU_REALLOC(*slot, *alloc_nodes * sizeof(**slot)); + } + } + ret = (*n_nodes)++; + (*nodes)[ret] = node; + return ret; +} + +/* Add a dependency between nodes */ +void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job) +{ + unsigned rank_incoming, rank_outgoing; + _starpu_graph_wrlock(); + struct _starpu_graph_node *node = job->graph_node; + struct _starpu_graph_node *prev_node = prev_job->graph_node; + if (!node || !prev_node) + { + /* Already gone */ + _starpu_graph_wrunlock(); + return; + } + + if (_starpu_graph_node_multilist_queued_bottom(prev_node)) + /* Previous node is not at bottom any more */ + _starpu_graph_node_multilist_erase_bottom(&bottom, prev_node); + + if (_starpu_graph_node_multilist_queued_top(node)) + /* Next node is not at top any more */ + _starpu_graph_node_multilist_erase_top(&top, node); + + node->total_incoming++; + rank_incoming = add_node(prev_node, &node->incoming, &node->n_incoming, &node->alloc_incoming, &node->incoming_slot); + rank_outgoing = add_node(node, &prev_node->outgoing, &prev_node->n_outgoing, &prev_node->alloc_outgoing, &prev_node->outgoing_slot); + prev_node->outgoing_slot[rank_outgoing] = rank_incoming; + node->incoming_slot[rank_incoming] = rank_outgoing; + + _starpu_graph_wrunlock(); +} + +/* Drop a node, and thus its dependencies */ +void _starpu_graph_drop_node(struct _starpu_graph_node *node) +{ + unsigned i; + STARPU_ASSERT(!node->job); + + if (_starpu_graph_node_multilist_queued_bottom(node)) + _starpu_graph_node_multilist_erase_bottom(&bottom, node); + if 
(_starpu_graph_node_multilist_queued_top(node)) + _starpu_graph_node_multilist_erase_top(&top, node); + if (_starpu_graph_node_multilist_queued_all(node)) + _starpu_graph_node_multilist_erase_all(&all, node); + + /* Drop ourself from the incoming part of the outgoing nodes. */ + for (i = 0; i < node->n_outgoing; i++) + { + struct _starpu_graph_node *next = node->outgoing[i]; + if (next) + next->incoming[node->outgoing_slot[i]] = NULL; + } + + /* Drop ourself from the outgoing part of the incoming nodes, + * in case we happen to get dropped before it. */ + for (i = 0; i < node->n_incoming; i++) + { + struct _starpu_graph_node *prev = node->incoming[i]; + if (prev) + prev->outgoing[node->incoming_slot[i]] = NULL; + } + + node->n_outgoing = 0; + free(node->outgoing); + node->outgoing = NULL; + free(node->outgoing_slot); + node->outgoing_slot = NULL; + node->alloc_outgoing = 0; + node->n_incoming = 0; + free(node->incoming); + node->incoming = NULL; + free(node->incoming_slot); + node->incoming_slot = NULL; + node->alloc_incoming = 0; + free(node); +} + +/* Drop a job */ +void _starpu_graph_drop_job(struct _starpu_job *job) +{ + struct _starpu_graph_node *node = job->graph_node; + job->graph_node = NULL; + if (!node) + return; + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&node->mutex); + starpu_worker_relax_off(); + /* Will not be able to use the job any more */ + node->job = NULL; + STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex); + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock); + starpu_worker_relax_off(); + /* Queue for removal when lock becomes available */ + _starpu_graph_node_multilist_push_back_dropped(&dropped, node); + if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0) + { + /* Graph wrlock is available, drop nodes immediately */ + _starpu_graph_drop_dropped_nodes(); + } + else + STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock); +} + +static void _starpu_graph_set_n(void *data, struct _starpu_graph_node *node) +{ + int value = 
(intptr_t) data; + node->graph_n = value; +} + +/* Call func for each vertex of the task graph, from bottom to top, in topological order */ +static void _starpu_graph_compute_bottom_up(void (*func)(struct _starpu_graph_node *next_node, struct _starpu_graph_node *prev_node, void *data), void *data) +{ + struct _starpu_graph_node *node, *node2; + struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set; + unsigned current_n, next_n, i, j; + unsigned current_alloc = 0, next_alloc = 0, swap_alloc; + + /* Classical flow algorithm: start from bottom, and propagate depths to top */ + + /* Set number of processed outgoing edges to 0 for each node */ + __starpu_graph_foreach(_starpu_graph_set_n, (void*) 0); + + /* Start with the bottom of the graph */ + current_n = 0; + for (node = _starpu_graph_node_multilist_begin_bottom(&bottom); + node != _starpu_graph_node_multilist_end_bottom(&bottom); + node = _starpu_graph_node_multilist_next_bottom(node)) + add_node(node, ¤t_set, ¤t_n, ¤t_alloc, NULL); + + /* Now propagate to top as long as we have current nodes */ + while (current_n) + { + /* Next set is initially empty */ + next_n = 0; + + /* For each node in the current set */ + for (i = 0; i < current_n; i++) + { + node = current_set[i]; + /* For each parent of this node */ + for (j = 0; j < node->n_incoming; j++) + { + node2 = node->incoming[j]; + if (!node2) + continue; + node2->graph_n++; + func(node, node2, data); + + if ((unsigned) node2->graph_n == node2->n_outgoing) + /* All outgoing edges were processed, can now add to next set */ + add_node(node2, &next_set, &next_n, &next_alloc, NULL); + } + } + + /* Swap next set with current set */ + swap_set = next_set; + swap_alloc = next_alloc; + next_set = current_set; + next_alloc = current_alloc; + current_set = swap_set; + current_alloc = swap_alloc; + current_n = next_n; + } + free(current_set); + free(next_set); +} + +static void compute_depth(struct _starpu_graph_node *next_node, struct 
_starpu_graph_node *prev_node, void *data) +{ + (void)data; + if (prev_node->depth < next_node->depth + 1) + prev_node->depth = next_node->depth + 1; +} + +void _starpu_graph_compute_depths(void) +{ + struct _starpu_graph_node *node; + + _starpu_graph_wrlock(); + + /* The bottom of the graph has depth 0 */ + for (node = _starpu_graph_node_multilist_begin_bottom(&bottom); + node != _starpu_graph_node_multilist_end_bottom(&bottom); + node = _starpu_graph_node_multilist_next_bottom(node)) + node->depth = 0; + + _starpu_graph_compute_bottom_up(compute_depth, NULL); + + _starpu_graph_wrunlock(); +} + +void _starpu_graph_compute_descendants(void) +{ + struct _starpu_graph_node *node, *node2, *node3; + struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set; + unsigned current_n, next_n, i, j; + unsigned current_alloc = 0, next_alloc = 0, swap_alloc; + + _starpu_graph_wrlock(); + + /* Yes, this is O(|V|.(|V|+|E|)) */ + + /* We could get O(|V|.|E|) by doing a topological sort first. 
+ * + * |E| is usually O(|V|), though (bounded number of data dependencies, + * and we use synchronization tasks) */ + + for (node = _starpu_graph_node_multilist_begin_all(&all); + node != _starpu_graph_node_multilist_end_all(&all); + node = _starpu_graph_node_multilist_next_all(node)) + { + unsigned descendants; + + /* Mark all nodes as unseen */ + for (node2 = _starpu_graph_node_multilist_begin_all(&all); + node2 != _starpu_graph_node_multilist_end_all(&all); + node2 = _starpu_graph_node_multilist_next_all(node2)) + node2->graph_n = 0; + + /* Start with the node we want to compute the number of descendants of */ + current_n = 0; + add_node(node, ¤t_set, ¤t_n, ¤t_alloc, NULL); + node->graph_n = 1; + + descendants = 0; + /* While we have descendants, count their descendants */ + while (current_n) + { + /* Next set is initially empty */ + next_n = 0; + + /* For each node in the current set */ + for (i = 0; i < current_n; i++) + { + node2 = current_set[i]; + /* For each child of this node2 */ + for (j = 0; j < node2->n_outgoing; j++) + { + node3 = node2->outgoing[j]; + if (!node3) + continue; + if (node3->graph_n) + /* Already seen */ + continue; + /* Add this node */ + node3->graph_n = 1; + descendants++; + add_node(node3, &next_set, &next_n, &next_alloc, NULL); + } + } + /* Swap next set with current set */ + swap_set = next_set; + swap_alloc = next_alloc; + next_set = current_set; + next_alloc = current_alloc; + current_set = swap_set; + current_alloc = swap_alloc; + current_n = next_n; + } + node->descendants = descendants; + } + + _starpu_graph_wrunlock(); + + free(current_set); + free(next_set); +} + +void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data) +{ + _starpu_graph_wrlock(); + __starpu_graph_foreach(func, data); + _starpu_graph_wrunlock(); +} + +struct _starpu_graph_node *_starpu_graph_task_node(struct starpu_task *task) +{ + // Can job be NULL? In other words, can a task not be associated with any job? 
+ struct _starpu_job *job = _starpu_get_job_associated_to_task(task); + + return job->graph_node; +} + +struct starpu_task *_starpu_graph_node_task(struct _starpu_graph_node *node) +{ + struct _starpu_job *job = node->job; + struct starpu_task *task = NULL; + + if (job) + task = job->task; + + return task; +} + +void _starpu_graph_node_outgoing(struct _starpu_graph_node *node, unsigned *n_outgoing, struct _starpu_graph_node ***outgoing) +{ + unsigned n, added = 0; + + _starpu_graph_rdlock(); + + if (*n_outgoing < node->n_outgoing) + { + // Reallocate the 'outgoing' array if its size is smaller than the node's number of outgoing nodes + _STARPU_REALLOC(*outgoing, node->n_outgoing * sizeof(**outgoing)); + } + + *n_outgoing = node->n_outgoing; + + for (n = 0; n < *n_outgoing; ++n) + { + struct _starpu_graph_node *successor = node->outgoing[n]; + + if (successor) + *outgoing[added++] = node; + } + + _starpu_graph_rdunlock(); +} diff --git a/src/common/graph.h b/src/common/graph.h new file mode 100644 index 0000000..b8b9fe2 --- /dev/null +++ b/src/common/graph.h @@ -0,0 +1,133 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __GRAPH_H__ +#define __GRAPH_H__ + +#include +#include + +#pragma GCC visibility push(hidden) + +/** @file */ + +MULTILIST_CREATE_TYPE(_starpu_graph_node, all) +MULTILIST_CREATE_TYPE(_starpu_graph_node, top) +MULTILIST_CREATE_TYPE(_starpu_graph_node, bottom) +MULTILIST_CREATE_TYPE(_starpu_graph_node, dropped) + +struct _starpu_graph_node +{ + /** protects access to the job */ + starpu_pthread_mutex_t mutex; + /** pointer to the job, if it is still alive, NULL otherwise */ + struct _starpu_job *job; + + /** + * Fields for graph analysis for scheduling heuristics + */ + /** Member of list of all jobs without incoming dependency */ + struct _starpu_graph_node_multilist_top top; + /** Member of list of all jobs without outgoing dependency */ + struct _starpu_graph_node_multilist_bottom bottom; + /** Member of list of all jobs */ + struct _starpu_graph_node_multilist_all all; + /** Member of list of dropped jobs */ + struct _starpu_graph_node_multilist_dropped dropped; + + /** set of incoming dependencies */ + /** May contain NULLs for terminated jobs */ + struct _starpu_graph_node **incoming; + /** Index within corresponding outgoing array */ + unsigned *incoming_slot; + /** Number of slots used */ + unsigned n_incoming; + /** Size of incoming */ + unsigned alloc_incoming; + /** set of outgoing dependencies */ + struct _starpu_graph_node **outgoing; + + /** Total number of incoming dependencies, including those who completed */ + unsigned total_incoming; + + /** Index within corresponding incoming array */ + unsigned *outgoing_slot; + /** Number of slots used */ + unsigned n_outgoing; + /** Size of outgoing */ + unsigned alloc_outgoing; + + /** Rank from bottom, in number of jobs + * Only available if _starpu_graph_compute_depths was called + */ + unsigned depth; + /** Number of children, grand-children, etc. 
+ * Only available if _starpu_graph_compute_descendants was called + */ + unsigned descendants; + + /** Variable available for graph flow */ + int graph_n; +}; + +MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, all) +MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, top) +MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, bottom) +MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, dropped) + +extern int _starpu_graph_record; +void _starpu_graph_init(void); +void _starpu_graph_wrlock(void); +void _starpu_graph_rdlock(void); +void _starpu_graph_wrunlock(void); +void _starpu_graph_rdunlock(void); + +/** Add a job to the graph, called before any _starpu_graph_add_job_dep call */ +void _starpu_graph_add_job(struct _starpu_job *job); + +/** Add a dependency between jobs */ +void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job); + +/** Remove a job from the graph */ +void _starpu_graph_drop_job(struct _starpu_job *job); + +/** Really drop the nodes from the graph now */ +void _starpu_graph_drop_dropped_nodes(void); + +/** + * This make StarPU compute for each task the depth, i.e. the length + * of the longest path to a task without outgoing dependencies. 
+ * This does not take job duration into account, just the number +*/ +void _starpu_graph_compute_depths(void); + +/** Compute the descendants of jobs in the graph */ +void _starpu_graph_compute_descendants(void); + +/** + * This calls \e func for each node of the task graph, passing also \e + * data as it + * Apply func on each job of the graph +*/ +void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data); + +struct _starpu_graph_node *_starpu_graph_task_node(struct starpu_task *task); +struct starpu_task *_starpu_graph_node_task(struct _starpu_graph_node *node); +void _starpu_graph_node_outgoing(struct _starpu_graph_node *node, unsigned *n_outgoing, struct _starpu_graph_node ***outgoing); + +#pragma GCC visibility pop + +#endif /* __GRAPH_H__ */ diff --git a/src/common/hash.c b/src/common/hash.c new file mode 100644 index 0000000..db7a0a1 --- /dev/null +++ b/src/common/hash.c @@ -0,0 +1,81 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#define _STARPU_CRC32C_POLY_BE 0x1EDC6F41 + +static inline uint32_t STARPU_ATTRIBUTE_PURE starpu_crc32c_be_8(uint8_t inputbyte, uint32_t inputcrc) +{ + unsigned i; + uint32_t crc; + + crc = inputcrc ^ (((uint32_t) inputbyte) << 24); + for (i = 0; i < 8; i++) + crc = (crc << 1) ^ ((crc & 0x80000000) ? 
/* Fold a NUL-terminated string into a running CRC32C value.
 *
 * Each byte of str, up to but excluding the terminating NUL, is passed in
 * order to starpu_crc32c_be_8(), starting from inputcrc.  The resulting CRC is
 * returned; an empty string returns inputcrc unchanged.
 *
 * The string is walked with a pointer instead of an unsigned index compared
 * against a size_t length, so that the loop cannot wrap around on strings
 * longer than UINT_MAX bytes. */
uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc)
{
	uint32_t hash = inputcrc;
	const char *c;

	for (c = str; *c != '\0'; c++)
	{
		hash = starpu_crc32c_be_8((uint8_t)*c, hash);
	}

	return hash;
}
+ */ + +/* This includes the inline definitions in a .c file so that they can also be + * referenced from outside */ + +#define LIST_INLINE +#define PRIO_LIST_INLINE +#include diff --git a/src/common/knobs.c b/src/common/knobs.c new file mode 100644 index 0000000..bad77cc --- /dev/null +++ b/src/common/knobs.c @@ -0,0 +1,918 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Performance counters and configurable knobs */ + +#include +#include +#include +#include +#include +#include +#include + +/* Performance Monitoring */ +struct perf_counter_array +{ + int size; + struct starpu_perf_counter *array; + int updater_array_size; + void (**updater_array)(struct starpu_perf_counter_sample *sample, void *context); +}; + +static struct perf_counter_array global_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; +static struct perf_counter_array per_worker_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; +static struct perf_counter_array per_codelet_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; + +static struct starpu_perf_counter_sample global_sample = { .scope = starpu_perf_counter_scope_global, .listener = NULL, .value_array = NULL }; + +/* - */ + +void _starpu_perf_counter_sample_init(struct starpu_perf_counter_sample *sample, enum starpu_perf_counter_scope scope) +{ + STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); + sample->scope = scope; + sample->listener = NULL; + sample->value_array = NULL; + _starpu_spin_init(&sample->lock); +} + +void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample) +{ + STARPU_ASSERT(sample->listener == NULL); + sample->listener = NULL; + if (sample->value_array) + { + free(sample->value_array); + } + sample->value_array = NULL; + sample->scope = starpu_perf_counter_scope_undefined; + _starpu_spin_destroy(&sample->lock); +} + +/* - */ + +void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig) +{ + if (pconfig->conf.start_perf_counter_collection) + { + /* start perf counter collection immediately */ + pconfig->perf_counter_pause_depth = 0; + } + else + { + /* defer perf counter collection until call to + * starpu_perf_counter_start_collection () */ + pconfig->perf_counter_pause_depth = 1; + } + STARPU_ASSERT(!_starpu_machine_is_running()); + 
_starpu_perf_counter_sample_init(&global_sample, starpu_perf_counter_scope_global); + + /* call counter registration routines in each modules */ + _starpu__task_c__register_counters(); +} + +void _starpu_perf_counter_exit(void) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + _starpu_perf_counter_unregister_all_scopes(); + _starpu_perf_counter_sample_exit(&global_sample); +} + +/* - */ + +void starpu_perf_counter_collection_start() +{ + STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth); + (void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, -1); +} + +void starpu_perf_counter_collection_stop() +{ + STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth); + (void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, +1); +} + +/* - */ + +int starpu_perf_counter_scope_name_to_id(const char * const name) +{ + if (strcmp(name, "global") == 0) + return starpu_perf_counter_scope_global; + if (strcmp(name, "per_worker") == 0) + return starpu_perf_counter_scope_per_worker; + if (strcmp(name, "per_codelet") == 0) + return starpu_perf_counter_scope_per_codelet; + return -1; +} + +const char *starpu_perf_counter_scope_id_to_name(const enum starpu_perf_counter_scope scope) +{ + switch (scope) + { + case starpu_perf_counter_scope_global: + return "global"; + + case starpu_perf_counter_scope_per_worker: + return "per_worker"; + + case starpu_perf_counter_scope_per_codelet: + return "per_codelet"; + + default: + return NULL; + }; +} + +/* - */ + +int starpu_perf_counter_type_name_to_id(const char * const name) +{ + if (strcmp(name, "int32") == 0) + return starpu_perf_counter_type_int32; + if (strcmp(name, "int64") == 0) + return starpu_perf_counter_type_int64; + if (strcmp(name, "float") == 0) + return starpu_perf_counter_type_float; + if (strcmp(name, "double") == 0) + return starpu_perf_counter_type_double; + return -1; +} + +const char *starpu_perf_counter_type_id_to_name(const enum starpu_perf_counter_type type) +{ + 
switch (type) + { + case starpu_perf_counter_type_int32: + return "int32"; + + case starpu_perf_counter_type_int64: + return "int64"; + + case starpu_perf_counter_type_float: + return "float"; + + case starpu_perf_counter_type_double: + return "double"; + + default: + return NULL; + }; +} + +static struct perf_counter_array *_get_counters(const enum starpu_perf_counter_scope scope) +{ + STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); + switch (scope) + { + case starpu_perf_counter_scope_global: + return &global_counters; + + case starpu_perf_counter_scope_per_worker: + return &per_worker_counters; + + case starpu_perf_counter_scope_per_codelet: + return &per_codelet_counters; + + default: + STARPU_ABORT(); + }; + return NULL; +}; + +/* - */ + +int _starpu_perf_counter_register(enum starpu_perf_counter_scope scope, const char *name, enum starpu_perf_counter_type type, const char *help) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + struct perf_counter_array * const counters = _get_counters(scope); + STARPU_ASSERT_PERF_COUNTER_TYPE_DEFINED(type); + + const int index = counters->size++; + _STARPU_REALLOC(counters->array, counters->size * sizeof(*counters->array)); + + struct starpu_perf_counter * const new_counter = &counters->array[index]; + const int id = _starpu_perf_counter_id_build(scope, index); + new_counter->id = id; + new_counter->name = name; + new_counter->help = help; + new_counter->type = type; + + return id; +} + +static void _unregister_counter_scope(enum starpu_perf_counter_scope scope) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + struct perf_counter_array * const counters = _get_counters(scope); + free(counters->array); + counters->array = NULL; + free(counters->updater_array); + counters->updater_array = NULL; + counters->size = 0; +} + +void _starpu_perf_counter_unregister_all_scopes(void) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + _unregister_counter_scope(starpu_perf_counter_scope_global); + 
_unregister_counter_scope(starpu_perf_counter_scope_per_worker); + _unregister_counter_scope(starpu_perf_counter_scope_per_codelet); +} + +/* - */ + +int starpu_perf_counter_nb(enum starpu_perf_counter_scope scope) +{ + const struct perf_counter_array * const counters = _get_counters(scope); + return counters->size; +} + +int starpu_perf_counter_nth_to_id(enum starpu_perf_counter_scope scope, int nth) +{ + return _starpu_perf_counter_id_build(scope, nth); +} + +int starpu_perf_counter_name_to_id(enum starpu_perf_counter_scope scope, const char *name) +{ + const struct perf_counter_array * const counters = _get_counters(scope); + int index; + for (index = 0; index < counters->size; index++) + { + if (strcmp(name, counters->array[index].name) == 0) + { + return _starpu_perf_counter_id_build(scope, index); + } + } + return -1; +} + +const char *starpu_perf_counter_id_to_name(int id) +{ + const int scope = _starpu_perf_counter_id_get_scope(id); + const int index = _starpu_perf_counter_id_get_index(id); + const struct perf_counter_array * const counters = _get_counters(scope); + if (index < 0 || index >= counters->size) + return NULL; + return counters->array[index].name; +} + +const char *starpu_perf_counter_get_help_string(int id) +{ + const int scope = _starpu_perf_counter_id_get_scope(id); + const int index = _starpu_perf_counter_id_get_index(id); + const struct perf_counter_array * const counters = _get_counters(scope); + STARPU_ASSERT(index >= 0 && index < counters->size); + return counters->array[index].help; +} + +int starpu_perf_counter_get_type_id(int id) +{ + const int scope = _starpu_perf_counter_id_get_scope(id); + const int index = _starpu_perf_counter_id_get_index(id); + const struct perf_counter_array * const counters = _get_counters(scope); + STARPU_ASSERT(index >= 0 && index < counters->size); + return counters->array[index].type; +} + +/* - */ + +void starpu_perf_counter_list_avail(enum starpu_perf_counter_scope scope) +{ + const struct 
perf_counter_array * const counters = _get_counters(scope); + int index; + for (index = 0; index < counters->size; index++) + { + const struct starpu_perf_counter * const counter = &counters->array[index]; + printf("0x%08x:%s [%s] - %s\n", _starpu_perf_counter_id_build(scope, index), counter->name, starpu_perf_counter_type_id_to_name(counter->type), counter->help); + } +} + +void starpu_perf_counter_list_all_avail(void) +{ + printf("scope: global\n"); + starpu_perf_counter_list_avail(starpu_perf_counter_scope_global); + + printf("scope: per_worker\n"); + starpu_perf_counter_list_avail(starpu_perf_counter_scope_per_worker); + + printf("scope: per_codelet\n"); + starpu_perf_counter_list_avail(starpu_perf_counter_scope_per_codelet); +} + +/* - */ + +struct starpu_perf_counter_set *starpu_perf_counter_set_alloc(enum starpu_perf_counter_scope scope) +{ + struct perf_counter_array *counters = _get_counters(scope); + struct starpu_perf_counter_set *set; + _STARPU_MALLOC(set, sizeof(*set)); + set->scope = scope; + set->size = counters->size; + _STARPU_CALLOC(set->index_array, set->size, sizeof(*set->index_array)); + return set; +} + +void starpu_perf_counter_set_free(struct starpu_perf_counter_set *set) +{ + memset(set->index_array, 0, set->size*sizeof(*set->index_array)); + free(set->index_array); + memset(set, 0, sizeof(*set)); + free(set); +} + +/* - */ + +void starpu_perf_counter_set_enable_id(struct starpu_perf_counter_set *set, int id) +{ + const int index = _starpu_perf_counter_id_get_index(id); + STARPU_ASSERT(index >= 0 && index < set->size); + set->index_array[index] = 1; +} + +void starpu_perf_counter_set_disable_id(struct starpu_perf_counter_set *set, int id) +{ + const int index = _starpu_perf_counter_id_get_index(id); + STARPU_ASSERT(index >= 0 && index < set->size); + set->index_array[index] = 0; +} + +/* - */ + +struct starpu_perf_counter_listener *starpu_perf_counter_listener_init(struct starpu_perf_counter_set *set, + void (*callback)(struct 
starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context), + void *user_arg) +{ + struct starpu_perf_counter_listener *listener; + _STARPU_MALLOC(listener, sizeof(*listener)); + listener->set = set; + listener->callback = callback; + listener->user_arg = user_arg; + return listener; +} + +void starpu_perf_counter_listener_exit(struct starpu_perf_counter_listener *listener) +{ + memset(listener, 0, sizeof(*listener)); + free(listener); +} + +/* - */ + +static void set_listener(struct starpu_perf_counter_sample *sample, struct starpu_perf_counter_listener *listener) +{ + _starpu_spin_lock(&sample->lock); + STARPU_ASSERT(sample->listener == NULL); + + STARPU_ASSERT(listener->set != NULL); + STARPU_ASSERT(listener->set->scope == sample->scope); + + sample->listener = listener; + + /* Assume a single listener, for now, which sets the set of counters to monitor */ + STARPU_ASSERT(sample->value_array == NULL); + _STARPU_CALLOC(sample->value_array, sample->listener->set->size, sizeof(*sample->value_array)); + _starpu_spin_unlock(&sample->lock); +} + + +void starpu_perf_counter_set_global_listener(struct starpu_perf_counter_listener *listener) +{ + set_listener(&global_sample, listener); +} + +void starpu_perf_counter_set_per_worker_listener(unsigned workerid, struct starpu_perf_counter_listener *listener) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + set_listener(&worker->perf_counter_sample, listener); +} + +void starpu_perf_counter_set_all_per_worker_listeners(struct starpu_perf_counter_listener *listener) +{ + unsigned nworkers = _starpu_worker_get_count(); + unsigned workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + starpu_perf_counter_set_per_worker_listener(workerid, listener); + } +} + +void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, struct starpu_perf_counter_listener *listener) +{ + STARPU_ASSERT(cl->perf_counter_values == NULL); + 
_STARPU_CALLOC(cl->perf_counter_values, 1, sizeof(*cl->perf_counter_values)); + + STARPU_ASSERT(cl->perf_counter_sample == NULL); + _STARPU_MALLOC(cl->perf_counter_sample, sizeof(*cl->perf_counter_sample)); + _starpu_perf_counter_sample_init(cl->perf_counter_sample, starpu_perf_counter_scope_per_codelet); + set_listener(cl->perf_counter_sample, listener); +} + +/* - */ + +static void unset_listener(struct starpu_perf_counter_sample *sample) +{ + _starpu_spin_lock(&sample->lock); + STARPU_ASSERT(sample->listener != NULL); + + memset(sample->value_array, 0, sample->listener->set->size * sizeof(*sample->value_array)); + free(sample->value_array); + sample->value_array = NULL; + sample->listener = NULL; + _starpu_spin_unlock(&sample->lock); +} + +void starpu_perf_counter_unset_global_listener() +{ + unset_listener(&global_sample); +} + +void starpu_perf_counter_unset_per_worker_listener(unsigned workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + unset_listener(&worker->perf_counter_sample); +} + +void starpu_perf_counter_unset_all_per_worker_listeners(void) +{ + unsigned nworkers = _starpu_worker_get_count(); + unsigned workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + starpu_perf_counter_unset_per_worker_listener(workerid); + } +} + +void starpu_perf_counter_unset_per_codelet_listener(struct starpu_codelet *cl) +{ + STARPU_ASSERT(cl->perf_counter_sample != NULL); + unset_listener(cl->perf_counter_sample); + _starpu_perf_counter_sample_exit(cl->perf_counter_sample); + free(cl->perf_counter_sample); + cl->perf_counter_sample = NULL; + free(cl->perf_counter_values); + cl->perf_counter_values = NULL; +} + +/* - */ + +void _starpu_perf_counter_register_updater(enum starpu_perf_counter_scope scope, void (*updater)(struct starpu_perf_counter_sample *sample, void *context)) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + struct perf_counter_array *counters = _get_counters(scope); + int upd_id; + upd_id = 
counters->updater_array_size++; + _STARPU_REALLOC(counters->updater_array, counters->updater_array_size * sizeof(*counters->updater_array)); + counters->updater_array[upd_id] = updater; +} + +/* - */ + +static void update_sample(struct starpu_perf_counter_sample *sample, void *context) +{ + if (sample->listener == NULL) + return; + + _starpu_spin_lock(&sample->lock); + struct perf_counter_array *counters = _get_counters(sample->scope); + + /* for now, we assume that a sample will only be updated if it has a listener plugged, with a non-empty set */ + if (sample->listener != NULL && sample->listener->set != NULL) + { + if (counters->updater_array_size > 0) + { + int upd_id; + for (upd_id = 0; upd_id < counters->updater_array_size; upd_id++) + { + counters->updater_array[upd_id](sample, context); + } + + if (sample->listener != NULL) + { + sample->listener->callback(sample->listener, sample, context); + } + } + } + _starpu_spin_unlock(&sample->lock); +} + +void _starpu_perf_counter_update_global_sample(void) +{ + update_sample(&global_sample, NULL); +} + +void _starpu_perf_counter_update_per_worker_sample(unsigned workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + update_sample(&worker->perf_counter_sample, worker); +} + +void _starpu_perf_counter_update_per_codelet_sample(struct starpu_codelet *cl) +{ + update_sample(cl->perf_counter_sample, cl); +} + +#define STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(STRING, TYPE) \ +TYPE starpu_perf_counter_sample_get_##STRING##_value(struct starpu_perf_counter_sample *sample, const int counter_id) \ +{ \ + STARPU_ASSERT(starpu_perf_counter_get_type_id(counter_id) == starpu_perf_counter_type_##STRING); \ + STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); \ + STARPU_ASSERT(_starpu_perf_counter_id_get_scope(counter_id) == sample->listener->set->scope); \ + \ + const struct starpu_perf_counter_set * const set = sample->listener->set; \ + const int index = 
_starpu_perf_counter_id_get_index(counter_id); \ + STARPU_ASSERT(index < set->size); \ + STARPU_ASSERT(set->index_array[index] > 0); \ + return sample->value_array[index].STRING##_val; \ +} +STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(int32, int32_t); +STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(int64, int64_t); +STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(float, float); +STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(double, double); +#undef STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE + +/* -------------------------------------------------------------------- */ +/* Performance Steering */ + +struct perf_knob_array +{ + int size; + struct starpu_perf_knob *array; +}; + +static struct perf_knob_array global_knobs = { .size = 0, .array = NULL }; +static struct perf_knob_array per_worker_knobs = { .size = 0, .array = NULL }; +static struct perf_knob_array per_scheduler_knobs = { .size = 0, .array = NULL }; + +void _starpu_perf_knob_init(void) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + /* call knob registration routines in each modules */ + _starpu__workers_c__register_knobs(); + _starpu__task_c__register_knobs(); + _starpu__dmda_c__register_knobs(); +} + +void _starpu_perf_knob_exit(void) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + _starpu_perf_knob_unregister_all_scopes(); + _starpu__workers_c__unregister_knobs(); + _starpu__task_c__unregister_knobs(); + _starpu__dmda_c__unregister_knobs(); +} + +/* - */ + +int starpu_perf_knob_scope_name_to_id(const char * const name) +{ + if (strcmp(name, "global") == 0) + return starpu_perf_knob_scope_global; + if (strcmp(name, "per_worker") == 0) + return starpu_perf_knob_scope_per_worker; + if (strcmp(name, "per_scheduler") == 0) + return starpu_perf_knob_scope_per_scheduler; + return -1; +} + +const char *starpu_perf_knob_scope_id_to_name(const enum starpu_perf_knob_scope scope) +{ + switch (scope) + { + case starpu_perf_knob_scope_global: + return "global"; + + case starpu_perf_knob_scope_per_worker: + return 
"per_worker"; + + case starpu_perf_knob_scope_per_scheduler: + return "per_scheduler"; + + default: + return NULL; + }; +} + +/* - */ + +int starpu_perf_knob_type_name_to_id(const char * const name) +{ + if (strcmp(name, "int32") == 0) + return starpu_perf_knob_type_int32; + if (strcmp(name, "int64") == 0) + return starpu_perf_knob_type_int64; + if (strcmp(name, "float") == 0) + return starpu_perf_knob_type_float; + if (strcmp(name, "double") == 0) + return starpu_perf_knob_type_double; + return -1; +} + +const char *starpu_perf_knob_type_id_to_name(const enum starpu_perf_knob_type type) +{ + switch (type) + { + case starpu_perf_knob_type_int32: + return "int32"; + + case starpu_perf_knob_type_int64: + return "int64"; + + case starpu_perf_knob_type_float: + return "float"; + + case starpu_perf_knob_type_double: + return "double"; + + default: + return NULL; + }; +} + +static struct perf_knob_array *_get_knobs(const enum starpu_perf_knob_scope scope) +{ + STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); + switch (scope) + { + case starpu_perf_knob_scope_global: + return &global_knobs; + + case starpu_perf_knob_scope_per_worker: + return &per_worker_knobs; + + case starpu_perf_knob_scope_per_scheduler: + return &per_scheduler_knobs; + + default: + STARPU_ABORT(); + }; + return NULL; +}; + +/* - */ + +struct starpu_perf_knob_group *_starpu_perf_knob_group_register(enum starpu_perf_knob_scope scope, + void (*set_func)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value), + void (*get_func)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)) +{ + STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); + STARPU_ASSERT(set_func != NULL); + STARPU_ASSERT(get_func != NULL); + struct starpu_perf_knob_group *new_group; + _STARPU_MALLOC(new_group, sizeof(*new_group)); + new_group->scope = scope; + new_group->set = set_func; + new_group->get = get_func; + new_group->array_size = 0; 
+ new_group->array = NULL; + return new_group; +} +/* Free a knob group and its knob pointer array; the group must be consistent (non-empty with an array, or empty with none). */ +void _starpu_perf_knob_group_unregister(struct starpu_perf_knob_group *group) +{ + STARPU_ASSERT((group->array_size > 0 && group->array != NULL) || (group->array_size == 0 && group->array == NULL)); + if (group->array != NULL) + { + free(group->array); + } + memset(group, 0, sizeof(*group)); + free(group); +} + +/* - */ + +int _starpu_perf_knob_register(struct starpu_perf_knob_group *group, const char *name, enum starpu_perf_knob_type type, const char *help) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + struct perf_knob_array * const knobs = _get_knobs(group->scope); + STARPU_ASSERT_PERF_KNOB_TYPE_DEFINED(type); + + const int index = knobs->size++; + _STARPU_REALLOC(knobs->array, knobs->size * sizeof(*knobs->array)); /* NOTE(review): the realloc may move knobs->array, so &knobs->array[i] pointers saved in group->array by earlier registrations may become stale -- confirm group->array entries are never dereferenced, or refresh them here */ + + struct starpu_perf_knob * const new_knob = &knobs->array[index]; + const int id = _starpu_perf_knob_id_build(group->scope, index); + new_knob->id = id; + new_knob->name = name; + new_knob->help = help; + new_knob->type = type; + new_knob->group = group; + new_knob->id_in_group = group->array_size++; + _STARPU_REALLOC(group->array, group->array_size * sizeof(*group->array)); + group->array[new_knob->id_in_group] = new_knob; + return id; +} + +static void _unregister_knob_scope(enum starpu_perf_knob_scope scope) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + struct perf_knob_array * const knobs = _get_knobs(scope); + free(knobs->array); + knobs->array = NULL; + knobs->size = 0; +} + +void _starpu_perf_knob_unregister_all_scopes(void) +{ + STARPU_ASSERT(!_starpu_machine_is_running()); + + _unregister_knob_scope(starpu_perf_knob_scope_global); + _unregister_knob_scope(starpu_perf_knob_scope_per_worker); + _unregister_knob_scope(starpu_perf_knob_scope_per_scheduler); +} + +/* - */ + +int starpu_perf_knob_nb(enum starpu_perf_knob_scope scope) +{ + const struct perf_knob_array * const knobs = _get_knobs(scope); + return knobs->size; +} + +int starpu_perf_knob_nth_to_id(enum 
starpu_perf_knob_scope scope, int nth) +{ + return _starpu_perf_knob_id_build(scope, nth); +} + +int starpu_perf_knob_name_to_id(enum starpu_perf_knob_scope scope, const char *name) +{ + const struct perf_knob_array * const knobs = _get_knobs(scope); + int index; + for (index = 0; index < knobs->size; index++) + { + if (strcmp(name, knobs->array[index].name) == 0) + { + return _starpu_perf_knob_id_build(scope, index); + } + } + return -1; +} + +const char *starpu_perf_knob_id_to_name(int id) +{ + const int scope = _starpu_perf_knob_id_get_scope(id); + const int index = _starpu_perf_knob_id_get_index(id); + const struct perf_knob_array * const knobs = _get_knobs(scope); + if (index < 0 || index >= knobs->size) + return NULL; + return knobs->array[index].name; +} + +const char *starpu_perf_knob_get_help_string(int id) +{ + const int scope = _starpu_perf_knob_id_get_scope(id); + const int index = _starpu_perf_knob_id_get_index(id); + const struct perf_knob_array * const knobs = _get_knobs(scope); + STARPU_ASSERT(index >= 0 && index < knobs->size); + return knobs->array[index].help; +} + +int starpu_perf_knob_get_type_id(int id) +{ + const int scope = _starpu_perf_knob_id_get_scope(id); + const int index = _starpu_perf_knob_id_get_index(id); + const struct perf_knob_array * const knobs = _get_knobs(scope); + STARPU_ASSERT(index >= 0 && index < knobs->size); + return knobs->array[index].type; +} + +static struct starpu_perf_knob *get_knob(int id) +{ + const int scope = _starpu_perf_knob_id_get_scope(id); + struct perf_knob_array *knobs = _get_knobs(scope); + const int index = _starpu_perf_knob_id_get_index(id); + STARPU_ASSERT(index >= 0 && index < knobs->size); + return &knobs->array[index]; +} + +/* - */ + +void starpu_perf_knob_list_avail(enum starpu_perf_knob_scope scope) +{ + const struct perf_knob_array * const knobs = _get_knobs(scope); + int index; + for (index = 0; index < knobs->size; index++) + { + const struct starpu_perf_knob * const knob = 
&knobs->array[index]; + printf("0x%08x:%s [%s] - %s\n", _starpu_perf_knob_id_build(scope, index), knob->name, starpu_perf_knob_type_id_to_name(knob->type), knob->help); + } +} + +void starpu_perf_knob_list_all_avail(void) +{ + printf("scope: global\n"); + starpu_perf_knob_list_avail(starpu_perf_knob_scope_global); + + printf("scope: per_worker\n"); + starpu_perf_knob_list_avail(starpu_perf_knob_scope_per_worker); + + printf("scope: per_scheduler\n"); + starpu_perf_knob_list_avail(starpu_perf_knob_scope_per_scheduler); +} +/* Generate the typed setters for global-scope knobs; no context is needed, so NULL is passed to the group's set callback. */ +#define __STARPU_PERF_KNOB_SET_TYPED_VALUE(SCOPE_NAME, STRING, TYPE) \ +void starpu_perf_knob_set_##SCOPE_NAME##_##STRING##_value(const int knob_id, const TYPE value) \ +{ \ + STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_global); \ + const struct starpu_perf_knob * const knob = get_knob(knob_id); \ + STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ + const struct starpu_perf_knob_group * const knob_group = knob->group; \ + const struct starpu_perf_knob_value kv = { .val_##TYPE = value }; \ + knob_group->set(knob, NULL, &kv); \ +} + +__STARPU_PERF_KNOB_SET_TYPED_VALUE(global, int32, int32_t); +__STARPU_PERF_KNOB_SET_TYPED_VALUE(global, int64, int64_t); +__STARPU_PERF_KNOB_SET_TYPED_VALUE(global, float, float); +__STARPU_PERF_KNOB_SET_TYPED_VALUE(global, double, double); + +#undef __STARPU_PERF_KNOB_SET_TYPED_VALUE + +#define __STARPU_PERF_KNOB_GET_TYPED_VALUE(SCOPE_NAME, STRING, TYPE) \ +TYPE starpu_perf_knob_get_##SCOPE_NAME##_##STRING##_value(const int knob_id) \ +{ \ + STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_global); \ + const struct starpu_perf_knob * const knob = get_knob(knob_id); \ + STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ + const struct starpu_perf_knob_group * const knob_group = knob->group; \ + struct starpu_perf_knob_value kv; \ + knob_group->get(knob, NULL, &kv); \ 
+ return kv.val_##TYPE; \ +} + +__STARPU_PERF_KNOB_GET_TYPED_VALUE(global, int32, int32_t); +__STARPU_PERF_KNOB_GET_TYPED_VALUE(global, int64, int64_t); +__STARPU_PERF_KNOB_GET_TYPED_VALUE(global, float, float); +__STARPU_PERF_KNOB_GET_TYPED_VALUE(global, double, double); + +#undef __STARPU_PERF_KNOB_GET_TYPED_VALUE + +/* Generate the typed setters for scopes that take a context argument (worker id or scheduler policy name); the address of that argument is passed to the group's set callback. */ +#define __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(SCOPE_NAME, STRING, TYPE, CONTEXT_TYPE, CONTEXT_VAR) \ +void starpu_perf_knob_set_##SCOPE_NAME##_##STRING##_value(const int knob_id, CONTEXT_TYPE CONTEXT_VAR, const TYPE value) \ +{ \ + STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_##SCOPE_NAME); \ + const struct starpu_perf_knob * const knob = get_knob(knob_id); \ + STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ + const struct starpu_perf_knob_group * const knob_group = knob->group; \ + const struct starpu_perf_knob_value kv = { .val_##TYPE = value }; \ + knob_group->set(knob, &CONTEXT_VAR, &kv); \ +} + +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, int32, int32_t, unsigned, workerid); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, int64, int64_t, unsigned, workerid); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, float, float, unsigned, workerid); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, double, double, unsigned, workerid); + +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int32, int32_t, const char *, sched_policy_name); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int64, int64_t, const char *, sched_policy_name); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, float, float, const char *, sched_policy_name); +__STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, double, double, const char *, sched_policy_name); + +#undef __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT + +#define 
__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(SCOPE_NAME, STRING, TYPE, CONTEXT_TYPE, CONTEXT_VAR) \ +TYPE starpu_perf_knob_get_##SCOPE_NAME##_##STRING##_value(const int knob_id, CONTEXT_TYPE CONTEXT_VAR) \ +{ \ + STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_##SCOPE_NAME); \ + const struct starpu_perf_knob * const knob = get_knob(knob_id); \ + STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ + const struct starpu_perf_knob_group * const knob_group = knob->group; \ + struct starpu_perf_knob_value kv; \ + knob_group->get(knob, &CONTEXT_VAR, &kv); \ + return kv.val_##TYPE; \ +} + +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, int32, int32_t, unsigned, workerid); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, int64, int64_t, unsigned, workerid); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, float, float, unsigned, workerid); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, double, double, unsigned, workerid); + +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int32, int32_t, const char *, sched_policy_name); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int64, int64_t, const char *, sched_policy_name); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, float, float, const char *, sched_policy_name); +__STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, double, double, const char *, sched_policy_name); + +#undef __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT /* the generator macro is not needed past this point */ + diff --git a/src/common/knobs.h b/src/common/knobs.h new file mode 100644 index 0000000..23ff497 --- /dev/null +++ b/src/common/knobs.h @@ -0,0 +1,390 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Performance counters and configurable knobs */ + +#ifndef __KNOBS_H__ +#define __KNOBS_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Performance Monitoring */ +#define STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(t) STARPU_ASSERT( \ + (t == starpu_perf_counter_scope_global) \ + || (t == starpu_perf_counter_scope_per_worker) \ + || (t == starpu_perf_counter_scope_per_codelet) \ + ) + +#define STARPU_ASSERT_PERF_COUNTER_TYPE_DEFINED(t) STARPU_ASSERT( \ + (t == starpu_perf_counter_type_int32) \ + || (t == starpu_perf_counter_type_int64) \ + || (t == starpu_perf_counter_type_float) \ + || (t == starpu_perf_counter_type_double) \ + ) + +#define _STARPU_PERF_COUNTER_ID_SCOPE_BITS 4 + +#if defined(STARPU_VAL_COMPARE_AND_SWAP64) && defined (STARPU_ATOMIC_ADD64) +#define STARPU_PERF_COUNTER_64 +#endif + +struct starpu_perf_counter_sample; +struct _starpu_worker; + +#define __STARPU_PERF_COUNTER_UPDATE_32BIT(OPNAME,OP,TYPENAME,TYPE) \ +static inline void _starpu_perf_counter_update_##OPNAME##_##TYPENAME(TYPE *ptr, TYPE value) \ +{ \ + STARPU_ASSERT(sizeof(TYPE) == sizeof(uint32_t)); \ + typedef uint32_t __attribute__((__may_alias__)) alias_uint32_t; \ + typedef TYPE __attribute__((__may_alias__)) alias_##TYPE; \ + \ + uint32_t raw_old = *(uint32_t *)ptr; \ + \ + while(value OP *(alias_##TYPE*)&raw_old) \ + 
{ \ + uint32_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP32((uint32_t *)ptr, raw_old, *(alias_uint32_t*)&value); \ + if (raw_old_check == raw_old) \ + break; \ + raw_old = raw_old_check; \ + } \ +} + +#ifdef STARPU_PERF_COUNTER_64 +typedef int64_t starpu_perf_counter_int64_t; +typedef double starpu_perf_counter_double; +#define __STARPU_PERF_COUNTER_UPDATE_64BIT(OPNAME,OP,TYPENAME,TYPE) \ +static inline void _starpu_perf_counter_update_##OPNAME##_##TYPENAME(TYPE *ptr, TYPE value) \ +{ \ + STARPU_ASSERT(sizeof(TYPE) == sizeof(uint64_t)); \ + typedef uint64_t __attribute__((__may_alias__)) alias_uint64_t; \ + typedef TYPE __attribute__((__may_alias__)) alias_##TYPE; \ + \ + uint64_t raw_old = *(uint64_t *)ptr; \ + \ + while(value OP *(alias_##TYPE*)&raw_old) \ + { \ + uint64_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP64((uint64_t *)ptr, raw_old, *(alias_uint64_t*)&value); \ + if (raw_old_check == raw_old) \ + break; \ + raw_old = raw_old_check; \ + } \ +} +#else +/* No native 64bit atomic operation, revert to lower precision */ +typedef int32_t starpu_perf_counter_int64_t; +typedef float starpu_perf_counter_double; +#define __STARPU_PERF_COUNTER_UPDATE_64BIT(OPNAME,OP,TYPENAME,TYPE) \ + __STARPU_PERF_COUNTER_UPDATE_32BIT(OPNAME,OP,TYPENAME,TYPE) +#endif + +/* Atomic max */ +__STARPU_PERF_COUNTER_UPDATE_32BIT(max,>=,int32,int32_t); +__STARPU_PERF_COUNTER_UPDATE_32BIT(max,>=,float,float); +__STARPU_PERF_COUNTER_UPDATE_64BIT(max,>=,int64,starpu_perf_counter_int64_t); +__STARPU_PERF_COUNTER_UPDATE_64BIT(max,>=,double,starpu_perf_counter_double); + +/* Atomic min */ +__STARPU_PERF_COUNTER_UPDATE_32BIT(min,<=,int32,int32_t); +__STARPU_PERF_COUNTER_UPDATE_32BIT(min,<=,float,float); +__STARPU_PERF_COUNTER_UPDATE_64BIT(min,<=,int64,starpu_perf_counter_int64_t); +__STARPU_PERF_COUNTER_UPDATE_64BIT(min,<=,double,starpu_perf_counter_double); + +#undef __STARPU_PERF_COUNTER_UPDATE_32BIT +#undef __STARPU_PERF_COUNTER_UPDATE_64BIT + +/** Floating point atomic accumulate */ 
+#define __STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(TYPENAME, TYPE) \ +static inline void _starpu_perf_counter_update_acc_##TYPENAME(TYPE *ptr, TYPE acc_value) \ +{ \ + STARPU_ASSERT(sizeof(TYPE) == sizeof(uint32_t)); \ + typedef uint32_t __attribute__((__may_alias__)) alias_uint32_t; \ + typedef TYPE __attribute__((__may_alias__)) alias_float; \ + uint32_t raw_old = *(uint32_t *)ptr; \ + while (1) \ + { \ + TYPE value = acc_value + *(alias_float*)&raw_old; \ + uint32_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP32((uint32_t *)ptr, raw_old, *(alias_uint32_t*)&value); \ + if (raw_old_check == raw_old) \ + break; \ + raw_old = raw_old_check; \ + } \ +} + +__STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(float, float); +#ifdef STARPU_PERF_COUNTER_64 +static inline void _starpu_perf_counter_update_acc_double(double *ptr, double acc_value) +{ + STARPU_ASSERT(sizeof(double) == sizeof(uint64_t)); + typedef uint64_t __attribute__((__may_alias__)) alias_uint64_t; + typedef double __attribute__((__may_alias__)) alias_double; + uint64_t raw_old = *(uint64_t *)ptr; + while (1) + { + double value = acc_value + *(alias_double*)&raw_old; + uint64_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP64((uint64_t *)ptr, raw_old, *(alias_uint64_t*)&value); + if (raw_old_check == raw_old) + break; + raw_old = raw_old_check; + } +} +#else +__STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(double, starpu_perf_counter_double); +#endif + +#ifdef STARPU_ATOMIC_ADD64 +#define STARPU_PERF_COUNTER_ADD64(ptr, val) STARPU_ATOMIC_ADD64((ptr), (val)) +#else +#define STARPU_PERF_COUNTER_ADD64(ptr, val) STARPU_ATOMIC_ADD((ptr), (val)) +#endif + +struct starpu_perf_counter +{ + int id; + const char *name; + const char *help; + enum starpu_perf_counter_type type; +}; + +struct starpu_perf_counter_set +{ + enum starpu_perf_counter_scope scope; + int size; + int *index_array; +}; + +union starpu_perf_counter_value +{ + int32_t int32_val; + starpu_perf_counter_int64_t int64_val; + float float_val; + starpu_perf_counter_double 
double_val; +}; + +struct starpu_perf_counter_listener +{ + struct starpu_perf_counter_set *set; + void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context); + void *user_arg; +}; + +struct starpu_perf_counter_sample +{ + enum starpu_perf_counter_scope scope; + struct starpu_perf_counter_listener *listener; + union starpu_perf_counter_value *value_array; + struct _starpu_spinlock lock; +}; + +struct starpu_perf_counter_sample_cl_values +{ + struct + { + starpu_perf_counter_int64_t total_submitted; + starpu_perf_counter_int64_t peak_submitted; + starpu_perf_counter_int64_t current_submitted; + starpu_perf_counter_int64_t peak_ready; + starpu_perf_counter_int64_t current_ready; + starpu_perf_counter_int64_t total_executed; + starpu_perf_counter_double cumul_execution_time; + } task; +}; + +typedef void (*starpu_perf_counter_sample_updater)(struct starpu_perf_counter_sample *sample, void *context); + +static inline enum starpu_perf_counter_scope _starpu_perf_counter_id_get_scope(const int counter_id) +{ + STARPU_ASSERT(counter_id >= 0); + return counter_id & ((1 << _STARPU_PERF_COUNTER_ID_SCOPE_BITS) - 1); +} + +static inline int _starpu_perf_counter_id_get_index(const int counter_id) +{ + STARPU_ASSERT(counter_id >= 0); + return counter_id >> _STARPU_PERF_COUNTER_ID_SCOPE_BITS; +} + +static inline int _starpu_perf_counter_id_build(const enum starpu_perf_counter_scope scope, const int index) +{ + STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); + STARPU_ASSERT(index >= 0); + return (index << _STARPU_PERF_COUNTER_ID_SCOPE_BITS) | scope; +} + + +void _starpu_perf_counter_sample_init(struct starpu_perf_counter_sample *sample, enum starpu_perf_counter_scope scope); +void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample); +void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig); +void _starpu_perf_counter_exit(void); + +int _starpu_perf_counter_register(enum 
starpu_perf_counter_scope scope, const char *name, enum starpu_perf_counter_type type, const char *help); +void _starpu_perf_counter_unregister_all_scopes(void); + +void _starpu_perf_counter_register_updater(enum starpu_perf_counter_scope scope, void (*updater)(struct starpu_perf_counter_sample *sample, void *context)); + +void _starpu_perf_counter_update_global_sample(void); +void _starpu_perf_counter_update_per_worker_sample(unsigned workerid); +void _starpu_perf_counter_update_per_codelet_sample(struct starpu_codelet *cl); + +#define __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(STRING, TYPE) \ +static inline void _starpu_perf_counter_sample_set_##STRING##_value(struct starpu_perf_counter_sample *sample, const int counter_id, const TYPE value) \ +{ \ + STARPU_ASSERT(starpu_perf_counter_get_type_id(counter_id) == starpu_perf_counter_type_##STRING); \ + STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); \ + STARPU_ASSERT(_starpu_perf_counter_id_get_scope(counter_id) == sample->listener->set->scope); \ + \ + const struct starpu_perf_counter_set * const set = sample->listener->set; \ + const int index = _starpu_perf_counter_id_get_index(counter_id); \ + STARPU_ASSERT(index < set->size); \ + if (set->index_array[index] > 0) \ + { \ + sample->value_array[index].STRING##_val = value; \ + } \ +} + +__STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(int32, int32_t); +__STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(int64, starpu_perf_counter_int64_t); +__STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(float, float); +__STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(double, starpu_perf_counter_double); + +#undef __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE + +#define __STARPU_PERF_COUNTER_REG(PREFIX, SCOPE, CTR, TYPESTRING, HELP) \ + do \ + { \ + __##CTR = _starpu_perf_counter_register(SCOPE, \ + PREFIX "." 
#CTR, starpu_perf_counter_type_ ## TYPESTRING, \ + HELP); \ + } \ + while (0) + +/* global counter variables */ +extern starpu_perf_counter_int64_t _starpu_task__g_total_submitted__value; +extern starpu_perf_counter_int64_t _starpu_task__g_peak_submitted__value; +extern starpu_perf_counter_int64_t _starpu_task__g_current_submitted__value; +extern starpu_perf_counter_int64_t _starpu_task__g_peak_ready__value; +extern starpu_perf_counter_int64_t _starpu_task__g_current_ready__value; + +/* performance counter registration routines per modules */ +void _starpu__task_c__register_counters(void); /* module: task.c */ + + +/* -------------------------------------------------------------------- */ +/* Performance Steering */ + +#define STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(t) STARPU_ASSERT( \ + (t == starpu_perf_knob_scope_global) \ + || (t == starpu_perf_knob_scope_per_worker) \ + || (t == starpu_perf_knob_scope_per_scheduler) \ + ) + +#define STARPU_ASSERT_PERF_KNOB_TYPE_DEFINED(t) STARPU_ASSERT( \ + (t == starpu_perf_knob_type_int32) \ + || (t == starpu_perf_knob_type_int64) \ + || (t == starpu_perf_knob_type_float) \ + || (t == starpu_perf_knob_type_double) \ + ) + +#define _STARPU_PERF_KNOBS_ID_SCOPE_BITS 4 + +struct starpu_perf_knob; + +struct starpu_perf_knob_value +{ + enum starpu_perf_knob_type type; + union + { + int32_t val_int32_t; + starpu_perf_counter_int64_t val_int64_t; + float val_float; + starpu_perf_counter_double val_double; + }; +}; + +struct starpu_perf_knob_group +{ + enum starpu_perf_knob_scope scope; + void (*set)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value); + void (*get)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value); + int array_size; + struct starpu_perf_knob **array; +}; + +struct starpu_perf_knob +{ + int id; + int id_in_group; + const char *name; + const char *help; + enum starpu_perf_knob_type type; + struct 
starpu_perf_knob_group *group; +}; + +#define __STARPU_PERF_KNOB_REG(PREFIX, SCOPE, CTR, TYPESTRING, HELP) \ + do \ + { \ + __##CTR = _starpu_perf_knob_register(SCOPE, \ + PREFIX "." #CTR, starpu_perf_knob_type_ ## TYPESTRING, \ + HELP); \ + } \ + while (0) + +static inline int _starpu_perf_knob_id_get_scope(const int knob_id) +{ + STARPU_ASSERT(knob_id >= 0); + return knob_id & ((1 << _STARPU_PERF_KNOBS_ID_SCOPE_BITS) - 1); +} + +static inline int _starpu_perf_knob_id_get_index(const int knob_id) +{ + STARPU_ASSERT(knob_id >= 0); + return knob_id >> _STARPU_PERF_KNOBS_ID_SCOPE_BITS; +} + +static inline int _starpu_perf_knob_id_build(const enum starpu_perf_knob_scope scope, const int index) +{ + STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); + STARPU_ASSERT(index >= 0); + return (index << _STARPU_PERF_KNOBS_ID_SCOPE_BITS) | scope; +} + +void _starpu_perf_knob_init(void); +void _starpu_perf_knob_exit(void); + +struct starpu_perf_knob_group *_starpu_perf_knob_group_register(enum starpu_perf_knob_scope scope, + void (*set_func)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value), + void (*get_func)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)); +void _starpu_perf_knob_group_unregister(struct starpu_perf_knob_group *group); + +int _starpu_perf_knob_register(struct starpu_perf_knob_group *group, const char *name, enum starpu_perf_knob_type type, const char *help); +void _starpu_perf_knob_unregister_all_scopes(void); + +/* performance knob registration routines per modules */ +void _starpu__workers_c__register_knobs(void); /* module: workers.c */ +void _starpu__task_c__register_knobs(void); /* module: task.c */ +void _starpu__dmda_c__register_knobs(void); /* module: dmda.c */ +void _starpu__workers_c__unregister_knobs(void); /* module: workers.c */ +void _starpu__task_c__unregister_knobs(void); /* module: task.c */ +void 
_starpu__dmda_c__unregister_knobs(void); /* module: dmda.c */ + +#pragma GCC visibility pop + +#endif // __KNOBS_H__ diff --git a/src/common/list.h b/src/common/list.h new file mode 100644 index 0000000..3b57527 --- /dev/null +++ b/src/common/list.h @@ -0,0 +1,470 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __LIST_H__ +#define __LIST_H__ + +/** @file */ + +#include + +/** @remarks list how-to + * ********************************************************* + * LIST_TYPE(FOO, content); + * + * - declares the following types: + * + * + for cells : struct FOO + * + for lists : struct FOO_list + * + for iterators : struct FOO + * + * - declares the following inlines (all O(1) unless stated otherwise, n is the number of elements) : + * + * * Create a cell + * struct FOO* FOO_new(void); + * + * * Free a cell + * void FOO_delete(struct FOO*); + * + * * Create a list (initially empty) + * struct FOO_list* FOO_list_new(void); + * + * * Initializes a list (initially empty) + * void FOO_list_init(struct FOO_list*); + * + * * Initializes a list (initially empty), assuming that the content of FOO_list was already zeroed + * void FOO_list_init0(struct FOO_list*); + * + * * Free a list (the cells it contains are not freed) + * void FOO_list_delete(struct FOO_list*); + * + * * Check whether a list is empty + * int FOO_list_empty(struct FOO_list*); + * + * * Remove a given cell from the list + * void FOO_list_erase(struct FOO_list*, struct FOO*); + * + * * Add a cell at the back of the list + * void FOO_list_push_back(struct FOO_list*, struct FOO*); + * + * * Add a cell at the front of the list + * void FOO_list_push_front(struct FOO_list*, struct FOO*); + * + * * Add a cell before a given cell of a list + * void FOO_list_insert_before(struct FOO_list*, struct FOO*new, struct FOO*); + * + * * Add a cell after a given cell of a list + * void FOO_list_insert_after(struct FOO_list*, struct FOO*new, struct FOO*); + * + * * Append the second list at the end of the first list + * void FOO_list_push_list_back(struct FOO_list*, struct FOO_list*); + * + * * Prepend the first list at the beginning of the second list + * void FOO_list_push_list_front(struct FOO_list*, struct FOO_list*); + * + * * Return and remove the node at the back of the list + * struct FOO* FOO_list_pop_back(struct 
FOO_list*); + * + * * Return and remove the node at the front of the list + * struct FOO* FOO_list_pop_front(struct FOO_list*); + * + * * Return the node at the back of the list + * struct FOO* FOO_list_back(struct FOO_list*); + * + * * Return the node at the front of the list + * struct FOO* FOO_list_front(struct FOO_list*); + * + * * Check that the list chaining is coherent (O(n)) + * int FOO_list_check(struct FOO_list*); + * + * * Return the first cell of the list (from the front) + * struct FOO* FOO_list_begin(struct FOO_list*); + * + * * Return the value to be tested at the end of the list (at the back) + * struct FOO* FOO_list_end(struct FOO_list*); + * + * * Return the next element of the list (from the front) + * struct FOO* FOO_list_next(struct FOO*) + * + * * Return the last element of the list (from the back) + * struct FOO* FOO_list_last(struct FOO_list*); + * + * * Return the value to be tested at the beginning of the list (at the front) + * struct FOO* FOO_list_alpha(struct FOO_list*); + * + * * Return the previous element of the list (from the back) + * struct FOO* FOO_list_prev(struct FOO*) + * + * * Return the size of the list in O(n) + * int FOO_list_size(struct FOO_list*) + * + * * Return the position of the cell in the list (indexed from 0) (O(n) on average) + * int FOO_list_member(struct FOO_list*, struct FOO*) + * + * * Test whether the cell is in the list (O(n) on average) + * int FOO_list_ismember(struct FOO_list*, struct FOO*) + * + * ********************************************************* + * Usage example: + * - initially you'd have: + * struct my_struct + * { + * int a; + * int b; + * }; + * - to make a list of it, we replace the declaration above with: + * LIST_TYPE(my_struct, + * int a; + * int b; + * ); + * which creates the struct my_struct and struct my_struct_list types. 
+ * + * - setting up an empty list: + * struct my_struct_list l; + * my_struct_list_init(&l); + * + * - allocating an empty list: + * struct my_struct_list * l = my_struct_list_new(); + * - add a cell 'e' at the front of list 'l': + * struct my_struct * e = my_struct_new(); + * e->a = 0; + * e->b = 0; + * my_struct_list_push_front(&l, e); + * + * - iterating over a list from the front: + * struct my_struct * i; + * for(i = my_struct_list_begin(&l); + * i != my_struct_list_end(&l); + * i = my_struct_list_next(i)) + * { + * printf("a=%d; b=%d\n", i->a, i->b); + * } + * + * - iterating over a list from the back: + * struct my_struct * i; + * for(i = my_struct_list_last(&l); + * i != my_struct_list_alpha(&l); + * i = my_struct_list_prev(i)) + * { + * printf("a=%d; b=%d\n", i->a, i->b); + * } + * ********************************************************* + */ + + +#ifndef LIST_INLINE +#define LIST_INLINE static inline +#endif + +/**@hideinitializer + * Generates a new type for list of elements */ +#define LIST_TYPE(ENAME, DECL) \ + LIST_CREATE_TYPE(ENAME, DECL) + +#define LIST_CREATE_TYPE(ENAME, DECL) \ + /** from automatic type: struct ENAME */ \ + struct ENAME \ + { \ + struct ENAME *_prev; /**< @internal previous cell */ \ + struct ENAME *_next; /**< @internal next cell */ \ + DECL \ + }; \ + LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) + +/**@hideinitializer + * The effective type declaration for lists */ +#define LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) \ + /** @internal */ \ + /* NOTE: this must not be greater than the struct defined in include/starpu_task_list.h */ \ + struct ENAME##_list \ + { \ + struct ENAME *_head; /**< @internal head of the list */ \ + struct ENAME *_tail; /**< @internal tail of the list */ \ + }; \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_new(void) \ + { struct ENAME *e; _STARPU_MALLOC(e, sizeof(struct ENAME)); \ + e->_next = NULL; e->_prev = NULL; return e; } \ + /** @internal */LIST_INLINE void ENAME##_delete(struct 
ENAME *e) \ + { free(e); } \ + /** @internal */LIST_INLINE void ENAME##_list_push_front(struct ENAME##_list *l, struct ENAME *e) \ + { if(l->_tail == NULL) l->_tail = e; else l->_head->_prev = e; \ + e->_prev = NULL; e->_next = l->_head; l->_head = e; } \ + /** @internal */LIST_INLINE void ENAME##_list_push_back(struct ENAME##_list *l, struct ENAME *e) \ + { if(l->_head == NULL) l->_head = e; else l->_tail->_next = e; \ + e->_next = NULL; e->_prev = l->_tail; l->_tail = e; } \ + /** @internal */LIST_INLINE void ENAME##_list_insert_before(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \ + { struct ENAME *p = o->_prev; if (p) { p->_next = e; e->_prev = p; } else { l->_head = e; e->_prev = NULL; } \ + e->_next = o; o->_prev = e; } \ + /** @internal */LIST_INLINE void ENAME##_list_insert_after(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \ + { struct ENAME *n = o->_next; if (n) { n->_prev = e; e->_next = n; } else { l->_tail = e; e->_next = NULL; } \ + e->_prev = o; o->_next = e; } \ + /** @internal */LIST_INLINE void ENAME##_list_push_list_front(struct ENAME##_list *l1, struct ENAME##_list *l2) \ + { if (l2->_head == NULL) { l2->_head = l1->_head; l2->_tail = l1->_tail; } \ + else if (l1->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l2->_head = l1->_head; } } \ + /** @internal */LIST_INLINE void ENAME##_list_push_list_back(struct ENAME##_list *l1, struct ENAME##_list *l2) \ + { if(l1->_head == NULL) { l1->_head = l2->_head; l1->_tail = l2->_tail; } \ + else if (l2->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l1->_tail = l2->_tail; } } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_front(const struct ENAME##_list *l) \ + { return l->_head; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \ + { return l->_tail; } \ + /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \ + { l->_head=NULL; 
l->_tail=NULL; } \ + /** @internal */LIST_INLINE void ENAME##_list_init0(struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ + { } \ + /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \ + { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \ + ENAME##_list_init(l); return l; } \ + /** @internal */LIST_INLINE int ENAME##_list_empty(const struct ENAME##_list *l) \ + { return (l->_head == NULL); } \ + /** @internal */LIST_INLINE void ENAME##_list_delete(struct ENAME##_list *l) \ + { free(l); } \ + /** @internal */LIST_INLINE void ENAME##_list_erase(struct ENAME##_list *l, struct ENAME *c) \ + { struct ENAME *p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \ + if(c->_next) c->_next->_prev = p; else l->_tail = p; } \ + /** @internal Unlink and return the front cell; return NULL (list untouched) when the list is empty */LIST_INLINE struct ENAME *ENAME##_list_pop_front(struct ENAME##_list *l) \ + { struct ENAME *e = ENAME##_list_front(l); \ + if (e) ENAME##_list_erase(l, e); return e; } \ + /** @internal Unlink and return the back cell; return NULL (list untouched) when the list is empty */LIST_INLINE struct ENAME *ENAME##_list_pop_back(struct ENAME##_list *l) \ + { struct ENAME *e = ENAME##_list_back(l); \ + if (e) ENAME##_list_erase(l, e); return e; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_begin(const struct ENAME##_list *l) \ + { return l->_head; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_end(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ + { return NULL; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_next(const struct ENAME *i) \ + { return i->_next; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_last(const struct ENAME##_list *l) \ + { return l->_tail; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_alpha(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ + { return NULL; } \ + /** @internal */LIST_INLINE struct ENAME *ENAME##_list_prev(const struct ENAME *i) \ + { return i->_prev; } \ + /** @internal */LIST_INLINE int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \ +
{ struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \ + /** @internal */LIST_INLINE int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \ + { struct ENAME *i=l->_head; int k=0; while(i!=NULL){if (i == e) return k; k++; i=i->_next; } return -1; } \ + /** @internal */LIST_INLINE int ENAME##_list_size(const struct ENAME##_list *l) \ + { struct ENAME *i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \ + /** @internal */LIST_INLINE int ENAME##_list_check(const struct ENAME##_list *l) \ + { struct ENAME *i=l->_head; while(i) \ + { if ((i->_next == NULL) && i != l->_tail) return 0; \ + if (i->_next == i) return 0; \ + i=i->_next;} return 1; } \ + /** @internal */LIST_INLINE void ENAME##_list_move(struct ENAME##_list *ldst, struct ENAME##_list *lsrc) \ + { ENAME##_list_init(ldst); ldst->_head = lsrc->_head; ldst->_tail = lsrc->_tail; lsrc->_head = NULL; lsrc->_tail = NULL; } + + +#ifdef STARPU_DEBUG +#define STARPU_ASSERT_MULTILIST(expr) STARPU_ASSERT(expr) +#else +#define STARPU_ASSERT_MULTILIST(expr) ((void) 0) +#endif + +/* + * This is an implementation of list allowing to be member of several lists. + * - One should first call MULTILIST_CREATE_TYPE for the ENAME and for each + * MEMBER type + * - Then the main element type should include fields of type + * ENAME_multilist_MEMBER + * - Then one should call MULTILIST_CREATE_INLINES to create the inlines which + * manipulate lists for this MEMBER type. 
+ * + * ********************************************************* + * Usage example: + * + * - initially you'd have: + * struct my_struct + * { + * int a; + * int b; + * }; + * + * - to make multilists of it, we add MULTILIST_CREATE_TYPE calls before, the + * multilist fields, and MULTILIST_CREATE_INLINES calls after:: + * + * MULTILIST_CREATE_TYPE(my_struct, foo); + * MULTILIST_CREATE_TYPE(my_struct, bar); + * + * struct my_struct + * { + * struct my_struct_multilist_foo foo; + * struct my_struct_multilist_bar bar; + * int a; + * int b; + * }; + * + * MULTILIST_CREATE_INLINES(struct my_struct, my_struct, foo); + * MULTILIST_CREATE_INLINES(struct my_struct, my_struct, bar); + * + * - creating a new element and initialize the multilist fields: + * + * struct my_struct *e = malloc(sizeof(*e)); + * my_struct_multilist_init_foo(e); + * my_struct_multilist_init_bar(e); + * e->a = 0; + * e->b = 0; + * + * - setting up an empty list: + * + * struct my_struct_multilist_foo l; + * my_struct_multilist_head_init_foo(&l); + * + * - add element 'e' at the front of list 'l': + * my_struct_multilist_push_front_foo(&l, e); + * + * - TODO implementation: popping from the front: + * struct my_struct *i; + * i = my_struct_multilist_front_foo(&l); + * + * - iterating over a list from the front: + * struct my_struct *i; + * for(i = my_struct_multilist_begin_foo(&l); + * i != my_struct_multilist_end_foo(&l); + * i = my_struct_multilist_next_foo(i)) + * { + * printf("a=%d; b=%d\n", i->a, i->b); + * } + */ + +/* Create the ENAME_multilist_MEMBER, to be used both as head and as member of main element type */ +#define MULTILIST_CREATE_TYPE(ENAME, MEMBER) \ +struct ENAME##_multilist_##MEMBER { \ + struct ENAME##_multilist_##MEMBER *next; \ + struct ENAME##_multilist_##MEMBER *prev; \ +}; + +/* Create the inlines */ +#define MULTILIST_CREATE_INLINES(TYPE, ENAME, MEMBER) \ +/* Cast from list element to real type. 
*/ \ +LIST_INLINE TYPE *ENAME##_of_multilist_##MEMBER(struct ENAME##_multilist_##MEMBER *elt) { \ + return ((TYPE *) ((uintptr_t) (elt) - ((uintptr_t) (&((TYPE *) 0)->MEMBER)))); \ +} \ +\ +/* Initialize a list head. */ \ +LIST_INLINE void ENAME##_multilist_head_init_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + head->next = head; \ + head->prev = head; \ +} \ +\ +/* Initialize a list element. */ \ +LIST_INLINE void ENAME##_multilist_init_##MEMBER(TYPE *e) { \ + (e)->MEMBER.next = NULL; \ + (e)->MEMBER.prev = NULL; \ +} \ +\ +/* Push element to head of a list. */ \ +LIST_INLINE void ENAME##_multilist_push_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \ + STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \ + STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \ + e->MEMBER.next = head->next; \ + e->MEMBER.prev = head; \ + head->next->prev = &e->MEMBER; \ + head->next = &e->MEMBER; \ +} \ +\ +/* Push element to tail of a list. */ \ +LIST_INLINE void ENAME##_multilist_push_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \ + STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \ + STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \ + e->MEMBER.prev = head->prev; \ + e->MEMBER.next = head; \ + head->prev->next = &e->MEMBER; \ + head->prev = &e->MEMBER; \ +} \ +\ +/* Erase element from a list. */ \ +LIST_INLINE void ENAME##_multilist_erase_##MEMBER(struct ENAME##_multilist_##MEMBER *head STARPU_ATTRIBUTE_UNUSED, TYPE *e) { \ + STARPU_ASSERT_MULTILIST(e->MEMBER.next->prev == &e->MEMBER); \ + e->MEMBER.next->prev = e->MEMBER.prev; \ + STARPU_ASSERT_MULTILIST(e->MEMBER.prev->next == &e->MEMBER); \ + e->MEMBER.prev->next = e->MEMBER.next; \ + e->MEMBER.next = NULL; \ + e->MEMBER.prev = NULL; \ +} \ +\ +/* Test whether the element was queued on the list. */ \ +LIST_INLINE int ENAME##_multilist_queued_##MEMBER(TYPE *e) { \ + return ((e)->MEMBER.next != NULL); \ +} \ +\ +/* Test whether the list is empty. 
*/ \ +LIST_INLINE int ENAME##_multilist_empty_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + return head->next == head; \ +} \ +\ +/* Test whether the element is alone in a list. */ \ +LIST_INLINE int ENAME##_multilist_alone_##MEMBER(TYPE *e) { \ + return (e)->MEMBER.next == (e)->MEMBER.prev; \ +} \ +\ +/* Return the first element of the list. */ \ +LIST_INLINE TYPE *ENAME##_multilist_begin_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + return ENAME##_of_multilist_##MEMBER(head->next); \ +} \ +/* Return the value to be tested at the end of the list. */ \ +LIST_INLINE TYPE *ENAME##_multilist_end_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + return ENAME##_of_multilist_##MEMBER(head); \ +} \ +/* Return the next element of the list. */ \ +LIST_INLINE TYPE *ENAME##_multilist_next_##MEMBER(TYPE *e) { \ + return ENAME##_of_multilist_##MEMBER(e->MEMBER.next); \ +} \ +/* Return the first element of the list. */ \ +LIST_INLINE TYPE *ENAME##_multilist_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + return ENAME##_of_multilist_##MEMBER(head->next); \ +} \ +/* Return the last element of the list. */ \ +LIST_INLINE TYPE *ENAME##_multilist_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + return ENAME##_of_multilist_##MEMBER(head->prev); \ +} \ +\ +/* Return the first element of the list and erase it. */ \ +LIST_INLINE TYPE *ENAME##_multilist_pop_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + TYPE *e = ENAME##_multilist_front_##MEMBER(head); \ + ENAME##_multilist_erase_##MEMBER(head, e); return e; \ +} \ +/* Return the last element of the list and erase it. */ \ +LIST_INLINE TYPE *ENAME##_multilist_pop_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ + TYPE *e = ENAME##_multilist_back_##MEMBER(head); \ + ENAME##_multilist_erase_##MEMBER(head, e); return e; \ +} \ +\ +\ + /* Move a list from its head to another head. Passing newhead == NULL allows to detach the list from any head. 
*/ \ +LIST_INLINE void ENAME##_multilist_move_##MEMBER(struct ENAME##_multilist_##MEMBER *head, struct ENAME##_multilist_##MEMBER *newhead) { \ + if (ENAME##_multilist_empty_##MEMBER(head)) { \ + if (newhead) ENAME##_multilist_head_init_##MEMBER(newhead); /* detaching an empty list: there is no new head to set up */ \ + } else { \ + if (newhead) { \ + newhead->next = head->next; \ + newhead->next->prev = newhead; \ + } else { \ + head->next->prev = head->prev; \ + } \ + if (newhead) { \ + newhead->prev = head->prev; \ + newhead->prev->next = newhead; \ + } else { \ + head->prev->next = head->next; \ + } \ + head->next = head; \ + head->prev = head; \ + } \ +} + +#endif /* __LIST_H__ */ diff --git a/src/common/prio_list.h b/src/common/prio_list.h new file mode 100644 index 0000000..6d60d7a --- /dev/null +++ b/src/common/prio_list.h @@ -0,0 +1,591 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/** @file */ + +/* + * This implements list with priorities (as an int), by using two stages: + * - an RB tree stage sorted by priority, whose leaves are... + * - ... double-linked lists sorted by insertion order. + * + * We always keep the 0-priority list allocated, to avoid keeping + * allocating/deallocating it when all priorities are 0. + * + * We maintain an "empty" flag, to allow lockless FOO_prio_list_empty call.
+ * + * PRIO_LIST_TYPE(FOO, priority_field) + * + * - Declares the following type: + * + priority list: struct FOO_prio_list + * + * - Declares the following inlines (all O(1) unless stated otherwise, n is the + * number of elements, p is the number of different priorities): + * + * * Initialize a new priority list + * void FOO_prio_list_init(struct FOO_prio_list*) + * + * * Initialize a new priority list, assuming that the content of FOO_prio_list was already zeroed + * void FOO_prio_list_init0(struct FOO_prio_list*) + * + * * Free an empty priority list + * void FOO_prio_list_deinit(struct FOO_prio_list*) + * + * * Add a new cell at the end of the list of the priority of the cell (O(log2 p)) + * void FOO_prio_list_push_back(struct FOO_prio_list*, struct FOO*) + * + * * Add a new cell at the beginning of the list of the priority of the cell (O(log2 p)) + * void FOO_prio_list_push_front(struct FOO_prio_list*, struct FOO*) + * + * * Test whether the priority list is empty + * int FOO_prio_list_empty(struct FOO_prio_list*) + * + * * Remove given cell from the priority list + * void FOO_prio_list_erase(struct FOO_prio_list*, struct FOO*) + * + * * Return and remove the first cell of highest priority of the priority list + * struct FOO* FOO_prio_list_pop_front_highest(struct FOO_prio_list*) + * * Return and remove the first cell of lowest priority of the priority list + * struct FOO* FOO_prio_list_pop_front_lowest(struct FOO_prio_list*) + * + * * Return and remove the last cell of highest priority of the priority list + * struct FOO* FOO_prio_list_pop_back_highest(struct FOO_prio_list*) + * * Return and remove the last cell of lowest priority of the priority list + * struct FOO* FOO_prio_list_pop_back_lowest(struct FOO_prio_list*) + * + * * Return the first cell of highest priority of the priority list + * struct FOO* FOO_prio_list_front_highest(struct FOO_prio_list*) + * * Return the first cell of lowest priority of the priority list + * struct FOO* FOO_prio_list_front_lowest(struct FOO_prio_list*) + * + * *
Return the last cell of highest priority of the priority list + * struct FOO* FOO_prio_list_back_highest(struct FOO_prio_list*) + * * Return the last cell of lowest priority of the priority list + * struct FOO* FOO_prio_list_back_lowest(struct FOO_prio_list*) + * + * * Append second priority list at the end of the first priority list (O(log2 p)) + * void FOO_prio_list_push_prio_list_back(struct FOO_prio_list*, struct FOO_prio_list*) + * + * * Prepend second priority list at the beginning of the first priority list (O(log2 p)) + * void FOO_prio_list_push_prio_list_front(struct FOO_prio_list*, struct FOO_prio_list*) + * + * * Test whether cell is part of the list (O(n)) + * int FOO_prio_list_ismember(struct FOO_prio_list*, struct FOO*) + * + * * Return the first cell of the list + * struct FOO* FOO_prio_list_begin(struct FOO_prio_list*); + * + * * Return the value to test at the end of the list + * struct FOO* FOO_prio_list_end(struct FOO_prio_list*); + * + * * Return the next cell of the list + * struct FOO* FOO_prio_list_next(struct FOO_prio_list*, struct FOO*) + * + * * Return the last cell of the list + * struct FOO* FOO_prio_list_last(struct FOO_prio_list*); + * + * * Return the value to test at the beginning of the list + * struct FOO* FOO_prio_list_alpha(struct FOO_prio_list*); + * + * * Return the previous cell of the list + * struct FOO* FOO_prio_list_prev(struct FOO_prio_list*, struct FOO*) + * + * * Return the previous cell of the same priority, or the last cell of the next higher priority + * struct FOO* FOO_prio_list_prev_highest(struct FOO_prio_list*, struct FOO*) + * + * * Return the next cell of the same priority, or the first cell of the next lower priority + * struct FOO* FOO_prio_list_next_lowest(struct FOO_prio_list*, struct FOO*) + * + * PRIO_LIST_TYPE assumes that LIST_TYPE has already been called to create the + * final structure.
+ * + * ********************************************************* + * Usage example: + * LIST_TYPE(my_struct, + * int a; + * int b; + * int prio; + * ); + * PRIO_LIST_TYPE(my_struct, prio); + * + * and then my_struct_prio_list_* inlines are available + */ + +#ifndef __PRIO_LIST_H__ +#define __PRIO_LIST_H__ + +#include <common/rbtree.h> + +#ifndef PRIO_LIST_INLINE +#define PRIO_LIST_INLINE static inline +#endif + +#define PRIO_LIST_TYPE(ENAME, PRIOFIELD) \ + PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) + +#ifndef STARPU_DEBUG + +#define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \ + /* The main type: an RB binary tree */ \ + struct ENAME##_prio_list { \ + struct starpu_rbtree tree; \ + int empty; \ + }; \ + /* The second stage: a list */ \ + struct ENAME##_prio_list_stage { \ + struct starpu_rbtree_node node; /* Keep this first so ENAME##_node_to_list_stage can work. */ \ + int prio; \ + struct ENAME##_list list; \ + }; \ + PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage(struct starpu_rbtree_node *node) \ + { \ + /* This assumes node is first member of stage */ \ + return (struct ENAME##_prio_list_stage *) node; \ + } \ + PRIO_LIST_INLINE const struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage_const(const struct starpu_rbtree_node *node) \ + { \ + /* This assumes node is first member of stage; the cast keeps the const qualifier */ \ + return (const struct ENAME##_prio_list_stage *) node; \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \ + { \ + starpu_rbtree_init(&priolist->tree); \ + priolist->empty = 1; \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \ + { \ + starpu_rbtree_init0(&priolist->tree); \ + priolist->empty = 1; \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \ + { \ + if (starpu_rbtree_empty(&priolist->tree)) \ + return; \ + struct starpu_rbtree_node *root = priolist->tree.root; \ + struct ENAME##_prio_list_stage *stage =
ENAME##_node_to_list_stage(root); \ + assert(ENAME##_list_empty(&stage->list)); \ + assert(!root->children[0] && !root->children[1]); \ + starpu_rbtree_remove(&priolist->tree, root); \ + free(stage); \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_cmp_fn(int prio, const struct starpu_rbtree_node *node) \ + { \ + /* Sort by decreasing order */ \ + const struct ENAME##_prio_list_stage *e2 = ENAME##_node_to_list_stage_const(node); \ + if (e2->prio < prio) \ + return -1; \ + if (e2->prio == prio) \ + return 0; \ + /* e2->prio > prio */ \ + return 1; \ + } \ + PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_prio_list_add(struct ENAME##_prio_list *priolist, int prio) \ + { \ + uintptr_t slot; \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + node = starpu_rbtree_lookup_slot(&priolist->tree, prio, ENAME##_prio_list_cmp_fn, slot); \ + if (node) \ + stage = ENAME##_node_to_list_stage(node); \ + else { \ + _STARPU_CALLOC(stage, 1, sizeof(*stage)); \ + starpu_rbtree_node_init0(&stage->node); \ + stage->prio = prio; \ + ENAME##_list_init0(&stage->list); \ + starpu_rbtree_insert_slot(&priolist->tree, slot, &stage->node); \ + } \ + return stage; \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { \ + struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \ + ENAME##_list_push_back(&stage->list, e); \ + priolist->empty = 0; \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { \ + struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \ + ENAME##_list_push_front(&stage->list, e); \ + priolist->empty = 0; \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \ + { \ + return priolist->empty; \ + } \ + /* Version of list_empty which does not use the cached empty flag, + * typically used to compute the 
value of the flag */ \ + PRIO_LIST_INLINE int ENAME##_prio_list_empty_slow(const struct ENAME##_prio_list *priolist) \ + { \ + if (starpu_rbtree_empty(&priolist->tree)) \ + return 1; \ + struct starpu_rbtree_node *root = priolist->tree.root; \ + const struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage_const(root); \ + if (ENAME##_list_empty(&stage->list) && !root->children[0] && !root->children[1]) \ + /* Just one empty list */ \ + return 1; \ + return 0; \ + } \ + /* To be called when removing an element from a stage, to potentially remove this stage */ \ + PRIO_LIST_INLINE void ENAME##_prio_list_check_empty_stage(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list_stage *stage) \ + { \ + if (ENAME##_list_empty(&stage->list)) { \ + if (stage->prio != 0) \ + { \ + /* stage got empty, remove it */ \ + starpu_rbtree_remove(&priolist->tree, &stage->node); \ + free(stage); \ + } \ + priolist->empty = ENAME##_prio_list_empty_slow(priolist); \ + } \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + assert(node); \ + struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \ + ENAME##_list_erase(&stage->list, e); \ + ENAME##_prio_list_check_empty_stage(priolist, stage); \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_get_next_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ + { \ + struct ENAME##_prio_list_stage *stage; \ + while(1) { \ + struct starpu_rbtree_node *next; \ + if (!node) \ + /* Tree is empty */ \ + return 0; \ + stage = ENAME##_node_to_list_stage(node); \ + if (!ENAME##_list_empty(&stage->list)) \ + break; \ + /* Empty list, skip to next tree entry */ \ + next = starpu_rbtree_next(node); \ + /* drop it if not 0-prio */ \ + 
if (stage->prio != 0) \ + { \ + starpu_rbtree_remove(&priolist->tree, node); \ + free(stage); \ + } \ + node = next; \ + } \ + *pnode = node; \ + *pstage = stage; \ + return 1; \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_get_prev_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ + { \ + struct ENAME##_prio_list_stage *stage; \ + while(1) { \ + struct starpu_rbtree_node *prev; \ + if (!node) \ + /* Tree is empty */ \ + return 0; \ + stage = ENAME##_node_to_list_stage(node); \ + if (!ENAME##_list_empty(&stage->list)) \ + break; \ + /* Empty list, skip to prev tree entry */ \ + prev = starpu_rbtree_prev(node); \ + /* drop it if not 0-prio */ \ + if (stage->prio != 0) \ + { \ + starpu_rbtree_remove(&priolist->tree, node); \ + free(stage); \ + } \ + node = prev; \ + } \ + *pnode = node; \ + *pstage = stage; \ + return 1; \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_get_first_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ + { \ + struct starpu_rbtree_node *node = starpu_rbtree_first(&priolist->tree); \ + return ENAME##_prio_list_get_next_nonempty_stage(priolist, node, pnode, pstage); \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_get_last_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ + { \ + struct starpu_rbtree_node *node = starpu_rbtree_last(&priolist->tree); \ + return ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, pnode, pstage); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_highest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + struct ENAME *ret; \ + if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + ret = 
ENAME##_list_pop_front(&stage->list); \ + ENAME##_prio_list_check_empty_stage(priolist, stage); \ + return ret; \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_lowest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + struct ENAME *ret; \ + if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + ret = ENAME##_list_pop_front(&stage->list); \ + ENAME##_prio_list_check_empty_stage(priolist, stage); \ + return ret; \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_highest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_front(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_lowest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_front(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_highest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + struct ENAME *ret; \ + if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + ret = ENAME##_list_pop_back(&stage->list); \ + ENAME##_prio_list_check_empty_stage(priolist, stage); \ + return ret; \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_lowest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + struct ENAME *ret; \ + if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + ret = 
ENAME##_list_pop_back(&stage->list); \ + ENAME##_prio_list_check_empty_stage(priolist, stage); \ + return ret; \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_highest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_back(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_lowest(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_back(&stage->list); \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \ + { \ + struct starpu_rbtree_node *node_toadd, *tmp; \ + starpu_rbtree_for_each_remove(&priolist_toadd->tree, node_toadd, tmp) { \ + struct ENAME##_prio_list_stage *stage_toadd = ENAME##_node_to_list_stage(node_toadd); \ + uintptr_t slot; \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup_slot(&priolist->tree, stage_toadd->prio, ENAME##_prio_list_cmp_fn, slot); \ + if (node) \ + { \ + /* Catenate the lists (if any); the source stage is released in every case so an empty one is not leaked */ \ + if (!ENAME##_list_empty(&stage_toadd->list)) { \ + struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \ + ENAME##_list_push_list_back(&stage->list, &stage_toadd->list); \ + priolist->empty = 0; \ + } \ + free(node_toadd); \ + } \ + else \ + { \ + if (!ENAME##_list_empty(&stage_toadd->list)) { \ + /* Just move the node between the trees */ \ + starpu_rbtree_insert_slot(&priolist->tree, slot, node_toadd); \ + priolist->empty = 0; \ + } \ + else \ + { \ + /* Actually empty, don't bother moving the list */ \ + free(node_toadd); \ + } \ + } \ + } \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct
ENAME##_prio_list *priolist, const struct ENAME *e) \ + { \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + if (node) { \ + const struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage_const(node); \ + return ENAME##_list_ismember(&stage->list, e); \ + } \ + return 0; \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_begin(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \ + { return NULL; } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ + { \ + struct ENAME *next = ENAME##_list_next(i); \ + if (next != ENAME##_list_end(NULL)) \ + return next; \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + assert(node); \ + struct ENAME##_prio_list_stage *stage; \ + node = starpu_rbtree_next(node); \ + if (!ENAME##_prio_list_get_next_nonempty_stage(priolist, node, &node, &stage)) \ + return NULL; \ + return ENAME##_list_begin(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \ + { \ + struct starpu_rbtree_node *node; \ + struct ENAME##_prio_list_stage *stage; \ + if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ + return NULL; \ + return ENAME##_list_last(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \ + { return NULL; } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ 
+ { \ + struct ENAME *next = ENAME##_list_prev(i); \ + if (next != ENAME##_list_alpha(NULL)) \ + return next; \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + assert(node); \ + struct ENAME##_prio_list_stage *stage; \ + node = starpu_rbtree_prev(node); \ + if (!ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, &node, &stage)) \ + return NULL; \ + return ENAME##_list_last(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev_highest(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ + { \ + struct ENAME *next = ENAME##_list_prev(i); \ + if (next != ENAME##_list_alpha(NULL)) \ + return next; \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + assert(node); \ + struct ENAME##_prio_list_stage *stage; \ + node = starpu_rbtree_next(node); \ + if (!ENAME##_prio_list_get_next_nonempty_stage(priolist, node, &node, &stage)) \ + return NULL; \ + return ENAME##_list_last(&stage->list); \ + } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next_lowest(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ + { \ + struct ENAME *next = ENAME##_list_next(i); \ + if (next != ENAME##_list_end(NULL)) \ + return next; \ + struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ + assert(node); \ + struct ENAME##_prio_list_stage *stage; \ + node = starpu_rbtree_prev(node); \ + if (!ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, &node, &stage)) \ + return NULL; \ + return ENAME##_list_begin(&stage->list); \ + } \ + +#else + +/* gdbinit can't recurse in a tree. Use a mere list in debugging mode. 
*/ +#define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \ + struct ENAME##_prio_list { struct ENAME##_list list; }; \ + PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \ + { ENAME##_list_init(&(priolist)->list); } \ + PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \ + { ENAME##_list_init0(&(priolist)->list); } \ + PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \ + { (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { \ + struct ENAME *cur; \ + for (cur = ENAME##_list_begin(&(priolist)->list); \ + cur != ENAME##_list_end(&(priolist)->list); \ + cur = ENAME##_list_next(cur)) \ + if ((e)->PRIOFIELD > cur->PRIOFIELD) /* stop at the first entry of strictly lower priority: e lands after entries of equal priority */ \ + break; \ + if (cur == ENAME##_list_end(&(priolist)->list)) \ + ENAME##_list_push_back(&(priolist)->list, (e)); \ + else \ + ENAME##_list_insert_before(&(priolist)->list, (e), cur); \ + } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { \ + struct ENAME *cur; \ + for (cur = ENAME##_list_begin(&(priolist)->list); \ + cur != ENAME##_list_end(&(priolist)->list); \ + cur = ENAME##_list_next(cur)) \ + if ((e)->PRIOFIELD >= cur->PRIOFIELD) /* stop at the first entry of lower or equal priority: e lands before entries of equal priority */ \ + break; \ + if (cur == ENAME##_list_end(&(priolist)->list)) \ + ENAME##_list_push_back(&(priolist)->list, (e)); \ + else \ + ENAME##_list_insert_before(&(priolist)->list, (e), cur); \ + } \ + PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_empty(&(priolist)->list); } \ + PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \ + { ENAME##_list_erase(&(priolist)->list, (e)); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_highest(struct ENAME##_prio_list *priolist) \ + { return
ENAME##_list_pop_front(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_lowest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_pop_front(&(priolist)->list); /* NOTE(review): same call as _pop_front_highest in this list-only variant; confirm this is intended */ } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_highest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_pop_back(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_lowest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_pop_back(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_highest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_front(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_lowest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_front(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_highest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_back(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_lowest(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_back(&(priolist)->list); } \ + PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \ + { ENAME##_list_push_list_back(&(priolist)->list, &(priolist_toadd)->list); /* NOTE(review): entries are handed to the plain list as-is, not re-inserted by priority; verify against callers */ } \ + PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \ + { return ENAME##_list_ismember(&(priolist)->list, (e)); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_begin(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_end(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ + {
return ENAME##_list_next(i); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_last(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist) \ + { return ENAME##_list_alpha(&(priolist)->list); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ + { return ENAME##_list_prev(i); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev_highest(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ + { return ENAME##_list_prev(i); } \ + PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next_lowest(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ + { return ENAME##_list_next(i); } \ + +#endif + +#endif // __PRIO_LIST_H__ diff --git a/src/common/rbtree.c b/src/common/rbtree.c new file mode 100644 index 0000000..f3d1ed7 --- /dev/null +++ b/src/common/rbtree.c @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2010, 2012 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/* + * Return the index of a node in the children array of its parent. + * + * The parent parameter must not be null, and must be the parent of the + * given node. A null node is accepted and yields the index of the empty child slot of parent (the left slot is tested first). + */ +static inline int starpu_rbtree_index(const struct starpu_rbtree_node *node, + const struct starpu_rbtree_node *parent) +{ + assert(parent != NULL); + assert((node == NULL) || (starpu_rbtree_parent(node) == parent)); + + if (parent->children[STARPU_RBTREE_LEFT] == node) + return STARPU_RBTREE_LEFT; + + assert(parent->children[STARPU_RBTREE_RIGHT] == node); + + return STARPU_RBTREE_RIGHT; +} + +/* + * Return the color of a node, as stored in the STARPU_RBTREE_COLOR_MASK bits of its parent word. + */ +static inline int starpu_rbtree_color(const struct starpu_rbtree_node *node) +{ + return node->parent & STARPU_RBTREE_COLOR_MASK; +} + +/* + * Return true if the node is red. + */ +static inline int starpu_rbtree_is_red(const struct starpu_rbtree_node *node) +{ + return starpu_rbtree_color(node) == STARPU_RBTREE_COLOR_RED; +} + +/* + * Return true if the node is black. + */ +static inline int starpu_rbtree_is_black(const struct starpu_rbtree_node *node) +{ + return starpu_rbtree_color(node) == STARPU_RBTREE_COLOR_BLACK; +} + +/* + * Set the parent of a node, retaining its current color.
+ */ +static inline void starpu_rbtree_set_parent(struct starpu_rbtree_node *node, + struct starpu_rbtree_node *parent) +{ + assert(starpu_rbtree_check_alignment(node)); + assert(starpu_rbtree_check_alignment(parent)); + + node->parent = (uintptr_t)parent | (node->parent & STARPU_RBTREE_COLOR_MASK); /* only the color bits of the old word are kept */ +} + +/* + * Set the color of a node, retaining its current parent. The color must fit in STARPU_RBTREE_COLOR_MASK (asserted). + */ +static inline void starpu_rbtree_set_color(struct starpu_rbtree_node *node, int color) +{ + assert((color & ~STARPU_RBTREE_COLOR_MASK) == 0); + node->parent = (node->parent & STARPU_RBTREE_PARENT_MASK) | color; +} + +/* + * Set the color of a node to red, retaining its current parent. + */ +static inline void starpu_rbtree_set_red(struct starpu_rbtree_node *node) +{ + starpu_rbtree_set_color(node, STARPU_RBTREE_COLOR_RED); +} + +/* + * Set the color of a node to black, retaining its current parent. + */ +static inline void starpu_rbtree_set_black(struct starpu_rbtree_node *node) +{ + starpu_rbtree_set_color(node, STARPU_RBTREE_COLOR_BLACK); +} + +/* + * Perform a tree rotation, rooted at the given node. + * + * The direction parameter defines the rotation direction and is either + * STARPU_RBTREE_LEFT or STARPU_RBTREE_RIGHT. The child of node on the side opposite to the direction is dereferenced unconditionally, so it must not be null.
+ */ +static void starpu_rbtree_rotate(struct starpu_rbtree *tree, struct starpu_rbtree_node *node, int direction) +{ + struct starpu_rbtree_node *parent, *rnode; + int left, right; + + left = direction; + right = 1 - left; + parent = starpu_rbtree_parent(node); + rnode = node->children[right]; /* the child that will take the place of node */ + + node->children[right] = rnode->children[left]; + + if (rnode->children[left] != NULL) + starpu_rbtree_set_parent(rnode->children[left], node); + + rnode->children[left] = node; + starpu_rbtree_set_parent(rnode, parent); + + if (unlikely(parent == NULL)) + tree->root = rnode; + else + parent->children[starpu_rbtree_index(node, parent)] = rnode; + + starpu_rbtree_set_parent(node, rnode); +} + +void starpu_rbtree_insert_rebalance(struct starpu_rbtree *tree, struct starpu_rbtree_node *parent, + int index, struct starpu_rbtree_node *node) +{ + struct starpu_rbtree_node *grand_parent, *tmp; + + assert(starpu_rbtree_check_alignment(parent)); + assert(starpu_rbtree_check_alignment(node)); + + node->parent = (uintptr_t)parent | STARPU_RBTREE_COLOR_RED; /* the new node is linked in red */ + node->children[STARPU_RBTREE_LEFT] = NULL; + node->children[STARPU_RBTREE_RIGHT] = NULL; + + if (unlikely(parent == NULL)) + tree->root = node; + else + parent->children[index] = node; + + for (;;) + { + struct starpu_rbtree_node *uncle; + int left, right; + + if (parent == NULL) + { + starpu_rbtree_set_black(node); + break; + } + + if (starpu_rbtree_is_black(parent)) + break; + + grand_parent = starpu_rbtree_parent(parent); + assert(grand_parent != NULL); + + left = starpu_rbtree_index(parent, grand_parent); /* left is the side parent is on, right is the side of the uncle */ + right = 1 - left; + + uncle = grand_parent->children[right]; + + /* + * Uncle is red. Flip colors and repeat at grand parent. + */ + if ((uncle != NULL) && starpu_rbtree_is_red(uncle)) + { + starpu_rbtree_set_black(uncle); + starpu_rbtree_set_black(parent); + starpu_rbtree_set_red(grand_parent); + node = grand_parent; + parent = starpu_rbtree_parent(node); + continue; + } + + /* + * Node is the right child of its parent.
Rotate left at parent. + */ + if (parent->children[right] == node) + { + starpu_rbtree_rotate(tree, parent, left); + tmp = node; + node = parent; + parent = tmp; /* the rotation put the former parent below node: swap their roles */ + } + + /* + * Node is the left child of its parent. Handle colors, rotate right + * at grand parent, and leave. + */ + starpu_rbtree_set_black(parent); + starpu_rbtree_set_red(grand_parent); + starpu_rbtree_rotate(tree, grand_parent, right); + break; + } + + assert(starpu_rbtree_is_black(tree->root)); +} + +void starpu_rbtree_remove(struct starpu_rbtree *tree, struct starpu_rbtree_node *node) +{ + struct starpu_rbtree_node *child, *parent, *brother; + int color, left, right; + + if (node->children[STARPU_RBTREE_LEFT] == NULL) + child = node->children[STARPU_RBTREE_RIGHT]; + else if (node->children[STARPU_RBTREE_RIGHT] == NULL) + child = node->children[STARPU_RBTREE_LEFT]; + else + { + struct starpu_rbtree_node *successor; + + /* + * Two-children case: replace the node with its successor, i.e. the left-most node of its right subtree. + */ + + successor = node->children[STARPU_RBTREE_RIGHT]; + + while (successor->children[STARPU_RBTREE_LEFT] != NULL) + successor = successor->children[STARPU_RBTREE_LEFT]; + + color = starpu_rbtree_color(successor); /* successor later takes over the color of node, so this is the color that leaves the tree */ + child = successor->children[STARPU_RBTREE_RIGHT]; + parent = starpu_rbtree_parent(node); + + if (unlikely(parent == NULL)) + tree->root = successor; + else + parent->children[starpu_rbtree_index(node, parent)] = successor; + + parent = starpu_rbtree_parent(successor); + + /* + * Set parent directly to keep the original color.
+ */ + successor->parent = node->parent; + successor->children[STARPU_RBTREE_LEFT] = node->children[STARPU_RBTREE_LEFT]; + starpu_rbtree_set_parent(successor->children[STARPU_RBTREE_LEFT], successor); + + if (node == parent) /* successor is the direct right child of node */ + parent = successor; + else + { + successor->children[STARPU_RBTREE_RIGHT] = node->children[STARPU_RBTREE_RIGHT]; + starpu_rbtree_set_parent(successor->children[STARPU_RBTREE_RIGHT], successor); + parent->children[STARPU_RBTREE_LEFT] = child; + + if (child != NULL) + starpu_rbtree_set_parent(child, parent); + } + + goto update_color; + } + + /* + * Node has at most one child. + */ + + color = starpu_rbtree_color(node); + parent = starpu_rbtree_parent(node); + + if (child != NULL) + starpu_rbtree_set_parent(child, parent); + + if (unlikely(parent == NULL)) + tree->root = child; + else + parent->children[starpu_rbtree_index(node, parent)] = child; + + /* + * The node has been removed, update the colors. The child pointer can + * be null, in which case it is considered a black leaf. + */ +update_color: + if (color == STARPU_RBTREE_COLOR_RED) + return; + + for (;;) + { + if ((child != NULL) && starpu_rbtree_is_red(child)) + { + starpu_rbtree_set_black(child); + break; + } + + if (parent == NULL) + break; + + left = starpu_rbtree_index(child, parent); /* child may be null here: the index of the empty slot of parent is then used */ + right = 1 - left; + + brother = parent->children[right]; + + /* + * Brother is red. Recolor and rotate left at parent so that brother + * becomes black. Left and right are relative to the side child is on. + */ + if (starpu_rbtree_is_red(brother)) + { + starpu_rbtree_set_black(brother); + starpu_rbtree_set_red(parent); + starpu_rbtree_rotate(tree, parent, left); + brother = parent->children[right]; + } + + /* + * Brother has no red child. Recolor and repeat at parent.
+ */ + if (((brother->children[STARPU_RBTREE_LEFT] == NULL) + || starpu_rbtree_is_black(brother->children[STARPU_RBTREE_LEFT])) + && ((brother->children[STARPU_RBTREE_RIGHT] == NULL) + || starpu_rbtree_is_black(brother->children[STARPU_RBTREE_RIGHT]))) + { + starpu_rbtree_set_red(brother); + child = parent; + parent = starpu_rbtree_parent(child); + continue; + } + + /* + * Brother's right child is black. Recolor and rotate right at brother. + */ + if ((brother->children[right] == NULL) + || starpu_rbtree_is_black(brother->children[right])) + { + starpu_rbtree_set_black(brother->children[left]); + starpu_rbtree_set_red(brother); + starpu_rbtree_rotate(tree, brother, right); + brother = parent->children[right]; + } + + /* + * Brother's left child is black. Exchange parent and brother colors + * (we already know brother is black), set brother's right child black, + * rotate left at parent and leave. + */ + starpu_rbtree_set_color(brother, starpu_rbtree_color(parent)); + starpu_rbtree_set_black(parent); + starpu_rbtree_set_black(brother->children[right]); + starpu_rbtree_rotate(tree, parent, left); + break; + } + + assert((tree->root == NULL) || starpu_rbtree_is_black(tree->root)); +} + +struct starpu_rbtree_node * starpu_rbtree_nearest(struct starpu_rbtree_node *parent, int index, + int direction) +{ + assert(starpu_rbtree_check_index(direction)); + + if (parent == NULL) + return NULL; + + assert(starpu_rbtree_check_index(index)); + + if (index != direction) /* the empty slot is on the other side of parent, so parent itself is the nearest node */ + return parent; + + return starpu_rbtree_walk(parent, direction); +} + +struct starpu_rbtree_node * starpu_rbtree_firstlast(const struct starpu_rbtree *tree, int direction) +{ + struct starpu_rbtree_node *prev, *cur; + + assert(starpu_rbtree_check_index(direction)); + + prev = NULL; + + for (cur = tree->root; cur != NULL; cur = cur->children[direction]) + prev = cur; /* remember the last node seen before running off the tree */ + + return prev; +} + +struct starpu_rbtree_node * starpu_rbtree_walk(struct starpu_rbtree_node *node, int direction) +{ + int left, right; + +
assert(starpu_rbtree_check_index(direction)); + + left = direction; + right = 1 - left; + + if (node == NULL) + return NULL; + + if (node->children[left] != NULL) /* one step in the walk direction, then as far as possible the other way */ + { + node = node->children[left]; + + while (node->children[right] != NULL) + node = node->children[right]; + } + else + { + for (;;) + { + struct starpu_rbtree_node *parent; + int index; + + parent = starpu_rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = starpu_rbtree_index(node, parent); + node = parent; + + if (index == right) /* we came up from the side opposite to direction: this parent is the answer */ + break; + } + } + + return node; +} + +/* + * Return the left-most deepest child node of the given node. At each level the left child is preferred and the right child is taken only when there is no left one; the node itself is returned if it has no child. + */ +static struct starpu_rbtree_node * starpu_rbtree_find_deepest(struct starpu_rbtree_node *node) +{ + struct starpu_rbtree_node *parent; + + assert(node != NULL); + + for (;;) + { + parent = node; + node = node->children[STARPU_RBTREE_LEFT]; + + if (node == NULL) + { + node = parent->children[STARPU_RBTREE_RIGHT]; + + if (node == NULL) + return parent; + } + } +} + +struct starpu_rbtree_node * starpu_rbtree_postwalk_deepest(const struct starpu_rbtree *tree) +{ + struct starpu_rbtree_node *node; + + node = tree->root; + + if (node == NULL) + return NULL; + + return starpu_rbtree_find_deepest(node); +} + +struct starpu_rbtree_node * starpu_rbtree_postwalk_unlink(struct starpu_rbtree_node *node) +{ + struct starpu_rbtree_node *parent; + int index; + + if (node == NULL) + return NULL; + + assert(node->children[STARPU_RBTREE_LEFT] == NULL); + assert(node->children[STARPU_RBTREE_RIGHT] == NULL); + + parent = starpu_rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = starpu_rbtree_index(node, parent); + parent->children[index] = NULL; + node = parent->children[STARPU_RBTREE_RIGHT]; /* null if the unlinked node was the right child or if there is no right child */ + + if (node == NULL) + return parent; + + return starpu_rbtree_find_deepest(node); +} diff --git a/src/common/rbtree.h b/src/common/rbtree.h new file mode 100644 index 0000000..2651339 --- /dev/null +++ b/src/common/rbtree.h @@ -0,0 +1,334 @@ +/* + *
Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Red-black tree. + */ + +#ifndef _KERN_RBTREE_H +#define _KERN_RBTREE_H + +/** @file */ + +#include +#include +#include +#include + +#include + +#define MACRO_BEGIN ({ /* opens a GNU statement expression, so that the lookup macros below evaluate to a value */ +#define MACRO_END }) +/* + * Indexes of the left and right nodes in the children array of a node. + */ +#define STARPU_RBTREE_LEFT 0 +#define STARPU_RBTREE_RIGHT 1 + +/** + * Red-black node. + */ +struct starpu_rbtree_node; + +/** + * Red-black tree. + */ +struct starpu_rbtree; + +/** + * Static tree initializer (an empty tree, i.e. a null root). + */ +#define STARPU_RBTREE_INITIALIZER { NULL } + +#include "rbtree_i.h" + +/** + * Initialize a tree.
+ */ +static inline void starpu_rbtree_init(struct starpu_rbtree *tree) +{ + tree->root = NULL; +} + +/** + * This version assumes that the content of tree was already zeroed, so there is nothing left to do + */ +static inline void starpu_rbtree_init0(struct starpu_rbtree *tree STARPU_ATTRIBUTE_UNUSED) +{ +} + +/** + * Initialize a node. + * + * A node is in no tree when its parent points to itself. + */ +static inline void starpu_rbtree_node_init(struct starpu_rbtree_node *node) +{ + assert(starpu_rbtree_check_alignment(node)); + + node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED; + node->children[STARPU_RBTREE_LEFT] = NULL; + node->children[STARPU_RBTREE_RIGHT] = NULL; +} + +/** + * This version assumes that the content of node was already zeroed: only the parent word is set, the child pointers are left as they are + */ +static inline void starpu_rbtree_node_init0(struct starpu_rbtree_node *node) +{ + assert(starpu_rbtree_check_alignment(node)); + + node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED; + //node->children[STARPU_RBTREE_LEFT] = NULL; + //node->children[STARPU_RBTREE_RIGHT] = NULL; +} + +/** + * Return true if node is in no tree. + */ +static inline int starpu_rbtree_node_unlinked(const struct starpu_rbtree_node *node) +{ + return starpu_rbtree_parent(node) == node; +} + +/** + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define starpu_rbtree_entry(node, type, member) structof(node, type, member) + +/** + * Return true if tree is empty. + */ +static inline int starpu_rbtree_empty(const struct starpu_rbtree *tree) +{ + return tree->root == NULL; +} + +/** + * Look up a node in a tree. The macro evaluates to the matching node, or to NULL if no node compares equal to the key. + * + * Note that implementing the lookup algorithm as a macro gives two benefits: + * First, it avoids the overhead of a callback function. Next, the type of the + * cmp_fn parameter isn't rigid. The only guarantee offered by this + * implementation is that the key parameter is the first parameter given to + * cmp_fn.
This way, users can pass only the value they need for comparison + * instead of e.g. allocating a full structure on the stack. + * + * See starpu_rbtree_insert(). + */ +#define starpu_rbtree_lookup(tree, key, cmp_fn) \ +MACRO_BEGIN \ + struct starpu_rbtree_node *___cur; \ + int ___diff; \ + \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___cur = ___cur->children[starpu_rbtree_d2i(___diff)]; \ + } \ + \ + ___cur; /* value of the statement expression */ \ +MACRO_END + +/** + * Look up a node or one of its nearest nodes in a tree. + * + * This macro essentially acts as starpu_rbtree_lookup() but if no entry matched + * the key, an additional step is performed to obtain the next or previous + * node, depending on the direction (left or right). + * + * The constraints that apply to the key parameter are the same as for + * starpu_rbtree_lookup(). The macro evaluates to NULL if the tree is empty or if there is no node on the requested side of the key. + */ +#define starpu_rbtree_lookup_nearest(tree, key, cmp_fn, dir) \ +MACRO_BEGIN \ + struct starpu_rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = -1; \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___prev = ___cur; \ + ___index = starpu_rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + if (___cur == NULL) \ + ___cur = starpu_rbtree_nearest(___prev, ___index, dir); \ + \ + ___cur; \ +MACRO_END + +/** + * Insert a node in a tree. + * + * This macro performs a standard lookup to obtain the insertion point of + * the given node in the tree (it is assumed that the inserted node never + * compares equal to any other entry in the tree) and links the node. It + * then checks red-black rules violations, and rebalances the tree if + * necessary.
+ * + * Unlike starpu_rbtree_lookup(), the cmp_fn parameter must compare two complete + * entries, so it is suggested to use two different comparison inline + * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no + * guarantee about the order of the nodes given to the comparison function. + * + * See starpu_rbtree_lookup(). + */ +#define starpu_rbtree_insert(tree, node, cmp_fn) \ +MACRO_BEGIN \ + struct starpu_rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = -1; /* placeholder: starpu_rbtree_insert_rebalance() does not use the index when the parent is null */ \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(node, ___cur); \ + assert(___diff != 0); \ + ___prev = ___cur; \ + ___index = starpu_rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + starpu_rbtree_insert_rebalance(tree, ___prev, ___index, node); \ +MACRO_END + +/** + * Look up a node/slot pair in a tree. + * + * This macro essentially acts as starpu_rbtree_lookup() but in addition to a node, + * it also returns a slot, which identifies an insertion point in the tree. + * If the returned node is null, the slot can be used by starpu_rbtree_insert_slot() + * to insert without the overhead of an additional lookup. The slot is a + * simple uintptr_t integer. + * + * The constraints that apply to the key parameter are the same as for + * starpu_rbtree_lookup(). + */ +#define starpu_rbtree_lookup_slot(tree, key, cmp_fn, slot) \ +MACRO_BEGIN \ + struct starpu_rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = 0; /* must be a valid index even for an empty tree: starpu_rbtree_slot() asserts it */ \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___prev = ___cur; \ + ___index = starpu_rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + (slot) = starpu_rbtree_slot(___prev, ___index); \ + ___cur; \ +MACRO_END + +/** + * Insert a node at an insertion point in a tree.
+ * + * This macro essentially acts as starpu_rbtree_insert() except that it doesn't + * obtain the insertion point with a standard lookup. The insertion point + * is obtained by calling starpu_rbtree_lookup_slot(). In addition, the new node + * must not compare equal to an existing node in the tree (i.e. the slot + * must denote a null node). + */ +static inline void starpu_rbtree_insert_slot(struct starpu_rbtree *tree, uintptr_t slot, + struct starpu_rbtree_node *node) +{ + struct starpu_rbtree_node *parent; + int index; + + parent = starpu_rbtree_slot_parent(slot); + index = starpu_rbtree_slot_index(slot); + starpu_rbtree_insert_rebalance(tree, parent, index, node); +} + +/** + * Remove a node from a tree. + * + * After completion, the node is stale. + */ +void starpu_rbtree_remove(struct starpu_rbtree *tree, struct starpu_rbtree_node *node); + +/** + * Return the first node of a tree, or NULL if the tree is empty. + */ +/* TODO: optimize by maintaining the first node of the tree */ +#define starpu_rbtree_first(tree) starpu_rbtree_firstlast(tree, STARPU_RBTREE_LEFT) + +/** + * Return the last node of a tree, or NULL if the tree is empty. + */ +/* TODO: optimize by maintaining the last node of the tree */ +/* TODO: could be useful to optimize the case when the key being inserted is + * bigger than the biggest node */ +#define starpu_rbtree_last(tree) starpu_rbtree_firstlast(tree, STARPU_RBTREE_RIGHT) + +/** + * Return the node previous to the given node. + */ +#define starpu_rbtree_prev(node) starpu_rbtree_walk(node, STARPU_RBTREE_LEFT) + +/** + * Return the node next to the given node. + */ +#define starpu_rbtree_next(node) starpu_rbtree_walk(node, STARPU_RBTREE_RIGHT) + +/** + * Forge a loop to process all nodes of a tree, removing them when visited. + * + * This macro can only be used to destroy a tree, so that the resources used + * by the entries can be released by the user. It basically removes all nodes + * without doing any color checking. + * + * After completion, all nodes and the tree root member are stale.
+ */ +#define starpu_rbtree_for_each_remove(tree, node, tmp) \ + for (node = starpu_rbtree_postwalk_deepest(tree), \ + tmp = starpu_rbtree_postwalk_unlink(node); /* the successor is computed before the loop body runs on node */ \ + node != NULL; \ + node = tmp, tmp = starpu_rbtree_postwalk_unlink(node)) \ + +#endif /* _KERN_RBTREE_H */ diff --git a/src/common/rbtree_i.h b/src/common/rbtree_i.h new file mode 100644 index 0000000..cf4d309 --- /dev/null +++ b/src/common/rbtree_i.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _KERN_RBTREE_I_H +#define _KERN_RBTREE_I_H + +#include + +/** @file */ + +/** + * Red-black node structure.
+ * + * To reduce the number of branches and the instruction cache footprint, + * the left and right child pointers are stored in an array, and the symmetry + * of most tree operations is exploited by using left/right variables when + * referring to children. + * + * In addition, this implementation assumes that all nodes are 4-byte aligned, + * so that the least significant bit of the parent member can be used to store + * the color of the node. This is true for all modern 32 and 64 bits + * architectures, as long as the nodes aren't embedded in structures with + * special alignment constraints such as member packing. + */ +struct starpu_rbtree_node +{ + uintptr_t parent; + struct starpu_rbtree_node *children[2]; +}; + +/** + * Red-black tree structure. + */ +struct starpu_rbtree +{ + struct starpu_rbtree_node *root; +}; + +/** + * Masks applied on the parent member of a node to obtain either the + * color or the parent address. Only bit 0 holds the color; the parent mask clears the two lowest bits. + */ +#define STARPU_RBTREE_COLOR_MASK ((uintptr_t) 0x1) +#define STARPU_RBTREE_PARENT_MASK (~((uintptr_t) 0x3)) + +/** + * Node colors. + */ +#define STARPU_RBTREE_COLOR_RED 0 +#define STARPU_RBTREE_COLOR_BLACK 1 + +/** + * Masks applied on slots to obtain either the child index or the parent + * address. + */ +#define STARPU_RBTREE_SLOT_INDEX_MASK ((uintptr_t) 0x1) +#define STARPU_RBTREE_SLOT_PARENT_MASK (~STARPU_RBTREE_SLOT_INDEX_MASK) + +/** + * Return true if the given pointer is suitably aligned, i.e. its two lowest bits are clear. + */ +static inline int starpu_rbtree_check_alignment(const struct starpu_rbtree_node *node) +{ + return ((uintptr_t)node & (~STARPU_RBTREE_PARENT_MASK)) == 0; +} + +/** + * Return true if the given index is a valid child index, i.e. 0 or 1. + */ +static inline int starpu_rbtree_check_index(int index) +{ + return index == (index & 1); +} + +/** + * Convert the result of a comparison into an index in the children array + * (0 or 1): 0 when diff <= 0, 1 when diff > 0. + * + * This function is mostly used when looking up a node.
+ */ +static inline int starpu_rbtree_d2i(int diff) +{ + return !(diff <= 0); +} + +/** + * Return the parent of a node, i.e. its parent word with the bits outside STARPU_RBTREE_PARENT_MASK cleared. + */ +static inline struct starpu_rbtree_node * starpu_rbtree_parent(const struct starpu_rbtree_node *node) +{ + return (struct starpu_rbtree_node *)(node->parent & STARPU_RBTREE_PARENT_MASK); +} + +/** + * Translate an insertion point into a slot: the parent address with the child index stored in its lowest bit. + */ +static inline uintptr_t starpu_rbtree_slot(struct starpu_rbtree_node *parent, int index) +{ + assert(starpu_rbtree_check_alignment(parent)); + assert(starpu_rbtree_check_index(index)); + return (uintptr_t)parent | index; +} + +/** + * Extract the parent address from a slot. + */ +static inline struct starpu_rbtree_node * starpu_rbtree_slot_parent(uintptr_t slot) +{ + return (struct starpu_rbtree_node *)(slot & STARPU_RBTREE_SLOT_PARENT_MASK); +} + +/** + * Extract the index from a slot. + */ +static inline int starpu_rbtree_slot_index(uintptr_t slot) +{ + return slot & STARPU_RBTREE_SLOT_INDEX_MASK; +} + +/** + * Insert a node in a tree, rebalancing it if necessary. + * + * The index parameter is the index in the children array of the parent where + * the new node is to be inserted. It is ignored if the parent is null. + * + * This function is intended to be used by the starpu_rbtree_insert() macro and by starpu_rbtree_insert_slot() only. + */ +void starpu_rbtree_insert_rebalance(struct starpu_rbtree *tree, struct starpu_rbtree_node *parent, + int index, struct starpu_rbtree_node *node); + +/** + * Return the previous or next node relative to a location in a tree. + * + * The parent and index parameters define the location, which can be empty. + * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the previous + * node) or STARPU_RBTREE_RIGHT (to obtain the next one). + */ +struct starpu_rbtree_node * starpu_rbtree_nearest(struct starpu_rbtree_node *parent, int index, + int direction); + +/** + * Return the first or last node of a tree.
+ * + * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the first node) + * or STARPU_RBTREE_RIGHT (to obtain the last one). + */ +struct starpu_rbtree_node * starpu_rbtree_firstlast(const struct starpu_rbtree *tree, int direction); + +/** + * Return the node next to, or previous to the given node. + * + * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the previous node) + * or STARPU_RBTREE_RIGHT (to obtain the next one). + */ +struct starpu_rbtree_node * starpu_rbtree_walk(struct starpu_rbtree_node *node, int direction); + +/** + * Return the left-most deepest node of a tree, which is the starting point of + * the postorder traversal performed by starpu_rbtree_for_each_remove(). + */ +struct starpu_rbtree_node * starpu_rbtree_postwalk_deepest(const struct starpu_rbtree *tree); + +/** + * Unlink a node from its tree and return the next (right) node in postorder. + */ +struct starpu_rbtree_node * starpu_rbtree_postwalk_unlink(struct starpu_rbtree_node *node); + +#endif /* _KERN_RBTREE_I_H */ diff --git a/src/common/rwlock.c b/src/common/rwlock.c new file mode 100644 index 0000000..64cff43 --- /dev/null +++ b/src/common/rwlock.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/** + * A dummy implementation of a rw_lock using spinlocks ... 
+ */ + +#include "rwlock.h" +/* Spin until STARPU_TEST_AND_SET() on lock->busy returns 0, i.e. the flag was clear before this call set it. */ +static void _starpu_take_busy_lock(struct _starpu_rw_lock *lock) +{ + uint32_t prev; + do + { + prev = STARPU_TEST_AND_SET(&lock->busy, 1); + } + while (prev); +} +/* Drop the busy flag through STARPU_RELEASE(). */ +static void _starpu_release_busy_lock(struct _starpu_rw_lock *lock) +{ + STARPU_RELEASE(&lock->busy); +} +/* Put the lock in its idle state: no writer, no reader, busy flag clear. */ +void _starpu_init_rw_lock(struct _starpu_rw_lock *lock) +{ + STARPU_ASSERT(lock); + + lock->writer = 0; + lock->readercnt = 0; + lock->busy = 0; +} + +/* Single write attempt: returns 0 if there was neither reader nor writer, -1 otherwise. */ +int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock) +{ + _starpu_take_busy_lock(lock); + + if (lock->readercnt > 0 || lock->writer) + { + /* fail to take the lock */ + _starpu_release_busy_lock(lock); + return -1; + } + else + { + STARPU_ASSERT(lock->readercnt == 0); + STARPU_ASSERT(lock->writer == 0); + + /* no one was writing or reading */ + lock->writer = 1; + _starpu_release_busy_lock(lock); + return 0; + } +} +/* Single read attempt: returns 0 and counts one more reader unless a writer holds the lock, in which case it returns -1. */ +int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock) +{ + _starpu_take_busy_lock(lock); + + if (lock->writer) + { + /* there is a writer ... */ + _starpu_release_busy_lock(lock); + return -1; + } + else + { + /* no one is writing (lock->writer == 0 on this branch) */ + + /* readercnt is a uint16_t: refuse to wrap it back to 0 */ + STARPU_ASSERT(lock->readercnt < UINT16_MAX); + lock->readercnt++; + _starpu_release_busy_lock(lock); + + return 0; + } +} + + +/* Blocking write acquisition: retry, dropping the busy flag between attempts, until there is neither reader nor writer. 
*/ +void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock) +{ + do + { + _starpu_take_busy_lock(lock); + + if (lock->readercnt > 0 || lock->writer) + { + /* fail to take the lock */ + _starpu_release_busy_lock(lock); + } + else + { + STARPU_ASSERT(lock->readercnt == 0); + STARPU_ASSERT(lock->writer == 0); + + /* no one was writing or reading */ + lock->writer = 1; + _starpu_release_busy_lock(lock); + return; + } + } + while (1); +} +/* Blocking read acquisition: retry until no writer holds the lock. */ +void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock) +{ + do + { + _starpu_take_busy_lock(lock); + + if (lock->writer) + { + /* there is a writer ... 
*/ + _starpu_release_busy_lock(lock); + } + else + { + /* no one is writing (lock->writer == 0 on this branch) */ + + /* readercnt is a uint16_t: refuse to wrap it back to 0 */ + STARPU_ASSERT(lock->readercnt < UINT16_MAX); + lock->readercnt++; + _starpu_release_busy_lock(lock); + + return; + } + } + while (1); +} +/* Release a lock held in either mode: clears writer when it is set, otherwise drops one reader. */ +void _starpu_release_rw_lock(struct _starpu_rw_lock *lock) +{ + _starpu_take_busy_lock(lock); + /* either writer or reader (exactly one !) */ + if (lock->writer) + { + STARPU_ASSERT(lock->readercnt == 0); + lock->writer = 0; + } + else + { + /* reading mode: releasing with no reader would wrap readercnt and keep writers out */ + STARPU_ASSERT(lock->readercnt > 0); + lock->readercnt--; + } + _starpu_release_busy_lock(lock); +} diff --git a/src/common/rwlock.h b/src/common/rwlock.h new file mode 100644 index 0000000..1844387 --- /dev/null +++ b/src/common/rwlock.h @@ -0,0 +1,57 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __RWLOCKS_H__ +#define __RWLOCKS_H__ + +#include +#include + +#pragma GCC visibility push(hidden) + +/** @file */ + +/** Dummy implementation of a RW-lock using a spinlock. 
*/ +struct _starpu_rw_lock +{ + uint32_t busy; + uint8_t writer; + uint16_t readercnt; +}; + +/** Initialize the RW-lock */ +void _starpu_init_rw_lock(struct _starpu_rw_lock *lock); + +/** Grab the RW-lock in a write mode */ +void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock); + +/** Grab the RW-lock in a read mode */ +void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock); + +/** Try to grab the RW-lock in a write mode. Returns 0 in case of success, -1 + * otherwise. */ +int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock); + +/** Try to grab the RW-lock in a read mode. Returns 0 in case of success, -1 + * otherwise. */ +int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock); + +/** Unlock the RW-lock. */ +void _starpu_release_rw_lock(struct _starpu_rw_lock *lock); + +#pragma GCC visibility pop + +#endif diff --git a/src/common/starpu_spinlock.c b/src/common/starpu_spinlock.c new file mode 100644 index 0000000..41c856d --- /dev/null +++ b/src/common/starpu_spinlock.c @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(STARPU_SPINLOCK_CHECK) +int _starpu_spin_init(struct _starpu_spinlock *lock) +{ + starpu_pthread_mutexattr_t errcheck_attr; + int ret; + ret = starpu_pthread_mutexattr_init(&errcheck_attr); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_pthread_mutexattr_init"); + + ret = starpu_pthread_mutexattr_settype(&errcheck_attr, PTHREAD_MUTEX_ERRORCHECK); + STARPU_ASSERT(!ret); + + ret = starpu_pthread_mutex_init(&lock->errcheck_lock, &errcheck_attr); + starpu_pthread_mutexattr_destroy(&errcheck_attr); + return ret; +} + +int _starpu_spin_destroy(struct _starpu_spinlock *lock) +{ + return starpu_pthread_mutex_destroy(&lock->errcheck_lock); +} +#endif diff --git a/src/common/starpu_spinlock.h b/src/common/starpu_spinlock.h new file mode 100644 index 0000000..1ab4239 --- /dev/null +++ b/src/common/starpu_spinlock.h @@ -0,0 +1,135 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#ifndef __STARPU_SPINLOCK_H__ +#define __STARPU_SPINLOCK_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SPINLOCK_CHECK + +/* We don't care about performance */ + +struct _starpu_spinlock +{ + starpu_pthread_mutex_t errcheck_lock; + const char *last_taker; +}; + +int _starpu_spin_init(struct _starpu_spinlock *lock); +int _starpu_spin_destroy(struct _starpu_spinlock *lock); + +static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_LOCKING_SPINLOCK(file, line); + int ret = starpu_pthread_mutex_lock(&lock->errcheck_lock); + STARPU_ASSERT(!ret); + lock->last_taker = func; + _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + return ret; +} + +static inline void _starpu_spin_checklocked(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(starpu_pthread_mutex_trylock(&lock->errcheck_lock) != 0); +} + +static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); + int ret = starpu_pthread_mutex_trylock(&lock->errcheck_lock); + STARPU_ASSERT(!ret || (ret == EBUSY)); + if (STARPU_LIKELY(!ret)) + { + lock->last_taker = func; + _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + } + return ret; +} + +static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); + int ret = starpu_pthread_mutex_unlock(&lock->errcheck_lock); + STARPU_ASSERT(!ret); + _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); + return ret; +} +#else + +/* We do care about performance, inline as much as possible */ + +struct _starpu_spinlock +{ + 
starpu_pthread_spinlock_t lock; +}; + +static inline int _starpu_spin_init(struct _starpu_spinlock *lock) +{ + int ret = starpu_pthread_spin_init(&lock->lock, 0); + STARPU_ASSERT(!ret); + return ret; +} + +#define _starpu_spin_destroy(_lock) starpu_pthread_spin_destroy(&(_lock)->lock) + +static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_LOCKING_SPINLOCK(file, line); + int ret = starpu_pthread_spin_lock(&lock->lock); + STARPU_ASSERT(!ret); + _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + return ret; +} + +#define _starpu_spin_checklocked(_lock) _starpu_pthread_spin_checklocked(&(_lock)->lock) + +static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); + int ret = starpu_pthread_spin_trylock(&lock->lock); + STARPU_ASSERT(!ret || (ret == EBUSY)); + if (STARPU_LIKELY(!ret)) + _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + return ret; +} + +static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); + int ret = starpu_pthread_spin_unlock(&lock->lock); + STARPU_ASSERT(!ret); + _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); + return ret; +} +#endif + +#define _starpu_spin_lock(lock) \ + __starpu_spin_lock(lock, __FILE__, __LINE__, __starpu_func__) +#define _starpu_spin_trylock(lock) \ + __starpu_spin_trylock(lock, __FILE__, __LINE__, __starpu_func__) +#define _starpu_spin_unlock(lock) \ + __starpu_spin_unlock(lock, __FILE__, __LINE__, __starpu_func__) + +#define STARPU_SPIN_MAXTRY 10 + +#endif // __STARPU_SPINLOCK_H__ diff --git a/src/common/thread.c b/src/common/thread.c 
new file mode 100644 index 0000000..71ca2a0 --- /dev/null +++ b/src/common/thread.c @@ -0,0 +1,1196 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#ifdef STARPU_DEBUG +#include +#endif +#include +#include +#include + +#include +#include + +#ifdef STARPU_SIMGRID +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H +#include +#include +#elif defined(STARPU_HAVE_XBT_SYNCHRO_H) +#include +#else +#include +#endif +#include +#if !defined(HAVE_SG_ACTOR_GET_DATA) && !defined(HAVE_SG_ACTOR_DATA) && \ + (defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)) && \ + !(defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name)) +#include +#endif +#else + +#if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) +#include +#include + +/* Private futexes are not so old, cope with old kernels. 
*/ +#ifdef FUTEX_WAIT_PRIVATE +static int _starpu_futex_wait = FUTEX_WAIT_PRIVATE; +static int _starpu_futex_wake = FUTEX_WAKE_PRIVATE; +#else +static int _starpu_futex_wait = FUTEX_WAIT; +static int _starpu_futex_wake = FUTEX_WAKE; +#endif + +#endif +#endif /* !STARPU_SIMGRID */ + +#ifdef STARPU_SIMGRID + +int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2) +{ + return t1 == t2; +} + +starpu_pthread_t starpu_pthread_self(void) +{ +#ifdef HAVE_SG_ACTOR_SELF + return sg_actor_self(); +#else + return MSG_process_self(); +#endif +} +/* SimGrid thread creation: start_routine and arg are handed to _starpu_simgrid_thread_start as two "%p"-formatted strings, a zeroed array of MAX_TSD+1 pointers is attached as actor data, and host defaults to "MAIN". */ +int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host) +{ + char **_args; + int ret; + _STARPU_MALLOC(_args, 3*sizeof(char*)); + ret = asprintf(&_args[0], "%p", start_routine); + STARPU_ASSERT(ret >= 0); /* asprintf() reports failure with -1, which is non-zero */ + ret = asprintf(&_args[1], "%p", arg); + STARPU_ASSERT(ret >= 0); /* same check: -1 means _args[1] is unusable */ + _args[2] = NULL; + if (!host) + host = _starpu_simgrid_get_host_by_name("MAIN"); + + void *tsd; + _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); + +#ifndef HAVE_SG_ACTOR_SET_STACKSIZE + if (attr && attr->stacksize) + _starpu_simgrid_set_stack_size(attr->stacksize); +#endif +#ifdef HAVE_SG_ACTOR_INIT + *thread= sg_actor_init(name, host); +#ifdef HAVE_SG_ACTOR_SET_STACKSIZE + if (attr && attr->stacksize) + sg_actor_set_stacksize(*thread, attr->stacksize); +#endif +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(*thread, tsd); +#else + sg_actor_data_set(*thread, tsd); +#endif + sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args); +#else + *thread = 
MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args); +#if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA) +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(*thread, tsd); +#else + sg_actor_data_set(*thread, tsd); +#endif +#endif +#endif +#ifndef HAVE_SG_ACTOR_SET_STACKSIZE + if (attr && attr->stacksize) + 
_starpu_simgrid_set_stack_size(_starpu_default_stack_size); +#endif + +#if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 +# ifdef HAVE_SG_ACTOR_REF + sg_actor_ref(*thread); +# else + MSG_process_ref(*thread); +# endif +#endif + return 0; +} + +int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) +{ + return starpu_pthread_create_on("", thread, attr, start_routine, arg, NULL); +} + +int starpu_pthread_join(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED, void **retval STARPU_ATTRIBUTE_UNUSED) +{ +#if SIMGRID_VERSION >= 31400 +# ifdef STARPU_HAVE_SIMGRID_ACTOR_H + sg_actor_join(thread, 1000000); +# else + MSG_process_join(thread, 1000000); +# endif +#if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 +# ifdef HAVE_SG_ACTOR_REF + sg_actor_unref(thread); +# else + MSG_process_unref(thread); +# endif +#endif +#else + starpu_sleep(1); +#endif + return 0; +} + +int starpu_pthread_detach(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED) +{ +#if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 +# ifdef HAVE_SG_ACTOR_REF + sg_actor_unref(thread); +# else + MSG_process_unref(thread); +# endif +#endif + return 0; +} + +int starpu_pthread_exit(void *retval STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_SG_ACTOR_SELF + sg_actor_kill(sg_actor_self()); +#else + MSG_process_kill(MSG_process_self()); +#endif + STARPU_ABORT_MSG("MSG_process_kill(MSG_process_self()) returned?!"); +} + + +int starpu_pthread_attr_init(starpu_pthread_attr_t *attr) +{ + attr->stacksize = 0; + return 0; +} + +int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +int starpu_pthread_attr_setstacksize(starpu_pthread_attr_t *attr, size_t stacksize) +{ + attr->stacksize = stacksize; + return 0; +} + +int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, int detachstate STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +int 
starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + *mutex = sg_mutex_init(); +#else + *mutex = xbt_mutex_init(); +#endif + return 0; +} + +int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex) +{ + if (*mutex) +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + sg_mutex_destroy(*mutex); +#else + xbt_mutex_destroy(*mutex); +#endif + return 0; +} + +int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_LOCKING_MUTEX(); + + /* Note: this is actually safe, because simgrid only preempts within + * simgrid functions */ + if (!*mutex) + { + /* Here we may get preempted */ +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + sg_mutex_t new_mutex = sg_mutex_init(); +#else + xbt_mutex_t new_mutex = xbt_mutex_init(); +#endif + if (!*mutex) + *mutex = new_mutex; + else + /* Somebody already initialized it while we were + * calling sg_mutex_init, this one is now useless */ +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + sg_mutex_destroy(new_mutex); +#else + xbt_mutex_destroy(new_mutex); +#endif + } + +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + sg_mutex_lock(*mutex); +#else + xbt_mutex_acquire(*mutex); +#endif + + _STARPU_TRACE_MUTEX_LOCKED(); + + return 0; +} + +int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_UNLOCKING_MUTEX(); + +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + sg_mutex_unlock(*mutex); +#else + xbt_mutex_release(*mutex); +#endif + + _STARPU_TRACE_MUTEX_UNLOCKED(); + + return 0; +} + +int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) +{ + int ret; + _STARPU_TRACE_TRYLOCK_MUTEX(); + +#ifdef STARPU_HAVE_SIMGRID_MUTEX_H + ret = sg_mutex_try_lock(*mutex); +#elif defined(HAVE_XBT_MUTEX_TRY_ACQUIRE) || defined(xbt_mutex_try_acquire) + ret = xbt_mutex_try_acquire(*mutex); +#else + ret = simcall_mutex_trylock((smx_mutex_t)*mutex); +#endif + ret = ret ? 
0 : EBUSY; + if (!ret) /* ret is 0 only when the mutex was actually taken; do not trace a lock on EBUSY */ + _STARPU_TRACE_MUTEX_LOCKED(); + + return ret; +} + +int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED, int *type STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED, int type STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + + +/* Indexed by key-1 */ +static int used_key[MAX_TSD]; + +int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *) STARPU_ATTRIBUTE_UNUSED) +{ + unsigned i; + + /* Note: no synchronization here, we are actually monothreaded anyway. */ + for (i = 0; i < MAX_TSD; i++) + { + if (!used_key[i]) + { + used_key[i] = 1; + break; + } + } + STARPU_ASSERT(i < MAX_TSD); + /* key 0 is for process pointer argument */ + *key = i+1; + return 0; +} + +int starpu_pthread_key_delete(starpu_pthread_key_t key) +{ + used_key[key-1] = 0; + return 0; +} + +/* We need it only when using smpi */ +#pragma weak smpi_process_get_user_data +#if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_get_user_data) +extern void *smpi_process_get_user_data(); +#endif + +int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer) +{ + void **array; +#ifdef HAVE_SG_ACTOR_GET_DATA + array = sg_actor_get_data(sg_actor_self()); +#elif defined(HAVE_SG_ACTOR_DATA) + array = sg_actor_data(sg_actor_self()); +#else +#if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data) +#if defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name) + const char *process_name = MSG_process_self_name(); +#else + const char *process_name = 
SIMIX_process_self_get_name(); +#endif + char *end; + /* Test whether it is an MPI rank */ + 
strtol(process_name, &end, 10); + if (!*end || !strcmp(process_name, "wait for mpi transfer") || + (!strcmp(process_name, "main") && _starpu_simgrid_running_smpi())) + /* Special-case the SMPI process */ + array = smpi_process_get_user_data(); + else +#endif + array = MSG_process_get_data(MSG_process_self()); +#endif + array[key] = (void*) pointer; + return 0; +} + +void* starpu_pthread_getspecific(starpu_pthread_key_t key) +{ + void **array; +#ifdef HAVE_SG_ACTOR_GET_DATA + array = sg_actor_get_data(sg_actor_self()); +#elif defined(HAVE_SG_ACTOR_DATA) + array = sg_actor_data(sg_actor_self()); +#else +#if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data) +#if defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name) + const char *process_name = MSG_process_self_name(); +#else + const char *process_name = SIMIX_process_self_get_name(); +#endif + char *end; + /* Test whether it is an MPI rank */ + strtol(process_name, &end, 10); + if (!*end || !strcmp(process_name, "wait for mpi transfer") || + (!strcmp(process_name, "main") && _starpu_simgrid_running_smpi())) + /* Special-case the SMPI processes */ + array = smpi_process_get_user_data(); + else +#endif + array = MSG_process_get_data(MSG_process_self()); +#endif + if (!array) + return NULL; + return array[key]; +} + +int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_HAVE_SIMGRID_COND_H + *cond = sg_cond_init(); +#else + *cond = xbt_cond_init(); +#endif + return 0; +} + +static void _starpu_pthread_cond_auto_init(starpu_pthread_cond_t *cond) +{ + /* Note: this is actually safe, because simgrid only preempts within + * simgrid functions */ + if (!*cond) + { + /* Here we may get preempted */ +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_t new_cond = sg_cond_init(); +#else + xbt_cond_t new_cond = xbt_cond_init(); +#endif + if (!*cond) + *cond = new_cond; + else + /* Somebody already initialized 
it while we were + * calling xbt_cond_init, this one is now useless */ +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_destroy(new_cond); +#else + xbt_cond_destroy(new_cond); +#endif + } +} + +int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond) +{ + _starpu_pthread_cond_auto_init(cond); +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_notify_one(*cond); +#else + xbt_cond_signal(*cond); +#endif + return 0; +} + +int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond) +{ + _starpu_pthread_cond_auto_init(cond); +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_notify_all(*cond); +#else + xbt_cond_broadcast(*cond); +#endif + return 0; +} + +int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_COND_WAIT_BEGIN(); + + _starpu_pthread_cond_auto_init(cond); +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_wait(*cond, *mutex); +#else + xbt_cond_wait(*cond, *mutex); +#endif + + _STARPU_TRACE_COND_WAIT_END(); + + return 0; +} + +int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime) +{ +#if SIMGRID_VERSION >= 31800 + struct timespec now, delta; + double delay; + int ret = 0; + _starpu_clock_gettime(&now); + delta.tv_sec = abstime->tv_sec - now.tv_sec; + delta.tv_nsec = abstime->tv_nsec - now.tv_nsec; + delay = (double) delta.tv_sec + (double) delta.tv_nsec / 1000000000.; + + _STARPU_TRACE_COND_WAIT_BEGIN(); + + _starpu_pthread_cond_auto_init(cond); +#ifdef STARPU_HAVE_SIMGRID_COND_H + ret = sg_cond_wait_for(*cond, *mutex, delay) ? ETIMEDOUT : 0; +#else + ret = xbt_cond_timedwait(*cond, *mutex, delay) ? 
ETIMEDOUT : 0; +#endif + + _STARPU_TRACE_COND_WAIT_END(); + + return ret; +#else + STARPU_ASSERT_MSG(0, "simgrid version is too old for this"); +#endif +} + +int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond) +{ + if (*cond) +#ifdef STARPU_HAVE_SIMGRID_COND_H + sg_cond_destroy(*cond); +#else + xbt_cond_destroy(*cond); +#endif + return 0; +} + +/* TODO: use rwlocks + * https://framagit.org/simgrid/simgrid/-/issues/92 + */ +int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *restrict rwlock, const starpu_pthread_rwlockattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED) +{ + return starpu_pthread_mutex_init(rwlock, NULL); +} + +int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock) +{ + return starpu_pthread_mutex_destroy(rwlock); +} + +int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_RDLOCKING_RWLOCK(); + + int p_ret = starpu_pthread_mutex_lock(rwlock); + + _STARPU_TRACE_RWLOCK_RDLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) +{ + int p_ret = starpu_pthread_mutex_trylock(rwlock); + + if (!p_ret) + _STARPU_TRACE_RWLOCK_RDLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_WRLOCKING_RWLOCK(); + + int p_ret = starpu_pthread_mutex_lock(rwlock); + + _STARPU_TRACE_RWLOCK_WRLOCKED(); + + return p_ret; +} +/* Try-write on the mutex-backed rwlock (see the TODO above): returns the mutex trylock result, tracing only a successful acquisition. */ +int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) +{ + int p_ret = starpu_pthread_mutex_trylock(rwlock); + + if (!p_ret) + _STARPU_TRACE_RWLOCK_WRLOCKED(); /* write-side event, as in starpu_pthread_rwlock_wrlock() */ + + return p_ret; +} + + +int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_UNLOCKING_RWLOCK(); + + int p_ret = starpu_pthread_mutex_unlock(rwlock); + + _STARPU_TRACE_RWLOCK_UNLOCKED(); + + 
return p_ret; +} + +#ifdef STARPU_HAVE_SIMGRID_BARRIER_H +int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr 
STARPU_ATTRIBUTE_UNUSED, unsigned count) +{ + *barrier = sg_barrier_init(count); + return 0; +} + +int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) +{ + if (*barrier) + sg_barrier_destroy(*barrier); + return 0; +} + +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) +{ + int ret; + + _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + + ret = sg_barrier_wait(*barrier); + + _STARPU_TRACE_BARRIER_WAIT_END(); + return ret; +} +#elif defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init) +int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count) +{ + *barrier = xbt_barrier_init(count); + return 0; +} + +int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) +{ + if (*barrier) + xbt_barrier_destroy(*barrier); + return 0; +} + +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) +{ + int ret; + + _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + + ret = xbt_barrier_wait(*barrier); + + _STARPU_TRACE_BARRIER_WAIT_END(); + return ret; +} +#endif /* defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) */ + +int starpu_pthread_queue_init(starpu_pthread_queue_t *q) +{ + STARPU_PTHREAD_MUTEX_INIT(&q->mutex, NULL); + q->queue = NULL; + q->allocqueue = 0; + q->nqueue = 0; + return 0; +} + +int starpu_pthread_wait_init(starpu_pthread_wait_t *w) +{ + STARPU_PTHREAD_MUTEX_INIT(&w->mutex, NULL); + STARPU_PTHREAD_COND_INIT(&w->cond, NULL); + w->block = 1; + return 0; +} + +int starpu_pthread_queue_register(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q) +{ + STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); + + if (q->nqueue == q->allocqueue) + { + /* Make room for the new waiter */ + unsigned newalloc; + newalloc = q->allocqueue * 2; + if (!newalloc) + newalloc = 1; + _STARPU_REALLOC(q->queue, newalloc * sizeof(*(q->queue))); + q->allocqueue = newalloc; + } + q->queue[q->nqueue++] = w; + + STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); 
+ return 0; +} + +int starpu_pthread_queue_unregister(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q) +{ + unsigned i; + STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); + for (i = 0; i < q->nqueue; i++) + { + if (q->queue[i] == w) + { + memmove(&q->queue[i], &q->queue[i+1], (q->nqueue - i - 1) * sizeof(*(q->queue))); + break; + } + } + STARPU_ASSERT(i < q->nqueue); + q->nqueue--; + STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); + return 0; +} + +int starpu_pthread_wait_reset(starpu_pthread_wait_t *w) +{ + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + w->block = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + return 0; +} + +int starpu_pthread_wait_wait(starpu_pthread_wait_t *w) +{ + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + while (w->block == 1) + STARPU_PTHREAD_COND_WAIT(&w->cond, &w->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + return 0; +} + +/* pthread_cond_timedwait not yet available on windows, but we don't run simgrid there anyway */ +#ifdef STARPU_SIMGRID +int starpu_pthread_wait_timedwait(starpu_pthread_wait_t *w, const struct timespec *abstime) +{ + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + while (w->block == 1) + STARPU_PTHREAD_COND_TIMEDWAIT(&w->cond, &w->mutex, abstime); + STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + return 0; +} +#endif + +int starpu_pthread_queue_signal(starpu_pthread_queue_t *q) +{ + starpu_pthread_wait_t *w; + STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); + if (q->nqueue) + { + /* TODO: better try to wake a sleeping one if possible */ + w = q->queue[0]; + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + w->block = 0; + STARPU_PTHREAD_COND_SIGNAL(&w->cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); + return 0; +} + +int starpu_pthread_queue_broadcast(starpu_pthread_queue_t *q) +{ + unsigned i; + starpu_pthread_wait_t *w; + STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); + for (i = 0; i < q->nqueue; i++) + { + w = q->queue[i]; + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + w->block = 0; + STARPU_PTHREAD_COND_SIGNAL(&w->cond); + 
STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); + return 0; +} + +int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w) +{ + STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&w->mutex); + STARPU_PTHREAD_COND_DESTROY(&w->cond); + return 0; +} + +int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q) +{ + STARPU_ASSERT(!q->nqueue); + STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&q->mutex); + free(q->queue); + return 0; +} + +#endif /* STARPU_SIMGRID */ + +#if (defined(STARPU_SIMGRID) && !defined(STARPU_HAVE_SIMGRID_BARRIER_H) && !defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) && !defined(xbt_barrier_init)) || (!defined(STARPU_SIMGRID) && !defined(STARPU_HAVE_PTHREAD_BARRIER)) +int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count) +{ + int ret = starpu_pthread_mutex_init(&barrier->mutex, NULL); + if (!ret) + ret = starpu_pthread_cond_init(&barrier->cond, NULL); + if (!ret) + ret = starpu_pthread_cond_init(&barrier->cond_destroy, NULL); + barrier->count = count; + barrier->done = 0; + barrier->busy = 0; + return ret; +} + +int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) +{ + starpu_pthread_mutex_lock(&barrier->mutex); + while (barrier->busy) + { + starpu_pthread_cond_wait(&barrier->cond_destroy, &barrier->mutex); + } + starpu_pthread_mutex_unlock(&barrier->mutex); + int ret = starpu_pthread_mutex_destroy(&barrier->mutex); + if (!ret) + ret = starpu_pthread_cond_destroy(&barrier->cond); + if (!ret) + ret = starpu_pthread_cond_destroy(&barrier->cond_destroy); + return ret; +} + +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) +{ + int ret = 0; + _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + + starpu_pthread_mutex_lock(&barrier->mutex); + barrier->done++; + 
if (barrier->done == barrier->count) + { + barrier->done = 0; + starpu_pthread_cond_broadcast(&barrier->cond); + ret = STARPU_PTHREAD_BARRIER_SERIAL_THREAD; + } + else + { + barrier->busy++; + starpu_pthread_cond_wait(&barrier->cond, &barrier->mutex); + barrier->busy--; + starpu_pthread_cond_broadcast(&barrier->cond_destroy); + } + + starpu_pthread_mutex_unlock(&barrier->mutex); + + _STARPU_TRACE_BARRIER_WAIT_END(); + + return ret; +} +#endif /* defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_BARRIER) */ + +#ifdef STARPU_FXT_LOCK_TRACES +#if !defined(STARPU_SIMGRID) && !defined(_MSC_VER) /* !STARPU_SIMGRID */ +int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_LOCKING_MUTEX(); + + int p_ret = pthread_mutex_lock(mutex); + + _STARPU_TRACE_MUTEX_LOCKED(); + + return p_ret; +} + +int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_UNLOCKING_MUTEX(); + + int p_ret = pthread_mutex_unlock(mutex); + + _STARPU_TRACE_MUTEX_UNLOCKED(); + + return p_ret; +} + +int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) +{ + int ret; + _STARPU_TRACE_TRYLOCK_MUTEX(); + + ret = pthread_mutex_trylock(mutex); + + if (!ret) + _STARPU_TRACE_MUTEX_LOCKED(); + + return ret; +} + +int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) +{ + _STARPU_TRACE_COND_WAIT_BEGIN(); + + int p_ret = pthread_cond_wait(cond, mutex); + + _STARPU_TRACE_COND_WAIT_END(); + + return p_ret; +} + +int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_RDLOCKING_RWLOCK(); + + int p_ret = pthread_rwlock_rdlock(rwlock); + + _STARPU_TRACE_RWLOCK_RDLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_RDLOCKING_RWLOCK(); + + int p_ret = pthread_rwlock_tryrdlock(rwlock); + + if (!p_ret) + _STARPU_TRACE_RWLOCK_RDLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) +{ + 
_STARPU_TRACE_WRLOCKING_RWLOCK(); + + int p_ret = pthread_rwlock_wrlock(rwlock); + + _STARPU_TRACE_RWLOCK_WRLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_WRLOCKING_RWLOCK(); + + int p_ret = pthread_rwlock_trywrlock(rwlock); + + if (!p_ret) + _STARPU_TRACE_RWLOCK_WRLOCKED(); + + return p_ret; +} + +int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) +{ + _STARPU_TRACE_UNLOCKING_RWLOCK(); + + int p_ret = pthread_rwlock_unlock(rwlock); + + _STARPU_TRACE_RWLOCK_UNLOCKED(); + + return p_ret; +} +#endif /* !defined(STARPU_SIMGRID) && !defined(_MSC_VER) */ + +#if !defined(STARPU_SIMGRID) && !defined(_MSC_VER) && defined(STARPU_HAVE_PTHREAD_BARRIER) +int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) +{ + int ret; + _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + + ret = pthread_barrier_wait(barrier); + + _STARPU_TRACE_BARRIER_WAIT_END(); + + return ret; +} +#endif /* STARPU_SIMGRID, _MSC_VER, STARPU_HAVE_PTHREAD_BARRIER */ + +#endif /* STARPU_FXT_LOCK_TRACES */ + +/* "sched" variants, to be used (through the STARPU_PTHREAD_MUTEX_*LOCK_SCHED + * macros of course) which record when the mutex is held or not */ +int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex) +{ + return starpu_pthread_mutex_lock(mutex); +} + +int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex) +{ + return starpu_pthread_mutex_unlock(mutex); +} + +int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex) +{ + return starpu_pthread_mutex_trylock(mutex); +} + +#ifdef STARPU_DEBUG +void starpu_pthread_mutex_check_sched(starpu_pthread_mutex_t *mutex, char *file, int line) +{ + int workerid = starpu_worker_get_id(); + STARPU_ASSERT_MSG(workerid == -1 || !_starpu_worker_mutex_is_sched_mutex(workerid, mutex), "%s:%d is locking/unlocking a sched mutex but not using STARPU_PTHREAD_MUTEX_LOCK_SCHED", file, line); +} +#endif + +#if defined(STARPU_SIMGRID) || 
(defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(HAVE_PTHREAD_SPIN_LOCK) + +#undef starpu_pthread_spin_init +int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared) +{ + return _starpu_pthread_spin_init(lock, pshared); +} + +#undef starpu_pthread_spin_destroy +int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) +{ + return _starpu_pthread_spin_destroy(lock); +} + +#undef starpu_pthread_spin_lock +int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) +{ + return _starpu_pthread_spin_lock(lock); +} +#endif + +#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) + +#if !defined(STARPU_SIMGRID) && defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) +int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock) +{ + if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0) + /* Got it on first try! */ + return 0; + + /* Busy, spin a bit. */ + unsigned i; + for (i = 0; i < 128; i++) + { + /* Pause a bit before retrying */ + STARPU_UYIELD(); + /* And synchronize with other threads */ + STARPU_SYNCHRONIZE(); + if (!lock->taken) + /* Holder released it, try again */ + if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0) + /* Got it! */ + return 0; + } + + /* We have spent enough time with spinning, let's block */ + /* This avoids typical 10ms pauses when the application thread tries to submit tasks. */ + while (1) + { + /* Tell releaser to wake us */ + unsigned prev = STARPU_VAL_EXCHANGE(&lock->taken, 2); + if (prev == 0) + /* Ah, it just got released and we actually acquired + * it! + * Note: the sad thing is that we have just written 2, + * so will spuriously try to wake a thread on unlock, + * but we can not avoid it since we do not know whether + * there are other threads sleeping or not. 
+ */ + return 0; + + /* Now start sleeping (unless it was released in between) + * We are sure to get woken because either + * - some thread has not released the lock yet, and lock->taken + * is 2, so it will wake us. + * - some other thread started blocking, and will set + * lock->taken back to 2 + */ + if (syscall(SYS_futex, &lock->taken, _starpu_futex_wait, 2, NULL, NULL, 0)) + if (errno == ENOSYS) + _starpu_futex_wait = FUTEX_WAIT; + } +} +#endif + +#undef starpu_pthread_spin_trylock +int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) +{ + return _starpu_pthread_spin_trylock(lock); +} + +#undef starpu_pthread_spin_unlock +int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) +{ + return _starpu_pthread_spin_unlock(lock); +} + +#if !defined(STARPU_SIMGRID) && defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) +void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock) +{ + /* + * Somebody to wake. Clear 'taken' and wake him. + * Note that he may not be sleeping yet, but if he is not, we won't + * since the value of 'taken' will have changed. 
+ */ + lock->taken = 0; + STARPU_SYNCHRONIZE(); + if (syscall(SYS_futex, &lock->taken, _starpu_futex_wake, 1, NULL, NULL, 0) == -1) + switch (errno) + { + case ENOSYS: + _starpu_futex_wake = FUTEX_WAKE; + if (syscall(SYS_futex, &lock->taken, _starpu_futex_wake, 1, NULL, NULL, 0) == -1) + STARPU_ASSERT_MSG(0, "futex(wake) returned %d!", errno); + break; + case 0: + break; + default: + STARPU_ASSERT_MSG(0, "futex returned %d!", errno); + break; + } +} + +#endif + +#endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ + +#ifdef STARPU_SIMGRID + +int starpu_sem_destroy(starpu_sem_t *sem) +{ +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + sg_sem_destroy(*sem); +#else + MSG_sem_destroy(*sem); +#endif + return 0; +} + +int starpu_sem_init(starpu_sem_t *sem, int pshared, unsigned value) +{ + STARPU_ASSERT_MSG(pshared == 0, "pshared semaphores not supported under simgrid"); +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + *sem = sg_sem_init(value); +#else + *sem = MSG_sem_init(value); +#endif + return 0; +} + +int starpu_sem_post(starpu_sem_t *sem) +{ +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + sg_sem_release(*sem); +#else + MSG_sem_release(*sem); +#endif + return 0; +} + +int starpu_sem_wait(starpu_sem_t *sem) +{ +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + sg_sem_acquire(*sem); +#else + MSG_sem_acquire(*sem); +#endif + return 0; +} + +int starpu_sem_trywait(starpu_sem_t *sem) +{ +#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + if (sg_sem_would_block(*sem)) +#else + if (MSG_sem_would_block(*sem)) +#endif + return EAGAIN; + starpu_sem_wait(sem); + return 0; +} + +int starpu_sem_getvalue(starpu_sem_t *sem, int *sval) +{ +#if SIMGRID_VERSION > 31300 +# ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H + *sval = sg_sem_get_capacity(*sem); +# else + *sval = MSG_sem_get_capacity(*sem); +# endif + return 0; +#else + (void) sem; + (void) sval; + STARPU_ABORT_MSG("sigmrid up to 3.13 did not have working MSG_sem_get_capacity"); 
+#endif +} + +#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ + +int starpu_sem_wait(starpu_sem_t *sem) +{ + int ret; + while((ret = sem_wait(sem)) == -1 && errno == EINTR) + ; + + return ret; +} + +int starpu_sem_trywait(starpu_sem_t *sem) +{ + int ret; + while((ret = sem_trywait(sem)) == -1 && errno == EINTR) + ; + + return ret; +} + +#endif diff --git a/src/common/thread.h b/src/common/thread.h new file mode 100644 index 0000000..35fe90b --- /dev/null +++ b/src/common/thread.h @@ -0,0 +1,172 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __COMMON_THREAD_H__ +#define __COMMON_THREAD_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +#if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) +int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +#endif + +#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) + +static inline int _starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared STARPU_ATTRIBUTE_UNUSED) +{ + lock->taken = 0; + return 0; +} +#define starpu_pthread_spin_init _starpu_pthread_spin_init + +static inline int _starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) +{ + /* we don't do anything */ + return 0; +} +#define starpu_pthread_spin_destroy _starpu_pthread_spin_destroy + +static inline int _starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) +{ +#ifdef STARPU_SIMGRID + if (STARPU_LIKELY(!lock->taken)) + { + lock->taken = 1; + return 0; + } + +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB + /* There is contention, count that a bit */ + starpu_sleep(0.000001); + /* And try again */ + if (STARPU_LIKELY(!lock->taken)) + { + lock->taken = 1; + return 0; + } + + /* Really no luck, really wait for it */ + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_simgrid_time_advance_mutex); +#endif + while (lock->taken) + { +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB + STARPU_PTHREAD_COND_WAIT(&_starpu_simgrid_time_advance_cond, &_starpu_simgrid_time_advance_mutex); +#else + /* Give hand to another thread, hopefully the one which has the + * spinlock and probably just has also a short-lived mutex. 
*/ + starpu_sleep(0.000001); +#endif + STARPU_UYIELD(); + } + lock->taken = 1; +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_simgrid_time_advance_mutex); +#endif + return 0; +#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) + if (STARPU_LIKELY(STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0)) + /* Got it on first try! */ + return 0; + + return _starpu_pthread_spin_do_lock(lock); +#else /* !SIMGRID && !LINUX */ + uint32_t prev; + do + { + prev = STARPU_TEST_AND_SET(&lock->taken, 1); + if (STARPU_UNLIKELY(prev)) + STARPU_UYIELD(); + } + while (STARPU_UNLIKELY(prev)); + return 0; +#endif +} +#define starpu_pthread_spin_lock _starpu_pthread_spin_lock + +static inline void _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_SIMGRID + STARPU_ASSERT(lock->taken); +#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) + STARPU_ASSERT(lock->taken == 1 || lock->taken == 2); +#else + STARPU_ASSERT(lock->taken); +#endif +} + +static inline int _starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) +{ +#ifdef STARPU_SIMGRID + if (STARPU_UNLIKELY(lock->taken)) + return EBUSY; + lock->taken = 1; + return 0; +#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) + unsigned prev; + prev = STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1); + return (prev == 0)?0:EBUSY; +#else /* !SIMGRID && !LINUX */ + uint32_t prev; + prev = STARPU_TEST_AND_SET(&lock->taken, 1); + return (prev == 0)?0:EBUSY; +#endif +} +#define starpu_pthread_spin_trylock _starpu_pthread_spin_trylock + +#if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) +void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +#endif + +static inline int _starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) +{ +#ifdef STARPU_SIMGRID + lock->taken = 0; +#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) + STARPU_ASSERT(lock->taken != 
0); + STARPU_SYNCHRONIZE(); + unsigned next = STARPU_ATOMIC_ADD(&lock->taken, -1); + if (STARPU_LIKELY(next == 0)) + /* Nobody to wake, we are done */ + return 0; + _starpu_pthread_spin_do_unlock(lock); +#else /* !SIMGRID && !LINUX */ + STARPU_RELEASE(&lock->taken); +#endif + return 0; +} +#define starpu_pthread_spin_unlock _starpu_pthread_spin_unlock + +#else /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ + +static inline void _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(pthread_spin_trylock((pthread_spinlock_t *)lock) != 0); +} + +#endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ + + +#pragma GCC visibility pop + +#endif /* __COMMON_THREAD_H__ */ + + diff --git a/src/common/timing.c b/src/common/timing.c new file mode 100644 index 0000000..214bbda --- /dev/null +++ b/src/common/timing.c @@ -0,0 +1,266 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#ifdef HAVE_SIMGRID_ENGINE_H +#include +#endif +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#endif + +#ifdef STARPU_SIMGRID +void _starpu_timing_init(void) +{ +} + +void _starpu_clock_gettime(struct timespec *ts) +{ +#ifdef HAVE_SIMGRID_GET_CLOCK + double now = simgrid_get_clock(); +#else + double now = MSG_get_clock(); +#endif + ts->tv_sec = floor(now); + ts->tv_nsec = floor((now - ts->tv_sec) * 1000000000); +} + +#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) +#include +#ifndef _POSIX_C_SOURCE +/* for clock_gettime */ +#define _POSIX_C_SOURCE 199309L +#endif + +#ifdef __linux__ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#endif + +static struct timespec _starpu_reference_start_time_ts; + +/* Modern CPUs' clocks are usually not synchronized so we use a monotonic clock + * to have consistent timing measurements. + */ +static void _starpu_clock_readtime(struct timespec *ts) +{ +#if 0 /* def CLOCK_MONOTONIC_RAW */ +/* The CLOCK_MONOTONIC_RAW clock is not + * subject to NTP adjustments, but is not available on all systems (in that + * case we use the CLOCK_MONOTONIC clock instead). 
*/ + +/* In the distributed case, we *do* want NTP adjustments, to get + * somehow-coherent traces, so this is disabled */ + + static int raw_supported = 0; + switch (raw_supported) + { + case -1: + break; + case 1: + clock_gettime(CLOCK_MONOTONIC_RAW, ts); + return; + case 0: + if (clock_gettime(CLOCK_MONOTONIC_RAW, ts)) + { + raw_supported = -1; + break; + } + else + { + raw_supported = 1; + return; + } + } +#endif + clock_gettime(CLOCK_MONOTONIC, ts); +} + +void _starpu_timing_init(void) +{ + _starpu_clock_gettime(&_starpu_reference_start_time_ts); +} + +void _starpu_clock_gettime(struct timespec *ts) +{ + struct timespec absolute_ts; + + /* Read the current time */ + _starpu_clock_readtime(&absolute_ts); + + /* Compute the relative time since initialization */ + starpu_timespec_sub(&absolute_ts, &_starpu_reference_start_time_ts, ts); +} + +#else // !HAVE_CLOCK_GETTIME + +#if defined(__i386__) || defined(__pentium__) || defined(__pentiumpro__) || defined(__i586__) || defined(__i686__) || defined(__k6__) || defined(__k7__) || defined(__x86_64__) +union starpu_u_tick +{ + uint64_t tick; + + struct + { + uint32_t low; + uint32_t high; + } + sub; +}; + +#define STARPU_GET_TICK(t) __asm__ volatile("rdtsc" : "=a" ((t).sub.low), "=d" ((t).sub.high)) +#define STARPU_TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick) +#define STARPU_TICK_DIFF(t1, t2) (STARPU_TICK_RAW_DIFF(t1, t2) - _starpu_residual) + +static union starpu_u_tick _starpu_reference_start_tick; +static double _starpu_scale = 0.0; +static unsigned long long _starpu_residual = 0; + +static int _starpu_inited = 0; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= 
(((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +void _starpu_timing_init(void) +{ + static union starpu_u_tick t1, t2; + int i; + + if (_starpu_inited) return; + + _starpu_residual = (unsigned long long)1 << 63; + + for(i = 0; i < 20; i++) + { + STARPU_GET_TICK(t1); + STARPU_GET_TICK(t2); + _starpu_residual = STARPU_MIN(_starpu_residual, STARPU_TICK_RAW_DIFF(t1, t2)); + } + + { + struct timeval tv1,tv2; + + STARPU_GET_TICK(t1); + mygettimeofday(&tv1,0); + starpu_sleep(0.5); + STARPU_GET_TICK(t2); + mygettimeofday(&tv2,0); + _starpu_scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) - + (tv1.tv_sec*1e6 + tv1.tv_usec)) / + (double)(STARPU_TICK_DIFF(t1, t2)); + } + + STARPU_GET_TICK(_starpu_reference_start_tick); + + _starpu_inited = 1; +} + +void _starpu_clock_gettime(struct timespec *ts) +{ + union starpu_u_tick tick_now; + + STARPU_GET_TICK(tick_now); + + uint64_t elapsed_ticks = STARPU_TICK_DIFF(_starpu_reference_start_tick, tick_now); + + /* We convert this number into nano-seconds so that we can fill the + * timespec structure. 
*/ + uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(_starpu_scale*1000.0)); + + long tv_nsec = (elapsed_ns % 1000000000); + time_t tv_sec = (elapsed_ns / 1000000000); + + ts->tv_sec = tv_sec; + ts->tv_nsec = tv_nsec; +} + +#else // !HAVE_CLOCK_GETTIME & no rdtsc +#warning StarPU could not find a timer, clock will always return 0 +void _starpu_timing_init(void) +{ +} + +void _starpu_clock_gettime(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} +#endif +#endif // HAVE_CLOCK_GETTIME + +/* Returns the time elapsed between start and end in microseconds */ +double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end) +{ + struct timespec diff; + + starpu_timespec_sub(end, start, &diff); + + double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3); + + return us; +} + +double starpu_timing_timespec_to_us(struct timespec *ts) +{ + return (1000000.0*ts->tv_sec) + (0.001*ts->tv_nsec); +} + +double starpu_timing_now(void) +{ +#ifdef STARPU_SIMGRID +# ifdef HAVE_SIMGRID_GET_CLOCK + return simgrid_get_clock()*1000000; +# else + return MSG_get_clock()*1000000; +# endif +#else + struct timespec now; + _starpu_clock_gettime(&now); + + return starpu_timing_timespec_to_us(&now); +#endif +} diff --git a/src/common/timing.h b/src/common/timing.h new file mode 100644 index 0000000..bea72ae --- /dev/null +++ b/src/common/timing.h @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef TIMING_H +#define TIMING_H + +/** @file */ + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#pragma GCC visibility push(hidden) + +/** + * _starpu_timing_init must be called prior to using any of these timing + * functions. + */ +void _starpu_timing_init(void); +void _starpu_clock_gettime(struct timespec *ts) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +#pragma GCC visibility pop + +#endif /* TIMING_H */ + + diff --git a/src/common/uthash.h b/src/common/uthash.h new file mode 100644 index 0000000..760d814 --- /dev/null +++ b/src/common/uthash.h @@ -0,0 +1,1030 @@ +/* +Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +/** @file */ + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#ifdef _MSC_VER /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while(0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while(0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on win32 */ +#ifdef _MSC_VER +typedef unsigned int uint32_t; +#else +#include /* uint32_t */ +#endif + +#pragma GCC visibility push(hidden) + +#define UTHASH_VERSION 1.9.3 + +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ + +#define uthash_noexpand_fyi(tbl) /* can be defined to log 
noexpand */ +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_bkt=0,_hf_hashv=0; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ?
1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) + +#ifdef STARPU_DEBUG +/* Check that we don't insert the same key several times */ +#define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) \ +do { \ + __typeof__(out) _out; \ + HASH_FIND(hh,head,keyptr,keylen,_out); \ + STARPU_ASSERT_MSG(!_out,"Cannot insert the same key twice"); \ +} while(0) +#else +#define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) +#endif + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt=0; \ + HASH_CHECK_KEY(hh,head,keyptr,keylen_in,add); \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)keyptr; \ + (add)->hh.keylen = keylen_in; \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. 
/*
 * HASH_DELETE(hh,head,delptr): remove the item "delptr" from the hash whose
 * head pointer is "head".
 *
 * When the item is the only one left (no prev and no next in app order), the
 * bucket array, the bloom filter and the table are released and the head is
 * set to NULL.  Otherwise the app-order doubly-linked list is patched around
 * the item, the item is unlinked from its bucket chain and the item count is
 * decremented.
 *
 * These used to be expressed using (delptr) but that led to a bug
 * if someone used the same symbol for the head and deletee, like
 *  HASH_DELETE(hh,users,users);
 * We want that to work, but by changing the head (users) below
 * we were forfeiting our ability to further refer to the deletee (users)
 * in the patch-up process. Solution: use scratch space to
 * copy the deletee pointer, then the latter references are via that
 * scratch pointer rather than through the repointed (users) symbol.
 */
#define HASH_DELETE(hh,head,delptr) \
do { \
    unsigned _hd_bkt; \
    struct UT_hash_handle *_hd_hh_del; \
    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
        uthash_free((head)->hh.tbl->buckets, \
                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
        HASH_BLOOM_FREE((head)->hh.tbl); \
        uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
        (head) = NULL; \
    } else { \
        _hd_hh_del = &((delptr)->hh); \
        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
            (head)->hh.tbl->tail = \
                (UT_hash_handle*)((char*)((delptr)->hh.prev) + \
                (head)->hh.tbl->hho); \
        } \
        if ((delptr)->hh.prev) { \
            ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
                    (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
        } else { \
            DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
        } \
        if (_hd_hh_del->next) { \
            ((UT_hash_handle*)((char*)_hd_hh_del->next + \
                    (head)->hh.tbl->hho))->prev = \
                    _hd_hh_del->prev; \
        } \
        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
        (head)->hh.tbl->num_items--; \
    } \
    HASH_FSCK(hh,head); \
} while (0)


/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL.
 * They all assume the hash handle member is named "hh". */
#define HASH_FIND_STR(head,findstr,out) \
    HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add) \
    HASH_ADD(hh,head,strfield[0],strlen((add)->strfield),add)
#define HASH_FIND_INT(head,findint,out) \
    HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
    HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out) \
    HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr) \
    HASH_DELETE(hh,head,delptr)

/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
 *
 * It walks every bucket chain (checking hh_prev links and per-bucket counts),
 * then walks the items in app order (checking prev links), and compares both
 * totals with tbl->num_items.  Any mismatch is reported through HASH_OOPS,
 * which prints to stderr and exits.
 */
#ifdef HASH_DEBUG
#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head) \
do { \
    unsigned _bkt_i; \
    unsigned _count, _bkt_count; \
    char *_prev; \
    struct UT_hash_handle *_thh; \
    if (head) { \
        _count = 0; \
        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
            _bkt_count = 0; \
            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
            _prev = NULL; \
            while (_thh) { \
               if (_prev != (char*)(_thh->hh_prev)) { \
                   HASH_OOPS("invalid hh_prev %p, actual %p\n", \
                    (void*)_thh->hh_prev, (void*)_prev ); \
               } \
               _bkt_count++; \
               _prev = (char*)(_thh); \
               _thh = _thh->hh_next; \
            } \
            _count += _bkt_count; \
            if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
               HASH_OOPS("invalid bucket count %u, actual %u\n", \
                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
            } \
        } \
        if (_count != (head)->hh.tbl->num_items) { \
            HASH_OOPS("invalid hh item count %u, actual %u\n", \
                (head)->hh.tbl->num_items, _count ); \
        } \
        /* traverse hh in app order; check next/prev integrity, count */ \
        _count = 0; \
        _prev = NULL; \
        _thh = &(head)->hh; \
        while (_thh) { \
           _count++; \
           if (_prev !=(char*)(_thh->prev)) { \
              HASH_OOPS("invalid prev %p, actual %p\n", \
                    (void*)_thh->prev, (void*)_prev ); \
           } \
           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
           _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
                                  (head)->hh.tbl->hho) : NULL ); \
        } \
        if (_count != (head)->hh.tbl->num_items) { \
            HASH_OOPS("invalid app item count %u, actual %u\n", \
                (head)->hh.tbl->num_items, _count ); \
        } \
    } \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the descriptor to which this macro is defined for tuning the hash function.
 * The app can #include <unistd.h> to get the prototype for write(2).
 * The key length is evaluated once and reused for both writes. */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
do { \
    unsigned _klen = (unsigned)(fieldlen); \
    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
    write(HASH_EMIT_KEYS, keyptr, _klen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif

/* default to Jenkins' hash unless overridden e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif

/*
 * All hash function macros share one signature:
 *   key      pointer to the key bytes
 *   keylen   number of key bytes
 *   num_bkts number of buckets (the masking below requires a power of two)
 *   hashv    unsigned lvalue receiving the full hash value
 *   bkt      unsigned lvalue receiving the bucket index, hashv & (num_bkts-1)
 * Every argument is parenthesised in the expansion so that expressions such
 * as "16 << 1" or "a + b" can be passed safely.
 */

/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _hb_keylen=(unsigned)(keylen); \
  char *_hb_key=(char*)(key); \
  (hashv) = 0; \
  while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while (0)


/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _sx_i; \
  const unsigned _sx_len=(unsigned)(keylen); \
  char *_hs_key=(char*)(key); \
  (hashv) = 0; \
  for(_sx_i=0; _sx_i < _sx_len; _sx_i++) \
      (hashv) ^= ((hashv) << 5) + ((hashv) >> 2) + _hs_key[_sx_i]; \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while (0)

#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _fn_i; \
  const unsigned _fn_len=(unsigned)(keylen); \
  char *_hf_key=(char*)(key); \
  (hashv) = 2166136261UL; \
  for(_fn_i=0; _fn_i < _fn_len; _fn_i++) \
      (hashv) = ((hashv) * 16777619) ^ _hf_key[_fn_i]; \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)
/*
 * HASH_OAT and HASH_JEN are, like HASH_SAX and HASH_FNV, macro variants of
 * the functions listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
 *
 * All hash function macros take (key, keylen, num_bkts, hashv, bkt): they
 * store the full hash of the keylen bytes at key into the unsigned lvalue
 * hashv and the bucket index hashv & (num_bkts-1) into bkt.  Arguments are
 * parenthesised in the expansions so expressions can be passed safely.
 */
#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _ho_i; \
  const unsigned _ho_len=(unsigned)(keylen); \
  char *_ho_key=(char*)(key); \
  (hashv) = 0; \
  for(_ho_i=0; _ho_i < _ho_len; _ho_i++) { \
      (hashv) += _ho_key[_ho_i]; \
      (hashv) += ((hashv) << 10); \
      (hashv) ^= ((hashv) >> 6); \
  } \
  (hashv) += ((hashv) << 3); \
  (hashv) ^= ((hashv) >> 11); \
  (hashv) += ((hashv) << 15); \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)

/* Mixing step of HASH_JEN; a, b and c must be plain unsigned lvalues. */
#define HASH_JEN_MIX(a,b,c) \
do { \
  a -= b; a -= c; a ^= ( c >> 13 ); \
  b -= c; b -= a; b ^= ( a << 8 ); \
  c -= a; c -= b; c ^= ( b >> 13 ); \
  a -= b; a -= c; a ^= ( c >> 12 ); \
  b -= c; b -= a; b ^= ( a << 16 ); \
  c -= a; c -= b; c ^= ( b >> 5 ); \
  a -= b; a -= c; a ^= ( c >> 3 ); \
  b -= c; b -= a; b ^= ( a << 10 ); \
  c -= a; c -= b; c ^= ( b >> 15 ); \
} while (0)

/* The key is consumed 12 bytes at a time; the switch folds in the 0..11
 * remaining bytes (every case deliberately falls through). */
#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _hj_i,_hj_j,_hj_k; \
  char *_hj_key=(char*)(key); \
  (hashv) = 0xfeedbeef; \
  _hj_i = _hj_j = 0x9e3779b9; \
  _hj_k = (unsigned)(keylen); \
  while (_hj_k >= 12) { \
    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
        + ( (unsigned)_hj_key[2] << 16 ) \
        + ( (unsigned)_hj_key[3] << 24 ) ); \
    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
        + ( (unsigned)_hj_key[6] << 16 ) \
        + ( (unsigned)_hj_key[7] << 24 ) ); \
    (hashv) += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
        + ( (unsigned)_hj_key[10] << 16 ) \
        + ( (unsigned)_hj_key[11] << 24 ) ); \
 \
     HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
 \
     _hj_key += 12; \
     _hj_k -= 12; \
  } \
  (hashv) += (unsigned)(keylen); \
  switch ( _hj_k ) { \
     case 11: (hashv) += ( (unsigned)_hj_key[10] << 24 ); \
     /* FALLTHRU */ \
     case 10: (hashv) += ( (unsigned)_hj_key[9] << 16 ); \
     /* FALLTHRU */ \
     case 9:  (hashv) += ( (unsigned)_hj_key[8] << 8 ); \
     /* FALLTHRU */ \
     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
     /* FALLTHRU */ \
     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
     /* FALLTHRU */ \
     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
     /* FALLTHRU */ \
     case 5:  _hj_j += _hj_key[4]; \
     /* FALLTHRU */ \
     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
     /* FALLTHRU */ \
     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
     /* FALLTHRU */ \
     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
     /* FALLTHRU */ \
     case 1:  _hj_i += _hj_key[0]; \
     /* FALLTHRU */ \
     default: break; \
  } \
  HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)

/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
                       +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
/* The key is consumed 4 bytes per iteration; the 0..3 trailing bytes are
 * handled by the switch.  The third trailing byte is converted to uint32_t
 * before shifting so that a negative char is never left-shifted. */
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
do { \
  char *_sfh_key=(char*)(key); \
  uint32_t _sfh_tmp, _sfh_len = (uint32_t)(keylen); \
 \
  int _sfh_rem = _sfh_len & 3; \
  _sfh_len >>= 2; \
  (hashv) = 0xcafebabe; \
 \
  /* Main loop */ \
  for (;_sfh_len > 0; _sfh_len--) { \
    (hashv) += get16bits (_sfh_key); \
    _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ (hashv); \
    (hashv) = ((hashv) << 16) ^ _sfh_tmp; \
    _sfh_key += 2*sizeof (uint16_t); \
    (hashv) += (hashv) >> 11; \
  } \
 \
  /* Handle end cases */ \
  switch (_sfh_rem) { \
    case 3: (hashv) += get16bits (_sfh_key); \
            (hashv) ^= (hashv) << 16; \
            (hashv) ^= (uint32_t)_sfh_key[sizeof (uint16_t)] << 18; \
            (hashv) += (hashv) >> 11; \
            break; \
    case 2: (hashv) += get16bits (_sfh_key); \
            (hashv) ^= (hashv) << 11; \
            (hashv) += (hashv) >> 17; \
            break; \
    case 1: (hashv) += *_sfh_key; \
            (hashv) ^= (hashv) << 10; \
            (hashv) += (hashv) >> 1; \
            break; \
    default: break; \
  } \
 \
    /* Force "avalanching" of final 127 bits */ \
    (hashv) ^= (hashv) << 3; \
    (hashv) += (hashv) >> 5; \
    (hashv) ^= (hashv) << 4; \
    (hashv) += (hashv) >> 17; \
    (hashv) ^= (hashv) << 25; \
    (hashv) += (hashv) >> 6; \
    (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)
/* The MurmurHash variants are only available when the application promises,
 * by defining HASH_USING_NO_STRICT_ALIASING, that it is compiled without
 * strict-aliasing assumptions (the macros read the key through word pointers). */
#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads.
 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
 * So MurmurHash comes in two versions, the faster unaligned one and the slower
 * aligned one. We only use the faster one on CPU's where we know it's safe.
 *
 * Note the preprocessor built-in defines can be emitted using:
 *
 *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
 *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
 */
#if (defined(__i386__) || defined(__x86_64__))
#define HASH_MUR HASH_MUR_UNALIGNED
#else
#define HASH_MUR HASH_MUR_ALIGNED
#endif

/* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386.
 * Key bytes are converted to unsigned before shifting so a negative char is
 * never left-shifted (the resulting bit pattern is unchanged). */
#define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \
do { \
  const unsigned int _mur_m = 0x5bd1e995; \
  const int _mur_r = 24; \
  (hashv) = 0xcafebabe ^ (uint32_t)(keylen); \
  char *_mur_key = (char *)(key); \
  uint32_t _mur_tmp, _mur_len = (uint32_t)(keylen); \
 \
  for (;_mur_len >= 4; _mur_len-=4) { \
    _mur_tmp = *(uint32_t *)_mur_key; \
    _mur_tmp *= _mur_m; \
    _mur_tmp ^= _mur_tmp >> _mur_r; \
    _mur_tmp *= _mur_m; \
    (hashv) *= _mur_m; \
    (hashv) ^= _mur_tmp; \
    _mur_key += 4; \
  } \
 \
  switch(_mur_len) \
  { \
    case 3: (hashv) ^= (unsigned)_mur_key[2] << 16; \
    /* FALLTHRU */ \
    case 2: (hashv) ^= (unsigned)_mur_key[1] << 8; \
    /* FALLTHRU */ \
    case 1: (hashv) ^= _mur_key[0]; \
            (hashv) *= _mur_m; \
    /* FALLTHRU */ \
    default: break; \
  } \
 \
  (hashv) ^= (hashv) >> 13; \
  (hashv) *= _mur_m; \
  (hashv) ^= (hashv) >> 15; \
 \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)

/* Appleby's MurmurHash version for alignment-sensitive archs like Sparc.
 *
 * When the key is misaligned and at least 4 bytes long, the leading bytes up
 * to the next 4-byte boundary are pre-loaded into _mur_t, and every 32-bit
 * block is then assembled from the previous and the current aligned word.
 * After the "leftover" block has been mixed in, the key pointer (not the
 * already-consumed block value) is advanced past the bytes that completed it,
 * so the final tail switch reads the bytes that have not been hashed yet. */
#define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \
do { \
  const unsigned int _mur_m = 0x5bd1e995; \
  const int _mur_r = 24; \
  (hashv) = 0xcafebabe ^ (uint32_t)(keylen); \
  char *_mur_key = (char *)(key); \
  uint32_t _mur_len = (uint32_t)(keylen); \
  int _mur_align = (int)((uintptr_t)_mur_key & 3); \
 \
  if (_mur_align && (_mur_len >= 4)) { \
    unsigned _mur_t = 0, _mur_d = 0; \
    switch(_mur_align) { \
      case 1: _mur_t |= (unsigned)_mur_key[2] << 16; \
      /* FALLTHRU */ \
      case 2: _mur_t |= (unsigned)_mur_key[1] << 8; \
      /* FALLTHRU */ \
      case 3: _mur_t |= _mur_key[0]; \
      /* FALLTHRU */ \
      default: break; \
    } \
    _mur_t <<= (8 * _mur_align); \
    _mur_key += 4-_mur_align; \
    _mur_len -= 4-_mur_align; \
    int _mur_sl = 8 * (4-_mur_align); \
    int _mur_sr = 8 * _mur_align; \
 \
    for (;_mur_len >= 4; _mur_len-=4) { \
      _mur_d = *(unsigned *)_mur_key; \
      _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
      unsigned _mur_k = _mur_t; \
      _mur_k *= _mur_m; \
      _mur_k ^= _mur_k >> _mur_r; \
      _mur_k *= _mur_m; \
      (hashv) *= _mur_m; \
      (hashv) ^= _mur_k; \
      _mur_t = _mur_d; \
      _mur_key += 4; \
    } \
    _mur_d = 0; \
    if(_mur_len >= (uint32_t)_mur_align) { \
      switch(_mur_align) { \
        case 3: _mur_d |= (unsigned)_mur_key[2] << 16; \
        /* FALLTHRU */ \
        case 2: _mur_d |= (unsigned)_mur_key[1] << 8; \
        /* FALLTHRU */ \
        case 1: _mur_d |= _mur_key[0]; \
        /* FALLTHRU */ \
        default: break; \
      } \
      unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
      _mur_k *= _mur_m; \
      _mur_k ^= _mur_k >> _mur_r; \
      _mur_k *= _mur_m; \
      (hashv) *= _mur_m; \
      (hashv) ^= _mur_k; \
      /* skip the bytes that completed the block just mixed in */ \
      _mur_key += _mur_align; \
      _mur_len -= _mur_align; \
 \
      switch(_mur_len) \
      { \
        case 3: (hashv) ^= (unsigned)_mur_key[2] << 16; \
        /* FALLTHRU */ \
        case 2: (hashv) ^= (unsigned)_mur_key[1] << 8; \
        /* FALLTHRU */ \
        case 1: (hashv) ^= _mur_key[0]; \
                (hashv) *= _mur_m; \
        /* FALLTHRU */ \
        default: break; \
      } \
    } else { \
      switch(_mur_len) \
      { \
        case 3: _mur_d ^= (unsigned)_mur_key[2] << 16; \
        /* FALLTHRU */ \
        case 2: _mur_d ^= (unsigned)_mur_key[1] << 8; \
        /* FALLTHRU */ \
        case 1: _mur_d ^= _mur_key[0]; \
        /* FALLTHRU */ \
        case 0: (hashv) ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
                (hashv) *= _mur_m; \
        /* FALLTHRU */ \
        default: break; \
      } \
    } \
 \
    (hashv) ^= (hashv) >> 13; \
    (hashv) *= _mur_m; \
    (hashv) ^= (hashv) >> 15; \
  } else { \
    for (;_mur_len >= 4; _mur_len-=4) { \
      unsigned _mur_k = *(unsigned*)_mur_key; \
      _mur_k *= _mur_m; \
      _mur_k ^= _mur_k >> _mur_r; \
      _mur_k *= _mur_m; \
      (hashv) *= _mur_m; \
      (hashv) ^= _mur_k; \
      _mur_key += 4; \
    } \
    switch(_mur_len) \
    { \
      case 3: (hashv) ^= (unsigned)_mur_key[2] << 16; \
      /* FALLTHRU */ \
      case 2: (hashv) ^= (unsigned)_mur_key[1] << 8; \
      /* FALLTHRU */ \
      case 1: (hashv) ^= _mur_key[0]; \
              (hashv) *= _mur_m; \
      /* FALLTHRU */ \
      default: break; \
    } \
 \
    (hashv) ^= (hashv) >> 13; \
    (hashv) *= _mur_m; \
    (hashv) ^= (hashv) >> 15; \
  } \
  (bkt) = (hashv) & ((num_bkts)-1); \
} while(0)
#endif  /* HASH_USING_NO_STRICT_ALIASING */

/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)

/* iterate over items in a known bucket to find desired item.
 * "out" ends up pointing to the first element of bucket "head" whose key has
 * length keylen_in and compares equal to keyptr, or is NULL if none does. */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
do { \
 if ((head).hh_head) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(head).hh_head)); } \
 else { (out)=NULL; } \
 while (out) { \
    if ((out)->hh.keylen == (keylen_in)) { \
        if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
    } \
    if ((out)->hh.hh_next) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \
    else { (out) = NULL; } \
 } \
} while(0)

/* add an item to a bucket: the handle is pushed at the front of the chain,
 * and the table is expanded when the chain reaches its capacity threshold
 * (unless expansion has been inhibited for this table). */
#define HASH_ADD_TO_BKT(head,addhh) \
do { \
 (head).count++; \
 (addhh)->hh_next = (head).hh_head; \
 (addhh)->hh_prev = NULL; \
 if ((head).hh_head) { (head).hh_head->hh_prev = (addhh); } \
 (head).hh_head=(addhh); \
 if ((head).count >= (((head).expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
     && (addhh)->tbl->noexpand != 1) { \
       HASH_EXPAND_BUCKETS((addhh)->tbl); \
 } \
} while(0)

/* remove an item from a given bucket.
 * Wrapped in do { } while (0) so that it behaves as a single statement. */
#define HASH_DEL_IN_BKT(hh,head,hh_del) \
do { \
    (head).count--; \
    if ((head).hh_head == (hh_del)) { \
      (head).hh_head = (hh_del)->hh_next; \
    } \
    if ((hh_del)->hh_prev) { \
        (hh_del)->hh_prev->hh_next = (hh_del)->hh_next; \
    } \
    if ((hh_del)->hh_next) { \
        (hh_del)->hh_next->hh_prev = (hh_del)->hh_prev; \
    } \
} while (0)
effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 
/*
 * HASH_EXPAND_BUCKETS(tbl): double the number of buckets of table "tbl" and
 * redistribute every item into the new bucket array.
 *
 * Steps, in order:
 *  - allocate and zero a bucket array twice the current size (uthash_fatal is
 *    invoked if the allocation fails);
 *  - recompute tbl->ideal_chain_maxlen for the doubled bucket count, i.e.
 *       ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 *    with n = num_items, b = new bucket count and lb = its base-2 log;
 *  - move every hash handle to the head of its new bucket chain, counting in
 *    tbl->nonideal_items the items whose chain position exceeds the ideal
 *    length and updating that bucket's expand_mult;
 *  - free the old bucket array and install the new one;
 *  - track ineffective expansions: after two consecutive expansions that
 *    leave more than half of the items in non-ideal positions, further
 *    expansion is inhibited (tbl->noexpand = 1).
 */
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
    unsigned _he_bkt; \
    unsigned _he_bkt_i; \
    struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
    UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
    _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
             2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
    if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
    memset(_he_new_buckets, 0, \
            2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
    /* ceil(num_items / new bucket count), using shift and mask only */ \
    tbl->ideal_chain_maxlen = \
       (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
       ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
    tbl->nonideal_items = 0; \
    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
    { \
        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
        while (_he_thh) { \
           /* remember the successor: hh_next is rewritten below */ \
           _he_hh_nxt = _he_thh->hh_next; \
           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
           _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
             tbl->nonideal_items++; \
             _he_newbkt->expand_mult = _he_newbkt->count / \
                                        tbl->ideal_chain_maxlen; \
           } \
           /* push the handle at the front of its new chain */ \
           _he_thh->hh_prev = NULL; \
           _he_thh->hh_next = _he_newbkt->hh_head; \
           if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
                _he_thh; \
           _he_newbkt->hh_head = _he_thh; \
           _he_thh = _he_hh_nxt; \
        } \
    } \
    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
    tbl->num_buckets *= 2; \
    tbl->log2_num_buckets++; \
    tbl->buckets = _he_new_buckets; \
    /* count consecutive expansions that left more than half the items non-ideal */ \
    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
        (tbl->ineff_expands+1) : 0; \
    if (tbl->ineff_expands > 1) { \
        tbl->noexpand=1; \
        uthash_noexpand_fyi(tbl); \
    } \
    uthash_expand_fyi(tbl); \
} while(0)


/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in.
/* HASH_SORT / HASH_SRT: sort the items of a hash in place (app order only;
 * bucket chains are untouched), using cmpfcn(a, b) on element pointers.
 * This is an adaptation of Simon Tatham's O(n log(n)) mergesort.
 * Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in.
 */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
do { \
  unsigned _hs_i; \
  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
  if (head) { \
      _hs_insize = 1; \
      _hs_looping = 1; \
      _hs_list = &((head)->hh); \
      while (_hs_looping) { \
          _hs_p = _hs_list; \
          _hs_list = NULL; \
          _hs_tail = NULL; \
          _hs_nmerges = 0; \
          while (_hs_p) { \
              _hs_nmerges++; \
              _hs_q = _hs_p; \
              _hs_psize = 0; \
              for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
                  _hs_psize++; \
                  _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                          ((void*)((char*)(_hs_q->next) + \
                          (head)->hh.tbl->hho)) : NULL); \
                  if (! (_hs_q) ) break; \
              } \
              _hs_qsize = _hs_insize; \
              while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
                  if (_hs_psize == 0) { \
                      _hs_e = _hs_q; \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                              ((void*)((char*)(_hs_q->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_qsize--; \
                  } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
                      _hs_e = _hs_p; \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
                              ((void*)((char*)(_hs_p->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_psize--; \
                  } else if (( \
                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
                             ) <= 0) { \
                      _hs_e = _hs_p; \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
                              ((void*)((char*)(_hs_p->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_psize--; \
                  } else { \
                      _hs_e = _hs_q; \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                              ((void*)((char*)(_hs_q->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_qsize--; \
                  } \
                  if ( _hs_tail ) { \
                      _hs_tail->next = ((_hs_e) ? \
                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
                  } else { \
                      _hs_list = _hs_e; \
                  } \
                  _hs_e->prev = ((_hs_tail) ? \
                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
                  _hs_tail = _hs_e; \
              } \
              _hs_p = _hs_q; \
          } \
          _hs_tail->next = NULL; \
          if ( _hs_nmerges <= 1 ) { \
              _hs_looping=0; \
              (head)->hh.tbl->tail = _hs_tail; \
              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
          } \
          _hs_insize *= 2; \
      } \
      HASH_FSCK(hh,head); \
 } \
} while (0)

/* This function selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
  unsigned _src_bkt, _dst_bkt; \
  void *_last_elt=NULL, *_elt; \
  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
  if (src) { \
    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
          _src_hh; \
          _src_hh = _src_hh->hh_next) { \
          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
          if (cond(_elt)) { \
            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
            _dst_hh->key = _src_hh->key; \
            _dst_hh->keylen = _src_hh->keylen; \
            _dst_hh->hashv = _src_hh->hashv; \
            _dst_hh->prev = _last_elt; \
            _dst_hh->next = NULL; \
            if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
            if (!dst) { \
              DECLTYPE_ASSIGN(dst,_elt); \
              HASH_MAKE_TABLE(hh_dst,dst); \
            } else { \
              _dst_hh->tbl = (dst)->hh_dst.tbl; \
            } \
            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
            (dst)->hh_dst.tbl->num_items++; \
            _last_elt = _elt; \
            _last_elt_hh = _dst_hh; \
          } \
      } \
    } \
  } \
  HASH_FSCK(hh_dst,dst); \
} while (0)

/* HASH_CLEAR: drop the whole table in one go and set the head to NULL.
 * The items themselves are not freed (they belong to the application); only
 * the bloom filter, the bucket array and the table are released.  The bloom
 * filter is released first, while the table is still valid, exactly as
 * HASH_DELETE does when the last item goes away. */
#define HASH_CLEAR(hh,head) \
do { \
  if (head) { \
    HASH_BLOOM_FREE((head)->hh.tbl); \
    uthash_free((head)->hh.tbl->buckets, \
                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
    (head)=NULL; \
  } \
} while(0)

/* HASH_ITER: deletion-safe iteration in app order.  "tmp" always holds the
 * successor of "el", so "el" may be deleted inside the loop body. */
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
  el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
+ */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. 
*/ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#pragma GCC visibility pop + +#endif /* UTHASH_H */ diff --git a/src/common/utils.c b/src/common/utils.c new file mode 100644 index 0000000..e87e5be --- /dev/null +++ b/src/common/utils.c @@ -0,0 +1,769 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#include +#define mkdir(path, mode) mkdir(path) +#if !defined(__MINGW32__) +#define ftruncate(fd, length) _chsize(fd, length) +#endif +#endif + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if !defined(O_DIRECT) && defined(F_NOCACHE) +#define O_DIRECT F_NOCACHE +#endif + +#ifndef O_DIRECT +#define O_DIRECT 0 +#endif + +int _starpu_silent; + +void _starpu_util_init(void) +{ + _starpu_silent = starpu_getenv_number_default("STARPU_SILENT", 0); + STARPU_HG_DISABLE_CHECKING(_starpu_silent); +} + +#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MINGW32__) +#include +static char * dirname(char * path) +{ + char drive[_MAX_DRIVE]; + char dir[_MAX_DIR]; + /* Remove trailing slash */ + while (strlen(path) > 0 && (*(path+strlen(path)-1) == '/' || *(path+strlen(path)-1) == '\\')) + *(path+strlen(path)-1) = '\0'; + _splitpath(path, drive, dir, NULL, NULL); + _makepath(path, drive, dir, NULL, NULL); + return path; +} +#else +#include +#endif + +/* Function with behaviour like `mkdir -p'. 
/* Function with behaviour like `mkdir -p'.  This function was adapted from
 * http://niallohiggins.com/2009/01/08/mkpath-mkdir-p-alike-in-c-for-unix/
 *
 * Creates the directory s and, recursively, every missing parent, using mode
 * for each directory created.  Returns 0 on success (including when the
 * directory already exists) and -1 on failure, with errno left as set by the
 * failing call.  Aborts if memory cannot be allocated or if s exists but is
 * not a directory. */
int _starpu_mkpath(const char *s, mode_t mode)
{
	char *target = NULL;	/* private copy of the path to create */
	char *scratch = NULL;	/* buffer handed to dirname(), which may modify it */
	char *parent = NULL;	/* private copy of the parent directory name */
	char *dir;
	struct stat st;
	int saved_errno;
	int status = -1;

	/* Skip redundant leading slashes, keeping the last one. */
	for (; s[0] == '/' && s[1] == '/'; s++)
		;

	/* The current directory and the roots need no creation. */
	if (!strcmp(s, ".") || !strcmp(s, "/"))
		return 0;
#if defined(_WIN32)
	/* C:/ or C:\ */
	if (s[0] && s[1] == ':' && (s[2] == '/' || s[2] == '\\') && !s[3])
		return 0;
	if (!strcmp(s, "\\"))
		return 0;
#endif

	target = strdup(s);
	if (target == NULL)
		STARPU_ABORT();

	scratch = strdup(s);
	if (scratch == NULL)
		STARPU_ABORT();

	dir = dirname(scratch);
	if (dir != NULL)
	{
		parent = strdup(dir);
		if (parent == NULL)
			STARPU_ABORT();

		/* Make the ancestors first; a parent that already exists is fine. */
		if (_starpu_mkpath(parent, mode) != -1 || errno == EEXIST)
		{
			if (stat(target, &st) != 0)
			{
				/* Not there yet: create it, tolerating a concurrent creation. */
				status = (mkdir(target, mode) == -1 && errno != EEXIST) ? -1 : 0;
			}
			else
			{
				if (!S_ISDIR(st.st_mode))
				{
					_STARPU_MSG("Error: %s already exists and is not a directory:\n", target);
					STARPU_ABORT();
				}
				/* It already exists and is a directory. */
				status = 0;
			}
		}
	}

	/* Release the scratch copies without disturbing the caller-visible errno. */
	saved_errno = errno;
	free(parent);
	free(scratch);
	free(target);
	errno = saved_errno;
	return status;
}

/* Same as _starpu_mkpath(), but any failure other than EEXIST is reported on
 * stderr and aborts the program. */
void _starpu_mkpath_and_check(const char *path, mode_t mode)
{
	if (_starpu_mkpath(path, mode) != -1 || errno == EEXIST)
		return;

	_STARPU_MSG("Error making StarPU directory %s:\n", path);
	perror("mkdir");
	STARPU_ABORT();
}
forced_hostname[0]) + { + snprintf(hostname, size-1, "%s", forced_hostname); + hostname[size-1] = 0; + } + else + { + char *c; + gethostname(hostname, size-1); + hostname[size-1] = 0; + c = strchr(hostname, '.'); + if (c) + *c = 0; + } +} + +void starpu_sleep(float nb_sec) +{ +#ifdef STARPU_SIMGRID +# ifdef HAVE_SG_ACTOR_SLEEP_FOR + sg_actor_sleep_for(nb_sec); +# else + MSG_process_sleep(nb_sec); +# endif +#elif defined(STARPU_HAVE_WINDOWS) + Sleep(nb_sec * 1000); +#else + struct timespec req, rem; + + req.tv_sec = nb_sec; + req.tv_nsec = (nb_sec - (float) req.tv_sec) * 1000000000; + while (nanosleep(&req, &rem)) + req = rem; +#endif +} + +void starpu_usleep(float nb_micro_sec) +{ +#ifdef STARPU_SIMGRID +# ifdef HAVE_SG_ACTOR_SLEEP_FOR + sg_actor_sleep_for(nb_micro_sec / 1000000); +# else + MSG_process_sleep(nb_micro_sec / 1000000); +# endif +#elif defined(STARPU_HAVE_WINDOWS) + Sleep(nb_micro_sec / 1000); +#elif HAVE_UNISTD_H + usleep(nb_micro_sec); +#else +#error no implementation of usleep +#endif +} + +char *starpu_getenv(const char *str) +{ +#ifndef STARPU_SIMGRID +#if defined(STARPU_DEVEL) || defined(STARPU_DEBUG) + struct _starpu_worker * worker; + + worker = _starpu_get_local_worker_key(); + + if (worker && worker->worker_is_initialized) + _STARPU_DISP("getenv should not be called from running workers, only for main() or worker initialization, since it is not reentrant\n"); +#endif +#endif + return getenv(str); +} + +static int _strings_ncmp(const char *strings[], const char *str) +{ + int pos = 0; + while (strings[pos]) + { + if ((strlen(str) == strlen(strings[pos]) && strncasecmp(str, strings[pos], strlen(strings[pos])) == 0)) + break; + pos++; + } + if (strings[pos] == NULL) + return -1; + return pos; +} + +int starpu_get_env_string_var_default(const char *str, const char *strings[], int defvalue) +{ + int val; + char *strval; + + strval = starpu_getenv(str); + if (!strval) + { + val = defvalue; + } + else + { + val = _strings_ncmp(strings, strval); + if 
(val < 0) + { + int i; + _STARPU_MSG("\n"); + _STARPU_MSG("Invalid value '%s' for environment variable '%s'\n", strval, str); + _STARPU_MSG("Valid values are:\n"); + for(i=0;strings[i]!=NULL;i++) _STARPU_MSG("\t%s\n",strings[i]); + _STARPU_MSG("\n"); + STARPU_ABORT(); + } + } + return val; +} + +static void remove_spaces(char *str) +{ + int i = 0; + int j = 0; + + while (str[j] != '\0') + { + if (isspace(str[j])) + { + j++; + continue; + } + if (j > i) + { + str[i] = str[j]; + } + i++; + j++; + } + if (j > i) + { + str[i] = str[j]; + } +} + +int starpu_get_env_size_default(const char *str, int defval) +{ + int val; + char *strval; + + strval = starpu_getenv(str); + if (!strval) + { + val = defval; + } + else + { + char *value = strdup(strval); + if (value == NULL) + _STARPU_ERROR("memory allocation failed\n"); + remove_spaces(value); + if (value[0] == '\0') + { + free(value); + val = defval; + } + else + { + char *endptr = NULL; + int mult = 1024; + errno = 0; + int v = (int)strtol(value, &endptr, 10); + if (errno != 0) + _STARPU_ERROR("could not parse environment variable '%s' with value '%s', strtol failed with error %s\n", str, value, strerror(errno)); + if (*endptr != '\0') + { + switch (*endptr) + { + case 'b': + case 'B': mult = 1; break; + case 'k': + case 'K': mult = 1024; break; + case 'm': + case 'M': mult = 1024*1024; break; + case 'g': + case 'G': mult = 1024*1024*1024; break; + default: + _STARPU_ERROR("could not parse environment variable '%s' with value '%s' size suffix invalid\n", str, value); + } + } + val = v*mult; + free(value); + } + } + return val; +} + +void starpu_display_bindings(void) +{ +#if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) + int value = starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0); + int ret = 0; + if (value == 2) + ret = system("lstopo --ps -"); + else + ret = system("hwloc-ps -a -t -c"); + if (ret) + { + _STARPU_DISP("%s returned %d\n", value==2?"lstopo":"hwloc-ps", ret); + fflush(stderr); + } + 
fflush(stdout); +#else + _STARPU_DISP("hwloc not available to display bindings.\n"); +#endif +} diff --git a/src/common/utils.h b/src/common/utils.h new file mode 100644 index 0000000..162166d --- /dev/null +++ b/src/common/utils.h @@ -0,0 +1,215 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __COMMON_UTILS_H__ +#define __COMMON_UTILS_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#ifdef STARPU_HAVE_SCHED_YIELD +#include +#endif +#include + +#ifdef STARPU_HAVE_HELGRIND_H +#include +#endif + +#pragma GCC visibility push(hidden) + +#define _STARPU_STRINGIFY_(x) #x +#define _STARPU_STRINGIFY(x) _STARPU_STRINGIFY_(x) + +#ifndef DO_CREQ_v_WW +#define DO_CREQ_v_WW(_creqF, _ty1F, _arg1F, _ty2F, _arg2F) ((void)0) +#endif +#ifndef DO_CREQ_v_W +#define DO_CREQ_v_W(_creqF, _ty1F, _arg1F) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_BEFORE +#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL +#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_AFTER +#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) +#endif +#ifndef VALGRIND_HG_DISABLE_CHECKING +#define VALGRIND_HG_DISABLE_CHECKING(start, len) ((void)0) +#endif +#ifndef VALGRIND_HG_ENABLE_CHECKING +#define 
VALGRIND_HG_ENABLE_CHECKING(start, len) ((void)0) +#endif +#ifndef VALGRIND_STACK_REGISTER +#define VALGRIND_STACK_REGISTER(stackbottom, stacktop) 0 +#endif +#ifndef VALGRIND_STACK_DEREGISTER +#define VALGRIND_STACK_DEREGISTER(id) ((void)0) +#endif +#ifndef RUNNING_ON_VALGRIND +#define RUNNING_ON_VALGRIND 0 +#endif +#ifdef STARPU_SANITIZE_THREAD +#define STARPU_RUNNING_ON_VALGRIND 1 +#else +#define STARPU_RUNNING_ON_VALGRIND RUNNING_ON_VALGRIND +#endif +#define STARPU_HG_DISABLE_CHECKING(variable) VALGRIND_HG_DISABLE_CHECKING(&(variable), sizeof(variable)) +#define STARPU_HG_ENABLE_CHECKING(variable) VALGRIND_HG_ENABLE_CHECKING(&(variable), sizeof(variable)) + +#define STARPU_DEBUG_PREFIX "[starpu]" + +/* This is needed in some places to make valgrind yield to another thread to be + * able to progress. */ +#if defined(__i386__) || defined(__x86_64__) +#define _STARPU_UYIELD() __asm__ __volatile("rep; nop") +#else +#define _STARPU_UYIELD() ((void)0) +#endif +#if defined(STARPU_HAVE_SCHED_YIELD) && defined(STARPU_HAVE_HELGRIND_H) +#define STARPU_VALGRIND_YIELD() do { if (STARPU_RUNNING_ON_VALGRIND) sched_yield(); } while (0) +#define STARPU_UYIELD() do { if (STARPU_RUNNING_ON_VALGRIND) sched_yield(); else _STARPU_UYIELD(); } while (0) +#else +#define STARPU_VALGRIND_YIELD() do { } while (0) +#define STARPU_UYIELD() _STARPU_UYIELD() +#endif + +#ifdef STARPU_VERBOSE +# define _STARPU_DEBUG(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) +# define _STARPU_DEBUG_NO_HEADER(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, fmt , ## __VA_ARGS__); fflush(stderr); }} while(0) +#else +# define _STARPU_DEBUG(fmt, ...) do { } while (0) +# define _STARPU_DEBUG_NO_HEADER(fmt, ...) do { } while (0) +#endif + +#ifdef STARPU_EXTRA_VERBOSE +# define _STARPU_EXTRA_DEBUG(fmt, ...) 
do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) +#else +# define _STARPU_EXTRA_DEBUG(fmt, ...) do { } while (0) +#endif + +#ifdef STARPU_EXTRA_VERBOSE +# define _STARPU_LOG_IN() do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] -->\n", starpu_pthread_self(), __starpu_func__,__FILE__, __LINE__); }} while(0) +# define _STARPU_LOG_OUT() do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <--\n", starpu_pthread_self(), __starpu_func__, __FILE__, __LINE__); }} while(0) +# define _STARPU_LOG_OUT_TAG(outtag) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <-- (%s)\n", starpu_pthread_self(), __starpu_func__, __FILE__, __LINE__, outtag); }} while(0) +#else +# define _STARPU_LOG_IN() +# define _STARPU_LOG_OUT() +# define _STARPU_LOG_OUT_TAG(outtag) +#endif + +/* TODO: cache */ +#if defined(STARPU_USE_MPI) + +#if !defined HOST_NAME_MAX +#define HOST_NAME_MAX 256 +#endif + +#define _STARPU_MSG(fmt, ...) do { char _msghost[HOST_NAME_MAX]; gethostname(_msghost, HOST_NAME_MAX); fprintf(stderr, STARPU_DEBUG_PREFIX"[%s][%s] " fmt, _msghost, __starpu_func__, ## __VA_ARGS__); } while(0) +#define _STARPU_DISP(fmt, ...) do { if (!_starpu_silent) { char _disphost[HOST_NAME_MAX]; gethostname(_disphost, HOST_NAME_MAX); fprintf(stderr, STARPU_DEBUG_PREFIX"[%s][%s] " fmt, _disphost, __starpu_func__, ## __VA_ARGS__); }} while(0) +#define _STARPU_ERROR(fmt, ...) \ + do { \ + char _errorhost[HOST_NAME_MAX]; \ + gethostname(_errorhost, HOST_NAME_MAX); \ + fprintf(stderr, "\n\n[starpu][%s][%s] Error: " fmt, _errorhost, __starpu_func__, ## __VA_ARGS__); \ + fprintf(stderr, "\n\n"); \ + STARPU_ABORT(); \ + } while (0) +#else /* STARPU_USE_MPI */ +#define _STARPU_MSG(fmt, ...) do { fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); } while(0) +#define _STARPU_DISP(fmt, ...) 
do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); }} while(0) +#define _STARPU_ERROR(fmt, ...) \ + do { \ + fprintf(stderr, "\n\n[starpu][%s] Error: " fmt ,__starpu_func__ ,## __VA_ARGS__); \ + fprintf(stderr, "\n\n"); \ + STARPU_ABORT(); \ + } while (0) +#endif /* STARPU_USE_MPI */ + +#ifdef _MSC_VER +# if defined(__cplusplus) +# define _STARPU_DECLTYPE(x) (decltype(x)) +# else +# define _STARPU_DECLTYPE(x) +# endif +#else +# define _STARPU_DECLTYPE(x) (__typeof(x)) +#endif + +#define _STARPU_MALLOC(ptr, size) do { ptr = _STARPU_DECLTYPE(ptr) malloc(size); STARPU_ASSERT_MSG(ptr != NULL || size == 0, "Cannot allocate %ld bytes\n", (long) (size)); } while (0) +#define _STARPU_CALLOC(ptr, nmemb, size) do { ptr = _STARPU_DECLTYPE(ptr) calloc(nmemb, size); STARPU_ASSERT_MSG(ptr != NULL || size == 0, "Cannot allocate %ld bytes\n", (long) (nmemb*size)); } while (0) +#define _STARPU_REALLOC(ptr, size) do { void *_new_ptr = realloc(ptr, size); STARPU_ASSERT_MSG(_new_ptr != NULL || size == 0, "Cannot reallocate %ld bytes\n", (long) (size)); ptr = _STARPU_DECLTYPE(ptr) _new_ptr;} while (0) + +#ifdef _MSC_VER +#define _STARPU_IS_ZERO(a) (a == 0.0) +#else +#define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO) +#endif + +char *_starpu_mkdtemp_internal(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +char *_starpu_mkdtemp(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +int _starpu_mkpath(const char *s, mode_t mode); +void _starpu_mkpath_and_check(const char *s, mode_t mode) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +char *_starpu_mktemp(const char *directory, int flags, int *fd); +/** This version creates a hierarchy of n temporary directories, useful when + * creating a lot of temporary files to be stored in the same place */ +char *_starpu_mktemp_many(const char *directory, int depth, int flags, int *fd); +void _starpu_rmtemp_many(char *path, int depth); +void _starpu_rmdir_many(char *path, int depth); +int 
_starpu_fftruncate(FILE *file, size_t length); +int _starpu_ftruncate(int fd, size_t length); +int _starpu_frdlock(FILE *file); +int _starpu_frdunlock(FILE *file); +int _starpu_fwrlock(FILE *file); +int _starpu_fwrunlock(FILE *file); +char *_starpu_get_home_path(void); +void _starpu_gethostname(char *hostname, size_t size) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** If FILE is currently on a comment line, eat it. */ +void _starpu_drop_comments(FILE *f); + +struct _starpu_job; +/** Returns the symbol associated to that job if any. */ +const char *_starpu_job_get_model_name(struct _starpu_job *j); +/** Returns the name associated to that job if any. */ +const char *_starpu_job_get_task_name(struct _starpu_job *j); + +struct starpu_codelet; +/** Returns the symbol associated to that job if any. */ +const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl); + +/** Returns the name of a codelet, or fallback to the name of the perfmodel. */ +const char *_starpu_codelet_get_name(struct starpu_codelet *cl); + +int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex); + +void _starpu_util_init(void); + +enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED }; + +#pragma GCC visibility pop + +#endif // __COMMON_UTILS_H__ diff --git a/src/core/combined_workers.c b/src/core/combined_workers.c new file mode 100644 index 0000000..a89ccea --- /dev/null +++ b/src/core/combined_workers.c @@ -0,0 +1,178 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include // for qsort + +#include +#include +#include + +#ifdef __GLIBC__ +#include +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +static int compar_int(const void *pa, const void *pb) +{ + int a = *((int *)pa); + int b = *((int *)pb); + + return a - b; +} + +static void sort_workerid_array(int nworkers, int workerid_array[]) +{ + qsort(workerid_array, nworkers, sizeof(int), compar_int); +} + +/* Create a new worker id for a combination of workers. This method should + * typically be called at the initialization of the scheduling policy. This + * worker should be the combination of the list of id's contained in the + * workerid_array array which has nworkers entries. This function returns + * the identifier of the combined worker in case of success, a negative value + * is returned otherwise. */ +int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]) +{ + int new_workerid; + + /* Return the number of actual workers. */ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + int basic_worker_count = (int)config->topology.nworkers; + int combined_worker_id = (int)config->topology.ncombinedworkers; + + /* We sort the ids */ + sort_workerid_array(nworkers, workerid_array); + + /* Test that all workers are not combined workers already. 
*/ + int i; + for (i = 0; i < nworkers; i++) + { + int id = workerid_array[i]; + + /* We only combine valid "basic" workers */ + if ((id < 0) || (id >= basic_worker_count)) + return -EINVAL; + + /* We only combine CPUs */ + STARPU_ASSERT(config->workers[id].arch == STARPU_CPU_WORKER); + STARPU_ASSERT(config->workers[id].worker_mask == STARPU_CPU); + } + + /* Get an id for that combined worker. Note that this is not thread + * safe because this method should only be called when the scheduler + * is being initialized. */ + new_workerid = basic_worker_count + combined_worker_id; + STARPU_ASSERT_MSG_ALWAYS(new_workerid < STARPU_NMAXWORKERS, "Too many combined workers (%d) for parallel task execution. Please use configure option --enable-maxcpus to increase it beyond the current value %d", new_workerid, STARPU_MAXCPUS); + config->topology.ncombinedworkers++; + +// fprintf(stderr, "COMBINED WORKERS "); +// for (i = 0; i < nworkers; i++) +// { +// fprintf(stderr, "%d ", workerid_array[i]); +// } +// fprintf(stderr, "into worker %d\n", new_workerid); + + for(i = 0; i < nworkers; i++) + _starpu_get_worker_struct(workerid_array[i])->combined_workerid = new_workerid; + + struct _starpu_combined_worker *combined_worker = + &config->combined_workers[combined_worker_id]; + + combined_worker->worker_size = nworkers; + _STARPU_MALLOC(combined_worker->perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + combined_worker->perf_arch.ndevices = 1; + combined_worker->perf_arch.devices[0].type = config->workers[workerid_array[0]].perf_arch.devices[0].type; + combined_worker->perf_arch.devices[0].devid = config->workers[workerid_array[0]].perf_arch.devices[0].devid; + combined_worker->perf_arch.devices[0].ncores = nworkers; + combined_worker->worker_mask = config->workers[workerid_array[0]].worker_mask; + +#ifdef STARPU_USE_MP + combined_worker->count = nworkers -1; + STARPU_PTHREAD_MUTEX_INIT(&combined_worker->count_mutex,NULL); +#endif + + /* We assume that the memory node 
should either be that of the first + * entry, and it is very likely that every worker in the combination + * should be on the same memory node.*/ + int first_id = workerid_array[0]; + combined_worker->memory_node = config->workers[first_id].memory_node; + + /* Save the list of combined workers */ + memcpy(&combined_worker->combined_workerid, workerid_array, nworkers*sizeof(int)); + + /* Note that we maintain both the cpu_set and the hwloc_cpu_set so that + * the application is not forced to use hwloc when it is available. */ +#ifdef __GLIBC__ + CPU_ZERO(&combined_worker->cpu_set); +#endif /* __GLIBC__ */ + +#ifdef STARPU_HAVE_HWLOC + combined_worker->hwloc_cpu_set = hwloc_bitmap_alloc(); +#endif + + for (i = 0; i < nworkers; i++) + { +#if defined(__GLIBC__) || defined(STARPU_HAVE_HWLOC) + int id = workerid_array[i]; +#ifdef __GLIBC__ +#ifdef CPU_OR + CPU_OR(&combined_worker->cpu_set, + &combined_worker->cpu_set, + &config->workers[id].cpu_set); +#else + int j; + for (j = 0; j < CPU_SETSIZE; j++) + { + if (CPU_ISSET(j, &config->workers[id].cpu_set)) + CPU_SET(j, &combined_worker->cpu_set); + } +#endif +#endif /* __GLIBC__ */ + +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_or(combined_worker->hwloc_cpu_set, + combined_worker->hwloc_cpu_set, + config->workers[id].hwloc_cpu_set); +#endif +#endif + } + + starpu_sched_ctx_add_combined_workers(&new_workerid, 1, STARPU_GLOBAL_SCHED_CTX); + + return new_workerid; +} + +int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid) +{ + /* Check that this is the id of a combined worker */ + struct _starpu_combined_worker *worker; + worker = _starpu_get_combined_worker_struct(workerid); + STARPU_ASSERT(worker); + + if (worker_size) + *worker_size = worker->worker_size; + + if (combined_workerid) + *combined_workerid = worker->combined_workerid; + + return 0; +} diff --git a/src/core/combined_workers.h b/src/core/combined_workers.h new file mode 100644 index 0000000..22de1f5 --- /dev/null +++ 
b/src/core/combined_workers.h @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __COMBINED_WORKERS_H__ +#define __COMBINED_WORKERS_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +#pragma GCC visibility pop + +#endif // __COMBINED_WORKERS_H__ diff --git a/src/core/debug.c b/src/core/debug.c new file mode 100644 index 0000000..4f15f68 --- /dev/null +++ b/src/core/debug.c @@ -0,0 +1,129 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#ifdef STARPU_VERBOSE +/* we want a single writer at the same time to have a log that is readable */ +static starpu_pthread_mutex_t logfile_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static FILE *logfile = NULL; +#endif + +int _starpu_debug +#ifdef STARPU_DEBUG + = 1 +#else + = 0 +#endif + ; + +/* Tell gdb whether FXT is compiled in or not */ +int _starpu_use_fxt +#ifdef STARPU_USE_FXT + = 1 +#endif + ; + +void _starpu_open_debug_logfile(void) +{ +#ifdef STARPU_VERBOSE + /* what is the name of the file ? default = "starpu.log" */ + char *logfile_name; + + logfile_name = starpu_getenv("STARPU_LOGFILENAME"); + if (!logfile_name) + { + logfile_name = "starpu.log"; + } + + logfile = fopen(logfile_name, "w+"); + STARPU_ASSERT_MSG(logfile, "Could not open file %s for verbose logs (%s). You can specify another file destination with the STARPU_LOGFILENAME environment variable", logfile_name, strerror(errno)); +#endif +} + +void _starpu_close_debug_logfile(void) +{ +#ifdef STARPU_VERBOSE + if (logfile) + { + fclose(logfile); + logfile = NULL; + } +#endif +} + +void _starpu_print_to_logfile(const char *format STARPU_ATTRIBUTE_UNUSED, ...) +{ +#ifdef STARPU_VERBOSE + va_list args; + va_start(args, format); + STARPU_PTHREAD_MUTEX_LOCK(&logfile_mutex); + vfprintf(logfile, format, args); + STARPU_PTHREAD_MUTEX_UNLOCK(&logfile_mutex); + va_end(args); +#endif +} + +/* Record codelet to give ayudame nice function ids starting from 0. 
*/ +#if defined(STARPU_USE_AYUDAME1) +static struct ayudame_codelet +{ + char *name; + struct starpu_codelet *cl; +} *codelets; +static unsigned ncodelets, ncodelets_alloc; +static starpu_pthread_mutex_t ayudame_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl) +{ + unsigned i; + const char *name; + if (!cl) + return 0; + name = _starpu_codelet_get_model_name(cl); + STARPU_PTHREAD_MUTEX_LOCK(&ayudame_mutex); + for (i=0; i < ncodelets; i++) + { + if (codelets[i].cl == cl && + ((!name && !codelets[i].name) || + ((name && codelets[i].name) && !strcmp(codelets[i].name, name)))) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex); + return i + 1; + } + } + if (ncodelets == ncodelets_alloc) + { + if (!ncodelets_alloc) + ncodelets_alloc = 16; + else + ncodelets_alloc *= 2; + _STARPU_REALLOC(codelets, ncodelets_alloc * sizeof(*codelets)); + } + codelets[ncodelets].cl = cl; + if (name) + /* codelet might be freed by user */ + codelets[ncodelets].name = strdup(name); + else + codelets[ncodelets].name = NULL; + i = ncodelets++; + if (name) + AYU_event(AYU_REGISTERFUNCTION, i+1, (void*) name); + STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex); + return i + 1; +} +#endif /* AYUDAME1 */ diff --git a/src/core/debug.h b/src/core/debug.h new file mode 100644 index 0000000..7bcce8a --- /dev/null +++ b/src/core/debug.h @@ -0,0 +1,317 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DEBUG_H__ +#define __DEBUG_H__ + +/** @file */ + +#include +#include +#include + +#include +#include + +#if defined(STARPU_USE_AYUDAME1) +/* Ayudame 1 API */ +# include +# ifndef AYU_RT_STARPU +# define AYU_RT_STARPU 4 +# endif +# define STARPU_AYU_EVENT AYU_event + +# define STARPU_AYU_PREINIT() \ + if (AYU_event) \ + { \ + enum ayu_runtime_t ayu_rt = AYU_RT_STARPU; \ + AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt); \ + } + +# define STARPU_AYU_INIT() \ + if (AYU_event) \ + { \ + AYU_event(AYU_INIT, 0, NULL); \ + } + +# define STARPU_AYU_FINISH() \ + if (AYU_event) \ + { \ + AYU_event(AYU_FINISH, 0, NULL); \ + } + +# define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \ + if (AYU_event) \ + { \ + uintptr_t __AYU_data[3] = { (previous), (uintptr_t) (handle), (uintptr_t) (handle) }; \ + AYU_event(AYU_ADDDEPENDENCY, (job_id), __AYU_data); \ + } + +# define STARPU_AYU_REMOVETASK(job_id) \ + if (AYU_event) \ + { \ + AYU_event(AYU_REMOVETASK, (job_id), NULL); \ + } + +# define STARPU_AYU_ADDTASK(job_id, task) \ + if (AYU_event) \ + { \ + int64_t __AYU_data[2] = { \ + ((struct starpu_task *)(task))!=NULL?_starpu_ayudame_get_func_id(((struct starpu_task *)(task))->cl):0, \ + ((struct starpu_task *)(task))!=NULL?((struct starpu_task *)(task))->priority-STARPU_MIN_PRIO:0 \ + }; \ + AYU_event(AYU_ADDTASK, (job_id), __AYU_data); \ + } + +# define STARPU_AYU_PRERUNTASK(job_id, workerid) \ + if (AYU_event) \ + { \ + intptr_t __id = (workerid); \ + AYU_event(AYU_PRERUNTASK, (job_id), &__id); \ + } + +# define STARPU_AYU_RUNTASK(job_id) \ + if (AYU_event) \ + { \ + AYU_event(AYU_RUNTASK, (job_id), NULL); \ + } + +# define STARPU_AYU_POSTRUNTASK(job_id) \ + if (AYU_event) \ + { \ + 
AYU_event(AYU_POSTRUNTASK, (job_id), NULL); \ + } + +# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \ + if (AYU_event) \ + { \ + intptr_t __id = (worker_id); \ + AYU_event(AYU_ADDTASKTOQUEUE, (job_id), &__id); \ + } + +# define STARPU_AYU_BARRIER() \ + if (AYU_event) \ + { \ + AYU_event(AYU_BARRIER, 0, NULL); \ + } + +#elif defined(STARPU_USE_AYUDAME2) +/* Ayudame 2 API */ +# include +# define STARPU_AYU_EVENT ayu_event + +# define STARPU_AYU_PREINIT() + +# define STARPU_AYU_INIT() + +# define STARPU_AYU_FINISH() \ + if (ayu_event){ \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + ayu_event(AYU_FINISH, __data); \ + } + +# define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \ + if (ayu_event) \ + { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + uint64_t __dep_id=0; \ + __dep_id |= (previous) << 0; \ + __dep_id |= (job_id) << 24; \ + __dep_id |= (uintptr_t) (handle) << 48; \ + __data.common.client_id = __cli_id; \ + __data.add_dependency.dependency_id = __dep_id; \ + __data.add_dependency.from_id=(previous); \ + __data.add_dependency.to_id=(job_id); \ + __data.add_dependency.dependency_label = "dep"; \ + ayu_event(AYU_ADDDEPENDENCY, __data); \ + ayu_wipe_data(&__data); \ + \ + char __buf[32]; \ + snprintf(__buf, sizeof(__buf), "%llu", (unsigned long long)(uintptr_t) (handle)); \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = __dep_id; \ + __data.set_property.key = "dep_address_value"; \ + __data.set_property.value = __buf; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_REMOVETASK(job_id) \ + if (ayu_event) \ + { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = 
"state"; \ + __data.set_property.value = "finished"; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_ADDTASK(job_id, task) \ + if (ayu_event) \ + { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.add_task.task_id = (job_id); \ + __data.add_task.scope_id = 0; \ + __data.add_task.task_label = "task"; \ + ayu_event(AYU_ADDTASK, __data); \ + ayu_wipe_data(&__data); \ + \ + if ((task) != NULL) \ + { \ + char __buf[32]; \ + snprintf(__buf, sizeof(__buf), "%d", ((struct starpu_task *)(task))->priority); \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "priority"; \ + __data.set_property.value = __buf; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + \ + const char *__name = ((struct starpu_task *)(task))->name != NULL?((struct starpu_task *)(task))->name: \ + ((struct starpu_task *)(task))->cl->name != NULL?((struct starpu_task *)(task))->cl->name:""; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "function_name"; \ + __data.set_property.value = __name; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } \ + } + +# define STARPU_AYU_PRERUNTASK(job_id, workerid) \ + if (ayu_event) \ + { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "state"; \ + __data.set_property.value = "running"; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + \ + char __buf[32]; \ + snprintf(__buf, sizeof(__buf), "%d", (workerid)); \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "worker"; \ + 
__data.set_property.value = __buf; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_RUNTASK(job_id) \ + if (ayu_event) { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "state"; \ + __data.set_property.value = "running"; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_POSTRUNTASK(job_id) \ + if (ayu_event) \ + { \ + /* TODO ADD thread id core id etc */ \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "state"; \ + __data.set_property.value = "finished"; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \ + if (ayu_event) \ + { \ + ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ + ayu_event_data_t __data; \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "state"; \ + __data.set_property.value = "queued"; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + \ + char __buf[32]; \ + snprintf(__buf, sizeof(__buf), "%d", (int)(worker_id)); \ + __data.common.client_id = __cli_id; \ + __data.set_property.property_owner_id = (job_id); \ + __data.set_property.key = "worker"; \ + __data.set_property.value = __buf; \ + ayu_event(AYU_SETPROPERTY, __data); \ + ayu_wipe_data(&__data); \ + } + +# define STARPU_AYU_BARRIER() \ + if (ayu_event) \ + { \ + /* How to generate a barrier event with Ayudame 2? 
*/ \ + } +#else +# define STARPU_AYU_EVENT (0) +# define STARPU_AYU_PREINIT() +# define STARPU_AYU_INIT() +# define STARPU_AYU_FINISH() +# define STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job) +# define STARPU_AYU_REMOVETASK(job_id) +# define STARPU_AYU_ADDTASK(job_id, task) +# define STARPU_AYU_PRERUNTASK(job_id, workerid) +# define STARPU_AYU_RUNTASK(job_id) +# define STARPU_AYU_POSTRUNTASK(job_id) +# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) +# define STARPU_AYU_BARRIER() + +#endif + +#pragma GCC visibility push(hidden) + +/** Create a file that will contain StarPU's log */ +void _starpu_open_debug_logfile(void); + +/** Close StarPU's log file */ +void _starpu_close_debug_logfile(void); + +/** Write into StarPU's log file */ +void _starpu_print_to_logfile(const char *format, ...) STARPU_ATTRIBUTE_FORMAT(printf, 1, 2); + +/** Tell gdb whether FXT is compiled in or not */ +extern int _starpu_use_fxt; + +#if defined(STARPU_USE_AYUDAME1) +/** Get an Ayudame id for CL */ +int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl); +#endif + +void _starpu_watchdog_init(void); +void _starpu_watchdog_shutdown(void); + +#pragma GCC visibility pop + +#endif // __DEBUG_H__ diff --git a/src/core/dependencies/cg.c b/src/core/dependencies/cg.c new file mode 100644 index 0000000..57b4700 --- /dev/null +++ b/src/core/dependencies/cg.c @@ -0,0 +1,419 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +void _starpu_cg_list_init0(struct _starpu_cg_list *list) +{ + _starpu_spin_init(&list->lock); + //list->ndeps = 0; + //list->ndeps_completed = 0; +#ifdef STARPU_DEBUG + //list->deps = NULL; + //list->done = NULL; +#endif + + //list->terminated = 0; + + //list->nsuccs = 0; +#ifdef STARPU_DYNAMIC_DEPS_SIZE + /* this is a small initial default value ... may be changed */ + //list->succ_list_size = 0; + //list->succ = NULL; +#endif +} + +void _starpu_cg_list_deinit(struct _starpu_cg_list *list) +{ + unsigned id; + for (id = 0; id < list->nsuccs; id++) + { + struct _starpu_cg *cg = list->succ[id]; + + /* We remove the reference on the completion group, and free it + * if there is no more reference. */ + unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1); + if (ntags == 0) + { +#ifdef STARPU_DEBUG + free(list->succ[id]->deps); + free(list->succ[id]->done); +#endif + free(list->succ[id]); + } + } + +#ifdef STARPU_DYNAMIC_DEPS_SIZE + free(list->succ); +#endif +#ifdef STARPU_DEBUG + free(list->deps); + free(list->done); +#endif + _starpu_spin_destroy(&list->lock); +} + +/* Returns whether the completion was already terminated, and caller should + * thus immediately proceed. */ +int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg) +{ + int ret; + STARPU_ASSERT(cg); + + _starpu_spin_lock(&successors->lock); + ret = successors->terminated; + + /* where should that cg should be put in the array ? 
*/ + unsigned index = successors->nsuccs++; + +#ifdef STARPU_DYNAMIC_DEPS_SIZE + if (index >= successors->succ_list_size) + { + /* the successor list is too small */ + if (successors->succ_list_size > 0) + successors->succ_list_size *= 2; + else + successors->succ_list_size = 4; + + _STARPU_REALLOC(successors->succ, successors->succ_list_size*sizeof(struct _starpu_cg *)); + } +#else + STARPU_ASSERT(index < STARPU_NMAXDEPS); +#endif + successors->succ[index] = cg; + _starpu_spin_unlock(&successors->lock); + + return ret; +} + +int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]) +{ + unsigned i; + unsigned n = 0; + _starpu_spin_lock(&successors->lock); + for (i = 0; i < successors->nsuccs; i++) + { + struct _starpu_cg *cg = successors->succ[i]; + if (cg->cg_type != STARPU_CG_TASK) + continue; + if (n < ndeps) + { + task_array[n] = cg->succ.job->task; + n++; + } + } + _starpu_spin_unlock(&successors->lock); + return n; +} + +int _starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]) +{ + unsigned i; + unsigned n = 0; + _starpu_spin_lock(&successors->lock); + for (i = 0; i < successors->nsuccs; i++) + { + struct _starpu_cg *cg = successors->succ[i]; + if (cg->cg_type != STARPU_CG_TASK) + continue; + if (n < ndeps) + { + struct starpu_task *task = cg->succ.job->task; + if (task->cl == NULL || task->where == STARPU_NOWHERE || task->execute_on_a_specific_worker) + /* will not be scheduled */ + continue; + task_array[n] = task; + n++; + } + } + _starpu_spin_unlock(&successors->lock); + return n; +} + +int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, starpu_tag_t tag_array[]) +{ + unsigned i; + unsigned n = 0; + _starpu_spin_lock(&successors->lock); + for (i = 0; i < successors->nsuccs; i++) + { + struct _starpu_cg *cg = successors->succ[i]; + if (cg->cg_type != 
STARPU_CG_TAG) + continue; + if (n < ndeps) + { + tag_array[n] = cg->succ.tag->id; + n++; + } + } + _starpu_spin_unlock(&successors->lock); + return n; +} +/* Record one notification on cg; when its counter reaches 0, re-arm the counter and notify the application thread, tag or job the group belongs to. */ +void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg) +{ + STARPU_ASSERT(cg); + unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1); + ANNOTATE_HAPPENS_BEFORE(&cg->remaining); + + if (remaining == 0) + { + ANNOTATE_HAPPENS_AFTER(&cg->remaining); + /* Note: This looks racy to helgrind when the tasks are not + * autoregenerated, since they then unsubscribe from the + * completion group in parallel, thus decreasing ntags. This is + * however not a problem since it means we will not reuse this + * cg, and remaining will not be used, so a bogus value won't + * hurt. + */ + cg->remaining = cg->ntags; + + /* the group is now completed */ + switch (cg->cg_type) + { + case STARPU_CG_APPS: + { + /* this is a cg for an application waiting on a set of + * tags, wake the thread */ + STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex); + cg->succ.succ_apps.completed = 1; + STARPU_PTHREAD_COND_SIGNAL(&cg->succ.succ_apps.cg_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex); + break; + } + + case STARPU_CG_TAG: + { + struct _starpu_cg_list *tag_successors; + struct _starpu_tag *tag; + + tag = cg->succ.tag; + _starpu_spin_lock(&tag->lock); + tag_successors = &tag->tag_successors; + + tag_successors->ndeps_completed++; + + /* Note: the tag lock is held here, it was + * taken just above (not by the caller). 
*/ + if ((tag->state == STARPU_BLOCKED) && + (tag_successors->ndeps == tag_successors->ndeps_completed)) + { + /* reset the counter so that we can reuse the completion group */ + tag_successors->ndeps_completed = 0; + /* This releases the lock */ + _starpu_tag_set_ready(tag); + } + else + _starpu_spin_unlock(&tag->lock); + break; + } + + case STARPU_CG_TASK: + { + struct _starpu_cg_list *job_successors; + struct _starpu_job *j; + + j = cg->succ.job; + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + + job_successors = &j->job_successors; +#ifdef STARPU_DEBUG + if (!j->task->regenerate) + { + unsigned i; + /* Flag the backward cg pointers as done for easier debugging */ + if (job_successors->deps) + { + for (i = 0; i < job_successors->ndeps; i++) + if (job_successors->deps[i] == cg) + break; + STARPU_ASSERT(i < job_successors->ndeps); + job_successors->done[i] = 1; + } + if (cg->deps) + { + for (i = 0; i < cg->ndeps; i++) + if (cg->deps[i] == pred) + break; + STARPU_ASSERT(i < cg->ndeps); + cg->done[i] = 1; + } + } +#endif + + unsigned ndeps_completed = + STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1); + + STARPU_ASSERT(job_successors->ndeps >= ndeps_completed); + + /* Need to atomically test submitted and check + * dependencies, since this is concurrent with + * _starpu_submit_job */ + if (j->submitted && job_successors->ndeps == ndeps_completed && + j->task->status == STARPU_TASK_BLOCKED_ON_TASK) + { + /* That task has already passed tag checks, + * do not do them again since the tag has been cleared! 
*/ + _starpu_enforce_deps_starting_from_task(j); + } + else + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + + break; + } + + default: + STARPU_ABORT(); + } + } +} + +/* Called when a job has just started, so we can notify tasks which were waiting + * only for this one when they can expect to start */ +/* Note: in case of a tag, it must be already locked */ +void _starpu_notify_job_ready_soon_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg, _starpu_notify_job_start_data *data) +{ + STARPU_ASSERT(cg); + + if (cg->remaining == 1) + { + /* only one notification is missing: the group is about to be completed */ + switch (cg->cg_type) + { + case STARPU_CG_APPS: + /* Not a task */ + break; + + case STARPU_CG_TAG: + { + struct _starpu_cg_list *tag_successors; + struct _starpu_tag *tag; + + tag = cg->succ.tag; + tag_successors = &tag->tag_successors; + + /* Note: the tag is already locked by the + * caller. */ + if ((tag->state == STARPU_BLOCKED) && + (tag_successors->ndeps == tag_successors->ndeps_completed + 1)) + { + /* This one is about to become ready */ + _starpu_enforce_deps_notify_job_ready_soon(tag->job, data, 1); + } + break; + } + + case STARPU_CG_TASK: + { + struct _starpu_cg_list *job_successors; + struct _starpu_job *j; + + j = cg->succ.job; + job_successors = &j->job_successors; + + if (job_successors->ndeps == job_successors->ndeps_completed + 1 && + j->task->status == STARPU_TASK_BLOCKED_ON_TASK) + { + /* This one is about to become ready */ + _starpu_enforce_deps_notify_job_ready_soon(j, data, 0); + } + + break; + } + + default: + STARPU_ABORT(); + } + } +} + + +/* Caller just has to promise that the list will not disappear. + * _starpu_notify_cg_list protects the list itself. + * No job lock should be held, since we might want to immediately call the callback of an empty task. 
+ */ +void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors) +{ + unsigned succ; + + _starpu_spin_lock(&successors->lock); + successors->terminated = 1; + /* Note: some thread might be concurrently adding other items while the lock is dropped below, so nsuccs is re-read at each iteration */ + for (succ = 0; succ < successors->nsuccs; succ++) + { + struct _starpu_cg *cg = successors->succ[succ]; + STARPU_ASSERT(cg); + unsigned cg_type = cg->cg_type; + + if (cg_type == STARPU_CG_APPS) + { + /* Remove the temporary ref to the cg, and step back so that the entry shifted into this slot gets visited at the next iteration */ + memmove(&successors->succ[succ], &successors->succ[succ+1], (successors->nsuccs-(succ+1)) * sizeof(successors->succ[succ])); + succ--; + successors->nsuccs--; + } + _starpu_spin_unlock(&successors->lock); + _starpu_notify_cg(pred, cg); + _starpu_spin_lock(&successors->lock); + } + _starpu_spin_unlock(&successors->lock); +} + +/* Called when a job has just started, so we can notify tasks which were waiting + * only for this one when they can expect to start */ +/* Caller just has to promise that the list will not disappear. + * _starpu_notify_job_start_cg_list protects the list itself. + * No job lock should be held, since we might want to immediately call the callback of an empty task. 
+ */ +void _starpu_notify_job_start_cg_list(void *pred, struct _starpu_cg_list *successors, _starpu_notify_job_start_data *data) +{ + unsigned succ; + + _starpu_spin_lock(&successors->lock); + /* Note: some thread might be concurrently adding other items while the lock is dropped below */ + for (succ = 0; succ < successors->nsuccs; succ++) + { + struct _starpu_cg *cg = successors->succ[succ]; + _starpu_spin_unlock(&successors->lock); + STARPU_ASSERT(cg); + unsigned cg_type = cg->cg_type; + + struct _starpu_tag *cgtag = NULL; + /* _starpu_notify_job_ready_soon_cg requires a tag to be already locked */ + if (cg_type == STARPU_CG_TAG) + { + cgtag = cg->succ.tag; + STARPU_ASSERT(cgtag); + _starpu_spin_lock(&cgtag->lock); + } + + _starpu_notify_job_ready_soon_cg(pred, cg, data); + + if (cg_type == STARPU_CG_TAG) + _starpu_spin_unlock(&cgtag->lock); + + _starpu_spin_lock(&successors->lock); + } + _starpu_spin_unlock(&successors->lock); +} diff --git a/src/core/dependencies/cg.h b/src/core/dependencies/cg.h new file mode 100644 index 0000000..4a1fcf8 --- /dev/null +++ b/src/core/dependencies/cg.h @@ -0,0 +1,141 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __CG_H__ +#define __CG_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +/** + * we do not necessarily want to allocate room for 256 dependencies, but we + * want to handle the few situations where there are a lot of dependencies as + * well + */ +#define STARPU_DYNAMIC_DEPS_SIZE 1 + +/* randomly chosen ! Only used when STARPU_DYNAMIC_DEPS_SIZE is not defined */ +#ifndef STARPU_DYNAMIC_DEPS_SIZE +#define STARPU_NMAXDEPS 256 +#endif + +struct _starpu_job; + +/** Completion Group list, records both the number of expected notifications + * before the completion can start, and the list of successors when the + * completion is finished. */ +struct _starpu_cg_list +{ + /** Protects atomicity of the list and the terminated flag */ + struct _starpu_spinlock lock; + + /** Number of notifications to be waited for */ + unsigned ndeps; /* how many deps ? */ + unsigned ndeps_completed; /* how many deps are done ? */ +#ifdef STARPU_DEBUG + /** Array of the notifications, size ndeps */ + struct _starpu_cg **deps; + /** Which ones have notified, size ndeps */ + char *done; +#endif + + /** Whether the completion is finished. + * For restartable/restarted tasks, only the first iteration is taken into account here. + */ + unsigned terminated; + + /** List of successors */ + unsigned nsuccs; /* how many successors ? 
*/ +#ifdef STARPU_DYNAMIC_DEPS_SIZE + /** How many allocated items in succ */ + unsigned succ_list_size; + struct _starpu_cg **succ; +#else + struct _starpu_cg *succ[STARPU_NMAXDEPS]; +#endif +}; +/** Kind of successor a completion group notifies, i.e. which member of the succ union of struct _starpu_cg is meaningful */ +enum _starpu_cg_type +{ + STARPU_CG_APPS=(1<<0), + STARPU_CG_TAG=(1<<1), + STARPU_CG_TASK=(1<<2) +}; + +/** Completion Group */ +struct _starpu_cg +{ + /** number of tags depended on */ + unsigned ntags; + /** number of remaining tags, decremented by _starpu_notify_cg and reset to ntags once it reaches 0 */ + unsigned remaining; + +#ifdef STARPU_DEBUG + unsigned ndeps; + /** array of predecessors, size ndeps */ + void **deps; + /** which ones have notified, size ndeps */ + char *done; +#endif + + enum _starpu_cg_type cg_type; + + union + { + /** STARPU_CG_TAG */ + struct _starpu_tag *tag; + + /** STARPU_CG_TASK */ + struct _starpu_job *job; + + /** STARPU_CG_APPS + * in case this completion group is related to an application, + * we have to explicitly wake the waiting thread instead of + * rescheduling the corresponding task */ + struct + { + unsigned completed; + starpu_pthread_mutex_t cg_mutex; + starpu_pthread_cond_t cg_cond; + } succ_apps; + } succ; +}; + +typedef struct _starpu_notify_job_start_data _starpu_notify_job_start_data; + +void _starpu_notify_dependencies(struct _starpu_job *j); +void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch); +void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data); + +void _starpu_cg_list_init0(struct _starpu_cg_list *list); +void _starpu_cg_list_deinit(struct _starpu_cg_list *list); +int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg); +int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]); +int 
_starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]); +int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list 
*successors, unsigned ndeps, starpu_tag_t tag_array[]); +void _starpu_notify_cg(void *pred, struct _starpu_cg *cg); +void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors); +void _starpu_notify_job_start_cg_list(void *pred, struct _starpu_cg_list *successors, _starpu_notify_job_start_data *data); +void _starpu_notify_task_dependencies(struct _starpu_job *j); +void _starpu_notify_job_start_tasks(struct _starpu_job *j, _starpu_notify_job_start_data *data); + +#pragma GCC visibility pop + +#endif // __CG_H__ diff --git a/src/core/dependencies/data_arbiter_concurrency.c b/src/core/dependencies/data_arbiter_concurrency.c new file mode 100644 index 0000000..f07f638 --- /dev/null +++ b/src/core/dependencies/data_arbiter_concurrency.c @@ -0,0 +1,855 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* TODO factorize with data_concurrency.c and btw support redux */ + +//#define LOCK_OR_DELEGATE + +/* + * This implements a solution for the dining philosophers problem (see + * data_concurrency.c for the rationale) based on a centralized arbiter. 
This + * allows to get a more parallel solution than the Dijkstra solution, by + * avoiding strictly serialized executions, and instead opportunistically find + * which tasks can take data. + * + * These are the algorithms implemented below: + * + * + * at termination of task T: + * + * - for each handle h of T: + * - mutex_lock(&arbiter) + * - release reference on h + * - call _starpu_notify_arbitered_dependencies which does the following + * - for each task Tc waiting for h: + * - for each data Tc_h it is waiting for: + * - if Tc_h is busy, goto fail + * // Ok, now really take them + * - for each data Tc_h it is waiting for: + * - lock(Tc_h) + * - take reference on Tc_h (it should be still available since we hold the arbiter) + * - unlock(Tc_h) + * // Ok, we managed to find somebody, we're finished! + * _starpu_push_task(Tc); + * break; + * fail: + * - unrecord Tc as waiting on h + * - record Tc as waiting on Tc_h + * // No luck, let's try another task + * continue; + * // Release the arbiter mutex a bit from time to time + * - mutex_unlock(&arbiter) + * + * + * at submission of task T (_starpu_submit_job_enforce_arbitered_deps): + * + * - mutex_lock(&arbiter) + * - for each handle h of T: + * - lock(h) + * - try to take a reference on h, goto fail on failure + * - unlock(h) + * // Success! 
+ * - mutex_unlock(&arbiter); + * - return 0; + * + * fail: + * // couldn't take everything, record task T and abort + * - record T as waiting on h + * // drop spurious references + * - for each handle h of T already taken: + * - lock(h) + * - release reference on h + * - unlock(h) + * - mutex_unlock(&arbiter) + * - return 1; + * + * at acquire (_starpu_attempt_to_submit_arbitered_data_request): + * - mutex_lock(&arbiter) + * - try to take a reference on h + * - on failure, record as waiting on h + * - mutex_unlock(&arbiter); + * - return 0 if succeeded, 1 if failed; + */ + +static int _starpu_arbiter_filter_modes(int mode) +{ + /* Do not care about some flags, and treat RW as a plain W */ + mode &= ~STARPU_COMMUTE; + mode &= ~STARPU_SSEND; + mode &= ~STARPU_LOCALITY; + mode &= ~STARPU_NOFOOTPRINT; + if (mode == STARPU_RW) + mode = STARPU_W; + return mode; +} + +struct starpu_arbiter +{ +#ifdef LOCK_OR_DELEGATE +/* The list of tasks to perform */ + struct LockOrDelegateListNode* dlTaskListHead; + +/* To protect the list of tasks */ + struct _starpu_spinlock dlListLock; +/* Whether somebody is working on the list */ + int working; +#else /* LOCK_OR_DELEGATE */ + starpu_pthread_mutex_t mutex; +#endif /* LOCK_OR_DELEGATE */ +}; + +#ifdef LOCK_OR_DELEGATE + +/* In case of congestion, we don't want to needlessly wait for the arbiter lock + * while we can just delegate the work to the worker already managing some + * dependencies. + * + * So we push work on the dlTaskListHead queue and only one worker will process + * the list. 
+ */ + +/* A LockOrDelegate task list */ +struct LockOrDelegateListNode +{ + void (*func)(void*); + void* data; + struct LockOrDelegateListNode* next; +}; + +/* Post a task to perform if possible, otherwise put it in the list + * If we can perform this task, we may also perform all the tasks in the list + * This function returns 1 if the task (and maybe some others) has been done + * by the calling thread and 0 otherwise (if the task has just been put in the list) + */ +static int _starpu_LockOrDelegatePostOrPerform(starpu_arbiter_t arbiter, void (*func)(void*), void* data) +{ + struct LockOrDelegateListNode *newNode, *iter, *next; + int did = 0; + + _STARPU_MALLOC(newNode, sizeof(*newNode)); + newNode->data = data; + newNode->func = func; + + _starpu_spin_lock(&arbiter->dlListLock); + if (arbiter->working) + { + /* Somebody is working on it, insert the node at the head of the list */ + newNode->next = arbiter->dlTaskListHead; + arbiter->dlTaskListHead = newNode; + } + else + { + /* Nobody working on the list, we'll work */ + arbiter->working = 1; + + /* work on what was pushed so far first */ + iter = arbiter->dlTaskListHead; + arbiter->dlTaskListHead = NULL; + _starpu_spin_unlock(&arbiter->dlListLock); + while (iter != NULL) + { + (*iter->func)(iter->data); + next = iter->next; + free(iter); + iter = next; + } + + /* And then do our job */ + (*func)(data); + free(newNode); + did = 1; + + _starpu_spin_lock(&arbiter->dlListLock); + /* And finish working on anything that could have been pushed + * in the meanwhile */ + while (arbiter->dlTaskListHead != 0) + { + iter = arbiter->dlTaskListHead; + arbiter->dlTaskListHead = arbiter->dlTaskListHead->next; + _starpu_spin_unlock(&arbiter->dlListLock); + + (*iter->func)(iter->data); + free(iter); + _starpu_spin_lock(&arbiter->dlListLock); + } + + arbiter->working = 0; + } + + _starpu_spin_unlock(&arbiter->dlListLock); + return did; +} + +#endif + +/* Try to submit just one data request, in case the request can be processed + * 
immediately, return 0, 
if there is still a dependency that is not compatible + * with the current mode, the request is put in the per-handle list of + * "requesters", and this function returns 1. */ +#ifdef LOCK_OR_DELEGATE +struct starpu_submit_arbitered_args +{ + unsigned request_from_codelet; + starpu_data_handle_t handle; + enum starpu_data_access_mode mode; + void (*callback)(void *); + void *argcb; + struct _starpu_job *j; + unsigned buffer_index; +}; +static unsigned ___starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index); +static void __starpu_attempt_to_submit_arbitered_data_request(void *inData) +{ + struct starpu_submit_arbitered_args* args = inData; + unsigned request_from_codelet = args->request_from_codelet; + starpu_data_handle_t handle = args->handle; + enum starpu_data_access_mode mode = args->mode; + void (*callback)(void*) = args->callback; + void *argcb = args->argcb; + struct _starpu_job *j = args->j; + unsigned buffer_index = args->buffer_index; + free(args); + if (!___starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index)) + /* Success, but we have no way to report it to the original caller, + * so call the callback ourselves */ + callback(argcb); +} + +unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index) +{ + struct starpu_submit_arbitered_args* args; + _STARPU_MALLOC(args, sizeof(*args)); + args->request_from_codelet = request_from_codelet; + args->handle = handle; + args->mode = mode; + args->callback = callback; + args->argcb = argcb; + args->j = j; + args->buffer_index = buffer_index; + /* 
__starpu_attempt_to_submit_arbitered_data_request will free args */ + 
_starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_attempt_to_submit_arbitered_data_request, args); + return 1; +} + +unsigned ___starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index) +{ + STARPU_ASSERT(handle->arbiter); +#else // LOCK_OR_DELEGATE +unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index) +{ + starpu_arbiter_t arbiter = handle->arbiter; + STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); +#endif // LOCK_OR_DELEGATE + + mode = _starpu_arbiter_filter_modes(mode); + + STARPU_ASSERT_MSG(!(mode & STARPU_REDUX), "REDUX with arbiter is not implemented\n"); + + /* Take the lock protecting the header. We try to do some progression + * in case this is called from a worker, otherwise we just wait for the + * lock to be available. */ + if (request_from_codelet) + { + int cpt = 0; + while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) + { + cpt++; + _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); + } + if (cpt == STARPU_SPIN_MAXTRY) + _starpu_spin_lock(&handle->header_lock); + } + else + { + _starpu_spin_lock(&handle->header_lock); + } + + /* If there is currently nobody accessing the piece of data, or it's + * not another writer and if this is the same type of access as the + * current one, we can proceed. */ + unsigned put_in_list = 1; + + if ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))) + { + /* TODO: Detect whether this is the end of a reduction phase etc. 
like in data_concurrency.c */ + if (0) + { + } + else + { + put_in_list = 0; + } + } + + if (put_in_list) + { + /* there cannot be multiple writers or a new writer + * while the data is in read mode */ + + handle->busy_count++; + /* enqueue the request */ + struct _starpu_data_requester *r = _starpu_data_requester_new(); + r->mode = mode; + r->is_requested_by_codelet = request_from_codelet; + r->j = j; + r->buffer_index = buffer_index; + r->prio = j ? j->task->priority : 0; + r->ready_data_callback = callback; + r->argcb = argcb; + + _starpu_data_requester_prio_list_push_back(&handle->arbitered_req_list, r); + + /* failed */ + put_in_list = 1; + } + else + { + handle->refcnt++; + handle->busy_count++; + + /* Do not write to handle->current_mode if it is already + * R. This avoids a spurious warning from helgrind when + * the following happens: + * acquire(R) in thread A + * acquire(R) in thread B + * release_data_on_node() in thread A + * helgrind would shout that the latter reads current_mode + * unsafely. + * + * This actually basically explains to helgrind that it is a + * shared R acquisition. 
+ */ + if (mode != STARPU_R || handle->current_mode != mode) + handle->current_mode = mode; + + /* success */ + put_in_list = 0; + } + + _starpu_spin_unlock(&handle->header_lock); +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif // LOCK_OR_DELEGATE + return put_in_list; + +} + + + +#ifdef LOCK_OR_DELEGATE +/* These are the arguments passed to __starpu_submit_job_enforce_arbitered_deps */ +struct starpu_enforce_arbitered_args +{ + struct _starpu_job *j; + unsigned buf; + unsigned nbuffers; +}; + +static void ___starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers); +static void __starpu_submit_job_enforce_arbitered_deps(void* inData) +{ + struct starpu_enforce_arbitered_args* args = inData; + struct _starpu_job *j = args->j; + unsigned buf = args->buf; + unsigned nbuffers = args->nbuffers; + /* we are in charge of freeing the args */ + free(args); + ___starpu_submit_job_enforce_arbitered_deps(j, buf, nbuffers); +} + +void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) +{ + struct starpu_enforce_arbitered_args* args; + _STARPU_MALLOC(args, sizeof(*args)); + starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); + args->j = j; + args->buf = buf; + args->nbuffers = nbuffers; + /* __starpu_submit_job_enforce_arbitered_deps will free args */ + _starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_submit_job_enforce_arbitered_deps, args); +} + +static void ___starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) +{ + starpu_arbiter_t arbiter = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf)->arbiter; +#else // LOCK_OR_DELEGATE +void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) +{ + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + starpu_arbiter_t arbiter = descrs[buf].handle->arbiter; + 
STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); 
+#endif + STARPU_ASSERT(arbiter); + + const unsigned start_buf_arbiter = buf; + unsigned idx_buf_arbiter; + int idx_buf_arbiterdup; + unsigned all_arbiter_available = 1; + + starpu_data_handle_t handle; + enum starpu_data_access_mode mode; + int node; + + for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++) + { + handle = descrs[idx_buf_arbiter].handle; + mode = descrs[idx_buf_arbiter].mode & ~STARPU_COMMUTE; + node = descrs[idx_buf_arbiter].orig_node; + + mode = _starpu_arbiter_filter_modes(mode); + + STARPU_ASSERT_MSG(!(mode & STARPU_REDUX), "REDUX with arbiter is not implemented\n"); + + for (idx_buf_arbiterdup = (int) idx_buf_arbiter-1; idx_buf_arbiterdup >= 0; idx_buf_arbiterdup--) + { + starpu_data_handle_t handle_dup = descrs[idx_buf_arbiterdup].handle; + int node_dup = descrs[idx_buf_arbiterdup].orig_node; + if (handle_dup == handle && node_dup == node) + /* We have already requested this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles. 
*/ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + if (handle->arbiter != arbiter) + { + /* another arbiter */ + break; + } + + /* Try to take handle */ + _starpu_spin_lock(&handle->header_lock); + if ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))) + { + /* Got it */ + handle->refcnt++; + handle->busy_count++; + if (mode != STARPU_R || handle->current_mode != mode) + handle->current_mode = mode; + _starpu_spin_unlock(&handle->header_lock); + } + else + { + /* the handle is busy in an incompatible mode, stop */ + _starpu_spin_unlock(&handle->header_lock); + all_arbiter_available = 0; + break; + } + next: + ; + } + if (all_arbiter_available == 0) + { + /* Oops, record ourselves as waiting for this data */ + + struct _starpu_data_requester *r = _starpu_data_requester_new(); + r->mode = mode; + r->is_requested_by_codelet = 1; + r->j = j; + r->buffer_index = start_buf_arbiter; + r->prio = j->task->priority; + r->ready_data_callback = NULL; + r->argcb = NULL; + + /* store the request in the list of the handle we failed to take */ + _starpu_data_requester_prio_list_push_front(&handle->arbitered_req_list, r); + + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + + /* and cancel all taken */ + unsigned idx_buf_cancel; + unsigned idx_buf_canceldup; + for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++) + { + starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle; + int cancel_node = descrs[idx_buf_cancel].orig_node; + if (cancel_handle->arbiter != arbiter) + /* Will have to process another arbiter, will do that later */ + break; + + for (idx_buf_canceldup = idx_buf_cancel+1; idx_buf_canceldup < idx_buf_arbiter; idx_buf_canceldup++) + { + starpu_data_handle_t handle_dup = 
descrs[idx_buf_canceldup].handle; + int node_dup = 
descrs[idx_buf_canceldup].orig_node; + if (handle_dup == cancel_handle && node_dup == cancel_node) + goto next2; + if (!_starpu_handles_same_root(handle_dup, cancel_handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + _starpu_spin_lock(&cancel_handle->header_lock); + /* drop the reference because in the end we do not take the data */ + STARPU_ASSERT(cancel_handle->refcnt >= 1); + cancel_handle->refcnt--; + STARPU_ASSERT(cancel_handle->busy_count > 0); + cancel_handle->busy_count--; + if (!_starpu_data_check_not_busy(cancel_handle)) + _starpu_spin_unlock(&cancel_handle->header_lock); + next2: + ; + } + +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + return; + } +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + + // all_arbiter_available is true + if (idx_buf_arbiter < nbuffers) + /* Other arbitered data, process them */ + _starpu_submit_job_enforce_arbitered_deps(j, idx_buf_arbiter, nbuffers); + else + /* Finished with all data, can eventually push! 
*/ + _starpu_push_task(j); +} + +#ifdef LOCK_OR_DELEGATE +void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle); +void __starpu_notify_arbitered_dependencies(void* inData) +{ + starpu_data_handle_t handle = inData; + ___starpu_notify_arbitered_dependencies(handle); +} +void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle) +{ + _starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_notify_arbitered_dependencies, handle); +} +void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle) +#else // LOCK_OR_DELEGATE +void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode) +#endif +{ + starpu_arbiter_t arbiter = handle->arbiter; +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); +#endif + + /* Since the request has been posted the handle may have been processed and released */ + if (_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list)) + { + /* No waiter, just remove our reference */ + _starpu_spin_lock(&handle->header_lock); + if (down_to_mode == STARPU_NONE) + { + STARPU_ASSERT(handle->refcnt > 0); + handle->refcnt--; + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + } + else + { + /* Downgrade from W or RW down to R, keeping the same reference, + * but thus allowing other readers without allowing writers. */ + STARPU_ASSERT(down_to_mode == STARPU_R && + handle->current_mode == STARPU_W); + handle->current_mode = down_to_mode; + } +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + if (_starpu_data_check_not_busy(handle)) + /* Handle was even destroyed, don't unlock it. 
*/ + return; + _starpu_spin_unlock(&handle->header_lock); + return; + } + + /* There is a waiter, remove our reference */ + _starpu_spin_lock(&handle->header_lock); + if (down_to_mode == STARPU_NONE) + { + STARPU_ASSERT(handle->refcnt > 0); + handle->refcnt--; + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + } + else + { + /* Downgrade from W or RW down to R, keeping the same reference, + * but thus allowing other readers without allowing writers. */ + STARPU_ASSERT(down_to_mode == STARPU_R && + handle->current_mode == STARPU_W); + handle->current_mode = down_to_mode; + } + /* There should be at least one busy_count reference for the waiter + * (thus we don't risk seeing the handle disappear below) */ + STARPU_ASSERT(handle->busy_count > 0); + _starpu_spin_unlock(&handle->header_lock); + + /* Note: we may be putting back our own requests, so avoid looping by + * extracting the list */ + struct _starpu_data_requester_prio_list l = handle->arbitered_req_list; + _starpu_data_requester_prio_list_init(&handle->arbitered_req_list); + + while (!_starpu_data_requester_prio_list_empty(&l)) + { + struct _starpu_data_requester *r = _starpu_data_requester_prio_list_pop_front_highest(&l); + + if (!r->is_requested_by_codelet) + { + /* data_acquire_cb, process it */ + enum starpu_data_access_mode r_mode = r->mode; + int put_in_list = 1; + + r_mode = _starpu_arbiter_filter_modes(r_mode); + + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + if ((handle->refcnt == 0) || (!(r_mode == STARPU_W) && (handle->current_mode == r_mode))) + { + handle->refcnt++; + handle->current_mode = r_mode; + put_in_list = 0; + } + _starpu_spin_unlock(&handle->header_lock); + + if (put_in_list) + _starpu_data_requester_prio_list_push_front(&l, r); + + /* Put back remaining requests */ + _starpu_data_requester_prio_list_push_prio_list_back(&handle->arbitered_req_list, &l); +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + if 
(!put_in_list) + { + r->ready_data_callback(r->argcb); + _starpu_data_requester_delete(r); + } + + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + return; + } + + /* A task waiting for a set of data, try to acquire them */ + + struct _starpu_job* j = r->j; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + + unsigned idx_buf_arbiter; + int idx_buf_arbiterdup; + unsigned all_arbiter_available = 1; + starpu_data_handle_t handle_arbiter; + enum starpu_data_access_mode mode; + int node_arbiter; + + unsigned start_buf_arbiter = r->buffer_index; + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + + for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++) + { + handle_arbiter = descrs[idx_buf_arbiter].handle; + node_arbiter = descrs[idx_buf_arbiter].orig_node; + + if (handle_arbiter->arbiter != arbiter) + /* Will have to process another arbiter, will do that later */ + break; + + for (idx_buf_arbiterdup = (int) idx_buf_arbiter-1; idx_buf_arbiterdup >= 0; idx_buf_arbiterdup--) + { + starpu_data_handle_t handle_dup = descrs[idx_buf_arbiterdup].handle; + int node_dup = descrs[idx_buf_arbiterdup].orig_node; + if (handle_dup == handle_arbiter && node_dup == node_arbiter) + /* We have already requested this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles. 
*/ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle_arbiter)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + mode = descrs[idx_buf_arbiter].mode; + mode = _starpu_arbiter_filter_modes(mode); + + /* try to take this handle */ + _starpu_spin_lock(&handle_arbiter->header_lock); + if (!((handle_arbiter->refcnt == 0) || (!(mode == STARPU_W) && (handle_arbiter->current_mode == mode)))) + { + /* handle is not available, the request gets recorded on it below */ + _starpu_spin_unlock(&handle_arbiter->header_lock); + all_arbiter_available = 0; + break; + } + /* mark the handle as taken */ + handle_arbiter->refcnt++; + handle_arbiter->busy_count++; + handle_arbiter->current_mode = mode; + _starpu_spin_unlock(&handle_arbiter->header_lock); + next: + ; + } + + if (all_arbiter_available) + { + /* Success! Drop request */ + _starpu_data_requester_delete(r); + + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + + /* Put back remaining requests */ + _starpu_data_requester_prio_list_push_prio_list_back(&handle->arbitered_req_list, &l); +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + + if (idx_buf_arbiter < nbuffers) + /* Other arbitered data, process them */ + _starpu_submit_job_enforce_arbitered_deps(j, idx_buf_arbiter, nbuffers); + else + /* Finished with all data, can eventually push! 
*/ + _starpu_push_task(j); + + return; + } + else + { + /* not all handles are available - record that task on the first unavailable handle */ + + /* store the request in that handle's list */ + r->mode = mode; + _starpu_data_requester_prio_list_push_front(&handle_arbiter->arbitered_req_list, r); + + /* Move check_busy reference too */ + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + + _starpu_spin_lock(&handle_arbiter->header_lock); + handle_arbiter->busy_count++; + _starpu_spin_unlock(&handle_arbiter->header_lock); + + /* and revert the references taken so far */ + unsigned idx_buf_cancel; + unsigned idx_buf_canceldup; + for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++) + { + starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle; + int cancel_node = descrs[idx_buf_cancel].orig_node; + if (cancel_handle->arbiter != arbiter) + break; + for (idx_buf_canceldup = idx_buf_cancel+1; idx_buf_canceldup < idx_buf_arbiter; idx_buf_canceldup++) + { + starpu_data_handle_t handle_dup = descrs[idx_buf_canceldup].handle; + int node_dup = descrs[idx_buf_canceldup].orig_node; + if (handle_dup == cancel_handle && node_dup == cancel_node) + goto next2; + if (!_starpu_handles_same_root(handle_dup, cancel_handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + _starpu_spin_lock(&cancel_handle->header_lock); + STARPU_ASSERT(cancel_handle->refcnt >= 1); + cancel_handle->refcnt--; + STARPU_ASSERT(cancel_handle->busy_count > 0); + cancel_handle->busy_count--; + if (!_starpu_data_check_not_busy(cancel_handle)) + _starpu_spin_unlock(&cancel_handle->header_lock); + next2: + ; + } + } + } + /* no task has been pushed */ +#ifndef LOCK_OR_DELEGATE + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); +#endif + return; +} + 
+starpu_arbiter_t 
starpu_arbiter_create(void) /* Allocate a new arbiter and initialize its lock: the delegation list and its spinlock under LOCK_OR_DELEGATE, a mutex otherwise. The result is released with starpu_arbiter_destroy(). */ +{ + starpu_arbiter_t res; + _STARPU_MALLOC(res, sizeof(*res)); + +#ifdef LOCK_OR_DELEGATE + res->dlTaskListHead = NULL; + _starpu_spin_init(&res->dlListLock); + res->working = 0; +#else /* LOCK_OR_DELEGATE */ + STARPU_PTHREAD_MUTEX_INIT(&res->mutex, NULL); +#endif /* LOCK_OR_DELEGATE */ + + return res; +} + +void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter) /* Make handle managed by arbiter. It is asserted that the handle has no arbiter yet and that its refcnt and busy_count are both zero. */ +{ + if (handle->arbiter && handle->arbiter == _starpu_global_arbiter) + /* Just for testing purpose: a handle already attached to + * _starpu_global_arbiter silently keeps it */ + return; + STARPU_ASSERT_MSG(!handle->arbiter, "handle can only be assigned one arbiter"); + STARPU_ASSERT_MSG(!handle->refcnt, "arbiter can be assigned to handle only right after initialization"); + STARPU_ASSERT_MSG(!handle->busy_count, "arbiter can be assigned to handle only right after initialization"); + handle->arbiter = arbiter; +} + +void starpu_arbiter_destroy(starpu_arbiter_t arbiter) /* Tear down the lock of an arbiter and free it. Under LOCK_OR_DELEGATE it is asserted that dlTaskListHead is NULL and that working is zero. */ +{ +#ifdef LOCK_OR_DELEGATE + _starpu_spin_lock(&arbiter->dlListLock); + STARPU_ASSERT(!arbiter->dlTaskListHead); + STARPU_ASSERT(!arbiter->working); + _starpu_spin_unlock(&arbiter->dlListLock); + _starpu_spin_destroy(&arbiter->dlListLock); +#else /* LOCK_OR_DELEGATE */ + STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); /* NOTE(review): lock then unlock right before destroying, presumably to wait for a thread still holding the mutex - confirm */ + STARPU_PTHREAD_MUTEX_DESTROY(&arbiter->mutex); +#endif /* LOCK_OR_DELEGATE */ + free(arbiter); +} diff --git a/src/core/dependencies/data_concurrency.c b/src/core/dependencies/data_concurrency.c new file mode 100644 index 0000000..ee6e964 --- /dev/null +++ b/src/core/dependencies/data_concurrency.c @@ -0,0 +1,690 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include + +/* + * We have a kind of dining philosophers problem: various tasks are accessing + * various data concurrently in different modes: STARPU_R, STARPU_RW, STARPU_W, + * STARPU_SCRATCH and STARPU_REDUX. STARPU_RW is managed as a STARPU_W access. + * We have the following constraints: + * + * - A single STARPU_W access is allowed at a time. + * - Concurrent STARPU_R accesses are allowed. + * - Concurrent STARPU_SCRATCH accesses are allowed. + * - Concurrent STARPU_REDUX accesses are allowed. + * + * What we do here is implement the Dijkstra solution: handles are sorted + * by pointer value order, and tasks call + * _starpu_attempt_to_submit_data_request for each requested data in that order + * (see _starpu_sort_task_handles call in _starpu_job_set_ordered_buffers). + * + * _starpu_attempt_to_submit_data_request will either: + * - obtain access to the data, and thus the task can proceed with acquiring + * other data (see _submit_job_access_data) + * - queue a request on the data handle + * + * When a task finishes, it calls _starpu_notify_data_dependencies for each + * data, to free its acquisitions. 
This will look whether the first queued + * request can be fulfilled, and in such case make the task try to acquire its + * next data. + * + * The same mechanism is used for application data acquisition + * (starpu_data_acquire). + * + * For data with an arbiter, we have a second step, performed after this first + * step, implemented in data_arbiter_concurrency.c + */ + +/* + * Check to see whether the first queued request can proceed, and return it in + * such case. + * + * The request is popped from its list before being returned. NULL is returned + * when no request is queued, or when the first one is not compatible with the + * current state of the handle. + * + * The reduction request list takes precedence over the normal request list: + * it is the one examined whenever a reduction is pending + * (reduction_refcnt > 0) or whenever it is not empty. + */ +/* the handle header lock must be taken by the caller */ +static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_handle_t handle) +{ + struct _starpu_data_requester_prio_list *req_list; + + if (handle->reduction_refcnt > 0) + { + req_list = &handle->reduction_req_list; + } + else + { + if (_starpu_data_requester_prio_list_empty(&handle->reduction_req_list)) + req_list = &handle->req_list; + else + req_list = &handle->reduction_req_list; + } + + /* if there is no one to unlock ... */ + if (_starpu_data_requester_prio_list_empty(req_list)) + return NULL; + + /* if there is no reference to the data anymore, we can use it */ + if (handle->refcnt == 0) + return _starpu_data_requester_prio_list_pop_front_highest(req_list); + + /* Already writing to it, do not let another write access through */ + if (handle->current_mode == STARPU_W) + return NULL; + + /* The data is referenced in a mode other than STARPU_W (e.g. STARPU_R), + * so more accesses of that same mode can be processed. Only peek at the + * highest-priority request here, it is popped below only if its mode + * matches the current one. */ + struct _starpu_data_requester *r = _starpu_data_requester_prio_list_front_highest(req_list); + + enum starpu_data_access_mode r_mode = r->mode; + if (r_mode == STARPU_RW) + r_mode = STARPU_W; + + /* If this is a STARPU_R, STARPU_SCRATCH or STARPU_REDUX type of + * access, we only proceed if the current mode is the same as the + * requested mode. 
*/ + if (r_mode == handle->current_mode) + return _starpu_data_requester_prio_list_pop_front_highest(req_list); + else + return NULL; +} + +/* Try to submit a data request. + * + * If the request can be processed immediately, a reference is taken on the + * handle (refcnt and busy_count are incremented) and 0 is returned. If there + * is still a dependency that is not compatible with the current mode, the + * request is put in the per-handle list of "requesters", and this function + * returns 1. + * + * Handles with an arbiter are entirely delegated to + * _starpu_attempt_to_submit_arbitered_data_request. + * + * request_from_codelet tells whether the request comes from a task, in which + * case j and buffer_index identify the buffer; callback and argcb are + * recorded in the request when it gets queued. */ +/* No lock is held, this acquires and releases the handle header lock */ +static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index) +{ + if (handle->arbiter) + return _starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index); + + /* Do not care about some flags */ + mode &= ~STARPU_COMMUTE; + mode &= ~STARPU_SSEND; + mode &= ~STARPU_LOCALITY; + mode &= ~STARPU_NOFOOTPRINT; + if (mode == STARPU_RW) + mode = STARPU_W; /* STARPU_RW is managed as a STARPU_W access */ + + /* Take the lock protecting the header. We try to do some progression + * in case this is called from a worker, otherwise we just wait for the + * lock to be available. */ + if (request_from_codelet) + { + int cpt = 0; + while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) + { + cpt++; + _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); + } + if (cpt == STARPU_SPIN_MAXTRY) /* all the trylock attempts failed: fall back to a blocking lock */ + _starpu_spin_lock(&handle->header_lock); + } + else + { + _starpu_spin_lock(&handle->header_lock); + } + + /* If we have a request that is not used for the reduction, and that a + * reduction is pending, we put it at the end of normal list, and we + * use the reduction_req_list instead */ + unsigned pending_reduction = (handle->reduction_refcnt > 0); + unsigned frozen = 0; + + /* If we are currently performing a reduction, we freeze any request + * that is not explicitly a reduction task. 
*/ + unsigned is_a_reduction_task = (request_from_codelet && j && j->reduction_task); + + if (pending_reduction && !is_a_reduction_task) + frozen = 1; + + /* If there is currently nobody accessing the piece of data, or it's + * not another writer and if this is the same type of access as the + * current one, we can proceed. */ + unsigned put_in_list = 1; + + enum starpu_data_access_mode previous_mode = handle->current_mode; + + if (!frozen && ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode)))) + { + /* Detect whether this is the end of a reduction phase */ + /* We don't want to start multiple reductions of the + * same handle at the same time ! */ + + if ((handle->reduction_refcnt == 0) && (previous_mode == STARPU_REDUX) && (mode != STARPU_REDUX)) + { + _starpu_data_end_reduction_mode(handle, j?j->task->priority:STARPU_DEFAULT_PRIO); + + /* Since we need to perform a mode change, we freeze + * the request if needed, i.e. if ending the reduction + * mode left reduction_refcnt non-zero. */ + put_in_list = (handle->reduction_refcnt > 0); + } + else + { + put_in_list = 0; + } + } + + if (put_in_list) + { + /* there cannot be multiple writers or a new writer + * while the data is in read mode */ + + handle->busy_count++; /* one busy_count reference is held while the request is queued */ + /* enqueue the request */ + struct _starpu_data_requester *r = _starpu_data_requester_new(); + r->mode = mode; + r->is_requested_by_codelet = request_from_codelet; + r->j = j; + r->buffer_index = buffer_index; + r->prio = j ? j->task->priority : 0; /* NOTE(review): literal 0 here while the reduction path above uses STARPU_DEFAULT_PRIO - confirm they are equivalent */ + r->ready_data_callback = callback; + r->argcb = argcb; + + /* We put the requester in a specific list if this is a reduction task */ + struct _starpu_data_requester_prio_list *req_list = + is_a_reduction_task?&handle->reduction_req_list:&handle->req_list; + + _starpu_data_requester_prio_list_push_back(req_list, r); + + /* failed: the request is queued, 1 is returned to the caller */ + put_in_list = 1; + } + else + { + handle->refcnt++; + handle->busy_count++; + + /* Do not write to handle->current_mode if it is already + * R. 
This avoids a spurious warning from helgrind when + * the following happens: + * acquire(R) in thread A + * acquire(R) in thread B + * release_data_on_node() in thread A + * helgrind would shout that the latter reads current_mode + * unsafely. + * + * This actually basically explains helgrind that it is a + * shared R acquisition. + */ + if (mode != STARPU_R || handle->current_mode != mode) + handle->current_mode = mode; + + if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX)) + _starpu_data_start_reduction_mode(handle); + + /* success: the access is granted, 0 is returned to the caller */ + put_in_list = 0; + } + + _starpu_spin_unlock(&handle->header_lock); + return put_in_list; + +} + +/* Take a data, without waiting for it to be available (it is assumed to be). + * This is typically used for nodeps tasks, for which a previous task has already + * waited for the proper conditions, and we just need to take another reference + * for overall reference coherency. + * Handles with an arbiter and reduction tasks are not supported yet, and the + * requested mode has to be the current mode of the handle; all of this is + * checked with assertions. + * No lock is held, this acquires and releases the handle header lock */ +static void _starpu_take_data(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + struct _starpu_job *j) +{ + STARPU_ASSERT_MSG(!handle->arbiter, "TODO"); + + /* Do not care about some flags */ + mode &= ~STARPU_COMMUTE; + mode &= ~STARPU_SSEND; + mode &= ~STARPU_LOCALITY; + mode &= ~STARPU_NOFOOTPRINT; + if (mode == STARPU_RW) + mode = STARPU_W; /* STARPU_RW is managed as a STARPU_W access */ + + /* Take the lock protecting the header. We try to do some progression + * in case this is called from a worker, otherwise we just wait for the + * lock to be available. 
*/ + if (request_from_codelet) + { + int cpt = 0; + while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) + { + cpt++; + _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); + } + if (cpt == STARPU_SPIN_MAXTRY) /* all the trylock attempts failed: fall back to a blocking lock */ + _starpu_spin_lock(&handle->header_lock); + } + else + { + _starpu_spin_lock(&handle->header_lock); + } + + /* Reduction tasks are not supported by this function yet, see the TODO + * assertion below. */ + unsigned is_a_reduction_task = (request_from_codelet && j && j->reduction_task); + + STARPU_ASSERT_MSG(!is_a_reduction_task, "TODO"); + + enum starpu_data_access_mode previous_mode = handle->current_mode; + + STARPU_ASSERT_MSG(mode == previous_mode, "mode was %d, but requested %d", previous_mode, mode); + + handle->refcnt++; + handle->busy_count++; + + _starpu_spin_unlock(&handle->header_lock); +} + + +/* Submit a data request which does not come from a task: no job is attached + * to it. Returns 0 when access is obtained immediately and 1 when the request + * got queued. For handles without arbiter, callback(argcb) is only called + * later on, when a queued request eventually gets granted. + * No lock is held */ +unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb) +{ + return _starpu_attempt_to_submit_data_request(0, handle, mode, callback, argcb, NULL, 0); +} + +/* Submit the data request for buffer number buffer_index of the ordered + * buffers of job j. Returns 0 when access is obtained immediately and 1 when + * the request got queued. + * No lock is held */ +static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, unsigned buffer_index) +{ + /* Note that we do not access j->task->handles, but j->ordered_buffers + * which is a sorted copy of it. 
*/ + struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]); + starpu_data_handle_t handle = buffer->handle; + enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE; + + return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index); +} + +/* Try to acquire all data of the given job, one by one in handle pointer value order, + * starting from start_buffer_index in the ordered buffers of the job. + * + * Returns 0 when all the remaining buffers were acquired, and 1 when the job + * had to stop: either a request got queued on a handle, or an arbitered + * handle was reached and the job was handed over to + * _starpu_submit_job_enforce_arbitered_deps. + */ +/* No lock is held */ +static unsigned _submit_job_access_data(struct _starpu_job *j, unsigned start_buffer_index) +{ + unsigned buf; + int bufdup; + + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + for (buf = start_buffer_index; buf < nbuffers; buf++) + { + starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); + int node = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, buf); + + /* Look backwards for an earlier buffer with the same handle and node */ + for (bufdup = (int) buf-1; bufdup >= 0; bufdup--) + { + starpu_data_handle_t handle_dup = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, bufdup); + int node_dup = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, bufdup); + if (handle_dup == handle && node_dup == node) + /* We have already requested this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles. */ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG || j->task->status == STARPU_TASK_BLOCKED_ON_TASK || j->task->status == STARPU_TASK_BLOCKED_ON_DATA); + j->task->status = STARPU_TASK_BLOCKED_ON_DATA; + + if(handle->arbiter) + { + /* We arrived on an arbitered data, we stop and proceed + * with the arbiter second step. 
*/ + _starpu_submit_job_enforce_arbitered_deps(j, buf, nbuffers); + return 1; + } + + if (attempt_to_submit_data_request_from_job(j, buf)) + { + /* The request got queued, the job has to wait for it */ + return 1; + } + + next: + ; + } + + return 0; +} + +/* Take a reference on buffer number buffer_index of the ordered buffers of + * job j, through _starpu_take_data, i.e. without waiting. */ +static void take_data_from_job(struct _starpu_job *j, unsigned buffer_index) +{ + /* Note that we do not access j->task->handles, but j->ordered_buffers + * which is a sorted copy of it. */ + struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]); + starpu_data_handle_t handle = buffer->handle; + enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE; + + _starpu_take_data(1, handle, mode, j); +} + +/* Immediately acquire all data of the given job, one by one in handle pointer value order, + * starting from start_buffer_index in the ordered buffers of the job. + * Arbitered handles are not supported yet (TODO assertion). + */ +/* No lock is held */ +static void _submit_job_take_data_deps(struct _starpu_job *j, unsigned start_buffer_index) +{ + unsigned buf; + int bufdup; + + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + for (buf = start_buffer_index; buf < nbuffers; buf++) + { + starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); + int node = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, buf); + + /* Look backwards for an earlier buffer with the same handle and node */ + for (bufdup = (int) buf-1; bufdup >= 0; bufdup--) + { + starpu_data_handle_t handle_dup = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, bufdup); + int node_dup = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, bufdup); + if (handle_dup == handle && node_dup == node) + /* We have already requested this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles. */ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + if(handle->arbiter) + { + /* We arrived on an arbitered data, we stop and proceed + * with the arbiter second step. 
*/ + STARPU_ASSERT_MSG(0, "TODO"); + //_starpu_submit_job_take_arbitered_deps(j, buf, nbuffers); + } + + take_data_from_job(j, buf); + + next: + ; + } +} + +/* This is called when the tag+task dependencies are to be finished releasing. + * + * Notify that job j will be ready soon, unless its data make that unlikely: + * nothing is notified when one of its handles has an arbiter or, for jobs + * without sequential consistency, when a handle has a pending reduction or is + * currently referenced in an incompatible mode. + * + * No header lock is taken in this function to read the handle fields. */ +void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data) +{ + unsigned buf; + + if (j->task->cl) + { + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + + for (buf = 0; buf < nbuffers; buf++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, buf); + if (handle->arbiter) + /* Oops, it's the arbiter's decision */ + return; + } + + /* We need to check data availability only if sequential consistency + * dependencies have not been used */ + if (!j->sequential_consistency) + { + for (buf = 0; buf < nbuffers; buf++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, buf); + /* NOTE(review): only STARPU_COMMUTE is masked here, while + * _starpu_attempt_to_submit_data_request also masks + * STARPU_SSEND, STARPU_LOCALITY and STARPU_NOFOOTPRINT and + * maps STARPU_RW to STARPU_W. With such modes the comparison + * with current_mode below cannot match on a referenced + * handle, so the job is simply not notified - confirm this + * is intended. */ + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, buf) & ~STARPU_COMMUTE; + + if (handle->reduction_refcnt) + /* Reduction pending, don't bother trying */ + return; + if (handle->refcnt != 0 && (mode == STARPU_W || handle->current_mode != mode)) + /* Incompatible modes, not ready immediately */ + return; + } + } + } + /* Ok, it really looks like this job will be ready soon */ + _starpu_job_notify_ready_soon(j, data); +} + +void _starpu_job_set_ordered_buffers(struct _starpu_job *j) +{ + /* Compute an ordered list of the different pieces of data so that we + * grab them according to a total order, thus avoiding a deadlock + * condition */ + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + struct starpu_task *task = j->task; + struct _starpu_data_descr *buffers = _STARPU_JOB_GET_ORDERED_BUFFERS(j); /* NOTE(review): the loops which follow look damaged in this copy of the patch (their conditions and part of their bodies are missing) - restore them from the upstream sources before relying on this hunk */ + + for (i=0 ; icl ? 
STARPU_CODELET_GET_NODE(task->cl, i) : STARPU_SPECIFIC_NODE_NONE; + buffers[i].node = -1; + } + _starpu_sort_task_handles(buffers, nbuffers); + for (i=0 ; icl && task->cl->specific_nodes) + for (i=1 ; icl, buffers[i].index) != + STARPU_CODELET_GET_NODE(task->cl, buffers[i-1].index)) + { + STARPU_ASSERT_MSG(!(buffers[i].mode & STARPU_W) && + !(buffers[i-1].mode & STARPU_W), + "Cannot request the same data on different nodes with write mode"); + STARPU_ASSERT_MSG(!(buffers[i].mode & STARPU_REDUX) && + !(buffers[i-1].mode & STARPU_REDUX), + "Cannot request the same data on different nodes with redux mode"); + } + } +} + +/* Try to acquire the data used by the given job, in the order of its ordered + * buffers (the sorting itself is done by _starpu_job_set_ordered_buffers). + * Returns 0 when the job has no codelet or no buffer, or when all its data + * got acquired; returns 1 when the job is left waiting for some data. */ +/* No lock is held */ +unsigned _starpu_concurrent_data_access(struct _starpu_job *j) +{ + struct starpu_codelet *cl = j->task->cl; + + if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0)) + return 0; + + return _submit_job_access_data(j, 0); +} + +/* This request got fulfilled, continue with the other requests of the + * corresponding job, starting from the buffer which follows the one of the + * request. Returns 1 when the job is still waiting for some data, and 0 when + * all its buffers are now acquired. */ +/* No lock is held */ +static unsigned unlock_one_requester(struct _starpu_data_requester *r) +{ + struct _starpu_job *j = r->j; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + unsigned buffer_index = r->buffer_index; + + if (buffer_index + 1 < nbuffers) + /* not all buffers are protected yet */ + return _submit_job_access_data(j, buffer_index + 1); + else + return 0; +} + +/* Immediately acquire the data used by the given job, in the order of its + * ordered buffers, without waiting for them. Nothing is done when the job has + * no codelet or no buffer. */ +/* No lock is held */ +void _starpu_submit_job_take_data_deps(struct _starpu_job *j) +{ + struct starpu_codelet *cl = j->task->cl; + + if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0)) + return; + + _submit_job_take_data_deps(j, 0); +} + +/* This is called when a task is finished with a piece of data + * (or on starpu_data_release) + * + * The header lock must already be taken 
by the caller. + * This may free the handle if it was lazily unregistered (1 is returned in + * that case). The handle pointer thus becomes invalid for the caller. + */ +int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode) /* down_to_mode is STARPU_NONE to drop the reference, or STARPU_R to downgrade a write access to a read access. Returns 1 when the header lock is not held any more on return (handle destroyed, or handle with an arbiter), and 0 when it is still held. */ +{ + _starpu_spin_checklocked(&handle->header_lock); + + if (down_to_mode != STARPU_NONE && handle->current_mode == down_to_mode) + { + /* No change, nothing to do */ + return 0; + } + + if (handle->arbiter) + { + /* Keep our reference for now, _starpu_notify_arbitered_dependencies + * will drop it when it needs to */ + STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->req_list)); + STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->reduction_req_list)); + _starpu_spin_unlock(&handle->header_lock); + /* _starpu_notify_arbitered_dependencies will handle its own locking */ + _starpu_notify_arbitered_dependencies(handle, down_to_mode); + /* We have already unlocked, which is what returning 1 tells the caller */ + return 1; + } + + STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list)); + + if (down_to_mode == STARPU_NONE) + { + /* A data access has finished so we remove a reference. */ + STARPU_ASSERT(handle->refcnt > 0); + handle->refcnt--; + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (_starpu_data_check_not_busy(handle)) + /* Handle was destroyed, nothing left to do. */ + return 1; + } + else + { + /* Downgrade from W or RW down to R, keeping the same reference, + * but thus allowing other readers without allowing writers. */ + STARPU_ASSERT(down_to_mode == STARPU_R && + (handle->current_mode == STARPU_RW || + handle->current_mode == STARPU_W)); + handle->current_mode = down_to_mode; + } + + /* In case there is a pending reduction, and that this is the last + * requester, we may go back to a "normal" coherency model. 
*/ + if (handle->reduction_refcnt > 0) + { + //fprintf(stderr, "NOTIFY REDUCTION TASK RED REFCNT %d\n", handle->reduction_refcnt); + handle->reduction_refcnt--; + if (handle->reduction_refcnt == 0) + _starpu_data_end_reduction_mode_terminate(handle); + } + + if (handle->unlocking_reqs) + /* + * Our caller is already running the unlock loop below (we were + * most probably called from the ready_data_callback call + * below). Avoid looping again (which would potentially mean + * unbounded recursion), our caller will continue doing the + * unlock work for us. + */ + return 0; + + handle->unlocking_reqs = 1; /* tell nested calls that this call is running the unlock loop, see the check above */ + struct _starpu_data_requester *r; + while ((r = may_unlock_data_req_list_head(handle))) + { + /* STARPU_RW accesses are treated as STARPU_W */ + enum starpu_data_access_mode r_mode = r->mode; + if (r_mode == STARPU_RW) + r_mode = STARPU_W; + + int put_in_list = 1; + if ((handle->reduction_refcnt == 0) && (handle->current_mode == STARPU_REDUX) && (r_mode != STARPU_REDUX)) + { + _starpu_data_end_reduction_mode(handle, r->prio); + + /* Since we need to perform a mode change, we freeze + * the request if needed, i.e. if ending the reduction + * mode left reduction_refcnt non-zero. */ + put_in_list = (handle->reduction_refcnt > 0); + } + else + { + put_in_list = 0; + } + + if (put_in_list) + { + /* We need to put the request back because we must + * perform a reduction before. + * NOTE(review): r is pushed back on req_list although + * may_unlock_data_req_list_head may have taken it from + * reduction_req_list - confirm this is intended. */ + _starpu_data_requester_prio_list_push_front(&handle->req_list, r); + } + else + { + /* The data is now attributed to that request so we put a + * reference on it. */ + handle->refcnt++; + handle->busy_count++; + + enum starpu_data_access_mode previous_mode = handle->current_mode; + handle->current_mode = r_mode; + + /* In case we enter in a reduction mode, we invalidate all per + * worker replicates. 
Note that the "per_node" replicates are + * kept intact because we'll reduce a valid copy of the + * "per-node replicate" with the per-worker replicates .*/ + if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX)) + _starpu_data_start_reduction_mode(handle); + + _starpu_spin_unlock(&handle->header_lock); + + if (r->is_requested_by_codelet) + { + if (!unlock_one_requester(r)) + _starpu_push_task(r->j); + } + else + { + STARPU_ASSERT(r->ready_data_callback); + + /* execute the callback associated with the data requester */ + r->ready_data_callback(r->argcb); + } + + _starpu_data_requester_delete(r); + + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (_starpu_data_check_not_busy(handle)) + return 1; + } + } + handle->unlocking_reqs = 0; + + return 0; +} diff --git a/src/core/dependencies/data_concurrency.h b/src/core/dependencies/data_concurrency.h new file mode 100644 index 0000000..47c77ef --- /dev/null +++ b/src/core/dependencies/data_concurrency.h @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DATA_CONCURRENCY_H__ +#define __DATA_CONCURRENCY_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +void _starpu_job_set_ordered_buffers(struct _starpu_job *j); /* fill and sort the ordered buffers of the job */ + +unsigned _starpu_concurrent_data_access(struct _starpu_job *j); /* try to acquire the data of the job; returns 1 when the job is left waiting for data, 0 otherwise */ +void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers); /* arbiter step for the buffers of the job from index buf on; presumably defined in data_arbiter_concurrency.c - confirm */ +void _starpu_submit_job_take_data_deps(struct _starpu_job *j); /* take the data of the job immediately, without waiting */ +void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data); /* notify that the job will be ready soon, unless the state of its data tells otherwise */ + +int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); /* called with the header lock held; returns 1 when that lock is not held any more on return (the handle may even have been freed), 0 otherwise */ +void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); /* counterpart of the above for handles with an arbiter, called without the header lock */ + +unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, + enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb); /* returns 0 when access is obtained immediately, 1 when the request got queued */ + +unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, + void (*callback)(void *), void *argcb, + struct _starpu_job *j, unsigned buffer_index); /* used by the non-arbitered code as soon as the handle has an arbiter */ + +#pragma GCC visibility pop + +#endif // __DATA_CONCURRENCY_H__ + diff --git a/src/core/dependencies/dependencies.c b/src/core/dependencies/dependencies.c new file mode 100644 index 0000000..a7fc049 --- /dev/null +++ b/src/core/dependencies/dependencies.c @@ -0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Notify the tasks which depend on job j and, when its task uses a tag, the + * tags which depend on it. + * We assume that the job will not disappear under our hands */ +void _starpu_notify_dependencies(struct _starpu_job *j) +{ + STARPU_ASSERT(j); + STARPU_ASSERT(j->task); + + /* unlock tasks depending on that task */ + _starpu_notify_task_dependencies(j); + + /* unlock tags depending on that task */ + if (j->task->use_tag) + _starpu_notify_tag_dependencies(j->tag); + +} + +/* The single registered ready-soon notification function, and the pointer + * passed to it as first argument. + * TODO: make this a hashtable indexed by func+data and pass that through data. */ +static starpu_notify_ready_soon_func notify_ready_soon_func; +static void *notify_ready_soon_func_data; + +struct _starpu_notify_job_start_data +{ + double delay; /* passed as is to the notification function; set from starpu_task_expected_length in _starpu_job_notify_start */ +}; + +void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data) /* Register f, which will be called with data as first argument. Only one function can be registered: the assertion fails when one is already set. */ +{ + STARPU_ASSERT(!notify_ready_soon_func); + notify_ready_soon_func = f; + notify_ready_soon_func_data = data; +} + +/* Called when a job has just started, so we can notify tasks which were waiting + * only for this one when they can expect to start. + * Nothing is done when no notification function is registered, or when the + * expected length of the task is NaN or zero. */ +static void __starpu_job_notify_start(struct _starpu_job *j, double delay); +void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch) +{ + double delay; + + if (!notify_ready_soon_func) + return; + + delay = starpu_task_expected_length(j->task, perf_arch, j->nimpl); + if (isnan(delay) || _STARPU_IS_ZERO(delay)) + return; + + __starpu_job_notify_start(j, delay); +} + +static void __starpu_job_notify_start(struct _starpu_job *j, double delay) /* Propagate the start notification of job j, with the given delay, to the tasks and (when the task uses a tag) the tags depending on it. */ +{ + _starpu_notify_job_start_data data = { .delay = delay }; + + _starpu_notify_job_start_tasks(j, &data); + + if (j->task->use_tag) + _starpu_notify_job_start_tag_dependencies(j->tag, 
&data); + + /* TODO: check data notification */ +} + +/* Called when the last dependency of this job has just started, so we know that + * this job will be released after the given delay. + * NOTE(review): notify_ready_soon_func is called here without a NULL check. + * _starpu_job_notify_start returns early when it is not registered; confirm + * that every path leading here goes through such a check. */ +void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data) +{ + struct starpu_task *task = j->task; + + /* Notify that this task will start after the given delay */ + notify_ready_soon_func(notify_ready_soon_func_data, task, data->delay); + + + /* Notify some known transitions as well */ + + if (!task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE) + /* This task will immediately terminate, so transition this: + * propagate the same delay to what depends on it */ + __starpu_job_notify_start(_starpu_get_job_associated_to_task(task), data->delay); + if (j->quick_next) + /* This job is actually a pre_sync job with a post_sync job to be released right after */ + _starpu_job_notify_ready_soon(j->quick_next, data); +} diff --git a/src/core/dependencies/implicit_data_deps.c b/src/core/dependencies/implicit_data_deps.c new file mode 100644 index 0000000..4a14f11 --- /dev/null +++ b/src/core/dependencies/implicit_data_deps.c @@ -0,0 +1,759 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if 0 +# define _STARPU_DEP_DEBUG(fmt, ...) 
fprintf(stderr, fmt, ## __VA_ARGS__); +#else +# define _STARPU_DEP_DEBUG(fmt, ...) +#endif + +static void (*write_hook)(starpu_data_handle_t); + +void _starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t)) +{ + STARPU_ASSERT_MSG(!write_hook || write_hook == func, "only one implicit data deps hook at a time\n"); + write_hook = func; +} + +static void _starpu_add_ghost_dependency(starpu_data_handle_t handle, unsigned long previous, struct starpu_task *next) +{ + struct _starpu_job *next_job = _starpu_get_job_associated_to_task(next); + _starpu_bound_job_id_dep(handle, next_job, previous); + STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job->job_id); +} + +static void _starpu_add_dependency(starpu_data_handle_t handle, struct starpu_task *previous, struct starpu_task *next) +{ + _starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next); +} + +/* Add post_sync_task as new accessor among the existing ones, making pre_sync_task depend on the last synchronization task if any. 
*/ +static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot) +{ + /* Add this task to the list of readers */ + STARPU_ASSERT(!post_sync_task_dependency_slot->prev); + STARPU_ASSERT(!post_sync_task_dependency_slot->next); + post_sync_task_dependency_slot->task = post_sync_task; + post_sync_task_dependency_slot->next = handle->last_submitted_accessors.next; + post_sync_task_dependency_slot->prev = &handle->last_submitted_accessors; + post_sync_task_dependency_slot->next->prev = post_sync_task_dependency_slot; + handle->last_submitted_accessors.next = post_sync_task_dependency_slot; + + /* This task depends on the previous synchronization task if any */ + if (handle->last_sync_task && handle->last_sync_task != post_sync_task) + { + *submit_pre_sync= 1; + struct starpu_task *task_array[1] = {handle->last_sync_task}; + _starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0); + _starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task); + _STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_sync_task, pre_sync_task); + } + else + { + _STARPU_DEP_DEBUG("No dep\n"); + } + + /* There was perhaps no last submitted writer but a + * ghost one, we should report that here, and keep the + * ghost writer valid */ + if ( + ( +#ifdef STARPU_USE_FXT + 1 +#else + _starpu_bound_recording +#endif + || STARPU_AYU_EVENT + ) && handle->last_submitted_ghost_sync_id_is_valid) + { + _STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_sync_id, + _starpu_get_job_associated_to_task(pre_sync_task)); + _starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_sync_id, pre_sync_task); + _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task); + } + + if (*submit_pre_sync && !pre_sync_task->cl) + { + /* Add a reference to be released in _starpu_handle_job_termination */ + 
_starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + _starpu_get_job_associated_to_task(pre_sync_task)->implicit_dep_handle = handle; + } +} + +/* This adds a new synchronization task which depends on all the previous accessors */ +static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct starpu_task *ignored_task) +{ + /* Count the existing accessors */ + unsigned naccessors = 0; + struct _starpu_task_wrapper_dlist *l; + l = handle->last_submitted_accessors.next; + while (l != &handle->last_submitted_accessors) + { + if (l->task == ignored_task) + { + /* Don't make pre_sync_task depend on post_sync_task! + * but still drop from the list. + * This happens notably when a task accesses several + * times to the same data. + */ + struct _starpu_task_wrapper_dlist *next; + l->prev->next = l->next; + l->next->prev = l->prev; + l->task = NULL; + l->prev = NULL; + next = l->next; + l->next = NULL; + l = next; + } + else + { + naccessors++; + l = l->next; + } + } + _STARPU_DEP_DEBUG("%d accessors\n", naccessors); + + if (naccessors > 0) + { + /* Put all tasks in the list into task_array */ + struct starpu_task *task_array[naccessors]; + unsigned i = 0; + l = handle->last_submitted_accessors.next; + while (l != &handle->last_submitted_accessors) + { + STARPU_ASSERT(l->task); + STARPU_ASSERT(l->task != ignored_task); + task_array[i++] = l->task; + _starpu_add_dependency(handle, l->task, pre_sync_task); + _STARPU_DEP_DEBUG("dep %p -> %p\n", l->task, pre_sync_task); + + struct _starpu_task_wrapper_dlist *prev = l; + l = l->next; + prev->task = NULL; + prev->next = NULL; + prev->prev = NULL; + } + _starpu_task_declare_deps_array(pre_sync_task, naccessors, task_array, 0); + } +#ifndef STARPU_USE_FXT + if (_starpu_bound_recording) +#endif + { + /* Declare all dependencies with ghost accessors */ + struct _starpu_jobid_list 
*ghost_accessors_id = handle->last_submitted_ghost_accessors_id; + while (ghost_accessors_id) + { + unsigned long id = ghost_accessors_id->id; + _STARPU_TRACE_GHOST_TASK_DEPS(id, + _starpu_get_job_associated_to_task(pre_sync_task)); + _starpu_add_ghost_dependency(handle, id, pre_sync_task); + _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", id, pre_sync_task); + + struct _starpu_jobid_list *prev = ghost_accessors_id; + ghost_accessors_id = ghost_accessors_id->next; + free(prev); + } + handle->last_submitted_ghost_accessors_id = NULL; + } + + handle->last_submitted_accessors.next = &handle->last_submitted_accessors; + handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; + handle->last_sync_task = post_sync_task; + + if (!post_sync_task->cl) + { + /* Add a reference to be released in _starpu_handle_job_termination */ + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + _starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle; + } +} + +/* This function adds the implicit task dependencies introduced by data + * sequential consistency. Two tasks are provided: pre_sync and post_sync which + * respectively indicates which task is going to depend on the previous deps + * and on which task future deps should wait. In the case of a dependency + * introduced by a task submission, both tasks are just the submitted task, but + * in the case of user interactions with the DSM, these may be different tasks. + * */ +/* NB : handle->sequential_consistency_mutex must be hold by the caller; + * returns a task, to be submitted after releasing that mutex. */ +/* *submit_pre_sync is whether the pre_sync_task will be submitted or not. The + * caller should set it to 1 if it intends to submit it anyway, or to 0 + * if it may not submit it (because it has no other use for the task than + * synchronization). 
In the latter case, + * _starpu_detect_implicit_data_deps_with_handle will set it to 1 in case the + * task really needs to be submitted, or leave it to 0 if there is nothing to be + * waited for anyway. */ +struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency) +{ + struct starpu_task *task = NULL; + + /* Do not care about some flags */ + mode &= ~ STARPU_SSEND; + mode &= ~ STARPU_LOCALITY; + mode &= ~ STARPU_NOFOOTPRINT; + + STARPU_ASSERT(!(mode & STARPU_SCRATCH)); + _STARPU_LOG_IN(); + + if (handle->sequential_consistency && task_handle_sequential_consistency) + { + struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task); + struct _starpu_job *post_sync_job = _starpu_get_job_associated_to_task(post_sync_task); + + if (mode & STARPU_R && !handle->initialized) + { + STARPU_ASSERT_MSG(handle->init_cl, "Handle %p is not initialized, it cannot be read", handle); + /* The task will initialize it with init_cl */ + handle->initialized = 1; + } + + if (mode & STARPU_W || mode == STARPU_REDUX) + { + + STARPU_ASSERT_MSG(!handle->readonly, "Read-only handle %p can not be written to", handle); + + handle->initialized = 1; + /* We will change our value, disconnect from our readonly duplicates */ + if (handle->readonly_dup) + { + STARPU_ASSERT(handle->readonly_dup->readonly_dup_of == handle); + handle->readonly_dup->readonly_dup_of = NULL; + handle->readonly_dup = NULL; + } + if (write_hook) + write_hook(handle); + } + + /* Skip tasks that are associated to a reduction phase so that + * they do not interfere with the application. 
*/ + if (pre_sync_job->reduction_task) + { + *submit_pre_sync = 1; + return NULL; + } + if (post_sync_job->reduction_task) + { + *submit_pre_sync = 0; + return NULL; + } + + /* In case we are generating the DAG, we add an implicit + * dependency between the pre and the post sync tasks in case + * they are not the same. */ + if (pre_sync_task != post_sync_task +#ifndef STARPU_USE_FXT + && _starpu_bound_recording +#endif + ) + { + _STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job); + _starpu_bound_task_dep(post_sync_job, pre_sync_job); + } + + enum starpu_data_access_mode previous_mode = handle->last_submitted_mode; + + _STARPU_DEP_DEBUG("Handle %p Tasks %p %p %x->%x\n", handle, pre_sync_task, post_sync_task, previous_mode, mode); + + /* + * Tasks can access the data concurrently only if they have the + * same access mode, which can only be either: + * - write with STARPU_COMMUTE + * - read + * - redux + * + * In other cases, the tasks have to depend on each other. + */ + + if ((mode & STARPU_W && mode & STARPU_COMMUTE && previous_mode & STARPU_W && previous_mode & STARPU_COMMUTE) + || (mode == STARPU_R && previous_mode == STARPU_R) + || (mode == STARPU_REDUX && previous_mode == STARPU_REDUX)) + { + _STARPU_DEP_DEBUG("concurrently\n"); + /* Can access concurrently with current tasks */ + if (handle->last_sync_task != NULL) + *submit_pre_sync = 1; + _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); + } + else + { + /* Can not access concurrently, have to wait for existing accessors */ + struct _starpu_task_wrapper_dlist *l = handle->last_submitted_accessors.next; + _STARPU_DEP_DEBUG("dependency\n"); + + if ((l != &handle->last_submitted_accessors && l->next != &handle->last_submitted_accessors) + || (handle->last_submitted_ghost_accessors_id && handle->last_submitted_ghost_accessors_id->next) + || (l != &handle->last_submitted_accessors && handle->last_submitted_ghost_accessors_id)) + { + /* 
Several previous accessors */ + *submit_pre_sync = 1; + + if (mode == STARPU_W) + { + _STARPU_DEP_DEBUG("several predecessors, and this is a W-only task, thus can serve directly as a synchronization task.\n"); + /* Optimization: this task can not + * combine with others anyway, use it + * as synchronization task by making it + * wait for the previous ones. */ + _starpu_add_sync_task(handle, pre_sync_task, post_sync_task, post_sync_task); + } + else + { + _STARPU_DEP_DEBUG("several predecessors, adding sync task\n"); + /* insert an empty synchronization task + * which waits for the whole set, + * instead of creating a quadratic + * number of dependencies. */ + struct starpu_task *sync_task = starpu_task_create(); + STARPU_ASSERT(sync_task); + if (previous_mode == STARPU_REDUX) + sync_task->name = "_starpu_sync_task_redux"; + else if (mode == STARPU_COMMUTE || previous_mode == STARPU_COMMUTE) + sync_task->name = "_starpu_sync_task_commute"; + else + sync_task->name = "_starpu_sync_task"; + sync_task->cl = NULL; + sync_task->type = post_sync_task->type; + sync_task->priority = post_sync_task->priority; + + /* Make this task wait for the previous ones */ + _starpu_add_sync_task(handle, sync_task, sync_task, post_sync_task); + /* And the requested task wait for this one */ + _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); + + task = sync_task; + } + } + else + { + struct _starpu_jobid_list *ghost_accessors_id = handle->last_submitted_ghost_accessors_id; + /* At most one previous accessor or one ghost */ + if (l != &handle->last_submitted_accessors) + { + /* One accessor, make it the sync task, + * and start depending on it. 
*/ + *submit_pre_sync = 1; + _STARPU_DEP_DEBUG("One previous accessor, depending on it\n"); + handle->last_sync_task = l->task; + l->next = NULL; + l->prev = NULL; + handle->last_submitted_accessors.next = &handle->last_submitted_accessors; + handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; + handle->last_submitted_ghost_sync_id_is_valid = 0; + } + else if (ghost_accessors_id) + { + /* One ghost, just remember its id */ + _STARPU_DEP_DEBUG("No more currently running accessor, but a ghost id, taking it.\n"); + handle->last_submitted_ghost_sync_id = ghost_accessors_id->id; + handle->last_submitted_ghost_sync_id_is_valid = 1; + STARPU_ASSERT(!ghost_accessors_id->next); + handle->last_submitted_ghost_accessors_id = NULL; + free(ghost_accessors_id); + } + else + { + _STARPU_DEP_DEBUG("No previous accessor, no dependency\n"); + } + _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); + } + } + handle->last_submitted_mode = mode; + } else { + *submit_pre_sync = 0; + } + _STARPU_LOG_OUT(); + return task; +} + +int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + /* Do not care about some flags */ + mode &= ~ STARPU_SSEND; + mode &= ~ STARPU_LOCALITY; + mode &= ~ STARPU_NOFOOTPRINT; + + STARPU_ASSERT(!(mode & STARPU_SCRATCH)); + + if (handle->sequential_consistency) + { + if (handle->last_sync_task) + return -EAGAIN; + if (handle->last_submitted_accessors.next != &handle->last_submitted_accessors) + return -EAGAIN; + + if (mode & STARPU_W || mode == STARPU_REDUX) + handle->initialized = 1; + handle->last_submitted_mode = mode; + } + return 0; +} + +/* Create the implicit dependencies for a newly submitted task */ +void _starpu_detect_implicit_data_deps(struct starpu_task *task) +{ + STARPU_ASSERT(task->cl); + _STARPU_LOG_IN(); + + if (!task->sequential_consistency) + return; + + /* We don't want to enforce a sequential 
consistency for tasks that are + * not visible to the application. */ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + if (j->reduction_task) + return; + + j->sequential_consistency = 1; + + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + struct _starpu_task_wrapper_dlist *dep_slots = _STARPU_JOB_GET_DEP_SLOTS(j); + + unsigned buffer; + int bufferdup; + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_handle_t handle = descrs[buffer].handle; + enum starpu_data_access_mode mode = descrs[buffer].mode; + struct starpu_task *new_task; + + /* Scratch memory does not introduce any deps */ + if (mode & STARPU_SCRATCH) + continue; + + for (bufferdup = (int) buffer-1; bufferdup >= 0; bufferdup--) + { + starpu_data_handle_t handle_dup = descrs[bufferdup].handle; + enum starpu_data_access_mode mode_dup = descrs[bufferdup].mode; + if (handle_dup == handle && mode_dup == mode) + /* We have already added dependencies for this + * data, skip it. This reduces the number of + * dependencies, and allows notify_soon to work + * when a task uses the same data several times + * (otherwise it will not be able to find out that the two + * dependencies will be over at the same time) */ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + + } + + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + unsigned index = descrs[buffer].index; + unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? 
task->handles_sequential_consistency[index] : handle->sequential_consistency; + int submit_pre_sync = 1; + if (!task_handle_sequential_consistency) + j->sequential_consistency = 0; + new_task = _starpu_detect_implicit_data_deps_with_handle(task, &submit_pre_sync, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency); + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + if (new_task) + { + int ret = _starpu_task_submit_internally(new_task); + STARPU_ASSERT(!ret); + } + next: + ; + } + _STARPU_LOG_OUT(); +} + +/* This function is called when a task has been executed so that we don't + * create dependencies to task that do not exist anymore. */ +/* NB: We maintain a list of "ghost deps" in case FXT is enabled. Ghost + * dependencies are the dependencies that are implicitly enforced by StarPU + * even if they do not imply a real dependency. For instance in the following + * sequence, f(Ar) g(Ar) h(Aw), we expect to have h depend on both f and g, but + * if h is submitted after the termination of f or g, StarPU will not create a + * dependency as this is not needed anymore. 
*/
+/* The sequential_consistency_mutex of the handle is taken (and released) here; callers such as _starpu_unlock_post_sync_tasks call this without holding it */
+void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, struct _starpu_task_wrapper_dlist *task_dependency_slot, starpu_data_handle_t handle)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
+
+	if (handle->sequential_consistency)
+	{
+
+		/* If this is the last sync task, there is no point in adding
+		 * extra deps to a task that does not exist anymore */
+		if (task == handle->last_sync_task)
+		{
+			handle->last_sync_task = NULL;
+
+#ifndef STARPU_USE_FXT
+			if (_starpu_bound_recording)
+#endif
+			{
+				/* Save the job id of the previous sync task as the ghost sync id */
+				handle->last_submitted_ghost_sync_id_is_valid = 1;
+				struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task);
+				handle->last_submitted_ghost_sync_id = ghost_job->job_id;
+			}
+		}
+
+		/* Same if this is one of the accessors: a slot which is still
+		 * linked (non-NULL next) gets unlinked from the handle's list. */
+		if (task_dependency_slot && task_dependency_slot->next)
+		{
+#ifdef STARPU_DEBUG
+			/* Make sure we are removing ourselves from the proper handle */
+			struct _starpu_task_wrapper_dlist *l;
+			for (l = task_dependency_slot->prev; l->task; l = l->prev)
+				;
+			STARPU_ASSERT(l == &handle->last_submitted_accessors);
+			for (l = task_dependency_slot->next; l->task; l = l->next)
+				;
+			STARPU_ASSERT(l == &handle->last_submitted_accessors);
+#endif
+			STARPU_ASSERT(task_dependency_slot->task == task);
+
+			task_dependency_slot->next->prev = task_dependency_slot->prev;
+			task_dependency_slot->prev->next = task_dependency_slot->next;
+			task_dependency_slot->task = NULL;
+			task_dependency_slot->next = NULL;
+			task_dependency_slot->prev = NULL;
+#ifndef STARPU_USE_FXT
+			if (_starpu_bound_recording)
+#endif
+			{
+				/* Save the job id of the accessor task in the ghost accessors linked list */
+				struct _starpu_job *ghost_reader_job = _starpu_get_job_associated_to_task(task);
+				struct _starpu_jobid_list *link;
+				_STARPU_MALLOC(link, sizeof(struct _starpu_jobid_list));
+				link->next = handle->last_submitted_ghost_accessors_id;
+				link->id = ghost_reader_job->job_id;
+				handle->last_submitted_ghost_accessors_id = link;
+			}
+		}
+	}
+
+	STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
+}
+
+/* This is the same as _starpu_release_data_enforce_sequential_consistency, but
+ * for all data of a task */
+void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
+{
+	struct starpu_task *task = j->task;
+
+	if (!task->cl)
+		return;
+
+	struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
+	struct _starpu_task_wrapper_dlist *slots = _STARPU_JOB_GET_DEP_SLOTS(j);
+
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+	unsigned index;
+	int indexdup;
+
+	/* Release all implicit dependencies */
+	for (index = 0; index < nbuffers; index++)
+	{
+		starpu_data_handle_t handle = descrs[index].handle;
+		enum starpu_data_access_mode mode =
descrs[index].mode; + + for (indexdup = (int) index-1; indexdup >= 0; indexdup--) + { + starpu_data_handle_t handle_dup = descrs[indexdup].handle; + enum starpu_data_access_mode mode_dup = descrs[indexdup].mode; + if (handle_dup == handle && mode_dup == mode) + /* See _starpu_detect_implicit_data_deps */ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + _starpu_release_data_enforce_sequential_consistency(task, &slots[index], handle); + next: + ; + } + + for (index = 0; index < nbuffers; index++) + { + starpu_data_handle_t handle = descrs[index].handle; + int node = descrs[index].orig_node; + + for (indexdup = index+1; indexdup < (int) nbuffers; indexdup++) + { + starpu_data_handle_t handle_dup = descrs[indexdup].handle; + int node_dup = descrs[indexdup].orig_node; + if (handle_dup == handle && node_dup == node) + /* We will release this data, skip it for now. 
This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles */ + goto next2; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + /* Release the reference acquired in _starpu_push_task_output */ + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + + next2: + ; + } +} + + +void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle) +{ + _STARPU_LOG_IN(); + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + + if (handle->sequential_consistency) + { + handle->post_sync_tasks_cnt++; + + struct _starpu_task_wrapper_list *link; + _STARPU_MALLOC(link, sizeof(struct _starpu_task_wrapper_list)); + link->task = post_sync_task; + link->next = handle->post_sync_tasks; + handle->post_sync_tasks = link; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + _STARPU_LOG_OUT(); +} + +void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + struct _starpu_task_wrapper_list *post_sync_tasks = NULL; + unsigned do_submit_tasks = 0; + unsigned last_cnt; + + /* Here helgrind would shout that this is an unprotected access, but + * count can only be zero if we don't have to care about + * post_sync_tasks_cnt at all. 
*/ + if (handle->post_sync_tasks_cnt) + { + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + last_cnt = handle->post_sync_tasks_cnt; + + if (mode == STARPU_NONE) + /* Last release from us */ + handle->post_sync_tasks_cnt--; + + if (last_cnt == 1) + { + /* unlock all tasks : we need not hold the lock while unlocking all these tasks */ + do_submit_tasks = 1; + post_sync_tasks = handle->post_sync_tasks; + handle->post_sync_tasks = NULL; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + } + + if (do_submit_tasks) + { + struct _starpu_task_wrapper_list *link = post_sync_tasks; + + while (link) + { + /* There is no need to depend on that task now, since it was already unlocked */ + _starpu_release_data_enforce_sequential_consistency(link->task, &_starpu_get_job_associated_to_task(link->task)->implicit_dep_slot, handle); + + int ret = _starpu_task_submit_internally(link->task); + STARPU_ASSERT(!ret); + struct _starpu_task_wrapper_list *tmp = link; + link = link->next; + free(tmp); + } + } +} + +/* If sequential consistency mode is enabled, this function blocks until the + * handle is available in the requested access mode. 
*/
+int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_data_access_mode mode, const char *sync_name)
+{
+	/* If sequential consistency is enabled, wait until data is available */
+	STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
+	int sequential_consistency = handle->sequential_consistency;
+	if (sequential_consistency)
+	{
+		struct starpu_task *sync_task, *new_task;
+		int submit_pre_sync = 0;
+		sync_task = starpu_task_create();
+		sync_task->name = sync_name;
+		sync_task->detach = 0;
+		sync_task->destroy = 1;
+		sync_task->type = STARPU_TASK_TYPE_INTERNAL;
+
+		/* The sync task does not really access the data, but we want to
+		 * make sure that all previous accesses are done */
+		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, &submit_pre_sync, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
+
+		if (new_task)
+		{
+			int ret = _starpu_task_submit_internally(new_task);
+			STARPU_ASSERT(!ret);
+		}
+
+		if (submit_pre_sync)
+		{
+			int ret = _starpu_task_submit_internally(sync_task);
+			STARPU_ASSERT(!ret);
+			ret = starpu_task_wait(sync_task);
+			STARPU_ASSERT(ret == 0);
+		}
+		else
+		{
+			starpu_task_destroy(sync_task);
+		}
+	}
+	else
+	{
+		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
+	}
+
+	return 0;
+}
+
+/* This data is about to be freed, clean our stuff */
+void _starpu_data_clear_implicit(starpu_data_handle_t handle)
+{
+	struct _starpu_jobid_list *list;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
+	/* Pop the entries one by one, so that the handle never keeps a
+	 * pointer to an already freed ghost accessor id */
+	while ((list = handle->last_submitted_ghost_accessors_id) != NULL)
+	{
+		handle->last_submitted_ghost_accessors_id = list->next;
+		free(list);
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
+}
diff --git a/src/core/dependencies/implicit_data_deps.h b/src/core/dependencies/implicit_data_deps.h
new file mode 100644
index 0000000..888a82b --- /dev/null +++ b/src/core/dependencies/implicit_data_deps.h @@ -0,0 +1,48 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __IMPLICIT_DATA_DEPS_H__ +#define __IMPLICIT_DATA_DEPS_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot, + starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency); +int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode); +void _starpu_detect_implicit_data_deps(struct starpu_task *task); +void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, struct _starpu_task_wrapper_dlist *task_dependency_slot, starpu_data_handle_t handle); +void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j); + +void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle); +void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle, enum starpu_data_access_mode mode); + +/** Register a hook to be called when a write is submitted */ +void 
_starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t)) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** This function blocks until the handle is available in the requested mode */ +int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_data_access_mode mode, const char *sync_name); + +void _starpu_data_clear_implicit(starpu_data_handle_t handle); + +#pragma GCC visibility pop + +#endif // __IMPLICIT_DATA_DEPS_H__ + diff --git a/src/core/dependencies/tags.c b/src/core/dependencies/tags.c new file mode 100644 index 0000000..c01b59d --- /dev/null +++ b/src/core/dependencies/tags.c @@ -0,0 +1,543 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STARPU_AYUDAME_OFFSET 4000000000000000000ULL + +struct _starpu_tag_table +{ + UT_hash_handle hh; + starpu_tag_t id; + struct _starpu_tag *tag; +}; + +#define HASH_ADD_UINT64_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint64_t),add) +#define HASH_FIND_UINT64_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint64_t),out) + +static struct _starpu_tag_table *tag_htbl = NULL; +static starpu_pthread_rwlock_t tag_global_rwlock; + +static struct _starpu_cg *create_cg_apps(unsigned ntags) +{ + struct _starpu_cg *cg; + _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); + + cg->ntags = ntags; + cg->remaining = ntags; + cg->cg_type = STARPU_CG_APPS; + + cg->succ.succ_apps.completed = 0; + STARPU_PTHREAD_MUTEX_INIT(&cg->succ.succ_apps.cg_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&cg->succ.succ_apps.cg_cond, NULL); + + return cg; +} + +static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag) +{ + struct _starpu_cg *cg; + _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); + + cg->ntags = ntags; + cg->remaining = ntags; +#ifdef STARPU_DEBUG + cg->ndeps = ntags; + cg->deps = NULL; + cg->done = NULL; +#endif + cg->cg_type = STARPU_CG_TAG; + + cg->succ.tag = tag; + tag->tag_successors.ndeps++; +#ifdef STARPU_DEBUG + _STARPU_REALLOC(tag->tag_successors.deps, tag->tag_successors.ndeps * sizeof(tag->tag_successors.deps[0])); + _STARPU_REALLOC(tag->tag_successors.done, tag->tag_successors.ndeps * sizeof(tag->tag_successors.done[0])); + tag->tag_successors.deps[tag->tag_successors.ndeps-1] = cg; + tag->tag_successors.done[tag->tag_successors.ndeps-1] = 0; +#endif + + return cg; +} + +static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id) +{ + struct _starpu_tag *tag; + _STARPU_CALLOC(tag, 1, sizeof(struct _starpu_tag)); + + //tag->job = NULL; + //tag->is_assigned = 0; + //tag->is_submitted = 0; + + tag->id = id; + tag->state = STARPU_INVALID_STATE; 
+ + _starpu_cg_list_init0(&tag->tag_successors); + + _starpu_spin_init(&tag->lock); + + return tag; +} + +static void _starpu_tag_free(void *_tag) +{ + struct _starpu_tag *tag = (struct _starpu_tag *) _tag; + + if (tag) + { + _starpu_spin_lock(&tag->lock); + + unsigned nsuccs = tag->tag_successors.nsuccs; + unsigned succ; + + for (succ = 0; succ < nsuccs; succ++) + { + struct _starpu_cg *cg = tag->tag_successors.succ[succ]; + + unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1); + unsigned STARPU_ATTRIBUTE_UNUSED remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1); + + if (!ntags && (cg->cg_type == STARPU_CG_TAG)) + { + /* Last tag this cg depends on, cg becomes unreferenced */ +#ifdef STARPU_DEBUG + free(cg->deps); + free(cg->done); +#endif + free(cg); + } + } + +#ifdef STARPU_DYNAMIC_DEPS_SIZE + free(tag->tag_successors.succ); +#endif +#ifdef STARPU_DEBUG + free(tag->tag_successors.deps); + free(tag->tag_successors.done); +#endif + + _starpu_spin_unlock(&tag->lock); + _starpu_spin_destroy(&tag->lock); + + free(tag); + } +} + +/* + * Statically initializing tag_global_rwlock seems to lead to weird errors + * on Darwin, so we do it dynamically. + */ +void _starpu_init_tags(void) +{ + STARPU_PTHREAD_RWLOCK_INIT(&tag_global_rwlock, NULL); +} + +void starpu_tag_remove(starpu_tag_t id) +{ + struct _starpu_tag_table *entry; + + STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); + STARPU_AYU_REMOVETASK(id + STARPU_AYUDAME_OFFSET); + STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); + + HASH_FIND_UINT64_T(tag_htbl, &id, entry); + if (entry) HASH_DEL(tag_htbl, entry); + + STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); + + if (entry) + { + _starpu_tag_free(entry->tag); + free(entry); + } +} + +void starpu_tag_clear(void) +{ + STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); + + /* XXX: _starpu_tag_free takes the tag spinlocks while we are keeping + * the global rwlock. This contradicts the lock order of + * starpu_tag_wait_array. 
Should not be a problem in practice since + * starpu_tag_clear is called at shutdown only. */ + struct _starpu_tag_table *entry=NULL, *tmp=NULL; + + HASH_ITER(hh, tag_htbl, entry, tmp) + { + HASH_DEL(tag_htbl, entry); + _starpu_tag_free(entry->tag); + free(entry); + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); +} + +static struct _starpu_tag *_gettag_struct(starpu_tag_t id) +{ + /* search if the tag is already declared or not */ + struct _starpu_tag_table *entry; + struct _starpu_tag *tag; + + HASH_FIND_UINT64_T(tag_htbl, &id, entry); + if (entry != NULL) + tag = entry->tag; + else + { + /* the tag does not exist yet : create an entry */ + tag = _starpu_tag_init(id); + + struct _starpu_tag_table *entry2; + _STARPU_MALLOC(entry2, sizeof(*entry2)); + entry2->id = id; + entry2->tag = tag; + + HASH_ADD_UINT64_T(tag_htbl, id, entry2); + + STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); + STARPU_AYU_ADDTASK(id + STARPU_AYUDAME_OFFSET, NULL); + } + + return tag; +} + +static struct _starpu_tag *gettag_struct(starpu_tag_t id) +{ + struct _starpu_tag *tag; + STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); + tag = _gettag_struct(id); + STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); + return tag; +} + +/* lock should be taken, and this releases it */ +void _starpu_tag_set_ready(struct _starpu_tag *tag) +{ + /* mark this tag as ready to run */ + tag->state = STARPU_READY; + /* declare it to the scheduler ! */ + struct _starpu_job *j = tag->job; + + STARPU_ASSERT(!STARPU_AYU_EVENT || tag->id < STARPU_AYUDAME_OFFSET); + STARPU_AYU_PRERUNTASK(tag->id + STARPU_AYUDAME_OFFSET, -1); + STARPU_AYU_POSTRUNTASK(tag->id + STARPU_AYUDAME_OFFSET); + + /* In case the task job is going to be scheduled immediately, and if + * the task is "empty", calling _starpu_push_task would directly try to enforce + * the dependencies of the task, and therefore it would try to grab the + * lock again, resulting in a deadlock. 
*/ + _starpu_spin_unlock(&tag->lock); + + /* enforce data dependencies */ + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + _starpu_enforce_deps_starting_from_task(j); +} + +/* the lock of the tag must already be taken ! */ +static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg) +{ + STARPU_ASSERT(tag); + + _starpu_add_successor_to_cg_list(&tag->tag_successors, cg); + + if (tag->state == STARPU_DONE) + { + /* the tag was already completed sooner */ + _starpu_notify_cg(tag, cg); + } +} + +void _starpu_notify_tag_dependencies(struct _starpu_tag *tag) +{ + _starpu_spin_lock(&tag->lock); + + if (tag->state == STARPU_DONE) + { + _starpu_spin_unlock(&tag->lock); + return; + } + + tag->state = STARPU_DONE; + _STARPU_TRACE_TAG_DONE(tag); + + _starpu_notify_cg_list(tag, &tag->tag_successors); + + _starpu_spin_unlock(&tag->lock); +} + +/* Called when a job has just started, so we can notify tasks which were waiting + * only for this one when they can expect to start */ +void _starpu_notify_job_start_tag_dependencies(struct _starpu_tag *tag, _starpu_notify_job_start_data *data) +{ + _starpu_notify_job_start_cg_list(tag, &tag->tag_successors, data); +} + +void starpu_tag_restart(starpu_tag_t id) +{ + struct _starpu_tag *tag = gettag_struct(id); + + _starpu_spin_lock(&tag->lock); + STARPU_ASSERT_MSG(tag->state == STARPU_DONE || tag->state == STARPU_INVALID_STATE || tag->state == STARPU_ASSOCIATED || tag->state == STARPU_BLOCKED, "Only completed tags can be restarted (%llu was %d)", (unsigned long long) id, tag->state); + tag->state = STARPU_BLOCKED; + _starpu_spin_unlock(&tag->lock); +} + +void starpu_tag_notify_from_apps(starpu_tag_t id) +{ + struct _starpu_tag *tag = gettag_struct(id); + + _starpu_notify_tag_dependencies(tag); +} + +void _starpu_notify_restart_tag_dependencies(struct _starpu_tag *tag) +{ + _starpu_spin_lock(&tag->lock); + + if (tag->state == STARPU_DONE) + { + tag->state = STARPU_BLOCKED; + _starpu_spin_unlock(&tag->lock); + return; + } 
+ + _STARPU_TRACE_TAG_DONE(tag); + + tag->state = STARPU_BLOCKED; + + _starpu_notify_cg_list(tag, &tag->tag_successors); + + _starpu_spin_unlock(&tag->lock); +} + +void starpu_tag_notify_restart_from_apps(starpu_tag_t id) +{ + struct _starpu_tag *tag = gettag_struct(id); + + _starpu_notify_restart_tag_dependencies(tag); +} + +void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job) +{ + _STARPU_TRACE_TAG(id, job); + job->task->use_tag = 1; + + struct _starpu_tag *tag= gettag_struct(id); + + _starpu_spin_lock(&tag->lock); + + /* Note: a tag can be shared by several tasks, when it is used to + * detect when either of them are finished. We however don't allow + * several tasks to share a tag when it is used to wake them by + * dependency */ + if (tag->job != job) + tag->is_assigned++; + tag->job = job; + + job->tag = tag; + /* the tag is now associated to a job */ + + /* When the same tag may be signaled several times by different tasks, + * and it's already done, we should not reset the "done" state. + * When the tag is simply used by the same task several times, we have + * to do so. 
*/ + if (job->task->regenerate || job->submitted == 2 || + tag->state != STARPU_DONE) + tag->state = STARPU_ASSOCIATED; + STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); + STARPU_AYU_ADDDEPENDENCY(id+STARPU_AYUDAME_OFFSET, 0, job->job_id); + STARPU_AYU_ADDDEPENDENCY(job->job_id, 0, id+STARPU_AYUDAME_OFFSET); + _starpu_spin_unlock(&tag->lock); +} + +void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array) +{ + if (!ndeps) + return; + + unsigned i; + + /* create the associated completion group */ + struct _starpu_tag *tag_child = gettag_struct(id); + + _starpu_spin_lock(&tag_child->lock); + struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child); + _starpu_spin_unlock(&tag_child->lock); + +#ifdef STARPU_DEBUG + _STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0])); + _STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0])); +#endif + + for (i = 0; i < ndeps; i++) + { + starpu_tag_t dep_id = array[i]; + +#ifdef STARPU_DEBUG + cg->deps[i] = (void*) (uintptr_t) dep_id; + cg->done[i] = 0; +#endif + + /* id depends on dep_id + * so cg should be among dep_id's successors*/ + _STARPU_TRACE_TAG_DEPS(id, dep_id); + _starpu_bound_tag_dep(id, dep_id); + struct _starpu_tag *tag_dep = gettag_struct(dep_id); + STARPU_ASSERT(tag_dep != tag_child); + _starpu_spin_lock(&tag_dep->lock); + _starpu_tag_add_succ(tag_dep, cg); + STARPU_ASSERT(!STARPU_AYU_EVENT || dep_id < STARPU_AYUDAME_OFFSET); + STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); + STARPU_AYU_ADDDEPENDENCY(dep_id+STARPU_AYUDAME_OFFSET, 0, id+STARPU_AYUDAME_OFFSET); + _starpu_spin_unlock(&tag_dep->lock); + } +} + +void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...) 
+{ + /* Variadic front-end: gather the dependency tags into an array and let + * starpu_tag_declare_deps_array() do the work, so that both entry points + * share a single implementation (including the STARPU_DEBUG bookkeeping + * of cg->deps / cg->done, which only the array variant sets up). */ + if (!ndeps) + return; + + starpu_tag_t deps[ndeps]; + unsigned i; + va_list pa; + + va_start(pa, ndeps); + for (i = 0; i < ndeps; i++) + deps[i] = va_arg(pa, starpu_tag_t); + va_end(pa); + + starpu_tag_declare_deps_array(id, ndeps, deps); +} + +/* this function may be called by the application (outside callbacks !) 
*/ +int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id) +{ + unsigned i; + unsigned current; + + /* A variable-length array must have a strictly positive size, so keep + * one (unused) slot when no tag is given */ + struct _starpu_tag *tag_array[ntags ? ntags : 1]; + + _STARPU_LOG_IN(); + + /* It is forbidden to block within callbacks or codelets */ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_tag_wait must not be called from a task or callback"); + + starpu_do_schedule(); + STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); + /* only wait the tags that are not done yet */ + for (i = 0, current = 0; i < ntags; i++) + { + struct _starpu_tag *tag = _gettag_struct(id[i]); + + _starpu_spin_lock(&tag->lock); + + if (tag->state == STARPU_DONE) + { + /* that tag is done already */ + _starpu_spin_unlock(&tag->lock); + } + else + { + tag_array[current] = tag; + current++; + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); + + if (current == 0) + { + /* all deps are already fulfilled */ + _STARPU_LOG_OUT_TAG("all deps are already fulfilled"); + return 0; + } + + /* there is at least one task that is not finished */ + struct _starpu_cg *cg = create_cg_apps(current); + + for (i = 0; i < current; i++) + { + _starpu_tag_add_succ(tag_array[i], cg); + _starpu_spin_unlock(&tag_array[i]->lock); + } + + STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex); + + while (!cg->succ.succ_apps.completed) + STARPU_PTHREAD_COND_WAIT(&cg->succ.succ_apps.cg_cond, &cg->succ.succ_apps.cg_mutex); + + STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex); + + STARPU_PTHREAD_MUTEX_DESTROY(&cg->succ.succ_apps.cg_mutex); + STARPU_PTHREAD_COND_DESTROY(&cg->succ.succ_apps.cg_cond); + + free(cg); + + _STARPU_LOG_OUT(); + return 0; +} + +int starpu_tag_wait(starpu_tag_t id) +{ + return starpu_tag_wait_array(1, &id); +} + +struct starpu_task *starpu_tag_get_task(starpu_tag_t id) +{ + struct _starpu_tag_table *entry; + struct _starpu_tag *tag; + + STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); + HASH_FIND_UINT64_T(tag_htbl, &id, entry); + 
STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); + + if (!entry) + return NULL; + tag = entry->tag; + + if (!tag->job) + return NULL; + + return tag->job->task; +} + diff --git a/src/core/dependencies/tags.h b/src/core/dependencies/tags.h new file mode 100644 index 0000000..0e92e79 --- /dev/null +++ b/src/core/dependencies/tags.h @@ -0,0 +1,81 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __TAGS_H__ +#define __TAGS_H__ + +/** @file */ + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +#define _STARPU_TAG_SIZE (sizeof(starpu_tag_t)*8) + +enum _starpu_tag_state +{ + /** this tag is not declared by any task */ + STARPU_INVALID_STATE, + /** _starpu_tag_declare was called to associate the tag to a task */ + STARPU_ASSOCIATED, + /** some task dependencies are not fulfilled yet */ + STARPU_BLOCKED, + /** the task can be (or has been) submitted to the scheduler (all deps fulfilled) */ + STARPU_READY, +// useless ... +// /** the task has been submitted to the scheduler */ +// STARPU_SCHEDULED, + /** the task has been performed */ + STARPU_DONE +}; + +struct _starpu_job; + +struct _starpu_tag +{ + /** + Lock for this structure. 
Locking order is in dependency order: a tag + * must not be locked before locking a tag it depends on */ + struct _starpu_spinlock lock; + /** an identifier for the task */ + starpu_tag_t id; + enum _starpu_tag_state state; + + struct _starpu_cg_list tag_successors; + + /** which job is associated to the tag if any ? */ + struct _starpu_job *job; + + unsigned is_assigned; + unsigned is_submitted; +}; + +void _starpu_init_tags(void); + +void _starpu_notify_tag_dependencies(struct _starpu_tag *tag); +void _starpu_notify_job_start_tag_dependencies(struct _starpu_tag *tag, _starpu_notify_job_start_data *data); + +void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job); + +/** lock should be taken, and this releases it */ +void _starpu_tag_set_ready(struct _starpu_tag *tag); + +#pragma GCC visibility pop + +#endif // __TAGS_H__ diff --git a/src/core/dependencies/task_deps.c b/src/core/dependencies/task_deps.c new file mode 100644 index 0000000..dc9d3bf --- /dev/null +++ b/src/core/dependencies/task_deps.c @@ -0,0 +1,233 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j) +{ + struct _starpu_cg *cg; + _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); + + cg->ntags = ntags; + cg->remaining = ntags; +#ifdef STARPU_DEBUG + cg->ndeps = ntags; + cg->deps = NULL; + cg->done = NULL; +#endif + cg->cg_type = STARPU_CG_TASK; + + cg->succ.job = j; + j->job_successors.ndeps++; +#ifdef STARPU_DEBUG + _STARPU_REALLOC(j->job_successors.deps, j->job_successors.ndeps * sizeof(j->job_successors.deps[0])); + _STARPU_REALLOC(j->job_successors.done, j->job_successors.ndeps * sizeof(j->job_successors.done[0])); + j->job_successors.deps[j->job_successors.ndeps-1] = cg; + j->job_successors.done[j->job_successors.ndeps-1] = 0; +#endif + + return cg; +} + +static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg) +{ + STARPU_ASSERT(j); + + if (_starpu_add_successor_to_cg_list(&j->job_successors, cg)) + /* the task was already completed sooner */ + _starpu_notify_cg(j, cg); +} + +void _starpu_notify_task_dependencies(struct _starpu_job *j) +{ + _starpu_notify_cg_list(j, &j->job_successors); +} + +/* Called when a job has just started, so we can notify tasks which were waiting + * only for this one when they can expect to start */ +void _starpu_notify_job_start_tasks(struct _starpu_job *j, _starpu_notify_job_start_data *data) +{ + _starpu_notify_job_start_cg_list(j, &j->job_successors, data); +} + +/* task depends on the tasks in task array */ +void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check) +{ + if (ndeps == 0) + return; + + struct _starpu_job *job; + + job = _starpu_get_job_associated_to_task(task); + + STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); + if (check) + { + int ret = !job->submitted || !task->destroy || task->detach; +#ifdef STARPU_OPENMP + ret = ret || 
job->continuation; +#endif + STARPU_ASSERT_MSG(ret, "Task dependencies have to be set before submission (submitted %u destroy %u detach %u)", job->submitted, task->destroy, task->detach); + } + else + STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", job->terminated); + + struct _starpu_cg *cg = create_cg_task(ndeps, job); + STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); + +#ifdef STARPU_DEBUG + _STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0])); + _STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0])); +#endif + + unsigned i; + for (i = 0; i < ndeps; i++) + { + struct starpu_task *dep_task = task_array[i]; + + struct _starpu_job *dep_job; + struct _starpu_cg *back_cg = NULL; + + dep_job = _starpu_get_job_associated_to_task(dep_task); + STARPU_ASSERT_MSG(dep_task != task, "A task cannot be made to depend on itself"); + +#ifdef STARPU_DEBUG + cg->deps[i] = dep_job; + cg->done[i] = 0; +#endif + + STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself."); + STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex); + if (check) + { + STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task dependencies have to be set before submission"); + STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission"); + STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission"); + } + else + STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", dep_job->terminated); + if (dep_job->task->regenerate) + { + /* Make sure we don't regenerate the dependency before this task is finished */ + back_cg = create_cg_task(1, dep_job); + /* Just do not take that dependency into account for the first 
submission */ + dep_job->job_successors.ndeps_completed++; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex); + + _STARPU_TRACE_TASK_DEPS(dep_job, job); + _starpu_bound_task_dep(job, dep_job); + if (check) + { + STARPU_AYU_ADDDEPENDENCY(dep_job->job_id, 0, job->job_id); + } + if (_starpu_graph_record) + _starpu_graph_add_job_dep(job, dep_job); + + _starpu_task_add_succ(dep_job, cg); + if (dep_job->task->regenerate) + _starpu_task_add_succ(job, back_cg); + } +} + +void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) +{ + _starpu_task_declare_deps_array(task, ndeps, task_array, 1); +} + +void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...) +{ + if (ndeps == 0) + return; + struct starpu_task *tasks[ndeps]; + unsigned i; + va_list pa; + va_start(pa, ndeps); + for (i = 0; i < ndeps; i++) + { + tasks[i] = va_arg(pa, struct starpu_task *); + } + va_end(pa); + starpu_task_declare_deps_array(task, ndeps, tasks); +} + +void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) +{ + unsigned i; + + starpu_task_end_dep_add(task, ndeps); + for (i = 0; i < ndeps; i++) + { + struct starpu_task *dep_task = task_array[i]; + struct _starpu_job *dep_job = _starpu_get_job_associated_to_task(dep_task); + int done = 0; + + STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task end dependencies have to be set before submission"); + STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission"); + STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission"); + + STARPU_ASSERT_MSG(!dep_job->end_rdep, "multiple end dependencies are not supported yet"); + 
STARPU_ASSERT_MSG(!dep_job->task->regenerate, "end dependencies are not supported yet for regenerated tasks"); + + STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex); + dep_job->end_rdep = task; + if (dep_job->terminated) + /* It's actually already over */ + done = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex); + + if (done) + starpu_task_end_dep_release(task); + } +} + +void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...) +{ + if (ndeps == 0) + return; + struct starpu_task *tasks[ndeps]; + unsigned i; + va_list pa; + va_start(pa, ndeps); + for (i = 0; i < ndeps; i++) + { + tasks[i] = va_arg(pa, struct starpu_task *); + } + va_end(pa); + starpu_task_declare_end_deps_array(task, ndeps, tasks); +} + +int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + return _starpu_list_task_successors_in_cg_list(&j->job_successors, ndeps, task_array); +} + +int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + return _starpu_list_task_scheduled_successors_in_cg_list(&j->job_successors, ndeps, task_array); +} diff --git a/src/core/detect_combined_workers.c b/src/core/detect_combined_workers.c new file mode 100644 index 0000000..1d1b2f8 --- /dev/null +++ b/src/core/detect_combined_workers.c @@ -0,0 +1,337 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +int _starpu_initialized_combined_workers; + +#ifdef STARPU_HAVE_HWLOC +#include + +static void find_workers(hwloc_obj_t obj, int cpu_workers[STARPU_NMAXWORKERS], unsigned *n) +{ + struct _starpu_hwloc_userdata *data = obj->userdata; + if (!data->worker_list) + /* Not something we run something on, don't care */ + return; + if (data->worker_list == (void*) -1) + { + /* Intra node, recurse */ + unsigned i; + for (i = 0; i < obj->arity; i++) + find_workers(obj->children[i], cpu_workers, n); + return; + } + + /* Got to a PU leaf */ + struct _starpu_worker_list *workers = data->worker_list; + struct _starpu_worker *worker; + for(worker = _starpu_worker_list_begin(workers); worker != _starpu_worker_list_end(workers); worker = _starpu_worker_list_next(worker)) + { + /* is it a CPU worker? 
*/ + if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) + { + _STARPU_DEBUG("worker %d is part of it\n", worker->workerid); + /* Add it to the combined worker */ + cpu_workers[(*n)++] = worker->workerid; + } + } +} + +static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned min, unsigned max, unsigned arity, unsigned n, unsigned synthesize_arity) +{ + unsigned nworkers, i, j; + unsigned chunk_size = (n + synthesize_arity-1) / synthesize_arity; + unsigned chunk_start; + int cpu_workers[STARPU_NMAXWORKERS]; + int ret; + + if (n <= synthesize_arity) + /* Not too many children, do not synthesize */ + return; + + _STARPU_DEBUG("%u children > %u, synthesizing intermediate combined workers of size %u\n", n, synthesize_arity, chunk_size); + + n = 0; + j = 0; + nworkers = 0; + chunk_start = 0; + for (i = 0 ; i < arity; i++) + { + if (((struct _starpu_hwloc_userdata*)children[i]->userdata)->worker_list) + { + n++; + _STARPU_DEBUG("child %u\n", i); + find_workers(children[i], cpu_workers, &nworkers); + j++; + } + /* Completed a chunk, or last bit (but not if it's just 1 subobject) */ + if (j == chunk_size || (i == arity-1 && j > 1)) + { + if (nworkers >= min && nworkers <= max) + { + unsigned sched_ctx_id = starpu_sched_ctx_get_context(); + if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) + sched_ctx_id = 0; + struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + _STARPU_DEBUG("Adding it\n"); + ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers); + STARPU_ASSERT(ret >= 0); + workers->add(workers,ret); + } + /* Recurse there */ + synthesize_intermediate_workers(children+chunk_start, min, max, i - chunk_start, n, synthesize_arity); + /* And restart another one */ + n = 0; + j = 0; + nworkers = 0; + chunk_start = i+1; + } + } +} + +static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned max, unsigned synthesize_arity) +{ + char 
name[64]; + unsigned i, n, nworkers; + unsigned only_child = 0; + int cpu_workers[STARPU_NMAXWORKERS]; + +#if HWLOC_API_VERSION >= 0x10000 + hwloc_obj_attr_snprintf(name, sizeof(name), obj, "#", 0); +#else + hwloc_obj_snprintf(name, sizeof(name), _starpu_get_machine_config()->topology.hwtopology, obj, "#", 0); +#endif + _STARPU_DEBUG("Looking at %s\n", name); + + /* Count the children that hold CPU workers */ + for (n = 0, i = 0; i < obj->arity; i++) + if (((struct _starpu_hwloc_userdata *)obj->children[i]->userdata)->worker_list) + { + /* it has a CPU worker, remember which child that was */ + only_child = i; + n++; + } + + if (n == 1) + { + /* If only one child holds CPU workers, go straight down into that + * child (it is not necessarily children[0]) */ + find_and_assign_combinations(obj->children[only_child], min, max, synthesize_arity); + return; + } + + /* Add this object */ + nworkers = 0; + find_workers(obj, cpu_workers, &nworkers); + + if (nworkers >= min && nworkers <= max) + { + _STARPU_DEBUG("Adding it\n"); + unsigned sched_ctx_id = starpu_sched_ctx_get_context(); + if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) + sched_ctx_id = 0; + + struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + int newworkerid = starpu_combined_worker_assign_workerid(nworkers, cpu_workers); + STARPU_ASSERT(newworkerid >= 0); + workers->add(workers,newworkerid); + } + + /* Add artificial intermediate objects recursively */ + synthesize_intermediate_workers(obj->children, min, max, obj->arity, n, synthesize_arity); + + /* And recurse */ + for (i = 0; i < obj->arity; i++) + if (((struct _starpu_hwloc_userdata*) obj->children[i]->userdata)->worker_list == (void*) -1) + find_and_assign_combinations(obj->children[i], min, max, synthesize_arity); +} + +static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + int synthesize_arity = starpu_getenv_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER"); + + int min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); + if 
(min < 2) + min = 2; + int max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); + if (max == -1) + max = INT_MAX; + + if (synthesize_arity == -1) + synthesize_arity = 2; + + STARPU_ASSERT_MSG(synthesize_arity > 0, "STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER must be greater than 0"); + + /* First, mark nodes which contain CPU workers, simply by setting their userdata field */ + int i; + for (i = 0; i < nworkers; i++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); + if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) + { + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); + obj = obj->parent; + while (obj) + { + ((struct _starpu_hwloc_userdata*) obj->userdata)->worker_list = (void*) -1; + obj = obj->parent; + } + } + } + find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity); +} + +#else /* STARPU_HAVE_HWLOC */ + +static void assign_combinations_without_hwloc(struct starpu_worker_collection* worker_collection, int* workers, unsigned n, int min, int max) +{ + + int size,i; + //if the maximum number of worker is already reached + if(worker_collection->nworkers >= STARPU_NMAXWORKERS - 1) + return; + + for (size = min; size <= max; size *= 2) + { + unsigned first; + for (first = 0; first < n; first += size) + { + if (first + size <= n) + { + int found_workerids[size]; + + for (i = 0; i < size; i++) + found_workerids[i] = workers[first + i]; + + /* We register this combination */ + int newworkerid; + newworkerid = starpu_combined_worker_assign_workerid(size, found_workerids); + STARPU_ASSERT(newworkerid >= 0); + worker_collection->add(worker_collection, newworkerid); + //if the maximum number of worker is reached, then return + if(worker_collection->nworkers >= STARPU_NMAXWORKERS - 1) + return; + } + } + } +} + + +static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers) +{ + 
int i; + unsigned sched_ctx_id = starpu_sched_ctx_get_context(); + if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) + sched_ctx_id = 0; + int min, max; + struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + /* We put the id of all CPU workers in this array */ + int cpu_workers[STARPU_NMAXWORKERS]; + unsigned ncpus = 0; + + for (i = 0; i < nworkers; i++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); + if (worker->arch == STARPU_CPU_WORKER) + cpu_workers[ncpus++] = i; + } + + min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); + if (min < 2) + min = 2; + max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); + if (max == -1 || max > (int) ncpus) + max = ncpus; + + assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max); +} + +#endif /* STARPU_HAVE_HWLOC */ + +static void combine_all_cpu_workers(int *workerids, int nworkers) +{ + unsigned sched_ctx_id = starpu_sched_ctx_get_context(); + if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) + sched_ctx_id = 0; + struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + int cpu_workers[STARPU_NMAXWORKERS]; + int ncpus = 0; + int i; + int min; + int max; + + for (i = 0; i < nworkers; i++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); + + if (worker->arch == STARPU_CPU_WORKER) + cpu_workers[ncpus++] = workerids[i]; + } + + min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); + if (min < 1) + min = 1; + max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); + if (max == -1 || max > ncpus) + max = ncpus; + + for (i = min; i <= max; i++) + { + int newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers); + STARPU_ASSERT(newworkerid >= 0); + workers->add(workers, newworkerid); + } +} + +void _starpu_sched_find_worker_combinations(int *workerids, int nworkers) +{ + /* FIXME: this seems to be lacking shutdown support? 
*/ + + if (_starpu_initialized_combined_workers) + return; + _starpu_initialized_combined_workers = 1; + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + if (config->conf.single_combined_worker > 0) + combine_all_cpu_workers(workerids, nworkers); + else + { +#ifdef STARPU_HAVE_HWLOC + find_and_assign_combinations_with_hwloc(workerids, nworkers); +#else + find_and_assign_combinations_without_hwloc(workerids, nworkers); +#endif + } +} + +void starpu_sched_find_all_worker_combinations(void) +{ + const unsigned nbasic_workers = starpu_worker_get_count(); + int basic_workerids[nbasic_workers]; + unsigned i; + for(i = 0; i < nbasic_workers; i++) + { + basic_workerids[i] = i; + } + + _starpu_sched_find_worker_combinations(basic_workerids, nbasic_workers); +} diff --git a/src/core/detect_combined_workers.h b/src/core/detect_combined_workers.h new file mode 100644 index 0000000..2a90a76 --- /dev/null +++ b/src/core/detect_combined_workers.h @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#pragma GCC visibility push(hidden) + +/** @file */ + +/** Initialize combined workers */ +void _starpu_sched_find_worker_combinations(int *workerids, int nworkers); + +extern int _starpu_initialized_combined_workers; + +#pragma GCC visibility pop + diff --git a/src/core/devices.c b/src/core/devices.c new file mode 100644 index 0000000..990ea8f --- /dev/null +++ b/src/core/devices.c @@ -0,0 +1,104 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +struct _starpu_device_entry +{ + UT_hash_handle hh; + unsigned deviceid; +}; + +static struct _starpu_device_entry *gpu_devices_already_used; + +void _starpu_devices_gpu_set_used(int devid) +{ + struct _starpu_device_entry *entry; + HASH_FIND_INT(gpu_devices_already_used, &devid, entry); + if (!entry) + { + _STARPU_MALLOC(entry, sizeof(*entry)); + entry->deviceid = devid; + HASH_ADD_INT(gpu_devices_already_used, deviceid, entry); + } +} + +void _starpu_devices_gpu_clear(struct _starpu_machine_config *config, enum starpu_worker_archtype type) +{ + struct _starpu_machine_topology *topology = &config->topology; + unsigned tmp[STARPU_NMAXWORKERS]; + unsigned nb=0; + int i; + for(i=0 ; iworkers_devid[type][i]; + + HASH_FIND_INT(gpu_devices_already_used, &devid, entry); + if (entry == NULL) + { + tmp[nb] = devid; + nb++; + } + } + for (i=nb ; iworkers_devid[type], tmp, sizeof(unsigned)*STARPU_NMAXWORKERS); +} + +void _starpu_devices_drop_duplicate(unsigned ids[STARPU_NMAXWORKERS]) +{ + struct _starpu_device_entry *devices_already_used = NULL; + unsigned tmp[STARPU_NMAXWORKERS]; + unsigned nb=0; + int i; + + for(i=0 ; ideviceid = devid; + HASH_ADD_INT(devices_already_used, deviceid, entry2); + tmp[nb] = devid; + nb ++; + } + } + struct _starpu_device_entry *entry=NULL, *tempo=NULL; + HASH_ITER(hh, devices_already_used, entry, tempo) + { + HASH_DEL(devices_already_used, entry); + free(entry); + } + for (i=nb ; i +#include +#include + +#pragma GCC visibility push(hidden) + +/** Drop duplicate values from \p ids. */ +void _starpu_devices_drop_duplicate(unsigned ids[STARPU_NMAXWORKERS]); + +/** Set gpu \p devid as already used. */ +void _starpu_devices_gpu_set_used(int devid); + +/** Drop from the topology information the gpus which are already used. */ +void _starpu_devices_gpu_clear(struct _starpu_machine_config *config, enum starpu_worker_archtype type); + +/** Clean the list of gpus which are already used. 
*/ +void _starpu_devices_gpu_clean(); + + +#pragma GCC visibility pop + +#endif // __DEVICES_H__ + diff --git a/src/core/disk.c b/src/core/disk.c new file mode 100644 index 0000000..87b7c1f --- /dev/null +++ b/src/core/disk.c @@ -0,0 +1,546 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct disk_register +{ + void *base; + struct starpu_disk_ops *functions; + /* disk condition (1 = all authorizations, */ + int flag; +}; + +static int add_disk_in_list(unsigned node, struct starpu_disk_ops *func, void *base); + +static struct disk_register *disk_register_list[STARPU_MAXNODES]; +static int disk_number = 0; + +int starpu_disk_swap_node = -1; + +static void add_async_event(struct _starpu_async_channel * channel, void * event) +{ + if (!event) + return; + + struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&channel->event); + if (disk_event->requests == NULL) + { + disk_event->requests = _starpu_disk_backend_event_list_new(); + } + + struct _starpu_disk_backend_event * backend_event = _starpu_disk_backend_event_new(); + backend_event->backend_event = 
event; + + /* Store event at the end of the list */ + _starpu_disk_backend_event_list_push_back(disk_event->requests, backend_event); +} + +int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size) +{ + STARPU_ASSERT_MSG(size < 0 || size >= STARPU_DISK_SIZE_MIN, "Minimum disk size is %d Bytes ! (Here %d) \n", (int) STARPU_DISK_SIZE_MIN, (int) size); + /* register disk */ + unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0); + + /* Connect the disk memory node to all numa memory nodes */ + int nb_numa_nodes = starpu_memory_nodes_get_numa_count(); + int numa_node; + for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++) + { + _starpu_register_bus(disk_memnode, numa_node); + _starpu_register_bus(numa_node, disk_memnode); + } + + /* Any worker can manage disk memnode */ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + /* But prefer to use only CPU workers if possible */ + if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + _starpu_memory_node_add_nworkers(disk_memnode); + _starpu_worker_drives_memory_node(workerarg, disk_memnode); + } + } + + if (!_starpu_memory_node_get_nworkers(disk_memnode)) + { + /* Bleh, no CPU worker to drive the disk, use non-CPU workers too */ + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + if (starpu_worker_get_type(worker) != STARPU_CPU_WORKER) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + _starpu_memory_node_add_nworkers(disk_memnode); + _starpu_worker_drives_memory_node(workerarg, disk_memnode); + } + } + } + + //Add bus for disk <-> disk copy + if (func->copy != NULL) + { + int disk; + for (disk = 0; disk < STARPU_MAXNODES; disk++) + if (disk_register_list[disk] != NULL && disk_register_list[disk]->functions->copy != NULL && 
disk_register_list[disk]->functions->copy == func->copy) + { + _starpu_register_bus(disk_memnode, disk); + _starpu_register_bus(disk, disk_memnode); + } + } + + /* connect disk */ + void *base = func->plug(parameter, size); + + /* remember it */ + int n STARPU_ATTRIBUTE_UNUSED = add_disk_in_list(disk_memnode, func, base); + +#ifdef STARPU_SIMGRID + char name[16]; + snprintf(name, sizeof(name), "DISK%d", n); + starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT_MSG(host, "Could not find disk %s in platform file", name); + _starpu_simgrid_memory_node_set_host(disk_memnode, host); +#endif + + int ret = func->bandwidth(disk_memnode, base); + /* have a problem with the disk */ + if (ret == 0) + return -ENOENT; + if (size >= 0) + _starpu_memory_manager_set_global_memory_size(disk_memnode, size); + + _starpu_mem_chunk_disk_register(disk_memnode); + + return disk_memnode; +} + +void _starpu_disk_unregister(void) +{ + int i; + + /* search disk and delete it */ + for (i = 0; i < STARPU_MAXNODES; ++i) + { + if (disk_register_list[i] == NULL) + continue; + + _starpu_set_disk_flag(i, STARPU_DISK_NO_RECLAIM); + _starpu_free_all_automatically_allocated_buffers(i); + + /* don't forget to unplug */ + disk_register_list[i]->functions->unplug(disk_register_list[i]->base); + free(disk_register_list[i]); + disk_register_list[i] = NULL; + + disk_number--; + } + + /* no disk in the list -> delete the list */ + + STARPU_ASSERT_MSG(disk_number == 0, "Some disks are not unregistered !"); +} + +/* interface between user and disk memory */ + +void *_starpu_disk_alloc(unsigned node, size_t size) +{ + return disk_register_list[node]->functions->alloc(disk_register_list[node]->base, size); +} + +void _starpu_disk_free(unsigned node, void *obj, size_t size) +{ + disk_register_list[node]->functions->free(disk_register_list[node]->base, obj, size); +} + +/* src_node == disk node and dst_node == STARPU_MAIN_RAM */ +int _starpu_disk_read(unsigned src_node, unsigned 
dst_node STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel) +{ + void *event = NULL; + + if (channel != NULL) + { + if (disk_register_list[src_node]->functions->async_read == NULL) + channel = NULL; + else + { + double start; + _starpu_disk_get_event(&channel->event)->memory_node = src_node; + + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + event = disk_register_list[src_node]->functions->async_read(disk_register_list[src_node]->base, obj, buf, offset, size); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + add_async_event(channel, event); + } + } + /* asynchronous request failed or synchronous request is asked */ + if (channel == NULL || !event) + { + disk_register_list[src_node]->functions->read(disk_register_list[src_node]->base, obj, buf, offset, size); + return 0; + } + return -EAGAIN; +} + +/* src_node == STARPU_MAIN_RAM and dst_node == disk node */ +int _starpu_disk_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel) +{ + void *event = NULL; + + if (channel != NULL) + { + if (disk_register_list[dst_node]->functions->async_write == NULL) + channel = NULL; + else + { + double start; + _starpu_disk_get_event(&channel->event)->memory_node = dst_node; + + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + event = disk_register_list[dst_node]->functions->async_write(disk_register_list[dst_node]->base, obj, buf, offset, size); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + add_async_event(channel, event); + } + } + /* asynchronous request failed or synchronous request is asked */ + if (channel == NULL || !event) + { + disk_register_list[dst_node]->functions->write(disk_register_list[dst_node]->base, obj, buf, offset, size); + return 0; + } + return -EAGAIN; +} + +int _starpu_disk_copy(unsigned node_src, 
void *obj_src, off_t offset_src, unsigned node_dst, void *obj_dst, off_t offset_dst, size_t size, struct _starpu_async_channel *channel) +{ + /* both nodes have same copy function */ + void * event = NULL; + + if (channel) + { + _starpu_disk_get_event(&channel->event)->memory_node = node_src; + event = disk_register_list[node_src]->functions->copy(disk_register_list[node_src]->base, + obj_src, offset_src, + disk_register_list[node_dst]->base, + obj_dst, offset_dst, size); + add_async_event(channel, event); + } + + /* Something goes wrong with copy disk to disk... */ + if (!event) + { + if (channel || starpu_asynchronous_copy_disabled()) + disk_register_list[node_src]->functions->copy = NULL; + + /* perform a read, and after a write... */ + void * ptr; + int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, size, 0); + STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %zu bytes to perform disk to disk operation", size); + + ret = _starpu_disk_read(node_src, STARPU_MAIN_RAM, obj_src, ptr, offset_src, size, NULL); + STARPU_ASSERT_MSG(ret == 0, "Cannot read %zu bytes to perform disk to disk copy", size); + ret = _starpu_disk_write(STARPU_MAIN_RAM, node_dst, obj_dst, ptr, offset_dst, size, NULL); + STARPU_ASSERT_MSG(ret == 0, "Cannot write %zu bytes to perform disk to disk copy", size); + + _starpu_free_flags_on_node(STARPU_MAIN_RAM, ptr, size, 0); + + return 0; + } + + STARPU_ASSERT(event); + return -EAGAIN; +} + +int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void *obj, void **ptr, size_t *size, struct _starpu_async_channel *channel) +{ + void *event = NULL; + + if (channel != NULL) + { + if (disk_register_list[src_node]->functions->async_full_read == NULL) + channel = NULL; + else + { + double start; + _starpu_disk_get_event(&channel->event)->memory_node = src_node; + + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + event = disk_register_list[src_node]->functions->async_full_read(disk_register_list[src_node]->base, obj, 
ptr, size, dst_node); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + add_async_event(channel, event); + } + } + /* asynchronous request failed or synchronous request is asked */ + if (channel == NULL || !event) + { + disk_register_list[src_node]->functions->full_read(disk_register_list[src_node]->base, obj, ptr, size, dst_node); + return 0; + } + return -EAGAIN; +} + +int _starpu_disk_full_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *ptr, size_t size, struct _starpu_async_channel *channel) +{ + void *event = NULL; + + if (channel != NULL) + { + if (disk_register_list[dst_node]->functions->async_full_write == NULL) + channel = NULL; + else + { + double start; + _starpu_disk_get_event(&channel->event)->memory_node = dst_node; + + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + event = disk_register_list[dst_node]->functions->async_full_write(disk_register_list[dst_node]->base, obj, ptr, size); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + add_async_event(channel, event); + } + } + /* asynchronous request failed or synchronous request is asked */ + if (channel == NULL || !event) + { + disk_register_list[dst_node]->functions->full_write(disk_register_list[dst_node]->base, obj, ptr, size); + return 0; + } + return -EAGAIN; +} + +void *starpu_disk_open(unsigned node, void *pos, size_t size) +{ + return disk_register_list[node]->functions->open(disk_register_list[node]->base, pos, size); +} + +void starpu_disk_close(unsigned node, void *obj, size_t size) +{ + disk_register_list[node]->functions->close(disk_register_list[node]->base, obj, size); +} + +void starpu_disk_wait_request(struct _starpu_async_channel *async_channel) +{ + struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); + unsigned node = disk_event->memory_node; + + if (disk_event->requests != NULL && !_starpu_disk_backend_event_list_empty(disk_event->requests)) 
+ { + struct _starpu_disk_backend_event * event = _starpu_disk_backend_event_list_begin(disk_event->requests); + struct _starpu_disk_backend_event * next; + + /* Wait all events in the list and remove them */ + while (event != _starpu_disk_backend_event_list_end(disk_event->requests)) + { + next = _starpu_disk_backend_event_list_next(event); + + disk_register_list[node]->functions->wait_request(event->backend_event); + + disk_register_list[node]->functions->free_request(event->backend_event); + + _starpu_disk_backend_event_list_erase(disk_event->requests, event); + + _starpu_disk_backend_event_delete(event); + + event = next; + } + + /* Remove the list because it doesn't contain any event */ + _starpu_disk_backend_event_list_delete(disk_event->requests); + disk_event->requests = NULL; + } +} + +int starpu_disk_test_request(struct _starpu_async_channel *async_channel) +{ + struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); + unsigned node = disk_event->memory_node; + + if (disk_event->requests != NULL && !_starpu_disk_backend_event_list_empty(disk_event->requests)) + { + struct _starpu_disk_backend_event * event = _starpu_disk_backend_event_list_begin(disk_event->requests); + struct _starpu_disk_backend_event * next; + + /* Wait all events in the list and remove them */ + while (event != _starpu_disk_backend_event_list_end(disk_event->requests)) + { + next = _starpu_disk_backend_event_list_next(event); + + int res = disk_register_list[node]->functions->test_request(event->backend_event); + + if (res) + { + disk_register_list[node]->functions->free_request(event->backend_event); + + _starpu_disk_backend_event_list_erase(disk_event->requests, event); + + _starpu_disk_backend_event_delete(event); + } + + event = next; + } + + /* Remove the list because it doesn't contain any event */ + if (_starpu_disk_backend_event_list_empty(disk_event->requests)) + { + _starpu_disk_backend_event_list_delete(disk_event->requests); + 
disk_event->requests = NULL; + } + } + + return disk_event->requests == NULL; +} + +void starpu_disk_free_request(struct _starpu_async_channel *async_channe STARPU_ATTRIBUTE_UNUSED) +{ +/* It does not have any sense to use this function currently because requests are freed in test of wait functions */ + STARPU_ABORT(); + +/* struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); + int position = get_location_with_node(disk_event->memory_node); + if (disk_event->backend_event) + disk_register_list[position]->functions->free_request(disk_event->backend_event); +*/ +} + +static int add_disk_in_list(unsigned node, struct starpu_disk_ops *func, void *base) +{ + int n; + struct disk_register *dr; + _STARPU_MALLOC(dr, sizeof(struct disk_register)); + dr->base = base; + dr->flag = STARPU_DISK_ALL; + dr->functions = func; + n = disk_number++; + disk_register_list[node] = dr; + return n; +} + +int _starpu_disk_can_copy(unsigned node1, unsigned node2) +{ + STARPU_ASSERT(starpu_node_get_kind(node1) == STARPU_DISK_RAM && starpu_node_get_kind(node2) == STARPU_DISK_RAM); + + if (disk_register_list[node1]->functions == disk_register_list[node2]->functions) + /* they must have a copy function */ + if (disk_register_list[node1]->functions->copy != NULL) + return 1; + return 0; +} + +void _starpu_set_disk_flag(unsigned node, int flag) +{ + disk_register_list[node]->flag = flag; +} + +int _starpu_get_disk_flag(unsigned node) +{ + return disk_register_list[node]->flag; +} + +void _starpu_swap_init(void) +{ + char *backend; + char *path; + starpu_ssize_t size; + struct starpu_disk_ops *ops; + + path = starpu_getenv("STARPU_DISK_SWAP"); + if (!path) + return; + + backend = starpu_getenv("STARPU_DISK_SWAP_BACKEND"); + if (!backend) + { + ops = &starpu_disk_unistd_ops; + } + else if (!strcmp(backend, "stdio")) + { + ops = &starpu_disk_stdio_ops; + } + else if (!strcmp(backend, "unistd")) + { + ops = &starpu_disk_unistd_ops; + } + else if (!strcmp(backend, 
"unistd_o_direct")) + { +#ifdef STARPU_LINUX_SYS + ops = &starpu_disk_unistd_o_direct_ops; +#else + _STARPU_DISP("Warning: o_direct support is not compiled in, could not enable disk swap\n"); + return; +#endif + + } + else if (!strcmp(backend, "leveldb")) + { +#ifdef STARPU_HAVE_LEVELDB + ops = &starpu_disk_leveldb_ops; +#else + _STARPU_DISP("Warning: leveldb support is not compiled in, could not enable disk swap\n"); + return; +#endif + } + else if (!strcmp(backend, "hdf5")) + { +#ifdef STARPU_HAVE_HDF5 + ops = &starpu_disk_hdf5_ops; +#else + _STARPU_DISP("Warning: hdf5 support is not compiled in, could not enable disk swap\n"); + return; +#endif + } + else + { + _STARPU_DISP("Warning: unknown disk swap backend %s, could not enable disk swap\n", backend); + return; + } + + size = starpu_getenv_number_default("STARPU_DISK_SWAP_SIZE", -1); + + starpu_disk_swap_node = starpu_disk_register(ops, path, ((size_t) size) << 20); + if (starpu_disk_swap_node < 0) + { + _STARPU_DISP("Warning: could not enable disk swap %s on %s with size %ld, could not enable disk swap\n", backend, path, (long) size); + return; + } +} diff --git a/src/core/disk.h b/src/core/disk.h new file mode 100644 index 0000000..9e6d7da --- /dev/null +++ b/src/core/disk.h @@ -0,0 +1,82 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DISK_H__ +#define __DISK_H__ + +/** @file */ + +#define STARPU_DISK_ALL 1 +#define STARPU_DISK_NO_RECLAIM 2 + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include +#include + +#pragma GCC visibility push(hidden) + +/** interface to manipulate memory disk */ +void * _starpu_disk_alloc (unsigned node, size_t size) STARPU_ATTRIBUTE_MALLOC; + +void _starpu_disk_free (unsigned node, void *obj, size_t size); +/** src_node is a disk node, dst_node is for the moment the STARPU_MAIN_RAM */ +int _starpu_disk_read(unsigned src_node, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel * async_channel); +/** src_node is for the moment the STARU_MAIN_RAM, dst_node is a disk node */ +int _starpu_disk_write(unsigned src_node, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel * async_channel); + +int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void * obj, void ** ptr, size_t * size, struct _starpu_async_channel * async_channel); +int _starpu_disk_full_write(unsigned src_node, unsigned dst_node, void * obj, void * ptr, size_t size, struct _starpu_async_channel * async_channel); + +int _starpu_disk_copy(unsigned node_src, void* obj_src, off_t offset_src, unsigned node_dst, void* obj_dst, off_t offset_dst, size_t size, struct _starpu_async_channel * async_channel); + +/** force the request to compute */ +void starpu_disk_wait_request(struct _starpu_async_channel *async_channel); +/** return 1 if the request is finished, 0 if not finished */ +int starpu_disk_test_request(struct _starpu_async_channel *async_channel); +void starpu_disk_free_request(struct _starpu_async_channel *async_channel); + +/** interface to compare memory disk */ +int _starpu_disk_can_copy(unsigned node1, unsigned node2); + +/** change disk flag */ +void _starpu_set_disk_flag(unsigned node, int flag); +int 
_starpu_get_disk_flag(unsigned node); + +/** unregister disk */ +void _starpu_disk_unregister(void); + +void _starpu_swap_init(void); + +static inline struct _starpu_disk_event *_starpu_disk_get_event(union _starpu_async_channel_event *_event) +{ + struct _starpu_disk_event *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (struct _starpu_disk_event *) _event; + return event; +} + +#ifdef __cplusplus +} +#endif + +#pragma GCC visibility pop + +#endif /* __DISK_H__ */ diff --git a/src/core/disk_ops/disk_hdf5.c b/src/core/disk_ops/disk_hdf5.c new file mode 100644 index 0000000..9a4961c --- /dev/null +++ b/src/core/disk_ops/disk_hdf5.c @@ -0,0 +1,977 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include +#include +#include + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#define NITER _starpu_calibration_minimum +#define STARPU_CHUNK_DIM 4096 + +/* ------------------- use HDF5 to write on disk ------------------- */ + +#ifndef H5_HAVE_THREADSAFE +static int nb_disk_open = 0; +static volatile int init_finished = 0; +static starpu_pthread_t global_thread; /* This thread will perform each write/read because we don't have asynchronous functions */ +static volatile int global_run; /* Ask to the thread if he can continue */ +static starpu_pthread_mutex_t global_mutex; /* Mutex is used to protect work_list and if HDF5 library is not safe */ +static starpu_pthread_cond_t global_cond; +static struct _starpu_hdf5_work_list global_work_list; /* This list contains the work for the hdf5 thread */ +#endif + +#ifdef H5_HAVE_THREADSAFE + +#define HDF5_VAR_THREAD fileBase->thread +#define HDF5_VAR_RUN fileBase->run +#define HDF5_VAR_MUTEX fileBase->mutex +#define HDF5_VAR_COND fileBase->cond +#define HDF5_VAR_WORK_LIST fileBase->work_list + +#else + +#define HDF5_VAR_THREAD global_thread +#define HDF5_VAR_RUN global_run +#define HDF5_VAR_MUTEX global_mutex +#define HDF5_VAR_COND global_cond +#define HDF5_VAR_WORK_LIST global_work_list + +#endif + +enum hdf5_work_type { READ, WRITE, FULL_READ, FULL_WRITE, COPY }; + +LIST_TYPE(_starpu_hdf5_work, + enum hdf5_work_type type; + + struct starpu_hdf5_base * base_src; + struct starpu_hdf5_obj * obj_src; + off_t offset_src; + + struct starpu_hdf5_base * base_dst; + struct starpu_hdf5_obj * obj_dst; + off_t offset_dst; + + void * ptr; + size_t size; + void * event; +); + +struct starpu_hdf5_base +{ + hid_t fileID; + char * path; + unsigned created; /* StarPU creates the HDF5 file */ + unsigned next_dataset_id; + starpu_pthread_t thread; /* This thread will perform each write/read because we don't have asynchronous functions */ + 
int run; /* Ask to the thread if he can continue */ + starpu_pthread_mutex_t mutex; /* Mutex is used to protect work_list and if HDF5 library is not safe */ + starpu_pthread_cond_t cond; + struct _starpu_hdf5_work_list work_list; /* This list contains the work for the hdf5 thread */ +}; + +struct starpu_hdf5_obj +{ + hid_t dataset; /* describe this object in HDF5 file */ + char * path; /* path where data are stored in HDF5 file */ + size_t size; +}; + +static inline void _starpu_hdf5_protect_start(void * base STARPU_ATTRIBUTE_UNUSED) +{ +#ifndef H5_HAVE_THREADSAFE + if (base != NULL) + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); +#endif +} + +static inline void _starpu_hdf5_protect_stop(void * base STARPU_ATTRIBUTE_UNUSED) +{ +#ifndef H5_HAVE_THREADSAFE + if (base != NULL) + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); +#endif +} + +/* ------------------ Functions for internal thread -------------------- */ + +/* TODO : Dataspace may not be NATIVE_CHAR for opened data */ +static void starpu_hdf5_full_read_internal(struct _starpu_hdf5_work * work) +{ + herr_t status; + + status = H5Dread(work->obj_src->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr); + STARPU_ASSERT_MSG(status >= 0, "Can not read data associed to this dataset (%s)\n", work->obj_src->path); +} + +/* TODO : Dataspace may not be NATIVE_CHAR for opened data */ +static void starpu_hdf5_full_write_internal(struct _starpu_hdf5_work * work) +{ + herr_t status; + + /* Update size of dataspace */ + if (work->size > work->obj_dst->size) + { + /* Get official datatype */ + hid_t datatype = H5Dget_type(work->obj_dst->dataset); + hsize_t sizeDatatype = H5Tget_size(datatype); + + /* Count in number of elements */ + hsize_t extendsdim[1] = {work->size/sizeDatatype}; + status = H5Dset_extent (work->obj_dst->dataset, extendsdim); + STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n"); + work->obj_dst->size = work->size; + } + + /* Write ALL the dataspace */ + status = 
H5Dwrite(work->obj_dst->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr); + STARPU_ASSERT_MSG(status >= 0, "Can not write data to this dataset (%s)\n", work->obj_dst->path); +} + +static void starpu_hdf5_read_internal(struct _starpu_hdf5_work * work) +{ + herr_t status; + + /* Get official datatype */ + hid_t datatype = H5Dget_type(work->obj_src->dataset); + hsize_t sizeDatatype = H5Tget_size(datatype); + + /* count in element, not in byte */ + work->offset_src /= sizeDatatype; + work->size /= sizeDatatype; + + /* duplicate the dataspace in the dataset */ + hid_t dataspace_select = H5Dget_space(work->obj_src->dataset); + STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); + + /* Select what we want of the duplicated dataspace (it's called an hyperslab). This operation is done on place */ + hsize_t offsets[1] = {work->offset_src}; + hsize_t count[1] = {work->size}; + /* stride and block size are NULL which is equivalent of a shift of 1 */ + status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL); + STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); + + /* create the dataspace for the received data which describes ptr */ + hsize_t dims_receive[1] = {work->size}; + hid_t dataspace_receive = H5Screate_simple(1, dims_receive, NULL); + STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); + + /* Receiver has to be an hyperslabs */ + offsets[0] = 0; + count[0] = work->size; + H5Sselect_hyperslab(dataspace_receive, H5S_SELECT_SET, offsets, NULL, count, NULL); + STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); + + status = H5Dread(work->obj_src->dataset, datatype, dataspace_receive, dataspace_select, H5P_DEFAULT, work->ptr); + STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset 
(%s)\n", work->obj_src->path); + + /* don't need these dataspaces */ + status = H5Sclose(dataspace_select); + STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); + status = H5Sclose(dataspace_receive); + STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); +} + +static void starpu_hdf5_write_internal(struct _starpu_hdf5_work * work) +{ + herr_t status; + + /* Get official datatype */ + hid_t datatype = H5Dget_type(work->obj_dst->dataset); + hsize_t sizeDatatype = H5Tget_size(datatype); + + /* Update size of dataspace */ + if (work->size + work->offset_dst > work->obj_dst->size) + { + /* Count in number of elements */ + hsize_t extendsdim[1] = {(work->offset_dst + work->size)/sizeDatatype}; + status = H5Dset_extent (work->obj_dst->dataset, extendsdim); + STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n"); + work->obj_dst->size = work->offset_dst + work->size; + } + + /* count in element, not in byte */ + work->offset_dst /= sizeDatatype; + work->size /= sizeDatatype; + + /* duplicate the dataspace in the dataset */ + hid_t dataspace_select = H5Dget_space(work->obj_dst->dataset); + STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + + /* Select what we want of the duplicated dataspace (it's called an hyperslab). 
This operation is done on place */ + hsize_t offsets[1] = {work->offset_dst}; + hsize_t count[1] = {work->size}; + /* stride and block size are NULL which is equivalent of a shift of 1 */ + status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL); + STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + + /* create the dataspace for the received data which describes ptr */ + hsize_t dims_send[1] = {work->size}; + hid_t dataspace_send = H5Screate_simple(1, dims_send, NULL); + STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + + /* Receiver has to be an hyperslabs */ + offsets[0] = 0; + count[0] = work->size; + H5Sselect_hyperslab(dataspace_send, H5S_SELECT_SET, offsets, NULL, count, NULL); + STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + + status = H5Dwrite(work->obj_dst->dataset, datatype, dataspace_send, dataspace_select, H5P_DEFAULT, work->ptr); + STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + + /* don't need these dataspaces */ + status = H5Sclose(dataspace_select); + STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); + status = H5Sclose(dataspace_send); + STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); +} + +static unsigned warned = 0; +static void starpu_hdf5_copy_internal(struct _starpu_hdf5_work * work) +{ + herr_t status; + + /* HDF5 H50copy supports only same size in both areas and copies the entire object */ + if (work->offset_src == 0 && work->offset_dst == 0 && work->size == work->obj_src->size && work->size == work->obj_dst->size) + { + H5Dclose(work->obj_dst->dataset); + /* Dirty : Delete dataspace because H5Ocopy only works if destination does not exist */ + H5Ldelete(work->base_dst->fileID, 
work->obj_dst->path, H5P_DEFAULT); + + status = H5Ocopy(work->base_src->fileID, work->obj_src->path, work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT, H5P_DEFAULT); + STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path); + + work->obj_dst->dataset = H5Dopen2(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT); + } + else + { + if (!warned) + { + _STARPU_DISP("Direct disk to disk copy is not supported for a piece of data. Data will be transferred to RAM memory and then, be pushed on disk \n"); + warned = 1; + } + + void * ptr; + int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, work->size, 0); + STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", (unsigned long)work->size); + + /* buffer is only used internally to store intermediate data */ + work->ptr = ptr; + + starpu_hdf5_read_internal(work); + starpu_hdf5_write_internal(work); + + _starpu_free_flags_on_node(STARPU_MAIN_RAM, ptr, work->size, 0); + } +} + +static void * _starpu_hdf5_internal_thread(void * arg) +{ +#ifdef H5_HAVE_THREADSAFE + struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) arg; +#endif + while (HDF5_VAR_RUN || !_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)) + { + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + if (_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST) && HDF5_VAR_RUN) + STARPU_PTHREAD_COND_WAIT(&HDF5_VAR_COND, &HDF5_VAR_MUTEX); + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); + + /* We are the only consumer here, don't need to protect here */ + if (!_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)) + { + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + struct _starpu_hdf5_work * work = _starpu_hdf5_work_list_pop_back(&HDF5_VAR_WORK_LIST); + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); + + if (work->base_src < work->base_dst) + { + 
_starpu_hdf5_protect_start(work->base_src); +#ifdef H5_HAVE_THREADSAFE + _starpu_hdf5_protect_start(work->base_dst); +#endif + } + else + { + _starpu_hdf5_protect_start(work->base_dst); +#ifdef H5_HAVE_THREADSAFE + if (work->base_src != work->base_dst) + _starpu_hdf5_protect_start(work->base_src); +#endif + } + + switch(work->type) + { + case READ: + starpu_hdf5_read_internal(work); + break; + + case WRITE: + starpu_hdf5_write_internal(work); + break; + + case FULL_READ: + starpu_hdf5_full_read_internal(work); + break; + + case FULL_WRITE: + starpu_hdf5_full_write_internal(work); + break; + + case COPY: + starpu_hdf5_copy_internal(work); + break; + + default: + STARPU_ABORT(); + } + + if (work->base_src < work->base_dst) + { + _starpu_hdf5_protect_stop(work->base_src); +#ifdef H5_HAVE_THREADSAFE + _starpu_hdf5_protect_stop(work->base_dst); +#endif + } + else + { + _starpu_hdf5_protect_stop(work->base_dst); +#ifdef H5_HAVE_THREADSAFE + if (work->base_src != work->base_dst) + _starpu_hdf5_protect_stop(work->base_src); +#endif + } + + /* Update event to tell it's finished */ + starpu_sem_post((starpu_sem_t *) work->event); + + free(work); + } + } + + return NULL; +} + +static void _starpu_hdf5_create_thread(struct starpu_hdf5_base * fileBase) +{ + _starpu_hdf5_work_list_init(&HDF5_VAR_WORK_LIST); + HDF5_VAR_RUN = 1; + + STARPU_PTHREAD_COND_INIT(&HDF5_VAR_COND, NULL); + STARPU_PTHREAD_CREATE(&HDF5_VAR_THREAD, NULL, _starpu_hdf5_internal_thread, (void *) fileBase); +} + +/* returns the size in BYTES */ +static hsize_t _starpu_get_size_obj(struct starpu_hdf5_obj * obj) +{ + herr_t status; + + hid_t dataspace = H5Dget_space(obj->dataset); + STARPU_ASSERT_MSG(dataspace >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); + + hsize_t dims[1]; + status = H5Sget_simple_extent_dims(dataspace, dims, NULL); + STARPU_ASSERT_MSG(status >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); + + hid_t datatype = H5Dget_type(obj->dataset); + 
STARPU_ASSERT_MSG(datatype >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); + + hsize_t sizeDatatype = H5Tget_size(datatype); + STARPU_ASSERT_MSG(sizeDatatype > 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); + + H5Sclose(dataspace); + H5Tclose(datatype); + + return dims[0]*sizeDatatype; +} + +static void starpu_hdf5_send_work(void *base_src, void *obj_src, off_t offset_src, void *base_dst, void *obj_dst, off_t offset_dst, void *buf, size_t size, void * event, enum hdf5_work_type type) +{ + struct starpu_hdf5_obj * dataObj_src = (struct starpu_hdf5_obj *) obj_src; + struct starpu_hdf5_obj * dataObj_dst = (struct starpu_hdf5_obj *) obj_dst; + struct starpu_hdf5_base * fileBase_src = (struct starpu_hdf5_base *) base_src; + struct starpu_hdf5_base * fileBase_dst = (struct starpu_hdf5_base *) base_dst; + + struct _starpu_hdf5_work * work; + _STARPU_MALLOC(work, sizeof(*work)); + + work->type = type; + + work->base_src = fileBase_src; + work->obj_src = dataObj_src; + work->offset_src = offset_src; + + work->base_dst = fileBase_dst; + work->obj_dst = dataObj_dst; + work->offset_dst = offset_dst; + + work->ptr = buf; + work->size = size; + work->event = event; + +#ifdef H5_HAVE_THREADSAFE + struct starpu_hdf5_base * fileBase; + if (fileBase_src != NULL) + fileBase = fileBase_src; + else + fileBase = fileBase_dst; +#endif + + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + _starpu_hdf5_work_list_push_front(&HDF5_VAR_WORK_LIST, work); + /* Wake up internal thread */ + STARPU_PTHREAD_COND_BROADCAST(&HDF5_VAR_COND); + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); +} + +static struct starpu_hdf5_obj * _starpu_hdf5_data_alloc(struct starpu_hdf5_base * fileBase, char * name, size_t size) +{ + struct starpu_hdf5_obj * obj; + _STARPU_MALLOC(obj, sizeof(*obj)); + + _starpu_hdf5_protect_start((void *) fileBase); + + /* create a dataspace with one dimension of size elements */ + hsize_t dim[1] = {size}; + hsize_t maxdim[1] = {H5S_UNLIMITED}; + 
hid_t dataspace = H5Screate_simple(1, dim, maxdim); + + if (dataspace < 0) + { + free(obj); + return NULL; + } + + hsize_t chunkdim[1] = {STARPU_CHUNK_DIM}; + hid_t prop = H5Pcreate (H5P_DATASET_CREATE); + herr_t status = H5Pset_chunk (prop, 1, chunkdim); + STARPU_ASSERT_MSG(status >= 0, "Error when setting HDF5 property \n"); + + /* create a dataset at location name, with data described by the dataspace. + * Each element are like char in C (expected one byte) + */ + obj->dataset = H5Dcreate2(fileBase->fileID, name, H5T_NATIVE_CHAR, dataspace, H5P_DEFAULT, prop, H5P_DEFAULT); + + H5Sclose(dataspace); + H5Pclose(prop); + + if (obj->dataset < 0) + { + free(obj); + return NULL; + } + + obj->path = name; + obj->size = size; + + _starpu_hdf5_protect_stop((void *) fileBase); + + return obj; +} + +static struct starpu_hdf5_obj * _starpu_hdf5_data_open(struct starpu_hdf5_base * fileBase, char * name, size_t size) +{ + struct starpu_hdf5_obj * obj; + _STARPU_MALLOC(obj, sizeof(*obj)); + + _starpu_hdf5_protect_start((void *) fileBase); + + /* create a dataset at location name, with data described by the dataspace. 
+ * Each element are like char in C (expected one byte) + */ + obj->dataset = H5Dopen2(fileBase->fileID, name, H5P_DEFAULT); + + _starpu_hdf5_protect_stop((void *) fileBase); + + if (obj->dataset < 0) + { + free(obj); + return NULL; + } + + obj->path = name; + obj->size = size; + + return obj; +} + +static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_hdf5_base * fileBase; + _STARPU_MALLOC(fileBase, sizeof(struct starpu_hdf5_base)); + +#ifndef H5_HAVE_THREADSAFE + int actual_nb_disk = STARPU_ATOMIC_ADD(&nb_disk_open, 1); + if (actual_nb_disk == 1) + { +#endif + STARPU_PTHREAD_MUTEX_INIT(&HDF5_VAR_MUTEX, NULL); +#ifndef H5_HAVE_THREADSAFE + } + else + { + while (!init_finished) + STARPU_UYIELD(); + } +#endif + + _starpu_hdf5_protect_start(fileBase); + + struct stat buf; + if (stat(parameter, &buf) != 0 || !S_ISREG(buf.st_mode)) + { + /* The file doesn't exist or the directory exists => create the datafile */ + int id; + _starpu_mkpath(parameter, S_IRWXU); + fileBase->path = _starpu_mktemp(parameter, O_RDWR | O_BINARY, &id); + if (!fileBase->path) + { + free(fileBase); + _STARPU_ERROR("Can not create the HDF5 file (%s)", (char *) parameter); + return NULL; + } + + /* just use _starpu_mktemp_many to create a file, close the file descriptor */ + close(id); + + /* Truncate it */ + fileBase->fileID = H5Fcreate((char *)fileBase->path, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + if (fileBase->fileID < 0) + { + free(fileBase); + _STARPU_ERROR("Can not create the HDF5 file (%s)", (char *) parameter); + return NULL; + } + fileBase->created = 1; + } + else + { + /* Well, open it ! 
*/ + char *path = strdup((char *)parameter); + STARPU_ASSERT(path); + + fileBase->fileID = H5Fopen((char *)parameter, H5F_ACC_RDWR, H5P_DEFAULT); + if (fileBase->fileID < 0) + { + free(fileBase); + free(path); + _STARPU_ERROR("Can not open the HDF5 file (%s)", (char *) parameter); + return NULL; + } + fileBase->created = 0; + fileBase->path = path; + } + +#ifndef H5_HAVE_THREADSAFE + if (actual_nb_disk == 1) + { +#endif + _starpu_hdf5_create_thread(fileBase); +#ifndef H5_HAVE_THREADSAFE + init_finished = 1; + } +#endif + +#if H5_VERS_MAJOR > 1 || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR > 10) || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 10 && H5_VERS_RELEASE > 0) + H5Pset_file_space_strategy(fileBase->fileID, H5F_FSPACE_STRATEGY_FSM_AGGR, 0, 0); +#endif + + _starpu_hdf5_protect_stop(fileBase); + + fileBase->next_dataset_id = 0; + + return (void *) fileBase; +} + +/* free memory allocated for the base */ +static void starpu_hdf5_unplug(void *base) +{ +#ifndef H5_HAVE_THREADSAFE + int actual_nb_disk = STARPU_ATOMIC_ADD(&nb_disk_open, -1); +#endif + + struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; + herr_t status; + + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + +#ifndef H5_HAVE_THREADSAFE + if (actual_nb_disk == 0) + { +#endif + HDF5_VAR_RUN = 0; + STARPU_PTHREAD_COND_BROADCAST(&HDF5_VAR_COND); + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); + STARPU_PTHREAD_JOIN(HDF5_VAR_THREAD, NULL); + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + STARPU_PTHREAD_COND_DESTROY(&HDF5_VAR_COND); + STARPU_ASSERT(_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)); + /* the internal thread is deleted */ +#ifndef H5_HAVE_THREADSAFE + } +#endif + + status = H5Fclose(fileBase->fileID); + + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); + +#ifndef H5_HAVE_THREADSAFE + if (actual_nb_disk == 0) + { +#endif + STARPU_PTHREAD_MUTEX_DESTROY(&HDF5_VAR_MUTEX); +#ifndef H5_HAVE_THREADSAFE + init_finished = 0; + } +#endif + + STARPU_ASSERT_MSG(status >= 0, "Can not unplug this HDF5 
disk (%s)\n", fileBase->path); + if (fileBase->created) + { + unlink(fileBase->path); + } + else + { + /* Warn user about repack, because unlink dataset doesn't delete data in file */ + _STARPU_DISP("This disk (%s) was used to store temporary data. You may use the h5repack command to reduce the size of the file... \n", fileBase->path); + } + free(fileBase->path); + free(fileBase); +} + +static void *starpu_hdf5_alloc(void *base, size_t size) +{ + struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; + struct starpu_hdf5_obj * obj; + char * name; + char * prefix = "STARPU_"; + char name_id[16]; + + /* Save the name of the dataset */ + STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); + snprintf(name_id, sizeof(name_id), "%u", fileBase->next_dataset_id); + fileBase->next_dataset_id++; + STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); + + /* name in HDF5 is like a path */ + _STARPU_MALLOC(name, 1+strlen(prefix)+strlen(name_id)+1); + snprintf(name, 1+strlen(prefix)+strlen(name_id)+1, "/%s%s", prefix, name_id); + + obj = _starpu_hdf5_data_alloc(fileBase, name, size); + + if (!obj) + { + free(name); + } + + return (void *) obj; +} + +static void starpu_hdf5_free(void *base, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; + struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; + herr_t status; + + _starpu_hdf5_protect_start(base); + + status = H5Dclose(dataObj->dataset); + STARPU_ASSERT_MSG(status >= 0, "Can not free this HDF5 dataset (%s)\n", dataObj->path); + + /* remove the dataset link in the HDF5 + * But it doesn't delete the space in the file */ + status = H5Ldelete(fileBase->fileID, dataObj->path, H5P_DEFAULT); + STARPU_ASSERT_MSG(status >= 0, "Can not delete the link associed to this dataset (%s)\n", dataObj->path); + + _starpu_hdf5_protect_stop(base); + + free(dataObj->path); + free(dataObj); +} + +static void *starpu_hdf5_open(void *base, void *pos, size_t size) +{ 
+ struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; + struct starpu_hdf5_obj * obj; + char *name; + + name = strdup((char *)pos); + STARPU_ASSERT(name); + + obj = _starpu_hdf5_data_open(fileBase, name, size); + + if (!obj) + { + free(name); + } + + return (void *) obj; +} + +static void starpu_hdf5_close(void *base, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; + herr_t status; + + _starpu_hdf5_protect_start(base); + + status = H5Dclose(dataObj->dataset); + STARPU_ASSERT_MSG(status >= 0, "Can not close this HDF5 dataset (%s)\n", dataObj->path); + + _starpu_hdf5_protect_stop(base); + + free(dataObj->path); + free(dataObj); +} + +static void starpu_hdf5_wait(void * event) +{ + starpu_sem_t * finished = (starpu_sem_t *) event; + + starpu_sem_wait(finished); +} + +static int starpu_hdf5_test(void * event) +{ + starpu_sem_t * finished = (starpu_sem_t *) event; + + return starpu_sem_trywait(finished) == 0; +} + +static int starpu_hdf5_full_read(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node) +{ + struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; + + starpu_sem_t finished; + starpu_sem_init(&finished, 0, 0); + + _starpu_hdf5_protect_start(base); + *size = _starpu_get_size_obj(dataObj); + _starpu_hdf5_protect_stop(base); + + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + + starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) &finished, FULL_READ); + + starpu_hdf5_wait(&finished); + + starpu_sem_destroy(&finished); + + return 0; +} + +static int starpu_hdf5_full_write(void *base, void *obj, void *ptr, size_t size) +{ + starpu_sem_t finished; + starpu_sem_init(&finished, 0, 0); + + starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) &finished, FULL_WRITE); + + starpu_hdf5_wait(&finished); + + starpu_sem_destroy(&finished); + + return 0; +} + +static int starpu_hdf5_read(void *base, void *obj, void 
*buf, off_t offset, size_t size) +{ + starpu_sem_t finished; + starpu_sem_init(&finished, 0, 0); + + starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) &finished, READ); + + starpu_hdf5_wait(&finished); + + starpu_sem_destroy(&finished); + + return 0; +} + +static int starpu_hdf5_write(void *base, void *obj, const void *buf, off_t offset, size_t size) +{ + starpu_sem_t finished; + starpu_sem_init(&finished, 0, 0); + + starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) &finished, WRITE); + + starpu_hdf5_wait(&finished); + + starpu_sem_destroy(&finished); + + return 0; +} + +static void * starpu_hdf5_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + starpu_sem_t * finished; + _STARPU_MALLOC(finished, sizeof(*finished)); + starpu_sem_init(finished, 0, 0); + + starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) finished, READ); + + return finished; +} + +static void * starpu_hdf5_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + starpu_sem_t * finished; + _STARPU_MALLOC(finished, sizeof(*finished)); + starpu_sem_init(finished, 0, 0); + + starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) finished, WRITE); + + return finished; +} + +void * starpu_hdf5_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node) +{ + struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; + + starpu_sem_t * finished; + _STARPU_MALLOC(finished, sizeof(*finished)); + starpu_sem_init(finished, 0, 0); + + _starpu_hdf5_protect_start(base); + *size = _starpu_get_size_obj(dataObj); + _starpu_hdf5_protect_stop(base); + + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + + starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) finished, FULL_READ); + + return finished; +} + +void * starpu_hdf5_async_full_write (void * base, void * obj, void * ptr, size_t size) +{ + 
starpu_sem_t * finished; + _STARPU_MALLOC(finished, sizeof(*finished)); + starpu_sem_init(finished, 0, 0); + + starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) finished, FULL_WRITE); + + return finished; +} + +void * starpu_hdf5_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) +{ + starpu_sem_t * finished; + _STARPU_MALLOC(finished, sizeof(*finished)); + starpu_sem_init(finished, 0, 0); + + starpu_hdf5_send_work(base_src, obj_src, offset_src, base_dst, obj_dst, offset_dst, NULL, size, (void*) finished, COPY); + + return finished; +} + +static void starpu_hdf5_free_request(void * event) +{ + starpu_sem_destroy(event); + free(event); +} + +static int get_hdf5_bandwidth_between_disk_and_main_ram(unsigned node, void *base) +{ + unsigned iter; + double timing_slowness, timing_latency; + double start; + double end; + char *buf; + struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; + + srand(time(NULL)); + starpu_malloc_flags((void **) &buf, STARPU_DISK_SIZE_MIN, 0); + STARPU_ASSERT(buf != NULL); + + /* allocate memory */ + void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); + /* fail to alloc */ + if (mem == NULL) + return 0; + + memset(buf, 0, STARPU_DISK_SIZE_MIN); + + /* Measure upload slowness */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); + + } + end = starpu_timing_now(); + timing_slowness = end - start; + + /* free memory */ + starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); + + starpu_malloc_flags((void**) &buf, sizeof(char), 0); + STARPU_ASSERT(buf != NULL); + + *buf = 0; + + /* Measure latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); + } + end = starpu_timing_now(); + timing_latency = end - start; + 
+ _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); + starpu_free_flags(buf, sizeof(char), 0); + + _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, + timing_latency/NITER, timing_latency/NITER, node, fileBase->path); + return 1; +} + +struct starpu_disk_ops starpu_disk_hdf5_ops = +{ + .alloc = starpu_hdf5_alloc, + .free = starpu_hdf5_free, + .open = starpu_hdf5_open, + .close = starpu_hdf5_close, + .read = starpu_hdf5_read, + .write = starpu_hdf5_write, + .plug = starpu_hdf5_plug, + .unplug = starpu_hdf5_unplug, + .copy = starpu_hdf5_copy, + .bandwidth = get_hdf5_bandwidth_between_disk_and_main_ram, + .full_read = starpu_hdf5_full_read, + .full_write = starpu_hdf5_full_write, + + .async_read = starpu_hdf5_async_read, + .async_write = starpu_hdf5_async_write, + .async_full_read = starpu_hdf5_async_full_read, + .async_full_write = starpu_hdf5_async_full_write, + .wait_request = starpu_hdf5_wait, + .test_request = starpu_hdf5_test, + .free_request = starpu_hdf5_free_request +}; diff --git a/src/core/disk_ops/disk_leveldb.cpp b/src/core/disk_ops/disk_leveldb.cpp new file mode 100644 index 0000000..86c7316 --- /dev/null +++ b/src/core/disk_ops/disk_leveldb.cpp @@ -0,0 +1,368 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define NITER _starpu_calibration_minimum + +/* ------------------- use leveldb to write on disk ------------------- */ + +struct starpu_leveldb_obj +{ + char * key; + size_t size; + starpu_pthread_mutex_t mutex; +}; + +struct starpu_leveldb_base +{ + char *path; + leveldb::DB* db; + /* if StarPU creates the leveldb */ + bool created; +}; + +/* allocation memory on disk */ +static void *starpu_leveldb_alloc(void *base, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct starpu_leveldb_obj)); + STARPU_ASSERT(obj); + + STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); + + size_t len = 6 + 1 + 2+sizeof(void*)*2 + 1; + char *key = (char *)malloc(len*sizeof(char)); + STARPU_ASSERT(key); + snprintf(key, len, "STARPU-%p", obj); + + /* create and add a key with a small memory */ + leveldb::Status s = base_tmp->db->Put(leveldb::WriteOptions(), key, "a"); + STARPU_ASSERT(s.ok()); + + /* obj->size is the real size in the disk */ + obj->key = key; + obj->size = sizeof(char); + + return (void *) obj; +} + +/* free memory on disk */ +static void starpu_leveldb_free(void *base , void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + + base_tmp->db->Delete(leveldb::WriteOptions(), tmp->key); + + STARPU_PTHREAD_MUTEX_DESTROY(&tmp->mutex); + + free(tmp->key); + free(tmp); +} + +/* open an existing memory on disk */ +static void *starpu_leveldb_open(void *base STARPU_ATTRIBUTE_UNUSED, void *pos, size_t size) +{ + struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct 
starpu_leveldb_obj)); + STARPU_ASSERT(obj); + + STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); + + obj->key = strdup((char*) pos); + obj->size = size; + + return (void *) obj; +} + +/* free memory without delete it */ +static void starpu_leveldb_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + + STARPU_PTHREAD_MUTEX_DESTROY(&tmp->mutex); + + free(tmp->key); + free(tmp); +} + +/* in the leveldb, we are obliged to read and to write the entire data + * so, we have to use buffers to have offset and size options */ +static int starpu_leveldb_read(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + + /* leveldb need a string to store data */ + std::string value; + leveldb::Status s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); + uintptr_t value_read = (uintptr_t)(value.c_str()); + + /* use buffer */ + if(s.ok()) + memcpy(buf, (void *) (value_read+offset), size); + else + STARPU_ASSERT(s.ok()); + + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + + return 0; +} + +static int starpu_leveldb_full_read(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + + /* leveldb need a string to store data */ + std::string value; + leveldb::Status s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); + + STARPU_ASSERT(s.ok()); + + *size = value.length(); + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + STARPU_ASSERT(*ptr); + + /* use buffer */ + memcpy(*ptr, value.c_str(), *size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + + 
return 0; +} + +/* write on the memory disk */ +static int starpu_leveldb_write(void *base, void *obj, const void *buf, off_t offset, size_t size) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + void *buffer; + leveldb::Status s; + + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + + if (offset == 0 && size >= tmp->size) + { + /* We overwrite everything, no need to get the old value */ + buffer = (void*) buf; + } + else + { + uintptr_t buf_tmp = (uintptr_t) buf; + buffer = malloc((tmp->size > (offset + size)) ? tmp->size : (offset + size)); + STARPU_ASSERT(buffer); + + /* we read the data */ + std::string value; + + s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); + uintptr_t value_read = (uintptr_t)(value.c_str()); + + STARPU_ASSERT(s.ok()); + memcpy(buffer, (void *) value_read, tmp->size); + + /* put the new data on their new place */ + memcpy((void *) ((uintptr_t) buffer + offset), (void *) buf_tmp, size); + } + + /* and write them */ + s = base_tmp->db->Put(leveldb::WriteOptions(), tmp->key, (char *)buffer); + STARPU_ASSERT(s.ok()); + + /* if the new size is higher than the old, we update it - first write after the alloc */ + tmp->size = (tmp->size > size) ? 
tmp->size : size; + if (buffer != buf) + free(buffer); + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + + return 0; +} + +static int starpu_leveldb_full_write(void *base, void *obj, void *ptr, size_t size) +{ + struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + + /* update file size to achieve correct writes */ + tmp->size = size; + + leveldb::WriteOptions write_options; + write_options.sync = true; + + leveldb::Status s = base_tmp->db->Put(write_options, tmp->key, (char *)ptr); + STARPU_ASSERT(s.ok()); + return 0; +} + +/* create a new copy of parameter == base */ +static void *starpu_leveldb_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_leveldb_base *tmp = (struct starpu_leveldb_base *)malloc(sizeof(struct starpu_leveldb_base)); + STARPU_ASSERT(tmp); + + leveldb::Status status; + leveldb::DB *db; + leveldb::Options options; + options.create_if_missing = true; + + /* try to create the database */ + options.error_if_exists = true; + status = leveldb::DB::Open(options, (char *) parameter, &db); + tmp->created = true; + + /* if it has already been created before */ + if (!status.ok()) + { + options.error_if_exists = false; + status = leveldb::DB::Open(options, (char *) parameter, &db); + STARPU_ASSERT_MSG(status.ok(), "StarPU leveldb plug failed !"); + tmp->created = false; + } + + tmp->db = db; + tmp->path = strdup((const char*) parameter); + STARPU_ASSERT(status.ok()); + return (void *) tmp; +} + +/* free memory allocated for the base */ +static void starpu_leveldb_unplug(void *base) +{ + struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + if(base_tmp->created) + delete base_tmp->db; + free(base_tmp->path); + free(base); +} + +static int get_leveldb_bandwidth_between_disk_and_main_ram(unsigned node, void *base) +{ + unsigned iter; + double timing_slowness, timing_latency; + double start; + double end; + 
struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; + + srand(time (NULL)); + char *buf = (char *)malloc(STARPU_DISK_SIZE_MIN*sizeof(char)); + STARPU_ASSERT(buf); + + /* allocate memory */ + void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); + /* fail to alloc */ + if (mem == NULL) + { + free(buf); + return 0; + } + + /* Measure upload slowness */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); + } + end = starpu_timing_now(); + timing_slowness = end - start; + + + /* free memory */ + free(buf); + + buf = (char *)malloc(sizeof(char)); + STARPU_ASSERT(buf); + + /* Measure latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); + } + end = starpu_timing_now(); + timing_latency = end - start; + + _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); + free(buf); + + _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, + timing_latency/NITER, timing_latency/NITER, node, base_tmp->path); + return 1; +} + +#if __cplusplus >= 201103L +struct starpu_disk_ops starpu_disk_leveldb_ops = +{ + .plug = starpu_leveldb_plug, + .unplug = starpu_leveldb_unplug, + .bandwidth = get_leveldb_bandwidth_between_disk_and_main_ram, + .alloc = starpu_leveldb_alloc, + .free = starpu_leveldb_free, + .open = starpu_leveldb_open, + .close = starpu_leveldb_close, + .read = starpu_leveldb_read, + .write = starpu_leveldb_write, + .full_read = starpu_leveldb_full_read, + .full_write = starpu_leveldb_full_write, + .async_write = NULL, + .async_read = NULL, + .async_full_read = NULL, + .async_full_write = NULL, + .copy = NULL, + .wait_request = NULL, + .test_request = NULL, + .free_request = NULL +}; +#else +struct starpu_disk_ops starpu_disk_leveldb_ops = 
+{ + starpu_leveldb_plug, + starpu_leveldb_unplug, + get_leveldb_bandwidth_between_disk_and_main_ram, + starpu_leveldb_alloc, + starpu_leveldb_free, + starpu_leveldb_open, + starpu_leveldb_close, + starpu_leveldb_read, + starpu_leveldb_write, + starpu_leveldb_full_read, + starpu_leveldb_full_write, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL +}; +#endif diff --git a/src/core/disk_ops/disk_stdio.c b/src/core/disk_ops/disk_stdio.c new file mode 100644 index 0000000..5c6c8bf --- /dev/null +++ b/src/core/disk_ops/disk_stdio.c @@ -0,0 +1,461 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_WINDOWS +# include +#endif + +#define NITER _starpu_calibration_minimum + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#define MAX_OPEN_FILES 64 +#define TEMP_HIERARCHY_DEPTH 2 + +/* ------------------- use STDIO to write on disk ------------------- */ +static unsigned starpu_stdio_opened_files; + +struct starpu_stdio_obj +{ + int descriptor; + FILE * file; + char * path; + size_t size; + starpu_pthread_mutex_t mutex; +}; + +struct starpu_stdio_base +{ + char * path; + int created; +}; + +static struct starpu_stdio_obj *_starpu_stdio_init(int descriptor, char *path, size_t size) +{ + struct starpu_stdio_obj *obj; + _STARPU_MALLOC(obj, sizeof(struct starpu_stdio_obj)); + + FILE *f = fdopen(descriptor,"rb+"); + if (f == NULL) + { + free(obj); + return NULL; + } + + STARPU_HG_DISABLE_CHECKING(starpu_stdio_opened_files); + if (starpu_stdio_opened_files >= MAX_OPEN_FILES) + { + /* Too many opened files, avoid keeping this one opened */ + fclose(f); + f = NULL; + descriptor = -1; + } + else + (void) STARPU_ATOMIC_ADD(&starpu_stdio_opened_files, 1); + + STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); + + obj->descriptor = descriptor; + obj->file = f; + obj->path = path; + obj->size = size; + + return (void *) obj; +} + +static FILE *_starpu_stdio_reopen(struct starpu_stdio_obj *obj) +{ + int id = open(obj->path, O_RDWR); + STARPU_ASSERT(id >= 0); + + FILE *f = fdopen(id,"rb+"); + STARPU_ASSERT(f); + + return f; +} + +static void _starpu_stdio_reclose(FILE *f) +{ + fclose(f); +} + +static void _starpu_stdio_close(struct starpu_stdio_obj *obj) +{ + if (obj->descriptor < 0) + return; + + if (starpu_stdio_opened_files < MAX_OPEN_FILES) + (void) STARPU_ATOMIC_ADD(&starpu_stdio_opened_files, -1); + + fclose(obj->file); +} + +static void _starpu_stdio_fini(struct starpu_stdio_obj *obj) +{ + 
STARPU_PTHREAD_MUTEX_DESTROY(&obj->mutex); + + free(obj->path); + free(obj); +} + +/* allocation memory on disk */ +static void *starpu_stdio_alloc(void *base, size_t size) +{ + struct starpu_stdio_obj *obj; + struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; + + int id; + char *baseCpy = _starpu_mktemp_many(fileBase->path, TEMP_HIERARCHY_DEPTH, O_RDWR | O_BINARY, &id); + + /* fail */ + if (!baseCpy) + return NULL; + + int val = _starpu_ftruncate(id,size); + /* fail */ + if (val < 0) + { + _STARPU_DISP("Could not truncate file, ftruncate failed with error '%s'\n", strerror(errno)); + close(id); + unlink(baseCpy); + free(baseCpy); + return NULL; + } + + obj = _starpu_stdio_init(id, baseCpy, size); + if (!obj) + { + close(id); + unlink(baseCpy); + free(baseCpy); + } + + return obj; +} + +/* free memory on disk */ +static void starpu_stdio_free(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; + + _starpu_stdio_close(tmp); + unlink(tmp->path); + _starpu_rmtemp_many(tmp->path, TEMP_HIERARCHY_DEPTH); + _starpu_stdio_fini(tmp); +} + +/* open an existing memory on disk */ +static void *starpu_stdio_open(void *base, void *pos, size_t size) +{ + struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; + struct starpu_stdio_obj *obj; + /* create template */ + char *baseCpy; + _STARPU_MALLOC(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1); + + snprintf(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1, "%s/%s", fileBase->path, (char *)pos); + + int id = open(baseCpy, O_RDWR); + if (id < 0) + { + free(baseCpy); + return NULL; + } + + obj = _starpu_stdio_init(id, baseCpy, size); + if (!obj) + free(baseCpy); + return obj; +} + +/* free memory without delete it */ +static void starpu_stdio_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) 
obj; + + _starpu_stdio_close(tmp); + _starpu_stdio_fini(tmp); +} + +/* read the memory disk */ +static int starpu_stdio_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; + FILE *f = tmp->file; + + if (f) + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + else + f = _starpu_stdio_reopen(obj); + + int res = fseek(f, offset, SEEK_SET); + STARPU_ASSERT_MSG(res == 0, "Stdio read failed"); + + starpu_ssize_t nb = fread(buf, 1, size, f); + STARPU_ASSERT_MSG(nb >= 0, "Stdio read failed"); + + if (tmp->file) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + else + _starpu_stdio_reclose(f); + + return 0; +} + +static int starpu_stdio_full_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void **ptr, size_t *size, unsigned dst_node) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; + FILE *f = tmp->file; + starpu_ssize_t ssize; + + if (f) + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + else + f = _starpu_stdio_reopen(obj); + + int res = fseek(f, 0, SEEK_END); + STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); + ssize = ftell(f); + STARPU_ASSERT_MSG(ssize >= 0, "Stdio write failed"); + *size = ssize; + + if (tmp->file) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + /* Alloc aligned buffer */ + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + if (tmp->file) + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + + res = fseek(f, 0, SEEK_SET); + STARPU_ASSERT_MSG(res == 0, "Stdio read failed"); + + starpu_ssize_t nb = fread(*ptr, 1, *size, f); + STARPU_ASSERT_MSG(nb >= 0, "Stdio read failed"); + + if (tmp->file) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + else + _starpu_stdio_reclose(f); + + return 0; +} + +/* write on the memory disk */ +static int starpu_stdio_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, const void *buf, off_t offset, size_t size) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; + FILE *f = tmp->file; + + if (f) + 
STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + else + f = _starpu_stdio_reopen(obj); + + int res = fseek(f, offset, SEEK_SET); + STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); + + fwrite(buf, 1, size, f); + + if (tmp->file) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + else + _starpu_stdio_reclose(f); + + return 0; +} + +static int starpu_stdio_full_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *ptr, size_t size) +{ + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; + FILE *f = tmp->file; + + if (!f) + f = _starpu_stdio_reopen(obj); + + /* update file size to realise the next good full_read */ + if(size != tmp->size) + { + int val = _starpu_fftruncate(f,size); + STARPU_ASSERT(val == 0); + + tmp->size = size; + } + + int res = fseek(f, 0, SEEK_SET); + STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); + + fwrite(ptr, 1, size, f); + + if (!tmp->file) + _starpu_stdio_reclose(f); + + return 0; +} + +static void *starpu_stdio_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_stdio_base * base; + struct stat buf; + + _STARPU_MALLOC(base, sizeof(*base)); + base->created = 0; + base->path = strdup((char *) parameter); + STARPU_ASSERT(base->path); + + if (!(stat(base->path, &buf) == 0 && S_ISDIR(buf.st_mode))) + { + _starpu_mkpath(base->path, S_IRWXU); + base->created = 1; + } + + return (void *) base; +} + +/* free memory allocated for the base */ +static void starpu_stdio_unplug(void *base) +{ + struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; + if (fileBase->created) + rmdir(fileBase->path); + free(fileBase->path); + free(fileBase); +} + +static int get_stdio_bandwidth_between_disk_and_main_ram(unsigned node, void *base) +{ + unsigned iter; + double timing_slowness, timing_latency; + double start; + double end; + char *buf; + struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; + + srand(time(NULL)); + starpu_malloc_flags((void **) &buf, STARPU_DISK_SIZE_MIN, 0); + 
STARPU_ASSERT(buf != NULL); + + /* allocate memory */ + void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); + /* fail to alloc */ + if (mem == NULL) + return 0; + struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) mem; + + memset(buf, 0, STARPU_DISK_SIZE_MIN); + + /* Measure upload slowness */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + FILE *f = tmp->file; + + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); + + if (!f) + f = _starpu_stdio_reopen(tmp); + + /* clean cache memory */ + int res = fflush(f); + STARPU_ASSERT_MSG(res == 0, "Slowness computation failed \n"); + +#ifdef STARPU_HAVE_WINDOWS + res = _commit(fileno(f)); +#else + res = fsync(fileno(f)); +#endif + STARPU_ASSERT_MSG(res == 0, "Slowness computation failed \n"); + + if (!tmp->file) + _starpu_stdio_reclose(f); + } + end = starpu_timing_now(); + timing_slowness = end - start; + + /* free memory */ + starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); + + starpu_malloc_flags((void**) &buf, sizeof(char), 0); + STARPU_ASSERT(buf != NULL); + + *buf = 0; + + /* Measure latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + FILE *f = tmp->file; + + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); + + if (!f) + f = _starpu_stdio_reopen(tmp); + + int res = fflush(f); + STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); + +#ifdef STARPU_HAVE_WINDOWS + res = _commit(fileno(f)); +#else + res = fsync(fileno(f)); +#endif + STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); + + if (!tmp->file) + _starpu_stdio_reclose(f); + } + end = starpu_timing_now(); + timing_latency = end - start; + + _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); + starpu_free_flags(buf, sizeof(char), 0); + + _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, + timing_latency/NITER, 
timing_latency/NITER, node, fileBase->path); + return 1; +} + +struct starpu_disk_ops starpu_disk_stdio_ops = +{ + .alloc = starpu_stdio_alloc, + .free = starpu_stdio_free, + .open = starpu_stdio_open, + .close = starpu_stdio_close, + .read = starpu_stdio_read, + .write = starpu_stdio_write, + .plug = starpu_stdio_plug, + .unplug = starpu_stdio_unplug, + .copy = NULL, + .bandwidth = get_stdio_bandwidth_between_disk_and_main_ram, + .full_read = starpu_stdio_full_read, + .full_write = starpu_stdio_full_write +}; diff --git a/src/core/disk_ops/disk_unistd.c b/src/core/disk_ops/disk_unistd.c new file mode 100644 index 0000000..4d97c1b --- /dev/null +++ b/src/core/disk_ops/disk_unistd.c @@ -0,0 +1,81 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include + +/* ------------------- use UNISTD to write on disk ------------------- */ + +/* allocation memory on disk */ +static void *starpu_unistd_alloc(void *base, size_t size) +{ + struct starpu_unistd_global_obj *obj; + _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); + /* only flags change between unistd and unistd_o_direct */ + obj->flags = O_RDWR | O_BINARY; + return starpu_unistd_global_alloc(obj, base, size); +} + +/* open an existing memory on disk */ +static void *starpu_unistd_open(void *base, void *pos, size_t size) +{ + struct starpu_unistd_global_obj *obj; + _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); + /* only flags change between unistd and unistd_o_direct */ + obj->flags = O_RDWR | O_BINARY; + return starpu_unistd_global_open(obj, base, pos, size); +} + +struct starpu_disk_ops starpu_disk_unistd_ops = +{ + .alloc = starpu_unistd_alloc, + .free = starpu_unistd_global_free, + .open = starpu_unistd_open, + .close = starpu_unistd_global_close, + .read = starpu_unistd_global_read, + .write = starpu_unistd_global_write, + .plug = starpu_unistd_global_plug, + .unplug = starpu_unistd_global_unplug, +#ifdef STARPU_UNISTD_USE_COPY + .copy = starpu_unistd_global_copy, +#else + .copy = NULL, +#endif + .bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram, +#ifdef HAVE_AIO_H + .async_read = starpu_unistd_global_async_read, + .async_write = starpu_unistd_global_async_write, + .async_full_read = starpu_unistd_global_async_full_read, + .async_full_write = starpu_unistd_global_async_full_write, + .wait_request = starpu_unistd_global_wait_request, + .test_request = starpu_unistd_global_test_request, + .free_request = starpu_unistd_global_free_request, +#endif + .full_read = starpu_unistd_global_full_read, + .full_write = starpu_unistd_global_full_write +}; diff --git 
a/src/core/disk_ops/disk_unistd_o_direct.c b/src/core/disk_ops/disk_unistd_o_direct.c new file mode 100644 index 0000000..a0af6c4 --- /dev/null +++ b/src/core/disk_ops/disk_unistd_o_direct.c @@ -0,0 +1,151 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include + +/* ------------------- use UNISTD to write on disk ------------------- */ + +/* allocation memory on disk */ +static void *starpu_unistd_o_direct_alloc(void *base, size_t size) +{ + struct starpu_unistd_global_obj *obj; + _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); + /* only flags change between unistd and unistd_o_direct */ + obj->flags = O_RDWR | O_DIRECT | O_BINARY; + return starpu_unistd_global_alloc(obj, base, size); +} + +/* open an existing memory on disk */ +static void *starpu_unistd_o_direct_open(void *base, void *pos, size_t size) +{ + struct starpu_unistd_global_obj *obj; + _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); + /* only flags change between unistd and unistd_o_direct */ + obj->flags = O_RDWR | O_DIRECT | O_BINARY; + return starpu_unistd_global_open(obj, base, pos, size); +} + +/* read the memory disk */ +static int 
starpu_unistd_o_direct_read(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "You can only read a multiple of page size %u Bytes (Here %d)", getpagesize(), (int) size); + + STARPU_ASSERT_MSG((((uintptr_t) buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); + + return starpu_unistd_global_read(base, obj, buf, offset, size); +} + +/* write on the memory disk */ +static int starpu_unistd_o_direct_write(void *base, void *obj, const void *buf, off_t offset, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "You can only write a multiple of page size %u Bytes (Here %d)", getpagesize(), (int) size); + + STARPU_ASSERT_MSG((((uintptr_t)buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); + + return starpu_unistd_global_write(base, obj, buf, offset, size); +} + +/* create a new copy of parameter == base */ +static void *starpu_unistd_o_direct_plug(void *parameter, starpu_ssize_t size) +{ + starpu_malloc_set_align(getpagesize()); + + return starpu_unistd_global_plug(parameter, size); +} + +#if defined(HAVE_AIO_H) || defined(HAVE_LIBAIO_H) +void *starpu_unistd_o_direct_global_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only read a multiple of page size %lu Bytes (Here %lu). 
Use the non-o_direct unistd variant if your data is not a multiple of %lu", + (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); + + STARPU_ASSERT_MSG((((uintptr_t) buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); + + return starpu_unistd_global_async_read(base, obj, buf, offset, size); +} + +void *starpu_unistd_o_direct_global_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", + (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); + + STARPU_ASSERT_MSG((((uintptr_t)buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); + + return starpu_unistd_global_async_write(base, obj, buf, offset, size); +} +#endif + +#ifdef STARPU_UNISTD_USE_COPY +void * starpu_unistd_o_direct_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", + (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); + + return starpu_unistd_global_copy(base_src, obj_src, offset_src, base_dst, obj_dst, offset_dst, size); +} +#endif + +int starpu_unistd_o_direct_global_full_write(void *base, void *obj, void *ptr, size_t size) +{ + STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). 
Use the non-o_direct unistd variant if your data is not a multiple of %lu", + (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); + + STARPU_ASSERT_MSG((((uintptr_t)ptr) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); + + return starpu_unistd_global_full_write(base, obj, ptr, size); +} + +struct starpu_disk_ops starpu_disk_unistd_o_direct_ops = +{ + .alloc = starpu_unistd_o_direct_alloc, + .free = starpu_unistd_global_free, + .open = starpu_unistd_o_direct_open, + .close = starpu_unistd_global_close, + .read = starpu_unistd_o_direct_read, + .write = starpu_unistd_o_direct_write, + .plug = starpu_unistd_o_direct_plug, + .unplug = starpu_unistd_global_unplug, +#ifdef STARPU_UNISTD_USE_COPY + .copy = starpu_unistd_o_direct_global_copy, +#else + .copy = NULL, +#endif + .bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram, +#if defined(HAVE_AIO_H) || defined(HAVE_LIBAIO_H) + .async_read = starpu_unistd_o_direct_global_async_read, + .async_write = starpu_unistd_o_direct_global_async_write, + .wait_request = starpu_unistd_global_wait_request, + .test_request = starpu_unistd_global_test_request, + .free_request = starpu_unistd_global_free_request, + .async_full_read = starpu_unistd_global_async_full_read, + .async_full_write = starpu_unistd_global_async_full_write, +#endif + .full_read = starpu_unistd_global_full_read, + .full_write = starpu_unistd_o_direct_global_full_write +}; diff --git a/src/core/disk_ops/unistd/disk_unistd_global.c b/src/core/disk_ops/unistd/disk_unistd_global.c new file mode 100644 index 0000000..040b806 --- /dev/null +++ b/src/core/disk_ops/unistd/disk_unistd_global.c @@ -0,0 +1,1122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#include +#if defined(HAVE_LIBAIO_H) +#include +#elif defined(HAVE_AIO_H) +#include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_WINDOWS +# include +#endif + +#define NITER _starpu_calibration_minimum + +#ifdef O_DIRECT +# define MEM_SIZE getpagesize() +#else +# define MEM_SIZE 1 +#endif + +#define MAX_OPEN_FILES 64 +#define TEMP_HIERARCHY_DEPTH 2 + +#if !defined(HAVE_COPY_FILE_RANGE) && defined(__linux__) && defined(__NR_copy_file_range) +static starpu_ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags) +{ + return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, + off_out, len, flags); +} +#endif + +static unsigned starpu_unistd_opened_files; + +#ifdef STARPU_UNISTD_USE_COPY +LIST_TYPE(starpu_unistd_work_copy, + int fd_src; + int fd_dst; + starpu_loff_t off_src; + starpu_loff_t off_dst; + struct starpu_unistd_global_obj * obj_src; + struct starpu_unistd_global_obj * obj_dst; + size_t len; + unsigned flags; + starpu_sem_t finished; +); + +struct starpu_unistd_copy_thread +{ + int run; + starpu_pthread_t thread; + starpu_pthread_cond_t cond; + starpu_pthread_mutex_t mutex; + struct 
starpu_unistd_work_copy_list list; +}; + +static struct starpu_unistd_copy_thread copy_thread[STARPU_MAXNODES][STARPU_MAXNODES]; +static unsigned starpu_unistd_nb_disk_opened = 0; +/* copy_file_range syscall can return ENOSYS. Use global var to catch + * and prevent StarPU using direct disk to disk copy */ +static int starpu_unistd_copy_works = 1; +#endif + +struct starpu_unistd_base +{ + char * path; + int created; + /* To know which thread handles the copy function */ +#ifdef STARPU_UNISTD_USE_COPY + unsigned disk_index; +#endif +#if defined(HAVE_LIBAIO_H) + io_context_t ctx; + struct starpu_unistd_aiocb_link * hashtable; + starpu_pthread_mutex_t mutex; +#endif +}; + +#if defined(HAVE_LIBAIO_H) +struct starpu_unistd_aiocb_link +{ + UT_hash_handle hh; + void * starpu_aiocb; + void * aiocb; +}; +struct starpu_unistd_aiocb +{ + int finished; + struct iocb iocb; + struct starpu_unistd_global_obj *obj; + struct starpu_unistd_base *base; + size_t len; +}; +#elif defined(HAVE_AIO_H) +struct starpu_unistd_aiocb +{ + struct aiocb aiocb; + struct starpu_unistd_global_obj *obj; +}; +#endif + +enum starpu_unistd_wait_type { STARPU_UNISTD_AIOCB, STARPU_UNISTD_COPY }; + +union starpu_unistd_wait_event +{ + struct starpu_unistd_work_copy * event_copy; +#if defined(HAVE_LIBAIO_H) || defined(HAVE_AIO_H) + struct starpu_unistd_aiocb event_aiocb; +#endif +}; + +struct starpu_unistd_wait +{ + enum starpu_unistd_wait_type type; + union starpu_unistd_wait_event event; +}; + +/* ------------------- use UNISTD to write on disk ------------------- */ + +static void _starpu_unistd_init(struct starpu_unistd_global_obj *obj, int descriptor, char *path, size_t size) +{ + STARPU_HG_DISABLE_CHECKING(starpu_unistd_opened_files); +#ifdef STARPU_UNISTD_USE_COPY + STARPU_HG_DISABLE_CHECKING(starpu_unistd_copy_works); +#endif + if (starpu_unistd_opened_files >= MAX_OPEN_FILES) + { + /* Too many opened files, avoid keeping this one opened */ + close(descriptor); + descriptor = -1; + } + else + 
(void) STARPU_ATOMIC_ADD(&starpu_unistd_opened_files, 1); + + STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); + + obj->descriptor = descriptor; + obj->path = path; + obj->size = size; +} + +static int _starpu_unistd_reopen(struct starpu_unistd_global_obj *obj) +{ + int id = open(obj->path, obj->flags); + STARPU_ASSERT_MSG(id >= 0, "Reopening file %s failed: errno %d", obj->path, errno); + return id; +} + +static void _starpu_unistd_reclose(int id) +{ + close(id); +} + +static void _starpu_unistd_close(struct starpu_unistd_global_obj *obj) +{ + if (obj->descriptor < 0) + return; + + if (starpu_unistd_opened_files < MAX_OPEN_FILES) + (void) STARPU_ATOMIC_ADD(&starpu_unistd_opened_files, -1); + + close(obj->descriptor); +} + +static void _starpu_unistd_fini(struct starpu_unistd_global_obj *obj) +{ + STARPU_PTHREAD_MUTEX_DESTROY(&obj->mutex); + + free(obj->path); + obj->path = NULL; + free(obj); +} + +/* allocation memory on disk */ +void *starpu_unistd_global_alloc(struct starpu_unistd_global_obj *obj, void *base, size_t size) +{ + int id; + struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; + char *baseCpy = _starpu_mktemp_many(fileBase->path, TEMP_HIERARCHY_DEPTH, obj->flags, &id); + + /* fail */ + if (!baseCpy) + { + free(obj); + return NULL; + } + + int val = _starpu_ftruncate(id,size); + /* fail */ + if (val < 0) + { + _STARPU_DISP("Could not truncate file, ftruncate failed with error '%s'\n", strerror(errno)); + close(id); + unlink(baseCpy); + free(baseCpy); + free(obj); + return NULL; + } + + _starpu_unistd_init(obj, id, baseCpy, size); + + return obj; +} + +/* free memory on disk */ +void starpu_unistd_global_free(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + + _starpu_unistd_close(tmp); + unlink(tmp->path); + _starpu_rmtemp_many(tmp->path, TEMP_HIERARCHY_DEPTH); + _starpu_unistd_fini(tmp); +} + +/* open an existing 
memory on disk */ +void *starpu_unistd_global_open(struct starpu_unistd_global_obj *obj, void *base, void *pos, size_t size) +{ + struct starpu_unistd_base *fileBase = (struct starpu_unistd_base *) base; + /* create template */ + char *baseCpy; + _STARPU_MALLOC(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1); + + snprintf(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1, "%s/%s", fileBase->path, (char *)pos); + + int id = open(baseCpy, obj->flags); + if (id < 0) + { + free(obj); + free(baseCpy); + return NULL; + } + + _starpu_unistd_init(obj, id, baseCpy, size); + + return obj; +} + +/* free memory without delete it */ +void starpu_unistd_global_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + + _starpu_unistd_close(tmp); + _starpu_unistd_fini(tmp); +} + +/* read the memory disk */ +int starpu_unistd_global_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + starpu_ssize_t nb; + int fd = tmp->descriptor; + starpu_ssize_t bytes_to_write = size; + +#ifdef HAVE_PREAD + if (fd >= 0) + { + while (bytes_to_write > 0) + { + nb = pread(fd, buf, bytes_to_write, offset); + STARPU_ASSERT_MSG(nb >= 0, "Starpu Disk unistd pread failed: size %lu got errno %d", (unsigned long) size, errno); + bytes_to_write -= nb; + buf = (char*) buf + nb; + offset += nb; + } + } + else +#endif + { + if (tmp->descriptor >= 0) + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); + else + fd = _starpu_unistd_reopen(obj); + + int res = lseek(fd, offset, SEEK_SET); + STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for read failed: offset %lu got errno %d", (unsigned long) offset, errno); + + while (bytes_to_write > 0) + { + nb = read(fd, buf, bytes_to_write); + STARPU_ASSERT_MSG(nb >= 0, "Starpu Disk unistd read failed: offset %lu got errno %d", (unsigned 
long) offset, errno); + bytes_to_write -= nb; + buf = (char*) buf + nb; + offset += nb; + } + + if (tmp->descriptor >= 0) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + else + _starpu_unistd_reclose(fd); + + } + + return 0; +} + +#if defined(HAVE_LIBAIO_H) +void *starpu_unistd_global_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; + struct starpu_unistd_global_obj *tmp = obj; + struct starpu_unistd_wait * event; + _STARPU_CALLOC(event, 1,sizeof(*event)); + event->type = STARPU_UNISTD_AIOCB; + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct iocb *iocb = &starpu_aiocb->iocb; + starpu_aiocb->obj = obj; + int fd = tmp->descriptor; + int err; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); + + starpu_aiocb->len = size; + starpu_aiocb->finished = 0; + starpu_aiocb->base = fileBase; + io_prep_pread(iocb, fd, buf, size, offset); + if ((err = io_submit(fileBase->ctx, 1, &iocb)) < 0) + { + _STARPU_DISP("Warning: io_submit returned %d (%s)\n", err, strerror(err)); + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + iocb = NULL; + } + + struct starpu_unistd_aiocb_link *l; + _STARPU_MALLOC(l, sizeof(*l)); + l->aiocb = iocb; + l->starpu_aiocb = starpu_aiocb; + STARPU_PTHREAD_MUTEX_LOCK(&fileBase->mutex); + HASH_ADD_PTR(fileBase->hashtable, aiocb, l); + STARPU_PTHREAD_MUTEX_UNLOCK(&fileBase->mutex); + + return event; +} +#elif defined(HAVE_AIO_H) +void *starpu_unistd_global_async_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_global_obj *tmp = obj; + struct starpu_unistd_wait * event; + _STARPU_CALLOC(event, 1,sizeof(*event)); + event->type = STARPU_UNISTD_AIOCB; + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct aiocb *aiocb = &starpu_aiocb->aiocb; + starpu_aiocb->obj = obj; + int fd = tmp->descriptor; + + if (fd < 0) + fd = 
_starpu_unistd_reopen(obj); + + aiocb->aio_fildes = fd; + aiocb->aio_offset = offset; + aiocb->aio_nbytes = size; + aiocb->aio_buf = buf; + aiocb->aio_reqprio = 0; + aiocb->aio_lio_opcode = LIO_NOP; + + if (aio_read(aiocb) < 0) + { + _STARPU_DISP("Warning: aio_read returned %d (%s)\n", errno, strerror(errno)); + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + } + + return event; /* NOTE(review): the event is returned even when aio_read failed - confirm callers cope */ +} +#endif + +int starpu_unistd_global_full_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void **ptr, size_t *size, unsigned dst_node) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + int fd = tmp->descriptor; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); +#ifdef STARPU_HAVE_WINDOWS + *size = _filelength(fd); +#else + struct stat st; + int ret = fstat(fd, &st); + STARPU_ASSERT(ret==0); + + *size = st.st_size; /* report the current file size to the caller */ +#endif + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + + /* Allocate an aligned buffer on dst_node for the whole file */ + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + return starpu_unistd_global_read(base, obj, *ptr, 0, *size); +} + +/* write size bytes from buf to the disk object at offset; always returns 0, failures trip an assertion */ +int starpu_unistd_global_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, const void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + starpu_ssize_t res; + int fd = tmp->descriptor; + starpu_ssize_t bytes_to_write = size; + +#ifdef HAVE_PWRITE + if (fd >= 0) + { + while (bytes_to_write > 0) + { + res = pwrite(fd, buf, bytes_to_write, offset); + STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd pwrite failed: offset %lu got errno %d", (unsigned long) offset, errno); + bytes_to_write -= res; + buf = (char*) buf + res; + offset += res; + } + } + else +#endif + { + if (tmp->descriptor >= 0) + STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); /* the descriptor is kept open: serialize lseek+write on it */ + else + fd = _starpu_unistd_reopen(obj); + + res = lseek(fd, offset, SEEK_SET); + STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for write failed: offset %lu got errno
%d", (unsigned long) offset, errno); + + while (bytes_to_write > 0) + { + res = write(fd, buf, bytes_to_write); + STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd write failed: offset %lu got errno %d", (unsigned long) offset, errno); + bytes_to_write -= res; + buf = (char*) buf + res; + offset += res; + } + + if (tmp->descriptor >= 0) + STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); + else + _starpu_unistd_reclose(fd); + } + + return 0; +} + +#if defined(HAVE_LIBAIO_H) /* asynchronous write through libaio */ +void *starpu_unistd_global_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; + struct starpu_unistd_global_obj *tmp = obj; + struct starpu_unistd_wait * event; + _STARPU_CALLOC(event, 1,sizeof(*event)); + event->type = STARPU_UNISTD_AIOCB; + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct iocb *iocb = &starpu_aiocb->iocb; + starpu_aiocb->obj = obj; + int fd = tmp->descriptor; + int err; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); /* the object has no descriptor kept open */ + + starpu_aiocb->len = size; + starpu_aiocb->finished = 0; + starpu_aiocb->base = fileBase; + io_prep_pwrite(iocb, fd, buf, size, offset); + if ((err = io_submit(fileBase->ctx, 1, &iocb)) < 0) + { + _STARPU_DISP("Warning: io_submit returned %d (%s)\n", err, strerror(-err)); /* io_submit reports failure as a negative errno value */ + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + iocb = NULL; + } + + struct starpu_unistd_aiocb_link *l; + _STARPU_MALLOC(l, sizeof(*l)); + l->aiocb = iocb; + l->starpu_aiocb = starpu_aiocb; + STARPU_PTHREAD_MUTEX_LOCK(&fileBase->mutex); + HASH_ADD_PTR(fileBase->hashtable, aiocb, l); /* lets wait/test map a completed iocb back to its request */ + STARPU_PTHREAD_MUTEX_UNLOCK(&fileBase->mutex); + + return event; +} +#elif defined(HAVE_AIO_H) /* asynchronous write through POSIX AIO */ +void *starpu_unistd_global_async_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) +{ + struct starpu_unistd_global_obj *tmp = obj; + struct starpu_unistd_wait * event; + _STARPU_CALLOC(event, 1,sizeof(*event)); + event->type =
STARPU_UNISTD_AIOCB; + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct aiocb *aiocb = &starpu_aiocb->aiocb; + starpu_aiocb->obj = obj; + int fd = tmp->descriptor; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); + + aiocb->aio_fildes = fd; + aiocb->aio_offset = offset; + aiocb->aio_nbytes = size; + aiocb->aio_buf = buf; + aiocb->aio_reqprio = 0; + aiocb->aio_lio_opcode = LIO_NOP; + + if (aio_write(aiocb) < 0) + { + _STARPU_DISP("Warning: aio_write returned %d (%s)\n", errno, strerror(errno)); + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + aiocb = NULL; + } + + return event; +} +#endif + +int starpu_unistd_global_full_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *ptr, size_t size) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + + /* update the file size so that the next full_read gets the right length */ + if(size != tmp->size) + { + int fd = tmp->descriptor; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); + int val = _starpu_ftruncate(fd,size); + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + STARPU_ASSERT(val == 0); + tmp->size = size; + } + + return starpu_unistd_global_write(base, obj, ptr, 0, size); +} +/* Read the whole object asynchronously: report its size in *size, allocate *ptr on dst_node and submit the read; returns NULL on Linux when the file exceeds the single-request limit */ +#if defined(HAVE_AIO_H) +void * starpu_unistd_global_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + int fd = tmp->descriptor; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); +#ifdef STARPU_HAVE_WINDOWS + *size = _filelength(fd); +#else + struct stat st; + int ret = fstat(fd, &st); + STARPU_ASSERT(ret==0); + + *size = st.st_size; +#endif + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); /* the descriptor was only needed for the size: release it before the early return below so it cannot leak */ + +#ifdef STARPU_LINUX_SYS + /* on Linux, read() (and similar system calls) will transfer at most 0x7ffff000 bytes, see read(2) */ + /* FIXME: make starpu_unistd_global_test_request and starpu_unistd_global_wait_request + * resubmit an updated request whenever the request completion is truncated */ + if (*size
> 0x7ffff000) + return NULL; +#endif + + /* Allocate an aligned buffer on dst_node for the whole file */ + _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); + return starpu_unistd_global_async_read(base, obj, *ptr, 0, *size); +} + +void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size) +{ + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; + +#ifdef STARPU_LINUX_SYS + /* on Linux, write() (and similar system calls) will transfer at most 0x7ffff000 bytes, see write(2) */ + /* FIXME: make starpu_unistd_global_test_request and starpu_unistd_global_wait_request + * resubmit an updated request whenever the request completion is truncated */ + if (size > 0x7ffff000) + return NULL; +#endif + + /* update the file size so that the next full_read gets the right length */ + if(size != tmp->size) + { + int fd = tmp->descriptor; + + if (fd < 0) + fd = _starpu_unistd_reopen(obj); + int val = _starpu_ftruncate(fd,size); + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + STARPU_ASSERT(val == 0); + tmp->size = size; + } + + return starpu_unistd_global_async_write(base, obj, ptr, 0, size); +} +#endif + +#ifdef STARPU_UNISTD_USE_COPY +static void * starpu_unistd_internal_thread(void * arg) +{ + struct starpu_unistd_copy_thread * internal_copy_thread = (struct starpu_unistd_copy_thread *) arg; + + while (internal_copy_thread->run || !starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) + { + STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); + if (internal_copy_thread->run && starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) + STARPU_PTHREAD_COND_WAIT(&internal_copy_thread->cond, &internal_copy_thread->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); + + if (!starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) + { + STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); + struct starpu_unistd_work_copy * work =
starpu_unistd_work_copy_list_pop_back(&internal_copy_thread->list); + STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); + + starpu_ssize_t ret = copy_file_range(work->fd_src, &work->off_src, work->fd_dst, &work->off_dst, work->len, work->flags); + + if (ret == -1 && (errno == ENOSYS || errno == EINVAL)) + { + void *buf; + /* System call not supported, or glibc + * compatibility layer does not work (e.g. + * because we use O_DIRECT and glibc doesn't + * align the buffer), avoid submitting more + * copies. */ + starpu_unistd_copy_works = 0; + + /* And do this copy by hand with pread/pwrite */ + starpu_malloc(&buf, work->len); + ret = pread(work->fd_src, buf, work->len, work->off_src); + STARPU_ASSERT_MSG(ret >= 0, "Reading failed (errno %d)", errno); + STARPU_ASSERT_MSG((size_t) ret == work->len, "Reading failed (value %ld instead of %ld)", (long)ret, (long)work->len); + ret = pwrite(work->fd_dst, buf, work->len, work->off_dst); + STARPU_ASSERT_MSG(ret >= 0, "Writing failed (errno %d)", errno); + STARPU_ASSERT_MSG((size_t) ret == work->len, "Writing failed (value %ld instead of %ld)", (long)ret, (long)work->len); + starpu_free_noflag(buf, work->len); + } + else + { + STARPU_ASSERT_MSG(ret >= 0, "Copy_file_range failed (errno %d)", errno); + STARPU_ASSERT_MSG((size_t) ret == work->len, "Copy_file_range failed (value %ld instead of %ld)", (long)ret, (long)work->len); + } + + starpu_sem_post(&work->finished); /* signal completion to whoever waits on or tests this work */ + + /* Don't free work here: it is released once the request has been tested/waited */ + } + + } + + return NULL; +} +/* Set up the mutex, condition and work list of a copy thread, then start it */ +static void initialize_working_thread(struct starpu_unistd_copy_thread *internal_copy_thread) +{ + STARPU_PTHREAD_MUTEX_INIT(&internal_copy_thread->mutex, NULL); + STARPU_PTHREAD_COND_INIT(&internal_copy_thread->cond, NULL); + internal_copy_thread->run = 1; + starpu_unistd_work_copy_list_init(&internal_copy_thread->list); + STARPU_PTHREAD_CREATE(&internal_copy_thread->thread, NULL, starpu_unistd_internal_thread, internal_copy_thread); +} +#endif + +/* create a
new copy of parameter == base */ +void *starpu_unistd_global_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_unistd_base * base; + struct stat buf; + + _STARPU_MALLOC(base, sizeof(*base)); + base->created = 0; + base->path = strdup((char *) parameter); + STARPU_ASSERT(base->path); + + if (!(stat(base->path, &buf) == 0 && S_ISDIR(buf.st_mode))) + { + _starpu_mkpath(base->path, S_IRWXU); + base->created = 1; /* we created the directory ourselves */ + } + +#if defined(HAVE_LIBAIO_H) + STARPU_PTHREAD_MUTEX_INIT(&base->mutex, NULL); + base->hashtable = NULL; + unsigned nb_event = MAX_PENDING_REQUESTS_PER_NODE + MAX_PENDING_PREFETCH_REQUESTS_PER_NODE + MAX_PENDING_IDLE_REQUESTS_PER_NODE; + memset(&base->ctx, 0, sizeof(base->ctx)); + int ret = io_setup(nb_event, &base->ctx); + STARPU_ASSERT(ret == 0); +#endif + +#ifdef STARPU_UNISTD_USE_COPY + base->disk_index = starpu_unistd_nb_disk_opened; + starpu_unistd_nb_disk_opened++; + + unsigned i; + for (i = 0; i < starpu_unistd_nb_disk_opened; i++) + { + initialize_working_thread(&copy_thread[i][base->disk_index]); + /* the [disk_index][disk_index] entry was initialized just above, don't do it twice */ + if (i != base->disk_index) + initialize_working_thread(&copy_thread[base->disk_index][i]); + } +#endif + + return (void *) base; +} + +#ifdef STARPU_UNISTD_USE_COPY +static void ending_working_thread(struct starpu_unistd_copy_thread *internal_copy_thread) +{ + STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); + internal_copy_thread->run = 0; /* ask the thread to stop */ + STARPU_PTHREAD_COND_BROADCAST(&internal_copy_thread->cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); + + STARPU_PTHREAD_JOIN(internal_copy_thread->thread, NULL); + + STARPU_PTHREAD_MUTEX_DESTROY(&internal_copy_thread->mutex); + STARPU_PTHREAD_COND_DESTROY(&internal_copy_thread->cond); +} +#endif + +/* free memory allocated for the base */ +void starpu_unistd_global_unplug(void *base) +{ + struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; +#if defined(HAVE_LIBAIO_H) +
STARPU_PTHREAD_MUTEX_DESTROY(&fileBase->mutex); + io_destroy(fileBase->ctx); +#endif + if (fileBase->created) + rmdir(fileBase->path); + +#ifdef STARPU_UNISTD_USE_COPY + unsigned i; + for (i = 0; i < fileBase->disk_index+1; i++) + { + ending_working_thread(&copy_thread[i][fileBase->disk_index]); + /* the [disk_index][disk_index] entry was ended just above, don't do it twice */ + if (i != fileBase->disk_index) + ending_working_thread(&copy_thread[fileBase->disk_index][i]); + } + starpu_unistd_nb_disk_opened--; + +#endif + + free(fileBase->path); + free(fileBase); +} + +int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base) +{ + int res; + unsigned iter; + double timing_slowness, timing_latency; + double start; + double end; + struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; + + srand(time(NULL)); + /* allocate the disk object first: failing here must not leak the RAM buffer */ + void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); + /* fail to alloc */ + if (mem == NULL) + return 0; + + char *buf; + starpu_malloc_flags((void *) &buf, STARPU_DISK_SIZE_MIN, 0); + STARPU_ASSERT(buf != NULL); + memset(buf, 0, STARPU_DISK_SIZE_MIN); + + struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) mem; + + /* Measure upload slowness */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; ++iter) + { + int fd = tmp->descriptor; + + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); + + if (fd < 0) + fd = _starpu_unistd_reopen(tmp); +#ifdef STARPU_HAVE_WINDOWS + res = _commit(fd); +#else + res = fsync(fd); +#endif + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + + STARPU_ASSERT_MSG(res == 0, "bandwidth computation failed"); + } + end = starpu_timing_now(); + timing_slowness = end - start; + + /* free memory */ + starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); + + starpu_malloc_flags((void *) &buf, MEM_SIZE, 0); + STARPU_ASSERT(buf != NULL); + + memset(buf, 0, MEM_SIZE); + + /* Measure latency */ + start = starpu_timing_now(); + for (iter = 0;
iter < NITER; ++iter) + { + int fd = tmp->descriptor; + + _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, (rand() % (STARPU_DISK_SIZE_MIN/MEM_SIZE)) * MEM_SIZE, MEM_SIZE, NULL); /* MEM_SIZE bytes at a random MEM_SIZE-aligned offset */ + + if (fd < 0) + fd = _starpu_unistd_reopen(tmp); +#ifdef STARPU_HAVE_WINDOWS + res = _commit(fd); +#else + res = fsync(fd); +#endif + if (tmp->descriptor < 0) + _starpu_unistd_reclose(fd); + + STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); + } + end = starpu_timing_now(); + timing_latency = end - start; + + _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); + starpu_free_flags(buf, MEM_SIZE, 0); + /* the same measured bandwidth and latency values are passed twice */ + _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, + timing_latency/NITER, timing_latency/NITER, node, fileBase->path); + return 1; +} + +void starpu_unistd_global_wait_request(void *async_channel) +{ + struct starpu_unistd_wait * event = async_channel; + switch (event->type) + { + case STARPU_UNISTD_AIOCB : + { +#if defined(HAVE_LIBAIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct io_event ev; + + int values = -1; + int myerrno = EAGAIN; + while(!starpu_aiocb->finished || (values <= 0 && (myerrno == EAGAIN || myerrno == EINTR))) + { + /* Wait for at least one completion; the timeout is NULL so this blocks. NOTE(review): when finished is already set on entry the loop body still runs once - confirm intended */ + values = io_getevents(starpu_aiocb->base->ctx, 1, 1, &ev, NULL); + if (values < 0) + myerrno = -values; + if (values > 0) + { + //we may catch another request...
+ STARPU_PTHREAD_MUTEX_LOCK(&starpu_aiocb->base->mutex); + + struct starpu_unistd_aiocb_link *l = NULL; + HASH_FIND_PTR(starpu_aiocb->base->hashtable, &ev.obj, l); + STARPU_ASSERT(l != NULL); + + HASH_DEL(starpu_aiocb->base->hashtable, l); + STARPU_PTHREAD_MUTEX_UNLOCK(&starpu_aiocb->base->mutex); + struct starpu_unistd_aiocb *aiocb = l->starpu_aiocb; + STARPU_ASSERT_MSG(ev.res == aiocb->len, "Aio request was truncated"); + aiocb->finished = 1; /* marks the request that actually completed, which may not be ours */ + free(l); + } + } +#elif defined(HAVE_AIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct aiocb *aiocb = &starpu_aiocb->aiocb; + int values = -1; + int ret, myerrno = EAGAIN; + starpu_ssize_t size; + while(values < 0 && (myerrno == EAGAIN || myerrno == EINTR)) + { + /* Wait for the request to complete; the timeout is NULL so this blocks */ + values = aio_suspend((const struct aiocb **) &aiocb, 1, NULL); + myerrno = errno; + } + ret = aio_error(aiocb); + STARPU_ASSERT_MSG(!ret, "aio_error returned %d", ret); + size = aio_return(aiocb); + STARPU_ASSERT(size == (starpu_ssize_t) aiocb->aio_nbytes); +#endif + break; + } + +#ifdef STARPU_UNISTD_USE_COPY + case STARPU_UNISTD_COPY : + { + starpu_sem_wait(&event->event.event_copy->finished); + break; + } +#endif + + default : + STARPU_ABORT_MSG(); + break; + } +} +/* Non-blocking check of a request: returns 1 once it has completed, 0 otherwise */ +int starpu_unistd_global_test_request(void *async_channel) +{ + struct starpu_unistd_wait * event = async_channel; + switch (event->type) + { + case STARPU_UNISTD_AIOCB : + { +#if defined(HAVE_LIBAIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct io_event ev; + struct timespec ts; + int ret; + + if (starpu_aiocb->finished) + return 1; + + memset(&ts, 0, sizeof(ts)); + + /* Poll for one completion without blocking (zero timeout) */ + ret = io_getevents(starpu_aiocb->base->ctx, 0, 1, &ev, &ts); + + if (ret == 1) + { + //we may catch another request...
+ STARPU_PTHREAD_MUTEX_LOCK(&starpu_aiocb->base->mutex); + + struct starpu_unistd_aiocb_link *l = NULL; + HASH_FIND_PTR(starpu_aiocb->base->hashtable, &ev.obj, l); + STARPU_ASSERT(l != NULL); + + HASH_DEL(starpu_aiocb->base->hashtable, l); + STARPU_PTHREAD_MUTEX_UNLOCK(&starpu_aiocb->base->mutex); + struct starpu_unistd_aiocb *aiocb = l->starpu_aiocb; + STARPU_ASSERT_MSG(ev.res == aiocb->len, "Aio request was truncated"); + aiocb->finished = 1; /* marks the request that actually completed, which may not be ours */ + free(l); + + if (starpu_aiocb->finished) + return 1; + } + + return 0; +#elif defined(HAVE_AIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct aiocb *aiocb = &starpu_aiocb->aiocb; + int ret; + +#if defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 22)) + /* glibc's aio_error was not threadsafe before glibc 2.22, so poll with a zero-timeout aio_suspend first */ + struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 }; + ret = aio_suspend((const struct aiocb **) &aiocb, 1, &ts); + if (ret < 0 && (errno == EAGAIN || errno == EINTR)) + return 0; + STARPU_ASSERT_MSG(!ret, "aio_suspend returned %d %d\n", ret, errno); +#endif + starpu_ssize_t size; + /* Query the completion status of the request */ + ret = aio_error(aiocb); + if (ret == 0) + { + /* request is finished */ + size = aio_return(aiocb); + STARPU_ASSERT_MSG(size == (starpu_ssize_t) aiocb->aio_nbytes, "AIO op got %ld bytes instead of %ld bytes\n", (long) size, (long) aiocb->aio_nbytes); + return 1; + } + if (ret == EINTR || ret == EINPROGRESS || ret == EAGAIN) + return 0; + /* an error occurred */ + STARPU_ABORT_MSG("aio_error returned %d", ret); +#endif + break; + } + +#ifdef STARPU_UNISTD_USE_COPY + case STARPU_UNISTD_COPY : + { + return starpu_sem_trywait(&event->event.event_copy->finished) == 0; + } +#endif + + default : + STARPU_ABORT_MSG(); + break; + } + + return 0; +} +/* Release a request: re-close descriptors that were reopened for it, then free the event */ +void starpu_unistd_global_free_request(void *async_channel) +{ + struct starpu_unistd_wait * event = async_channel; + switch (event->type) + { + case STARPU_UNISTD_AIOCB : + { +#if
defined(HAVE_LIBAIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct iocb *iocb = &starpu_aiocb->iocb; + if (starpu_aiocb->obj->descriptor < 0) + _starpu_unistd_reclose(iocb->aio_fildes); /* the object keeps no descriptor open: close the one stored in the request */ + free(event); +#elif defined(HAVE_AIO_H) + struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; + struct aiocb *aiocb = &starpu_aiocb->aiocb; + if (starpu_aiocb->obj->descriptor < 0) + _starpu_unistd_reclose(aiocb->aio_fildes); /* the object keeps no descriptor open: close the one stored in the request */ + free(event); +#endif + break; + } + +#ifdef STARPU_UNISTD_USE_COPY + case STARPU_UNISTD_COPY : + { + starpu_sem_destroy(&event->event.event_copy->finished); + + int fd_src = event->event.event_copy->obj_src->descriptor; + if (fd_src < 0) + _starpu_unistd_reclose(event->event.event_copy->fd_src); + int fd_dst = event->event.event_copy->obj_dst->descriptor; + if (fd_dst < 0) + _starpu_unistd_reclose(event->event.event_copy->fd_dst); + + starpu_unistd_work_copy_delete(event->event.event_copy); + free(event); + break; + } +#endif + + default : + STARPU_ABORT_MSG(); + break; + } +} + +/* Queue a copy of size bytes between two disk objects on the copy thread of their pair of disks */ +#ifdef STARPU_UNISTD_USE_COPY +void * starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) +{ + struct starpu_unistd_global_obj * unistd_obj_src = obj_src; + struct starpu_unistd_global_obj * unistd_obj_dst = obj_dst; + struct starpu_unistd_base * unistd_base_src = base_src; + struct starpu_unistd_base * unistd_base_dst = base_dst; + + if (starpu_unistd_copy_works == 0) + /* It didn't work previously, don't bother submitting more.
*/ + return NULL; + + struct starpu_unistd_wait * event; + _STARPU_CALLOC(event, 1,sizeof(*event)); + event->type = STARPU_UNISTD_COPY; + + int fd_src = unistd_obj_src->descriptor; + if (fd_src < 0) + fd_src = _starpu_unistd_reopen(obj_src); + int fd_dst = unistd_obj_dst->descriptor; + if (fd_dst < 0) + fd_dst = _starpu_unistd_reopen(obj_dst); + + struct starpu_unistd_work_copy * work = starpu_unistd_work_copy_new(); + work->fd_src = fd_src; + work->fd_dst = fd_dst; + work->obj_src = unistd_obj_src; + work->obj_dst = unistd_obj_dst; + work->off_src = offset_src; + work->off_dst = offset_dst; + work->len = size; + /* currently not used by copy_file_range */ + work->flags = 0; + starpu_sem_init(&work->finished, 0, 0); /* posted by the copy thread once the copy is done */ + + event->event.event_copy = work; + + struct starpu_unistd_copy_thread * thread = &copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index]; + + STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex); + starpu_unistd_work_copy_list_push_front(&thread->list, work); /* the thread pops from the back, so works are served in submission order */ + STARPU_PTHREAD_COND_BROADCAST(&thread->cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex); + + return event; +} +#endif diff --git a/src/core/disk_ops/unistd/disk_unistd_global.h b/src/core/disk_ops/unistd/disk_unistd_global.h new file mode 100644 index 0000000..ebde2e3 --- /dev/null +++ b/src/core/disk_ops/unistd/disk_unistd_global.h @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DISK_UNISTD_GLOBAL_H__ +#define __DISK_UNISTD_GLOBAL_H__ + +/** @file */ + +#include +#ifdef __linux__ +#include +#endif + +#pragma GCC visibility push(hidden) + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#define STARPU_UNISTD_USE_COPY 1 /* undefined just below when copy_file_range is not available */ +#if !defined(HAVE_COPY_FILE_RANGE) && !defined(__NR_copy_file_range) +#undef STARPU_UNISTD_USE_COPY +#endif + +#ifdef __linux__ +typedef loff_t starpu_loff_t; +#else +typedef off_t starpu_loff_t; +#endif +/** State of one object (file) stored on a unistd disk */ +struct starpu_unistd_global_obj +{ + int descriptor; /**< file descriptor, or negative when the file is reopened for each operation */ + char * path; /**< presumably the path of the backing file - TODO confirm against _starpu_unistd_init */ + size_t size; /**< size last set on the file, compared by the full_write functions before truncating */ + int flags; /**< flags passed to open() */ + starpu_pthread_mutex_t mutex; /**< serializes lseek+read/write on a descriptor that is kept open */ +}; + +void * starpu_unistd_global_alloc (struct starpu_unistd_global_obj * obj, void *base, size_t size); +void starpu_unistd_global_free (void *base, void *obj, size_t size); +void * starpu_unistd_global_open (struct starpu_unistd_global_obj * obj, void *base, void *pos, size_t size); +void starpu_unistd_global_close (void *base, void *obj, size_t size); +int starpu_unistd_global_read (void *base, void *obj, void *buf, off_t offset, size_t size); +int starpu_unistd_global_write (void *base, void *obj, const void *buf, off_t offset, size_t size); +void * starpu_unistd_global_plug (void *parameter, starpu_ssize_t size); +void starpu_unistd_global_unplug (void *base); +int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base); +void* starpu_unistd_global_async_read (void *base, void *obj, void *buf, off_t offset, size_t size); +void* starpu_unistd_global_async_write (void *base, void *obj, void *buf, off_t offset, size_t size); +void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size); +void * starpu_unistd_global_async_full_read (void *
base, void * obj, void ** ptr, size_t * size, unsigned dst_node); +void starpu_unistd_global_wait_request(void * async_channel); +int starpu_unistd_global_test_request(void * async_channel); +void starpu_unistd_global_free_request(void * async_channel); +int starpu_unistd_global_full_read(void *base, void * obj, void ** ptr, size_t * size, unsigned dst_node); +int starpu_unistd_global_full_write (void * base, void * obj, void * ptr, size_t size); +#ifdef STARPU_UNISTD_USE_COPY +void * starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size); +#endif + +#pragma GCC visibility pop + +#endif diff --git a/src/core/drivers.c b/src/core/drivers.c new file mode 100644 index 0000000..c95fde7 --- /dev/null +++ b/src/core/drivers.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +/* Initialize the driver of the worker matching d; returns -EINVAL when that worker has no driver operations */ +int starpu_driver_init(struct starpu_driver *d) +{ + STARPU_ASSERT(d); + struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); + + if (worker->driver_ops == NULL) + return -EINVAL; + else + return worker->driver_ops->init(worker); +} +/* Run the driver of the worker matching d; unlike its siblings, a NULL d is reported with -EINVAL instead of an assertion */ +int starpu_driver_run(struct starpu_driver *d) +{ + if (!d) + { + _STARPU_DEBUG("Invalid argument\n"); + return -EINVAL; + } + + struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); + if (worker->driver_ops == NULL) + return -EINVAL; + else + return worker->driver_ops->run(worker); +} +/* Run one iteration of the driver of the worker matching d; returns -EINVAL when that worker has no driver operations */ +int starpu_driver_run_once(struct starpu_driver *d) +{ + STARPU_ASSERT(d); + struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); + + if (worker->driver_ops == NULL) + return -EINVAL; + else + return worker->driver_ops->run_once(worker); +} +/* Deinitialize the driver of the worker matching d; returns -EINVAL when that worker has no driver operations */ +int starpu_driver_deinit(struct starpu_driver *d) +{ + STARPU_ASSERT(d); + struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); + + if (worker->driver_ops == NULL) + return -EINVAL; + else + return worker->driver_ops->deinit(worker); +} + diff --git a/src/core/drivers.h b/src/core/drivers.h new file mode 100644 index 0000000..cb0eb49 --- /dev/null +++ b/src/core/drivers.h @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVERS_H__ +#define __DRIVERS_H__ + +#pragma GCC visibility push(hidden) + +/** @file */ +/** Entry points implemented by the driver of a worker; every callback returns an int status */ +struct _starpu_driver_ops +{ + int (*init)(struct _starpu_worker *worker); /**< Initialize the thread for running the worker */ + int (*run)(struct _starpu_worker *worker); /**< Actually run the worker */ + int (*run_once)(struct _starpu_worker *worker); /**< Run just one loop of the worker */ + int (*deinit)(struct _starpu_worker *worker); /**< Deinitialize the thread after running a worker */ + int (*set_devid)(struct starpu_driver *driver, struct _starpu_worker *worker); + /**< Sets into \p driver the id for worker \p worker */ + int (*is_devid)(struct starpu_driver *driver, struct _starpu_worker *worker); + /**< Tests whether \p driver has the id for worker \p worker */ +}; + +#pragma GCC visibility pop + +#endif // __DRIVERS_H__ diff --git a/src/core/errorcheck.c b/src/core/errorcheck.c new file mode 100644 index 0000000..1a9a407 --- /dev/null +++ b/src/core/errorcheck.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +/* Set status bit st on worker, under its scheduling mutex; the bit must not already be set */ +void _starpu_add_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time) +{ + starpu_pthread_mutex_t *sched_mutex; + starpu_pthread_cond_t *sched_cond; + starpu_worker_get_sched_condition(worker->workerid, &sched_mutex, &sched_cond); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); + STARPU_ASSERT(!(worker->status & (1 << st))); + if (starpu_profiling_status_get()) + _starpu_worker_start_state(worker->workerid, st, time); + worker->status |= (1 << st); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); +} +/* Same as _starpu_add_worker_status for the calling worker; does nothing outside a worker */ +void _starpu_add_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + + /* It is possible that we call this function from the application (and + * therefore outside a worker), for instance if we are executing the + * callback function of a task with a "NULL" codelet. */ + if (worker) + _starpu_add_worker_status(worker, st, time); +} +/* Clear status bit st on worker, under its scheduling mutex; the bit must currently be set */ +void _starpu_clear_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time) +{ + starpu_pthread_mutex_t *sched_mutex; + starpu_pthread_cond_t *sched_cond; + starpu_worker_get_sched_condition(worker->workerid, &sched_mutex, &sched_cond); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); + STARPU_ASSERT((worker->status & (1 << st))); + if (starpu_profiling_status_get()) + _starpu_worker_stop_state(worker->workerid, st, time); + worker->status &= ~(1 << st); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); +} +/* Same as _starpu_clear_worker_status for the calling worker; does nothing outside a worker */ +void _starpu_clear_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + + /* It is possible that we call this function from the application (and + * therefore outside a worker), for instance if we are executing the + * callback function of a task with a "NULL" codelet.
+ */ + if (worker) + _starpu_clear_worker_status(worker, st, time); +} +/* Return the status bitset of the calling worker, or STATUS_INVALID outside a worker */ +enum _starpu_worker_status _starpu_get_local_worker_status(void) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + if (STARPU_UNLIKELY(!worker)) + return STATUS_INVALID; + + return worker->status; +} + +/* It is forbidden to call blocking operations from a callback and during the + * execution of a task. Returns non-zero when a blocking call is allowed here. */ +unsigned _starpu_worker_may_perform_blocking_calls(void) +{ + enum _starpu_worker_status st = _starpu_get_local_worker_status(); +#ifdef STARPU_OPENMP + /* When the current task is an OpenMP task, we may need to block, + * especially when unregistering data used by child tasks. However, + * we don't want to blindly disable the check for non OpenMP tasks. */ + const struct starpu_task * const task = starpu_task_get_current(); + const int blocking_call_check_override = task && task->omp_task; +#else /* STARPU_OPENMP */ + const int blocking_call_check_override = 0; +#endif /* STARPU_OPENMP */ + /* STATUS_INVALID (-1) has every bit set, hence the explicit test before looking at individual bits */ + return blocking_call_check_override || (st == STATUS_INVALID) || (!(st & STATUS_CALLBACK) && !(st & STATUS_EXECUTING)); +} diff --git a/src/core/errorcheck.h b/src/core/errorcheck.h new file mode 100644 index 0000000..abb34ce --- /dev/null +++ b/src/core/errorcheck.h @@ -0,0 +1,84 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#ifndef __ERRORCHECK_H__ +#define __ERRORCHECK_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +/** This type enumerates the actions that can be done by a worker. + * Some can happen during others, which is why + * enum _starpu_worker_status + * is a bitset indexed by the values of enum _starpu_worker_status_index. + */ +enum _starpu_worker_status_index +{ + STATUS_INDEX_INITIALIZING = 0, + STATUS_INDEX_EXECUTING, + STATUS_INDEX_CALLBACK, + STATUS_INDEX_WAITING, + STATUS_INDEX_SLEEPING, + STATUS_INDEX_SCHEDULING, + STATUS_INDEX_NR, +}; + +/** This type describes in which state a worker may be. */ +enum _starpu_worker_status +{ + /** invalid status (for instance if we request the status of some thread + * that is not controlled by StarPU) */ + STATUS_INVALID = -1, + /** Nothing particular, thus just overhead */ + STATUS_UNKNOWN = 0, + /** during the initialization */ + STATUS_INITIALIZING = 1 << STATUS_INDEX_INITIALIZING, + /** during the execution of a codelet */ + STATUS_EXECUTING = 1 << STATUS_INDEX_EXECUTING, + /** during the execution of the callback */ + STATUS_CALLBACK = 1 << STATUS_INDEX_CALLBACK, + /** while waiting for a data transfer */ + STATUS_WAITING = 1 << STATUS_INDEX_WAITING, + /** while sleeping because there is no task to do */ + STATUS_SLEEPING = 1 << STATUS_INDEX_SLEEPING, + /** while executing the scheduler code */ + STATUS_SCHEDULING = 1 << STATUS_INDEX_SCHEDULING, +}; + +struct _starpu_worker; +/** Specify what the local worker is currently doing (eg. executing a callback). + * This makes it possible to detect whether it is legal to do a blocking call, for instance. */ +void _starpu_add_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time); +void _starpu_add_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time); + +/** Clear the fact that the local worker was currently doing something (eg. executing a callback).
*/ +void _starpu_clear_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time); +void _starpu_clear_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time); + +/** Indicate what type of operation the worker is currently doing. */ +enum _starpu_worker_status _starpu_get_local_worker_status(void); + +/** It is forbidden to do blocking calls during some operations such as callback + * or during the execution of a task. This function indicates whether it is + * legal to call a blocking operation in the current context. */ +unsigned _starpu_worker_may_perform_blocking_calls(void); + +#pragma GCC visibility pop + +#endif // __ERRORCHECK_H__ diff --git a/src/core/idle_hook.c b/src/core/idle_hook.c new file mode 100644 index 0000000..e1a286f --- /dev/null +++ b/src/core/idle_hook.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#define NMAXHOOKS 16 + +struct progression_hook +{ + unsigned (*func)(void *arg); + void *arg; + unsigned active; +}; + +/* protect the hook table */ +static starpu_pthread_rwlock_t idle_hook_rwlock; + +static struct progression_hook idle_hooks[NMAXHOOKS] = {{NULL, NULL, 0}}; +static int active_idle_hook_cnt = 0; + +/* + * Statically initializing idle_hook_rwlock seems to lead to weird errors + * on Darwin, so we do it dynamically. + */ +void _starpu_init_idle_hooks(void) +{ + STARPU_PTHREAD_RWLOCK_INIT(&idle_hook_rwlock, NULL); + STARPU_HG_DISABLE_CHECKING(active_idle_hook_cnt); +} + +int starpu_idle_hook_register(unsigned (*func)(void *arg), void *arg) +{ + int hook; + STARPU_PTHREAD_RWLOCK_WRLOCK(&idle_hook_rwlock); + for (hook = 0; hook < NMAXHOOKS; hook++) + { + if (!idle_hooks[hook].active) + { + /* We found an empty slot */ + idle_hooks[hook].func = func; + idle_hooks[hook].arg = arg; + idle_hooks[hook].active = 1; + active_idle_hook_cnt++; + + STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); + + return hook; + } + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); + + starpu_wake_all_blocked_workers(); + + /* We could not find an empty slot */ + return -1; +} + +void starpu_idle_hook_deregister(int hook_id) +{ + STARPU_PTHREAD_RWLOCK_WRLOCK(&idle_hook_rwlock); + + if (idle_hooks[hook_id].active) + active_idle_hook_cnt--; + + idle_hooks[hook_id].active = 0; + + STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); +} + +unsigned _starpu_execute_registered_idle_hooks(void) +{ + if (active_idle_hook_cnt == 0) + return 1; + + /* By default, it is possible to block, but if some idle hook + * requires that it's not blocking, we disable blocking.
*/ + unsigned may_block = 1; + + unsigned hook; + for (hook = 0; hook < NMAXHOOKS; hook++) + { + unsigned active; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&idle_hook_rwlock); + active = idle_hooks[hook].active; + STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); + + unsigned may_block_hook = 1; + + if (active) + may_block_hook = idle_hooks[hook].func(idle_hooks[hook].arg); + + /* As soon as one hook tells that the driver cannot be + * blocking, we don't allow it. */ + if (!may_block_hook) + may_block = 0; + } + + return may_block; +} diff --git a/src/core/idle_hook.h b/src/core/idle_hook.h new file mode 100644 index 0000000..3ee3c8f --- /dev/null +++ b/src/core/idle_hook.h @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __IDLE_HOOK_H__ +#define __IDLE_HOOK_H__ + +#pragma GCC visibility push(hidden) + +/** @file */ + +void _starpu_init_idle_hooks(void); + +unsigned _starpu_execute_registered_idle_hooks(void); + +#pragma GCC visibility pop + +#endif /* !__IDLE_HOOK_H__ */ diff --git a/src/core/jobs.c b/src/core/jobs.c new file mode 100644 index 0000000..9b1a54d --- /dev/null +++ b/src/core/jobs.c @@ -0,0 +1,1138 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int max_memory_use; +static int task_progress; +static unsigned long njobs_finished; +static unsigned long njobs, maxnjobs; + +#ifdef STARPU_DEBUG +/* List of all jobs, for debugging */ +static struct _starpu_job_multilist_all_submitted all_jobs_list; +static starpu_pthread_mutex_t all_jobs_list_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +#endif + +void _starpu_job_crash(); + +void _starpu_job_init(void) +{ + max_memory_use = starpu_getenv_number_default("STARPU_MAX_MEMORY_USE", 0); + task_progress = starpu_getenv_number_default("STARPU_TASK_PROGRESS", 0); +#ifdef STARPU_DEBUG + _starpu_job_multilist_head_init_all_submitted(&all_jobs_list); +#endif + _starpu_crash_add_hook(&_starpu_job_crash); +} + +void _starpu_job_memory_use(int check) +{ + if (max_memory_use) + { + _STARPU_DISP("Memory used for %lu tasks: %lu MiB\n", maxnjobs, (unsigned long) (maxnjobs * (sizeof(struct starpu_task) + sizeof(struct _starpu_job))) >> 20); + if (check) + STARPU_ASSERT_MSG(njobs == 0, "Some tasks have not been cleaned, did you forget to call starpu_task_destroy or starpu_task_clean?"); + } +} + +void _starpu_job_crash() +{ + 
_starpu_job_memory_use(0); +} + +void _starpu_job_fini(void) +{ + _starpu_job_memory_use(1); +} + +void _starpu_exclude_task_from_dag(struct starpu_task *task) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + j->exclude_from_dag = 1; + _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(j); +} + +/* create an internal struct _starpu_job structure to encapsulate the task */ +struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_task *task) +{ + struct _starpu_job *job; + _STARPU_LOG_IN(); + + /* As most of the fields must be initialized at NULL, let's put 0 + * everywhere */ + _STARPU_CALLOC(job, 1, sizeof(*job)); + + if (task->dyn_handles) + { + _STARPU_MALLOC(job->dyn_ordered_buffers, STARPU_TASK_GET_NBUFFERS(task) * sizeof(job->dyn_ordered_buffers[0])); + _STARPU_CALLOC(job->dyn_dep_slots, STARPU_TASK_GET_NBUFFERS(task), sizeof(job->dyn_dep_slots[0])); + } + + job->task = task; + + if ( +#if defined(STARPU_DEBUG) + 1 +#elif defined(STARPU_USE_FXT) + fut_active +#else + _starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT +#endif + ) + { + job->job_id = _starpu_fxt_get_job_id(); + STARPU_AYU_ADDTASK(job->job_id, task); + STARPU_ASSERT(job->job_id != ULONG_MAX); + } + if (max_memory_use) + { + unsigned long jobs = STARPU_ATOMIC_ADDL(&njobs, 1); + if (jobs > maxnjobs) + maxnjobs = jobs; + } + + _starpu_cg_list_init0(&job->job_successors); + + STARPU_PTHREAD_MUTEX_INIT0(&job->sync_mutex, NULL); + STARPU_PTHREAD_COND_INIT0(&job->sync_cond, NULL); + + /* By default we have sequential tasks */ + job->task_size = 1; + + job->workerid = -1; + + if (task->use_tag) + _starpu_tag_declare(task->tag_id, job); + + if (_starpu_graph_record) + _starpu_graph_add_job(job); + + _STARPU_LOG_OUT(); + return job; +} + +struct _starpu_job* _starpu_get_job_associated_to_task_slow(struct starpu_task *task, struct 
_starpu_job *job) +{ + if (job == _STARPU_JOB_UNSET) + { + job = STARPU_VAL_COMPARE_AND_SWAP_PTR(&task->starpu_private, _STARPU_JOB_UNSET, _STARPU_JOB_SETTING); + if (job != _STARPU_JOB_UNSET && job != _STARPU_JOB_SETTING) + { + /* Actually available in the meanwhile */ + STARPU_RMB(); + return job; + } + + if (job == _STARPU_JOB_UNSET) + { + /* Ok, we have to do it */ + job = _starpu_job_create(task); + STARPU_WMB(); + task->starpu_private = job; + return job; + } + } + + /* Saw _STARPU_JOB_SETTING, somebody is doing it, wait for it. + * This is rare enough that busy-reading is fine enough. */ + while ((job = *(struct _starpu_job *volatile*) &task->starpu_private) == _STARPU_JOB_SETTING) + { + STARPU_UYIELD(); + STARPU_SYNCHRONIZE(); + } + + STARPU_RMB(); + return job; +} + +void _starpu_job_destroy(struct _starpu_job *j) +{ + /* Wait for any code that was still working on the job (and was + * probably our waker) */ + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + STARPU_PTHREAD_COND_DESTROY(&j->sync_cond); + STARPU_PTHREAD_MUTEX_DESTROY(&j->sync_mutex); + + if (j->task_size > 1) + { + STARPU_PTHREAD_BARRIER_DESTROY(&j->before_work_barrier); + STARPU_PTHREAD_BARRIER_DESTROY(&j->after_work_barrier); + STARPU_ASSERT(j->after_work_busy_barrier == 0); + } + + _starpu_cg_list_deinit(&j->job_successors); + if (j->dyn_ordered_buffers) + { + free(j->dyn_ordered_buffers); + j->dyn_ordered_buffers = NULL; + } + if (j->dyn_dep_slots) + { + free(j->dyn_dep_slots); + j->dyn_dep_slots = NULL; + } + + if (_starpu_graph_record && j->graph_node) + _starpu_graph_drop_job(j); + + if (max_memory_use) + (void) STARPU_ATOMIC_ADDL(&njobs, -1); + + free(j); +} + +int _starpu_job_finished(struct _starpu_job *j) +{ + int ret; + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + ret = j->terminated == 2; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return ret; +} + +void _starpu_wait_job(struct _starpu_job *j) +{ + STARPU_ASSERT(j->task); + 
STARPU_ASSERT(!j->task->detach); + _STARPU_LOG_IN(); + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + + /* We wait for the flag to have a value of 2 which means that both the + * codelet's implementation and its callback have been executed. That + * way, _starpu_wait_job won't return until the entire task was really + * executed (so that we cannot destroy the task while it is still being + * manipulated by the driver). */ + + while (j->terminated != 2) + { + STARPU_PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + _STARPU_LOG_OUT(); +} + +#ifdef STARPU_OPENMP +int _starpu_test_job_termination(struct _starpu_job *j) +{ + STARPU_ASSERT(j->task); + STARPU_ASSERT(!j->task->detach); + /* Disable Helgrind race complaint, since we really just want to poll j->terminated */ + if (STARPU_RUNNING_ON_VALGRIND) + { + int v = STARPU_PTHREAD_MUTEX_TRYLOCK(&j->sync_mutex); + if (v != EBUSY) + { + STARPU_ASSERT(v == 0); + int ret = (j->terminated == 2); + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return ret; + } + else + { + return 0; + } + } + else + { + STARPU_SYNCHRONIZE(); + return j->terminated == 2; + } +} +void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit, + void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg) +{ + STARPU_ASSERT(!j->continuation); + /* continuation are not supported for parallel tasks for now */ + STARPU_ASSERT(j->task_size == 1); + j->continuation = 1; + j->continuation_resubmit = continuation_resubmit; + j->continuation_callback_on_sleep = continuation_callback_on_sleep; + j->continuation_callback_on_sleep_arg = continuation_callback_on_sleep_arg; + j->job_successors.ndeps = 0; + j->job_successors.ndeps_completed = 0; +} +/* Prepare a currently running job for accepting a new set of + * dependencies in anticipation of becoming a continuation. 
*/ +void _starpu_job_prepare_for_continuation(struct _starpu_job *j) +{ + _starpu_job_prepare_for_continuation_ext(j, 1, NULL, NULL); +} +void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j, + void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg) +{ + j->omp_cleanup_callback = omp_cleanup_callback; + j->omp_cleanup_callback_arg = omp_cleanup_callback_arg; +} +#endif + +void _starpu_handle_job_submission(struct _starpu_job *j) +{ + /* Need to atomically set submitted to 1 and check dependencies, since + * this is concurrent with _starpu_notify_cg */ + j->terminated = 0; + + if (!j->submitted) + j->submitted = 1; + else + j->submitted = 2; + +#ifdef STARPU_DEBUG + STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex); + _starpu_job_multilist_push_back_all_submitted(&all_jobs_list, j); + STARPU_PTHREAD_MUTEX_UNLOCK(&all_jobs_list_mutex); +#endif +} + +void starpu_task_end_dep_release(struct starpu_task *t) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(t); + +#ifdef STARPU_USE_FXT + struct starpu_task *current = starpu_task_get_current(); + if (current) + { + struct _starpu_job *jcurrent = _starpu_get_job_associated_to_task(current); + _STARPU_TRACE_TASK_END_DEP(jcurrent, j); + } +#endif + + _starpu_handle_job_termination(j); +} + +void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(t); + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + t->nb_termination_call_required += nb_deps; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); +} + +void _starpu_handle_job_termination(struct _starpu_job *j) +{ + if (j->task->nb_termination_call_required != 0) + { + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + int nb = j->task->nb_termination_call_required; + j->task->nb_termination_call_required -= 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + if (nb != 0) return; + } + + if (task_progress) + { + unsigned long jobs = STARPU_ATOMIC_ADDL(&njobs_finished, 1); + +
fprintf(stderr,"\r%lu tasks finished (last %lu %p on %d)...", jobs, j->job_id, j->task, starpu_worker_get_id()); + } + + struct starpu_task *task = j->task; + struct starpu_task *end_rdep = NULL; + unsigned sched_ctx = task->sched_ctx; + double flops = task->flops; + + const unsigned continuation = +#ifdef STARPU_OPENMP + j->continuation +#else + 0 +#endif + ; + + if (!continuation) + { + void (*epilogue_callback)(void *) = task->epilogue_callback_func; + /* the epilogue callback is executed before the dependencies release*/ + if (epilogue_callback) + { + enum _starpu_worker_status old_status = _starpu_get_local_worker_status(); + + /* so that we can check whether we are doing blocking calls + * within the callback */ + if (!(old_status & STATUS_CALLBACK)) + _starpu_add_local_worker_status(STATUS_INDEX_CALLBACK, NULL); + + /* Perhaps we have nested callbacks (eg. with chains of empty + * tasks). So we store the current task and we will restore it + * later. */ + struct starpu_task *current_task = starpu_task_get_current(); + + _starpu_set_current_task(task); + + _STARPU_TRACE_START_CALLBACK(j); + epilogue_callback(task->epilogue_callback_arg); + _STARPU_TRACE_END_CALLBACK(j); + + _starpu_set_current_task(current_task); + + if (!(old_status & STATUS_CALLBACK)) + _starpu_clear_local_worker_status(STATUS_INDEX_CALLBACK, NULL); + } + } + +#ifdef STARPU_DEBUG + STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex); + _starpu_job_multilist_erase_all_submitted(&all_jobs_list, j); + STARPU_PTHREAD_MUTEX_UNLOCK(&all_jobs_list_mutex); +#endif + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + STARPU_ASSERT(task->status == STARPU_TASK_RUNNING); +#ifdef STARPU_OPENMP + if (continuation) + { + task->status = STARPU_TASK_STOPPED; + } + else +#endif + { + task->status = STARPU_TASK_FINISHED; + + /* already prepare for next run */ + struct _starpu_cg_list *job_successors = &j->job_successors; + job_successors->ndeps_completed = 0; + + /* We must have set the j->terminated flag early, so 
that it is + * possible to express task dependencies within the callback + * function. A value of 1 means that the codelet was executed but that + * the callback is not done yet. */ + j->terminated = 1; + end_rdep = j->end_rdep; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + +#ifdef STARPU_USE_SC_HYPERVISOR + size_t data_size = 0; +#endif //STARPU_USE_SC_HYPERVISOR + + /* We release handle reference count */ + if (task->cl && !continuation +#ifdef STARPU_BUBBLE + && !j->is_bubble +#endif + ) + { + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); +#ifdef STARPU_USE_SC_HYPERVISOR + for(i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + if (handle != NULL) + data_size += _starpu_data_get_size(handle); + } +#endif //STARPU_USE_SC_HYPERVISOR + + for (i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + _starpu_spin_lock(&handle->header_lock); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + } + } + + /* Check nowhere before releasing the sequential consistency (which may + * unregister the handle and free its switch_cl, and thus task->cl here. */ + unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE; + /* If the job was executed on a combined worker there is no need for the + * scheduler to process it : the task structure doesn't contain any valuable + * data as it's not linked to an actual worker */ + /* control task should not execute post_exec_hook */ + if(j->task_size == 1 && !nowhere && !j->internal +#ifdef STARPU_OPENMP + /* If this is a continuation, we do not execute the post_exec_hook. The + * post_exec_hook will be run only when the continued task fully + * completes. 
+ * + * Note: If needed, a specific hook could be added to handle stopped + * tasks */ + && !continuation +#endif + ) + { + _starpu_sched_post_exec_hook(task); +#ifdef STARPU_USE_SC_HYPERVISOR + int workerid = starpu_worker_get_id(); + _starpu_sched_ctx_post_exec_task_cb(workerid, task, data_size, j->footprint); +#endif //STARPU_USE_SC_HYPERVISOR + + } + + /* Remove ourself from the graph before notifying dependencies */ + if (_starpu_graph_record) + _starpu_graph_drop_job(j); + + /* Get callback pointer for codelet before notifying dependencies, in + case dependencies free the codelet (see starpu_data_unregister for + instance) */ + void (*callback)(void *) = task->callback_func; + if (!callback && task->cl) + callback = task->cl->callback_func; + + /* If this is a continuation, we do not release task dependencies now. + * Task dependencies will be released only when the continued task + * fully completes */ + if (!continuation) + { + /* Tell other tasks that we don't exist any more, thus no need for + * implicit dependencies any more. */ + _starpu_release_task_enforce_sequential_consistency(j); + } + + /* Task does not have a cl, but has explicit data dependencies, we need + * to tell them that we will not exist any more before notifying the + * tasks waiting for us + * + * For continuations, implicit dependency handles are only released + * when the task fully completes */ + if (j->implicit_dep_handle && !continuation) + { + starpu_data_handle_t handle = j->implicit_dep_handle; + _starpu_release_data_enforce_sequential_consistency(j->task, &j->implicit_dep_slot, handle); + /* Release reference taken while setting implicit_dep_handle */ + _starpu_spin_lock(&handle->header_lock); + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + } + + if (!continuation) + { + /* If this is a continuation, we do not notify task/tag dependencies + * now. 
Task/tag dependencies will be notified only when the continued + * task fully completes */ + /* in case there are dependencies, wake up the proper tasks */ + if (end_rdep) + starpu_task_end_dep_release(end_rdep); + _starpu_notify_dependencies(j); + + /* If this is a continuation, we do not execute the callback + * now. The callback will be executed only when the continued + * task fully completes */ + /* the callback is executed after the dependencies so that we may remove the tag + * of the task itself */ + if (callback) + { + struct timespec *time = NULL; + int profiling = starpu_profiling_status_get(); + if (profiling && task->profiling_info) + { + time = &task->profiling_info->callback_start_time; + _starpu_clock_gettime(time); + } + enum _starpu_worker_status old_status = _starpu_get_local_worker_status(); + + /* so that we can check whether we are doing blocking calls + * within the callback */ + if (!(old_status & STATUS_CALLBACK)) + _starpu_add_local_worker_status(STATUS_INDEX_CALLBACK, time); + + /* Perhaps we have nested callbacks (eg. with chains of empty + * tasks). So we store the current task and we will restore it + * later. */ + struct starpu_task *current_task = starpu_task_get_current(); + + _starpu_set_current_task(task); + + _STARPU_TRACE_START_CALLBACK(j); + callback(task->callback_arg); + _STARPU_TRACE_END_CALLBACK(j); + + _starpu_set_current_task(current_task); + + if (profiling && task->profiling_info) + { + time = &task->profiling_info->callback_end_time; + _starpu_clock_gettime(time); + } + + if (!(old_status & STATUS_CALLBACK)) + _starpu_clear_local_worker_status(STATUS_INDEX_CALLBACK, time); + } + } + + /* Note: For now, we keep the TASK_DONE trace event for continuation, + * however we could add a specific event for stopped tasks if needed. 
+ */ + _STARPU_TRACE_TASK_DONE(j); + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + + /* NB: we do not save those values before the callback, in case the + * application changes some parameters eventually (eg. a task may not + * be generated if the application is terminated). */ + unsigned destroy = task->destroy; + unsigned detach = task->detach; + unsigned regenerate = task->regenerate; + unsigned synchronous = task->synchronous; + + if (!continuation) + { +#ifdef STARPU_OPENMP + if (j->omp_cleanup_callback) + { + j->omp_cleanup_callback(j->omp_cleanup_callback_arg); + j->omp_cleanup_callback = NULL; + j->omp_cleanup_callback_arg = NULL; + } +#endif + /* A value of 2 is put to specify that not only the codelet but + * also the callback were executed. */ + j->terminated = 2; + } + task->prefetched = 0; + STARPU_PTHREAD_COND_BROADCAST(&j->sync_cond); + STARPU_AYU_REMOVETASK(j->job_id); + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + /* we do not deallocate the job structure if some is going to + * wait after the task */ + if (detach && !continuation) + { + /* no one is going to synchronize with that task so we release + * the data structures now. In case the job was already locked + * by the caller, it is its responsibility to destroy the task. + * */ + if (destroy) + _starpu_task_destroy(task); + } + + /* A continuation is not much different from a regenerated task. 
*/ + if (regenerate || continuation) + { + STARPU_ASSERT_MSG((detach && !destroy && !synchronous) + || continuation + , "Regenerated task must be detached (was %u), and not have destroy=1 (was %u) or synchronous=1 (was %u)", detach, destroy, synchronous); + STARPU_AYU_ADDTASK(j->job_id, j->exclude_from_dag?NULL:task); + + { +#ifdef STARPU_OPENMP + unsigned continuation_resubmit = j->continuation_resubmit; + void (*continuation_callback_on_sleep)(void *arg) = j->continuation_callback_on_sleep; + void *continuation_callback_on_sleep_arg = j->continuation_callback_on_sleep_arg; + j->continuation_resubmit = 1; + j->continuation_callback_on_sleep = NULL; + j->continuation_callback_on_sleep_arg = NULL; + if (!continuation || continuation_resubmit) +#endif + { + /* We reuse the same job structure */ + task->status = STARPU_TASK_BLOCKED; + int ret = _starpu_submit_job(j, 0); + STARPU_ASSERT(!ret); + } +#ifdef STARPU_OPENMP + if (continuation && continuation_callback_on_sleep != NULL) + { + continuation_callback_on_sleep(continuation_callback_on_sleep_arg); + } +#endif + } + } + + _starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx, flops); + _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx); + struct _starpu_worker *worker; + worker = _starpu_get_local_worker_key(); + if (worker) + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + + if(worker->removed_from_ctx[sched_ctx] == 1 && worker->shares_tasks_lists[sched_ctx] == 1) + { + _starpu_worker_gets_out_of_ctx(sched_ctx, worker); + worker->removed_from_ctx[sched_ctx] = 0; + } + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } +} + +/* This function is called when a new task is submitted to StarPU + * it returns 1 if the tag deps are not fulfilled, 0 otherwise */ +static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j) +{ + unsigned ret; + + if (!j->task->use_tag) + { + /* this task does not use tags, so we can go on */ + return 0; + } + + struct _starpu_tag *tag = 
j->tag; + + struct _starpu_cg_list *tag_successors = &tag->tag_successors; + + _starpu_spin_lock(&tag->lock); + STARPU_ASSERT_MSG(tag->is_assigned == 1 || !tag_successors->ndeps, "a tag can be assigned only one task to wake (%llu had %u assigned tasks, and %u successors)", (unsigned long long) tag->id, tag->is_assigned, tag_successors->ndeps); + + if (tag_successors->ndeps != tag_successors->ndeps_completed) + { + tag->state = STARPU_BLOCKED; + j->task->status = STARPU_TASK_BLOCKED_ON_TAG; + ret = 1; + } + else + { + /* existing deps (if any) are fulfilled */ + /* If the same tag is being signaled by several tasks, do not + * clear a DONE state. If it's the same job submitted several + * times with the same tag, we have to do it */ + if (j->submitted == 2 || tag->state != STARPU_DONE) + tag->state = STARPU_READY; + /* already prepare for next run */ + tag_successors->ndeps_completed = 0; + ret = 0; + } + + _starpu_spin_unlock(&tag->lock); + return ret; +} + +static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j) +{ + unsigned ret; + + struct _starpu_cg_list *job_successors = &j->job_successors; + + if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed)) + { + STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG); + j->task->status = STARPU_TASK_BLOCKED_ON_TASK; + ret = 1; + } + else + { + /* existing deps (if any) are fulfilled */ + ret = 0; + } + + return ret; +} + +#ifdef STARPU_BUBBLE +int _starpu_bubble_unpartition_data_if_needed(struct _starpu_job *j) +{ + //_STARPU_DEBUG("[%s(%p)]\n", starpu_task_get_name(j->task), j->task); + int unpartition_needed = 0; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + unsigned nhandle = 0; + unsigned i; + struct starpu_task *control_task = NULL; + + for (i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, 
i); + + STARPU_PTHREAD_MUTEX_LOCK(&handle->unpartition_mutex); + + /** + * Version A + * + * We create a control task with the required data + * dependencies that will be automatically/magically + * handled by _starpu_data_partition_access_submit() + * called in _starpu_task_submit_head(). + */ + if (handle->nplans > 0) + { + if (unpartition_needed == 0) + { + control_task = starpu_task_create(); + control_task->name = "ucontrol"; + _starpu_task_declare_deps_array(j->task, 1, &control_task, 0); + + unpartition_needed = 1; + } + + //STARPU_TASK_SET_HANDLE(control_task, handle, nhandle); + control_task->handles[nhandle] = handle; + //STARPU_TASK_SET_MODE(control_task, mode, nhandle); + control_task->modes[nhandle] = mode; + nhandle ++; + } + /** + * Version B + * + * We find a way to call directly + * _starpu_data_partition_access_submit() here, and we + * (re-)plug the current task onto the last task + * generated by + * _starpu_data_partition_access_submit(). + */ + else + { + //_starpu_data_partition_access_submit(handle, (mode & STARPU_W) != 0); + // + replug on the current task + } + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->unpartition_mutex); + } + + // No data has been partitioned, let's keep going + if (unpartition_needed == 0) + { + return 0; + } + + // Add the dependency on the unpartition tasks + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + j->task->status = STARPU_TASK_BLOCKED_ON_TASK; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + STARPU_ASSERT(control_task); + int ret = starpu_task_submit(control_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit(control_task)"); + + return 1; +} + +static int _starpu_turn_task_into_bubble(struct _starpu_job *j) +{ + if (j->already_turned_into_bubble) + { + /* + * We have first checked all dependencies of the bubble, + * and secondly checked in a second stage the additional + * partition/unpartition dependencies + */ + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return 0; + } + j->already_turned_into_bubble 
= 1; + //_STARPU_DEBUG("[%s(%p)]\n", starpu_task_get_name(j->task), j->task); + + if (j->is_bubble == 1) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return 0; + } + else if (j->task->cl == NULL) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return 0; + } + else + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return _starpu_bubble_unpartition_data_if_needed(j); + } +} + +void _starpu_bubble_execute(struct _starpu_job *j) +{ + _STARPU_TRACE_BUBBLE(j); + _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); + _STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, 0, 0); + STARPU_ASSERT_MSG(j->task->bubble_gen_dag_func!=NULL || (j->task->cl && j->task->cl->bubble_gen_dag_func!=NULL), + "task->bubble_gen_dag_func MUST be defined\n"); + +#ifdef STARPU_VERBOSE + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC, &tp); + unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec; + _STARPU_DEBUG("{%llu} [%s(%p)] Running bubble\n", timestamp, starpu_task_get_name(j->task), j->task); +#endif + if (j->task->bubble_gen_dag_func) + j->task->bubble_gen_dag_func(j->task, j->task->bubble_gen_dag_func_arg); + else + j->task->cl->bubble_gen_dag_func(j->task, j->task->bubble_gen_dag_func_arg); + j->task->where = STARPU_NOWHERE; + _STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, 0, 0); +} +#endif + +/* + * In order, we enforce tag, task and data dependencies. The task is + * passed to the scheduler only once all these constraints are fulfilled. + * + * The job mutex has to be taken for atomicity with task submission, and + * is released here. 
+ */ +unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j) +{ + unsigned ret; + _STARPU_LOG_IN(); + + /* enforce tag dependencies */ + if (_starpu_not_all_tag_deps_are_fulfilled(j)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled"); + return 0; + } + + /* enforce task dependencies */ + if (_starpu_not_all_task_deps_are_fulfilled(j)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled"); + return 0; + } + +#ifdef STARPU_BUBBLE + /* Wait for all dependencies at the correct level to be + * fulfilled before adding missing partition/unpartition + * + * If partition/unpartition are submitted we will enter the if + * case and come back later when these new dependencies are + * fulfilled + */ + if (_starpu_turn_task_into_bubble(j)) + { + _STARPU_LOG_OUT_TAG("bubble"); + return 0; + } +#else + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); +#endif + +#ifdef STARPU_BUBBLE + if (j->is_bubble == 1) + { + _starpu_bubble_execute(j); + } + else +#endif + { + /* respect data concurrent access */ + if (_starpu_concurrent_data_access(j)) + { + _STARPU_LOG_OUT_TAG("concurrent_data_access"); + return 0; + } + } + +#ifdef STARPU_BUBBLE + if (j->task->bubble_parent != 0) + _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); +#endif + + ret = _starpu_push_task(j); + + _STARPU_LOG_OUT(); + return ret; +} + +/* Tag deps are already fulfilled */ +unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j) +{ + unsigned ret; + + /* enforce task dependencies */ + if (_starpu_not_all_task_deps_are_fulfilled(j)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return 0; + } +#ifdef STARPU_BUBBLE + if (_starpu_turn_task_into_bubble(j)) + { + _STARPU_LOG_OUT_TAG("bubble"); + return 0; + } +#else + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); +#endif + +#ifdef STARPU_BUBBLE + if (j->is_bubble == 1) + { + _starpu_bubble_execute(j); + } + else +#endif + { + 
/* respect data concurrent access */ + if (_starpu_concurrent_data_access(j)) + return 0; + } + +#ifdef STARPU_BUBBLE + if (j->task->bubble_parent != 0) + _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); +#endif + + ret = _starpu_push_task(j); + + return ret; +} + +#ifdef STARPU_OPENMP +/* When waking up a continuation, we only enforce new task dependencies */ +unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j) +{ + unsigned ret; + _STARPU_LOG_IN(); + STARPU_ASSERT(j->discontinuous); + + /* enforce task dependencies */ + if (_starpu_not_all_task_deps_are_fulfilled(j)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled"); + return 0; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + ret = _starpu_push_task(j); + + _STARPU_LOG_OUT(); + return ret; +} +#endif + +unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j) +{ + unsigned ret; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + /* Take references */ + _starpu_submit_job_take_data_deps(j); + +#ifdef STARPU_BUBBLE + if (j->task->bubble_parent != 0) + _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); +#endif + + /* And immediately push task */ + ret = _starpu_push_task(j); + + return ret; +} + +/* This is called when a tag or task dependency is to be released. */ +void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag) +{ + if (!j->submitted) + /* It's not even submitted actually */ + return; + struct _starpu_cg_list *job_successors = &j->job_successors; + /* tag is 1 when we got woken up by a tag dependency about to be + * released, and thus we have to check the exact number of + * dependencies. Otherwise it's a task dependency which is about to be + * released.
*/ + if (job_successors->ndeps != job_successors->ndeps_completed + 1 - tag) + /* There are still other dependencies */ + return; + + _starpu_enforce_data_deps_notify_job_ready_soon(j, data); +} + +/* Ordered tasks are simply recorded as they arrive in the local_ordered_tasks + * ring buffer, indexed by order, and pulled from its head. */ +/* TODO: replace with perhaps a heap */ + +/* This function must be called with worker->sched_mutex taken */ +struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker) +{ + struct starpu_task *task = NULL; + + if (worker->local_ordered_tasks_size) + { + task = worker->local_ordered_tasks[worker->current_ordered_task]; + if (task) + { + worker->local_ordered_tasks[worker->current_ordered_task] = NULL; + STARPU_ASSERT(task->workerorder == worker->current_ordered_task_order); + /* Next ordered task is there, return it */ + worker->current_ordered_task = (worker->current_ordered_task + 1) % worker->local_ordered_tasks_size; + worker->current_ordered_task_order++; + _starpu_pop_task_end(task); + return task; + } + } + + if (!starpu_task_prio_list_empty(&worker->local_tasks)) + task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks); + + _starpu_pop_task_end(task); + return task; +} + +int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task) +{ + /* Check that the worker is able to execute the task ! 
*/ + STARPU_ASSERT(task && task->cl); + if (STARPU_UNLIKELY(!(worker->worker_mask & task->where))) + return -ENODEV; + + starpu_worker_lock(worker->workerid); + + if (task->execute_on_a_specific_worker && task->workerorder) + { + STARPU_ASSERT_MSG(task->workerorder >= worker->current_ordered_task_order, "worker order values must not have duplicates (%u pushed to worker %d, but %u already passed)", task->workerorder, worker->workerid, worker->current_ordered_task_order); + /* Put it in the ordered task ring */ + unsigned needed = task->workerorder - worker->current_ordered_task_order + 1; + if (worker->local_ordered_tasks_size < needed) + { + /* Increase the size */ + unsigned alloc = worker->local_ordered_tasks_size; + struct starpu_task **new; + + if (!alloc) + alloc = 1; + while (alloc < needed) + alloc *= 2; + _STARPU_MALLOC(new, alloc * sizeof(*new)); + + if (worker->local_ordered_tasks_size) + { + /* Put existing tasks at the beginning of the new ring */ + unsigned copied = worker->local_ordered_tasks_size - worker->current_ordered_task; + memcpy(new, &worker->local_ordered_tasks[worker->current_ordered_task], copied * sizeof(*new)); + memcpy(new + copied, worker->local_ordered_tasks, (worker->local_ordered_tasks_size - copied) * sizeof(*new)); + } + memset(new + worker->local_ordered_tasks_size, 0, (alloc - worker->local_ordered_tasks_size) * sizeof(*new)); + free(worker->local_ordered_tasks); + worker->local_ordered_tasks = new; + worker->local_ordered_tasks_size = alloc; + worker->current_ordered_task = 0; + } + worker->local_ordered_tasks[(worker->current_ordered_task + task->workerorder - worker->current_ordered_task_order) % worker->local_ordered_tasks_size] = task; + } + else + { + starpu_task_prio_list_push_back(&worker->local_tasks, task); + } + + starpu_wake_worker_locked(worker->workerid); + starpu_push_task_end(task); + starpu_worker_unlock(worker->workerid); + + return 0; +} diff --git a/src/core/jobs.h b/src/core/jobs.h new file mode 100644 index 
0000000..92abef9 --- /dev/null +++ b/src/core/jobs.h @@ -0,0 +1,296 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __JOBS_H__ +#define __JOBS_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +struct _starpu_worker; + +/** codelet function */ +typedef void (*_starpu_cl_func_t)(void **, void *); + +#define _STARPU_MAY_PERFORM(j, arch) ((j)->task->where & STARPU_##arch) + +struct _starpu_data_descr +{ + starpu_data_handle_t handle; + enum starpu_data_access_mode mode; + int orig_node; /** This is the original node in the codelet */ + int node; /** This is the value actually chosen, only set by + _starpu_fetch_task_input for coherency with + _starpu_fetch_task_input_tail and __starpu_push_task_output */ + int index; + + int orderedindex; /** For this field the array is actually indexed by + parameter order, and this provides the ordered + index */ +}; + +#ifdef STARPU_DEBUG +MULTILIST_CREATE_TYPE(_starpu_job, all_submitted) +#endif +/** A job is the internal representation of a task. 
*/ +struct _starpu_job +{ + /** Each job is attributed a unique id. This is however only defined when recording traces or using jobid-based task breakpoints */ + unsigned long job_id; + + /** The task associated to that job */ + struct starpu_task *task; + + /** A task that this will unlock quickly, e.g. we are the pre_sync part + * of a data acquisition, and the caller promised that data release will + * happen immediately, so that the post_sync task will be started + * immediately after. */ + struct _starpu_job *quick_next; + + /** These synchronization structures are used to wait for the job to be + * available or terminated for instance. */ + starpu_pthread_mutex_t sync_mutex; + starpu_pthread_cond_t sync_cond; + + /** To avoid deadlocks, we reorder the different buffers accessed by + * the task so that we always grab the rw-lock associated to the + * handles in the same order. */ + struct _starpu_data_descr ordered_buffers[STARPU_NMAXBUFS]; + struct _starpu_task_wrapper_dlist dep_slots[STARPU_NMAXBUFS]; + struct _starpu_data_descr *dyn_ordered_buffers; + struct _starpu_task_wrapper_dlist *dyn_dep_slots; + + /** If a tag is associated to the job, this points to the internal data + * structure that describes the tag status. */ + struct _starpu_tag *tag; + + /** Maintain a list of all the completion groups that depend on the job. + * */ + struct _starpu_cg_list job_successors; + + /** Task whose termination depends on this task */ + struct starpu_task *end_rdep; + + /** For tasks with cl==NULL but submitted with explicit data dependency, + * the handle for this dependency, so as to remove the task from the + * last_writer/readers */ + starpu_data_handle_t implicit_dep_handle; + struct _starpu_task_wrapper_dlist implicit_dep_slot; + + /** Indicates whether the task associated to that job has already been + * submitted to StarPU (1) or not (0) (using starpu_task_submit). + * Becomes and stays 2 when the task is submitted several times.
+ * + * Protected by j->sync_mutex. + */ + unsigned submitted:2; + + /** Indicates whether the task associated to this job is terminated or + * not. + * + * Protected by j->sync_mutex. + */ + unsigned terminated:2; + +#ifdef STARPU_OPENMP + /** Job is a continuation or a regular task. */ + unsigned continuation; + + /** If 0, the prepared continuation is not resubmitted automatically + * when going to sleep, if 1, the prepared continuation is immediately + * resubmitted when going to sleep. */ + unsigned continuation_resubmit; + + /** Callback function called when: + * - The continuation starpu task is ready to be submitted again if + * continuation_resubmit = 0; + * - The continuation starpu task has just been re-submitted if + * continuation_resubmit = 1. */ + void (*continuation_callback_on_sleep)(void *arg); + void *continuation_callback_on_sleep_arg; + + void (*omp_cleanup_callback)(void *arg); + void *omp_cleanup_callback_arg; + + /** Job has been stopped at least once. */ + unsigned discontinuous; + + /** Cumulated execution time for discontinuous jobs */ + struct timespec cumulated_ts; + + /** Cumulated energy consumption for discontinuous jobs */ + double cumulated_energy_consumed; +#endif + + /** The value of the footprint that identifies the job may be stored in + * this structure. */ + uint32_t footprint; + unsigned footprint_is_computed:1; + + /** Should that task appear in the debug tools ? (eg. the DAG generated + * with dot) */ + unsigned exclude_from_dag:1; + + /** Is that task internal to StarPU? */ + unsigned internal:1; + /** Did that task use sequential consistency for its data? */ + unsigned sequential_consistency:1; + + /** During the reduction of a handle, StarPU may have to submit tasks to + * perform the reduction itself: those task should not be stalled while + * other tasks are blocked until the handle has been properly reduced, + * so we need a flag to differentiate them from "normal" tasks. 
*/ + unsigned reduction_task:1; + + /** The implementation associated to the job */ + unsigned nimpl; + + /** Number of workers executing that task (>1 if the task is parallel) + * */ + int task_size; + + /** The worker the task is running on (or -1 when not running yet) */ + int workerid; + + /** In case we have assigned this job to a combined workerid */ + int combined_workerid; + + /** How many workers are currently running an alias of that job (for + * parallel tasks only). */ + int active_task_alias_count; + + struct bound_task *bound_task; + + /** Parallel workers may have to synchronize before/after the execution of a parallel task. */ + starpu_pthread_barrier_t before_work_barrier; + starpu_pthread_barrier_t after_work_barrier; + unsigned after_work_busy_barrier; + + struct _starpu_graph_node *graph_node; + +#ifdef STARPU_DEBUG + /** Linked-list of all jobs, for debugging */ + struct _starpu_job_multilist_all_submitted all_submitted; +#endif + +#ifdef STARPU_BUBBLE + int already_turned_into_bubble; + unsigned is_bubble:1; +#endif +}; + +#ifdef STARPU_DEBUG +MULTILIST_CREATE_INLINES(struct _starpu_job, _starpu_job, all_submitted) +#endif + +void _starpu_job_init(void); +void _starpu_job_fini(void); + +/** Create an internal struct _starpu_job *structure to encapsulate the task. */ +struct _starpu_job* _starpu_job_create(struct starpu_task *task) STARPU_ATTRIBUTE_MALLOC; + +/** Destroy the data structure associated to the job structure */ +void _starpu_job_destroy(struct _starpu_job *j); + +/** Test for the termination of the job */ +int _starpu_job_finished(struct _starpu_job *j); + +/** Wait for the termination of the job */ +void _starpu_wait_job(struct _starpu_job *j); + +#ifdef STARPU_OPENMP +/** Test for the termination of the job */ +int _starpu_test_job_termination(struct _starpu_job *j); + +/** Prepare the job for accepting new dependencies before becoming a continuation. 
*/ + +void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit, + void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg); +void _starpu_job_prepare_for_continuation(struct _starpu_job *j); +void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j, + void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg); +#endif + +/** Specify that the task should not appear in the DAG generated by debug tools. */ +void _starpu_exclude_task_from_dag(struct starpu_task *task); + +/** try to submit job j, enqueue it if it's not schedulable yet. The job's sync mutex is supposed to be held already */ +unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j); +unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j); +#ifdef STARPU_OPENMP +/** When waking up a continuation, we only enforce new task dependencies */ +unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j); +#endif + +unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j); +void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag); + +/** Called at the submission of the job */ +void _starpu_handle_job_submission(struct _starpu_job *j); +/** This function must be called after the execution of a job, this triggers all + * job's dependencies and perform the callback function if any. */ +void _starpu_handle_job_termination(struct _starpu_job *j); + +/** Get the sum of the size of the data accessed by the job. */ +size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, struct _starpu_job *j); + +/** Get a task from the local pool of tasks that were explicitly attributed to + * that worker. 
*/ +struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker); + +/** Put a task into the pool of tasks that are explicitly attributed to the + * specified worker. */ +int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task); + +/* Accessors for the ordered buffer descriptors of a job: they use dyn_ordered_buffers when it is set, and the static ordered_buffers array otherwise. Arguments are parenthesized so that any expression can be passed safely. */ +#define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[i].index : (job)->ordered_buffers[i].index) +#define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[i].handle : (job)->ordered_buffers[i].handle) +#define _STARPU_JOB_GET_ORDERED_BUFFER_MODE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[i].mode : (job)->ordered_buffers[i].mode) +#define _STARPU_JOB_GET_ORDERED_BUFFER_NODE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[i].node : (job)->ordered_buffers[i].node) +#define _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[i].orig_node : (job)->ordered_buffers[i].orig_node) + +#define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[i] = (buffer); else (job)->ordered_buffers[i] = (buffer);} while(0) +#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers : &(job)->ordered_buffers[0]) + +#define _STARPU_JOB_GET_DEP_SLOTS(job) (((job)->dyn_dep_slots) ? (job)->dyn_dep_slots : (job)->dep_slots) + +#pragma GCC visibility pop + +#endif // __JOBS_H__ diff --git a/src/core/parallel_task.c b/src/core/parallel_task.c new file mode 100644 index 0000000..d0615c2 --- /dev/null +++ b/src/core/parallel_task.c @@ -0,0 +1,65 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct starpu_task *starpu_task_dup(struct starpu_task *task) +{ + struct starpu_task *task_dup; + _STARPU_MALLOC(task_dup, sizeof(struct starpu_task)); + + /* TODO perhaps this is a bit too much overhead and we should only copy + * part of the structure ? */ + *task_dup = *task; + + return task_dup; +} + +void starpu_parallel_task_barrier_init_n(struct starpu_task* task, int worker_size) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + j->task_size = worker_size; + j->combined_workerid = -1; + j->active_task_alias_count = 0; + + //fprintf(stderr, "POP -> size %d best_size %d\n", worker_size, best_size); + + STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size); + STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size); + j->after_work_busy_barrier = worker_size; + + return; +} + +void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid) +{ + /* The master needs to dispatch the task between the + * different combined workers */ + struct _starpu_combined_worker *combined_worker = _starpu_get_combined_worker_struct(workerid); + int worker_size = combined_worker->worker_size; + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + starpu_parallel_task_barrier_init_n(task, worker_size); + 
+ j->combined_workerid = workerid; +} + diff --git a/src/core/perfmodel/energy_model.c b/src/core/perfmodel/energy_model.c new file mode 100644 index 0000000..f401be8 --- /dev/null +++ b/src/core/perfmodel/energy_model.c @@ -0,0 +1,299 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#ifdef STARPU_PAPI +#include +#endif +#ifdef STARPU_HAVE_HWLOC +#include +#endif +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include + +/* Driver porters: adding your driver here is optional, only needed for the support of energy profiling. */ +#ifdef STARPU_USE_CUDA +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION +#include +#include +#include +#endif +#endif + +#define ERROR_RETURN(retval, function) do { PAPI_perror(function); fprintf(stderr, "Error %d %s:line %d\n", retval,__FILE__,__LINE__); return(retval); } while (0) + +#if 0 +#define debug(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else +#define debug(fmt, ...) 
+#endif + +#ifdef STARPU_PAPI +#ifdef STARPU_HAVE_HWLOC +static const int N_EVTS = 2; +static int n_recorded; + +static int nsockets; + +static const char* event_names[] = +{ + "rapl::RAPL_ENERGY_PKG:cpu=%d", + "rapl::RAPL_ENERGY_DRAM:cpu=%d" +}; + +static int add_event(int EventSet, int socket); + +/* PAPI variables*/ + +/*must be initialized to PAPI_NULL before calling PAPI_create_event*/ +static int EventSet = PAPI_NULL; +#endif +#endif + +static double t1; + +#ifdef STARPU_USE_CUDA +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION +static unsigned long long energy_begin, energy_end; +static nvmlDevice_t device; +#endif +#endif + +int starpu_energy_start(int workerid STARPU_ATTRIBUTE_UNUSED, enum starpu_worker_archtype archi) +{ + t1 = starpu_timing_now(); + + /* Driver porters: adding your driver here is optional, only needed for the support of energy measurement. */ + + switch (archi) + { +#ifdef STARPU_PAPI +#ifdef STARPU_HAVE_HWLOC + case STARPU_CPU_WORKER: + { + STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n"); + + int retval, number; + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + hwloc_topology_t topology = config->topology.hwtopology; + + nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE); + + if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + ERROR_RETURN(retval, "PAPI_library_init"); + + /* Creating the eventset */ + if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_create_eventset"); + + int i; + for (i = 0 ; i < nsockets ; i ++) + { + /* return the index of socket */ + hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i); + STARPU_ASSERT(obj); + if ((retval = add_event(EventSet, obj->os_index)) != PAPI_OK) + { + if (retval == PAPI_EPERM) + _STARPU_DISP("PAPI could not access counters due to permissions errors. 
Perhaps your system requires to run measurements as root?\n"); + else if (retval == PAPI_ENOEVNT) + _STARPU_DISP("PAPI could not access counters. Perhaps your system requires to run measurements as root?\n"); + ERROR_RETURN(retval, "PAPI_add_named_event"); + } + } + + /* get the number of events in the event set */ + number = 0; + if ((retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_list_events"); + + debug("There are %d events in the event set\n", number); + + /* Start counting */ + if ((retval = PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_start"); + + return retval; + } +#endif +#endif + +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION + case STARPU_CUDA_WORKER: + { + if (!_starpu_nvmlDeviceGetHandleByIndex || !_starpu_nvmlDeviceGetTotalEnergyConsumption) + return -1; + + STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n"); + int devid = starpu_worker_get_devid(workerid); + int ret = _starpu_nvmlDeviceGetHandleByIndex(devid, &device); + if (ret != NVML_SUCCESS) + { + _STARPU_DISP("Could not get CUDA device %d from nvml\n", devid); + return -1; + } + ret = _starpu_nvmlDeviceGetTotalEnergyConsumption(device, &energy_begin); + if (ret != NVML_SUCCESS) + { + _STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid); + return -1; + } + return 0; + } + break; +#endif + + default: + printf("Error: worker is not supported ! 
\n"); + return -1; + } +} + +int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi) +{ + double energy = 0.; + int retval = 0; + unsigned cpuid = 0; + double t2 = starpu_timing_now(); + double t STARPU_ATTRIBUTE_UNUSED = t2 - t1; + + switch (archi) + { +#ifdef STARPU_PAPI +#ifdef STARPU_HAVE_HWLOC + case STARPU_CPU_WORKER: + { + STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n"); + + /*This is where we store the values we read from the eventset */ + long long values[nsockets*n_recorded]; + + /* Stop counting and store the values into the array */ + if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_stop"); + + int k,s; + + for(s = 0 ; s < nsockets ; s ++) + { + for(k = 0 ; k < n_recorded; k++) + { + double delta = values[s * n_recorded + k]*0.23/1.0e9; + energy += delta; + + debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n", + event_names[k], + delta, t, delta/(t*1.0E-6)); + } + } + + /*removes all events from a PAPI event set */ + if ((retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_cleanup_eventset"); + + /*deallocates the memory associated with an empty PAPI EventSet*/ + if ((retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval, "PAPI_destroy_eventset"); + + break; + } +#endif +#endif + +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION + case STARPU_CUDA_WORKER: + { + if (!_starpu_nvmlDeviceGetTotalEnergyConsumption) + return -1; + + STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n"); + int ret = _starpu_nvmlDeviceGetTotalEnergyConsumption(device, &energy_end); + if (ret != NVML_SUCCESS) + return -1; + energy = (energy_end - energy_begin) / 1000.; + debug("energy consumption on device %d is %f mJ (for %f us, Average 
power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6)); + break; + } +#endif + + default: + { + printf("Error: worker type %d is not supported! \n", archi); + return -1; + } + } + + + struct starpu_perfmodel_arch *arch; + if (workerid == -1) + /* Just take one of them */ + workerid = starpu_worker_get_by_type(archi, 0); + + arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); + + starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks); + + return retval; +} + +#ifdef STARPU_PAPI +#ifdef STARPU_HAVE_HWLOC +static int add_event(int eventSet, int socket) +{ + int retval, i; + for (i = 0; i < N_EVTS; i++) + { + char buf[255]; + snprintf(buf, sizeof(buf), event_names[i], socket); + + /* printf("Activating multiplex\n"); */ + /* retval = PAPI_set_multiplex(eventSet); */ + /* if(retval != PAPI_OK) { */ + /* _STARPU_DISP("cannot set multiplex\n"); */ + /* return retval; */ + /* } */ + retval = PAPI_add_named_event(eventSet, buf); + if (retval != PAPI_OK) + { + if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d")) + { + /* Ok, too bad */ + _STARPU_DISP("Note: DRAM energy measurement not available\n"); + n_recorded = i; + return PAPI_OK; + } + _STARPU_DISP("cannot add event '%s': %d\n", buf, retval); + return retval; + } + } + + n_recorded = i; + return(PAPI_OK); +} +#endif +#endif diff --git a/src/core/perfmodel/multiple_regression.c b/src/core/perfmodel/multiple_regression.c new file mode 100644 index 0000000..fe69d8d --- /dev/null +++ b/src/core/perfmodel/multiple_regression.c @@ -0,0 +1,375 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018-2018 Umeå University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Code for computing multiple linear regression */ + +#include + +typedef long int integer; +typedef double doublereal; + +#ifdef STARPU_MLR_MODEL +#ifdef STARPU_BUILT_IN_MIN_DGELS +int _starpu_dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); +#else +int dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); +#endif +#endif //STARPU_MLR_MODEL + +/* Count the '\n' characters found in f from its current position to its end, then rewind f to its beginning. */ +static unsigned long count_file_lines(FILE *f) +{ + unsigned long lines=0; + int ch; + + /* Loop on the value returned by fgetc() rather than on feof(): after a + * read error fgetc() keeps returning EOF while feof() stays false, + * which would make the loop spin forever. */ + while((ch = fgetc(f)) != EOF) + { + if(ch == '\n') + { + lines++; + } + } + rewind(f); + + return lines; +} + +/* Copy each entry of list_history into the regression arrays, one row per entry starting at row start: the duration goes to my[row] and the nparameters parameter values go to mpar[row*nparameters ...]. */ +static void dump_multiple_regression_list(double *mpar, double *my, int start, unsigned nparameters, struct starpu_perfmodel_history_list *list_history) +{ + struct starpu_perfmodel_history_list *ptr = list_history; + int i = start; + unsigned j; + while (ptr) + { + my[i] = ptr->entry->duration; + for(j=0; j<nparameters; j++) + mpar[i*nparameters+j] = ptr->entry->parameters[j]; + ptr = ptr->next; + i++; + } + +} + +static void load_old_calibration(double *mx, double *my, unsigned nparameters, char *filepath) +{ + char buffer[1024]; + char *line; + int i=0; + + FILE *f = fopen(filepath, "a+"); +
STARPU_ASSERT_MSG(f, "Could not load performance model from file %s\n", filepath); + + line = fgets(buffer,sizeof(buffer),f);//skipping first line + STARPU_ASSERT(line); + while((line=fgets(buffer,sizeof(buffer),f))!=NULL) + { + char *record = strtok(line,","); + STARPU_ASSERT_MSG(record, "Could not load performance model from file %s\n", filepath); + my[i] = atof(record); + record = strtok(NULL,","); + int j=0; + while(record != NULL) + { + mx[i*nparameters+j] = atof(record) ; + ++j; + record = strtok(NULL,","); + } + ++i ; + } + + fclose(f); +} + +static unsigned long find_long_list_size(struct starpu_perfmodel_history_list *list_history) +{ + long cnt = 0; + + struct starpu_perfmodel_history_list *ptr = list_history; + while (ptr) + { + cnt++; + ptr = ptr->next; + } + + return cnt; +} + +#ifdef STARPU_MLR_MODEL +int dgels_multiple_reg_coeff(double *mpar, double *my, unsigned long nn, unsigned ncoeff, unsigned nparameters, double *coeff, unsigned **combinations) +{ +/* Arguments */ +/* ========= */ + +/* TRANS (input) CHARACTER*1 */ +/* = 'N': the linear system involves A; */ +/* = 'T': the linear system involves A**T. */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* NRHS (input) INTEGER */ +/* The number of right hand sides, i.e., the number of */ +/* columns of the matrices B and X. NRHS >=0. */ + +/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ +/* On entry, the M-by-N matrix A. */ +/* On exit, */ +/* if M >= N, A is overwritten by details of its QR */ +/* factorization as returned by DGEQRF; */ +/* if M < N, A is overwritten by details of its LQ */ +/* factorization as returned by DGELQF. */ + +/* LDA (input) INTEGER */ +/* The leading dimension of the array A. LDA >= max(1,M). 
*/ + +/* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* On entry, the matrix B of right hand side vectors, stored */ +/* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* if TRANS = 'T'. */ +/* On exit, if INFO = 0, B is overwritten by the solution */ +/* vectors, stored columnwise: */ +/* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* squares solution vectors; the residual sum of squares for the */ +/* solution in each column is given by the sum of squares of */ +/* elements N+1 to M in that column; */ +/* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* minimum norm solution vectors; */ +/* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* least squares solution vectors; the residual sum of squares */ +/* for the solution in each column is given by the sum of */ +/* squares of elements M+1 to N in that column. */ + +/* LDB (input) INTEGER */ +/* The leading dimension of the array B. LDB >= MAX(1,M,N). */ + +/* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ + +/* LWORK (input) INTEGER */ +/* The dimension of the array WORK. */ +/* LWORK >= max(1, MN + max(MN, NRHS)). */ +/* For optimal performance, */ +/* LWORK >= max(1, MN + max(MN, NRHS) * NB). */ +/* where MN = min(M,N) and NB is the optimum block size. */ + +/* If LWORK = -1, then a workspace query is assumed; the routine */ +/* only calculates the optimal size of the WORK array, returns */ +/* this value as the first entry of the WORK array, and no error */ +/* message related to LWORK is issued by XERBLA. 
*/ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = i, the i-th diagonal element of the */ +/* triangular factor of A is zero, so that A does not have */ +/* full rank; the least squares solution could not be */ +/* computed. */ + +/* ===================================================================== */ + + if(nn <= ncoeff) + { + _STARPU_DISP("Warning: This function is not intended for the use when number of parameters is larger than the number of observations. Check how your matrices A and B were allocated or simply add more benchmarks.\n Multiple linear regression model will not be written into perfmodel file.\n"); + return 1; + } + + char trans = 'N'; + integer m = nn; + integer n = ncoeff; + integer nrhs = 1; // number of columns of B and X (which are vectors therefore nrhs=1) + doublereal *X; + _STARPU_MALLOC(X, sizeof(double)*n*m); // (/!\ modified at the output) contain the model and the different values of pararmters + doublereal *Y; + _STARPU_MALLOC(Y, sizeof(double)*m); + + double coefficient; + int i, j; + unsigned k; + for (i=0; i < m; i++) + { + Y[i] = my[i]; + X[i] = 1.; + for (j=1; j < n; j++) + { + coefficient = 1.; + for(k=0; k < nparameters; k++) + { + coefficient *= pow(mpar[i*nparameters+k],combinations[j-1][k]); + } + X[i+j*m] = coefficient; + } + } + + integer lda = m; + integer ldb = m; // + integer info = 0; + + integer lwork = n*2; + doublereal *work; // (output) + _STARPU_MALLOC(work, sizeof(double)*lwork); + + /* // Running LAPACK dgels_ */ +#ifdef STARPU_BUILT_IN_MIN_DGELS + _starpu_dgels_(&trans, &m, &n, &nrhs, X, &lda, Y, &ldb, work, &lwork, &info); +#else + dgels_(&trans, &m, &n, &nrhs, X, &lda, Y, &ldb, work, &lwork, &info); +#endif + + /* Check for the full rank */ + if(info != 0) + { + _STARPU_DISP("Warning: Problems when executing dgels_ function. 
It seems like the diagonal element %ld is zero.\n Multiple linear regression model will not be written into perfmodel file.\n", info); + free(X); + free(Y); + free(work); + return 1; + } + + /* Copy computed coefficients */ + for(i=0; i<(int) ncoeff; i++) + coeff[i] = Y[i]; + + free(X); + free(Y); + free(work); + + return 0; +} +#endif //STARPU_MLR_MODEL + +/* + * Validating the accuracy of the coefficients. + * For the the validation is extremely basic, but it should be improved. +*/ +void starpu_validate_mlr(double *coeff, unsigned ncoeff, const char *codelet_name) +{ + unsigned i; + if (coeff[0] < 0) + _STARPU_DISP("Warning: Constant computed by least square method is negative (%f). The model %s is likely to be inaccurate.\n", coeff[0], codelet_name); + + for(i=1; i 0) + load_old_calibration(mpar, my, nparameters, filepath); + + /* Filling X and Y matrices with measured values */ + dump_multiple_regression_list(mpar, my, old_lines, nparameters, ptr); + + if (ncoeff!=0 && combinations!=NULL) + { +#ifdef STARPU_MLR_MODEL + /* Computing coefficients using multiple linear regression */ + if(dgels_multiple_reg_coeff(mpar, my, n, ncoeff, nparameters, coeff, combinations)) + { + free(mpar); + free(my); + return 1; + } + /* Basic validation of the model accuracy */ + starpu_validate_mlr(coeff, ncoeff, codelet_name); +#else + _STARPU_DISP("Warning: StarPU was compiled without '--enable-mlr' option, thus multiple linear regression model will not be computed.\n"); + for(i=0; i 0) + { + f = fopen(filepath, "a+"); + STARPU_ASSERT_MSG(f, "Could not save performance model into the file %s\n", filepath); + } + else + { + f = fopen(filepath, "w+"); + STARPU_ASSERT_MSG(f, "Could not save performance model into the file %s\n", filepath); + fprintf(f, "Duration"); + for(j=0; j +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +int _starpu_multiple_regression(struct starpu_perfmodel_history_list *ptr, double *coeff, unsigned ncoeff, unsigned 
nparameters, const char **parameters_names, unsigned **combinations, const char *codelet_name); + +#pragma GCC visibility pop + +#endif // __MULTIPLE_REGRESSION_H__ diff --git a/src/core/perfmodel/perfmodel.c b/src/core/perfmodel/perfmodel.c new file mode 100644 index 0000000..6a68eb8 --- /dev/null +++ b/src/core/perfmodel/perfmodel.c @@ -0,0 +1,972 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +static int _starpu_expected_transfer_time_writeback; + +void _starpu_init_perfmodel(void) +{ + _starpu_expected_transfer_time_writeback = starpu_getenv_number_default("STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK", 0); +} + +/* This flag indicates whether performance models should be calibrated or not. + * 0: models need not be calibrated + * 1: models must be calibrated + * 2: models must be calibrated, existing models are overwritten. 
+ */ +static unsigned calibrate_flag = 0; +void _starpu_set_calibrate_flag(unsigned val) +{ + calibrate_flag = val; +} + +unsigned _starpu_get_calibrate_flag(void) +{ + return calibrate_flag; +} + +struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id) +{ + STARPU_ASSERT(workerid>=0); + + if(sched_ctx_id != STARPU_NMAX_SCHED_CTXS) + { + unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id); + if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS) + return _starpu_sched_ctx_get_perf_archtype(child_sched_ctx); + struct _starpu_sched_ctx *stream_ctx = _starpu_worker_get_ctx_stream(workerid); + if(stream_ctx != NULL) + return _starpu_sched_ctx_get_perf_archtype(stream_ctx->id); + } + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + /* This workerid may either be a basic worker or a combined worker */ + unsigned nworkers = config->topology.nworkers; + + if (workerid < (int)config->topology.nworkers) + return &config->workers[workerid].perf_arch; + + + /* We have a combined worker */ + unsigned ncombinedworkers = config->topology.ncombinedworkers; + STARPU_ASSERT(workerid < (int)(ncombinedworkers + nworkers)); + return &config->combined_workers[workerid - nworkers].perf_arch; +} + +/* + * PER WORKER model + */ + +static double per_worker_task_expected_perf(struct starpu_perfmodel *model, unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + double (*worker_cost_function)(struct starpu_task *task, unsigned workerid, unsigned nimpl); + + worker_cost_function = model->worker_cost_function; + STARPU_ASSERT_MSG(worker_cost_function, "STARPU_PER_WORKER needs worker_cost_function to be defined"); + + return worker_cost_function(task, workerid, nimpl); +} + +/* + * PER ARCH model + */ + +static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, struct starpu_task *task, unsigned nimpl) +{ + int comb; + double 
(*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); + + if (model->arch_cost_function) + return model->arch_cost_function(task, arch, nimpl); + + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + STARPU_ASSERT_MSG(comb != -1, "Didn't find the proper arch combination\n"); + STARPU_ASSERT_MSG(model->state->per_arch[comb] != NULL, "STARPU_PER_ARCH needs per-arch cost_function to be defined"); + + per_arch_cost_function = model->state->per_arch[comb][nimpl].cost_function; + STARPU_ASSERT_MSG(per_arch_cost_function, "STARPU_PER_ARCH needs per-arch cost_function to be defined"); + + return per_arch_cost_function(task, arch, nimpl); +} + +/* + * Common model + */ + +double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch* perf_arch) +{ + double speedup = 0; + int dev; + for(dev = 0; dev < perf_arch->ndevices; dev++) + { + enum starpu_worker_archtype archtype = perf_arch->devices[dev].type; + double coef = starpu_driver_info[archtype].alpha; + speedup += coef * (perf_arch->devices[dev].ncores); + } + return speedup; +} + +static double common_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct starpu_task *task, unsigned nimpl) +{ + double exp; + double alpha; + + STARPU_ASSERT_MSG(model->cost_function, "STARPU_COMMON requires common cost_function to be defined"); + + exp = model->cost_function(task, nimpl); + alpha = starpu_worker_get_relative_speedup(arch); + + STARPU_ASSERT(!_STARPU_IS_ZERO(alpha)); + + return exp/alpha; +} + +void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model) +{ + if (!model || model->is_loaded) + return; + + starpu_perfmodel_init(model); + + if (model->is_loaded) + return; + + switch (model->type) + { + case STARPU_PER_WORKER: + case STARPU_PER_ARCH: + case STARPU_COMMON: + /* Nothing more to do than init */ + break; + case STARPU_HISTORY_BASED: + case STARPU_NL_REGRESSION_BASED: + 
_starpu_load_history_based_model(model, 1); + break; + case STARPU_REGRESSION_BASED: + case STARPU_MULTIPLE_REGRESSION_BASED: + _starpu_load_history_based_model(model, 0); + break; + + default: + STARPU_ABORT(); + } + + model->is_loaded = 1; +} + +static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + double exp_perf = 0.0; + if (model) + { + _starpu_init_and_load_perfmodel(model); + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + switch (model->type) + { + case STARPU_PER_ARCH: + exp_perf = per_arch_task_expected_perf(model, arch, task, nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + case STARPU_COMMON: + exp_perf = common_task_expected_perf(model, arch, task, nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + case STARPU_HISTORY_BASED: + exp_perf = _starpu_history_based_job_expected_perf(model, arch, j, nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + case STARPU_REGRESSION_BASED: + exp_perf = _starpu_regression_based_job_expected_perf(model, arch, j, nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + case STARPU_NL_REGRESSION_BASED: + exp_perf = _starpu_non_linear_regression_based_job_expected_perf(model, arch, j,nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + case STARPU_MULTIPLE_REGRESSION_BASED: + exp_perf = _starpu_multiple_regression_based_job_expected_perf(model, arch, j, nimpl); + STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); + break; + default: + STARPU_ABORT(); + } + } + + /* no model was found */ + return exp_perf; +} + +static double starpu_model_worker_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, unsigned workerid, unsigned sched_ctx_id, 
unsigned nimpl) +{ + if (!model) + return 0.0; + + if (model->type == STARPU_PER_WORKER) + return per_worker_task_expected_perf(model, workerid, task, nimpl); + else + { + struct starpu_perfmodel_arch *per_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); + return starpu_model_expected_perf(task, model, per_arch, nimpl); + } +} + +double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + return starpu_model_expected_perf(task, task->cl->model, arch, nimpl); +} + +double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + return starpu_model_worker_expected_perf(task, task->cl->model, workerid, sched_ctx_id, nimpl); +} + +double starpu_task_expected_length_average(struct starpu_task *task, unsigned sched_ctx_id) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + double harmsum = 0.0; + unsigned n = 0; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned nimpl; + unsigned impl_mask; + unsigned workerid = workers->get_next(workers, &it); + + if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) + continue; + + double best_expected = DBL_MAX; + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!(impl_mask & (1U << nimpl))) + { + /* no one on that queue may execute this task */ + continue; + } + + double expected = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl); + if (expected < best_expected) + best_expected = expected; + } + harmsum += 1. 
/ best_expected; + n++; + } + + return n/harmsum; +} + +double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + return starpu_model_expected_perf(task, task->cl->energy_model, arch, nimpl); +} + +double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + return starpu_model_worker_expected_perf(task, task->cl->energy_model, workerid, sched_ctx_id, nimpl); + +} + +double starpu_task_expected_energy_average(struct starpu_task *task, unsigned sched_ctx_id) +{ + if (!task->cl) + /* Tasks without codelet don't actually take time */ + return 0.0; + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + double harmsum = 0.0; + unsigned n = 0; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned nimpl; + unsigned impl_mask; + unsigned workerid = workers->get_next(workers, &it); + + if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) + continue; + + double best_expected = DBL_MAX; + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!(impl_mask & (1U << nimpl))) + { + /* no one on that queue may execute this task */ + continue; + } + + double expected = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id, nimpl); + if (expected < best_expected) + best_expected = expected; + } + harmsum += 1. 
/ best_expected; + n++; + } + + return n/harmsum; +} + +double starpu_task_expected_conversion_time(struct starpu_task *task, + struct starpu_perfmodel_arch* arch, + unsigned nimpl) +{ + unsigned i; + double sum = 0.0; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + +#ifdef STARPU_DEVEL +#warning TODO: conversion time with combined arch perfmodel +#endif + STARPU_ASSERT_MSG(arch->ndevices == 1, "TODO"); + + for (i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle; + struct starpu_task *conversion_task; + enum starpu_node_kind node_kind; + + handle = STARPU_TASK_GET_HANDLE(task, i); + if (!_starpu_data_is_multiformat_handle(handle)) + continue; + + node_kind = starpu_worker_get_memory_node_kind(arch->devices[0].type); + if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind)) + continue; + + conversion_task = _starpu_create_conversion_task_for_arch(handle, node_kind); + sum += starpu_task_expected_length(conversion_task, arch, nimpl); + _starpu_spin_lock(&handle->header_lock); + handle->refcnt--; + handle->busy_count--; + if (!_starpu_data_check_not_busy(handle)) + _starpu_spin_unlock(&handle->header_lock); + starpu_task_clean(conversion_task); + free(conversion_task); + } + + return sum; +} + +/* Predict the transfer time (in µs) to move a handle between memory nodes */ +static double _starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, enum starpu_data_access_mode mode, size_t size) +{ + double duration = 0.; +#define MAX_REQUESTS 4 + unsigned src_nodes[MAX_REQUESTS]; + unsigned dst_nodes[MAX_REQUESTS]; + unsigned handling_nodes[MAX_REQUESTS]; + int nhops = _starpu_determine_request_path(handle, src_node, dst_node, mode, + MAX_REQUESTS, + src_nodes, dst_nodes, handling_nodes, 0); + int i; + + for (i = 0; i < nhops; i++) + duration += starpu_transfer_predict(src_nodes[i], dst_nodes[i], size); + + return duration; +} + +/* Predict the transfer time (in µs) to move a handle to a memory 
node */ +double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode) +{ + /* FIXME: Fix write-only mode with _starpu_expected_transfer_time_writeback */ + /* FIXME: count time_writeback only if the data is not dirty. Once it is dirty, we shouldn't + * count the writeback penalty again. */ + + /* If we don't need to read the content of the handle */ + if (!(mode & STARPU_R)) + return 0.0; + + if (starpu_data_is_on_node(handle, memory_node)) + return 0.0; + + size_t size = _starpu_data_get_size(handle); + + /* XXX in case we have an abstract piece of data (eg. with the + * void interface, this does not introduce any overhead, and we + * don't even want to consider the latency that is not + * relevant). */ + if (size == 0) + return 0.0; + + double duration = 0.; + + _starpu_spin_lock(&handle->header_lock); + int src_node = _starpu_select_src_node(handle, memory_node); + _starpu_spin_unlock(&handle->header_lock); + if (src_node >= 0) + { + duration += _starpu_data_expected_transfer_time(handle, src_node, memory_node, mode, size); + } + /* Else, will just create it in place. 
Ideally we should take the + * time to create it into account */ + + if (_starpu_expected_transfer_time_writeback && (mode & STARPU_W) && handle->home_node >= 0) + { + /* Will have to write back the produced data, artificially count + * the time to bring it back to its home node */ + duration += _starpu_data_expected_transfer_time(handle, memory_node, handle->home_node, STARPU_R, size); + } + + return duration; +} + +/* Data transfer performance modeling */ +double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task) +{ + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned buffer; + + double penalty = 0.0; + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); + int node = _starpu_task_data_get_node_on_node(task, buffer, memory_node); + + if (node >= 0) + penalty += starpu_data_expected_transfer_time(handle, node, mode); + } + + return penalty; +} + +/* Data transfer performance modeling */ +double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker) +{ + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned buffer; + + double penalty = 0.0; + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); + int node = _starpu_task_data_get_node_on_worker(task, buffer, worker); + + if (node >= 0) + penalty += starpu_data_expected_transfer_time(handle, node, mode); + } + + return penalty; +} + +/* Return the expected duration of the entire task bundle in µs */ +double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + double expected_length = 0.0; + + /* We expect the length of the bundle the be the sum of the different tasks length. 
*/ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + struct _starpu_task_bundle_entry *entry; + entry = bundle->list; + + while (entry) + { + if(!entry->task->scheduled) + { + double task_length = starpu_task_expected_length(entry->task, arch, nimpl); + + /* In case the task is not calibrated, we consider the task + * ends immediately. */ + if (task_length > 0.0) + expected_length += task_length; + } + + entry = entry->next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + + return expected_length; +} + +/* Return the expected energy consumption of the entire task bundle in J */ +double starpu_task_bundle_expected_energy(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + double expected_energy = 0.0; + + /* We expect total consumption of the bundle the be the sum of the different tasks consumption. */ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + struct _starpu_task_bundle_entry *entry; + entry = bundle->list; + + while (entry) + { + double task_energy = starpu_task_expected_energy(entry->task, arch, nimpl); + + /* In case the task is not calibrated, we consider the task + * ends immediately. */ + if (task_energy > 0.0) + expected_energy += task_energy; + + entry = entry->next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + + return expected_energy; +} + +/* Return the time (in µs) expected to transfer all data used within the bundle */ +double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node) +{ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + struct _starpu_handle_list *handles = NULL; + + /* We list all the handle that are accessed within the bundle. 
*/ + + /* For each task in the bundle */ + struct _starpu_task_bundle_entry *entry = bundle->list; + while (entry) + { + struct starpu_task *task = entry->task; + + if (task->cl) + { + unsigned b; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + for (b = 0; b < nbuffers; b++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, b); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, b); + + if (!(mode & STARPU_R)) + continue; + + /* Insert the handle in the sorted list in case + * it's not already in that list. */ + _starpu_insertion_handle_sorted(&handles, handle, mode); + } + } + + entry = entry->next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + + /* Compute the sum of data transfer time, and destroy the list */ + + double total_exp = 0.0; + + while (handles) + { + struct _starpu_handle_list *current = handles; + handles = handles->next; + + double exp; + exp = starpu_data_expected_transfer_time(current->handle, memory_node, current->mode); + + total_exp += exp; + + free(current); + } + + return total_exp; +} + +#define _PERF_MODEL_DIR_MAXLEN 256 +#define _PERF_MODEL_DIR_MAXNB 20 + +static char *_perf_model_paths[_PERF_MODEL_DIR_MAXNB]; +static int _perf_model_paths_nb=0; + +static int _perf_model_bus_location = -1; +static int _perf_model_bus_directory_existence_was_tested[_PERF_MODEL_DIR_MAXNB]; +static char *_perf_model_dir_bus = NULL; +static char *_perf_model_dirs_codelet[_PERF_MODEL_DIR_MAXNB]; + +static int _perf_model_codelet_directory_existence_was_tested[_PERF_MODEL_DIR_MAXNB]; + +static void _starpu_set_perf_model_dirs(); + +void _starpu_find_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen) +{ + const char *dot = strrchr(symbol, '.'); + int i=0; + + _starpu_set_perf_model_dirs(); + + for(i=0 ; _perf_model_paths[i]!=NULL ; i++) + { + snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); + 
//_STARPU_MSG("checking file %s\n", path); + int res = access(path, F_OK); + if (res == 0) + { + return; + } + } + + // The file was not found + path[0] = '\0'; +} + +void _starpu_find_perf_model_codelet_debug(const char *symbol, const char *hostname, const char *arch, char *path, size_t maxlen) +{ + const char *dot = strrchr(symbol, '.'); + int i=0; + + _starpu_set_perf_model_dirs(); + + for(i=0 ; _perf_model_paths[i]!=NULL ; i++) + { + snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); + //_STARPU_MSG("checking file %s\n", path); + int res = access(path, F_OK); + if (res == 0) + { + snprintf(path, maxlen, "%sdebug/%s%s%s%s", _perf_model_paths[i], symbol, dot?"":".", dot?"":hostname, arch); + return; + } + } + + // The file was not found + path[0] = '\0'; +} + +void _starpu_set_default_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen) +{ + _starpu_create_codelet_sampling_directory_if_needed(0); + const char *dot = strrchr(symbol, '.'); + snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[0], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); +} + +char *_starpu_get_perf_model_dir_default() +{ + _starpu_create_codelet_sampling_directory_if_needed(0); + return _perf_model_paths[0]; +} + +char *_starpu_get_perf_model_dir_bus() +{ + int loc = _starpu_create_bus_sampling_directory_if_needed(-1); + if (loc == -ENOENT) + return NULL; + if (_perf_model_dir_bus == NULL) + { + _STARPU_MALLOC(_perf_model_dir_bus, _PERF_MODEL_DIR_MAXLEN); + snprintf(_perf_model_dir_bus, _PERF_MODEL_DIR_MAXLEN, "%sbus/", _perf_model_paths[_perf_model_bus_location]); + } + return _perf_model_dir_bus; +} + +char **_starpu_get_perf_model_dirs_codelet() +{ + if (_perf_model_dirs_codelet[0] == NULL) + { + int i; + for(i=0 ; i<_perf_model_paths_nb ; i++) + { + _STARPU_MALLOC(_perf_model_dirs_codelet[i], _PERF_MODEL_DIR_MAXLEN); + 
snprintf(_perf_model_dirs_codelet[i], _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); + _starpu_create_codelet_sampling_directory_if_needed(i); + } + } + return _perf_model_dirs_codelet; +} + +static void _perf_model_add_dir(char *dir, int only_is_valid, char *var) +{ + STARPU_ASSERT_MSG(_perf_model_paths_nb < _PERF_MODEL_DIR_MAXNB, "Maximum number of performance models directory"); + + if (dir == NULL || strlen(dir) == 0) + { + _STARPU_MSG("Warning: directory <%s> as set %s is empty\n", dir, var); + return; + } + int add=1; + if (only_is_valid) + { + DIR *ddir = opendir(dir); + if (ddir == NULL) + { + add = 0; + _STARPU_MSG("Warning: directory <%s> as set %s does not exist\n", dir, var); + } + else + closedir(ddir); + } + + if (add == 1) + { + _STARPU_DEBUG("Adding directory <%s> as set %s at location %d\n", dir, var, _perf_model_paths_nb); + _STARPU_MALLOC(_perf_model_paths[_perf_model_paths_nb], _PERF_MODEL_DIR_MAXLEN); + snprintf(_perf_model_paths[_perf_model_paths_nb], _PERF_MODEL_DIR_MAXLEN, "%s/", dir); + _perf_model_bus_directory_existence_was_tested[_perf_model_paths_nb] = 0; + _perf_model_codelet_directory_existence_was_tested[_perf_model_paths_nb] = 0; + _perf_model_paths_nb ++; + _perf_model_paths[_perf_model_paths_nb] = NULL; + } +} + +void _starpu_set_perf_model_dirs() +{ + if (_perf_model_paths_nb != 0) return; + + char *env = starpu_getenv("STARPU_PERF_MODEL_DIR"); + if (env) + { + _perf_model_add_dir(env, 0, "by variable STARPU_PERF_MODEL_DIR"); + } + +#ifdef STARPU_PERF_MODEL_DIR + _perf_model_add_dir((char *)STARPU_PERF_MODEL_DIR, 0, "by configure parameter"); +#else + char home[_PERF_MODEL_DIR_MAXLEN]; + snprintf(home, _PERF_MODEL_DIR_MAXLEN, "%s/.starpu/sampling", _starpu_get_home_path()); + _perf_model_add_dir(home, 0, "by STARPU_HOME directory"); +#endif + + env = starpu_getenv("STARPU_PERF_MODEL_PATH"); + if (env) + { + char *saveptr, *token; + token = strtok_r(env, ":", &saveptr); + for (; token 
!= NULL; token = strtok_r(NULL, ",", &saveptr)) + { + _perf_model_add_dir(token, 1, "by variable STARPU_PERF_MODEL_PATH"); + } + } + + _perf_model_add_dir(STARPU_SAMPLING_DIR, 1, "by installation directory"); +} + +int _starpu_set_default_perf_model_bus() +{ + assert(_perf_model_bus_location < 0); + _perf_model_bus_location = 0; + return _perf_model_bus_location; +} + +int _starpu_get_perf_model_bus() +{ + if (_perf_model_bus_location != -1) + return _perf_model_bus_location; + + char hostname[65]; + int i=0; + + _starpu_set_perf_model_dirs(); + _starpu_gethostname(hostname, sizeof(hostname)); + + while(_perf_model_paths[i]) + { + char path[PATH_LENGTH]; + snprintf(path, PATH_LENGTH, "%sbus/%s.config", _perf_model_paths[i], hostname); + _STARPU_DEBUG("checking path %s\n", path); + int res = access(path, F_OK); + if (res == 0) + { + _perf_model_bus_location = i; + return _perf_model_bus_location; + } + i++; + } + return -ENOENT; +} + +int _starpu_create_bus_sampling_directory_if_needed(int location) +{ + if (location < 0) + location = _starpu_get_perf_model_bus(); + if (location == -ENOENT) + return -ENOENT; + + STARPU_ASSERT_MSG(location < _perf_model_paths_nb, "Location %d for performance models file is invalid", location); + if (!_perf_model_bus_directory_existence_was_tested[location]) + { + char *dir = _perf_model_paths[location]; + + _STARPU_DEBUG("creating directories at <%s>\n", dir); + + /* The performance of the codelets are stored in + * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in + * $STARPU_PERF_MODEL_DIR/bus/ so that we don't have name collisions */ + + _starpu_mkpath_and_check(dir, S_IRWXU); + + /* Performance of the memory subsystem */ + char bus[_PERF_MODEL_DIR_MAXLEN]; + snprintf(bus, _PERF_MODEL_DIR_MAXLEN, "%s/bus/", dir); + _starpu_mkpath_and_check(bus, S_IRWXU); + + _perf_model_bus_directory_existence_was_tested[location] = 1; + } + return 0; +} + +void _starpu_create_codelet_sampling_directory_if_needed(int location) 
+{ + STARPU_ASSERT_MSG(location < _perf_model_paths_nb, "Location %d for performance models file is invalid", location); + if (!_perf_model_codelet_directory_existence_was_tested[location]) + { + char *dir = _perf_model_paths[location]; + + if (dir) + { + _STARPU_DEBUG("creating directories at <%s>\n", dir); + + /* Per-task performance models */ + char codelet[_PERF_MODEL_DIR_MAXLEN]; + snprintf(codelet, _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", dir, _STARPU_PERFMODEL_VERSION); + _starpu_mkpath_and_check(codelet, S_IRWXU); + + /* Performance debug measurements */ + char debug[_PERF_MODEL_DIR_MAXLEN]; + snprintf(debug, _PERF_MODEL_DIR_MAXLEN, "%sdebug/", dir); + _starpu_mkpath(debug, S_IRWXU); + + _perf_model_codelet_directory_existence_was_tested[location] = 1; + } + } +} + +void starpu_perfmodel_free_sampling(void) +{ + int i; + for(i=0 ; i<_perf_model_paths_nb ; i++) + { + free(_perf_model_paths[i]); + _perf_model_paths[i] = NULL; + _perf_model_bus_directory_existence_was_tested[i] = 0; + _perf_model_codelet_directory_existence_was_tested[i] = 0; + free(_perf_model_dirs_codelet[i]); + _perf_model_dirs_codelet[i] = NULL; + } + _perf_model_paths_nb = 0; + _perf_model_bus_location = -1; + free(_perf_model_dir_bus); + _perf_model_dir_bus = NULL; + _starpu_free_arch_combs(); +} + + +static double nop_cost_function(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch *a STARPU_ATTRIBUTE_UNUSED, unsigned i STARPU_ATTRIBUTE_UNUSED) +{ + return 0.000001; +} + +struct starpu_perfmodel starpu_perfmodel_nop = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = nop_cost_function, +}; + +/* This function is intended to be used by external tools that should read + * the performance model files */ +int starpu_perfmodel_list(FILE *output) +{ +#ifdef HAVE_SCANDIR + struct dirent **list; + int i=0; + + _starpu_set_perf_model_dirs(); + + for(i=0 ; _perf_model_paths[i]!=NULL ; i++) + { + char pcodelet[_PERF_MODEL_DIR_MAXLEN]; + int n; + + snprintf(pcodelet, 
_PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); + n = scandir(pcodelet, &list, NULL, alphasort); + if (n < 0) + { + _STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", pcodelet, strerror(errno)); + } + else + { + int j; + fprintf(output, "codelet directory: <%s>\n", pcodelet); + for (j = 0; j < n; j++) + { + if (strcmp(list[j]->d_name, ".") && strcmp(list[j]->d_name, "..")) + fprintf(output, "file: <%s>\n", list[j]->d_name); + free(list[j]); + } + free(list); + } + } + return 0; +#else + (void)output; + _STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n"); + return 1; +#endif +} + +void starpu_perfmodel_directory(FILE *output) +{ + int i; + + _starpu_set_perf_model_dirs(); + + for(i=0 ; _perf_model_paths[i]!=NULL ; i++) + { + char pcodelet[_PERF_MODEL_DIR_MAXLEN]; + snprintf(pcodelet, _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); + fprintf(output, "directory: <%s>\n", pcodelet); + } +} diff --git a/src/core/perfmodel/perfmodel.h b/src/core/perfmodel/perfmodel.h new file mode 100644 index 0000000..e1802fc --- /dev/null +++ b/src/core/perfmodel/perfmodel.h @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __PERFMODEL_H__ +#define __PERFMODEL_H__ + +/** @file */ + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +#ifdef __cplusplus +extern "C" +{ +#endif + +void _starpu_init_perfmodel(void); + +/** + * Performance model files are stored in a directory whose name + * includes the version of the performance model format. The version + * number is also written in the file itself. + * When updating the format, the macro _STARPU_PERFMODEL_VERSION + * should be updated. It is then possible to switch easily between + * different versions of StarPU having different performance model + * formats. + */ +#define _STARPU_PERFMODEL_VERSION 45 +#define PATH_LENGTH 256 +#define STR_SHORT_LENGTH 32 +#define STR_LONG_LENGTH 256 +#define STR_VERY_LONG_LENGTH 1024 + +struct _starpu_perfmodel_state +{ + struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/ + int** per_arch_is_set; /*STARPU_MAXIMPLEMENTATIONS*/ + + starpu_pthread_rwlock_t model_rwlock; + int *nimpls; + int *nimpls_set; + /** The number of combinations currently used by the model */ + int ncombs; + /** The number of combinations allocated in the array nimpls and ncombs */ + int ncombs_set; + int *combs; +}; + +struct starpu_data_descr; +struct _starpu_job; +struct starpu_perfmodel_arch; + +extern unsigned _starpu_calibration_minimum; + +void _starpu_find_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen); +void _starpu_find_perf_model_codelet_debug(const char *symbol, const char *hostname, const char *arch, char *path, size_t maxlen); +void _starpu_set_default_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen); + +char *_starpu_get_perf_model_dir_default(); +char **_starpu_get_perf_model_dirs_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +char *_starpu_get_perf_model_dir_bus(); + +double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct 
starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); +double _starpu_history_based_job_expected_deviation(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); +void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history); +void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model); +void _starpu_initialize_registered_performance_models(void); +void _starpu_deinitialize_registered_performance_models(void); +void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model); + +double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); +double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); +double _starpu_multiple_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); +void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned cpuid, double measured, unsigned nimpl, unsigned number); +int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch); + +int _starpu_create_bus_sampling_directory_if_needed(int location); +void _starpu_create_codelet_sampling_directory_if_needed(int location); + +void _starpu_load_bus_performance_files(void); + +void _starpu_init_bus_performance(void); + +int _starpu_get_perf_model_bus(); +int _starpu_set_default_perf_model_bus(); + +void _starpu_set_calibrate_flag(unsigned val); +unsigned _starpu_get_calibrate_flag(void); + +#if defined(STARPU_USE_CUDA) +unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid); +#endif +#if defined(STARPU_USE_OPENCL) +unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid); +#endif + 
+void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name); + +void _starpu_write_double(FILE *f, const char *format, double val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +int _starpu_read_double(FILE *f, char *format, double *val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen); + +void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb); + +void _starpu_free_arch_combs(void); + +#if defined(STARPU_HAVE_HWLOC) +hwloc_topology_t _starpu_perfmodel_get_hwtopology(); +#endif + +#ifdef __cplusplus +} +#endif + +#pragma GCC visibility pop + +#endif // __PERFMODEL_H__ diff --git a/src/core/perfmodel/perfmodel_bus.c b/src/core/perfmodel/perfmodel_bus.c new file mode 100644 index 0000000..86526e8 --- /dev/null +++ b/src/core/perfmodel/perfmodel_bus.c @@ -0,0 +1,3364 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifdef STARPU_USE_CUDA +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#endif +#include +#include + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_USE_OPENCL +#include +#endif + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#ifdef STARPU_HAVE_HWLOC +#include +#ifdef STARPU_HAVE_NVML_H +#undef nvmlDeviceGetPciInfo +#define nvmlDeviceGetPciInfo _starpu_nvmlDeviceGetPciInfo +#undef nvmlDeviceGetUUID +#define nvmlDeviceGetUUID _starpu_nvmlDeviceGetUUID +#include +#endif +#ifndef HWLOC_API_VERSION +#define HWLOC_OBJ_PU HWLOC_OBJ_PROC +#endif +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif +#endif + +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX +#include +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE +#include +#endif + +#define SIZE (32*1024*1024*sizeof(char)) +#define NITER 32 + +#ifndef STARPU_SIMGRID +static void _starpu_bus_force_sampling(int location); +#endif + +/* timing is in µs per byte (i.e. 
slowness, inverse of bandwidth) */ +struct dev_timing +{ + int numa_id; + int numa_distance; + double timing_htod; + double latency_htod; + double timing_dtoh; + double latency_dtoh; +}; + +static double raw_bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* MB/s, indexed by device ids */ +static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* MB/s, indexed by memory nodes */ +static double raw_latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* µs, indexed by devices ids */ +static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* µs, indexed by memory nodes */ +static unsigned was_benchmarked = 0; +#ifndef STARPU_SIMGRID +static unsigned ncpus = 0; +#endif +static unsigned nmem[STARPU_NRAM]; +#define nnumas (nmem[STARPU_CPU_RAM]) +#define ncuda (nmem[STARPU_CUDA_RAM]) +#define nopencl (nmem[STARPU_OPENCL_RAM]) +#define nmpims (nmem[STARPU_MPI_MS_RAM]) +#define ntcpip_ms (nmem[STARPU_TCPIP_MS_RAM]) + +#ifndef STARPU_SIMGRID +/* Benchmarking the performance of the bus */ + +static double numa_latency[STARPU_MAXNUMANODES][STARPU_MAXNUMANODES]; +static double numa_timing[STARPU_MAXNUMANODES][STARPU_MAXNUMANODES]; + +static uint64_t cuda_size[STARPU_MAXCUDADEVS]; +static char cuda_devname[STARPU_MAXCUDADEVS][256]; +#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) +static int gpu_numa[STARPU_NRAM][STARPU_NMAXDEVS]; /* hwloc NUMA logical ID */ +#endif +#endif + +/* preference order of NUMA nodes (logical indexes) */ +static unsigned affinity_matrix[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; + +#ifndef STARPU_SIMGRID +static double timing_dtod[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_NMAXDEVS]; +static double latency_dtod[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_NMAXDEVS]; + +static struct dev_timing timing_per_numa[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; + +#ifdef STARPU_USE_CUDA +static char cudadev_direct[STARPU_MAXNODES][STARPU_MAXNODES]; +#endif + +static uint64_t opencl_size[STARPU_MAXOPENCLDEVS]; +static char 
opencl_devname[STARPU_MAXOPENCLDEVS][64]; +#endif + +#ifdef STARPU_HAVE_HWLOC +static hwloc_topology_t hwtopology; +#if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES +static struct hwloc_distances_s *numa_distances; +#endif + +hwloc_topology_t _starpu_perfmodel_get_hwtopology() +{ + return hwtopology; +} + +static int find_cpu_from_numa_node(unsigned numa_id) +{ + hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_id); + + if (obj) + { +#if HWLOC_API_VERSION >= 0x00020000 + /* From hwloc 2.0, NUMAnode objects do not contain CPUs, they + * are contained in a group which contains the CPUs. */ + obj = obj->parent; +#endif + } + else + { + /* No such NUMA node, probably hwloc 1.x with no NUMA + * node, just take one CPU from the whole system */ + obj = hwloc_get_root_obj(hwtopology); + } + + STARPU_ASSERT(obj); + hwloc_obj_t current = obj; + + while (current->type != HWLOC_OBJ_PU) + { + current = current->first_child; + + /* If we reach a leaf without finding a "PU" obj (returning -1 below), perhaps we are + * just not allowed to use it. 
*/ + if (!current) + return -1; + } + + STARPU_ASSERT(current->type == HWLOC_OBJ_PU); + + return current->logical_index; +} +#endif + +#if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && !defined(STARPU_SIMGRID) + +static void set_numa_distance(int dev, unsigned numa, enum starpu_node_kind arch, struct dev_timing *dev_timing_per_cpu) +{ + /* A priori we don't know the distance */ + dev_timing_per_cpu->numa_distance = -1; + +#ifdef STARPU_HAVE_HWLOC + if (nnumas <= 1) + return; + + if (!starpu_driver_info[starpu_memory_node_get_worker_archtype(arch)].get_hwloc_obj) + return; + + hwloc_obj_t obj = starpu_driver_info[starpu_memory_node_get_worker_archtype(arch)].get_hwloc_obj(hwtopology, dev); + if (!obj) + return; + + hwloc_obj_t numa_obj = _starpu_numa_get_obj(obj); + if (!numa_obj) + return; + + if (numa_obj->logical_index == numa) + { + _STARPU_DEBUG("GPU is on NUMA %d, distance zero\n", numa); + dev_timing_per_cpu->numa_distance = 0; + return; + } + +#if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES + if (!numa_distances) + return; + + hwloc_obj_t drive_numa_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); + hwloc_uint64_t gpu2drive, drive2gpu; + if (!drive_numa_obj) + return; + + _STARPU_DEBUG("GPU is on NUMA %d vs %d\n", numa_obj->logical_index, numa); + if (hwloc_distances_obj_pair_values(numa_distances, numa_obj, drive_numa_obj, &gpu2drive, &drive2gpu) == 0) + { + _STARPU_DEBUG("got distance G2H %lu H2G %lu\n", (unsigned long) gpu2drive, (unsigned long) drive2gpu); + dev_timing_per_cpu->numa_distance = (gpu2drive + drive2gpu) / 2; + } +#endif +#endif +} + +/* TODO: factorize by using starpu_malloc, and the driver's malloc/free_on_node, copy_data_from/to. + * Will probably need to introduce a method for cudaDeviceReset, + * for MPI_Barrier, and for determining which combinations should be measured. 
*/ + +#ifdef STARPU_USE_CUDA + +static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, unsigned numa, int cpu, struct dev_timing *dev_timing_per_cpu) +{ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + size_t size = SIZE; + + /* Initialize CUDA context on the device */ + /* We do not need to enable OpenGL interoperability at this point, + * since we cleanly shutdown CUDA before returning. */ + cudaSetDevice(dev); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* hack to force the initialization */ + cudaFree(0); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* Get the maximum size which can be allocated on the device */ + struct cudaDeviceProp prop; + cudaError_t cures; + cures = cudaGetDeviceProperties(&prop, dev); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + cuda_size[dev] = prop.totalGlobalMem; + strncpy(cuda_devname[dev], prop.name, sizeof(cuda_devname[dev])); + cuda_devname[dev][sizeof(cuda_devname[dev])-1] = 0; + if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; + + /* Allocate a buffer on the device */ + unsigned char *d_buffer; + cures = cudaMalloc((void **)&d_buffer, size); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* Allocate a buffer on the host */ + unsigned char *h_buffer; + +#if defined(STARPU_HAVE_HWLOC) + if (nnumas > 1) + { + /* different NUMA nodes available */ + hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); + STARPU_ASSERT(obj); +#if HWLOC_API_VERSION >= 0x00020000 + h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +#else + h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, 
HWLOC_MEMBIND_BIND, 0); +#endif + } + else +#endif + { + /* we use STARPU_MAIN_RAM */ + _STARPU_MALLOC(h_buffer, size); + } + + cures = cudaHostRegister((void *)h_buffer, size, 0); + /* cures must hold the cudaHostRegister status so that a failure to pin the host buffer is reported */ + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* Fill them */ + memset(h_buffer, 0, size); + cudaMemset(d_buffer, 0, size); + cudaDeviceSynchronize(); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + unsigned iter; + double timing; + double start; + double end; + + /* Measure upload bandwidth */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpy(d_buffer, h_buffer, size, cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->timing_htod = timing/NITER/size; + + /* Measure download bandwidth */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpy(h_buffer, d_buffer, size, cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->timing_dtoh = timing/NITER/size; + + /* Measure upload latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpy(d_buffer, h_buffer, 1, cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->latency_htod = timing/NITER; + + /* Measure download latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpy(h_buffer, d_buffer, 1, cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->latency_dtoh = timing/NITER; + + /* Free buffers */ + cudaHostUnregister(h_buffer); +#if defined(STARPU_HAVE_HWLOC) + if (nnumas > 1) + 
{ + /* different NUMA nodes available */ + hwloc_free(hwtopology, h_buffer, size); + } + else +#endif + { + free(h_buffer); + } + + cudaFree(d_buffer); + +#if CUDART_VERSION >= 4000 + cudaDeviceReset(); +#else + cudaThreadExit(); +#endif +} + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst, double *timingr, double *latencyr) +{ + size_t size = SIZE; + int can; + + /* Get the maximum size which can be allocated on the device */ + struct cudaDeviceProp prop; + cudaError_t cures; + + cures = cudaGetDeviceProperties(&prop, src); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; + cures = cudaGetDeviceProperties(&prop, dst); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; + + /* Initialize CUDA context on the source */ + /* We do not need to enable OpenGL interoperability at this point, + * since we cleanly shutdown CUDA before returning. */ + cudaSetDevice(src); + + if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) + { + cures = cudaDeviceCanAccessPeer(&can, src, dst); + (void) cudaGetLastError(); + if (!cures && can) + { + cures = cudaDeviceEnablePeerAccess(dst, 0); + (void) cudaGetLastError(); + if (!cures) + { + _STARPU_DISP("GPU-Direct %d -> %d\n", dst, src); + cudadev_direct[src][dst] = 1; + } + } + } + + /* Allocate a buffer on the device */ + unsigned char *s_buffer; + cures = cudaMalloc((void **)&s_buffer, size); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + cudaMemset(s_buffer, 0, size); + cudaDeviceSynchronize(); + + /* Initialize CUDA context on the destination */ + /* We do not need to enable OpenGL interoperability at this point, + * since we cleanly shutdown CUDA before returning. 
*/ + cudaSetDevice(dst); + + if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) + { + cures = cudaDeviceCanAccessPeer(&can, dst, src); + (void) cudaGetLastError(); + if (!cures && can) + { + cures = cudaDeviceEnablePeerAccess(src, 0); + (void) cudaGetLastError(); + if (!cures) + { + _STARPU_DISP("GPU-Direct %d -> %d\n", src, dst); + cudadev_direct[dst][src] = 1; + } + } + } + + /* Allocate a buffer on the device */ + unsigned char *d_buffer; + cures = cudaMalloc((void **)&d_buffer, size); + if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); + cudaMemset(d_buffer, 0, size); + cudaDeviceSynchronize(); + + unsigned iter; + double timing; + double start; + double end; + + /* Measure upload bandwidth */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpyPeer(d_buffer, dst, s_buffer, src, size); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + *timingr = timing/NITER/size; + + /* Measure upload latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + cudaMemcpyPeer(d_buffer, dst, s_buffer, src, 1); + cudaDeviceSynchronize(); + } + end = starpu_timing_now(); + timing = end - start; + + *latencyr = timing/NITER; + + /* Free buffers */ + cudaFree(d_buffer); + cudaSetDevice(src); + cudaFree(s_buffer); + +#if CUDART_VERSION >= 4000 + cudaDeviceReset(); +#else + cudaThreadExit(); +#endif +} +#endif +#endif + +#ifdef STARPU_USE_OPENCL +static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev, unsigned numa, int cpu, struct dev_timing *dev_timing_per_cpu) +{ + cl_context context; + cl_command_queue queue; + cl_int err=0; + size_t size = SIZE; + int not_initialized; + + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* Is the context already initialised ? 
*/ + starpu_opencl_get_context(dev, &context); + not_initialized = (context == NULL); + if (not_initialized == 1) + _starpu_opencl_init_context(dev); + + /* Get context and queue */ + starpu_opencl_get_context(dev, &context); + starpu_opencl_get_queue(dev, &queue); + + /* Get the maximum size which can be allocated on the device */ + cl_device_id device; + cl_ulong maxMemAllocSize, totalGlobalMem; + starpu_opencl_get_device(dev, &device); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + if (size > (size_t)maxMemAllocSize/4) size = maxMemAllocSize/4; + + err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE , sizeof(totalGlobalMem), &totalGlobalMem, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + opencl_size[dev] = totalGlobalMem; + + err = clGetDeviceInfo(device, CL_DEVICE_NAME , sizeof(opencl_devname[dev]), &opencl_devname[dev], NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + if (_starpu_opencl_get_device_type(dev) == CL_DEVICE_TYPE_CPU) + { + /* Let's not use too much RAM when running OpenCL on a CPU: it + * would make the OS swap like crazy. 
*/ + size /= 2; + } + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + /* Allocate a buffer on the device */ + cl_mem d_buffer; + d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + /* Allocate a buffer on the host */ + unsigned char *h_buffer; +#if defined(STARPU_HAVE_HWLOC) + if (nnumas > 1) + { + /* different NUMA nodes available */ + hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); + STARPU_ASSERT(obj); +#if HWLOC_API_VERSION >= 0x00020000 + h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +#else + h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); +#endif + } + else +#endif + { + /* we use STARPU_MAIN_RAM */ + _STARPU_MALLOC(h_buffer, size); + } + + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + /* Fill them */ + memset(h_buffer, 0, size); + err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + /* hack to avoid third party libs to rebind threads */ + _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); + + unsigned iter; + double timing; + double start; + double end; + + /* Measure upload bandwidth */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + } + end = starpu_timing_now(); + timing = end - start; + + 
dev_timing_per_cpu->timing_htod = timing/NITER/size; + + /* Measure download bandwidth */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->timing_dtoh = timing/NITER/size; + + /* Measure upload latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, 1, h_buffer, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->latency_htod = timing/NITER; + + /* Measure download latency */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, 1, h_buffer, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + } + end = starpu_timing_now(); + timing = end - start; + + dev_timing_per_cpu->latency_dtoh = timing/NITER; + + /* Free buffers */ + err = clReleaseMemObject(d_buffer); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); +#if defined(STARPU_HAVE_HWLOC) + if (nnumas > 1) + { + /* different NUMA nodes available */ + hwloc_free(hwtopology, h_buffer, size); + } + else +#endif + { + free(h_buffer); + } + + /* Uninitialize the OpenCL context on the device, only if this function initialized it */ + if (not_initialized == 1) + _starpu_opencl_deinit_context(dev); +} +#endif + +static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing dev_timing_per_numa[STARPU_NMAXDEVS][STARPU_MAXNUMANODES], enum starpu_node_kind type) +{ + /* We measure the bandwidth between each GPU and each NUMA node */ + unsigned numa_id; + for (numa_id = 0; numa_id < 
nnumas; numa_id++) + { + /* Store STARPU_memnode for later */ + dev_timing_per_numa[dev][numa_id].numa_id = numa_id; + + /* Chose one CPU connected to this NUMA node */ + int cpu_id = 0; +#ifdef STARPU_HAVE_HWLOC + cpu_id = find_cpu_from_numa_node(numa_id); +#endif + if (cpu_id < 0) + continue; + + _STARPU_DISP("with NUMA %d...\n", numa_id); + + /* Check hwloc location of GPU */ + set_numa_distance(dev, numa_id, type, &dev_timing_per_numa[dev][numa_id]); + +#ifdef STARPU_USE_CUDA + if (type == STARPU_CUDA_RAM) + measure_bandwidth_between_host_and_dev_on_numa_with_cuda(dev, numa_id, cpu_id, &dev_timing_per_numa[dev][numa_id]); +#endif +#ifdef STARPU_USE_OPENCL + if (type == STARPU_OPENCL_RAM) + measure_bandwidth_between_host_and_dev_on_numa_with_opencl(dev, numa_id, cpu_id, &dev_timing_per_numa[dev][numa_id]); +#endif + } + /* TODO: also measure the available aggregated bandwidth on a NUMA node, and through the interconnect */ + +#if defined(STARPU_HAVE_HWLOC) + hwloc_obj_t obj = NULL; + + if (starpu_driver_info[starpu_memory_node_get_worker_archtype(type)].get_hwloc_obj) + obj = starpu_driver_info[starpu_memory_node_get_worker_archtype(type)].get_hwloc_obj(hwtopology, dev); + if (obj) + obj = _starpu_numa_get_obj(obj); + if (obj) + gpu_numa[type][dev] = obj->logical_index; + else +#endif + gpu_numa[type][dev] = -1; + +#ifdef STARPU_VERBOSE + for (numa_id = 0; numa_id < nnumas; numa_id++) + { + double bandwidth_dtoh = dev_timing_per_numa[dev][numa_id].timing_dtoh; + double bandwidth_htod = dev_timing_per_numa[dev][numa_id].timing_htod; + + double bandwidth_sum2 = bandwidth_dtoh*bandwidth_dtoh + bandwidth_htod*bandwidth_htod; + + _STARPU_DISP("(%10s) BANDWIDTH GPU %d NUMA %u - htod %.0fMB/s - dtoh %.0fMB/s - %.0fMB/s\n", starpu_memory_driver_info[type].name_upper, dev, numa_id, 1/bandwidth_htod, 1/bandwidth_dtoh, 1/sqrt(bandwidth_sum2)); + } +#endif +} +#endif /* defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) */ + +#if !defined(STARPU_SIMGRID) +static void 
measure_bandwidth_latency_between_numa(int numa_src, int numa_dst, double *timing_nton, double *latency_nton) +{ +#if defined(STARPU_HAVE_HWLOC) + if (nnumas > 1) + { + /* different NUMA nodes available */ + double start, end, timing; + unsigned iter; + + /* Chose one CPU connected to this NUMA node */ + int cpu_id = 0; + cpu_id = find_cpu_from_numa_node(numa_src); + if (cpu_id < 0) + /* We didn't find a CPU attached to the numa_src NUMA nodes */ + goto no_calibration; + + _starpu_bind_thread_on_cpu(cpu_id, STARPU_NOWORKERID, NULL); + + unsigned char *h_buffer; + hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_src); + STARPU_ASSERT(obj_src); +#if HWLOC_API_VERSION >= 0x00020000 + h_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +#else + h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0); +#endif + + unsigned char *d_buffer; + hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_dst); + STARPU_ASSERT(obj_dst); +#if HWLOC_API_VERSION >= 0x00020000 + d_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +#else + d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0); +#endif + + memset(h_buffer, 0, SIZE); + + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + memcpy(d_buffer, h_buffer, SIZE); + } + end = starpu_timing_now(); + timing = end - start; + + *timing_nton = timing/NITER/SIZE; + + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + memcpy(d_buffer, h_buffer, 1); + } + end = starpu_timing_now(); + timing = end - start; + + *latency_nton = timing/NITER; + + hwloc_free(hwtopology, h_buffer, SIZE); + hwloc_free(hwtopology, d_buffer, SIZE); + } + else +no_calibration: +#endif + { + /* Cannot make a real calibration */ + numa_timing[numa_src][numa_dst] 
= 0.01; + numa_latency[numa_src][numa_dst] = 0; + } +} +#endif + +static void benchmark_all_memory_nodes(void) +{ +#ifdef STARPU_SIMGRID + _STARPU_DISP("Can not measure bus in simgrid mode, please run starpu_calibrate_bus in non-simgrid mode to make sure the bus performance model was calibrated\n"); + STARPU_ABORT(); +#else /* !SIMGRID */ + unsigned i, j; + + _STARPU_DEBUG("Benchmarking the speed of the bus\n"); + +#ifdef STARPU_DEVEL +#warning FIXME: when running several StarPU processes on the same node (MPI rank per numa), we need to use a lock to avoid concurrent benchmarking. +#endif + +#ifdef STARPU_HAVE_HWLOC + int ret; + ret = hwloc_topology_init(&hwtopology); + STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); + _starpu_topology_filter(hwtopology); + ret = hwloc_topology_load(hwtopology); + STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); +#if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES + unsigned n = 1; + hwloc_distances_get_by_name(hwtopology, "NUMALatency", &n, &numa_distances, 0); + if (!n) + numa_distances = NULL; +#endif +#endif + +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_t former_cpuset = hwloc_bitmap_alloc(); + hwloc_get_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD); +#elif defined(__linux__) + /* Save the current cpu binding */ + cpu_set_t former_process_affinity; + int ret; + ret = sched_getaffinity(0, sizeof(former_process_affinity), &former_process_affinity); + if (ret) + { + perror("sched_getaffinity"); + STARPU_ABORT(); + } +#else +#warning Missing binding support, StarPU will not be able to properly benchmark NUMA topology +#endif + + for (i = 0; i < nnumas; i++) + for (j = 0; j < nnumas; j++) + if (i != j) + { + _STARPU_DISP("NUMA %d -> %d...\n", i, j); + measure_bandwidth_latency_between_numa(i, j, &numa_timing[i][j], &numa_latency[i][j]); + } + +#ifdef STARPU_USE_CUDA + ncuda = _starpu_get_cuda_device_count(); + for (i = 0; i < ncuda; i++) + { + 
_STARPU_DISP("CUDA %u...\n", i); + /* measure bandwidth between Host and Device i */ + measure_bandwidth_between_host_and_dev(i, timing_per_numa[STARPU_CUDA_RAM], STARPU_CUDA_RAM); + } +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + for (i = 0; i < ncuda; i++) + { + for (j = 0; j < ncuda; j++) + if (i != j) + { + _STARPU_DISP("CUDA %u -> %u...\n", i, j); + /* measure bandwidth between Host and Device i */ + measure_bandwidth_between_dev_and_dev_cuda(i, j, &timing_dtod[STARPU_CUDA_RAM][i][j], &latency_dtod[STARPU_CUDA_RAM][i][j]); + } + } +#endif +#endif +#ifdef STARPU_USE_OPENCL + nopencl = _starpu_opencl_get_device_count(); + for (i = 0; i < nopencl; i++) + { + _STARPU_DISP("OpenCL %u...\n", i); + /* measure bandwidth between Host and Device i */ + measure_bandwidth_between_host_and_dev(i, timing_per_numa[STARPU_OPENCL_RAM], STARPU_OPENCL_RAM); + } +#endif + + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + double mpi_time_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}}; + double mpi_latency_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}}; + /* FIXME: rather make _starpu_mpi_common_measure_bandwidth_latency directly fill timing_per_numa */ + _starpu_mpi_common_measure_bandwidth_latency(mpi_time_device_to_device, mpi_latency_device_to_device); + for (i = 0; i < nmpims; i++) + { + for (j = 0; j < nnumas; j++) + { + timing_per_numa[STARPU_MPI_MS_RAM][i][j].numa_id = j; + timing_per_numa[STARPU_MPI_MS_RAM][i][j].numa_distance = -1; + timing_per_numa[STARPU_MPI_MS_RAM][i][j].timing_htod = mpi_time_device_to_device[0][i+1]; + timing_per_numa[STARPU_MPI_MS_RAM][i][j].latency_htod = mpi_latency_device_to_device[0][i+1]; + timing_per_numa[STARPU_MPI_MS_RAM][i][j].timing_dtoh = mpi_time_device_to_device[i+1][0]; + timing_per_numa[STARPU_MPI_MS_RAM][i][j].latency_dtoh = mpi_latency_device_to_device[i+1][0]; + } + for (j = 0; j < nmpims; j++) + { + timing_dtod[STARPU_MPI_MS_RAM][i][j] = mpi_time_device_to_device[i+1][j+1]; + } + } +#endif /* 
STARPU_USE_MPI_MASTER_SLAVE */ + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + double tcpip_time_device_to_device[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS] = {{0.0}}; + double tcpip_latency_device_to_device[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS] = {{0.0}}; + /* FIXME: rather make _starpu_mpi_common_measure_bandwidth_latency directly fill timing_per_numa */ + _starpu_tcpip_common_measure_bandwidth_latency(tcpip_time_device_to_device, tcpip_latency_device_to_device); + for (i = 0; i < ntcpip_ms; i++) + { + for (j = 0; j < nnumas; j++) + { + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].numa_id = j; + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].numa_distance = -1; + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].timing_htod = tcpip_time_device_to_device[0][i+1]; + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].latency_htod = tcpip_latency_device_to_device[0][i+1]; + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].timing_dtoh = tcpip_time_device_to_device[i+1][0]; + timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].latency_dtoh = tcpip_latency_device_to_device[i+1][0]; + } + for (j = 0; j < ntcpip_ms; j++) + { + timing_dtod[STARPU_TCPIP_MS_RAM][i][j] = tcpip_time_device_to_device[i+1][j+1]; + } + } +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + +#ifdef STARPU_HAVE_HWLOC + hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD); + hwloc_bitmap_free(former_cpuset); +#elif defined(__linux__) + /* Restore the former affinity */ + ret = sched_setaffinity(0, sizeof(former_process_affinity), &former_process_affinity); + if (ret) + { + perror("sched_setaffinity"); + STARPU_ABORT(); + } +#endif + +#ifdef STARPU_HAVE_HWLOC +#if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES + if (numa_distances) + hwloc_distances_release(hwtopology, numa_distances); + numa_distances = NULL; +#endif + hwloc_topology_destroy(hwtopology); +#endif + + _STARPU_DEBUG("Benchmarking the speed of the bus is done.\n"); + + was_benchmarked = 1; +#endif /* !SIMGRID */ +} + +static void get_bus_path(const char *type, char *path, 
size_t maxlen) +{ + char hostname[65]; + char *bus; + + bus = _starpu_get_perf_model_dir_bus(); + _starpu_gethostname(hostname, sizeof(hostname)); + snprintf(path, maxlen, "%s%s.%s", bus?_starpu_get_perf_model_dir_bus():"INVALID_LOCATION/", hostname, type); +} + +/* + * Affinity + */ + +static void get_affinity_path(char *path, size_t maxlen) +{ + get_bus_path("affinity", path, maxlen); +} + +#ifndef STARPU_SIMGRID + +static void load_bus_affinity_file_content(void) +{ + FILE *f; + int locked; + + char path[PATH_LENGTH]; + get_affinity_path(path, sizeof(path)); + + _STARPU_DEBUG("loading affinities from %s\n", path); + + f = fopen(path, "r"); + STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); + + locked = _starpu_frdlock(f) == 0; + + unsigned gpu; + enum starpu_node_kind type; + unsigned ok = 1; + + for (type = STARPU_CUDA_RAM; ok && type < STARPU_NRAM; type++) + { + for (gpu = 0; ok && gpu < nmem[type]; gpu++) + { + int ret; + unsigned dummy; + + _starpu_drop_comments(f); + ret = fscanf(f, "%u\t", &dummy); + if (ret != 1) + { + /* Old perfmodel file, ignore rest */ + ok = 0; + break; + } + + STARPU_ASSERT(dummy == gpu); + + unsigned numa; + for (numa = 0; numa < nnumas; numa++) + { + ret = fscanf(f, "%u\t", &affinity_matrix[type][gpu][numa]); + STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path); + } + + ret = fscanf(f, "\n"); + STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path); + } + } + if (locked) + _starpu_frdunlock(f); + + fclose(f); +} + +/* NB: we want to sort the bandwidth by DECREASING order */ +static int compar_dev_timing(const void *left_dev_timing, const void *right_dev_timing) +{ + const struct dev_timing *left = (const struct dev_timing *)left_dev_timing; + const struct dev_timing *right = (const struct dev_timing *)right_dev_timing; + + if (left->numa_distance == 0 && right->numa_distance != 0) + /* We prefer left */ + return -1; + + if (right->numa_distance == 0 && left->numa_distance != 0) + 
/* We prefer right */ + return 1; + + /* Both distances known: the nearer NUMA node sorts first */ + if (left->numa_distance >= 0 && right->numa_distance >= 0) + { + return left->numa_distance > right->numa_distance ? 1 : + left->numa_distance < right->numa_distance ? -1 : 0; + } + + double left_dtoh = left->timing_dtoh; + double left_htod = left->timing_htod; + double right_dtoh = right->timing_dtoh; + double right_htod = right->timing_htod; + + double timing_sum2_left = left_dtoh*left_dtoh + left_htod*left_htod; + double timing_sum2_right = right_dtoh*right_dtoh + right_htod*right_htod; + + /* Smaller squared timing sum sorts first, i.e. decreasing bandwidth */ + return timing_sum2_left > timing_sum2_right ? 1 : + timing_sum2_left < timing_sum2_right ? -1 : 0; +} + +/* Write the affinity file: a header row, then for each memory kind from STARPU_CUDA_RAM up and each of its devices, the device index followed by the NUMA ids in the order produced by compar_dev_timing. The benchmark must have run (asserted); aborts when the file cannot be opened. */ +static void write_bus_affinity_file_content(void) +{ + STARPU_ASSERT(was_benchmarked); + + FILE *f; + char path[PATH_LENGTH]; + int locked; + + get_affinity_path(path, sizeof(path)); + + _STARPU_DEBUG("writing affinities to %s\n", path); + + f = fopen(path, "a+"); + if (!f) + { + /* Tag spelled after this function, like the other bus file writers do */ + perror("fopen write_bus_affinity_file_content"); + _STARPU_DISP("path '%s'\n", path); + fflush(stderr); + STARPU_ABORT(); + } + + /* The file is opened in append mode, locked, then rewound and truncated before writing */ + locked = _starpu_fwrlock(f) == 0; + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + + unsigned numa; + unsigned gpu; + enum starpu_node_kind type; + + fprintf(f, "# GPU\t"); + for (numa = 0; numa < nnumas; numa++) + fprintf(f, "NUMA%u\t", numa); + fprintf(f, "\n"); + + for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) + { + /* Sort a copy so that timing_per_numa itself keeps its order */ + struct dev_timing timing_per_numa_sorted[STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; + memcpy(timing_per_numa_sorted, timing_per_numa[type], sizeof(timing_per_numa[type])); + + for (gpu = 0; gpu < nmem[type]; gpu++) + { + fprintf(f, "%u\t", gpu); + + qsort(timing_per_numa_sorted[gpu], nnumas, sizeof(struct dev_timing), compar_dev_timing); + + for (numa = 0; numa < nnumas; numa++) + { + fprintf(f, "%d\t", timing_per_numa_sorted[gpu][numa].numa_id); + } + + fprintf(f, "\n"); + } + } + + if (locked) + _starpu_fwrunlock(f); + 
fclose(f); +} + +static void generate_bus_affinity_file(void) +{ + if (!was_benchmarked) + benchmark_all_memory_nodes(); + + write_bus_affinity_file_content(); +} + +static int check_bus_affinity_file(void) +{ + int ret = 1; + FILE *f; + int locked; + unsigned dummy; + + char path[PATH_LENGTH]; + get_affinity_path(path, sizeof(path)); + + _STARPU_DEBUG("loading affinities from %s\n", path); + + f = fopen(path, "r"); + STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); + + locked = _starpu_frdlock(f) == 0; + + ret = fscanf(f, "# GPU\t"); + STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path); + + ret = fscanf(f, "NUMA%u\t", &dummy); + + if (locked) + _starpu_frdunlock(f); + + fclose(f); + return ret == 1; +} + +static void load_bus_affinity_file(void) +{ + int exist, check = 1; + + char path[PATH_LENGTH]; + get_affinity_path(path, sizeof(path)); + + /* access return 0 if file exists */ + exist = access(path, F_OK); + + if (exist == 0) + /* return 0 if it's not good */ + check = check_bus_affinity_file(); + + if (check == 0) + _STARPU_DISP("Affinity File is too old for this version of StarPU ! Rebuilding it...\n"); + + if (check == 0 || exist != 0) + { + /* File does not exist yet */ + generate_bus_affinity_file(); + } + + load_bus_affinity_file_content(); +} + +unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid) +{ + return affinity_matrix[STARPU_CUDA_RAM][gpuid]; +} + +unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid) +{ + return affinity_matrix[STARPU_OPENCL_RAM][gpuid]; +} + +void starpu_bus_print_affinity(FILE *f) +{ + enum starpu_node_kind type; + + fprintf(f, "# GPU\tNUMA in preference order (logical index)\n"); + + for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) + { + unsigned gpu; + + if (!nmem[type]) + continue; + + fprintf(f, "# %s\n", starpu_memory_driver_info[type].name_upper); + for(gpu = 0 ; gpu. Expected a number. 
Did you change the maximum number of GPUs at ./configure time?\n", path); + fclose(f); + return 0; + } + n = getc(f); + if (n == '\n') + break; + if (n != '\t') + { + _STARPU_DISP("bogus character '%c' (%d) in latency file %s\n", n, n, path); + fclose(f); + return 0; + } + + raw_latency_matrix[src][dst] = latency; + + /* Look out for \t\n */ + n = getc(f); + if (n == '\n') + break; + ungetc(n, f); + n = '\t'; + } + + /* No more values, take NAN */ + for (; dst < STARPU_MAXNODES; dst++) + raw_latency_matrix[src][dst] = NAN; + + while (n == '\t') + { + /* Look out for \t\n */ + n = getc(f); + if (n == '\n') + break; + ungetc(n, f); + + n = _starpu_read_double(f, "%le", &latency); + if (n && !isnan(latency)) + { + _STARPU_DISP("Too many nodes in latency file %s for this configuration (%d). Did you change the maximum number of GPUs at ./configure time?\n", path, STARPU_MAXNODES); + fclose(f); + return 0; + } + n = getc(f); + } + if (n != '\n') + { + _STARPU_DISP("Bogus character '%c' (%d) in latency file %s\n", n, n, path); + fclose(f); + return 0; + } + + /* Look out for EOF */ + n = getc(f); + if (n == EOF) + break; + ungetc(n, f); + } + if (locked) + _starpu_frdunlock(f); + fclose(f); + + /* No more values, take NAN */ + for (; src < STARPU_MAXNODES; src++) + for (dst = 0; dst < STARPU_MAXNODES; dst++) + raw_latency_matrix[src][dst] = NAN; + + return 1; +} + +#if !defined(STARPU_SIMGRID) +static double search_bus_best_latency(int src, enum starpu_node_kind type, int htod) +{ + /* Search the best latency for this node */ + double best = 0.0; + double actual = 0.0; + unsigned check = 0; + unsigned numa; + for (numa = 0; numa < nnumas; numa++) + { + if (htod) + actual = timing_per_numa[type][src][numa].latency_htod; + else + actual = timing_per_numa[type][src][numa].latency_dtoh; + if (!check || actual < best) + { + best = actual; + check = 1; + } + } + return best; +} + +static void write_bus_latency_file_content(void) +{ + enum starpu_node_kind type; + unsigned src, 
dst, maxnode; + /* Boundaries to check if src or dst are inside the interval */ + unsigned b_low, b_up; + FILE *f; + int locked; + + STARPU_ASSERT(was_benchmarked); + + char path[PATH_LENGTH]; + get_latency_path(path, sizeof(path)); + + _STARPU_DEBUG("writing latencies to %s\n", path); + + f = fopen(path, "a+"); + if (!f) + { + perror("fopen write_bus_latency_file_content"); + _STARPU_DISP("path '%s'\n", path); + fflush(stderr); + STARPU_ABORT(); + } + locked = _starpu_fwrlock(f) == 0; + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + + fprintf(f, "# "); + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + { + for (dst = 0; dst < nmem[type]; dst++) + { + fprintf(f, "to %s %d\t", _starpu_node_get_prefix(type), dst); + } + } + fprintf(f, "\n"); + + maxnode = 0; + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + maxnode += nmem[type]; + + for (src = 0; src < STARPU_MAXNODES; src++) + { + for (dst = 0; dst < STARPU_MAXNODES; dst++) + { + /* µs */ + double latency = 0.0; + + if ((src >= maxnode) || (dst >= maxnode)) + { + /* convention */ + latency = NAN; + } + else if (src == dst) + { + latency = 0.0; + } + else + { + b_low = b_up = 0; + + /* ---- Begin NUMA ---- */ + b_up += nnumas; + + if (src >= b_low && src < b_up && dst >= b_low && dst < b_up) + latency += numa_latency[src-b_low][dst-b_low]; + + /* copy interval to check numa index later */ + unsigned numa_low = b_low; + unsigned numa_up = b_up; + + b_low += nnumas; + /* ---- End NUMA ---- */ + + for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) + { + b_up += nmem[type]; + /* Check if it's direct GPU-GPU transfer */ + if (src >= b_low && src < b_up && dst >= b_low && dst < b_up + && timing_dtod[type][src-b_low][dst-b_low]) + latency += latency_dtod[type][src-b_low][dst-b_low]; + else + { + /* Check if it's GPU <-> NUMA link */ + if (src >=b_low && src < b_up && dst >= numa_low && dst < numa_up) + latency += timing_per_numa[type][(src-b_low)][dst-numa_low].latency_dtoh; + if (dst >= 
b_low && dst < b_up && src >= numa_low && dst < numa_up) + latency += timing_per_numa[type][(dst-b_low)][src-numa_low].latency_htod; + /* To other devices, take the best latency */ + if (src >= b_low && src < b_up && !(dst >= numa_low && dst < numa_up)) + latency += search_bus_best_latency(src-b_low, type, 0); + if (dst >= b_low && dst < b_up && !(src >= numa_low && dst < numa_up)) + latency += search_bus_best_latency(dst-b_low, type, 1); + } + b_low += nmem[type]; + } + } + + if (dst > 0) + fputc('\t', f); + _starpu_write_double(f, "%e", latency); + } + + fprintf(f, "\n"); + } + if (locked) + _starpu_fwrunlock(f); + + fclose(f); +} +#endif + +static void generate_bus_latency_file(void) +{ + if (!was_benchmarked) + benchmark_all_memory_nodes(); + +#ifndef STARPU_SIMGRID + write_bus_latency_file_content(); +#endif +} + +static void load_bus_latency_file(void) +{ + int res; + + char path[PATH_LENGTH]; + get_latency_path(path, sizeof(path)); + + res = access(path, F_OK); + if (res || !load_bus_latency_file_content()) + { + /* File does not exist yet or is bogus */ + generate_bus_latency_file(); + res = load_bus_latency_file_content(); + STARPU_ASSERT(res); + } + +} + + +/* + * Bandwidth + */ +static void get_bandwidth_path(char *path, size_t maxlen) +{ + get_bus_path("bandwidth", path, maxlen); +} + +static int load_bus_bandwidth_file_content(void) +{ + int n; + unsigned src, dst; + FILE *f; + double bandwidth; + int locked; + + char path[PATH_LENGTH]; + get_bandwidth_path(path, sizeof(path)); + + _STARPU_DEBUG("loading bandwidth from %s\n", path); + + f = fopen(path, "r"); + if (!f) + { + perror("fopen load_bus_bandwidth_file_content"); + _STARPU_DISP("path '%s'\n", path); + fflush(stderr); + STARPU_ABORT(); + } + locked = _starpu_frdlock(f) == 0; + + for (src = 0; src < STARPU_MAXNODES; src++) + { + _starpu_drop_comments(f); + for (dst = 0; dst < STARPU_MAXNODES; dst++) + { + n = _starpu_read_double(f, "%le", &bandwidth); + if (n != 1) + { + _STARPU_DISP("Error 
while reading bandwidth file <%s>. Expected a number\n", path); + fclose(f); + return 0; + } + n = getc(f); + if (n == '\n') + break; + if (n != '\t') + { + _STARPU_DISP("bogus character '%c' (%d) in bandwidth file %s\n", n, n, path); + fclose(f); + return 0; + } + + int limit_bandwidth = starpu_getenv_number("STARPU_LIMIT_BANDWIDTH"); + if (limit_bandwidth >= 0) + { +#ifndef STARPU_SIMGRID + _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but simgrid not enabled, thus ignored\n", limit_bandwidth); +#else +#ifdef HAVE_SG_LINK_BANDWIDTH_SET + bandwidth = limit_bandwidth; +#else + _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26\n", limit_bandwidth); +#endif +#endif + } + + raw_bandwidth_matrix[src][dst] = bandwidth; + + /* Look out for \t\n */ + n = getc(f); + if (n == '\n') + break; + ungetc(n, f); + n = '\t'; + } + + /* No more values, take NAN */ + for (; dst < STARPU_MAXNODES; dst++) + raw_bandwidth_matrix[src][dst] = NAN; + + while (n == '\t') + { + /* Look out for \t\n */ + n = getc(f); + if (n == '\n') + break; + ungetc(n, f); + + n = _starpu_read_double(f, "%le", &bandwidth); + if (n && !isnan(bandwidth)) + { + _STARPU_DISP("Too many nodes in bandwidth file %s for this configuration (%d)\n", path, STARPU_MAXNODES); + fclose(f); + return 0; + } + n = getc(f); + } + if (n != '\n') + { + _STARPU_DISP("Bogus character '%c' (%d) in bandwidth file %s\n", n, n, path); + fclose(f); + return 0; + } + + /* Look out for EOF */ + n = getc(f); + if (n == EOF) + break; + ungetc(n, f); + } + if (locked) + _starpu_frdunlock(f); + fclose(f); + + /* No more values, take NAN */ + for (; src < STARPU_MAXNODES; src++) + for (dst = 0; dst < STARPU_MAXNODES; dst++) + raw_bandwidth_matrix[src][dst] = NAN; + + return 1; +} + +#if !defined(STARPU_SIMGRID) +static double search_bus_best_timing(int src, enum starpu_node_kind type, int htod) +{ + /* Search the best latency for this node */ + double best = 0.0; + double actual = 0.0; + 
unsigned check = 0; + unsigned numa; + for (numa = 0; numa < nnumas; numa++) + { + if (htod) + actual = timing_per_numa[type][src][numa].timing_htod; + else + actual = timing_per_numa[type][src][numa].timing_dtoh; + if (!check || actual < best) + { + best = actual; + check = 1; + } + } + return best; +} + +static void write_bus_bandwidth_file_content(void) +{ + enum starpu_node_kind type; + unsigned src, dst, maxnode; + unsigned b_low, b_up; + FILE *f; + int locked; + + STARPU_ASSERT(was_benchmarked); + + char path[PATH_LENGTH]; + get_bandwidth_path(path, sizeof(path)); + + _STARPU_DEBUG("writing bandwidth to %s\n", path); + + f = fopen(path, "a+"); + STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path); + + locked = _starpu_fwrlock(f) == 0; + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + + fprintf(f, "# "); + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + { + for (dst = 0; dst < nmem[type]; dst++) + { + fprintf(f, "to %s %d\t", _starpu_node_get_prefix(type), dst); + } + } + fprintf(f, "\n"); + + maxnode = 0; + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + maxnode += nmem[type]; + + for (src = 0; src < STARPU_MAXNODES; src++) + { + for (dst = 0; dst < STARPU_MAXNODES; dst++) + { + double bandwidth; + + if ((src >= maxnode) || (dst >= maxnode)) + { + bandwidth = NAN; + } + else if (src != dst) + { + double slowness = 0.0; + /* Total bandwidth is the harmonic mean of bandwidths */ + b_low = b_up = 0; + + /* Begin NUMA */ + b_up += nnumas; + + if (src >= b_low && src < b_up && dst >= b_low && dst < b_up) + slowness += numa_timing[src-b_low][dst-b_low]; + + /* copy interval to check numa index later */ + unsigned numa_low = b_low; + unsigned numa_up = b_up; + + b_low += nnumas; + /* End NUMA */ + + for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) + { + b_up += nmem[type]; + /* Check if it's direct GPU-GPU transfer */ + if (src >= b_low && src < b_up && dst >= b_low && dst < b_up + && 
timing_dtod[type][src-b_low][dst-b_low]) + slowness += timing_dtod[type][src-b_low][dst-b_low]; + else + { + /* Check if it's GPU <-> NUMA link */ + if (src >= b_low && src < b_up && dst >= numa_low && dst < numa_up) + slowness += timing_per_numa[type][(src-b_low)][dst-numa_low].timing_dtoh; + if (dst >= b_low && dst < b_up && src >= numa_low && src < numa_up) + slowness += timing_per_numa[type][(dst-b_low)][src-numa_low].timing_htod; + /* To other devices, take the best slowness */ + if (src >= b_low && src < b_up && !(dst >= numa_low && dst < numa_up)) + slowness += search_bus_best_timing(src-b_low, type, 0); + if (dst >= b_low && dst < b_up && !(src >= numa_low && src < numa_up)) + slowness += search_bus_best_timing(dst-b_low, type, 1); + } + b_low += nmem[type]; + } + + bandwidth = 1.0/slowness; + } + else + { + /* convention */ + bandwidth = 0.0; + } + + if (dst) + fputc('\t', f); + _starpu_write_double(f, "%e", bandwidth); + } + + fprintf(f, "\n"); + } + + if (locked) + _starpu_fwrunlock(f); + fclose(f); +} +#endif /* STARPU_SIMGRID */ + +void starpu_bus_print_filenames(FILE *output) +{ + char bandwidth_path[PATH_LENGTH]; + char affinity_path[PATH_LENGTH]; + char latency_path[PATH_LENGTH]; + + get_bandwidth_path(bandwidth_path, sizeof(bandwidth_path)); + get_affinity_path(affinity_path, sizeof(affinity_path)); + get_latency_path(latency_path, sizeof(latency_path)); + + fprintf(output, "bandwidth: <%s>\n", bandwidth_path); + fprintf(output, " affinity: <%s>\n", affinity_path); + fprintf(output, " latency: <%s>\n", latency_path); +} + +void starpu_bus_print_bandwidth(FILE *f) +{ + unsigned src, dst, maxnode = starpu_memory_nodes_get_count(); + + fprintf(f, "from/to\t"); + for (dst = 0; dst < maxnode; dst++) + { + char name[128]; + starpu_memory_node_get_name(dst, name, sizeof(name)); + fprintf(f, "%s\t", name); + } + fprintf(f, "\n"); + + for (src = 0; src < maxnode; src++) + { + char name[128]; + starpu_memory_node_get_name(src, name, sizeof(name)); + 
fprintf(f, "%s\t", name); + + for (dst = 0; dst < maxnode; dst++) + fprintf(f, "%.0f\t", bandwidth_matrix[src][dst]); + + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + for (src = 0; src < maxnode; src++) + { + char name[128]; + starpu_memory_node_get_name(src, name, sizeof(name)); + fprintf(f, "%s\t", name); + + for (dst = 0; dst < maxnode; dst++) + fprintf(f, "%.0f\t", latency_matrix[src][dst]); + + fprintf(f, "\n"); + } + +#ifndef STARPU_SIMGRID +#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) + if (ncuda != 0 || nopencl != 0) + fprintf(f, "\nGPU\tNUMA in preference order (logical index), host-to-device, device-to-host\n"); + for (src = 0; src < ncuda + nopencl; src++) + { + struct dev_timing *timing; + struct _starpu_machine_config * config = _starpu_get_machine_config(); + unsigned nhwnumas = _starpu_topology_get_nhwnumanodes(config); + unsigned numa; + +#ifdef STARPU_USE_CUDA + if (src < ncuda) + { + fprintf(f, "CUDA_%u\t", src); + for (numa = 0; numa < nhwnumas; numa++) + { + timing = &timing_per_numa[STARPU_CUDA_RAM][src][numa]; + if (timing->timing_htod) + fprintf(f, "%2d %.0f %.0f\t", timing->numa_id, 1/timing->timing_htod, 1/timing->timing_dtoh); + else + fprintf(f, "%2u\t", affinity_matrix[STARPU_CUDA_RAM][src][numa]); + } + } +#ifdef STARPU_USE_OPENCL + else +#endif +#endif +#ifdef STARPU_USE_OPENCL + { + fprintf(f, "OpenCL%u\t", src-ncuda); + for (numa = 0; numa < nhwnumas; numa++) + { + timing = &timing_per_numa[STARPU_OPENCL_RAM][(src-ncuda)][numa]; + if (timing->timing_htod) + fprintf(f, "%2d %.0f %.0f\t", timing->numa_id, 1/timing->timing_htod, 1/timing->timing_dtoh); + else + fprintf(f, "%2u\t", affinity_matrix[STARPU_OPENCL_RAM][src-ncuda][numa]); + } + } +#endif + fprintf(f, "\n"); + } +#endif +#endif +} + +static void generate_bus_bandwidth_file(void) +{ + if (!was_benchmarked) + benchmark_all_memory_nodes(); + +#ifndef STARPU_SIMGRID + write_bus_bandwidth_file_content(); +#endif +} + +static void load_bus_bandwidth_file(void) +{ + 
int res; + + char path[PATH_LENGTH]; + get_bandwidth_path(path, sizeof(path)); + + res = access(path, F_OK); + if (res || !load_bus_bandwidth_file_content()) + { + /* File does not exist yet or is bogus */ + generate_bus_bandwidth_file(); + res = load_bus_bandwidth_file_content(); + STARPU_ASSERT(res); + } +} + +#ifndef STARPU_SIMGRID +/* + * Config + */ +static void get_config_path(char *path, size_t maxlen) +{ + get_bus_path("config", path, maxlen); +} + +#if defined(STARPU_USE_MPI_MASTER_SLAVE) +/* check if the master or one slave has to recalibrate */ +static int mpi_check_recalibrate(int my_recalibrate) +{ + int nb_mpi = _starpu_mpi_src_get_device_count() + 1; + int mpi_recalibrate[nb_mpi]; + int i; + + MPI_Allgather(&my_recalibrate, 1, MPI_INT, mpi_recalibrate, 1, MPI_INT, MPI_COMM_WORLD); + + for (i = 0; i < nb_mpi; i++) + { + if (mpi_recalibrate[i]) + { + return 1; + } + } + return 0; +} +#endif + +static void compare_value_and_recalibrate(enum starpu_node_kind type, const char * msg, unsigned val_file, unsigned val_detected) +{ + int recalibrate = 0; + if (val_file != val_detected && + !((type == STARPU_MPI_MS_RAM || type == STARPU_TCPIP_MS_RAM) && !val_detected)) + recalibrate = 1; + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + //Send to each other to know if we had to recalibrate because someone cannot have the correct value in the config file + if (_starpu_config.conf.nmpi_ms != 0) + recalibrate = mpi_check_recalibrate(recalibrate); +#endif + + if (recalibrate) + { +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* Only the master prints the message */ + if (_starpu_mpi_common_is_src_node()) +#endif + _STARPU_DISP("Current configuration does not match the bus performance model (%s: (stored) %d != (current) %d), recalibrating...\n", msg, val_file, val_detected); + + int location = _starpu_get_perf_model_bus(); + _starpu_bus_force_sampling(location); + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + if (_starpu_mpi_common_is_src_node()) +#endif + _STARPU_DISP("... 
done\n"); + } +} + +static void check_bus_config_file(void) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + int recalibrate = 0; + char path[PATH_LENGTH]; + + int location = _starpu_get_perf_model_bus(); + if (location < 0 || config->conf.bus_calibrate > 0) + recalibrate = 1; + +#if defined(STARPU_USE_MPI_MASTER_SLAVE) + if (_starpu_config.conf.nmpi_ms != 0) + //Send to each other to know if we had to recalibrate because someone cannot have the config file + recalibrate = mpi_check_recalibrate(recalibrate); +#endif + + if (recalibrate) + { + if (location < 0) + _STARPU_DISP("No performance model for the bus, calibrating...\n"); + _starpu_bus_force_sampling(location); + if (location < 0) + _STARPU_DISP("... done\n"); + } + else + { + FILE *f; + int ret; + enum starpu_node_kind type; + unsigned read_cpus = -1; + unsigned n_read[STARPU_NRAM]; + + int locked; + unsigned ok; + + get_config_path(path, sizeof(path)); + + // Loading configuration from file + f = fopen(path, "r"); + STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); + locked = _starpu_frdlock(f) == 0; + _starpu_drop_comments(f); + + ret = fscanf(f, "%u\t", &read_cpus); + STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path); + _starpu_drop_comments(f); + + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + n_read[type] = -1; + ok = 1; + for (type = STARPU_CPU_RAM; ok && type < STARPU_NRAM; type++) + { + if (ok) + ret = fscanf(f, "%u\t", &n_read[type]); + if (!ok || ret != 1) + { + ok = 0; + n_read[type] = 0; + } + _starpu_drop_comments(f); + } + + if (locked) + _starpu_frdunlock(f); + fclose(f); + + // Loading current configuration + ncpus = _starpu_topology_get_nhwcpu(config); + + /* TODO: factorize these calls */ + nnumas = _starpu_topology_get_nhwnumanodes(config); +#ifdef STARPU_USE_CUDA + ncuda = _starpu_get_cuda_device_count(); +#endif +#ifdef STARPU_USE_OPENCL + nopencl = _starpu_opencl_get_device_count(); +#endif +#ifdef 
STARPU_USE_MPI_MASTER_SLAVE + nmpims = _starpu_mpi_src_get_device_count(); +#endif /* STARPU_USE_MPI_MASTER_SLAVE */ +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + ntcpip_ms = _starpu_tcpip_src_get_device_count(); +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + + // Checking if both configurations match + compare_value_and_recalibrate(STARPU_CPU_RAM, "CPUS", read_cpus, ncpus); + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + { + compare_value_and_recalibrate(type, + starpu_memory_driver_info[type].name_upper, n_read[type], nmem[type]); + } + } +} + +static void write_bus_config_file_content(void) +{ + FILE *f; + char path[PATH_LENGTH]; + int locked; + enum starpu_node_kind type; + + STARPU_ASSERT(was_benchmarked); + get_config_path(path, sizeof(path)); + + _STARPU_DEBUG("writing config to %s\n", path); + + f = fopen(path, "a+"); + STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path); + locked = _starpu_fwrlock(f) == 0; + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + + fprintf(f, "# Current configuration\n"); + fprintf(f, "%u # Number of CPUs\n", ncpus); + for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) + fprintf(f, "%u # Number of %s nodes\n", nmem[type], + starpu_memory_driver_info[type].name_upper); + + if (locked) + _starpu_fwrunlock(f); + fclose(f); +} + +static void generate_bus_config_file(void) +{ + if (!was_benchmarked) + benchmark_all_memory_nodes(); + + write_bus_config_file_content(); +} +#endif /* !SIMGRID */ + +void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen) +{ + if (version == 3) + get_bus_path("platform.xml", path, maxlen); + else + get_bus_path("platform.v4.xml", path, maxlen); +} + +#ifndef STARPU_SIMGRID +/* + * Compute the precise PCI tree bandwidth and link shares + * + * We only have measurements from one leaf to another. 
We assume that the + * available bandwidth is greater at lower levels, and thus measurements from + * increasingly far GPUs provide the PCI bridges bandwidths at each level. + * + * The bandwidth of a PCI bridge is thus computed as the maximum of the speed + * of the various transfers that we have achieved through it. We thus browse + * the PCI tree three times: + * + * - first through all CUDA-CUDA possible transfers to compute the maximum + * measured bandwidth on each PCI link and hub used for that. + * - then through the whole tree to emit links for each PCI link and hub. + * - then through all CUDA-CUDA possible transfers again to emit routes. + */ + +#if defined(STARPU_USE_CUDA) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + +/* Records, for each PCI link and hub, the maximum bandwidth seen through it */ +struct pci_userdata +{ + /* Uplink max measurement */ + double bw_up; + double bw_down; + + /* Hub max measurement */ + double bw; +}; + +/* Allocate a pci_userdata structure for the given object */ +static void allocate_userdata(hwloc_obj_t obj) +{ + struct pci_userdata *data; + + if (obj->userdata) + return; + + _STARPU_MALLOC(obj->userdata, sizeof(*data)); + data = obj->userdata; + data->bw_up = 0.0; + data->bw_down = 0.0; + data->bw = 0.0; +} + +/* Update the maximum bandwidth seen going to upstream */ +static void update_bandwidth_up(hwloc_obj_t obj, double bandwidth) +{ + struct pci_userdata *data; + if (obj->type != HWLOC_OBJ_BRIDGE && obj->type != HWLOC_OBJ_PCI_DEVICE) + return; + allocate_userdata(obj); + + data = obj->userdata; + if (data->bw_up < bandwidth) + data->bw_up = bandwidth; +} + +/* Update the maximum bandwidth seen going from upstream */ +static void update_bandwidth_down(hwloc_obj_t obj, double bandwidth) +{ + struct pci_userdata *data; + if (obj->type != HWLOC_OBJ_BRIDGE && obj->type != HWLOC_OBJ_PCI_DEVICE) + return; + allocate_userdata(obj); + + data = obj->userdata; + if (data->bw_down 
< bandwidth) + data->bw_down = bandwidth; +} + +/* Update the maximum bandwidth seen going through this Hub */ +static void update_bandwidth_through(hwloc_obj_t obj, double bandwidth) +{ + struct pci_userdata *data; + allocate_userdata(obj); + + data = obj->userdata; + if (data->bw < bandwidth) + data->bw = bandwidth; +} + +/* find_* functions perform the first step: computing maximum bandwidths */ + +/* Our traffic had to go through the host, go back from target up to the host, + * updating uplink downstream bandwidth along the way */ +static void find_platform_backward_path(hwloc_obj_t obj, double bandwidth) +{ + if (!obj) + /* Oops, we should have seen a host bridge. Well, too bad. */ + return; + + /* Update uplink bandwidth of PCI Hub */ + update_bandwidth_down(obj, bandwidth); + /* Update internal bandwidth of PCI Hub */ + update_bandwidth_through(obj, bandwidth); + + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + /* Finished */ + return; + + /* Continue up */ + find_platform_backward_path(obj->parent, bandwidth); +} +/* Same, but update uplink upstream bandwidth */ +static void find_platform_forward_path(hwloc_obj_t obj, double bandwidth) +{ + if (!obj) + /* Oops, we should have seen a host bridge. Well, too bad. 
*/ + return; + + /* Update uplink bandwidth of PCI Hub */ + update_bandwidth_up(obj, bandwidth); + /* Update internal bandwidth of PCI Hub */ + update_bandwidth_through(obj, bandwidth); + + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + /* Finished */ + return; + + /* Continue up */ + find_platform_forward_path(obj->parent, bandwidth); +} + +/* Find the path from obj1 through parent down to obj2 (without ever going up), + * and update the maximum bandwidth along the path */ +static int find_platform_path_down(hwloc_obj_t parent, hwloc_obj_t obj1, hwloc_obj_t obj2, double bandwidth) +{ + unsigned i; + + /* Base case, path is empty */ + if (parent == obj2) + return 1; + + /* Try to go down from parent */ + for (i = 0; i < parent->arity; i++) + if (parent->children[i] != obj1 && find_platform_path_down(parent->children[i], NULL, obj2, bandwidth)) + { + /* Found it down there, update bandwidth of parent */ + update_bandwidth_down(parent->children[i], bandwidth); + update_bandwidth_through(parent, bandwidth); + return 1; + } +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t io; + for (io = parent->io_first_child; io; io = io->next_sibling) + if (io != obj1 && find_platform_path_down(io, NULL, obj2, bandwidth)) + { + /* Found it down there, update bandwidth of parent */ + update_bandwidth_down(io, bandwidth); + update_bandwidth_through(parent, bandwidth); + return 1; + } +#endif + return 0; +} + +/* Find the path from obj1 to obj2, and update the maximum bandwidth along the + * path */ +static int find_platform_path_up(hwloc_obj_t obj1, hwloc_obj_t obj2, double bandwidth) +{ + int ret; + hwloc_obj_t parent = obj1->parent; + + if (!parent) + { + /* Oops, we should have seen a host bridge. Act as if we had seen it. 
*/ + find_platform_backward_path(obj2, bandwidth); + return 1; + } + + if (find_platform_path_down(parent, obj1, obj2, bandwidth)) + /* obj2 was a mere (sub)child of our parent */ + return 1; + + /* obj2 is not a (sub)child of our parent, we have to go up through the parent */ + if (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + { + /* We have to go up to the Interconnect, so obj2 is not in the same PCI + * tree, so we're done for obj1 to Interconnect, and just find the path + * from obj2 to Interconnect too. + */ + find_platform_backward_path(obj2, bandwidth); + + update_bandwidth_up(parent, bandwidth); + update_bandwidth_through(parent, bandwidth); + + return 1; + } + + /* Not at host yet, just go up */ + ret = find_platform_path_up(parent, obj2, bandwidth); + update_bandwidth_up(parent, bandwidth); + update_bandwidth_through(parent, bandwidth); + return ret; +} + +/* Return the hwloc object for CUDA device devid, trying in turn the hwloc CUDA osdev lookup by index, the PCI bus id reported by cudaGetDeviceProperties, and (when NVML support is compiled in) the NVML index and the NVML device handle. Return NULL when none of these lookups finds the device. */ +static hwloc_obj_t get_hwloc_cuda_obj(hwloc_topology_t topology, unsigned devid) +{ + hwloc_obj_t res; + struct cudaDeviceProp props; + cudaError_t cures; + + res = hwloc_cuda_get_device_osdev_by_index(topology, devid); + if (res) + return res; + + cures = cudaGetDeviceProperties(&props, devid); + if (cures == cudaSuccess) + { + res = hwloc_get_pcidev_by_busid(topology, props.pciDomainID, props.pciBusID, props.pciDeviceID, 0); + if (res) + return res; + +#if defined(STARPU_HAVE_NVML_H) && !defined(STARPU_USE_CUDA0) && !defined(STARPU_USE_CUDA1) + nvmlDevice_t nvmldev = _starpu_cuda_get_nvmldev(&props); + + if (nvmldev && _starpu_nvmlDeviceGetIndex && _starpu_nvmlDeviceGetPciInfo && _starpu_nvmlDeviceGetUUID) + { + unsigned int index; + if (_starpu_nvmlDeviceGetIndex(nvmldev, &index) == NVML_SUCCESS) + { + res = hwloc_nvml_get_device_osdev_by_index(topology, index); + if (res) + return res; + } + + res = hwloc_nvml_get_device_osdev(topology, nvmldev); + if (res) + return res; + } +#endif + } + return NULL; +} + +/* find the path between cuda i and cuda
j, and update the maximum bandwidth along the path */ +static int find_platform_cuda_path(hwloc_topology_t topology, unsigned i, unsigned j, double bandwidth) +{ + hwloc_obj_t cudai, cudaj; + cudai = get_hwloc_cuda_obj(topology, i); + cudaj = get_hwloc_cuda_obj(topology, j); + + if (!cudai || !cudaj) + return 0; + + return find_platform_path_up(cudai, cudaj, bandwidth); +} + +/* emit_topology_bandwidths performs the second step: emitting link names */ + +/* Emit the link name of the object */ +static void emit_pci_hub(FILE *f, hwloc_obj_t obj) +{ + STARPU_ASSERT(obj->type == HWLOC_OBJ_BRIDGE); + fprintf(f, "PCI:%04x:[%02x-%02x]", obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); +} + +static void emit_pci_dev(FILE *f, struct hwloc_pcidev_attr_s *pcidev) +{ + fprintf(f, "PCI:%04x:%02x:%02x.%1x", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); +} + +/* Emit the links of the object */ +static void emit_topology_bandwidths(FILE *f, hwloc_obj_t obj, const char *Bps, const char *s) +{ + unsigned i; + if (obj->userdata) + { + struct pci_userdata *data = obj->userdata; + + if (obj->type == HWLOC_OBJ_BRIDGE) + { + /* Uplink */ + fprintf(f, " \n", data->bw_up, Bps, s); + fprintf(f, " \n", data->bw_down, Bps, s); + + /* PCI Switches are assumed to have infinite internal bandwidth */ + if (!obj->name || !strstr(obj->name, "Switch")) + { + /* We assume that PCI Hubs have double bandwidth in + * order to support full duplex but not more */ + fprintf(f, " \n", data->bw * 2, Bps, s); + } + } + else if (obj->type == HWLOC_OBJ_PCI_DEVICE) + { + fprintf(f, " attr->pcidev); + fprintf(f, " up\" bandwidth=\"%f%s\" latency=\"0.000000%s\"/>\n", data->bw_up, Bps, s); + fprintf(f, " attr->pcidev); + fprintf(f, " down\" bandwidth=\"%f%s\" latency=\"0.000000%s\"/>\n", data->bw_down, Bps, s); + } + } + + for (i = 0; i < obj->arity; i++) + emit_topology_bandwidths(f, obj->children[i], Bps, s); 
+#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t io; + for (io = obj->io_first_child; io; io = io->next_sibling) + emit_topology_bandwidths(f, io, Bps, s); +#endif +} + +/* emit_pci_link_* functions perform the third step: emitting the routes */ + +static void emit_pci_link(FILE *f, hwloc_obj_t obj, const char *suffix) +{ + if (obj->type == HWLOC_OBJ_BRIDGE) + { + fprintf(f, " \n", suffix); + } + else if (obj->type == HWLOC_OBJ_PCI_DEVICE) + { + fprintf(f, " attr->pcidev); + fprintf(f, " %s\"/>\n", suffix); + } +} + +/* Go to upstream */ +static void emit_pci_link_up(FILE *f, hwloc_obj_t obj) +{ + emit_pci_link(f, obj, "up"); +} + +/* Go from upstream */ +static void emit_pci_link_down(FILE *f, hwloc_obj_t obj) +{ + emit_pci_link(f, obj, "down"); +} + +/* Go through PCI hub */ +static void emit_pci_link_through(FILE *f, hwloc_obj_t obj) +{ + /* We don't care about traffic going through PCI switches */ + if (obj->type == HWLOC_OBJ_BRIDGE) + { + if (!obj->name || !strstr(obj->name, "Switch")) + emit_pci_link(f, obj, "through"); + else + { + fprintf(f, " \n"); + } + } +} + +/* Our traffic has to go through the host, go back from target up to the host, + * using uplink downstream along the way */ +static void emit_platform_backward_path(FILE *f, hwloc_obj_t obj) +{ + if (!obj) + /* Oops, we should have seen a host bridge. Well, too bad. 
*/ + return; + + /* Go through PCI Hub */ + emit_pci_link_through(f, obj); + /* Go through uplink */ + emit_pci_link_down(f, obj); + + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + { + /* Finished, go through NUMA */ + hwloc_obj_t numa = _starpu_numa_get_obj(obj); + if (numa) + fprintf(f, " \n", numa->logical_index); + else + fprintf(f, " \n"); + return; + } + + /* Continue up */ + emit_platform_backward_path(f, obj->parent); +} +/* Same, but use upstream link */ +static void emit_platform_forward_path(FILE *f, hwloc_obj_t obj) +{ + if (!obj) + /* Oops, we should have seen a host bridge. Well, too bad. */ + return; + + /* Go through PCI Hub */ + emit_pci_link_through(f, obj); + /* Go through uplink */ + emit_pci_link_up(f, obj); + + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + { + /* Finished, go through NUMA */ + hwloc_obj_t numa = _starpu_numa_get_obj(obj); + if (numa) + fprintf(f, " \n", numa->logical_index); + else + fprintf(f, " \n"); + return; + } + + /* Continue up */ + emit_platform_forward_path(f, obj->parent); +} + +/* Find the path from obj1 through parent down to obj2 (without ever going up), + * and use the links along the path */ +static int emit_platform_path_down(FILE *f, hwloc_obj_t parent, hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + unsigned i; + + /* Base case, path is empty */ + if (parent == obj2) + return 1; + + /* Try to go down from parent */ + for (i = 0; i < parent->arity; i++) + if (parent->children[i] != obj1 && emit_platform_path_down(f, parent->children[i], NULL, obj2)) + { + /* Found it down there, path goes through this hub */ + emit_pci_link_down(f, parent->children[i]); + emit_pci_link_through(f, parent); + return 1; + } +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t io; + for (io = parent->io_first_child; io; io = io->next_sibling) + if (io != obj1 && emit_platform_path_down(f, io, NULL, obj2)) + { + /* Found it down there, path 
goes through this hub */ + emit_pci_link_down(f, io); + emit_pci_link_through(f, parent); + return 1; + } +#endif + return 0; +} + +/* Find the path from obj1 to obj2, and use the links along the path */ +static int emit_platform_path_up(FILE *f, hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + int ret; + hwloc_obj_t parent = obj1->parent; + + if (!parent) + { + /* Oops, we should have seen a host bridge. Act as if we had seen it. */ + emit_platform_backward_path(f, obj2); + return 1; + } + + if (emit_platform_path_down(f, parent, obj1, obj2)) + /* obj2 was a mere (sub)child of our parent */ + return 1; + + /* obj2 is not a (sub)child of our parent, we have to go up through the parent */ + if (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + { + /* We have to go up to the Interconnect, so obj2 is not in the same PCI + * tree, so we're done for obj1 to Interconnect, and just find the path + * from obj2 to Interconnect too. + */ + emit_platform_backward_path(f, obj2); + + hwloc_obj_t numa2 = _starpu_numa_get_obj(obj2); + hwloc_obj_t numa1 = _starpu_numa_get_obj(obj1); + + if (!numa1 || !numa2 || numa1 != numa2) + { + fprintf(f, " \n"); + if (numa1) + fprintf(f, " \n", numa1->logical_index); + } + + emit_pci_link_up(f, parent); + emit_pci_link_through(f, parent); + + return 1; + } + + /* Not at host yet, just go up */ + ret = emit_platform_path_up(f, parent, obj2); + emit_pci_link_up(f, parent); + emit_pci_link_through(f, parent); + return ret; +} + +/* Clean our mess in the topology before destroying it: free and reset the userdata of obj, then recurse into its children (and its I/O children with hwloc >= 2) */ +static void clean_topology(hwloc_obj_t obj) +{ + unsigned i; + if (obj->userdata) + { + free(obj->userdata); + obj->userdata = NULL; + } + for (i = 0; i < obj->arity; i++) + clean_topology(obj->children[i]); +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t io; + for (io = obj->io_first_child; io; io = io->next_sibling) + clean_topology(io); +#endif +} +#endif + +static void write_bus_platform_file_content(int version) +{
+ FILE *f; + char path[PATH_LENGTH]; + unsigned i; + const char *speed, *flops, *Bps, *s; + char dash; + int locked; + + if (version == 3) + { + speed = "power"; + flops = ""; + Bps = ""; + s = ""; + dash = '_'; + } + else + { + speed = "speed"; + flops = "f"; + Bps = "Bps"; + s = "s"; + dash = '-'; + } + + STARPU_ASSERT(was_benchmarked); + + _starpu_simgrid_get_platform_path(version, path, sizeof(path)); + + _STARPU_DEBUG("writing platform to %s\n", path); + + f = fopen(path, "a+"); + if (!f) + { + perror("fopen write_bus_platform_file_content"); + _STARPU_DISP("path '%s'\n", path); + fflush(stderr); + STARPU_ABORT(); + } + locked = _starpu_fwrlock(f) == 0; + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + + fprintf(f, + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n", + version == 3 + ? "http://simgrid.gforge.inria.fr/simgrid.dtd" + : "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd", + version, dash, dash, dash, dash, speed, flops); + + for (i = 0; i < ncpus; i++) + /* TODO: host memory for out-of-core simulation */ + fprintf(f, " \n", i, speed, flops); + + for (i = 0; i < ncuda; i++) + { + fprintf(f, " \n", i, speed, flops); + fprintf(f, " \n", cuda_devname[i]); + fprintf(f, " \n", (unsigned long long) cuda_size[i]); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + fprintf(f, " \n"); +#endif + /* TODO: record cudadev_direct instead of assuming it's NUMA nodes */ + fprintf(f, " \n"); + } + + for (i = 0; i < nopencl; i++) + { + fprintf(f, " \n", i, speed, flops); + fprintf(f, " \n", opencl_devname[i]); + fprintf(f, " \n", (unsigned long long) opencl_size[i]); + fprintf(f, " \n"); + } + + fprintf(f, "\n \n", speed, flops); + + /* + * Compute maximum bandwidth, taken as host bandwidth + */ + double max_bandwidth = 0; + double max_bandwidth_numa[nnumas]; + unsigned numa; + for (numa = 0; numa < nnumas; numa++) + max_bandwidth_numa[numa] = 0.; + +#ifdef STARPU_USE_CUDA + for (i = 0; i < ncuda; i++) + { + for (numa = 0; numa < 
nnumas; numa++) + { + double down_bw = 1.0 / timing_per_numa[STARPU_CUDA_RAM][i][numa].timing_dtoh; + double up_bw = 1.0 / timing_per_numa[STARPU_CUDA_RAM][i][numa].timing_htod; + if (max_bandwidth < down_bw) + max_bandwidth = down_bw; + if (max_bandwidth_numa[numa] < down_bw) + max_bandwidth_numa[numa] = down_bw; + if (max_bandwidth < up_bw) + max_bandwidth = up_bw; + if (max_bandwidth_numa[numa] < up_bw) + max_bandwidth_numa[numa] = up_bw; + } + } +#endif +#ifdef STARPU_USE_OPENCL + for (i = 0; i < nopencl; i++) + { + for (numa = 0; numa < nnumas; numa++) + { + double down_bw = 1.0 / timing_per_numa[STARPU_OPENCL_RAM][i][numa].timing_dtoh; + double up_bw = 1.0 / timing_per_numa[STARPU_OPENCL_RAM][i][numa].timing_htod; + if (max_bandwidth < down_bw) + max_bandwidth = down_bw; + if (max_bandwidth_numa[numa] < down_bw) + max_bandwidth_numa[numa] = down_bw; + if (max_bandwidth < up_bw) + max_bandwidth = up_bw; + if (max_bandwidth_numa[numa] < up_bw) + max_bandwidth_numa[numa] = up_bw; + } + } +#endif + for (numa = 0; numa < nnumas; numa++) + fprintf(f, " \n", numa, max_bandwidth_numa[numa]*1000000, Bps, s); + fprintf(f, " \n\n", max_bandwidth*1000000, Bps, s); + + /* + * OpenCL links + */ + +#ifdef STARPU_USE_OPENCL + for (i = 0; i < nopencl; i++) + { + char i_name[17]; + snprintf(i_name, sizeof(i_name), "OpenCL%u", i); + fprintf(f, " \n", + i_name, + 1000000 / search_bus_best_timing(i, STARPU_OPENCL_RAM, 1), Bps, + search_bus_best_latency(i, STARPU_OPENCL_RAM, 1)/1000000., s); + fprintf(f, " \n", + i_name, + 1000000 / search_bus_best_timing(i, STARPU_OPENCL_RAM, 0), Bps, + search_bus_best_latency(i, STARPU_OPENCL_RAM, 0)/1000000., s); + } + fprintf(f, "\n"); +#endif + + /* + * CUDA links and routes + */ + +#ifdef STARPU_USE_CUDA + /* Write RAM/CUDA bandwidths and latencies */ + for (i = 0; i < ncuda; i++) + { + char i_name[16]; + snprintf(i_name, sizeof(i_name), "CUDA%u", i); + fprintf(f, " \n", + i_name, + 1000000. 
/ search_bus_best_timing(i, STARPU_CUDA_RAM, 1), Bps, + search_bus_best_latency(i, STARPU_CUDA_RAM, 1)/1000000., s); + fprintf(f, " \n", + i_name, + 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 0), Bps, + search_bus_best_latency(i, STARPU_CUDA_RAM, 0)/1000000., s); + } + fprintf(f, "\n"); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + /* Write CUDA/CUDA bandwidths and latencies */ + for (i = 0; i < ncuda; i++) + { + unsigned j; + char i_name[16]; + snprintf(i_name, sizeof(i_name), "CUDA%u", i); + for (j = 0; j < ncuda; j++) + { + char j_name[16]; + if (j == i) + continue; + snprintf(j_name, sizeof(j_name), "CUDA%u", j); + fprintf(f, " \n", + i_name, j_name, + 1000000. / timing_dtod[STARPU_CUDA_RAM][i][j], Bps, + latency_dtod[STARPU_CUDA_RAM][i][j]/1000000., s); + } + } +#endif + +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + /* If we have enough hwloc information, write PCI bandwidths and routes */ + if (!starpu_getenv_number_default("STARPU_PCI_FLAT", 0) && ncuda > 0) + { + int ret; + hwloc_topology_t topology; + ret = hwloc_topology_init(&topology); + STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); + _starpu_topology_filter(topology); + ret = hwloc_topology_load(topology); + STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); + + char nvlink[ncuda][ncuda]; + char nvlinkhost[ncuda]; + char nvswitch[ncuda]; + memset(nvlink, 0, sizeof(nvlink)); + memset(nvlinkhost, 0, sizeof(nvlinkhost)); + memset(nvswitch, 0, sizeof(nvswitch)); + + /* TODO: move to drivers */ +#if defined(STARPU_HAVE_NVML_H) && !defined(STARPU_USE_CUDA0) && !defined(STARPU_USE_CUDA1) + /* First find NVLinks */ + struct cudaDeviceProp props[ncuda]; + + for (i = 0; i < ncuda; i++) + { + cudaError_t cures = cudaGetDeviceProperties(&props[i], i); + if (cures != cudaSuccess) + props[i].name[0] = 0; + } + + if (_starpu_nvmlDeviceGetNvLinkState && 
_starpu_nvmlDeviceGetNvLinkRemotePciInfo) + for (i = 0; i < ncuda; i++) + { + unsigned j; + + if (!props[i].name[0]) + continue; + + nvmlDevice_t nvmldev; + nvmldev = _starpu_cuda_get_nvmldev(&props[i]); + if (!nvmldev) + continue; + + for (j = 0; j < NVML_NVLINK_MAX_LINKS; j++) + { + nvmlEnableState_t active; + nvmlReturn_t nvmlret; + nvmlPciInfo_t pci; + unsigned k; + + nvmlret = _starpu_nvmlDeviceGetNvLinkState(nvmldev, j, &active); + if (nvmlret != NVML_SUCCESS) + continue; + if (active != NVML_FEATURE_ENABLED) + continue; + nvmlret = _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvmldev, j, &pci); + if (nvmlret != NVML_SUCCESS) + continue; + + hwloc_obj_t obj = hwloc_get_pcidev_by_busid(topology, + pci.domain, pci.bus, pci.device, 0); + if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE && (obj->attr->pcidev.class_id >> 8 == 0x06)) + { + /* This is a PCI bridge */ + switch (obj->attr->pcidev.vendor_id) + { + case 0x1014: + /* IBM OpenCAPI port, direct CPU-GPU NVLink */ + /* TODO: NUMA affinity */ + nvlinkhost[i] = 1; + continue; + case 0x10de: + nvswitch[i] = 1; + continue; + } + } + + /* Otherwise, link to another GPU? 
*/ + for (k = 0; k < ncuda; k++) + { + if ((int) pci.domain == props[k].pciDomainID + && (int) pci.bus == props[k].pciBusID + && (int) pci.device == props[k].pciDeviceID) + { + nvlink[i][k] = 1; + nvlink[k][i] = 1; + break; + } + } + if (k < ncuda) + /* Yes it was another GPU */ + continue; + + /* No idea what this is */ + _STARPU_DISP("Warning: NVLink to unknown PCI card %04x:%02x:%02x: %04x\n", pci.domain, pci.bus, pci.device, pci.pciDeviceId); + } + } + + for (i = 0; i < ncuda; i++) + { + unsigned j; + for (j = i+1; j < ncuda; j++) + { + if (nvswitch[i] && nvswitch[j]) + { + static int warned = 0; + if (!warned) + { + warned = 1; + /* TODO: follow answers to https://forums.developer.nvidia.com/t/how-to-distinguish-different-nvswitch/241983 */ + _STARPU_DISP("Warning: NVSwitch not tested yet with several switches, assuming there is only one NVSwitch in the system\n"); + } + nvlink[i][j] = 1; + nvlink[j][i] = 1; + } + } + } +#endif + + /* Find paths and record measured bandwidth along the path */ + for (i = 0; i < ncuda; i++) + { + unsigned j; + + for (j = 0; j < ncuda; j++) + if (i != j && !nvlink[i][j] && !nvlinkhost[i] && !nvlinkhost[j]) + if (!find_platform_cuda_path(topology, i, j, 1000000. / timing_dtod[STARPU_CUDA_RAM][i][j])) + { + _STARPU_DISP("Warning: could not get CUDA location from hwloc\n"); + clean_topology(hwloc_get_root_obj(topology)); + hwloc_topology_destroy(topology); + goto flat_cuda; + } + + /* Record RAM/CUDA bandwidths */ + if (!nvlinkhost[i]) + { + find_platform_forward_path(get_hwloc_cuda_obj(topology, i), 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 0)); + find_platform_backward_path(get_hwloc_cuda_obj(topology, i), 1000000. 
/ search_bus_best_timing(i, STARPU_CUDA_RAM, 1)); + } + } + + /* Ok, found path in all cases, can emit advanced platform routes */ + fprintf(f, "\n"); + emit_topology_bandwidths(f, hwloc_get_root_obj(topology), Bps, s); + fprintf(f, "\n"); + + for (i = 0; i < ncuda; i++) + { + unsigned j; + for (j = 0; j < ncuda; j++) + if (i != j) + { + fprintf(f, " \n", i, j); + fprintf(f, " \n", i, j); + if (!nvlink[i][j]) + { + if (nvlinkhost[i] && nvlinkhost[j]) + { + /* FIXME: if they are directly connected through PCI, is NVLink host preferred? */ + if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) + fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); + fprintf(f, " \n"); + if (gpu_numa[STARPU_CUDA_RAM][j] >= 0) + fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][j]); + } + else + emit_platform_path_up(f, + get_hwloc_cuda_obj(topology, i), + get_hwloc_cuda_obj(topology, j)); + } + fprintf(f, " \n"); + } + + fprintf(f, " \n", i); + fprintf(f, " \n", i); + if (nvlinkhost[i]) + { + if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) + fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); + } + else + emit_platform_forward_path(f, get_hwloc_cuda_obj(topology, i)); + fprintf(f, " \n"); + + fprintf(f, " \n", i); + fprintf(f, " \n", i); + if (nvlinkhost[i]) + { + if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) + fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); + } + else + emit_platform_backward_path(f, get_hwloc_cuda_obj(topology, i)); + fprintf(f, " \n"); + } + + clean_topology(hwloc_get_root_obj(topology)); + hwloc_topology_destroy(topology); + } + else + { +flat_cuda: +#else + { +#endif + /* If we don't have enough hwloc information, write trivial routes always through host */ + for (i = 0; i < ncuda; i++) + { + char i_name[16]; + snprintf(i_name, sizeof(i_name), "CUDA%u", i); + fprintf(f, " \n", i_name); + fprintf(f, " \n", i_name); + fprintf(f, " \n"); + fprintf(f, " \n"); + fprintf(f, " \n", i_name); + fprintf(f, " \n", i_name); + fprintf(f, " \n"); + fprintf(f, " \n"); + } +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + for (i = 
0; i < ncuda; i++) + { + unsigned j; + char i_name[16]; + snprintf(i_name, sizeof(i_name), "CUDA%u", i); + for (j = 0; j < ncuda; j++) + { + char j_name[16]; + if (j == i) + continue; + snprintf(j_name, sizeof(j_name), "CUDA%u", j); + fprintf(f, " \n", i_name, j_name); + fprintf(f, " \n", i_name, j_name); + fprintf(f, " \n"); + fprintf(f, " \n"); + } + } +#endif + } /* defined(STARPU_HAVE_HWLOC) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) */ + fprintf(f, "\n"); +#endif /* STARPU_USE_CUDA */ + + /* + * OpenCL routes + */ + +#ifdef STARPU_USE_OPENCL + for (i = 0; i < nopencl; i++) + { + char i_name[17]; + snprintf(i_name, sizeof(i_name), "OpenCL%u", i); + fprintf(f, " \n", i_name); + fprintf(f, " \n", i_name); + fprintf(f, " \n"); + fprintf(f, " \n"); + fprintf(f, " \n", i_name); + fprintf(f, " \n", i_name); + fprintf(f, " \n"); + fprintf(f, " \n"); + } +#endif + + fprintf(f, + " \n" + " \n" + ); + + if (locked) + _starpu_fwrunlock(f); + fclose(f); + +} + +static void generate_bus_platform_file(void) +{ + if (!was_benchmarked) + benchmark_all_memory_nodes(); + + write_bus_platform_file_content(3); + write_bus_platform_file_content(4); +} + +static void check_bus_platform_file(void) +{ + int res; + + char path[PATH_LENGTH]; + _starpu_simgrid_get_platform_path(4, path, sizeof(path)); + + res = access(path, F_OK); + + if (!res) + { + _starpu_simgrid_get_platform_path(3, path, sizeof(path)); + res = access(path, F_OK); + } + + if (res) + { + /* File does not exist yet */ + generate_bus_platform_file(); + } +} + +/* + * Generic + */ + +static void _starpu_bus_force_sampling(int location) +{ + _STARPU_DEBUG("Force bus sampling ...\n"); + if (location < 0) + { + location = _starpu_set_default_perf_model_bus(); + } + _starpu_create_bus_sampling_directory_if_needed(location); + + generate_bus_affinity_file(); + generate_bus_latency_file(); + generate_bus_bandwidth_file(); + generate_bus_config_file(); + generate_bus_platform_file(); +} +#endif /* !SIMGRID */ + +void 
_starpu_load_bus_performance_files(void) +{ + _starpu_create_bus_sampling_directory_if_needed(-1); + + struct _starpu_machine_config * config = _starpu_get_machine_config(); + nnumas = _starpu_topology_get_nhwnumanodes(config); +#ifndef STARPU_SIMGRID + ncpus = _starpu_topology_get_nhwcpu(config); +#endif + + /* TODO: factorize these calls */ +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + ncuda = _starpu_get_cuda_device_count(); +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + nopencl = _starpu_opencl_get_device_count(); +#endif +#if defined(STARPU_USE_MPI_MASTER_SLAVE) + nmpims = _starpu_mpi_src_get_device_count(); +#endif +#if defined(STARPU_USE_TCPIP_MASTER_SLAVE) + ntcpip_ms = _starpu_tcpip_src_get_device_count(); +#endif + +#ifndef STARPU_SIMGRID + check_bus_config_file(); +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* be sure that master wrote the perf files */ + if (_starpu_config.conf.nmpi_ms != 0) + _starpu_mpi_common_barrier(); +#endif + +#ifndef STARPU_SIMGRID + load_bus_affinity_file(); +#endif + load_bus_latency_file(); + load_bus_bandwidth_file(); +#ifndef STARPU_SIMGRID + check_bus_platform_file(); +#endif +} + +static unsigned _get_raw_memory_node_index(unsigned node) +{ + enum starpu_node_kind type = starpu_node_get_kind(node), cur; + int devid = starpu_memory_node_get_devid(node); + unsigned base; + + base = 0; + for (cur = STARPU_CPU_RAM; cur < type; cur++) + base += nmem[cur]; + + return base + devid; +} + +void _starpu_init_bus_performance(void) +{ + unsigned src, dst, raw_src, raw_dst; + + for (src = 0; src < STARPU_MAXNODES; src++) + { + for (dst = 0; dst < STARPU_MAXNODES; dst++) + { + raw_src = _get_raw_memory_node_index(src); + raw_dst = _get_raw_memory_node_index(dst); + bandwidth_matrix[src][dst] = raw_bandwidth_matrix[raw_src][raw_dst]; + latency_matrix[src][dst] = raw_latency_matrix[raw_src][raw_dst]; + } + } +} + +/* (in MB/s) */ +double starpu_transfer_bandwidth(unsigned src_node, unsigned 
dst_node) +{ + return bandwidth_matrix[src_node][dst_node]; +} + +/* Latency (in µs) recorded in latency_matrix for transfers from src_node to dst_node */ +double starpu_transfer_latency(unsigned src_node, unsigned dst_node) +{ + return latency_matrix[src_node][dst_node]; +} + +/* Predicted duration (in µs) of transferring size bytes from src_node to dst_node; 0 when both nodes are the same */ +double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size) +{ + if (src_node == dst_node) + return 0; + + double bandwidth = bandwidth_matrix[src_node][dst_node]; + double latency = latency_matrix[src_node][dst_node]; + struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; + int busid = starpu_bus_get_id(src_node, dst_node); +#if 0 + int direct = starpu_bus_get_direct(busid); +#endif + float ngpus = starpu_bus_get_ngpus(busid); + if (ngpus != 1) + ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER]; + +#ifdef STARPU_DEVEL +#warning FIXME: ngpus should not be used e.g. for slow disk transfers... +#endif + +#if 0 + /* Ideally we should take into account that some GPUs are directly + * connected through a PCI switch, which has less contention than the + * Host bridge, but doing that seems to *decrease* performance...
*/ + if (direct) + { + float neighbours = starpu_bus_get_ngpus(busid); + /* Count transfers of these GPUs, and count transfers between + * other GPUs and these GPUs */ + ngpus = neighbours + (ngpus - neighbours) * neighbours / ngpus; + } +#endif + + if (isnan(latency) || isnan(bandwidth)) + { + static int warned = 0; + if (!warned) + { + _STARPU_DISP("Warning: no bus performance model was calibrated between nodes %d and %d, ignoring transfer time\n", src_node, dst_node); + warned = 1; + } + return 0; + } + + return latency + (size/bandwidth)*2*ngpus; +} + +/* calculate save bandwidth and latency */ +/* bandwidth in MB/s - latency in µs */ +void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name) +{ + unsigned int i, j; + double slowness_disk_between_main_ram, slowness_main_ram_between_node; + int print_stats = starpu_getenv_number_default("STARPU_BUS_STATS", 0); + + if (print_stats) + { + fprintf(stderr, "\n#---------------------\n"); + fprintf(stderr, "Data transfer speed for %s (node %u):\n", name, node); + } + + /* save bandwidth */ + for(i = 0; i < STARPU_MAXNODES; ++i) + { + for(j = 0; j < STARPU_MAXNODES; ++j) + { + if (i == j && j == node) /* source == destination == node */ + { + bandwidth_matrix[i][j] = 0; + } + else if (i == node) /* source == disk */ + { + /* convert in slowness */ + if(bandwidth_read != 0) + slowness_disk_between_main_ram = 1/bandwidth_read; + else + slowness_disk_between_main_ram = 0; + + if(bandwidth_matrix[STARPU_MAIN_RAM][j] != 0) + slowness_main_ram_between_node = 1/bandwidth_matrix[STARPU_MAIN_RAM][j]; + else + slowness_main_ram_between_node = 0; + + bandwidth_matrix[i][j] = 1/(slowness_disk_between_main_ram+slowness_main_ram_between_node); + + if (!isnan(bandwidth_matrix[i][j]) && print_stats) + fprintf(stderr,"%u -> %u: %.0f MB/s\n", i, j, bandwidth_matrix[i][j]); + } + else if (j == node) /* destination == disk */ + { 
+ /* convert in slowness */ + if(bandwidth_write != 0) + slowness_disk_between_main_ram = 1/bandwidth_write; + else + slowness_disk_between_main_ram = 0; + + if(bandwidth_matrix[i][STARPU_MAIN_RAM] != 0) + slowness_main_ram_between_node = 1/bandwidth_matrix[i][STARPU_MAIN_RAM]; + else + slowness_main_ram_between_node = 0; + + bandwidth_matrix[i][j] = 1/(slowness_disk_between_main_ram+slowness_main_ram_between_node); + + if (!isnan(bandwidth_matrix[i][j]) && print_stats) + fprintf(stderr,"%u -> %u: %.0f MB/s\n", i, j, bandwidth_matrix[i][j]); + } + else if (j > node || i > node) /* not affected by the node */ + { + bandwidth_matrix[i][j] = NAN; + } + } + } + + /* save latency: as for the bandwidth, a transfer whose source is the disk pays the disk read latency, and a transfer whose destination is the disk pays the disk write latency; the main RAM <-> other node latency is added in both cases */ + for(i = 0; i < STARPU_MAXNODES; ++i) + { + for(j = 0; j < STARPU_MAXNODES; ++j) + { + if (i == j && j == node) /* source == destination == node */ + { + latency_matrix[i][j] = 0; + } + else if (i == node) /* source == disk: data is read from the disk */ + { + latency_matrix[i][j] = (latency_read+latency_matrix[STARPU_MAIN_RAM][j]); + + if (!isnan(latency_matrix[i][j]) && print_stats) + fprintf(stderr,"%u -> %u: %.0f us\n", i, j, latency_matrix[i][j]); + } + else if (j == node) /* destination == disk: data is written to the disk */ + { + latency_matrix[i][j] = (latency_write+latency_matrix[i][STARPU_MAIN_RAM]); + + if (!isnan(latency_matrix[i][j]) && print_stats) + fprintf(stderr,"%u -> %u: %.0f us\n", i, j, latency_matrix[i][j]); + } + else if (j > node || i > node) /* not affected by the node */ + { + latency_matrix[i][j] = NAN; + } + } + } + + if (print_stats) + fprintf(stderr, "\n#---------------------\n"); +} diff --git a/src/core/perfmodel/perfmodel_history.c b/src/core/perfmodel/perfmodel_history.c new file mode 100644 index 0000000..988a0f0 --- /dev/null +++ b/src/core/perfmodel/perfmodel_history.c @@ -0,0 +1,2249 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom SudParis + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) +#include +#include +#endif +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add) +#define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out) + +static struct starpu_perfmodel_arch **arch_combs; +static int current_arch_comb; +static int nb_arch_combs; +static starpu_pthread_rwlock_t arch_combs_mutex = STARPU_PTHREAD_RWLOCK_INITIALIZER; +static int historymaxerror; +static char ignore_devid[STARPU_NARCH]; + +/* How many executions a codelet will have to be measured before we + * consider that calibration will provide a value good enough for scheduling */ +unsigned _starpu_calibration_minimum; + +struct starpu_perfmodel_history_table +{ + UT_hash_handle hh; + uint32_t footprint; + struct starpu_perfmodel_history_entry *history_entry; +}; + +/* We want more than 10% variance on X to trust regression */ +#define VALID_REGRESSION(reg_model) \ + ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= 
_starpu_calibration_minimum) + +static starpu_pthread_rwlock_t registered_models_rwlock; +LIST_TYPE(_starpu_perfmodel, + struct starpu_perfmodel *model; +) +static struct _starpu_perfmodel_list registered_models; + +static char _starpu_perfmodel_hostname[STR_LONG_LENGTH]; + +void starpu_perfmodel_initialize(void) +{ + /* make sure performance model directories exist (or create them) */ + _starpu_create_bus_sampling_directory_if_needed(-1); + + _starpu_perfmodel_list_init(®istered_models); + + STARPU_PTHREAD_RWLOCK_INIT(®istered_models_rwlock, NULL); + STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL); + + _starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname)); +} + +void _starpu_initialize_registered_performance_models(void) +{ + starpu_perfmodel_initialize(); + + struct _starpu_machine_config *conf = _starpu_get_machine_config(); + /* FIXME: just iterate over all archs */ + unsigned ncores = conf->topology.nhwdevices[STARPU_CPU_WORKER]; + unsigned ncuda = conf->topology.nhwdevices[STARPU_CUDA_WORKER]; + unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER]; + enum starpu_worker_archtype archtype; +#if STARPU_MAXMPIDEVS > 0 || STARPU_MAXTCPIPDEVS > 0 + unsigned i; +#endif + unsigned nmpi = 0; + unsigned ntcpip = 0; +#if STARPU_MAXMPIDEVS > 0 + STARPU_ASSERT(conf->topology.nhwdevices[STARPU_MPI_MS_WORKER] < STARPU_NMAXDEVS); + for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++) + nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i]; +#endif +#if STARPU_MAXTCPIPDEVS > 0 + for(i = 0; i < conf->topology.nhwdevices[STARPU_TCPIP_MS_WORKER]; i++) + { + ntcpip += conf->topology.nhwworker[STARPU_TCPIP_MS_WORKER][i]; + } +#endif + + // We used to allocate 2**(ncores + ncuda + nopencl + nmpi + ntcpip), this is too big + // We now allocate only 2*(ncores + ncuda + nopencl + nmpi + ntcpip), and reallocate when necessary in starpu_perfmodel_arch_comb_add + nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmpi + 
ntcpip); + _STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*)); + current_arch_comb = 0; + historymaxerror = starpu_getenv_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR); + _starpu_calibration_minimum = starpu_getenv_number_default("STARPU_CALIBRATE_MINIMUM", 10); + + for (archtype = 0; archtype < STARPU_NARCH; archtype++) + { + char name[128]; + const char *arch = starpu_worker_get_type_as_env_var(archtype); + int def = archtype == STARPU_CPU_WORKER ? 1 : 0; + snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch); + ignore_devid[archtype] = starpu_getenv_number_default(name, def); + } +} + +void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl) +{ + int i; + + _STARPU_MALLOC(model->state->per_arch[comb], nb_impl*sizeof(struct starpu_perfmodel_per_arch)); + for(i = 0; i < nb_impl; i++) + { + memset(&model->state->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch)); + } + model->state->nimpls_set[comb] = nb_impl; +} + +void _starpu_perfmodel_malloc_per_arch_is_set(struct starpu_perfmodel *model, int comb, int nb_impl) +{ + int i; + + _STARPU_MALLOC(model->state->per_arch_is_set[comb], nb_impl*sizeof(int)); + for(i = 0; i < nb_impl; i++) + { + model->state->per_arch_is_set[comb][i] = 0; + } +} + +int _starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices) +{ + int comb, ncomb; + ncomb = current_arch_comb; + for(comb = 0; comb < ncomb; comb++) + { + int found = 0; + if(arch_combs[comb]->ndevices == ndevices) + { + int dev1, dev2; + int nfounded = 0; + for(dev1 = 0; dev1 < arch_combs[comb]->ndevices; dev1++) + { + for(dev2 = 0; dev2 < ndevices; dev2++) + { + if(arch_combs[comb]->devices[dev1].type == devices[dev2].type && + (ignore_devid[devices[dev2].type] || + arch_combs[comb]->devices[dev1].devid == devices[dev2].devid) && + arch_combs[comb]->devices[dev1].ncores == devices[dev2].ncores) + nfounded++; + } + } + 
if(nfounded == ndevices) + found = 1; + } + if (found) + return comb; + } + return -1; +} + +int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices) +{ + int ret; + STARPU_PTHREAD_RWLOCK_RDLOCK(&arch_combs_mutex); + ret = _starpu_perfmodel_arch_comb_get(ndevices, devices); + STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); + return ret; +} + +int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices) +{ + STARPU_PTHREAD_RWLOCK_WRLOCK(&arch_combs_mutex); + int comb = _starpu_perfmodel_arch_comb_get(ndevices, devices); + if (comb != -1) + { + /* Somebody else added it in between */ + STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); + return comb; + } + if (current_arch_comb >= nb_arch_combs) + { + // We need to allocate more arch_combs + nb_arch_combs = current_arch_comb+10; + _STARPU_REALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*)); + } + _STARPU_MALLOC(arch_combs[current_arch_comb], sizeof(struct starpu_perfmodel_arch)); + _STARPU_MALLOC(arch_combs[current_arch_comb]->devices, ndevices*sizeof(struct starpu_perfmodel_device)); + arch_combs[current_arch_comb]->ndevices = ndevices; + int dev; + for(dev = 0; dev < ndevices; dev++) + { + arch_combs[current_arch_comb]->devices[dev].type = devices[dev].type; + arch_combs[current_arch_comb]->devices[dev].devid = devices[dev].devid; + arch_combs[current_arch_comb]->devices[dev].ncores = devices[dev].ncores; + } + comb = current_arch_comb++; + STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); + return comb; +} + +void _starpu_free_arch_combs(void) +{ + int i; + STARPU_PTHREAD_RWLOCK_WRLOCK(&arch_combs_mutex); + for(i = 0; i < current_arch_comb; i++) + { + free(arch_combs[i]->devices); + free(arch_combs[i]); + } + current_arch_comb = 0; + free(arch_combs); + arch_combs = NULL; + STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); + STARPU_PTHREAD_RWLOCK_DESTROY(&arch_combs_mutex); + STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL); +} + 
+int starpu_perfmodel_get_narch_combs() +{ + return current_arch_comb; +} + +struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb) +{ + return arch_combs[comb]; +} + +static size_t __starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j) +{ + struct starpu_task *task = j->task; + int comb = arch == NULL ? -1 : starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + + if (model && model->state->per_arch && comb != -1 && comb < model->state->ncombs_set && model->state->per_arch[comb] && model->state->per_arch[comb][impl].size_base) + { + return model->state->per_arch[comb][impl].size_base(task, arch, impl); + } + else if (model && model->size_base) + { + return model->size_base(task, impl); + } + else + { + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + size_t size = 0; + + unsigned buffer; + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); + if (mode & STARPU_NOFOOTPRINT) + continue; + size += _starpu_data_get_size(handle); + } + return size; + } +} + +size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j) +{ + size_t ret; + if (model) + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + ret = __starpu_job_get_data_size(model, arch, impl, j); + if (model) + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + return ret; +} + +/* + * History based model + */ +static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, struct starpu_perfmodel_history_list **list, struct starpu_perfmodel_history_table **history_ptr) +{ + struct starpu_perfmodel_history_list *link; + struct starpu_perfmodel_history_table *table; + + _STARPU_MALLOC(link, sizeof(struct starpu_perfmodel_history_list)); + link->next = 
*list; + link->entry = entry; + *list = link; + + /* detect concurrency issue */ + //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table); + //STARPU_ASSERT(table == NULL); + + _STARPU_MALLOC(table, sizeof(*table)); + table->footprint = entry->footprint; + table->history_entry = entry; + HASH_ADD_UINT32_T(*history_ptr, footprint, table); +} + +#ifndef STARPU_SIMGRID +static void check_reg_model(struct starpu_perfmodel *model, int comb, int impl) +{ + struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; + struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; + + /* + * Linear Regression model + */ + + /* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */ + double alpha = nan(""), beta = nan(""); + if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) + { + if (reg_model->nsample > 1) + { + alpha = reg_model->alpha; + beta = reg_model->beta; + } + } + + /* TODO: check: + * reg_model->sumlnx + * reg_model->sumlnx2 + * reg_model->sumlny + * reg_model->sumlnxlny + * alpha + * beta + * reg_model->minx + * reg_model->maxx + */ + (void)alpha; + (void)beta; + + /* + * Non-Linear Regression model + */ + + double a = nan(""), b = nan(""), c = nan(""); + + if (model->type == STARPU_NL_REGRESSION_BASED) + _starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c); + + /* TODO: check: + * a + * b + * c + */ + + /* + * Multiple Regression Model + */ + + if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) + { + /* TODO: check: */ + } +} + +static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, int comb, int impl) +{ + struct starpu_perfmodel_per_arch *per_arch_model; + + per_arch_model = &model->state->per_arch[comb][impl]; + struct starpu_perfmodel_regression_model *reg_model; + reg_model = &per_arch_model->regression; + + /* + * Linear Regression model + */ + + /* Unless we have enough measurements, we put 
NaN in the file to indicate the model is invalid */ + double alpha = nan(""), beta = nan(""); + if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) + { + if (reg_model->nsample > 1) + { + alpha = reg_model->alpha; + beta = reg_model->beta; + } + } + + fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n"); + fprintf(f, "%-15e\t%-15e\t%-15e\t%-15e\t", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny); + _starpu_write_double(f, "%-15e", alpha); + fprintf(f, "\t"); + _starpu_write_double(f, "%-15e", beta); + fprintf(f, "\t%u\t%-15lu\t%-15lu\n", reg_model->nsample, reg_model->minx, reg_model->maxx); + + /* + * Non-Linear Regression model + */ + + double a = nan(""), b = nan(""), c = nan(""); + + if (model->type == STARPU_NL_REGRESSION_BASED) + { + if (_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c) != 0) + _STARPU_DISP("Warning: could not compute a non-linear regression for model %s\n", model->symbol); + } + + fprintf(f, "# a\t\tb\t\tc\n"); + _starpu_write_double(f, "%-15e", a); + fprintf(f, "\t"); + _starpu_write_double(f, "%-15e", b); + fprintf(f, "\t"); + _starpu_write_double(f, "%-15e", c); + fprintf(f, "\n"); + + /* + * Multiple Regression Model + */ + + if (model->type != STARPU_MULTIPLE_REGRESSION_BASED) + { + fprintf(f, "# not multiple-regression-base\n"); + fprintf(f, "0\n"); + } + else + { + if (reg_model->ncoeff==0 && model->ncombinations!=0 && model->combinations!=NULL) + { + reg_model->ncoeff = model->ncombinations + 1; + } + + _STARPU_MALLOC(reg_model->coeff, reg_model->ncoeff*sizeof(double)); + _starpu_multiple_regression(per_arch_model->list, reg_model->coeff, reg_model->ncoeff, model->nparameters, model->parameters_names, model->combinations, model->symbol); + + fprintf(f, "# n\tintercept\t"); + if (reg_model->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL) + fprintf(f, "\n1\tnan"); + else + { + unsigned i; + 
for (i=0; i < model->ncombinations; i++) + { + if (model->parameters_names == NULL) + fprintf(f, "c%u", i+1); + else + { + unsigned j; + int first=1; + for(j=0; j < model->nparameters; j++) + { + if (model->combinations[i][j] > 0) + { + if (first) + first=0; + else + fprintf(f, "*"); + + if(model->parameters_names[j] != NULL) + fprintf(f, "%s", model->parameters_names[j]); + else + fprintf(f, "P%u", j); + + if (model->combinations[i][j] > 1) + fprintf(f, "^%d", model->combinations[i][j]); + } + } + } + fprintf(f, "\t\t"); + } + + fprintf(f, "\n%u", reg_model->ncoeff); + for (i=0; i < reg_model->ncoeff; i++) + fprintf(f, "\t%-15e", reg_model->coeff[i]); + } + } +} +#endif + +static void scan_reg_model(FILE *f, const char *path, struct starpu_perfmodel_regression_model *reg_model) +{ + int res; + + /* + * Linear Regression model + */ + + _starpu_drop_comments(f); + + res = fscanf(f, "%le\t%le\t%le\t%le\t", ®_model->sumlnx, ®_model->sumlnx2, ®_model->sumlny, ®_model->sumlnxlny); + STARPU_ASSERT_MSG(res == 4, "Incorrect performance model file %s", path); + res = _starpu_read_double(f, "%le", ®_model->alpha); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + res = _starpu_read_double(f, "\t%le", ®_model->beta); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + res = fscanf(f, "\t%u\t%lu\t%lu\n", ®_model->nsample, ®_model->minx, ®_model->maxx); + STARPU_ASSERT_MSG(res == 3, "Incorrect performance model file %s", path); + + /* If any of the parameters describing the linear regression model is NaN, the model is invalid */ + unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta)); + reg_model->valid = !invalid && VALID_REGRESSION(reg_model); + + /* + * Non-Linear Regression model + */ + + _starpu_drop_comments(f); + + res = _starpu_read_double(f, "%le", ®_model->a); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + res = _starpu_read_double(f, "\t%le", ®_model->b); + 
STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + res = _starpu_read_double(f, "%le", ®_model->c); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + res = fscanf(f, "\n"); + STARPU_ASSERT_MSG(res == 0, "Incorrect performance model file %s", path); + + /* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */ + unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c)); + reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model); + + _starpu_drop_comments(f); + + // Read how many coefficients is there + res = fscanf(f, "%u", ®_model->ncoeff); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + + /* + * Multiple Regression Model + */ + if (reg_model->ncoeff != 0) + { + _STARPU_MALLOC(reg_model->coeff, reg_model->ncoeff*sizeof(double)); + + unsigned multi_invalid = 0; + unsigned i; + for (i=0; i < reg_model->ncoeff; i++) + { + res = _starpu_read_double(f, "%le", ®_model->coeff[i]); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + multi_invalid = (multi_invalid||isnan(reg_model->coeff[i])); + } + reg_model->multi_valid = !multi_invalid; + } + res = fscanf(f, "\n"); + STARPU_ASSERT_MSG(res == 0, "Incorrect performance model file %s", path); +} + + +#ifndef STARPU_SIMGRID +static void check_history_entry(struct starpu_perfmodel_history_entry *entry) +{ + STARPU_ASSERT_MSG(entry->deviation >= 0, "entry=%p, entry->deviation=%lf\n", entry, entry->deviation); + STARPU_ASSERT_MSG(entry->sum >= 0, "entry=%p, entry->sum=%lf\n", entry, entry->sum); + STARPU_ASSERT_MSG(entry->sum2 >= 0, "entry=%p, entry->sum2=%lf\n", entry, entry->sum2); + STARPU_ASSERT_MSG(entry->mean >= 0, "entry=%p, entry->mean=%lf\n", entry, entry->mean); + STARPU_ASSERT_MSG(isnan(entry->flops)||entry->flops >= 0, "entry=%p, entry->flops=%lf\n", entry, entry->flops); + STARPU_ASSERT_MSG(entry->duration >= 0, "entry=%p, 
entry->duration=%lf\n", entry, entry->duration); +} + +static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry) +{ + fprintf(f, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample); +} +#endif + +static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmodel_history_entry *entry) +{ + int res; + + _starpu_drop_comments(f); + + /* In case entry is NULL, we just drop these values */ + unsigned nsample; + uint32_t footprint; + unsigned long size; /* in bytes */ + double flops; + double mean; + double deviation; + double sum; + double sum2; + + char line[STR_LONG_LENGTH]; + char *ret; + + ret = fgets(line, sizeof(line), f); + STARPU_ASSERT(ret); + STARPU_ASSERT(strchr(line, '\n')); + + /* Read the values from the file */ + res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &flops, &mean, &deviation, &sum, &sum2, &nsample); + + if (res != 8) + { + flops = 0.; + /* Read the values from the file */ + res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample); + STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file %s", path); + } + + if (entry) + { + STARPU_ASSERT_MSG(isnan(flops) || flops >=0, "Negative flops %lf in performance model file %s", flops, path); + STARPU_ASSERT_MSG(mean >=0, "Negative mean %lf in performance model file %s", mean, path); + STARPU_ASSERT_MSG(deviation >=0, "Negative deviation %lf in performance model file %s", deviation, path); + STARPU_ASSERT_MSG(sum >=0, "Negative sum %lf in performance model file %s", sum, path); + STARPU_ASSERT_MSG(sum2 >=0, "Negative sum2 %lf in performance model file %s", sum2, path); + entry->footprint = footprint; + entry->size = size; + entry->flops = flops; + entry->mean = mean; + entry->deviation = deviation; + entry->sum = sum; + entry->sum2 = 
sum2; + entry->nsample = nsample; + } +} + +static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model) +{ + unsigned nentries; + struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; + + _starpu_drop_comments(f); + + int res = fscanf(f, "%u\n", &nentries); + STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); + + scan_reg_model(f, path, reg_model); + + /* parse entries */ + unsigned i; + for (i = 0; i < nentries; i++) + { + struct starpu_perfmodel_history_entry *entry = NULL; + if (scan_history) + { + _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); + + /* Tell helgrind that we do not care about + * racing access to the sampling, we only want a + * good-enough estimation */ + STARPU_HG_DISABLE_CHECKING(entry->nsample); + STARPU_HG_DISABLE_CHECKING(entry->mean); + //entry->nerror = 0; + } + + scan_history_entry(f, path, entry); + + /* insert the entry in the hashtable and the list structures */ + /* TODO: Insert it at the end of the list, to avoid reversing + * the order... But efficiently! 
We may have a lot of entries */ + if (scan_history) + insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history); + } + + if (model && model->type == STARPU_PERFMODEL_INVALID) + { + /* Tool loading a perfmodel without having the corresponding codelet */ + if (reg_model->ncoeff != 0) + model->type = STARPU_MULTIPLE_REGRESSION_BASED; + else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c)) + model->type = STARPU_NL_REGRESSION_BASED; + else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta)) + model->type = STARPU_REGRESSION_BASED; + else if (nentries) + model->type = STARPU_HISTORY_BASED; + /* else unknown, leave invalid */ + } +} + + +static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history, int comb) +{ + struct starpu_perfmodel_per_arch dummy; + unsigned nimpls, impl, i, ret; + + /* Parsing number of implementation */ + _starpu_drop_comments(f); + ret = fscanf(f, "%u\n", &nimpls); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + + if(model != NULL) + { + /* Parsing each implementation */ + unsigned implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS); + model->state->nimpls[comb] = implmax; + if (!model->state->per_arch[comb]) + { + _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); + } + if (!model->state->per_arch_is_set[comb]) + { + _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); + } + + for (impl = 0; impl < implmax; impl++) + { + struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; + model->state->per_arch_is_set[comb][impl] = 1; + parse_per_arch_model_file(f, path, per_arch_model, scan_history, model); + } + } + else + { + impl = 0; + } + + /* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS + * we skip the last implementation */ + for (i = impl; i < nimpls; i++) + parse_per_arch_model_file(f, path, &dummy, 0, NULL); 
+} + +static void parse_comb(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history, int comb) +{ + int ndevices = 0; + _starpu_drop_comments(f); + int ret = fscanf(f, "%d\n", &ndevices); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + + struct starpu_perfmodel_device devices[ndevices]; + + int dev; + for(dev = 0; dev < ndevices; dev++) + { + _starpu_drop_comments(f); + int type; + ret = fscanf(f, "%d\n", &type); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + int dev_id; + _starpu_drop_comments(f); + ret = fscanf(f, "%d\n", &dev_id); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + int ncores; + _starpu_drop_comments(f); + ret = fscanf(f, "%d\n", &ncores); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + devices[dev].type = type; + devices[dev].devid = dev_id; + devices[dev].ncores = ncores; + } + int id_comb = starpu_perfmodel_arch_comb_get(ndevices, devices); + if(id_comb == -1) + id_comb = starpu_perfmodel_arch_comb_add(ndevices, devices); + + if (id_comb >= model->state->ncombs_set) + _starpu_perfmodel_realloc(model, id_comb+1); + + model->state->combs[comb] = id_comb; + parse_arch(f, path, model, scan_history, id_comb); +} + +static int parse_model_file(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history) +{ + int ret, version=0; + + /* First check that it's not empty (very common corruption result, for + which there is no solution) + */ + fseek(f, 0, SEEK_END); + long pos = ftell(f); + if (pos == 0) + { + _STARPU_DISP("Performance model file %s is empty, ignoring it\n", path); + return 1; + } + rewind(f); + + /* Parsing performance model version */ + _starpu_drop_comments(f); + ret = fscanf(f, "%d\n", &version); + STARPU_ASSERT_MSG(version == _STARPU_PERFMODEL_VERSION, "Incorrect performance model file %s with a model version %d not being the current model version (%d)\n", path, + 
version, _STARPU_PERFMODEL_VERSION); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + + int ncombs = 0; + _starpu_drop_comments(f); + ret = fscanf(f, "%d\n", &ncombs); + STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); + if(ncombs > 0) + { + model->state->ncombs = ncombs; + } + + if (ncombs > model->state->ncombs_set) + { + // The model has more combs than the original number of arch_combs, we need to reallocate + _starpu_perfmodel_realloc(model, ncombs); + } + + int comb; + for(comb = 0; comb < ncombs; comb++) + parse_comb(f, path, model, scan_history, comb); + + return 0; +} + +#ifndef STARPU_SIMGRID +static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsigned impl) +{ + struct starpu_perfmodel_per_arch *per_arch_model; + + per_arch_model = &model->state->per_arch[comb][impl]; + /* count the number of elements in the lists */ + struct starpu_perfmodel_history_list *ptr = NULL; + unsigned nentries = 0; + + if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) + { + /* Dump the list of all entries in the history */ + ptr = per_arch_model->list; + while(ptr) + { + nentries++; + ptr = ptr->next; + } + } + + /* header */ + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); + STARPU_ASSERT(strlen(archname)>0); + check_reg_model(model, comb, impl); + + /* Dump the history into the model file in case it is necessary */ + if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) + { + ptr = per_arch_model->list; + while (ptr) + { + check_history_entry(ptr->entry); + ptr = ptr->next; + } + } +} +static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl) +{ + struct starpu_perfmodel_per_arch *per_arch_model; + + per_arch_model = 
&model->state->per_arch[comb][impl]; + /* count the number of elements in the lists */ + struct starpu_perfmodel_history_list *ptr = NULL; + unsigned nentries = 0; + + if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) + { + /* Dump the list of all entries in the history */ + ptr = per_arch_model->list; + while(ptr) + { + nentries++; + ptr = ptr->next; + } + } + + /* header */ + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); + fprintf(f, "#####\n"); + fprintf(f, "# Model for %s\n", archname); + fprintf(f, "# number of entries\n%u\n", nentries); + + dump_reg_model(f, model, comb, impl); + + /* Dump the history into the model file in case it is necessary */ + if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) + { + fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tdev (us or J)\tsum\t\tsum2\t\tn\n"); + ptr = per_arch_model->list; + while (ptr) + { + dump_history_entry(f, ptr->entry); + ptr = ptr->next; + } + } + + fprintf(f, "\n"); +} + +static void check_model(struct starpu_perfmodel *model) +{ + int ncombs = model->state->ncombs; + STARPU_ASSERT(ncombs >= 0); + + int i, impl, dev; + for(i = 0; i < ncombs; i++) + { + int comb = model->state->combs[i]; + STARPU_ASSERT(comb >= 0); + + int ndevices = arch_combs[comb]->ndevices; + STARPU_ASSERT(ndevices >= 1); + + for(dev = 0; dev < ndevices; dev++) + { + STARPU_ASSERT(arch_combs[comb]->devices[dev].type < STARPU_NARCH); + + STARPU_ASSERT(arch_combs[comb]->devices[dev].devid >= 0); + + STARPU_ASSERT(arch_combs[comb]->devices[dev].ncores >= 0); + } + + int nimpls = model->state->nimpls[comb]; + STARPU_ASSERT(nimpls >= 1); + for (impl = 0; impl < nimpls; impl++) + { + check_per_arch_model(model, comb, impl); + } + } +} + +/* Driver porters: adding your driver here is optional, only needed for 
performance models. */ + +static void dump_model_file(FILE *f, struct starpu_perfmodel *model) +{ + fprintf(f, "##################\n"); + fprintf(f, "# Performance Model Version\n"); + fprintf(f, "%d\n\n", _STARPU_PERFMODEL_VERSION); + + int ncombs = model->state->ncombs; + + fprintf(f, "####################\n"); + fprintf(f, "# COMBs\n"); + fprintf(f, "# number of combinations\n"); + fprintf(f, "%d\n", ncombs); + + int i, impl, dev; + for(i = 0; i < ncombs; i++) + { + int comb = model->state->combs[i]; + int ndevices = arch_combs[comb]->ndevices; + fprintf(f, "####################\n"); + fprintf(f, "# COMB_%d\n", comb); + fprintf(f, "# number of types devices\n"); + fprintf(f, "%d\n", ndevices); + + for(dev = 0; dev < ndevices; dev++) + { + fprintf(f, "####################\n"); + fprintf(f, "# DEV_%d\n", dev); + fprintf(f, "# device type (CPU - %d, CUDA - %d, OPENCL - %d, MPI_MS - %d, TCPIP_MS - %d)\n", + STARPU_CPU_WORKER, STARPU_CUDA_WORKER, STARPU_OPENCL_WORKER, STARPU_MPI_MS_WORKER, STARPU_TCPIP_MS_WORKER); + fprintf(f, "%u\n", arch_combs[comb]->devices[dev].type); + + fprintf(f, "####################\n"); + fprintf(f, "# DEV_%d\n", dev); + fprintf(f, "# device id \n"); + fprintf(f, "%u\n", arch_combs[comb]->devices[dev].devid); + + fprintf(f, "####################\n"); + fprintf(f, "# DEV_%d\n", dev); + fprintf(f, "# number of cores \n"); + fprintf(f, "%u\n", arch_combs[comb]->devices[dev].ncores); + } + + int nimpls = model->state->nimpls[comb]; + fprintf(f, "##########\n"); + fprintf(f, "# number of implementations\n"); + fprintf(f, "%d\n", nimpls); + for (impl = 0; impl < nimpls; impl++) + { + dump_per_arch_model_file(f, model, comb, impl); + } + } +} +#endif + +static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry) +{ + fprintf(f, " \n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample); +} + +static void dump_reg_model_xml(FILE *f, struct 
starpu_perfmodel *model, int comb, int impl) +{ + struct starpu_perfmodel_per_arch *per_arch_model; + + per_arch_model = &model->state->per_arch[comb][impl]; + struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; + + /* + * Linear Regression model + */ + + if (model->type == STARPU_REGRESSION_BASED) + { + fprintf(f, " \n"); + fprintf(f, " sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny); + fprintf(f, " alpha=\""); + _starpu_write_double(f, "%e", reg_model->alpha); + fprintf(f, "\" beta=\""); + _starpu_write_double(f, "%e", reg_model->beta); + fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n", reg_model->nsample, reg_model->minx, reg_model->maxx); + } + + /* + * Non-Linear Regression model + */ + + else if (model->type == STARPU_NL_REGRESSION_BASED) + { + fprintf(f, " \n"); + fprintf(f, " a); + fprintf(f, "\" b=\""); + _starpu_write_double(f, "%e", reg_model->b); + fprintf(f, "\" c=\""); + _starpu_write_double(f, "%e", reg_model->c); + fprintf(f, "\"/>\n"); + } + + else if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) + { + if (reg_model->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL) + fprintf(f, " \n"); + else + { + unsigned i; + fprintf(f, " \n", reg_model->coeff[0]); + for (i=0; i < model->ncombinations; i++) + { + fprintf(f, " parameters_names == NULL) + fprintf(f, "c%u", i+1); + else + { + unsigned j; + int first=1; + for(j=0; j < model->nparameters; j++) + { + if (model->combinations[i][j] > 0) + { + if (first) + first=0; + else + fprintf(f, "*"); + + if(model->parameters_names[j] != NULL) + fprintf(f, "%s", model->parameters_names[j]); + else + fprintf(f, "P%u", j); + + if (model->combinations[i][j] > 1) + fprintf(f, "^%d", model->combinations[i][j]); + } + } + } + fprintf(f, "\" coef=\"%e\"/>\n", reg_model->coeff[i+1]); + } + fprintf(f, " \n"); + } + } +} + +static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl) +{ + struct 
starpu_perfmodel_per_arch *per_arch_model; + + per_arch_model = &model->state->per_arch[comb][impl]; + /* count the number of elements in the lists */ + struct starpu_perfmodel_history_list *ptr; + + dump_reg_model_xml(f, model, comb, impl); + + /* Dump the history into the model file in case it is necessary */ + ptr = per_arch_model->list; + while (ptr) + { + dump_history_entry_xml(f, ptr->entry); + ptr = ptr->next; + } +} + +void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model) +{ + _starpu_init_and_load_perfmodel(model); + + fprintf(f, "\n"); + fprintf(f, "\n"); + fprintf(f, "\n", model->symbol); + fprintf(f, "\n"); + fprintf(f, "\n", _STARPU_PERFMODEL_VERSION); + + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + int ncombs = model->state->ncombs; + int i, impl, dev; + + for(i = 0; i < ncombs; i++) + { + int comb = model->state->combs[i]; + int ndevices = arch_combs[comb]->ndevices; + + fprintf(f, " \n"); + for(dev = 0; dev < ndevices; dev++) + { + enum starpu_worker_archtype archtype = arch_combs[comb]->devices[dev].type; + const char *type = starpu_driver_info[archtype].name_upper; + STARPU_ASSERT(type); + fprintf(f, " devices[dev].devid); + if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER) + fprintf(f, " ncores=\"%d\"", + arch_combs[comb]->devices[dev].ncores); + fprintf(f, "/>\n"); + } + int nimpls = model->state->nimpls[comb]; + for (impl = 0; impl < nimpls; impl++) + { + fprintf(f, " \n", impl); + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); + fprintf(f, " \n", archname); + dump_per_arch_model_xml(f, model, comb, impl); + fprintf(f, " \n"); + } + fprintf(f, " \n"); + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + fprintf(f, "\n"); +} + +void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb) +{ + int i; + + STARPU_ASSERT(nb > model->state->ncombs_set); +#ifdef SSIZE_MAX + STARPU_ASSERT((size_t) nb < SSIZE_MAX 
/ sizeof(struct starpu_perfmodel_per_arch*)); +#endif + _STARPU_REALLOC(model->state->per_arch, nb*sizeof(struct starpu_perfmodel_per_arch*)); + _STARPU_REALLOC(model->state->per_arch_is_set, nb*sizeof(int*)); + _STARPU_REALLOC(model->state->nimpls, nb*sizeof(int)); + _STARPU_REALLOC(model->state->nimpls_set, nb*sizeof(int)); + _STARPU_REALLOC(model->state->combs, nb*sizeof(int)); + for(i = model->state->ncombs_set; i < nb; i++) + { + model->state->per_arch[i] = NULL; + model->state->per_arch_is_set[i] = NULL; + model->state->nimpls[i] = 0; + model->state->nimpls_set[i] = 0; + } + model->state->ncombs_set = nb; +} + +void starpu_perfmodel_init(struct starpu_perfmodel *model) +{ + int already_init; + int ncombs; + + STARPU_ASSERT(model); + + STARPU_PTHREAD_RWLOCK_RDLOCK(®istered_models_rwlock); + already_init = model->is_init; + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); + + if (already_init) + return; + + /* The model is still not loaded so we grab the lock in write mode, and + * if it's not loaded once we have the lock, we do load it. */ + STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); + + /* Was the model initialized since the previous test ? 
*/ + if (model->is_init) + { + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); + return; + } + + model->path = NULL; + _STARPU_MALLOC(model->state, sizeof(struct _starpu_perfmodel_state)); + STARPU_PTHREAD_RWLOCK_INIT(&model->state->model_rwlock, NULL); + + STARPU_PTHREAD_RWLOCK_RDLOCK(&arch_combs_mutex); + model->state->ncombs_set = ncombs = nb_arch_combs; + STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); + _STARPU_CALLOC(model->state->per_arch, ncombs, sizeof(struct starpu_perfmodel_per_arch*)); + _STARPU_CALLOC(model->state->per_arch_is_set, ncombs, sizeof(int*)); + _STARPU_CALLOC(model->state->nimpls, ncombs, sizeof(int)); + _STARPU_CALLOC(model->state->nimpls_set, ncombs, sizeof(int)); + _STARPU_MALLOC(model->state->combs, ncombs*sizeof(int)); + model->state->ncombs = 0; + + /* add the model to a linked list */ + struct _starpu_perfmodel *node = _starpu_perfmodel_new(); + + node->model = model; + //model->debug_modelid = debug_modelid++; + + /* put this model at the beginning of the list */ + _starpu_perfmodel_list_push_front(®istered_models, node); + + model->is_init = 1; + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); +} + +static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen) +{ + STARPU_ASSERT(path); + _starpu_find_perf_model_codelet_debug(model->symbol, _starpu_perfmodel_hostname, arch, path, maxlen); +} + +void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen) +{ + _starpu_find_perf_model_codelet(symbol, _starpu_perfmodel_hostname, path, maxlen); +} + +void starpu_perfmodel_get_model_path_default_location(const char *symbol, char *path, size_t maxlen) +{ + _starpu_set_default_perf_model_codelet(symbol, _starpu_perfmodel_hostname, path, maxlen); +} + +#ifndef STARPU_SIMGRID +void starpu_save_history_based_model(struct starpu_perfmodel *model) +{ + STARPU_ASSERT(model); + STARPU_ASSERT(model->symbol); + int locked; + + /* TODO checks */ + + /* filename = 
$STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */ + char path[STR_LONG_LENGTH]; + starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); + + if (path[0] == '\0') + starpu_perfmodel_get_model_path_default_location(model->symbol, path, sizeof(path)); + + free(model->path); + model->path = strdup(path); + _STARPU_DEBUG("Opening performance model file <%s> for model <%s>\n", path, model->symbol); + + /* overwrite existing file, or create it */ + FILE *f; + f = fopen(path, "a+"); + STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path); + + locked = _starpu_fwrlock(f) == 0; + check_model(model); + fseek(f, 0, SEEK_SET); + _starpu_fftruncate(f, 0); + dump_model_file(f, model); + if (locked) + _starpu_fwrunlock(f); + + fclose(f); +} +#endif + +static void _starpu_dump_registered_models(void) +{ +#ifndef STARPU_SIMGRID + STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); + + struct _starpu_perfmodel *node; + + _STARPU_DEBUG("DUMP MODELS !\n"); + + for (node = _starpu_perfmodel_list_begin(®istered_models); + node != _starpu_perfmodel_list_end(®istered_models); + node = _starpu_perfmodel_list_next(node)) + { + if (node->model->is_init && (node->model->type != STARPU_PER_WORKER && node->model->type != STARPU_PER_ARCH && node->model->type != STARPU_COMMON)) + starpu_save_history_based_model(node->model); + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); +#endif +} + +void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model) +{ + if(model->is_init && model->state && model->state->per_arch != NULL) + { + int i; + for(i=0 ; istate->ncombs_set ; i++) + { + if (model->state->per_arch[i]) + { + int impl; + for(impl=0 ; implstate->nimpls_set[i] ; impl++) + { + struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[i][impl]; + if (archmodel->history) + { + struct starpu_perfmodel_history_list *list; + struct starpu_perfmodel_history_table *entry=NULL, *tmp=NULL; + + HASH_ITER(hh, archmodel->history, entry, tmp) 
+ { + HASH_DEL(archmodel->history, entry); + free(entry); + } + archmodel->history = NULL; + + list = archmodel->list; + while (list) + { + struct starpu_perfmodel_history_list *plist; + free(list->entry); + plist = list; + list = list->next; + free(plist); + } + archmodel->list = NULL; + } + } + free(model->state->per_arch[i]); + model->state->per_arch[i] = NULL; + + free(model->state->per_arch_is_set[i]); + model->state->per_arch_is_set[i] = NULL; + } + } + free(model->state->per_arch); + model->state->per_arch = NULL; + + free(model->state->per_arch_is_set); + model->state->per_arch_is_set = NULL; + + free(model->state->nimpls); + model->state->nimpls = NULL; + + free(model->state->nimpls_set); + model->state->nimpls_set = NULL; + + free(model->state->combs); + model->state->combs = NULL; + model->state->ncombs = 0; + } + model->is_init = 0; + model->is_loaded = 0; +} + +void _starpu_deinitialize_registered_performance_models(void) +{ + if (_starpu_get_calibrate_flag()) + _starpu_dump_registered_models(); + + STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); + + struct _starpu_perfmodel *node, *nnode; + + _STARPU_DEBUG("FREE MODELS !\n"); + + for (node = _starpu_perfmodel_list_begin(®istered_models); + node != _starpu_perfmodel_list_end(®istered_models); + node = nnode) + { + struct starpu_perfmodel *model = node->model; + nnode = _starpu_perfmodel_list_next(node); + + STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); + _starpu_deinitialize_performance_model(model); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + + free(node->model->path); + node->model->path = NULL; + free(node->model->state); + node->model->state = NULL; + + _starpu_perfmodel_list_erase(®istered_models, node); + _starpu_perfmodel_delete(node); + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); + STARPU_PTHREAD_RWLOCK_DESTROY(®istered_models_rwlock); + starpu_perfmodel_free_sampling(); +} + +/* We first try to grab the global lock in read mode to check 
whether the model + * was loaded or not (this is very likely to have been already loaded). If the + * model was not loaded yet, we take the lock in write mode, and if the model + * is still not loaded once we have the lock, we do load it. */ +void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history) +{ + STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); + + if(!model->is_loaded) + { + char path[STR_LONG_LENGTH]; + // Check if a symbol is defined before trying to load the model from a file + STARPU_ASSERT_MSG(model->symbol, "history-based performance models must have a symbol"); + + starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); + + unsigned calibrate_flag = _starpu_get_calibrate_flag(); + model->benchmarking = calibrate_flag; + model->is_loaded = 1; + + if (path[0] == '\0') + { + _STARPU_DEBUG("No performance model file for model %s ...\n", model->symbol); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + return; + } + + free(model->path); + model->path = strdup(path); + _STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol); + + if (calibrate_flag == 2) + { + /* The user specified that the performance model should + * be overwritten, so we don't load the existing file ! 
+ * */ + _STARPU_DEBUG("Overwrite existing file\n"); + } + else + { + /* We try to load the file */ + FILE *f; + f = fopen(path, "r"); + if (f) + { + int locked; + locked = _starpu_frdlock(f) == 0; + parse_model_file(f, path, model, scan_history); + if (locked) + _starpu_frdunlock(f); + fclose(f); + _STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol); + } + else + { + _STARPU_DEBUG("Performance model file %s does not exist or is not readable: %s\n", path, strerror(errno)); + } + } + + } + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + +} + +/* This function is intended to be used by external tools that should read the + * performance model files */ +/* TODO: write an clear function, to free symbol and history */ +int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model) +{ + char path[STR_LONG_LENGTH]; + + model->symbol = strdup(symbol); + starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); + _STARPU_DEBUG("get_model_path -> %s\n", path); + + if (path[0] != '\0') + { + return starpu_perfmodel_load_file(path, model); + } + else + { + const char *dot = strrchr(symbol, '.'); + if (dot) + { + char *symbol2 = strdup(symbol); + symbol2[dot-symbol] = '\0'; + int ret; + _STARPU_DISP("note: loading history from %s instead of %s\n", symbol2, symbol); + ret = starpu_perfmodel_load_symbol(symbol2, model); + free(symbol2); + return ret; + } + else + { + _STARPU_DISP("There is no performance model for symbol %s\n", symbol); + return 1; + } + } +} + +int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model) +{ + int res, ret = 0; + FILE *f = fopen(filename, "r"); + int locked; + + STARPU_ASSERT(f); + + starpu_perfmodel_init(model); + model->path = strdup(filename); + + locked = _starpu_frdlock(f) == 0; + ret = parse_model_file(f, filename, model, 1); + if (locked) + _starpu_frdunlock(f); + + res = fclose(f); + STARPU_ASSERT(res == 0); + + if (ret) + 
starpu_perfmodel_unload_model(model); + else + model->is_loaded = 1; + return ret; +} + +int starpu_perfmodel_unload_model(struct starpu_perfmodel *model) +{ + if (model->symbol) + { + free((char *)model->symbol); + model->symbol = NULL; + } + + starpu_perfmodel_deinit(model); + + return 0; +} + +int starpu_perfmodel_deinit(struct starpu_perfmodel *model) +{ + _starpu_deinitialize_performance_model(model); + free(model->path); + free(model->state); + model->state = NULL; + + STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); + struct _starpu_perfmodel *node; + for (node = _starpu_perfmodel_list_begin(®istered_models); + node != _starpu_perfmodel_list_end(®istered_models); + node = _starpu_perfmodel_list_next(node)) + { + if (node->model == model) + { + _starpu_perfmodel_list_erase(®istered_models, node); + _starpu_perfmodel_delete(node); + break; + } + } + STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); + + return 0; +} + +const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype) +{ + const char *name = starpu_driver_info[archtype].name_lower; + STARPU_ASSERT(name); + return name; +} + +void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl) +{ + int i; + int comb = _starpu_perfmodel_create_comb_if_needed(arch); + + STARPU_ASSERT(comb != -1); + char devices[STR_VERY_LONG_LENGTH]; + int written = 0; + devices[0] = '\0'; + for(i=0 ; indevices ; i++) + { + written += snprintf(devices + written, sizeof(devices)-written, "%s%d%s", starpu_perfmodel_get_archtype_name(arch->devices[i].type), arch->devices[i].devid, i != arch->ndevices-1 ? 
"_":""); + } + snprintf(archname, maxlen, "%s_impl%u (Comb%d)", devices, impl, comb); +} + +void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, + struct starpu_perfmodel_arch* arch, char *path, size_t maxlen, unsigned nimpl) +{ + int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + STARPU_ASSERT(comb != -1); + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + + STARPU_ASSERT(path); + + get_model_debug_path(model, archname, path, maxlen); +} + +double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl) +{ + int comb; + double exp = NAN; + size_t size = 0; + struct starpu_perfmodel_regression_model *regmodel = NULL; + + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if (comb == -1) + goto docal; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + size = __starpu_job_get_data_size(model, arch, nimpl, j); + + if (comb >= model->state->ncombs_set + || model->state->per_arch[comb] == NULL) + { + // The model has not been executed on this combination + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + goto docal; + } + + regmodel = &model->state->per_arch[comb][nimpl].regression; + + if (regmodel->valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1) + exp = regmodel->alpha*pow((double)size, regmodel->beta); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + +docal: + STARPU_HG_DISABLE_CHECKING(model->benchmarking); + if (isnan(exp) && !model->benchmarking) + { + char archname[STR_SHORT_LENGTH]; + + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %lu (only %u measurements from size %lu to %lu), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. 
You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, (unsigned long) size, regmodel?regmodel->nsample:0, regmodel?regmodel->minx:0, regmodel?regmodel->maxx:0); + _starpu_set_calibrate_flag(1); + model->benchmarking = 1; + } + + return exp; +} + +double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) +{ + int comb; + double exp = NAN; + size_t size = 0; + struct starpu_perfmodel_regression_model *regmodel; + struct starpu_perfmodel_history_table *entry = NULL; + + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if (comb == -1) + goto docal; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + size = __starpu_job_get_data_size(model, arch, nimpl, j); + + if (comb >= model->state->ncombs_set + || model->state->per_arch[comb] == NULL) + { + // The model has not been executed on this combination + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + goto docal; + } + + regmodel = &model->state->per_arch[comb][nimpl].regression; + + if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1) + { + exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c; + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + } + else + { + uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j); + struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][nimpl]; + struct starpu_perfmodel_history_table *history; + + history = per_arch_model->history; + HASH_FIND_UINT32_T(history, &key, entry); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + + /* Here helgrind would shout that this is unprotected access. 
+ * We do not care about racing access to the mean, we only want + * a good-enough estimation */ + + if (entry && entry->history_entry && entry->history_entry->nsample >= _starpu_calibration_minimum) + exp = entry->history_entry->mean; + +docal: + STARPU_HG_DISABLE_CHECKING(model->benchmarking); + if (isnan(exp) && !model->benchmarking) + { + char archname[STR_SHORT_LENGTH]; + + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %lu (only %u measurements), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, (unsigned long) size, entry && entry->history_entry ? entry->history_entry->nsample : 0); + _starpu_set_calibrate_flag(1); + model->benchmarking = 1; + } + } + + return exp; +} + +double _starpu_multiple_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl) +{ + int comb; + double expected_duration=NAN; + + struct starpu_perfmodel_regression_model *reg_model = NULL; + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if(comb == -1) + goto docal; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + if (comb >= model->state->ncombs_set || + model->state->per_arch[comb] == NULL) + { + // The model has not been executed on this combination + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + goto docal; + } + reg_model = &model->state->per_arch[comb][nimpl].regression; + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + if (reg_model->coeff == NULL) + goto docal; + + double *parameters; + _STARPU_MALLOC(parameters, model->nparameters*sizeof(double)); + model->parameters(j->task, parameters); + expected_duration=reg_model->coeff[0]; + unsigned i; + for (i=0; i < 
model->ncombinations; i++) + { + double parameter_value=1.; + unsigned k; + for (k=0; k < model->nparameters; k++) + parameter_value *= pow(parameters[k],model->combinations[i][k]); + + expected_duration += reg_model->coeff[i+1]*parameter_value; + } + +docal: + STARPU_HG_DISABLE_CHECKING(model->benchmarking); + if (isnan(expected_duration) && !model->benchmarking) + { + char archname[STR_SHORT_LENGTH]; + + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + _STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname); + _starpu_set_calibrate_flag(1); + model->benchmarking = 1; + } + + // In the unlikely event that predicted duration is negative + // in case multiple linear regression is not so accurate + if (expected_duration < 0) + expected_duration = 0.00001; + + //Make sure that the injected time is in milliseconds + return expected_duration; +} + +double __starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl,size_t offset) +{ + int comb; + double exp = NAN; + struct starpu_perfmodel_per_arch *per_arch_model; + struct starpu_perfmodel_history_entry *entry = NULL; + struct starpu_perfmodel_history_table *history, *elt; + uint32_t key; + double *data; + + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + key = _starpu_compute_buffers_footprint(model, arch, nimpl, j); + if(comb == -1) + goto docal; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); + if (comb >= model->state->ncombs_set || + model->state->per_arch[comb] == NULL) + { + // The model has not been executed on this combination + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + goto docal; + } + + per_arch_model = 
&model->state->per_arch[comb][nimpl]; + + history = per_arch_model->history; + HASH_FIND_UINT32_T(history, &key, elt); + entry = (elt == NULL) ? NULL : elt->history_entry; + if (entry) + data = (double*) ((char*) entry + offset); + STARPU_ASSERT_MSG(!entry || *data >= 0, "entry=%p, entry data=%lf\n", entry, entry?*data:NAN); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + + /* Here helgrind would shout that this is unprotected access. + * We do not care about racing access to the mean/deviation, we only want + * a good-enough estimation */ + + if (entry && entry->nsample) + { +#ifdef STARPU_SIMGRID + if (entry->nsample < _starpu_calibration_minimum) + { + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + + _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %ld footprint %x (only %u measurements). Using it anyway for the simulation\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key, entry->nsample); + } +#else + if (entry->nsample >= _starpu_calibration_minimum) +#endif + { + STARPU_ASSERT_MSG(*data >= 0, "entry data=%lf\n", *data); + /* TODO: report differently if we've scheduled really enough + * of that task and the scheduler should perhaps put it aside */ + /* Calibrated enough */ + exp = *data; + } + } + +docal: +#ifdef STARPU_SIMGRID + if (isnan(exp)) + { + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + + _STARPU_DISP("Warning: model %s is not calibrated at all for %s size %ld footprint %x. 
Assuming it can not work there\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key); + exp = 0.; + } +#else + STARPU_HG_DISABLE_CHECKING(model->benchmarking); + if (isnan(exp) && !model->benchmarking) + { + char archname[STR_SHORT_LENGTH]; + + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); + _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %ld footprint %x (only %u measurements), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key, entry ? entry->nsample : 0); + _starpu_set_calibrate_flag(1); + model->benchmarking = 1; + } +#endif + + STARPU_ASSERT_MSG(isnan(exp)||exp >= 0, "exp=%lf\n", exp); + return exp; +} + +double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) +{ + return __starpu_history_based_job_expected_perf(model, arch, j, nimpl, offsetof(struct starpu_perfmodel_history_entry, mean)); +} + +double _starpu_history_based_job_expected_deviation(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) +{ + return __starpu_history_based_job_expected_perf(model, arch, j, nimpl, offsetof(struct starpu_perfmodel_history_entry, deviation)); +} + +double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, uint32_t footprint) +{ + struct _starpu_job j = + { + .footprint = footprint, + .footprint_is_computed = 1, + }; + return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl); +} + +int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch) +{ + int comb = 
starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if(comb == -1) + comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); + return comb; +} + +void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl, unsigned number) +{ + STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured); + if (model) + { + int c; + unsigned found = 0; + int comb = _starpu_perfmodel_create_comb_if_needed(arch); + + STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); + + for(c = 0; c < model->state->ncombs; c++) + { + if(model->state->combs[c] == comb) + { + found = 1; + break; + } + } + + if(!found) + { + if (model->state->ncombs + 1 >= model->state->ncombs_set) + { + // The number of combinations is bigger than the one which was initially allocated, we need to reallocate, + // do not only reallocate 1 extra comb, rather reallocate 5 to avoid too frequent calls to _starpu_perfmodel_realloc + _starpu_perfmodel_realloc(model, model->state->ncombs_set+5); + } + model->state->combs[model->state->ncombs++] = comb; + } + + if(!model->state->per_arch[comb]) + { + _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); + _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); + model->state->nimpls[comb] = 0; + } + + struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; + if (model->state->per_arch_is_set[comb][impl] == 0) + { + // We are adding a new implementation for the given comb and the given impl + model->state->nimpls[comb]++; + model->state->per_arch_is_set[comb][impl] = 1; + } + + if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) + { + struct starpu_perfmodel_history_entry *entry; + struct starpu_perfmodel_history_table *elt; + struct 
starpu_perfmodel_history_list **list; + uint32_t key = _starpu_compute_buffers_footprint(model, arch, impl, j); + + list = &per_arch_model->list; + + HASH_FIND_UINT32_T(per_arch_model->history, &key, elt); + entry = (elt == NULL) ? NULL : elt->history_entry; + + if (!entry) + { + /* this is the first entry with such a footprint */ + _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); + + /* Tell helgrind that we do not care about + * racing access to the sampling, we only want a + * good-enough estimation */ + STARPU_HG_DISABLE_CHECKING(entry->nsample); + STARPU_HG_DISABLE_CHECKING(entry->mean); + + /* For history-based, do not take the first measurement into account, it is very often quite bogus */ + /* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */ + if (number != 1 || model->type != STARPU_HISTORY_BASED) + { + entry->sum = measured * number; + entry->sum2 = measured*measured * number; + entry->nsample = number; + entry->mean = measured; + } + + entry->size = __starpu_job_get_data_size(model, arch, impl, j); + entry->flops = j->task->flops; + + entry->footprint = key; + + insert_history_entry(entry, list, &per_arch_model->history); + } + else + { + /* There is already an entry with the same footprint */ + + double local_deviation = measured/entry->mean; + + if (entry->nsample && + (100 * local_deviation > (100 + historymaxerror) + || (100 / local_deviation > (100 + historymaxerror)))) + { + entry->nerror+=number; + + /* More errors than measurements, we're most probably completely wrong, we flush out all the entries */ + if (entry->nerror >= entry->nsample) + { + char archname[STR_SHORT_LENGTH]; + starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), impl); + _STARPU_DISP("Too big deviation for model %s on %s: %fus vs average %fus, %u such errors against %u samples (%+f%%), flushing the performance model. 
Use the STARPU_HISTORY_MAX_ERROR environment variable to control the threshold (currently %d%%)\n", model->symbol, archname, measured, entry->mean, entry->nerror, entry->nsample, measured * 100. / entry->mean - 100, historymaxerror); + entry->sum = 0.0; + entry->sum2 = 0.0; + entry->nsample = 0; + entry->nerror = 0; + entry->mean = 0.0; + entry->deviation = 0.0; + } + } + else + { + entry->sum += measured * number; + entry->sum2 += measured*measured * number; + entry->nsample += number; + + unsigned n = entry->nsample; + entry->mean = entry->sum / n; + entry->deviation = sqrt((fabs(entry->sum2 - (entry->sum*entry->sum)/n))/n); + } + + if (j->task->flops != 0. && !isnan(entry->flops)) + { + if (entry->flops == 0.) + entry->flops = j->task->flops; + else if ((fabs(entry->flops - j->task->flops) / entry->flops) > 0.00001) + { + /* Incoherent flops! forget about trying to record flops */ + _STARPU_DISP("Incoherent flops in model %s: %f vs previous %f, stopping recording flops\n", model->symbol, j->task->flops, entry->flops); + entry->flops = NAN; + } + } + } + + STARPU_ASSERT(entry); + } + + if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) + { + struct starpu_perfmodel_regression_model *reg_model; + reg_model = &per_arch_model->regression; + + /* update the regression model */ + size_t job_size = __starpu_job_get_data_size(model, arch, impl, j); + double logy, logx; + logx = log((double)job_size); + logy = log(measured); + + reg_model->sumlnx += logx; + reg_model->sumlnx2 += logx*logx; + reg_model->sumlny += logy; + reg_model->sumlnxlny += logx*logy; + if (reg_model->minx == 0 || job_size < reg_model->minx) + reg_model->minx = job_size; + if (reg_model->maxx == 0 || job_size > reg_model->maxx) + reg_model->maxx = job_size; + reg_model->nsample++; + + if (VALID_REGRESSION(reg_model)) + { + unsigned n = reg_model->nsample; + + double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny); + double denom = 
(n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx); + + reg_model->beta = num/denom; + reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n); + reg_model->valid = 1; + } + } + + if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) + { + struct starpu_perfmodel_history_entry *entry; + struct starpu_perfmodel_history_list **list; + list = &per_arch_model->list; + + _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); + _STARPU_MALLOC(entry->parameters, model->nparameters*sizeof(double)); + model->parameters(j->task, entry->parameters); + entry->tag = j->task->tag_id; + STARPU_ASSERT(measured >= 0); + entry->duration = measured; + + struct starpu_perfmodel_history_list *link; + _STARPU_MALLOC(link, sizeof(struct starpu_perfmodel_history_list)); + link->next = *list; + link->entry = entry; + *list = link; + } + +#ifdef STARPU_MODEL_DEBUG + struct starpu_task *task = j->task; + starpu_perfmodel_debugfilepath(model, arch_combs[comb], per_arch_model->debug_path, STR_LONG_LENGTH, impl); + FILE *f = fopen(per_arch_model->debug_path, "a+"); + int locked; + if (f == NULL) + { + _STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path); + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + return; + } + locked = _starpu_fwrlock(f) == 0; + + if (!j->footprint_is_computed) + (void) _starpu_compute_buffers_footprint(model, arch, impl, j); + + STARPU_ASSERT(j->footprint_is_computed); + + fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%u\t\t", j->footprint, (unsigned long) __starpu_job_get_data_size(model, arch, impl, j), measured, task->predicted, task->predicted_transfer, cpuid); + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + + for (i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + + STARPU_ASSERT(handle->ops); + STARPU_ASSERT(handle->ops->display); + handle->ops->display(handle, f); + } + fprintf(f, "\n"); + if 
(locked) + _starpu_fwrunlock(f); + fclose(f); +#endif + STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); + } +} + +void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured, unsigned number) +{ + struct _starpu_job *job = _starpu_get_job_associated_to_task(task); + +#ifdef STARPU_SIMGRID + STARPU_ASSERT_MSG(0, "We are not supposed to update history when simulating execution"); +#endif + + _starpu_init_and_load_perfmodel(model); + /* Record measurement */ + _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl, number); + /* and save perfmodel on termination */ + _starpu_set_calibrate_flag(1); +} + +void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured) +{ + starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, measured, 1); +} + +int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model) +{ + int comb; + + fprintf(output, "Model <%s>\n", model->symbol); + for(comb = 0; comb < model->state->ncombs; comb++) + { + struct starpu_perfmodel_arch *arch; + int device; + + arch = starpu_perfmodel_arch_comb_fetch(model->state->combs[comb]); + fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":""); + for(device=0 ; devicendevices ; device++) + { + const char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type); + fprintf(output, "\t\tDevice %d: type: %s - devid: %d - ncores: %d\n", device, name, arch->devices[device].devid, arch->devices[device].ncores); + } + } + return 0; +} + +struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl) +{ + int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, 
arch->devices); + if (comb == -1) + return NULL; + + if (comb >= model->state->ncombs_set || + !model->state->per_arch[comb]) + return NULL; + + return &model->state->per_arch[comb][impl]; +} + +static struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, va_list varg_list) +{ + struct starpu_perfmodel_arch arch; + va_list varg_list_copy; + int i, arg_type; + int is_cpu_set = 0; + + // We first count the number of devices + arch.ndevices = 0; + va_copy(varg_list_copy, varg_list); + while ((arg_type = va_arg(varg_list_copy, int)) != -1) + { + int devid = va_arg(varg_list_copy, int); + int ncores = va_arg(varg_list_copy, int); + + arch.ndevices ++; + if (arg_type == STARPU_CPU_WORKER) + { + STARPU_ASSERT_MSG(is_cpu_set == 0, "STARPU_CPU_WORKER can only be specified once\n"); + STARPU_ASSERT_MSG(devid==0, "STARPU_CPU_WORKER must be followed by a value 0 for the device id"); + is_cpu_set = 1; + } + else + { + STARPU_ASSERT_MSG(ncores==1, "%s must be followed by a value 1 for ncores", starpu_worker_get_type_as_string(arg_type)); + } + } + va_end(varg_list_copy); + + // We set the devices + _STARPU_MALLOC(arch.devices, arch.ndevices * sizeof(struct starpu_perfmodel_device)); + va_copy(varg_list_copy, varg_list); + for(i=0 ; i= model->state->ncombs_set) + _starpu_perfmodel_realloc(model, comb+1); + + // Get the per_arch object + if (model->state->per_arch[comb] == NULL) + { + _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); + _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); + model->state->nimpls[comb] = 0; + } + model->state->per_arch_is_set[comb][impl] = 1; + model->state->nimpls[comb] ++; + + return &model->state->per_arch[comb][impl]; +} + +struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...) 
+{ + va_list varg_list; + struct starpu_perfmodel_per_arch *per_arch; + + va_start(varg_list, impl); + per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); + va_end(varg_list); + + return per_arch; +} + +int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...) +{ + va_list varg_list; + struct starpu_perfmodel_per_arch *per_arch; + + va_start(varg_list, func); + per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); + per_arch->cost_function = func; + va_end(varg_list); + + return 0; +} + +int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...) +{ + va_list varg_list; + struct starpu_perfmodel_per_arch *per_arch; + + va_start(varg_list, func); + per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); + per_arch->size_base = func; + va_end(varg_list); + + return 0; +} diff --git a/src/core/perfmodel/perfmodel_nan.c b/src/core/perfmodel/perfmodel_nan.c new file mode 100644 index 0000000..c676e68 --- /dev/null +++ b/src/core/perfmodel/perfmodel_nan.c @@ -0,0 +1,101 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/** Some systems cannot read NAN values, yes, it is really bad ... */ + +#if defined(STARPU_HAVE_WINDOWS) || defined(STARPU_OPENBSD_SYS) +# define _STARPU_OWN_NAN 1 +#else +# define _STARPU_OWN_NAN 0 +#endif + +#if _STARPU_OWN_NAN == 1 +static +void _starpu_read_spaces(FILE *f) +{ + int c = getc(f); + if (isspace(c)) + { + while (isspace(c)) c = getc(f); + ungetc(c, f); + } + else + { + ungetc(c, f); + } +} +#endif /* _STARPU_OWN_NAN */ + +void _starpu_write_double(FILE *f, const char *format, double val) +{ +#if _STARPU_OWN_NAN == 1 + if (isnan(val)) + { + fprintf(f, "NaN"); + } + else + { + fprintf(f, format, val); + } +#else + fprintf(f, format, val); +#endif +} + +int _starpu_read_double(FILE *f, char *format, double *val) +{ +#if _STARPU_OWN_NAN == 1 + _starpu_read_spaces(f); + + int x1 = getc(f); + + if (x1 == 'N') + { + int x2 = getc(f); + int x3 = getc(f); + if (x2 == 'a' && x3 == 'N') + { +#ifdef _MSC_VER + unsigned long long _mynan = 0x7fffffffffffffffull; + double mynan = *(double*)&_mynan; +#else + double mynan = NAN; +#endif + *val = mynan; + return 1; + } + else + { + return 0; + } + } + else + { + ungetc(x1, f); + return fscanf(f, format, val); + } +#else + return fscanf(f, format, val); +#endif +} diff --git a/src/core/perfmodel/perfmodel_print.c b/src/core/perfmodel/perfmodel_print.c new file mode 100644 index 0000000..1b62d5b --- /dev/null +++ b/src/core/perfmodel/perfmodel_print.c @@ -0,0 +1,324 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include "perfmodel.h" + +static +void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per_arch_model, char *parameter, uint32_t *footprint, FILE *output) +{ + struct starpu_perfmodel_history_list *ptr; + + ptr = per_arch_model->list; + + if (!parameter && ptr) + fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n"); + + while (ptr) + { + struct starpu_perfmodel_history_entry *entry = ptr->entry; + if (!footprint || entry->footprint == *footprint) + { + if (!parameter) + { + /* There isn't a parameter that is explicitly requested, so we display all parameters */ + fprintf(output, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, + (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample); + } + else + { + /* only display the parameter that was specifically requested */ + if (strcmp(parameter, "mean") == 0) + { + fprintf(output, "%-15e\n", entry->mean); + } + + if (strcmp(parameter, "stddev") == 0) + { + fprintf(output, "%-15e\n", entry->deviation); + return; + } + } + } + + ptr = ptr->next; + } +} + +void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, char 
*parameter, uint32_t *footprint, FILE *output) +{ + int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + STARPU_ASSERT(comb != -1); + + struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][nimpl]; + + if (arch_model->regression.nsample || arch_model->regression.valid || arch_model->regression.nl_valid || arch_model->list) + { + char archname[32]; + starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl); + fprintf(output, "# performance model for %s\n", archname); + } + + if (parameter == NULL) + { + /* no specific parameter was requested, so we display everything */ + if (arch_model->regression.nsample) + { + fprintf(output, "\tRegression : #sample = %u\n", arch_model->regression.nsample); + } + + /* Only display the regression model if we could actually build a model */ + if (arch_model->regression.valid) + { + fprintf(output, "\tLinear: y = alpha size ^ beta\n"); + fprintf(output, "\t\talpha = %e\n", arch_model->regression.alpha); + fprintf(output, "\t\tbeta = %e\n", arch_model->regression.beta); + } + else + { + //fprintf(output, "\tLinear model is INVALID\n"); + } + + if (arch_model->regression.nl_valid) + { + fprintf(output, "\tNon-Linear: y = a size ^b + c\n"); + fprintf(output, "\t\ta = %e\n", arch_model->regression.a); + fprintf(output, "\t\tb = %e\n", arch_model->regression.b); + fprintf(output, "\t\tc = %e\n", arch_model->regression.c); + } + else + { + //fprintf(output, "\tNon-Linear model is INVALID\n"); + } + + _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output); + +#if 0 + char debugname[1024]; + starpu_perfmodel_debugfilepath(model, arch, debugname, 1024, nimpl); + _STARPU_MSG("\t debug file path : %s\n", debugname); +#endif + } + else + { + /* only display the parameter that was specifically requested */ + if (strcmp(parameter, "a") == 0) + { + fprintf(output, "%e\n", arch_model->regression.a); + return; + } + + if (strcmp(parameter, "b") == 0) + { + fprintf(output, 
"%e\n", arch_model->regression.b); + return; + } + + if (strcmp(parameter, "c") == 0) + { + fprintf(output, "%e\n", arch_model->regression.c); + return; + } + + if (strcmp(parameter, "alpha") == 0) + { + fprintf(output, "%e\n", arch_model->regression.alpha); + return; + } + + if (strcmp(parameter, "beta") == 0) + { + fprintf(output, "%e\n", arch_model->regression.beta); + return; + } + + if (strcmp(parameter, "path-file-debug") == 0) + { + char debugname[256]; + starpu_perfmodel_debugfilepath(model, arch, debugname, 256, nimpl); + fprintf(output, "%s\n", debugname); + return; + } + + if ((strcmp(parameter, "mean") == 0) || (strcmp(parameter, "stddev") == 0)) + { + _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output); + return; + } + + /* TODO display if it's valid ? */ + + _STARPU_ERROR("Unknown parameter requested, aborting.\n"); + } +} + +/* FIXME: Generalize to any arch */ + +int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output) +{ + _starpu_init_and_load_perfmodel(model); + if (arch == NULL) + { + int comb, impl; + for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++) + { + struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); + int nimpls = model->state ? 
model->state->nimpls[comb] : 0; + for(impl = 0; impl < nimpls; impl++) + starpu_perfmodel_print(model, arch_comb, impl, parameter, footprint, output); + } + } + else + { + if (strcmp(arch, "cpu") == 0) + { + int implid; + struct starpu_perfmodel_arch perf_arch; + perf_arch.ndevices = 1; + _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + perf_arch.devices[0].type = STARPU_CPU_WORKER; + perf_arch.devices[0].devid = 0; + perf_arch.devices[0].ncores = 1; + int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); + STARPU_ASSERT(comb != -1); + int nimpls = model->state->nimpls[comb]; + for (implid = 0; implid < nimpls; implid++) + starpu_perfmodel_print(model, &perf_arch,implid, parameter, footprint, output); /* Display all codelets on cpu */ + free(perf_arch.devices); + return 0; + } + + int k; + if (sscanf(arch, "cpu:%d", &k) == 1) + { + /* For combined CPU workers */ + if ((k < 1) || (k > STARPU_MAXCPUS)) + { + _STARPU_ERROR("Invalid CPU size\n"); + } + + int implid; + struct starpu_perfmodel_arch perf_arch; + perf_arch.ndevices = 1; + _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + perf_arch.devices[0].type = STARPU_CPU_WORKER; + perf_arch.devices[0].devid = 0; + perf_arch.devices[0].ncores = k; + int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); + STARPU_ASSERT(comb != -1); + int nimpls = model->state->nimpls[comb]; + + for (implid = 0; implid < nimpls; implid++) + starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); + free(perf_arch.devices); + return 0; + } + + if (strcmp(arch, "cuda") == 0) + { + int implid; + struct starpu_perfmodel_arch perf_arch; + + perf_arch.ndevices = 1; + _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + perf_arch.devices[0].type = STARPU_CUDA_WORKER; + perf_arch.devices[0].ncores = 1; + int comb; + for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++) + { + 
struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); + if(arch_comb->ndevices == 1 && arch_comb->devices[0].type == STARPU_CUDA_WORKER) + { + perf_arch.devices[0].devid = arch_comb->devices[0].devid; + int nimpls = model->state->nimpls[comb]; + + for (implid = 0; implid < nimpls; implid++) + starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); + } + } + free(perf_arch.devices); + return 0; + } + + /* TODO: There must be a cleaner way ! */ + int gpuid; + int nmatched; + nmatched = sscanf(arch, "cuda_%d", &gpuid); + if (nmatched == 0) + nmatched = sscanf(arch, "cuda%d", &gpuid); + if (nmatched == 1) + { + struct starpu_perfmodel_arch perf_arch; + perf_arch.ndevices = 1; + _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + + perf_arch.devices[0].type = STARPU_CUDA_WORKER; + perf_arch.devices[0].devid = gpuid; + perf_arch.devices[0].ncores = 1; + + int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); + STARPU_ASSERT(comb != -1); + int nimpls = model->state->nimpls[comb]; + + int implid; + for (implid = 0; implid < nimpls; implid++) + starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); + return 0; + } + + _STARPU_MSG("Unknown architecture requested\n"); + return -1; + } + return 0; +} + +int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output) +{ + unsigned workerid; + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); + int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + struct starpu_perfmodel_per_arch *arch_model; + struct starpu_perfmodel_history_list *ptr = NULL; + + if (comb >= 0 && model->state->per_arch[comb]) + { + arch_model = &model->state->per_arch[comb][0]; + + for (ptr = arch_model->list; ptr; ptr = ptr->next) + { + 
struct starpu_perfmodel_history_entry *entry = ptr->entry; + if (entry->footprint == footprint) + { + fprintf(output, "%s%e", workerid?" ":"", entry->mean); + break; + } + } + } + if (!ptr) + { + /* Didn't find any entry :/ */ + fprintf(output, "%sinf", workerid?" ":""); + } + } + return 0; +} diff --git a/src/core/perfmodel/regression.c b/src/core/perfmodel/regression.c new file mode 100644 index 0000000..ceb2a33 --- /dev/null +++ b/src/core/perfmodel/regression.c @@ -0,0 +1,305 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#define MAXREGITER 1000 +#define EPS 1.0e-10 + +/* + * For measurements close to C, we do not want to try to fit, since we are + * fitting the distance to C, which won't actually really get smaller + */ +#define C_RADIUS 1 + +/* + * smoothly ramp from 0 to 1 between 0 and 1 + * <= 0: stay 0 + * >= 1: stay 1 */ +static double level(double x) +{ + if (x <= 0.) + return 0.; + if (x >= 1.) 
+ return 1.; + if (x < 0.5) + return -2*x*x+4*x-1; + return 2*x*x; +} + +static double fixpop(unsigned pop, double c, double y) +{ + double distance = (y-c)/c; + return pop * level((distance - C_RADIUS) / C_RADIUS); +} + +static double compute_b(double c, unsigned n, size_t *x, double *y, unsigned *pop) +{ + double b; + + /* X = log (x) , Y = log (y - c) */ + double sumxy = 0.0; + double sumx = 0.0; + double sumx2 = 0.0; + double sumy = 0.0; + double nn = 0; + + unsigned i; + for (i = 0; i < n; i++) + { + double xi = log(x[i]); + double yi = log(y[i]-c); + double popi = fixpop(pop[i], c, y[i]); + if (popi <= 0) + continue; + + sumxy += xi*yi*popi; + sumx += xi*popi; + sumx2 += xi*xi*popi; + sumy += yi*popi; + + nn += popi; + } + + b = (nn * sumxy - sumx * sumy) / (nn*sumx2 - sumx*sumx); + + return b; +} + +static double compute_a(double c, double b, unsigned n, size_t *x, double *y, unsigned *pop) +{ + double a; + + /* X = log (x) , Y = log (y - c) */ + double sumx = 0.0; + double sumy = 0.0; + double nn = 0; + + unsigned i; + for (i = 0; i < n; i++) + { + double xi = log(x[i]); + double yi = log(y[i]-c); + double popi = fixpop(pop[i], c, y[i]); + if (popi <= 0) + continue; + + sumx += xi*popi; + sumy += yi*popi; + + nn += popi; + } + + a = (sumy - b*sumx) / nn; + + return a; +} + + + +/* returns r */ +static double test_r(double c, unsigned n, size_t *x, double *y, unsigned *pop) +{ + double r; + +// printf("test c = %e\n", c); + + /* X = log (x) , Y = log (y - c) */ + double sumxy = 0.0; + double sumx = 0.0; + double sumx2 = 0.0; + double sumy = 0.0; + double sumy2 = 0.0; + double nn = 0; + + unsigned i; + for (i = 0; i < n; i++) + { + double xi = log(x[i]); + double yi = log(y[i]-c); + double popi = fixpop(pop[i], c, y[i]); + if (popi <= 0) + continue; + + // printf("Xi = %e, Yi = %e\n", xi, yi); + + sumxy += xi*yi*popi; + sumx += xi*popi; + sumx2 += xi*xi*popi; + sumy += yi*popi; + sumy2 += yi*yi*popi; + + nn += popi; + } + + //printf("sumxy %e\n", sumxy); + 
//printf("sumx %e\n", sumx); + //printf("sumx2 %e\n", sumx2); + //printf("sumy %e\n", sumy); + //printf("sumy2 %e\n", sumy2); + + r = (nn * sumxy - sumx * sumy) / sqrt((nn* sumx2 - sumx*sumx) * (nn*sumy2 - sumy*sumy)); + + return r; +} + +static unsigned find_list_size(struct starpu_perfmodel_history_list *list_history) +{ + unsigned cnt = 0; + + struct starpu_perfmodel_history_list *ptr = list_history; + while (ptr) + { + if (ptr->entry->nsample) + cnt++; + ptr = ptr->next; + } + + return cnt; +} + +static int compar(const void *_a, const void *_b) +{ + double a = *(double*) _a; + double b = *(double*) _b; + if (a < b) + return -1; + if (a > b) + return 1; + return 0; +} + +static double get_list_fourth(double *y, unsigned n) +{ + double sorted[n]; + + memcpy(sorted, y, n * sizeof(*sorted)); + + qsort(sorted, n, sizeof(*sorted), compar); + + return sorted[n/3]; +} + +static void dump_list(size_t *x, double *y, unsigned *pop, struct starpu_perfmodel_history_list *list_history) +{ + struct starpu_perfmodel_history_list *ptr = list_history; + unsigned i = 0; + + while (ptr) + { + if (ptr->entry->nsample) + { + x[i] = ptr->entry->size; + y[i] = ptr->entry->mean; + pop[i] = ptr->entry->nsample; + i++; + } + + ptr = ptr->next; + } +} + + +/* y = ax^b + c + * return 0 if success, -1 otherwise + * if success, a, b and c are modified + * */ + +/* See in Cedric Augonnet's PhD thesis's Appendix B for the rationale + * Scheduling Tasks over Multicore machines enhanced with Accelerators: a + * Runtime System’s Perspective */ +int _starpu_regression_non_linear_power(struct starpu_perfmodel_history_list *ptr, double *a, double *b, double *c) +{ + unsigned n = find_list_size(ptr); + if (!n) + return -1; + + size_t *x; + _STARPU_MALLOC(x, n*sizeof(size_t)); + + double *y; + _STARPU_MALLOC(y, n*sizeof(double)); + STARPU_ASSERT(y); + + unsigned *pop; + _STARPU_MALLOC(pop, n*sizeof(unsigned)); + STARPU_ASSERT(y); + + dump_list(x, y, pop, ptr); + + double cmin = 0.0; + double cmax = 
get_list_fourth(y, n); + + unsigned iter; + + double err = 100000.0; + +/* + unsigned i; + for (i = 0; i < 100; i++) + { + double ci = cmin + (cmax-cmin)*i/100.; + fprintf(stderr,"%f: %f\n", ci, 1.0 - test_r(ci, n, x, y, pop)); + } +*/ + + /* Use dichotomy to find c that gives the best matching */ + for (iter = 0; iter < MAXREGITER; iter++) + { + double c1, c2; + double r1, r2; + + c1 = cmin + (0.33)*(cmax - cmin); + c2 = cmin + (0.67)*(cmax - cmin); + + r1 = test_r(c1, n, x, y, pop); + r2 = test_r(c2, n, x, y, pop); + + double err1, err2; + err1 = fabs(1.0 - r1); + err2 = fabs(1.0 - r2); + + //fprintf(stderr,"%f - %f: %f - %f: %f - %f\n", cmin, c1, err1, c2, err2, cmax); + + if (err1 < err2) + { + /* 1 is better */ + cmax = c2; + } + else + { + /* 2 is better */ + cmin = c1; + } + + if (fabs(err - STARPU_MIN(err1, err2)) < EPS) + break; + + err = STARPU_MIN(err1, err2); + } + + *c = (cmin + cmax)/2; + + *b = compute_b(*c, n, x, y, pop); + *a = exp(compute_a(*c, *b, n, x, y, pop)); + + free(x); + free(y); + free(pop); + + return 0; +} diff --git a/src/core/perfmodel/regression.h b/src/core/perfmodel/regression.h new file mode 100644 index 0000000..92f7340 --- /dev/null +++ b/src/core/perfmodel/regression.h @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __REGRESSION_H__ +#define __REGRESSION_H__ + +/** @file */ + +#include +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +int _starpu_regression_non_linear_power(struct starpu_perfmodel_history_list *ptr, double *a, double *b, double *c); + +#pragma GCC visibility pop + +#endif // __REGRESSION_H__ diff --git a/src/core/perfmodel/starpu-perfmodel.dtd b/src/core/perfmodel/starpu-perfmodel.dtd new file mode 100644 index 0000000..26ee05f --- /dev/null +++ b/src/core/perfmodel/starpu-perfmodel.dtd @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/core/progress_hook.c b/src/core/progress_hook.c new file mode 100644 index 0000000..c402568 --- /dev/null +++ b/src/core/progress_hook.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#define NMAXHOOKS 16 + +struct progression_hook +{ + unsigned (*func)(void *arg); + void *arg; + unsigned active; +}; + +/* protect the hook table */ +static starpu_pthread_rwlock_t progression_hook_rwlock; + +static struct progression_hook hooks[NMAXHOOKS] = {{NULL, NULL, 0}}; +static int active_hook_cnt = 0; + +/* + * Statically initializing progression_hook_rwlock seems to lead to weird errors + * on Darwin, so we do it dynamically. + */ +void _starpu_init_progression_hooks(void) +{ + STARPU_PTHREAD_RWLOCK_INIT(&progression_hook_rwlock, NULL); + STARPU_HG_DISABLE_CHECKING(active_hook_cnt); +} + +int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg) +{ + int hook; + STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock); + for (hook = 0; hook < NMAXHOOKS; hook++) + { + if (!hooks[hook].active) + { + /* We found an empty slot */ + hooks[hook].func = func; + hooks[hook].arg = arg; + hooks[hook].active = 1; + active_hook_cnt++; + + STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); + + return hook; + } + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); + + starpu_wake_all_blocked_workers(); + + /* We could not find an empty slot */ + return -1; +} + +void starpu_progression_hook_deregister(int hook_id) +{ + STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock); + + if (hooks[hook_id].active) + active_hook_cnt--; + + hooks[hook_id].active = 0; + + STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); +} + +unsigned _starpu_execute_registered_progression_hooks(void) +{ + if (active_hook_cnt == 0) + return 1; + + /* By default, it is possible to block, but if some progression hooks + * requires that it's not blocking, we disable blocking. 
*/ + unsigned may_block = 1; + + unsigned hook; + for (hook = 0; hook < NMAXHOOKS; hook++) + { + unsigned active; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock); + active = hooks[hook].active; + STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); + + unsigned may_block_hook = 1; + + if (active) + may_block_hook = hooks[hook].func(hooks[hook].arg); + + /* As soon as one hook tells that the driver cannot be + * blocking, we don't allow it. */ + if (!may_block_hook) + may_block = 0; + } + + return may_block; +} diff --git a/src/core/progress_hook.h b/src/core/progress_hook.h new file mode 100644 index 0000000..a433351 --- /dev/null +++ b/src/core/progress_hook.h @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __PROGRESS_HOOK_H__ +#define __PROGRESS_HOOK_H__ + +#pragma GCC visibility push(hidden) + +/** @file */ + +void _starpu_init_progression_hooks(void); + +unsigned _starpu_execute_registered_progression_hooks(void); + +#pragma GCC visibility pop + +#endif /* !__PROGRESS_HOOK_H__ */ diff --git a/src/core/sched_ctx.c b/src/core/sched_ctx.c new file mode 100644 index 0000000..66f3f4b --- /dev/null +++ b/src/core/sched_ctx.c @@ -0,0 +1,2934 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2017-2017 Arthur Chevalier + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include + +enum _starpu_ctx_change_op +{ + ctx_change_invalid = 0, + ctx_change_add = 1, + ctx_change_remove = 2 +}; +static starpu_pthread_mutex_t sched_ctx_manag = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_mutex_t finished_submit_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER; +static starpu_pthread_key_t sched_ctx_key; +static unsigned with_hypervisor = 0; +static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS]; +static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS]; +static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; +static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; +static double hyp_actual_start_sample[STARPU_NMAX_SCHED_CTXS]; +static double window_size; +static int nobind; +static int occupied_sms = 0; + +static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config); + +static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id); +static void _starpu_sched_ctx_block_workers_in_parallel(unsigned sched_ctx_id, unsigned all); +static void _starpu_sched_ctx_unblock_workers_in_parallel(unsigned sched_ctx_id, unsigned all); +static void 
_starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id); +static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id); + +static void set_priority_on_notified_workers(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority); +static void set_priority_hierarchically_on_notified_workers(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority); +static void fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx); +static void add_notified_workers(int *workers_to_add, int nworkers_to_add, unsigned sched_ctx_id); + +/* reused from combined_workers.c */ +static int compar_int(const void *pa, const void *pb) +{ + int a = *((int *)pa); + int b = *((int *)pb); + + return a - b; +} + +/* reused from combined_workers.c */ +static void sort_workerid_array(int nworkers, int workerid_array[]) +{ + qsort(workerid_array, nworkers, sizeof(int), compar_int); +} + +/* notify workers that a ctx change operation is about to proceed. + * + * workerids must be sorted by ascending id + * + * Once this function returns, the notified workers must not start a new + * scheduling operation until they are notified that the ctx change op is + * done. + */ +static void notify_workers_about_changing_ctx_pending(const unsigned nworkers, const int * const workerids) +{ + STARPU_ASSERT(!_starpu_worker_sched_op_pending()); + const int cur_workerid = _starpu_worker_get_id(); + unsigned i; + for (i=0; i workerids[i-1])); + if (starpu_worker_is_combined_worker(workerids[i])) + continue; + if (workerids[i] == cur_workerid) + continue; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_enter_changing_ctx_op(worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } +} + +/* notify workers that a ctx change operation is complete. 
+ * + * workerids must be sorted by ascending id + * + * Once this function returns, the workers may proceed with scheduling operations again. + */ +static void notify_workers_about_changing_ctx_done(const unsigned nworkers, const int * const workerids) +{ + STARPU_ASSERT(!_starpu_worker_sched_op_pending()); + const int cur_workerid = _starpu_worker_get_id(); + unsigned i; + for (i=0; i workerids[i-1])); + if (starpu_worker_is_combined_worker(workerids[i])) + continue; + if (workerids[i] == cur_workerid) + continue; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_leave_changing_ctx_op(worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } +} + +static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker) +{ + unsigned ret_sched_ctx = _starpu_sched_ctx_elt_exists(worker->sched_ctx_list, sched_ctx_id); + /* the worker was planning to go away in another ctx but finally he changed his mind & + he's staying */ + if (!ret_sched_ctx) + { + /* add context to worker */ + _starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx_id); + worker->nsched_ctxs++; + } + worker->removed_from_ctx[sched_ctx_id] = 0; + if(worker->tmp_sched_ctx == (int) sched_ctx_id) + worker->tmp_sched_ctx = -1; + return; +} + +void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker) +{ + unsigned ret_sched_ctx = _starpu_sched_ctx_elt_exists(worker->sched_ctx_list, sched_ctx_id); + /* remove context from worker */ + if(ret_sched_ctx) + { + /* don't remove scheduling data here, there might be tasks running and when post_exec + executes scheduling data is not there any more, do it when deleting context, then + we really won't need it anymore */ + /* struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); */ + /* if(sched_ctx && sched_ctx->sched_policy && 
/* Register context sched_ctx_id on each of the nworkers workers listed in
 * workerids, by calling _starpu_worker_gets_into_ctx() on each of them in
 * array order.
 *
 * No sched_mutex is taken here. */
static void _starpu_update_notified_workers_with_ctx(int *workerids, int nworkers, int sched_ctx_id)
{
	int idx = 0;

	while (idx < nworkers)
	{
		struct _starpu_worker *w = _starpu_get_worker_struct(workerids[idx]);

		_starpu_worker_gets_into_ctx(sched_ctx_id, w);
		idx++;
	}
}
/* Merge the devices of the given workers into sched_ctx->perf_arch, then call
 * _starpu_sched_ctx_update_parallel_workers_with() for the context.
 *
 * sched_ctx: context whose perf_arch device list is updated
 * workerids: ids of the workers being added
 * nworkers:  number of entries in workerids (also sizes the local scratch array)
 *
 * Ids greater than or equal to starpu_worker_get_count() are treated as
 * combined workers and ignored.
 */
static void _do_add_notified_workers(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers)
{
	int ndevices = 0;
	/* distinct (type, devid) pairs found among the added workers */
	struct starpu_perfmodel_device devices[nworkers];
	int i = 0;
	/* Phase 1: collect the distinct devices of the added workers, summing
	 * ncores for workers that share a device. */
	for(i = 0; i < nworkers; i++)
	{
		int workerid = workerids[i];
		if (workerid >= (int) starpu_worker_get_count())
			/* Combined worker, don't care */
			continue;
		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
		int dev1, dev2;
		unsigned found = 0;
		for(dev1 = 0; dev1 < worker->perf_arch.ndevices; dev1++)
		{
			for(dev2 = 0; dev2 < ndevices; dev2++)
			{
				if(devices[dev2].type == worker->perf_arch.devices[dev1].type &&
				   devices[dev2].devid == worker->perf_arch.devices[dev1].devid)
				{
					devices[dev2].ncores += worker->perf_arch.devices[dev1].ncores;
					found = 1;
					break;
				}
			}
			if(!found)
			{
				/* first time this device is seen: append it */
				devices[ndevices].type = worker->perf_arch.devices[dev1].type;
				devices[ndevices].devid = worker->perf_arch.devices[dev1].devid;
				devices[ndevices].ncores = worker->perf_arch.devices[dev1].ncores;
				ndevices++;
			}
			else
				/* reset the flag for the next device of this worker */
				found = 0;
		}
	}

	if(ndevices > 0)
	{

		/* Phase 2: make room in the context's device array */
		if(sched_ctx->perf_arch.devices == NULL)
		{
			_STARPU_MALLOC(sched_ctx->perf_arch.devices, ndevices*sizeof(struct starpu_perfmodel_device));
		}
		else
		{
			/* count the collected devices the context does not know yet */
			int nfinal_devices = 0;
			int dev1, dev2;
			unsigned found = 0;
			for(dev1 = 0; dev1 < ndevices; dev1++)
			{
				for(dev2 = 0; dev2 < sched_ctx->perf_arch.ndevices; dev2++)
				{
					if(sched_ctx->perf_arch.devices[dev2].type == devices[dev1].type && sched_ctx->perf_arch.devices[dev2].devid == devices[dev1].devid)
						found = 1;
				}

				if(!found)
				{
					nfinal_devices++;
				}
				else
					found = 0;

			}


			/* grow the array by the number of new devices */
			int nsize = (sched_ctx->perf_arch.ndevices+nfinal_devices);
			_STARPU_REALLOC(sched_ctx->perf_arch.devices, nsize*sizeof(struct starpu_perfmodel_device));

		}

		/* Phase 3: merge the collected devices into the context. The inner
		 * loop bound grows as new devices are appended below. */
		int dev1, dev2;
		unsigned found = 0;
		for(dev1 = 0; dev1 < ndevices; dev1++)
		{
			for(dev2 = 0; dev2 < sched_ctx->perf_arch.ndevices; dev2++)
			{
				if(sched_ctx->perf_arch.devices[dev2].type == devices[dev1].type && sched_ctx->perf_arch.devices[dev2].devid == devices[dev1].devid)
				{
					/* for a device already known, only CPU core counts are accumulated */
					if(sched_ctx->perf_arch.devices[dev2].type == STARPU_CPU_WORKER)
						sched_ctx->perf_arch.devices[dev2].ncores += devices[dev1].ncores;

					found = 1;
				}
			}

			if(!found)
			{
				sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].type = devices[dev1].type;
				sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].devid = devices[dev1].devid;
				/* when the context has a stream worker, its nsms value is
				 * recorded as the core count instead of the device's own */
				if (sched_ctx->stream_worker != -1)
					sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].ncores = sched_ctx->nsms;
				else
					sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].ncores = devices[dev1].ncores;
				sched_ctx->perf_arch.ndevices++;
			}
			else
				found = 0;

		}
	}


	_starpu_sched_ctx_update_parallel_workers_with(sched_ctx->id);

}
workerids[i]; + { + workers->add(workers, workerid); + } + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + worker->tmp_sched_ctx = (int)sched_ctx->id; + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + + sort_workerid_array(nworkers, workerids); + notify_workers_about_changing_ctx_pending(nworkers, workerids); + _do_add_notified_workers(sched_ctx, workerids, nworkers); + if(sched_ctx->sched_policy && sched_ctx->sched_policy->add_workers) + { + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->add_workers(sched_ctx->id, workerids, nworkers); + _STARPU_SCHED_END; + } + notify_workers_about_changing_ctx_done(nworkers, workerids); +} + +static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, + int nworkers, int *removed_workers, int *n_removed_workers) +{ + struct starpu_worker_collection *workers = sched_ctx->workers; + + struct starpu_perfmodel_device devices[workers->nworkers]; + int ndevices = 0; + + int i = 0; + for(i = 0; i < nworkers; i++) + { + if(workers->nworkers > 0) + { + if(_starpu_worker_belongs_to_a_sched_ctx(workerids[i], sched_ctx->id)) + { + int worker = workers->remove(workers, workerids[i]); + if(worker >= 0) + removed_workers[(*n_removed_workers)++] = worker; + } + } + } + + unsigned found = 0; + int dev; + struct starpu_sched_ctx_iterator it; + if(workers->init_iterator) + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + struct _starpu_worker *str_worker = _starpu_get_worker_struct(worker); + for(dev = 0; dev < str_worker->perf_arch.ndevices; dev++) + { + int dev2; + for(dev2 = 0; dev2 < ndevices; dev2++) + { + if(devices[dev2].type == str_worker->perf_arch.devices[dev].type && + devices[dev2].devid == str_worker->perf_arch.devices[dev].devid) + { + if(devices[dev2].type == STARPU_CPU_WORKER) + devices[dev2].ncores += 
str_worker->perf_arch.devices[dev].ncores; + } + + found = 1; + } + if(!found) + { + devices[ndevices].type = str_worker->perf_arch.devices[dev].type; + devices[ndevices].devid = str_worker->perf_arch.devices[dev].devid; + devices[ndevices].ncores = str_worker->perf_arch.devices[dev].ncores; + ndevices++; + } + else + found = 0; + } + found = 0; + + } + sched_ctx->perf_arch.ndevices = ndevices; + for(dev = 0; dev < ndevices; dev++) + { + sched_ctx->perf_arch.devices[dev].type = devices[dev].type; + sched_ctx->perf_arch.devices[dev].devid = devices[dev].devid; + sched_ctx->perf_arch.devices[dev].ncores = devices[dev].ncores; + } + + _starpu_sched_ctx_update_parallel_workers_without(sched_ctx->id); + + return; +} + +static void _starpu_sched_ctx_free_scheduling_data(struct _starpu_sched_ctx *sched_ctx) +{ + if(sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers) + { + int *workerids = NULL; + + unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); + + if(nworkers_ctx > 0) + { + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->remove_workers(sched_ctx->id, workerids, nworkers_ctx); + _STARPU_SCHED_END; + } + + free(workerids); + } + return; + +} + +#ifdef STARPU_HAVE_HWLOC +static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_ctx) +{ + sched_ctx->hwloc_workers_set = hwloc_bitmap_alloc(); + + struct starpu_worker_collection *workers = sched_ctx->workers; + struct _starpu_worker *worker; + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned workerid = workers->get_next(workers, &it); + if(!starpu_worker_is_combined_worker(workerid)) + { + worker = _starpu_get_worker_struct(workerid); + hwloc_bitmap_or(sched_ctx->hwloc_workers_set, + sched_ctx->hwloc_workers_set, + worker->hwloc_cpu_set); + } + + } + return; +} +#endif + +/* Must be called with sched_ctx_manag mutex held */ +struct _starpu_sched_ctx* 
_starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerids, + int nworkers_ctx, unsigned is_initial_sched, + const char *sched_ctx_name, + int min_prio_set, int min_prio, + int max_prio_set, int max_prio, + unsigned awake_workers, + void (*sched_policy_callback)(unsigned), + void * user_data, + int nsub_ctxs, int *sub_ctxs, int nsms) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + STARPU_ASSERT_MSG_ALWAYS(config->topology.nsched_ctxs < STARPU_NMAX_SCHED_CTXS, "There is too many sched_ctx %d, only %d are configured", config->topology.nsched_ctxs, STARPU_NMAX_SCHED_CTXS); + + unsigned id = _starpu_get_first_free_sched_ctx(config); + + struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[id]; + STARPU_ASSERT(sched_ctx->do_schedule == 0); + sched_ctx->id = id; + + int nworkers = config->topology.nworkers; + int i; + + STARPU_ASSERT(nworkers_ctx <= nworkers); + + starpu_task_list_init(&sched_ctx->empty_ctx_tasks); + + starpu_task_list_init(&sched_ctx->waiting_tasks); + + if (policy) + { + _STARPU_MALLOC(sched_ctx->sched_policy, sizeof(struct starpu_sched_policy)); + } + else + { + sched_ctx->sched_policy = NULL; + } + sched_ctx->is_initial_sched = is_initial_sched; + sched_ctx->name = sched_ctx_name; + sched_ctx->inheritor = STARPU_GLOBAL_SCHED_CTX; + sched_ctx->finished_submit = 0; + sched_ctx->min_priority_is_set = min_prio_set; + if (sched_ctx->min_priority_is_set) + sched_ctx->min_priority = min_prio; + else + sched_ctx->min_priority = 0; + + sched_ctx->max_priority_is_set = max_prio_set; + if (sched_ctx->max_priority_is_set) + sched_ctx->max_priority = max_prio; + else + sched_ctx->max_priority = 0; + + _starpu_barrier_counter_init(&sched_ctx->tasks_barrier, 0); + _starpu_barrier_counter_init(&sched_ctx->ready_tasks_barrier, 0); + + sched_ctx->ready_flops = 0.0; + for (i = 0; i < (int) (sizeof(sched_ctx->iterations)/sizeof(sched_ctx->iterations[0])); i++) + sched_ctx->iterations[i] = -1; + 
sched_ctx->iteration_level = 0; + sched_ctx->main_master = -1; + sched_ctx->perf_arch.devices = NULL; + sched_ctx->perf_arch.ndevices = 0; + sched_ctx->callback_sched = sched_policy_callback; + sched_ctx->user_data = user_data; + sched_ctx->sms_start_idx = 0; + sched_ctx->sms_end_idx = STARPU_NMAXSMS; + sched_ctx->nsms = nsms; + sched_ctx->stream_worker = -1; + memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner)); + STARPU_PTHREAD_RWLOCK_INIT(&sched_ctx->rwlock, NULL); + if(nsms > 0) + { + STARPU_ASSERT_MSG(workerids, "workerids is needed when setting nsms"); + sched_ctx->sms_start_idx = occupied_sms; + sched_ctx->sms_end_idx = occupied_sms+nsms; + occupied_sms += nsms; + _STARPU_DEBUG("ctx %u: stream worker %d nsms %d occupied sms %d\n", sched_ctx->id, workerids[0], nsms, occupied_sms); + STARPU_ASSERT_MSG_ALWAYS(occupied_sms <= STARPU_NMAXSMS , "STARPU:requested more sms than available"); + _starpu_worker_set_stream_ctx(workerids[0], sched_ctx); + sched_ctx->stream_worker = workerids[0]; + } + + sched_ctx->nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; + sched_ctx->nsub_ctxs = 0; + sched_ctx->parallel_view = 0; + + /*init the strategy structs and the worker_collection of the resources of the context */ + if(policy) + { + _starpu_init_sched_policy(config, sched_ctx, policy); + sched_ctx->awake_workers = 1; + } + else + { + sched_ctx->awake_workers = awake_workers; + starpu_sched_ctx_create_worker_collection(sched_ctx->id, STARPU_WORKER_LIST); + } + + /*add sub_ctxs before add workers, in order to be able to associate them if necessary */ + if(nsub_ctxs != 0) + { + for(i = 0; i < nsub_ctxs; i++) + sched_ctx->sub_ctxs[i] = sub_ctxs[i]; + sched_ctx->nsub_ctxs = nsub_ctxs; + } + + /* starpu_do_schedule() starts to consider the new sched_ctx for scheduling + * once 'sched_cts->do_schedule == 1' becomes visible. + * Make sure the sched_ctx struct and the policy struct initialization are complete at this time. 
*/ + STARPU_WMB(); + sched_ctx->do_schedule = 1; + + _starpu_add_workers_to_new_sched_ctx(sched_ctx, workerids, nworkers_ctx); + +#ifdef STARPU_HAVE_HWLOC + /* build hwloc tree of the context */ + _starpu_sched_ctx_create_hwloc_tree(sched_ctx); +#endif //STARPU_HAVE_HWLOC + + /* if we create the initial big sched ctx we can update workers' status here + because they haven't been launched yet */ + if(is_initial_sched) + { + for(i = 0; i < nworkers; i++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(i); + if(!_starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx->id)) + worker->nsched_ctxs++; + } + } + + (void)STARPU_ATOMIC_ADD(&config->topology.nsched_ctxs,1); + + return sched_ctx; +} + +int starpu_sched_ctx_get_nsms(unsigned sched_ctx) +{ + struct _starpu_sched_ctx *sc = _starpu_get_sched_ctx_struct(sched_ctx); + return sc->nsms; +} + +void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end) +{ + struct _starpu_sched_ctx *sc = _starpu_worker_get_ctx_stream(stream_workerid); + *start = sc->sms_start_idx; + *end = sc->sms_end_idx; +} + +int starpu_sched_ctx_get_sub_ctxs(unsigned sched_ctx, int *ctxs) +{ + struct _starpu_sched_ctx *sc = _starpu_get_sched_ctx_struct(sched_ctx); + int i; + for(i = 0; i < sc->nsub_ctxs; i++) + ctxs[i] = sc->sub_ctxs[i]; + return sc->nsub_ctxs; +} + +int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sub_ctx); + struct starpu_worker_collection *workers = sched_ctx->workers; + + struct starpu_sched_ctx_iterator it; + int worker = -1; + + workers->init_iterator(workers, &it); + if(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + } + + return worker; +} + +unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, ...) 
+{ + STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); + va_list varg_list; + int arg_type; + int min_prio_set = 0; + int max_prio_set = 0; + int min_prio = 0; + int max_prio = 0; + int nsms = 0; + int *sub_ctxs = NULL; + int nsub_ctxs = 0; + void *user_data = NULL; + struct starpu_sched_policy *sched_policy = NULL; + unsigned hierarchy_level = 0; + unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; + unsigned awake_workers = 0; + void (*init_sched)(unsigned) = NULL; + + va_start(varg_list, sched_ctx_name); + while ((arg_type = va_arg(varg_list, int)) != 0) + { + if (arg_type == STARPU_SCHED_CTX_POLICY_NAME) + { + char *policy_name = va_arg(varg_list, char *); + struct _starpu_machine_config *config = _starpu_get_machine_config(); + sched_policy = _starpu_select_sched_policy(config, policy_name); + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_STRUCT) + { + sched_policy = va_arg(varg_list, struct starpu_sched_policy *); + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_MIN_PRIO) + { + min_prio = va_arg(varg_list, int); + min_prio_set = 1; + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_MAX_PRIO) + { + max_prio = va_arg(varg_list, int); + max_prio_set = 1; + } + else if (arg_type == STARPU_SCHED_CTX_HIERARCHY_LEVEL) + { + hierarchy_level = va_arg(varg_list, unsigned); + } + else if (arg_type == STARPU_SCHED_CTX_NESTED) + { + nesting_sched_ctx = va_arg(varg_list, unsigned); + } + else if (arg_type == STARPU_SCHED_CTX_AWAKE_WORKERS) + { + awake_workers = 1; + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_INIT) + { +#ifdef __NVCOMPILER + init_sched = (void(*)(unsigned))va_arg(varg_list, void *); +#else + init_sched = va_arg(varg_list, void(*)(unsigned)); +#endif + } + else if (arg_type == STARPU_SCHED_CTX_USER_DATA) + { + user_data = va_arg(varg_list, void *); + } + else if (arg_type == STARPU_SCHED_CTX_SUB_CTXS) + { + sub_ctxs = va_arg(varg_list, int*); + nsub_ctxs = va_arg(varg_list, int); + } + else if (arg_type == STARPU_SCHED_CTX_CUDA_NSMS) + { + nsms = 
va_arg(varg_list, int); + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); + } + + } + va_end(varg_list); + + /* Make sure the user doesn't use invalid worker IDs. */ + int num_workers = starpu_worker_get_count(); + int i; + for (i = 0; i < nworkers; i++) + { + if (workerids[i] < 0 || workerids[i] >= num_workers) + { + _STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]); + STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); + return STARPU_NMAX_SCHED_CTXS; + } + } + + struct _starpu_sched_ctx *sched_ctx; + sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data, nsub_ctxs, sub_ctxs, nsms); + sched_ctx->hierarchy_level = hierarchy_level; + sched_ctx->nesting_sched_ctx = nesting_sched_ctx; + + int *added_workerids; + unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); + sort_workerid_array(nw_ctx, added_workerids); + notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids); + _starpu_sched_ctx_lock_write(sched_ctx->id); + _starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id); + notify_workers_about_changing_ctx_done(nw_ctx, added_workerids); + _starpu_sched_ctx_unlock_write(sched_ctx->id); + free(added_workerids); +#ifdef STARPU_USE_SC_HYPERVISOR + sched_ctx->perf_counters = NULL; +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); + return sched_ctx->id; +} + +int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, void **arglist) +{ + STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); + int arg_i = 0; + int min_prio_set = 0; + int max_prio_set = 0; + int min_prio = 0; + int max_prio = 0; + int nsms = 0; + int *sub_ctxs = NULL; + int nsub_ctxs = 0; + void *user_data = NULL; + struct starpu_sched_policy *sched_policy = NULL; + unsigned hierarchy_level = 0; + unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; + 
unsigned awake_workers = 0; + void (*init_sched)(unsigned) = NULL; + + while (arglist[arg_i] != NULL) + { + const int arg_type = (int)(intptr_t)arglist[arg_i]; + if (arg_type == STARPU_SCHED_CTX_POLICY_NAME) + { + arg_i++; + char *policy_name = arglist[arg_i]; + struct _starpu_machine_config *config = _starpu_get_machine_config(); + sched_policy = _starpu_select_sched_policy(config, policy_name); + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_STRUCT) + { + arg_i++; + sched_policy = arglist[arg_i]; + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_MIN_PRIO) + { + arg_i++; + min_prio = *(int *)arglist[arg_i]; + min_prio_set = 1; + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_MAX_PRIO) + { + arg_i++; + max_prio = *(int *)arglist[arg_i]; + max_prio_set = 1; + } + else if (arg_type == STARPU_SCHED_CTX_HIERARCHY_LEVEL) + { + arg_i++; + int val = *(int *)arglist[arg_i]; + STARPU_ASSERT(val >= 0); + hierarchy_level = (unsigned)val; + } + else if (arg_type == STARPU_SCHED_CTX_NESTED) + { + arg_i++; + int val = *(int *)arglist[arg_i]; + STARPU_ASSERT(val >= 0); + nesting_sched_ctx = (unsigned)val; + } + else if (arg_type == STARPU_SCHED_CTX_AWAKE_WORKERS) + { + awake_workers = 1; + } + else if (arg_type == STARPU_SCHED_CTX_POLICY_INIT) + { + arg_i++; + init_sched = arglist[arg_i]; + } + else if (arg_type == STARPU_SCHED_CTX_USER_DATA) + { + arg_i++; + user_data = arglist[arg_i]; + } + else if (arg_type == STARPU_SCHED_CTX_SUB_CTXS) + { + arg_i++; + sub_ctxs = (int*)arglist[arg_i]; + arg_i++; + nsub_ctxs = *(int*)arglist[arg_i]; + } + else if (arg_type == STARPU_SCHED_CTX_CUDA_NSMS) + { + arg_i++; + nsms = *(int*)arglist[arg_i]; + } + + else + { + STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); + } + arg_i++; + } + + if (workerids && nworkers != -1) + { + /* Make sure the user doesn't use invalid worker IDs. 
*/ + int num_workers = starpu_worker_get_count(); + int i; + for (i = 0; i < nworkers; i++) + { + if (workerids[i] < 0 || workerids[i] >= num_workers) + { + _STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]); + STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); + return STARPU_NMAX_SCHED_CTXS; + } + } + } + + struct _starpu_sched_ctx *sched_ctx; + sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data, nsub_ctxs, sub_ctxs, nsms); + sched_ctx->hierarchy_level = hierarchy_level; + sched_ctx->nesting_sched_ctx = nesting_sched_ctx; + + int *added_workerids; + unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); + sort_workerid_array(nw_ctx, added_workerids); + notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids); + _starpu_sched_ctx_lock_write(sched_ctx->id); + _starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id); + notify_workers_about_changing_ctx_done(nw_ctx, added_workerids); + _starpu_sched_ctx_unlock_write(sched_ctx->id); + free(added_workerids); +#ifdef STARPU_USE_SC_HYPERVISOR + sched_ctx->perf_counters = NULL; +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); + return (int)sched_ctx->id; +} + +void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->close_callback = close_callback; + sched_ctx->close_args = args; + return; +} + +#ifdef STARPU_USE_SC_HYPERVISOR +void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void* perf_counters) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->perf_counters = (struct starpu_sched_ctx_performance_counters *)perf_counters; + return; +} +#endif + +/* + * free all structures 
for the context + * Must be called with sched_ctx_manag mutex held +*/ +static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx) +{ + STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS); + STARPU_ASSERT(sched_ctx->do_schedule == 1); + sched_ctx->do_schedule = 0; + struct _starpu_machine_config *config = _starpu_get_machine_config(); + if(sched_ctx->sched_policy) + { + _starpu_deinit_sched_policy(sched_ctx); + free(sched_ctx->sched_policy); + sched_ctx->sched_policy = NULL; + } + else + { + starpu_sched_ctx_delete_worker_collection(sched_ctx->id); + } + + if (sched_ctx->perf_arch.devices) + { + free(sched_ctx->perf_arch.devices); + sched_ctx->perf_arch.devices = NULL; + } + + sched_ctx->min_priority_is_set = 0; + sched_ctx->max_priority_is_set = 0; + sched_ctx->id = STARPU_NMAX_SCHED_CTXS; +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_free(sched_ctx->hwloc_workers_set); +#endif //STARPU_HAVE_HWLOC + + config->topology.nsched_ctxs--; +} + +void starpu_sched_ctx_delete(unsigned sched_ctx_id) +{ + STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_ASSERT(sched_ctx); + +#ifdef STARPU_USE_SC_HYPERVISOR + if (sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) + { + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_delete_context(sched_ctx_id); + _STARPU_TRACE_HYPERVISOR_END(); + } +#endif //STARPU_USE_SC_HYPERVISOR + + _starpu_sched_ctx_lock_write(sched_ctx_id); + + unsigned inheritor_sched_ctx_id = sched_ctx->inheritor; + struct _starpu_sched_ctx *inheritor_sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx->inheritor); + _starpu_sched_ctx_lock_write(inheritor_sched_ctx_id); + + STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS); + + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + struct _starpu_sched_ctx *psched_ctx = _starpu_get_sched_ctx_struct(i); + if (psched_ctx->inheritor == 
sched_ctx_id) + { + _starpu_sched_ctx_lock_write(i); + psched_ctx->inheritor = inheritor_sched_ctx_id; + _starpu_sched_ctx_unlock_write(i); + } + } + + int *workerids; + unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); + int backup_workerids[nworkers_ctx]; + memcpy(backup_workerids, workerids, nworkers_ctx*sizeof(backup_workerids[0])); + sort_workerid_array(nworkers_ctx, backup_workerids); + notify_workers_about_changing_ctx_pending(nworkers_ctx, backup_workerids); + + /*if both of them have all the resources is pointless*/ + /*trying to transfer resources from one ctx to the other*/ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned nworkers = config->topology.nworkers; + + if(nworkers_ctx > 0 && inheritor_sched_ctx && inheritor_sched_ctx->id != STARPU_NMAX_SCHED_CTXS && + !(nworkers_ctx == nworkers && nworkers_ctx == inheritor_sched_ctx->workers->nworkers)) + { + add_notified_workers(workerids, nworkers_ctx, inheritor_sched_ctx_id); + } + notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids); + _starpu_sched_ctx_unlock_write(sched_ctx_id); + int wait_status = _starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id); + _starpu_sched_ctx_lock_write(sched_ctx_id); + notify_workers_about_changing_ctx_pending(nworkers_ctx, backup_workerids); + if(!wait_status) + { + if(!sched_ctx->sched_policy) + _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 0); + /*if btw the mutex release & the mutex lock the context has changed take care to free all + scheduling data before deleting the context */ + + /* announce upcoming context changes, then wait for sched_op operations to + * complete before altering the sched_ctx under sched_mutex protection */ + _starpu_update_notified_workers_without_ctx(workerids, nworkers_ctx, sched_ctx_id, 1); + _starpu_sched_ctx_free_scheduling_data(sched_ctx); + notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids); + occupied_sms -= 
/* Assert that each of the nworkers ids in workerids lies in the range
 * [0, nworkers_conf], where nworkers_conf is config->topology.nworkers.
 *
 * NOTE(review): the upper bound is inclusive (<=), so an id equal to the
 * number of configured workers is accepted, whereas
 * starpu_sched_ctx_create() rejects any id >= starpu_worker_get_count().
 * Presumably one of the two is off by one, unless this is meant to let a
 * combined worker id through -- confirm before tightening.
 */
static void _starpu_check_workers(int *workerids, int nworkers)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	int nworkers_conf = config->topology.nworkers;

	int i;
	for(i = 0; i < nworkers; i++)
	{
		/* take care the user does not ask for a resource that does not exist */
		STARPU_ASSERT_MSG(workerids[i] >= 0 && workerids[i] <= nworkers_conf, "requested to add workerid = %d, but that is beyond the range 0 to %d", workerids[i], nworkers_conf);
	}
}
/* Walk up the inheritor chain starting at sched_ctx and, for as long as the
 * context being left has a hierarchy level above 0, call
 * set_priority_on_notified_workers() with the given workers and priority on
 * its inheritor.
 *
 * Nothing is done for sched_ctx itself. */
static void set_priority_hierarchically_on_notified_workers(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
{
	unsigned current = sched_ctx;

	while (starpu_sched_ctx_get_hierarchy_level(current) > 0)
	{
		current = starpu_sched_ctx_get_inheritor(current);
		set_priority_on_notified_workers(workers_to_add, nworkers_to_add, current, priority);
	}
}
sched_ctx->sched_policy->add_workers) + { + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->add_workers(sched_ctx->id, added_workers, n_added_workers); + _STARPU_SCHED_END; + } + _starpu_update_notified_workers_with_ctx(added_workers, n_added_workers, sched_ctx->id); + } + set_priority_on_notified_workers(workerids, nworkers, sched_ctx_id, 1); + set_priority_hierarchically_on_notified_workers(workerids, nworkers, sched_ctx_id, 0); + fetch_tasks_from_empty_ctx_list(sched_ctx); +} + +/* Queue a new ctx change operation in the list of deferred ctx changes of the current worker. + * + * The set of workers to notify should contain all workers directly or + * indirectly affected by the change. In particular, all workers of + * sched_ctx_id should be notified even if they are not part of the change */ +static void _defer_ctx_change(int sched_ctx_id, enum _starpu_ctx_change_op op, int nworkers_to_notify, int *workerids_to_notify, int nworkers_to_change, int *workerids_to_change) +{ + STARPU_ASSERT(_starpu_worker_sched_op_pending()); + if (nworkers_to_change == 0) + return; + int workerid = starpu_worker_get_id_check(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_ctx_change_list *l = &worker->ctx_change_list; + struct _starpu_ctx_change *chg = _starpu_ctx_change_new(); + chg->sched_ctx_id = sched_ctx_id; + STARPU_ASSERT(op == ctx_change_add || op == ctx_change_remove); + chg->op = op; + STARPU_ASSERT(workerids_to_change != NULL); + chg->nworkers_to_change = nworkers_to_change; + _STARPU_MALLOC(chg->workerids_to_change, nworkers_to_change * sizeof(chg->workerids_to_change[0])); + memcpy(chg->workerids_to_change, workerids_to_change, nworkers_to_change * sizeof(chg->workerids_to_change[0])); + if (nworkers_to_notify != 0) + { + STARPU_ASSERT(workerids_to_notify != NULL); + chg->nworkers_to_notify = nworkers_to_notify; + _STARPU_MALLOC(chg->workerids_to_notify, nworkers_to_notify * sizeof(chg->workerids_to_notify[0])); + 
memcpy(chg->workerids_to_notify, workerids_to_notify, nworkers_to_notify * sizeof(chg->workerids_to_notify[0])); + } + else + { + STARPU_ASSERT(workerids_to_notify == NULL); + chg->nworkers_to_notify = 0; + chg->workerids_to_notify = 0; + } + _starpu_ctx_change_list_push_back(l, chg); +} + +void starpu_sched_ctx_add_workers(int *workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx_id) +{ + STARPU_ASSERT(workers_to_add != NULL && nworkers_to_add > 0); + _starpu_check_workers(workers_to_add, nworkers_to_add); + int *ctx_workerids = NULL; + _starpu_sched_ctx_lock_read(sched_ctx_id); + unsigned ctx_nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &ctx_workerids); + _starpu_sched_ctx_unlock_read(sched_ctx_id); + int cumulated_workerids[ctx_nworkers + nworkers_to_add]; + memcpy(cumulated_workerids, ctx_workerids, ctx_nworkers*sizeof(cumulated_workerids[0])); + unsigned cumulated_nworkers = ctx_nworkers; + { + unsigned i; + for (i=0; iworkers->nworkers]; + int n_removed_workers = 0; + + _starpu_remove_workers_from_sched_ctx(sched_ctx, workerids, nworkers, removed_workers, &n_removed_workers); + + if(n_removed_workers > 0) + { + _starpu_update_notified_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0); + set_priority_on_notified_workers(removed_workers, n_removed_workers, sched_ctx_id, 1); + } +} + +void starpu_sched_ctx_remove_workers(int *workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + _starpu_check_workers(workers_to_remove, nworkers_to_remove); + int *ctx_workerids = NULL; + _starpu_sched_ctx_lock_read(sched_ctx_id); + unsigned ctx_nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &ctx_workerids); + _starpu_sched_ctx_unlock_read(sched_ctx_id); + int cumulated_workerids[ctx_nworkers + nworkers_to_remove]; + memcpy(cumulated_workerids, ctx_workerids, ctx_nworkers*sizeof(cumulated_workerids[0])); + 
unsigned cumulated_nworkers = ctx_nworkers; + { + unsigned i; + for (i=0; iid != STARPU_NMAX_SCHED_CTXS) + { + if (_starpu_worker_sched_op_pending()) + { + _defer_ctx_change(sched_ctx_id, ctx_change_remove, cumulated_nworkers, cumulated_workerids, nworkers_to_remove, workers_to_remove); + } + else + { + sort_workerid_array(cumulated_nworkers, cumulated_workerids); + notify_workers_about_changing_ctx_pending(cumulated_nworkers, cumulated_workerids); + _starpu_sched_ctx_lock_write(sched_ctx_id); + remove_notified_workers(workers_to_remove, nworkers_to_remove, sched_ctx_id); + notify_workers_about_changing_ctx_done(cumulated_nworkers, cumulated_workerids); + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } + } +} + +int _starpu_workers_able_to_execute_task(struct starpu_task *task, struct _starpu_sched_ctx *sched_ctx) +{ + unsigned able = 0; + + _starpu_sched_ctx_lock_read(sched_ctx->id); + struct starpu_worker_collection *workers = sched_ctx->workers; + + struct starpu_sched_ctx_iterator it; + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + STARPU_ASSERT_MSG(worker < STARPU_NMAXWORKERS, "worker id %u", worker); + if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) + { + able++; + break; + } + } + _starpu_sched_ctx_unlock_read(sched_ctx->id); + + return able; +} + +/* unused sched_ctx have the id STARPU_NMAX_SCHED_CTXS */ +void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config) +{ + STARPU_PTHREAD_KEY_CREATE(&sched_ctx_key, NULL); + window_size = starpu_getenv_float_default("STARPU_WINDOW_TIME_SIZE", 0.0); + nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); + + unsigned i; + for(i = 0; i <= STARPU_NMAX_SCHED_CTXS; i++) + { + config->sched_ctxs[i].do_schedule = 0; + config->sched_ctxs[i].id = STARPU_NMAX_SCHED_CTXS; + STARPU_PTHREAD_RWLOCK_INIT0(&config->sched_ctxs[i].rwlock, NULL); + } + + return; +} + +/* sched_ctx 
aren't necessary one next to another */ +/* for eg when we remove one its place is free */ +/* when we add new one we reuse its place */ +static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config) +{ + unsigned i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + if(config->sched_ctxs[i].id == STARPU_NMAX_SCHED_CTXS) + return i; + + STARPU_ASSERT(0); + return STARPU_NMAX_SCHED_CTXS; +} + +int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_all must not be called from a task or callback"); + + _starpu_barrier_counter_wait_for_empty_counter(&sched_ctx->tasks_barrier); + return 0; +} + +int _starpu_wait_for_n_submitted_tasks_of_sched_ctx(unsigned sched_ctx_id, unsigned n) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_n_submitted_tasks must not be called from a task or callback"); + + return _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(&sched_ctx->tasks_barrier, n); +} + +void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); +#ifndef STARPU_SANITIZE_THREAD + if (!config->watchdog_ok) + config->watchdog_ok = 1; +#endif + + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + int reached = _starpu_barrier_counter_get_reached_exit(&sched_ctx->tasks_barrier); + int finished = reached == 1; + + /* when finished decrementing the tasks if the user signaled he will not submit tasks anymore + we can move all its workers to the inheritor context */ + if(finished && sched_ctx->inheritor != STARPU_NMAX_SCHED_CTXS) + { + STARPU_PTHREAD_MUTEX_LOCK(&finished_submit_mutex); + 
if(sched_ctx->finished_submit) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); + + if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS) + { + if(sched_ctx->close_callback) + sched_ctx->close_callback(sched_ctx->id, sched_ctx->close_args); + + int *workerids = NULL; + unsigned nworkers = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); + + if(nworkers > 0) + { + starpu_sched_ctx_add_workers(workerids, nworkers, sched_ctx->inheritor); + free(workerids); + } + } + _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->tasks_barrier, 0.0); + return; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); + } + + /* We also need to check for config->submitting = 0 (i.e. the + * user called starpu_drivers_request_termination()), in which + * case we need to set config->running to 0 and wake workers, + * so they can terminate, just like + * starpu_drivers_request_termination() does. + */ + + STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex); + if(config->submitting == 0) + { + if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS) + { + if(sched_ctx->close_callback) + sched_ctx->close_callback(sched_ctx->id, sched_ctx->close_args); + } + + ANNOTATE_HAPPENS_AFTER(&config->running); + config->running = 0; + ANNOTATE_HAPPENS_BEFORE(&config->running); + int s; + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) + { + _starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id); + } + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex); + + _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->tasks_barrier, 0.0); + + return; +} + +void _starpu_increment_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + _starpu_barrier_counter_increment(&sched_ctx->tasks_barrier, 0.0); +} + +int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx 
= _starpu_get_sched_ctx_struct(sched_ctx_id); + return _starpu_barrier_counter_get_reached_start(&sched_ctx->tasks_barrier); +} + +int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier); +} + +unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task) +{ + unsigned ret = 1; + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + if(!sched_ctx->is_initial_sched) + { + _starpu_sched_ctx_lock_write(sched_ctx->id); + } + + _starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops); + + + if(!sched_ctx->is_initial_sched) + { + if(!_starpu_can_push_task(sched_ctx, task)) + { + _starpu_push_task_to_waiting_list(sched_ctx, task); + ret = 0; + } + + _starpu_sched_ctx_unlock_write(sched_ctx->id); + } + return ret; +} + +void _starpu_decrement_nready_tasks_of_sched_ctx_locked(unsigned sched_ctx_id, double ready_flops) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops); +} + +void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + if(!sched_ctx->is_initial_sched) + { + _starpu_sched_ctx_lock_write(sched_ctx->id); + } + + _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops); + + + if(!sched_ctx->is_initial_sched) + { + _starpu_fetch_task_from_waiting_list(sched_ctx); + _starpu_sched_ctx_unlock_write(sched_ctx->id); + } + +} + +int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return 
_starpu_barrier_counter_get_reached_start(&sched_ctx->ready_tasks_barrier); +} + +double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return _starpu_barrier_counter_get_reached_flops(&sched_ctx->ready_tasks_barrier); +} + +int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + _starpu_barrier_counter_wait_for_empty_counter(&sched_ctx->ready_tasks_barrier); + return 0; +} + +/* + * FIXME: This should rather be + * void starpu_sched_ctx_set_context(unsigned sched_ctx) + */ +void starpu_sched_ctx_set_context(unsigned *sched_ctx) +{ + if (sched_ctx) + STARPU_PTHREAD_SETSPECIFIC(sched_ctx_key, (void*)(uintptr_t)(*sched_ctx + 1)); + else + STARPU_PTHREAD_SETSPECIFIC(sched_ctx_key, (void*)(uintptr_t) 0); +} + +unsigned starpu_sched_ctx_get_context() +{ + unsigned id = (unsigned)(uintptr_t)STARPU_PTHREAD_GETSPECIFIC(sched_ctx_key); + if (id == 0) + return STARPU_NMAX_SCHED_CTXS; + else + return id - 1; +} + +unsigned _starpu_sched_ctx_get_current_context() +{ + unsigned sched_ctx = starpu_sched_ctx_get_context(); + if (sched_ctx == STARPU_NMAX_SCHED_CTXS) + return _starpu_get_initial_sched_ctx()->id; + else + return sched_ctx; +} + +void starpu_sched_ctx_notify_hypervisor_exists() +{ + with_hypervisor = 1; + int i, j; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + hyp_start_sample[i] = starpu_timing_now(); + hyp_start_allow_sample[i] = 0.0; + for(j = 0; j < STARPU_NMAXWORKERS; j++) + { + flops[i][j] = 0.0; + data_size[i][j] = 0; + } + hyp_actual_start_sample[i] = 0.0; + } +} + +unsigned starpu_sched_ctx_check_if_hypervisor_exists() +{ + return with_hypervisor; +} + +void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample) +{ + hyp_actual_start_sample[sched_ctx_id] = start_sample; +} + +unsigned 
_starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id) +{ + (void) sched_ctx_id; + return 1; +#if 0 + double now = starpu_timing_now(); + if(hyp_start_allow_sample[sched_ctx_id] > 0.0) + { + double allow_sample = (now - hyp_start_allow_sample[sched_ctx_id]) / 1000000.0; + if(allow_sample < 0.001) + return 1; + else + { + hyp_start_allow_sample[sched_ctx_id] = 0.0; + hyp_start_sample[sched_ctx_id] = starpu_timing_now(); + return 0; + } + } + double forbid_sample = (now - hyp_start_sample[sched_ctx_id]) / 1000000.0; + if(forbid_sample > 0.01) + { +// hyp_start_sample[sched_ctx_id] = starpu_timing_now(); + hyp_start_allow_sample[sched_ctx_id] = starpu_timing_now(); + return 1; + } + return 0; +#endif +} + +void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void* policy_data) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->policy_data = policy_data; +} + +void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->policy_data; +} + +struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->sched_policy; +} + +struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type worker_collection_type) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + _STARPU_MALLOC(sched_ctx->workers, sizeof(struct starpu_worker_collection)); + + switch(worker_collection_type) + { +#ifdef STARPU_HAVE_HWLOC + case STARPU_WORKER_TREE: + sched_ctx->workers->has_next = starpu_worker_tree.has_next; + sched_ctx->workers->get_next = starpu_worker_tree.get_next; + sched_ctx->workers->add = starpu_worker_tree.add; + sched_ctx->workers->remove = starpu_worker_tree.remove; + 
sched_ctx->workers->init = starpu_worker_tree.init; + sched_ctx->workers->deinit = starpu_worker_tree.deinit; + sched_ctx->workers->init_iterator = starpu_worker_tree.init_iterator; + sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_tree.init_iterator_for_parallel_tasks; + sched_ctx->workers->type = STARPU_WORKER_TREE; + break; +#endif +// case STARPU_WORKER_LIST: + default: + sched_ctx->workers->has_next = starpu_worker_list.has_next; + sched_ctx->workers->get_next = starpu_worker_list.get_next; + sched_ctx->workers->add = starpu_worker_list.add; + sched_ctx->workers->remove = starpu_worker_list.remove; + sched_ctx->workers->init = starpu_worker_list.init; + sched_ctx->workers->deinit = starpu_worker_list.deinit; + sched_ctx->workers->init_iterator = starpu_worker_list.init_iterator; + sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_list.init_iterator_for_parallel_tasks; + sched_ctx->workers->type = STARPU_WORKER_LIST; + break; + + } + + /* construct the collection of workers(list/tree/etc.) 
*/ + sched_ctx->workers->init(sched_ctx->workers); + + return sched_ctx->workers; +} + +void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f) +{ + int *workerids = NULL; + unsigned nworkers; + unsigned i; + + nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids); + fprintf(f, "[sched_ctx %u]: %u worker%s\n", sched_ctx_id, nworkers, nworkers>1?"s":""); + for (i = 0; i < nworkers; i++) + { + char name[256]; + starpu_worker_get_name(workerids[i], name, 256); + fprintf(f, "\t\t%s\n", name); + } + free(workerids); +} + +unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + *workerids = sched_ctx->workers->workerids; + return sched_ctx->workers->nworkers; +} + +unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + struct starpu_worker_collection *workers = sched_ctx->workers; + unsigned nworkers = 0; + struct starpu_sched_ctx_iterator it; + + if(!workers) + return 0; + + _STARPU_MALLOC(*workerids, workers->nworkers*sizeof(int)); + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + (*workerids)[nworkers++] = worker; + } + return nworkers; +} + +void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->workers->deinit(sched_ctx->workers); + + free(sched_ctx->workers); + sched_ctx->workers = NULL; +} + +struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->workers; +} + +int _starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu_worker_archtype 
arch) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + struct starpu_worker_collection *workers = sched_ctx->workers; + + int npus = 0; + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); + if(curr_arch == arch || arch == STARPU_ANY_WORKER) + pus[npus++] = worker; + } + + return npus; +} + +unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + if(sched_ctx != NULL) + return sched_ctx->workers->nworkers; + else + return 0; + +} + +unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + struct _starpu_sched_ctx *sched_ctx2 = _starpu_get_sched_ctx_struct(sched_ctx_id2); + + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_worker_collection *workers2 = sched_ctx2->workers; + int shared_workers = 0; + struct starpu_sched_ctx_iterator it1, it2; + + workers->init_iterator(workers, &it1); + workers2->init_iterator(workers2, &it2); + while(workers->has_next(workers, &it1)) + { + int worker = workers->get_next(workers, &it1); + while(workers2->has_next(workers2, &it2)) + { + int worker2 = workers2->get_next(workers2, &it2); + if(worker == worker2) + shared_workers++; + } + } + + return shared_workers; +} + +unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + struct starpu_worker_collection *workers = sched_ctx->workers; + if(workers) + { + unsigned i; + + for (i = 0; i < workers->nworkers; i++) + if (workerid == workers->workerids[i]) + return 1; + } + return 0; +} + +unsigned 
starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id) +{ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + unsigned i; + + for (i = 0; i < workers->nworkers; i++) + { + int worker = workers->workerids[i]; + enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); + if(curr_arch == arch) + return 1; + } + return 0; + +} + +unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[i]; + if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && sched_ctx->id != sched_ctx_id) + if(starpu_sched_ctx_contains_worker(workerid, sched_ctx->id)) + return 1; + } + return 0; +} +unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id) +{ + int workerid = starpu_worker_get_id(); + if(workerid != -1) + if(starpu_sched_ctx_contains_worker(workerid, sched_ctx_id)) + return workerid; + return -1; +} + +unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + unsigned ret_sched_ctx = task->sched_ctx; + if (task->possibly_parallel && !sched_ctx->sched_policy + && sched_ctx->nesting_sched_ctx != STARPU_NMAX_SCHED_CTXS) + ret_sched_ctx = sched_ctx->nesting_sched_ctx; + return ret_sched_ctx; +} + +unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + return worker->nsched_ctxs > 1; +} + +void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + STARPU_ASSERT(inheritor < STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + 
sched_ctx->inheritor = inheritor; + return; +} + +unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + return sched_ctx->inheritor; +} + +unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + return sched_ctx->hierarchy_level; +} + +void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_PTHREAD_MUTEX_LOCK(&finished_submit_mutex); + sched_ctx->finished_submit = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); + return; +} + +#ifdef STARPU_USE_SC_HYPERVISOR + +void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size2, uint32_t footprint) +{ + if (workerid < 0) + return; + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + if(sched_ctx != NULL && task->sched_ctx != _starpu_get_initial_sched_ctx()->id && + task->sched_ctx != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) + { + flops[task->sched_ctx][workerid] += task->flops; + data_size[task->sched_ctx][workerid] += data_size2; + + if(_starpu_sched_ctx_allow_hypervisor(sched_ctx->id) || task->hypervisor_tag > 0) + { + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_post_exec_task(task, data_size[task->sched_ctx][workerid], footprint, + task->hypervisor_tag, flops[task->sched_ctx][workerid]); + _STARPU_TRACE_HYPERVISOR_END(); + flops[task->sched_ctx][workerid] = 0.0; + data_size[task->sched_ctx][workerid] = 0; + } + } +} + +void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + 
+ if(sched_ctx != NULL && sched_ctx_id != _starpu_get_initial_sched_ctx()->id && sched_ctx_id != STARPU_NMAX_SCHED_CTXS + && sched_ctx->perf_counters != NULL && _starpu_sched_ctx_allow_hypervisor(sched_ctx_id)) + { + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_pushed_task(sched_ctx_id, workerid); + _STARPU_TRACE_HYPERVISOR_END(); + } +} +#endif //STARPU_USE_SC_HYPERVISOR + +int starpu_sched_get_min_priority(void) +{ + return starpu_sched_ctx_get_min_priority(_starpu_sched_ctx_get_current_context()); +} + +int starpu_sched_get_max_priority(void) +{ + return starpu_sched_ctx_get_max_priority(_starpu_sched_ctx_get_current_context()); +} + +int starpu_sched_set_min_priority(int min_prio) +{ + return starpu_sched_ctx_set_min_priority(_starpu_sched_ctx_get_current_context(), min_prio); +} + +int starpu_sched_set_max_priority(int max_prio) +{ + return starpu_sched_ctx_set_max_priority(_starpu_sched_ctx_get_current_context(), max_prio); +} + +int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->min_priority; +} + +int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->max_priority; +} + +int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->min_priority = min_prio; + return 0; +} + +int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + sched_ctx->max_priority = max_prio; + return 0; +} + +int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->min_priority_is_set; +} + +int 
starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->max_priority_is_set; +} + +static void set_priority_on_notified_workers(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority) +{ + if(nworkers != -1) + { + int w; + struct _starpu_worker *worker = NULL; + for(w = 0; w < nworkers; w++) + { + if (workers[w] >= (int) starpu_worker_get_count()) + /* Combined worker, don't care */ + continue; + worker = _starpu_get_worker_struct(workers[w]); + _starpu_sched_ctx_list_move(&worker->sched_ctx_list, sched_ctx_id, priority); + } + } +} + +void starpu_sched_ctx_set_priority(int *workerids, int nworkers, unsigned sched_ctx_id, unsigned priority) +{ + if(nworkers != -1) + { + notify_workers_about_changing_ctx_pending(nworkers, workerids); + _starpu_sched_ctx_lock_write(sched_ctx_id); + int w; + for(w = 0; w < nworkers; w++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[w]); + _starpu_sched_ctx_list_move(&worker->sched_ctx_list, sched_ctx_id, priority); + } + notify_workers_about_changing_ctx_done(nworkers, workerids); + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } +} + +unsigned starpu_sched_ctx_get_priority(int workerid, unsigned sched_ctx_id) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + return _starpu_sched_ctx_elt_get_priority(worker->sched_ctx_list, sched_ctx_id); +} + +unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker) +{ + /* The worker being checked must have its status set to sleeping during + * the check, to allow for an other worker being checked concurrently + * to make the safe, pessimistic assumption that it is the last worker + * awake. 
In the worst case, both workers will follow this pessimistic + * path and perform one more scheduling loop */ + STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[worker->workerid].status); + STARPU_ASSERT(_starpu_config.workers[worker->workerid].status & STATUS_SLEEPING); + STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[worker->workerid].status); + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + + unsigned last_worker_awake = 1; + struct starpu_worker_collection *workers = sched_ctx->workers; + /* workers can be NULL in some corner cases, since we do not lock sched_ctx here */ + if (workers != NULL) + { + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + if(workerid != worker->workerid) + { + if(starpu_worker_is_combined_worker(workerid)) + { + continue; + } + /* The worker status is intendedly checked + * without taking locks. 
If multiple workers + * are concurrently assessing whether they are + * the last worker awake, they will follow the + * pessimistic path and assume that they are + * the last worker awake */ + STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[workerid].status); + const int cond = !(_starpu_config.workers[workerid].status & STATUS_SLEEPING); + STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[workerid].status); + + if (cond) + { + last_worker_awake = 0; + break; + } + } + } + } + if(last_worker_awake) + return 1; + } + return 0; +} + +void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid) +{ + _starpu_bind_thread_on_cpu(cpuid, STARPU_NOWORKERID, NULL); +} + +unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id) +{ + if (_starpu_get_nsched_ctxs() <= 1) + return STARPU_NMAX_SCHED_CTXS; + + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if(sched_ctx-> main_master == workerid && sched_ctx->nesting_sched_ctx == sched_ctx_id) + return sched_ctx->id; + } + return STARPU_NMAX_SCHED_CTXS; +} + +unsigned starpu_sched_ctx_master_get_context(int masterid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(masterid); + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if(sched_ctx->main_master == masterid) + return sched_ctx->id; 
+ } + return STARPU_NMAX_SCHED_CTXS; +} + +struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j) +{ + struct _starpu_sched_ctx_list_iterator list_it; + struct _starpu_sched_ctx *ret = NULL; + + starpu_worker_lock(worker->workerid); + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if (j->task->sched_ctx == sched_ctx->id) + { + ret = sched_ctx; + break; + } + } + starpu_worker_unlock(worker->workerid); + return ret; +} + +void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double ready_flops) +{ + _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx_id); + _starpu_decrement_nready_tasks_of_sched_ctx_locked(sched_ctx_id, ready_flops); +} + +void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double ready_flops) +{ + _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx_id); + _starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx_id, ready_flops); +} + +void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush) +{ + /* Restore state just like out of dependency layers */ + STARPU_ASSERT(task->status == STARPU_TASK_READY); + task->status = STARPU_TASK_BLOCKED; + + /* TODO: make something cleaner which differentiates between calls + from push or pop (have mutex or not) and from another worker or not */ + task->sched_ctx = sched_ctx; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); + + if(with_repush) + _starpu_repush_task(j); + else + _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); +} + +#if 0 +void 
starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex, + unsigned with_repush) +{ + /* TODO: make something cleaner which differentiates between calls + from push or pop (have mutex or not) and from another worker or not */ + int workerid = starpu_worker_get_id(); + struct _starpu_worker *worker = NULL; + if(workerid != -1 && manage_mutex) + { + worker = _starpu_get_worker_struct(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + + + task->sched_ctx = sched_ctx; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); + + if(with_repush) + _starpu_repush_task(j); + else + _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); + + if(workerid != -1 && manage_mutex) + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); +} +#endif + +void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid) +{ + /* Note : often we don't have any sched_mutex taken here but we + should, so take it */ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + + /* FIXME: why do we push events only when the worker belongs to more than one ctx? 
*/ + if (worker->nsched_ctxs > 1) + { + starpu_worker_lock(workerid); + _starpu_sched_ctx_list_push_event(worker->sched_ctx_list, sched_ctx_id); + starpu_worker_unlock(workerid); + } +} + +void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->nsched_ctxs > 1) + _starpu_sched_ctx_list_pop_event(worker->sched_ctx_list, sched_ctx_id); +} + +void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->nsched_ctxs > 1) + _starpu_sched_ctx_list_pop_all_event(worker->sched_ctx_list, sched_ctx_id); +} + +void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id) +{ + /* TODO: add proper, but light-enough locking to sched_ctx counters */ + + /* Note that with 1 ctx we will default to the global context, + hence our counters are useless */ + if (_starpu_get_nsched_ctxs() > 1) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, worker); + } + } +} + +void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id) +{ + /* TODO: add proper, but light-enough locking to sched_ctx counters */ + + /* Note that with 1 ctx we will default to the global context, + hence our counters are useless */ + if (_starpu_get_nsched_ctxs() > 1) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + _starpu_sched_ctx_lock_write(sched_ctx_id); + 
workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, worker); + } + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } +} + +void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id) +{ + if (_starpu_get_nsched_ctxs() > 1) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->nsched_ctxs > 1) + { + starpu_worker_lock(workerid); + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); + starpu_worker_unlock(workerid); + } + } + } +} + +void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id) +{ + if (_starpu_get_nsched_ctxs() > 1) + { + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + _starpu_sched_ctx_lock_write(sched_ctx_id); + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->nsched_ctxs > 1) + { + starpu_worker_lock(workerid); + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); + starpu_worker_unlock(workerid); + } + } + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } +} + +void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id) +{ + if (_starpu_get_nsched_ctxs() > 1) + { + 
_starpu_sched_ctx_lock_write(sched_ctx_id); + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->nsched_ctxs > 1) + { + starpu_worker_lock(workerid); + starpu_sched_ctx_list_task_counters_reset(sched_ctx_id, workerid); + starpu_worker_unlock(workerid); + } + } + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } +} + +static void _starpu_sched_ctx_block_workers_in_parallel(unsigned sched_ctx_id, unsigned all) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + int current_worker_id = starpu_worker_get_id(); + int master, temp_master = 0; + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_sched_ctx_iterator it; + + /* temporarily put a master if needed */ + if (sched_ctx->main_master == -1) + { + _starpu_sched_ctx_put_new_master(sched_ctx_id); + temp_master = 1; + } + master = sched_ctx->main_master; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + if(starpu_worker_get_type(workerid) == STARPU_CPU_WORKER + && (workerid != master || all) + && (current_worker_id == -1 || workerid != current_worker_id)) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_request_blocking_in_parallel(worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + } + + if (temp_master) + sched_ctx->main_master = -1; +} + +static void _starpu_sched_ctx_unblock_workers_in_parallel(unsigned sched_ctx_id, unsigned all) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + 
int current_worker_id = starpu_worker_get_id(); + int master, temp_master = 0; + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_sched_ctx_iterator it; + + /* temporarily put a master if needed */ + if (sched_ctx->main_master == -1) + { + _starpu_sched_ctx_put_new_master(sched_ctx_id); + temp_master = 1; + } + master = sched_ctx->main_master; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + if(starpu_worker_get_type(workerid) == STARPU_CPU_WORKER + && (workerid != master || all)) + { + if (current_worker_id == -1 || workerid != current_worker_id) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex); + _starpu_worker_request_unblocking_in_parallel(worker); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex); + } + } + } + + if (temp_master) + sched_ctx->main_master = -1; + + return; +} + +void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, unsigned sched_ctx_id) +{ + _starpu_sched_ctx_block_workers_in_parallel(sched_ctx_id, 1); + + /* execute parallel code */ + void* ret = func(param); + + /* wake up starpu workers */ + _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 1); + return ret; +} + +static void _starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + if(sched_ctx->sched_policy) + return; + + _starpu_sched_ctx_put_new_master(sched_ctx_id); + + if(!sched_ctx->awake_workers) + { + _starpu_sched_ctx_block_workers_in_parallel(sched_ctx_id, 0); + } +} + +static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + + if(sched_ctx->sched_policy) + return; + + _starpu_sched_ctx_put_new_master(sched_ctx_id); + + 
if(!sched_ctx->awake_workers) + { + _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 0); + } +} + +void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids) +{ + int current_worker_id = starpu_worker_get_id(); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + struct starpu_worker_collection *workers = sched_ctx->workers; + _STARPU_MALLOC((*cpuids), workers->nworkers*sizeof(int)); + int w = 0; + + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + int master = sched_ctx->main_master; + if(master == current_worker_id || workerid == current_worker_id || current_worker_id == -1) + { + (*cpuids)[w++] = starpu_worker_get_bindid(workerid); + } + } + *ncpuids = w; + return; +} + +static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id) +{ + int *workerids; + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + unsigned nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); + unsigned i; + + for (i=0; imain_master = workerids[i]; + break; + } + } + STARPU_ASSERT_MSG(iperf_arch; +} + +int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id) +{ + int idx = 0; + int curr_workerid = starpu_worker_get_id(); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + if(sched_ctx->sched_policy || !sched_ctx->awake_workers) + return -1; + struct starpu_worker_collection *workers = sched_ctx->workers; + + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int worker = workers->get_next(workers, &it); + if(worker == curr_workerid) + return idx; + idx++; + } + + return -1; +} + +void (*starpu_sched_ctx_get_sched_policy_callback(unsigned sched_ctx_id))(unsigned) +{ + struct _starpu_sched_ctx *sched_ctx = 
_starpu_get_sched_ctx_struct(sched_ctx_id); + return sched_ctx->callback_sched; +} + +unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + *awake_workers = sched_ctx->awake_workers; + return sched_ctx->sched_policy != NULL; +} + +void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_ASSERT(sched_ctx != NULL); + return sched_ctx->user_data; +} + +void starpu_sched_ctx_set_user_data(unsigned sched_ctx_id, void* user_data) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_ASSERT(sched_ctx != NULL); + sched_ctx->user_data = user_data; +} + +void _starpu_worker_apply_deferred_ctx_changes(void) +{ + int workerid = starpu_worker_get_id_check(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_ctx_change_list *l = &worker->ctx_change_list; + STARPU_ASSERT(!_starpu_worker_sched_op_pending()); + while (!_starpu_ctx_change_list_empty(l)) + { + struct _starpu_ctx_change *chg = _starpu_ctx_change_list_pop_front(l); + STARPU_ASSERT(chg->workerids_to_change != NULL); + + if (chg->nworkers_to_notify) + { + STARPU_ASSERT(chg->workerids_to_notify != NULL); + notify_workers_about_changing_ctx_pending(chg->nworkers_to_notify, chg->workerids_to_notify); + } + else + { + STARPU_ASSERT(chg->workerids_to_notify == NULL); + notify_workers_about_changing_ctx_pending(chg->nworkers_to_change, chg->workerids_to_change); + } + _starpu_sched_ctx_lock_write(chg->sched_ctx_id); + switch (chg->op) + { + case ctx_change_add: + { + add_notified_workers(chg->workerids_to_change, chg->nworkers_to_change, chg->sched_ctx_id); + } + break; + case ctx_change_remove: + { + remove_notified_workers(chg->workerids_to_change, chg->nworkers_to_change, chg->sched_ctx_id); + { + int i; + for (i = 0; i 
< chg->nworkers_to_change; i++) + { + struct _starpu_worker *w = + _starpu_get_worker_struct(chg->workerids_to_change[i]); + if(w->removed_from_ctx[chg->sched_ctx_id] == 1 + && w->shares_tasks_lists[chg->sched_ctx_id] == 1) + { + _starpu_worker_gets_out_of_ctx(chg->sched_ctx_id, w); + w->removed_from_ctx[chg->sched_ctx_id] = 0; + } + } + } + } + break; + default: + STARPU_ASSERT_MSG(0, "invalid ctx change opcode\n"); + } + if (chg->nworkers_to_notify) + { + notify_workers_about_changing_ctx_done(chg->nworkers_to_notify, chg->workerids_to_notify); + } + else + { + notify_workers_about_changing_ctx_done(chg->nworkers_to_change, chg->workerids_to_change); + } + + _starpu_sched_ctx_unlock_write(chg->sched_ctx_id); + free(chg->workerids_to_notify); + free(chg->workerids_to_change); + _starpu_ctx_change_delete(chg); + } + + +} + +/* + * TODO: verify starpu_sched_ctx_create_inside_interval correctness before re-enabling the functions below + */ +#if 0 +static void _get_workers(int min, int max, int *workers, int *nw, enum starpu_worker_archtype arch, unsigned allow_overlap) +{ + int pus[max]; + int npus = 0; + int i; + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + if(config->topology.nsched_ctxs == 1) + { + /*we have all available resources */ + npus = _starpu_worker_get_nids_by_type(arch, pus, max); +/*TODO: hierarchical ctxs: get max good workers: close one to another */ + for(i = 0; i < npus; i++) + workers[(*nw)++] = pus[i]; + } + else + { + unsigned enough_resources = 0; + npus = _starpu_worker_get_nids_ctx_free_by_type(arch, pus, max); + + for(i = 0; i < npus; i++) + workers[(*nw)++] = pus[i]; + + if(npus == max) + /*we have enough available resources */ + enough_resources = 1; + + if(!enough_resources && npus >= min) + /*we have enough available resources */ + enough_resources = 1; + + if(!enough_resources) + { + /* try to get resources from ctx who have more than the min of workers they need */ + int s; + for(s = 1; s < 
STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) + { + int _npus = 0; + int _pus[STARPU_NMAXWORKERS]; + _npus = _starpu_get_workers_of_sched_ctx(config->sched_ctxs[s].id, _pus, arch); + int ctx_min = arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; + if(_npus > ctx_min) + { + int n=0; + if(npus < min) + { + n = (_npus - ctx_min) > (min - npus) ? min - npus : (_npus - ctx_min); + npus += n; + } +/*TODO: hierarchical ctxs: get n good workers: close to the other ones I already assigned to the ctx */ + for(i = 0; i < n; i++) + workers[(*nw)++] = _pus[i]; + starpu_sched_ctx_remove_workers(_pus, n, config->sched_ctxs[s].id); + } + } + } + + if(npus >= min) + enough_resources = 1; + } + + if(!enough_resources) + { + /* if there is no available workers to satisfy the minimum required + give them workers proportional to their requirements*/ + int global_npus = starpu_worker_get_count_by_type(arch); + + int req_npus = 0; + + int s; + for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) + req_npus += arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; + + req_npus += min; + + for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) + { + int ctx_min = arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; + double needed_npus = ((double)ctx_min * (double)global_npus) / (double)req_npus; + + int _npus = 0; + int _pus[STARPU_NMAXWORKERS]; + + _npus = _starpu_get_workers_of_sched_ctx(config->sched_ctxs[s].id, _pus, arch); + if(needed_npus < (double)_npus) + { + double npus_to_rem = (double)_npus - needed_npus; + int x = floor(npus_to_rem); + double x_double = (double)x; + double diff = npus_to_rem - x_double; + int npus_to_remove = diff >= 0.5 ? 
x+1 : x; + + int pus_to_remove[npus_to_remove]; + int c = 0; + +/*TODO: hierarchical ctxs: get npus_to_remove good workers: close to the other ones I already assigned to the ctx */ + for(i = _npus-1; i >= (_npus - npus_to_remove); i--) + { + workers[(*nw)++] = _pus[i]; + pus_to_remove[c++] = _pus[i]; + } + if(!allow_overlap) + starpu_sched_ctx_remove_workers(pus_to_remove, npus_to_remove, config->sched_ctxs[s].id); + } + + } + } + } + } +} + +unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, + int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, + unsigned allow_overlap) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(config, policy_name); + + struct _starpu_sched_ctx *sched_ctx = NULL; + int workers[max_ncpus + max_ngpus]; + int nw = 0; + STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); + _get_workers(min_ncpus, max_ncpus, workers, &nw, STARPU_CPU_WORKER, allow_overlap); + _get_workers(min_ngpus, max_ngpus, workers, &nw, STARPU_CUDA_WORKER, allow_overlap); + STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); + int i; + _STARPU_DEBUG("%d: ", nw); + for(i = 0; i < nw; i++) + _STARPU_DEBUG_NO_HEADER("%d ", workers[i]); + _STARPU_DEBUG_NO_HEADER("\n"); + sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1, NULL, NULL,0, NULL, 0); + sched_ctx->min_ncpus = min_ncpus; + sched_ctx->max_ncpus = max_ncpus; + sched_ctx->min_ngpus = min_ngpus; + sched_ctx->max_ngpus = max_ngpus; + int *added_workerids; + unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); +#warning TODO: verify call below, shouldn t it be _starpu_update_workers_with_ctx? 
+ _starpu_update_workers_without_ctx(added_workerids, nw_ctx, sched_ctx->id, 0); + free(added_workerids); +#ifdef STARPU_USE_SC_HYPERVISOR + sched_ctx->perf_counters = NULL; +#endif + return sched_ctx->id; + +} +#endif diff --git a/src/core/sched_ctx.h b/src/core/sched_ctx.h new file mode 100644 index 0000000..12d1742 --- /dev/null +++ b/src/core/sched_ctx.h @@ -0,0 +1,325 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __SCHED_CONTEXT_H__ +#define __SCHED_CONTEXT_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_ctx_list.h" + +#ifdef STARPU_HAVE_HWLOC +#include +#endif + +#pragma GCC visibility push(hidden) + +#define NO_RESIZE -1 +#define REQ_RESIZE 0 +#define DO_RESIZE 1 + +#define STARPU_GLOBAL_SCHED_CTX 0 +#define STARPU_NMAXSMS 13 +struct _starpu_sched_ctx +{ + /** id of the context used in user mode*/ + unsigned id; + + /** boolean indicating whether the scheduling_ctx will be considered for scheduling (1) or not (0)*/ + unsigned do_schedule; + + /** name of context */ + const char *name; + + /** policy of the context */ + struct starpu_sched_policy *sched_policy; + + /** data necessary for the policy */ + void *policy_data; + + /** pointer for application use */ + void *user_data; + + struct starpu_worker_collection *workers; /**< worker collection of the context, walked with its init_iterator/has_next/get_next callbacks */ + + /** we keep an initial sched which we never delete */ + unsigned is_initial_sched; + + /** wait for the tasks submitted to the context to be executed */ + struct _starpu_barrier_counter tasks_barrier; + + /** wait for the ready tasks of the context to be executed */ + struct _starpu_barrier_counter ready_tasks_barrier; + + /** amount of ready flops in a context */ + double ready_flops; + + /** Iteration number, as advertised by application */ + long iterations[2]; + int iteration_level; + + /** ready tasks that couldn't be pushed because the ctx has no workers */ + struct starpu_task_list empty_ctx_tasks; + + /** ready tasks that couldn't be pushed because the window of tasks was already full */ + struct starpu_task_list waiting_tasks; + + /** min CPUs to execute*/ + int min_ncpus; + + /** max CPUs to execute*/ + int max_ncpus; + + /** min GPUs to execute*/ + int min_ngpus; + + /** max GPUs to execute*/ + int max_ngpus; + + /** in case we delete the context, leave its resources to the inheritor */ + unsigned inheritor; + + /** indicates whether the
application finished submitting tasks + to this context*/ + unsigned finished_submit; + + /** By default we have a binary type of priority: either a task is a priority + * task (level 1) or it is not (level 0). */ + int min_priority; + int max_priority; + int min_priority_is_set; + int max_priority_is_set; + + /** hwloc bitmap of the workers */ +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_t hwloc_workers_set; +#endif + +#ifdef STARPU_USE_SC_HYPERVISOR + /** a structure containing a series of performance counters determining the resize procedure */ + struct starpu_sched_ctx_performance_counters *perf_counters; +#endif //STARPU_USE_SC_HYPERVISOR + + /** callback called when the context has finished executing its submitted tasks */ + void (*close_callback)(unsigned sched_ctx_id, void* args); + void *close_args; + + /** value placing the contexts in their hierarchy */ + unsigned hierarchy_level; + + /** if we execute non-StarPU code inside the context + we have a single master worker that stays awake, + otherwise main_master is -1 */ + int main_master; + + /** ctx nesting the current ctx */ + unsigned nesting_sched_ctx; + + /** perfmodel arch for the device combination of the ctx */ + struct starpu_perfmodel_arch perf_arch; + + /** For parallel workers, say whether it is viewed as sequential or not. This + is a helper for the prologue code.
*/ + unsigned parallel_view; + + /** for ctxs without policy: flag to indicate that we want to get + the threads to sleep in order to replace them with other threads or leave + them awake & use them in the parallel code*/ + unsigned awake_workers; + + /** callback function called when initializing the scheduler */ + void (*callback_sched)(unsigned); + + int sub_ctxs[STARPU_NMAXWORKERS]; + int nsub_ctxs; + + /** number of SMs assigned to this ctx if we partition gpus*/ + int nsms; + int sms_start_idx; + int sms_end_idx; + + int stream_worker; + + starpu_pthread_rwlock_t rwlock; /**< taken by the _starpu_sched_ctx_lock_read/_starpu_sched_ctx_lock_write helpers below */ + starpu_pthread_t lock_write_owner; /**< thread holding rwlock for writing; zeroed by _starpu_sched_ctx_unlock_write */ +}; + +/** per-worker list of deferred ctx_change ops */ +LIST_TYPE(_starpu_ctx_change, + int sched_ctx_id; + int op; + int nworkers_to_notify; + int *workerids_to_notify; + int nworkers_to_change; + int *workerids_to_change; +); + +struct _starpu_machine_config; + +/** init sched_ctx_id of all contexts */ +void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config); + +/** allocate all structures belonging to a context */ +struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name, + int min_prio_set, int min_prio, + int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_callback)(unsigned), void *user_data, + int nsub_ctxs, int *sub_ctxs, int nsms); + +/** delete all sched_ctx */ +void _starpu_delete_all_sched_ctxs(void); + +/** This function waits until all the tasks that were already submitted to a specific + * context have been executed. */ +int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id); + +/** This function waits until at most n tasks are still submitted.
*/ +int _starpu_wait_for_n_submitted_tasks_of_sched_ctx(unsigned sched_ctx_id, unsigned n); + +/** In order to implement starpu_wait_for_all_tasks_of_ctx, we keep track of the number of + * tasks currently submitted to the context */ +void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); +void _starpu_increment_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); +int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); +int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); + +void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops); +unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task); +int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id); + +/** Get the workers belonging to a certain context and return their number. + * Take care: no mutex is taken, so the list of workers might not + * be up to date + */ +int _starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu_worker_archtype arch); + +/** Let the worker know it does not belong to the context and that it + * should stop popping from it + */ +void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker); + +/** Check if the worker belongs to another sched_ctx */ +unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id); + +/** indicates whether this worker should go to sleep or not (if it is + * the last one awake in a context it had better stay awake) + */ +unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker); + +/** If starpu_sched_ctx_set_context() has been called, returns the context + * id set by its last call, or the id of the initial context */ +unsigned _starpu_sched_ctx_get_current_context(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** verify that some worker can execute a certain task */ +int _starpu_workers_able_to_execute_task(struct
starpu_task *task, struct _starpu_sched_ctx *sched_ctx); + +unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id); + +struct starpu_perfmodel_arch * _starpu_sched_ctx_get_perf_archtype(unsigned sched_ctx); +#ifdef STARPU_USE_SC_HYPERVISOR +/** Notifies the hypervisor that a task was popped from the workers' list */ +void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint); + +#endif //STARPU_USE_SC_HYPERVISOR + +void starpu_sched_ctx_add_combined_workers(int *combined_workers_to_add, unsigned n_combined_workers_to_add, unsigned sched_ctx_id); + +/** if the worker is the master of a parallel context, and the job is meant to be executed on this parallel context, return a pointer to the context; return NULL when no context of the worker's list has the id of the job's task context */ +struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j); + +#define _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(w,j) \ + (_starpu_get_nsched_ctxs() <= 1 ?
_starpu_get_sched_ctx_struct(0) : __starpu_sched_ctx_get_sched_ctx_for_worker_and_job((w),(j))) + +static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id); + +static inline int _starpu_sched_ctx_check_write_locked(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + return starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()); +} +#define STARPU_SCHED_CTX_CHECK_LOCK(sched_ctx_id) STARPU_ASSERT(_starpu_sched_ctx_check_write_locked((sched_ctx_id))) + +static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); + STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_PTHREAD_RWLOCK_WRLOCK(&sched_ctx->rwlock); + sched_ctx->lock_write_owner = starpu_pthread_self(); +} + +static inline void _starpu_sched_ctx_unlock_write(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_ASSERT(starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); + memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner)); + STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock); +} + +static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, 
starpu_pthread_self())); + STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_PTHREAD_RWLOCK_RDLOCK(&sched_ctx->rwlock); +} + +static inline void _starpu_sched_ctx_unlock_read(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); + STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); + STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock); +} + +static inline unsigned _starpu_sched_ctx_worker_is_master_for_child_ctx(unsigned sched_ctx_id, unsigned workerid, struct starpu_task *task) +{ + unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id); + if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS) + { + starpu_sched_ctx_move_task_to_ctx_locked(task, child_sched_ctx, 1); + starpu_sched_ctx_revert_task_counters_ctx_locked(sched_ctx_id, task->flops); + return 1; + } + return 0; +} + +/** Go through the list of deferred ctx changes of the current worker and apply + * any ctx change operation found until the list is empty */ +void _starpu_worker_apply_deferred_ctx_changes(void); + +#pragma GCC visibility pop + +#endif // __SCHED_CONTEXT_H__ diff --git a/src/core/sched_ctx_list.c b/src/core/sched_ctx_list.c new file mode 100644 index 0000000..32ee843 --- /dev/null +++ b/src/core/sched_ctx_list.c @@ -0,0 +1,433 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "sched_ctx_list.h" + +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_list *l = NULL; + struct _starpu_sched_ctx_elt *e = NULL; + unsigned found = 0; + + for (l = list; l && !found; l=l->next) + { + e=l->head; //Go in a circle once before stopping + do + { + if (e->sched_ctx == sched_ctx) + { + found = 1; + break; + } + e = e->next; + } + while (e != l->head); + } + + return found ? e : NULL; +} + +void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx) +{ + elt->sched_ctx = sched_ctx; + elt->task_number = 0; + elt->last_poped = 0; + elt->parent = NULL; + elt->next = NULL; + elt->prev = NULL; +} + +void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); + if (elt && elt->task_number>0) + elt->task_number = 0; +} + +/* Adds a new element after the head of the given list. 
*/ +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *head, *next; + struct _starpu_sched_ctx_elt *elt; + _STARPU_MALLOC(elt, sizeof(struct _starpu_sched_ctx_elt)); + + _starpu_sched_ctx_elt_init(elt, sched_ctx); + elt->parent = list; + + head = list->head; + if (head != NULL) + { + next = head->next; + head->next = elt; + elt->prev = head; + + /** We know next != NULL since it is at least head **/ + elt->next = next; + next->prev = elt; + } + else + { + elt->next = elt; + elt->prev = elt; + list->head = elt; + } + + return elt; +} + +/* Adds a new element before the head of the given list. */ +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *head, *prev; + struct _starpu_sched_ctx_elt *elt; + _STARPU_MALLOC(elt, sizeof(struct _starpu_sched_ctx_elt)); + + _starpu_sched_ctx_elt_init(elt, sched_ctx); + elt->parent = list; + + head = list->head; + if (head != NULL) + { + prev = head->prev; + head->prev = elt; + elt->next = head; + + elt->prev = prev; + prev->next = elt; + } + else + { + elt->next = elt; + elt->prev = elt; + list->head = elt; + } + return elt; +} + +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + return _starpu_sched_ctx_elt_add_after(list, sched_ctx); +} + +/* Remove elt from list */ +void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, + struct _starpu_sched_ctx_elt *elt) +{ + elt->prev->next = elt->next; + elt->next->prev = elt->prev; + + if (elt->next == elt) //singleton + list->head = NULL; + else if (elt->next != elt && list->head == elt) + list->head = elt->next; + + free(elt); + return; +} + +int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *e; + e = 
_starpu_sched_ctx_elt_find(list, sched_ctx); + return (e == NULL) ? 0 : 1; +} + +int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *e; + e = _starpu_sched_ctx_elt_find(list, sched_ctx); + return (e == NULL) ? 0 : e->parent->priority; +} + +struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, + unsigned prio) +{ + struct _starpu_sched_ctx_list *l = NULL; + + for (l = list; l != NULL ; l=l->next) + { + if (l->priority == prio) + break; + } + + return l; +} + +/* Adds sched_ctx in a priority list. We consider that we don't add two times + * the same sched_ctx. Returns head of list. */ +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, + unsigned prio, unsigned sched_ctx) +{ + struct _starpu_sched_ctx_list *parent_list = NULL, *prev = NULL, *last = NULL; + struct _starpu_sched_ctx_list *l; + + for (l = *list; l != NULL; l=l->next) + { + if (l->priority <= prio) + break; + last = l; + } + + if (l != NULL && l->priority == prio) + { + parent_list = l; + } + else //l's priority is inferior or inexistent, add before + { + _STARPU_MALLOC(parent_list, sizeof(struct _starpu_sched_ctx_list)); + parent_list->priority = prio; + parent_list->next = l; + parent_list->head = NULL; + parent_list->prev = NULL; + if (l != NULL) + { + prev = l->prev; + l->prev = parent_list; + if (prev != NULL) + { + prev->next = parent_list; + parent_list->prev = prev; + } + else + { + *list = parent_list; + } + } + else + { + if (last == NULL) + { + *list = parent_list; + } + else + { + last->next = parent_list; + parent_list->prev = last; + } + } + } + + return _starpu_sched_ctx_elt_add(parent_list, sched_ctx); +} + +int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, + unsigned sched_ctx) +{ + return _starpu_sched_ctx_list_add_prio(list, 0, sched_ctx) != NULL ? 
0 : -1; +} + +void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, + struct _starpu_sched_ctx_elt *rm) +{ + struct _starpu_sched_ctx_list *parent; + + parent = rm->parent; + + _starpu_sched_ctx_elt_remove(parent, rm); + + /* Automatically clean up useless prio list */ + if (parent->head == NULL) + { + if (parent->prev == NULL) + { + *list = parent->next; + if (parent->next != NULL) + parent->next->prev = NULL; + } + else + { + parent->prev->next = parent->next; + if (parent->next != NULL) + parent->next->prev = parent->prev; + } + free(parent); + parent = NULL; + } + return; +} + +/* Searches for a context and remove it */ +int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, + unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *rm; + rm = _starpu_sched_ctx_elt_find(*list, sched_ctx); + + if (rm == NULL) + return -1; + + _starpu_sched_ctx_list_remove_elt(list, rm); + return 0; +} + +int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, + unsigned sched_ctx, unsigned prio_to) +{ + struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(*list, sched_ctx); + long task_number = 0; + if (elt == NULL) + return -1; + + task_number = elt->task_number; + _starpu_sched_ctx_list_remove_elt(list, elt); + elt = _starpu_sched_ctx_list_add_prio(list, prio_to, sched_ctx); + elt->task_number = task_number; + + return 0; +} + +int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, + unsigned prio) +{ + struct _starpu_sched_ctx_list *l; + l = _starpu_sched_ctx_list_find(list, prio); + return ((l == NULL && list->priority == prio) || l != NULL) ? 
1 : 0; +} + +void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list) +{ + while (list->head != NULL) + _starpu_sched_ctx_elt_remove(list, list->head); + + free(list); +} + +void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list) +{ + while(*list) + { + struct _starpu_sched_ctx_list *next = (*list)->next; + _starpu_sched_ctx_list_remove_all(*list); + *list = NULL; + if(next) + *list = next; + } +} + +int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, + struct _starpu_sched_ctx_list_iterator *it) +{ + it->list_head = list; + it->cursor = NULL; + + return 0; +} + +int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it) +{ + if (it->cursor == NULL) + { + if (it->list_head != NULL) + return it->list_head->head != NULL; + else + return 0; + } + else + { + struct _starpu_sched_ctx_list *parent = it->cursor->parent; + if (it->cursor->next == parent->head) + return parent->next != NULL; + } + + return 1; +} + +struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it) +{ + struct _starpu_sched_ctx_elt *ret=NULL, *current; + struct _starpu_sched_ctx_list *parent; + current = it->cursor; + + if (current != NULL) + { + parent = it->cursor->parent; + if (current->next == parent->head) + { + if (parent->next != NULL) + { + it->cursor = parent->next->head; + ret = it->cursor; + } + else + { + /* if everything fails (e.g. 
worker removed from ctx since related has_next call) + just return head, it'll save us a synchro */ + it->cursor = NULL; + ret = it->list_head->head; + } + } + else + { + it->cursor = current->next; + ret = it->cursor; + } + } + else + { + it->cursor = it->list_head->head; + ret = it->cursor; + } + + return ret; +} + +int _starpu_sched_ctx_list_push_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); + if (elt == NULL) + return -1; + + elt->task_number++; + + return 0; +} + + +int _starpu_sched_ctx_list_pop_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); + if (elt == NULL) + return -1; + + elt->task_number--; + + /** Balance circular lists **/ + elt->parent->head = elt->next; + + return 0; +} + +int _starpu_sched_ctx_list_pop_all_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) +{ + struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); + if (elt == NULL) + return -1; + + elt->task_number = 0; + + /** Balance circular lists **/ + elt->parent->head = elt->next; + + return 0; +} diff --git a/src/core/sched_ctx_list.h b/src/core/sched_ctx_list.h new file mode 100644 index 0000000..2bcb615 --- /dev/null +++ b/src/core/sched_ctx_list.h @@ -0,0 +1,86 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
#ifndef __SCHED_CONTEXT_LIST_H__
#define __SCHED_CONTEXT_LIST_H__

#pragma GCC visibility push(hidden)

/** @file */

/** One priority level: a node of a non-circular, doubly-linked chain of
 * priorities, holding the circular ring of the scheduling contexts
 * registered at that priority.
 * _starpu_sched_ctx_list_add_prio() keeps the chain sorted by decreasing
 * priority. */
struct _starpu_sched_ctx_elt;
struct _starpu_sched_ctx_list
{
	/** Previous priority level, NULL for the first one */
	struct _starpu_sched_ctx_list *prev;
	/** Next priority level, NULL for the last one */
	struct _starpu_sched_ctx_list *next;
	/** Entry point of the ring of contexts, NULL when the ring is empty */
	struct _starpu_sched_ctx_elt *head;
	/** Priority shared by all the contexts of the ring */
	unsigned priority;
};

/** One scheduling context, node of a circular doubly-linked ring: a single
 * element points to itself through both prev and next. */
struct _starpu_sched_ctx_elt
{
	struct _starpu_sched_ctx_elt *prev;
	struct _starpu_sched_ctx_elt *next;
	/** Priority level owning this element */
	struct _starpu_sched_ctx_list *parent;
	/** Identifier of the scheduling context */
	unsigned sched_ctx;
	/** Counter incremented by _starpu_sched_ctx_list_push_event(),
	 * decremented by _starpu_sched_ctx_list_pop_event() and reset by
	 * _starpu_sched_ctx_list_pop_all_event() */
	long task_number;
	/** Set to 0 by _starpu_sched_ctx_elt_init() */
	unsigned last_poped;
};

/** Cursor over all the elements of a chain of priority levels. */
struct _starpu_sched_ctx_list_iterator
{
	/** First priority level of the traversal */
	struct _starpu_sched_ctx_list *list_head;
	/** Last element returned, NULL before the first call to
	 * _starpu_sched_ctx_list_iterator_get_next() */
	struct _starpu_sched_ctx_elt *cursor;
};

/** Element (sched_ctx) level operations */
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_elt *elt) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;


/** List (priority) level operations */
struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, unsigned prio, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, struct _starpu_sched_ctx_elt *rm) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, unsigned sched_ctx, unsigned prio_to) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;

/** Task number management */
int _starpu_sched_ctx_list_push_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
int _starpu_sched_ctx_list_pop_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
int _starpu_sched_ctx_list_pop_all_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);

/** Iterator operations */
int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;

#pragma GCC visibility pop

#endif // __SCHED_CONTEXT_LIST_H__
/* Prefetch flag, set by _starpu_init_sched_policy() from STARPU_PREFETCH */
static int use_prefetch = 0;
static double idle[STARPU_NMAXWORKERS];
static double idle_start[STARPU_NMAXWORKERS];

/* Debugging knobs, read from the environment by _starpu_sched_init();
 * -1 is the default when the variable is not set */
long _starpu_task_break_on_push = -1;
long _starpu_task_break_on_sched = -1;
long _starpu_task_break_on_pop = -1;
long _starpu_task_break_on_exec = -1;
/* Value of STARPU_IDLE_FILE, read by _starpu_sched_init() */
static const char *starpu_idle_file;
/* Handle of the library named by STARPU_SCHED_LIB, NULL when none is open */
static void *dl_sched_handle = NULL;
/* Value of STARPU_SCHED_LIB, read by load_sched_lib() */
static const char *sched_lib = NULL;

/* Read the STARPU_TASK_BREAK_ON_{PUSH,SCHED,POP,EXEC} and STARPU_IDLE_FILE
 * environment variables into the file-scope variables above. */
void _starpu_sched_init(void)
{
	_starpu_task_break_on_push = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_PUSH", -1);
	_starpu_task_break_on_sched = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_SCHED", -1);
	_starpu_task_break_on_pop = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_POP", -1);
	_starpu_task_break_on_exec = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_EXEC", -1);
	starpu_idle_file = starpu_getenv("STARPU_IDLE_FILE");
}

/* Return the current value of the prefetch flag. */
int starpu_get_prefetch_flag(void)
{
	return use_prefetch;
}
+ &_starpu_sched_modular_heteroprio_policy, + &_starpu_sched_modular_heteroprio_heft_policy, + &_starpu_sched_modular_parallel_heft_policy, + &_starpu_sched_eager_policy, + &_starpu_sched_prio_policy, + &_starpu_sched_random_policy, + &_starpu_sched_lws_policy, + &_starpu_sched_ws_policy, + &_starpu_sched_dm_policy, + &_starpu_sched_dmda_policy, + &_starpu_sched_dmda_prio_policy, + &_starpu_sched_dmda_ready_policy, + &_starpu_sched_dmda_sorted_policy, + &_starpu_sched_dmda_sorted_decision_policy, + &_starpu_sched_parallel_heft_policy, + &_starpu_sched_peager_policy, + &_starpu_sched_heteroprio_policy, + &_starpu_sched_graph_test_policy, +#ifdef STARPU_HAVE_HWLOC + //&_starpu_sched_tree_heft_hierarchical_policy, +#endif + NULL +}; + +struct starpu_sched_policy **starpu_sched_get_predefined_policies() +{ + return predefined_policies; +} + +struct starpu_sched_policy *_starpu_get_sched_policy(struct _starpu_sched_ctx *sched_ctx) +{ + return sched_ctx->sched_policy; +} + +struct starpu_sched_policy *starpu_sched_get_sched_policy_in_ctx(unsigned sched_ctx_id) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[sched_ctx_id]; + return sched_ctx->sched_policy; +} + +struct starpu_sched_policy *starpu_sched_get_sched_policy(void) +{ + unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); + unsigned sched_ctx_id = nsched_ctxs == 1 ? 
0 : starpu_sched_ctx_get_context(); + return starpu_sched_get_sched_policy_in_ctx(sched_ctx_id); +} + +/* + * Methods to initialize the scheduling policy + */ + +static void load_sched_policy(struct starpu_sched_policy *sched_policy, struct _starpu_sched_ctx *sched_ctx) +{ + STARPU_ASSERT(sched_policy); + +#ifdef STARPU_VERBOSE + if (sched_policy->policy_name) + { + if (sched_policy->policy_description) + _STARPU_DEBUG("Use %s scheduler (%s)\n", sched_policy->policy_name, sched_policy->policy_description); + else + _STARPU_DEBUG("Use %s scheduler \n", sched_policy->policy_name); + + } +#endif + + *(sched_ctx->sched_policy) = *sched_policy; +} + +static void load_sched_lib() +{ + /* check if the requested policy can be loaded dynamically */ + sched_lib = starpu_getenv("STARPU_SCHED_LIB"); + if (sched_lib) + { +#ifdef HAVE_DLOPEN + if (dl_sched_handle) + { + dlclose(dl_sched_handle); + dl_sched_handle = NULL; + } + dl_sched_handle = dlopen(sched_lib, RTLD_NOW); + if (!dl_sched_handle) + _STARPU_MSG("Warning: scheduling dynamic library '%s' can not be loaded\n", sched_lib); +#else + _STARPU_MSG("Environment variable 'STARPU_SCHED_LIB' defined but the dlopen functionality is unavailable on the system\n"); +#endif + } +} + +static struct starpu_sched_policy *find_sched_policy_from_name(const char *policy_name) +{ + if (!policy_name) + return NULL; + + if (strcmp(policy_name, "") == 0) + return NULL; + + /* check if the requested policy can be loaded dynamically */ + load_sched_lib(); +#ifdef HAVE_DLOPEN + if (dl_sched_handle) + { + struct starpu_sched_policy *(*func_sched)(const char *); + *(void**)(&func_sched) = dlsym(dl_sched_handle, "starpu_get_sched_lib_policy"); + if (!func_sched) + { + /* no such symbol */ + _STARPU_MSG("Warning: the library '%s' does not define the function 'starpu_get_sched_lib_policy' (error '%s')\n", sched_lib, dlerror()); + dlclose(dl_sched_handle); + dl_sched_handle = NULL; + } + else + { + struct starpu_sched_policy *dl_sched_policy = 
func_sched(policy_name); + if (dl_sched_policy) + return dl_sched_policy; + else + { + dlclose(dl_sched_handle); + dl_sched_handle = NULL; + } + } + } +#endif + + if (strncmp(policy_name, "heft", 4) == 0) + { + _STARPU_MSG("Warning: heft is now called \"dmda\".\n"); + return &_starpu_sched_dmda_policy; + } + + struct starpu_sched_policy **policy; + for(policy=predefined_policies ; *policy!=NULL ; policy++) + { + struct starpu_sched_policy *p = *policy; + if (p->policy_name) + { + if (strcmp(policy_name, p->policy_name) == 0) + { + /* we found a policy with the requested name */ + return p; + } + } + } + + if (strcmp(policy_name, "help") == 0) + return NULL; + + _STARPU_MSG("Warning: scheduling policy '%s' was not found, try 'help' to get a list\n", policy_name); + + /* nothing was found */ + return NULL; +} + +static void display_sched_help_message(FILE *stream) +{ + const char *sched_env = starpu_getenv("STARPU_SCHED"); + if (sched_env && (strcmp(sched_env, "help") == 0)) + { + /* display the description of all predefined policies */ + struct starpu_sched_policy **policy; + + fprintf(stream, "\nThe variable STARPU_SCHED can be set to one of the following strings:\n"); + for(policy=predefined_policies ; *policy!=NULL ; policy++) + { + struct starpu_sched_policy *p = *policy; + fprintf(stream, "%-30s\t-> %s\n", p->policy_name, p->policy_description); + } + fprintf(stream, "\n"); + + load_sched_lib(); +#ifdef HAVE_DLOPEN + if (dl_sched_handle) + { + struct starpu_sched_policy **(*func_scheds)(void); + *(void**)(&func_scheds) = dlsym(dl_sched_handle, "starpu_get_sched_lib_policies"); + if (func_scheds) + { + fprintf(stream, "(dynamically available policies)\n"); + struct starpu_sched_policy **dl_sched_policies = func_scheds(); + for(policy=dl_sched_policies ; *policy!=NULL ; policy++) + { + struct starpu_sched_policy *p = *policy; + fprintf(stream, "%-30s\t-> %s\n", p->policy_name, p->policy_description); + } + fprintf(stream, "\n"); + } + } +#endif + } +} + +struct 
starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_config *config, const char *required_policy) +{ + struct starpu_sched_policy *selected_policy = NULL; + struct starpu_conf *user_conf = &config->conf; + + if(required_policy) + selected_policy = find_sched_policy_from_name(required_policy); + + /* If there is a policy that matches the required name, return it */ + if (selected_policy) + return selected_policy; + + /* First, we check whether the application explicitly gave a scheduling policy or not */ + if (user_conf && (user_conf->sched_policy)) + return user_conf->sched_policy; + + /* Otherwise, we look if the application specified the name of a policy to load */ + const char *sched_pol_name; + sched_pol_name = starpu_getenv("STARPU_SCHED"); + if (sched_pol_name == NULL && user_conf && user_conf->sched_policy_name) + sched_pol_name = user_conf->sched_policy_name; + if (sched_pol_name) + selected_policy = find_sched_policy_from_name(sched_pol_name); + + /* If there is a policy that matches the name, return it */ + if (selected_policy) + return selected_policy; + + /* If no policy was specified, we use the lws policy by default */ + return &_starpu_sched_lws_policy; +} + +void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *selected_policy) +{ + /* Perhaps we have to display some help */ + display_sched_help_message(stderr); + + /* Prefetch is activated by default */ + use_prefetch = starpu_getenv_number("STARPU_PREFETCH"); + if (use_prefetch == -1) + use_prefetch = 1; + + /* Set calibrate flag */ + _starpu_set_calibrate_flag(config->conf.calibrate); + + load_sched_policy(selected_policy, sched_ctx); + + if (starpu_getenv_number_default("STARPU_WORKER_TREE", 0)) + { +#ifdef STARPU_HAVE_HWLOC + sched_ctx->sched_policy->worker_type = STARPU_WORKER_TREE; +#else + _STARPU_DISP("STARPU_WORKER_TREE ignored, please rebuild StarPU with hwloc support to enable it.\n"); 
+#endif + } + starpu_sched_ctx_create_worker_collection(sched_ctx->id, + sched_ctx->sched_policy->worker_type); + + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->init_sched(sched_ctx->id); + _STARPU_SCHED_END; +} + +void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx) +{ + struct starpu_sched_policy *policy = sched_ctx->sched_policy; + if (policy->deinit_sched) + { + _STARPU_SCHED_BEGIN; + policy->deinit_sched(sched_ctx->id); + _STARPU_SCHED_END; + } + starpu_sched_ctx_delete_worker_collection(sched_ctx->id); +#ifdef HAVE_DLOPEN + if (dl_sched_handle) + { + dlclose(dl_sched_handle); + dl_sched_handle = NULL; + } +#endif +} + +void _starpu_sched_task_submit(struct starpu_task *task) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + if (!sched_ctx->sched_policy) + return; + if (!sched_ctx->sched_policy->submit_hook) + return; + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->submit_hook(task); + _STARPU_SCHED_END; +} + +void _starpu_sched_do_schedule(unsigned sched_ctx_id) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + if (!sched_ctx->sched_policy) + return; + if (!sched_ctx->sched_policy->do_schedule) + return; + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->do_schedule(sched_ctx_id); + _STARPU_SCHED_END; +} + +static void _starpu_push_task_on_specific_worker_notify_sched(struct starpu_task *task, struct _starpu_worker *worker, int workerid, int perf_workerid) +{ + /* if we push a task on a specific worker, notify all the sched_ctxs the worker belongs to */ + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if (sched_ctx->sched_policy != NULL && 
sched_ctx->sched_policy->push_task_notify) + { + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->push_task_notify(task, workerid, perf_workerid, sched_ctx->id); + _STARPU_SCHED_END; + } + } +} + +/* Enqueue a task into the list of tasks explicitly attached to a worker. In + * case workerid identifies a combined worker, a task will be enqueued into + * each worker of the combination. */ +static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid) +{ + int nbasic_workers = (int)starpu_worker_get_count(); + + /* Is this a basic worker or a combined worker ? */ + int is_basic_worker = (workerid < nbasic_workers); + + struct _starpu_worker *worker = NULL; + struct _starpu_combined_worker *combined_worker = NULL; + + if (is_basic_worker) + { + worker = _starpu_get_worker_struct(workerid); + } + else + { + combined_worker = _starpu_get_combined_worker_struct(workerid); + } + + if (use_prefetch) + starpu_prefetch_task_input_for(task, workerid); + + if (is_basic_worker) + _starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid); + else + { + /* Notify all workers of the combined worker */ + int worker_size = combined_worker->worker_size; + int *combined_workerid = combined_worker->combined_workerid; + + int j; + for (j = 0; j < worker_size; j++) + { + int subworkerid = combined_workerid[j]; + _starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid); + } + } + +#ifdef STARPU_USE_SC_HYPERVISOR + starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx); +#endif //STARPU_USE_SC_HYPERVISOR + if (is_basic_worker) + { + unsigned node = starpu_worker_get_memory_node(workerid); + if (_starpu_task_uses_multiformat_handles(task)) + { + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned i; + for (i = 0; i < nbuffers; i++) + { + struct starpu_task *conversion_task; + starpu_data_handle_t handle; + + handle = STARPU_TASK_GET_HANDLE(task, i); + if 
/* Generic entry point for pushing a job: it runs the per-task preliminaries
 * and then hands over to _starpu_repush_task().
 *
 * Preliminaries, in order:
 *  - with STARPU_SIMGRID, an optional 1 microsecond sleep;
 *  - the prologue callback of the task, run with the task set as current
 *    task;
 *  - for a task attached to a transaction, a check of the front epoch of the
 *    transaction, done under the spinlock of the transaction.
 *
 * Returns the value of _starpu_repush_task(). */

int _starpu_push_task(struct _starpu_job *j)
{
#ifdef STARPU_SIMGRID
	if (_starpu_simgrid_task_push_cost())
		starpu_sleep(0.000001);
#endif
	if(j->task->prologue_callback_func)
	{
		/* The callback runs with the task set as current task */
		_starpu_set_current_task(j->task);
		j->task->prologue_callback_func(j->task->prologue_callback_arg);
		_starpu_set_current_task(NULL);
	}

	if (j->task->transaction)
	{
		/* If task is part of a transaction and its epoch is cancelled, switch its
		 * 'where' field to STARPU_NOWHERE to skip its execution */
		struct starpu_transaction *p_trs = j->task->transaction;
		STARPU_ASSERT(j->task->transaction->state == _starpu_trs_initialized);
		_starpu_spin_lock(&p_trs->lock);
		STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list));
		/* The epoch of the task must be the oldest one still listed... */
		struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list);
		STARPU_ASSERT(p_epoch == j->task->trs_epoch);
		/* ...and its fate must already be decided */
		STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_confirmed || p_epoch->state == _starpu_trs_epoch_cancelled);
		if (p_epoch->state == _starpu_trs_epoch_cancelled)
		{
			j->task->where = STARPU_NOWHERE;
		}
		_starpu_spin_unlock(&p_trs->lock);
	}

	return _starpu_repush_task(j);
}
starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; + + (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_submitted, -1); + value = STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, 1); + _starpu_perf_counter_update_max_int64(&pcv->task.peak_ready, value); + } + } + STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1); + /* if the context does not have any workers save the tasks in a temp list */ + if ((task->cl != NULL && task->where != STARPU_NOWHERE) && (!sched_ctx->is_initial_sched)) + { + /*if there are workers in the ctx that are not able to execute tasks + we consider the ctx empty */ + unsigned able = _starpu_workers_able_to_execute_task(task, sched_ctx); + + if(!able) + { + _starpu_sched_ctx_lock_write(sched_ctx->id); + starpu_task_list_push_front(&sched_ctx->empty_ctx_tasks, task); + _starpu_sched_ctx_unlock_write(sched_ctx->id); +#ifdef STARPU_USE_SC_HYPERVISOR + if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL + && sched_ctx->perf_counters->notify_empty_ctx) + { + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); + _STARPU_TRACE_HYPERVISOR_END(); + } +#endif + return 0; + } + + } + + if(!can_push) + return 0; + /* in case there is no codelet associated to the task (that's a control + * task), we directly execute its callback and enforce the + * corresponding dependencies */ + if (task->cl == NULL || task->where == STARPU_NOWHERE) + { + _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); + if (!_starpu_perf_counter_paused() && !j->internal) + { + (void)STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, -1); + if (task->cl && task->cl->perf_counter_values) + { + struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; + (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, -1); + } + } + task->status = STARPU_TASK_RUNNING; + if (task->prologue_callback_pop_func) + { + _starpu_set_current_task(task); + 
task->prologue_callback_pop_func(task->prologue_callback_pop_arg); + _starpu_set_current_task(NULL); + } + + if (task->cl && task->cl->specific_nodes) + { + /* Nothing to do, but we are asked to fetch data on some memory nodes */ + _starpu_fetch_nowhere_task_input(j); + } + else + { + if (task->cl +#ifdef STARPU_BUBBLE + && !j->is_bubble +#endif + ) + __starpu_push_task_output(j); + _starpu_handle_job_termination(j); + _STARPU_LOG_OUT_TAG("handle_job_termination"); + } + return 0; + } + + ret = _starpu_push_task_to_workers(task); + if (ret == -EAGAIN) + /* pushed to empty context, that's fine */ + ret = 0; + return ret; +} + +int _starpu_push_task_to_workers(struct starpu_task *task) +{ + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + + _STARPU_TRACE_JOB_PUSH(task, task->priority); + + /* if the contexts still does not have workers put the task back to its place in + the empty ctx list */ + if(!sched_ctx->is_initial_sched) + { + /*if there are workers in the ctx that are not able to execute tasks + we consider the ctx empty */ + unsigned able = _starpu_workers_able_to_execute_task(task, sched_ctx); + + if (!able) + { + _starpu_sched_ctx_lock_write(sched_ctx->id); + starpu_task_list_push_back(&sched_ctx->empty_ctx_tasks, task); + _starpu_sched_ctx_unlock_write(sched_ctx->id); +#ifdef STARPU_USE_SC_HYPERVISOR + if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL + && sched_ctx->perf_counters->notify_empty_ctx) + { + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); + _STARPU_TRACE_HYPERVISOR_END(); + } +#endif + + return -EAGAIN; + } + } + + _starpu_profiling_set_task_push_start_time(task); + + int ret = 0; + if (STARPU_UNLIKELY(task->execute_on_a_specific_worker)) + { + ret = _starpu_push_task_on_specific_worker(task, task->workerid); + } + else + { + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + if(!sched_ctx->sched_policy) + { + /* Note: we 
have to call that early, or else the task may have + * disappeared already */ + starpu_push_task_end(task); + if(!sched_ctx->awake_workers) + ret = _starpu_push_task_on_specific_worker(task, sched_ctx->main_master); + else + { + struct starpu_worker_collection *workers = sched_ctx->workers; + + struct _starpu_job *job = _starpu_get_job_associated_to_task(task); + job->task_size = workers->nworkers; + job->combined_workerid = -1; // workerid; its a ctx not combined worker + job->active_task_alias_count = 0; + + STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, workers->nworkers); + STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, workers->nworkers); + job->after_work_busy_barrier = workers->nworkers; + + struct starpu_sched_ctx_iterator it; + if(workers->init_iterator) + workers->init_iterator(workers, &it); + + while(workers->has_next(workers, &it)) + { + unsigned workerid = workers->get_next(workers, &it); + struct starpu_task *alias; + if (job->task_size > 1) + { + alias = starpu_task_dup(task); + _STARPU_TRACE_JOB_PUSH(alias, alias->priority); + alias->destroy = 1; + } + else + alias = task; + ret |= _starpu_push_task_on_specific_worker(alias, workerid); + } + } + } + else + { + /* When a task can only be executed on a given arch and we have + * only one memory node for that arch, we can systematically + * prefetch before the scheduling decision. 
*/ + if (!sched_ctx->sched_policy->prefetches + && starpu_get_prefetch_flag() + && starpu_memory_nodes_get_count() > 1) + { + enum starpu_worker_archtype type; + for (type = 0; type < STARPU_NARCH; type++) + { + if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) + { + if (config->arch_nodeid[type] >= 0) + starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]); + break; + } + } + } + + STARPU_ASSERT(sched_ctx->sched_policy->push_task); + /* check out if there are any workers in the context */ + unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id); + if (nworkers == 0) + ret = -1; + else + { + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + if (worker) + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_enter_sched_op(worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + _STARPU_TASK_BREAK_ON(task, push); + _STARPU_SCHED_BEGIN; + ret = sched_ctx->sched_policy->push_task(task); + _STARPU_SCHED_END; + if (worker) + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_leave_sched_op(worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + } + } + + if(ret == -1) + { + _STARPU_MSG("repush task \n"); + _STARPU_TRACE_JOB_POP(task, task->priority); + ret = _starpu_push_task_to_workers(task); + } + } + /* Note: from here, the task might have been destroyed already! */ + _STARPU_LOG_OUT(); + return ret; + +} + +/* This is called right after the scheduler has pushed a task to a queue + * but just before releasing mutexes: we need the task to still be alive! + */ +int starpu_push_task_end(struct starpu_task *task) +{ + _starpu_profiling_set_task_push_end_time(task); + task->scheduled = 1; + return 0; +} + +/* Called by _starpu_pop_task() right after the scheduler's pop_task method + * has returned (possibly with a NULL task): it only records the pop in the trace. 
+ */ +int _starpu_pop_task_end(struct starpu_task *task) +{ + if (!task) + return 0; + _STARPU_TRACE_JOB_POP(task, task->priority); + return 0; +} + +/* + * Given a handle that needs to be converted in order to be used on the given + * node, returns a task that takes care of the conversion. + */ +struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle, unsigned int node) +{ + return _starpu_create_conversion_task_for_arch(handle, starpu_node_get_kind(node)); +} + +struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle, enum starpu_node_kind node_kind) +{ + struct starpu_task *conversion_task; + +/* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + struct starpu_multiformat_interface *format_interface; +#endif + + conversion_task = starpu_task_create(); + conversion_task->name = "conversion_task"; + conversion_task->synchronous = 0; + STARPU_TASK_SET_HANDLE(conversion_task, handle, 0); + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + /* The node does not really matter here */ + format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); +#endif + + _starpu_spin_lock(&handle->header_lock); + handle->refcnt++; + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + + /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. 
*/ + + switch(node_kind) + { + case STARPU_CPU_RAM: + switch (starpu_node_get_kind(handle->mf_node)) + { + case STARPU_CPU_RAM: + STARPU_ABORT(); +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + case STARPU_CUDA_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + conversion_task->cl = mf_ops->cuda_to_cpu_cl; + break; + } +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + case STARPU_OPENCL_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + conversion_task->cl = mf_ops->opencl_to_cpu_cl; + break; + } +#endif + default: + _STARPU_ERROR("Oops : %u\n", handle->mf_node); + } + break; +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + case STARPU_CUDA_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + conversion_task->cl = mf_ops->cpu_to_cuda_cl; + break; + } +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + case STARPU_OPENCL_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + conversion_task->cl = mf_ops->cpu_to_opencl_cl; + break; + } +#endif + default: + STARPU_ABORT(); + } + + _starpu_codelet_check_deprecated_fields(conversion_task->cl); + STARPU_TASK_SET_MODE(conversion_task, STARPU_RW, 0); + return conversion_task; +} + +static +struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker) +{ + struct _starpu_sched_ctx_elt *e = NULL; + struct _starpu_sched_ctx_list_iterator list_it; + int found = 0; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while 
(_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + if (e->task_number > 0) + return _starpu_get_sched_ctx_struct(e->sched_ctx); + } + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + if (e->last_poped) + { + e->last_poped = 0; + if (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + found = 1; + } + break; + } + } + if (!found) + e = worker->sched_ctx_list->head; + e->last_poped = 1; + + return _starpu_get_sched_ctx_struct(e->sched_ctx); +} + +struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker) +{ + struct starpu_task *task; + int worker_id; + unsigned node; + + /* We can't tell in advance which task will be picked up, so we measure + * a timestamp, and will attribute it afterwards to the task. */ + int profiling = starpu_profiling_status_get(); + struct timespec pop_start_time; + if (profiling) + _starpu_clock_gettime(&pop_start_time); + +pick: + /* perhaps there is some local task to be executed first */ + task = _starpu_pop_local_task(worker); + + if (task) + _STARPU_TASK_BREAK_ON(task, pop); + + /* get tasks from the stacks of the strategy */ + if(!task) + { + struct _starpu_sched_ctx *sched_ctx ; +#ifndef STARPU_NON_BLOCKING_DRIVERS + int been_here[STARPU_NMAX_SCHED_CTXS]; + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + been_here[i] = 0; + + while(!task) +#endif + { + if(worker->nsched_ctxs == 1) + sched_ctx = _starpu_get_initial_sched_ctx(); + else + { + while(1) + { + /** Caution + * If you use multiple contexts your scheduler *needs* + * to update the variable task_number of the ctx list. + * In order to get the best performances. + * This is done using functions : + * starpu_sched_ctx_list_task_counters_increment...(...) 
+ * starpu_sched_ctx_list_task_counters_decrement...(...) + **/ + sched_ctx = _get_next_sched_ctx_to_pop_into(worker); + + if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1) + { + _starpu_worker_gets_out_of_ctx(sched_ctx->id, worker); + worker->removed_from_ctx[sched_ctx->id] = 0; + sched_ctx = NULL; + } + else + break; + } + } + + if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS) + { + if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task) + { + /* Note: we do not push the scheduling state here, because + * otherwise when a worker is idle, we'd keep + * pushing/popping a scheduling state here, while what we + * want to see in the trace is a permanent idle state. */ + task = sched_ctx->sched_policy->pop_task(sched_ctx->id); + if (task) + _STARPU_TASK_BREAK_ON(task, pop); + _starpu_pop_task_end(task); + } + } + + if(!task) + { + /* it doesn't matter if it shares tasks list or not in the scheduler, + if it does not have any task to pop just get it out of here */ + /* however if it shares a task list it will be removed as soon as he + finishes this job (in handle_job_termination) */ + if(worker->removed_from_ctx[sched_ctx->id]) + { + _starpu_worker_gets_out_of_ctx(sched_ctx->id, worker); + worker->removed_from_ctx[sched_ctx->id] = 0; + } +#ifdef STARPU_USE_SC_HYPERVISOR + if(worker->pop_ctx_priority) + { + struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters; + if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) + { +// _STARPU_TRACE_HYPERVISOR_BEGIN(); + perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0); +// _STARPU_TRACE_HYPERVISOR_END(); + } + } +#endif //STARPU_USE_SC_HYPERVISOR + +#ifndef STARPU_NON_BLOCKING_DRIVERS + if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1) + break; + + been_here[sched_ctx->id] = 1; + +#endif + } + } + } + + + if (!task) + { + 
if (starpu_idle_file) + idle_start[worker->workerid] = starpu_timing_now(); + return NULL; + } + + if(starpu_idle_file && idle_start[worker->workerid] != 0.0) + { + double idle_end = starpu_timing_now(); + idle[worker->workerid] += (idle_end - idle_start[worker->workerid]); + idle_start[worker->workerid] = 0.0; + } + + +#ifdef STARPU_USE_SC_HYPERVISOR + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters; + + if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) + { +// _STARPU_TRACE_HYPERVISOR_BEGIN(); + perf_counters->notify_poped_task(task->sched_ctx, worker->workerid); +// _STARPU_TRACE_HYPERVISOR_END(); + } +#endif //STARPU_USE_SC_HYPERVISOR + + + /* Make sure we do not bother with all the multiformat-specific code if + * it is not necessary. */ + if (!_starpu_task_uses_multiformat_handles(task)) + goto profiling; + + + /* This is either a conversion task, or a regular task for which the + * conversion tasks have already been created and submitted */ + if (task->mf_skip) + goto profiling; + + /* + * This worker may not be able to execute this task. In this case, we + * should return the task anyway. It will be pushed back almost immediately. + * This way, we avoid computing and executing the conversions tasks. + * Here, we do not care about what implementation is used. + */ + worker_id = starpu_worker_get_id_check(); + if (!starpu_worker_can_execute_task_first_impl(worker_id, task, NULL)) + return task; + + node = starpu_worker_get_memory_node(worker_id); + + /* + * We do have a task that uses multiformat handles. Let's create the + * required conversion tasks. 
+ */ + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + for (i = 0; i < nbuffers; i++) + { + struct starpu_task *conversion_task; + starpu_data_handle_t handle; + + handle = STARPU_TASK_GET_HANDLE(task, i); + if (!_starpu_handle_needs_conversion_task(handle, node)) + continue; + conversion_task = _starpu_create_conversion_task(handle, node); + conversion_task->mf_skip = 1; + conversion_task->execute_on_a_specific_worker = 1; + conversion_task->workerid = worker_id; + /* + * Next tasks will need to know where these handles have gone. + */ + handle->mf_node = node; + _starpu_task_submit_conversion_task(conversion_task, worker_id); + } + + task->mf_skip = 1; + starpu_task_prio_list_push_back(&worker->local_tasks, task); + goto pick; + +profiling: + if (profiling) + { + struct starpu_profiling_task_info *profiling_info; + profiling_info = task->profiling_info; + + /* The task may have been created before profiling was enabled, + * so we check if the profiling_info structure is available + * even though we already tested if profiling is enabled. 
*/ + if (profiling_info) + { + profiling_info->pop_start_time = pop_start_time; + _starpu_clock_gettime(&profiling_info->pop_end_time); + } + } + + if(task->prologue_callback_pop_func) + { + _starpu_set_current_task(task); + task->prologue_callback_pop_func(task->prologue_callback_pop_arg); + _starpu_set_current_task(NULL); + } + + return task; +} + +void _starpu_sched_pre_exec_hook(struct starpu_task *task) +{ + unsigned sched_ctx_id = starpu_sched_ctx_get_ctx_for_task(task); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + if (sched_ctx->sched_policy && sched_ctx->sched_policy->pre_exec_hook) + { + _STARPU_SCHED_BEGIN; + sched_ctx->sched_policy->pre_exec_hook(task, sched_ctx_id); + _STARPU_SCHED_END; + } + + if(!sched_ctx->sched_policy) + { + int workerid = starpu_worker_get_id(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx *other_sched_ctx; + struct _starpu_sched_ctx_elt *e; + + e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + other_sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if (other_sched_ctx != sched_ctx && + other_sched_ctx->sched_policy != NULL && + other_sched_ctx->sched_policy->pre_exec_hook) + { + _STARPU_SCHED_BEGIN; + other_sched_ctx->sched_policy->pre_exec_hook(task, other_sched_ctx->id); + _STARPU_SCHED_END; + } + } + } + +} + +void _starpu_sched_post_exec_hook(struct starpu_task *task) +{ + STARPU_ASSERT(task->cl != NULL && task->cl->where != STARPU_NOWHERE); + unsigned sched_ctx_id = starpu_sched_ctx_get_ctx_for_task(task); + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + if (sched_ctx->sched_policy && sched_ctx->sched_policy->post_exec_hook) + { + _STARPU_SCHED_BEGIN; + 
sched_ctx->sched_policy->post_exec_hook(task, sched_ctx_id); + _STARPU_SCHED_END; + } + if(!sched_ctx->sched_policy) + { + int workerid = starpu_worker_get_id(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + struct _starpu_sched_ctx *other_sched_ctx; + struct _starpu_sched_ctx_elt *e; + + e = _starpu_sched_ctx_list_iterator_get_next(&list_it); + other_sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); + if (other_sched_ctx != sched_ctx && + other_sched_ctx->sched_policy != NULL && + other_sched_ctx->sched_policy->post_exec_hook) + { + _STARPU_SCHED_BEGIN; + other_sched_ctx->sched_policy->post_exec_hook(task, other_sched_ctx->id); + _STARPU_SCHED_END; + } + } + } +} + +int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + + return _starpu_push_local_task(worker, task); +} + +void _starpu_print_idle_time() +{ + if(!starpu_idle_file) + return; + double all_idle = 0.0; + int i = 0; + for(i = 0; i < STARPU_NMAXWORKERS; i++) + all_idle += idle[i]; + + FILE *f; + f = fopen(starpu_idle_file, "a"); + if (!f) + { + _STARPU_MSG("couldn't open %s: %s\n", starpu_idle_file, strerror(errno)); + } + else + { + fprintf(f, "%lf \n", all_idle); + fclose(f); + } +} + +void starpu_sched_task_break(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TASK_BREAK_ON(task, sched); +} diff --git a/src/core/sched_policy.h b/src/core/sched_policy.h new file mode 100644 index 0000000..a969e48 --- /dev/null +++ b/src/core/sched_policy.h @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __SCHED_POLICY_H__ +#define __SCHED_POLICY_H__ + +/** @file */ + +#include +#include +#include +#include +#include + +#include + +#pragma GCC visibility push(hidden) + +#define _STARPU_SCHED_BEGIN \ + _STARPU_TRACE_WORKER_SCHEDULING_PUSH; \ + _SIMGRID_TIMER_BEGIN(_starpu_simgrid_sched_cost()) +#define _STARPU_SCHED_END \ + _SIMGRID_TIMER_END; \ + _STARPU_TRACE_WORKER_SCHEDULING_POP + +void _starpu_sched_init(void); + +struct _starpu_machine_config; +struct starpu_sched_policy *_starpu_get_sched_policy(struct _starpu_sched_ctx *sched_ctx); + +void _starpu_init_sched_policy(struct _starpu_machine_config *config, + struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *policy); + +void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx); + +struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_config *config, const char *required_policy); + +void _starpu_sched_task_submit(struct starpu_task *task); +void _starpu_sched_do_schedule(unsigned sched_ctx_id); + +int _starpu_push_task(struct _starpu_job *j); +int _starpu_repush_task(struct _starpu_job *j); + +/** actually pushes the tasks to the specific worker or to the scheduler */ +int _starpu_push_task_to_workers(struct starpu_task *task); + +/** pop a task that can be executed on the worker */ 
+struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker); +void _starpu_sched_post_exec_hook(struct starpu_task *task); +int _starpu_pop_task_end(struct starpu_task *task); + +struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle, + unsigned int node) STARPU_ATTRIBUTE_MALLOC; + +struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle, + enum starpu_node_kind node_kind) STARPU_ATTRIBUTE_MALLOC; + +void _starpu_sched_pre_exec_hook(struct starpu_task *task); + +void _starpu_print_idle_time(); +/* + * Predefined policies + */ +extern struct starpu_sched_policy _starpu_sched_lws_policy; +extern struct starpu_sched_policy _starpu_sched_ws_policy; +extern struct starpu_sched_policy _starpu_sched_prio_policy; +extern struct starpu_sched_policy _starpu_sched_random_policy; +extern struct starpu_sched_policy _starpu_sched_dm_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern struct starpu_sched_policy _starpu_sched_dmda_prio_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy; +extern struct starpu_sched_policy _starpu_sched_eager_policy; +extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern struct starpu_sched_policy _starpu_sched_peager_policy; +extern struct starpu_sched_policy _starpu_sched_heteroprio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_eager_policy; +extern struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy; +extern struct starpu_sched_policy _starpu_sched_modular_eager_prio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_gemm_policy; +extern struct starpu_sched_policy _starpu_sched_modular_prio_policy; +extern struct starpu_sched_policy 
_starpu_sched_modular_prio_prefetching_policy; +extern struct starpu_sched_policy _starpu_sched_modular_random_policy; +extern struct starpu_sched_policy _starpu_sched_modular_random_prio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_random_prefetching_policy; +extern struct starpu_sched_policy _starpu_sched_modular_random_prio_prefetching_policy; +extern struct starpu_sched_policy _starpu_sched_modular_parallel_random_policy; +extern struct starpu_sched_policy _starpu_sched_modular_parallel_random_prio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_ws_policy; +extern struct starpu_sched_policy _starpu_sched_modular_dmda_policy; +extern struct starpu_sched_policy _starpu_sched_modular_dmdap_policy; +extern struct starpu_sched_policy _starpu_sched_modular_dmdar_policy; +extern struct starpu_sched_policy _starpu_sched_modular_dmdas_policy; +extern struct starpu_sched_policy _starpu_sched_modular_heft_policy; +extern struct starpu_sched_policy _starpu_sched_modular_heft_prio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_heft2_policy; +extern struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy; +extern struct starpu_sched_policy _starpu_sched_modular_heteroprio_heft_policy; +extern struct starpu_sched_policy _starpu_sched_modular_parallel_heft_policy; +extern struct starpu_sched_policy _starpu_sched_graph_test_policy; +extern struct starpu_sched_policy _starpu_sched_tree_heft_hierarchical_policy; + +extern long _starpu_task_break_on_push; +extern long _starpu_task_break_on_sched; +extern long _starpu_task_break_on_pop; +extern long _starpu_task_break_on_exec; + +#ifdef SIGTRAP +#define _STARPU_TASK_BREAK_ON(task, what) do { \ + if (_starpu_get_job_associated_to_task(task)->job_id == (unsigned long) _starpu_task_break_on_##what) \ + raise(SIGTRAP); \ +} while(0) +#else +#define _STARPU_TASK_BREAK_ON(task, what) ((void) 0) +#endif + +#pragma GCC visibility pop + +#endif // __SCHED_POLICY_H__ diff 
--git a/src/core/simgrid.c b/src/core/simgrid.c new file mode 100644 index 0000000..f00e132 --- /dev/null +++ b/src/core/simgrid.c @@ -0,0 +1,1597 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#if defined(HAVE_SIMGRID_SIMDAG_H) && (SIMGRID_VERSION >= 31300) +#include +#endif + +#ifdef STARPU_SIMGRID +#ifdef HAVE_GETRLIMIT +#include +#endif +#if (defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)) \ + && !defined(HAVE_SIMGRID_SET_MAESTRO) +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_HOST_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_LINK_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_ENGINE_H +#include +#endif +#ifdef STARPU_HAVE_XBT_CONFIG_H +#include +#endif +#include +#include + +#pragma weak starpu_main +extern int starpu_main(int argc, char *argv[]); +#if SIMGRID_VERSION < 31600 +#pragma weak smpi_main +extern int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]); +#endif +#pragma weak _starpu_mpi_simgrid_init +extern int _starpu_mpi_simgrid_init(int argc, char *argv[]); + +#pragma weak smpi_process_set_user_data +#if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_set_user_data) 
+extern void smpi_process_set_user_data(void *); +#endif + +static double _starpu_simgrid_dynamic_energy = 0.0; + +/* 1 when MSG_init was done, 2 when initialized through redirected main, 3 when + * initialized through MSG_process_attach */ +static int simgrid_started; + +static int simgrid_transfer_cost = 1; + +static int runners_running; +starpu_pthread_queue_t _starpu_simgrid_transfer_queue[STARPU_MAXNODES]; +static struct transfer_runner +{ + struct transfer *first_transfer, *last_transfer; + starpu_sem_t sem; + starpu_pthread_t runner; +} transfer_runner[STARPU_MAXNODES][STARPU_MAXNODES]; +static void *transfer_execute(void *arg); + +starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS]; +static struct worker_runner +{ + struct task *first_task, *last_task; + starpu_sem_t sem; + starpu_pthread_t runner; +} worker_runner[STARPU_NMAXWORKERS]; +static void *task_execute(void *arg); + +struct _starpu_simgrid_event +{ + unsigned finished; + starpu_pthread_queue_t *queue; +}; +static inline struct _starpu_simgrid_event *_starpu_simgrid_event(union _starpu_async_channel_event *_event) +{ + struct _starpu_simgrid_event *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +size_t _starpu_default_stack_size = 8192; + +void _starpu_simgrid_set_stack_size(size_t stack_size) +{ +#ifdef HAVE_SG_CFG_SET_INT + sg_cfg_set_int("contexts/stack-size", stack_size); +#elif SIMGRID_VERSION >= 31300 + xbt_cfg_set_int("contexts/stack-size", stack_size); +#else + extern xbt_cfg_t _sg_cfg_set; + xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", stack_size); +#endif +} + +#ifdef HAVE_SG_ACTOR_ON_EXIT +static void on_exit_backtrace(int failed, void *data STARPU_ATTRIBUTE_UNUSED) +{ + if (failed) + xbt_backtrace_display_current(); +} +#endif + +void _starpu_simgrid_actor_setup(void) +{ +#ifdef HAVE_SG_ACTOR_ON_EXIT + sg_actor_on_exit(on_exit_backtrace, NULL); +#endif +} + +#if defined(HAVE_SG_ZONE_GET_BY_NAME) 
|| defined(sg_zone_get_by_name) +#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME +sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name) +{ + return sg_zone_get_by_name(name); +} +#elif defined(HAVE_MSG_ZONE_GET_BY_NAME) || defined(MSG_zone_get_by_name) +#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME +msg_as_t _starpu_simgrid_get_as_by_name(const char *name) +{ + return MSG_zone_get_by_name(name); +} +#elif defined(HAVE_MSG_GET_AS_BY_NAME) || defined(MSG_get_as_by_name) +#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME +msg_as_t _starpu_simgrid_get_as_by_name(const char *name) +{ + return MSG_get_as_by_name(name); +} +#elif defined(HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT) || defined(MSG_environment_as_get_routing_sons) +#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME +static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name) +{ + xbt_dict_t dict; + xbt_dict_cursor_t cursor; + const char *key; + msg_as_t as, ret; + dict = MSG_environment_as_get_routing_sons(root); + xbt_dict_foreach(dict, cursor, key, as) + { + if (!strcmp(MSG_environment_as_get_name(as), name)) + return as; + ret = __starpu_simgrid_get_as_by_name(as, name); + if (ret) + return ret; + } + return NULL; +} + +msg_as_t _starpu_simgrid_get_as_by_name(const char *name) +{ + return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name); +} +#endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */ + +int _starpu_simgrid_get_nbhosts(const char *prefix) +{ +#ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME + char new_prefix[32+strlen(prefix)]; +#endif + + int ret; +#ifdef HAVE_SG_HOST_LIST + sg_host_t *hosts_list = NULL; +#endif +#if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) + const_sg_host_t *hosts = NULL; +#else + xbt_dynar_t hosts = NULL; +#endif + int i; + int nb = 0; + unsigned len = strlen(prefix); + + if (_starpu_simgrid_running_smpi()) + { +#ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME + char name[32]; + STARPU_ASSERT(starpu_mpi_world_rank); + snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", 
starpu_mpi_world_rank()); +#if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) + hosts = sg_zone_get_all_hosts(_starpu_simgrid_get_as_by_name(name), &nb); +#elif defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(HAVE_SG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts) || defined(sg_zone_get_hosts) + hosts = xbt_dynar_new(sizeof(sg_host_t), NULL); +# if defined(HAVE_SG_ZONE_GET_HOSTS) || defined(sg_zone_get_hosts) + sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts); +# else + MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts); +# endif +#else + hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name)); +#endif + snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix); + prefix = new_prefix; + len = strlen(prefix); +#else + STARPU_ABORT_MSG("can not continue without an implementation for _starpu_simgrid_get_as_by_name"); +#endif /* HAVE_STARPU_SIMGRID_GET_AS_BY_NAME */ + } + else + { +#ifdef HAVE_SG_HOST_LIST + hosts_list = sg_host_list(); + nb = sg_host_count(); +#elif defined(STARPU_HAVE_SIMGRID_HOST_H) + hosts = sg_hosts_as_dynar(); +#else + hosts = MSG_hosts_as_dynar(); +#endif + } +#if !defined(HAVE_SG_ZONE_GET_ALL_HOSTS) + if (hosts) + nb = xbt_dynar_length(hosts); +#endif + + ret = 0; + for (i = 0; i < nb; i++) + { + const char *name; +#ifdef HAVE_SG_HOST_LIST + if (hosts_list) + name = sg_host_get_name(hosts_list[i]); + else +#endif +#if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) + name = sg_host_get_name(hosts[i]); +#elif defined(STARPU_HAVE_SIMGRID_HOST_H) + name = sg_host_get_name(xbt_dynar_get_as(hosts, i, sg_host_t)); +#else + name = MSG_host_get_name(xbt_dynar_get_as(hosts, i, msg_host_t)); +#endif + if (!strncmp(name, prefix, len)) + ret++; + } +#if !defined(HAVE_SG_ZONE_GET_ALL_HOSTS) + if (hosts) + xbt_dynar_free(&hosts); +#endif + return ret; +} + +static starpu_sg_host_t _starpu_simgrid_get_host(const char *prefix, unsigned devid) +{ + char name[32]; + + snprintf(name, sizeof(name), "%s%u", prefix, devid); + + return 
_starpu_simgrid_get_host_by_name(name); +} + +unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid) +{ + starpu_sg_host_t host; + const char *memsize; + + host = _starpu_simgrid_get_host(prefix, devid); + if (!host) + return 0; + +#if defined(HAVE_SG_HOST_GET_PROPERTY_NAMES) + if (!sg_host_get_property_names(host, NULL)) +#elif defined(HAVE_SG_HOST_GET_PROPERTIES) + if (!sg_host_get_properties(host)) +#else + if (!MSG_host_get_properties(host)) +#endif + return 0; + +#ifdef HAVE_SG_HOST_GET_PROPERTIES + memsize = sg_host_get_property_value(host, "memsize"); +#else + memsize = MSG_host_get_property_value(host, "memsize"); +#endif + if (!memsize) + return 0; + + return atoll(memsize); +} + +const char *_starpu_simgrid_get_devname(const char *prefix, unsigned devid) +{ + starpu_sg_host_t host; + + host = _starpu_simgrid_get_host(prefix, devid); + if (!host) + return 0; + +#if defined(HAVE_SG_HOST_GET_PROPERTY_NAMES) + if (!sg_host_get_property_names(host, NULL)) +#elif defined(HAVE_SG_HOST_GET_PROPERTIES) + if (!sg_host_get_properties(host)) +#else + if (!MSG_host_get_properties(host)) +#endif + return 0; + +#ifdef HAVE_SG_HOST_GET_PROPERTIES + return sg_host_get_property_value(host, "model"); +#else + return MSG_host_get_property_value(host, "model"); +#endif +} + +starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name) +{ + if (_starpu_simgrid_running_smpi()) + { + char mpiname[32]; + STARPU_ASSERT(starpu_mpi_world_rank); + snprintf(mpiname, sizeof(mpiname), STARPU_MPI_AS_PREFIX"%d-%s", starpu_mpi_world_rank(), name); +#ifdef STARPU_HAVE_SIMGRID_HOST_H + return sg_host_by_name(mpiname); +#else + return MSG_get_host_by_name(mpiname); +#endif + } + else +#ifdef STARPU_HAVE_SIMGRID_HOST_H + return sg_host_by_name(name); +#else + return MSG_get_host_by_name(name); +#endif +} + +starpu_sg_host_t _starpu_simgrid_get_host_by_worker(struct _starpu_worker *worker) +{ + const char *prefix; + char name[16]; + starpu_sg_host_t host; + 
prefix = starpu_driver_info[worker->arch].name_upper; + STARPU_ASSERT(prefix); + snprintf(name, sizeof(name), "%s%u", prefix, worker->devid); + host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT_MSG(host, "Could not find host %s!", name); + return host; +} + +#ifdef STARPU_USE_MPI +/* Simgrid up to 3.15 would rename main into smpi_simulated_main_, and call that + * from SMPI initialization + * In case the MPI application didn't use smpicc to build the file containing + * main(), but included our #define main starpu_main, try to cope by calling + * starpu_main */ +int _starpu_smpi_simulated_main_(int argc, char *argv[]) +{ + if (!starpu_main) + { + _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); + } + + return starpu_main(argc, argv); +} +int smpi_simulated_main_(int argc, char *argv[]) __attribute__((weak, alias("_starpu_smpi_simulated_main_"))); +#endif + +/* This is used to start a non-MPI simgrid environment */ +void _starpu_start_simgrid(int *argc, char **argv) +{ + char path[256]; + + if (simgrid_started) + return; + + simgrid_started = 1; + +#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) + simgrid_init(argc, argv); +#else + MSG_init(argc, argv); +#endif + /* Simgrid uses tiny stacks by default. This comes unexpected to our users. 
*/ +#ifdef HAVE_GETRLIMIT + struct rlimit rlim; + if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur != 0 && rlim.rlim_cur != RLIM_INFINITY) + _starpu_default_stack_size = rlim.rlim_cur / 1024; +#endif + _starpu_simgrid_set_stack_size(_starpu_default_stack_size); + + /* Load XML platform */ +#if SIMGRID_VERSION < 31300 + _starpu_simgrid_get_platform_path(3, path, sizeof(path)); +#else + _starpu_simgrid_get_platform_path(4, path, sizeof(path)); +#endif + if (access(path, R_OK) != 0) + { + fprintf(stderr, "Machine performance file <%s> does not exist, please re-run in non-simgrid mode to calibrate it, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables\n", path); + _exit(EXIT_FAILURE); + } + +#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) + simgrid_load_platform(path); +#else + MSG_create_environment(path); +#endif + int limit_bandwidth = starpu_getenv_number("STARPU_LIMIT_BANDWIDTH"); + if (limit_bandwidth >= 0) + { +#if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH) + sg_link_t *links = sg_link_list(); + int count = sg_link_count(), i; + for (i = 0; i < count; i++) + { +#ifdef HAVE_SG_LINK_SET_BANDWIDTH + sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.); +#else + sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.); +#endif + } +#else + _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth); +#endif + } + + simgrid_transfer_cost = starpu_getenv_number_default("STARPU_SIMGRID_TRANSFER_COST", 1); +} + +static int +run_starpu_main(int argc, char *argv[]) +{ + /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ + starpu_sleep(0.000001); + _starpu_simgrid_actor_setup(); + + if (!starpu_main) + { + _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or 
starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); + } + + return starpu_main(argc, argv); +} + +static int main_ret; + +static _starpu_simgrid_main_ret +do_starpu_main(int argc, char *argv[]) +{ + main_ret = run_starpu_main(argc, argv); + _STARPU_SIMGRID_MAIN_RETURN; +} + +/* We need it only when using smpi */ +#pragma weak smpi_process_get_user_data +extern void *smpi_process_get_user_data(); + +/* This is hopefully called before the application and simgrid */ +#undef main +#pragma weak main +int main(int argc, char **argv) +{ +#ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP + sg_config_continue_after_help(); +#endif + if (_starpu_simgrid_running_smpi()) + { + if (!smpi_process_get_user_data) + { + _STARPU_ERROR("Your version of simgrid does not provide smpi_process_get_user_data, we can not continue without it\n"); + } + +#if SIMGRID_VERSION >= 31600 + /* Recent versions of simgrid dlopen() us, so we don't need to + * do circumvolutions, just init MPI early and run the application's main */ + return _starpu_mpi_simgrid_init(argc, argv); +#else + /* Oops, we are running old SMPI, let it start Simgrid, and we'll + * take back hand in _starpu_simgrid_init from starpu_init() */ + return smpi_main(_starpu_mpi_simgrid_init, argc, argv); +#endif + } + + /* Already initialized? 
It probably has been done through a + * constructor and MSG_process_attach, directly jump to real main */ + if (simgrid_started == 3) + { + return run_starpu_main(argc, argv); + } + + /* Managed to catch application's main, initialize simgrid first */ + _starpu_start_simgrid(&argc, argv); + + simgrid_started = 2; + + /* Create a simgrid process for main */ + char **argv_cpy; + _STARPU_MALLOC(argv_cpy, argc * sizeof(char*)); + int i; + for (i = 0; i < argc; i++) + argv_cpy[i] = strdup(argv[i]); + + /* Run the application in a separate thread */ + _starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy); + + /* And run maestro in the main thread */ +#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) + simgrid_run(); +#else + MSG_main(); +#endif + return main_ret; +} + +#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) +static void maestro(void *data STARPU_ATTRIBUTE_UNUSED) +{ +#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) + simgrid_run(); +#else + MSG_main(); +#endif +} +#endif + +/* This is called early from starpu_init, so thread functions etc. can work */ +void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP + sg_config_continue_after_help(); +#endif +#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) + if (simgrid_started < 2 && !_starpu_simgrid_running_smpi()) + { + /* "Cannot create_maestro with this ContextFactory. + * Try using --cfg=contexts/factory:thread instead." 
+ * See https://github.com/simgrid/simgrid/issues/141 */ + _STARPU_DISP("Warning: In simgrid mode, the file containing the main() function of this application should to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main to avoid having to use --cfg=contexts/factory:thread which reduces performance\n"); +#if SIMGRID_VERSION >= 31400 /* Only recent versions of simgrid support setting sg_cfg_set_string before starting simgrid */ +# ifdef HAVE_SG_CFG_SET_INT + sg_cfg_set_string("contexts/factory", "thread"); +# else + xbt_cfg_set_string("contexts/factory", "thread"); +# endif +#endif + /* We didn't catch application's main. */ + /* Start maestro as a separate thread */ +#ifdef HAVE_SIMGRID_SET_MAESTRO + simgrid_set_maestro(maestro, NULL); +#else + SIMIX_set_maestro(maestro, NULL); +#endif + /* Initialize simgrid */ + int no_argc = 1; + char *starpu = "starpu", *no_argv [] = { starpu, NULL }; + _starpu_start_simgrid(argc ? argc : &no_argc, argv ? 
*argv : no_argv); + + /* And attach the main thread to the main simgrid process */ + void **tsd; + _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); + +#if (defined(HAVE_SG_ACTOR_ATTACH_PTHREAD) || defined(HAVE_SG_ACTOR_ATTACH)) && (defined (HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA)) +#ifdef HAVE_SG_ACTOR_ATTACH_PTHREAD + sg_actor_t actor = sg_actor_attach_pthread("main", NULL, _starpu_simgrid_get_host_by_name("MAIN")); +#else + sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL); +#endif +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(actor, tsd); +#else + sg_actor_data_set(actor, tsd); +#endif +#else + MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL); +#endif + + /* We initialized through MSG_process_attach */ + simgrid_started = 3; + } +#endif + + if (!simgrid_started && !starpu_main && !_starpu_simgrid_running_smpi()) + { + /* Oops, we don't have MSG_process_attach and didn't catch the + * 'main' symbol, there is no way for us */ + _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); + } + if (_starpu_simgrid_running_smpi()) + { +#ifndef STARPU_STATIC_ONLY + _STARPU_ERROR("Simgrid currently does not support privatization for dynamically-linked libraries in SMPI. 
Please reconfigure and build StarPU with --disable-shared"); +#endif +#if defined(HAVE_MSG_PROCESS_USERDATA_INIT) && !(defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA)) + MSG_process_userdata_init(); +#endif + void **tsd; + _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); +#ifdef HAVE_SG_ACTOR_DATA +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(sg_actor_self(), tsd); +#else + sg_actor_data_set(sg_actor_self(), tsd); +#endif +#else + smpi_process_set_user_data(tsd); +#endif + } + unsigned i; + for (i = 0; i < STARPU_MAXNODES; i++) + starpu_pthread_queue_init(&_starpu_simgrid_transfer_queue[i]); + for (i = 0; i < STARPU_NMAXWORKERS; i++) + starpu_pthread_queue_init(&_starpu_simgrid_task_queue[i]); + _starpu_simgrid_cpp_init(); +} + +/* This is called late from starpu_init, to start task executors */ +void _starpu_simgrid_init(void) +{ + unsigned i; + runners_running = 1; + for (i = 0; i < starpu_worker_get_count(); i++) + { + char s[32]; + snprintf(s, sizeof(s), "worker %u runner", i); + starpu_sem_init(&worker_runner[i].sem, 0, 0); + starpu_pthread_create_on(s, &worker_runner[i].runner, NULL, task_execute, (void*)(uintptr_t) i, _starpu_simgrid_get_host_by_worker(_starpu_get_worker_struct(i))); + } +} + +void _starpu_simgrid_deinit_late(void) +{ +#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) + if (simgrid_started == 3) + { + /* Started with MSG_process_attach, now detach */ +#ifdef HAVE_SG_ACTOR_ATTACH + sg_actor_detach(); +#else + MSG_process_detach(); +#endif + simgrid_started = 0; + } +#endif +} + +void _starpu_simgrid_deinit(void) +{ + unsigned i, j; + runners_running = 0; + for (i = 0; i < STARPU_MAXNODES; i++) + { + for (j = 0; j < STARPU_MAXNODES; j++) + { + struct transfer_runner *t = &transfer_runner[i][j]; + if (t->runner) + { + starpu_sem_post(&t->sem); +#ifdef STARPU_HAVE_SIMGRID_ACTOR_H + sg_actor_join(t->runner, 1000000); +#elif SIMGRID_VERSION >= 31400 + MSG_process_join(t->runner, 
1000000); +#else + starpu_sleep(1); +#endif + STARPU_ASSERT(t->first_transfer == NULL); + STARPU_ASSERT(t->last_transfer == NULL); + starpu_sem_destroy(&t->sem); + } + } + /* FIXME: queue not empty at this point, needs proper unregistration */ + /* starpu_pthread_queue_destroy(&_starpu_simgrid_transfer_queue[i]); */ + } + for (i = 0; i < starpu_worker_get_count(); i++) + { + struct worker_runner *w = &worker_runner[i]; + starpu_sem_post(&w->sem); +#ifdef STARPU_HAVE_SIMGRID_ACTOR_H + sg_actor_join(w->runner, 1000000); +#elif SIMGRID_VERSION >= 31400 + MSG_process_join(w->runner, 1000000); +#else + starpu_sleep(1); +#endif + STARPU_ASSERT(w->first_task == NULL); + STARPU_ASSERT(w->last_task == NULL); + starpu_sem_destroy(&w->sem); + starpu_pthread_queue_destroy(&_starpu_simgrid_task_queue[i]); + } + +#if SIMGRID_VERSION >= 31300 + /* clean-atexit introduced in simgrid 3.13 */ +# ifdef HAVE_SG_CFG_SET_INT + if (sg_cfg_get_boolean("debug/clean-atexit")) +# elif SIMGRID_VERSION >= 32300 + if (xbt_cfg_get_boolean("debug/clean-atexit")) +# else + if (xbt_cfg_get_boolean("clean-atexit")) +# endif + { + _starpu_simgrid_deinit_late(); + } +#endif +} + +/* + * Tasks + */ + +struct task +{ +#if defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE) + double flops; +#else + msg_task_t task; +#endif + double energy; + + /* communication termination signalization */ + unsigned *finished; + + /* Next task on this worker */ + struct task *next; +}; + +/* Actually execute the task. 
*/ +static void *task_execute(void *arg) +{ + unsigned workerid = (uintptr_t) arg; + struct worker_runner *w = &worker_runner[workerid]; + + _STARPU_DEBUG("worker runner %u started\n", workerid); + while (1) + { + struct task *task; + + starpu_sem_wait(&w->sem); + if (!runners_running) + break; + + task = w->first_task; + w->first_task = task->next; + if (w->last_task == task) + w->last_task = NULL; + + _STARPU_DEBUG("task %p started\n", task); +#ifdef HAVE_SG_ACTOR_EXECUTE + sg_actor_execute(task->flops); +#elif defined(HAVE_SG_ACTOR_SELF_EXECUTE) + sg_actor_self_execute(task->flops); +#else + MSG_task_execute(task->task); + MSG_task_destroy(task->task); +#endif + starpu_energy_use(task->energy); + _STARPU_DEBUG("task %p finished\n", task); + + *task->finished = 1; + /* The worker which started this task may be sleeping out of tasks, wake it */ + _starpu_wake_worker_relax(workerid); + + free(task); + } + _STARPU_DEBUG("worker %u stopped\n", workerid); + return 0; +} + +/* Wait for completion of all asynchronous tasks for this worker */ +void _starpu_simgrid_wait_tasks(int workerid) +{ + struct task *task = worker_runner[workerid].last_task; + if (!task) + return; + + unsigned *finished = task->finished; + starpu_pthread_wait_t wait; + starpu_pthread_wait_init(&wait); + starpu_pthread_queue_register(&wait, &_starpu_simgrid_task_queue[workerid]); + + while(1) + { + starpu_pthread_wait_reset(&wait); + if (*finished) + break; + starpu_pthread_wait_wait(&wait); + } + starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_task_queue[workerid]); + starpu_pthread_wait_destroy(&wait); +} + +/* Task execution submitted by StarPU */ +void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, double length, double energy, unsigned *finished) +{ + struct starpu_task *starpu_task = j->task; + double flops; +#if !(defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE)) + 
msg_task_t simgrid_task; +#endif + + if (j->internal) + /* This is not useful to include in simulation (and probably + * doesn't have a perfmodel anyway) */ + return; + + if (isnan(length)) + { + length = starpu_task_worker_expected_length(starpu_task, workerid, sched_ctx_id, j->nimpl); + if (STARPU_UNLIKELY(_STARPU_IS_ZERO(length) || isnan(length))) + { + fprintf(stderr, "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables\n", + _starpu_job_get_model_name(j)); + _exit(EXIT_FAILURE); + } + /* TODO: option to add variance according to performance model, + * to be able to easily check scheduling robustness */ + } + if (isnan(energy)) + { + energy = starpu_task_worker_expected_energy(starpu_task, workerid, sched_ctx_id, j->nimpl); + /* TODO: option to add variance according to performance model, + * to be able to easily check scheduling robustness */ + } + +#ifdef HAVE_SG_HOST_GET_SPEED + flops = length/1000000.0*sg_host_get_speed(sg_host_self()); +#else +#if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed) +# if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) + flops = length/1000000.0*sg_host_speed(sg_host_self()); +# else + flops = length/1000000.0*sg_host_speed(MSG_host_self()); +# endif +#elif defined HAVE_MSG_HOST_GET_SPEED || defined(MSG_host_get_speed) + flops = length/1000000.0*MSG_host_get_speed(MSG_host_self()); +#else + flops = length/1000000.0*MSG_get_host_speed(MSG_host_self()); +#endif +#endif + +#if !(defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE)) + simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL); +#endif + + if (finished == NULL) + { + /* Synchronous execution */ + /* First wait for previous tasks */ + _starpu_simgrid_wait_tasks(workerid); +#ifdef HAVE_SG_ACTOR_EXECUTE + sg_actor_execute(flops); +#elif defined(HAVE_SG_ACTOR_SELF_EXECUTE) + 
sg_actor_self_execute(flops); +#else + MSG_task_execute(simgrid_task); + MSG_task_destroy(simgrid_task); +#endif + starpu_energy_use(energy); + } + else + { + /* Asynchronous execution */ + struct task *task; + struct worker_runner *w = &worker_runner[workerid]; + _STARPU_MALLOC(task, sizeof(*task)); +#if defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE) + task->flops = flops; +#else + task->task = simgrid_task; +#endif + task->energy = energy; + task->finished = finished; + *finished = 0; + task->next = NULL; + /* Sleep 10µs for the GPU task queueing */ + if (_starpu_simgrid_cuda_queue_cost()) + starpu_sleep(0.000010); + if (w->last_task) + { + /* Already running a task, queue */ + w->last_task->next = task; + w->last_task = task; + } + else + { + STARPU_ASSERT(!w->first_task); + w->first_task = task; + w->last_task = task; + } + starpu_sem_post(&w->sem); + } +} + +/* + * Transfers + */ + +/* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers. 
*/ +LIST_TYPE(transfer, +#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) + size_t size; +#else + msg_task_t task; +#endif + int src_node; + int dst_node; + int run_node; + + /* communication termination signalization */ + unsigned *finished; + + /* transfers which wait for this transfer */ + struct transfer **wake; + unsigned nwake; + + /* Number of transfers that this transfer waits for */ + unsigned nwait; + + /* Next transfer on this stream */ + struct transfer *next; +) + +static struct transfer_list pending; + +/* Tell for two transfers whether they should be handled in sequence */ +static int transfers_are_sequential(struct transfer *new_transfer, struct transfer *old_transfer) +{ + int new_is_cuda STARPU_ATTRIBUTE_UNUSED, old_is_cuda STARPU_ATTRIBUTE_UNUSED; + int new_is_opencl STARPU_ATTRIBUTE_UNUSED, old_is_opencl STARPU_ATTRIBUTE_UNUSED; + int new_is_gpu_gpu, old_is_gpu_gpu; + + new_is_cuda = starpu_node_get_kind(new_transfer->src_node) == STARPU_CUDA_RAM; + new_is_cuda |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_CUDA_RAM; + old_is_cuda = starpu_node_get_kind(old_transfer->src_node) == STARPU_CUDA_RAM; + old_is_cuda |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_CUDA_RAM; + + new_is_opencl = starpu_node_get_kind(new_transfer->src_node) == STARPU_OPENCL_RAM; + new_is_opencl |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_OPENCL_RAM; + old_is_opencl = starpu_node_get_kind(old_transfer->src_node) == STARPU_OPENCL_RAM; + old_is_opencl |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_OPENCL_RAM; + + new_is_gpu_gpu = new_transfer->src_node && new_transfer->dst_node; + old_is_gpu_gpu = old_transfer->src_node && old_transfer->dst_node; + + /* We ignore cuda-opencl transfers, they can not happen */ + STARPU_ASSERT(!((new_is_cuda && old_is_opencl) || (old_is_cuda && new_is_opencl))); + + /* The following constraints have been observed with CUDA alone */ + + /* Same source/destination, sequential */ + if 
(new_transfer->src_node == old_transfer->src_node && new_transfer->dst_node == old_transfer->dst_node) + return 1; + + /* Crossed GPU-GPU, sequential */ + if (new_is_gpu_gpu + && new_transfer->src_node == old_transfer->dst_node + && old_transfer->src_node == new_transfer->dst_node) + return 1; + + /* GPU-GPU transfers are sequential with any RAM->GPU transfer */ + if (new_is_gpu_gpu + && (old_transfer->dst_node == new_transfer->src_node + || old_transfer->dst_node == new_transfer->dst_node)) + return 1; + if (old_is_gpu_gpu + && (new_transfer->dst_node == old_transfer->src_node + || new_transfer->dst_node == old_transfer->dst_node)) + return 1; + + /* StarPU's constraint on CUDA transfers is using one stream per + * source/destination pair, which is already handled above */ + + return 0; +} + +static void transfer_queue(struct transfer *transfer) +{ + unsigned src = transfer->src_node; + unsigned dst = transfer->dst_node; + struct transfer_runner *t = &transfer_runner[src][dst]; + + if (!t->runner) + { + /* No runner yet, start it */ + static starpu_pthread_mutex_t mutex; /* process_create may yield */ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + if (!t->runner) + { + char s[64]; + snprintf(s, sizeof(s), "transfer %u-%u runner", src, dst); + starpu_pthread_create_on(s, &t->runner, NULL, transfer_execute, (void*)(uintptr_t)((src<<16) + dst), _starpu_simgrid_get_memnode_host(src)); + starpu_sem_init(&t->sem, 0, 0); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } + + if (t->last_transfer) + { + /* Already running a transfer, queue */ + t->last_transfer->next = transfer; + t->last_transfer = transfer; + } + else + { + STARPU_ASSERT(!t->first_transfer); + t->first_transfer = transfer; + t->last_transfer = transfer; + } + starpu_sem_post(&t->sem); +} + +/* Actually execute the transfer, and then start transfers waiting for this one. 
*/ +static void *transfer_execute(void *arg) +{ + unsigned src_dst = (uintptr_t) arg; + unsigned src = src_dst >> 16; + unsigned dst = src_dst & 0xffff; + struct transfer_runner *t = &transfer_runner[src][dst]; + + _STARPU_DEBUG("transfer runner %u-%u started\n", src, dst); + while (1) + { + struct transfer *transfer; + + starpu_sem_wait(&t->sem); + if (!runners_running) + break; + transfer = t->first_transfer; + t->first_transfer = transfer->next; + if (t->last_transfer == transfer) + t->last_transfer = NULL; + +#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) + if (transfer->size) +#else + if (transfer->task) +#endif + { + _STARPU_DEBUG("transfer %p started\n", transfer); +#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) +#ifdef HAVE_SG_HOST_SENDTO + sg_host_sendto +#else + sg_host_send_to +#endif + (_starpu_simgrid_memory_node_get_host(transfer->src_node), + _starpu_simgrid_memory_node_get_host(transfer->dst_node), + transfer->size); +#else + MSG_task_execute(transfer->task); + MSG_task_destroy(transfer->task); +#endif + _STARPU_DEBUG("transfer %p finished\n", transfer); + } + + *transfer->finished = 1; + transfer_list_erase(&pending, transfer); + + /* The workers which started this request may be sleeping out of tasks, wake it */ + _starpu_wake_all_blocked_workers_on_node(transfer->run_node); + + unsigned i; + /* Wake transfers waiting for my termination */ + /* Note: due to possible preemption inside process_create, the array + * may grow while doing this */ + for (i = 0; i < transfer->nwake; i++) + { + struct transfer *wake = transfer->wake[i]; + STARPU_ASSERT(wake->nwait > 0); + wake->nwait--; + if (!wake->nwait) + { + _STARPU_DEBUG("triggering transfer %p\n", wake); + transfer_queue(wake); + } + } + free(transfer->wake); + free(transfer); + } + + return 0; +} + +/* Look for sequentialization between this transfer and pending transfers, and submit this one */ +static void transfer_submit(struct transfer *transfer) +{ + 
struct transfer *old; + + for (old = transfer_list_begin(&pending); + old != transfer_list_end(&pending); + old = transfer_list_next(old)) + { + if (transfers_are_sequential(transfer, old)) + { + _STARPU_DEBUG("transfer %p(%d->%d) waits for %p(%d->%d)\n", + transfer, transfer->src_node, transfer->dst_node, + old, old->src_node, old->dst_node); + /* Make new wait for the old */ + transfer->nwait++; + /* Make old wake the new */ + _STARPU_REALLOC(old->wake, (old->nwake + 1) * sizeof(old->wake)); + old->wake[old->nwake] = transfer; + old->nwake++; + } + } + + transfer_list_push_front(&pending, transfer); + + if (!transfer->nwait) + { + _STARPU_DEBUG("transfer %p waits for nobody, starting\n", transfer); + transfer_queue(transfer); + } +} + +int _starpu_simgrid_wait_transfer_event(void *_event) +{ + struct _starpu_simgrid_event *event = _event; + /* this is not associated to a request so it's synchronous */ + starpu_pthread_wait_t wait; + starpu_pthread_wait_init(&wait); + starpu_pthread_queue_register(&wait, event->queue); + + while(1) + { + starpu_pthread_wait_reset(&wait); + if (event->finished) + break; + starpu_pthread_wait_wait(&wait); + } + starpu_pthread_queue_unregister(&wait, event->queue); + starpu_pthread_wait_destroy(&wait); + return 0; +} + +int _starpu_simgrid_test_transfer_event(void *_event) +{ + struct _starpu_simgrid_event *event = _event; + return event->finished; +} + +/* Wait for completion of all transfers */ +static void _starpu_simgrid_wait_transfers(void) +{ + unsigned finished = 0; + struct transfer *sync = transfer_new(); + struct transfer *cur; + +#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) + sync->size = 0; +#else + sync->task = NULL; +#endif + sync->finished = &finished; + + sync->src_node = STARPU_MAIN_RAM; + sync->dst_node = STARPU_MAIN_RAM; + sync->run_node = STARPU_MAIN_RAM; + + sync->wake = NULL; + sync->nwake = 0; + sync->nwait = 0; + sync->next = NULL; + + for (cur = transfer_list_begin(&pending); + cur != 
transfer_list_end(&pending); + cur = transfer_list_next(cur)) + { + sync->nwait++; + _STARPU_REALLOC(cur->wake, (cur->nwake + 1) * sizeof(cur->wake)); + cur->wake[cur->nwake] = sync; + cur->nwake++; + } + + if (sync->nwait == 0) + { + /* No transfer to wait for */ + free(sync); + return; + } + + /* Push synchronization pseudo-transfer */ + transfer_list_push_front(&pending, sync); + + /* And wait for it */ + starpu_pthread_wait_t wait; + starpu_pthread_wait_init(&wait); + starpu_pthread_queue_register(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]); + while(1) + { + starpu_pthread_wait_reset(&wait); + if (finished) + break; + starpu_pthread_wait_wait(&wait); + } + starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]); + starpu_pthread_wait_destroy(&wait); +} + +/* Data transfer issued by StarPU */ +int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req) +{ + /* Simgrid does not like 0-bytes transfers */ + if (!size) + return 0; + + /* Explicitly disabled by user? 
*/ + if (!simgrid_transfer_cost) + return 0; + + struct _starpu_simgrid_event *event, myevent; + double start = 0.; + struct transfer *transfer = transfer_new(); + + _STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size); + +#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) + transfer->size = size; +#else + msg_task_t task; + starpu_sg_host_t *hosts; + double *computation; + double *communication; + + _STARPU_CALLOC(hosts, 2, sizeof(*hosts)); + _STARPU_CALLOC(computation, 2, sizeof(*computation)); + _STARPU_CALLOC(communication, 4, sizeof(*communication)); + + hosts[0] = _starpu_simgrid_memory_node_get_host(src_node); + hosts[1] = _starpu_simgrid_memory_node_get_host(dst_node); + STARPU_ASSERT(hosts[0] != hosts[1]); + communication[1] = size; + + task = MSG_parallel_task_create("copy", 2, hosts, computation, communication, NULL); + + transfer->task = task; +#endif + transfer->src_node = src_node; + transfer->dst_node = dst_node; + transfer->run_node = starpu_worker_get_local_memory_node(); + + if (req) + event = _starpu_simgrid_event(&req->async_channel.event); + else + event = &myevent; + event->finished = 0; + transfer->finished = &event->finished; + event->queue = &_starpu_simgrid_transfer_queue[transfer->run_node]; + + transfer->wake = NULL; + transfer->nwake = 0; + transfer->nwait = 0; + transfer->next = NULL; + + if (req) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + + /* Sleep 10µs for the GPU transfer queueing */ + if (_starpu_simgrid_cuda_queue_cost()) + starpu_sleep(0.000010); + transfer_submit(transfer); + /* Note: from here, transfer might be already freed */ + + if (req) + { + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + starpu_interface_data_copy(src_node, dst_node, size); + return -EAGAIN; + } + else + { + /* this is not associated to a request so it's synchronous */ + _starpu_simgrid_wait_transfer_event(event); + return 0; + } +} + +/* Sync all GPUs 
(used on CUDA Free, typically) */ +void _starpu_simgrid_sync_gpus(void) +{ + _starpu_simgrid_wait_transfers(); +} + +_starpu_simgrid_main_ret +_starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[]) +{ + void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16); + void *arg = (void*) (uintptr_t) strtol(argv[1], NULL, 16); + + /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ + starpu_sleep(0.000001); + _starpu_simgrid_actor_setup(); + + /* _args is freed with process context */ + f(arg); + _STARPU_SIMGRID_MAIN_RETURN; +} + +starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]) +{ + void **tsd; + starpu_pthread_t actor; + _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); +#ifdef HAVE_SG_ACTOR_INIT + actor = sg_actor_init(name, host); +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(actor, tsd); +#else + sg_actor_data_set(actor, tsd); +#endif + sg_actor_start(actor, code, argc, argv); +#else + actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv); +#ifdef HAVE_SG_ACTOR_DATA +#ifdef HAVE_SG_ACTOR_SET_DATA + sg_actor_set_data(actor, tsd); +#else + sg_actor_data_set(actor, tsd); +#endif +#endif +#endif + return actor; +} + +starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node) +{ + const char *fmt; + char name[16]; + + switch (starpu_node_get_kind(node)) + { + case STARPU_CPU_RAM: + /* We do not specify %u as NUMA effects are not taken into account */ + fmt = "RAM"; + break; + case STARPU_CUDA_RAM: + fmt = "CUDA%u"; + break; + case STARPU_OPENCL_RAM: + fmt = "OpenCL%u"; + break; + case STARPU_DISK_RAM: + fmt = "DISK%u"; + break; + default: + STARPU_ABORT(); + break; + } + snprintf(name, sizeof(name), fmt, starpu_memory_node_get_devid(node)); + + return _starpu_simgrid_get_host_by_name(name); +} + +void _starpu_simgrid_count_ngpus(void) +{ +#if 
(defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300) + unsigned src, dst; + starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM"); + + /* For each pair of memory nodes, get the route */ + for (src = 1; src < STARPU_MAXNODES; src++) + for (dst = 1; dst < STARPU_MAXNODES; dst++) + { + int busid; + starpu_sg_host_t srchost, dsthost; +#if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) + const_sg_link_t *routes; + const_sg_link_t link; +#else + xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(starpu_sg_link_t), NULL); + starpu_sg_link_t link; +#endif + int i, routesize; + int through; + unsigned src2; + unsigned ngpus; + const char *name; + + if (dst == src) + continue; + busid = starpu_bus_get_id(src, dst); + if (busid == -1) + continue; + + srchost = _starpu_simgrid_get_memnode_host(src); + dsthost = _starpu_simgrid_get_memnode_host(dst); +#if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) || defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route) +#if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) + routes = sg_host_get_route_links(srchost, dsthost, &routesize); +#elif defined(HAVE_SG_HOST_GET_ROUTE) + sg_host_get_route(srchost, dsthost, route_dynar); + routesize = xbt_dynar_length(route_dynar); +#else + sg_host_route(srchost, dsthost, route_dynar); + routesize = xbt_dynar_length(route_dynar); +#endif +#else + const starpu_sg_link_t *route = SD_route_get_list(srchost, dsthost); + routesize = SD_route_get_size(srchost, dsthost); + for (i = 0; i < routesize; i++) + xbt_dynar_push(route_dynar, &route[i]); + free(route); +#endif + if (routesize == 1) + { + /* Direct link! 
no need to count anything */ + starpu_bus_set_ngpus(busid, 1); + continue; + } + + /* If it goes through "Host", do not care, there is no + * direct transfer support */ + for (i = 0; i < routesize; i++) + { +#ifdef HAVE_SG_HOST_GET_ROUTE_LINKS + link = routes[i]; +#else + xbt_dynar_get_cpy(route_dynar, i, &link); +#endif + if ( +#ifdef HAVE_SG_LINK_GET_NAME + !strcmp(sg_link_get_name(link), "Host") +#else + !strcmp(sg_link_name(link), "Host") +#endif + ) + break; + } + if (i < routesize) + continue; + + /* Get the PCI bridge between down and up links */ + through = -1; + for (i = 0; i < routesize; i++) + { +#ifdef HAVE_SG_HOST_GET_ROUTE_LINKS + link = routes[i]; +#else + xbt_dynar_get_cpy(route_dynar, i, &link); +#endif +#ifdef HAVE_SG_LINK_GET_NAME + name = sg_link_get_name(link); +#else + name = sg_link_name(link); +#endif + size_t len = strlen(name); + if (!strcmp(" through", name+len-8)) + through = i; + else if (!strcmp(" up", name+len-3)) + break; + } + /* Didn't find it ?! */ + if (through == -1) + { + _STARPU_DISP("Didn't find through-link for %d->%d\n", src, dst); + continue; + } + +#ifdef HAVE_SG_HOST_GET_ROUTE_LINKS + link = routes[through]; +#else + xbt_dynar_get_cpy(route_dynar, through, &link); +#endif +#ifdef HAVE_SG_LINK_GET_NAME + name = sg_link_get_name(link); +#else + name = sg_link_name(link); +#endif + + /* + * count how many direct routes go through it between + * GPUs and RAM + */ + ngpus = 0; + for (src2 = 1; src2 < STARPU_MAXNODES; src2++) + { + int numa; + int nnumas = starpu_memory_nodes_get_numa_count(); + int found = 0; + for (numa = 0; numa < nnumas; numa++) + if (starpu_bus_get_id(src2, numa) != -1) + { + found = 1; + break; + } + + if (!found) + continue; + + starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2); + int routesize2; +#if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) + const_sg_link_t *routes2; +#else + xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(starpu_sg_link_t), NULL); +#endif +#if 
defined(HAVE_SG_HOST_GET_ROUTE_LINKS) || defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route) +#if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) + routes2 = sg_host_get_route_links(srchost2, ramhost, &routesize2); +#elif defined(HAVE_SG_HOST_GET_ROUTE) + sg_host_get_route(srchost2, ramhost, route_dynar2); + routesize2 = xbt_dynar_length(route_dynar2); +#else + sg_host_route(srchost2, ramhost, route_dynar2); + routesize2 = xbt_dynar_length(route_dynar2); +#endif +#else + const starpu_sg_link_t *route2 = SD_route_get_list(srchost2, ramhost); + routesize2 = SD_route_get_size(srchost2, ramhost); + for (i = 0; i < routesize2; i++) + xbt_dynar_push(route_dynar2, &route2[i]); + free(route2); +#endif + + for (i = 0; i < routesize2; i++) + { +#ifdef HAVE_SG_HOST_GET_ROUTE_LINKS + link = routes2[i]; +#else + xbt_dynar_get_cpy(route_dynar2, i, &link); +#endif + if ( +#ifdef HAVE_SG_LINK_GET_NAME + !strcmp(name, sg_link_get_name(link)) +#else + !strcmp(name, sg_link_name(link)) +#endif + ) + { + /* This GPU goes through this PCI bridge to access RAM */ + ngpus++; + break; + } + } + } + _STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus); + starpu_bus_set_ngpus(busid, ngpus); + } +#endif +} + +#if 0 +static size_t used; + +void _starpu_simgrid_data_new(size_t size) +{ + // Note: this is just declarative + //_STARPU_DISP("data new: %zd, now %zd\n", size, used); +} + +void _starpu_simgrid_data_increase(size_t size) +{ + used += size; + _STARPU_DISP("data increase: %zd, now %zd\n", size, used); +} + +void _starpu_simgrid_data_alloc(size_t size) +{ + used += size; + _STARPU_DISP("data alloc: %zd, now %zd\n", size, used); +} + +void _starpu_simgrid_data_free(size_t size) +{ + used -= size; + _STARPU_DISP("data free: %zd, now %zd\n", size, used); +} + +void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node) +{ + _STARPU_DISP("data transfer %zd from %u to %u\n", size, src_node, dst_node); +} +#endif + +void 
starpu_energy_use(float joules) +{ + _starpu_simgrid_dynamic_energy += joules; +} + +double starpu_energy_used(void) +{ + float idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); + return _starpu_simgrid_dynamic_energy + idle_power * starpu_timing_now() / 1000000; +} + +#endif diff --git a/src/core/simgrid.h b/src/core/simgrid.h new file mode 100644 index 0000000..fb0d2be --- /dev/null +++ b/src/core/simgrid.h @@ -0,0 +1,186 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __SIMGRID_H__ +#define __SIMGRID_H__ + +/** @file */ + +/* Note: when changing something here, update the include list in configure.ac + * in the part that tries to enable stdc++11 */ +#ifdef STARPU_SIMGRID +#ifdef STARPU_HAVE_SIMGRID_MSG_H +#include +#elif defined(STARPU_HAVE_MSG_MSG_H) +#include +#endif + +#ifdef STARPU_HAVE_XBT_BASE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_VERSION_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_ZONE_H +#include +#endif +#ifdef STARPU_HAVE_SIMGRID_HOST_H +#include +#endif +#if defined(HAVE_SIMGRID_SIMDAG_H) && (SIMGRID_VERSION >= 31300) +#include +#endif + +#include +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef STARPU_SIMGRID +#pragma GCC visibility push(hidden) + +struct _starpu_pthread_args +{ + void *(*f)(void*); + void *arg; +}; + +#if (SIMGRID_VERSION >= 32600) +typedef void _starpu_simgrid_main_ret; +#define _STARPU_SIMGRID_MAIN_RETURN do { } while (0) +#else +typedef int _starpu_simgrid_main_ret; +#define _STARPU_SIMGRID_MAIN_RETURN return 0 +#endif +#if (SIMGRID_VERSION >= 31500) && (SIMGRID_VERSION != 31559) +typedef sg_link_t starpu_sg_link_t; +#else +typedef SD_link_t starpu_sg_link_t; +#endif +_starpu_simgrid_main_ret +_starpu_simgrid_thread_start(int argc, char *argv[]); + +#define MAX_TSD 16 + +#define STARPU_MPI_AS_PREFIX "StarPU-MPI" +#define _starpu_simgrid_running_smpi() (getenv("SMPI_GLOBAL_SIZE") != NULL) + +void _starpu_start_simgrid(int *argc, char **argv); + +void _starpu_simgrid_init_early(int *argc, char ***argv); +void _starpu_simgrid_init(void); +void _starpu_simgrid_cpp_init(void); +void _starpu_simgrid_deinit(void); +void _starpu_simgrid_deinit_late(void); +void _starpu_simgrid_actor_setup(void); +void _starpu_simgrid_wait_tasks(int workerid); +struct _starpu_job; +void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *job, struct starpu_perfmodel_arch* perf_arch, double length, double energy, unsigned *finished); +struct 
_starpu_data_request; +int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req); +int _starpu_simgrid_wait_transfer_event(void *event); +int _starpu_simgrid_test_transfer_event(void *event); +void _starpu_simgrid_sync_gpus(void); +/** Return the number of hosts prefixed by PREFIX */ +int _starpu_simgrid_get_nbhosts(const char *prefix); +unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid); +const char *_starpu_simgrid_get_devname(const char *prefix, unsigned devid); +starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name); +starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node); +struct _starpu_worker; +starpu_sg_host_t _starpu_simgrid_get_host_by_worker(struct _starpu_worker *worker); +void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen); +#if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name) +sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name); +#else +msg_as_t _starpu_simgrid_get_as_by_name(const char *name); +#endif +#pragma weak starpu_mpi_world_rank +extern int starpu_mpi_world_rank(void); +#pragma weak _starpu_mpi_simgrid_init +int _starpu_mpi_simgrid_init(int argc, char *argv[]); + +extern starpu_pthread_queue_t _starpu_simgrid_transfer_queue[STARPU_MAXNODES]; +extern starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS]; + +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB +extern starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; +extern starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; +#endif + +#define _starpu_simgrid_cuda_malloc_cost() starpu_getenv_number_default("STARPU_SIMGRID_CUDA_MALLOC_COST", 1) +#define _starpu_simgrid_cuda_queue_cost() starpu_getenv_number_default("STARPU_SIMGRID_CUDA_QUEUE_COST", 1) +#define _starpu_simgrid_task_submit_cost() starpu_getenv_number_default("STARPU_SIMGRID_TASK_SUBMIT_COST", 1) +#define _starpu_simgrid_task_push_cost() 
starpu_getenv_number_default("STARPU_SIMGRID_TASK_PUSH_COST", 1) +#define _starpu_simgrid_fetching_input_cost() starpu_getenv_number_default("STARPU_SIMGRID_FETCHING_INPUT_COST", 1) +#define _starpu_simgrid_sched_cost() starpu_getenv_number_default("STARPU_SIMGRID_SCHED_COST", 0) + +/** Called at initialization to count how many GPUs are interfering with each + * bus */ +void _starpu_simgrid_count_ngpus(void); + +extern size_t _starpu_default_stack_size; +void _starpu_simgrid_set_stack_size(size_t stack_size); +void _starpu_simgrid_xbt_thread_create(const char *name, starpu_pthread_attr_t *attr, void_f_pvoid_t code, + void *param); + +#define _SIMGRID_TIMER_BEGIN(cond) \ + { \ + xbt_os_timer_t __timer = NULL; \ + if (cond) { \ + __timer = xbt_os_timer_new(); \ + xbt_os_threadtimer_start(__timer); \ + } +#define _SIMGRID_TIMER_END \ + if (__timer) { \ + xbt_os_threadtimer_stop(__timer); \ + starpu_sleep(xbt_os_timer_elapsed(__timer));\ + xbt_os_timer_free(__timer); \ + } \ + } + +#pragma GCC visibility pop + +#else // !STARPU_SIMGRID +#define _SIMGRID_TIMER_BEGIN(cond) { +#define _SIMGRID_TIMER_END } +#endif + +/** Experimental functions for OOC stochastic analysis */ +/* disk <-> MAIN_RAM only */ +#if defined(STARPU_SIMGRID) && 0 +void _starpu_simgrid_data_new(size_t size); +void _starpu_simgrid_data_increase(size_t size); +void _starpu_simgrid_data_alloc(size_t size); +void _starpu_simgrid_data_free(size_t size); +void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node); +#else +#define _starpu_simgrid_data_new(size) (void)0 +#define _starpu_simgrid_data_increase(size) (void)0 +#define _starpu_simgrid_data_alloc(size) (void)0 +#define _starpu_simgrid_data_free(size) (void)0 +#define _starpu_simgrid_data_transfer(size, src_node, dst_node) (void)0 +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __SIMGRID_H__ diff --git a/src/core/simgrid_cpp.cpp b/src/core/simgrid_cpp.cpp new file mode 100644 index 0000000..d2f1e9c --- /dev/null 
+++ b/src/core/simgrid_cpp.cpp @@ -0,0 +1,169 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#ifdef STARPU_SIMGRID +#include +#include +#if SIMGRID_VERSION >= 32190 +#include +#else +#include +#endif +#include + +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB +#include +#endif + +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB +starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; +starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; +#endif + +void _starpu_simgrid_cpp_init(void) +{ +#ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB + STARPU_PTHREAD_MUTEX_INIT(&_starpu_simgrid_time_advance_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&_starpu_simgrid_time_advance_cond, NULL); + simgrid::s4u::Engine::on_time_advance_cb([](double) + { + STARPU_PTHREAD_COND_BROADCAST(&_starpu_simgrid_time_advance_cond); + }); +#endif +} + +/* thread_create function which implements inheritance of MPI privatization */ +/* See https://github.com/simgrid/simgrid/issues/139 */ + +typedef struct +{ + void_f_pvoid_t code; + void *userparam; +#if SIMGRID_VERSION < 32501 + void *father_data; +#endif +} thread_data_t; + +#if SIMGRID_VERSION >= 32501 +static void *_starpu_simgrid_xbt_thread_create_wrapper(void *arg) +{ + thread_data_t *t = (thread_data_t *) arg; + /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at 
MSG process startup */ + starpu_sleep(0.000001); +#ifdef HAVE_SMPI_THREAD_CREATE + /* Make this actor inherit SMPI data from father actor */ + SMPI_thread_create(); +#endif + t->code(t->userparam); + free(t); + return NULL; +} +#else +#if SIMGRID_VERSION >= 32190 +static void _starpu_simgrid_xbt_thread_create_wrapper(void) +#else +static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED) +#endif +{ + /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ + starpu_sleep(0.000001); + +#ifdef HAVE_SMX_ACTOR_T + smx_actor_t +#else + smx_process_t +#endif + self = SIMIX_process_self(); +#if SIMGRID_VERSION < 31300 + thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data(self); +#else + thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data(); +#endif + simcall_process_set_data(self, t->father_data); + t->code(t->userparam); + simcall_process_set_data(self, NULL); + free(t); + +#if SIMGRID_VERSION < 32190 + return 0; +#endif +} +#endif + +void _starpu_simgrid_xbt_thread_create(const char *name, starpu_pthread_attr_t *attr, void_f_pvoid_t code, void *param) +{ +#if SIMGRID_VERSION >= 32501 + starpu_pthread_t t; + thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t)); + res->userparam = param; + res->code = code; + starpu_pthread_create_on(name, &t, attr, _starpu_simgrid_xbt_thread_create_wrapper, res, sg_host_self()); +#else + if (attr && attr->stacksize) + _starpu_simgrid_set_stack_size(attr->stacksize); +#if SIMGRID_VERSION >= 32190 || defined(HAVE_SIMCALL_PROCESS_CREATE) || defined(simcall_process_create) +#ifdef HAVE_SMX_ACTOR_T + smx_actor_t process STARPU_ATTRIBUTE_UNUSED; +#else + smx_process_t process STARPU_ATTRIBUTE_UNUSED; +#endif + thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t)); + res->userparam = param; + res->code = code; +#if SIMGRID_VERSION < 31300 + res->father_data = 
SIMIX_process_self_get_data(SIMIX_process_self()); +#else + res->father_data = SIMIX_process_self_get_data(); +#endif + +#if SIMGRID_VERSION < 31200 + simcall_process_create(&process, +#else + process = simcall_process_create( +#endif + name, + _starpu_simgrid_xbt_thread_create_wrapper, res, +#if SIMGRID_VERSION < 31400 + SIMIX_host_self_get_name(), +#else +# if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) + sg_host_self(), +# else + SIMIX_host_self(), +# endif +#endif +#if SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559 + -1.0, +#endif +#if SIMGRID_VERSION < 32190 + 0, NULL, +#endif + /*props */ NULL +#if SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559 + , 0 +#endif + ); +#else + STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry."); +#endif + if (attr && attr->stacksize) + _starpu_simgrid_set_stack_size(_starpu_default_stack_size); +#endif +} + +#endif diff --git a/src/core/task.c b/src/core/task.c new file mode 100644 index 0000000..24a444f --- /dev/null +++ b/src/core/task.c @@ -0,0 +1,2187 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011 Télécom Sud Paris + * Copyright (C) 2013 Thibaut Lambert + * Copyright (C) 2016 Uppsala University + * Copyright (C) 2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +/* global counters */ +static int __g_total_submitted; +static int __g_peak_submitted; +static int __g_peak_ready; + +/* global counter variables */ +starpu_perf_counter_int64_t _starpu_task__g_total_submitted__value; +starpu_perf_counter_int64_t _starpu_task__g_peak_submitted__value; +starpu_perf_counter_int64_t _starpu_task__g_current_submitted__value; +starpu_perf_counter_int64_t _starpu_task__g_peak_ready__value; +starpu_perf_counter_int64_t _starpu_task__g_current_ready__value; + +/* per-worker counters */ +static int __w_total_executed; +static int __w_cumul_execution_time; + +/* per-codelet counters */ +static int __c_total_submitted; +static int __c_peak_submitted; +static int __c_peak_ready; +static int __c_total_executed; +static int __c_cumul_execution_time; + +/* - */ + +/* per-scheduler knobs */ +static int __s_max_priority_cap_knob; +static int __s_min_priority_cap_knob; + +/* knob variables */ +static int __s_max_priority_cap__value; +static int __s_min_priority_cap__value; + +static struct starpu_perf_knob_group * __kg_starpu_task__per_scheduler; + +/* - */ + +static void global_sample_updater(struct starpu_perf_counter_sample *sample, void *context) +{ + STARPU_ASSERT(context == NULL); /* no context for the global updater */ + (void)context; + + _starpu_perf_counter_sample_set_int64_value(sample, __g_total_submitted, _starpu_task__g_total_submitted__value); + _starpu_perf_counter_sample_set_int64_value(sample, __g_peak_submitted, _starpu_task__g_peak_submitted__value); + _starpu_perf_counter_sample_set_int64_value(sample, __g_peak_ready, _starpu_task__g_peak_ready__value); +} + +static void 
per_worker_sample_updater(struct starpu_perf_counter_sample *sample, void *context) +{ + STARPU_ASSERT(context != NULL); + struct _starpu_worker *worker = context; + + _starpu_perf_counter_sample_set_int64_value(sample, __w_total_executed, worker->__w_total_executed__value); + _starpu_perf_counter_sample_set_double_value(sample, __w_cumul_execution_time, worker->__w_cumul_execution_time__value); +} + +static void per_codelet_sample_updater(struct starpu_perf_counter_sample *sample, void *context) +{ + STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); + struct starpu_perf_counter_set *set = sample->listener->set; + STARPU_ASSERT(set->scope == starpu_perf_counter_scope_per_codelet); + STARPU_ASSERT(context != NULL); + struct starpu_codelet *cl = context; + + _starpu_perf_counter_sample_set_int64_value(sample, __c_total_submitted, cl->perf_counter_values->task.total_submitted); + _starpu_perf_counter_sample_set_int64_value(sample, __c_peak_submitted, cl->perf_counter_values->task.peak_submitted); + _starpu_perf_counter_sample_set_int64_value(sample, __c_peak_ready, cl->perf_counter_values->task.peak_ready); + _starpu_perf_counter_sample_set_int64_value(sample, __c_total_executed, cl->perf_counter_values->task.total_executed); + _starpu_perf_counter_sample_set_double_value(sample, __c_cumul_execution_time, cl->perf_counter_values->task.cumul_execution_time); +} + +void _starpu__task_c__register_counters(void) +{ + { + const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_global; + __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_total_submitted, int64, "number of tasks submitted globally (since StarPU initialization)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_submitted, int64, "maximum simultaneous number of tasks submitted and not yet ready, globally (since StarPU initialization)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_ready, int64, "maximum simultaneous number of tasks ready and not yet 
executing, globally (since StarPU initialization)"); + + _starpu_perf_counter_register_updater(scope, global_sample_updater); + } + + { + const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_worker; + __STARPU_PERF_COUNTER_REG("starpu.task", scope, w_total_executed, int64, "number of tasks executed on this worker (since StarPU initialization)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, w_cumul_execution_time, double, "cumulated execution time of tasks executed on this worker (microseconds, since StarPU initialization)"); + + _starpu_perf_counter_register_updater(scope, per_worker_sample_updater); + } + + { + const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_codelet; + __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_submitted, int64, "number of codelet's task instances submitted using this codelet (since enabled)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_submitted, int64, "maximum simultaneous number of codelet's task instances submitted and not yet ready (since enabled)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_ready, int64, "maximum simultaneous number of codelet's task instances ready and not yet executing (since enabled)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_executed, int64, "number of codelet's task instances executed using this codelet (since enabled)"); + __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_cumul_execution_time, double, "cumulated execution time of codelet's task instances (since enabled)"); + + _starpu_perf_counter_register_updater(scope, per_codelet_sample_updater); + } +} + +/* - */ + +static void sched_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) +{ + const char * const sched_policy_name = *(const char **)context; + (void) sched_policy_name; + if (knob->id == __s_max_priority_cap_knob) + { + STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO); + 
STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO); + STARPU_ASSERT(value->val_int32_t >= __s_min_priority_cap__value); + __s_max_priority_cap__value = value->val_int32_t; + } + else if (knob->id == __s_min_priority_cap_knob) + { + STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO); + STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO); + STARPU_ASSERT(value->val_int32_t <= __s_max_priority_cap__value); + __s_min_priority_cap__value = value->val_int32_t; + } + else + { + STARPU_ASSERT(0); + abort(); + } +} + +static void sched_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) +{ + const char * const sched_policy_name = *(const char **)context; + (void) sched_policy_name; + if (knob->id == __s_max_priority_cap_knob) + { + value->val_int32_t = __s_max_priority_cap__value; + } + else if (knob->id == __s_min_priority_cap_knob) + { + value->val_int32_t = __s_min_priority_cap__value; + } + else + { + STARPU_ASSERT(0); + abort(); + } +} + +void _starpu__task_c__register_knobs(void) +{ +#if 0 + { + const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global; + __kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get); + } +#endif + +#if 0 + { + const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker; + __kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get); + } +#endif + + { + const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler; + __kg_starpu_task__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get); + + /* TODO: priority capping knobs actually work globally for now, the sched policy name is ignored */ + __STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_max_priority_cap_knob, int32, "force task priority to this value or below (priority value)"); + __s_max_priority_cap__value = 
STARPU_MAX_PRIO; + + __STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_min_priority_cap_knob, int32, "force task priority to this value or above (priority value)"); + __s_min_priority_cap__value = STARPU_MIN_PRIO; + } +} + +void _starpu__task_c__unregister_knobs(void) +{ + _starpu_perf_knob_group_unregister(__kg_starpu_task__per_scheduler); + __kg_starpu_task__per_scheduler = NULL; +} + +/* - */ + +/* XXX this should be reinitialized when StarPU is shutdown (or we should make + * sure that no task remains !) */ +/* TODO we could make this hierarchical to avoid contention ? */ +//static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER; + +/* This key stores the task currently handled by the thread, note that we + * cannot use the worker structure to store that information because it is + * possible that we have a task with a NULL codelet, which means its callback + * could be executed by a user thread as well. */ +static starpu_pthread_key_t current_task_key; +static int limit_min_submitted_tasks; +static int limit_max_submitted_tasks; +static int watchdog_crash; +static int watchdog_delay; + +/* + * Function to call when watchdog detects that no task has finished for more than STARPU_WATCHDOG_TIMEOUT seconds + */ +static void (*watchdog_hook)(void *) = NULL; +static void * watchdog_hook_arg = NULL; + +#define _STARPU_TASK_MAGIC 42 + +/* Called once at starpu_init */ +void _starpu_task_init(void) +{ + STARPU_PTHREAD_KEY_CREATE(&current_task_key, NULL); + limit_min_submitted_tasks = starpu_getenv_number("STARPU_LIMIT_MIN_SUBMITTED_TASKS"); + limit_max_submitted_tasks = starpu_getenv_number("STARPU_LIMIT_MAX_SUBMITTED_TASKS"); + watchdog_crash = starpu_getenv_number_default("STARPU_WATCHDOG_CRASH", 0); + watchdog_delay = starpu_getenv_number_default("STARPU_WATCHDOG_DELAY", 0); +} + +void _starpu_task_deinit(void) +{ + STARPU_PTHREAD_KEY_DELETE(current_task_key); +} + +void starpu_set_limit_min_submitted_tasks(int limit_min) +{ +
limit_min_submitted_tasks = limit_min; +} + +void starpu_set_limit_max_submitted_tasks(int limit_max) +{ + limit_max_submitted_tasks = limit_max; +} + +void starpu_task_init(struct starpu_task *task) +{ + /* TODO: memcpy from a template instead? benchmark it */ + + STARPU_ASSERT(task); + + /* As most of the fields must be initialised at NULL, let's put 0 + * everywhere */ + memset(task, 0, sizeof(struct starpu_task)); + + task->sequential_consistency = 1; + task->where = -1; + + /* Now we can initialise fields which recquire custom value */ + /* Note: remember to update STARPU_TASK_INITIALIZER as well */ +#if STARPU_DEFAULT_PRIO != 0 + task->priority = STARPU_DEFAULT_PRIO; +#endif + + task->detach = 1; + +#if STARPU_TASK_INIT != 0 + task->status = STARPU_TASK_INIT; +#endif + + task->predicted = NAN; + task->predicted_transfer = NAN; + task->predicted_start = NAN; + + task->magic = _STARPU_TASK_MAGIC; + task->sched_ctx = STARPU_NMAX_SCHED_CTXS; + + task->flops = 0.0; +} + +/* Free all the resources allocated for a task, without deallocating the task + * structure itself (this is required for statically allocated tasks). + * All values previously set by the user, like codelet and handles, remain + * unchanged */ +void starpu_task_clean(struct starpu_task *task) +{ + STARPU_ASSERT(task); + task->magic = 0; + + /* If a buffer was allocated to store the profiling info, we free it. 
*/ + if (task->profiling_info) + { + free(task->profiling_info); + task->profiling_info = NULL; + } + + /* If case the task is (still) part of a bundle */ + starpu_task_bundle_t bundle = task->bundle; + if (bundle) + starpu_task_bundle_remove(bundle, task); + + if (task->dyn_handles) + { + free(task->dyn_handles); + task->dyn_handles = NULL; + free(task->dyn_interfaces); + task->dyn_interfaces = NULL; + } + + if (task->dyn_modes) + { + free(task->dyn_modes); + task->dyn_modes = NULL; + } + + struct _starpu_job *j = (struct _starpu_job *)task->starpu_private; + + if (j) + { + _starpu_job_destroy(j); + task->starpu_private = NULL; + } +} + +struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create(void) +{ + struct starpu_task *task; + + _STARPU_MALLOC(task, sizeof(struct starpu_task)); + starpu_task_init(task); + + /* Dynamically allocated tasks are destroyed by default */ + task->destroy = 1; + + return task; +} + + +static struct starpu_codelet _starpu_data_sync_cl = +{ + .where = STARPU_NOWHERE, + .nbuffers = STARPU_VARIABLE_NBUFFERS +}; + +struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create_sync(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + struct starpu_task *task = starpu_task_create(); + task->cl = &_starpu_data_sync_cl; + STARPU_TASK_SET_HANDLE(task, handle, 0); + STARPU_TASK_SET_MODE(task, mode, 0); + task->nbuffers = 1; + return task; +} + +/* Free the resource allocated during starpu_task_create. This function can be + * called automatically after the execution of a task by setting the "destroy" + * flag of the starpu_task structure (default behaviour). Calling this function + * on a statically allocated task results in an undefined behaviour. */ +void _starpu_task_destroy(struct starpu_task *task) +{ + + /* If starpu_task_destroy is called in a callback, we just set the destroy + flag. 
The task will be destroyed after the callback returns */ + if (task == starpu_task_get_current() + && _starpu_get_local_worker_status() & STATUS_CALLBACK) + { + task->destroy = 1; + } + else + { + starpu_task_clean(task); + /* TODO handle the case of task with detach = 1 and destroy = 1 */ + /* TODO handle the case of non terminated tasks -> assertion failure, it's too dangerous to be doing something like this */ + + /* Does user want StarPU release cl_arg ? */ + if (task->cl_arg_free) + free(task->cl_arg); + + /* Does user want StarPU release cl_ret ? */ + if (task->cl_ret_free) + free(task->cl_ret); + + /* Does user want StarPU release callback_arg ? */ + if (task->callback_arg_free) + free(task->callback_arg); + + /* Does user want StarPU release epilogue callback_arg ? */ + if (task->epilogue_callback_arg_free) + free(task->epilogue_callback_arg); + + /* Does user want StarPU release prologue_callback_arg ? */ + if (task->prologue_callback_arg_free) + free(task->prologue_callback_arg); + + /* Does user want StarPU release prologue_pop_arg ? 
*/ + if (task->prologue_callback_pop_arg_free) + free(task->prologue_callback_pop_arg); + + free(task); + } +} + +void starpu_task_destroy(struct starpu_task *task) +{ + STARPU_ASSERT(task); + STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for task with destroy = 1 and detach = 1"); + _starpu_task_destroy(task); +} + +void starpu_task_set_destroy(struct starpu_task *task) +{ + STARPU_ASSERT(task); + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + STARPU_ASSERT_MSG(!task->destroy, "starpu_task_set_destroy must not be called for task with destroy = 1"); + if (j->terminated == 2) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + /* It's already over, _starpu_handle_job_termination will not + * destroy it, do it ourself */ + _starpu_task_destroy(task); + } + else + { + /* Let _starpu_handle_job_termination destroy it */ + task->destroy = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + } +} + +int starpu_task_finished(struct starpu_task *task) +{ + STARPU_ASSERT(task); + STARPU_ASSERT_MSG(!task->detach, "starpu_task_finished can only be called on tasks with detach = 0"); + return _starpu_job_finished(_starpu_get_job_associated_to_task(task)); +} + +int starpu_task_wait(struct starpu_task *task) +{ + _STARPU_LOG_IN(); + STARPU_ASSERT(task); + + STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0"); + + if (task->detach || task->synchronous) + { + _STARPU_DEBUG("Task is detached or synchronous. 
Waiting returns immediately\n"); + _STARPU_LOG_OUT_TAG("einval"); + return -EINVAL; + } + + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback"); + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + _STARPU_TRACE_TASK_WAIT_START(j); + + starpu_do_schedule(); + _starpu_wait_job(j); + + /* as this is a synchronous task, the liberation of the job + structure was deferred */ + if (task->destroy) + _starpu_task_destroy(task); + + _starpu_perf_counter_update_global_sample(); + _STARPU_TRACE_TASK_WAIT_END(); + _STARPU_LOG_OUT(); + return 0; +} + +int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) +{ + unsigned i; + + for (i = 0; i < nb_tasks; i++) + { + int ret = starpu_task_wait(tasks[i]); + if (ret) + return ret; + } + return 0; +} + +#ifdef STARPU_OPENMP +int _starpu_task_test_termination(struct starpu_task *task) +{ + STARPU_ASSERT(task); + STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0"); + + if (task->detach || task->synchronous) + { + _STARPU_DEBUG("Task is detached or synchronous\n"); + _STARPU_LOG_OUT_TAG("einval"); + return -EINVAL; + } + + struct _starpu_job *j = (struct _starpu_job *)task->starpu_private; + + int ret = _starpu_test_job_termination(j); + + if (ret) + { + if (task->destroy) + _starpu_task_destroy(task); + } + + return ret; +} +#endif + +/* NB in case we have a regenerable task, it is possible that the job was + * already counted. 
*/ +int _starpu_submit_job(struct _starpu_job *j, int nodeps) +{ + struct starpu_task *task = j->task; + int ret; +#ifdef STARPU_OPENMP + const unsigned continuation = j->continuation; +#else + const unsigned continuation = 0; +#endif + + _STARPU_LOG_IN(); + /* notify bound computation of a new task */ + _starpu_bound_record(j); + + _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); + _starpu_sched_task_submit(task); + +#ifdef STARPU_USE_SC_HYPERVISOR + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); + if(sched_ctx != NULL && j->task->sched_ctx != _starpu_get_initial_sched_ctx()->id && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS + && sched_ctx->perf_counters != NULL) + { + struct starpu_perfmodel_arch arch; + _STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device)); + arch.ndevices = 1; + arch.devices[0].type = STARPU_CPU_WORKER; + arch.devices[0].devid = 0; + arch.devices[0].ncores = 1; + _starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j); + free(arch.devices); + size_t data_size = 0; + if (j->task->cl) + { + unsigned i, nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); + for(i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + if (handle != NULL) + data_size += _starpu_data_get_size(handle); + } + } + + _STARPU_TRACE_HYPERVISOR_BEGIN(); + sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size); + _STARPU_TRACE_HYPERVISOR_END(); + } +#endif//STARPU_USE_SC_HYPERVISOR + + /* We retain handle reference count */ + if (task->cl && !continuation) + { + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + for (i=0; i<nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + } + } + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + + _starpu_handle_job_submission(j); + +#ifdef STARPU_OPENMP + if (continuation) + { + j->discontinuous = 1; + j->continuation = 0; + } +#endif + + if
(nodeps) + { + ret = _starpu_take_deps_and_schedule(j); + } + else + { +#ifdef STARPU_OPENMP + if (continuation) + { + ret = _starpu_reenforce_task_deps_and_schedule(j); + } + else +#endif + { + ret = _starpu_enforce_deps_and_schedule(j); + } + } + + _STARPU_LOG_OUT(); + return ret; +} + +/* Note: this is racy, so valgrind would complain. But since we'll always put + * the same values, this is not a problem. */ +void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl) +{ + if (!cl) + return; + if (cl->checked) + { + STARPU_RMB(); + return; + } + + uint32_t where = cl->where; + int is_where_unset = where == 0; + unsigned i, some_impl; + + /* Check deprecated and unset fields (where, _func, + * _funcs) */ + +#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) + /* CPU */ + if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0]) + { + _STARPU_DISP("[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n"); + cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; + } + if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS) + { + cl->cpu_funcs[0] = cl->cpu_func; + cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; + } + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->cpu_funcs[i]) + { + some_impl = 1; + break; + } + if (some_impl && cl->cpu_func == 0) + { + cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; + } + if (some_impl && is_where_unset) + { + where |= STARPU_CPU; + } +#endif + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + /* CUDA */ + if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0]) + { + _STARPU_DISP("[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. 
Ignoring cuda_func.\n"); + cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; + } + if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS) + { + cl->cuda_funcs[0] = cl->cuda_func; + cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; + } + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->cuda_funcs[i]) + { + some_impl = 1; + break; + } + if (some_impl && cl->cuda_func == 0) + { + cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; + } + if (some_impl && is_where_unset) + { + where |= STARPU_CUDA; + } +#endif + +#if defined(STARPU_USE_HIP) + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->hip_funcs[i]) + { + some_impl = 1; + break; + } + if (some_impl && is_where_unset) + { + where |= STARPU_HIP; + } +#endif + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + /* OpenCL */ + if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0]) + { + _STARPU_DISP("[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. 
Ignoring opencl_func.\n"); + cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; + } + if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS) + { + cl->opencl_funcs[0] = cl->opencl_func; + cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; + } + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->opencl_funcs[i]) + { + some_impl = 1; + break; + } + if (some_impl && cl->opencl_func == 0) + { + cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; + } + if (some_impl && is_where_unset) + { + where |= STARPU_OPENCL; + } +#endif + +#ifdef STARPU_USE_MAX_FPGA + /* FPGA */ + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->max_fpga_funcs[i]) + { + some_impl = 1; + break; + } + if (some_impl && is_where_unset) + { + where |= STARPU_MAX_FPGA; + } +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->cpu_funcs_name[i]) + { + some_impl = 1; + break; + } + if (some_impl && is_where_unset) + { + where |= STARPU_MPI_MS; + } +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + some_impl = 0; + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (cl->cpu_funcs_name[i]) + { + some_impl = 1; + break; + } + if (some_impl && is_where_unset) + { + where |= STARPU_TCPIP_MS; + } +#endif + + cl->where = where; + + STARPU_WMB(); + cl->checked = 1; +} + +void _starpu_task_check_deprecated_fields(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED) +{ + /* None any more */ +} + +static int _starpu_task_submit_head(struct starpu_task *task) +{ + unsigned is_sync = task->synchronous; + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED) + task->status = STARPU_TASK_INIT; + else + STARPU_ASSERT(task->status == STARPU_TASK_INIT); + +#ifdef STARPU_BUBBLE + if ((j->task->bubble_func && j->task->bubble_func(j->task, j->task->bubble_func_arg)) || 
(j->task->cl && j->task->cl->bubble_func && j->task->cl->bubble_func(j->task, j->task->bubble_func_arg))) + j->is_bubble = 1; + else + j->is_bubble = 0; +#endif + + if (j->internal) + { + // Internal tasks are submitted to initial context + task->sched_ctx = _starpu_get_initial_sched_ctx()->id; + // And we don't want them to interfere with submit order ids + task->no_submitorder = 1; + } + else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS) + { + // If the task has not specified a context, we set the current context + task->sched_ctx = _starpu_sched_ctx_get_current_context(); + } + + if (is_sync) + { + /* Perhaps it is not possible to submit a synchronous + * (blocking) task */ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "submitting a synchronous task must not be done from a task or a callback"); + task->detach = 0; + } + +#ifdef STARPU_DEBUG + if (task->workerids) + { + unsigned i; + for (i = 0; i < task->workerids_len; i++) + if (task->workerids[i] != 0) + break; + STARPU_ASSERT_MSG(i < task->workerids_len, "The workerids array can't contain only zeros, it would not be executable at all."); + } +#endif + + _starpu_task_check_deprecated_fields(task); + _starpu_codelet_check_deprecated_fields(task->cl); + if (task->where== -1 && task->cl) + task->where = task->cl->where; + + if (task->cl) + { + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + _STARPU_TRACE_UPDATE_TASK_CNT(0); + + /* Check buffers */ + if (task->dyn_handles == NULL) + STARPU_ASSERT_MSG_ALWAYS(STARPU_TASK_GET_NBUFFERS(task) <= STARPU_NMAXBUFS, + "Codelet %p has too many buffers (%d vs max %d). 
Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", + task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS); + + if (STARPU_UNLIKELY(task->dyn_handles)) + { + _STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *)); + } + + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + for (i = 0; i < nbuffers; i++) + { + starpu_data_handle_t handle = descrs[i].handle; + enum starpu_data_access_mode mode = descrs[i].mode; + + int node = task->cl->specific_nodes ? STARPU_CODELET_GET_NODE(task->cl, i) : -1; + /* Make sure handles are valid */ + STARPU_ASSERT_MSG(handle->magic == _STARPU_TASK_MAGIC, "data %p is invalid (was it already unregistered?)", handle); + /* Make sure handles are not partitioned */ + STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task"); + /* Make sure the specified node exists */ + STARPU_ASSERT_MSG(node == STARPU_SPECIFIC_NODE_LOCAL || node == STARPU_SPECIFIC_NODE_CPU || node == STARPU_SPECIFIC_NODE_SLOW || node == STARPU_SPECIFIC_NODE_LOCAL_OR_CPU || node == STARPU_SPECIFIC_NODE_NONE || (node >= 0 && node < (int) starpu_memory_nodes_get_count()), "The codelet-specified memory node does not exist"); + /* Provide the home interface for now if any, + * for can_execute hooks */ + if (handle->home_node != -1) + _STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i); + + if (!(task->cl->flags & STARPU_CODELET_NOPLANS) && + ((handle->nplans && !handle->nchildren) || handle->siblings) +#ifdef STARPU_BUBBLE + && !j->is_bubble + /* + * => require to set the is_bubble a soon as possible and not in the turn_task_into_bubble. 
+ */ +#endif + && !(mode & STARPU_NOPLAN)) + /* This handle is involved with asynchronous + * partitioning as a parent or a child, make + * sure the right plan is active, submit + * appropriate partitioning / unpartitioning if + * not */ + _starpu_data_partition_access_submit(handle, (mode & (STARPU_W|STARPU_REDUX)) != 0); + } + + /* Check the type of worker(s) required by the task exist */ + if (STARPU_UNLIKELY(!_starpu_worker_exists(task))) + { + _STARPU_LOG_OUT_TAG("ENODEV"); + return -ENODEV; + } + + /* In case we require that a task should be explicitly + * executed on a specific worker, we make sure that the worker + * is able to execute this task. */ + if (STARPU_UNLIKELY(task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))) + { + _STARPU_LOG_OUT_TAG("ENODEV"); + return -ENODEV; + } + + if (task->cl->model) + _starpu_init_and_load_perfmodel(task->cl->model); + + if (task->cl->energy_model) + _starpu_init_and_load_perfmodel(task->cl->energy_model); + } + + return 0; +} + +/* application should submit new tasks to StarPU through this function */ +int _starpu_task_submit(struct starpu_task *task, int nodeps) +{ + _STARPU_LOG_IN(); + STARPU_ASSERT(task); + STARPU_ASSERT_MSG(task->magic == _STARPU_TASK_MAGIC, "Tasks must be created with starpu_task_create, or initialized with starpu_task_init."); + STARPU_ASSERT_MSG(starpu_is_initialized(), "starpu_init must be called (and return no error) before submitting tasks."); + + int ret; + { + /* task knobs */ + if (task->priority > __s_max_priority_cap__value) + task->priority = __s_max_priority_cap__value; + if (task->priority < __s_min_priority_cap__value) + task->priority = __s_min_priority_cap__value; + } + + if (task->transaction != NULL) + { + /* If task is part of a transaction, add its handle to the task + * handle list with a STARPU_R access mode to allow concurrency among the epoch + * tasks while serializing it with epoch and transactions operations */ + 
STARPU_ASSERT(task->cl->nbuffers == STARPU_VARIABLE_NBUFFERS); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&task->transaction->epoch_list)); + task->trs_epoch = _starpu_trs_epoch_list_back(&task->transaction->epoch_list); + int nbuffers = task->nbuffers; + int allocated_nbuffers = (task->dyn_handles != NULL)?nbuffers:0; + task->nbuffers++; + starpu_task_insert_data_process_arg(task->cl, task, &allocated_nbuffers, &nbuffers, STARPU_R, task->transaction->handle); + } + + unsigned is_sync = task->synchronous; + starpu_task_bundle_t bundle = task->bundle; + STARPU_ASSERT_MSG(!(nodeps && bundle), "not supported\n"); + /* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a + * task structure, it is possible that this job structure was already + * allocated. */ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + const unsigned continuation = +#ifdef STARPU_OPENMP + j->continuation +#else + 0 +#endif + ; + if (!_starpu_perf_counter_paused() && !j->internal && !continuation) + { + (void) STARPU_PERF_COUNTER_ADD64(&_starpu_task__g_total_submitted__value, 1); + int64_t value = STARPU_PERF_COUNTER_ADD64(&_starpu_task__g_current_submitted__value, 1); + _starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_submitted__value, value); + _starpu_perf_counter_update_global_sample(); + + if (task->cl && task->cl->perf_counter_values) + { + struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; + + (void) STARPU_PERF_COUNTER_ADD64(&pcv->task.total_submitted, 1); + value = STARPU_PERF_COUNTER_ADD64(&pcv->task.current_submitted, 1); + _starpu_perf_counter_update_max_int64(&pcv->task.peak_submitted, value); + _starpu_perf_counter_update_per_codelet_sample(task->cl); + } + } + STARPU_ASSERT_MSG(!(nodeps && continuation), "not supported\n"); + + if (!j->internal && limit_max_submitted_tasks >= 0 && limit_min_submitted_tasks >= 0) + { + int nsubmitted_tasks = starpu_task_nsubmitted(); + if 
(limit_max_submitted_tasks < nsubmitted_tasks + && limit_min_submitted_tasks < nsubmitted_tasks) + { + starpu_do_schedule(); + _STARPU_TRACE_TASK_THROTTLE_START(); + starpu_task_wait_for_n_submitted(limit_min_submitted_tasks); + _STARPU_TRACE_TASK_THROTTLE_END(); + } + } + + _STARPU_TRACE_TASK_SUBMIT_START(); + + if (task->cl && !continuation) + { + _starpu_job_set_ordered_buffers(j); + } + + ret = _starpu_task_submit_head(task); + if (ret) + { + _STARPU_TRACE_TASK_SUBMIT_END(); + return ret; + } + + if (!continuation) + { +#ifndef STARPU_NO_ASSERT + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + STARPU_ASSERT_MSG(!j->submitted || j->terminated >= 1, "Tasks can not be submitted a second time before being terminated. Please use different task structures, or use the regenerate flag to let the task resubmit itself automatically."); + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); +#endif + _STARPU_TRACE_TASK_SUBMIT(j, + _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[0], + _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[1]); + } + + /* If this is a continuation, we don't modify the implicit data dependencies detected earlier. 
*/ + if (task->cl && !continuation && !nodeps +#ifdef STARPU_BUBBLE + && !j->is_bubble +#endif + ) + { + _starpu_detect_implicit_data_deps(task); + } + + if (STARPU_UNLIKELY(bundle)) + { + /* We need to make sure that models for other tasks of the + * bundle are also loaded, so the scheduler can estimate the + * duration of the whole bundle */ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + struct _starpu_task_bundle_entry *entry; + entry = bundle->list; + + while (entry) + { + if (entry->task->cl->model) + _starpu_init_and_load_perfmodel(entry->task->cl->model); + + if (entry->task->cl->energy_model) + _starpu_init_and_load_perfmodel(entry->task->cl->energy_model); + + entry = entry->next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + } + + /* If profiling is activated, we allocate a structure to store the + * appropriate info. */ + struct starpu_profiling_task_info *info = task->profiling_info; + int profiling = starpu_profiling_status_get(); + if (!info) + { + info = _starpu_allocate_profiling_info_if_needed(task); + task->profiling_info = info; + } + + /* The task is considered as block until we are sure there remains not + * dependency. */ + task->status = STARPU_TASK_BLOCKED; + + if (STARPU_UNLIKELY(profiling)) + _starpu_clock_gettime(&info->submit_time); + + ret = _starpu_submit_job(j, nodeps); +#ifdef STARPU_SIMGRID + if (_starpu_simgrid_task_submit_cost()) + starpu_sleep(0.000001); +#endif + + if (is_sync) + { + if (starpu_is_paused()) + { + static int warned; + if (!warned) + { + warned = 1; + _STARPU_DISP("[warning]: A task with synchronous=1 was submitted after calling starpu_pause(). 
We will thus hang until starpu_resume() gets called.\n"); + } + } + _starpu_sched_do_schedule(task->sched_ctx); + _starpu_wait_job(j); + if (task->destroy) + _starpu_task_destroy(task); + } + + _STARPU_TRACE_TASK_SUBMIT_END(); + _STARPU_LOG_OUT(); + return ret; +} + +#undef starpu_task_submit +int starpu_task_submit(struct starpu_task *task) +{ +#ifdef STARPU_BUBBLE_VERBOSE + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC, &tp); + unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec; + _STARPU_DEBUG("{%llu} [%s(%p)] Submission | id %lu\n", timestamp, starpu_task_get_name(task), task, starpu_task_get_job_id(task)); +#endif + return _starpu_task_submit(task, 0); +} + +int _starpu_task_submit_internally(struct starpu_task *task) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + j->internal = 1; + return starpu_task_submit(task); +} + +/* application should submit new tasks to StarPU through this function */ +int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id) +{ + task->sched_ctx = sched_ctx_id; + return starpu_task_submit(task); +} + +/* The StarPU core can submit tasks directly to the scheduler or a worker, + * skipping dependencies completely (when it knows what it is doing). */ +int starpu_task_submit_nodeps(struct starpu_task *task) +{ + return _starpu_task_submit(task, 1); +} + +/* + * worker->sched_mutex must be locked when calling this function. + */ +int _starpu_task_submit_conversion_task(struct starpu_task *task, + unsigned int workerid) +{ + int ret; + STARPU_ASSERT(task->cl); + STARPU_ASSERT(task->execute_on_a_specific_worker); + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + _starpu_job_set_ordered_buffers(j); + + ret = _starpu_task_submit_head(task); + STARPU_ASSERT(ret == 0); + + /* We retain handle reference count that would have been acquired by data dependencies. 
*/ + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + for (i=0; i<nbuffers; i++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + } + + _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); + _starpu_sched_task_submit(task); + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + _starpu_handle_job_submission(j); + _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); + _starpu_job_set_ordered_buffers(j); + + STARPU_ASSERT(task->status == STARPU_TASK_INIT); + task->status = STARPU_TASK_READY; + _starpu_profiling_set_task_push_start_time(task); + + unsigned node = starpu_worker_get_memory_node(workerid); + if (starpu_get_prefetch_flag()) + starpu_prefetch_task_input_on_node(task, node); + + struct _starpu_worker *worker; + worker = _starpu_get_worker_struct(workerid); + starpu_task_prio_list_push_back(&worker->local_tasks, task); + starpu_wake_worker_locked(worker->workerid); + + _starpu_profiling_set_task_push_end_time(task); + + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + return 0; +} + +void starpu_codelet_init(struct starpu_codelet *cl) +{ + memset(cl, 0, sizeof(struct starpu_codelet)); +} + +#define _STARPU_CODELET_WORKER_NAME_LEN 32 + +void starpu_codelet_display_stats(struct starpu_codelet *cl) +{ + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + + if (cl->name) + fprintf(stderr, "Statistics for codelet %s\n", cl->name); + else if (cl->model && cl->model->symbol) + fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol); + + unsigned long total = 0; + + for (worker = 0; worker < nworkers; worker++) + total += cl->per_worker_stats[worker]; + + for (worker = 0; worker < nworkers; worker++) + { + char name[_STARPU_CODELET_WORKER_NAME_LEN]; + starpu_worker_get_name(worker, name, _STARPU_CODELET_WORKER_NAME_LEN); + + fprintf(stderr, "\t%s -> %lu / %lu (%2.2f %%)\n", name, cl->per_worker_stats[worker], total,
(100.0f*cl->per_worker_stats[worker])/total); + } +} + +/* + * We wait for all tasks that have been submitted to the scheduling context and its nested contexts + */ +void _starpu_do_schedule_in_nested_ctx(unsigned sched_ctx_id) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned s; + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS && config->sched_ctxs[s].do_schedule == 1 && config->sched_ctxs[s].nesting_sched_ctx == sched_ctx_id && s != sched_ctx_id) + { + _starpu_do_schedule_in_nested_ctx(s); + } + } + _starpu_sched_do_schedule(sched_ctx_id); +} + +int _starpu_task_wait_for_all_in_nested_ctx_and_return_nb_waited_tasks(unsigned sched_ctx_id) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned nb_waited_tasks = 0; + unsigned s; + + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS && config->sched_ctxs[s].nesting_sched_ctx == sched_ctx_id && s != sched_ctx_id) + { + _STARPU_DEBUG("Recursively waiting for tasks submitted to sub context %u of %u\n", s, sched_ctx_id); + nb_waited_tasks += _starpu_task_wait_for_all_in_nested_ctx_and_return_nb_waited_tasks(s); + } + } + + nb_waited_tasks += _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx_id); + return nb_waited_tasks; +} + +/* + * We wait for all the tasks that have already been submitted. Note that a + * regenerable is not considered finished until it was explicitly set as + * non-regenerale anymore (eg. from a callback). + */ +int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void) +{ + unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); + unsigned sched_ctx_id = nsched_ctxs == 1 ? 
/*
 * Wait for the already submitted tasks, then refresh the global performance
 * counter sample unless the performance counters are paused.
 * Always returns 0.
 */
int starpu_task_wait_for_all(void)
{
	/* The number of waited tasks is deliberately dropped here. */
	(void) _starpu_task_wait_for_all_and_return_nb_waited_tasks();

	if (_starpu_perf_counter_paused())
		return 0;

	_starpu_perf_counter_update_global_sample();
	return 0;
}
*/ + STARPU_AYU_BARRIER(); + return ret; +} + +int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx) +{ + _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx); + if (!_starpu_perf_counter_paused()) + _starpu_perf_counter_update_global_sample(); + return 0; +} + +/* + * We wait until there's a certain number of the tasks that have already been + * submitted left. Note that a regenerable is not considered finished until it + * was explicitly set as non-regenerale anymore (eg. from a callback). + */ +int starpu_task_wait_for_n_submitted(unsigned n) +{ + unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); + unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context(); + + /* if there is no indication about which context to wait, + we wait for all tasks submitted to starpu */ + if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS) + { + _STARPU_DEBUG("Waiting for all tasks\n"); + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_n_submitted must not be called from a task or callback"); + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + if(config->topology.nsched_ctxs == 1) + _starpu_wait_for_n_submitted_tasks_of_sched_ctx(0, n); + else + { + int s; + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].do_schedule == 1) + { + _starpu_wait_for_n_submitted_tasks_of_sched_ctx(config->sched_ctxs[s].id, n); + } + } + } + + } + else + { + _STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id); + _starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx_id, n); + } + if (!_starpu_perf_counter_paused()) + _starpu_perf_counter_update_global_sample(); + return 0; +} + +int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx, unsigned n) +{ + _starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx, n); + + if (!_starpu_perf_counter_paused()) + _starpu_perf_counter_update_global_sample(); + return 0; +} +/* + * We wait until there is no ready task 
any more (i.e. StarPU will not be able + * to progress any more). + */ +int starpu_task_wait_for_no_ready(void) +{ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_no_ready must not be called from a task or callback"); + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + if(config->topology.nsched_ctxs == 1) + { + _starpu_sched_do_schedule(0); + _starpu_wait_for_no_ready_of_sched_ctx(0); + } + else + { + int s; + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].do_schedule == 1) + { + _starpu_sched_do_schedule(config->sched_ctxs[s].id); + } + } + for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(config->sched_ctxs[s].do_schedule == 1) + { + _starpu_wait_for_no_ready_of_sched_ctx(config->sched_ctxs[s].id); + } + } + } + + if (!_starpu_perf_counter_paused()) + _starpu_perf_counter_update_global_sample(); + return 0; +} + +void starpu_iteration_push(unsigned long iteration) +{ + unsigned id = _starpu_sched_ctx_get_current_context(); + STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(id); + unsigned level = ctx->iteration_level++; + if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0])) + ctx->iterations[level] = iteration; +} + +void starpu_iteration_pop(void) +{ + unsigned id = _starpu_sched_ctx_get_current_context(); + STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS); + struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(id); + STARPU_ASSERT_MSG(ctx->iteration_level > 0, "calls to starpu_iteration_pop must match starpu_iteration_push calls"); + unsigned level = ctx->iteration_level--; + if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0])) + ctx->iterations[level] = -1; +} + +void starpu_do_schedule(void) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + if(config->topology.nsched_ctxs == 1) + _starpu_sched_do_schedule(0); + else + { + int s; + for(s = 0; s < 
/*
 * Clear config->submitting and, if no submitted task is left, also clear
 * config->running and run _starpu_check_nsubmitted_tasks_of_sched_ctx() on
 * every scheduling context whose do_schedule flag is 1.
 *
 * Everything is done with config->submitted_mutex held.
 */
void
starpu_drivers_request_termination(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	/* Sample the count under the mutex, before flipping the flags. */
	int nsubmitted = starpu_task_nsubmitted();
	config->submitting = 0;
	if (nsubmitted == 0)
	{
		/* The ANNOTATE_* calls bracket the store to config->running;
		 * presumably race-detector annotations -- TODO confirm. */
		ANNOTATE_HAPPENS_AFTER(&config->running);
		config->running = 0;
		ANNOTATE_HAPPENS_BEFORE(&config->running);
		/* Write barrier issued before the per-context checks below. */
		STARPU_WMB();
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				_starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}

	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
}
*/ +struct starpu_task *starpu_task_get_current(void) +{ + return (struct starpu_task *) STARPU_PTHREAD_GETSPECIFIC(current_task_key); +} + +void _starpu_set_current_task(struct starpu_task *task) +{ + STARPU_PTHREAD_SETSPECIFIC(current_task_key, task); +} + +struct starpu_task *starpu_worker_get_current_task(unsigned workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->pipeline_length) + return worker->current_tasks[worker->first_task]; + else + return worker->current_task; +} + +int starpu_task_get_current_data_node(unsigned i) +{ + struct starpu_task *task = starpu_task_get_current(); + if (!task) + return -1; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + unsigned orderedindex = descrs[i].orderedindex; + return descrs[orderedindex].node; +} + +#ifdef STARPU_OPENMP +/* Prepare the fields of the current task for accepting a new set of + * dependencies in anticipation of becoming a continuation. + * + * When the task becomes 'continued', it will only be queued again when the new + * set of dependencies is fulfilled. 
*/ +void _starpu_task_prepare_for_continuation(void) +{ + _starpu_job_prepare_for_continuation(_starpu_get_job_associated_to_task(starpu_task_get_current())); +} + +void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit, + void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg) +{ + _starpu_job_prepare_for_continuation_ext(_starpu_get_job_associated_to_task(starpu_task_get_current()), + continuation_resubmit, continuation_callback_on_sleep, continuation_callback_on_sleep_arg); +} + +void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg) +{ + _starpu_job_set_omp_cleanup_callback(_starpu_get_job_associated_to_task(task), + omp_cleanup_callback, omp_cleanup_callback_arg); +} +#endif + +/* + * Returns 0 if tasks does not use any multiformat handle, 1 otherwise. + */ +int +_starpu_task_uses_multiformat_handles(struct starpu_task *task) +{ + unsigned i; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + for (i = 0; i < nbuffers; i++) + { + if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i))) + return 1; + } + + return 0; +} + +/* + * Checks whether the given handle needs to be converted in order to be used on + * the node given as the second argument. + */ +int +_starpu_handle_needs_conversion_task(starpu_data_handle_t handle, + unsigned int node) +{ + return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node)); +} + +int +_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle, + enum starpu_node_kind node_kind) +{ + /* + * Here, we assume that CUDA devices and OpenCL devices use the + * same data structure. A conversion is only needed when moving + * data from a CPU to a GPU, or the other way around. 
+ */ + switch (node_kind) + { + case STARPU_CPU_RAM: + case STARPU_MPI_MS_RAM: + case STARPU_TCPIP_MS_RAM: + switch(starpu_node_get_kind(handle->mf_node)) + { + case STARPU_CPU_RAM: + case STARPU_MPI_MS_RAM: + case STARPU_TCPIP_MS_RAM: + return 0; + default: + return 1; + } + break; + default: + switch(starpu_node_get_kind(handle->mf_node)) + { + case STARPU_CPU_RAM: + case STARPU_MPI_MS_RAM: + case STARPU_TCPIP_MS_RAM: + return 1; + default: + return 0; + } + break; + } + /* that instruction should never be reached */ + return -EINVAL; +} + +void starpu_task_set_implementation(struct starpu_task *task, unsigned impl) +{ + _starpu_get_job_associated_to_task(task)->nimpl = impl; +} + +unsigned starpu_task_get_implementation(struct starpu_task *task) +{ + return _starpu_get_job_associated_to_task(task)->nimpl; +} + +unsigned long starpu_task_get_job_id(struct starpu_task *task) +{ + return _starpu_get_job_associated_to_task(task)->job_id; +} + +static starpu_pthread_t watchdog_thread; + +static int sleep_some(float timeout) +{ + /* If we do a sleep(timeout), we might have to wait too long at the end of the computation. */ + /* To avoid that, we do several sleep() of 1s (and check after each if starpu is still running) */ + float t; + for (t = timeout ; t > 1.; t--) + { + starpu_sleep(1.); + if (!_starpu_machine_is_running()) + /* Application finished, don't bother finishing the sleep */ + return 0; + } + /* and one final sleep (of less than 1 s) with the rest (if needed) */ + if (t > 0.) 
/*
 * Watchdog thread body: check from time to time that StarPU does finish some
 * tasks.
 *
 * arg is the text of the STARPU_WATCHDOG_TIMEOUT setting; it is parsed as an
 * integer and divided by 1000000 before being used as the sleep_some()
 * period, and it is reported with an "s" suffix in the message below.
 *
 * After an initial sleep of watchdog_delay / 1000000, each round records the
 * number of submitted tasks, clears config->watchdog_ok, sleeps for the
 * timeout, and complains if watchdog_ok is still clear and the non-zero
 * number of submitted tasks has not changed. The thread returns as soon as
 * sleep_some() returns 0 or _starpu_machine_is_running() becomes false.
 */
static void *watchdog_func(void *arg)
{
	char *timeout_env = arg;
	float timeout, delay;

#ifdef _MSC_VER
	timeout = ((float) _atoi64(timeout_env)) / 1000000;
#else
	timeout = ((float) atoll(timeout_env)) / 1000000;
#endif
	delay = ((float) watchdog_delay) / 1000000;
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	starpu_pthread_setname("watchdog");

	/* Initial grace period before the first check. */
	if (!sleep_some(delay))
		return NULL;

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	while (_starpu_machine_is_running())
	{
		int last_nsubmitted = starpu_task_nsubmitted();
		/* NOTE(review): presumably set back to non-zero elsewhere when a
		 * task completes -- confirm against the task termination path. */
		config->watchdog_ok = 0;
		/* Do not hold the mutex while sleeping. */
		STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);

		/* The mutex is not held here, so returning directly is fine. */
		if (!sleep_some(timeout))
			return NULL;

		STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
		/* Complain only if tasks were pending and nothing moved. */
		if (!config->watchdog_ok && last_nsubmitted
		    && last_nsubmitted == starpu_task_nsubmitted())
		{
			/* A user hook replaces the default message. */
			if (watchdog_hook == NULL)
				_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
					    timeout);
			else
				watchdog_hook(watchdog_hook_arg);

			if (watchdog_crash)
			{
				_STARPU_MSG("Crashing the process\n");
				raise(SIGABRT);
			}
			else if (watchdog_hook == NULL)
				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
		}
		/* Only shout again after another period */
		config->watchdog_ok = 1;
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
	return NULL;
}
(!timeout_env) + return; + + STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, timeout_env); +} + +void _starpu_watchdog_shutdown(void) +{ + char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT"); + + if (!timeout_env) + return; + + STARPU_PTHREAD_JOIN(watchdog_thread, NULL); +} + +/* Transaction clean up callback called when the transaction trs_end + * task completes. */ +static void _starpu_transaction_callback(void *_p_trs) +{ + struct starpu_transaction *p_trs = _p_trs; + + _starpu_spin_destroy(&p_trs->lock); + starpu_data_unregister_submit(p_trs->handle); + starpu_free(p_trs); +} + +/* Task function for the trs_begin and trs_begin_no_sync codelets. */ +static void _starpu_transaction_begin(void *buffers[], void *cl_args) +{ + struct starpu_transaction *p_trs = cl_args; + STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); + _starpu_spin_lock(&p_trs->lock); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); + struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list); + STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_inactive); + _starpu_spin_unlock(&p_trs->lock); + + int epoch_confirmed = 1; + + /* If the transaction has a user 'do_start_func', we call it to + * decide whether the new epoch is confirmed or cancelled. */ + if (p_trs->do_start_func != NULL) + { + void * sync_buf = p_epoch->do_sync ? buffers[1] : NULL; + epoch_confirmed = p_trs->do_start_func(sync_buf, p_epoch->do_start_arg); + } + + if (epoch_confirmed) + { + p_epoch->state = _starpu_trs_epoch_confirmed; + } + else + { + p_epoch->state = _starpu_trs_epoch_cancelled; + } + STARPU_WMB(); +} + +/* Task function for the trs_end codelet, in charge of cleaning the last epoch. 
*/ +static void _starpu_transaction_end(void *buffers[], void *cl_args) +{ + (void)buffers; + struct starpu_transaction *p_trs = cl_args; + _starpu_spin_lock(&p_trs->lock); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); + struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_pop_front(&p_trs->epoch_list); + STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_confirmed + || p_epoch->state == _starpu_trs_epoch_cancelled); + _starpu_spin_unlock(&p_trs->lock); + + p_epoch->state = _starpu_trs_epoch_terminated; + + _starpu_trs_epoch_delete(p_epoch); + p_epoch = NULL; + + /* TODO: transition to end */ + STARPU_ASSERT(_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); +} + +/* Task function for the trs_next_epoch codelet, in charge of transitioning from a + * an epoch to the next. */ +static void _starpu_transaction_next_epoch(void *buffers[], void *cl_args) +{ + struct starpu_transaction *p_trs = cl_args; + _starpu_spin_lock(&p_trs->lock); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); + struct _starpu_trs_epoch *p_previous_epoch = _starpu_trs_epoch_list_pop_front(&p_trs->epoch_list); + STARPU_ASSERT((p_previous_epoch->state == _starpu_trs_epoch_confirmed) + || (p_previous_epoch->state == _starpu_trs_epoch_cancelled)); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); + struct _starpu_trs_epoch *p_next_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list); + STARPU_ASSERT(p_next_epoch->state == _starpu_trs_epoch_inactive); + _starpu_spin_unlock(&p_trs->lock); + + p_previous_epoch->state = _starpu_trs_epoch_terminated; + _starpu_trs_epoch_delete(p_previous_epoch); + + /* TODO: transition to next epoch */ + + int epoch_confirmed = 1; + + if (p_trs->do_start_func != NULL) + { + void * sync_buf = p_next_epoch->do_sync ? 
buffers[1] : NULL; + epoch_confirmed = p_trs->do_start_func(sync_buf, p_next_epoch->do_start_arg); + } + + if (epoch_confirmed) + { + p_next_epoch->state = _starpu_trs_epoch_confirmed; + } + else + { + p_next_epoch->state = _starpu_trs_epoch_cancelled; + } + STARPU_WMB(); +} + +/* Transaction begin codelet, without implicit sync on a previously + * accessed data. */ +struct starpu_codelet _starpu_codelet_trs_begin_no_sync = +{ + .cpu_funcs = {_starpu_transaction_begin}, + .modes = {STARPU_W}, + .nbuffers = 1, + .model = &starpu_perfmodel_nop, + .name = "starpu_transaction_begin_no_sync" +}; + +/* Transaction begin codelet, with an implicit sync on a previously + * accessed data. */ +struct starpu_codelet _starpu_codelet_trs_begin = +{ + .cpu_funcs = {_starpu_transaction_begin}, + .modes = {STARPU_W, STARPU_RW}, + .nbuffers = 2, + .model = &starpu_perfmodel_nop, + .name = "starpu_transaction_begin" +}; + +/* Transaction end codelet. */ +struct starpu_codelet _starpu_codelet_trs_end = +{ + .cpu_funcs = {_starpu_transaction_end}, + .modes = {STARPU_RW}, + .nbuffers = 1, + .model = &starpu_perfmodel_nop, + .name = "starpu_transaction_end" +}; + +/* Epoch transition codelet. */ +struct starpu_codelet _starpu_codelet_trs_next_epoch = +{ + .cpu_funcs = {_starpu_transaction_next_epoch}, + .modes = {STARPU_RW}, + .nbuffers = 1, + .model = &starpu_perfmodel_nop, + .name = "starpu_transaction_next_epoch" +}; + +/* Main entry point for creating and activating a transaction object. + * + * . do_start_func: a boolean function to decide whether each new epoch start should + * be confirmed or not. + * . do_start_sync_handle: a starpu data handle on which the transaction + * start should depend on, or NULL if no sync is required. The handle is + * passed to do_start_func() + * . 
do_start_arg: an argument passed to do_start_func().*/ +static struct starpu_transaction *_do_starpu_transaction_open(int(*do_start_func)(void *buffer, void *arg), starpu_data_handle_t do_start_sync_handle, void *do_start_arg) +{ + struct starpu_transaction *p_trs = NULL; + int ret = starpu_malloc((void **)&p_trs, sizeof(*p_trs)); + STARPU_ASSERT(ret == 0); + _starpu_spin_init(&p_trs->lock); + _starpu_trs_epoch_list_init(&p_trs->epoch_list); + + p_trs->do_start_func = do_start_func; + + p_trs->dummy_data = 0; + starpu_variable_data_register(&p_trs->handle, STARPU_MAIN_RAM, (uintptr_t)&p_trs->dummy_data, sizeof(p_trs->dummy_data)); + + struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_new(); + + struct starpu_task *task = starpu_task_create(); + task->callback_func = NULL; + task->cl_arg = p_trs; + task->handles[0] = p_trs->handle; + if (do_start_sync_handle != NULL) + { + p_epoch->do_sync = 1; + task->cl = &_starpu_codelet_trs_begin; + task->handles[1] = do_start_sync_handle; + } + else + { + p_epoch->do_sync = 0; + task->cl = &_starpu_codelet_trs_begin_no_sync; + } + p_epoch->is_begin = 1; + p_epoch->state = _starpu_trs_epoch_inactive; + p_epoch->do_start_arg = do_start_arg; + _starpu_trs_epoch_list_push_back(&p_trs->epoch_list, p_epoch); + p_trs->state = _starpu_trs_initialized; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + starpu_data_unregister(p_trs->handle); + starpu_free(p_trs); + return NULL; + } + STARPU_ASSERT(ret == 0); + return p_trs; +} + +struct starpu_transaction *starpu_transaction_open(int(*do_start_func)(void *buffer, void *arg), void *do_start_arg) +{ + return _do_starpu_transaction_open(do_start_func, NULL, do_start_arg); +} + +void starpu_transaction_close(struct starpu_transaction *p_trs) +{ + STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); + struct starpu_task *task = starpu_task_create(); + task->cl = &_starpu_codelet_trs_end; + task->callback_func = _starpu_transaction_callback; + task->callback_arg = p_trs; + 
task->handles[0] = p_trs->handle; + task->cl_arg = p_trs; + + _starpu_spin_lock(&p_trs->lock); + STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); + struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_back(&p_trs->epoch_list); + _starpu_spin_unlock(&p_trs->lock); + p_epoch->is_end = 1; + + int ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); +} + +void starpu_transaction_next_epoch(struct starpu_transaction *p_trs, void *do_start_arg) +{ + STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); + struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_new(); + struct starpu_task *task = starpu_task_create(); + task->cl = &_starpu_codelet_trs_next_epoch; + task->handles[0] = p_trs->handle; + task->cl_arg = p_trs; + p_epoch->do_sync = 0; + p_epoch->do_start_arg = do_start_arg; + p_epoch->state = _starpu_trs_epoch_inactive; + + _starpu_spin_lock(&p_trs->lock); + _starpu_trs_epoch_list_push_back(&p_trs->epoch_list, p_epoch); + _starpu_spin_unlock(&p_trs->lock); + int ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); +} + +static void _starpu_ft_check_support(const struct starpu_task *task) +{ + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned i; + + for (i = 0; i < nbuffers; i++) + { + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i); + STARPU_ASSERT_MSG (mode == STARPU_R || mode == STARPU_W, + "starpu_task_failed is only supported for tasks with access modes STARPU_R and STARPU_W"); + } +} + +struct starpu_task *starpu_task_ft_create_retry +(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *)) +{ + /* Create a new task to actually perform the result */ + struct starpu_task *new_task = starpu_task_create(); + + *new_task = *template_task; + new_task->prologue_callback_func = NULL; + /* XXX: cl_arg needs to be duplicated */ + STARPU_ASSERT_MSG(!meta_task->cl_arg_free || !meta_task->cl_arg, "not supported yet"); + 
STARPU_ASSERT_MSG(!meta_task->callback_func, "not supported"); + new_task->callback_func = check_ft; + new_task->callback_arg = (void*) meta_task; + new_task->callback_arg_free = 0; + new_task->prologue_callback_arg_free = 0; + STARPU_ASSERT_MSG(!new_task->prologue_callback_pop_arg_free, "not supported"); + new_task->use_tag = 0; + new_task->synchronous = 0; + new_task->destroy = 1; + new_task->regenerate = 0; + new_task->no_submitorder = 1; + new_task->failed = 0; + new_task->scheduled = 0; + new_task->prefetched = 0; + new_task->status = STARPU_TASK_INIT; + new_task->profiling_info = NULL; + new_task->prev = NULL; + new_task->next = NULL; + new_task->starpu_private = NULL; + new_task->omp_task = NULL; + + return new_task; +} + +static void _starpu_default_check_ft(void *arg) +{ + struct starpu_task *meta_task = arg; + struct starpu_task *current_task = starpu_task_get_current(); + struct starpu_task *new_task; + int ret; + + if (!current_task->failed) + { + starpu_task_ft_success(meta_task); + return; + } + + new_task = starpu_task_ft_create_retry +(meta_task, current_task, _starpu_default_check_ft); + + ret = starpu_task_submit_nodeps(new_task); + STARPU_ASSERT(!ret); +} + +void starpu_task_ft_prologue(void *arg) +{ + struct starpu_task *meta_task = starpu_task_get_current(); + struct starpu_task *new_task; + void (*check_ft)(void*) = arg; + int ret; + + if (!check_ft) + check_ft = _starpu_default_check_ft; + + /* Create a task which will do the actual computation */ + new_task = starpu_task_ft_create_retry +(meta_task, meta_task, check_ft); + + ret = starpu_task_submit_nodeps(new_task); + STARPU_ASSERT(!ret); + + /* Make the parent task wait for the result getting correct */ + starpu_task_end_dep_add(meta_task, 1); + meta_task->where = STARPU_NOWHERE; +} + +void starpu_task_ft_failed(struct starpu_task *task) +{ + _starpu_ft_check_support(task); + + task->failed = 1; +} + +void starpu_task_ft_success(struct starpu_task *meta_task) +{ + 
starpu_task_end_dep_release(meta_task); +} + +char *starpu_task_status_get_as_string(enum starpu_task_status status) +{ + switch(status) + { + case(STARPU_TASK_INIT) : return "STARPU_TASK_INIT"; + case(STARPU_TASK_BLOCKED): return "STARPU_TASK_BLOCKED"; + case(STARPU_TASK_READY): return "STARPU_TASK_READY"; + case(STARPU_TASK_RUNNING): return "STARPU_TASK_RUNNING"; + case(STARPU_TASK_FINISHED): return "STARPU_TASK_FINISHED"; + case(STARPU_TASK_BLOCKED_ON_TAG): return "STARPU_TASK_BLOCKED_ON_TAG"; + case(STARPU_TASK_BLOCKED_ON_TASK): return "STARPU_TASK_BLOCKED_ON_TASK"; + case(STARPU_TASK_BLOCKED_ON_DATA): return "STARPU_TASK_BLOCKED_ON_DATA"; + case(STARPU_TASK_STOPPED): return "STARPU_TASK_STOPPED"; + default: return "STARPU_TASK_unknown_status"; + } +} + +void starpu_codelet_nop_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +struct starpu_codelet starpu_codelet_nop = +{ + .cpu_funcs = {starpu_codelet_nop_func}, + .cuda_funcs = {starpu_codelet_nop_func}, + .hip_funcs = {starpu_codelet_nop_func}, + .opencl_funcs = {starpu_codelet_nop_func}, + .cpu_funcs_name = {"starpu_codelet_nop_func"}, + .model = NULL, + .nbuffers = 0 +}; diff --git a/src/core/task.h b/src/core/task.h new file mode 100644 index 0000000..b4ea7bb --- /dev/null +++ b/src/core/task.h @@ -0,0 +1,208 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __CORE_TASK_H__ +#define __CORE_TASK_H__ + +/** @file */ + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Internal version of starpu_task_destroy: don't check task->destroy flag */ +void _starpu_task_destroy(struct starpu_task *task); + +#ifdef STARPU_OPENMP +/** Test for the termination of the task. + * Call starpu_task_destroy if required and the task is terminated. */ +int _starpu_task_test_termination(struct starpu_task *task); +#endif + +/** A pthread key is used to store the task currently executed on the thread. + * _starpu_task_init initializes this pthread key and + * _starpu_set_current_task updates its current value. */ +void _starpu_task_init(void); +void _starpu_task_deinit(void); +void _starpu_set_current_task(struct starpu_task *task); + +int _starpu_submit_job(struct _starpu_job *j, int nodeps); + +void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check); + +#define _STARPU_JOB_UNSET ((struct _starpu_job *) NULL) +#define _STARPU_JOB_SETTING ((struct _starpu_job *) 1) + +/** Returns the job structure (which is the internal data structure associated + * to a task). 
/** Return the job stored in task->starpu_private.
 *
 * Fast path: if the pointer read from the task is neither _STARPU_JOB_UNSET
 * nor _STARPU_JOB_SETTING, it is returned directly. Otherwise the work is
 * delegated to _starpu_get_job_associated_to_task_slow(), which is given the
 * value that was just read.
 */
static inline struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	/* Read through a volatile-qualified pointer so the field is actually
	 * loaded here. NOTE(review): presumably because another thread may be
	 * publishing the job concurrently -- confirm in the slow path. */
	struct _starpu_job *job = *(struct _starpu_job * volatile *) &task->starpu_private;

	if (STARPU_LIKELY(job != _STARPU_JOB_UNSET && job != _STARPU_JOB_SETTING))
	{
		/* Already available */
		/* Read barrier issued before the job is handed to the caller. */
		STARPU_RMB();
		return job;
	}

	return _starpu_get_job_associated_to_task_slow(task, job);
}
/** Store interface as the i-th interface of task: in task->dyn_interfaces
 * when task->dyn_handles is set, in task->interfaces otherwise.
 * Every argument is parenthesised so that expressions such as `p + 1` or
 * `a ? b : c` can be passed safely. task is still evaluated more than once,
 * so it must not have side effects. */
#define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if ((task)->dyn_handles) (task)->dyn_interfaces[(i)] = (interface); else (task)->interfaces[(i)] = (interface);} while(0)
/** Return the interface array in use for task: task->dyn_interfaces when
 * task->dyn_handles is set, task->interfaces otherwise. */
#define _STARPU_TASK_GET_INTERFACES(task) (((task)->dyn_handles) ? (task)->dyn_interfaces : (task)->interfaces)
/** State of a transaction: its lock-protected list of epochs, the data handle
 * standing for the transaction object, and the user callback deciding whether
 * an epoch is started or cancelled. */
struct starpu_transaction
{
	/** epoch list lock */
	struct _starpu_spinlock lock;
	/** list of the epochs of the transaction (struct _starpu_trs_epoch elements) */
	struct _starpu_trs_epoch_list epoch_list;

	/** handle of the transaction object */
	starpu_data_handle_t handle;

	/** dummy data area referenced by the handle */
	int dummy_data;

	/** user function deciding whether to start or cancel an epoch execution; buffer[0]
	 * optionally refers to the object of a user-supplied handle */
	int (*do_start_func)(void *buffer, void* arg);
	/** initialization state of the transaction (see enum _starpu_trs_state) */
	enum _starpu_trs_state state;

	/** flags, unused for now */
	int flags;
};
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Initialize a task bundle */ +void starpu_task_bundle_create(starpu_task_bundle_t *bundle) +{ + _STARPU_CALLOC(*bundle, 1, sizeof(struct _starpu_task_bundle)); + + STARPU_PTHREAD_MUTEX_INIT0(&(*bundle)->mutex, NULL); + /* Of course at the beginning a bundle is open, + * user can insert and remove tasks from it */ + //(*bundle)->closed = 0; + + /* Start with an empty list */ + //(*bundle)->list = NULL; + +} + +int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task) +{ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + if (bundle->closed) + { + /* The bundle is closed, we cannot add task anymore */ + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + return -EPERM; + } + + if (task->status != STARPU_TASK_INIT) + { + /* The task has already been submitted, it's too late to put it + * into a bundle now. 
*/ + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + return -EINVAL; + } + + /* Insert a task at the end of the bundle */ + struct _starpu_task_bundle_entry *entry; + _STARPU_MALLOC(entry, sizeof(struct _starpu_task_bundle_entry)); + entry->task = task; + entry->next = NULL; + + if (!bundle->list) + { + bundle->list = entry; + } + else + { + struct _starpu_task_bundle_entry *item; + item = bundle->list; + while (item->next) + item = item->next; + + item->next = entry; + } + + /* Mark the task as belonging the bundle */ + task->bundle = bundle; + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + return 0; +} + +int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task) +{ + struct _starpu_task_bundle_entry *item; + + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + item = bundle->list; + + /* List is empty, there is no way the task + * belong to it */ + if (!item) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + return -ENOENT; + } + + STARPU_ASSERT_MSG(task->bundle == bundle, "Task %p was not in bundle %p, but in bundle %p", task, bundle, task->bundle); + task->bundle = NULL; + + if (item->task == task) + { + /* Remove the first element */ + bundle->list = item->next; + free(item); + + /* If the list is now empty, deinitialize the bundle */ + if (bundle->closed && bundle->list == NULL) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + _starpu_task_bundle_destroy(bundle); + return 0; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + return 0; + } + + /* Go through the list until we find the right task, + * then we delete it */ + while (item->next) + { + struct _starpu_task_bundle_entry *next; + next = item->next; + + if (next->task == task) + { + /* Remove the next element */ + item->next = next->next; + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + free(next); + return 0; + } + + item = next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + + /* We could not find the task in the bundle */ + return -ENOENT; +} + +/* Close a 
bundle. No task can be added to a closed bundle. Tasks can still be + * removed from a closed bundle. A closed bundle automatically gets + * deinitialized when it becomes empty. A closed bundle cannot be reopened. */ +void starpu_task_bundle_close(starpu_task_bundle_t bundle) +{ + STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); + + /* If the bundle is already empty, we deinitialize it now as the + * user closed it and thus don't intend to insert new tasks in it. */ + if (bundle->list == NULL) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + _starpu_task_bundle_destroy(bundle); + return; + } + + /* Mark the bundle as closed */ + bundle->closed = 1; + + STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); + +} + +void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle) +{ + /* Remove all entries from the bundle (which is likely to be empty) */ + while (bundle->list) + { + struct _starpu_task_bundle_entry *entry = bundle->list; + bundle->list = bundle->list->next; + free(entry); + } + + STARPU_PTHREAD_MUTEX_DESTROY(&bundle->mutex); + + free(bundle); +} + +void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + STARPU_ASSERT(listp); + + struct _starpu_handle_list *list = *listp; + + /* If the list is empty or the handle's address the smallest among the + * list, we insert it as first element */ + if (!list || list->handle > handle) + { + struct _starpu_handle_list *link; + _STARPU_MALLOC(link, sizeof(struct _starpu_handle_list)); + link->handle = handle; + link->mode = mode; + link->next = list; + *listp = link; + return; + } + + struct _starpu_handle_list *prev = list; + + /* Look for the same handle if already present in the list. 
+ * Else place it right before the smallest following handle */ + while (list && (handle >= list->handle)) + { + prev = list; + list = list->next; + } + + if (prev->handle == handle) + { + /* The handle is already in the list, the merge both the access modes */ + prev->mode = (enum starpu_data_access_mode) ((int) prev->mode | (int) mode); + } + else + { + /* The handle was not in the list, we insert it after 'prev', thus right before + * 'list' which is the smallest following handle */ + struct _starpu_handle_list *link; + _STARPU_MALLOC(link, sizeof(struct _starpu_handle_list)); + link->handle = handle; + link->mode = mode; + link->next = prev->next; + prev->next = link; + } +} diff --git a/src/core/task_bundle.h b/src/core/task_bundle.h new file mode 100644 index 0000000..e052619 --- /dev/null +++ b/src/core/task_bundle.h @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __CORE_TASK_BUNDLE_H__ +#define __CORE_TASK_BUNDLE_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +/** struct _starpu_task_bundle_entry + * ================================ + * Purpose + * ======= + * Structure used to describe a linked list containing tasks in _starpu_task_bundle. + * + * Fields + * ====== + * task Pointer to the task structure. 
/** struct _starpu_task_bundle
 * ==========================
 * Purpose
 * =======
 * Structure describing a list of tasks that should be scheduled on the same
 * worker whenever it's possible.
 * It must be considered as a hint given to the scheduler as there is no guarantee that
 * they will be executed on the same worker.
 *
 * Fields
 * ======
 * mutex		Mutex protecting the structure.
 *
 * list			Singly-linked list of the tasks included in the bundle
 *			(NULL when the bundle is empty).
 *
 * closed		Non-zero once the user has closed the bundle: no task
 *			can be inserted any more, tasks can still be removed, and
 *			the bundle is destroyed as soon as it becomes empty.
 */

struct _starpu_task_bundle
{
	/** Mutex protecting the bundle */
	starpu_pthread_mutex_t mutex;

	/** First entry of the linked list of tasks, NULL if the bundle is empty */
	struct _starpu_task_bundle_entry *list;

	/** Non-zero once the bundle has been closed */
	int closed;
};

/** struct _starpu_handle_list
 * ==========================
 * Purpose
 * =======
 * Structure describing a list of handles sorted by address, to speed up
 * the search for an element.
 * The list cannot contain duplicate handles.
 *
 * Fields
 * ======
 * handle		Pointer to the handle structure.
 *
 * mode			Total access mode over the whole bundle.
 *
 * next			Pointer to the next element in the linked list.
 */

struct _starpu_handle_list
{
	starpu_data_handle_t handle;
	enum starpu_data_access_mode mode;
	struct _starpu_handle_list *next;
};
/** _starpu_insertion_handle_sorted
 * ========================
 * Purpose
 * =======
 * Insert a handle in a _starpu_handle_list whose elements are sorted
 * in increasing order of handle address.
 * As the list doesn't accept duplicate elements, a handle that is
 * already contained in the list is not inserted again; instead the given
 * access mode is merged (bitwise OR) into the mode of the existing element.
 *
 * Arguments
 * =========
 * listp	(input, output)
 *		Pointer to the pointer to the first element of the list. Must not be NULL.
 *		When the list is empty, or when the inserted handle is smaller than
 *		every handle of the list, the first element has changed when the
 *		call returns.
 *
 * handle	(input)
 *		Handle to insert in the list.
 *
 * mode		(input)
 *		Access mode of the handle.
 */
void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode);
+ */ + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef STARPU_HAVE_HWLOC +#include +#ifndef HWLOC_API_VERSION +#define HWLOC_OBJ_PU HWLOC_OBJ_PROC +#endif +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif + +#endif + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#ifdef STARPU_SIMGRID +#include +#endif + +static int main_thread_cpuid = -1; + +static unsigned topology_is_initialized = 0; +static int nobind; +static int numa_enabled = -1; + +/* For checking whether two workers share the same PU, indexed by PU number */ +static int cpu_worker[STARPU_MAXCPUS]; +static char * cpu_name[STARPU_MAXCPUS]; +static unsigned nb_numa_nodes = 0; +static int numa_memory_nodes_to_hwloclogid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */ +static int numa_memory_nodes_to_physicalid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in physical id */ +static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES]; + +#define STARPU_NUMA_UNINITIALIZED (-2) +#define STARPU_NUMA_MAIN_RAM (-1) + +unsigned _starpu_may_bind_automatically[STARPU_NARCH] = { 0 }; + +unsigned starpu_memory_nodes_get_numa_count(void) +{ + return nb_numa_nodes; +} + +#if defined(STARPU_HAVE_HWLOC) +hwloc_obj_t _starpu_numa_get_obj(hwloc_obj_t obj) +{ +#if HWLOC_API_VERSION >= 0x00020000 + while (obj && obj->memory_first_child == NULL) + obj = obj->parent; + + if (!obj) + return NULL; + + return obj->memory_first_child; +#else + while (obj && obj->type != HWLOC_OBJ_NUMANODE) + obj = obj->parent; + + /* Note: If we don't find a "node" obj before the root, this means + * hwloc does not know whether there are numa nodes or not, so + * we should not use a per-node sampling in that case. 
*/ + return obj; +#endif +} +static int numa_get_logical_id(hwloc_obj_t obj) +{ + STARPU_ASSERT(obj); + obj = _starpu_numa_get_obj(obj); + if (!obj) + return 0; + return obj->logical_index; +} + +static int numa_get_physical_id(hwloc_obj_t obj) +{ + STARPU_ASSERT(obj); + obj = _starpu_numa_get_obj(obj); + if (!obj) + return 0; + return obj->os_index; +} +#endif + +int _starpu_get_logical_numa_node_worker(unsigned workerid) +{ +#if defined(STARPU_HAVE_HWLOC) + STARPU_ASSERT(numa_enabled != -1); + if (numa_enabled) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + + switch(worker->arch) + { + case STARPU_CPU_WORKER: + { + hwloc_obj_t obj; + obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); + return numa_get_logical_id(obj); + } + default: + STARPU_ABORT(); + } + + } + else +#endif + { + (void) workerid; /* unused */ + return STARPU_NUMA_MAIN_RAM; + } +} + +/* This returns the exact NUMA node next to a worker */ +static int _starpu_get_physical_numa_node_worker(unsigned workerid) +{ +#if defined(STARPU_HAVE_HWLOC) + STARPU_ASSERT(numa_enabled != -1); + if (numa_enabled) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + + switch(worker->arch) + { + case STARPU_CPU_WORKER: + { + hwloc_obj_t obj; + obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); + return numa_get_physical_id(obj); + } + default: + STARPU_ABORT(); + } + + } + else +#endif + { + (void) workerid; /* unused */ + return STARPU_NUMA_MAIN_RAM; + } +} + +/* This returns the CPU NUMA memory close to a worker */ +static int 
_starpu_get_logical_close_numa_node_worker(unsigned workerid) +{ +#if defined(STARPU_HAVE_HWLOC) + STARPU_ASSERT(numa_enabled != -1); + if (numa_enabled) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + + hwloc_obj_t obj = NULL; + if (starpu_driver_info[worker->arch].get_hwloc_obj) + obj = starpu_driver_info[worker->arch].get_hwloc_obj(topology->hwtopology, worker->devid); + if (!obj) + obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); + + return numa_get_logical_id(obj); + } + else +#endif + { + (void) workerid; /* unused */ + return STARPU_NUMA_MAIN_RAM; + } +} + +//TODO change this in an array +int starpu_memory_nodes_numa_hwloclogid_to_id(int logid) +{ + unsigned n; + for (n = 0; n < nb_numa_nodes; n++) + if (numa_memory_nodes_to_hwloclogid[n] == logid) + return n; + return -1; +} + +int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id) +{ + STARPU_ASSERT(id < STARPU_MAXNUMANODES); + return numa_memory_nodes_to_hwloclogid[id]; +} + +int starpu_memory_nodes_numa_devid_to_id(unsigned id) +{ + STARPU_ASSERT(id < STARPU_MAXNUMANODES); + return numa_memory_nodes_to_physicalid[id]; +} + +//TODO change this in an array +int starpu_memory_nodes_numa_id_to_devid(int osid) +{ + unsigned n; + for (n = 0; n < nb_numa_nodes; n++) + if (numa_memory_nodes_to_physicalid[n] == osid) + return n; + return -1; +} + +// TODO: cache the values instead of looking in hwloc each time + +/* Avoid using this one, prefer _starpu_task_data_get_node_on_worker */ +int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned local_node) +{ + int node = STARPU_SPECIFIC_NODE_LOCAL; + if (task->cl->specific_nodes) + node = STARPU_CODELET_GET_NODE(task->cl, index); + switch (node) + { + case STARPU_SPECIFIC_NODE_LOCAL: + // TODO: rather 
find MCDRAM + node = local_node; + break; + case STARPU_SPECIFIC_NODE_CPU: + switch (starpu_node_get_kind(local_node)) + { + case STARPU_CPU_RAM: + node = local_node; + break; + default: + // TODO: rather take close NUMA node + node = STARPU_MAIN_RAM; + break; + } + break; + case STARPU_SPECIFIC_NODE_SLOW: + // TODO: rather leave in DDR + node = local_node; + break; + case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU: + { + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); + if (mode & STARPU_R) + { + if (mode & STARPU_R && task->handles[index]->per_node[local_node].state != STARPU_INVALID) + { + /* It is here already, rather access it from here */ + node = local_node; + } + else + { + /* It is not here already, do not bother moving it */ + node = STARPU_MAIN_RAM; + } + } + else + { + /* Nothing to read, consider where to write */ + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); + if (handle->wt_mask & (1 << STARPU_MAIN_RAM)) + /* Write through, better simply write to the main memory */ + node = STARPU_MAIN_RAM; + else + /* Better keep temporary data on the accelerator to save PCI bandwidth */ + node = local_node; + } + break; + } + case STARPU_SPECIFIC_NODE_NONE: + return -1; + } + return node; +} + +int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker) +{ + unsigned local_node = starpu_worker_get_memory_node(worker); + int node = STARPU_SPECIFIC_NODE_LOCAL; + if (task->cl->specific_nodes) + node = STARPU_CODELET_GET_NODE(task->cl, index); + switch (node) + { + case STARPU_SPECIFIC_NODE_LOCAL: + // TODO: rather find MCDRAM + node = local_node; + break; + case STARPU_SPECIFIC_NODE_CPU: + node = starpu_memory_nodes_numa_hwloclogid_to_id(_starpu_get_logical_close_numa_node_worker(worker)); + if (node == -1) + node = STARPU_MAIN_RAM; + break; + case STARPU_SPECIFIC_NODE_SLOW: + // TODO: rather leave in DDR + node = local_node; + break; + case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU: + { + enum 
starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); + if (mode & STARPU_R) + { + if (task->handles[index]->per_node[local_node].state != STARPU_INVALID) + { + /* It is here already, rather access it from here */ + node = local_node; + } + else + { + /* It is not here already, do not bother moving it */ + node = STARPU_MAIN_RAM; + } + } + else + { + /* Nothing to read, consider where to write */ + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); + if (handle->wt_mask & (1 << STARPU_MAIN_RAM)) + /* Write through, better simply write to the main memory */ + node = STARPU_MAIN_RAM; + else + /* Better keep temporary data on the accelerator to save PCI bandwidth */ + node = local_node; + } + break; + } + case STARPU_SPECIFIC_NODE_NONE: + return -1; + } + return node; +} + +struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d) +{ + unsigned nworkers = starpu_worker_get_count(); + unsigned workerid; + + for (workerid = 0; workerid < nworkers; workerid++) + { + if (starpu_worker_get_type(workerid) == d->type) + { + struct _starpu_worker *worker; + worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker->driver_ops); + STARPU_ASSERT_MSG(worker->driver_ops->is_devid, "The driver operation 'is_devid' is not defined"); + if (worker->driver_ops->is_devid(d, worker)) + return worker; + } + } + + return NULL; +} + +void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid, + int *current, int *workers_gpuid, + const char *varname, unsigned nhwgpus, + enum starpu_worker_archtype type) +{ + char *strval; + unsigned i; + + *current = 0; + + /* conf->workers_gpuid indicates the successive GPU identifier that + * should be used to bind the workers. It should be either filled + * according to the user's explicit parameters (from starpu_conf) or + * according to the varname env. variable. Otherwise, a + * round-robin policy is used to distributed the workers over the + * cores. 
*/ + + /* what do we use, explicit value, env. variable, or round-robin ? */ + strval = starpu_getenv(varname); + if (strval) + { + /* varname certainly contains less entries than + * STARPU_NMAXWORKERS, so we reuse its entries in a round + * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 + * 2". */ + unsigned wrap = 0; + unsigned number_of_entries = 0; + + char *endptr; + /* we use the content of the varname + * env. variable */ + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + if (!wrap) + { + long int val; + val = strtol(strval, &endptr, 10); + if (endptr != strval) + { + workers_gpuid[i] = (unsigned)val; + strval = endptr; + } + else + { + /* there must be at least one entry */ + STARPU_ASSERT(i != 0); + number_of_entries = i; + + /* there is no more values in the + * string */ + wrap = 1; + + workers_gpuid[i] = workers_gpuid[0]; + } + } + else + { + workers_gpuid[i] = + workers_gpuid[i % number_of_entries]; + } + } + } + else if (explicit_workers_gpuid) + { + /* we use the explicit value from the user */ + memcpy(workers_gpuid, + explicit_workers_gpuid, + STARPU_NMAXWORKERS*sizeof(unsigned)); + } + else + { + /* by default, we take a round robin policy */ + if (nhwgpus > 0) + for (i = 0; i < STARPU_NMAXWORKERS; i++) + workers_gpuid[i] = (unsigned)(i % nhwgpus); + + /* StarPU can use sampling techniques to bind threads + * correctly */ + _starpu_may_bind_automatically[type] = 1; + } +} + +int _starpu_get_next_devid(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype arch) +{ + if (topology->nworkers == STARPU_NMAXWORKERS) + // Already full! 
+ return -1; + + unsigned i = ((config->current_devid[arch]++) % config->topology.ndevices[arch]); + + return (int)config->topology.workers_devid[arch][i]; +} + +#ifndef STARPU_SIMGRID +#ifdef STARPU_HAVE_HWLOC +static void _starpu_allocate_topology_userdata(hwloc_obj_t obj) +{ + unsigned i; + + _STARPU_CALLOC(obj->userdata, 1, sizeof(struct _starpu_hwloc_userdata)); + for (i = 0; i < obj->arity; i++) + _starpu_allocate_topology_userdata(obj->children[i]); +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t child; + for (child = obj->io_first_child; child; child = child->next_sibling) + _starpu_allocate_topology_userdata(child); +#endif +} + +static void _starpu_deallocate_topology_userdata(hwloc_obj_t obj) +{ + unsigned i; + struct _starpu_hwloc_userdata *data = obj->userdata; + + STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1); + free(data); + for (i = 0; i < obj->arity; i++) + _starpu_deallocate_topology_userdata(obj->children[i]); +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t child; + for (child = obj->io_first_child; child; child = child->next_sibling) + _starpu_deallocate_topology_userdata(child); +#endif +} +#endif +#endif + +static void _starpu_init_topology(struct _starpu_machine_config *config) +{ + /* Discover the topology, meaning finding all the available PUs for + the compiled drivers. These drivers MUST have been initialized + before calling this function. The discovered topology is filled in + CONFIG. 
*/ + struct _starpu_machine_topology *topology = &config->topology; + + if (topology_is_initialized) + return; + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + if (config->conf.nopencl != 0) + _starpu_opencl_init(); +#endif +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + if (config->conf.ncuda != 0) + _starpu_init_cuda(); +#endif + +#if defined(STARPU_USE_HIP) + if (config->conf.nhip != 0) + _starpu_init_hip(); +#endif + +#if defined(STARPU_USE_MAX_FPGA) + if (config->conf.nmax_fpga != 0) + _starpu_init_max_fpga(); +#endif + + nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); + + topology->nhwdevices[STARPU_CPU_WORKER] = 1; + topology->nhwworker[STARPU_CPU_WORKER][0] = 0; + topology->nhwpus = 0; + topology->nusedpus = 0; + topology->firstusedpu = 0; + +#ifndef STARPU_SIMGRID +#ifdef STARPU_HAVE_HWLOC + int err; + err = hwloc_topology_init(&topology->hwtopology); + STARPU_ASSERT_MSG(err == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); + char *hwloc_input = starpu_getenv("STARPU_HWLOC_INPUT"); + if (hwloc_input && hwloc_input[0]) + { + err = hwloc_topology_set_xml(topology->hwtopology, hwloc_input); + if (err < 0) _STARPU_DISP("Could not load hwloc input %s\n", hwloc_input); + } + + _starpu_topology_filter(topology->hwtopology); + err = hwloc_topology_load(topology->hwtopology); + STARPU_ASSERT_MSG(err == 0, "Could not load Hwloc topology (%s)%s%s%s\n", strerror(errno), hwloc_input ? " (input " : "", hwloc_input ? hwloc_input : "", hwloc_input ? ")" : ""); + +#ifdef HAVE_HWLOC_CPUKINDS_GET_NR + int nr_kinds = hwloc_cpukinds_get_nr(topology->hwtopology, 0); + if (nr_kinds > 1) + _STARPU_DISP("Warning: there are several kinds of CPU on this system. 
For now StarPU assumes all CPU are equal\n"); +#endif + + _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology)); +#endif +#endif + +#ifdef STARPU_SIMGRID + config->topology.nhwworker[STARPU_CPU_WORKER][0] = + config->topology.nhwpus = + config->topology.nusedpus = _starpu_simgrid_get_nbhosts("CPU"); +#elif defined(STARPU_HAVE_HWLOC) + /* Discover the CPUs relying on the hwloc interface and fills CONFIG + * accordingly. */ + + config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_CORE); + config->pu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU); + + /* Would be very odd */ + STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE); + + if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN) + { + /* unknown, using logical processors as fallback */ + _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n"); + config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, + HWLOC_OBJ_PU); + } + + topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth); + topology->nhwpus = + topology->nusedpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth); + + if (starpu_getenv_number_default("STARPU_WORKERS_GETBIND", 1)) + { + /* Respect the existing binding */ + + hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_t log_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_t check_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_t log_coreset = hwloc_bitmap_alloc(); + unsigned n, i, j, first, last, weight; + int ret; +#ifdef STARPU_VERBOSE + char *str; +#endif + + do { + /* Get the process binding (e.g. 
provided by the job scheduler) */ + ret = hwloc_get_cpubind(topology->hwtopology, cpuset, HWLOC_CPUBIND_THREAD); + if (ret) + { + _STARPU_DISP("Warning: could not get current CPU binding: %s\n", strerror(errno)); + break; + } + +#ifdef STARPU_VERBOSE + hwloc_bitmap_asprintf(&str, cpuset); + _STARPU_DEBUG("Got cpu physical binding: %s\n", str); + free(str); +#endif + + /* Compute logical sets */ + n = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth); + for (i = 0; i < n; i++) + { + hwloc_obj_t pu = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, i), core; + + if (!hwloc_bitmap_isset(cpuset, pu->os_index)) + continue; + + hwloc_bitmap_set(log_cpuset, i); + + core = pu; + if (config->cpu_depth != config->pu_depth) + { + while (core && core->type != HWLOC_OBJ_CORE) + core = core->parent; + if (!core) + { + _STARPU_DISP("Warning: hwloc did not report a core above PU %d\n", i); + break; + } + } + + /* Include all PUs from the core to make the set contiguous, we will pick up just one from it by default */ + for (j = 0; j < core->arity; j++) + hwloc_bitmap_set(check_cpuset, core->children[j]->logical_index); + + hwloc_bitmap_set(log_coreset, core->logical_index); + } + +#ifdef STARPU_VERBOSE + hwloc_bitmap_asprintf(&str, log_cpuset); + _STARPU_DEBUG("This maps to logical binding: %s\n", str); + free(str); + hwloc_bitmap_asprintf(&str, check_cpuset); + _STARPU_DEBUG("Which we extend to: %s\n", str); + free(str); + hwloc_bitmap_asprintf(&str, log_coreset); + _STARPU_DEBUG("The logical core binding is thus: %s\n", str); + free(str); +#endif + + /* Check that PU numbers are consecutive */ + first = hwloc_bitmap_first(check_cpuset); + last = hwloc_bitmap_last(check_cpuset); + weight = hwloc_bitmap_weight(check_cpuset); + if (last - first + 1 != weight) + { + _STARPU_DISP("Warning: hwloc reported non-consecutive binding (first %u last %d weight %u, this is not supported yet, sorry, please use STARPU_WORKERS_CPUID or STARPU_WORKERS_COREID to set 
this by hand\n", first, last, weight); + break; + } + + if (hwloc_bitmap_weight(log_cpuset) == 1 || hwloc_bitmap_weight(log_coreset) == 1) + { + const char *omp_bind = starpu_getenv("OMP_PROC_BIND"); + _STARPU_DISP("Warning: the current CPU binding set contains only one CPU.\n"); + if (omp_bind && strcasecmp(omp_bind, "false")) + _STARPU_DISP("The OMP_PROC_BIND environment variable is set to %s.\n", omp_bind); + else + _STARPU_DISP("Maybe you need to tell your job scheduler to bind on all allocated cores (e.g. --exclusive --ntasks-per-node=1 or --cpus-per-task for Slurm, or --bind-to board for openmpi).\n"); + _STARPU_DISP("You can use STARPU_WORKERS_GETBIND=0 to bypass it, but make sure you are not oversubscribing the machine.\n"); + } + topology->nusedpus = weight; + topology->firstusedpu = hwloc_bitmap_first(log_cpuset);; + } while(0); + + hwloc_bitmap_free(cpuset); + hwloc_bitmap_free(check_cpuset); + topology->log_cpuset = log_cpuset; + topology->log_coreset = log_coreset; + } + +#elif defined(HAVE_SYSCONF) + /* Discover the CPUs relying on the sysconf(3) function and fills + * CONFIG accordingly. */ + + config->topology.nhwworker[STARPU_CPU_WORKER][0] = + config->topology.nhwpus = + config->topology.nusedpus = + sysconf(_SC_NPROCESSORS_ONLN); + +#elif defined(_WIN32) + /* Discover the CPUs on Cygwin and MinGW systems. 
*/ + + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + config->topology.nhwworker[STARPU_CPU_WORKER][0] = + config->topology.nhwpus = + config->topology.nusedpus = + sysinfo.dwNumberOfProcessors; +#else +#warning no way to know number of cores, assuming 1 + config->topology.nhwworker[STARPU_CPU_WORKER][0] = + config->topology.nhwpus = + config->topology.nusedpus = + 1; +#endif + if (!starpu_getenv_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1)) + config->topology.nhwdevices[STARPU_CPU_WORKER] = config->topology.nhwworker[STARPU_CPU_WORKER][0]; + + if (config->conf.ncuda != 0) + _starpu_cuda_discover_devices(config); + if (config->conf.nhip != 0) + _starpu_hip_discover_devices(config); + if (config->conf.nopencl != 0) + _starpu_opencl_discover_devices(config); + if (config->conf.nmax_fpga != 0) + _starpu_max_fpga_discover_devices(config); +#ifdef STARPU_USE_MPI_MASTER_SLAVE + config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count(); +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + config->topology.nhwdevices[STARPU_TCPIP_MS_WORKER] = _starpu_tcpip_src_get_device_count(); +#endif + + topology_is_initialized = 1; +} + +/* + * Bind workers on the different processors + */ +static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config) +{ + char *strval; + unsigned i; + + struct _starpu_machine_topology *topology = &config->topology; + STARPU_ASSERT_MSG(topology->nhwworker[STARPU_CPU_WORKER][0], "Unexpected value for topology->nhwworker[STARPU_CPU_WORKER][0] %u", topology->nhwworker[STARPU_CPU_WORKER][0]); + int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + int scale = 1; + + config->current_bindid = 0; + + if (starpu_getenv("STARPU_WORKERS_CPUID") && starpu_getenv("STARPU_WORKERS_COREID")) + { + _STARPU_DISP("Warning: STARPU_WORKERS_CPUID and STARPU_WORKERS_COREID cannot be set at the same time. 
STARPU_WORKERS_CPUID will be used.\n"); + } + + if (topology->nhwpus % topology->nhwworker[STARPU_CPU_WORKER][0]) + { + _STARPU_DISP("Warning: hwloc reported %d logical CPUs for %d cores, this is not homogeneous, will assume %d logical CPUs per core\n", topology->nhwpus, topology->nhwworker[STARPU_CPU_WORKER][0], nhyperthreads); + } + + /* conf->workers_bindid indicates the successive logical PU identifier that + * should be used to bind the workers. It should be either filled + * according to the user's explicit parameters (from starpu_conf) or + * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a + * round-robin policy is used to distributed the workers over the + * cores. */ + + /* what do we use, explicit value, env. variable, or round-robin ? */ + strval = starpu_getenv("STARPU_WORKERS_CPUID"); + if (strval == NULL) + { + strval = starpu_getenv("STARPU_WORKERS_COREID"); + if (strval) + scale = nhyperthreads; + } + + if (strval) + { + /* STARPU_WORKERS_CPUID certainly contains less entries than + * STARPU_NMAXWORKERS, so we reuse its entries in a round + * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 + * 2". */ + unsigned wrap = 0; + unsigned number_of_entries = 0; + + char *endptr; + /* we use the content of the STARPU_WORKERS_CPUID + * env. 
variable */ + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + if (!wrap) + { + long int val; + val = strtol(strval, &endptr, 10); + if (endptr != strval) + { + if (scale > 1) + { +#if defined(STARPU_HAVE_HWLOC) + if (config->topology.log_coreset && + !hwloc_bitmap_isset(config->topology.log_coreset, val)) + _STARPU_DISP("Warning: logical core id %ld is not in the CPU binding provided by the OS\n", val); +#endif + if (val * scale >= topology->nhwpus) + _STARPU_DISP("Warning: logical core id %ld is beyond the number of cores (%d), will wrap around it\n", val, topology->nhwpus / scale); + } + else + { +#if defined(STARPU_HAVE_HWLOC) + if (config->topology.log_cpuset && + !hwloc_bitmap_isset(config->topology.log_cpuset, val)) + _STARPU_DISP("Warning: logical CPU id %ld is not in the CPU binding provided by the OS\n", val); +#endif + if (val >= topology->nhwpus) + _STARPU_DISP("Warning: logical CPU id %ld is beyond the number of CPUs (%d), will wrap around it\n", val, topology->nhwpus); + } + topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus); + strval = endptr; + if (*strval == '-') + { + /* range of values */ + long int endval; + strval++; + if (*strval && *strval != ' ' && *strval != ',') + { + endval = strtol(strval, &endptr, 10); + strval = endptr; + } + else + { + endval = topology->nhwpus / scale - 1; + if (*strval) + strval++; + } + for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++) + { + i++; + topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus); + } + } + number_of_entries = i+1; + if (*strval == ',') + strval++; + } + else + { + /* there must be at least one entry */ + STARPU_ASSERT(i != 0); + number_of_entries = i; + + /* there is no more values in the + * string */ + wrap = 1; + + topology->workers_bindid[i] = + topology->workers_bindid[0]; + } + } + else + { + topology->workers_bindid[i] = + topology->workers_bindid[i % number_of_entries]; + } + } + topology->workers_nbindid = number_of_entries; + } 
+ else if (config->conf.use_explicit_workers_bindid) + { + /* we use the explicit value from the user */ + memcpy(topology->workers_bindid, + config->conf.workers_bindid, + STARPU_NMAXWORKERS*sizeof(unsigned)); + topology->workers_nbindid = STARPU_NMAXWORKERS; + } + else + { + int nth_per_core = starpu_getenv_number_default("STARPU_NTHREADS_PER_CORE", 1); + int k; + int nbindids=0; + STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads"); + + i = 0; /* PU number currently assigned */ + k = 0; /* Number of threads already put on the current core */ + while(i < topology->nusedpus) + { + if (k >= nth_per_core) + { + /* We have already put enough workers on this + * core, skip remaining PUs from this core, and + * proceed with next core */ + i += nhyperthreads-nth_per_core; + k = 0; + continue; + } + + /* Add a worker to this core, by using this logical PU */ + unsigned allocated = topology->firstusedpu + (unsigned)i; + +#if defined(STARPU_HAVE_HWLOC) + if (config->topology.log_cpuset && + !hwloc_bitmap_isset(config->topology.log_cpuset, allocated)) + _STARPU_DISP("Warning: logical CPU id %u is not in the CPU binding provided by the OS, did you specify an STARPU_NTHREADS_PER_CORE value that is not covered by the OS-provided CPU binding?\n", allocated); +#endif + + topology->workers_bindid[nbindids++] = allocated; + k++; + i++; + } + topology->workers_nbindid = nbindids; + } + + for (i = 0; i < STARPU_MAXCPUS;i++) + cpu_worker[i] = STARPU_NOWORKERID; + + /* no binding yet */ + memset(&config->currently_bound, 0, sizeof(config->currently_bound)); + memset(&config->currently_shared, 0, sizeof(config->currently_shared)); +} + +static void _starpu_deinitialize_workers_bindid(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) +{ + unsigned i; + + for (i = 0; i < STARPU_MAXCPUS;i++) + { + if (cpu_name[i]) + { + free(cpu_name[i]); + cpu_name[i] = NULL; + } + } + +} + +unsigned _starpu_get_next_bindid(struct 
_starpu_machine_config *config, unsigned flags, + unsigned *preferred_binding, unsigned npreferred) +{ + struct _starpu_machine_topology *topology = &config->topology; + + STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?"); + + unsigned current_preferred; + unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + unsigned workers_nbindid = topology->workers_nbindid; + unsigned i; + + if (npreferred) + { + STARPU_ASSERT_MSG(preferred_binding, "Passing NULL pointer for parameter preferred_binding with a non-0 value of parameter npreferred"); + } + + /* loop over the preference list */ + for (current_preferred = 0; + current_preferred < npreferred; + current_preferred++) + { + /* can we bind the worker on the preferred core ? */ + unsigned requested_core = preferred_binding[current_preferred]; + unsigned requested_bindid = requested_core * nhyperthreads; + + /* Look at the remaining PUs to be bound to */ + for (i = 0; i < workers_nbindid; i++) + { + if (topology->workers_bindid[i] == requested_bindid) + { + if ((!config->currently_bound[i] || + (config->currently_shared[i] && !(flags & STARPU_THREAD_ACTIVE)))) + { + /* the PU is available, or shareable with us, we use it ! */ + _STARPU_DEBUG("PU %d is %sbound and %sshared and we %sshare, use it\n", requested_bindid, + config->currently_bound[i] ? "" : "not ", + config->currently_shared[i] ? "" : "not ", + flags & STARPU_THREAD_ACTIVE ? 
"don't ": ""); + config->currently_bound[i] = 1; + if (!(flags & STARPU_THREAD_ACTIVE)) + config->currently_shared[i] = 1; + return requested_bindid; + } + break; + } + } + } + + if (!(flags & STARPU_THREAD_ACTIVE)) + { + /* Try to find a shareable PU */ + for (i = 0; i < workers_nbindid; i++) + if (config->currently_shared[i]) + { + _STARPU_DEBUG("PU %d is available for sharing\n", topology->workers_bindid[i]); + return topology->workers_bindid[i]; + } + } + + /* Try to find an available PU from last used PU */ + for (i = config->current_bindid; i < workers_nbindid; i++) + if (!config->currently_bound[i]) + /* Found a cpu ready for use, use it! */ + break; + + if (i == workers_nbindid) + { + _STARPU_DEBUG("Looped over %d cpus, restarting from 0\n", workers_nbindid); + /* Finished binding on all cpus, restart from start in + * case the user really wants overloading */ + memset(&config->currently_bound, 0, sizeof(config->currently_bound)); + i = 0; + } + + STARPU_ASSERT(i < workers_nbindid); + unsigned bindid = topology->workers_bindid[i]; + _STARPU_DEBUG("binding on PU %d\n", bindid); + config->currently_bound[i] = 1; + if (!(flags & STARPU_THREAD_ACTIVE)) + config->currently_shared[i] = 1; + config->current_bindid = i; + return bindid; +} + +unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred) +{ + return _starpu_get_next_bindid(_starpu_get_machine_config(), flags, preferred, npreferred); +} + +unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config) +{ + _starpu_init_topology(config); + + return config->topology.nhwworker[STARPU_CPU_WORKER][0]; +} + +unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config) +{ + _starpu_init_topology(config); + + return config->topology.nhwpus; +} + +unsigned _starpu_topology_get_nhwnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) +{ +#if defined(STARPU_HAVE_HWLOC) + _starpu_init_topology(config); + + struct _starpu_machine_topology 
*topology = &config->topology; + int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NUMANODE); + unsigned res = nnumanodes > 0 ? nnumanodes : 1; + + if (res > STARPU_MAXNUMANODES) + { + _STARPU_DISP("Warning: Number of NUMA nodes discovered %d is higher than configured %d, reducing to that. Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n", res, STARPU_MAXNUMANODES); + res = STARPU_MAXNUMANODES; + } + return res; +#else + return 1; +#endif +} + +unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) +{ + unsigned res; +#if defined(STARPU_HAVE_HWLOC) + if (numa_enabled == -1) + numa_enabled = starpu_getenv_number_default("STARPU_USE_NUMA", 0); + if (numa_enabled) + res = _starpu_topology_get_nhwnumanodes(config); + else +#endif + res = 1; + + return res; +} + +#if defined(STARPU_HAVE_HWLOC) +/* Record the logical numbers of the cores within this obj */ +static unsigned _starpu_topology_get_core_binding(unsigned *binding, unsigned nbinding, hwloc_obj_t obj) +{ + unsigned found = 0; + unsigned n; + + if (nbinding && obj->type == HWLOC_OBJ_CORE) + { + *binding = obj->logical_index; + found++; + } + + for (n = 0; n < obj->arity; n++) + { + found += _starpu_topology_get_core_binding(binding + found, nbinding - found, obj->children[n]); + } + return found; +} +#endif + +/* Record the logical numbers of the cores within these numa nodes */ +unsigned _starpu_topology_get_numa_core_binding(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, const unsigned *numa_binding STARPU_ATTRIBUTE_UNUSED, unsigned nnuma STARPU_ATTRIBUTE_UNUSED, unsigned *binding STARPU_ATTRIBUTE_UNUSED, unsigned nbinding STARPU_ATTRIBUTE_UNUSED) +{ +#if defined(STARPU_HAVE_HWLOC) + unsigned n; + unsigned cur = 0; + + for (n = 0; n < nnuma; n++) + { + hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa_binding[n]); + + if 
(!obj) + { + /* NUMA nodes not available, fall back to the whole machine */ + return _starpu_topology_get_core_binding(binding, nbinding, hwloc_get_root_obj(config->topology.hwtopology)); + } + +#if HWLOC_API_VERSION >= 0x00020000 + /* Get the actual topology object */ + obj = obj->parent; +#endif + cur += _starpu_topology_get_core_binding(binding + cur, nbinding - cur, obj); + if (cur == nbinding) + break; + } + return cur; +#else + return 0; +#endif +} + +#ifdef STARPU_HAVE_HWLOC +void _starpu_topology_filter(hwloc_topology_t topology) +{ +#if HWLOC_API_VERSION >= 0x20000 + hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); + hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM); +#else + hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_IO); +#endif +#ifdef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS +/* Driver porters: adding your driver here is optional, it is just to avoid hwloc components which take a lot of time to start. 
*/ +# ifndef STARPU_USE_CUDA + hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "cuda"); + hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "nvml"); +# endif +# ifndef STARPU_USE_HIP + hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "hip"); + hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "rsmi"); +# endif +# ifndef STARPU_USE_OPENCL + hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl"); +# endif +#endif +} +#endif + +void _starpu_topology_check_ndevices(int *ndevices, unsigned nhwdevices, int overflow, unsigned max, int reserved, const char *nname, const char *dname, const char *configurename) +{ + if (!*ndevices) + return; + + STARPU_ASSERT_MSG(*ndevices >= -1, "%s can not be negative and different from -1 (is is %d)", nname, *ndevices); + + if (*ndevices == -1) + { + /* Nothing was specified, so let's choose ! */ + + if (reserved > 0) + { + if (nhwdevices < (unsigned) reserved) + { + _STARPU_DISP("Warning: %u %s devices were requested to be reserved, but only %d were available,\n", reserved, dname, nhwdevices); + nhwdevices = 0; + } + else + { + nhwdevices -= reserved; + } + } + + if (nhwdevices > max) + { + _STARPU_MSG("# Warning: %u %s devices available. Only %u enabled. Use configure option --enable-%s=xxx to update the maximum value of supported %s devices.\n", nhwdevices, dname, max, configurename, dname); + nhwdevices = max; + } + *ndevices = nhwdevices; + } + else + { + if (!overflow && *ndevices > (int) nhwdevices) + { + /* The user requires more devices than there is available */ + _STARPU_DISP("Warning: %d %s devices requested. 
Only %d available.\n", *ndevices, dname, nhwdevices); + *ndevices = nhwdevices; + } + + if (reserved > 0) + { + if (*ndevices < (int) reserved) + { + _STARPU_DISP("Warning: %u %s devices were requested to be reserved, but only %d were configured,\n", reserved, dname, *ndevices); + *ndevices = 0; + } + else + *ndevices -= reserved; + } + + /* Let's make sure this value is OK. */ + if (*ndevices > (int) max) + { + _STARPU_DISP("Warning: %d %s devices requested. Only %d enabled. Use configure option --enable-%s=xxx to update the maximum value of supported %s devices.\n", *ndevices, dname, max, configurename, dname); + *ndevices = max; + } + } +} + +void _starpu_topology_configure_workers(struct _starpu_machine_topology *topology, + struct _starpu_machine_config *config, + enum starpu_worker_archtype type, + int devnum, int devid, + int homogeneous, int worker_devid, + unsigned nworker_per_device, + unsigned ncores, + struct _starpu_worker_set *worker_set, + struct _starpu_worker_set *driver_worker_set) +{ + topology->nworker[type][devnum] = nworker_per_device; + topology->devid[type][devnum] = devid; + + unsigned i; + + for (i = 0; i < nworker_per_device; i++) + { + if (topology->nworkers == STARPU_NMAXWORKERS) + // We are full + break; + + int worker_idx = topology->nworkers++; + + if (worker_set == ALLOC_WORKER_SET) + { + /* Just one worker in the set */ + _STARPU_CALLOC(config->workers[worker_idx].set, 1, sizeof(struct _starpu_worker_set)); + config->workers[worker_idx].set->workers = &config->workers[worker_idx]; + config->workers[worker_idx].set->nworkers = 1; + if (type != STARPU_CPU_WORKER) + _starpu_cpu_busy_cpu(1); + } + else + { + config->workers[worker_idx].set = worker_set; + if ((!worker_set || worker_set->workers == &config->workers[worker_idx]) + && (!driver_worker_set || driver_worker_set == worker_set) + && type != STARPU_CPU_WORKER) + _starpu_cpu_busy_cpu(1); + } + + config->workers[worker_idx].driver_worker_set = driver_worker_set; + 
config->workers[worker_idx].arch = type; + _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device)); + config->workers[worker_idx].perf_arch.ndevices = 1; + config->workers[worker_idx].perf_arch.devices[0].type = type; + config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : worker_devid ? (int) i : devid; + config->workers[worker_idx].perf_arch.devices[0].ncores = ncores; + config->workers[worker_idx].devid = worker_devid ? (int) i : devid; + config->workers[worker_idx].devnum = worker_devid ? (int) i : devnum; + config->workers[worker_idx].subworkerid = worker_devid ? 0 : i; + config->workers[worker_idx].worker_mask = STARPU_WORKER_TO_MASK(type); + config->worker_mask |= STARPU_WORKER_TO_MASK(type); + } +} + +#ifdef STARPU_HAVE_HWLOC +static unsigned _starpu_topology_count_ngpus(hwloc_obj_t obj) +{ + struct _starpu_hwloc_userdata *data = obj->userdata; + unsigned n = data->ngpus; + unsigned i; + + for (i = 0; i < obj->arity; i++) + n += _starpu_topology_count_ngpus(obj->children[i]); +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_t child; + for (child = obj->io_first_child; child; child = child->next_sibling) + n += _starpu_topology_count_ngpus(child); +#endif + + data->ngpus = n; +//#ifdef STARPU_VERBOSE +// { +// char name[64]; +// hwloc_obj_type_snprintf(name, sizeof(name), obj, 0); +// _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n); +// } +//#endif + return n; +} +#endif + +static int _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED) +{ + int i; + + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + config->workers[i].workerid = i; + config->workers[i].set = NULL; + } + + struct _starpu_machine_topology *topology = &config->topology; + + topology->nworkers = 0; + topology->ncombinedworkers = 0; + topology->nsched_ctxs = 0; + + _starpu_init_topology(config); + + _starpu_initialize_workers_bindid(config); + + /* Reserve thread 
for main() */ + main_thread_cpuid = starpu_getenv_number_default("STARPU_MAIN_THREAD_CPUID", -1); + int main_thread_coreid = starpu_getenv_number_default("STARPU_MAIN_THREAD_COREID", -1); + if (main_thread_cpuid >= 0 && main_thread_coreid >= 0) + { + _STARPU_DISP("Warning: STARPU_MAIN_THREAD_CPUID and STARPU_MAIN_THREAD_COREID cannot be set at the same time. STARPU_MAIN_THREAD_CPUID will be used.\n"); + } + if (main_thread_cpuid == -1 && main_thread_coreid >= 0) + main_thread_cpuid = main_thread_coreid * _starpu_get_nhyperthreads(); + if (main_thread_coreid == -1 && main_thread_cpuid >= 0) + main_thread_coreid = main_thread_cpuid / _starpu_get_nhyperthreads(); + int main_thread_bind = starpu_getenv_number_default("STARPU_MAIN_THREAD_BIND", 0); + int main_thread_activity = STARPU_NONACTIVETHREAD; + if (main_thread_bind) + { + main_thread_activity = STARPU_ACTIVETHREAD; + if (main_thread_cpuid == -1) + main_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); + else + { + unsigned coreid = main_thread_coreid; + unsigned got_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, &coreid, 1); + if (got_cpuid != (unsigned) main_thread_cpuid) + _STARPU_DISP("Warning: Could not reserve requested logical core %d (logical cpu %d) for main, got %d instead\n", main_thread_coreid, main_thread_cpuid, got_cpuid); + } + } + if (main_thread_cpuid >= 0) + _starpu_bind_thread_on_cpu(main_thread_cpuid, main_thread_activity, "main"); + + /* Reserve thread for MPI */ + int mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); + int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); + + if (mpi_thread_coreid == -1 && mpi_thread_cpuid >= 0) + mpi_thread_coreid = mpi_thread_cpuid / _starpu_get_nhyperthreads(); + if (mpi_thread_cpuid == -1 && mpi_thread_coreid >= 0) + mpi_thread_cpuid = mpi_thread_coreid * _starpu_get_nhyperthreads(); + + if (mpi_thread_coreid >= 0) + { + unsigned coreid = mpi_thread_coreid; + unsigned 
got_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, &coreid, 1); + if (got_cpuid != (unsigned) mpi_thread_cpuid) + _STARPU_DISP("Warning: Could not reserve requested logical core %d (logical cpu %d) for MPI, got %d instead\n", mpi_thread_coreid, mpi_thread_cpuid, got_cpuid); + } + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + _starpu_init_cuda_config(topology, config); +#endif + +#if defined(STARPU_USE_HIP) + _starpu_init_hip_config(topology, config); +#endif + +/* We put the OpenCL section after the CUDA section: we rather use NVidia GPUs in CUDA mode than in OpenCL mode */ +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + _starpu_init_opencl_config(topology, config); +#endif + +#ifdef STARPU_USE_MAX_FPGA + _starpu_init_max_fpga_config(topology, config); +#endif + +#if defined(STARPU_USE_MPI_MASTER_SLAVE) + _starpu_init_mpi_config(topology, config, &config->conf, no_mp_config); +#endif +#if defined(STARPU_USE_TCPIP_MASTER_SLAVE) + _starpu_init_tcpip_config(topology, config, &config->conf, no_mp_config); +#endif + +/* we put the CPU section after the accelerator : in case there was an + * accelerator found, we devote one cpu */ +#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) + _starpu_init_cpu_config(topology, config); +#endif + + if (topology->nworkers == 0) + { + _STARPU_DEBUG("No worker found, aborting ...\n"); + return -ENODEV; + } + return 0; +} + +void _starpu_destroy_machine_config(struct _starpu_machine_config *config, int no_mp_config) +{ + _starpu_close_debug_logfile(); + + unsigned worker; + if (!no_mp_config) + for (worker = 0; worker < config->topology.nworkers; worker++) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + int bindid = workerarg->bindid; + free(workerarg->perf_arch.devices); +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_free(workerarg->hwloc_cpu_set); + if (bindid != -1) + { + hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology, + config->pu_depth, + bindid); + 
struct _starpu_hwloc_userdata *data = worker_obj->userdata; + if (data->worker_list) + { + _starpu_worker_list_delete(data->worker_list); + data->worker_list = NULL; + } + } +#endif + if (bindid != -1) + { + free(config->bindid_workers[bindid].workerids); + config->bindid_workers[bindid].workerids = NULL; + } + } + free(config->bindid_workers); + config->bindid_workers = NULL; + config->nbindid = 0; + unsigned combined_worker_id; + for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++) + { + struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id]; +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_free(combined_worker->hwloc_cpu_set); +#endif + free(combined_worker->perf_arch.devices); + } + +#ifdef STARPU_HAVE_HWLOC + _starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology)); + hwloc_bitmap_free(config->topology.log_cpuset); + hwloc_bitmap_free(config->topology.log_coreset); + hwloc_topology_destroy(config->topology.hwtopology); +#endif + + topology_is_initialized = 0; + + _starpu_devices_gpu_clean(); + + int i; + for (i=0; itopology.hwtopology, config->pu_depth, cpuid); + hwloc_bitmap_t set = obj->cpuset; + + return hwloc_bitmap_first(set); +#else + return cpuid; +#endif +} + +void _starpu_do_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED) +{ +#ifndef STARPU_SIMGRID + if (nobind > 0) + return; + if (cpuid < 0) + return; + +#ifdef STARPU_HAVE_HWLOC + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + const struct hwloc_topology_support *support = hwloc_topology_get_support(config->topology.hwtopology); + if (support->cpubind->set_thisthread_cpubind) + { + hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->pu_depth, cpuid); + hwloc_bitmap_t set = obj->cpuset; + int res; + + hwloc_bitmap_singlify(set); + res = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD); + if (res) + { + 
perror("hwloc_set_cpubind"); + STARPU_ABORT(); + } + } +#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__) + int res; + /* fix the thread on the correct cpu */ + cpu_set_t aff_mask; + CPU_ZERO(&aff_mask); + CPU_SET(cpuid, &aff_mask); + + starpu_pthread_t self = starpu_pthread_self(); + + res = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask); + if (res) + { + const char *msg = strerror(res); + _STARPU_MSG("pthread_setaffinity_np: %s\n", msg); + STARPU_ABORT(); + } + +#elif defined(_WIN32) + DWORD mask = 1 << cpuid; + if (!SetThreadAffinityMask(GetCurrentThread(), mask)) + { + _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask); + } +#else +#warning no CPU binding support +#endif +#endif +} + +int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED) +{ + int ret = 0; + +#ifndef STARPU_SIMGRID + if (nobind > 0) + return ret; + if (cpuid < 0) + return ret; + +#ifdef STARPU_HAVE_HWLOC + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + _starpu_init_topology(config); + + if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS) + { +/* TODO: mutex... 
*/ + int previous = cpu_worker[cpuid]; + /* We would like the PU to be available, or we are perhaps fine to share it */ + if (!(previous == STARPU_NOWORKERID || + (previous == STARPU_NONACTIVETHREAD && workerid == STARPU_NONACTIVETHREAD) || + (previous >= 0 && previous == workerid) || + (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])))) + { + char hostname[65]; + gethostname(hostname, sizeof(hostname)); + + if (previous == STARPU_ACTIVETHREAD) + _STARPU_DISP("[%s] Warning: active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid); + else if (previous == STARPU_NONACTIVETHREAD) + _STARPU_DISP("[%s] Warning: non-active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid); + else + _STARPU_DISP("[%s] Warning: worker %d was already bound to PU %d\n", hostname, previous, cpuid); + + if (workerid == STARPU_ACTIVETHREAD) + _STARPU_DISP("and we were told to also bind active thread %s to it.\n", name); + else if (workerid == STARPU_NONACTIVETHREAD) + _STARPU_DISP("and we were told to also bind non-active thread %s to it.\n", name); + else + _STARPU_DISP("and we were told to also bind worker %d to it.\n", workerid); + + _STARPU_DISP("This will strongly degrade performance.\n"); + + if (workerid >= 0) + /* This shouldn't happen for workers */ + _STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported a total of %d cores and %d threads, and to use %d threads from logical %d, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler? 
You may want to try to use export STARPU_WORKERS_GETBIND=0 to ignore the job scheduler binding\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus, config->topology.nusedpus, config->topology.firstusedpu); + ret = -1; + } + else + { + cpu_worker[cpuid] = workerid; + if (name) + { + if (cpu_name[cpuid]) + free(cpu_name[cpuid]); + cpu_name[cpuid] = strdup(name); + } + } + } +#endif + + _starpu_do_bind_thread_on_cpu(cpuid); +#endif + return ret; +} + +int +starpu_bind_thread_on(int cpuid, unsigned flags, const char *name) +{ + int workerid; + STARPU_ASSERT_MSG(name, "starpu_bind_thread_on must be provided with a name"); + starpu_pthread_setname(name); + if (flags & STARPU_THREAD_ACTIVE) + workerid = STARPU_ACTIVETHREAD; + else + workerid = STARPU_NONACTIVETHREAD; + return _starpu_bind_thread_on_cpu(cpuid, workerid, name); +} + +void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_SIMGRID + return; +#endif +#ifdef STARPU_HAVE_HWLOC + const struct hwloc_topology_support *support; + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + _starpu_init_topology(config); + + support = hwloc_topology_get_support(config->topology.hwtopology); + if (support->cpubind->set_thisthread_cpubind) + { + hwloc_bitmap_t set = combined_worker->hwloc_cpu_set; + int ret; + + ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD); + if (ret) + { + perror("binding thread"); + STARPU_ABORT(); + } + } +#else +#ifdef __GLIBC__ + sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set); +#else +# warning no parallel worker CPU binding support +#endif +#endif +} + +void starpu_bind_thread_on_main(void) +{ + _starpu_do_bind_thread_on_cpu(main_thread_cpuid); +} + +void starpu_bind_thread_on_cpu(int cpuid) +{ + _starpu_do_bind_thread_on_cpu(cpuid); +} + +void starpu_bind_thread_on_worker(unsigned workerid) +{ + unsigned 
basic_worker_count = starpu_worker_get_count(); + if (workerid < basic_worker_count) + _starpu_do_bind_thread_on_cpu(starpu_worker_get_bindid(workerid)); + else + _starpu_bind_thread_on_cpus(_starpu_get_combined_worker_struct(workerid)); +} +/* Return the size in bytes to advertise for CPU memory node nodeid: 90% of the memory reported by hwloc (0 without hwloc), unless a limit in MB is set through STARPU_LIMIT_CPU_NUMA_<os_index>_MEM, STARPU_LIMIT_CPU_NUMA_MEM or STARPU_LIMIT_CPU_MEM (looked up in that order), in which case the limit is returned when it does not exceed a non-zero detected size. */ +static size_t _starpu_cpu_get_global_mem_size(int nodeid, struct _starpu_machine_config *config) +{ + size_t global_mem; + starpu_ssize_t limit = -1; + +#if defined(STARPU_HAVE_HWLOC) + struct _starpu_machine_topology *topology = &config->topology; + + STARPU_ASSERT(numa_enabled != -1); + if (numa_enabled) + { + int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NUMANODE); + + if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN) + { +#if HWLOC_API_VERSION >= 0x00020000 + global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory; +#else + global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory; +#endif + } + else + { + char name[32]; + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid); +#if HWLOC_API_VERSION >= 0x00020000 + global_mem = obj->attr->numanode.local_memory; +#else + global_mem = obj->memory.local_memory; +#endif + snprintf(name, sizeof(name), "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index); + limit = starpu_getenv_number(name); + } + } + else + { + /* Do not limit ourself to a single NUMA node */ +#if HWLOC_API_VERSION >= 0x00020000 + global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory; +#else + global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory; +#endif + } + +#else /* STARPU_HAVE_HWLOC */ +#ifdef STARPU_DEVEL +# warning TODO: use sysinfo when available to get global size +#endif + global_mem = 0; +#endif + + if (limit == -1) + limit = starpu_getenv_number("STARPU_LIMIT_CPU_NUMA_MEM"); + + if (limit == -1) + { + limit = starpu_getenv_number("STARPU_LIMIT_CPU_MEM"); + if (limit != -1 && numa_enabled) + { + _STARPU_DISP("NUMA is enabled and STARPU_LIMIT_CPU_MEM is set to %ldMB.
Assuming that it should be distributed over the %u NUMA node(s). You probably want to use STARPU_LIMIT_CPU_NUMA_MEM instead.\n", (long) limit, _starpu_topology_get_nnumanodes(config)); + limit /= _starpu_topology_get_nnumanodes(config); + } + } + + /* Don't eat all memory for ourselves: only advertise 90% of what was detected */ + global_mem *= 0.9; + + if (limit < 0) + // No limit is defined, we return the global memory size + return global_mem; + else if (global_mem && (size_t)limit * 1024*1024 > global_mem) + { + if (numa_enabled) + _STARPU_DISP("The requested limit %ldMB for NUMA node %d is higher than available memory %luMB, using the latter\n", (long) limit, nodeid, (unsigned long) global_mem / (1024*1024)); + else + _STARPU_DISP("The requested limit %ldMB is higher than available memory %luMB, using the latter\n", (long) limit, (unsigned long) global_mem / (1024*1024)); + return global_mem; + } + else + // We limit the memory + return limit*1024*1024; +} + +//TODO : Check SIMGRID +static void _starpu_init_numa_node(struct _starpu_machine_config *config) +{ + nb_numa_nodes = 0; + + unsigned i; + for (i = 0; i < STARPU_MAXNUMANODES; i++) + { + numa_memory_nodes_to_hwloclogid[i] = STARPU_NUMA_UNINITIALIZED; + numa_memory_nodes_to_physicalid[i] = STARPU_NUMA_UNINITIALIZED; + } + +#ifdef STARPU_SIMGRID + char name[16]; + starpu_sg_host_t host; +#endif + + numa_enabled = starpu_getenv_number_default("STARPU_USE_NUMA", 0); + /* NUMA mode activated */ + if (numa_enabled) + { + /* Take all NUMA nodes used by CPU workers */ + unsigned worker; + for (worker = 0; worker < config->topology.nworkers; worker++) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + if (workerarg->arch == STARPU_CPU_WORKER) + { + int numa_logical_id = _starpu_get_logical_numa_node_worker(worker); + + /* Convert logical id to StarPU id to check if this NUMA node is already saved or not */ + int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id); + + /* This shouldn't happen */ + if
(numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES) + { + _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); + STARPU_ABORT(); + } + + if (numa_starpu_id == -1) + { + int devid = numa_logical_id == STARPU_NUMA_MAIN_RAM ? 0 : numa_logical_id; + int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, devid); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(devid, config)); + STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES); + _starpu_memory_node_set_mapped(memnode); + numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id; + int numa_physical_id = _starpu_get_physical_numa_node_worker(worker); + numa_memory_nodes_to_physicalid[memnode] = numa_physical_id; + nb_numa_nodes++; +#ifdef STARPU_SIMGRID + snprintf(name, sizeof(name), "RAM%d", memnode); + host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT(host); + _starpu_simgrid_memory_node_set_host(memnode, host); +#endif + } + } + } + + /* If we found NUMA nodes from CPU workers, it's good */ + if (nb_numa_nodes != 0) + return; + + _STARPU_DISP("No NUMA nodes found when checking CPU workers...\n"); + +#ifdef STARPU_HAVE_HWLOC + _STARPU_DISP("Take NUMA nodes attached to GPU devices...\n"); + + for (i = 0; i < STARPU_NARCH; i++) + { + if (!starpu_driver_info[i].get_hwloc_obj) + continue; + + unsigned j; + + for (j = 0; j < config->topology.ndevices[i]; j++) + { + hwloc_obj_t obj = starpu_driver_info[i].get_hwloc_obj(config->topology.hwtopology, + config->topology.devid[i][j]); + + if (obj) + obj = _starpu_numa_get_obj(obj); + /* Hwloc cannot recognize some devices */ + if (!obj) + continue; + int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index); + + /* This shouldn't 
happen */ + if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES) + { + _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); + STARPU_ABORT(); + } + + if (numa_starpu_id == -1) + { + int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(obj->logical_index, config)); + STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES); + _starpu_memory_node_set_mapped(memnode); + numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index; + numa_memory_nodes_to_physicalid[memnode] = obj->os_index; + nb_numa_nodes++; +#ifdef STARPU_SIMGRID + snprintf(name, sizeof(name), "RAM%d", memnode); + host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT(host); + _starpu_simgrid_memory_node_set_host(memnode, host); +#endif + } + } + } +#endif + } + +#ifdef STARPU_HAVE_HWLOC + //Found NUMA nodes from CUDA nodes + if (nb_numa_nodes != 0) + return; + + /* In case, we do not find any NUMA nodes when checking NUMA nodes attached to GPUs, we take all of them */ + if (numa_enabled) + _STARPU_DISP("No NUMA nodes found when checking GPUs devices...\n"); +#endif + + if (numa_enabled) + _STARPU_DISP("Finally, take all NUMA nodes available... \n"); + + unsigned nnuma = _starpu_topology_get_nnumanodes(config); + if (nnuma > STARPU_MAXNUMANODES) + { + _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. 
Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); + nnuma = STARPU_MAXNUMANODES; + } + + unsigned numa; + for (numa = 0; numa < nnuma; numa++) + { + unsigned numa_logical_id; + unsigned numa_physical_id; +#if defined(STARPU_HAVE_HWLOC) + hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa); + if (obj) + { + numa_logical_id = obj->logical_index; + numa_physical_id = obj->os_index; + } + else +#endif + { + numa_logical_id = 0; + numa_physical_id = 0; + } + int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, numa_logical_id); + STARPU_ASSERT(memnode < STARPU_MAXNUMANODES); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(numa_logical_id, config)); + _starpu_memory_node_set_mapped(memnode); + + numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id; + numa_memory_nodes_to_physicalid[memnode] = numa_physical_id; + nb_numa_nodes++; + + if (numa == 0) + STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM); + STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES); + +#ifdef STARPU_SIMGRID + if (nnuma > 1) + { + snprintf(name, sizeof(name), "RAM%d", memnode); + host = _starpu_simgrid_get_host_by_name(name); + } + else + { + /* In this case, nnuma has only one node */ + host = _starpu_simgrid_get_host_by_name("RAM"); + } + + STARPU_ASSERT(host); + _starpu_simgrid_memory_node_set_host(memnode, host); +#endif + } + + STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... 
We need at least one memory node !\n"); +} + +static void _starpu_init_numa_bus() +{ + unsigned i, j; + for (i = 0; i < nb_numa_nodes; i++) + for (j = 0; j < nb_numa_nodes; j++) + if (i != j) + numa_bus_id[i*nb_numa_nodes+j] = _starpu_register_bus(i, j); +} + +#if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) +static int _starpu_find_pu_driving_numa_from(hwloc_obj_t root, unsigned node) +{ + unsigned i; + int found = 0; + + if (!root->arity) + { + if (root->type == HWLOC_OBJ_PU) + { + struct _starpu_hwloc_userdata *userdata = root->userdata; + if (userdata->pu_worker) + { + /* Cool, found a worker! */ + _STARPU_DEBUG("found PU %d to drive memory node %d\n", userdata->pu_worker->bindid, node); + _starpu_worker_drives_memory_node(userdata->pu_worker, node); + found = 1; + } + } + } + for (i = 0; i < root->arity; i++) + { + if (_starpu_find_pu_driving_numa_from(root->children[i], node)) + found = 1; + } + return found; +} + +/* Look upward to find a level containing the given NUMA node and workers to drive it */ +static int _starpu_find_pu_driving_numa_up(hwloc_obj_t root, unsigned node) +{ + if (_starpu_find_pu_driving_numa_from(root, node)) + /* Ok, we already managed to find drivers */ + return 1; + if (!root->parent) + /* And no parent!? nobody can drive this... */ + return 0; + /* Try from parent */ + return _starpu_find_pu_driving_numa_up(root->parent, node); +} +#endif + +static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_config *config, int no_mp_config) +{ + /* We will store all the busid of the different (src, dst) + * combinations in a matrix which we initialize here. 
*/ + _starpu_initialize_busid_matrix(); + + unsigned bindid; + + for (bindid = 0; bindid < config->nbindid; bindid++) + { + free(config->bindid_workers[bindid].workerids); + config->bindid_workers[bindid].workerids = NULL; + config->bindid_workers[bindid].nworkers = 0; + } + + /* First determine the CPU binding */ + unsigned worker; + if (!no_mp_config) + for (worker = 0; worker < config->topology.nworkers; worker++) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid; + + /* select the worker binding */ + starpu_driver_info[workerarg->arch].init_worker_binding(config, no_mp_config, workerarg); + + _STARPU_DEBUG("worker %u type %d devid %u bound to cpu %d\n", worker, workerarg->arch, devid, workerarg->bindid); + +#ifdef __GLIBC__ + if (workerarg->bindid != -1) + { + /* Save the initial cpuset */ + CPU_ZERO(&workerarg->cpu_set); + CPU_SET(workerarg->bindid, &workerarg->cpu_set); + } +#endif /* __GLIBC__ */ + +#ifdef STARPU_HAVE_HWLOC + if (workerarg->bindid == -1) + { + workerarg->hwloc_cpu_set = hwloc_bitmap_alloc(); + workerarg->hwloc_obj = NULL; + } + else + { + /* Put the worker descriptor in the userdata field of the + * hwloc object describing the CPU */ + hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology, + config->pu_depth, + workerarg->bindid); + struct _starpu_hwloc_userdata *data = worker_obj->userdata; + if (data->worker_list == NULL) + data->worker_list = _starpu_worker_list_new(); + _starpu_worker_list_push_front(data->worker_list, workerarg); + + /* Clear the cpu set and set the cpu */ + workerarg->hwloc_cpu_set = hwloc_bitmap_dup(worker_obj->cpuset); + workerarg->hwloc_obj = worker_obj; + } +#endif + if (workerarg->bindid != -1) + { + bindid = workerarg->bindid; + unsigned old_nbindid = config->nbindid; + if (bindid >= old_nbindid) + { + /* More room needed */ + if (!old_nbindid) + config->nbindid = STARPU_NMAXWORKERS; + else + config->nbindid = 2 * 
old_nbindid; + if (bindid >= config->nbindid) + { + config->nbindid = bindid+1; + } + _STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0])); + memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0])); + } + /* Add slot for this worker */ + /* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */ + config->bindid_workers[bindid].nworkers++; + _STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0])); + config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker; + } + } + + /* Then initialize NUMA nodes accordingly */ + _starpu_init_numa_node(config); + _starpu_init_numa_bus(); + +#ifdef STARPU_SIMGRID + _starpu_simgrid_count_ngpus(); +#else +#ifdef STARPU_HAVE_HWLOC + _starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology)); +#endif +#endif + + /* Eventually initialize accelerators memory nodes */ + if (!no_mp_config) + for (worker = 0; worker < config->topology.nworkers; worker++) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid; + + /* select the memory node that contains worker's memory */ + starpu_driver_info[workerarg->arch].init_worker_memory(config, no_mp_config, workerarg); + + _STARPU_DEBUG("worker %u type %d devid %u STARPU memory node %u\n", worker, workerarg->arch, devid, workerarg->memory_node); + } + +#if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) + /* If some NUMA nodes don't have drivers, attribute some */ + unsigned node, nnodes = starpu_memory_nodes_get_count();; + if (!no_mp_config) + for (node = 0; node < nnodes; node++) + { + if (starpu_node_get_kind(node) != STARPU_CPU_RAM) + /* Only RAM nodes can be processed by any CPU */ + continue; + for (worker = 0; worker < 
config->topology.nworkers; worker++) + { + if (_starpu_worker_drives_memory[worker][node]) + break; + } + if (worker < config->topology.nworkers) + /* Already somebody driving it */ + continue; + + /* Nobody driving this node! Attribute some */ + _STARPU_DEBUG("nobody drives memory node %d\n", node); + hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(node)); + int ret = _starpu_find_pu_driving_numa_up(numa_node_obj, node); + STARPU_ASSERT_MSG(ret, "oops, didn't find any worker to drive memory node %d!?", node); + } +#endif +} + +int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config) +{ + int ret; + unsigned i; + enum starpu_worker_archtype type; + + /* First determine which devices we will use */ + ret = _starpu_init_machine_config(config, no_mp_config); + if (ret) + return ret; + + /* for the data management library */ + _starpu_memory_nodes_init(); + _starpu_datastats_init(); + + /* Now determine CPU binding and memory nodes */ + _starpu_init_workers_binding_and_memory(config, no_mp_config); + + _starpu_mem_chunk_init_last(); + + for (type = 0; type < STARPU_NARCH; type++) + config->arch_nodeid[type] = -1; + + for (i = 0; i < starpu_worker_get_count(); i++) + { + type = starpu_worker_get_type(i); + if (config->arch_nodeid[type] == -1) + config->arch_nodeid[type] = starpu_worker_get_memory_node(i); + else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i)) + config->arch_nodeid[type] = -2; + } + + _starpu_init_bus_performance(); + + return 0; +} + +void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) +{ +#if defined(STARPU_USE_MPI_MASTER_SLAVE) + _starpu_deinit_mpi_config(config); +#endif +#if defined(STARPU_USE_TCPIP_MASTER_SLAVE) + _starpu_deinit_tcpip_config(config); +#endif + + /* cleanup StarPU internal data structures */ + _starpu_memory_nodes_deinit(); + + 
_starpu_destroy_machine_config(config, 0); + + _starpu_deinitialize_workers_bindid(config); +} + +void starpu_topology_print(FILE *output) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + unsigned pu; + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + unsigned ncombinedworkers = topology->ncombinedworkers; + unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + +#ifdef STARPU_HAVE_HWLOC + hwloc_topology_t topo = topology->hwtopology; + hwloc_obj_t pu_obj; + hwloc_obj_t last_numa_obj = (void*) -1, numa_obj; + hwloc_obj_t last_package_obj = (void*) -1, package_obj; +#endif + + for (pu = 0; pu < topology->nhwpus; pu++) + { +#ifdef STARPU_HAVE_HWLOC + pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu); + numa_obj = _starpu_numa_get_obj(pu_obj); + if (numa_obj != last_numa_obj) + { + if (numa_obj) + fprintf(output, "numa %2u", numa_obj->logical_index); + else + fprintf(output, "No numa"); + last_numa_obj = numa_obj; + } + fprintf(output, "\t"); + package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj); + if (package_obj != last_package_obj) + { + if (package_obj) + fprintf(output, "pack %2u", package_obj->logical_index); + else + fprintf(output, "no pack"); + last_package_obj = package_obj; + } + fprintf(output, "\t"); +#endif + if ((pu % nthreads_per_core) == 0) + fprintf(output, "core %-5u ", pu / nthreads_per_core); + else + fprintf(output, " "); + fprintf(output, "PU %-5u ", pu); + for (worker = 0; + worker < nworkers + ncombinedworkers; + worker++) + { + if (worker < nworkers) + { + struct _starpu_worker *workerarg = &config->workers[worker]; + + if (workerarg->bindid == (int) pu) + { + char name[256]; + starpu_worker_get_name(worker, name, sizeof(name)); + fprintf(output, "%-10s ", name); + } + } + else + { + int worker_size, i; + int *combined_workerid; + 
starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid); + for (i = 0; i < worker_size; i++) + { + if (topology->workers_bindid[combined_workerid[i]] == pu) + fprintf(output, "comb %-5u ", worker-nworkers); + } + } + } + fprintf(output, "\n"); + } +} +/* Return the OS index of the PU whose hwloc logical index is logical_index; without hwloc the logical index is returned unchanged. */ +int starpu_get_pu_os_index(unsigned logical_index) +{ +#ifdef STARPU_HAVE_HWLOC + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + + hwloc_topology_t topo = topology->hwtopology; + hwloc_obj_t obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, logical_index); + STARPU_ASSERT(obj); + + return obj->os_index; +#else + return logical_index; +#endif +} +/* Return the hwloc topology stored in the machine configuration (only built with hwloc). */ +#ifdef STARPU_HAVE_HWLOC +hwloc_topology_t starpu_get_hwloc_topology(void) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + return config->topology.hwtopology; +} +#endif +/* Return topology.nhwpus divided (integer division) by the number of hardware CPU workers. */ +unsigned _starpu_get_nhyperthreads() +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0]; +} +/* Return a bitmap with bit L set for each NUMA node of hwloc logical index L holding part of [ptr, ptr+size); return -1 when ptr is NULL, size is 0, hwloc cannot answer, the reported set is empty or full, a node cannot be resolved, or hwloc_get_area_memlocation() is not available. */ +long starpu_get_memory_location_bitmap(void* ptr, size_t size) +{ + if (ptr == NULL || size == 0) + { + return -1; + } + +#ifdef HAVE_HWLOC_GET_AREA_MEMLOCATION // implies STARPU_HAVE_HWLOC + struct _starpu_machine_config *config = _starpu_get_machine_config(); + struct _starpu_machine_topology *topology = &config->topology; + + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + int ret = hwloc_get_area_memlocation(topology->hwtopology, ptr, size, set, HWLOC_MEMBIND_BYNODESET); + if (ret != 0) + { + hwloc_bitmap_free(set); + return -1; + } + + if (hwloc_bitmap_iszero(set) || hwloc_bitmap_isfull(set)) + { + // If the page isn't allocated yet, the bitmap is empty: + hwloc_bitmap_free(set); + return -1; + } + + /* We could maybe use starpu_bitmap, but that seems a little bit + * overkill and it would make recording it in traces harder.
*/ + long ret_bitmap = 0; + unsigned i = 0; + hwloc_bitmap_foreach_begin(i, set) + { + hwloc_obj_t numa_node = hwloc_get_numanode_obj_by_os_index(topology->hwtopology, i); + if (numa_node) + { + ret_bitmap |= (1L << numa_node->logical_index); /* shift in the width of the long result, not of int */ + } + else + { + // We can't find a matching NUMA node, this can happen on machine without NUMA node + hwloc_bitmap_free(set); + return -1; + } + } + hwloc_bitmap_foreach_end(); + + hwloc_bitmap_free(set); + return ret_bitmap; +#else + /* we could use move_pages(), but please, rather use hwloc (version >= 1.11.3)! */ + return -1; +#endif +} diff --git a/src/core/topology.h b/src/core/topology.h new file mode 100644 index 0000000..7b47970 --- /dev/null +++ b/src/core/topology.h @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#ifndef __TOPOLOGY_H__ +#define __TOPOLOGY_H__ + +/** @file */ + +#include +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +struct _starpu_machine_config; + +#ifndef STARPU_SIMGRID +#ifdef STARPU_HAVE_HWLOC +/** This is allocated for each hwloc object */ +struct _starpu_hwloc_userdata +{ + /** List of workers running on this obj */ + struct _starpu_worker_list *worker_list; + /** Number of GPUs sharing this PCI link */ + unsigned ngpus; + /** Worker running this PU */ + struct _starpu_worker *pu_worker; +}; +#endif +#endif + +struct _starpu_worker_set; +struct _starpu_machine_topology; + +/** Detect the number of memory nodes and where to bind the different workers. */ +int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config); + +/** + * Initialize a series of workers. + * + * - If \p explicit_workers_gpuid is non-null, it will be used as the list of device + * IDs of the actual hardware devices to be used. + * - If \p current is non-null, it points to the next device ID to be used + * - \p workers_gpuid is filled with the set of device IDs actually used in the end + * - \p varname is the name of the environment variable that users can use to + * override the set of device IDs to be used. + * - \p nhwgpus is the number of actual devices available on the system. + * - \p type is the type of devices. + */ +void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid, + int *current, int *workers_gpuid, + const char *varname, unsigned nhwgpus, + enum starpu_worker_archtype type); + +/** Get the next devid for architecture \p arch */ +int _starpu_get_next_devid(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype arch); + +/** Check that \p *ndevices is not larger than \p nhwdevices (unless \p overflow is 1), and is not larger than \p max. + * Cap it otherwise, and advise using the \p configurename ./configure option in the \p max case.
*/ +void _starpu_topology_check_ndevices(int *ndevices, unsigned nhwdevices, int overflow, unsigned max, int reserved, const char *nname, const char *dname, const char *configurename); + +/** Configures the topology according to the desired worker distribution on the device. + * - homogeneous tells to use devid 0 for the perfmodel (all devices have the same performance) + * - worker_devid tells to set a devid per worker, and subworkerid to 0, rather + * than sharing the devid and giving a different subworkerid to each worker. + */ + +/** Request to allocate a worker set for each worker */ +#define ALLOC_WORKER_SET ((struct _starpu_worker_set*) -1) + +/** Request to set a different perfmodel devid per worker */ +#define DEVID_PER_WORKER -2 +/** See the description placed before ALLOC_WORKER_SET above for \p homogeneous and \p worker_devid. */ +void _starpu_topology_configure_workers(struct _starpu_machine_topology *topology, + struct _starpu_machine_config *config, + enum starpu_worker_archtype type, + int devnum, int devid, + int homogeneous, int worker_devid, + unsigned nworker_per_device, + unsigned ncores, + struct _starpu_worker_set *worker_set, + struct _starpu_worker_set *driver_worker_set); + +extern unsigned _starpu_may_bind_automatically[STARPU_NARCH]; + +/** This function gets the identifier of the next core on which to bind a + * worker. In case a list of preferred cores was specified (logical indexes), + * we look for an available core among the list if possible, otherwise a + * round-robin policy is used. */ +unsigned _starpu_get_next_bindid(struct _starpu_machine_config *config, unsigned flags, + unsigned *preferred_binding, unsigned npreferred); + +/** Should be called instead of _starpu_destroy_topology when _starpu_build_topology returns a non zero value. */ +void _starpu_destroy_machine_config(struct _starpu_machine_config *config, int no_mp_config); + +/** Destroy all resources used to store the topology of the machine.
*/ +void _starpu_destroy_topology(struct _starpu_machine_config *config); + +#ifdef STARPU_HAVE_HWLOC +/** Return the hwloc object of the NUMA node corresponding to the given hwloc object */ +hwloc_obj_t _starpu_numa_get_obj(hwloc_obj_t obj); +#endif + +/** returns the number of physical cpus */ +unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config); + +/** returns the number of logical cpus */ +unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config); + +/** returns the number of logical cpus */ +unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config); + +/** returns the number of hardware NUMA nodes */ +unsigned _starpu_topology_get_nhwnumanodes(struct _starpu_machine_config *config); + +/** returns the number of NUMA nodes to be exposed by StarPU as memory nodes, can be just 1 when STARPU_USE_NUMA is 0 */ +unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config); + +/** given a list of numa nodes (logical indexes) \p numa_binding, fill \p binding with the corresponding cores (logical indexes) */ +unsigned _starpu_topology_get_numa_core_binding(struct _starpu_machine_config *config, const unsigned *numa_binding, unsigned nnuma, unsigned *binding, unsigned nbinding); + +int starpu_memory_nodes_numa_hwloclogid_to_id(int logid); + +/* This returns the exact NUMA node next to a worker */ +int _starpu_get_logical_numa_node_worker(unsigned workerid); + +/** returns the number of hyperthreads per core */ +unsigned _starpu_get_nhyperthreads() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +#ifdef STARPU_HAVE_HWLOC +/** Small convenient function to filter hwloc topology depending on HWLOC API version */ +void _starpu_topology_filter(hwloc_topology_t topology); +#endif + +#define STARPU_NOWORKERID -1 +#define STARPU_ACTIVETHREAD -2 +#define STARPU_NONACTIVETHREAD -2 +/** Bind the current thread on the CPU logically identified by "cpuid". 
The + * logical ordering of the processors is either that of hwloc (if available), + * or the ordering exposed by the OS. */ +int _starpu_bind_thread_on_cpu(int cpuid, int workerid, const char *name); + +struct _starpu_combined_worker; +/** Bind the current thread on the set of CPUs for the given combined worker. */ +void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker); + +struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d); + +unsigned starpu_memory_nodes_get_numa_count(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id); + +/** Get the memory node for data number i when task is to be executed on memory node \p target_node. Returns -1 if the data does not need to be loaded. */ +int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned target_node); +/** Get the memory node for data number i when task is to be executed on worker \p worker. Returns -1 if the data does not need to be loaded. */ +int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker); + +#pragma GCC visibility pop + +#endif // __TOPOLOGY_H__ diff --git a/src/core/tree.c b/src/core/tree.c new file mode 100644 index 0000000..acb78dd --- /dev/null +++ b/src/core/tree.c @@ -0,0 +1,164 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "starpu_tree.h" +#include "workers.h" +/* Clear visited[] for every worker bound to a leaf of tree, recursively. NOTE(review): leaves are not filtered on is_pu here, unlike in the lookups below -- confirm non-PU leaves cannot occur. */ +void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited) +{ + if(tree->arity == 0) + { + int *workerids; + int nworkers = starpu_bindid_get_workerids(tree->id, &workerids); + int w; + for(w = 0; w < nworkers; w++) + { + visited[workerids[w]] = 0; + } + } + int i; + for(i = 0; i < tree->arity; i++) + starpu_tree_reset_visited(&tree->nodes[i], visited); +} +/* Allocate the nodes array of father for arity children and record that arity in father. */ +void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father) +{ + _STARPU_MALLOC(father->nodes, arity*sizeof(struct starpu_tree)); + father->arity = arity; +} +/* Fill in tree as a node of the given level, arity and father, with nodes left NULL; id is kept only for PU nodes, other nodes use level as their id. */ +void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father) +{ + tree->level = level; + tree->arity = arity; + tree->nodes = NULL; + tree->id = is_pu ? id : level; + tree->is_pu = is_pu; + tree->father = father; +} +/* Depth-first search below tree for the PU leaf whose id is id; return NULL when there is none. */ +struct starpu_tree* starpu_tree_get(struct starpu_tree *tree, int id) +{ + if(tree->arity == 0) + { + if(tree->is_pu && tree->id == id) + return tree; + else + return NULL; + } + + int i; + for(i = 0; i < tree->arity; i++) + { + struct starpu_tree *found_tree = starpu_tree_get(&tree->nodes[i], id); + if(found_tree) + return found_tree; + } + + return NULL; +} +/* Return the first PU leaf strictly below node that has a worker marked in present[] and not yet in visited[], or NULL. */ +static struct starpu_tree* _get_down_to_leaves(struct starpu_tree *node, char *visited, char *present) +{ + struct starpu_tree *found_tree = NULL; + int i; + for(i = 0; i < node->arity; i++) + { + if(node->nodes[i].arity == 0) + { + if(node->nodes[i].is_pu) + { + int *workerids; + int nworkers = starpu_bindid_get_workerids(node->nodes[i].id, &workerids); + int w; + for(w = 0; w < nworkers; w++) + { + if(!visited[workerids[w]] && present[workerids[w]]) + return &node->nodes[i]; + } + } + } + else + { + found_tree =_get_down_to_leaves(&node->nodes[i], visited, present); + if(found_tree) + return found_tree; + } + } + return NULL; +} +/* Starting from node (or from the root tree when node is NULL), search the siblings of node, then the subtrees of its ancestors up to tree, for a PU leaf with a worker in present[] and not in visited[]; return tree itself when it is a lone leaf, NULL when nothing is found. */ +struct
starpu_tree* starpu_tree_get_neighbour(struct starpu_tree *tree, struct starpu_tree *node, char *visited, char *present) +{ + struct starpu_tree *father = node == NULL ? tree : node->father; + + int st, n; + + if (father == NULL) return NULL; + + if (father == tree && father->arity == 0) + return tree; + + for(st = 0; st < father->arity; st++) + { + if(&father->nodes[st] == node) + break; + } + + for(n = 0; n < father->arity; n++) + { + int i = (st+n)%father->arity; + if(&father->nodes[i] != node) + { + if(father->nodes[i].arity == 0) + { + if(father->nodes[i].is_pu) + { + int *workerids; + int nworkers = starpu_bindid_get_workerids(father->nodes[i].id, &workerids); + int w; + for(w = 0; w < nworkers; w++) + { + if(!visited[workerids[w]] && present[workerids[w]]) + return &father->nodes[i]; + } + } + } + else + { + struct starpu_tree *leaf = _get_down_to_leaves(&father->nodes[i], visited, present); + if(leaf) + return leaf; + } + } + } + + if(tree == father) + return NULL; + + return starpu_tree_get_neighbour(tree, father, visited, present); +} +/* Recursively free the nodes arrays below tree and reset tree to an empty node; tree itself is not freed. */ +void starpu_tree_free(struct starpu_tree *tree) +{ + int i; + for(i = 0; i < tree->arity; i++) + starpu_tree_free(&tree->nodes[i]); + free(tree->nodes); + tree->nodes = NULL; + tree->arity = 0; +} diff --git a/src/core/workers.c b/src/core/workers.c new file mode 100644 index 0000000..b3026ef --- /dev/null +++ b/src/core/workers.c @@ -0,0 +1,3168 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#ifdef __linux__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +#include + +#if defined(_WIN32) +#ifdef __GNUC__ +#define ffs(arg) __builtin_ffs(arg) +#else +#define ffs(arg) _bit_scan_forward(arg) +#endif +#endif + + +static int asynchronous_copy_disabled[STARPU_MAX_RAM+1]; + +/* global knobs */ +static int __g_calibrate_knob; +static int __g_enable_catch_signal_knob; + +/* per-worker knobs */ +static int __w_bind_to_pu_knob; +static int __w_enable_worker_knob; + +static struct starpu_perf_knob_group * __kg_starpu_global; +static struct starpu_perf_knob_group * __kg_starpu_worker__per_worker; + +static void global_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const 
value) +{ + /* context is not used for global knobs */ + STARPU_ASSERT(context == NULL); + (void)context; + + if (knob->id == __g_calibrate_knob) + { + _starpu_set_calibrate_flag((unsigned)value->val_int32_t); + } + else if (knob->id == __g_enable_catch_signal_knob) + { + _starpu_set_catch_signals(!!value->val_int32_t); + } + else + { + STARPU_ASSERT(0); + abort(); + } +} +static void global_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) +{ + /* context is not used for global knobs */ + STARPU_ASSERT(context == NULL); + (void)context; + + if (knob->id == __g_calibrate_knob) + { + value->val_int32_t = (int32_t)_starpu_get_calibrate_flag(); + } + else if (knob->id == __g_enable_catch_signal_knob) + { + value->val_int32_t = _starpu_get_catch_signals(); + } + else + { + STARPU_ASSERT(0); + abort(); + } +} + +static void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) +{ + const unsigned workerid = *(unsigned *)context; + struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid); + if (knob->id == __w_bind_to_pu_knob) + { + STARPU_ASSERT(value->val_int32_t >= 0); + worker->bindid_requested = value->val_int32_t; + } + else if (knob->id == __w_enable_worker_knob) + { + worker->enable_knob = !!value->val_int32_t; + } + else + { + STARPU_ASSERT(0); + abort(); + } +} +static void worker_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) +{ + const unsigned workerid = *(unsigned *)context; + struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid); + if (knob->id == __w_bind_to_pu_knob) + { + value->val_int32_t = worker->bindid; + } + else if (knob->id == __w_enable_worker_knob) + { + value->val_int32_t = worker->enable_knob; + } + else + { + STARPU_ASSERT(0); + abort(); + } +} + +void _starpu__workers_c__register_knobs(void) +{ + { 
+ const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global; + __kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get); + __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_calibrate_knob, int32, "enable or disable performance models calibration (override STARPU_CALIBRATE env var)"); + __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_enable_catch_signal_knob, int32, "enable or disable signal catching (override STARPU_CATCH_SIGNALS env var)"); + } + + { + const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker; + __kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get); + __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_bind_to_pu_knob, int32, "bind worker to PU (PU logical number, override StarPU binding env vars)"); + __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_enable_worker_knob, int32, "enable assigning task to that worker (1:Enabled | [0:Disabled])"); + } + +#if 0 + { + const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler; + __kg_starpu_worker__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get); + } +#endif +} + +void _starpu__workers_c__unregister_knobs(void) +{ + _starpu_perf_knob_group_unregister(__kg_starpu_global); + _starpu_perf_knob_group_unregister(__kg_starpu_worker__per_worker); + __kg_starpu_global = NULL; + __kg_starpu_worker__per_worker = NULL; +} + +/* acquire/release semantic for concurrent initialization/de-initialization */ +static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER; +static int init_count = 0; +static enum initialization initialized = UNINITIALIZED; + +int _starpu_keys_initialized; +starpu_pthread_key_t _starpu_worker_key; +starpu_pthread_key_t 
_starpu_worker_set_key; + +struct _starpu_machine_config _starpu_config; + +static int check_entire_platform; + +int _starpu_worker_parallel_blocks; + +/* Pointers to argc and argv + */ +static int *my_argc = 0; +static char ***my_argv = NULL; + +void _starpu__workers_c__register_kobs(void) +{ + /* TODO */ +} + +struct _starpu_driver_info starpu_driver_info[STARPU_NARCH]; + +void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info) +{ + STARPU_ASSERT(archtype < STARPU_NARCH); + starpu_driver_info[archtype] = *info; +} + +struct _starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1]; + +void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct _starpu_memory_driver_info *info) +{ + starpu_memory_driver_info[kind] = *info; +} + +/* Initialize value of static argc and argv, called when the process begins + */ +void _starpu_set_argc_argv(int *argc_param, char ***argv_param) +{ + my_argc = argc_param; + my_argv = argv_param; +} + +int *_starpu_get_argc() +{ + return my_argc; +} + +char ***_starpu_get_argv() +{ + return my_argv; +} + +int starpu_is_initialized(void) +{ + return initialized != UNINITIALIZED; +} + +void starpu_wait_initialized(void) +{ + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + while (initialized != INITIALIZED) + STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); +} + +/* Makes sure that at least one of the workers of type can execute + * , for at least one of its implementations. 
*/ +static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task, + enum starpu_worker_archtype arch) +{ + _starpu_codelet_check_deprecated_fields(task->cl); + + /* make sure there is a worker on the machine able to execute the + task, independent of the sched_ctx, this latter may receive latter on + the necessary worker - the user or the hypervisor should take care this happens */ + + if (check_entire_platform && !task->cl->can_execute) + { + if (!_starpu_get_machine_config()->topology.ndevices[arch]) + return 0; + + unsigned impl; + for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++) + { + switch (arch) + { + case STARPU_CPU_WORKER: + if (task->cl->cpu_funcs[impl] != NULL) + return 1; + break; + case STARPU_CUDA_WORKER: + if (task->cl->cuda_funcs[impl] != NULL) + return 1; + break; + case STARPU_HIP_WORKER: + if (task->cl->hip_funcs[impl] != NULL) + return 1; + break; + case STARPU_OPENCL_WORKER: + if (task->cl->opencl_funcs[impl] != NULL) + return 1; + break; + case STARPU_MAX_FPGA_WORKER: + if (task->cl->max_fpga_funcs[impl] != NULL) + return 1; + break; + case STARPU_MPI_MS_WORKER: + case STARPU_TCPIP_MS_WORKER: + if (task->cl->cpu_funcs_name[impl] != NULL) + return 1; + break; + default: + STARPU_ABORT(); + } + } + return 0; + } + + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int i = workers->get_next(workers, &it); + if (starpu_worker_get_type(i) != arch) + continue; + + unsigned impl; + for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++) + { + /* We could call task->cl->can_execute(i, task, impl) + here, it would definitely work. It is probably + cheaper to check whether it is necessary in order to + avoid a useless function call, though. 
*/ + unsigned test_implementation = 0; + switch (arch) + { + case STARPU_CPU_WORKER: + if (task->cl->cpu_funcs[impl] != NULL) + test_implementation = 1; + break; + case STARPU_CUDA_WORKER: + if (task->cl->cuda_funcs[impl] != NULL) + test_implementation = 1; + break; + case STARPU_HIP_WORKER: + if (task->cl->hip_funcs[impl] != NULL) + test_implementation = 1; + break; + case STARPU_OPENCL_WORKER: + if (task->cl->opencl_funcs[impl] != NULL) + test_implementation = 1; + break; + case STARPU_MAX_FPGA_WORKER: + if (task->cl->max_fpga_funcs[impl] != NULL) + test_implementation = 1; + break; + case STARPU_MPI_MS_WORKER: + case STARPU_TCPIP_MS_WORKER: + if (task->cl->cpu_funcs_name[impl] != NULL) + test_implementation = 1; + break; + default: + STARPU_ABORT(); + } + + if (!test_implementation) + /* No implementation here, cannot execute */ + continue; + + if (task->cl->can_execute && !task->cl->can_execute(i, task, impl)) + /* The implementation cannot be executed here */ + continue; + + return 1; + } + } + + return 0; +} + +/* + * in case a task is submitted, we may check whether there exists a worker + * that may execute the task or not + */ +uint32_t _starpu_worker_exists(struct starpu_task *task) +{ + _starpu_codelet_check_deprecated_fields(task->cl); + if (task->where == STARPU_NOWHERE) + return 1; + + /* if the task belongs to the init context we can + check out all the worker mask of the machine + if not we should iterate on the workers of the ctx + and verify if it exists a worker able to exec the task */ + if(task->sched_ctx == 0) + { + if (!(task->where & _starpu_config.worker_mask)) + return 0; + + if (!task->cl->can_execute) + return 1; + } + +#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) + if ((task->where & STARPU_CPU) && + _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER)) + return 1; +#endif +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + if ((task->where & STARPU_CUDA) && + _starpu_worker_exists_and_can_execute(task, 
STARPU_CUDA_WORKER)) + return 1; +#endif +#if defined(STARPU_USE_HIP) + if ((task->where & STARPU_HIP) && + _starpu_worker_exists_and_can_execute(task, STARPU_HIP_WORKER)) + return 1; +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + if ((task->where & STARPU_OPENCL) && + _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER)) + return 1; +#endif +#if defined(STARPU_USE_MAX_FPGA) + if ((task->where & STARPU_MAX_FPGA) && + _starpu_worker_exists_and_can_execute(task, STARPU_MAX_FPGA_WORKER)) + return 1; +#endif +#ifdef STARPU_USE_MPI_MASTER_SLAVE + if ((task->where & STARPU_MPI_MS) && + _starpu_worker_exists_and_can_execute(task, STARPU_MPI_MS_WORKER)) + return 1; +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + if ((task->where & STARPU_TCPIP_MS) && + _starpu_worker_exists_and_can_execute(task, STARPU_TCPIP_MS_WORKER)) + return 1; +#endif + + return 0; +} + +uint32_t _starpu_can_submit_ms_task(void) +{ + return (STARPU_MPI_MS & _starpu_config.worker_mask) || (STARPU_TCPIP_MS & _starpu_config.worker_mask) ; +} + +uint32_t _starpu_can_submit_cuda_task(void) +{ + return STARPU_CUDA & _starpu_config.worker_mask; +} + +uint32_t _starpu_can_submit_hip_task(void) +{ + return STARPU_HIP & _starpu_config.worker_mask; +} + +uint32_t _starpu_can_submit_cpu_task(void) +{ + return STARPU_CPU & _starpu_config.worker_mask; +} + +uint32_t _starpu_can_submit_opencl_task(void) +{ + return STARPU_OPENCL & _starpu_config.worker_mask; +} + +static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl) +{ + switch(arch) + { + case STARPU_ANY_WORKER: + { + int cpu_func_enabled=1, cuda_func_enabled=1, hip_func_enabled=1, opencl_func_enabled=1; + +#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) + starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl); + cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count(); +#endif +#if defined(STARPU_USE_CUDA) || 
defined(STARPU_SIMGRID) + starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl); + cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count(); +#endif +#if defined(STARPU_USE_HIP) + starpu_hip_func_t hip_func = _starpu_task_get_hip_nth_implementation(cl, nimpl); + hip_func_enabled = hip_func != NULL && starpu_hip_worker_get_count(); +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl); + opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count(); +#endif + + return cpu_func_enabled && cuda_func_enabled && opencl_func_enabled && hip_func_enabled; + } + case STARPU_CPU_WORKER: + { + starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl); + return func != NULL; + } + case STARPU_CUDA_WORKER: + { + starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl); + return func != NULL; + } + case STARPU_HIP_WORKER: + { + starpu_hip_func_t func = _starpu_task_get_hip_nth_implementation(cl, nimpl); + return func != NULL; + } + case STARPU_OPENCL_WORKER: + { + starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl); + return func != NULL; + } + case STARPU_MAX_FPGA_WORKER: + { + starpu_max_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl); + return func != NULL; + } + case STARPU_MPI_MS_WORKER: + case STARPU_TCPIP_MS_WORKER: + { + const char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl); + return func_name != NULL; + } + default: + STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch); + } + return 0; +} + +int _starpu_enforce_locality(unsigned workerid, struct starpu_task *task) +{ + if (!_starpu_config.conf.data_locality_enforce) + return 1; + + unsigned i, requested_node = starpu_worker_get_memory_node(workerid); + int owner = -1, shared=-1; + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + if 
(STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + unsigned nnodes = starpu_memory_nodes_get_count(); + unsigned node; + for (node = 0; node < nnodes; node++) + { + enum _starpu_cache_state state = handle->per_node[node].state; + if (state == STARPU_OWNER) + owner = node; + if (state == STARPU_SHARED) + shared = node; + } + if (owner != -1 && owner != (int)requested_node) + return 0; + if (shared != -1 && handle->per_node[requested_node].state != STARPU_SHARED) + return 0; + } + return 1; +} + +/* Test if this task can be processed on this worker, regardless of the implementation */ +/* must be called with sched_mutex locked to protect state_blocked */ +static inline int _starpu_can_execute_task_any_impl(unsigned workerid, struct starpu_task *task) +{ + if (!_starpu_enforce_locality(workerid, task)) + return 0; + + if (!_starpu_config.workers[workerid].enable_knob) + return 0; + + if (task->workerids_len) + { + size_t div = sizeof(*task->workerids) * 8; + if (workerid / div >= task->workerids_len || ! 
(task->workerids[workerid / div] & (1UL << workerid % div))) + return 0; + } + + /* if the worker is blocked in a parallel ctx don't submit tasks on it */ +#ifdef STARPU_DEVEL +#warning FIXME: this is very expensive, while can_execute is supposed to be not very costly so schedulers can call it a lot +#endif + if(starpu_worker_is_blocked_in_parallel(workerid)) + return 0; + + if (!(task->where & _starpu_config.workers[workerid].worker_mask)) + return 0; + + return 1; +} + +/* must be called with sched_mutex locked to protect state_blocked_in_parallel */ +int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + /* TODO: check that the task operand sizes will fit on that device */ + return _starpu_can_execute_task_any_impl(workerid, task) && + _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) && + (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)); +} + +/* must be called with sched_mutex locked to protect state_blocked_in_parallel */ +int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask) +{ + if (!_starpu_can_execute_task_any_impl(workerid, task)) + return 0; + + unsigned mask; + int i; + enum starpu_worker_archtype arch; + struct starpu_codelet *cl; + /* TODO: check that the task operand sizes will fit on that device */ + cl = task->cl; + + mask = 0; + arch = _starpu_config.workers[workerid].arch; + if (!task->cl->can_execute) + { + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (_starpu_can_use_nth_implementation(arch, cl, i)) + { + mask |= 1U << i; + if (!impl_mask) + break; + } + } + else + { + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (_starpu_can_use_nth_implementation(arch, cl, i) + && (!task->cl->can_execute || task->cl->can_execute(workerid, task, i))) + { + mask |= 1U << i; + if (!impl_mask) + break; + } + } + if (impl_mask) + *impl_mask = mask; + return mask != 0; +} + +/* must be 
called with sched_mutex locked to protect state_blocked */ +int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl) +{ + if (!_starpu_can_execute_task_any_impl(workerid, task)) + return 0; + int i; + enum starpu_worker_archtype arch; + struct starpu_codelet *cl; + /* TODO: check that the task operand sizes will fit on that device */ + cl = task->cl; + + arch = _starpu_config.workers[workerid].arch; + if (!task->cl->can_execute) + { + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (_starpu_can_use_nth_implementation(arch, cl, i)) + { + if (nimpl) + *nimpl = i; + return 1; + } + } + else + { + for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if (_starpu_can_use_nth_implementation(arch, cl, i) + && (task->cl->can_execute(workerid, task, i))) + { + if (nimpl) + *nimpl = i; + return 1; + } + } + return 0; +} + + + +int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl) +{ + /* TODO: check that the task operand sizes will fit on that device */ + + struct starpu_codelet *cl = task->cl; + unsigned nworkers = _starpu_config.topology.nworkers; + + + /* Is this a parallel worker ? */ + if (workerid < nworkers) + { + if (!_starpu_can_execute_task_any_impl(workerid, task)) + return 0; + + return !!((task->where & _starpu_config.workers[workerid].worker_mask) && + _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) && + (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl))); + } + else + { + if (!_starpu_enforce_locality(workerid, task)) + return 0; + + if (task->workerids_len) + { + size_t div = sizeof(*task->workerids) * 8; + if (workerid / div >= task->workerids_len || ! 
(task->workerids[workerid / div] & (1UL << workerid % div))) + return 0; + } + + if (cl->type == STARPU_SPMD +#ifdef STARPU_HAVE_HWLOC + || cl->type == STARPU_FORKJOIN +#else +#ifdef __GLIBC__ + || cl->type == STARPU_FORKJOIN +#endif +#endif + + ) + { + /* TODO we should add other types of constraints */ + + /* Is the worker larger than requested ? */ + int worker_size = (int)_starpu_config.combined_workers[workerid - nworkers].worker_size; + int worker0 = _starpu_config.combined_workers[workerid - nworkers].combined_workerid[0]; + return !!((worker_size <= task->cl->max_parallelism) && + _starpu_can_use_nth_implementation(_starpu_config.workers[worker0].arch, task->cl, nimpl) && + (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl))); + } + else + { + /* We have a sequential task but a parallel worker */ + return 0; + } + } +} + +/* + * Runtime initialization methods + */ + +static void _starpu_init_worker_queue(struct _starpu_worker *worker) +{ + _starpu_memory_node_register_condition(worker, &worker->sched_cond, worker->memory_node); +} + +/* + * Returns 0 if the given driver is one of the drivers that must be launched by + * the application itself, and not by StarPU, 1 otherwise. + */ +static unsigned _starpu_may_launch_driver(struct starpu_conf *conf, + struct starpu_driver *d) +{ + if (conf->n_not_launched_drivers == 0 || conf->not_launched_drivers == NULL) + return 1; + + /* Is in conf->not_launched_drivers ? */ + unsigned i; + for (i = 0; i < conf->n_not_launched_drivers; i++) + { + if (d->type != conf->not_launched_drivers[i].type) + continue; + + /* Driver porters: adding your driver here is optional, only + * needed for supporting running the driver in a thread provided by + * the application. 
*/ + + switch (d->type) + { + case STARPU_CPU_WORKER: + if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id) + return 0; + break; + case STARPU_CUDA_WORKER: + if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id) + return 0; + break; + case STARPU_HIP_WORKER: + if (d->id.hip_id == conf->not_launched_drivers[i].id.hip_id) + return 0; + break; +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_WORKER: + if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id) + return 0; + break; +#endif + default: + STARPU_ABORT(); + } + } + + return 1; +} + +#ifdef STARPU_PERF_DEBUG +struct itimerval prof_itimer; +#endif + +void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig) +{ + workerarg->config = pconfig; + STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL); + /* arch initialized by topology.c */ + /* worker_mask initialized by topology.c */ + /* perf_arch initialized by topology.c */ + /* worker_thread initialized by _starpu_launch_drivers */ + /* devid initialized by topology.c */ + /* subworkerid initialized by topology.c */ + /* bindid initialized by topology.c */ + /* workerid initialized by topology.c */ + workerarg->combined_workerid = workerarg->workerid; + workerarg->current_rank = 0; + workerarg->worker_size = 1; + STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL); + STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL); + /* memory_node initialized by topology.c */ + STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL); + STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL); + starpu_task_prio_list_init(&workerarg->local_tasks); + _starpu_ctx_change_list_init(&workerarg->ctx_change_list); + workerarg->local_ordered_tasks = NULL; + workerarg->local_ordered_tasks_size = 0; + workerarg->current_ordered_task = 0; + workerarg->current_ordered_task_order = 1; + workerarg->current_task = NULL; +#ifdef STARPU_SIMGRID + starpu_pthread_wait_init(&workerarg->wait); + 
starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]); +#endif + workerarg->task_transferring = NULL; + workerarg->nb_buffers_transferred = 0; + STARPU_HG_DISABLE_CHECKING(workerarg->nb_buffers_transferred); + workerarg->nb_buffers_totransfer = 0; + + workerarg->first_task = 0; + workerarg->ntasks = 0; + /* set initialized by topology.c */ + workerarg->pipeline_length = 0; + workerarg->pipeline_stuck = 0; + workerarg->worker_is_running = 0; + workerarg->worker_is_initialized = 0; + workerarg->wait_for_worker_initialization = 0; + workerarg->status = STATUS_INITIALIZING; + workerarg->state_keep_awake = 0; + /* name initialized by driver */ + /* short_name initialized by driver */ + workerarg->run_by_starpu = 1; + workerarg->driver_ops = NULL; + + workerarg->sched_ctx_list = NULL; + workerarg->tmp_sched_ctx = -1; + workerarg->nsched_ctxs = 0; + _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0); + + workerarg->has_prev_init = 0; + + int ctx; + for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++) + workerarg->removed_from_ctx[ctx] = 0; + + workerarg->spinning_backoff = 1; + + for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++) + { + workerarg->shares_tasks_lists[ctx] = 0; + workerarg->poped_in_ctx[ctx] = 0; + } + workerarg->reverse_phase[0] = 0; + workerarg->reverse_phase[1] = 0; + workerarg->pop_ctx_priority = 1; + workerarg->is_slave_somewhere = 0; + + workerarg->state_relax_refcnt = 1; +#ifdef STARPU_SPINLOCK_CHECK + workerarg->relax_on_file = __FILE__; + workerarg->relax_on_line = __LINE__; + workerarg->relax_on_func = __starpu_func__; + workerarg->relax_off_file = NULL; + workerarg->relax_off_line = 0; + workerarg->relax_off_func = NULL; +#endif + workerarg->state_sched_op_pending = 0; + workerarg->state_changing_ctx_waiting = 0; + workerarg->state_changing_ctx_notice = 0; + workerarg->state_blocked_in_parallel_observed = 0; + workerarg->state_blocked_in_parallel = 0; + workerarg->state_block_in_parallel_req = 0; + 
workerarg->state_block_in_parallel_ack = 0; + workerarg->state_unblock_in_parallel_req = 0; + workerarg->state_unblock_in_parallel_ack = 0; + workerarg->block_in_parallel_ref_count = 0; + _starpu_perf_counter_sample_init(&workerarg->perf_counter_sample, starpu_perf_counter_scope_per_worker); + workerarg->enable_knob = 1; + workerarg->bindid_requested = -1; + + /* cpu_set/hwloc_cpu_set/hwloc_obj initialized in topology.c */ +} + +static void _starpu_worker_deinit(struct _starpu_worker *workerarg) +{ + (void) workerarg; + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_unregister(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]); + starpu_pthread_wait_destroy(&workerarg->wait); +#endif + _starpu_perf_counter_sample_exit(&workerarg->perf_counter_sample); +} + +#ifdef STARPU_USE_FXT +void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync) +{ + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + _STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync); +} +#endif + +void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED) +{ + (void) archtype; + int devid = worker->devid; + (void) devid; + +#ifdef STARPU_USE_FXT + _STARPU_TRACE_REGISTER_THREAD(worker->bindid); + _starpu_worker_start(worker, archtype, sync); +#endif + _starpu_set_local_worker_key(worker); + + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + worker->worker_is_running = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + + _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); + +#if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID) + setitimer(ITIMER_PROF, &prof_itimer, NULL); +#endif + + _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid); +#ifdef STARPU_HAVE_HWLOC 
+ _STARPU_DEBUG("worker %p %d cpuset start at %d\n", worker, worker->workerid, hwloc_bitmap_first(worker->hwloc_cpu_set)); +#endif +} + +static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig) +{ + pconfig->running = 1; + pconfig->pause_depth = 0; + pconfig->submitting = 1; + STARPU_HG_DISABLE_CHECKING(pconfig->watchdog_ok); + + unsigned nworkers = pconfig->topology.nworkers; + unsigned worker; + +#if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID) + /* Get itimer of the main thread, to set it for the worker threads */ + getitimer(ITIMER_PROF, &prof_itimer); +#endif + STARPU_AYU_INIT(); + + /* Launch workers asynchronously */ + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_worker *workerarg = &pconfig->workers[worker]; + workerarg->wait_for_worker_initialization = 0; + + _STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers); + + _starpu_init_worker_queue(workerarg); + + struct _starpu_worker_set *worker_set = workerarg->set; + struct _starpu_worker_set *driver_worker_set = workerarg->driver_worker_set; + + /* For worker sets, we only start a thread for the first worker. 
*/ + if (!worker_set || worker_set->workers == workerarg) + { + struct starpu_driver driver; + + if (worker_set) + { + worker_set->set_is_initialized = 0; + worker_set->wait_for_set_initialization = !driver_worker_set || driver_worker_set == worker_set; + } + + workerarg->driver_ops = starpu_driver_info[workerarg->arch].driver_ops; + workerarg->wait_for_worker_initialization = starpu_driver_info[workerarg->arch].wait_for_worker_initialization; + + if (workerarg->driver_ops) + { + driver.type = workerarg->arch; + if (workerarg->driver_ops->set_devid) + workerarg->driver_ops->set_devid(&driver, workerarg); + } + + if ((driver_worker_set && driver_worker_set != worker_set) || + (workerarg->driver_ops && !_starpu_may_launch_driver(&pconfig->conf, &driver))) + workerarg->run_by_starpu = 0; + } + else + workerarg->run_by_starpu = 0; + } + + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_worker *workerarg = &pconfig->workers[worker]; + struct _starpu_worker_set *worker_set = workerarg->set; + + /* For worker sets, we only start a thread for the first worker. */ + if (workerarg->run_by_starpu) + { + starpu_pthread_t *worker_thread; + + if (worker_set) + worker_thread = &worker_set->worker_thread; + else + worker_thread = &workerarg->worker_thread; + + /* For driver worker sets, we only start a thread for the first worker set. */ + STARPU_PTHREAD_CREATE_ON( + starpu_driver_info[workerarg->arch].name_upper, + worker_thread, + NULL, + starpu_driver_info[workerarg->arch].run_worker, + workerarg, + _starpu_simgrid_get_host_by_worker(workerarg)); + } + +#ifdef STARPU_USE_FXT + /* In tracing mode, make sure the thread is really started + * before starting another one, to make sure they appear in + * order in the trace. 
+ */ + if (fut_active && workerarg->run_by_starpu) + { + STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex); + while (!workerarg->worker_is_running) + STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex); + } +#endif + } + + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_worker *workerarg = &pconfig->workers[worker]; + + _STARPU_DEBUG("waiting for worker %u initialization\n", worker); + if (!workerarg->run_by_starpu) + continue; + + struct _starpu_worker_set *worker_set = workerarg->set; + + if (worker_set && worker_set->wait_for_set_initialization == 1) + { + STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); + while (!worker_set->set_is_initialized) + STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond, + &worker_set->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); + worker_set->started = 1; + worker_set->wait_for_set_initialization = 0; + } + else if (workerarg->wait_for_worker_initialization == 1) + { + STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex); + while (!workerarg->worker_is_initialized) + STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex); + workerarg->wait_for_worker_initialization = 0; + } + } + + _STARPU_DEBUG("finished launching drivers\n"); +} + +void starpu_worker_wait_for_initialisation() +{ + unsigned nworkers = starpu_worker_get_count(); + unsigned workerid; + + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + + _STARPU_DEBUG("waiting for worker %u initialization\n", workerid); + if (!worker->run_by_starpu) + break; + + struct _starpu_worker_set *worker_set = worker->set; + + if (worker_set) + { + STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); + while (!worker_set->set_is_initialized) + STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond, + &worker_set->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); + } + else 
+ { + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + while (!worker->worker_is_initialized) + STARPU_PTHREAD_COND_WAIT(&worker->ready_cond, &worker->mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + } + } +} + +/* Initialize the starpu_conf with default values */ +int starpu_conf_init(struct starpu_conf *conf) +{ + if (!conf) + return -EINVAL; + + memset(conf, 0, sizeof(*conf)); + conf->magic = 42; + conf->will_use_mpi = 0; + conf->sched_policy_name = starpu_getenv("STARPU_SCHED"); + conf->sched_policy = NULL; + conf->global_sched_ctx_min_priority = starpu_getenv_number("STARPU_MIN_PRIO"); + conf->global_sched_ctx_max_priority = starpu_getenv_number("STARPU_MAX_PRIO"); + conf->catch_signals = starpu_getenv_number_default("STARPU_CATCH_SIGNALS", 1); + + /* Note that starpu_getenv_number returns -1 in case the variable is + * not defined */ + /* Backward compatibility: check the value of STARPU_NCPUS if + * STARPU_NCPU is not set. */ + conf->ncpus = starpu_getenv_number("STARPU_NCPU"); + if (conf->ncpus == -1) + conf->ncpus = starpu_getenv_number("STARPU_NCPUS"); + conf->reserve_ncpus = starpu_getenv_number("STARPU_RESERVE_NCPU"); + conf->ncuda = starpu_getenv_number("STARPU_NCUDA"); + conf->nhip = starpu_getenv_number("STARPU_NHIP"); + conf->nopencl = starpu_getenv_number("STARPU_NOPENCL"); + conf->nmax_fpga = starpu_getenv_number("STARPU_NMAX_FPGA"); + conf->nmpi_ms = starpu_getenv_number("STARPU_NMPI_MS"); + conf->ntcpip_ms = starpu_getenv_number("STARPU_NTCPIP_MS"); + conf->calibrate = starpu_getenv_number("STARPU_CALIBRATE"); + conf->bus_calibrate = starpu_getenv_number("STARPU_BUS_CALIBRATE"); + + if (conf->calibrate == -1) + conf->calibrate = 0; + + if (conf->bus_calibrate == -1) + conf->bus_calibrate = 0; + + conf->use_explicit_workers_bindid = 0; /* TODO */ + conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */ + conf->use_explicit_workers_hip_gpuid = 0; /* TODO */ + conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */ + 
conf->use_explicit_workers_max_fpga_deviceid = 0; /* TODO */ + conf->use_explicit_workers_mpi_ms_deviceid = 0; /* TODO */ + + conf->single_combined_worker = starpu_getenv_number("STARPU_SINGLE_COMBINED_WORKER"); + if (conf->single_combined_worker == -1) + conf->single_combined_worker = 0; + +#if defined(STARPU_DATA_LOCALITY_ENFORCE) + conf->data_locality_enforce = 1; +#else + conf->data_locality_enforce = starpu_getenv_number("STARPU_DATA_LOCALITY_ENFORCE"); + if (conf->data_locality_enforce == -1) + conf->data_locality_enforce = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY) + conf->disable_asynchronous_copy = 1; +#else + conf->disable_asynchronous_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_COPY"); + if (conf->disable_asynchronous_copy == -1) + conf->disable_asynchronous_copy = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY) + conf->disable_asynchronous_cuda_copy = 1; +#else + conf->disable_asynchronous_cuda_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY"); + if (conf->disable_asynchronous_cuda_copy == -1) + conf->disable_asynchronous_cuda_copy = 0; +#endif + + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY) + conf->disable_asynchronous_hip_copy = 1; +#else + conf->disable_asynchronous_hip_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY"); + if (conf->disable_asynchronous_hip_copy == -1) + conf->disable_asynchronous_hip_copy = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY) + conf->disable_asynchronous_opencl_copy = 1; +#else + conf->disable_asynchronous_opencl_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY"); + if (conf->disable_asynchronous_opencl_copy == -1) + conf->disable_asynchronous_opencl_copy = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY) + conf->disable_asynchronous_max_fpga_copy = 1; +#else + conf->disable_asynchronous_max_fpga_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY"); + if 
(conf->disable_asynchronous_max_fpga_copy == -1) + conf->disable_asynchronous_max_fpga_copy = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY) + conf->disable_asynchronous_mpi_ms_copy = 1; +#else + conf->disable_asynchronous_mpi_ms_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY"); + if(conf->disable_asynchronous_mpi_ms_copy == -1) + conf->disable_asynchronous_mpi_ms_copy = 0; +#endif + +#if defined(STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY) + conf->disable_asynchronous_tcpip_ms_copy = 1; +#else + conf->disable_asynchronous_tcpip_ms_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY"); + if(conf->disable_asynchronous_tcpip_ms_copy == -1) + conf->disable_asynchronous_tcpip_ms_copy = 0; +#endif + + conf->enable_map = starpu_getenv_number("STARPU_ENABLE_MAP"); + if (conf->enable_map == -1) + conf->enable_map = 0; + + /* 64MiB by default */ + conf->trace_buffer_size = ((uint64_t) starpu_getenv_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20; + + conf->driver_spinning_backoff_min = (unsigned) starpu_getenv_number_default("STARPU_BACKOFF_MIN", 1); + conf->driver_spinning_backoff_max = (unsigned) starpu_getenv_number_default("STARPU_BACKOFF_MAX", 32); + + /* Do not start performance counter collection by default */ + conf->start_perf_counter_collection = 0; + + conf->cuda_only_fast_alloc_other_memnodes = starpu_getenv_number_default("STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES", 0); + return 0; +} + +int starpu_conf_noworker(struct starpu_conf *conf) +{ + conf->ncpus = 0; + conf->ncuda = 0; + conf->nhip = 0; + conf->nopencl = 0; + conf->nmax_fpga = 0; + conf->nmpi_ms = 0; + conf->ntcpip_ms = 0; + return 0; +} + +static void _starpu_conf_set_value_against_environment(char *name, int *value, int precedence_over_env) +{ + if (precedence_over_env == 0) + { + int number; + number = starpu_getenv_number(name); + if (number != -1) + { + *value = number; + } + } +} + +void _starpu_conf_check_environment(struct 
starpu_conf *conf) +{ + char *sched = starpu_getenv("STARPU_SCHED"); + if (sched) + { + conf->sched_policy_name = sched; + } + + _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_RESERVE_NCPU", &conf->reserve_ncpus, conf->precedence_over_environment_variables); + int main_thread_bind = starpu_getenv_number_default("STARPU_MAIN_THREAD_BIND", 0); + if (main_thread_bind) + { + /* Reserve a core for main */ + if (conf->reserve_ncpus == -1) + conf->reserve_ncpus = 1; + else + conf->reserve_ncpus++; + } + _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NHIP", &conf->nhip, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NMAX_FPGA", &conf->nmax_fpga, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NMPI_MS", &conf->nmpi_ms, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_NTCPIP_MS", &conf->ntcpip_ms, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables); +#ifdef STARPU_SIMGRID + if (conf->calibrate == 2) + { + _STARPU_DISP("Warning: History will be cleared due to calibrate or STARPU_CALIBRATE being set to 2. 
This will prevent simgrid from having task simulation times!\n"); + } + if (conf->bus_calibrate) + { + _STARPU_DISP("Warning: Bus calibration will be cleared due to bus_calibrate or STARPU_BUS_CALIBRATE being set. This will prevent simgrid from having data transfer simulation times!\n"); + } +#endif + _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker, conf->precedence_over_environment_variables); + + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY", &conf->disable_asynchronous_hip_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY", &conf->disable_asynchronous_max_fpga_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY", &conf->disable_asynchronous_tcpip_ms_copy, conf->precedence_over_environment_variables); + + _starpu_conf_set_value_against_environment("STARPU_ENABLE_MAP", &conf->enable_map, conf->precedence_over_environment_variables); + + asynchronous_copy_disabled[STARPU_CPU_RAM] = 0; + asynchronous_copy_disabled[STARPU_CUDA_RAM] = conf->disable_asynchronous_cuda_copy; + asynchronous_copy_disabled[STARPU_HIP_RAM] = 
conf->disable_asynchronous_hip_copy; + asynchronous_copy_disabled[STARPU_OPENCL_RAM] = conf->disable_asynchronous_opencl_copy; + asynchronous_copy_disabled[STARPU_MAX_FPGA_RAM] = conf->disable_asynchronous_max_fpga_copy; + asynchronous_copy_disabled[STARPU_DISK_RAM] = 0; + asynchronous_copy_disabled[STARPU_MPI_MS_RAM] = conf->disable_asynchronous_mpi_ms_copy; + asynchronous_copy_disabled[STARPU_TCPIP_MS_RAM] = conf->disable_asynchronous_tcpip_ms_copy; + + _starpu_conf_set_value_against_environment("STARPU_MIN_PRIO", &conf->global_sched_ctx_min_priority, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_MAX_PRIO", &conf->global_sched_ctx_max_priority, conf->precedence_over_environment_variables); + _starpu_conf_set_value_against_environment("STARPU_CATCH_SIGNALS", &conf->catch_signals, conf->precedence_over_environment_variables); +} + +struct starpu_tree* starpu_workers_get_tree(void) +{ + return _starpu_config.topology.tree; +} + +#ifdef STARPU_HAVE_HWLOC +#if HWLOC_API_VERSION >= 0x20000 +#define NORMAL_CHILD(obj) 1 +#else +#define NORMAL_CHILD(obj) ((obj)->type < HWLOC_OBJ_BRIDGE) +#endif + +static void _fill_tree(struct starpu_tree *tree, hwloc_obj_t curr_obj, unsigned depth, hwloc_topology_t topology, struct starpu_tree *father) +{ + unsigned i, j; + unsigned arity; +#if HWLOC_API_VERSION >= 0x20000 + arity = curr_obj->arity; +#else + arity = 0; + for(i = 0; i < curr_obj->arity; i++) + { + if (!NORMAL_CHILD(curr_obj->children[i])) + /* I/O stuff, stop caring */ + break; + arity++; + } +#endif + + if (arity == 1) + { + /* Nothing interesting here, skip level */ + _fill_tree(tree, curr_obj->children[0], depth+1, topology, father); + return; + } + + starpu_tree_insert(tree, curr_obj->logical_index, depth, curr_obj->type == HWLOC_OBJ_PU, arity, father); + starpu_tree_prepare_children(arity, tree); + j = 0; + for(i = 0; i < arity; i++) + { + hwloc_obj_t child = curr_obj->children[i]; + if (!NORMAL_CHILD(child)) + /* 
I/O stuff, stop caring (shouldn't happen, though) */ + break; +#if 0 + char string[128]; + hwloc_obj_snprintf(string, sizeof(string), topology, child, "#", 0); + printf("%*s%s %d is_pu %d \n", 0, "", string, child->logical_index, child->type == HWLOC_OBJ_PU); +#endif + _fill_tree(&tree->nodes[j], child, depth+1, topology, tree); + j++; + } +} +#endif + +static void _starpu_build_tree(void) +{ +#ifdef STARPU_HAVE_HWLOC + struct starpu_tree *tree; + _STARPU_MALLOC(tree, sizeof(struct starpu_tree)); + _starpu_config.topology.tree = tree; + + hwloc_obj_t root = hwloc_get_root_obj(_starpu_config.topology.hwtopology); + +#if 0 + char string[128]; + hwloc_obj_snprintf(string, sizeof(string), topology, root, "#", 0); + printf("%*s%s %d is_pu = %d \n", 0, "", string, root->logical_index, root->type == HWLOC_OBJ_PU); +#endif + + /* level, is_pu, is in the tree (it will be true only after add) */ + _fill_tree(tree, root, 0, _starpu_config.topology.hwtopology, NULL); +#endif +} + +typedef void (*hook_func_t)(void); +static hook_func_t _hook_funcs[10]; +static int _hook_func_nb=0; + +void _starpu_crash_add_hook(void (*hook_func)(void)) +{ + STARPU_ASSERT_MSG(_hook_func_nb < 10, "The number of crash funcs has exceeded the limit\n"); + _hook_funcs[_hook_func_nb] = hook_func; + _hook_func_nb++; +} + +void _starpu_crash_call_hooks() +{ + int i; + + /*_STARPU_DISP("Time: %f\n", starpu_timing_now());*/ + for(i=0 ; i<_hook_func_nb; i++) + _hook_funcs[i](); +} + +static starpu_pthread_mutex_t sig_handlers_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static void (*act_sigint)(int); +static void (*act_sigsegv)(int); +static void (*act_sigabrt)(int); +#ifdef SIGTRAP +static void (*act_sigtrap)(int); +#endif + +void _starpu_handler(int sig) +{ +#ifdef STARPU_VERBOSE + _STARPU_MSG("Catching signal '%d'\n", sig); +#endif +#ifdef STARPU_USE_FXT + _starpu_fxt_dump_file(); +#endif + if (sig == SIGINT) + { + void (*sig_act)(int) = act_sigint; + if (sig_act == NULL) + sig_act = SIG_DFL; + 
signal(SIGINT, sig_act); + } + if (sig == SIGSEGV) + { + void (*sig_act)(int) = act_sigsegv; + if (sig_act == NULL) + sig_act = SIG_DFL; + signal(SIGSEGV, sig_act); + } + if (sig == SIGABRT) + { + void (*sig_act)(int) = act_sigabrt; + if (sig_act == NULL) + sig_act = SIG_DFL; + signal(SIGABRT, sig_act); + } +#ifdef SIGTRAP + if (sig == SIGTRAP) + { + void (*sig_act)(int) = act_sigtrap; + if (sig_act == NULL) + sig_act = SIG_DFL; + signal(SIGTRAP, sig_act); + } +#endif + + _starpu_crash_call_hooks(); +#ifdef STARPU_VERBOSE + _STARPU_MSG("Rearming signal '%d'\n", sig); +#endif + raise(sig); +} + +void _starpu_catch_signals(void) +{ + if (_starpu_config.conf.catch_signals == 1) /* install _starpu_handler, remembering the handlers it replaces */ + { + static void (*old_sig_act)(int); + old_sig_act = signal(SIGINT, _starpu_handler); + if (old_sig_act != _starpu_handler) + act_sigint = old_sig_act; + + old_sig_act = signal(SIGSEGV, _starpu_handler); + if (old_sig_act != _starpu_handler) + act_sigsegv = old_sig_act; + + old_sig_act = signal(SIGABRT, _starpu_handler); + if (old_sig_act != _starpu_handler) + act_sigabrt = old_sig_act; + +#ifdef SIGTRAP + old_sig_act = signal(SIGTRAP, _starpu_handler); + if (old_sig_act != _starpu_handler) + act_sigtrap = old_sig_act; +#endif + } + else /* catching disabled: put back each handler that was saved, then forget it */ + { + if (act_sigint != NULL) + { + signal(SIGINT, act_sigint); + act_sigint = NULL; + } + + if (act_sigsegv != NULL) + { + signal(SIGSEGV, act_sigsegv); + act_sigsegv = NULL; + } + + if (act_sigabrt != NULL) + { + signal(SIGABRT, act_sigabrt); /* restore SIGABRT's own saved handler, not the (already cleared) SIGSEGV one */ + act_sigabrt = NULL; + } + +#ifdef SIGTRAP + if (act_sigtrap != NULL) + { + signal(SIGTRAP, act_sigtrap); + act_sigtrap = NULL; + } +#endif + } +} + +void _starpu_set_catch_signals(int do_catch_signal) +{ + STARPU_PTHREAD_MUTEX_LOCK(&sig_handlers_mutex); + _starpu_config.conf.catch_signals = do_catch_signal; + _starpu_catch_signals(); + STARPU_PTHREAD_MUTEX_UNLOCK(&sig_handlers_mutex); +} + +int _starpu_get_catch_signals(void) +{ + return _starpu_config.conf.catch_signals; +} + +void starpu_drivers_preinit(void) +{ + 
_starpu_cpu_preinit(); + _starpu_cuda_preinit(); + _starpu_hip_preinit(); + _starpu_opencl_preinit(); + _starpu_max_fpga_preinit(); + _starpu_mpi_ms_preinit(); + _starpu_tcpip_ms_preinit(); + _starpu_disk_preinit(); +} + +int starpu_init(struct starpu_conf *user_conf) +{ + return starpu_initialize(user_conf, NULL, NULL); +} + +int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv) +{ + int is_a_sink = 0; /* Always defined. If the MP infrastructure is not + * used, we cannot be a sink. */ + unsigned worker; + +#if !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MP) + (void)argc; + (void)argv; +#endif + + STARPU_HG_DISABLE_CHECKING(_starpu_worker_parallel_blocks); +#ifdef STARPU_SIMGRID + /* This initializes the simgrid thread library, thus needs to be early */ + _starpu_simgrid_init_early(argc, argv); +#endif + + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + while (initialized == CHANGING) + /* Wait for the other one changing it */ + STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex); + init_count++; + if (initialized == INITIALIZED) + { + /* He initialized it, don't do it again, and let the others get the mutex */ + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return 0; + } + /* initialized == UNINITIALIZED */ + initialized = CHANGING; + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + + /* This initializes _starpu_silent, thus needs to be early */ + _starpu_util_init(); + + int rc = _starpu_prof_tool_try_load(); + (void) rc; /* unused for now */ + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_init_begin, 0, 0, starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_init(&pi, NULL, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin(&pi, NULL, NULL); +#endif + +#ifdef STARPU_USE_MP + _starpu_set_argc_argv(argc, argv); + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + if (_starpu_mpi_common_mp_init() == -ENODEV) + { + 
STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + initialized = UNINITIALIZED; + /* Let somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return -ENODEV; + } +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + if (_starpu_tcpip_common_mp_init() == -ENODEV) + { + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + initialized = UNINITIALIZED; + /* Let somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return -ENODEV; + } +#endif + + /* If StarPU was configured to use MP sinks, we have to control the + * kind on node we are running on : host or sink ? */ + if (starpu_getenv("STARPU_SINK")) + is_a_sink = 1; +#endif /* STARPU_USE_MP */ + + int ret; + +#ifdef STARPU_OPENMP + _starpu_omp_dummy_init(); +#endif + +#ifdef STARPU_SIMGRID + /* Warn when the lots of stacks malloc()-ated by simgrid for transfer + * processes will take a long time to get initialized */ + char *perturb = starpu_getenv("MALLOC_PERTURB_"); + if (perturb && perturb[0] && atoi(perturb) != 0) + _STARPU_DISP("Warning: MALLOC_PERTURB_ is set to non-zero, this makes simgrid run very slow\n"); +#else +#ifdef __GNUC__ +#ifndef __OPTIMIZE__ + _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n"); +#endif +#endif +#ifdef STARPU_SPINLOCK_CHECK + _STARPU_DISP("Warning: StarPU was configured with --enable-spinlock-check, which slows down a bit\n"); +#endif +#if 0 +#ifndef STARPU_NO_ASSERT + _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n"); +#endif +#endif +#ifdef STARPU_MEMORY_STATS + _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n"); +#endif +#ifdef STARPU_VERBOSE + _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n"); +#endif +#ifdef STARPU_USE_FXT + if (starpu_fxt_is_enabled()) + _STARPU_DISP("Warning: 
FxT is enabled, which slows down a bit, limits scalability and makes worker initialization sequential\n"); +#else + if (starpu_getenv_number("STARPU_FXT_TRACE") > 0) + _STARPU_DISP("Warning: FxT trace is requested but StarPU was configured without FxT support\n"); +#endif +#ifdef STARPU_FXT_LOCK_TRACES + _STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n"); +#endif +#ifdef STARPU_PERF_DEBUG + _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n"); +#endif +#ifdef STARPU_MODEL_DEBUG + _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n"); +#endif +#ifdef __linux__ + { + struct utsname buf; + if (uname(&buf) == 0 + && (!strncmp(buf.release, "4.7.", 4) + || !strncmp(buf.release, "4.8.", 4))) + _STARPU_DISP("Warning: This system is running a 4.7 or 4.8 kernel. 
These have a severe scheduling performance regression issue, please upgrade to at least 4.9.\n"); + } +#endif +#endif + + if (starpu_getenv("STARPU_ENABLE_STATS")) + { + _STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n"); + } + +#ifndef STARPU_SIMGRID + if (starpu_getenv_number_default("STARPU_SIMGRID", 0)) + { + _STARPU_DISP("Simulation mode requested, but this libstarpu was built without simgrid support, please recompile\n"); + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + initialized = UNINITIALIZED; + /* Let somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return -EINVAL; + } +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) + WSADATA wsadata; + WSAStartup(MAKEWORD(1,0), &wsadata); +#endif + + STARPU_AYU_PREINIT(); + /* store the pointer to the user explicit configuration during the + * initialization */ + if (user_conf == NULL) + starpu_conf_init(&_starpu_config.conf); + else + { + if (user_conf->magic != 42) + { + _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n"); + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + initialized = UNINITIALIZED; + /* Let somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return -EINVAL; + } + _starpu_config.conf = *user_conf; + } + _starpu_conf_check_environment(&_starpu_config.conf); + + if (is_a_sink && _starpu_config.conf.nmpi_ms == 0 && + _starpu_config.conf.ntcpip_ms == 0) + { + /* MS was explicitly disabled, abort sinks and leave source alone */ + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + if (_starpu_mpi_common_is_mp_initialized()) + _starpu_mpi_common_mp_deinit(); +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + if (_starpu_tcpip_common_is_mp_initialized()) + _starpu_tcpip_common_mp_deinit(); +#endif + + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + initialized = UNINITIALIZED; + /* Let 
somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return -ENODEV; + } + + /* Make a copy of arrays */ + if (_starpu_config.conf.sched_policy_name) + _starpu_config.conf.sched_policy_name = strdup(_starpu_config.conf.sched_policy_name); + if (_starpu_config.conf.n_cuda_opengl_interoperability) + { + size_t size = _starpu_config.conf.n_cuda_opengl_interoperability * sizeof(*_starpu_config.conf.cuda_opengl_interoperability); + unsigned *copy; + _STARPU_MALLOC(copy, size); + memcpy(copy, _starpu_config.conf.cuda_opengl_interoperability, size); + _starpu_config.conf.cuda_opengl_interoperability = copy; + } + if (_starpu_config.conf.n_not_launched_drivers) + { + size_t size = _starpu_config.conf.n_not_launched_drivers * sizeof(*_starpu_config.conf.not_launched_drivers); + struct starpu_driver *copy; + _STARPU_MALLOC(copy, size); + memcpy(copy, _starpu_config.conf.not_launched_drivers, size); + _starpu_config.conf.not_launched_drivers = copy; + } + + _hook_func_nb = 0; + + /* Let drivers register themselves */ + starpu_drivers_preinit(); + + _starpu_sched_init(); + _starpu_job_init(); + _starpu_graph_init(); + + _starpu_init_all_sched_ctxs(&_starpu_config); + _starpu_init_progression_hooks(); + _starpu_init_idle_hooks(); + + _starpu_init_tags(); + + _starpu_init_perfmodel(); + +#ifdef STARPU_USE_FXT + _starpu_fxt_init_profiling(_starpu_config.conf.trace_buffer_size); +#endif + + _starpu_open_debug_logfile(); + + _starpu_data_interface_init(); + + _starpu_timing_init(); + + _starpu_load_bus_performance_files(); + + /* Note: nothing before here should be allocating anything, in case we + * actually return ENODEV here */ + + /* Depending on whether we are a MP sink or not, we must build the + * topology with MP nodes or not. 
*/ + ret = _starpu_build_topology(&_starpu_config, is_a_sink); + /* sink doesn't exit even if no worker discovered */ + if (ret && !is_a_sink) + { + starpu_perfmodel_free_sampling(); + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + + _starpu_destroy_machine_config(&_starpu_config, is_a_sink); + + free((char*) _starpu_config.conf.sched_policy_name); + if (_starpu_config.conf.n_cuda_opengl_interoperability) + free(_starpu_config.conf.cuda_opengl_interoperability); + if (_starpu_config.conf.n_not_launched_drivers) + free(_starpu_config.conf.not_launched_drivers); + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + if (_starpu_mpi_common_is_mp_initialized()) + _starpu_mpi_common_mp_deinit(); +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + if (_starpu_tcpip_common_is_mp_initialized()) + _starpu_tcpip_common_mp_deinit(); +#endif + + initialized = UNINITIALIZED; + /* Let somebody else try to do it */ + STARPU_PTHREAD_COND_SIGNAL(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + +#ifdef STARPU_USE_FXT + _starpu_stop_fxt_profiling(); +#endif + return ret; + } + + _starpu_profiling_init(); + + _starpu_task_init(); + + for (worker = 0; worker < _starpu_config.topology.nworkers; worker++) + _starpu_worker_init(&_starpu_config.workers[worker], &_starpu_config); + +//FIXME: find out if the variable STARPU_CHECK_ENTIRE_PLATFORM is really needed, for now, just set 1 as a default value + check_entire_platform = 1;//starpu_getenv_number("STARPU_CHECK_ENTIRE_PLATFORM"); + + _starpu_config.disable_kernels = starpu_getenv_number("STARPU_DISABLE_KERNELS"); + STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_key, NULL); + STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_set_key, NULL); + _starpu_keys_initialized = 1; + STARPU_WMB(); + + if (!is_a_sink) + { + _starpu_build_tree(); + + struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&_starpu_config, _starpu_config.conf.sched_policy_name); + _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", 
(_starpu_config.conf.global_sched_ctx_min_priority != -1), _starpu_config.conf.global_sched_ctx_min_priority, (_starpu_config.conf.global_sched_ctx_max_priority != -1), _starpu_config.conf.global_sched_ctx_max_priority, 1, _starpu_config.conf.sched_policy_callback, NULL, 0, NULL, 0); + } + + _starpu_initialize_registered_performance_models(); + _starpu_perf_counter_init(&_starpu_config); + _starpu_perf_knob_init(); + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + _starpu_cuda_init(); +#endif + +#if defined(STARPU_USE_HIP) + _starpu_hip_init(); +#endif + +#ifdef STARPU_SIMGRID + _starpu_simgrid_init(); +#endif + if (!is_a_sink) + { + /* Launch "basic" workers (ie. non-combined workers) */ + _starpu_launch_drivers(&_starpu_config); + /* Allocate swap, if any */ + _starpu_swap_init(); + } + + _starpu_watchdog_init(); + + _starpu_profiling_start(); + + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + initialized = INITIALIZED; + /* Tell everybody that we initialized */ + STARPU_PTHREAD_COND_BROADCAST(&init_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + + _STARPU_DEBUG("Initialisation finished\n"); + +#ifdef STARPU_USE_MP + /* Finally, if we are a MP sink, we never leave this function. Else, + * we enter an infinite event loop which listen for MP commands from + * the source. 
*/ + if (is_a_sink) + { + _starpu_sink_common_worker(); + + /* We should normally never leave the loop as we don't want to + * really initialize STARPU */ + STARPU_ASSERT(0); + } +#endif + + _starpu_catch_signals(); + + /* if MPI is enabled, binding display will be done later, after MPI initialization */ + if (!_starpu_config.conf.will_use_mpi && starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0)) + { + fprintf(stdout, "== Binding ==\n"); + starpu_display_bindings(); + fprintf(stdout, "== End of binding ==\n"); + fflush(stdout); + } + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_init_end, 0, starpu_prof_tool_driver_cpu, &(_starpu_config.conf)); + pi.conf = &_starpu_config.conf; + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end(&pi, NULL, NULL); +#endif + + return 0; +} + +/* + * Handle runtime termination + */ + +static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig) +{ + int status = 0; + unsigned workerid; + unsigned n; + + starpu_wake_all_blocked_workers(); + + for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++) + { + _STARPU_DEBUG("wait for worker %u\n", workerid); + + struct _starpu_worker_set *set = pconfig->workers[workerid].set; + struct _starpu_worker *worker = &pconfig->workers[workerid]; + + /* in case StarPU termination code is called from a callback, + * we have to check if starpu_pthread_self() is the worker itself */ + if (set && set->nworkers > 0) + { + if (set->started) + { + if (!starpu_pthread_equal(starpu_pthread_self(), set->worker_thread)) + status = starpu_pthread_join(set->worker_thread, NULL); + if (status) + { +#ifdef STARPU_VERBOSE + _STARPU_DEBUG("starpu_pthread_join -> %d\n", status); +#endif + } + set->started = 0; + } + } + else + { + if (!worker->run_by_starpu) + goto out; + + if (!starpu_pthread_equal(starpu_pthread_self(), worker->worker_thread)) + status = starpu_pthread_join(worker->worker_thread, NULL); + if (status) + { +#ifdef 
STARPU_VERBOSE + _STARPU_DEBUG("starpu_pthread_join -> %d\n", status); +#endif + } + } + +out: + STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks)); + for (n = 0; n < worker->local_ordered_tasks_size; n++) + STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL); + _starpu_sched_ctx_list_delete(&worker->sched_ctx_list); + free(worker->local_ordered_tasks); + STARPU_ASSERT(_starpu_ctx_change_list_empty(&worker->ctx_change_list)); + } + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_terminate, 0, starpu_prof_tool_driver_cpu, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate(&pi, NULL, NULL); +#endif +} + +/* Condition variable and mutex used to pause/resume. */ +static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER; +static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + +void _starpu_may_pause(void) +{ + /* pause_depth is just protected by a memory barrier */ + STARPU_RMB(); + + if (STARPU_UNLIKELY(_starpu_config.pause_depth > 0)) + { + STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex); + if (_starpu_config.pause_depth > 0) + { + STARPU_PTHREAD_COND_WAIT(&pause_cond, &pause_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex); + } +} + +void starpu_pause() +{ + STARPU_HG_DISABLE_CHECKING(_starpu_config.pause_depth); + _starpu_config.pause_depth += 1; + + starpu_fxt_trace_user_event_string("starpu_pause"); +} + +void starpu_resume() +{ + STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex); + _starpu_config.pause_depth -= 1; + if (!_starpu_config.pause_depth) + { + STARPU_PTHREAD_COND_BROADCAST(&pause_cond); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex); + + starpu_fxt_trace_user_event_string("starpu_resume"); +} + +int starpu_is_paused() +{ + STARPU_RMB(); + return _starpu_config.pause_depth > 0; +} + +unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) +{ 
+#ifdef STARPU_NON_BLOCKING_DRIVERS + return 0; +#else + /* do not block if a sched_ctx change operation is pending */ + if (worker->state_changing_ctx_notice) + return 0; + + unsigned can_block = 1; + + struct starpu_driver driver; + driver.type = worker->arch; + switch (driver.type) + { + case STARPU_CPU_WORKER: + driver.id.cpu_id = worker->devid; + break; + case STARPU_CUDA_WORKER: + driver.id.cuda_id = worker->devid; + break; + case STARPU_HIP_WORKER: + driver.id.hip_id = worker->devid; + break; +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_WORKER: + starpu_opencl_get_device(worker->devid, &driver.id.opencl_id); + break; +#endif + default: + goto always_launch; + } + if (!_starpu_may_launch_driver(&_starpu_config.conf, &driver)) + return 0; + +always_launch: + +#ifndef STARPU_SIMGRID + if (!_starpu_check_that_no_data_request_exists(memnode)) + can_block = 0; +#endif + + if (!_starpu_machine_is_running()) + can_block = 0; + + if (!_starpu_execute_registered_progression_hooks()) + can_block = 0; + + return can_block; +#endif +} + +static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig) +{ + /* set the flag which will tell workers to stop */ + ANNOTATE_HAPPENS_AFTER(&_starpu_config.running); + pconfig->running = 0; + /* running is just protected by a memory barrier */ + ANNOTATE_HAPPENS_BEFORE(&_starpu_config.running); + STARPU_WMB(); + starpu_wake_all_blocked_workers(); +} + +void starpu_display_stats() +{ + starpu_profiling_bus_helper_display_summary(); + starpu_profiling_worker_helper_display_summary(); +} + +void starpu_shutdown(void) +{ + unsigned worker; + STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); + init_count--; + STARPU_ASSERT_MSG(init_count >= 0, "Number of calls to starpu_shutdown() can not be higher than the number of calls to starpu_init()\n"); + if (init_count) + { + _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n"); + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + return; + } + + /* We're last */ + initialized = 
CHANGING; + STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); + + /* If the workers are frozen, no progress can be made. */ + STARPU_ASSERT_MSG(_starpu_config.pause_depth <= 0, "Did you forget to call starpu_resume before starpu_shutdown?"); + + starpu_task_wait_for_no_ready(); + + starpu_worker_wait_for_initialisation(); + + /* tell all workers to shutdown */ + _starpu_kill_all_workers(&_starpu_config); + + unsigned i; + unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); + for (i=0; i= 0 && type < STARPU_NARCH) + && (starpu_driver_info[type].name_upper != NULL); +} + +enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask) +{ + STARPU_ASSERT(mask && !(mask & (mask-1))); // ensures that only one bit of the mask is set + + enum starpu_worker_archtype worker_type = ffs(mask)-2; // ffs(mask) is the indice of the lesser bit + + STARPU_ASSERT(worker_type < STARPU_NARCH); // worker_type is positive and lesser than arch number + STARPU_ASSERT(starpu_worker_archtype_is_valid(worker_type)); // worker_type is a valid worker architecture + + return worker_type; +} + +#undef starpu_worker_get_count +unsigned starpu_worker_get_count(void) +{ + return _starpu_config.topology.nworkers; +} + +void starpu_worker_get_current_task_exp_end(unsigned workerid, struct timespec *date) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + *date = worker->cl_expend; +} + +unsigned starpu_worker_is_blocked_in_parallel(int workerid) +{ + if (!_starpu_worker_parallel_blocks) + return 0; + int relax_own_observation_state = 0; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + struct _starpu_worker *cur_worker = NULL; + int cur_workerid = starpu_worker_get_id(); + if (workerid != cur_workerid) + { + /* in order to observe the 'blocked' state of a worker from + * another worker, we must avoid race conditions between + * 'blocked' state 
changes and state observations. This is the + * purpose of this 'if' block. */ + cur_worker = cur_workerid >= 0 ? _starpu_get_worker_struct(cur_workerid) : NULL; + + relax_own_observation_state = (cur_worker != NULL) && (cur_worker->state_relax_refcnt == 0); + if (relax_own_observation_state && !worker->state_relax_refcnt) + { + /* moreover, when a worker (cur_worker != NULL) + * observes another worker, we need to take special + * care to avoid live locks, thus the observing worker + * must enter the relaxed state (if not relaxed + * already) before doing the observation in mutual + * exclusion */ + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex); + cur_worker->state_relax_refcnt = 1; + STARPU_PTHREAD_COND_BROADCAST(&cur_worker->sched_cond); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + } + /* the observer waits for a safe window to observe the state, + * and also waits for any pending blocking state change + * requests to be processed, in order to not obtain an + * ephemeral information */ + while (!worker->state_relax_refcnt + || worker->state_block_in_parallel_req + || worker->state_unblock_in_parallel_req) + { + STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); + } + } + unsigned ret = _starpu_config.workers[workerid].state_blocked_in_parallel; + /* once a worker state has been observed, the worker is 'tainted' for the next one full sched_op, + * to avoid changing the observed worker state - on which the observer + * made a scheduling decision - after the fact. 
*/ + worker->state_blocked_in_parallel_observed = 1; + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + if (relax_own_observation_state) + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex); + cur_worker->state_relax_refcnt = 0; + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); + } + return ret; +} + +unsigned starpu_worker_is_slave_somewhere(int workerid) +{ + starpu_worker_lock(workerid); + unsigned ret = _starpu_config.workers[workerid].is_slave_somewhere; + starpu_worker_unlock(workerid); + return ret; +} + +int starpu_worker_get_count_by_type(enum starpu_worker_archtype type) +{ + unsigned n = 0; + + if (type != STARPU_ANY_WORKER) + { + if (type >= STARPU_NARCH) + return -EINVAL; + + unsigned i; + for (i = 0; i < _starpu_config.topology.ndevices[type]; i++) + n += _starpu_config.topology.nworker[type][i]; + return n; + } + + for (type = 0; type < STARPU_NARCH; type++) + n += starpu_worker_get_count_by_type(type); + return n; +} + +unsigned starpu_combined_worker_get_count(void) +{ + return _starpu_config.topology.ncombinedworkers; +} + +unsigned starpu_cpu_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_CPU_WORKER); +} + +unsigned starpu_cuda_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); +} + + +unsigned starpu_hip_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_HIP_WORKER); +} + +unsigned starpu_opencl_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); +} + +int starpu_asynchronous_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_copy; +} + +int starpu_asynchronous_cuda_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_cuda_copy; +} + + +int starpu_asynchronous_hip_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_hip_copy; +} + +int starpu_asynchronous_opencl_copy_disabled(void) +{ + return 
_starpu_config.conf.disable_asynchronous_opencl_copy; +} + +int starpu_asynchronous_max_fpga_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_max_fpga_copy; +} + +int starpu_asynchronous_mpi_ms_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_mpi_ms_copy; +} + +int starpu_asynchronous_tcpip_ms_copy_disabled(void) +{ + return _starpu_config.conf.disable_asynchronous_tcpip_ms_copy; +} + +/* Return whether memory mapping is disabled (0) or enabled (1) */ +int starpu_map_enabled(void) +{ + return _starpu_config.conf.enable_map; +} + +int starpu_asynchronous_copy_disabled_for(enum starpu_node_kind kind) +{ + return asynchronous_copy_disabled[kind]; +} + +unsigned starpu_mpi_ms_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER); +} + +unsigned starpu_tcpip_ms_worker_get_count(void) +{ + return starpu_worker_get_count_by_type(STARPU_TCPIP_MS_WORKER); +} + +/* When analyzing performance, it is useful to see what is the processing unit + * that actually performed the task. This function returns the id of the + * processing unit actually executing it, therefore it makes no sense to use it + * within the callbacks of SPU functions for instance. If called by some thread + * that is not controlled by StarPU, starpu_worker_get_id returns -1. 
*/ +#undef starpu_worker_get_id +int starpu_worker_get_id(void) +{ + struct _starpu_worker * worker; + + worker = _starpu_get_local_worker_key(); + if (worker) + { + return worker->workerid; + } + else + { + /* there is no worker associated to that thread, perhaps it is + * a thread from the application or this is some SPU worker */ + return -1; + } +} +#define starpu_worker_get_id _starpu_worker_get_id + +#undef _starpu_worker_get_id_check +unsigned _starpu_worker_get_id_check(const char *f, int l) +{ + (void) f; + (void) l; + int id = _starpu_worker_get_id(); + STARPU_ASSERT_MSG(id>=0, "%s:%d Cannot be called from outside a worker\n", f, l); + return id; +} + +int starpu_combined_worker_get_id(void) +{ + struct _starpu_worker *worker; + + worker = _starpu_get_local_worker_key(); + if (worker) + { + return worker->combined_workerid; + } + else + { + /* there is no worker associated to that thread, perhaps it is + * a thread from the application or this is some SPU worker */ + return -1; + } +} + +int starpu_combined_worker_get_size(void) +{ + struct _starpu_worker *worker; + + worker = _starpu_get_local_worker_key(); + if (worker) + { + return worker->worker_size; + } + else + { + /* there is no worker associated to that thread, perhaps it is + * a thread from the application or this is some SPU worker */ + return -1; + } +} + +int starpu_combined_worker_get_rank(void) +{ + struct _starpu_worker *worker; + + worker = _starpu_get_local_worker_key(); + if (worker) + { + return worker->current_rank; + } + else + { + /* there is no worker associated to that thread, perhaps it is + * a thread from the application or this is some SPU worker */ + return -1; + } +} + +int starpu_worker_get_subworkerid(int id) +{ + return _starpu_config.workers[id].subworkerid; +} + +int starpu_worker_get_devid(int id) +{ + return _starpu_config.workers[id].devid; +} + +int starpu_worker_get_devnum(int id) +{ + return _starpu_config.workers[id].devnum; +} + +unsigned 
starpu_worker_is_combined_worker(int id) +{ + return id >= (int)_starpu_config.topology.nworkers; +} + +struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id) +{ + unsigned basic_worker_count = starpu_worker_get_count(); + + //_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count); + + STARPU_ASSERT(id >= basic_worker_count); + return &_starpu_config.combined_workers[id - basic_worker_count]; +} + +enum starpu_worker_archtype starpu_worker_get_type(int id) +{ + enum starpu_worker_archtype type = _starpu_config.workers[id].arch; + STARPU_ASSERT(type < STARPU_NARCH); + return type; +} + +unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize) +{ + unsigned nworkers = starpu_worker_get_count(); + + unsigned cnt = 0; + + unsigned id; + for (id = 0; id < nworkers; id++) + { + if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) + { + /* Perhaps the array is too small ? */ + if (cnt >= maxsize) + return -ERANGE; + + workerids[cnt++] = id; + } + } + + return cnt; +} + +int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num) +{ + unsigned nworkers = starpu_worker_get_count(); + + int cnt = 0; + + unsigned id; + for (id = 0; id < nworkers; id++) + { + if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) + { + if (num == cnt) + return id; + cnt++; + } + } + + /* Not found */ + return -1; +} + +int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid) +{ + unsigned nworkers = starpu_worker_get_count(); + + unsigned id; + for (id = 0; id < nworkers; id++) + if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid) + return id; + + /* Not found */ + return -1; +} + +int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num) +{ + unsigned nworkers = starpu_worker_get_count(); + int workerids[nworkers]; + + unsigned ndevice_workers = starpu_worker_get_ids_by_type(type, workerids, 
nworkers); + + unsigned ndevids = 0; + + if(ndevice_workers > 0) + { + unsigned id, devid; + int cnt = 0; + unsigned found = 0; + for(id = 0; id < ndevice_workers; id++) + { + int curr_devid; + curr_devid = _starpu_config.workers[workerids[id]].devid; + for(devid = 0; devid < ndevids; devid++) + { + if(curr_devid == devids[devid]) + { + found = 1; + break; + } + } + if(!found) + { + devids[ndevids++] = curr_devid; + cnt++; + } + else + found = 0; + + if(cnt == num) + break; + } + } + + return ndevids; +} + +unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task) +{ + return (STARPU_WORKER_TO_MASK(worker_type) & task->where) != 0; +} + +void starpu_worker_get_name(int id, char *dst, size_t maxlen) +{ + char *name = _starpu_config.workers[id].name; + + snprintf(dst, maxlen, "%s", name); +} + +int starpu_worker_get_bindid(int workerid) +{ + return _starpu_config.workers[workerid].bindid; +} + +int starpu_bindid_get_workerids(int bindid, int **workerids) +{ + if (bindid >= (int) _starpu_config.nbindid) + return 0; + *workerids = _starpu_config.bindid_workers[bindid].workerids; + return _starpu_config.bindid_workers[bindid].nworkers; +} + +int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type) +{ + unsigned nworkers = starpu_worker_get_count(); + int nw = 0; + unsigned id; + for (id = 0; id < nworkers; id++) + { + if (_starpu_config.workers[id].devid == devid && + (type == STARPU_ANY_WORKER || _starpu_config.workers[id].arch == type)) + workerids[nw++] = id; + } + return nw; +} + +void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond) +{ + STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); + *sched_cond = &_starpu_config.workers[workerid].sched_cond; + *sched_mutex = &_starpu_config.workers[workerid].sched_mutex; +} + +/* returns 1 if the call results in initiating a transition 
of worker WORKERID + * from sleeping state to awake + * returns 0 if worker WORKERID is not sleeping or the wake-up transition + * already has been initiated + */ +static int starpu_wakeup_worker_locked(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *mutex STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_SIMGRID + starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[workerid]); +#endif + if (_starpu_config.workers[workerid].status & STATUS_SLEEPING) + { + int ret = 0; + if (_starpu_config.workers[workerid].state_keep_awake != 1) + { + _starpu_config.workers[workerid].state_keep_awake = 1; + ret = 1; + } + /* cond_broadcast is required over cond_signal since + * the condition is share for multiple purpose */ + STARPU_PTHREAD_COND_BROADCAST(sched_cond); + return ret; + } + else if (_starpu_config.workers[workerid].status & STATUS_SCHEDULING) + { + _starpu_config.workers[workerid].state_keep_awake = 1; + return 0; + } + return 0; +} + +static int starpu_wakeup_worker_no_relax(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *sched_mutex) +{ + int success; + STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); + success = starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); + return success; +} + +int starpu_wake_worker_locked(int workerid) +{ + starpu_pthread_mutex_t *sched_mutex; + starpu_pthread_cond_t *sched_cond; + starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond); + return starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex); +} + +int starpu_wake_worker_no_relax(int workerid) +{ + starpu_pthread_mutex_t *sched_mutex; + starpu_pthread_cond_t *sched_cond; + starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond); + return starpu_wakeup_worker_no_relax(workerid, sched_cond, sched_mutex); +} + +int _starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize) +{ + unsigned nworkers = 
starpu_worker_get_count(); + + int cnt = 0; + + unsigned id; + for (id = 0; id < nworkers; id++) + { + if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) + { + /* Perhaps the array is too small ? */ + if (cnt >= maxsize) + return cnt; + + workerids[cnt++] = id; + } + } + + return cnt; +} + +int _starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize) +{ + unsigned nworkers = starpu_worker_get_count(); + int cnt = 0; + unsigned id; + + for (id = 0; id < nworkers; id++) + { + if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) + { + /* Perhaps the array is too small ? */ + if (cnt >= maxsize) + return cnt; + unsigned found = 0; + int s; + for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) + { + if(_starpu_config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) + { + struct starpu_worker_collection *workers = _starpu_config.sched_ctxs[s].workers; + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + if(worker == id) + { + found = 1; + break; + } + } + + if(found) + break; + } + } + if(!found) + workerids[cnt++] = id; + } + } + + return cnt; +} + +void starpu_get_version(int *major, int *minor, int *release) +{ + *major = STARPU_MAJOR_VERSION; + *minor = STARPU_MINOR_VERSION; + *release = STARPU_RELEASE_VERSION; +} + +unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs) +{ + unsigned s = 0; + unsigned nsched_ctxs = _starpu_worker_get_nsched_ctxs(workerid); + _STARPU_MALLOC(*sched_ctxs, nsched_ctxs*sizeof(unsigned)); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct _starpu_sched_ctx_elt *e = NULL; + struct _starpu_sched_ctx_list_iterator list_it; + + _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); + while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) + { + e = 
_starpu_sched_ctx_list_iterator_get_next(&list_it); + (*sched_ctxs)[s++] = e->sched_ctx; + } + return nsched_ctxs; +} + +const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type) +{ + STARPU_ASSERT(type < STARPU_NARCH); + const char *ret = starpu_driver_info[type].name_upper; + if (!ret) + ret = "unknown"; + return ret; +} + +enum starpu_worker_archtype starpu_worker_get_type_from_string(const char *name) +{ + enum starpu_worker_archtype type; + for (type = 0; type < STARPU_NARCH; type++) + { + if (starpu_driver_info[type].name_upper /* may be NULL, cf. starpu_worker_archtype_is_valid(); never pass NULL to strcmp() */ && !strcmp(name, starpu_driver_info[type].name_upper)) + return type; + } + return STARPU_UNKNOWN_WORKER; +} + +const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type) +{ + STARPU_ASSERT(type < STARPU_NARCH); + const char *ret = starpu_driver_info[type].name_var; + if (!ret) + ret = "UNKNOWN"; + return ret; +} + +void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx) +{ + STARPU_ASSERT(workerid < starpu_worker_get_count()); + struct _starpu_worker *w = _starpu_get_worker_struct(workerid); + w->stream_ctx = sched_ctx; +} + +struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid) +{ + if (stream_workerid >= starpu_worker_get_count()) + return NULL; + struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid); + return w->stream_ctx; +} + +unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid) +{ + if (stream_workerid >= starpu_worker_get_count()) + return STARPU_NMAX_SCHED_CTXS; + struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid); + return w->stream_ctx != NULL ?
w->stream_ctx->id : STARPU_NMAX_SCHED_CTXS; +} + +void starpu_worker_display_count(FILE *output, enum starpu_worker_archtype type) +{ + int nworkers = starpu_worker_get_count_by_type(type); + if (nworkers <= 0) + { + fprintf(output, "No %s worker\n", starpu_worker_get_type_as_string(type)); + } + else + { + int ids[nworkers]; + starpu_worker_get_ids_by_type(type, ids, nworkers); + fprintf(output, "%d %s worker%s\n", nworkers, starpu_worker_get_type_as_string(type), nworkers==1?"":"s"); + } +} + +void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type) +{ + int nworkers; + if (!starpu_driver_info[type].name_upper) + return; + + nworkers = starpu_worker_get_count_by_type(type); + if (nworkers <= 0) + { + fprintf(output, "No %s worker\n", starpu_worker_get_type_as_string(type)); + } + else + { + int i, ids[nworkers]; + starpu_worker_get_ids_by_type(type, ids, nworkers); + fprintf(output, "%d %s worker%s:\n", nworkers, starpu_worker_get_type_as_string(type), nworkers==1?"":"s"); + for(i = 0; i < nworkers; i++) + { + char name[256]; + starpu_worker_get_name(ids[i], name, 256); + fprintf(output, "\t%s\n", name); + } + } +} + +void starpu_worker_display_all(FILE *output) +{ + enum starpu_worker_archtype type; + for (type = 0; type < STARPU_NARCH; type++) + starpu_worker_display_names(output, type); +} + +void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_task *task) +{ + if (worker->pipeline_length || worker->arch == STARPU_OPENCL_WORKER) + { + int j; + for (j = 0; j < worker->ntasks; j++) + { + const int j_mod = (j+worker->first_task)%STARPU_MAX_PIPELINE; + if (task == worker->current_tasks[j_mod]) + { + worker->current_tasks[j_mod] = NULL; + if (j == 0) + { + worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; + worker->current_task = NULL; + _starpu_set_current_task(NULL); + } + break; + } + } + STARPU_ASSERT(j<worker->ntasks); /* the loop above must have found the refused task in the pipeline */ + } + else + { + worker->current_task = NULL; + _starpu_set_current_task(NULL); + } +
worker->ntasks--; + task->prefetched = 0; + int res = _starpu_push_task_to_workers(task); + STARPU_ASSERT_MSG(res == 0, "_starpu_push_task_to_workers() unexpectedly returned = %d\n", res); +} + +int starpu_worker_sched_op_pending(void) +{ + return _starpu_worker_sched_op_pending(); +} + +#undef starpu_worker_relax_on +void starpu_worker_relax_on(void) +{ + _starpu_worker_relax_on(); +} + +#undef starpu_worker_relax_off +void starpu_worker_relax_off(void) +{ + _starpu_worker_relax_off(); +} + +#undef starpu_worker_get_relax_state +int starpu_worker_get_relax_state(void) +{ + return _starpu_worker_get_relax_state(); +} + +#undef starpu_worker_lock +void starpu_worker_lock(int workerid) +{ + _starpu_worker_lock(workerid); +} + +#undef starpu_worker_trylock +int starpu_worker_trylock(int workerid) +{ + return _starpu_worker_trylock(workerid); +} + +#undef starpu_worker_unlock +void starpu_worker_unlock(int workerid) +{ + _starpu_worker_unlock(workerid); +} + +#undef starpu_worker_lock_self +void starpu_worker_lock_self(void) +{ + _starpu_worker_lock_self(); +} + +#undef starpu_worker_unlock_self +void starpu_worker_unlock_self(void) +{ + _starpu_worker_unlock_self(); +} + +#undef starpu_wake_worker_relax +int starpu_wake_worker_relax(int workerid) +{ + return _starpu_wake_worker_relax(workerid); +} + +#ifdef STARPU_HAVE_HWLOC +hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + return hwloc_bitmap_dup(worker->hwloc_cpu_set); +} +hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + return worker->hwloc_obj; +} +#endif + +/* Light version of _starpu_wake_worker_relax, which, when possible, + * speculatively sets keep_awake on the target worker without waiting that + * worker to enter the relaxed state. 
+ */ +int starpu_wake_worker_relax_light(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + int cur_workerid = starpu_worker_get_id(); + if (workerid != cur_workerid) + { + starpu_worker_relax_on(); + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + while (!worker->state_relax_refcnt) + { + /* Attempt a fast path if the worker is not really asleep */ + if (_starpu_config.workers[workerid].status & STATUS_SCHEDULING) + { + _starpu_config.workers[workerid].state_keep_awake = 1; + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + starpu_worker_relax_off(); + return 1; + } + + STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); + } + } + else + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + } + int ret = starpu_wake_worker_locked(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + if (workerid != cur_workerid) + { + starpu_worker_relax_off(); + } + return ret; +} + +#ifdef STARPU_WORKER_CALLBACKS +void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid)) +{ + STARPU_ASSERT(_starpu_config.conf.callback_worker_going_to_sleep); + _starpu_config.conf.callback_worker_going_to_sleep = callback; +} + +void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid)) +{ + STARPU_ASSERT(_starpu_config.conf.callback_worker_waking_up); + _starpu_config.conf.callback_worker_waking_up = callback; +} +#endif + +enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type) +{ + STARPU_ASSERT(type < STARPU_NARCH); + enum starpu_node_kind kind = starpu_driver_info[type].memory_kind; + STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type); + return kind; +} diff --git a/src/core/workers.h b/src/core/workers.h new file mode 100644 index 0000000..23b30bb --- /dev/null +++ b/src/core/workers.h @@ -0,0 +1,1339 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __WORKERS_H__ +#define __WORKERS_H__ + +/** \addtogroup workers */ +/* @{ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_HAVE_HWLOC +#include +#endif +#include + +#include +#include +#include +#include + +#ifdef STARPU_USE_MPI_MASTER_SLAVE +#include +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE +#include +#endif + +#include + +#include +#include + +#pragma GCC visibility push(hidden) + +#define STARPU_MAX_PIPELINE 4 + +struct mc_cache_entry; +struct _starpu_node +{ + /* + * used by memalloc.c + */ + /** This per-node RW-locks protect mc_list and memchunk_cache entries */ + /* Note: handle header lock is always taken before this (normal add/remove case) */ + struct _starpu_spinlock mc_lock; + + /** Potentially in use memory chunks. The beginning of the list is clean (home + * node has a copy of the data, or the data is being transferred there), the + * remainder of the list may not be clean. */ + struct _starpu_mem_chunk_list mc_list; + /** This is a shortcut inside the mc_list to the first potentially dirty MC. All + * MC before this are clean, MC before this only *may* be clean. 
*/ + struct _starpu_mem_chunk *mc_dirty_head; + /* TODO: introduce head of data to be evicted */ + /** Number of elements in mc_list, number of elements in the clean part of + * mc_list plus the non-automatically allocated elements (which are thus always + * considered as clean) */ + unsigned mc_nb, mc_clean_nb; + + struct mc_cache_entry *mc_cache; + int mc_cache_nb; + starpu_ssize_t mc_cache_size; + + /** Whether some thread is currently tidying this node */ + unsigned tidying; + /** Whether some thread is currently reclaiming memory for this node */ + unsigned reclaiming; + + /** This records that we tried to prefetch data but went out of memory, so will + * probably fail again to prefetch data, thus not trace each and every + * attempt. */ + volatile int prefetch_out_of_memory; + + /** Whether this memory node can evict data to another node */ + unsigned evictable; + + /* + * used by data_request.c + */ + /** requests that have not been treated at all */ + struct _starpu_data_request_prio_list data_requests[STARPU_MAXNODES][2]; + struct _starpu_data_request_prio_list prefetch_requests[STARPU_MAXNODES][2]; /* Contains both task_prefetch and prefetch */ + struct _starpu_data_request_prio_list idle_requests[STARPU_MAXNODES][2]; + starpu_pthread_mutex_t data_requests_list_mutex[STARPU_MAXNODES][2]; + + /** requests that are not terminated (eg. 
async transfers) */ + struct _starpu_data_request_prio_list data_requests_pending[STARPU_MAXNODES][2]; + unsigned data_requests_npending[STARPU_MAXNODES][2]; + starpu_pthread_mutex_t data_requests_pending_list_mutex[STARPU_MAXNODES][2]; + + /* + * used by malloc.c + */ + int malloc_on_node_default_flags; + /** One list of chunks per node */ + struct _starpu_chunk_list chunks; + /** Number of completely free chunks */ + int nfreechunks; + /** This protects chunks and nfreechunks */ + starpu_pthread_mutex_t chunk_mutex; + + /* + * used by memory_manager.c + */ + size_t global_size; + size_t used_size; + + /* This is used as an optimization to avoid to wake up allocating threads for + * each and every deallocation, only to find that there is still not enough + * room. */ + /* Minimum amount being waited for */ + size_t waiting_size; + + starpu_pthread_mutex_t lock_nodes; + starpu_pthread_cond_t cond_nodes; + + /** Keep this last, to make sure to separate node data in separate + cache lines. */ + char padding[STARPU_CACHELINE_SIZE]; +}; + +struct _starpu_ctx_change_list; +/** This is initialized by _starpu_worker_init() */ +LIST_TYPE(_starpu_worker, + struct _starpu_machine_config *config; + starpu_pthread_mutex_t mutex; + enum starpu_worker_archtype arch; /**< what is the type of worker ? */ + uint32_t worker_mask; /**< what is the type of worker ? */ + struct starpu_perfmodel_arch perf_arch; /**< in case there are different models of the same arch */ + starpu_pthread_t worker_thread; /**< the thread which runs the worker */ + unsigned devid; /**< which cpu/gpu/etc is controlled by the worker ? */ + unsigned devnum; /**< number of the device controlled by the worker, i.e. ranked from 0 and contiguous */ + unsigned subworkerid; /**< which sub-worker this one is for the cpu/gpu */ + int bindid; /**< which cpu is the driver bound to ? 
(logical index) */ + int workerid; /**< uniquely identify the worker among all processing units types */ + int combined_workerid; /**< combined worker currently using this worker */ + int current_rank; /**< current rank in case the worker is used in a parallel fashion */ + int worker_size; /**< size of the worker in case we use a combined worker */ + starpu_pthread_cond_t started_cond; /**< indicate when the worker is ready */ + starpu_pthread_cond_t ready_cond; /**< indicate when the worker is ready */ + unsigned memory_node; /**< which memory node is the worker associated with ? */ + unsigned numa_memory_node; /**< which numa memory node is the worker associated with? (logical index) */ + /** + * condition variable used for passive waiting operations on worker + * STARPU_PTHREAD_COND_BROADCAST must be used instead of STARPU_PTHREAD_COND_SIGNAL, + * since the condition is shared for multiple purpose */ + starpu_pthread_cond_t sched_cond; + starpu_pthread_mutex_t sched_mutex; /**< mutex protecting sched_cond */ + unsigned state_relax_refcnt; /**< mark scheduling sections where other workers can safely access the worker state */ +#ifdef STARPU_SPINLOCK_CHECK + const char *relax_on_file; + int relax_on_line; + const char *relax_on_func; + const char *relax_off_file; + int relax_off_line; + const char *relax_off_func; +#endif + unsigned state_sched_op_pending; /**< a task pop is ongoing even though sched_mutex may temporarily be unlocked */ + unsigned state_changing_ctx_waiting; /**< a thread is waiting for operations such as pop to complete before acquiring sched_mutex and modifying the worker ctx*/ + unsigned state_changing_ctx_notice; /**< the worker ctx is about to change or being changed, wait for flag to be cleared before starting new scheduling operations */ + unsigned state_blocked_in_parallel; /**< worker is currently blocked on a parallel section */ + unsigned state_blocked_in_parallel_observed; /**< the blocked state of the worker has been observed by 
another worker during a relaxed section */ + unsigned state_block_in_parallel_req; /**< a request for state transition from unblocked to blocked is pending */ + unsigned state_block_in_parallel_ack; /**< a block request has been honored */ + unsigned state_unblock_in_parallel_req; /**< a request for state transition from blocked to unblocked is pending */ + unsigned state_unblock_in_parallel_ack; /**< an unblock request has been honored */ + /** + * cumulative blocking depth + * - =0 worker unblocked + * - >0 worker blocked + * - transition from 0 to 1 triggers a block_req + * - transition from 1 to 0 triggers a unblock_req + */ + unsigned block_in_parallel_ref_count; + starpu_pthread_t thread_changing_ctx; /**< thread currently changing a sched_ctx containing the worker */ + /** + list of deferred context changes + * + * when the current thread is a worker, _and_ this worker is in a + * scheduling operation, new ctx changes are queued to this list for + * subsequent processing once worker completes the ongoing scheduling + * operation */ + struct _starpu_ctx_change_list ctx_change_list; + struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue */ + struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue with an explicit order */ + unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */ + unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */ + unsigned current_ordered_task_order; /**< this records the order of the next ordered task to be executed */ + struct starpu_task *current_task; /**< task currently executed by this worker (non-pipelined version) */ + struct starpu_task *current_tasks[STARPU_MAX_PIPELINE]; /**< tasks currently executed by this worker (pipelined version) */ +#ifdef STARPU_SIMGRID + starpu_pthread_wait_t 
wait; +#endif + + struct timespec cl_start; /**< Codelet start time of the task currently running */ + struct timespec cl_expend; /**< Codelet expected end time of the task currently running */ + struct timespec cl_end; /**< Codelet end time of the last task running */ + unsigned char first_task; /**< Index of first task in the pipeline */ + unsigned char ntasks; /**< number of tasks in the pipeline */ + unsigned char pipeline_length; /**< number of tasks to be put in the pipeline */ + unsigned char pipeline_stuck; /**< whether a task prevents us from pipelining */ + struct _starpu_worker_set *set; /**< in case this worker belongs to a worker set */ + struct _starpu_worker_set *driver_worker_set; /**< in case this worker belongs to a driver worker set */ + unsigned worker_is_running; + unsigned worker_is_initialized; + unsigned wait_for_worker_initialization; + enum _starpu_worker_status status; /**< what is the worker doing now ? (eg. CALLBACK) */ + unsigned state_keep_awake; /**< !0 if a task has been pushed to the worker and the task has not yet been seen by the worker, the worker should no go to sleep before processing this task*/ + char name[128]; + char short_name[32]; + unsigned run_by_starpu; /**< Is this run by StarPU or directly by the application ? 
*/ + const struct _starpu_driver_ops *driver_ops; + + struct _starpu_sched_ctx_list *sched_ctx_list; + int tmp_sched_ctx; + unsigned nsched_ctxs; /**< the no of contexts a worker belongs to*/ + struct _starpu_barrier_counter tasks_barrier; /**< wait for the tasks submitted */ + + unsigned has_prev_init; /**< had already been inited in another ctx */ + + unsigned removed_from_ctx[STARPU_NMAX_SCHED_CTXS+1]; + + unsigned spinning_backoff ; /**< number of cycles to pause when spinning */ + + unsigned nb_buffers_transferred; /**< number of piece of data already send to worker */ + unsigned nb_buffers_totransfer; /**< number of piece of data already send to worker */ + struct starpu_task *task_transferring; /**< The buffers of this task are being sent */ + + /** + * indicate whether the workers shares tasks lists with other workers + * in this case when removing him from a context it disappears instantly + */ + unsigned shares_tasks_lists[STARPU_NMAX_SCHED_CTXS+1]; + + unsigned poped_in_ctx[STARPU_NMAX_SCHED_CTXS+1]; /**< boolean to chose the next ctx a worker will pop into */ + + /** + * boolean indicating at which moment we checked all ctxs and change phase for the booleab poped_in_ctx + * one for each of the 2 priorities + */ + unsigned reverse_phase[2]; + + unsigned pop_ctx_priority; /**< indicate which priority of ctx is currently active: the values are 0 or 1*/ + unsigned is_slave_somewhere; /**< bool to indicate if the worker is slave in a ctx */ + + struct _starpu_sched_ctx *stream_ctx; + +#ifdef __GLIBC__ + cpu_set_t cpu_set; +#endif /* __GLIBC__ */ +#ifdef STARPU_HAVE_HWLOC + hwloc_bitmap_t hwloc_cpu_set; + hwloc_obj_t hwloc_obj; +#endif + + struct starpu_profiling_worker_info profiling_info; + /* TODO: rather use rwlock? */ + starpu_pthread_mutex_t profiling_info_mutex; + + /* In case the worker is still sleeping when the user request profiling info, + * we need to account for the time elapsed while sleeping. 
*/
+	unsigned profiling_registered_start[STATUS_INDEX_NR];
+	struct timespec profiling_registered_start_date[STATUS_INDEX_NR];
+	enum _starpu_worker_status profiling_status;
+	struct timespec profiling_status_start_date;
+
+	struct starpu_perf_counter_sample perf_counter_sample;
+	int64_t __w_total_executed__value;
+	double __w_cumul_execution_time__value;
+
+	int enable_knob;
+	int bindid_requested;
+
+	/** Keep this last, to make sure to separate worker data in separate
+	  cache lines. */
+	char padding[STARPU_CACHELINE_SIZE];
+);
+
+struct _starpu_combined_worker
+{
+	struct starpu_perfmodel_arch perf_arch; /**< in case there are different models of the same arch */
+	uint32_t worker_mask; /**< what is the type of workers ? */
+	int worker_size;
+	unsigned memory_node; /**< which memory node is that worker associated to ? */
+	int combined_workerid[STARPU_NMAXWORKERS];
+#ifdef STARPU_USE_MP
+	int count;
+	starpu_pthread_mutex_t count_mutex;
+#endif
+
+#ifdef __GLIBC__
+	cpu_set_t cpu_set;
+#endif /* __GLIBC__ */
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_bitmap_t hwloc_cpu_set;
+#endif
+
+	/** Keep this last, to make sure to separate worker data in separate
+	  cache lines. */
+	char padding[STARPU_CACHELINE_SIZE];
+};
+
+/**
+ * Set of workers driven by one thread, in case a single CPU worker
+ * may control multiple accelerators
+*/
+struct _starpu_worker_set
+{
+	starpu_pthread_mutex_t mutex;
+	starpu_pthread_t worker_thread; /**< the thread which runs the worker */
+	unsigned nworkers;
+	unsigned started; /**< Only one thread for the whole set */
+	void *retval;
+	struct _starpu_worker *workers;
+	starpu_pthread_cond_t ready_cond; /**< indicate when the set is ready */
+	unsigned set_is_initialized;
+	unsigned wait_for_set_initialization;
+};
+
+struct _starpu_machine_topology
+{
+	/** Total number of workers. */
+	unsigned nworkers;
+
+	/** Total number of combined workers. */
+	unsigned ncombinedworkers;
+
+	unsigned nsched_ctxs;
+
+#ifdef STARPU_HAVE_HWLOC
+	/** Topology as detected by hwloc. */
+	hwloc_topology_t hwtopology;
+	hwloc_bitmap_t log_cpuset;
+	hwloc_bitmap_t log_coreset;
+#endif
+	/** custom hwloc tree */
+	struct starpu_tree *tree;
+
+	/** Total number of PUs (i.e. threads), as detected by the topology code. May
+	 * be different from the actual number of CPU workers.
+	 */
+	unsigned nhwpus;
+
+	/** First PU to be used. May be different from 0 for administrative reasons
+	 * (e.g. from job scheduler).
+	 */
+	unsigned firstusedpu;
+
+	/** Number of PUs (i.e. threads) to be used. May be different from nhwpus for
+	 * administrative reasons (e.g. from job scheduler).
+	 */
+	unsigned nusedpus;
+
+	/** Total number of devices, as detected. May be different from the
+	 * actual number of devices run by StarPU.
+	 */
+	unsigned nhwdevices[STARPU_NARCH];
+	/** Total number of workers for each device, as detected. May be different from the
+	 * actual number of workers run by StarPU.
+	 */
+	unsigned nhwworker[STARPU_NARCH][STARPU_NMAXDEVS];
+
+	/** Actual number of devices used by StarPU.
+	 */
+	unsigned ndevices[STARPU_NARCH];
+
+	/** Number of workers per device
+	 */
+	unsigned nworker[STARPU_NARCH][STARPU_NMAXDEVS];
+
+	/** Device ids actually used
+	 */
+	int devid[STARPU_NARCH][STARPU_NMAXDEVS];
+
+	/** Whether we should have one thread per stream */
+	int cuda_th_per_stream;
+	/** Whether we should have one thread per device */
+	int cuda_th_per_dev;
+
+	/** Whether we should have one thread per stream (for hip) */
+	int hip_th_per_stream;
+	/** Whether we should have one thread per device (for hip) */
+	int hip_th_per_dev;
+
+	/** Indicates the successive logical PU identifiers that should be used
+	 * to bind the workers. It is either filled according to the
+	 * user's explicit parameters (from starpu_conf) or according
+	 * to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
+	 * round-robin policy is used to distribute the workers over
+	 * the cores.
+	 */
+	unsigned workers_bindid[STARPU_NMAXWORKERS];
+
+	/** Indicates how many different values there are in
+	 * _starpu_machine_topology::workers_bindid, i.e. the length of the
+	 * cycle of the values there.
+	 */
+	unsigned workers_nbindid;
+
+	/** Indicates the successive device identifiers that should be
+	 * used by the driver. It is either filled according to
+	 * the user's explicit parameters (from starpu_conf) or
+	 * according to the corresponding env. variable.
+	 * Otherwise, they are taken in ID order.
+	 */
+	unsigned workers_devid[STARPU_NARCH][STARPU_NMAXWORKERS];
+};
+
+struct _starpu_machine_config
+{
+	struct _starpu_machine_topology topology;
+
+#ifdef STARPU_HAVE_HWLOC
+	int cpu_depth;
+	int pu_depth;
+#endif
+
+	/** Where to bind next worker ? */
+	int current_bindid;
+	char currently_bound[STARPU_NMAXWORKERS];
+	char currently_shared[STARPU_NMAXWORKERS];
+
+	/** Which next device will we use for each arch? */
+	int current_devid[STARPU_NARCH];
+
+	/** Which TCPIP do we use? */
+	int current_tcpip_deviceid;
+
+	/** Memory node for different worker types, if only one */
+	int arch_nodeid [STARPU_NARCH];
+
+	/** Separate out previous variables from per-worker data. */
+	char padding1[STARPU_CACHELINE_SIZE];
+
+	/** Basic workers : each of these workers is running its own driver and
+	 * can be combined with other basic workers. */
+	struct _starpu_worker workers[STARPU_NMAXWORKERS];
+
+	/** Memory nodes */
+	struct _starpu_node nodes[STARPU_MAXNODES];
+
+	/** Combined workers: these workers are a combination of basic workers
+	 * that can run parallel tasks together. */
+	struct _starpu_combined_worker combined_workers[STARPU_NMAX_COMBINEDWORKERS];
+
+	starpu_pthread_mutex_t submitted_mutex;
+
+	/** Separate out previous mutex from the rest of the data. */
+	char padding2[STARPU_CACHELINE_SIZE];
+
+	/** Translation table from bindid to worker IDs */
+	struct
+	{
+		int *workerids;
+		unsigned nworkers; /**< size of workerids */
+	} *bindid_workers;
+	unsigned nbindid; /**< size of bindid_workers */
+
+	/** This bitmask indicates which kinds of worker are available. For
+	 * instance it is possible to test if there is a CUDA worker with
+	 * the result of (worker_mask & STARPU_CUDA). */
+	uint32_t worker_mask;
+
+	/** either the user given configuration passed to starpu_init or a default configuration */
+	struct starpu_conf conf;
+
+	/** this flag is set until the runtime is stopped */
+	unsigned running;
+
+	int disable_kernels;
+
+	/** Number of calls to starpu_pause() - calls to starpu_resume(). When >0,
+	 * StarPU should pause. */
+	int pause_depth;
+
+	/** all the sched ctx of the current instance of starpu */
+	struct _starpu_sched_ctx sched_ctxs[STARPU_NMAX_SCHED_CTXS+1];
+
+	/** this flag is set until the application is finished submitting tasks */
+	unsigned submitting;
+
+	int watchdog_ok;
+
+	/** When >0, StarPU should stop performance counters collection. */
+	int perf_counter_pause_depth;
+};
+
+struct _starpu_machine_topology;
+
+/** Provides information for a device driver */
+struct _starpu_driver_info
+{
+	const char *name_upper; /**< Name of worker type in upper case */
+	const char *name_var; /**< Name of worker type for environment variables */
+	const char *name_lower; /**< Name of worker type in lower case */
+	enum starpu_node_kind memory_kind; /**< Kind of memory in device */
+	double alpha; /**< Typical relative speed compared to a CPU core */
+	unsigned wait_for_worker_initialization; /**< Whether we should make the core wait for worker initialization before starting the initialization of other workers */
+	const struct _starpu_driver_ops *driver_ops; /**< optional: Driver operations */
+	void *(*run_worker)(void *); /**< Actually run the worker */
+	void (*init_worker_binding)(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); /**< Setup worker CPU binding */
+	void (*init_worker_memory)(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); /**< Setup worker memory node */
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_obj_t (*get_hwloc_obj)(hwloc_topology_t topology, int devid);
+	/**< optional: Return the hwloc object corresponding to this device */
+#endif
+};
+
+/** Device driver information, indexed by enum starpu_worker_archtype */
+extern struct _starpu_driver_info starpu_driver_info[STARPU_NARCH];
+
+void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info);
+
+/** Provides information for a memory node driver */
+struct _starpu_memory_driver_info
+{
+	const char *name_upper; /**< Name of memory in upper case */
+	enum starpu_worker_archtype worker_archtype; /**< Kind of device */
+	const struct _starpu_node_ops *ops; /**< Memory node operations */
+};
+
+/** Memory driver information, indexed by enum starpu_node_kind */
+extern struct 
_starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
+
+void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct _starpu_memory_driver_info *info);
+
+extern int _starpu_worker_parallel_blocks;
+
+extern struct _starpu_machine_config _starpu_config;
+extern int _starpu_keys_initialized;
+extern starpu_pthread_key_t _starpu_worker_key;
+extern starpu_pthread_key_t _starpu_worker_set_key;
+
+void _starpu_set_catch_signals(int do_catch_signal);
+
+/** Three functions to manage argv, argc.
+ * The getters take no argument; they are declared with a full (void)
+ * prototype so that the compiler rejects calls passing arguments. */
+void _starpu_set_argc_argv(int *argc, char ***argv);
+int *_starpu_get_argc(void);
+char ***_starpu_get_argv(void);
+
+/** Fill conf with environment variables */
+void _starpu_conf_check_environment(struct starpu_conf *conf);
+
+/** Called by the driver when it is ready to pause */
+void _starpu_may_pause(void);
+
+/** Has starpu_shutdown already been called ?
+ *
+ * Returns the current value of _starpu_config.running, i.e. non-zero as
+ * long as the runtime has not been stopped, and zero afterwards. */
+static inline unsigned _starpu_machine_is_running(void)
+{
+	unsigned ret;
+	/* running is just protected by a memory barrier */
+	STARPU_RMB();
+
+	/* the ANNOTATE_* macros only bracket the unlocked read of the flag */
+	ANNOTATE_HAPPENS_AFTER(&_starpu_config.running);
+	ret = _starpu_config.running;
+	ANNOTATE_HAPPENS_BEFORE(&_starpu_config.running);
+	return ret;
+}
+
+
+/** initialise a worker */
+void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig);
+
+/** Check if there is a worker that may execute the task. */
+uint32_t _starpu_worker_exists(struct starpu_task *) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+
+/** Is there a worker that can execute MS code ? */
+uint32_t _starpu_can_submit_ms_task(void);
+
+/** Is there a worker that can execute CUDA code ? */
+uint32_t _starpu_can_submit_cuda_task(void);
+
+/** Is there a worker that can execute HIP code ? */
+uint32_t _starpu_can_submit_hip_task(void);
+
+/** Is there a worker that can execute CPU code ? */
+uint32_t _starpu_can_submit_cpu_task(void);
+
+/** Is there a worker that can execute OpenCL code ? 
*/
+uint32_t _starpu_can_submit_opencl_task(void);
+
+/** Check whether the worker has anything to do rather than sleeping
+ * (i.e. rather than waiting for something to happen). */
+unsigned _starpu_worker_can_block(unsigned memnode, struct _starpu_worker *worker);
+
+/** Initialize the current driver for the given worker */
+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
+/** Initialize the current thread for the given worker */
+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
+
+/** Return the number of workers recorded in the machine topology. */
+static inline unsigned _starpu_worker_get_count(void)
+{
+	const struct _starpu_machine_topology *topo = &_starpu_config.topology;
+
+	return topo->nworkers;
+}
+#define starpu_worker_get_count _starpu_worker_get_count
+
+/** Store a pointer to the given _starpu_worker structure (which holds all
+ * the state of a StarPU worker) in the _starpu_worker_key pthread key.
+ * The keys must already have been initialized.
+ * */
+static inline void _starpu_set_local_worker_key(struct _starpu_worker *worker)
+{
+	STARPU_ASSERT(_starpu_keys_initialized);
+	STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_key, worker);
+}
+
+/** Return the _starpu_worker structure stored in the _starpu_worker_key
+ * pthread key, or NULL when the keys are not initialized yet. */
+static inline struct _starpu_worker *_starpu_get_local_worker_key(void)
+{
+	struct _starpu_worker *local_worker = NULL;
+
+	if (_starpu_keys_initialized)
+		local_worker = (struct _starpu_worker *) STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_key);
+	return local_worker;
+}
+
+/** Store a pointer to the given _starpu_worker_set structure (which holds
+ * all the state of a StarPU worker_set) in the _starpu_worker_set_key
+ * pthread key. The keys must already have been initialized.
+ * */
+static inline void _starpu_set_local_worker_set_key(struct _starpu_worker_set *worker)
+{
+	STARPU_ASSERT(_starpu_keys_initialized);
+	STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_set_key, worker);
+}
+
+/** Returns the _starpu_worker_set structure that describes the state of the
+ * current worker_set. 
*/
+static inline struct _starpu_worker_set *_starpu_get_local_worker_set_key(void)
+{
+	struct _starpu_worker_set *local_set = NULL;
+
+	/* the key can only be queried once it has been created */
+	if (_starpu_keys_initialized)
+		local_set = (struct _starpu_worker_set *) STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_set_key);
+	return local_set;
+}
+
+/** Return the entry of _starpu_config.workers for worker number id.
+ * id must be lower than STARPU_NMAXWORKERS. */
+static inline struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
+{
+	STARPU_ASSERT(id < STARPU_NMAXWORKERS);
+	return _starpu_config.workers + id;
+}
+
+/** Return the entry of _starpu_config.nodes for memory node number id.
+ * id must be lower than STARPU_MAXNODES. */
+static inline struct _starpu_node *_starpu_get_node_struct(unsigned id)
+{
+	STARPU_ASSERT(id < STARPU_MAXNODES);
+	return _starpu_config.nodes + id;
+}
+
+/** Return the entry of _starpu_config.sched_ctxs for context number id,
+ * or NULL when id is greater than STARPU_NMAX_SCHED_CTXS */
+static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
+{
+	if (id > STARPU_NMAX_SCHED_CTXS)
+		return NULL;
+	return _starpu_config.sched_ctxs + id;
+}
+
+struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id);
+
+/** Return the address of the global machine configuration (which holds e.g.
+ * all workers and the topology). */
+static inline struct _starpu_machine_config *_starpu_get_machine_config(void)
+{
+	struct _starpu_machine_config *config = &_starpu_config;
+
+	return config;
+}
+
+/** Return whether kernels should be run (<=0) or not (>0) */
+static inline int _starpu_get_disable_kernels(void)
+{
+	const struct _starpu_machine_config *config = &_starpu_config;
+
+	return config->disable_kernels;
+}
+
+/** Return the status field of the given worker, which indicates what the
+ * worker is currently doing. workerid is not range-checked. */
+static inline enum _starpu_worker_status _starpu_worker_get_status(int workerid)
+{
+	const struct _starpu_worker *w = &_starpu_config.workers[workerid];
+
+	return w->status;
+}
+
+/** Change the status of the worker which indicates what the worker is currently
+ * doing (eg. executing a callback). 
*/
+static inline void _starpu_worker_add_status(int workerid, enum _starpu_worker_status_index status)
+{
+	struct _starpu_worker *w = &_starpu_config.workers[workerid];
+
+	/* the status bit must not be set yet */
+	STARPU_ASSERT(!(w->status & (1 << status)));
+	if (starpu_profiling_status_get())
+		_starpu_worker_start_state(workerid, status, NULL);
+	w->status |= (1 << status);
+}
+
+/** Clear one bit of the worker status, which indicates what the worker is
+ * currently doing (eg. executing a callback). The bit must currently be set. */
+static inline void _starpu_worker_clear_status(int workerid, enum _starpu_worker_status_index status)
+{
+	struct _starpu_worker *w = &_starpu_config.workers[workerid];
+
+	STARPU_ASSERT((w->status & (1 << status)));
+	if (starpu_profiling_status_get())
+		_starpu_worker_stop_state(workerid, status, NULL);
+	w->status &= ~(1 << status);
+}
+
+/** Return the initial sched ctx (slot STARPU_GLOBAL_SCHED_CTX), which is kept
+ * so that it may be used in case no other ctx is available */
+static inline struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
+{
+	return _starpu_config.sched_ctxs + STARPU_GLOBAL_SCHED_CTX;
+}
+
+int _starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize);
+
+/**
+ * returns workers not belonging to any context; be careful, no mutex is used,
+ * so the list might not be up to date
+ */
+int _starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize);
+
+/** Tell whether mutex is the sched_mutex of the given worker. */
+static inline unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_t *mutex)
+{
+	return &_starpu_get_worker_struct(workerid)->sched_mutex == mutex;
+}
+
+/** Return the nsched_ctxs field of the given worker. */
+static inline int _starpu_worker_get_nsched_ctxs(int workerid)
+{
+	const struct _starpu_worker *w = &_starpu_config.workers[workerid];
+
+	return w->nsched_ctxs;
+}
+
+/** Get the total number of sched_ctxs created till now */
+static inline unsigned _starpu_get_nsched_ctxs(void)
+{
+	unsigned nctxs;
+
+	/* topology.nsched_ctxs may be increased asynchronously in sched_ctx_create */
+	STARPU_RMB();
+	nctxs = _starpu_config.topology.nsched_ctxs;
+	return nctxs;
+}
+
+/** Inlined 
version when building the core. */
+static inline int _starpu_worker_get_id(void)
+{
+	struct _starpu_worker *self = _starpu_get_local_worker_key();
+
+	if (!self)
+	{
+		/* there is no worker associated to that thread, perhaps it is
+		 * a thread from the application or this is some SPU worker */
+		return -1;
+	}
+	return self->workerid;
+}
+#define starpu_worker_get_id _starpu_worker_get_id
+
+/** Same as starpu_worker_get_id(), except that it asserts that the caller
+ * is a worker; f and l are the file and line reported in the failure message */
+/** The result is unsigned on purpose, so that the caller is sure to get a positive value */
+static inline unsigned __starpu_worker_get_id_check(const char *f, int l)
+{
+	/* f and l may be otherwise unused, depending on STARPU_ASSERT_MSG */
+	(void) l;
+	(void) f;
+	int id = starpu_worker_get_id();
+	STARPU_ASSERT_MSG(id>=0, "%s:%d Cannot be called from outside a worker\n", f, l);
+	return id;
+}
+#define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l)
+
+void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx);
+
+struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);
+
+/** Send a request to the worker to block, before a parallel task is about to
+ * begin.
+ *
+ * Must be called with worker's sched_mutex held. 
+ */
+static inline void _starpu_worker_request_blocking_in_parallel(struct _starpu_worker * const worker)
+{
+	_starpu_worker_parallel_blocks = 1; /* global flag, only ever set (never cleared) by this function */
+	/* flush pending requests to start on a fresh transaction epoch:
+	 * wait until no unblock request is pending on this worker any more */
+	while (worker->state_unblock_in_parallel_req)
+		STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+	/* announce blocking intent (the assertion guards against counter overflow) */
+	STARPU_ASSERT(worker->block_in_parallel_ref_count < UINT_MAX);
+	worker->block_in_parallel_ref_count++;
+
+	if (worker->block_in_parallel_ref_count == 1)
+	{
+		/* only the transition from 0 to 1 triggers the block_in_parallel_req;
+		 * nested requests merely increment the counter above */
+
+		STARPU_ASSERT(!worker->state_blocked_in_parallel);
+		STARPU_ASSERT(!worker->state_block_in_parallel_req);
+		STARPU_ASSERT(!worker->state_block_in_parallel_ack);
+		STARPU_ASSERT(!worker->state_unblock_in_parallel_req);
+		STARPU_ASSERT(!worker->state_unblock_in_parallel_ack);
+
+		/* trigger the block_in_parallel_req and wake up threads waiting on sched_cond */
+		worker->state_block_in_parallel_req = 1;
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+#ifdef STARPU_SIMGRID
+		starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[worker->workerid]);
+#endif
+
+		/* wait for block_in_parallel_req to be processed, i.e. for the ack flag to be raised */
+		while (!worker->state_block_in_parallel_ack)
+			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+		STARPU_ASSERT(worker->block_in_parallel_ref_count >= 1);
+		STARPU_ASSERT(worker->state_block_in_parallel_req);
+		STARPU_ASSERT(worker->state_blocked_in_parallel);
+
+		/* reset block_in_parallel_req state flags */
+		worker->state_block_in_parallel_req = 0;
+		worker->state_block_in_parallel_ack = 0;
+
+		/* broadcast block_in_parallel_req state flags reset */
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+	}
+}
+
+/** Send a request to the worker to unblock, after a parallel task is complete.
+ *
+ * Must be called with worker's sched_mutex held. 
+ */
+static inline void _starpu_worker_request_unblocking_in_parallel(struct _starpu_worker * const worker)
+{
+	/* flush pending requests to start on a fresh transaction epoch:
+	 * wait until no block request is pending on this worker any more */
+	while (worker->state_block_in_parallel_req)
+		STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+	/* unblocking may be requested unconditionally
+	 * thus, check if unblocking is really needed */
+	if (worker->state_blocked_in_parallel)
+	{
+		if (worker->block_in_parallel_ref_count == 1)
+		{
+			/* only the transition from 1 to 0 triggers the unblock_in_parallel_req */
+
+			STARPU_ASSERT(!worker->state_block_in_parallel_req);
+			STARPU_ASSERT(!worker->state_block_in_parallel_ack);
+			STARPU_ASSERT(!worker->state_unblock_in_parallel_req);
+			STARPU_ASSERT(!worker->state_unblock_in_parallel_ack);
+
+			/* trigger the unblock_in_parallel_req and wake up threads waiting on sched_cond */
+			worker->state_unblock_in_parallel_req = 1;
+			STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+
+			/* wait for the unblock_in_parallel_req to be processed, i.e. for the ack flag to be raised */
+			while (!worker->state_unblock_in_parallel_ack)
+				STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+			STARPU_ASSERT(worker->state_unblock_in_parallel_req);
+			STARPU_ASSERT(!worker->state_blocked_in_parallel);
+
+			/* reset unblock_in_parallel_req state flags */
+			worker->state_unblock_in_parallel_req = 0;
+			worker->state_unblock_in_parallel_ack = 0;
+
+			/* broadcast unblock_in_parallel_req state flags reset */
+			STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+		}
+
+		/* announce unblocking complete: the counter is decremented whether or
+		 * not this call was the one that triggered the unblock request */
+		STARPU_ASSERT(worker->block_in_parallel_ref_count > 0);
+		worker->block_in_parallel_ref_count--;
+	}
+}
+
+/** Called by the worker to process incoming requests to block or unblock on
+ * parallel task boundaries.
+ *
+ * Must be called with worker's sched_mutex held. 
+ */
+static inline void _starpu_worker_process_block_in_parallel_requests(struct _starpu_worker * const worker)
+{
+	/* loop, since a new block request may already be pending once the
+	 * previous block/unblock cycle has completed */
+	while (worker->state_block_in_parallel_req)
+	{
+		STARPU_ASSERT(!worker->state_blocked_in_parallel);
+		STARPU_ASSERT(!worker->state_block_in_parallel_ack);
+		STARPU_ASSERT(!worker->state_unblock_in_parallel_req);
+		STARPU_ASSERT(!worker->state_unblock_in_parallel_ack);
+		STARPU_ASSERT(worker->block_in_parallel_ref_count > 0);
+
+		/* enter effective blocked state */
+		worker->state_blocked_in_parallel = 1;
+
+		/* notify block_in_parallel_req processing */
+		worker->state_block_in_parallel_ack = 1;
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+
+		/* block until an unblock request is raised */
+		while (!worker->state_unblock_in_parallel_req)
+			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+		STARPU_ASSERT(worker->state_blocked_in_parallel);
+		STARPU_ASSERT(!worker->state_block_in_parallel_req);
+		STARPU_ASSERT(!worker->state_block_in_parallel_ack);
+		STARPU_ASSERT(!worker->state_unblock_in_parallel_ack);
+		STARPU_ASSERT(worker->block_in_parallel_ref_count > 0);
+
+		/* leave effective blocked state */
+		worker->state_blocked_in_parallel = 0;
+
+		/* notify unblock_in_parallel_req processing */
+		worker->state_unblock_in_parallel_ack = 1;
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+	}
+}
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_enter_sched_op(worker) __starpu_worker_enter_sched_op((worker), __FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_enter_sched_op(struct _starpu_worker * const worker, const char*file, int line, const char* func)
+#else
+/** Mark the beginning of a scheduling operation by the worker. No worker
+ * blocking operations on parallel tasks and no scheduling context change
+ * operations must be performed on contexts containing the worker, on
+ * contexts about to add the worker and on contexts about to remove the
+ * worker, while the scheduling operation is in process. The sched mutex
+ * of the worker may only be acquired permanently by another thread when
+ * no scheduling operation is in process, or when a scheduling operation
+ * is in process _and_ worker->state_relax_refcnt!=0. If a
+ * scheduling operation is in process _and_
+ * worker->state_relax_refcnt==0, a thread other than the worker
+ * must wait on condition worker->sched_cond for
+ * worker->state_relax_refcnt!=0 to become true, before acquiring
+ * the worker sched mutex permanently.
+ *
+ * When STARPU_SPINLOCK_CHECK is defined, the function additionally takes
+ * the caller's file, line and function name and records them in the
+ * worker's relax_on_* fields.
+ *
+ * Must be called with worker's sched_mutex held.
+ */
+static inline void _starpu_worker_enter_sched_op(struct _starpu_worker * const worker)
+#endif
+{
+	STARPU_ASSERT(!worker->state_sched_op_pending);
+	if (!worker->state_blocked_in_parallel_observed)
+	{
+		/* process pending block requests before entering a sched_op region */
+		_starpu_worker_process_block_in_parallel_requests(worker);
+		while (worker->state_changing_ctx_notice)
+		{
+			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+			/* new block requests may have been triggered during the wait,
+			 * need to check again */
+			_starpu_worker_process_block_in_parallel_requests(worker);
+		}
+	}
+	else
+	{
+		/* if someone observed the worker state since the last call, postpone block request
+		 * processing for one sched_op turn more, because the observer will not have seen
+		 * new block requests between its observation and now.
+		 *
+		 * however, the worker still has to wait for context change operations to complete
+		 * before entering sched_op again */
+		while (worker->state_changing_ctx_notice)
+		{
+			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+		}
+	}
+
+	/* no block request and no ctx change ahead,
+	 * enter sched_op: the "observed" flag and the relax counter are both reset */
+	worker->state_sched_op_pending = 1;
+	worker->state_blocked_in_parallel_observed = 0;
+	worker->state_relax_refcnt = 0;
+#ifdef STARPU_SPINLOCK_CHECK
+	worker->relax_on_file = file;
+	worker->relax_on_line = line;
+	worker->relax_on_func = func;
+#endif
+}
+
+void _starpu_worker_apply_deferred_ctx_changes(void);
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_leave_sched_op(worker) __starpu_worker_leave_sched_op((worker), __FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_leave_sched_op(struct _starpu_worker * const worker, const char*file, int line, const char* func)
+#else
+/** Mark the end of a scheduling operation by the worker: set the relax
+ * counter to 1, clear state_sched_op_pending, wake up the threads waiting
+ * on worker->sched_cond and call _starpu_worker_apply_deferred_ctx_changes().
+ *
+ * Must be called with worker's sched_mutex held.
+ */
+static inline void _starpu_worker_leave_sched_op(struct _starpu_worker * const worker)
+#endif
+{
+	STARPU_ASSERT(worker->state_sched_op_pending);
+	worker->state_relax_refcnt = 1;
+#ifdef STARPU_SPINLOCK_CHECK
+	worker->relax_off_file = file;
+	worker->relax_off_line = line;
+	worker->relax_off_func = func;
+#endif
+	worker->state_sched_op_pending = 0;
+	STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+	_starpu_worker_apply_deferred_ctx_changes();
+}
+
+static inline int _starpu_worker_sched_op_pending(void)
+{
+	int workerid = starpu_worker_get_id();
+	if (workerid == -1) /* not called from a worker thread: no sched_op can be pending */
+		return 0;
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	STARPU_ASSERT(worker != NULL);
+	return worker->state_sched_op_pending; /* read without taking the sched_mutex here */
+}
+
+/** Must be called before altering a context related to the worker
+ * whether about adding the worker to a context, removing it from a
+ * context or modifying the set of workers of a context of which the
+ * worker is a member, to mark the beginning of a context change
+ * operation. The sched mutex of the worker must be held before calling
+ * this function.
+ *
+ * Must be called with worker's sched_mutex held. 
+ */
+static inline void _starpu_worker_enter_changing_ctx_op(struct _starpu_worker * const worker)
+{
+	STARPU_ASSERT(!starpu_pthread_equal(worker->thread_changing_ctx, starpu_pthread_self())); /* the calling thread must not already be the one changing a ctx of this worker */
+	/* flush pending requests to start on a fresh transaction epoch:
+	 * wait until no other context change is announced on this worker */
+	while (worker->state_changing_ctx_notice)
+		STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+
+	/* announce changing_ctx intent
+	 *
+	 * - an already started sched_op is allowed to complete
+	 * - no new sched_op may be started
+	 */
+	worker->state_changing_ctx_notice = 1;
+
+	worker->thread_changing_ctx = starpu_pthread_self();
+
+	/* allow for an already started sched_op to complete */
+	if (worker->state_sched_op_pending)
+	{
+		/* request sched_op to broadcast when way is cleared */
+		worker->state_changing_ctx_waiting = 1;
+
+		/* wait for sched_op completion */
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+#ifdef STARPU_SIMGRID
+		starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[worker->workerid]);
+#endif
+		do
+		{
+			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
+		}
+		while (worker->state_sched_op_pending);
+
+		/* reset flag so other sched_ops won't have to broadcast state */
+		worker->state_changing_ctx_waiting = 0;
+	}
+}
+
+/** Mark the end of a context change operation.
+ *
+ * Must be called with worker's sched_mutex held. 
+ */
+static inline void _starpu_worker_leave_changing_ctx_op(struct _starpu_worker * const worker)
+{
+	/* forget the changing thread, withdraw the notice, then wake up the
+	 * threads waiting on sched_cond for the notice to be cleared */
+	worker->thread_changing_ctx = (starpu_pthread_t)0;
+	worker->state_changing_ctx_notice = 0;
+	STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+}
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_relax_on() __starpu_worker_relax_on(__FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_relax_on(const char*file, int line, const char* func)
+#else
+/** Temporarily allow other workers to access the current worker state, when still scheduling,
+ * but the scheduling has not yet been made or is already done.
+ *
+ * Does nothing when the calling thread is not a worker or when the worker
+ * has no scheduling operation pending. Otherwise takes the worker's
+ * sched_mutex, increments state_relax_refcnt (which must not overflow) and
+ * wakes up the threads waiting on sched_cond.
+ *
+ * When STARPU_SPINLOCK_CHECK is defined, the caller's location is recorded
+ * in the worker's relax_on_* fields. */
+static inline void _starpu_worker_relax_on(void)
+#endif
+{
+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
+	if (worker == NULL)
+		return;
+	if (!worker->state_sched_op_pending)
+		return;
+	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
+	/* the relax counter is about to be incremented: it must not wrap around */
+#ifdef STARPU_SPINLOCK_CHECK
+	STARPU_ASSERT_MSG(worker->state_relax_refcnt<UINT_MAX, "relax last turn on in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line);
+#else
+	STARPU_ASSERT(worker->state_relax_refcnt<UINT_MAX);
+#endif
+	worker->state_relax_refcnt++;
+#ifdef STARPU_SPINLOCK_CHECK
+	worker->relax_on_file = file;
+	worker->relax_on_line = line;
+	worker->relax_on_func = func;
+#endif
+	STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
+}
+#define starpu_worker_relax_on _starpu_worker_relax_on
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_relax_on_locked(worker) __starpu_worker_relax_on_locked((worker),__FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_relax_on_locked(struct _starpu_worker *worker, const char*file, int line, const char* func)
+#else
+/** Same, but with current worker mutex already held: the worker is passed
+ * explicitly and its sched_mutex is neither taken nor released here */
+static inline void _starpu_worker_relax_on_locked(struct _starpu_worker *worker)
+#endif
+{
+	if (!worker->state_sched_op_pending)
+		return;
+	/* the relax counter is about to be incremented: it must not wrap around */
+#ifdef STARPU_SPINLOCK_CHECK
+	STARPU_ASSERT_MSG(worker->state_relax_refcnt<UINT_MAX, "relax last turn on in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line);
+#else
+	STARPU_ASSERT(worker->state_relax_refcnt<UINT_MAX);
+#endif
+	worker->state_relax_refcnt++;
+#ifdef STARPU_SPINLOCK_CHECK
+	worker->relax_on_file = file;
+	worker->relax_on_line = line;
+	worker->relax_on_func = func;
+#endif
+	STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+}
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_relax_off() __starpu_worker_relax_off(__FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_relax_off(const char*file, int line, const char* func)
+#else
+/** Counterpart of _starpu_worker_relax_on(): decrement state_relax_refcnt
+ * (which must be >0) of the calling worker, under its sched_mutex.
+ *
+ * Does nothing when the calling thread is not a worker or when the worker
+ * has no scheduling operation pending. Unlike the relax_on variant, no
+ * broadcast is done on sched_cond.
+ *
+ * When STARPU_SPINLOCK_CHECK is defined, the caller's location is recorded
+ * in the worker's relax_off_* fields. */
+static inline void _starpu_worker_relax_off(void)
+#endif
+{
+	int workerid = starpu_worker_get_id();
+	if (workerid == -1)
+		return;
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	STARPU_ASSERT(worker != NULL);
+	if (!worker->state_sched_op_pending)
+		return;
+	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
+#ifdef STARPU_SPINLOCK_CHECK
+	STARPU_ASSERT_MSG(worker->state_relax_refcnt>0, "relax last turn off in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line);
+#else
+	STARPU_ASSERT(worker->state_relax_refcnt>0);
+#endif
+	worker->state_relax_refcnt--;
+#ifdef STARPU_SPINLOCK_CHECK
+	worker->relax_off_file = file;
+	worker->relax_off_line = line;
+	worker->relax_off_func = func;
+#endif
+	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
+}
+#define starpu_worker_relax_off _starpu_worker_relax_off
+
+#ifdef STARPU_SPINLOCK_CHECK
+#define _starpu_worker_relax_off_locked() __starpu_worker_relax_off_locked(__FILE__, __LINE__, __starpu_func__)
+static inline void __starpu_worker_relax_off_locked(const char*file, int line, const char* func)
+#else
+static inline void _starpu_worker_relax_off_locked(void)
+#endif
+{
+	int workerid = starpu_worker_get_id();
+	if (workerid == -1)
+		return;
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	STARPU_ASSERT(worker != NULL);
+	if (!worker->state_sched_op_pending)
+		return;
+#ifdef STARPU_SPINLOCK_CHECK
+	
STARPU_ASSERT_MSG(worker->state_relax_refcnt>0, "relax last turn off in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line); +#else + STARPU_ASSERT(worker->state_relax_refcnt>0); +#endif + worker->state_relax_refcnt--; +#ifdef STARPU_SPINLOCK_CHECK + worker->relax_off_file = file; + worker->relax_off_line = line; + worker->relax_off_func = func; +#endif +} + +static inline int _starpu_worker_get_relax_state(void) +{ + int workerid = starpu_worker_get_id(); + if (workerid < 0) + return 1; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + return worker->state_relax_refcnt != 0; +} +#define starpu_worker_get_relax_state _starpu_worker_get_relax_state + +/** lock a worker for observing contents + * + * notes: + * - if the observed worker is not in state_relax_refcnt, the function block until the state is reached */ +static inline void _starpu_worker_lock(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + int cur_workerid = starpu_worker_get_id(); + if (workerid != cur_workerid) + { + starpu_worker_relax_on(); + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + while (!worker->state_relax_refcnt) + { + STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); + } + } + else + { + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + } +} +#define starpu_worker_lock _starpu_worker_lock + +static inline int _starpu_worker_trylock(int workerid) +{ + struct _starpu_worker *cur_worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(cur_worker != NULL); + int cur_workerid = cur_worker->workerid; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + + /* Start with ourself */ + int ret = STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(&cur_worker->sched_mutex); + if (ret) + return ret; + if (workerid == cur_workerid) + /* We only needed to lock ourself */ + 
return 0; + + /* Now try to lock the other worker */ + ret = STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(&worker->sched_mutex); + if (!ret) + { + /* Good, check that it is relaxed */ + ret = !worker->state_relax_refcnt; + if (ret) + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + } + if (!ret) + _starpu_worker_relax_on_locked(cur_worker); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); + return ret; +} +#define starpu_worker_trylock _starpu_worker_trylock + +static inline void _starpu_worker_unlock(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + int cur_workerid = starpu_worker_get_id(); + if (workerid != cur_workerid) + { + starpu_worker_relax_off(); + } +} +#define starpu_worker_unlock _starpu_worker_unlock + +static inline void _starpu_worker_lock_self(void) +{ + int workerid = starpu_worker_get_id_check(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); +} +#define starpu_worker_lock_self _starpu_worker_lock_self + +static inline void _starpu_worker_unlock_self(void) +{ + int workerid = starpu_worker_get_id_check(); + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + STARPU_ASSERT(worker != NULL); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); +} +#define starpu_worker_unlock_self _starpu_worker_unlock_self + +static inline int _starpu_wake_worker_relax(int workerid) +{ + _starpu_worker_lock(workerid); + int ret = starpu_wake_worker_locked(workerid); + _starpu_worker_unlock(workerid); + return ret; +} +#define starpu_wake_worker_relax _starpu_wake_worker_relax + +int starpu_wake_worker_relax_light(int workerid) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** + * Allow a worker pulling a task it cannot execute to properly refuse it and + * send it back to the scheduler. 
+ */ +void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_task *task); + +void _starpu_set_catch_signals(int do_catch_signal); +int _starpu_get_catch_signals(void); + +/** Performance Monitoring */ +static inline int _starpu_perf_counter_paused(void) +{ + STARPU_RMB(); + return STARPU_UNLIKELY(_starpu_config.perf_counter_pause_depth > 0); +} + +void _starpu_crash_add_hook(void (*hook_func)(void)); +void _starpu_crash_call_hooks(); + +uint32_t _starpu_worker_exists(struct starpu_task *task); + +/* @}*/ + +#pragma GCC visibility pop + +#endif // __WORKERS_H__ diff --git a/src/datawizard/coherency.c b/src/datawizard/coherency.c new file mode 100644 index 0000000..6a162cf --- /dev/null +++ b/src/datawizard/coherency.c @@ -0,0 +1,1586 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#endif + +static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node); +int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination) +{ + int src_node = -1; + unsigned i; + + unsigned nnodes = starpu_memory_nodes_get_count(); + + /* first find a valid copy, either a STARPU_OWNER or a STARPU_SHARED */ + unsigned node; + + size_t size = _starpu_data_get_size(handle); + double cost = INFINITY; + unsigned src_node_mask = 0; + + for (node = 0; node < nnodes; node++) + { + if (handle->per_node[node].state != STARPU_INVALID) + { + /* we found a copy ! */ + src_node_mask |= (1<init_cl) + { + /* No copy yet, but applicationg told us how to build it. */ + return -1; + } + + /* we should have found at least one copy ! */ + STARPU_ASSERT_MSG(src_node_mask != 0, "The data for the handle %p is requested, but the handle does not have a valid value. 
Perhaps some initialization task is missing?", handle); + + /* Without knowing the size, we won't know the cost */ + if (!size) + cost = 0; + + /* Check whether we have transfer cost for all nodes, if so, take the minimum */ + if (cost) + for (i = 0; i < nnodes; i++) + { + if (src_node_mask & (1<per_node[src_node].allocated || handle->per_node[src_node].mapped != STARPU_UNMAPPED); + STARPU_ASSERT(handle->per_node[src_node].initialized); + return src_node; + } + + int i_ram = -1; + int i_gpu = -1; + int i_disk = -1; + + /* Revert to dumb strategy: take RAM unless only a GPU has it */ + for (i = 0; i < nnodes; i++) + { + if (src_node_mask & (1<ops->copy_methods->can_copy; + /* Avoid transfers which the interface does not want */ + if (can_copy) + { + void *src_interface = handle->per_node[i].data_interface; + void *dst_interface = handle->per_node[destination].data_interface; + unsigned handling_node; + + if (!link_supports_direct_transfers(handle, i, destination, &handling_node)) + { + /* Avoid through RAM if the interface does not want it */ + void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface; + if ((!can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, i) + && !can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, STARPU_MAIN_RAM)) + || (!can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, STARPU_MAIN_RAM) + && !can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, destination))) + continue; + } + } + + /* however GPU are expensive sources, really ! + * Unless peer transfer is supported (and it would then have been selected above). 
+ * Other should be ok */ + + if (starpu_node_get_kind(i) == STARPU_CPU_RAM || + starpu_node_get_kind(i) == STARPU_MPI_MS_RAM) + i_ram = i; + else if (starpu_node_get_kind(i) == STARPU_DISK_RAM) + i_disk = i; + else + i_gpu = i; + } + } + + /* we have to use cpu_ram in first */ + if (i_ram != -1) + src_node = i_ram; + else if (i_gpu != -1) + /* otherwise a gpu */ + src_node = i_gpu; + else + /* no luck we have to use the disk memory */ + src_node = i_disk; + + STARPU_ASSERT(src_node != -1); + STARPU_ASSERT(handle->per_node[src_node].allocated || handle->per_node[src_node].mapped != STARPU_UNMAPPED); + STARPU_ASSERT(handle->per_node[src_node].initialized); + return src_node; +} + +/* this may be called once the data is fetched with header and STARPU_RW-lock hold */ +void _starpu_update_data_state(starpu_data_handle_t handle, + struct _starpu_data_replicate *requesting_replicate, + enum starpu_data_access_mode mode) +{ + if (mode == STARPU_UNMAP) + { + /* Unmap request, invalidate */ + requesting_replicate->state = STARPU_INVALID; + return; + } + + /* There is nothing to do for relaxed coherency modes (scratch or + * reductions) */ + if (!(mode & STARPU_RW)) + return; + + unsigned nnodes = starpu_memory_nodes_get_count(); + + /* the data is present now */ + unsigned requesting_node = requesting_replicate->memory_node; + + if (mode & STARPU_W) + { + /* the requesting node now has the only valid copy */ + unsigned node; + for (node = 0; node < nnodes; node++) + { + if (requesting_replicate->mapped == (int) node + && !_starpu_node_needs_map_update(requesting_node)) + /* The mapped node will be kept up to date */ + continue; + if (handle->per_node[node].mapped == (int) requesting_node + && !_starpu_node_needs_map_update(node)) + /* The mapping node will be kept up to date */ + continue; + if (handle->per_node[node].state != STARPU_INVALID) + _STARPU_TRACE_DATA_STATE_INVALID(handle, node); + handle->per_node[node].state = STARPU_INVALID; + } + if 
(requesting_replicate->state != STARPU_OWNER) + _STARPU_TRACE_DATA_STATE_OWNER(handle, requesting_node); + requesting_replicate->state = STARPU_OWNER; + if (handle->home_node != -1 && handle->per_node[handle->home_node].state == STARPU_INVALID) + /* Notify that this MC is now dirty */ + _starpu_memchunk_dirty(requesting_replicate->mc, requesting_replicate->memory_node); + } + else + { + /* read only */ + if (requesting_replicate->state != STARPU_OWNER) + { + /* there was at least another copy of the data */ + unsigned node; + for (node = 0; node < nnodes; node++) + { + struct _starpu_data_replicate *replicate = &handle->per_node[node]; + if (replicate->state != STARPU_INVALID) + { + if (replicate->state != STARPU_SHARED) + _STARPU_TRACE_DATA_STATE_SHARED(handle, node); + replicate->state = STARPU_SHARED; + } + } + if (requesting_replicate->state != STARPU_SHARED) + _STARPU_TRACE_DATA_STATE_SHARED(handle, requesting_node); + requesting_replicate->state = STARPU_SHARED; + } + } +} + +static int worker_supports_direct_access(unsigned node, unsigned handling_node) +{ + if (node == handling_node) + return 1; + + if (!_starpu_memory_node_get_nworkers(handling_node)) + /* No worker to process the request from that node */ + return 0; + + const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(node); + if (node_ops && node_ops->is_direct_access_supported) + return node_ops->is_direct_access_supported(node, handling_node); + else + return 0; +} + +static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node) +{ + STARPU_ASSERT_MSG(handle->ops->copy_methods, "The handle %s does not define a copy_methods\n", handle->ops->name); + int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy; + void *src_interface = handle->per_node[src_node].data_interface; + void *dst_interface = 
handle->per_node[dst_node].data_interface; + + /* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect, but GPUDirect probably behave the same) */ + if (worker_supports_direct_access(src_node, dst_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node))) + { + *handling_node = dst_node; + return 1; + } + + if (worker_supports_direct_access(dst_node, src_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node))) + { + *handling_node = src_node; + return 1; + } + + return 0; +} + +/* Now, we use slowness/bandwidth to compare numa nodes, is it better to use latency ? */ +static unsigned chose_best_numa_between_src_and_dest(int src, int dst) +{ + double timing_best; + int best_numa = -1; + unsigned numa; + const unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); + for(numa = 0; numa < nb_numa_nodes; numa++) + { + double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst); + + /* Compare slowness : take the lowest */ + if (best_numa < 0 || actual < timing_best) + { + best_numa = numa; + timing_best = actual; + } + } + STARPU_ASSERT(best_numa >= 0); + + return best_numa; +} + +/* Determines the path of a request : each hop is defined by (src,dst) and the + * node that handles the hop. The returned value indicates the number of hops, + * and the max_len is the maximum number of hops (ie. the size of the + * src_nodes, dst_nodes and handling_nodes arrays. 
*/ +int _starpu_determine_request_path(starpu_data_handle_t handle, + int src_node, int dst_node, + enum starpu_data_access_mode mode, int max_len, + unsigned *src_nodes, unsigned *dst_nodes, + unsigned *handling_nodes, unsigned write_invalidation) +{ + if ((mode & STARPU_R) && src_node >= 0 && dst_node >= 0) + { + + struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node]; + struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node]; + + if (src_replicate->mapped != STARPU_UNMAPPED) + { + /* Device -> map */ + STARPU_ASSERT(max_len >= 1); + *src_nodes++ = src_node; + *dst_nodes++ = src_replicate->mapped; + *handling_nodes++ = src_node; + max_len--; + + /* map -> Device */ + int consumed = _starpu_determine_request_path(handle, + src_replicate->mapped, dst_node, + mode, max_len, + src_nodes, dst_nodes, handling_nodes, + write_invalidation); + + return consumed + 1; + } + else if (dst_replicate->mapped != STARPU_UNMAPPED) + { + /* Device -> map */ + int consumed = _starpu_determine_request_path(handle, + src_node, dst_replicate->mapped, + mode, max_len, + src_nodes, dst_nodes, handling_nodes, + write_invalidation); + + src_nodes += consumed; + dst_nodes += consumed; + handling_nodes += consumed; + max_len -= consumed; + + /* map -> Device */ + STARPU_ASSERT(max_len >= 1); + *src_nodes++ = dst_replicate->mapped; + *dst_nodes++ = dst_node; + *handling_nodes++ = dst_node; + max_len--; + + return consumed + 1; + } + } + + if (src_node == dst_node || !(mode & STARPU_R)) + { + if (dst_node == -1 || starpu_node_get_kind(dst_node) == STARPU_DISK_RAM) + handling_nodes[0] = src_node; + else + handling_nodes[0] = dst_node; + + if (write_invalidation) + /* The invalidation request will be enough */ + return 0; + + /* The destination node should only allocate the data, no transfer is required */ + STARPU_ASSERT(max_len >= 1); + src_nodes[0] = dst_node; // ignored + dst_nodes[0] = dst_node; + return 1; + } + + if (src_node < 0) + { + /* Will just 
initialize the destination */ + STARPU_ASSERT(max_len >= 1); + src_nodes[0] = dst_node; // ignored + dst_nodes[0] = dst_node; + return 1; + } + + unsigned handling_node; + int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node); + + if (!link_is_valid) + { + int (*can_copy)(void *, unsigned, void *, unsigned, unsigned) = handle->ops->copy_methods->can_copy; + void *src_interface = handle->per_node[src_node].data_interface; + void *dst_interface = handle->per_node[dst_node].data_interface; + + /* We need an intermediate hop to implement data staging + * through main memory. */ + STARPU_ASSERT(max_len >= 2); + STARPU_ASSERT(src_node >= 0); + + unsigned numa = chose_best_numa_between_src_and_dest(src_node, dst_node); + + /* GPU -> RAM */ + src_nodes[0] = src_node; + dst_nodes[0] = numa; + + if (starpu_node_get_kind(src_node) == STARPU_DISK_RAM) + /* Disks don't have their own driver thread */ + handling_nodes[0] = dst_node; + else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node)) + { + handling_nodes[0] = src_node; + } + else + { + STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, dst_node), "interface %d refuses all kinds of transfers from node %d to node %d\n", handle->ops->interfaceid, src_node, dst_node); + handling_nodes[0] = dst_node; + } + + /* RAM -> GPU */ + src_nodes[1] = numa; + dst_nodes[1] = dst_node; + + if (starpu_node_get_kind(dst_node) == STARPU_DISK_RAM) + /* Disks don't have their own driver thread */ + handling_nodes[1] = src_node; + else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node)) + { + handling_nodes[1] = dst_node; + } + else + { + STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, src_node), "interface %d refuses all kinds of transfers from node %d to node %d\n", handle->ops->interfaceid, src_node, dst_node); + handling_nodes[1] = src_node; + } + + return 2; + } + else + { + 
STARPU_ASSERT(max_len >= 1); + + src_nodes[0] = src_node; + dst_nodes[0] = dst_node; + handling_nodes[0] = handling_node; + + return 1; + } +} + +/* handle->lock should be taken. r is returned locked. The node parameter + * indicate either the source of the request, or the destination for a + * write-only request. */ +static struct _starpu_data_request *_starpu_search_existing_data_request(struct _starpu_data_replicate *replicate, unsigned node, enum starpu_data_access_mode mode, struct starpu_task *task, enum starpu_is_prefetch is_prefetch) +{ + struct _starpu_data_request *r; + + /* Make sure we don't have anything else than R/W */ + STARPU_ASSERT(mode != STARPU_UNMAP); + + for (r = replicate->request[node]; r; r = r->next_same_req) + { + _starpu_spin_checklocked(&r->handle->header_lock); + + if (r->canceled) + /* Do not reuse a cancelled request */ + continue; + + if (task && r->task && task != r->task) + /* Do not collapse requests for different tasks */ + continue; + + _starpu_spin_lock(&r->lock); + + /* perhaps we need to "upgrade" the request */ + if (is_prefetch < r->prefetch) + _starpu_update_prefetch_status(r, is_prefetch); + + /* TODO: abort on unmapping request */ + + if (mode & STARPU_R) + { + /* in case the existing request did not imply a memory + * transfer yet, we have to take a second refcnt now + * for the source, in addition to the refcnt for the + * destination + * (so that the source remains valid) */ + if (!(r->mode & STARPU_R)) + { + replicate->refcnt++; + replicate->handle->busy_count++; + } + + r->mode = (enum starpu_data_access_mode) ((int) r->mode | (int) STARPU_R); + } + + if (mode & STARPU_W) + r->mode = (enum starpu_data_access_mode) ((int) r->mode | (int) STARPU_W); + + /* We collapse with this request */ + return r; + } + + return NULL; +} + + + +/* + * This function is called when the data is needed on the local node, this + * returns a pointer to the local copy + * + * R STARPU_W STARPU_RW + * Owner OK OK OK + * Shared OK 1 1 + * 
Invalid 2 3 4 + * + * case 1 : shared + (read)write : + * no data copy but shared->Invalid/Owner + * case 2 : invalid + read : + * data copy + invalid->shared + owner->shared (STARPU_ASSERT(there is a valid)) + * case 3 : invalid + write : + * no data copy + invalid->owner + (owner,shared)->invalid + * case 4 : invalid + R/STARPU_W : + * data copy + if (STARPU_W) (invalid->owner + owner->invalid) + * else (invalid,owner->shared) + */ + +struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle, + struct _starpu_data_replicate *dst_replicate, + enum starpu_data_access_mode mode, + struct starpu_task *task, + enum starpu_is_prefetch is_prefetch, + unsigned async, + void (*callback_func)(void *), void *callback_arg, int prio, const char *origin) +{ + /* We don't care about commuting for data requests, that was handled before. */ + mode &= ~STARPU_COMMUTE; + + /* This function is called with handle's header lock taken */ + _starpu_spin_checklocked(&handle->header_lock); + + /* TODO: If writing copying to RAM, first update maps into RAM, even if RAM is already up to date. */ + + int requesting_node = dst_replicate ? dst_replicate->memory_node : -1; + unsigned nwait = 0; + + if (mode & STARPU_W) + { + /* We will write to the buffer. We will have to wait for all + * existing requests before the last request which will + * invalidate all their results (which were possibly spurious, + * e.g. too aggressive eviction). + */ + unsigned i, j; + unsigned nnodes = starpu_memory_nodes_get_count(); + for (i = 0; i < nnodes; i++) + for (j = 0; j < nnodes; j++) + { + struct _starpu_data_request *r; + for (r = handle->per_node[i].request[j]; r; r = r->next_same_req) + nwait++; + } + /* If the request is not detached (i.e. the caller really wants + * proper ownership), no new requests will appear because a + * reference will be kept on the dst replicate, which will + * notably prevent data reclaiming. 
+ */ + } + + if ((!dst_replicate || dst_replicate->state != STARPU_INVALID) && (!nwait || is_prefetch)) + { + if (dst_replicate) + { +#ifdef STARPU_MEMORY_STATS + enum _starpu_cache_state old_state = dst_replicate->state; +#endif + /* the data is already available and we don't have to wait for + * any request, so we can stop */ + _starpu_update_data_state(handle, dst_replicate, mode); + _starpu_msi_cache_hit(requesting_node); + +#ifdef STARPU_MEMORY_STATS + _starpu_memory_handle_stats_cache_hit(handle, requesting_node); + + /* XXX Broken ? */ + if (old_state == STARPU_SHARED + && dst_replicate->state == STARPU_OWNER) + _starpu_memory_handle_stats_shared_to_owner(handle, requesting_node); +#endif + + if (dst_replicate->mc) + { + if (is_prefetch == STARPU_TASK_PREFETCH) + /* Make sure it stays there */ + dst_replicate->nb_tasks_prefetch++; + + _starpu_memchunk_recently_used(dst_replicate->mc, requesting_node); + } + + if (task) + { + unsigned j; + unsigned nnodes = starpu_memory_nodes_get_count(); + /* Cancel any existing (prefetch) request */ + struct _starpu_data_request *r2; + for (j = 0; j < nnodes; j++) + { + for (r2 = dst_replicate->request[j]; r2; r2 = r2->next_same_req) + { + if (r2->task && r2->task == task) + { + r2->canceled = 1; + break; + } + } + } + } + } + + _starpu_spin_unlock(&handle->header_lock); + + if (callback_func) + callback_func(callback_arg); + + _STARPU_LOG_OUT_TAG("data available"); + return NULL; + } + + if (dst_replicate) + _starpu_msi_cache_miss(requesting_node); + + /* the only remaining situation is that the local copy was invalid */ + STARPU_ASSERT((dst_replicate && dst_replicate->state == STARPU_INVALID) || nwait); + + /* find someone who already has the data */ + int src_node = -1; + + if (dst_replicate && mode & STARPU_R) + { + if (dst_replicate->state == STARPU_INVALID) + src_node = _starpu_select_src_node(handle, requesting_node); + else + src_node = requesting_node; + if (src_node < 0) + { + /* We will create it, no need to 
read an existing value */ + mode &= ~STARPU_R; + } + } + else if (dst_replicate) + { + /* if the data is in write only mode (and not SCRATCH or REDUX), there is no need for a source, data will be initialized by the task itself */ + if (mode & STARPU_W && is_prefetch <= STARPU_TASK_PREFETCH) + dst_replicate->initialized = 1; + if (starpu_node_get_kind(requesting_node) == STARPU_CPU_RAM && !nwait + && !_starpu_malloc_willpin_on_node(requesting_node)) + { + /* FIXME: also try to map */ + /* And this is the main RAM without pinning, really no need for a + * request, just quickly allocate and be done */ + if (dst_replicate->mapped != STARPU_UNMAPPED + || _starpu_allocate_memory_on_node(handle, dst_replicate, is_prefetch, 0) == 0) + { + if (is_prefetch <= STARPU_TASK_PREFETCH) + _starpu_update_data_state(handle, dst_replicate, mode); + if (dst_replicate->mc) + { + if (is_prefetch == STARPU_TASK_PREFETCH) + /* Make sure it stays there */ + dst_replicate->nb_tasks_prefetch++; + + _starpu_memchunk_recently_used(dst_replicate->mc, requesting_node); + } + + _starpu_spin_unlock(&handle->header_lock); + + if (callback_func) + callback_func(callback_arg); + _STARPU_LOG_OUT_TAG("data immediately allocated"); + return NULL; + } + } + } + +#define MAX_REQUESTS 4 + /* We can safely assume that there won't be more than 2 hops in the + * current implementation */ + unsigned src_nodes[MAX_REQUESTS], dst_nodes[MAX_REQUESTS], handling_nodes[MAX_REQUESTS]; + /* keep one slot for the last W request, if any */ + int write_invalidation = (mode & STARPU_W) && nwait && !is_prefetch; + int nhops = _starpu_determine_request_path(handle, src_node, requesting_node, mode, MAX_REQUESTS, + src_nodes, dst_nodes, handling_nodes, write_invalidation); + + STARPU_ASSERT(nhops >= 0 && nhops <= MAX_REQUESTS-1); + struct _starpu_data_request *requests[nhops + write_invalidation]; + + /* Did we reuse a request for that hop ? 
*/ + int reused_requests[nhops + write_invalidation]; + + /* Construct an array with a list of requests, possibly reusing existing requests */ + int hop; + for (hop = 0; hop < nhops; hop++) + { + struct _starpu_data_request *r; + + unsigned hop_src_node = src_nodes[hop]; + unsigned hop_dst_node = dst_nodes[hop]; + unsigned hop_handling_node = handling_nodes[hop]; + + struct _starpu_data_replicate *hop_src_replicate; + struct _starpu_data_replicate *hop_dst_replicate; + + /* Only the first request is independent */ + unsigned ndeps = (hop == 0)?0:1; + + hop_src_replicate = &handle->per_node[hop_src_node]; + hop_dst_replicate = (hop != nhops - 1)?&handle->per_node[hop_dst_node]:dst_replicate; + + /* Try to reuse a request if possible */ +#ifdef STARPU_DEVEL +#warning We do not actually want to reuse an existing request when our request is for a task with low priority, that will get executed much later. We don t want to wire down the data in between, at worse that could hog the complete gpu memory... +#endif + r = _starpu_search_existing_data_request(hop_dst_replicate, + (mode & STARPU_R)?hop_src_node:hop_dst_node, + mode, task, is_prefetch); + + reused_requests[hop] = !!r; + + if (!r) + { + /* Create a new request if there was no request to reuse */ + r = _starpu_create_data_request(handle, hop_src_replicate, + hop_dst_replicate, hop_handling_node, + mode, ndeps, task, is_prefetch, prio, 0, origin); + nwait++; + } + + requests[hop] = r; + } + + /* Chain these requests */ + for (hop = 0; hop < nhops; hop++) + { + struct _starpu_data_request *r; + r = requests[hop]; + + if (hop != nhops - 1) + { + if (!reused_requests[hop + 1]) + { + r->next_req[r->next_req_count++] = requests[hop + 1]; + STARPU_ASSERT(r->next_req_count <= STARPU_MAXNODES); + } + } + else + { + if (is_prefetch == STARPU_TASK_PREFETCH) + /* Make last request add the prefetch count on the mc to keep the data + * there until the task gets to execute. 
*/ + r->nb_tasks_prefetch++; + + if (!write_invalidation) + /* The last request will perform the callback after termination */ + _starpu_data_request_append_callback(r, callback_func, callback_arg); + } + + if (reused_requests[hop]) + _starpu_spin_unlock(&r->lock); + } + + if (write_invalidation) + { + /* Some requests were still pending, we have to add yet another + * request, depending on them, which will invalidate their + * result. + */ + struct _starpu_data_request *r = _starpu_create_data_request(handle, dst_replicate, + dst_replicate, requesting_node, + STARPU_W, nwait, task, is_prefetch, prio, 1, origin); + + /* and perform the callback after termination */ + _starpu_data_request_append_callback(r, callback_func, callback_arg); + + /* We will write to the buffer. We will have to wait for all + * existing requests before the last request which will + * invalidate all their results (which were possibly spurious, + * e.g. too aggressive eviction). + */ + unsigned i, j; + unsigned nnodes = starpu_memory_nodes_get_count(); + for (i = 0; i < nnodes; i++) + for (j = 0; j < nnodes; j++) + { + struct _starpu_data_request *r2; + for (r2 = handle->per_node[i].request[j]; r2; r2 = r2->next_same_req) + { + _starpu_spin_lock(&r2->lock); + if (is_prefetch < r2->prefetch) + /* Hasten the request we will have to wait for */ + _starpu_update_prefetch_status(r2, is_prefetch); + r2->next_req[r2->next_req_count++] = r; + STARPU_ASSERT(r2->next_req_count <= STARPU_MAXNODES + 1); + _starpu_spin_unlock(&r2->lock); + nwait--; + } + } + STARPU_ASSERT(nwait == 0); + + nhops++; + requests[nhops - 1] = r; + /* existing requests will post this one */ + reused_requests[nhops - 1] = 1; + } + STARPU_ASSERT(nhops); + + if (!async) + requests[nhops - 1]->refcnt++; + + + /* we only submit the first request, the remaining will be + * automatically submitted afterward */ + if (!reused_requests[0]) + _starpu_post_data_request(requests[0]); + + return requests[nhops - 1]; +} + +int 
_starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *dst_replicate, + enum starpu_data_access_mode mode, unsigned detached, + struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, + void (*callback_func)(void *), void *callback_arg, int prio, const char *origin) +{ + _STARPU_LOG_IN(); + + _starpu_spin_lock(&handle->header_lock); + + if (mode & STARPU_R && is_prefetch > STARPU_FETCH) + { + unsigned src_node_mask = 0; + + unsigned nnodes = starpu_memory_nodes_get_count(); + unsigned n; + for (n = 0; n < nnodes; n++) + { + if (handle->per_node[n].state != STARPU_INVALID) + { + /* we found a copy ! */ + src_node_mask |= (1<init_cl, "Could not find a valid copy of the data, and no handle initialization function"); + _starpu_spin_unlock(&handle->header_lock); + return 0; + } + } + + if (!detached) + { + /* Take references which will be released by _starpu_release_data_on_node */ + if (dst_replicate) + dst_replicate->refcnt++; + else if (node == STARPU_ACQUIRE_NO_NODE_LOCK_ALL) + { + int i; + for (i = 0; i < STARPU_MAXNODES; i++) + handle->per_node[i].refcnt++; + } + handle->busy_count++; + } + + struct _starpu_data_request *r; + r = _starpu_create_request_to_fetch_data(handle, dst_replicate, mode, + task, is_prefetch, async, callback_func, callback_arg, prio, origin); + + /* If no request was created, the handle was already up-to-date on the + * node. In this case, _starpu_create_request_to_fetch_data has already + * unlocked the header. 
*/ + if (!r) + return 0; + + _starpu_spin_unlock(&handle->header_lock); + + int ret = async?0:_starpu_wait_data_request_completion(r, 1); + _STARPU_LOG_OUT(); + return ret; +} + +static int idle_prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) +{ + return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_IDLEFETCH, 1, NULL, NULL, prio, "idle_prefetch_data_on_node"); +} + +static int task_prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) +{ + return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_TASK_PREFETCH, 1, NULL, NULL, prio, "task_prefetch_data_on_node"); +} + +static int STARPU_ATTRIBUTE_UNUSED prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) +{ + return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_PREFETCH, 1, NULL, NULL, prio, "prefetch_data_on_node"); +} + +static int fetch_data(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) +{ + return _starpu_fetch_data_on_node(handle, node, replicate, mode, 0, task, STARPU_FETCH, 0, NULL, NULL, prio, "fetch_data"); +} + +uint32_t _starpu_get_data_refcnt(starpu_data_handle_t handle, unsigned node) +{ + return handle->per_node[node].refcnt; +} + +size_t _starpu_data_get_size(starpu_data_handle_t handle) +{ + return handle->ops->get_size(handle); +} + +size_t _starpu_data_get_alloc_size(starpu_data_handle_t handle) +{ + if (handle->ops->get_alloc_size) + return handle->ops->get_alloc_size(handle); + else + return handle->ops->get_size(handle); +} + +starpu_ssize_t 
_starpu_data_get_max_size(starpu_data_handle_t handle) +{ + if (handle->ops->get_max_size) + return handle->ops->get_max_size(handle); + else + return -1; +} + +uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle) +{ + return handle->footprint; +} + +/* in case the data was accessed on a write mode, do not forget to + * make it accessible again once it is possible ! */ +void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, enum starpu_data_access_mode down_to_mode, struct _starpu_data_replicate *replicate) +{ + uint32_t wt_mask; + size_t max_wt_mask = sizeof(wt_mask) * 8; + unsigned wt_count = starpu_memory_nodes_get_count(); + if (max_wt_mask > STARPU_MAXNODES) + max_wt_mask = STARPU_MAXNODES; + if (wt_count > max_wt_mask) + wt_count = max_wt_mask; + + wt_mask = default_wt_mask | handle->wt_mask; + wt_mask &= (1ULL<memory_node; + + if (replicate->state != STARPU_INVALID && handle->current_mode & STARPU_W) + if (wt_mask && (memory_node >= max_wt_mask || wt_mask & ~(1<header_lock)) + { + cpt++; + _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC); + } + if (cpt == STARPU_SPIN_MAXTRY) + _starpu_spin_lock(&handle->header_lock); + + if (down_to_mode == STARPU_NONE) + { + /* Release refcnt taken by fetch_data_on_node */ + replicate->refcnt--; + STARPU_ASSERT_MSG(replicate->refcnt >= 0, "handle %p released too many times", handle); + + STARPU_ASSERT_MSG(handle->busy_count > 0, "handle %p released too many times", handle); + handle->busy_count--; + } + + if (!_starpu_notify_data_dependencies(handle, down_to_mode)) + _starpu_spin_unlock(&handle->header_lock); +} + +int _starpu_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio, enum starpu_is_prefetch prefetch) +{ +#ifdef STARPU_OPENMP + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + /* do not attempt to prefetch task input if this is an OpenMP task resuming after blocking */ + if (j->discontinuous != 0) + return 
0; +#endif + STARPU_ASSERT_MSG(prefetch != STARPU_PREFETCH || !task->prefetched, "Prefetching was already requested for this task! Did you set 'prefetches' to 1 in the starpu_sched_policy structure?"); + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned index; + + for (index = 0; index < nbuffers; index++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); + + if (mode & (STARPU_SCRATCH|STARPU_REDUX)) + continue; + + int node; + if (target_node >= 0) + node = _starpu_task_data_get_node_on_node(task, index, target_node); + else + node = _starpu_task_data_get_node_on_worker(task, index, worker); + + if (node < 0) + continue; + + struct _starpu_data_replicate *replicate = &handle->per_node[node]; + if (prefetch == STARPU_PREFETCH) + task_prefetch_data_on_node(handle, node, replicate, mode, task, prio); + else + idle_prefetch_data_on_node(handle, node, replicate, mode, task, prio); + } + + if (prefetch == STARPU_PREFETCH) + task->prefetched = 1; + + return 0; +} + +int starpu_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio) +{ + return _starpu_prefetch_task_input_prio(task, target_node, worker, prio, STARPU_PREFETCH); +} + +int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned target_node, int prio) +{ + return starpu_prefetch_task_input_prio(task, target_node, -1, prio); +} + + +int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node) +{ + int prio = task->priority; + if (task->workerorder) + prio = INT_MAX - task->workerorder; + return starpu_prefetch_task_input_on_node_prio(task, node, prio); +} + +int starpu_idle_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio) +{ + return _starpu_prefetch_task_input_prio(task, target_node, worker, prio, STARPU_IDLEFETCH); +} + +int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task 
*task, unsigned target_node, int prio) +{ + return starpu_idle_prefetch_task_input_prio(task, target_node, -1, prio); +} + +int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node) +{ + int prio = task->priority; + if (task->workerorder) + prio = INT_MAX - task->workerorder; + return starpu_idle_prefetch_task_input_on_node_prio(task, node, prio); +} + +int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio) +{ + return starpu_prefetch_task_input_prio(task, -1, worker, prio); +} + +int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker) +{ + int prio = task->priority; + if (task->workerorder) + prio = INT_MAX - task->workerorder; + return starpu_prefetch_task_input_for_prio(task, worker, prio); +} + +int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio) +{ + return starpu_idle_prefetch_task_input_prio(task, -1, worker, prio); +} + +int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker) +{ + int prio = task->priority; + if (task->workerorder) + prio = INT_MAX - task->workerorder; + return starpu_idle_prefetch_task_input_for_prio(task, worker, prio); +} + +static struct _starpu_data_replicate *get_replicate(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int workerid, unsigned node) +{ + if (mode & (STARPU_SCRATCH|STARPU_REDUX)) + { + STARPU_ASSERT(workerid >= 0); + if (STARPU_RUNNING_ON_VALGRIND || !handle->per_worker) + { + _starpu_spin_lock(&handle->header_lock); + if (!handle->per_worker) + _starpu_data_initialize_per_worker(handle); + _starpu_spin_unlock(&handle->header_lock); + } + return &handle->per_worker[workerid]; + } + else + /* That's a "normal" buffer (R/W) */ + return &handle->per_node[node]; +} + +/* Callback used when a buffer is send asynchronously to the sink */ +static void _starpu_fetch_task_input_cb(void *arg) +{ + struct _starpu_worker * worker = (struct _starpu_worker *) 
arg; + + /* increase the number of buffers received */ + STARPU_WMB(); + (void)STARPU_ATOMIC_ADD(&worker->nb_buffers_transferred, 1); + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[worker->memory_node]); +#endif +} + +/* Synchronously or asynchronously fetch data for a given task (if it's not there already) + * Returns 0, or -1 if a synchronous fetch failed (the inputs already taken are then released). + * With async set, the number of data acquired here is stored in worker->nb_buffers_totransfer. */ + +/* _starpu_fetch_task_input must be called before + * executing the task. __starpu_push_task_output must be called after the + * execution of the task. */ + +/* The driver can either just call _starpu_fetch_task_input with async==0, + * or to improve overlapping, it can call _starpu_fetch_task_input with + * async==1, then wait for transfers to complete, then call + * _starpu_fetch_task_input_tail to complete the fetch. */ +int _starpu_fetch_task_input(struct starpu_task *worker_task, struct _starpu_job *j, int async) +{ + struct starpu_task *task = j->task; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + int workerid = worker->workerid; + if (async) + { + worker->task_transferring = worker_task; + worker->nb_buffers_transferred = 0; + if (worker->ntasks <= 1) + _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + } + else + _STARPU_TRACE_START_FETCH_INPUT(NULL); + + int profiling = starpu_profiling_status_get(); + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->acquire_data_start_time); + + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned nacquires; + + unsigned index; + int indexdup; + + nacquires = 0; + for (index = 0; index < nbuffers; index++) + { + int ret; + starpu_data_handle_t handle = descrs[index].handle; + enum starpu_data_access_mode mode = descrs[index].mode; + int orig_node = descrs[index].orig_node; + int node = _starpu_task_data_get_node_on_worker(task, descrs[index].index, workerid); + /* We set this here 
for coherency with __starpu_push_task_output */ + descrs[index].node = node; + if (mode == STARPU_NONE || + (mode & ((1<= STARPU_ACCESS_MODE_MAX || + (mode >> STARPU_MODE_SHIFT) >= (STARPU_SHIFTED_MODE_MAX >> STARPU_MODE_SHIFT)) + STARPU_ASSERT_MSG(0, "mode %d (0x%x) is bogus\n", mode, mode); + + if (node < 0) + continue; + + struct _starpu_data_replicate *local_replicate; + + for (indexdup = (int) index-1; indexdup >= 0; indexdup--) + { + starpu_data_handle_t handle_dup = descrs[indexdup].handle; + int node_dup = descrs[indexdup].orig_node; + if (handle_dup == handle && node_dup == orig_node) + /* We have already taken this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles */ + goto next; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + local_replicate = get_replicate(handle, mode, workerid, node); + + if (async) + { + ret = _starpu_fetch_data_on_node(handle, node, local_replicate, mode, 0, task, STARPU_FETCH, 1, + _starpu_fetch_task_input_cb, worker, task->priority, "_starpu_fetch_task_input"); +#ifdef STARPU_SIMGRID + if (_starpu_simgrid_fetching_input_cost()) + starpu_sleep(0.000001); +#endif + if (STARPU_UNLIKELY(ret)) + { + /* Ooops, not enough memory, make worker wait for these for now, and the synchronous call will finish by forcing eviction*/ + worker->nb_buffers_totransfer = nacquires; + _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); + return 0; + } + } + else + { + ret = fetch_data(handle, node, local_replicate, mode, task, task->priority); +#ifdef STARPU_SIMGRID + if (_starpu_simgrid_fetching_input_cost()) + starpu_sleep(0.000001); +#endif + if (STARPU_UNLIKELY(ret)) + goto enomem; + } + + nacquires++; + next: + ; + } + _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); + if (async) + { + worker->nb_buffers_totransfer = nacquires; + return 0; + } + 
+ _starpu_fetch_task_input_tail(task, j, worker); + + return 0; + +enomem: + _STARPU_TRACE_END_FETCH_INPUT(NULL); + _STARPU_DISP("something went wrong with buffer %u\n", index); + + /* try to unreference all the input that were successfully taken */ + unsigned index2; + for (index2 = 0; index2 < index; index2++) + { + starpu_data_handle_t handle = descrs[index2].handle; + enum starpu_data_access_mode mode = descrs[index2].mode; + int orig_node = descrs[index2].orig_node; + int node = descrs[index2].node; + + struct _starpu_data_replicate *local_replicate; + + for (indexdup = (int) index2+1; indexdup < (int) index; indexdup++) + { + starpu_data_handle_t handle_dup = descrs[indexdup].handle; + int node_dup = descrs[indexdup].orig_node; + if (handle_dup == handle && node_dup == orig_node) + /* We have already released this data, skip it. This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles */ + goto next2; + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + local_replicate = get_replicate(handle, mode, workerid, node); + + _starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate); + next2: + ; + } + + return -1; +} + +/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order. 
*/ +void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job *j, struct _starpu_worker *worker) +{ + int workerid = worker->workerid; + + int profiling = starpu_profiling_status_get(); + + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + + unsigned index; + unsigned long total_size = 0; + + for (index = 0; index < nbuffers; index++) + { + starpu_data_handle_t handle = descrs[index].handle; + enum starpu_data_access_mode mode = descrs[index].mode; + int node = descrs[index].node; + + if (node < 0) + continue; + + struct _starpu_data_replicate *local_replicate; + int needs_init; + + local_replicate = get_replicate(handle, mode, workerid, node); + _starpu_spin_lock(&handle->header_lock); + if (local_replicate->mc) + { + if (task->prefetched && local_replicate->initialized && + /* See prefetch conditions in + * starpu_prefetch_task_input_on_node_prio and alike */ + !(mode & (STARPU_SCRATCH|STARPU_REDUX)) && + (mode & STARPU_R)) + { + /* Allocations or transfer prefetches should have been done by now and marked + * this mc as needed for us. + * Now that we added a reference for the task, we can relieve that. */ + /* Note: the replicate might have been evicted in between, thus not 100% sure + * that our prefetch request is still recorded here. */ + if (local_replicate->nb_tasks_prefetch > 0) + local_replicate->nb_tasks_prefetch--; + } + } + if (!(mode & STARPU_R) && (mode & STARPU_W)) + { + /* The task will be initializing it. Possibly we have + * only prefetched the allocation, and now we have to + * record that we'll modify it. 
*/ + local_replicate->initialized = 1; + _starpu_update_data_state(handle, local_replicate, mode); + } + + needs_init = !local_replicate->initialized; + _starpu_spin_unlock(&handle->header_lock); + + _STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, descrs[index].index); + + /* If the replicate was not initialized yet, we have to do it now */ + if (!(mode & STARPU_SCRATCH) && needs_init) + _starpu_init_data_replicate(handle, local_replicate, workerid); + +#ifdef STARPU_USE_FXT + if (fut_active) + total_size += _starpu_data_get_size(handle); +#endif + } + _STARPU_TRACE_DATA_LOAD(workerid,total_size); + + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->acquire_data_end_time); + + _STARPU_TRACE_END_FETCH_INPUT(NULL); + + _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); +} + +/* Release task data dependencies */ +void __starpu_push_task_output(struct _starpu_job *j) +{ +#ifdef STARPU_OPENMP + STARPU_ASSERT(!j->continuation); +#endif + int profiling = starpu_profiling_status_get(); + struct starpu_task *task = j->task; + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->release_data_start_time); + + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + + int workerid = starpu_worker_get_id(); + + unsigned index; + int indexdup; + for (index = 0; index < nbuffers; index++) + { + starpu_data_handle_t handle = descrs[index].handle; + enum starpu_data_access_mode mode = descrs[index].mode; + int orig_node = descrs[index].orig_node; + int node = descrs[index].node; + + struct _starpu_data_replicate *local_replicate = NULL; + + for (indexdup = (int) index-1; indexdup >= 0; indexdup--) + { + starpu_data_handle_t handle_dup = descrs[indexdup].handle; + int node_dup = descrs[indexdup].orig_node; + if (handle_dup == handle && node_dup == orig_node) + /* We have already released this data, skip it. 
This + * depends on ordering putting writes before reads, see + * _starpu_compar_handles */ + goto next; + + if (!_starpu_handles_same_root(handle_dup, handle)) + /* We are not checking within the same parent any more, no need to continue checking other handles */ + break; + } + + if (node != -1) + local_replicate = get_replicate(handle, mode, workerid, node); + + /* Keep a reference for future + * _starpu_release_task_enforce_sequential_consistency call */ + _starpu_spin_lock(&handle->header_lock); + handle->busy_count++; + + if (node == -1) + { + /* NOWHERE case, just notify dependencies */ + if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) + _starpu_spin_unlock(&handle->header_lock); + } + else + { + _starpu_spin_unlock(&handle->header_lock); + _starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate); + } + next: + ; + } + + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->release_data_end_time); +} + +/* Version for a driver running on a worker: we show the driver state in the trace */ +void _starpu_push_task_output(struct _starpu_job *j) +{ + _STARPU_TRACE_START_PUSH_OUTPUT(NULL); + __starpu_push_task_output(j); + _STARPU_TRACE_END_PUSH_OUTPUT(NULL); +} + +struct fetch_nowhere_wrapper +{ + struct _starpu_job *j; + unsigned pending; +}; + +static void _starpu_fetch_nowhere_task_input_cb(void *arg); +/* Asynchronously fetch data for a task which will have no content */ +void _starpu_fetch_nowhere_task_input(struct _starpu_job *j) +{ + int profiling = starpu_profiling_status_get(); + struct starpu_task *task = j->task; + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->acquire_data_start_time); + + struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + unsigned nfetchbuffers = 0; + struct fetch_nowhere_wrapper *wrapper; + + unsigned index; + for (index = 0; index < nbuffers; index++) + { + /* 
Note here we just follow what was requested, and not use _starpu_task_data_get_node* */ + int node = -1; + if (task->cl->specific_nodes) + node = STARPU_CODELET_GET_NODE(task->cl, descrs[index].index); + descrs[index].node = node; + if (node != -1) + nfetchbuffers++; + } + + if (!nfetchbuffers) + { + /* Nothing to fetch actually, already finished! */ + __starpu_push_task_output(j); + _starpu_handle_job_termination(j); + _STARPU_LOG_OUT_TAG("handle_job_termination"); + return; + } + + _STARPU_MALLOC(wrapper, (sizeof(*wrapper))); + wrapper->j = j; + /* +1 for the call below */ + wrapper->pending = nfetchbuffers + 1; + + for (index = 0; index < nbuffers; index++) + { + starpu_data_handle_t handle = descrs[index].handle; + enum starpu_data_access_mode mode = descrs[index].mode; + int node = descrs[index].node; + if (node == -1) + continue; + + if (mode == STARPU_NONE || + (mode & ((1<= STARPU_ACCESS_MODE_MAX || + (mode >> STARPU_MODE_SHIFT) >= (STARPU_SHIFTED_MODE_MAX >> STARPU_MODE_SHIFT)) + STARPU_ASSERT_MSG(0, "mode %d (0x%x) is bogus\n", mode, mode); + STARPU_ASSERT(mode != STARPU_SCRATCH && mode != STARPU_REDUX); + + struct _starpu_data_replicate *local_replicate; + + local_replicate = get_replicate(handle, mode, -1, node); + + _starpu_fetch_data_on_node(handle, node, local_replicate, mode, 0, task, STARPU_FETCH, 1, _starpu_fetch_nowhere_task_input_cb, wrapper, 0, "_starpu_fetch_nowhere_task_input"); + } + + if (profiling && task->profiling_info) + _starpu_clock_gettime(&task->profiling_info->acquire_data_end_time); + + /* Finished working with the task, release our reference */ + _starpu_fetch_nowhere_task_input_cb(wrapper); +} + +static void _starpu_fetch_nowhere_task_input_cb(void *arg) +{ + /* One more transfer finished */ + struct fetch_nowhere_wrapper *wrapper = arg; + + unsigned pending = STARPU_ATOMIC_ADD(&wrapper->pending, -1); + ANNOTATE_HAPPENS_BEFORE(&wrapper->pending); + if (pending == 0) + { + ANNOTATE_HAPPENS_AFTER(&wrapper->pending); + + /* 
Finished transferring, task is over */ + struct _starpu_job *j = wrapper->j; + free(wrapper); + __starpu_push_task_output(j); + _starpu_handle_job_termination(j); + _STARPU_LOG_OUT_TAG("handle_job_termination"); + } +} + +/* NB : this value can only be an indication of the status of a data + at some point, but there is no strong guarantee ! */ +unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node) +{ + unsigned ret = 0; + +// XXX : this is just a hint, so we don't take the lock ... +// STARPU_PTHREAD_SPIN_LOCK(&handle->header_lock); + + if (handle->per_node[node].state != STARPU_INVALID) + { + ret = 1; + } + else + { + unsigned i; + unsigned nnodes = starpu_memory_nodes_get_count(); + + for (i = 0; i < nnodes; i++) + { + if (handle->per_node[node].request[i]) + { + ret = 1; + break; + } + } + + } + +// STARPU_PTHREAD_SPIN_UNLOCK(&handle->header_lock); + + return ret; +} + +/* Unmap the data from this node, e.g. before partitioning or unregistering */ +void _starpu_data_unmap(starpu_data_handle_t handle, unsigned node) +{ + struct _starpu_data_request *r = NULL; + STARPU_ASSERT(handle); + + _starpu_spin_lock(&handle->header_lock); + if (handle->per_node[node].mapped != STARPU_UNMAPPED) + { + r = _starpu_create_data_request(handle, &handle->per_node[handle->per_node[node].mapped], &handle->per_node[node], node, STARPU_UNMAP, 0, NULL, STARPU_FETCH, 0, 0, __func__); + + r->refcnt++; + _starpu_post_data_request(r); + } + _starpu_spin_unlock(&handle->header_lock); + if (r) + _starpu_wait_data_request_completion(r, 1); +} diff --git a/src/datawizard/coherency.h b/src/datawizard/coherency.h new file mode 100644 index 0000000..070563c --- /dev/null +++ b/src/datawizard/coherency.h @@ -0,0 +1,421 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __COHERENCY__H__ +#define __COHERENCY__H__ + +/** @file */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +enum _starpu_cache_state +{ + STARPU_OWNER, + STARPU_SHARED, + STARPU_INVALID +}; + +/** this should contain the information relative to a given data replicate */ +struct _starpu_data_replicate +{ + starpu_data_handle_t handle; + + /** describe the actual data layout, as manipulated by data interfaces in *_interface.c */ + void *data_interface; + + /** How many requests or tasks are currently working with this replicate */ + int refcnt; + + char memory_node; + + /** describes the state of the local data in term of coherency */ + enum _starpu_cache_state state: 2; + + /** A buffer that is used for SCRATCH or reduction cannot be used with + * filters. */ + unsigned relaxed_coherency:2; + + /** We may need to initialize the replicate with some value before using it. */ + unsigned initialized:1; + + /** is the data locally allocated ? */ + unsigned allocated:1; + /** was it automatically allocated ? (else it's the application-provided + * buffer, don't ever try to free it!) 
*/ + /** perhaps the allocation was performed higher in the hierarchy + * for now this is just translated into !automatically_allocated + * */ + unsigned automatically_allocated:1; + + /** is the write side enabled on the mapping? + * This is important for drivers which may actually make a copy instead + * of a map. + * + * Only meaningful when mapped != STARPU_UNMAPPED */ + unsigned map_write:1; + +#define STARPU_UNMAPPED -1 + /** >= 0 when the data is just a mapping of a replicate from that memory node, + * otherwise STARPU_UNMAPPED */ + int mapped; + + /** To help the scheduling policies to make some decision, we + may keep track of the tasks that are likely to request + this data on the current node. + It is the responsibility of the scheduling _policy_ to set that + flag when it assigns a task to a queue, policies which do not + use this hint can simply ignore it. + */ + uint32_t requested; + + /** This tracks the list of requests to provide the value */ + struct _starpu_data_request *request[STARPU_MAXNODES]; + /** This points to the last entry of request, to easily append to the list */ + struct _starpu_data_request *last_request[STARPU_MAXNODES]; + + /* Which request is loading data here */ + struct _starpu_data_request *load_request; + + /** The number of prefetches that we made for this replicate for various tasks + * This is also the number of tasks that we will wait to see use the mc before + * we attempt to evict it. 
+ */ + unsigned nb_tasks_prefetch; + + /** Pointer to memchunk for LRU strategy */ + struct _starpu_mem_chunk * mc; +}; + +struct _starpu_data_requester_prio_list; + +struct _starpu_jobid_list +{ + unsigned long id; + struct _starpu_jobid_list *next; +}; + +/** This structure describes a simply-linked list of task */ +struct _starpu_task_wrapper_list +{ + struct starpu_task *task; + struct _starpu_task_wrapper_list *next; +}; + +/** This structure describes a doubly-linked list of task */ +struct _starpu_task_wrapper_dlist +{ + struct starpu_task *task; + struct _starpu_task_wrapper_dlist *next; + struct _starpu_task_wrapper_dlist *prev; +}; + +extern int _starpu_has_not_important_data; + +typedef void (*_starpu_data_handle_unregister_hook)(starpu_data_handle_t); + +/** This is initialized in both _starpu_register_new_data and _starpu_data_partition */ +struct _starpu_data_state +{ + int magic; + struct _starpu_data_requester_prio_list req_list; + /** the number of requests currently in the scheduling engine (not in + * the req_list anymore), i.e. the number of holders of the + * current_mode rwlock */ + unsigned refcnt; + /** whether we are already unlocking data requests */ + unsigned unlocking_reqs; + /** Current access mode. Is always either STARPU_R, STARPU_W, + * STARPU_SCRATCH or STARPU_REDUX, but never a combination such as + * STARPU_RW. */ + enum starpu_data_access_mode current_mode; + /** protect meta data */ + struct _starpu_spinlock header_lock; + + /** Condition to make application wait for all transfers before freeing handle */ + /** busy_count is the number of handle->refcnt, handle->per_node[*]->refcnt, number of starpu_data_requesters, and number of tasks that have released it but are still registered on the implicit data dependency lists. */ + /** Core code which releases busy_count has to call + * _starpu_data_check_not_busy to let starpu_data_unregister proceed */ + unsigned busy_count; + /** Is starpu_data_unregister waiting for busy_count? 
*/ + unsigned busy_waiting; + starpu_pthread_mutex_t busy_mutex; + starpu_pthread_cond_t busy_cond; + + /** In case we use filters, the handle may describe a sub-data */ + struct _starpu_data_state *root_handle; /** root of the tree */ + struct _starpu_data_state *father_handle; /** father of the node, NULL if the current node is the root */ + starpu_data_handle_t *active_children; /** The currently active set of read-write children */ + unsigned active_nchildren; + starpu_data_handle_t **active_readonly_children; /** The currently active set of read-only children */ + unsigned *active_readonly_nchildren; /** Size of active_readonly_children[i] array */ + unsigned nactive_readonly_children; /** Size of active_readonly_children and active_readonly_nchildren arrays. Actual use is given by 'partitioned' */ + /** Our siblings in the father partitioning */ + unsigned nsiblings; /** How many siblings */ + starpu_data_handle_t *siblings; + unsigned sibling_index; /** indicate which child this node is from the father's perspective (if any) */ + unsigned depth; /** what's the depth of the tree ? */ + +#ifdef STARPU_BUBBLE + starpu_pthread_mutex_t unpartition_mutex; +#endif + + /** Synchronous partitioning */ + starpu_data_handle_t children; + unsigned nchildren; + /** How many partition plans this handle has */ + unsigned nplans; + /** Switch codelet for asynchronous partitioning */ + struct starpu_codelet *switch_cl; + /** size of dyn_nodes recorded in switch_cl */ + unsigned switch_cl_nparts; + /** Whether a partition plan is currently submitted and the + * corresponding unpartition has not been yet + * + * Or the number of partition plans currently submitted in readonly + * mode. 
+ */ + unsigned partitioned; + /** Whether a partition plan is currently submitted in readonly mode */ + unsigned part_readonly:1; + + /** Whether our father is currently partitioned into ourself */ + unsigned active:1; + unsigned active_ro:1; + + /** describe the state of the data in term of coherency + * This is execution-time state. */ + struct _starpu_data_replicate per_node[STARPU_MAXNODES]; + struct _starpu_data_replicate *per_worker; + + struct starpu_data_interface_ops *ops; + + /** Footprint which identifies data layout */ + uint32_t footprint; + + /* The following bitfields are set from the application initialization */ + + /** in some case, the application may explicitly tell StarPU that a + * piece of data is not likely to be used soon again */ + unsigned is_not_important:1; + /** Can the data be pushed to the disk? */ + unsigned ooc:1; + /** Does StarPU have to enforce some implicit data-dependencies ? */ + unsigned sequential_consistency:1; + /** Whether we shall not ever write to this handle, thus allowing various optimizations */ + unsigned readonly:1; + + /** where is the data home, i.e. which node it was registered from ? -1 if none yet */ + int home_node; + + /** what is the default write-through mask for that data ? */ + uint32_t wt_mask; + + /** for a readonly handle, the number of times that we have returned again the + same handle and thus the number of times we have to ignore unregistration requests */ + unsigned aliases; + /** for a non-readonly handle, a readonly-only duplicate, that we can + return from starpu_data_dup_ro */ + starpu_data_handle_t readonly_dup; + /** for a readonly handle, the non-readonly handle that is referencing + is in its readonly_dup field. */ + starpu_data_handle_t readonly_dup_of; + + /* The following bitfields are set from the application submission thread */ + + /** Is the data initialized, or a task is already submitted to initialize it + * This is submission-time initialization state. 
*/ + unsigned initialized:1; + +#ifdef STARPU_OPENMP + unsigned removed_from_context_hash:1; +#endif + + /* The following field is set by StarPU at execution time */ + + /** Whether lazy unregistration was requested through starpu_data_unregister_submit */ + unsigned char lazy_unregister; + + /** This lock should protect any operation to enforce + * sequential_consistency */ + starpu_pthread_mutex_t sequential_consistency_mutex; + + /** The last submitted task (or application data request) that declared + * it would modify the piece of data ? Any task accessing the data in a + * read-only mode should depend on that task implicitly if the + * sequential_consistency flag is enabled. */ + enum starpu_data_access_mode last_submitted_mode; + struct starpu_task *last_sync_task; + struct _starpu_task_wrapper_dlist last_submitted_accessors; + + /** If FxT is enabled, we keep track of "ghost dependencies": that is to + * say the dependencies that are not needed anymore, but that should + * appear in the post-mortem DAG. For instance if we have the sequence + * f(Aw) g(Aw), and that g is submitted after the termination of f, we + * want to have f->g appear in the DAG even if StarPU does not need to + * enforce this dependency anymore.*/ + unsigned last_submitted_ghost_sync_id_is_valid; + unsigned long last_submitted_ghost_sync_id; + struct _starpu_jobid_list *last_submitted_ghost_accessors_id; + + /** protected by sequential_consistency_mutex */ + struct _starpu_task_wrapper_list *post_sync_tasks; + unsigned post_sync_tasks_cnt; + + /* + * Reductions + */ + + /** During reduction we need some specific methods: redux_func performs + * the reduction of an interface into another one (eg. "+="), and init_func + * initializes the data interface to a default value that is stable by + * reduction (eg. 0 for +=). 
*/ + struct starpu_codelet *redux_cl; + struct starpu_codelet *init_cl; + void *redux_cl_arg; + void *init_cl_arg; + + /** Are we currently performing a reduction on that handle ? If so the + * reduction_refcnt should be non null until there are pending tasks + * that are performing the reduction. */ + unsigned reduction_refcnt; + + /** List of requesters that are specific to the pending reduction. This + * list is used when the requests in the req_list list are frozen until + * the end of the reduction. */ + struct _starpu_data_requester_prio_list reduction_req_list; + + starpu_data_handle_t *reduction_tmp_handles; + + /** Final request for write invalidation */ + struct _starpu_data_request *write_invalidation_req; + + /** Used for MPI */ + void *mpi_data; + + _starpu_memory_stats_t memory_stats; + + unsigned int mf_node; //XXX + + /** hook to be called when unregistering the data */ + _starpu_data_handle_unregister_hook unregister_hook; + + struct starpu_arbiter *arbiter; + /** This is protected by the arbiter mutex */ + struct _starpu_data_requester_prio_list arbitered_req_list; + + /** Data maintained by schedulers themselves */ + /** Last worker that took this data in locality mode, or -1 if nobody + * took it yet */ + int last_locality; + + /** Application-provided coordinates. The maximum dimension (5) is + * relatively arbitrary. */ + unsigned dimensions; + int coordinates[5]; + + /** A generic pointer to data in the user land (could be anything and this + * is not manage by StarPU) */ + void *user_data; + + /** A generic pointer to data in the scheduler (could be anything and this + * is managed by the scheduler) */ + void *sched_data; +}; + +/** This does not take a reference on the handle, the caller has to do it, + * e.g. through _starpu_attempt_to_submit_data_request_from_apps() + * detached means that the core is allowed to drop the request. 
The caller + * should thus *not* take a reference since it can not know whether the request will complete + * async means that _starpu_fetch_data_on_node will wait for completion of the request + */ +int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, + enum starpu_data_access_mode mode, unsigned detached, + struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, + void (*callback_func)(void *), void *callback_arg, int prio, const char *origin); +/** This releases a reference on the handle */ +void _starpu_release_data_on_node(struct _starpu_data_state *state, uint32_t default_wt_mask, + enum starpu_data_access_mode down_to_mode, + struct _starpu_data_replicate *replicate); + +void _starpu_update_data_state(starpu_data_handle_t handle, + struct _starpu_data_replicate *requesting_replicate, + enum starpu_data_access_mode mode); + +uint32_t _starpu_get_data_refcnt(struct _starpu_data_state *state, unsigned node); + +size_t _starpu_data_get_size(starpu_data_handle_t handle); +size_t _starpu_data_get_alloc_size(starpu_data_handle_t handle); +starpu_ssize_t _starpu_data_get_max_size(starpu_data_handle_t handle); + +uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle); + +void __starpu_push_task_output(struct _starpu_job *j); +/** Version with driver trace */ +void _starpu_push_task_output(struct _starpu_job *j); + +struct _starpu_worker; +STARPU_ATTRIBUTE_WARN_UNUSED_RESULT +/** Fetch the data parameters for task \p task + * Setting \p async to 1 allows to only start the fetches, and call + * \p _starpu_fetch_task_input_tail later when the transfers are finished */ +int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, int async); +void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job *j, struct _starpu_worker *worker); +void _starpu_fetch_nowhere_task_input(struct _starpu_job *j); + +int _starpu_select_src_node(struct 
_starpu_data_state *state, unsigned destination); +int _starpu_determine_request_path(starpu_data_handle_t handle, + int src_node, int dst_node, + enum starpu_data_access_mode mode, int max_len, + unsigned *src_nodes, unsigned *dst_nodes, + unsigned *handling_nodes, unsigned write_invalidation); + +/** is_prefetch is whether the DSM may drop the request (when there is not enough memory for instance + * async is whether the caller wants a reference on the last request, to be + * able to wait for it (which will release that reference). + */ +struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle, + struct _starpu_data_replicate *dst_replicate, + enum starpu_data_access_mode mode, + struct starpu_task *task, enum starpu_is_prefetch is_prefetch, + unsigned async, + void (*callback_func)(void *), void *callback_arg, int prio, const char *origin); + +void _starpu_init_data_replicate(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, int workerid); +void _starpu_data_start_reduction_mode(starpu_data_handle_t handle); +void _starpu_data_end_reduction_mode(starpu_data_handle_t handle, int priority); +void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle); + +void _starpu_data_unmap(starpu_data_handle_t handle, unsigned node); + +void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +#pragma GCC visibility pop + +#endif // __COHERENCY__H__ diff --git a/src/datawizard/copy_driver.c b/src/datawizard/copy_driver.c new file mode 100644 index 0000000..52a1f00 --- /dev/null +++ b/src/datawizard/copy_driver.c @@ -0,0 +1,776 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#endif + +void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid) +{ + /* wake up all workers on that memory node */ + struct _starpu_memory_node_descr * const descr = _starpu_memory_node_get_description(); + const int cur_workerid = starpu_worker_get_id(); + struct _starpu_worker *cur_worker = cur_workerid>=0?_starpu_get_worker_struct(cur_workerid):NULL; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&descr->conditions_rwlock); + + unsigned nconds = descr->condition_count[nodeid]; + unsigned cond_id; + for (cond_id = 0; cond_id < nconds; cond_id++) + { + struct _starpu_cond_and_worker *condition; + condition = &descr->conditions_attached_to_node[nodeid][cond_id]; + + if (condition->worker == cur_worker) + { + if (condition->cond == &condition->worker->sched_cond) + { + condition->worker->state_keep_awake = 1; + } + + /* No need to wake myself, and I might be called from + * the scheduler with mutex locked, through + * starpu_prefetch_task_input_on_node */ + continue; + } + + /* wake anybody waiting on that condition */ + 
STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex); + if (condition->cond == &condition->worker->sched_cond) + { + condition->worker->state_keep_awake = 1; + } + STARPU_PTHREAD_COND_BROADCAST(condition->cond); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&condition->worker->sched_mutex); + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(&descr->conditions_rwlock); + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[nodeid]); +#endif +} + +void starpu_wake_all_blocked_workers(void) +{ + /* workers may be blocked on the various queues' conditions */ + struct _starpu_memory_node_descr * const descr = _starpu_memory_node_get_description(); + const int cur_workerid = starpu_worker_get_id(); + struct _starpu_worker *cur_worker = cur_workerid>=0?_starpu_get_worker_struct(cur_workerid):NULL; + + STARPU_PTHREAD_RWLOCK_RDLOCK(&descr->conditions_rwlock); + + unsigned nconds = descr->total_condition_count; + unsigned cond_id; + for (cond_id = 0; cond_id < nconds; cond_id++) + { + struct _starpu_cond_and_worker *condition; + condition = &descr->conditions_all[cond_id]; + + if (condition->worker == cur_worker) + { + if (condition->cond == &condition->worker->sched_cond) + { + condition->worker->state_keep_awake = 1; + } + + /* No need to wake myself, and I might be called from + * the scheduler with mutex locked, through + * starpu_prefetch_task_input_on_node */ + continue; + } + + /* wake anybody waiting on that condition */ + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex); + if (condition->cond == &condition->worker->sched_cond) + { + condition->worker->state_keep_awake = 1; + } + STARPU_PTHREAD_COND_BROADCAST(condition->cond); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&condition->worker->sched_mutex); + } + + STARPU_PTHREAD_RWLOCK_UNLOCK(&descr->conditions_rwlock); + +#ifdef STARPU_SIMGRID + unsigned workerid, nodeid; + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + 
starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[workerid]); + for (nodeid = 0; nodeid < starpu_memory_nodes_get_count(); nodeid++) + starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[nodeid]); +#endif +} + +#ifdef STARPU_USE_FXT +/* we need to identify each communication so that we can match the beginning + * and the end of a communication in the trace, so we use a unique identifier + * per communication */ +static unsigned long communication_cnt = 0; +#endif + +int _starpu_copy_interface_any_to_any(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + + int ret = 0; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + if (!req || starpu_asynchronous_copy_disabled() || + starpu_asynchronous_copy_disabled_for(src_kind) || + starpu_asynchronous_copy_disabled_for(dst_kind) || + !copy_methods->any_to_any) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT_MSG(copy_methods->any_to_any, "Interface <%s> does not define copy_methods->any_to_any", handle->ops->name); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + if (dst_kind == STARPU_CPU_RAM) + req->async_channel.node_ops = starpu_memory_driver_info[src_kind].ops; + else + req->async_channel.node_ops = starpu_memory_driver_info[dst_kind].ops; + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + return ret; +} + +static int copy_data_1_to_1_generic(starpu_data_handle_t handle, + struct _starpu_data_replicate *src_replicate, + struct _starpu_data_replicate *dst_replicate, + struct _starpu_data_request *req) +{ + unsigned src_node = 
(unsigned)src_replicate->memory_node; + unsigned dst_node = (unsigned)dst_replicate->memory_node; + + STARPU_ASSERT(src_replicate->refcnt); + STARPU_ASSERT(dst_replicate->refcnt); + + STARPU_ASSERT(src_replicate->allocated); + STARPU_ASSERT(dst_replicate->allocated); + +#ifdef STARPU_SIMGRID + if (src_node == STARPU_MAIN_RAM || dst_node == STARPU_MAIN_RAM) + _starpu_simgrid_data_transfer(handle->ops->get_size(handle), src_node, dst_node); + + return _starpu_simgrid_transfer(handle->ops->get_size(handle), src_node, dst_node, req); +#else /* !SIMGRID */ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + void *src_interface = src_replicate->data_interface; + void *dst_interface = dst_replicate->data_interface; + + const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); + const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); + if (src_node_ops && src_node_ops->copy_interface_to[dst_kind]) + { + return src_node_ops->copy_interface_to[dst_kind](handle, src_interface, src_node, dst_interface, dst_node, req); + } + else if (dst_node_ops && dst_node_ops->copy_interface_from[src_kind]) + { + return dst_node_ops->copy_interface_from[src_kind](handle, src_interface, src_node, dst_interface, dst_node, req); + } + else + { + STARPU_ABORT_MSG("No copy_interface_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + } +#endif /* !SIMGRID */ +} + +static int update_map_generic(starpu_data_handle_t handle, + struct _starpu_data_replicate *src_replicate, + struct _starpu_data_replicate *dst_replicate, + struct _starpu_data_request *req STARPU_ATTRIBUTE_UNUSED) +{ + int src_node = src_replicate->memory_node; + int dst_node = dst_replicate->memory_node; + + STARPU_ASSERT(src_replicate->refcnt); + 
STARPU_ASSERT(dst_replicate->refcnt); + + STARPU_ASSERT((src_replicate->mapped == dst_node && dst_replicate->allocated) + ||(src_replicate->allocated && dst_replicate->mapped == src_node)); + + void *src_interface = src_replicate->data_interface; + void *dst_interface = dst_replicate->data_interface; + + handle->ops->update_map(src_interface, src_node, dst_interface, dst_node); + + return 0; +} + +int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_data_handle_t handle, + struct _starpu_data_replicate *src_replicate, + struct _starpu_data_replicate *dst_replicate, + unsigned donotread, + struct _starpu_data_request *req, + enum _starpu_may_alloc may_alloc, + enum starpu_is_prefetch prefetch STARPU_ATTRIBUTE_UNUSED) +{ + if (!donotread) + { + STARPU_ASSERT(src_replicate->allocated || src_replicate->mapped != STARPU_UNMAPPED); + STARPU_ASSERT(src_replicate->refcnt); + } + + unsigned src_node = src_replicate->memory_node; + unsigned dst_node = dst_replicate->memory_node; + + if (!dst_replicate->allocated && dst_replicate->mapped == STARPU_UNMAPPED && dst_node != src_node + && handle->ops->map_data + && (_starpu_memory_node_get_mapped(dst_replicate->memory_node) /* || handle wants it */)) + { + /* Memory node which can just map the main memory, try to map. 
*/ + if (!handle->ops->map_data( + src_replicate->data_interface, src_replicate->memory_node, + dst_replicate->data_interface, dst_replicate->memory_node)) + { + dst_replicate->mapped = src_node; + + if (_starpu_node_needs_map_update(dst_node)) + { + /* Driver porters: adding your driver here is + optional, it is only needed when implementing + support for memory mapping */ + switch (starpu_node_get_kind(dst_node)) + { + case STARPU_OPENCL_RAM: + /* OpenCL mappings write access defaults to the device */ + dst_replicate->map_write = 1; + break; + case STARPU_CUDA_RAM: + dst_replicate->map_write = 0; + break; + case STARPU_CPU_RAM: + default: + /* Should not happen */ + STARPU_ABORT(); + break; + } + } + } + } + + /* first make sure the destination has an allocated buffer */ + if (!dst_replicate->allocated && dst_replicate->mapped == STARPU_UNMAPPED) + { + if (may_alloc==_STARPU_DATAWIZARD_DO_NOT_ALLOC || _starpu_is_reclaiming(dst_node)) + /* We're not supposed to allocate there at the moment */ + return -ENOMEM; + + int ret_alloc = _starpu_allocate_memory_on_node(handle, dst_replicate, prefetch, may_alloc==_STARPU_DATAWIZARD_ONLY_FAST_ALLOC); + if (ret_alloc) + return -ENOMEM; + } + + STARPU_ASSERT(dst_replicate->allocated || dst_replicate->mapped != STARPU_UNMAPPED); + STARPU_ASSERT(dst_replicate->refcnt); + + /* In the case of a mapped data, we are here requested either + * - because the destination will write to it, and thus needs write + * access. + * - because the source was modified, and the destination needs to get + * updated. + * All in all, any data change will actually trigger both. 
+ */ + if (!donotread && dst_replicate->mapped != STARPU_UNMAPPED) + { + STARPU_ASSERT(src_replicate->memory_node == dst_replicate->mapped); + if (_starpu_node_needs_map_update(dst_node)) + { + /* We need to flush from RAM to the device */ + if (!dst_replicate->map_write) + { + update_map_generic(handle, src_replicate, dst_replicate, req); + dst_replicate->map_write = 1; + } + } + + dst_replicate->initialized = 1; + } + + else if (!donotread && src_replicate->mapped != STARPU_UNMAPPED) + { + STARPU_ASSERT(dst_replicate->memory_node == src_replicate->mapped); + if (_starpu_node_needs_map_update(src_node)) + { + /* We need to flush from the device to the RAM */ + if (src_replicate->map_write) + { + update_map_generic(handle, src_replicate, dst_replicate, req); + src_replicate->map_write = 0; + } + } + + dst_replicate->initialized = 1; + } + + /* if there is no need to actually read the data, + * we do not perform any transfer */ + else if (!donotread) + { + unsigned long STARPU_ATTRIBUTE_UNUSED com_id = 0; + size_t size = _starpu_data_get_size(handle); + _starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size); + +#ifdef STARPU_USE_FXT + if (fut_active) + { + com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1); + + if (req) + req->com_id = com_id; + } +#endif + + dst_replicate->initialized = 1; + + _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle); + int ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req); + if (!req) + /* Synchronous, this is already finished */ + _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch); + + return ret_copy; + } + + return 0; +} + +void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size) +{ + _STARPU_TRACE_DATA_COPY(src_node, dst_node, size); +} + +void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start) +{ + *start = starpu_timing_now(); + 
_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node); +} + +void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start) +{ + double end = starpu_timing_now(); + double elapsed = end - start; + if (elapsed > 300) + { + static int warned = 0; + STARPU_HG_DISABLE_CHECKING(warned); + if (!warned) + { + char src_name[16], dst_name[16]; + warned = 1; + starpu_memory_node_get_name(src_node, src_name, sizeof(src_name)); + starpu_memory_node_get_name(dst_node, dst_name, sizeof(dst_name)); + + _STARPU_DISP("Warning: the submission of asynchronous transfer from %s to %s took a very long time (%f ms)\nFor proper asynchronous transfer overlapping, data registered to StarPU must be allocated with starpu_malloc() or pinned with starpu_memory_pin()\n", src_name, dst_name, elapsed / 1000.); + } + } + _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node); +} + +/* This can be used by interfaces to easily transfer a piece of data without + * caring about the particular transfer methods. 
*/ + +/* This should either return 0 if the transfer is complete, or -EAGAIN if the + * transfer is still pending, and will have to be waited for by + * _starpu_driver_test_request_completion/_starpu_driver_wait_request_completion + */ +int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data) +{ + struct _starpu_async_channel *async_channel = async_data; + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); + const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); + + if (src_node_ops && src_node_ops->copy_data_to[dst_kind]) + { + return src_node_ops->copy_data_to[dst_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + size, + async_channel); + } + else if (dst_node_ops && dst_node_ops->copy_data_from[src_kind]) + { + return dst_node_ops->copy_data_from[src_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + size, + async_channel); + } + else + { + STARPU_ABORT_MSG("No copy_data_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + return -1; + } +} + +int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + void *async_data) +{ + int ret = 0; + unsigned i; + struct _starpu_async_channel *async_channel = async_data; + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); + const struct _starpu_node_ops 
*dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); + + STARPU_ASSERT_MSG(ld_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld_src); + STARPU_ASSERT_MSG(ld_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld_dst); + + if (ld_src == blocksize && ld_dst == blocksize) + /* Optimize contiguous case */ + return starpu_interface_copy(src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize * numblocks, async_data); + + if (src_node_ops && src_node_ops->copy2d_data_to[dst_kind]) + /* Hardware-optimized non-contiguous case */ + return src_node_ops->copy2d_data_to[dst_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize, + numblocks, ld_src, ld_dst, + async_channel); + + if (dst_node_ops && dst_node_ops->copy2d_data_from[src_kind]) + /* Hardware-optimized non-contiguous case */ + return dst_node_ops->copy2d_data_from[src_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize, + numblocks, ld_src, ld_dst, + async_channel); + + for (i = 0; i < numblocks; i++) + { + if (starpu_interface_copy(src, src_offset + i*ld_src, src_node, + dst, dst_offset + i*ld_dst, dst_node, + blocksize, async_data)) + ret = -EAGAIN; + } + + return ret; +} + +int starpu_interface_copy3d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + void *async_data) +{ + int ret = 0; + unsigned i; + struct _starpu_async_channel *async_channel = async_data; + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); + const struct _starpu_node_ops *dst_node_ops = 
_starpu_memory_node_get_node_ops(dst_node); + + STARPU_ASSERT_MSG(ld1_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld1_src); + STARPU_ASSERT_MSG(ld1_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld1_dst); + + STARPU_ASSERT_MSG(ld2_src >= numblocks_1 * ld1_src, "block group size %lu is bigger than group ld %lu in source", (unsigned long) (numblocks_1 * ld1_src), (unsigned long) ld2_src); + STARPU_ASSERT_MSG(ld2_dst >= numblocks_1 * ld1_dst, "block group size %lu is bigger than group ld %lu in destination", (unsigned long) (numblocks_1 * ld1_dst), (unsigned long) ld2_dst); + + if (ld2_src == blocksize * numblocks_1 && + ld2_dst == blocksize * numblocks_1) + /* Optimize contiguous case */ + return starpu_interface_copy(src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize * numblocks_1 * numblocks_2, + async_data); + + if (src_node_ops && src_node_ops->copy3d_data_to[dst_kind]) + /* Hardware-optimized non-contiguous case */ + return src_node_ops->copy3d_data_to[dst_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize, + numblocks_1, ld1_src, ld1_dst, + numblocks_2, ld2_src, ld2_dst, + async_channel); + + if (dst_node_ops && dst_node_ops->copy3d_data_from[src_kind]) + /* Hardware-optimized non-contiguous case */ + return dst_node_ops->copy3d_data_from[src_kind](src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize, + numblocks_1, ld1_src, ld1_dst, + numblocks_2, ld2_src, ld2_dst, + async_channel); + + + for (i = 0; i < numblocks_2; i++) + { + if (starpu_interface_copy2d(src, src_offset + i*ld2_src, src_node, + dst, dst_offset + i*ld2_dst, dst_node, + blocksize, numblocks_1, ld1_src, ld1_dst, + async_data)) + ret = -EAGAIN; + } + + return ret; +} + +int starpu_interface_copy4d(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + 
size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + size_t numblocks_3, size_t ld3_src, size_t ld3_dst, + void *async_data) +{ + int ret = 0; + unsigned i; + + STARPU_ASSERT_MSG(ld1_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld1_src); + STARPU_ASSERT_MSG(ld1_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld1_dst); + + STARPU_ASSERT_MSG(ld2_src >= numblocks_1 * ld1_src, "block group size %lu is bigger than group ld %lu in source", (unsigned long) (numblocks_1 * ld1_src), (unsigned long) ld2_src); + STARPU_ASSERT_MSG(ld2_dst >= numblocks_1 * ld1_dst, "block group size %lu is bigger than group ld %lu in destination", (unsigned long) (numblocks_1 * ld1_dst), (unsigned long) ld2_dst); + + STARPU_ASSERT_MSG(ld3_src >= numblocks_2 * ld2_src, "block group group size %lu is bigger than group group ld %lu in source", (unsigned long) (numblocks_2 * ld2_src), (unsigned long) ld3_src); + STARPU_ASSERT_MSG(ld3_dst >= numblocks_2 * ld2_dst, "block group group size %lu is bigger than group group ld %lu in destination", (unsigned long) (numblocks_2 * ld2_dst), (unsigned long) ld3_dst); + + if (ld3_src == blocksize * numblocks_1 * numblocks_2 && + ld3_dst == blocksize * numblocks_1 * numblocks_2) + /* Optimize contiguous case */ + return starpu_interface_copy(src, src_offset, src_node, + dst, dst_offset, dst_node, + blocksize * numblocks_1 * numblocks_2 * numblocks_3, + async_data); + + /* Probably won't ever have a 4D interface in drivers :) */ + + for (i = 0; i < numblocks_3; i++) + { + if (starpu_interface_copy3d(src, src_offset + i*ld3_src, src_node, + dst, dst_offset + i*ld3_dst, dst_node, + blocksize, + numblocks_1, ld1_src, ld1_dst, + numblocks_2, ld2_src, ld2_dst, + async_data)) + ret = -EAGAIN; + } + + return ret; +} + +uintptr_t starpu_interface_map(uintptr_t src, 
size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); + + if (node_ops && node_ops->map[src_kind]) + { + return node_ops->map[src_kind](src, src_offset, src_node, dst_node, size, ret); + } + else + { + *ret = -EIO; + return 0; + } +} + +int starpu_interface_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); + + if (node_ops && node_ops->unmap[src_kind]) + { + return node_ops->unmap[src_kind](src, src_offset, src_node, dst, dst_node, size); + } + else + { + STARPU_ABORT_MSG("No unmap function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + return -1; + } +} + +int starpu_interface_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); + const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); + + if (src_node_ops && src_node_ops->update_map[dst_kind]) + { + return src_node_ops->update_map[dst_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size); + } + else if (dst_node_ops && dst_node_ops->update_map[src_kind]) + { + return dst_node_ops->update_map[src_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size); + } + else + { + STARPU_ABORT_MSG("No unmap function defined from node %s to node %s\n", 
_starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + return -1; + } +} + +static size_t _get_size(uint32_t* nn, size_t ndim) +{ + size_t size = 1; + unsigned i; + for (i=0; i 0) + { + for (i = 0; i < ndim-1; i++) + { + STARPU_ASSERT_MSG(ldn_src[i+1] >= nn[i] * ldn_src[i], "block size %lu is bigger than ld %lu in source", (unsigned long) nn[i] * ldn_src[i], (unsigned long) ldn_src[i+1]); + STARPU_ASSERT_MSG(ldn_dst[i+1] >= nn[i] * ldn_dst[i], "block size %lu is bigger than ld %lu in destination", (unsigned long) nn[i] * ldn_dst[i], (unsigned long) ldn_dst[i+1]); + } + + if (ldn_src[ndim-1] == _get_size(nn, ndim-1) && + ldn_dst[ndim-1] == _get_size(nn, ndim-1)) + /* Optimize contiguous case */ + return starpu_interface_copy(src, src_offset, src_node, + dst, dst_offset, dst_node, + _get_size(nn, ndim) * elemsize, + async_data); + } + + if(ndim > 4) + { + for (i = 0; i < nn[ndim-1]; i++) + { + if (starpu_interface_copynd(src, src_offset + i*ldn_src[ndim-1]*elemsize, src_node, + dst, dst_offset + i*ldn_dst[ndim-1]*elemsize, dst_node, + elemsize, ndim-1, + nn, ldn_src, ldn_dst, + async_data)) + ret = -EAGAIN; + } + } + else if(ndim == 4) + { + return starpu_interface_copy4d(src, src_offset, src_node, + dst, dst_offset, dst_node, + nn[0] * elemsize, + nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, + nn[2], ldn_src[2] * elemsize, ldn_dst[2] * elemsize, + nn[3], ldn_src[3] * elemsize, ldn_dst[3] * elemsize, + async_data); + } + else if(ndim == 3) + { + return starpu_interface_copy3d(src, src_offset, src_node, + dst, dst_offset, dst_node, + nn[0] * elemsize, + nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, + nn[2], ldn_src[2] * elemsize, ldn_dst[2] * elemsize, + async_data); + } + else if(ndim == 2) + { + return starpu_interface_copy2d(src, src_offset, src_node, + dst, dst_offset, dst_node, + nn[0] * elemsize, + nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, + async_data); + } + else if (ndim 
== 1) + { + return starpu_interface_copy(src, src_offset, src_node, + dst, dst_offset, dst_node, + nn[0] * elemsize, + async_data); + } + else if (ndim == 0) + { + return starpu_interface_copy(src, 0, src_node, + dst, 0, dst_node, + elemsize, + async_data); + } + + return ret; +} + +/* Only used at starpu_shutdown */ +void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_channel) +{ +#ifdef STARPU_SIMGRID + _starpu_simgrid_wait_transfer_event(&async_channel->event); +#else /* !SIMGRID */ + const struct _starpu_node_ops *node_ops = async_channel->node_ops; + if (node_ops && node_ops->wait_request_completion != NULL) + { + node_ops->wait_request_completion(async_channel); + } + else + { + STARPU_ABORT_MSG("No wait_request_completion function defined for node %s\n", node_ops?node_ops->name:"unknown"); + } +#endif /* !SIMGRID */ +} + +unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel) +{ +#ifdef STARPU_SIMGRID + return _starpu_simgrid_test_transfer_event(&async_channel->event); +#else /* !SIMGRID */ + const struct _starpu_node_ops *node_ops = async_channel->node_ops; + if (node_ops && node_ops->test_request_completion != NULL) + { + return node_ops->test_request_completion(async_channel); + } + else + { + STARPU_ABORT_MSG("No test_request_completion function defined for node %s\n", node_ops?node_ops->name:"unknown"); + } +#endif /* !SIMGRID */ +} diff --git a/src/datawizard/copy_driver.h b/src/datawizard/copy_driver.h new file mode 100644 index 0000000..dc9705f --- /dev/null +++ b/src/datawizard/copy_driver.h @@ -0,0 +1,106 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
/** State attached to one asynchronous transfer.
 * _starpu_driver_wait_request_completion() and
 * _starpu_driver_test_request_completion() use it to wait for, or poll, the
 * end of the transfer. */
struct _starpu_async_channel
{
	/** Opaque event storage, handed to the simgrid transfer-event helpers
	 * when built with STARPU_SIMGRID */
	union _starpu_async_channel_event event;
	/** Operations of the node driving the transfer; provides the
	 * wait_request_completion / test_request_completion hooks */
	const struct _starpu_node_ops *node_ops;
	/** Which nodes to poll when an ACK message is needed */
	struct _starpu_mp_node *polling_node_sender;
	struct _starpu_mp_node *polling_node_receiver;
	/** Used to know whether the acknowledgment message has arrived from the sinks */
	volatile int starpu_mp_common_finished_sender;
	volatile int starpu_mp_common_finished_receiver;
};
+ +void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid); + +int _starpu_driver_copy_data_1_to_1(starpu_data_handle_t handle, + struct _starpu_data_replicate *src_replicate, + struct _starpu_data_replicate *dst_replicate, + unsigned donotread, + struct _starpu_data_request *req, + enum _starpu_may_alloc may_alloc, + enum starpu_is_prefetch prefetch); + +int _starpu_copy_interface_any_to_any(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req); + +/* Just test for request completion */ +unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel); + +/* Wait for request completion. Only used at starpu_shutdown */ +void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_channel); + +#ifdef __cplusplus +} +#endif + +#pragma GCC visibility pop + +#endif // __COPY_DRIVER_H__ diff --git a/src/datawizard/data_request.c b/src/datawizard/data_request.c new file mode 100644 index 0000000..d631d21 --- /dev/null +++ b/src/datawizard/data_request.c @@ -0,0 +1,1035 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +void _starpu_init_data_request_lists(void) +{ + unsigned i, j; + enum _starpu_data_request_inout k; + for (i = 0; i < STARPU_MAXNODES; i++) + { + struct _starpu_node *node = _starpu_get_node_struct(i); + for (j = 0; j < STARPU_MAXNODES; j++) + { + for (k = _STARPU_DATA_REQUEST_IN; k <= _STARPU_DATA_REQUEST_OUT; k++) + { + _starpu_data_request_prio_list_init(&node->data_requests[j][k]); + _starpu_data_request_prio_list_init(&node->prefetch_requests[j][k]); + _starpu_data_request_prio_list_init(&node->idle_requests[j][k]); + +#ifndef STARPU_DEBUG + /* Tell helgrind that we are fine with checking for list_empty + * in _starpu_handle_node_data_requests, we will call it + * periodically anyway */ + STARPU_HG_DISABLE_CHECKING(node->data_requests[j][k].tree.root); + STARPU_HG_DISABLE_CHECKING(node->prefetch_requests[j][k].tree.root); + STARPU_HG_DISABLE_CHECKING(node->idle_requests[j][k].tree.root); +#endif + _starpu_data_request_prio_list_init(&node->data_requests_pending[j][k]); + node->data_requests_npending[j][k] = 0; + + STARPU_PTHREAD_MUTEX_INIT(&node->data_requests_list_mutex[j][k], NULL); + STARPU_PTHREAD_MUTEX_INIT(&node->data_requests_pending_list_mutex[j][k], NULL); + } + } + STARPU_HG_DISABLE_CHECKING(node->data_requests_npending); + } +} + +void _starpu_deinit_data_request_lists(void) +{ + unsigned i, j; + enum _starpu_data_request_inout k; + for (i = 0; i < STARPU_MAXNODES; i++) + { + struct _starpu_node *node = _starpu_get_node_struct(i); + for (j = 0; j < STARPU_MAXNODES; j++) + { + for (k = _STARPU_DATA_REQUEST_IN; k <= _STARPU_DATA_REQUEST_OUT; k++) + { + _starpu_data_request_prio_list_deinit(&node->data_requests[j][k]); + _starpu_data_request_prio_list_deinit(&node->prefetch_requests[j][k]); + _starpu_data_request_prio_list_deinit(&node->idle_requests[j][k]); + _starpu_data_request_prio_list_deinit(&node->data_requests_pending[j][k]); + 
STARPU_PTHREAD_MUTEX_DESTROY(&node->data_requests_pending_list_mutex[j][k]); + STARPU_PTHREAD_MUTEX_DESTROY(&node->data_requests_list_mutex[j][k]); + } + } + } +} + +/* Unlink the request from the handle. New requests can then be made. */ +/* this should be called with the lock r->handle->header_lock taken */ +static void _starpu_data_request_unlink(struct _starpu_data_request *r) +{ + _starpu_spin_checklocked(&r->handle->header_lock); + + /* If this is a write invalidation request, we store it in the handle + */ + if (r->handle->write_invalidation_req == r) + { + STARPU_ASSERT(r->mode == STARPU_W); + r->handle->write_invalidation_req = NULL; + } + else + { + unsigned node; + struct _starpu_data_request **prevp, *prev; + + if (r->mode & STARPU_R) + /* If this is a read request, we store the pending requests + * between src and dst. */ + node = r->src_replicate->memory_node; + else + /* If this is a write only request, then there is no source and + * we use the destination node to cache the request. */ + node = r->dst_replicate->memory_node; + + /* Look for ourself in the list, we should be not very far. */ + for (prevp = &r->dst_replicate->request[node], prev = NULL; + *prevp && *prevp != r; + prev = *prevp, prevp = &prev->next_same_req) + ; + + STARPU_ASSERT(*prevp == r); + *prevp = r->next_same_req; + + if (!r->next_same_req) + { + /* I was last */ + STARPU_ASSERT(r->dst_replicate->last_request[node] == r); + if (prev) + r->dst_replicate->last_request[node] = prev; + else + r->dst_replicate->last_request[node] = NULL; + } + } +} + +static void _starpu_data_request_destroy(struct _starpu_data_request *r) +{ + //fprintf(stderr, "DESTROY REQ %p (%d) refcnt %d\n", r, node, r->refcnt); + _starpu_data_request_delete(r); +} + +/* handle->lock should already be taken ! 
/* Allocate and initialise a data request on `handle`, and link it to the
 * handle (write invalidation) or to the destination replicate (all other
 * requests).
 *
 * handle->header_lock must already be held by the caller (checked below).
 *
 * - src_replicate / dst_replicate: endpoints of the transfer. src_replicate is
 *   dereferenced whenever mode contains STARPU_R; dst_replicate is
 *   dereferenced whenever is_write_invalidation is 0.
 * - handling_node: node whose lists will hold the request; -1 selects
 *   STARPU_MAIN_RAM.
 * - ndeps: number of requests that must complete before this one is posted.
 * - is_prefetch: STARPU_FETCH takes the destination reference right away.
 * - origin: stored as-is in the request.
 *
 * Returns the new request with refcnt set to 1. The request is not posted. */
struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t handle,
							 struct _starpu_data_replicate *src_replicate,
							 struct _starpu_data_replicate *dst_replicate,
							 int handling_node,
							 enum starpu_data_access_mode mode,
							 unsigned ndeps,
							 struct starpu_task *task,
							 enum starpu_is_prefetch is_prefetch,
							 int prio,
							 unsigned is_write_invalidation,
							 const char *origin)
{
	struct _starpu_data_request *r = _starpu_data_request_new();

	_starpu_spin_checklocked(&handle->header_lock);

	_starpu_spin_init(&r->lock);

	_STARPU_TRACE_DATA_REQUEST_CREATED(handle, src_replicate?src_replicate->memory_node:-1, dst_replicate?dst_replicate->memory_node:-1, prio, is_prefetch, r);

	r->origin = origin;
	r->handle = handle;
	r->src_replicate = src_replicate;
	r->dst_replicate = dst_replicate;
	r->mode = mode;
	/* Start with a blank asynchronous channel */
	r->async_channel.node_ops = NULL;
	r->async_channel.starpu_mp_common_finished_sender = 0;
	r->async_channel.starpu_mp_common_finished_receiver = 0;
	r->async_channel.polling_node_sender = NULL;
	r->async_channel.polling_node_receiver = NULL;
	memset(&r->async_channel.event, 0, sizeof(r->async_channel.event));
	if (handling_node == -1)
		handling_node = STARPU_MAIN_RAM;
	r->handling_node = handling_node;
	/* Work out the peer node and the direction as seen from the handling node */
	if (is_write_invalidation)
	{
		r->peer_node = handling_node;
		r->inout = _STARPU_DATA_REQUEST_IN;
	}
	else if (dst_replicate->memory_node == handling_node)
	{
		/* The handling node is the destination: incoming transfer */
		if (src_replicate)
			r->peer_node = src_replicate->memory_node;
		else
			r->peer_node = handling_node;
		r->inout = _STARPU_DATA_REQUEST_IN;
	}
	else
	{
		/* The handling node is not the destination: outgoing transfer */
		r->peer_node = dst_replicate->memory_node;
		r->inout = _STARPU_DATA_REQUEST_OUT;
	}
	STARPU_ASSERT(starpu_node_get_kind(handling_node) == STARPU_CPU_RAM || _starpu_memory_node_get_nworkers(handling_node));
	r->completed = 0;
	r->added_ref = 0;
	r->canceled = 0;
	r->prefetch = is_prefetch;
	r->task = task;
	r->nb_tasks_prefetch = 0;
	r->prio = prio;
	r->retval = -1;
	r->ndeps = ndeps;
	r->next_same_req = NULL;
	r->next_req_count = 0;
	r->callbacks = NULL;
	r->com_id = 0;

	_starpu_spin_lock(&r->lock);

	/* For a fetch, take a reference as soon as now on the target, to avoid
	 * replicate eviction */
	if (is_prefetch == STARPU_FETCH && dst_replicate)
	{
		r->added_ref = 1;
		dst_replicate->refcnt++;
	}
	/* The request itself keeps the handle busy */
	handle->busy_count++;

	if (is_write_invalidation)
	{
		/* At most one write invalidation request per handle */
		STARPU_ASSERT(!handle->write_invalidation_req);
		handle->write_invalidation_req = r;
	}
	else
	{
		unsigned node;

		/* Same indexing rule as in _starpu_data_request_unlink: source
		 * node for reads, destination node for write-only requests */
		if (mode & STARPU_R)
			node = src_replicate->memory_node;
		else
			node = dst_replicate->memory_node;

		/* Append to the tail of the chain cached in the destination replicate */
		if (!dst_replicate->request[node])
			dst_replicate->request[node] = r;
		else
			dst_replicate->last_request[node]->next_same_req = r;
		dst_replicate->last_request[node] = r;

		if (mode & STARPU_R)
		{
			/* Take a reference on the source for the request to be
			 * able to read it */
			src_replicate->refcnt++;
			handle->busy_count++;
		}
	}

	r->refcnt = 1;

	_starpu_spin_unlock(&r->lock);

	return r;
}
STARPU_SIMGRID + starpu_pthread_wait_reset(&wait); +#endif + + STARPU_SYNCHRONIZE(); + if (STARPU_RUNNING_ON_VALGRIND) + completed = 1; + else + completed = r->completed; + if (completed) + { + _starpu_spin_lock(&r->lock); + if (r->completed) + break; + _starpu_spin_unlock(&r->lock); + } + +#ifndef STARPU_SIMGRID +#ifndef STARPU_NON_BLOCKING_DRIVERS + /* XXX: shouldn't be needed, and doesn't work with chained requests anyway */ + _starpu_wake_all_blocked_workers_on_node(r->handling_node); +#endif +#endif + + _starpu_datawizard_progress(may_alloc); + +#ifdef STARPU_SIMGRID + starpu_pthread_wait_wait(&wait); +#endif + } + while (1); + + if (worker) + { + if (!(old_status & STATUS_WAITING)) + _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); + } + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[local_node]); + starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[(unsigned) r->dst_replicate->memory_node]); + starpu_pthread_wait_destroy(&wait); +#endif + + + retval = r->retval; + if (retval) + _STARPU_DISP("REQUEST %p completed with retval %d!\n", r, r->retval); + + + r->refcnt--; + + /* if nobody is waiting on that request, we can get rid of it */ + if (r->refcnt == 0) + do_delete = 1; + + _starpu_spin_unlock(&r->lock); + + if (do_delete) + _starpu_data_request_destroy(r); + + return retval; +} + +/* this is non blocking */ +void _starpu_post_data_request(struct _starpu_data_request *r) +{ + unsigned handling_node = r->handling_node; + STARPU_ASSERT(starpu_node_get_kind(handling_node) == STARPU_CPU_RAM || _starpu_memory_node_get_nworkers(handling_node)); + +// _STARPU_DEBUG("POST REQUEST\n"); + + /* If some dependencies are not fulfilled yet, we don't actually post the request */ + if (r->ndeps > 0) + return; + + struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); + + if (r->mode & STARPU_R) + { + STARPU_ASSERT(r->src_replicate->allocated || r->src_replicate->mapped 
!= STARPU_UNMAPPED); + STARPU_ASSERT(r->src_replicate->refcnt); + } + + /* insert the request in the proper list */ + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); + if (r->prefetch >= STARPU_IDLEFETCH) + _starpu_data_request_prio_list_push_back(&node_struct->idle_requests[r->peer_node][r->inout], r); + else if (r->prefetch > STARPU_FETCH) + _starpu_data_request_prio_list_push_back(&node_struct->prefetch_requests[r->peer_node][r->inout], r); + else + _starpu_data_request_prio_list_push_back(&node_struct->data_requests[r->peer_node][r->inout], r); + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + _starpu_wake_all_blocked_workers_on_node(handling_node); +#endif +} + +/* We assume that r->lock is taken by the caller */ +void _starpu_data_request_append_callback(struct _starpu_data_request *r, void (*callback_func)(void *), void *callback_arg) +{ + STARPU_ASSERT(r); + + if (callback_func) + { + struct _starpu_callback_list *link; + _STARPU_MALLOC(link, sizeof(struct _starpu_callback_list)); + + link->callback_func = callback_func; + link->callback_arg = callback_arg; + link->next = r->callbacks; + r->callbacks = link; + } +} + +/* This method is called with handle's header_lock taken, and unlocks it */ +static void starpu_handle_data_request_completion(struct _starpu_data_request *r) +{ + unsigned do_delete = 0; + starpu_data_handle_t handle = r->handle; + enum starpu_data_access_mode mode = r->mode; + + struct _starpu_data_replicate *src_replicate = r->src_replicate; + struct _starpu_data_replicate *dst_replicate = r->dst_replicate; + + + if (r->canceled < 2 && dst_replicate) + { +#ifdef STARPU_MEMORY_STATS + enum _starpu_cache_state old_src_replicate_state = src_replicate->state; +#endif + + _starpu_spin_checklocked(&handle->header_lock); + _starpu_update_data_state(handle, r->dst_replicate, mode); + dst_replicate->load_request = NULL; + 
+#ifdef STARPU_MEMORY_STATS + if (src_replicate->state == STARPU_INVALID) + { + if (old_src_replicate_state == STARPU_OWNER) + _starpu_memory_handle_stats_invalidated(handle, src_replicate->memory_node); + else + { + /* XXX Currently only ex-OWNER are tagged as invalidated */ + /* XXX Have to check all old state of every node in case a SHARED data become OWNED by the dst_replicate */ + } + + } + if (dst_replicate->state == STARPU_SHARED) + _starpu_memory_handle_stats_loaded_shared(handle, dst_replicate->memory_node); + else if (dst_replicate->state == STARPU_OWNER) + { + _starpu_memory_handle_stats_loaded_owner(handle, dst_replicate->memory_node); + } +#endif + } + +#ifdef STARPU_USE_FXT + if (fut_active && r->canceled < 2 && r->com_id > 0) + { + unsigned src_node = src_replicate->memory_node; + unsigned dst_node = dst_replicate->memory_node; + size_t size = _starpu_data_get_size(handle); + _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch); + } +#endif + + /* Once the request has been fulfilled, we may submit the requests that + * were chained to that request. */ + unsigned chained_req; + for (chained_req = 0; chained_req < r->next_req_count; chained_req++) + { + struct _starpu_data_request *next_req = r->next_req[chained_req]; + STARPU_ASSERT(next_req->ndeps > 0); + next_req->ndeps--; + _starpu_post_data_request(next_req); + } + + r->completed = 1; + +#ifdef STARPU_SIMGRID + /* Wake potential worker which was waiting for it */ + if (dst_replicate) + _starpu_wake_all_blocked_workers_on_node(dst_replicate->memory_node); +#endif + + /* Remove a reference on the destination replicate for the request */ + if (dst_replicate) + { + if (r->canceled < 2 && dst_replicate->mc) + /* Make sure it stays there for the task. 
*/ + dst_replicate->nb_tasks_prefetch += r->nb_tasks_prefetch; + + if (r->added_ref) + { + STARPU_ASSERT(dst_replicate->refcnt > 0); + dst_replicate->refcnt--; + } + } + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + + /* In case the source was "locked" by the request too */ + if (mode & STARPU_R) + { + STARPU_ASSERT(src_replicate->refcnt > 0); + src_replicate->refcnt--; + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + } + _starpu_data_request_unlink(r); + + unsigned destroyed = _starpu_data_check_not_busy(handle); + + r->refcnt--; + + /* if nobody is waiting on that request, we can get rid of it */ + if (r->refcnt == 0) + do_delete = 1; + + r->retval = 0; + + /* In case there are one or multiple callbacks, we execute them now. */ + struct _starpu_callback_list *callbacks = r->callbacks; + + _starpu_spin_unlock(&r->lock); + + if (do_delete) + _starpu_data_request_destroy(r); + + if (!destroyed) + _starpu_spin_unlock(&handle->header_lock); + + /* We do the callback once the lock is released so that they can do + * blocking operations with the handle (eg. release it) */ + while (callbacks) + { + callbacks->callback_func(callbacks->callback_arg); + + struct _starpu_callback_list *next = callbacks->next; + free(callbacks); + callbacks = next; + } +} + +void _starpu_data_request_complete_wait(void *arg) +{ + struct _starpu_data_request *r = arg; + _starpu_spin_lock(&r->handle->header_lock); + _starpu_spin_lock(&r->lock); + starpu_handle_data_request_completion(r); +} + +/* TODO : accounting to see how much time was spent working for other people ... 
/* Try to process request r: perform the unmap or the copy it describes, then
 * complete it, park it as pending, or leave it to be retried.
 *
 * may_alloc is forwarded to _starpu_driver_copy_data_1_to_1().
 *
 * Returns:
 *   0        the request was completed, was found canceled, or was merged
 *            into another request already loading the same replicate;
 *   -EBUSY   (non-simgrid only) the handle or request lock could not be
 *            taken, nothing was done;
 *   -ENOMEM  the copy reported a lack of memory, the caller should retry;
 *   -EAGAIN  the copy was started asynchronously and the request was moved to
 *            the pending list of its handling node.
 *
 * TODO : accounting to see how much time was spent working for other people ... */
static int starpu_handle_data_request(struct _starpu_data_request *r, enum _starpu_may_alloc may_alloc)
{
	starpu_data_handle_t handle = r->handle;

#ifndef STARPU_SIMGRID
	/* Never block here: give up if either lock is contended */
	if (_starpu_spin_trylock(&handle->header_lock))
		return -EBUSY;
	if (_starpu_spin_trylock(&r->lock))
	{
		_starpu_spin_unlock(&handle->header_lock);
		return -EBUSY;
	}
#else
	/* Have to wait for the handle, whatever it takes, in simgrid,
	 * since we can not afford going to sleep, since nobody would wake us
	 * up. */
	_starpu_spin_lock(&handle->header_lock);
	_starpu_spin_lock(&r->lock);
#endif

	struct _starpu_data_replicate *src_replicate = r->src_replicate;
	struct _starpu_data_replicate *dst_replicate = r->dst_replicate;

	if (r->canceled)
	{
		/* Ok, canceled before starting copies etc. */
		r->canceled = 2;
		/* Nothing left to do */
		/* (the completion routine releases both locks) */
		starpu_handle_data_request_completion(r);
		return 0;
	}

	if (dst_replicate)
	{
		struct _starpu_data_request *r2 = dst_replicate->load_request;
		if (r2 && r2 != r)
		{
			/* Oh, some other transfer is already loading the value. Just wait for it */
			r->canceled = 2;
			_starpu_spin_unlock(&r->lock);
			_starpu_spin_lock(&r2->lock);
			if (r->prefetch < r2->prefetch)
				/* Upgrade the existing request */
				_starpu_update_prefetch_status(r2, r->prefetch);
			/* r will be completed from r2's callbacks */
			_starpu_data_request_append_callback(r2, _starpu_data_request_complete_wait, r);
			_starpu_spin_unlock(&r2->lock);
			_starpu_spin_unlock(&handle->header_lock);
			return 0;
		}

		/* We are loading this replicate.
		 * Note: we might fail to allocate memory, but we will keep on and others will wait for us. */
		dst_replicate->load_request = r;
	}

	enum starpu_data_access_mode r_mode = r->mode;

	/* A read request needs a source that is allocated (or mapped) and referenced */
	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate);
	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->allocated || src_replicate->mapped != STARPU_UNMAPPED);
	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->refcnt);

	/* For prefetches, we take a reference on the destination only now that
	 * we will really try to fetch the data (instead of in
	 * _starpu_create_data_request) */
	if (dst_replicate && r->prefetch > STARPU_FETCH)
	{
		r->added_ref = 1; /* Note: we might get upgraded while trying to allocate */
		dst_replicate->refcnt++;
	}

	/* From here on only the handle's header lock is held */
	_starpu_spin_unlock(&r->lock);

	if (r_mode == STARPU_UNMAP)
	{
		/* Unmap request, simply do it */
		STARPU_ASSERT(dst_replicate->mapped == src_replicate->memory_node);
		STARPU_ASSERT(handle->ops->unmap_data);
		handle->ops->unmap_data(src_replicate->data_interface, src_replicate->memory_node,
					dst_replicate->data_interface, dst_replicate->memory_node);
		dst_replicate->mapped = STARPU_UNMAPPED;
		r->retval = 0;
	}
	/* FIXME: the request may get upgraded from here to freeing it... */

	/* perform the transfer */
	/* the header of the data must be locked by the worker that submitted the request */


	if (dst_replicate && dst_replicate->state == STARPU_INVALID)
		r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
						    dst_replicate, !(r_mode & STARPU_R), r, may_alloc, r->prefetch);
	else
		/* Already valid actually, no need to transfer anything */
		r->retval = 0;

	if (r->retval == -ENOMEM)
	{
		/* If there was not enough memory, we will try to redo the
		 * request later. */

		if (r->prefetch > STARPU_FETCH)
		{
			STARPU_ASSERT(r->added_ref);
			/* Drop ref until next try */
			r->added_ref = 0;
			dst_replicate->refcnt--;
		}

		_starpu_spin_unlock(&handle->header_lock);
		return -ENOMEM;
	}

	if (r->retval == -EAGAIN)
	{
		/* The request was successful, but could not be terminated
		 * immediately. We will handle the completion of the request
		 * asynchronously. The request is put in the list of "pending"
		 * requests in the meantime. */
		_starpu_spin_unlock(&handle->header_lock);
		struct _starpu_node *node_struct = _starpu_get_node_struct(r->handling_node);

		STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[r->peer_node][r->inout]);
		_starpu_data_request_prio_list_push_back(&node_struct->data_requests_pending[r->peer_node][r->inout], r);
		node_struct->data_requests_npending[r->peer_node][r->inout]++;
		STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[r->peer_node][r->inout]);

		return -EAGAIN;
	}

	/* the request has been handled */
	/* Completion expects both locks held and releases them itself */
	_starpu_spin_lock(&r->lock);
	starpu_handle_data_request_completion(r);

	return 0;
}
+ * Thus, do not expose this optimization to helgrind */ + if (!STARPU_RUNNING_ON_VALGRIND && _starpu_data_request_prio_list_empty(&reqlist[peer_node][inout])) + return 0; +#endif + + struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); + /* We create a new list to pickup some requests from the main list, and + * we handle the request(s) one by one from it, without concurrency issues. + */ + struct _starpu_data_request_list local_list, remain_list; + _starpu_data_request_list_init(&local_list); + +#ifdef STARPU_NON_BLOCKING_DRIVERS + /* take all the entries from the request list */ + if (STARPU_PTHREAD_MUTEX_TRYLOCK(&node_struct->data_requests_list_mutex[peer_node][inout])) + { + /* List is busy, do not bother with it */ + return -EBUSY; + } +#else + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); +#endif + + for (i = node_struct->data_requests_npending[peer_node][inout]; + i < n && ! _starpu_data_request_prio_list_empty(&reqlist[peer_node][inout]); + i++) + { + r = _starpu_data_request_prio_list_pop_front_highest(&reqlist[peer_node][inout]); + _starpu_data_request_list_push_back(&local_list, r); + } + + if (!_starpu_data_request_prio_list_empty(&reqlist[peer_node][inout])) + /* We have left some requests */ + ret = -EBUSY; + + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); + + if (_starpu_data_request_list_empty(&local_list)) + /* there is no request */ + return 0; + + /* This will contain the remaining requests */ + _starpu_data_request_list_init(&remain_list); + + double start = starpu_timing_now(); + /* for all entries of the list */ + while (!_starpu_data_request_list_empty(&local_list)) + { + int res; + + if (node_struct->data_requests_npending[peer_node][inout] >= n) + { + /* Too many requests at the same time, skip pushing + * more for now */ + ret = -EBUSY; + break; + } + + r = _starpu_data_request_list_pop_front(&local_list); + + res = 
starpu_handle_data_request(r, may_alloc); + if (res != 0 && res != -EAGAIN) + { + /* handle is busy, or not enough memory, postpone for now */ + ret = res; + /* Prefetch requests might have gotten promoted while in tmp list */ + _starpu_data_request_list_push_back(&remain_list, r); + if (prefetch > STARPU_FETCH) + /* Prefetching more there would make the situation even worse */ + break; + } + else + (*pushed)++; + + if (starpu_timing_now() - start >= MAX_PUSH_TIME) + { + /* We have spent a lot of time doing requests, skip pushing more for now */ + ret = -EBUSY; + break; + } + } + + /* Gather remainder */ + _starpu_data_request_list_push_list_back(&remain_list, &local_list); + + if (!_starpu_data_request_list_empty(&remain_list)) + { + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); + while (!_starpu_data_request_list_empty(&remain_list)) + { + r = _starpu_data_request_list_pop_back(&remain_list); + if (r->prefetch >= STARPU_IDLEFETCH) + _starpu_data_request_prio_list_push_front(&node_struct->idle_requests[r->peer_node][r->inout], r); + else if (r->prefetch > STARPU_FETCH) + _starpu_data_request_prio_list_push_front(&node_struct->prefetch_requests[r->peer_node][r->inout], r); + else + _starpu_data_request_prio_list_push_front(&node_struct->data_requests[r->peer_node][r->inout], r); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); + +#ifdef STARPU_SIMGRID + if (*pushed) + { + /* We couldn't process the request due to missing + * space. Advance the clock a bit to let eviction have + * the time to make some room for us. Ideally we should + * rather have the caller block, and explicitly wait + * for eviction to happen. 
+ */ + starpu_sleep(0.000001); + _starpu_wake_all_blocked_workers_on_node(handling_node); + } +#elif !defined(STARPU_NON_BLOCKING_DRIVERS) + _starpu_wake_all_blocked_workers_on_node(handling_node); +#endif + } + + return ret; +} + +int _starpu_handle_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) +{ + return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->data_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_REQUESTS_PER_NODE, pushed, STARPU_FETCH); +} + +int _starpu_handle_node_prefetch_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) +{ + return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->prefetch_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_PREFETCH_REQUESTS_PER_NODE, pushed, STARPU_PREFETCH); +} + +int _starpu_handle_node_idle_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) +{ + return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->idle_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_IDLE_REQUESTS_PER_NODE, pushed, STARPU_IDLEFETCH); +} + +static int _handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, unsigned force) +{ +// _STARPU_DEBUG("_starpu_handle_pending_node_data_requests ...\n"); +// + struct _starpu_data_request_prio_list new_data_requests_pending; + unsigned taken, kept; + struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); + +#ifdef STARPU_NON_BLOCKING_DRIVERS + /* Here helgrind would should that this is an un protected access. + * We however don't care about missing an entry, we will get called + * again sooner or later. 
*/ + if (!STARPU_RUNNING_ON_VALGRIND && _starpu_data_request_prio_list_empty(&node_struct->data_requests_pending[peer_node][inout])) + return 0; +#endif + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (!force) + { + if (STARPU_PTHREAD_MUTEX_TRYLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout])) + { + /* List is busy, do not bother with it */ + return 0; + } + } + else +#endif + /* We really want to handle requests */ + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + + if (_starpu_data_request_prio_list_empty(&node_struct->data_requests_pending[peer_node][inout])) + { + /* there is no request */ + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + return 0; + } + /* for all entries of the list */ + struct _starpu_data_request_prio_list local_list = node_struct->data_requests_pending[peer_node][inout]; + _starpu_data_request_prio_list_init(&node_struct->data_requests_pending[peer_node][inout]); + + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + + _starpu_data_request_prio_list_init(&new_data_requests_pending); + taken = 0; + kept = 0; + + while (!_starpu_data_request_prio_list_empty(&local_list)) + { + struct _starpu_data_request *r; + r = _starpu_data_request_prio_list_pop_front_highest(&local_list); + taken++; + + starpu_data_handle_t handle = r->handle; + +#ifndef STARPU_SIMGRID + if (force) + /* Have to wait for the handle, whatever it takes */ +#endif + /* Or when running in simgrid, in which case we can not + * afford going to sleep, since nobody would wake us + * up. 
*/ + _starpu_spin_lock(&handle->header_lock); +#ifndef STARPU_SIMGRID + else + if (_starpu_spin_trylock(&handle->header_lock)) + { + /* Handle is busy, retry this later */ + _starpu_data_request_prio_list_push_back(&new_data_requests_pending, r); + kept++; + continue; + } +#endif + + /* This shouldn't be too hard to acquire */ + _starpu_spin_lock(&r->lock); + + /* wait until the transfer is terminated */ + if (force) + { + /* We are doing starpu_shutdown */ + _starpu_driver_wait_request_completion(&r->async_channel); + starpu_handle_data_request_completion(r); + } + else + { + if (_starpu_driver_test_request_completion(&r->async_channel)) + { + /* The request was completed */ + starpu_handle_data_request_completion(r); + } + else + { + /* The request was not completed, so we put it + * back again on the list of pending requests + * so that it can be handled later on. */ + _starpu_spin_unlock(&r->lock); + _starpu_spin_unlock(&handle->header_lock); + + _starpu_data_request_prio_list_push_back(&new_data_requests_pending, r); + kept++; + } + } + } + _starpu_data_request_prio_list_deinit(&local_list); + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + node_struct->data_requests_npending[peer_node][inout] -= taken - kept; + if (kept) + _starpu_data_request_prio_list_push_prio_list_back(&node_struct->data_requests_pending[peer_node][inout], &new_data_requests_pending); + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + + return taken - kept; +} + +int _starpu_handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout) +{ + return _handle_pending_node_data_requests(handling_node, peer_node, inout, 0); +} + +/* Only used at starpu_shutdown */ +int _starpu_handle_all_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout) +{ + return 
_handle_pending_node_data_requests(handling_node, peer_node, inout, 1); +} + +/* Note: the returned value will be outdated since the locks are not taken at + * entry/exit */ +static int __starpu_check_that_no_data_request_exists(unsigned node, unsigned peer_node, enum _starpu_data_request_inout inout) +{ + int no_request; + int no_pending; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); + no_request = _starpu_data_request_prio_list_empty(&node_struct->data_requests[peer_node][inout]) + && _starpu_data_request_prio_list_empty(&node_struct->prefetch_requests[peer_node][inout]) + && _starpu_data_request_prio_list_empty(&node_struct->idle_requests[peer_node][inout]); + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + no_pending = !node_struct->data_requests_npending[peer_node][inout]; + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); + + return no_request && no_pending; +} + +int _starpu_check_that_no_data_request_exists(unsigned node) +{ + unsigned peer_node, nnodes = starpu_memory_nodes_get_count(); + + for (peer_node = 0; peer_node < nnodes; peer_node++) + if (!__starpu_check_that_no_data_request_exists(node, peer_node, _STARPU_DATA_REQUEST_IN) + || !__starpu_check_that_no_data_request_exists(node, peer_node, _STARPU_DATA_REQUEST_OUT)) + return 0; + return 1; +} + +/* Note: the returned value will be outdated since the locks are not taken at + * entry/exit */ +int _starpu_check_that_no_data_request_is_pending(unsigned node, unsigned peer_node, enum _starpu_data_request_inout inout) +{ + return !_starpu_get_node_struct(node)->data_requests_npending[peer_node][inout]; +} + + +void _starpu_update_prefetch_status(struct _starpu_data_request *r, enum starpu_is_prefetch prefetch) +{ + 
struct _starpu_node *node_struct = _starpu_get_node_struct(r->handling_node); + _starpu_spin_checklocked(&r->handle->header_lock); + STARPU_ASSERT(r->prefetch > prefetch); + + if (prefetch == STARPU_FETCH && !r->added_ref) + { + /* That would have been done by _starpu_create_data_request */ + r->added_ref = 1; + r->dst_replicate->refcnt++; + } + + r->prefetch=prefetch; + + if (prefetch >= STARPU_IDLEFETCH) + /* No possible actual change */ + return; + + /* We have to promote chained_request too! */ + unsigned chained_req; + for (chained_req = 0; chained_req < r->next_req_count; chained_req++) + { + struct _starpu_data_request *next_req = r->next_req[chained_req]; + if (next_req->prefetch > prefetch) + _starpu_update_prefetch_status(next_req, prefetch); + } + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); + + int found = 1; + + /* The request can be in a different list (handling request or the temp list) + * we have to check that it is really in the prefetch or idle list. 
*/ + if (_starpu_data_request_prio_list_ismember(&node_struct->prefetch_requests[r->peer_node][r->inout], r)) + _starpu_data_request_prio_list_erase(&node_struct->prefetch_requests[r->peer_node][r->inout], r); + else if (_starpu_data_request_prio_list_ismember(&node_struct->idle_requests[r->peer_node][r->inout], r)) + _starpu_data_request_prio_list_erase(&node_struct->idle_requests[r->peer_node][r->inout], r); + else + found = 0; + + if (found) + { + if (prefetch > STARPU_FETCH) + _starpu_data_request_prio_list_push_back(&node_struct->prefetch_requests[r->peer_node][r->inout],r); + else + _starpu_data_request_prio_list_push_back(&node_struct->data_requests[r->peer_node][r->inout],r); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + _starpu_wake_all_blocked_workers_on_node(r->handling_node); +#endif +} diff --git a/src/datawizard/data_request.h b/src/datawizard/data_request.h new file mode 100644 index 0000000..2a72d34 --- /dev/null +++ b/src/datawizard/data_request.h @@ -0,0 +1,195 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/** @file */ + +/* This one includes us, so make sure to include it first */ +#include + +#ifndef __DATA_REQUEST_H__ +#define __DATA_REQUEST_H__ + +#include +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/* TODO: This should be tuned according to driver capabilities + * Data interfaces should also have to declare how many asynchronous requests + * they have actually started (think of e.g. csr). + */ +#define MAX_PENDING_REQUESTS_PER_NODE 5 +#define MAX_PENDING_PREFETCH_REQUESTS_PER_NODE 2 +#define MAX_PENDING_IDLE_REQUESTS_PER_NODE 1 +/** Maximum time in us that we can afford pushing requests before going back to the driver loop, e.g. for checking GPU task termination */ +#define MAX_PUSH_TIME 1000 + +struct _starpu_data_replicate; + +struct _starpu_callback_list +{ + void (*callback_func)(void *); + void *callback_arg; + struct _starpu_callback_list *next; +}; + +enum _starpu_data_request_inout +{ + _STARPU_DATA_REQUEST_IN, _STARPU_DATA_REQUEST_OUT +}; + +/** This represents a data request, i.e. we want some data to get transferred + * from a source to a destination. */ +LIST_TYPE(_starpu_data_request, + struct _starpu_spinlock lock; + unsigned refcnt; + const char *origin; /** Name of the function that triggered the request */ + + starpu_data_handle_t handle; + struct _starpu_data_replicate *src_replicate; + struct _starpu_data_replicate *dst_replicate; + + /** Which memory node will actually perform the transfer. + * This is important in the CUDA/OpenCL case, where only the worker for + * the node can make the CUDA/OpenCL calls. + */ + unsigned handling_node; + unsigned peer_node; + enum _starpu_data_request_inout inout; + + /* + * What the destination node wants to do with the data: write to it, + * read it, or read and write to it. Only in the two latter cases we + * need an actual transfer, the first only needs an allocation. 
+ * + * With mapped buffers, an additional case is mode = 0, which means + * unmapping the buffer. + */ + enum starpu_data_access_mode mode; + + /** Elements needed to make the transfer asynchronous */ + struct _starpu_async_channel async_channel; + + /** Whether the transfer is completed. */ + unsigned completed:1; + + /** Whether we have already added our reference to the dst replicate. */ + unsigned added_ref:1; + + /** Whether the request was canceled before being handled (because the transfer already happened another way). */ + unsigned canceled:2; + + /** Whether this is just a prefetch request */ + enum starpu_is_prefetch prefetch:3; + + /** Task this request is for */ + struct starpu_task *task; + + /** Number of tasks which used this as a prefetch */ + unsigned nb_tasks_prefetch; + + /** Priority of the request. Default is 0 */ + int prio; + + /** The value returned by the transfer function */ + int retval; + + /** The request will not actually be submitted until there remains + * dependencies. */ + unsigned ndeps; + + /** Some further tasks may have requested prefetches for the same data + * much later on, link with them */ + struct _starpu_data_request *next_same_req; + + /** in case we have a chain of request (eg. for nvidia multi-GPU), this + * is the list of requests which are waiting for this one. */ + struct _starpu_data_request *next_req[STARPU_MAXNODES+1]; + /** The number of requests in next_req */ + unsigned next_req_count; + + struct _starpu_callback_list *callbacks; + + unsigned long com_id; +) +PRIO_LIST_TYPE(_starpu_data_request, prio) + +/** Everyone that wants to access some piece of data will post a request. + * Not only StarPU internals, but also the application may put such requests */ +LIST_TYPE(_starpu_data_requester, + /** what kind of access is requested ? 
*/ + enum starpu_data_access_mode mode; + + /** applications may also directly manipulate data */ + unsigned is_requested_by_codelet; + + /** in case this is a codelet that will do the access */ + struct _starpu_job *j; + unsigned buffer_index; + + int prio; + + /** if this is more complicated ... (eg. application request) + * NB: this callback is not called with the lock taken ! + */ + void (*ready_data_callback)(void *argcb); + void *argcb; +) +PRIO_LIST_TYPE(_starpu_data_requester, prio) + +void _starpu_init_data_request_lists(void); +void _starpu_deinit_data_request_lists(void); +void _starpu_post_data_request(struct _starpu_data_request *r); +/** returns 0 if we have pushed all requests, -EBUSY or -ENOMEM otherwise */ +int _starpu_handle_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); +int _starpu_handle_node_prefetch_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); +int _starpu_handle_node_idle_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); + +int _starpu_handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); + +/* Only used at starpu_shutdown */ +int _starpu_handle_all_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); + +int _starpu_check_that_no_data_request_exists(unsigned handling_node); +int _starpu_check_that_no_data_request_is_pending(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); + +struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t handle, + struct _starpu_data_replicate *src_replicate, + struct _starpu_data_replicate *dst_replicate, + int handling_node, + enum 
starpu_data_access_mode mode, + unsigned ndeps, + struct starpu_task *task, + enum starpu_is_prefetch is_prefetch, + int prio, + unsigned is_write_invalidation, + const char *origin) STARPU_ATTRIBUTE_MALLOC; + +int _starpu_wait_data_request_completion(struct _starpu_data_request *r, enum _starpu_may_alloc may_alloc); + +void _starpu_data_request_append_callback(struct _starpu_data_request *r, + void (*callback_func)(void *), + void *callback_arg); + +void _starpu_update_prefetch_status(struct _starpu_data_request *r, enum starpu_is_prefetch prefetch); + +#pragma GCC visibility pop + +#endif // __DATA_REQUEST_H__ diff --git a/src/datawizard/datastats.c b/src/datawizard/datastats.c new file mode 100644 index 0000000..f6d91f2 --- /dev/null +++ b/src/datawizard/datastats.c @@ -0,0 +1,117 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +int _starpu_enable_stats = 0; + +/* Enable statistics when starpu_getenv("STARPU_ENABLE_STATS") returns non-NULL */ +void _starpu_datastats_init(void) +{ + _starpu_enable_stats = !!starpu_getenv("STARPU_ENABLE_STATS"); +} + +/* measure the cache hit ratio for each node */ +static unsigned hit_cnt[STARPU_MAXNODES]; +static unsigned miss_cnt[STARPU_MAXNODES]; + +/* Record one cache hit on memory node node, which indexes hit_cnt[] */ +void __starpu_msi_cache_hit(unsigned node) +{ + STARPU_HG_DISABLE_CHECKING(hit_cnt[node]); + hit_cnt[node]++; +} + +/* Record one cache miss on memory node node, which indexes miss_cnt[] */ +void __starpu_msi_cache_miss(unsigned node) +{ + STARPU_HG_DISABLE_CHECKING(miss_cnt[node]); + miss_cnt[node]++; +} + +/* Print to stream the hit/miss totals over all nodes, then the counters of + * each node that recorded at least one access; does nothing unless statistics + * are enabled */ +void _starpu_display_msi_stats(FILE *stream) +{ + if (!starpu_enable_stats()) + return; + + unsigned node; + unsigned total_hit_cnt = 0; + unsigned total_miss_cnt = 0; + + fprintf(stream, "\n#---------------------\n"); + fprintf(stream, "MSI cache stats :\n"); + + for (node = 0; node < STARPU_MAXNODES; node++) + { + total_hit_cnt += hit_cnt[node]; + total_miss_cnt += miss_cnt[node]; + } + + /* Without any recorded access the ratios would be 0/0: report 0 instead of nan */ + unsigned total_cnt = total_hit_cnt+total_miss_cnt; + float total_hit_pct = total_cnt ? (100.0f*total_hit_cnt)/total_cnt : 0.0f; + float total_miss_pct = total_cnt ? (100.0f*total_miss_cnt)/total_cnt : 0.0f; + + fprintf(stream, "TOTAL MSI stats\thit %u (%2.2f %%)\tmiss %u (%2.2f %%)\n", total_hit_cnt, total_hit_pct, total_miss_cnt, total_miss_pct); + + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (hit_cnt[node]+miss_cnt[node]) + { + char name[128]; + starpu_memory_node_get_name(node, name, sizeof(name)); + fprintf(stream, "memory node %s\n", name); + fprintf(stream, "\thit : %u (%2.2f %%)\n", hit_cnt[node], (100.0f*hit_cnt[node])/(hit_cnt[node]+miss_cnt[node])); + fprintf(stream, "\tmiss : %u (%2.2f %%)\n", miss_cnt[node], (100.0f*miss_cnt[node])/(hit_cnt[node]+miss_cnt[node])); + } + } + fprintf(stream, "#---------------------\n"); +} + +/* measure the efficiency of our allocation cache */ +static unsigned alloc_cnt[STARPU_MAXNODES]; +static unsigned alloc_cache_hit_cnt[STARPU_MAXNODES]; + +/* Record one allocation cache hit on memory node node */ +void __starpu_allocation_cache_hit(unsigned node) +{ + STARPU_HG_DISABLE_CHECKING(alloc_cache_hit_cnt[node]); + alloc_cache_hit_cnt[node]++; +} +
+void __starpu_data_allocation_inc_stats(unsigned node) +{ + STARPU_HG_DISABLE_CHECKING(alloc_cnt[node]); + alloc_cnt[node]++; +} + +void _starpu_display_alloc_cache_stats(FILE *stream) +{ + if (!starpu_enable_stats()) + return; + + fprintf(stream, "\n#---------------------\n"); + fprintf(stream, "Allocation cache stats:\n"); + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (alloc_cnt[node]) + { + char name[128]; + starpu_memory_node_get_name(node, name, sizeof(name)); + fprintf(stream, "memory node %s\n", name); + fprintf(stream, "\ttotal alloc : %u\n", alloc_cnt[node]); + fprintf(stream, "\tcached alloc: %u (%2.2f %%)\n", + alloc_cache_hit_cnt[node], (100.0f*alloc_cache_hit_cnt[node])/(alloc_cnt[node])); + } + } + fprintf(stream, "#---------------------\n"); +} diff --git a/src/datawizard/datastats.h b/src/datawizard/datastats.h new file mode 100644 index 0000000..98dbacf --- /dev/null +++ b/src/datawizard/datastats.h @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DATASTATS_H__ +#define __DATASTATS_H__ + +/** @file */ + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Non-zero when statistics are enabled, read through starpu_enable_stats() */ +extern int _starpu_enable_stats; + +/** Initialize _starpu_enable_stats */ +void _starpu_datastats_init(void); + +/** Return non-zero when statistics are enabled */ +static inline int starpu_enable_stats(void) +{ + return _starpu_enable_stats; +} + +void __starpu_msi_cache_hit(unsigned node); +void __starpu_msi_cache_miss(unsigned node); + +/** Record a cache hit on \p node, only when statistics are enabled */ +#define _starpu_msi_cache_hit(node) do { \ + if (starpu_enable_stats()) \ + __starpu_msi_cache_hit(node); \ +} while (0) + +/** Record a cache miss on \p node, only when statistics are enabled */ +#define _starpu_msi_cache_miss(node) do { \ + if (starpu_enable_stats()) \ + __starpu_msi_cache_miss(node); \ +} while (0) + +/** Print the cache hit/miss statistics to \p stream */ +void _starpu_display_msi_stats(FILE *stream); + +void __starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED); +void __starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED); + +/** Record an allocation cache hit on \p node, only when statistics are enabled */ +#define _starpu_allocation_cache_hit(node) do { \ + if (starpu_enable_stats()) \ + __starpu_allocation_cache_hit(node); \ +} while (0) + +/** Record an allocation on \p node, only when statistics are enabled */ +#define _starpu_data_allocation_inc_stats(node) do { \ + if (starpu_enable_stats()) \ + __starpu_data_allocation_inc_stats(node); \ +} while (0) + +/** Print the allocation cache statistics to \p stream */ +void _starpu_display_alloc_cache_stats(FILE *stream); + +#pragma GCC visibility pop + +#endif // __DATASTATS_H__ diff --git a/src/datawizard/datawizard.c b/src/datawizard/datawizard.c new file mode 100644 index 0000000..2a08795 --- /dev/null +++ b/src/datawizard/datawizard.c @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_SIMGRID +#include +#endif + +static int ____starpu_datawizard_progress(unsigned memory_node, unsigned peer_start, unsigned peer_end, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned push_requests) +{ + int ret = 0; + unsigned peer_node; + + /* in case some other driver requested data */ + for (peer_node = peer_start; peer_node < peer_end; peer_node++) + { + if (_starpu_handle_pending_node_data_requests(memory_node, peer_node, inout)) + ret = 1; + } + + starpu_memchunk_tidy(memory_node); + + if (ret || push_requests) + { + /* Some transfers have finished, or the driver requests to really push more */ + unsigned pushed; + unsigned ok = 1; + + for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) + { + if (_starpu_handle_node_data_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) + ok = 0; + if (pushed) + ret = 1; + } + + if (ok) + { + unsigned doidle = 1; + + /* We pushed all pending requests, we can afford pushing + * prefetch requests */ + for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) + { + if (_starpu_handle_node_prefetch_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) + ok = 0; + if (pushed) + ret = 1; + if (!_starpu_check_that_no_data_request_is_pending(memory_node, peer_node, inout)) + doidle = 0; + } + + if (doidle) + /* No pending transfer, push some idle transfer */ + for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) + { + if (_starpu_handle_node_idle_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) + ok = 0; + if (pushed) + ret = 
1; + } + } + + } + + return ret; +} + +static int ___starpu_datawizard_progress(unsigned memory_node, unsigned nnodes, enum _starpu_may_alloc may_alloc, unsigned push_requests) +{ + int ret = 0; + unsigned peer_node; + +#ifdef STARPU_SIMGRID + starpu_sleep(0.000001); +#endif + STARPU_UYIELD(); + + /* First handle all incoming transfers */ + ret |= ____starpu_datawizard_progress(memory_node, 0, nnodes, _STARPU_DATA_REQUEST_IN, may_alloc, push_requests); + + /* Then handle outgoing transfers */ + for (peer_node = 0; peer_node < nnodes; peer_node++) + ret |= ____starpu_datawizard_progress(memory_node, peer_node, peer_node+1, _STARPU_DATA_REQUEST_OUT, may_alloc, push_requests); + + return ret; +} + +int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push_requests) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + unsigned memnode; + + if (!worker) + { + /* Call from main application, only make RAM requests progress */ + int ret = 0; + int nnumas = starpu_memory_nodes_get_numa_count(); + int numa; + for (numa = 0; numa < nnumas; numa++) + ret |= ___starpu_datawizard_progress(numa, nnumas, may_alloc, push_requests); + _starpu_execute_registered_progression_hooks(); + + return ret; + } + + /* processing requests may release some tasks, we cannot be already + * scheduling a task. */ + if (worker->state_sched_op_pending) + return 0; + + if (worker->set) + /* Running one of the workers of a worker set. 
The reference for + * driving memory is its worker 0 (see registrations in topology.c) */ + worker = &worker->set->workers[0]; + + unsigned current_worker_id = worker->workerid; + int ret = 0; + unsigned nnodes = starpu_memory_nodes_get_count(); + + for (memnode = 0; memnode < nnodes; memnode++) + { + if (_starpu_worker_drives_memory[current_worker_id][memnode] == 1) + { + if(_starpu_config.conf.cuda_only_fast_alloc_other_memnodes && worker->arch == STARPU_CUDA_WORKER && worker->memory_node != memnode) + ret |= ___starpu_datawizard_progress(memnode, nnodes, _STARPU_DATAWIZARD_ONLY_FAST_ALLOC, push_requests); + else + ret |= ___starpu_datawizard_progress(memnode, nnodes, may_alloc, push_requests); + } + } + + _starpu_execute_registered_progression_hooks(); + + return ret; +} + +void _starpu_datawizard_progress(enum _starpu_may_alloc may_alloc) +{ + __starpu_datawizard_progress(may_alloc, 1); +} + +/* Only used at starpu_shutdown */ +void _starpu_datawizard_handle_all_pending_node_data_requests(unsigned memnode) +{ + unsigned nnodes = starpu_memory_nodes_get_count(); + unsigned memnode2; + + for (memnode2 = 0; memnode2 < nnodes; memnode2++) + { + _starpu_handle_all_pending_node_data_requests(memnode, memnode2, _STARPU_DATA_REQUEST_IN); + _starpu_handle_all_pending_node_data_requests(memnode, memnode2, _STARPU_DATA_REQUEST_OUT); + } +} diff --git a/src/datawizard/datawizard.h b/src/datawizard/datawizard.h new file mode 100644 index 0000000..f99c45b --- /dev/null +++ b/src/datawizard/datawizard.h @@ -0,0 +1,57 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DATAWIZARD_H__ +#define __DATAWIZARD_H__ + +/** @file */ + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#pragma GCC visibility push(hidden) + +/** Make data transfers progress on all memory nodes driven by the current worker. + * + * If \p push_requests is 1, it can start new transfers. + * + * If \p may_alloc is _STARPU_DATAWIZARD_DO_ALLOC, it can allocate destination data for transfers + * (this is not possible e.g. when spinning for a handle lock) + * + * Returns non-zero when at least one request was completed or pushed, 0 otherwise. + */ +int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push_requests); +/** Call __starpu_datawizard_progress with push_requests = 1 */ +void _starpu_datawizard_progress(enum _starpu_may_alloc may_alloc); + +/* Only used at starpu_shutdown */ +/** Handle all the pending data requests of node \p memnode, for every peer node and in both directions */ +void _starpu_datawizard_handle_all_pending_node_data_requests(unsigned memnode); + +#pragma GCC visibility pop + +#endif // __DATAWIZARD_H__ diff --git a/src/datawizard/filters.c b/src/datawizard/filters.c new file mode 100644 index 0000000..a768e49 --- /dev/null +++ b/src/datawizard/filters.c @@ -0,0 +1,1139 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//#define STARPU_VERBOSE + +#include +#include +#include +#include +#include + +/* + * Apply the data filter f on every leaf of the partition tree rooted at root_handle: + * a handle without children is partitioned with f, otherwise each child is visited recursively + */ +static void map_filter(starpu_data_handle_t root_handle, struct starpu_data_filter *f) +{ + /* we need to apply the data filter on all leaves of the tree */ + if (root_handle->nchildren == 0) + { + /* this is a leaf */ + starpu_data_partition(root_handle, f); + } + else + { + /* try to apply the data filter recursively */ + unsigned child; + for (child = 0; child < root_handle->nchildren; child++) + { + starpu_data_handle_t handle_child = starpu_data_get_child(root_handle, child); + map_filter(handle_child, f); + } + } +} +void starpu_data_vmap_filters(starpu_data_handle_t root_handle, unsigned nfilters, va_list pa) +{ + unsigned i; + for (i = 0; i < nfilters; i++) + { + struct starpu_data_filter *next_filter; + next_filter = va_arg(pa, struct starpu_data_filter *); + + STARPU_ASSERT(next_filter); + + map_filter(root_handle, next_filter); + } +} + +void starpu_data_map_filters(starpu_data_handle_t root_handle, unsigned nfilters, ...)
+{ + va_list pa; + va_start(pa, nfilters); + starpu_data_vmap_filters(root_handle, nfilters, pa); + va_end(pa); +} + +void starpu_data_map_filters_parray(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters) +{ + int i; + STARPU_ASSERT(nfilters >= 0); + for (i = 0; i < nfilters; i++) + { + struct starpu_data_filter *next_filter = filters[i]; + STARPU_ASSERT(next_filter); + map_filter(root_handle, next_filter); + } +} + +void starpu_data_map_filters_array(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter *filters) +{ + int i; + STARPU_ASSERT(nfilters >= 0); + for (i = 0; i < nfilters; i++) + { + map_filter(root_handle, &filters[i]); + } +} + +void fstarpu_data_map_filters(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters) +{ + starpu_data_map_filters_parray(root_handle, nfilters, filters); +} + +int starpu_data_get_nb_children(starpu_data_handle_t handle) +{ + return handle->nchildren; +} + +starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i) +{ + STARPU_ASSERT_MSG(handle->nchildren != 0, "Data %p has to be partitioned before accessing children", handle); + STARPU_ASSERT_MSG(i < handle->nchildren, "Invalid child index %u in handle %p, maximum %u", i, handle, handle->nchildren); + return &handle->children[i]; +} + +/* + * Return the descendant of root_handle reached by following depth child indices, + * passed as variadic unsigned arguments, one per level. + * Example: starpu_data_get_sub_data(root_handle, 3, 42, 0, 1) returns child 1 + * of child 0 of child 42 of root_handle. + */ +starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_handle, unsigned depth, ...)
+{ + va_list pa; + va_start(pa, depth); + starpu_data_handle_t handle = starpu_data_vget_sub_data(root_handle, depth, pa); + va_end(pa); + + return handle; +} + +/* + * Same as starpu_data_get_sub_data(), but the depth child indices (one unsigned + * per level) are read from the va_list pa + */ +starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_handle, unsigned depth, va_list pa) +{ + STARPU_ASSERT(root_handle); + starpu_data_handle_t current_handle = root_handle; + + /* the number of variadic arguments must match the depth in the tree */ + unsigned i; + for (i = 0; i < depth; i++) + { + unsigned next_child; + next_child = va_arg(pa, unsigned); + + STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data %p has to be partitioned before accessing children", current_handle); + STARPU_ASSERT_MSG(next_child < current_handle->nchildren, "Bogus child number %u, data %p only has %u children", next_child, current_handle, current_handle->nchildren); + + /* descend one level: children is an array of handle structures */ + current_handle = &(current_handle->children[next_child]); + } + + return current_handle; +} + +/* + * Same as starpu_data_get_sub_data(), but the depth child indices are read from + * the indices array; depth and every index must be non-negative + */ +starpu_data_handle_t fstarpu_data_get_sub_data(starpu_data_handle_t root_handle, int depth, int *indices) +{ + STARPU_ASSERT(root_handle); + starpu_data_handle_t current_handle = root_handle; + + STARPU_ASSERT(depth >= 0); + /* indices must provide one child index per level of depth */ + int i; + for (i = 0; i < depth; i++) + { + int next_child; + next_child = indices[i]; + STARPU_ASSERT(next_child >= 0); + + STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data %p has to be partitioned before accessing children", current_handle); + STARPU_ASSERT_MSG((unsigned) next_child < current_handle->nchildren, "Bogus child number %d, data %p only has %u children", next_child, current_handle, current_handle->nchildren); + + /* descend one level: children is an array of handle structures */ + current_handle = &(current_handle->children[next_child]); + } + + return current_handle; +} + +static unsigned _starpu_data_partition_nparts(starpu_data_handle_t initial_handle, struct starpu_data_filter *f) +{ + /* how many parts ?
*/ + if (f->get_nchildren) + return f->get_nchildren(f, initial_handle); + else + return f->nchildren; + +} + +static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_data_handle_t *childrenp, unsigned nparts, struct starpu_data_filter *f, int inherit_state) +{ + unsigned i; + unsigned node; + unsigned found = STARPU_MAXNODES; + + for (node = 0; node < STARPU_MAXNODES; node++) + _starpu_data_unmap(initial_handle, node); + + /* first take care to properly lock the data header */ + _starpu_spin_lock(&initial_handle->header_lock); + + initial_handle->nplans++; + + STARPU_ASSERT_MSG(nparts > 0, "Partitioning data %p in 0 piece does not make sense", initial_handle); + + /* allocate the children */ + if (inherit_state) + { + _STARPU_CALLOC(initial_handle->children, nparts, sizeof(struct _starpu_data_state)); + + /* this handle now has children */ + initial_handle->nchildren = nparts; + } + + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (initial_handle->per_node[node].state != STARPU_INVALID) + found = node; + STARPU_ASSERT(initial_handle->per_node[node].mapped == STARPU_UNMAPPED); + } + if (found == STARPU_MAXNODES) + { + /* This is lazy allocation, allocate it now in main RAM, so as + * to have somewhere to gather pieces later */ + /* FIXME: mark as unevictable! 
*/ + int home_node = initial_handle->home_node; + if (home_node < 0 || (starpu_node_get_kind(home_node) != STARPU_CPU_RAM)) + home_node = STARPU_MAIN_RAM; + int ret = _starpu_allocate_memory_on_node(initial_handle, &initial_handle->per_node[home_node], STARPU_FETCH, 0); +#ifdef STARPU_DEVEL +#warning we should reclaim memory if allocation failed +#endif + STARPU_ASSERT(!ret); + } + + if (nparts && !inherit_state) + { + STARPU_ASSERT_MSG(childrenp, "Passing NULL pointer for parameter childrenp while parameter inherit_state is 0"); + } + + for (i = 0; i < nparts; i++) + { + starpu_data_handle_t child; + + if (inherit_state) + child = &initial_handle->children[i]; + else + child = childrenp[i]; + STARPU_ASSERT(child); + + struct starpu_data_interface_ops *ops; + + /* each child may have his own interface type */ + /* what's this child's interface ? */ + if (f->get_child_ops) + ops = f->get_child_ops(f, i); + else + ops = initial_handle->ops; + + /* As most of the fields must be initialized at NULL, let's put + * 0 everywhere */ + memset(child, 0, sizeof(*child)); + _starpu_data_handle_init(child, ops, initial_handle->mf_node); + + child->root_handle = initial_handle->root_handle; + child->father_handle = initial_handle; + + child->nsiblings = nparts; + if (inherit_state) + { + //child->siblings = NULL; + } + else + child->siblings = childrenp; + child->sibling_index = i; + child->depth = initial_handle->depth + 1; + + child->active = inherit_state; + + child->home_node = initial_handle->home_node; + child->wt_mask = initial_handle->wt_mask; + + child->aliases = initial_handle->aliases; + //child->readonly_dup = NULL; + //child->readonly_dup_of = NULL; + + child->is_not_important = initial_handle->is_not_important; + + child->sequential_consistency = initial_handle->sequential_consistency; + child->initialized = initial_handle->initialized; + child->readonly = initial_handle->readonly; + child->ooc = initial_handle->ooc; + + /* The methods used for reduction are 
propagated to the + * children. */ + child->redux_cl = initial_handle->redux_cl; + child->init_cl = initial_handle->init_cl; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *initial_replicate; + struct _starpu_data_replicate *child_replicate; + + initial_replicate = &initial_handle->per_node[node]; + child_replicate = &child->per_node[node]; + + if (inherit_state) + child_replicate->state = initial_replicate->state; + else + child_replicate->state = STARPU_INVALID; + if (inherit_state || !initial_replicate->automatically_allocated) + child_replicate->allocated = initial_replicate->allocated; + else + { + //child_replicate->allocated = 0; + } + /* Do not allow memory reclaiming within the child for parent bits */ + //child_replicate->automatically_allocated = 0; + //child_replicate->refcnt = 0; + child_replicate->memory_node = node; + //child_replicate->relaxed_coherency = 0; + child_replicate->mapped = STARPU_UNMAPPED; + if (inherit_state) + child_replicate->initialized = initial_replicate->initialized; + else + { + //child_replicate->initialized = 0; + } + //child_replicate->nb_tasks_prefetch = 0; + + /* update the interface */ + void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node); + void *child_interface = starpu_data_get_interface_on_node(child, node); + + STARPU_ASSERT_MSG(!(!inherit_state && child_replicate->automatically_allocated && child_replicate->allocated), "partition planning is currently not supported when handle has some automatically allocated buffers"); + f->filter_func(initial_interface, child_interface, f, i, nparts); + } + + /* We compute the size and the footprint of the child once and + * store it in the handle */ + child->footprint = _starpu_compute_data_footprint(child); + + _STARPU_TRACE_HANDLE_DATA_REGISTER(child); + } + /* now let the header */ + _starpu_spin_unlock(&initial_handle->header_lock); +} + +static +void _starpu_empty_codelet_function(void *buffers[], void 
*args) +{ + (void) buffers; // unused; + (void) args; // unused; +} + +void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gathering_node) +{ + unsigned child; + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + unsigned node; + unsigned sizes[root_handle->nchildren]; + + _STARPU_TRACE_START_UNPARTITION(root_handle, gathering_node); + _starpu_spin_lock(&root_handle->header_lock); + + STARPU_ASSERT_MSG(root_handle->nchildren != 0, "data %p is not partitioned, can not unpartition it", root_handle); + + /* first take all the children lock (in order !) */ + for (child = 0; child < root_handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); + + /* make sure the intermediate children is unpartitionned as well */ + if (child_handle->nchildren > 0) + starpu_data_unpartition(child_handle, gathering_node); + + /* If this is a multiformat handle, we must convert the data now */ +#ifdef STARPU_DEVEL +#warning TODO: _starpu_fetch_data_on_node should be doing it +#endif + if (_starpu_data_is_multiformat_handle(child_handle) && + starpu_node_get_kind(child_handle->mf_node) != STARPU_CPU_RAM) + { + struct starpu_codelet cl = + { + .where = STARPU_CPU, + .cpu_funcs = { _starpu_empty_codelet_function }, + .modes = { STARPU_RW }, + .nbuffers = 1 + }; + struct starpu_task *task = starpu_task_create(); + task->name = "convert_data"; + + STARPU_TASK_SET_HANDLE(task, child_handle, 0); + task->cl = &cl; + task->synchronous = 1; + if (_starpu_task_submit_internally(task) != 0) + _STARPU_ERROR("Could not submit the conversion task while unpartitionning\n"); + } + + int ret; + /* for now we pretend that the RAM is almost unlimited and that gathering + * data should be possible from the node that does the unpartionning ... we + * don't want to have the programming deal with memory shortage at that time, + * really */ + /* Acquire the child data on the gathering node. 
This will trigger collapsing any reduction */ + ret = starpu_data_acquire_on_node(child_handle, gathering_node, STARPU_RW); + STARPU_ASSERT(ret == 0); + starpu_data_release_on_node(child_handle, gathering_node); + + _starpu_spin_lock(&child_handle->header_lock); + child_handle->busy_waiting = 1; + _starpu_spin_unlock(&child_handle->header_lock); + + /* Make sure it is not mapped */ + for (node = 0; node < STARPU_MAXNODES; node++) + _starpu_data_unmap(child_handle, node); + + /* Wait for all requests to finish (notably WT and UNMAP requests) */ + STARPU_PTHREAD_MUTEX_LOCK(&child_handle->busy_mutex); + while (1) + { + /* Here helgrind would shout that this an unprotected access, + * but this is actually fine: all threads who do busy_count-- + * are supposed to call _starpu_data_check_not_busy, which will + * wake us up through the busy_mutex/busy_cond. */ + if (!child_handle->busy_count) + break; + /* This is woken by _starpu_data_check_not_busy, always called + * after decrementing busy_count */ + STARPU_PTHREAD_COND_WAIT(&child_handle->busy_cond, &child_handle->busy_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&child_handle->busy_mutex); + + _starpu_spin_lock(&child_handle->header_lock); + + sizes[child] = _starpu_data_get_alloc_size(child_handle); + + if (child_handle->unregister_hook) + { + child_handle->unregister_hook(child_handle); + } + + if (child_handle->per_worker) + { + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &child_handle->per_worker[worker]; + STARPU_ASSERT(local->state == STARPU_INVALID); + if (local->allocated && local->automatically_allocated) + _starpu_request_mem_chunk_removal(child_handle, local, starpu_worker_get_memory_node(worker), sizes[child]); + } + } + + _starpu_memory_stats_free(child_handle); + } + + /* the gathering_node should now have a valid copy of all the children. 
+ * For all nodes, if the node had all copies and none was locally + * allocated then the data is still valid there, else, it's invalidated + * for the gathering node, if we have some locally allocated data, we + * copy all the children (XXX this should not happen so we just do not + * do anything since this is transparent ?) */ + unsigned still_valid[STARPU_MAXNODES]; + + /* we do 2 passes : the first pass determines whether the data is still + * valid or not, the second pass is needed to choose between STARPU_SHARED and + * STARPU_OWNER */ + + unsigned nvalids = 0; + + /* still valid ? */ + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local; + /* until an issue is found the data is assumed to be valid */ + unsigned isvalid = 1; + + for (child = 0; child < root_handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); + local = &child_handle->per_node[node]; + + if (local->state == STARPU_INVALID || local->automatically_allocated == 1) + { + /* One of the bits is missing or is not inside the parent */ + isvalid = 0; + } + + if (local->mc && local->allocated && local->automatically_allocated) + /* free the child data copy in a lazy fashion */ + _starpu_request_mem_chunk_removal(child_handle, local, node, sizes[child]); + } + + local = &root_handle->per_node[node]; + + if (!local->allocated) + /* Even if we have all the bits, if we don't have the + * whole data, it's not valid */ + isvalid = 0; + + if (!isvalid && local->mc && local->allocated && local->automatically_allocated && !local->refcnt) + /* free the data copy in a lazy fashion */ + _starpu_request_mem_chunk_removal(root_handle, local, node, _starpu_data_get_alloc_size(root_handle)); + + /* if there was no invalid copy, the node still has a valid copy */ + still_valid[node] = isvalid; + if (isvalid) + nvalids++; + } + + /* either shared or owned */ + STARPU_ASSERT(nvalids > 0); + + enum _starpu_cache_state newstate 
= (nvalids == 1)?STARPU_OWNER:STARPU_SHARED; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + root_handle->per_node[node].state = still_valid[node]?newstate:STARPU_INVALID; + } + + for (child = 0; child < root_handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); + _starpu_data_free_interfaces(child_handle); + _starpu_spin_unlock(&child_handle->header_lock); + _starpu_spin_destroy(&child_handle->header_lock); + } + + /* Set the initialized state */ + starpu_data_handle_t first_child = starpu_data_get_child(root_handle, 0); + root_handle->initialized = first_child->initialized; + for (child = 1; child < root_handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); + STARPU_ASSERT_MSG(child_handle->initialized == root_handle->initialized, "Inconsistent state between children initialization"); + } + if (root_handle->initialized) + { + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *root_replicate; + + root_replicate = &root_handle->per_node[node]; + root_replicate->initialized = still_valid[node]; + } + } + + for (child = 0; child < root_handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); + _starpu_data_clear_implicit(child_handle); + free(child_handle->active_readonly_children); + free(child_handle->active_readonly_nchildren); + + STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->busy_mutex); + STARPU_PTHREAD_COND_DESTROY(&child_handle->busy_cond); + STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->sequential_consistency_mutex); +#ifdef STARPU_BUBBLE + STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->unpartition_mutex); +#endif + + STARPU_HG_ENABLE_CHECKING(child_handle->post_sync_tasks_cnt); + STARPU_HG_ENABLE_CHECKING(child_handle->busy_count); + + _starpu_data_requester_prio_list_deinit(&child_handle->req_list); + 
_starpu_data_requester_prio_list_deinit(&child_handle->reduction_req_list); + + if (child_handle->switch_cl) + { + free(child_handle->switch_cl->dyn_nodes); + free(child_handle->switch_cl); + } + + _STARPU_TRACE_HANDLE_DATA_UNREGISTER(child_handle); + } + + /* there is no child anymore */ + starpu_data_handle_t children = root_handle->children; + root_handle->children = NULL; + root_handle->nchildren = 0; + root_handle->nplans--; + + /* now the parent may be used again so we release the lock */ + _starpu_spin_unlock(&root_handle->header_lock); + + free(children); + + _STARPU_TRACE_END_UNPARTITION(root_handle, gathering_node); +} + +void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f) +{ + unsigned nparts = _starpu_data_partition_nparts(initial_handle, f); + STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "there should not be multiple filters applied on the same data %p, further filtering has to be done on children", initial_handle); + STARPU_ASSERT_MSG(initial_handle->nplans == 0, "partition planning and synchronous partitioning is not supported"); + + initial_handle->children = NULL; + + /* Make sure to wait for previous tasks working on the whole data */ + starpu_data_acquire_on_node(initial_handle, STARPU_ACQUIRE_NO_NODE, initial_handle->initialized?STARPU_RW:STARPU_W); + starpu_data_release_on_node(initial_handle, STARPU_ACQUIRE_NO_NODE); + + _starpu_data_partition(initial_handle, NULL, nparts, f, 1); +} + +void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *childrenp) +{ + unsigned i; + unsigned nparts = _starpu_data_partition_nparts(initial_handle, f); + STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "partition planning and synchronous partitioning is not supported"); + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + struct starpu_codelet *cl = 
initial_handle->switch_cl; + int home_node = initial_handle->home_node; + starpu_data_handle_t *children; + if (home_node == -1) + /* Nothing better for now */ + /* TODO: pass -1, and make _starpu_fetch_nowhere_task_input + * really call _starpu_fetch_data_on_node, and make that update + * the coherency. + */ + home_node = STARPU_MAIN_RAM; + + _STARPU_MALLOC(children, nparts * sizeof(*children)); + for (i = 0; i < nparts; i++) + { + _STARPU_CALLOC(children[i], 1, sizeof(struct _starpu_data_state)); + childrenp[i] = children[i]; + } + _starpu_data_partition(initial_handle, children, nparts, f, 0); + + if (!cl) + { + /* Create a codelet that will make the coherency on the home node */ + _STARPU_CALLOC(initial_handle->switch_cl, 1, sizeof(*initial_handle->switch_cl)); + cl = initial_handle->switch_cl; + cl->where = STARPU_NOWHERE; + cl->nbuffers = STARPU_VARIABLE_NBUFFERS; + cl->flags = STARPU_CODELET_NOPLANS; + cl->name = "data_partition_switch"; + cl->specific_nodes = 1; + } + if (initial_handle->switch_cl_nparts < nparts) + { + /* First initialization, or previous initialization was with fewer parts, enlarge it */ + _STARPU_REALLOC(cl->dyn_nodes, (nparts+1) * sizeof(*cl->dyn_nodes)); + for (i = initial_handle->switch_cl_nparts; i < nparts+1; i++) + cl->dyn_nodes[i] = home_node; + initial_handle->switch_cl_nparts = nparts; + } +} + +void starpu_data_partition_clean_node(starpu_data_handle_t root_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) +{ + unsigned i; + + if (children[0]->active) + { + starpu_data_unpartition_submit(root_handle, nparts, children, gather_node); + } + + free(children[0]->siblings); + + for (i = 0; i < nparts; i++) + { + children[i]->siblings = NULL; + starpu_data_unregister_submit(children[i]); + } + + _starpu_spin_lock(&root_handle->header_lock); + root_handle->nplans--; + _starpu_spin_unlock(&root_handle->header_lock); +} + +void starpu_data_partition_clean(starpu_data_handle_t root_handle, unsigned nparts, 
starpu_data_handle_t *children) +{ +#ifdef STARPU_DEVEL +#warning FIXME: better choose gathering node +#endif + starpu_data_partition_clean_node(root_handle, nparts, children, root_handle->home_node); +} + +static +void _starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, unsigned char *handles_sequential_consistency) +{ + unsigned i; + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned == 0, "One can't submit several partition plannings at the same time"); + STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "One can't submit a partition planning while a readonly partitioning is active"); + STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); + initial_handle->partitioned++; + initial_handle->active_nchildren = children[0]->nsiblings; + initial_handle->active_children = children[0]->siblings; + _starpu_spin_unlock(&initial_handle->header_lock); + + for (i = 0; i < nparts; i++) + { + _starpu_spin_lock(&children[i]->header_lock); + children[i]->active = 1; + _starpu_spin_unlock(&children[i]->header_lock); + } + + if (!initial_handle->initialized) + /* No need for coherency, it is not initialized */ + return; + + struct starpu_data_descr descr[nparts]; + for (i = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); + descr[i].handle = children[i]; + descr[i].mode = STARPU_W; + } + /* TODO: assert nparts too */ + int ret; + if (handles_sequential_consistency) + ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, + STARPU_NAME, "partition", + 
STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, + 0); + else + ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, + STARPU_NAME, "partition", + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + if (!handles_sequential_consistency || handles_sequential_consistency[0]) + _starpu_data_invalidate_submit_noplan(initial_handle); +} + +void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency) +{ + unsigned i; + unsigned char handles_sequential_consistency[nparts+1]; + handles_sequential_consistency[0] = sequential_consistency; + for(i=1 ; i<nparts+1 ; i++) handles_sequential_consistency[i] = children[i-1]->sequential_consistency; + + _starpu_data_partition_submit(initial_handle, nparts, children, handles_sequential_consistency); +} + +void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) +{ + _starpu_data_partition_submit(initial_handle, nparts, children, NULL); +} + +void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) +{ + starpu_data_partition_readonly_submit_sequential_consistency(initial_handle, nparts, children, initial_handle->sequential_consistency); +} + +void starpu_data_partition_readonly_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency) +{ + unsigned i; + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->part_readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning"); + STARPU_ASSERT_MSG(nparts > 0, "One can't
partition into 0 parts"); + initial_handle->partitioned++; + initial_handle->part_readonly = 1; + if (initial_handle->nactive_readonly_children < initial_handle->partitioned) + { + _STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0])); + _STARPU_REALLOC(initial_handle->active_readonly_nchildren, initial_handle->partitioned * sizeof(initial_handle->active_readonly_nchildren[0])); + initial_handle->nactive_readonly_children = initial_handle->partitioned; + } + initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings; + initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = children[0]->nsiblings; + _starpu_spin_unlock(&initial_handle->header_lock); + + for (i = 0; i < nparts; i++) + { + _starpu_spin_lock(&children[i]->header_lock); + children[i]->active = 1; + children[i]->active_ro = 1; + _starpu_spin_unlock(&children[i]->header_lock); + } + + STARPU_ASSERT_MSG(initial_handle->initialized || initial_handle->init_cl, "It is odd to read-only-partition a data which does not have a value yet"); + struct starpu_data_descr descr[nparts]; + char handles_sequential_consistency[nparts+1]; + handles_sequential_consistency[0] = sequential_consistency; + + for (i = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); + descr[i].handle = children[i]; + descr[i].mode = STARPU_W; + handles_sequential_consistency[i+1] = (char) children[i]->sequential_consistency; + } + /* TODO: assert nparts too */ + int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_R, initial_handle, + STARPU_DATA_MODE_ARRAY, descr, nparts, + STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +void 
starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) +{ + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't upgrade a readonly partition planning to readwrite while other readonly partition plannings are active"); + STARPU_ASSERT_MSG(initial_handle->part_readonly == 1, "One can only upgrade a readonly partition planning"); + STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); + initial_handle->part_readonly = 0; + initial_handle->active_nchildren = initial_handle->active_readonly_nchildren[0]; + initial_handle->active_children = initial_handle->active_readonly_children[0]; + initial_handle->active_readonly_children[0] = NULL; + initial_handle->active_readonly_nchildren[0] = 0; + _starpu_spin_unlock(&initial_handle->header_lock); + + unsigned i; + struct starpu_data_descr descr[nparts]; + for (i = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); + children[i]->active_ro = 0; + descr[i].handle = children[i]; + descr[i].mode = STARPU_W; + } + /* TODO: assert nparts too */ + int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + _starpu_data_invalidate_submit_noplan(initial_handle); +} + +void starpu_data_partition_readonly_downgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) +{ + unsigned i; + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with 
sequential consistency"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't downgrade a read-write partition planning to read-only while other partition plannings are active"); + STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "Partition is already read-only"); + STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); + initial_handle->part_readonly = 1; + if (initial_handle->nactive_readonly_children < initial_handle->partitioned) + { + _STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0])); + _STARPU_REALLOC(initial_handle->active_readonly_nchildren, initial_handle->partitioned * sizeof(initial_handle->active_readonly_nchildren[0])); + initial_handle->nactive_readonly_children = initial_handle->partitioned; + } + initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings; + initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = children[0]->nsiblings; + initial_handle->active_children = NULL; + initial_handle->active_nchildren = 0; + _starpu_spin_unlock(&initial_handle->header_lock); + + for (i = 0; i < nparts; i++) + { + _starpu_spin_lock(&children[i]->header_lock); + children[i]->active = 1; + children[i]->active_ro = 1; + _starpu_spin_unlock(&children[i]->header_lock); + } + + struct starpu_data_descr descr[nparts]; + unsigned n; + for (i = 0, n = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); + if (!children[i]->initialized) + /* Dropped value, do not care about coherency for this one */ + continue; + descr[n].handle = children[i]; + descr[n].mode = STARPU_R; + n++; + } + /* TODO: assert nparts too */ + int ret = starpu_task_insert(initial_handle->switch_cl, 
initial_handle->initialized?STARPU_RW:STARPU_W, initial_handle, + STARPU_DATA_MODE_ARRAY, descr, n, + ///STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +} + +void _starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, unsigned char *handles_sequential_consistency, void (*callback_func)(void *), void *callback_arg) +{ + unsigned i; + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle); + STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); + if (initial_handle->part_readonly) + { + /* Replace this children set with the last set in the list of readonly children sets */ + for (i = 0; i < initial_handle->partitioned-1; i++) + { + if (initial_handle->active_readonly_children[i] == children[0]->siblings) + { + initial_handle->active_readonly_children[i] = initial_handle->active_readonly_children[initial_handle->partitioned-1]; + initial_handle->active_readonly_nchildren[i] = initial_handle->active_readonly_nchildren[initial_handle->partitioned-1]; + initial_handle->active_readonly_children[initial_handle->partitioned-1] = NULL; + initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = 0; + break; + } + } + } + else + { + initial_handle->active_nchildren = 0; + initial_handle->active_children = NULL; + } + initial_handle->partitioned--; + if (!initial_handle->partitioned) + initial_handle->part_readonly = 0; + initial_handle->active_nchildren = 0; + 
initial_handle->active_children = NULL; + _starpu_spin_unlock(&initial_handle->header_lock); + + for (i = 0; i < nparts; i++) + { + _starpu_spin_lock(&children[i]->header_lock); + children[i]->active = 0; + children[i]->active_ro = 0; + _starpu_spin_unlock(&children[i]->header_lock); + } + + unsigned n; + struct starpu_data_descr descr[nparts]; + for (i = 0, n = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); + if (!children[i]->initialized) + /* Dropped value, do not care about coherency for this one */ + continue; + descr[n].handle = children[i]; + descr[n].mode = STARPU_RW; + n++; + } + /* TODO: assert nparts too */ + int ret; + if (handles_sequential_consistency) + ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, + STARPU_NAME, "unpartition", + STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, + STARPU_CALLBACK_WITH_ARG_NFREE, callback_func, callback_arg, + 0); + else + ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, + STARPU_NAME, "unpartition", + STARPU_CALLBACK_WITH_ARG_NFREE, callback_func, callback_arg, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + for (i = 0; i < nparts; i++) + { + if (!handles_sequential_consistency || handles_sequential_consistency[i+1]) + _starpu_data_invalidate_submit_noplan(children[i]); + } +} + +void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) +{ + _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, NULL, NULL, NULL); +} + +void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int 
gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg) +{ + unsigned i; + unsigned char handles_sequential_consistency[nparts+1]; + handles_sequential_consistency[0] = sequential_consistency; + for(i=1 ; i<nparts+1 ; i++) handles_sequential_consistency[i] = children[i-1]->sequential_consistency; + _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, handles_sequential_consistency, callback_func, callback_arg); +} + +void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency) +{ + unsigned i; + unsigned char handles_sequential_consistency[nparts+1]; + handles_sequential_consistency[0] = sequential_consistency; + for(i=1 ; i<nparts+1 ; i++) handles_sequential_consistency[i] = children[i-1]->sequential_consistency; + _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, handles_sequential_consistency, NULL, NULL); +} + +void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) +{ + STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); + STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported"); + _starpu_spin_lock(&initial_handle->header_lock); + STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle); + STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); + initial_handle->part_readonly = 1; + _starpu_spin_unlock(&initial_handle->header_lock); + + unsigned i, n; + struct starpu_data_descr descr[nparts]; + for (i = 0, n = 0; i < nparts; i++) + { + STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle);
+ if (!children[i]->initialized) + /* Dropped value, do not care about coherency for this one */ + continue; + descr[n].handle = children[i]; + descr[n].mode = STARPU_R; + n++; + } + /* TODO: assert nparts too */ + int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert") +} + +/* Unpartition everything below ancestor */ +static void starpu_data_unpartition_submit_r(starpu_data_handle_t ancestor, int gathering_node) +{ + unsigned i, j, nsiblings; + if (!ancestor->partitioned) + /* It's already unpartitioned */ + return; + _STARPU_DEBUG("ancestor %p needs unpartitioning\n", ancestor); + if (ancestor->part_readonly) + { + unsigned n = ancestor->partitioned; + /* Uh, has to go through all read-only partitions */ + for (i = 0; i < n; i++) + { + /* Note: active_readonly_children is emptied by starpu_data_unpartition_submit_r calls below, that's why we always refer to [0] here */ + starpu_data_handle_t *children = ancestor->active_readonly_children[0]; + _STARPU_DEBUG("unpartition readonly children %p etc.\n", children[0]); + nsiblings = children[0]->nsiblings; + for (j = 0; j < nsiblings; j++) + { + /* Make sure our children are unpartitioned */ + starpu_data_unpartition_submit_r(children[j], gathering_node); + } + /* And unpartition them */ + starpu_data_unpartition_submit(ancestor, nsiblings, children, gathering_node); + } + } + else + { + _STARPU_DEBUG("unpartition children %p\n", ancestor->active_children); + /* Only one partition */ + nsiblings = ancestor->active_children[0]->nsiblings; + for (i = 0; i < nsiblings; i++) + starpu_data_unpartition_submit_r(ancestor->active_children[i], gathering_node); + /* And unpartition ourself */ + starpu_data_unpartition_submit(ancestor, nsiblings, ancestor->active_children, gathering_node); + } +} + +/* Make ancestor partition itself properly for target */ +static void 
_starpu_data_partition_access_look_up(starpu_data_handle_t ancestor, starpu_data_handle_t target, int write) +{ + /* First make sure ancestor has proper state, if not, ask father */ + if (!ancestor->active || (write && ancestor->active_ro)) + { + /* (The root is always active-rw) */ + STARPU_ASSERT(ancestor->father_handle); + _STARPU_DEBUG("ancestor %p is not ready: %s, asking father %p\n", ancestor, ancestor->active ? ancestor->active_ro ? "RO" : "RW" : "NONE", ancestor->father_handle); + _starpu_data_partition_access_look_up(ancestor->father_handle, ancestor, write); + _STARPU_DEBUG("ancestor %p is now ready\n", ancestor); + } + else + _STARPU_DEBUG("ancestor %p was ready\n", ancestor); + + /* We shouldn't be called for nothing */ + STARPU_ASSERT(!ancestor->partitioned || !target || ancestor->active_children != target->siblings || (ancestor->part_readonly && write)); + + /* Then unpartition ancestor if needed */ + if (ancestor->partitioned && + /* Not the right children, unpartition ourself */ + ((target && write && ancestor->active_children != target->siblings) || + (target && !write && !ancestor->part_readonly) || + /* We are partitioned and we want to write or some child + * is writing and we want to read, unpartition ourself*/ + (!target && (write || !ancestor->part_readonly)))) + { +#ifdef STARPU_DEVEL +#warning FIXME: better choose gathering node +#endif + starpu_data_unpartition_submit_r(ancestor, ancestor->home_node); + } + + if (!target) + { + _STARPU_DEBUG("ancestor %p is done\n", ancestor); + /* No child target, nothing more to do actually. 
*/ + return; + } + + /* Then partition ancestor towards target, if needed */ + if (ancestor->partitioned) + { + /* That must be readonly, otherwise we would have unpartitioned it */ + STARPU_ASSERT(ancestor->part_readonly); + if (write) + { + _STARPU_DEBUG("ancestor %p is already partitioned RO, turn RW\n", ancestor); + /* Already partitioned, normally it's already for the target */ + STARPU_ASSERT(ancestor->active_children == target->siblings); + /* And we are here just because we haven't partitioned rw */ + STARPU_ASSERT(ancestor->part_readonly && write); + /* So we just need to upgrade ro to rw */ + starpu_data_partition_readwrite_upgrade_submit(ancestor, target->nsiblings, target->siblings); + } + else + { + _STARPU_DEBUG("ancestor %p is already partitioned RO, but not to target, partition towards target too\n", ancestor); + /* So we just need to add one more read-only partitioning, towards target */ + starpu_data_partition_readonly_submit(ancestor, target->nsiblings, target->siblings); + } + } + else + { + /* Just need to partition properly for the child */ + if (write) + { + _STARPU_DEBUG("partition ancestor %p RW\n", ancestor); + starpu_data_partition_submit(ancestor, target->nsiblings, target->siblings); + } + else + { + _STARPU_DEBUG("partition ancestor %p RO\n", ancestor); + starpu_data_partition_readonly_submit(ancestor, target->nsiblings, target->siblings); + } + } +} + +void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write) +{ + _STARPU_DEBUG("accessing %p %s\n", target, write ? "RW" : "RO"); + _starpu_data_partition_access_look_up(target, NULL, write); +} + +void starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts, + size_t elemsize, unsigned id, + unsigned blocksize, unsigned *chunk_size, + size_t *offset) +{ + *chunk_size = n/nparts; + unsigned remainder = n % nparts; + if (id < remainder) + (*chunk_size)++; + /* + * Computing the total offset.
The formula may not be really clear, but + * it really just is: + * + * total = 0; + * for (i = 0; i < id; i++) + * { + * total += n/nparts; + * if (i < n%nparts) + * total++; + * } + * offset = total * elemsize * blocksize; + */ + if (offset != NULL) + *offset = (id *(n/nparts) + STARPU_MIN(remainder, id)) * (size_t) blocksize * elemsize; +} diff --git a/src/datawizard/filters.h b/src/datawizard/filters.h new file mode 100644 index 0000000..6d8aa97 --- /dev/null +++ b/src/datawizard/filters.h @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __FILTERS_H__ +#define __FILTERS_H__ + +/** @file */ + +#include +#include +#include + +#include +#include + +#pragma GCC visibility push(hidden) + +/** submit asynchronous unpartitioning / partitioning to make target active read-only or read-write */ +void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write); + +#pragma GCC visibility pop + +#endif diff --git a/src/datawizard/footprint.c b/src/datawizard/footprint.c new file mode 100644 index 0000000..8dddc98 --- /dev/null +++ b/src/datawizard/footprint.c @@ -0,0 +1,122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +uint32_t starpu_task_data_footprint(struct starpu_task *task) +{ + uint32_t footprint = 0; + unsigned buffer; + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); + enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); + + if (mode & STARPU_NOFOOTPRINT) + continue; + + uint32_t handle_footprint = _starpu_data_get_footprint(handle); + + footprint = starpu_hash_crc32c_be(handle_footprint, footprint); + } + + return footprint; +} + +uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, struct _starpu_job *j) +{ + if (j->footprint_is_computed) + return j->footprint; + + uint32_t footprint = 0; + + struct starpu_task *task = j->task; + + if (model) + { + if (model->footprint) + { + footprint = model->footprint(task); + } + else + { + struct starpu_perfmodel_per_arch *per_arch; + if (arch) + per_arch = starpu_perfmodel_get_model_per_arch(model, arch, nimpl); + if (arch && per_arch != NULL && per_arch->size_base) + { + size_t size = per_arch->size_base(task, arch, nimpl); + footprint = starpu_hash_crc32c_be_n(&size, sizeof(size), footprint); + } + else if 
(model->size_base) + { + size_t size = model->size_base(task, nimpl); + footprint = starpu_hash_crc32c_be_n(&size, sizeof(size), footprint); + } + else + { + footprint = starpu_task_data_footprint(task); + } + } + } + else + { + footprint = starpu_task_data_footprint(task); + } + + j->footprint = footprint; + j->footprint_is_computed = 1; + + return footprint; +} + +uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle) +{ + uint32_t interfaceid = (uint32_t)starpu_data_get_interface_id(handle); + uint32_t init = interfaceid < STARPU_MAX_INTERFACE_ID ? interfaceid : 0; + + STARPU_ASSERT(handle->ops->footprint); + + uint32_t handle_footprint = handle->ops->footprint(handle); + + return starpu_hash_crc32c_be(handle_footprint, init); +} + +uint32_t _starpu_compute_data_alloc_footprint(starpu_data_handle_t handle) +{ + uint32_t interfaceid = (uint32_t)starpu_data_get_interface_id(handle); + uint32_t init = interfaceid < STARPU_MAX_INTERFACE_ID ? interfaceid : 0; + + uint32_t handle_footprint; + if (handle->ops->alloc_footprint) + handle_footprint = handle->ops->alloc_footprint(handle); + else + handle_footprint = handle->ops->footprint(handle); + + return starpu_hash_crc32c_be(handle_footprint, init); +} + +uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) +{ + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + return _starpu_compute_buffers_footprint(model, arch, nimpl, j); +} diff --git a/src/datawizard/footprint.h b/src/datawizard/footprint.h new file mode 100644 index 0000000..eab7609 --- /dev/null +++ b/src/datawizard/footprint.h @@ -0,0 +1,41 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __FOOTPRINT_H__ +#define __FOOTPRINT_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Compute the footprint that characterizes the job and cache it into the job + * structure. */ +uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl, struct _starpu_job *j); + +/** Compute the footprint that characterizes the layout of the data handle. */ +uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle); + +/** Compute the footprint that characterizes the allocation of the data handle. */ +uint32_t _starpu_compute_data_alloc_footprint(starpu_data_handle_t handle); + +#pragma GCC visibility pop + +#endif // __FOOTPRINT_H__ diff --git a/src/datawizard/interfaces/bcsr_filters.c b/src/datawizard/interfaces/bcsr_filters.c new file mode 100644 index 0000000..d346b3d --- /dev/null +++ b/src/datawizard/interfaces/bcsr_filters.c @@ -0,0 +1,105 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +void starpu_bcsr_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nparts) +{ + struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface; + struct starpu_bcsr_interface *bcsr_child = (struct starpu_bcsr_interface *) child_interface; + + size_t elemsize = bcsr_father->elemsize; + uint32_t firstentry = bcsr_father->firstentry; + uint32_t r = bcsr_father->r; + uint32_t c = bcsr_father->c; + uint32_t *ram_rowptr = bcsr_father->ram_rowptr; + uint32_t *rowptr = bcsr_father->rowptr; + + unsigned child_nrow; + size_t child_rowoffset; + + STARPU_ASSERT_MSG(bcsr_father->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__); + + bcsr_child->id = bcsr_father->id; + + starpu_filter_nparts_compute_chunk_size_and_offset(bcsr_father->nrow, nparts, 1, id, 1, &child_nrow, &child_rowoffset); + + /* child blocks indexes between these (0-based) */ + uint32_t start_block = ram_rowptr[child_rowoffset] - firstentry; + uint32_t end_block = ram_rowptr[child_rowoffset + child_nrow] - firstentry; + + bcsr_child->nnz = end_block - start_block; + bcsr_child->nrow = child_nrow; + + bcsr_child->firstentry = firstentry + 
start_block; + bcsr_child->r = bcsr_father->r; + bcsr_child->c = bcsr_father->c; + bcsr_child->elemsize = elemsize; + bcsr_child->ram_colind = bcsr_father->ram_colind + start_block; + bcsr_child->ram_rowptr = ram_rowptr + child_rowoffset; + + if (bcsr_father->nzval) + { + bcsr_child->nzval = bcsr_father->nzval + start_block * r*c * elemsize; + bcsr_child->colind = bcsr_father->colind + start_block; + bcsr_child->rowptr = rowptr + child_rowoffset; + } +} + +void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nparts) +{ + struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface; + /* each chunk becomes a small dense matrix */ + struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; + + size_t elemsize = bcsr_father->elemsize; + uint32_t firstentry = bcsr_father->firstentry; + + /* size of the tiles */ + uint32_t r = bcsr_father->r; + uint32_t c = bcsr_father->c; + + uint32_t ptr_offset = c*r*id*elemsize; + + STARPU_ASSERT_MSG(bcsr_father->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__); + + matrix_child->id = STARPU_MATRIX_INTERFACE_ID; + matrix_child->nx = c; + matrix_child->ny = r; + matrix_child->ld = c; + matrix_child->elemsize = elemsize; + matrix_child->allocsize = c*r*elemsize; + + if (bcsr_father->nzval) + { + uint8_t *nzval = (uint8_t *)(bcsr_father->nzval); + matrix_child->dev_handle = matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset]; + matrix_child->offset = 0; + } +} + +unsigned starpu_bcsr_filter_canonical_block_get_nchildren(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, starpu_data_handle_t handle) +{ + return (unsigned)starpu_bcsr_get_nnz(handle); +} + +struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, 
STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_matrix_ops; +} diff --git a/src/datawizard/interfaces/bcsr_interface.c b/src/datawizard/interfaces/bcsr_interface.c new file mode 100644 index 0000000..8b56edc --- /dev/null +++ b/src/datawizard/interfaces/bcsr_interface.c @@ -0,0 +1,499 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +/* + * BCSR : blocked CSR, we use blocks of size (r x c) + */ + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); + +static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void register_bcsr_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static void *bcsr_to_pointer(void *data_interface, unsigned node); +static starpu_ssize_t allocate_bcsr_buffer_on_node(void *data_interface, unsigned dst_node); +static void free_bcsr_buffer_on_node(void *data_interface, unsigned node); +static size_t bcsr_interface_get_size(starpu_data_handle_t handle); +static int bcsr_compare(void *data_interface_a, void *data_interface_b); +static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); +static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + +struct starpu_data_interface_ops starpu_interface_bcsr_ops = +{ + .register_data_handle = register_bcsr_handle, + .allocate_data_on_node = allocate_bcsr_buffer_on_node, + .free_data_on_node = free_bcsr_buffer_on_node, + .copy_methods = &bcsr_copy_data_methods_s, + .get_size = bcsr_interface_get_size, + .interfaceid = STARPU_BCSR_INTERFACE_ID, + .interface_size = sizeof(struct starpu_bcsr_interface), + .footprint = footprint_bcsr_interface_crc32, + .compare = bcsr_compare, + .describe = describe, + .to_pointer = bcsr_to_pointer, + .name = "STARPU_BCSR_INTERFACE", + .pack_data = pack_data, + .peek_data = peek_data, + .unpack_data = unpack_data, + .pack_meta = NULL, + .unpack_meta = NULL, + 
.free_meta = NULL +}; + +static void *bcsr_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_bcsr_interface *bcsr_interface = data_interface; + + return (void*) bcsr_interface->nzval; +} + +static void register_bcsr_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface; + + int node; + uint32_t *ram_colind = NULL; + uint32_t *ram_rowptr = NULL; + + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + ram_colind = bcsr_interface->colind; + ram_rowptr = bcsr_interface->rowptr; + } + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_bcsr_interface *local_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->nzval = bcsr_interface->nzval; + local_interface->colind = bcsr_interface->colind; + local_interface->rowptr = bcsr_interface->rowptr; + } + else + { + local_interface->nzval = 0; + local_interface->colind = NULL; + local_interface->rowptr = NULL; + } + + local_interface->ram_colind = ram_colind; + local_interface->ram_rowptr = ram_rowptr; + local_interface->id = bcsr_interface->id; + local_interface->nnz = bcsr_interface->nnz; + local_interface->nrow = bcsr_interface->nrow; + local_interface->firstentry = bcsr_interface->firstentry; + local_interface->r = bcsr_interface->r; + local_interface->c = bcsr_interface->c; + local_interface->elemsize = bcsr_interface->elemsize; + } +} + +void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, int home_node, + uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, + uint32_t *rowptr, uint32_t firstentry, + uint32_t r, uint32_t c, size_t elemsize) +{ + struct starpu_bcsr_interface bcsr_interface = + { + .id = STARPU_BCSR_INTERFACE_ID, + .nzval = nzval, + .colind = colind, + .rowptr = rowptr, + .nnz = nnz, + .nrow = nrow, + 
.firstentry = firstentry, + .r = r, + .c = c, + .elemsize = elemsize + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (nnz) + { + if (r && c && elemsize) + { + STARPU_ASSERT_ACCESSIBLE(nzval); + STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize*r*c - 1); + } + STARPU_ASSERT_ACCESSIBLE(colind); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1); + } + STARPU_ASSERT_ACCESSIBLE(rowptr); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) rowptr + (nrow+1)*sizeof(uint32_t) - 1); + } +#endif + + starpu_data_register(handleptr, home_node, &bcsr_interface, &starpu_interface_bcsr_ops); +} + +static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle) +{ + uint32_t hash; + + hash = starpu_hash_crc32c_be(starpu_bcsr_get_nnz(handle), 0); + hash = starpu_hash_crc32c_be(starpu_bcsr_get_c(handle), hash); + hash = starpu_hash_crc32c_be(starpu_bcsr_get_r(handle), hash); + + return hash; +} + +static int bcsr_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_bcsr_interface *bcsr_a = (struct starpu_bcsr_interface *) data_interface_a; + struct starpu_bcsr_interface *bcsr_b = (struct starpu_bcsr_interface *) data_interface_b; + + /* Two matrices are considered compatible if they have the same size */ + return (bcsr_a->nnz == bcsr_b->nnz) + && (bcsr_a->nrow == bcsr_b->nrow) + && (bcsr_a->r == bcsr_b->r) + && (bcsr_a->c == bcsr_b->c) + && (bcsr_a->elemsize == bcsr_b->elemsize); +} + +/* offer an access to the data parameters */ +uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. 
The given data is not a bcsr."); +#endif + + return data_interface->nnz; +} + +uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->nrow; +} + +uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->firstentry; +} + +uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->r; +} + +uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->c; +} + +size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. 
The given data is not a bcsr."); +#endif + + return data_interface->elemsize; +} + +uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->nzval; +} + +uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle) +{ + int node; + node = starpu_worker_get_local_memory_node(); + + /* XXX 0 */ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); +#endif + + return data_interface->colind; +} + +uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle) +{ + int node; + node = starpu_worker_get_local_memory_node(); + + /* XXX 0 */ + struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. 
The given data is not a bcsr."); +#endif + + return data_interface->rowptr; +} + +static size_t bcsr_interface_get_size(starpu_data_handle_t handle) +{ + size_t size; + + uint32_t nnz = starpu_bcsr_get_nnz(handle); + uint32_t nrow = starpu_bcsr_get_nrow(handle); + uint32_t r = starpu_bcsr_get_r(handle); + uint32_t c = starpu_bcsr_get_c(handle); + size_t elemsize = starpu_bcsr_get_elemsize(handle); + + size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); + + return size; +} + + +/* memory allocation/deallocation primitives for the BLAS interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr_nzval, addr_colind, addr_rowptr; + starpu_ssize_t allocated_memory; + + /* we need the 3 arrays to be allocated */ + struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface_; + + uint32_t nnz = bcsr_interface->nnz; + uint32_t nrow = bcsr_interface->nrow; + size_t elemsize = bcsr_interface->elemsize; + + uint32_t r = bcsr_interface->r; + uint32_t c = bcsr_interface->c; + + STARPU_ASSERT_MSG(r && c, "partitioning bcsr with several memory nodes is not supported yet"); + + if (nnz) + { + addr_nzval = starpu_malloc_on_node(dst_node, nnz*r*c*elemsize); + if (!addr_nzval) + goto fail_nzval; + addr_colind = starpu_malloc_on_node(dst_node, nnz*sizeof(uint32_t)); + if (!addr_colind) + goto fail_colind; + } + else + { + addr_nzval = addr_colind = 0; + } + addr_rowptr = starpu_malloc_on_node(dst_node, (nrow+1)*sizeof(uint32_t)); + if (!addr_rowptr) + goto fail_rowptr; + + /* allocation succeeded */ + allocated_memory = + nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); + + /* update the data properly in consequence */ + bcsr_interface->nzval = addr_nzval; + bcsr_interface->colind = (uint32_t*) addr_colind; + bcsr_interface->rowptr = (uint32_t*) addr_rowptr; + + return allocated_memory; + 
+fail_rowptr: + if (nnz) + starpu_free_on_node(dst_node, addr_colind, nnz*sizeof(uint32_t)); +fail_colind: + if (nnz) + starpu_free_on_node(dst_node, addr_nzval, nnz*r*c*elemsize); +fail_nzval: + /* allocation failed */ + return -ENOMEM; +} + +static void free_bcsr_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface; + uint32_t nnz = bcsr_interface->nnz; + uint32_t nrow = bcsr_interface->nrow; + size_t elemsize = bcsr_interface->elemsize; + uint32_t r = bcsr_interface->r; + uint32_t c = bcsr_interface->c; + + if (nnz) + { + starpu_free_on_node(node, bcsr_interface->nzval, nnz*r*c*elemsize); + bcsr_interface->nzval = 0; + starpu_free_on_node(node, (uintptr_t) bcsr_interface->colind, nnz*sizeof(uint32_t)); + bcsr_interface->colind = NULL; + } + starpu_free_on_node(node, (uintptr_t) bcsr_interface->rowptr, (nrow+1)*sizeof(uint32_t)); + bcsr_interface->rowptr = NULL; +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_bcsr_interface *src_bcsr = (struct starpu_bcsr_interface *) src_interface; + struct starpu_bcsr_interface *dst_bcsr = (struct starpu_bcsr_interface *) dst_interface; + + uint32_t nnz = src_bcsr->nnz; + uint32_t nrow = src_bcsr->nrow; + size_t elemsize = src_bcsr->elemsize; + + uint32_t r = src_bcsr->r; + uint32_t c = src_bcsr->c; + + int ret = 0; + + if (nnz) + { + if (starpu_interface_copy(src_bcsr->nzval, 0, src_node, dst_bcsr->nzval, 0, dst_node, nnz*elemsize*r*c, async_data)) + ret = -EAGAIN; + + if (starpu_interface_copy((uintptr_t)src_bcsr->colind, 0, src_node, (uintptr_t)dst_bcsr->colind, 0, dst_node, nnz*sizeof(uint32_t), async_data)) + ret = -EAGAIN; + } + + if (starpu_interface_copy((uintptr_t)src_bcsr->rowptr, 0, src_node, (uintptr_t)dst_bcsr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data)) + ret = -EAGAIN; + + 
starpu_interface_data_copy(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t)); + + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) data_interface; + return snprintf(buf, size, "b%ux%ux%ux%ux%u", + (unsigned) bcsr->nnz, + (unsigned) bcsr->nrow, + (unsigned) bcsr->r, + (unsigned) bcsr->c, + (unsigned) bcsr->elemsize); +} + +static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); + + // We first pack colind + *count = bcsr->nnz * sizeof(bcsr->colind[0]); + // Then rowptr + *count += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); + // Then nnzval + *count += bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize; + + if (ptr != NULL) + { + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + char *tmp = *ptr; + if (bcsr->nnz) + { + memcpy(tmp, (void*)bcsr->colind, bcsr->nnz * sizeof(bcsr->colind[0])); + tmp += bcsr->nnz * sizeof(bcsr->colind[0]); + memcpy(tmp, (void*)bcsr->rowptr, (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0])); + tmp += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); + } + memcpy(tmp, (void*)bcsr->nzval, bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize); + } + + return 0; +} + +static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == (bcsr->nnz * sizeof(bcsr->colind[0]))+((bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]))+(bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize)); + + char *tmp = ptr; + if (bcsr->nnz) + { + memcpy((void*)bcsr->colind, 
tmp, bcsr->nnz * sizeof(bcsr->colind[0])); + tmp += bcsr->nnz * sizeof(bcsr->colind[0]); + memcpy((void*)bcsr->rowptr, tmp, (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0])); + tmp += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); + } + memcpy((void*)bcsr->nzval, tmp, bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize); + + return 0; +} + +static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + peek_data(handle, node, ptr, count); + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + + return 0; +} + diff --git a/src/datawizard/interfaces/block_filters.c b/src/datawizard/interfaces/block_filters.c new file mode 100644 index 0000000..6ebe873 --- /dev/null +++ b/src/datawizard/interfaces/block_filters.c @@ -0,0 +1,299 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +static void _starpu_block_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts, uintptr_t shadow_size) +{ + struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; + struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface; + + unsigned blocksize; + /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz*/ + uint32_t nn; + uint32_t nx; + uint32_t ny; + uint32_t nz; + + switch(dim) + { + /* horizontal*/ + case 1: + /* actual number of elements */ + nx = block_father->nx - 2 * shadow_size; + ny = block_father->ny; + nz = block_father->nz; + nn = nx; + blocksize = 1; + break; + /* vertical*/ + case 2: + nx = block_father->nx; + /* actual number of elements */ + ny = block_father->ny - 2 * shadow_size; + nz = block_father->nz; + nn = ny; + blocksize = block_father->ldy; + break; + /* depth*/ + case 3: + nx = block_father->nx; + ny = block_father->ny; + /* actual number of elements */ + nz = block_father->nz - 2 * shadow_size; + nn = nz; + blocksize = block_father->ldz; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + size_t elemsize = block_father->elemsize; + + STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); + + uint32_t child_nn; + size_t offset; + starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); + + child_nn += 2 * shadow_size; + + STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); + block_child->id = block_father->id; + + switch(dim) + { + case 1: + block_child->nx = child_nn; + block_child->ny = ny; + block_child->nz = nz; + break; + case 2: + block_child->nx = nx; + block_child->ny = child_nn; + 
block_child->nz = nz; + break; + case 3: + block_child->nx = nx; + block_child->ny = ny; + block_child->nz = child_nn; + break; + } + + block_child->elemsize = elemsize; + + if (block_father->dev_handle) + { + if (block_father->ptr) + block_child->ptr = block_father->ptr + offset; + block_child->ldy = block_father->ldy; + block_child->ldz = block_father->ldz; + block_child->dev_handle = block_father->dev_handle; + block_child->offset = block_father->offset + offset; + } +} + +void starpu_block_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_block_filter_block(1, father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_block_filter_block(1, father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_block_filter_block(2, father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_block_filter_block(2, father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_block_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_block_filter_block(3, father_interface, child_interface, f, id, nparts, 0); +} + +void 
starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_block_filter_block(3, father_interface, child_interface, f, id, nparts, shadow_size); +} + +static void _starpu_block_filter_pick_matrix(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; + struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; + + unsigned blocksize; + + uint32_t nn; + uint32_t nx = block_father->nx; + uint32_t ny = block_father->ny; + uint32_t nz = block_father->nz; + + switch(dim) + { + /* along y-axis */ + case 1: + nn = ny; + blocksize = block_father->ldy; + break; + /* along z-axis */ + case 2: + nn = nz; + blocksize = block_father->ldz; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + size_t elemsize = block_father->elemsize; + + size_t chunk_pos = (size_t)f->filter_arg_ptr; + + STARPU_ASSERT_MSG(nparts <= nn, "cannot get %u matrix", nparts); + STARPU_ASSERT_MSG((chunk_pos + id) < nn, "the chosen matrix should be in the block"); + + size_t offset = (chunk_pos + id) * blocksize * elemsize; + + STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); + matrix_child->id = STARPU_MATRIX_INTERFACE_ID; + + switch(dim) + { + /* along y-axis */ + case 1: + matrix_child->nx = nx; + matrix_child->ny = nz; + break; + /* along z-axis */ + case 2: + matrix_child->nx = nx; + matrix_child->ny = ny; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + matrix_child->elemsize = elemsize; + matrix_child->allocsize = matrix_child->nx * matrix_child->ny * elemsize; + + if 
(block_father->dev_handle) + { + if (block_father->ptr) + matrix_child->ptr = block_father->ptr + offset; + switch(dim) + { + /* along y-axis */ + case 1: + matrix_child->ld = block_father->ldz; + break; + /* along z-axis */ + case 2: + matrix_child->ld = block_father->ldy; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + matrix_child->dev_handle = block_father->dev_handle; + matrix_child->offset = block_father->offset + offset; + } +} + +void starpu_block_filter_pick_matrix_z(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_block_filter_pick_matrix(2, father_interface, child_interface, f, id, nparts); +} + +void starpu_block_filter_pick_matrix_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_block_filter_pick_matrix(1, father_interface, child_interface, f, id, nparts); +} + +struct starpu_data_interface_ops *starpu_block_filter_pick_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_matrix_ops; +} + + +void starpu_block_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; + /* each chunk becomes a variable */ + struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; + + uint32_t nx = block_father->nx; + uint32_t ny = block_father->ny; + uint32_t nz = block_father->nz; + + unsigned ldy = block_father->ldy; + unsigned ldz = block_father->ldz; + + size_t elemsize = block_father->elemsize; + + uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; + // int i; + // 
for(i=0; i<3; i++) + // { + // printf("pos is %d\n", chunk_pos[i]); + // } + + STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny)&&(chunk_pos[2] < nz), "the chosen variable should be in the block"); + + size_t offset = (chunk_pos[2] * ldz + chunk_pos[1] * ldy + chunk_pos[0]) * elemsize; + + STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); + + /* update the child's interface */ + variable_child->id = STARPU_VARIABLE_INTERFACE_ID; + variable_child->elemsize = elemsize; + + /* is the information on this node valid ? */ + if (block_father->dev_handle) + { + if (block_father->ptr) + variable_child->ptr = block_father->ptr + offset; + variable_child->dev_handle = block_father->dev_handle; + variable_child->offset = block_father->offset + offset; + } +} + +struct starpu_data_interface_ops *starpu_block_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; +} diff --git a/src/datawizard/interfaces/block_interface.c b/src/datawizard/interfaces/block_interface.c new file mode 100644 index 0000000..df44855 --- /dev/null +++ b/src/datawizard/interfaces/block_interface.c @@ -0,0 +1,555 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods block_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + + +static void register_block_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static void *block_to_pointer(void *data_interface, unsigned node); +static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_block_buffer_on_node(void *data_interface, unsigned node); +static size_t block_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle); +static int block_compare(void *data_interface_a, void *data_interface_b); +static void display_block_interface(starpu_data_handle_t handle, FILE *f); +static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_block_ops = +{ + .register_data_handle = register_block_handle, + .allocate_data_on_node = allocate_block_buffer_on_node, + .to_pointer = block_to_pointer, + .free_data_on_node = free_block_buffer_on_node, + .map_data = map_block, + .unmap_data = unmap_block, + .update_map = 
update_map_block, + .copy_methods = &block_copy_data_methods_s, + .get_size = block_interface_get_size, + .footprint = footprint_block_interface_crc32, + .compare = block_compare, + .interfaceid = STARPU_BLOCK_INTERFACE_ID, + .interface_size = sizeof(struct starpu_block_interface), + .display = display_block_interface, + .pack_data = pack_block_handle, + .peek_data = peek_block_handle, + .unpack_data = unpack_block_handle, + .describe = describe, + .name = "STARPU_BLOCK_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void *block_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_block_interface *block_interface = data_interface; + + return (void*) block_interface->ptr; +} + +static void register_block_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_block_interface *local_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = block_interface->ptr; + local_interface->dev_handle = block_interface->dev_handle; + local_interface->offset = block_interface->offset; + local_interface->ldy = block_interface->ldy; + local_interface->ldz = block_interface->ldz; + } + else + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + local_interface->ldy = 0; + local_interface->ldz = 0; + } + + local_interface->id = block_interface->id; + local_interface->nx = block_interface->nx; + local_interface->ny = block_interface->ny; + local_interface->nz = block_interface->nz; + local_interface->elemsize = block_interface->elemsize; + } +} + +/* declare a new data with the BLAS interface */ +void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node, + 
uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, + uint32_t ny, uint32_t nz, size_t elemsize) +{ + STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); + STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); + struct starpu_block_interface block_interface = + { + .id = STARPU_BLOCK_INTERFACE_ID, + .ptr = ptr, + .dev_handle = ptr, + .offset = 0, + .ldy = ldy, + .ldz = ldz, + .nx = nx, + .ny = ny, + .nz = nz, + .elemsize = elemsize + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (nx && ny && nz && elemsize) + { + STARPU_ASSERT_ACCESSIBLE(ptr); + STARPU_ASSERT_ACCESSIBLE(ptr + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1); + } + } +#endif + + starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops); +} + +void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz) +{ + struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + block_interface->ptr = ptr; + block_interface->dev_handle = dev_handle; + block_interface->offset = offset; + block_interface->ldy = ldy; + block_interface->ldz = ldz; +} + +static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle) +{ + uint32_t hash; + + hash = starpu_hash_crc32c_be(starpu_block_get_nx(handle), 0); + hash = starpu_hash_crc32c_be(starpu_block_get_ny(handle), hash); + hash = starpu_hash_crc32c_be(starpu_block_get_nz(handle), hash); + + return hash; +} + +static int block_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_block_interface *block_a = (struct starpu_block_interface *) data_interface_a; + struct starpu_block_interface *block_b = (struct starpu_block_interface *) data_interface_b; + + /* Two 
blocks are considered compatible if they have the same size */ + return (block_a->nx == block_b->nx) + && (block_a->ny == block_b->ny) + && (block_a->nz == block_b->nz) + && (block_a->elemsize == block_b->elemsize); +} + +static void display_block_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_block_interface *block_interface; + + block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t%u\t%u\t", block_interface->nx, block_interface->ny, block_interface->nz); +} + +#define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) +#define IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz) ((nx) * (ny) == (ldz)) + +static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, node); + + uint32_t ldy = block_interface->ldy; + uint32_t ldz = block_interface->ldz; + uint32_t nx = block_interface->nx; + uint32_t ny = block_interface->ny; + uint32_t nz = block_interface->nz; + size_t elemsize = block_interface->elemsize; + + *count = nx*ny*nz*elemsize; + + if (ptr != NULL) + { + uint32_t z, y; + char *block = (void *)block_interface->ptr; + + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + + char *cur = *ptr; + + if (IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz)) + memcpy(cur, block, nx * ny * nz * elemsize); + else + { + char *block_z = block; + for(z=0 ; zldy; + uint32_t ldz = block_interface->ldz; + uint32_t nx = block_interface->nx; + uint32_t ny = block_interface->ny; + uint32_t nz = block_interface->nz; + size_t elemsize = block_interface->elemsize; + + STARPU_ASSERT(count == elemsize * nx * ny * nz); + + uint32_t z, y; + char *cur = ptr; + char *block = (void *)block_interface->ptr; + + if (IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, 
ldz)) + memcpy(block, cur, nx * ny * nz * elemsize); + else + { + char *block_z = block; + for(z=0 ; zid == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize; + + return size; +} + +/* offer an access to the data parameters */ +uint32_t starpu_block_get_nx(starpu_data_handle_t handle) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + return block_interface->nx; +} + +uint32_t starpu_block_get_ny(starpu_data_handle_t handle) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + return block_interface->ny; +} + +uint32_t starpu_block_get_nz(starpu_data_handle_t handle) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + return block_interface->nz; +} + +uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. 
The given data is not a block."); +#endif + + return block_interface->ldy; +} + +uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + return block_interface->ldz; +} + +uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); +#endif + + return block_interface->ptr; +} + +size_t starpu_block_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. 
The given data is not a block."); +#endif + + return block_interface->elemsize; +} + + +/* memory allocation/deallocation primitives for the BLOCK interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr = 0, handle; + + struct starpu_block_interface *dst_block = (struct starpu_block_interface *) data_interface_; + + uint32_t nx = dst_block->nx; + uint32_t ny = dst_block->ny; + uint32_t nz = dst_block->nz; + size_t elemsize = dst_block->elemsize; + + starpu_ssize_t allocated_memory; + + handle = starpu_malloc_on_node(dst_node, nx*ny*nz*elemsize); + + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + addr = handle; + + allocated_memory = nx*ny*nz*elemsize; + + /* update the data properly in consequence */ + dst_block->ptr = addr; + dst_block->dev_handle = handle; + dst_block->offset = 0; + dst_block->ldy = nx; + dst_block->ldz = nx*ny; + + return allocated_memory; +} + +static void free_block_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_block_interface *block_interface = (struct starpu_block_interface *) data_interface; + uint32_t nx = block_interface->nx; + uint32_t ny = block_interface->ny; + uint32_t nz = block_interface->nz; + size_t elemsize = block_interface->elemsize; + + starpu_free_on_node(node, block_interface->dev_handle, nx*ny*nz*elemsize); + block_interface->ptr = 0; + block_interface->dev_handle = 0; +} + +static int map_block(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_block_interface *src_block = src_interface; + struct starpu_block_interface *dst_block = dst_interface; + int ret; + uintptr_t mapped; + /* map area ldz*(nz-1)+ldy*(ny-1)+nx */ + mapped = starpu_interface_map(src_block->dev_handle, src_block->offset, src_node, dst_node, 
(src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize, &ret); + if (mapped) + { + dst_block->dev_handle = mapped; + dst_block->offset = 0; + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + dst_block->ptr = mapped; + dst_block->ldy = src_block->ldy; + dst_block->ldz = src_block->ldz; + return 0; + } + return ret; +} + +static int unmap_block(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_block_interface *src_block = src_interface; + struct starpu_block_interface *dst_block = dst_interface; + + int ret = starpu_interface_unmap(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_node, (src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize); + dst_block->dev_handle = 0; + + return ret; +} + +static int update_map_block(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_block_interface *src_block = src_interface; + struct starpu_block_interface *dst_block = dst_interface; + + return starpu_interface_update_map(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, (src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize); +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_block_interface *src_block = (struct starpu_block_interface *) src_interface; + struct starpu_block_interface *dst_block = (struct starpu_block_interface *) dst_interface; + int ret = 0; + + uint32_t nx = dst_block->nx; + uint32_t ny = dst_block->ny; + uint32_t nz = dst_block->nz; + size_t elemsize = dst_block->elemsize; + + uint32_t ldy_src = src_block->ldy; + uint32_t ldz_src = src_block->ldz; + uint32_t ldy_dst = dst_block->ldy; + uint32_t ldz_dst = dst_block->ldz; + + if 
(starpu_interface_copy3d(src_block->dev_handle, src_block->offset, src_node, + dst_block->dev_handle, dst_block->offset, dst_node, + nx * elemsize, + ny, ldy_src * elemsize, ldy_dst * elemsize, + nz, ldz_src * elemsize, ldz_dst * elemsize, + async_data)) + ret = -EAGAIN; + + starpu_interface_data_copy(src_node, dst_node, nx*ny*nz*elemsize); + + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *) data_interface; + return snprintf(buf, size, "B%ux%ux%ux%u", + (unsigned) block->nx, + (unsigned) block->ny, + (unsigned) block->nz, + (unsigned) block->elemsize); +} diff --git a/src/datawizard/interfaces/coo_interface.c b/src/datawizard/interfaces/coo_interface.c new file mode 100644 index 0000000..ad8e1d1 --- /dev/null +++ b/src/datawizard/interfaces/coo_interface.c @@ -0,0 +1,258 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int +copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, void *async_data) +{ + size_t size = 0; + struct starpu_coo_interface *src_coo, *dst_coo; + int ret = 0; + + src_coo = (struct starpu_coo_interface *) src_interface; + dst_coo = (struct starpu_coo_interface *) dst_interface; + + size = src_coo->n_values * sizeof(src_coo->columns[0]); + if (starpu_interface_copy( + (uintptr_t) src_coo->columns, 0, src_node, + (uintptr_t) dst_coo->columns, 0, dst_node, + size, async_data)) + ret = -EAGAIN; + + /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */ + if (starpu_interface_copy( + (uintptr_t) src_coo->rows, 0, src_node, + (uintptr_t) dst_coo->rows, 0, dst_node, + size, async_data)) + ret = -EAGAIN; + + size = src_coo->n_values * src_coo->elemsize; + if (starpu_interface_copy( + src_coo->values, 0, src_node, + dst_coo->values, 0, dst_node, + size, async_data)) + ret = -EAGAIN; + + starpu_interface_data_copy(src_node, dst_node, + src_coo->n_values * + (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize)); + + return ret; +} + +static const struct starpu_data_copy_methods coo_copy_data_methods = +{ + .any_to_any = copy_any_to_any, +}; + +static void +register_coo_handle(starpu_data_handle_t handle, int home_node, + void *data_interface) +{ + struct starpu_coo_interface *coo_interface = + (struct starpu_coo_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_coo_interface *local_interface; + local_interface = (struct starpu_coo_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->values = coo_interface->values; + local_interface->columns = coo_interface->columns; + local_interface->rows = coo_interface->rows; + } + else + { + local_interface->values = 0; + local_interface->columns = 0; + local_interface->rows = 0; + } + + 
local_interface->id = coo_interface->id; + local_interface->nx = coo_interface->nx; + local_interface->ny = coo_interface->ny; + local_interface->n_values = coo_interface->n_values; + local_interface->elemsize = coo_interface->elemsize; + } +} + +static starpu_ssize_t +allocate_coo_buffer_on_node(void *data_interface, unsigned dst_node) +{ + uint32_t *addr_columns; + uint32_t *addr_rows; + uintptr_t addr_values; + + struct starpu_coo_interface *coo_interface = + (struct starpu_coo_interface *) data_interface; + + uint32_t n_values = coo_interface->n_values; + size_t elemsize = coo_interface->elemsize; + + addr_columns = (void*) starpu_malloc_on_node(dst_node, n_values * sizeof(coo_interface->columns[0])); + if (STARPU_UNLIKELY(addr_columns == NULL)) + goto fail_columns; + addr_rows = (void*) starpu_malloc_on_node(dst_node, n_values * sizeof(coo_interface->rows[0])); + if (STARPU_UNLIKELY(addr_rows == NULL)) + goto fail_rows; + addr_values = starpu_malloc_on_node(dst_node, n_values * elemsize); + if (STARPU_UNLIKELY(addr_values == (uintptr_t) NULL)) + goto fail_values; + + coo_interface->columns = addr_columns; + coo_interface->rows = addr_rows; + coo_interface->values = addr_values; + + return n_values * (sizeof(coo_interface->columns[0]) + sizeof(coo_interface->rows[0]) + elemsize); + +fail_values: + starpu_free_on_node(dst_node, (uintptr_t) addr_rows, n_values * sizeof(coo_interface->rows[0])); +fail_rows: + starpu_free_on_node(dst_node, (uintptr_t) addr_columns, n_values * sizeof(coo_interface->columns[0])); +fail_columns: + return -ENOMEM; +} + +static void +free_coo_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface; + uint32_t n_values = coo_interface->n_values; + size_t elemsize = coo_interface->elemsize; + + starpu_free_on_node(node, (uintptr_t) coo_interface->columns, n_values * sizeof(coo_interface->columns[0])); + coo_interface->columns = NULL; + 
starpu_free_on_node(node, (uintptr_t) coo_interface->rows, n_values * sizeof(coo_interface->rows[0])); + coo_interface->rows = NULL; + starpu_free_on_node(node, coo_interface->values, n_values * elemsize); + coo_interface->values = 0; +} + +static size_t +coo_interface_get_size(starpu_data_handle_t handle) +{ + struct starpu_coo_interface *coo_interface; + coo_interface = (struct starpu_coo_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return coo_interface->nx * coo_interface->ny * coo_interface->elemsize; +} + +static uint32_t +coo_interface_footprint(starpu_data_handle_t handle) +{ + struct starpu_coo_interface *coo_interface; + coo_interface = (struct starpu_coo_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + return starpu_hash_crc32c_be(coo_interface->nx * coo_interface->ny, 0); +} + +static int +coo_compare(void *a, void *b) +{ + struct starpu_coo_interface *coo_a, *coo_b; + + coo_a = (struct starpu_coo_interface *) a; + coo_b = (struct starpu_coo_interface *) b; + + return coo_a->nx == coo_b->nx && + coo_a->ny == coo_b->ny && + coo_a->n_values == coo_b->n_values && + coo_a->elemsize == coo_b->elemsize; +} + +static void +display_coo_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_coo_interface *coo_interface; + coo_interface = (struct starpu_coo_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny); +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_coo_interface *coo = (struct starpu_coo_interface *) data_interface; + return snprintf(buf, size, "M%ux%ux%ux%u", + (unsigned) coo->nx, + (unsigned) coo->ny, + (unsigned) coo->n_values, + (unsigned) coo->elemsize); +} + +struct starpu_data_interface_ops starpu_interface_coo_ops = +{ + .register_data_handle = register_coo_handle, + .allocate_data_on_node = allocate_coo_buffer_on_node, + .to_pointer = 
NULL, + .free_data_on_node = free_coo_buffer_on_node, + .copy_methods = &coo_copy_data_methods, + .get_size = coo_interface_get_size, + .footprint = coo_interface_footprint, + .compare = coo_compare, + .interfaceid = STARPU_COO_INTERFACE_ID, + .interface_size = sizeof(struct starpu_coo_interface), + .display = display_coo_interface, + .describe = describe, + .name = "STARPU_COO_INTERFACE" +}; + +void +starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, + uint32_t nx, uint32_t ny, uint32_t n_values, + uint32_t *columns, uint32_t *rows, + uintptr_t values, size_t elemsize) +{ + struct starpu_coo_interface coo_interface = + { + .id = STARPU_COO_INTERFACE_ID, + .values = values, + .columns = columns, + .rows = rows, + .nx = nx, + .ny = ny, + .n_values = n_values, + .elemsize = elemsize, + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (n_values) + { + STARPU_ASSERT_ACCESSIBLE(columns); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) columns + n_values*sizeof(uint32_t) - 1); + STARPU_ASSERT_ACCESSIBLE(rows); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) rows + n_values*sizeof(uint32_t) - 1); + } + STARPU_ASSERT_ACCESSIBLE(values); + STARPU_ASSERT_ACCESSIBLE(values + n_values*elemsize - 1); + } +#endif + + starpu_data_register(handleptr, home_node, &coo_interface, + &starpu_interface_coo_ops); +} diff --git a/src/datawizard/interfaces/csr_filters.c b/src/datawizard/interfaces/csr_filters.c new file mode 100644 index 0000000..4e1e316 --- /dev/null +++ b/src/datawizard/interfaces/csr_filters.c @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_csr_interface *csr_father = (struct starpu_csr_interface *) father_interface; + struct starpu_csr_interface *csr_child = (struct starpu_csr_interface *) child_interface; + + uint32_t nrow = csr_father->nrow; + size_t elemsize = csr_father->elemsize; + uint32_t firstentry = csr_father->firstentry; + + uint32_t *ram_rowptr = csr_father->ram_rowptr; + + size_t first_index; + unsigned child_nrow; + + starpu_filter_nparts_compute_chunk_size_and_offset(nrow, nchunks, 1, id, 1, &child_nrow, &first_index); + + uint32_t local_firstentry = ram_rowptr[first_index] - firstentry; + uint32_t local_lastentry = ram_rowptr[first_index + child_nrow] - firstentry; + + uint32_t local_nnz = local_lastentry - local_firstentry; + + STARPU_ASSERT_MSG(csr_father->id == STARPU_CSR_INTERFACE_ID, "%s can only be applied on a csr data", __func__); + csr_child->id = csr_father->id; + csr_child->nnz = local_nnz; + csr_child->nrow = child_nrow; + csr_child->firstentry = local_firstentry; + csr_child->elemsize = elemsize; + csr_child->ram_colind = &csr_father->ram_colind[local_firstentry]; + csr_child->ram_rowptr = 
&ram_rowptr[first_index]; + + if (csr_father->nzval) + { + csr_child->rowptr = &csr_father->rowptr[first_index]; + csr_child->colind = &csr_father->colind[local_firstentry]; + csr_child->nzval = csr_father->nzval + local_firstentry * elemsize; + } +} diff --git a/src/datawizard/interfaces/csr_interface.c b/src/datawizard/interfaces/csr_interface.c new file mode 100644 index 0000000..e33a921 --- /dev/null +++ b/src/datawizard/interfaces/csr_interface.c @@ -0,0 +1,436 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); + +static const struct starpu_data_copy_methods csr_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void register_csr_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_csr_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_csr_buffer_on_node(void *data_interface, unsigned node); +static size_t csr_interface_get_size(starpu_data_handle_t handle); +static int csr_compare(void *data_interface_a, void *data_interface_b); +static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); +static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); + +struct starpu_data_interface_ops starpu_interface_csr_ops = +{ + .register_data_handle = register_csr_handle, + .allocate_data_on_node = allocate_csr_buffer_on_node, + .free_data_on_node = free_csr_buffer_on_node, + .copy_methods = &csr_copy_data_methods_s, + .get_size = csr_interface_get_size, + .interfaceid = STARPU_CSR_INTERFACE_ID, + .interface_size = sizeof(struct starpu_csr_interface), + .footprint = footprint_csr_interface_crc32, + .compare = csr_compare, + .describe = describe, + .name = "STARPU_CSR_INTERFACE", + .pack_data = pack_data, + .peek_data = peek_data, + .unpack_data = unpack_data, + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void register_csr_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_csr_interface *csr_interface = (struct 
starpu_csr_interface *) data_interface; + uint32_t *ram_colind = NULL; + uint32_t *ram_rowptr = NULL; + + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + ram_colind = csr_interface->colind; + ram_rowptr = csr_interface->rowptr; + } + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_csr_interface *local_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->nzval = csr_interface->nzval; + local_interface->colind = csr_interface->colind; + } + else + { + local_interface->nzval = 0; + local_interface->colind = NULL; + } + + local_interface->ram_colind = ram_colind; + local_interface->ram_rowptr = ram_rowptr; + local_interface->id = csr_interface->id; + local_interface->rowptr = csr_interface->rowptr; + local_interface->nnz = csr_interface->nnz; + local_interface->nrow = csr_interface->nrow; + local_interface->firstentry = csr_interface->firstentry; + local_interface->elemsize = csr_interface->elemsize; + + } +} + +/* declare a new data with the BLAS interface */ +void starpu_csr_data_register(starpu_data_handle_t *handleptr, int home_node, + uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize) +{ + struct starpu_csr_interface csr_interface = + { + .id = STARPU_CSR_INTERFACE_ID, + .nnz = nnz, + .nrow = nrow, + .nzval = nzval, + .colind = colind, + .rowptr = rowptr, + .firstentry = firstentry, + .elemsize = elemsize + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (nnz) + { + if (elemsize) + { + STARPU_ASSERT_ACCESSIBLE(nzval); + STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize - 1); + } + STARPU_ASSERT_ACCESSIBLE(colind); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1); + } + STARPU_ASSERT_ACCESSIBLE(rowptr); + STARPU_ASSERT_ACCESSIBLE((uintptr_t) rowptr + 
(nrow+1)*sizeof(uint32_t) - 1); + } +#endif + + starpu_data_register(handleptr, home_node, &csr_interface, &starpu_interface_csr_ops); +} + +static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_csr_get_nnz(handle), 0); +} + +static int csr_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_csr_interface *csr_a = (struct starpu_csr_interface *) data_interface_a; + struct starpu_csr_interface *csr_b = (struct starpu_csr_interface *) data_interface_b; + + /* Two matrices are considered compatible if they have the same size */ + return (csr_a->nnz == csr_b->nnz) + && (csr_a->nrow == csr_b->nrow) + && (csr_a->elemsize == csr_b->elemsize); +} + +/* offer an access to the data parameters */ +uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle) +{ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); +#endif + + return csr_interface->nnz; +} + +uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle) +{ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); +#endif + + return csr_interface->nrow; +} + +uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle) +{ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. 
The given data is not a csr."); +#endif + + return csr_interface->firstentry; +} + +size_t starpu_csr_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); +#endif + + return csr_interface->elemsize; +} + +uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); +#endif + + return csr_interface->nzval; +} + +uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); +#endif + + return csr_interface->colind; +} + +uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. 
The given data is not a csr."); +#endif + + return csr_interface->rowptr; +} + +static size_t csr_interface_get_size(starpu_data_handle_t handle) +{ + size_t size; + + uint32_t nnz = starpu_csr_get_nnz(handle); + uint32_t nrow = starpu_csr_get_nrow(handle); + size_t elemsize = starpu_csr_get_elemsize(handle); + + size = nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); + + return size; +} + +/* memory allocation/deallocation primitives for the BLAS interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_csr_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr_nzval = 0; + uint32_t *addr_colind = NULL, *addr_rowptr = NULL; + starpu_ssize_t allocated_memory; + + /* we need the 3 arrays to be allocated */ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface_; + + uint32_t nnz = csr_interface->nnz; + uint32_t nrow = csr_interface->nrow; + size_t elemsize = csr_interface->elemsize; + + if (nnz) + { + addr_nzval = starpu_malloc_on_node(dst_node, nnz*elemsize); + if (!addr_nzval) + goto fail_nzval; + addr_colind = (uint32_t*) starpu_malloc_on_node(dst_node, nnz*sizeof(uint32_t)); + if (!addr_colind) + goto fail_colind; + } + else + { + addr_nzval = 0; + addr_colind = NULL; + } + addr_rowptr = (uint32_t*) starpu_malloc_on_node(dst_node, (nrow+1)*sizeof(uint32_t)); + if (!addr_rowptr) + goto fail_rowptr; + + /* allocation succeeded */ + allocated_memory = + nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); + + /* update the data properly in consequence */ + csr_interface->nzval = addr_nzval; + csr_interface->colind = addr_colind; + csr_interface->rowptr = addr_rowptr; + + return allocated_memory; + +fail_rowptr: + if (nnz) + starpu_free_on_node(dst_node, (uintptr_t) addr_colind, nnz*sizeof(uint32_t)); +fail_colind: + if (nnz) + starpu_free_on_node(dst_node, addr_nzval, nnz*elemsize); +fail_nzval: + /* allocation failed */ + return -ENOMEM; +} + 
+static void free_csr_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface; + uint32_t nnz = csr_interface->nnz; + uint32_t nrow = csr_interface->nrow; + size_t elemsize = csr_interface->elemsize; + + if (nnz) + { + starpu_free_on_node(node, csr_interface->nzval, nnz*elemsize); + csr_interface->nzval = 0; + starpu_free_on_node(node, (uintptr_t) csr_interface->colind, nnz*sizeof(uint32_t)); + csr_interface->colind = NULL; + } + starpu_free_on_node(node, (uintptr_t) csr_interface->rowptr, (nrow+1)*sizeof(uint32_t)); + csr_interface->rowptr = NULL; +} + +/* as not all platform easily have a BLAS lib installed ... */ +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_csr_interface *src_csr = (struct starpu_csr_interface *) src_interface; + struct starpu_csr_interface *dst_csr = (struct starpu_csr_interface *) dst_interface; + + uint32_t nnz = src_csr->nnz; + uint32_t nrow = src_csr->nrow; + size_t elemsize = src_csr->elemsize; + int ret = 0; + + if (nnz) + { + if (starpu_interface_copy(src_csr->nzval, 0, src_node, dst_csr->nzval, 0, dst_node, nnz*elemsize, async_data)) + ret = -EAGAIN; + + if (starpu_interface_copy((uintptr_t)src_csr->colind, 0, src_node, (uintptr_t)dst_csr->colind, 0, dst_node, nnz*sizeof(uint32_t), async_data)) + ret = -EAGAIN; + } + + if (starpu_interface_copy((uintptr_t)src_csr->rowptr, 0, src_node, (uintptr_t)dst_csr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data)) + ret = -EAGAIN; + + starpu_interface_data_copy(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t)); + + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_csr_interface *csr = (struct starpu_csr_interface *) data_interface; + return snprintf(buf, size, "C%ux%ux%u", + (unsigned) csr->nnz, + (unsigned) csr->nrow, 
+ (unsigned) csr->elemsize); +} + +static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_csr_interface *csr = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); + + // We first pack colind + *count = csr->nnz * sizeof(csr->colind[0]); + // Then rowptr + *count += (csr->nrow + 1) * sizeof(csr->rowptr[0]); + // Then nnzval + *count += csr->nnz * csr->elemsize; + + if (ptr != NULL) + { + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + char *tmp = *ptr; + if (csr->nnz) + { + memcpy(tmp, (void*)csr->colind, csr->nnz * sizeof(csr->colind[0])); + tmp += csr->nnz * sizeof(csr->colind[0]); + memcpy(tmp, (void*)csr->rowptr, (csr->nrow + 1) * sizeof(csr->rowptr[0])); + tmp += (csr->nrow + 1) * sizeof(csr->rowptr[0]); + } + memcpy(tmp, (void*)csr->nzval, csr->nnz * csr->elemsize); + } + + return 0; +} + +static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_csr_interface *csr = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == (csr->nnz * sizeof(csr->colind[0]))+((csr->nrow + 1) * sizeof(csr->rowptr[0]))+(csr->nnz * csr->elemsize)); + + char *tmp = ptr; + if (csr->nnz) + { + memcpy((void*)csr->colind, tmp, csr->nnz * sizeof(csr->colind[0])); + tmp += csr->nnz * sizeof(csr->colind[0]); + memcpy((void*)csr->rowptr, tmp, (csr->nrow + 1) * sizeof(csr->rowptr[0])); + tmp += (csr->nrow + 1) * sizeof(csr->rowptr[0]); + } + memcpy((void*)csr->nzval, tmp, csr->nnz * csr->elemsize); + + return 0; +} + +static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + peek_data(handle, node, ptr, count); + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + + return 0; +} diff --git 
a/src/datawizard/interfaces/data_interface.c b/src/datawizard/interfaces/data_interface.c new file mode 100644 index 0000000..2da0b9c --- /dev/null +++ b/src/datawizard/interfaces/data_interface.c @@ -0,0 +1,1302 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_OPENMP +#include +#endif + +static struct starpu_data_interface_ops **_id_to_ops_array; +static unsigned _id_to_ops_array_size; + +/* Hash table mapping host pointers to data handles. 
*/ +static int32_t nregistered, maxnregistered; +static int _data_interface_number = STARPU_MAX_INTERFACE_ID; +starpu_arbiter_t _starpu_global_arbiter; +static int max_memory_use; + +static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned coherent, unsigned nowait); + +void _starpu_data_interface_fini(void); + +void _starpu_data_interface_init(void) +{ + max_memory_use = starpu_getenv_number_default("STARPU_MAX_MEMORY_USE", 0); + + /* Just for testing purpose */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + _starpu_global_arbiter = starpu_arbiter_create(); + + _starpu_crash_add_hook(&_starpu_data_interface_fini); +} + +void _starpu_data_interface_fini(void) +{ + if (max_memory_use) + _STARPU_DISP("Memory used for %d data handles: %lu MiB\n", maxnregistered, (unsigned long) (maxnregistered * sizeof(struct _starpu_data_state)) >> 20); +} + +void _starpu_data_interface_shutdown() +{ + free(_id_to_ops_array); + _id_to_ops_array = NULL; + _id_to_ops_array_size = 0; + + _starpu_data_interface_fini(); +} + +struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id) +{ + switch (interface_id) + { + case STARPU_MATRIX_INTERFACE_ID: + return &starpu_interface_matrix_ops; + + case STARPU_BLOCK_INTERFACE_ID: + return &starpu_interface_block_ops; + + case STARPU_VECTOR_INTERFACE_ID: + return &starpu_interface_vector_ops; + + case STARPU_CSR_INTERFACE_ID: + return &starpu_interface_csr_ops; + + case STARPU_BCSR_INTERFACE_ID: + return &starpu_interface_bcsr_ops; + + case STARPU_VARIABLE_INTERFACE_ID: + return &starpu_interface_variable_ops; + + case STARPU_VOID_INTERFACE_ID: + return &starpu_interface_void_ops; + + case STARPU_MULTIFORMAT_INTERFACE_ID: + return &starpu_interface_multiformat_ops; + + case STARPU_COO_INTERFACE_ID: + return &starpu_interface_coo_ops; + + case STARPU_TENSOR_INTERFACE_ID: + return &starpu_interface_tensor_ops; + + case STARPU_NDIM_INTERFACE_ID: + return &starpu_interface_ndim_ops; 
+ + default: + { + if (interface_id-STARPU_MAX_INTERFACE_ID > _id_to_ops_array_size || _id_to_ops_array == NULL || _id_to_ops_array[interface_id-STARPU_MAX_INTERFACE_ID]==NULL) + { + _STARPU_MSG("There is no 'struct starpu_data_interface_ops' registered for interface %d\n", interface_id); + STARPU_ABORT(); + return NULL; + } + else + return _id_to_ops_array[interface_id-STARPU_MAX_INTERFACE_ID]; + } + } +} + +/* + * Start monitoring a piece of data + */ +static void _starpu_register_new_data(starpu_data_handle_t handle, int home_node, uint32_t wt_mask) +{ + STARPU_ASSERT(handle); + + /* first take care to properly lock the data */ + _starpu_spin_lock(&handle->header_lock); + + handle->root_handle = handle; + //handle->father_handle = NULL; + //handle->nsiblings = 0; + //handle->siblings = NULL; + //handle->sibling_index = 0; /* could be anything for the root */ + handle->depth = 1; /* the tree is just a node yet */ + + handle->active = 1; + + /* Store some values directly in the handle not to recompute them all + * the time. 
*/ + handle->footprint = _starpu_compute_data_footprint(handle); + + handle->home_node = home_node; + + handle->wt_mask = wt_mask; + + //handle->aliases = 0; + //handle->readonly_dup = NULL; + //handle->readonly_dup_of = NULL; + + //handle->is_not_important = 0; + + handle->sequential_consistency = starpu_data_get_default_sequential_consistency_flag(); + handle->initialized = home_node != -1; + //handle->readonly = 0; + handle->ooc = 1; + + /* By default, there are no methods available to perform a reduction */ + //handle->redux_cl = NULL; + //handle->init_cl = NULL; + + /* that new data is invalid from all nodes perpective except for the + * home node */ + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *replicate; + replicate = &handle->per_node[node]; + + replicate->memory_node = node; + //replicate->relaxed_coherency = 0; + //replicate->refcnt = 0; + //replicate->nb_tasks_prefetch = 0; + + if ((int) node == home_node) + { + /* this is the home node with the only valid copy */ + replicate->state = STARPU_OWNER; + replicate->allocated = 1; + //replicate->automatically_allocated = 0; + replicate->initialized = 1; + } + else + { + /* the value is not available here yet */ + replicate->state = STARPU_INVALID; + //replicate->allocated = 0; + //replicate->initialized = 0; + } + + replicate->mapped = STARPU_UNMAPPED; + } + + /* now the data is available ! 
*/ + _starpu_spin_unlock(&handle->header_lock); + (void)STARPU_ATOMIC_ADD(&nregistered, 1); + _starpu_perf_counter_update_max_int32(&maxnregistered, nregistered); +} + +void _starpu_data_initialize_per_worker(starpu_data_handle_t handle) +{ + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + + _starpu_spin_checklocked(&handle->header_lock); + + _STARPU_CALLOC(handle->per_worker, nworkers, sizeof(*handle->per_worker)); + + size_t interfacesize = handle->ops->interface_size; + + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *replicate; + //unsigned node; + replicate = &handle->per_worker[worker]; + //replicate->allocated = 0; + //replicate->automatically_allocated = 0; + replicate->state = STARPU_INVALID; + //replicate->refcnt = 0; + replicate->handle = handle; + //replicate->nb_tasks_prefetch = 0; + + //for (node = 0; node < STARPU_MAXNODES; node++) + //{ + // replicate->request[node] = NULL; + // replicate->last_request[node] = NULL; + //} + //replicate->load_request = NULL; + + /* Assuming being used for SCRATCH for now, patched when entering REDUX mode */ + replicate->relaxed_coherency = 1; + //replicate->initialized = 0; + replicate->memory_node = starpu_worker_get_memory_node(worker); + replicate->mapped = STARPU_UNMAPPED; + + _STARPU_CALLOC(replicate->data_interface, 1, interfacesize); + /* duplicate the content of the interface on node 0 */ + memcpy(replicate->data_interface, handle->per_node[STARPU_MAIN_RAM].data_interface, interfacesize); + } +} + +void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node) +{ + struct _starpu_data_replicate *replicate = &handle->per_node[node]; + + _starpu_spin_lock(&handle->header_lock); + STARPU_ASSERT_MSG(replicate->allocated == 0, "starpu_data_ptr_register must be called right after starpu_data_register"); + replicate->allocated = 1; + replicate->automatically_allocated = 0; + _starpu_spin_unlock(&handle->header_lock); +} + +int 
_starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node) +{ + unsigned node; + + /* Tell helgrind that our access to busy_count in + * starpu_data_unregister is actually safe */ + STARPU_HG_DISABLE_CHECKING(handle->busy_count); + + handle->magic = 42; + + /* When not specified, the fields are initialized in _starpu_register_new_data and _starpu_data_partition */ + + _starpu_data_requester_prio_list_init0(&handle->req_list); + //handle->refcnt = 0; + //handle->unlocking_reqs = 0; + //handle->current_mode = STARPU_NONE; + _starpu_spin_init(&handle->header_lock); + + //handle->busy_count = 0; + //handle->busy_waiting = 0; + STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL); + STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL); +#ifdef STARPU_BUBBLE + STARPU_PTHREAD_MUTEX_INIT0(&handle->unpartition_mutex, NULL); +#endif + + //handle->root_handle + //handle->father_handle + //handle->active_children = NULL; + //handle->active_nchildren = 0; + //handle->active_readonly_children = NULL; + //handle->active_readonly_nchildren = NULL; + //handle->nactive_readonly_children = 0; + //handle->nsiblings + //handle->siblings + //handle->sibling_index + //handle->depth + + /* there is no hierarchy yet */ + //handle->children = NULL; + //handle->nchildren = 0; + //handle->nplans = 0; + //handle->switch_cl = NULL; + //handle->switch_cl_nparts = 0; + //handle->partitioned = 0; + //handle->part_readonly = 0; + + //handle->active + //handle->active_ro = 0; + + //handle->per_node below + + handle->ops = interface_ops; + size_t interfacesize = interface_ops->interface_size; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + _starpu_memory_stats_init_per_node(handle, node); + + struct _starpu_data_replicate *replicate; + replicate = &handle->per_node[node]; + /* relaxed_coherency = 0 */ + + replicate->handle = handle; + + _STARPU_CALLOC(replicate->data_interface, 1, interfacesize); + if (handle->ops->init) 
handle->ops->init(replicate->data_interface); + } + + //handle->per_worker = NULL; + //handle->ops above + + //handle->footprint + + //handle->home_node + //handle->wt_mask + //handle->aliases = 0; + //handle->is_not_important + //handle->sequential_consistency + //handle->initialized + //handle->readonly + //handle->ooc + //handle->lazy_unregister = 0; + //handle->removed_from_context_hash = 0; + + STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL); + + handle->last_submitted_mode = STARPU_R; + //handle->last_sync_task = NULL; + //handle->last_submitted_accessors.task = NULL; + handle->last_submitted_accessors.next = &handle->last_submitted_accessors; + handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; + +#ifdef STARPU_USE_FXT + //handle->last_submitted_ghost_sync_id_is_valid = 0; + //handle->last_submitted_ghost_sync_id = 0; + //handle->last_submitted_ghost_accessors_id = NULL; +#endif + + //handle->post_sync_tasks = NULL; + /* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */ + STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt); + //handle->post_sync_tasks_cnt = 0; + + //handle->redux_cl + //handle->init_cl + + //handle->reduction_refcnt = 0; + + _starpu_data_requester_prio_list_init0(&handle->reduction_req_list); + + //handle->reduction_tmp_handles = NULL; + + //handle->write_invalidation_req = NULL; + + //handle->mpi_data = NULL; /* invalid until set */ + + _starpu_memory_stats_init(handle); + + handle->mf_node = mf_node; + + //handle->unregister_hook = NULL; + + if (_starpu_global_arbiter) + /* Just for testing purpose */ + starpu_data_assign_arbiter(handle, _starpu_global_arbiter); + else + { + //handle->arbiter = NULL; + } + _starpu_data_requester_prio_list_init0(&handle->arbitered_req_list); + + handle->last_locality = -1; + + //handle->dimensions = 0; + //handle->coordinates = {}; + + //handle->user_data = NULL; + //handle->sched_data = NULL; + + return 0; +} + +static 
+starpu_data_handle_t _starpu_data_handle_allocate(struct starpu_data_interface_ops *interface_ops, unsigned int mf_node)
+{
+	/* Zero-allocate the handle, then set up the non-zero fields */
+	starpu_data_handle_t handle;
+	_STARPU_CALLOC(handle, 1, sizeof(struct _starpu_data_state));
+	_starpu_data_handle_init(handle, interface_ops, mf_node);
+	return handle;
+}
+
+/*
+ * Record ops so that _starpu_data_interface_get_ops() can find it from its
+ * interface id. Only ids at or above STARPU_MAX_INTERFACE_ID are stored, in
+ * _id_to_ops_array at index (id - STARPU_MAX_INTERFACE_ID). ops->interfaceid
+ * must already be set.
+ */
+void _starpu_data_register_ops(struct starpu_data_interface_ops *ops)
+{
+	/* check the interfaceid is set */
+	STARPU_ASSERT(ops->interfaceid != STARPU_UNKNOWN_INTERFACE_ID);
+
+	if ((unsigned)ops->interfaceid >= STARPU_MAX_INTERFACE_ID)
+	{
+		unsigned index = (unsigned)ops->interfaceid - STARPU_MAX_INTERFACE_ID;
+
+		if (index >= _id_to_ops_array_size)
+		{
+			/* Grow geometrically until the slot fits; a single
+			 * doubling is not enough for an arbitrary id */
+			unsigned old_size = _id_to_ops_array_size;
+			unsigned new_size = old_size ? old_size : 16;
+			while (new_size <= index)
+				new_size *= 2;
+
+			_STARPU_REALLOC(_id_to_ops_array, new_size * sizeof(struct starpu_data_interface_ops *));
+			/* The lookup compares unused slots against NULL, so
+			 * the newly added part must be cleared */
+			memset(_id_to_ops_array + old_size, 0, (new_size - old_size) * sizeof(struct starpu_data_interface_ops *));
+			_id_to_ops_array_size = new_size;
+		}
+		_id_to_ops_array[index] = ops;
+	}
+}
+
+/* Public entry point: give ops a fresh interface id if it has none yet, then
+ * record it */
+void starpu_data_register_ops(struct starpu_data_interface_ops *ops)
+{
+	if (ops->interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
+	{
+		ops->interfaceid = starpu_data_interface_get_next_id();
+	}
+	_starpu_data_register_ops(ops);
+}
+
+/*
+ * Create a data handle for data_interface, whose initial copy lives on
+ * home_node (-1 when there is none), and store it in *handleptr.
+ */
+void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
+{
+	STARPU_ASSERT_MSG(home_node >= -1 && home_node < (int)starpu_memory_nodes_get_count(), "Invalid memory node number");
+	/* Check the output pointer before allocating anything */
+	STARPU_ASSERT(handleptr);
+
+	starpu_data_handle_t handle = _starpu_data_handle_allocate(ops, home_node);
+	*handleptr = handle;
+
+	if (ops->interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
+	{
+		ops->interfaceid = starpu_data_interface_get_next_id();
+	}
+
+	/* fill the interface fields with the appropriate method */
+	STARPU_ASSERT(ops->register_data_handle);
+	ops->register_data_handle(handle, home_node, data_interface);
+
+	_starpu_data_register_ops(ops);
+
+	_starpu_register_new_data(handle, home_node, 0);
+	_STARPU_TRACE_HANDLE_DATA_REGISTER(handle);
+}
+
+void 
starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc) +{ + void *local_interface = starpu_data_get_interface_on_node(handlesrc, STARPU_MAIN_RAM); + starpu_data_register(handledst, -1, local_interface, handlesrc->ops); +} + +void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node) +{ + /* Check whether the operation is supported and the node has actually + * been allocated. */ + if (!starpu_data_test_if_allocated_on_node(handle, node)) + return NULL; + if (handle->ops->to_pointer) + { + return handle->ops->to_pointer(starpu_data_get_interface_on_node(handle, node), node); + } + + /* Deprecated */ + if (handle->ops->handle_to_pointer) + { + return handle->ops->handle_to_pointer(handle, node); + } + + return NULL; +} + +void *starpu_data_get_local_ptr(starpu_data_handle_t handle) +{ + return starpu_data_handle_to_pointer(handle, starpu_worker_get_local_memory_node()); +} + +struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle) +{ + return handle->ops; +} + +void _starpu_data_free_interfaces(starpu_data_handle_t handle) +{ + unsigned node; + unsigned nworkers = starpu_worker_get_count(); + + if (handle->ops->unregister_data_handle) + handle->ops->unregister_data_handle(handle); + + for (node = 0; node < STARPU_MAXNODES; node++) + free(handle->per_node[node].data_interface); + + if (handle->per_worker) + { + unsigned worker; + for (worker = 0; worker < nworkers; worker++) + free(handle->per_worker[worker].data_interface); + free(handle->per_worker); + } +} + +struct _starpu_unregister_callback_arg +{ + unsigned memory_node; + starpu_data_handle_t handle; + unsigned terminated; + starpu_pthread_mutex_t mutex; + starpu_pthread_cond_t cond; +}; + +/* Check whether we should tell starpu_data_unregister that the data handle is + * not busy any more. + * The header is supposed to be locked. 
+ * This may free the handle, if it was lazily unregistered (1 is returned in + * that case). The handle pointer thus becomes invalid for the caller. + * + * Note: we inline some of the tests in the _starpu_data_check_not_busy macro. + */ +int __starpu_data_check_not_busy(starpu_data_handle_t handle) +{ + if (STARPU_LIKELY(handle->busy_count)) + return 0; + + /* Not busy any more, perhaps have to unregister etc. */ + if (STARPU_UNLIKELY(handle->busy_waiting)) + { + STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex); + STARPU_PTHREAD_COND_BROADCAST(&handle->busy_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex); + } + + /* The handle has been destroyed in between (eg. this was a temporary + * handle created for a reduction.) */ + if (STARPU_UNLIKELY(handle->lazy_unregister)) + { + handle->lazy_unregister = 0; + _starpu_spin_unlock(&handle->header_lock); + _starpu_data_unregister(handle, 0, 1); + /* Warning: in case we unregister the handle, we must be sure + * that the caller will not try to unlock the header after + * !*/ + return 1; + } + + return 0; +} + +static +void _starpu_check_if_valid_and_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, const char *origin) +{ + unsigned node; + unsigned nnodes = starpu_memory_nodes_get_count(); + int valid = 0; + + _starpu_spin_lock(&handle->header_lock); + for (node = 0; node < nnodes; node++) + { + if (handle->per_node[node].state != STARPU_INVALID) + { + /* we found a copy ! 
*/ + valid = 1; + break; + } + } + _starpu_spin_unlock(&handle->header_lock); + if (valid) + { + int ret = _starpu_fetch_data_on_node(handle, handle->home_node, replicate, STARPU_R, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, origin); + STARPU_ASSERT(!ret); + _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate); + } + else + { + _starpu_spin_lock(&handle->header_lock); + if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) + _starpu_spin_unlock(&handle->header_lock); + } +} + +static void _starpu_data_unregister_fetch_data_callback(void *_arg) +{ + struct _starpu_unregister_callback_arg *arg = (struct _starpu_unregister_callback_arg *) _arg; + + starpu_data_handle_t handle = arg->handle; + + STARPU_ASSERT(handle); + + struct _starpu_data_replicate *replicate = &handle->per_node[arg->memory_node]; + + _starpu_check_if_valid_and_fetch_data_on_node(handle, replicate, "_starpu_data_unregister_fetch_data_callback"); + + /* unlock the caller */ + STARPU_PTHREAD_MUTEX_LOCK(&arg->mutex); + arg->terminated = 1; + STARPU_PTHREAD_COND_SIGNAL(&arg->cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&arg->mutex); +} + +void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) +{ + STARPU_ASSERT(handle->unregister_hook == NULL); + handle->unregister_hook = func; +} + +/* + * We are about to unregister this R/O data. There might be still other aliases, + * in which case this returns 0. If not, users are not supposed to see it + * any more, so detach it from their sight and return 1 to let unregistration happen. 
+ */ +static int _starpu_ro_data_detach(starpu_data_handle_t handle) +{ + _starpu_spin_lock(&handle->header_lock); + if (handle->aliases) + { + handle->aliases--; + _starpu_spin_unlock(&handle->header_lock); + return 0; + } + if (handle->readonly_dup) + { + STARPU_ASSERT(handle->readonly_dup->readonly_dup_of == handle); + handle->readonly_dup->readonly_dup_of = NULL; + handle->readonly_dup = NULL; + } + if (handle->readonly_dup_of) + { + STARPU_ASSERT(handle->readonly_dup_of->readonly_dup == handle); + handle->readonly_dup_of->readonly_dup = NULL; + handle->readonly_dup_of = NULL; + } + /* So that unregistration can use write dependencies to wait for + * anything to finish */ + handle->readonly = 0; + _starpu_spin_unlock(&handle->header_lock); + return 1; +} + +/* Unregister the data handle, perhaps we don't need to update the home_node + * (in that case coherent is set to 0) + * nowait is for internal use when we already know for sure that we won't have to wait. + */ +static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned coherent, unsigned nowait) +{ + STARPU_ASSERT(handle); + STARPU_ASSERT_MSG(handle->nchildren == 0, "data %p needs to be unpartitioned before unregistration", handle); + STARPU_ASSERT_MSG(handle->nplans == 0, "data %p needs its partition plans to be cleaned before unregistration", handle); + STARPU_ASSERT_MSG(handle->partitioned == 0, "data %p needs its partitioned plans to be unpartitioned before unregistration", handle); + /* TODO: also check that it has the latest coherency */ + STARPU_ASSERT(!(nowait && handle->busy_count != 0)); + + if (!_starpu_ro_data_detach(handle)) + return; + + int sequential_consistency = handle->sequential_consistency; + if (sequential_consistency && !nowait) + { + /* We will acquire it in write mode to catch all dependencies, + * but possibly it's not actually initialized. 
Fake it to avoid + getting caught doing it */ + handle->initialized = 1; + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_data_unregister must not be called from a task or callback, perhaps you can use starpu_data_unregister_submit instead"); + + /* If sequential consistency is enabled, wait until data is available */ + if ((handle->nplans && !handle->nchildren) || handle->siblings) + _starpu_data_partition_access_submit(handle, !handle->readonly); + _starpu_data_wait_until_available(handle, handle->readonly?STARPU_R:STARPU_RW, "starpu_data_unregister"); + } + + if (coherent && !nowait) + { + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_data_unregister must not be called from a task or callback, perhaps you can use starpu_data_unregister_submit instead"); + + /* Fetch data in the home of the data to ensure we have a valid copy + * where we registered it */ + int home_node = handle->home_node; + if (home_node >= 0) + { + struct _starpu_unregister_callback_arg arg = { 0 }; + arg.handle = handle; + arg.memory_node = (unsigned)home_node; + arg.terminated = 0; + STARPU_PTHREAD_MUTEX_INIT0(&arg.mutex, NULL); + STARPU_PTHREAD_COND_INIT0(&arg.cond, NULL); + + if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R, + _starpu_data_unregister_fetch_data_callback, &arg)) + { + /* no one has locked this data yet, so we proceed immediately */ + struct _starpu_data_replicate *home_replicate = &handle->per_node[home_node]; + _starpu_check_if_valid_and_fetch_data_on_node(handle, home_replicate, "_starpu_data_unregister"); + } + else + { + STARPU_PTHREAD_MUTEX_LOCK(&arg.mutex); + while (!arg.terminated) + STARPU_PTHREAD_COND_WAIT(&arg.cond, &arg.mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&arg.mutex); + } + STARPU_PTHREAD_MUTEX_DESTROY(&arg.mutex); + STARPU_PTHREAD_COND_DESTROY(&arg.cond); + } + + /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. 
*/ + + /* If this handle uses a multiformat interface, we may have to convert + * this piece of data back into the CPU format. + * XXX : This is quite hacky, could we submit a task instead ? + */ + if (_starpu_data_is_multiformat_handle(handle) && (starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM)) + { + _STARPU_DEBUG("Conversion needed\n"); + void *buffers[1]; + struct starpu_multiformat_interface *format_interface; + home_node = handle->home_node; + if (home_node < 0 || (starpu_node_get_kind(home_node) != STARPU_CPU_RAM)) + home_node = STARPU_MAIN_RAM; + format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, home_node); + struct starpu_codelet *cl = NULL; + enum starpu_node_kind node_kind = starpu_node_get_kind(handle->mf_node); + + switch (node_kind) + { +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + cl = mf_ops->cuda_to_cpu_cl; + break; + } +#endif +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_RAM: + { + struct starpu_multiformat_data_interface_ops *mf_ops; + mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); + cl = mf_ops->opencl_to_cpu_cl; + break; + } +#endif + case STARPU_CPU_RAM: /* Impossible ! */ + default: + STARPU_ABORT(); + } + buffers[0] = format_interface; + + _starpu_cl_func_t func = _starpu_task_get_cpu_nth_implementation(cl, 0); + STARPU_ASSERT(func); + func(buffers, NULL); + } + } + + /* Prevent any further unregistration */ + handle->magic = 0; + + _starpu_spin_lock(&handle->header_lock); + if (!coherent) + { + /* Should we postpone the unregister operation ? 
*/ + if (handle->lazy_unregister) + { + if (handle->busy_count > 0) + { + _starpu_spin_unlock(&handle->header_lock); + return; + } + handle->lazy_unregister = 0; + } + } + + /* Tell holders of references that we're starting waiting */ + handle->busy_waiting = 1; + _starpu_spin_unlock(&handle->header_lock); + + /* Request unmapping of any mapped data */ + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + _starpu_data_unmap(handle, node); + +retry_busy: + /* Wait for all requests to finish (notably WT requests) */ + STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex); + while (1) + { + /* Here helgrind would shout that this an unprotected access, + * but this is actually fine: all threads who do busy_count-- + * are supposed to call _starpu_data_check_not_busy, which will + * wake us up through the busy_mutex/busy_cond. */ + if (!handle->busy_count) + break; + /* This is woken by _starpu_data_check_not_busy, always called + * after decrementing busy_count */ + STARPU_PTHREAD_COND_WAIT(&handle->busy_cond, &handle->busy_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex); + + /* Unregister MPI things after having waited for MPI reqs etc. to settle down */ + if (handle->unregister_hook) + { + handle->unregister_hook(handle); + handle->unregister_hook = NULL; + } + + /* Wait for finished requests to release the handle */ + _starpu_spin_lock(&handle->header_lock); + if (handle->busy_count) + { + /* Bad luck: some request went in in between, wait again... 
*/ + _starpu_spin_unlock(&handle->header_lock); + goto retry_busy; + } + + size_t size = _starpu_data_get_alloc_size(handle); + + /* Destroy the data now */ + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + STARPU_ASSERT(!local->refcnt); + if (local->allocated) + { + /* free the data copy in a lazy fashion */ + if (local->automatically_allocated) + _starpu_request_mem_chunk_removal(handle, local, node, size); + } + } + if (handle->per_worker) + { + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &handle->per_worker[worker]; + STARPU_ASSERT(!local->refcnt); + /* free the data copy in a lazy fashion */ + if (local->allocated && local->automatically_allocated) + _starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size); + } + } + _starpu_data_free_interfaces(handle); + + _starpu_memory_stats_free(handle); + + _starpu_spin_unlock(&handle->header_lock); + _starpu_spin_destroy(&handle->header_lock); + + _starpu_data_clear_implicit(handle); + free(handle->active_readonly_children); + free(handle->active_readonly_nchildren); + + STARPU_PTHREAD_MUTEX_DESTROY(&handle->busy_mutex); + STARPU_PTHREAD_COND_DESTROY(&handle->busy_cond); + STARPU_PTHREAD_MUTEX_DESTROY(&handle->sequential_consistency_mutex); +#ifdef STARPU_BUBBLE + STARPU_PTHREAD_MUTEX_DESTROY(&handle->unpartition_mutex); +#endif + + STARPU_HG_ENABLE_CHECKING(handle->post_sync_tasks_cnt); + STARPU_HG_ENABLE_CHECKING(handle->busy_count); + + _starpu_data_requester_prio_list_deinit(&handle->req_list); + _starpu_data_requester_prio_list_deinit(&handle->reduction_req_list); + + if (handle->switch_cl) + { + free(handle->switch_cl->dyn_nodes); + free(handle->switch_cl); + } + _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle); + free(handle); + (void)STARPU_ATOMIC_ADD(&nregistered, -1); +} + +void 
starpu_data_unregister(starpu_data_handle_t handle)
+{
+	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it never registered, or already unregistered?)", handle);
+	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
+
+	_starpu_data_unregister(handle, 1, 0);
+}
+
+void starpu_data_unregister_no_coherency(starpu_data_handle_t handle)
+{
+	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it never registered, or already unregistered?)", handle);
+
+	_starpu_data_unregister(handle, 0, 0);
+}
+
+static void _starpu_data_unregister_submit_cb(void *arg)
+{
+	starpu_data_handle_t handle = arg;
+
+	_starpu_spin_lock(&handle->header_lock);
+	handle->lazy_unregister = 1;
+	/* The handle should be busy since we are working on it.
+	 * When we release the handle below, it will be destroyed by
+	 * _starpu_data_check_not_busy */
+	STARPU_ASSERT(handle->busy_count);
+	_starpu_spin_unlock(&handle->header_lock);
+
+	starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL);
+}
+
+void starpu_data_unregister_submit(starpu_data_handle_t handle)
+{
+	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it never registered, or already unregistered?)", handle);
+	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
+
+	if (!_starpu_ro_data_detach(handle))
+		return;
+
+	/* Wait for all task dependencies on this handle before marking it for lazy unregistration */
+	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);
+}
+
+static void __starpu_data_deinitialize(starpu_data_handle_t handle)
+{
+#ifdef STARPU_DEBUG
+	{
+		/* There shouldn't be any pending request since we acquired the data in W mode */
+		unsigned i, j, nnodes = starpu_memory_nodes_get_count();
+		for (i = 0; i < nnodes; i++)
+			for (j = 0; j < nnodes; j++)
+				STARPU_ASSERT_MSG(!handle->per_node[i].request[j], "request for handle %p pending from %u to %u while invalidating 
data!", handle, j, i); + } +#endif + + unsigned node; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + + if (local->state != STARPU_INVALID) + _STARPU_TRACE_DATA_STATE_INVALID(handle, node); + local->state = STARPU_INVALID; + local->initialized = 0; + } + + if (handle->per_worker) + { + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &handle->per_worker[worker]; + + local->state = STARPU_INVALID; + } + } +} + + +static void _starpu_data_invalidate(void *data) +{ + starpu_data_handle_t handle = data; + size_t size = _starpu_data_get_alloc_size(handle); + + _starpu_spin_lock(&handle->header_lock); + + //_STARPU_DEBUG("Really invalidating data %p\n", data); + + __starpu_data_deinitialize(handle); + + unsigned node; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + + if (local->refcnt > 1) + /* Avoid freeing chunk still in use by others than this function */ + continue; + + if (local->mc && local->allocated && local->automatically_allocated) + { + unsigned mapping; + for (mapping = 0; mapping < STARPU_MAXNODES; mapping++) + if (handle->per_node[mapping].mapped == (int) node) + break; + + if (mapping == STARPU_MAXNODES) + { + /* free the data copy in a lazy fashion */ + _starpu_request_mem_chunk_removal(handle, local, node, size); + } + } + } + + if (handle->per_worker) + { + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &handle->per_worker[worker]; + + if (local->mc && local->allocated && local->automatically_allocated && local->refcnt <= 1) + /* free the data copy in a lazy fashion */ + _starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size); + } + } + + 
_starpu_spin_unlock(&handle->header_lock); + + starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); +} + +static void _starpu_data_deinitialize(void *data) +{ + starpu_data_handle_t handle = data; + + _starpu_spin_lock(&handle->header_lock); + + //_STARPU_DEBUG("Really deinitializing data %p\n", data); + + __starpu_data_deinitialize(handle); + + unsigned node; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + + if (local->mc && local->allocated && local->automatically_allocated) + /* note that the data is now clean */ + _starpu_memchunk_clean(local->mc, node); + } + + if (handle->per_worker) + { + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &handle->per_worker[worker]; + + if (local->mc && local->allocated && local->automatically_allocated) + /* note that the data is now clean */ + _starpu_memchunk_clean(local->mc, starpu_worker_get_memory_node(worker)); + } + } + + _starpu_spin_unlock(&handle->header_lock); + + starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); +} + +void starpu_data_deinitialize(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W); + + _starpu_data_deinitialize(handle); + + handle->initialized = 0; +} + +void starpu_data_deinitialize_submit(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W, _starpu_data_deinitialize, handle); + + handle->initialized = 0; +} + +void _starpu_data_deinitialize_submit_noplan(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W | STARPU_NOPLAN, _starpu_data_deinitialize, handle); + + handle->initialized = 0; +} + +void 
starpu_data_invalidate(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W); + + _starpu_data_invalidate(handle); + + handle->initialized = 0; +} + +void starpu_data_invalidate_submit(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W, _starpu_data_invalidate, handle); + + handle->initialized = 0; +} + +void _starpu_data_invalidate_submit_noplan(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle); + + starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W | STARPU_NOPLAN, _starpu_data_invalidate, handle); + + handle->initialized = 0; +} + +enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle) +{ + return handle->ops->interfaceid; +} + +void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node) +{ + return handle->per_node[memory_node].data_interface; +} + +int starpu_data_interface_get_next_id(void) +{ + _data_interface_number += 1; + return _data_interface_number-1; +} + +int starpu_data_pack_node(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT_MSG(handle->ops->pack_data, "The datatype interface %s (%d) does not have a pack operation", handle->ops->name, handle->ops->interfaceid); + return handle->ops->pack_data(handle, node, ptr, count); +} + +int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count) +{ + return starpu_data_pack_node(handle, starpu_worker_get_local_memory_node(), ptr, count); +} + +int starpu_data_peek_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT_MSG(handle->ops->peek_data, "The datatype interface %s (%d) does not have a peek operation", handle->ops->name, handle->ops->interfaceid); + int ret; + ret = handle->ops->peek_data(handle, node, ptr, count); + 
return ret; +} + +int starpu_data_peek(starpu_data_handle_t handle, void *ptr, size_t count) +{ + return starpu_data_peek_node(handle, starpu_worker_get_local_memory_node(), ptr, count); +} + +int starpu_data_unpack_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT_MSG(handle->ops->unpack_data, "The datatype interface %s (%d) does not have an unpack operation", handle->ops->name, handle->ops->interfaceid); + int ret; + ret = handle->ops->unpack_data(handle, node, ptr, count); + return ret; +} + +int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count) +{ + return starpu_data_unpack_node(handle, starpu_worker_get_local_memory_node(), ptr, count); +} + +size_t starpu_data_get_size(starpu_data_handle_t handle) +{ + return handle->ops->get_size(handle); +} + +size_t starpu_data_get_alloc_size(starpu_data_handle_t handle) +{ + if (handle->ops->get_alloc_size) + return handle->ops->get_alloc_size(handle); + else + return handle->ops->get_size(handle); +} + +void starpu_data_set_name(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_TRACE_DATA_NAME(handle, name); +} + +int starpu_data_get_home_node(starpu_data_handle_t handle) +{ + return handle->home_node; +} + +void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]) +{ + unsigned i; + unsigned max_dimensions = sizeof(handle->coordinates)/sizeof(handle->coordinates[0]); + + if (dimensions > max_dimensions) + dimensions = max_dimensions; + + handle->dimensions = dimensions; + for (i = 0; i < dimensions; i++) + handle->coordinates[i] = dims[i]; + + _STARPU_TRACE_DATA_COORDINATES(handle, dimensions, dims); +} + +void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...) 
+{
+	int dims[dimensions ? dimensions : 1]; /* a zero-length VLA is undefined behaviour, so keep at least one slot */
+	unsigned i;
+	va_list varg_list;
+
+	va_start(varg_list, dimensions);
+	for (i = 0; i < dimensions; i++)
+		dims[i] = va_arg(varg_list, int);
+	va_end(varg_list);
+
+	starpu_data_set_coordinates_array(handle, dimensions, dims);
+}
+
+unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[])
+{
+	unsigned i;
+
+	if (dimensions > handle->dimensions)
+		dimensions = handle->dimensions;
+
+	for (i = 0; i < dimensions; i++)
+		dims[i] = handle->coordinates[i];
+
+	return dimensions;
+}
+
+void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream)
+{
+	if (handle->ops == NULL)
+		fprintf(stream, "Undefined");
+	else
+	{
+		switch (handle->ops->interfaceid)
+		{
+		case(STARPU_MATRIX_INTERFACE_ID):
+			fprintf(stream, "Matrix");
+			break;
+		case(STARPU_BLOCK_INTERFACE_ID):
+			fprintf(stream, "Block");
+			break;
+		case(STARPU_VECTOR_INTERFACE_ID):
+			fprintf(stream, "Vector");
+			break;
+		case(STARPU_CSR_INTERFACE_ID):
+			fprintf(stream, "CSR");
+			break;
+		case(STARPU_BCSR_INTERFACE_ID):
+			fprintf(stream, "BCSR");
+			break;
+		case(STARPU_VARIABLE_INTERFACE_ID):
+			fprintf(stream, "Variable");
+			break;
+		case(STARPU_VOID_INTERFACE_ID):
+			fprintf(stream, "Void");
+			break;
+		case(STARPU_MULTIFORMAT_INTERFACE_ID):
+			fprintf(stream, "Multiformat");
+			break;
+		case(STARPU_COO_INTERFACE_ID):
+			fprintf(stream, "COO");
+			break;
+		case(STARPU_TENSOR_INTERFACE_ID):
+			fprintf(stream, "Tensor");
+			break;
+		case(STARPU_UNKNOWN_INTERFACE_ID):
+			fprintf(stream, "UNKNOWN");
+			break;
+		default:
+			fprintf(stream, "User interface with id %d", handle->ops->interfaceid);
+			break;
+		}
+	}
+	void *data_interface = NULL;
+	if (starpu_data_test_if_allocated_on_node(handle, node))
+		data_interface = starpu_data_get_interface_on_node(handle, node);
+	if (handle->home_node >= 0 && starpu_data_test_if_allocated_on_node(handle, handle->home_node)) /* home_node can be negative (cf. the >= 0 checks in _starpu_data_unregister); never use it as an index then */
+		data_interface = starpu_data_get_interface_on_node(handle, handle->home_node);
+	if (handle->ops && 
handle->ops->describe && data_interface) + { + char buffer[1024]; + handle->ops->describe(data_interface, buffer, sizeof(buffer)); + fprintf(stream, " %s\n", buffer); + } + else + fprintf(stream, "\n"); + +} diff --git a/src/datawizard/interfaces/data_interface.h b/src/datawizard/interfaces/data_interface.h new file mode 100644 index 0000000..cae7ef0 --- /dev/null +++ b/src/datawizard/interfaces/data_interface.h @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DATA_INTERFACE_H__ +#define __DATA_INTERFACE_H__ + +/** @file */ + +#include +#include +#include +#ifdef STARPU_OPENMP +#include +#endif + +#pragma GCC visibility push(hidden) + +/** Generic type representing an interface, for now it's only used before + * execution on message-passing devices but it can be useful in other cases. 
+ */
+union _starpu_interface
+{
+	/* struct starpu_void_interface void; void doesn't have any data */
+	struct starpu_variable_interface variable;
+	struct starpu_vector_interface vector;
+	struct starpu_matrix_interface matrix;
+	struct starpu_block_interface block;
+	struct starpu_tensor_interface tensor;
+	struct starpu_csr_interface csr;
+	struct starpu_bcsr_interface bcsr;
+	struct starpu_coo_interface coo;
+};
+
+/** Some data interfaces or filters use this interface internally */
+extern struct starpu_data_interface_ops starpu_interface_multiformat_ops;
+
+void _starpu_data_free_interfaces(starpu_data_handle_t handle);
+
+extern int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node);
+void _starpu_data_initialize_per_worker(starpu_data_handle_t handle);
+
+extern struct starpu_arbiter *_starpu_global_arbiter;
+extern void _starpu_data_interface_init(void);
+extern int __starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_WARN_UNUSED_RESULT;
+#define _starpu_data_check_not_busy(handle) \
+	(STARPU_UNLIKELY(!(handle)->busy_count && \
+			 ((handle)->busy_waiting || (handle)->lazy_unregister)) ? \
+		__starpu_data_check_not_busy(handle) : 0)
+extern void _starpu_data_interface_shutdown(void);
+
+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+
+#define _starpu_data_is_multiformat_handle(handle) ((handle)->ops->is_multiformat)
+
+void _starpu_data_deinitialize_submit_noplan(starpu_data_handle_t handle);
+
+void _starpu_data_invalidate_submit_noplan(starpu_data_handle_t handle);
+
+#pragma GCC visibility pop
+
+#endif // __DATA_INTERFACE_H__
diff --git a/src/datawizard/interfaces/matrix_filters.c b/src/datawizard/interfaces/matrix_filters.c
new file mode 100644
index 0000000..089348b
--- /dev/null
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -0,0 +1,230 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2010-2010 Mehdi Juhoor
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+
+/*
+ * an example of a dummy partition function : blocks ... 
+ */ + +static void _starpu_matrix_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks, uintptr_t shadow_size) +{ + struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; + struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; + + unsigned blocksize; + /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny*/ + uint32_t nn; + uint32_t nx; + uint32_t ny; + + switch(dim) + { + /* horizontal*/ + case 1: + /* actual number of elements */ + nx = matrix_father->nx - 2 * shadow_size; + ny = matrix_father->ny; + nn = nx; + blocksize = 1; + break; + /* vertical*/ + case 2: + nx = matrix_father->nx; + /* actual number of elements */ + ny = matrix_father->ny - 2 * shadow_size; + nn = ny; + blocksize = matrix_father->ld; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + size_t elemsize = matrix_father->elemsize; + + STARPU_ASSERT_MSG(nchunks <= nn, "cannot split %u elements in %u parts", nn, nchunks); + + uint32_t child_nn; + size_t offset; + + starpu_filter_nparts_compute_chunk_size_and_offset(nn, nchunks, elemsize, id, blocksize, &child_nn, &offset); + + child_nn += 2 * shadow_size; + + STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__); + + /* update the child's interface */ + matrix_child->id = matrix_father->id; + + switch(dim) + { + case 1: + matrix_child->nx = child_nn; + matrix_child->ny = ny; + break; + case 2: + matrix_child->nx = nx; + matrix_child->ny = child_nn; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + matrix_child->elemsize = elemsize; + + /* is the information on this node valid ? 
*/ + if (matrix_father->dev_handle) + { + if (matrix_father->ptr) + matrix_child->ptr = matrix_father->ptr + offset; + matrix_child->ld = matrix_father->ld; + matrix_child->dev_handle = matrix_father->dev_handle; + matrix_child->offset = matrix_father->offset + offset; + matrix_child->allocsize = matrix_child->ld * matrix_child->ny * elemsize; + } + else + matrix_child->allocsize = matrix_child->nx * matrix_child->ny * elemsize; +} + +void starpu_matrix_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + _starpu_matrix_filter_block(1, father_interface, child_interface, f, id, nchunks, 0); +} + +/* + * an example of a dummy partition function : blocks ... + */ +void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_matrix_filter_block(1, father_interface, child_interface, f, id, nchunks, shadow_size); +} + +void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + _starpu_matrix_filter_block(2, father_interface, child_interface, f, id, nchunks, 0); +} + +void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_matrix_filter_block(2, father_interface, child_interface, f, id, nchunks, shadow_size); +} + +void starpu_matrix_filter_pick_vector_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; + 
/* each chunk becomes a vector */
+	struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
+
+	unsigned blocksize;
+
+	uint32_t nx;
+	uint32_t ny;
+
+	/* actual number of elements */
+	nx = matrix_father->nx;
+	ny = matrix_father->ny;
+	blocksize = matrix_father->ld; /* row stride: consecutive rows are ld (not nx) elements apart, as in the vertical block and pick_variable filters */
+
+	size_t elemsize = matrix_father->elemsize;
+
+	uintptr_t chunk_pos = (uintptr_t)f->filter_arg_ptr;
+
+	STARPU_ASSERT_MSG((chunk_pos + nchunks) <= ny, "cannot get %u vectors", nchunks);
+	STARPU_ASSERT_MSG((chunk_pos + id) < ny, "the chosen vector should be in the matrix");
+
+	size_t offset = (chunk_pos + id) * blocksize * elemsize; /* byte offset of the first element of the picked row */
+
+	STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__);
+
+	/* update the child's interface */
+	vector_child->id = STARPU_VECTOR_INTERFACE_ID;
+	vector_child->nx = nx;
+	vector_child->elemsize = elemsize;
+	vector_child->allocsize = vector_child->nx * elemsize;
+
+	/* is the information on this node valid ? 
*/ + if (matrix_father->dev_handle) + { + if (matrix_father->ptr) + vector_child->ptr = matrix_father->ptr + offset; + vector_child->dev_handle = matrix_father->dev_handle; + vector_child->offset = matrix_father->offset + offset; + } +} + +struct starpu_data_interface_ops *starpu_matrix_filter_pick_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_vector_ops; +} + +void starpu_matrix_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; + /* each chunk becomes a variable */ + struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; + + unsigned blocksize; + + uint32_t nx; + uint32_t ld; + uint32_t ny; + + /* actual number of elements */ + nx = matrix_father->nx; + ld = matrix_father->ld; + ny = matrix_father->ny; + blocksize = ld; + + size_t elemsize = matrix_father->elemsize; + + uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; + // int i; + // for(i=0; i<2; i++) + // { + // printf("pos is %d\n", chunk_pos[i]); + // } + + STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny), "the chosen variable should be in the matrix"); + + size_t offset = (((chunk_pos[1]) * blocksize) + chunk_pos[0]) * elemsize; + + STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__); + + /* update the child's interface */ + variable_child->id = STARPU_VARIABLE_INTERFACE_ID; + variable_child->elemsize = elemsize; + + /* is the information on this node valid ? 
*/ + if (matrix_father->dev_handle) + { + if (matrix_father->ptr) + variable_child->ptr = matrix_father->ptr + offset; + variable_child->dev_handle = matrix_father->dev_handle; + variable_child->offset = matrix_father->offset + offset; + } +} + +struct starpu_data_interface_ops *starpu_matrix_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; +} diff --git a/src/datawizard/interfaces/matrix_interface.c b/src/datawizard/interfaces/matrix_interface.c new file mode 100644 index 0000000..f836ffe --- /dev/null +++ b/src/datawizard/interfaces/matrix_interface.c @@ -0,0 +1,605 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods matrix_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void matrix_init(void *data_interface); +static void register_matrix_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static void *matrix_to_pointer(void *data_interface, unsigned node); +static starpu_ssize_t allocate_matrix_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_matrix_buffer_on_node(void *data_interface, unsigned node); +static void cache_matrix_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node); +static void reuse_matrix_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node); +static size_t matrix_interface_get_size(starpu_data_handle_t handle); +static size_t matrix_interface_get_alloc_size(starpu_data_handle_t handle); +static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle); +static uint32_t alloc_footprint_matrix_interface_crc32(starpu_data_handle_t handle); +static int matrix_compare(void *data_interface_a, void *data_interface_b); +static int matrix_alloc_compare(void *data_interface_a, void *data_interface_b); +static void display_matrix_interface(starpu_data_handle_t handle, FILE *f); +static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t 
count); +static int unpack_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_matrix_ops = +{ + .init = matrix_init, + .register_data_handle = register_matrix_handle, + .allocate_data_on_node = allocate_matrix_buffer_on_node, + .to_pointer = matrix_to_pointer, + .free_data_on_node = free_matrix_buffer_on_node, + .cache_data_on_node = cache_matrix_buffer_on_node, + .reuse_data_on_node = reuse_matrix_buffer_on_node, + .map_data = map_matrix, + .unmap_data = unmap_matrix, + .update_map = update_map_matrix, + .copy_methods = &matrix_copy_data_methods_s, + .get_size = matrix_interface_get_size, + .get_alloc_size = matrix_interface_get_alloc_size, + .footprint = footprint_matrix_interface_crc32, + .alloc_footprint = alloc_footprint_matrix_interface_crc32, + .compare = matrix_compare, + .alloc_compare = matrix_alloc_compare, + .interfaceid = STARPU_MATRIX_INTERFACE_ID, + .interface_size = sizeof(struct starpu_matrix_interface), + .display = display_matrix_interface, + .pack_data = pack_matrix_handle, + .peek_data = peek_matrix_handle, + .unpack_data = unpack_matrix_handle, + .describe = describe, + .name = "STARPU_MATRIX_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void matrix_init(void *data_interface) +{ + struct starpu_matrix_interface *matrix_interface = data_interface; + matrix_interface->allocsize = -1; +} + +static void register_matrix_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_matrix_interface *local_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + 
local_interface->ptr = matrix_interface->ptr;
+			local_interface->dev_handle = matrix_interface->dev_handle;
+			local_interface->offset = matrix_interface->offset;
+			local_interface->ld = matrix_interface->ld;
+		}
+		else
+		{
+			local_interface->ptr = 0;
+			local_interface->dev_handle = 0;
+			local_interface->offset = 0;
+			local_interface->ld = 0;
+		}
+
+		local_interface->id = matrix_interface->id;
+		local_interface->nx = matrix_interface->nx;
+		local_interface->ny = matrix_interface->ny;
+		local_interface->elemsize = matrix_interface->elemsize;
+		local_interface->allocsize = matrix_interface->allocsize;
+	}
+}
+
+static void *matrix_to_pointer(void *data_interface, unsigned node)
+{
+	(void) node;
+	struct starpu_matrix_interface *matrix_interface = data_interface;
+
+	return (void*) matrix_interface->ptr;
+}
+
+/* declare a new data with the matrix interface
+ *
+ * The matrix starts at ptr on home_node and holds ny lines of nx elements of
+ * elemsize bytes each; ld is the stride, in elements, between two consecutive
+ * lines and must not be less than nx. allocsize is recorded in the interface
+ * as the allocation size of the data. */
+void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node,
+					   uintptr_t ptr, uint32_t ld, uint32_t nx,
+					   uint32_t ny, size_t elemsize, size_t allocsize)
+{
+	STARPU_ASSERT_MSG(ld >= nx, "ld = %u should not be less than nx = %u.", ld, nx);
+	struct starpu_matrix_interface matrix_interface =
+	{
+		.id = STARPU_MATRIX_INTERFACE_ID,
+		.ptr = ptr,
+		.ld = ld,
+		.nx = nx,
+		.ny = ny,
+		.elemsize = elemsize,
+		.dev_handle = ptr,
+		.offset = 0,
+		.allocsize = allocsize,
+	};
+#ifndef STARPU_SIMGRID
+	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
+	{
+		if (nx && ny && elemsize)
+		{
+			/* Probe the first and the last byte of the matrix. The offset of
+			 * the last line is computed in size_t: (ny-1)*ld alone would be
+			 * evaluated on 32 bits and wrap for large matrices. */
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + (size_t)(ny-1)*ld*elemsize + nx*elemsize - 1);
+		}
+	}
+#endif
+
+	starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops);
+}
+
+/* Same as starpu_matrix_data_register_allocsize(), with the allocation size
+ * set to the nx * ny * elemsize bytes of the matrix content. The product is
+ * computed in size_t so that it does not wrap on 32 bits. */
+void starpu_matrix_data_register(starpu_data_handle_t *handleptr, int home_node,
+				 uintptr_t ptr, uint32_t ld, uint32_t nx,
+				 uint32_t ny, size_t elemsize)
+{
+	starpu_matrix_data_register_allocsize(handleptr, home_node, ptr, ld, nx, ny, elemsize, (size_t) nx * ny *
elemsize); +} + +void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld) +{ + struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + matrix_interface->ptr = ptr; + matrix_interface->dev_handle = dev_handle; + matrix_interface->offset = offset; + matrix_interface->ld = ld; +} + +static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_matrix_get_nx(handle), starpu_matrix_get_ny(handle)); +} + +static uint32_t alloc_footprint_matrix_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_matrix_get_allocsize(handle), 0); +} + +static int matrix_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_matrix_interface *matrix_a = (struct starpu_matrix_interface *) data_interface_a; + struct starpu_matrix_interface *matrix_b = (struct starpu_matrix_interface *) data_interface_b; + + /* Two matrices are considered compatible if they have the same size */ + return (matrix_a->nx == matrix_b->nx) + && (matrix_a->ny == matrix_b->ny) + && (matrix_a->elemsize == matrix_b->elemsize); +} + +static int matrix_alloc_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_matrix_interface *matrix_a = (struct starpu_matrix_interface *) data_interface_a; + struct starpu_matrix_interface *matrix_b = (struct starpu_matrix_interface *) data_interface_b; + + /* Two matrices are considered allocation-compatible if they have the same size */ + return (matrix_a->allocsize == matrix_b->allocsize); +} + +static void display_matrix_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t%u\t", matrix_interface->nx, 
matrix_interface->ny); +} + +#define IS_CONTIGUOUS_MATRIX(nx, ny, ld) ((nx) == (ld)) + +//#define DYNAMIC_MATRICES + +struct pack_matrix_header +{ +#ifdef DYNAMIC_MATRICES + /* Receiving matrices with different sizes from MPI */ + /* FIXME: that would break alignment for O_DIRECT disk access... + * while in the disk case, we do know the matrix size anyway */ + /* FIXME: rather make MPI pack the data interface in the envelope for us? */ + uint32_t nx; + uint32_t ny; + size_t elemsize; +#endif +}; + +static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, node); + + uint32_t ld = matrix_interface->ld; + uint32_t nx = matrix_interface->nx; + uint32_t ny = matrix_interface->ny; + size_t elemsize = matrix_interface->elemsize; + + *count = nx*ny*elemsize + sizeof(struct pack_matrix_header); + + if (ptr != NULL) + { + char *matrix = (void *)matrix_interface->ptr; + + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + + struct pack_matrix_header *header = *ptr; +#ifdef DYNAMIC_MATRICES + header->nx = nx; + header->ny = ny; + header->elemsize = elemsize; +#endif + + char *cur = (char*) *ptr + sizeof(*header); + + if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) + memcpy(cur, matrix, nx*ny*elemsize); + else + { + uint32_t y; + for(y=0 ; yld; + uint32_t nx = matrix_interface->nx; + uint32_t ny = matrix_interface->ny; + size_t elemsize = matrix_interface->elemsize; + + struct pack_matrix_header *header = ptr; + +#ifdef DYNAMIC_MATRICES + STARPU_ASSERT(count >= sizeof(*header)); + + if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) + { + /* We can store whatever can fit */ + + STARPU_ASSERT_MSG(header->elemsize == elemsize, + "Data element size %u needs to be same as the received data element size %u", + (unsigned) elemsize, 
(unsigned) header->elemsize); + + STARPU_ASSERT_MSG(header->nx * header->ny * header->elemsize <= matrix_interface->allocsize, + "Initial size of data %lu needs to be big enough for received data %ux%ux%u", + (unsigned long) matrix_interface->allocsize, + (unsigned) header->nx, (unsigned) header->ny, + (unsigned) header->elemsize); + + /* Better keep it contiguous */ + matrix_interface->ld = ld = header->nx; + } + else + { + STARPU_ASSERT_MSG(header->nx <= nx, + "Initial nx %u of data needs to be big enough for received data nx %u\n", + nx, header->nx); + STARPU_ASSERT_MSG(header->ny <= ny, + "Initial ny %u of data needs to be big enough for received data ny %u\n", + ny, header->ny); + } + + matrix_interface->nx = nx = header->nx; + matrix_interface->ny = ny = header->ny; +#endif + + char *cur = (char*) ptr + sizeof(*header); + + STARPU_ASSERT(count == sizeof(*header) + elemsize * nx * ny); + + char *matrix = (void *)matrix_interface->ptr; + + if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) + memcpy(matrix, ptr, nx*ny*elemsize); + else + { + uint32_t y; + for(y=0 ; yid == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); +#endif + + return matrix_interface->nx * matrix_interface->ny * matrix_interface->elemsize; +} + +static size_t matrix_interface_get_alloc_size(starpu_data_handle_t handle) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. 
The given data is not a matrix."); +#endif + + STARPU_ASSERT_MSG(matrix_interface->allocsize != (size_t)-1, "The matrix allocation size needs to be defined"); + + return matrix_interface->allocsize; +} + +/* offer an access to the data parameters */ +uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); +#endif + + return matrix_interface->nx; +} + +uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); +#endif + + return matrix_interface->ny; +} + +uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. 
The given data is not a matrix."); +#endif + + return matrix_interface->ld; +} + +uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); +#endif + + return matrix_interface->ptr; +} + +size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); +#endif + + return matrix_interface->elemsize; +} + +size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. 
The given data is not a matrix."); +#endif + + return matrix_interface->allocsize; +} + +/* memory allocation/deallocation primitives for the matrix interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_matrix_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr = 0, handle; + + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface_; + + uint32_t ld = matrix_interface->nx; // by default + + starpu_ssize_t allocated_memory = matrix_interface->allocsize; + handle = starpu_malloc_on_node(dst_node, allocated_memory); + + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + addr = handle; + + /* update the data properly in consequence */ + matrix_interface->ptr = addr; + matrix_interface->dev_handle = handle; + matrix_interface->offset = 0; + matrix_interface->ld = ld; + + return allocated_memory; +} + +static void free_matrix_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface; + + starpu_free_on_node(node, matrix_interface->dev_handle, matrix_interface->allocsize); + matrix_interface->ptr = 0; + matrix_interface->dev_handle = 0; +} + +static void cache_matrix_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_matrix_interface *cached_matrix_interface = cached_interface; + struct starpu_matrix_interface *src_matrix_interface = src_data_interface; + + cached_matrix_interface->ptr = src_matrix_interface->ptr; + src_matrix_interface->ptr = 0; + cached_matrix_interface->dev_handle = src_matrix_interface->dev_handle; + src_matrix_interface->dev_handle = 0; + cached_matrix_interface->allocsize = src_matrix_interface->allocsize; + STARPU_ASSERT(src_matrix_interface->offset == 0); +} + +static void reuse_matrix_buffer_on_node(void *dst_data_interface, 
const void *cached_interface, unsigned node STARPU_ATTRIBUTE_UNUSED)
+{
+	struct starpu_matrix_interface *dst_matrix_interface = dst_data_interface;
+	const struct starpu_matrix_interface *cached_matrix_interface = cached_interface;
+
+	dst_matrix_interface->ptr = cached_matrix_interface->ptr;
+	dst_matrix_interface->dev_handle = cached_matrix_interface->dev_handle;
+	dst_matrix_interface->offset = 0;
+	dst_matrix_interface->ld = dst_matrix_interface->nx; // by default
+}
+
+/* Number of bytes spanned by a matrix whose lines are ld elements apart:
+ * ny-1 full strides followed by a last line of nx elements, i.e.
+ * (ld*(ny-1)+nx)*elemsize. The computation is done in size_t since
+ * ld*(ny-1) does not fit 32 bits for large matrices, and an empty matrix
+ * (ny == 0) explicitly spans nothing instead of relying on ny-1 wrapping. */
+static size_t matrix_span_size(const struct starpu_matrix_interface *matrix)
+{
+	if (matrix->ny == 0)
+		return 0;
+	return ((size_t) matrix->ld * (matrix->ny - 1) + matrix->nx) * matrix->elemsize;
+}
+
+/* Map the source matrix of src_node into dst_node. On success the destination
+ * interface is pointed to the mapped area, with the same ld as the source,
+ * and 0 is returned; otherwise the error set by starpu_interface_map() is
+ * returned. */
+static int map_matrix(void *src_interface, unsigned src_node,
+		      void *dst_interface, unsigned dst_node)
+{
+	struct starpu_matrix_interface *src_matrix = src_interface;
+	struct starpu_matrix_interface *dst_matrix = dst_interface;
+	int ret;
+	uintptr_t mapped;
+
+	/* map area ld*(ny-1)+nx */
+	mapped = starpu_interface_map(src_matrix->dev_handle, src_matrix->offset, src_node, dst_node, matrix_span_size(src_matrix), &ret);
+	if (mapped)
+	{
+		dst_matrix->dev_handle = mapped;
+		dst_matrix->offset = 0;
+		if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM)
+			dst_matrix->ptr = mapped;
+		dst_matrix->ld = src_matrix->ld;
+		return 0;
+	}
+	return ret;
+}
+
+/* Undo map_matrix(): unmap the same area and clear the destination handle */
+static int unmap_matrix(void *src_interface, unsigned src_node,
+			void *dst_interface, unsigned dst_node)
+{
+	struct starpu_matrix_interface *src_matrix = src_interface;
+	struct starpu_matrix_interface *dst_matrix = dst_interface;
+
+	int ret = starpu_interface_unmap(src_matrix->dev_handle, src_matrix->offset, src_node, dst_matrix->dev_handle, dst_node, matrix_span_size(src_matrix));
+	dst_matrix->dev_handle = 0;
+
+	return ret;
+}
+
+/* Update the mapping set up by map_matrix(), over the same area */
+static int update_map_matrix(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node)
+{
+	struct starpu_matrix_interface *src_matrix = src_interface;
+	struct starpu_matrix_interface *dst_matrix = dst_interface;
+
+	return starpu_interface_update_map(src_matrix->dev_handle, src_matrix->offset,
src_node, dst_matrix->dev_handle, dst_matrix->offset, dst_node, matrix_span_size(src_matrix));
+}
+
+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
+{
+	struct starpu_matrix_interface *src_matrix = (struct starpu_matrix_interface *) src_interface;
+	struct starpu_matrix_interface *dst_matrix = (struct starpu_matrix_interface *) dst_interface;
+	int ret = 0;
+
+	uint32_t nx = dst_matrix->nx;
+	uint32_t ny = dst_matrix->ny;
+	size_t elemsize = dst_matrix->elemsize;
+
+	uint32_t ld_src = src_matrix->ld;
+	uint32_t ld_dst = dst_matrix->ld;
+
+	if (starpu_interface_copy2d(src_matrix->dev_handle, src_matrix->offset, src_node,
+				    dst_matrix->dev_handle, dst_matrix->offset, dst_node,
+				    nx * elemsize,
+				    ny, ld_src * elemsize, ld_dst * elemsize,
+				    async_data))
+		ret = -EAGAIN;
+
+	starpu_interface_data_copy(src_node, dst_node, (size_t)nx*ny*elemsize);
+
+	return ret;
+}
+
+static starpu_ssize_t describe(void *data_interface, char *buf, size_t size)
+{
+	struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) data_interface;
+	return snprintf(buf, size, "M%ux%ux%u",
+			(unsigned) matrix->nx,
+			(unsigned) matrix->ny,
+			(unsigned) matrix->elemsize);
+}
diff --git a/src/datawizard/interfaces/multiformat_interface.c b/src/datawizard/interfaces/multiformat_interface.c
new file mode 100644
index 0000000..bcf411d
--- /dev/null
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -0,0 +1,652 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +/* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ + +static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +#ifdef STARPU_USE_CUDA +static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream); +static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream); +static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED); +static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream); +#endif +#ifdef STARPU_USE_OPENCL +static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +static int copy_opencl_to_opencl(void *src_interface, unsigned src_node 
STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); +static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event); +static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event); +#endif + +static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = +{ + .ram_to_ram = copy_ram_to_ram, +#ifdef STARPU_USE_CUDA + .ram_to_cuda = copy_ram_to_cuda, + .cuda_to_ram = copy_cuda_to_ram, + .ram_to_cuda_async = copy_ram_to_cuda_async, + .cuda_to_ram_async = copy_cuda_to_ram_async, + .cuda_to_cuda = copy_cuda_to_cuda, + .cuda_to_cuda_async = copy_cuda_to_cuda_async, +#else +#ifdef STARPU_SIMGRID + /* Enable GPU-GPU transfers in simgrid */ + .cuda_to_cuda_async = (void *)1, +#endif +#endif +#ifdef STARPU_USE_OPENCL + .ram_to_opencl = copy_ram_to_opencl, + .opencl_to_ram = copy_opencl_to_ram, + .opencl_to_opencl = copy_opencl_to_opencl, + .ram_to_opencl_async = copy_ram_to_opencl_async, + .opencl_to_ram_async = copy_opencl_to_ram_async, +#endif +}; + +static void register_multiformat_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node); +static void *multiformat_to_pointer(void *data_interface, unsigned node); +static void free_multiformat_buffer_on_node(void *data_interface, unsigned node); +static size_t multiformat_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle); +static int multiformat_compare(void *data_interface_a, void *data_interface_b); +static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f); +static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle); + +static struct starpu_multiformat_data_interface_ops* 
+get_mf_ops(void *data_interface) +{ + struct starpu_multiformat_interface *mf; + mf = (struct starpu_multiformat_interface *) data_interface; + + return mf->ops; +} + +struct starpu_data_interface_ops starpu_interface_multiformat_ops = +{ + .register_data_handle = register_multiformat_handle, + .allocate_data_on_node = allocate_multiformat_buffer_on_node, + .to_pointer = multiformat_to_pointer, + .free_data_on_node = free_multiformat_buffer_on_node, + .copy_methods = &multiformat_copy_data_methods_s, + .get_size = multiformat_interface_get_size, + .footprint = footprint_multiformat_interface_crc32, + .compare = multiformat_compare, + .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID, + .interface_size = sizeof(struct starpu_multiformat_interface), + .display = display_multiformat_interface, + .is_multiformat = 1, + .get_mf_ops = get_mf_ops +}; + +static void *multiformat_to_pointer(void *data_interface, unsigned node) +{ + struct starpu_multiformat_interface *multiformat_interface = data_interface; + + switch(starpu_node_get_kind(node)) + { + case STARPU_CPU_RAM: + return multiformat_interface->cpu_ptr; +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_RAM: + return multiformat_interface->cuda_ptr; +#endif +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_RAM: + return multiformat_interface->opencl_ptr; +#endif + default: + STARPU_ABORT(); + } + return NULL; +} + +static void register_multiformat_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = (struct starpu_multiformat_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_multiformat_interface *local_interface = + (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->cpu_ptr = multiformat_interface->cpu_ptr; +#ifdef STARPU_USE_CUDA + local_interface->cuda_ptr = 
multiformat_interface->cuda_ptr; +#endif +#ifdef STARPU_USE_OPENCL + local_interface->opencl_ptr = multiformat_interface->opencl_ptr; +#endif + } + else + { + local_interface->cpu_ptr = NULL; +#ifdef STARPU_USE_CUDA + local_interface->cuda_ptr = NULL; +#endif +#ifdef STARPU_USE_OPENCL + local_interface->opencl_ptr = NULL; +#endif + } + local_interface->id = multiformat_interface->id; + local_interface->nx = multiformat_interface->nx; + local_interface->ops = multiformat_interface->ops; + } +} + +void starpu_multiformat_data_register(starpu_data_handle_t *handleptr, + int home_node, + void *ptr, + uint32_t nobjects, + struct starpu_multiformat_data_interface_ops *format_ops) +{ + struct starpu_multiformat_interface multiformat = + { + .id = STARPU_MULTIFORMAT_INTERFACE_ID, + .cpu_ptr = ptr, + .cuda_ptr = NULL, + .opencl_ptr = NULL, + .nx = nobjects, + .ops = format_ops + }; + + starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops); +} + +static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0); +} + +static int multiformat_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a; + struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b; + + return (multiformat_a->nx == multiformat_b->nx) + && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize) +#ifdef STARPU_USE_CUDA + && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize) +#endif +#ifdef STARPU_USE_OPENCL + && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize) +#endif + ; +} + +static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = 
(struct starpu_multiformat_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t", multiformat_interface->nx); +} + +/* XXX : returns CPU size */ +static size_t multiformat_interface_get_size(starpu_data_handle_t handle) +{ + size_t size; + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize; + return size; +} + +uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle) +{ + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return multiformat_interface->nx; +} + +static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = (struct starpu_multiformat_interface *) data_interface_; + uintptr_t addr = 0; + starpu_ssize_t allocated_memory = 0; + size_t size; + + size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize; + allocated_memory += size; + addr = starpu_malloc_on_node(dst_node, size); + if (!addr) + goto fail_cpu; + multiformat_interface->cpu_ptr = (void *) addr; +#ifdef STARPU_USE_CUDA + size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize; + allocated_memory += size; + addr = starpu_malloc_on_node(dst_node, size); + if (!addr) + goto fail_cuda; + multiformat_interface->cuda_ptr = (void *) addr; +#endif +#ifdef STARPU_USE_OPENCL + size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize; + allocated_memory += size; + addr = starpu_malloc_on_node(dst_node, size); + if (!addr) + goto fail_opencl; + multiformat_interface->opencl_ptr = (void *) addr; +#endif + + return 
allocated_memory; + +#ifdef STARPU_USE_OPENCL + starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize); +fail_opencl: +#endif +#ifdef STARPU_USE_CUDA + starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize); +fail_cuda: +#endif + starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize); +fail_cpu: + return -ENOMEM; +} + +static void free_multiformat_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_multiformat_interface *multiformat_interface; + multiformat_interface = (struct starpu_multiformat_interface *) data_interface; + + starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr, + multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize); + multiformat_interface->cpu_ptr = NULL; +#ifdef STARPU_USE_CUDA + starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr, + multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize); + multiformat_interface->cuda_ptr = NULL; +#endif +#ifdef STARPU_USE_OPENCL + starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr, + multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize); + multiformat_interface->opencl_ptr = NULL; +#endif +} + +/* + * Copy methods + */ +static int copy_ram_to_ram(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; + + STARPU_ASSERT(src_multiformat != NULL); + STARPU_ASSERT(dst_multiformat != NULL); + STARPU_ASSERT(dst_multiformat->ops != 
NULL); + + size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize; + memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size); + starpu_interface_data_copy(src_node, dst_node, size); + + return 0; +} + +#ifdef STARPU_USE_CUDA +static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, + enum cudaMemcpyKind kind) +{ + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; + + size_t size; + + cudaError_t status; + + switch (kind) + { + case cudaMemcpyHostToDevice: + { + size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + if (src_multiformat->cuda_ptr == NULL) + { + src_multiformat->cuda_ptr = malloc(size); + if (src_multiformat->cuda_ptr == NULL) + return -ENOMEM; + } + status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind); + if (!status) + status = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(status)) + STARPU_CUDA_REPORT_ERROR(status); + break; + } + case cudaMemcpyDeviceToHost: + { + size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind); + if (!status) + status = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(status)) + STARPU_CUDA_REPORT_ERROR(status); + + break; + } + case cudaMemcpyDeviceToDevice: + { + size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind); + if (!status) + status = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(status)) + STARPU_CUDA_REPORT_ERROR(status); + break; + } + default: + STARPU_ABORT(); + } + starpu_interface_data_copy(src_node, dst_node, size); + + return 0; +} + 
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node) +{ + return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice); +} + +static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node) +{ + return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost); +} + +static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, + cudaStream_t stream, enum cudaMemcpyKind kind) +{ + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; + + size_t size; + cudaError_t status; + double start; + + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + switch (kind) + { + case cudaMemcpyHostToDevice: + { + size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + if (src_multiformat->cuda_ptr == NULL) + { + src_multiformat->cuda_ptr = malloc(size); + if (src_multiformat->cuda_ptr == NULL) + return -ENOMEM; + } + + status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream); + if (STARPU_UNLIKELY(status)) + { + STARPU_CUDA_REPORT_ERROR(status); + } + break; + } + case cudaMemcpyDeviceToHost: + { + size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream); + if (!status) + status = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(status)) + STARPU_CUDA_REPORT_ERROR(status); + + break; + } + case cudaMemcpyDeviceToDevice: + { + size = src_multiformat->nx * 
src_multiformat->ops->cuda_elemsize; + status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream); + if (STARPU_UNLIKELY(status)) + STARPU_CUDA_REPORT_ERROR(status); + break; + } + default: + STARPU_ABORT(); + } + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + return 0; +} + +static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream) +{ + return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice); +} + +static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream) +{ + return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost); +} + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +static int copy_cuda_peer_common(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cudaStream_t stream) +{ + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; + + STARPU_ASSERT(src_multiformat != NULL); + STARPU_ASSERT(dst_multiformat != NULL); + STARPU_ASSERT(src_multiformat->ops != NULL); + + cudaError_t status; + int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; + int src_dev = starpu_memory_node_get_devid(src_node); + int dst_dev = starpu_memory_node_get_devid(dst_node); + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev, + src_multiformat->cuda_ptr, src_dev, + size, stream); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + /* All 
good ! Still, returning -EAGAIN, because we will need to + check the transfer completion later */ + if (status == cudaSuccess) + return -EAGAIN; + } + + /* Either a synchronous transfer was requested, or the asynchronous one + failed. */ + status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev, + src_multiformat->cuda_ptr, src_dev, + size); + if (!status) + status = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(status != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(status); + + starpu_interface_data_copy(src_node, dst_node, size); + + return 0; +} +#endif +static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ + if (src_node == dst_node) + { + return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice); + } + else + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + return copy_cuda_peer_common(src_interface, src_node, + dst_interface, dst_node, + NULL); +#else + STARPU_ABORT(); +#endif + } +} + +static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cudaStream_t stream) +{ + if (src_node == dst_node) + { + return copy_cuda_common_async(src_interface, src_node, + dst_interface, dst_node, + stream, cudaMemcpyDeviceToDevice); + } + else + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + return copy_cuda_peer_common(src_interface, src_node, + dst_interface, dst_node, + stream); +#else + STARPU_ABORT(); +#endif + } +} +#endif /* STARPU_USE_CUDA */ + +#ifdef STARPU_USE_OPENCL +static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cl_event *event) +{ + int err, ret; + size_t size; + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct 
starpu_multiformat_interface *) dst_interface; + + STARPU_ASSERT(src_multiformat != NULL); + STARPU_ASSERT(dst_multiformat != NULL); + STARPU_ASSERT(src_multiformat->ops != NULL); + + size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize; + + + err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr, + src_node, + (cl_mem) dst_multiformat->cpu_ptr, + dst_node, + size, + 0, + event, + &ret); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + + if (!event) + starpu_interface_data_copy(src_node, dst_node, size); + return ret; +} + +static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + cl_event *event) +{ + int err, ret; + size_t size; + struct starpu_multiformat_interface *src_multiformat; + struct starpu_multiformat_interface *dst_multiformat; + + src_multiformat = (struct starpu_multiformat_interface *) src_interface; + dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; + + STARPU_ASSERT(src_multiformat != NULL); + STARPU_ASSERT(dst_multiformat != NULL); + STARPU_ASSERT(src_multiformat->ops != NULL); + STARPU_ASSERT(dst_multiformat->ops != NULL); + + size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize; + + if (dst_multiformat->opencl_ptr == NULL) + { + /* XXX : it is weird that we might have to allocate memory here... 
*/ + dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize); + STARPU_ASSERT_MSG(dst_multiformat->opencl_ptr != NULL || dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize == 0, "Cannot allocate %ld bytes\n", (long) (dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize)); + } + err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr, + src_node, + dst_multiformat->opencl_ptr, + dst_node, + size, + 0, + event, + &ret); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + + if (!event) + starpu_interface_data_copy(src_node, dst_node, size); + + return ret; +} + +static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ + return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL); +} + +static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ + return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL); +} + +static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + (void) src_interface; + (void) dst_interface; + (void) src_node; + (void) dst_node; + + STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)"); + return 0; +} +#endif diff --git a/src/datawizard/interfaces/ndim_filters.c b/src/datawizard/interfaces/ndim_filters.c new file mode 100644 index 0000000..79174fe --- /dev/null +++ b/src/datawizard/interfaces/ndim_filters.c @@ -0,0 +1,563 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static void _interface_assignment_ndim_to_tensor(void *ndim_interface, void *child_interface); +static void _interface_assignment_ndim_to_block(void *ndim_interface, void *child_interface); +static void _interface_assignment_ndim_to_matrix(void *ndim_interface, void *child_interface); +static void _interface_assignment_ndim_to_vector(void *ndim_interface, void *child_interface); +static void _interface_assignment_ndim_to_variable(void *ndim_interface, void *child_interface); + +static void _interface_deallocate(void * ndim_interface); + +static void _starpu_ndim_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts, uintptr_t shadow_size) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + struct starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; + + STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); + + size_t ndim = ndim_father->ndim; + STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + + unsigned dim = 0; + if (ndim > 1) + dim = f->filter_arg; + + STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) 
ndim); + + uint32_t father_nn = 0; + uint32_t ni[ndim]; + unsigned i; + for (i=0; inn[i] - 2 * shadow_size; + father_nn = ni[i]; + } + else + { + ni[i] = ndim_father->nn[i]; + } + } + + STARPU_ASSERT_MSG(nparts <= father_nn, "cannot split %u elements in %u parts", father_nn, nparts); + + unsigned blocksize = ndim_father->ldn[dim]; + size_t elemsize = ndim_father->elemsize; + uint32_t child_nn; + size_t offset; + + starpu_filter_nparts_compute_chunk_size_and_offset(father_nn, nparts, elemsize, id, blocksize, &child_nn, &offset); + child_nn += 2 * shadow_size; + ndim_child->id = ndim_father->id; + + _STARPU_MALLOC(ndim_child->nn, ndim*sizeof(uint32_t)); + for (i=0; inn[i] = ni[i]; + } + else + { + ndim_child->nn[i] = child_nn; + } + } + + _STARPU_MALLOC(ndim_child->ldn, ndim*sizeof(uint32_t)); + ndim_child->ndim = ndim; + ndim_child->elemsize = elemsize; + ndim_child->allocsize = elemsize; + + if (ndim_father->dev_handle) + { + if (ndim_father->ptr) + ndim_child->ptr = ndim_father->ptr + offset; + for (i=0; ildn[i] = ndim_father->ldn[i]; + } + + if (ndim >= 1) + ndim_child->allocsize *= ndim_child->ldn[ndim-1] * ndim_child->nn[ndim-1]; + + ndim_child->dev_handle = ndim_father->dev_handle; + ndim_child->offset = ndim_father->offset + offset; + } + else + { + for (i=0; iallocsize *= ndim_child->nn[i]; + } +} + +void starpu_ndim_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_ndim_filter_block(father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_ndim_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_ndim_filter_block(father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_ndim_filter_to_tensor(void *father_interface, void *child_interface, 
STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 4, "can only be applied on a 4-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a tensor if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); + + _interface_assignment_ndim_to_tensor(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_to_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 3, "can only be applied on a 3-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a block if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); + + _interface_assignment_ndim_to_block(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_to_matrix(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 2, "can only be applied on a 2-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a matrix if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface 
ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); + + _interface_assignment_ndim_to_matrix(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_to_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 1, "can only be applied on a 1-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a vector if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); + + _interface_assignment_ndim_to_vector(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_to_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 0, "can only be applied on a 0-dim array (a variable)"); + STARPU_ASSERT_MSG(id == 0 && nparts == 1, "cannot split a variable"); + + _interface_assignment_ndim_to_variable(father_interface, child_interface); +} + +void starpu_ndim_filter_pick_ndim(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + struct starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; + + STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be 
applied on a ndim array data", __func__); + ndim_child->id = STARPU_NDIM_INTERFACE_ID; + + size_t ndim = ndim_father->ndim; + STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + + unsigned dim = 0; + if (ndim > 1) + dim = f->filter_arg; + + STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) ndim); + + uint32_t father_nn = 0; + uint32_t ni[ndim]; + unsigned i; + for (i=0; inn[i]; + if(i==dim) + father_nn = ni[i]; + } + + STARPU_ASSERT_MSG(nparts <= father_nn, "cannot split %u elements in %u parts", father_nn, nparts); + + unsigned blocksize = ndim_father->ldn[dim]; + size_t elemsize = ndim_father->elemsize; + size_t chunk_pos = (size_t)f->filter_arg_ptr; + + STARPU_ASSERT_MSG((chunk_pos + id) < father_nn, "the chosen sub (n-1)dim array should be in the ndim array"); + + size_t offset = (chunk_pos + id) * blocksize * elemsize; + int j; + _STARPU_MALLOC(ndim_child->nn, (ndim-1)*sizeof(uint32_t)); + if (ndim > 1) + { + j = 0; + for (i=0; inn[j] = ni[i]; + j++; + } + } + } + + _STARPU_MALLOC(ndim_child->ldn, (ndim-1)*sizeof(uint32_t)); + ndim_child->ndim = ndim-1; + ndim_child->elemsize = elemsize; + ndim_child->allocsize = elemsize; + + if (ndim_father->dev_handle) + { + if (ndim_father->ptr) + ndim_child->ptr = ndim_father->ptr + offset; + if (ndim > 1) + { + j = 0; + for (i=0; ildn[j] = ndim_father->ldn[i]; + j++; + } + } + + ndim_child->allocsize *= ndim_child->ldn[ndim-2] * ndim_child->nn[ndim-2]; + } + + ndim_child->dev_handle = ndim_father->dev_handle; + ndim_child->offset = ndim_father->offset + offset; + } + else + { + for (i=0; iallocsize *= ndim_child->nn[i]; + } +} + +void starpu_ndim_filter_5d_pick_tensor(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 5, "can only be 
applied on a 5-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a tensor if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); + + _interface_assignment_ndim_to_tensor(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_4d_pick_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 4, "can only be applied on a 4-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a block if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); + + _interface_assignment_ndim_to_block(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_3d_pick_matrix(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 3, "can only be applied on a 3-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a matrix if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); + + _interface_assignment_ndim_to_matrix(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void 
starpu_ndim_filter_2d_pick_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 2, "can only be applied on a 2-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a vector if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); + + _interface_assignment_ndim_to_vector(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +void starpu_ndim_filter_1d_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + STARPU_ASSERT_MSG(ndim_father->ndim == 1, "can only be applied on a 1-dim array"); + if (ndim_father->dev_handle) + STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a variable if ldn[0] does not equal to 1"); + + struct starpu_ndim_interface ndim_child; + memset(&ndim_child, 0, sizeof(ndim_child)); + starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); + + _interface_assignment_ndim_to_variable(&ndim_child, child_interface); + + _interface_deallocate(&ndim_child); +} + +static void _interface_deallocate(void *ndim_interface) +{ + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + free(ndarr->nn); + free(ndarr->ldn); +} + +struct starpu_data_interface_ops *starpu_ndim_filter_pick_tensor_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_tensor_ops; +} + +struct starpu_data_interface_ops 
*starpu_ndim_filter_pick_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_block_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_pick_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_matrix_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_pick_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_vector_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_to_tensor_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_tensor_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_to_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_block_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_to_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_matrix_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_to_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_vector_ops; +} + +struct starpu_data_interface_ops *starpu_ndim_filter_to_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; +} + +static void _interface_assignment_ndim_to_tensor(void *ndim_interface, void 
*child_interface) +{ + struct starpu_tensor_interface *tensor = (struct starpu_tensor_interface *) child_interface; + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + tensor->id = STARPU_TENSOR_INTERFACE_ID; + tensor->nx = ndarr->nn[0]; + tensor->ny = ndarr->nn[1]; + tensor->nz = ndarr->nn[2]; + tensor->nt = ndarr->nn[3]; + tensor->elemsize = ndarr->elemsize; + tensor->ptr = ndarr->ptr; + tensor->ldy = ndarr->ldn[1]; + tensor->ldz = ndarr->ldn[2]; + tensor->ldt = ndarr->ldn[3]; + tensor->dev_handle = ndarr->dev_handle; + tensor->offset = ndarr->offset; +} + +static void _interface_assignment_ndim_to_block(void *ndim_interface, void *child_interface) +{ + struct starpu_block_interface *block = (struct starpu_block_interface *) child_interface; + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + block->id = STARPU_BLOCK_INTERFACE_ID; + block->nx = ndarr->nn[0]; + block->ny = ndarr->nn[1]; + block->nz = ndarr->nn[2]; + block->elemsize = ndarr->elemsize; + block->ptr = ndarr->ptr; + block->ldy = ndarr->ldn[1]; + block->ldz = ndarr->ldn[2]; + block->dev_handle = ndarr->dev_handle; + block->offset = ndarr->offset; +} + +static void _interface_assignment_ndim_to_matrix(void *ndim_interface, void *child_interface) +{ + struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) child_interface; + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + matrix->id = STARPU_MATRIX_INTERFACE_ID; + matrix->nx = ndarr->nn[0]; + matrix->ny = ndarr->nn[1]; + matrix->elemsize = ndarr->elemsize; + matrix->ptr = ndarr->ptr; + matrix->ld = ndarr->ldn[1]; + if (matrix->ptr) + matrix->allocsize = matrix->ld * matrix->ny * matrix->elemsize; + else + matrix->allocsize = matrix->nx * matrix->ny * matrix->elemsize; + matrix->dev_handle = ndarr->dev_handle; + matrix->offset = ndarr->offset; +} + +static void _interface_assignment_ndim_to_vector(void 
*ndim_interface, void *child_interface) +{ + struct starpu_vector_interface *vector = (struct starpu_vector_interface *) child_interface; + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + vector->id = STARPU_VECTOR_INTERFACE_ID; + vector->nx = ndarr->nn[0]; + vector->elemsize = ndarr->elemsize; + vector->allocsize = vector->nx * vector->elemsize; + vector->ptr = ndarr->ptr; + vector->dev_handle = ndarr->dev_handle; + vector->offset = ndarr->offset; +} + +static void _interface_assignment_ndim_to_variable(void *ndim_interface, void *child_interface) +{ + struct starpu_variable_interface *variable = (struct starpu_variable_interface *) child_interface; + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; + + variable->id = STARPU_VARIABLE_INTERFACE_ID; + variable->elemsize = ndarr->elemsize; + variable->ptr = ndarr->ptr; + variable->dev_handle = ndarr->dev_handle; + variable->offset = ndarr->offset; +} + +void starpu_ndim_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; + struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; + + STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); + + size_t ndim = ndim_father->ndim; + STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + + uint32_t nn[ndim]; + unsigned ldn[ndim]; + unsigned i; + for (i=0; inn[i]; + ldn[i] = ndim_father->ldn[i]; + } + + size_t elemsize = ndim_father->elemsize; + uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; + int b = 1; + size_t offset = 0; + for (i = 0; i < ndim; i++) + { + if(chunk_pos[i] >= nn[i]) + { + b = 0; + break; 
+ } + offset += chunk_pos[i]*ldn[i]*elemsize; + } + + STARPU_ASSERT_MSG(b == 1, "the chosen variable should be in the ndim array"); + + /* update the child's interface */ + variable_child->id = STARPU_VARIABLE_INTERFACE_ID; + variable_child->elemsize = elemsize; + + /* is the information on this node valid ? */ + if (ndim_father->dev_handle) + { + if (ndim_father->ptr) + variable_child->ptr = ndim_father->ptr + offset; + variable_child->dev_handle = ndim_father->dev_handle; + variable_child->offset = ndim_father->offset + offset; + } +} diff --git a/src/datawizard/interfaces/ndim_interface.c b/src/datawizard/interfaces/ndim_interface.c new file mode 100644 index 0000000..e95c27f --- /dev/null +++ b/src/datawizard/interfaces/ndim_interface.c @@ -0,0 +1,798 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif +#include + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static size_t _get_size(uint32_t* nn, size_t ndim, size_t elemsize); + +static const struct starpu_data_copy_methods ndim_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void register_ndim_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static void unregister_ndim_handle(starpu_data_handle_t handle); +static void *ndim_to_pointer(void *data_interface, unsigned node); +static starpu_ssize_t allocate_ndim_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_ndim_buffer_on_node(void *data_interface, unsigned node); +static void cache_ndim_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node); +static void reuse_ndim_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node); +static size_t ndim_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_ndim_interface_crc32(starpu_data_handle_t handle); +static int ndim_compare(void *data_interface_a, void *data_interface_b); +static int ndim_alloc_compare(void *data_interface_a, void *data_interface_b); +static void display_ndim_interface(starpu_data_handle_t handle, FILE *f); +static int pack_ndim_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t 
count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); +static int pack_meta_ndim_handle(void *data_interface, void **ptr, starpu_ssize_t *count); +static int unpack_meta_ndim_handle(void **data_interface, void *ptr, starpu_ssize_t *count); +static int free_meta_ndim_handle(void *data_interface); + +struct starpu_data_interface_ops starpu_interface_ndim_ops = +{ + .register_data_handle = register_ndim_handle, + .unregister_data_handle = unregister_ndim_handle, + .allocate_data_on_node = allocate_ndim_buffer_on_node, + .to_pointer = ndim_to_pointer, + .free_data_on_node = free_ndim_buffer_on_node, + .cache_data_on_node = cache_ndim_buffer_on_node, + .reuse_data_on_node = reuse_ndim_buffer_on_node, + .map_data = map_ndim, + .unmap_data = unmap_ndim, + .update_map = update_map_ndim, + .copy_methods = &ndim_copy_data_methods_s, + .get_size = ndim_interface_get_size, + .footprint = footprint_ndim_interface_crc32, + .compare = ndim_compare, + .alloc_compare = ndim_alloc_compare, + .interfaceid = STARPU_NDIM_INTERFACE_ID, + .interface_size = sizeof(struct starpu_ndim_interface), + .display = display_ndim_interface, + .pack_data = pack_ndim_handle, + .peek_data = peek_ndim_handle, + .unpack_data = unpack_ndim_handle, + .pack_meta = pack_meta_ndim_handle, + .unpack_meta = unpack_meta_ndim_handle, + .free_meta = free_meta_ndim_handle, + .describe = describe, + .name = "STARPU_NDIM_INTERFACE", + .dontcache = 0 +}; + +static void *ndim_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_ndim_interface *ndim_interface = data_interface; + + return (void*) ndim_interface->ptr; +} + +static void register_ndim_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) data_interface; + + size_t ndim = ndim_interface->ndim; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_ndim_interface 
*local_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = ndim_interface->ptr; + local_interface->dev_handle = ndim_interface->dev_handle; + local_interface->offset = ndim_interface->offset; + uint32_t* ldn_org = ndim_interface->ldn; + uint32_t* ldn_cpy; + _STARPU_MALLOC(ldn_cpy, ndim*sizeof(uint32_t)); + if (ndim) + memcpy(ldn_cpy, ldn_org, ndim*sizeof(uint32_t)); + local_interface->ldn = ldn_cpy; + } + else + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + uint32_t* ldn_zero; + _STARPU_CALLOC(ldn_zero, ndim, sizeof(uint32_t)); + local_interface->ldn = ldn_zero; + } + + local_interface->id = ndim_interface->id; + uint32_t* nn_org = ndim_interface->nn; + uint32_t* nn_cpy; + _STARPU_MALLOC(nn_cpy, ndim*sizeof(uint32_t)); + if (ndim) + memcpy(nn_cpy, nn_org, ndim*sizeof(uint32_t)); + local_interface->nn = nn_cpy; + local_interface->ndim = ndim_interface->ndim; + local_interface->elemsize = ndim_interface->elemsize; + local_interface->allocsize = ndim_interface->allocsize; + } +} + +static void unregister_ndim_handle(starpu_data_handle_t handle) +{ + unsigned home_node = starpu_data_get_home_node(handle); + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_ndim_interface *local_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + } + else + { + STARPU_ASSERT(local_interface->ptr == 0); + STARPU_ASSERT(local_interface->dev_handle == 0); + } + + free(local_interface->nn); + local_interface->nn = NULL; + free(local_interface->ldn); + local_interface->ldn = NULL; + } +} + +/* declare a new data with the BLAS interface */ +void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, uint32_t* ldn, uint32_t* nn, size_t 
ndim, size_t elemsize) +{ + unsigned i; + size_t allocsize = _get_size(nn, ndim, elemsize); + + for (i=1; i= nn[i-1], "ldn[%u]/ldn[%u] = %u/%u = %u should not be less than nn[%u] = %u.", i, i-1, ldn[i], ldn[i-1], ldn[i]/ldn[i-1], i-1, nn[i-1]); + } + + struct starpu_ndim_interface ndim_interface = + { + .id = STARPU_NDIM_INTERFACE_ID, + .ptr = ptr, + .dev_handle = ptr, + .offset = 0, + .ldn = ldn, + .nn = nn, + .ndim = ndim, + .elemsize = elemsize, + .allocsize = allocsize, + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + uint32_t nn0 = ndim?nn[0]:1; + int b = 1; + size_t buffersize = 0; + for (i = 1; i < ndim; i++) + { + if (nn[i]) + { + buffersize += (nn[i]-1)*ldn[i]*elemsize; + } + else + { + b = 0; + break; + } + } + buffersize += nn0*elemsize; + + if (b && elemsize) + { + STARPU_ASSERT_ACCESSIBLE(ptr); + STARPU_ASSERT_ACCESSIBLE(ptr + buffersize - 1); + } + } +#endif + + starpu_data_register(handleptr, home_node, &ndim_interface, &starpu_interface_ndim_ops); +} + +void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t* ldn) +{ + struct starpu_ndim_interface *ndim_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + ndim_interface->ptr = ptr; + ndim_interface->dev_handle = dev_handle; + ndim_interface->offset = offset; + if (ndim_interface->ndim) + memcpy(ndim_interface->ldn, ldn, ndim_interface->ndim*sizeof(uint32_t)); +} + +static uint32_t footprint_ndim_interface_crc32(starpu_data_handle_t handle) +{ + uint32_t hash; + + hash = starpu_hash_crc32c_be(starpu_ndim_get_elemsize(handle), 0); + + unsigned i; + for (i=0; indim != ndim_b->ndim) + return 0; + + if (ndim_a->elemsize != ndim_b->elemsize) + return 0; + + unsigned i; + /* Two matrices are considered compatible if they have the same size */ + for (i=0; indim; i++) + { + if (ndim_a->nn[i] != ndim_b->nn[i]) + return 
0; + } + + return 1; +} + +static int ndim_alloc_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_ndim_interface *ndim_a = (struct starpu_ndim_interface *) data_interface_a; + struct starpu_ndim_interface *ndim_b = (struct starpu_ndim_interface *) data_interface_b; + + return ndim_a->allocsize == ndim_b->allocsize; +} + +static void display_ndim_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + unsigned i; + for (i=0; indim; i++) + fprintf(f, "%u\t", ndim_interface->nn[i]); + + if (ndim_interface->ndim == 0) + { + fprintf(f, "%lu\t", (unsigned long)ndim_interface->elemsize); + } +} + +static int _is_contiguous_ndim(uint32_t* nn, uint32_t* ldn, size_t ndim) +{ + if (ndim == 0) + return 1; + + unsigned i; + uint32_t ldi = 1; + for (i = 0; ildn; + uint32_t* nn = ndim_interface->nn; + size_t ndim = ndim_interface->ndim; + size_t elemsize = ndim_interface->elemsize; + + *count = _get_size(nn, ndim, elemsize); + + if (ptr != NULL) + { + char *ndptr = (void *)ndim_interface->ptr; + + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + + char *cur = *ptr; + + _pack_cpy_ndim_ptr(cur, ndptr, nn, ldn, ndim, elemsize); + } + + return 0; +} + +static int peek_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, node); + + uint32_t* ldn = ndim_interface->ldn; + uint32_t* nn = ndim_interface->nn; + size_t ndim = ndim_interface->ndim; + size_t elemsize = ndim_interface->elemsize; + + STARPU_ASSERT(count == _get_size(nn, ndim, elemsize)); + + char *cur = ptr; + char *ndptr = (void *)ndim_interface->ptr; + + _peek_cpy_ndim_ptr(ndptr, cur, nn, ldn, ndim, elemsize); + + return 0; 
+} + +static int unpack_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + peek_ndim_handle(handle, node, ptr, count); + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); + + return 0; +} + +static size_t ndim_interface_get_size(starpu_data_handle_t handle) +{ + struct starpu_ndim_interface *ndim_interface; + + ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return _get_size(ndim_interface->nn, ndim_interface->ndim, ndim_interface->elemsize); +} + +/* offer an access to the data parameters */ +uint32_t* starpu_ndim_get_nn(starpu_data_handle_t handle) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return ndim_interface->nn; +} + +uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. 
The given data is not a ndim array."); +#endif + + return ndim_interface->nn[i]; +} + +uint32_t* starpu_ndim_get_local_ldn(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return ndim_interface->ldn; +} + +uint32_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return ndim_interface->ldn[i]; +} + +uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. 
The given data is not a ndim array."); +#endif + + return ndim_interface->ptr; +} + +size_t starpu_ndim_get_ndim(starpu_data_handle_t handle) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return ndim_interface->ndim; +} + +size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); +#endif + + return ndim_interface->elemsize; +} + +/* memory allocation/deallocation primitives for the NDIM interface */ + +/* For a newly-allocated interface, the ld values are trivial */ +static void set_trivial_ndim_ld(struct starpu_ndim_interface *dst_ndarr) +{ + size_t ndim = dst_ndarr->ndim; + uint32_t* nn = dst_ndarr->nn; + + if (ndim > 0) + { + uint32_t ntmp = 1; + dst_ndarr->ldn[0] = 1; + size_t i; + for (i=1; ildn[i] = ntmp; + } + } +} + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_ndim_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr = 0, handle; + + struct starpu_ndim_interface *dst_ndarr = (struct starpu_ndim_interface *) data_interface_; + + size_t arrsize = dst_ndarr->allocsize; + + handle = starpu_malloc_on_node(dst_node, arrsize); + + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + addr = handle; + + /* update the data properly in consequence */ + dst_ndarr->ptr = addr; + dst_ndarr->dev_handle = handle; + dst_ndarr->offset = 0; + + set_trivial_ndim_ld(dst_ndarr); + + return arrsize; +} + +static void free_ndim_buffer_on_node(void 
/* Size in bytes of the memory span to map for a (possibly strided) ndim
 * array:
 *
 *   (nn[0] + sum over i >= 1 of ldn[i] * (nn[i] - 1)) * elemsize
 *
 * With ndim == 0 the result is elemsize (a single element) and neither nn
 * nor ldn is read.  ldn[0] is never read.
 *
 * All arithmetic is carried out in size_t: ldn[i] * (nn[i] - 1) does not fit
 * in 32 bits once the array spans 2^32 elements or more. */
static size_t _get_mapsize(uint32_t* nn, uint32_t* ldn, size_t ndim, size_t elemsize)
{
	size_t buffersize = (ndim ? (size_t) nn[0] : 1) * elemsize;
	size_t i;

	for (i = 1; i < ndim; i++)
	{
		buffersize += (size_t) ldn[i] * ((size_t) nn[i] - 1) * elemsize;
	}

	return buffersize;
}
ldn[ndim-2]*(nn[ndim-2]-1) + ... + ldn[1]*(nn[1]-1) + nn0*/ + mapped = starpu_interface_map(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize), &ret); + if (mapped) + { + dst_ndarr->dev_handle = mapped; + dst_ndarr->offset = 0; + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + dst_ndarr->ptr = mapped; + size_t i; + for (i=0; ildn[i] = src_ndarr->ldn[i]; + } + return 0; + } + return ret; +} + +static int unmap_ndim(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_ndim_interface *src_ndarr = src_interface; + struct starpu_ndim_interface *dst_ndarr = dst_interface; + + size_t ndim = src_ndarr->ndim; + int ret = starpu_interface_unmap(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize)); + dst_ndarr->dev_handle = 0; + + return ret; +} + +static int update_map_ndim(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_ndim_interface *src_ndarr = src_interface; + struct starpu_ndim_interface *dst_ndarr = dst_interface; + + size_t ndim = src_ndarr->ndim; + return starpu_interface_update_map(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_ndarr->offset, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize)); +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_ndim_interface *src_ndarr = (struct starpu_ndim_interface *) src_interface; + struct starpu_ndim_interface *dst_ndarr = (struct starpu_ndim_interface *) dst_interface; + int ret = 0; + + uint32_t* nn = dst_ndarr->nn; + size_t ndim = dst_ndarr->ndim; + size_t elemsize = dst_ndarr->elemsize; + + uint32_t* ldn_src = src_ndarr->ldn; + uint32_t* ldn_dst = dst_ndarr->ldn; + + if 
(starpu_interface_copynd(src_ndarr->dev_handle, src_ndarr->offset, src_node, + dst_ndarr->dev_handle, dst_ndarr->offset, dst_node, + elemsize, ndim, + nn, ldn_src, ldn_dst, + async_data)) + ret = -EAGAIN; + + starpu_interface_data_copy(src_node, dst_node, _get_size(nn, ndim, elemsize)); + + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; + + size_t ndim = ndarr->ndim; + int n = 0; + size_t ret; + unsigned i; + for (i=0; ielemsize:ndarr->nn[i])); + n += ret; + if(size > ret) + size -= ret; + else + size = 0; + } + + return n; +} + +static starpu_ssize_t size_meta_ndim_handle(struct starpu_ndim_interface *ndarr) +{ + starpu_ssize_t count; + count = sizeof(ndarr->ndim) + sizeof(ndarr->offset) + sizeof(ndarr->allocsize) + sizeof(ndarr->elemsize); + count += ndarr->ndim * (sizeof(ndarr->ldn[0]) + sizeof(ndarr->nn[0])) + sizeof(ndarr->ptr) + sizeof(ndarr->dev_handle); + return count; +} + + +#define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while (0) + +static int pack_meta_ndim_handle(void *data_interface, void **ptr, starpu_ssize_t *count) +{ + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; + + *count = size_meta_ndim_handle(ndarr); + _STARPU_CALLOC(*ptr, *count, 1); + char *cur = *ptr; + + _pack(cur, ndarr->ndim); + _pack(cur, ndarr->offset); + _pack(cur, ndarr->allocsize); + _pack(cur, ndarr->elemsize); + _pack(cur, ndarr->ptr); + _pack(cur, ndarr->dev_handle); + + memcpy(cur, ndarr->ldn, ndarr->ndim*sizeof(ndarr->ldn[0])); + cur += ndarr->ndim*sizeof(ndarr->ldn[0]); + + memcpy(cur, ndarr->nn, ndarr->ndim*sizeof(ndarr->nn[0])); + return 0; +} + +#define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) + +static int unpack_meta_ndim_handle(void **data_interface, void *ptr, starpu_ssize_t *count) +{ + 
_STARPU_CALLOC(*data_interface, 1, sizeof(struct starpu_ndim_interface)); + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *)(*data_interface); + char *cur = ptr; + + ndarr->id = STARPU_NDIM_INTERFACE_ID; + + _unpack(ndarr->ndim, cur); + _unpack(ndarr->offset, cur); + _unpack(ndarr->allocsize, cur); + _unpack(ndarr->elemsize, cur); + _unpack(ndarr->ptr, cur); + _unpack(ndarr->dev_handle, cur); + + _STARPU_MALLOC(ndarr->ldn, ndarr->ndim*sizeof(ndarr->ldn[0])); + memcpy(ndarr->ldn, cur, ndarr->ndim*sizeof(ndarr->ldn[0])); + cur += ndarr->ndim*sizeof(ndarr->ldn[0]); + + _STARPU_MALLOC(ndarr->nn, ndarr->ndim*sizeof(ndarr->nn[0])); + memcpy(ndarr->nn, cur, ndarr->ndim*sizeof(ndarr->nn[0])); + + *count = size_meta_ndim_handle(ndarr); + + return 0; +} + +static int free_meta_ndim_handle(void *data_interface) +{ + struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; + free(ndarr->ldn); + ndarr->ldn = NULL; + free(ndarr->nn); + ndarr->nn = NULL; + return 0; +} diff --git a/src/datawizard/interfaces/tensor_filters.c b/src/datawizard/interfaces/tensor_filters.c new file mode 100644 index 0000000..c4dfb41 --- /dev/null +++ b/src/datawizard/interfaces/tensor_filters.c @@ -0,0 +1,363 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +static void _starpu_tensor_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts, uintptr_t shadow_size) +{ + struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; + struct starpu_tensor_interface *tensor_child = (struct starpu_tensor_interface *) child_interface; + + unsigned blocksize; + /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz, in case time, it's nt*/ + uint32_t nn; + uint32_t nx; + uint32_t ny; + uint32_t nz; + uint32_t nt; + + switch(dim) + { + case 1: /* horizontal*/ + /* actual number of elements */ + nx = tensor_father->nx - 2 * shadow_size; + ny = tensor_father->ny; + nz = tensor_father->nz; + nt = tensor_father->nt; + nn = nx; + blocksize = 1; + break; + + case 2: /* vertical*/ + nx = tensor_father->nx; + /* actual number of elements */ + ny = tensor_father->ny - 2 * shadow_size; + nz = tensor_father->nz; + nt = tensor_father->nt; + nn = ny; + blocksize = tensor_father->ldy; + break; + + case 3: /* depth*/ + nx = tensor_father->nx; + ny = tensor_father->ny; + /* actual number of elements */ + nz = tensor_father->nz - 2 * shadow_size; + nt = tensor_father->nt; + nn = nz; + blocksize = tensor_father->ldz; + break; + + case 4: /* time*/ + nx = tensor_father->nx; + ny = tensor_father->ny; + nz = tensor_father->nz; + /* actual number of elements */ + nt = tensor_father->nt - 2 * shadow_size; + nn = nt; + blocksize = tensor_father->ldt; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + size_t elemsize = tensor_father->elemsize; + + STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); + + uint32_t child_nn; + size_t offset; + starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); + + child_nn += 2 * 
shadow_size; + + STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); + tensor_child->id = tensor_father->id; + + switch(dim) + { + case 1: + tensor_child->nx = child_nn; + tensor_child->ny = ny; + tensor_child->nz = nz; + tensor_child->nt = nt; + break; + case 2: + tensor_child->nx = nx; + tensor_child->ny = child_nn; + tensor_child->nz = nz; + tensor_child->nt = nt; + break; + case 3: + tensor_child->nx = nx; + tensor_child->ny = ny; + tensor_child->nz = child_nn; + tensor_child->nt = nt; + break; + case 4: + tensor_child->nx = nx; + tensor_child->ny = ny; + tensor_child->nz = nz; + tensor_child->nt = child_nn; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + tensor_child->elemsize = elemsize; + + if (tensor_father->dev_handle) + { + if (tensor_father->ptr) + tensor_child->ptr = tensor_father->ptr + offset; + tensor_child->ldy = tensor_father->ldy; + tensor_child->ldz = tensor_father->ldz; + tensor_child->ldt = tensor_father->ldt; + tensor_child->dev_handle = tensor_father->dev_handle; + tensor_child->offset = tensor_father->offset + offset; + } +} + +void starpu_tensor_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_block(2, father_interface, 
child_interface, f, id, nparts, 0); +} + +void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_tensor_filter_block(2, father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, shadow_size); +} + +void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, 0); +} + +void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; + + _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, shadow_size); +} + +static void _starpu_tensor_filter_pick_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; + struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface; + 
+ unsigned blocksize; + uint32_t nn; + uint32_t nx = tensor_father->nx; + uint32_t ny = tensor_father->ny; + uint32_t nz = tensor_father->nz; + uint32_t nt = tensor_father->nt; + + switch(dim) + { + /* along y-axis */ + case 1: + nn = ny; + blocksize = tensor_father->ldy; + break; + /* along z-axis */ + case 2: + nn = nz; + blocksize = tensor_father->ldz; + break; + /* along t-axis */ + case 3: + nn = nt; + blocksize = tensor_father->ldt; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + size_t elemsize = tensor_father->elemsize; + + size_t chunk_pos = (size_t)f->filter_arg_ptr; + + STARPU_ASSERT_MSG(nparts <= nn, "cannot get %u blocks", nparts); + STARPU_ASSERT_MSG((chunk_pos + id) < nn, "the chosen block should be in the tensor"); + + size_t offset = (chunk_pos + id) * blocksize * elemsize; + + STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); + block_child->id = STARPU_BLOCK_INTERFACE_ID; + + switch(dim) + { + /* along y-axis */ + case 1: + block_child->nx = nx; + block_child->ny = nz; + block_child->nz = nt; + break; + /* along z-axis */ + case 2: + block_child->nx = nx; + block_child->ny = ny; + block_child->nz = nt; + break; + /* along t-axis */ + case 3: + block_child->nx = nx; + block_child->ny = ny; + block_child->nz = nz; + break; + default: + STARPU_ASSERT_MSG(0, "Unknown value for dim"); + } + + block_child->elemsize = elemsize; + + if (tensor_father->dev_handle) + { + if (tensor_father->ptr) + block_child->ptr = tensor_father->ptr + offset; + switch(dim) + { + /* along y-axis */ + case 1: + block_child->ldy = tensor_father->ldz; + block_child->ldz = tensor_father->ldt; + break; + /* along z-axis */ + case 2: + block_child->ldy = tensor_father->ldy; + block_child->ldz = tensor_father->ldt; + break; + /* along t-axis */ + case 3: + block_child->ldy = tensor_father->ldy; + block_child->ldz = tensor_father->ldz; + break; + default: + STARPU_ASSERT_MSG(0, 
"Unknown value for dim"); + } + block_child->dev_handle = tensor_father->dev_handle; + block_child->offset = tensor_father->offset + offset; + } +} +void starpu_tensor_filter_pick_block_t(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_pick_block(3, father_interface, child_interface, f, id, nparts); /* dim 3 selects the t-axis case of the helper */ +} + +void starpu_tensor_filter_pick_block_z(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_pick_block(2, father_interface, child_interface, f, id, nparts); /* dim 2 selects the z-axis case of the helper */ +} + +void starpu_tensor_filter_pick_block_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, + unsigned id, unsigned nparts) +{ + _starpu_tensor_filter_pick_block(1, father_interface, child_interface, f, id, nparts); /* dim 1 selects the y-axis case of the helper */ +} + +struct starpu_data_interface_ops *starpu_tensor_filter_pick_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_block_ops; /* children of the pick_block filters use the block interface ops */ +} + +void starpu_tensor_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; + /* each chunk becomes a variable */ + struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; + + uint32_t nx = tensor_father->nx; + uint32_t ny = tensor_father->ny; + uint32_t nz = tensor_father->nz; + uint32_t nt = tensor_father->nt; + + unsigned ldy = tensor_father->ldy; + unsigned ldz = tensor_father->ldz; + unsigned ldt = tensor_father->ldt; + + size_t elemsize = tensor_father->elemsize; + + uint32_t* chunk_pos = 
(uint32_t*)f->filter_arg_ptr; /* read as four coordinates: [0]=x, [1]=y, [2]=z, [3]=t, as the bounds check below shows */ + // int i; + // for(i=0; i<4; i++) + // { + // printf("pos is %d\n", chunk_pos[i]); + // } + + STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny)&&(chunk_pos[2] < nz)&&(chunk_pos[3] < nt), "the chosen variable should be in the tensor"); + + size_t offset = (chunk_pos[3] * ldt + chunk_pos[2] * ldz + chunk_pos[1] * ldy + chunk_pos[0]) * elemsize; /* byte offset of the picked element inside the father tensor */ + + STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); + + /* update the child's interface */ + variable_child->id = STARPU_VARIABLE_INTERFACE_ID; + variable_child->elemsize = elemsize; + + /* is the information on this node valid ? */ + if (tensor_father->dev_handle) + { + if (tensor_father->ptr) + variable_child->ptr = tensor_father->ptr + offset; + variable_child->dev_handle = tensor_father->dev_handle; + variable_child->offset = tensor_father->offset + offset; + } +} + +struct starpu_data_interface_ops *starpu_tensor_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; /* children of the pick_variable filter use the variable interface ops */ +} diff --git a/src/datawizard/interfaces/tensor_interface.c b/src/datawizard/interfaces/tensor_interface.c new file mode 100644 index 0000000..457a06d --- /dev/null +++ b/src/datawizard/interfaces/tensor_interface.c @@ -0,0 +1,633 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods tensor_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + + +static void register_tensor_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static void *tensor_to_pointer(void *data_interface, unsigned node); +static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_tensor_buffer_on_node(void *data_interface, unsigned node); +static size_t tensor_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_tensor_interface_crc32(starpu_data_handle_t handle); +static int tensor_compare(void *data_interface_a, void *data_interface_b); +static void display_tensor_interface(starpu_data_handle_t handle, FILE *f); +static int pack_tensor_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_tensor_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_tensor_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_tensor_ops = +{ + .register_data_handle = register_tensor_handle, + .allocate_data_on_node = allocate_tensor_buffer_on_node, + .to_pointer = tensor_to_pointer, + .free_data_on_node = 
free_tensor_buffer_on_node, + .map_data = map_tensor, + .unmap_data = unmap_tensor, + .update_map = update_map_tensor, + .copy_methods = &tensor_copy_data_methods_s, + .get_size = tensor_interface_get_size, + .footprint = footprint_tensor_interface_crc32, + .compare = tensor_compare, + .interfaceid = STARPU_TENSOR_INTERFACE_ID, + .interface_size = sizeof(struct starpu_tensor_interface), + .display = display_tensor_interface, + .pack_data = pack_tensor_handle, + .peek_data = peek_tensor_handle, + .unpack_data = unpack_tensor_handle, + .describe = describe, + .name = "STARPU_TENSOR_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void *tensor_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_tensor_interface *tensor_interface = data_interface; + + return (void*) tensor_interface->ptr; +} + +static void register_tensor_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) /* fill in the per-node interface of every memory node */ + { + struct starpu_tensor_interface *local_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) /* the home node takes the location and strides given at registration */ + { + local_interface->ptr = tensor_interface->ptr; + local_interface->dev_handle = tensor_interface->dev_handle; + local_interface->offset = tensor_interface->offset; + local_interface->ldy = tensor_interface->ldy; + local_interface->ldz = tensor_interface->ldz; + local_interface->ldt = tensor_interface->ldt; + } + else /* every other node starts with a zeroed location and zeroed strides */ + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + local_interface->ldy = 0; + local_interface->ldz = 0; + local_interface->ldt = 0; + } + + local_interface->id = tensor_interface->id; + local_interface->nx = tensor_interface->nx; + local_interface->ny = tensor_interface->ny; + local_interface->nz = 
tensor_interface->nz; + local_interface->nt = tensor_interface->nt; + local_interface->elemsize = tensor_interface->elemsize; + } +} + +/* declare a new data with the tensor interface */ +void starpu_tensor_data_register(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, + uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize) +{ + STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); + STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); + STARPU_ASSERT_MSG(ldt/ldz >= nz, "ldt/ldz = %u/%u = %u should not be less than nz = %u.", ldt, ldz, ldt/ldz, nz); + struct starpu_tensor_interface tensor_interface = + { + .id = STARPU_TENSOR_INTERFACE_ID, + .ptr = ptr, + .dev_handle = ptr, + .offset = 0, + .ldy = ldy, + .ldz = ldz, + .ldt = ldt, + .nx = nx, + .ny = ny, + .nz = nz, + .nt = nt, + .elemsize = elemsize + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (nx && ny && nz && nt && elemsize) + { + STARPU_ASSERT_ACCESSIBLE(ptr); /* first byte; the next probe targets the last byte, with offsets widened to size_t so they cannot wrap at 32 bits */ + STARPU_ASSERT_ACCESSIBLE(ptr + (size_t)(nt-1)*ldt*elemsize + (size_t)(nz-1)*ldz*elemsize + (size_t)(ny-1)*ldy*elemsize + nx*elemsize - 1); + } + } +#endif + + starpu_data_register(handleptr, home_node, &tensor_interface, &starpu_interface_tensor_ops); +} + +void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt) +{ + struct starpu_tensor_interface *tensor_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + tensor_interface->ptr = ptr; + tensor_interface->dev_handle = dev_handle; + tensor_interface->offset = offset; + tensor_interface->ldy = ldy; + tensor_interface->ldz = ldz; + tensor_interface->ldt = ldt; +} + +static uint32_t footprint_tensor_interface_crc32(starpu_data_handle_t 
handle) +{ + uint32_t hash; + + hash = starpu_hash_crc32c_be(starpu_tensor_get_nx(handle), 0); + hash = starpu_hash_crc32c_be(starpu_tensor_get_ny(handle), hash); + hash = starpu_hash_crc32c_be(starpu_tensor_get_nz(handle), hash); + hash = starpu_hash_crc32c_be(starpu_tensor_get_nt(handle), hash); /* only the four dimensions are hashed */ + + return hash; +} + +static int tensor_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_tensor_interface *tensor_a = (struct starpu_tensor_interface *) data_interface_a; + struct starpu_tensor_interface *tensor_b = (struct starpu_tensor_interface *) data_interface_b; + + /* Two tensors are considered compatible if they have the same size */ + return (tensor_a->nx == tensor_b->nx) + && (tensor_a->ny == tensor_b->ny) + && (tensor_a->nz == tensor_b->nz) + && (tensor_a->nt == tensor_b->nt) + && (tensor_a->elemsize == tensor_b->elemsize); +} + +static void display_tensor_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_tensor_interface *tensor_interface; + + tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t%u\t%u\t%u\t", tensor_interface->nx, tensor_interface->ny, tensor_interface->nz, tensor_interface->nt); +} + +#define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) +#define IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz) ((nx) * (ny) == (ldz)) +#define IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt) ((nx) * (ny) * (nz) == (ldt)) + +static int pack_tensor_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + + uint32_t ldy = tensor_interface->ldy; + uint32_t ldz = tensor_interface->ldz; + uint32_t ldt = tensor_interface->ldt; + uint32_t nx = tensor_interface->nx; + uint32_t ny = tensor_interface->ny; + 
uint32_t nz = tensor_interface->nz; + uint32_t nt = tensor_interface->nt; + size_t elemsize = tensor_interface->elemsize; + + *count = (size_t)nx*ny*nz*nt*elemsize; /* widened first: the uint32_t product alone would wrap at 32 bits */ + + if (ptr != NULL) + { + uint32_t t, z, y; + char *block = (void *)tensor_interface->ptr; + + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); + + char *cur = *ptr; + if (IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt)) + memcpy(cur, block, (size_t)nx * ny * nz * nt * elemsize); + else + { + char *block_t = block; + for(t=0 ; tldy; + uint32_t ldz = tensor_interface->ldz; + uint32_t ldt = tensor_interface->ldt; + uint32_t nx = tensor_interface->nx; + uint32_t ny = tensor_interface->ny; + uint32_t nz = tensor_interface->nz; + uint32_t nt = tensor_interface->nt; + size_t elemsize = tensor_interface->elemsize; + + STARPU_ASSERT(count == elemsize * nx * ny * nz * nt); + + uint32_t t, z, y; + char *cur = ptr; + char *block = (void *)tensor_interface->ptr; + + if (IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt)) + memcpy(block, cur, (size_t)nx * ny * nz * nt * elemsize); + else + { + char *block_t = block; + for(t=0 ; tid == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + size = (size_t)tensor_interface->nx*tensor_interface->ny*tensor_interface->nz*tensor_interface->nt*tensor_interface->elemsize; + + return size; +} + +/* offer an access to the data parameters */ +uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. 
The given data is not a tensor."); +#endif + + return tensor_interface->nx; +} + +uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->ny; +} + +uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->nz; +} + +uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->nt; +} + +uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. 
The given data is not a tensor."); +#endif + + return tensor_interface->ldy; +} + +uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->ldz; +} + +uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->ldt; +} + +uintptr_t starpu_tensor_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor."); +#endif + + return tensor_interface->ptr; +} + +size_t starpu_tensor_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. 
The given data is not a tensor."); +#endif + + return tensor_interface->elemsize; +} + + +/* memory allocation/deallocation primitives for the tensor interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr = 0, handle; + + struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) data_interface_; + + uint32_t nx = dst_block->nx; + uint32_t ny = dst_block->ny; + uint32_t nz = dst_block->nz; + uint32_t nt = dst_block->nt; + size_t elemsize = dst_block->elemsize; + + starpu_ssize_t allocated_memory; + + handle = starpu_malloc_on_node(dst_node, (size_t)nx*ny*nz*nt*elemsize); /* widened first so the requested size cannot wrap at 32 bits */ + + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) /* ptr stays 0 on OpenCL nodes; only dev_handle is set there */ + addr = handle; + + allocated_memory = (size_t)nx*ny*nz*nt*elemsize; + + /* update the data properly in consequence */ + dst_block->ptr = addr; + dst_block->dev_handle = handle; + dst_block->offset = 0; + dst_block->ldy = nx; + dst_block->ldz = nx*ny; + dst_block->ldt = nx*ny*nz; + + return allocated_memory; +} + +static void free_tensor_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) data_interface; + uint32_t nx = tensor_interface->nx; + uint32_t ny = tensor_interface->ny; + uint32_t nz = tensor_interface->nz; + uint32_t nt = tensor_interface->nt; + size_t elemsize = tensor_interface->elemsize; + + starpu_free_on_node(node, tensor_interface->dev_handle, (size_t)nx*ny*nz*nt*elemsize); /* same size expression as the allocation */ + tensor_interface->ptr = 0; + tensor_interface->dev_handle = 0; +} + +static int map_tensor(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_tensor_interface *src_tensor = src_interface; + struct starpu_tensor_interface *dst_tensor = dst_interface; + int ret; + uintptr_t mapped; + + /* map area ldt*(nt-1) + ldz*(nz-1) + ldy*(ny-1) + nx*/ + mapped = 
starpu_interface_map(src_tensor->dev_handle, src_tensor->offset, src_node, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize, &ret); + if (mapped) + { + dst_tensor->dev_handle = mapped; + dst_tensor->offset = 0; + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + dst_tensor->ptr = mapped; + dst_tensor->ldy = src_tensor->ldy; + dst_tensor->ldz = src_tensor->ldz; + dst_tensor->ldt = src_tensor->ldt; /* the destination takes over the source strides */ + return 0; + } + return ret; +} + +static int unmap_tensor(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_tensor_interface *src_tensor = src_interface; + struct starpu_tensor_interface *dst_tensor = dst_interface; + + int ret = starpu_interface_unmap(src_tensor->dev_handle, src_tensor->offset, src_node, dst_tensor->dev_handle, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize); + dst_tensor->dev_handle = 0; + + return ret; +} + +static int update_map_tensor(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_tensor_interface *src_tensor = src_interface; + struct starpu_tensor_interface *dst_tensor = dst_interface; + + return starpu_interface_update_map(src_tensor->dev_handle, src_tensor->offset, src_node, dst_tensor->dev_handle, dst_tensor->offset, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize); +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_tensor_interface *src_block = (struct starpu_tensor_interface *) src_interface; + struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) dst_interface; + int ret = 0; + + uint32_t nx = 
dst_block->nx; + uint32_t ny = dst_block->ny; + uint32_t nz = dst_block->nz; + uint32_t nt = dst_block->nt; + size_t elemsize = dst_block->elemsize; + + uint32_t ldy_src = src_block->ldy; + uint32_t ldz_src = src_block->ldz; + uint32_t ldt_src = src_block->ldt; + uint32_t ldy_dst = dst_block->ldy; + uint32_t ldz_dst = dst_block->ldz; + uint32_t ldt_dst = dst_block->ldt; + + if (starpu_interface_copy4d(src_block->dev_handle, src_block->offset, src_node, + dst_block->dev_handle, dst_block->offset, dst_node, + nx * elemsize, + ny, ldy_src * elemsize, ldy_dst * elemsize, + nz, ldz_src * elemsize, ldz_dst * elemsize, + nt, ldt_src * elemsize, ldt_dst * elemsize, + async_data)) + ret = -EAGAIN; + + starpu_interface_data_copy(src_node, dst_node, (size_t)nx*ny*nz*nt*elemsize); /* byte count widened before multiplying */ + + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_tensor_interface *block = (struct starpu_tensor_interface *) data_interface; + return snprintf(buf, size, "T%ux%ux%ux%ux%u", + (unsigned) block->nx, + (unsigned) block->ny, + (unsigned) block->nz, + (unsigned) block->nt, + (unsigned) block->elemsize); +} diff --git a/src/datawizard/interfaces/variable_interface.c b/src/datawizard/interfaces/variable_interface.c new file mode 100644 index 0000000..a3cd6ad --- /dev/null +++ b/src/datawizard/interfaces/variable_interface.c @@ -0,0 +1,321 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods variable_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void register_variable_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_variable_buffer_on_node(void *data_interface_, unsigned dst_node); +static void *variable_to_pointer(void *data_interface, unsigned node); +static void free_variable_buffer_on_node(void *data_interface, unsigned node); +static size_t variable_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle); +static int variable_compare(void *data_interface_a, void *data_interface_b); +static void display_variable_interface(starpu_data_handle_t handle, FILE *f); +static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_variable_ops = +{ + .register_data_handle = register_variable_handle, + .allocate_data_on_node = allocate_variable_buffer_on_node, + .to_pointer = 
variable_to_pointer, + .free_data_on_node = free_variable_buffer_on_node, + .map_data = map_variable, + .unmap_data = unmap_variable, + .update_map = update_map_variable, + .copy_methods = &variable_copy_data_methods_s, + .get_size = variable_interface_get_size, + .footprint = footprint_variable_interface_crc32, + .compare = variable_compare, + .interfaceid = STARPU_VARIABLE_INTERFACE_ID, + .interface_size = sizeof(struct starpu_variable_interface), + .display = display_variable_interface, + .pack_data = pack_variable_handle, + .peek_data = peek_variable_handle, + .unpack_data = unpack_variable_handle, + .describe = describe, + .name = "STARPU_VARIABLE_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void *variable_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + return (void*) STARPU_VARIABLE_GET_PTR(data_interface); +} + +static void register_variable_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)data_interface; + int node; + for (node = 0; node < STARPU_MAXNODES; node++) /* fill in the per-node interface of every memory node */ + { + struct starpu_variable_interface *local_interface = (struct starpu_variable_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) /* the home node takes the location given at registration */ + { + local_interface->ptr = variable_interface->ptr; + local_interface->dev_handle = variable_interface->dev_handle; + local_interface->offset = variable_interface->offset; + } + else /* every other node starts with a zeroed location */ + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + } + + local_interface->id = variable_interface->id; + local_interface->elemsize = variable_interface->elemsize; + } +} + +/* declare a new data with the variable interface */ +void starpu_variable_data_register(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, size_t elemsize) +{ + struct starpu_variable_interface variable = + { + .id = 
STARPU_VARIABLE_INTERFACE_ID, + .ptr = ptr, + .dev_handle = ptr, + .offset = 0, + .elemsize = elemsize + }; +#ifndef STARPU_SIMGRID + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (elemsize) + { + STARPU_ASSERT_ACCESSIBLE(ptr); /* first byte of the variable */ + STARPU_ASSERT_ACCESSIBLE(ptr + elemsize - 1); /* last byte of the variable */ + } + } +#endif + + starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops); +} + +void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset) +{ + struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(handle, node); + starpu_data_ptr_register(handle, node); + variable_interface->ptr = ptr; + variable_interface->dev_handle = dev_handle; + variable_interface->offset = offset; +} + + +static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_variable_get_elemsize(handle), 0); +} + +static int variable_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_variable_interface *variable_a = (struct starpu_variable_interface *) data_interface_a; + struct starpu_variable_interface *variable_b = (struct starpu_variable_interface *) data_interface_b; + + /* Two variables are considered compatible if they have the same size */ + return variable_a->elemsize == variable_b->elemsize; +} + +static void display_variable_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%ld\t", (long)variable_interface->elemsize); +} + +static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_variable_interface *variable_interface = (struct 
starpu_variable_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = variable_interface->elemsize; /* the size is reported even when ptr is NULL */ + + if (ptr != NULL) + { + *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); /* NOTE(review): the result is used by memcpy without a NULL check */ + memcpy(*ptr, (void*)variable_interface->ptr, variable_interface->elemsize); + } + + return 0; +} + +static int peek_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) + starpu_data_get_interface_on_node(handle, node); + + STARPU_ASSERT(count == variable_interface->elemsize); + + memcpy((void*)variable_interface->ptr, ptr, variable_interface->elemsize); /* copy the packed bytes into the handle's buffer on this node */ + + return 0; +} + +static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + peek_variable_handle(handle, node, ptr, count); /* copy first ... */ + starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); /* ... then release the packed buffer */ + + return 0; +} + +static size_t variable_interface_get_size(starpu_data_handle_t handle) +{ + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(variable_interface->id == STARPU_VARIABLE_INTERFACE_ID, "Error. 
The given data is not a variable."); +#endif + + return variable_interface->elemsize; +} + +uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + return STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, node)); +} + +size_t starpu_variable_get_elemsize(starpu_data_handle_t handle) +{ + return STARPU_VARIABLE_GET_ELEMSIZE(starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM)); +} + +/* memory allocation/deallocation primitives for the variable interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_variable_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) data_interface_; + size_t elemsize = variable_interface->elemsize; + uintptr_t addr = starpu_malloc_on_node(dst_node, elemsize); + + if (!addr) + return -ENOMEM; + + /* update the data properly in consequence */ + variable_interface->ptr = addr; + variable_interface->dev_handle = addr; + variable_interface->offset = 0; + + return elemsize; +} + +static void free_variable_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) data_interface; + starpu_free_on_node(node, variable_interface->dev_handle, variable_interface->elemsize); + variable_interface->ptr = 0; + variable_interface->dev_handle = 0; +} + +static int map_variable(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_variable_interface *src_variable = src_interface; + struct starpu_variable_interface *dst_variable = dst_interface; + int ret; + uintptr_t mapped; + + mapped = starpu_interface_map(src_variable->dev_handle, src_variable->offset, src_node, dst_node, src_variable->elemsize, &ret); + if 
(mapped) + { + dst_variable->dev_handle = mapped; + dst_variable->offset = 0; + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) /* ptr is left untouched for OpenCL destination nodes */ + dst_variable->ptr = mapped; + return 0; + } + return ret; +} + +static int unmap_variable(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_variable_interface *src_variable = src_interface; + struct starpu_variable_interface *dst_variable = dst_interface; + + int ret = starpu_interface_unmap(src_variable->dev_handle, src_variable->offset, src_node, dst_variable->dev_handle, dst_node, src_variable->elemsize); + dst_variable->dev_handle = 0; + + return ret; +} + +static int update_map_variable(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_variable_interface *src_variable = src_interface; + struct starpu_variable_interface *dst_variable = dst_interface; + + return starpu_interface_update_map(src_variable->dev_handle, src_variable->offset, src_node, dst_variable->dev_handle, dst_variable->offset, dst_node, src_variable->elemsize); +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_variable_interface *src_variable = (struct starpu_variable_interface *) src_interface; + struct starpu_variable_interface *dst_variable = (struct starpu_variable_interface *) dst_interface; + + size_t elemsize = dst_variable->elemsize; + + uintptr_t ptr_src = src_variable->ptr; + uintptr_t ptr_dst = dst_variable->ptr; + int ret; + + ret = starpu_interface_copy(ptr_src, 0, src_node, ptr_dst, 0, dst_node, elemsize, async_data); /* NOTE(review): copies through ptr with zero offsets, whereas the tensor interface passes dev_handle and offset; confirm this is intended */ + + starpu_interface_data_copy(src_node, dst_node, elemsize); + + return ret; +} +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_variable_interface *variable = (struct starpu_variable_interface *) data_interface; + return snprintf(buf, size, "v%u", + (unsigned) variable->elemsize); +} 
diff --git a/src/datawizard/interfaces/vector_filters.c b/src/datawizard/interfaces/vector_filters.c new file mode 100644 index 0000000..f8f3576 --- /dev/null +++ b/src/datawizard/interfaces/vector_filters.c @@ -0,0 +1,215 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +static void _starpu_vector_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks, uintptr_t shadow_size) +{ + struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; + struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; + + /* actual number of elements */ + uint32_t nx = vector_father->nx - 2 * shadow_size; + size_t elemsize = vector_father->elemsize; + + STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks); + + uint32_t child_nx; + size_t offset; + starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); + child_nx += 2*shadow_size; + + STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); + vector_child->id = vector_father->id; + vector_child->nx = child_nx; + 
vector_child->elemsize = elemsize;
+	vector_child->allocsize = vector_child->nx * elemsize;
+
+	if (vector_father->dev_handle)
+	{
+		if (vector_father->ptr)
+			vector_child->ptr = vector_father->ptr + offset;
+		vector_child->dev_handle = vector_father->dev_handle;
+		vector_child->offset = vector_father->offset + offset;
+	}
+}
+/* Block filter without shadow: forwards to _starpu_vector_filter_block() with a shadow size of 0. */
+void starpu_vector_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
+{
+	_starpu_vector_filter_block(father_interface, child_interface, f, id, nchunks, 0);
+}
+
+/* Block filter with shadow: the shadow size is carried as an integer in f->filter_arg_ptr (f is read despite the UNUSED attribute). */
+void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
+{
+	uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+	_starpu_vector_filter_block(father_interface, child_interface, f, id, nchunks, shadow_size);
+}
+
+/* Split a vector in two: child 0 gets the first f->filter_arg elements, child 1 the remainder. */
+void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks)
+{
+	/* there cannot be more than 2 chunks */
+	STARPU_ASSERT_MSG(id < 2, "Only 2 parts are possible, got child index %u", id);
+
+	struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
+	struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
+
+	uint32_t length_first = f->filter_arg;
+
+	uint32_t nx = vector_father->nx;
+	size_t elemsize = vector_father->elemsize;
+	/* Strict comparison: the second child must receive at least one element. */
+	STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %u vs %u", length_first, nx);
+
+	STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__);
+	vector_child->id = vector_father->id;
+
+	/* this is the first child */
+	if (id == 0)
+	{
+		vector_child->nx = length_first;
+		vector_child->elemsize = elemsize;
+		vector_child->allocsize = vector_child->nx * elemsize;
+
+		if
(vector_father->dev_handle) + { + if (vector_father->ptr) + vector_child->ptr = vector_father->ptr; + vector_child->offset = vector_father->offset; + vector_child->dev_handle = vector_father->dev_handle; + } + } + else /* the second child */ + { + vector_child->nx = nx - length_first; + vector_child->elemsize = elemsize; + vector_child->allocsize = vector_child->nx * elemsize; + + if (vector_father->dev_handle) + { + if (vector_father->ptr) + vector_child->ptr = vector_father->ptr + length_first*elemsize; + vector_child->offset = vector_father->offset + length_first*elemsize; + vector_child->dev_handle = vector_father->dev_handle; + } + } +} + + +void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; + struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; + + long *length_tab = (long *) f->filter_arg_ptr; + + size_t elemsize = vector_father->elemsize; + + long chunk_size = length_tab[id]; + + STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); + vector_child->id = vector_father->id; + vector_child->nx = chunk_size; + vector_child->elemsize = elemsize; + vector_child->allocsize = vector_child->nx * elemsize; + + if (vector_father->dev_handle) + { + /* compute the current position */ + unsigned current_pos = 0; + unsigned i; + for (i = 0; i < id; i++) + current_pos += length_tab[i]; + + if (vector_father->ptr) + vector_child->ptr = vector_father->ptr + current_pos*elemsize; + vector_child->offset = vector_father->offset + current_pos*elemsize; + vector_child->dev_handle = vector_father->dev_handle; + } +} + +void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, 
STARPU_ATTRIBUTE_UNUSED unsigned nchunks) +{ + struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; + struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; + + uint32_t *length_tab = (uint32_t *) f->filter_arg_ptr; + + size_t elemsize = vector_father->elemsize; + + uint32_t chunk_size = length_tab[id]; + + STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); + vector_child->id = vector_father->id; + vector_child->nx = chunk_size; + vector_child->elemsize = elemsize; + vector_child->allocsize = vector_child->nx * elemsize; + + if (vector_father->dev_handle) + { + /* compute the current position */ + unsigned current_pos = 0; + unsigned i; + for (i = 0; i < id; i++) + current_pos += length_tab[i]; + + if (vector_father->ptr) + vector_child->ptr = vector_father->ptr + current_pos*elemsize; + vector_child->offset = vector_father->offset + current_pos*elemsize; + vector_child->dev_handle = vector_father->dev_handle; + } +} + +void starpu_vector_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; + /* each chunk becomes a variable */ + struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; + + /* actual number of elements */ + uint32_t nx = vector_father->nx; + size_t elemsize = vector_father->elemsize; + + size_t chunk_pos = (size_t)f->filter_arg_ptr; + + STARPU_ASSERT_MSG(nchunks <= nx, "cannot get %u variables", nchunks); + STARPU_ASSERT_MSG((chunk_pos + id) < nx, "the chosen variable should be in the vector"); + + size_t offset = (chunk_pos + id) * elemsize; + + STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a 
vector data", __func__); + + variable_child->id = STARPU_VARIABLE_INTERFACE_ID; + variable_child->elemsize = elemsize; + + if (vector_father->dev_handle) + { + if (vector_father->ptr) + variable_child->ptr = vector_father->ptr + offset; + variable_child->dev_handle = vector_father->dev_handle; + variable_child->offset = vector_father->offset + offset; + } +} + +struct starpu_data_interface_ops *starpu_vector_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) +{ + return &starpu_interface_variable_ops; +} diff --git a/src/datawizard/interfaces/vector_interface.c b/src/datawizard/interfaces/vector_interface.c new file mode 100644 index 0000000..de7b8fc --- /dev/null +++ b/src/datawizard/interfaces/vector_interface.c @@ -0,0 +1,454 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods vector_copy_data_methods_s = +{ + .any_to_any = copy_any_to_any, +}; + +static void vector_init(void *data_interface); +static void register_vector_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_vector_buffer_on_node(void *data_interface_, unsigned dst_node); +static void *vector_to_pointer(void *data_interface, unsigned node); +static void free_vector_buffer_on_node(void *data_interface, unsigned node); +static void cache_vector_buffer_on_node(void *new_data_interface, void *data_interface, unsigned node); +static void reuse_vector_buffer_on_node(void *data_interface, const void *new_data_interface, unsigned node); +static size_t vector_interface_get_size(starpu_data_handle_t handle); +static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle); +static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle); +static uint32_t alloc_footprint_vector_interface_crc32(starpu_data_handle_t handle); +static int vector_compare(void *data_interface_a, void *data_interface_b); +static int vector_alloc_compare(void *data_interface_a, void *data_interface_b); +static void display_vector_interface(starpu_data_handle_t handle, FILE *f); +static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); 
+static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_vector_ops = +{ + .init = vector_init, + .register_data_handle = register_vector_handle, + .allocate_data_on_node = allocate_vector_buffer_on_node, + .to_pointer = vector_to_pointer, + .free_data_on_node = free_vector_buffer_on_node, + .cache_data_on_node = cache_vector_buffer_on_node, + .reuse_data_on_node = reuse_vector_buffer_on_node, + .map_data = map_vector, + .unmap_data = unmap_vector, + .update_map = update_map, + .copy_methods = &vector_copy_data_methods_s, + .get_size = vector_interface_get_size, + .get_alloc_size = vector_interface_get_alloc_size, + .footprint = footprint_vector_interface_crc32, + .alloc_footprint = alloc_footprint_vector_interface_crc32, + .compare = vector_compare, + .alloc_compare = vector_alloc_compare, + .interfaceid = STARPU_VECTOR_INTERFACE_ID, + .interface_size = sizeof(struct starpu_vector_interface), + .display = display_vector_interface, + .pack_data = pack_vector_handle, + .peek_data = peek_vector_handle, + .unpack_data = unpack_vector_handle, + .describe = describe, + .name = "STARPU_VECTOR_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void vector_init(void *data_interface) +{ + struct starpu_vector_interface *vector_interface = data_interface; + vector_interface->allocsize = -1; +} + +static void *vector_to_pointer(void *data_interface, unsigned node) +{ + (void) node; + struct starpu_vector_interface *vector_interface = data_interface; + + return (void*) vector_interface->ptr; +} + +static void register_vector_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) 
+ { + struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + { + local_interface->ptr = vector_interface->ptr; + local_interface->dev_handle = vector_interface->dev_handle; + local_interface->offset = vector_interface->offset; + } + else + { + local_interface->ptr = 0; + local_interface->dev_handle = 0; + local_interface->offset = 0; + } + + local_interface->id = vector_interface->id; + local_interface->nx = vector_interface->nx; + local_interface->elemsize = vector_interface->elemsize; + local_interface->allocsize = vector_interface->allocsize; + local_interface->slice_base = vector_interface->slice_base; + } +} + +/* declare a new data with the vector interface */ +void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize) +{ + struct starpu_vector_interface vector = + { + .id = STARPU_VECTOR_INTERFACE_ID, + .ptr = ptr, + .nx = nx, + .elemsize = elemsize, + .dev_handle = ptr, + .slice_base = 0, + .offset = 0, + .allocsize = allocsize, + }; +#if (!defined(STARPU_SIMGRID) && !defined(STARPU_OPENMP)) + if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) + { + if (nx && elemsize) + { + STARPU_ASSERT_ACCESSIBLE(ptr); + STARPU_ASSERT_ACCESSIBLE(ptr + nx*elemsize - 1); + } + } +#endif + + starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops); +} + +void starpu_vector_data_register(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, uint32_t nx, size_t elemsize) +{ + starpu_vector_data_register_allocsize(handleptr, home_node, ptr, nx, elemsize, nx * elemsize); +} + +void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, + uintptr_t ptr, uintptr_t dev_handle, size_t offset) +{ + struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(handle, node); + 
starpu_data_ptr_register(handle, node); + vector_interface->ptr = ptr; + vector_interface->dev_handle = dev_handle; + vector_interface->offset = offset; +} + + +static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_vector_get_nx(handle), 0); +} + +static uint32_t alloc_footprint_vector_interface_crc32(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(starpu_vector_get_allocsize(handle), 0); +} + +static int vector_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_vector_interface *vector_a = (struct starpu_vector_interface *) data_interface_a; + struct starpu_vector_interface *vector_b = (struct starpu_vector_interface *) data_interface_b; + + /* Two vectors are considered compatible if they have the same size */ + return (vector_a->nx == vector_b->nx) + && (vector_a->elemsize == vector_b->elemsize); +} + +static int vector_alloc_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpu_vector_interface *vector_a = (struct starpu_vector_interface *) data_interface_a; + struct starpu_vector_interface *vector_b = (struct starpu_vector_interface *) data_interface_b; + + /* Two vectors are considered allocation-compatible if they have the same size */ + return (vector_a->allocsize == vector_b->allocsize); +} + +static void display_vector_interface(starpu_data_handle_t handle, FILE *f) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t", vector_interface->nx); +} + +static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, node); + + *count = 
vector_interface->nx*vector_interface->elemsize;
+	/* A NULL ptr is a size-only query. NOTE(review): the allocation result is not checked before memcpy() -- confirm it cannot fail here or that callers cope. */
+	if (ptr != NULL)
+	{
+		*ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0);
+		memcpy(*ptr, (void*)vector_interface->ptr, vector_interface->elemsize*vector_interface->nx);
+	}
+
+	return 0;
+}
+/* peek_data callback: copies count bytes from ptr into the vector buffer on node; count must equal nx*elemsize. Always returns 0. */
+static int peek_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx);
+	memcpy((void*)vector_interface->ptr, ptr, count);
+
+	return 0;
+}
+/* unpack_data callback: same copy as peek_vector_handle(), then releases ptr with starpu_free_on_node_flags(). Always returns 0. */
+static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	peek_vector_handle(handle, node, ptr, count);
+	starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0);
+
+	return 0;
+}
+/* get_size callback: payload size in bytes (nx * elemsize), read from the STARPU_MAIN_RAM copy of the interface. */
+static size_t vector_interface_get_size(starpu_data_handle_t handle)
+{
+	size_t size;
+	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+#ifdef STARPU_DEBUG
+	STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector.");
+#endif
+
+	size = vector_interface->nx * vector_interface->elemsize;
+
+	return size;
+}
+
+static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle)
+{
+	size_t size;
+	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+#ifdef STARPU_DEBUG
+	STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error.
The given data is not a vector."); +#endif + + size = vector_interface->allocsize; + STARPU_ASSERT_MSG(size != (size_t)-1, "The vector allocation size needs to be defined"); + + return size; +} + +/* offer an access to the data parameters */ +uint32_t starpu_vector_get_nx(starpu_data_handle_t handle) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); +#endif + + return vector_interface->nx; +} + +uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle) +{ + unsigned node; + node = starpu_worker_get_local_memory_node(); + + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, node); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); +#endif + + return vector_interface->ptr; +} + +size_t starpu_vector_get_elemsize(starpu_data_handle_t handle) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); +#endif + + return vector_interface->elemsize; +} + +size_t starpu_vector_get_allocsize(starpu_data_handle_t handle) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. 
The given data is not a vector."); +#endif + + return vector_interface->allocsize; +} + +/* memory allocation/deallocation primitives for the vector interface */ + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_vector_buffer_on_node(void *data_interface_, unsigned dst_node) +{ + uintptr_t addr = 0, handle; + + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface_; + + starpu_ssize_t allocated_memory = vector_interface->allocsize; + handle = starpu_malloc_on_node(dst_node, allocated_memory); + if (!handle) + return -ENOMEM; + + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + addr = handle; + + /* update the data properly in consequence */ + vector_interface->ptr = addr; + vector_interface->dev_handle = handle; + vector_interface->offset = 0; + + return allocated_memory; +} + +static void free_vector_buffer_on_node(void *data_interface, unsigned node) +{ + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface; + + starpu_free_on_node(node, vector_interface->dev_handle, vector_interface->allocsize); + vector_interface->ptr = 0; + vector_interface->dev_handle = 0; +} + +static void cache_vector_buffer_on_node(void *new_data_interface, void *data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_vector_interface *new_vector_interface = new_data_interface; + struct starpu_vector_interface *vector_interface = data_interface; + + new_vector_interface->ptr = vector_interface->ptr; + vector_interface->ptr = 0; + new_vector_interface->dev_handle = vector_interface->dev_handle; + vector_interface->dev_handle = 0; + new_vector_interface->allocsize = vector_interface->allocsize; + STARPU_ASSERT(vector_interface->offset == 0); +} + +static void reuse_vector_buffer_on_node(void *data_interface, const void *new_data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_vector_interface *vector_interface = 
data_interface; + const struct starpu_vector_interface *new_vector_interface = new_data_interface; + + vector_interface->ptr = new_vector_interface->ptr; + vector_interface->dev_handle = new_vector_interface->dev_handle; + vector_interface->offset = 0; +} + +static int map_vector(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_vector_interface *src_vector = src_interface; + struct starpu_vector_interface *dst_vector = dst_interface; + int ret; + uintptr_t mapped; + + mapped = starpu_interface_map(src_vector->dev_handle, src_vector->offset, src_node, dst_node, src_vector->nx*src_vector->elemsize, &ret); + if (mapped) + { + dst_vector->dev_handle = mapped; + dst_vector->offset = 0; + if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) + dst_vector->ptr = mapped; + return 0; + } + return ret; +} + +static int unmap_vector(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_vector_interface *src_vector = src_interface; + struct starpu_vector_interface *dst_vector = dst_interface; + + int ret = starpu_interface_unmap(src_vector->dev_handle, src_vector->offset, src_node, dst_vector->dev_handle, dst_node, src_vector->nx*src_vector->elemsize); + dst_vector->dev_handle = 0; + + return ret; +} + +static int update_map(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node) +{ + struct starpu_vector_interface *src_vector = src_interface; + struct starpu_vector_interface *dst_vector = dst_interface; + + return starpu_interface_update_map(src_vector->dev_handle, src_vector->offset, src_node, dst_vector->dev_handle, dst_vector->offset, dst_node, src_vector->nx*src_vector->elemsize); +} + +static int copy_any_to_any(void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpu_vector_interface *src_vector = src_interface; + struct starpu_vector_interface *dst_vector = dst_interface; + int ret; 
+ + ret = starpu_interface_copy(src_vector->dev_handle, src_vector->offset, src_node, + dst_vector->dev_handle, dst_vector->offset, dst_node, + src_vector->nx*src_vector->elemsize, async_data); + + starpu_interface_data_copy(src_node, dst_node, src_vector->nx*src_vector->elemsize); + return ret; +} + +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) +{ + struct starpu_vector_interface *vector = (struct starpu_vector_interface *) data_interface; + return snprintf(buf, size, "V%ux%u", + (unsigned) vector->nx, + (unsigned) vector->elemsize); +} diff --git a/src/datawizard/interfaces/void_interface.c b/src/datawizard/interfaces/void_interface.c new file mode 100644 index 0000000..2a6eb7c --- /dev/null +++ b/src/datawizard/interfaces/void_interface.c @@ -0,0 +1,184 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef BUILDING_STARPU +#include +#endif + +static int dummy_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); +static int map_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int unmap_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); +static int update_map_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); + +static const struct starpu_data_copy_methods void_copy_data_methods_s = +{ + .any_to_any = dummy_copy, +}; + +static void register_void_handle(starpu_data_handle_t handle, int home_node, void *data_interface); +static starpu_ssize_t allocate_void_buffer_on_node(void *data_interface_, unsigned dst_node); +static void free_void_buffer_on_node(void *data_interface, unsigned node); +static size_t void_interface_get_size(starpu_data_handle_t handle); +static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle); +static int void_compare(void *data_interface_a, void *data_interface_b); +static void display_void_interface(starpu_data_handle_t handle, FILE *f); +static int pack_void_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); +static int peek_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static int unpack_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); +static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); + +struct starpu_data_interface_ops starpu_interface_void_ops = +{ + .register_data_handle = register_void_handle, + .allocate_data_on_node = allocate_void_buffer_on_node, + .free_data_on_node = free_void_buffer_on_node, + .map_data = map_void, + .unmap_data = unmap_void, + .update_map = update_map_void, + .copy_methods = &void_copy_data_methods_s, + .get_size = void_interface_get_size, + .footprint = 
footprint_void_interface_crc32, + .compare = void_compare, + .interfaceid = STARPU_VOID_INTERFACE_ID, + .interface_size = 0, + .display = display_void_interface, + .pack_data = pack_void_handle, + .peek_data = peek_void_handle, + .unpack_data = unpack_void_handle, + .describe = describe, + .name = "STARPU_VOID_INTERFACE", + .pack_meta = NULL, + .unpack_meta = NULL, + .free_meta = NULL +}; + +static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, + int home_node STARPU_ATTRIBUTE_UNUSED, + void *data_interface STARPU_ATTRIBUTE_UNUSED) +{ + /* Since there is no real data to register, we don't do anything */ +} + +/* declare a new data with the void interface */ +void starpu_void_data_register(starpu_data_handle_t *handleptr) +{ + starpu_data_register(handleptr, STARPU_MAIN_RAM, NULL, &starpu_interface_void_ops); +} + + +static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int void_compare(void *data_interface_a STARPU_ATTRIBUTE_UNUSED, + void *data_interface_b STARPU_ATTRIBUTE_UNUSED) +{ + /* There is no allocation required, and therefore nothing to cache + * anyway. 
*/
+	return 1;
+}
+/* display callback: prints the literal "void" followed by a tab. */
+static void display_void_interface(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, FILE *f)
+{
+	fprintf(f, "void\t");
+}
+/* pack_data callback: a void handle packs to zero bytes; no buffer is produced. */
+static int pack_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED,
+			    unsigned node STARPU_ATTRIBUTE_UNUSED,
+			    void **ptr,
+			    starpu_ssize_t *count)
+{
+	*count = 0;
+	if (ptr != NULL) *ptr = NULL; /* ptr may be NULL when the caller only asks for the size */
+	return 0;
+}
+/* peek_data callback: nothing to copy for a void handle. */
+static int peek_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED,
+			    unsigned node STARPU_ATTRIBUTE_UNUSED,
+			    void *ptr STARPU_ATTRIBUTE_UNUSED,
+			    size_t count STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+/* unpack_data callback: nothing to copy and nothing to free, since pack_void_handle() hands out no buffer. */
+static int unpack_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED,
+			      unsigned node STARPU_ATTRIBUTE_UNUSED,
+			      void *ptr STARPU_ATTRIBUTE_UNUSED,
+			      size_t count STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+/* get_size callback: a void handle always has size 0. */
+static size_t void_interface_get_size(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+
+/* memory allocation/deallocation primitives for the void interface */
+
+/* returns the size of the allocated area */
+static starpu_ssize_t allocate_void_buffer_on_node(void *data_interface STARPU_ATTRIBUTE_UNUSED,
+						   unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+{
+	/* Successfully allocated 0 bytes */
+	return 0;
+}
+
+static void free_void_buffer_on_node(void *data_interface STARPU_ATTRIBUTE_UNUSED ,
+				     unsigned node STARPU_ATTRIBUTE_UNUSED)
+{
+	/* There is no buffer actually */
+}
+/* map_data callback: no-op that reports success. */
+static int map_void(void *src_interface STARPU_ATTRIBUTE_UNUSED,
+		    unsigned src_node STARPU_ATTRIBUTE_UNUSED,
+		    void *dst_interface STARPU_ATTRIBUTE_UNUSED,
+		    unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+/* unmap_data callback: no-op that reports success. */
+static int unmap_void(void *src_interface STARPU_ATTRIBUTE_UNUSED,
+		      unsigned src_node STARPU_ATTRIBUTE_UNUSED,
+		      void *dst_interface STARPU_ATTRIBUTE_UNUSED,
+		      unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+
+static int update_map_void(void *src_interface STARPU_ATTRIBUTE_UNUSED,
+			   unsigned src_node STARPU_ATTRIBUTE_UNUSED,
+			   void *dst_interface
STARPU_ATTRIBUTE_UNUSED, + unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int dummy_copy(void *src_interface STARPU_ATTRIBUTE_UNUSED, + unsigned src_node STARPU_ATTRIBUTE_UNUSED, + void *dst_interface STARPU_ATTRIBUTE_UNUSED, + unsigned dst_node STARPU_ATTRIBUTE_UNUSED, + void *async_data STARPU_ATTRIBUTE_UNUSED) +{ + return 0; +} + +static starpu_ssize_t describe(void *data_interface STARPU_ATTRIBUTE_UNUSED, char *buf, size_t size) +{ + return snprintf(buf, size, "0"); +} diff --git a/src/datawizard/malloc.c b/src/datawizard/malloc.c new file mode 100644 index 0000000..31a822a --- /dev/null +++ b/src/datawizard/malloc.c @@ -0,0 +1,1131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018,2022 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#include +#include +#endif + +#ifdef STARPU_HAVE_HWLOC +#include +#ifndef HWLOC_API_VERSION +#define HWLOC_OBJ_PU HWLOC_OBJ_PROC +#endif +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif +#endif + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifndef MAP_POPULATE +#define MAP_POPULATE 0 +#endif + +static size_t _malloc_align = sizeof(void*); +static int disable_pinning; +static int enable_suballocator; + +/* This file is used for implementing "folded" allocation */ +#ifdef STARPU_SIMGRID +static int bogusfile = -1; +static unsigned long _starpu_malloc_simulation_fold; +/* Table to control unique simulation mallocs */ +#include +struct unique_shared_alloc +{ + size_t id; + int count; + void* addr; + UT_hash_handle hh; +}; +static struct unique_shared_alloc* unique_shared_alloc_table = NULL; +#endif + +static starpu_malloc_hook malloc_hook; +static starpu_free_hook free_hook; + +void starpu_malloc_set_hooks(starpu_malloc_hook _malloc_hook, starpu_free_hook _free_hook) +{ + malloc_hook = _malloc_hook; + free_hook = _free_hook; +} + +void starpu_malloc_set_align(size_t align) +{ + STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align); + if (_malloc_align < align) + _malloc_align = align; +} + +/* Driver porters: adding your driver here is optional, only needed for pinning host memory. */ + +#if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL) +struct malloc_pinned_codelet_struct +{ + void **ptr; + size_t dim; +}; +#endif + +/* Would be difficult to do it this way, we need to remember the cl_mem to be able to free it later... 
*/ + +//#ifdef STARPU_USE_OPENCL +//static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) +//{ +// struct malloc_pinned_codelet_struct *s = arg; +// // _STARPU_MALLOC(*(s->ptr), s->dim); +// starpu_opencl_allocate_memory(devid, (void **)(s->ptr), s->dim, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR); +//} +//#endif + +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) +static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) +{ + struct malloc_pinned_codelet_struct *s = arg; + cudaError_t cures = cudaErrorMemoryAllocation; +#if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) + /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ + cures = cudaMallocManaged((void **)(s->ptr), s->dim, cudaMemAttachGlobal); +#endif +#if defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_CANMAPHOST) + if (cures != cudaSuccess) + cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable|cudaHostAllocMapped); +#endif + if (cures != cudaSuccess) + cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable); + + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} +#endif + +#if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)) && !defined(STARPU_SIMGRID)// || defined(STARPU_USE_OPENCL) +static struct starpu_perfmodel malloc_pinned_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "malloc_pinned" +}; + +static struct starpu_codelet malloc_pinned_cl = +{ + .cuda_funcs = {malloc_pinned_cuda_codelet}, +//#ifdef STARPU_USE_OPENCL +// .opencl_funcs = {malloc_pinned_opencl_codelet}, +//#endif + .nbuffers = 0, + .model = &malloc_pinned_model +}; +#endif + +/* Allocation in CPU RAM */ +int starpu_malloc_flags(void **A, size_t dim, int flags) +{ + return _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, A, dim, flags); +} + +/* Return whether we should pin 
the allocated data */ +/* Returns 1 only when flags contains STARPU_MALLOC_PINNED, disable_pinning is not positive, and a CUDA or HIP task can be submitted; returns 0 in every other case. */ +static int _starpu_malloc_should_pin(int flags) +{ + if (flags & STARPU_MALLOC_PINNED && disable_pinning <= 0) + { + if (_starpu_can_submit_cuda_task()) + { + return 1; + } + if (_starpu_can_submit_hip_task()) + { + return 1; + } +// if (_starpu_can_submit_opencl_task()) +// return 1; + } + return 0; +} + +/* Tells whether an allocation on dst_node made with that node's malloc_on_node_default_flags would be pinned: same decision as _starpu_malloc_should_pin(), with the extra condition that we are not running on Valgrind. */ +int _starpu_malloc_willpin_on_node(unsigned dst_node) +{ + int flags = _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags; + return (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0 + && (_starpu_can_submit_cuda_task() + || _starpu_can_submit_hip_task() + /* || _starpu_can_submit_opencl_task() */ + )); +} + +/* Allocates dim bytes for dst_node and stores the resulting pointer in *A (A must not be NULL; a zero dim is bumped to 1). + * With STARPU_MALLOC_COUNT, the size is first accounted through starpu_memory_allocate(); unless STARPU_MALLOC_NORECLAIM is set, memory is reclaimed in a loop until the accounting succeeds. + * When a malloc_hook is registered, it performs the actual allocation and its return value is used as the result. + * Returns 0 on success, -ENOMEM (with *A set to NULL) when reclaiming could not free enough memory. */ +int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int flags) +{ + int ret=0; + + STARPU_ASSERT_MSG(A, "starpu_malloc needs to be passed the address of the pointer to be filled"); + if (!starpu_is_initialized()) + _STARPU_DISP("Warning: starpu_malloc needs to be called after starpu is initialized, to be able to pin memory for CUDA\n"); + + if (dim == 0) + /* Make sure we succeed */ + dim = 1; + + if (flags & STARPU_MALLOC_COUNT) + { + if (!(flags & STARPU_MALLOC_NORECLAIM)) + while (starpu_memory_allocate(dst_node, dim, flags) != 0) + { + size_t freed; + size_t reclaim = 2 * dim; + _STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", (long)reclaim); + _STARPU_TRACE_START_MEMRECLAIM(dst_node,0); + freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim, STARPU_FETCH); + _STARPU_TRACE_END_MEMRECLAIM(dst_node,0); + if (freed < dim && !(flags & STARPU_MEMORY_WAIT)) + { + // We could not reclaim enough memory + *A = NULL; + return -ENOMEM; + } + } + else if (flags & STARPU_MEMORY_WAIT) + starpu_memory_allocate(dst_node, dim, flags); + else + starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW); + } + + if (malloc_hook) + { + ret = malloc_hook(dst_node, A, dim, flags); + goto end; + } + + /* Note: synchronize this test with 
_starpu_malloc_willpin_on_node */ + if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0) + { + if (_starpu_can_submit_cuda_task()) + { +#ifdef STARPU_SIMGRID + /* FIXME: CUDA seems to be taking 650µs every 1MiB. + * Ideally we would simulate this batching in 1MiB requests + * instead of computing an average value. + */ + if (_starpu_simgrid_cuda_malloc_cost()) + starpu_sleep((float) dim * 0.000650 / 1048576.); +#else /* STARPU_SIMGRID */ +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + cudaError_t cures = cudaErrorMemoryAllocation; + +#if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) + /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ + cures = cudaMallocManaged(A, dim, cudaMemAttachGlobal); +#endif + +#if defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_CANMAPHOST) + if (cures != cudaSuccess) + cures = cudaHostAlloc(A, dim, cudaHostAllocPortable|cudaHostAllocMapped); +#endif + + if (cures != cudaSuccess) + cures = cudaHostAlloc(A, dim, cudaHostAllocPortable); + + if (STARPU_UNLIKELY(cures)) + { + STARPU_CUDA_REPORT_ERROR(cures); + ret = -ENOMEM; + } + goto end; +#else + int push_res; + + /* Old versions of CUDA are not thread-safe, we have to + * run cudaHostAlloc from CUDA workers */ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned allocation must not be done from task or callback"); + + struct malloc_pinned_codelet_struct s = + { + .ptr = A, + .dim = dim + }; + + malloc_pinned_cl.where = STARPU_CUDA; + struct starpu_task *task = starpu_task_create(); + task->name = "cuda_malloc_pinned"; + task->callback_func = NULL; + task->cl = &malloc_pinned_cl; + task->cl_arg = &s; + task->type = STARPU_TASK_TYPE_INTERNAL; + + task->synchronous = 1; + + _starpu_exclude_task_from_dag(task); + + push_res = _starpu_task_submit_internally(task); + STARPU_ASSERT(push_res != -ENODEV); + goto end; +#endif /* 
STARPU_HAVE_CUDA_MEMCPY_PEER */ +#endif /* STARPU_USE_CUDA */ + } + if (_starpu_can_submit_hip_task()) + { +#ifdef STARPU_USE_HIP + hipError_t hipres = hipErrorMemoryAllocation; + +#if 0 //defined(STARPU_USE_HIP_MAP) && defined(STARPU_HAVE_HIP_MNGMEM) + /* FIXME: check if devices actually support hipMallocManaged or fallback to hipHostAlloc() */ + hipres = hipMallocManaged(A, dim, hipMemAttachGlobal); +#endif + +#if defined(STARPU_USE_HIP_MAP) && defined(STARPU_HAVE_HIP_CANMAPHOST) + if (hipres != hipSuccess) + hipres = hipHostMalloc(A, dim, hipHostMallocPortable|hipHostMallocMapped); +#endif + + if (hipres != hipSuccess) + { + hipres = hipHostMalloc(A, dim, hipHostMallocPortable); + } + + if (STARPU_UNLIKELY(hipres != hipSuccess)) + { + STARPU_HIP_REPORT_ERROR(hipres); + ret = -ENOMEM; + } + goto end; +#endif /* STARPU_USE_HIP */ +// } +// else if (_starpu_can_submit_opencl_task()) +// { +//#ifdef STARPU_USE_OPENCL +// int push_res; +// +// STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL allocation must not be done from task or callback"); +// +// struct malloc_pinned_codelet_struct s = +// { +// .ptr = A, +// .dim = dim +// }; +// +// malloc_pinned_cl.where = STARPU_OPENCL; +// struct starpu_task *task = starpu_task_create(); +// task->name = "opencl_malloc_pinned"; +// task->callback_func = NULL; +// task->cl = &malloc_pinned_cl; +// task->cl_arg = &s; +// task->synchronous = 1; +// task->type = STARPU_TASK_TYPE_INTERNAL; +// +// _starpu_exclude_task_from_dag(task); +// +// push_res = _starpu_task_submit_internally(task); +// STARPU_ASSERT(push_res != -ENODEV); +// goto end; +//#endif /* STARPU_USE_OPENCL */ +#endif /* STARPU_SIMGRID */ + } + } + +#ifdef STARPU_SIMGRID + if (flags & STARPU_MALLOC_SIMULATION_FOLDED) + { +#if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 + if(_starpu_simgrid_running_smpi()) + { + if(flags & STARPU_MALLOC_SIMULATION_UNIQUE) + { + struct unique_shared_alloc *block; + HASH_FIND(hh, 
unique_shared_alloc_table, &dim, sizeof(dim), block); + if(block==NULL) + { + block = (struct unique_shared_alloc*)malloc(sizeof(struct unique_shared_alloc)); + block->addr = SMPI_SHARED_MALLOC(dim); + block->count = 1; + block->id = dim; + HASH_ADD(hh, unique_shared_alloc_table, id, sizeof(dim), block); + } + else + { + block->count++; + } + *A = block->addr; + } + else + { + *A = SMPI_SHARED_MALLOC(dim); + } + } + else +#endif + { + /* Use "folded" allocation: the same file is mapped several + * times contiguously, to get a memory area one can read/write, + * without consuming memory */ + + /* First reserve memory area */ + void *buf = mmap (NULL, dim, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + unsigned i; + if (buf == MAP_FAILED) + { + _STARPU_DISP("Warning: could not allocate %luMiB of memory, you need to run \"sysctl vm.overcommit_memory=1\" as root to allow so big allocations\n", (unsigned long) (dim >> 20)); + ret = -ENOMEM; + *A = NULL; + } + else + { + if (bogusfile == -1) + { + char *path = starpu_getenv("TMPDIR"); + if (!path) + path = starpu_getenv("TEMP"); + if (!path) + path = starpu_getenv("TMP"); + if (!path) + path = "/tmp"; + /* Create bogus file if not done already */ + char *name = _starpu_mktemp(path, O_RDWR | O_BINARY, &bogusfile); + char *dumb; + if (!name) + { + ret = errno; + munmap(buf, dim); + *A = NULL; + goto end; + } + unlink(name); + free(name); + _STARPU_CALLOC(dumb, 1,_starpu_malloc_simulation_fold); + write(bogusfile, dumb, _starpu_malloc_simulation_fold); + free(dumb); + } + /* Map the bogus file in place of the anonymous memory */ + for (i = 0; i < dim / _starpu_malloc_simulation_fold; i++) + { + void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold); + void *res = mmap(pos, _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0); + STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). 
Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno)); + } + + if (dim % _starpu_malloc_simulation_fold) + { + void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold); + void *res = mmap(pos, dim % _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0); + STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno)); + } + *A = buf; + } + } + } +#endif + +#ifdef HAVE_MMAP +#ifdef STARPU_USE_MP + if(_starpu_can_submit_ms_task()) + { + *A = _starpu_map_allocate(dim, dst_node); + + if (!*A) + ret = -ENOMEM; + else + { +#ifdef STARPU_HAVE_HWLOC + struct _starpu_machine_config *config = _starpu_get_machine_config(); + hwloc_topology_t hwtopology = config->topology.hwtopology; + hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node)); + if (numa_node_obj) + { + hwloc_bitmap_t nodeset = numa_node_obj->nodeset; +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_set_area_membind(hwtopology, *A, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_NOCPUBIND); +#else + hwloc_set_area_membind_nodeset(hwtopology, *A, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); +#endif + } +#endif + } + } + else +#endif +#endif +#ifdef STARPU_HAVE_HWLOC + if (starpu_memory_nodes_get_numa_count() > 1) + { + struct _starpu_machine_config *config = _starpu_get_machine_config(); + hwloc_topology_t hwtopology = config->topology.hwtopology; + hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node)); + hwloc_bitmap_t nodeset = numa_node_obj->nodeset; +#if HWLOC_API_VERSION >= 0x00020000 + *A = 
hwloc_alloc_membind(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_NOCPUBIND); +#else + *A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); +#endif + //fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A); + if (!*A) + ret = -ENOMEM; + } +#endif /* STARPU_HAVE_HWLOC */ + else +#ifdef STARPU_HAVE_POSIX_MEMALIGN + if (_malloc_align != sizeof(void*)) + { + if (posix_memalign(A, _malloc_align, dim)) + { + ret = -ENOMEM; + *A = NULL; + } + } + else +#elif defined(STARPU_HAVE_MEMALIGN) + if (_malloc_align != sizeof(void*)) + { + *A = memalign(_malloc_align, dim); + if (!*A) + ret = -ENOMEM; + } + else +#endif /* STARPU_HAVE_POSIX_MEMALIGN */ + { + *A = malloc(dim); + if (!*A) + ret = -ENOMEM; + } + +end: + if (ret == 0) + { + STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim); + } + else if (flags & STARPU_MALLOC_COUNT) + { + starpu_memory_deallocate(dst_node, dim); + } + + return ret; +} + +int starpu_malloc(void **A, size_t dim) +{ + return starpu_malloc_flags(A, dim, STARPU_MALLOC_PINNED); +} + +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) +static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) +{ + cudaError_t cures; +#if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) + /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ + cures = cudaFree(arg); +#else + cures = cudaFreeHost(arg); +#endif + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} +#endif + +//#ifdef STARPU_USE_OPENCL +//static void free_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) +//{ +// // free(arg); +// int err = clReleaseMemObject(arg); +// if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); +//} 
+//#endif + +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) // || defined(STARPU_USE_OPENCL) +static struct starpu_perfmodel free_pinned_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "free_pinned" +}; + +static struct starpu_codelet free_pinned_cl = +{ + .cuda_funcs = {free_pinned_cuda_codelet}, +//#ifdef STARPU_USE_OPENCL +// .opencl_funcs = {free_pinned_opencl_codelet}, +//#endif + .nbuffers = 0, + .model = &free_pinned_model +}; +#endif + +int starpu_free_flags(void *A, size_t dim, int flags) +{ + return _starpu_free_flags_on_node(STARPU_MAIN_RAM, A, dim, flags); +} + +int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags) +{ + if (!A) + return 0; + + if (dim == 0) + dim = 1; + + if (free_hook) + { + free_hook(dst_node, A, dim, flags); + goto out; + } + + if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0) + { + if (_starpu_can_submit_cuda_task()) + { +#ifdef STARPU_SIMGRID + /* TODO: simulate CUDA barrier */ +#else /* !STARPU_SIMGRID */ +#ifdef STARPU_USE_CUDA +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + if (!starpu_is_initialized()) + { +#endif + /* This is especially useful when starpu_free is called even + * though starpu_shutdown has already + * been called, so we will not be able to submit a task. 
*/ + cudaError_t cures; +#if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) + /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ + cures = cudaFree(A); +#else + cures = cudaFreeHost(A); +#endif + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + goto out; +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + } + else + { + int push_res; + + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned deallocation must not be done from task or callback"); + + free_pinned_cl.where = STARPU_CUDA; + struct starpu_task *task = starpu_task_create(); + task->name = "cuda_free_pinned"; + task->callback_func = NULL; + task->cl = &free_pinned_cl; + task->cl_arg = A; + task->synchronous = 1; + task->type = STARPU_TASK_TYPE_INTERNAL; + + _starpu_exclude_task_from_dag(task); + + push_res = _starpu_task_submit_internally(task); + STARPU_ASSERT(push_res != -ENODEV); + goto out; + } +#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ +#endif /* STARPU_USE_CUDA */ + } + if (_starpu_can_submit_hip_task()) + { +#ifdef STARPU_USE_HIP + /* TODO: submit task */ + /* This is especially useful when starpu_free is called even + * though starpu_shutdown has already + * been called, so we will not be able to submit a task. 
*/ + hipError_t hipres; + hipres = hipHostFree(A); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + goto out; +#endif /* STARPU_USE_HIP */ +#endif /* STARPU_SIMGRID */ + } +// else if (_starpu_can_submit_opencl_task()) +// { +//#ifdef STARPU_USE_OPENCL +// int push_res; +// +// STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL deallocation must not be done from task or callback"); +// +// free_pinned_cl.where = STARPU_OPENCL; +// struct starpu_task *task = starpu_task_create(); +// task->name = "opencl_free_pinned"; +// task->callback_func = NULL; +// task->cl = &free_pinned_cl; +// task->cl_arg = A; +// task->synchronous = 1; +// task->type = STARPU_TASK_TYPE_INTERNAL; +// +// _starpu_exclude_task_from_dag(task); +// +// push_res = starpu_task_submit(task); +// STARPU_ASSERT(push_res != -ENODEV); +// goto out; +// } +//#endif + } + +#ifdef STARPU_SIMGRID + if (flags & STARPU_MALLOC_SIMULATION_FOLDED) + { +#if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 + if(_starpu_simgrid_running_smpi()) + { + if(flags & STARPU_MALLOC_SIMULATION_UNIQUE) + { + struct unique_shared_alloc *block; + HASH_FIND(hh, unique_shared_alloc_table, &dim, sizeof(dim), block); + STARPU_ASSERT(block != NULL); + block->count--; + if(block->count == 0) + { + SMPI_SHARED_FREE(block->addr); + HASH_DEL(unique_shared_alloc_table, block); + free(block); + } + } + else + { + SMPI_SHARED_FREE(A); + } + } + else +#endif + munmap(A, dim); + } +#endif +#ifdef HAVE_MMAP +#ifdef STARPU_USE_MP + else if(_starpu_can_submit_ms_task()) + { + _starpu_map_deallocate(A, dim); + } +#endif +#endif +#ifdef STARPU_HAVE_HWLOC + else if (starpu_memory_nodes_get_numa_count() > 1) + { + struct _starpu_machine_config *config = _starpu_get_machine_config(); + hwloc_topology_t hwtopology = config->topology.hwtopology; + hwloc_free(hwtopology, A, dim); + } +#endif /* STARPU_HAVE_HWLOC */ + else + free(A); + +out: + if (flags & STARPU_MALLOC_COUNT) + { + 
starpu_memory_deallocate(dst_node, dim); + } + + return 0; +} + +/* Frees A through starpu_free_flags() with STARPU_MALLOC_PINNED; the size is not known here and is passed as 0. */ +int starpu_free(void *A) +{ + return starpu_free_flags(A, 0, STARPU_MALLOC_PINNED); +} + +/* Same as starpu_free(), but forwards the caller-provided size dim. */ +int starpu_free_noflag(void *A, size_t dim) +{ + return starpu_free_flags(A, dim, STARPU_MALLOC_PINNED); +} + +/* Allocates size bytes (a zero size is bumped to 1) on dst_node through the node's malloc_on_node operation. + * With STARPU_MALLOC_COUNT in flags, the size is accounted through starpu_memory_allocate() first, and given back through starpu_memory_deallocate() if the node allocation then fails. + * The node operation never sees STARPU_MALLOC_COUNT, so that it does not account a second time. + * Returns the allocated address, or 0 when the accounting or the allocation failed; aborts if the node has no malloc_on_node operation. */ +static uintptr_t _starpu_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + /* Remember the accounting request: the bit is stripped from flags below, so it must not be tested there any more */ + int count = flags & STARPU_MALLOC_COUNT; + + if (size == 0) + size = 1; + + /* Handle count first */ + if (count) + { + if (starpu_memory_allocate(dst_node, size, flags) != 0) + return 0; + /* And prevent double-count in starpu_malloc_flags */ + flags &= ~STARPU_MALLOC_COUNT; + } + + const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); + if (node_ops && node_ops->malloc_on_node) + addr = node_ops->malloc_on_node(dst_node, size, flags & ~STARPU_MALLOC_COUNT); + else + STARPU_ABORT_MSG("No malloc_on_node function defined for node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + + if (addr == 0) + { + /* Allocation failed, give the accounted memory back to the memory manager */ + _STARPU_TRACE_MEMORY_FULL(size); + if (count) + starpu_memory_deallocate(dst_node, size); + } + return addr; +} + +/* Frees size bytes (a zero size is bumped to 1) at addr on dst_node through the node's free_on_node operation, which is called without STARPU_MALLOC_COUNT. + * When STARPU_MALLOC_COUNT was set in flags, the size is then given back through starpu_memory_deallocate(). Aborts if the node has no free_on_node operation. */ +void _starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + int count = flags & STARPU_MALLOC_COUNT; + flags &= ~STARPU_MALLOC_COUNT; + + if (size == 0) + size = 1; + + const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); + if (node_ops && node_ops->free_on_node) + node_ops->free_on_node(dst_node, addr, size, flags); + else + STARPU_ABORT_MSG("No free_on_node function defined for node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); + + if (count) + starpu_memory_deallocate(dst_node, size); +} + +/* Registers the host area [addr, addr+size) with the compiled-in CUDA/HIP backends; returns -1 when a backend registration fails, 0 otherwise. + * NOTE(review): STARPU_MALLOC_PINNED is tested below as a bare constant, not against a flags value -- confirm this is intended. */ +int +starpu_memory_pin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0) + { +#if 
defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + if (cudaHostRegister(addr, size, cudaHostRegisterPortable) != cudaSuccess) + return -1; +#endif +#if defined(STARPU_USE_HIP) + if (hipHostRegister(addr, size, hipHostRegisterPortable) != hipSuccess) + return -1; +#endif + } + return 0; +} + +/* Unregisters the host area starting at addr from the compiled-in CUDA/HIP backends; size is not used by the backend calls. + * Returns -1 when a backend call fails, 0 otherwise (including when pinning is disabled or when running on Valgrind). + * NOTE(review): STARPU_MALLOC_PINNED is tested below as a bare constant, not against a flags value -- confirm this is intended. */ +int +starpu_memory_unpin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0) + { +#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + if (cudaHostUnregister(addr) != cudaSuccess) + return -1; +#endif +#if defined(STARPU_USE_HIP) + if (hipHostUnregister(addr) != hipSuccess) + return -1; +#endif + } + return 0; +} + +/* Sets up the suballocator state of dst_node (empty chunk list, no free chunk, chunk mutex) and its default allocation flags (pinned and counted). + * Also reads STARPU_DISABLE_PINNING and STARPU_SUBALLOCATOR (default 1) from the environment; these two settings are not per-node, so they are re-read on every call. */ +void +_starpu_malloc_init(unsigned dst_node) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + _starpu_chunk_list_init(&node_struct->chunks); + node_struct->nfreechunks = 0; + STARPU_PTHREAD_MUTEX_INIT(&node_struct->chunk_mutex, NULL); + disable_pinning = starpu_getenv_number("STARPU_DISABLE_PINNING"); + enable_suballocator = starpu_getenv_number_default("STARPU_SUBALLOCATOR", 1); + node_struct->malloc_on_node_default_flags = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT; +#ifdef STARPU_SIMGRID + /* Reasonably "costless" */ + _starpu_malloc_simulation_fold = starpu_getenv_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20; +#endif +} + +/* Releases every chunk still held by the suballocator of dst_node, using the node's default allocation flags. */ +void +_starpu_malloc_shutdown(unsigned dst_node) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + struct _starpu_chunk *chunk, *next_chunk; + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); + for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); + chunk != _starpu_chunk_list_end(&node_struct->chunks); + chunk = next_chunk) + { + /* Fetch the successor first: chunk is erased and freed below */ + next_chunk = _starpu_chunk_list_next(chunk); + _starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, node_struct->malloc_on_node_default_flags); + 
_starpu_chunk_list_erase(&node_struct->chunks, chunk); + free(chunk); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); + STARPU_PTHREAD_MUTEX_DESTROY(&node_struct->chunk_mutex); +} + +/* Allocate CHUNK_SIZE bytes on dst_node and wrap them in a new chunk whose free segments list holds a single segment covering all CHUNK_NBLOCKS blocks. + * Returns NULL when the underlying allocation fails. The chunk is not inserted in any list here. */ +static struct _starpu_chunk *_starpu_new_chunk(unsigned dst_node, int flags) +{ + struct _starpu_chunk *chunk; + uintptr_t base = _starpu_malloc_on_node(dst_node, CHUNK_SIZE, flags); + + if (!base) + return NULL; + + /* Allocate the chunk descriptor itself */ + chunk = _starpu_chunk_new(); + chunk->base = base; + + /* First block is just a fake block pointing to the free segments list */ + chunk->bitmap[0].length = 0; + chunk->bitmap[0].next = 1; + + /* At first we have only one big segment for the whole chunk; -1 terminates the list */ + chunk->bitmap[1].length = CHUNK_NBLOCKS; + chunk->bitmap[1].next = -1; + + chunk->available_max = CHUNK_NBLOCKS; + chunk->available = CHUNK_NBLOCKS; + return chunk; +} + +/* Return whether we should use our suballocator: + * - always for STARPU_MAX_FPGA_RAM nodes, whatever the size and the suballocator setting; + * - otherwise only when the suballocator is enabled, size is at most CHUNK_ALLOC_MAX, and the node is either CUDA RAM or CPU RAM that would get pinned with these flags. */ +static int _starpu_malloc_should_suballoc(unsigned dst_node, size_t size, int flags) +{ + return (enable_suballocator && + (size <= CHUNK_ALLOC_MAX && + (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM + || (starpu_node_get_kind(dst_node) == STARPU_CPU_RAM + && _starpu_malloc_should_pin(flags)) + ))) + || starpu_node_get_kind(dst_node) == STARPU_MAX_FPGA_RAM; +} + +/* Allocates size bytes on dst_node, either directly or, when _starpu_malloc_should_suballoc() says so, as a run of consecutive CHUNK_ALLOC_MIN-sized blocks carved out of a chunk. + * Returns the allocated address, or 0 on failure. */ +uintptr_t +starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags) +{ + /* Big allocation, allocate normally */ + if (!_starpu_malloc_should_suballoc(dst_node, size, flags)) + return _starpu_malloc_on_node(dst_node, size, flags); + + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + + /* Round up allocation to block size; a zero size still takes one block */ + int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN; + if (!nblocks) + nblocks = 1; + + struct _starpu_chunk *chunk; + int prevblock, block; + int available_max; + struct block *bitmap; + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); + + /* Try to find a big enough segment among the chunks */ + 
for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); + chunk != _starpu_chunk_list_end(&node_struct->chunks); + chunk = _starpu_chunk_list_next(chunk)) + { + /* available_max is an overestimation: when even that is too small, skip the chunk without walking it */ + if (chunk->available_max < nblocks) + continue; + + bitmap = chunk->bitmap; + available_max = 0; + /* First-fit walk over the free segments list, starting from the fake head block 0 (length 0) */ + for (prevblock = block = 0; + block != -1; + prevblock = block, block = bitmap[prevblock].next) + { + STARPU_ASSERT(block >= 0 && block <= CHUNK_NBLOCKS); + int length = bitmap[block].length; + if (length >= nblocks) + { + + if (length >= 2*nblocks) + { + /* This segment has plenty of room left, + * move its chunk to the front of the list, + * to make finding it easier next time. */ + _starpu_chunk_list_erase(&node_struct->chunks, chunk); + _starpu_chunk_list_push_front(&node_struct->chunks, chunk); + } + if (chunk->available == CHUNK_NBLOCKS) + /* This chunk was completely free, it no longer counts as a free chunk */ + node_struct->nfreechunks--; + goto found; + } + /* Track the largest segment seen, to refresh the chunk's estimate below */ + if (length > available_max) + available_max = length; + } + + /* Didn't find a big enough segment in this chunk, its + * available_max is out of date */ + chunk->available_max = available_max; + } + + /* Didn't find a big enough segment, create another chunk. */ + chunk = _starpu_new_chunk(dst_node, flags); + if (!chunk) + { + /* Really no memory any more, fail */ + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); + errno = ENOMEM; + return 0; + } + + /* And make it easy to find. 
*/ + _starpu_chunk_list_push_front(&node_struct->chunks, chunk); + bitmap = chunk->bitmap; + /* A fresh chunk has its single free segment at block 1, right after the fake head block 0 */ + prevblock = 0; + block = 1; + +found: + + /* Here block is the first block of the chosen free segment, prevblock its predecessor in the free segments list */ + chunk->available -= nblocks; + STARPU_ASSERT(bitmap[block].length >= nblocks); + STARPU_ASSERT(block <= CHUNK_NBLOCKS); + if (bitmap[block].length == nblocks) + { + /* Fits exactly, drop this segment from the free segments list */ + bitmap[prevblock].next = bitmap[block].next; + } + else + { + /* Still some room: the remainder, starting at block + nblocks, replaces the segment in the list */ + STARPU_ASSERT(block + nblocks <= CHUNK_NBLOCKS); + bitmap[prevblock].next = block + nblocks; + bitmap[block + nblocks].length = bitmap[block].length - nblocks; + bitmap[block + nblocks].next = bitmap[block].next; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); + + /* Block numbers start at 1 (block 0 is the list head), hence block-1 */ + return chunk->base + (block-1) * CHUNK_ALLOC_MIN; +} + +/* Frees size bytes at addr on dst_node, either directly or, when _starpu_malloc_should_suballoc() says so, by putting the blocks back in the free segments list of the chunk containing addr. + * NOTE(review): size and flags select the path, so they presumably have to match the ones given at allocation time -- confirm against callers. */ +void +starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + /* Big allocation, deallocate normally */ + if (!_starpu_malloc_should_suballoc(dst_node, size, flags)) + { + _starpu_free_on_node_flags(dst_node, addr, size, flags); + return; + } + + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + struct _starpu_chunk *chunk; + + /* Round up allocation to block size; a zero size still took one block */ + int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN; + if (!nblocks) + nblocks = 1; + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); + /* Find the chunk whose address range contains addr */ + for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); + chunk != _starpu_chunk_list_end(&node_struct->chunks); + chunk = _starpu_chunk_list_next(chunk)) + if (addr >= chunk->base && addr < chunk->base + CHUNK_SIZE) + break; + STARPU_ASSERT(chunk != _starpu_chunk_list_end(&node_struct->chunks)); + + struct block *bitmap = chunk->bitmap; + /* Block numbers start at 1 (block 0 is the list head), hence the + 1 */ + int block = ((addr - chunk->base) / CHUNK_ALLOC_MIN) + 1, prevblock, nextblock; + + /* Look for free segment just before this one */ + for (prevblock = 0; + prevblock != -1; + prevblock = nextblock) + { + STARPU_ASSERT(prevblock >= 0 && prevblock <= CHUNK_NBLOCKS); + nextblock = 
bitmap[prevblock].next; + STARPU_ASSERT_MSG(nextblock != block, "It seems data 0x%lx (size %u) on node %u is being freed a second time\n", (unsigned long) addr, (unsigned) size, dst_node); + /* The free segments list is sorted by block number: stop at the last free segment located before the freed one */ + if (nextblock > block || nextblock == -1) + break; + } + STARPU_ASSERT(prevblock != -1); + + chunk->available += nblocks; + + /* Insert in free segments list */ + bitmap[block].next = nextblock; + bitmap[prevblock].next = block; + bitmap[block].length = nblocks; + + /* available_max must remain an overestimation of the largest free segment, + * even when the freed segment cannot be merged with a neighbour; otherwise + * the allocator would skip this chunk although it has enough room. */ + if (nblocks > chunk->available_max) + chunk->available_max = nblocks; + + STARPU_ASSERT(nextblock >= -1 && nextblock <= CHUNK_NBLOCKS); + if (nextblock == block + nblocks) + { + /* This freed segment is just before a free segment, merge them */ + bitmap[block].next = bitmap[nextblock].next; + bitmap[block].length += bitmap[nextblock].length; + + if (bitmap[block].length > chunk->available_max) + chunk->available_max = bitmap[block].length; + } + + /* prevblock == 0 is the fake list head, never a real segment to merge with */ + if (prevblock > 0 && prevblock + bitmap[prevblock].length == block) + { + /* This free segment is just after a free segment, merge them */ + bitmap[prevblock].next = bitmap[block].next; + bitmap[prevblock].length += bitmap[block].length; + + if (bitmap[prevblock].length > chunk->available_max) + chunk->available_max = bitmap[prevblock].length; + + block = prevblock; + } + + if (chunk->available == CHUNK_NBLOCKS) + { + /* This chunk is now empty, but avoid chunk free/alloc + * ping-pong by keeping some of these. 
*/ + if (node_struct->nfreechunks >= CHUNKS_NFREE && + starpu_node_get_kind(dst_node) != STARPU_MAX_FPGA_RAM) + { + /* We already have free chunks, release this one */ + _starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, flags); + _starpu_chunk_list_erase(&node_struct->chunks, chunk); + free(chunk); + } + else + node_struct->nfreechunks++; + } + else + { + /* Freed some room, put this first in chunks list */ + _starpu_chunk_list_erase(&node_struct->chunks, chunk); + _starpu_chunk_list_push_front(&node_struct->chunks, chunk); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); +} + +void starpu_malloc_on_node_set_default_flags(unsigned node, int flags) +{ + STARPU_ASSERT_MSG(node < STARPU_MAXNODES, "bogus node value %u given to starpu_malloc_on_node_set_default_flags\n", node); + _starpu_get_node_struct(node)->malloc_on_node_default_flags = flags; +} + +uintptr_t +starpu_malloc_on_node(unsigned dst_node, size_t size) +{ + return starpu_malloc_on_node_flags(dst_node, size, _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags); +} + +void +starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size) +{ + starpu_free_on_node_flags(dst_node, addr, size, _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags); +} diff --git a/src/datawizard/malloc.h b/src/datawizard/malloc.h new file mode 100644 index 0000000..93120f5 --- /dev/null +++ b/src/datawizard/malloc.h @@ -0,0 +1,101 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022-2022 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __ALLOC_H__ +#define __ALLOC_H__ + +#include +#include + +#pragma GCC visibility push(hidden) + +/** @file */ + +void _starpu_malloc_init(unsigned dst_node); +void _starpu_malloc_shutdown(unsigned dst_node); + +int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int flags); +int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags); + +/** + * Returns whether when allocating data on \p dst_node, we will do pinning, i.e. + * the allocation will be very expensive, and should thus be moved out from the + * critical path + */ +int _starpu_malloc_willpin_on_node(unsigned dst_node) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** + * On CUDA which has very expensive malloc, for small sizes, allocate big + * chunks divided in blocks, and we actually allocate segments of consecutive + * blocks. + * + * We try to keep the list of chunks with increasing occupancy, so we can + * quickly find free segments to allocate. + */ + +#ifdef STARPU_USE_MAX_FPGA +// FIXME: Maxeler FPGAs want 192 byte alignment +#define CHUNK_SIZE (128*1024*192) +#define CHUNK_ALLOC_MAX (CHUNK_SIZE / 8) +#define CHUNK_ALLOC_MIN (128*192) +#else +/* Size of each chunk, 32MiB granularity brings 128 chunks to be allocated in + * order to fill a 4GiB GPU. */ +#define CHUNK_SIZE (32*1024*1024) + +/* Maximum segment size we will allocate in chunks */ +#define CHUNK_ALLOC_MAX (CHUNK_SIZE / 8) + +/* Granularity of allocation, i.e. block size, StarPU will never allocate less + * than this. + * 16KiB (i.e. 64x64 float) granularity eats 2MiB RAM for managing a 4GiB GPU. 
+ */ +#define CHUNK_ALLOC_MIN (16*1024) +#endif + +/* Don't really deallocate chunks unless we have more than this many chunks + * which are completely free. */ +#define CHUNKS_NFREE 4 + +/* Number of blocks */ +#define CHUNK_NBLOCKS (CHUNK_SIZE/CHUNK_ALLOC_MIN) + +/* Linked list for available segments */ +struct block +{ + int length; /* Number of consecutive free blocks */ + int next; /* next free segment */ +}; + +/* One chunk */ +LIST_TYPE(_starpu_chunk, + uintptr_t base; + + /* Available number of blocks, for debugging */ + int available; + + /* Overestimation of the maximum size of available segments in this chunk */ + int available_max; + + /* Bitmap describing availability of the block */ + /* Block 0 is always empty, and is just the head of the free segments list */ + struct block bitmap[CHUNK_NBLOCKS+1]; +) + +#pragma GCC visibility pop + +#endif diff --git a/src/datawizard/memalloc.c b/src/datawizard/memalloc.c new file mode 100644 index 0000000..6842357 --- /dev/null +++ b/src/datawizard/memalloc.c @@ -0,0 +1,1985 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* When reclaiming memory to allocate, we reclaim data_size_coefficient*data_size */ +const unsigned starpu_memstrategy_data_size_coefficient=2; + +/* Minimum percentage of available memory in each node */ +static unsigned minimum_p; +static unsigned target_p; +/* Minimum percentage of number of clean buffer in each node */ +static unsigned minimum_clean_p; +static unsigned target_clean_p; +/* Whether CPU memory has been explicitly limited by user */ +static int limit_cpu_mem; + + +/* TODO: no home doesn't mean always clean, should push to larger memory nodes */ +#define MC_LIST_PUSH_BACK(node_struct, mc) do { \ + _starpu_mem_chunk_list_push_back(&node_struct->mc_list, mc); \ + if ((mc)->clean || (mc)->home) \ + /* This is clean */ \ + node_struct->mc_clean_nb++; \ + else if (!node_struct->mc_dirty_head) \ + /* This is the only dirty element for now */ \ + node_struct->mc_dirty_head = mc; \ + node_struct->mc_nb++; \ +} while(0) + +/* Put new clean mc at the end of the clean part of mc_list, i.e. 
just before mc_dirty_head (if any) */ +#define MC_LIST_PUSH_CLEAN(node_struct, mc) do { \ + if (node_struct->mc_dirty_head) \ + _starpu_mem_chunk_list_insert_before(&node_struct->mc_list, mc, node_struct->mc_dirty_head); \ + else \ + _starpu_mem_chunk_list_push_back(&node_struct->mc_list, mc); \ + /* This is clean */ \ + node_struct->mc_clean_nb++; \ + node_struct->mc_nb++; \ +} while (0) + +#define MC_LIST_ERASE(node_struct, mc) do { \ + if ((mc)->clean || (mc)->home) \ + node_struct->mc_clean_nb--; /* One clean element less */ \ + if ((mc) == node_struct->mc_dirty_head) \ + /* This was the dirty head */ \ + node_struct->mc_dirty_head = _starpu_mem_chunk_list_next((mc)); \ + /* One element less */ \ + node_struct->mc_nb--; \ + /* Remove element */ \ + _starpu_mem_chunk_list_erase(&node_struct->mc_list, (mc)); \ + /* Notify whoever asked for it */ \ + if ((mc)->remove_notify) \ + { \ + *((mc)->remove_notify) = NULL; \ + (mc)->remove_notify = NULL; \ + } \ +} while (0) + +/* Explicitly caches memory chunks that can be reused */ +struct mc_cache_entry +{ + UT_hash_handle hh; + struct _starpu_mem_chunk_list list; + uint32_t footprint; +}; + +int _starpu_is_reclaiming(unsigned node) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + return node_struct->tidying || node_struct->reclaiming; +} + +static int can_evict(unsigned node) +{ + return _starpu_get_node_struct(node)->evictable; +} + +/* Called after initializing the set of memory nodes */ +/* We use an accelerator -> CPU RAM -> disk storage hierarchy */ +void _starpu_mem_chunk_init_last(void) +{ + unsigned disk = 0; + unsigned nnodes = starpu_memory_nodes_get_count(), i; + + for (i = 0; i < nnodes; i++) + { + enum starpu_node_kind kind = starpu_node_get_kind(i); + struct _starpu_node *node_struct = _starpu_get_node_struct(i); + + if (kind == STARPU_DISK_RAM) + /* Some disk, will be able to evict RAM */ + /* TODO: disk hierarchy */ + disk = 1; + + else if (kind != STARPU_CPU_RAM) + /* This is 
an accelerator, we can evict to main RAM */ + node_struct->evictable = 1; + } + + if (disk) + for (i = 0; i < nnodes; i++) + { + enum starpu_node_kind kind = starpu_node_get_kind(i); + if (kind == STARPU_CPU_RAM) + _starpu_get_node_struct(i)->evictable = 1; + } +} + +/* A disk was registered, RAM is now evictable */ +void _starpu_mem_chunk_disk_register(unsigned disk_memnode) +{ + (void) disk_memnode; + unsigned nnodes = starpu_memory_nodes_get_count(), i; + + for (i = 0; i < nnodes; i++) + { + enum starpu_node_kind kind = starpu_node_get_kind(i); + if (kind == STARPU_CPU_RAM) + { + struct _starpu_node *node_struct = _starpu_get_node_struct(i); + STARPU_HG_DISABLE_CHECKING(node_struct->evictable); + node_struct->evictable = 1; + } + } +} + +static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node); +static int choose_target(starpu_data_handle_t handle, unsigned node); + +void _starpu_init_mem_chunk_lists(void) +{ + unsigned i; + for (i = 0; i < STARPU_MAXNODES; i++) + { + struct _starpu_node *node = _starpu_get_node_struct(i); + _starpu_spin_init(&node->mc_lock); + _starpu_mem_chunk_list_init(&node->mc_list); + STARPU_HG_DISABLE_CHECKING(node->mc_cache_size); + STARPU_HG_DISABLE_CHECKING(node->mc_nb); + STARPU_HG_DISABLE_CHECKING(node->mc_clean_nb); + STARPU_HG_DISABLE_CHECKING(node->prefetch_out_of_memory); + } + /* We do not enable forcing available memory by default, since + this makes StarPU spuriously free data when prefetching fills the + memory. Clean buffers should be enough to be able to allocate data + easily anyway. 
*/ + minimum_p = starpu_getenv_number_default("STARPU_MINIMUM_AVAILABLE_MEM", 0); + target_p = starpu_getenv_number_default("STARPU_TARGET_AVAILABLE_MEM", 0); + minimum_clean_p = starpu_getenv_number_default("STARPU_MINIMUM_CLEAN_BUFFERS", 5); + target_clean_p = starpu_getenv_number_default("STARPU_TARGET_CLEAN_BUFFERS", 10); + limit_cpu_mem = starpu_getenv_number("STARPU_LIMIT_CPU_MEM"); +} + +void _starpu_deinit_mem_chunk_lists(void) +{ + unsigned i; + for (i = 0; i < STARPU_MAXNODES; i++) + { + struct _starpu_node *node = _starpu_get_node_struct(i); + struct mc_cache_entry *entry=NULL, *tmp=NULL; + STARPU_ASSERT(node->mc_nb == 0); + STARPU_ASSERT(node->mc_clean_nb == 0); + STARPU_ASSERT(node->mc_dirty_head == NULL); + HASH_ITER(hh, node->mc_cache, entry, tmp) + { + STARPU_ASSERT(_starpu_mem_chunk_list_empty(&entry->list)); + HASH_DEL(node->mc_cache, entry); + free(entry); + } + STARPU_ASSERT(node->mc_cache_nb == 0); + STARPU_ASSERT(node->mc_cache_size == 0); + _starpu_spin_destroy(&node->mc_lock); + } +} + +/* + * Manipulate subtrees + */ + +static void unlock_all_subtree(starpu_data_handle_t handle) +{ + /* unlock all sub-subtrees children + * Note that this is done in the reverse order of the + * lock_all_subtree so that we avoid deadlock */ + unsigned i; + for (i =0; i < handle->nchildren; i++) + { + unsigned child = handle->nchildren - 1 - i; + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + unlock_all_subtree(child_handle); + } + + _starpu_spin_unlock(&handle->header_lock); +} + +static int lock_all_subtree(starpu_data_handle_t handle) +{ + int child; + + /* try to lock the parent, without blocking */ + if (_starpu_spin_trylock(&handle->header_lock)) + /* the handle is busy, abort */ + return 0; + + /* lock all sub-subtrees children */ + for (child = 0; child < (int) handle->nchildren; child++) + { + if (!lock_all_subtree(starpu_data_get_child(handle, child))) + { + /* Some child is busy, abort */ + while (--child >= 0) + /* Unlock what we have already 
uselessly locked */ + unlock_all_subtree(starpu_data_get_child(handle, child)); + return 0; + } + } + + return 1; +} + +static unsigned may_free_handle(starpu_data_handle_t handle, unsigned node) +{ + STARPU_ASSERT(handle->per_node[node].mapped == STARPU_UNMAPPED); + + /* we only free if no one refers to the leaf */ + uint32_t refcnt = _starpu_get_data_refcnt(handle, node); + if (refcnt) + return 0; + + if (handle->current_mode == STARPU_W) + { + if (handle->write_invalidation_req) + /* Some request is invalidating it anyway */ + return 0; + unsigned n; + for (n = 0; n < STARPU_MAXNODES; n++) + if (_starpu_get_data_refcnt(handle, n)) + /* Some task is writing to the handle somewhere */ + return 0; + } + + /* no problem was found */ + return 1; +} + +static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node) +{ + if (!may_free_handle(handle, node)) + return 0; + + /* look into all sub-subtrees children */ + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + unsigned res; + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + res = may_free_subtree(child_handle, node); + if (!res) + return 0; + } + + /* no problem was found */ + return 1; +} + +/* Warn: this releases the header lock of the handle during the transfer + * The handle may thus unexpectedly disappear. This returns -1 in that case, 0 if the transfer had to be aborted, and 1 on success. 
+ */ +static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node, + unsigned dst_node) +{ + STARPU_ASSERT(dst_node != src_node); + + if (handle->nchildren == 0) + { + struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node]; + struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node]; + + STARPU_ASSERT(src_replicate->mapped == STARPU_UNMAPPED); + STARPU_ASSERT(dst_replicate->mapped == STARPU_UNMAPPED); + + /* this is a leaf */ + + while (src_replicate->state == STARPU_OWNER) + { + /* This is the only copy, push it to destination */ + struct _starpu_data_request *r; + r = _starpu_create_request_to_fetch_data(handle, dst_replicate, STARPU_R, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "transfer_subtree_to_node"); + /* There is no way we don't need a request, since + * source is OWNER, destination can't be having it */ + STARPU_ASSERT(r); + /* Keep the handle alive while we are working on it */ + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + _starpu_wait_data_request_completion(r, 1); + _starpu_spin_lock(&handle->header_lock); + handle->busy_count--; + if (_starpu_data_check_not_busy(handle)) + /* Actually disappeared, abort completely */ + return -1; + if (!may_free_subtree(handle, src_node)) + /* Oops, while we released the header lock, a + * task got in, abort. 
*/ + return 0; + } + STARPU_ASSERT(may_free_subtree(handle, src_node)); + + if (src_replicate->state == STARPU_SHARED) + { + unsigned i; + unsigned last = 0; + unsigned cnt = 0; + + /* some other node may have the copy */ + if (src_replicate->state != STARPU_INVALID) + _STARPU_TRACE_DATA_STATE_INVALID(handle, src_node); + src_replicate->state = STARPU_INVALID; + + /* count the number of copies */ + for (i = 0; i < STARPU_MAXNODES; i++) + { + if (handle->per_node[i].state == STARPU_SHARED) + { + cnt++; + last = i; + } + } + STARPU_ASSERT(cnt > 0); + + if (cnt == 1) + { + if (handle->per_node[last].state != STARPU_OWNER) + _STARPU_TRACE_DATA_STATE_OWNER(handle, last); + handle->per_node[last].state = STARPU_OWNER; + } + + } + else + STARPU_ASSERT(src_replicate->state == STARPU_INVALID); + /* Already dropped by somebody, in which case there is nothing to be done */ + } + else + { + /* transfer all sub-subtrees children */ + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + int res = transfer_subtree_to_node(child_handle, src_node, dst_node); + if (res == 0) + return 0; + /* There is no way children have disappeared since we + * keep the parent lock held */ + STARPU_ASSERT(res != -1); + } + } + /* Success! */ + return 1; +} + +static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node) +{ + unsigned child; + + replicate->allocated = 0; + + /* XXX why do we need that ? 
*/ + replicate->automatically_allocated = 0; + + for (child = 0; child < handle->nchildren; child++) + { + /* Notify children that their buffer has been deallocated too */ + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + notify_handle_children(child_handle, &child_handle->per_node[node], node); + } +} + +static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node) +{ + size_t freed = 0; + + STARPU_ASSERT(mc->ops); + STARPU_ASSERT(mc->ops->free_data_on_node); + + starpu_data_handle_t handle = mc->data; + + struct _starpu_data_replicate *replicate = mc->replicate; + + if (handle) + _starpu_spin_checklocked(&handle->header_lock); + + if (mc->automatically_allocated && + (!handle || replicate->refcnt == 0)) + { + void *data_interface; + + if (handle) + { + STARPU_ASSERT(replicate->allocated); + STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); + } + + if (handle) + data_interface = replicate->data_interface; + else + data_interface = mc->chunk_interface; + STARPU_ASSERT(data_interface); + + _STARPU_TRACE_START_FREE(node, mc->size, handle); + mc->ops->free_data_on_node(data_interface, node); + _STARPU_TRACE_END_FREE(node, handle); + + if (handle) + notify_handle_children(handle, replicate, node); + + freed = mc->size; + + if (handle) + STARPU_ASSERT(replicate->refcnt == 0); + } + + return freed; +} + + + +/* mc_lock is held */ +static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node) +{ + size_t size; + starpu_data_handle_t handle = mc->data; + + if (handle) + { + _starpu_spin_checklocked(&handle->header_lock); + mc->size = _starpu_data_get_alloc_size(handle); + + mc->replicate->mc=NULL; + } + + /* free the actual buffer */ + size = free_memory_on_node(mc, node); + + /* remove the mem_chunk from the list */ + MC_LIST_ERASE(_starpu_get_node_struct(node), mc); + + _starpu_mem_chunk_delete(mc); + +#ifdef STARPU_SIMGRID + starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[node]); +#endif + + 
return size; +} + +/* We assume that node->mc_lock is taken. is_already_in_mc_list indicates + * that the mc is already in the list of buffers that are possibly used, and + * therefore not in the cache. */ +static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list) +{ + void *data_interface; + + /* we found an appropriate mem chunk: so we get it out + * of the "to free" list, and reassign it to the new + * piece of data */ + + struct _starpu_data_replicate *old_replicate = mc->replicate; + if (old_replicate) + { + old_replicate->mc = NULL; + old_replicate->allocated = 0; + old_replicate->automatically_allocated = 0; + old_replicate->initialized = 0; + data_interface = old_replicate->data_interface; + } + else + data_interface = mc->chunk_interface; + + STARPU_ASSERT(new_replicate->data_interface); + STARPU_ASSERT(data_interface); + + if (mc->ops->reuse_data_on_node) + mc->ops->reuse_data_on_node(new_replicate->data_interface, data_interface, node); + else + memcpy(new_replicate->data_interface, data_interface, mc->size_interface); + + if (!old_replicate) + { + /* Free the copy that we made */ + free(mc->chunk_interface); + mc->chunk_interface = NULL; + } + + /* XXX: We do not actually reuse the mc at the moment, only the interface */ + + /* mc->data = new_replicate->handle; */ + /* mc->footprint, mc->ops, mc->size_interface, + * mc->automatically_allocated should be unchanged ! + */ + + /* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */ + if (is_already_in_mc_list) + MC_LIST_ERASE(_starpu_get_node_struct(node), mc); + + free(mc); +} + +int starpu_data_can_evict(starpu_data_handle_t handle, unsigned node, enum starpu_is_prefetch is_prefetch) +{ + STARPU_ASSERT(node < STARPU_MAXNODES); + /* This data should be written through to this node, avoid dropping it! 
*/ + if (node < sizeof(handle->wt_mask) * 8 && handle->wt_mask & (1U<<node)) + return 0; + + /* This is the home node of the data, it cannot be dropped from here (NOTE(review): this test was lost to extraction truncation and has been reconstructed, confirm against upstream) */ + if ((int) node == handle->home_node) + return 0; + + unsigned mapnode; + for (mapnode = 0; mapnode < STARPU_MAXNODES; mapnode++) + if (handle->per_node[mapnode].mapped == (int) node) + /* This is mapped, we can't evict it */ + /* TODO: rather check if that can be evicted as well, and if so unmap it before evicting this */ + return 0; + + /* This data cannot be pushed outside CPU memory */ + if (!handle->ooc && handle->home_node == -1 + && starpu_node_get_kind(node) == STARPU_CPU_RAM + && starpu_memory_nodes_get_numa_count() == 1) + return 0; + + if (is_prefetch >= STARPU_TASK_PREFETCH && handle->per_node[node].nb_tasks_prefetch) + /* We have not finished executing the tasks this was prefetched for */ + return 0; + + if (!may_free_handle(handle, node)) + /* Somebody refers to it */ + return 0; + + return 1; +} + +/* This function is called for memory chunks that are possibly in use (i.e. not + * in the cache). They should therefore still be associated to a handle. */ +/* mc_lock is held and may be temporarily released! */ +static size_t try_to_throw_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list, enum starpu_is_prefetch is_prefetch) +{ + size_t freed = 0; + + starpu_data_handle_t handle; + handle = mc->data; + STARPU_ASSERT(handle); + + if (!starpu_data_can_evict(handle, node, is_prefetch)) + return 0; + + /* REDUX memchunk */ + if (mc->relaxed_coherency == 2) + { + /* TODO: reduce it back to e.g. main memory */ + } + else + /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a + * memchunk that could be used with filters. 
*/ + if (mc->relaxed_coherency == 1) + { + if (_starpu_spin_trylock(&handle->header_lock)) + /* Handle is busy, abort */ + return 0; + + if (!mc->replicate) { + /* _starpu_request_mem_chunk_removal removed it before us */ + _starpu_spin_unlock(&handle->header_lock); + return 0; + } + + if (mc->replicate->refcnt == 0) + { + /* Note that there is no need to transfer any data or + * to update the status in terms of MSI protocol + * because this memchunk is associated to a replicate + * in "relaxed coherency" mode. */ + if (replicate) + { + /* Reuse for this replicate */ + reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list); + freed = 1; + } + else + { + /* Free */ + freed = do_free_mem_chunk(mc, node); + } + } + + _starpu_spin_unlock(&handle->header_lock); + } + else if (lock_all_subtree(handle)) + /* try to lock all the subtree */ + { + if (!(replicate && handle->per_node[node].state == STARPU_OWNER)) + { + /* check if they are all "free" */ + if (may_free_subtree(handle, node)) + { + int target = -1; + + /* XXX Considering only owner to invalidate */ + + STARPU_ASSERT(handle->per_node[node].refcnt == 0); + + /* in case there was nobody using that buffer, throw it + * away after writing it back to main memory */ + + /* choose the best target */ + target = choose_target(handle, node); + + if (target != -1 && + /* Only reuse memchunks which are easy to throw + * away (which is likely thanks to periodic tidying). + * If there are none, we prefer to let generic eviction + * perhaps find other kinds of memchunks which will be + * earlier in LRU, and easier to throw away. 
*/ + !(replicate && handle->per_node[node].state == STARPU_OWNER)) + { + int res; + /* Should have been avoided in our caller */ + STARPU_ASSERT(!mc->remove_notify); + mc->remove_notify = &mc; + _starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); +#ifdef STARPU_MEMORY_STATS + if (handle->per_node[node].state == STARPU_OWNER) + _starpu_memory_handle_stats_invalidated(handle, node); +#endif + _STARPU_TRACE_START_WRITEBACK(node, handle); + /* Note: this may need to allocate data etc. + * and thus release the header lock, take + * mc_lock, etc. */ + res = transfer_subtree_to_node(handle, node, target); + _STARPU_TRACE_END_WRITEBACK(node, handle); +#ifdef STARPU_MEMORY_STATS + _starpu_memory_handle_stats_loaded_owner(handle, target); +#endif + _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); + + if (!mc) + { + if (res == -1) + { + /* handle disappeared, abort without unlocking it */ + return 0; + } + } + else + { + STARPU_ASSERT(mc->remove_notify == &mc); + mc->remove_notify = NULL; + + if (res == -1) + { + /* handle disappeared, abort without unlocking it */ + return 0; + } + + if (res == 1) + { + /* mc is still associated with the old + * handle, now free it. 
+ */ + + if (handle->per_node[node].refcnt == 0) + { + /* And still nobody on it, now the actual buffer may be reused or freed */ + if (replicate) + { + /* Reuse for this replicate */ + reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list); + freed = 1; + } + else + { + /* Free */ + freed = do_free_mem_chunk(mc, node); + } + } + } + } + } + } + + } + /* unlock the tree */ + unlock_all_subtree(handle); + } + return freed; +} + +static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a, + void *data_interface_b, struct starpu_data_interface_ops *ops_b) +{ + if (ops_a->interfaceid != ops_b->interfaceid) + return -1; + if (ops_a->dontcache || ops_b->dontcache) + return -1; + + int ret; + if (ops_a->alloc_compare) + ret = ops_a->alloc_compare(data_interface_a, data_interface_b); + else + { + STARPU_ASSERT_MSG(ops_a->compare, "the interface '%s' does define neither alloc_compare nor compare method", ops_a->name); + ret = ops_a->compare(data_interface_a, data_interface_b); + } + + return ret; +} + +#ifdef STARPU_USE_ALLOCATION_CACHE +/* This function must be called with node->mc_lock taken */ +static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint) +{ + /* go through all buffers in the cache */ + struct mc_cache_entry *entry; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + HASH_FIND(hh, node_struct->mc_cache, &footprint, sizeof(footprint), entry); + if (!entry) + /* No data with that footprint */ + return NULL; + + struct _starpu_mem_chunk *mc; + for (mc = _starpu_mem_chunk_list_begin(&entry->list); + mc != _starpu_mem_chunk_list_end(&entry->list); + mc = _starpu_mem_chunk_list_next(mc)) + { + /* Is that a false hit ? 
(this is _very_ unlikely) */ + if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) != 1) + continue; + + /* Cache hit */ + + /* Remove from the cache */ + _starpu_mem_chunk_list_erase(&entry->list, mc); + node_struct->mc_cache_nb--; + STARPU_ASSERT_MSG(node_struct->mc_cache_nb >= 0, "allocation cache for node %u has %d objects??", node, node_struct->mc_cache_nb); + node_struct->mc_cache_size -= mc->size; + STARPU_ASSERT_MSG(node_struct->mc_cache_size >= 0, "allocation cache for node %u has %ld bytes??", node, (long) node_struct->mc_cache_size); + return mc; + } + + /* This is a cache miss */ + return NULL; +} + +/* this function looks for a memory chunk that matches a given footprint in the + * list of mem chunk that need to be freed. */ +static int try_to_find_reusable_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint) +{ + struct _starpu_mem_chunk *mc; + int success = 0; + + _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); + /* go through all buffers in the cache */ + mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint); + if (mc) + { + /* We found an entry in the cache so we can reuse it */ + reuse_mem_chunk(node, replicate, mc, 0); + success = 1; + } + _starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); + return success; +} +#endif + +/* this function looks for a memory chunk that matches a given footprint in the + * list of mem chunk that are not important */ +static int try_to_reuse_not_important_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint, enum starpu_is_prefetch is_prefetch) +{ + struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc; + int success = 0; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + _starpu_spin_lock(&node_struct->mc_lock); +restart: + /* now look for some non essential data in the active list */ + for 
(mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); + mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && !success; + mc = next_mc) + { + /* there is a risk that the memory chunk is freed before next + * iteration starts: so we compute the next element of the list + * now */ + orig_next_mc = next_mc = _starpu_mem_chunk_list_next(mc); + if (mc->remove_notify) + /* Somebody already working here, skip */ + continue; + if (!mc->data->is_not_important) + /* Important data, skip */ + continue; + if (mc->footprint != footprint || _starpu_data_interface_compare(data->per_node[node].data_interface, data->ops, mc->data->per_node[node].data_interface, mc->ops) != 1) + /* Not the right type of interface, skip */ + continue; + if (next_mc) + { + if (next_mc->remove_notify) + /* Somebody already working here, skip */ + continue; + next_mc->remove_notify = &next_mc; + } + + /* Note: this may unlock mc_list! */ + success = try_to_throw_mem_chunk(mc, node, replicate, 1, is_prefetch); + + if (orig_next_mc) + { + if (!next_mc) + /* Oops, somebody dropped the next item while we were + * not keeping the mc_lock. Restart from the beginning + * of the list */ + goto restart; + else + { + STARPU_ASSERT(next_mc->remove_notify == &next_mc); + next_mc->remove_notify = NULL; + } + } + } + _starpu_spin_unlock(&node_struct->mc_lock); + + return success; +} + +/* + * Try to find a buffer currently in use on the memory node which has the given + * footprint. 
+ */ +static int try_to_reuse_potentially_in_use_mc(unsigned node, starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, uint32_t footprint, enum starpu_is_prefetch is_prefetch) +{ + struct _starpu_mem_chunk *mc, *next_mc, *orig_next_mc; + int success = 0; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + if (is_prefetch >= STARPU_IDLEFETCH) + /* Do not evict a MC just for an idle fetch */ + return 0; + /* + * We have to unlock mc_lock before locking header_lock, so we have + * to be careful with the list. We try to do just one pass, by + * remembering the next mc to be tried. If it gets dropped, we restart + * from zero. So we continue until we go through the whole list without + * finding anything to free. + */ + + _starpu_spin_lock(&node_struct->mc_lock); + +restart: + for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); + mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && !success; + mc = next_mc) + { + /* mc hopefully gets out of the list, we thus need to prefetch + * the next element */ + orig_next_mc = next_mc = _starpu_mem_chunk_list_next(mc); + + if (mc->remove_notify) + /* Somebody already working here, skip */ + continue; + if (mc->footprint != footprint || _starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->data->per_node[node].data_interface, mc->ops) != 1) + /* Not the right type of interface, skip */ + continue; + if (next_mc) + { + if (next_mc->remove_notify) + /* Somebody already working here, skip */ + continue; + next_mc->remove_notify = &next_mc; + } + + /* Note: this may unlock mc_list! */ + success = try_to_throw_mem_chunk(mc, node, replicate, 1, is_prefetch); + + if (orig_next_mc) + { + if (!next_mc) + /* Oops, somebody dropped the next item while we were + * not keeping the mc_lock. 
Restart from the beginning + * of the list */ + goto restart; + else + { + STARPU_ASSERT(next_mc->remove_notify == &next_mc); + next_mc->remove_notify = NULL; + } + } + } + _starpu_spin_unlock(&node_struct->mc_lock); + + return success; +} + +/* + * Free the memory chunks that are explicitly tagged to be freed. + */ +static size_t flush_memchunk_cache(unsigned node, size_t reclaim) +{ + struct _starpu_mem_chunk *mc; + struct mc_cache_entry *entry=NULL, *tmp=NULL; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + size_t freed = 0; + +restart: + _starpu_spin_lock(&node_struct->mc_lock); + HASH_ITER(hh, node_struct->mc_cache, entry, tmp) + { + if (!_starpu_mem_chunk_list_empty(&entry->list)) + { + mc = _starpu_mem_chunk_list_pop_front(&entry->list); + STARPU_ASSERT(!mc->data); + STARPU_ASSERT(!mc->replicate); + + node_struct->mc_cache_nb--; + STARPU_ASSERT(node_struct->mc_cache_nb >= 0); + node_struct->mc_cache_size -= mc->size; + STARPU_ASSERT(node_struct->mc_cache_size >= 0); + _starpu_spin_unlock(&node_struct->mc_lock); + + freed += free_memory_on_node(mc, node); + + free(mc->chunk_interface); + _starpu_mem_chunk_delete(mc); + + if (reclaim && freed >= reclaim) + goto out; + goto restart; + } + + if (reclaim && freed >= reclaim) + break; + } + _starpu_spin_unlock(&node_struct->mc_lock); +out: + return freed; +} + +/* + * Try to free the buffers currently in use on the memory node. If the force + * flag is set, the memory is freed regardless of coherency concerns (this + * should only be used at the termination of StarPU for instance). + */ +static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) +{ + size_t freed = 0; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + struct _starpu_mem_chunk *mc, *next_mc; + + /* + * We have to unlock mc_lock before locking header_lock, so we have + * to be careful with the list. 
We try to do just one pass, by + * remembering the next mc to be tried. If it gets dropped, we restart + * from zero. So we continue until we go through the whole list without + * finding anything to free. + */ + +restart: + _starpu_spin_lock(&node_struct->mc_lock); + +restart2: + for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); + mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && (!reclaim || freed < reclaim); + mc = next_mc) + { + /* mc hopefully gets out of the list, we thus need to prefetch + * the next element */ + next_mc = _starpu_mem_chunk_list_next(mc); + + if (!force) + { + struct _starpu_mem_chunk *orig_next_mc = next_mc; + if (mc->remove_notify) + /* Somebody already working here, skip */ + continue; + if (next_mc) + { + if (next_mc->remove_notify) + /* Somebody already working here, skip */ + continue; + next_mc->remove_notify = &next_mc; + } + /* Note: this may unlock mc_list! */ + freed += try_to_throw_mem_chunk(mc, node, NULL, 0, is_prefetch); + + if (orig_next_mc) + { + if (!next_mc) + /* Oops, somebody dropped the next item while we were + * not keeping the mc_lock. Restart from the beginning + * of the list */ + goto restart2; + else + { + STARPU_ASSERT(next_mc->remove_notify == &next_mc); + next_mc->remove_notify = NULL; + } + } + } + else + { + /* Shutting down, really free */ + starpu_data_handle_t handle = mc->data; + + if (_starpu_spin_trylock(&handle->header_lock)) + { + /* Ergl. We are shutting down, but somebody is + * still locking the handle. That's not + * supposed to happen, but better be safe by + * letting it go through. */ + _starpu_spin_unlock(&node_struct->mc_lock); + goto restart; + } + + /* We must free the memory now, because we are + * terminating the drivers: note that data coherency is + * not maintained in that case ! 
*/ + freed += do_free_mem_chunk(mc, node); + + _starpu_spin_unlock(&handle->header_lock); + } + } + _starpu_spin_unlock(&node_struct->mc_lock); + + return freed; +} + +size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch) +{ + size_t freed = 0; + + STARPU_ASSERT(node < STARPU_MAXNODES); + if (reclaim && !force) + { + static unsigned warned; + STARPU_HG_DISABLE_CHECKING(warned); + if (!warned) + { + if (STARPU_ATOMIC_ADD(&warned, 1) == 1) + { + char name[32]; + starpu_memory_node_get_name(node, name, sizeof(name)); + _STARPU_DISP("Not enough memory left on node %s. Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. You may want to tune the STARPU_MINIMUM_CLEAN_BUFFERS and STARPU_TARGET_CLEAN_BUFFERS environment variables up a bit to make StarPU maintain more clean memory available, to avoid ending up in this situation.\n", name, (unsigned long) ((reclaim+1048575) / 1048576)); + } + } + } + + /* remove all buffers for which there was a removal request */ + freed += flush_memchunk_cache(node, reclaim); + + /* try to free all allocated data potentially in use */ + if (force || (reclaim && freedper_worker) + replicate = &handle->per_worker[node]; + else + replicate = &handle->per_node[node]; + + _starpu_spin_lock(&handle->header_lock); + + struct _starpu_mem_chunk *mc = replicate->mc; + int ret = -1; + + if (!mc) + { + _starpu_spin_unlock(&handle->header_lock); + /* Nothing there */ + goto out; + } + + _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); + /* Now we got the mc, we can unlock the header to let + * try_to_throw_mem_chunk reacquire it */ + _starpu_spin_unlock(&handle->header_lock); + if (mc->remove_notify) + /* Somebody already working here */ + goto out_mc; + if (try_to_throw_mem_chunk(mc, node, NULL, 0, STARPU_FETCH) == 0) + goto out_mc; + ret = 0; +out_mc: + 
_starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); +out: + return ret; +} + +/* Periodic tidy of available memory */ +void starpu_memchunk_tidy(unsigned node) +{ + starpu_ssize_t total; + starpu_ssize_t available; + size_t target, amount; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + STARPU_ASSERT(node < STARPU_MAXNODES); + if (!can_evict(node)) + return; + + if (node_struct->mc_clean_nb < (node_struct->mc_nb * minimum_clean_p) / 100) + { + struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc; + int skipped = 0; /* Whether we skipped a dirty MC, and we should thus stop updating mc_dirty_head. */ + + /* _STARPU_DEBUG("%d not clean: %d %d\n", node, node_struct->mc_clean_nb, node_struct->mc_nb); */ + + _STARPU_TRACE_START_WRITEBACK_ASYNC(node); + _starpu_spin_lock(&node_struct->mc_lock); + + for (mc = node_struct->mc_dirty_head; + mc && node_struct->mc_clean_nb < (node_struct->mc_nb * target_clean_p) / 100; + mc = next_mc, mc && skipped ? 0 : (node_struct->mc_dirty_head = mc)) + { + starpu_data_handle_t handle; + + /* mc may get out of the list, we thus need to prefetch + * the next element */ + next_mc = _starpu_mem_chunk_list_next(mc); + + if (mc->home) + /* Home node, it's always clean */ + continue; + if (mc->clean) + /* already clean */ + continue; + if (next_mc && next_mc->remove_notify) + { + /* Somebody already working here, skip */ + skipped = 1; + continue; + } + + handle = mc->data; + STARPU_ASSERT(handle); + + /* This data cannot be pushed outside CPU memory */ + if (!handle->ooc && handle->home_node == -1 + && starpu_node_get_kind(node) == STARPU_CPU_RAM + && starpu_memory_nodes_get_numa_count() == 1) + continue; + + if (_starpu_spin_trylock(&handle->header_lock)) + { + /* the handle is busy, abort */ + skipped = 1; + continue; + } + + if (handle->current_mode == STARPU_W) + { + if (handle->write_invalidation_req) + { + /* Some request is invalidating it anyway */ + _starpu_spin_unlock(&handle->header_lock); + 
continue; + } + + unsigned n; + for (n = 0; n < STARPU_MAXNODES; n++) + if (_starpu_get_data_refcnt(handle, n)) + break; + if (n < STARPU_MAXNODES) + { + /* Some task is writing to the handle somewhere */ + _starpu_spin_unlock(&handle->header_lock); + skipped = 1; + continue; + } + } + + if ( + /* This data should be written through to this node, avoid + * dropping it! */ + (node < sizeof(handle->wt_mask) * 8 && handle->wt_mask & (1<nchildren + /* REDUX, can't do anything with it, skip it */ + || mc->relaxed_coherency == 2 + ) + { + _starpu_spin_unlock(&handle->header_lock); + continue; + } + + if (handle->home_node != -1 && + (handle->per_node[handle->home_node].state != STARPU_INVALID + || mc->relaxed_coherency == 1)) + { + /* It's available in the home node, this should have been marked as clean already */ + mc->clean = 1; + node_struct->mc_clean_nb++; + _starpu_spin_unlock(&handle->header_lock); + continue; + } + + int target_node; + if (handle->home_node == -1) + target_node = choose_target(handle, node); + else + target_node = handle->home_node; + + if (target_node == -1) + { + /* Nowhere to put it, can't do much */ + _starpu_spin_unlock(&handle->header_lock); + continue; + } + + STARPU_ASSERT(target_node != (int) node); + + /* MC is dirty and nobody working on it, submit writeback */ + + /* MC will be clean, consider it as such */ + mc->clean = 1; + node_struct->mc_clean_nb++; + + orig_next_mc = next_mc; + if (next_mc) + { + STARPU_ASSERT(!next_mc->remove_notify); + next_mc->remove_notify = &next_mc; + } + + _starpu_spin_unlock(&node_struct->mc_lock); + if (!_starpu_create_request_to_fetch_data(handle, &handle->per_node[target_node], STARPU_R, NULL, STARPU_IDLEFETCH, 1, NULL, NULL, 0, "starpu_memchunk_tidy")) + { + /* No request was actually needed?? + * Odd, but cope with it. 
*/ + handle = NULL; + } + _starpu_spin_lock(&node_struct->mc_lock); + + if (orig_next_mc) + { + if (!next_mc) + /* Oops, somebody dropped the next item while we were + * not keeping the mc_lock. Give up for now, and we'll + * see the rest later */ + ; + else + { + STARPU_ASSERT(next_mc->remove_notify == &next_mc); + next_mc->remove_notify = NULL; + } + } + + if (handle) + _starpu_spin_unlock(&handle->header_lock); + } + _starpu_spin_unlock(&node_struct->mc_lock); + _STARPU_TRACE_END_WRITEBACK_ASYNC(node); + } + + total = starpu_memory_get_total(node); + + if (total <= 0) + return; + + available = starpu_memory_get_available(node); + /* Count cached allocation as being available */ + available += node_struct->mc_cache_size; + + if (available >= (starpu_ssize_t) (total * minimum_p) / 100) + /* Enough available space, do not trigger reclaiming */ + return; + + /* Not enough available space, reclaim until we reach the target. */ + target = (total * target_p) / 100; + amount = target - available; + + if (!STARPU_RUNNING_ON_VALGRIND && node_struct->tidying) + /* Some thread is already tidying this node, let it do it */ + return; + + if (STARPU_ATOMIC_ADD(&node_struct->tidying, 1) > 1) + /* Some thread got it before us, let it do it */ + goto out; + + static unsigned warned; + STARPU_HG_DISABLE_CHECKING(warned); + if (!warned) + { + if (STARPU_ATOMIC_ADD(&warned, 1) == 1) + { + char name[32]; + starpu_memory_node_get_name(node, name, sizeof(name)); + _STARPU_DISP("Low memory left on node %s (%ldMiB over %luMiB). Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. 
The thresholds can be tuned using the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment variables.\n", name, (long) (available / 1048576), (unsigned long) (total / 1048576), (unsigned long) ((amount+1048575) / 1048576)); + } + } + + _STARPU_TRACE_START_MEMRECLAIM(node,2); + free_potentially_in_use_mc(node, 0, amount, STARPU_PREFETCH); + _STARPU_TRACE_END_MEMRECLAIM(node,2); +out: + (void) STARPU_ATOMIC_ADD(&node_struct->tidying, -1); +} + +static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned home, unsigned automatically_allocated) +{ + struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new(); + starpu_data_handle_t handle = replicate->handle; + + STARPU_ASSERT(handle); + STARPU_ASSERT(handle->ops); + + mc->data = handle; + mc->footprint = _starpu_compute_data_alloc_footprint(handle); + mc->ops = handle->ops; + mc->automatically_allocated = automatically_allocated; + mc->relaxed_coherency = replicate->relaxed_coherency; + mc->home = home; + mc->clean = 1; + mc->replicate = replicate; + mc->replicate->mc = mc; + mc->chunk_interface = NULL; + mc->size_interface = interface_size; + mc->remove_notify = NULL; + mc->wontuse = 0; + + return mc; +} + +static void register_mem_chunk(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned automatically_allocated) +{ + unsigned dst_node = replicate->memory_node; + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + + struct _starpu_mem_chunk *mc; + + /* the interface was already filled by ops->allocate_data_on_node */ + size_t interface_size = replicate->handle->ops->interface_size; + + /* Put this memchunk in the list of memchunk in use */ + mc = _starpu_memchunk_init(replicate, interface_size, (int) dst_node == handle->home_node, automatically_allocated); + + _starpu_spin_lock(&node_struct->mc_lock); + MC_LIST_PUSH_BACK(node_struct, mc); + _starpu_spin_unlock(&node_struct->mc_lock); +} 
+ +/* This function is called when the handle is destroyed (eg. when calling + * unregister or unpartition). It puts all the memchunks that refer to the + * specified handle into the cache. + */ +void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size) +{ + STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); + struct _starpu_mem_chunk *mc = replicate->mc; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + + STARPU_ASSERT(mc->data == handle); + _starpu_spin_checklocked(&handle->header_lock); + STARPU_ASSERT(node < STARPU_MAXNODES); + + /* Record the allocated size, so that later in memory + * reclaiming we can estimate how much memory we free + * by freeing this. */ + mc->size = size; + + /* This memchunk doesn't have to do with the data any more. */ + replicate->mc = NULL; + mc->replicate = NULL; + replicate->allocated = 0; + replicate->automatically_allocated = 0; + replicate->initialized = 0; + + _starpu_spin_lock(&node_struct->mc_lock); + + mc->data = NULL; + /* remove it from the main list */ + MC_LIST_ERASE(node_struct, mc); + + _starpu_spin_unlock(&node_struct->mc_lock); + + /* + * Unless we have a memory limitation, we would fill + * memory with cached data and then eventually swap. + */ + /* + * This is particularly important when + * STARPU_USE_ALLOCATION_CACHE is not enabled, as we + * wouldn't even ever re-use these allocations! 
+ */ + if (handle->ops->dontcache + || (starpu_node_get_kind(node) == STARPU_CPU_RAM && + !_starpu_malloc_willpin_on_node(node)) +#ifndef STARPU_USE_ALLOCATION_CACHE + || !_starpu_memory_manager_get_global_memory_size(node) +#endif + ) + { + /* Free data immediately */ + mc->chunk_interface = replicate->data_interface; + free_memory_on_node(mc, node); + + _starpu_mem_chunk_delete(mc); + } + else + { + /* Keep the interface parameters and pointers, for later reuse + * while detached, or freed */ + _STARPU_MALLOC(mc->chunk_interface, mc->size_interface); + if (mc->ops->cache_data_on_node) + mc->ops->cache_data_on_node(mc->chunk_interface, replicate->data_interface, node); + else + memcpy(mc->chunk_interface, replicate->data_interface, mc->size_interface); + + /* put it in the list of buffers to be removed */ + uint32_t footprint = mc->footprint; + struct mc_cache_entry *entry; + _starpu_spin_lock(&node_struct->mc_lock); + HASH_FIND(hh, node_struct->mc_cache, &footprint, sizeof(footprint), entry); + if (!entry) + { + _STARPU_MALLOC(entry, sizeof(*entry)); + _starpu_mem_chunk_list_init(&entry->list); + entry->footprint = footprint; + HASH_ADD(hh, node_struct->mc_cache, footprint, sizeof(entry->footprint), entry); + } + node_struct->mc_cache_nb++; + node_struct->mc_cache_size += mc->size; + _starpu_mem_chunk_list_push_front(&entry->list, mc); + _starpu_spin_unlock(&node_struct->mc_lock); + } +} + +/* + * In order to allocate a piece of data, we try to reuse existing buffers if + * its possible. + * 1 - we try to reuse a memchunk that is explicitly unused. + * 2 - we go through the list of memory chunks and find one that is not + * referenced and that has the same footprint to reuse it. + * 3 - we call the usual driver's alloc method + * 4 - we go through the list of memory chunks and release those that are + * not referenced (or part of those). 
+ * + */ + +static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, enum starpu_is_prefetch is_prefetch, int only_fast_alloc) +{ + unsigned attempts = 0; + starpu_ssize_t allocated_memory; + int ret; + starpu_ssize_t data_size = _starpu_data_get_alloc_size(handle); + int told_reclaiming = 0; + int reused = 0; + struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); + + _starpu_spin_checklocked(&handle->header_lock); + + _starpu_data_allocation_inc_stats(dst_node); + + /* perhaps we can directly reuse a buffer in the free-list */ + uint32_t footprint = _starpu_compute_data_alloc_footprint(handle); + + int prefetch_oom = is_prefetch && node_struct->prefetch_out_of_memory; + +#ifdef STARPU_USE_ALLOCATION_CACHE + if (!prefetch_oom) + _STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size, handle, is_prefetch); + if (try_to_find_reusable_mc(dst_node, handle, replicate, footprint)) + { + _starpu_allocation_cache_hit(dst_node); + if (!prefetch_oom) + _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 1); + return data_size; + } + if (!prefetch_oom) + _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 0); +#endif + + /* If this is RAM and pinned this will be slow + In case we only want fast allocations return here */ + if (only_fast_alloc && (starpu_node_get_kind(dst_node) != STARPU_CPU_RAM || _starpu_malloc_willpin_on_node(dst_node))) + return -ENOMEM; + + STARPU_ASSERT(handle->ops); + STARPU_ASSERT(handle->ops->allocate_data_on_node); + STARPU_ASSERT(replicate->data_interface); + + size_t size = handle->ops->interface_size; + if (!size) + /* nul-size VLA is undefined... 
*/ + size = 1; + char data_interface[size]; + + memcpy(data_interface, replicate->data_interface, handle->ops->interface_size); + + /* Take temporary reference on the replicate */ + replicate->refcnt++; + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + + do + { + if (!prefetch_oom) + _STARPU_TRACE_START_ALLOC(dst_node, data_size, handle, is_prefetch); + + allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node); + if (!prefetch_oom) + _STARPU_TRACE_END_ALLOC(dst_node, handle, allocated_memory); + + if (allocated_memory == -ENOMEM) + { + size_t handle_size = _starpu_data_get_alloc_size(handle); + size_t reclaim = starpu_memstrategy_data_size_coefficient*handle_size; + + /* First try to flush data explicitly marked for freeing */ + size_t freed = flush_memchunk_cache(dst_node, reclaim); + + if (freed >= reclaim) + { + /* That freed enough data, retry allocating */ + node_struct->prefetch_out_of_memory = 0; + continue; + } + reclaim -= freed; + + if (is_prefetch >= STARPU_IDLEFETCH) + { + /* It's just idle fetch, don't bother existing allocations */ + /* And don't bother tracing allocation attempts */ + node_struct->prefetch_out_of_memory = 1; + /* TODO: ideally we should not even try to allocate when we know we have not freed anything */ + continue; + } + + /* Try to reuse an allocated data with the same interface (to avoid spurious free/alloc) */ + if (_starpu_has_not_important_data && try_to_reuse_not_important_mc(dst_node, handle, replicate, footprint, is_prefetch)) + break; + + if (try_to_reuse_potentially_in_use_mc(dst_node, handle, replicate, footprint, is_prefetch)) + { + reused = 1; + allocated_memory = data_size; + break; + } + + if (!told_reclaiming) + { + /* Prevent prefetches and such from happening */ + (void) STARPU_ATOMIC_ADD(&node_struct->reclaiming, 1); + told_reclaiming = 1; + } + /* That was not enough, we have to really reclaim */ + _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch); + freed = 
_starpu_memory_reclaim_generic(dst_node, 0, reclaim, is_prefetch); + _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch); + + if (!freed && is_prefetch >= STARPU_FETCH) + { + /* It's just prefetch, don't bother tracing allocation attempts */ + node_struct->prefetch_out_of_memory = 1; + /* TODO: ideally we should not even try to allocate when we know we have not freed anything */ + continue; + } + + node_struct->prefetch_out_of_memory = 0; + } + else + node_struct->prefetch_out_of_memory = 0; + } + while((allocated_memory == -ENOMEM) && attempts++ < 2); + + int cpt = 0; + while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) + { + cpt++; + _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); + } + if (cpt == STARPU_SPIN_MAXTRY) + _starpu_spin_lock(&handle->header_lock); + + replicate->refcnt--; + STARPU_ASSERT(replicate->refcnt >= 0); + STARPU_ASSERT(handle->busy_count > 0); + handle->busy_count--; + ret = _starpu_data_check_not_busy(handle); + STARPU_ASSERT(ret == 0); + + if (told_reclaiming) + /* We've finished with reclaiming memory, let prefetches start again */ + (void) STARPU_ATOMIC_ADD(&node_struct->reclaiming, -1); + + if (allocated_memory == -ENOMEM) + { + if (replicate->allocated) + /* Didn't manage to allocate, but somebody else did */ + allocated_memory = 0; + goto out; + } + + if (reused) + { + /* We just reused an allocation, nothing more to do */ + } + else if (replicate->allocated) + { + /* Argl, somebody allocated it in between already, drop this one */ + _STARPU_TRACE_START_FREE(dst_node, data_size, handle); + handle->ops->free_data_on_node(data_interface, dst_node); + _STARPU_TRACE_END_FREE(dst_node, handle); + allocated_memory = 0; + } + else + /* Install newly-allocated interface */ + memcpy(replicate->data_interface, data_interface, handle->ops->interface_size); + +out: + return allocated_memory; +} + +int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, enum 
starpu_is_prefetch is_prefetch, int only_fast_alloc) +{ + starpu_ssize_t allocated_memory; + + unsigned dst_node = replicate->memory_node; + STARPU_ASSERT(dst_node < STARPU_MAXNODES); + + STARPU_ASSERT(handle); + _starpu_spin_checklocked(&handle->header_lock); + + /* A buffer is already allocated on the node */ + if (replicate->allocated) + return 0; + + STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); + + STARPU_ASSERT(replicate->data_interface); + allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch, only_fast_alloc); + + /* perhaps we could really not handle that capacity misses */ + if (allocated_memory == -ENOMEM) + return -ENOMEM; + + if (replicate->allocated) + /* Somebody allocated it in between already */ + return 0; + + register_mem_chunk(handle, replicate, 1); + + replicate->allocated = 1; + replicate->automatically_allocated = 1; + + return 0; +} + +unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node) +{ + return handle->per_node[memory_node].allocated || handle->per_node[memory_node].mapped != STARPU_UNMAPPED; +} + +unsigned starpu_data_test_if_mapped_on_node(starpu_data_handle_t handle, unsigned memory_node) +{ + STARPU_ASSERT(memory_node < STARPU_MAXNODES); + return handle->per_node[memory_node].allocated; +} + +/* This memchunk has been recently used, put it last on the mc_list, so we will + * try to evict it as late as possible */ +void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node) +{ + if (!mc) + /* user-allocated memory */ + return; + STARPU_ASSERT(node < STARPU_MAXNODES); + if (!can_evict(node)) + /* Don't bother */ + return; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + _starpu_spin_lock(&node_struct->mc_lock); + MC_LIST_ERASE(node_struct, mc); + mc->wontuse = 0; + MC_LIST_PUSH_BACK(node_struct, mc); + _starpu_spin_unlock(&node_struct->mc_lock); +} + +/* This memchunk will not be used in the close future, put 
it on the clean + * list, so we will to evict it first */ +void _starpu_memchunk_wont_use(struct _starpu_mem_chunk *mc, unsigned node) +{ + if (!mc) + /* user-allocated memory */ + return; + STARPU_ASSERT(node < STARPU_MAXNODES); + if (!can_evict(node)) + /* Don't bother */ + return; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + _starpu_spin_lock(&node_struct->mc_lock); + mc->wontuse = 1; + if (mc->data && mc->data->home_node != -1) + { + MC_LIST_ERASE(node_struct, mc); + /* Caller will schedule a clean transfer */ + mc->clean = 1; + MC_LIST_PUSH_CLEAN(node_struct, mc); + } + /* TODO: else push to head of data to be evicted */ + _starpu_spin_unlock(&node_struct->mc_lock); +} + +/* This memchunk content was dropped, and thus becomes clean */ +void _starpu_memchunk_clean(struct _starpu_mem_chunk *mc, unsigned node) +{ + if (!mc) + /* user-allocated memory */ + return; + if (mc->home) + /* Home is always clean */ + return; + STARPU_ASSERT(node < STARPU_MAXNODES); + if (!can_evict(node)) + /* Don't bother */ + return; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + _starpu_spin_lock(&node_struct->mc_lock); + if (!mc->clean) + { + node_struct->mc_clean_nb++; + mc->clean = 1; + } + _starpu_spin_unlock(&node_struct->mc_lock); +} + +/* This memchunk is being written to, and thus becomes dirty */ +void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node) +{ + if (!mc) + /* user-allocated memory */ + return; + if (mc->home) + /* Home is always clean */ + return; + STARPU_ASSERT(node < STARPU_MAXNODES); + if (!can_evict(node)) + /* Don't bother */ + return; + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + _starpu_spin_lock(&node_struct->mc_lock); + if (mc->relaxed_coherency == 1) + { + /* SCRATCH, make it clean if not already*/ + if (!mc->clean) + { + node_struct->mc_clean_nb++; + mc->clean = 1; + } + } + else + { + if (mc->clean) + { + node_struct->mc_clean_nb--; + mc->clean = 0; + } + } + 
_starpu_spin_unlock(&node_struct->mc_lock); +} + +#ifdef STARPU_MEMORY_STATS +void _starpu_memory_display_stats_by_node(FILE *stream, int node) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + _starpu_spin_lock(&node_struct->mc_lock); + + if (!_starpu_mem_chunk_list_empty(&node_struct->mc_list)) + { + struct _starpu_mem_chunk *mc; + + fprintf(stream, "#-------\n"); + fprintf(stream, "Data on Node #%d\n",node); + + for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); + mc != _starpu_mem_chunk_list_end(&node_struct->mc_list); + mc = _starpu_mem_chunk_list_next(mc)) + { + _starpu_memory_display_handle_stats(stream, mc->data); + } + + } + + _starpu_spin_unlock(&node_struct->mc_lock); +} + +void _starpu_data_display_memory_stats(FILE *stream) +{ + unsigned node; + + fprintf(stream, "\n#---------------------\n"); + fprintf(stream, "Memory stats :\n"); + for (node = 0; node < STARPU_MAXNODES; node++) + { + _starpu_memory_display_stats_by_node(stream, node); + } + fprintf(stream, "\n#---------------------\n"); +} +#endif + +void starpu_data_display_memory_stats(void) +{ +#ifdef STARPU_MEMORY_STATS + _starpu_data_display_memory_stats(stderr); +#endif +} + +static int +get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node) +{ + int target = -1; + unsigned nnodes = starpu_memory_nodes_get_count(); + unsigned int i; + double time_disk = 0.0; + + for (i = 0; i < nnodes; i++) + { + if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node && + (handle->per_node[i].allocated || + _starpu_memory_manager_test_allocate_size(i, _starpu_data_get_alloc_size(handle)) == 1)) + { + /* if we can write on the disk */ + if ((_starpu_get_disk_flag(i) & STARPU_DISK_NO_RECLAIM) == 0) + { + unsigned numa; + unsigned nnumas = starpu_memory_nodes_get_numa_count(); + for (numa = 0; numa < nnumas; numa++) + { + /* TODO : check if starpu_transfer_predict(node, i,...) 
is the same */ + double time_tmp = starpu_transfer_predict(node, numa, _starpu_data_get_alloc_size(handle)) + starpu_transfer_predict(i, numa, _starpu_data_get_alloc_size(handle)); + if (target == -1 || time_disk > time_tmp) + { + target = i; + time_disk = time_tmp; + } + } + } + } + } + return target; +} + +#ifdef STARPU_DEVEL +# warning TODO: better choose NUMA node +#endif + +/* Choose a target memory node to put the value of the handle, because the current location (node) is getting tight */ +static int +choose_target(starpu_data_handle_t handle, unsigned node) +{ + int target = -1; + size_t size_handle = _starpu_data_get_alloc_size(handle); + if (handle->home_node != -1) + /* try to push on RAM if we can before to push on disk */ + if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM)) + { + unsigned i; + unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); + for (i=0; iper_node[i].allocated || + (starpu_ssize_t) size_handle < node_struct->mc_cache_size || + _starpu_memory_manager_test_allocate_size(i, size_handle - node_struct->mc_cache_size) == 1) + { + target = i; + break; + } + } + if (target == -1) + { + target = get_better_disk_can_accept_size(handle, node); + } + + } + /* others memory nodes */ + else + { + target = handle->home_node; + } + else + { + /* handle->home_node == -1 */ + /* no place for data in RAM, we push on disk */ + if (starpu_node_get_kind(node) == STARPU_CPU_RAM) + { + target = get_better_disk_can_accept_size(handle, node); + } else { + /* node != 0 */ + /* try to push data to RAM if we can before to push on disk*/ + unsigned i; + unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); + for (i=0; iper_node[i].allocated || + (starpu_ssize_t) size_handle < node_struct->mc_cache_size || + _starpu_memory_manager_test_allocate_size(i, size_handle - node_struct->mc_cache_size) == 1) + { + target = i; + break; + } + } + /* no place in RAM */ + if (target == -1) + { + 
target = get_better_disk_can_accept_size(handle, node); + } + } + } + /* we haven't the right to write on the disk */ + if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && (_starpu_get_disk_flag(target) & STARPU_DISK_NO_RECLAIM)) + target = -1; + + return target; +} + +void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data) +{ + handle->user_data = user_data; +} + +void *starpu_data_get_user_data(starpu_data_handle_t handle) +{ + return handle->user_data; +} + +void starpu_data_set_sched_data(starpu_data_handle_t handle, void* sched_data) +{ + handle->sched_data = sched_data; +} + +void *starpu_data_get_sched_data(starpu_data_handle_t handle) +{ + return handle->sched_data; +} diff --git a/src/datawizard/memalloc.h b/src/datawizard/memalloc.h new file mode 100644 index 0000000..6fbac33 --- /dev/null +++ b/src/datawizard/memalloc.h @@ -0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __MEMALLOC_H__ +#define __MEMALLOC_H__ + +/** @file */ + +#include +#include + +#include +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +struct _starpu_data_replicate; + +/** While associated with a handle, the content is protected by the handle lock, except a few fields + */ +LIST_TYPE(_starpu_mem_chunk, + /** protected by the mc_lock */ + starpu_data_handle_t data; + + uint32_t footprint; + + /* + * When re-using a memchunk, the footprint of the data is not + * sufficient to determine whether two pieces of data have the same + * layout (there could be collision in the hash function ...) so we + * still keep a copy of the actual layout (ie. the data interface) to + * stay on the safe side while the memchunk is detached from an actual + * data. + */ + struct starpu_data_interface_ops *ops; + void *chunk_interface; + size_t size_interface; + + /** Whether StarPU automatically allocated this memory, or the application did */ + unsigned automatically_allocated:1; + /** A buffer that is used for SCRATCH or reduction cannot be used with + * filters. */ + unsigned relaxed_coherency:2; + /** Whether this is the home chunk, or there is no home chunk (and it is thus always clean) */ + unsigned home:1; + /** Whether the memchunk is in the clean part of the mc_list */ + unsigned clean:1; + /** Was this chunk marked as "won't use"? */ + unsigned wontuse:1; + + /** the size of the data is only set when calling _starpu_request_mem_chunk_removal(), + * it is needed to estimate how much memory is in mc_cache, and by + * free_memory_on_node() which is called when the handle is no longer + * valid. + * It should not be used otherwise. + */ + size_t size; + + struct _starpu_data_replicate *replicate; + + /** This is set when one keeps a pointer to this mc obtained from the + * mc_list without mc_lock held. We need to clear the pointer if we + * remove this entry from the mc_list, so we know we have to restart + * from zero. 
This is protected by the corresponding mc_lock. */ + struct _starpu_mem_chunk **remove_notify; +) + +void _starpu_init_mem_chunk_lists(void); +void _starpu_deinit_mem_chunk_lists(void); +void _starpu_mem_chunk_init_last(void); +void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size); +int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, enum starpu_is_prefetch is_prefetch, int only_fast_alloc); +size_t _starpu_free_all_automatically_allocated_buffers(unsigned node); +void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node); +void _starpu_memchunk_wont_use(struct _starpu_mem_chunk *m, unsigned nodec); +void _starpu_memchunk_clean(struct _starpu_mem_chunk *mc, unsigned node); +void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node); + +size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch); +int _starpu_is_reclaiming(unsigned node); + +void _starpu_mem_chunk_disk_register(unsigned disk_memnode); + +#pragma GCC visibility pop + +#endif diff --git a/src/datawizard/memory_manager.c b/src/datawizard/memory_manager.c new file mode 100644 index 0000000..699b0cc --- /dev/null +++ b/src/datawizard/memory_manager.c @@ -0,0 +1,237 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +int _starpu_memory_manager_init() +{ + int i; + + for(i=0 ; iglobal_size = 0; + node->used_size = 0; + /* This is accessed for statistics outside the lock, don't care + * about that */ + STARPU_HG_DISABLE_CHECKING(node->used_size); + STARPU_HG_DISABLE_CHECKING(node->global_size); + node->waiting_size = 0; + STARPU_PTHREAD_MUTEX_INIT(&node->lock_nodes, NULL); + STARPU_PTHREAD_COND_INIT(&node->cond_nodes, NULL); + } + return 0; +} + +void _starpu_memory_manager_set_global_memory_size(unsigned node, size_t size) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); + if (!node_struct->global_size) + { + node_struct->global_size = size; + _STARPU_DEBUG("Global size for node %u is %ld\n", node, (long)node_struct->global_size); + } + else + { + STARPU_ASSERT(node_struct->global_size == size); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); +} + +size_t _starpu_memory_manager_get_global_memory_size(unsigned node) +{ + return _starpu_get_node_struct(node)->global_size; +} + + +int starpu_memory_allocate(unsigned node, size_t size, int flags) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + int ret; + + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); + if (flags & STARPU_MEMORY_WAIT) + { + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + enum _starpu_worker_status old_status = STATUS_UNKNOWN; + + if (worker) + { + old_status = worker->status; + if (!(old_status & STATUS_WAITING)) + _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); + } + + while (node_struct->used_size + size > node_struct->global_size) + { + /* Tell deallocators we need this amount */ + if (!node_struct->waiting_size || size < node_struct->waiting_size) + node_struct->waiting_size = size; + + /* 
Wait for it */ + STARPU_PTHREAD_COND_WAIT(&node_struct->cond_nodes, &node_struct->lock_nodes); + } + + if (worker) + { + if (!(old_status & STATUS_WAITING)) + _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); + } + + /* And take it */ + node_struct->used_size += size; + _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + ret = 0; + } + else if (flags & STARPU_MEMORY_OVERFLOW + || node_struct->global_size == 0 + || node_struct->used_size + size <= node_struct->global_size) + { + node_struct->used_size += size; + _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + ret = 0; + } + else + { + ret = -ENOMEM; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); + return ret; +} + +void starpu_memory_deallocate(unsigned node, size_t size) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); + + node_struct->used_size -= size; + _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + + /* If there's now room for waiters, wake them */ + if (node_struct->waiting_size && + node_struct->global_size - node_struct->used_size >= node_struct->waiting_size) + { + /* And have those not happy enough tell us the size again */ + node_struct->waiting_size = 0; + STARPU_PTHREAD_COND_BROADCAST(&node_struct->cond_nodes); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); +} + +starpu_ssize_t starpu_memory_get_total(unsigned node) +{ + size_t size = _starpu_get_node_struct(node)->global_size; + if (size == 0) + return -1; + else + return size; +} + +starpu_ssize_t starpu_memory_get_total_all_nodes() +{ + unsigned memnodes, i; + memnodes = starpu_memory_nodes_get_count(); + starpu_ssize_t total = 0; + for(i=0 ; iglobal_size; + if (size == 0) + return -1; + + ret = size - _starpu_get_node_struct(node)->used_size; + return ret; +} + +starpu_ssize_t starpu_memory_get_available_all_nodes() +{ + unsigned memnodes, i; + memnodes = starpu_memory_nodes_get_count(); + starpu_ssize_t 
avail = 0; + for(i=0 ; iused_size; +} + +size_t starpu_memory_get_used_all_nodes() +{ + unsigned memnodes, i; + memnodes = starpu_memory_nodes_get_count(); + size_t used = 0; + for(i=0 ; ilock_nodes); + while (node_struct->used_size + size > node_struct->global_size) + { + /* Tell deallocators we need this amount */ + if (!node_struct->waiting_size || size < node_struct->waiting_size) + node_struct->waiting_size = size; + + /* Wait for it */ + STARPU_PTHREAD_COND_WAIT(&node_struct->cond_nodes, &node_struct->lock_nodes); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); +} + +int _starpu_memory_manager_test_allocate_size(unsigned node, size_t size) +{ + struct _starpu_node *node_struct = _starpu_get_node_struct(node); + int ret; + + if (node_struct->global_size == 0) + ret = 1; + else if (node_struct->used_size + size <= node_struct->global_size) + ret = 1; + else + ret = 0; + return ret; +} diff --git a/src/datawizard/memory_manager.h b/src/datawizard/memory_manager.h new file mode 100644 index 0000000..67461f3 --- /dev/null +++ b/src/datawizard/memory_manager.h @@ -0,0 +1,56 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __MEMORY_MANAGER_H__ +#define __MEMORY_MANAGER_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** + * Initialises the memory manager + */ +int _starpu_memory_manager_init(); + +/** + * Initialises the global memory size for the given node + * + */ +void _starpu_memory_manager_set_global_memory_size(unsigned node, size_t size); + +/** + * Gets the global memory size for the given node + * + */ +size_t _starpu_memory_manager_get_global_memory_size(unsigned node); + +int _starpu_memory_manager_test_allocate_size(unsigned node, size_t size); + +#ifdef __cplusplus +} +#endif + +#pragma GCC visibility pop + +#endif /* __MEMORY_MANAGER_H__ */ diff --git a/src/datawizard/memory_nodes.c b/src/datawizard/memory_nodes.c new file mode 100644 index 0000000..0f692fe --- /dev/null +++ b/src/datawizard/memory_nodes.c @@ -0,0 +1,233 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char _starpu_worker_drives_memory[STARPU_NMAXWORKERS][STARPU_MAXNODES]; + +struct _starpu_memory_node_descr _starpu_descr; + +void _starpu_memory_nodes_init(void) +{ + /* there is no node yet, subsequent nodes will be + * added using _starpu_memory_node_register */ + _starpu_descr.nnodes = 0; + + unsigned i; + for (i = 0; i < STARPU_MAXNODES; i++) + { + _starpu_descr.nodes[i] = STARPU_UNUSED; + _starpu_descr.nworkers[i] = 0; + } + memset(&_starpu_worker_drives_memory, 0, sizeof(_starpu_worker_drives_memory)); + STARPU_HG_DISABLE_CHECKING(_starpu_worker_drives_memory); + + _starpu_init_mem_chunk_lists(); + _starpu_init_data_request_lists(); + _starpu_memory_manager_init(); + + STARPU_PTHREAD_RWLOCK_INIT(&_starpu_descr.conditions_rwlock, NULL); + _starpu_descr.total_condition_count = 0; +} + +void _starpu_memory_nodes_deinit(void) +{ + _starpu_deinit_data_request_lists(); + _starpu_deinit_mem_chunk_lists(); + + STARPU_PTHREAD_RWLOCK_DESTROY(&_starpu_descr.conditions_rwlock); +} + +#undef starpu_node_get_kind +enum starpu_node_kind starpu_node_get_kind(unsigned node) +{ + return _starpu_node_get_kind(node); +} + +#undef starpu_memory_nodes_get_count +unsigned starpu_memory_nodes_get_count(void) +{ + return _starpu_memory_nodes_get_count(); +} + +unsigned starpu_memory_nodes_get_count_by_kind(enum starpu_node_kind kind) +{ + unsigned nnodes = _starpu_memory_nodes_get_count(); + unsigned id, cnt = 0; + + for (id = 0; id < nnodes; id++) + if (_starpu_node_get_kind(id) == kind) + cnt++; + + return cnt; +} + +unsigned starpu_memory_node_get_ids_by_type(enum starpu_node_kind kind, unsigned *memory_nodes_ids, unsigned maxsize) +{ + unsigned nnodes = _starpu_memory_nodes_get_count(); + unsigned cnt = 0; + unsigned id; + + for (id = 0; id < nnodes; id++) + { + if (_starpu_node_get_kind(id) == kind) + { + /* Perhaps the array is too small ? Note: the return type is unsigned, so callers receive -ERANGE converted to a large unsigned value, not a negative number
*/ + if (cnt >= maxsize) + return -ERANGE; + + memory_nodes_ids[cnt++] = id; + } + } + + return cnt; +} + +int starpu_memory_node_get_name(unsigned node, char *name, size_t size) +{ + const char *prefix = _starpu_node_get_prefix(_starpu_descr.nodes[node]); + return snprintf(name, size, "%s %d", prefix, _starpu_descr.devid[node]); +} + +unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid) +{ + const struct _starpu_node_ops *node_ops = starpu_memory_driver_info[kind].ops; + unsigned node; + /* ATOMIC_ADD returns the new value ... */ + node = STARPU_ATOMIC_ADD(&_starpu_descr.nnodes, 1) - 1; + STARPU_ASSERT_MSG_ALWAYS(node < STARPU_MAXNODES,"Too many nodes (%u) for maximum %d. Use configure option --enable-maxnodes=xxx to update the maximum number of nodes.", node + 1, STARPU_MAXNODES); + + _starpu_descr.nodes[node] = kind; + _STARPU_TRACE_NEW_MEM_NODE(node); + + _starpu_descr.devid[node] = devid; + _starpu_descr.node_ops[node] = node_ops; + + /* for now, there is no condition associated to that newly created node */ + _starpu_descr.condition_count[node] = 0; + + _starpu_malloc_init(node); + + return node; +} + +/* TODO move in a more appropriate file !!
*/ +void _starpu_memory_node_register_condition(struct _starpu_worker *worker, starpu_pthread_cond_t *cond, unsigned nodeid) +{ + unsigned cond_id; + unsigned nconds_total, nconds; + + STARPU_PTHREAD_RWLOCK_WRLOCK(&_starpu_descr.conditions_rwlock); + + /* we only insert the queue if it's not already in the list */ + nconds = _starpu_descr.condition_count[nodeid]; + for (cond_id = 0; cond_id < nconds; cond_id++) + { + if (_starpu_descr.conditions_attached_to_node[nodeid][cond_id].cond == cond) + { + STARPU_ASSERT(_starpu_descr.conditions_attached_to_node[nodeid][cond_id].worker == worker); + + /* the condition is already in the list */ + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); + return; + } + } + + /* it was not found locally */ + _starpu_descr.conditions_attached_to_node[nodeid][cond_id].cond = cond; + _starpu_descr.conditions_attached_to_node[nodeid][cond_id].worker = worker; + _starpu_descr.condition_count[nodeid]++; + + /* do we have to add it in the global list as well ? 
*/ + nconds_total = _starpu_descr.total_condition_count; + for (cond_id = 0; cond_id < nconds_total; cond_id++) + { + if (_starpu_descr.conditions_all[cond_id].cond == cond) + { + /* the queue is already in the global list */ + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); + return; + } + } + + /* it was not in the global list either */ + _starpu_descr.conditions_all[nconds_total].cond = cond; + _starpu_descr.conditions_all[nconds_total].worker = worker; + _starpu_descr.total_condition_count++; + + STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); +} + +void _starpu_memory_node_set_mapped(unsigned node) +{ + if (starpu_map_enabled() == 1) + _starpu_descr.mapped[node] = 1; +#ifdef STARPU_VERBOSE + else + _STARPU_DISP("Warning: set_mapped requested on node %u, while map support is disabled\n", node); +#endif +} + +unsigned _starpu_memory_node_get_mapped(unsigned node) +{ + return _starpu_descr.mapped[node]; +} + +#undef starpu_worker_get_memory_node +unsigned starpu_worker_get_memory_node(unsigned workerid) +{ + (void) workerid; + return _starpu_worker_get_memory_node(workerid); +} + +void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned memnode) +{ + if (! 
_starpu_worker_drives_memory[worker->workerid][memnode]) + { + _starpu_worker_drives_memory[worker->workerid][memnode] = 1; +#ifdef STARPU_SIMGRID + starpu_pthread_queue_register(&worker->wait, &_starpu_simgrid_transfer_queue[memnode]); +#endif + _starpu_memory_node_register_condition(worker, &worker->sched_cond, memnode); + } +} + +#undef starpu_worker_get_local_memory_node +unsigned starpu_worker_get_local_memory_node(void) +{ + return _starpu_worker_get_local_memory_node(); +} + +int starpu_memory_node_get_devid(unsigned node) +{ + return _starpu_descr.devid[node]; +} + +enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) +{ + enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype; + STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind); + return archtype; +} diff --git a/src/datawizard/memory_nodes.h b/src/datawizard/memory_nodes.h new file mode 100644 index 0000000..41112ca --- /dev/null +++ b/src/datawizard/memory_nodes.h @@ -0,0 +1,191 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __MEMORY_NODES_H__ +#define __MEMORY_NODES_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#endif + +#pragma GCC visibility push(hidden) + +extern char _starpu_worker_drives_memory[STARPU_NMAXWORKERS][STARPU_MAXNODES]; + +struct _starpu_cond_and_worker +{ + starpu_pthread_cond_t *cond; + struct _starpu_worker *worker; +}; + +// TODO: split out all these arrays into struct _starpu_node +struct _starpu_memory_node_descr +{ + unsigned nnodes; + enum starpu_node_kind nodes[STARPU_MAXNODES]; + const struct _starpu_node_ops *node_ops[STARPU_MAXNODES]; + + /** Get the device id associated to this node, or -1 if not applicable */ + int devid[STARPU_MAXNODES]; + + unsigned nworkers[STARPU_MAXNODES]; + +#ifdef STARPU_SIMGRID + starpu_sg_host_t host[STARPU_MAXNODES]; +#endif + + // TODO move these 2 lists outside struct _starpu_memory_node_descr + /** Every worker is associated to a condition variable on which the + * worker waits for a task to become available. It is possible that + * multiple workers share the same condition variable, so we maintain a + * list of all these condition variables so that we can wake up all + * workers attached to a memory node that are waiting on a task.
*/ + starpu_pthread_rwlock_t conditions_rwlock; + struct _starpu_cond_and_worker conditions_attached_to_node[STARPU_MAXNODES][STARPU_NMAXWORKERS]; + struct _starpu_cond_and_worker conditions_all[STARPU_MAXNODES*STARPU_NMAXWORKERS]; + /** total_condition_count is the number of entries used in conditions_all; condition_count[node] is the number of entries used in conditions_attached_to_node[node] */ + unsigned total_condition_count; + unsigned condition_count[STARPU_MAXNODES]; + unsigned mapped[STARPU_MAXNODES]; +}; + +extern struct _starpu_memory_node_descr _starpu_descr; + +void _starpu_memory_nodes_init(void); +void _starpu_memory_nodes_deinit(void); + +/** Record that there is an additional worker that uses this memory node */ +static inline void _starpu_memory_node_add_nworkers(unsigned node) +{ + _starpu_descr.nworkers[node]++; +} + +/** Record that this worker will drive data transfers for this memory node. */ +void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned memnode); + +static inline const struct _starpu_node_ops *_starpu_memory_node_get_node_ops(unsigned node) +{ + return _starpu_descr.node_ops[node]; +} + +/** Get the number of workers that use this memory node */ +static inline unsigned _starpu_memory_node_get_nworkers(unsigned node) +{ + return _starpu_descr.nworkers[node]; +} + +#ifdef STARPU_SIMGRID +static inline void _starpu_simgrid_memory_node_set_host(unsigned node, starpu_sg_host_t host) +{ + _starpu_descr.host[node] = host; +} + +static inline starpu_sg_host_t _starpu_simgrid_memory_node_get_host(unsigned node) +{ + return _starpu_descr.host[node]; +} +#endif + +/** Note that this memory node can map CPU data */ +void _starpu_memory_node_set_mapped(unsigned node); +/** Returns whether this memory node can map CPU data */ +unsigned _starpu_memory_node_get_mapped(unsigned node); + +/** Registers a memory node.
Returns the memory node number */ +unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid); + +//void _starpu_memory_node_attach_queue(struct starpu_jobq_s *q, unsigned nodeid); +/** Register a condition variable associated to worker which is associated to a + * memory node itself. */ +void _starpu_memory_node_register_condition(struct _starpu_worker *worker, starpu_pthread_cond_t *cond, unsigned nodeid); + +/** See starpu_memory_node_get_description() */ +static inline struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void) +{ + return &_starpu_descr; +} + +#define _starpu_node_needs_map_update(node) \ + (starpu_node_get_kind(node) == STARPU_OPENCL_RAM) + +/** See starpu_node_get_kind() */ +static inline enum starpu_node_kind _starpu_node_get_kind(unsigned node) +{ + return _starpu_descr.nodes[node]; +} +#define starpu_node_get_kind _starpu_node_get_kind + +#if STARPU_MAXNODES == 1 +#define _starpu_memory_nodes_get_count() 1 +#else +/** See starpu_memory_nodes_get_count() */ +static inline unsigned _starpu_memory_nodes_get_count(void) +{ + return _starpu_descr.nnodes; +} +#endif +#define starpu_memory_nodes_get_count _starpu_memory_nodes_get_count + +#if STARPU_MAXNODES == 1 +#define _starpu_worker_get_memory_node(workerid) 0 +#else +/** See starpu_worker_get_memory_node() */ +static inline unsigned _starpu_worker_get_memory_node(unsigned workerid) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + /** This workerid may either be a basic worker or a combined worker */ + unsigned nworkers = config->topology.nworkers; + + if (workerid < config->topology.nworkers) + return config->workers[workerid].memory_node; + + /** We have a combined worker */ + unsigned ncombinedworkers STARPU_ATTRIBUTE_UNUSED = config->topology.ncombinedworkers; + STARPU_ASSERT_MSG(workerid < ncombinedworkers + nworkers, "Bad workerid %u, maximum %u", workerid, ncombinedworkers + nworkers); + return 
config->combined_workers[workerid - nworkers].memory_node; + +} +#endif +#define starpu_worker_get_memory_node _starpu_worker_get_memory_node + +#if STARPU_MAXNODES == 1 +#define _starpu_worker_get_local_memory_node() 0 +#else +/** See starpu_worker_get_local_memory_node */ +static inline unsigned _starpu_worker_get_local_memory_node(void) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + if (!worker) + return STARPU_MAIN_RAM; + return worker->memory_node; +} +#endif +#define starpu_worker_get_local_memory_node _starpu_worker_get_local_memory_node + +#pragma GCC visibility pop + +#endif // __MEMORY_NODES_H__ diff --git a/src/datawizard/memstats.c b/src/datawizard/memstats.c new file mode 100644 index 0000000..f364819 --- /dev/null +++ b/src/datawizard/memstats.c @@ -0,0 +1,106 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +void _starpu_memory_stats_init(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_MEMORY_STATS + _STARPU_CALLOC(handle->memory_stats, 1, sizeof(struct _starpu_memory_stats)); +#endif +} + +void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_MEMORY_STATS + /* Stats initialization */ + //handle->memory_stats->direct_access[node]=0; + //handle->memory_stats->loaded_shared[node]=0; + //handle->memory_stats->shared_to_owner[node]=0; + //handle->memory_stats->loaded_owner[node]=0; + //handle->memory_stats->invalidated[node]=0; +#endif +} + +void _starpu_memory_stats_free(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_MEMORY_STATS + free(handle->memory_stats); +#endif +} + +#ifdef STARPU_MEMORY_STATS +void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle) +{ + unsigned node; + + fprintf(stream, "#-----\n"); + fprintf(stream, "Data : %p\n", handle); + fprintf(stream, "Size : %d\n", (int)handle->ops->get_size(handle)); + fprintf(stream, "\n"); + + fprintf(stream, "#--\n"); + fprintf(stream, "Data access stats\n"); + fprintf(stream, "/!\\ Work Underway\n"); + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (handle->memory_stats->direct_access[node]+handle->memory_stats->loaded_shared[node] + +handle->memory_stats->invalidated[node]+handle->memory_stats->loaded_owner[node]) + { + fprintf(stream, "Node #%u\n", node); + fprintf(stream, "\tDirect access : %u\n", handle->memory_stats->direct_access[node]); + /* XXX Not Working yet. 
*/ + if (handle->memory_stats->shared_to_owner[node]) + fprintf(stream, "\t\tShared to Owner : %u\n", handle->memory_stats->shared_to_owner[node]); + fprintf(stream, "\tLoaded (Owner) : %u\n", handle->memory_stats->loaded_owner[node]); + fprintf(stream, "\tLoaded (Shared) : %u\n", handle->memory_stats->loaded_shared[node]); + fprintf(stream, "\tInvalidated (was Owner) : %u\n\n", handle->memory_stats->invalidated[node]); + } + } +} + +void _starpu_memory_handle_stats_cache_hit(starpu_data_handle_t handle, unsigned node) +{ + handle->memory_stats->direct_access[node]++; +} + +void _starpu_memory_handle_stats_loaded_shared(starpu_data_handle_t handle, unsigned node) +{ + handle->memory_stats->loaded_shared[node]++; +} + +void _starpu_memory_handle_stats_loaded_owner(starpu_data_handle_t handle, unsigned node) +{ + handle->memory_stats->loaded_owner[node]++; +} + +void _starpu_memory_handle_stats_shared_to_owner(starpu_data_handle_t handle, unsigned node) +{ + handle->memory_stats->shared_to_owner[node]++; +} + +void _starpu_memory_handle_stats_invalidated(starpu_data_handle_t handle, unsigned node) +{ + handle->memory_stats->invalidated[node]++; +} + +#endif + + + diff --git a/src/datawizard/memstats.h b/src/datawizard/memstats.h new file mode 100644 index 0000000..d893187 --- /dev/null +++ b/src/datawizard/memstats.h @@ -0,0 +1,58 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __MEMSTATS_H__ +#define __MEMSTATS_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +#ifdef STARPU_MEMORY_STATS +struct _starpu_memory_stats +{ + /** Handle access stats per node */ + unsigned direct_access[STARPU_MAXNODES]; + unsigned loaded_shared[STARPU_MAXNODES]; + unsigned loaded_owner[STARPU_MAXNODES]; + unsigned shared_to_owner[STARPU_MAXNODES]; + unsigned invalidated[STARPU_MAXNODES]; +}; + +typedef struct _starpu_memory_stats * _starpu_memory_stats_t; +#else +typedef void * _starpu_memory_stats_t; +#endif + +void _starpu_memory_stats_init(starpu_data_handle_t handle); +void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle, unsigned node); + +void _starpu_memory_stats_free(starpu_data_handle_t handle); + +void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle); + +void _starpu_memory_handle_stats_cache_hit(starpu_data_handle_t handle, unsigned node); +void _starpu_memory_handle_stats_loaded_shared(starpu_data_handle_t handle, unsigned node); +void _starpu_memory_handle_stats_loaded_owner(starpu_data_handle_t handle, unsigned node); +void _starpu_memory_handle_stats_shared_to_owner(starpu_data_handle_t handle, unsigned node); +void _starpu_memory_handle_stats_invalidated(starpu_data_handle_t handle, unsigned node); + +#pragma GCC visibility pop + +#endif /* __MEMSTATS_H__ */ diff --git a/src/datawizard/node_ops.c b/src/datawizard/node_ops.c new file mode 100644 index 0000000..22cffc1 --- /dev/null +++ b/src/datawizard/node_ops.c @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +const char* _starpu_node_get_prefix(enum starpu_node_kind kind) +{ + const char *ret = starpu_memory_driver_info[kind].name_upper; + STARPU_ASSERT(ret); + return ret; +} diff --git a/src/datawizard/node_ops.h b/src/datawizard/node_ops.h new file mode 100644 index 0000000..0fe3401 --- /dev/null +++ b/src/datawizard/node_ops.h @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __NODE_OPS_H__ +#define __NODE_OPS_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Request copying some data interface for handle \p handle: from interface \p + * src_interface that exists on node \p src_node to interface \p dst_interface + * that exists on node \p dst_node. + * + * If \p req is non-NULL, this can be used to start an asynchronous copy, in + * which case -EAGAIN should be returned. Otherwise, 0 should be returned. + * + * _starpu_copy_interface_any_to_any can be used as a generic version, that + * assumes that the data_interface implements the any_to_any method, and + * copy_data_t will be used to queue the actual transfers. + */ +typedef int (*copy_interface_func_t)(starpu_data_handle_t handle, void *src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, + struct _starpu_data_request *req); + +/** Request copying \p ssize bytes of data from \p src_ptr (plus offset \p src_offset) + * in node \p src_node to \p dst_ptr (plus offset \p dst_offset) in node \p dst_node. + * + * If \p async_channel is non-NULL, this can be used to start an asynchronous copy, in + * which case -EAGAIN should be returned. Otherwise, 0 should be returned. + */ +typedef int (*copy_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, + uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, + size_t ssize, struct _starpu_async_channel *async_channel); + +/** This is like copy_data_t, except that there are \p numblocks blocks of size + * \p blocksize bytes to be transferred. On the source, their respective starts are \p + * ld_src bytes apart, and on the destination their respective starts have to be + * \p ld_dst bytes apart. 
(leading dimension) */ +typedef int (*copy2d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, + uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel); + +/** This is like copy_data_t, except that there are \p numblocks_2 metablocks to + * be transferred. On the source, their respective starts are \p ld2_src bytes + * apart, and on the destination their respective starts have to be \p ld2_dst + * bytes apart. + * + * The metablocks are composed of \p numblocks_1 blocks of size \p blocksize + * bytes. On the source, their respective starts are \p ld1_src bytes apart, and + * on the destination their respective starts have to be \p ld1_dst bytes apart. + */ +typedef int (*copy3d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, + uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, + size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + struct _starpu_async_channel *async_channel); + +/** Map \p size bytes of data from \p src (plus offset \p src_offset) in node \p src_node + * on node \p dst_node. If successful, return the resulting pointer, otherwise fill *ret */ +typedef uintptr_t (*map_t)(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret); +/** Unmap \p size bytes of data from \p src (plus offset \p src_offset) in node \p src_node + * on node \p dst_node. */ +typedef int (*unmap_t)(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size); +/** Update cache coherency for the mapping of \p size bytes of data from \p src (plus offset + * \p src_offset) in node \p src_node on node \p dst_node. 
*/ +typedef int (*update_map_t)(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size); + +/** Reference all the methods for copying data from this kind of device to + * another kind of device. */ +struct _starpu_node_ops +{ + /** Request copying a data interface from this type of node to another type of node. + * As a first start, you can just use the generic _starpu_copy_interface_any_to_any. */ + copy_interface_func_t copy_interface_to[STARPU_MAX_RAM+1]; + + /** Request copying a data interface to this type of node from another type of node. + * As a first start, you can just use the generic _starpu_copy_interface_any_to_any. */ + copy_interface_func_t copy_interface_from[STARPU_MAX_RAM+1]; + + /** Request copying a piece of data from this type of node to another type of node. + * This method is required at least for STARPU_CPU_RAM. */ + copy_data_t copy_data_to[STARPU_MAX_RAM+1]; + + /** Request copying a piece of data to this type of node from another type of node. + * This method is required at least for STARPU_CPU_RAM. */ + copy_data_t copy_data_from[STARPU_MAX_RAM+1]; + + /** Request copying a 2D piece of data (i.e. matrix tile with an ld) + * from this type of node to another type of node. + * This method is optional. */ + copy2d_data_t copy2d_data_to[STARPU_MAX_RAM+1]; + + /** Request copying a 2D piece of data (i.e. matrix tile with an ld) + * to this type of node from another type of node. + * This method is optional. */ + copy2d_data_t copy2d_data_from[STARPU_MAX_RAM+1]; + + /** Request copying a 3D piece of data (i.e. block piece with ldy and ldz) + * from this type of node to another type of node. + * This method is optional. */ + copy3d_data_t copy3d_data_to[STARPU_MAX_RAM+1]; + + /** Request copying a 3D piece of data (i.e. block piece with ldy and ldz) + * to this type of node from another type of node. + * This method is optional. 
*/ + copy3d_data_t copy3d_data_from[STARPU_MAX_RAM+1]; + + /** Wait for the completion of asynchronous request \p async_channel. + * Only used at starpu_shutdown. */ + void (*wait_request_completion)(struct _starpu_async_channel *async_channel); + /** Test whether asynchronous request \p async_channel has completed. */ + unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel); + + /** Return whether inter-device transfers are possible between \p node and \p handling_node. + * If this returns 0, copy_interface_to will always be called with + * CPU RAM as either source or destination. If this returns 1, + * copy_interface_to may be called with both source and destination in + * device memory. + * + * \p handling_node is the node that will initiate the transfer. This + * makes it possible to prefer initiating the transfer from the driver itself. + */ + int (*is_direct_access_supported)(unsigned node, unsigned handling_node); + + /** Allocate \p size bytes of data on node \p dst_node. + * \p flags can contain STARPU_MALLOC_* flags, only useful for CPU memory */ + uintptr_t (*malloc_on_node)(unsigned dst_node, size_t size, int flags); + /** Free data \p addr, which was a previous allocation of \p size bytes + * of data on node \p dst_node with flags \p flags */ + void (*free_on_node)(unsigned dst_node, uintptr_t addr, size_t size, int flags); + + /** Map a piece of data to this type of node from another type of node. + * This method is optional */ + map_t map[STARPU_MAX_RAM+1]; + + /** Unmap a piece of data to this type of node from another type of node. + * This method is optional */ + unmap_t unmap[STARPU_MAX_RAM+1]; + + /** Update cache coherency for the mapping of a piece of data to this type of + * node from another type of node.
+ * This method is optional */ + update_map_t update_map[STARPU_MAX_RAM+1]; + + /** Name of the type of memory, for debugging */ + char *name; +}; + +const char* _starpu_node_get_prefix(enum starpu_node_kind kind); + +#pragma GCC visibility pop + +#endif // __NODE_OPS_H__ diff --git a/src/datawizard/reduction.c b/src/datawizard/reduction.c new file mode 100644 index 0000000..ad99c66 --- /dev/null +++ b/src/datawizard/reduction.c @@ -0,0 +1,479 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl) +{ + starpu_data_set_reduction_methods_with_args(handle, redux_cl, NULL, init_cl, NULL); +} + +void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_cl_arg, struct starpu_codelet *init_cl, void *init_cl_arg) +{ + _starpu_spin_lock(&handle->header_lock); + + if (init_cl) + { + STARPU_ASSERT_MSG(init_cl->nbuffers == 1, "The initialization method has to take one STARPU_W parameter"); + STARPU_ASSERT_MSG(init_cl->modes[0] == STARPU_W, "The initialization method has to take one STARPU_W parameter"); + } + if (redux_cl) + { + STARPU_ASSERT_MSG(redux_cl->nbuffers == 2, "The reduction method has to take one STARPU_RW|STARPU_COMMUTE parameter and one STARPU_R parameter"); + if (!(redux_cl->modes[0] & STARPU_COMMUTE)) + { + static int _warned = 0; + STARPU_HG_DISABLE_CHECKING(_warned); + if (!_warned) + { + _STARPU_DISP("Warning: The reduction method should use STARPU_COMMUTE for its first parameter\n"); + _warned = 1; + } + redux_cl->modes[0] |= STARPU_COMMUTE; + } + STARPU_ASSERT_MSG(redux_cl->modes[0] == (STARPU_RW | STARPU_COMMUTE), "The first parameter of the reduction method has to use STARPU_RW|STARPU_COMMUTE"); + STARPU_ASSERT_MSG(redux_cl->modes[1] == STARPU_R, "The second parameter of the reduction method has to use STARPU_R"); + } + + _starpu_codelet_check_deprecated_fields(redux_cl); + _starpu_codelet_check_deprecated_fields(init_cl); + + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + /* make sure that the flags are applied to the children as well */ + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + if (child_handle->nchildren > 0) + starpu_data_set_reduction_methods_with_args(child_handle, redux_cl, redux_cl_arg, init_cl, 
init_cl_arg); + } + + handle->redux_cl = redux_cl; + handle->init_cl = init_cl; + handle->redux_cl_arg = redux_cl_arg; + handle->init_cl_arg = init_cl_arg; + + _starpu_spin_unlock(&handle->header_lock); +} + +void _starpu_init_data_replicate(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, int workerid) +{ + STARPU_ASSERT(replicate); + STARPU_ASSERT(replicate->allocated || replicate->mapped != STARPU_UNMAPPED); + + struct starpu_codelet *init_cl = handle->init_cl; + STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle); + + _starpu_cl_func_t init_func = NULL; + + /* TODO Check that worker may execute the codelet */ + + switch (starpu_worker_get_type(workerid)) + { + case STARPU_CPU_WORKER: + init_func = _starpu_task_get_cpu_nth_implementation(init_cl, 0); + break; + + case STARPU_CUDA_WORKER: + init_func = _starpu_task_get_cuda_nth_implementation(init_cl, 0); +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) + /* We make sure we do manipulate the proper device */ + starpu_cuda_set_device(starpu_worker_get_devid(workerid)); +#endif + break; + case STARPU_HIP_WORKER: + init_func = _starpu_task_get_hip_nth_implementation(init_cl, 0); +#if defined(STARPU_HAVE_HIP_MEMCPY_PEER) && !defined(STARPU_SIMGRID) + /* We make sure we do manipulate the proper device */ + starpu_hip_set_device(starpu_worker_get_devid(workerid)); +#endif + break; + case STARPU_OPENCL_WORKER: + init_func = _starpu_task_get_opencl_nth_implementation(init_cl, 0); + break; + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + case STARPU_MPI_MS_WORKER: + init_func = _starpu_src_common_get_cpu_func_from_codelet(init_cl, 0); + break; +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + case STARPU_TCPIP_MS_WORKER: + init_func = _starpu_src_common_get_cpu_func_from_codelet(init_cl, 0); + break; +#endif + + default: + STARPU_ABORT(); + break; + } + + 
STARPU_ASSERT(init_func); + + switch (starpu_worker_get_type(workerid)) + { +#ifdef STARPU_USE_MPI_MASTER_SLAVE + case STARPU_MPI_MS_WORKER: + { + struct _starpu_mp_node *node = _starpu_mpi_ms_src_get_actual_thread_mp_node(); + int subworkerid = _starpu_get_worker_struct(workerid)->subworkerid; + void * arg; + int arg_size; + + _starpu_src_common_execute_kernel(node, + (void(*)(void))init_func, subworkerid, + STARPU_SEQ, 0, 0, &handle, + &(replicate->data_interface), 1, + NULL, 0 , 1); + + _starpu_src_common_wait_completed_execution(node,subworkerid,&arg,&arg_size); + break; + } +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + case STARPU_TCPIP_MS_WORKER: + { + struct _starpu_mp_node *node = _starpu_tcpip_ms_src_get_actual_thread_mp_node(); + int subworkerid = _starpu_get_worker_struct(workerid)->subworkerid; + void * arg; + int arg_size; + + _starpu_src_common_execute_kernel(node, + (void(*)(void))init_func, subworkerid, + STARPU_SEQ, 0, 0, &handle, + &(replicate->data_interface), 1, + NULL, 0 , 1); + + _starpu_src_common_wait_completed_execution(node,subworkerid,&arg,&arg_size); + break; + } +#endif + default: + init_func(&replicate->data_interface, NULL); + break; + } + + replicate->initialized = 1; +} + +/* Enable reduction mode. This function must be called with the header lock + * taken. */ +void _starpu_data_start_reduction_mode(starpu_data_handle_t handle) +{ + STARPU_ASSERT(handle->reduction_refcnt == 0); + + if (!handle->per_worker) + _starpu_data_initialize_per_worker(handle); + + unsigned worker; + + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *replicate; + replicate = &handle->per_worker[worker]; + replicate->initialized = 0; + replicate->relaxed_coherency = 2; + if (replicate->mc) + replicate->mc->relaxed_coherency = 2; + } +} + +//#define NO_TREE_REDUCTION + +/* Force reduction. The lock should already have been taken. 
*/ +void _starpu_data_end_reduction_mode(starpu_data_handle_t handle, int priority) +{ + unsigned worker; + unsigned node; + unsigned empty; /* Whether the handle is initially unallocated */ + + /* Put every valid replicate in the same array */ + unsigned replicate_count = 0; + starpu_data_handle_t replicate_array[1 + STARPU_NMAXWORKERS]; + + _starpu_spin_checklocked(&handle->header_lock); + + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (handle->per_node[node].state != STARPU_INVALID) + break; + } + empty = node == STARPU_MAXNODES; + +#ifndef NO_TREE_REDUCTION + if (!empty) + /* Include the initial value into the reduction tree */ + replicate_array[replicate_count++] = handle; +#endif + + /* Register all valid per-worker replicates */ + unsigned nworkers = starpu_worker_get_count(); + STARPU_ASSERT(!handle->reduction_tmp_handles); + _STARPU_MALLOC(handle->reduction_tmp_handles, nworkers*sizeof(handle->reduction_tmp_handles[0])); + for (worker = 0; worker < nworkers; worker++) + { + if (handle->per_worker[worker].initialized) + { + /* Make sure the replicate is not removed */ + handle->per_worker[worker].refcnt++; + + unsigned home_node = starpu_worker_get_memory_node(worker); + starpu_data_register(&handle->reduction_tmp_handles[worker], + home_node, handle->per_worker[worker].data_interface, handle->ops); + + starpu_data_set_sequential_consistency_flag(handle->reduction_tmp_handles[worker], 0); + + replicate_array[replicate_count++] = handle->reduction_tmp_handles[worker]; + } + else + { + handle->reduction_tmp_handles[worker] = NULL; + } + } + +#ifndef NO_TREE_REDUCTION + if (empty) + { + /* Only the final copy will touch the actual handle */ + handle->reduction_refcnt = 1; + } + else + { + unsigned step = 1; + handle->reduction_refcnt = 0; + while (step < replicate_count) + { + /* Each stage will touch the actual handle */ + handle->reduction_refcnt++; + step *= 2; + } + } +#else + /* We know that in this reduction algorithm there is exactly one task 
per valid replicate. */ + handle->reduction_refcnt = replicate_count + empty; +#endif + +// fprintf(stderr, "REDUX REFCNT = %d\n", handle->reduction_refcnt); + + if (replicate_count > +#ifndef NO_TREE_REDUCTION + !empty +#else + 0 +#endif + ) + { + /* Temporarily unlock the handle */ + _starpu_spin_unlock(&handle->header_lock); + +#ifndef NO_TREE_REDUCTION + /* We will store a pointer to the last task which should modify the + * replicate */ + struct starpu_task *last_replicate_deps[replicate_count]; + memset(last_replicate_deps, 0, replicate_count*sizeof(struct starpu_task *)); + struct starpu_task *redux_tasks[replicate_count]; + + /* Redux step-by-step for step from 1 to replicate_count/2, i.e. + * 1-by-1, then 2-by-2, then 4-by-4, etc. */ + unsigned step; + unsigned redux_task_idx = 0; + for (step = 1; step < replicate_count; step *=2) + { + unsigned i; + for (i = 0; i < replicate_count; i+=2*step) + { + if (i + step < replicate_count) + { + /* Perform the reduction between replicates i + * and i+step and put the result in replicate i */ + struct starpu_task *redux_task = starpu_task_create(); + redux_task->name = "redux_task_between_replicates"; + redux_task->priority = priority; + + /* Mark these tasks so that StarPU does not block them + * when they try to access the handle (normal tasks and + * data requests to that handle are frozen until the + * data is coherent again).
*/ + struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); + j->reduction_task = 1; + + redux_task->cl = handle->redux_cl; + redux_task->cl_arg = handle->redux_cl_arg; + STARPU_ASSERT(redux_task->cl); + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) + STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW|STARPU_COMMUTE, 0); + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1))) + STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1); + + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0) & STARPU_COMMUTE)) + { + static int warned; + STARPU_HG_DISABLE_CHECKING(warned); + if (!warned) + { + warned = 1; + _STARPU_DISP("Warning: for reductions, codelet %p should have STARPU_COMMUTE along STARPU_RW\n", redux_task->cl); + } + } + + STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0); + STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1); + + int ndeps = 0; + struct starpu_task *task_deps[2]; + + if (last_replicate_deps[i]) + task_deps[ndeps++] = last_replicate_deps[i]; + + if (last_replicate_deps[i+step]) + task_deps[ndeps++] = last_replicate_deps[i+step]; + + /* i depends on this task */ + last_replicate_deps[i] = redux_task; + + /* we don't perform the reduction until both replicates are ready */ + starpu_task_declare_deps_array(redux_task, ndeps, task_deps); + + /* We cannot submit tasks here: we do + * not want to depend on tasks that have + * been completed, so we just store + * this task in redux_tasks: it will be submitted + * later. */ + redux_tasks[redux_task_idx++] = redux_task; + } + } + } + + if (empty) + /* The handle was empty, we just need to copy the reduced value. */ + _starpu_data_cpy(handle, replicate_array[0], 1, NULL, 0, 1, last_replicate_deps[0], priority); + + /* Let's submit all the reduction tasks.
*/ + unsigned i; + for (i = 0; i < redux_task_idx; i++) + { + int ret = _starpu_task_submit_internally(redux_tasks[i]); + STARPU_ASSERT(ret == 0); + } +#else + if (empty) + { + struct starpu_task *redux_task = starpu_task_create(); + redux_task->name = "redux_task_empty"; + redux_task->priority = priority; + + /* Mark these tasks so that StarPU does not block them + * when they try to access the handle (normal tasks and + * data requests to that handle are frozen until the + * data is coherent again). */ + struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); + j->reduction_task = 1; + + redux_task->cl = handle->init_cl; + redux_task->cl_arg = handle->init_cl_arg; + STARPU_ASSERT(redux_task->cl); + + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) + STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_W, 0); + + STARPU_TASK_SET_HANDLE(redux_task, handle, 0); + + int ret = _starpu_task_submit_internally(redux_task); + STARPU_ASSERT(!ret); + } + + /* Create a set of tasks to perform the reduction */ + unsigned replicate; + for (replicate = 0; replicate < replicate_count; replicate++) + { + struct starpu_task *redux_task = starpu_task_create(); + redux_task->name = "redux_task_reduction"; + redux_task->priority = priority; + + /* Mark these tasks so that StarPU does not block them + * when they try to access the handle (normal tasks are + * data requests to that handle are frozen until the + * data is coherent again).
*/ + struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); + j->reduction_task = 1; + + redux_task->cl = handle->redux_cl; + STARPU_ASSERT(redux_task->cl); + + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) + STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0); + if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1))) + STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1); + + STARPU_TASK_SET_HANDLE(redux_task, handle, 0); + STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1); + + int ret = _starpu_task_submit_internally(redux_task); + STARPU_ASSERT(!ret); + } +#endif + /* Get the header lock back */ + _starpu_spin_lock(&handle->header_lock); + + } + + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *replicate; + replicate = &handle->per_worker[worker]; + replicate->relaxed_coherency = 1; + if (replicate->mc) + replicate->mc->relaxed_coherency = 1; + } +} + +void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle) +{ + unsigned nworkers = starpu_worker_get_count(); + +// fprintf(stderr, "_starpu_data_end_reduction_mode_terminate\n"); + unsigned worker; + + _starpu_spin_checklocked(&handle->header_lock); + + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *replicate; + replicate = &handle->per_worker[worker]; + replicate->initialized = 0; + + if (handle->reduction_tmp_handles[worker]) + { +// fprintf(stderr, "unregister handle %p\n", handle); + _starpu_spin_lock(&handle->reduction_tmp_handles[worker]->header_lock); + handle->reduction_tmp_handles[worker]->lazy_unregister = 1; + _starpu_spin_unlock(&handle->reduction_tmp_handles[worker]->header_lock); + starpu_data_unregister_no_coherency(handle->reduction_tmp_handles[worker]); + handle->per_worker[worker].refcnt--; + /* TODO put in cache */ + } + } + free(handle->reduction_tmp_handles); + handle->reduction_tmp_handles = NULL; +} diff --git a/src/datawizard/sort_data_handles.c 
b/src/datawizard/sort_data_handles.c new file mode 100644 index 0000000..64a5707 --- /dev/null +++ b/src/datawizard/sort_data_handles.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include + +/* To avoid deadlocks in case we have multiple tasks accessing the same piece + * of data (eg. task T1 needs A and B, and T2 needs B and A), we need to lock + * them in order, so that we need a total order over data. We must also not + * lock a child before its parent. 
*/ + +static void find_data_path(struct _starpu_data_state *data, unsigned path[]) +{ + unsigned depth = data->depth; + struct _starpu_data_state *current = data; + + /* Compute the path from the root to the data: entries are filled from the last one (data itself) back to the first one by walking up father_handle */ + unsigned level; /* level is the distance between data and the current node */ + for (level = 0; level < depth; level++) + { + path[depth - level - 1] = current->sibling_index; + current = current->father_handle; + } +} + +static int _compar_data_paths(const unsigned pathA[], unsigned depthA, + const unsigned pathB[], unsigned depthB) +{ + unsigned level; + unsigned depth = STARPU_MIN(depthA, depthB); + + for (level = 0; level < depth; level++) + { + if (pathA[level] != pathB[level]) + return (pathA[level] < pathB[level])?-1:1; + } + + /* Same entries and same length: this is the same path */ + if (depthA == depthB) + return 0; + + /* One path is a prefix of the other, i.e. A is a subdata of B or B is a + * subdata of A: by convention the shallowest one (the ancestor) sorts first. */ + return (depthA < depthB)?-1:1; +} + +/* A comparison function between two handles makes it possible to use qsort to + * sort a list of handles */ +static int _starpu_compar_handles(const struct _starpu_data_descr *descrA, + const struct _starpu_data_descr *descrB) +{ + starpu_data_handle_t dataA = descrA->handle; + starpu_data_handle_t dataB = descrB->handle; + + /* Same root: the two handles may even be the same piece of data */ + if (dataA->root_handle == dataB->root_handle) + { + int Awrites = descrA->mode & STARPU_W; + int Bwrites = descrB->mode & STARPU_W; + int Areads = descrA->mode & STARPU_R; + int Breads = descrB->mode & STARPU_R; + + /* Process write requests first, this is needed for proper + * locking, see _submit_job_access_data, + * _starpu_fetch_task_input, and _starpu_push_task_output */ + + if (Awrites && !Bwrites) + /* Only A writes, take it first */ + return -1; + if (!Awrites && Bwrites) + /* Only B writes, take it first */ + return 1; + /* Either both A and B write, or neither does */ + + if (Areads && !Breads) + /* Only A reads, take it first
*/ + return -1; + if (!Areads && Breads) + /* Only B reads, take it first */ + return 1; + /* A and B agree on both reading and writing */ + + /* Things get more complicated: we need to find the location of dataA + * and dataB within the tree. + * NOTE(review): the two arrays below are VLAs sized by depth; if depth can be 0 (presumably for a root handle -- to be confirmed), they are zero-length, which ISO C does not allow. */ + unsigned dataA_path[dataA->depth]; + unsigned dataB_path[dataB->depth]; + + find_data_path(dataA, dataA_path); + find_data_path(dataB, dataB_path); + + return _compar_data_paths(dataA_path, dataA->depth, dataB_path, dataB->depth); + } + + /* Put arbitered accesses after non-arbitered */ + if (dataA->arbiter && !(dataB->arbiter)) + return 1; + if (dataB->arbiter && !(dataA->arbiter)) + return -1; + if (dataA->arbiter != dataB->arbiter) + /* Both are arbitered, sort by arbiter pointer order */ + return (dataA->arbiter < dataB->arbiter)?-1:1; + /* If both are arbitered by the same arbiter (or they are both not + * arbitered), we'll sort them by root handle pointer order */ + return (dataA->root_handle < dataB->root_handle)?-1:1; +} + +int _starpu_handles_same_root(starpu_data_handle_t dataA, starpu_data_handle_t dataB) +{ + return dataA->root_handle == dataB->root_handle; +} + +static int _starpu_compar_buffer_descr(const void *_descrA, const void *_descrB) +{ + const struct _starpu_data_descr *descrA = (const struct _starpu_data_descr *) _descrA; + const struct _starpu_data_descr *descrB = (const struct _starpu_data_descr *) _descrB; + + return _starpu_compar_handles(descrA, descrB); +} + +/* The descr array is sorted in place (and thus overwritten), so this must be a copy! */ +void _starpu_sort_task_handles(struct _starpu_data_descr descr[], unsigned nbuffers) +{ + qsort(descr, nbuffers, sizeof(descr[0]), _starpu_compar_buffer_descr); +} diff --git a/src/datawizard/sort_data_handles.h b/src/datawizard/sort_data_handles.h new file mode 100644 index 0000000..48b2418 --- /dev/null +++ b/src/datawizard/sort_data_handles.h @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __SORT_DATA_HANDLES_H__ +#define __SORT_DATA_HANDLES_H__ + +/** @file */ + +#include +#include +#include + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** To avoid deadlocks, we reorder the different buffers accessed by the task + * so that we always grab the rw-lock associated to the handles in the same + * order. */ +void _starpu_sort_task_handles(struct _starpu_data_descr descr[], unsigned nbuffers); + +/** The reordering however may place different handles next to each other, just because + * they have the same root. To avoid locking/acquiring/loading the same handle + * several times, we need to keep looking among those. + */ +int _starpu_handles_same_root(starpu_data_handle_t dataA, starpu_data_handle_t dataB); + +#pragma GCC visibility pop + +#endif // __SORT_DATA_HANDLES_H__ diff --git a/src/datawizard/user_interactions.c b/src/datawizard/user_interactions.c new file mode 100644 index 0000000..8ae597c --- /dev/null +++ b/src/datawizard/user_interactions.c @@ -0,0 +1,861 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void _starpu_data_check_initialized(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + if (((handle->nplans && !handle->nchildren) || handle->siblings) + && !(mode & STARPU_NOPLAN)) + { + _starpu_data_partition_access_submit(handle, (mode & STARPU_W) != 0); + } + + if (!(mode & STARPU_R)) + return; + + if (!handle->initialized && handle->init_cl) + { + int ret = starpu_task_insert(handle->init_cl, STARPU_W, handle, 0); + STARPU_ASSERT(ret == 0); + } + STARPU_ASSERT_MSG(handle->initialized, "handle %p is not initialized while trying to read it\n", handle); +} + +/* Explicitly ask StarPU to allocate room for a piece of data on the specified + * memory node. 
*/ +int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node) +{ + struct _starpu_data_request *r; + + STARPU_ASSERT(handle); + + _starpu_spin_lock(&handle->header_lock); + + r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, NULL, STARPU_PREFETCH, 0, 0, "starpu_data_request_allocation"); + + /* we do not increase the refcnt associated to the request since we are + * not waiting for its termination */ + + _starpu_post_data_request(r); + + _starpu_spin_unlock(&handle->header_lock); + + return 0; +} + +struct user_interaction_wrapper +{ + starpu_data_handle_t handle; + enum starpu_data_access_mode mode; + int node; + starpu_pthread_cond_t cond; + starpu_pthread_mutex_t lock; + unsigned finished; + unsigned detached; + enum starpu_is_prefetch prefetch; + unsigned async; + int prio; + void (*callback_acquired)(void *, int *node, enum starpu_data_access_mode mode); + void (*callback)(void *); + void *callback_arg; + struct starpu_task *pre_sync_task; + struct starpu_task *post_sync_task; +}; + +static inline void _starpu_data_acquire_wrapper_init(struct user_interaction_wrapper *wrapper, starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) +{ + memset(wrapper, 0, sizeof(*wrapper)); + wrapper->handle = handle; + wrapper->node = node; + wrapper->mode = mode; + //wrapper->finished = 0; + STARPU_PTHREAD_COND_INIT0(&wrapper->cond, NULL); + STARPU_PTHREAD_MUTEX_INIT0(&wrapper->lock, NULL); +} + +/* Called to signal completion of asynchronous data acquisition */ +static inline void _starpu_data_acquire_wrapper_finished(struct user_interaction_wrapper *wrapper) +{ + STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock); + wrapper->finished = 1; + STARPU_PTHREAD_COND_SIGNAL(&wrapper->cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper->lock); +} + +/* Called to wait for completion of asynchronous data acquisition */ +static inline void _starpu_data_acquire_wrapper_wait(struct user_interaction_wrapper *wrapper) +{ 
+ STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock); + while (!wrapper->finished) + STARPU_PTHREAD_COND_WAIT(&wrapper->cond, &wrapper->lock); + STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper->lock); +} + +static inline void _starpu_data_acquire_wrapper_fini(struct user_interaction_wrapper *wrapper) +{ + STARPU_PTHREAD_COND_DESTROY(&wrapper->cond); + STARPU_PTHREAD_MUTEX_DESTROY(&wrapper->lock); +} + +/* Called when the data acquisition is done, to launch the fetch into target memory */ +static inline void _starpu_data_acquire_launch_fetch(struct user_interaction_wrapper *wrapper, int async, void (*callback)(void *), void *callback_arg) +{ + int node = wrapper->node; + starpu_data_handle_t handle = wrapper->handle; + struct _starpu_data_replicate *replicate = node >= 0 ? &handle->per_node[node] : NULL; + + int ret = _starpu_fetch_data_on_node(handle, node, replicate, wrapper->mode, wrapper->detached, NULL, wrapper->prefetch, async, callback, callback_arg, wrapper->prio, "_starpu_data_acquire_launch_fetch"); + STARPU_ASSERT(!ret); +} + + + +/* + * Non Blocking data request from application + */ + + +/* Called when fetch is done, call the callback */ +static void _starpu_data_acquire_fetch_data_callback(void *arg) +{ + struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; + starpu_data_handle_t handle = wrapper->handle; + + /* At that moment, the caller holds a reference to the piece of data. + * We enqueue the "post" sync task in the list associated to the handle + * so that it is submitted by the starpu_data_release + * function. 
*/ + if (wrapper->post_sync_task) + _starpu_add_post_sync_tasks(wrapper->post_sync_task, handle); + + wrapper->callback(wrapper->callback_arg); + + _starpu_data_acquire_wrapper_fini(wrapper); + free(wrapper); +} + +/* Called when the data acquisition is done, launch the fetch into target memory */ +static void _starpu_data_acquire_continuation_non_blocking(void *arg) +{ + struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; + + if (wrapper->callback_acquired) + /* This can change the node at will according to the current data situation */ + wrapper->callback_acquired(wrapper->callback_arg, &wrapper->node, wrapper->mode); + + _starpu_data_acquire_launch_fetch(arg, 1, _starpu_data_acquire_fetch_data_callback, arg); +} + +/* Called when the implicit data dependencies are done, launch the data acquisition */ +static void starpu_data_acquire_cb_pre_sync_callback(void *arg) +{ + struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; + + /* + * we try to get the data, if we do not succeed immediately, + * we set a callback function that will be executed + * automatically when the data is available again, otherwise we + * fetch the data directly + */ + if (!_starpu_attempt_to_submit_data_request_from_apps(wrapper->handle, wrapper->mode, + _starpu_data_acquire_continuation_non_blocking, wrapper)) + { + /* no one has locked this data yet, so we proceed immediately */ + _starpu_data_acquire_continuation_non_blocking(wrapper); + } +} + +/* The data must be released by calling starpu_data_release later on */ +int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, + enum starpu_data_access_mode mode, + void (*callback_acquired)(void *arg, int *node, enum starpu_data_access_mode mode), + void (*callback)(void *arg), + void *arg, + int sequential_consistency, int quick, + long *pre_sync_jobid, long *post_sync_jobid, int prio) +{ + STARPU_ASSERT(handle); + 
STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data (%p) is not possible", handle); + _STARPU_LOG_IN(); + + /* Check that previous tasks have set a value if needed */ + _starpu_data_check_initialized(handle, mode); + + struct user_interaction_wrapper *wrapper; + _STARPU_MALLOC(wrapper, sizeof(struct user_interaction_wrapper)); + + _starpu_data_acquire_wrapper_init(wrapper, handle, node, mode); + wrapper->async = 1; + + wrapper->callback_acquired = callback_acquired; + wrapper->callback = callback; + wrapper->callback_arg = arg; + wrapper->pre_sync_task = NULL; + wrapper->post_sync_task = NULL; + wrapper->prio = prio; + + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + int handle_sequential_consistency = handle->sequential_consistency; + if (handle_sequential_consistency && sequential_consistency) + { + struct starpu_task *new_task; + struct _starpu_job *pre_sync_job, *post_sync_job; + int submit_pre_sync = 0; + wrapper->pre_sync_task = starpu_task_create(); + wrapper->pre_sync_task->name = "_starpu_data_acquire_cb_pre"; + wrapper->pre_sync_task->detach = 1; + wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback; + wrapper->pre_sync_task->callback_arg = wrapper; + wrapper->pre_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; + wrapper->pre_sync_task->priority = prio; + pre_sync_job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task); + if (pre_sync_jobid) + *pre_sync_jobid = pre_sync_job->job_id; + + wrapper->post_sync_task = starpu_task_create(); + wrapper->post_sync_task->name = "_starpu_data_acquire_cb_release"; + wrapper->post_sync_task->detach = 1; + wrapper->post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; + wrapper->post_sync_task->priority = prio; + post_sync_job = _starpu_get_job_associated_to_task(wrapper->post_sync_task); + if (post_sync_jobid) + *post_sync_jobid = post_sync_job->job_id; + + if (quick) + pre_sync_job->quick_next = post_sync_job; + + new_task = 
_starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, &submit_pre_sync, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency); + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + + if (STARPU_UNLIKELY(new_task)) + { + int ret = _starpu_task_submit_internally(new_task); + STARPU_ASSERT(!ret); + } + + if (submit_pre_sync) + { + int ret = _starpu_task_submit_internally(wrapper->pre_sync_task); + STARPU_ASSERT(!ret); + } + else + { + wrapper->pre_sync_task->detach = 0; + starpu_task_destroy(wrapper->pre_sync_task); + starpu_data_acquire_cb_pre_sync_callback(wrapper); + } + } + else + { + if (pre_sync_jobid) + *pre_sync_jobid = -1; + if (post_sync_jobid) + *post_sync_jobid = -1; + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + + starpu_data_acquire_cb_pre_sync_callback(wrapper); + } + + _STARPU_LOG_OUT(); + return 0; +} + +static int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, + enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, + int sequential_consistency, int quick) +{ + return starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(handle, node, mode, NULL, callback, arg, sequential_consistency, quick, NULL, NULL, STARPU_DEFAULT_PRIO); +} + +int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, + enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, + int sequential_consistency) +{ + return starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, node, mode, callback, arg, sequential_consistency, 0); +} + +int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, + enum starpu_data_access_mode mode, void (*callback)(void *), void *arg) +{ + return starpu_data_acquire_on_node_cb_sequential_consistency(handle, node, mode, callback, arg, 1); +} + +int 
starpu_data_acquire_cb(starpu_data_handle_t handle, + enum starpu_data_access_mode mode, void (*callback)(void *), void *arg) +{ + int home_node = handle->home_node; + if (home_node < 0) + home_node = STARPU_MAIN_RAM; + return starpu_data_acquire_on_node_cb(handle, home_node, mode, callback, arg); +} + +int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, + enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency) +{ + int home_node = handle->home_node; + if (home_node < 0) + home_node = STARPU_MAIN_RAM; + return starpu_data_acquire_on_node_cb_sequential_consistency(handle, home_node, mode, callback, arg, sequential_consistency); +} + + +/* + * Blocking data request from application + */ + + + +static inline void _starpu_data_acquire_continuation(void *arg) +{ + struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; + + starpu_data_handle_t handle = wrapper->handle; + STARPU_ASSERT(handle); + + _starpu_data_acquire_launch_fetch(wrapper, 0, NULL, NULL); + _starpu_data_acquire_wrapper_finished(wrapper); +} + +/* The data must be released by calling starpu_data_release later on */ +int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) +{ + STARPU_ASSERT(handle); + STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data is not possible"); + _STARPU_LOG_IN(); + + /* unless asynchronous, it is forbidden to call this function from a callback or a codelet */ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "Acquiring a data synchronously is not possible from a codelet or from a task callback, use starpu_data_acquire_cb instead."); + + /* Check that previous tasks have set a value if needed */ + _starpu_data_check_initialized(handle, mode); + + if (node >= 0 && _starpu_data_is_multiformat_handle(handle) && + _starpu_handle_needs_conversion_task(handle, node)) + { + struct starpu_task *task = 
_starpu_create_conversion_task(handle, node); + int ret; + _starpu_spin_lock(&handle->header_lock); + handle->refcnt--; + handle->busy_count--; + handle->mf_node = node; + _starpu_spin_unlock(&handle->header_lock); + task->synchronous = 1; + ret = _starpu_task_submit_internally(task); + STARPU_ASSERT(!ret); + } + + struct user_interaction_wrapper wrapper; + _starpu_data_acquire_wrapper_init(&wrapper, handle, node, mode); + +// _STARPU_DEBUG("TAKE sequential_consistency_mutex starpu_data_acquire\n"); + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + int sequential_consistency = handle->sequential_consistency; + if (sequential_consistency) + { + struct starpu_task *new_task; + int submit_pre_sync = 0; + wrapper.pre_sync_task = starpu_task_create(); + wrapper.pre_sync_task->name = "_starpu_data_acquire_pre"; + wrapper.pre_sync_task->detach = 0; + wrapper.pre_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; + + wrapper.post_sync_task = starpu_task_create(); + wrapper.post_sync_task->name = "_starpu_data_acquire_post"; + wrapper.post_sync_task->detach = 1; + wrapper.post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; + + new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, &submit_pre_sync, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency); + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + + if (STARPU_UNLIKELY(new_task)) + { + int ret = _starpu_task_submit_internally(new_task); + STARPU_ASSERT(!ret); + } + + if (submit_pre_sync) + { + wrapper.pre_sync_task->synchronous = 1; + int ret = _starpu_task_submit_internally(wrapper.pre_sync_task); + STARPU_ASSERT(!ret); + } + else + { + wrapper.pre_sync_task->detach = 0; + starpu_task_destroy(wrapper.pre_sync_task); + } + } + else + { + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + } + + /* + * we try to get the data, if we do not succeed 
immediately, + * we set a callback function that will be executed + * automatically when the data is available again, otherwise we + * fetch the data directly + */ + if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper)) + { + /* no one has locked this data yet, so we proceed immediately */ + _starpu_data_acquire_launch_fetch(&wrapper, 0, NULL, NULL); + } + else + { + _starpu_data_acquire_wrapper_wait(&wrapper); + } + _starpu_data_acquire_wrapper_fini(&wrapper); + + /* At that moment, the caller holds a reference to the piece of data. + * We enqueue the "post" sync task in the list associated to the handle + * so that it is submitted by the starpu_data_release + * function. */ + if (sequential_consistency) + _starpu_add_post_sync_tasks(wrapper.post_sync_task, handle); + + _STARPU_LOG_OUT(); + return 0; +} + +int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + int home_node = handle->home_node; + if (home_node < 0) + home_node = STARPU_MAIN_RAM; + return starpu_data_acquire_on_node(handle, home_node, mode); +} + +int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) +{ + STARPU_ASSERT(handle); + STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data is not possible"); + /* it is forbidden to call this function from a callback or a codelet */ + STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "Acquiring a data synchronously is not possible from a codelet or from a task callback, use starpu_data_acquire_cb instead."); + + /* Check that previous tasks have set a value if needed */ + _starpu_data_check_initialized(handle, mode); + + int ret; + STARPU_ASSERT_MSG(!_starpu_data_is_multiformat_handle(handle), "not supported yet"); + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + ret = _starpu_test_implicit_data_deps_with_handle(handle, mode); + 
STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + if (ret) + return ret; + + struct user_interaction_wrapper wrapper; + _starpu_data_acquire_wrapper_init(&wrapper, handle, node, mode); + + /* + * we try to get the data, if we do not succeed immediately, + * we set a callback function that will be executed + * automatically when the data is available again, otherwise we + * fetch the data directly + */ + if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper)) + { + /* no one has locked this data yet, so we proceed immediately */ + _starpu_data_acquire_launch_fetch(&wrapper, 0, NULL, NULL); + } + else + { + _starpu_data_acquire_wrapper_wait(&wrapper); + } + _starpu_data_acquire_wrapper_fini(&wrapper); + + return 0; +} + +int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + return starpu_data_acquire_on_node_try(handle, STARPU_MAIN_RAM, mode); +} + +/* This function must be called after starpu_data_acquire so that the + * application release the data */ +void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int node) +{ + STARPU_ASSERT(handle); + + if (mode == STARPU_RW) + /* They are equivalent here, and current_mode is never STARPU_RW */ + mode = STARPU_W; + + STARPU_ASSERT_MSG(mode == STARPU_NONE || + mode == handle->current_mode || + (mode == STARPU_R && + handle->current_mode == STARPU_W), + "We only support releasing from W to R"); + + /* In case there are some implicit dependencies, unlock the "post sync" tasks */ + _starpu_unlock_post_sync_tasks(handle, mode); + + /* The application can now release the rw-lock */ + if (node >= 0) + _starpu_release_data_on_node(handle, 0, mode, &handle->per_node[node]); + else + { + _starpu_spin_lock(&handle->header_lock); + if (node == STARPU_ACQUIRE_NO_NODE_LOCK_ALL) + { + int i; + for (i = 0; i < STARPU_MAXNODES; i++) + handle->per_node[i].refcnt--; + } + 
handle->busy_count--; + if (!_starpu_notify_data_dependencies(handle, mode)) + _starpu_spin_unlock(&handle->header_lock); + } +} + +void starpu_data_release_on_node(starpu_data_handle_t handle, int node) +{ + starpu_data_release_to_on_node(handle, STARPU_NONE, node); +} + +void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode mode) +{ + int home_node = handle->home_node; + if (home_node < 0) + home_node = STARPU_MAIN_RAM; + starpu_data_release_to_on_node(handle, mode, home_node); +} + +void starpu_data_release(starpu_data_handle_t handle) +{ + starpu_data_release_to(handle, STARPU_NONE); +} + +static void _prefetch_data_on_node(void *arg) +{ + struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; + starpu_data_handle_t handle = wrapper->handle; + + _starpu_data_acquire_launch_fetch(wrapper, wrapper->async, NULL, NULL); + + if (wrapper->async) + free(wrapper); + else + _starpu_data_acquire_wrapper_finished(wrapper); + + _starpu_spin_lock(&handle->header_lock); + if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) + _starpu_spin_unlock(&handle->header_lock); +} + +/* Prefetch data. 
This is the execution-time part
+ *
+ * Ask for the data of handle to be fetched on memory node "node".
+ *
+ * mode is only handed to _starpu_attempt_to_submit_data_request_from_apps();
+ * the wrapper itself is always initialised for STARPU_R. prefetch and prio
+ * are simply recorded in the wrapper.
+ *
+ * If the request can be served right away, the fetch is launched from here
+ * and the wrapper is destroyed before returning. Otherwise
+ * _prefetch_data_on_node() has been registered as the callback: a
+ * synchronous caller (async == 0) waits on the wrapper and then destroys it,
+ * while for an asynchronous caller nothing more is done here and the wrapper
+ * is left to that callback.
+ *
+ * Always returns 0. */
+static
+int __starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigned node, unsigned async, enum starpu_data_access_mode mode, enum starpu_is_prefetch prefetch, int prio)
+{
+	STARPU_ASSERT(handle);
+
+	/* unless asynchronous, it is forbidden to call this function from a callback or a codelet */
+	STARPU_ASSERT_MSG(async || _starpu_worker_may_perform_blocking_calls(), "Synchronous prefetch is not possible from a task or a callback");
+
+	struct user_interaction_wrapper *wrapper;
+	_STARPU_MALLOC(wrapper, sizeof(*wrapper));	/* on the heap: in the queued asynchronous case it outlives this call */
+
+	_starpu_data_acquire_wrapper_init(wrapper, handle, node, STARPU_R);	/* note: STARPU_R, not mode */
+
+	wrapper->detached = async;
+	wrapper->prefetch = prefetch;
+	wrapper->async = async;
+	wrapper->prio = prio;
+
+	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _prefetch_data_on_node, wrapper))
+	{
+		/* we can immediately proceed: the callback was not registered,
+		 * so everything (fetch, wrapper destruction, release) is done
+		 * from here */
+		struct _starpu_data_replicate *replicate = &handle->per_node[node];	/* node is not range-checked here */
+		_starpu_data_acquire_launch_fetch(wrapper, async, NULL, NULL);
+
+		_starpu_data_acquire_wrapper_fini(wrapper);
+		free(wrapper);
+
+		/* remove the "lock"/reference */
+
+		_starpu_spin_lock(&handle->header_lock);
+
+		if (!async)
+		{
+			/* Release our refcnt, like _starpu_release_data_on_node would do */
+			replicate->refcnt--;
+			STARPU_ASSERT(replicate->refcnt >= 0);
+			STARPU_ASSERT(handle->busy_count > 0);
+			handle->busy_count--;
+		}
+
+		/* In case there was a temporary handle (eg. used for reduction), this
+		 * handle may have requested to be destroyed when the data is released
+		 * */
+		if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
+			_starpu_spin_unlock(&handle->header_lock);	/* only unlock when the notify call returned 0 */
+	}
+	else if (!async)
+	{
+		_starpu_data_acquire_wrapper_wait(wrapper);	/* block until _prefetch_data_on_node() marks the wrapper finished */
+		_starpu_data_acquire_wrapper_fini(wrapper);
+		free(wrapper);
+	}
+
+	return 0;
+}
+
+/* Prefetch data. 
This is the submission-time part */ +static +int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigned node, unsigned async, enum starpu_data_access_mode mode, enum starpu_is_prefetch prefetch, int prio) +{ + /* Check that previous tasks have set a value if needed */ + /* Only valid at submission time, not execution time */ + _starpu_data_check_initialized(handle, mode); + + return __starpu_prefetch_data_on_node_with_mode(handle, node, async, mode, prefetch, prio); +} + +int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) +{ + return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_FETCH, STARPU_DEFAULT_PRIO); +} + +int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio) +{ + return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_PREFETCH, prio); +} + +int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) +{ + return starpu_data_prefetch_on_node_prio(handle, node, async, STARPU_DEFAULT_PRIO); +} + +int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio) +{ + return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_IDLEFETCH, prio); +} + +int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) +{ + return starpu_data_idle_prefetch_on_node_prio(handle, node, async, STARPU_DEFAULT_PRIO); +} + +/* Execution-time part */ +static void _starpu_data_wont_use(void *data) +{ + unsigned node; + starpu_data_handle_t handle = data; + + _STARPU_TRACE_DATA_DOING_WONT_USE(handle); + + _starpu_spin_lock(&handle->header_lock); + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + if (local->allocated && local->automatically_allocated) + _starpu_memchunk_wont_use(local->mc, node); + } 
+ if (handle->per_worker) + { + unsigned nworkers = starpu_worker_get_count(); + unsigned worker; + for (worker = 0; worker < nworkers; worker++) + { + struct _starpu_data_replicate *local = &handle->per_worker[worker]; + if (local->allocated && local->automatically_allocated) + _starpu_memchunk_wont_use(local->mc, starpu_worker_get_memory_node(worker)); + } + } + _starpu_spin_unlock(&handle->header_lock); + starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); + if (handle->home_node != -1) + __starpu_prefetch_data_on_node_with_mode(handle, handle->home_node, 1, STARPU_R, STARPU_IDLEFETCH, STARPU_DEFAULT_PRIO); + else + { + if (handle->ooc) + { + /* Try to push it to some disk */ + unsigned i; + unsigned nnodes = starpu_memory_nodes_get_count(); + for (i = 0; i < nnodes; i++) + { + if (starpu_node_get_kind(i) == STARPU_DISK_RAM) + __starpu_prefetch_data_on_node_with_mode(handle, i, 1, STARPU_R, STARPU_IDLEFETCH, STARPU_DEFAULT_PRIO); + } + } + } +} + +void starpu_data_wont_use(starpu_data_handle_t handle) +{ + if (!handle->initialized) + /* No value atm actually */ + return; + + if (starpu_data_get_nb_children(handle) != 0) + { + int i; + for(i=0 ; ipartitioned != 0) + { + unsigned i; + for(i=0 ; ipartitioned; i++) + { + unsigned j; + for(j=0 ; jactive_readonly_nchildren[i] ; j++) + starpu_data_wont_use(handle->active_readonly_children[i][j]); + } + } + + if (handle->active_nchildren != 0) + { + unsigned j; + for(j=0 ; jactive_nchildren ; j++) + starpu_data_wont_use(handle->active_children[j]); + return; + } + + _STARPU_TRACE_DATA_WONT_USE(handle); + starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_R, _starpu_data_wont_use, handle, 1, 1); +} + +/* + * It is possible to specify that a piece of data can be discarded without + * impacting the application. 
+ */ +int _starpu_has_not_important_data; +void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important) +{ + if (!is_important) + _starpu_has_not_important_data = 1; + + _starpu_spin_lock(&handle->header_lock); + + /* first take all the children lock (in order !) */ + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + /* make sure the intermediate children is advised as well */ + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + if (child_handle->nchildren > 0) + starpu_data_advise_as_important(child_handle, is_important); + } + + handle->is_not_important = !is_important; + + /* now the parent may be used again so we release the lock */ + _starpu_spin_unlock(&handle->header_lock); + +} + +void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag) +{ + _starpu_spin_lock(&handle->header_lock); + + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + /* make sure that the flags are applied to the children as well */ + starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); + if (child_handle->nchildren > 0) + starpu_data_set_sequential_consistency_flag(child_handle, flag); + } + + STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); + handle->sequential_consistency = flag; + STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); + + _starpu_spin_unlock(&handle->header_lock); +} + +unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle) +{ + return handle->sequential_consistency; +} + +void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag) +{ + handle->ooc = flag; +} + +unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle) +{ + return handle->ooc; +} + +/* By default, sequential consistency is enabled */ +static unsigned default_sequential_consistency_flag = 1; + +unsigned starpu_data_get_default_sequential_consistency_flag(void) +{ + 
return default_sequential_consistency_flag; +} + +void starpu_data_set_default_sequential_consistency_flag(unsigned flag) +{ + default_sequential_consistency_flag = flag; +} + +/* Query the status of the handle on the specified memory node. */ +void starpu_data_query_status2(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_loading, int *is_requested) +{ +// XXX : this is just a hint, so we don't take the lock ... +// _starpu_spin_lock(&handle->header_lock); + + if (is_allocated) + *is_allocated = handle->per_node[memory_node].allocated || handle->per_node[memory_node].mapped != STARPU_UNMAPPED; + + if (is_valid) + *is_valid = (handle->per_node[memory_node].state != STARPU_INVALID); + + if (is_loading) + *is_loading = handle->per_node[memory_node].load_request != NULL; + + if (is_requested) + { + int requested = 0; + + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (handle->per_node[memory_node].request[node]) + { + requested = 1; + break; + } + } + + *is_requested = requested; + } + +// _starpu_spin_unlock(&handle->header_lock); +} + +void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested) +{ + return starpu_data_query_status2(handle, memory_node, is_allocated, is_valid, NULL, is_requested); +} diff --git a/src/datawizard/write_back.c b/src/datawizard/write_back.c new file mode 100644 index 0000000..14fcb10 --- /dev/null +++ b/src/datawizard/write_back.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static void wt_callback(void *arg) +{ + starpu_data_handle_t handle = (starpu_data_handle_t) arg; + + _starpu_spin_lock(&handle->header_lock); + if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) + _starpu_spin_unlock(&handle->header_lock); +} + +void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting_node, + uint32_t write_through_mask) +{ + if ((write_through_mask & ~(1<header_lock)) + { + cpt++; + __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); + } + if (cpt == STARPU_SPIN_MAXTRY) + _starpu_spin_lock(&handle->header_lock); + + /* We need to keep a Read lock to avoid letting writers corrupt our copy. 
*/ + STARPU_ASSERT(handle->current_mode != STARPU_REDUX); + STARPU_ASSERT(handle->current_mode != STARPU_SCRATCH); + handle->refcnt++; + handle->busy_count++; + handle->current_mode = STARPU_R; + + struct _starpu_data_request *r; + r = _starpu_create_request_to_fetch_data(handle, &handle->per_node[node], + STARPU_R, NULL, STARPU_IDLEFETCH, 1, wt_callback, handle, 0, "_starpu_write_through_data"); + + /* If no request was created, the handle was already up-to-date on the + * node */ + if (r) + _starpu_spin_unlock(&handle->header_lock); + } + } + } +} + +void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask) +{ + handle->wt_mask = wt_mask; + + /* in case the data has some children, set their wt_mask as well */ + if (handle->nchildren > 0) + { + unsigned child; + for (child = 0; child < handle->nchildren; child++) + { + starpu_data_handle_t handle_child = starpu_data_get_child(handle, child); + starpu_data_set_wt_mask(handle_child, wt_mask); + } + } +} diff --git a/src/datawizard/write_back.h b/src/datawizard/write_back.h new file mode 100644 index 0000000..f3ff60c --- /dev/null +++ b/src/datawizard/write_back.h @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DW_WRITE_BACK_H__ +#define __DW_WRITE_BACK_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +/** If a write-through mask is associated to that data handle, this propagates + * the current value of the data onto the different memory nodes in the + * write_through_mask. */ +void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting_node, + uint32_t write_through_mask); + +#pragma GCC visibility pop + +#endif // __DW_WRITE_BACK_H__ diff --git a/src/debug/latency.c b/src/debug/latency.c new file mode 100644 index 0000000..2c2eed4 --- /dev/null +++ b/src/debug/latency.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, + unsigned node0, unsigned node1, unsigned niter) +{ + /* We assume that no one is using that handle !! 
*/ + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + int ret; + + _starpu_spin_lock(&handle->header_lock); + handle->refcnt++; + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + + struct _starpu_data_replicate *replicate_0 = &handle->per_node[node0]; + ret = _starpu_fetch_data_on_node(handle, node0, replicate_0, STARPU_RW, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong"); + STARPU_ASSERT(!ret); + _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_0); + + _starpu_spin_lock(&handle->header_lock); + handle->refcnt++; + handle->busy_count++; + _starpu_spin_unlock(&handle->header_lock); + + struct _starpu_data_replicate *replicate_1 = &handle->per_node[node1]; + ret = _starpu_fetch_data_on_node(handle, node1, replicate_1, STARPU_RW, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong"); + STARPU_ASSERT(!ret); + _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_1); + } +} diff --git a/src/debug/starpu_debug_helpers.h b/src/debug/starpu_debug_helpers.h new file mode 100644 index 0000000..323d196 --- /dev/null +++ b/src/debug/starpu_debug_helpers.h @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_DEBUG_HELPERS_H__ +#define __STARPU_DEBUG_HELPERS_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** Perform a ping pong between the two memory nodes */ +void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Display the size of different data structures */ +void _starpu_debug_display_structures_size(FILE *stream) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +#ifdef __cplusplus +} +#endif + +#pragma GCC visibility pop + +#endif // __STARPU_DEBUG_HELPERS_H__ diff --git a/src/debug/structures_size.c b/src/debug/structures_size.c new file mode 100644 index 0000000..5a30a8c --- /dev/null +++ b/src/debug/structures_size.c @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +void _starpu_debug_display_structures_size(FILE *stream) +{ + fprintf(stream, "struct starpu_task\t\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct starpu_task), (unsigned) sizeof(struct starpu_task)); + fprintf(stream, "struct _starpu_job\t\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct _starpu_job), (unsigned) sizeof(struct _starpu_job)); + fprintf(stream, "struct _starpu_data_state\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct _starpu_data_state), (unsigned) sizeof(struct _starpu_data_state)); + fprintf(stream, "struct _starpu_tag\t\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct _starpu_tag), (unsigned) sizeof(struct _starpu_tag)); + fprintf(stream, "struct _starpu_cg\t\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct _starpu_cg), (unsigned) sizeof(struct _starpu_cg)); + fprintf(stream, "struct _starpu_worker\t\t%u bytes\t(%x)\n", + (unsigned) sizeof(struct _starpu_worker), (unsigned) sizeof(struct _starpu_worker)); +} diff --git a/src/debug/traces/anim.c b/src/debug/traces/anim.c new file mode 100644 index 0000000..52c3eb7 --- /dev/null +++ b/src/debug/traces/anim.c @@ -0,0 +1,535 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2015-2015 Anthony Simonet + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include "starpu_fxt.h" + +#ifdef STARPU_USE_FXT +static struct component +{ + UT_hash_handle hh; + char *name; + int workerid; + uint64_t ptr; + unsigned nchildren; + struct component **children; + struct component *parent; + unsigned ntasks; + unsigned npriotasks; +} *components; + +static unsigned global_state = 1; +static unsigned nsubmitted; +static unsigned curq_size; +static unsigned nflowing; + +#define COMPONENT_ADD(head, field, add) HASH_ADD(hh, head, field, sizeof(uint64_t), add); +#define COMPONENT_FIND(head, find, out) HASH_FIND(hh, head, &find, sizeof(uint64_t), out); + +static struct component *fxt_component_root(void) +{ + struct component *comp=NULL, *tmp=NULL; + HASH_ITER(hh, components, comp, tmp) + { + while (comp->parent) + comp = comp->parent; + return comp; + } + return NULL; +} + +void _starpu_fxt_component_new(uint64_t component, char *name) +{ + struct component *comp; + _STARPU_MALLOC(comp, sizeof(*comp)); + + if (!strncmp(name, "worker ", 7)) + { + comp->name = strdup("worker"); + comp->workerid = atoi(name+7); + } + else + { + comp->name = strdup(name); + comp->workerid = -1; + } + comp->ptr = component; + comp->nchildren = 0; + comp->children = NULL; + comp->parent = NULL; + comp->ntasks = 0; + comp->npriotasks = 0; + + COMPONENT_ADD(components, ptr, comp); +} + +void _starpu_fxt_component_deinit(void) +{ + struct component *comp, *tmp; + HASH_ITER(hh, components, comp, tmp) + { + HASH_DEL(components, comp); + free(comp->children); + free(comp->name); + free(comp); + } +} + +static void fxt_component_dump(FILE *file, struct component *comp, unsigned depth) +{ + unsigned i; + fprintf(file,"%*s%s (%d %"PRIx64", %d tasks %d prio tasks)\n", 2*depth, "", comp->name, depth, comp->ptr, comp->ntasks, comp->npriotasks); + for (i = 0; i < comp->nchildren; i++) + if (comp->children[i]->parent == comp) + fxt_component_dump(file, comp->children[i], depth+1); +} + +void _starpu_fxt_component_dump(FILE 
*file) +{ + fxt_component_dump(file, fxt_component_root(), 0); +} + +static void fxt_worker_print(FILE *file, struct starpu_fxt_options *options, int workerid, unsigned comp_workerid, unsigned depth) +{ + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + fprintf(file, "\t\t\t%*s
    %s\n", 2*depth, "", + (int) comp_workerid == workerid ? "_sched":"", + options->worker_names[comp_workerid]); + if (_starpu_last_codelet_symbol[comp_workerid][0]) + fprintf(file, "\t\t\t%*s
    %s
    \n", 2*(depth+1), "", _starpu_last_codelet_symbol[comp_workerid]); + else + fprintf(file, "\t\t\t%*s
    \n", 2*(depth+1), ""); + fprintf(file, "\t\t\t%*s
    ", 2*depth, ""); +} + +static void fxt_component_print(FILE *file, struct starpu_fxt_options *options, int workerid, struct component *from, struct component *to, struct component *comp, unsigned depth) +{ + unsigned i, n; + unsigned ntasks = comp->ntasks + comp->npriotasks; + + if (from == comp) + /* Additionally show now-empty slot */ + ntasks++; + + for (i = 0, n = 0; i < comp->nchildren; i++) + if (comp->children[i]->parent == comp) + n++; + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + + if (comp->nchildren > 0) + { + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + for (i = 0; i < comp->nchildren; i++) + if (comp->children[i]->parent == comp) + { + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + } + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + } + + if (!strcmp(comp->name, "worker")) + { + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + fprintf(file, "\t\t\t%*s\n", 2*depth, ""); + } + + fprintf(file, "\t\t\t%*s
    %s\n", 2*depth, "", n, comp->name); + + if (!strcmp(comp->name,"prio") || !strcmp(comp->name,"fifo") || !strcmp(comp->name,"heft") || !strcmp(comp->name,"work_stealing")) + { + /* Show task queue */ +#define N 3 + n = ntasks; + if (n > N) + n = N; + for (i = 0; i < N-n; i++) + fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); + if (ntasks) + { + if (ntasks > N) + fprintf(file, "\t\t\t%*s
    %u
    \n", 2*depth, "", + from == comp + ? (comp->npriotasks >= N ? "last_task_full_prio" : "last_task_full") + : (comp->npriotasks >= N ? "task_prio" : "task"), + comp->ntasks + comp->npriotasks); + else + fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", + from == comp + ? "last_task_empty" + : (comp->ntasks ? "task" : "task_prio")); + for (i = 1; i < n; i++) + fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", + n - i > comp->npriotasks ? "task" : "task_prio"); + } + } + else + { + if (ntasks == 0) + fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); + else if (ntasks == 1) + fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", + from == comp + ? "last_task_empty" + : (comp->npriotasks ? "task_prio" : "task")); + else + fprintf(file, "\t\t\t%*s
    %u
    \n", 2*depth, "", + from == comp + ? (comp->npriotasks ? "last_task_full_prio" : "last_task_full") + : (comp->npriotasks ? "task_prio" : "task"), comp->ntasks + comp->npriotasks); + } + fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); + fxt_component_print(file, options, workerid, from, to, comp->children[i], depth+1); + fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); + fxt_worker_print(file, options, workerid, comp->workerid, depth+1); + fprintf(file, "\t\t\t%*s
    ", 2*depth, ""); +} + +void _starpu_fxt_component_print(FILE *file, struct starpu_fxt_options *options, int workerid, struct component *from, struct component *to) +{ + fprintf(file, "
    \n"); + fxt_component_print(file, options, workerid, from, to, fxt_component_root(), 0); + fprintf(file, "
    \n"); +} + +void _starpu_fxt_component_print_header(FILE *file) +{ + /* CSS and Javascript code from Anthony Simonet */ + fprintf(file, "\n"); + fprintf(file, "\n"); + + fprintf(file, "\t\n"); + fprintf(file, "\t\t\n"); + fprintf(file, "\t\t\n"); + fprintf(file, "\t\t\n"); + fprintf(file, "\t\t\n"); + //fprintf(file, "\t\t\n"); + //fprintf(file, "\t\t\n"); + + fprintf(file, "\t\t\n"); + + fprintf(file, "\t\t\n"); + + fprintf(file, "\t\t\n"); + + fprintf(file, "\t\n"); + + fprintf(file, "\t\n"); +} + +static void fxt_component_print_step(FILE *file, struct starpu_fxt_options *options, double timestamp, int workerid, unsigned push, struct component *from, struct component *to) +{ + fprintf(file, "\t\t
    \n", + global_state, global_state > 1 ? "none":"block", global_state); + fprintf(file, "\t\t

    Time %f, %u submitted %u ready, %s

    \n", timestamp, nsubmitted, curq_size-nflowing, push?"push":"pull"); + //fprintf(file, "\t\t\t
    \n");
    +	//_starpu_fxt_component_dump(file);
    +	//fprintf(file, "\t\t\t
    \n"); + _starpu_fxt_component_print(file, options, workerid, from, to); + fprintf(file,"\t\t
    "); + + global_state++; +} + +void _starpu_fxt_component_connect(uint64_t parent, uint64_t child) +{ + struct component *parent_p, *child_p; + unsigned n; + + COMPONENT_FIND(components, parent, parent_p); + COMPONENT_FIND(components, child, child_p); + STARPU_ASSERT(parent_p); + STARPU_ASSERT(child_p); + + n = ++parent_p->nchildren; + _STARPU_REALLOC(parent_p->children, n * sizeof(*parent_p->children)); + parent_p->children[n-1] = child_p; + if (!child_p->parent) + child_p->parent = parent_p; +} + +void _starpu_fxt_component_update_ntasks(unsigned _nsubmitted, unsigned _curq_size) +{ + nsubmitted = _nsubmitted; + curq_size = _curq_size; +} + +void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio) +{ + struct component *from_p = NULL, *to_p = NULL; + + if (to == from) + return; + + if (from) + { + COMPONENT_FIND(components, from, from_p); + STARPU_ASSERT(from_p); + } + if (to) + { + COMPONENT_FIND(components, to, to_p); + STARPU_ASSERT(to_p); + } + if (from_p) + { + if (prio) + from_p->npriotasks--; + else + from_p->ntasks--; + } + else + nflowing++; + if (to_p) + { + if (prio) + to_p->npriotasks++; + else + to_p->ntasks++; + } + + // fprintf(stderr,"push from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none"); + fxt_component_print_step(output, options, timestamp, workerid, 1, from_p, to_p); +} + +void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio) +{ + struct component *from_p = NULL, *to_p = NULL; + + if (to == from) + return; + + if (from) + { + COMPONENT_FIND(components, from, from_p); + STARPU_ASSERT(from_p); + } + if (to) + { + COMPONENT_FIND(components, to, to_p); + STARPU_ASSERT(to_p); + } + if (from_p) + { + if (prio) + from_p->npriotasks--; + else + 
from_p->ntasks--; + } + if (to_p) + { + if (prio) + to_p->npriotasks++; + else + to_p->ntasks++; + } + else + nflowing--; + + // fprintf(stderr,"pull from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none"); + fxt_component_print_step(output, options, timestamp, workerid, 0, from_p, to_p); +} + +void _starpu_fxt_component_finish(FILE *file) +{ + /* Javascript code from Anthony Simonet */ + fprintf(file, "\t\t\n"); + + fprintf(file, "\t\t
    \n"); + fprintf(file, "\t\t
    \n"); + fprintf(file, "\t\t\t

    \n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t

    \n"); + fprintf(file, "\t\t\t\tAuto speed (state/s): 4\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t

    \n"); + fprintf(file, "\t\t\t

    \n"); + fprintf(file, "\t\t\t
    \n"); + fprintf(file, "\t\t\t\tGo to state\n"); + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t\t\n"); /* Dummy input preventing the page from being refreshed when enter is pressed. */ + fprintf(file, "\t\t\t\t\n"); + fprintf(file, "\t\t\t
    \n"); + fprintf(file, "\t\t\t
    \n"); + fprintf(file, "\t\t
    \n"); + fprintf(file, "\t\n"); + fprintf(file, "\n"); +} +#endif diff --git a/src/debug/traces/starpu_fxt.c b/src/debug/traces/starpu_fxt.c new file mode 100644 index 0000000..e684551 --- /dev/null +++ b/src/debug/traces/starpu_fxt.c @@ -0,0 +1,5421 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + + +#ifdef STARPU_PAPI +#include +#endif + +#ifdef STARPU_USE_FXT +#include "starpu_fxt.h" +#include +#include + +#define CPUS_WORKER_COLORS_NB 8 +#define ACCEL_WORKER_COLORS_NB 9 + +/* How many times longer an idle period has to be before the smoothing + * heuristics avoids averaging codelet gflops */ +#define IDLE_FACTOR 2 + +static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4", "/greens9/9", "/greens9/3", "/greens9/2", "/greens9/1" }; +static char *accel_worker_colors[ACCEL_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2", "/ylorrd9/1"}; +static char *worker_colors[STARPU_NMAXWORKERS]; + +static unsigned cpus_index = 0; +static unsigned accel_index = 0; +static uint64_t* number_events = NULL; + +static unsigned long fut_keymask; + +/* Get pointer to string starting at nth parameter */ +static char *get_fxt_string(struct fxt_ev_64 *ev, int n) +{ + char *s = (char *)&ev->param[n]; + s[(FXT_MAX_PARAMS-n)*sizeof(unsigned long) - 1] = 0; + return s; +} + +/* + * Paje trace file tools + */ + +static FILE *out_paje_file; +static FILE *distrib_time; +static FILE *activity_file; +static FILE *anim_file; +static FILE *tasks_file; +static FILE *data_file; +#ifdef STARPU_PAPI +static FILE *papi_file; +#endif +static FILE *trace_file; +static FILE *comms_file; +static FILE *sched_tasks_file; +static FILE *number_events_file; + +struct data_parameter_info +{ + unsigned long handle; + unsigned long size; + int mode; + long numa_nodes_bitmap; +}; + +struct task_info +{ + UT_hash_handle hh; + char *model_name; + char *name; + char *file; + int line; + int exclude_from_dag; + int show; + unsigned type; + unsigned long job_id; + unsigned long submit_order; + long priority; + int color; + uint64_t tag; + int workerid; + int node; + double submit_time; + double start_time; + 
double end_time; + unsigned long footprint; + unsigned long kflops; + long iterations[2]; + char *parameters; + unsigned int ndeps; + unsigned long *dependencies; + unsigned int nend_deps; + unsigned long *end_dependencies; + char **dep_labels; + unsigned long ndata; + struct data_parameter_info *data; + int mpi_rank; +#ifdef STARPU_BUBBLE + unsigned is_bubble; + unsigned long bubble_parent; +#endif +}; + +static struct task_info *tasks_info; + +static struct task_info *get_task(unsigned long job_id, int mpi_rank) +{ + struct task_info *task; + + HASH_FIND(hh, tasks_info, &job_id, sizeof(job_id), task); + if (!task) + { + unsigned i; + _STARPU_MALLOC(task, sizeof(*task)); + task->model_name = NULL; + task->name = NULL; + task->file = NULL; + task->line = -1; + task->exclude_from_dag = 0; + task->show = 0; + task->type = 0; + task->job_id = job_id; + task->submit_order = 0; + task->priority = 0; + task->color = 0; + task->tag = 0; + task->workerid = -1; + task->node = -1; + task->submit_time = 0.; + task->start_time = 0.; + task->end_time = 0.; + task->footprint = 0; + task->kflops = 0.; + for (i = 0; i < sizeof(task->iterations)/sizeof(task->iterations[0]); i++) + task->iterations[i] = -1; + task->parameters = NULL; + task->ndeps = 0; + task->dependencies = NULL; + task->nend_deps = 0; + task->end_dependencies = NULL; + task->dep_labels = NULL; + task->ndata = 0; + task->data = NULL; + task->mpi_rank = mpi_rank; +#ifdef STARPU_BUBBLE + task->is_bubble = 0; + task->bubble_parent = 0; +#endif + HASH_ADD(hh, tasks_info, job_id, sizeof(task->job_id), task); + } + else + STARPU_ASSERT(task->mpi_rank == mpi_rank); + + return task; +} + +/* Return whether to show this task in the DAG or not */ +static int show_task(struct task_info *task, struct starpu_fxt_options *options) +{ + if (task->show) + return 1; + if (task->type & STARPU_TASK_TYPE_INTERNAL && !options->internal) + return 0; + if (task->type & STARPU_TASK_TYPE_DATA_ACQUIRE && options->no_acquire) + return 0; + 
return 1; +} + +void _starpu_convert_numa_nodes_bitmap_to_str(long bitmap, char* str) +{ + if (bitmap < 0) + { + sprintf(str, "%ld", bitmap); + } + else + { + long i = 0; + int first = 1; + for (; i < (long) (sizeof(bitmap)*8)-1; i++) + { + if (bitmap & ((long) 1 << i)) + { + if (first) + { + sprintf(str, "%ld", i); + first = 0; + } + else + { + strcat(str, ","); + char number[4]; + sprintf(number, "%ld", i); + strcat(str, number); + } + } + } + } +} + +static void task_dump(struct task_info *task, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + unsigned i; + + if (task->exclude_from_dag) + goto out; + if (!tasks_file) + goto out; + + if (task->name) + fprintf(tasks_file, "Name: %s\n", task->name); + if (task->model_name) + fprintf(tasks_file, "Model: %s\n", task->model_name); + if (task->file) + { + fprintf(tasks_file, "File: %s\n", task->file); + fprintf(tasks_file, "Line: %d\n", task->line); + } + fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id); + if (task->submit_order) + fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order); + fprintf(tasks_file, "Priority: %ld\n", task->priority); + if (task->dependencies) + { + fprintf(tasks_file, "DependsOn:"); + for (i = 0; i < task->ndeps; i++) + fprintf(tasks_file, " %s%lu", prefix, task->dependencies[i]); + fprintf(tasks_file, "\n"); + } + if (task->dep_labels) + { + fprintf(tasks_file, "DepLabels:"); + for (i = 0; i < task->ndeps; i++) + fprintf(tasks_file, " %s", task->dep_labels[i]); + fprintf(tasks_file, "\n"); + } + fprintf(tasks_file, "Tag: %"PRIx64"\n", task->tag); + if (task->workerid >= 0) + fprintf(tasks_file, "WorkerId: %d\n", task->workerid); + if (task->node >= 0) + fprintf(tasks_file, "MemoryNode: %d\n", task->node); + if (task->submit_time != 0.) + fprintf(tasks_file, "SubmitTime: %f\n", task->submit_time); + if (task->start_time != 0.) + fprintf(tasks_file, "StartTime: %f\n", task->start_time); + if (task->end_time != 0.) 
+ fprintf(tasks_file, "EndTime: %f\n", task->end_time); + fprintf(tasks_file, "Footprint: %lx\n", task->footprint); + if (task->kflops != 0) + fprintf(tasks_file, "GFlop: %f\n", ((double) task->kflops) / 1000000); + if (task->iterations[0] != -1) + { + fprintf(tasks_file, "Iteration:"); + for (i = 0; i < sizeof(task->iterations)/sizeof(task->iterations[0]); i++) + { + if (task->iterations[i] == -1) + break; + fprintf(tasks_file, " %ld", task->iterations[i]); + } + fprintf(tasks_file, "\n"); + } + if (task->parameters) + fprintf(tasks_file, "Parameters: %s\n", task->parameters); + if (task->data) + { + fprintf(tasks_file, "Handles:"); + for (i = 0; i < task->ndata; i++) + fprintf(tasks_file, " %lx", task->data[i].handle); + fprintf(tasks_file, "\n"); + fprintf(tasks_file, "Modes:"); + for (i = 0; i < task->ndata; i++) + fprintf(tasks_file, " %s%s%s%s%s%s", + (task->data[i].mode & STARPU_R)?"R":"", + (task->data[i].mode & STARPU_W)?"W":"", + (task->data[i].mode & STARPU_SCRATCH)?"S":"", + (task->data[i].mode & STARPU_REDUX)?"X":"", + (task->data[i].mode & STARPU_MPI_REDUX)?"X-mpi":"", + (task->data[i].mode & STARPU_COMMUTE)?"C":""); + fprintf(tasks_file, "\n"); + fprintf(tasks_file, "Sizes:"); + for (i = 0; i < task->ndata; i++) + fprintf(tasks_file, " %lu", task->data[i].size); + fprintf(tasks_file, "\n"); + fprintf(tasks_file, "NumaNodes:"); + for (i = 0; i < task->ndata; i++) + { + char str[STARPU_TRACE_STR_LEN] = ""; + _starpu_convert_numa_nodes_bitmap_to_str(task->data[i].numa_nodes_bitmap, str); + fprintf(tasks_file, " %s", str); + } + fprintf(tasks_file, "\n"); + } + fprintf(tasks_file, "MPIRank: %d\n", task->mpi_rank); +#ifdef STARPU_BUBBLE + fprintf(tasks_file, "Bubble: %u\n", task->is_bubble); + fprintf(tasks_file, "ParentBubble: %lu\n", task->bubble_parent); +#endif + if (task->nend_deps) + { + fprintf(tasks_file, "EndDependencies: "); + unsigned int j=0; + for(j=0 ; jnend_deps-1 ; j++) + fprintf(tasks_file, "%lu, ", task->end_dependencies[j]); + 
fprintf(tasks_file, "%lu ", task->end_dependencies[task->nend_deps-1]); + } + fprintf(tasks_file, "\n"); + +out: + free(task->name); + free(task->model_name); + free(task->file); + free(task->dependencies); + if (task->dep_labels) + { + for (i = 0; i < task->ndeps; i++) + free(task->dep_labels[i]); + free(task->dep_labels); + } + free(task->parameters); + free(task->data); + HASH_DEL(tasks_info, task); + free(task); +} + +struct data_info +{ + UT_hash_handle hh; + unsigned long handle; + char *name; + size_t size; + starpu_ssize_t max_size; + char *description; + unsigned dimensions; + unsigned long *dims; + int home_node; + int mpi_rank; + int mpi_owner; + long mpi_tag; +}; + +static struct data_info *data_info; + +static struct data_info *get_data(unsigned long handle, int mpi_rank) +{ + struct data_info *data; + + HASH_FIND(hh, data_info, &handle, sizeof(handle), data); + if (!data) + { + _STARPU_MALLOC(data, sizeof(*data)); + data->handle = handle; + data->name = NULL; + data->size = 0; + data->max_size = -1; + data->description = 0; + data->dimensions = 0; + data->dims = NULL; + data->home_node = STARPU_MAIN_RAM; + data->mpi_rank = mpi_rank; + data->mpi_owner = mpi_rank; + data->mpi_tag = -1; + HASH_ADD(hh, data_info, handle, sizeof(handle), data); + } + else + STARPU_ASSERT(data->mpi_rank == mpi_rank); + + return data; +} + +unsigned _starpu_fxt_data_get_coord(unsigned long handle, int mpi_rank, unsigned dim) +{ + struct data_info *data = get_data(handle, mpi_rank); + return data->dimensions >= dim+1 ? 
data->dims[dim] : 0; +} + +const char *_starpu_fxt_data_get_name(unsigned long handle, int mpi_rank) +{ + struct data_info *data = get_data(handle, mpi_rank); + return data->name; +} + +static void handle_papi_event(struct fxt_ev_64 *ev STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_PAPI + int event_code = ev->param[0]; + unsigned long task = ev->param[1]; + long long int value = ev->param[2]; + //char *prefix = options->file_prefix; + + if (papi_file) + { + char event_str[PAPI_MAX_STR_LEN]; + PAPI_event_code_to_name(event_code, event_str); + fprintf(papi_file, "JobId: %lu\n", task); + fprintf(papi_file, "Event: %s\n", event_str); + fprintf(papi_file, "Value: %lld\n", value); + fprintf(papi_file, "\n"); + } +#endif +} + +static void data_dump(struct data_info *data) +{ + if (!data_file) + goto out; + fprintf(data_file, "Handle: %lx\n", data->handle); + fprintf(data_file, "HomeNode: %d\n", data->home_node); + if (data->mpi_rank >= 0) + fprintf(data_file, "MPIRank: %d\n", data->mpi_rank); + if (data->name) + fprintf(data_file, "Name: %s\n", data->name); + fprintf(data_file, "Size: %lu\n", (unsigned long) data->size); + if (data->max_size != -1) + fprintf(data_file, "MaxSize: %lu\n", (unsigned long) data->max_size); + if (data->description) + fprintf(data_file, "Description: %s\n", data->description); + if (data->dimensions) + { + unsigned i; + fprintf(data_file, "Coordinates:"); + for (i = 0; i < data->dimensions; i++) + fprintf(data_file, " %lu", data->dims[i]); + fprintf(data_file, "\n"); + } + if (data->mpi_owner >= 0) + fprintf(data_file, "MPIOwner: %d\n", data->mpi_owner); + if (data->mpi_tag >= 0) + fprintf(data_file, "MPITag: %ld\n", data->mpi_tag); + fprintf(data_file, "\n"); +out: + free(data->dims); + free(data->description); + free(data->name); + HASH_DEL(data_info, data); + free(data); +} + +static void set_next_cpu_worker_color(int workerid) +{ + if (workerid >= STARPU_NMAXWORKERS) + return; + 
worker_colors[workerid] = cpus_worker_colors[cpus_index++]; + if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0; +} + +static void set_next_accel_worker_color(int workerid) +{ + if (workerid >= STARPU_NMAXWORKERS) + return; + worker_colors[workerid] = accel_worker_colors[accel_index++]; + if (accel_index == ACCEL_WORKER_COLORS_NB) accel_index = 0; +} + +static const char *get_worker_color(int workerid) +{ + if (workerid >= STARPU_NMAXWORKERS) + workerid = STARPU_NMAXWORKERS - 1; + return worker_colors[workerid]; +} + +static unsigned get_color_symbol_red(char *name) +{ + /* choose some color ... that's disguting yes */ + uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); + return (unsigned)starpu_hash_crc32c_string("red", hash_symbol) % 1024; +} + +static unsigned get_color_symbol_green(char *name) +{ + /* choose some color ... that's disguting yes */ + uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); + return (unsigned)starpu_hash_crc32c_string("green", hash_symbol) % 1024; +} + +static unsigned get_color_symbol_blue(char *name) +{ + /* choose some color ... that's disguting yes */ + uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); + return (unsigned)starpu_hash_crc32c_string("blue", hash_symbol) % 1024; +} + +/* Start time of last codelet for this worker */ +static double last_codelet_start[STARPU_NMAXWORKERS]; +/* End time of last codelet for this worker */ +static double last_codelet_end[STARPU_NMAXWORKERS]; +/* _STARPU_FUT_DO_PROBE5STR records only 3 longs */ +char _starpu_last_codelet_symbol[STARPU_NMAXWORKERS][(FXT_MAX_PARAMS-5)*sizeof(unsigned long)]; +static int last_codelet_parameter[STARPU_NMAXWORKERS]; +#define MAX_PARAMETERS 8 +static char last_codelet_parameter_description[STARPU_NMAXWORKERS][MAX_PARAMETERS][FXT_MAX_PARAMS*sizeof(unsigned long)]; + +/* If more than a period of time has elapsed, we flush the profiling info, + * otherwise they are accumulated every time there is a new relevant event. 
*/ +#define ACTIVITY_PERIOD 75.0 +static double last_activity_flush_timestamp[STARPU_NMAXWORKERS]; +static double accumulated_sleep_time[STARPU_NMAXWORKERS]; +static double accumulated_exec_time[STARPU_NMAXWORKERS]; + +static unsigned steal_number = 0; + +LIST_TYPE(_starpu_symbol_name, + char *name; +) + +static struct _starpu_symbol_name_list symbol_list; + +/* List of on-going communications */ +LIST_TYPE(_starpu_communication, + unsigned comid; + double comm_start; + double bandwidth; + unsigned src_node; + unsigned dst_node; + unsigned long size; + const char *type; + unsigned long handle; + struct _starpu_communication *peer; +) + +static struct _starpu_communication_list communication_list; +static double current_bandwidth_in_per_node[STARPU_MAXNODES] = {0.0}; +static double current_bandwidth_out_per_node[STARPU_MAXNODES] = {0.0}; + +/* List of on-going computations */ +LIST_TYPE(_starpu_computation, + double comp_start; + double gflops; + struct _starpu_computation *peer; +) + +/* List of ongoing computations */ +static struct _starpu_computation_list computation_list; +/* Last computation for each worker */ +static struct _starpu_computation *ongoing_computation[STARPU_NMAXWORKERS]; + +/* Current total GFlops */ +static double current_computation; +/* Time of last update of current total GFlops */ +static double current_computation_time; + +/* + * Generic tools + */ + +#define WORKER_STATE (1 << 0) +#define THREAD_STATE (1 << 1) +#define COMM_THREAD_STATE (1 << 2) +#define USER_THREAD_STATE (1 << 3) + +static struct +{ + const char *short_name; + const char *long_name; + uint8_t flags; +} states_list[] = +{ + { "Fi", "FetchingInput", WORKER_STATE | THREAD_STATE }, + { "Po", "PushingOutput", WORKER_STATE | THREAD_STATE }, + { "P", "Progressing", WORKER_STATE | THREAD_STATE }, + { "U", "Unpartitioning", WORKER_STATE | THREAD_STATE }, + { "B", "Overhead", WORKER_STATE | THREAD_STATE }, + { "Ps", "Parallel sync", WORKER_STATE | THREAD_STATE }, + { "In", 
"Initializing", WORKER_STATE | THREAD_STATE }, + { "D", "Deinitializing", WORKER_STATE | THREAD_STATE }, + { "E", "Executing", WORKER_STATE | THREAD_STATE }, + { "C", "Callback", WORKER_STATE | THREAD_STATE | USER_THREAD_STATE }, + { "H", "Hypervisor", WORKER_STATE | THREAD_STATE }, + { "Sc", "Scheduling", WORKER_STATE | THREAD_STATE | USER_THREAD_STATE }, + { "I", "Idle", WORKER_STATE | THREAD_STATE }, + { "Sl", "Sleeping", WORKER_STATE | THREAD_STATE | COMM_THREAD_STATE }, + { "Bu", "Building task", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, + { "Su", "Submitting task", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, + { "Th", "Throttling task submission", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, + { "MD", "Decoding task for MPI", THREAD_STATE | USER_THREAD_STATE }, + { "MPr", "Preparing task for MPI", THREAD_STATE | USER_THREAD_STATE }, + { "MPo", "Post-processing task for MPI", THREAD_STATE | USER_THREAD_STATE }, + { "P", "Processing", COMM_THREAD_STATE }, + { "UT", "UserTesting", COMM_THREAD_STATE }, + { "UW", "UserWaiting", COMM_THREAD_STATE }, + { "SdS", "SendSubmitted", COMM_THREAD_STATE }, + { "RvS", "ReceiveSubmitted", COMM_THREAD_STATE }, + { "SdC", "SendCompleted", COMM_THREAD_STATE }, + { "RvC", "ReceiveCompleted", COMM_THREAD_STATE }, + { "W", "Waiting task", THREAD_STATE | USER_THREAD_STATE }, + { "WA", "Waiting all tasks", THREAD_STATE | USER_THREAD_STATE }, + { "No", "Nothing", THREAD_STATE | USER_THREAD_STATE }, +}; + +static const char *get_state_name(const char *short_name, uint32_t states) +{ + unsigned i; + + for (i = 0; i < sizeof(states_list) / sizeof(states_list[0]); i++) + if ((states_list[i].flags & states) && + !strcmp(states_list[i].short_name, short_name)) + return states_list[i].long_name; + return short_name; +} + +static double compute_time_stamp(double ev_time, struct starpu_fxt_options *options) +{ + double offset = 0; + + if (options->file_offset.nb_barriers < 2) + { + offset = (double) 
options->file_offset.offset_start; + } + else + { + /* Since a clock drift can happen during the execution, the offset to + * apply at the beginning of the trace can be different from the one + * to apply at the end of the trace. Thus, we make an interpolation to + * know what is the offset at the considerated time. */ + double xA = (double) options->file_offset.local_time_start; + double xB = (double) options->file_offset.local_time_end; + double yA = (double) options->file_offset.offset_start; + double yB = (double) options->file_offset.offset_end; + + /* We interpolate offset only for times between the two synchronization + * barriers, because outside of this interval, applying the + * interpolated offset can lead to negative times... Moreover, + * timestamps of events outside of this interval don't need to be + * precise (events describing the machine, StarPU's initialization...) + * */ + if (ev_time <= xA) + { + offset = yA; + } + else if (ev_time >= xB) + { + offset = yB; + } + else + { + offset = ((yB-yA) / (xB-xA)) * (ev_time-xA) + yA; +#ifndef STARPU_NO_ASSERT + // Check that the offset is correctly inside the interval: + if (yB > yA) + { + STARPU_ASSERT(offset >= yA && offset <= yB); + } + else + { + STARPU_ASSERT(offset <= yA && offset >= yB); + } +#endif + } + } + + STARPU_ASSERT((ev_time + offset) >= 0); + return (ev_time + offset) / 1000000.0; +} + +static double get_event_time_stamp(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double ev_time = (double) ev->time; + + return compute_time_stamp(ev_time, options); +} + +/* + * Auxiliary functions for poti handling names + */ +#ifdef STARPU_HAVE_POTI +static char *memnode_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid) +{ + snprintf(output, len, "%smn%lu", prefix, memnodeid); + return output; +} + +static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid) +{ + snprintf(output, len, "%smm%lu", 
prefix, memnodeid); + return output; +} + +static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid) +{ + snprintf(output, len, "%st%lu", prefix, threadid); + return output; +} + +static char *worker_container_alias(char *output, int len, const char *prefix, long unsigned int workerid) +{ + snprintf(output, len, "%sw%lu", prefix, workerid); + return output; +} + +static char *mpicommthread_container_alias(char *output, int len, const char *prefix) +{ + snprintf(output, len, "%smpict", prefix); + return output; +} + +static char *program_container_alias(char *output, int len, const char *prefix) +{ + snprintf(output, len, "%sp", prefix); + return output; +} + +static char *scheduler_container_alias(char *output, int len, const char *prefix) +{ + snprintf(output, len, "%ssched", prefix); + return output; +} +#endif + +static int nworkers = 0; + +static struct worker_entry +{ + UT_hash_handle hh; + unsigned long tid; + int workerid; + int sync; /* Set only for workers which are part of the same set, i.e. 
on thread drivers several workers */ +} *worker_ids; + +static int register_thread(unsigned long nodeid, unsigned long tid, int workerid, int sync) +{ + struct worker_entry *entry = NULL; + + tid = nodeid*tid+tid; + + HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); + + /* only register a thread once */ + if (entry) + return 0; + + _STARPU_MALLOC(entry, sizeof(*entry)); + entry->tid = tid; + entry->workerid = workerid; + entry->sync = sync; + + HASH_ADD(hh, worker_ids, tid, sizeof(tid), entry); + return 1; +} + +static void free_worker_ids(void) +{ + struct worker_entry *entry, *tmp; + HASH_ITER(hh, worker_ids, entry, tmp) + { + HASH_DEL(worker_ids, entry); + free(entry); + } +} + +static int register_worker_id(unsigned long nodeid, unsigned long tid, int workerid, int sync) +{ + nworkers++; + STARPU_ASSERT_MSG_ALWAYS(workerid < STARPU_NMAXWORKERS, "Too many workers in this trace, please increase in ./configure invocation the maximum number of CPUs and GPUs to the same value as was used for execution"); + + return register_thread(nodeid, tid, workerid, sync); +} +static int prefixTOnodeid (const char *prefix) +{ + //if we are a single-node trace, prefix is empty, so return 0 + if (strcmp(prefix, "")==0) return 0; + + char *str = strdup(prefix); + str[strlen(prefix)-1] = '\0'; + unsigned long nodeid = atoi(str); + free(str); + return nodeid; +} + +/* Register user threads if not done already */ +static void register_user_thread(double timestamp, unsigned long tid, const char *prefix) +{ + if (register_thread(prefixTOnodeid(prefix), tid, -1, 0) && out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char program_container[STARPU_POTI_STR_LEN]; + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + char new_thread_container_alias[STARPU_POTI_STR_LEN]; + thread_container_alias(new_thread_container_alias, STARPU_POTI_STR_LEN, prefix, tid); + char new_thread_container_name[STARPU_POTI_STR_LEN]; + snprintf(new_thread_container_name, 
sizeof(new_thread_container_name), "%sUserThread%lu", prefix, tid); + poti_CreateContainer(timestamp, new_thread_container_alias, "UT", program_container, new_thread_container_alias); +#else + fprintf(out_paje_file, "7 %.9f %st%lu UT %sp %sUserThread%lu\n", + timestamp, prefix, tid, prefix, prefix, tid); +#endif + } +} + +static void register_mpi_thread(unsigned long nodeid, unsigned long tid) +{ + register_thread(nodeid, tid, -2, 0); +} + +static int find_worker_id(unsigned long nodeid, unsigned long tid) +{ + struct worker_entry *entry; + + tid = nodeid*tid+tid; + + HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); + if (!entry) + return -1; + + return entry->workerid; +} + +/* check whether this thread manages several workers */ +static int find_sync(unsigned long nodeid, unsigned long tid) +{ + struct worker_entry *entry; + + tid = nodeid*tid+tid; + + HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); + if (!entry) + return 0; + + return entry->sync; +} + +static void update_accumulated_time(int worker, double sleep_time, double exec_time, double current_timestamp, int forceflush) +{ + accumulated_sleep_time[worker] += sleep_time; + accumulated_exec_time[worker] += exec_time; + + /* If sufficient time has elapsed since the last flush, we have a new + * point in our graph */ + double elapsed = current_timestamp - last_activity_flush_timestamp[worker]; + if (forceflush || (elapsed > ACTIVITY_PERIOD)) + { + if (activity_file) + fprintf(activity_file, "%d\t%.9f\t%.9f\t%.9f\t%.9f\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]); + + /* reset the accumulated times */ + last_activity_flush_timestamp[worker] = current_timestamp; + accumulated_sleep_time[worker] = 0.0; + accumulated_exec_time[worker] = 0.0; + } +} + +static void memnode_set_state(double time, const char *prefix, unsigned int memnodeid, const char *name) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + 
memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); + poti_SetState(time, container, "MS", name); +#else + fprintf(out_paje_file, "10 %.9f %smm%u MS %s\n", time, prefix, memnodeid, name); +#endif +} + +static void memnode_push_state(double time, const char *prefix, unsigned int memnodeid, const char *name) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); + poti_PushState(time, container, "MS", name); +#else + fprintf(out_paje_file, "11 %.9f %smm%u MS %s\n", time, prefix, memnodeid, name); +#endif +} + +static void memnode_pop_state(double time, const char *prefix, unsigned int memnodeid) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); + poti_PopState(time, container, "MS"); +#else + fprintf(out_paje_file, "12 %.9f %smm%u MS\n", time, prefix, memnodeid); +#endif +} + +static void memnode_event(double time, const char *prefix, unsigned int memnodeid, const char *name, unsigned long handle, unsigned long value, unsigned long info, long size_prio, unsigned int dest, struct starpu_fxt_options *options) +{ + if (!options->memory_states) + return; + // If there is not a valid memory node, we cannot associate it + if((int)memnodeid < 0) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + char p_handle[STARPU_POTI_STR_LEN]; + char p_value[STARPU_POTI_STR_LEN]; + memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); + snprintf(p_handle, sizeof(p_handle), "%lx", handle); + snprintf(p_value, sizeof(p_value), "%lx", value); + +#ifdef HAVE_POTI_USER_NEWEVENT + char p_dest[STARPU_POTI_STR_LEN]; + char p_info[STARPU_POTI_STR_LEN]; + char p_size[STARPU_POTI_STR_LEN]; + + memmanager_container_alias(p_dest, STARPU_POTI_STR_LEN, prefix, dest); + snprintf(p_info, sizeof(p_info), "%lu", info); + snprintf(p_size, 
sizeof(p_size), "%ld", size_prio); + + poti_user_NewEvent(_starpu_poti_MemoryEvent, time, container, name, p_value, 4, + p_handle, p_info, p_size, p_dest); +#else + poti_NewEvent(time, container, name, p_handle); +#endif +#else + fprintf(out_paje_file, "22 %.9f %s %smm%u %lx %lx %lu %ld %smm%u\n", time, name, prefix, memnodeid, value, handle, info, size_prio, prefix, dest); +#endif +} + +static void worker_set_state(double time, const char *prefix, long unsigned int workerid, const char *name) +{ + if (fut_keymask == FUT_KEYMASK0) + return; + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); + poti_SetState(time, container, "WS", name); +#else + fprintf(out_paje_file, "10 %.9f %sw%lu WS \"%s\"\n", time, prefix, workerid, name); +#endif +} + +static void worker_push_state(double time, const char *prefix, long unsigned int workerid, const char *name) +{ + if (fut_keymask == FUT_KEYMASK0) + return; + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); + poti_PushState(time, container, "WS", name); +#else + fprintf(out_paje_file, "11 %.9f %sw%lu WS %s\n", time, prefix, workerid, name); +#endif +} + +static void worker_pop_state(double time, const char *prefix, long unsigned int workerid) +{ + if (fut_keymask == FUT_KEYMASK0) + return; + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); + poti_PopState(time, container, "WS"); +#else + fprintf(out_paje_file, "12 %.9f %sw%lu WS\n", time, prefix, workerid); +#endif +} + +static void thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name, long job_id) +{ + if (find_sync(prefixTOnodeid(prefix), threadid)) + /* Unless using worker 
sets, collapse thread and worker */ + return worker_set_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid), name); + if (!out_paje_file) + return; + +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + if (job_id >= 0) + { + char jobid_str[STARPU_POTI_STR_LEN]; + snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); + poti_user_SetState(_starpu_poti_JobState, time, container, "S", name, 1, jobid_str); + } + else + poti_SetState(time, container, "S", name); +#else + if (job_id >= 0) + fprintf(out_paje_file, "26 %.9f %st%lu S %s %ld\n", time, prefix, threadid, name, job_id); + else + fprintf(out_paje_file, "10 %.9f %st%lu S %s\n", time, prefix, threadid, name); +#endif +} + +#if 0 +/* currently unused */ +static void user_thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name) +{ + register_user_thread(time, threadid, prefix); + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + poti_SetState(time, container, "US", name); +#else + fprintf(out_paje_file, "10 %.9f %st%lu US %s\n", time, prefix, threadid, name); +#endif +} +#endif + +static void user_thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name) +{ + register_user_thread(time, threadid, prefix); + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + poti_PushState(time, container, "US", name); +#else + fprintf(out_paje_file, "11 %.9f %st%lu US %s\n", time, prefix, threadid, name); +#endif + } +} + +static void user_thread_pop_state(double time, const char *prefix, long unsigned int threadid) +{ + register_user_thread(time, threadid, prefix); + if (out_paje_file) + { +#ifdef 
STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + poti_PopState(time, container, "US"); +#else + fprintf(out_paje_file, "12 %.9f %st%lu US\n", time, prefix, threadid); +#endif + } +} + +static void thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name) +{ + if (find_sync(prefixTOnodeid(prefix), threadid)) + /* Unless using worker sets, collapse thread and worker */ + return worker_push_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid), name); + + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + poti_PushState(time, container, "S", name); +#else + fprintf(out_paje_file, "11 %.9f %st%lu S %s\n", time, prefix, threadid, name); +#endif +} + +static void thread_pop_state(double time, const char *prefix, long unsigned int threadid) +{ + if (find_sync(prefixTOnodeid(prefix), threadid)) + /* Unless using worker sets, collapse thread and worker */ + return worker_pop_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid)); + + if (!out_paje_file) + return; +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); + poti_PopState(time, container, "S"); +#else + fprintf(out_paje_file, "12 %.9f %st%lu S\n", time, prefix, threadid); +#endif +} + +static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, const char *parameters, unsigned long footprint, unsigned long long tag, unsigned long job_id, double gflop, unsigned X, unsigned Y, unsigned Z STARPU_ATTRIBUTE_UNUSED, long iteration, long subiteration, const char* numa_nodes, struct starpu_fxt_options *options) +{ + struct task_info *task = get_task(job_id, options->file_rank); +#ifdef 
STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); + char size_str[STARPU_POTI_STR_LEN]; + char parameters_str[STARPU_POTI_STR_LEN]; + char footprint_str[STARPU_POTI_STR_LEN]; + char tag_str[STARPU_POTI_STR_LEN]; + char jobid_str[STARPU_POTI_STR_LEN]; + char submitorder_str[STARPU_POTI_STR_LEN]; + char priority_str[STARPU_POTI_STR_LEN]; + char gflop_str[STARPU_POTI_STR_LEN]; + char X_str[STARPU_POTI_STR_LEN], Y_str[STARPU_POTI_STR_LEN], Z_str[STARPU_POTI_STR_LEN]; + char iteration_str[STARPU_POTI_STR_LEN], subiteration_str[STARPU_POTI_STR_LEN]; + + snprintf(size_str, sizeof(size_str), "%lu", size); + snprintf(parameters_str, sizeof(parameters_str), "%s", parameters); + snprintf(footprint_str, sizeof(footprint_str), "%08lx", footprint); + snprintf(tag_str, sizeof(tag_str), "%016llx", tag); + snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); + snprintf(submitorder_str, sizeof(submitorder_str), "%s%lu", prefix, task->submit_order); + snprintf(priority_str, sizeof(priority_str), "%lu", task->priority); + snprintf(gflop_str, sizeof(gflop_str), "%f", gflop); + snprintf(X_str, sizeof(X_str), "%u", X); + snprintf(Y_str, sizeof(Y_str), "%u", Y); + snprintf(Z_str, sizeof(Z_str), "%u", Z); + snprintf(iteration_str, sizeof(iteration_str), "%ld", iteration); + snprintf(subiteration_str, sizeof(subiteration_str), "%ld", subiteration); + +#ifdef HAVE_POTI_INIT_CUSTOM + poti_user_SetState(_starpu_poti_extendedSetState, time, container, "WS", name, 13, size_str, + parameters_str, + footprint_str, + tag_str, + jobid_str, + submitorder_str, + priority_str, + gflop_str, + X_str, + Y_str, + /* Z_str, */ + iteration_str, + subiteration_str, + numa_nodes); +#else + poti_SetState(time, container, "WS", name); +#endif +#else + fprintf(out_paje_file, "20 %.9f %sw%lu WS \"%s\" %lu \"%s\" %08lx %016llx %s%lu %s%lu %lu %f %u %u "/*"%u "*/"%ld %ld \"%s\"\n", time, prefix, workerid, name, size, 
parameters, footprint, tag, prefix, job_id, prefix, task->submit_order, task->priority, gflop, X, Y, /*Z,*/ iteration, subiteration, numa_nodes); +#endif +} + +static void mpicommthread_set_state(double time, const char *prefix, const char *name) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_SetState(time, container, "CtS", name); +#else + fprintf(out_paje_file, "10 %.9f %smpict CtS %s\n", time, prefix, name); +#endif +} + +static void mpicommthread_push_state(double time, const char *prefix, const char *name) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_PushState(time, container, "CtS", name); +#else + fprintf(out_paje_file, "11 %.9f %smpict CtS %s\n", time, prefix, name); +#endif +} + +static void mpicommthread_pop_state(double time, const char *prefix) +{ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_PopState(time, container, "CtS"); +#else + fprintf(out_paje_file, "12 %.9f %smpict CtS\n", time, prefix); +#endif +} + +static void recfmt_dump_state(double time, const char *event, int workerid, long int threadid, const char *name, const char *type) +{ + fprintf(trace_file, "E: %s\n", event); + if (name) + fprintf(trace_file, "N: %s\n", name); + if (type) + fprintf(trace_file, "C: %s\n", type); + fprintf(trace_file, "W: %d\n", workerid); + if (threadid == -1) + fprintf(trace_file, "T: -1\n"); + else + fprintf(trace_file, "T: %ld\n", threadid); + fprintf(trace_file, "S: %f\n", time); + fprintf(trace_file, "\n"); +} + +static void recfmt_set_state(double time, int workerid, long int threadid, const char *name, const char *type) +{ + recfmt_dump_state(time, "SetState", workerid, threadid, name, type); +} + +static void recfmt_push_state(double time, int workerid, long 
unsigned int threadid, const char *name, const char *type) +{ + recfmt_dump_state(time, "PushState", workerid, threadid, name, type); +} + +static void recfmt_pop_state(double time, int workerid, long unsigned int threadid) +{ + recfmt_dump_state(time, "PopState", workerid, threadid, NULL, NULL); +} + +static void recfmt_worker_set_state(double time, int workerid, const char *name, const char *type) +{ + const char *state_name; + + /* Special case for task events. */ + if (!strcmp(type, "Task")) + state_name = name; + else + state_name = get_state_name(name, WORKER_STATE); + recfmt_set_state(time, workerid, -1, state_name, type); +} + +static void recfmt_thread_set_state(double time, unsigned long nodeid, long unsigned int threadid, const char *name, const char *type) +{ + const char *state_name; + + /* Special case for the end event which is somehow a fake. */ + if (!strcmp(name, "End") && !type) + state_name = name; + else + state_name = get_state_name(name, THREAD_STATE); + + recfmt_set_state(time, find_worker_id(nodeid, threadid), threadid, state_name, type); +} + +static void recfmt_thread_push_state(double time, unsigned long nodeid, long unsigned int threadid, const char *name, const char *type) +{ + const char *state_name = get_state_name(name, THREAD_STATE); + recfmt_push_state(time, find_worker_id(nodeid, threadid), threadid, state_name, type); +} + +static void recfmt_thread_pop_state(double time, unsigned long nodeid, long unsigned int threadid) +{ + recfmt_pop_state(time, find_worker_id(nodeid, threadid), threadid); +} + +static void recfmt_mpicommthread_set_state(double time, const char *name) +{ + const char *state_name = get_state_name(name, COMM_THREAD_STATE); + recfmt_set_state(time, -1, 0, state_name, "MPI"); /* XXX */ +} + +static void recfmt_mpicommthread_push_state(double time, const char *name) +{ + const char *state_name = get_state_name(name, COMM_THREAD_STATE); + recfmt_push_state(time, -1, 0, state_name, "MPI"); /* XXX */ +} + +static 
void recfmt_mpicommthread_pop_state(double time) +{ + recfmt_pop_state(time, -1, 0); +} + +static void recfmt_user_thread_push_state(double time, long unsigned threadid, const char *name, const char *type) +{ + const char *state_name = get_state_name(name, USER_THREAD_STATE); + recfmt_push_state(time, -1, threadid, state_name, type); +} + +static void recfmt_user_thread_pop_state(double time, long unsigned threadid) +{ + recfmt_pop_state(time, -1, threadid); +} + +/* + * Fill both paje file and trace file + */ + +static void do_worker_set_state(double time, const char *prefix, int workerid, const char *name, const char *type) +{ + if (out_paje_file) + worker_set_state(time, prefix, workerid, name); + if (trace_file) + recfmt_worker_set_state(time, workerid, name, type); +} + +static void do_thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type, long job_id) +{ + if (out_paje_file) + thread_set_state(time, prefix, threadid, name, job_id); + if (trace_file) + recfmt_thread_set_state(time, prefixTOnodeid(prefix), threadid, name, type); +} + +static void do_thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type) +{ + if (out_paje_file) + thread_push_state(time, prefix, threadid, name); + if (trace_file) + recfmt_thread_push_state(time, prefixTOnodeid(prefix), threadid, name, type); +} + +static void do_thread_pop_state(double time, const char *prefix, long unsigned int threadid) +{ + if (out_paje_file) + thread_pop_state(time, prefix, threadid); + if (trace_file) + recfmt_thread_pop_state(time, prefixTOnodeid(prefix), threadid); +} + +static void do_mpicommthread_set_state(double time, const char *prefix, const char *name) +{ + if (out_paje_file) + mpicommthread_set_state(time, prefix, name); + if (trace_file) + recfmt_mpicommthread_set_state(time, name); +} + +static void do_mpicommthread_push_state(double time, const char *prefix, const char *name) +{ 
/*
 * Push a state on whichever kind of thread `threadid` is, according to the
 * value returned by find_worker_id(): a non-negative value selects the
 * worker-thread path, -2 the MPI communication thread, and any other
 * negative value the user-thread path (which uses user_type as category).
 */
static void do_thread_push_state_worker(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type, const char *user_type)
{
	int worker = find_worker_id(prefixTOnodeid(prefix), threadid);

	if (worker == -2)
	{
		/* MPI thread */
		do_mpicommthread_push_state(time, prefix, name);
		return;
	}

	if (worker < 0)
	{
		do_user_thread_push_state(time, prefix, threadid, name, user_type);
		return;
	}

	do_thread_push_state(time, prefix, threadid, name, type);
}
options->file_prefix; + + if (out_paje_file) + { + double now = get_event_time_stamp(ev, options); +#ifdef STARPU_HAVE_POTI + char program_container[STARPU_POTI_STR_LEN]; + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + char new_memnode_container_alias[STARPU_POTI_STR_LEN], new_memnode_container_name[STARPU_POTI_STR_LEN]; + char new_memmanager_container_alias[STARPU_POTI_STR_LEN], new_memmanager_container_name[STARPU_POTI_STR_LEN]; + memnode_container_alias(new_memnode_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]); + /* TODO: ramkind */ + snprintf(new_memnode_container_name, sizeof(new_memnode_container_name), "%sMEMNODE%"PRIu64"", prefix, ev->param[0]); + poti_CreateContainer(now, new_memnode_container_alias, "Mn", program_container, new_memnode_container_name); + + memmanager_container_alias(new_memmanager_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]); + /* TODO: ramkind */ + snprintf(new_memmanager_container_name, sizeof(new_memmanager_container_name), "%sMEMMANAGER%"PRIu64"", prefix, ev->param[0]); + poti_CreateContainer(now, new_memmanager_container_alias, "Mm", new_memnode_container_alias, new_memmanager_container_name); +#else + fprintf(out_paje_file, "7 %.9f %smn%"PRIu64" Mn %sp %sMEMNODE%"PRIu64"\n", now, prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]); + fprintf(out_paje_file, "7 %.9f %smm%"PRIu64" Mm %smn%"PRIu64" %sMEMMANAGER%"PRIu64"\n", now, prefix, ev->param[0], prefix, ev->param[0], options->file_prefix, ev->param[0]); +#endif + + if (!options->no_bus) + { +#ifdef STARPU_HAVE_POTI + poti_SetVariable(now, new_memmanager_container_alias, "use", 0.0); + poti_SetVariable(now, new_memmanager_container_alias, "bwi_mm", 0.0); + poti_SetVariable(now, new_memmanager_container_alias, "bwo_mm", 0.0); +#else + fprintf(out_paje_file, "13 %.9f %smm%"PRIu64" use 0.0\n", now, prefix, ev->param[0]); + fprintf(out_paje_file, "13 %.9f %smm%"PRIu64" bwi_mm 0.0\n", now, prefix, ev->param[0]); + 
/*
 * Handle the event marking the start of a worker's initialization: register
 * the worker and its thread, pick its color, create the thread and worker
 * containers in the Paje output, put the thread in the "In" state and record
 * the worker name and architecture in `options`.
 */
static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
{
	/*
	   Event parameters, as read below:
	   param[0] : key of the worker architecture (cuda, cpu ..)
	   param[1] : worker id
	   param[2] : device id
	   param[3] : node id, used to name the parent "mn" container
	   param[4] : bind id, used to name the thread container
	   param[5] : "set" flag, stored as the sync flag of the thread
	   param[6] : thread id
	*/
	char *prefix = options->file_prefix;

	int devid = ev->param[2];
	int workerid = ev->param[1];
	int nodeid = ev->param[3];
	int bindid = ev->param[4];
	int set = ev->param[5];
	long unsigned int threadid = ev->param[6];
	int new_thread;

	/* Non-zero only the first time this thread is seen */
	new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set);

	const char *kindstr;
	struct starpu_perfmodel_arch arch;
	arch.ndevices = 1;
	/* The pointer is handed over to options->worker_archtypes[] at the end of the function */
	_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));

	enum starpu_worker_archtype archtype = _STARPU_FUT_KEY_WORKER(ev->param[0]);
	STARPU_ASSERT(archtype < STARPU_NARCH);

	kindstr = starpu_worker_get_type_as_string(archtype);
	arch.devices[0].type = archtype;
	arch.devices[0].devid = 0;
	arch.devices[0].ncores = 1;

	if (archtype == STARPU_CPU_WORKER)
		set_next_cpu_worker_color(workerid);
	else
		set_next_accel_worker_color(workerid);

	double now = get_event_time_stamp(ev, options);

	if (out_paje_file)
	{
		char new_worker_container_name[STARPU_TRACE_STR_LEN];
		if (arch.devices[0].type == STARPU_CUDA_WORKER)
		{
			// If CUDA, workers might be streams, so create an unique name for each of them
			int streamid = create_ordered_stream_id(prefixTOnodeid(prefix), devid);
			snprintf(new_worker_container_name, sizeof(new_worker_container_name), "%s%s%d_%d", prefix, kindstr, devid, streamid);
		}
		else
		{
			// If not CUDA, we suppose worker name is the prefix, the kindstr, and the devid
			snprintf(new_worker_container_name, sizeof(new_worker_container_name), "%s%s%d", prefix, kindstr, devid);
		}
#ifdef STARPU_HAVE_POTI
		char new_thread_container_alias[STARPU_POTI_STR_LEN];
		thread_container_alias(new_thread_container_alias, STARPU_POTI_STR_LEN, prefix, threadid);
		char new_worker_container_alias[STARPU_POTI_STR_LEN];
		worker_container_alias(new_worker_container_alias, STARPU_POTI_STR_LEN, prefix, workerid);
		char memnode_container[STARPU_POTI_STR_LEN];
		memnode_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, nodeid);
		char new_thread_container_name[STARPU_POTI_STR_LEN];
		snprintf(new_thread_container_name, sizeof(new_thread_container_name), "%sT%d", prefix, bindid);
		/* The thread container is only created once, the worker container always */
		if (new_thread)
			poti_CreateContainer(now, new_thread_container_alias, "T", memnode_container, new_thread_container_name);
		poti_CreateContainer(now, new_worker_container_alias, "W", new_thread_container_alias, new_worker_container_name);
		if (!options->no_flops)
			poti_SetVariable(now, new_worker_container_alias, "gf", 0.0);
#else
		/* The thread container is only created once, the worker container always */
		if (new_thread)
			fprintf(out_paje_file, "7 %.9f %st%lu T %smn%d %sT%d\n",
				now, prefix, threadid, prefix, nodeid, prefix, bindid);
		fprintf(out_paje_file, "7 %.9f %sw%d W %st%lu %s\n",
			now, prefix, workerid, prefix, threadid, new_worker_container_name);
		if (!options->no_flops)
			fprintf(out_paje_file, "13 %.9f %sw%d gf 0.0\n",
				now, prefix, workerid);
#endif
	}

	/* start initialization */
	do_thread_set_state(now, prefix, threadid, "In", "Runtime", -1);

	if (activity_file)
		fprintf(activity_file, "name\t%d\t%s %d\n", workerid, kindstr, devid);

	/* Remember the worker name ("<kind> <devid>") and architecture for later use */
	snprintf(options->worker_names[workerid], sizeof(options->worker_names[workerid])-1, "%s %d", kindstr, devid);
	options->worker_names[workerid][sizeof(options->worker_names[workerid])-1] = 0;
	options->worker_archtypes[workerid] = arch;
}
+static void handle_worker_init_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + int worker; + + if (ev->nb_params < 2) + { + worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); + STARPU_ASSERT(worker >= 0); + } + else + worker = ev->param[1]; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "B", "Runtime", -1); + + do_worker_set_state(get_event_time_stamp(ev, options), prefix, worker, "I", "Other"); + + /* Initialize the accumulated time counters */ + last_activity_flush_timestamp[worker] = get_event_time_stamp(ev, options); + accumulated_sleep_time[worker] = 0.0; + accumulated_exec_time[worker] = 0.0; +} + +static void handle_worker_deinit_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + long unsigned int threadid = ev->param[0]; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, threadid, "D", "Runtime", -1); +} + +static void handle_worker_deinit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char worker_container[STARPU_POTI_STR_LEN]; + thread_container_alias(worker_container, STARPU_POTI_STR_LEN, prefix, ev->param[1]); + poti_DestroyContainer(get_event_time_stamp(ev, options), "T", worker_container); +#else + fprintf(out_paje_file, "8 %.9f %st%"PRIu64" T\n", + get_event_time_stamp(ev, options), prefix, ev->param[1]); +#endif + } + if (trace_file) + recfmt_thread_set_state(get_event_time_stamp(ev, options), prefixTOnodeid(prefix), ev->param[1], "End", NULL); +} + +#ifdef STARPU_HAVE_POTI +static void create_paje_state_color(char *name, char *type, int ctx, float red, float green, float blue) +{ + char color[STARPU_POTI_STR_LEN]; + char alias[STARPU_POTI_STR_LEN]; + snprintf(color, sizeof(color), "%f %f %f", red, green, blue); + if (ctx) + { + snprintf(alias, sizeof(alias), 
"%s_%d", name, ctx); + } + else + { + snprintf(alias, sizeof(alias), "%s", name); + } + poti_DefineEntityValue(alias, type, name, color); +} +#endif + +static void create_paje_state_if_not_found(char *name, unsigned color, struct starpu_fxt_options *options) +{ + struct _starpu_symbol_name *itor; + for (itor = _starpu_symbol_name_list_begin(&symbol_list); + itor != _starpu_symbol_name_list_end(&symbol_list); + itor = _starpu_symbol_name_list_next(itor)) + { + if (!strcmp(name, itor->name)) + { + /* we found an entry */ + return; + } + } + + /* it's the first time ... */ + struct _starpu_symbol_name *entry = _starpu_symbol_name_new(); + entry->name = strdup(name); + STARPU_ASSERT(entry->name); + + _starpu_symbol_name_list_push_front(&symbol_list, entry); + + float red, green, blue; + if (color != 0) + { + red = color / 0x100 / 0x100; + green = (color / 0x100) & 0xff; + blue = color & 0xff; + } + else if (options->per_task_colour) + { + /* choose some color ... that's disguting yes */ + unsigned hash_symbol_red = get_color_symbol_red(name); + unsigned hash_symbol_green = get_color_symbol_green(name); + unsigned hash_symbol_blue = get_color_symbol_blue(name); + + uint32_t hash_sum = hash_symbol_red + hash_symbol_green + hash_symbol_blue; + red = (1.0f * hash_symbol_red) / hash_sum; + green = (1.0f * hash_symbol_green) / hash_sum; + blue = (1.0f * hash_symbol_blue) / hash_sum; + } + else + { + /* Use the hardcoded value for execution mode */ + red = 0.0f; + green = 0.6f; + blue = 0.4f; + } + + /* create the Paje state */ + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + create_paje_state_color(name, "WS", 0, red, green, blue); + int i; + for(i = 1; i < STARPU_NMAX_SCHED_CTXS; i++) + { + char ctx[10]; + snprintf(ctx, sizeof(ctx), "Ctx%d", i); + if (options->use_task_color) + { + create_paje_state_color(name, ctx, i, red, green, blue); + } + else + { + if(i%10 == 1) + create_paje_state_color(name, ctx, i, 1.0, 0.39, 1.0); + if(i%10 == 2) + create_paje_state_color(name, 
ctx, i, .0, 1.0, 0.0); + if(i%10 == 3) + create_paje_state_color(name, ctx, i, 1.0, 1.0, .0); + if(i%10 == 4) + create_paje_state_color(name, ctx, i, .0, 0.95, 1.0); + if(i%10 == 5) + create_paje_state_color(name, ctx, i, .0, .0, .0); + if(i%10 == 6) + create_paje_state_color(name, ctx, i, .0, .0, 0.5); + if(i%10 == 7) + create_paje_state_color(name, ctx, i, 0.41, 0.41, 0.41); + if(i%10 == 8) + create_paje_state_color(name, ctx, i, 1.0, .0, 1.0); + if(i%10 == 9) + create_paje_state_color(name, ctx, i, .0, .0, 1.0); + if(i%10 == 0) + create_paje_state_color(name, ctx, i, 0.6, 0.80, 50.0); + } + } +#else + fprintf(out_paje_file, "6 %s WS %s \"%f %f %f\" \n", name, name, red, green, blue); + int i; + for(i = 1; i < STARPU_NMAX_SCHED_CTXS; i++) + { + if (options->use_task_color) + { + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"%f %f %f\" \n", name, i, i, name, red, green, blue); + } + else + { + if(i%10 == 1) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"1.0 0.39 1.0\" \n", name, i, i, name); + if(i%10 == 2) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 1.0 .0\" \n", name, i, i, name); + if(i%10 == 3) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.87 0.87 .0\" \n", name, i, i, name); + if(i%10 == 4) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 0.95 1.0\" \n", name, i, i, name); + if(i%10 == 5) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 .0\" \n", name, i, i, name); + if(i%10 == 6) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 0.5\" \n", name, i, i, name); + if(i%10 == 7) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.41 0.41 0.41\" \n", name, i, i, name); + if(i%10 == 8) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"1.0 .0 1.0\" \n", name, i, i, name); + if(i%10 == 9) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 1.0\" \n", name, i, i, name); + if(i%10 == 0) + fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.6 0.80 0.19\" \n", name, i, i, name); + } + } + +#endif + } + +} + + +static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct 
starpu_fxt_options *options) +{ + int worker = ev->param[2]; + int node = ev->param[3]; + + if (worker < 0) return; + + struct task_info *task = get_task(ev->param[0], options->file_rank); + char *name = task->name; + create_paje_state_if_not_found(name, task->color, options); + + snprintf(_starpu_last_codelet_symbol[worker], sizeof(_starpu_last_codelet_symbol[worker]), "%.*s", (int) sizeof(_starpu_last_codelet_symbol[worker])-1, name); + _starpu_last_codelet_symbol[worker][sizeof(_starpu_last_codelet_symbol[worker])-1] = 0; + last_codelet_parameter[worker] = 0; + + double start_codelet_time = get_event_time_stamp(ev, options); + double last_start_codelet_time = last_codelet_start[worker]; + last_codelet_start[worker] = start_codelet_time; + char *prefix = options->file_prefix; + + task->start_time = start_codelet_time; + task->workerid = worker; + task->node = node; + + do_worker_set_state(start_codelet_time, prefix, ev->param[2], name, "Task"); + if (out_paje_file) + { + unsigned sched_ctx = ev->param[1]; + + if (sched_ctx != 0) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + char ctx[6]; + snprintf(ctx, sizeof(ctx), "Ctx%u", sched_ctx); + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[2]); + poti_SetState(start_codelet_time, container, ctx, name); +#else + fprintf(out_paje_file, "10 %.9f %sw%"PRIu64" Ctx%d \"%s\"\n", start_codelet_time, prefix, ev->param[2], sched_ctx, name); +#endif + } + } + + struct _starpu_computation *comp = ongoing_computation[worker]; + if (!comp) + { + /* First task for this worker */ + comp = ongoing_computation[worker] = _starpu_computation_new(); + comp->peer = NULL; + comp->comp_start = start_codelet_time; + if (!options->no_flops) + _starpu_computation_list_push_back(&computation_list, comp); + } + else if (options->no_smooth || + (start_codelet_time - last_codelet_end[worker]) >= + IDLE_FACTOR * (last_codelet_end[worker] - last_start_codelet_time)) + { + /* Long idle period, move 
previously-allocated comp to now */ + comp->comp_start = start_codelet_time; + if (!options->no_flops) + { + _starpu_computation_list_erase(&computation_list, comp); + _starpu_computation_list_push_back(&computation_list, comp); + } + } +} + +static void handle_model_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + struct task_info *task = get_task(ev->param[0], options->file_rank); + char *name = get_fxt_string(ev, 2); + task->model_name = strdup(name); +} + +static void handle_codelet_data(struct fxt_ev_64 *ev STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) +{ + int worker = ev->param[0]; + if (worker < 0) return; + int num = last_codelet_parameter[worker]++; + if (num >= MAX_PARAMETERS) + return; + char *name = get_fxt_string(ev, 1); + snprintf(last_codelet_parameter_description[worker][num], sizeof(last_codelet_parameter_description[worker][num]), "%.*s", (int) sizeof(last_codelet_parameter_description[worker][num])-1, name); + last_codelet_parameter_description[worker][num][sizeof(last_codelet_parameter_description[worker][num])-1] = 0; +} + +static void handle_codelet_data_handle(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + struct task_info *task = get_task(ev->param[0], options->file_rank); + unsigned alloc = 0; + + if (task->ndata == 0) + /* Start with 8=2^3, should be plenty in most cases */ + alloc = 8; + else if (task->ndata >= 8) + { + /* Allocate dependencies array by powers of two */ + if (! ((task->ndata - 1) & task->ndata)) /* Is task->ndata a power of two? 
*/ + { + /* We have filled the previous power of two, get another one */ + alloc = task->ndata * 2; + } + } + if (alloc) + { + _STARPU_REALLOC(task->data, sizeof(*task->data) * alloc); + } + task->data[task->ndata].handle = ev->param[1]; + task->data[task->ndata].size = ev->param[2]; + task->data[task->ndata].mode = ev->param[3]; + task->data[task->ndata].numa_nodes_bitmap = -1; + task->ndata++; +} + +static void handle_codelet_data_handle_numa_access(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + struct task_info *task = get_task(ev->param[0], options->file_rank); + unsigned i = (unsigned) ev->param[1]; + + STARPU_ASSERT(i < task->ndata); + + task->data[i].numa_nodes_bitmap = ev->param[2]; +} + +static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int worker = ev->param[5]; + unsigned long job_id = ev->param[6]; + + if (worker < 0) return; + + char parameters[256]; + size_t eaten = 0; + if (!last_codelet_parameter[worker]) + snprintf(parameters, sizeof(parameters) - 1, "nodata"); + else + { + int i; + for (i = 0; i < last_codelet_parameter[worker] && i < MAX_PARAMETERS; i++) + { + eaten += snprintf(parameters + eaten, sizeof(parameters) - eaten - 1, "%s%s", i?" 
":"", last_codelet_parameter_description[worker][i]); + } + } + parameters[sizeof(parameters)-1] = 0; + + struct task_info *task = get_task(job_id, options->file_rank); + task->parameters = strdup(parameters); + task->footprint = ev->param[2]; + task->kflops = ev->param[3]; + task->tag = ev->param[4]; + + unsigned i, X = 0, Y = 0, Z = 0; + for (i = 0; i < task->ndata; i++) + { + if (task->data[i].mode & STARPU_W) + { + struct data_info *data = get_data(task->data[i].handle, options->file_rank); + if (data->dimensions >= 1) + X = data->dims[0]; + if (data->dimensions >= 2) + Y = data->dims[1]; + if (data->dimensions >= 3) + Z = data->dims[2]; + break; + } + } + + char numa_nodes_str[STARPU_TRACE_STR_LEN] = ""; + eaten = 0; + for (i = 0; i < task->ndata; i++) + { + char str[STARPU_TRACE_STR_LEN] = ""; + _starpu_convert_numa_nodes_bitmap_to_str(task->data[i].numa_nodes_bitmap, str); + eaten += snprintf(numa_nodes_str + eaten, sizeof(numa_nodes_str) - eaten - 1, "%s%s", i ? "_" : "", str); + } + numa_nodes_str[sizeof(numa_nodes_str)-1] = 0; + + if (out_paje_file) + { + char *prefix = options->file_prefix; + unsigned sched_ctx = ev->param[0]; + + /* Paje won't like spaces or tabs, replace with underscores */ + char *c; + for (c = parameters; *c; c++) + if ((*c == ' ') || (*c == '\t')) + *c = '_'; + + worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id, ((double) task->kflops) / 1000000, X, Y, Z, task->iterations[0], task->iterations[1], numa_nodes_str, options); + if (sched_ctx != 0) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + char typectx[STARPU_POTI_STR_LEN]; + snprintf(typectx, sizeof(typectx), "Ctx%u", sched_ctx); + worker_container_alias(container, sizeof(container), prefix, worker); + poti_SetState(last_codelet_start[worker], container, typectx, _starpu_last_codelet_symbol[worker]); + + char name[STARPU_POTI_STR_LEN]; + 
snprintf(name, sizeof(name), "%s", _starpu_last_codelet_symbol[worker]); + + char size_str[STARPU_POTI_STR_LEN]; + char parameters_str[STARPU_POTI_STR_LEN]; + char footprint_str[STARPU_POTI_STR_LEN]; + char tag_str[STARPU_POTI_STR_LEN]; + char jobid_str[STARPU_POTI_STR_LEN]; + char submitorder_str[STARPU_POTI_STR_LEN]; + snprintf(size_str, sizeof(size_str), "%ld", ev->param[1]); + snprintf(parameters_str, sizeof(parameters_str), "%s", parameters); + snprintf(footprint_str, sizeof(footprint_str), "%08lx", ev->param[2]); + snprintf(tag_str, sizeof(tag_str), "%016lx", ev->param[4]); + snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); + snprintf(submitorder_str, sizeof(submitorder_str), "%s%lu", prefix, task->submit_order); + +#ifdef HAVE_POTI_INIT_CUSTOM + poti_user_SetState(_starpu_poti_semiExtendedSetState, last_codelet_start[worker], container, typectx, name, 6, size_str, + parameters_str, + footprint_str, + tag_str, + jobid_str, + submitorder_str); +#else + poti_SetState(last_codelet_start[worker], container, typectx, name); +#endif +#else + fprintf(out_paje_file, "21 %.9f %sw%d Ctx%u \"%s\" %ld %s %08lx %016lx %s%lu %s%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], prefix, job_id, prefix, task->submit_order); +#endif + } + } +} + +static long dumped_codelets_count; +static struct starpu_fxt_codelet_event *dumped_codelets; + +static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + size_t codelet_size = ev->param[1]; + uint32_t codelet_hash = ev->param[2]; + int worker = ev->param[3]; + long unsigned int threadid = ev->param[4]; + char *name = get_fxt_string(ev, 5); + + if (worker < 0) return; + + char *prefix = options->file_prefix; + double end_codelet_time = get_event_time_stamp(ev, options); + double last_end_codelet_time = last_codelet_end[worker]; + 
last_codelet_end[worker] = end_codelet_time; + + const char *state = "I"; + if (find_sync(prefixTOnodeid(prefix), threadid)) + state = "B"; + + do_worker_set_state(end_codelet_time, prefix, worker, state, "Other"); + + struct task_info *task = get_task(job_id, options->file_rank); + + task->end_time = end_codelet_time; + update_accumulated_time(worker, 0.0, end_codelet_time - task->start_time, end_codelet_time, 0); + + struct _starpu_computation *peer = ongoing_computation[worker]; + double gflops_start = peer->comp_start; + double codelet_length; + double gflops; + struct _starpu_computation *comp; + + codelet_length = end_codelet_time - gflops_start; + gflops = (((double)task->kflops) / 1000000) / (codelet_length / 1000); + + if (options->no_flops) + { + _starpu_computation_delete(peer); + } + else + { + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, worker); + if (gflops_start != last_end_codelet_time) + { + if (last_end_codelet_time != 0) + { + poti_SetVariable(last_end_codelet_time, container, "gf", 0.); + } + } + poti_SetVariable(gflops_start, container, "gf", gflops); +#else + if (gflops_start != last_end_codelet_time) + { + if (last_end_codelet_time != 0) + { + fprintf(out_paje_file, "13 %.9f %sw%d gf %f\n", + last_end_codelet_time, prefix, worker, 0.); + } + } + fprintf(out_paje_file, "13 %.9f %sw%d gf %f\n", + gflops_start, prefix, worker, gflops); +#endif + } + + comp = _starpu_computation_new(); + comp->comp_start = end_codelet_time; + comp->gflops = -gflops; + peer->gflops = +gflops; + comp->peer = peer; + peer->peer = comp; + _starpu_computation_list_push_back(&computation_list, comp); + } + + /* Prepare comp for next codelet */ + comp = _starpu_computation_new(); + comp->comp_start = end_codelet_time; + comp->peer = NULL; + if (!options->no_flops) + _starpu_computation_list_push_back(&computation_list, comp); + ongoing_computation[worker] = comp; 
+ + if (distrib_time) + fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%.9f\n", _starpu_last_codelet_symbol[worker], + prefix, worker, (unsigned long) codelet_size, codelet_hash, codelet_length); + + if (options->dumped_codelets) + { + dumped_codelets_count++; + _STARPU_REALLOC(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event)); + + snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, sizeof(dumped_codelets[dumped_codelets_count - 1].symbol)-1, "%s", _starpu_last_codelet_symbol[worker]); + dumped_codelets[dumped_codelets_count - 1].symbol[sizeof(dumped_codelets[dumped_codelets_count - 1].symbol)-1] = 0; + dumped_codelets[dumped_codelets_count - 1].workerid = worker; + snprintf(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname, sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname), "%.*s", (int) sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname)-1, name); + dumped_codelets[dumped_codelets_count - 1].perfmodel_archname[sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname)-1] = 0; + dumped_codelets[dumped_codelets_count - 1].size = codelet_size; + dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash; + dumped_codelets[dumped_codelets_count - 1].time = codelet_length; + } + _starpu_last_codelet_symbol[worker][0] = 0; +} + +static void handle_start_executing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + long unsigned int threadid = ev->param[0]; + long job_id = ev->param[1]; + + if (!find_sync(prefixTOnodeid(prefix), threadid)) + do_thread_set_state(get_event_time_stamp(ev, options), prefix, threadid, "E", "Runtime", job_id); +} + +static void handle_end_executing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + long unsigned int threadid = ev->param[0]; + + if (!find_sync(prefixTOnodeid(prefix), threadid)) + do_thread_set_state(get_event_time_stamp(ev, 
options), prefix, threadid, "B", "Runtime", -1); +} + +static void handle_start_parallel_sync(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + long unsigned int threadid = ev->param[0]; + + thread_push_state(get_event_time_stamp(ev, options), prefix, threadid, "Ps"); +} + +static void handle_end_parallel_sync(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + long unsigned int threadid = ev->param[0]; + + thread_pop_state(get_event_time_stamp(ev, options), prefix, threadid); +} + +static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int worker; + unsigned long code = ev->param[0]; +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lu", code); +#endif + + char *prefix = options->file_prefix; + double now = get_event_time_stamp(ev, options); + + worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); + if (worker < 0) + { + if (out_paje_file) +#ifdef STARPU_HAVE_POTI + program_container_alias(container, STARPU_POTI_STR_LEN, prefix); +#else + fprintf(out_paje_file, "9 %.9f user_user_event %sp %lu\n", now, prefix, code); +#endif + } + else + { + if (out_paje_file) +#ifdef STARPU_HAVE_POTI + thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[1]); +#else + fprintf(out_paje_file, "9 %.9f user_event %st%"PRIu64" %lu\n", now, prefix, ev->param[1], code); +#endif + } +#ifdef STARPU_HAVE_POTI + if (out_paje_file) + poti_NewEvent(now, container, "user_event", paje_value); +#endif +} + +static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], "C", "Runtime", "UNK"); /* XXX */ +} + +static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + 
do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1]); +} + +static void handle_hypervisor_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "H", "Runtime", "UNK"); /* XXX */ +} + +static void handle_hypervisor_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]); +} + +static void handle_worker_status_on_tid(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus) +{ + char *prefix = options->file_prefix; + if (find_worker_id(prefixTOnodeid(prefix), ev->param[1]) < 0) + return; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[1], newstatus, "Runtime", -1); +} + +static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus) +{ + int worker; + worker = ev->param[1]; + if (worker < 0) + return; + + do_worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus, "Runtime"); +} + +static double last_sleep_start[STARPU_NMAXWORKERS]; + +static void handle_worker_scheduling_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + if (find_worker_id(prefixTOnodeid(prefix), ev->param[0]) < 0) + return; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "Sc", "Runtime", -1); +} + +static void handle_worker_scheduling_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + if (find_worker_id(prefixTOnodeid(prefix), ev->param[0]) < 0) + return; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "B", "Runtime", -1); +} + +static void handle_worker_scheduling_push(struct fxt_ev_64 *ev, struct starpu_fxt_options 
*options) +{ + do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "Sc", "Runtime", "User"); +} + +static void handle_worker_scheduling_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]); +} + +static void handle_worker_sleep_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); + if (worker < 0) + return; + + double start_sleep_time = get_event_time_stamp(ev, options); + last_sleep_start[worker] = start_sleep_time; + + do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "Sl", "Other", -1); +} + +static void handle_worker_sleep_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); + if (worker < 0) + return; + + double end_sleep_timestamp = get_event_time_stamp(ev, options); + + do_thread_set_state(end_sleep_timestamp, prefix, ev->param[0], "B", "Runtime", -1); + + double sleep_length = end_sleep_timestamp - last_sleep_start[worker]; + + update_accumulated_time(worker, sleep_length, 0.0, end_sleep_timestamp, 0); +} + +static void handle_data_register(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + char *prefix = options->file_prefix; + struct data_info *data = get_data(handle, options->file_rank); + char *description = get_fxt_string(ev, 4); + + data->size = ev->param[1]; + data->max_size = ev->param[2]; + data->home_node = ev->param[3]; + if (description[0]) + data->description = strdup(description); + + if (out_paje_file && !options->no_events) + { +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lx", 
handle); + program_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_NewEvent(get_event_time_stamp(ev, options), container, "register", paje_value); +#else + fprintf(out_paje_file, "9 %.9f register %sp %lx\n", get_event_time_stamp(ev, options), prefix, handle); +#endif + } +} + +static void handle_data_unregister(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + char *prefix = options->file_prefix; + struct data_info *data = get_data(handle, options->file_rank); + + if (out_paje_file && !options->no_events) + { +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lx", handle); + program_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_NewEvent(get_event_time_stamp(ev, options), container, "unregister", paje_value); +#else + fprintf(out_paje_file, "9 %.9f unregister %sp %lx\n", get_event_time_stamp(ev, options), prefix, handle); +#endif + } + + data_dump(data); +} + +static void handle_data_state(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *state) +{ + unsigned long handle = ev->param[0]; + unsigned node = ev->param[1]; + char *prefix = options->file_prefix; + + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node); + snprintf(paje_value, sizeof(paje_value), "%lx", handle); + poti_NewEvent(get_event_time_stamp(ev, options), memnode_container, state, paje_value); +#else + fprintf(out_paje_file, "9 %.9f %s %smm%u %lx\n", get_event_time_stamp(ev, options), state, prefix, node, handle); +#endif + } +} + +static void handle_data_copy(void) +{ +} + +static void handle_data_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + char *name = get_fxt_string(ev, 1); + struct 
data_info *data = get_data(handle, options->file_rank); + + data->name = strdup(name); +} + +static void handle_data_coordinates(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + unsigned dimensions = ev->param[1]; + struct data_info *data = get_data(handle, options->file_rank); + unsigned i; + + data->dimensions = dimensions; + _STARPU_MALLOC(data->dims, dimensions * sizeof(*data->dims)); + for (i = 0; i < dimensions; i++) + data->dims[i] = ev->param[i+2]; +} + +static void handle_data_wont_use(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + unsigned long submit_order = ev->param[1]; + unsigned long job_id = ev->param[2]; + + fprintf(tasks_file, "Control: WontUse\n"); + fprintf(tasks_file, "JobId: %lu\n", job_id); + fprintf(tasks_file, "SubmitOrder: %lu\n", submit_order); + fprintf(tasks_file, "SubmitTime: %f\n", get_event_time_stamp(ev, options)); + fprintf(tasks_file, "Handles: %lx\n", handle); + fprintf(tasks_file, "MPIRank: %d\n", options->file_rank); + fprintf(tasks_file, "\n"); +} + +static void handle_data_doing_wont_use(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + char *prefix = options->file_prefix; + unsigned node = STARPU_MAIN_RAM; + const char *event = "WU"; + + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node); + snprintf(paje_value, sizeof(paje_value), "%lx", handle); + poti_NewEvent(get_event_time_stamp(ev, options), memnode_container, event, paje_value); +#else + fprintf(out_paje_file, "9 %.9f %s %smm%u %lx\n", get_event_time_stamp(ev, options), event, prefix, node, handle); +#endif + } +} + +static void handle_mpi_data_set_rank(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + 
unsigned long rank = ev->param[1]; + struct data_info *data = get_data(handle, options->file_rank); + + data->mpi_owner = rank; +} + +static void handle_mpi_data_set_tag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long handle = ev->param[0]; + long tag = ev->param[1]; + struct data_info *data = get_data(handle, options->file_rank); + + data->mpi_tag = tag; +} + +static const char *copy_link_type(enum starpu_is_prefetch prefetch) +{ + switch (prefetch) + { + case STARPU_FETCH: return "F"; + case STARPU_TASK_PREFETCH: return "TF"; + case STARPU_PREFETCH: return "PF"; + case STARPU_IDLEFETCH: return "IF"; + default: STARPU_ASSERT(0); + } +} + + +static void handle_checkpoint_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + /* Add an event in the trace */ + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + snprintf(container, sizeof(container), "%sp", options->file_prefix); + poti_user_NewEvent(_starpu_poti_checkPointState, get_event_time_stamp(ev, options), container, "prog_event", "checkpoint_begin", + 2, ev->param[0], ev->param[1]); +#else + fprintf(out_paje_file, "25 %.9f checkpoint_begin %sp 0 %lu %lu\n", get_event_time_stamp(ev, options), + options->file_prefix, ev->param[0], ev->param[1]); +#endif + } +} + +static void handle_checkpoint_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + /* Add an event in the trace */ + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + snprintf(container, sizeof(container), "%sp", options->file_prefix); + poti_user_NewEvent(_starpu_poti_checkPointState, get_event_time_stamp(ev, options), container, "prog_event", "checkpoint_end", + 2, ev->param[0], ev->param[1]); +#else + fprintf(out_paje_file, "25 %.9f checkpoint_end %sp 0 %lu %lu\n", get_event_time_stamp(ev, options), + options->file_prefix, ev->param[0], ev->param[1]); +#endif + } +} + +static void handle_start_driver_copy(struct 
fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned src = ev->param[0]; + unsigned dst = ev->param[1]; + unsigned size = ev->param[2]; + unsigned comid = ev->param[3]; + enum starpu_is_prefetch prefetch = ev->param[4]; + unsigned long handle = ev->param[5]; + const char *link_type = copy_link_type(prefetch); + + char *prefix = options->file_prefix; + + if (!options->no_bus) + { + if (out_paje_file) + { + double time = get_event_time_stamp(ev, options); + memnode_push_state(time, prefix, dst, "Co"); + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCo", handle, 0, comid, size, src, options); + unsigned X = _starpu_fxt_data_get_coord(handle, options->file_rank, 0); + unsigned Y = _starpu_fxt_data_get_coord(handle, options->file_rank, 1); + const char *name = _starpu_fxt_data_get_name(handle, options->file_rank); + if (!name) + name = ""; + +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN], src_memnode_container[STARPU_POTI_STR_LEN]; + char program_container[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%u", size); + snprintf(paje_key, sizeof(paje_key), "com_%u", comid); + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src); + + char str_handle[STARPU_POTI_STR_LEN]; + snprintf(str_handle, sizeof(str_handle), "%lx", handle); + + char X_str[STARPU_POTI_STR_LEN]; + snprintf(X_str, sizeof(X_str), "%u", X); + char Y_str[STARPU_POTI_STR_LEN]; + snprintf(Y_str, sizeof(Y_str), "%u", Y); + + poti_user_StartLink(_starpu_poti_CommLinkStart, time, program_container, link_type, src_memnode_container, paje_value, paje_key, 4, str_handle, name, X_str, Y_str); +#else + fprintf(out_paje_file, "24 %.9f %s %sp %u %smm%u com_%u %lx \"%s\" %u %u\n", time, link_type, prefix, size, prefix, src, comid, handle, name, X, Y); +#endif + } + + /* create a structure to store the start 
of the communication, this will be matched later */ + struct _starpu_communication *com = _starpu_communication_new(); + com->comid = comid; + com->comm_start = get_event_time_stamp(ev, options); + com->size = size; + com->bandwidth = 0; + com->src_node = src; + com->dst_node = dst; + com->type = link_type; + com->peer = NULL; + com->handle = handle; + + _starpu_communication_list_push_back(&communication_list, com); + } + +} + + +static void handle_work_stealing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + if (out_paje_file) + { + unsigned dst = ev->param[0]; + unsigned src = ev->param[1]; + char *prefix = options->file_prefix; + unsigned size = 0; + double time = get_event_time_stamp(ev, options); +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN], src_worker_container[STARPU_POTI_STR_LEN], dst_worker_container[STARPU_POTI_STR_LEN]; + char program_container[STARPU_POTI_STR_LEN]; + + snprintf(paje_value, sizeof(paje_value), "%u", size); + snprintf(paje_key, sizeof(paje_key), "steal_%u", steal_number); + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + worker_container_alias(src_worker_container, STARPU_POTI_STR_LEN, prefix, src); + worker_container_alias(dst_worker_container, STARPU_POTI_STR_LEN, prefix, dst); + poti_StartLink(time, program_container, "WSL", src_worker_container, paje_value, paje_key); + poti_EndLink(time+0.000000001, program_container, "WSL", dst_worker_container, paje_value, paje_key); +#else + + fprintf(out_paje_file, "18 %.9f WSL %sp %u %sw%u steal_%u\n", time, prefix, size, prefix, src, steal_number); + fprintf(out_paje_file, "19 %.9f WSL %sp %u %sw%u steal_%u\n", time+0.000000001, prefix, size, prefix, dst, steal_number); +#endif + } + + steal_number++; +} + + +static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int src = -1; + unsigned long handle = 0; + unsigned dst = ev->param[1]; + unsigned long size = 
ev->param[2]; + unsigned comid = ev->param[3]; + enum starpu_is_prefetch prefetch = ev->param[4]; + const char *link_type = copy_link_type(prefetch); + + char *prefix = options->file_prefix; + + if (!options->no_bus) + { + /* look for a data transfer to match */ +#ifdef STARPU_DEVEL +#warning FIXME: use hash table instead +#endif + struct _starpu_communication *itor; + for (itor = _starpu_communication_list_begin(&communication_list); + itor != _starpu_communication_list_end(&communication_list); + itor = _starpu_communication_list_next(itor)) + { + if (itor->comid == comid) + { + double comm_end = get_event_time_stamp(ev, options); + double bandwidth = (double)((0.001*size)/(comm_end - itor->comm_start)); + + itor->bandwidth = bandwidth; + + struct _starpu_communication *com = _starpu_communication_new(); + com->comid = comid; + com->comm_start = get_event_time_stamp(ev, options); + com->bandwidth = -bandwidth; + com->size = size; + + src = com->src_node = itor->src_node; + com->dst_node = itor->dst_node; + com->type = itor->type; + link_type = itor->type; + handle = itor->handle; + com->peer = itor; + itor->peer = com; + + _starpu_communication_list_push_back(&communication_list, com); + + break; + } + } + + if (out_paje_file) + { + double time = get_event_time_stamp(ev, options); + memnode_pop_state(time, prefix, dst); + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoE", handle, 0, comid, size, src, options); +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; + char dst_memnode_container[STARPU_POTI_STR_LEN], program_container[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lu", size); + snprintf(paje_key, sizeof(paje_key), "com_%u", comid); + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst); + poti_EndLink(time, program_container, link_type, 
dst_memnode_container, paje_value, paje_key); +#else + fprintf(out_paje_file, "19 %.9f %s %sp %lu %smm%u com_%u\n", time, link_type, prefix, size, prefix, dst, comid); +#endif + } + } +} + +static void handle_start_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned src = ev->param[0]; + unsigned dst = ev->param[1]; + + char *prefix = options->file_prefix; + + if (!options->no_bus) + if (out_paje_file) + { + memnode_push_state(get_event_time_stamp(ev, options), prefix, dst, "CoA"); + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoA", 0, 0, 0, 0, src, options); + } + +} + +static void handle_end_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned src = ev->param[0]; + unsigned dst = ev->param[1]; + + char *prefix = options->file_prefix; + + if (!options->no_bus) + if (out_paje_file) + { + memnode_pop_state(get_event_time_stamp(ev, options), prefix, dst); + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoAE", 0, 0, 0, 0, src, options); + } +} + +/* Currently unused */ +STARPU_ATTRIBUTE_UNUSED +static void handle_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + + if (out_paje_file) + memnode_set_state(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr); +} + +static void handle_data_request(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + unsigned dest = ev->param[1]; + unsigned prio = ev->param[2]; + unsigned long handle = ev->param[3]; + unsigned prefe = ev->param[4]; + unsigned long request = ev->param[5]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, request, prefe, prio, dest, options); +} + +static void handle_memnode_event_start_3(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, 
const char *eventstr) +{ /* param[1] is not read here: size comes from param[2] and handle from param[3] */ + unsigned memnode = ev->param[0]; + unsigned size = ev->param[2]; + unsigned long handle = ev->param[3]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, size, memnode, options); +} + /* Same as handle_memnode_event_start_3(), additionally forwarding prefe read from param[4]. */ +static void handle_memnode_event_start_4(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + //unsigned dest = ev->param[1]; // Not used + unsigned size = ev->param[2]; + unsigned long handle = ev->param[3]; + unsigned prefe = ev->param[4]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, prefe, size, memnode, options); +} + /* Memnode event whose handle is read from param[2] and whose info value is read from param[3]; the size argument passed on is 0. */ +static void handle_memnode_event_end_3(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + unsigned long handle = ev->param[2]; + unsigned info = ev->param[3]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, info, 0, memnode, options); +} + /* Memnode event forwarding only the handle (param[2]); all other numeric arguments are 0. */ +static void handle_memnode_event_start_2(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + unsigned long handle = ev->param[2]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, 0, memnode, options); +} + /* Same body as handle_memnode_event_start_2(); only the eventstr supplied by the caller differs. */ +static void handle_memnode_event_end_2(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + unsigned long handle = ev->param[2]; + + memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, 0, memnode, options); +} + /* When a Paje trace is being written, push state eventstr on the memory node given by param[0]. */ +static void handle_push_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) +{ + unsigned memnode = ev->param[0]; + + if (out_paje_file) + memnode_push_state(get_event_time_stamp(ev, options), 
options->file_prefix, memnode, eventstr); +} + /* When a Paje trace is being written, pop the current state of the memory node given by param[0]. */ +static void handle_pop_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned memnode = ev->param[0]; + + if (out_paje_file) + memnode_pop_state(get_event_time_stamp(ev, options), options->file_prefix, memnode); +} + /* When a Paje trace is being written, set the "use" variable of memory node param[0] to param[1] divided by 2^20 (presumably bytes converted to MiB -- confirm). */ +static void handle_used_mem(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned memnode = ev->param[0]; + + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, options->file_prefix, memnode); + poti_SetVariable(get_event_time_stamp(ev, options), memnode_container, "use", (double)ev->param[1] / (1<<20)); +#else + fprintf(out_paje_file, "13 %.9f %smm%u use %f\n", + get_event_time_stamp(ev, options), options->file_prefix, memnode, (double)ev->param[1] / (1<<20)); +#endif + } +} + /* With a non-NULL eventstr, push that state for thread tid (passing "Runtime" and "User" along); with a NULL eventstr, pop the state of that thread instead. */ +static void handle_task_submit_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, unsigned long tid, const char *eventstr) +{ + char *prefix = options->file_prefix; + double timestamp = get_event_time_stamp(ev, options); + + if (eventstr) + do_thread_push_state_worker(timestamp, prefix, tid, eventstr, "Runtime", "User"); + else + do_thread_pop_state_worker(timestamp, prefix, tid); +} + +/* + * Task counters. curq_size is incremented on job push and decremented on job pop (reported as "nready"). + * nsubmitted is incremented by handle_update_task_cnt() and decremented on job pop (reported as "nsubmitted"). + */ +static int curq_size = 0; +static int nsubmitted = 0; + /* A job was pushed: task is param[0], priority is param[1]. Increments curq_size, refreshes the component view, then reports the new value to the Paje trace (unless options->no_counter), to activity_file and to sched_tasks_file, each only when that file is open. */ +static void handle_job_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double current_timestamp = get_event_time_stamp(ev, options); + + unsigned task = ev->param[0]; + int priority = ev->param[1]; + + curq_size++; + + _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); + + if (!options->no_counter && out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + + scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); + poti_SetVariable(current_timestamp, container, "nready", 
(double)curq_size); + /* container is reused from here on: it is overwritten with the prefix followed by "p" */ + char paje_value[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%u", task); + snprintf(container, sizeof(container), "%sp", options->file_prefix); + if (!options->no_events) + poti_NewEvent(get_event_time_stamp(ev, options), container, "pu", paje_value); +#else + fprintf(out_paje_file, "13 %.9f %ssched nready %f\n", current_timestamp, options->file_prefix, (float)curq_size); + if (!options->no_events) + fprintf(out_paje_file, "9 %.9f %s %sp %u\n", get_event_time_stamp(ev, options), "pu", options->file_prefix, task); +#endif + } + + if (activity_file) + fprintf(activity_file, "cnt_ready\t%.9f\t%d\n", current_timestamp, curq_size); + /* One "push" record per event; the job id is prefixed with the file rank when file_rank is not negative. */ + if (sched_tasks_file) + { + fprintf(sched_tasks_file, "Type: push\n"); + fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp); + fprintf(sched_tasks_file, "Priority: %d\n", priority); + if (options->file_rank < 0) + fprintf(sched_tasks_file, "JobId: %u\n", task); + else + fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task); + fprintf(sched_tasks_file, "\n"); + } +} + /* A job was popped: task is param[0], priority is param[1]. Decrements both curq_size and nsubmitted, refreshes the component view, then reports both counters to the Paje trace (unless options->no_counter), to activity_file and to sched_tasks_file, each only when that file is open. */ + +static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double current_timestamp = get_event_time_stamp(ev, options); + unsigned task = ev->param[0]; + int priority = ev->param[1]; + + curq_size--; + nsubmitted--; + _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); + + if (!options->no_counter && out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); + poti_SetVariable(current_timestamp, container, "nready", (double)curq_size); + poti_SetVariable(current_timestamp, container, "nsubmitted", (double)nsubmitted); + /* container is reused from here on: it is overwritten with the prefix followed by "p" */ + char paje_value[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%u", task); + snprintf(container, sizeof(container), "%sp", options->file_prefix); + if (!options->no_events) + poti_NewEvent(get_event_time_stamp(ev, 
options), container, "po", paje_value); +#else + fprintf(out_paje_file, "13 %.9f %ssched nready %f\n", current_timestamp, options->file_prefix, (float)curq_size); + fprintf(out_paje_file, "13 %.9f %ssched nsubmitted %f\n", current_timestamp, options->file_prefix, (float)nsubmitted); + if (!options->no_events) + fprintf(out_paje_file, "9 %.9f %s %sp %u\n", get_event_time_stamp(ev, options), "po", options->file_prefix, task); +#endif + } + + if (activity_file) + { + fprintf(activity_file, "cnt_ready\t%.9f\t%d\n", current_timestamp, curq_size); + fprintf(activity_file, "cnt_submitted\t%.9f\t%d\n", current_timestamp, nsubmitted); + } + /* One "pop" record per event; the job id is prefixed with the file rank when file_rank is not negative. */ + if (sched_tasks_file) + { + fprintf(sched_tasks_file, "Type: pop\n"); + fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp); + fprintf(sched_tasks_file, "Priority: %d\n", priority); + if (options->file_rank < 0) + fprintf(sched_tasks_file, "JobId: %u\n", task); + else + fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task); + fprintf(sched_tasks_file, "\n"); + } +} + /* Forward a new component to _starpu_fxt_component_new(): param[0] plus the string obtained with get_fxt_string(ev, 1). options is unused. */ +static void handle_component_new(struct fxt_ev_64 *ev, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) +{ + _starpu_fxt_component_new(ev->param[0], get_fxt_string(ev, 1)); +} + /* Forward param[0] and param[1] to _starpu_fxt_component_connect(). options is unused. */ +static void handle_component_connect(struct fxt_ev_64 *ev, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) +{ + _starpu_fxt_component_connect(ev->param[0], ev->param[1]); +} + /* param[0] is mapped to a worker id with find_worker_id(); param[1] to param[4] are forwarded unchanged to _starpu_fxt_component_push() together with anim_file and the event timestamp. */ +static void handle_component_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + double current_timestamp = get_event_time_stamp(ev, options); + int workerid = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); + _starpu_fxt_component_push(anim_file, options, current_timestamp, workerid, ev->param[1], ev->param[2], ev->param[3], ev->param[4]); +} + /* Same parameter handling as handle_component_push(), forwarded to _starpu_fxt_component_pull(). */ +static void handle_component_pull(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + double current_timestamp = 
get_event_time_stamp(ev, options); + int workerid = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); + _starpu_fxt_component_pull(anim_file, options, current_timestamp, workerid, ev->param[1], ev->param[2], ev->param[3], ev->param[4]); +} + /* Increments nsubmitted, refreshes the component view, and reports the new value to the Paje trace (unless options->no_counter) and to activity_file, each only when that file is open. No event parameter is read besides the timestamp. */ +static +void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double current_timestamp = get_event_time_stamp(ev, options); + + nsubmitted++; + _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); + if (!options->no_counter && out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); + poti_SetVariable(current_timestamp, container, "nsubmitted", (double)nsubmitted); +#else + fprintf(out_paje_file, "13 %.9f %ssched nsubmitted %f\n", current_timestamp, options->file_prefix, (float)nsubmitted); +#endif + } + + + if (activity_file) + fprintf(activity_file, "cnt_submitted\t%.9f\t%d\n", current_timestamp, nsubmitted); +} + /* Add the tag in param[0] for the job in param[1] to the DAG; the label "tag" is passed only when options->label_deps is set, NULL otherwise. */ +static void handle_tag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + uint64_t tag; + unsigned long job; + + tag = ev->param[0]; + job = ev->param[1]; + + if (options->label_deps) + _starpu_fxt_dag_add_tag(options->file_prefix, tag, job, "tag"); + else + _starpu_fxt_dag_add_tag(options->file_prefix, tag, job, NULL); +} + /* Add a dependency between two tags to the DAG: child is param[0], father is param[1]; labelled "tag" only when options->label_deps is set. */ +static void handle_tag_deps(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + uint64_t child; + uint64_t father; + + child = ev->param[0]; + father = ev->param[1]; + + if (options->label_deps) + _starpu_fxt_dag_add_tag_deps(options->file_prefix, child, father, "tag"); + else + _starpu_fxt_dag_add_tag_deps(options->file_prefix, child, father, NULL); +} + /* Record that job dep_succ (param[1]) depends on job dep_prev (param[0]). param[2] is stored as the type of the successor task and the string read with get_fxt_string(ev,4) is kept as the dependency label. */ +static void handle_task_deps(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long dep_prev = ev->param[0]; + unsigned long dep_succ = ev->param[1]; + unsigned dep_succ_type = ev->param[2]; + char *name = get_fxt_string(ev,4); + + struct 
task_info *task = get_task(dep_succ, options->file_rank); + struct task_info *prev_task = get_task(dep_prev, options->file_rank); + unsigned alloc = 0; /* new capacity to allocate, 0 when the arrays do not need to grow */ + + task->type = dep_succ_type; + + if (task->ndeps == 0) + /* Start with 8=2^3, should be plenty in most cases */ + alloc = 8; + else if (task->ndeps >= 8) + { + /* Allocate dependencies array by powers of two */ + if (! ((task->ndeps - 1) & task->ndeps)) /* Is task->ndeps a power of two? */ + { + /* We have filled the previous power of two, get another one */ + alloc = task->ndeps * 2; + } + } + if (alloc) + { /* both arrays are grown together so that they stay indexable by the same position */ + _STARPU_REALLOC(task->dependencies, sizeof(*task->dependencies) * alloc); + _STARPU_REALLOC(task->dep_labels, sizeof(*task->dep_labels) * alloc); + } + task->dependencies[task->ndeps] = dep_prev; + task->dep_labels[task->ndeps] = strdup(name); /* NOTE(review): the strdup() result is stored without a NULL check */ + task->ndeps++; + + /* There is a dependency between the two job ids: dep_prev -> dep_succ */ + if (show_task(task, options) && show_task(prev_task, options)) + { + if (!options->label_deps) name = NULL; + /* We should show the name of the predecessor, then. */ + prev_task->show = 1; + _starpu_fxt_dag_add_task_deps(options->file_prefix, dep_prev, dep_succ, name); + } +} + /* Record dep_prev (param[0]) in the end_dependencies array of task dep_succ (param[1]), growing that array with the same policy as handle_task_deps(): 8 entries first, then doubling whenever the count reaches a power of two. */ +static void handle_task_end_dep(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long dep_prev = ev->param[0]; + unsigned long dep_succ = ev->param[1]; + + struct task_info *task = get_task(dep_succ, options->file_rank); + unsigned alloc = 0; + + if (task->nend_deps == 0) + /* Start with 8=2^3, should be plenty in most cases */ + alloc = 8; + else if (task->nend_deps >= 8) + { + /* Allocate dependencies array by powers of two */ + if (! ((task->nend_deps - 1) & task->nend_deps)) /* Is task->ndeps a power of two? 
*/ /* (the counter actually tested by this condition is task->nend_deps) */ + { + /* We have filled the previous power of two, get another one */ + alloc = task->nend_deps * 2; + } + } + if (alloc) + { + _STARPU_REALLOC(task->end_dependencies, sizeof(*task->end_dependencies) * alloc); + } + task->end_dependencies[task->nend_deps++] = dep_prev; + /* The DAG edge is emitted only for tasks that are shown and not excluded from the DAG. */ + if (!task->exclude_from_dag && show_task(task, options)) + _starpu_fxt_dag_add_task_end_dep(options->file_prefix, dep_succ, dep_prev); +} + /* Store the submission data of job param[0] in its task_info: submit time (event timestamp), iteration (param[1]), subiteration (param[2]), submit order (param[3]), priority (param[4], converted to long) and type (param[5]). */ +static void handle_task_submit(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + unsigned long iteration = ev->param[1]; + unsigned long subiteration = ev->param[2]; + unsigned long submit_order = ev->param[3]; + long priority = (long) ev->param[4]; + unsigned type = ev->param[5]; + + struct task_info *task = get_task(job_id, options->file_rank); + task->submit_time = get_event_time_stamp(ev, options); + task->submit_order = submit_order; + task->priority = priority; + task->iterations[0] = iteration; + task->iterations[1] = subiteration; + task->type = type; +} + /* Store param[1] as the color of job param[0]. handle_task_name() treats a color of 0 as "no color set". */ +static void handle_task_color(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + struct task_info *task = get_task(job_id, options->file_rank); + int color = (long) ev->param[1]; + + task->color = color; +} + /* Store param[1] as the exclude_from_dag flag of job param[0]; the DAG-emitting handlers test this flag. */ +static void handle_task_exclude_from_dag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + unsigned exclude_from_dag = ev->param[1]; + + struct task_info *task = get_task(job_id, options->file_rank); + task->exclude_from_dag = exclude_from_dag; +} + /* Name job param[0] with the string read by get_fxt_string(ev,2) and choose its DAG fill and font colors; param[1] is mapped to a worker id and used only for the fallback color. */ +static void handle_task_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + char *name = get_fxt_string(ev,2); + + char *prefix = options->file_prefix; + struct task_info *task = get_task(job_id, options->file_rank); + int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); + + const char *color; + char buffer[32]; + int 
code; /* brightness class: sum of the three color channels divided by 256; it selects the font color below */ + if (task->color != 0) + { /* explicit task color: formatted as a six-digit hexadecimal value */ + snprintf(buffer, sizeof(buffer), "#%06x", task->color); + color = &buffer[0]; + code = ((task->color & 0xff) + + ((task->color >> 8) & 0xff) + + ((task->color >> 16) & 0xff)) / 256; + } + else if (options->per_task_colour) + { /* color derived from the task name; each channel is divided by 4 and printed with a leading zero when below 16 */ + unsigned red = get_color_symbol_red(name)/4; + unsigned green = get_color_symbol_green(name)/4; + unsigned blue = get_color_symbol_blue(name)/4; + snprintf(buffer, sizeof(buffer), "#%s%x%s%x%s%x", + red < 16 ? "0" : "", red, + green < 16 ? "0" : "", green, + blue < 16 ? "0" : "", blue); + color = &buffer[0]; + code = (red + green + blue) / 256; + } + else + { /* fallback: the worker color, or grey when the worker is unknown (worker < 0) */ + color= (worker < 0)?"#aaaaaa":get_worker_color(worker); + code = 0; + } + /* The first name received for a task is kept; later names are ignored. */ + if (!task->name) + task->name = strdup(name); + + char *fontcolor = code <= 1 ? "white" : "black"; + if (!task->exclude_from_dag && show_task(task, options)) + _starpu_fxt_dag_set_task_name(options->file_prefix, job_id, task->name, color, fontcolor); +} + /* Only built with STARPU_BUBBLE: store the is_bubble flag (param[1]) and bubble_parent (param[2]) of job param[0], and forward them to the DAG when the task is shown and not excluded. */ +#ifdef STARPU_BUBBLE +static void handle_task_bubble(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + int is_bubble = (int)ev->param[1]; + unsigned long bubble_parent = ev->param[2]; + + struct task_info *task = get_task(job_id, options->file_rank); + task->is_bubble = is_bubble; + task->bubble_parent = bubble_parent; + + if (!task->exclude_from_dag && show_task(task, options)) + _starpu_fxt_dag_set_task_bubble(options->file_prefix, job_id, task->is_bubble, task->bubble_parent); +} +#endif + /* Store the source location of job param[0]: line number from param[1], file name from get_fxt_string(ev,2), and forward it to the DAG when the task is shown and not excluded. */ +static void handle_task_line(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned long job_id = ev->param[0]; + int line = ev->param[1]; + char *file = get_fxt_string(ev,2); + + struct task_info *task = get_task(job_id, options->file_rank); + task->file = strdup(file); /* NOTE(review): unlike task->name in handle_task_name(), task->file is overwritten unconditionally; if this event can occur twice for one job the earlier copy is not freed here -- confirm */ + task->line = line; + + if (!task->exclude_from_dag && show_task(task, options)) + _starpu_fxt_dag_set_task_line(options->file_prefix, job_id, task->file, line); +} + + +static void 
handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + /* Ideally, we would be able to dump tasks as they terminate, to save + * memory. + * We however may have to change their state later, e.g. the show field, + * due to dependencies added way later. */ +#if 0 + unsigned long job_id; + job_id = ev->param[0]; + + struct task_info *task = get_task(job_id, options->file_rank); + + task_dump(task, options); +#else /* active branch: nothing is done; the casts only mark both parameters as used */ + (void) ev; + (void) options; +#endif +} + /* Mark tag param[0] as done in the DAG and choose its fill and font colors. param[1] is mapped to a worker id; when param[2] is non-zero the name is read with get_fxt_string(ev,3), otherwise "unknown" is used. */ +static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + + uint64_t tag_id; + tag_id = ev->param[0]; + + unsigned long has_name = ev->param[2]; + char *name = has_name?get_fxt_string(ev,3):"unknown"; + int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); + + const char *color; + char buffer[32]; + int code; /* brightness class used for the font color choice at the end of the function */ + if (options->per_task_colour) + { /* color derived from the name; each channel is divided by 4 and printed with a leading zero when below 16 */ + unsigned red = get_color_symbol_red(name)/4; + unsigned green = get_color_symbol_green(name)/4; + unsigned blue = get_color_symbol_blue(name)/4; + snprintf(buffer, sizeof(buffer), "#%s%x%s%x%s%x", + red < 16 ? "0" : "", red, + green < 16 ? "0" : "", green, + blue < 16 ? "0" : "", blue); + color = &buffer[0]; + code = (red + green + blue) / 256; + } + else + { /* NOTE(review): with worker < 0 the fill color is "white" and code = 1 also selects a "white" font in the test below -- confirm that the label stays readable */ + color= (worker < 0)?"white":get_worker_color(worker); + code = 1; + } + + char *fontcolor = code <= 1 ? 
"white" : "black"; + _starpu_fxt_dag_set_tag_done(options->file_prefix, tag_id, color, fontcolor); +} + /* Add a "prog_event" to the Paje trace for the end of a barrier. rank is param[0] and must match options->file_rank unless that is -1. A non-zero sync_time (param[3]) is converted with compute_time_stamp() and labelled as an mpi_sync_clocks barrier; otherwise the event timestamp is used and the label says MPI_Barrier. */ +static void handle_mpi_barrier(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int rank = ev->param[0]; + double sync_time = ev->param[3]; + + STARPU_ASSERT(rank == options->file_rank || options->file_rank == -1); + + /* Add an event in the trace */ + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN], paje_value[STARPU_POTI_STR_LEN]; + snprintf(container, sizeof(container), "%sp", options->file_prefix); + if (sync_time != 0) + { + snprintf(paje_value, sizeof(paje_value), "\"end of mpi_sync_clocks_barrier, rank %d\"", rank); + poti_NewEvent(compute_time_stamp(sync_time, options), container, "prog_event", paje_value); + } + else + { + snprintf(paje_value, sizeof(paje_value), "\"end of MPI_Barrier, rank %d\"", rank); + poti_NewEvent(get_event_time_stamp(ev, options), container, "prog_event", paje_value); + } +#else + if (sync_time != 0) + { + fprintf(out_paje_file, "9 %.9f prog_event %sp \"end of mpi_sync_clocks_barrier, rank %d\"\n", compute_time_stamp(sync_time, options), options->file_prefix, rank); + } + else + { + fprintf(out_paje_file, "9 %.9f prog_event %sp \"end of MPI_Barrier, rank %d\"\n", get_event_time_stamp(ev, options), options->file_prefix, rank); + } +#endif + } +} + /* When a Paje trace is being written, create the "MPICt" container of the MPI communication thread at date 0 under the program container, and initialise its "bwi_mpi" and "bwo_mpi" variables to zero. */ +static void show_mpi_thread(struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + + if (out_paje_file) + { + double date = 0.; +#ifdef STARPU_HAVE_POTI + char program_container[STARPU_POTI_STR_LEN]; + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); + char new_mpicommthread_container_alias[STARPU_POTI_STR_LEN]; + mpicommthread_container_alias(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, prefix); /* the alias written here is overwritten by the snprintf() that follows */ + snprintf(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, "%smpict", prefix); + poti_CreateContainer(date, new_mpicommthread_container_alias, "MPICt", program_container, 
new_mpicommthread_container_alias); + //set bandwidth variables to zero when they start + poti_SetVariable(date, new_mpicommthread_container_alias, "bwi_mpi", 0.); + poti_SetVariable(date, new_mpicommthread_container_alias, "bwo_mpi", 0.); +#else + fprintf(out_paje_file, "7 %.9f %smpict MPICt %sp %smpict\n", date, prefix, prefix, prefix); + //set bandwidth variables to zero when they start + fprintf(out_paje_file, "13 %.9f %smpict bwi_mpi 0.0\n", date, prefix); + fprintf(out_paje_file, "13 %.9f %smpict bwo_mpi 0.0\n", date, prefix); +#endif + } +} + /* Register the MPI thread given by param[2] for this node, show its container unless this is rank 1 of a two-file run (in that case _starpu_fxt_parse_new_file() shows it itself), then set the thread state to "Sl". */ +static void handle_mpi_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + char *prefix = options->file_prefix; + + register_mpi_thread(prefixTOnodeid(prefix), ev->param[2]); + + if (!(options->ninputfiles == 2 && options->file_rank == 1)) + show_mpi_thread(options); + + do_mpicommthread_set_state(date, prefix, "Sl"); + +} + /* When a Paje trace is being written, destroy the "MPICt" container of the MPI communication thread at the event timestamp. */ +static void handle_mpi_stop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + char *prefix = options->file_prefix; + + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char mpicommthread_container[STARPU_POTI_STR_LEN]; + mpicommthread_container_alias(mpicommthread_container, STARPU_POTI_STR_LEN, prefix); + poti_DestroyContainer(date, "MPICt", mpicommthread_container); +#else + fprintf(out_paje_file, "8 %.9f %smpict MPICt\n", + date, prefix); +#endif + } +} + /* Set the state of the MPI communication thread to "SdS" at the event timestamp. */ +static void handle_mpi_isend_submit_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "SdS"); +} + /* mpi_warned: set once the single-trace-file warning has been printed, so that it is printed only once. */ +static int mpi_warned; /* handle_mpi_isend_submit_end: set the MPI thread state to "P" and, when a file rank is known, register the send described by param[0] to param[6] (type, dest, mpi_tag, size, jobid, handle, prio). */ +static void handle_mpi_isend_submit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + unsigned type = ev->param[0]; + int dest = ev->param[1]; + int mpi_tag = ev->param[2]; + size_t size = ev->param[3]; + long jobid = ev->param[4]; + unsigned 
long handle = ev->param[5]; + int prio = ev->param[6]; + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); + /* A negative file_rank means transfers cannot be registered: warn once and skip. */ + if (options->file_rank < 0) + { + if (!mpi_warned) + { + _STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n"); + mpi_warned = 1; + } + } + else + _starpu_fxt_mpi_add_send_transfer(options->file_rank, dest, mpi_tag, size, date, jobid, handle, type, prio); +} + /* When a file rank is known, attach the NUMA node bitmap in param[2] to the send transfer identified by dest (param[0]) and jobid (param[1]). */ +static void handle_mpi_isend_numa_node(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int dest = ev->param[0]; + long jobid = ev->param[1]; + long numa_nodes_bitmap = ev->param[2]; + + if (options->file_rank >= 0) + _starpu_fxt_mpi_send_transfer_set_numa_node(options->file_rank, dest, jobid, numa_nodes_bitmap); +} + /* Each of the handlers below only sets the state of the MPI communication thread, at the event timestamp, to the state name passed in its call. */ +static void handle_mpi_irecv_submit_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "RvS"); +} + +static void handle_mpi_irecv_submit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); +} + +static void handle_mpi_isend_complete_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "SdC"); +} + +static void handle_mpi_isend_complete_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); +} + +static void handle_mpi_irecv_complete_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "RvC"); +} + +static void 
handle_mpi_irecv_complete_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); +} + /* When a file rank is known, register the reception described by src (param[0]), mpi_tag (param[1]), jobid (param[2]) and handle (param[4]; param[3] is not read). Otherwise print the single-trace-file warning once. */ +static void handle_mpi_irecv_terminated(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int src = ev->param[0]; + int mpi_tag = ev->param[1]; + long jobid = ev->param[2]; + unsigned long handle = ev->param[4]; + double date = get_event_time_stamp(ev, options); + + if (options->file_rank < 0) + { + if (!mpi_warned) + { + _STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n"); + mpi_warned = 1; + } + } + else + _starpu_fxt_mpi_add_recv_transfer(src, options->file_rank, mpi_tag, date, jobid, handle); +} + /* When a file rank is known, attach the NUMA node bitmap in param[2] to the receive transfer identified by src (param[0]) and jobid (param[1]). */ +static void handle_mpi_irecv_numa_node(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int src = ev->param[0]; + long jobid = ev->param[1]; + long numa_nodes_bitmap = ev->param[2]; + + if (options->file_rank >= 0) + _starpu_fxt_mpi_recv_transfer_set_numa_node(src, options->file_rank, jobid, numa_nodes_bitmap); +} + /* Each of the handlers below only sets the state of the MPI communication thread, at the event timestamp, to the state name passed in its call. */ +static void handle_mpi_sleep_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "Sl"); +} + +static void handle_mpi_sleep_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "Pl"); +} + +static void handle_mpi_dtesting_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "DT"); +} + +static void handle_mpi_dtesting_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, 
options->file_prefix, "P"); +} + /* The utesting and uwait handlers below set the state of the MPI communication thread, at the event timestamp, to the state name passed in their call. */ +static void handle_mpi_utesting_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "UT"); +} + +static void handle_mpi_utesting_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); +} + +static void handle_mpi_uwait_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "UW"); +} + +static void handle_mpi_uwait_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_set_state(date, options->file_prefix, "P"); +} + /* The testing_detached and test handlers below push a state at the begin event and pop it at the end event, instead of setting a state. */ +static void handle_mpi_testing_detached_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_push_state(date, options->file_prefix, "TD"); +} + +static void handle_mpi_testing_detached_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_pop_state(date, options->file_prefix); +} + +static void handle_mpi_test_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_push_state(date, options->file_prefix, "MT"); +} + +static void handle_mpi_test_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + do_mpicommthread_pop_state(date, options->file_prefix); +} + /* The polling handlers call mpicommthread_set_state() directly and only when a Paje trace is being written. */ +static void handle_mpi_polling_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + if (out_paje_file) + mpicommthread_set_state(date, 
options->file_prefix, "Pl"); +} + +static void handle_mpi_polling_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + if (out_paje_file) + mpicommthread_set_state(date, options->file_prefix, "P"); +} + /* The driver_run handlers likewise call mpicommthread_set_state() directly and only when a Paje trace is being written. */ +static void handle_mpi_driver_run_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + if (out_paje_file) + mpicommthread_set_state(date, options->file_prefix, "Dr"); +} + +static void handle_mpi_driver_run_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + double date = get_event_time_stamp(ev, options); + + if (out_paje_file) + mpicommthread_set_state(date, options->file_prefix, "Pl"); +} + /* When activity_file is open, append a "set_profiling" record with the event timestamp and the status read from param[0]. */ +static void handle_set_profiling(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + int status = ev->param[0]; + + if (activity_file) + fprintf(activity_file, "set_profiling\t%.9f\t%d\n", get_event_time_stamp(ev, options), status); +} + /* Add a synchronisation point to the DAG; takes no event data. */ +static void handle_task_wait_for_all(void) +{ + _starpu_fxt_dag_add_sync_point(); +} + /* Emit the string event as a "prog_event" in the Paje trace (if open) and as a "ProgEvent" state in trace_file (if open). Only the non-POTI branch wraps the string in double quotes itself. */ +static void handle_string_event(struct fxt_ev_64 *ev, const char *event, struct starpu_fxt_options *options) +{ + /* Add an event in the trace */ + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + snprintf(container, sizeof(container), "%sp", options->file_prefix); + poti_NewEvent(get_event_time_stamp(ev, options), container, "prog_event", event); +#else + fprintf(out_paje_file, "9 %.9f prog_event %sp \"%s\"\n", get_event_time_stamp(ev, options), options->file_prefix, event); +#endif + } + + if (trace_file) + recfmt_dump_state(get_event_time_stamp(ev, options), "ProgEvent", -1, 0, event, "Program"); +} + /* Read the event string with get_fxt_string(ev, 0) and pass it to handle_string_event(). */ +static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) +{ + char *event = get_fxt_string(ev, 0); + handle_string_event(ev, event, options); +} + /* Emit a "thread_event" in the Paje trace for the thread given by param[0], using the string read with get_fxt_string(ev, 1). */ +static void handle_thread_event(struct fxt_ev_64 *ev, struct starpu_fxt_options 
*options) +{ + /* Add an event in the trace */ + if (out_paje_file) + { + char *event = get_fxt_string(ev, 1); + +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + thread_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix, ev->param[0]); + poti_NewEvent(get_event_time_stamp(ev, options), container, "thread_event", event); +#else + fprintf(out_paje_file, "9 %.9f thread_event %st%"PRIu64" %s\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], event); +#endif + } +} + /* Consume the leading entries of communication_list that have a peer: each one is removed from the list, its bandwidth is added to the running outgoing total of its source node and incoming total of its destination node, the new totals are written to the Paje trace (if open) as "bwo_mm" and "bwi_mm" at the entry's comm_start, and the entry is deleted. The entries created when a copy ends carry a negated bandwidth, so adding them lowers the totals again. */ +static +void _starpu_fxt_process_bandwidth(struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + struct _starpu_communication *itor, *next; + + /* Loop through completed communications */ + for (itor = _starpu_communication_list_begin(&communication_list); + itor != _starpu_communication_list_end(&communication_list); + itor = next) + { /* the successor is fetched first because itor is erased and deleted below */ + next = _starpu_communication_list_next(itor); + /* stop at the first entry that has no peer yet; everything after it is left in the list */ + if (!itor->peer) + break; + + /* This communication is complete */ + _starpu_communication_list_erase(&communication_list, itor); + + current_bandwidth_out_per_node[itor->src_node] += itor->bandwidth; + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char src_memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node); + poti_SetVariable(itor->comm_start, src_memnode_container, "bwo_mm", current_bandwidth_out_per_node[itor->src_node]); +#else + fprintf(out_paje_file, "13 %.9f %smm%u bwo_mm %f\n", + itor->comm_start, prefix, itor->src_node, current_bandwidth_out_per_node[itor->src_node]); +#endif + } + + current_bandwidth_in_per_node[itor->dst_node] += itor->bandwidth; + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char dst_memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node); + poti_SetVariable(itor->comm_start, dst_memnode_container, "bwi_mm", 
current_bandwidth_in_per_node[itor->dst_node]); +#else + fprintf(out_paje_file, "13 %.9f %smm%u bwi_mm %f\n", + itor->comm_start, prefix, itor->dst_node, current_bandwidth_in_per_node[itor->dst_node]); +#endif + } + _starpu_communication_delete(itor); + } +} + /* Consume the leading entries of computation_list that have a peer: each one is popped, its gflops value is added to current_computation and current_computation_time is moved to its comp_start. Before the update, the previous total is written to the Paje trace (if open) as "gft" at the previous time, but only when the popped entry starts at a different time, so that entries sharing one timestamp produce a single value. */ +static +void _starpu_fxt_process_computations(struct starpu_fxt_options *options) +{ + char *prefix = options->file_prefix; + + /* Loop through completed computations */ + struct _starpu_computation*itor; + while (!_starpu_computation_list_empty(&computation_list) + && _starpu_computation_list_begin(&computation_list)->peer) + { + /* This computation is complete */ + itor = _starpu_computation_list_pop_front(&computation_list); + + if (out_paje_file && itor->comp_start != current_computation_time) + { + /* flush last value */ +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + + scheduler_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_SetVariable(current_computation_time, container, "gft", (double)current_computation); +#else + fprintf(out_paje_file, "13 %.9f %ssched gft %f\n", current_computation_time, prefix, (float)current_computation); +#endif + } + current_computation += itor->gflops; + current_computation_time = itor->comp_start; + + _starpu_computation_delete(itor); + } +} + +static +void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *options) +{ + /* Open the trace file */ + int fd_in; + fd_in = open(filename_in, O_RDONLY); + if (fd_in < 0) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno)); + } + + static fxt_t fut; + fut = fxt_fdopen(fd_in); + if (!fut) + { + perror("fxt_fdopen :"); + _exit(EXIT_FAILURE); + } + + fxt_blockev_t block; + block = fxt_blockev_enter(fut); + + char *prefix = options->file_prefix; + + /* TODO starttime ...*/ + /* create the "program" container */ + current_computation = 0.0; + current_computation_time = 0.0; + if (out_paje_file) + { +#ifdef STARPU_HAVE_POTI + char 
new_program_container_alias[STARPU_POTI_STR_LEN], new_program_container_name[STARPU_POTI_STR_LEN]; + program_container_alias(new_program_container_alias, STARPU_POTI_STR_LEN, prefix); + snprintf(new_program_container_name, sizeof(new_program_container_name), "program %s", prefix); + poti_CreateContainer (0, new_program_container_alias, "P", "MPIroot", new_program_container_name); + char new_scheduler_container_alias[STARPU_POTI_STR_LEN], new_scheduler_container_name[STARPU_POTI_STR_LEN]; + scheduler_container_alias(new_scheduler_container_alias, STARPU_POTI_STR_LEN, prefix); + snprintf(new_scheduler_container_name, sizeof(new_scheduler_container_name), "%sscheduler", prefix); + if (!options->no_counter || !options->no_flops) + { + poti_CreateContainer(0.0, new_scheduler_container_alias, "Sc", new_program_container_alias, new_scheduler_container_name); + } + if (!options->no_counter) + { + poti_SetVariable(0.0, new_scheduler_container_alias, "nsubmitted", 0.0); + poti_SetVariable(0.0, new_scheduler_container_alias, "nready", 0.0); + } + if (!options->no_flops) + { + poti_SetVariable(0.0, new_scheduler_container_alias, "gft", 0.0); + } +#else + fprintf(out_paje_file, "7 0.0 %sp P MPIroot %sprogram \n", prefix, prefix); + if (!options->no_counter || !options->no_flops) + { + fprintf(out_paje_file, "7 0.0 %ssched Sc %sp %sscheduler\n", prefix, prefix, prefix); + } + if (!options->no_counter) + { + /* create a variable with the number of tasks */ + fprintf(out_paje_file, "13 0.0 %ssched nsubmitted 0.0\n", prefix); + fprintf(out_paje_file, "13 0.0 %ssched nready 0.0\n", prefix); + } + if (!options->no_flops) + { + fprintf(out_paje_file, "13 0.0 %ssched gft 0.0\n", prefix); + } +#endif + } + + if ((options->ninputfiles == 2 && options->file_rank == 1)) + /* put the mpi thread at the top, so MPI communications nicely show up in the middle */ + show_mpi_thread(options); + + struct fxt_ev_64 ev; + while(1) + { + unsigned i; + int ret = fxt_next_ev(block, FXT_EV_TYPE_64, 
(struct fxt_ev *)&ev); + for (i = ev.nb_params; i < FXT_MAX_PARAMS; i++) + ev.param[i] = 0; + if (ret != FXT_EV_OK) + { + break; + } + + if (number_events_file != NULL) + { + assert(number_events != NULL); + assert(ev.code <= FUT_SETUP_CODE); + number_events[ev.code]++; + } + + switch (ev.code) + { + case _STARPU_FUT_WORKER_INIT_START: + handle_worker_init_start(&ev, options); + break; + + case _STARPU_FUT_WORKER_INIT_END: + handle_worker_init_end(&ev, options); + break; + + case _STARPU_FUT_NEW_MEM_NODE: + handle_new_mem_node(&ev, options); + break; + + /* detect when the workers were idling or not */ + case _STARPU_FUT_START_CODELET_BODY: + handle_start_codelet_body(&ev, options); + break; + case _STARPU_FUT_MODEL_NAME: + handle_model_name(&ev, options); + break; + case _STARPU_FUT_CODELET_DATA: + handle_codelet_data(&ev, options); + break; + case _STARPU_FUT_CODELET_DATA_HANDLE: + handle_codelet_data_handle(&ev, options); + break; + case _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS: + handle_codelet_data_handle_numa_access(&ev, options); + break; + case _STARPU_FUT_CODELET_DETAILS: + handle_codelet_details(&ev, options); + break; + case _STARPU_FUT_END_CODELET_BODY: + handle_end_codelet_body(&ev, options); + break; + + case _STARPU_FUT_START_EXECUTING: + handle_start_executing(&ev, options); + break; + case _STARPU_FUT_END_EXECUTING: + handle_end_executing(&ev, options); + break; + + case _STARPU_FUT_START_PARALLEL_SYNC: + handle_start_parallel_sync(&ev, options); + break; + case _STARPU_FUT_END_PARALLEL_SYNC: + handle_end_parallel_sync(&ev, options); + break; + + case _STARPU_FUT_START_CALLBACK: + handle_start_callback(&ev, options); + break; + case _STARPU_FUT_END_CALLBACK: + handle_end_callback(&ev, options); + break; + + case _STARPU_FUT_UPDATE_TASK_CNT: + handle_update_task_cnt(&ev, options); + break; + + /* monitor stack size and generate sched_tasks.rec */ + case _STARPU_FUT_JOB_PUSH: + handle_job_push(&ev, options); + break; + case _STARPU_FUT_JOB_POP: + 
handle_job_pop(&ev, options); + break; + + case _STARPU_FUT_SCHED_COMPONENT_NEW: + handle_component_new(&ev, options); + break; + case _STARPU_FUT_SCHED_COMPONENT_CONNECT: + handle_component_connect(&ev, options); + break; + case _STARPU_FUT_SCHED_COMPONENT_PUSH: + handle_component_push(&ev, options); + break; + case _STARPU_FUT_SCHED_COMPONENT_PULL: + handle_component_pull(&ev, options); + break; + + /* check the memory transfer overhead */ + case _STARPU_FUT_START_FETCH_INPUT_ON_TID: + handle_worker_status_on_tid(&ev, options, "Fi"); + break; + case _STARPU_FUT_START_PUSH_OUTPUT_ON_TID: + handle_worker_status_on_tid(&ev, options, "Po"); + break; + case _STARPU_FUT_START_PROGRESS_ON_TID: + handle_worker_status_on_tid(&ev, options, "P"); + break; + case _STARPU_FUT_START_UNPARTITION_ON_TID: + handle_worker_status_on_tid(&ev, options, "U"); + break; + case _STARPU_FUT_END_FETCH_INPUT_ON_TID: + case _STARPU_FUT_END_PROGRESS_ON_TID: + case _STARPU_FUT_END_PUSH_OUTPUT_ON_TID: + case _STARPU_FUT_END_UNPARTITION_ON_TID: + handle_worker_status_on_tid(&ev, options, "B"); + break; + + case _STARPU_FUT_START_FETCH_INPUT: + handle_worker_status(&ev, options, "Fi"); + break; + + case _STARPU_FUT_END_FETCH_INPUT: + handle_worker_status(&ev, options, "B"); + break; + + case _STARPU_FUT_WORKER_SCHEDULING_START: + handle_worker_scheduling_start(&ev, options); + break; + + case _STARPU_FUT_WORKER_SCHEDULING_END: + handle_worker_scheduling_end(&ev, options); + break; + + case _STARPU_FUT_WORKER_SCHEDULING_PUSH: + handle_worker_scheduling_push(&ev, options); + break; + + case _STARPU_FUT_WORKER_SCHEDULING_POP: + handle_worker_scheduling_pop(&ev, options); + break; + + case _STARPU_FUT_WORKER_SLEEP_START: + handle_worker_sleep_start(&ev, options); + break; + + case _STARPU_FUT_WORKER_SLEEP_END: + handle_worker_sleep_end(&ev, options); + break; + + case _STARPU_FUT_TAG: + handle_tag(&ev, options); + break; + + case _STARPU_FUT_TAG_DEPS: + handle_tag_deps(&ev, options); + break; + + 
case _STARPU_FUT_TASK_DEPS: + handle_task_deps(&ev, options); + break; + + case _STARPU_FUT_TASK_END_DEP: + handle_task_end_dep(&ev, options); + break; + + case _STARPU_FUT_TASK_SUBMIT: + handle_task_submit(&ev, options); + break; + + case _STARPU_FUT_TASK_BUILD_START: + handle_task_submit_event(&ev, options, ev.param[0], "Bu"); + break; + + case _STARPU_FUT_TASK_SUBMIT_START: + handle_task_submit_event(&ev, options, ev.param[0], "Su"); + break; + + case _STARPU_FUT_TASK_THROTTLE_START: + handle_task_submit_event(&ev, options, ev.param[0], "Th"); + break; + + case _STARPU_FUT_TASK_MPI_DECODE_START: + handle_task_submit_event(&ev, options, ev.param[0], "MD"); + break; + + case _STARPU_FUT_TASK_MPI_PRE_START: + handle_task_submit_event(&ev, options, ev.param[0], "MPr"); + break; + + case _STARPU_FUT_TASK_MPI_POST_START: + handle_task_submit_event(&ev, options, ev.param[0], "MPo"); + break; + + case _STARPU_FUT_TASK_WAIT_START: + handle_task_submit_event(&ev, options, ev.param[1], "W"); + break; + + case _STARPU_FUT_TASK_WAIT_FOR_ALL_START: + handle_task_submit_event(&ev, options, ev.param[0], "WA"); + break; + + case _STARPU_FUT_TASK_BUILD_END: + case _STARPU_FUT_TASK_SUBMIT_END: + case _STARPU_FUT_TASK_THROTTLE_END: + case _STARPU_FUT_TASK_MPI_DECODE_END: + case _STARPU_FUT_TASK_MPI_PRE_END: + case _STARPU_FUT_TASK_MPI_POST_END: + case _STARPU_FUT_TASK_WAIT_FOR_ALL_END: + handle_task_submit_event(&ev, options, ev.param[0], NULL); + break; + + case _STARPU_FUT_TASK_WAIT_END: + handle_task_submit_event(&ev, options, ev.param[0], NULL); + break; + + case _STARPU_FUT_TASK_EXCLUDE_FROM_DAG: + handle_task_exclude_from_dag(&ev, options); + break; + + case _STARPU_FUT_TASK_NAME: + handle_task_name(&ev, options); + break; + +#ifdef STARPU_BUBBLE + case _STARPU_FUT_TASK_BUBBLE: + handle_task_bubble(&ev, options); + break; +#endif + + case _STARPU_FUT_TASK_LINE: + handle_task_line(&ev, options); + break; + + case _STARPU_FUT_TASK_COLOR: + handle_task_color(&ev, options); + 
break; + + case _STARPU_FUT_TASK_DONE: + handle_task_done(&ev, options); + break; + + case _STARPU_FUT_TAG_DONE: + handle_tag_done(&ev, options); + break; + + case _STARPU_FUT_HANDLE_DATA_REGISTER: + handle_data_register(&ev, options); + break; + + case _STARPU_FUT_HANDLE_DATA_UNREGISTER: + handle_data_unregister(&ev, options); + break; + + case _STARPU_FUT_DATA_STATE_INVALID: + if (options->memory_states) + handle_data_state(&ev, options, "SI"); + break; + case _STARPU_FUT_DATA_STATE_OWNER: + if (options->memory_states) + handle_data_state(&ev, options, "SO"); + break; + case _STARPU_FUT_DATA_STATE_SHARED: + if (options->memory_states) + handle_data_state(&ev, options, "SS"); + break; + case _STARPU_FUT_DATA_REQUEST_CREATED: + if (!options->no_bus && options->memory_states) + { + handle_data_request(&ev, options, "rc"); + } + break; + case _STARPU_FUT_PAPI_TASK_EVENT_VALUE: + handle_papi_event(&ev, options); + break; + case _STARPU_FUT_DATA_COPY: + if (!options->no_bus) + handle_data_copy(); + break; + + case _STARPU_FUT_DATA_LOAD: + break; + + case _STARPU_FUT_DATA_NAME: + handle_data_name(&ev, options); + break; + + case _STARPU_FUT_DATA_COORDINATES: + handle_data_coordinates(&ev, options); + break; + + case _STARPU_FUT_DATA_WONT_USE: + handle_data_wont_use(&ev, options); + break; + + case _STARPU_FUT_DATA_DOING_WONT_USE: + if (options->memory_states) + handle_data_doing_wont_use(&ev, options); + break; + + case _STARPU_FUT_START_DRIVER_COPY: + if (!options->no_bus) + handle_start_driver_copy(&ev, options); + break; + + case _STARPU_FUT_END_DRIVER_COPY: + if (!options->no_bus) + handle_end_driver_copy(&ev, options); + break; + + case _STARPU_FUT_START_DRIVER_COPY_ASYNC: + if (!options->no_bus) + handle_start_driver_copy_async(&ev, options); + break; + + case _STARPU_FUT_END_DRIVER_COPY_ASYNC: + if (!options->no_bus) + handle_end_driver_copy_async(&ev, options); + break; + + case _STARPU_FUT_WORK_STEALING: + handle_work_stealing(&ev, options); + break; + + case 
_STARPU_FUT_WORKER_DEINIT_START: + handle_worker_deinit_start(&ev, options); + break; + + case _STARPU_FUT_WORKER_DEINIT_END: + handle_worker_deinit_end(&ev, options); + break; + + case _STARPU_FUT_START_ALLOC: + if (!options->no_bus) + { + handle_push_memnode_event(&ev, options, "A"); + handle_memnode_event_start_4(&ev, options, "Al"); + } + break; + case _STARPU_FUT_START_ALLOC_REUSE: + if (!options->no_bus) + { + handle_push_memnode_event(&ev, options, "Ar"); + handle_memnode_event_start_4(&ev, options, "Alr"); + } + break; + case _STARPU_FUT_END_ALLOC: + if (!options->no_bus) + { + handle_pop_memnode_event(&ev, options); + handle_memnode_event_end_3(&ev, options, "AlE"); + } + break; + case _STARPU_FUT_END_ALLOC_REUSE: + if (!options->no_bus) + { + handle_pop_memnode_event(&ev, options); + handle_memnode_event_end_3(&ev, options, "AlrE"); + } + break; + case _STARPU_FUT_START_FREE: + if (!options->no_bus) + { + handle_push_memnode_event(&ev, options, "F"); + handle_memnode_event_start_3(&ev, options, "Fe"); + } + break; + case _STARPU_FUT_END_FREE: + if (!options->no_bus) + { + handle_pop_memnode_event(&ev, options); + handle_memnode_event_end_2(&ev, options, "FeE"); + } + break; + case _STARPU_FUT_START_WRITEBACK: + if (!options->no_bus) + { + handle_push_memnode_event(&ev, options, "W"); + handle_memnode_event_start_2(&ev, options, "Wb"); + } + break; + case _STARPU_FUT_END_WRITEBACK: + if (!options->no_bus) + { + handle_pop_memnode_event(&ev, options); + handle_memnode_event_start_2(&ev, options, "WbE"); + } + break; + case _STARPU_FUT_START_WRITEBACK_ASYNC: + if (!options->no_bus) + handle_push_memnode_event(&ev, options, "Wa"); + break; + case _STARPU_FUT_END_WRITEBACK_ASYNC: + if (!options->no_bus) + handle_pop_memnode_event(&ev, options); + break; + case _STARPU_FUT_START_MEMRECLAIM: + if (!options->no_bus) + handle_push_memnode_event(&ev, options, "R"); + break; + case _STARPU_FUT_END_MEMRECLAIM: + if (!options->no_bus) + handle_pop_memnode_event(&ev, 
options); + break; + case _STARPU_FUT_USED_MEM: + handle_used_mem(&ev, options); + break; + + case _STARPU_FUT_USER_EVENT: + if (!options->no_events) + handle_user_event(&ev, options); + break; + + case _STARPU_MPI_FUT_START: + handle_mpi_start(&ev, options); + break; + + case _STARPU_MPI_FUT_STOP: + handle_mpi_stop(&ev, options); + break; + + case _STARPU_MPI_FUT_BARRIER: + handle_mpi_barrier(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN: + handle_mpi_isend_submit_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_SUBMIT_END: + handle_mpi_isend_submit_end(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_NUMA_NODE: + handle_mpi_isend_numa_node(&ev, options); + break; + + case _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN: + handle_mpi_irecv_submit_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_IRECV_SUBMIT_END: + handle_mpi_irecv_submit_end(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN: + handle_mpi_isend_complete_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_COMPLETE_END: + handle_mpi_isend_complete_end(&ev, options); + break; + + case _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN: + handle_mpi_irecv_complete_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_IRECV_COMPLETE_END: + handle_mpi_irecv_complete_end(&ev, options); + break; + + case _STARPU_MPI_FUT_ISEND_TERMINATED: + break; + + case _STARPU_MPI_FUT_IRECV_TERMINATED: + handle_mpi_irecv_terminated(&ev, options); + break; + + case _STARPU_MPI_FUT_IRECV_NUMA_NODE: + handle_mpi_irecv_numa_node(&ev, options); + break; + + case _STARPU_MPI_FUT_SLEEP_BEGIN: + handle_mpi_sleep_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_SLEEP_END: + handle_mpi_sleep_end(&ev, options); + break; + + case _STARPU_MPI_FUT_DTESTING_BEGIN: + handle_mpi_dtesting_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_DTESTING_END: + handle_mpi_dtesting_end(&ev, options); + break; + + case _STARPU_MPI_FUT_UTESTING_BEGIN: + handle_mpi_utesting_begin(&ev, 
options); + break; + + case _STARPU_MPI_FUT_UTESTING_END: + handle_mpi_utesting_end(&ev, options); + break; + + case _STARPU_MPI_FUT_UWAIT_BEGIN: + handle_mpi_uwait_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_UWAIT_END: + handle_mpi_uwait_end(&ev, options); + break; + + case _STARPU_MPI_FUT_DATA_SET_RANK: + handle_mpi_data_set_rank(&ev, options); + break; + case _STARPU_MPI_FUT_DATA_SET_TAG: + handle_mpi_data_set_tag(&ev, options); + break; + + case _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN: + handle_mpi_testing_detached_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_TESTING_DETACHED_END: + handle_mpi_testing_detached_end(&ev, options); + break; + + case _STARPU_MPI_FUT_TEST_BEGIN: + handle_mpi_test_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_TEST_END: + handle_mpi_test_end(&ev, options); + break; + + case _STARPU_MPI_FUT_POLLING_BEGIN: + handle_mpi_polling_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_POLLING_END: + handle_mpi_polling_end(&ev, options); + break; + + case _STARPU_MPI_FUT_DRIVER_RUN_BEGIN: + handle_mpi_driver_run_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_DRIVER_RUN_END: + handle_mpi_driver_run_end(&ev, options); + break; + + case _STARPU_MPI_FUT_CHECKPOINT_BEGIN: + handle_checkpoint_begin(&ev, options); + break; + + case _STARPU_MPI_FUT_CHECKPOINT_END: + handle_checkpoint_end(&ev, options); + break; + + case _STARPU_FUT_SET_PROFILING: + handle_set_profiling(&ev, options); + break; + + case _STARPU_FUT_TASK_WAIT_FOR_ALL: + handle_task_wait_for_all(); + break; + + case _STARPU_FUT_EVENT: + if (!options->no_events) + handle_event(&ev, options); + break; + + case _STARPU_FUT_THREAD_EVENT: + if (!options->no_events) + handle_thread_event(&ev, options); + break; + + case _STARPU_FUT_LOCKING_MUTEX: + break; + + case _STARPU_FUT_MUTEX_LOCKED: + break; + + case _STARPU_FUT_UNLOCKING_MUTEX: + break; + + case _STARPU_FUT_MUTEX_UNLOCKED: + break; + + case _STARPU_FUT_TRYLOCK_MUTEX: + break; + + case 
_STARPU_FUT_RDLOCKING_RWLOCK: + break; + + case _STARPU_FUT_RWLOCK_RDLOCKED: + break; + + case _STARPU_FUT_WRLOCKING_RWLOCK: + break; + + case _STARPU_FUT_RWLOCK_WRLOCKED: + break; + + case _STARPU_FUT_UNLOCKING_RWLOCK: + break; + + case _STARPU_FUT_RWLOCK_UNLOCKED: + break; + + case _STARPU_FUT_LOCKING_SPINLOCK: + break; + + case _STARPU_FUT_SPINLOCK_LOCKED: + break; + + case _STARPU_FUT_UNLOCKING_SPINLOCK: + break; + + case _STARPU_FUT_SPINLOCK_UNLOCKED: + break; + + case _STARPU_FUT_TRYLOCK_SPINLOCK: + break; + + case _STARPU_FUT_COND_WAIT_BEGIN: + break; + + case _STARPU_FUT_COND_WAIT_END: + break; + + case _STARPU_FUT_BARRIER_WAIT_BEGIN: + break; + + case _STARPU_FUT_BARRIER_WAIT_END: + break; + + case _STARPU_FUT_MEMORY_FULL: + break; + + case _STARPU_FUT_SCHED_COMPONENT_POP_PRIO: + break; + + case _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO: + break; + + case _STARPU_FUT_HYPERVISOR_BEGIN: + handle_hypervisor_begin(&ev, options); + break; + + case _STARPU_FUT_HYPERVISOR_END: + handle_hypervisor_end(&ev, options); + break; + + case FUT_SETUP_CODE: + fut_keymask = ev.param[0]; + break; + + case FUT_KEYCHANGE_CODE: + fut_keymask = ev.param[0]; + break; + + case FUT_START_FLUSH_CODE: + handle_string_event(&ev, "fxt_start_flush", options); + break; + case FUT_STOP_FLUSH_CODE: + handle_string_event(&ev, "fxt_stop_flush", options); + break; + + /* We can safely ignore FUT internal events */ + case FUT_CALIBRATE0_CODE: + case FUT_CALIBRATE1_CODE: + case FUT_CALIBRATE2_CODE: + case FUT_NEW_LWP_CODE: + case FUT_GCC_INSTRUMENT_ENTRY_CODE: + break; + + default: +#ifdef STARPU_VERBOSE + _STARPU_MSG("unknown event.. 
%x at time %llx WITH OFFSET %llx\n", + (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-options->file_offset.offset_start)); +#endif + break; + } + _starpu_fxt_process_bandwidth(options); + if (!options->no_flops) + _starpu_fxt_process_computations(options); + } + + unsigned i; + if (!options->no_flops) + { + /* computations are supposed to be over, unref any pending comp */ + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + struct _starpu_computation *comp = ongoing_computation[i]; + if (comp) + { + STARPU_ASSERT(!comp->peer); + _starpu_computation_list_erase(&computation_list, comp); + } + } + /* And flush completed computations */ + _starpu_fxt_process_computations(options); + } + + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + struct _starpu_computation *comp = ongoing_computation[i]; + if (comp) + { + STARPU_ASSERT(!comp->peer); + _starpu_computation_delete(comp); + ongoing_computation[i] = 0; + } + } + + if (!options->no_bus) + { + while (!_starpu_communication_list_empty(&communication_list)) + { + struct _starpu_communication*itor; + itor = _starpu_communication_list_pop_front(&communication_list); + + if (out_paje_file && !itor->peer) + { + /* Trace finished with this communication uncompleted, fake its termination */ + + unsigned comid = itor->comid; + unsigned long size = itor->size; + double time = current_computation_time; + const char *link_type = itor->type; +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lu", size); + snprintf(paje_key, sizeof(paje_key), "com_%u", comid); + char program_container[STARPU_POTI_STR_LEN]; + program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); +#endif + + if (itor->bandwidth > 0) + { + unsigned dst = itor->dst_node; + /* Fake termination of communication at end of time */ +#ifdef STARPU_HAVE_POTI + char dst_memnode_container[STARPU_POTI_STR_LEN]; + 
memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst); + poti_EndLink(time, program_container, link_type, dst_memnode_container, paje_value, paje_key); +#else + fprintf(out_paje_file, "19 %.9f %s %sp %lu %smm%u com_%u\n", time, link_type, prefix, size, prefix, dst, comid); +#endif + } + else + { + /* Fake start of communication at start of time */ + unsigned src = itor->src_node; + unsigned X = _starpu_fxt_data_get_coord(itor->handle, options->file_rank, 0); + unsigned Y = _starpu_fxt_data_get_coord(itor->handle, options->file_rank, 1); + const char *name = _starpu_fxt_data_get_name(itor->handle, options->file_rank); + if (!name) + name = ""; +#ifdef STARPU_HAVE_POTI + char str_handle[STARPU_POTI_STR_LEN]; + snprintf(str_handle, sizeof(str_handle), "%lx", itor->handle); + char X_str[STARPU_POTI_STR_LEN]; + snprintf(X_str, sizeof(X_str), "%u", X); + char Y_str[STARPU_POTI_STR_LEN]; + snprintf(Y_str, sizeof(Y_str), "%u", Y); + + char src_memnode_container[STARPU_POTI_STR_LEN]; + memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src); + poti_user_StartLink(_starpu_poti_CommLinkStart, 0., program_container, link_type, src_memnode_container, paje_value, paje_key, 4, str_handle, name, X_str, Y_str); +#else + fprintf(out_paje_file, "24 %.9f %s %sp %lu %smm%u com_%u %lx \"%s\" %u %u\n", 0., link_type, prefix, size, prefix, src, comid, itor->handle, name, X, Y); +#endif + } + } + _starpu_communication_delete(itor); + } + } + + if (out_paje_file && !options->no_flops) + { + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + if (last_codelet_end[i] != 0.0) + { +#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, i); + poti_SetVariable(last_codelet_end[i], container, "gf", 0.); +#else + fprintf(out_paje_file, "13 %.9f %sw%u gf %f\n", + last_codelet_end[i], prefix, i, 0.); +#endif + last_codelet_end[i] = 0.0; + } + } + + /* flush last value */ 
+#ifdef STARPU_HAVE_POTI + char container[STARPU_POTI_STR_LEN]; + + scheduler_container_alias(container, STARPU_POTI_STR_LEN, prefix); + poti_SetVariable(current_computation_time, container, "gft", (double)current_computation); +#else + fprintf(out_paje_file, "13 %.9f %ssched gft %f\n", current_computation_time, prefix, (float)current_computation); +#endif + } + + { + struct data_info *data=NULL, *tmp=NULL; + HASH_ITER(hh, data_info, data, tmp) + { + data_dump(data); + } + } + + { + struct task_info *task=NULL, *tmp=NULL; + HASH_ITER(hh, tasks_info, task, tmp) + { + task_dump(task, options); + } + } + + for (i = 0; i < STARPU_NMAXWORKERS; i++) + { + free(options->worker_archtypes[i].devices); + options->worker_archtypes[i].devices = NULL; + } + + _starpu_fxt_component_deinit(); + + free_worker_ids(); + +#ifdef HAVE_FXT_BLOCKEV_LEAVE + fxt_blockev_leave(block); +#endif + + /* Close the trace file */ +#ifdef HAVE_FXT_CLOSE + fxt_close(fut); +#else + if (close(fd_in)) + { + perror("close failed :"); + _exit(EXIT_FAILURE); + } +#endif +} + +/* Initialize FxT options to default values */ +void starpu_fxt_options_init(struct starpu_fxt_options *options) +{ + memset(options, 0, sizeof(struct starpu_fxt_options)); + options->out_paje_path = strdup("paje.trace"); + options->dag_path = strdup("dag.dot"); + options->tasks_path = strdup("tasks.rec"); + options->comms_path = strdup("comms.rec"); + options->data_path = strdup("data.rec"); + options->papi_path = strdup("papi.rec"); + options->anim_path = strdup("trace.html"); + options->states_path = strdup("trace.rec"); + options->distrib_time_path = strdup("distrib.data"); + options->activity_path = strdup("activity.data"); + options->sched_tasks_path = strdup("sched_tasks.rec"); +} + +static +void _set_dir(char *dir, char **option) +{ + if (*option) + { + char *tmp = strdup(*option); + free(*option); + _STARPU_MALLOC(*option, 256); + snprintf(*option, 256, "%s/%s", dir, tmp); + free(tmp); + } +} + +static +void 
_starpu_fxt_options_set_dir(struct starpu_fxt_options *options) +{ + if (!options->dir) + return; + + _starpu_mkpath_and_check(options->dir, S_IRWXU); + _set_dir(options->dir, &options->out_paje_path); + _set_dir(options->dir, &options->dag_path); + _set_dir(options->dir, &options->tasks_path); + _set_dir(options->dir, &options->comms_path); + _set_dir(options->dir, &options->number_events_path); + _set_dir(options->dir, &options->data_path); + _set_dir(options->dir, &options->papi_path); + _set_dir(options->dir, &options->anim_path); + _set_dir(options->dir, &options->states_path); + _set_dir(options->dir, &options->distrib_time_path); + _set_dir(options->dir, &options->activity_path); + _set_dir(options->dir, &options->sched_tasks_path); +} + +void starpu_fxt_options_shutdown(struct starpu_fxt_options *options) +{ + free(options->out_paje_path); + free(options->dag_path); + free(options->tasks_path); + free(options->comms_path); + free(options->number_events_path); + free(options->data_path); + free(options->papi_path); + free(options->anim_path); + free(options->states_path); + free(options->distrib_time_path); + free(options->activity_path); + free(options->sched_tasks_path); +} + +static +void _starpu_fxt_distrib_file_init(struct starpu_fxt_options *options) +{ + dumped_codelets_count = 0; + dumped_codelets = NULL; + + if (options->distrib_time_path) + { + distrib_time = fopen(options->distrib_time_path, "w+"); + if (distrib_time == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->distrib_time_path, strerror(errno)); + } + else + { + distrib_time = NULL; + } +} + +static +void _starpu_fxt_distrib_file_close(struct starpu_fxt_options *options) +{ + if (distrib_time) + fclose(distrib_time); + + if (options->dumped_codelets) + { + *options->dumped_codelets = dumped_codelets; + options->dumped_codelets_count = dumped_codelets_count; + } +} + +static +void _starpu_fxt_activity_file_init(struct starpu_fxt_options *options) +{ + if 
(options->activity_path) + { + activity_file = fopen(options->activity_path, "w+"); + if (activity_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->activity_path, strerror(errno)); + } + else + activity_file = NULL; +} + +static +void _starpu_fxt_sched_tasks_file_init(struct starpu_fxt_options *options) +{ + if (options->sched_tasks_path) + { + sched_tasks_file = fopen(options->sched_tasks_path, "w+"); + if (sched_tasks_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->sched_tasks_path, strerror(errno)); + } + else + sched_tasks_file = NULL; +} + +static +void _starpu_fxt_anim_file_init(struct starpu_fxt_options *options) +{ + if (options->anim_path) + { + anim_file = fopen(options->anim_path, "w+"); + if (anim_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->anim_path, strerror(errno)); + + _starpu_fxt_component_print_header(anim_file); + } + else + anim_file = NULL; +} + +static +void _starpu_fxt_tasks_file_init(struct starpu_fxt_options *options) +{ + if (options->tasks_path) + { + tasks_file = fopen(options->tasks_path, "w+"); + if (tasks_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->tasks_path, strerror(errno)); + } + else + tasks_file = NULL; +} + +static +void _starpu_fxt_data_file_init(struct starpu_fxt_options *options) +{ + if (options->data_path) + { + data_file = fopen(options->data_path, "w+"); + if (data_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->data_path, strerror(errno)); + } + else + data_file = NULL; +} + +static +void _starpu_fxt_comms_file_init(struct starpu_fxt_options *options) +{ + if (options->comms_path) + { + comms_file = fopen(options->comms_path, "w+"); + if (comms_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->comms_path, strerror(errno)); + } + else + comms_file = NULL; +} + +static +void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options) +{ + if 
(options->number_events_path) + { + number_events_file = fopen(options->number_events_path, "w+"); + if (number_events_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno)); + + /* FUT_SETUP_CODE is the event with the maximal value */ + _STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t)); + } + else + number_events_file = NULL; +} + +static +void _starpu_fxt_papi_file_init(struct starpu_fxt_options *options) +{ +#ifdef STARPU_PAPI + if (options->papi_path) + { + papi_file = fopen(options->papi_path, "w+"); + if (papi_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->papi_path, strerror(errno)); + } + else + papi_file = NULL; +#else + (void) options; // avoid warning about unused variable +#endif +} + +static +void _starpu_fxt_write_trace_header(FILE *f) +{ + fprintf(f, "#\n"); + fprintf(f, "# E: Event type\n"); + fprintf(f, "# N: Event name\n"); + fprintf(f, "# C: Event category\n"); + fprintf(f, "# W: Worker ID\n"); + fprintf(f, "# T: Thread ID\n"); + fprintf(f, "# S: Start time\n"); + fprintf(f, "#\n"); + fprintf(f, "\n"); +} + +static +void _starpu_fxt_trace_file_init(struct starpu_fxt_options *options) +{ + if (options->states_path) + { + trace_file = fopen(options->states_path, "w+"); + if (trace_file == NULL) + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->states_path, strerror(errno)); + } + else + trace_file = NULL; + + if (trace_file) + _starpu_fxt_write_trace_header(trace_file); +} + +static +void _starpu_fxt_activity_file_close(void) +{ + if (activity_file) + fclose(activity_file); +} + +static +void _starpu_fxt_sched_tasks_file_close(void) +{ + if (sched_tasks_file) + fclose(sched_tasks_file); +} + +static +void _starpu_fxt_anim_file_close(void) +{ + //_starpu_fxt_component_dump(stderr); + if (anim_file) + { + _starpu_fxt_component_finish(anim_file); + fclose(anim_file); + } +} + +static +void _starpu_fxt_tasks_file_close(void) +{ + if (tasks_file) 
+ fclose(tasks_file); +} + +static +void _starpu_fxt_comms_file_close(void) +{ + if (comms_file) + fclose(comms_file); +} + +static +void _starpu_fxt_number_events_file_close(void) +{ + if (number_events_file) + { + int i; + + assert(number_events != NULL); + + fprintf(number_events_file, "# Use starpu_fxt_number_events_to_names.py to convert event keys to event names.\n"); + + for (i = 0; i <= FUT_SETUP_CODE; i++) + { + if (number_events[i] > 0) + fprintf(number_events_file, "0x%x\t%"PRIu64"\n", i, number_events[i]); + } + + free(number_events); + number_events = NULL; + + fclose(number_events_file); + } +} + +static +void _starpu_fxt_data_file_close(void) +{ + if (data_file) + fclose(data_file); +} + +static +void _starpu_fxt_papi_file_close(void) +{ +#ifdef STARPU_PAPI + if (papi_file) + fclose(papi_file); +#endif +} + +static +void _starpu_fxt_trace_file_close(void) +{ + if (trace_file) + fclose(trace_file); +} + +static +void _starpu_fxt_paje_file_init(struct starpu_fxt_options *options) +{ + /* create a new file */ + if (options->out_paje_path) + { + out_paje_file = fopen(options->out_paje_path, "w+"); + if (!out_paje_file) + { + _STARPU_MSG("error while opening %s\n", options->out_paje_path); + perror("fopen"); + _exit(EXIT_FAILURE); + } + +#ifdef STARPU_HAVE_POTI +#ifdef HAVE_POTI_INIT_CUSTOM + fclose(out_paje_file); + poti_init_custom(options->out_paje_path, + 0, //if false, allow extended events + 1, //if true, an old header (pj_dump -n) + 0, //if false, the trace has no comments + 1, //if true, events have aliases + 1);//if true, relative timestamps +#else + poti_init(out_paje_file); +#endif +#endif + _starpu_fxt_write_paje_header(out_paje_file, options); + } + else + { + out_paje_file = NULL; + } + + /* create lists for symbols (kernel states) and communications */ + _starpu_symbol_name_list_init(&symbol_list); + _starpu_communication_list_init(&communication_list); + if (!options->no_flops) + _starpu_computation_list_init(&computation_list); +} + 
/* An (input, rank) pair; inputrank_compar() orders such pairs by rank. */
struct inputrank
{
	int input;
	int rank;
};

/*
 * Three-way comparison of two struct inputrank by their rank field, with the
 * prototype expected of a qsort()-style comparator.
 *
 * Returns a negative value when _a has the smaller rank, zero when both ranks
 * are equal and a positive value when _a has the larger rank.  The input
 * field is not looked at.
 */
static int inputrank_compar(const void *_a, const void *_b)
{
	const struct inputrank *a = _a;
	const struct inputrank *b = _b;

	/* (a > b) - (a < b) yields 1/0/-1 and, unlike a - b, cannot overflow
	 * when the two ranks are far apart. */
	return (a->rank > b->rank) - (a->rank < b->rank);
}
_starpu_fxt_number_events_file_init(options); + _starpu_fxt_trace_file_init(options); + + _starpu_fxt_paje_file_init(options); + + if (options->ninputfiles == 0) + { + return; + } + else if (options->ninputfiles == 1) + { + /* we usually only have a single trace */ + uint64_t file_start_time = _starpu_fxt_find_start_time(options->filenames[0]); + options->file_prefix = strdup(""); + options->file_offset.nb_barriers = 0; + options->file_offset.offset_start = -file_start_time; + options->file_rank = -1; + + _starpu_fxt_parse_new_file(options->filenames[0], options); + } + else + { + unsigned inputfile, i; + + /* + * Find the trace offsets: + * - If there is no sync point + * psi_k(x) = x - start_k + * - If there is one sync point sync_k + * psi_k(x) = x - sync_k + M + * where M = max { sync_i - start_i | there exists sync_i} + * - If there are two sync points: + * Two offsets are computed, and then offset is interpolated + * and applied in get_event_timestamp() for each timestamp. + * More generally: + * - psi_k(x) = x - offset_k + */ + + int unique_keys[options->ninputfiles]; + int rank_k[options->ninputfiles]; + uint64_t start_k[options->ninputfiles]; + struct starpu_fxt_mpi_offset sync_barriers[options->ninputfiles]; + uint64_t M_start = 0; + uint64_t M_end = 0; + int key = -1; + unsigned display_mpi = 0; + + /* Get all trace starts */ + for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) + { + uint64_t file_start = _starpu_fxt_find_start_time(options->filenames[inputfile]); + start_k[inputfile] = file_start; + } + + /* Look for all synchronization points, if they exist */ + for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) + { + sync_barriers[inputfile] = _starpu_fxt_mpi_find_sync_points(options->filenames[inputfile], + &unique_keys[inputfile], + &rank_k[inputfile]); + if (sync_barriers[inputfile].nb_barriers > 0) + { + /* Let's start by making sure all trace files come from the same execution: */ + if (key == -1) + { + key = 
unique_keys[inputfile]; // key is in [0, RAND_MAX] + display_mpi = 1; + } + else if (key != unique_keys[inputfile]) + { + _STARPU_MSG("Warning: traces are coming from different run so we will not try to display MPI communications.\n"); + display_mpi = 0; + } + + /* Find what is the most important duration between start of the trace and sync point. + * (see below why we need this information) */ + STARPU_ASSERT(sync_barriers[inputfile].local_time_start >= start_k[inputfile]); + uint64_t diff = sync_barriers[inputfile].local_time_start - start_k[inputfile]; + if (diff > M_start) + { + M_start = diff; + } + if (sync_barriers[inputfile].nb_barriers == 2) + { + STARPU_ASSERT(sync_barriers[inputfile].local_time_end >= sync_barriers[inputfile].local_time_start); + diff = sync_barriers[inputfile].local_time_end - start_k[inputfile]; + if (diff > M_end) + { + M_end = diff; + } + } + } + } + + /* Compute the offset for each trace file. + * Note: offsets will be applied with the following formula: + * t_corrected = t + offset + * The offset represents two steps: + * 1. It changes the time origin of timestamps to the local sync + * point time (since we are sure the sync point occurred at the same + * global time on each node, it is a valid reference point), hence: + * offset[k] = -sync_point[k] + * 2. This will make timestamp of events before the sync point + * happening before 0. 
We correct this by adding to the offset the + * largest time difference between trace start and sync point among + * all trace files (after step 1., it is the start time which is the + * most in the past, so by taking this value, we are sure all events + * in all processes will have a positive timestamp), hence: + * offset[k] += M + */ + for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) + { + if (sync_barriers[inputfile].nb_barriers) + { + sync_barriers[inputfile].offset_start = -sync_barriers[inputfile].local_time_start + M_start; + + if (sync_barriers[inputfile].nb_barriers == 2) + { + sync_barriers[inputfile].offset_end = -sync_barriers[inputfile].local_time_end + M_end; + } + } + else + { + sync_barriers[inputfile].offset_start = -start_k[inputfile]; + } + } + + /* Sort input files by rank */ + struct inputrank inputrank[options->ninputfiles]; + for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) + { + inputrank[inputfile].input = inputfile; + inputrank[inputfile].rank = rank_k[inputfile]; + } + qsort(inputrank, options->ninputfiles, sizeof(inputrank[0]), inputrank_compar); + + int maxrank = inputrank[options->ninputfiles-1].rank; + + int logn; + if (maxrank == 0) + logn = 1; + else + logn = log10(maxrank)+1; + + /* generate the Paje trace for the different files */ + for (i = 0; i < options->ninputfiles; i++) + { + inputfile = inputrank[i].input; + int filerank = rank_k[inputfile]; + STARPU_ASSERT(filerank == inputrank[i].rank); + + _STARPU_DISP("Parsing file %s (rank %0*d)\n", options->filenames[inputfile], logn, filerank); + + char file_prefix[32]; + snprintf(file_prefix, sizeof(file_prefix), "%0*d_", logn, filerank); + + free(options->file_prefix); + options->file_prefix = strdup(file_prefix); + options->file_offset = sync_barriers[inputfile]; + options->file_rank = filerank; + + _starpu_fxt_parse_new_file(options->filenames[inputfile], options); + } + + /* display the MPI transfers if possible */ + if (display_mpi) + 
_starpu_fxt_display_mpi_transfers(options, rank_k, out_paje_file, comms_file); + } + + /* close the different files */ + _starpu_fxt_paje_file_close(); + _starpu_fxt_activity_file_close(); + _starpu_fxt_sched_tasks_file_close(); + _starpu_fxt_distrib_file_close(options); + _starpu_fxt_anim_file_close(); + _starpu_fxt_tasks_file_close(); + _starpu_fxt_data_file_close(); + _starpu_fxt_papi_file_close(); + _starpu_fxt_comms_file_close(); + _starpu_fxt_number_events_file_close(); + _starpu_fxt_trace_file_close(); + + _starpu_fxt_dag_terminate(); + + options->nworkers = nworkers; + free(options->file_prefix); +} + +#define DATA_STR_MAX_SIZE 15 + +struct parse_task +{ + unsigned exec_time; + unsigned data_total; + unsigned workerid; + char *codelet_name; +}; + +static struct parse_task tasks[STARPU_NMAXWORKERS]; + +static struct starpu_data_trace_kernel +{ + UT_hash_handle hh; + char *name; + FILE *file; +} *kernels; + +static struct starpu_data_trace_kernel_job +{ + UT_hash_handle hh; + int jobid; + char *name; +} *kernel_jobs; + +static void record_kernel_job_name(int jobid, char *name) +{ + struct starpu_data_trace_kernel_job *kernel_job; + HASH_FIND_INT(kernel_jobs, &jobid, kernel_job); + if (kernel_job == NULL) + { + _STARPU_MALLOC(kernel_job, sizeof(*kernel_job)); + kernel_job->jobid = jobid; + HASH_ADD_INT(kernel_jobs, jobid, kernel_job); + } + else + { + free(kernel_job->name); + } + kernel_job->name = strdup(name); +} + +static char *extract_kernel_job_name(int jobid) +{ + char *name = NULL; + struct starpu_data_trace_kernel_job *kernel_job; + HASH_FIND_INT(kernel_jobs, &jobid, kernel_job); + if (kernel_job != NULL) + { + name = kernel_job->name; + HASH_DEL(kernel_jobs, kernel_job); + free(kernel_job); + } + return name; +} + +#define NANO_SEC_TO_MILI_SEC 0.000001 + +static FILE *codelet_list; + +static void write_task(char *dir, struct parse_task *pt) +{ + struct starpu_data_trace_kernel *kernel; + char *codelet_name = pt->codelet_name; + HASH_FIND_STR(kernels, 
codelet_name, kernel); + //fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name); + if(kernel == NULL) + { + _STARPU_MALLOC(kernel, sizeof(*kernel)); + kernel->name = strdup(codelet_name); + char filename[256]; + snprintf(filename, sizeof(filename), "%s/%s", dir, kernel->name); + //fprintf(stderr, "%s\n", kernel->name); + kernel->file = fopen(filename, "w+"); + if(!kernel->file) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename, strerror(errno)); + } + HASH_ADD_STR(kernels, name, kernel); + fprintf(codelet_list, "%s\n", codelet_name); + } + double time = pt->exec_time * NANO_SEC_TO_MILI_SEC; + fprintf(kernel->file, "%lf %u %u\n", time, pt->data_total, pt->workerid); +} + +void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir) +{ + int fd_in; + fd_in = open(filename_in, O_RDONLY); + if (fd_in < 0) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno)); + } + + static fxt_t fut; + fut = fxt_fdopen(fd_in); + if (!fut) + { + perror("fxt_fdopen :"); + _exit(EXIT_FAILURE); + } + + char filename_out[512]; + snprintf(filename_out, sizeof(filename_out), "%s/codelet_list", dir); + codelet_list = fopen(filename_out, "w+"); + if(!codelet_list) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_out, strerror(errno)); + } + + fxt_blockev_t block; + block = fxt_blockev_enter(fut); + + while(1) + { + unsigned i; + struct fxt_ev_64 ev; + int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); + for (i = ev.nb_params; i < FXT_MAX_PARAMS; i++) + ev.param[i] = 0; + if (ret != FXT_EV_OK) + { + break; + } + + switch (ev.code) + { + case _STARPU_FUT_WORKER_INIT_START: + register_worker_id(0 /* TODO: Add nodeid here instead */, ev.param[6], ev.param[1], ev.param[5]); + break; + + case _STARPU_FUT_TASK_NAME: + { + int jobid = (int)ev.param[0]; + char *name = get_fxt_string(&ev,2); + record_kernel_job_name(jobid, name); + } + break; + + case _STARPU_FUT_START_CODELET_BODY: + { + int workerid = 
ev.param[2]; + tasks[workerid].workerid = (unsigned)workerid; + tasks[workerid].exec_time = ev.time; + } + break; + + case _STARPU_FUT_END_CODELET_BODY: + { + int jobid = (int)ev.param[0]; + int workerid = ev.param[3]; + assert(workerid != -1); + tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time; + char *name = extract_kernel_job_name(jobid); + if (name == NULL) + { + name = strdup("unknown"); + } + tasks[workerid].codelet_name = name; + write_task(dir, &tasks[workerid]); + /* codelet_name is copied in write_task() when needed */ + tasks[workerid].codelet_name = NULL; + free(name); + } + break; + + case _STARPU_FUT_DATA_LOAD: + { + int workerid = ev.param[0]; + tasks[workerid].data_total = ev.param[1]; + } + break; + + default: +#ifdef STARPU_VERBOSE + _STARPU_MSG("unknown event.. %x at time %llx WITH OFFSET %llx\n", + (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time)); +#endif + break; + } + } + +#ifdef HAVE_FXT_BLOCKEV_LEAVE + fxt_blockev_leave(block); +#endif + +#ifdef HAVE_FXT_CLOSE + fxt_close(fut); +#else + if (close(fd_in)) + { + perror("close failed :"); + _exit(EXIT_FAILURE); + } +#endif + + if(fclose(codelet_list)) + { + perror("close failed :"); + _exit(EXIT_FAILURE); + } + + unsigned i; + for (i = 0; i < STARPU_NMAXWORKERS; i++) + free(tasks[i].codelet_name); + + free_worker_ids(); + + { + struct starpu_data_trace_kernel *kernel=NULL, *tmp=NULL; + HASH_ITER(hh, kernels, kernel, tmp) + { + if(fclose(kernel->file)) + { + perror("close failed :"); + _exit(EXIT_FAILURE); + } + HASH_DEL(kernels, kernel); + free(kernel->name); + free(kernel); + } + } + + { + struct starpu_data_trace_kernel_job *kernel_job=NULL, *tmp=NULL; + HASH_ITER(hh, kernel_jobs, kernel_job, tmp) + { + HASH_DEL(kernel_jobs, kernel_job); + free(kernel_job->name); + free(kernel_job); + } + } +} + +void starpu_fxt_write_data_trace(char *filename_in) +{ + starpu_fxt_write_data_trace_in_dir(filename_in, "."); +} + +#endif // STARPU_USE_FXT diff --git 
a/src/debug/traces/starpu_fxt.h b/src/debug/traces/starpu_fxt.h new file mode 100644 index 0000000..4b740f9 --- /dev/null +++ b/src/debug/traces/starpu_fxt.h @@ -0,0 +1,112 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU__FXT_H__ +#define __STARPU__FXT_H__ + +/** @file */ + +#include +#include +#include + +#ifdef STARPU_USE_FXT + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "../mpi/src/starpu_mpi_fxt.h" +#include +#include "../../../include/starpu_fxt.h" + +#ifdef STARPU_HAVE_POTI +#include +#define STARPU_POTI_STR_LEN 200 +#endif +#define STARPU_TRACE_STR_LEN 200 + +#pragma GCC visibility push(hidden) + +extern char _starpu_last_codelet_symbol[STARPU_NMAXWORKERS][(FXT_MAX_PARAMS-5)*sizeof(unsigned long)]; + +void _starpu_fxt_dag_init(char *dag_filename); +void _starpu_fxt_dag_terminate(void); +void _starpu_fxt_dag_add_tag(const char *prefix, uint64_t tag, unsigned long job_id, const char *label); +void _starpu_fxt_dag_add_tag_deps(const char *prefix, uint64_t child, uint64_t father, const char *label); +void _starpu_fxt_dag_set_tag_done(const char *prefix, uint64_t tag, const char *color, const char *fontcolor); +void _starpu_fxt_dag_add_task_deps(const char *prefix, 
unsigned long dep_prev, unsigned long dep_succ, const char *label); +void _starpu_fxt_dag_add_task_end_dep(const char *prefix, unsigned long prev, unsigned long succ); +void _starpu_fxt_dag_set_task_name(const char *prefix, unsigned long job_id, const char *label, const char *color, const char *fontcolor); +#ifdef STARPU_BUBBLE +void _starpu_fxt_dag_set_task_bubble(const char *prefix, unsigned long job_id, int is_bubble, unsigned long bubble_parent); +#endif +void _starpu_fxt_dag_set_task_line(const char *prefix, unsigned long job_id, const char *file, int line); +void _starpu_fxt_dag_add_send(int src, unsigned long dep_prev, unsigned long tag, unsigned long id); +void _starpu_fxt_dag_add_receive(int dst, unsigned long dep_prev, unsigned long tag, unsigned long id); +void _starpu_fxt_dag_add_sync_point(void); +unsigned _starpu_fxt_data_get_coord(unsigned long handle, int mpi_rank, unsigned dim); +const char * _starpu_fxt_data_get_name(unsigned long handle, int mpi_rank); + +void _starpu_convert_numa_nodes_bitmap_to_str(long bitmap, char str[]); + +/* + * MPI + */ + +struct starpu_fxt_mpi_offset _starpu_fxt_mpi_find_sync_points(char *filename_in, int *key, int *rank); +void _starpu_fxt_mpi_add_send_transfer(int src, int dst, long mpi_tag, size_t size, float date, long jobid, unsigned long handle, unsigned type, int prio); +void _starpu_fxt_mpi_send_transfer_set_numa_node(int src, int dest, long jobid, long numa_nodes_bitmap); +void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, long mpi_tag, float date, long jobid, unsigned long handle); +void _starpu_fxt_mpi_recv_transfer_set_numa_node(int src, int dst, long jobid, long numa_nodes_bitmap); +void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file, FILE* out_comms_file); + +void _starpu_fxt_write_paje_header(FILE *file, struct starpu_fxt_options *options); + +extern int _starpu_poti_extendedSetState; +extern int _starpu_poti_semiExtendedSetState; +extern int 
_starpu_poti_MemoryEvent; +extern int _starpu_poti_CommLinkStart; +extern int _starpu_poti_MpiLinkStart; +extern int _starpu_poti_checkPointState; +extern int _starpu_poti_JobState; + +/* + * Animation + */ +void _starpu_fxt_component_print_header(FILE *output); +void _starpu_fxt_component_new(uint64_t component, char *name); +void _starpu_fxt_component_connect(uint64_t parent, uint64_t child); +void _starpu_fxt_component_update_ntasks(unsigned nsubmitted, unsigned curq_size); +void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio); +void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio); +void _starpu_fxt_component_dump(FILE *output); +void _starpu_fxt_component_finish(FILE *output); +void _starpu_fxt_component_deinit(void); + +#pragma GCC visibility pop + +#endif // STARPU_USE_FXT + +#endif // __STARPU__FXT_H__ diff --git a/src/debug/traces/starpu_fxt_dag.c b/src/debug/traces/starpu_fxt_dag.c new file mode 100644 index 0000000..caadf54 --- /dev/null +++ b/src/debug/traces/starpu_fxt_dag.c @@ -0,0 +1,165 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#ifdef STARPU_USE_FXT + +#include "starpu_fxt.h" + +static FILE *out_file; +static unsigned cluster_cnt; + +void _starpu_fxt_dag_init(char *out_path) +{ + if (!out_path) + { + out_file = NULL; + return; + } + + /* create a new file */ + out_file = fopen(out_path, "w+"); + if (!out_file) + { + _STARPU_MSG("error while opening %s\n", out_path); + perror("fopen"); + _exit(EXIT_FAILURE); + } + cluster_cnt = 0; + + fprintf(out_file, "digraph G {\n"); + fprintf(out_file, "\tcolor=white\n"); + fprintf(out_file, "\trankdir=LR;\n"); + + /* Create a new cluster */ + fprintf(out_file, "subgraph cluster_%u {\n", cluster_cnt); + fprintf(out_file, "\tcolor=black;\n"); +} + +void _starpu_fxt_dag_terminate(void) +{ + if (!out_file) + return; + + /* Close the last cluster */ + fprintf(out_file, "}\n"); + /* Close the graph */ + fprintf(out_file, "}\n"); + fclose(out_file); +} + +void _starpu_fxt_dag_add_tag(const char *prefix, uint64_t tag, unsigned long job_id, const char *label) +{ + if (out_file) + { + if (label) + fprintf(out_file, "\t \"tag_%s%llx\"->\"task_%s%lu\"->\"tag_%s%llx\" [style=dashed] [label=\"%s\"]\n", prefix, (unsigned long long)tag, prefix, (unsigned long)job_id, prefix, (unsigned long long) tag, label); + else + fprintf(out_file, "\t \"tag_%s%llx\"->\"task_%s%lu\"->\"tag_%s%llx\" [style=dashed]\n", prefix, (unsigned long long)tag, prefix, (unsigned long)job_id, prefix, (unsigned long long) tag); + } +} + +void _starpu_fxt_dag_add_tag_deps(const char *prefix, uint64_t child, uint64_t father, const char *label) +{ + if (out_file) + { + if (label) + fprintf(out_file, "\t \"tag_%s%llx\"->\"tag_%s%llx\" [label=\"%s\"]\n", prefix, (unsigned long long)father, prefix, (unsigned long long)child, label); + else + fprintf(out_file, "\t \"tag_%s%llx\"->\"tag_%s%llx\"\n", prefix, (unsigned long long)father, prefix, (unsigned long long)child); + } +} + +void _starpu_fxt_dag_add_task_deps(const char *prefix, unsigned long dep_prev, 
unsigned long dep_succ, const char *label) +{ + if (out_file) + { + if (label) + fprintf(out_file, "\t \"task_%s%lu\"->\"task_%s%lu\" [label=\"%s\"]\n", prefix, dep_prev, prefix, dep_succ, label); + else + fprintf(out_file, "\t \"task_%s%lu\"->\"task_%s%lu\"\n", prefix, dep_prev, prefix, dep_succ); + } +} + +void _starpu_fxt_dag_set_tag_done(const char *prefix, uint64_t tag, const char *color, const char *fontcolor) +{ + if (out_file) + fprintf(out_file, "\t \"tag_%s%llx\" [ style=filled, fillcolor=\"%s\", fontcolor=\"%s\"]\n", + prefix, (unsigned long long)tag, color, fontcolor); +} + +void _starpu_fxt_dag_add_task_end_dep(const char *prefix, unsigned long prev, unsigned long succ) +{ + if (out_file) + fprintf(out_file, "\t \"task_%s%lu\" [ end_dep=\"%lu\"]\n", prefix, prev, succ); +} + +void _starpu_fxt_dag_set_task_name(const char *prefix, unsigned long job_id, const char *label, const char *color, const char *fontcolor) +{ + if (out_file) + fprintf(out_file, "\t \"task_%s%lu\" [ style=filled, label=\"%s\", fillcolor=\"%s\", fontcolor=\"%s\"]\n", prefix, job_id, label, color, fontcolor); +} + +#ifdef STARPU_BUBBLE +void _starpu_fxt_dag_set_task_bubble(const char *prefix, unsigned long job_id, int is_bubble, unsigned long bubble_parent) +{ + if (out_file) + { + fprintf(out_file, "\t \"task_%s%lu\" [ bubble=\"%d\" ", prefix, job_id, is_bubble); + if (bubble_parent) + fprintf(out_file, ", bubble_parent=\"%lu\"", bubble_parent); + fprintf(out_file, "]\n"); + } +} +#endif + +void _starpu_fxt_dag_set_task_line(const char *prefix, unsigned long job_id, const char *file, int line) +{ + if (out_file) + fprintf(out_file, "\t \"task_%s%lu\" [ href=\"%s#%d\" ]\n", prefix, job_id, file, line); +} + +void _starpu_fxt_dag_add_send(int src, unsigned long dep_prev, unsigned long tag, unsigned long id) +{ + if (out_file) + fprintf(out_file, "\t \"task_%d_%lu\"->\"mpi_%lu_%lu\"\n", src, dep_prev, tag, id); +} + +void _starpu_fxt_dag_add_receive(int dst, unsigned long dep_prev, 
unsigned long tag, unsigned long id) +{ + if (out_file) + fprintf(out_file, "\t \"mpi_%lu_%lu\"->\"task_%d_%lu\"\n", tag, id, dst, dep_prev); +} + +void _starpu_fxt_dag_add_sync_point(void) +{ + if (!out_file) + return; + + /* Close the previous cluster */ + fprintf(out_file, "}\n"); + + cluster_cnt++; + + /* Create a new cluster */ + fprintf(out_file, "subgraph cluster_%u {\n", cluster_cnt); + fprintf(out_file, "\tcolor=black;\n"); +} + +#endif /* STARPU_USE_FXT */ diff --git a/src/debug/traces/starpu_fxt_mpi.c b/src/debug/traces/starpu_fxt_mpi.c new file mode 100644 index 0000000..8d493ec --- /dev/null +++ b/src/debug/traces/starpu_fxt_mpi.c @@ -0,0 +1,494 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#ifdef STARPU_USE_FXT + +#include "starpu_fxt.h" + +LIST_TYPE(mpi_transfer, + unsigned matched; + int src; + int dst; + long mpi_tag; + size_t size; + float date; + long jobid; + double bandwidth; + unsigned long handle; + char *name; + unsigned X; + unsigned Y; + unsigned type; + int prio; + long numa_nodes_bitmap; +); + +struct starpu_fxt_mpi_offset _starpu_fxt_mpi_find_sync_points(char *filename_in, int *key, int *rank) +{ + struct starpu_fxt_mpi_offset offset; + offset.nb_barriers = 0; + offset.local_time_start = 0; + offset.local_time_end = 0; + offset.offset_start = 0; + offset.offset_end = 0; + + /* Open the trace file */ + int fd_in; + fd_in = open(filename_in, O_RDONLY); + if (fd_in < 0) + { + perror("open failed :"); + _exit(EXIT_FAILURE); + } + + static fxt_t fut; + fut = fxt_fdopen(fd_in); + if (!fut) + { + perror("fxt_fdopen :"); + _exit(EXIT_FAILURE); + } + + fxt_blockev_t block; + block = fxt_blockev_enter(fut); + + struct fxt_ev_64 ev; + int ret; + uint64_t local_sync_time; + + while (offset.nb_barriers < 2 && (ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev)) == FXT_EV_OK) + { + if (ev.code == _STARPU_MPI_FUT_BARRIER) + { + /* We found a sync point */ + *rank = ev.param[0]; + *key = ev.param[2]; + local_sync_time = (uint64_t) ((double) ev.param[3]); // It is stored as a double in the trace + + if (local_sync_time == 0) + { + /* This clock synchronization was made with an + * MPI_Barrier, consider the event timestamp as + * a local synchronized barrier time: */ + local_sync_time = ev.time; + } + + if (offset.nb_barriers == 0) + { + offset.local_time_start = local_sync_time; + } + else + { + offset.local_time_end = local_sync_time; + } + + offset.nb_barriers++; + } + } + + /* Close the trace file */ + if (close(fd_in)) + { + perror("close failed :"); + _exit(EXIT_FAILURE); + } + + return offset; +} + +/* + * Deal with the actual MPI transfers performed with the MPI lib + */ + +/* the list of MPI transfers 
found in the different traces */
+static struct mpi_transfer *mpi_sends[STARPU_FXT_MAX_FILES] = {NULL};
+static struct mpi_transfer *mpi_recvs[STARPU_FXT_MAX_FILES] = {NULL};
+
+/* number of available slots in the lists */
+static unsigned mpi_sends_list_size[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_recvs_list_size[STARPU_FXT_MAX_FILES] = {0};
+
+/* number of slots actually used in the list */
+static unsigned mpi_sends_used[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_recvs_used[STARPU_FXT_MAX_FILES] = {0};
+
+/* number of slots already matched at the beginning of the list. This permits
+ * going through the lists from the beginning to match each and every
+ * transfer, thus avoiding a quadratic complexity. */
+static unsigned mpi_recvs_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} };
+
+/* Record a send from node src to node dst. Sends whose source is beyond
+ * STARPU_FXT_MAX_FILES are silently dropped. */
+void _starpu_fxt_mpi_add_send_transfer(int src, int dst, long mpi_tag, size_t size, float date, long jobid, unsigned long handle, unsigned type, int prio)
+{
+	STARPU_ASSERT(src >= 0);
+	if (src >= STARPU_FXT_MAX_FILES)
+		return;
+	unsigned slot = mpi_sends_used[src]++;
+
+	/* Double the capacity of the list when it is full */
+	if (mpi_sends_used[src] > mpi_sends_list_size[src])
+	{
+		if (mpi_sends_list_size[src] > 0)
+		{
+			mpi_sends_list_size[src] *= 2;
+		}
+		else
+		{
+			mpi_sends_list_size[src] = 1;
+		}
+
+		_STARPU_REALLOC(mpi_sends[src], mpi_sends_list_size[src]*sizeof(struct mpi_transfer));
+	}
+
+	mpi_sends[src][slot].matched = 0;
+	mpi_sends[src][slot].src = src;
+	mpi_sends[src][slot].dst = dst;
+	mpi_sends[src][slot].mpi_tag = mpi_tag;
+	mpi_sends[src][slot].size = size;
+	mpi_sends[src][slot].date = date;
+	mpi_sends[src][slot].jobid = jobid;
+	mpi_sends[src][slot].handle = handle;
+	mpi_sends[src][slot].X = _starpu_fxt_data_get_coord(handle, src, 0);
+	mpi_sends[src][slot].Y = _starpu_fxt_data_get_coord(handle, src, 1);
+	const char *name = _starpu_fxt_data_get_name(handle, src);
+	if (!name)
+		name = "";
+	mpi_sends[src][slot].name = strdup(name);
+	mpi_sends[src][slot].type = type;
+	mpi_sends[src][slot].prio = prio;
+	mpi_sends[src][slot].numa_nodes_bitmap = -1;
+}
+
+/* Attach numa_nodes_bitmap to the most recent send from src to dest made by
+ * job jobid. Warns when no such send was recorded. */
+void _starpu_fxt_mpi_send_transfer_set_numa_node(int src, int dest, long jobid, long numa_nodes_bitmap)
+{
+	STARPU_ASSERT(src >= 0);
+	if (src >= STARPU_FXT_MAX_FILES || jobid == -1)
+		return;
+
+	unsigned i, slot;
+	for (i = 0; i < mpi_sends_used[src]; i++)
+	{
+		/* The probe is just after the one handled by
+		 * _starpu_fxt_mpi_add_send_transfer, so the send transfer should have been
+		 * added recently: */
+		slot = mpi_sends_used[src] - i - 1;
+		if (mpi_sends[src][slot].dst == dest && mpi_sends[src][slot].jobid == jobid)
+		{
+			mpi_sends[src][slot].numa_nodes_bitmap = numa_nodes_bitmap;
+			return;
+		}
+	}
+
+	_STARPU_MSG("Warning: did not find the send transfer from %d to %d with jobid %ld\n", src, dest, jobid);
+}
+
+/* Record a receive on node dst of a message coming from node src. Receives
+ * whose destination cannot index the per-node tables are silently dropped. */
+void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, long mpi_tag, float date, long jobid, unsigned long handle)
+{
+	/* dst indexes the tables below: reject negative values as well */
+	if (dst < 0 || dst >= STARPU_FXT_MAX_FILES)
+		return;
+	unsigned slot = mpi_recvs_used[dst]++;
+
+	/* Double the capacity of the list when it is full */
+	if (mpi_recvs_used[dst] > mpi_recvs_list_size[dst])
+	{
+		if (mpi_recvs_list_size[dst] > 0)
+		{
+			mpi_recvs_list_size[dst] *= 2;
+		}
+		else
+		{
+			mpi_recvs_list_size[dst] = 1;
+		}
+
+		_STARPU_REALLOC(mpi_recvs[dst], mpi_recvs_list_size[dst]*sizeof(struct mpi_transfer));
+	}
+
+	mpi_recvs[dst][slot].matched = 0;
+	mpi_recvs[dst][slot].src = src;
+	mpi_recvs[dst][slot].dst = dst;
+	mpi_recvs[dst][slot].mpi_tag = mpi_tag;
+	mpi_recvs[dst][slot].date = date;
+	mpi_recvs[dst][slot].jobid = jobid;
+	mpi_recvs[dst][slot].handle = handle;
+	mpi_recvs[dst][slot].numa_nodes_bitmap = -1;
+}
+
+/* Attach numa_nodes_bitmap to the most recent receive on dst of a message
+ * from src for job jobid. Warns when no such receive was recorded. */
+void _starpu_fxt_mpi_recv_transfer_set_numa_node(int src, int dst, long jobid, long numa_nodes_bitmap)
+{
+	STARPU_ASSERT(src >= 0);
+	/* The receive tables are indexed by dst, so it is dst that has to be
+	 * range-checked before use; the src test is kept as it was. */
+	if (src >= STARPU_FXT_MAX_FILES || dst < 0 || dst >= STARPU_FXT_MAX_FILES || jobid == -1)
+		return;
+
+	unsigned i, slot;
+	for (i = 0; i < mpi_recvs_used[dst]; i++)
+	{
+		/* The probe is just after the one handled by
+		 * _starpu_fxt_mpi_add_recv_transfer, so the recv transfer should have been
+		 * added recently: */
+		slot = mpi_recvs_used[dst] - i - 1;
+		if (mpi_recvs[dst][slot].src == src && mpi_recvs[dst][slot].jobid == jobid)
+		{
+			mpi_recvs[dst][slot].numa_nodes_bitmap = numa_nodes_bitmap;
+			return;
+		}
+	}
+
+	_STARPU_MSG("Warning: did not find the recv transfer from %d to %d with jobid %ld\n", src, dst, jobid);
+}
+
+
+/* Return the first not yet matched receive recorded on dst that carries
+ * mpi_tag and mark it as matched, or return NULL when there is none.
+ * The caller must make sure that src and dst can index the tables. */
+static
+struct mpi_transfer *try_to_match_send_transfer(int src, int dst, long mpi_tag)
+{
+	unsigned slot;
+	unsigned firstslot = mpi_recvs_matched[src][dst];
+
+	unsigned all_previous_were_matched = 1;
+
+	for (slot = firstslot; slot < mpi_recvs_used[dst]; slot++)
+	{
+		if (!mpi_recvs[dst][slot].matched)
+		{
+			if (mpi_recvs[dst][slot].mpi_tag == mpi_tag)
+			{
+				/* we found a match ! */
+				mpi_recvs[dst][slot].matched = 1;
+				return &mpi_recvs[dst][slot];
+			}
+
+			all_previous_were_matched = 0;
+		}
+		else
+		{
+			if (all_previous_were_matched)
+			{
+				/* All previous transfers are already matched,
+				 * we need not consider them anymore */
+				mpi_recvs_matched[src][dst] = slot;
+			}
+		}
+	}
+
+	/* If we reached that point, we could not find a match */
+	return NULL;
+}
+
+/* Identifier given to the next displayed communication */
+static unsigned long mpi_com_id = 0;
+
+/* Return the name displayed for a send event type. */
+static const char* get_mpi_type_str(unsigned mpi_type)
+{
+	switch (mpi_type)
+	{
+		case _STARPU_MPI_FUT_POINT_TO_POINT_SEND:
+			return "PointToPoint";
+		case _STARPU_MPI_FUT_COLLECTIVE_SEND:
+			return "Collective";
+		default:
+			return "Unknown";
+	}
+}
+
+static void display_all_transfers_from_trace(FILE *out_paje_file, FILE *out_comms_file, unsigned n)
+{
+	unsigned slot[STARPU_FXT_MAX_FILES] = { 0 }, node;
+	unsigned nb_wrong_comm_timing = 0;
+	struct mpi_transfer_list pending_receives; /* Sorted list of matches which have not happened yet */
+	double current_out_bandwidth[STARPU_FXT_MAX_FILES] = { 0. };
+	double current_in_bandwidth[STARPU_FXT_MAX_FILES] = { 0. 
}; +#ifdef STARPU_HAVE_POTI + char mpi_container[STARPU_POTI_STR_LEN]; +#endif + + //bwi_mpi and bwo_mpi are set to zero when MPI thread containers are created + + mpi_transfer_list_init(&pending_receives); + + while (1) + { + float start_date; + struct mpi_transfer *cur, *match; + int src; + + /* Find out which event comes first: a pending receive, or a new send */ + + if (mpi_transfer_list_empty(&pending_receives)) + start_date = INFINITY; + else + start_date = mpi_transfer_list_front(&pending_receives)->date; + + src = STARPU_FXT_MAX_FILES; + for (node = 0; node < n; node++) + { + if (slot[node] < mpi_sends_used[node] && mpi_sends[node][slot[node]].date < start_date) + { + /* next send for node is earlier than others */ + src = node; + start_date = mpi_sends[src][slot[src]].date; + } + } + if (start_date == INFINITY) + /* No event any more, we're finished! */ + break; + + if (src == STARPU_FXT_MAX_FILES) + { + /* Pending match is earlier than all new sends, finish its communication */ + match = mpi_transfer_list_pop_front(&pending_receives); + current_out_bandwidth[match->src] -= match->bandwidth; + current_in_bandwidth[match->dst] -= match->bandwidth; +#ifdef STARPU_HAVE_POTI + snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->src); + poti_SetVariable(match->date, mpi_container, "bwo_mpi", current_out_bandwidth[match->src]); + snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->dst); + poti_SetVariable(match->date, mpi_container, "bwi_mpi", current_in_bandwidth[match->dst]); +#else + fprintf(out_paje_file, "13 %.9f %d_mpict bwo_mpi %f\n", match->date, match->src, current_out_bandwidth[match->src]); + fprintf(out_paje_file, "13 %.9f %d_mpict bwi_mpi %f\n", match->date, match->dst, current_in_bandwidth[match->dst]); +#endif + continue; + } + + cur = &mpi_sends[src][slot[src]]; + int dst = cur->dst; + long mpi_tag = cur->mpi_tag; + size_t size = cur->size; + unsigned long send_handle = cur->handle; + long send_numa_nodes_bitmap = 
cur->numa_nodes_bitmap; + + if (dst < STARPU_FXT_MAX_FILES) + match = try_to_match_send_transfer(src, dst, mpi_tag); + else + match = NULL; + + if (match) + { + float end_date = match->date; + unsigned long recv_handle = match->handle; + long recv_numa_nodes_bitmap = match->numa_nodes_bitmap; + struct mpi_transfer *prev; + + if (end_date <= start_date) + nb_wrong_comm_timing++; + + match->bandwidth = (0.001*size)/(end_date - start_date); + current_out_bandwidth[src] += match->bandwidth; + current_in_bandwidth[dst] += match->bandwidth; + + /* Insert in sorted list, most probably at the end so let's use a mere insertion sort */ + for (prev = mpi_transfer_list_last(&pending_receives); + prev != mpi_transfer_list_alpha(&pending_receives); + prev = mpi_transfer_list_prev(prev)) + if (prev->date <= end_date) + { + /* Found its place */ + mpi_transfer_list_insert_after(&pending_receives, match, prev); + break; + } + if (prev == mpi_transfer_list_alpha(&pending_receives)) + { + /* No element earlier than this one, put it at the head */ + mpi_transfer_list_push_front(&pending_receives, match); + } + + unsigned long id = mpi_com_id++; + if (cur->jobid != -1) + _starpu_fxt_dag_add_send(src, cur->jobid, mpi_tag, id); + if (match->jobid != -1) + _starpu_fxt_dag_add_receive(dst, match->jobid, mpi_tag, id); +#ifdef STARPU_HAVE_POTI + char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; + snprintf(paje_value, sizeof(paje_value), "%lu", (long unsigned) size); + snprintf(paje_key, sizeof(paje_key), "mpicom_%lu", id); + snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", src); + + char str_mpi_tag[STARPU_POTI_STR_LEN]; + snprintf(str_mpi_tag, sizeof(str_mpi_tag), "%ld", mpi_tag); + char str_priority[STARPU_POTI_STR_LEN]; + snprintf(str_priority, sizeof(str_priority), "%d", cur->prio); + char str_handle[STARPU_POTI_STR_LEN]; + snprintf(str_handle, sizeof(str_handle), "%lx", send_handle); + char X_str[STARPU_POTI_STR_LEN]; + snprintf(X_str, sizeof(X_str), "%u", 
cur->X); + char Y_str[STARPU_POTI_STR_LEN]; + snprintf(Y_str, sizeof(Y_str), "%u", cur->Y); + + poti_user_StartLink(_starpu_poti_MpiLinkStart, start_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key, 7, str_mpi_tag, get_mpi_type_str(cur->type), str_priority, str_handle, cur->name, X_str, Y_str); + + poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_out_bandwidth[src]); + snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", dst); + poti_EndLink(end_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key); + poti_SetVariable(start_date, mpi_container, "bwi_mpi", current_in_bandwidth[dst]); +#else + fprintf(out_paje_file, "13 %.9f %d_mpict bwo_mpi %f\n", start_date, src, current_out_bandwidth[src]); + fprintf(out_paje_file, "13 %.9f %d_mpict bwi_mpi %f\n", start_date, dst, current_in_bandwidth[dst]); + fprintf(out_paje_file, "23 %.9f MPIL MPIroot %lu %d_mpict mpicom_%lu %ld %s %d %lx \"%s\" %u %u\n", start_date, (unsigned long)size, src, id, mpi_tag, get_mpi_type_str(cur->type), cur->prio, send_handle, cur->name, cur->X, cur->Y); + fprintf(out_paje_file, "19 %.9f MPIL MPIroot %lu %d_mpict mpicom_%lu\n", end_date, (unsigned long)size, dst, id); +#endif + + if (out_comms_file != NULL) + { + fprintf(out_comms_file, "Src: %d\n", src); + fprintf(out_comms_file, "Dst: %d\n", dst); + fprintf(out_comms_file, "Tag: %ld\n", mpi_tag); + fprintf(out_comms_file, "SendTime: %.9f\n", start_date); + fprintf(out_comms_file, "RecvTime: %.9f\n", end_date); + fprintf(out_comms_file, "SendHandle: %lx\n", send_handle); + fprintf(out_comms_file, "RecvHandle: %lx\n", recv_handle); + if (cur->jobid != -1) + fprintf(out_comms_file, "SendJobId: %d_%ld\n", src, cur->jobid); + if (match->jobid != -1) + fprintf(out_comms_file, "RecvJobId: %d_%ld\n", dst, match->jobid); + fprintf(out_comms_file, "Size: %lu\n", (unsigned long)size); + fprintf(out_comms_file, "Priority: %d\n", cur->prio); + fprintf(out_comms_file, "Type: %s\n", get_mpi_type_str(cur->type)); + char
str[STARPU_TRACE_STR_LEN] = ""; + _starpu_convert_numa_nodes_bitmap_to_str(send_numa_nodes_bitmap, str); + fprintf(out_comms_file, "SendNumaNodes: %s\n", str); + _starpu_convert_numa_nodes_bitmap_to_str(recv_numa_nodes_bitmap, str); + fprintf(out_comms_file, "RecvNumaNodes: %s\n", str); + fprintf(out_comms_file, "\n"); + } + free(cur->name); + } + else + { + _STARPU_DISP("Warning, could not match MPI transfer from %d to %d (tag %lx) starting at %f\n", src, dst, mpi_tag, start_date); + } + + slot[src]++; + } + + if (nb_wrong_comm_timing == 1) + _STARPU_MSG("Warning: a communication finished before it started !\n"); + else if (nb_wrong_comm_timing > 1) + _STARPU_MSG("Warning: %u communications finished before they started !\n", nb_wrong_comm_timing); +} + +void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks STARPU_ATTRIBUTE_UNUSED, FILE *out_paje_file, FILE* out_comms_file) +{ + if (options->ninputfiles > STARPU_FXT_MAX_FILES) + { + _STARPU_DISP("Warning: %u files given, maximum %u supported, truncating to %u\n", options->ninputfiles, STARPU_FXT_MAX_FILES, STARPU_FXT_MAX_FILES); + options->ninputfiles = STARPU_FXT_MAX_FILES; + } + + /* display the MPI transfers if possible */ + if (out_paje_file) + display_all_transfers_from_trace(out_paje_file, out_comms_file, options->ninputfiles); +} + +#endif // STARPU_USE_FXT diff --git a/src/debug/traces/starpu_paje.c b/src/debug/traces/starpu_paje.c new file mode 100644 index 0000000..5931651 --- /dev/null +++ b/src/debug/traces/starpu_paje.c @@ -0,0 +1,609 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2019 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "starpu_fxt.h" +#include +#ifdef STARPU_HAVE_POTI +#include +#endif + +#ifdef STARPU_USE_FXT + +#ifdef STARPU_HAVE_POTI +int _starpu_poti_JobState; +int _starpu_poti_checkPointState; +#ifdef HAVE_POTI_INIT_CUSTOM +int _starpu_poti_extendedSetState = -1; +int _starpu_poti_semiExtendedSetState = -1; +int _starpu_poti_MemoryEvent = -1; +int _starpu_poti_CommLinkStart = -1; +int _starpu_poti_MpiLinkStart = -1; +#endif +#endif + +void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options) +{ + unsigned i; +#ifdef STARPU_HAVE_POTI +#ifdef HAVE_POTI_INIT_CUSTOM + poti_header(); /* see poti_init_custom to customize the header */ + _starpu_poti_extendedSetState = poti_header_DeclareEvent (PAJE_SetState, + 13, + "Size string", + "Params string", + "Footprint string", + "Tag string", + "JobId string", + "SubmitOrder string", + "Priority string", + "GFlop string", + "X string", + "Y string", + /* "Z string", */ + "Iteration string", + "Subiteration string", + "NumaNodes string"); + _starpu_poti_semiExtendedSetState = poti_header_DeclareEvent (PAJE_SetState, + 6, + "Size string", + "Params string", + "Footprint string", + "Tag string", + "JobId string", + "SubmitOrder string" + ); +#ifdef HAVE_POTI_USER_NEWEVENT + 
_starpu_poti_CommLinkStart = poti_header_DeclareEvent(PAJE_StartLink, 4, "Handle string", "HName string", "X string", "Y string"); + if (options->memory_states) + { + _starpu_poti_MemoryEvent = poti_header_DeclareEvent (PAJE_NewEvent, + 4, + "Handle string", + "Info string", + "Size string", + "Dest string"); + } + _starpu_poti_MpiLinkStart = poti_header_DeclareEvent(PAJE_StartLink, 7, "MPITAG string", "MPIType string", "Priority string", "Handle string", "HName string", "X string", "Y string"); + _starpu_poti_checkPointState = poti_header_DeclareEvent(PAJE_NewEvent, 2, "CheckpointInstance string", "CheckpointDomain string"); + _starpu_poti_JobState = poti_header_DeclareEvent(PAJE_SetState, 1, "JobId string"); +#endif +#else + poti_header(1,1); +#endif +#else + fprintf(file, "%%EventDef PajeDefineContainerType 1\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDefineEventType 2\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%% Color color\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDefineStateType 3\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDefineVariableType 4\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDefineLinkType 5\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% StartContainerType string\n"); + fprintf(file, "%% EndContainerType string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDefineEntityValue 6\n"); +
fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%% Color color\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeCreateContainer 7\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Alias string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeDestroyContainer 8\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Name string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeNewEvent 9\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSetState 10\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajePushState 11\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajePopState 12\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSetVariable 13\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value double\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeAddVariable 14\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% 
Value double\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSubVariable 15\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value double\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeStartLink 18\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% StartContainer string\n"); + fprintf(file, "%% Key string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeEndLink 19\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% EndContainer string\n"); + fprintf(file, "%% Key string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSetState 20\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% Size string\n"); + fprintf(file, "%% Params string\n"); + fprintf(file, "%% Footprint string\n"); + fprintf(file, "%% Tag string\n"); + fprintf(file, "%% JobId string\n"); + fprintf(file, "%% SubmitOrder string\n"); + fprintf(file, "%% Priority string\n"); + fprintf(file, "%% GFlop string\n"); + fprintf(file, "%% X string\n"); + fprintf(file, "%% Y string\n"); + /* fprintf(file, "%% Z string\n"); */ + fprintf(file, "%% Iteration string\n"); + fprintf(file, "%% Subiteration string\n"); + fprintf(file, "%% NumaNodes string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSetState 21\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% Size string\n"); + fprintf(file, "%% Params string\n"); + 
fprintf(file, "%% Footprint string\n"); + fprintf(file, "%% Tag string\n"); + fprintf(file, "%% JobId string\n"); + fprintf(file, "%% SubmitOrder string\n"); + fprintf(file, "%%EndEventDef\n"); + if (options->memory_states) + { + fprintf(file, "%%EventDef PajeNewEvent 22\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% Handle string\n"); + fprintf(file, "%% Info string\n"); + fprintf(file, "%% Size string\n"); + fprintf(file, "%% Tid string\n"); + fprintf(file, "%%EndEventDef\n"); + } + fprintf(file, "%%EventDef PajeStartLink 23\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% StartContainer string\n"); + fprintf(file, "%% Key string\n"); + fprintf(file, "%% MPITAG string\n"); + fprintf(file, "%% MPIType string\n"); + fprintf(file, "%% Priority string\n"); + fprintf(file, "%% Handle string\n"); + fprintf(file, "%% HName string\n"); + fprintf(file, "%% X string\n"); + fprintf(file, "%% Y string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeStartLink 24\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% StartContainer string\n"); + fprintf(file, "%% Key string\n"); + fprintf(file, "%% Handle string\n"); + fprintf(file, "%% HName string\n"); + fprintf(file, "%% X string\n"); + fprintf(file, "%% Y string\n"); + fprintf(file, "%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeNewEvent 25\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% CheckpointInstance string\n"); + fprintf(file, "%% CheckpointDomain string\n"); + fprintf(file, 
"%%EndEventDef\n"); + fprintf(file, "%%EventDef PajeSetState 26\n"); + fprintf(file, "%% Time date\n"); + fprintf(file, "%% Container string\n"); + fprintf(file, "%% Type string\n"); + fprintf(file, "%% Value string\n"); + fprintf(file, "%% JobId string\n"); + fprintf(file, "%%EndEventDef\n"); +#endif + +#ifdef STARPU_HAVE_POTI + poti_DefineContainerType("MPIP", "0", "MPI Program"); + poti_DefineContainerType("P", "MPIP", "Program"); + poti_DefineContainerType("Mn", "P", "Memory Node"); + poti_DefineContainerType("T", "Mn", "Thread"); + poti_DefineContainerType("UT", "P", "User Thread"); + poti_DefineContainerType("Mm", "Mn", "Memory Manager"); + poti_DefineContainerType("W", "T", "Worker"); + poti_DefineContainerType("MPICt", "P", "MPI Communication Thread"); + poti_DefineContainerType("Sc", "P", "Scheduler"); + poti_DefineEventType("prog_event", "P", "program event type"); + poti_DefineEventType("pu", "P", "task push"); + poti_DefineEventType("po", "P", "task pop"); + poti_DefineEventType("register", "P", "data registration"); + poti_DefineEventType("unregister", "P", "data unregistration"); + + /* Types for the memory node */ + poti_DefineEventType("SI", "Mm", "data state invalid"); + poti_DefineEventType("SS", "Mm", "data state shared"); + poti_DefineEventType("SO", "Mm", "data state owner"); + poti_DefineEventType("WU", "Mm", "data wont use"); + poti_DefineEventType("Al", "Mm", "Allocating Start"); + poti_DefineEventType("rc", "Mm", "Request Created"); + poti_DefineEventType("AlE", "Mm", "Allocating End"); + poti_DefineEventType("Alr", "Mm", "Allocating Async Start"); + poti_DefineEventType("AlrE", "Mm", "Allocating Async End"); + poti_DefineEventType("Fe", "Mm", "Free Start"); + poti_DefineEventType("FeE", "Mm", "Free End"); + poti_DefineEventType("Wb", "Mm", "WritingBack Start"); + poti_DefineEventType("WbE", "Mm", "WritingBack End"); + poti_DefineEventType("DCo", "Mm", "DriverCopy Start"); + poti_DefineEventType("DCoE", "Mm", "DriverCopy End"); + 
poti_DefineEventType("DCoA", "Mm", "DriverCopyAsync Start"); + poti_DefineEventType("DCoAE", "Mm", "DriverCopyAsync End"); + poti_DefineVariableType("use", "Mm", "Used (MB)", "0 0 0"); + poti_DefineVariableType("bwi_mm", "Mm", "Bandwidth In (MB/s)", "0 0 0"); + poti_DefineVariableType("bwo_mm", "Mm", "Bandwidth Out (MB/s)", "0 0 0"); + poti_DefineStateType("MS", "Mm", "Memory Node State"); + poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0"); + poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8"); + poti_DefineEntityValue("F", "MS", "Freeing", ".6 .3 .0"); + poti_DefineEntityValue("W", "MS", "WritingBack", ".0 .0 .5"); + poti_DefineEntityValue("Wa", "MS", "WritingBackAsync", ".0 .0 .4"); + poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .6"); + poti_DefineEntityValue("Co", "MS", "DriverCopy", ".3 .5 .1"); + poti_DefineEntityValue("CoA", "MS", "DriverCopyAsync", ".1 .3 .1"); + poti_DefineEntityValue("No", "MS", "Nothing", ".0 .0 .0"); + + /* Types for the Worker of the Memory Node */ + poti_DefineEventType("user_event", "P", "user event type"); + poti_DefineEventType("thread_event", "T", "thread event type"); + poti_DefineVariableType("gf", "W", "GFlop/s", "0 0 0"); + poti_DefineStateType("S", "T", "Thread State"); + poti_DefineEntityValue("I", "S", "Idle", ".9 .1 0"); + poti_DefineEntityValue("In", "S", "Initializing", "0.0 .7 1.0"); + poti_DefineEntityValue("D", "S", "Deinitializing", "0.0 .1 .7"); + poti_DefineEntityValue("Fi", "S", "FetchingInput", "1.0 .1 1.0"); + poti_DefineEntityValue("Po", "S", "PushingOutput", "0.1 1.0 1.0"); + poti_DefineEntityValue("C", "S", "Callback", ".0 .3 .8"); + poti_DefineEntityValue("B", "S", "Overhead", ".5 .18 .0"); + poti_DefineEntityValue("Ps", "S", "Parallel sync", ".5 .18 1.0"); + poti_DefineEntityValue("E", "S", "Executing", ".0 .6 .5"); + poti_DefineEntityValue("Sc", "S", "Scheduling", ".7 .36 .0"); + poti_DefineEntityValue("Sl", "S", "Sleeping", ".9 .1 .0"); + poti_DefineEntityValue("P", "S", 
"Progressing", ".1 .3 .1"); + poti_DefineEntityValue("U", "S", "Unpartitioning", ".0 .0 1.0"); + poti_DefineEntityValue("H", "S", "Hypervisor", ".5 .18 .0"); + poti_DefineEntityValue("Bu", "S", "Building task", ".5 .18 .0"); + poti_DefineEntityValue("Su", "S", "Submitting task", ".3 .09 .0"); + poti_DefineEntityValue("Th", "S", "Throttling task submission", ".8 .6 .6"); + poti_DefineEntityValue("MD", "S", "Decoding task for MPI", ".5 .18 .2"); + poti_DefineEntityValue("MPr", "S", "Preparing task for MPI", ".4 .14 .2"); + poti_DefineEntityValue("MPo", "S", "Post-processing task for MPI", ".3 .09 .2"); + poti_DefineStateType("WS", "W", "Worker State"); + poti_DefineEntityValue("I", "WS", "Idle", ".9 .1 .0"); + poti_DefineEntityValue("In", "WS", "Initializing", "0.0 .7 1.0"); + poti_DefineEntityValue("D", "WS", "Deinitializing", "0.0 .1 .7"); + poti_DefineEntityValue("Fi", "WS", "FetchingInput", "1.0 .1 1.0"); + poti_DefineEntityValue("Po", "WS", "PushingOutput", "0.1 1.0 1.0"); + poti_DefineEntityValue("C", "WS", "Callback", ".0 .3 .8"); + poti_DefineEntityValue("B", "WS", "Overhead", ".5 .18 .0"); + poti_DefineEntityValue("Ps", "WS", "Parallel sync", ".5 .18 1.0"); + poti_DefineEntityValue("E", "WS", "Executing", ".0 .6 .5"); + poti_DefineEntityValue("Sc", "WS", "Scheduling", ".7 .36 .0"); + poti_DefineEntityValue("Sl", "WS", "Sleeping", ".9 .1 .0"); + poti_DefineEntityValue("P", "WS", "Progressing", ".1 .3 .1"); + poti_DefineEntityValue("U", "WS", "Unpartitioning", ".0 .0 1.0"); + poti_DefineEntityValue("H", "WS", "Hypervisor", ".5 .18 .0"); + poti_DefineEntityValue("Bu", "WS", "Building task", ".5 .18 .0"); + poti_DefineEntityValue("Su", "WS", "Submitting task", ".3 .09 .0"); + poti_DefineEntityValue("Th", "WS", "Throttling task submission", ".8 .6 .6"); + + /* Types for the MPI Communication Thread of the Memory Node */ + poti_DefineEventType("MPIev", "MPICt", "MPI event type"); + poti_DefineVariableType("bwi_mpi", "MPICt", "Bandwidth In (MB/s)", "0 0 0"); + 
poti_DefineVariableType("bwo_mpi", "MPICt", "Bandwidth Out (MB/s)", "0 0 0"); + poti_DefineStateType("CtS", "MPICt", "Communication Thread State"); + poti_DefineEntityValue("P", "CtS", "Processing", "0 0 0"); + poti_DefineEntityValue("Pl", "CtS", "Polling", "1.0 .5 0"); + poti_DefineEntityValue("Dr", "CtS", "DriverRun", ".1 .1 1.0"); + poti_DefineEntityValue("Sl", "CtS", "Sleeping", ".9 .1 .0"); + poti_DefineEntityValue("UT", "CtS", "UserTesting", ".2 .1 .6"); + poti_DefineEntityValue("UW", "CtS", "UserWaiting", ".4 .1 .3"); + poti_DefineEntityValue("SdS", "CtS", "SendSubmitted", "1.0 .1 1.0"); + poti_DefineEntityValue("RvS", "CtS", "ReceiveSubmitted", "0.1 1.0 1.0"); + poti_DefineEntityValue("SdC", "CtS", "SendCompleted", "1.0 .5 1.0"); + poti_DefineEntityValue("RvC", "CtS", "ReceiveCompleted", "0.5 1.0 1.0"); + poti_DefineEntityValue("TD", "CtS", "Testing Detached", ".0 .0 .6"); + poti_DefineEntityValue("MT", "CtS", "MPI Test", ".0 .0 .8"); + poti_DefineEntityValue("Bu", "CtS", "Building task", ".5 .18 .0"); + poti_DefineEntityValue("Su", "CtS", "Submitting task", ".3 .09 .0"); + poti_DefineEntityValue("Th", "CtS", "Throttling task submission", ".8 .6 .6"); + poti_DefineEntityValue("C", "CtS", "Callback", ".0 .3 .8"); + + /* Type for other threads */ + poti_DefineEventType("user_user_event", "UT", "user event type"); + poti_DefineEventType("user_thread_event", "UT", "thread event type"); + poti_DefineStateType("US", "UT", "User Thread State"); + poti_DefineEntityValue("Bu", "US", "Building task", ".5 .18 .0"); + poti_DefineEntityValue("Su", "US", "Submitting task", ".3 .09 .0"); + poti_DefineEntityValue("C", "US", "Callback", ".0 .3 .8"); + poti_DefineEntityValue("Sc", "US", "Scheduling", ".7 .36 .0"); + poti_DefineEntityValue("Th", "US", "Throttling task submission", ".8 .6 .6"); + poti_DefineEntityValue("MD", "US", "Decoding task for MPI", ".5 .18 .2"); + poti_DefineEntityValue("MPr", "US", "Preparing task for MPI", ".4 .14 .2"); + poti_DefineEntityValue("MPo", 
"US", "Post-processing task for MPI", ".3 .09 .2"); + poti_DefineEntityValue("W", "US", "Waiting task", ".9 .1 .0"); + poti_DefineEntityValue("WA", "US", "Waiting all tasks", ".9 .1 .0"); + poti_DefineEntityValue("No", "US", "Nothing", ".0 .0 .0"); + + for (i=1; i +#include +#include +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#endif + +int main(int argc, char *argv[]) +{ + char *prog, *arch, *def, *effective_version, *version, *lib; + char s[1024]; + char name[64]; + int current, age, revision; + + if (argc != 7) + { + fprintf(stderr, "[dolib] bad number of arguments, expected %d, got %d\n", 7, argc); + exit(EXIT_FAILURE); + } + + prog = argv[1]; + arch = argv[2]; + def = argv[3]; + effective_version = argv[4]; + version = argv[5]; + lib = argv[6]; + + if (sscanf(version, "%d:%d:%d", &current, &revision, &age) != 3) + { + fprintf(stderr, "version not formatted as current:revision:age (%s)\n", version); + exit(EXIT_FAILURE); + } + + _snprintf(name, sizeof(name), "libstarpu-%s-%d", effective_version, current - age); + name[sizeof(name) - 1] = '\0'; + fprintf(stdout, "[dolib] using soname '%s'\n", name); + + _snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s", prog, arch, def, name, lib); + s[sizeof(s) - 1] = '\0'; + if (system(s)) + { + fprintf(stderr, "%s failed\n", s); + exit(EXIT_FAILURE); + } + + exit(EXIT_SUCCESS); +} diff --git a/src/drivers/cpu/driver_cpu.c b/src/drivers/cpu/driver_cpu.c new file mode 100644 index 0000000..0312a40 --- /dev/null +++ b/src/drivers/cpu/driver_cpu.c @@ -0,0 +1,779 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_HWLOC +#include +#ifndef HWLOC_API_VERSION +#define HWLOC_OBJ_PU HWLOC_OBJ_PROC +#endif +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif +#endif + +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +static unsigned already_busy_cpus; + +static void _starpu_cpu_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +static void _starpu_cpu_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); + +static void *_starpu_cpu_worker(void *); + +static struct _starpu_driver_info driver_info = +{ + .name_upper = "CPU", + .name_var = "CPU", + .name_lower = "cpu", + .memory_kind = STARPU_CPU_RAM, + 
.alpha = 0.5f, + .wait_for_worker_initialization = 1, +#ifdef STARPU_USE_CPU + .driver_ops = &_starpu_driver_cpu_ops, + .run_worker = _starpu_cpu_worker, +#endif + .init_worker_binding = _starpu_cpu_init_worker_binding, + .init_worker_memory = _starpu_cpu_init_worker_memory, +}; + +static struct _starpu_node_ops _starpu_driver_cpu_node_ops; +static struct _starpu_memory_driver_info memory_driver_info = +{ + .name_upper = "NUMA", + .worker_archtype = STARPU_CPU_WORKER, + .ops = &_starpu_driver_cpu_node_ops, +}; + +/* Early library initialization, before anything else, just initialize data */ +void _starpu_cpu_preinit(void) +{ + _starpu_driver_info_register(STARPU_CPU_WORKER, &driver_info); + _starpu_memory_driver_info_register(STARPU_CPU_RAM, &memory_driver_info); + already_busy_cpus = 0; +} + +void _starpu_cpu_busy_cpu(unsigned num) +{ + already_busy_cpus += num; +} + +#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) +/* Determine which devices we will use */ +void _starpu_init_cpu_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int ncpu = config->conf.ncpus; + + if (ncpu != 0) + { + STARPU_ASSERT_MSG(ncpu >= -1, "ncpus can not be negative and different from -1 (is is %d)", ncpu); + + int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + long avail_cpus = (long) (topology->nusedpus / nhyperthreads) - (long) already_busy_cpus; + if (avail_cpus < 0) + avail_cpus = 0; + int nth_per_core = starpu_getenv_number_default("STARPU_NTHREADS_PER_CORE", 1); + avail_cpus *= nth_per_core; + + _starpu_topology_check_ndevices(&ncpu, avail_cpus, 1, STARPU_MAXCPUS, config->conf.reserve_ncpus, "ncpus", "CPU cores", "maxcpus"); + } + + topology->ndevices[STARPU_CPU_WORKER] = 1; + unsigned homogeneous = starpu_getenv_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1); + + _starpu_topology_configure_workers(topology, config, + STARPU_CPU_WORKER, + 0, 0, homogeneous, 1, + ncpu, 1, NULL, NULL); +} 
+#endif + +/* Bind the driver on a CPU core */ +static void _starpu_cpu_init_worker_binding(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Dedicate a cpu core to that worker */ + workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, NULL, 0);; +} + +/* Set up memory and buses */ +static void _starpu_cpu_init_worker_memory(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + int numa_logical_id = _starpu_get_logical_numa_node_worker(workerarg->workerid); + int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id); + if (numa_starpu_id < 0 || numa_starpu_id >= STARPU_MAXNUMANODES) + numa_starpu_id = STARPU_MAIN_RAM; + +#if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) + hwloc_obj_t pu_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_PU, workerarg->bindid); + struct _starpu_hwloc_userdata *userdata = pu_obj->userdata; + userdata->pu_worker = workerarg; +#endif + + workerarg->numa_memory_node = memory_node = numa_starpu_id; + + _starpu_memory_node_add_nworkers(memory_node); + + _starpu_worker_drives_memory_node(workerarg, numa_starpu_id); + + workerarg->memory_node = memory_node; +} + +#ifdef STARPU_USE_CPU +/* This is run from the driver thread to initialize the CPU driver */ +static int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker) +{ + int devid = cpu_worker->devid; + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); + + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, cpu_worker->workerid,
starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); +#endif + + _starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1); + snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid); + snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid); + starpu_pthread_setname(cpu_worker->short_name); + + _STARPU_TRACE_WORKER_INIT_END(cpu_worker->workerid); + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cpu_worker->sched_mutex); + cpu_worker->status = STATUS_UNKNOWN; + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cpu_worker->sched_mutex); + + /* tell the main thread that we are ready */ + STARPU_PTHREAD_MUTEX_LOCK(&cpu_worker->mutex); + cpu_worker->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&cpu_worker->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&cpu_worker->mutex); + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); +#endif + return 0; +} + +static int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker) +{ + _STARPU_TRACE_WORKER_DEINIT_START; + + unsigned memnode = cpu_worker->memory_node; + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! 
*/ + _starpu_free_all_automatically_allocated_buffers(memnode); + + cpu_worker->worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER); + +#ifdef STARPU_PROF_TOOL + int workerid = cpu_worker->workerid; + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); +#endif + + return 0; +} +#endif /* STARPU_USE_CPU */ + +static uintptr_t _starpu_cpu_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + _starpu_malloc_flags_on_node(dst_node, (void**) &addr, size, +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) + /* without memcpy_peer, we can not + * allocated pinned memory, since it + * requires waiting for a task, and we + * may be called with a spinlock held + */ + flags & ~STARPU_MALLOC_PINNED +#else + flags +#endif + ); + return addr; +} + +static void _starpu_cpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + _starpu_free_flags_on_node(dst_node, (void*)addr, size, +#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) + flags & ~STARPU_MALLOC_PINNED +#else + flags +#endif + ); +} + +static int _starpu_cpu_copy_interface(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CPU_RAM); + + int ret = 0; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + if (copy_methods->ram_to_ram) + copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node); + else + { + STARPU_ASSERT_MSG(copy_methods->any_to_any, 
"the interface '%s' does define neither ram_to_ram nor any_to_any copy method", handle->ops->name); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req ? &req->async_channel : NULL); + } + return ret; +} + +static int _starpu_cpu_copy_data(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CPU_RAM); + + (void) async_channel; + + memcpy((void *) (dst + dst_offset), (void *) (src + src_offset), size); + return 0; +} + +static int _starpu_cpu_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + (void) node; + (void) handling_node; + return 1; +} + +static uintptr_t _starpu_cpu_map(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret) +{ + (void) src_node; + (void) dst_node; + (void) size; + + *ret = 0; + return src + src_offset; +} + +static int _starpu_cpu_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size) +{ + (void) src; + (void) src_offset; + (void) src_node; + (void) dst; + (void) dst_node; + (void) size; + + return 0; +} + +static int _starpu_cpu_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) src; + (void) src_offset; + (void) src_node; + (void) dst; + (void) dst_offset; + (void) dst_node; + (void) size; + + /* Memory mappings are cache-coherent */ + return 0; +} + +#ifdef STARPU_USE_CPU +/* Actually launch the job on a cpu worker. + * Handle binding CPUs on cores. 
+ * In the case of a combined worker WORKER_TASK != J->TASK */
+
+/* j is the job to run, worker_task the task handed to this worker,
+ * cpu_args the executing worker, rank the rank of this worker inside a
+ * parallel task, and perf_arch the architecture passed to the job start/end
+ * accounting. Always returns 0. */
+static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *cpu_args, int rank, struct starpu_perfmodel_arch* perf_arch)
+{
+	int is_parallel_task = (j->task_size > 1);
+	int profiling = starpu_profiling_status_get();
+	struct starpu_task *task = j->task;
+	struct starpu_codelet *cl = task->cl;
+#ifdef STARPU_PROF_TOOL
+	struct starpu_prof_tool_info pi;
+	int devid = cpu_args->devid;
+#endif
+
+	STARPU_ASSERT(cl);
+
+	if (is_parallel_task)
+	{
+		STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
+
+		/* In the case of a combined worker, the scheduler needs to know
+		 * when each actual worker begins the execution */
+		_starpu_sched_pre_exec_hook(worker_task);
+	}
+
+	/* Give profiling variable */
+	_starpu_driver_start_job(cpu_args, j, perf_arch, rank, profiling);
+
+	_starpu_cl_func_t func = _starpu_task_get_cpu_nth_implementation(cl, j->nimpl);
+
+	/* In case this is a Fork-join parallel task, the worker does not
+	 * execute the kernel at all. */
+	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
+	{
+		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
+			/* bind to parallel worker */
+			_starpu_bind_thread_on_cpus(_starpu_get_combined_worker_struct(j->combined_workerid));
+		STARPU_ASSERT_MSG(func, "when STARPU_CPU is defined in 'where', cpu_func or cpu_funcs has to be defined");
+		if (_starpu_get_disable_kernels() <= 0)
+		{
+			_STARPU_TRACE_START_EXECUTING(j);
+#ifdef STARPU_PROF_TOOL
+			pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func);
+			starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec(&pi, NULL, NULL);
+#endif
+#ifdef STARPU_SIMGRID
+			if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE)
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+			else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT)
+			{
+				_SIMGRID_TIMER_BEGIN(1);
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+				_SIMGRID_TIMER_END;
+			}
+			else
+			{
+				struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_args, j);
+				_starpu_simgrid_submit_job(cpu_args->workerid, sched_ctx->id, j, perf_arch, NAN, NAN, NULL);
+			}
+#else
+#ifdef STARPU_PAPI
+			if (rank == 0)
+				_starpu_profiling_papi_task_start_counters(task);
+#endif
+			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+#ifdef STARPU_PAPI
+			if (rank == 0)
+				_starpu_profiling_papi_task_stop_counters(task);
+#endif
+#endif
+#ifdef STARPU_PROF_TOOL
+			pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func);
+			starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL);
+#endif
+			_STARPU_TRACE_END_EXECUTING(j);
+		}
+		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
+			/* rebind to single CPU */
+			_starpu_bind_thread_on_cpu(cpu_args->bindid, cpu_args->workerid, NULL);
+	}
+	else
+	{
+		/* Fork-join task on a non-zero rank: the kernel is not run
+		 * here, but the worker is traced as executing until the end
+		 * barrier below. Emit the *start* event so that it pairs with
+		 * the end event emitted after that barrier. */
+#ifdef STARPU_PROF_TOOL
+		pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func);
+		starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec(&pi, NULL, NULL);
+#endif
+		_STARPU_TRACE_START_EXECUTING(j);
+	}
+
+	if (is_parallel_task)
+	{
+		_STARPU_TRACE_START_PARALLEL_SYNC(j);
+		STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
+		_STARPU_TRACE_END_PARALLEL_SYNC(j);
+		if (rank != 0)
+		{
+#ifdef STARPU_PROF_TOOL
+			pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func);
+			starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL);
+#endif
+			_STARPU_TRACE_END_EXECUTING(j);
+		}
+	}
+
+	_starpu_driver_end_job(cpu_args, j, perf_arch, rank, profiling);
+
+	if (is_parallel_task)
+	{
+#ifdef STARPU_SIMGRID
+		if (rank == 0)
+		{
+			/* Wait for other threads to exit barrier_wait so we
+			 * can safely drop the job structure */
+			starpu_sleep(0.0000001);
+			j->after_work_busy_barrier = 0;
+		}
+#else
+		ANNOTATE_HAPPENS_BEFORE(&j->after_work_busy_barrier);
+		(void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1);
+		if (rank == 0)
+		{
+			/* Wait with a busy barrier for other workers to have
+			 * finished with the blocking barrier before we can
+			 * safely drop the job structure */
+			while (j->after_work_busy_barrier > 0)
+			{
+				STARPU_UYIELD();
+				STARPU_SYNCHRONIZE();
+			}
+			ANNOTATE_HAPPENS_AFTER(&j->after_work_busy_barrier);
+		}
+#endif
+	}
+
+	if (rank == 0)
+	{
+		/* Only rank 0 reports the feedback and pushes the outputs */
+		_starpu_driver_update_job_feedback(j, cpu_args, perf_arch, profiling);
+#ifdef STARPU_OPENMP
+		if (!j->continuation)
+#endif
+		{
+			_starpu_push_task_output(j);
+		}
+	}
+
+	return 0;
+}
+
+static int _starpu_cpu_driver_execute_task(struct _starpu_worker *cpu_worker, struct starpu_task *task, struct _starpu_job *j)
+{
+	int res;
+
+	int rank;
+	int is_parallel_task = (j->task_size > 1);
+
+	struct starpu_perfmodel_arch* perf_arch;
+
+	rank =
cpu_worker->current_rank;
+
+	/* Select the performance model architecture (and, for a combined
+	 * worker, the combined worker id and size) to account the job on */
+	if (is_parallel_task)
+	{
+		if(j->combined_workerid != -1)
+		{
+			struct _starpu_combined_worker *combined_worker;
+			combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid);
+
+			cpu_worker->combined_workerid = j->combined_workerid;
+			cpu_worker->worker_size = combined_worker->worker_size;
+			perf_arch = &combined_worker->perf_arch;
+		}
+		else
+		{
+			struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_worker, j);
+			STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", cpu_worker->workerid);
+
+			perf_arch = &sched_ctx->perf_arch;
+		}
+	}
+	else
+	{
+		cpu_worker->combined_workerid = cpu_worker->workerid;
+		cpu_worker->worker_size = 1;
+
+		struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_worker, j);
+		if (sched_ctx && !sched_ctx->sched_policy && !sched_ctx->awake_workers && sched_ctx->main_master == cpu_worker->workerid)
+			perf_arch = &sched_ctx->perf_arch;
+		else
+			perf_arch = &cpu_worker->perf_arch;
+	}
+
+	_starpu_set_current_task(j->task);
+	cpu_worker->current_task = j->task;
+	j->workerid = cpu_worker->workerid;
+
+#ifdef STARPU_BUBBLE_VERBOSE
+	struct timespec tp;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec;
+	_STARPU_DEBUG("{%llu} [%s(%p)]\n", timestamp, starpu_task_get_name(task), task);
+#endif
+	res = execute_job_on_cpu(j, task, cpu_worker, rank, perf_arch);
+
+	_starpu_set_current_task(NULL);
+	cpu_worker->current_task = NULL;
+
+	if (res)
+	{
+		switch (res)
+		{
+		case -EAGAIN:
+			/* Give the task back to the scheduler */
+			_starpu_push_task_to_workers(task);
+			return 0;
+		default:
+			STARPU_ABORT();
+		}
+	}
+
+	/* In the case of combined workers, we need to inform the
+	 * scheduler each worker's execution is over.
+	 * Then we free the workers' task alias */
+	if (is_parallel_task)
+	{
+		_starpu_sched_post_exec_hook(task);
+		free(task);
+	}
+
+	/* Only rank 0 terminates the job */
+	if (rank == 0)
+		_starpu_handle_job_termination(j);
+	return 0;
+}
+
+/* One iteration of the main driver loop.
+ *
+ * Either executes the task whose input transfers have all completed, or
+ * makes the datawizard progress and picks a new task: for rank 0 (and when
+ * not waking up a continuation) the input fetch is started and the task is
+ * executed by a later iteration, otherwise the task is executed right away.
+ * Returns 0, or the value returned by _starpu_cpu_driver_execute_task(). */
+static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker)
+{
+	unsigned memnode = cpu_worker->memory_node;
+	int workerid = cpu_worker->workerid;
+#ifdef STARPU_PROF_TOOL
+	struct starpu_prof_tool_info pi;
+#endif
+	int res;
+	struct _starpu_job *j;
+	struct starpu_task *task = NULL, *pending_task;
+	int rank = 0;
+
+#ifdef STARPU_SIMGRID
+	starpu_pthread_wait_reset(&cpu_worker->wait);
+#endif
+
+	/* Test if async transfers are completed */
+	pending_task = cpu_worker->task_transferring;
+	if (pending_task != NULL && cpu_worker->nb_buffers_transferred == cpu_worker->nb_buffers_totransfer)
+	{
+		int ret;
+		STARPU_RMB();
+		_STARPU_TRACE_END_PROGRESS(memnode);
+#ifdef STARPU_PROF_TOOL
+		pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred);
+		starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL);
+#endif
+		j = _starpu_get_job_associated_to_task(pending_task);
+
+		_starpu_fetch_task_input_tail(j->task, j, cpu_worker);
+		/* Reset it */
+		cpu_worker->task_transferring = NULL;
+
+		ret = _starpu_cpu_driver_execute_task(cpu_worker, pending_task, j);
+		_STARPU_TRACE_START_PROGRESS(memnode);
+#ifdef STARPU_PROF_TOOL
+		pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred);
+		starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL);
+#endif
+		return ret;
+	}
+
+	res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1);
+
+	/* Only look for a new task when none is waiting for its transfers */
+	if (!pending_task)
+		task = _starpu_get_worker_task(cpu_worker, workerid, memnode);
+
+#ifdef STARPU_SIMGRID
+#ifndef STARPU_OPENMP
+	if (!res && !task)
+		/* No progress, wait */
+		starpu_pthread_wait_wait(&cpu_worker->wait);
+#else
+#if SIMGRID_VERSION >= 31800
+	if (!res && !task)
+	{
+		/* No progress, wait (but at most 1s for OpenMP support) */
+		/* TODO: ideally, make OpenMP wake worker when run_once should return */
+		struct timespec abstime;
+		_starpu_clock_gettime(&abstime);
+		abstime.tv_sec++;
+		starpu_pthread_wait_timedwait(&cpu_worker->wait, &abstime);
+	}
+#else
+	/* Previous simgrid versions don't really permit to use wait_timedwait in C */
+	starpu_sleep(0.001);
+#endif
+#endif
+#endif
+
+	if (!task)
+	{
+		/* No task or task still pending transfers */
+		_starpu_execute_registered_idle_hooks();
+		return 0;
+	}
+
+	j = _starpu_get_job_associated_to_task(task);
+	/* NOTE: j->task is != task for parallel tasks, which share the same
+	 * job. */
+
+	/* can a cpu perform that task ? */
+	if (!_STARPU_MAY_PERFORM(j, CPU))
+	{
+		/* put it at the end of the queue ... XXX */
+		_starpu_push_task_to_workers(task);
+		return 0;
+	}
+
+#ifdef STARPU_PROF_TOOL
+	pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL);
+	starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL);
+#endif
+	_STARPU_TRACE_END_PROGRESS(memnode);
+	/* Get the rank in case it is a parallel task */
+	if (j->task_size > 1)
+	{
+		/* Ranks are handed out in arrival order, under the job's
+		 * sync_mutex */
+		STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
+		rank = j->active_task_alias_count++;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
+	}
+	else
+	{
+		rank = 0;
+	}
+	cpu_worker->current_rank = rank;
+
+#ifdef STARPU_OPENMP
+	/* At this point, j->continuation has been cleared as the task is being
+	 * woken up, thus we use j->discontinuous instead for the check */
+	const unsigned continuation_wake_up = j->discontinuous;
+#else
+	const unsigned continuation_wake_up = 0;
+#endif
+	if (rank == 0 && !continuation_wake_up)
+	{
+		/* Start fetching the inputs; the task gets executed by a later
+		 * iteration, through the task_transferring test above */
+		res = _starpu_fetch_task_input(task, j, 1);
+		STARPU_ASSERT(res == 0);
+	}
+	else
+	{
+		int ret = _starpu_cpu_driver_execute_task(cpu_worker, task, j);
+		/* NOTE(review): an end_transfer event / END_PROGRESS was already
+		 * emitted before the rank computation above, and is emitted
+		 * again here -- confirm whether a start event was intended */
+#ifdef STARPU_PROF_TOOL
+		pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL);
+		starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL);
+#endif
+		_STARPU_TRACE_END_PROGRESS(memnode);
+		return ret;
+	}
+	/* NOTE(review): same remark as in the branch above */
+#ifdef STARPU_PROF_TOOL
+	pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL);
+	starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL);
+#endif
+	_STARPU_TRACE_END_PROGRESS(memnode);
+	return 0;
+}
+
+static void *_starpu_cpu_worker(void *arg)
+{
+	struct _starpu_worker *worker = arg;
+
+	_starpu_cpu_driver_init(worker);
+	_STARPU_TRACE_START_PROGRESS(worker->memory_node);
+#ifdef STARPU_PROF_TOOL
+	struct starpu_prof_tool_info pi;
+	pi =
_starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + while (_starpu_machine_is_running()) + { + _starpu_may_pause(); + _starpu_cpu_driver_run_once(worker); + } + _STARPU_TRACE_END_PROGRESS(worker->memory_node); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _starpu_cpu_driver_deinit(worker); + + return NULL; +} + +static int _starpu_cpu_driver_run(struct _starpu_worker *worker) +{ + _starpu_cpu_worker(worker); + + return 0; +} + +static int _starpu_cpu_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + driver->id.cpu_id = worker->devid; + + return 0; +} + +static int _starpu_cpu_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + return driver->id.cpu_id == worker->devid; +} + +struct _starpu_driver_ops _starpu_driver_cpu_ops = +{ + .init = _starpu_cpu_driver_init, + .run = _starpu_cpu_driver_run, + .run_once = _starpu_cpu_driver_run_once, + .deinit = _starpu_cpu_driver_deinit, + .set_devid = _starpu_cpu_driver_set_devid, + .is_devid = _starpu_cpu_driver_is_devid, +}; +#endif /* STARPU_USE_CPU */ + +static struct _starpu_node_ops _starpu_driver_cpu_node_ops = +{ + .name = "cpu driver", + + .malloc_on_node = _starpu_cpu_malloc_on_node, + .free_on_node = _starpu_cpu_free_on_node, + + .is_direct_access_supported = _starpu_cpu_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_cpu_copy_interface, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data, + + .map[STARPU_CPU_RAM] = _starpu_cpu_map, + .unmap[STARPU_CPU_RAM] = _starpu_cpu_unmap, + 
.update_map[STARPU_CPU_RAM] = _starpu_cpu_update_map, +}; diff --git a/src/drivers/cpu/driver_cpu.h b/src/drivers/cpu/driver_cpu.h new file mode 100644 index 0000000..a6c6e11 --- /dev/null +++ b/src/drivers/cpu/driver_cpu.h @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_CPU_H__ +#define __DRIVER_CPU_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +void _starpu_cpu_preinit(void); + +extern struct _starpu_driver_ops _starpu_driver_cpu_ops; + +/* Reserve one CPU core as busy for starting a driver thread */ +void _starpu_cpu_busy_cpu(unsigned num); + +void _starpu_init_cpu_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config); + +#pragma GCC visibility pop + +#endif // __DRIVER_CPU_H__ diff --git a/src/drivers/cuda/driver_cuda.c b/src/drivers/cuda/driver_cuda.c new file mode 100644 index 0000000..ea3787c --- /dev/null +++ b/src/drivers/cuda/driver_cuda.c @@ -0,0 +1,2615 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_CUDA_GL_INTEROP_H +#include +#endif +#ifdef STARPU_HAVE_NVML_H +#include +#endif +#ifdef HAVE_DLOPEN +#include +#endif +#ifdef STARPU_USE_CUDA +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_SIMGRID +#include +#endif + +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX +#include +#endif + +#ifdef STARPU_USE_CUDA +#if CUDART_VERSION >= 5000 +/* Avoid letting our streams spuriously synchronize with the NULL stream */ +#define starpu_cudaStreamCreate(stream) cudaStreamCreateWithFlags(stream, cudaStreamNonBlocking) +#else +#define starpu_cudaStreamCreate(stream) cudaStreamCreate(stream) +#endif + +/* At least CUDA 4.2 still didn't have working memcpy3D */ +#if CUDART_VERSION < 5000 +#define BUGGED_MEMCPY3D +#endif +#endif + +/* Consider a rough 10% overhead cost */ +#define FREE_MARGIN 0.9 + +static size_t global_mem[STARPU_MAXCUDADEVS]; 
+#ifdef STARPU_HAVE_NVML_H +static nvmlDevice_t nvmlDev[STARPU_MAXCUDADEVS]; +__typeof__(nvmlInit) *_starpu_nvmlInit; +__typeof__(nvmlDeviceGetNvLinkState) *_starpu_nvmlDeviceGetNvLinkState; +__typeof__(nvmlDeviceGetNvLinkRemotePciInfo) *_starpu_nvmlDeviceGetNvLinkRemotePciInfo; +__typeof__(nvmlDeviceGetHandleByIndex) *_starpu_nvmlDeviceGetHandleByIndex; +__typeof__(nvmlDeviceGetHandleByPciBusId) *_starpu_nvmlDeviceGetHandleByPciBusId; +__typeof__(nvmlDeviceGetIndex) *_starpu_nvmlDeviceGetIndex; +__typeof__(nvmlDeviceGetPciInfo) *_starpu_nvmlDeviceGetPciInfo; +__typeof__(nvmlDeviceGetUUID) *_starpu_nvmlDeviceGetUUID; +#if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION +__typeof__(nvmlDeviceGetTotalEnergyConsumption) *_starpu_nvmlDeviceGetTotalEnergyConsumption; +#endif +#endif +int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; +#ifdef STARPU_USE_CUDA +static cudaStream_t streams[STARPU_NMAXWORKERS]; +static char used_stream[STARPU_NMAXWORKERS]; +/* TODO: ideally we'd have different streams for idle, prefetch and fetch, but apparently CUDA doesn't take priorities into account for transfers anyway? 
*/ +static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS]; +static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS]; +/* Note: streams are not thread-safe, so we define them for each CUDA worker + * emitting a GPU-GPU transfer */ +static cudaStream_t in_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS]; +static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; +#ifndef STARPU_SIMGRID +static cudaEvent_t task_events[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; +#endif +#endif /* STARPU_USE_CUDA */ +#ifdef STARPU_SIMGRID +static unsigned task_finished[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; +static starpu_pthread_mutex_t cuda_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +#endif /* STARPU_SIMGRID */ + +static enum initialization cuda_device_init[STARPU_MAXCUDADEVS]; +static int cuda_device_users[STARPU_MAXCUDADEVS]; +static starpu_pthread_mutex_t cuda_device_init_mutex[STARPU_MAXCUDADEVS]; +static starpu_pthread_cond_t cuda_device_init_cond[STARPU_MAXCUDADEVS]; + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) +static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS]; + +static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; +static int cuda_globalbindid; +#endif + +static int _starpu_cuda_peer_access(int devid, int peer_devid); + +int _starpu_nworker_per_cuda; + +static size_t _starpu_cuda_get_global_mem_size(unsigned devid) +{ + return global_mem[devid]; +} + +#ifdef STARPU_USE_CUDA +static cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node) +{ + int dst_devid = starpu_memory_node_get_devid(dst_node); + cudaStream_t stream; + + stream = in_transfer_streams[dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +static cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + 
cudaStream_t stream; + + stream = out_transfer_streams[src_devid]; + STARPU_ASSERT(stream); + return stream; +} + +static cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + int dst_devid = starpu_memory_node_get_devid(dst_node); + cudaStream_t stream; + + stream = in_peer_transfer_streams[src_devid][dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t starpu_cuda_get_local_stream(void) +{ + int worker = starpu_worker_get_id_check(); + + used_stream[worker] = 1; + return streams[worker]; +} + +const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned devid = config->workers[workerid].devid; + return &props[devid]; +} +#endif /* STARPU_USE_CUDA */ + + +/* Early library initialization, before anything else, just initialize data */ +void _starpu_cuda_init(void) +{ + unsigned i; + for (i = 0; i < STARPU_MAXCUDADEVS; i++) + { + STARPU_PTHREAD_MUTEX_INIT(&cuda_device_init_mutex[i], NULL); + STARPU_PTHREAD_COND_INIT(&cuda_device_init_cond[i], NULL); + } + memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); + memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); + cuda_globalbindid = -1; +} + +/* Return the number of devices usable in the system. + * The value returned cannot be greater than MAXCUDADEVS */ + +unsigned _starpu_get_cuda_device_count(void) +{ + int cnt; +#ifdef STARPU_SIMGRID + cnt = _starpu_simgrid_get_nbhosts("CUDA"); +#else + cudaError_t cures; + cures = cudaGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(cures)) + return 0; +#endif + + if (cnt > STARPU_MAXCUDADEVS) + { + _STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. 
Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS); + cnt = STARPU_MAXCUDADEVS; + } + return (unsigned)cnt; +} + +/* This is run from initialize to determine the number of CUDA devices */ +void _starpu_init_cuda(void) +{ +} + +/* This is called to really discover the hardware */ +void _starpu_cuda_discover_devices(struct _starpu_machine_config *config) +{ + /* Discover the number of CUDA devices. Fill the result in CONFIG. */ + +#ifdef STARPU_SIMGRID + config->topology.nhwdevices[STARPU_CUDA_WORKER] = _starpu_simgrid_get_nbhosts("CUDA"); +#else + int cnt; + cudaError_t cures; + + cures = cudaGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + cnt = 0; + config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt; +#ifdef STARPU_HAVE_NVML_H + void *nvml = dlopen("libnvidia-ml.so.1", RTLD_LAZY); + + if (nvml) + { + _starpu_nvmlInit = dlsym(nvml, "nvmlInit_v2"); + if (!_starpu_nvmlInit) + _starpu_nvmlInit = dlsym(nvml, "nvmlInit"); + } + if (_starpu_nvmlInit) + { + _starpu_nvmlDeviceGetNvLinkState = dlsym(nvml, "nvmlDeviceGetNvLinkState"); + _starpu_nvmlDeviceGetHandleByIndex = dlsym(nvml, "nvmlDeviceGetHandleByIndex_v2"); + if (!_starpu_nvmlDeviceGetHandleByIndex) + _starpu_nvmlDeviceGetHandleByIndex = dlsym(nvml, "nvmlDeviceGetHandleByIndex"); + _starpu_nvmlDeviceGetNvLinkRemotePciInfo = dlsym(nvml, "nvmlDeviceGetNvLinkRemotePciInfo_v2"); + if (!_starpu_nvmlDeviceGetNvLinkRemotePciInfo) + _starpu_nvmlDeviceGetNvLinkRemotePciInfo = dlsym(nvml, "nvmlDeviceGetNvLinkRemotePciInfo"); + _starpu_nvmlDeviceGetHandleByPciBusId = dlsym(nvml, "nvmlDeviceGetHandleByPciBusId_v2"); + if (!_starpu_nvmlDeviceGetHandleByPciBusId) + _starpu_nvmlDeviceGetHandleByPciBusId = dlsym(nvml, "nvmlDeviceGetHandleByPciBusId"); + _starpu_nvmlDeviceGetIndex = dlsym(nvml, "nvmlDeviceGetIndex"); + _starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo_v3"); + if (!_starpu_nvmlDeviceGetPciInfo) + 
_starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo_v2"); + if (!_starpu_nvmlDeviceGetPciInfo) + _starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo"); + _starpu_nvmlDeviceGetUUID = dlsym(nvml, "nvmlDeviceGetUUID"); +#if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION + _starpu_nvmlDeviceGetTotalEnergyConsumption = dlsym(nvml, "nvmlDeviceGetTotalEnergyConsumption"); +#endif + _starpu_nvmlInit(); + } +#endif +#endif +} + +#ifdef STARPU_HAVE_HWLOC +#ifdef STARPU_HAVE_NVML_H +static int _starpu_cuda_direct_link(struct _starpu_machine_config *config, unsigned devid1, unsigned devid2) +{ + unsigned i; + struct cudaDeviceProp props_dev1; + struct cudaDeviceProp props_dev2; + cudaError_t cures; + int nvswitch = 0; + + if (!_starpu_nvmlDeviceGetNvLinkState || !_starpu_nvmlDeviceGetNvLinkRemotePciInfo) + return 0; + + cures = cudaGetDeviceProperties(&props_dev1, devid1); + if (cures != cudaSuccess) + return 0; + cures = cudaGetDeviceProperties(&props_dev2, devid2); + if (cures != cudaSuccess) + return 0; + + nvmlDevice_t nvml_dev1 = _starpu_cuda_get_nvmldev(&props_dev1); + + if (!nvml_dev1) + return 0; + + for (i = 0; i < NVML_NVLINK_MAX_LINKS; i++) { + nvmlEnableState_t active; + nvmlReturn_t ret; + ret = _starpu_nvmlDeviceGetNvLinkState(nvml_dev1, i, &active); + if (ret == NVML_ERROR_NOT_SUPPORTED) + continue; + if (active != NVML_FEATURE_ENABLED) + continue; + + nvmlPciInfo_t pci; + _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvml_dev1, i, &pci); + + hwloc_obj_t obj = hwloc_get_pcidev_by_busid(config->topology.hwtopology, + pci.domain, pci.bus, pci.device, 0); + if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE + && (obj->attr->pcidev.class_id >> 8 == 0x06) + && (obj->attr->pcidev.vendor_id == 0x10de)) + { + /* This is an NVIDIA PCI bridge, i.e. 
an NVSwitch */ + /* NVSwitch */ + nvswitch = 1; + break; + } + + if ((int) pci.domain == props_dev2.pciDomainID && + (int) pci.bus == props_dev2.pciBusID && + (int) pci.device == props_dev2.pciDeviceID) + /* We have a direct NVLink! */ + return 1; + } + + if (!nvswitch) + { + /* No direct NVLink or NVSwitch found for dev1 */ + return 0; + } + + nvmlDevice_t nvml_dev2 = _starpu_cuda_get_nvmldev(&props_dev2); + + if (!nvml_dev2) + return 0; + + for (i = 0; i < NVML_NVLINK_MAX_LINKS; i++) + { + nvmlEnableState_t active; + nvmlReturn_t ret; + ret = _starpu_nvmlDeviceGetNvLinkState(nvml_dev2, i, &active); + if (ret == NVML_ERROR_NOT_SUPPORTED) + continue; + if (active != NVML_FEATURE_ENABLED) + continue; + + nvmlPciInfo_t pci; + _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvml_dev2, i, &pci); + + hwloc_obj_t obj = hwloc_get_pcidev_by_busid(config->topology.hwtopology, + pci.domain, pci.bus, pci.device, 0); + if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE + && (obj->attr->pcidev.class_id >> 8 == 0x06) + && (obj->attr->pcidev.vendor_id == 0x10de)) + { + /* This is an NVIDIA PCI bridge, i.e. an NVSwitch */ + /* NVSwitch */ + /* TODO: follow answers to https://forums.developer.nvidia.com/t/how-to-distinguish-different-nvswitch/241983 */ + return 1; + } + } + + /* No NVSwitch found for dev2 */ + return 0; +} +#endif +#endif + +static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + struct starpu_conf *uconf = &config->conf; + + _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0 + ? 
NULL + : (int *)uconf->workers_cuda_gpuid, + &(config->current_devid[STARPU_CUDA_WORKER]), + (int *)topology->workers_devid[STARPU_CUDA_WORKER], + "STARPU_WORKERS_CUDAID", + topology->nhwdevices[STARPU_CUDA_WORKER], + STARPU_CUDA_WORKER); + _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]); +} + +/* Determine which devices we will use */ +void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int i; + + for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++) + cuda_worker_set[i].workers = NULL; + + int ncuda = config->conf.ncuda; + + if (ncuda != 0) + { + /* The user did not disable CUDA. We need to + * initialize CUDA early to count the number of + * devices + */ + _starpu_init_cuda(); + int nb_devices = _starpu_get_cuda_device_count(); + + _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); + } + + int nworker_per_cuda = starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1); + + STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0"); + STARPU_ASSERT_MSG_ALWAYS(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + if (nworker_per_cuda > 1) + { + _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n"); + nworker_per_cuda = 1; + } + _starpu_nworker_per_cuda = nworker_per_cuda; +#endif + /* Now we know how many CUDA devices will be used */ + topology->ndevices[STARPU_CUDA_WORKER] = ncuda; + + _starpu_initialize_workers_cuda_gpuid(config); + + /* allow having one worker per stream */ + topology->cuda_th_per_stream = starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1); + topology->cuda_th_per_dev = starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_DEV", -1); + + 
STARPU_ASSERT_MSG(!(topology->cuda_th_per_stream == 1 && topology->cuda_th_per_dev != -1), "It does not make sense to set both STARPU_CUDA_THREAD_PER_WORKER to 1 and to set STARPU_CUDA_THREAD_PER_DEV, please choose either per worker or per device or none"); + + /* per device by default */ + if (topology->cuda_th_per_dev == -1) + { + if (topology->cuda_th_per_stream == 1) + topology->cuda_th_per_dev = 0; + else + topology->cuda_th_per_dev = 1; + } + /* Not per stream by default */ + if (topology->cuda_th_per_stream == -1) + { + topology->cuda_th_per_stream = 0; + } + + if (!topology->cuda_th_per_dev) + { + cuda_worker_set[0].workers = &config->workers[topology->nworkers]; + cuda_worker_set[0].nworkers = ncuda * nworker_per_cuda; + } + + unsigned cudagpu; + for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); + + if (devid == -1) + { + // There is no more devices left + topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; + break; + } + + struct _starpu_worker_set *worker_set; + + if(topology->cuda_th_per_stream) + { + worker_set = ALLOC_WORKER_SET; + } + else if (topology->cuda_th_per_dev) + { + worker_set = &cuda_worker_set[devid]; + worker_set->workers = &config->workers[topology->nworkers]; + worker_set->nworkers = nworker_per_cuda; + } + else + { + /* Same worker set for all devices */ + worker_set = &cuda_worker_set[0]; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_CUDA_WORKER, + cudagpu, devid, 0, 0, + nworker_per_cuda, + // TODO: fix perfmodels etc. 
+ // nworker_per_cuda - 1, + 1, + worker_set, NULL); + + _starpu_devices_gpu_set_used(devid); + + /* TODO: move this to generic place */ +#ifdef STARPU_HAVE_HWLOC + { + hwloc_obj_t obj = NULL; + if (starpu_driver_info[STARPU_CUDA_WORKER].get_hwloc_obj) + obj = starpu_driver_info[STARPU_CUDA_WORKER].get_hwloc_obj(topology->hwtopology, devid); + + if (obj) + { + struct _starpu_hwloc_userdata *data = obj->userdata; + data->ngpus++; + } + else + { + _STARPU_DISP("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid); + } + } +#endif + } +} + +/* Bind the driver on a CPU core */ +void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings (logical core) */ + unsigned preferred_binding[STARPU_NMAXWORKERS]; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + +#ifndef STARPU_SIMGRID + if (_starpu_may_bind_automatically[STARPU_CUDA_WORKER]) + { + /* StarPU is allowed to bind threads automatically */ + unsigned *preferred_numa_binding = _starpu_get_cuda_affinity_vector(devid); + unsigned npreferred_numa = _starpu_topology_get_nhwnumanodes(config); + npreferred = _starpu_topology_get_numa_core_binding(config, preferred_numa_binding, npreferred_numa, preferred_binding, STARPU_NMAXWORKERS); + } +#endif /* SIMGRID */ + if (cuda_bindid_init[devid]) + { + if (config->topology.cuda_th_per_stream == 0) + workerarg->bindid = cuda_bindid[devid]; + else + workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } + else + { + cuda_bindid_init[devid] = 1; + + if (config->topology.cuda_th_per_dev == 0 && config->topology.cuda_th_per_stream == 0) + { + if (cuda_globalbindid == -1) + cuda_globalbindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + workerarg->bindid = cuda_bindid[devid] = 
cuda_globalbindid; + } + else + workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } +} + +/* Set up memory and buses */ +void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa; + + if (cuda_memory_init[devid]) + { + memory_node = cuda_memory_nodes[devid]; + } + else + { + cuda_memory_init[devid] = 1; + + memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); + +#ifdef STARPU_USE_CUDA_MAP + /* TODO: check node capabilities */ + _starpu_memory_node_set_mapped(memory_node); +#endif + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); + _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); + } + +#ifdef STARPU_SIMGRID + const char* cuda_memcpy_peer; + char name[16]; + snprintf(name, sizeof(name), "CUDA%u", devid); + starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT(host); + _starpu_simgrid_memory_node_set_host(memory_node, host); +# ifdef STARPU_HAVE_SIMGRID_ACTOR_H + cuda_memcpy_peer = sg_host_get_property_value(host, "memcpy_peer"); +# else + cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer"); +# endif +#endif /* SIMGRID */ + + if ( +#ifdef STARPU_SIMGRID + cuda_memcpy_peer && atoll(cuda_memcpy_peer) +#elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + 1 +#else /* MEMCPY_PEER */ + 0 +#endif /* MEMCPY_PEER */ + ) + { + int worker2; + for (worker2 = 0; worker2 < workerarg->workerid; worker2++) + { + struct _starpu_worker *workerarg2 = &config->workers[worker2]; + int devid2 = workerarg2->devid; + if (workerarg2->arch == STARPU_CUDA_WORKER) + { + unsigned memory_node2 = 
starpu_worker_get_memory_node(worker2); + int bus21 = _starpu_register_bus(memory_node2, memory_node); + int bus12 = _starpu_register_bus(memory_node, memory_node2); + if (bus21 < 0 || bus12 < 0) + /* Already registered because of e.g. several workers per CUDA */ + continue; + _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = bus21; + _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = bus12; +#ifndef STARPU_SIMGRID +#ifdef STARPU_HAVE_HWLOC +#ifdef STARPU_HAVE_NVML_H + if (_starpu_cuda_direct_link(config, devid, devid2)) + { + starpu_bus_set_ngpus(bus21, 1); + starpu_bus_set_ngpus(bus12, 1); + } + else +#endif +#endif + { +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX + hwloc_obj_t obj, obj2, ancestor; + obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid); + obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2); + ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); + if (ancestor) + { + struct _starpu_hwloc_userdata *data = ancestor->userdata; +#ifdef STARPU_VERBOSE + { + char name[64]; + hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); + _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); + } +#endif + starpu_bus_set_ngpus(bus21, data->ngpus); + starpu_bus_set_ngpus(bus12, data->ngpus); + } +#endif + } +#endif + } + } + } + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can also manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); + + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); + + workerarg->memory_node = memory_node; +} + +/* Set the current CUDA device */ +void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_SIMGRID + STARPU_ABORT(); +#else + 
cudaError_t cures; + struct starpu_conf *conf = &_starpu_get_machine_config()->conf; +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && defined(HAVE_CUDA_GL_INTEROP_H) + unsigned i; +#endif + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (conf->n_cuda_opengl_interoperability) + { + _STARPU_MSG("OpenGL interoperability was requested, but StarPU was built with multithread GPU control support, please reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); + STARPU_ABORT(); + } +#elif !defined(HAVE_CUDA_GL_INTEROP_H) + if (conf->n_cuda_opengl_interoperability) + { + _STARPU_MSG("OpenGL interoperability was requested, but cuda_gl_interop.h could not be compiled, please make sure that OpenGL headers were available before ./configure run.\n"); + STARPU_ABORT(); + } +#else + for (i = 0; i < conf->n_cuda_opengl_interoperability; i++) + { + if (conf->cuda_opengl_interoperability[i] == devid) + { + cures = cudaGLSetGLDevice(devid); + goto done; + } + } +#endif + + cures = cudaSetDevice(devid); + +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && defined(HAVE_CUDA_GL_INTEROP_H) +done: +#endif +#ifdef STARPU_OPENMP + /* When StarPU is used as Open Runtime support, + * starpu_omp_shutdown() will usually be called from a + * destructor, in which case cudaThreadExit() reports a + * cudaErrorCudartUnloading here. There should not + * be any remaining tasks running at this point so + * we can probably ignore it without much consequences. 
*/ + if (STARPU_UNLIKELY(cures && cures != cudaErrorCudartUnloading)) + STARPU_CUDA_REPORT_ERROR(cures); +#else + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +#endif /* STARPU_OPENMP */ +#endif +} + +/* In case we want to cap the amount of memory available on the GPUs by the + * mean of the STARPU_LIMIT_CUDA_MEM, we decrease the value of + * global_mem[devid] which is the value returned by + * _starpu_cuda_get_global_mem_size() to indicate how much memory can + * be allocated on the device + */ +static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) +{ + starpu_ssize_t limit; + size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; + size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; + +#ifdef STARPU_SIMGRID + totalGlobalMem = _starpu_simgrid_get_memsize("CUDA", devid); +#elif defined(STARPU_USE_CUDA) + /* Find the size of the memory on the device */ + totalGlobalMem = props[devid].totalGlobalMem; +#endif + + limit = starpu_getenv_number("STARPU_LIMIT_CUDA_MEM"); + if (limit == -1) + { + char name[30]; + snprintf(name, sizeof(name), "STARPU_LIMIT_CUDA_%u_MEM", devid); + limit = starpu_getenv_number(name); + } +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + if (limit == -1) + { + limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; + } +#endif + + global_mem[devid] = limit * 1024*1024; + +#ifdef STARPU_USE_CUDA + /* How much memory to waste ? 
*/ + to_waste = totalGlobalMem - global_mem[devid]; + + props[devid].totalGlobalMem -= to_waste; +#endif /* STARPU_USE_CUDA */ + + _STARPU_DEBUG("CUDA device %u: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", + devid, (long) to_waste/(1024*1024), (long) limit, (long) totalGlobalMem/(1024*1024), + (long) (totalGlobalMem - to_waste)/(1024*1024)); +} + +/* Really initialize one device */ +static void init_device_context(unsigned devid, unsigned memnode) +{ + STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); + +#ifndef STARPU_SIMGRID + cudaError_t cures; + + /* TODO: cudaSetDeviceFlag(cudaDeviceMapHost) */ + + starpu_cuda_set_device(devid); +#endif /* !STARPU_SIMGRID */ + + STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); + cuda_device_users[devid]++; + if (cuda_device_init[devid] == UNINITIALIZED) + /* Nobody started initialization yet, do it */ + cuda_device_init[devid] = CHANGING; + else + { + /* Somebody else is doing initialization, wait for it */ + while (cuda_device_init[devid] != INITIALIZED) + STARPU_PTHREAD_COND_WAIT(&cuda_device_init_cond[devid], &cuda_device_init_mutex[devid]); + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); + return; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); + +#ifndef STARPU_SIMGRID +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) + { + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0 && worker->devid != devid) + { + int can; + cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid); + (void) cudaGetLastError(); + + if (!cures && can) + { + cures = cudaDeviceEnablePeerAccess(worker->devid, 0); + (void) cudaGetLastError(); + + if (!cures) + { + _STARPU_DEBUG("Enabled GPU-Direct %d -> %d\n", worker->devid, devid); + 
/* direct copies are made from the destination, see link_supports_direct_transfers */ + starpu_bus_set_direct(_starpu_cuda_bus_ids[worker->devid+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], 1); + } + } + } + } + } +#endif + + /* force CUDA to initialize the context for real */ + cures = cudaFree(0); + if (STARPU_UNLIKELY(cures)) + { + if (cures == cudaErrorDevicesUnavailable) + { + _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); + exit(77); + } + STARPU_CUDA_REPORT_ERROR(cures); + } + + cures = cudaGetDeviceProperties(&props[devid], devid); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (props[devid].computeMode == cudaComputeModeExclusive) + { + _STARPU_MSG("CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); + STARPU_ABORT(); + } +#endif + + cures = starpu_cudaStreamCreate(&in_transfer_streams[devid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + cures = starpu_cudaStreamCreate(&out_transfer_streams[devid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) + { + cures = starpu_cudaStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + } + } +#endif /* !STARPU_SIMGRID */ + + STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); + cuda_device_init[devid] = INITIALIZED; + STARPU_PTHREAD_COND_BROADCAST(&cuda_device_init_cond[devid]); + 
STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); + + _starpu_cuda_limit_gpu_mem_if_needed(devid); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); +} + +/* De-initialize one device */ +static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ +#ifndef STARPU_SIMGRID + starpu_cuda_set_device(devid); + + cudaStreamDestroy(in_transfer_streams[devid]); + cudaStreamDestroy(out_transfer_streams[devid]); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) + { + cudaStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); + } + } +#endif /* !STARPU_SIMGRID */ +} + +static void init_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + int j; +#ifdef STARPU_SIMGRID + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + task_finished[workerid][j] = 0; +#else /* !STARPU_SIMGRID */ + cudaError_t cures; + starpu_cuda_set_device(devid); + + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + { + cures = cudaEventCreateWithFlags(&task_events[workerid][j], cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + } + + cures = starpu_cudaStreamCreate(&streams[workerid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + +#endif /* !STARPU_SIMGRID */ +} + +static void deinit_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + unsigned j; +#ifdef STARPU_SIMGRID + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + task_finished[workerid][j] = 0; +#else /* STARPU_SIMGRID */ + starpu_cuda_set_device(devid); + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + cudaEventDestroy(task_events[workerid][j]); + cudaStreamDestroy(streams[workerid]); +#endif /* STARPU_SIMGRID */ +} + +#ifdef STARPU_HAVE_NVML_H +nvmlDevice_t 
_starpu_cuda_get_nvmldev(struct cudaDeviceProp *dev_props) +{ + char busid[13]; + nvmlDevice_t ret; + + snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", dev_props->pciDomainID, dev_props->pciBusID, dev_props->pciDeviceID); + if (!_starpu_nvmlDeviceGetHandleByPciBusId || _starpu_nvmlDeviceGetHandleByPciBusId(busid, &ret) != NVML_SUCCESS) + ret = NULL; + + return ret; +} + +nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid) +{ + return nvmlDev[devid]; +} +#endif + +/* This is run from the driver thread to initialize the driver CUDA context */ +static int _starpu_cuda_driver_init(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = worker->set; + struct _starpu_worker *worker0 = &worker_set->workers[0]; + int lastdevid = -1; + unsigned i; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + + _starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0); + _starpu_set_local_worker_set_key(worker_set); + +#ifdef STARPU_USE_FXT + for (i = 1; i < worker_set->nworkers; i++) + _starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0); +#endif + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); +#endif + + if ((int) devid == lastdevid) + { +#ifdef STARPU_SIMGRID + STARPU_ASSERT_MSG(0, "Simgrid mode does not support concurrent kernel execution yet\n"); +#endif /* !STARPU_SIMGRID */ + + /* Already initialized */ + continue; + } + lastdevid = devid; 
+ init_device_context(devid, memnode); + +#ifndef STARPU_SIMGRID + if (worker->config->topology.nworker[STARPU_CUDA_WORKER][devid] > 1 && props[devid].concurrentKernels == 0) + _STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid); +#endif /* !STARPU_SIMGRID */ + } + + /* one more time to avoid hacks from third party lib :) */ + _starpu_bind_thread_on_cpu(worker0->bindid, worker0->workerid, NULL); + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned workerid = worker->workerid; + unsigned subdev = worker->subworkerid; + + float size = (float) global_mem[devid] / (1<<30); +#ifdef STARPU_SIMGRID + const char *devname = _starpu_simgrid_get_devname("CUDA", devid); + if (!devname) + devname = "Simgrid"; +#else + /* get the device's name */ + char devname[64]; + strncpy(devname, props[devid].name, 63); + devname[63] = 0; +#endif + +#if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID) +#if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID) +#ifdef STARPU_HAVE_NVML_H + nvmlDev[devid] = _starpu_cuda_get_nvmldev(&props[devid]); +#endif + if (props[devid].pciDomainID) + snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID); + else +#endif + snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %02x:%02x.0)", devid, subdev, devname, size, props[devid].pciBusID, props[devid].pciDeviceID); +#else + snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB)", devid, subdev, devname, size); +#endif + snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u.%u", devid, subdev); + _STARPU_DEBUG("cuda (%s) dev id %u worker %u thread is ready to run on CPU %d !\n", devname, devid, subdev, worker->bindid); + + 
worker->pipeline_length = starpu_getenv_number_default("STARPU_CUDA_PIPELINE", 2); + if (worker->pipeline_length > STARPU_MAX_PIPELINE) + { + _STARPU_DISP("Warning: STARPU_CUDA_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); + worker->pipeline_length = STARPU_MAX_PIPELINE; + } +#if !defined(STARPU_SIMGRID) && !defined(STARPU_NON_BLOCKING_DRIVERS) + if (worker->pipeline_length >= 1) + { + /* We need non-blocking drivers, to poll for CUDA task + * termination */ + _STARPU_DISP("Warning: reducing STARPU_CUDA_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not enabled)\n"); + worker->pipeline_length = 0; + } +#endif + init_worker_context(workerid, worker->devid); + + _STARPU_TRACE_WORKER_INIT_END(workerid); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); +#endif + } + { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "CUDA %u", worker0->devid); + starpu_pthread_setname(thread_name); + } + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); + worker0->status = STATUS_UNKNOWN; + worker0->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker0->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); + worker_set->set_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); + + return 0; +} + +static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = worker->set; + int lastdevid = -1; + unsigned i; + _STARPU_TRACE_WORKER_DEINIT_START; + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = 
&worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + unsigned usersleft; + if ((int) devid == lastdevid) + /* Already initialized */ + continue; + lastdevid = devid; + + STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); + usersleft = --cuda_device_users[devid]; + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); + + if (!usersleft) + { + /* I'm last, deinitialize device */ + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! */ + _starpu_free_all_automatically_allocated_buffers(memnode); + + _starpu_malloc_shutdown(memnode); + + deinit_device_context(devid); + } + STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); + cuda_device_init[devid] = UNINITIALIZED; + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); + + } + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned workerid = worker->workerid; + unsigned memnode = worker->memory_node; + + deinit_worker_context(workerid, worker->devid); + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); +#endif + } + + worker_set->workers[0].worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); + + return 0; +} + +static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + (void) flags; + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) + +#ifdef STARPU_SIMGRID + static uintptr_t last[STARPU_MAXNODES]; +#ifdef STARPU_DEVEL +#warning TODO: record used memory, using a simgrid property to know 
the available memory +#endif + /* Sleep for the allocation */ + STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex); + if (_starpu_simgrid_cuda_malloc_cost()) + starpu_sleep(0.000175); + if (!last[dst_node]) + last[dst_node] = 1<<10; + addr = last[dst_node]; + last[dst_node]+=size; + STARPU_ASSERT(last[dst_node] >= addr); + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex); +#else + unsigned devid = starpu_memory_node_get_devid(dst_node); +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + starpu_cuda_set_device(devid); +#else + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid) + STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA"); +#endif + /* Check if there is free memory */ + size_t cuda_mem_free, cuda_mem_total; + cudaError_t status; + status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); + if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) + { + addr = 0; + } + else + { + status = cudaMalloc((void **)&addr, size); + if (!addr || (status != cudaSuccess)) + { + if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) + STARPU_CUDA_REPORT_ERROR(status); + addr = 0; + } + } +#endif +#endif + return addr; +} + +static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void) dst_node; + (void) addr; + (void) size; + (void) flags; + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) +#ifdef STARPU_SIMGRID + STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex); + /* Sleep for the free */ + if (_starpu_simgrid_cuda_malloc_cost()) + starpu_sleep(0.000750); + STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex); + /* CUDA also synchronizes roughly everything on cudaFree */ + _starpu_simgrid_sync_gpus(); +#else + cudaError_t err; + unsigned devid = starpu_memory_node_get_devid(dst_node); +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + starpu_cuda_set_device(devid); +#else + struct _starpu_worker *worker = 
_starpu_get_local_worker_key(); + if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid) + STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA"); +#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ + err = cudaFree((void*)addr); +#ifdef STARPU_OPENMP + /* When StarPU is used as Open Runtime support, + * starpu_omp_shutdown() will usually be called from a + * destructor, in which case cudaThreadExit() reports a + * cudaErrorCudartUnloading here. There should not + * be any remaining tasks running at this point so + * we can probably ignore it without much consequences. */ + if (STARPU_UNLIKELY(err != cudaSuccess && err != cudaErrorCudartUnloading)) + STARPU_CUDA_REPORT_ERROR(err); +#else + if (STARPU_UNLIKELY(err != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(err); +#endif /* STARPU_OPENMP */ +#endif /* STARPU_SIMGRID */ +#endif +} + +#ifdef STARPU_USE_CUDA +int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t ssize, cudaStream_t stream, + enum cudaMemcpyKind kind) +{ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT(); +#endif + } + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + cures = cudaMemcpyPeerAsync((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize, stream); + } + else +#endif + { + cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); + } + (void) cudaGetLastError(); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed 
or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + /* do it in a synchronous fashion */ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + cures = cudaMemcpyPeer((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize); + } + else +#endif + { + cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); + } + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + + return -EAGAIN; +} + +int +starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + cudaStream_t stream, enum cudaMemcpyKind kind) +{ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +# ifdef BUGGED_MEMCPY3D + STARPU_ABORT_MSG("CUDA memcpy 3D peer buggy, but core triggered one?!"); +# endif + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); +#endif + } + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + struct cudaMemcpy3DPeerParms p; + memset(&p, 0, sizeof(p)); + + p.srcDevice = src_dev; + p.dstDevice = dst_dev; + p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld_src, blocksize, numblocks); + p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld_dst, blocksize, numblocks); + p.extent = make_cudaExtent(blocksize, numblocks, 1); + + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy3DPeerAsync(&p, stream); + (void) cudaGetLastError(); + } + + /* Test if the asynchronous copy has failed or if the caller 
only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy3DPeer(&p); + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + else +#endif + { + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind, stream); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + + + return -EAGAIN; +} + +#if 0 +/* CUDA doesn't seem to be providing a way to set ld2?? 
*/ +int +starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks_1, size_t ld1_src, size_t ld1_dst, + size_t numblocks_2, size_t ld2_src, size_t ld2_dst, + cudaStream_t stream, enum cudaMemcpyKind kind) +{ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); +#endif + } + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + struct cudaMemcpy3DPeerParms p; + memset(&p, 0, sizeof(p)); + + p.srcDevice = src_dev; + p.dstDevice = dst_dev; + p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld1_src, blocksize, numblocks); + p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld1_dst, blocksize, numblocks); + // FIXME: how to pass ld2_src / ld2_dst ?? + p.extent = make_cudaExtent(blocksize, numblocks_1, numblocks_2); + + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy3DPeerAsync(&p, stream); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy3DPeer(&p); + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + else +#endif + { + struct cudaMemcpy3DParms p; + memset(&p, 0, sizeof(p)); + + p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld1_src, blocksize, numblocks); + p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld1_dst, blocksize, numblocks); + // FIXME: how to pass ld2_src / ld2_dst ?? 
+ p.extent = make_cudaExtent(blocksize, numblocks, 1); + p.kind = kind; + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy3DAsync(&p, stream); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy3D(&p); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + + + return -EAGAIN; +} +#endif + +/* View the channel's generic event storage as a cudaEvent_t. The static assertion checks that the storage is large enough to hold one. */ static inline cudaEvent_t *_starpu_cuda_event(union _starpu_async_channel_event *_event) +{ + cudaEvent_t *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +/* Non-blocking completion test. Returns 1 and destroys the event once cudaEventQuery() reports cudaSuccess, and 0 while it reports cudaErrorNotReady. Any other status is passed to STARPU_CUDA_REPORT_ERROR. */ static unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel) +{ + cudaEvent_t event; + cudaError_t cures; + unsigned success; + + event = *_starpu_cuda_event(&async_channel->event); + cures = cudaEventQuery(event); + success = (cures == cudaSuccess); + + if (success) + cudaEventDestroy(event); + else if (cures != cudaErrorNotReady) + STARPU_CUDA_REPORT_ERROR(cures); + + return success; +} + +/* Only used at starpu_shutdown. Blocks in cudaEventSynchronize() until the channel's event completes, then destroys the event. */ +static void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel) +{ + cudaEvent_t event; + cudaError_t cures; + + event = *_starpu_cuda_event(&async_channel->event); + + cures = cudaEventSynchronize(event); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + cures = cudaEventDestroy(event); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +/* Make the CUDA device involved in a src_node -> dst_node copy the current one before the copy is issued. */ static void +starpu_cuda_set_copy_device(unsigned src_node, unsigned dst_node) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + unsigned
devid; + if ((src_kind == STARPU_CUDA_RAM) && (dst_kind == STARPU_CUDA_RAM)) + { + /* GPU-GPU transfer, issue it from the destination */ + devid = starpu_memory_node_get_devid(dst_node); + } + else + { + /* Only one side is a CUDA node: use the device of that side */ unsigned node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node; + devid = starpu_memory_node_get_devid(node); + } + starpu_cuda_set_device(devid); +} +#endif + +/* Copy a data interface between two CUDA memory nodes. The copy is synchronous when req is NULL, when asynchronous copies are disabled, or when the interface provides neither cuda_to_cuda_async nor any_to_any. Otherwise a CUDA event is created in req->async_channel, the copy is issued on the peer transfer stream and the event is recorded on that stream. Returns the initial value 1 on the synchronous path and the value returned by the asynchronous copy method otherwise. */ static int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#else + /* Without the peer copy API only copies within one node are accepted */ STARPU_ASSERT(src_node == dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; +/* CUDA - CUDA transfer */ + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !(copy_methods->cuda_to_cuda_async || copy_methods->any_to_any)) + { + STARPU_ASSERT(copy_methods->cuda_to_cuda || copy_methods->any_to_any); + /* no request, or asynchronous copies are disabled or not provided by the interface: copy synchronously */ + if (copy_methods->cuda_to_cuda) + copy_methods->cuda_to_cuda(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_peer_transfer_stream(src_node, dst_node); + if (copy_methods->cuda_to_cuda_async) + ret = copy_methods->cuda_to_cuda_async(src_interface,
src_node, dst_interface, dst_node, stream); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + + /* The event is recorded on the stream after the copy has been issued on it */ cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +/* Copy a data interface from a CUDA memory node to CPU memory. The copy is synchronous when req is NULL, when asynchronous copies are disabled, or when the interface provides neither cuda_to_ram_async nor any_to_any. Otherwise a CUDA event is created in req->async_channel, the copy is issued on the out-transfer stream of src_node and the event is recorded on that stream. Returns the initial value 1 on the synchronous path and the value returned by the asynchronous copy method otherwise. */ static int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* only the proper CUBLAS thread can initiate this directly !
*/ +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !(copy_methods->cuda_to_ram_async || copy_methods->any_to_any)) + { + /* no request, or asynchronous copies are disabled or not provided by the interface: copy synchronously */ + STARPU_ASSERT(copy_methods->cuda_to_ram || copy_methods->any_to_any); + if (copy_methods->cuda_to_ram) + copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_out_transfer_stream(src_node); + if (copy_methods->cuda_to_ram_async) + ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + + /* The event is recorded on the stream after the copy has been issued on it */ cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +/* Copy a data interface from CPU memory to a CUDA memory node. Mirrors the CUDA-to-CPU variant, using the ram_to_cuda methods and the in-transfer stream of dst_node. */ static int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct
starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* STARPU_CPU_RAM -> CUBLAS_RAM */ + /* only the proper CUBLAS thread can initiate this ! */ +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || + !(copy_methods->ram_to_cuda_async || copy_methods->any_to_any)) + { + /* no request, or asynchronous copies are disabled or not provided by the interface: copy synchronously */ + STARPU_ASSERT(copy_methods->ram_to_cuda || copy_methods->any_to_any); + if (copy_methods->ram_to_cuda) + copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_in_transfer_stream(dst_node); + if (copy_methods->ram_to_cuda_async) + ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + + /* The event is recorded on the stream after the copy has been issued on it */ cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +/* Copy size bytes from src + src_offset on a CUDA node to dst + dst_offset in CPU memory. Forwards to starpu_cuda_copy_async_sync() and returns its result. A stream (the out-transfer stream of src_node) is passed only when async_channel is non-NULL. */ static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM);
+ + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, + cudaMemcpyDeviceToHost); +} + +/* Copy size bytes between two CUDA nodes. Forwards to starpu_cuda_copy_async_sync() and returns its result. The peer transfer stream is passed only when async_channel is non-NULL. Without STARPU_HAVE_CUDA_MEMCPY_PEER both nodes must be the same. */ static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, + cudaMemcpyDeviceToDevice); +} + +/* Copy size bytes from CPU memory to a CUDA node. Forwards to starpu_cuda_copy_async_sync() and returns its result. The in-transfer stream of dst_node is passed only when async_channel is non-NULL. */ static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, + cudaMemcpyHostToDevice); +} + +/* 2D (strided) copy from a CUDA node to CPU memory: numblocks blocks of blocksize bytes, with leading dimensions ld_src and ld_dst. Forwards to starpu_cuda_copy2d_async_sync() and returns its result. */ static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind
== STARPU_CPU_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, + cudaMemcpyDeviceToHost); +} + +/* 2D (strided) copy between two CUDA nodes. Forwards to starpu_cuda_copy2d_async_sync() and returns its result. The peer transfer stream is passed only when async_channel is non-NULL. Without STARPU_HAVE_CUDA_MEMCPY_PEER both nodes must be the same. */ static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, + cudaMemcpyDeviceToDevice); +} + +/* 2D (strided) copy from CPU memory to a CUDA node. Forwards to starpu_cuda_copy2d_async_sync() and returns its result. The in-transfer stream of dst_node is passed only when async_channel is non-NULL. */ static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, + cudaMemcpyHostToDevice); +} + +#ifdef STARPU_USE_CUDA_MAP +/* Compute an address through which the device of the calling worker can access the CPU memory at src_ptr + src_offset. On success *ret is set to 0 and the address is returned. Otherwise 0 is returned and *ret is left at -EIO. */ static uintptr_t _starpu_cuda_map_ram(uintptr_t src_ptr STARPU_ATTRIBUTE_UNUSED, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + unsigned dst_node
STARPU_ATTRIBUTE_UNUSED, + size_t size STARPU_ATTRIBUTE_UNUSED, int *ret STARPU_ATTRIBUTE_UNUSED) +{ /* NOTE(review): src_ptr, src_node and ret carry STARPU_ATTRIBUTE_UNUSED but are all used in this body */ + /* TODO */ + /* + * Old interface: + * + * cudaHostAllocMapped and cudaHostGetDevicePointer + * cudaSetDeviceFlags() must have been called with the cudaDeviceMapHost flag in order + * for the cudaHostAllocMapped flag to have any effect. + * + * + * + * New interface: Unified Addressing + * + * Whether or not a device supports unified addressing may be queried + * by calling cudaGetDeviceProperties() with the device property + * cudaDeviceProp::unifiedAddressing. + * Unified addressing is automatically enabled in 64-bit processes. + * + * Upon enabling direct access from a device that supports unified + * addressing to another peer device that supports unified addressing + * using cudaDeviceEnablePeerAccess() all memory allocated in the peer + * device using cudaMalloc() and cudaMallocPitch() will immediately be + * accessible by the current device. + */ + + /* Pessimistic default, overwritten with 0 by each branch that finds a mapping */ *ret = -EIO; + + /* Only CPU memory is handled by this function */ if (starpu_node_get_kind(src_node) != STARPU_CPU_RAM) + return 0; + + /* + * mapping relevant cudaDeviceProps fields: + * - .canMapHostMemory: "Can map host memory with cudaHostAlloc/cudaHostGetDevicePointer" + * - .unifiedAddressing: "Device shares a unified address space with the host" + * - .managedMemory: "Device supports allocating memory that will be automatically managed by the Unified Memory system" + * - .pageableMemoryAccess: "Device supports coherently accessing pageable memory without calling cudaHostRegister on it" + * - .concurrentManagedAccess: "Device can coherently access managed memory concurrently with the CPU" + */ + + /* The capabilities below are read from the properties of the calling worker's device; each one is 0 when its configure-time macro is not defined */ struct _starpu_worker *worker = _starpu_get_local_worker_key(); +#ifdef STARPU_HAVE_CUDA_CANMAPHOST + const int cuda_canMapHostMemory = props[worker->devid].canMapHostMemory; +#else + const int cuda_canMapHostMemory = 0; +#endif + +#ifdef STARPU_HAVE_CUDA_UNIFIEDADDR + const int cuda_unifiedAddressing = props[worker->devid].unifiedAddressing; +#else + const
int cuda_unifiedAddressing = 0; +#endif + +#ifdef STARPU_HAVE_CUDA_MNGMEM + const int cuda_managedMemory = props[worker->devid].managedMemory; +#else + const int cuda_managedMemory = 0; +#endif + +#ifdef STARPU_HAVE_CUDA_PAGEABLEMEM + const int cuda_pageableMemoryAccess = props[worker->devid].pageableMemoryAccess; +#else + const int cuda_pageableMemoryAccess = 0; +#endif + uintptr_t dst_addr; + if (cuda_pageableMemoryAccess) + { + /* The host address itself is returned as the mapped address */ dst_addr = (uintptr_t)(src_ptr+src_offset); + *ret = 0; + } + else if (cuda_unifiedAddressing || cuda_managedMemory) + { + /* Ask CUDA what it knows about the pointer and use the device pointer it reports */ struct cudaPointerAttributes cuda_ptrattr; + cudaError_t cures; + cures = cudaPointerGetAttributes(&cuda_ptrattr, (void *)(src_ptr+src_offset)); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + { + if (cures == cudaErrorInvalidValue) + { + /* Reset the error recorded by the failed query */ cudaGetLastError(); + /* pointer does not support mapping */ + return (uintptr_t)NULL; + } + + STARPU_CUDA_REPORT_ERROR(cures); + } +#ifdef STARPU_HAVE_CUDA_POINTER_TYPE + /* Only host and managed memory are accepted */ if (!(cuda_ptrattr.type == cudaMemoryTypeHost || cuda_ptrattr.type == cudaMemoryTypeManaged)) + return 0; +#else + if (!(cuda_ptrattr.memoryType == cudaMemoryTypeHost +#if CUDART_VERSION >= 10000 + || cuda_ptrattr.memoryType == cudaMemoryTypeManaged +#endif + )) + return 0; +#endif + dst_addr = (uintptr_t)cuda_ptrattr.devicePointer; + *ret = 0; + } + else if (cuda_canMapHostMemory) + { + cudaError_t cures; + void *pDevice; + cures = cudaHostGetDevicePointer(&pDevice, (void*)(src_ptr+src_offset), 0); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + { + STARPU_CUDA_REPORT_ERROR(cures); + } + dst_addr = (uintptr_t)pDevice; + *ret = 0; + } + else + { + /* No usable capability: *ret keeps its -EIO value */ dst_addr = (uintptr_t)NULL; + } + return dst_addr; +} + +/* Undo a mapping. Nothing is actually undone yet (see the TODO in the body): returns 0 when one of the mapping capability macros is defined and -EIO otherwise. */ static int _starpu_cuda_unmap_ram(uintptr_t src_ptr STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + uintptr_t dst_ptr STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, + size_t size STARPU_ATTRIBUTE_UNUSED) +{ +#if
defined(STARPU_HAVE_CUDA_CANMAPHOST) || defined(STARPU_HAVE_CUDA_UNIFIEDADDR) || defined(STARPU_HAVE_CUDA_MNGMEM) + /* TODO */ + return 0; +#else + return -EIO; +#endif +} + +/* Synchronise a mapping with its source. All parameters are ignored and 0 is always returned. */ static int _starpu_cuda_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) src; + (void) src_offset; + (void) src_node; + (void) dst; + (void) dst_offset; + (void) dst_node; + (void) size; + + /* CUDA mappings are coherent */ + /* FIXME: not necessarily, depends on board capabilities */ + return 0; +} + +#endif /* STARPU_USE_CUDA_MAP */ + +#endif /* STARPU_USE_CUDA */ + +/* Tell whether handling_node may access this node directly. node itself is ignored in every configuration. Under SimGrid the answer comes from the "memcpy_peer" property of the host of handling_node; with STARPU_HAVE_CUDA_MEMCPY_PEER it is true exactly when handling_node is a CUDA node; otherwise it is always 0. */ static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + /* GPUs do not always allow direct remote access: if CUDA4 + * is enabled, we allow two CUDA devices to communicate. */ +#ifdef STARPU_SIMGRID + (void) node; + if (starpu_node_get_kind(handling_node) == STARPU_CUDA_RAM) + { + starpu_sg_host_t host = _starpu_simgrid_get_memnode_host(handling_node); +# ifdef STARPU_HAVE_SIMGRID_ACTOR_H + const char* cuda_memcpy_peer = sg_host_get_property_value(host, "memcpy_peer"); +# else + const char* cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer"); +# endif + /* A missing property counts as unsupported; otherwise any non-zero value enables it */ return cuda_memcpy_peer && atoll(cuda_memcpy_peer); + } + else + return 0; +#elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + (void) node; + enum starpu_node_kind kind = starpu_node_get_kind(handling_node); + return kind == STARPU_CUDA_RAM; +#else /* STARPU_HAVE_CUDA_MEMCPY_PEER */ + /* Direct GPU-GPU transfers are not allowed in general */ + (void) node; + (void) handling_node; + return 0; +#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ +} + +/* Start job j on worker: make the worker and task current, record the job start when it is the only task queued on the worker, select the device and call the CUDA function of the codelet unless kernels are disabled. pipeline_idx is only used in the SimGrid build. */ static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); +#if !defined(STARPU_SIMGRID) && defined(STARPU_PROF_TOOL) +
struct starpu_prof_tool_info pi; +#endif + + STARPU_ASSERT(task); + struct starpu_codelet *cl = task->cl; + STARPU_ASSERT(cl); + + _starpu_set_local_worker_key(worker); + _starpu_set_current_task(task); + j->workerid = worker->workerid; + + if (worker->ntasks == 1) + { + /* We are alone in the pipeline, the kernel will start now, record it */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); + } + +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) + /* We make sure we do manipulate the proper device */ + starpu_cuda_set_device(worker->devid); +#endif + + /* Look up the CUDA function for implementation index j->nimpl of the codelet */ starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl); + STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined"); + + /* The kernel is skipped entirely when _starpu_get_disable_kernels() returns a positive value */ if (_starpu_get_disable_kernels() <= 0) + { + _STARPU_TRACE_START_EXECUTING(j); +#ifdef STARPU_SIMGRID + int async = task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC; + unsigned workerid = worker->workerid; + /* SimGrid build: really call the function only for synchronous codelets flagged for execution, otherwise submit a simulated job */ if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async) + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async) + { + _SIMGRID_TIMER_BEGIN(1); + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + _SIMGRID_TIMER_END; + } + else + { + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + _starpu_simgrid_submit_job(workerid, sched_ctx->id, j, &worker->perf_arch, NAN, NAN, + async ?
&task_finished[workerid][pipeline_idx] : NULL); + } +#else +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION + unsigned long long energy_start = 0; + nvmlReturn_t nvmlRet = -1; + if (profiling && _starpu_energy_profiling && task->profiling_info && _starpu_nvmlDeviceGetTotalEnergyConsumption) + { + /* The start reading (energy_start / 1000.) is stored in energy_consumed; finish_job_on_cuda() later replaces it with the end reading minus this value */ nvmlRet = _starpu_nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_start); + if (nvmlRet == NVML_SUCCESS) + task->profiling_info->energy_consumed = energy_start / 1000.; + } +#endif + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); +#endif + + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); +#endif + +#endif + _STARPU_TRACE_END_EXECUTING(j); + } +} + +static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker); + +/* Execute a job, up to completion for synchronous jobs. Asynchronous jobs on a worker with a pipeline only get an event recorded on the local stream; their termination is detected later by the driver loop. */ +static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker) +{ + int workerid = worker->workerid; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + /* Index (first_task + ntasks - 1) modulo STARPU_MAX_PIPELINE; presumably the slot of the most recently queued task - TODO confirm */ unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; + + start_job_on_cuda(j, worker, pipeline_idx); + +#ifndef STARPU_SIMGRID + /* The warning below is displayed once per worker: the flag is set as soon as it has been found unset */ if (!used_stream[workerid]) + { + used_stream[workerid] = 1; + _STARPU_DISP("Warning: starpu_cuda_get_local_stream() was not used to submit kernel to CUDA on worker %d. CUDA will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution.
See the CUDA-specific part of the 'Check List When Performance Are Not There' of the StarPU handbook\n", workerid); + } +#endif + + if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC) + { + if (worker->pipeline_length == 0) + { +#ifdef STARPU_SIMGRID + _starpu_simgrid_wait_tasks(workerid); +#else + /* Forced synchronous execution: the worker has no pipeline (pipeline_length == 0) */ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); /* NOTE(review): the return value is not checked here */ +#endif + finish_job_on_cuda(j, worker); + } + else + { +#ifndef STARPU_SIMGRID + /* Record event to synchronize with task termination later */ + cudaError_t cures = cudaEventRecord(task_events[workerid][pipeline_idx], starpu_cuda_get_local_stream()); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +#endif + } + } + else + /* Synchronous execution */ + { +#if !defined(STARPU_SIMGRID) + STARPU_ASSERT_MSG(cudaStreamQuery(starpu_cuda_get_local_stream()) == cudaSuccess, "Unless when using the STARPU_CUDA_ASYNC flag, CUDA codelets have to wait for termination of their kernels on the starpu_cuda_get_local_stream() stream"); +#endif + finish_job_on_cuda(j, worker); + } +} + +/* Terminate job j on worker: finalise the energy measurement when one was started, remove the task from the head of the worker's task queue, record the job end, update performance feedback, push the task output and handle job termination. */ static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) +{ + int profiling = starpu_profiling_status_get(); + + +#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION + /* A zero energy_consumed means that no start reading was stored, so nothing is computed */ if (profiling && _starpu_energy_profiling && j->task->profiling_info && j->task->profiling_info->energy_consumed && _starpu_nvmlDeviceGetTotalEnergyConsumption) + { + unsigned long long energy_end; + nvmlReturn_t nvmlRet; + nvmlRet = _starpu_nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_end); +#ifdef STARPU_DEVEL +#warning TODO: measure idle consumption to subtract it +#endif + if (nvmlRet == NVML_SUCCESS) + j->task->profiling_info->energy_consumed = + (energy_end / 1000.
- j->task->profiling_info->energy_consumed); + } +#endif + /* Clear the slot of the finished task, then advance the head index of the task queue */ if (worker->pipeline_length) + worker->current_tasks[worker->first_task] = NULL; + else + worker->current_task = NULL; + worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; + worker->ntasks--; + + _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); + + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + if(!sched_ctx) + sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); + + /* Feedback is recorded against the context's perf_arch when the context has no scheduling policy, and against the worker's perf_arch otherwise */ if(!sched_ctx->sched_policy) + _starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling); + else + _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); + + _starpu_push_task_output(j); + + _starpu_set_current_task(NULL); + + _starpu_handle_job_termination(j); +} + +/* One iteration of the main driver loop: for every worker of the set, test pending transfers and queued tasks for completion, then make data transfers progress and fetch new tasks. Always returns 0. */ +static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = worker->set; + struct _starpu_worker *worker0 = &worker_set->workers[0]; + struct starpu_task *tasks[worker_set->nworkers], *task; + struct _starpu_job *j; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + int i, res; + + int idle_tasks, idle_transfers; + +#ifdef STARPU_SIMGRID + starpu_pthread_wait_reset(&worker0->wait); +#endif + _starpu_set_local_worker_key(worker0); + + /* First poll for completed jobs */ + idle_tasks = 0; + idle_transfers = 0; + for (i = 0; i < (int) worker_set->nworkers; i++) + { + /* The worker parameter is reused as the loop cursor over the workers of the set */ worker = &worker_set->workers[i]; + int workerid = worker->workerid; + unsigned memnode = worker->memory_node; + + if (!worker->ntasks) + idle_tasks++; + if (!worker->task_transferring) + idle_transfers++; + + if (!worker->ntasks && !worker->task_transferring) + { + /* Nothing to test at all for this worker */ + continue; + } + + /* First test for transfers pending for next task */ + task = worker->task_transferring; + if (task && worker->nb_buffers_transferred ==
worker->nb_buffers_totransfer) + { + /* All the buffers of the pending task have been transferred */ STARPU_RMB(); + _STARPU_TRACE_END_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + j = _starpu_get_job_associated_to_task(task); + + _starpu_set_local_worker_key(worker); + _starpu_fetch_task_input_tail(task, j, worker); + /* Reset it: no transfer is pending for this worker any more */ + worker->task_transferring = NULL; + + if (worker->ntasks > 1 && !(task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC)) + { + /* We have to execute a non-asynchronous task but we + * still have tasks in the pipeline... Record it to + * prevent more tasks from coming, and do it later */ + worker->pipeline_stuck = 1; + } + else + { + execute_job_on_cuda(task, worker); + } + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + } + + /* Then test for termination of queued tasks */ + if (!worker->ntasks) + /* No queued task */ + continue; + + /* Pick the task at the head of the queue */ if (worker->pipeline_length) + task = worker->current_tasks[worker->first_task]; + else + task = worker->current_task; + if (task == worker->task_transferring) + /* Next task is still pending transfer */ + continue; + + /* On-going asynchronous task, check for its termination first */ +#ifdef STARPU_SIMGRID + if (task_finished[workerid][worker->first_task]) +#else /* !STARPU_SIMGRID */ + cudaError_t cures = cudaEventQuery(task_events[workerid][worker->first_task]); + + if (cures != cudaSuccess) + { + /* cudaErrorNotReady only means that the task is still running; any other status is a failure */ STARPU_ASSERT_MSG(cures == cudaErrorNotReady, "CUDA error on task %p, codelet
%p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), cudaGetErrorString(cures), cures); + } + else +#endif /* !STARPU_SIMGRID */ + { +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_END_PROGRESS(memnode); + /* Asynchronous task completed! */ + _starpu_set_local_worker_key(worker); + finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker); + /* See next task if any */ + if (worker->ntasks) + { + if (worker->current_tasks[worker->first_task] != worker->task_transferring) + { + task = worker->current_tasks[worker->first_task]; + j = _starpu_get_job_associated_to_task(task); + if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC) + { + /* An asynchronous task, it was already + * queued, it's now running, record its start time. */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); + } + else + { + /* A synchronous task, we have finished + * flushing the pipeline, we can now at + * last execute it.
*/ + + _STARPU_TRACE_EVENT("sync_task"); + execute_job_on_cuda(task, worker); + _STARPU_TRACE_EVENT("end_sync_task"); + worker->pipeline_stuck = 0; + } + } + else + /* Data for next task didn't have time to finish transferring :/ */ + _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + } + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + + } + + /* A worker without pipeline, or whose pipeline is not full, also counts as idle */ if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) + idle_tasks++; + } + +#if defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) + if (!idle_tasks) + { + /* No task ready yet, no better thing to do than waiting */ + __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); + return 0; + } +#endif + + /* Something done, make some progress */ + res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); + + /* And pull tasks */ + res |= _starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, worker0->memory_node); + +#ifdef STARPU_SIMGRID + if (!res) + starpu_pthread_wait_wait(&worker0->wait); +#endif + + for (i = 0; i < (int) worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned memnode STARPU_ATTRIBUTE_UNUSED = worker->memory_node; + + task = tasks[i]; + if (!task) + continue; + + + j = _starpu_get_job_associated_to_task(task); + + /* can CUDA do that task ?
*/ + if (!_STARPU_MAY_PERFORM(j, CUDA)) + { + /* this is neither a cuda nor a cublas task */ + _starpu_worker_refuse_task(worker, task); + continue; + } + + /* Fetch data asynchronously */ +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_set_local_worker_key(worker); + res = _starpu_fetch_task_input(task, j, 1); + STARPU_ASSERT(res == 0); + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + // _STARPU_TRACE_END_PROGRESS(memnode); + } + + return 0; +} + +/* Thread entry point of a CUDA worker set: initialise the driver, call _starpu_cuda_driver_run_once() for as long as the machine is running, then deinitialise the driver. Always returns NULL. */ void *_starpu_cuda_worker(void *_arg) +{ + struct _starpu_worker *worker = _arg; + struct _starpu_worker_set* worker_set = worker->set; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + unsigned i; + + _starpu_cuda_driver_init(worker); + for (i = 0; i < worker_set->nworkers; i++) + { +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); + } + while (_starpu_machine_is_running()) + { + _starpu_may_pause(); + _starpu_cuda_driver_run_once(worker); + } + for (i = 0; i < worker_set->nworkers; i++) + { + _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); +#ifdef STARPU_PROF_TOOL + pi =
_starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL);
+		starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL);
+#endif
+
+	}
+	_starpu_cuda_driver_deinit(worker);
+
+	return NULL;
+}
+
+
+#ifdef STARPU_HAVE_HWLOC
+/* Return the hwloc object of CUDA device DEVID in TOPOLOGY.
+ * Returns NULL when built for SimGrid or when hwloc does not provide
+ * hwloc_cuda_get_device_osdev_by_index(). */
+hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid)
+{
+#if !defined(STARPU_SIMGRID) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
+	return hwloc_cuda_get_device_osdev_by_index(topology, devid);
+#else
+	return NULL;
+#endif
+}
+#endif
+
+#ifdef STARPU_USE_CUDA
+/* Report the cuBLAS status code STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_MSG, then call STARPU_ABORT().
+ * Status codes without a dedicated case are reported as "unknown error". */
+void starpu_cublas_report_error(const char *func, const char *file, int line, int status)
+{
+	/* Only ever points to string literals, hence const. */
+	const char *errormsg;
+	switch (status)
+	{
+		case CUBLAS_STATUS_SUCCESS:
+			errormsg = "success";
+			break;
+		case CUBLAS_STATUS_NOT_INITIALIZED:
+			errormsg = "not initialized";
+			break;
+		case CUBLAS_STATUS_ALLOC_FAILED:
+			errormsg = "alloc failed";
+			break;
+		case CUBLAS_STATUS_INVALID_VALUE:
+			errormsg = "invalid value";
+			break;
+		case CUBLAS_STATUS_ARCH_MISMATCH:
+			errormsg = "arch mismatch";
+			break;
+		case CUBLAS_STATUS_EXECUTION_FAILED:
+			errormsg = "execution failed";
+			break;
+		case CUBLAS_STATUS_INTERNAL_ERROR:
+			errormsg = "internal error";
+			break;
+		default:
+			errormsg = "unknown error";
+			break;
+	}
+	_STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+	STARPU_ABORT();
+}
+
+/* Report the CUDA runtime error STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_ERROR, using the message returned by cudaGetErrorString(). */
+void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
+{
+	const char *errormsg = cudaGetErrorString(status);
+	_STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+}
+
+#ifdef STARPU_HAVE_LIBCUSOLVER
+/* Report the cuSOLVER status code STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_MSG, then call STARPU_ABORT().
+ * The message is the enumerator name itself; codes without a case are
+ * reported as "unknown error". */
+void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status)
+{
+#define REPORT(error) case error: errormsg = #error; break;
+	/* Only ever points to string literals, hence const. */
+	const char *errormsg;
+	switch (status)
+	{
+		REPORT(CUSOLVER_STATUS_SUCCESS);
+		REPORT(CUSOLVER_STATUS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_ALLOC_FAILED);
+		REPORT(CUSOLVER_STATUS_INVALID_VALUE);
+		REPORT(CUSOLVER_STATUS_ARCH_MISMATCH);
+		REPORT(CUSOLVER_STATUS_MAPPING_ERROR);
+		REPORT(CUSOLVER_STATUS_EXECUTION_FAILED);
+		REPORT(CUSOLVER_STATUS_INTERNAL_ERROR);
+		REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_ZERO_PIVOT);
+		REPORT(CUSOLVER_STATUS_INVALID_LICENSE);
+
+#if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11)
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER);
+		REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR);
+		REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE);
+		REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES);
+		REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED);
+		REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR);
+		REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE);
+#endif
+		default:
+			errormsg = "unknown error";
+			break;
+	}
+	_STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+	STARPU_ABORT();
+}
+#endif
+
+#endif /* STARPU_USE_CUDA */
+
+static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker)
+{
+	/* Let's go ! 
*/ + _starpu_cuda_worker(worker); + + return 0; +} + +static int _starpu_cuda_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + driver->id.cuda_id = worker->devid; + return 0; +} + +static int _starpu_cuda_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + return driver->id.cuda_id == worker->devid; +} + +struct _starpu_driver_ops _starpu_driver_cuda_ops = +{ + .init = _starpu_cuda_driver_init, + .run = _starpu_cuda_run_from_worker, + .run_once = _starpu_cuda_driver_run_once, + .deinit = _starpu_cuda_driver_deinit, + .set_devid = _starpu_cuda_driver_set_devid, + .is_devid = _starpu_cuda_driver_is_devid, +}; + +struct _starpu_node_ops _starpu_driver_cuda_node_ops = +{ + .name = "cuda driver", + .malloc_on_node = _starpu_cuda_malloc_on_node, + .free_on_node = _starpu_cuda_free_on_node, + + .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, + +#ifndef STARPU_SIMGRID + .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu, + .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cpu_to_cuda, + .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, + .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, + .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, + .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, + + .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, + .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, + +#ifdef STARPU_USE_CUDA_MAP + 
.map[STARPU_CPU_RAM] = _starpu_cuda_map_ram, + .unmap[STARPU_CPU_RAM] = _starpu_cuda_unmap_ram, + .update_map[STARPU_CPU_RAM] = _starpu_cuda_update_map, +#endif + + .wait_request_completion = _starpu_cuda_wait_request_completion, + .test_request_completion = _starpu_cuda_test_request_completion, +#endif +}; diff --git a/src/drivers/cuda/driver_cuda.h b/src/drivers/cuda/driver_cuda.h new file mode 100644 index 0000000..4371b2b --- /dev/null +++ b/src/drivers/cuda/driver_cuda.h @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2015-2015 Mathieu Lirzin + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DRIVER_CUDA_H__ +#define __DRIVER_CUDA_H__ + +/** @file */ + +#include + +void _starpu_cuda_preinit(void); + +#ifdef STARPU_USE_CUDA +#include +#include +#ifdef STARPU_HAVE_NVML_H +#include +#endif +#endif + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +extern struct _starpu_driver_ops _starpu_driver_cuda_ops; +extern struct _starpu_node_ops _starpu_driver_cuda_node_ops; + +extern int _starpu_nworker_per_cuda; + +void _starpu_cuda_init(void); +unsigned _starpu_get_cuda_device_count(void); +#ifdef STARPU_HAVE_HWLOC +struct _starpu_machine_topology; +hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid); +#endif +extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; + +#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) +void _starpu_cuda_discover_devices (struct _starpu_machine_config *); +void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); +void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); +void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); +void _starpu_init_cuda(void); +void _starpu_init_cublas_v2_func(void); +void _starpu_shutdown_cublas_v2_func(void); +void _starpu_cublas_v2_init(void); +void _starpu_cublas_v2_shutdown(void); +void *_starpu_cuda_worker(void *); +#ifdef STARPU_HAVE_NVML_H +nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props); +extern __typeof__(nvmlInit) *_starpu_nvmlInit; +extern __typeof__(nvmlDeviceGetNvLinkState) *_starpu_nvmlDeviceGetNvLinkState; +extern __typeof__(nvmlDeviceGetNvLinkRemotePciInfo) *_starpu_nvmlDeviceGetNvLinkRemotePciInfo; +extern __typeof__(nvmlDeviceGetHandleByIndex) *_starpu_nvmlDeviceGetHandleByIndex; +extern __typeof__(nvmlDeviceGetHandleByPciBusId) 
*_starpu_nvmlDeviceGetHandleByPciBusId; +extern __typeof__(nvmlDeviceGetIndex) *_starpu_nvmlDeviceGetIndex; +extern __typeof__(nvmlDeviceGetPciInfo) *_starpu_nvmlDeviceGetPciInfo; +extern __typeof__(nvmlDeviceGetUUID) *_starpu_nvmlDeviceGetUUID; +#if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION +extern __typeof__(nvmlDeviceGetTotalEnergyConsumption) *_starpu_nvmlDeviceGetTotalEnergyConsumption; +#endif +#endif + +#else +# define _starpu_cuda_discover_devices(config) ((void) config) +#endif + +#pragma GCC visibility pop + +#endif // __DRIVER_CUDA_H__ + diff --git a/src/drivers/cuda/driver_cuda0.c b/src/drivers/cuda/driver_cuda0.c new file mode 100644 index 0000000..1fdbbeb --- /dev/null +++ b/src/drivers/cuda/driver_cuda0.c @@ -0,0 +1,861 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This is a version of the CUDA driver with very minimal features: + * - synchronous kernel execution + * - synchronous data transfers + * + * This is not meant to be actually used :) + * + * It is only meant as a basic driver sample, easy to get inspired from for + * writing other drivers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "driver_cuda.h" +#include +#include +#include +#include +#include +#include +#include +#ifdef STARPU_USE_CUDA +#include +#endif + +/* Consider a rough 10% overhead cost */ +#define FREE_MARGIN 0.9 + +static size_t global_mem[STARPU_MAXCUDADEVS]; +int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; +/* Note: streams are not thread-safe, so we define them for each CUDA worker + * emitting a GPU-GPU transfer */ +static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; + +static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; + +int _starpu_nworker_per_cuda = 1; + +static size_t _starpu_cuda_get_global_mem_size(unsigned devid) +{ + return global_mem[devid]; +} + +cudaStream_t starpu_cuda_get_local_stream(void) +{ + return NULL; +} + +const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned devid = config->workers[workerid].devid; + return &props[devid]; +} + + +/* Early library initialization, before anything else, just initialize data */ +void _starpu_cuda_init(void) +{ + memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); + memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); +} + +/* Return the number of devices usable in the system. 
+ * The value returned cannot be greater than MAXCUDADEVS */
+
+/* Not static: driver_cuda.h, which this file includes, declares this function
+ * with external linkage, and a static definition conflicts with that
+ * declaration.
+ * Returns 0 when cudaGetDeviceCount() reports an error. */
+unsigned _starpu_get_cuda_device_count(void)
+{
+	int cnt;
+	cudaError_t cures;
+	cures = cudaGetDeviceCount(&cnt);
+	if (STARPU_UNLIKELY(cures))
+		return 0;
+
+	if (cnt > STARPU_MAXCUDADEVS)
+	{
+		_STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS);
+		cnt = STARPU_MAXCUDADEVS;
+	}
+	return (unsigned)cnt;
+}
+
+/* This is run from initialize to determine the number of CUDA devices.
+ * Intentionally empty in this minimal driver. */
+void _starpu_init_cuda(void)
+{
+}
+
+/* This is called to return the real (non-clamped) number of devices.
+ * Stores the count in config->topology.nhwdevices[STARPU_CUDA_WORKER];
+ * stores 0 when cudaGetDeviceCount() fails. */
+void _starpu_cuda_discover_devices(struct _starpu_machine_config *config)
+{
+	/* Discover the number of CUDA devices. Fill the result in CONFIG. */
+
+	int cnt;
+	cudaError_t cures;
+
+	cures = cudaGetDeviceCount(&cnt);
+	if (STARPU_UNLIKELY(cures != cudaSuccess))
+		cnt = 0;
+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt;
+}
+
+/* Fill topology->workers_devid[STARPU_CUDA_WORKER], from the explicit
+ * uconf->workers_cuda_gpuid list when use_explicit_workers_cuda_gpuid is set,
+ * then drop duplicate device ids. */
+static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config)
+{
+	struct _starpu_machine_topology *topology = &config->topology;
+	struct starpu_conf *uconf = &config->conf;
+
+	_starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0
+					    ? NULL
+					    : (int *)uconf->workers_cuda_gpuid,
+					    &(config->current_devid[STARPU_CUDA_WORKER]),
+					    (int *)topology->workers_devid[STARPU_CUDA_WORKER],
+					    "STARPU_WORKERS_CUDAID",
+					    topology->nhwdevices[STARPU_CUDA_WORKER],
+					    STARPU_CUDA_WORKER);
+	_starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]);
+}
+
+/* Determine which devices we will use */
+void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config)
+{
+	int ncuda = config->conf.ncuda;
+
+	if (ncuda != 0)
+	{
+		/* The user did not disable CUDA. 
We need to + * initialize CUDA early to count the number of + * devices + */ + _starpu_init_cuda(); + int nb_devices = _starpu_get_cuda_device_count(); + + _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); + } + + /* Now we know how many CUDA devices will be used */ + topology->ndevices[STARPU_CUDA_WORKER] = ncuda; + + _starpu_initialize_workers_cuda_gpuid(config); + + unsigned cudagpu; + for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); + + if (devid == -1) + { + // There is no more devices left + topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; + break; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_CUDA_WORKER, + cudagpu, devid, 0, 0, + 1, 1, NULL, NULL); + } + + /* Don't copy this, just here for other code to work fine */ + topology->cuda_th_per_stream = 0; + topology->cuda_th_per_dev = 1; +} + +/* Bind the driver on a CPU core */ +void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned *preferred_binding = NULL; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + + if (cuda_bindid_init[devid]) + { + workerarg->bindid = cuda_bindid[devid]; + } + else + { + cuda_bindid_init[devid] = 1; + + workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } +} + +/* Set up memory and buses */ +void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa; + + if (cuda_memory_init[devid]) + { + memory_node = cuda_memory_nodes[devid]; + } + else + { + cuda_memory_init[devid] = 1; + + 
memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); + _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); + } + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can also manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(workerarg, numa); + + _starpu_worker_drives_memory_node(workerarg, memory_node); + + workerarg->memory_node = memory_node; +} + +/* Set the current CUDA device */ +void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + cudaError_t cures; + + cures = cudaSetDevice(devid); + + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} + +static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) +{ + starpu_ssize_t limit; + size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; + size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; + + /* Find the size of the memory on the device */ + totalGlobalMem = props[devid].totalGlobalMem; + + limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; + + global_mem[devid] = limit * 1024*1024; +} +/* Really initialize one device */ +static void init_device_context(unsigned devid, unsigned memnode) +{ + STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); + + cudaError_t cures; + + starpu_cuda_set_device(devid); + + /* force CUDA to initialize the context for real */ + cures = cudaFree(0); + if (STARPU_UNLIKELY(cures)) + { + if (cures == cudaErrorDevicesUnavailable) + { + _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); + exit(77); + } + STARPU_CUDA_REPORT_ERROR(cures); + } + + cures = cudaGetDeviceProperties(&props[devid], devid); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + 
_starpu_cuda_limit_gpu_mem_if_needed(devid); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); +} + +/* De-initialize one device */ +static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ +} + +/* This is run from the driver thread to initialize the driver CUDA context */ +static int _starpu_cuda_driver_init(struct _starpu_worker *worker) +{ + _starpu_driver_start(worker, STARPU_CUDA_WORKER, 0); + _starpu_set_local_worker_key(worker); + + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + + init_device_context(devid, memnode); + + unsigned workerid = worker->workerid; + + float size = (float) global_mem[devid] / (1<<30); + /* get the device's name */ + char devname[64]; + strncpy(devname, props[devid].name, 63); + devname[63] = 0; + + snprintf(worker->name, sizeof(worker->name), "CUDA0 %u (%s %.1f GiB)", devid, devname, size); + snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u", devid); + _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); + + _STARPU_TRACE_WORKER_INIT_END(workerid); + + { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "CUDA0 %u", worker->devid); + starpu_pthread_setname(thread_name); + } + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + worker->status = STATUS_UNKNOWN; + worker->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + + return 0; +} + +static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) +{ + _STARPU_TRACE_WORKER_DEINIT_START; + + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + + /* I'm last, deinitialize device */ + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release 
it now. Note that data + * coherency is not maintained anymore at that point ! */ + _starpu_free_all_automatically_allocated_buffers(memnode); + + _starpu_malloc_shutdown(memnode); + + deinit_device_context(devid); + + worker->worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); + + return 0; +} + +static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + (void) flags; + + unsigned devid = starpu_memory_node_get_devid(dst_node); + + starpu_cuda_set_device(devid); + + /* Check if there is free memory */ + size_t cuda_mem_free, cuda_mem_total; + cudaError_t status; + status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); + if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) + { + addr = 0; + } + else + { + status = cudaMalloc((void **)&addr, size); + if (!addr || (status != cudaSuccess)) + { + if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) + STARPU_CUDA_REPORT_ERROR(status); + addr = 0; + } + } + return addr; +} + +static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void) dst_node; + (void) addr; + (void) size; + (void) flags; + + cudaError_t err; + unsigned devid = starpu_memory_node_get_devid(dst_node); + starpu_cuda_set_device(devid); + err = cudaFree((void*)addr); + if (STARPU_UNLIKELY(err != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(err); +} + +int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t ssize, cudaStream_t stream STARPU_ATTRIBUTE_UNUSED, + enum cudaMemcpyKind kind) +{ + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { + STARPU_ABORT(); + } + + /* do it in a synchronous fashion */ + cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + 
+ return 0; +} + +/* Driver porters: this is optional but really recommended */ +int +starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + cudaStream_t stream STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind) +{ + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { + STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); + } + + cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; +} + +static int _starpu_cuda_copy_interface(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + (void) req; + + int ret = 1; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + return ret; +} + +static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + NULL, + cudaMemcpyDeviceToHost); +} + +static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel 
*async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + NULL, + cudaMemcpyDeviceToDevice); +} + +static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + NULL, + cudaMemcpyHostToDevice); +} + +/* Driver porters: these are optional but really recommended */ +static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + NULL, + cudaMemcpyDeviceToHost); +} + +static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t 
numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + NULL, + cudaMemcpyDeviceToDevice); +} + +static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + NULL, + cudaMemcpyHostToDevice); +} + +static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + /* Direct GPU-GPU transfers are not allowed in general */ + (void) node; + (void) handling_node; + return 0; +} + +static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); + + STARPU_ASSERT(task); + struct starpu_codelet *cl = task->cl; + STARPU_ASSERT(cl); + + _starpu_set_current_task(task); + j->workerid = worker->workerid; + + /* Fetch data input synchronously */ + int ret = _starpu_fetch_task_input(task, j, 0); + if (ret != 0) + { + /* there was not enough memory so the codelet cannot be executed 
right now ... */
+		/* push the codelet back and try another one ... */
+		return -EAGAIN;
+	}
+
+	_starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling);
+
+	starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl);
+	STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined");
+
+	if (_starpu_get_disable_kernels() <= 0)
+	{
+		_STARPU_TRACE_START_EXECUTING(j);
+		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+		_STARPU_TRACE_END_EXECUTING(j);
+	}
+
+	return 0;
+}
+
+static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker);
+
+/* Execute a job, up to completion for synchronous jobs.
+ * Returns 0 on success. When start_job_on_cuda() returns -EAGAIN, the task is
+ * pushed back with _starpu_push_task_to_workers() and -EAGAIN is returned, so
+ * the caller must not push it again. Any other error aborts. */
+static int execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker)
+{
+	int res;
+
+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
+
+	res = start_job_on_cuda(j, worker);
+
+	if (res)
+	{
+		switch (res)
+		{
+			case -EAGAIN:
+				_STARPU_DISP("ouch, CUDA could not actually run task %p, putting it back...\n", task);
+				_starpu_push_task_to_workers(task);
+				return -EAGAIN;
+			default:
+				STARPU_ABORT();
+		}
+	}
+
+	finish_job_on_cuda(j, worker);
+
+	return 0;
+}
+
+/* Complete job J on WORKER: clear the worker's current task, end the job,
+ * update the job feedback, push the task output and handle job termination. */
+static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker)
+{
+	int profiling = starpu_profiling_status_get();
+
+	worker->current_task = NULL;
+
+	_starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling);
+
+	_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling);
+
+	_starpu_push_task_output(j);
+
+	_starpu_set_current_task(NULL);
+
+	_starpu_handle_job_termination(j);
+}
+
+/* One iteration of the main driver loop: make the datawizard progress, pull
+ * one task for WORKER and run it synchronously. Always returns 0. */
+static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker)
+{
+	struct starpu_task *task;
+	struct _starpu_job *j;
+	int res;
+
+	unsigned memnode = worker->memory_node;
+
+	/* Make some progress */
+	_starpu_datawizard_progress(1);
+	if (memnode != STARPU_MAIN_RAM)
+	{
+		_starpu_datawizard_progress(1);
+	}
+
+	/* And pull a task */
+	task = _starpu_get_worker_task(worker, worker->workerid, worker->memory_node);
+
+	if (!task)
+		return 0;
+
+	j = _starpu_get_job_associated_to_task(task);
+
+	/* can CUDA do that task ? */
+	if (!_STARPU_MAY_PERFORM(j, CUDA))
+	{
+		/* this is neither a cuda nor a cublas task */
+		_starpu_worker_refuse_task(worker, task);
+		return 0;
+	}
+
+	worker->current_task = task;
+
+	res = execute_job_on_cuda(task, worker);
+
+	if (res)
+	{
+		switch (res)
+		{
+			case -EAGAIN:
+				/* execute_job_on_cuda() has already pushed the
+				 * task back to the workers; pushing it a second
+				 * time here would queue the same task twice. */
+				return 0;
+			default:
+				STARPU_ABORT();
+		}
+	}
+
+	return 0;
+}
+
+/* Worker thread entry point: initialize the driver, call
+ * _starpu_cuda_driver_run_once() while the machine is running, then
+ * deinitialize the driver. _ARG is the struct _starpu_worker to drive. */
+void *_starpu_cuda_worker(void *_arg)
+{
+	struct _starpu_worker *worker = _arg;
+
+	_starpu_cuda_driver_init(worker);
+	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
+		_starpu_cuda_driver_run_once(worker);
+	}
+	_starpu_cuda_driver_deinit(worker);
+
+	return NULL;
+}
+
+
+/* Report the cuBLAS status code STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_MSG, then call STARPU_ABORT().
+ * Status codes without a dedicated case are reported as "unknown error". */
+void starpu_cublas_report_error(const char *func, const char *file, int line, int status)
+{
+	/* Only ever points to string literals, hence const. */
+	const char *errormsg;
+	switch (status)
+	{
+		case CUBLAS_STATUS_SUCCESS:
+			errormsg = "success";
+			break;
+		case CUBLAS_STATUS_NOT_INITIALIZED:
+			errormsg = "not initialized";
+			break;
+		case CUBLAS_STATUS_ALLOC_FAILED:
+			errormsg = "alloc failed";
+			break;
+		case CUBLAS_STATUS_INVALID_VALUE:
+			errormsg = "invalid value";
+			break;
+		case CUBLAS_STATUS_ARCH_MISMATCH:
+			errormsg = "arch mismatch";
+			break;
+		case CUBLAS_STATUS_EXECUTION_FAILED:
+			errormsg = "execution failed";
+			break;
+		case CUBLAS_STATUS_INTERNAL_ERROR:
+			errormsg = "internal error";
+			break;
+		default:
+			errormsg = "unknown error";
+			break;
+	}
+	_STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+	STARPU_ABORT();
+}
+
+/* Report the CUDA runtime error STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_ERROR, using the message returned by cudaGetErrorString(). */
+void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
+{
+	const char *errormsg = cudaGetErrorString(status);
+	_STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+}
+
+#ifdef STARPU_HAVE_LIBCUSOLVER
+/* Report the cuSOLVER status code STATUS observed in FUNC at FILE:LINE through
+ * _STARPU_MSG, then call STARPU_ABORT().
+ * The message is the enumerator name itself; codes without a case are
+ * reported as "unknown error". */
+void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status)
+{
+#define REPORT(error) case error: errormsg = #error; break;
+	/* Only ever points to string literals, hence const. */
+	const char *errormsg;
+	switch (status)
+	{
+		REPORT(CUSOLVER_STATUS_SUCCESS);
+		REPORT(CUSOLVER_STATUS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_ALLOC_FAILED);
+		REPORT(CUSOLVER_STATUS_INVALID_VALUE);
+		REPORT(CUSOLVER_STATUS_ARCH_MISMATCH);
+		REPORT(CUSOLVER_STATUS_MAPPING_ERROR);
+		REPORT(CUSOLVER_STATUS_EXECUTION_FAILED);
+		REPORT(CUSOLVER_STATUS_INTERNAL_ERROR);
+		REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_ZERO_PIVOT);
+		REPORT(CUSOLVER_STATUS_INVALID_LICENSE);
+
+#if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11)
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE);
+		REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER);
+		REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR);
+		REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED);
+		REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE);
+		REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES);
+		REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED);
+		REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED);
+		REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR);
+		REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE);
+#endif
+		default:
+			errormsg = "unknown error";
+			break;
+	}
+	_STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
+	STARPU_ABORT();
+}
+#endif
+
+static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker)
+{
+	/* Let's go ! 
*/ + _starpu_cuda_worker(worker); + + return 0; +} + +struct _starpu_driver_ops _starpu_driver_cuda_ops = +{ + .init = _starpu_cuda_driver_init, + .run = _starpu_cuda_run_from_worker, + .run_once = _starpu_cuda_driver_run_once, + .deinit = _starpu_cuda_driver_deinit, +}; + +struct _starpu_node_ops _starpu_driver_cuda_node_ops = +{ + .name = "cuda0 driver", + .malloc_on_node = _starpu_cuda_malloc_on_node, + .free_on_node = _starpu_cuda_free_on_node, + + .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface, + .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface, + .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, + .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, + .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, + .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, + + .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, + .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, +}; diff --git a/src/drivers/cuda/driver_cuda1.c b/src/drivers/cuda/driver_cuda1.c new file mode 100644 index 0000000..53f07be --- /dev/null +++ b/src/drivers/cuda/driver_cuda1.c @@ -0,0 +1,1418 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This is a version of the CUDA driver with reduced features: + * - asynchronous kernel execution + * - asynchronous data transfers + * - peer2peer transfers + * + * This is not meant to be actually used :) + * + * It is only meant as a basic driver sample, easy to get inspired from for + * writing other drivers. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "driver_cuda.h" +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX +#include +#endif +#ifdef STARPU_USE_CUDA +#include +#endif + +#if CUDART_VERSION >= 5000 +/* Avoid letting our streams spuriously synchronize with the NULL stream */ +#define starpu_cudaStreamCreate(stream) cudaStreamCreateWithFlags(stream, cudaStreamNonBlocking) +#else +#define starpu_cudaStreamCreate(stream) cudaStreamCreate(stream) +#endif + +/* Consider a rough 10% overhead cost */ +#define FREE_MARGIN 0.9 + +static size_t global_mem[STARPU_MAXCUDADEVS]; +int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; +static cudaStream_t streams[STARPU_NMAXWORKERS]; +static char used_stream[STARPU_NMAXWORKERS]; +static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS]; +static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS]; +/* Note: streams are not thread-safe, so we define them for each CUDA worker + * emitting a GPU-GPU transfer */ +static cudaStream_t in_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS]; +static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; +static cudaEvent_t task_events[STARPU_NMAXWORKERS]; + +static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; +static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; + +int _starpu_nworker_per_cuda = 1; + +static size_t _starpu_cuda_get_global_mem_size(unsigned devid) +{ + return global_mem[devid]; +} + +cudaStream_t starpu_cuda_get_local_in_transfer_stream() +{ + int worker = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(worker); + cudaStream_t stream; + + stream = in_transfer_streams[devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t 
starpu_cuda_get_in_transfer_stream(unsigned dst_node) +{ + int dst_devid = starpu_memory_node_get_devid(dst_node); + cudaStream_t stream; + + stream = in_transfer_streams[dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t starpu_cuda_get_local_out_transfer_stream() +{ + int worker = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(worker); + cudaStream_t stream; + + stream = out_transfer_streams[devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + cudaStream_t stream; + + stream = out_transfer_streams[src_devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + int dst_devid = starpu_memory_node_get_devid(dst_node); + cudaStream_t stream; + + stream = in_peer_transfer_streams[src_devid][dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +cudaStream_t starpu_cuda_get_local_stream(void) +{ + int worker = starpu_worker_get_id_check(); + + used_stream[worker] = 1; + return streams[worker]; +} + +const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + unsigned devid = config->workers[workerid].devid; + return &props[devid]; +} + + +/* Early library initialization, before anything else, just initialize data */ +void _starpu_cuda_init(void) +{ + memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); + memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); +} + +/* Return the number of devices usable in the system. 
+ * The value returned cannot be greater than MAXCUDADEVS */ + +static unsigned _starpu_get_cuda_device_count(void) +{ + int cnt; + cudaError_t cures; + cures = cudaGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(cures)) + return 0; + + if (cnt > STARPU_MAXCUDADEVS) + { + _STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS); + cnt = STARPU_MAXCUDADEVS; + } + return (unsigned)cnt; +} + +/* This is run from initialize to determine the number of CUDA devices */ +void _starpu_init_cuda(void) +{ +} + +/* This is called to really discover the hardware */ +void _starpu_cuda_discover_devices(struct _starpu_machine_config *config) +{ + /* Discover the number of CUDA devices. Fill the result in CONFIG. */ + + int cnt; + cudaError_t cures; + + cures = cudaGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + cnt = 0; + config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt; +} + +static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + struct starpu_conf *uconf = &config->conf; + + _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0 + ? NULL + : (int *)uconf->workers_cuda_gpuid, + &(config->current_devid[STARPU_CUDA_WORKER]), + (int *)topology->workers_devid[STARPU_CUDA_WORKER], + "STARPU_WORKERS_CUDAID", + topology->nhwdevices[STARPU_CUDA_WORKER], + STARPU_CUDA_WORKER); + _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]); +} + +/* Determine which devices we will use */ +void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int ncuda = config->conf.ncuda; + + if (ncuda != 0) + { + /* The user did not disable CUDA. 
We need to + * initialize CUDA early to count the number of + * devices + */ + _starpu_init_cuda(); + int nb_devices = _starpu_get_cuda_device_count(); + + _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); + } + + /* Now we know how many CUDA devices will be used */ + topology->ndevices[STARPU_CUDA_WORKER] = ncuda; + + _starpu_initialize_workers_cuda_gpuid(config); + + unsigned cudagpu; + for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); + + if (devid == -1) + { + // There is no more devices left + topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; + break; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_CUDA_WORKER, + cudagpu, devid, 0, 0, + 1, 1, NULL, NULL); + } + + /* Don't copy this, just here for other code to work fine */ + topology->cuda_th_per_stream = 0; + topology->cuda_th_per_dev = 1; +} + +/* Bind the driver on a CPU core */ +void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned *preferred_binding = NULL; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + + if (cuda_bindid_init[devid]) + { + workerarg->bindid = cuda_bindid[devid]; + } + else + { + cuda_bindid_init[devid] = 1; + + workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } +} + +/* Set up memory and buses */ +void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa; + + if (cuda_memory_init[devid]) + { + memory_node = cuda_memory_nodes[devid]; + } + else + { + cuda_memory_init[devid] = 1; + + 
memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); + _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); + } + + int worker2; + for (worker2 = 0; worker2 < workerarg->workerid; worker2++) + { + struct _starpu_worker *workerarg2 = &config->workers[worker2]; + int devid2 = workerarg2->devid; + if (workerarg2->arch == STARPU_CUDA_WORKER) + { + unsigned memory_node2 = starpu_worker_get_memory_node(worker2); + _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node); + _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2); +#if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX + { + hwloc_obj_t obj, obj2, ancestor; + obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid); + obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2); + ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); + if (ancestor) + { + struct _starpu_hwloc_userdata *data = ancestor->userdata; +#ifdef STARPU_VERBOSE + { + char name[64]; + hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); + _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); + } +#endif + starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus); + starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus); + } + } +#endif + } + } + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can also manage transfers on NUMA nodes + for (numa = 0; numa < 
starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(workerarg, numa); + + _starpu_worker_drives_memory_node(workerarg, memory_node); + + workerarg->memory_node = memory_node; +} + +/* Set the current CUDA device */ +void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + cudaError_t cures; + + cures = cudaSetDevice(devid); + + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} + +static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) +{ + starpu_ssize_t limit; + size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; + size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; + + /* Find the size of the memory on the device */ + totalGlobalMem = props[devid].totalGlobalMem; + + limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; + + global_mem[devid] = limit * 1024*1024; +} +/* Really initialize one device */ +static void init_device_context(unsigned devid, unsigned memnode) +{ + STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); + + cudaError_t cures; + + starpu_cuda_set_device(devid); + + /* force CUDA to initialize the context for real */ + cures = cudaFree(0); + if (STARPU_UNLIKELY(cures)) + { + if (cures == cudaErrorDevicesUnavailable) + { + _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); + exit(77); + } + STARPU_CUDA_REPORT_ERROR(cures); + } + + cures = cudaGetDeviceProperties(&props[devid], devid); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (props[devid].computeMode == cudaComputeModeExclusive) + { + _STARPU_MSG("CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); + STARPU_ABORT(); + } +#endif + + cures = starpu_cudaStreamCreate(&in_transfer_streams[devid]); + if (STARPU_UNLIKELY(cures)) + 
STARPU_CUDA_REPORT_ERROR(cures); + + cures = starpu_cudaStreamCreate(&out_transfer_streams[devid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) + { + cures = starpu_cudaStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + } + } + + _starpu_cuda_limit_gpu_mem_if_needed(devid); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); +} + +/* De-initialize one device */ +static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + starpu_cuda_set_device(devid); + + cudaStreamDestroy(in_transfer_streams[devid]); + cudaStreamDestroy(out_transfer_streams[devid]); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) + { + cudaStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); + } + } +} + +static void init_worker_context(unsigned workerid, unsigned devid) +{ + cudaError_t cures; + starpu_cuda_set_device(devid); + + cures = cudaEventCreateWithFlags(&task_events[workerid], cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + cures = starpu_cudaStreamCreate(&streams[workerid]); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} + +static void deinit_worker_context(unsigned workerid, unsigned devid) +{ + starpu_cuda_set_device(devid); + cudaEventDestroy(task_events[workerid]); + cudaStreamDestroy(streams[workerid]); +} + + +/* This is run from the driver thread to initialize the driver CUDA 
context */ +static int _starpu_cuda_driver_init(struct _starpu_worker *worker) +{ + _starpu_driver_start(worker, STARPU_CUDA_WORKER, 0); + _starpu_set_local_worker_key(worker); + + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + + init_device_context(devid, memnode); + + unsigned workerid = worker->workerid; + + float size = (float) global_mem[devid] / (1<<30); + /* get the device's name */ + char devname[64]; + strncpy(devname, props[devid].name, 63); + devname[63] = 0; + + snprintf(worker->name, sizeof(worker->name), "CUDA1 %u (%s %.1f GiB)", devid, devname, size); + snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u", devid); + _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); + + init_worker_context(workerid, worker->devid); + + _STARPU_TRACE_WORKER_INIT_END(workerid); + + { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "CUDA1 %u", worker->devid); + starpu_pthread_setname(thread_name); + } + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + worker->status = STATUS_UNKNOWN; + worker->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + + return 0; +} + +static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) +{ + _STARPU_TRACE_WORKER_DEINIT_START; + + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + + /* I'm last, deinitialize device */ + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! 
*/ + _starpu_free_all_automatically_allocated_buffers(memnode); + + _starpu_malloc_shutdown(memnode); + + deinit_device_context(devid); + + unsigned workerid = worker->workerid; + + deinit_worker_context(workerid, worker->devid); + + worker->worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); + + return 0; +} + +static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + (void) flags; + + unsigned devid = starpu_memory_node_get_devid(dst_node); + + starpu_cuda_set_device(devid); + + /* Check if there is free memory */ + size_t cuda_mem_free, cuda_mem_total; + cudaError_t status; + status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); + if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) + { + addr = 0; + } + else + { + status = cudaMalloc((void **)&addr, size); + if (!addr || (status != cudaSuccess)) + { + if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) + STARPU_CUDA_REPORT_ERROR(status); + addr = 0; + } + } + return addr; +} + +static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void) dst_node; + (void) addr; + (void) size; + (void) flags; + + cudaError_t err; + unsigned devid = starpu_memory_node_get_devid(dst_node); + starpu_cuda_set_device(devid); + err = cudaFree((void*)addr); + if (STARPU_UNLIKELY(err != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(err); +} + +int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t ssize, cudaStream_t stream, + enum cudaMemcpyKind kind) +{ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT(); 
+#endif + } + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + cures = cudaMemcpyPeerAsync((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize, stream); + } + else +#endif + { + cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); + } + (void) cudaGetLastError(); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + /* do it in a synchronous fashion */ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + if (peer_copy) + { + cures = cudaMemcpyPeer((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize); + } + else +#endif + { + cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); + } + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + + return -EAGAIN; +} + +/* Driver porters: this is optional but really recommended */ +int +starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + cudaStream_t stream, enum cudaMemcpyKind kind) +{ +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + cudaError_t cures = 0; + + if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +# ifdef BUGGED_MEMCPY3D + STARPU_ABORT_MSG("CUDA memcpy 3D peer buggy, but core triggered one?!"); +# endif + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); +#endif + } + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + 
if (peer_copy) + { + struct cudaMemcpy3DPeerParms p; + memset(&p, 0, sizeof(p)); + + p.srcDevice = src_dev; + p.dstDevice = dst_dev; + p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld_src, blocksize, numblocks); + p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld_dst, blocksize, numblocks); + p.extent = make_cudaExtent(blocksize, numblocks, 1); + + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy3DPeerAsync(&p, stream); + (void) cudaGetLastError(); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy3DPeer(&p); + (void) cudaGetLastError(); + + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + else +#endif + { + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cures = cudaMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind, stream); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || cures) + { + cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return 0; + } + } + + return -EAGAIN; +} + +static inline cudaEvent_t *_starpu_cuda_event(union _starpu_async_channel_event *_event) +{ + cudaEvent_t *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +static unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel) +{ + cudaEvent_t event; + cudaError_t cures; + unsigned success; + + event = 
*_starpu_cuda_event(&async_channel->event); + cures = cudaEventQuery(event); + success = (cures == cudaSuccess); + + if (success) + cudaEventDestroy(event); + else if (cures != cudaErrorNotReady) + STARPU_CUDA_REPORT_ERROR(cures); + + return success; +} + +/* Only used at starpu_shutdown */ +static void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel) +{ + cudaEvent_t event; + cudaError_t cures; + + event = *_starpu_cuda_event(&async_channel->event); + + cures = cudaEventSynchronize(event); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + cures = cudaEventDestroy(event); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); +} + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +static void +starpu_cuda_set_copy_device(unsigned src_node, unsigned dst_node) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + unsigned devid; + if ((src_kind == STARPU_CUDA_RAM) && (dst_kind == STARPU_CUDA_RAM)) + { + /* GPU-GPU transfer, issue it from the destination */ + devid = starpu_memory_node_get_devid(dst_node); + } + else + { + unsigned node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node; + devid = starpu_memory_node_get_devid(node); + } + starpu_cuda_set_device(devid); +} +#endif + +static int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#else + STARPU_ASSERT(src_node == dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; 
+/* CUDA - CUDA transfer */ + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !copy_methods->any_to_any) + { + STARPU_ASSERT(copy_methods->any_to_any); + /* this is not associated to a request so it's synchronous */ + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_peer_transfer_stream(src_node, dst_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + + cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +static int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* only the proper CUBLAS thread can initiate this directly ! 
*/ +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !copy_methods->any_to_any) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_out_transfer_stream(src_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + + cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +static int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + starpu_cuda_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + cudaError_t cures; + cudaStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* STARPU_CPU_RAM -> CUBLAS_RAM */ + /* only the proper CUBLAS thread can initiate this ! 
*/ +#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || + !copy_methods->any_to_any) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; + cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(cures); + + stream = starpu_cuda_get_in_transfer_stream(dst_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + + cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(cures != cudaSuccess)) + STARPU_CUDA_REPORT_ERROR(cures); + } + return ret; +} + +static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, + cudaMemcpyDeviceToHost); +} + +static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int 
dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, + cudaMemcpyDeviceToDevice); +} + +static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, + cudaMemcpyHostToDevice); +} + +static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, + cudaMemcpyDeviceToHost); +} + +static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, 
size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); +#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, + cudaMemcpyDeviceToDevice); +} + +static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); + + return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, + cudaMemcpyHostToDevice); +} + +static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) +{ +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + (void) node; + enum starpu_node_kind kind = starpu_node_get_kind(handling_node); + return kind == STARPU_CUDA_RAM; +#else /* STARPU_HAVE_CUDA_MEMCPY_PEER */ + /* Direct GPU-GPU transfers are not allowed in general */ + (void) node; + (void) handling_node; + return 0; +#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ +} + +static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); + + STARPU_ASSERT(task); + struct 
starpu_codelet *cl = task->cl; + STARPU_ASSERT(cl); + + _starpu_set_current_task(task); + j->workerid = worker->workerid; + + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); + +#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) + /* We make sure we do manipulate the proper device */ + starpu_cuda_set_device(worker->devid); +#endif + + starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl); + STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined"); + + if (_starpu_get_disable_kernels() <= 0) + { + _STARPU_TRACE_START_EXECUTING(j); + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + _STARPU_TRACE_END_EXECUTING(j); + } +} + +static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker); + +/* Execute a job, up to completion for synchronous jobs */ +static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker) +{ + int workerid = worker->workerid; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + start_job_on_cuda(j, worker); + + if (!used_stream[workerid]) + { + used_stream[workerid] = 1; + _STARPU_DISP("Warning: starpu_cuda_get_local_stream() was not used to submit kernel to CUDA on worker %d. CUDA will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution. 
/* One iteration of the main driver loop.
 *
 * In order:
 *  1. if all the input buffers of the task being transferred have arrived,
 *     finish fetching its input and start executing it;
 *  2. poll the CUDA event of the current task and terminate the job once the
 *     event has completed;
 *  3. let the data wizard make progress;
 *  4. if the worker holds no task any more, pull a new one and start
 *     fetching its input asynchronously.
 *
 * Always returns 0.
 */
static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker)
{
	struct starpu_task *task;
	struct _starpu_job *j;
	int res;

	/* Counters used below to decide how to drive the data wizard */
	int idle_tasks, idle_transfers;

	/* First poll for completed jobs */
	idle_tasks = 0;
	idle_transfers = 0;
	int workerid = worker->workerid;
	unsigned memnode = worker->memory_node;

	/* Note: inside a do {} while (0), `continue` jumps to the (false)
	 * condition and therefore leaves the block. */
	do /* This do {} while (0) is only to match the cuda driver worker for loop */
	{
		if (!worker->ntasks)
			idle_tasks++;
		if (!worker->task_transferring)
			idle_transfers++;

		if (!worker->ntasks && !worker->task_transferring)
		{
			/* Nothing to test at all */
			continue;
		}

		/* First test for transfers pending for next task */
		task = worker->task_transferring;
		if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer)
		{
			/* All buffers have been transferred: read barrier before
			 * completing the fetch and running the task */
			STARPU_RMB();
			_STARPU_TRACE_END_PROGRESS(memnode);
			j = _starpu_get_job_associated_to_task(task);

			_starpu_fetch_task_input_tail(task, j, worker);
			/* Reset it */
			worker->task_transferring = NULL;

			execute_job_on_cuda(task, worker);
			_STARPU_TRACE_START_PROGRESS(memnode);
		}

		/* Then test for termination of queued tasks */
		if (!worker->ntasks)
			/* No queued task */
			continue;

		task = worker->current_task;
		if (task == worker->task_transferring)
			/* Next task is still pending transfer */
			continue;

		/* On-going asynchronous task, check for its termination first */
		cudaError_t cures = cudaEventQuery(task_events[workerid]);

		if (cures != cudaSuccess)
		{
			/* cudaErrorNotReady just means "not finished yet"; anything
			 * else is a real error */
			STARPU_ASSERT_MSG(cures == cudaErrorNotReady, "CUDA error on task %p, codelet %p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), cudaGetErrorString(cures), cures);
		}
		else
		{
			_STARPU_TRACE_END_PROGRESS(memnode);
			/* Asynchronous task completed! */
			finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker);
			_STARPU_TRACE_START_PROGRESS(memnode);
		}
		if (worker->ntasks < 1)
			idle_tasks++;
	} while(0);

#if defined(STARPU_NON_BLOCKING_DRIVERS)
	if (!idle_tasks)
	{
		/* No task ready yet, no better thing to do than waiting */
		__starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers);
		return 0;
	}
#endif

	/* Something done, make some progress */
	/* (the value stored in res here is overwritten below before being read) */
	res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1);

	/* The worker still holds a task: do not pull another one */
	if (worker->ntasks >= 1)
		return 0;

	/* And pull a task */
	task = _starpu_get_worker_task(worker, worker->workerid, worker->memory_node);

	if (!task)
		return 0;

	worker->ntasks++;

	j = _starpu_get_job_associated_to_task(task);

	/* can CUDA do that task ? */
	if (!_STARPU_MAY_PERFORM(j, CUDA))
	{
		/* this is neither a cuda nor a cublas task */
		/* NOTE(review): worker->ntasks was incremented just above and is
		 * not decremented on this path; confirm that
		 * _starpu_worker_refuse_task() accounts for it, otherwise the
		 * `ntasks >= 1` early return above would keep this worker from
		 * pulling tasks again. */
		_starpu_worker_refuse_task(worker, task);
		return 0;
	}

	worker->current_task = task;

	/* Fetch data asynchronously */
	_STARPU_TRACE_END_PROGRESS(memnode);
	_starpu_set_local_worker_key(worker);
	res = _starpu_fetch_task_input(task, j, 1);
	STARPU_ASSERT(res == 0);
	_STARPU_TRACE_START_PROGRESS(memnode);

	return 0;
}
%d: %s \n", func, file, line, status, errormsg); + STARPU_ABORT(); +} + +void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status) +{ + const char *errormsg = cudaGetErrorString(status); + _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); +} + +#ifdef STARPU_HAVE_LIBCUSOLVER +void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status) +{ +#define REPORT(error) case error: errormsg = #error; break; + char *errormsg; + switch (status) + { + REPORT(CUSOLVER_STATUS_SUCCESS); + REPORT(CUSOLVER_STATUS_NOT_INITIALIZED); + REPORT(CUSOLVER_STATUS_ALLOC_FAILED); + REPORT(CUSOLVER_STATUS_INVALID_VALUE); + REPORT(CUSOLVER_STATUS_ARCH_MISMATCH); + REPORT(CUSOLVER_STATUS_MAPPING_ERROR); + REPORT(CUSOLVER_STATUS_EXECUTION_FAILED); + REPORT(CUSOLVER_STATUS_INTERNAL_ERROR); + REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + REPORT(CUSOLVER_STATUS_NOT_SUPPORTED); + REPORT(CUSOLVER_STATUS_ZERO_PIVOT); + REPORT(CUSOLVER_STATUS_INVALID_LICENSE); + +#if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11) + REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED); + REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID); + REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC); + REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE); + REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER); + REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR); + REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED); + REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE); + REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES); + REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED); + REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED); + REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR); + REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE); +#endif + default: + errormsg = "unknown error"; + break; + } + _STARPU_MSG("oops in %s (%s:%d)... 
%d: %s \n", func, file, line, status, errormsg); + STARPU_ABORT(); +} +#endif + +static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker) +{ + /* Let's go ! */ + _starpu_cuda_worker(worker); + + return 0; +} + +struct _starpu_driver_ops _starpu_driver_cuda_ops = +{ + .init = _starpu_cuda_driver_init, + .run = _starpu_cuda_run_from_worker, + .run_once = _starpu_cuda_driver_run_once, + .deinit = _starpu_cuda_driver_deinit, +}; + +struct _starpu_node_ops _starpu_driver_cuda_node_ops = +{ + .name = "cuda1 driver", + .malloc_on_node = _starpu_cuda_malloc_on_node, + .free_on_node = _starpu_cuda_free_on_node, + + .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu, + .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cpu_to_cuda, + .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, + .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, + .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, + + .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, + .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, + + .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, + .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, + + .wait_request_completion = _starpu_cuda_wait_request_completion, + .test_request_completion = _starpu_cuda_test_request_completion, +}; diff --git a/src/drivers/cuda/driver_cuda_init.c b/src/drivers/cuda/driver_cuda_init.c new file mode 100644 index 0000000..aa4191c --- /dev/null +++ 
/* Description of the CUDA worker driver, registered for STARPU_CUDA_WORKER
 * by _starpu_cuda_preinit() below.
 *
 * The names and the memory kind are always filled in.  The driver callbacks
 * are only provided when StarPU is built with CUDA support or for SimGrid;
 * otherwise those fields are left zero-initialized.
 */
static struct _starpu_driver_info driver_info =
{
	.name_upper = "CUDA",
	.name_var = "CUDA",
	.name_lower = "cuda",
	.memory_kind = STARPU_CUDA_RAM,
	/* NOTE(review): the meaning of this factor is not visible from this
	 * file -- see the users of _starpu_driver_info.alpha */
	.alpha = 13.33f,
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
	.driver_ops = &_starpu_driver_cuda_ops,
	.run_worker = _starpu_cuda_worker,
	/* hwloc lookup is only wired when hwloc is available and the CUDA0
	 * driver is not in use */
#if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_USE_CUDA0)
	.get_hwloc_obj = _starpu_cuda_get_hwloc_obj,
#endif
	.init_worker_binding = _starpu_cuda_init_worker_binding,
	.init_worker_memory = _starpu_cuda_init_worker_memory,
#endif
};

/* Description of the CUDA memory node kind, registered for STARPU_CUDA_RAM
 * by _starpu_cuda_preinit() below.  The node operations are only provided
 * when StarPU is built with CUDA support or for SimGrid. */
static struct _starpu_memory_driver_info memory_driver_info =
{
	.name_upper = "CUDA",
	.worker_archtype = STARPU_CUDA_WORKER,
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
	.ops = &_starpu_driver_cuda_node_ops,
#endif
};

/* Register the CUDA worker driver and the CUDA memory driver descriptions
 * defined above with the StarPU core. */
void _starpu_cuda_preinit(void)
{
	_starpu_driver_info_register(STARPU_CUDA_WORKER, &driver_info);
	_starpu_memory_driver_info_register(STARPU_CUDA_RAM, &memory_driver_info);
}
heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#ifdef STARPU_USE_CUDA +#include +#include + +//#ifdef CUBLAS_V2_H_ +//#error oops +//#endif + +static int cublas_initialized[STARPU_NMAXWORKERS]; +static starpu_pthread_mutex_t mutex[STARPU_MAXCUDADEVS]; + +static unsigned get_idx(void) +{ + unsigned workerid = starpu_worker_get_id_check(); + unsigned th_per_dev = _starpu_get_machine_config()->topology.cuda_th_per_dev; + unsigned th_per_stream = _starpu_get_machine_config()->topology.cuda_th_per_stream; + + if (th_per_dev) + return starpu_worker_get_devid(workerid); + else if (th_per_stream) + return workerid; + else + /* same thread for all devices */ + return 0; +} + +static void init_cublas_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + unsigned idx = get_idx(); + unsigned devid = starpu_worker_get_devid(starpu_worker_get_id_check()); + STARPU_PTHREAD_MUTEX_LOCK(&mutex[devid]); + if (!(cublas_initialized[idx]++)) + { + cublasStatus_t cublasst = cublasInit(); + if (STARPU_UNLIKELY(cublasst)) + STARPU_CUBLAS_REPORT_ERROR(cublasst); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex[devid]); + + _starpu_init_cublas_v2_func(); +} + +static void set_cublas_stream_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + cublasSetKernelStream(starpu_cuda_get_local_stream()); +} + +static void shutdown_cublas_func(void *args 
/* Make the legacy cuBLAS API use the calling worker's local CUDA stream.
 *
 * Does nothing when StarPU is built without CUDA or when there is no CUDA
 * worker.  The stream is (re)set when cuda_th_per_dev is not set, or when it
 * is set but cuda_th_per_stream is not and more than one CUDA worker is
 * recorded for the worker's device number; in the remaining configurations
 * the call is a no-op.
 */
void starpu_cublas_set_stream(void)
{
#ifdef STARPU_USE_CUDA
	if (starpu_cuda_worker_get_count() == 0)
		return;

	unsigned self = starpu_worker_get_id_check();
	int dev = starpu_worker_get_devnum(self);

	if (_starpu_get_machine_config()->topology.cuda_th_per_dev)
	{
		/* Nothing to do in these configurations */
		if (_starpu_get_machine_config()->topology.cuda_th_per_stream)
			return;
		if (_starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][dev] <= 1)
			return;
	}

	cublasSetKernelStream(starpu_cuda_get_local_stream());
#endif
}
/* Initialize cublasLt for StarPU.
 *
 * When cublasLt support is compiled in and at least one CUDA worker exists,
 * runs init_cublasLt_func on every CUDA worker and then creates the handle
 * kept in `main_handle`; that handle is reset to NULL when its creation
 * fails.  Otherwise this is a no-op.
 */
void starpu_cublasLt_init(void)
{
#ifdef STARPU_HAVE_LIBCUBLASLT
	if (starpu_cuda_worker_get_count() != 0)
	{
		starpu_execute_on_each_worker_ex(init_cublasLt_func, NULL, STARPU_CUDA, "init_cublasLt");

		int created = (cublasLtCreate(&main_handle) == CUBLAS_STATUS_SUCCESS);
		if (!created)
			main_handle = NULL;
	}
#endif
}
(workerid >= 0) + return cublasLt_handles[workerid]; + else + return main_handle; +} +#endif diff --git a/src/drivers/cuda/starpu_cublas_v2.c b/src/drivers/cuda/starpu_cublas_v2.c new file mode 100644 index 0000000..537cca3 --- /dev/null +++ b/src/drivers/cuda/starpu_cublas_v2.c @@ -0,0 +1,67 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#ifdef STARPU_USE_CUDA +#include +#include +#include + +//#ifdef CUBLAS_H_ +//#error oops +//#endif + +static cublasHandle_t cublas_handles[STARPU_NMAXWORKERS]; +static cublasHandle_t main_handle; + +void _starpu_init_cublas_v2_func(void) +{ + cublasCreate(&cublas_handles[starpu_worker_get_id_check()]); + cublasSetStream(cublas_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); +} +void _starpu_shutdown_cublas_v2_func(void) +{ + cublasDestroy(cublas_handles[starpu_worker_get_id_check()]); +} + +void _starpu_cublas_v2_init(void) +{ + if (cublasCreate(&main_handle) != CUBLAS_STATUS_SUCCESS) + main_handle = NULL; +} + +void _starpu_cublas_v2_shutdown(void) +{ + if (main_handle) + cublasDestroy(main_handle); +} + +cublasHandle_t starpu_cublas_get_local_handle(void) +{ + if (!starpu_cuda_worker_get_count()) + return NULL; + + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + return cublas_handles[workerid]; + else + 
return main_handle; +} +#endif diff --git a/src/drivers/cuda/starpu_cusolver.c b/src/drivers/cuda/starpu_cusolver.c new file mode 100644 index 0000000..0d82b5a --- /dev/null +++ b/src/drivers/cuda/starpu_cusolver.c @@ -0,0 +1,120 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include +#include +#include +#include + +#ifdef STARPU_HAVE_LIBCUSOLVER +#include +#include +#include + +static cusolverDnHandle_t cusolverDn_handles[STARPU_NMAXWORKERS]; +static cusolverSpHandle_t cusolverSp_handles[STARPU_NMAXWORKERS]; +static cusolverRfHandle_t cusolverRf_handles[STARPU_NMAXWORKERS]; +static cusolverDnHandle_t mainDn_handle; +static cusolverSpHandle_t mainSp_handle; +static cusolverRfHandle_t mainRf_handle; + +static void init_cusolver_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + cusolverDnCreate(&cusolverDn_handles[starpu_worker_get_id_check()]); + cusolverDnSetStream(cusolverDn_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); + cusolverSpCreate(&cusolverSp_handles[starpu_worker_get_id_check()]); + cusolverSpSetStream(cusolverSp_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); + cusolverRfCreate(&cusolverRf_handles[starpu_worker_get_id_check()]); + // Not available? 
+ //cusolverRfSetStream(cusolverRf_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); +} + +static void shutdown_cusolver_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + cusolverDnDestroy(cusolverDn_handles[starpu_worker_get_id_check()]); + cusolverSpDestroy(cusolverSp_handles[starpu_worker_get_id_check()]); + cusolverRfDestroy(cusolverRf_handles[starpu_worker_get_id_check()]); +} +#endif + +void starpu_cusolver_init(void) +{ +#ifdef STARPU_HAVE_LIBCUSOLVER + if (!starpu_cuda_worker_get_count()) + return; + starpu_execute_on_each_worker(init_cusolver_func, NULL, STARPU_CUDA); + + if (cusolverDnCreate(&mainDn_handle) != CUSOLVER_STATUS_SUCCESS) + mainDn_handle = NULL; + if (cusolverSpCreate(&mainSp_handle) != CUSOLVER_STATUS_SUCCESS) + mainSp_handle = NULL; + if (cusolverRfCreate(&mainRf_handle) != CUSOLVER_STATUS_SUCCESS) + mainRf_handle = NULL; +#endif +} + +void starpu_cusolver_shutdown(void) +{ +#ifdef STARPU_HAVE_LIBCUSOLVER + if (!starpu_cuda_worker_get_count()) + return; + starpu_execute_on_each_worker(shutdown_cusolver_func, NULL, STARPU_CUDA); + + if (mainDn_handle) + cusolverDnDestroy(mainDn_handle); + if (mainSp_handle) + cusolverSpDestroy(mainSp_handle); + if (mainRf_handle) + cusolverRfDestroy(mainRf_handle); +#endif +} + +#ifdef STARPU_HAVE_LIBCUSOLVER +cusolverDnHandle_t starpu_cusolverDn_get_local_handle(void) +{ + if (!starpu_cuda_worker_get_count()) + return NULL; + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + return cusolverDn_handles[workerid]; + else + return mainDn_handle; +} + +cusolverSpHandle_t starpu_cusolverSp_get_local_handle(void) +{ + if (!starpu_cuda_worker_get_count()) + return NULL; + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + return cusolverSp_handles[workerid]; + else + return mainSp_handle; +} + +cusolverRfHandle_t starpu_cusolverRf_get_local_handle(void) +{ + if (!starpu_cuda_worker_get_count()) + return NULL; + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + 
return cusolverRf_handles[workerid]; + else + return mainRf_handle; +} +#endif diff --git a/src/drivers/cuda/starpu_cusparse.c b/src/drivers/cuda/starpu_cusparse.c new file mode 100644 index 0000000..2f473e7 --- /dev/null +++ b/src/drivers/cuda/starpu_cusparse.c @@ -0,0 +1,80 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +#include +#include +#include + +#ifdef STARPU_HAVE_LIBCUSPARSE +#include + +static cusparseHandle_t cusparse_handles[STARPU_NMAXWORKERS]; +static cusparseHandle_t main_handle; + +static void init_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + cusparseCreate(&cusparse_handles[starpu_worker_get_id_check()]); +#if HAVE_DECL_CUSPARSESETSTREAM + cusparseSetStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); +#else + cusparseSetKernelStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); +#endif +} + +static void shutdown_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + cusparseDestroy(cusparse_handles[starpu_worker_get_id_check()]); +} +#endif + +void starpu_cusparse_init(void) +{ +#ifdef STARPU_HAVE_LIBCUSPARSE + if (!starpu_cuda_worker_get_count()) + return; + starpu_execute_on_each_worker(init_cusparse_func, NULL, STARPU_CUDA); + + if (cusparseCreate(&main_handle) != CUSPARSE_STATUS_SUCCESS) + main_handle = 
NULL; +#endif +} + +void starpu_cusparse_shutdown(void) +{ +#ifdef STARPU_HAVE_LIBCUSPARSE + if (!starpu_cuda_worker_get_count()) + return; + starpu_execute_on_each_worker(shutdown_cusparse_func, NULL, STARPU_CUDA); + + if (main_handle) + cusparseDestroy(main_handle); +#endif +} + +#ifdef STARPU_HAVE_LIBCUSPARSE +cusparseHandle_t starpu_cusparse_get_local_handle(void) +{ + if (!starpu_cuda_worker_get_count()) + return NULL; + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + return cusparse_handles[workerid]; + else + return main_handle; +} +#endif diff --git a/src/drivers/disk/driver_disk.c b/src/drivers/disk/driver_disk.c new file mode 100644 index 0000000..ce70abc --- /dev/null +++ b/src/drivers/disk/driver_disk.c @@ -0,0 +1,296 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Operation table of the disk driver. It is only declared here because it is
+ * referenced below before its definition further down in this file. */
+static struct _starpu_node_ops _starpu_driver_disk_node_ops;
+
+/* Description of the disk memory driver, registered by _starpu_disk_preinit(). */
+static struct _starpu_memory_driver_info memory_driver_info =
+{
+	.name_upper = "Disk",
+	/* NOTE(review): -1 presumably means "no worker architecture" for disk nodes -- confirm */
+	.worker_archtype = (enum starpu_worker_archtype) -1,
+	.ops = &_starpu_driver_disk_node_ops,
+};
+
+/* Register the disk memory driver description for STARPU_DISK_RAM nodes. */
+void _starpu_disk_preinit(void)
+{
+	_starpu_memory_driver_info_register(STARPU_DISK_RAM, &memory_driver_info);
+}
+
+/* Allocate `size` bytes on disk node `dst_node` through _starpu_disk_alloc()
+ * and return the result as an integer address. `flags` is ignored. */
+static uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags)
+{
+	(void) flags;
+	return (uintptr_t) _starpu_disk_alloc(dst_node, size);
+}
+
+/* Release, through _starpu_disk_free(), the `size` bytes at `addr` on disk
+ * node `dst_node`. `flags` is ignored. */
+static void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
+{
+	(void) flags;
+	_starpu_disk_free(dst_node, (void *) addr, size);
+}
+
+/* CPU RAM -> disk: forward to _starpu_disk_write() and return its result.
+ * `src_node` must be a CPU RAM node (asserted). */
+static int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel)
+{
+	STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_CPU_RAM);
+
+	return _starpu_disk_write(src_node, dst_node, dst, src, dst_offset, size, async_channel);
+}
+
+/* Disk -> CPU RAM: forward to _starpu_disk_read() and return its result.
+ * `dst_node` must be a CPU RAM node (asserted). */
+static int _starpu_disk_copy_disk_to_src(void * src, size_t src_offset, unsigned src_node, void * dst, unsigned dst_node, size_t size, void * async_channel)
+{
+	STARPU_ASSERT(starpu_node_get_kind(dst_node) == STARPU_CPU_RAM);
+
+	return _starpu_disk_read(src_node, dst_node, src, dst, src_offset, size, async_channel);
+}
+
+/* Disk -> disk: forward to _starpu_disk_copy() and return its result.
+ * Both nodes must be disk nodes (asserted). */
+static int _starpu_disk_copy_disk_to_disk(void * src, size_t src_offset, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel)
+{
+	STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_DISK_RAM && starpu_node_get_kind(dst_node) == STARPU_DISK_RAM);
+
+	return _starpu_disk_copy(src_node, src, src_offset, dst_node, dst, dst_offset, size, async_channel);
+}
+
+/* Post-process a finished disk request. Nothing is done unless the event
+ * carries a packed buffer (ptr != NULL); in that case the buffer is unpacked
+ * into the handle (read, handle != NULL) or released (write, handle == NULL). */
+static void _starpu_disk_finalize_event(struct _starpu_disk_event *disk_event)
+{
+	if (disk_event->ptr == NULL)
+		return;
+
+	if (disk_event->handle != NULL)
+	{
+		/* read is finished, we can already unpack */
+		disk_event->handle->ops->unpack_data(disk_event->handle, disk_event->node, disk_event->ptr, disk_event->size);
+	}
+	else
+	{
+		/* write is finished, ptr was allocated in pack_data */
+		_starpu_free_flags_on_node(disk_event->node, disk_event->ptr, disk_event->size, 0);
+	}
+}
+
+/* Poll the request attached to `async_channel` with starpu_disk_test_request().
+ * When it reports completion, the event is post-processed. Returns the value
+ * reported by starpu_disk_test_request(). */
+static unsigned _starpu_disk_test_request_completion(struct _starpu_async_channel *async_channel)
+{
+	struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event);
+	unsigned success = starpu_disk_test_request(async_channel);
+
+	if (success)
+		_starpu_disk_finalize_event(disk_event);
+	return success;
+}
+
+/* Only used at starpu_shutdown */
+/* Wait for the request attached to `async_channel` with
+ * starpu_disk_wait_request(), then post-process the event. */
+static void _starpu_disk_wait_request_completion(struct _starpu_async_channel *async_channel)
+{
+	struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event);
+
+	starpu_disk_wait_request(async_channel);
+	_starpu_disk_finalize_event(disk_event);
+}
+
+/* Copy the interface of `handle` from disk node `src_node` to CPU RAM node
+ * `dst_node`.
+ *
+ * The interface's any_to_any method is used when it exists. Otherwise the whole
+ * disk object is read with _starpu_disk_full_read() and unpacked with
+ * unpack_data: immediately when the read returned 0, or later by the completion
+ * handlers when it returned -EAGAIN (the buffer is then recorded in the event).
+ *
+ * `req` may be NULL; the copy is then performed without asynchronous channel.
+ * Returns the value of the copy routine that was used (0 or -EAGAIN in the
+ * fallback path, asserted). */
+static int _starpu_disk_copy_interface_from_disk_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_CPU_RAM);
+
+	int ret = 0;
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+	/* The channel is only handed over when a request exists and asynchronous
+	 * copies are enabled */
+	void *channel = (req && !starpu_asynchronous_copy_disabled()) ? &req->async_channel : NULL;
+	/* req may be NULL: do not derive an address from it in that case */
+	struct _starpu_disk_event *disk_event = req ? _starpu_disk_get_event(&req->async_channel.event) : NULL;
+
+	if (channel)
+	{
+		req->async_channel.node_ops = &_starpu_driver_disk_node_ops;
+		disk_event->requests = NULL;
+		disk_event->ptr = NULL;
+		disk_event->handle = NULL;
+	}
+	if (copy_methods->any_to_any)
+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, channel);
+	else
+	{
+		void *obj = starpu_data_handle_to_pointer(handle, src_node);
+		void * ptr = NULL;
+		size_t size = 0;
+		ret = _starpu_disk_full_read(src_node, dst_node, obj, &ptr, &size, channel);
+		if (ret == 0)
+		{
+			/* read is already finished, we can already unpack */
+			handle->ops->unpack_data(handle, dst_node, ptr, size);
+		}
+		else if (ret == -EAGAIN)
+		{
+			/* remember the buffer so that the completion handlers unpack it */
+			STARPU_ASSERT(req);
+			disk_event->ptr = ptr;
+			disk_event->node = dst_node;
+			disk_event->size = size;
+			disk_event->handle = handle;
+		}
+		STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
+	}
+
+	return ret;
+}
+
+/* Copy the interface of `handle` between two disk nodes with the interface's
+ * any_to_any method and return its result. `req` may be NULL; the copy is then
+ * performed without asynchronous channel. */
+static int _starpu_disk_copy_interface_from_disk_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_DISK_RAM);
+
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+	void *channel = (req && !starpu_asynchronous_copy_disabled()) ? &req->async_channel : NULL;
+
+	/* Unlike the CPU <-> disk paths there is no pack/unpack fallback here */
+	STARPU_ASSERT(copy_methods->any_to_any);
+
+	if (channel)
+	{
+		struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&req->async_channel.event);
+		req->async_channel.node_ops = &_starpu_driver_disk_node_ops;
+		disk_event->requests = NULL;
+		disk_event->ptr = NULL;
+		disk_event->handle = NULL;
+	}
+	return copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, channel);
+}
+
+/* Copy the interface of `handle` from CPU RAM node `src_node` to disk node
+ * `dst_node`.
+ *
+ * The interface's any_to_any method is used when it exists. Otherwise the data
+ * is packed with pack_data and written with _starpu_disk_full_write(); the
+ * packed buffer is released immediately when the write returned 0, or later by
+ * the completion handlers when it returned -EAGAIN (the buffer is then recorded
+ * in the event, with no handle attached).
+ *
+ * `req` may be NULL; the copy is then performed without asynchronous channel.
+ * Returns the value of the copy routine that was used (0 or -EAGAIN in the
+ * fallback path, asserted). */
+static int _starpu_disk_copy_interface_from_cpu_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM);
+
+	int ret = 0;
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+	/* The channel is only handed over when a request exists and asynchronous
+	 * copies are enabled */
+	void *channel = (req && !starpu_asynchronous_copy_disabled()) ? &req->async_channel : NULL;
+	/* req may be NULL: do not derive an address from it in that case */
+	struct _starpu_disk_event *disk_event = req ? _starpu_disk_get_event(&req->async_channel.event) : NULL;
+
+	if (channel)
+	{
+		req->async_channel.node_ops = &_starpu_driver_disk_node_ops;
+		disk_event->requests = NULL;
+		disk_event->ptr = NULL;
+		disk_event->handle = NULL;
+	}
+
+	if (copy_methods->any_to_any)
+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, channel);
+	else
+	{
+		void *obj = starpu_data_handle_to_pointer(handle, dst_node);
+		void * ptr = NULL;
+		starpu_ssize_t size = 0;
+		handle->ops->pack_data(handle, src_node, &ptr, &size);
+		ret = _starpu_disk_full_write(src_node, dst_node, obj, ptr, size, channel);
+		if (ret == 0)
+		{
+			/* write is already finished, ptr was allocated in pack_data */
+			_starpu_free_flags_on_node(src_node, ptr, size, 0);
+		}
+		else if (ret == -EAGAIN)
+		{
+			/* remember the buffer so that the completion handlers release it */
+			STARPU_ASSERT(req);
+			disk_event->ptr = ptr;
+			disk_event->node = src_node;
+			disk_event->size = size;
+		}
+		STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
+	}
+
+	return ret;
+}
+
+/* Raw copy disk -> CPU RAM. The CPU-side address is `dst + dst_offset`; the
+ * disk side receives `src` and `src_offset` separately. */
+static int _starpu_disk_copy_data_from_disk_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_CPU_RAM);
+
+	return _starpu_disk_copy_disk_to_src((void*) src, src_offset, src_node,
+					     (void*) (dst + dst_offset), dst_node,
+					     size, async_channel);
+}
+
+/* Raw copy disk -> disk. Both offsets are forwarded separately from the
+ * object addresses. */
+static int _starpu_disk_copy_data_from_disk_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_DISK_RAM);
+
+	return _starpu_disk_copy_disk_to_disk((void*) src, src_offset, src_node,
+					      (void*) dst, dst_offset, dst_node,
+					      size, async_channel);
+}
+
+/* Raw copy CPU RAM -> disk. The CPU-side address is `src + src_offset`; the
+ * disk side receives `dst` and `dst_offset` separately. */
+static int _starpu_disk_copy_data_from_cpu_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
+{
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM);
+
+	return _starpu_disk_copy_src_to_disk((void*) (src + src_offset), src_node,
+					     (void*) dst, dst_offset, dst_node,
+					     size, async_channel);
+}
+
+/* Answer the is_direct_access_supported query of the disk driver for disk
+ * node `node` and memory node `handling_node`: 1 when `handling_node` is CPU
+ * RAM, the verdict of _starpu_disk_can_copy() when it is a disk, 0 otherwise. */
+static int _starpu_disk_is_direct_access_supported(unsigned node, unsigned handling_node)
+{
+	const int handler_kind = starpu_node_get_kind(handling_node);
+
+	/* Each worker can manage disks... */
+	if (handler_kind == STARPU_CPU_RAM)
+		return 1;
+
+	/* ...but disk <-> disk is not always allowed */
+	if (handler_kind == STARPU_DISK_RAM)
+		return _starpu_disk_can_copy(node, handling_node);
+
+	return 0;
+}
+
+/* Operation table through which the disk driver is reached */
+static struct _starpu_node_ops _starpu_driver_disk_node_ops =
+{
+	.name = "disk driver",
+
+	/* allocation on disk nodes */
+	.malloc_on_node = _starpu_disk_malloc_on_node,
+	.free_on_node = _starpu_disk_free_on_node,
+
+	.is_direct_access_supported = _starpu_disk_is_direct_access_supported,
+
+	/* interface copies, indexed by the kind of the peer node */
+	.copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu,
+	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk,
+
+	.copy_interface_from[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk,
+	.copy_interface_from[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk,
+
+	/* raw data copies, indexed by the kind of the peer node */
+	.copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu,
+	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk,
+
+	.copy_data_from[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
+	.copy_data_from[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk,
+
+	/* TODO: copy2D/3D? */
+
+	/* completion of asynchronous requests */
+	.wait_request_completion = _starpu_disk_wait_request_completion,
+	.test_request_completion = _starpu_disk_test_request_completion,
+};
diff --git a/src/drivers/disk/driver_disk.h b/src/drivers/disk/driver_disk.h
new file mode 100644
index 0000000..fe0df11
--- /dev/null
+++ b/src/drivers/disk/driver_disk.h
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_DISK_H__ +#define __DRIVER_DISK_H__ + +/** @file */ + +#pragma GCC visibility push(hidden) +/** Register the disk memory driver description for STARPU_DISK_RAM memory nodes. */ +void _starpu_disk_preinit(void); + +#pragma GCC visibility pop + +#endif diff --git a/src/drivers/driver_common/driver_common.c b/src/drivers/driver_common/driver_common.c new file mode 100644 index 0000000..e74d567 --- /dev/null +++ b/src/drivers/driver_common/driver_common.c @@ -0,0 +1,948 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_MMAP +#include +#endif +#include +#include +#include + + +void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, int rank, int profiling) +{ + struct starpu_task *task = j->task; + struct starpu_codelet *cl = task->cl; + int workerid = worker->workerid; + unsigned calibrate_model = 0; + + if (worker->bindid_requested != -1) + { + typedef unsigned __attribute__((__may_alias__)) alias_unsigned; + typedef int __attribute__((__may_alias__)) alias_int; + + unsigned raw_bindid_requested = STARPU_VAL_EXCHANGE((alias_unsigned *)&worker->bindid_requested, -1); + int bindid_requested = *(alias_int *)&raw_bindid_requested; + + if (bindid_requested != -1) + { + worker->bindid = bindid_requested; + _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); + } + } + if (cl->model && cl->model->benchmarking) + calibrate_model = 1; + + /* If the job is executed on a combined worker there is no need for the + * scheduler to process on non-master : it doesn't contain any valuable data + * as it's not linked to an actual worker */ + if (j->task_size == 1 && rank == 0) + _starpu_sched_pre_exec_hook(task); + + struct timespec start; + + struct starpu_profiling_task_info *profiling_info = task->profiling_info; + if ((profiling && profiling_info) || (rank == 0 && (calibrate_model || !_starpu_perf_counter_paused()))) + _starpu_clock_gettime(&start); + _starpu_add_worker_status(worker, STATUS_INDEX_EXECUTING, &start); + + if (rank == 0) + { + STARPU_ASSERT(task->status == STARPU_TASK_READY); + if (!_starpu_perf_counter_paused() && !j->internal) + { + (void)STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, -1); + if (task->cl && task->cl->perf_counter_values) + { + struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; + 
(void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, -1); + } + } + task->status = STARPU_TASK_RUNNING; + + STARPU_AYU_RUNTASK(j->job_id); + if (_starpu_codelet_profiling) + cl->per_worker_stats[workerid]++; + + if ((profiling && profiling_info) || calibrate_model || !_starpu_perf_counter_paused()) + { + worker->cl_start = start; + if (profiling && profiling_info) + { + profiling_info->start_time = start; + profiling_info->workerid = workerid; + } + + if (task->predicted && !isnan(task->predicted)) + { + struct timespec exp_end = start; + exp_end.tv_sec += task->predicted / 1000000; + exp_end.tv_nsec += fmod(task->predicted, 1000000.) * 1000; + if (exp_end.tv_nsec >= 1000000000) + { + exp_end.tv_sec++; + exp_end.tv_nsec -= 1000000000; + } + worker->cl_expend = exp_end; + } + else + { + worker->cl_expend.tv_sec = 0; + worker->cl_expend.tv_nsec = 0; + } + } + else + { + worker->cl_start.tv_sec = 0; + worker->cl_start.tv_nsec = 0; + } + + _starpu_job_notify_start(j, perf_arch); + } + + // Find out if the worker is the master of a parallel context + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + if(!sched_ctx) + sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); + _starpu_sched_ctx_lock_read(sched_ctx->id); + if(!sched_ctx->sched_policy) + { + if(!sched_ctx->awake_workers && sched_ctx->main_master == worker->workerid) + { + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_sched_ctx_iterator it; + int new_rank = 0; + + if (workers->init_iterator) + workers->init_iterator(workers, &it); + while (workers->has_next(workers, &it)) + { + int _workerid = workers->get_next(workers, &it); + if (_workerid != workerid) + { + new_rank++; + struct _starpu_worker *_worker = _starpu_get_worker_struct(_workerid); + _starpu_driver_start_job(_worker, j, &_worker->perf_arch, new_rank, profiling); + } + } + } + _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); + _STARPU_TRACE_START_CODELET_BODY(j, 
j->nimpl, &sched_ctx->perf_arch, workerid, rank); + } + else + { + _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); + _STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); + } + _starpu_sched_ctx_unlock_read(sched_ctx->id); + _STARPU_TASK_BREAK_ON(task, exec); +} + +void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, int rank, int profiling) +{ + struct starpu_task *task = j->task; + struct starpu_codelet *cl = task->cl; + int workerid = worker->workerid; + unsigned calibrate_model = 0; + + // Find out if the worker is the master of a parallel context + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + if(!sched_ctx) + sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); + + if (!sched_ctx->sched_policy) + { + _starpu_perfmodel_create_comb_if_needed(&(sched_ctx->perf_arch)); + _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, &(sched_ctx->perf_arch), workerid, rank); + } + else + { + _starpu_perfmodel_create_comb_if_needed(perf_arch); + _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); + } + + if (cl && cl->model && cl->model->benchmarking) + calibrate_model = 1; + + struct timespec end; + struct starpu_profiling_task_info *profiling_info = task->profiling_info; + if ((profiling && profiling_info) || (rank == 0 && (calibrate_model || !_starpu_perf_counter_paused() || + worker->cl_start.tv_sec || worker->cl_start.tv_nsec))) + _starpu_clock_gettime(&end); + _starpu_clear_worker_status(worker, STATUS_INDEX_EXECUTING, &end); + + if (rank == 0) + { + if (worker->cl_start.tv_sec || worker->cl_start.tv_nsec) + worker->cl_end = end; + STARPU_AYU_POSTRUNTASK(j->job_id); + } + + if(!sched_ctx->sched_policy && !sched_ctx->awake_workers && + sched_ctx->main_master == worker->workerid) + { + struct starpu_worker_collection *workers = sched_ctx->workers; + struct starpu_sched_ctx_iterator it; + int 
new_rank = 0; + + if (workers->init_iterator) + workers->init_iterator(workers, &it); + while (workers->has_next(workers, &it)) + { + int _workerid = workers->get_next(workers, &it); + if (_workerid != workerid) + { + new_rank++; + struct _starpu_worker *_worker = _starpu_get_worker_struct(_workerid); + _starpu_driver_end_job(_worker, j, &_worker->perf_arch, new_rank, profiling); + } + } + } +} + +void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker, + struct starpu_perfmodel_arch* perf_arch, + int profiling) +{ + struct starpu_profiling_task_info *profiling_info = j->task->profiling_info; + struct timespec measured_ts; + int workerid = worker->workerid; + struct starpu_codelet *cl = j->task->cl; + int calibrate_model = 0; + int updated = 0; + + _starpu_perfmodel_create_comb_if_needed(perf_arch); + +#ifndef STARPU_SIMGRID + if (cl->model && cl->model->benchmarking) + calibrate_model = 1; +#endif + + if (worker->cl_start.tv_sec || worker->cl_start.tv_nsec) + { + starpu_timespec_sub(&worker->cl_end, &worker->cl_start, &measured_ts); + double measured = starpu_timing_timespec_to_us(&measured_ts); + + STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured); + + if (!_starpu_perf_counter_paused()) + { + worker->__w_total_executed__value++; + worker->__w_cumul_execution_time__value += measured; + _starpu_perf_counter_update_per_worker_sample(worker->workerid); + if (cl->perf_counter_values) + { + struct starpu_perf_counter_sample_cl_values * const pcv = cl->perf_counter_values; + (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.total_executed, 1); + _starpu_perf_counter_update_acc_double(&pcv->task.cumul_execution_time, measured); + _starpu_perf_counter_update_per_codelet_sample(cl); + } + } + + if (profiling && profiling_info) + { + profiling_info->end_time = worker->cl_end; + + _starpu_worker_update_profiling_info_executing(workerid, 1, + profiling_info->used_cycles, + profiling_info->stall_cycles, + 
profiling_info->energy_consumed, + j->task->flops); + updated = 1; + } + + if (calibrate_model) + { +#ifdef STARPU_OPENMP + double time_consumed = measured; + unsigned do_update_time_model; + if (j->continuation) + { + /* The job is only paused, thus we accumulate + * its timing, but we don't update its + * perfmodel now. */ + starpu_timespec_accumulate(&j->cumulated_ts, &measured_ts); + do_update_time_model = 0; + } + else + { + if (j->discontinuous) + { + /* The job was paused at least once but is now + * really completing. We need to take into + * account its past execution time in its + * perfmodel. */ + starpu_timespec_accumulate(&measured_ts, &j->cumulated_ts); + time_consumed = starpu_timing_timespec_to_us(&measured_ts); + } + do_update_time_model = 1; + } +#else + unsigned do_update_time_model = 1; + const double time_consumed = measured; +#endif + if (j->task->failed) + /* Do not record perfmodel for failed tasks, they may terminate earlier */ + do_update_time_model = 0; + if (do_update_time_model) + { + _starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl, 1); + } + } + } + + if (!updated) + _starpu_worker_update_profiling_info_executing(workerid, 1, 0, 0, 0, 0); + + if (profiling_info && profiling_info->energy_consumed && cl->energy_model && cl->energy_model->benchmarking) + { +#ifdef STARPU_OPENMP + double energy_consumed = profiling_info->energy_consumed; + unsigned do_update_energy_model; + if (j->continuation) + { + j->cumulated_energy_consumed += energy_consumed; + do_update_energy_model = 0; + } + else + { + if (j->discontinuous) + { + energy_consumed += j->cumulated_energy_consumed; + } + do_update_energy_model = 1; + } +#else + const double energy_consumed = profiling_info->energy_consumed; + unsigned do_update_energy_model = 1; +#endif + + if (j->task->failed) + /* Do not record perfmodel for failed tasks, they may terminate earlier */ + do_update_energy_model = 0; + if 
(do_update_energy_model) + { + _starpu_update_perfmodel_history(j, j->task->cl->energy_model, perf_arch, worker->devid, energy_consumed, j->nimpl, 1); + } + } +} + +static void _starpu_worker_set_status_scheduling(int workerid) +{ + if (!(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING)) + { + if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) + _STARPU_TRACE_WORKER_SCHEDULING_START; + _starpu_worker_add_status(workerid, STATUS_INDEX_SCHEDULING); + } +} + +static void _starpu_worker_set_status_scheduling_done(int workerid) +{ + STARPU_ASSERT(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING); + if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) + _STARPU_TRACE_WORKER_SCHEDULING_END; + _starpu_worker_clear_status(workerid, STATUS_INDEX_SCHEDULING); +} + +static void _starpu_worker_set_status_sleeping(int workerid) +{ + if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) + { + _STARPU_TRACE_WORKER_SLEEP_START; + _starpu_worker_add_status(workerid, STATUS_INDEX_SLEEPING); + } +} + +static void _starpu_worker_set_status_wakeup(int workerid) +{ + if ((_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) + { + _STARPU_TRACE_WORKER_SLEEP_END; + _starpu_worker_clear_status(workerid, STATUS_INDEX_SLEEPING); + } +} + + +#if !defined(STARPU_SIMGRID) +static void _starpu_exponential_backoff(struct _starpu_worker *worker) +{ + int delay = worker->spinning_backoff; + + if (worker->spinning_backoff < worker->config->conf.driver_spinning_backoff_max) + worker->spinning_backoff<<=1; + + while(delay--) + STARPU_UYIELD(); +} +#endif + + + +/* Workers may block when there is no work to do at all. 
*/ +struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int workerid, unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_task *task; +#if !defined(STARPU_SIMGRID) + unsigned keep_awake = 0; +#endif + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); + _starpu_worker_enter_sched_op(worker); + _starpu_worker_set_status_scheduling(workerid); +#if !defined(STARPU_SIMGRID) + if ((worker->pipeline_length == 0 && worker->current_task) + || (worker->pipeline_length != 0 && worker->ntasks)) + /* This worker is executing something */ + keep_awake = 1; +#endif + + /*if the worker is already executing a task then */ + if (worker->pipeline_length && (worker->ntasks == worker->pipeline_length || worker->pipeline_stuck)) + task = NULL; + /* don't push a task if we are already transferring one */ + else if (worker->task_transferring != NULL) + task = NULL; + /*else try to pop a task*/ + else + { + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + task = _starpu_pop_task(worker); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); +#if !defined(STARPU_SIMGRID) + if (worker->state_keep_awake) + { + keep_awake = 1; + worker->state_keep_awake = 0; + } +#endif + } + +#if !defined(STARPU_SIMGRID) + if (task == NULL && !keep_awake) + { + /* Didn't get a task to run and none are running, go to sleep */ + + /* Note: we need to keep the sched condition mutex all along the path + * from popping a task from the scheduler to blocking. Otherwise the + * driver may go block just after the scheduler got a new task to be + * executed, and thus hanging. 
*/ + _starpu_worker_set_status_sleeping(workerid); + _starpu_worker_leave_sched_op(worker); + STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + if (_starpu_worker_can_block(memnode, worker) + && !worker->state_block_in_parallel_req + && !worker->state_unblock_in_parallel_req + && !_starpu_sched_ctx_last_worker_awake(worker)) + { + +#ifdef STARPU_WORKER_CALLBACKS + if (_starpu_config.conf.callback_worker_going_to_sleep != NULL) + { + _starpu_config.conf.callback_worker_going_to_sleep(workerid); + } +#endif + do + { + STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); + if (!worker->state_keep_awake + && _starpu_worker_can_block(memnode, worker) + && !worker->state_block_in_parallel_req + && !worker->state_unblock_in_parallel_req) + { + _starpu_worker_set_status_sleeping(workerid); + if (_starpu_sched_ctx_last_worker_awake(worker)) + { + break; + } + } + else + { + break; + } + } + while (1); + worker->state_keep_awake = 0; + _starpu_worker_set_status_scheduling_done(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); +#ifdef STARPU_WORKER_CALLBACKS + if (_starpu_config.conf.callback_worker_waking_up != NULL) + { + /* the wake up callback should be called once the sched_mutex has been unlocked, + * so that an external resource manager can potentially defer the wake-up momentarily if + * the corresponding computing unit is still in use by another runtime system */ + _starpu_config.conf.callback_worker_waking_up(workerid); + } +#endif + } + else +#endif + { + _starpu_worker_set_status_scheduling_done(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + if (_starpu_machine_is_running()) + _starpu_exponential_backoff(worker); + } + + return NULL; + } +#endif + + if (task) + { + _starpu_worker_set_status_scheduling_done(workerid); + _starpu_worker_set_status_wakeup(workerid); + } + else + { + _starpu_worker_set_status_sleeping(workerid); + } + worker->spinning_backoff = 
worker->config->conf.driver_spinning_backoff_min; + + _starpu_worker_leave_sched_op(worker); + STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + + + STARPU_AYU_PRERUNTASK(_starpu_get_job_associated_to_task(task)->job_id, workerid); + + return task; +} + + +int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworkers, unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ + int i, count = 0; + struct _starpu_job * j; + int is_parallel_task; + struct _starpu_combined_worker *combined_worker; +#if !defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) + int executing = 0; +#endif + /*for each worker*/ +#ifndef STARPU_NON_BLOCKING_DRIVERS + /* This assumes only 1 worker */ + STARPU_ASSERT_MSG(nworkers == 1, "Multiple workers is not yet possible in blocking drivers mode\n"); + _starpu_set_local_worker_key(&workers[0]); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[0].sched_mutex); + _starpu_worker_enter_sched_op(&workers[0]); +#endif + for (i = 0; i < nworkers; i++) + { + unsigned keep_awake = 0; +#if !defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) + if ((workers[i].pipeline_length == 0 && workers[i].current_task) + || (workers[i].pipeline_length != 0 && workers[i].ntasks)) + /* At least this worker is executing something */ + executing = 1; +#endif + /*if the worker is already executing a task then */ + if((workers[i].pipeline_length == 0 && workers[i].current_task) + || (workers[i].pipeline_length != 0 && + (workers[i].ntasks == workers[i].pipeline_length + || workers[i].pipeline_stuck))) + { + tasks[i] = NULL; + } + /* don't push a task if we are already transferring one */ + else if (workers[i].task_transferring != NULL) + { + tasks[i] = NULL; + } + /*else try to pop a task*/ + else + { +#ifdef STARPU_NON_BLOCKING_DRIVERS + _starpu_set_local_worker_key(&workers[i]); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[i].sched_mutex); + 
_starpu_worker_enter_sched_op(&workers[i]); +#endif + _starpu_worker_set_status_scheduling(workers[i].workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); + tasks[i] = _starpu_pop_task(&workers[i]); + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[i].sched_mutex); + if (workers[i].state_keep_awake) + { + keep_awake = workers[i].state_keep_awake; + workers[i].state_keep_awake = 0; + } + if(tasks[i] != NULL || keep_awake) + { + _starpu_worker_set_status_scheduling_done(workers[i].workerid); + _starpu_worker_set_status_wakeup(workers[i].workerid); + STARPU_PTHREAD_COND_BROADCAST(&workers[i].sched_cond); +#ifdef STARPU_NON_BLOCKING_DRIVERS + _starpu_worker_leave_sched_op(&workers[i]); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); +#endif + + count ++; + if (tasks[i] == NULL) + /* no task, but keep_awake */ + continue; + j = _starpu_get_job_associated_to_task(tasks[i]); + is_parallel_task = (j->task_size > 1); + if (workers[i].pipeline_length) + workers[i].current_tasks[(workers[i].first_task + workers[i].ntasks)%STARPU_MAX_PIPELINE] = tasks[i]; + else + workers[i].current_task = j->task; + workers[i].ntasks++; + /* Get the rank in case it is a parallel task */ + if (is_parallel_task) + { + + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + workers[i].current_rank = j->active_task_alias_count++; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + if(j->combined_workerid != -1) + { + combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid); + workers[i].combined_workerid = j->combined_workerid; + workers[i].worker_size = combined_worker->worker_size; + } + } + else + { + workers[i].combined_workerid = workers[i].workerid; + workers[i].worker_size = 1; + workers[i].current_rank = 0; + } + STARPU_AYU_PRERUNTASK(_starpu_get_job_associated_to_task(tasks[i])->job_id, workers[i].workerid); + } + else + { + _starpu_worker_set_status_sleeping(workers[i].workerid); +#ifdef STARPU_NON_BLOCKING_DRIVERS + 
_starpu_worker_leave_sched_op(&workers[i]); +#endif + STARPU_PTHREAD_COND_BROADCAST(&workers[i].sched_cond); +#ifdef STARPU_NON_BLOCKING_DRIVERS + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); +#endif + } + } + } + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) +#if !defined(STARPU_SIMGRID) + /* Block the assumed-to-be-only worker */ + struct _starpu_worker *worker = &workers[0]; + unsigned workerid = workers[0].workerid; + + if (!count && !executing) + { + /* Didn't get a task to run and none are running, go to sleep */ + + /* Note: we need to keep the sched condition mutex all along the path + * from popping a task from the scheduler to blocking. Otherwise the + * driver may go block just after the scheduler got a new task to be + * executed, and thus hanging. */ + _starpu_worker_set_status_sleeping(workerid); + _starpu_worker_leave_sched_op(worker); + + if (_starpu_worker_can_block(memnode, worker) + && !worker->state_block_in_parallel_req + && !worker->state_unblock_in_parallel_req + && !_starpu_sched_ctx_last_worker_awake(worker)) + { +#ifdef STARPU_WORKER_CALLBACKS + if (_starpu_config.conf.callback_worker_going_to_sleep != NULL) + { + _starpu_config.conf.callback_worker_going_to_sleep(workerid); + } +#endif + do + { + STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); + if (!worker->state_keep_awake + && _starpu_worker_can_block(memnode, worker) + && !worker->state_block_in_parallel_req + && !worker->state_unblock_in_parallel_req) + { + _starpu_worker_set_status_sleeping(workerid); + if (_starpu_sched_ctx_last_worker_awake(worker)) + { + break; + } + } + else + { + break; + } + } + while (1); + worker->state_keep_awake = 0; + _starpu_worker_set_status_scheduling_done(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); +#ifdef STARPU_WORKER_CALLBACKS + if (_starpu_config.conf.callback_worker_waking_up != NULL) + { + /* the wake up callback should be called once the sched_mutex has been unlocked, + * so that an 
external resource manager can potentially defer the wake-up momentarily if + * the corresponding computing unit is still in use by another runtime system */ + _starpu_config.conf.callback_worker_waking_up(workerid); + } +#endif + } + else + { + _starpu_worker_set_status_scheduling_done(workerid); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); + if (_starpu_machine_is_running()) + _starpu_exponential_backoff(worker); + } + return 0; + } + + _starpu_worker_set_status_wakeup(workerid); + worker->spinning_backoff = worker->config->conf.driver_spinning_backoff_min; +#endif /* !STARPU_SIMGRID */ + + _starpu_worker_leave_sched_op(&workers[0]); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[0].sched_mutex); +#endif /* !STARPU_NON_BLOCKING_DRIVERS */ + + return count; +} + +#ifdef HAVE_MMAP +/*generate and initialize rbtree map_tree*/ +static struct starpu_rbtree map_tree = STARPU_RBTREE_INITIALIZER; +static starpu_pthread_mutex_t map_tree_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + +struct map_allocate_info +{ + struct starpu_rbtree_node map_node; + void* map_addr; + size_t length; + char name[]; +}; + +/* the cmp_fn arg for rb_tree_insert() */ +static unsigned int map_addr_cmp_insert(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) +{ + unsigned int addr_left = (uintptr_t)((struct map_allocate_info *) left_elm)->map_addr; + unsigned int addr_right = (uintptr_t)((struct map_allocate_info *) right_elm)->map_addr; + + return addr_left - addr_right; +} + +/* the cmp_fn arg for starpu_rbtree_lookup() */ +static unsigned int map_addr_cmp_lookup(uintptr_t addr_left, struct starpu_rbtree_node * right_elm) +{ + unsigned int addr_right = (uintptr_t)((struct map_allocate_info *) right_elm)->map_addr; + + return addr_left - addr_right; +} + +void *_starpu_map_allocate(size_t length, unsigned node) +{ + /*file*/ + int fd; + char fd_name[32]; + snprintf(fd_name,sizeof(fd_name), "starpu-%u-XXXXXX", node); + + while(1) + { + mktemp(fd_name); + fd = 
shm_open(fd_name, O_RDWR|O_CREAT|O_EXCL, 0600); + if(fd >= 0) + break; + /* if name is already existed, recreate one*/ + else if (errno == EEXIST) + continue; + else + { + perror("fail to open file"); + return NULL; + } + } + + /*fix the length of file*/ + int ret = ftruncate(fd, length); + if (ret < 0) + { + perror("fail to allocate room for mapping"); + close(fd); + return NULL; + } + void* map_addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (map_addr == MAP_FAILED) + { + perror("fail to map"); + return NULL; + } + + struct map_allocate_info *map_info; + _STARPU_MALLOC(map_info, sizeof(struct map_allocate_info)+strlen(fd_name)+1); + map_info->map_addr = map_addr; + map_info->length = length; + memcpy(map_info->name, fd_name, strlen(fd_name)+1); + + starpu_rbtree_node_init(&map_info->map_node); + + STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); + starpu_rbtree_insert(&map_tree, &map_info->map_node, map_addr_cmp_insert); + STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); + + return map_addr; +} + +int _starpu_map_deallocate(void* map_addr, size_t length) +{ + STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); + struct starpu_rbtree_node * currentNode = starpu_rbtree_lookup(&map_tree, (uintptr_t)map_addr, map_addr_cmp_lookup); + STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); + + if (currentNode != NULL) + { + struct map_allocate_info * map_info = (struct map_allocate_info *) currentNode; + if ((uintptr_t)map_addr == (uintptr_t)map_info->map_addr && ((uintptr_t)map_addr + length) == ((uintptr_t)map_info->map_addr + map_info->length)) + { + /*unlink the map fd name*/ + if (shm_unlink(map_info->name) != 0) + { + _STARPU_DISP("warning: cannot unlink file %s: %s\n", map_info->name, strerror(errno)); + } + STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); + starpu_rbtree_remove(&map_tree, &map_info->map_node); + STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); + free(map_info); + } + else + { + return -1; + } + } + else + { + _STARPU_DISP("could not find 
/* Map \p length bytes starting at byte \p offset of the shared-memory object
 * named \p fd_name.
 *
 * mmap() only accepts page-aligned file offsets, so the mapping starts at the
 * page boundary preceding \p offset and is extended by the distance between
 * the two, so that the whole [offset, offset+length) window is covered.
 * The returned pointer designates byte \p offset of the object, and has to be
 * released with _starpu_sink_unmap().
 * Returns NULL (after perror()) when the object cannot be opened or mapped. */
void *_starpu_sink_map(char *fd_name, size_t offset, size_t length)
{
	/*file*/
	int fd;

	fd = shm_open(fd_name, O_RDWR, 0600);

	if(fd < 0)
	{
		perror("fail to open file");
		return NULL;
	}

	/* offset for mmap() must be page aligned */
	off_t pa_offset = offset & ~(sysconf(_SC_PAGE_SIZE) - 1);
	/* bytes between the start of the mapping and the requested data */
	size_t delta = offset - pa_offset;

	/* map delta extra bytes: the mapping starts delta bytes before the
	 * requested window, so mapping only length bytes would leave the tail
	 * of the window unmapped whenever offset is not page aligned */
	void *map_sink_addr = mmap(NULL, length + delta, PROT_READ|PROT_WRITE, MAP_SHARED, fd, pa_offset);
	close(fd);
	if (map_sink_addr == MAP_FAILED)
	{
		perror("fail to map");
		return NULL;
	}

	return (void*)((uintptr_t)map_sink_addr + delta);
}

/* Release a window obtained from _starpu_sink_map().
 *
 * \p map_addr is the pointer returned by _starpu_sink_map() and \p length the
 * length that was requested. The start of the underlying mapping is recovered
 * by rounding \p map_addr down to a page boundary, and the unmapped size is
 * extended by the same amount.
 * Returns 0 on success, -1 (after perror()) when munmap() fails. */
int _starpu_sink_unmap(uintptr_t map_addr, size_t length)
{
	uintptr_t pa_addr = map_addr & ~(sysconf(_SC_PAGE_SIZE) - 1);
	size_t offset = map_addr-pa_addr;

	int res = munmap((void*)pa_addr, length + offset);
	if (res < 0)
	{
		perror("fail to unmap");
		return -1;
	}
	return 0;
}
*_starpu_sink_map(char *fd_name, size_t offset, size_t length) +{ + (void)fd_name; + (void)offset; + (void)length; + return NULL; +} +int _starpu_sink_unmap(uintptr_t map_addr, size_t length) +{ + (void)map_addr; + (void)length; + return -1; +} +#endif diff --git a/src/drivers/driver_common/driver_common.h b/src/drivers/driver_common/driver_common.h new file mode 100644 index 0000000..555c928 --- /dev/null +++ b/src/drivers/driver_common/driver_common.h @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_COMMON_H__ +#define __DRIVER_COMMON_H__ + +/** @file */ + +#include +#include +#include +#include + +/** The task job is about to start (or has already started when kernels are + * queued in a pipeline), record profiling and trace information. */ +void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, + int rank, int profiling); +/** The task job has ended, record profiling and trace information. 
*/ +void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, + int rank, int profiling); +/** Feed performance model with the terminated job statistics */ +void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args, + struct starpu_perfmodel_arch* perf_arch, int profiling); + +#pragma GCC visibility push(hidden) + +/** Get from the scheduler a task to be executed on the worker \p workerid */ +struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode); +/** Get from the scheduler tasks to be executed on the workers \p workers */ +int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworker, unsigned memnode); + +void *_starpu_map_allocate(size_t length, unsigned node); +int _starpu_map_deallocate(void* map_addr, size_t length); +char* _starpu_get_fdname_from_mapaddr(uintptr_t map_addr, size_t *offset, size_t length); +void *_starpu_sink_map(char *fd_name, size_t offset, size_t length); +int _starpu_sink_unmap(uintptr_t map_addr, size_t length); + +#pragma GCC visibility pop + +#endif // __DRIVER_COMMON_H__ diff --git a/src/drivers/hip/driver_hip.c b/src/drivers/hip/driver_hip.c new file mode 100644 index 0000000..287ef6c --- /dev/null +++ b/src/drivers/hip/driver_hip.c @@ -0,0 +1,1711 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX +#include +#endif + +#define starpu_hipStreamCreate(stream) hipStreamCreateWithFlags(stream, hipStreamNonBlocking) + +/* Consider a rough 10% overhead cost */ +#define FREE_MARGIN 0.9 + +static size_t global_mem[STARPU_MAXHIPDEVS]; +int _starpu_hip_bus_ids[STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES][STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES]; +static hipStream_t streams[STARPU_NMAXWORKERS]; +static char used_stream[STARPU_NMAXWORKERS]; +static hipStream_t out_transfer_streams[STARPU_MAXHIPDEVS]; +static hipStream_t in_transfer_streams[STARPU_MAXHIPDEVS]; +/* Note: streams are not thread-safe, so we define them for each HIP worker + * emitting a GPU-GPU transfer */ +static hipStream_t in_peer_transfer_streams[STARPU_MAXHIPDEVS][STARPU_MAXHIPDEVS]; +static struct hipDeviceProp_t props[STARPU_MAXHIPDEVS]; 
+static hipEvent_t task_events[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; + +static unsigned hip_bindid_init[STARPU_MAXHIPDEVS]; +static unsigned hip_bindid[STARPU_MAXHIPDEVS]; +static unsigned hip_memory_init[STARPU_MAXHIPDEVS]; +static unsigned hip_memory_nodes[STARPU_MAXHIPDEVS]; + +static struct _starpu_worker_set hip_worker_set[STARPU_MAXHIPDEVS]; +static enum initialization hip_device_init[STARPU_MAXHIPDEVS]; +static int hip_device_users[STARPU_MAXHIPDEVS]; +static starpu_pthread_mutex_t hip_device_init_mutex[STARPU_MAXHIPDEVS]; +static starpu_pthread_cond_t hip_device_init_cond[STARPU_MAXHIPDEVS]; +static int hip_globalbindid; + +static int _starpu_hip_peer_access(int devid, int peer_devid); + +int _starpu_nworker_per_hip; + +static size_t _starpu_hip_get_global_mem_size(unsigned devid) +{ + return global_mem[devid]; +} + +static hipStream_t starpu_hip_get_in_transfer_stream(unsigned dst_node) +{ + int dst_devid = starpu_memory_node_get_devid(dst_node); + hipStream_t stream; + + stream = in_transfer_streams[dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +static hipStream_t starpu_hip_get_out_transfer_stream(unsigned src_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + hipStream_t stream; + + stream = out_transfer_streams[src_devid]; + STARPU_ASSERT(stream); + return stream; +} + +static hipStream_t starpu_hip_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) +{ + int src_devid = starpu_memory_node_get_devid(src_node); + int dst_devid = starpu_memory_node_get_devid(dst_node); + hipStream_t stream; + + stream = in_peer_transfer_streams[src_devid][dst_devid]; + STARPU_ASSERT(stream); + return stream; +} + +hipStream_t starpu_hip_get_local_stream(void) +{ + int worker = starpu_worker_get_id_check(); + + used_stream[worker] = 1; + return streams[worker]; +} + +const struct hipDeviceProp_t *starpu_hip_get_device_properties(unsigned workerid) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + 
unsigned devid = config->workers[workerid].devid; + return &props[devid]; +} + +/* Early library initialization, before anything else, just initialize data */ +void _starpu_hip_init(void) +{ + int i; + for (i = 0; i < STARPU_MAXHIPDEVS; i++) + { + STARPU_PTHREAD_MUTEX_INIT(&hip_device_init_mutex[i], NULL); + STARPU_PTHREAD_COND_INIT(&hip_device_init_cond[i], NULL); + } + memset(&hip_bindid_init, 0, sizeof(hip_bindid_init)); + memset(&hip_memory_init, 0, sizeof(hip_memory_init)); + hip_globalbindid = -1; +} + +/* Return the number of devices usable in the system. + * The value returned cannot be greater than MAXHIPDEVS */ +static unsigned _starpu_get_hip_device_count(void) +{ + int cnt; + hipError_t hipres; + hipres = hipGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(hipres)) + return 0; + + if (cnt > STARPU_MAXHIPDEVS) + { + _STARPU_MSG("# Warning: %d HIP devices available. Only %d enabled. Use configure option --enable-maxhipdev=xxx to update the maximum value of supported HIP devices.\n", cnt, STARPU_MAXHIPDEVS); + cnt = STARPU_MAXHIPDEVS; + } + return (unsigned)cnt; +} + +/* This is run from initialize to determine the number of HIP devices */ +void _starpu_init_hip(void) +{ +} + +/* This is called to really discover the hardware */ +void _starpu_hip_discover_devices(struct _starpu_machine_config *config) +{ + /* Discover the number of HIP devices. Fill the result in CONFIG. */ + + int cnt; + hipError_t hipres; + + hipres = hipGetDeviceCount(&cnt); + if (STARPU_UNLIKELY(hipres != hipSuccess)) + cnt = 0; + config->topology.nhwdevices[STARPU_HIP_WORKER] = cnt; +} + +static void _starpu_initialize_workers_hip_gpuid(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + struct starpu_conf *uconf = &config->conf; + + _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_hip_gpuid == 0 + ? 
NULL + : (int *)uconf->workers_hip_gpuid, + &(config->current_devid[STARPU_HIP_WORKER]), + (int *)topology->workers_devid[STARPU_HIP_WORKER], + "STARPU_WORKERS_HIPID", + topology->nhwdevices[STARPU_HIP_WORKER], + STARPU_HIP_WORKER); + + _starpu_devices_gpu_clear(config, STARPU_HIP_WORKER); + _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_HIP_WORKER]); +} + +/* Determine which devices we will use */ +void _starpu_init_hip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int i; + + for (i = 0; i < (int) (sizeof(hip_worker_set)/sizeof(hip_worker_set[0])); i++) + hip_worker_set[i].workers = NULL; + + int nhip = config->conf.nhip; + + if (nhip != 0) + { + /* The user did not disable HIP. We need to initialize HIP + * early to count the number of devices */ + _starpu_init_hip(); + int nb_devices = _starpu_get_hip_device_count(); + + _starpu_topology_check_ndevices(&nhip, nb_devices, 0, STARPU_MAXHIPDEVS, 0, "nhip", "HIP", "maxhipdev"); + } + + int nworker_per_hip = starpu_get_env_number_default("STARPU_NWORKER_PER_HIP", 1); + + STARPU_ASSERT_MSG(nworker_per_hip > 0, "STARPU_NWORKER_PER_HIP has to be > 0"); + STARPU_ASSERT_MSG_ALWAYS(nworker_per_hip < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_HIP (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_hip, STARPU_NMAXWORKERS); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + if (nworker_per_hip > 1) + { + _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_HIP to 1 because blocking drivers are enabled\n"); + nworker_per_hip = 1; + } + _starpu_nworker_per_hip = nworker_per_hip; +#endif + + /* Now we know how many HIP devices will be used */ + topology->ndevices[STARPU_HIP_WORKER] = nhip; + + _starpu_initialize_workers_hip_gpuid(config); + + /* allow having one worker per stream */ + topology->hip_th_per_stream = starpu_get_env_number_default("STARPU_HIP_THREAD_PER_WORKER", -1); + topology->hip_th_per_dev = starpu_get_env_number_default("STARPU_HIP_THREAD_PER_DEV", 
-1); + + STARPU_ASSERT_MSG(!(topology->hip_th_per_stream == 1 && topology->hip_th_per_dev != -1), "It does not make sense to set both STARPU_HIP_THREAD_PER_WORKER to 1 and to set STARPU_HIP_THREAD_PER_DEV, please choose either per worker or per device or none"); + + /* per device by default */ + if (topology->hip_th_per_dev == -1) + { + if (topology->hip_th_per_stream == 1) + topology->hip_th_per_dev = 0; + else + topology->hip_th_per_dev = 1; + } + /* Not per stream by default */ + if (topology->hip_th_per_stream == -1) + { + topology->hip_th_per_stream = 0; + } + + if (!topology->hip_th_per_dev) + { + hip_worker_set[0].workers = &config->workers[topology->nworkers]; + hip_worker_set[0].nworkers = nhip * nworker_per_hip; + } + + unsigned hipgpu; + for (hipgpu = 0; (int) hipgpu < nhip; hipgpu++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_HIP_WORKER); + + if (devid == -1) + { + // There is no more devices left + topology->ndevices[STARPU_HIP_WORKER] = hipgpu; + break; + } + + struct _starpu_worker_set *worker_set; + + if(topology->hip_th_per_stream) + { + worker_set = ALLOC_WORKER_SET; + } + else if (topology->hip_th_per_dev) + { + worker_set = &hip_worker_set[devid]; + worker_set->workers = &config->workers[topology->nworkers]; + worker_set->nworkers = nworker_per_hip; + } + else + { + /* Same worker set for all devices */ + worker_set = &hip_worker_set[0]; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_HIP_WORKER, + hipgpu, devid, 0, 0, + nworker_per_hip, + // TODO: fix perfmodels etc. 
+ // nworker_per_hip - 1, + 1, + worker_set, NULL); + + _starpu_devices_gpu_set_used(devid); + +/* TODO: move this to generic place */ +#ifdef STARPU_HAVE_HWLOC + { + hwloc_obj_t obj = NULL; + if (starpu_driver_info[STARPU_HIP_WORKER].get_hwloc_obj) + obj = starpu_driver_info[STARPU_HIP_WORKER].get_hwloc_obj(topology->hwtopology, devid); + + if (obj) + { + struct _starpu_hwloc_userdata *data = obj->userdata; + data->ngpus++; + } + else + { + _STARPU_DEBUG("Warning: could not find location of HIP%u, do you have the hwloc HIP plugin installed?\n", devid); + } + } +#endif + } +} + +/* Bind the driver on a CPU core */ +void _starpu_hip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned *preferred_binding = NULL; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + + if (hip_bindid_init[devid]) + { + if (config->topology.hip_th_per_stream == 0) + workerarg->bindid = hip_bindid[devid]; + else + workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } + else + { + hip_bindid_init[devid] = 1; + + if (config->topology.hip_th_per_dev == 0 && config->topology.hip_th_per_stream == 0) + { + if (hip_globalbindid == -1) + hip_globalbindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + workerarg->bindid = hip_bindid[devid] = hip_globalbindid; + } + else + { + workerarg->bindid = hip_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } + } +} + +/* Set up memory and buses */ +void _starpu_hip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa; + + if (hip_memory_init[devid]) + { + memory_node = 
hip_memory_nodes[devid]; + } + else + { + hip_memory_init[devid] = 1; + + memory_node = hip_memory_nodes[devid] = _starpu_memory_node_register(STARPU_HIP_RAM, devid); + +#ifdef STARPU_USE_HIP_MAP + /* TODO: check node capabilities */ + _starpu_memory_node_set_mapped(memory_node); +#endif + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_hip_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); + _starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); + } + + if ( +#if defined(STARPU_HAVE_HIP_MEMCPY_PEER) + 1 +#else /* MEMCPY_PEER */ + 0 +#endif /* MEMCPY_PEER */ + ) + { + int worker2; + for (worker2 = 0; worker2 < workerarg->workerid; worker2++) + { + struct _starpu_worker *workerarg2 = &config->workers[worker2]; + int devid2 = workerarg2->devid; + if (workerarg2->arch == STARPU_HIP_WORKER) + { + unsigned memory_node2 = starpu_worker_get_memory_node(worker2); + _starpu_hip_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node); + _starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2); +#if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX + { + hwloc_obj_t obj, obj2, ancestor; + obj = hwloc_hip_get_device_osdev_by_index(config->topology.hwtopology, devid); + obj2 = hwloc_hip_get_device_osdev_by_index(config->topology.hwtopology, devid2); + ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); + if (ancestor) + { + struct _starpu_hwloc_userdata *data = ancestor->userdata; +#ifdef STARPU_VERBOSE + { + char name[64]; + hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); + _STARPU_DEBUG("HIP%u and HIP%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); + } +#endif + starpu_bus_set_ngpus(_starpu_hip_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus); + 
starpu_bus_set_ngpus(_starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus); + } + } +#endif + } + } + } + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can also manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); + + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); + + workerarg->memory_node = memory_node; +} + +/* Set the current HIP device */ +void starpu_hip_set_device(int devid STARPU_ATTRIBUTE_UNUSED) +{ + hipError_t hipres; + int attempts = 0; + + hipres = hipSetDevice(devid); + while (hipres == hipErrorDeinitialized && ++attempts < 10) + { + usleep(100000); + hipres = hipSetDevice(devid); + } + + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); +} + +static void _starpu_hip_limit_gpu_mem_if_needed(unsigned devid) +{ + starpu_ssize_t limit; + size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; + size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; + + /* Find the size of the memory on the device */ + totalGlobalMem = props[devid].totalGlobalMem; + + limit = starpu_getenv_number("STARPU_LIMIT_HIP_MEM"); + if (limit == -1) + { + char name[30]; + snprintf(name, sizeof(name), "STARPU_LIMIT_HIP_%u_MEM", devid); + limit = starpu_getenv_number(name); + } +#if defined(STARPU_USE_HIP) + if (limit == -1) + { + limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; + } +#endif + + global_mem[devid] = limit * 1024*1024; +} + +/* Really initialize one device */ +static void init_device_context(unsigned devid, unsigned memnode) +{ + STARPU_ASSERT(devid < STARPU_MAXHIPDEVS); + + hipError_t hipres; + int attempts = 0; + + starpu_hip_set_device(devid); + + STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); + hip_device_users[devid]++; + if (hip_device_init[devid] == UNINITIALIZED) + /* Nobody started initialization yet, do it */ + hip_device_init[devid] = 
CHANGING; + else + { + /* Somebody else is doing initialization, wait for it */ + while (hip_device_init[devid] != INITIALIZED) + STARPU_PTHREAD_COND_WAIT(&hip_device_init_cond[devid], &hip_device_init_mutex[devid]); + STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); + return; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); + + /* force HIP to initialize the context for real */ + hipres = hipInit(0); + while (hipres == hipErrorDeinitialized && ++attempts < 100) + { + usleep(100000); + hipres = hipInit(0); + } + + if (STARPU_UNLIKELY(hipres)) + { + if (hipres != hipSuccess) + { + _STARPU_MSG("Failed to initialize HIP runtime\n"); + exit(77); + } + STARPU_HIP_REPORT_ERROR(hipres); + } + + hipres = hipGetDeviceProperties(&props[devid], devid); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + if (props[devid].computeMode == hipComputeModeExclusive) + { + _STARPU_MSG("HIP is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-hip-memcpy-peer but that will disable the memcpy-peer optimizations\n"); + STARPU_ABORT(); + } +#endif + + hipres = starpu_hipStreamCreate(&in_transfer_streams[devid]); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + + hipres = starpu_hipStreamCreate(&out_transfer_streams[devid]); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_HIP_WORKER && worker->subworkerid == 0) + { + hipres = starpu_hipStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + } + } + + 
STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); + hip_device_init[devid] = INITIALIZED; + STARPU_PTHREAD_COND_BROADCAST(&hip_device_init_cond[devid]); + STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); + + _starpu_hip_limit_gpu_mem_if_needed(devid); + _starpu_memory_manager_set_global_memory_size(memnode, _starpu_hip_get_global_mem_size(devid)); +} + +/* De-initialize one device */ +static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + starpu_hip_set_device(devid); + + hipStreamDestroy(in_transfer_streams[devid]); + hipStreamDestroy(out_transfer_streams[devid]); + + int nworkers = starpu_worker_get_count(); + int workerid; + for (workerid = 0; workerid < nworkers; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + if (worker->arch == STARPU_HIP_WORKER && worker->subworkerid == 0) + { + hipStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); + } + } +} + +static void init_worker_context(unsigned workerid, unsigned devid) +{ + int j; + hipError_t hipres; + starpu_hip_set_device(devid); + + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + { + hipres = hipEventCreateWithFlags(&task_events[workerid][j], hipEventDisableTiming); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + } + + hipres = starpu_hipStreamCreate(&streams[workerid]); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); +} + +static void deinit_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) +{ + unsigned j; + starpu_hip_set_device(devid); + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + hipEventDestroy(task_events[workerid][j]); + hipStreamDestroy(streams[workerid]); +} + +/* This is run from the driver thread to initialize the driver HIP context */ +int _starpu_hip_driver_init(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = worker->set; + struct _starpu_worker *worker0 = &worker_set->workers[0]; + int lastdevid = -1; + unsigned i; + + 
_starpu_driver_start(worker0, STARPU_HIP_WORKER, 0); + _starpu_set_local_worker_key(worker); + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + +#ifdef STARPU_USE_FXT + for (i = 1; i < worker_set->nworkers; i++) + _starpu_worker_start(&worker_set->workers[i], STARPU_HIP_WORKER, 0); +#endif + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); +#endif + + if ((int) devid == lastdevid) + { + /* Already initialized */ + continue; + } + lastdevid = devid; + init_device_context(devid, memnode); + + if (worker->config->topology.nworker[STARPU_HIP_WORKER][devid] > 1 && props[devid].concurrentKernels == 0) + _STARPU_DISP("Warning: STARPU_NWORKER_PER_HIP is %u, but HIP device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid); + } + + /* one more time to avoid hacks from third party lib :) */ + _starpu_bind_thread_on_cpu(worker0->bindid, worker0->workerid, NULL); + + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned workerid = worker->workerid; + unsigned subdev = worker->subworkerid; + + float size = (float) global_mem[devid] / (1<<30); + /* get the device's name */ + char devname[64]; + strncpy(devname, props[devid].name, 63); + devname[63] = 0; + +#if defined(STARPU_HAVE_BUSID) +#if defined(STARPU_HAVE_DOMAINID) + if (props[devid].pciDomainID) + 
snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID); + else +#endif + snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB %02x:%02x.0)", devid, subdev, devname, size, props[devid].pciBusID, props[devid].pciDeviceID); +#else + snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB)", devid, subdev, devname, size); +#endif + snprintf(worker->short_name, sizeof(worker->short_name), "HIP %u.%u", devid, subdev); + _STARPU_DEBUG("hip (%s) dev id %u worker %u thread is ready to run on CPU %d !\n", devname, devid, subdev, worker->bindid); + + worker->pipeline_length = starpu_get_env_number_default("STARPU_HIP_PIPELINE", 2); + if (worker->pipeline_length > STARPU_MAX_PIPELINE) + { + _STARPU_DISP("Warning: STARPU_HIP_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); + worker->pipeline_length = STARPU_MAX_PIPELINE; + } +#if !defined(STARPU_NON_BLOCKING_DRIVERS) + if (worker->pipeline_length >= 1) + { + /* We need non-blocking drivers, to poll for HIP task + * termination */ + _STARPU_DISP("Warning: reducing STARPU_HIP_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not supported with this driver)\n"); + worker->pipeline_length = 0; + } +#endif + init_worker_context(workerid, worker->devid); + + _STARPU_TRACE_WORKER_INIT_END(workerid); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); +#endif + } + { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "HIP %u", worker->devid); + starpu_pthread_setname(thread_name); + } + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); + 
worker0->status = STATUS_UNKNOWN; + worker0->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker0->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); + + /* tell the main thread that this one is also ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); + worker_set->set_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); + + return 0; +} +/* Driver deinit hook for the worker set that `worker` belongs to. For each distinct device it drops one user reference and, when no user is left, flushes pending data requests, frees StarPU-allocated buffers and destroys the device context; it then destroys every worker context. Always returns 0. */ +int _starpu_hip_driver_deinit(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = worker->set; + int lastdevid = -1; + unsigned i; + _STARPU_TRACE_WORKER_DEINIT_START; + /* First pass: per-device teardown */ + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + unsigned usersleft; + if ((int) devid == lastdevid) + /* Same device as the previous worker: already handled in an earlier iteration. NOTE(review): only the immediately preceding devid is remembered, so this presumably relies on workers of one device being contiguous in the set -- confirm */ + continue; + lastdevid = devid; + /* Drop one reference on the device, under the per-device mutex */ + STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); + usersleft = --hip_device_users[devid]; + STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); + + if (!usersleft) + { + /* I'm last, deinitialize device */ + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! 
*/ + _starpu_free_all_automatically_allocated_buffers(memnode); + + _starpu_malloc_shutdown(memnode); + + deinit_device_context(devid); + } + STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); + hip_device_init[devid] = UNINITIALIZED; /* NOTE(review): reset even when usersleft != 0, i.e. while other users of the device remain -- confirm this is intended */ + STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); + + } + /* Second pass: per-worker teardown */ + for (i = 0; i < worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + unsigned workerid = worker->workerid; + unsigned memnode = worker->memory_node; + + deinit_worker_context(workerid, worker->devid); + /* NOTE(review): the second argument below is workerid, whereas the driver_init events in _starpu_hip_driver_init() pass devid in that position -- confirm which one is expected */ +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); +#endif + } + + worker_set->workers[0].worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_HIP_WORKER); + + return 0; +} +/* Allocate `size` bytes on the HIP device backing memory node `dst_node`. Returns the device address, or 0 when the free-memory check or hipMalloc() fails. `flags` is ignored. hipMalloc() errors other than hipErrorOutOfMemory are passed to STARPU_HIP_REPORT_ERROR. */ +static uintptr_t _starpu_hip_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + uintptr_t addr = 0; + (void) flags; + + unsigned devid = starpu_memory_node_get_devid(dst_node); + + starpu_hip_set_device(devid); + + /* Check if there is free memory; when hipMemGetInfo() itself fails the check is skipped and hipMalloc() is attempted anyway */ + size_t hip_mem_free, hip_mem_total; + hipError_t status; + status = hipMemGetInfo(&hip_mem_free, &hip_mem_total); + if (status == hipSuccess && hip_mem_free * FREE_MARGIN < size) + { + addr = 0; + } + else + { + status = hipMalloc((void **)&addr, size); + if (!addr || (status != hipSuccess)) + { + if (STARPU_UNLIKELY(status != hipErrorOutOfMemory)) + STARPU_HIP_REPORT_ERROR(status); + addr = 0; + } + } + return addr; +} +/* Free `addr` on the HIP device backing memory node `dst_node`; `size` and `flags` are unused. A hipFree() failure is passed to STARPU_HIP_REPORT_ERROR. */ +static void _starpu_hip_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void) size; + (void) flags; + + hipError_t err; + unsigned devid = starpu_memory_node_get_devid(dst_node); + starpu_hip_set_device(devid); + err = hipFree((void*)addr); + if (STARPU_UNLIKELY(err != hipSuccess)) + STARPU_HIP_REPORT_ERROR(err); +} +
+int starpu_hip_copy_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t ssize, hipStream_t stream, + hipMemcpyKind kind) +{ /* Copy `ssize` bytes from src_ptr (on src_node) to dst_ptr (on dst_node). With a non-NULL `stream` the copy is first queued asynchronously and -EAGAIN is returned when that submission succeeds. With a NULL `stream`, or when the asynchronous submission returned an error, the copy is done synchronously, followed by hipDeviceSynchronize(), and 0 is returned. A device-to-device copy between two different nodes uses the peer variants when STARPU_HAVE_HIP_MEMCPY_PEER is defined and aborts otherwise. */ +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + int peer_copy = 0; + int src_dev = -1, dst_dev = -1; +#endif + hipError_t hipres = hipSuccess; + + if (kind == hipMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + peer_copy = 1; + src_dev = starpu_memory_node_get_devid(src_node); + dst_dev = starpu_memory_node_get_devid(dst_node); +#else + STARPU_ABORT(); +#endif + } + + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + if (peer_copy) + { + hipres = hipMemcpyPeerAsync((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize, stream); + } + else +#endif + { + hipres = hipMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); + } + (void) hipGetLastError(); /* result deliberately discarded; presumably called to clear the error recorded by the runtime -- confirm */ + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || hipres) + { + /* do it in a synchronous fashion */ +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + if (peer_copy) + { + hipres = hipMemcpyPeer((char *) dst_ptr, dst_dev, + (char *) src_ptr, src_dev, + ssize); + } + else +#endif + { + hipres = hipMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); + } + (void) hipGetLastError(); + + if (!hipres) + hipres = hipDeviceSynchronize(); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + + return 0; + } + + return -EAGAIN; +} + +/* Driver porters: this is optional but really recommended. 2D variant of starpu_hip_copy_async_sync(): blocksize and numblocks are passed as the width and height arguments of hipMemcpy2D()/hipMemcpy2DAsync(), with ld_src and ld_dst as the source and destination pitches. Same return convention: -EAGAIN when the copy was queued on `stream`, 0 when it was done synchronously. */ +int starpu_hip_copy2d_async_sync(void *src_ptr, unsigned src_node, + void *dst_ptr, unsigned dst_node, + size_t blocksize, + size_t numblocks, size_t ld_src, size_t ld_dst, + hipStream_t stream, hipMemcpyKind kind) +{ + hipError_t hipres = hipSuccess; + + if (kind == 
hipMemcpyDeviceToDevice && src_node != dst_node) + { +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER +# ifdef BUGGED_MEMCPY3D + STARPU_ABORT_MSG("HIP memcpy 3D peer buggy, but core triggered one?!"); +# endif +#else + STARPU_ABORT_MSG("HIP memcpy 3D peer not available, but core triggered one ?!"); +#endif + } + /* NOTE(review): when STARPU_HAVE_HIP_MEMCPY_PEER is defined and BUGGED_MEMCPY3D is not, a copy between two different device nodes is not special-cased and goes through the plain hipMemcpy2D*() calls below; the abort messages above mention "3D" although this is the 2D path -- confirm both are intended */ + if (stream) + { + double start; + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + hipres = hipMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind, stream); + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + } + + /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ + if (stream == NULL || hipres) + { + hipres = hipMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, + blocksize, numblocks, kind); + if (!hipres) + hipres = hipDeviceSynchronize(); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + + return 0; + } + + return -EAGAIN; +} +/* View the generic async-channel event storage as a hipEvent_t; the static assertion checks that a hipEvent_t fits in that storage. */ +static inline hipEvent_t *_starpu_hip_event(union _starpu_async_channel_event *_event) +{ + hipEvent_t *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} +/* Non-blocking poll of the event stored in `async_channel`. Returns 1 and destroys the event when hipEventQuery() reports success, 0 when it reports hipErrorNotReady; any other status is passed to STARPU_HIP_REPORT_ERROR. NOTE(review): the hipEventDestroy() result is not checked here, unlike in _starpu_hip_wait_request_completion(). */ +static unsigned _starpu_hip_test_request_completion(struct _starpu_async_channel *async_channel) +{ + hipEvent_t event; + hipError_t hipres; + unsigned success; + + event = *_starpu_hip_event(&async_channel->event); + hipres = hipEventQuery(event); + success = (hipres == hipSuccess); + + if (success) + hipEventDestroy(event); + else if (hipres != hipErrorNotReady) + STARPU_HIP_REPORT_ERROR(hipres); + + return success; +} + +/* Only used at starpu_shutdown. Blocks until the event stored in `async_channel` has completed, then destroys it; a failure of either call is passed to STARPU_HIP_REPORT_ERROR. */ +static void _starpu_hip_wait_request_completion(struct _starpu_async_channel *async_channel) +{ + hipEvent_t event; + hipError_t hipres; + + event = *_starpu_hip_event(&async_channel->event); + + hipres = hipEventSynchronize(event); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + + hipres = 
hipEventDestroy(event); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); +} +/* Select the HIP device from which a copy between src_node and dst_node is issued: the destination device for a HIP-to-HIP transfer; otherwise the destination if it is HIP RAM, else the source. */ +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER +static void starpu_hip_set_copy_device(unsigned src_node, unsigned dst_node) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + unsigned devid; + if ((src_kind == STARPU_HIP_RAM) && (dst_kind == STARPU_HIP_RAM)) + { + /* GPU-GPU transfer, issue it from the destination */ + devid = starpu_memory_node_get_devid(dst_node); + } + else + { + unsigned node = (dst_kind == STARPU_HIP_RAM)?dst_node:src_node; + devid = starpu_memory_node_get_devid(node); + } + starpu_hip_set_device(devid); +} +#endif +/* Copy a data interface between two HIP memory nodes through the handle's any_to_any copy method. The copy is synchronous (NULL channel) when there is no request or asynchronous copies are disabled; otherwise an event is created in req->async_channel, any_to_any is called with that channel and the event is recorded on the peer transfer stream. Returns 1 on the synchronous path (the any_to_any result is discarded there), otherwise the any_to_any result. Without STARPU_HAVE_HIP_MEMCPY_PEER, src_node and dst_node must be equal. */ +static int _starpu_hip_copy_interface_from_hip_to_hip(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); + +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + starpu_hip_set_copy_device(src_node, dst_node); +#else + STARPU_ASSERT(src_node == dst_node); +#endif + + int ret = 1; + hipError_t hipres; + hipStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + /* HIP - HIP transfer; a missing any_to_any method also selects this branch, where it trips the assertion just below */ + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || !copy_methods->any_to_any) + { + STARPU_ASSERT(copy_methods->any_to_any); + /* this is not associated to a request so it's synchronous */ + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_hip_node_ops; + hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); + if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); + + stream = 
starpu_hip_get_peer_transfer_stream(src_node, dst_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + /* the event is recorded after the copy was submitted; completion is polled later through _starpu_hip_test_request_completion() */ + hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); + } + return ret; +} +/* Copy a data interface from a HIP memory node to a CPU memory node. Same synchronous/asynchronous split and return convention as _starpu_hip_copy_interface_from_hip_to_hip(), with the event recorded on the out-transfer stream of src_node. */ +static int _starpu_hip_copy_interface_from_hip_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); + +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + starpu_hip_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + hipError_t hipres; + hipStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* only the proper HIP thread can initiate this directly ! 
*/ +#if !defined(STARPU_HAVE_HIP_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || !copy_methods->any_to_any) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_hip_node_ops; + hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); + if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); + + stream = starpu_hip_get_out_transfer_stream(src_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + + hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); + } + return ret; +} +/* Copy a data interface from a CPU memory node to a HIP memory node. Same synchronous/asynchronous split and return convention as _starpu_hip_copy_interface_from_hip_to_hip(), with the event recorded on the in-transfer stream of dst_node. */ +static int _starpu_hip_copy_interface_from_cpu_to_hip(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); + +#ifdef STARPU_HAVE_HIP_MEMCPY_PEER + starpu_hip_set_copy_device(src_node, dst_node); +#endif + + int ret = 1; + hipError_t hipres; + hipStream_t stream; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + /* STARPU_CPU_RAM -> STARPU_HIP_RAM */ + /* only the proper HIP thread can initiate this ! 
*/ +#if !defined(STARPU_HAVE_HIP_MEMCPY_PEER) + STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); +#endif + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || + !copy_methods->any_to_any) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_hip_node_ops; + hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); + if (STARPU_UNLIKELY(hipres != hipSuccess)) + STARPU_HIP_REPORT_ERROR(hipres); + + stream = starpu_hip_get_in_transfer_stream(dst_node); + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + + hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); + if (STARPU_UNLIKELY(hipres != hipSuccess)) + STARPU_HIP_REPORT_ERROR(hipres); + } + return ret; +} +/* Copy `size` bytes from src + src_offset on a HIP node to dst + dst_offset on a CPU node. The copy is queued on the out-transfer stream of src_node when `async_channel` is non-NULL and done synchronously otherwise; returns what starpu_hip_copy_async_sync() returns. */ +static int _starpu_hip_copy_data_from_hip_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_hip_get_out_transfer_stream(src_node):NULL, + hipMemcpyDeviceToHost); +} +/* Copy `size` bytes between two HIP nodes (the same node unless STARPU_HAVE_HIP_MEMCPY_PEER is defined), on the peer transfer stream when `async_channel` is non-NULL; returns what starpu_hip_copy_async_sync() returns. */ +static int _starpu_hip_copy_data_from_hip_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = 
starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); +#ifndef STARPU_HAVE_HIP_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_hip_get_peer_transfer_stream(src_node, dst_node):NULL, + hipMemcpyDeviceToDevice); +} +/* Copy `size` bytes from src + src_offset on a CPU node to dst + dst_offset on a HIP node. The copy is queued on the in-transfer stream of dst_node when `async_channel` is non-NULL and done synchronously otherwise; returns what starpu_hip_copy_async_sync() returns. */ +static int _starpu_hip_copy_data_from_cpu_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); + + return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + size, + async_channel?starpu_hip_get_in_transfer_stream(dst_node):NULL, + hipMemcpyHostToDevice); +} +/* 2D copy from a HIP node to a CPU node: forwards blocksize, numblocks, ld_src and ld_dst to starpu_hip_copy2d_async_sync(), using the out-transfer stream of src_node when `async_channel` is non-NULL. */ +static int _starpu_hip_copy2d_data_from_hip_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_hip_get_out_transfer_stream(src_node):NULL, + hipMemcpyDeviceToHost); +} +/* 2D copy between two HIP nodes (the same node unless STARPU_HAVE_HIP_MEMCPY_PEER is defined), using the peer transfer stream when `async_channel` is non-NULL. */ +static int _starpu_hip_copy2d_data_from_hip_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct 
_starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); +#ifndef STARPU_HAVE_HIP_MEMCPY_PEER + STARPU_ASSERT(src_node == dst_node); +#endif + + return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_hip_get_peer_transfer_stream(src_node, dst_node):NULL, + hipMemcpyDeviceToDevice); +} +/* 2D copy from a CPU node to a HIP node: forwards blocksize, numblocks, ld_src and ld_dst to starpu_hip_copy2d_async_sync(), using the in-transfer stream of dst_node when `async_channel` is non-NULL. */ +static int _starpu_hip_copy2d_data_from_cpu_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, + uintptr_t dst, size_t dst_offset, unsigned dst_node, + size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, + struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); + + return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, + (void*) (dst + dst_offset), dst_node, + blocksize, numblocks, ld_src, ld_dst, + async_channel?starpu_hip_get_in_transfer_stream(dst_node):NULL, + hipMemcpyHostToDevice); +} +/* Returns non-zero when `handling_node` is a HIP RAM node and STARPU_HAVE_HIP_MEMCPY_PEER is defined, 0 otherwise; `node` is not consulted. */ +static int _starpu_hip_is_direct_access_supported(unsigned node, unsigned handling_node) +{ +#if defined(STARPU_HAVE_HIP_MEMCPY_PEER) + (void) node; + enum starpu_node_kind kind = starpu_node_get_kind(handling_node); + return kind == STARPU_HIP_RAM; +#else /* STARPU_HAVE_HIP_MEMCPY_PEER */ + /* Direct GPU-GPU transfers are not allowed in general */ + (void) node; + (void) handling_node; + return 0; +#endif /* STARPU_HAVE_HIP_MEMCPY_PEER */ +} +/* Make `j` the current task of `worker` and call the HIP implementation of its codelet, unless kernels are disabled; `pipeline_idx` is unused. The start of the job is recorded here only when it is the sole task of the worker (ntasks == 1). */ +static void start_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); +#ifdef STARPU_PROF_TOOL 
+ struct starpu_prof_tool_info pi; +#endif + + STARPU_ASSERT(task); + struct starpu_codelet *cl = task->cl; + STARPU_ASSERT(cl); + + _starpu_set_local_worker_key(worker); + _starpu_set_current_task(task); + j->workerid = worker->workerid; + + if (worker->ntasks == 1) + { + /* We are alone in the pipeline, the kernel will start now, record it */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); + } + +#if defined(STARPU_HAVE_HIP_MEMCPY_PEER) + /* We make sure we do manipulate the proper device */ + starpu_hip_set_device(worker->devid); +#endif + + starpu_hip_func_t func = _starpu_task_get_hip_nth_implementation(cl, j->nimpl); + STARPU_ASSERT_MSG(func, "when STARPU_HIP is defined in 'where', hip_func or hip_funcs has to be defined"); + /* the codelet function is only called when kernels are not disabled */ + if (_starpu_get_disable_kernels() <= 0) + { + _STARPU_TRACE_START_EXECUTING(j); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); +#endif + + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); +#endif + _STARPU_TRACE_END_EXECUTING(j); + } +} + +static void finish_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker); + +/* Execute a job, up to completion for synchronous jobs. For codelets flagged STARPU_HIP_ASYNC with a non-zero pipeline length, only an event is recorded on the local stream and termination is detected later by _starpu_hip_driver_run_once(); in every other case finish_job_on_hip() is called before returning. */ +static void execute_job_on_hip(struct starpu_task *task, struct _starpu_worker *worker) +{ + int workerid = worker->workerid; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + /* index of the last of the ntasks entries starting at first_task, modulo STARPU_MAX_PIPELINE */ + unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; + + start_job_on_hip(j, worker, pipeline_idx); + /* the flag is set right after the warning, so it is printed at most once per worker */ + if 
(!used_stream[workerid]) + { + used_stream[workerid] = 1; + _STARPU_DISP("Warning: starpu_hip_get_local_stream() was not used to submit kernel to HIP on worker %d. HIP will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution. See the HIP-specific part of the 'Check List When Performance Are Not There' of the StarPU handbook\n", workerid); + } + + if (task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC) + { + if (worker->pipeline_length == 0) + { + /* Forced synchronous execution. NOTE(review): the hipStreamSynchronize() result is ignored */ + hipStreamSynchronize(starpu_hip_get_local_stream()); + finish_job_on_hip(j, worker); + } + else + { + /* Record event to synchronize with task termination later */ + hipError_t hipres = hipEventRecord(task_events[workerid][pipeline_idx], starpu_hip_get_local_stream()); + if (STARPU_UNLIKELY(hipres)) + STARPU_HIP_REPORT_ERROR(hipres); + } + } + else /* Synchronous execution */ + { + STARPU_ASSERT_MSG(hipStreamQuery(starpu_hip_get_local_stream()) == hipSuccess, "Unless when using the STARPU_HIP_ASYNC flag, HIP codelets have to wait for termination of their kernels on the starpu_hip_get_local_stream() stream"); + finish_job_on_hip(j, worker); + } +} +/* Retire job `j` from `worker`: clear its slot (current_tasks[first_task] when a pipeline is used, current_task otherwise), advance first_task, decrement ntasks, then record the end of the job, update its feedback, push its output and handle its termination. */ +static void finish_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker) +{ + int profiling = starpu_profiling_status_get(); + + if (worker->pipeline_length) + worker->current_tasks[worker->first_task] = NULL; + else + worker->current_task = NULL; + worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; + worker->ntasks--; + + _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); + + _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); + + _starpu_push_task_output(j); + + _starpu_set_current_task(NULL); + + _starpu_handle_job_termination(j); +} + +/* One iteration of the main driver loop: polls every worker of the set for finished input transfers and finished tasks, lets the datawizard progress, then pulls new tasks and starts fetching their input. Always returns 0. */ +int _starpu_hip_driver_run_once(struct _starpu_worker *worker) +{ + struct _starpu_worker_set *worker_set = 
worker->set; + struct _starpu_worker *worker0 = &worker_set->workers[0]; + struct starpu_task *tasks[worker_set->nworkers]; /* variable-length array: one slot per worker of the set, filled by _starpu_get_multi_worker_task() below */ + struct starpu_task *task; + struct _starpu_job *j; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + int i, res; + int idle_tasks, idle_transfers; + + _starpu_set_local_worker_key(worker0); + + /* First poll for completed jobs */ + idle_tasks = 0; + idle_transfers = 0; + for (i = 0; i < (int) worker_set->nworkers; i++) + { + worker = &worker_set->workers[i]; + int workerid = worker->workerid; + unsigned memnode = worker->memory_node; + + if (!worker->ntasks) + idle_tasks++; + if (!worker->task_transferring) + idle_transfers++; + + if (!worker->ntasks && !worker->task_transferring) + { + /* Even nothing to test */ + continue; + } + + /* First test for transfers pending for next task: all buffers have arrived once both counters are equal */ + task = worker->task_transferring; + if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) + { + STARPU_RMB(); + _STARPU_TRACE_END_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + j = _starpu_get_job_associated_to_task(task); + + _starpu_set_local_worker_key(worker); + _starpu_fetch_task_input_tail(task, j, worker); + /* Reset it */ + worker->task_transferring = NULL; + if (worker->ntasks > 1 && !(task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC)) + { + /* We have to execute a non-asynchronous task but we + * still have tasks in the pipeline... 
Record it to + * prevent more tasks from coming, and do it later */ + worker->pipeline_stuck = 1; + } + else + { + execute_job_on_hip(task, worker); + } + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + } + + /* Then test for termination of queued tasks */ + if (!worker->ntasks) + /* No queued task */ + continue; + + if (worker->pipeline_length) + task = worker->current_tasks[worker->first_task]; + else + task = worker->current_task; + if (task == worker->task_transferring) + /* Next task is still pending transfer */ + continue; + + /* On-going asynchronous task, check for its termination first; the task_events[][] entries are recorded by execute_job_on_hip() */ + hipError_t hipres = hipEventQuery(task_events[workerid][worker->first_task]); + + if (hipres != hipSuccess) + { + STARPU_ASSERT_MSG(hipres == hipErrorNotReady, "HIP error on task %p, codelet %p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), hipGetErrorString(hipres), hipres); + } + else + { + _STARPU_TRACE_END_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + /* Asynchronous task completed! 
*/ + _starpu_set_local_worker_key(worker); + finish_job_on_hip(_starpu_get_job_associated_to_task(task), worker); + /* See next task if any. NOTE(review): current_tasks[] is read below regardless of pipeline_length, whereas the selection earlier in this loop uses current_task when pipeline_length is 0 -- presumably ntasks is always 0 here in that configuration; confirm */ + if (worker->ntasks) + { + if (worker->current_tasks[worker->first_task] != worker->task_transferring) + { + task = worker->current_tasks[worker->first_task]; + j = _starpu_get_job_associated_to_task(task); + if (task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC) + { + /* An asynchronous task, it was already + * queued, it's now running, record its start time. */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); + } + else + { + /* A synchronous task, we have finished + * flushing the pipeline, we can now at + * last execute it. */ + + _STARPU_TRACE_EVENT("sync_task"); + execute_job_on_hip(task, worker); + _STARPU_TRACE_EVENT("end_sync_task"); + worker->pipeline_stuck = 0; + } + } + else + /* Data for next task didn't have time to finish transferring :/ */ + _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + } + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + } + if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) + idle_tasks++; + } + +#if defined(STARPU_NON_BLOCKING_DRIVERS) + if (!idle_tasks) + { + /* No task ready yet, no better thing to do than waiting */ + __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); + return 0; + } +#endif + + /* Something done, make some progress */ + res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); + + /* And pull tasks. NOTE(review): the value accumulated in res here is overwritten in the loop below before being read */ + res |= _starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, worker0->memory_node); + + for (i = 0; i < (int) worker_set->nworkers; i++) + { + worker = 
&worker_set->workers[i]; + unsigned memnode STARPU_ATTRIBUTE_UNUSED = worker->memory_node; + + task = tasks[i]; + if (!task) + continue; + + + j = _starpu_get_job_associated_to_task(task); + + /* can HIP do that task ? */ + if (!_STARPU_MAY_PERFORM(j, HIP)) + { + /* this is not a task that HIP may perform: refuse it */ + _starpu_worker_refuse_task(worker, task); + continue; + } + + /* Fetch data asynchronously */ +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_set_local_worker_key(worker); + res = _starpu_fetch_task_input(task, j, 1); + STARPU_ASSERT(res == 0); + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + } + + return 0; +} +/* Thread body of a HIP worker set; `_arg` is a struct _starpu_worker pointer. Initializes the driver, calls _starpu_hip_driver_run_once() for as long as _starpu_machine_is_running() holds, then deinitializes the driver. Returns NULL. */ +void *_starpu_hip_worker(void *_arg) +{ + struct _starpu_worker *worker = _arg; + struct _starpu_worker_set* worker_set = worker->set; + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + unsigned i; + + _starpu_hip_driver_init(worker); + for (i = 0; i < worker_set->nworkers; i++) + { +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); + } + while (_starpu_machine_is_running()) + { + _starpu_may_pause(); + _starpu_hip_driver_run_once(worker); + } + for (i = 0; i < 
worker_set->nworkers; i++) + { + _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + } + _starpu_hip_driver_deinit(worker); + + return NULL; +} + +#ifdef STARPU_HAVE_HWLOC +hwloc_obj_t _starpu_hip_get_hwloc_obj(hwloc_topology_t topology, int devid) +{ +#if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX + return hwloc_hip_get_device_osdev_by_index(topology, devid); +#else + (void)topology; + (void)devid; + return NULL; +#endif +} +#endif + +void starpu_hipblas_report_error(const char *func, const char *file, int line, int status) +{ +#ifdef STARPU_USE_HIPBLAS + char *errormsg; + switch (status) + { + case HIPBLAS_STATUS_SUCCESS: + errormsg = "success"; + break; + case HIPBLAS_STATUS_NOT_INITIALIZED: + errormsg = "not initialized"; + break; + case HIPBLAS_STATUS_ALLOC_FAILED: + errormsg = "alloc failed"; + break; + case HIPBLAS_STATUS_INVALID_VALUE: + errormsg = "invalid value"; + break; + case HIPBLAS_STATUS_ARCH_MISMATCH: + errormsg = "arch mismatch"; + break; + case HIPBLAS_STATUS_EXECUTION_FAILED: + errormsg = "execution failed"; + break; + case HIPBLAS_STATUS_INTERNAL_ERROR: + errormsg = "internal error"; + break; + default: + errormsg = "unknown error"; + break; + } + _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); +#endif + STARPU_ABORT(); +} + +void starpu_hip_report_error(const char *func, const char *file, int line, hipError_t status) +{ + const char *errormsg = hipGetErrorString(status); + _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); +} + +int _starpu_hip_run_from_worker(struct _starpu_worker *worker) +{ + /* Let's go ! 
*/ + _starpu_hip_worker(worker); + + return 0; +} +/* Store the device id of `worker` in driver->id.hip_id; always returns 0. */ +int _starpu_hip_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + driver->id.hip_id = worker->devid; + return 0; +} +/* Return non-zero when driver->id.hip_id equals the device id of `worker`. */ +int _starpu_hip_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + return driver->id.hip_id == worker->devid; +} +/* Driver entry points of the HIP driver (init, run, run_once, deinit and device-id helpers). */ +struct _starpu_driver_ops _starpu_driver_hip_ops = +{ + .init = _starpu_hip_driver_init, + .run = _starpu_hip_run_from_worker, + .run_once = _starpu_hip_driver_run_once, + .deinit = _starpu_hip_driver_deinit, + .set_devid = _starpu_hip_driver_set_devid, + .is_devid = _starpu_hip_driver_is_devid, +}; +/* Memory-node operations of the HIP driver. The copy tables are indexed by the kind of the other node: the *_to[] entries copy from a HIP node to that kind, the *_from[] entries copy from that kind to a HIP node. */ +struct _starpu_node_ops _starpu_driver_hip_node_ops = +{ + .name = "hip driver", + .malloc_on_node = _starpu_hip_malloc_on_node, + .free_on_node = _starpu_hip_free_on_node, + + .is_direct_access_supported = _starpu_hip_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_hip_copy_interface_from_hip_to_cpu, + .copy_interface_to[STARPU_HIP_RAM] = _starpu_hip_copy_interface_from_hip_to_hip, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_hip_copy_interface_from_cpu_to_hip, + .copy_interface_from[STARPU_HIP_RAM] = _starpu_hip_copy_interface_from_hip_to_hip, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_hip_copy_data_from_hip_to_cpu, + .copy_data_to[STARPU_HIP_RAM] = _starpu_hip_copy_data_from_hip_to_hip, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_hip_copy_data_from_cpu_to_hip, + .copy_data_from[STARPU_HIP_RAM] = _starpu_hip_copy_data_from_hip_to_hip, + + .copy2d_data_to[STARPU_CPU_RAM] = _starpu_hip_copy2d_data_from_hip_to_cpu, + .copy2d_data_to[STARPU_HIP_RAM] = _starpu_hip_copy2d_data_from_hip_to_hip, + + .copy2d_data_from[STARPU_CPU_RAM] = _starpu_hip_copy2d_data_from_cpu_to_hip, + .copy2d_data_from[STARPU_HIP_RAM] = _starpu_hip_copy2d_data_from_hip_to_hip, + + .wait_request_completion = _starpu_hip_wait_request_completion, + .test_request_completion = _starpu_hip_test_request_completion, +};
diff --git a/src/drivers/hip/driver_hip.h b/src/drivers/hip/driver_hip.h new file mode 100644 index 0000000..c236827 --- /dev/null +++ b/src/drivers/hip/driver_hip.h @@ -0,0 +1,74 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_HIP_H__ +#define __DRIVER_HIP_H__ + +/** @file */ + +#include + +void _starpu_hip_preinit(void); + +#ifdef STARPU_USE_HIP + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#ifndef __cplusplus +#pragma GCC diagnostic ignored "-Wimplicit-int" +#endif +#pragma GCC diagnostic ignored "-Wreturn-type" +#include +#include +#pragma GCC diagnostic pop +// not needed yet #include +#endif + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +extern struct _starpu_driver_ops _starpu_driver_hip_ops; +extern struct _starpu_node_ops _starpu_driver_hip_node_ops; + +extern int _starpu_nworker_per_hip; + +void _starpu_hip_init(void); +#ifdef STARPU_HAVE_HWLOC +struct _starpu_machine_topology; +hwloc_obj_t _starpu_hip_get_hwloc_obj(hwloc_topology_t topology, int devid); +#endif +extern int _starpu_hip_bus_ids[STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES][STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES]; + +#if defined(STARPU_USE_HIP) +void 
_starpu_hip_discover_devices(struct _starpu_machine_config *); +void _starpu_init_hip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); +void _starpu_hip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); +void _starpu_hip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); +void _starpu_init_hip(void); +void *_starpu_hip_worker(void *); +#else +# define _starpu_hip_discover_devices(config) ((void) config) +#endif + +#pragma GCC visibility pop + +#endif // __DRIVER_HIP_H__ + diff --git a/src/drivers/hip/driver_hip_init.c b/src/drivers/hip/driver_hip_init.c new file mode 100644 index 0000000..7fc5744 --- /dev/null +++ b/src/drivers/hip/driver_hip_init.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "driver_hip.h" + +static struct _starpu_driver_info driver_info = +{ + .name_upper = "HIP", + .name_var = "HIP", + .name_lower = "hip", + .memory_kind = STARPU_HIP_RAM, + .alpha = 13.33f, +#if defined(STARPU_USE_HIP) + .driver_ops = &_starpu_driver_hip_ops, + .run_worker = _starpu_hip_worker, +#if defined(STARPU_HAVE_HWLOC) + .get_hwloc_obj = _starpu_hip_get_hwloc_obj, +#endif + .init_worker_binding = _starpu_hip_init_worker_binding, + .init_worker_memory = _starpu_hip_init_worker_memory, +#endif +}; + +static struct _starpu_memory_driver_info memory_driver_info = +{ + .name_upper = "HIP", + .worker_archtype = STARPU_HIP_WORKER, +#if defined(STARPU_USE_HIP) + .ops = &_starpu_driver_hip_node_ops, +#endif +}; + +void _starpu_hip_preinit(void) +{ + _starpu_driver_info_register(STARPU_HIP_WORKER, &driver_info); + _starpu_memory_driver_info_register(STARPU_HIP_RAM, &memory_driver_info); +} diff --git a/src/drivers/hip/starpu_hipblas.c b/src/drivers/hip/starpu_hipblas.c new file mode 100644 index 0000000..65ba1b4 --- /dev/null +++ b/src/drivers/hip/starpu_hipblas.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS +#include +#include + +#ifdef STARPU_HIP_PLATFORM_AMD +#include +#endif + +#ifdef STARPU_HIP_PLATFORM_NVIDIA +#include +#endif + +static int hipblas_initialized[STARPU_NMAXWORKERS]; +static hipblasHandle_t hipblas_handles[STARPU_NMAXWORKERS]; +static hipblasHandle_t main_handle; +static starpu_pthread_mutex_t mutex; + +static unsigned get_idx(void) +{ + unsigned workerid = starpu_worker_get_id_check(); + unsigned th_per_dev = _starpu_get_machine_config()->topology.hip_th_per_dev; + unsigned th_per_stream = _starpu_get_machine_config()->topology.hip_th_per_stream; + + if (th_per_dev) + return starpu_worker_get_devid(workerid); + else if (th_per_stream) + return workerid; + else + /* same thread for all devices */ + return 0; +} + +static void init_hipblas_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + unsigned idx = get_idx(); + hipblasStatus_t status = hipblasCreate(&hipblas_handles[starpu_worker_get_id_check()]); + if (status != HIPBLAS_STATUS_SUCCESS) + STARPU_HIPBLAS_REPORT_ERROR(status); + status=hipblasSetStream(hipblas_handles[starpu_worker_get_id_check()], starpu_hip_get_local_stream()); + if (status != HIPBLAS_STATUS_SUCCESS) + STARPU_HIPBLAS_REPORT_ERROR(status); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + if (!(hipblas_initialized[idx]++)) + { +#ifdef STARPU_HIP_PLATFORM_NVIDIA + cublasInit(); +#elif defined(STARPU_HIP_PLATFORM_AMD) + rocblas_initialize(); +#endif + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +static void shutdown_hipblas_func(void *args STARPU_ATTRIBUTE_UNUSED) +{ + unsigned idx = get_idx(); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + if (!--hipblas_initialized[idx]) + { +#ifdef STARPU_HIP_PLATFORM_NVIDIA + cublasShutdown(); +#elif defined(STARPU_HIP_PLATFORM_AMD) + // no equivalent +#endif + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + hipblasDestroy(hipblas_handles[starpu_worker_get_id_check()]); +} +#endif /* STARPU_USE_HIPBLAS */ +#endif /* 
STARPU_USE_HIP */ + +void starpu_hipblas_init(void) +{ +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS + starpu_execute_on_each_worker(init_hipblas_func, NULL, STARPU_HIP); + + if (hipblasCreate(&main_handle) != HIPBLAS_STATUS_SUCCESS) + main_handle = NULL; +#endif +#endif +} + +void starpu_hipblas_shutdown(void) +{ +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS + starpu_execute_on_each_worker(shutdown_hipblas_func, NULL, STARPU_HIP); + + if (main_handle) + hipblasDestroy(main_handle); +#endif +#endif +} + +void starpu_hipblas_set_stream(void) +{ +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS + unsigned workerid = starpu_worker_get_id_check(); + int devnum = starpu_worker_get_devnum(workerid); + if (!_starpu_get_machine_config()->topology.hip_th_per_dev || + (!_starpu_get_machine_config()->topology.hip_th_per_stream && + _starpu_get_machine_config()->topology.nworker[STARPU_HIP_WORKER][devnum] > 1)) + hipblasSetStream(hipblas_handles[starpu_worker_get_id_check()], starpu_hip_get_local_stream()); +#endif +#endif +} + +#ifdef STARPU_USE_HIP +#ifdef STARPU_USE_HIPBLAS +hipblasHandle_t starpu_hipblas_get_local_handle(void) +{ + int workerid = starpu_worker_get_id(); + if (workerid >= 0) + return hipblas_handles[workerid]; + else + return main_handle; +} +#endif +#endif diff --git a/src/drivers/max/driver_max_fpga.c b/src/drivers/max/driver_max_fpga.c new file mode 100644 index 0000000..0525bbb --- /dev/null +++ b/src/drivers/max/driver_max_fpga.c @@ -0,0 +1,734 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "driver_max_fpga.h" +#include +#include +#include +#include + +/* the number of FPGA devices */ +static unsigned nmax_fpga; +static size_t max_fpga_mem[STARPU_MAXMAXFPGADEVS]; +static max_engine_t *engines[STARPU_MAXMAXFPGADEVS]; +static fpga_mem current_address[STARPU_MAXMAXFPGADEVS]; + +static unsigned max_fpga_bindid_init[STARPU_MAXMAXFPGADEVS]; +static unsigned max_fpga_bindid[STARPU_MAXMAXFPGADEVS]; +static unsigned max_fpga_memory_init[STARPU_MAXMAXFPGADEVS]; +static unsigned max_fpga_memory_nodes[STARPU_MAXMAXFPGADEVS]; + +static void _starpu_max_fpga_limit_max_fpga_mem(unsigned); +static size_t _starpu_max_fpga_get_max_fpga_mem_size(unsigned devid); + +static size_t _starpu_max_fpga_get_max_fpga_mem_size(unsigned devid) +{ + return max_fpga_mem[devid]; +} + +max_engine_t *starpu_max_fpga_get_local_engine(void) +{ + int worker = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(worker); + + STARPU_ASSERT_MSG(engines[devid], "engine for fpga %d on worker %d is NULL!?", devid, worker); + + return engines[devid]; +} + +/* This is called to initialize FPGA and discover devices */ +void _starpu_init_max_fpga() +{ + memset(&max_fpga_bindid_init, 0, sizeof(max_fpga_bindid_init)); + memset(&max_fpga_memory_init, 0, sizeof(max_fpga_memory_init)); +} + +static void _starpu_initialize_workers_max_fpga_deviceid(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + struct starpu_conf *uconf = &config->conf; + + _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_max_fpga_deviceid == 0 + ? 
NULL + : (int *)uconf->workers_max_fpga_deviceid, + &(config->current_devid[STARPU_MAX_FPGA_WORKER]), + (int *)topology->workers_max_fpga_deviceid, + "STARPU_WORKERS_MAX_FPGAID", + topology->nhwdevices[STARPU_MAX_FPGA_WORKER], + STARPU_MAX_FPGA_WORKER); + _starpu_devices_drop_duplicate(topology->workers_max_fpga_deviceid); +} + +static unsigned _starpu_max_fpga_get_device_count(void) +{ + return nmax_fpga; +} + +/* This is called to really discover the hardware. + * + * Counts the Maxeler FPGA devices described by _starpu_config.conf.max_fpga_load: + * a single device when no array was given, otherwise one device per non-star + * entry plus the engines still available to the trailing star entry. + */ +void _starpu_max_fpga_discover_devices (struct _starpu_machine_config *config) +{ + //TODO: This is statically assigned, in the next round of integration + // I will have to read from the struct fpga in fpga + struct starpu_max_load *load = _starpu_config.conf.max_fpga_load; + const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION); + int n; + + if (!load) + { + /* Nothing specified, single-FPGA execution with basic static + * interface, file will be auto-loaded by SLiC. */ + n = 1; + } + else + { + struct starpu_max_load *cur, *star = NULL; + size_t nstar = 0; + + /* First check if we have a star, we will want to subtract non-star loads from it */ + for (cur = load; cur->engine_id_pattern; cur++) + if (!strcmp(cur->engine_id_pattern, "*") + || strstr(cur->engine_id_pattern, ":*")) + { + STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last"); + star = cur; + + if (sim_socket) + /* not specified, assume 1 */ + nstar = 1; + else + nstar = max_count_engines_free(cur->file, star->engine_id_pattern); + break; + } + + n = 0; + /* Now check the non-star loads */ + for (cur = load; cur != star && cur->engine_id_pattern; cur++) + { + size_t size; + + /* Query the entry being visited (cur), not the first entry of the array */ + size = max_count_engines_free(cur->file, cur->engine_id_pattern); + STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %u on %s", (unsigned) (cur - load), cur->engine_id_pattern); + /* One FPGA more to be used */ + n++; + + if (star) + { + size = max_count_engines_free(cur->file, 
star->engine_id_pattern); + if (size > 1) + /* One of the star devices will be used to load this file */ + nstar--; + } + } + n += nstar; + } + + //LMemInterface addLMemInterface() + //// to get access to the LMem + + if (n > STARPU_MAXMAXFPGADEVS) + { + _STARPU_DISP("Warning: %d Maxeler FPGA devices available. Only %d enabled. Use configure option --enable-maxmaxfpgadev=xxx to update the maximum value of supported Maxeler FPGA devices.\n", n, STARPU_MAXMAXFPGADEVS); + n = STARPU_MAXMAXFPGADEVS; + } + + config->topology.nhwdevices[STARPU_MAX_FPGA_WORKER] = nmax_fpga = n; +} + +/* Determine which devices we will use. + * + * config->conf.nmax_fpga is the requested number of devices; unless it is 0 it + * is passed to _starpu_topology_check_ndevices() together with the discovered + * device count. One worker is then configured per device id handed out by + * _starpu_get_next_devid(), stopping early when it returns -1. + * + * Note: the local nmax_fpga below shadows the file-scope counter of the same name. + */ +void _starpu_init_max_fpga_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int nmax_fpga = config->conf.nmax_fpga; + if (nmax_fpga != 0) + { + /* The user did not disable FPGA. We need to initialize + * FPGA early to count the number of devices */ + _starpu_init_max_fpga(); + int nb_devices = _starpu_max_fpga_get_device_count(); + + _starpu_topology_check_ndevices(&nmax_fpga, nb_devices, 0, STARPU_MAXMAXFPGADEVS, 0, "nmax_fpga", "Maxeler FPGA", "maxmaxfpgadev"); + } + + /* Now we know how many MAX FPGA devices will be used */ + topology->ndevices[STARPU_MAX_FPGA_WORKER] = nmax_fpga; + + _starpu_initialize_workers_max_fpga_deviceid(config); + + unsigned max_fpga; + for (max_fpga = 0; (int) max_fpga < nmax_fpga; max_fpga++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_MAX_FPGA_WORKER); + if (devid == -1) + { + // There are no more devices left + topology->ndevices[STARPU_MAX_FPGA_WORKER] = max_fpga; + break; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_MAX_FPGA_WORKER, + max_fpga, devid, 0, 0, + 1, 1, NULL, NULL); + } +} + +/* Bind the driver on a CPU core */ +void _starpu_max_fpga_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned *preferred_binding = NULL; + 
unsigned npreferred = 0; + /* Device driven by this worker: the binding is chosen once per device + * and cached in max_fpga_bindid[] for later workers of the same device */ + unsigned devid = workerarg->devid; + + if (max_fpga_bindid_init[devid]) + { + workerarg->bindid = max_fpga_bindid[devid]; + } + else + { + max_fpga_bindid_init[devid] = 1; + workerarg->bindid = max_fpga_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } +} + +/* Set up memory and buses */ +void _starpu_max_fpga_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + /* Perhaps the worker has some "favourite" bindings */ + unsigned devid = workerarg->devid; + unsigned numa; + + if (max_fpga_memory_init[devid]) + { + memory_node = max_fpga_memory_nodes[devid]; + } + else + { + max_fpga_memory_init[devid] = 1; + + memory_node = max_fpga_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MAX_FPGA_RAM, devid); + _starpu_register_bus(STARPU_MAIN_RAM, memory_node); + _starpu_register_bus(memory_node, STARPU_MAIN_RAM); + + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(workerarg, numa); + + _starpu_worker_drives_memory_node(workerarg, memory_node); + + workerarg->memory_node = memory_node; +} + +static void _starpu_max_fpga_limit_max_fpga_mem(unsigned devid) +{ + starpu_ssize_t limit=-1; + + //TODO + limit = starpu_getenv_number("STARPU_LIMIT_MAX_FPGA_MEM"); + if(limit != -1) + max_fpga_mem[devid] = limit*1024*1024; +} + +static void init_device_context(unsigned devid) +{ + struct starpu_max_load *load = _starpu_config.conf.max_fpga_load; + + /* 0 would be seen as NULL, i.e. allocation failed... 
*/ + // FIXME: Maxeler FPGAs want 192-byte alignment + // TODO: use int max_get_burst_size (max_file_t *maxfile, const char *name) + current_address[devid] = (fpga_mem) (8192*192); + max_fpga_mem[devid] = 128ULL*1024*1024*1024; + + _starpu_max_fpga_limit_max_fpga_mem(devid); + + if (!load) + { + /* Nothing specified, single-FPGA execution with basic static + * interface, file will be auto-loaded by SLiC. */ + return; + } + else + { + unsigned n; + + /* Which load we shall use */ + for (n = 0; load->file; n++, load++) + { + if (!strcmp(load->engine_id_pattern, "*") + || strstr(load->engine_id_pattern, ":*")) + break; + if (n == devid) + break; + } + + STARPU_ASSERT(load->file); + + if (!strcmp(load->engine_id_pattern, "*") + || strstr(load->engine_id_pattern, ":*")) + { + char s[strlen(load->engine_id_pattern) + 32]; + if (!strcmp(load->engine_id_pattern, "*")) + snprintf(s, sizeof(s), "*:%u", (unsigned) devid); + else + { + char *colon = strstr(load->engine_id_pattern, ":*"); + snprintf(s, sizeof(s), "%.*s:%u", + (int) (colon - load->engine_id_pattern), + load->engine_id_pattern, + (unsigned) devid); + } + /* FIXME: this assumes that the loads are in-order. 
+ * Ideally we'd detect which ones had an explicit load */ + engines[devid] = max_load(load->file, s); + /* Report the engine (devid), not the index of the star entry in the load array */ + STARPU_ASSERT_MSG(engines[devid], "engine %u (part of *) could not be loaded\n", devid); + } + else + { + engines[n] = max_load(load->file, load->engine_id_pattern); + STARPU_ASSERT_MSG(engines[n], "engine %u could not be loaded\n", n); + } + } +} + +/* Worker-thread initialisation: starts the driver, sets up the device context, + * registers the device memory size, names the thread and finally signals + * worker->ready_cond so that the main thread knows the worker is ready. + * Always returns 0. */ +static int _starpu_max_fpga_driver_init(struct _starpu_worker *worker) +{ + int devid = worker->devid; + _starpu_driver_start(worker, STARPU_MAX_FPGA_WORKER, 1); + + /* Set up the device first: init_device_context() is what fills + * max_fpga_mem[devid] (and applies STARPU_LIMIT_MAX_FPGA_MEM), which is + * read just below when registering the memory size. */ + // TODO: multiple fpga in same thread + init_device_context(devid); + + /* FIXME: when we have NUMA support, properly turn node number into NUMA node number */ + // TODO: drop test when we allocated a memory node for fpga + if (worker->memory_node != STARPU_MAIN_RAM) + _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_max_fpga_get_max_fpga_mem_size(worker->devid)); + + snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid); + snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid); + starpu_pthread_setname(worker->short_name); + + _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + + /* tell the main thread that we are ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + worker->status = STATUS_UNKNOWN; + worker->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + return 0; +} + +static int _starpu_max_fpga_driver_deinit(struct _starpu_worker *fpga_worker) +{ + _STARPU_TRACE_WORKER_DEINIT_START; + + unsigned memnode = fpga_worker->memory_node; + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! 
*/ + _starpu_free_all_automatically_allocated_buffers(memnode); + + fpga_worker->worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_MAX_FPGA_WORKER); + + return 0; +} + +static uintptr_t _starpu_max_fpga_allocate_memory(unsigned dst_node, size_t size, int flags) +{ + (void) flags; + unsigned devid = starpu_memory_node_get_devid(dst_node); + + fpga_mem addr, next_addr; + addr = current_address[devid]; + next_addr = current_address[devid] + size; + if (next_addr >= (fpga_mem) max_fpga_mem[devid]) + { + printf("Memory overflow on %u\n", devid); + return 0; + } + current_address[devid] = next_addr; + printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size); + return (uintptr_t) addr; +} + +static int _starpu_max_fpga_copy_ram_to_max_fpga(void *src, void *dst, size_t size) +{ + printf("ram to fpga, fpga @= %p\n",dst); + memcpy(dst,src,size); + return 0; + // LMemLoopback_writeLMem(dst, size, src); +} + +/** + Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory + node to the address pointed by DST in the DST_NODE memory node +*/ +void copy_ram_to_max_fpga(void *src, void *dst, size_t size) +{ + (void) src; (void) dst; (void) size; + printf("ram to fpga, fpga @= %p\n",dst); + // LMemLoopback_writeLMem(size, dst, src); +} + +void copy_max_fpga_to_ram(void *src, void *dst, size_t size) +{ + (void) src; (void) dst; (void) size; + printf("ram to fpga, fpga @= %p\n",src); + //LMemLoopback_readLMem(size, src, dst); +} + +/** + Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory + node to the address pointed by DST in the DST_NODE memory node +*/ +static int _starpu_max_fpga_copy_max_fpga_to_ram(void *src, void *dst, size_t size) +{ + printf("fpga to ram, fpga @= %p\n",src); + memcpy(dst,src,size); + return 0; + //LMemLoopback_readLMem(src, size, dst); +} + +/** + Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory + node to the address pointed by DST in the DST_NODE memory 
node +*/ +int _starpu_max_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size) +{ + /* Currently a plain memcpy; the log names the actual direction */ + printf("fpga to fpga, fpga @= %p\n",src); + memcpy(dst,src,size); + return 0; + //LMemLoopback_XXXLMem(src, size, dst); +} + +/* Asynchronous transfers */ +static int _starpu_max_fpga_copy_ram_to_max_fpga_async(void *src, void *dst, size_t size) +{ + printf("ram to fpga, fpga @= %p\n",dst); + memcpy(dst,src,size); + return 0; + // TODO: find in the documentation an asynchronous version of LMemLoopback_writeLMem(); +} + +static int _starpu_max_fpga_copy_max_fpga_to_ram_async(void *src, void *dst, size_t size) +{ + printf("fpga to ram, fpga @= %p\n",src); + memcpy(dst,src,size); + return 0; +} + +static int _starpu_run_fpga(struct _starpu_worker *workerarg) +{ + /* Let's go ! */ + _starpu_max_fpga_worker(workerarg); + return 0; +} + +int _starpu_max_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) +{ + return _starpu_max_fpga_copy_ram_to_max_fpga((char*) src + src_offset, (char*) dst + dst_offset, ssize); +} + +int _starpu_max_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) +{ + return _starpu_max_fpga_copy_max_fpga_to_ram((char*) src + src_offset, (char*) dst + dst_offset, ssize); +} + +int _starpu_max_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) +{ + return _starpu_max_fpga_copy_fpga_to_fpga((char*) src + src_offset, (char*) dst + dst_offset, ssize); +} + +int _starpu_max_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) 
+{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + + STARPU_ASSERT(src_kind == STARPU_MAX_FPGA_RAM && dst_kind == STARPU_CPU_RAM); + + int ret = 1; + + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_max_fpga_copy_disabled() || + !(copy_methods->max_fpga_to_ram_async || copy_methods->any_to_any)) + { + /* this is not associated to a request so it's synchronous */ + STARPU_ASSERT(copy_methods->max_fpga_to_ram || copy_methods->any_to_any); + if (copy_methods->max_fpga_to_ram) + copy_methods->max_fpga_to_ram(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + //req->async_channel.type = STARPU_MAX_FPGA_RAM; + if (copy_methods->max_fpga_to_ram_async) + ret = copy_methods->max_fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + //_starpu_max_fpga_init_event(&(req->async_channel.event.fpga_event), src_node); + } + return ret; +} + +int _starpu_max_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MAX_FPGA_RAM); + + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_max_fpga_copy_disabled() || + !(copy_methods->ram_to_max_fpga_async || copy_methods->any_to_any)) + { + /* this is not associated to a request so it's synchronous */ + 
STARPU_ASSERT(copy_methods->ram_to_max_fpga || copy_methods->any_to_any); + if (copy_methods->ram_to_max_fpga) + copy_methods->ram_to_max_fpga(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + //req->async_channel.type = STARPU_MAX_FPGA_RAM; + if (copy_methods->ram_to_max_fpga_async) + copy_methods->ram_to_max_fpga_async(src_interface, src_node, dst_interface, dst_node); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + //_starpu_max_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node); + } + return 0; +} + +static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch) +{ + int ret; + int profiling = starpu_profiling_status_get(); + + struct starpu_task *task = worker_task; + struct starpu_codelet *cl = task->cl; + + STARPU_ASSERT(cl); + + /* TODO: use asynchronous */ + ret = _starpu_fetch_task_input(task, j, 0); + if (ret != 0) + { + /* there was not enough memory so the codelet cannot be executed right now ... */ + /* push the codelet back and try another one ... */ + return -EAGAIN; + } + + /* Give profiling variable */ + _starpu_driver_start_job(fpga_args, j, perf_arch, rank, profiling); + + /* In case this is a Fork-join parallel task, the worker does not + * execute the kernel at all. 
*/ + if ((rank == 0) || (cl->type != STARPU_FORKJOIN)) + { + _starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl); + + STARPU_ASSERT_MSG(func, "when STARPU_MAX_FPGA is defined in 'where', fpga_func or max_fpga_funcs has to be defined"); + if (_starpu_get_disable_kernels() <= 0) + { + _STARPU_TRACE_START_EXECUTING(j); + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + _STARPU_TRACE_END_EXECUTING(j); + } + } + + _starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling); + + _starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling); + + _starpu_push_task_output(j); + + return 0; +} + +int _starpu_max_fpga_driver_run_once(struct _starpu_worker *fpga_worker) +{ + unsigned memnode = fpga_worker->memory_node; + int workerid = fpga_worker->workerid; + + _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_datawizard_progress(1); + if (memnode != STARPU_MAIN_RAM) + { + _starpu_datawizard_progress(1); + } + _STARPU_TRACE_END_PROGRESS(memnode); + + struct _starpu_job *j; + struct starpu_task *task; + int res; + + task = _starpu_get_worker_task(fpga_worker, workerid, memnode); + + if (!task) + return 0; + + j = _starpu_get_job_associated_to_task(task); + + /* can a cpu perform that task ? */ + if (!_STARPU_MAY_PERFORM(j, MAX_FPGA)) + { + /* put it at the end of the queue ... 
XXX */ + _starpu_push_task_to_workers(task); + return 0; + } + + int rank = 0; + int is_parallel_task = (j->task_size > 1); + + struct starpu_perfmodel_arch* perf_arch; + + if (is_parallel_task) + { + STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); + rank = j->active_task_alias_count++; + STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); + + if(j->combined_workerid != -1) + { + struct _starpu_combined_worker *combined_worker; + combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid); + + fpga_worker->combined_workerid = j->combined_workerid; + fpga_worker->worker_size = combined_worker->worker_size; + fpga_worker->current_rank = rank; + perf_arch = &combined_worker->perf_arch; + } + else + { + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(fpga_worker, j); + STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", fpga_worker->workerid); + + perf_arch = &sched_ctx->perf_arch; + } + } + else + { + fpga_worker->combined_workerid = fpga_worker->workerid; + fpga_worker->worker_size = 1; + fpga_worker->current_rank = 0; + perf_arch = &fpga_worker->perf_arch; + } + + _starpu_set_current_task(j->task); + fpga_worker->current_task = j->task; + j->workerid = fpga_worker->workerid; + + res = execute_job_on_fpga(j, task, fpga_worker, rank, perf_arch); + + _starpu_set_current_task(NULL); + fpga_worker->current_task = NULL; + + if (res) + { + switch (res) + { + case -EAGAIN: + _starpu_push_task_to_workers(task); + return 0; + default: + STARPU_ABORT(); + } + } + + /* In the case of combined workers, we need to inform the + * scheduler each worker's execution is over. 
+ * Then we free the workers' task alias */ + if (is_parallel_task) + { + _starpu_sched_post_exec_hook(task); + free(task); + } + + if (rank == 0) + _starpu_handle_job_termination(j); + return 0; +} + +void *_starpu_max_fpga_worker(void *_arg) +{ + struct _starpu_worker* worker = _arg; + unsigned memnode = worker->memory_node; + + _starpu_max_fpga_driver_init(worker); + _STARPU_TRACE_START_PROGRESS(memnode); + while (_starpu_machine_is_running()) + { + _starpu_may_pause(); + _starpu_max_fpga_driver_run_once(worker); + } + _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_max_fpga_driver_deinit(worker); + + return NULL; +} + +struct _starpu_driver_ops _starpu_driver_max_fpga_ops = +{ + .init = _starpu_max_fpga_driver_init, + .run = _starpu_run_fpga, + .run_once = _starpu_max_fpga_driver_run_once, + .deinit = _starpu_max_fpga_driver_deinit +}; + +// TODO: transfers +struct _starpu_node_ops _starpu_driver_max_fpga_node_ops = +{ + .name = "fpga driver", + + .malloc_on_node = _starpu_max_fpga_allocate_memory, + .free_on_node = NULL, + + .is_direct_access_supported = NULL, + + //.copy_data_to[STARPU_CPU_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_cpu, + //.copy_data_to[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_fpga, + + //.copy_data_from[STARPU_CPU_RAM] = _starpu_max_fpga_copy_data_from_cpu_to_fpga, + //.copy_data_from[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_fpga, + + //.copy_interface_to[STARPU_CPU_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_cpu, + //.copy_interface_to[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_fpga, + + //.copy_interface_from[STARPU_CPU_RAM] = _starpu_max_fpga_copy_interface_from_cpu_to_fpga, + //.copy_interface_from[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_fpga, + + .wait_request_completion = NULL, + .test_request_completion = NULL, +}; diff --git a/src/drivers/max/driver_max_fpga.h b/src/drivers/max/driver_max_fpga.h new file mode 100644 index 
0000000..a760b7d
--- /dev/null
+++ b/src/drivers/max/driver_max_fpga.h
@@ -0,0 +1,54 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __DRIVER_FPGA_H__
+#define __DRIVER_FPGA_H__
+//#ifdef NOT_DEFINED
+#ifdef STARPU_USE_MAX_FPGA
+#include
+#endif
+//#endif
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+/* Registers the FPGA driver and memory-driver descriptors; declared whether or not STARPU_USE_MAX_FPGA is defined. */
+void _starpu_max_fpga_preinit(void);
+
+#ifdef STARPU_USE_MAX_FPGA
+typedef unsigned * fpga_mem;
+/* Operation tables of the FPGA driver and of its memory node. */
+extern struct _starpu_driver_ops _starpu_driver_max_fpga_ops;
+extern struct _starpu_node_ops _starpu_driver_max_fpga_node_ops;
+
+void _starpu_init_max_fpga(void);
+void _starpu_init_max_fpga_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *);
+void _starpu_max_fpga_discover_devices (struct _starpu_machine_config *config);
+void _starpu_max_fpga_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg);
+void _starpu_max_fpga_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg);
+/* Worker thread entry point; the argument is the struct _starpu_worker to drive. */
+void *_starpu_max_fpga_worker(void *);
+/* Without FPGA support, device discovery collapses to a no-op that only evaluates its argument. */
+#else
+#define _starpu_max_fpga_discover_devices(config) ((void) (config))
+#endif
+
+#endif // __DRIVER_FPGA_H__
+
diff --git a/src/drivers/max/driver_max_fpga_init.c
b/src/drivers/max/driver_max_fpga_init.c
new file mode 100644
index 0000000..a52c5e0
--- /dev/null
+++ b/src/drivers/max/driver_max_fpga_init.c
@@ -0,0 +1,49 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+/* Generic description of the FPGA worker type; the callbacks are only filled in when built with STARPU_USE_MAX_FPGA. */
+static struct _starpu_driver_info driver_info =
+{
+	.name_upper = "FPGA",
+	.name_var = "FPGA",
+	.name_lower = "fpga",
+	.memory_kind = STARPU_MAX_FPGA_RAM,
+	.alpha = 0.5,
+	.wait_for_worker_initialization = 1,
+#ifdef STARPU_USE_MAX_FPGA
+	.driver_ops = &_starpu_driver_max_fpga_ops,
+	.run_worker = _starpu_max_fpga_worker,
+	.init_worker_binding = _starpu_max_fpga_init_worker_binding,
+	.init_worker_memory = _starpu_max_fpga_init_worker_memory,
+#endif
+};
+/* Description of the FPGA memory kind; its node operations are likewise only set with STARPU_USE_MAX_FPGA. */
+static struct _starpu_memory_driver_info memory_driver_info =
+{
+	.name_upper = "FPGA",
+	.worker_archtype = STARPU_MAX_FPGA_WORKER,
+#ifdef STARPU_USE_MAX_FPGA
+	.ops = &_starpu_driver_max_fpga_node_ops,
+#endif
+};
+/* Register driver_info under STARPU_MAX_FPGA_WORKER and memory_driver_info under STARPU_MAX_FPGA_RAM. */
+void _starpu_max_fpga_preinit(void)
+{
+	_starpu_driver_info_register(STARPU_MAX_FPGA_WORKER, &driver_info);
+	_starpu_memory_driver_info_register(STARPU_MAX_FPGA_RAM, &memory_driver_info);
+}
diff --git a/src/drivers/mp_common/mp_common.c b/src/drivers/mp_common/mp_common.c
new file mode 100644
index 0000000..ffc3d96
--- /dev/null
+++ b/src/drivers/mp_common/mp_common.c
@@ -0,0 +1,547 @@
+/* StarPU --- Runtime system
for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013  Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+/* Return the printable name of COMMAND, i.e. the enumerator name without its STARPU_MP_COMMAND_ prefix, or "" for a value that is not a known command. The returned string is a literal and must not be freed. */
+const char *_starpu_mp_common_command_to_string(const enum _starpu_mp_command command)
+{
+	switch(command)
+	{
+		/* Commands from master to slave */
+		case STARPU_MP_COMMAND_EXIT:
+			return "EXIT";
+		case STARPU_MP_COMMAND_EXECUTE:
+			return "EXECUTE";
+		case STARPU_MP_COMMAND_EXECUTE_DETACHED:
+			return "EXECUTE_DETACHED";
+		case STARPU_MP_COMMAND_SINK_NBCORES:
+			return "SINK_NBCORES";
+		case STARPU_MP_COMMAND_LOOKUP:
+			return "LOOKUP";
+		case STARPU_MP_COMMAND_ALLOCATE:
+			return "ALLOCATE";
+		case STARPU_MP_COMMAND_FREE:
+			return "FREE";
+		case STARPU_MP_COMMAND_MAP:
+			return "MAP";
+		case STARPU_MP_COMMAND_UNMAP:
+			return "UNMAP";
+		case STARPU_MP_COMMAND_SYNC_WORKERS:
+			return "SYNC_WORKERS";
+
+		/* Note: synchronous send */
+		case STARPU_MP_COMMAND_RECV_FROM_HOST:
+			return "RECV_FROM_HOST";
+		case STARPU_MP_COMMAND_SEND_TO_HOST:
+			return "SEND_TO_HOST";
+		case STARPU_MP_COMMAND_RECV_FROM_SINK:
+			return "RECV_FROM_SINK";
+		case STARPU_MP_COMMAND_SEND_TO_SINK:
+			return "SEND_TO_SINK";
+
+		/* Note: Asynchronous send */
+		case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC:
+			return "RECV_FROM_HOST_ASYNC";
+		case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC:
+			return "SEND_TO_HOST_ASYNC";
+		case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC:
+			return "RECV_FROM_SINK_ASYNC";
+		case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC:
+			return "SEND_TO_SINK_ASYNC";
+
+		/* Synchronous answers from slave to master */
+		case STARPU_MP_COMMAND_ERROR_EXECUTE:
+			return "ERROR_EXECUTE";
+		case STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED:
+			return "ERROR_EXECUTE_DETACHED";
+		case STARPU_MP_COMMAND_ANSWER_LOOKUP:
+			return "ANSWER_LOOKUP";
+		case STARPU_MP_COMMAND_ERROR_LOOKUP:
+			return "ERROR_LOOKUP";
+		case STARPU_MP_COMMAND_ANSWER_ALLOCATE:
+			return "ANSWER_ALLOCATE";
+		case STARPU_MP_COMMAND_ERROR_ALLOCATE:
+			return "ERROR_ALLOCATE";
+		case STARPU_MP_COMMAND_ANSWER_MAP:
+			return "ANSWER_MAP";
+		case STARPU_MP_COMMAND_ERROR_MAP:
+			return "ERROR_MAP";
+		case STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE:
+			return "ANSWER_TRANSFER_COMPLETE";
+		case STARPU_MP_COMMAND_ANSWER_SINK_NBCORES:
+			return "ANSWER_SINK_NBCORES";
+		case STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED:
+			return "ANSWER_EXECUTION_SUBMITTED";
+		case STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED:
+			return "ANSWER_EXECUTION_DETACHED_SUBMITTED";
+
+		/* Asynchronous notifications from slave to master */
+		case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED:
+			return "NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED:
+			return "NOTIF_SEND_TO_HOST_ASYNC_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED:
+			return "NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED:
+			return "NOTIF_SEND_TO_SINK_ASYNC_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED:
+			return "NOTIF_EXECUTION_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED:
+			return "NOTIF_EXECUTION_DETACHED_COMPLETED";
+		case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION:
+			return "NOTIF_PRE_EXECUTION";
+
+		default:
+			return "";
+	}
+}
+
+const
char *_starpu_mp_common_node_kind_to_string(const int kind) +{ + switch(kind) + { + case STARPU_NODE_MPI_SINK: + return "MPI_SINK"; + case STARPU_NODE_MPI_SOURCE: + return "MPI_SOURCE"; + case STARPU_NODE_TCPIP_SINK: + return "TCPIP_SINK"; + case STARPU_NODE_TCPIP_SOURCE: + return "TCPIP_SOURCE"; + default: + return ""; + } +} +/* Allocate and initialize the sink structure, when the function returns + * all the pointer of functions are linked to the right ones. + */ +struct _starpu_mp_node * STARPU_ATTRIBUTE_MALLOC +_starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, + int peer_id) +{ + struct _starpu_mp_node *node; + + _STARPU_MALLOC(node, sizeof(struct _starpu_mp_node)); + + node->kind = node_kind; + + node->peer_id = peer_id; + + switch(node->kind) + { +#ifdef STARPU_USE_MPI_MASTER_SLAVE + case STARPU_NODE_MPI_SOURCE: + { + /* + node->nb_mp_sinks = + node->devid = + */ + node->peer_id = (_starpu_mpi_common_get_src_node() <= peer_id ? peer_id+1 : peer_id); + node->mp_connection.mpi_remote_nodeid = node->peer_id; + + node->init = _starpu_mpi_source_init; + node->launch_workers = NULL; + node->deinit = _starpu_mpi_source_deinit; + /* node->report_error = */ + + node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready; + node->mp_send = _starpu_mpi_common_mp_send; + node->mp_recv = _starpu_mpi_common_mp_recv; + node->nt_recv_is_ready = _starpu_mpi_common_notif_recv_is_ready; + node->nt_send_is_ready = _starpu_mpi_common_notif_send_is_ready; + node->mp_wait = NULL; + node->mp_signal = NULL; + node->nt_send = _starpu_mpi_common_nt_send; + node->nt_recv = _starpu_mpi_common_nt_recv; + node->dt_send = _starpu_mpi_common_send; + node->dt_recv = _starpu_mpi_common_recv; + node->dt_send_to_device = _starpu_mpi_common_send_to_device; + node->dt_recv_from_device = _starpu_mpi_common_recv_from_device; + + node->get_kernel_from_job = _starpu_src_common_get_cpu_func_from_job; + node->lookup = NULL; + node->bind_thread = NULL; + node->execute = NULL; + 
node->allocate = NULL; + node->free = NULL; + node->map = NULL; + node->unmap = NULL; + } + break; + + case STARPU_NODE_MPI_SINK: + { + /* + node->nb_mp_sinks = + node->devid = + */ + node->mp_connection.mpi_remote_nodeid = _starpu_mpi_common_get_src_node(); + + node->init = _starpu_mpi_sink_init; + node->launch_workers = _starpu_sink_launch_workers; + node->deinit = _starpu_sink_deinit; + /* node->report_error = */ + + node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready; + node->mp_send = _starpu_mpi_common_mp_send; + node->mp_recv = _starpu_mpi_common_mp_recv; + node->nt_recv_is_ready = _starpu_mpi_common_notif_recv_is_ready; + node->nt_send_is_ready = _starpu_mpi_common_notif_send_is_ready; + node->mp_wait = NULL; + node->mp_signal = NULL; + node->nt_send = _starpu_mpi_common_nt_send; + node->nt_recv = _starpu_mpi_common_nt_recv; + node->dt_send = _starpu_mpi_common_send; + node->dt_recv = _starpu_mpi_common_recv; + node->dt_send_to_device = _starpu_mpi_common_send_to_device; + node->dt_recv_from_device = _starpu_mpi_common_recv_from_device; + + node->dt_test = _starpu_mpi_common_test_event; + + node->get_kernel_from_job = NULL; + node->lookup = _starpu_sink_common_cpu_lookup; + node->bind_thread = _starpu_mpi_sink_bind_thread; + node->execute = _starpu_sink_common_execute; + node->allocate = _starpu_sink_common_allocate; + node->free = _starpu_sink_common_free; + node->map = _starpu_sink_common_map; + node->unmap = _starpu_sink_common_unmap; + } + break; +#endif /* STARPU_USE_MPI_MASTER_SLAVE */ + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + case STARPU_NODE_TCPIP_SOURCE: + { + /* + node->nb_mp_sinks = + node->devid = + */ + node->peer_id = (0 <= peer_id ? 
peer_id+1 : peer_id); + + node->mp_connection.tcpip_mp_connection = &tcpip_sock[node->peer_id]; + + node->init = _starpu_tcpip_source_init; + node->launch_workers = NULL; + node->deinit = _starpu_tcpip_source_deinit; + /* node->report_error = */ + + node->mp_recv_is_ready = _starpu_tcpip_common_recv_is_ready; + node->mp_send = _starpu_tcpip_common_mp_send; + node->mp_recv = _starpu_tcpip_common_mp_recv; + node->nt_recv_is_ready = _starpu_tcpip_common_notif_recv_is_ready; + node->nt_send_is_ready = _starpu_tcpip_common_notif_send_is_ready; + node->mp_wait = _starpu_tcpip_common_wait; + node->mp_signal = _starpu_tcpip_common_signal; + node->nt_send = _starpu_tcpip_common_nt_send; + node->nt_recv = _starpu_tcpip_common_nt_recv; + node->dt_send = _starpu_tcpip_common_send; + node->dt_recv = _starpu_tcpip_common_recv; + node->dt_send_to_device = _starpu_tcpip_common_send_to_device; + node->dt_recv_from_device = _starpu_tcpip_common_recv_from_device; + + node->get_kernel_from_job = _starpu_src_common_get_cpu_func_from_job; + node->lookup = NULL; + node->bind_thread = NULL; + node->execute = NULL; + node->allocate = NULL; + node->free = NULL; + node->map = NULL; + node->unmap = NULL; + } + break; + + case STARPU_NODE_TCPIP_SINK: + { + /* + node->nb_mp_sinks = + node->devid = + */ + node->mp_connection.tcpip_mp_connection = &tcpip_sock[0]; + + node->init = _starpu_tcpip_sink_init; + node->launch_workers = _starpu_sink_launch_workers; + node->deinit = _starpu_sink_deinit; + /* node->report_error = */ + + node->mp_recv_is_ready = _starpu_tcpip_common_recv_is_ready; + node->mp_send = _starpu_tcpip_common_mp_send; + node->mp_recv = _starpu_tcpip_common_mp_recv; + node->nt_recv_is_ready = _starpu_tcpip_common_notif_recv_is_ready; + node->nt_send_is_ready = _starpu_tcpip_common_notif_send_is_ready; + node->mp_wait = _starpu_tcpip_common_wait; + node->mp_signal = _starpu_tcpip_common_signal; + node->nt_send = _starpu_tcpip_common_nt_send; + node->nt_recv = 
_starpu_tcpip_common_nt_recv; + node->dt_send = _starpu_tcpip_common_send; + node->dt_recv = _starpu_tcpip_common_recv; + node->dt_send_to_device = _starpu_tcpip_common_send_to_device; + node->dt_recv_from_device = _starpu_tcpip_common_recv_from_device; + + node->dt_test = _starpu_tcpip_common_test_event; + + node->get_kernel_from_job = NULL; + node->lookup = _starpu_sink_common_cpu_lookup; + node->bind_thread = _starpu_tcpip_sink_bind_thread; + node->execute = _starpu_sink_common_execute; + node->allocate = _starpu_sink_common_allocate; + node->free = _starpu_sink_common_free; + node->map = _starpu_sink_common_map; + node->unmap = _starpu_sink_common_unmap; + } + break; +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + + default: + STARPU_ASSERT(0); + } + + /* Let's allocate the buffer, we want it to be big enough to contain + * a command, an argument and the argument size */ + _STARPU_MALLOC(node->buffer, BUFFER_SIZE); + + if (node->init) + node->init(node); + + mp_message_list_init(&node->message_queue); + STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL); + + STARPU_PTHREAD_MUTEX_INIT(&node->connection_mutex, NULL); + + _starpu_mp_event_list_init(&node->event_list); + _starpu_mp_event_list_init(&node->event_queue); + + /* If the node is a sink then we must initialize some field */ + if(node->kind == STARPU_NODE_MPI_SINK || node->kind == STARPU_NODE_TCPIP_SINK) + { + int i; + STARPU_HG_DISABLE_CHECKING(node->is_running); + node->is_running = 1; + _STARPU_MALLOC(node->run_table, sizeof(struct mp_task *)*node->nb_cores); + _STARPU_MALLOC(node->run_table_detached, sizeof(struct mp_task *)*node->nb_cores); + _STARPU_MALLOC(node->sem_run_table, sizeof(sem_t)*node->nb_cores); + + for(i=0; inb_cores; i++) + { + node->run_table[i] = NULL; + node->run_table_detached[i] = NULL; + sem_init(&node->sem_run_table[i],0,0); + } + mp_barrier_list_init(&node->barrier_list); + STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL); + 
STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1); + + node->launch_workers(node); + } + + return node; +} + +/* Deinitialize the sink structure and release the structure */ +void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node) +{ + if (node->deinit) + node->deinit(node); + + STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex); + + /* If the node is a sink then we must destroy some field */ + if(node->kind == STARPU_NODE_MPI_SINK || node->kind == STARPU_NODE_TCPIP_SINK) + { + int i; + for(i=0; inb_cores; i++) + { + sem_destroy(&node->sem_run_table[i]); + } + + free(node->run_table); + free(node->run_table_detached); + free(node->sem_run_table); + + STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex); + STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier); + } + + free(node->buffer); + free(node); +} + +/* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ +static void __starpu_mp_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size, int notif) +{ + STARPU_ASSERT_MSG(arg_size <= BUFFER_SIZE, "Too much data (%d) for the static buffer (%d), increase BUFFER_SIZE perhaps?", arg_size, BUFFER_SIZE); + + //printf("SEND %s: %d/%s - arg_size %d by %lu \n", notif?"NOTIF":"CMD", command, _starpu_mp_common_command_to_string(command), arg_size, starpu_pthread_self()); + + /* MPI sizes are given through a int */ + int command_size = sizeof(enum _starpu_mp_command); + int arg_size_size = sizeof(int); + + /* Let's copy the data into the command line buffer */ + memcpy(node->buffer, &command, command_size); + memcpy((void*) ((uintptr_t)node->buffer + command_size), &arg_size, arg_size_size); + + if (!notif) + node->mp_send(node, node->buffer, command_size + arg_size_size); + else + node->nt_send(node, node->buffer, command_size + arg_size_size); + + if (arg_size) + { + if (!notif) + node->mp_send(node, arg, arg_size); + else + 
node->nt_send(node, arg, arg_size); + } +} + +/* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ +void _starpu_mp_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size) +{ + __starpu_mp_common_send_command(node, command, arg, arg_size, 0); +} + +/* Send NOTIF COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ +void _starpu_nt_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size) +{ + __starpu_mp_common_send_command(node, command, arg, arg_size, 1); +} + +/* Return the command received from SENDER. In case SENDER sent an argument + * beside the command, an address to a copy of this argument is returns in arg. + * There is no need to free this address as it's not allocated at this time. + * However, the data pointed by arg shouldn't be relied on after a new call to + * STARPU_MP_COMMON_RECV_COMMAND as it might corrupt it. + */ +static enum _starpu_mp_command __starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size, int notif) +{ + enum _starpu_mp_command command; + + /* MPI sizes are given through a int */ + int command_size = sizeof(enum _starpu_mp_command); + int arg_size_size = sizeof(int); + + if (!notif) + node->mp_recv(node, node->buffer, command_size + arg_size_size); + else + node->nt_recv(node, node->buffer, command_size + arg_size_size); + + command = *((enum _starpu_mp_command *) node->buffer); + *arg_size = *((int *) ((uintptr_t)node->buffer + command_size)); + + //printf("RECV %s : %d/%s - arg_size %d by %lu \n", notif?"NOTIF":"CMD", command, _starpu_mp_common_command_to_string(command), *arg_size, starpu_pthread_self()); + + /* If there is no argument (ie. 
arg_size == 0), + * let's return the command right now */ + if (!(*arg_size)) + { + *arg = NULL; + return command; + } + + STARPU_ASSERT(*arg_size <= BUFFER_SIZE); + + if (!notif) + node->mp_recv(node, node->buffer, *arg_size); + else + node->nt_recv(node, node->buffer, *arg_size); + + *arg = node->buffer; + + return command; +} + +/* Return the command received from SENDER*/ +enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size) +{ + return __starpu_mp_common_recv_command(node, arg, arg_size, 0); +} + +/* Return the notif command received from SENDER*/ +enum _starpu_mp_command _starpu_nt_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size) +{ + return __starpu_mp_common_recv_command(node, arg, arg_size, 1); +} + +void _starpu_sink_deinit(struct _starpu_mp_node *node) +{ + int i; + node->is_running = 0; + for(i=0; inb_cores; i++) + { + sem_post(&node->sem_run_table[i]); + STARPU_PTHREAD_JOIN(((starpu_pthread_t *)node->thread_table)[i],NULL); + } + free(node->thread_table); +} + +void _starpu_sink_launch_workers(struct _starpu_mp_node *node) +{ + //TODO + int i; + struct arg_sink_thread * arg; + cpu_set_t cpuset; + starpu_pthread_attr_t attr; + starpu_pthread_t thread; + + for(i=0; i < node->nb_cores; i++) + { + int ret; + + ret = starpu_pthread_attr_init(&attr); + STARPU_ASSERT(ret == 0); + +#if defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__) + //init the set + CPU_ZERO(&cpuset); + CPU_SET(i,&cpuset); + + int nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); + + if (nobind <= 0) + { + ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + STARPU_ASSERT(ret == 0); + } +#else +#warning no CPU binding support +#endif + + /*prepare the argument for the thread*/ + _STARPU_MALLOC(arg, sizeof(struct arg_sink_thread)); + arg->coreid = i; + arg->node = node; + + STARPU_PTHREAD_CREATE(&thread, &attr, _starpu_sink_thread, arg); + 
starpu_pthread_attr_destroy(&attr); + ((starpu_pthread_t *)node->thread_table)[i] = thread; + + } +} diff --git a/src/drivers/mp_common/mp_common.h b/src/drivers/mp_common/mp_common.h new file mode 100644 index 0000000..0f49cd8 --- /dev/null +++ b/src/drivers/mp_common/mp_common.h @@ -0,0 +1,310 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#ifndef __MP_COMMON_H__
+#define __MP_COMMON_H__
+
+/** @file */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#pragma GCC visibility push(hidden)
+
+#ifdef STARPU_USE_MP
+/* Size in bytes of the per-node buffer (_starpu_mp_node::buffer); command arguments must fit in it */
+#define BUFFER_SIZE 65536
+
+#define STARPU_MP_SRC_NODE 0
+#define STARPU_MP_SINK_NODE(a) ((a) + 1)
+
+#define STARPU_MP_COMMON_REPORT_ERROR(node, status)			\
+	(node)->report_error(__starpu_func__, __FILE__, __LINE__, (status))
+enum _starpu_mp_command
+{
+	/* Commands from master to slave */
+
+	STARPU_MP_COMMAND_EXIT,
+	STARPU_MP_COMMAND_EXECUTE,
+	STARPU_MP_COMMAND_EXECUTE_DETACHED,
+	STARPU_MP_COMMAND_SINK_NBCORES,
+	STARPU_MP_COMMAND_LOOKUP,
+	STARPU_MP_COMMAND_ALLOCATE,
+	STARPU_MP_COMMAND_FREE,
+	STARPU_MP_COMMAND_MAP,
+	STARPU_MP_COMMAND_UNMAP,
+	STARPU_MP_COMMAND_SYNC_WORKERS,
+
+	/* Note: synchronous send */
+	STARPU_MP_COMMAND_RECV_FROM_HOST,
+	STARPU_MP_COMMAND_SEND_TO_HOST,
+	STARPU_MP_COMMAND_RECV_FROM_SINK,
+	STARPU_MP_COMMAND_SEND_TO_SINK,
+
+	/* Note: Asynchronous send */
+	STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC,
+	STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC,
+	STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC,
+	STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC,
+
+	/* Synchronous answers from slave to master */
+	STARPU_MP_COMMAND_ERROR_EXECUTE,
+	STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED,
+	STARPU_MP_COMMAND_ANSWER_LOOKUP,
+	STARPU_MP_COMMAND_ERROR_LOOKUP,
+	STARPU_MP_COMMAND_ANSWER_ALLOCATE,
+	STARPU_MP_COMMAND_ERROR_ALLOCATE,
+	STARPU_MP_COMMAND_ANSWER_MAP,
+	STARPU_MP_COMMAND_ERROR_MAP,
+	STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE,
+	STARPU_MP_COMMAND_ANSWER_SINK_NBCORES,
+	STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED,
+	STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED,
+
+	/* Asynchronous notifications from slave to master */
+	STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION,
+
+	STARPU_MP_COMMAND_NOTIF_FIRST = STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED,
+	STARPU_MP_COMMAND_NOTIF_LAST = STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION,
+};
+
+const char *_starpu_mp_common_command_to_string(const enum _starpu_mp_command command);
+
+enum _starpu_mp_node_kind
+{
+	STARPU_NODE_MPI_SINK,
+	STARPU_NODE_MPI_SOURCE,
+	STARPU_NODE_TCPIP_SINK,
+	STARPU_NODE_TCPIP_SOURCE,
+	STARPU_NODE_INVALID_KIND
+};
+
+const char *_starpu_mp_common_node_kind_to_string(const int kind);
+
+union _starpu_mp_connection
+{
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
+	int mpi_remote_nodeid;
+#endif
+#ifdef STARPU_USE_TCPIP_MASTER_SLAVE
+	struct _starpu_tcpip_socket *tcpip_mp_connection;
+#endif
+};
+
+struct _starpu_mp_transfer_command
+{
+	size_t size;
+	void *addr;
+	void *event;
+};
+
+struct _starpu_mp_transfer_command_to_device
+{
+	size_t size;
+	void *addr;
+	void *event;
+	int devid;
+	char end[]; /* Keep last to compute non-padded size */
+};
+
+struct _starpu_mp_transfer_map_command
+{
+	size_t offset;
+	size_t size;
+	char fd_name[];
+};
+
+struct _starpu_mp_transfer_unmap_command
+{
+	uintptr_t addr;
+	size_t size;
+};
+
+LIST_TYPE(mp_barrier,
+		int id;
+		starpu_pthread_barrier_t before_work_barrier;
+		starpu_pthread_barrier_t after_work_barrier;
+	 );
+
+LIST_TYPE(mp_message,
+		enum _starpu_mp_command type;
+		char *buffer;
+		int size;
+	 );
+
+struct mp_task
+{
+	void (*kernel)(void **, void *);
+	enum starpu_data_interface_id *ids;
+	void **interfaces;
+	unsigned nb_interfaces;
+	void *cl_arg;
+	unsigned cl_arg_size;
+	void *cl_ret;
+	unsigned cl_ret_size;
+	unsigned coreid;
+	enum starpu_codelet_type type;
+	int is_parallel_task;
+	int combined_workerid;
+	int detached;
+	struct mp_barrier* mp_barrier;
+};
+
+LIST_TYPE(_starpu_mp_event,
+		struct _starpu_async_channel event;
+		void * remote_event;
+		enum _starpu_mp_command answer_cmd;
+);
+
+
+/** Message-passing working node, whether source
+ * or sink */
+struct _starpu_mp_node
+{
+	enum _starpu_mp_node_kind kind;
+
+	int baseworkerid;
+
+	/* Number of cores on the device.
+	 * Must be initialized by the init function */
+	int nb_cores;
+
+	/* Set to 1 when a sink node is created, cleared by _starpu_sink_deinit */
+	int is_running;
+
+	/** Buffer used for data transfers, allocated
+	 * during node initialization.
+	 * Size : BUFFER_SIZE */
+	void *buffer;
+
+	/** For sink : -1.
+	 * For host : index of the sink = devid.
+	 */
+	int peer_id;
+
+	/** Connection used for command passing between the host thread and the
+	 * sink it controls */
+	union _starpu_mp_connection mp_connection;
+
+	/** Mutex to protect the interleaving of communications when using one thread per node,
+	 * for instance, when a thread transfers a piece of data and another wants to use
+	 * a sink_to_sink communication */
+	starpu_pthread_mutex_t connection_mutex;
+
+	/** This list contains events
+	 *  about asynchronous requests
+	 */
+	struct _starpu_mp_event_list event_list;
+	/* list where threads add events to send to the source node */
+	struct _starpu_mp_event_list event_queue;
+
+	/** Barrier initialized for nb_cores+1 participants when a sink node is created */
+	starpu_pthread_barrier_t init_completed_barrier;
+
+	/** Table of the worker threads, accessed as an array of starpu_pthread_t */
+	void* thread_table;
+
+	/* list where threads add messages to send to the source node */
+	struct mp_message_list message_queue;
+	starpu_pthread_mutex_t message_queue_mutex;
+
+	/* list of barriers for combined workers */
+	struct mp_barrier_list barrier_list;
+	starpu_pthread_mutex_t barrier_mutex;
+
+	/* Per-core tables (nb_cores entries each) where workers come to pick tasks, and the semaphore of each core */
+	struct mp_task ** run_table;
+	struct mp_task ** run_table_detached;
+	sem_t * sem_run_table;
+
+	/** Node general functions */
+	void (*init)            (struct _starpu_mp_node *node);
+	void (*launch_workers)  (struct _starpu_mp_node *node);
+	void (*deinit)          (struct _starpu_mp_node *node);
+	void (*report_error)    (const char *, const char *, const int, const int);
+
+	/** Message passing */
+	int (*mp_recv_is_ready) (const struct _starpu_mp_node *);
+	void (*mp_send)         (const struct _starpu_mp_node *, void *, int);
+	void (*mp_recv)         (const struct _starpu_mp_node *, void *, int);
+
+	/** Notifications */
+	int (*nt_recv_is_ready) (const struct _starpu_mp_node *);
+	int (*nt_send_is_ready) (const struct _starpu_mp_node *);
+	void (*nt_send)         (const struct _starpu_mp_node *, void *, int);
+	void (*nt_recv)         (const struct _starpu_mp_node *, void *, int);
+
+	/* signal */
+	void (*mp_wait)         (struct _starpu_mp_node *);
+	void (*mp_signal)       (const struct _starpu_mp_node *);
+
+	/** Data transfers */
+	void (*dt_send)             (const struct _starpu_mp_node *, void *, int, void *);
+	void (*dt_recv)             (const struct _starpu_mp_node *, void *, int, void *);
+	void (*dt_send_to_device)   (const struct _starpu_mp_node *, int, void *, int, void *);
+	void (*dt_recv_from_device) (const struct _starpu_mp_node *, int, void *, int, void *);
+
+	/** Test async transfers */
+	unsigned int (*dt_test) (struct _starpu_async_channel *);
+
+	void (*(*get_kernel_from_job) (const struct _starpu_mp_node *,struct _starpu_job *))(void);
+	void (*(*lookup)              (const struct _starpu_mp_node *, char*))(void);
+	void (*bind_thread)           (const struct _starpu_mp_node *, int,int *,int);
+	void (*execute)               (struct _starpu_mp_node *, void *, int);
+	void (*allocate)              (const struct _starpu_mp_node *, void *, int);
+	void (*free)                  (const struct _starpu_mp_node *, void *, int);
+	void (*map)                   (const struct _starpu_mp_node *, void *, int);
+	void (*unmap)                 (const struct _starpu_mp_node *, void *, int);
+};
+
+struct _starpu_mp_node * _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_devid) STARPU_ATTRIBUTE_MALLOC;
+
+void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node);
+
+void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
+				    const enum _starpu_mp_command command,
+				    void *arg, int arg_size);
+
+void _starpu_nt_common_send_command(const struct _starpu_mp_node *node,
+				    const enum _starpu_mp_command command,
+				    void *arg, int arg_size);
+
+enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size);
+
+enum _starpu_mp_command _starpu_nt_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size);
+
+void _starpu_sink_deinit(struct _starpu_mp_node *node);
+void _starpu_sink_launch_workers(struct _starpu_mp_node *node);
+
+#endif /* STARPU_USE_MP */
+
+#pragma GCC visibility pop
+
+#endif /* __MP_COMMON_H__ */
diff --git a/src/drivers/mp_common/sink_common.c b/src/drivers/mp_common/sink_common.c
new file mode 100644
index 0000000..a8b26bf
--- /dev/null
+++ b/src/drivers/mp_common/sink_common.c
@@ -0,0 +1,924 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013  Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "sink_common.h"
+
+/* Return the sink kind of the running process, based on the value of the
+ * STARPU_SINK environment variable.
+ * If there is no valid value retrieved, return STARPU_INVALID_KIND + */ +static enum _starpu_mp_node_kind _starpu_sink_common_get_kind(void) +{ + /* Environment variable STARPU_SINK must be defined when running on sink + * side : let's use it to get the kind of node we're running on */ + char *node_kind = starpu_getenv("STARPU_SINK"); + STARPU_ASSERT(node_kind); + + if (!strcmp(node_kind, "STARPU_MPI_MS")) + return STARPU_NODE_MPI_SINK; + else if (!strcmp(node_kind, "STARPU_TCPIP_MS")) + return STARPU_NODE_TCPIP_SINK; + else + return STARPU_NODE_INVALID_KIND; +} + +/* + Send to host the number of cores of the sink device +*/ +static void _starpu_sink_common_get_nb_cores(struct _starpu_mp_node *node) +{ + // Process packet received from `_starpu_src_common_sink_cores'. + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_SINK_NBCORES, &node->nb_cores, sizeof(int)); +} + +/* Send to host the address of the function given in parameter + */ +static void _starpu_sink_common_lookup(const struct _starpu_mp_node *node, char *func_name) +{ + void (*func)(void); + func = node->lookup(node,func_name); + + //_STARPU_DEBUG("Looked up %s, got %p\n", func_name, func); + + /* If we couldn't find the function, let's send an error to the host. 
+ * The user probably made a mistake in the name */ + if (func) + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_LOOKUP, &func, sizeof(func)); + else + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ERROR_LOOKUP, NULL, 0); +} + +/* CPU version of sink lookup */ +void (*_starpu_sink_common_cpu_lookup(const struct _starpu_mp_node * node STARPU_ATTRIBUTE_UNUSED, char* func_name))(void) +{ +#ifdef RTLD_DEFAULT + return dlsym(RTLD_DEFAULT, func_name); +#else + void *dl_handle = dlopen(NULL, RTLD_NOW); + return dlsym(dl_handle, func_name); +#endif +} + +/* Allocate a memory space and send the address of this space to the host + */ +void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(size_t)); + + void *addr; + _STARPU_MALLOC(addr, *(size_t *)(arg)); + + /* If the allocation fail, let's send an error to the host. + */ + if (addr) + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_ALLOCATE, &addr, sizeof(addr)); + else + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ERROR_ALLOCATE, NULL, 0); +} + +void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(void *)); + + free(*(void **)(arg)); +} + +/* Map a memory space and send the address of this space to the host + */ +void _starpu_sink_common_map(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT((unsigned int)arg_size >= sizeof(struct _starpu_mp_transfer_map_command)); + + struct _starpu_mp_transfer_map_command *map_cmd = (struct _starpu_mp_transfer_map_command *)arg; + + void *map_addr = _starpu_sink_map(map_cmd->fd_name, map_cmd->offset, map_cmd->size); + + /* If mapping fail, let's send an error to the host. 
+ */ + if (map_addr) + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_MAP, &map_addr, sizeof(map_addr)); + else + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ERROR_MAP, NULL, 0); +} + +void _starpu_sink_common_unmap(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_unmap_command)); + + struct _starpu_mp_transfer_unmap_command *unmap_cmd = (struct _starpu_mp_transfer_unmap_command *)arg; + + _starpu_sink_unmap(unmap_cmd->addr, unmap_cmd->size); +} + +static void _starpu_sink_common_copy_from_host_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); + + struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; + + mp_node->dt_recv(mp_node, cmd->addr, cmd->size, NULL); +} + +static void _starpu_sink_common_copy_from_host_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); + + struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; + + /* For asynchronous transfers, we store events to test them later when they are finished */ + struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); + /* Save the command to send */ + sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED; + sink_event->remote_event = cmd->event; + + /* Set the sender (host) ready because we don't want to wait its ack */ + struct _starpu_async_channel * async_channel = &sink_event->event; + async_channel->node_ops = NULL; + async_channel->starpu_mp_common_finished_sender = -1; + async_channel->starpu_mp_common_finished_receiver = 0; + async_channel->polling_node_receiver = NULL; + async_channel->polling_node_sender = NULL; + + mp_node->dt_recv(mp_node, cmd->addr, cmd->size, &sink_event->event); + /* Push 
event on the list */ + _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); +} + +static void _starpu_sink_common_copy_to_host_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); + + struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; + + /* Save values before sending command to prevent the overwriting */ + size_t size = cmd->size; + void * addr = cmd->addr; + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, NULL, 0); + + mp_node->dt_send(mp_node, addr, size, NULL); +} + +static void _starpu_sink_common_copy_to_host_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); + + struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; + + /* For asynchronous transfers, we pass an event to dt_send to tell it that we are in async mode, + * and we push this event on the event list so that its completion can be tested later + */ + struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); + /* Save the notification command to send once the transfer is completed */ + sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED; + sink_event->remote_event = cmd->event; + + /* Set the receiver (host) ready because we don't want to wait for its ack */ + struct _starpu_async_channel * async_channel = &sink_event->event; + async_channel->node_ops = NULL; + async_channel->starpu_mp_common_finished_sender = 0; + async_channel->starpu_mp_common_finished_receiver = -1; + async_channel->polling_node_receiver = NULL; + async_channel->polling_node_sender = NULL; + + mp_node->dt_send(mp_node, cmd->addr, cmd->size, &sink_event->event); + /* Push event on the list */ + _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); +} + +static void _starpu_sink_common_copy_from_sink_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ +
STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; + + mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL); + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE, NULL, 0); +} + +static void _starpu_sink_common_copy_from_sink_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; + + /* For asynchronous transfers, we store events to test them later when they are finished + */ + struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); + /* Save the command to send */ + sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED; + sink_event->remote_event = cmd->event; + + /* Set the sender ready because we don't want to wait its ack */ + struct _starpu_async_channel * async_channel = &sink_event->event; + async_channel->node_ops = NULL; + async_channel->starpu_mp_common_finished_sender = -1; + async_channel->starpu_mp_common_finished_receiver = 0; + async_channel->polling_node_receiver = NULL; + async_channel->polling_node_sender = NULL; + + mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event); + /* Push event on the list */ + _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); +} + +static void _starpu_sink_common_copy_to_sink_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; + + mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL); +} + 
+static void _starpu_sink_common_copy_to_sink_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) +{ + STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; + + /* For asynchronous transfers, we pass an event to dt_send_to_device to tell it that we are in async mode, + * and we push this event on the event list so that its completion can be tested later + */ + struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); + /* Save the notification command to send once the transfer is completed */ + sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED; + sink_event->remote_event = cmd->event; + + /* Set the receiver ready because we don't want to wait for its ack */ + struct _starpu_async_channel * async_channel = &sink_event->event; + async_channel->node_ops = NULL; + async_channel->starpu_mp_common_finished_sender = 0; + async_channel->starpu_mp_common_finished_receiver = -1; + async_channel->polling_node_receiver = NULL; + async_channel->polling_node_sender = NULL; + + mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event); + + /* Push event on the list */ + _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); +} + +/* Receive workers and combined workers and store them into the struct config + */ +static void _starpu_sink_common_recv_workers(struct _starpu_mp_node * node, void *arg, int arg_size) +{ + /* Retrieve information from the message: five ints, read one after the other */ + STARPU_ASSERT(arg_size == (sizeof(int)*5)); + uintptr_t arg_ptr = (uintptr_t) arg; + int i; + + int nworkers = *(int *)arg_ptr; + arg_ptr += sizeof(nworkers); + + int worker_size = *(int *)arg_ptr; + arg_ptr += sizeof(worker_size); + + int combined_worker_size = *(int *)arg_ptr; + arg_ptr += sizeof(combined_worker_size); + + int baseworkerid = *(int *)arg_ptr; + arg_ptr += sizeof(baseworkerid); + + /* Clear data we won't use */ + struct _starpu_machine_config *config =
_starpu_get_machine_config(); + for(i=0; itopology.nworkers; i++) + { + free(config->workers[i].perf_arch.devices); + config->workers[i].perf_arch.devices = NULL; + } + + config->topology.nworkers = *(int *)arg_ptr; + + /* Retrieve workers */ + struct _starpu_worker * workers = &config->workers[baseworkerid]; + node->dt_recv(node,workers,worker_size, NULL); + + /* Update workers to have coherent field */ + for(i=0; icombined_workers; + node->dt_recv(node, combined_workers, combined_worker_size, NULL); + + node->baseworkerid = baseworkerid; + STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier); +} + +/* Function looping on the sink, waiting for tasks to execute. + * If the caller is the host, don't do anything. + */ +void _starpu_sink_common_worker(void) +{ + struct _starpu_mp_node *node = NULL; + enum _starpu_mp_command command; + int arg_size = 0; + void *arg = NULL; + int exit_starpu = 0; + enum _starpu_mp_node_kind node_kind = _starpu_sink_common_get_kind(); + + if (node_kind == STARPU_NODE_INVALID_KIND) + _STARPU_ERROR("No valid sink kind retrieved, use the STARPU_SINK environment variable to specify this\n"); + + /* Create and initialize the node */ + node = _starpu_mp_common_node_create(node_kind, -1); + + starpu_pthread_key_t worker_key; + STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL); + + while (!exit_starpu) + { + /* Wait send/recv is ready */ + if (node->mp_wait) + node->mp_wait(node); + + /* If we have received a message */ + if(node->mp_recv_is_ready(node)) + { + command = _starpu_mp_common_recv_command(node, &arg, &arg_size); + switch(command) + { + case STARPU_MP_COMMAND_EXIT: + exit_starpu = 1; + break; + case STARPU_MP_COMMAND_EXECUTE_DETACHED: + case STARPU_MP_COMMAND_EXECUTE: + node->execute(node, arg, arg_size); + break; + case STARPU_MP_COMMAND_SINK_NBCORES: + _starpu_sink_common_get_nb_cores(node); + break; + case STARPU_MP_COMMAND_LOOKUP: + _starpu_sink_common_lookup(node, (char *) arg); + break; + + case STARPU_MP_COMMAND_ALLOCATE: + 
node->allocate(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_FREE: + node->free(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_MAP: + node->map(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_UNMAP: + node->unmap(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_RECV_FROM_HOST: + _starpu_sink_common_copy_from_host_sync(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_SEND_TO_HOST: + _starpu_sink_common_copy_to_host_sync(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_RECV_FROM_SINK: + _starpu_sink_common_copy_from_sink_sync(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_SEND_TO_SINK: + _starpu_sink_common_copy_to_sink_sync(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC: + _starpu_sink_common_copy_from_host_async(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC: + _starpu_sink_common_copy_to_host_async(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC: + _starpu_sink_common_copy_from_sink_async(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC: + _starpu_sink_common_copy_to_sink_async(node, arg, arg_size); + break; + + case STARPU_MP_COMMAND_SYNC_WORKERS: + _starpu_sink_common_recv_workers(node, arg, arg_size); + break; + default: + _STARPU_MSG("Oops, command %x unrecognized\n", command); + } + } + + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + /* If the list is not empty and we can send a notification */ + while(!mp_message_list_empty(&node->message_queue) && node->nt_send_is_ready(node)) + { + /* We pop a message and send it to the host */ + struct mp_message * message = mp_message_list_pop_back(&node->message_queue); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + //_STARPU_DEBUG("telling host that we have finished the task %p sur %d.\n", task->kernel, task->coreid); + STARPU_ASSERT(message->type >= STARPU_MP_COMMAND_NOTIF_FIRST && message->type <= 
STARPU_MP_COMMAND_NOTIF_LAST); + _starpu_nt_common_send_command(node, message->type, message->buffer, message->size); + free(message->buffer); + mp_message_delete(message); + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + + struct _starpu_mp_event * sink_event; + struct _starpu_mp_event * sink_event_next; + + for (sink_event = _starpu_mp_event_list_begin(&node->event_list); + sink_event != _starpu_mp_event_list_end(&node->event_list); + sink_event = sink_event_next) + { + sink_event_next = _starpu_mp_event_list_next(sink_event); + + /*if event is completed move it into event queue*/ + if(node->dt_test(&sink_event->event)) + { + _starpu_mp_event_list_erase(&node->event_list, sink_event); + _starpu_mp_event_list_push_front(&node->event_queue, sink_event); + } + } + + /*if the list is not empty and we can send a notification*/ + while(!_starpu_mp_event_list_empty(&node->event_queue) && node->nt_send_is_ready(node)) + { + struct _starpu_mp_event * sink_event_completed = _starpu_mp_event_list_pop_back(&node->event_queue); + /* send ACK to host */ + STARPU_ASSERT(sink_event_completed->answer_cmd >= STARPU_MP_COMMAND_NOTIF_FIRST && sink_event_completed->answer_cmd <= STARPU_MP_COMMAND_NOTIF_LAST); + _starpu_nt_common_send_command(node, sink_event_completed->answer_cmd, &sink_event_completed->remote_event, sizeof(sink_event_completed->remote_event)); + + _starpu_mp_event_delete(sink_event_completed); + } + } + + STARPU_PTHREAD_KEY_DELETE(worker_key); + + /* Deinitialize the node and release it */ + _starpu_mp_common_node_destroy(node); + + starpu_perfmodel_free_sampling(); + _starpu_profiling_terminate(); + _starpu_perf_knob_exit(); + _starpu_perf_counter_exit(); + + _starpu_destroy_machine_config(&_starpu_config, 1); + + free((char*) _starpu_config.conf.sched_policy_name); + if (_starpu_config.conf.n_cuda_opengl_interoperability) + free(_starpu_config.conf.cuda_opengl_interoperability); + if 
(_starpu_config.conf.n_not_launched_drivers) + free(_starpu_config.conf.not_launched_drivers); + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + _starpu_mpi_common_mp_deinit(); +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + _starpu_tcpip_common_mp_deinit(); +#endif + + exit(0); +} + +/* Search for the mp_barrier corresponding to the specified combined worker + * and create it (sized for cb_workersize threads) if it doesn't exist + */ +static struct mp_barrier * _starpu_sink_common_get_barrier(struct _starpu_mp_node * node, int cb_workerid, int cb_workersize) +{ + struct mp_barrier * b = NULL; + STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex); + /* Search if the barrier already exists */ + for(b = mp_barrier_list_begin(&node->barrier_list); + b != mp_barrier_list_end(&node->barrier_list) && b->id != cb_workerid; + b = mp_barrier_list_next(b)); + + /* If we found the barrier (NOTE(review): relies on mp_barrier_list_end() being NULL - confirm) */ + if(b != NULL) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); + return b; + } + else + { + + /* Else we create, initialize and add it to the list */ + b = mp_barrier_new(); + b->id = cb_workerid; + STARPU_PTHREAD_BARRIER_INIT(&b->before_work_barrier,NULL,cb_workersize); + STARPU_PTHREAD_BARRIER_INIT(&b->after_work_barrier,NULL,cb_workersize); + mp_barrier_list_push_back(&node->barrier_list,b); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); + return b; + } +} + +/* Erase the given mp_barrier from the barrier list of the node +*/ +static void _starpu_sink_common_erase_barrier(struct _starpu_mp_node * node, struct mp_barrier *barrier) +{ + STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex); + mp_barrier_list_erase(&node->barrier_list,barrier); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); +} + +/* Append the message given in parameter to the message list + */ +static void _starpu_sink_common_append_message(struct _starpu_mp_node *node, struct mp_message * message) +{ + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + mp_message_list_push_front(&node->message_queue,message); +
STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + /* Signal, when the node provides mp_signal, that a message is in message_queue */ + if(node->mp_signal) + { + node->mp_signal(node); + } +} + +/* Append to the message list a "STARPU_PRE_EXECUTION" message + * carrying the combined worker id of the task */ +static void _starpu_sink_common_pre_execution_message(struct _starpu_mp_node *node, struct mp_task *task) +{ + /* Init message to tell the host that the execution has begun */ + struct mp_message * message = mp_message_new(); + message->type = STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION; + _STARPU_MALLOC(message->buffer, sizeof(int)); + *(int *) message->buffer = task->combined_workerid; + message->size = sizeof(int); + + /* Append the message to the queue */ + _starpu_sink_common_append_message(node, message); +} + +/* Append to the message list a "STARPU_EXECUTION_COMPLETED" message: + * the core id of the task, followed by cl_ret when the task has one */ +static void _starpu_sink_common_execution_completed_message(struct _starpu_mp_node *node, struct mp_task *task) +{ + /* Init message to tell the host that the execution is completed */ + struct mp_message * message = mp_message_new(); + if (task->detached) + message->type = STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED; + else + message->type = STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED; + + message->size = sizeof(int); + + /* If the user didn't give any cl_ret, there is no need to send it */ + if (task->cl_ret) + { + STARPU_ASSERT(task->cl_ret_size); + message->size += task->cl_ret_size; + } + + _STARPU_MALLOC(message->buffer, message->size); + + *(int*) message->buffer = task->coreid; + + if (task->cl_ret) + memcpy(message->buffer+sizeof(int), task->cl_ret, task->cl_ret_size); + + /* Append the message to the queue */ + _starpu_sink_common_append_message(node, message); +} + +/* Bind the thread which is running on the specified core to the combined worker */ +static void _starpu_sink_common_bind_to_combined_worker(struct _starpu_mp_node *node, int coreid, struct _starpu_combined_worker * combined_worker) +{ + int i; + int *
bind_set; + _STARPU_MALLOC(bind_set, sizeof(int)*combined_worker->worker_size); + for(i=0;iworker_size;i++) + bind_set[i] = combined_worker->combined_workerid[i] - node->baseworkerid; + node->bind_thread(node, coreid, bind_set, combined_worker->worker_size); +} + +/* Get the current rank of the worker in the combined worker + */ +static int _starpu_sink_common_get_current_rank(int workerid, struct _starpu_combined_worker * combined_worker) +{ + int i; + for(i=0; iworker_size; i++) + if(workerid == combined_worker->combined_workerid[i]) + return i; + + STARPU_ASSERT(0); + return -1; +} + +/* Execute the task + */ +static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int coreid, struct _starpu_worker * worker, int detached) +{ + struct _starpu_combined_worker * combined_worker = NULL; + struct mp_task* task; + if (detached) + task = node->run_table_detached[coreid]; + else + task = node->run_table[coreid]; + + /* If it's a parallel task */ + if(task->is_parallel_task) + { + combined_worker = _starpu_get_combined_worker_struct(task->combined_workerid); + + worker->current_rank = _starpu_sink_common_get_current_rank(worker->workerid, combined_worker); + worker->combined_workerid = task->combined_workerid; + worker->worker_size = combined_worker->worker_size; + + /* Synchronize with others threads of the combined worker*/ + STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->before_work_barrier); + + /* The first thread of the combined worker */ + if(worker->current_rank == 0) + { + /* tell the sink that the execution has begun */ + _starpu_sink_common_pre_execution_message(node,task); + + /* If the mode is FORKJOIN, + * the first thread binds himself + * on all core of the combined worker*/ + if(task->type == STARPU_FORKJOIN) + { + _starpu_sink_common_bind_to_combined_worker(node, coreid, combined_worker); + } + } + } + else + { + worker->current_rank = 0; + worker->combined_workerid = 0; + worker->worker_size = 1; + } + + if(task->type != 
STARPU_FORKJOIN || worker->current_rank == 0) + { + if (_starpu_get_disable_kernels() <= 0) + { + struct starpu_task s_task; + starpu_task_init(&s_task); + + /*copy cl_arg and cl_arg_size from mp_task into starpu_task*/ + s_task.cl_arg=task->cl_arg; + s_task.cl_arg_size=task->cl_arg_size; + + _starpu_set_current_task(&s_task); + /* execute the task */ + task->kernel(task->interfaces,task->cl_arg); + _starpu_set_current_task(NULL); + + /*copy cl_ret and cl_ret_size from starpu_task into mp_task*/ + task->cl_ret=s_task.cl_ret; + task->cl_ret_size=s_task.cl_ret_size; + } + } + + /* If it's a parallel task */ + if(task->is_parallel_task) + { + /* Synchronize with others threads of the combined worker*/ + STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->after_work_barrier); + + /* The first thread of the combined */ + if(worker->current_rank == 0) + { + /* Erase the barrier from the list */ + _starpu_sink_common_erase_barrier(node,task->mp_barrier); + + /* If the mode is FORKJOIN, + * the first thread rebinds himself on his own core */ + if(task->type == STARPU_FORKJOIN) + node->bind_thread(node, coreid, &coreid, 1); + + } + } + + if (detached) + node->run_table_detached[coreid] = NULL; + else + node->run_table[coreid] = NULL; + + /* tell the sink that the execution is completed */ + _starpu_sink_common_execution_completed_message(node,task); + + /*free the task*/ + unsigned i; + for (i = 0; i < task->nb_interfaces; i++) + { + struct starpu_data_interface_ops *ops = _starpu_data_interface_get_ops(task->ids[i]); + if (ops->free_meta) + { + ops->free_meta(task->interfaces[i]); + } + free(task->interfaces[i]); + } + free(task->interfaces); + free(task->ids); + if (task->cl_arg != NULL) + free(task->cl_arg); + free(task); +} + +/* The main function executed by the thread + * thread_arg is a structure containing the information needed by the thread + */ +void* _starpu_sink_thread(void * thread_arg) +{ + /* Retrieve the information from the structure */ + struct _starpu_mp_node 
*node = ((struct arg_sink_thread *)thread_arg)->node; + int coreid =((struct arg_sink_thread *)thread_arg)->coreid; + /* free the structure */ + free(thread_arg); + + STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier); + + struct _starpu_worker *worker = &_starpu_get_machine_config()->workers[node->baseworkerid + coreid]; + char *s; + asprintf(&s, "slave %d core %d", node->baseworkerid, coreid); + starpu_pthread_setname(s); + free(s); + + node->bind_thread(node, coreid, &coreid, 1); + + _starpu_set_local_worker_key(worker); + while(node->is_running) + { + /*Wait there is a task available */ + sem_wait(&node->sem_run_table[coreid]); + + STARPU_ASSERT((node->run_table_detached[coreid]!=NULL) || (node->run_table[coreid]!=NULL) || node->is_running==0); + + if (node->run_table_detached[coreid] != NULL) + _starpu_sink_common_execute_kernel(node, coreid, worker, 1); + else if (node->run_table[coreid] != NULL) + _starpu_sink_common_execute_kernel(node, coreid, worker, 0); + else + STARPU_ASSERT(!node->is_running); + + } + starpu_pthread_exit(NULL); +} + +/* Add the task to the specific thread and wake him up +*/ +static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, struct mp_task *task) +{ + int detached = task->detached; + /* Add the task to the specific thread */ + if (detached) + { + STARPU_ASSERT(!node->run_table_detached[task->coreid]); + node->run_table_detached[task->coreid] = task; + } + else + { + STARPU_ASSERT(!node->run_table[task->coreid]); + node->run_table[task->coreid] = task; + } + /* Unlock the mutex to wake up the thread which will execute the task */ + sem_post(&node->sem_run_table[task->coreid]); +} + +/* Receive paquet from _starpu_src_common_execute_kernel in the form below : + * [Function pointer on sink, number of interfaces, interfaces + * (union _starpu_interface), cl_arg] + * Then call the function given, passing as argument an array containing the + * addresses of the received interfaces + */ + +void 
_starpu_sink_common_execute(struct _starpu_mp_node *node, void *arg, int arg_size) +{ + unsigned i; + + uintptr_t arg_ptr = (uintptr_t) arg; + struct mp_task *task; + + _STARPU_CALLOC(task, 1, sizeof(struct mp_task)); + task->kernel = *(void(**)(void **, void *)) arg_ptr; + arg_ptr += sizeof(task->kernel); + + task->type = *(enum starpu_codelet_type *) arg_ptr; + arg_ptr += sizeof(task->type); + + task->is_parallel_task = *(int *) arg_ptr; + arg_ptr += sizeof(task->is_parallel_task); + + if(task->is_parallel_task) + { + task->combined_workerid= *(int *) arg_ptr; + arg_ptr += sizeof(task->combined_workerid); + + task->mp_barrier = _starpu_sink_common_get_barrier(node,task->combined_workerid,_starpu_get_combined_worker_struct(task->combined_workerid)->worker_size); + } + + task->coreid = *(unsigned *) arg_ptr; + arg_ptr += sizeof(task->coreid); + + task->nb_interfaces = *(unsigned *) arg_ptr; + arg_ptr += sizeof(task->nb_interfaces); + + task->detached = *(int *) arg_ptr; + arg_ptr += sizeof(task->detached); + + _STARPU_MALLOC(task->interfaces, task->nb_interfaces * sizeof(*task->interfaces)); + _STARPU_MALLOC(task->ids, task->nb_interfaces * sizeof(*task->ids)); + + /* The function needs an array pointing to each interface it needs + * during execution. 
The interface is first identified by its + * id, which will indicate if this is a basic interface or if + * it needs to be unpacked through unpack_meta + */ + for (i = 0; i < task->nb_interfaces; i++) + { + // first extract the interface id + memcpy(&(task->ids[i]), (void *)arg_ptr, sizeof(task->ids[i])); + arg_ptr += sizeof(task->ids[i]); + + // and then the interface + struct starpu_data_interface_ops *ops = _starpu_data_interface_get_ops(task->ids[i]); + if (ops->unpack_meta) + { + STARPU_ASSERT_MSG(ops->pack_meta, "unpack_meta defined without pack_meta for interface %d", task->ids[i]); + starpu_ssize_t count; + ops->unpack_meta(&task->interfaces[i], (void*) arg_ptr, &count); + arg_ptr += count; + } + else + { + union _starpu_interface *interface; + _STARPU_MALLOC(interface, sizeof(union _starpu_interface)); + memcpy(interface, (void*) arg_ptr, sizeof(union _starpu_interface)); + task->interfaces[i] = interface; + arg_ptr += sizeof(union _starpu_interface); + } + } + + /* Was cl_arg sent ? 
*/ + if (arg_size > arg_ptr - (uintptr_t) arg) + { + /* Copy cl_arg to prevent overwriting by an other task */ + unsigned cl_arg_size = arg_size - (arg_ptr - (uintptr_t) arg); + _STARPU_MALLOC(task->cl_arg, cl_arg_size); + memcpy(task->cl_arg, (void *) arg_ptr, cl_arg_size); + task->cl_arg_size=cl_arg_size; + } + else + task->cl_arg = NULL; + + //_STARPU_DEBUG("telling host that we have submitted the task %p.\n", task->kernel); + if (task->detached) + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED, NULL, 0); + else + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED, NULL, 0); + + //_STARPU_DEBUG("executing the task %p\n", task->kernel); + _starpu_sink_common_execute_thread(node, task); +} diff --git a/src/drivers/mp_common/sink_common.h b/src/drivers/mp_common/sink_common.h new file mode 100644 index 0000000..bc90efb --- /dev/null +++ b/src/drivers/mp_common/sink_common.h @@ -0,0 +1,64 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + + +#ifndef __SINK_COMMON_H__ +#define __SINK_COMMON_H__ + +/** @file */ + +#include + +#ifdef STARPU_USE_MP + +#include + +#pragma GCC visibility push(hidden) + +/** Represent the topology of sink devices, contains useful information about + * their capabilities + * XXX: unused. + */ +struct _starpu_sink_topology +{ + unsigned nb_cpus; +}; + +struct arg_sink_thread +{ + struct _starpu_mp_node *node; + int coreid; +}; + +void _starpu_sink_common_worker(void); + +void (*_starpu_sink_common_cpu_lookup (const struct _starpu_mp_node * node STARPU_ATTRIBUTE_UNUSED, char* func_name))(void); +void _starpu_sink_common_execute(struct _starpu_mp_node *node, void *arg, int arg_size); + +void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size); +void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size); + +void _starpu_sink_common_map(const struct _starpu_mp_node *mp_node, void *arg, int arg_size); +void _starpu_sink_common_unmap(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size); + +void* _starpu_sink_thread(void * thread_arg); + +#pragma GCC visibility pop + +#endif /* STARPU_USE_MP */ + +#endif /* __SINK_COMMON_H__ */ diff --git a/src/drivers/mp_common/source_common.c b/src/drivers/mp_common/source_common.c new file mode 100644 index 0000000..23eb847 --- /dev/null +++ b/src/drivers/mp_common/source_common.c @@ -0,0 +1,1293 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +struct starpu_save_thread_env +{ + struct starpu_task * current_task; + struct _starpu_worker * current_worker; + struct _starpu_worker_set * current_worker_set; +#ifdef STARPU_OPENMP + struct starpu_omp_thread * current_omp_thread; + struct starpu_omp_task * current_omp_task; +#endif +}; + +#ifdef STARPU_USE_MPI_MASTER_SLAVE +struct starpu_save_thread_env save_thread_env[STARPU_MAXMPIDEVS]; +struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXMPIDEVS]; +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE +struct starpu_save_thread_env save_thread_env[STARPU_MAXTCPIPDEVS]; +struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXTCPIPDEVS]; +#endif + +/* Mutex for concurrent access to the table. + */ +static starpu_pthread_mutex_t htbl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + +/* Structure used by host to store information about a kernel executable on + * a MPI MS device : its name, and its address on each device. + * If a kernel has been initialized, then a lookup has already been achieved and the + * device knows how to call it, else the host still needs to do a lookup. 
+ */ +static struct _starpu_sink_kernel +{ + UT_hash_handle hh; + char *name; + starpu_cpu_func_t func[]; +} *kernels[STARPU_NARCH]; +/* Memory node of the worker whose id is NODE's baseworkerid */ +static unsigned mp_node_memory_node(struct _starpu_mp_node *node) +{ + return starpu_worker_get_memory_node(node->baseworkerid); +} +/* Empty the kernel table of every architecture, freeing each entry together with its name */ +void _starpu_src_common_deinit(void) +{ + enum starpu_worker_archtype arch; + + for (arch = 0; arch < STARPU_NARCH; arch++) + { + struct _starpu_sink_kernel *entry, *tmp; + + HASH_ITER(hh, kernels[arch], entry, tmp) + { + HASH_DEL(kernels[arch], entry); + free(entry->name); + free(entry); + } + } +} + +/* Finalize the execution of a task by a worker. + * For a parallel task (task_size > 1), the final steps are run only by the + * worker that reads the combined worker's counter at 0; that worker also + * re-arms the counter to worker_size - 1. + * Always returns 0. + */ +static int _starpu_src_common_finalize_job(struct _starpu_job *j, struct _starpu_worker *worker) +{ + int profiling = starpu_profiling_status_get(); + _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); + + int count = worker->current_rank; + + /* If it's a combined worker, check whether this worker is the last one of its combined worker */ + if(j->task_size > 1) + { + struct _starpu_combined_worker * cb_worker = _starpu_get_combined_worker_struct(worker->combined_workerid); + (void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1); + + STARPU_PTHREAD_MUTEX_LOCK(&cb_worker->count_mutex); + count = cb_worker->count--; + if(count == 0) + cb_worker->count = cb_worker->worker_size - 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&cb_worker->count_mutex); + } + + /* Finalize the execution: feedback, output data, then job termination */ + if(count == 0) + { + _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); + + _starpu_push_task_output(j); + + _starpu_handle_job_termination(j); + } + return 0; +} + +/* Complete the execution of the job. + * ARG starts with the id (int) of the core that ran the task; it is used as + * an index into WORKERSET->workers to find the worker. + */ +static int _starpu_src_common_process_completed_job(struct _starpu_mp_node *node, struct _starpu_worker_set *workerset, void * arg, int arg_size, int stored) +{ + int coreid; + + uintptr_t arg_ptr = (uintptr_t) arg; + + coreid = *(int *) arg_ptr; + arg_ptr += sizeof(coreid); + + struct _starpu_worker *worker = &workerset->workers[coreid]; + struct
_starpu_job *j = _starpu_get_job_associated_to_task(worker->current_task); + + struct starpu_task *task = j->task; + STARPU_ASSERT(task); + + struct _starpu_worker * old_worker = _starpu_get_local_worker_key(); + + /* Was cl_ret sent ? */ + if (arg_size > arg_ptr - (uintptr_t) arg) + { + /* Copy cl_ret into the task */ + unsigned cl_ret_size = arg_size - (arg_ptr - (uintptr_t) arg); + _STARPU_MALLOC(task->cl_ret, cl_ret_size); + memcpy(task->cl_ret, (void *) arg_ptr, cl_ret_size); + task->cl_ret_size=cl_ret_size; + } + else + task->cl_ret = NULL; + + /* if arg is not copied we release the mutex */ + if (!stored) + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + + _starpu_set_local_worker_key(worker); + _starpu_src_common_finalize_job(j, worker); + _starpu_set_local_worker_key(old_worker); + + worker->current_task = NULL; + + return 0; +} + +/* Tell the scheduler when the execution has begun. + * ARG holds the id (int) of the combined worker running the task and ARG_SIZE + * must be exactly the size of that int. + * STORED is 0 when ARG is the buffer handed back by the receive call, in which + * case connection_mutex is released once ARG has been read, as the other + * notification handlers do; it is non-zero when ARG is a private copy. + */ +static void _starpu_src_common_pre_exec(struct _starpu_mp_node *node, void * arg, int arg_size, int stored) +{ + int cb_workerid, i; + STARPU_ASSERT(sizeof(cb_workerid) == arg_size); + cb_workerid = *(int *) arg; + struct _starpu_combined_worker *combined_worker = _starpu_get_combined_worker_struct(cb_workerid); + + /* if arg is not copied we release the mutex: cb_workerid has been read, + * so the receive buffer is no longer needed */ + if (!stored) + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + + /* Run the pre-execution hook on behalf of each member of the combined worker */ + for(i=0; i < combined_worker->worker_size; i++) + { + struct _starpu_worker * worker = _starpu_get_worker_struct(combined_worker->combined_workerid[i]); + _starpu_set_local_worker_key(worker); + _starpu_sched_pre_exec_hook(worker->current_task); + } +} + +/* recv a message and handle asynchronous message + * return 0 if the message has not been handled (which most likely means that it is a synchronous message) + * return 1 if the message has been handled + */ +static int _starpu_src_common_handle_async(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer, int stored) +{ + struct _starpu_worker_set * worker_set
= NULL; + switch(answer) + { + case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED: + { + worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set; + _starpu_src_common_process_completed_job(node, worker_set, arg, arg_size, stored); + break; + } + case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED: + { + _STARPU_ERROR("Detached execution completed should not arrive here... \n"); + break; + } + case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION: + { + _starpu_src_common_pre_exec(node, arg,arg_size, stored); + break; + } + case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED: + { + struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); /* ARG carries a pointer to the channel */ + event->starpu_mp_common_finished_receiver--; /* one acknowledgment fewer awaited on the receiver side */ + if (!stored) + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + break; + } + case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED: + { + struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); /* ARG carries a pointer to the channel */ + event->starpu_mp_common_finished_sender--; /* one acknowledgment fewer awaited on the sender side */ + if (!stored) + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + break; + } + default: + return 0; + break; + } + return 1; +} + +/* Handle all the messages which have been stored in the message_queue. + * Messages are taken from the back of the list one at a time; + * message_queue_mutex is dropped while a message is handled and taken again + * afterwards. + */ +static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node) +{ + int stopped_progress = 0; + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + /* while the list is not empty */ + while(!mp_message_list_empty(&node->message_queue)) + { + /* We pop a message and handle it */ + struct mp_message * message = mp_message_list_pop_back(&node->message_queue); + /* Release the mutex while the message is handled */ + stopped_progress = 1; + _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(node)); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + _starpu_src_common_handle_async(node, message->buffer, message->size, message->type, 1); +
free(message->buffer); + mp_message_delete(message); + /* Take it again */ + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + if (stopped_progress) + _STARPU_TRACE_START_PROGRESS(mp_node_memory_node(node)); +} + +/* Store a message if it is asynchronous. + * return 1 if the message has been dealt with: execution notifications are + * copied into message_queue, *_ASYNC_COMPLETED notifications only update the + * counters of their event and are not stored + * return 0 if the message is unknown or synchronous */ +int _starpu_src_common_store_message(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer) +{ + switch(answer) + { + case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION: + { + struct mp_message *message = mp_message_new(); + message->type = answer; + _STARPU_MALLOC(message->buffer, arg_size); + memcpy(message->buffer, arg, arg_size); + message->size = arg_size; + + STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); + mp_message_list_push_front(&node->message_queue,message); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); + /* Signal that a message is in message_queue, when the node has a signal hook */ + if(node->mp_signal) + { + node->mp_signal(node); + } + return 1; + } + /* For ASYNC commands don't store them, update event */ + case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED: + { + struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); + event->starpu_mp_common_finished_receiver--; + return 1; + } + case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED: + case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED: + { + struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); + event->starpu_mp_common_finished_sender--; + return 1; + } + default: + return 0; + } +} + +/* Store all asynchronous messages and return when a synchronous message is received */ +static enum _starpu_mp_command
_starpu_src_common_wait_command_sync(struct _starpu_mp_node *node, void ** arg, int* arg_size) +{ + enum _starpu_mp_command answer; + int is_sync = 0; + while(!is_sync) + { + answer = _starpu_mp_common_recv_command(node, arg, arg_size); + if(!_starpu_src_common_store_message(node,*arg,*arg_size,answer)) + is_sync=1; + } + return answer; +} + +/* Handle an asynchronous message; an unknown or synchronous message is reported through _STARPU_ERROR */ +static void _starpu_src_common_recv_async(struct _starpu_mp_node * node) +{ + enum _starpu_mp_command answer; + void *arg; + int arg_size; + answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); + if(!_starpu_src_common_handle_async(node,arg,arg_size,answer, 0)) + { + _STARPU_ERROR("incorrect command: unknown command or sync command"); + } +} + +/* Store every asynchronous message until the detached-execution-completed + * notification whose core id equals DEVID has been received; that + * notification is returned, with *ARG and *ARG_SIZE describing its payload */ +enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size) +{ + enum _starpu_mp_command answer; + + int completed = 0; + /* While the awaited completed execution message has not been received */ + while(!completed) + { + answer = _starpu_nt_common_recv_command(node, arg, arg_size); + + if(answer == STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED) + { + int coreid; + STARPU_ASSERT(sizeof(coreid) == *arg_size); + coreid = *(int *) *arg; + if(devid == coreid) + completed = 1; + else if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer)) + /* The message could not be stored: it is unknown or synchronous */ + STARPU_ASSERT(0); + } + else + { + if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer)) + /* The message could not be stored: it is unknown or synchronous */ + STARPU_ASSERT(0); + } + } + return answer; +} + +/* Send a request to the sink NODE for the number of cores on it.
+ */ +int _starpu_src_common_sink_nbcores(struct _starpu_mp_node *node, int *buf) +{ + enum _starpu_mp_command answer; + void *arg; + int arg_size = sizeof(int); +/* The answer is received and copied into BUF while connection_mutex is held */ + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_SINK_NBCORES, NULL, 0); + + answer = _starpu_mp_common_recv_command(node, &arg, &arg_size); + + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_SINK_NBCORES && arg_size == sizeof(int)); + + memcpy(buf, arg, arg_size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + + return 0; +} + +/* Send a request to the sink linked to NODE for the pointer to the + * function defined by FUNC_NAME. + * In case of success, it returns 0 and *FUNC_PTR contains the pointer ; + * else it returns -ESPIPE if the sink answered with a lookup error. + */ +int _starpu_src_common_lookup(struct _starpu_mp_node *node, void (**func_ptr)(void), const char *func_name) +{ + enum _starpu_mp_command answer; + void *arg; + int arg_size; + + /* strlen ignores the terminating '\0', which is sent as well */ + arg_size = (strlen(func_name) + 1) * sizeof(char); + + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + + //_STARPU_DEBUG("Looking up %s\n", func_name); + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_LOOKUP, (void *) func_name, + arg_size); + + answer = _starpu_src_common_wait_command_sync(node, (void **) &arg, &arg_size); + + if (answer == STARPU_MP_COMMAND_ERROR_LOOKUP) + { + _STARPU_DISP("Error looking up symbol %s\n", func_name); + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + return -ESPIPE; + } + + /* We have to be sure the device answered the right question and the + * answer has the right size */ + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_LOOKUP); + STARPU_ASSERT(arg_size == sizeof(*func_ptr)); + + memcpy(func_ptr, arg, arg_size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + + //_STARPU_DEBUG("got %p\n", *func_ptr); + + return 0; +} + +/* Send a message to the sink to execute a kernel.
+ * The message sent has the form below : + * [Function pointer on sink, codelet type, is_parallel_task, + * cb_workerid (parallel tasks only), coreid, number of interfaces, + * detached, then for each interface its id followed by either its packed + * metadata or a zero-padded union _starpu_interface, and finally cl_arg] + */ +/* Launch the execution of the function KERNEL points to on the sink linked + * to NODE. Returns 0 in case of success, -EINVAL if the sink answered + * STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED. + * Data interfaces in task are sent to the sink. + * The request buffer is allocated here and released on every return path. + */ +int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node, + void (*kernel)(void), unsigned coreid, + enum starpu_codelet_type type, + int is_parallel_task, int cb_workerid, + starpu_data_handle_t *handles, + void **interfaces, + unsigned nb_interfaces, + void *cl_arg, size_t cl_arg_size, int detached) +{ + void *buffer, *arg =NULL; + uintptr_t buffer_ptr; + int buffer_size = 0, arg_size =0; + unsigned i; + starpu_ssize_t interface_size[nb_interfaces ? nb_interfaces : 1]; + void *interface_ptr[nb_interfaces ? nb_interfaces : 1]; + + buffer_size = sizeof(kernel) + sizeof(type) + sizeof(is_parallel_task) + sizeof(coreid) + sizeof(nb_interfaces) + sizeof(detached); + + /*if the task is parallel*/ + if(is_parallel_task) + { + buffer_size += sizeof(cb_workerid); + } + + for (i = 0; i < nb_interfaces; i++) + { + buffer_size += sizeof(enum starpu_data_interface_id); + + starpu_data_handle_t handle = handles[i]; + if (handle->ops->pack_meta) + { + handle->ops->pack_meta(interfaces[i], &interface_ptr[i], &interface_size[i]); + buffer_size += interface_size[i]; + } + else + { + buffer_size += sizeof(union _starpu_interface); + } + } + + /* If the user didn't give any cl_arg, there is no need to send it */ + if (cl_arg) + { + STARPU_ASSERT_MSG(cl_arg_size, "Execution of tasks on master-slave needs cl_arg_size to be set, to transfer the content of cl_arg"); + buffer_size += cl_arg_size; + } + + /* We give to send_command a buffer we just allocated, which contains + * a pointer to the function (sink-side), core on which execute this + * function (sink-side), number of interfaces we send, + * an array of
generic (union) interfaces and the value of cl_arg */ + _STARPU_MALLOC(buffer, buffer_size); + buffer_ptr = (uintptr_t) buffer; + + *(void(**)(void)) buffer = kernel; + buffer_ptr += sizeof(kernel); + + *(enum starpu_codelet_type *) buffer_ptr = type; + buffer_ptr += sizeof(type); + + *(int *) buffer_ptr = is_parallel_task; + buffer_ptr += sizeof(is_parallel_task); + + if(is_parallel_task) + { + *(int *) buffer_ptr = cb_workerid ; + buffer_ptr += sizeof(cb_workerid); + } + + STARPU_ASSERT(coreid < (unsigned)node->nb_cores); + *(unsigned *) buffer_ptr = coreid; + buffer_ptr += sizeof(coreid); + + *(unsigned *) buffer_ptr = nb_interfaces; + buffer_ptr += sizeof(nb_interfaces); + + *(int *) buffer_ptr = detached; + buffer_ptr += sizeof(detached); + + /* Message-passing execution is a particular case as the codelet is + * executed on a sink with a different memory, whereas a codelet is + * executed on the host part for the other accelerators. + * Thus we need to send a copy of each interface on the MP device */ + for (i = 0; i < nb_interfaces; i++) + { + starpu_data_handle_t handle = handles[i]; + enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); + memcpy((void*) buffer_ptr, &id, sizeof(id)); + buffer_ptr += sizeof(id); + if (handle->ops->pack_meta) + { + STARPU_ASSERT_MSG(handle->ops->unpack_meta, "pack_meta defined without unpack_meta for interface %d", id); + memcpy((void *) buffer_ptr, interface_ptr[i], interface_size[i]); + free(interface_ptr[i]); + buffer_ptr += interface_size[i]; + } + else + { + /* Check that the interface exists in _starpu_interface */ + STARPU_ASSERT_MSG(id == STARPU_VOID_INTERFACE_ID || + id == STARPU_VARIABLE_INTERFACE_ID || + id == STARPU_VECTOR_INTERFACE_ID || + id == STARPU_MATRIX_INTERFACE_ID || + id == STARPU_BLOCK_INTERFACE_ID || + id == STARPU_TENSOR_INTERFACE_ID || + id == STARPU_CSR_INTERFACE_ID || + id == STARPU_BCSR_INTERFACE_ID || + id == STARPU_COO_INTERFACE_ID, + "Master-Slave currently cannot work with
interface type %d (%s)", id, handle->ops->name); + + memcpy((void*) buffer_ptr, interfaces[i], handle->ops->interface_size); + STARPU_ASSERT(handle->ops->interface_size <= sizeof(union _starpu_interface)); + memset((char*) buffer_ptr + handle->ops->interface_size, 0, sizeof(union _starpu_interface) - handle->ops->interface_size); + /* The sink side has no means to get the type of each + * interface, we use a union to make it generic and permit the + * sink to go through the array */ + buffer_ptr += sizeof(union _starpu_interface); + } + } + + if (cl_arg) + memcpy((void*) buffer_ptr, cl_arg, cl_arg_size); + + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + + if (detached) + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_EXECUTE_DETACHED, buffer, buffer_size); + else + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_EXECUTE, buffer, buffer_size); + + enum _starpu_mp_command answer = _starpu_src_common_wait_command_sync(node, &arg, &arg_size); + + if (answer == STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + /* The request buffer is ours: release it on the error path too */ + free(buffer); + return -EINVAL; + } + + if (detached) + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED); + else + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED); + + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + + free(buffer); + + return 0; +} + +/* Get the information and call the function to send to the sink a message to execute the task*/ +static int _starpu_src_common_execute(struct _starpu_job *j, struct _starpu_worker *worker, struct _starpu_mp_node * node) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); + + STARPU_ASSERT(task); + + void (*kernel)(void) = node->get_kernel_from_job(node,j); + + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); + + //_STARPU_DEBUG("\nworkerid:%d, subworkerid:%d, rank:%d, type:%d, cb_workerid:%d,
task_size:%d\n\n",worker->devid, worker->subworkerid, worker->current_rank,task->cl->type,j->combined_workerid,j->task_size); + + _starpu_src_common_execute_kernel(node, kernel, worker->subworkerid, task->cl->type, + (j->task_size > 1), + j->combined_workerid, STARPU_TASK_GET_HANDLES(task), + _STARPU_TASK_GET_INTERFACES(task), STARPU_TASK_GET_NBUFFERS(task), + task->cl_arg, task->cl_arg_size, 0); + return 0; +} + +static struct _starpu_sink_kernel *starpu_src_common_register_kernel(const char *func_name) +{ + STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex); + struct _starpu_sink_kernel *kernel; + unsigned workerid = starpu_worker_get_id_check(); + enum starpu_worker_archtype archtype = starpu_worker_get_type(workerid); + + HASH_FIND_STR(kernels[archtype], func_name, kernel); + + if (kernel != NULL) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex); + // Function already in the table. + return kernel; + } + + unsigned int nb_devices = _starpu_get_machine_config()->topology.ndevices[archtype]; + _STARPU_MALLOC(kernel, sizeof(*kernel) + nb_devices * sizeof(starpu_cpu_func_t)); + + kernel->name = strdup(func_name); + + HASH_ADD_STR(kernels[archtype], name, kernel); + + unsigned int i; + for (i = 0; i < nb_devices; ++i) + kernel->func[i] = NULL; + + STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex); + + return kernel; +} + +static starpu_cpu_func_t starpu_src_common_get_kernel(const char *func_name) +{ + /* This function has to be called in the codelet only, by the thread + * which will handle the task */ + int workerid = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(workerid); + enum starpu_worker_archtype archtype = starpu_worker_get_type(workerid); + + struct _starpu_sink_kernel *kernel = starpu_src_common_register_kernel(func_name); + + if (kernel->func[devid] == NULL) + { + struct _starpu_mp_node *node = _starpu_src_nodes[archtype][devid]; + int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name); + if (ret) + { + 
_STARPU_DISP("Could not resolve function %s on slave %d\n", kernel->name, devid); + return NULL; + } + } + + return kernel->func[devid]; +} + +starpu_cpu_func_t _starpu_src_common_get_cpu_func_from_codelet(struct starpu_codelet *cl, unsigned nimpl) +{ + /* Try to use cpu_func_name. */ + const char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl); + STARPU_ASSERT_MSG(func_name, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); + + starpu_cpu_func_t kernel = starpu_src_common_get_kernel(func_name); + + STARPU_ASSERT_MSG(kernel, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); + + return kernel; +} + +void(* _starpu_src_common_get_cpu_func_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void) +{ + /* Try to use cpu_func_name. */ + const char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl); + STARPU_ASSERT_MSG(func_name, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); + + starpu_cpu_func_t kernel = starpu_src_common_get_kernel(func_name); + + STARPU_ASSERT_MSG(kernel, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); + + return (void (*)(void))kernel; +} + +struct _starpu_mp_node *_starpu_src_common_get_mp_node_from_memory_node(int memory_node) +{ + int devid = starpu_memory_node_get_devid(memory_node); + enum starpu_worker_archtype archtype = starpu_memory_node_get_worker_archtype(starpu_node_get_kind(memory_node)); +#ifdef STARPU_USE_MPI_MASTER_SLAVE + STARPU_ASSERT_MSG_ALWAYS(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node); +#endif +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + STARPU_ASSERT_MSG_ALWAYS(devid >= 0 && devid < STARPU_MAXTCPIPDEVS, "bogus devid %d for memory node %d\n", devid, memory_node); +#endif + + return 
_starpu_src_nodes[archtype][devid]; +} + +/* Send a request to the sink linked to the MP_NODE to allocate SIZE bytes on + * the sink. + * In case of success, it returns the address of the allocated area as sent + * back by the sink ; + * else it returns 0 if the sink answered with an allocation error. + * FLAGS is ignored. + */ +uintptr_t _starpu_src_common_allocate(unsigned dst_node, size_t size, int flags) +{ + (void) flags; + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); + enum _starpu_mp_command answer; + void *arg; + int arg_size; + uintptr_t addr; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ALLOCATE, &size, + sizeof(size)); + + answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); + + if (answer == STARPU_MP_COMMAND_ERROR_ALLOCATE) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + return 0; + } + + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_ALLOCATE && arg_size == sizeof(addr)); + + memcpy(&addr, arg, arg_size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + return addr; +} + +/* Send a request to the sink linked to the MP_NODE to deallocate the memory + * area pointed by ADDR. No answer is waited for; SIZE and FLAGS are ignored. + */ +void _starpu_src_common_free(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void) flags; + (void) size; + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_FREE, &addr, sizeof(addr)); + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); +} + +/* Send a request to the sink linked to the MP_NODE to map SIZE bytes on ADDR as mapped area + * on the sink. + * In case of success, it returns the address of the mapped area as sent back + * by the sink + * else it returns 0 if the map fails.
+ */ +uintptr_t _starpu_src_common_map(unsigned dst_node, uintptr_t addr, size_t size) +{ + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); + enum _starpu_mp_command answer; + void *arg; + int arg_size; + uintptr_t map_addr; + + size_t map_offset; + char* map_name = _starpu_get_fdname_from_mapaddr(addr, &map_offset, size); + + if(map_name == NULL) + { + return 0; + } + + int map_cmd_size = sizeof(struct _starpu_mp_transfer_map_command)+strlen(map_name)+1; + struct _starpu_mp_transfer_map_command *map_cmd; + _STARPU_MALLOC(map_cmd, map_cmd_size); + memcpy(map_cmd->fd_name, map_name, strlen(map_name)+1); + free(map_name); + map_cmd->offset = map_offset; + map_cmd->size = size; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_MAP, map_cmd, map_cmd_size); + + answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); + + if (answer == STARPU_MP_COMMAND_ERROR_MAP) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + return 0; + } + + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_MAP && arg_size == sizeof(map_addr)); + + memcpy(&map_addr, arg, arg_size); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + free(map_cmd); + + return map_addr; +} + +/* Send a request to the sink linked to the MP_NODE to unmap the memory + * area pointed by ADDR. 
+ */ +void _starpu_src_common_unmap(unsigned dst_node, uintptr_t addr, size_t size) +{ + (void) size; + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); + + struct _starpu_mp_transfer_unmap_command unmap_cmd = {.addr = addr, .size = size}; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_UNMAP, &unmap_cmd, sizeof(unmap_cmd)); + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); +} + +/* Send SIZE bytes pointed by SRC to DST on the sink linked to the MP_NODE with a + * synchronous mode. + */ +int _starpu_src_common_copy_host_to_sink_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size) +{ + struct _starpu_mp_transfer_command cmd = {.size = size, .addr = dst, .event = NULL}; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_RECV_FROM_HOST, &cmd, sizeof(cmd)); + + mp_node->dt_send(mp_node, src, size, NULL); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + return 0; +} + +/* Send SIZE bytes pointed by SRC to DST on the sink linked to the MP_NODE with an + * asynchronous mode. 
+ */ +int _starpu_src_common_copy_host_to_sink_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void * event) +{ + struct _starpu_mp_transfer_command cmd = {.size = size, .addr = dst, .event = event}; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + /* For asynchronous transfers, we save information + * to test is they are finished + */ + struct _starpu_async_channel * async_channel = event; + async_channel->polling_node_receiver = mp_node; + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC, &cmd, sizeof(cmd)); + + mp_node->dt_send(mp_node, src, size, event); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + return -EAGAIN; +} + +int _starpu_src_common_copy_data_host_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + (void) src_node; + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); + + if (async_channel) + return _starpu_src_common_copy_host_to_sink_async(mp_node, + (void*) (src + src_offset), + (void*) (dst + dst_offset), + size, async_channel); + else + return _starpu_src_common_copy_host_to_sink_sync(mp_node, + (void*) (src + src_offset), + (void*) (dst + dst_offset), + size); +} + +/* Receive SIZE bytes pointed by SRC on the sink linked to the MP_NODE and store them in DST + * with a synchronous mode. 
+ */ +int _starpu_src_common_copy_sink_to_host_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size) +{ + enum _starpu_mp_command answer; + void *arg; + int arg_size; + struct _starpu_mp_transfer_command cmd = {.size = size, .addr = src, .event = NULL}; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, &cmd, sizeof(cmd)); + + answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); + + STARPU_ASSERT(answer == STARPU_MP_COMMAND_SEND_TO_HOST); + + mp_node->dt_recv(mp_node, dst, size, NULL); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + return 0; +} + +/* Receive SIZE bytes pointed by SRC on the sink linked to the MP_NODE and store them in DST + * with an asynchronous mode. + */ +int _starpu_src_common_copy_sink_to_host_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void * event) +{ + struct _starpu_mp_transfer_command cmd = {.size = size, .addr = src, .event = event}; + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + /* For asynchronous transfers, we save information + * to test is they are finished + */ + struct _starpu_async_channel * async_channel = event; + async_channel->polling_node_sender = mp_node; + + _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC, &cmd, sizeof(cmd)); + + mp_node->dt_recv(mp_node, dst, size, event); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + return -EAGAIN; +} + +int _starpu_src_common_copy_data_sink_to_host(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + (void) dst_node; + struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(src_node); + + if (async_channel) + return _starpu_src_common_copy_sink_to_host_async(mp_node, + (void*) (src + src_offset), + (void*) (dst + 
dst_offset), + size, async_channel); + else + return _starpu_src_common_copy_sink_to_host_sync(mp_node, + (void*) (src + src_offset), + (void*) (dst + dst_offset), + size); +} + +/* Tell the sink linked to SRC_NODE to send SIZE bytes of data pointed by SRC + * to the sink linked to DST_NODE. The latter store them in DST with a synchronous + * mode. + */ +int _starpu_src_common_copy_sink_to_sink_sync(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size) +{ + enum _starpu_mp_command answer; + void *arg; + int arg_size; + + struct _starpu_mp_transfer_command_to_device cmd = {.devid = dst_node->peer_id, .size = size, .addr = src, .event = NULL}; + + /* lock the node with the little peer_id first to prevent deadlock */ + if (src_node->peer_id > dst_node->peer_id) + { + STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); + } + else + { + STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); + } + + /* Tell source to send data to dest. */ + _starpu_mp_common_send_command(src_node, STARPU_MP_COMMAND_SEND_TO_SINK, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + /* Release the source as fast as possible */ + STARPU_PTHREAD_MUTEX_UNLOCK(&src_node->connection_mutex); + + cmd.devid = src_node->peer_id; + cmd.size = size; + cmd.addr = dst; + + /* Tell dest to receive data from source. */ + _starpu_mp_common_send_command(dst_node, STARPU_MP_COMMAND_RECV_FROM_SINK, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + /* Wait for answer from dest to know whether transfer is finished. 
*/ + answer = _starpu_src_common_wait_command_sync(dst_node, &arg, &arg_size); + + STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE); + + /* Release the receiver when we received the acknowledgment */ + STARPU_PTHREAD_MUTEX_UNLOCK(&dst_node->connection_mutex); + + return 0; +} + +/* Tell the sink linked to SRC_NODE to send SIZE bytes of data pointed by SRC + * to the sink linked to DST_NODE. The latter store them in DST with an asynchronous + * mode. + */ +int _starpu_src_common_copy_sink_to_sink_async(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size, void * event) +{ + struct _starpu_mp_transfer_command_to_device cmd = {.devid = dst_node->peer_id, .size = size, .addr = src, .event = event}; + + /* lock the node with the little peer_id first to prevent deadlock */ + if (src_node->peer_id > dst_node->peer_id) + { + STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); + } + else + { + STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); + } + + /* For asynchronous transfers, we save information + * to test is they are finished + */ + struct _starpu_async_channel * async_channel = event; + async_channel->polling_node_sender = src_node; + async_channel->polling_node_receiver = dst_node; + /* Increase number of ack waited */ + async_channel->starpu_mp_common_finished_receiver++; + async_channel->starpu_mp_common_finished_sender++; + + /* Tell source to send data to dest. */ + _starpu_mp_common_send_command(src_node, STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); + + STARPU_PTHREAD_MUTEX_UNLOCK(&src_node->connection_mutex); + + cmd.devid = src_node->peer_id; + cmd.size = size; + cmd.addr = dst; + + /* Tell dest to receive data from source. 
/* Copy SIZE bytes from SRC + SRC_OFFSET on memory node SRC_NODE to
 * DST + DST_OFFSET on memory node DST_NODE, both nodes being sinks.
 *
 * When ASYNC_CHANNEL is non-NULL the asynchronous variant is used and its
 * return value is forwarded, otherwise the synchronous variant is used.
 */
int _starpu_src_common_copy_data_sink_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
{
	struct _starpu_mp_node *sender = _starpu_src_common_get_mp_node_from_memory_node(src_node);
	struct _starpu_mp_node *receiver = _starpu_src_common_get_mp_node_from_memory_node(dst_node);
	void *src_addr = (void*) (src + src_offset);
	void *dst_addr = (void*) (dst + dst_offset);

	if (!async_channel)
		return _starpu_src_common_copy_sink_to_sink_sync(sender, receiver, src_addr, dst_addr, size);

	return _starpu_src_common_copy_sink_to_sink_async(sender, receiver, src_addr, dst_addr, size, async_channel);
}
save_thread_env[old].current_omp_task = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_task_key); +#endif + + _starpu_set_current_task(save_thread_env[new].current_task); + STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_key, save_thread_env[new].current_worker); + STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_set_key, save_thread_env[new].current_worker_set); +#ifdef STARPU_OPENMP + STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_thread_key, save_thread_env[new].current_omp_thread); + STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_task_key, save_thread_env[new].current_omp_task); +#endif +} + +/* Send workers to the sink node + */ +static void _starpu_src_common_send_workers(struct _starpu_mp_node * node, int baseworkerid, int nworkers) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + int worker_size = sizeof(struct _starpu_worker)*nworkers; + int combined_worker_size = STARPU_NMAX_COMBINEDWORKERS*sizeof(struct _starpu_combined_worker); + int msg[5]; + msg[0] = nworkers; + msg[1] = worker_size; + msg[2] = combined_worker_size; + msg[3] = baseworkerid; + msg[4] = starpu_worker_get_count(); + + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + + /* tell the sink node that we will send him all workers */ + _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_SYNC_WORKERS, &msg, sizeof(msg)); + + /* Send all worker to the sink node */ + node->dt_send(node,&config->workers[baseworkerid],worker_size, NULL); + + /* Send all combined workers to the sink node */ + node->dt_send(node, &config->combined_workers,combined_worker_size, NULL); + + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); +} + +static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * worker_set, struct _starpu_mp_node * mp_node, unsigned memnode) +{ + int res = 0; + unsigned i; + struct starpu_task *tasks[worker_set->nworkers]; + + _starpu_may_pause(); + +#ifdef STARPU_SIMGRID + starpu_pthread_wait_reset(&worker_set->workers[0].wait); +#endif + + /* Test if async transfers are 
completed */ + for (i = 0; i < worker_set->nworkers; i++) + { + struct starpu_task *task = worker_set->workers[i].task_transferring; + /* We send all buffers to execute the task */ + if (task != NULL && worker_set->workers[i].nb_buffers_transferred == worker_set->workers[i].nb_buffers_totransfer) + { + STARPU_RMB(); + struct _starpu_job * j = _starpu_get_job_associated_to_task(task); + + _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_set_local_worker_key(&worker_set->workers[i]); + _starpu_fetch_task_input_tail(task, j, &worker_set->workers[i]); + /* Reset it */ + worker_set->workers[i].task_transferring = NULL; + j->workerid = worker_set->workers[i].workerid; + + /* Execute the task */ + res = _starpu_src_common_execute(j, &worker_set->workers[i], mp_node); + switch (res) + { + case 0: + /* The task task has been launched with no error */ + break; + case -EAGAIN: + _STARPU_DISP("ouch, this MP worker could not actually run task %p, putting it back...\n", tasks[i]); + _starpu_push_task_to_workers(worker_set->workers[i].task_transferring); + STARPU_ABORT(); + continue; + break; + default: + STARPU_ASSERT(0); + } + + _STARPU_TRACE_START_PROGRESS(memnode); + } + } + + res |= __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); + + /* Handle message which have been store */ + _starpu_src_common_handle_stored_async(mp_node); + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + + unsigned stopped_progress = 0; + /* poll the device for completed jobs.*/ + while(mp_node->nt_recv_is_ready(mp_node)) + { + stopped_progress = 1; + _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(mp_node)); + _starpu_src_common_recv_async(mp_node); + /* Mutex is unlock in _starpu_src_common_recv_async */ + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); + } + if (stopped_progress) + _STARPU_TRACE_START_PROGRESS(mp_node_memory_node(mp_node)); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); + + /* get task for each worker*/ + res |= 
_starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, memnode); + +#ifdef STARPU_SIMGRID + if (!res) + starpu_pthread_wait_wait(&worker_set->workers[0].wait); +#endif + + /*if at least one worker have pop a task*/ + if(res != 0) + { + for(i=0; inworkers; i++) + { + if(tasks[i] != NULL) + { + struct _starpu_worker *worker = &worker_set->workers[i]; + _STARPU_TRACE_END_PROGRESS(worker->memory_node); + _starpu_set_local_worker_key(worker); + int ret = _starpu_fetch_task_input(tasks[i], _starpu_get_job_associated_to_task(tasks[i]), 1); + STARPU_ASSERT(!ret); + _STARPU_TRACE_START_PROGRESS(worker->memory_node); + } + } + + /* Handle message which have been store */ + _starpu_src_common_handle_stored_async(mp_node); + } +} + +/* Function looping on the source node */ +void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int ndevices, struct _starpu_mp_node ** mp_node) +{ + unsigned memnode[ndevices]; + + int device; + for (device = 0; device < ndevices; device++) + memnode[device] = worker_set[device].workers[0].memory_node; + + for (device = 0; device < ndevices; device++) + { + struct _starpu_worker_set * device_worker_set = &worker_set[device]; + struct _starpu_worker *baseworker = &device_worker_set->workers[0]; + struct _starpu_machine_config *config = baseworker->config; + unsigned baseworkerid = baseworker - config->workers; + + _starpu_src_common_send_workers(mp_node[device], baseworkerid, worker_set[device].nworkers); + } + + for (device = 0; device < ndevices; device++) + { + struct _starpu_worker_set * device_worker_set = &worker_set[device]; + struct _starpu_worker *worker0 = &device_worker_set->workers[0]; + + STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); + worker0->status = STATUS_UNKNOWN; + STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); + } + + for (device = 0; device < ndevices; device++) + { + struct _starpu_worker_set * device_worker_set = &worker_set[device]; + + /* tell the main thread that this one is 
ready */ + STARPU_PTHREAD_MUTEX_LOCK(&device_worker_set->mutex); + device_worker_set->set_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&device_worker_set->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&device_worker_set->mutex); + + _STARPU_TRACE_START_PROGRESS(memnode[device]); + } + + /*main loop*/ + while (_starpu_machine_is_running()) + { + for (device = 0; device < ndevices ; device++) + { + if (ndevices > 1) + _starpu_src_common_switch_env(((device-1)+ndevices)%ndevices, device); + _starpu_src_common_worker_internal_work(&worker_set[device], mp_node[device], memnode[device]); + } + } + + for (device = 0; device < ndevices; device++) + { + _STARPU_TRACE_END_PROGRESS(memnode[device]); + _starpu_datawizard_handle_all_pending_node_data_requests(memnode[device]); + } + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! */ + for (device = 0; device < ndevices; device++) + _starpu_free_all_automatically_allocated_buffers(memnode[device]); +} diff --git a/src/drivers/mp_common/source_common.h b/src/drivers/mp_common/source_common.h new file mode 100644 index 0000000..4731e45 --- /dev/null +++ b/src/drivers/mp_common/source_common.h @@ -0,0 +1,94 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __SOURCE_COMMON_H__ +#define __SOURCE_COMMON_H__ + +/** @file */ + +#ifdef STARPU_USE_MP + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/* Array of structures containing all the information useful to send + * and receive information with devices */ +#ifdef STARPU_USE_MPI_MASTER_SLAVE +extern struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXMPIDEVS]; +#endif + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE +extern struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXTCPIPDEVS]; +#endif + +int _starpu_src_common_store_message(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer); + +enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size); + +int _starpu_src_common_sink_nbcores(struct _starpu_mp_node *node, int *buf); + +int _starpu_src_common_lookup(struct _starpu_mp_node *node, void (**func_ptr)(void), const char *func_name); + +starpu_cpu_func_t _starpu_src_common_get_cpu_func_from_codelet(struct starpu_codelet *cl, unsigned nimpl); + +void(* _starpu_src_common_get_cpu_func_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void); + +struct _starpu_mp_node *_starpu_src_common_get_mp_node_from_memory_node(int memory_node); +uintptr_t _starpu_src_common_allocate(unsigned dst_node, size_t size, int flags); +void _starpu_src_common_free(unsigned dst_node, uintptr_t addr, size_t size, int flags); + +uintptr_t _starpu_src_common_map(unsigned dst_node, uintptr_t addr, size_t size); +void _starpu_src_common_unmap(unsigned dst_node, uintptr_t addr, size_t size); + +int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node, + void (*kernel)(void), unsigned coreid, + enum starpu_codelet_type type, + int is_parallel_task, int cb_workerid, + starpu_data_handle_t 
*handles, + void **interfaces, + unsigned nb_interfaces, + void *cl_arg, size_t cl_arg_size, int detached); + +int _starpu_src_common_copy_host_to_sink_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size); + +int _starpu_src_common_copy_sink_to_host_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size); + +int _starpu_src_common_copy_sink_to_sink_sync(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size); + +int _starpu_src_common_copy_host_to_sink_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void *event); + +int _starpu_src_common_copy_sink_to_host_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void *event); + +int _starpu_src_common_copy_sink_to_sink_async(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size, void *event); + +int _starpu_src_common_copy_data_host_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); +int _starpu_src_common_copy_data_sink_to_host(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); +int _starpu_src_common_copy_data_sink_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); + +void _starpu_src_common_init_switch_env(unsigned this); +void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int ndevices, struct _starpu_mp_node ** mp_node); + +void _starpu_src_common_deinit(void); + +#pragma GCC visibility pop + +#endif /* STARPU_USE_MP */ + +#endif /* __SOURCE_COMMON_H__ */ diff --git a/src/drivers/mpi/driver_mpi_common.c b/src/drivers/mpi/driver_mpi_common.c new file mode 100644 
index 0000000..9cfe877 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_common.c @@ -0,0 +1,595 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#define NITER 32 +#define SIZE_BANDWIDTH (1024*1024) + +#define DRIVER_MPI_MASTER_NODE_DEFAULT 0 + +static int mpi_initialized = 0; +static int extern_initialized = 0; +static int src_node_id; + +int _starpu_mpi_common_multiple_thread; + +/* (For a given datawizard we may have several starpu_interface_copy calls) */ +LIST_TYPE(_starpu_mpi_ms_event_request, + MPI_Request request; +); + +struct _starpu_mpi_ms_async_event +{ + int is_sender; + struct _starpu_mpi_ms_event_request_list * requests; +}; + +static inline struct _starpu_mpi_ms_async_event *_starpu_mpi_ms_async_event(union _starpu_async_channel_event *_event) +{ + struct _starpu_mpi_ms_async_event *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +/* This lets the user decide which MPI rank is to be the master. 
Usually it's just rank 0 */ +static void _starpu_mpi_set_src_node_id() +{ + int node_id = starpu_getenv_number("STARPU_MPI_MASTER_NODE"); + + if (node_id != -1) + { + int nb_proc, id_proc; + MPI_Comm_size(MPI_COMM_WORLD, &nb_proc); + MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); + + if (node_id < nb_proc) + { + src_node_id = node_id; + return; + } + else if (id_proc == DRIVER_MPI_MASTER_NODE_DEFAULT) + { + /* Only one node prints the error message. */ + _STARPU_MSG("The node (%d) you specify to be the master is " + "greater than the total number of nodes (%d). " + "StarPU will use node %d.\n", node_id, nb_proc, DRIVER_MPI_MASTER_NODE_DEFAULT); + } + } + + /* Node by default. */ + src_node_id = DRIVER_MPI_MASTER_NODE_DEFAULT; +} + +int _starpu_mpi_common_mp_init() +{ + //Here we supposed the programmer called two times starpu_init. + if (mpi_initialized) + return -ENODEV; + + mpi_initialized = 1; + + _starpu_mpi_common_multiple_thread = starpu_getenv_number_default("STARPU_MPI_MS_MULTIPLE_THREAD", 0); + + if (MPI_Initialized(&extern_initialized) != MPI_SUCCESS) + STARPU_ABORT_MSG("Cannot check if MPI is initialized or not !"); + + //Here MPI_Init or MPI_Init_thread is already called + if (!extern_initialized) + { + + int required = _starpu_mpi_common_multiple_thread ? MPI_THREAD_MULTIPLE : MPI_THREAD_FUNNELED; + + int thread_support; + if (MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) != MPI_SUCCESS) + { + STARPU_ABORT_MSG("Cannot Initialize MPI !"); + } + + if (thread_support != required) + { + if (required == MPI_THREAD_MULTIPLE) + _STARPU_DISP("MPI doesn't support MPI_THREAD_MULTIPLE option. MPI Master-Slave can have problems if multiple slaves are launched. \n"); + if (required == MPI_THREAD_FUNNELED) + _STARPU_DISP("MPI doesn't support MPI_THREAD_FUNNELED option. Many errors can occur. 
\n"); + } + } + + /* Find which node is the master */ + _starpu_mpi_set_src_node_id(); + + /* In MPI case we look at the rank to know if we are a sink */ + if (!_starpu_mpi_common_is_src_node()) + setenv("STARPU_SINK", "STARPU_MPI_MS", 1); + + return 1; +} + +void _starpu_mpi_common_mp_deinit() +{ + if (!extern_initialized) + MPI_Finalize(); +} + +int _starpu_mpi_common_is_src_node() +{ + int id_proc; + MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); + return id_proc == src_node_id; +} + +int _starpu_mpi_common_get_src_node() +{ + return src_node_id; +} + +int _starpu_mpi_common_is_mp_initialized() +{ + return mpi_initialized; +} + +/* common parts to initialize a source or a sink node */ +void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node) +{ + struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; + + int nmpicores = starpu_getenv_number("STARPU_NMPIMSTHREADS"); + if (nmpicores == -1) + { + int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + node->nb_cores = topology->nusedpus / nhyperthreads; + } + else + node->nb_cores = nmpicores; +} + +int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node) +{ + int res, source; + int flag = 0; + int id_proc; + MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); + + if (id_proc == src_node_id) + { + /* Source has mp_node defined */ + source = mp_node->mp_connection.mpi_remote_nodeid; + } + else + { + /* Sink can have sink to sink message */ + source = MPI_ANY_SOURCE; + } + + res = MPI_Iprobe(source, SYNC_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); + STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !"); + + return flag; +} + +int _starpu_mpi_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node) +{ + int res, source; + int flag = 0; + int id_proc; + MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); + + if (id_proc == src_node_id) + { + /* Source has mp_node defined */ + source = 
mp_node->mp_connection.mpi_remote_nodeid; + } + else + { + /* Sink can have sink to sink message */ + source = MPI_ANY_SOURCE; + } + + res = MPI_Iprobe(source, NOTIF_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); + STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !"); + + return flag; +} + +int _starpu_mpi_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED) +{ + return 1; +} + +static void __starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event, int notif); +static void __starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event, int notif); + +/* SEND to source node */ +static void __starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) +{ + //_STARPU_MSG("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid); + __starpu_mpi_common_send_to_device(node, node->mp_connection.mpi_remote_nodeid, msg, len, event, notif); +} + +void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event) +{ + __starpu_mpi_common_send(node, msg, len, event, 0); +} + +void _starpu_mpi_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_mpi_common_send(node, msg, len, NULL, 0); +} + +void _starpu_mpi_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_mpi_common_send(node, msg, len, NULL, 1); +} + +/* RECV to source node */ +static void __starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) +{ + //_STARPU_MSG("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg); + __starpu_mpi_common_recv_from_device(node, node->mp_connection.mpi_remote_nodeid, msg, len, event, notif); +} + +void 
_starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event) +{ + __starpu_mpi_common_recv(node, msg, len, event, 0); +} + +void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_mpi_common_recv(node, msg, len, NULL, 0); +} + +void _starpu_mpi_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_mpi_common_recv(node, msg, len, NULL, 1); +} + +/* SEND to any node */ +static void __starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event, int notif) +{ + int res; + + //_STARPU_MSG("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg); + + if (event) + { + /* Asynchronous send */ + struct _starpu_async_channel * channel = event; + struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&channel->event); + mpi_ms_event->is_sender = 1; + + /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ + if (channel->node_ops == NULL) + mpi_ms_event->requests = NULL; + + /* Initialize the list */ + if (mpi_ms_event->requests == NULL) + mpi_ms_event->requests = _starpu_mpi_ms_event_request_list_new(); + + struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new(); + + res = MPI_Isend(msg, len, MPI_BYTE, dst_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request); + + channel->starpu_mp_common_finished_receiver++; + channel->starpu_mp_common_finished_sender++; + + _starpu_mpi_ms_event_request_list_push_back(mpi_ms_event->requests, req); + } + else + { + /* Synchronous send */ + /* Send commands */ + if (!notif) + res = MPI_Send(msg, len, MPI_BYTE, dst_devid, SYNC_TAG, MPI_COMM_WORLD); + /* Send notifications */ + else + res = MPI_Send(msg, len, MPI_BYTE, dst_devid, NOTIF_TAG, MPI_COMM_WORLD); + } + + STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len); 
+} + +void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event) +{ + __starpu_mpi_common_send_to_device(node, dst_devid, msg, len, event, 0); +} + +/* RECV to any node */ +static void __starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event, int notif) +{ + int res; + + //_STARPU_MSG("R_to_D nop recv %d bytes from %d\n", len, src_devid); + + if (event) + { + /* Asynchronous recv */ + struct _starpu_async_channel * channel = event; + struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&channel->event); + mpi_ms_event->is_sender = 0; + + /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ + if (channel->node_ops == NULL) + mpi_ms_event->requests = NULL; + + /* Initialize the list */ + if (mpi_ms_event->requests == NULL) + mpi_ms_event->requests = _starpu_mpi_ms_event_request_list_new(); + + struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new(); + + res = MPI_Irecv(msg, len, MPI_BYTE, src_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request); + STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot Ireceive a msg with a size of %d Bytes !", len); + + channel->starpu_mp_common_finished_receiver++; + channel->starpu_mp_common_finished_sender++; + + _starpu_mpi_ms_event_request_list_push_back(mpi_ms_event->requests, req); + } + else + { + /* Synchronous recv */ + MPI_Status s; + /* Send commands */ + if (!notif) + res = MPI_Recv(msg, len, MPI_BYTE, src_devid, SYNC_TAG, MPI_COMM_WORLD, &s); + else + res = MPI_Recv(msg, len, MPI_BYTE, src_devid, NOTIF_TAG, MPI_COMM_WORLD, &s); + + int num_expected; + MPI_Get_count(&s, MPI_BYTE, &num_expected); + + STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len); + STARPU_ASSERT_MSG(res 
== MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len); + } +} + +void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event) +{ + __starpu_mpi_common_recv_from_device(node, src_devid, msg, len, event, 0); +} + +static void _starpu_mpi_common_polling_node(struct _starpu_mp_node * node) +{ + /* poll the asynchronous messages.*/ + if (node != NULL) + { + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + while(node->nt_recv_is_ready(node)) + { + enum _starpu_mp_command answer; + void *arg; + int arg_size; + answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); + if(!_starpu_src_common_store_message(node,arg,arg_size,answer)) + { + _STARPU_ERROR("incorrect command: unknown command or sync command"); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + } +} + +/* - In device to device communications, the first ack received by host + * is considered as the sender (but it cannot be, in fact, the sender) + */ +unsigned int _starpu_mpi_common_test_event(struct _starpu_async_channel * event) +{ + struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&event->event); + if (mpi_ms_event->requests != NULL && !_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) + { + struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(mpi_ms_event->requests); + struct _starpu_mpi_ms_event_request * req_next; + + while (req != _starpu_mpi_ms_event_request_list_end(mpi_ms_event->requests)) + { + req_next = _starpu_mpi_ms_event_request_list_next(req); + + int flag = 0; + MPI_Test(&req->request, &flag, MPI_STATUS_IGNORE); + if (flag) + { + _starpu_mpi_ms_event_request_list_erase(mpi_ms_event->requests, req); + _starpu_mpi_ms_event_request_delete(req); + + if (mpi_ms_event->is_sender) + event->starpu_mp_common_finished_sender--; + else + event->starpu_mp_common_finished_receiver--; + + } 
+ req = req_next; + } + + /* When the list is empty, we finished to wait each request */ + if (_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) + { + /* Destroy the list */ + _starpu_mpi_ms_event_request_list_delete(mpi_ms_event->requests); + mpi_ms_event->requests = NULL; + } + } + + _starpu_mpi_common_polling_node(event->polling_node_sender); + _starpu_mpi_common_polling_node(event->polling_node_receiver); + + return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver; +} + +/* - In device to device communications, the first ack received by host + * is considered as the sender (but it cannot be, in fact, the sender) + */ +/* Only used at starpu_shutdown */ +void _starpu_mpi_common_wait_request_completion(struct _starpu_async_channel * event) +{ + struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&event->event); + if (mpi_ms_event->requests != NULL && !_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) + { + struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(mpi_ms_event->requests); + struct _starpu_mpi_ms_event_request * req_next; + + while (req != _starpu_mpi_ms_event_request_list_end(mpi_ms_event->requests)) + { + req_next = _starpu_mpi_ms_event_request_list_next(req); + + MPI_Wait(&req->request, MPI_STATUS_IGNORE); + _starpu_mpi_ms_event_request_list_erase(mpi_ms_event->requests, req); + + _starpu_mpi_ms_event_request_delete(req); + req = req_next; + + if (mpi_ms_event->is_sender) + event->starpu_mp_common_finished_sender--; + else + event->starpu_mp_common_finished_receiver--; + + } + + STARPU_ASSERT_MSG(_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests), "MPI Request list is not empty after a wait_event !"); + + /* Destroy the list */ + _starpu_mpi_ms_event_request_list_delete(mpi_ms_event->requests); + mpi_ms_event->requests = NULL; + } + + //incoming ack from devices + while(event->starpu_mp_common_finished_sender > 0 || 
event->starpu_mp_common_finished_receiver > 0) + { + _starpu_mpi_common_polling_node(event->polling_node_sender); + _starpu_mpi_common_polling_node(event->polling_node_receiver); + } +} + +void _starpu_mpi_common_barrier(void) +{ + int ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier failed"); +} + +/* Compute bandwidth and latency between source and sink nodes + * Source node has to have the entire set of times at the end + */ +void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]) +{ + int ret; + unsigned iter; + + int nb_proc, id_proc; + MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); + MPI_Comm_size(MPI_COMM_WORLD, &nb_proc); + + char * buf; + _STARPU_MALLOC(buf, SIZE_BANDWIDTH); + memset(buf, 0, SIZE_BANDWIDTH); + + int sender, receiver; + for(sender = 0; sender < nb_proc; sender++) + { + for(receiver = 0; receiver < nb_proc; receiver++) + { + //Node can't be a sender and a receiver + if(sender == receiver) + continue; + + if (src_node_id == id_proc) + _STARPU_DISP("measuring from %d to %d\n", sender, receiver); + + ret = MPI_Barrier(MPI_COMM_WORLD); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier failed"); + + if(id_proc == sender) + { + double start, end; + + /* measure bandwidth sender to receiver */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); + } + end = starpu_timing_now(); + timing_dtod[sender][receiver] = (end - start)/NITER/SIZE_BANDWIDTH; + + /* measure latency sender to receiver */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be 
measured !"); + } + end = starpu_timing_now(); + latency_dtod[sender][receiver] = (end - start)/NITER; + } + + if (id_proc == receiver) + { + /* measure bandwidth sender to receiver*/ + for (iter = 0; iter < NITER; iter++) + { + ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); + } + + /* measure latency sender to receiver */ + for (iter = 0; iter < NITER; iter++) + { + ret = MPI_Recv(buf, 1, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); + } + } + } + + /* When a sender finished its work, it has to send its results to the master */ + + /* Sender doesn't need to send to itself its data */ + if (sender == src_node_id) + goto print; + + /* if we are the sender, we send the data */ + if (sender == id_proc) + { + MPI_Send(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD); + MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD); + } + + /* the master node receives the data */ + if (src_node_id == id_proc) + { + MPI_Recv(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + +print: + if (src_node_id == id_proc) + { + for(receiver = 0; receiver < nb_proc; receiver++) + { + if(sender == receiver) + continue; + + _STARPU_DISP("BANDWIDTH %d -> %d %.0fMB/s %.2fus\n", sender, receiver, 1/timing_dtod[sender][receiver], latency_dtod[sender][receiver]); + } + } + } + free(buf); +} + diff --git a/src/drivers/mpi/driver_mpi_common.h b/src/drivers/mpi/driver_mpi_common.h new file mode 100644 index 0000000..87cf8e2 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_common.h @@ -0,0 +1,71 @@ +/* StarPU --- 
Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_MPI_COMMON_H__ +#define __DRIVER_MPI_COMMON_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + +#define SYNC_TAG 44 +#define ASYNC_TAG 45 +#define NOTIF_TAG 46 + +extern int _starpu_mpi_common_multiple_thread; + +int _starpu_mpi_common_mp_init(); +void _starpu_mpi_common_mp_deinit(); + +int _starpu_mpi_common_is_src_node(); +int _starpu_mpi_common_get_src_node(); + +int _starpu_mpi_common_is_mp_initialized(); +int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node); +int _starpu_mpi_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node); +int _starpu_mpi_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node); + +void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node); + +void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event); +void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event); + +void _starpu_mpi_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len); +void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len); + +void _starpu_mpi_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len); +void 
_starpu_mpi_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len); + +void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int src_devid, void *msg, int len, void * event); +void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len, void * event); + +unsigned int _starpu_mpi_common_test_event(struct _starpu_async_channel * event); +void _starpu_mpi_common_wait_request_completion(struct _starpu_async_channel * event); + +void _starpu_mpi_common_barrier(void); + +void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]); + +#endif /* STARPU_USE_MPI_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* __DRIVER_MPI_COMMON_H__ */ diff --git a/src/drivers/mpi/driver_mpi_init.c b/src/drivers/mpi/driver_mpi_init.c new file mode 100644 index 0000000..8bcce6a --- /dev/null +++ b/src/drivers/mpi/driver_mpi_init.c @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +static struct _starpu_driver_info driver_info = +{ + .name_upper = "MPI_MS", + .name_var = "MPI_MS", + .name_lower = "mpi_ms", + .memory_kind = STARPU_MPI_MS_RAM, + .alpha = 1.0f, +#ifdef STARPU_USE_MPI_MASTER_SLAVE + .run_worker = _starpu_mpi_src_worker, + .init_worker_binding = _starpu_mpi_init_worker_binding, + .init_worker_memory = _starpu_mpi_init_worker_memory, +#endif +}; + +static struct _starpu_memory_driver_info memory_driver_info = +{ + .name_upper = "MPI_MS", + .worker_archtype = STARPU_MPI_MS_WORKER, +#ifdef STARPU_USE_MPI_MASTER_SLAVE + .ops = &_starpu_driver_mpi_ms_node_ops, +#endif +}; + +void _starpu_mpi_ms_preinit(void) +{ + _starpu_driver_info_register(STARPU_MPI_MS_WORKER, &driver_info); + _starpu_memory_driver_info_register(STARPU_MPI_MS_RAM, &memory_driver_info); +} diff --git a/src/drivers/mpi/driver_mpi_sink.c b/src/drivers/mpi/driver_mpi_sink.c new file mode 100644 index 0000000..b2b01c2 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_sink.c @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include "driver_mpi_sink.h" +#include "driver_mpi_source.h" +#include "driver_mpi_common.h" + +void _starpu_mpi_sink_init(struct _starpu_mp_node *node) +{ + _starpu_mpi_common_mp_initialize_src_sink(node); + + _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores); + //TODO +} + +void _starpu_mpi_sink_bind_thread(const struct _starpu_mp_node *mp_node, int coreid, int *core_table, int nb_core) +{ + //TODO + (void)mp_node; + (void)coreid; + (void)core_table; + (void)nb_core; +} diff --git a/src/drivers/mpi/driver_mpi_sink.h b/src/drivers/mpi/driver_mpi_sink.h new file mode 100644 index 0000000..536b069 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_sink.h @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DRIVER_MPI_SINK_H__ +#define __DRIVER_MPI_SINK_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + +void _starpu_mpi_sink_init(struct _starpu_mp_node *node); +void _starpu_mpi_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core); + +#endif /* STARPU_USE_MPI_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* __DRIVER_MPI_SINK_H__ */ diff --git a/src/drivers/mpi/driver_mpi_source.c b/src/drivers/mpi/driver_mpi_source.c new file mode 100644 index 0000000..597e226 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_source.c @@ -0,0 +1,399 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#ifdef STARPU_USE_MPI_MASTER_SLAVE +static unsigned mpi_bindid_init[STARPU_MAXMPIDEVS] = { }; +static unsigned mpi_bindid[STARPU_MAXMPIDEVS]; +static unsigned mpi_memory_init[STARPU_MAXMPIDEVS] = { }; +static unsigned mpi_memory_nodes[STARPU_MAXMPIDEVS]; + +static struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS]; +#endif + +struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node() +{ + struct _starpu_worker *actual_worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(actual_worker); + + int devid = actual_worker->devid; + STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMPIDEVS); + + return _starpu_src_nodes[STARPU_MPI_MS_WORKER][devid]; +} + +/* Configure one MPI slaves for run */ +static void __starpu_init_mpi_config(struct _starpu_machine_topology *topology, + struct _starpu_machine_config *config, + unsigned mpi_idx) +{ + int nhwcores; + _starpu_src_common_sink_nbcores(_starpu_src_nodes[STARPU_MPI_MS_WORKER][mpi_idx], &nhwcores); + STARPU_ASSERT(mpi_idx < STARPU_NMAXDEVS); + topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nhwcores; + + int nmpicores; + nmpicores = starpu_getenv_number("STARPU_NMPIMSTHREADS"); + + _starpu_topology_check_ndevices(&nmpicores, nhwcores, 0, INT_MAX, 0, "STARPU_NMPIMSTHREADS", "MPI cores", ""); + + mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers]; + mpi_worker_set[mpi_idx].nworkers = nmpicores; + _starpu_src_nodes[STARPU_MPI_MS_WORKER][mpi_idx]->baseworkerid = topology->nworkers; + + _starpu_topology_configure_workers(topology, config, + STARPU_MPI_MS_WORKER, + mpi_idx, mpi_idx, 0, 0, + nmpicores, 1, &mpi_worker_set[mpi_idx], + _starpu_mpi_common_multiple_thread ? 
NULL : mpi_worker_set); +} + +/* Determine which devices we will use */ +void _starpu_init_mpi_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, + struct starpu_conf *user_conf, int no_mp_config) +{ + int i; + + /* Discover and configure the mp topology. That means: + * - discover the number of mp nodes; + * - initialize each discovered node; + * - discover the local topology (number of PUs/devices) of each node; + * - configure the workers accordingly. + */ + + for (i = 0; i < (int) (sizeof(mpi_worker_set)/sizeof(mpi_worker_set[0])); i++) + mpi_worker_set[i].workers = NULL; + + int nmpims = user_conf->nmpi_ms; + + if (nmpims != 0) + { + /* Discover and initialize the number of MPI nodes through the mp + * infrastructure. */ + unsigned nhwmpidevices = _starpu_mpi_src_get_device_count(); + + if (nmpims == -1) + /* Nothing was specified, so let's use the number of + * detected mpi devices. ! */ + nmpims = nhwmpidevices; + else + { + if ((unsigned) nmpims > nhwmpidevices) + { + /* The user requires more MPI devices than there is available */ + _STARPU_MSG("# Warning: %d MPI Master-Slave devices requested. Only %u available.\n", + nmpims, nhwmpidevices); + nmpims = nhwmpidevices; + } + /* Let's make sure this value is OK. */ + if (nmpims > STARPU_MAXMPIDEVS) + { + _STARPU_DISP("Warning: %d MPI MS devices requested. Only %d enabled. 
Use configure option --enable-maxmpidev=xxx to update the maximum value of supported MPI MS devices.\n", nmpims, STARPU_MAXMPIDEVS); + nmpims = STARPU_MAXMPIDEVS; + } + } + } + + topology->ndevices[STARPU_MPI_MS_WORKER] = nmpims; + + /* if user don't want to use MPI slaves, we close the slave processes */ + if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0) + { + _starpu_mpi_common_mp_deinit(); + exit(0); + } + + if (!no_mp_config) + { + for (i = 0; i < nmpims; i++) + _starpu_src_nodes[STARPU_MPI_MS_WORKER][i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i); + + for (i = 0; i < nmpims; i++) + __starpu_init_mpi_config(topology, config, i); + } +} + +/* Bind the driver on a CPU core */ +void _starpu_mpi_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned *preferred_binding = NULL; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + + if (mpi_bindid_init[devid]) + { + } + else + { + mpi_bindid_init[devid] = 1; + if (_starpu_mpi_common_multiple_thread || devid == 0) + mpi_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + else + mpi_bindid[devid] = mpi_bindid[0]; + } +} + +/* Set up memory and buses */ +void _starpu_mpi_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa, devid2; + + if (mpi_memory_init[devid]) + { + memory_node = mpi_memory_nodes[devid]; + } + else + { + mpi_memory_init[devid] = 1; + memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid); + + _starpu_memory_node_set_mapped(memory_node); + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_register_bus(numa, memory_node); + 
_starpu_register_bus(memory_node, numa); + } + for (devid2 = 0; devid2 < STARPU_MAXMPIDEVS; devid2++) + { + if (mpi_memory_init[devid2]) + { + _starpu_register_bus(mpi_memory_nodes[devid], mpi_memory_nodes[devid2]); + _starpu_register_bus(mpi_memory_nodes[devid2], mpi_memory_nodes[devid]); + } + } + + } + //This worker can manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); + + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); + + if (!_starpu_mpi_common_multiple_thread) + { + /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */ + int findworker; + for (findworker = 0; findworker < workerarg->workerid; findworker++) + { + struct _starpu_worker *findworkerarg = &config->workers[findworker]; + if (findworkerarg->arch == STARPU_MPI_MS_WORKER) + { + _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node); + _starpu_worker_drives_memory_node(findworkerarg, memory_node); + } + } + } + + workerarg->bindid = mpi_bindid[devid]; + _starpu_memory_node_add_nworkers(memory_node); + + workerarg->memory_node = memory_node; +} + +static void _starpu_deinit_mpi_node(int devid) +{ + _starpu_mp_common_send_command(_starpu_src_nodes[STARPU_MPI_MS_WORKER][devid], STARPU_MP_COMMAND_EXIT, NULL, 0); + + _starpu_mp_common_node_destroy(_starpu_src_nodes[STARPU_MPI_MS_WORKER][devid]); +} + + +void _starpu_deinit_mpi_config(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + unsigned i; + + for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++) + _starpu_deinit_mpi_node(i); +} + + +void _starpu_mpi_source_init(struct _starpu_mp_node *node) +{ + _starpu_mpi_common_mp_initialize_src_sink(node); + //TODO +} + + +void _starpu_mpi_source_deinit(struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED) +{ + +} + +unsigned 
_starpu_mpi_src_get_device_count() +{ + int nb_mpi_devices; + + if (!_starpu_mpi_common_is_mp_initialized()) + return 0; + + MPI_Comm_size(MPI_COMM_WORLD, &nb_mpi_devices); + + //Remove one for master + nb_mpi_devices = nb_mpi_devices - 1; + + return nb_mpi_devices; +} + +void *_starpu_mpi_src_worker(void *arg) +{ + struct _starpu_worker *worker0 = arg; + struct _starpu_worker_set *set = worker0->set; + struct _starpu_worker_set *worker_set_mpi = set; + int nbsinknodes = _starpu_mpi_common_multiple_thread ? 1 : _starpu_mpi_src_get_device_count(); + + int workersetnum; + for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++) + { + struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum]; + + /* As all workers of a set share common data, we just use the first + * one for initializing the following stuffs. */ + struct _starpu_worker *baseworker = &worker_set->workers[0]; + struct _starpu_machine_config *config = baseworker->config; + unsigned baseworkerid = baseworker - config->workers; + unsigned devid = baseworker->devid; + unsigned i; + + /* unsigned memnode = baseworker->memory_node; */ + + _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0); + +#ifdef STARPU_USE_FXT + for (i = 1; i < worker_set->nworkers; i++) + _starpu_worker_start(&worker_set->workers[i], STARPU_MPI_MS_WORKER, 0); +#endif + + // Current task for a thread managing a worker set has no sense. 
+ _starpu_set_current_task(NULL); + + for (i = 0; i < config->topology.nworker[STARPU_MPI_MS_WORKER][devid]; i++) + { + struct _starpu_worker *worker = &config->workers[baseworkerid+i]; + snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i); + snprintf(worker->short_name, sizeof(worker->short_name), "MPI_MS %u.%u", devid, i); + } + + { + char thread_name[16]; + if (_starpu_mpi_common_multiple_thread) + snprintf(thread_name, sizeof(thread_name), "MPI_MS %u", devid); + else + snprintf(thread_name, sizeof(thread_name), "MPI_MS"); + starpu_pthread_setname(thread_name); + } + + for (i = 0; i < worker_set->nworkers; i++) + { + struct _starpu_worker *worker = &worker_set->workers[i]; + _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + } + + _starpu_src_common_init_switch_env(workersetnum); + } /* for */ + + _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, &_starpu_src_nodes[STARPU_MPI_MS_WORKER][worker_set_mpi->workers[0].devid]); + + return NULL; +} + +static int _starpu_mpi_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + (void) node; + enum starpu_node_kind kind = starpu_node_get_kind(handling_node); + return (kind == STARPU_MPI_MS_RAM); +} + +static uintptr_t _starpu_mpi_map(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, size_t size, int *ret) +{ + uintptr_t map_addr = _starpu_src_common_map(dst_node, src+src_offset, size); + if(map_addr == 0) + { + *ret=-ENOMEM; + } + else + { + *ret = 0; + } + return map_addr; +} + +static int _starpu_mpi_unmap(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst, unsigned dst_node, size_t size) +{ + _starpu_src_common_unmap(dst_node, dst, size); + + return 0; +} + +static int _starpu_mpi_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) src; + (void) 
src_offset; + (void) src_node; + (void) dst; + (void) dst_offset; + (void) dst_node; + (void) size; + + /* Memory mappings are cache-coherent */ + return 0; +} + +struct _starpu_node_ops _starpu_driver_mpi_ms_node_ops = +{ + .name = "mpi driver", + + .malloc_on_node = _starpu_src_common_allocate, + .free_on_node = _starpu_src_common_free, + + .is_direct_access_supported = _starpu_mpi_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, + .copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_copy_interface_any_to_any, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, + .copy_interface_from[STARPU_MPI_MS_RAM] = _starpu_copy_interface_any_to_any, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_src_common_copy_data_sink_to_host, + .copy_data_to[STARPU_MPI_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_src_common_copy_data_host_to_sink, + .copy_data_from[STARPU_MPI_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, + + /* TODO: copy2D/3D? */ + + .wait_request_completion = _starpu_mpi_common_wait_request_completion, + .test_request_completion = _starpu_mpi_common_test_event, + + .map[STARPU_CPU_RAM] = _starpu_mpi_map, + .unmap[STARPU_CPU_RAM] = _starpu_mpi_unmap, + .update_map[STARPU_CPU_RAM] = _starpu_mpi_update_map, +}; diff --git a/src/drivers/mpi/driver_mpi_source.h b/src/drivers/mpi/driver_mpi_source.h new file mode 100644 index 0000000..b10f556 --- /dev/null +++ b/src/drivers/mpi/driver_mpi_source.h @@ -0,0 +1,53 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_MPI_SOURCE_H__ +#define __DRIVER_MPI_SOURCE_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +void _starpu_mpi_ms_preinit(void); + +#ifdef STARPU_USE_MPI_MASTER_SLAVE +extern struct _starpu_node_ops _starpu_driver_mpi_ms_node_ops; + +/** Array of structures containing all the information useful to send + * and receive information with devices */ +struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node(); + +unsigned _starpu_mpi_src_get_device_count(); +void *_starpu_mpi_src_worker(void *arg); + +void _starpu_init_mpi_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, + struct starpu_conf *user_conf, int no_mp_config); +void _starpu_mpi_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void _starpu_mpi_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void _starpu_deinit_mpi_config(struct _starpu_machine_config *config); + +void _starpu_mpi_source_init(struct _starpu_mp_node *node); +void _starpu_mpi_source_deinit(struct _starpu_mp_node *node); + +#endif /* STARPU_USE_MPI_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* __DRIVER_MPI_SOURCE_H__ */ diff --git a/src/drivers/opencl/driver_opencl.c b/src/drivers/opencl/driver_opencl.c new file mode 100644 index 0000000..687e0de --- /dev/null +++ b/src/drivers/opencl/driver_opencl.c @@ -0,0 +1,1729 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2010-2010 Mehdi Juhoor + * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_OPENCL) +#include +#endif + +#ifdef STARPU_SIMGRID +#include +#endif + +static int nb_devices = -1; +static int init_done = 0; + +static starpu_pthread_mutex_t big_lock = STARPU_PTHREAD_MUTEX_INITIALIZER; + +static size_t global_mem[STARPU_MAXOPENCLDEVS]; + +#ifdef STARPU_USE_OPENCL +static cl_context contexts[STARPU_MAXOPENCLDEVS]; +static cl_device_id devices[STARPU_MAXOPENCLDEVS]; +static cl_command_queue queues[STARPU_MAXOPENCLDEVS]; +static cl_command_queue map_queues[STARPU_MAXOPENCLDEVS]; +static cl_device_type type[STARPU_MAXOPENCLDEVS]; +static cl_command_queue in_transfer_queues[STARPU_MAXOPENCLDEVS]; +static cl_command_queue out_transfer_queues[STARPU_MAXOPENCLDEVS]; +static cl_command_queue peer_transfer_queues[STARPU_MAXOPENCLDEVS]; +#ifndef STARPU_SIMGRID +static cl_command_queue alloc_queues[STARPU_MAXOPENCLDEVS]; 
+static cl_event task_events[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; +#endif /* !STARPU_SIMGRID */ +#endif +#ifdef STARPU_SIMGRID +static unsigned task_finished[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; +static starpu_pthread_mutex_t opencl_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +#endif /* STARPU_SIMGRID */ +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) +static unsigned opencl_bindid_init[STARPU_MAXOPENCLDEVS]; +static unsigned opencl_bindid[STARPU_MAXOPENCLDEVS]; +static unsigned opencl_memory_init[STARPU_MAXOPENCLDEVS]; +static unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS]; +#endif + +#define _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err) do { if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } while(0) + +static size_t _starpu_opencl_get_global_mem_size(int devid) +{ + return global_mem[devid]; +} + +#ifdef STARPU_USE_OPENCL +void starpu_opencl_get_context(int devid, cl_context *context) +{ + *context = contexts[devid]; +} + +void starpu_opencl_get_device(int devid, cl_device_id *device) +{ + *device = devices[devid]; +} + +void starpu_opencl_get_queue(int devid, cl_command_queue *queue) +{ + *queue = queues[devid]; +} + +void starpu_opencl_get_current_queue(cl_command_queue *queue) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(queue); + *queue = queues[worker->devid]; +} + +void starpu_opencl_get_current_context(cl_context *context) +{ + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(context); + *context = contexts[worker->devid]; +} +#endif /* STARPU_USE_OPENCL */ + +/* This is called to initialize opencl and discover devices */ +void _starpu_opencl_init(void) +{ + memset(&opencl_bindid_init, 0, sizeof(opencl_bindid_init)); + memset(&opencl_memory_init, 0, sizeof(opencl_memory_init)); + STARPU_PTHREAD_MUTEX_LOCK(&big_lock); + if (!init_done) + { +#ifdef STARPU_SIMGRID + nb_devices = _starpu_simgrid_get_nbhosts("OpenCL"); +#else /* 
STARPU_USE_OPENCL */ + cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX]; + cl_uint nb_platforms; + cl_int err; + int i; + cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; + + _STARPU_DEBUG("Initialising OpenCL\n"); + + // Get Platforms + if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0) + device_type |= CL_DEVICE_TYPE_CPU; + if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) + device_type = CL_DEVICE_TYPE_CPU; + err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) nb_platforms=0; + _STARPU_DEBUG("Platforms detected: %u\n", nb_platforms); + _STARPU_DEBUG("CPU device type: %s\n", (device_type&CL_DEVICE_TYPE_CPU)?"requested":"not requested"); + _STARPU_DEBUG("GPU device type: %s\n", (device_type&CL_DEVICE_TYPE_GPU)?"requested":"not requested"); + _STARPU_DEBUG("Accelerator device type: %s\n", (device_type&CL_DEVICE_TYPE_ACCELERATOR)?"requested":"not requested"); + + // Get devices + nb_devices = 0; + { + unsigned j; + for (j=0; j STARPU_MAXOPENCLDEVS) + { + _STARPU_DISP("# Warning: %u OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS); + nb_devices = STARPU_MAXOPENCLDEVS; + } + + // initialise internal structures + for(i=0 ; itopology.nhwdevices[STARPU_OPENCL_WORKER] = nb_devices; +} + +static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config) +{ + struct _starpu_machine_topology *topology = &config->topology; + struct starpu_conf *uconf = &config->conf; + + _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_opencl_gpuid == 0 + ? 
NULL + : (int *)uconf->workers_opencl_gpuid, + &(config->current_devid[STARPU_OPENCL_WORKER]), + (int *)topology->workers_devid[STARPU_OPENCL_WORKER], + "STARPU_WORKERS_OPENCLID", + topology->nhwdevices[STARPU_OPENCL_WORKER], + STARPU_OPENCL_WORKER); + + _starpu_devices_gpu_clear(config, STARPU_OPENCL_WORKER); + _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_OPENCL_WORKER]); +} + +/* Determine which devices we will use */ +void _starpu_init_opencl_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) +{ + int nopencl = config->conf.nopencl; + + if (nopencl != 0) + { + /* The user did not disable OPENCL. We need to initialize + * OpenCL early to count the number of devices */ + _starpu_opencl_init(); + int n = _starpu_opencl_get_device_count(); + + _starpu_topology_check_ndevices(&nopencl, n, 0, STARPU_MAXOPENCLDEVS, 0, "nopencl", "OpenCL", "maxopencldev"); + } + + topology->ndevices[STARPU_OPENCL_WORKER] = nopencl; + + _starpu_initialize_workers_opencl_gpuid(config); + + unsigned openclgpu; + for (openclgpu = 0; (int) openclgpu < nopencl; openclgpu++) + { + int devid = _starpu_get_next_devid(topology, config, STARPU_OPENCL_WORKER); + if (devid == -1) + { + // There is no more devices left + topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu; + break; + } + + _starpu_topology_configure_workers(topology, config, + STARPU_OPENCL_WORKER, + openclgpu, devid, 0, 0, + 1, 1, NULL, NULL); + } +} + +/* Bind the driver on a CPU core */ +void _starpu_opencl_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned preferred_binding[STARPU_NMAXWORKERS]; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + +#ifndef STARPU_SIMGRID + if (_starpu_may_bind_automatically[STARPU_OPENCL_WORKER]) + { + /* StarPU is allowed to bind threads automatically */ + unsigned 
*preferred_numa_binding = _starpu_get_opencl_affinity_vector(devid); + unsigned npreferred_numa = _starpu_topology_get_nhwnumanodes(config); + npreferred = _starpu_topology_get_numa_core_binding(config, preferred_numa_binding, npreferred_numa, preferred_binding, STARPU_NMAXWORKERS); + } +#endif /* SIMGRID */ + + if (opencl_bindid_init[devid]) + { +#ifndef STARPU_SIMGRID + workerarg->bindid = opencl_bindid[devid]; +#endif /* SIMGRID */ + } + else + { + opencl_bindid_init[devid] = 1; + workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + } +} + +/* Set up memory and buses */ +void _starpu_opencl_init_worker_memory(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa; + + if (opencl_memory_init[devid]) + { + memory_node = opencl_memory_nodes[devid]; + } + else + { + opencl_memory_init[devid] = 1; + memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid); + + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_register_bus(numa, memory_node); + _starpu_register_bus(memory_node, numa); + } +#ifdef STARPU_SIMGRID + char name[16]; + snprintf(name, sizeof(name), "OpenCL%u", devid); + starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); + STARPU_ASSERT(host); + _starpu_simgrid_memory_node_set_host(memory_node, host); +#else + if (_starpu_opencl_get_device_type(workerarg->devid) == CL_DEVICE_TYPE_CPU) + _starpu_memory_node_set_mapped(memory_node); +#endif /* SIMGRID */ + } + _starpu_memory_node_add_nworkers(memory_node); + + //This worker can manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(workerarg, numa); + + _starpu_worker_drives_memory_node(workerarg, 
memory_node); + + workerarg->memory_node = memory_node; +} + +/* Really initialize one device */ +int _starpu_opencl_init_context(int devid) +{ +#ifdef STARPU_SIMGRID + int j; + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + task_finished[devid][j] = 0; +#else /* !STARPU_SIMGRID */ + cl_int err; + cl_uint uint; + + STARPU_PTHREAD_MUTEX_LOCK(&big_lock); + + _STARPU_DEBUG("Initialising context for dev %d\n", devid); + + // Create a compute context + err = 0; + contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + starpu_malloc_set_align(uint/8); + + // Create execution queue for the given device + queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + // Create transfer queue for the given device + cl_command_queue_properties props; + err = clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + props &= ~CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + in_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + out_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + peer_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + alloc_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + map_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + 
STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock); +#endif /* !STARPU_SIMGRID */ + return 0; +} + +/* De-initialize one device */ +int _starpu_opencl_deinit_context(int devid) +{ +#ifdef STARPU_SIMGRID + int j; + for (j = 0; j < STARPU_MAX_PIPELINE; j++) + task_finished[devid][j] = 0; +#else /* !STARPU_SIMGRID */ + cl_int err; + + STARPU_PTHREAD_MUTEX_LOCK(&big_lock); + + _STARPU_DEBUG("De-initialising context for dev %d\n", devid); + + err = clFinish(queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clFinish(in_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(in_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clFinish(out_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(out_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clFinish(peer_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(peer_transfer_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clFinish(alloc_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(alloc_queues[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseCommandQueue(map_queues[devid]); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + err = clReleaseContext(contexts[devid]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + contexts[devid] = NULL; + + STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock); +#endif + + return 0; +} + +#ifdef STARPU_USE_OPENCL +#ifndef STARPU_SIMGRID +static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname) +{ + int err; + + if (!init_done) + { + _starpu_opencl_init(); + } + + // Get device name + err = clGetDeviceInfo(devices[dev], CL_DEVICE_NAME, lname, name, 
NULL); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + _STARPU_DEBUG("Device %d : [%s]\n", dev, name); + return EXIT_SUCCESS; +} +#endif +#endif + +static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid) +{ + starpu_ssize_t limit; + size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; + size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; + +#ifdef STARPU_SIMGRID + totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid); +#elif defined(STARPU_USE_OPENCL) + /* Request the size of the current device's memory */ + cl_int err; + cl_ulong size; + err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size), &size, NULL); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + totalGlobalMem = size; +#endif + + limit = starpu_getenv_number("STARPU_LIMIT_OPENCL_MEM"); + if (limit == -1) + { + char name[30]; + snprintf(name, sizeof(name), "STARPU_LIMIT_OPENCL_%u_MEM", devid); + limit = starpu_getenv_number(name); + } +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + if (limit == -1) + { + /* Use 90% of the available memory by default. */ + limit = totalGlobalMem / (1024*1024) * 0.9; + } +#endif + + global_mem[devid] = limit * 1024*1024; + +#ifdef STARPU_USE_OPENCL + /* How much memory to waste ? 
*/ + to_waste = totalGlobalMem - global_mem[devid]; +#endif + + _STARPU_DEBUG("OpenCL device %u: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", + devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024), + (long)(totalGlobalMem - to_waste)/(1024*1024)); + +} + +/* This is run from the driver thread to initialize the driver OpenCL context */ +static int _starpu_opencl_driver_init(struct _starpu_worker *worker) +{ + int devid = worker->devid; + +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); +#endif + + _starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0); + + _starpu_opencl_init_context(devid); + + /* one more time to avoid hacks from third party lib :) */ + _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); + + _starpu_opencl_limit_gpu_mem_if_needed(devid); + _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_opencl_get_global_mem_size(devid)); + + float size = (float) global_mem[devid] / (1<<30); + +#ifdef STARPU_SIMGRID + const char *devname = _starpu_simgrid_get_devname("OpenCL", devid); + if (!devname) + devname = "Simgrid"; +#else + /* get the device's name */ + char devname[64]; + _starpu_opencl_get_device_name(devid, devname, 64); +#endif + snprintf(worker->name, sizeof(worker->name), "OpenCL %d (%s %.1f GiB)", devid, devname, size); + snprintf(worker->short_name, sizeof(worker->short_name), "OpenCL %d", devid); + starpu_pthread_setname(worker->short_name); + + worker->pipeline_length = 
starpu_getenv_number_default("STARPU_OPENCL_PIPELINE", 2); + if (worker->pipeline_length > STARPU_MAX_PIPELINE) + { + _STARPU_DISP("Warning: STARPU_OPENCL_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); + worker->pipeline_length = STARPU_MAX_PIPELINE; + } +#if !defined(STARPU_SIMGRID) && !defined(STARPU_NON_BLOCKING_DRIVERS) + if (worker->pipeline_length >= 1) + { + /* We need non-blocking drivers, to poll for OPENCL task + * termination */ + _STARPU_DISP("Warning: reducing STARPU_OPENCL_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not enabled)\n"); + worker->pipeline_length = 0; + } +#endif + + _STARPU_DEBUG("OpenCL (%s) dev id %d thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_ocl, 0, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); +#endif + + _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + + /* tell the main thread that this one is ready */ + STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); + worker->status = STATUS_UNKNOWN; + worker->worker_is_initialized = 1; + STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); + + return 0; +} + +static int _starpu_opencl_driver_deinit(struct _starpu_worker *worker) +{ + _STARPU_TRACE_WORKER_DEINIT_START; + + unsigned memnode = worker->memory_node; + + _starpu_datawizard_handle_all_pending_node_data_requests(memnode); + + /* In case there remains some memory that was automatically + * allocated by StarPU, we release it now. Note that data + * coherency is not maintained anymore at that point ! 
*/ + _starpu_free_all_automatically_allocated_buffers(memnode); + + _starpu_malloc_shutdown(memnode); + + unsigned devid = worker->devid; + _starpu_opencl_deinit_context(devid); + + worker->worker_is_initialized = 0; + _STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER); +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); +#endif + + return 0; +} + +#ifdef STARPU_USE_OPENCL +cl_int starpu_opencl_allocate_memory(int devid STARPU_ATTRIBUTE_UNUSED, cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, cl_mem_flags flags STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_SIMGRID + STARPU_ABORT(); +#else + cl_int err; + cl_mem memory; + + memory = clCreateBuffer(contexts[devid], flags, size, NULL, &err); + if (err == CL_OUT_OF_HOST_MEMORY) + return err; + if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE) + return err; + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + /* + * OpenCL uses lazy memory allocation: we will only know if the + * allocation failed when trying to copy data onto the device. But we + * want to know this __now__, so we just perform a dummy copy. 
+ */ + char dummy = 0; + cl_event ev; + err = clEnqueueWriteBuffer(alloc_queues[devid], memory, CL_TRUE, + 0, sizeof(dummy), &dummy, + 0, NULL, &ev); + if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE) + return err; + if (err == CL_OUT_OF_RESOURCES) + return err; + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + clWaitForEvents(1, &ev); + clReleaseEvent(ev); + + *mem = memory; + return CL_SUCCESS; +#endif +} +#endif + +static uintptr_t _starpu_opencl_malloc_on_node(unsigned dst_node, size_t size, int flags) +{ + (void)flags; + uintptr_t addr = 0; +#ifdef STARPU_SIMGRID + static uintptr_t last[STARPU_MAXNODES]; + /* Sleep for the allocation */ + STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex); + if (_starpu_simgrid_cuda_malloc_cost()) + starpu_sleep(0.000175); + if (!last[dst_node]) + last[dst_node] = 1<<10; + addr = last[dst_node]; + last[dst_node]+=size; + STARPU_ASSERT(last[dst_node] >= addr); + STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex); +#else + int ret; + cl_mem ptr; + + ret = starpu_opencl_allocate_memory(starpu_memory_node_get_devid(dst_node), &ptr, size, CL_MEM_READ_WRITE); + if (ret) + { + addr = 0; + } + else + { + addr = (uintptr_t)ptr; + } +#endif + return addr; +} + +static void _starpu_opencl_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) +{ + (void)dst_node; + (void)addr; + (void)size; + (void)flags; +#ifdef STARPU_SIMGRID + STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex); + /* Sleep for the free */ + if (_starpu_simgrid_cuda_malloc_cost()) + starpu_sleep(0.000750); + STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex); +#else + cl_int err; + err = clReleaseMemObject((void*)addr); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); +#endif +} + +#ifdef STARPU_USE_OPENCL +cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret) +{ + cl_int err; + struct 
_starpu_worker *worker = _starpu_get_local_worker_key(); + double start = 0.; + + if (event) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + + cl_event ev; + err = clEnqueueWriteBuffer(in_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev); + + if (event) + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + + if (STARPU_LIKELY(err == CL_SUCCESS)) + { + if (event == NULL) + { + /* We want a synchronous copy, let's synchronise the queue */ + err = clWaitForEvents(1, &ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseEvent(ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + } + else + { + clFlush(in_transfer_queues[worker->devid]); + *event = ev; + } + + if (ret) + { + *ret = (event == NULL) ? 0 : -EAGAIN; + } + } + return err; +} + +cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret) +{ + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + double start = 0.; + + if (event) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cl_event ev; + err = clEnqueueReadBuffer(out_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev); + if (event) + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + if (STARPU_LIKELY(err == CL_SUCCESS)) + { + if (event == NULL) + { + /* We want a synchronous copy, let's synchronise the queue */ + err = clWaitForEvents(1, &ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseEvent(ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + } + else + { + clFlush(out_transfer_queues[worker->devid]); + *event = ev; + } + + if (ret) + { + *ret = (event == NULL) ? 
0 : -EAGAIN; + } + } + return err; +} + +cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, size_t src_offset, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t dst_offset, size_t size, cl_event *event, int *ret) +{ + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + double start = 0.; + + if (event) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + cl_event ev; + err = clEnqueueCopyBuffer(peer_transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev); + if (event) + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + if (STARPU_LIKELY(err == CL_SUCCESS)) + { + if (event == NULL) + { + /* We want a synchronous copy, let's synchronise the queue */ + err = clWaitForEvents(1, &ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + err = clReleaseEvent(ev); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + } + else + { + clFlush(peer_transfer_queues[worker->devid]); + *event = ev; + } + + if (ret) + { + *ret = (event == NULL) ? 
0 : -EAGAIN; + } + } + return err; +} + +cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event) +{ + enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); + enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); + cl_int err; + int ret; + + if (src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM) + { + err = starpu_opencl_copy_opencl_to_ram((cl_mem) src, src_node, + (void*) (dst + dst_offset), dst_node, + size, src_offset, event, &ret); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + return ret; + } + + if (src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM) + { + err = starpu_opencl_copy_ram_to_opencl((void*) (src + src_offset), src_node, + (cl_mem) dst, dst_node, + size, dst_offset, event, &ret); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + return ret; + } + + if (src_kind == STARPU_OPENCL_RAM && (dst_kind == STARPU_CPU_RAM || dst_kind == STARPU_OPENCL_RAM)) + { + err = starpu_opencl_copy_opencl_to_opencl((cl_mem) src, src_node, src_offset, + (cl_mem) dst, dst_node, dst_offset, + size, event, &ret); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + return ret; + } + + STARPU_ABORT(); +} + +static inline cl_event *_starpu_opencl_event(union _starpu_async_channel_event *_event) +{ + cl_event *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +static int _starpu_opencl_copy_data_from_opencl_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM); + + return starpu_opencl_copy_async_sync(src, src_offset, src_node, + dst, dst_offset, dst_node, + size, + 
_starpu_opencl_event(&async_channel->event)); +} + +static int _starpu_opencl_copy_data_from_opencl_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM); + + return starpu_opencl_copy_async_sync(src, src_offset, src_node, + dst, dst_offset, dst_node, + size, + _starpu_opencl_event(&async_channel->event)); +} + +static int _starpu_opencl_copy_data_from_cpu_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM); + + return starpu_opencl_copy_async_sync(src, src_offset, src_node, + dst, dst_offset, dst_node, + size, + _starpu_opencl_event(&async_channel->event)); +} + +#if 0 +static cl_int _starpu_opencl_copy_rect_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3], + const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, cl_event *event) +{ + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + cl_bool blocking; + double start = 0.; + + blocking = (event == NULL) ? 
CL_TRUE : CL_FALSE; + if (event) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + err = clEnqueueReadBufferRect(out_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch, + buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event); + clFlush(out_transfer_queues[worker->devid]); + if (event) + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + return CL_SUCCESS; +} + +static cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3], + const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, cl_event *event) +{ + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + cl_bool blocking; + double start = 0.; + + blocking = (event == NULL) ? 
CL_TRUE : CL_FALSE; + if (event) + starpu_interface_start_driver_copy_async(src_node, dst_node, &start); + err = clEnqueueWriteBufferRect(in_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch, + buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event); + clFlush(in_transfer_queues[worker->devid]); + if (event) + starpu_interface_end_driver_copy_async(src_node, dst_node, start); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + + return CL_SUCCESS; +} +#endif + +static unsigned _starpu_opencl_test_request_completion(struct _starpu_async_channel *async_channel) +{ + cl_int event_status; + cl_event opencl_event = *_starpu_opencl_event(&async_channel->event); + if (opencl_event == NULL) STARPU_ABORT(); + cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); + if (event_status < 0) + STARPU_OPENCL_REPORT_ERROR(event_status); + if (event_status == CL_COMPLETE) + { + err = clReleaseEvent(opencl_event); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + } + return (event_status == CL_COMPLETE); +} + +/* Only used at starpu_shutdown */ +static void _starpu_opencl_wait_request_completion(struct _starpu_async_channel *async_channel) +{ + cl_int err; + if (*_starpu_opencl_event(&async_channel->event) == NULL) + STARPU_ABORT(); + err = clWaitForEvents(1, _starpu_opencl_event(&async_channel->event)); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); + err = clReleaseEvent(*_starpu_opencl_event(&async_channel->event)); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); +} + +static int _starpu_opencl_copy_interface_from_opencl_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind 
= starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM); + + int ret = 1; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + /* STARPU_OPENCL_RAM -> STARPU_OPENCL_RAM */ + STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node || starpu_worker_get_local_memory_node() == src_node); + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->opencl_to_opencl_async || copy_methods->any_to_any)) + { + STARPU_ASSERT(copy_methods->opencl_to_opencl || copy_methods->any_to_any); + /* this is not associated to a request so it's synchronous */ + if (copy_methods->opencl_to_opencl) + copy_methods->opencl_to_opencl(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; + if (copy_methods->opencl_to_opencl_async) + ret = copy_methods->opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + } + return ret; +} + +static int _starpu_opencl_copy_interface_from_opencl_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM); + + int ret = 1; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + /* OpenCL -> RAM */ + STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); + if (!req || 
starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->opencl_to_ram_async || copy_methods->any_to_any)) + { + STARPU_ASSERT(copy_methods->opencl_to_ram || copy_methods->any_to_any); + /* this is not associated to a request so it's synchronous */ + if (copy_methods->opencl_to_ram) + copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node); + else + copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; + if (copy_methods->opencl_to_ram_async) + ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + } + return ret; +} + +static int _starpu_opencl_copy_interface_from_cpu_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) +{ + int src_kind = starpu_node_get_kind(src_node); + int dst_kind = starpu_node_get_kind(dst_node); + STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM); + + int ret = 0; + const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; + /* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */ + STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); + if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->ram_to_opencl_async || copy_methods->any_to_any)) + { + STARPU_ASSERT(copy_methods->ram_to_opencl || copy_methods->any_to_any); + /* this is not associated to a request so it's synchronous */ + if (copy_methods->ram_to_opencl) + copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node); + else + 
copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); + } + else + { + req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; + if (copy_methods->ram_to_opencl_async) + ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); + else + { + STARPU_ASSERT(copy_methods->any_to_any); + ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); + } + } + return ret; +} + +static uintptr_t +_starpu_opencl_map_ram(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, int *ret) +{ + cl_int err; + cl_mem memory; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + + *ret = -EIO; + + if (starpu_node_get_kind(src_node) != STARPU_CPU_RAM) + return 0; + + STARPU_ASSERT(dst_node == worker->memory_node); + + memory = clCreateBuffer(contexts[worker->devid], CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, size, (void*)(src + src_offset), &err); + if (err == CL_OUT_OF_HOST_MEMORY) return 0; + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + return (uintptr_t)memory; +} + +static int +_starpu_opencl_unmap_ram(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, + uintptr_t dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) +{ + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + + STARPU_ASSERT(dst_node == worker->memory_node); + + err = clReleaseMemObject((cl_mem) dst); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + return 0; +} + +static int +_starpu_opencl_update_opencl_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) size; + (void) src_node; + + cl_int err; + struct _starpu_worker *worker = 
_starpu_get_local_worker_key(); + + STARPU_ASSERT(dst_offset == 0); + STARPU_ASSERT(dst_node == worker->memory_node); + + cl_event ev; + err = clEnqueueUnmapMemObject(map_queues[worker->devid], (cl_mem) (dst + dst_offset), (void*) (src + src_offset), 0, NULL, &ev); + + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + + /* We want a synchronous update, let's synchronise the queue */ + err = clWaitForEvents(1, &ev); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + err = clReleaseEvent(ev); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + + return 0; +} + +static int +_starpu_opencl_update_cpu_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) size; + (void) dst_node; + + cl_int err; + struct _starpu_worker *worker = _starpu_get_local_worker_key(); + + STARPU_ASSERT(src_offset == 0); + STARPU_ASSERT(src_node == worker->memory_node); + + cl_event ev; + void *ptr = clEnqueueMapBuffer(map_queues[worker->devid], (cl_mem) (src + src_offset), CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, &ev, &err); + + if (STARPU_UNLIKELY(!ptr)) + STARPU_OPENCL_REPORT_ERROR(err); + + /* We want a synchronous update, let's synchronise the queue */ + err = clWaitForEvents(1, &ev); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + err = clReleaseEvent(ev); + if (STARPU_UNLIKELY(err)) + STARPU_OPENCL_REPORT_ERROR(err); + + STARPU_ASSERT((uintptr_t) ptr == (dst + dst_offset)); + + return 0; +} + +#endif /* STARPU_USE_OPENCL */ + +static int _starpu_opencl_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + (void)node; + (void)handling_node; + return 0; +} + +static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(j); + struct starpu_task *task = j->task; + + int profiling = starpu_profiling_status_get(); + + 
STARPU_ASSERT(task); + struct starpu_codelet *cl = task->cl; + STARPU_ASSERT(cl); + + _starpu_set_current_task(task); + j->workerid = worker->workerid; + + if (worker->ntasks == 1) + { + /* We are alone in the pipeline, the kernel will start now, record it */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); + } + + starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl); + STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined"); + + if (_starpu_get_disable_kernels() <= 0) + { + _STARPU_TRACE_START_EXECUTING(j); +#ifdef STARPU_SIMGRID + double length = NAN; + double energy = NAN; + int async = task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC; + int simulate = 1; + if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async) + { + /* Actually execute function */ + simulate = 0; + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); +#ifdef STARPU_OPENCL_SIMULATOR +#ifndef CL_PROFILING_CLOCK_CYCLE_COUNT +#ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT +#define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT +#else +#error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT +#endif +#endif + struct starpu_profiling_task_info *profiling_info = task->profiling_info; + STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time"); +#if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed) +# if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) + length = ((double) profiling_info->used_cycles)/sg_host_speed(sg_host_self()); +# else + length = ((double) profiling_info->used_cycles)/sg_host_speed(MSG_host_self()); +# endif +#elif defined HAVE_MSG_HOST_GET_SPEED || defined(MSG_host_get_speed) + length = ((double) profiling_info->used_cycles)/MSG_host_get_speed(MSG_host_self()); +#else + length = ((double) 
profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self()); +#endif + energy = info->energy_consumed; + /* And give the simulated time to simgrid */ + simulate = 1; +#endif + } + else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async) + { + _SIMGRID_TIMER_BEGIN(1); + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); + _SIMGRID_TIMER_END; + simulate=0; + } + + if (simulate) + { + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + _starpu_simgrid_submit_job(sched_ctx->id, worker->workerid, j, &worker->perf_arch, length, energy, + async ? &task_finished[worker->devid][pipeline_idx] : NULL); + } +#else +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); +#endif + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); +#endif + + cl_command_queue queue; + starpu_opencl_get_queue(worker->devid, &queue); +#endif + _STARPU_TRACE_END_EXECUTING(j); + } + return 0; +} + +static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker); + +static void _starpu_opencl_execute_job(struct starpu_task *task, struct _starpu_worker *worker) +{ + int res; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + + unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; + + res = _starpu_opencl_start_job(j, worker, pipeline_idx); + + if (res) + { + switch (res) + { + case -EAGAIN: + _STARPU_DISP("ouch, OpenCL could not actually run task %p, 
putting it back...\n", task); + _starpu_push_task_to_workers(task); + STARPU_ABORT(); + default: + STARPU_ABORT(); + } + } + + if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC) + { + /* Record event to synchronize with task termination later */ +#ifndef STARPU_SIMGRID + cl_command_queue queue; + starpu_opencl_get_queue(worker->devid, &queue); +#endif + + if (worker->pipeline_length == 0) + { +#ifdef STARPU_SIMGRID + _starpu_simgrid_wait_tasks(worker->workerid); +#else + starpu_opencl_get_queue(worker->devid, &queue); + clFinish(queue); +#endif + _starpu_opencl_stop_job(j, worker); + } + else + { +#ifndef STARPU_SIMGRID + int err; + /* the function clEnqueueMarker is deprecated from + * OpenCL version 1.2. We would like to use the new + * function clEnqueueMarkerWithWaitList. We could do + * it by checking its availability through our own + * configure macro HAVE_CLENQUEUEMARKERWITHWAITLIST + * and the OpenCL macro CL_VERSION_1_2. However these + * 2 macros detect the function availability in the + * ICD and not in the device implementation. 
+ */ + err = clEnqueueMarker(queue, &task_events[worker->devid][pipeline_idx]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + clFlush(queue); +#endif + } + } + else + /* Synchronous execution */ + { + _starpu_opencl_stop_job(j, worker); + } +} + +static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker) +{ + int profiling = starpu_profiling_status_get(); + + _starpu_set_current_task(NULL); + if (worker->pipeline_length) + worker->current_tasks[worker->first_task] = NULL; + else + worker->current_task = NULL; + worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; + worker->ntasks--; + + _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); + + struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); + STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", worker->workerid); + if(!sched_ctx->sched_policy) + _starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling); + else + _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); + + _starpu_push_task_output(j); + + _starpu_handle_job_termination(j); + +} + +static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) +{ + int workerid = worker->workerid; + unsigned memnode = worker->memory_node; + + struct _starpu_job *j; + struct starpu_task *task; + int res; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + + int idle_tasks, idle_transfers; + +#ifdef STARPU_SIMGRID + starpu_pthread_wait_reset(&worker->wait); +#endif + + idle_tasks = 0; + idle_transfers = 0; + + /* First test for transfers pending for next task */ + task = worker->task_transferring; + if (!task) + idle_transfers++; + if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) + { + STARPU_RMB(); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, 
starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_END_PROGRESS(memnode); + j = _starpu_get_job_associated_to_task(task); + + _starpu_fetch_task_input_tail(task, j, worker); + /* Reset it */ + worker->task_transferring = NULL; + + if (worker->ntasks > 1 && !(task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)) + { + /* We have to execute a non-asynchronous task but we + * still have tasks in the pipeline... Record it to + * prevent more tasks from coming, and do it later */ + worker->pipeline_stuck = 1; + return 0; + } + + _starpu_opencl_execute_job(task, worker); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + _STARPU_TRACE_START_PROGRESS(memnode); + } + + /* Then poll for completed jobs */ + if (worker->pipeline_length) + task = worker->current_tasks[worker->first_task]; + else + task = worker->current_task; + if (worker->ntasks && task != worker->task_transferring) + { +#ifndef STARPU_SIMGRID + size_t size; + int err; +#endif + + /* On-going asynchronous task, check for its termination first */ + +#ifdef STARPU_SIMGRID + if (!task_finished[worker->devid][worker->first_task]) +#else /* !STARPU_SIMGRID */ + cl_int status; + err = clGetEventInfo(task_events[worker->devid][worker->first_task], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, &size); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + STARPU_ASSERT(size == sizeof(cl_int)); + + if (status != CL_COMPLETE) +#endif /* !STARPU_SIMGRID */ + { + } + else + { + _STARPU_TRACE_END_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + 
starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif +#ifndef STARPU_SIMGRID + err = clReleaseEvent(task_events[worker->devid][worker->first_task]); + _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); + task_events[worker->devid][worker->first_task] = 0; +#endif + + /* Asynchronous task completed! */ + _starpu_opencl_stop_job(_starpu_get_job_associated_to_task(task), worker); + /* See next task if any */ + if (worker->ntasks && worker->current_tasks[worker->first_task] != worker->task_transferring) + { + task = worker->current_tasks[worker->first_task]; + j = _starpu_get_job_associated_to_task(task); + if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC) + { + /* An asynchronous task, it was already queued, + * it's now running, record its start time. */ + _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); + } + else + { + /* A synchronous task, we have finished flushing the pipeline, we can now at last execute it. 
*/ + _STARPU_TRACE_EVENT("sync_task"); + _starpu_opencl_execute_job(task, worker); + _STARPU_TRACE_EVENT("end_sync_task"); + worker->pipeline_stuck = 0; + } + } + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + } + } + if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) + idle_tasks++; + +#if defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) + if (!idle_tasks) + { + /* No task ready yet, no better thing to do than waiting */ + __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); + return 0; + } +#endif + + res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); + + task = _starpu_get_worker_task(worker, workerid, memnode); + +#ifdef STARPU_SIMGRID + if (!res && !task) + starpu_pthread_wait_wait(&worker->wait); +#endif + + if (task == NULL) + return 0; + + j = _starpu_get_job_associated_to_task(task); + + if (worker->pipeline_length) + worker->current_tasks[(worker->first_task + worker->ntasks)%STARPU_MAX_PIPELINE] = task; + else + worker->current_task = task; + worker->ntasks++; + + /* can OpenCL do that task ? 
*/ + if (!_STARPU_MAY_PERFORM(j, OPENCL)) + { + /* this is not a OpenCL task */ + _starpu_worker_refuse_task(worker, task); + return 0; + } + + _STARPU_TRACE_END_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + + /* Fetch data asynchronously */ + res = _starpu_fetch_task_input(task, j, 1); + STARPU_ASSERT(res == 0); + _STARPU_TRACE_START_PROGRESS(memnode); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + + return 0; +} + +void *_starpu_opencl_worker(void *_arg) +{ + struct _starpu_worker* worker = _arg; + + _starpu_opencl_driver_init(worker); + _STARPU_TRACE_START_PROGRESS(worker->memory_node); +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); +#endif + while (_starpu_machine_is_running()) + { + _starpu_may_pause(); + _starpu_opencl_driver_run_once(worker); + } + _starpu_opencl_driver_deinit(worker); + _STARPU_TRACE_END_PROGRESS(worker->memory_node); +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); +#endif + + return NULL; +} + +#ifdef STARPU_USE_OPENCL +static int _starpu_run_opencl(struct _starpu_worker *workerarg) +{ + _STARPU_DEBUG("Running OpenCL %u 
from the application\n", workerarg->devid); + + /* Let's go ! */ + _starpu_opencl_worker(workerarg); + + return 0; +} + +static int _starpu_opencl_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + starpu_opencl_get_device(worker->devid, &driver->id.opencl_id); + + return 0; +} + +static int _starpu_opencl_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) +{ + cl_device_id device; + starpu_opencl_get_device(worker->devid, &device); + + return device == driver->id.opencl_id; +} + +struct _starpu_driver_ops _starpu_driver_opencl_ops = +{ + .init = _starpu_opencl_driver_init, + .run = _starpu_run_opencl, + .run_once = _starpu_opencl_driver_run_once, + .deinit = _starpu_opencl_driver_deinit, + .set_devid = _starpu_opencl_driver_set_devid, + .is_devid = _starpu_opencl_driver_is_devid, +}; +#endif + + +#ifdef STARPU_USE_OPENCL +cl_device_type _starpu_opencl_get_device_type(int devid) +{ + if (!init_done) + _starpu_opencl_init(); + return type[devid]; +} +#endif /* STARPU_USE_OPENCL */ + +#ifdef STARPU_HAVE_HWLOC +hwloc_obj_t _starpu_opencl_get_hwloc_obj(hwloc_topology_t topology, int devid) +{ +#if !defined(STARPU_SIMGRID) + cl_device_id device; + starpu_opencl_get_device(devid, &device); + return hwloc_opencl_get_device_osdev(topology, device); +#else + return NULL; +#endif +} +#endif + +struct _starpu_node_ops _starpu_driver_opencl_node_ops = +{ + .name = "opencl driver", + .malloc_on_node = _starpu_opencl_malloc_on_node, + .free_on_node = _starpu_opencl_free_on_node, + + .is_direct_access_supported = _starpu_opencl_is_direct_access_supported, + +#ifndef STARPU_SIMGRID + .copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu, + .copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_cpu_to_opencl, + .copy_interface_from[STARPU_OPENCL_RAM] = 
_starpu_opencl_copy_interface_from_opencl_to_opencl, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu, + .copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_cpu_to_opencl, + .copy_data_from[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl, + + /* TODO: copy2D/3D? */ + + .map[STARPU_CPU_RAM] = _starpu_opencl_map_ram, + .unmap[STARPU_CPU_RAM] = _starpu_opencl_unmap_ram, + .update_map[STARPU_CPU_RAM] = _starpu_opencl_update_cpu_map, + + .wait_request_completion = _starpu_opencl_wait_request_completion, + .test_request_completion = _starpu_opencl_test_request_completion, +#endif +}; diff --git a/src/drivers/opencl/driver_opencl.h b/src/drivers/opencl/driver_opencl.h new file mode 100644 index 0000000..6000529 --- /dev/null +++ b/src/drivers/opencl/driver_opencl.h @@ -0,0 +1,76 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __DRIVER_OPENCL_H__ +#define __DRIVER_OPENCL_H__ + +/** @file */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif + +#ifdef STARPU_USE_OPENCL + +#define CL_TARGET_OPENCL_VERSION 100 +#ifdef __APPLE__ +#include +#else +#include +#endif +#endif + +#include +#include + +#pragma GCC visibility push(hidden) + +void _starpu_opencl_preinit(void); + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) +struct _starpu_machine_config; +void _starpu_opencl_discover_devices(struct _starpu_machine_config *config); + +void _starpu_opencl_init(void); +int _starpu_opencl_init_context(int devid); +int _starpu_opencl_deinit_context(int devid); +unsigned _starpu_opencl_get_device_count(void); +#ifdef STARPU_HAVE_HWLOC +struct _starpu_machine_topology; +hwloc_obj_t _starpu_opencl_get_hwloc_obj(hwloc_topology_t topology, int devid); +#endif +void _starpu_init_opencl_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); +void _starpu_opencl_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void _starpu_opencl_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void *_starpu_opencl_worker(void *); +extern struct _starpu_node_ops _starpu_driver_opencl_node_ops; +#else +#define _starpu_opencl_discover_devices(config) ((void) (config)) +#endif + +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) +extern struct _starpu_driver_ops _starpu_driver_opencl_ops; +#endif + +#ifdef STARPU_USE_OPENCL +extern char *_starpu_opencl_program_dir; + +cl_device_type _starpu_opencl_get_device_type(int devid); +#endif + +#pragma GCC visibility pop + +#endif // __DRIVER_OPENCL_H__ diff --git a/src/drivers/opencl/driver_opencl_init.c b/src/drivers/opencl/driver_opencl_init.c new file mode 100644 index 0000000..7912da1 --- /dev/null +++ b/src/drivers/opencl/driver_opencl_init.c @@ 
-0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +static struct _starpu_driver_info driver_info = +{ + .name_upper = "OpenCL", + .name_var = "OPENCL", + .name_lower = "opencl", + .memory_kind = STARPU_OPENCL_RAM, + .alpha = 12.22f, + .wait_for_worker_initialization = 1, +#if defined(STARPU_USE_OPENCL) + .driver_ops = &_starpu_driver_opencl_ops, +#ifdef STARPU_HAVE_HWLOC + .get_hwloc_obj = _starpu_opencl_get_hwloc_obj, +#endif +#endif +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + .run_worker = _starpu_opencl_worker, + .init_worker_binding = _starpu_opencl_init_worker_binding, + .init_worker_memory = _starpu_opencl_init_worker_memory, +#endif +}; + +static struct _starpu_memory_driver_info memory_driver_info = +{ + .name_upper = "OpenCL", + .worker_archtype = STARPU_OPENCL_WORKER, +#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) + .ops = &_starpu_driver_opencl_node_ops, +#endif +}; + +void _starpu_opencl_preinit(void) +{ + _starpu_driver_info_register(STARPU_OPENCL_WORKER, &driver_info); + _starpu_memory_driver_info_register(STARPU_OPENCL_RAM, &memory_driver_info); +} diff --git a/src/drivers/opencl/driver_opencl_utils.c b/src/drivers/opencl/driver_opencl_utils.c new file mode 100644 index 0000000..77e7ffe --- /dev/null +++ b/src/drivers/opencl/driver_opencl_utils.c 
@@ -0,0 +1,804 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include +#include "driver_opencl_utils.h" +#include "driver_opencl.h" +#ifdef STARPU_DEVEL +#include +#endif + +#ifdef HAVE_CL_CL_EXT_H +#include +#endif + +char *_starpu_opencl_program_dir; + +static +int _starpu_opencl_locate_file(const char *source_file_name, char **located_file_name, char **located_dir_name) +{ + int ret = EXIT_FAILURE; + + *located_file_name = NULL; + *located_dir_name = NULL; + + _STARPU_DEBUG("Trying to locate <%s>\n", source_file_name); + if (access(source_file_name, R_OK) == 0) + { + _STARPU_CALLOC(*located_file_name, 1, strlen(source_file_name)+1); + snprintf(*located_file_name, strlen(source_file_name)+1, "%s", source_file_name); + ret = EXIT_SUCCESS; + } + + if (ret == EXIT_FAILURE && _starpu_opencl_program_dir) + { + _STARPU_CALLOC(*located_file_name, 1, strlen(_starpu_opencl_program_dir)+1+strlen(source_file_name)+1); + snprintf(*located_file_name, strlen(_starpu_opencl_program_dir)+1+strlen(source_file_name)+1, "%s/%s", _starpu_opencl_program_dir, source_file_name); + _STARPU_DEBUG("Trying to locate with _starpu_opencl_program_dir <%s>\n", *located_file_name); + if 
(access(*located_file_name, R_OK) == 0) + ret = EXIT_SUCCESS; + } + +#ifdef STARPU_DEVEL + if (ret == EXIT_FAILURE) + { + _STARPU_CALLOC(*located_file_name, 1, strlen(STARPU_SRC_DIR)+1+strlen(source_file_name)+1); + snprintf(*located_file_name, strlen(STARPU_SRC_DIR)+1+strlen(source_file_name)+1, "%s/%s", STARPU_SRC_DIR, source_file_name); + _STARPU_DEBUG("Trying to locate with STARPU_SRC_DIR <%s>\n", *located_file_name); + if (access(*located_file_name, R_OK) == 0) + ret = EXIT_SUCCESS; + } +#endif + + if (ret == EXIT_FAILURE) + { + _STARPU_CALLOC(*located_file_name, 1, strlen(STARPU_OPENCL_DATADIR)+1+strlen(source_file_name)+1); + snprintf(*located_file_name, strlen(STARPU_OPENCL_DATADIR)+1+strlen(source_file_name)+1, "%s/%s", STARPU_OPENCL_DATADIR, source_file_name); + _STARPU_DEBUG("Trying to locate with STARPU_OPENCL_DATADIR <%s>\n", *located_file_name); + if (access(*located_file_name, R_OK) == 0) + ret = EXIT_SUCCESS; + } + + if (ret == EXIT_FAILURE) + { + _STARPU_ERROR("Cannot locate file <%s>\n", source_file_name); + } + else + { + char *last = strrchr(*located_file_name, '/'); + + if (!last) + { + _STARPU_CALLOC(*located_dir_name, 2, sizeof(char)); + snprintf(*located_dir_name, 2, "%s", ""); + } + else + { + _STARPU_CALLOC(*located_dir_name, 1, 1+strlen(*located_file_name)); + snprintf(*located_dir_name, 1+strlen(*located_file_name), "%s", *located_file_name); + (*located_dir_name)[strlen(*located_file_name)-strlen(last)+1] = '\0'; + } + } + + return ret; +} + +cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid) +{ + cl_int err; + cl_device_id device; + cl_program program; + + starpu_opencl_get_device(devid, &device); + starpu_opencl_get_queue(devid, queue); + + program = opencl_programs->programs[devid]; + if (!program) + { + _STARPU_DISP("Program not available for device <%d>\n", devid); + return CL_INVALID_PROGRAM; + } + + // Create the compute 
kernel in the program we wish to run + *kernel = clCreateKernel(program, kernel_name, &err); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); + + return CL_SUCCESS; +} + +cl_int starpu_opencl_release_kernel(cl_kernel kernel) +{ + cl_int err; + + err = clReleaseKernel(kernel); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); + + return CL_SUCCESS; +} + +static +char *_starpu_opencl_load_program_source(const char *filename) +{ + struct stat statbuf; + FILE *fh; + char *source; + int x; + int c; + int err; + + fh = fopen(filename, "r"); + if (!fh) + return NULL; + + err = stat(filename, &statbuf); + STARPU_ASSERT_MSG(err == 0, "could not open file %s\n", filename); + _STARPU_MALLOC(source, statbuf.st_size + 1); + + for(c=fgetc(fh), x=0 ; c != EOF ; c =fgetc(fh), x++) + { + source[x] = (char)c; + } + source[x] = '\0'; + + _STARPU_EXTRA_DEBUG("OpenCL kernel <%s>\n", source); + + fclose(fh); + + return source; +} + +static +char *_starpu_opencl_load_program_binary(const char *filename, size_t *len) +{ + struct stat statbuf; + FILE *fh; + char *binary; + int err; + + fh = fopen(filename, "r"); + if (fh == 0) + return NULL; + + err = stat(filename, &statbuf); + STARPU_ASSERT_MSG(err == 0, "could not open file %s\n", filename); + + binary = (char *) malloc(statbuf.st_size); + if (!binary) + { + fclose(fh); + return binary; + } + + err = fread(binary, statbuf.st_size, 1, fh); + STARPU_ASSERT_MSG(err == 1, "could not read from file %s\n", filename); + fclose(fh); + + *len = statbuf.st_size; + return binary; +} + +static +void _starpu_opencl_create_binary_directory(char *path, size_t maxlen) +{ + static int _directory_created = 0; + + snprintf(path, maxlen, "%s/.starpu/opencl/", _starpu_get_home_path()); + + if (_directory_created == 0) + { + _STARPU_DEBUG("Creating directory %s\n", path); + _starpu_mkpath_and_check(path, S_IRWXU); + _directory_created = 1; + } +} + +char *_starpu_opencl_get_device_type_as_string(int id) 
+{ + cl_device_type type; + + type = _starpu_opencl_get_device_type(id); + switch (type) + { + case CL_DEVICE_TYPE_GPU: return "gpu"; + case CL_DEVICE_TYPE_ACCELERATOR: return "acc"; + case CL_DEVICE_TYPE_CPU: return "cpu"; + default: return "unk"; + } +} + +static +int _starpu_opencl_get_binary_name(char *binary_file_name, size_t maxlen, const char *source_file_name, int dev, cl_device_id device) +{ + char binary_directory[1024]; + char *p; + cl_int err; + cl_uint vendor_id; + + _starpu_opencl_create_binary_directory(binary_directory, sizeof(binary_directory)); + + p = strrchr(source_file_name, '/'); + snprintf(binary_file_name, maxlen, "%s/%s", binary_directory, p?p:source_file_name); + + p = strstr(binary_file_name, ".cl"); + if (p == NULL) p=binary_file_name + strlen(binary_file_name); + + err = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + sprintf(p, ".%s.vendor_id_%d_device_id_%d", _starpu_opencl_get_device_type_as_string(dev), (int)vendor_id, dev); + + return CL_SUCCESS; +} + +static +int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program_source, const char* build_options, + struct starpu_opencl_program *opencl_programs, const char* source_file_name) +{ + unsigned int dev; + unsigned int nb_devices; + + nb_devices = _starpu_opencl_get_device_count(); + // Iterate over each device + for(dev = 0; dev < nb_devices; dev ++) + { + cl_device_id device; + cl_context context; + cl_program program; + cl_int err; + + if (opencl_programs) + { + opencl_programs->programs[dev] = NULL; + } + + starpu_opencl_get_device(dev, &device); + starpu_opencl_get_context(dev, &context); + if (context == NULL) + { + _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); + continue; + } + + // Create the compute program from the source buffer + program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, 
&err); + if (!program || err != CL_SUCCESS) + { + _STARPU_DISP("Error: Failed to load program source with options %s!\n", build_options); + return EXIT_FAILURE; + } + + // Build the program executable + err = clBuildProgram(program, 1, &device, build_options, NULL, NULL); + + // Get the status + { + cl_build_status status; + size_t len; + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); + if (len > 2) + { + char *buffer; + _STARPU_MALLOC(buffer, len); + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, buffer, &len); + _STARPU_DISP("Compilation output\n%s\n", buffer); + + free(buffer); + } + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); + if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) + { + _STARPU_DISP("Error: Failed to build program executable!\n"); + _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); + return EXIT_FAILURE; + } + } + + // Store program + if (opencl_programs) + { + opencl_programs->programs[dev] = program; + } + else + { + char binary_file_name[2048]; + char *binary; + size_t binary_len; + FILE *fh; + + err = _starpu_opencl_get_binary_name(binary_file_name, sizeof(binary_file_name), source_file_name, dev, device); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_len, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + _STARPU_MALLOC(binary, binary_len); + + err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + fh = fopen(binary_file_name, "w"); + if (fh == NULL) + { + _STARPU_DISP("Error: Failed to open file <%s>\n", binary_file_name); + perror("fopen"); + return EXIT_FAILURE; + } + fwrite(binary, binary_len, 1, fh); + fclose(fh); + free(binary); + _STARPU_DEBUG("File 
<%s> created\n", binary_file_name); + + err = clReleaseProgram(program); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + } + } + return EXIT_SUCCESS; +} + +void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source) +{ + // Locate source file + _starpu_opencl_locate_file(source_file_name, located_file_name, located_dir_name); + _STARPU_DEBUG("Source file name : <%s>\n", *located_file_name); + _STARPU_DEBUG("Source directory name : <%s>\n", *located_dir_name); + + // Load the compute program from disk into a char * + char *source = _starpu_opencl_load_program_source(*located_file_name); + if(!source) + _STARPU_ERROR("Failed to load compute program from file <%s>!\n", *located_file_name); + + _STARPU_MALLOC(*opencl_program_source, strlen(source)+1); + snprintf(*opencl_program_source, strlen(source)+1, "%s", source); + free(source); +} + +void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source) +{ + char *_located_file_name; + char *_located_dir_name; + + // Locate source file + _starpu_opencl_locate_file(source_file_name, &_located_file_name, &_located_dir_name); + _STARPU_DEBUG("Source file name : <%s>\n", _located_file_name); + _STARPU_DEBUG("Source directory name : <%s>\n", _located_dir_name); + + // Load the compute program from disk into a char * + char *source = _starpu_opencl_load_program_source(_located_file_name); + if(!source) + _STARPU_ERROR("Failed to load compute program from file <%s>!\n", _located_file_name); + + sprintf(located_file_name, "%s", _located_file_name); + free(_located_file_name); + sprintf(located_dir_name, "%s", _located_dir_name); + free(_located_dir_name); + sprintf(opencl_program_source, "%s", source); + free(source); +} + +static +int _starpu_opencl_compile_or_load_opencl_from_file(const char *source_file_name, struct 
starpu_opencl_program *opencl_programs, const char* build_options) +{ + int nb_devices; + int ret; + char *located_file_name; + char *located_dir_name; + char new_build_options[1024]; + char *opencl_program_source; + + // Do not try to load and compile the file if there is no devices + nb_devices = starpu_opencl_worker_get_count(); + if (nb_devices == 0) return EXIT_SUCCESS; + + starpu_opencl_load_program_source_malloc(source_file_name, &located_file_name, &located_dir_name, &opencl_program_source); + + if (!build_options) + build_options = ""; + + if (!strcmp(located_dir_name, "")) + { + snprintf(new_build_options, sizeof(new_build_options), "%s", build_options); + } + else + { + snprintf(new_build_options, sizeof(new_build_options), "-I %s %s", located_dir_name, build_options); + } + _STARPU_DEBUG("Build options: <%s>\n", new_build_options); + + ret = _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, new_build_options, opencl_programs, source_file_name); + + _STARPU_DEBUG("located_file_name : <%s>\n", located_file_name); + _STARPU_DEBUG("located_dir_name : <%s>\n", located_dir_name); + free(located_file_name); + free(located_dir_name); + free(opencl_program_source); + + return ret; +} + +int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char* build_options) +{ + return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, NULL, build_options); +} + +int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char* build_options) +{ + return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, NULL, file_name); +} + +int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, + const char* build_options) +{ + return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, opencl_programs, NULL); +} + +int 
starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, + const char* build_options) +{ + return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, opencl_programs, build_options); +} + +int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs) +{ + unsigned int dev; + unsigned int nb_devices; + + nb_devices = _starpu_opencl_get_device_count(); + // Iterate over each device + for(dev = 0; dev < nb_devices; dev ++) + { + cl_device_id device; + cl_context context; + cl_program program; + cl_int err; + char *binary; + char binary_file_name[1024]; + size_t length; + cl_int binary_status; + + opencl_programs->programs[dev] = NULL; + + starpu_opencl_get_device(dev, &device); + starpu_opencl_get_context(dev, &context); + if (context == NULL) + { + _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); + continue; + } + + // Load the binary buffer + err = _starpu_opencl_get_binary_name(binary_file_name, sizeof(binary_file_name), kernel_id, dev, device); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + binary = _starpu_opencl_load_program_binary(binary_file_name, &length); + + // Create the compute program from the binary buffer + program = clCreateProgramWithBinary(context, 1, &device, &length, (const unsigned char **) &binary, &binary_status, &err); + if (!program || err != CL_SUCCESS) + { + _STARPU_DISP("Error: Failed to load program binary!\n"); + return EXIT_FAILURE; + } + + // Build the program executable + err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); + + // Get the status + { + cl_build_status status; + size_t len; + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); + if (len > 2) + { + char *buffer; + _STARPU_MALLOC(buffer, len); + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, buffer, &len); + _STARPU_DISP("Compilation output\n%s\n", buffer); + + 
free(buffer); + } + + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); + if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) + { + _STARPU_DISP("Error: Failed to build program executable!\n"); + _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); + return EXIT_FAILURE; + } + } + + // Store program + opencl_programs->programs[dev] = program; + free(binary); + } + return 0; +} + +int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs) +{ + unsigned int dev; + unsigned int nb_devices; + + if (!starpu_opencl_worker_get_count()) + return 0; + + nb_devices = _starpu_opencl_get_device_count(); + // Iterate over each device + for(dev = 0; dev < nb_devices; dev ++) + { + if (opencl_programs->programs[dev]) + { + cl_int err; + err = clReleaseProgram(opencl_programs->programs[dev]); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + STARPU_OPENCL_REPORT_ERROR(err); + } + } + return 0; +} + +int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED) +{ +#if defined(CL_PROFILING_CLOCK_CYCLE_COUNT)||defined(CL_PROFILING_STALL_CYCLE_COUNT)||defined(CL_PROFILING_POWER_CONSUMED) + struct starpu_task *task = starpu_task_get_current(); + struct starpu_profiling_task_info *info = task->profiling_info; +#endif + +#ifdef CL_PROFILING_CLOCK_CYCLE_COUNT + if (starpu_profiling_status_get() && info) + { + cl_int err; + unsigned int clock_cycle_count; + size_t size; + err = clGetEventProfilingInfo(event, CL_PROFILING_CLOCK_CYCLE_COUNT, sizeof(clock_cycle_count), &clock_cycle_count, &size); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + STARPU_ASSERT(size == sizeof(clock_cycle_count)); + info->used_cycles += clock_cycle_count; + } +#endif +#ifdef CL_PROFILING_STALL_CYCLE_COUNT + if (starpu_profiling_status_get() && info) + { + cl_int err; + unsigned int stall_cycle_count; + size_t size; + err = clGetEventProfilingInfo(event, CL_PROFILING_STALL_CYCLE_COUNT, 
sizeof(stall_cycle_count), &stall_cycle_count, &size); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + STARPU_ASSERT(size == sizeof(stall_cycle_count)); + + info->stall_cycles += stall_cycle_count; + } +#endif +#ifdef CL_PROFILING_POWER_CONSUMED + if (info && (starpu_profiling_status_get() || (task->cl && task->cl->energy_model && task->cl->energy_model->benchmarking))) + { + cl_int err; + double energy_consumed; + size_t size; + err = clGetEventProfilingInfo(event, CL_PROFILING_POWER_CONSUMED, sizeof(energy_consumed), &energy_consumed, &size); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + STARPU_ASSERT(size == sizeof(energy_consumed)); + + info->energy_consumed += energy_consumed; + } +#endif + + return 0; +} + +const char *starpu_opencl_error_string(cl_int status) +{ + const char *errormsg; + switch (status) + { + case CL_SUCCESS: + errormsg = "Success"; + break; + case CL_DEVICE_NOT_FOUND: + errormsg = "Device not found"; + break; + case CL_DEVICE_NOT_AVAILABLE: + errormsg = "Device not available"; + break; + case CL_COMPILER_NOT_AVAILABLE: + errormsg = "Compiler not available"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + errormsg = "Memory object allocation failure"; + break; + case CL_OUT_OF_RESOURCES: + errormsg = "Out of resources"; + break; + case CL_OUT_OF_HOST_MEMORY: + errormsg = "Out of host memory"; + break; + case CL_PROFILING_INFO_NOT_AVAILABLE: + errormsg = "Profiling info not available"; + break; + case CL_MEM_COPY_OVERLAP: + errormsg = "Memory copy overlap"; + break; + case CL_IMAGE_FORMAT_MISMATCH: + errormsg = "Image format mismatch"; + break; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + errormsg = "Image format not supported"; + break; + case CL_BUILD_PROGRAM_FAILURE: + errormsg = "Build program failure"; + break; + case CL_MAP_FAILURE: + errormsg = "Map failure"; + break; + case CL_INVALID_VALUE: + errormsg = "Invalid value"; + break; + case CL_INVALID_DEVICE_TYPE: + errormsg = "Invalid device type"; + break; + 
case CL_INVALID_PLATFORM: + errormsg = "Invalid platform"; + break; + case CL_INVALID_DEVICE: + errormsg = "Invalid device"; + break; + case CL_INVALID_CONTEXT: + errormsg = "Invalid context"; + break; + case CL_INVALID_QUEUE_PROPERTIES: + errormsg = "Invalid queue properties"; + break; + case CL_INVALID_COMMAND_QUEUE: + errormsg = "Invalid command queue"; + break; + case CL_INVALID_HOST_PTR: + errormsg = "Invalid host pointer"; + break; + case CL_INVALID_MEM_OBJECT: + errormsg = "Invalid memory object"; + break; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + errormsg = "Invalid image format descriptor"; + break; + case CL_INVALID_IMAGE_SIZE: + errormsg = "Invalid image size"; + break; + case CL_INVALID_SAMPLER: + errormsg = "Invalid sampler"; + break; + case CL_INVALID_BINARY: + errormsg = "Invalid binary"; + break; + case CL_INVALID_BUILD_OPTIONS: + errormsg = "Invalid build options"; + break; + case CL_INVALID_PROGRAM: + errormsg = "Invalid program"; + break; + case CL_INVALID_PROGRAM_EXECUTABLE: + errormsg = "Invalid program executable"; + break; + case CL_INVALID_KERNEL_NAME: + errormsg = "Invalid kernel name"; + break; + case CL_INVALID_KERNEL_DEFINITION: + errormsg = "Invalid kernel definition"; + break; + case CL_INVALID_KERNEL: + errormsg = "Invalid kernel"; + break; + case CL_INVALID_ARG_INDEX: + errormsg = "Invalid argument index"; + break; + case CL_INVALID_ARG_VALUE: + errormsg = "Invalid argument value"; + break; + case CL_INVALID_ARG_SIZE: + errormsg = "Invalid argument size"; + break; + case CL_INVALID_KERNEL_ARGS: + errormsg = "Invalid kernel arguments"; + break; + case CL_INVALID_WORK_DIMENSION: + errormsg = "Invalid work dimension"; + break; + case CL_INVALID_WORK_GROUP_SIZE: + errormsg = "Invalid work group size"; + break; + case CL_INVALID_WORK_ITEM_SIZE: + errormsg = "Invalid work item size"; + break; + case CL_INVALID_GLOBAL_OFFSET: + errormsg = "Invalid global offset"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + errormsg = "Invalid event 
wait list"; + break; + case CL_INVALID_EVENT: + errormsg = "Invalid event"; + break; + case CL_INVALID_OPERATION: + errormsg = "Invalid operation"; + break; + case CL_INVALID_GL_OBJECT: + errormsg = "Invalid GL object"; + break; + case CL_INVALID_BUFFER_SIZE: + errormsg = "Invalid buffer size"; + break; + case CL_INVALID_MIP_LEVEL: + errormsg = "Invalid MIP level"; + break; +#ifdef CL_PLATFORM_NOT_FOUND_KHR + case CL_PLATFORM_NOT_FOUND_KHR: + errormsg = "Platform not found"; + break; +#endif + default: + errormsg = "unknown OpenCL error"; + break; + } + return errormsg; +} + +void starpu_opencl_display_error(const char *func, const char *file, int line, const char* msg, cl_int status) +{ + _STARPU_MSG("oops in %s (%s:%d) (%s) ... <%s> (%d) \n", func, file, line, msg, starpu_opencl_error_string (status), status); +} + +int starpu_opencl_set_kernel_args(cl_int *error, cl_kernel *kernel, ...) +{ + int i; + va_list ap; + + va_start(ap, kernel); + + for (i = 0; ; i++) + { + int size = va_arg(ap, int); + if (size == 0) + break; + + cl_mem *ptr = va_arg(ap, cl_mem *); + int err = clSetKernelArg(*kernel, i, size, ptr); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) + { + *error = err; + break; + } + } + + va_end(ap); + return i; +} diff --git a/src/drivers/opencl/driver_opencl_utils.h b/src/drivers/opencl/driver_opencl_utils.h new file mode 100644 index 0000000..7c53942 --- /dev/null +++ b/src/drivers/opencl/driver_opencl_utils.h @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_OPENCL_UTILS_H__ +#define __STARPU_OPENCL_UTILS_H__ + +#pragma GCC visibility push(hidden) + +/** @file */ + +char *_starpu_opencl_get_device_type_as_string(int id); + +#define _STARPU_OPENCL_PLATFORM_MAX 4 + +#pragma GCC visibility pop + +#endif /* __STARPU_OPENCL_UTILS_H__ */ diff --git a/src/drivers/tcpip/driver_tcpip_common.c b/src/drivers/tcpip/driver_tcpip_common.c new file mode 100644 index 0000000..bb483b0 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_common.c @@ -0,0 +1,1500 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#ifdef MSG_ERRQUEUE +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NITER 32 +#define SIZE_BANDWIDTH (1024*1024) + +#define _SELECT_DEBUG 0 +#if _SELECT_DEBUG +# define _SELECT_PRINT(...) printf(__VA_ARGS__) +#else +# define _SELECT_PRINT(...) 
+#endif + +#define _ZC_DEBUG 0 +#if _ZC_DEBUG +# define _ZC_PRINT(...) printf(__VA_ARGS__) +#else +# define _ZC_PRINT(...) +#endif + +typedef starpu_ssize_t(*what_t)(int fd, void *buf, size_t count); + +static int tcpip_initialized = 0; +//static int src_node_id = 0; +static int nb_sink; +static char* host_port; +static int index_sink = 0; + +int _starpu_tcpip_common_multiple_thread; + +static int is_running; + +static struct _starpu_spinlock ListLock; + +static starpu_pthread_t thread_pending; +static int thread_pipe[2]; + +static pthread_t master_thread; + +struct _starpu_tcpip_socket *tcpip_sock; + +/* a flag to note whether the socket is local socket*/ +static int *local_flag; + +int _starpu_tcpip_mp_has_local() +{ + for (int i=1; i<=nb_sink; i++) + { + if(local_flag[i] == 1) + return 1; + } + + return 0; +} + +MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, event); /*_starpu_tcpip_ms_request_multilist_event*/ +MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, thread); /*_starpu_tcpip_ms_request_multilist_thread*/ +MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, pending); /*_starpu_tcpip_ms_request_multilist_pending*/ + +struct _starpu_tcpip_ms_request +{ + /*member of list of event*/ + struct _starpu_tcpip_ms_request_multilist_event event; + /*member of list of thread for async send/receive*/ + struct _starpu_tcpip_ms_request_multilist_thread thread; + /*member of list of pending for except in select*/ + struct _starpu_tcpip_ms_request_multilist_pending pending; + /*the struct of remote socket to send/receive message*/ + struct _starpu_tcpip_socket *remote_sock; + /*the message to send/receive*/ + char* buf; + /*the length of message*/ + int len; + /*a flag to detect whether the operation is completed*/ + int flag_completed; + /*a semaphore to detect whether the request is completed*/ + starpu_sem_t sem_wait_request; + /*a flag to detect send or receive*/ + int is_sender; + /*the length of message that has been sent/wrote*/ + int offset; + /*active the flag 
MSG_ZEROCOPY*/ + int zerocopy; + /*record the count at the end of send*/ + uint32_t send_end; +}; + +MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, event); +MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, thread); +MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, pending); + +static struct _starpu_tcpip_ms_request_multilist_thread thread_list; + +struct _starpu_tcpip_ms_async_event +{ + int is_sender; + struct _starpu_tcpip_ms_request_multilist_event *requests; +}; + +static inline struct _starpu_tcpip_ms_async_event *_starpu_tcpip_ms_async_event(union _starpu_async_channel_event *_event) +{ + struct _starpu_tcpip_ms_async_event *event; + STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); + event = (void *) _event; + return event; +} + +/*hash table struct*/ +struct _starpu_tcpip_req_pending +{ + int remote_sock; + struct _starpu_tcpip_ms_request_multilist_thread send_list; + struct _starpu_tcpip_ms_request_multilist_thread recv_list; + struct _starpu_tcpip_ms_request_multilist_pending pending_list; + UT_hash_handle hh; +}; + +//function thread +static void * _starpu_tcpip_thread_pending(void *foo STARPU_ATTRIBUTE_UNUSED) +{ + fd_set reads; + fd_set writes; + int fdmax=0; + + struct _starpu_tcpip_req_pending *pending_tables = NULL; + struct _starpu_tcpip_req_pending *table, *tmp; + + FD_ZERO(&reads); + FD_ZERO(&writes); + + FD_SET(thread_pipe[0], &reads); + fd_set reads2; + fd_set writes2; + + fdmax = thread_pipe[0]; + + while(is_running) + { + _SELECT_PRINT("in while\n"); + reads2 = reads; + writes2 = writes; + + int ret; + ret=select(fdmax+1, &reads2, &writes2, NULL, NULL); + STARPU_ASSERT(ret>=0); + + if(FD_ISSET(thread_pipe[0], &reads2)) + { + char buf[16]; + int n=read(thread_pipe[0], buf, sizeof(buf)); + STARPU_ASSERT(n>=0); + if(!is_running) + break; + + int i; + for(i=0; iremote_sock->async_sock; + int is_sender = req_thread->is_sender; + + 
HASH_FIND_INT(pending_tables, &remote_sock, table); + if(table == NULL) + { + _STARPU_MALLOC(table, sizeof(*table)); + table->remote_sock = remote_sock; + _starpu_tcpip_ms_request_multilist_head_init_thread(&table->send_list); + _starpu_tcpip_ms_request_multilist_head_init_thread(&table->recv_list); + _starpu_tcpip_ms_request_multilist_head_init_pending(&table->pending_list); + HASH_ADD_INT(pending_tables, remote_sock, table); + + } + if(is_sender) + { + _starpu_tcpip_ms_request_multilist_push_back_thread(&table->send_list, req_thread); + FD_SET(remote_sock, &writes); + } + else + { + _starpu_tcpip_ms_request_multilist_push_back_thread(&table->recv_list, req_thread); + FD_SET(remote_sock, &reads); + } + + if(remote_sock > fdmax) + fdmax=remote_sock; + } + + } + + HASH_ITER(hh, pending_tables, table, tmp) + { + int remote_sock = table->remote_sock; + _SELECT_PRINT("remote_sock in loop is %d\n", remote_sock); + + void socket_action(what_t what, const char * whatstr, struct _starpu_tcpip_ms_request_multilist_thread *list, fd_set * fdset) + { + struct _starpu_tcpip_ms_request * req = _starpu_tcpip_ms_request_multilist_begin_thread(list); + char* msg = req->buf; + int len = req->len; + + int res = 0; + res = what(remote_sock, msg+req->offset, len-req->offset); + _SELECT_PRINT("%s res is %d\n", whatstr, res); + STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg asynchronous with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); + req->offset+=res; + + _SELECT_PRINT("offset after %s is %d\n", whatstr, req->offset); + + if(req->offset == len) + { + _starpu_tcpip_ms_request_multilist_erase_thread(list, req); + + if(_starpu_tcpip_ms_request_multilist_empty_thread(list)) + FD_CLR(remote_sock, fdset); + + req->flag_completed = 1; + starpu_sem_post(&req->sem_wait_request); + + /*send the signal that message is ready */ + struct _starpu_mp_node *node = NULL; + _starpu_tcpip_common_signal(node); + } + } + + 
if(FD_ISSET(remote_sock, &writes2)) + { +#ifdef SO_ZEROCOPY + struct pollfd pfd; + pfd.fd = remote_sock; + pfd.events = POLLERR|POLLOUT; + pfd.revents = 0; + if(poll(&pfd, 1, -1) <= 0) + error(1, errno, "poll"); + + if(pfd.revents & POLLERR) + { + struct _starpu_tcpip_ms_request * req_pending = _starpu_tcpip_ms_request_multilist_begin_pending(&table->pending_list); + _ZC_PRINT("nbsend is %d\n", req_pending->remote_sock->nbsend); + struct sock_extended_err *serr; + struct msghdr mg = {}; + struct cmsghdr *cm; + uint32_t hi, lo; + char control[100]; + + mg.msg_control = control; + mg.msg_controllen = sizeof(control); + + _ZC_PRINT("before recvmsg\n"); + int r = recvmsg(remote_sock, &mg, MSG_ERRQUEUE); + // if (r == -1 && errno == EAGAIN) + // continue; + if (r == -1) + error(1, errno, "recvmsg notification"); + if (mg.msg_flags & MSG_CTRUNC) + error(1, errno, "recvmsg notification: truncated"); + + cm = CMSG_FIRSTHDR(&mg); + if (!cm) + error(1, 0, "cmsg: no cmsg"); + + serr = (void *) CMSG_DATA(cm); + + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) + error(1, 0, "serr: wrong origin: %u", serr->ee_origin); + if (serr->ee_errno != 0) + error(1, 0, "serr: wrong error code: %u", serr->ee_errno); + + if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) + req_pending->zerocopy = 0; + + hi = serr->ee_data; + lo = serr->ee_info; + + _ZC_PRINT("h=%u l=%u\n", hi, lo); + + STARPU_ASSERT(lo == req_pending->remote_sock->nback); + STARPU_ASSERT(hi < req_pending->remote_sock->nbsend); + + req_pending->remote_sock->nback = hi+1; + + _ZC_PRINT("send end is %d\n", req_pending->send_end); + while(!_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) + { + struct _starpu_tcpip_ms_request * req_tmp = _starpu_tcpip_ms_request_multilist_begin_pending(&table->pending_list); + + if(hi+1 >= req_tmp->send_end) + { + _starpu_tcpip_ms_request_multilist_erase_pending(&table->pending_list, req_tmp); + + 
if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)&&_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) + FD_CLR(remote_sock, &writes); + + req_tmp->flag_completed = 1; + starpu_sem_post(&req_tmp->sem_wait_request); + + /*send the signal that message is ready*/ + struct _starpu_mp_node *node = NULL; + _starpu_tcpip_common_signal(node); + } + else + break; + } + + } + else + { + if(!(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list))) + { + struct _starpu_tcpip_ms_request * req = _starpu_tcpip_ms_request_multilist_begin_thread(&table->send_list); + char* msg = req->buf; + int len = req->len; + + if(req->remote_sock->zerocopy) + { + _ZC_PRINT("msg len is %d\n", len); + _ZC_PRINT("offset before send is %d\n", req->offset); + + if(req->offset == 0) + { + _starpu_tcpip_ms_request_multilist_push_back_pending(&table->pending_list, req); + } + + int res = send(remote_sock, msg+req->offset, len-req->offset, MSG_ZEROCOPY); + _ZC_PRINT("send return %d\n", res); + STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot send a msg asynchronous with a size of %d Bytes!, the result of send is %d, the error is %s ", len, res, strerror(errno)); + + req->remote_sock->nbsend++; + req->offset+=res; + + _ZC_PRINT("offset after send is %d\n", req->offset); + + if(req->offset == len) + { + req->send_end = req->remote_sock->nbsend; + _ZC_PRINT("send end after send is %d\n", req->send_end); + _starpu_tcpip_ms_request_multilist_erase_thread(&table->send_list, req); + + //if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)) + //we need this to check whether the msg are all sent, we would have to remove POLLOUT from poll.events + //FD_CLR(remote_sock, &writes); + } + } + else +#endif + { + socket_action((what_t)write, "write", &table->send_list, &writes); + } +#ifdef SO_ZEROCOPY + } + } +#endif + } + + if(FD_ISSET(remote_sock, &reads2)) + { + socket_action(read, "read", &table->recv_list, &reads); + } + /*if the 
recv/send_list is empty, delete and free hash table*/ + if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)&&_starpu_tcpip_ms_request_multilist_empty_thread(&table->recv_list)&&_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) + { + HASH_DEL(pending_tables, table); + free(table); + } + + } + + } + /*all hash tables should be deleted*/ + STARPU_ASSERT(pending_tables == NULL); + + return 0; +} + +static void handler(int num STARPU_ATTRIBUTE_UNUSED){} + +int _starpu_tcpip_common_mp_init() +{ + //Here we supposed the programmer called two times starpu_init. + if (tcpip_initialized) + return -ENODEV; + + /*get the slave number*/ + nb_sink = starpu_getenv_number("STARPU_TCPIP_MS_SLAVES"); + //_TCPIP_PRINT("the slave number is %d\n", nb_sink); + + if (nb_sink <= 0) + /* No slave */ + return 0; + + tcpip_initialized = 1; + + _starpu_tcpip_common_multiple_thread = starpu_getenv_number_default("STARPU_TCPIP_MS_MULTIPLE_THREAD", 0); + + master_thread = pthread_self(); + signal(SIGUSR1, handler); + + /*initialize the pipe*/ + int r=pipe(thread_pipe); + STARPU_ASSERT(r==0); + + _starpu_spin_init(&ListLock); + /*initialize the thread*/ + _starpu_tcpip_ms_request_multilist_head_init_thread(&thread_list); + + STARPU_HG_DISABLE_CHECKING(is_running); + is_running = 1; + STARPU_PTHREAD_CREATE(&thread_pending, NULL, _starpu_tcpip_thread_pending, NULL); + + /*get host info*/ + host_port = starpu_getenv("STARPU_TCPIP_MS_MASTER"); + + _STARPU_CALLOC(tcpip_sock, nb_sink + 1, sizeof(struct _starpu_tcpip_socket)); + _STARPU_MALLOC(local_flag, (nb_sink + 1)*sizeof(int)); + + struct sockaddr_in* sink_addr_list; + _STARPU_MALLOC(sink_addr_list, (nb_sink + 1)*sizeof(struct sockaddr_in)); + +#if _TCPIP_DEBUG + char clnt_ip[20]; +#endif + /*master part*/ + if(!host_port) + { + int source_sock_init = 0; + int local_sock = 0; + struct sockaddr_un name; + struct sockaddr_in source_addr_init; + socklen_t source_addr_init_size = sizeof(source_addr_init); + + 
unsigned short port = starpu_getenv_number_default("STARPU_TCPIP_MS_PORT", 1234); + + int init_res = master_init(1, &source_sock_init, &local_sock, &source_addr_init, &source_addr_init_size, &name, htonl(INADDR_ANY), htons(port), 3*nb_sink); + if(init_res != 0) + return -1; + + _TCPIP_PRINT("source_sock_init is %d\n", source_sock_init); + _TCPIP_PRINT("local_sock is %d\n", local_sock); + tcpip_sock[0].sync_sock = -1; + tcpip_sock[0].async_sock = -1; + tcpip_sock[0].notif_sock = -1; + tcpip_sock[0].zerocopy = -1; + /*source socket is not local socket*/ + if(local_sock == 0) + local_flag[0] = 0; + /*source socket is local socket*/ + else + local_flag[0] = 1; + + int i; + /*connect each slave, generate sync socket*/ + for (i=1; i<=nb_sink; i++) + { + int sink_sock; + int local_sock_flag; + int accept_res = master_accept(&sink_sock, source_sock_init, local_sock, NULL, &local_sock_flag); + if(accept_res != 0) + return -1; + + _TCPIP_PRINT("sink_sock is %d\n", sink_sock); + tcpip_sock[i].sync_sock = sink_sock; + local_flag[i] = local_sock_flag; + } + for (i=1; i<=nb_sink; i++) + { + /*write the id to slave*/ + int id_sink = i; + WRITE(tcpip_sock[i].sync_sock, &id_sink, sizeof(id_sink)); + + _TCPIP_PRINT("write to slave %d its index\n", id_sink); + + /*receive the slave address with the random allocated port number connect to other slaves*/ + struct sockaddr_in buf_addr; + READ(tcpip_sock[i].sync_sock, &buf_addr, sizeof(buf_addr)); + + sink_addr_list[i] = buf_addr; + _TCPIP_PRINT("Message from slave (slave address) is , ip : %s, port : %d.\n", + inet_ntop(AF_INET, &sink_addr_list[i].sin_addr, clnt_ip, sizeof(clnt_ip)), ntohs(sink_addr_list[i].sin_port)); + + } + /*connect each slave, generate async socket and notif socket*/ + for (i=1; i<=2*nb_sink; i++) + { + int sink_sock2; + int zerocopy; + int accept_res = master_accept(&sink_sock2, source_sock_init, local_sock, &zerocopy, NULL); + if(accept_res != 0) + return -1; + + int i_sink; + /*get slave index*/ + 
READ(sink_sock2, &i_sink, sizeof(i_sink)); + + _TCPIP_PRINT("the index received is %d, the index in loop is %d\n", i_sink, i); + _TCPIP_PRINT("sink_sock2 is %d\n", sink_sock2); + if(tcpip_sock[i_sink].async_sock == 0) + { + tcpip_sock[i_sink].async_sock = sink_sock2; + tcpip_sock[i_sink].zerocopy = zerocopy; + } + else + { + STARPU_ASSERT(tcpip_sock[i_sink].notif_sock == 0); + tcpip_sock[i_sink].notif_sock = sink_sock2; + } + + } + + close(source_sock_init); + if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) + { + close(local_sock); + unlink(name.sun_path); + } + + for(i=0; i<=nb_sink; i++) + { + _TCPIP_PRINT("sock_list[%d] in master part is %d\n", i, tcpip_sock[i].sync_sock); + } + for(i=0; i<=nb_sink; i++) + { + _TCPIP_PRINT("async_sock_list[%d] in master part is %d\n", i, tcpip_sock[i].async_sock); + } + for(i=0; i<=nb_sink; i++) + { + _TCPIP_PRINT("notif_sock_list[%d] in master part is %d\n", i, tcpip_sock[i].notif_sock); + } + /*write the address of one slave to another*/ + int j; + for (i=1; i<=nb_sink; i++) + { + for(j=1; jai_next) + { + int local_sock_flag; + int connect_res; + int try = 0; + while(1) + { + connect_res = slave_connect(&source_sock, cur, &sink_addr, NULL, NULL, &local_sock_flag); + if (connect_res == 0) + break; + if (errno != ECONNREFUSED || try++ >= 10) + break; + sleep(1); + } + if(connect_res == 1) + continue; + else if(connect_res < 0) + return -1; + + _TCPIP_PRINT("source_sock is %d\n", source_sock); + tcpip_sock[0].sync_sock = source_sock; + local_flag[0] = local_sock_flag; + + break; + } + freeaddrinfo(res); + if (!cur) + { + fprintf(stderr, "could not connect\n"); + return -1; + } + + /*****************************connection between slaves********************************/ + + /*get slave index in master sock_list*/ + READ(source_sock, &index_sink, sizeof(index_sink)); + + tcpip_sock[index_sink].sync_sock = -1; + tcpip_sock[index_sink].async_sock = -1; + tcpip_sock[index_sink].notif_sock = -1; + 
tcpip_sock[index_sink].zerocopy = -1; + + _TCPIP_PRINT("index_sink read from master is %d\n", index_sink); + + int sink_serv_sock = 0; + int sink_local_sock = 0; + struct sockaddr_un sink_name; + struct sockaddr_in sink_serv_addr; + socklen_t sink_serv_addr_size = sizeof(sink_serv_addr); + + int init_res = master_init(0, &sink_serv_sock, &sink_local_sock, &sink_serv_addr, &sink_serv_addr_size, &sink_name, sink_addr.sin_addr.s_addr, 0, 3*(nb_sink-index_sink)); + if(init_res != 0) + return -1; + + _TCPIP_PRINT("sink_serv_sock is %d\n", sink_serv_sock); + _TCPIP_PRINT("sink_local_sock is %d\n", sink_local_sock); + /*sink serv socket is not local socket*/ + if(sink_local_sock == 0) + local_flag[index_sink] = 0; + /*sink serv socket is local socket*/ + else + local_flag[index_sink] = 1; + + /*send slave address to master*/ + WRITE(source_sock, &sink_serv_addr, sink_serv_addr_size); + + /*async and notif communication*/ + int source_async_sock; + int source_notif_sock; + struct addrinfo *res1,*cur1; + struct addrinfo hints1; + + memset(&hints1, 0, sizeof(hints1)); + hints1.ai_socktype = SOCK_STREAM; + + int gaierrno1 = getaddrinfo(host, port, &hints1, &res1); + if (gaierrno1) + { + fprintf(stderr,"getaddrinfo: %s\n", gai_strerror(gaierrno1)); + return -1; + } + + for(cur1 = res1; cur1; cur1 = cur1->ai_next) + { + /*async connect*/ + int zerocopy; + int connect_res = slave_connect(&source_async_sock, cur1, NULL, NULL, &zerocopy, NULL); + if(connect_res == 1) + continue; + else if(connect_res < 0) + return -1; + + _TCPIP_PRINT("source_async_sock is %d\n", source_async_sock); + tcpip_sock[0].async_sock = source_async_sock; + tcpip_sock[0].zerocopy = zerocopy; + + /*notif connect*/ + int connect_notif_res = slave_connect(&source_notif_sock, cur1, NULL, NULL, NULL, NULL); + if(connect_notif_res == 1) + continue; + else if(connect_notif_res < 0) + { + close(source_async_sock); + return -1; + } + + _TCPIP_PRINT("source_notif_sock is %d\n", source_notif_sock); + 
tcpip_sock[0].notif_sock = source_notif_sock; + + break; + } + freeaddrinfo(res1); + if (!cur1) + { + fprintf(stderr, "could not connect async\n"); + return -1; + } + + /*send slave index to master async socket*/ + WRITE(source_async_sock, &index_sink, sizeof(index_sink)); + + /*send slave index to master notif socket*/ + WRITE(source_notif_sock, &index_sink, sizeof(index_sink)); + + /*communication between slaves*/ + int j; + /*the active part*/ + for (j=1; jtopology; + + int ntcpipcores = starpu_getenv_number("STARPU_NTCPIPMSTHREADS"); + if (ntcpipcores == -1) + { + int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; + node->nb_cores = topology->nusedpus / nhyperthreads; + } + else + node->nb_cores = ntcpipcores; +} + +int _starpu_tcpip_common_recv_is_ready(const struct _starpu_mp_node *mp_node) +{ + fd_set set; + int fd = mp_node->mp_connection.tcpip_mp_connection->sync_sock; + int res; + + struct timeval tv = + { + .tv_sec = 0, + .tv_usec = 0 + }; + + FD_ZERO(&set); + FD_SET(fd, &set); + + while((res = select(fd+1, &set, NULL, NULL, &tv)) == -1 && errno == EINTR); + + STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); + + return res; +} + +int _starpu_tcpip_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node) +{ + fd_set set; + int fd = mp_node->mp_connection.tcpip_mp_connection->notif_sock; + int res; + + struct timeval tv = + { + .tv_sec = 0, + .tv_usec = 0 + }; + + FD_ZERO(&set); + FD_SET(fd, &set); + + while((res = select(fd+1, &set, NULL, NULL, &tv)) == -1 && errno == EINTR); + + STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); + + return res; +} + +int _starpu_tcpip_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node) +{ + fd_set set; + int fd = mp_node->mp_connection.tcpip_mp_connection->notif_sock; + int res; + + struct timeval tv = + { + .tv_sec = 0, + .tv_usec = 0 + }; + + 
FD_ZERO(&set); + FD_SET(fd, &set); + + while((res = select(fd+1, NULL, &set, NULL, &tv)) == -1 && errno == EINTR); + + STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); + + return res; +} + +void _starpu_tcpip_common_wait(struct _starpu_mp_node *mp_node) +{ + fd_set reads; + fd_set writes; + int fd_sync = mp_node->mp_connection.tcpip_mp_connection->sync_sock; + int fd_notif = mp_node->mp_connection.tcpip_mp_connection->notif_sock; + int fd_max = 0; + int res; + + FD_ZERO(&reads); + FD_ZERO(&writes); + + FD_SET(fd_sync, &reads); + if(fd_sync > fd_max) + fd_max = fd_sync; + + sigset_t sigmask; + sigemptyset(&sigmask); + + STARPU_PTHREAD_MUTEX_LOCK(&mp_node->message_queue_mutex); + if(!mp_message_list_empty(&mp_node->message_queue) || !_starpu_mp_event_list_empty(&mp_node->event_queue)) + { + FD_SET(fd_notif, &writes); + if(fd_notif > fd_max) + fd_max = fd_notif; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->message_queue_mutex); + + res = pselect(fd_max+1, &reads, &writes, NULL, NULL, &sigmask); + if(res < 0) + STARPU_ASSERT_MSG(errno == EINTR, "There is an error when doing socket pselect %s %d\n", strerror(errno), errno); +} + +void _starpu_tcpip_common_signal(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED) +{ + int res; + res = pthread_kill(master_thread, SIGUSR1); + + STARPU_ASSERT(res == 0); +} + +static void __starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif); +static void __starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif); +static void _starpu_tcpip_common_action_socket(what_t what, const char * whatstr, int is_sender, const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *remote_sock, void *msg, int len, void * event, int notif); +static void _starpu_tcpip_common_send_to_socket(const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *dst_sock, void *msg, int len, 
void * event, int notif); +static void _starpu_tcpip_common_recv_from_socket(const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *src_sock, void *msg, int len, void * event, int notif); + +/* SEND */ +void _starpu_tcpip_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_tcpip_common_send(node, msg, len, NULL, 0); +} + +void _starpu_tcpip_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_tcpip_common_send(node, msg, len, NULL, 1); +} + +/* SEND to source node */ +void _starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event) +{ + __starpu_tcpip_common_send(node, msg, len, event, 0); +} + +static void __starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) +{ + _starpu_tcpip_common_send_to_socket(node, node->mp_connection.tcpip_mp_connection, msg, len, event, notif); +} + +/* SEND to any node */ +void _starpu_tcpip_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int devid, void *msg, int len, void * event) +{ + struct _starpu_tcpip_socket *dst_sock = &tcpip_sock[devid]; + _starpu_tcpip_common_send_to_socket(node, dst_sock, msg, len, event, 0); +} + +static void _starpu_tcpip_common_send_to_socket(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *dst_sock, void *msg, int len, void * event, int notif) +{ + _starpu_tcpip_common_action_socket((what_t)write, "send", 1, node, dst_sock, msg, len, event, notif); +} + + +/* RECV */ +void _starpu_tcpip_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_tcpip_common_recv(node, msg, len, NULL, 0); +} + +void _starpu_tcpip_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len) +{ + __starpu_tcpip_common_recv(node, msg, len, NULL, 1); +} + +void _starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event) +{ 
+ __starpu_tcpip_common_recv(node, msg, len, event, 0); +} + +/* RECV from source node */ +static void __starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) +{ + _starpu_tcpip_common_recv_from_socket(node, node->mp_connection.tcpip_mp_connection, msg, len, event, notif); +} + +/* RECV from any node */ +void _starpu_tcpip_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int devid, void *msg, int len, void * event) +{ + struct _starpu_tcpip_socket *src_sock = &tcpip_sock[devid]; + _starpu_tcpip_common_recv_from_socket(node, src_sock, msg, len, event, 0); +} + +static void _starpu_tcpip_common_recv_from_socket(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *src_sock, void *msg, int len, void * event, int notif) +{ + _starpu_tcpip_common_action_socket(read, "recv", 0, node, src_sock, msg, len, event, notif); +} + +/*do refactor for SEND to and RECV from socket */ +static void _starpu_tcpip_common_action_socket(what_t what, const char * whatstr, int is_sender, const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *remote_sock, void *msg, int len, void * event, int notif) +{ + if (event) + { + _TCPIP_PRINT("async %s\n", whatstr); + _TCPIP_PRINT("%s %d bytes to %d message %x\n", whatstr, len, remote_sock->async_sock, *((int *) (uintptr_t)msg)); + /* Asynchronous*/ + struct _starpu_async_channel * channel = event; + struct _starpu_tcpip_ms_async_event *tcpip_ms_event = _starpu_tcpip_ms_async_event(&channel->event); + tcpip_ms_event->is_sender = is_sender; + + /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ + if (channel->node_ops == NULL) + tcpip_ms_event->requests = NULL; + + /* Initialize the list */ + if (tcpip_ms_event->requests == NULL) + { + _STARPU_MALLOC(tcpip_ms_event->requests, sizeof(*tcpip_ms_event->requests)); + 
_starpu_tcpip_ms_request_multilist_head_init_event(tcpip_ms_event->requests); + } + + struct _starpu_tcpip_ms_request *req; + _STARPU_MALLOC(req, sizeof(*req)); + _starpu_tcpip_ms_request_multilist_init_thread(req); + _starpu_tcpip_ms_request_multilist_init_event(req); + _starpu_tcpip_ms_request_multilist_init_pending(req); + +#ifdef STARPU_SANITIZE_ADDRESS + /* Poke data immediately, to get a good backtrace where bogus + * pointers come from */ + if (is_sender) + { + char *c = malloc(len); + memcpy(c, msg, len); + free(c); + } + else + memset(msg, 0, len); +#endif + /*complete the fields*/ + req->remote_sock = remote_sock; + req->len = len; + req->buf = msg; + req->flag_completed = 0; + STARPU_HG_DISABLE_CHECKING(req->flag_completed); + starpu_sem_init(&req->sem_wait_request, 0, 0); + req->is_sender = is_sender; + req->offset = 0; + req->send_end = 0; + + _SELECT_PRINT("%s push back\n", whatstr); + _starpu_spin_lock(&ListLock); + _starpu_tcpip_ms_request_multilist_push_back_thread(&thread_list, req); + _starpu_spin_unlock(&ListLock); + + char buf = 0; + int res; + while((res = write(thread_pipe[1], &buf, 1)) == -1 && errno == EINTR) + ; + + channel->starpu_mp_common_finished_receiver++; + channel->starpu_mp_common_finished_sender++; + + _starpu_tcpip_ms_request_multilist_push_back_event(tcpip_ms_event->requests, req); + } + else + { + _TCPIP_PRINT("sync %s\n", whatstr); + /* Synchronous send */ + if(!notif) + { + _TCPIP_PRINT("dst_sock is %d\n", remote_sock->sync_sock); + int res, offset = 0; + while(offset < len) + { + while((res = what(remote_sock->sync_sock, (char*)msg+offset, len-offset)) == -1 && errno == EINTR) + ; + _TCPIP_PRINT("msg after write is %x, res is %d\n", *((int *) (uintptr_t)msg), res); + STARPU_ASSERT_MSG(res != 0 && !(res == -1 && errno == ECONNRESET), "TCP/IP Master/Slave noticed that %s (peer %d) has exited unexpectedly", node->kind == STARPU_NODE_TCPIP_SOURCE ? 
"the master" : "some slave", node->peer_id); + STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg synchronous with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); + offset+=res; + } + } + else + { + _TCPIP_PRINT("dst_sock is %d\n", remote_sock->notif_sock); + int res, offset = 0; + while(offset < len) + { + while((res = what(remote_sock->notif_sock, (char*)msg+offset, len-offset)) == -1 && errno == EINTR) + ; + _TCPIP_PRINT("msg after write is %x, res is %d\n", *((int *) (uintptr_t)msg), res); + STARPU_ASSERT_MSG(res != 0 && !(res == -1 && errno == ECONNRESET), "TCP/IP Master/Slave noticed that %s (peer %d) has exited unexpectedly", node->kind == STARPU_NODE_TCPIP_SOURCE ? "the master" : "some slave", node->peer_id); + STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg notification with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); + offset+=res; + } + } + + _TCPIP_PRINT("finish sync send\n"); + } + +} + +static void _starpu_tcpip_common_polling_node(struct _starpu_mp_node * node) +{ + /* poll the asynchronous messages.*/ + if (node != NULL) + { + STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); + while(node->nt_recv_is_ready(node)) + { + enum _starpu_mp_command answer; + void *arg; + int arg_size; + //_TCPIP_PRINT("polling_node\n"); + answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); + if(!_starpu_src_common_store_message(node,arg,arg_size,answer)) + { + _STARPU_ERROR("incorrect command '%s'", _starpu_mp_common_command_to_string(answer)); + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); + } +} + +/*do refactor for test event and wait request completion */ +static unsigned int _starpu_tcpip_common_action_completion(int wait, struct _starpu_async_channel * event) +{ + struct _starpu_tcpip_ms_async_event *tcpip_ms_event = _starpu_tcpip_ms_async_event(&event->event); + + if 
(tcpip_ms_event->requests != NULL) + { + struct _starpu_tcpip_ms_request * req; + struct _starpu_tcpip_ms_request * req_next; + + //_TCPIP_PRINT("event requests is %p\n", req); + for (req = _starpu_tcpip_ms_request_multilist_begin_event(tcpip_ms_event->requests); + req != _starpu_tcpip_ms_request_multilist_end_event(tcpip_ms_event->requests); + req = req_next) + { + req_next = _starpu_tcpip_ms_request_multilist_next_event(req); + + int flag = 0; + if(!wait) + flag = req->flag_completed; + + //_TCPIP_PRINT("the operation is finished? %d\n", flag); + /*operation completed*/ + if (flag || wait) + { + starpu_sem_wait(&req->sem_wait_request); + _starpu_tcpip_ms_request_multilist_erase_event(tcpip_ms_event->requests, req); + STARPU_HG_ENABLE_CHECKING(req->flag_completed); + free(req); + + if (tcpip_ms_event->is_sender) + event->starpu_mp_common_finished_sender--; + else + event->starpu_mp_common_finished_receiver--; + + //_TCPIP_PRINT("common finished sender is %d\n", event->starpu_mp_common_finished_sender); + //_TCPIP_PRINT("common finished receiver is %d\n", event->starpu_mp_common_finished_receiver); + + } + + } + + /* When the list is empty, we finished to wait each request */ + if (_starpu_tcpip_ms_request_multilist_empty_event(tcpip_ms_event->requests)) + { + /* Destroy the list */ + free(tcpip_ms_event->requests); + tcpip_ms_event->requests = NULL; + } + } + + //incoming ack from devices + int i = 0; + while((!wait && i++ == 0)||(wait && event->starpu_mp_common_finished_sender > 0) || (wait && event->starpu_mp_common_finished_receiver > 0)) + { + _starpu_tcpip_common_polling_node(event->polling_node_sender); + _starpu_tcpip_common_polling_node(event->polling_node_receiver); + } + + if(!wait) + return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver; + else + return 0; +} + +/* - In device to device communications, the first ack received by host + * is considered as the sender (but it cannot be, in fact, the sender) + */ 
+unsigned int _starpu_tcpip_common_test_event(struct _starpu_async_channel * event) +{ + return _starpu_tcpip_common_action_completion(0, event); +} + +/* - In device to device communications, the first ack received by host + * is considered as the sender (but it cannot be, in fact, the sender) + */ +/* Only used at starpu_shutdown */ +void _starpu_tcpip_common_wait_request_completion(struct _starpu_async_channel * event) +{ + _starpu_tcpip_common_action_completion(1, event); +} + +void _starpu_tcpip_common_barrier(void) +{ + char buf = 0; + //_TCPIP_PRINT("index_sink (in common barrier) is %d\n", index_sink); + int ret; + /*master part*/ + if(index_sink == 0) + { + int i; + for(i=1; i 0, "Cannot read from slave!"); + } + + for(i=1; i 0, "Cannot write to slave!"); + } + + } + /*slave part*/ + else + { + //_TCPIP_PRINT("master socket in sock list is %d\n", sock_list[0]); + ret=write(tcpip_sock[0].sync_sock, &buf, 1); + //printf("ret1 is %d\n", ret); + STARPU_ASSERT_MSG(ret > 0, "Cannot write to master!"); + ret=read(tcpip_sock[0].sync_sock, &buf, 1); + //printf("ret4 is %d\n", ret); + STARPU_ASSERT_MSG(ret > 0, "Cannot read from master!"); + } + _TCPIP_PRINT("finish common barrier\n"); +} + +/* Compute bandwidth and latency between source and sink nodes + * Source node has to have the entire set of times at the end + */ +void _starpu_tcpip_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS], double latency_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS]) +{ + int ret; + unsigned iter; + //_TCPIP_PRINT("index_sink is %d\n", index_sink); + char * buf; + _STARPU_MALLOC(buf, SIZE_BANDWIDTH); + memset(buf, 0, SIZE_BANDWIDTH); + + _starpu_tcpip_common_mp_init(); + + int sender, receiver; + for(sender = 0; sender < nb_sink+1; sender++) + { + for(receiver = 0; receiver < nb_sink+1; receiver++) + { + //Node can't be a sender and a receiver + if(sender == receiver) + continue; + + if (!index_sink) + _STARPU_DISP("measuring from %d to 
%d\n", sender, receiver); + + _starpu_tcpip_common_barrier(); + + // _TCPIP_PRINT("sender id is %d\n", sender); + // _TCPIP_PRINT("index_sink is %d\n", index_sink); + if(index_sink == sender) + { + + //_TCPIP_PRINT("sender id is %d\n", sender); + double start, end; + /* measure bandwidth sender to receiver */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + ret = write(tcpip_sock[receiver].sync_sock, buf, SIZE_BANDWIDTH); + STARPU_ASSERT_MSG(ret == SIZE_BANDWIDTH, "short write!"); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + ret = read(tcpip_sock[receiver].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + } + end = starpu_timing_now(); + timing_dtod[sender][receiver] = (end - start)/NITER/SIZE_BANDWIDTH; + + /* measure latency sender to receiver */ + start = starpu_timing_now(); + for (iter = 0; iter < NITER; iter++) + { + ret = write(tcpip_sock[receiver].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + ret = read(tcpip_sock[receiver].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + } + end = starpu_timing_now(); + latency_dtod[sender][receiver] = (end - start)/NITER/2; + } + + // _TCPIP_PRINT("receiver id is %d\n", receiver); + // _TCPIP_PRINT("index_sink is %d\n", index_sink); + if (index_sink == receiver) + { + + //_TCPIP_PRINT("receiver id is %d\n", receiver); + /* measure bandwidth sender to receiver*/ + for (iter = 0; iter < NITER; iter++) + { + size_t pending = SIZE_BANDWIDTH; + while (pending) + { + ret = read(tcpip_sock[sender].sync_sock, buf, SIZE_BANDWIDTH); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + pending -= ret; + } + ret = write(tcpip_sock[sender].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + } + 
+ /* measure latency sender to receiver */ + for (iter = 0; iter < NITER; iter++) + { + ret = read(tcpip_sock[sender].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + ret = write(tcpip_sock[sender].sync_sock, buf, 1); + STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); + } + } + } + + /* When a sender finished its work, it has to send its results to the master */ + + /* Master doesn't need to send to itself its data */ + if (sender == 0) + goto print; + + /* if we are the sender, we send the data */ + if (sender == index_sink) + { + write(tcpip_sock[0].sync_sock, timing_dtod[sender], sizeof(timing_dtod[sender])); + write(tcpip_sock[0].sync_sock, latency_dtod[sender], sizeof(latency_dtod[sender])); + } + + /* the master node receives the data */ + if (index_sink == 0) + { + read(tcpip_sock[sender].sync_sock, timing_dtod[sender], sizeof(timing_dtod[sender])); + read(tcpip_sock[sender].sync_sock, latency_dtod[sender], sizeof(latency_dtod[sender])); + } + +print: + if (index_sink == 0) + { + for(receiver = 0; receiver < nb_sink+1; receiver++) + { + if(sender == receiver) + continue; + + _STARPU_DISP("BANDWIDTH %d -> %d %.0fMB/s %.2fus\n", sender, receiver, 1/timing_dtod[sender][receiver], latency_dtod[sender][receiver]); + } + } + } + free(buf); +} diff --git a/src/drivers/tcpip/driver_tcpip_common.h b/src/drivers/tcpip/driver_tcpip_common.h new file mode 100644 index 0000000..2100a7c --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_common.h @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_TCPIP_COMMON_H__ +#define __DRIVER_TCPIP_COMMON_H__ + +/** @file */ + +#include +#include + +#pragma GCC visibility push(hidden) + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + +extern int _starpu_tcpip_common_multiple_thread; + +struct _starpu_tcpip_socket +{ + /* socket used for synchronous communications*/ + int sync_sock; + /* socket used for asynchronous communications*/ + int async_sock; + /* socket used for notification communications*/ + int notif_sock; + /* a flag to detect whether the socket can be used for MSG_ZEROCOPY */ + int zerocopy; + /* how many times is this message split up to send */ + unsigned nbsend; + unsigned nback; +}; + +extern struct _starpu_tcpip_socket *tcpip_sock; + +int _starpu_tcpip_mp_has_local(); + +int _starpu_tcpip_common_mp_init(); +void _starpu_tcpip_common_mp_deinit(); + +int _starpu_tcpip_common_is_src_node(); +int _starpu_tcpip_common_get_src_node(); +int _starpu_tcpip_common_is_mp_initialized(); +int _starpu_tcpip_common_recv_is_ready(const struct _starpu_mp_node *mp_node); +int _starpu_tcpip_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node); +int _starpu_tcpip_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node); +void _starpu_tcpip_common_wait(struct _starpu_mp_node *mp_node); +void _starpu_tcpip_common_signal(const struct _starpu_mp_node *mp_node); + +void _starpu_tcpip_common_mp_initialize_src_sink(struct _starpu_mp_node *node); + +void _starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event); +void _starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event); + +void _starpu_tcpip_common_mp_send(const struct _starpu_mp_node *node, 
void *msg, int len); +void _starpu_tcpip_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len); + +void _starpu_tcpip_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len); +void _starpu_tcpip_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len); + +void _starpu_tcpip_common_recv_from_device(const struct _starpu_mp_node *node, int devid, void *msg, int len, void * event); +void _starpu_tcpip_common_send_to_device(const struct _starpu_mp_node *node, int devid, void *msg, int len, void * event); + +unsigned int _starpu_tcpip_common_test_event(struct _starpu_async_channel * event); +void _starpu_tcpip_common_wait_request_completion(struct _starpu_async_channel * event); + +void _starpu_tcpip_common_barrier(void); + +void _starpu_tcpip_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS], double latency_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS]); + +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* __DRIVER_TCPIP_COMMON_H__ */ diff --git a/src/drivers/tcpip/driver_tcpip_common_func.h b/src/drivers/tcpip/driver_tcpip_common_func.h new file mode 100644 index 0000000..7dddbfa --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_common_func.h @@ -0,0 +1,347 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define _TCPIP_DEBUG 0 +#if _TCPIP_DEBUG +# define _TCPIP_PRINT(...) printf(__VA_ARGS__) +#else +# define _TCPIP_PRINT(...) +#endif + +#ifdef __linux__ +#include +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif +#endif + +enum errcase {SOCK_INIT, SOCK_GETADDRINFO, SOCK_GETADDRINFO_LOCAL}; + +#define SOCKET(domain, type, protocol, errcase) ({\ + int sock = 0; \ + sock = socket(domain, type, protocol); \ + if(sock < 0) \ + { \ + if(errcase == SOCK_GETADDRINFO) \ + { \ + if (errno != EAFNOSUPPORT) /* do not raise exception if ipv6 is not available */ \ + perror("fail to create socket"); \ + return 1; \ + } \ + else if(errcase == SOCK_GETADDRINFO_LOCAL) \ + { \ + if (errno != EAFNOSUPPORT) /* do not raise exception if ipv6 is not available */ \ + perror("fail to create socket"); \ + return -1; \ + } \ + else \ + { \ + perror("fail to create socket"); \ + return -1; \ + } \ + } \ + sock; \ + }) + +#define BIND(sockfd, addr, addrlen) ({ \ + if(bind(sockfd, addr, addrlen) != 0) \ + { \ + perror("socket fails to bind"); \ + return -1; \ + } \ + }) + +#define LISTEN(sockfd, backlog)({ \ + if(listen(sockfd, backlog) != 0) \ + { \ + perror("socket fails to listen"); \ + return -1; \ + } \ + }) + +#define ADDR_INIT(source_addr, source_port) ({ \ + struct sockaddr_in sockaddr_init; \ + memset(&sockaddr_init, 0, sizeof(sockaddr_init)); \ + sockaddr_init.sin_family = AF_INET; \ + sockaddr_init.sin_addr.s_addr = source_addr; \ + sockaddr_init.sin_port = source_port; \ + sockaddr_init; \ + }) + +#define LOCAL_ADDR_INIT(source_addr_init) ({ \ + struct sockaddr_un name; \ + memset(&name, 0, sizeof(name)); \ + name.sun_family = AF_UNIX; \ + snprintf(name.sun_path, sizeof(name.sun_path) - 1, "/tmp/starpu-%d.socket", ntohs(source_addr_init.sin_port)); \ + name; \ + }) + +#define GETSOCKNAME(sockfd, addr, addrlen) ({ \ + if(getsockname(sockfd, addr, addrlen) != 0) \ + { \ + perror("getsockname 
fail"); \ + return -1; \ + } \ + }) + +#define GETPEERNAME(sockfd, addr, addrlen) ({ \ + if(getpeername(sockfd, addr, addrlen) != 0) \ + { \ + perror("getpeername fail"); \ + return -1; \ + } \ + }) + +#define ACCEPT(sockfd, addr, addrlen) ({ \ + int sock; \ + sock = accept(sockfd, addr, addrlen); \ + if(sock < 0) \ + { \ + perror("fail to receive the request of slave"); \ + return -1; \ + } \ + sock; \ + }) + +#define CONNECT(sockfd, addr, addrlen, cur) ({ \ + if (connect(sockfd, addr, addrlen) < 0) \ + { \ + int err = errno; \ + perror("fail to connect socket"); \ + close(sockfd); \ + errno = err; \ + if(cur) \ + return 1; \ + else \ + return -1; \ + } \ + }) + +#define WRITE(fd, buf, count) ({ \ + if(write(fd, buf, count) < 0) \ + { \ + perror("fail to send"); \ + return -1; \ + } \ + }) + +#define READ(fd, buf, count) ({ \ + if(read(fd, buf, count) < 0) \ + { \ + perror("fail to receive"); \ + return -1; \ + } \ + }) + +#define SETSOCKOPT_ZEROCOPY(sockfd, optname) ({ \ + int zc; \ + int one = 1; \ + int ret = setsockopt(sockfd, SOL_SOCKET, optname, &one, sizeof(one)); \ + if (ret!=0) \ + { \ + if (errno != EOPNOTSUPP && errno != ENOPROTOOPT && errno != ENOTSUPP) \ + perror("setsockopt zerocopy"); \ + zc = 0; \ + } \ + else \ + zc = 1; \ + zc; \ + }) + + +/* This function contains all steps to initialize a socket before connect and accept steps. + * When we call this function, we need to indicate that it is for master-slave (master = 1) + * or slave-slave (master = 0). We also need to provide the information sin_addr "source_addr" + * and sin_port "source_port" that we want to set to initialize the binding address and + * the argument "backlog" for listen. It can generate a TCP/IP socket "ss" or a local socket "ls", + * and the bound address "source_addr_init" with its size "source_addr_init_size". + * For local socket, it also generates the bound address "local_name" linking a local path. 
+ */ +static inline int master_init(int master, int *ss, int *ls, struct sockaddr_in *source_addr_init, socklen_t *source_addr_init_size, struct sockaddr_un *local_name, unsigned long source_addr, unsigned short source_port, int backlog) +{ + /*TCPIP*/ + *ss = SOCKET(AF_INET, SOCK_STREAM, 0, SOCK_INIT); + + struct sockaddr_in addr_init = ADDR_INIT(source_addr, source_port); + socklen_t addr_init_size = sizeof(addr_init); + + if(master) + { + int one = 1; + setsockopt(*ss, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + } + + + BIND(*ss, (struct sockaddr*) &addr_init, addr_init_size); + + if(!master) + { + GETSOCKNAME(*ss, (struct sockaddr*) &addr_init, &addr_init_size); + } + + LISTEN(*ss, backlog); + + *source_addr_init = addr_init; + *source_addr_init_size = addr_init_size; + + /*local socket*/ + if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) + { + *ls = SOCKET(AF_UNIX, SOCK_STREAM, 0, 0); + + *local_name = LOCAL_ADDR_INIT(addr_init); + + _TCPIP_PRINT("local socket name is %s\n", local_name->sun_path); + unlink(local_name->sun_path); + + BIND(*ls, (const struct sockaddr *) &(*local_name), sizeof(*local_name)); + + LISTEN(*ls, backlog); + } + + return 0; +} + +/* Accept step. We provide the TCP/IP socket "source_sock" or local socket "local_sock" + * which is ready to accept the connection request from the other side. It will generate + * the socket of the other side "sink_sock". It will also show whether the zerocopy setting + * is successful (zerocopy = 1) or not (zerocopy = 0). This setting is only for async communication. 
+ */ +static inline int master_accept(int *sink_sock, int source_sock, int local_sock, int *zerocopy, int * local_sock_flag) +{ + struct sockaddr_in sink_addr; + socklen_t sink_addr_size = sizeof(sink_addr); + + *sink_sock = ACCEPT(source_sock, (struct sockaddr*)&sink_addr, &sink_addr_size); + + if (zerocopy != NULL) + { + #ifdef SO_ZEROCOPY + *zerocopy = SETSOCKOPT_ZEROCOPY(*sink_sock, SO_ZEROCOPY); + #else + *zerocopy = 0; + #endif + } + + if (local_sock_flag != NULL) + *local_sock_flag = 0; + + /*local socket*/ + if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) + { + struct sockaddr_in boundAddr; + socklen_t boundAddr_size = sizeof(boundAddr); + + GETSOCKNAME(*sink_sock, (struct sockaddr*) &boundAddr, &boundAddr_size); + + /*master and slave sides use the same ip address*/ + if(boundAddr.sin_addr.s_addr == sink_addr.sin_addr.s_addr) + { + close(*sink_sock); + *sink_sock = ACCEPT(local_sock, NULL, NULL); + + if (local_sock_flag != NULL) + *local_sock_flag = 1; + } + + if (zerocopy != NULL) + { + #ifdef SO_ZEROCOPY + *zerocopy = SETSOCKOPT_ZEROCOPY(*sink_sock, SO_ZEROCOPY); + #else + *zerocopy = 0; + #endif + } + } + + return 0; +} + +/* Connect step. We provide the connection address for TCP/IP socket, either it is addrinfo "cur" got from + * function getaddrinfo in master-salve mode, or it is "source_addr" in slave-slave mode. It will generate + * the socket of the other side "source_sock", In the case that slave connects to master, we need to get + * the address "source_addr" to which "source_sock" is bound. It will also show whether the zerocopy setting + * is successful (zerocopy = 1) or not (zerocopy = 0). This setting is only for async communication. 
+ */ +static inline int slave_connect(int *source_sock, struct addrinfo *cur, struct sockaddr_in *bound_addr, struct sockaddr_in *source_addr, int *zerocopy, int * local_sock_flag) +{ + if(cur != NULL) + { + *source_sock = SOCKET(cur->ai_family, cur->ai_socktype, cur->ai_protocol, SOCK_GETADDRINFO); + CONNECT(*source_sock, cur->ai_addr, cur->ai_addrlen, 1); + } + else + { + *source_sock = SOCKET(AF_INET, SOCK_STREAM, 0, SOCK_INIT); + CONNECT(*source_sock, (struct sockaddr*)&(*source_addr), sizeof(*source_addr), 0); + } + + if (zerocopy != NULL) + { + #ifdef SO_ZEROCOPY + *zerocopy = SETSOCKOPT_ZEROCOPY(*source_sock, SO_ZEROCOPY); + #else + *zerocopy = 0; + #endif + } + + if (local_sock_flag != NULL) + *local_sock_flag = 0; + + struct sockaddr_in boundAddr, peerAddr; + socklen_t boundAddr_size = sizeof(boundAddr); + socklen_t peerAddr_size = sizeof(peerAddr); + + GETSOCKNAME(*source_sock, (struct sockaddr*) &boundAddr, &boundAddr_size); + GETPEERNAME(*source_sock, (struct sockaddr*) &peerAddr, &peerAddr_size); + + if(bound_addr != NULL) + *bound_addr = boundAddr; + + /*local socket*/ + if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) + { + /*master and slave sides use the same ip address*/ + if(boundAddr.sin_addr.s_addr == peerAddr.sin_addr.s_addr) + { + close(*source_sock); + if(cur != NULL) + *source_sock = SOCKET(AF_UNIX, SOCK_STREAM, 0, SOCK_GETADDRINFO_LOCAL); + else + *source_sock = SOCKET(AF_UNIX, SOCK_STREAM, 0, SOCK_INIT); + + struct sockaddr_un local_name = LOCAL_ADDR_INIT(peerAddr); + + _TCPIP_PRINT("local socket name %s is got for sync connect\n", local_name.sun_path); + + CONNECT(*source_sock, (const struct sockaddr *) &local_name, sizeof(local_name), 0); + + if (local_sock_flag != NULL) + *local_sock_flag = 1; + } + + if (zerocopy != NULL) + { + #ifdef SO_ZEROCOPY + *zerocopy = SETSOCKOPT_ZEROCOPY(*source_sock, SO_ZEROCOPY); + #else + *zerocopy = 0; + #endif + } + } + + return 0; +} diff --git 
a/src/drivers/tcpip/driver_tcpip_init.c b/src/drivers/tcpip/driver_tcpip_init.c new file mode 100644 index 0000000..8006fb8 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_init.c @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +static struct _starpu_driver_info driver_info = +{ + .name_upper = "TCPIP_MS", + .name_var = "TCPIP_MS", + .name_lower = "tcpip_ms", + .memory_kind = STARPU_TCPIP_MS_RAM, + .alpha = 1.0f, +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + .run_worker = _starpu_tcpip_src_worker, + .init_worker_binding = _starpu_tcpip_init_worker_binding, + .init_worker_memory = _starpu_tcpip_init_worker_memory, +#endif +}; + +static struct _starpu_memory_driver_info memory_driver_info = +{ + .name_upper = "TCPIP_MS", + .worker_archtype = STARPU_TCPIP_MS_WORKER, +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + .ops = &_starpu_driver_tcpip_ms_node_ops, +#endif +}; + +void _starpu_tcpip_ms_preinit(void) +{ + _starpu_driver_info_register(STARPU_TCPIP_MS_WORKER, &driver_info); + _starpu_memory_driver_info_register(STARPU_TCPIP_MS_RAM, &memory_driver_info); +} diff --git a/src/drivers/tcpip/driver_tcpip_sink.c b/src/drivers/tcpip/driver_tcpip_sink.c new file mode 100644 index 0000000..83c1f53 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_sink.c @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "driver_tcpip_sink.h" +#include "driver_tcpip_source.h" +#include "driver_tcpip_common.h" + +void _starpu_tcpip_sink_init(struct _starpu_mp_node *node) +{ + _starpu_tcpip_common_mp_initialize_src_sink(node); + + _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores); + + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + + pthread_sigmask(SIG_BLOCK, &set, NULL); + //TODO +} + +void _starpu_tcpip_sink_bind_thread(const struct _starpu_mp_node *mp_node, int coreid, int *core_table, int nb_core) +{ + //TODO + (void)mp_node; + (void)coreid; + (void)core_table; + (void)nb_core; +} diff --git a/src/drivers/tcpip/driver_tcpip_sink.h b/src/drivers/tcpip/driver_tcpip_sink.h new file mode 100644 index 0000000..2afb9d4 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_sink.h @@ -0,0 +1,35 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_TCPIP_SINK_H__ +#define __DRIVER_TCPIP_SINK_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE + +void _starpu_tcpip_sink_init(struct _starpu_mp_node *node); +void _starpu_tcpip_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core); + +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* __DRIVER_TCPIP_SINK_H__ */ diff --git a/src/drivers/tcpip/driver_tcpip_source.c b/src/drivers/tcpip/driver_tcpip_source.c new file mode 100644 index 0000000..84a7967 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_source.c @@ -0,0 +1,395 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include +#include +#include + +#include + +#include +#include + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE +static unsigned tcpip_bindid_init[STARPU_MAXTCPIPDEVS] = { }; +static unsigned tcpip_bindid[STARPU_MAXTCPIPDEVS]; +static unsigned tcpip_memory_init[STARPU_MAXTCPIPDEVS] = { }; +static unsigned tcpip_memory_nodes[STARPU_MAXTCPIPDEVS]; + +static struct _starpu_worker_set tcpip_worker_set[STARPU_MAXTCPIPDEVS]; +#endif + +struct _starpu_mp_node *_starpu_tcpip_ms_src_get_actual_thread_mp_node() +{ + struct _starpu_worker *actual_worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(actual_worker); + + int devid = actual_worker->devid; + STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXTCPIPDEVS); + + return _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid]; +} + +static void __starpu_init_tcpip_config(struct _starpu_machine_topology * topology, + struct _starpu_machine_config *config, + unsigned tcpip_idx) +{ + int nbcores; + STARPU_ASSERT(tcpip_idx < STARPU_NMAXDEVS); /* validate the index before it addresses any per-device table */ + _starpu_src_common_sink_nbcores(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][tcpip_idx], &nbcores); + topology->nhwworker[STARPU_TCPIP_MS_WORKER][tcpip_idx] = nbcores; + + int ntcpipcores; + ntcpipcores = starpu_getenv_number("STARPU_NTCPIPMSTHREADS"); + + _starpu_topology_check_ndevices(&ntcpipcores, nbcores, 0, INT_MAX, 0, "STARPU_NTCPIPMSTHREADS", "TCPIP cores", ""); + + tcpip_worker_set[tcpip_idx].workers = &config->workers[topology->nworkers]; + tcpip_worker_set[tcpip_idx].nworkers = ntcpipcores; + _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][tcpip_idx]->baseworkerid = topology->nworkers; + + _starpu_topology_configure_workers(topology, config, + STARPU_TCPIP_MS_WORKER, + tcpip_idx, tcpip_idx, 0, 0, + ntcpipcores, 1, &tcpip_worker_set[tcpip_idx], + _starpu_tcpip_common_multiple_thread ?
NULL : tcpip_worker_set); +} + +/* Determine which devices we will use */ +void _starpu_init_tcpip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, + struct starpu_conf *user_conf, int no_mp_config) +{ + int i; + + /* Discover and configure the mp topology. That means: + * - discover the number of mp nodes; + * - initialize each discovered node; + * - discover the local topology (number of PUs/devices) of each node; + * - configure the workers accordingly. + */ + + for (i = 0; i < (int) (sizeof(tcpip_worker_set)/sizeof(tcpip_worker_set[0])); i++) + tcpip_worker_set[i].workers = NULL; + + int ntcpipms = user_conf->ntcpip_ms; + + if(ntcpipms != 0) + { + /* Discover and initialize the number of TCPIP nodes through the mp + * infrastructure. */ + unsigned nhwtcpipdevices = _starpu_tcpip_src_get_device_count(); + + if (ntcpipms == -1) + /* Nothing was specified, so let's use the number of + * detected tcpip devices. */ + ntcpipms = nhwtcpipdevices; + else + { + if ((unsigned) ntcpipms > nhwtcpipdevices) + { + /* The user requires more TCPIP devices than there are available */ + _STARPU_MSG("# Warning: %d TCPIP Master-Slave devices requested. Only %u available.\n", + ntcpipms, nhwtcpipdevices); + ntcpipms = nhwtcpipdevices; + } + } + /* Requested explicitly or auto-detected alike: never exceed the compile-time limit that sizes the per-device tables. */ + if(ntcpipms > STARPU_MAXTCPIPDEVS) + { + _STARPU_DISP("# Warning: %d TCPIP Master-Slave devices requested. Only %u enabled.
Use configure options --enable-maxtcpipdev=xxx to update the maximum value of supported TCPIP MS devices.\n", + ntcpipms, STARPU_MAXTCPIPDEVS); + ntcpipms = STARPU_MAXTCPIPDEVS; + } + } + + topology->ndevices[STARPU_TCPIP_MS_WORKER] = ntcpipms; + + /* If the user does not want to use TCPIP slaves, we close the slave processes */ + if (no_mp_config && topology->ndevices[STARPU_TCPIP_MS_WORKER] == 0) + { + _starpu_tcpip_common_mp_deinit(); + exit(0); + } + + if (!no_mp_config) + { + for (i = 0; i < ntcpipms; i++) + _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][i] = _starpu_mp_common_node_create(STARPU_NODE_TCPIP_SOURCE, i); + + for (i = 0; i < ntcpipms; i++) + __starpu_init_tcpip_config(topology, config, i); + } +} + +/*Bind the driver on a CPU core*/ +void _starpu_tcpip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + /* Perhaps the worker has some "favourite" bindings */ + unsigned *preferred_binding = NULL; + unsigned npreferred = 0; + unsigned devid = workerarg->devid; + + if (tcpip_bindid_init[devid]) + { + } + else + { + tcpip_bindid_init[devid] = 1; + if (_starpu_tcpip_common_multiple_thread || devid == 0) + tcpip_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); + else + tcpip_bindid[devid] = tcpip_bindid[0]; + } + + workerarg->bindid = tcpip_bindid[devid]; +} + +/*Set up memory and buses*/ +void _starpu_tcpip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) +{ + unsigned memory_node = -1; + unsigned devid = workerarg->devid; + unsigned numa, devid2; + + if (tcpip_memory_init[devid]) + { + memory_node = tcpip_memory_nodes[devid]; + } + else + { + tcpip_memory_init[devid] = 1; + memory_node = tcpip_memory_nodes[devid] = _starpu_memory_node_register(STARPU_TCPIP_MS_RAM, devid); + + _starpu_memory_node_set_mapped(memory_node); + + for (numa = 0;
numa < starpu_memory_nodes_get_numa_count(); numa++) + { + _starpu_register_bus(numa, memory_node); + _starpu_register_bus(memory_node, numa); + } + for (devid2 = 0; devid2 < STARPU_MAXTCPIPDEVS; devid2++) + { + if (tcpip_memory_init[devid2]) + { + _starpu_register_bus(tcpip_memory_nodes[devid], tcpip_memory_nodes[devid2]); + _starpu_register_bus(tcpip_memory_nodes[devid2], tcpip_memory_nodes[devid]); + } + } + + } + //This worker can manage transfers on NUMA nodes + for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); + + _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); + + if (!_starpu_tcpip_common_multiple_thread) + { + /* TCP/IP driver thread can manage all slave memories if we disable the TCP/IP multiple thread */ + int findworker; + for (findworker = 0; findworker < workerarg->workerid; findworker++) + { + struct _starpu_worker *findworkerarg = &config->workers[findworker]; + if (findworkerarg->arch == STARPU_TCPIP_MS_WORKER) + { + _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node); + _starpu_worker_drives_memory_node(findworkerarg, memory_node); + } + } + } + + _starpu_memory_node_add_nworkers(memory_node); + + workerarg->memory_node = memory_node; +} + +static void _starpu_deinit_tcpip_node(int devid) +{ + _starpu_mp_common_send_command(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid], STARPU_MP_COMMAND_EXIT, NULL, 0); + + _starpu_mp_common_node_destroy(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid]); +} + + +void _starpu_deinit_tcpip_config(struct _starpu_machine_config *config) +{ + struct _starpu_machine_topology *topology = &config->topology; + unsigned i; + + for (i = 0; i < topology->ndevices[STARPU_TCPIP_MS_WORKER]; i++) + _starpu_deinit_tcpip_node(i); +} + + +void _starpu_tcpip_source_init(struct _starpu_mp_node *node) +{ + _starpu_tcpip_common_mp_initialize_src_sink(node); + //TODO +} + + +void 
_starpu_tcpip_source_deinit(struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED) +{ + +} + +unsigned _starpu_tcpip_src_get_device_count() +{ + int nmpims = starpu_getenv_number("STARPU_TCPIP_MS_SLAVES"); + if (nmpims == -1) + /* No slave */ + nmpims = 0; + return nmpims; +} + +void *_starpu_tcpip_src_worker(void *arg) +{ + struct _starpu_worker *worker0 = arg; + struct _starpu_worker_set *set = worker0->set; + struct _starpu_worker_set *worker_set_tcpip = set; + int nbsinknodes = _starpu_tcpip_common_multiple_thread ? 1 : _starpu_tcpip_src_get_device_count(); + + int workersetnum; + for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++) + { + struct _starpu_worker_set * worker_set = &worker_set_tcpip[workersetnum]; + + /* As all workers of a set share common data, we just use the first + * one for initializing the following stuffs. */ + struct _starpu_worker *baseworker = &worker_set->workers[0]; + struct _starpu_machine_config *config = baseworker->config; + unsigned baseworkerid = baseworker - config->workers; + unsigned devid = baseworker->devid; + unsigned i; + + /* unsigned memnode = baseworker->memory_node; */ + + _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0); + +#ifdef STARPU_USE_FXT + for (i = 1; i < worker_set->nworkers; i++) + _starpu_worker_start(&worker_set->workers[i], STARPU_TCPIP_MS_WORKER, 0); +#endif + + // Current task for a thread managing a worker set has no sense. 
+ _starpu_set_current_task(NULL); + + for (i = 0; i < config->topology.nworker[STARPU_TCPIP_MS_WORKER][devid]; i++) + { + struct _starpu_worker *worker = &config->workers[baseworkerid+i]; + snprintf(worker->name, sizeof(worker->name), "TCPIP_MS %u core %u", devid, i); + snprintf(worker->short_name, sizeof(worker->short_name), "TCPIP_MS %u.%u", devid, i); + } + + + char thread_name[16]; + if (_starpu_tcpip_common_multiple_thread) + snprintf(thread_name, sizeof(thread_name), "TCPIP_MS %u", devid); + else + snprintf(thread_name, sizeof(thread_name), "TCPIP_MS"); + starpu_pthread_setname(thread_name); + + for (i = 0; i < worker_set->nworkers; i++) + { + struct _starpu_worker *worker = &worker_set->workers[i]; + _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + } + + _starpu_src_common_init_switch_env(workersetnum); + } /* for */ + + _starpu_src_common_workers_set(worker_set_tcpip, nbsinknodes, &_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][worker_set_tcpip->workers[0].devid]); + + return NULL; +} + +static int _starpu_tcpip_is_direct_access_supported(unsigned node, unsigned handling_node) +{ + (void) node; + enum starpu_node_kind kind = starpu_node_get_kind(handling_node); + return (kind == STARPU_TCPIP_MS_RAM); +} + +static uintptr_t _starpu_tcpip_map(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, size_t size, int *ret) +{ + if(!_starpu_tcpip_mp_has_local()) + { + *ret=-EXDEV; + return 0; + } + + uintptr_t map_addr = _starpu_src_common_map(dst_node, src+src_offset, size); + if(map_addr == 0) + { + *ret=-ENOMEM; + } + else + { + *ret = 0; + } + return map_addr; +} + +static int _starpu_tcpip_unmap(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst, unsigned dst_node, size_t size) +{ + _starpu_src_common_unmap(dst_node, dst, size); + + return 0; +} + +static int _starpu_tcpip_update_map(uintptr_t src, size_t src_offset, unsigned src_node, 
uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) +{ + (void) src; + (void) src_offset; + (void) src_node; + (void) dst; + (void) dst_offset; + (void) dst_node; + (void) size; + + /* Memory mappings are cache-coherent */ + return 0; +} +struct _starpu_node_ops _starpu_driver_tcpip_ms_node_ops = +{ + .name = "tcpip driver", + + .malloc_on_node = _starpu_src_common_allocate, + .free_on_node = _starpu_src_common_free, + + .is_direct_access_supported = _starpu_tcpip_is_direct_access_supported, + + .copy_interface_to[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, + .copy_interface_to[STARPU_TCPIP_MS_RAM] = _starpu_copy_interface_any_to_any, + + .copy_interface_from[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, + .copy_interface_from[STARPU_TCPIP_MS_RAM] = _starpu_copy_interface_any_to_any, + + .copy_data_to[STARPU_CPU_RAM] = _starpu_src_common_copy_data_sink_to_host, + .copy_data_to[STARPU_TCPIP_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, + + .copy_data_from[STARPU_CPU_RAM] = _starpu_src_common_copy_data_host_to_sink, + .copy_data_from[STARPU_TCPIP_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, + + .wait_request_completion = _starpu_tcpip_common_wait_request_completion, + .test_request_completion = _starpu_tcpip_common_test_event, + + .map[STARPU_CPU_RAM] = _starpu_tcpip_map, + .unmap[STARPU_CPU_RAM] = _starpu_tcpip_unmap, + .update_map[STARPU_CPU_RAM] = _starpu_tcpip_update_map, +}; diff --git a/src/drivers/tcpip/driver_tcpip_source.h b/src/drivers/tcpip/driver_tcpip_source.h new file mode 100644 index 0000000..f56cf26 --- /dev/null +++ b/src/drivers/tcpip/driver_tcpip_source.h @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __DRIVER_TCPIP_SOURCE_H__ +#define __DRIVER_TCPIP_SOURCE_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +void _starpu_tcpip_ms_preinit(void); + +#ifdef STARPU_USE_TCPIP_MASTER_SLAVE +extern struct _starpu_node_ops _starpu_driver_tcpip_ms_node_ops; + +/** Array of structures containing all the information useful to send + * and receive information with devices */ +struct _starpu_mp_node *_starpu_tcpip_ms_src_get_actual_thread_mp_node(); + +unsigned _starpu_tcpip_src_get_device_count(); +void _starpu_init_tcpip_config(struct _starpu_machine_topology * topology, struct _starpu_machine_config *config, struct starpu_conf *user_conf, int no_mp_config); +void _starpu_tcpip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void _starpu_tcpip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); +void _starpu_deinit_tcpip_config(struct _starpu_machine_config *config); +void *_starpu_tcpip_src_worker(void *arg); + +void _starpu_tcpip_source_init(struct _starpu_mp_node *node); +void _starpu_tcpip_source_deinit(struct _starpu_mp_node *node); + +#endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ + +#pragma GCC visibility pop + +#endif /* 
__DRIVER_TCPIP_SOURCE_H__ */ diff --git a/src/parallel_worker/starpu_parallel_worker_create.c b/src/parallel_worker/starpu_parallel_worker_create.c new file mode 100644 index 0000000..afec5a7 --- /dev/null +++ b/src/parallel_worker/starpu_parallel_worker_create.c @@ -0,0 +1,836 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This file creates an interface to manage resources within parallel + * workers and make use of parallel tasks. It entirely depends on the + * hwloc software. 
+ */ + +#include + +#ifdef STARPU_PARALLEL_WORKER + +starpu_binding_function _starpu_parallel_worker_type_get_func(enum starpu_parallel_worker_types type) +{ + starpu_binding_function prologue_func; + + switch (type) + { + case STARPU_PARALLEL_WORKER_OPENMP: + prologue_func = &starpu_parallel_worker_openmp_prologue; + break; + case STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL: + prologue_func = &starpu_parallel_worker_intel_openmp_mkl_prologue; + break; + case STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL: +#ifdef STARPU_MKL + prologue_func = &starpu_parallel_worker_gnu_openmp_mkl_prologue; +#else + _STARPU_MSG("Warning: MKL support is not available, using STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL instead\n"); + prologue_func = &starpu_parallel_worker_intel_openmp_mkl_prologue; +#endif + break; + default: + prologue_func = NULL; + } + + return prologue_func; +} + +void starpu_parallel_worker_openmp_prologue(void *arg) +{ + (void) arg; + int workerid = starpu_worker_get_id_check(); + + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + { + struct starpu_task *task = starpu_task_get_current(); + int sched_ctx = task->sched_ctx; + struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx); + /* If the view of the worker doesn't correspond to the view of the task, + adapt the thread team */ + if (ctx_struct->parallel_view != task->possibly_parallel) + { + int *cpuids = NULL; + int ncpuids = 0; + + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + if (!task->possibly_parallel) + ncpuids=1; + omp_set_num_threads(ncpuids); +#pragma omp parallel + { + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); + } + free(cpuids); + ctx_struct->parallel_view = !ctx_struct->parallel_view; + } + } + return; +} + +#ifdef STARPU_MKL +void starpu_parallel_worker_gnu_openmp_mkl_prologue(void *arg) +{ + (void) arg; + int workerid = starpu_worker_get_id_check(); /* same checked lookup as starpu_parallel_worker_openmp_prologue */ + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + { + struct starpu_task
*task = starpu_task_get_current(); + int sched_ctx = task->sched_ctx; + struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx); + /* If the view of the worker doesn't correspond to the view of the task, + adapt the thread team */ + if (ctx_struct->parallel_view != task->possibly_parallel) + { + int *cpuids = NULL; + int ncpuids = 0; + + starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); + if (!task->possibly_parallel) + ncpuids=1; + omp_set_num_threads(ncpuids); + mkl_set_num_threads_local(ncpuids); + mkl_set_dynamic(0); +#pragma omp parallel + { + starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); + } + free(cpuids); + ctx_struct->parallel_view = !ctx_struct->parallel_view; + } + } + return; +} +#endif + +/* Main interface function to create a parallel worker view of the machine. + * Its job is to capture what the user wants and store it in a standard view. */ +struct starpu_parallel_worker_config *_starpu_parallel_worker_init_varg(hwloc_obj_type_t parallel_worker_level, va_list varg_list) +{ + int arg_type; + struct starpu_parallel_worker_config *machine; + + _STARPU_CALLOC(machine, 1, sizeof(struct starpu_parallel_worker_config)); + _STARPU_CALLOC(machine->orig_params, 1, sizeof(struct _starpu_parallel_worker_parameters)); + machine->params = machine->orig_params; + machine->id = STARPU_NMAX_SCHED_CTXS; + machine->groups = _starpu_parallel_worker_group_list_new(); + machine->nparallel_workers = 0; + machine->ngroups = 0; + machine->topology = NULL; + _starpu_parallel_worker_init_parameters(machine->params); + + while ((arg_type = va_arg(varg_list, int)) != 0) + { + if (arg_type == STARPU_PARALLEL_WORKER_MIN_NB) + { + machine->params->min_nb = va_arg(varg_list, int); + if (machine->params->min_nb <= 0) + _STARPU_DISP("Caution min number of contexts shouldn't be negative or null\n"); + } + else if (arg_type == STARPU_PARALLEL_WORKER_MAX_NB) + { + machine->params->max_nb = va_arg(varg_list, int); + 
if (machine->params->max_nb <= 0) + _STARPU_DISP("Caution max number of contexts shouldn't be negative or null\n"); + } + else if (arg_type == STARPU_PARALLEL_WORKER_NB) + { + machine->params->nb = va_arg(varg_list, int); + if (machine->params->nb <= 0) + _STARPU_DISP("Caution number of contexts shouldn't be negative or null\n"); + } + else if (arg_type == STARPU_PARALLEL_WORKER_POLICY_NAME) + { + machine->params->sched_policy_name = va_arg(varg_list, char*); + } + else if (arg_type == STARPU_PARALLEL_WORKER_POLICY_STRUCT) + { + machine->params->sched_policy_struct = va_arg(varg_list, struct starpu_sched_policy*); + } + else if (arg_type == STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS) + { + machine->params->keep_homogeneous = va_arg(varg_list, int); /* 0=off, other=on */ + } + else if (arg_type == STARPU_PARALLEL_WORKER_PREFERE_MIN) + { + machine->params->prefere_min = va_arg(varg_list, int); /* 0=off, other=on */ + } + else if (arg_type == STARPU_PARALLEL_WORKER_CREATE_FUNC) + { + typedef void (*fn)(void*); + machine->params->create_func = va_arg(varg_list, fn); + } + else if (arg_type == STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG) + { + machine->params->create_func_arg = va_arg(varg_list, void*); + } + else if (arg_type == STARPU_PARALLEL_WORKER_TYPE) + { + machine->params->type = va_arg(varg_list, enum starpu_parallel_worker_types); + } + else if (arg_type == STARPU_PARALLEL_WORKER_AWAKE_WORKERS) + { + machine->params->awake_workers = va_arg(varg_list, unsigned); + } + else if (arg_type == STARPU_PARALLEL_WORKER_PARTITION_ONE) + { + struct _starpu_parallel_worker_group *group = _starpu_parallel_worker_group_new(); + _starpu_parallel_worker_group_init(group, machine); + _starpu_parallel_worker_group_list_push_back(machine->groups, group); + machine->params = group->params; + } + else if (arg_type == STARPU_PARALLEL_WORKER_NEW) + { + struct _starpu_parallel_worker *parallel_worker = _starpu_parallel_worker_new(); + struct _starpu_parallel_worker_group *group = 
_starpu_parallel_worker_group_list_back(machine->groups); + if (group == NULL) + { + group = _starpu_parallel_worker_group_new(); + _starpu_parallel_worker_group_init(group, machine); + _starpu_parallel_worker_group_list_push_back(machine->groups, group); + } + _starpu_parallel_worker_init(parallel_worker, group); + _starpu_parallel_worker_list_push_back(group->parallel_workers, parallel_worker); + machine->params = parallel_worker->params; + } + else if (arg_type == STARPU_PARALLEL_WORKER_NCORES) + { + struct _starpu_parallel_worker_group *group = _starpu_parallel_worker_group_list_back(machine->groups); + if (group == NULL) + { + group = _starpu_parallel_worker_group_new(); + _starpu_parallel_worker_group_init(group, machine); + _starpu_parallel_worker_group_list_push_back(machine->groups, group); + } + struct _starpu_parallel_worker *parallel_worker =_starpu_parallel_worker_list_back(group->parallel_workers); + parallel_worker->ncores = va_arg(varg_list, unsigned); + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); + } + } + va_end(varg_list); + + switch(parallel_worker_level) + { + case HWLOC_OBJ_MISC: + case HWLOC_OBJ_BRIDGE: + case HWLOC_OBJ_PCI_DEVICE: + case HWLOC_OBJ_OS_DEVICE: + STARPU_ABORT_MSG("Parallel_Worker aggregation isn't supported for level %s\n", + hwloc_obj_type_string(parallel_worker_level)); + break; + default: /* others can pass */ + break; + } + + if (_starpu_parallel_worker_config(parallel_worker_level, machine) == -ENODEV) + { + starpu_parallel_worker_shutdown(machine); + machine = NULL; + } + + return machine; +} + +struct starpu_parallel_worker_config *starpu_parallel_worker_init(hwloc_obj_type_t parallel_worker_level, ...) 
+{ + struct starpu_parallel_worker_config *config; + va_list varg_list; + va_start(varg_list, parallel_worker_level); + config = _starpu_parallel_worker_init_varg(parallel_worker_level, varg_list); + va_end(varg_list); + return config; +} + +int starpu_parallel_worker_shutdown(struct starpu_parallel_worker_config *machine) +{ + if (machine == NULL) + return -1; + struct _starpu_parallel_worker_group *g; + struct _starpu_parallel_worker_group_list *group_list = machine->groups; + + if (machine->id != STARPU_NMAX_SCHED_CTXS) + starpu_sched_ctx_delete(machine->id); + + g = _starpu_parallel_worker_group_list_begin(group_list); + while (g != _starpu_parallel_worker_group_list_end(group_list)) + { + struct _starpu_parallel_worker_group *tmp = g; + g = _starpu_parallel_worker_group_list_next(g); + _starpu_parallel_worker_group_remove(group_list, tmp); + } + _starpu_parallel_worker_group_list_delete(group_list); + + if (machine->topology != NULL) + hwloc_topology_destroy(machine->topology); + free(machine->orig_params); + free(machine); + starpu_sched_ctx_set_context(0); + + return 0; +} + +int starpu_parallel_worker_print(struct starpu_parallel_worker_config *parallel_workers) +{ + if (parallel_workers == NULL) + return -1; + + int cnt, w; + struct _starpu_parallel_worker_group *group; + struct _starpu_parallel_worker *parallel_worker; + + printf("Number of parallel workers created: %u\n", parallel_workers->nparallel_workers); + cnt=0; + if (parallel_workers->nparallel_workers) + { + for (group = _starpu_parallel_worker_group_list_begin(parallel_workers->groups); + group != _starpu_parallel_worker_group_list_end(parallel_workers->groups); + group = _starpu_parallel_worker_group_list_next(group)) + { + for (parallel_worker = _starpu_parallel_worker_list_begin(group->parallel_workers); + parallel_worker != _starpu_parallel_worker_list_end(group->parallel_workers); + parallel_worker = _starpu_parallel_worker_list_next(parallel_worker)) + { + printf("Parallel worker %d 
contains the following logical indexes:\n\t", cnt); + for (w=0; w < parallel_worker->ncores; w++) + printf("%d ", parallel_worker->cores[w]); + printf("\n"); + cnt++; + } + } + } + return 0; +} + +int _starpu_parallel_worker_create(struct _starpu_parallel_worker *parallel_worker) +{ + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + if (config->topology.nsched_ctxs == STARPU_NMAX_SCHED_CTXS) + /* Too many contexts already :/ */ + return 0; + + if (parallel_worker->params->awake_workers) + parallel_worker->id = starpu_sched_ctx_create(parallel_worker->workerids, parallel_worker->ncores, + "parallel_workers", + STARPU_SCHED_CTX_AWAKE_WORKERS, 0); + else + parallel_worker->id = starpu_sched_ctx_create(parallel_worker->workerids, parallel_worker->ncores, + "parallel_workers", 0); + + /* parallel_worker priority can be the lowest, so let's enforce it */ + starpu_sched_ctx_set_priority(parallel_worker->workerids, parallel_worker->ncores, parallel_worker->id, 0); + return 1; +} + +int _starpu_parallel_worker_group_create(struct _starpu_parallel_worker_group *group) +{ + struct _starpu_parallel_worker *c; + for (c = _starpu_parallel_worker_list_begin(group->parallel_workers) ; + c != _starpu_parallel_worker_list_end(group->parallel_workers) ; + c = _starpu_parallel_worker_list_next(c)) + { + if (c->ncores == 0) + continue; + if (_starpu_parallel_worker_create(c) == 0) + return 0; + if (!c->params->awake_workers) + _starpu_parallel_worker_bind(c); + } + + return 1; +} + +void _starpu_parallel_workers_set_nesting(struct starpu_parallel_worker_config *m) +{ + struct _starpu_parallel_worker_group *g; + struct _starpu_parallel_worker *c; + + for (g = _starpu_parallel_worker_group_list_begin(m->groups) ; + g != _starpu_parallel_worker_group_list_end(m->groups) ; + g = _starpu_parallel_worker_group_list_next(g)) + { + for (c = _starpu_parallel_worker_list_begin(g->parallel_workers) ; + c != _starpu_parallel_worker_list_end(g->parallel_workers) ; + c = 
_starpu_parallel_worker_list_next(c)) + _starpu_get_sched_ctx_struct(c->id)->nesting_sched_ctx = m->id; + } +} + +int _starpu_parallel_worker_bind(struct _starpu_parallel_worker *parallel_worker) +{ + starpu_binding_function func; + void *func_arg; + if (parallel_worker->params->create_func) + { + func = parallel_worker->params->create_func; + func_arg = (void*) parallel_worker->params->create_func_arg; + } + else + { + func = _starpu_parallel_worker_type_get_func(parallel_worker->params->type); + func_arg = NULL; + } + + return starpu_task_insert(&_starpu_parallel_worker_bind_cl, + STARPU_SCHED_CTX, parallel_worker->id, + STARPU_POSSIBLY_PARALLEL, 1, + STARPU_PROLOGUE_CALLBACK_POP, func, + STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, func_arg, + 0); +} + +void _starpu_parallel_worker_group_init(struct _starpu_parallel_worker_group *group, struct starpu_parallel_worker_config *father) +{ + group->id = 0; + group->nparallel_workers = 0; + group->parallel_workers = _starpu_parallel_worker_list_new(); + group->father = father; + _STARPU_MALLOC(group->params, sizeof(struct _starpu_parallel_worker_parameters)); + _starpu_parallel_worker_copy_parameters(father->params, group->params); + return; +} + +void _starpu_parallel_worker_init(struct _starpu_parallel_worker *parallel_worker, struct _starpu_parallel_worker_group *father) +{ + parallel_worker->id = STARPU_NMAX_SCHED_CTXS; + parallel_worker->cpuset = hwloc_bitmap_alloc(); + parallel_worker->ncores = 0; + parallel_worker->cores = NULL; + parallel_worker->workerids = NULL; + parallel_worker->father = father; + _STARPU_MALLOC(parallel_worker->params, sizeof(struct _starpu_parallel_worker_parameters)); + _starpu_parallel_worker_copy_parameters(father->params, parallel_worker->params); +} + +int _starpu_parallel_worker_remove(struct _starpu_parallel_worker_list *parallel_worker_list, struct _starpu_parallel_worker *parallel_worker) +{ + if (parallel_worker == NULL) +
return -1; + if (parallel_worker->id != STARPU_NMAX_SCHED_CTXS) /* only delete a context that was actually created; the worker itself is released either way */ + starpu_sched_ctx_delete(parallel_worker->id); + + if (parallel_worker->cores != NULL) + free(parallel_worker->cores); + if (parallel_worker->workerids != NULL) + free(parallel_worker->workerids); + + hwloc_bitmap_free(parallel_worker->cpuset); + free(parallel_worker->params); + _starpu_parallel_worker_list_erase(parallel_worker_list, parallel_worker); + _starpu_parallel_worker_delete(parallel_worker); + + return 0; +} + +int _starpu_parallel_worker_group_remove(struct _starpu_parallel_worker_group_list *group_list, struct _starpu_parallel_worker_group *group) +{ + struct _starpu_parallel_worker_list *parallel_worker_list = group->parallel_workers; + struct _starpu_parallel_worker *c = _starpu_parallel_worker_list_begin(parallel_worker_list); + while (c != _starpu_parallel_worker_list_end(parallel_worker_list)) + { + struct _starpu_parallel_worker *tmp = c; + c = _starpu_parallel_worker_list_next(c); + _starpu_parallel_worker_remove(parallel_worker_list, tmp); + } + _starpu_parallel_worker_list_delete(parallel_worker_list); + + free(group->params); + _starpu_parallel_worker_group_list_erase(group_list, group); + _starpu_parallel_worker_group_delete(group); + + return 0; +} + +void _starpu_parallel_worker_init_parameters(struct _starpu_parallel_worker_parameters *params) +{ + params->min_nb = 0; + params->max_nb = 0; + params->nb = 0; + params->sched_policy_name = NULL; + params->sched_policy_struct = NULL; + params->keep_homogeneous = 0; + params->prefere_min = 0; + params->create_func = NULL; + params->create_func_arg = NULL; + params->type = STARPU_PARALLEL_WORKER_OPENMP; + params->awake_workers = 0; + + return; +} + +void _starpu_parallel_worker_copy_parameters(struct _starpu_parallel_worker_parameters *src, struct _starpu_parallel_worker_parameters *dst) +{ + dst->min_nb = src->min_nb; + dst->max_nb = src->max_nb; + dst->nb = src->nb; + dst->sched_policy_name = src->sched_policy_name; + dst->sched_policy_struct = src->sched_policy_struct; +
dst->keep_homogeneous = src->keep_homogeneous; + dst->prefere_min = src->prefere_min; + dst->create_func = src->create_func; + dst->create_func_arg = src->create_func_arg; + dst->type = src->type; + dst->awake_workers = src->awake_workers; + + return; +} + +/* Considering the resources and parameters, how many parallel_workers should we take? */ +int _starpu_parallel_worker_analyze_parameters(struct _starpu_parallel_worker_parameters *params, int npus) +{ + int nb_parallel_workers = 1, j; + if (params->nb) + { + nb_parallel_workers = params->nb <= npus?params->nb : npus; + } + else if (params->min_nb && params->max_nb) + { + if (!params->keep_homogeneous) + { + if (params->prefere_min) + nb_parallel_workers = params->min_nb <= npus? params->min_nb : npus; + else + nb_parallel_workers = params->max_nb <= npus? params->max_nb : npus; + } + else + { + int begin = params->prefere_min? params->min_nb:params->max_nb; + int end = params->prefere_min? params->max_nb+1:params->min_nb-1; + j=begin; + int best = 0, second_best = 0, cpu_loss = INT_MAX; + while (j != end) + { + if (npus%j == 0) + { + best = j; + break; + } + if (npus%j < cpu_loss) + { + cpu_loss = npus%j; + second_best = j; + } + j = params->prefere_min? 
j+1:j-1; + } + + if (best) + nb_parallel_workers = best; + else if (second_best) + nb_parallel_workers = second_best; + } + } + + return nb_parallel_workers; +} + +int _starpu_parallel_worker_config(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) +{ + struct _starpu_parallel_worker_group *g; + int ret; + + ret = _starpu_parallel_worker_topology(parallel_worker_level, machine); + if (ret) + return ret; + + for (g = _starpu_parallel_worker_group_list_begin(machine->groups) ; + g != _starpu_parallel_worker_group_list_end(machine->groups) ; + g = _starpu_parallel_worker_group_list_next(g)) + if (_starpu_parallel_worker_group_create(g) == 0) + return -ENODEV; + + starpu_task_wait_for_all(); + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + + if (config->topology.nsched_ctxs == STARPU_NMAX_SCHED_CTXS) + /* Too many contexts already :/ */ + return -ENODEV; + + /* Create containing context */ + if (machine->params->sched_policy_struct != NULL) + { + machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", + STARPU_SCHED_CTX_POLICY_STRUCT, + machine->params->sched_policy_struct, + 0); + } + else if (machine->params->sched_policy_name != NULL) + { + machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", + STARPU_SCHED_CTX_POLICY_NAME, + machine->params->sched_policy_name, + 0); + } + else + { + struct starpu_sched_policy *sched_policy; + struct _starpu_sched_ctx *global_ctx =_starpu_get_sched_ctx_struct(STARPU_GLOBAL_SCHED_CTX); + sched_policy = _starpu_get_sched_policy(global_ctx); + machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", + STARPU_SCHED_CTX_POLICY_STRUCT, + sched_policy, 0); + } + + _starpu_parallel_workers_set_nesting(machine); + starpu_sched_ctx_set_context(&machine->id); + + return 0; +} + +int _starpu_parallel_worker_topology(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) +{ + int w; + hwloc_topology_t topology; + 
hwloc_cpuset_t avail_cpus; + + int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + if (nworkers == 0) + return -ENODEV; + + int *workers; + _STARPU_MALLOC(workers, sizeof(int) * nworkers); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, nworkers); + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + STARPU_ASSERT_MSG(config->topology.hwtopology != NULL, "STARPU_PARALLEL_WORKER: You " + "need to call starpu_init() or make sure to activate hwloc."); + hwloc_topology_dup(&topology, config->topology.hwtopology); + + avail_cpus = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(avail_cpus); + + for (w = 0; w < nworkers ; w++) + { + struct _starpu_worker *worker_str = _starpu_get_worker_struct(workers[w]); + hwloc_bitmap_or(avail_cpus, avail_cpus, worker_str->hwloc_cpu_set); + } + + hwloc_topology_restrict(topology, avail_cpus, 0); + hwloc_bitmap_free(avail_cpus); + free(workers); + + if (hwloc_get_nbobjs_by_type(topology, parallel_worker_level) <= 0) + return -ENODEV; + + /* Use new topology to fill in the parallel_worker list */ + machine->topology = topology; + _starpu_parallel_worker_group(parallel_worker_level, machine); + + return 0; +} + +void _starpu_parallel_worker_group(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) +{ + int nb_objects; + int i; + struct _starpu_parallel_worker_group *group = NULL; + + if (machine->groups == NULL) + machine->groups = _starpu_parallel_worker_group_list_new(); + + nb_objects = hwloc_get_nbobjs_by_type(machine->topology, parallel_worker_level); + STARPU_ASSERT(nb_objects > 0); + + group = _starpu_parallel_worker_group_list_begin(machine->groups); + for (i = 0 ; i < nb_objects ; i++) + { + hwloc_obj_t parallel_worker_obj = hwloc_get_obj_by_type(machine->topology, parallel_worker_level, i); + + if (group == NULL) + { + group = _starpu_parallel_worker_group_new(); + _starpu_parallel_worker_group_init(group, machine); + 
_starpu_parallel_worker_group_list_push_back(machine->groups, group); + } + + group->group_obj = parallel_worker_obj; + + _starpu_parallel_worker(group); + machine->ngroups++; + machine->nparallel_workers += group->nparallel_workers; + group = _starpu_parallel_worker_group_list_next(group); + } + + return; +} + +void _starpu_parallel_worker(struct _starpu_parallel_worker_group *group) +{ + int i, avail_pus, npus, npreset=0; + struct _starpu_parallel_worker *parallel_worker; + npus = hwloc_get_nbobjs_inside_cpuset_by_type(group->father->topology, + group->group_obj->cpuset, + HWLOC_OBJ_PU); + + /* Preset parallel_workers */ + avail_pus = npus; + for (parallel_worker=_starpu_parallel_worker_list_begin(group->parallel_workers); + parallel_worker!=_starpu_parallel_worker_list_end(group->parallel_workers); + parallel_worker=_starpu_parallel_worker_list_next(parallel_worker)) + { + if (parallel_worker->ncores > avail_pus) + parallel_worker->ncores = avail_pus; + else if (avail_pus == 0) + parallel_worker->ncores = 0; + + if (parallel_worker->ncores > 0) + { + _STARPU_MALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); + _STARPU_MALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); + avail_pus -= parallel_worker->ncores; + npreset++; + } + } + + /* Automatic parallel_workers */ + group->nparallel_workers = _starpu_parallel_worker_analyze_parameters(group->params, avail_pus); + for (i=0 ; inparallel_workers && avail_pus>0 ; i++) + { + if (parallel_worker == NULL) + { + parallel_worker = _starpu_parallel_worker_new(); + _starpu_parallel_worker_init(parallel_worker, group); + _starpu_parallel_worker_list_push_back(group->parallel_workers, parallel_worker); + } + + if (parallel_worker->ncores != 0 && parallel_worker->ncores > avail_pus) + { + parallel_worker->ncores = avail_pus; + } + else + { + if (parallel_worker->params->keep_homogeneous) + parallel_worker->ncores = avail_pus/(group->nparallel_workers-i); + else + parallel_worker->ncores 
= i==group->nparallel_workers-1? + avail_pus: + avail_pus/(group->nparallel_workers-i); + } + avail_pus -= parallel_worker->ncores; + _STARPU_MALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); + _STARPU_MALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); + + parallel_worker = _starpu_parallel_worker_list_next(parallel_worker); + } + group->nparallel_workers += npreset; + + parallel_worker = _starpu_parallel_worker_list_begin(group->parallel_workers); + int count = 0; + static int starpu_parallel_worker_warned = 0; + + for (i=0 ; ifather->topology, + group->group_obj->cpuset, + HWLOC_OBJ_PU, i); + + /* If we have more than one worker on this resource, let's add them too -- + even if it's bad (they'll all be boud on the same PU) */ + int size = 0, j; + struct _starpu_hwloc_userdata *data = pu->userdata; + struct _starpu_worker_list *list = data->worker_list; + struct _starpu_worker *worker_str; + for (worker_str = _starpu_worker_list_begin(list); + worker_str != _starpu_worker_list_end(list); + worker_str = _starpu_worker_list_next(worker_str)) + { + if (worker_str->arch == STARPU_CPU_WORKER) + size++; + } + + if (size > 1) + { + STARPU_HG_DISABLE_CHECKING(starpu_parallel_worker_warned); + if (!starpu_parallel_worker_warned) + { + _STARPU_DISP("STARPU PARALLEL_WORKERS: Caution! It seems that you have" + " multiple workers bound to the same PU. 
If you have" + " multithreading on your cores it is greatly advised" + " to export STARPU_NTHREADS_PER_CORE=nb.\n"); + starpu_parallel_worker_warned = 1; + } + parallel_worker->ncores += size-1; + _STARPU_REALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); + _STARPU_REALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); + } + + /* grab workerid list and return first cpu */ + worker_str = _starpu_worker_list_begin(list); + if (worker_str) + hwloc_bitmap_or(parallel_worker->cpuset, parallel_worker->cpuset, + worker_str->hwloc_cpu_set); + j = 0; + while (worker_str != _starpu_worker_list_end(list)) + { + if (worker_str->arch == STARPU_CPU_WORKER) + { + parallel_worker->cores[count+j] = worker_str->bindid; + parallel_worker->workerids[count+j] = worker_str->workerid; + j++; + } + worker_str = _starpu_worker_list_next(worker_str); + } + + count+=size; + if (parallel_worker->ncores == count) + { + count = 0; + parallel_worker = _starpu_parallel_worker_list_next(parallel_worker); + } + } + + return; +} + +struct starpu_cluster_machine STARPU_DEPRECATED +{ + unsigned id; + hwloc_topology_t topology; + unsigned nparallel_workers; + unsigned ngroups; + struct _starpu_parallel_worker_group_list *groups; + struct _starpu_parallel_worker_parameters *params; +}; + +struct starpu_cluster_machine *starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...) 
+{ + struct starpu_parallel_worker_config *config; + va_list varg_list; + va_start(varg_list, cluster_level); + config = _starpu_parallel_worker_init_varg(cluster_level, varg_list); + va_end(varg_list); + return (struct starpu_cluster_machine *)config; +} + +int starpu_uncluster_machine(struct starpu_cluster_machine *clusters) +{ + struct starpu_parallel_worker_config *c = (struct starpu_parallel_worker_config *)clusters; + return starpu_parallel_worker_shutdown(c); +} + +int starpu_cluster_print(struct starpu_cluster_machine *clusters) +{ + struct starpu_parallel_worker_config *c = (struct starpu_parallel_worker_config *)clusters; + return starpu_parallel_worker_print(c); +} + + +#endif diff --git a/src/parallel_worker/starpu_parallel_worker_create.h b/src/parallel_worker/starpu_parallel_worker_create.h new file mode 100644 index 0000000..0d4f7c0 --- /dev/null +++ b/src/parallel_worker/starpu_parallel_worker_create.h @@ -0,0 +1,127 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __STARPU_PARALLEL_WORKERS_CREATE_H__ +#define __STARPU_PARALLEL_WORKERS_CREATE_H__ + +/** @file */ + +#include +#include +#include +#include +#include +#ifdef STARPU_MKL +#include +#endif + +#ifdef STARPU_PARALLEL_WORKER + +#ifdef __cplusplus +extern +#endif + +#pragma GCC visibility push(hidden) + +struct starpu_parallel_worker_config +{ + unsigned id; + hwloc_topology_t topology; + unsigned nparallel_workers; + unsigned ngroups; + struct _starpu_parallel_worker_group_list *groups; + struct _starpu_parallel_worker_parameters *orig_params; + struct _starpu_parallel_worker_parameters *params; +}; + +struct _starpu_parallel_worker_parameters +{ + int min_nb; + int max_nb; + int nb; + char *sched_policy_name; + struct starpu_sched_policy *sched_policy_struct; + unsigned keep_homogeneous; + unsigned prefere_min; + void (*create_func)(void*); + void *create_func_arg; + int type; + unsigned awake_workers; +}; + +LIST_TYPE(_starpu_parallel_worker_group, + unsigned id; + hwloc_obj_t group_obj; + int nparallel_workers; + struct _starpu_parallel_worker_list *parallel_workers; + struct starpu_parallel_worker_config *father; + struct _starpu_parallel_worker_parameters *params; +) + +LIST_TYPE(_starpu_parallel_worker, + unsigned id; + hwloc_cpuset_t cpuset; + int ncores; + int *cores; + int *workerids; + struct _starpu_parallel_worker_group *father; + struct _starpu_parallel_worker_parameters *params; +) + +/** Machine discovery and parallel_worker creation main functions */ +int _starpu_parallel_worker_config(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); +int _starpu_parallel_worker_topology(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); +void _starpu_parallel_worker_group(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); +void _starpu_parallel_worker(struct _starpu_parallel_worker_group *group); + +/** Parameter functions */ +void 
_starpu_parallel_worker_init_parameters(struct _starpu_parallel_worker_parameters *globals); +void _starpu_parallel_worker_copy_parameters(struct _starpu_parallel_worker_parameters *src, struct _starpu_parallel_worker_parameters *dst); +int _starpu_parallel_worker_analyze_parameters(struct _starpu_parallel_worker_parameters *params, int npus); + +/** Parallel_Worker helper functions */ +void _starpu_parallel_worker_init(struct _starpu_parallel_worker *parallel_worker, struct _starpu_parallel_worker_group *father); +int _starpu_parallel_worker_create(struct _starpu_parallel_worker *parallel_worker); + +int _starpu_parallel_worker_bind(struct _starpu_parallel_worker *parallel_worker); +int _starpu_parallel_worker_remove(struct _starpu_parallel_worker_list *parallel_worker_list, struct _starpu_parallel_worker *parallel_worker); + +/** Parallel_Worker group helper function */ +void _starpu_parallel_worker_group_init(struct _starpu_parallel_worker_group *group, struct starpu_parallel_worker_config *father); +int _starpu_parallel_worker_group_create(struct _starpu_parallel_worker_group *group); +int _starpu_parallel_worker_group_remove(struct _starpu_parallel_worker_group_list *group_list, struct _starpu_parallel_worker_group *group); + +/** Binding helpers */ +void _starpu_parallel_worker_noop(void *buffers[], void *cl_arg) +{ + (void) buffers; + (void) cl_arg; +} + +static struct starpu_codelet _starpu_parallel_worker_bind_cl= +{ + .cpu_funcs = {_starpu_parallel_worker_noop}, + .nbuffers = 0, + .name = "parallel_worker_internal_runtime_init" +}; + +typedef void (*starpu_binding_function)(void*); +starpu_binding_function _starpu_parallel_worker_type_get_func(enum starpu_parallel_worker_types type); + +#pragma GCC visibility pop + +#endif +#endif /* __STARPU_PARALLEL_WORKERS_CREATE_H__ */ diff --git a/src/profiling/bound.c b/src/profiling/bound.c new file mode 100644 index 0000000..7a99d63 --- /dev/null +++ b/src/profiling/bound.c @@ -0,0 +1,1209 @@ +/* StarPU --- 
Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * Record which kinds of tasks have been executed, to later on compute an upper + * bound of the performance that could have theoretically been achieved + */ + +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_HAVE_GLPK_H +#include +#endif /* STARPU_HAVE_GLPK_H */ + +/* TODO: output duration between starpu_bound_start and starpu_bound_stop */ + +/* TODO: compute critical path and introduce it in the LP */ + +/* + * Record without dependencies: just count each kind of task + * + * The linear programming problem will just have as variables: + * - the number of tasks of kind `t' executed by worker `w' + * - the total duration + * + * and the constraints will be: + * - the time taken by each worker to complete its assigned tasks is lower than + * the total duration. + * - the total number of tasks of a given kind is equal to the number run by the + * application. 
+ */ +struct bound_task_pool +{ + /* Which codelet has been executed */ + struct starpu_codelet *cl; + /* Task footprint key (for history-based perfmodel) */ + uint32_t footprint; + /* Number of tasks of this kind */ + unsigned long n; + /* Other task kinds */ + struct bound_task_pool *next; +}; + +/* + * Record with dependencies: each task is recorded separately + * + * The linear programming problem will have as variables: + * - The start time of each task + * - The completion time of each tag + * - The total duration + * - For each task and for each worker, whether the task is executing on that worker. + * - For each pair of task, which task is scheduled first. + * + * and the constraints will be: + * - All task start time plus duration are less than total duration + * - Each task is executed on exactly one worker. + * - Each task starts after all its task dependencies finish. + * - Each task starts after all its tag dependencies finish. + * - For each task pair and each worker, if both tasks are executed by that worker, + * one is started after the other's completion. + */ +struct task_dep +{ + /* Task this depends on */ + struct bound_task *dep; + /* Data transferred between tasks (i.e. 
implicit data dep size) */ + size_t size; +}; +struct bound_task +{ + /* Unique ID */ + unsigned long id; + /* Tag ID, if any */ + starpu_tag_t tag_id; + int use_tag; + /* Which codelet has been executed */ + struct starpu_codelet *cl; + /* Task footprint key */ + uint32_t footprint; + /* Task priority */ + int priority; + /* Tasks this one depends on */ + struct task_dep *deps; + int depsn; + + /* Estimated duration */ + double** duration[STARPU_NARCH]; + + /* Other tasks */ + struct bound_task *next; +}; + +struct bound_tag_dep +{ + starpu_tag_t tag; + starpu_tag_t dep_tag; + struct bound_tag_dep *next; +}; + +static struct bound_task_pool *task_pools, *last; +static struct bound_task *tasks; +static struct bound_tag_dep *tag_deps; +int _starpu_bound_recording; +static int recorddeps; +static int recordprio; + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + +static void _starpu_bound_clear(int record, int deps, int prio) +{ + struct bound_task_pool *tp; + struct bound_task *t; + struct bound_tag_dep *td; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + tp = task_pools; + task_pools = NULL; + last = NULL; + + t = tasks; + tasks = NULL; + + td = tag_deps; + tag_deps = NULL; + + _starpu_bound_recording = record; + recorddeps = deps; + recordprio = prio; + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + while (tp != NULL) + { + struct bound_task_pool *next = tp->next; + free(tp); + tp = next; + } + + while (t != NULL) + { + struct bound_task *next = t->next; + unsigned i,j; + for (i = 0; i < STARPU_NARCH; i++) + { + if (t->duration[i]) + { + for (j = 0; t->duration[i][j]; j++) + free(t->duration[i][j]); + free(t->duration[i]); + } + } + free(t->deps); + free(t); + t = next; + } + + while (td != NULL) + { + struct bound_tag_dep *next = td->next; + free(td); + td = next; + } +} + +void starpu_bound_clear(void) +{ + _starpu_bound_clear(0, 0, 0); +} + +/* Initialization */ +void starpu_bound_start(int deps, int prio) +{ + _starpu_bound_clear(1, deps, 
prio); +} + +/* Whether we will include it in the computation */ +static int good_job(struct _starpu_job *j) +{ + /* No codelet, nothing to measure */ + if (j->exclude_from_dag) + return 0; + if (!j->task->cl) + return 0; + /* No performance model, no time duration estimation */ + if (!j->task->cl->model) + return 0; + /* Only support history based */ + if (j->task->cl->model->type != STARPU_HISTORY_BASED + && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED) + return 0; + return 1; +} +static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table) +{ + int devid, maxncore; + double ** arch_model; + _STARPU_MALLOC(arch_model, sizeof(*arch_model)*(maxdevid+1)); + arch_model[maxdevid] = NULL; + for(devid=0; devidduration[type] = initialize_arch_duration(conf->topology.nhwdevices[type], conf->topology.nworker[type]); +} + +static struct starpu_perfmodel_device device = +{ + .type = STARPU_CPU_WORKER, + .devid = 0, + .ncores = 1, +}; +static struct starpu_perfmodel_arch dumb_arch = +{ + .ndevices = 1, + .devices = &device, +}; + +/* Create a new task (either because it has just been submitted, or a + * dependency was added before submission) */ +static void new_task(struct _starpu_job *j) +{ + struct bound_task *t; + + if (j->bound_task) + return; + + _STARPU_CALLOC(t, 1, sizeof(*t)); + t->id = j->job_id; + t->tag_id = j->task->tag_id; + t->use_tag = j->task->use_tag; + t->cl = j->task->cl; + t->footprint = _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, &dumb_arch, 0, j); + t->priority = j->task->priority; + t->deps = NULL; + t->depsn = 0; + initialize_duration(t); + t->next = tasks; + j->bound_task = t; + tasks = t; +} + +/* A new task was submitted, record it */ +void _starpu_bound_record(struct _starpu_job *j) +{ + if (STARPU_LIKELY(!_starpu_bound_recording)) + return; + + if (!good_job(j)) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + /* Re-check, this time with mutex held */ + if (!_starpu_bound_recording) + { + 
STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + + if (recorddeps) + { + new_task(j); + } + else + { + struct bound_task_pool *tp; + + _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, NULL, 0, j); + + if (last && last->cl == j->task->cl && last->footprint == j->footprint) + tp = last; + else + for (tp = task_pools; tp; tp = tp->next) + if (tp->cl == j->task->cl && tp->footprint == j->footprint) + break; + + if (!tp) + { + _STARPU_MALLOC(tp, sizeof(*tp)); + tp->cl = j->task->cl; + tp->footprint = j->footprint; + tp->n = 0; + tp->next = task_pools; + task_pools = tp; + } + + /* One more task of this kind */ + tp->n++; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +/* A tag dependency was emitted, record it */ +void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id) +{ + struct bound_tag_dep *td; + + if (!_starpu_bound_recording || !recorddeps) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + /* Re-check, this time with mutex held */ + if (!_starpu_bound_recording || !recorddeps) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + + _STARPU_MALLOC(td, sizeof(*td)); + td->tag = id; + td->dep_tag = dep_id; + td->next = tag_deps; + tag_deps = td; + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +/* A task dependency was emitted, record it */ +void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j) +{ + struct bound_task *t; + int i; + + if (!_starpu_bound_recording || !recorddeps) + return; + + if (!good_job(j) || !good_job(dep_j)) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + /* Re-check, this time with mutex held */ + if (!_starpu_bound_recording || !recorddeps) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + + new_task(j); + new_task(dep_j); + t = j->bound_task; + for (i = 0; i < t->depsn; i++) + if (t->deps[i].dep == dep_j->bound_task) + break; + if (i == t->depsn) + { + /* Not already there, add */ + _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0])); + t->deps[t->depsn-1].dep = 
dep_j->bound_task; + t->deps[t->depsn-1].size = 0; /* We don't have data information in that case */ + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +/* Look for job with id ID among our tasks */ +static struct bound_task *find_job(unsigned long id) +{ + struct bound_task *t; + + for (t = tasks; t; t = t->next) + if (t->id == id) + return t; + return NULL; +} + +/* Job J depends on previous job of id ID (which is already finished) */ +void _starpu_bound_job_id_dep_size(size_t size, struct _starpu_job *j, unsigned long id) +{ + struct bound_task *t, *dep_t; + int i; + + if (!_starpu_bound_recording || !recorddeps) + return; + + if (!good_job(j)) + return; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + /* Re-check, this time with mutex held */ + if (!_starpu_bound_recording || !recorddeps) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + + new_task(j); + dep_t = find_job(id); + if (!dep_t) + { + _STARPU_MSG("dependency %lu not found !\n", id); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + t = j->bound_task; + for (i = 0; i < t->depsn; i++) + if (t->deps[i].dep == dep_t) + { + /* Found, just add size */ + t->deps[i].size += size; + break; + } + if (i == t->depsn) + { + /* Not already there, add */ + _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0])); + t->deps[t->depsn-1].dep = dep_t; + t->deps[t->depsn-1].size = size; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j, unsigned long id) +{ + if (!_starpu_bound_recording || !recorddeps) + return; + + if (!good_job(j)) + return; + + _starpu_bound_job_id_dep_size(_starpu_data_get_size(handle), j, id); +} + +void starpu_bound_stop(void) +{ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + _starpu_bound_recording = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +/* Compute all tasks times on all workers */ +static void _starpu_get_tasks_times(int nw, int nt, double *times) +{ + struct bound_task_pool *tp; + int w, t; + for (w = 0; w < 
nw; w++) + { + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + { + struct _starpu_job j = + { + .footprint = tp->footprint, + .footprint_is_computed = 1, + }; + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); + double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl) + - _starpu_history_based_job_expected_deviation(tp->cl->model, arch, &j, j.nimpl); + if (isnan(length)) + times[w*nt+t] = NAN; + else + times[w*nt+t] = length / 1000.; + } + } +} + +/* Return whether PARENT is an ancestor of CHILD */ +static int ancestor(struct bound_task *child, struct bound_task *parent) +{ + int i; + for (i = 0; i < child->depsn; i++) + { + if (parent == child->deps[i].dep) + return 1; + if (ancestor(child->deps[i].dep, parent)) + return -1; + } + return 0; +} + +/* Print bound recording in .dot format */ +void starpu_bound_print_dot(FILE *output) +{ + struct bound_task *t; + struct bound_tag_dep *td; + int i; + + if (!recorddeps) + { + fprintf(output, "Dependencies were not enabled in the starpu_bound_start call, thus not supported\n"); + return; + } + fprintf(output, "strict digraph bounddeps {\n"); + for (t = tasks; t; t = t->next) + { + fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl)); + for (i = 0; i < t->depsn; i++) + fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i].dep->id, t->id); + } + for (td = tag_deps; td; td = td->next) + fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag); + fprintf(output, "}\n"); +} + +/* + * Print bound system in lp_solve format + * + * When dependencies are enabled, you can check the set of tasks and deps that + * were recorded by using tools/lp2paje and vite. 
+ */ +void starpu_bound_print_lp(FILE *output) +{ + int nt; /* Number of different kinds of tasks */ + int nw; /* Number of different workers */ + int t; + int w, w2; /* worker */ + unsigned n, n2; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + nw = starpu_worker_get_count(); + if (!nw) + /* Make llvm happy about the VLA below */ + return; + + if (recorddeps) + { + struct bound_task *t1, *t2; + struct bound_tag_dep *td; + int i; + + nt = 0; + for (t1 = tasks; t1; t1 = t1->next) + { + if (t1->cl->model->type != STARPU_HISTORY_BASED && + t1->cl->model->type != STARPU_NL_REGRESSION_BASED) + /* TODO: */ + _STARPU_MSG("Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl)); + + struct _starpu_job j = + { + .footprint = t1->footprint, + .footprint_is_computed = 1, + }; + for (w = 0; w < nw; w++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); + if (_STARPU_IS_ZERO(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) + { + double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl) + - _starpu_history_based_job_expected_deviation(t1->cl->model, arch, &j,j.nimpl); + if (isnan(length)) + /* Avoid problems with binary coding of doubles */ + t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = NAN; + else + t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = length / 1000.; + } + } + nt++; + } + if (!nt) + return; + fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n"); + fprintf(output, "/* !! This is a big system, it will be long to solve !! 
*/\n\n"); + + fprintf(output, "/* We want to minimize total execution time (ms) */\n"); + fprintf(output, "min: tmax;\n\n"); + + fprintf(output, "/* Number of tasks */\n"); + fprintf(output, "nt = %d;\n", nt); + fprintf(output, "/* Number of workers */\n"); + fprintf(output, "nw = %d;\n", nw); + + fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n"); + for (t1 = tasks; t1; t1 = t1->next) + fprintf(output, "c%lu <= tmax;\n", t1->id); + + fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n"); + for (t1 = tasks; t1; t1 = t1->next) + { + for (w = 0; w < nw; w++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); + if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) + fprintf(output, " +t%luw%d", t1->id, w); + } + fprintf(output, " = 1;\n"); + } + + fprintf(output, "\n/* Completion time is start time plus computation time */\n"); + fprintf(output, "/* According to where the task is indeed executed */\n"); + for (t1 = tasks; t1; t1 = t1->next) + { + fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id); + for (w = 0; w < nw; w++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); + if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) + fprintf(output, " + %f t%luw%d", t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores], t1->id, w); + } + fprintf(output, ";\n"); + } + + fprintf(output, "\n/* Each task starts after all its task dependencies finish and data is transferred. 
*/\n"); + fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n"); + for (t1 = tasks; t1; t1 = t1->next) + for (i = 0; i < t1->depsn; i++) + { + fprintf(output, "/* %lu bytes transferred */\n", (unsigned long) t1->deps[i].size); + fprintf(output, "s%lu >= c%lu", t1->id, t1->deps[i].dep->id); + /* Transfer time: pick up one source node and a worker on it */ + for (n = 0; n < starpu_memory_nodes_get_count(); n++) + for (w = 0; w < nw; w++) + if (starpu_worker_get_memory_node(w) == n) + { + /* pick up another destination node and a worker on it */ + for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++) + if (n2 != n) + { + for (w2 = 0; w2 < nw; w2++) + if (starpu_worker_get_memory_node(w2) == n2) + { + /* If predecessor is on worker w and successor + * on worker w2 on different nodes, we need to + * transfer the data. */ + fprintf(output, " + d_t%luw%dt%luw%d", t1->deps[i].dep->id, w, t1->id, w2); + + } + } + } + fprintf(output, ";\n"); + /* Transfer time: pick up one source node and a worker on it */ + for (n = 0; n < starpu_memory_nodes_get_count(); n++) + for (w = 0; w < nw; w++) + if (starpu_worker_get_memory_node(w) == n) + { + /* pick up another destination node and a worker on it */ + for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++) + if (n2 != n) + { + for (w2 = 0; w2 < nw; w2++) + if (starpu_worker_get_memory_node(w2) == n2) + { + /* The data transfer is at least 0ms */ + fprintf(output, "d_t%luw%dt%luw%d >= 0;\n", t1->deps[i].dep->id, w, t1->id, w2); + /* The data transfer from w to w2 only happens if tasks run there */ + fprintf(output, "d_t%luw%dt%luw%d >= %f - 2e5 + 1e5 t%luw%d + 1e5 t%luw%d;\n", + t1->deps[i].dep->id, w, t1->id, w2, + starpu_transfer_predict(n, n2, t1->deps[i].size)/1000., + t1->deps[i].dep->id, w, t1->id, w2); + } + } + } + } + + + fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */\n"); + for (t1 = tasks; t1; t1 = t1->next) + if (t1->use_tag) + { + 
for (w = 0; w < nw; w++) + fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id); + } + + fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n"); + for (td = tag_deps; td; td = td->next) + fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag); + +/* TODO: factorize ancestor calls */ + fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n"); + fprintf(output, " one is started after the other's completion */\n"); + for (t1 = tasks; t1; t1 = t1->next) + { + for (t2 = t1->next; t2; t2 = t2->next) + { + if (!ancestor(t1, t2) && !ancestor(t2, t1)) + { + for (w = 0; w < nw; w++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); + if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) + { + fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n", + t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id); + fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n", + t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id); + } + } + } + } + } + +#if 0 +/* Doesn't help at all to actually express what "after" means */ + for (t1 = tasks; t1; t1 = t1->next) + for (t2 = t1->next; t2; t2 = t2->next) + if (!ancestor(t1, t2) && !ancestor(t2, t1)) + { + fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id); + fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id); + } +#endif + + if (recordprio) + { + fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n"); + fprintf(output, " * more prioritized first */\n"); + for (t1 = tasks; t1; t1 = t1->next) + { + for (t2 = t1->next; t2; t2 = t2->next) + { + if (!ancestor(t1, t2) && !ancestor(t2, t1) + && t1->priority != t2->priority) + { + if 
(t1->priority > t2->priority) + { + /* Either t2 is scheduled before t1, but then it + needs to be scheduled before some t dep finishes */ + + /* One of the t1 deps to give the maximum start time for t2 */ + if (t1->depsn > 1) + { + for (i = 0; i < t1->depsn; i++) + fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i); + fprintf(output, " = 1;\n"); + } + + for (i = 0; i < t1->depsn; i++) + { + fprintf(output, "c%lu - s%lu >= ", t1->deps[i].dep->id, t2->id); + if (t1->depsn > 1) + /* Only checks this when it's this dependency that is chosen */ + fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i); + else + fprintf(output, "-1e5"); + /* Only check this if t1 is after t2 */ + fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id); + fprintf(output, ";\n"); + } + + /* Or t2 is scheduled after t1 is. */ + fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id); + } + else + { + /* Either t1 is scheduled before t2, but then it + needs to be scheduled before some t2 dep finishes */ + + /* One of the t2 deps to give the maximum start time for t1 */ + if (t2->depsn > 1) + { + for (i = 0; i < t2->depsn; i++) + fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i); + fprintf(output, " = 1;\n"); + } + + for (i = 0; i < t2->depsn; i++) + { + fprintf(output, "c%lu - s%lu >= ", t2->deps[i].dep->id, t1->id); + if (t2->depsn > 1) + /* Only checks this when it's this dependency that is chosen */ + fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i); + /* Only check this if t2 is after t1 */ + fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id); + } + + /* Or t1 is scheduled after t2 is. 
*/ + fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id); + } + } + } + } + } + + + for (t1 = tasks; t1; t1 = t1->next) + for (t2 = t1->next; t2; t2 = t2->next) + if (!ancestor(t1, t2) && !ancestor(t2, t1)) + { + fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id); + if (recordprio && t1->priority != t2->priority) + { + if (t1->priority > t2->priority) + { + if (t1->depsn > 1) + for (i = 0; i < t1->depsn; i++) + fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i); + } + else + { + if (t2->depsn > 1) + for (i = 0; i < t2->depsn; i++) + fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i); + } + } + } + + for (t1 = tasks; t1; t1 = t1->next) + for (w = 0; w < nw; w++) + fprintf(output, "bin t%luw%d;\n", t1->id, w); + } + else + { + struct bound_task_pool *tp; + nt = 0; + for (tp = task_pools; tp; tp = tp->next) + nt++; + if (!nt) + return; + + { + double times[nw*nt]; + + _starpu_get_tasks_times(nw, nt, times); + + fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. 
*/\n\n"); + fprintf(output, "/* We want to minimize total execution time (ms) */\n"); + fprintf(output, "min: tmax;\n\n"); + + fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n"); + for (w = 0; w < nw; w++) + { + char name[32]; + starpu_worker_get_name(w, name, sizeof(name)); + fprintf(output, "/* worker %s */\n0", name); + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + { + if (!isnan(times[w*nt+t])) + fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t); + } + fprintf(output, " <= tmax;\n"); + } + fprintf(output, "\n"); + + fprintf(output, "/* And we have to have computed exactly all tasks */\n"); + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + { + int got_one = 0; + fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); + for (w = 0; w < nw; w++) + { + if (isnan(times[w*nt+t])) + _STARPU_MSG("Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w); + else + { + got_one = 1; + fprintf(output, "\t+w%dt%dn", w, t); + } + } + fprintf(output, " = %lu;\n", tp->n); + if (!got_one) + _STARPU_MSG("Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl)); + /* Show actual values */ + fprintf(output, "/*"); + for (w = 0; w < nw; w++) + fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]); + fprintf(output, "\t*/\n\n"); + } + + fprintf(output, "/* Optionally tell that tasks can not be divided */\n"); + fprintf(output, "/* int "); + int first = 1; + for (w = 0; w < nw; w++) + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + { + if (!first) + fprintf(output, ","); + else + first = 0; + fprintf(output, "w%dt%dn", w, t); + } + fprintf(output, "; */\n"); + } + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +/* + * Print bound system in MPS output format + */ +void starpu_bound_print_mps(FILE *output) +{ + struct bound_task_pool 
* tp; + int nt; /* Number of different kinds of tasks */ + int nw; /* Number of different workers */ + int t, w; + + if (recorddeps) + { + fprintf(output, "Dependencies were enabled in the starpu_bound_start call, thus not supported\n"); + return; + } + + nw = starpu_worker_get_count(); + if (!nw) + /* Make llvm happy about the VLA below */ + return; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + nt = 0; + for (tp = task_pools; tp; tp = tp->next) + nt++; + if (!nt) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + return; + } + + { + double times[nw*nt]; + + _starpu_get_tasks_times(nw, nt, times); + + fprintf(output, "NAME StarPU theoretical bound\n"); + + fprintf(output, "*\nROWS\n"); + + fprintf(output, "* We want to minimize total execution time (ms)\n"); + fprintf(output, " N TMAX\n"); + + fprintf(output, "* Which is the maximum of all worker execution times (ms)\n"); + for (w = 0; w < nw; w++) + { + char name[32]; + starpu_worker_get_name(w, name, sizeof(name)); + fprintf(output, "* worker %s\n", name); + fprintf(output, " L W%d\n", w); + } + + fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n"); + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + { + fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); + fprintf(output, " E T%d\n", t); + } + + fprintf(output, "*\nCOLUMNS\n*\n"); + + fprintf(output, "*\n* Execution times and completion of all tasks\n*\n"); + for (w = 0; w < nw; w++) + for (t = 0, tp = task_pools; tp; t++, tp = tp->next) + if (!isnan(times[w*nt+t])) + { + char name[23]; + snprintf(name, sizeof(name), "W%dT%d", w, t); + fprintf(output," %-8s W%-7d %12f\n", name, w, times[w*nt+t]); + fprintf(output," %-8s T%-7d %12d\n", name, t, 1); + } + + fprintf(output, "*\n* Total execution time\n*\n"); + for (w = 0; w < nw; w++) + fprintf(output," TMAX W%-2d %12d\n", w, -1); + fprintf(output," TMAX TMAX %12d\n", 1); + + fprintf(output, "*\nRHS\n*\n"); + + fprintf(output, "*\n* Total number 
/*
 * Solve bound system thanks to GNU Linear Programming Kit backend
 */
#ifdef STARPU_HAVE_GLPK_H
/*
 * Build the "theoretical bound" linear program from the recorded task pools
 * and solve it with GLPK.
 *
 * Columns: one variable w<W>t<T>n per (worker, task pool) pair holding the
 * number of tasks of pool T run by worker W, plus a last column for tmax.
 * Rows 1..nw: for each worker, sum(time * count) - tmax <= 0.
 * Rows nw+1..nw+nt: for each pool, sum over workers of count == tp->n.
 *
 * When `integer' is non-zero the per-pair columns are declared integer and
 * glp_intopt() is run after the simplex.
 *
 * Returns the solved problem, which the caller must release with
 * glp_delete_prob(), or NULL when there is no worker, no recorded task pool,
 * a pool without any timing on any worker, or when glp_simplex() fails.
 *
 * Both callers in this file hold `mutex' around the call.
 */
static glp_prob *_starpu_bound_glp_resolve(int integer)
{
	struct bound_task_pool *tp;
	int nt; /* Number of different kinds of tasks */
	int nw; /* Number of different workers */
	int t, w;
	glp_prob *lp;
	int ret;

	nw = starpu_worker_get_count();
	if (!nw)
		/* Make llvm happy about the VLA below */
		return NULL;
	nt = 0;
	for (tp = task_pools; tp; tp = tp->next)
		nt++;
	if (!nt)
		return NULL;

	lp = glp_create_prob();
	glp_set_prob_name(lp, "StarPU theoretical bound");
	glp_set_obj_dir(lp, GLP_MIN);
	glp_set_obj_name(lp, "total execution time");

	{
		double times[nw*nt];
		int ne =
			nw * (nt+1)	/* worker execution time */
			+ nt * nw	/* task completion */
			+ 1;		/* GLPK arrays are 1-based, slot 0 is unused */
		int n = 1;
		int ia[ne], ja[ne];
		double ar[ne];

		_starpu_get_tasks_times(nw, nt, times);

		/* Variables: number of tasks i assigned to worker j, and tmax */
		glp_add_cols(lp, nw*nt+1);
		/* Also used by starpu_bound_print(), which has its own local nw */
#define colnum(w, t) ((t)*nw+(w)+1)
		glp_set_obj_coef(lp, nw*nt+1, 1.);

		for (w = 0; w < nw; w++)
			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			{
				char name[32];
				snprintf(name, sizeof(name), "w%dt%dn", w, t);
				glp_set_col_name(lp, colnum(w, t), name);
				if (integer)
					glp_set_col_kind(lp, colnum(w, t), GLP_IV);
				glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
			}
		glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);

		/* Total worker execution time */
		glp_add_rows(lp, nw);
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			int someone = 0;
			for (w = 0; w < nw; w++)
				if (!isnan(times[w*nt+t]))
					someone = 1;
			if (!someone)
			{
				/* This task does not have any performance model at all, abort */
				glp_delete_prob(lp);
				return NULL;
			}
		}
		for (w = 0; w < nw; w++)
		{
			char name[32], title[64];
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "worker %s", name);
			glp_set_row_name(lp, w+1, title);
			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			{
				ia[n] = w+1;
				ja[n] = colnum(w, t);
				if (isnan(times[w*nt+t]))
					/* No measurement: huge cost so the solver avoids this pair */
					ar[n] = 1000000000.;
				else
					ar[n] = times[w*nt+t];
				n++;
			}
			/* tmax */
			ia[n] = w+1;
			ja[n] = nw*nt+1;
			ar[n] = -1;
			n++;
			glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
		}

		/* Total task completion */
		glp_add_rows(lp, nt);
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			char title[64];
			/* No worker name is needed here: the previous code queried
			 * starpu_worker_get_name() with w == nw, i.e. one past the
			 * last worker, and never used the result. */
			snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
			glp_set_row_name(lp, nw+t+1, title);
			for (w = 0; w < nw; w++)
			{
				ia[n] = nw+t+1;
				ja[n] = colnum(w, t);
				ar[n] = 1;
				n++;
			}
			glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
		}

		STARPU_ASSERT(n == ne);

		glp_load_matrix(lp, ne-1, ia, ja, ar);
	}

	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	ret = glp_simplex(lp, &parm);
	if (ret)
	{
		glp_delete_prob(lp);
		return NULL;
	}
	if (integer)
	{
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
		glp_intopt(lp, &iocp);
	}

	return lp;
}
#endif /* STARPU_HAVE_GLPK_H */

/*
 * Print the computed bound as well as the optimized distribution of tasks.
 *
 * `integer' selects the integer solution instead of the relaxed one.
 * Without GLPK support, or when dependencies were recorded, only an
 * explanatory message is written to `output'.
 */
void starpu_bound_print(FILE *output, int integer)
{
#ifdef STARPU_HAVE_GLPK_H
	if (recorddeps)
	{
		fprintf(output, "Dependencies were enabled in the starpu_bound_start call, thus not supported\n");
		return;
	}

	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
	glp_prob *lp = _starpu_bound_glp_resolve(integer);
	if (lp)
	{
		struct bound_task_pool *tp;
		int t, w;
		int nw; /* Number of different workers, also read by colnum() */
		double tmax;

		nw = starpu_worker_get_count();

		if (integer)
			tmax = glp_mip_obj_val(lp);
		else
			tmax = glp_get_obj_val(lp);

		fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);

		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
			for (w = 0; w < nw; w++)
			{
				if (integer)
					fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
				else
					fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
			}
			fprintf(output, "\n");
		}

		glp_delete_prob(lp);
	}
	else
	{
		_STARPU_MSG("Simplex failed\n");
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
#else /* STARPU_HAVE_GLPK_H */
	(void) integer;
	fprintf(output, "Please rebuild StarPU with glpk installed.\n");
#endif /* STARPU_HAVE_GLPK_H */
}

/*
 * Compute and return the bound.
 *
 * *res receives the relaxed bound, or 0 when it cannot be computed (no GLPK
 * support, dependencies recorded, or the solver returned no problem).
 * When `integer' is non-zero and `integer_res' is not NULL, *integer_res
 * receives the integer bound, or 0 in those same failure cases, so that the
 * caller never reads an indeterminate value.  When `integer' is zero,
 * *integer_res is left untouched.
 */
void starpu_bound_compute(double *res, double *integer_res, int integer)
{
	double bound = 0.;
	double integer_bound = 0.;

#ifdef STARPU_HAVE_GLPK_H
	if (!recorddeps)
	{
		STARPU_PTHREAD_MUTEX_LOCK(&mutex);
		glp_prob *lp = _starpu_bound_glp_resolve(integer);
		if (lp)
		{
			bound = glp_get_obj_val(lp);
			if (integer)
				integer_bound = glp_mip_obj_val(lp);
			glp_delete_prob(lp);
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
	}
#endif /* STARPU_HAVE_GLPK_H */

	*res = bound;
	if (integer && integer_res)
		*integer_res = integer_bound;
}
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __BOUND_H__ +#define __BOUND_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +/** Are we recording? */ +extern int _starpu_bound_recording; + +/** Record task for bound computation */ +extern void _starpu_bound_record(struct _starpu_job *j); + +/** Record tag dependency: id depends on dep_id */ +extern void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id); + +/** Record task dependency: j depends on dep_j */ +extern void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j); + +/** Record job id dependency: j depends on job_id */ +extern void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *dep_j, unsigned long job_id); + +/** Clear recording */ +extern void starpu_bound_clear(void); + +#pragma GCC visibility pop + +#endif // __BOUND_H__ diff --git a/src/profiling/callbacks.c b/src/profiling/callbacks.c new file mode 100644 index 0000000..8b05bcd --- /dev/null +++ b/src/profiling/callbacks.c @@ -0,0 +1,260 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#ifdef HAVE_DLOPEN +#include +#endif +#include +#include +#include +#include +#include + +#define STARPU_NB_CALLBACKS 17 +struct _starpu_prof_tool_callbacks starpu_prof_tool_callbacks; +starpu_prof_tool_cb_func *_starpu_prof_tool_callback_map[STARPU_NB_CALLBACKS]; +#ifdef HAVE_DLOPEN +static void *lib_handle=NULL; +#endif + +/** + Dummy implementations of the callbacks +*/ +static void _starpu_prof_tool_event_dummy_func(struct starpu_prof_tool_info *pti, union starpu_prof_tool_event_info *ptei, struct starpu_prof_tool_api_info *ptai) +{ +} + +void starpu_profiling_init_lib() +{ + starpu_prof_tool_callbacks.starpu_prof_tool_event_init = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end = &_starpu_prof_tool_event_dummy_func; + + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end = &_starpu_prof_tool_event_dummy_func; + + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec = &_starpu_prof_tool_event_dummy_func; + 
starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer = &_starpu_prof_tool_event_dummy_func; + + starpu_prof_tool_callbacks.starpu_prof_tool_event_user_start = &_starpu_prof_tool_event_dummy_func; + starpu_prof_tool_callbacks.starpu_prof_tool_event_user_end = &_starpu_prof_tool_event_dummy_func; +} + +struct starpu_prof_tool_info _starpu_prof_tool_get_info(enum starpu_prof_tool_event event_type, int device_num, int workerid, enum starpu_prof_tool_driver_type driver, unsigned int memnode, void *fun_ptr) +{ + struct starpu_prof_tool_info ret; + + ret.event_type = event_type; + ret.starpu_version[0] = STARPU_MAJOR_VERSION; + ret.starpu_version[1] = STARPU_MINOR_VERSION; + ret.starpu_version[2] = STARPU_RELEASE_VERSION; + ret.device_number = device_num; + ret.driver_type = driver; + ret.fun_ptr = fun_ptr; + ret.memnode = memnode; + + ret.thread_id = (int)pthread_self(); + ret.worker_id = workerid; + + /* unused fields */ + ret.conf = NULL; + ret.bytes_to_transfer = 0; + ret.bytes_transfered = 0; + + return ret; +} + +/** + This function is specific for data transfers, in order to keep the prototypes simple +*/ +struct starpu_prof_tool_info _starpu_prof_tool_get_info_d(enum starpu_prof_tool_event event_type, int device_num, int workerid, enum starpu_prof_tool_driver_type driver, unsigned memnode, unsigned to_transfer, unsigned transfered) +{ + struct starpu_prof_tool_info ret; + + ret.event_type = event_type; + ret.starpu_version[0] = STARPU_MAJOR_VERSION; + ret.starpu_version[1] = STARPU_MINOR_VERSION; + ret.starpu_version[2] = STARPU_RELEASE_VERSION; + ret.device_number = device_num; + ret.driver_type = driver; + ret.memnode = memnode; + ret.bytes_to_transfer = to_transfer; + ret.bytes_transfered = transfered; + ret.fun_ptr = NULL; + + ret.thread_id = (int)pthread_self(); + ret.worker_id = workerid; + + /* unused fields */ + ret.conf = NULL; + ret.fun_ptr 
= NULL; + + return ret; +} + +struct starpu_prof_tool_info _starpu_prof_tool_get_info_init(enum starpu_prof_tool_event event_type, int device_num, enum starpu_prof_tool_driver_type driver, struct starpu_conf* conf) +{ + struct starpu_prof_tool_info ret; + + ret.event_type = event_type; + ret.starpu_version[0] = STARPU_MAJOR_VERSION; + ret.starpu_version[1] = STARPU_MINOR_VERSION; + ret.starpu_version[2] = STARPU_RELEASE_VERSION; + ret.device_number = device_num; + ret.driver_type = driver; + ret.conf = conf; + + ret.thread_id = (int)pthread_self(); + ret.worker_id = 0; + + /* unused fields */ + ret.memnode = -1; + ret.bytes_to_transfer = 0; + ret.bytes_transfered = 0; + ret.fun_ptr = NULL; + + return ret; +} + +// The name of the function below is important so it can be found in a library preloaded with LD_PRELOAD (necessary for TAU and Apex) +__attribute__((weak)) void starpu_prof_tool_library_register(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg) +{ + (void) reg; + (void) unreg; +} + +/** + Register a callback for a given event. + TODO use a list in order to link multiple callbacks +*/ +void _starpu_prof_tool_register_cb(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info) +{ + (void) info; + *(_starpu_prof_tool_callback_map[event_type]) = cb; +} + +/** + Unregister a callback for a given event. 
+ TODO use a list in order to link multiple callbacks +*/ +void _starpu_prof_tool_unregister_cb(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info) +{ + (void) info; + (void) cb; + *(_starpu_prof_tool_callback_map[event_type]) = NULL; +} + +#ifdef STARPU_PROF_TOOL +static void init_prof_map() +{ + _starpu_prof_tool_callback_map[starpu_prof_tool_event_init] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_terminate] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_init_begin] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_init_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end); + + _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_deinit] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init_start] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end); + + _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_cpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_cpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_gpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_gpu_exec] = 
&(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_transfer] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_transfer] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer); + + _starpu_prof_tool_callback_map[starpu_prof_tool_event_user_start] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_user_start); + _starpu_prof_tool_callback_map[starpu_prof_tool_event_user_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_user_end); +} +#endif + +/** + * Looks if there is a profiling tool pointed at by the appropriate + * environment variable. + * Returns 0 if nothing is loaded, -1 if there was a problem, 1 otherwise. + */ +int _starpu_prof_tool_try_load() +{ +#ifdef STARPU_PROF_TOOL + init_prof_map(); + starpu_profiling_init_lib(); + + const char *tool_libs = starpu_getenv(STARPU_PROF_TOOL_ENV_VAR); + if (tool_libs != NULL) + { +#ifdef HAVE_DLOPEN + void *found; + _STARPU_DEBUG("Loading profiling tool %s\n", tool_libs); + + lib_handle = dlopen(tool_libs, RTLD_LAZY); // TODO best flag? 
+ if (!lib_handle) + { + perror("Could not open the requested file"); + fprintf(stderr, "%s\n", dlerror()); + return -1; + } + + /* load the loading function we find in this library */ + found = dlsym(lib_handle, "starpu_prof_tool_library_register"); + if (!found) + { + perror("Could not find the required registration function in the profiling library\n"); + return -1; + } + + starpu_prof_tool_entry_func entry_func = (starpu_prof_tool_entry_func)found; + entry_func(_starpu_prof_tool_register_cb, _starpu_prof_tool_unregister_cb); + + return 1; +#else + _STARPU_MSG("Environment variable '%s' defined but the dlopen functionality is unavailable on the system\n", STARPU_PROF_TOOL_ENV_VAR); +#endif + } + + /* This corresponds to something if we LD_PRELOAD a tool */ + starpu_prof_tool_library_register(_starpu_prof_tool_register_cb, _starpu_prof_tool_unregister_cb); + return 0; +#else + const char *tool_libs = starpu_getenv(STARPU_PROF_TOOL_ENV_VAR); + if (tool_libs != NULL) + { + _STARPU_MSG("Variable '%s' is defined but StarPU profiling tool is not enabled\n", STARPU_PROF_TOOL_ENV_VAR); + } + return 1; +#endif +} + +void _starpu_prof_tool_unload() +{ +#ifdef HAVE_DLOPEN + if (lib_handle) + { + dlclose(lib_handle); + lib_handle = NULL; + } +#endif +} diff --git a/src/profiling/callbacks.h b/src/profiling/callbacks.h new file mode 100644 index 0000000..10778dc --- /dev/null +++ b/src/profiling/callbacks.h @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef _STARPU_CALLBACKS_H_ +#define _STARPU_CALLBACKS_H_ + +#include +#include + +#define STARPU_PROF_TOOL_ENV_VAR "STARPU_PROF_TOOL" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + The events themselves. + This structure can be built by the preprocessor, but we decided + to list the function pointers explicitly for readability purpose. +*/ +struct _starpu_prof_tool_callbacks +{ + starpu_prof_tool_cb_func starpu_prof_tool_event_init; + starpu_prof_tool_cb_func starpu_prof_tool_event_terminate; + starpu_prof_tool_cb_func starpu_prof_tool_event_init_begin; + starpu_prof_tool_cb_func starpu_prof_tool_event_init_end; + + starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init; + starpu_prof_tool_cb_func starpu_prof_tool_event_driver_deinit; + starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init_start; + starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init_end; + + starpu_prof_tool_cb_func starpu_prof_tool_event_start_cpu_exec; + starpu_prof_tool_cb_func starpu_prof_tool_event_end_cpu_exec; + starpu_prof_tool_cb_func starpu_prof_tool_event_start_gpu_exec; + starpu_prof_tool_cb_func starpu_prof_tool_event_end_gpu_exec; + + starpu_prof_tool_cb_func starpu_prof_tool_event_start_transfer; + starpu_prof_tool_cb_func starpu_prof_tool_event_end_transfer; + + starpu_prof_tool_cb_func starpu_prof_tool_event_user_start; + starpu_prof_tool_cb_func starpu_prof_tool_event_user_end; +}; + +extern struct _starpu_prof_tool_callbacks starpu_prof_tool_callbacks; + +/******************************************************************************* + * Functions used by the callbacks + *******************************************************************************/ +struct 
starpu_prof_tool_info _starpu_prof_tool_get_info(enum starpu_prof_tool_event, int, int, enum starpu_prof_tool_driver_type, unsigned int, /*_starpu_cl_func_t*/ void*); +struct starpu_prof_tool_info _starpu_prof_tool_get_info_d(enum starpu_prof_tool_event, int, int, enum starpu_prof_tool_driver_type, unsigned, unsigned, unsigned /* void*: can be added later if necessary */); +struct starpu_prof_tool_info _starpu_prof_tool_get_info_init(enum starpu_prof_tool_event, int, enum starpu_prof_tool_driver_type, struct starpu_conf*); + +/******************************************************************************* + * Initialization and cleanup + *******************************************************************************/ +int _starpu_prof_tool_try_load(); +void _starpu_prof_tool_unload(); + +#ifdef __cplusplus +} +#endif + +#endif // _STARPU_CALLBACKS_H_ diff --git a/src/profiling/profiling.c b/src/profiling/profiling.c new file mode 100644 index 0000000..8da5a24 --- /dev/null +++ b/src/profiling/profiling.c @@ -0,0 +1,701 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_PAPI +#include +#endif + +#ifdef STARPU_PAPI +static starpu_pthread_mutex_t papi_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static int papi_events[PAPI_MAX_HWCTRS]; +static int papi_nevents = 0; +static int warned_component_unavailable = 0; +#endif + +/* Store the busid of the different (src, dst) pairs. busid_matrix[src][dst] + * contains the busid of (src, dst) or -1 if the bus was not registered. */ +struct node_pair +{ + int src; + int dst; + struct starpu_profiling_bus_info *bus_info; +}; + +static int busid_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; +static struct starpu_profiling_bus_info bus_profiling_info[STARPU_MAXNODES][STARPU_MAXNODES]; +static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES]; +static char bus_direct[STARPU_MAXNODES*STARPU_MAXNODES]; +static int bus_ngpus[STARPU_MAXNODES*STARPU_MAXNODES]; +static unsigned busid_cnt = 0; + +static void _starpu_bus_reset_profiling_info(struct starpu_profiling_bus_info *bus_info); + +/* Clear all the profiling info related to the worker. 
*/ +static void _starpu_worker_reset_profiling_info_with_lock(int workerid); + +/* + * Global control of profiling + */ + +/* Disabled by default, unless simulating */ +int _starpu_profiling = +#ifdef STARPU_SIMGRID + 1 +#else + 0 +#endif + ; + +int _starpu_codelet_profiling = 1; +int _starpu_energy_profiling = 0; + +void starpu_profiling_init() +{ + _starpu_profiling_init(); +} + +static void _starpu_profiling_reset_counters() +{ + int worker; + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + { + _starpu_worker_reset_profiling_info_with_lock(worker); + } + + int busid; + int bus_cnt = starpu_bus_get_count(); + for (busid = 0; busid < bus_cnt; busid++) + { + struct starpu_profiling_bus_info *bus_info; + bus_info = busid_to_node_pair[busid].bus_info; + _starpu_bus_reset_profiling_info(bus_info); + } +} + +int starpu_profiling_status_set(int status) +{ + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct _starpu_worker *worker_struct = _starpu_get_worker_struct(worker); + STARPU_PTHREAD_MUTEX_LOCK(&worker_struct->sched_mutex); + } + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(worker)->profiling_info_mutex); + } + + ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); + int prev_value = _starpu_profiling; + _starpu_profiling = status; + ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); + + _STARPU_TRACE_SET_PROFILING(status); + + /* If we enable profiling, we reset the counters. 
*/ + if (status == STARPU_PROFILING_ENABLE) + { + _starpu_profiling_reset_counters(); + } + + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct _starpu_worker *worker_struct = _starpu_get_worker_struct(worker); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(worker)->profiling_info_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&worker_struct->sched_mutex); + } + + return prev_value; +} + +void _starpu_profiling_init(void) +{ + int workerid; + + for (workerid = 0; workerid < STARPU_NMAXWORKERS; workerid++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + enum _starpu_worker_status_index i; + + memset(&worker->profiling_info, 0, sizeof(worker->profiling_info)); + STARPU_PTHREAD_MUTEX_INIT(&worker->profiling_info_mutex, NULL); + + for (i = 0; i< STATUS_INDEX_NR; i++) + worker->profiling_registered_start[i] = 0; + + worker->profiling_status = STATUS_UNKNOWN; + } + +#ifdef STARPU_PAPI + STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); + int retval = PAPI_library_init(PAPI_VER_CURRENT); + if (retval != PAPI_VER_CURRENT) + { + _STARPU_MSG("Failed init PAPI, error: %s.\n", PAPI_strerror(retval)); + } + retval = PAPI_thread_init(pthread_self); + if (retval != PAPI_OK) + { + _STARPU_MSG("Failed init PAPI thread, error: %s.\n", PAPI_strerror(retval)); + } + + char *conf_papi_events; + char *papi_event_name; + conf_papi_events = starpu_getenv("STARPU_PROF_PAPI_EVENTS"); + papi_nevents = 0; + if (conf_papi_events != NULL) + { + while ((papi_event_name = strtok_r(conf_papi_events, " ,", &conf_papi_events))) + { + if (papi_nevents == PAPI_MAX_HWCTRS) + { + _STARPU_MSG("Too many requested papi counters, ignoring %s\n", papi_event_name); + continue; + } + + _STARPU_DEBUG("Loading PAPI Event: %s\n", papi_event_name); + retval = PAPI_event_name_to_code((char*)papi_event_name, &papi_events[papi_nevents]); + if (retval != PAPI_OK) + _STARPU_MSG("Failed to codify papi event [%s], error: %s.\n", papi_event_name, PAPI_strerror(retval)); + else 
+ papi_nevents++; + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); +#endif +} + +#ifdef STARPU_PAPI +void _starpu_profiling_papi_task_start_counters(struct starpu_task *task) +{ + if (!starpu_profiling_status_get()) + return; + + struct starpu_profiling_task_info *profiling_info; + profiling_info = task->profiling_info; + if (profiling_info && papi_nevents) + { + int i; + profiling_info->papi_event_set = PAPI_NULL; + STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); + PAPI_create_eventset(&profiling_info->papi_event_set); + for(i=0; ipapi_event_set, papi_events[i]); +#ifdef PAPI_ECMP_DISABLED + if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable) + { + _STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n"); + warned_component_unavailable = 1; + } +#else + (void)ret; +#endif + profiling_info->papi_values[i]=0; + } + PAPI_reset(profiling_info->papi_event_set); + PAPI_start(profiling_info->papi_event_set); + STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); + } +} + +void _starpu_profiling_papi_task_stop_counters(struct starpu_task *task) +{ + if (!starpu_profiling_status_get()) + return; + + struct starpu_profiling_task_info *profiling_info; + profiling_info = task->profiling_info; + + if (profiling_info && papi_nevents) + { + int i; + STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); + PAPI_stop(profiling_info->papi_event_set, profiling_info->papi_values); + for(i=0; ipapi_values[i]); + } + PAPI_cleanup_eventset(profiling_info->papi_event_set); + PAPI_destroy_eventset(&profiling_info->papi_event_set); + STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); + } +} +#endif + +void _starpu_profiling_start(void) +{ + const char *env; + if ((env = starpu_getenv("STARPU_PROFILING")) && atoi(env)) + { + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + } + _starpu_codelet_profiling = starpu_get_env_number_default("STARPU_CODELET_PROFILING", 1); + _starpu_energy_profiling = 
starpu_get_env_number_default("STARPU_ENERGY_PROFILING", 0); +} + +void _starpu_profiling_terminate(void) +{ + int worker; + + for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) + { + STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_get_worker_struct(worker)->profiling_info_mutex); + } +#ifdef STARPU_PAPI + /* free the resources used by PAPI */ + STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); + PAPI_shutdown(); + STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); +#endif + +} + +/* + * Task profiling + */ +struct starpu_profiling_task_info *_starpu_allocate_profiling_info_if_needed(struct starpu_task *task) +{ + struct starpu_profiling_task_info *info = NULL; + + /* If we are benchmarking, we need room for the energy */ + if (starpu_profiling_status_get() || (task->cl && task->cl->energy_model && (task->cl->energy_model->benchmarking || _starpu_get_calibrate_flag()))) + { + _STARPU_CALLOC(info, 1, sizeof(struct starpu_profiling_task_info)); + } + + return info; +} + +/* + * Worker profiling + */ +static void _starpu_worker_reset_profiling_info_with_lock(int workerid) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; + struct timespec now; + _starpu_clock_gettime(&now); + + worker_info->start_time = now; + + /* This is computed in a lazy fashion when the application queries + * profiling info. 
*/ + starpu_timespec_clear(&worker_info->total_time); + + starpu_timespec_clear(&worker_info->executing_time); + starpu_timespec_clear(&worker_info->sleeping_time); + + worker_info->executed_tasks = 0; + + worker_info->used_cycles = 0; + worker_info->stall_cycles = 0; + worker_info->energy_consumed = 0; + worker_info->flops = 0; + + /* We detect if the worker is already sleeping or doing some + * computation */ + enum _starpu_worker_status status = _starpu_worker_get_status(workerid); + + enum _starpu_worker_status_index i; + + for (i = 0; i < STATUS_INDEX_NR; i++) + { + if (status & (1 << i)) + { + worker->profiling_registered_start[i] = 1; + worker->profiling_registered_start_date[i] = now; + } + else + { + worker->profiling_registered_start[i] = 0; + } + worker->profiling_status = status; + worker->profiling_status_start_date = now; + } +} + +static void _starpu_worker_time_split_accumulate(struct starpu_profiling_worker_info *worker_info, enum _starpu_worker_status status, struct timespec *delta) +{ + /* We here prioritize where we want to attribute the time spent */ + + if (status & STATUS_EXECUTING) + /* Executing task, this is all we want to know */ + starpu_timespec_accumulate(&worker_info->executing_time, delta); + else if (status & STATUS_CALLBACK) + /* Otherwise, callback, that's fine as well */ + starpu_timespec_accumulate(&worker_info->callback_time, delta); + else if (status & STATUS_WAITING) + /* Not doing any task or callback, held on waiting for some data */ + starpu_timespec_accumulate(&worker_info->waiting_time, delta); + else if (status & STATUS_SLEEPING) + /* Not even waiting for some data, but we don't have any task to do anyway */ + starpu_timespec_accumulate(&worker_info->sleeping_time, delta); + else if (status & STATUS_SCHEDULING) + /* We do have tasks to do, but the scheduler takes time */ + starpu_timespec_accumulate(&worker_info->scheduling_time, delta); + /* And otherwise it's just uncategorized overhead */ +} + +void 
_starpu_worker_start_state(int workerid, enum _starpu_worker_status_index index, struct timespec *start_time) +{ + if (starpu_profiling_status_get()) + { + struct timespec state_start_time; + + if (!start_time) + { + _starpu_clock_gettime(&state_start_time); + start_time = &state_start_time; + } + + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + + STARPU_PTHREAD_MUTEX_LOCK(&worker->profiling_info_mutex); + STARPU_ASSERT(worker->profiling_registered_start[index] == 0); + worker->profiling_registered_start[index] = 1; + worker->profiling_registered_start_date[index] = *start_time; + + if (worker->profiling_status != STATUS_UNKNOWN) + { + struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; + struct timespec state_time; + starpu_timespec_sub(start_time, &worker->profiling_status_start_date, &state_time); + _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &state_time); + } + worker->profiling_status = _starpu_worker_get_status(workerid) | (1<profiling_status_start_date = *start_time; + + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->profiling_info_mutex); + } +} + +static void _starpu_worker_time_accumulate(struct starpu_profiling_worker_info *worker_info, enum _starpu_worker_status_index index, struct timespec *delta) +{ + switch (index) + { + case STATUS_INDEX_EXECUTING: + starpu_timespec_accumulate(&worker_info->all_executing_time, delta); + break; + case STATUS_INDEX_CALLBACK: + starpu_timespec_accumulate(&worker_info->all_callback_time, delta); + break; + case STATUS_INDEX_WAITING: + starpu_timespec_accumulate(&worker_info->all_waiting_time, delta); + break; + case STATUS_INDEX_SLEEPING: + starpu_timespec_accumulate(&worker_info->all_sleeping_time, delta); + break; + case STATUS_INDEX_SCHEDULING: + starpu_timespec_accumulate(&worker_info->all_scheduling_time, delta); + break; + case STATUS_INDEX_INITIALIZING: + /* no profiling info for init */ + break; + case STATUS_INDEX_NR: + STARPU_ASSERT(0); + } 
+} + +void _starpu_worker_stop_state(int workerid, enum _starpu_worker_status_index index, struct timespec *stop_time) +{ + if (starpu_profiling_status_get()) + { + struct timespec *state_start, state_end_time; + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; + + if (!stop_time) + { + _starpu_clock_gettime(&state_end_time); + stop_time = &state_end_time; + } + + STARPU_PTHREAD_MUTEX_LOCK(&worker->profiling_info_mutex); + + STARPU_ASSERT (worker->profiling_registered_start[index] == 1); + { + state_start = &worker->profiling_registered_start_date[index]; + + /* Perhaps that profiling was enabled while the worker was + * already blocked, so we don't measure (end - start), but + * (end - max(start,worker_start)) where worker_start is the + * date of the previous profiling info reset on the worker */ + struct timespec *worker_start = &worker_info->start_time; + if (starpu_timespec_cmp(state_start, worker_start, <)) + { + /* state_start < worker_start */ + state_start = worker_start; + } + + struct timespec state_time; + starpu_timespec_sub(stop_time, state_start, &state_time); + + _starpu_worker_time_accumulate(worker_info, index, &state_time); + + worker->profiling_registered_start[index] = 0; + } + + if (worker->profiling_status != STATUS_UNKNOWN) + { + struct timespec state_time; + starpu_timespec_sub(stop_time, &worker->profiling_status_start_date, &state_time); + _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &state_time); + } + worker->profiling_status = _starpu_worker_get_status(workerid) & ~(1<profiling_status_start_date = *stop_time; + + STARPU_PTHREAD_MUTEX_UNLOCK(&worker->profiling_info_mutex); + + } +} + +void _starpu_worker_update_profiling_info_executing(int workerid, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double energy_consumed, double flops) +{ + struct starpu_profiling_worker_info *worker_info = 
&_starpu_get_worker_struct(workerid)->profiling_info; + + if (starpu_profiling_status_get()) + { + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); + + worker_info->used_cycles += used_cycles; + worker_info->stall_cycles += stall_cycles; + worker_info->energy_consumed += energy_consumed; + worker_info->executed_tasks += executed_tasks; + worker_info->flops += flops; + + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); + } + else /* Not thread safe, shouldn't be too much a problem */ + worker_info->executed_tasks += executed_tasks; +} + +int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *info) +{ + struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); + struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; + + if (info && !starpu_profiling_status_get()) + { + /* info is optional (it is NULL-checked again below), so only write through it when provided. Not thread safe, shouldn't be too much a problem */ + info->executed_tasks = worker_info->executed_tasks; + } + + STARPU_PTHREAD_MUTEX_LOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex); + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); + + if (info) + { + /* The total time is computed in a lazy fashion */ + struct timespec now; + _starpu_clock_gettime(&now); + + enum _starpu_worker_status_index i; + + for (i = 0; i< STATUS_INDEX_NR; i++) + { + /* In case some worker is currently doing something, we take into + * account the time spent since it registered. 
*/ + if (worker->profiling_registered_start[i]) + { + struct timespec delta; + starpu_timespec_sub(&now, &worker->profiling_registered_start_date[i], &delta); + _starpu_worker_time_accumulate(worker_info, i, &delta); + } + } + if (worker->profiling_status != STATUS_UNKNOWN) + { + struct timespec delta; + starpu_timespec_sub(&now, &worker->profiling_status_start_date, &delta); + _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &delta); + } + + /* total_time = now - start_time */ + starpu_timespec_sub(&now, &worker_info->start_time, + &worker_info->total_time); + + *info = *worker_info; + } + + _starpu_worker_reset_profiling_info_with_lock(workerid); + + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex); + + return 0; +} + +/* When did the task reach the scheduler ? */ +void _starpu_profiling_set_task_push_start_time(struct starpu_task *task) +{ + if (!starpu_profiling_status_get()) + return; + + struct starpu_profiling_task_info *profiling_info; + profiling_info = task->profiling_info; + + if (profiling_info) + _starpu_clock_gettime(&profiling_info->push_start_time); +} + +void _starpu_profiling_set_task_push_end_time(struct starpu_task *task) +{ + if (!starpu_profiling_status_get()) + return; + + struct starpu_profiling_task_info *profiling_info; + profiling_info = task->profiling_info; + + if (profiling_info) + _starpu_clock_gettime(&profiling_info->push_end_time); +} + +/* + * Bus profiling + */ + +void _starpu_initialize_busid_matrix(void) +{ + int i, j; + for (j = 0; j < STARPU_MAXNODES; j++) + for (i = 0; i < STARPU_MAXNODES; i++) + busid_matrix[i][j] = -1; + + busid_cnt = 0; +} + +static void _starpu_bus_reset_profiling_info(struct starpu_profiling_bus_info *bus_info) +{ + _starpu_clock_gettime(&bus_info->start_time); + bus_info->transferred_bytes = 0; + bus_info->transfer_count = 0; +} + +int 
_starpu_register_bus(int src_node, int dst_node) +{ + if (starpu_bus_get_id(src_node, dst_node) != -1) + return -EBUSY; + + int busid = STARPU_ATOMIC_ADD(&busid_cnt, 1) - 1; + + busid_matrix[src_node][dst_node] = busid; + + busid_to_node_pair[busid].src = src_node; + busid_to_node_pair[busid].dst = dst_node; + busid_to_node_pair[busid].bus_info = &bus_profiling_info[src_node][dst_node]; + + _starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]); + + return busid; +} + +int starpu_bus_get_count(void) +{ + return busid_cnt; +} + +int starpu_bus_get_id(int src, int dst) +{ + return busid_matrix[src][dst]; +} + +int starpu_bus_get_src(int busid) +{ + return busid_to_node_pair[busid].src; +} + +int starpu_bus_get_dst(int busid) +{ + return busid_to_node_pair[busid].dst; +} + +void starpu_bus_set_direct(int busid, int direct) +{ + bus_direct[busid] = direct; +} + +int starpu_bus_get_direct(int busid) +{ + return bus_direct[busid]; +} + +void starpu_bus_set_ngpus(int busid, int ngpus) +{ + bus_ngpus[busid] = ngpus; +} + +int starpu_bus_get_ngpus(int busid) +{ + int ngpus = bus_ngpus[busid]; + if (!ngpus) + { + struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; + /* Unknown number of GPUs, assume it's shared by all GPUs */ + ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER]; + } + return ngpus; +} + +int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info) +{ + int src_node = starpu_bus_get_src(busid); + int dst_node = starpu_bus_get_dst(busid); + + /* XXX protect all this method with a mutex */ + if (bus_info) + { + struct timespec now; + _starpu_clock_gettime(&now); + + /* total_time = now - start_time */ + starpu_timespec_sub(&now, &bus_profiling_info[src_node][dst_node].start_time, + &bus_profiling_info[src_node][dst_node].total_time); + + *bus_info = bus_profiling_info[src_node][dst_node]; + } + + 
_starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]); + + return 0; +} + +void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size) +{ + bus_profiling_info[src_node][dst_node].transferred_bytes += size; + bus_profiling_info[src_node][dst_node].transfer_count++; +// fprintf(stderr, "PROFILE %d -> %d : %d (cnt %d)\n", src_node, dst_node, size, bus_profiling_info[src_node][dst_node].transfer_count); +} + +#undef starpu_profiling_status_get +int starpu_profiling_status_get(void) +{ + int ret; + ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); + ret = _starpu_profiling; + ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); + return ret; +} diff --git a/src/profiling/profiling.h b/src/profiling/profiling.h new file mode 100644 index 0000000..3c9388b --- /dev/null +++ b/src/profiling/profiling.h @@ -0,0 +1,89 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __PROFILING_H__ +#define __PROFILING_H__ + +/** @file */ + +#include +#include +#include +#include + +#pragma GCC visibility push(hidden) + +enum _starpu_worker_status_index; + +extern int _starpu_codelet_profiling; +extern int _starpu_energy_profiling; + +/** Create a task profiling info structure (with the proper time stamps) in case + * profiling is enabled. 
*/ +struct starpu_profiling_task_info *_starpu_allocate_profiling_info_if_needed(struct starpu_task *task); + +/** Update the per-worker profiling info after a task (or more) was executed. + * This accumulates the executed task count, cycle counts, energy and flops of the worker. */ +void _starpu_worker_update_profiling_info_executing(int workerid, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double consumed_energy, double flops); + +/** Record the date when the worker entered this state. This makes it possible to measure + * how much time was spent in this state. + * start_time is optional, if unspecified, _starpu_worker_start_state will just + * take the current time. */ +void _starpu_worker_start_state(int workerid, enum _starpu_worker_status_index index, struct timespec *start_time); + +/** Record the date when the worker left this state. This makes it possible to measure + * how much time was spent in this state. + * stop_time is optional, if unspecified, _starpu_worker_stop_state will just + * take the current time. */ +void _starpu_worker_stop_state(int workerid, enum _starpu_worker_status_index index, struct timespec *stop_time); + +/** When StarPU is initialized, a matrix describing all the buses between memory + * nodes is created: it indicates whether there is a physical link between two + * memory nodes or not. This matrix should contain the identifier of the bus + * between two nodes or -1 in case there is no link. */ +void _starpu_initialize_busid_matrix(void); + +/** Tell StarPU that there exists a link between the two memory nodes. This + * function returns the identifier associated to the bus which can be used to + * retrieve profiling information about the bus activity later on. */ +int _starpu_register_bus(int src_node, int dst_node); + +/** Tell StarPU that "size" bytes were transferred between the two specified + * memory nodes. 
*/ +void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size); + +void _starpu_profiling_set_task_push_start_time(struct starpu_task *task); +void _starpu_profiling_set_task_push_end_time(struct starpu_task *task); + +#ifdef STARPU_PAPI +/** Functions for PAPI task profiling */ +void _starpu_profiling_papi_task_start_counters(struct starpu_task *task); +void _starpu_profiling_papi_task_stop_counters(struct starpu_task *task); +#endif + +/** This function needs to be called before other starpu_profiling_* functions */ +void _starpu_profiling_init(void); + +/** This function starts profiling if the STARPU_PROFILING environment variable is set to a non-zero value */ +void _starpu_profiling_start(void); + +void _starpu_profiling_terminate(void); + +#pragma GCC visibility pop + +#endif // __PROFILING_H__ diff --git a/src/profiling/profiling_helpers.c b/src/profiling/profiling_helpers.c new file mode 100644 index 0000000..26e658e --- /dev/null +++ b/src/profiling/profiling_helpers.c @@ -0,0 +1,219 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +static double convert_to_GB(double d) /* bytes -> GB (three divisions by 1024); takes a double so that large byte counts are not rounded to float precision */ +{ + const double divisor = 1024; + return ((d / divisor) / divisor) / divisor; +} + +void _starpu_profiling_bus_helper_display_summary(FILE *stream) /* prints, for every registered bus, the transferred volume, bandwidth and average transfer size to stream, then the overall total */ +{ + int long long sum_transferred = 0; + + fprintf(stream, "\n#---------------------\n"); + fprintf(stream, "Data transfer stats:\n"); + + int busid; + int bus_cnt = starpu_bus_get_count(); + for (busid = 0; busid < bus_cnt; busid++) + { + char src_name[128], dst_name[128]; + int src, dst; + + src = starpu_bus_get_src(busid); + dst = starpu_bus_get_dst(busid); + + struct starpu_profiling_bus_info bus_info; + starpu_bus_get_profiling_info(busid, &bus_info); + + int long long transferred = bus_info.transferred_bytes; + int long long transfer_cnt = bus_info.transfer_count; + double elapsed_time = starpu_timing_timespec_to_us(&bus_info.total_time) / 1e6; + + double d = convert_to_GB(transferred); + + starpu_memory_node_get_name(src, src_name, sizeof(src_name)); + starpu_memory_node_get_name(dst, dst_name, sizeof(dst_name)); + + fprintf(stream, "\t%s -> %s", src_name, dst_name); + fprintf(stream, "\t%.4lf %s", d, "GB"); + fprintf(stream, "\t%.4lf %s/s", elapsed_time > 0. ? (d * 1024) / elapsed_time : 0., "MB"); /* no elapsed time: report 0 instead of dividing by zero */ + fprintf(stream, "\t(transfers : %lld - avg %.4lf %s)\n", transfer_cnt, transfer_cnt ? (d * 1024) / transfer_cnt : 0., "MB"); /* a bus that saw no transfer reports an average of 0 instead of dividing by zero */ + + sum_transferred += transferred; + } + + double d = convert_to_GB(sum_transferred); + + fprintf(stream, "Total transfers: %.4lf %s\n", d, "GB"); + fprintf(stream, "#---------------------\n"); +} + +void starpu_profiling_bus_helper_display_summary(void) +{ + const char *stats; + if (!((stats = starpu_getenv("STARPU_BUS_STATS")) && atoi(stats))) return; + const char *filename = starpu_getenv("STARPU_BUS_STATS_FILE"); + if (filename==NULL) + _starpu_profiling_bus_helper_display_summary(stderr); + else + { + FILE *sfile = fopen(filename, "w+"); + STARPU_ASSERT_MSG(sfile, "Could not open file %s for displaying bus stats (%s). 
You can specify another file destination with the STARPU_BUS_STATS_FILE environment variable", filename, strerror(errno)); + _starpu_profiling_bus_helper_display_summary(sfile); + fclose(sfile); + } + +} + +void _starpu_profiling_worker_helper_display_summary(FILE *stream) +{ + double sum_consumed = 0.; + int profiling = starpu_profiling_status_get(); + double overall_time = 0; + int workerid; + int worker_cnt = starpu_worker_get_count(); + + double tot_total_time = 0.0; + double tot_executing_time = 0.0; + double tot_callback_time = 0.0; + double tot_waiting_time = 0.0; + double tot_sleeping_time = 0.0; + double tot_scheduling_time = 0.0; + + fprintf(stream, "\n#---------------------\n"); + fprintf(stream, "Worker stats:\n"); + + for (workerid = 0; workerid < worker_cnt; workerid++) + { + struct starpu_profiling_worker_info info; + int ret = starpu_profiling_worker_get_info(workerid, &info); + char name[64]; + STARPU_ASSERT(!ret); + + starpu_worker_get_name(workerid, name, sizeof(name)); + + fprintf(stream, "%-32s\n", name); + fprintf(stream, "\t%d task(s)\n", info.executed_tasks); + + if (profiling) + { + double total_time = starpu_timing_timespec_to_us(&info.total_time) / 1000.; + double executing_time = starpu_timing_timespec_to_us(&info.executing_time) / 1000.; + double callback_time = starpu_timing_timespec_to_us(&info.callback_time) / 1000.; + double waiting_time = starpu_timing_timespec_to_us(&info.waiting_time) / 1000.; + double sleeping_time = starpu_timing_timespec_to_us(&info.sleeping_time) / 1000.; + double scheduling_time = starpu_timing_timespec_to_us(&info.scheduling_time) / 1000.; + double overhead_time = total_time - executing_time - callback_time - waiting_time - sleeping_time - scheduling_time; + + tot_total_time += total_time; + tot_executing_time += executing_time; + tot_callback_time += callback_time; + tot_waiting_time += waiting_time; + tot_sleeping_time += sleeping_time; + tot_scheduling_time += scheduling_time; + + double 
all_executing_time = starpu_timing_timespec_to_us(&info.all_executing_time) / 1000.; + double all_callback_time = starpu_timing_timespec_to_us(&info.all_callback_time) / 1000.; + double all_waiting_time = starpu_timing_timespec_to_us(&info.all_waiting_time) / 1000.; + double all_sleeping_time = starpu_timing_timespec_to_us(&info.all_sleeping_time) / 1000.; + double all_scheduling_time = starpu_timing_timespec_to_us(&info.all_scheduling_time) / 1000.; + + if (total_time > overall_time) + overall_time = total_time; + + fprintf(stream, "\ttime split: total %.2lf ms = " + "executing: %.2lf ms + " + "callback: %.2lf ms + " + "waiting: %.2lf ms + " + "sleeping: %.2lf ms + " + "scheduling: %.2lf ms + " + "overhead %.2lf ms\n" + "\tall time: " + "executing: %.2lf ms " + "callback: %.2lf ms " + "waiting: %.2lf ms " + "sleeping: %.2lf ms " + "scheduling: %.2lf ms\n", + total_time, executing_time, callback_time, waiting_time, sleeping_time, scheduling_time, overhead_time, + all_executing_time, all_callback_time, all_waiting_time, all_sleeping_time, all_scheduling_time); + if (info.used_cycles || info.stall_cycles) + fprintf(stream, "\t%llu Mcy %llu Mcy stall\n", (unsigned long long)info.used_cycles/1000000, (unsigned long long)info.stall_cycles/1000000); + if (info.energy_consumed) + fprintf(stream, "\t%f J consumed\n", info.energy_consumed); + if (info.flops) + fprintf(stream, "\t%f GFlop/s\n\n", info.flops / total_time / 1000000); + } + + sum_consumed += info.energy_consumed; + } + + if (profiling) + { + double tot_overhead_time = tot_total_time - tot_executing_time - tot_callback_time - tot_waiting_time - tot_sleeping_time - tot_scheduling_time; + fprintf(stream, "\nGlobal time split: total %.2lf ms = " + "executing: %.2lf ms (%.2lf%%) + " + "callback: %.2lf ms (%.2lf%%) + " + "waiting: %.2lf ms (%.2lf%%) + " + "sleeping: %.2lf ms (%.2lf%%) + " + "scheduling: %.2lf ms (%.2lf%%) + " + "overhead %.2lf ms (%.2lf%%)\n", + tot_total_time, + tot_executing_time, 
tot_executing_time * 100 / tot_total_time, + tot_callback_time, tot_callback_time * 100 / tot_total_time, + tot_waiting_time, tot_waiting_time * 100 / tot_total_time, + tot_sleeping_time, tot_sleeping_time * 100 / tot_total_time, + tot_scheduling_time, tot_scheduling_time * 100 / tot_total_time, + tot_overhead_time, tot_overhead_time * 100 / tot_total_time); + } + + if (profiling) + { + const char *strval_idle_power = starpu_getenv("STARPU_IDLE_POWER"); + if (strval_idle_power) + { + double idle_power = atof(strval_idle_power); /* Watt */ + double idle_energy = idle_power * overall_time / 1000.; /* J */ + + fprintf(stream, "Idle energy: %.2lf J\n", idle_energy); + fprintf(stream, "Total energy: %.2lf J\n", + sum_consumed + idle_energy); + } + } + fprintf(stream, "#---------------------\n"); +} + +void starpu_profiling_worker_helper_display_summary(void) +{ + const char *stats; + if (!((stats = starpu_getenv("STARPU_WORKER_STATS")) && atoi(stats))) return; + const char *filename = starpu_getenv("STARPU_WORKER_STATS_FILE"); + if (filename==NULL) + _starpu_profiling_worker_helper_display_summary(stderr); + else + { + FILE *sfile = fopen(filename, "w+"); + STARPU_ASSERT_MSG(sfile, "Could not open file %s for displaying worker stats (%s). You can specify another file destination with the STARPU_WORKER_STATS_FILE environment variable", filename, strerror(errno)); + _starpu_profiling_worker_helper_display_summary(sfile); + fclose(sfile); + } +} diff --git a/src/sched_policies/component_best_implementation.c b/src/sched_policies/component_best_implementation.c new file mode 100644 index 0000000..a03afe6 --- /dev/null +++ b/src/sched_policies/component_best_implementation.c @@ -0,0 +1,126 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+
+#include
+#include
+#ifdef BUILDING_STARPU
+#include
+#endif
+
+/* Pick an implementation of task for worker workerid and record predictions.
+ *
+ * Without a performance model (task->cl->model is NULL), the first
+ * implementation reported by starpu_worker_can_execute_task_first_impl() is
+ * used with a predicted length of 0.0. Note that this path asserts that the
+ * worker can execute the task instead of returning 0.
+ *
+ * With a performance model, every implementation that workerid can execute
+ * is compared through starpu_task_worker_expected_length(): the smallest
+ * value wins, except that the first NaN prediction ends the search at once
+ * and selects that implementation with a predicted length of 0.0.
+ *
+ * On success, task->predicted, task->predicted_transfer and the
+ * implementation of task are set and 1 is returned. Returns 0, without
+ * modifying task, when no implementation was selected.
+ */
+static int find_best_impl(unsigned sched_ctx_id, struct starpu_task * task, int workerid)
+{
+	double len = DBL_MAX;
+	int best_impl = -1;
+	unsigned impl;
+	if (!task->cl->model)
+	{
+		/* No perfmodel, first available will be fine */
+		int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl);
+		STARPU_ASSERT(can_execute);
+		best_impl = impl;
+		len = 0.0;
+	}
+	else
+	{
+		for(impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
+		{
+			if(starpu_worker_can_execute_task(workerid, task, impl))
+			{
+				double d = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, impl);
+				if(isnan(d)) /* no usable prediction: take this implementation right away */
+				{
+					best_impl = impl;
+					len = 0.0;
+					break;
+				}
+				if(d < len)
+				{
+					len = d;
+					best_impl = impl;
+				}
+			}
+		}
+	}
+	if(best_impl == -1)
+		return 0;
+
+	task->predicted = len;
+	task->predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);
+	starpu_task_set_implementation(task, best_impl);
+	return 1;
+}
+
+
+/* Walk the workers bitmap in order and stop at the first worker for which
+ * find_best_impl() succeeds, i.e. the first worker that can execute task or
+ * for which an implementation has to be calibrated. The implementation,
+ * task->predicted and task->predicted_transfer are set for that worker.
+ * Nothing is set when find_best_impl() returns 0 for every worker.
+ */
+static void select_best_implementation_and_set_preds(unsigned sched_ctx_id, struct starpu_bitmap * workers, struct starpu_task * task)
+{
+	int workerid;
+	for(workerid = starpu_bitmap_first(workers);
+	    -1 != workerid;
+	    workerid = starpu_bitmap_next(workers, workerid))
+		if(find_best_impl(sched_ctx_id, task, workerid))
+			break;
+}
+/* push_task method: choose the implementation and predictions of task using
+ * the workers_in_ctx bitmap of component, then forward task to the only
+ * child (exactly one child is asserted). Returns the value returned by
+ * starpu_sched_component_push_task().
+ */
+static int best_implementation_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component->nchildren == 1);
+	select_best_implementation_and_set_preds(component->tree->sched_ctx_id, &component->workers_in_ctx, task);
+	return starpu_sched_component_push_task(component,component->children[0],task);
+}
+/* Returns non-zero when the push_task method of component is
+ * best_implementation_push_task, i.e. when component was set up by
+ * starpu_sched_component_best_implementation_create().
+ */
+int starpu_sched_component_is_best_implementation(struct starpu_sched_component * component)
+{
+	return component->push_task == best_implementation_push_task;
+}
+/* pull_task method: ask each non-NULL parent in turn for a task and stop at
+ * the first one obtained. The implementation and predictions are then filled
+ * in for the first worker of workers_in_ctx; the return value of
+ * find_best_impl() is deliberately ignored. Returns the task, or NULL when no
+ * parent provided one. The `from' argument is unused.
+ */
+static struct starpu_task * best_implementation_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * from STARPU_ATTRIBUTE_UNUSED)
+{
+	struct starpu_task * task = NULL;
+	unsigned i;
+	for(i=0; i < component->nparents; i++)
+	{
+		if(component->parents[i] == NULL)
+			continue;
+		else
+		{
+			task = starpu_sched_component_pull_task(component->parents[i], component);
+			if(task)
+				break;
+		}
+	}
+	if(task)
+		/* this worker can execute this task as it was returned by a pop */
+		(void)find_best_impl(component->tree->sched_ctx_id, task, starpu_bitmap_first(&component->workers_in_ctx));
+	return task;
+}
+/* Create a component named "best_impl" in tree and install the push_task and
+ * pull_task methods defined above. arg is unused.
+ */
+struct starpu_sched_component * starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg)
+{
+	(void)arg;
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "best_impl");
+	component->push_task = best_implementation_push_task;
+	component->pull_task = best_implementation_pull_task;
+	return component;
+}
diff --git a/src/sched_policies/component_composed.c b/src/sched_policies/component_composed.c
new file
mode 100644
index 0000000..9775d2e
--- /dev/null
+++ b/src/sched_policies/component_composed.c
@@ -0,0 +1,237 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+
+/* A composed component is parameterized by a list of pairs
+ * (create_component_function(arg), arg). This declares the list element
+ * type holding one such pair.
+ */
+LIST_TYPE(fun_create_component,
+	struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg);
+	void * arg;
+);
+/* A recipe is the ordered list of (create function, argument) pairs. */
+struct starpu_sched_component_composed_recipe
+{
+	struct fun_create_component_list list;
+};
+/* Allocate a recipe with an empty list. It is released with
+ * starpu_sched_component_composed_recipe_destroy().
+ */
+struct starpu_sched_component_composed_recipe * starpu_sched_component_composed_recipe_create(void)
+{
+	struct starpu_sched_component_composed_recipe *recipe;
+	_STARPU_MALLOC(recipe, sizeof(*recipe));
+	fun_create_component_list_init(&recipe->list);
+	return recipe;
+}
+/* Append the pair (create_component, arg) at the end of recipe. */
+void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe * recipe,
+	struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg),
+	void * arg)
+{
+	struct fun_create_component * e = fun_create_component_new();
+	e->create_component = create_component;
+	e->arg = arg;
+	fun_create_component_list_push_back(&recipe->list, e);
+}
+/* Convenience helper: create a recipe which contains the single pair
+ * (create_component, arg).
+ */
+struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg),
+	void * arg)
+{
+	struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create();
+	starpu_sched_component_composed_recipe_add(r, create_component, arg);
+	return r;
+}
+/* Delete every element of recipe, then free recipe itself. A NULL recipe is
+ * accepted and ignored.
+ */
+void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe * recipe)
+{
+	if(!recipe)
+		return;
+	while(!fun_create_component_list_empty(&recipe->list))
+		fun_create_component_delete(fun_create_component_list_pop_back(&recipe->list));
+	free(recipe);
+}
+/* Private data of a composed component: the first (top) and the last
+ * (bottom) component of the internal chain.
+ */
+struct composed_component
+{
+	struct starpu_sched_component *top,*bottom;
+};
+
+/* Build the composed component data by turning the list of
+ * (component_create_fun, arg_create_fun) pairs into a chain in which every
+ * component has exactly one child: each element is instantiated in list
+ * order and added as child of the previously created one.
+ *
+ * Returns the top and bottom components of the chain. When built with hwloc
+ * support, obj is stored in every created component. Asserts that recipe is
+ * not NULL, that the list has a first element, that every element has a
+ * create function and that the bottom component is not a worker component.
+ */
+static struct composed_component create_composed_component(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe * recipe
+#ifdef STARPU_HAVE_HWLOC
+	,hwloc_obj_t obj
+#endif
+	)
+{
+	struct composed_component c;
+	STARPU_ASSERT(recipe);
+
+	struct fun_create_component_list * list = &recipe->list;
+	struct fun_create_component * i = fun_create_component_list_begin(list);
+	STARPU_ASSERT(i);
+	STARPU_ASSERT(i->create_component);
+	c.top = c.bottom = i->create_component(tree, i->arg);
+#ifdef STARPU_HAVE_HWLOC
+	c.top->obj = obj;
+#endif
+	for(i = fun_create_component_list_next(i);
+	    i != fun_create_component_list_end(list);
+	    i = fun_create_component_list_next(i))
+	{
+		STARPU_ASSERT(i->create_component);
+		struct starpu_sched_component * component = i->create_component(tree, i->arg);
+#ifdef STARPU_HAVE_HWLOC
+		component->obj = obj;
+#endif
+		c.bottom->add_child(c.bottom, component);
+
+		/* we want to be able to traverse the scheduler bottom up for all sched ctxs
+		 * when a worker calls pop()
+		 */
+		unsigned j;
+		for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+			component->add_parent(component, c.bottom);
+		c.bottom = component;
+	}
+	STARPU_ASSERT(!starpu_sched_component_is_worker(c.bottom));
+	return c;
+}
+/* push_task method: hand task over to the top of the internal chain. */
+static int composed_component_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	struct composed_component *c = component->data;
+	return starpu_sched_component_push_task(component,c->top,task);
+}
+/* pull_task method: first try to pull from the bottom of the internal chain,
+ * then from each non-NULL parent in turn, stopping at the first task
+ * obtained. Returns NULL when none of them provided a task. The `to'
+ * argument is unused.
+ */
+static struct starpu_task * composed_component_pull_task(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	struct composed_component *c = component->data;
+	struct starpu_task *task;
+
+	task = starpu_sched_component_pull_task(c->bottom,component);
+	if(task)
+		return task;
+
+	unsigned i;
+	for(i=0; i < component->nparents; i++)
+	{
+		if(component->parents[i] == NULL)
+			continue;
+		else
+		{
+			task = starpu_sched_component_pull_task(component->parents[i],component);
+			if(task)
+				break;
+		}
+	}
+	return task;
+}
+/* estimated_load method: delegate to the top component of the chain. */
+static double composed_component_estimated_load(struct starpu_sched_component * component)
+{
+	struct composed_component * c = component->data;
+	return c->top->estimated_load(c->top);
+}
+/* add_child method: add child through component->add_child, then to the
+ * bottom of the internal chain.
+ * NOTE(review): starpu_sched_component_composed_component_create() installs
+ * this very function as component->add_child, so unless that pointer is
+ * changed elsewhere the first call below re-enters this function with the
+ * same arguments. Confirm whether this path is ever exercised.
+ */
+static void composed_component_add_child(struct starpu_sched_component * component, struct starpu_sched_component * child)
+{
+	struct composed_component * c = component->data;
+	component->add_child(component, child);
+	c->bottom->add_child(c->bottom, child);
+}
+/* remove_child method: remove child through component->remove_child, then
+ * from the bottom of the internal chain.
+ * NOTE(review): starpu_sched_component_composed_component_create() installs
+ * this very function as component->remove_child, so the first call below
+ * appears to re-enter this function in the same way as
+ * composed_component_add_child(). Confirm.
+ */
+static void composed_component_remove_child(struct starpu_sched_component * component, struct starpu_sched_component * child)
+{
+	struct composed_component * c = component->data;
+	component->remove_child(component, child);
+	c->bottom->remove_child(c->bottom, child);
+}
+/* notify_change_workers method: copy the workers and workers_in_ctx bitmaps
+ * and the properties of component into every component of the internal
+ * chain, walking from top to bottom through children[0].
+ */
+static void composed_component_notify_change_workers(struct starpu_sched_component * component)
+{
+	struct composed_component * c = component->data;
+	struct starpu_bitmap * workers = &component->workers;
+	struct starpu_bitmap * workers_in_ctx = &component->workers_in_ctx;
+	struct starpu_sched_component * n;
+	for(n = c->top; ;n = n->children[0])
+	{
+		starpu_bitmap_unset_all(&n->workers);
+		starpu_bitmap_or(&n->workers, workers);
+
+		starpu_bitmap_unset_all(&n->workers_in_ctx);
+		starpu_bitmap_or(&n->workers_in_ctx, workers_in_ctx);
+
+		n->properties = component->properties;
+		if(n == c->bottom)
+			break;
+	}
+}
+/* deinit_data method: destroy every component of the internal chain from top
+ * to bottom, then free the private data and reset _component->data.
+ * The children array of the bottom component is detached first; it was set
+ * to the children array of the composed component at creation, so presumably
+ * it must not be released through the bottom component (TODO confirm).
+ */
+static void composed_component_deinit_data(struct starpu_sched_component * _component)
+{
+	struct composed_component *c = _component->data;
+	c->bottom->children = NULL;
+	c->bottom->nchildren = 0;
+	struct starpu_sched_component * component;
+	struct starpu_sched_component * next = c->top;
+	do
+	{
+		component = next;
+		/* read the successor before component is destroyed */
+		next = component->children ? component->children[0] : NULL;
+		starpu_sched_component_destroy(component);
+	}
+	while(next);
+	free(c);
+	_component->data = NULL;
+}
+/* Create a component from recipe. Asserts that recipe is not empty.
+ *
+ * When the head and the tail of the list are the same element, the component
+ * built by that element is returned directly, without any wrapper.
+ * Otherwise a component named "composed" is created; it owns the chain built
+ * by create_composed_component(), the bottom of which is given the children
+ * and parents arrays (and counts) of the new component, and all the
+ * composed_component_* methods above are installed on it.
+ */
+struct starpu_sched_component * starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree,
+	struct starpu_sched_component_composed_recipe * recipe)
+{
+	STARPU_ASSERT(!fun_create_component_list_empty(&recipe->list));
+	struct fun_create_component_list * l = &recipe->list;
+	if(l->_head == l->_tail)
+		return l->_head->create_component(tree, l->_head->arg);
+
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "composed");
+	struct composed_component *c;
+	_STARPU_MALLOC(c, sizeof(struct composed_component));
+	*c = create_composed_component(tree, recipe
+#ifdef STARPU_HAVE_HWLOC
+		,component->obj
+#endif
+);
+	c->bottom->nchildren = component->nchildren;
+	c->bottom->children = component->children;
+	c->bottom->nparents = component->nparents;
+	c->bottom->parents = component->parents;
+
+	component->data = c;
+	component->deinit_data = composed_component_deinit_data;
+	component->push_task = composed_component_push_task;
+	component->pull_task = composed_component_pull_task;
+	component->estimated_load = composed_component_estimated_load;
+	component->estimated_end = starpu_sched_component_estimated_end_min;
+	component->add_child = composed_component_add_child;
+	component->remove_child = composed_component_remove_child;
+	component->notify_change_workers = composed_component_notify_change_workers;
+	return component;
+}
diff --git a/src/sched_policies/component_eager.c b/src/sched_policies/component_eager.c
new file mode 100644
index 0000000..aedba3e
--- /dev/null
+++ b/src/sched_policies/component_eager.c
@@ -0,0 +1,168 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#ifdef BUILDING_STARPU
+#include
+#endif
+/* Private data of an eager component.
+ * target and ntasks only carry a value for the duration of eager_can_push():
+ * target is the component which asked for tasks, ntasks the number of tasks
+ * which may still be pushed down. Outside of it, target is NULL and ntasks
+ * is -1, which eager_push_task() treats as "no limit".
+ */
+struct _starpu_eager_data
+{
+	struct starpu_sched_component *target;
+	starpu_pthread_mutex_t scheduling_mutex;
+	int ntasks;
+};
+/* push_task method. Returns 0 as soon as a component below has accepted
+ * task, and 1 otherwise.
+ *
+ * When ntasks is 0, the task is refused right away. When a target is set, a
+ * push to it is attempted for each (worker of target, implementation) pair
+ * able to execute task. If there is no target or all these pushes were
+ * refused, the children holding a worker of the context able to execute task
+ * are considered: for a worker child, its can_pull() method is called and 1
+ * is returned if it returns non-zero; any other child gets task pushed to it.
+ */
+static int eager_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	int ret;
+	STARPU_ASSERT(component && task && starpu_sched_component_is_eager(component));
+	STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task));
+	struct _starpu_eager_data *d = component->data;
+	struct starpu_sched_component *target;
+
+	/* FIX atomicity */
+	if (d->ntasks == 0)
+		/* We have already pushed a task down */
+		return 1;
+	if (d->ntasks > 0)
+		d->ntasks--;
+
+	if ((target = d->target))
+	{
+		/* target told us we could push to it, try to */
+		int idworker;
+		for(idworker = starpu_bitmap_first(&target->workers);
+		    idworker != -1;
+		    idworker = starpu_bitmap_next(&target->workers, idworker))
+		{
+			int nimpl;
+			for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+			{
+				if(starpu_worker_can_execute_task(idworker,task,nimpl)
+				   || starpu_combined_worker_can_execute_task(idworker, task, nimpl))
+				{
+					ret = starpu_sched_component_push_task(component,target,task);
+					if (!ret)
+						return 0;
+				}
+			}
+		}
+	}
+
+	/* FIXME: should rather just loop over children before looping over its workers */
+	int workerid;
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
+	    workerid != -1;
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
+	{
+		int nimpl;
+		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
+			/* FIXME: use starpu_worker_can_execute_task_first_impl instead */
+			if(starpu_worker_can_execute_task(workerid,task,nimpl)
+			   || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
+			{
+				unsigned i;
+				for (i = 0; i < component->nchildren; i++)
+				{
+					int idworker;
+					for(idworker = starpu_bitmap_first(&component->children[i]->workers);
+					    idworker != -1;
+					    idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
+					{
+						if (idworker == workerid)
+						{
+							if(starpu_sched_component_is_worker(component->children[i]))
+							{
+								if (component->children[i]->can_pull(component->children[i]))
+									return 1;
+							}
+							else
+							{
+								ret = starpu_sched_component_push_task(component,component->children[i],task);
+								if (!ret)
+									return 0;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+/* can_push method: `to' tells us that it can accept tasks. With
+ * scheduling_mutex held, `to' is recorded as the target, a single task is
+ * allowed to go through eager_push_task(), and the request is relayed with
+ * starpu_sched_component_can_push(), whose return value is returned. The
+ * target and the task limit are reset before the mutex is released.
+ *
+ * Note: we can't use starpu_sched_component_pump_to ourself because if a fifo below
+ * refuses a task, we have no way to push it back to a fifo above. */
+static int eager_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
+{
+	int success;
+	struct _starpu_eager_data *d = component->data;
+	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
+	/* Target flow of tasks to this child */
+	d->target = to;
+	/* But make pump above push only one task */
+	d->ntasks = 1;
+	success = starpu_sched_component_can_push(component, to);
+	d->target = NULL;
+	d->ntasks = -1;
+	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+	return success;
+}
+/* pull_task method: always returns NULL. A task, if any, is delivered
+ * through the push path triggered by eager_can_push().
+ */
+static struct starpu_task *eager_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to)
+{
+	/* We can't directly pull (in case the obtained task does not match
+	 * the constraints of `to'), but we can try to push, and components
+	 * below will cope with it */
+	eager_can_push(component, to);
+	return NULL;
+}
+/* deinit_data method: destroy the mutex and free the private data. */
+static void eager_deinit_data(struct starpu_sched_component *component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_eager(component));
+	struct _starpu_eager_data *d = component->data;
+	STARPU_PTHREAD_MUTEX_DESTROY(&d->scheduling_mutex);
+	free(d);
+}
+/* Returns non-zero when the push_task method of component is
+ * eager_push_task.
+ */
+int starpu_sched_component_is_eager(struct starpu_sched_component * component)
+{
+	return component->push_task == eager_push_task;
+}
+/* Create a component named "eager" in tree, with no target, no task limit
+ * and an initialized mutex. arg is unused.
+ */
+struct starpu_sched_component * starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg)
+{
+	(void)arg;
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager");
+	struct _starpu_eager_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+	data->target = NULL;
+	data->ntasks = -1;
+	STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL);
+
+	component->data = data;
+	component->push_task = eager_push_task;
+	component->pull_task = eager_pull_task;
+	component->can_push = eager_can_push;
+	component->can_pull = starpu_sched_component_can_pull_all;
+	component->deinit_data = eager_deinit_data;
+
+	return component;
+}
diff --git a/src/sched_policies/component_eager_calibration.c b/src/sched_policies/component_eager_calibration.c
new file mode 100644
index 0000000..cb92ea9
--- /dev/null
+++ b/src/sched_policies/component_eager_calibration.c
@@ -0,0 +1,81 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+/* push_task method. Look for a (worker of the context, implementation) pair
+ * which can execute task and whose expected length is NaN, presumably
+ * meaning that the performance model is not calibrated for it yet (TODO
+ * confirm). For a task which belongs to a bundle, the expected length of the
+ * bundle is used instead of that of the task.
+ *
+ * For the first such pair whose worker is found in the workers bitmap of a
+ * child, task is pushed to that child and the result of the push is
+ * returned. Returns 1 when no such child is found.
+ */
+int eager_calibration_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && task);
+	STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task));
+
+	starpu_task_bundle_t bundle = task->bundle;
+
+	int workerid;
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
+	    workerid != -1;
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
+	{
+		struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
+		int nimpl;
+		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
+			if(starpu_worker_can_execute_task(workerid,task,nimpl)
+			   || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
+			{
+				double d;
+
+				if(bundle)
+					d = starpu_task_bundle_expected_length(bundle, archtype, nimpl);
+				else
+					d = starpu_task_expected_length(task, archtype, nimpl);
+
+				if(isnan(d))
+				{
+					unsigned i;
+					for (i = 0; i < component->nchildren; i++)
+					{
+						int idworker;
+						for(idworker = starpu_bitmap_first(&component->children[i]->workers);
+						    idworker != -1;
+						    idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
+						{
+							if (idworker == workerid)
+							{
+								return starpu_sched_component_push_task(component,component->children[i],task);
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	return 1;
+}
+/* Returns non-zero when the push_task method of component is
+ * eager_calibration_push_task.
+ */
+int starpu_sched_component_is_eager_calibration(struct starpu_sched_component * component)
+{
+	return component->push_task == eager_calibration_push_task;
+}
+/* Create a component named "eager_calibration" in tree; only its push_task
+ * method is set here. arg is unused.
+ */
+struct starpu_sched_component * starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg)
+{
+	(void)arg;
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager_calibration");
+	component->push_task = eager_calibration_push_task;
+
+	return component;
+}
diff --git a/src/sched_policies/component_eager_prio.c b/src/sched_policies/component_eager_prio.c
new file mode 100644
index
0000000..f343b6a --- /dev/null +++ b/src/sched_policies/component_eager_prio.c @@ -0,0 +1,165 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* eager component which has its own priority queue. It can thus eagerly push + * tasks to lower queues without having to wait for being pulled from. */ + +#include +#include +#include +#include +#include +#include +#include + +struct _starpu_eager_prio_data +{ + struct starpu_st_prio_deque prio; + starpu_pthread_mutex_t mutex; +}; + +static int eager_prio_progress_one(struct starpu_sched_component *component) +{ + struct _starpu_eager_prio_data * data = component->data; + starpu_pthread_mutex_t * mutex = &data->mutex; + struct starpu_st_prio_deque * prio = &data->prio; + struct starpu_task *task; + int ret; + + STARPU_COMPONENT_MUTEX_LOCK(mutex); + task = starpu_st_prio_deque_pop_task(prio); + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + + if (!task) + { + return 1; + } + + /* FIXME: should rather just loop over children before looping over its workers */ + int workerid; + for(workerid = starpu_bitmap_first(&component->workers_in_ctx); + workerid != -1; + workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) + { + int nimpl; + for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + /* FIXME: use 
starpu_worker_can_execute_task_first_impl instead */ + if(starpu_worker_can_execute_task(workerid,task,nimpl) + || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) + { + unsigned i; + for (i = 0; i < component->nchildren; i++) + { + int idworker; + for(idworker = starpu_bitmap_first(&component->children[i]->workers); + idworker != -1; + idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) + { + if (idworker == workerid) + { + STARPU_ASSERT(!starpu_sched_component_is_worker(component->children[i])); + ret = starpu_sched_component_push_task(component,component->children[i],task); + if (!ret) + return 0; + } + } + } + } + } + } + + /* Could not push to child actually, push that one back too */ + STARPU_COMPONENT_MUTEX_LOCK(mutex); + starpu_st_prio_deque_push_front_task(prio, task); + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + return 1; +} + +/* Try to push some tasks below */ +static void eager_prio_progress(struct starpu_sched_component *component) +{ + STARPU_ASSERT(component && starpu_sched_component_is_eager_prio(component)); + while (!eager_prio_progress_one(component)) + ; +} + +static int eager_prio_push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + STARPU_ASSERT(component && task && starpu_sched_component_is_eager_prio(component)); + struct _starpu_eager_prio_data * data = component->data; + struct starpu_st_prio_deque * prio = &data->prio; + starpu_pthread_mutex_t * mutex = &data->mutex; + + STARPU_COMPONENT_MUTEX_LOCK(mutex); + starpu_st_prio_deque_push_back_task(prio,task); + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + + eager_prio_progress(component); + + return 0; +} + +static int eager_prio_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + eager_prio_progress(component); + int ret = 0; + unsigned j; + for(j=0; j < component->nparents; j++) + { + if(component->parents[j] == NULL) + continue; + else + { + ret = 
component->parents[j]->can_push(component->parents[j], component); + if(ret) + break; + } + } + return ret; +} + +static void eager_prio_component_deinit_data(struct starpu_sched_component * component) +{ + STARPU_ASSERT(starpu_sched_component_is_eager_prio(component)); + struct _starpu_eager_prio_data * d = component->data; + starpu_st_prio_deque_destroy(&d->prio); + free(d); +} + +int starpu_sched_component_is_eager_prio(struct starpu_sched_component * component) +{ + return component->push_task == eager_prio_push_task; +} + +struct starpu_sched_component * starpu_sched_component_eager_prio_create(struct starpu_sched_tree *tree, void *arg) +{ + (void)arg; + struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager_prio"); + struct _starpu_eager_prio_data *data; + _STARPU_MALLOC(data, sizeof(*data)); + + starpu_st_prio_deque_init(&data->prio); + STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); + component->data = data; + + component->push_task = eager_prio_push_task; + component->can_push = eager_prio_can_push; + component->deinit_data = eager_prio_component_deinit_data; + + return component; +} diff --git a/src/sched_policies/component_fifo.c b/src/sched_policies/component_fifo.c new file mode 100644 index 0000000..7509bb3 --- /dev/null +++ b/src/sched_policies/component_fifo.c @@ -0,0 +1,303 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include +#include + +struct _starpu_fifo_data +{ + struct starpu_st_fifo_taskq fifo; + starpu_pthread_mutex_t mutex; + unsigned ntasks_threshold; + double exp_len_threshold; + int ready; + int exp; +}; + +static void fifo_component_deinit_data(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component && component->data); + struct _starpu_fifo_data * f = component->data; + STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex); + free(f); +} + +static double fifo_estimated_end(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component && component->data); + struct _starpu_fifo_data * data = component->data; + struct starpu_st_fifo_taskq * queue = &data->fifo; + return starpu_sched_component_estimated_end_min_add(component, queue->exp_len); +} + +static double fifo_estimated_load(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component && component->data); + STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0); + struct _starpu_fifo_data * data = component->data; + struct starpu_st_fifo_taskq * queue = &data->fifo; + starpu_pthread_mutex_t * mutex = &data->mutex; + double relative_speedup = 0.0; + double load = starpu_sched_component_estimated_load(component); + if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component)) + { + int first_worker = starpu_bitmap_first(&component->workers_in_ctx); + relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id)); + STARPU_COMPONENT_MUTEX_LOCK(mutex); + load += queue->ntasks / relative_speedup; + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + return load; + } + else + { + int i; + for(i = starpu_bitmap_first(&component->workers_in_ctx); + i != -1; + i = starpu_bitmap_next(&component->workers_in_ctx, i)) + relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, 
component->tree->sched_ctx_id)); + relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx); + STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup)); + STARPU_COMPONENT_MUTEX_LOCK(mutex); + load += queue->ntasks / relative_speedup; + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + } + return load; +} + +static int fifo_push_local_task(struct starpu_sched_component * component, struct starpu_task * task, unsigned is_pushback) +{ + STARPU_ASSERT(component && component->data && task); + STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); + struct _starpu_fifo_data * data = component->data; + struct starpu_st_fifo_taskq * queue = &data->fifo; + starpu_pthread_mutex_t * mutex = &data->mutex; + int ret = 0; + const double now = starpu_timing_now(); + STARPU_COMPONENT_MUTEX_LOCK(mutex); + + if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold) + { + ret = 1; + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + } + else if(data->exp) + { + double exp_len; + if(!isnan(task->predicted)) + exp_len = queue->exp_len + task->predicted; + else + exp_len = queue->exp_len; + + if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold) + { + static int warned; + STARPU_HG_DISABLE_CHECKING(warned); + if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned) + { + _STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. 
This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold); + warned = 1; + } + ret = 1; + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + } + else + { + if(!isnan(task->predicted_transfer)) + { + double end = fifo_estimated_end(component); + double tfer_end = now + task->predicted_transfer; + /* FIXME: We don't have overlap when running CPU-CPU transfers */ + if(tfer_end < end) + task->predicted_transfer = 0.0; + else + task->predicted_transfer = tfer_end - end; + exp_len += task->predicted_transfer; + } + + if(!isnan(task->predicted)) + { + queue->exp_len = exp_len; + queue->exp_end = queue->exp_start + queue->exp_len; + } + STARPU_ASSERT(!isnan(queue->exp_end)); + STARPU_ASSERT(!isnan(queue->exp_len)); + STARPU_ASSERT(!isnan(queue->exp_start)); + } + } + + if(!ret) + { + if(is_pushback) + ret = starpu_st_fifo_taskq_push_back_task(queue,task); + else + { + ret = starpu_st_fifo_taskq_push_task(queue,task); + starpu_sched_component_prefetch_on_node(component, task); + } + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + if(!is_pushback) + component->can_pull(component); + } + + return ret; +} + +static int fifo_push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + return fifo_push_local_task(component, task, 0); +} + +static struct starpu_task * fifo_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to) +{ + STARPU_ASSERT(component && component->data); + struct _starpu_fifo_data * data = component->data; + struct starpu_st_fifo_taskq * queue = &data->fifo; + starpu_pthread_mutex_t * mutex = &data->mutex; + const double now = starpu_timing_now(); + + if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_fifo_taskq_empty(queue)) + { + starpu_sched_component_send_can_push_to_parents(component); + return NULL; + } + + STARPU_COMPONENT_MUTEX_LOCK(mutex); + struct starpu_task * task; + if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE) +#ifdef 
STARPU_DEVEL +#warning In eager schedulers, we never write that we want to fill the fifo before picking up a task. Eager is then ineffective since in practice the fifo will not fill +#endif + task = starpu_st_fifo_taskq_pop_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx), -1); + else if (to->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS) + task = starpu_st_fifo_taskq_pop_task(queue, starpu_bitmap_first(&to->workers_in_ctx)); + else + task = starpu_st_fifo_taskq_pop_task(queue, -1); + if(task && data->exp) + { + if(!isnan(task->predicted)) + { + const double exp_len = queue->exp_len - task->predicted; + queue->exp_start = now + task->predicted; + if (exp_len >= 0.0) + { + queue->exp_len = exp_len; + } + else + { + /* exp_len can become negative due to rounding errors */ + queue->exp_len = 0.0; + } + } + + STARPU_ASSERT_MSG(queue->exp_len>=0, "fifo->exp_len=%lf\n",queue->exp_len); + if(!isnan(task->predicted_transfer)) + { + if (queue->exp_len > task->predicted_transfer) + { + queue->exp_start += task->predicted_transfer; + queue->exp_len -= task->predicted_transfer; + } + else + { + queue->exp_start += queue->exp_len; + queue->exp_len = 0; + } + } + + queue->exp_end = queue->exp_start + queue->exp_len; + if(queue->ntasks == 0) + queue->exp_len = 0.0; + } + STARPU_ASSERT(!isnan(queue->exp_end)); + STARPU_ASSERT(!isnan(queue->exp_len)); + STARPU_ASSERT(!isnan(queue->exp_start)); + STARPU_COMPONENT_MUTEX_UNLOCK(mutex); + + // When a pop is called, a can_push is called for pushing tasks onto + // the empty place of the queue left by the popped task. + + starpu_sched_component_send_can_push_to_parents(component); + + if(task) + return task; + + return NULL; +} + +/* When a can_push is caught by this function, we try to pop and push + * tasks from our local queue as much as possible, until a + * push fails, which means that the worker fifo_components are + * currently "full". 
+ */ +static int fifo_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(component && starpu_sched_component_is_fifo(component)); + int res = 0; + struct starpu_task * task; + + task = starpu_sched_component_pump_downstream(component, &res); + + if(task) + { + int ret = fifo_push_local_task(component,task,1); + STARPU_ASSERT(!ret); + } + + return res; +} + +int starpu_sched_component_is_fifo(struct starpu_sched_component * component) +{ + return component->push_task == fifo_push_task; +} + +struct starpu_sched_component * starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data * params) +{ + struct starpu_sched_component *component = starpu_sched_component_create(tree, "fifo"); + struct _starpu_fifo_data *data; + _STARPU_MALLOC(data, sizeof(*data)); + starpu_st_fifo_taskq_init(&data->fifo); + STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); + component->data = data; + component->estimated_end = fifo_estimated_end; + component->estimated_load = fifo_estimated_load; + component->push_task = fifo_push_task; + component->pull_task = fifo_pull_task; + component->can_push = fifo_can_push; + component->deinit_data = fifo_component_deinit_data; + + if(params) + { + data->ntasks_threshold=params->ntasks_threshold; + data->exp_len_threshold=params->exp_len_threshold; + data->ready=params->ready; + data->exp=params->exp; + } + else + { + data->ntasks_threshold=0; + data->exp_len_threshold=0.0; + data->ready=0; + data->exp=0; + } + + return component; +} diff --git a/src/sched_policies/component_heft.c b/src/sched_policies/component_heft.c new file mode 100644 index 0000000..cfefe95 --- /dev/null +++ b/src/sched_policies/component_heft.c @@ -0,0 +1,244 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ * Copyright (C) 2020-2020 Télécom Sud Paris
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* HEFT variant which tries to schedule a given number of tasks instead of just
+ * the first of its scheduling window, and actually schedule the task for which
+ * the most benefit is achieved. */
+
+#include
+#include
+#include
+#include "helper_mct.h"
+#include
+#include
+#include
+#include
+
+/* Maximum number of queued tasks examined by one scheduling step */
+#define NTASKS 5
+
+struct _starpu_heft_data
+{
+	/* Tasks waiting to be pushed to a child */
+	struct starpu_st_prio_deque prio;
+	/* Protects prio */
+	starpu_pthread_mutex_t mutex;
+	/* Parameters handed to the starpu_mct_* helpers */
+	struct _starpu_mct_data *mct_data;
+};
+
+/* Perform one scheduling step.
+ *
+ * Up to NTASKS tasks are popped from the queue (the first one, plus following
+ * ones as long as their priority is not lower than the first one's). The task
+ * with the smallest expected termination time is selected, the other ones are
+ * put back at the front of the queue, and the selected one is pushed to the
+ * child chosen by starpu_mct_get_best_component().
+ *
+ * Returns 0 when a task was handed to a child, in which case the caller may
+ * try another step. Returns 1 when the queue was empty or the task could not
+ * be pushed; in the latter case the task is put back at the front of the
+ * queue so that it is never lost. */
+static int heft_progress_one(struct starpu_sched_component *component)
+{
+	struct _starpu_heft_data * data = component->data;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	struct starpu_st_prio_deque * prio = &data->prio;
+	struct starpu_task * (tasks[NTASKS]);
+	unsigned ntasks = 0;
+
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	tasks[0] = starpu_st_prio_deque_pop_task(prio);
+	if (tasks[0])
+	{
+		int priority = tasks[0]->priority;
+		/* Try to look at NTASKS from the queue */
+		for (ntasks = 1; ntasks < NTASKS; ntasks++)
+		{
+			/* Peek first, and only pop when the task is kept */
+			tasks[ntasks] = starpu_st_prio_deque_highest_task(prio);
+			if (!tasks[ntasks] || tasks[ntasks]->priority < priority)
+				break;
+			starpu_st_prio_deque_pop_task(prio);
+		}
+	}
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	if (!ntasks)
+	{
+		/* Nothing queued */
+		return 1;
+	}
+
+	{
+		struct _starpu_mct_data * d = data->mct_data;
+		struct starpu_sched_component * best_component;
+		unsigned n;
+		int ret;
+
+		/* The per-child arrays below hold ntasks consecutive slices of
+		 * component->nchildren entries, one slice per candidate task */
+
+		/* Estimated task duration for each child */
+		double estimated_lengths[component->nchildren * ntasks];
+		/* Estimated transfer duration for each child */
+		double estimated_transfer_length[component->nchildren * ntasks];
+		/* Estimated transfer+task termination for each child */
+		double estimated_ends_with_task[component->nchildren * ntasks];
+
+		/* estimated energy */
+		double local_energy[component->nchildren * ntasks];
+
+		/* Minimum transfer+task termination of the NTASKS tasks over all workers */
+		double min_exp_end_of_task[ntasks];
+		/* Maximum termination of the already-scheduled tasks over all workers */
+		double max_exp_end_of_workers;
+
+		unsigned suitable_components[component->nchildren * ntasks];
+
+		unsigned nsuitable_components[ntasks];
+
+		/* Estimate durations */
+		for (n = 0; n < ntasks; n++)
+		{
+			unsigned offset = component->nchildren * n;
+
+			nsuitable_components[n] = starpu_mct_compute_execution_times(component, tasks[n],
+					estimated_lengths + offset,
+					estimated_transfer_length + offset,
+					suitable_components + offset);
+
+			starpu_mct_compute_expected_times(component, tasks[n],
+					estimated_lengths + offset,
+					estimated_transfer_length + offset,
+					estimated_ends_with_task + offset,
+					&min_exp_end_of_task[n], &max_exp_end_of_workers,
+					suitable_components + offset, nsuitable_components[n]);
+
+			/* Compute the energy, if provided*/
+			starpu_mct_compute_energy(component, tasks[n], local_energy + offset, suitable_components + offset, nsuitable_components[n]);
+		}
+
+		/* best_task is the task that will finish first among the ntasks, while best_benefit is its expected execution time*/
+		int best_task = 0;
+		double best_benefit = min_exp_end_of_task[0];
+
+		/* Find the task which provides the most computation time benefit */
+		for (n = 1; n < ntasks; n++)
+		{
+			if (best_benefit > min_exp_end_of_task[n])
+			{
+				best_benefit = min_exp_end_of_task[n];
+				best_task = n;
+			}
+		}
+
+		STARPU_ASSERT(best_task >= 0);
+
+		/* Push back the other tasks, last one first so that they get
+		 * back to their original order. n is unsigned: decrementing it
+		 * past 0 wraps to a value >= ntasks, which ends the loop. */
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		for (n = ntasks - 1; n < ntasks; n--)
+			if ((int) n != best_task)
+				starpu_st_prio_deque_push_front_task(prio, tasks[n]);
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+		unsigned offset = component->nchildren * best_task;
+
+		int best_icomponent = starpu_mct_get_best_component(d, tasks[best_task], estimated_lengths + offset, estimated_transfer_length + offset, estimated_ends_with_task + offset, local_energy + offset, min_exp_end_of_task[best_task], max_exp_end_of_workers, suitable_components + offset, nsuitable_components[best_task]);
+
+		if (best_icomponent == -1)
+		{
+			/* No best child: fall back to the calibration heuristic */
+			ret = eager_calibration_push_task(component, tasks[best_task]);
+		}
+		else
+		{
+			best_component = component->children[best_icomponent];
+
+			if(starpu_sched_component_is_worker(best_component))
+			{
+				/* Nothing is pushed to a worker child directly: only
+				 * notify it, and keep the task on our side */
+				best_component->can_pull(best_component);
+				ret = 1;
+			}
+			else
+			{
+				starpu_sched_task_break(tasks[best_task]);
+				ret = starpu_sched_component_push_task(component, best_component, tasks[best_task]);
+			}
+		}
+
+		if (ret)
+		{
+			/* Nobody took the selected task: push that one back too.
+			 * This has to be done on every failure path, otherwise the
+			 * task would be dropped and never executed. */
+			STARPU_COMPONENT_MUTEX_LOCK(mutex);
+			starpu_st_prio_deque_push_front_task(prio, tasks[best_task]);
+			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+			return 1;
+		}
+
+		return 0;
+	}
+}
+
+/* Try to push some tasks below */
+static void heft_progress(struct starpu_sched_component *component)
+{
+	STARPU_ASSERT(component && starpu_sched_component_is_heft(component));
+	/* Keep going as long as a step manages to hand a task to a child */
+	while (!heft_progress_one(component))
+		;
+}
+
+/* Push method: queue the task, then try to push queued tasks below.
+ * Always returns 0, the task being kept in our queue in any case. */
+static int heft_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && task && starpu_sched_component_is_heft(component));
+	struct _starpu_heft_data * data = component->data;
+	struct starpu_st_prio_deque * prio = &data->prio;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	starpu_st_prio_deque_push_back_task(prio,task);
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	heft_progress(component);
+
+	return 0;
+}
+
+/* can_push method: first try to push our own queued tasks below, then forward
+ * the notification to the parents, stopping at the first one which returns
+ * non-zero. Returns that value, or 0 if no parent did. */
+static int heft_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	heft_progress(component);
+	int ret = 0;
+	unsigned j;
+	for(j=0; j < component->nparents; j++)
+	{
+		if(component->parents[j] == NULL)
+			continue;
+		else
+		{
+			ret = component->parents[j]->can_push(component->parents[j], component);
+			if(ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+/* Release everything starpu_sched_component_heft_create() set up */
+static void heft_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_heft(component));
+	struct _starpu_heft_data * d = component->data;
+	struct _starpu_mct_data * mct_d = d->mct_data;
+	starpu_st_prio_deque_destroy(&d->prio);
+	/* Counterpart of the STARPU_PTHREAD_MUTEX_INIT() done at creation.
+	 * mct_d->scheduling_mutex is not initialised by the heft constructor,
+	 * so it is left alone here. */
+	STARPU_PTHREAD_MUTEX_DESTROY(&d->mutex);
+	free(mct_d);
+	free(d);
+}
+
+/* A component is a heft component iff its push method is heft_push_task */
+int starpu_sched_component_is_heft(struct starpu_sched_component * component)
+{
+	return component->push_task == heft_push_task;
+}
+
+/* Create a "heft" component in tree. params is handed unchanged to
+ * starpu_mct_init_parameters(). */
+struct starpu_sched_component * starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params)
+{
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "heft");
+	struct _starpu_mct_data *mct_data = starpu_mct_init_parameters(params);
+	struct _starpu_heft_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+
+	starpu_st_prio_deque_init(&data->prio);
+	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
+	data->mct_data = mct_data;
+	component->data = data;
+
+	component->push_task = heft_push_task;
+	component->can_push = heft_can_push;
+	component->deinit_data = heft_component_deinit_data;
+
+	return component;
+}
diff --git a/src/sched_policies/component_heteroprio.c b/src/sched_policies/component_heteroprio.c
new file mode 100644
index 0000000..01d3781
--- /dev/null
+++ b/src/sched_policies/component_heteroprio.c
@@ -0,0 +1,575 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ * Copyright (C) 2020-2020 Télécom Sud Paris
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Heteroprio, which sorts tasks by acceleration factor into buckets, and makes
+ * GPUs take accelerated tasks first and CPUs take non-accelerated tasks first */
+
+#include
+#include
+#include
+#include "helper_mct.h"
+#include
+#include
+#include
+#include
+
+/* Approximation ratio for acceleration factor bucketing
+ * We will put tasks with +-10% similar acceleration into the same bucket. */
+#define APPROX 0.10
+
+struct _starpu_heteroprio_data
+{
+	/* This is an array of priority queues.
+	 * The array is sorted by acceleration factor, most accelerated first */
+	struct starpu_st_prio_deque **bucket;
+	/* accel[i] is the acceleration factor of bucket[i] */
+	float *accel;
+	/* Number of entries of both bucket and accel */
+	unsigned naccel;
+
+	/* This contains tasks which are not supported on all archs. */
+	struct starpu_st_prio_deque no_accel;
+
+	/* This protects all queues */
+	starpu_pthread_mutex_t mutex;
+
+	/* Parameters handed to the starpu_mct_* helpers; its scheduling_mutex
+	 * serializes the scheduling decisions taken below */
+	struct _starpu_mct_data *mct_data;
+
+	/* Batch mode flag: when non-zero, heteroprio_progress_accel() forces
+	 * the priority of the tasks it picks up to INT_MAX */
+	unsigned batch;
+};
+
+/* Pick one task from the acceleration buckets and try to push it to a child
+ * containing a worker of type archtype.
+ *
+ * When front is non-zero the buckets are scanned from the most accelerated
+ * one, otherwise from the least accelerated one.
+ *
+ * Returns 0 when the task was pushed to the chosen child. Returns 1 when no
+ * task was available, or when the chosen child has no worker of that type or
+ * refused the task; in these last two cases the task is put back at the front
+ * of its bucket. */
+static int heteroprio_progress_accel(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, enum starpu_worker_archtype archtype, int front)
+{
+	struct starpu_task *task = NULL;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	int j, ret = 1;
+	/* Acceleration factor of the bucket the task was taken from, used to
+	 * find that bucket again if the task has to be put back */
+	double acceleration = INFINITY;
+
+	struct _starpu_mct_data * d = data->mct_data;
+
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	if (front)
+		/* Pick up accelerated tasks first */
+		for (j = 0; j < (int) data->naccel; j++)
+		{
+			task = starpu_st_prio_deque_pop_task(data->bucket[j]);
+			if (task)
+				break;
+		}
+	else
+		/* Pick up accelerated tasks last */
+		for (j = (int) data->naccel-1; j >= 0; j--)
+		{
+			/* "&& 0": this alternative is currently disabled */
+			if (data->batch && 0)
+				task = starpu_st_prio_deque_pop_back_task(data->bucket[j]);
+			else
+				task = starpu_st_prio_deque_pop_task(data->bucket[j]);
+			if (task)
+				break;
+		}
+
+	if (task)
+	{
+		acceleration = data->accel[j];
+		//fprintf(stderr, "for %s thus %s, found task %p in bucket %d: %f\n", starpu_worker_get_type_as_string(archtype), front?"front":"back", task, j, acceleration);
+	}
+
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	if (!task)
+		return 1;
+
+	if (data->batch)
+		/* In batch mode the fifos below do not use priorities. Do not
+		 * leak a priority for the data prefetches either */
+		task->priority = INT_MAX;
+
+	/* TODO: we might want to prefer to pick up a task whose data is already on some GPU */
+
+	struct starpu_sched_component * best_component;
+
+	/* Estimated task duration for each child */
+	double estimated_lengths[component->nchildren];
+	/* Estimated transfer duration for each child */
+	double estimated_transfer_length[component->nchildren];
+	/* Estimated transfer+task termination for each child */
+	double estimated_ends_with_task[component->nchildren];
+
+	/* provided local energy */
+	double local_energy[component->nchildren];
+
+	/* Minimum transfer+task termination of the task over all workers */
+	double min_exp_end_of_task;
+	/* Maximum termination of the already-scheduled tasks over all workers */
+	double max_exp_end_of_workers;
+
+	unsigned suitable_components[component->nchildren];
+	unsigned nsuitable_components;
+
+	nsuitable_components = starpu_mct_compute_execution_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			suitable_components);
+
+	/* "&& 0": this whole block is currently disabled */
+	if (data->batch && 0)
+	{
+		/* In batch mode, we may want to insist on filling workers with tasks
+		 * by ignoring when other workers would finish this. */
+
+		unsigned i;
+		for (i = 0; i < component->nchildren; i++)
+		{
+			int idworker;
+			for(idworker = starpu_bitmap_first(&component->children[i]->workers);
+				idworker != -1;
+				idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
+			{
+				if (starpu_worker_get_type(idworker) == archtype)
+					break;
+			}
+
+			if (idworker == -1)
+			{
+				/* Not the targeted arch, avoid it */
+
+				/* XXX: INFINITY doesn't seem to be working properly */
+				estimated_lengths[i] = 1000000000;
+				estimated_transfer_length[i] = 1000000000;
+			}
+		}
+	}
+
+	/* Entering critical section to make sure no two workers
+	   make scheduling decisions at the same time */
+	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
+
+	starpu_mct_compute_expected_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			&min_exp_end_of_task, &max_exp_end_of_workers,
+			suitable_components, nsuitable_components);
+
+	/* Compute the energy, if provided*/
+	starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components);
+
+	/* And now find out which worker suits best for this task,
+	 * including data transfer */
+
+	int best_icomponent = starpu_mct_get_best_component(d, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			local_energy,
+			min_exp_end_of_task, max_exp_end_of_workers,
+			suitable_components, nsuitable_components);
+
+	/* If no best component is found, it means that the perfmodel of
+	 * the task had been purged since it has been pushed on the mct component. */
+	/* FIXME: We should perform a push_back message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	/* NOTE(review): unlike the "out" path below, the task is not put back
+	 * into its bucket when eager_calibration_push_task() returns non-zero
+	 * here -- confirm that the task cannot get lost in that case. */
+	if(best_icomponent == -1)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+		return eager_calibration_push_task(component, task);
+	}
+
+
+	best_component = component->children[best_icomponent];
+
+	/* Look for a worker of the requested type below the chosen child */
+	int idworker;
+	for(idworker = starpu_bitmap_first(&best_component->workers);
+		idworker != -1;
+		idworker = starpu_bitmap_next(&best_component->workers, idworker))
+	{
+		if (starpu_worker_get_type(idworker) == archtype)
+			break;
+	}
+
+	if (idworker == -1)
+		goto out;
+
+	/* Ok, we do have a worker there of that type, try to push it there. */
+	STARPU_ASSERT(!starpu_sched_component_is_worker(best_component));
+	starpu_sched_task_break(task);
+	ret = starpu_sched_component_push_task(component,best_component,task);
+
+	/* I can now exit the critical section: Pushing the task above ensures that its execution
+	   time will be taken into account for subsequent scheduling decisions */
+	if (!ret)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+		//fprintf(stderr, "pushed %p to %d\n", task, best_icomponent);
+		/* Great! */
+		return 0;
+	}
+
+out:
+	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+	/* No such kind of worker there, or it refused our task, abort */
+
+	//fprintf(stderr, "could not push %p to %d actually\n", task, best_icomponent);
+	/* Could not push to child actually, push that one back */
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	/* The bucket array may have been reallocated while the mutex was
+	 * released, so the bucket is looked up again by acceleration value
+	 * rather than by its previous index */
+	for (j = 0; j < (int) data->naccel; j++)
+	{
+		if (acceleration == data->accel[j])
+		{
+			starpu_st_prio_deque_push_front_task(data->bucket[j], task);
+			break;
+		}
+	}
+	STARPU_ASSERT(j != (int) data->naccel);
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	//fprintf(stderr, "finished pushing to %d\n", archtype);
+
+	return 1;
+}
+
+/* Schedule a task taken from the no_accel queue, picking the child with
+ * starpu_mct_get_best_component(), whatever the type of its workers.
+ * Returns the value returned by the push (or by eager_calibration_push_task()
+ * when no suitable or best child was found); the caller treats a non-zero
+ * value as "task not taken" and puts the task back into the queue. */
+static int heteroprio_progress_noaccel(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, struct starpu_task *task)
+{
+	struct _starpu_mct_data * d = data->mct_data;
+	int ret;
+
+	struct starpu_sched_component * best_component;
+
+	/* Estimated task duration for each child */
+	double estimated_lengths[component->nchildren];
+	/* Estimated transfer duration for each child */
+	double estimated_transfer_length[component->nchildren];
+	/* Estimated transfer+task termination for each child */
+	double estimated_ends_with_task[component->nchildren];
+
+	/* estimated energy */
+	double local_energy[component->nchildren];
+
+	/* Minimum transfer+task termination of the task over all workers */
+	double min_exp_end_of_task;
+	/* Maximum termination of the already-scheduled tasks over all workers */
+	double max_exp_end_of_workers;
+
+	unsigned suitable_components[component->nchildren];
+	unsigned nsuitable_components;
+
+	nsuitable_components = starpu_mct_compute_execution_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			suitable_components);
+
+	/* If no suitable components were found, it means that the perfmodel of
+	 * the task had been purged since it has been pushed on the mct component. */
+	/* FIXME: We should perform a push_back message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if(nsuitable_components == 0)
+		return eager_calibration_push_task(component, task);
+
+	/* Entering critical section to make sure no two workers
+	   make scheduling decisions at the same time */
+	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
+
+	starpu_mct_compute_expected_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			&min_exp_end_of_task, &max_exp_end_of_workers,
+			suitable_components, nsuitable_components);
+
+	/* Compute the energy, if provided*/
+	starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components);
+
+	/* And now find out which worker suits best for this task,
+	 * including data transfer */
+
+	int best_icomponent = starpu_mct_get_best_component(d, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			local_energy,
+			min_exp_end_of_task, max_exp_end_of_workers,
+			suitable_components, nsuitable_components);
+
+	/* If no best component is found, it means that the perfmodel of
+	 * the task had been purged since it has been pushed on the mct component. */
+	/* FIXME: We should perform a push_back message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if(best_icomponent == -1)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+		return eager_calibration_push_task(component, task);
+	}
+
+	best_component = component->children[best_icomponent];
+
+	STARPU_ASSERT(!starpu_sched_component_is_worker(best_component));
+	ret = starpu_sched_component_push_task(component,best_component,task);
+	/* The push is done inside the critical section */
+	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+
+	return ret;
+}
+
+/* Perform one scheduling step: first try to place one task of the no_accel
+ * queue, then try each worker type in turn until one of them accepts a task
+ * from the acceleration buckets.
+ *
+ * Returns 0 as soon as heteroprio_progress_accel() pushed a task for some
+ * worker type, 1 otherwise. The outcome of the no_accel attempt does not
+ * influence the returned value. */
+static int heteroprio_progress_one(struct starpu_sched_component *component)
+{
+	struct _starpu_heteroprio_data * data = component->data;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	struct starpu_task *task;
+
+	struct starpu_st_prio_deque * no_accel = &data->no_accel;
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	task = starpu_st_prio_deque_pop_task(no_accel);
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	if (task)
+	{
+		if (heteroprio_progress_noaccel(component, data, task))
+		{
+			/* Could not push to child actually, push that one back */
+			STARPU_COMPONENT_MUTEX_LOCK(mutex);
+			starpu_st_prio_deque_push_front_task(no_accel, task);
+			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+		}
+	}
+
+	/* Note: this hardcodes acceleration order */
+	/* The last argument is "front": CUDA, HIP and OpenCL scan the buckets
+	 * from the most accelerated one, MPI master-slave and CPU from the
+	 * least accelerated one */
+	if (!heteroprio_progress_accel(component, data, STARPU_CUDA_WORKER, 1))
+		return 0;
+	if (!heteroprio_progress_accel(component, data, STARPU_HIP_WORKER, 1))
+		return 0;
+	if (!heteroprio_progress_accel(component, data, STARPU_OPENCL_WORKER, 1))
+		return 0;
+	if (!heteroprio_progress_accel(component, data, STARPU_MPI_MS_WORKER, 0))
+		return 0;
+	if (!heteroprio_progress_accel(component, data, STARPU_CPU_WORKER, 0))
+		return 0;
+
+	return 1;
+}
+
+/* Try to push some tasks below */
+static void heteroprio_progress(struct starpu_sched_component *component)
+{
+	STARPU_ASSERT(component && starpu_sched_component_is_heteroprio(component));
+	/* Keep going as long as a step manages to push a task */
+	while (!heteroprio_progress_one(component))
+		;
+}
+
+/* Push method: file the task into the bucket matching its acceleration factor
+ * (ratio between the slowest and the fastest expected execution time over the
+ * workers of the context), creating the bucket if needed, or into the
+ * no_accel queue when that factor cannot be computed. Then try to push queued
+ * tasks below. Always returns 0. */
+static int heteroprio_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && task && starpu_sched_component_is_heteroprio(component));
+	struct _starpu_heteroprio_data * data = component->data;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	unsigned nimpl;
+
+	double min_expected = INFINITY, max_expected = -INFINITY;
+	double acceleration;
+
+	/* "&& 0": currently disabled */
+	if (data->batch && 0)
+		/* Batch mode, we may want to ignore priorities completely */
+		task->priority = INT_MAX;
+
+	/* Compute acceleration between best-performing arch and least-performing arch */
+	/* The loop is left early (workerid != -1) as soon as a worker cannot
+	 * execute the task or has no usable execution time estimation */
+	int workerid;
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
+	    workerid != -1;
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
+	{
+		unsigned impl_mask;
+		if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask))
+			break;
+
+		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx);
+		/* Best expected length over the implementations usable on this worker */
+		double min_arch = INFINITY;
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
+			if (!(impl_mask & (1U << nimpl)))
+				continue;
+			double expected = starpu_task_expected_length(task, perf_arch, nimpl);
+			if (isnan(expected) || expected == 0.)
+			{
+				/* Propagate the "unknown" value to the test below */
+				min_arch = expected;
+				break;
+			}
+			if (expected < min_arch)
+				min_arch = expected;
+		}
+		if (isnan(min_arch) || min_arch == 0.)
+			/* No known execution time, can't do anything here */
+			break;
+
+		STARPU_ASSERT(min_arch != INFINITY);
+		if (min_arch < min_expected)
+			min_expected = min_arch;
+		if (min_arch > max_expected)
+			max_expected = min_arch;
+	}
+
+	if (workerid == -1)
+	{
+		/* All archs can run it */
+		STARPU_ASSERT(!isnan(min_expected));
+		STARPU_ASSERT(!isnan(max_expected));
+		STARPU_ASSERT(min_expected != INFINITY);
+		STARPU_ASSERT(max_expected != -INFINITY);
+		acceleration = max_expected / min_expected;
+		STARPU_ASSERT(!isnan(acceleration));
+
+		//fprintf(stderr,"%s: acceleration %f\n", starpu_task_get_name(task), acceleration);
+
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		unsigned i, j;
+		/* Try to find a bucket with similar acceleration */
+		for (i = 0; i < data->naccel; i++)
+		{
+			if (acceleration >= data->accel[i] * (1 - APPROX) &&
+			    acceleration <= data->accel[i] * (1 + APPROX))
+				break;
+		}
+
+		if (i == data->naccel)
+		{
+			/* Didn't find it, add one */
+			data->naccel++;
+
+			/* Build enlarged copies of both arrays, inserting the new
+			 * bucket so that the decreasing acceleration order is kept */
+			float *newaccel;
+			_STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel));
+			struct starpu_st_prio_deque **newbuckets;
+			_STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets));
+			struct starpu_st_prio_deque *newbucket;
+			_STARPU_MALLOC(newbucket, sizeof(*newbucket));
+			starpu_st_prio_deque_init(newbucket);
+			/* Becomes 1 once the new bucket is placed; also used as
+			 * the index shift of the entries copied after it */
+			int inserted = 0;
+
+			for (j = 0; j < data->naccel-1; j++)
+			{
+				if (!inserted && acceleration > data->accel[j])
+				{
+					/* Insert the new bucket here */
+					i = j;
+					newbuckets[j] = newbucket;
+					newaccel[j] = acceleration;
+					inserted = 1;
+				}
+				newbuckets[j+inserted] = data->bucket[j];
+				newaccel[j+inserted] = data->accel[j];
+			}
+			if (!inserted)
+			{
+				/* Insert it last */
+				/* i already equals data->naccel-1 in that case */
+				newbuckets[data->naccel-1] = newbucket;
+				newaccel[data->naccel-1] = acceleration;
+			}
+			free(data->bucket);
+			free(data->accel);
+			data->bucket = newbuckets;
+			data->accel = newaccel;
+		}
+#if 0
+		fprintf(stderr,"buckets:");
+		for (j = 0; j < data->naccel; j++)
+		{
+			fprintf(stderr, " %f", data->accel[j]);
+		}
+		fprintf(stderr,"\ninserting %p %f to %d\n", task, acceleration, i);
+#endif
+		starpu_st_prio_deque_push_back_task(data->bucket[i],task);
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	}
+	else
+	{
+		/* Not all archs can run it, will resort to HEFT strategy */
+		/* This branch is also taken when some execution time is unknown */
+		acceleration = INFINITY;
+		//fprintf(stderr,"%s: some archs can't do it\n", starpu_task_get_name(task));
+		struct starpu_st_prio_deque * no_accel = &data->no_accel;
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		starpu_st_prio_deque_push_back_task(no_accel,task);
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	}
+
+	heteroprio_progress(component);
+
+	return 0;
+}
+
+/* can_push method: first try to push our own queued tasks below, then forward
+ * the notification to the parents, stopping at the first one which returns
+ * non-zero. Returns that value, or 0 if no parent did. */
+static int heteroprio_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	heteroprio_progress(component);
+	int ret = 0;
+	unsigned j;
+	for(j=0; j < component->nparents; j++)
+	{
+		if(component->parents[j] == NULL)
+			continue;
+		else
+		{
+			ret = component->parents[j]->can_push(component->parents[j], component);
+			if(ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+/* Release the buckets, the queues, both mutexes and the private data */
+static void heteroprio_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_heteroprio(component));
+	struct _starpu_heteroprio_data * d = component->data;
+	struct _starpu_mct_data * mct_d = d->mct_data;
+	unsigned i;
+	for (i = 0; i < d->naccel; i++)
+	{
+		/* Each bucket was allocated separately by heteroprio_push_task() */
+		starpu_st_prio_deque_destroy(d->bucket[i]);
+		free(d->bucket[i]);
+	}
+	free(d->bucket);
+	free(d->accel);
+	starpu_st_prio_deque_destroy(&d->no_accel);
+	STARPU_PTHREAD_MUTEX_DESTROY(&d->mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&mct_d->scheduling_mutex);
+	free(mct_d);
+	free(d);
+}
+
+/* A component is a heteroprio component iff its push method is heteroprio_push_task */
+int starpu_sched_component_is_heteroprio(struct starpu_sched_component * component)
+{
+	return component->push_task == heteroprio_push_task;
+}
+
+/* Create a "heteroprio" component in tree.
+ * params may be NULL: the mct parameters are then initialised from NULL and
+ * batch mode is enabled. */
+struct starpu_sched_component * starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_heteroprio_data * params)
+{
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "heteroprio");
+	struct _starpu_mct_data *mct_data = starpu_mct_init_parameters(params ? params->mct : NULL);
+	struct _starpu_heteroprio_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+
+	/* No bucket at first, they get created on demand by heteroprio_push_task() */
+	data->bucket = NULL;
+	data->accel = NULL;
+	data->naccel = 0;
+	starpu_st_prio_deque_init(&data->no_accel);
+	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
+	data->mct_data = mct_data;
+	STARPU_PTHREAD_MUTEX_INIT(&mct_data->scheduling_mutex,NULL);
+	if (params)
+		data->batch = params->batch;
+	else
+		data->batch = 1;
+	component->data = data;
+
+	component->push_task = heteroprio_push_task;
+	component->can_push = heteroprio_can_push;
+	component->deinit_data = heteroprio_component_deinit_data;
+
+	return component;
+}
diff --git a/src/sched_policies/component_mct.c b/src/sched_policies/component_mct.c
new file mode 100644
index 0000000..1785f3e
--- /dev/null
+++ b/src/sched_policies/component_mct.c
@@ -0,0 +1,132 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ * Copyright (C) 2020-2020 Télécom Sud Paris
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "helper_mct.h"
+#include
+#include
+#include
+
+/* Push method of the mct component.
+ * The child is chosen by starpu_mct_get_best_component() from the execution,
+ * transfer and energy estimations computed for every suitable child.
+ * Returns the result of the push to that child, 1 when that child is a worker
+ * (which is only notified through can_pull), or the result of
+ * eager_calibration_push_task() when no child could be selected. */
+static int mct_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && task && starpu_sched_component_is_mct(component));
+
+	struct _starpu_mct_data *mct = component->data;
+	struct starpu_sched_component *target;
+	int status;
+
+	/* Per-child estimations */
+	double exec_length[component->nchildren];	/* task duration */
+	double transfer_length[component->nchildren];	/* data transfer duration */
+	double end_with_task[component->nchildren];	/* transfer+task termination */
+	double energy[component->nchildren];		/* estimated energy */
+
+	/* Minimum transfer+task termination of the task over all workers */
+	double earliest_task_end;
+	/* Maximum termination of the already-scheduled tasks over all workers */
+	double latest_worker_end;
+
+	unsigned candidates[component->nchildren];
+	unsigned ncandidates = starpu_mct_compute_execution_times(component, task,
+			exec_length, transfer_length, candidates);
+
+	/* No candidate at all: the perfmodel of the task was purged after the
+	 * task was pushed on this component, let the calibration heuristic
+	 * deal with it. */
+	/* FIXME: We should perform a push_back message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if (ncandidates == 0)
+		return eager_calibration_push_task(component, task);
+
+	/* Only one scheduling decision at a time from here on */
+	STARPU_COMPONENT_MUTEX_LOCK(&mct->scheduling_mutex);
+
+	starpu_mct_compute_expected_times(component, task, exec_length, transfer_length,
+			end_with_task, &earliest_task_end, &latest_worker_end, candidates, ncandidates);
+
+	/* Energy estimation, if provided */
+	starpu_mct_compute_energy(component, task, energy, candidates, ncandidates);
+
+	int chosen = starpu_mct_get_best_component(mct, task, exec_length, transfer_length,
+			end_with_task, energy, earliest_task_end, latest_worker_end, candidates, ncandidates);
+
+	/* Same purged-perfmodel situation as above, detected later */
+	/* FIXME: We should perform a push_back message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if (chosen == -1)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&mct->scheduling_mutex);
+		return eager_calibration_push_task(component, task);
+	}
+
+	target = component->children[chosen];
+
+	if (starpu_sched_component_is_worker(target))
+	{
+		/* Workers are only notified, the push is reported as refused */
+		target->can_pull(target);
+		status = 1;
+	}
+	else
+	{
+		starpu_sched_task_break(task);
+		status = starpu_sched_component_push_task(component, target, task);
+	}
+
+	/* The critical section ends only once the task has been pushed below,
+	 * so that its execution time is taken into account by the subsequent
+	 * scheduling decisions */
+	STARPU_COMPONENT_MUTEX_UNLOCK(&mct->scheduling_mutex);
+
+	return status;
+}
+
+/* Destroy the scheduling mutex and release the mct parameters */
+static void mct_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_mct(component));
+
+	struct _starpu_mct_data *mct = component->data;
+
+	STARPU_PTHREAD_MUTEX_DESTROY(&mct->scheduling_mutex);
+	free(mct);
+}
+
+/* A component is a mct component iff its push method is mct_push_task */
+int starpu_sched_component_is_mct(struct starpu_sched_component * component)
+{
+	return component->push_task == mct_push_task;
+}
+
+/* Create a "mct" component in tree. params is handed unchanged to
+ * starpu_mct_init_parameters(), whose result becomes the component data. */
+struct starpu_sched_component * starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params)
+{
+	struct starpu_sched_component *mct_component = starpu_sched_component_create(tree, "mct");
+	struct _starpu_mct_data *mct = starpu_mct_init_parameters(params);
+
+	STARPU_PTHREAD_MUTEX_INIT(&mct->scheduling_mutex, NULL);
+
+	mct_component->data = mct;
+	mct_component->push_task = mct_push_task;
+	mct_component->deinit_data = mct_component_deinit_data;
+
+	return mct_component;
+}
diff --git a/src/sched_policies/component_perfmodel_select.c b/src/sched_policies/component_perfmodel_select.c
new file mode 100644
index 0000000..645d594
--- /dev/null
+++ b/src/sched_policies/component_perfmodel_select.c
@@ -0,0 +1,105 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +/* The decision component takes care of the scheduling of tasks which are not + * calibrated, or tasks which don't have a performance model, because the scheduling + * architecture of this scheduler for tasks with no performance model is exactly + * the same as the tree-prio scheduler. + * Tasks with a perfmodel are pushed to the perfmodel_component, which takes care of the + * scheduling of those tasks on the correct worker_component. 
+ */
+/* Private data of a perfmodel_select component: the three components that
+ * perfmodel_select_push_task() may forward a task to.
+ */
+struct _starpu_perfmodel_select_data
+{
+	struct starpu_sched_component * calibrator_component; /* receives tasks whose predicted length is NaN */
+	struct starpu_sched_component * no_perfmodel_component; /* receives tasks whose predicted length is zero */
+	struct starpu_sched_component * perfmodel_component; /* receives every other task */
+};
+/* push_task callback of the perfmodel_select component.
+ * Asks starpu_sched_component_execute_preds() for a length prediction and
+ * forwards the task to the component matching that prediction.
+ * Returns the value of the forwarded push, or 1 when execute_preds reports
+ * that no worker of the component can execute the task.
+ */
+static int perfmodel_select_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && component->data && task && starpu_sched_component_is_perfmodel_select(component));
+	STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task));
+
+	struct _starpu_perfmodel_select_data * data = component->data;
+	double length;
+	int can_execute = starpu_sched_component_execute_preds(component,task,&length);
+
+	if(can_execute)
+	{
+		if(isnan(length))
+		{
+			static int warned; /* makes the warning below appear only once per process */
+			STARPU_HG_DISABLE_CHECKING(warned);
+			if (!warned)
+			{
+				warned = 1;
+				_STARPU_DISP("Warning: performance model for %s not finished calibrating, using a dumb scheduling heuristic for now\n",starpu_task_get_name(task));
+			}
+			return starpu_sched_component_push_task(component,data->calibrator_component,task);
+		}
+		if(_STARPU_IS_ZERO(length))
+			return starpu_sched_component_push_task(component,data->no_perfmodel_component,task);
+		return starpu_sched_component_push_task(component,data->perfmodel_component,task);
+	}
+	else
+		return 1; /* not expected in practice: the assertion above already checked executability */
+
+}
+/* pull_task callback: this component never hands out a task, it always
+ * returns NULL.
+ */
+static struct starpu_task * perfmodel_select_pull_task(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	/* We don't want to pull tasks blindly, only let them go through push, so we push to the right component. */
+	return NULL;
+}
+/* deinit_data callback: releases the private data allocated by
+ * starpu_sched_component_perfmodel_select_create().
+ */
+static void perfmodel_select_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(component && component->data);
+	struct _starpu_perfmodel_select_data * d = component->data;
+	free(d);
+}
+/* Returns non-zero when component uses perfmodel_select_push_task as its
+ * push_task callback, i.e. when it was set up by the create function below.
+ */
+int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component * component)
+{
+	return component->push_task == perfmodel_select_push_task;
+}
+/* Creates a component named perfmodel_selector in tree.
+ * params and its three component pointers must be non-NULL (asserted); the
+ * pointers are copied into newly allocated private data, which the
+ * deinit_data callback frees.
+ * Returns the new component.
+ */
+struct starpu_sched_component * starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data * params)
+{
+	STARPU_ASSERT(params);
+	STARPU_ASSERT(params->calibrator_component && params->no_perfmodel_component && params->perfmodel_component);
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "perfmodel_selector");
+
+	struct _starpu_perfmodel_select_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+
+	data->calibrator_component = params->calibrator_component;
+	data->no_perfmodel_component = params->no_perfmodel_component;
+	data->perfmodel_component = params->perfmodel_component;
+
+	component->data = data;
+	component->can_pull = starpu_sched_component_send_can_push_to_parents;
+	component->push_task = perfmodel_select_push_task;
+	component->pull_task = perfmodel_select_pull_task;
+	component->deinit_data = perfmodel_select_component_deinit_data;
+	component->estimated_end = starpu_sched_component_estimated_end_min;
+
+	return component;
+}
diff --git a/src/sched_policies/component_prio.c b/src/sched_policies/component_prio.c
new file mode 100644
index 0000000..4ad4dc8
--- /dev/null
+++ b/src/sched_policies/component_prio.c
@@ -0,0 +1,326 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+/* FxT tracing helpers. When fut_active is set they emit a push/pop trace
+ * event tagged with the id of the single worker child of the component, or
+ * with STARPU_NMAXWORKERS + 1 when the component does not have exactly one
+ * worker child. Without STARPU_USE_FXT they expand to nothing.
+ */
+#ifdef STARPU_USE_FXT
+#define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do {							\
+	if (fut_active) {														\
+		int workerid = STARPU_NMAXWORKERS + 1;										\
+		if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0]))			\
+			workerid = starpu_sched_component_worker_get_workerid(component->children[0]);				\
+		_STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len);						\
+	}																\
+} while (0)
+
+#define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do {							\
+	if (fut_active) {														\
+		int workerid = STARPU_NMAXWORKERS + 1;										\
+		if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0]))			\
+			workerid = starpu_sched_component_worker_get_workerid(component->children[0]);				\
+		_STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len);						\
+	}																\
+} while (0)
+#else
+#define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0)
+#define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do { } while (0)
+#endif
+/* Private data of a prio component. */
+struct _starpu_prio_data
+{
+	struct starpu_st_prio_deque prio; /* the task queue itself, with its exp_start/exp_len/exp_end accounting */
+	starpu_pthread_mutex_t mutex; /* taken around accesses to prio in push, pull and estimated_load */
+	unsigned ntasks_threshold; /* when non-zero, regular pushes are refused once the queue holds that many tasks */
+	double exp_len_threshold; /* when non-zero, regular pushes are refused once the expected length reaches it (only when exp is set) */
+	int ready; /* when set, pull takes the first ready task if the puller has a single memory node */
+	int exp; /* when set, push and pull maintain the expected-length accounting of the queue */
+};
+/* deinit_data callback: destroys the queue and the mutex, then frees the
+ * private data.
+ */
+static void prio_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(component && component->data);
+	struct _starpu_prio_data * f = component->data;
+	starpu_st_prio_deque_destroy(&f->prio);
+	STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex);
+	free(f);
+}
+/* estimated_end callback: result of
+ * starpu_sched_component_estimated_end_min_add() called with the expected
+ * length of the local queue. The queue is read without taking the mutex.
+ */
+static double prio_estimated_end(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(component && component->data);
+	struct _starpu_prio_data * data = component->data;
+	struct starpu_st_prio_deque * queue = &data->prio;
+	return starpu_sched_component_estimated_end_min_add(component, queue->exp_len);
+}
+/* estimated_load callback: load reported by
+ * starpu_sched_component_estimated_load() plus the number of queued tasks
+ * divided by a relative speedup. The speedup is the one of the first worker
+ * for a homogeneous component, and the average over all workers of the
+ * context otherwise. The component must have at least one worker (asserted).
+ */
+static double prio_estimated_load(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(component && component->data);
+	STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0);
+	struct _starpu_prio_data * data = component->data;
+	struct starpu_st_prio_deque * queue = &data->prio;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	double relative_speedup = 0.0;
+	double load = starpu_sched_component_estimated_load(component);
+	if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component))
+	{
+		int first_worker = starpu_bitmap_first(&component->workers_in_ctx);
+		relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id));
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		load += queue->ntasks / relative_speedup; /* NOTE(review): unlike the branch below, the speedup is not asserted non-zero here */
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+		return load;
+	}
+	else
+	{
+		int i;
+		for(i = starpu_bitmap_first(&component->workers_in_ctx);
+		    i != -1;
+		    i = starpu_bitmap_next(&component->workers_in_ctx, i))
+			relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, component->tree->sched_ctx_id));
+		relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx);
+		STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup));
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		load += queue->ntasks / relative_speedup;
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	}
+	return load;
+}
+/* Queue task in the local priority deque.
+ * is_pushback == 0: regular push. The push is refused (return value 1, task
+ * not queued) when the task-count threshold or, with exp set, the
+ * expected-length threshold is reached. Otherwise the task goes to the back
+ * of the deque, a prefetch is requested, and can_pull is called once the
+ * mutex has been released.
+ * is_pushback != 0: the task is put back at the front of the deque; the
+ * thresholds are ignored and neither prefetch nor can_pull is triggered.
+ * When exp is set, the queue expected length is updated and
+ * task->predicted_transfer is rewritten to the part of the transfer that
+ * extends past the current estimated end.
+ * Returns 1 on refusal, otherwise the return value of the deque push.
+ */
+static int prio_push_local_task(struct starpu_sched_component * component, struct starpu_task * task, unsigned is_pushback)
+{
+	STARPU_ASSERT(component && component->data && task);
+	STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task));
+	struct _starpu_prio_data * data = component->data;
+	struct starpu_st_prio_deque * queue = &data->prio;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	int ret = 0;
+	const double now = starpu_timing_now();
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+
+	double exp_len = NAN; /* stays NaN when exp is not set; still passed to the trace macro below */
+
+	if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
+	{
+		ret = 1;
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	}
+	else if(data->exp)
+	{
+		if(!isnan(task->predicted))
+			exp_len = queue->exp_len + task->predicted;
+		else
+			exp_len = queue->exp_len;
+
+		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
+		{
+			static int warned; /* makes the warning below appear only once per process */
+			STARPU_HG_DISABLE_CHECKING(warned);
+			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
+			{
+				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
+				warned = 1;
+			}
+			ret = 1;
+			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+		}
+		else
+		{
+			if(!isnan(task->predicted_transfer))
+			{
+				double end = prio_estimated_end(component);
+				double tfer_end = now + task->predicted_transfer;
+				/* FIXME: We don't have overlap when running CPU-CPU transfers */
+				if(tfer_end < end)
+					task->predicted_transfer = 0.0; /* the transfer ends before the queue drains */
+				else
+					task->predicted_transfer = tfer_end - end; /* only the part past the estimated end is counted */
+				exp_len += task->predicted_transfer;
+			}
+
+			if(!isnan(task->predicted))
+			{
+				queue->exp_len = exp_len;
+				queue->exp_end = queue->exp_start + queue->exp_len;
+			}
+			STARPU_ASSERT(!isnan(queue->exp_end));
+			STARPU_ASSERT(!isnan(queue->exp_len));
+			STARPU_ASSERT(!isnan(queue->exp_start));
+		}
+	}
+
+	if(!ret) /* the mutex is still held here: refusal paths above are the only ones that released it */
+	{
+		if(is_pushback)
+			ret = starpu_st_prio_deque_push_front_task(queue,task);
+		else
+		{
+			ret = starpu_st_prio_deque_push_back_task(queue,task);
+			starpu_sched_component_prefetch_on_node(component, task);
+			STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component, queue->ntasks, exp_len);
+		}
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+		if(!is_pushback)
+			component->can_pull(component);
+	}
+
+	return ret;
+}
+/* push_task callback: regular (non-pushback) push into the local queue. */
+static int prio_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	int ret = prio_push_local_task(component, task, 0);
+	return ret;
+}
+/* pull_task callback: take one task out of the local queue on behalf of the
+ * component to, update the expected-length accounting when exp is set, and
+ * tell the parents that they can push again.
+ * Returns the task, or NULL when the queue had none to give.
+ */
+static struct starpu_task * prio_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to)
+{
+	STARPU_ASSERT(component && component->data);
+	struct _starpu_prio_data * data = component->data;
+	struct starpu_st_prio_deque * queue = &data->prio;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	const double now = starpu_timing_now();
+
+	if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(queue)) /* emptiness check done before taking the mutex; skipped under valgrind */
+	{
+		starpu_sched_component_send_can_push_to_parents(component);
+		return NULL;
+	}
+
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	struct starpu_task * task;
+	if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
+		task = starpu_st_prio_deque_deque_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx));
+	else
+		task = starpu_st_prio_deque_pop_task(queue);
+	if(task && data->exp)
+	{
+		if(!isnan(task->predicted))
+		{
+			const double exp_len = queue->exp_len - task->predicted;
+			queue->exp_start = now + task->predicted;
+			if (exp_len >= 0.0)
+			{
+				queue->exp_len = exp_len;
+			}
+			else
+			{
+				/* exp_len can become negative due to rounding errors */
+				queue->exp_len = 0.0;
+			}
+		}
+
+		STARPU_ASSERT_MSG(queue->exp_len>=0, "prio->exp_len=%lf\n",queue->exp_len);
+		if(!isnan(task->predicted_transfer))
+		{
+			if (queue->exp_len > task->predicted_transfer)
+			{
+				queue->exp_start += task->predicted_transfer;
+				queue->exp_len -= task->predicted_transfer;
+			}
+			else
+			{
+				queue->exp_start += queue->exp_len;
+				queue->exp_len = 0;
+			}
+		}
+
+		queue->exp_end = queue->exp_start + queue->exp_len;
+		if(queue->ntasks == 0)
+			queue->exp_len = 0.0;
+	}
+	if(task)
+		STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component, queue->ntasks, queue->exp_len);
+	STARPU_ASSERT(!isnan(queue->exp_end));
+	STARPU_ASSERT(!isnan(queue->exp_len));
+	STARPU_ASSERT(!isnan(queue->exp_start));
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	/* When a pop is called, a can_push is called for pushing tasks onto
+	 * the empty place of the queue left by the popped task. */
+
+	starpu_sched_component_send_can_push_to_parents(component);
+
+	if(task)
+		return task;
+
+	return NULL;
+}
+
+/* When a can_push is caught by this function, we try to pop and push
+ * tasks from our local queue as much as possible, until a
+ * push fails, which means that the worker prio_components are
+ * currently "full".
+ */
+static int prio_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	STARPU_ASSERT(component && starpu_sched_component_is_prio(component));
+	int res = 0; /* set to 1 by the pump as soon as one task went downstream */
+	struct starpu_task * task;
+
+	task = starpu_sched_component_pump_downstream(component, &res); /* non-NULL: a task that was pulled but refused by the child */
+
+	if(task)
+	{
+		int ret = prio_push_local_task(component,task,1); /* is_pushback=1: back to the front of the queue, thresholds ignored */
+		STARPU_ASSERT(!ret);
+	}
+
+	return res;
+}
+/* Returns non-zero when component uses prio_push_task as its push_task
+ * callback, i.e. when it was set up by the create function below.
+ */
+int starpu_sched_component_is_prio(struct starpu_sched_component * component)
+{
+	return component->push_task == prio_push_task;
+}
+/* Creates a component named prio in tree, with an empty priority deque, its
+ * mutex, and the prio callbacks installed.
+ * params may be NULL: both thresholds and the ready and exp flags are then
+ * set to 0; otherwise they are copied from params.
+ * Returns the new component.
+ */
+struct starpu_sched_component * starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data * params)
+{
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "prio");
+	struct _starpu_prio_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+	starpu_st_prio_deque_init(&data->prio);
+	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
+	component->data = data;
+	component->estimated_end = prio_estimated_end;
+	component->estimated_load = prio_estimated_load;
+	component->push_task = prio_push_task;
+	component->pull_task = prio_pull_task;
+	component->can_push = prio_can_push;
+	component->deinit_data = prio_component_deinit_data;
+
+	if(params)
+	{
+		data->ntasks_threshold=params->ntasks_threshold;
+		data->exp_len_threshold=params->exp_len_threshold;
+		data->ready=params->ready;
+		data->exp=params->exp;
+	}
+	else
+	{
+		data->ntasks_threshold=0;
+		data->exp_len_threshold=0.0;
+		data->ready=0;
+		data->exp=0;
+	}
+
+	return component;
+}
diff --git a/src/sched_policies/component_random.c b/src/sched_policies/component_random.c
new file mode 100644
index 0000000..1d36c42
--- /dev/null
+++ b/src/sched_policies/component_random.c
@@ -0,0 +1,118 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include
+/* Sum of starpu_worker_get_relative_speedup() over every worker present in
+ * component->workers_in_ctx. Asserts that the sum is not zero.
+ */
+static double compute_relative_speedup(struct starpu_sched_component * component)
+{
+	double sum = 0.0;
+	int id;
+	for(id = starpu_bitmap_first(&component->workers_in_ctx);
+	    id != -1;
+	    id = starpu_bitmap_next(&component->workers_in_ctx, id))
+	{
+		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(id, component->tree->sched_ctx_id);
+		sum += starpu_worker_get_relative_speedup(perf_arch);
+
+	}
+	STARPU_ASSERT(sum != 0.0);
+	return sum;
+}
+/* push_task callback of the random component.
+ * Among the children able to execute the task, one is drawn at random,
+ * weighted by its summed relative speedup, and the task is forwarded to it.
+ * Returns -ENODEV when no child can execute the task. When the chosen child
+ * is a worker component, its can_pull is called and 1 is returned without
+ * pushing. Otherwise the return value of the forwarded push is returned.
+ * The component must have at least one child (asserted).
+ */
+static int random_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component->nchildren > 0);
+
+	/* indexes_components and size memoize, during the first phase of the
+	 * algorithm, which children can execute the task: the first size entries
+	 * of indexes_components hold the indexes of those children.
+	 */
+	int indexes_components[component->nchildren];
+	unsigned size=0;
+
+	/* speedup[i] is relevant only if i is among the first size elements of
+	 * indexes_components
+	 */
+	double speedup[component->nchildren];
+
+	double alpha_sum = 0.0;
+
+	unsigned i;
+	for(i = 0; i < component->nchildren ; i++)
+	{
+		if(starpu_sched_component_can_execute_task(component->children[i],task))
+		{
+			speedup[size] = compute_relative_speedup(component->children[i]);
+			alpha_sum += speedup[size];
+			indexes_components[size] = i;
+			size++;
+		}
+	}
+	if(size == 0)
+		return -ENODEV;
+
+	/* not fully sure that this code is correct
+	 * because of bad properties of double arithmetic
+	 */
+	double random = starpu_drand48()*alpha_sum; /* presumably uniform in [0, alpha_sum) - depends on starpu_drand48, not shown here */
+	double alpha = 0.0; /* running sum of the speedups of the candidates already skipped */
+	struct starpu_sched_component * select = NULL;
+
+	for(i = 0; i < size ; i++)
+	{
+		int index = indexes_components[i];
+		if(alpha + speedup[i] >= random)
+		{
+			select = component->children[index];
+			break;
+		}
+		alpha += speedup[i];
+	}
+	STARPU_ASSERT(select != NULL);
+	if(starpu_sched_component_is_worker(select))
+	{
+		select->can_pull(select);
+		return 1;
+	}
+
+	starpu_sched_task_break(task);
+	int ret_val = starpu_sched_component_push_task(component,select,task);
+	return ret_val;
+}
+/* pull_task callback: holds no task of its own, so it only calls
+ * starpu_sched_component_can_push(from, to) and returns NULL.
+ */
+static struct starpu_task *random_pull_task(struct starpu_sched_component * from, struct starpu_sched_component *to)
+{
+	starpu_sched_component_can_push(from, to);
+	return NULL;
+}
+/* Returns non-zero when component uses random_push_task as its push_task
+ * callback, i.e. when it was set up by the create function below.
+ */
+int starpu_sched_component_is_random(struct starpu_sched_component *component)
+{
+	return component->push_task == random_push_task;
+}
+/* Creates a component named random in tree and installs the random push and
+ * pull callbacks. arg is ignored. Returns the new component.
+ */
+struct starpu_sched_component * starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg)
+{
+	(void)arg;
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "random");
+	component->push_task = random_push_task;
+	component->pull_task = random_pull_task;
+	return component;
+}
diff --git a/src/sched_policies/component_sched.c b/src/sched_policies/component_sched.c
new file mode
100644 index 0000000..c16b377 --- /dev/null +++ b/src/sched_policies/component_sched.c @@ -0,0 +1,788 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#include + +#include "sched_component.h" + +/****************************************************************************** + * Generic Scheduling Components' helper functions * + ******************************************************************************/ + +/* + * this function find the best implementation or an implementation + * that need to be calibrated for a worker available and set + * prediction in *length. 
nan if a implementation need to be + * calibrated, 0.0 if no perf model are available + * return false if no worker on the component can execute that task + */ +int starpu_sched_component_execute_preds(struct starpu_sched_component * component, struct starpu_task * task, double * length) +{ + STARPU_ASSERT(component && task); + int can_execute = 0; + starpu_task_bundle_t bundle = task->bundle; + double len = DBL_MAX; + + int workerid; + for(workerid = starpu_bitmap_first(&component->workers_in_ctx); + workerid != -1; + workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) + { + int nimpl; + for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if(starpu_worker_can_execute_task(workerid,task,nimpl) + || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) + { + double d; + can_execute = 1; + if(bundle) + { + struct starpu_perfmodel_arch* archtype = + starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id); + d = starpu_task_bundle_expected_length(bundle, archtype, nimpl); + } + else + d = starpu_task_worker_expected_length(task, workerid, component->tree->sched_ctx_id, nimpl); + if(isnan(d)) + { + *length = d; + return can_execute; + } + if(_STARPU_IS_ZERO(d)) + { + continue; + } + STARPU_ASSERT_MSG(d >= 0, "workerid=%d, nimpl=%d, bundle=%p, d=%lf\n", workerid, nimpl, bundle, d); + if(d < len) + { + len = d; + } + } + } + if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component)) + break; + } + + if(len == DBL_MAX) /* we dont have perf model */ + len = 0.0; + if(length) + *length = len; + return can_execute; +} + +/* very similar function that dont compute prediction */ +int starpu_sched_component_can_execute_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + STARPU_ASSERT(task); + STARPU_ASSERT(component); + unsigned nimpl; + int worker; + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + for(worker = starpu_bitmap_first(&component->workers_in_ctx); + -1 != worker; + worker = 
starpu_bitmap_next(&component->workers_in_ctx, worker)) + if (starpu_worker_can_execute_task(worker, task, nimpl) + || starpu_combined_worker_can_execute_task(worker, task, nimpl)) + { + if (starpu_worker_can_execute_task(worker, task, nimpl) == 1) + _STARPU_EXTRA_DEBUG("worker %d CAN execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); + if (starpu_combined_worker_can_execute_task(worker, task, nimpl) == 1) + _STARPU_EXTRA_DEBUG("combined worker %d CAN execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); + return 1; + } + else + { + if (starpu_worker_can_execute_task(worker, task, nimpl) == 0) + _STARPU_EXTRA_DEBUG("worker %d CANNOT execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); + if (starpu_combined_worker_can_execute_task(worker, task, nimpl) == 0) + _STARPU_EXTRA_DEBUG("combined worker %d CANNOT execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); + } + return 0; +} + +/* compute the average of transfer length for tasks on all workers + * maybe this should be optimised if all workers are under the same numa component + */ +double starpu_sched_component_transfer_length(struct starpu_sched_component * component, struct starpu_task * task) +{ + STARPU_ASSERT(component && task); + int nworkers = starpu_bitmap_cardinal(&component->workers_in_ctx); + double sum = 0.0; + int worker; + if(STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component)) + { + unsigned memory_node = starpu_worker_get_memory_node(starpu_bitmap_first(&component->workers_in_ctx)); + if(task->bundle) + return starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node); + else + return starpu_task_expected_data_transfer_time(memory_node, task); + } + + for(worker = starpu_bitmap_first(&component->workers_in_ctx); + worker != -1; + worker = starpu_bitmap_next(&component->workers_in_ctx, worker)) + { + unsigned memory_node = starpu_worker_get_memory_node(worker); + if(task->bundle) + { + 
sum += starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node); + } + else + { + sum += starpu_task_expected_data_transfer_time(memory_node, task); + /* sum += starpu_task_expected_conversion_time(task, starpu_worker_get_perf_archtype(worker, component->tree->sched_ctx_id), impl ?) + * I dont know what to do as we dont know what implementation would be used here... + */ + } + } + return sum / nworkers; +} + +/* This function can be called by components when they think that a prefetching request can be submitted. + * For example, it is currently used by the MCT component to begin the prefetching on accelerators + * on which it pushed tasks as soon as possible. + */ +void starpu_sched_component_prefetch_on_node(struct starpu_sched_component * component, struct starpu_task * task) +{ + if (starpu_get_prefetch_flag() && (!task->prefetched) + && (component->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)) + { + int worker = starpu_bitmap_first(&component->workers_in_ctx); + unsigned memory_node = starpu_worker_get_memory_node(worker); + starpu_prefetch_task_input_on_node(task, memory_node); + } +} + +/* remove all child + * for all child of component, if child->parents[x] == component, set child->parents[x] to null + * call component->deinit_data + */ +void starpu_sched_component_destroy(struct starpu_sched_component *component) +{ + STARPU_ASSERT(component); + unsigned i,j; + for(i = 0; i < component->nchildren; i++) + { + struct starpu_sched_component * child = component->children[i]; + for(j = 0; j < child->nparents; j++) + if(child->parents[j] == component) + child->remove_parent(child,component); + + } + while(component->nchildren != 0) + component->remove_child(component, component->children[0]); + for(i = 0; i < component->nparents; i++) + { + struct starpu_sched_component * parent = component->parents[i]; + for(j = 0; j < parent->nchildren; j++) + if(parent->children[j] == component) + parent->remove_child(parent,component); + + } + 
while(component->nparents != 0) + component->remove_parent(component, component->parents[0]); + component->deinit_data(component); + free(component->children); + free(component->parents); + free(component->name); + free(component); +} + +void starpu_sched_component_destroy_rec(struct starpu_sched_component * component) +{ + if(component == NULL) + return; + + unsigned i = 0; + while(i < component->nchildren) + { + if (starpu_sched_component_is_worker(component->children[i])) + i++; + else + starpu_sched_component_destroy_rec(component->children[i]); + } + + if (!starpu_sched_component_is_worker(component)) + starpu_sched_component_destroy(component); +} + +static void set_properties(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + component->properties = 0; + + int worker = starpu_bitmap_first(&component->workers_in_ctx); + if (worker == -1) + return; + if (starpu_worker_is_combined_worker(worker)) + return; +#ifdef STARPU_DEVEL +#warning FIXME: Not all CUDA devices have the same speed +#endif + uint32_t first_worker = _starpu_get_worker_struct(worker)->worker_mask; + unsigned first_memory_node = _starpu_get_worker_struct(worker)->memory_node; + int is_homogeneous = 1; + int is_all_same_component = 1; + for(; + worker != -1; + worker = starpu_bitmap_next(&component->workers_in_ctx, worker)) + { + if(starpu_worker_is_combined_worker(worker)) + continue; + if(first_worker != _starpu_get_worker_struct(worker)->worker_mask) + is_homogeneous = 0; + if(first_memory_node != _starpu_get_worker_struct(worker)->memory_node) + is_all_same_component = 0; + } + + + if(is_homogeneous) + component->properties |= STARPU_SCHED_COMPONENT_HOMOGENEOUS; + if(is_all_same_component) + component->properties |= STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE; +} + + +/* recursively set the component->workers member of component's subtree + */ +void _starpu_sched_component_update_workers(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + 
if(starpu_sched_component_is_worker(component)) + return; + starpu_bitmap_unset_all(&component->workers); + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + _starpu_sched_component_update_workers(component->children[i]); + starpu_bitmap_or(&component->workers, &component->children[i]->workers); + } + component->notify_change_workers(component); +} + +/* recursively set the component->workers_in_ctx in component's subtree + */ +void _starpu_sched_component_update_workers_in_ctx(struct starpu_sched_component * component, unsigned sched_ctx_id) +{ + STARPU_ASSERT(component); + /* worker components are shared among sched_ctxs, thus we do not apply the sched_ctx worker mask to them. + * per-ctx filtering is performed higher in the tree */ + if(starpu_sched_component_is_worker(component)) + return; + struct starpu_bitmap * workers_in_ctx = _starpu_get_worker_mask(sched_ctx_id); + starpu_bitmap_unset_and(&component->workers_in_ctx,&component->workers, workers_in_ctx); + unsigned i,j; + for(i = starpu_worker_get_count(); i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) + { + if (starpu_bitmap_get(&component->workers, i)) + { + /* Component has this combined worker, check whether the + * context has all the corresponding workers */ + int worker_size; + int *combined_workerid; + starpu_combined_worker_get_description(i, &worker_size, &combined_workerid); + for (j = 0; j < (unsigned) worker_size; j++) + if (!starpu_bitmap_get(workers_in_ctx, combined_workerid[j])) + goto nocombined; + /* We have all workers, add it */ + starpu_bitmap_set(&component->workers_in_ctx, i); + } +nocombined: + (void)0; + } + for(i = 0; i < component->nchildren; i++) + { + struct starpu_sched_component * child = component->children[i]; + _starpu_sched_component_update_workers_in_ctx(child, sched_ctx_id); + } + set_properties(component); + component->notify_change_workers(component); +} + + + 
+/****************************************************************************** + * Scheduling Trees' helper functions * + ******************************************************************************/ + + + +struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); + STARPU_ASSERT(t); + return &t->workers; +} + +void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree * t) +{ + STARPU_ASSERT(t); + if (t->root) + _starpu_sched_component_update_workers_in_ctx(t->root, t->sched_ctx_id); +} + +void starpu_sched_tree_update_workers(struct starpu_sched_tree * t) +{ + STARPU_ASSERT(t); + if (t->root) + _starpu_sched_component_update_workers(t->root); +} + + + +/****************************************************************************** + * Scheduling Trees' Functions * + * Most of them are used to define the starpu_sched_policy interface * + ******************************************************************************/ + +void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child) +{ + parent->add_child(parent, child); + child->add_parent(child, parent); + _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent,child); +} + +int starpu_sched_tree_push_task(struct starpu_task * task) +{ + STARPU_ASSERT(task); + unsigned sched_ctx_id = task->sched_ctx; + struct starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id); + + int ret_val = starpu_sched_component_push_task(NULL, tree->root,task); + + /* Modular schedulers are not supposed to refuse tasks */ + STARPU_ASSERT(!ret_val); + + return 0; +} + +int starpu_sched_component_push_task(struct starpu_sched_component *from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component *to, struct starpu_task *task) +{ + int pushback; + int priority = task->priority; + pushback = to->push_task(to, task); + if 
(!pushback) + _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, priority); + return pushback; +} + +struct starpu_task * starpu_sched_tree_pop_task(unsigned sched_ctx) +{ + unsigned workerid = starpu_worker_get_id_check(); + struct starpu_sched_component * component = starpu_sched_component_worker_get(sched_ctx, workerid); + + /* _starpu_sched_component_lock_worker(workerid) is called by component->pull_task() + */ + struct starpu_task * task = starpu_sched_component_pull_task(component,NULL); + return task; +} + +struct starpu_task * starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to) +{ + struct starpu_task *task = from->pull_task(from, to); + if (task) + _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task); + return task; +} + + +/* + * Pump mechanic to get the task flow rolling. Takes tasks from + * component and send them to the child. + * To be used by components with only one child +*/ +struct starpu_task* starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *child, int* success) +{ + int ret = 0; + + struct starpu_task * task; + + while (1) + { + task = component->pull_task(component,child); + if (!task) + break; + ret = starpu_sched_component_push_task(component,child,task); + if (ret) + break; + if(success) + * success = 1; + } + if(task && ret) + /* Return the task which couldn't actually be pushed */ + return task; + + return NULL; + +} + +struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success) +{ + STARPU_ASSERT(component->nchildren == 1); + return starpu_sched_component_pump_to(component, component->children[0], success); +} + +void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + STARPU_ASSERT(workerids); + struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); + + 
STARPU_COMPONENT_MUTEX_LOCK(&t->lock); + _starpu_sched_component_lock_all_workers(); + + unsigned i; + for(i = 0; i < nworkers; i++) + starpu_bitmap_set(&t->workers, workerids[i]); + + starpu_sched_tree_update_workers_in_ctx(t); + + _starpu_sched_component_unlock_all_workers(); + STARPU_COMPONENT_MUTEX_UNLOCK(&t->lock); +} + +void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + STARPU_ASSERT(workerids); + struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_COMPONENT_MUTEX_LOCK(&t->lock); + _starpu_sched_component_lock_all_workers(); + + unsigned i; + for(i = 0; i < nworkers; i++) + starpu_bitmap_unset(&t->workers, workerids[i]); + + starpu_sched_tree_update_workers_in_ctx(t); + + _starpu_sched_component_unlock_all_workers(); + STARPU_COMPONENT_MUTEX_UNLOCK(&t->lock); +} + +static void _starpu_sched_tree_do_schedule(struct starpu_sched_component *component) +{ + unsigned i; + + if (component->do_schedule) + component->do_schedule(component); + + for (i = 0; i < component->nchildren; i++) + _starpu_sched_tree_do_schedule(component->children[i]); +} + +void starpu_sched_tree_do_schedule(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if (t->root) + _starpu_sched_tree_do_schedule(t->root); +} + +static struct starpu_sched_tree *trees[STARPU_NMAX_SCHED_CTXS]; + +struct starpu_sched_tree * starpu_sched_tree_create(unsigned sched_ctx_id) +{ + STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); + STARPU_ASSERT(!trees[sched_ctx_id]); + struct starpu_sched_tree *t; + _STARPU_CALLOC(t, 1, sizeof(*t)); + t->sched_ctx_id = sched_ctx_id; + starpu_bitmap_init(&t->workers); + STARPU_PTHREAD_MUTEX_INIT(&t->lock,NULL); + trees[sched_ctx_id] = t; + return t; +} + +void starpu_sched_tree_destroy(struct starpu_sched_tree * 
tree) +{ + STARPU_ASSERT(tree); + STARPU_ASSERT(trees[tree->sched_ctx_id] == tree); + trees[tree->sched_ctx_id] = NULL; + if(tree->root) + starpu_sched_component_destroy_rec(tree->root); + STARPU_PTHREAD_MUTEX_DESTROY(&tree->lock); + free(tree); +} + +struct starpu_sched_tree * starpu_sched_tree_get(unsigned sched_ctx_id) +{ + return trees[sched_ctx_id]; +} + +void starpu_sched_tree_deinitialize(unsigned sched_ctx_id) +{ + struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_sched_tree_destroy(t); +} + + +/****************************************************************************** + * Interface Functions for Generic Scheduling Components * + ******************************************************************************/ + + + +void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child) +{ + STARPU_ASSERT(component && child); + STARPU_ASSERT(!starpu_sched_component_is_simple_worker(component)); + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + STARPU_ASSERT(component->children[i] != component); + STARPU_ASSERT(component->children[i] != NULL); + } + + _STARPU_REALLOC(component->children, sizeof(struct starpu_sched_component *) * (component->nchildren + 1)); + component->children[component->nchildren] = child; + component->nchildren++; +} + +static void starpu_sched_component_remove_child(struct starpu_sched_component * component, struct starpu_sched_component * child) +{ + STARPU_ASSERT(component && child); + STARPU_ASSERT(!starpu_sched_component_is_simple_worker(component)); + unsigned pos; + for(pos = 0; pos < component->nchildren; pos++) + if(component->children[pos] == child) + break; + STARPU_ASSERT(pos != component->nchildren); + component->children[pos] = component->children[--component->nchildren]; +} + +static void starpu_sched_component_add_parent(struct starpu_sched_component* component, struct starpu_sched_component * 
parent) +{ + STARPU_ASSERT(component && parent); + unsigned i; + for(i = 0; i < component->nparents; i++) + { + STARPU_ASSERT(component->parents[i] != component); + STARPU_ASSERT(component->parents[i] != NULL); + } + + _STARPU_REALLOC(component->parents, sizeof(struct starpu_sched_component *) * (component->nparents + 1)); + component->parents[component->nparents] = parent; + component->nparents++; +} + +static void starpu_sched_component_remove_parent(struct starpu_sched_component * component, struct starpu_sched_component * parent) +{ + STARPU_ASSERT(component && parent); + unsigned pos; + for(pos = 0; pos < component->nparents; pos++) + if(component->parents[pos] == parent) + break; + STARPU_ASSERT(pos != component->nparents); + component->parents[pos] = component->parents[--component->nparents]; +} + +/* default implementation for component->pull_task() + * just perform a recursive call on parent + */ +struct starpu_task * starpu_sched_component_parents_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(component); + struct starpu_task * task = NULL; + unsigned i; + for(i=0; i < component->nparents; i++) + { + if(component->parents[i] == NULL) + continue; + else + { + task = starpu_sched_component_pull_task(component->parents[i], component); + if(task) + break; + } + } + return task; +} + +/* The default implementation of the can_push function is a recursive call to its parents. + * A personally-made can_push in a component (like in prio components) is necessary to catch + * this recursive call somewhere, if the user wants to exploit it. 
+ */ +int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + STARPU_ASSERT(component); + int ret = 0; + if(component->nparents > 0) + { + unsigned i; + for(i=0; i < component->nparents; i++) + { + struct starpu_sched_component * parent = component->parents[i]; + if(parent != NULL) + ret = parent->can_push(parent, component); + if(ret) + break; + } + } + return ret; +} + +/* A can_pull call will try to wake up one worker associated to the children of the + * component: it stops at the first child whose can_pull method returns non-zero and returns 1, or returns 0 if no child did. It is currently called by components which hold a queue (like fifo and prio + * components) to notify their children that a task has been pushed on their local queue. + */ +int starpu_sched_component_can_pull(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + STARPU_ASSERT(!starpu_sched_component_is_worker(component)); + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + if (component->children[i]->can_pull(component->children[i])) + return 1; + } + return 0; +} + +/* Same as starpu_sched_component_can_pull(), except that the can_pull method of every child of the + * component is called, instead of stopping at the first child which returns non-zero. The values + * returned by the children are ignored and 0 is always returned. + */ +int starpu_sched_component_can_pull_all(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + STARPU_ASSERT(!starpu_sched_component_is_worker(component)); + unsigned i; + for(i = 0; i < component->nchildren; i++) + component->children[i]->can_pull(component->children[i]); + return 0; +} + +/* Alternative can_pull which says that this component does not want + * to pull but prefers that you push. 
It can be used by decision + * components, in which decisions are usually taken in their push() + * functions +*/ +int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + STARPU_ASSERT(!starpu_sched_component_is_worker(component)); + + unsigned i; + int ret = 0; + for(i=0; i < component->nparents; i++) + { + if(component->parents[i] == NULL) + continue; + else + { + ret = component->parents[i]->can_push(component->parents[i], component); + if(ret) + break; + } + } + return ret != 0; +} + +double starpu_sched_component_estimated_load(struct starpu_sched_component * component) +{ + double sum = 0.0; + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + struct starpu_sched_component * c = component->children[i]; + sum += c->estimated_load(c); + } + return sum; +} + +double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len) +{ + STARPU_ASSERT(component); + double min = DBL_MAX; + unsigned i; + double ends[component->nchildren]; + for(i = 0; i < component->nchildren; i++) + { + double tmp = ends[i] = component->children[i]->estimated_end(component->children[i]); + if(tmp < min) + min = tmp; + } + if (exp_len > 0) + { + /* We don't know which workers will do this, assume it will be + * evenly distributed to existing work */ + int card = starpu_bitmap_cardinal(&component->workers_in_ctx); + if (card == 0) + /* Oops, no resources to compute our tasks. 
Let's just hope that + * we will be given one at some point */ + card = 1; + for(i = 0; i < component->nchildren; i++) + { + exp_len += ends[i] - min; + } + min += exp_len / card; + } + return min; +} + +double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component) +{ + return starpu_sched_component_estimated_end_min_add(component, 0.); +} + +double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component) +{ + STARPU_ASSERT(component); + double sum = 0.0; + unsigned i; + for(i = 0; i < component->nchildren; i++) + sum += component->children[i]->estimated_end(component->children[i]); + return sum / component->nchildren; +} + +static void take_component_and_does_nothing(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED) +{ +} + +struct starpu_sched_component * starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) +{ + struct starpu_sched_component *component; + _STARPU_CALLOC(component, 1, sizeof(*component)); + component->tree = tree; + starpu_bitmap_init(&component->workers); + starpu_bitmap_init(&component->workers_in_ctx); + component->add_child = starpu_sched_component_add_child; + component->remove_child = starpu_sched_component_remove_child; + component->add_parent = starpu_sched_component_add_parent; + component->remove_parent = starpu_sched_component_remove_parent; + component->pull_task = starpu_sched_component_parents_pull_task; + component->can_push = starpu_sched_component_can_push; + component->can_pull = starpu_sched_component_can_pull; + component->estimated_load = starpu_sched_component_estimated_load; + component->estimated_end = starpu_sched_component_estimated_end_min; + component->deinit_data = take_component_and_does_nothing; + component->notify_change_workers = take_component_and_does_nothing; + component->name = strdup(name); + _STARPU_TRACE_SCHED_COMPONENT_NEW(component); + return component; +} diff --git 
a/src/sched_policies/component_stage.c b/src/sched_policies/component_stage.c new file mode 100644 index 0000000..6d38bc3 --- /dev/null +++ b/src/sched_policies/component_stage.c @@ -0,0 +1,59 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This component takes tasks from its parents in the parent order. + * It can be useful to make scheduling stages, pushing tasks of different stages + * to different schedulers, and this component will pick them up in the right + * order. 
*/ + +#include +#include + +static int stage_push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + _STARPU_DISP("stage component is not supposed to be pushed to...\n"); + STARPU_ASSERT(component->nchildren == 1); + return starpu_sched_component_push_task(component, component->children[0], task); +} + +static int stage_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_DISP("stage component is not supposed to be pushed to...\n"); + return starpu_sched_component_can_push(component, to); +} + +static struct starpu_task * stage_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_task *task; + task = starpu_sched_component_parents_pull_task(component, to); + return task; +} + +int starpu_sched_component_is_stage(struct starpu_sched_component * component) +{ + return component->push_task == stage_push_task; +} + +struct starpu_sched_component * starpu_sched_component_stage_create(struct starpu_sched_tree *tree, void *args STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_sched_component *component = starpu_sched_component_create(tree, "stage"); + component->push_task = stage_push_task; + /* The default implementation happens to be doing staged pull from parents */ + component->pull_task = stage_pull_task; + component->can_push = stage_can_push; + + return component; +} diff --git a/src/sched_policies/component_userchoice.c b/src/sched_policies/component_userchoice.c new file mode 100644 index 0000000..0d86ab5 --- /dev/null +++ b/src/sched_policies/component_userchoice.c @@ -0,0 +1,56 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This component uses (uintptr_t) task->sched_data as the child number it + * should push its tasks to. It can thus be used to let the user choose which + * scheduler a task should go to. Pulling from it is not expected: its pull_task and can_pull methods print a diagnostic and then fall back to the generic implementations. */ + +#include +#include + +static int userchoice_push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + unsigned target = (uintptr_t) task->sched_data; + STARPU_ASSERT(target < component->nchildren); + return starpu_sched_component_push_task(component, component->children[target], task); +} + +static struct starpu_task * userchoice_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + _STARPU_DISP("userchoice component is not supposed to be pull from...\n"); + return starpu_sched_component_parents_pull_task(component, to); +} + +static int userchoice_can_pull(struct starpu_sched_component * component) +{ + _STARPU_DISP("userchoice component is not supposed to be pull from...\n"); + return starpu_sched_component_can_pull(component); +} + +int starpu_sched_component_is_userchoice(struct starpu_sched_component * component) +{ + return component->push_task == userchoice_push_task; +} + +struct starpu_sched_component * starpu_sched_component_userchoice_create(struct starpu_sched_tree *tree, void *args STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_sched_component *component = 
starpu_sched_component_create(tree, "userchoice"); + component->push_task = userchoice_push_task; + component->pull_task = userchoice_pull_task; + component->can_pull = userchoice_can_pull; + + return component; +} diff --git a/src/sched_policies/component_work_stealing.c b/src/sched_policies/component_work_stealing.c new file mode 100644 index 0000000..8ba0c3e --- /dev/null +++ b/src/sched_policies/component_work_stealing.c @@ -0,0 +1,405 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef STARPU_DEVEL +#warning TODO: locality work-stealing +#endif + +struct _starpu_component_work_stealing_data_per_worker +{ + struct starpu_st_prio_deque fifo; + unsigned last_pop_child; +}; + +struct _starpu_component_work_stealing_data +{ +/* keep track of the work performed from the beginning of the algorithm to make + * better decisions about which queue to select when stealing or deferring work + */ + struct _starpu_component_work_stealing_data_per_worker *per_worker; + unsigned performed_total, last_push_child; + + starpu_pthread_mutex_t ** mutexes; + unsigned size; +}; + + +/** + * steal a task in a round robin way, starting from the queue recorded in last_pop_child + * return NULL if none available. NOTE(review): per_worker[] is indexed by workerid for last_pop_child but by child index (< nchildren) for fifo -- confirm workerid is always a valid child index + */ +static struct starpu_task * steal_task_round_robin(struct starpu_sched_component *component, int workerid) +{ + struct _starpu_component_work_stealing_data *wsd = component->data; + unsigned i = wsd->per_worker[workerid].last_pop_child; + wsd->per_worker[workerid].last_pop_child = (i + 1) % component->nchildren; + /* If this queue has no suitable task, let's try + * the next ones */ + struct starpu_task * task = NULL; + while (1) + { + struct starpu_st_prio_deque * fifo = &wsd->per_worker[i].fifo; + + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + task = starpu_st_prio_deque_deque_task_for_worker(fifo, workerid, NULL); + if(task && !isnan(task->predicted)) + { + fifo->exp_len -= task->predicted; + fifo->nprocessed--; + } + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + if(task) + { + starpu_sched_task_break(task); + break; + } + + if (i == wsd->per_worker[workerid].last_pop_child) + { + /* We reached the queue recorded in last_pop_child, + * don't go in infinite loop */ + return NULL; + } + i = (i + 1) % component->nchildren; + + } + return task; +} + +/** + * Return a worker to whom add a task. + * Selecting a worker is done in a round-robin fashion. 
+ */ +static unsigned select_worker_round_robin(struct starpu_sched_component * component) +{ + struct _starpu_component_work_stealing_data *ws = (struct _starpu_component_work_stealing_data*)component->data; + unsigned i = (ws->last_push_child + 1) % component->nchildren ; + ws->last_push_child = i; + return i; +} + + +/** + * Steal a task on behalf of worker workerid. + * This is a thin wrapper which always delegates + * to steal_task_round_robin() and returns its result. + */ +static inline struct starpu_task * steal_task(struct starpu_sched_component * component, int workerid) +{ + return steal_task_round_robin(component, workerid); +} + +/** + * Return the index of the child selected for the next push. + * This is a thin wrapper which always delegates + * to select_worker_round_robin() and returns its result. + */ +static inline unsigned select_worker(struct starpu_sched_component * component) +{ + return select_worker_round_robin(component); +} + + +static int is_worker_of_component(struct starpu_sched_component * component, int workerid) +{ + return starpu_bitmap_get(&component->workers, workerid); +} + + + +static struct starpu_task * pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) +{ + unsigned workerid = starpu_worker_get_id_check(); + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + if(is_worker_of_component(component->children[i], workerid)) + break; + } + STARPU_ASSERT(i < component->nchildren); + struct _starpu_component_work_stealing_data * wsd = component->data; + const double now = starpu_timing_now(); + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + struct starpu_task * task = starpu_st_prio_deque_pop_task(&wsd->per_worker[i].fifo); + if(task) + { + if(!isnan(task->predicted)) + { + wsd->per_worker[i].fifo.exp_len -= task->predicted; + wsd->per_worker[i].fifo.exp_start = now + task->predicted; + } + } + else + wsd->per_worker[i].fifo.exp_len = 0.0; + + 
STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + if(task) + { + return task; + } + + task = steal_task(component, workerid); + if(task) + { + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + wsd->per_worker[i].fifo.nprocessed++; + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + + return task; + } + for(i=0; i < component->nparents; i++) + { + if(component->parents[i] == NULL) + continue; + else + { + task = starpu_sched_component_pull_task(component->parents[i],component); + if(task) + break; + } + } + if(task) + return task; + else + return NULL; +} + +static double _ws_estimated_end(struct starpu_sched_component * component) +{ + STARPU_ASSERT(starpu_sched_component_is_work_stealing(component)); + struct _starpu_component_work_stealing_data * wsd = component->data; + double sum_len = 0.0; + double sum_start = 0.0; + unsigned i; + const double now = starpu_timing_now(); + for(i = 0; i < component->nchildren; i++) + { + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + sum_len += wsd->per_worker[i].fifo.exp_len; + wsd->per_worker[i].fifo.exp_start = STARPU_MAX(now, wsd->per_worker[i].fifo.exp_start); + sum_start += wsd->per_worker[i].fifo.exp_start; + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + + } + int nb_workers = starpu_bitmap_cardinal(&component->workers_in_ctx); + + return (sum_start + sum_len) / nb_workers; +} + +static double _ws_estimated_load(struct starpu_sched_component * component) +{ + STARPU_ASSERT(starpu_sched_component_is_work_stealing(component)); + struct _starpu_component_work_stealing_data * wsd = component->data; + int ntasks = 0; + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + ntasks += wsd->per_worker[i].fifo.ntasks; + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + } + double speedup = 0.0; + int workerid; + for(workerid = starpu_bitmap_first(&component->workers_in_ctx); + -1 != workerid; + workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) + { + 
speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id)); + } + + return ntasks / speedup; +} + +static int push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + struct _starpu_component_work_stealing_data * wsd = component->data; + int ret; + unsigned i = wsd->last_push_child; + int found = 0; + + /* Find a child component that can execute this task */ + i = (i+1)%component->nchildren; + while(1) + { + int workerid; + for(workerid = starpu_bitmap_first(&component->children[i]->workers_in_ctx); + -1 != workerid; + workerid = starpu_bitmap_next(&component->children[i]->workers_in_ctx, workerid)) + { + unsigned impl; + int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl); + if (can_execute) + { + /* Found one, set the implementation by the way */ + starpu_task_set_implementation(task, impl); + found = 1; + break; + } + } + if (found) + break; + STARPU_ASSERT_MSG(i != wsd->last_push_child, "Could not find child able to execute this task"); + i = (i+1)%component->nchildren; + } + + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + starpu_sched_task_break(task); + ret = starpu_st_prio_deque_push_front_task(&wsd->per_worker[i].fifo, task); + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + + wsd->last_push_child = i; + starpu_sched_component_can_pull_all(component); + return ret; +} + + +//this function is special, when a worker call it, we want to push the task in his fifo +int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task) +{ + int workerid = starpu_worker_get_id(); + if(workerid == -1) + return starpu_sched_tree_push_task(task); + + /* Check that we can execute it */ + unsigned impl; + int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl); + if (!can_execute) + return starpu_sched_tree_push_task(task); + + /* Ok, use that implementation */ + starpu_task_set_implementation(task, impl); + + unsigned 
sched_ctx_id = task->sched_ctx; + struct starpu_sched_component * component =starpu_sched_component_worker_get(sched_ctx_id, workerid); + while(sched_ctx_id < component->nparents && component->parents[sched_ctx_id] != NULL) + { + component = component->parents[sched_ctx_id]; + if(starpu_sched_component_is_work_stealing(component)) + { + if(!starpu_sched_component_can_execute_task(component, task)) + return starpu_sched_tree_push_task(task); + + unsigned i; + for(i = 0; i < component->nchildren; i++) + if(is_worker_of_component(component->children[i], workerid)) + break; + STARPU_ASSERT(i < component->nchildren); + + struct _starpu_component_work_stealing_data * wsd = component->data; + STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); + int ret = starpu_st_prio_deque_push_front_task(&wsd->per_worker[i].fifo , task); + if(ret == 0 && !isnan(task->predicted)) + wsd->per_worker[i].fifo.exp_len += task->predicted; + STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); + + component->can_pull(component); + return ret; + } + } + + return starpu_sched_tree_push_task(task); +} + + +static void _ws_add_child(struct starpu_sched_component * component, struct starpu_sched_component * child) +{ + struct _starpu_component_work_stealing_data * wsd = component->data; + starpu_sched_component_add_child(component, child); + if(wsd->size < component->nchildren) + { + STARPU_ASSERT(wsd->size == component->nchildren - 1); + _STARPU_REALLOC(wsd->per_worker, component->nchildren * sizeof(*wsd->per_worker)); + _STARPU_REALLOC(wsd->mutexes, component->nchildren * sizeof(*wsd->mutexes)); + wsd->size = component->nchildren; + } + + wsd->per_worker[component->nchildren - 1].last_pop_child = 0; + starpu_st_prio_deque_init(&wsd->per_worker[component->nchildren - 1].fifo); + + starpu_pthread_mutex_t *mutex; + _STARPU_MALLOC(mutex, sizeof(*mutex)); + STARPU_PTHREAD_MUTEX_INIT(mutex,NULL); + wsd->mutexes[component->nchildren - 1] = mutex; +} + +static void _ws_remove_child(struct starpu_sched_component * 
component, struct starpu_sched_component * child) +{ + struct _starpu_component_work_stealing_data * wsd = component->data; + + STARPU_PTHREAD_MUTEX_DESTROY(wsd->mutexes[component->nchildren - 1]); + free(wsd->mutexes[component->nchildren - 1]); + + unsigned i_component; + for(i_component = 0; i_component < component->nchildren; i_component++) + { + if(component->children[i_component] == child) + break; + } + STARPU_ASSERT(i_component != component->nchildren); + struct starpu_st_prio_deque tmp_fifo = wsd->per_worker[i_component].fifo; + wsd->per_worker[i_component].fifo = wsd->per_worker[component->nchildren - 1].fifo; + + + component->children[i_component] = component->children[component->nchildren - 1]; + component->nchildren--; + struct starpu_task * task; + while ((task = starpu_st_prio_deque_pop_task(&tmp_fifo))) + { + starpu_sched_component_push_task(NULL, component, task); + } +} + +static void _work_stealing_component_deinit_data(struct starpu_sched_component * component) +{ + struct _starpu_component_work_stealing_data * wsd = component->data; + free(wsd->per_worker); + free(wsd->mutexes); + free(wsd); +} + +int starpu_sched_component_is_work_stealing(struct starpu_sched_component * component) +{ + return component->push_task == push_task; +} + +struct starpu_sched_component * starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) +{ + (void)arg; + struct starpu_sched_component *component = starpu_sched_component_create(tree, "work_stealing"); + struct _starpu_component_work_stealing_data *wsd; + _STARPU_CALLOC(wsd, 1, sizeof(*wsd)); + component->pull_task = pull_task; + component->push_task = push_task; + component->add_child = _ws_add_child; + component->remove_child = _ws_remove_child; + component->estimated_end = _ws_estimated_end; + component->estimated_load = _ws_estimated_load; + component->deinit_data = _work_stealing_component_deinit_data; + component->data = wsd; + return component; +} diff --git 
a/src/sched_policies/component_worker.c b/src/sched_policies/component_worker.c new file mode 100644 index 0000000..b7a87da --- /dev/null +++ b/src/sched_policies/component_worker.c @@ -0,0 +1,882 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include + +/* data structure for worker's queue look like this : + * W = worker + * T = simple task + * P = parallel task + * + * + * P--P T + * | | \| + * P--P T T P T + * | | | | | | + * T T P--P--P T + * | | | | | | + * W W W W W W + * + * + * + * its possible that a _starpu_task_grid won't have task, because it have been + * poped by a worker. 
+ * + * N = no task + * + * T T T + * | | | + * P--N--N + * | | | + * W W W + * + * + * this API is a little asymmetric : struct _starpu_task_grid are allocated by the caller and freed by the data structure + * + */ + + + +/****************************************************************************** + * Worker Components' Data Structures * + *****************************************************************************/ + + + +struct _starpu_task_grid +{ + /* this member may be NULL if a worker has popped it, but it is a + * parallel task and we don't want invalid pointers + */ + struct starpu_task * task; + + struct _starpu_task_grid *up, *down, *left, *right; + + /* this is used to count the number of tasks to be popped by a worker + * the leftmost _starpu_task_grid maintains the ntasks counter (i.e. .left == NULL), + * all the others use the pntasks that points to it + * + * when the counter reaches 0, all the left and right members are set to NULL, + * which means that these components will be freed. + */ + union + { + int ntasks; + int * pntasks; + }; +}; + + +/* list->exp_start, list->exp_len, list->exp_end and list->ntasks + * are updated by starpu_sched_component_worker_push_task(component, task) and pre_exec_hook + */ +struct _starpu_worker_task_list +{ + double exp_start, exp_len, exp_end, pipeline_len; + struct _starpu_task_grid *first, *last; + unsigned ntasks, pipeline_ntasks; + starpu_pthread_mutex_t mutex; +}; + +/* This is called when a transfer request is actually pushed to the worker */ +static void _starpu_worker_task_list_transfer_started(struct _starpu_worker_task_list *l, struct starpu_task *task) +{ + double transfer_model = task->predicted_transfer; + if (isnan(transfer_model)) + return; + + /* We now start the transfer, move it from predicted to pipelined */ + l->exp_len -= transfer_model; + l->pipeline_len += transfer_model; + l->exp_start = starpu_timing_now() + l->pipeline_len; + l->exp_end = l->exp_start + l->exp_len; +} + +#ifdef STARPU_DEVEL +#warning 
FIXME: merge with deque_modeling_policy_data_aware +#endif +/* This is called when a task is actually pushed to the worker (i.e. the transfer finished */ +static void _starpu_worker_task_list_started(struct _starpu_worker_task_list *l, struct starpu_task *task) +{ + double model = task->predicted; + double transfer_model = task->predicted_transfer; + if(!isnan(transfer_model)) + /* The transfer is over, remove it from pipelined */ + l->pipeline_len -= transfer_model; + + if(!isnan(model)) + { + /* We now start the computation, move it from predicted to pipelined */ + l->exp_len -= model; + l->pipeline_len += model; + l->exp_start = starpu_timing_now() + l->pipeline_len; + l->exp_end= l->exp_start + l->exp_len; + } +} + +/* This is called when a task is actually finished */ +static void _starpu_worker_task_list_finished(struct _starpu_worker_task_list *l, struct starpu_task *task) +{ + if(!isnan(task->predicted)) + /* The execution is over, remove it from pipelined */ + l->pipeline_len -= task->predicted; + if (!l->pipeline_ntasks) + _STARPU_DISP("warning: bogus computation of pipeline_ntasks?\n"); + else + l->pipeline_ntasks--; + l->exp_start = STARPU_MAX(starpu_timing_now() + l->pipeline_len, l->exp_start); + l->exp_end = l->exp_start + l->exp_len; +} + +struct _starpu_worker_component_data +{ + union + { + struct _starpu_worker * worker; + struct + { + unsigned worker_size; + unsigned workerids[STARPU_NMAXWORKERS]; + } parallel_worker; + }; + struct _starpu_worker_task_list * list; +}; + +/* this array store worker components */ +static struct starpu_sched_component * _worker_components[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; + + +/****************************************************************************** + * Worker Components' Task List and Grid Functions * + *****************************************************************************/ + + + +static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void) +{ + struct 
_starpu_worker_task_list *l; + _STARPU_MALLOC(l, sizeof(*l)); + memset(l, 0, sizeof(*l)); + l->exp_len = l->pipeline_len = 0.0; + l->exp_start = l->exp_end = starpu_timing_now(); + /* These are only for statistics */ + STARPU_HG_DISABLE_CHECKING(l->exp_end); + STARPU_HG_DISABLE_CHECKING(l->exp_start); + STARPU_HG_DISABLE_CHECKING(l->exp_len); + STARPU_HG_DISABLE_CHECKING(l->pipeline_len); + STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL); + return l; +} + +static struct _starpu_task_grid * _starpu_task_grid_create(void) +{ + struct _starpu_task_grid *t; + _STARPU_MALLOC(t, sizeof(*t)); + memset(t, 0, sizeof(*t)); + return t; +} + +static struct _starpu_worker_task_list * _worker_get_list(unsigned sched_ctx_id) +{ + unsigned workerid = starpu_worker_get_id_check(); + STARPU_ASSERT(workerid < starpu_worker_get_count()); + struct _starpu_worker_component_data * d = starpu_sched_component_worker_get(sched_ctx_id, workerid)->data; + return d->list; +} + +static void _starpu_task_grid_destroy(struct _starpu_task_grid * t) +{ + free(t); +} + +static void _starpu_worker_task_list_destroy(struct _starpu_worker_task_list * l) +{ + if(l) + { + /* There can be empty task grids, when we picked the last task after the front task grid */ + struct _starpu_task_grid *t = l->first, *nextt; + + while(t) + { + STARPU_ASSERT(!t->task); + nextt = t->up; + _starpu_task_grid_destroy(t); + t = nextt; + } + STARPU_PTHREAD_MUTEX_DESTROY(&l->mutex); + free(l); + } +} + +static inline void _starpu_worker_task_list_add(struct _starpu_worker_task_list * l, struct starpu_task *task) +{ + double predicted = task->predicted; + double predicted_transfer = task->predicted_transfer; + double end = l->exp_end; + const double now = starpu_timing_now(); + + /* Sometimes workers didn't take the tasks as early as we expected */ + l->exp_start = STARPU_MAX(l->exp_start, now); + + if (now + predicted_transfer < end) + { + /* We may hope that the transfer will be finished by + * the start of the task. 
*/ + predicted_transfer = 0.0; + } + else + { + /* The transfer will not be finished by then, take the + * remainder into account */ + predicted_transfer = (now + predicted_transfer) - end; + } + + if(!isnan(predicted_transfer)) + l->exp_len += predicted_transfer; + + if(!isnan(predicted)) + l->exp_len += predicted; + + l->exp_end = l->exp_start + l->exp_len; + + task->predicted = predicted; + task->predicted_transfer = predicted_transfer; + + l->pipeline_ntasks++; +} + +static inline void _starpu_worker_task_list_push(struct _starpu_worker_task_list * l, struct _starpu_task_grid * t) +{ +/* the task, ntasks, pntasks, left and right members of t are set by the caller */ + STARPU_ASSERT(t->task); + if(l->first == NULL) + l->first = l->last = t; + t->down = l->last; + l->last->up = t; + t->up = NULL; + l->last = t; + l->ntasks++; + + _starpu_worker_task_list_add(l, t->task); +} + +/* recursively set left and right pointers to NULL */ +static inline void _starpu_task_grid_unset_left_right_member(struct _starpu_task_grid * t) +{ + STARPU_ASSERT(t->task == NULL); + struct _starpu_task_grid * t_left = t->left; + struct _starpu_task_grid * t_right = t->right; + t->left = t->right = NULL; + while(t_left) + { + STARPU_ASSERT(t_left->task == NULL); + t = t_left; + t_left = t_left->left; + t->left = NULL; + t->right = NULL; + } + while(t_right) + { + STARPU_ASSERT(t_right->task == NULL); + t = t_right; + t_right = t_right->right; + t->left = NULL; + t->right = NULL; + } +} + +static inline struct starpu_task * _starpu_worker_task_list_pop(struct _starpu_worker_task_list * l) +{ + if(!l->first) + { + l->exp_len = l->pipeline_len = 0.0; + l->exp_start = l->exp_end = starpu_timing_now(); + return NULL; + } + struct _starpu_task_grid * t = l->first; + + /* if there is no task there is no tasks linked to this, then we can free it */ + if(t->task == NULL && t->right == NULL && t->left == NULL) + { + l->first = t->up; + if(l->first) + l->first->down = NULL; + if(l->last == t) + 
l->last = NULL; + _starpu_task_grid_destroy(t); + return _starpu_worker_task_list_pop(l); + } + + while(t) + { + if(t->task) + { + struct starpu_task * task = t->task; + t->task = NULL; + /* the leftist thing hold the number of tasks, other have a pointer to it */ + int * p = t->left ? t->pntasks : &t->ntasks; + + /* the worker who pop the last task allow the rope to be freed */ + if(STARPU_ATOMIC_ADD(p, -1) == 0) + _starpu_task_grid_unset_left_right_member(t); + + l->ntasks--; + + return task; + } + t = t->up; + } + + return NULL; +} + + + +/****************************************************************************** + * Worker Components' Public Helper Functions (Part 1) * + *****************************************************************************/ + + + +struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component * worker_component) +{ + STARPU_ASSERT(starpu_sched_component_is_simple_worker(worker_component)); + struct _starpu_worker_component_data * data = worker_component->data; + return data->worker; +} + +/****************************************************************************** + * Worker Components' Private Helper Functions * + *****************************************************************************/ + + + +#ifndef STARPU_NO_ASSERT +static int _worker_consistant(struct starpu_sched_component * component) +{ + int is_a_worker = 0; + int i; + for(i = 0; itree->sched_ctx_id][i] == component) + is_a_worker = 1; + if(!is_a_worker) + return 0; + struct _starpu_worker_component_data * data = component->data; + if(data->worker) + { + int id = data->worker->workerid; + return (_worker_components[component->tree->sched_ctx_id][id] == component) + && component->nchildren == 0; + } + return 1; +} +#endif + + + +/****************************************************************************** + * Simple Worker Components' Interface Functions * + 
*****************************************************************************/
+
+
+
+/* Wake up the worker behind a simple worker component so that it pulls tasks.
+ * Returns what starpu_wake_worker_relax_light() returns. */
+static int simple_worker_can_pull(struct starpu_sched_component * worker_component)
+{
+	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(worker_component);
+	int workerid = worker->workerid;
+	return starpu_wake_worker_relax_light(workerid);
+}
+
+/* Queue `task' on this worker's list and wake the worker up. Always returns 0. */
+static int simple_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
+{
+	STARPU_ASSERT(starpu_sched_component_is_worker(component));
+	/* this function takes the mutex of the worker's task list */
+	struct _starpu_worker_component_data * data = component->data;
+	struct _starpu_task_grid * t = _starpu_task_grid_create();
+	t->task = task;
+	t->ntasks = 1;
+
+	task->workerid = starpu_bitmap_first(&component->workers);
+#if 1 /* dead lock problem? */
+	if (starpu_get_prefetch_flag() && !task->prefetched)
+		starpu_prefetch_task_input_for(task, task->workerid);
+#endif
+	struct _starpu_worker_task_list * list = data->list;
+	STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
+	_starpu_worker_task_list_push(list, t);
+	STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
+	simple_worker_can_pull(component);
+	return 0;
+}
+
+/* Give the calling worker its next task: first from its own list, otherwise
+ * from the parent components (at most two rounds, and only while
+ * worker->state_keep_awake is set). A task obtained from a parent is accounted
+ * in the list before being returned; an SPMD task pulled by a combined worker
+ * is pushed to the combined worker component and the pull is retried. */
+static struct starpu_task * simple_worker_pull_task(struct starpu_sched_component *component, struct starpu_sched_component * to)
+{
+	unsigned workerid = starpu_worker_get_id_check();
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	struct _starpu_worker_component_data * data = component->data;
+	struct _starpu_worker_task_list * list = data->list;
+	struct starpu_task * task;
+	unsigned i;
+	int n_tries = 0;
+	do
+	{
+		const double now = starpu_timing_now();
+		/* do not reset state_keep_awake here as it may hide tasks in worker->local_tasks */
+		n_tries++;
+		STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
+		/* Take the opportunity to update start time */
+		data->list->exp_start = STARPU_MAX(now, data->list->exp_start);
+		data->list->exp_end = data->list->exp_start + data->list->exp_len;
+		task = _starpu_worker_task_list_pop(list);
+		if(task)
+		{
+			_starpu_worker_task_list_transfer_started(list, task);
+			starpu_push_task_end(task);
+			STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
+			goto ret;
+		}
+		STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
+		for(i=0; i < component->nparents; i++)
+		{
+			if(component->parents[i] == NULL)
+				continue;
+			else
+			{
+				task = starpu_sched_component_pull_task(component->parents[i],component);
+				if(task)
+					break;
+			}
+		}
+	}
+	while((!task) && worker->state_keep_awake && n_tries < 2);
+	if(!task)
+		goto ret;
+	if(task->cl->type == STARPU_SPMD)
+	{
+		if(!starpu_worker_is_combined_worker(workerid))
+		{
+			STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
+			_starpu_worker_task_list_add(list, task);
+			_starpu_worker_task_list_transfer_started(list, task);
+			starpu_push_task_end(task);
+			STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
+			goto ret;
+		}
+		struct starpu_sched_component * combined_worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, workerid);
+		starpu_sched_component_push_task(component, combined_worker_component, task);
+		/* we have pushed a task in the queue, so we can make a recursive call */
+		task = simple_worker_pull_task(component, to);
+		goto ret;
+
+	}
+	if(task)
+	{
+		STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
+		_starpu_worker_task_list_add(list, task);
+		_starpu_worker_task_list_transfer_started(list, task);
+		starpu_push_task_end(task);
+		STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
+	}
+ret:
+	return task;
+}
+
+/* Return the expected end of the work queued on this worker, after moving the
+ * expected start up to the current time when it lies in the past. */
+static double simple_worker_estimated_end(struct starpu_sched_component * component)
+{
+	struct _starpu_worker_component_data * data = component->data;
+	double now = starpu_timing_now();
+	if (now > data->list->exp_start)
+	{
+		data->list->exp_start = now;
+		data->list->exp_end = data->list->exp_start + data->list->exp_len;
+	}
+	return data->list->exp_end;
+}
+
+/* Return the number of tasks held for this worker (its local_tasks plus the
+ * queued and pipelined tasks of its list) divided by the relative speedup of
+ * its architecture. */
+static double simple_worker_estimated_load(struct starpu_sched_component * component)
+{
+	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
+	int nb_task = 0;
+	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
+	struct starpu_task_prio_list *list = &worker->local_tasks;
+	struct starpu_task * task;
+	for(task = starpu_task_prio_list_begin(list);
+	    task != starpu_task_prio_list_end(list);
+	    task = starpu_task_prio_list_next(list, task))
+		nb_task++;
+	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
+	struct _starpu_worker_component_data * d = component->data;
+	struct _starpu_worker_task_list * l = d->list;
+	int ntasks_in_fifo = l ? l->ntasks + l->pipeline_ntasks : 0;
+	return (double) (nb_task + ntasks_in_fifo)
+		/ starpu_worker_get_relative_speedup(
+			starpu_worker_get_perf_archtype(starpu_bitmap_first(&component->workers), component->tree->sched_ctx_id));
+}
+
+/* Release the private data of a worker component (simple or combined) and
+ * remove the component from the _worker_components table. */
+static void _worker_component_deinit_data(struct starpu_sched_component * component)
+{
+	struct _starpu_worker_component_data * d = component->data;
+	_starpu_worker_task_list_destroy(d->list);
+	int i, j;
+	for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+		for(i = 0; i < STARPU_NMAXWORKERS; i++)
+			if(_worker_components[j][i] == component)
+			{
+				_worker_components[j][i] = NULL;
+				/* only leaves the inner loop: the other contexts are still scanned */
+				break;
+			}
+	free(d);
+}
+
+/* Return the simple worker component of `workerid' in the tree's context,
+ * creating and registering it on first use. Returns NULL when there is no
+ * worker structure for that id. */
+static struct starpu_sched_component * starpu_sched_component_worker_create(struct starpu_sched_tree *tree, int workerid)
+{
+	STARPU_ASSERT(workerid >= 0 && workerid < (int) starpu_worker_get_count());
+
+	if(_worker_components[tree->sched_ctx_id][workerid])
+		return _worker_components[tree->sched_ctx_id][workerid];
+
+	struct _starpu_worker * worker = _starpu_get_worker_struct(workerid);
+	if(worker == NULL)
+		return NULL;
+	char name[32];
+	snprintf(name, sizeof(name), "worker %d", workerid);
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, name);
+	struct _starpu_worker_component_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+	memset(data, 0, sizeof(*data));
+
+	data->worker = worker;
+	data->list = _starpu_worker_task_list_create();
+	component->data = data;
+
+	/* FIXME: missing push_task_notify */
+	component->push_task = simple_worker_push_task;
+	component->pull_task = simple_worker_pull_task;
+	component->can_pull = simple_worker_can_pull;
+	component->estimated_end = simple_worker_estimated_end;
+	component->estimated_load = simple_worker_estimated_load;
+	component->deinit_data = _worker_component_deinit_data;
+	starpu_bitmap_set(&component->workers, workerid);
+	starpu_bitmap_or(&component->workers_in_ctx, &component->workers);
+	_worker_components[tree->sched_ctx_id][workerid] = component;
+
+	/*
+#ifdef STARPU_HAVE_HWLOC
+	struct _starpu_machine_config *config = _starpu_get_machine_config();
+	struct _starpu_machine_topology *topology = &config->topology;
+	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid);
+	STARPU_ASSERT(obj);
+	component->obj = obj;
+#endif
+	*/
+
+	return component;
+}
+
+
+
+/******************************************************************************
+ *		Combined Worker Components' Interface Functions		      *
+ *****************************************************************************/
+
+
+
+/* Wake up the members of the combined worker, skipping the calling worker.
+ * Returns 1 as soon as one wake-up call returns non-zero, 0 otherwise. */
+static int combined_worker_can_pull(struct starpu_sched_component * component)
+{
+	(void) component;
+	STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
+	struct _starpu_worker_component_data * data = component->data;
+	int workerid = starpu_worker_get_id();
+	unsigned i;
+	for(i = 0; i < data->parallel_worker.worker_size; i++)
+	{
+		int target = data->parallel_worker.workerids[i];
+		if(target == workerid)
+			continue;
+		if (starpu_wake_worker_relax_light(target))
+			return 1;
+	}
+	return 0;
+}
+
+/* Push a parallel task: one duplicate of `task' is queued on each member
+ * worker, the cells being linked left/right and sharing the counter of the
+ * leftmost one. Always returns 0. */
+static int combined_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
+{
+	STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
+	struct _starpu_worker_component_data * data = component->data;
+	STARPU_ASSERT(data->parallel_worker.worker_size >= 1);
+	struct _starpu_task_grid * task_alias[data->parallel_worker.worker_size];
+	starpu_parallel_task_barrier_init(task, starpu_bitmap_first(&component->workers));
+	task_alias[0] = _starpu_task_grid_create();
+	task_alias[0]->task = starpu_task_dup(task);
+	task_alias[0]->task->workerid = data->parallel_worker.workerids[0];
+	task_alias[0]->task->destroy = 1;
+	task_alias[0]->left = NULL;
+	task_alias[0]->ntasks = data->parallel_worker.worker_size;
+	_STARPU_TRACE_JOB_PUSH(task_alias[0]->task, task_alias[0]->task->priority > 0);
+	unsigned i;
+	for(i = 1; i < data->parallel_worker.worker_size; i++)
+	{
+		task_alias[i] = _starpu_task_grid_create();
+		task_alias[i]->task = starpu_task_dup(task);
+		task_alias[i]->task->destroy = 1;
+		task_alias[i]->task->workerid = data->parallel_worker.workerids[i];
+		task_alias[i]->left = task_alias[i-1];
+		task_alias[i - 1]->right = task_alias[i];
+		task_alias[i]->pntasks = &(task_alias[0]->ntasks);
+		_STARPU_TRACE_JOB_PUSH(task_alias[i]->task, task_alias[i]->task->priority > 0);
+	}
+
+	/* Hand-over-hand locking: the next list is locked before the previous
+	 * one is released */
+	starpu_pthread_mutex_t * mutex_to_unlock = NULL;
+	i = 0;
+	do
+	{
+		struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, data->parallel_worker.workerids[i]);
+		struct _starpu_worker_component_data * worker_data = worker_component->data;
+		struct _starpu_worker_task_list * list = worker_data->list;
+		STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
+		if(mutex_to_unlock)
+			STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock);
+		mutex_to_unlock = &list->mutex;
+
+		_starpu_worker_task_list_push(list, task_alias[i]);
+		i++;
+	}
+	while(i < data->parallel_worker.worker_size);
+
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock);
+
+	int workerid = starpu_worker_get_id();
+	if(-1 == workerid)
+	{
+		combined_worker_can_pull(component);
+	}
+	else
+	{
+		/* wake up all other workers of combined worker */
+		for(i = 0; i < data->parallel_worker.worker_size; i++)
+		{
+			struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, data->parallel_worker.workerids[i]);
+			simple_worker_can_pull(worker_component);
+		}
+
+		combined_worker_can_pull(component);
+	}
+
+	return 0;
+}
+
+/* Nothing can be pulled from a combined worker component: always NULL. */
+static struct starpu_task *combined_worker_pull_task(struct starpu_sched_component * from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	return NULL;
+}
+
+/* Return the latest expected end among the member workers (0.0 at least). */
+static double combined_worker_estimated_end(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
+	struct _starpu_worker_component_data * d = component->data;
+	double max = 0.0;
+	unsigned i;
+	for(i = 0; i < d->parallel_worker.worker_size; i++)
+	{
+		/* Resolve the member through the accessor, as
+		 * combined_worker_estimated_load() does, rather than
+		 * dereferencing the raw table slot, which is NULL as long as
+		 * the member component has not been created. */
+		struct starpu_sched_component * n = starpu_sched_component_worker_get(component->tree->sched_ctx_id, d->parallel_worker.workerids[i]);
+		struct _starpu_worker_component_data * data = n->data;
+		double tmp = data->list->exp_end;
+		max = tmp > max ? tmp : max;
+	}
+	return max;
+}
+
+/* Return the sum of the estimated loads of the member workers. */
+static double combined_worker_estimated_load(struct starpu_sched_component * component)
+{
+	struct _starpu_worker_component_data * d = component->data;
+	double load = 0;
+	unsigned i;
+	for(i = 0; i < d->parallel_worker.worker_size; i++)
+	{
+		struct starpu_sched_component * n = starpu_sched_component_worker_get(component->tree->sched_ctx_id, d->parallel_worker.workerids[i]);
+		load += n->estimated_load(n);
+	}
+	return load;
+}
+
+/* Create a combined worker component over the `nworkers' ids of `workers' and
+ * connect it to the simple worker component of each of them. The component is
+ * not registered in _worker_components by this function. */
+struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers)
+{
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "combined_worker");
+
+	struct _starpu_worker_component_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+	memset(data, 0, sizeof(*data));
+	STARPU_ASSERT(nworkers <= STARPU_NMAXWORKERS);
+	STARPU_ASSERT(nworkers <= starpu_worker_get_count());
+	data->parallel_worker.worker_size = nworkers;
+	memcpy(data->parallel_worker.workerids, workers, nworkers * sizeof(unsigned));
+
+	component->data = data;
+	component->push_task = combined_worker_push_task;
+	component->pull_task = combined_worker_pull_task;
+	component->estimated_end = combined_worker_estimated_end;
+	component->estimated_load = combined_worker_estimated_load;
+	component->can_pull = combined_worker_can_pull;
+	component->deinit_data = _worker_component_deinit_data;
+
+	unsigned i;
+	for (i = 0; i < nworkers; i++)
+		starpu_sched_component_connect(component, starpu_sched_component_worker_get(tree->sched_ctx_id, workers[i]));
+
+	return component;
+}
+
+/* Return the combined worker component of `workerid' in the tree's context,
+ * creating and registering it on first use. Returns NULL when there is no
+ * combined worker structure for that id. */
+static struct starpu_sched_component * starpu_sched_component_combined_worker_create(struct starpu_sched_tree *tree, int workerid)
+{
+	STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
+
+	if(_worker_components[tree->sched_ctx_id][workerid])
+		return _worker_components[tree->sched_ctx_id][workerid];
+
+	struct _starpu_combined_worker * combined_worker = _starpu_get_combined_worker_struct(workerid);
+	if(combined_worker == NULL)
+		return NULL;
+
+	struct starpu_sched_component *component = starpu_sched_component_parallel_worker_create(tree, combined_worker->worker_size, (unsigned *) combined_worker->combined_workerid);
+
+	starpu_bitmap_set(&component->workers, workerid);
+	starpu_bitmap_or(&component->workers_in_ctx, &component->workers);
+
+	_worker_components[tree->sched_ctx_id][workerid] = component;
+
+	/*
+#ifdef STARPU_HAVE_HWLOC
+	struct _starpu_worker_component_data * data = component->data;
+	struct _starpu_machine_config *config = _starpu_get_machine_config();
+	struct _starpu_machine_topology *topology = &config->topology;
+	struct _starpu_worker *worker = _starpu_get_worker_struct(data->parallel_worker.workerids[0]);
+	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid);
+	STARPU_ASSERT(obj);
+	component->obj = obj;
+#endif
+	*/
+	return component;
+}
+
+
+/****************************************************************************** + * Worker Components' Public Helper Functions (Part 2) * + *****************************************************************************/ + + + +void _starpu_sched_component_lock_all_workers(void) +{ + unsigned i; + for(i = 0; i < starpu_worker_get_count(); i++) + starpu_worker_lock(i); +} +void _starpu_sched_component_unlock_all_workers(void) +{ + unsigned i; + for(i = 0; i < starpu_worker_get_count(); i++) + starpu_worker_unlock(i); +} + +void _starpu_sched_component_workers_destroy(void) +{ + int i, j; + for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++) + for(i = 0; i < STARPU_NMAXWORKERS; i++) + if (_worker_components[j][i]) + starpu_sched_component_destroy(_worker_components[j][i]); +} + +int starpu_sched_component_worker_get_workerid(struct starpu_sched_component * worker_component) +{ +#ifndef STARPU_NO_ASSERT + STARPU_ASSERT(_worker_consistant(worker_component)); +#endif + STARPU_ASSERT(1 == starpu_bitmap_cardinal(&worker_component->workers)); + return starpu_bitmap_first(&worker_component->workers); +} + +void starpu_sched_component_worker_pre_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ + struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id); + const double now = starpu_timing_now(); + STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); + _starpu_worker_task_list_started(list, task); + /* Take the opportunity to update start time */ + list->exp_start = STARPU_MAX(now + list->pipeline_len, list->exp_start); + STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); +} + +void starpu_sched_component_worker_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ + if(task->execute_on_a_specific_worker) + return; + struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id); + STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); + _starpu_worker_task_list_finished(list, task); + 
STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); +} + +int starpu_sched_component_is_simple_worker(struct starpu_sched_component * component) +{ + return component->push_task == simple_worker_push_task; +} +int starpu_sched_component_is_combined_worker(struct starpu_sched_component * component) +{ + return component->push_task == combined_worker_push_task; +} + +int starpu_sched_component_is_worker(struct starpu_sched_component * component) +{ + return starpu_sched_component_is_simple_worker(component) + || starpu_sched_component_is_combined_worker(component); +} + +/* As Worker Components' creating functions are protected, this function allows + * the user to get a Worker Component from a worker id */ +struct starpu_sched_component * starpu_sched_component_worker_get(unsigned sched_ctx, int workerid) +{ + STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); + /* we may need to take a mutex here */ + if (!_worker_components[sched_ctx][workerid]) + return starpu_sched_component_worker_new(sched_ctx, workerid); + return _worker_components[sched_ctx][workerid]; +} + +struct starpu_sched_component * starpu_sched_component_worker_new(unsigned sched_ctx, int workerid) +{ + STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); + /* we may need to take a mutex here */ + if (_worker_components[sched_ctx][workerid]) + return _worker_components[sched_ctx][workerid]; + struct starpu_sched_component * component; + if(workerid < (int) starpu_worker_get_count()) + component = starpu_sched_component_worker_create(starpu_sched_tree_get(sched_ctx), workerid); + else + component = starpu_sched_component_combined_worker_create(starpu_sched_tree_get(sched_ctx), workerid); + _worker_components[sched_ctx][workerid] = component; + return component; +} + + + + diff --git a/src/sched_policies/deque_modeling_policy_data_aware.c b/src/sched_policies/deque_modeling_policy_data_aware.c new file mode 100644 index 0000000..e64efd2 --- /dev/null +++ 
b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -0,0 +1,1159 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2016-2016  Uppsala University
+ * Copyright (C) 2013-2013  Thibaut Lambert
+ * Copyright (C) 2013-2013  Simon Archipoff
+ * Copyright (C) 2013-2013  Joris Pablo
+ * Copyright (C) 2011,2020  Télécom Sud Paris
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Distributed queues using performance modeling to assign tasks */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#ifdef BUILDING_STARPU
+#include
+#endif
+#include
+
+#include
+#include /* for fpclassify() checks on knob values */
+
+
+#ifndef DBL_MIN
+#define DBL_MIN __DBL_MIN__
+#endif
+
+#ifndef DBL_MAX
+#define DBL_MAX __DBL_MAX__
+#endif
+
+//#define NOTIFY_READY_SOON
+
+/* Per scheduling context data of the dmda policy */
+struct _starpu_dmda_data
+{
+	/* weights of the dmda cost function; presumably alpha applies to the
+	 * computation time, beta to the communication time and _gamma to the
+	 * consumption -- TODO confirm against the code using them */
+	double alpha;
+	double beta;
+	double _gamma;
+	double idle_power;
+
+	/* one task queue per worker, indexed by worker id */
+	struct starpu_st_fifo_taskq queue_array[STARPU_NMAXWORKERS];
+
+	long int total_task_cnt;
+	long int ready_task_cnt;
+	long int eager_task_cnt; /* number of tasks scheduled without model */
+	/* number of priority levels handed to the fifo helpers; -1 disables
+	 * the per-priority accounting */
+	int num_priorities;
+};
+
+/* performance steering knobs */
+
+/* . per-scheduler knobs */
+static int __s_alpha_knob;
+static int __s_beta_knob;
+static int __s_gamma_knob;
+static int __s_idle_power_knob;
+
+/* .
knob variables */
+static double __s_alpha__value = 1.0;
+static double __s_beta__value = 1.0;
+static double __s_gamma__value = 1.0;
+static double __s_idle_power__value = 1.0;
+
+/* . per-scheduler knob group */
+static struct starpu_perf_knob_group * __kg_starpu_dmda__per_scheduler;
+
+/* Knob group "set" callback: store value->val_double in the variable matching
+ * knob->id. The scheduling policy name carried by `context' is read but
+ * ignored. An unknown knob id aborts.
+ * NOTE(review): the FP_NORMAL assertions also reject 0.0 and subnormal
+ * values, not only NaN and infinities -- confirm that this is intended. */
+static void sched_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
+{
+	const char * const sched_policy_name = *(const char **)context;
+	(void) sched_policy_name;
+	if (knob->id == __s_alpha_knob)
+	{
+		STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL);
+		__s_alpha__value = value->val_double;
+	}
+	else if (knob->id == __s_beta_knob)
+	{
+		STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL);
+		__s_beta__value = value->val_double;
+	}
+	else if (knob->id == __s_gamma_knob)
+	{
+		STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL);
+		__s_gamma__value = value->val_double;
+	}
+	else if (knob->id == __s_idle_power_knob)
+	{
+		STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL);
+		__s_idle_power__value = value->val_double;
+	}
+	else
+	{
+		STARPU_ASSERT(0);
+		/* still stop here when assertions are compiled out */
+		abort();
+	}
+}
+
+/* Knob group "get" callback: copy the variable matching knob->id into
+ * value->val_double. The scheduling policy name carried by `context' is read
+ * but ignored. An unknown knob id aborts. */
+static void sched_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)
+{
+	const char * const sched_policy_name = *(const char **)context;
+	(void) sched_policy_name;
+	if (knob->id == __s_alpha_knob)
+	{
+		value->val_double = __s_alpha__value;
+	}
+	else if (knob->id == __s_beta_knob)
+	{
+		value->val_double = __s_beta__value;
+	}
+	else if (knob->id == __s_gamma_knob)
+	{
+		value->val_double = __s_gamma__value;
+	}
+	else if (knob->id == __s_idle_power_knob)
+	{
+		value->val_double = __s_idle_power__value;
+	}
+	else
+	{
+		STARPU_ASSERT(0);
+		/* still stop here when assertions are compiled out */
+		abort();
+	}
+}
+
+/* Register the per-scheduler knob group of dmda and its four knobs. */
+void _starpu__dmda_c__register_knobs(void)
+{
+	{
+		const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler;
+		__kg_starpu_dmda__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get);
+
+		/* TODO: priority capping knobs actually work globally for now, the sched policy name is ignored */
+		__STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_alpha_knob, double, "alpha constant multiplier");
+
+		__STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_beta_knob, double, "beta constant multiplier");
+
+		__STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_gamma_knob, double, "gamma constant multiplier");
+
+		__STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_idle_power_knob, double, "idle_power constant multiplier");
+	}
+}
+
+/* Unregister the knob group and forget it. */
+void _starpu__dmda_c__unregister_knobs(void)
+{
+	_starpu_perf_knob_group_unregister(__kg_starpu_dmda__per_scheduler);
+	__kg_starpu_dmda__per_scheduler = NULL;
+}
+
+/* The dmda scheduling policy uses
+ *
+ *	alpha * T_computation + beta * T_communication + gamma * Consumption
+ *
+ * Here are the default values of alpha, beta, gamma
+ *
+ * NOTE(review): the gamma default below is 1000.0 whereas the gamma knob
+ * variable above is initialised to 1.0 -- confirm which one is effective.
+ */
+
+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
+#define _STARPU_SCHED_BETA_DEFAULT 1.0
+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
+
+/* This is called when a transfer request is actually pushed to the worker.
+ * Nothing is done when the task has no transfer prediction (NaN).
+ * num_priorities == -1 disables the per-priority accounting. */
+static void _starpu_fifo_task_transfer_started(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities)
+{
+	double transfer_model = task->predicted_transfer;
+	if (isnan(transfer_model))
+		return;
+
+	/* We now start the transfer, move it from predicted to pipelined */
+	fifo->exp_len -= transfer_model;
+	fifo->pipeline_len += transfer_model;
+	fifo->exp_start = starpu_timing_now() + fifo->pipeline_len;
+	fifo->exp_end = fifo->exp_start + fifo->exp_len;
+	if(num_priorities != -1)
+	{
+		int i;
+		int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx);
+		/* every level up to the task's own priority is decreased */
+		for(i = 0; i <= task_prio; i++)
+			fifo->exp_len_per_priority[i] -= transfer_model;
+	}
+}
+
+/* This is called when a task is
actually pushed to the worker (i.e. the transfer finished */ +static void _starpu_fifo_task_started(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities) +{ + double model = task->predicted; + double transfer_model = task->predicted_transfer; + if(!isnan(transfer_model)) + /* The transfer is over, remove it from pipelined */ + fifo->pipeline_len -= transfer_model; + + if(!isnan(model)) + { + /* We now start the computation, move it from predicted to pipelined */ + fifo->exp_len -= model; + fifo->pipeline_len += model; + fifo->exp_start = starpu_timing_now() + fifo->pipeline_len; + fifo->exp_end= fifo->exp_start + fifo->exp_len; + if(num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->exp_len_per_priority[i] -= model; + } + } +} + +/* This is called when a task is actually finished */ +static void _starpu_fifo_task_finished(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities STARPU_ATTRIBUTE_UNUSED) +{ + if(!isnan(task->predicted)) + /* The execution is over, remove it from pipelined */ + fifo->pipeline_len -= task->predicted; + if (!fifo->pipeline_ntasks) + _STARPU_DISP("warning: bogus computation of pipeline_ntasks?\n"); + else + fifo->pipeline_ntasks--; + fifo->exp_start = STARPU_MAX(starpu_timing_now() + fifo->pipeline_len, fifo->exp_start); + fifo->exp_end = fifo->exp_start + fifo->exp_len; +} + + +static struct starpu_task *_dmda_pop_task(unsigned sched_ctx_id, int ready) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + struct starpu_task *task; + + unsigned workerid = starpu_worker_get_id_check(); + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; + + /* Take the opportunity to update start time */ + fifo->exp_start = STARPU_MAX(starpu_timing_now(), fifo->exp_start); + fifo->exp_end = fifo->exp_start + 
fifo->exp_len; + + STARPU_ASSERT_MSG(fifo, "worker %u does not belong to ctx %u anymore.\n", workerid, sched_ctx_id); + + if (ready) + task = starpu_st_fifo_taskq_pop_first_ready_task(fifo, workerid, dt->num_priorities); + else + task = starpu_st_fifo_taskq_pop_local_task(fifo); + if (task) + { + _starpu_fifo_task_transfer_started(fifo, task, dt->num_priorities); + + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); + +#ifdef STARPU_VERBOSE + if (task->cl) + { + int non_ready = starpu_st_non_ready_buffers_count(task, workerid); + if (non_ready == 0) + dt->ready_task_cnt++; + } + + dt->total_task_cnt++; +#endif + } + + return task; +} + +static struct starpu_task *dmda_pop_ready_task(unsigned sched_ctx_id) +{ + return _dmda_pop_task(sched_ctx_id, 1); +} + +static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id) +{ + return _dmda_pop_task(sched_ctx_id, 0); +} + +static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, + double predicted, double predicted_transfer, + int prio, unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + /* make sure someone could execute that task ! 
*/ + STARPU_ASSERT(best_workerid != -1); + + if (_starpu_get_nsched_ctxs() > 1) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, best_workerid, task)) + task = NULL; + _starpu_sched_ctx_unlock_write(sched_ctx_id); + + if (!task) + return 0; + } + + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[best_workerid]; + + double now = starpu_timing_now(); + +#ifdef STARPU_USE_SC_HYPERVISOR + starpu_sched_ctx_call_pushed_task_cb(best_workerid, sched_ctx_id); +#endif //STARPU_USE_SC_HYPERVISOR + + starpu_worker_lock(best_workerid); + + fifo->pipeline_ntasks++; + + /* Sometimes workers didn't take the tasks as early as we expected */ + fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); + fifo->exp_end = fifo->exp_start + fifo->exp_len; + + /* FIXME: We don't have overlap when running CPU-CPU transfers */ + if ((now + predicted_transfer) < fifo->exp_end) + { + /* We may hope that the transfer will be finished by + * the start of the task. 
*/ + predicted_transfer = 0.0; + } + else + { + /* The transfer will not be finished by then, take the + * remainder into account */ + predicted_transfer = (now + predicted_transfer) - fifo->exp_end; + } + + if(!isnan(predicted_transfer)) + { + fifo->exp_len += predicted_transfer; + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->exp_len_per_priority[i] += predicted_transfer; + } + + } + + if(!isnan(predicted)) + { + fifo->exp_len += predicted; + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->exp_len_per_priority[i] += predicted; + } + + } + fifo->exp_end = fifo->exp_start + fifo->exp_len; + + starpu_worker_unlock(best_workerid); + + task->predicted = predicted; + task->predicted_transfer = predicted_transfer; + + if (starpu_get_prefetch_flag()) + starpu_prefetch_task_input_for(task, best_workerid); + + STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), best_workerid); + + if (_starpu_get_nsched_ctxs() > 1) + { + unsigned stream_ctx_id = starpu_worker_get_sched_ctx_id_stream(best_workerid); + if(stream_ctx_id != STARPU_NMAX_SCHED_CTXS) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + starpu_sched_ctx_move_task_to_ctx_locked(task, stream_ctx_id, 0); + starpu_sched_ctx_revert_task_counters_ctx_locked(sched_ctx_id, task->flops); + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } + } + + int ret = 0; + if (prio) + { + starpu_worker_lock(best_workerid); + ret =starpu_st_fifo_taskq_push_sorted_task(&dt->queue_array[best_workerid], task); + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + 
dt->queue_array[best_workerid].ntasks_per_priority[i]++; + } + + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + starpu_wake_worker_locked(best_workerid); +#endif + starpu_push_task_end(task); + starpu_worker_unlock(best_workerid); + } + else + { + starpu_worker_lock(best_workerid); + starpu_task_list_push_back (&dt->queue_array[best_workerid].taskq, task); + dt->queue_array[best_workerid].ntasks++; + dt->queue_array[best_workerid].nprocessed++; +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + starpu_wake_worker_locked(best_workerid); +#endif + starpu_push_task_end(task); + starpu_worker_unlock(best_workerid); + } + + starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, best_workerid); + + return ret; +} + +/* TODO: factorise CPU computations, expensive with a lot of cores */ +static void compute_all_performance_predictions(struct starpu_task *task, + unsigned nworkers, + double local_task_length[nworkers][STARPU_MAXIMPLEMENTATIONS], + double exp_end[nworkers][STARPU_MAXIMPLEMENTATIONS], + double *max_exp_endp_of_workers, + double *min_exp_endp_of_task, + double local_data_penalty[nworkers][STARPU_MAXIMPLEMENTATIONS], + double local_energy[nworkers][STARPU_MAXIMPLEMENTATIONS], + int *forced_worker, int *forced_impl, unsigned sched_ctx_id, unsigned sorted_decision) +{ + int calibrating = 0; + double max_exp_end_of_workers = DBL_MIN; + double best_exp_end_of_task = DBL_MAX; + int ntasks_best = -1; + int nimpl_best = 0; + double ntasks_best_end = 0.0; + + /* A priori, we know all estimations */ + int unknown = 0; + unsigned worker_ctx = 0; + + int task_prio = 0; + + starpu_task_bundle_t bundle = task->bundle; + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if(sorted_decision && dt->num_priorities != -1) + task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, sched_ctx_id); + + struct starpu_worker_collection *workers = 
starpu_sched_ctx_get_worker_collection(sched_ctx_id); + double now = starpu_timing_now(); + + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(worker_ctx<nworkers && workers->has_next(workers, &it)) /* worker_ctx indexes the [nworkers] prediction arrays, never run past them */ + { + unsigned nimpl; + unsigned impl_mask; + unsigned workerid = workers->get_next(workers, &it); + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); + unsigned memory_node = starpu_worker_get_memory_node(workerid); + + STARPU_ASSERT_MSG(fifo != NULL, "workerid %u ctx %u\n", workerid, sched_ctx_id); + + /* Sometimes workers didn't take the tasks as early as we expected */ + double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); + + if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) + continue; + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!(impl_mask & (1U << nimpl))) + { + /* no one on that queue may execute this task */ + continue; + } + + int fifo_ntasks = fifo->ntasks + fifo->pipeline_ntasks; + double prev_exp_len = fifo->exp_len; + /* consider the priority of the task when deciding on which workerid to schedule, + compute the expected_end of the task if it is inserted before other tasks already scheduled */ + if(sorted_decision) + { + if(dt->num_priorities != -1) + { + prev_exp_len = fifo->exp_len_per_priority[task_prio]; + fifo_ntasks = fifo->ntasks_per_priority[task_prio]; + } + else + { + starpu_worker_lock(workerid); + prev_exp_len = starpu_st_fifo_taskq_get_exp_len_prev_task_list(fifo, task, workerid, nimpl, &fifo_ntasks); + starpu_worker_unlock(workerid); + } + } + + exp_end[worker_ctx][nimpl] = exp_start + prev_exp_len; + if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers) + max_exp_end_of_workers = exp_end[worker_ctx][nimpl]; + + //_STARPU_DEBUG("Scheduler dmda: task length (%lf) workerid (%u) kernel (%u) 
\n", local_task_length[workerid][nimpl],workerid,nimpl); + + if (bundle) + { + /* TODO : conversion time */ + local_task_length[worker_ctx][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl); + if (local_data_penalty) + local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node); + if (local_energy) + local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl); + + } + else + { + local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl); + if (local_data_penalty) + local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid); + if (local_energy) + local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl); + double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl); + if (conversion_time > 0.0) + local_task_length[worker_ctx][nimpl] += conversion_time; + } + double ntasks_end = fifo_ntasks / starpu_worker_get_relative_speedup(perf_arch); + + /* + * This implements a default greedy scheduler for the + * case of tasks which have no performance model, or + * whose performance model is not calibrated yet. + * + * It simply uses the number of tasks already pushed to + * the workers, divided by the relative performance of + * a CPU and of a GPU. + * + * This is always computed, but the ntasks_best + * selection is only really used if the task indeed has + * no performance model, or is not calibrated yet. + */ + if (ntasks_best == -1 + + /* Always compute the greedy decision, at least for + * the tasks with no performance model. */ + || (!calibrating && ntasks_end < ntasks_best_end) + + /* The performance model of this task is not + * calibrated on this workerid, try to run it there + * to calibrate it there. 
*/ + || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) + + /* the performance model of this task is not + * calibrated on this workerid either, rather run it + * there if this one is low on scheduled tasks. */ + || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) + ) + { + ntasks_best_end = ntasks_end; + ntasks_best = workerid; + nimpl_best = nimpl; + } + + if (isnan(local_task_length[worker_ctx][nimpl])) + /* we are calibrating, we want to speed-up calibration time + * so we privilege non-calibrated tasks (but still + * greedily distribute them to avoid dumb schedules) */ + calibrating = 1; + + if (isnan(local_task_length[worker_ctx][nimpl]) + || _STARPU_IS_ZERO(local_task_length[worker_ctx][nimpl])) + /* there is no prediction available for that task + * with that arch (yet or at all), so switch to a greedy strategy */ + unknown = 1; + + if (unknown) + continue; + + double task_starting_time = exp_start + prev_exp_len; + if (local_data_penalty) + task_starting_time = STARPU_MAX(task_starting_time, + now + local_data_penalty[worker_ctx][nimpl]); + + exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl]; + + if (exp_end[worker_ctx][nimpl] < best_exp_end_of_task) + { + /* a better solution was found */ + best_exp_end_of_task = exp_end[worker_ctx][nimpl]; + nimpl_best = nimpl; + } + + if (local_energy) + if (isnan(local_energy[worker_ctx][nimpl])) + local_energy[worker_ctx][nimpl] = 0.; + + } + worker_ctx++; + } + + *forced_worker = unknown?ntasks_best:-1; + *forced_impl = unknown?nimpl_best:-1; + +#ifdef STARPU_VERBOSE + if (unknown) + { + dt->eager_task_cnt++; + } +#endif + + *min_exp_endp_of_task = best_exp_end_of_task; + *max_exp_endp_of_workers = max_exp_end_of_workers; +} + +static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned da, unsigned simulate, unsigned sorted_decision) +{ + /* find the queue */ + int best = -1, 
best_in_ctx = -1; + int selected_impl = 0; + double model_best = 0.0; + double transfer_model_best = 0.0; + + /* this flag is set if the corresponding worker is selected because + there is no performance prediction available yet */ + int forced_best = -1; + int forced_impl = -1; + + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + unsigned nworkers_ctx = workers->nworkers; + double local_task_length[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double local_data_penalty[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double local_energy[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + + /* Expected end of this task on the workers */ + double exp_end[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + + /* This is the minimum among the exp_end[] matrix */ + double min_exp_end_of_task; + + /* This is the maximum termination time of already-scheduled tasks over all workers */ + double max_exp_end_of_workers = 0.0; + + double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + + + compute_all_performance_predictions(task, + nworkers_ctx, + local_task_length, + exp_end, + &max_exp_end_of_workers, + &min_exp_end_of_task, + da ? local_data_penalty : NULL, + da ? 
local_energy : NULL, + &forced_best, + &forced_impl, sched_ctx_id, sorted_decision); + + + if (forced_best == -1) + { + double best_fitness = -1; + unsigned worker_ctx = 0; + struct starpu_sched_ctx_iterator it; + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(worker_ctx < nworkers_ctx && workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + unsigned nimpl; + unsigned impl_mask; + + if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask)) + continue; + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!(impl_mask & (1U << nimpl))) + { + /* no one on that queue may execute this task */ + continue; + } + if (da) + fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task) + + dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl]) + + dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]); + else + fitness[worker_ctx][nimpl] = exp_end[worker_ctx][nimpl] - min_exp_end_of_task; + + if (da && exp_end[worker_ctx][nimpl] > max_exp_end_of_workers) + { + /* This placement will make the computation + * longer, take into account the idle + * consumption of other cpus */ + fitness[worker_ctx][nimpl] += dt->_gamma * __s_gamma__value * dt->idle_power * __s_idle_power__value * (exp_end[worker_ctx][nimpl] - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, + then d->idle_power * (exp_end - max_exp_end_of_workers) + must be in Joules, thus the / 1000000.0 */ + } + + if (best == -1 || fitness[worker_ctx][nimpl] < best_fitness) + { + /* we found a better solution */ + best_fitness = fitness[worker_ctx][nimpl]; + best = worker; + best_in_ctx = worker_ctx; + selected_impl = nimpl; + + //_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) +gamma(%e)\n", worker, best_fitness, exp_end[worker][nimpl] - min_exp_end_of_task, local_data_penalty[worker][nimpl], 
local_energy[worker][nimpl]); + + } + } + worker_ctx++; + } + } + STARPU_ASSERT(forced_best != -1 || best != -1); + + if (forced_best != -1) + { + /* there is no prediction available for that task + * with that arch we want to speed-up calibration time + * so we force this measurement */ + best = forced_best; + selected_impl = forced_impl; + model_best = 0.0; + transfer_model_best = 0.0; + } + else if (task->bundle) + { + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(best, sched_ctx_id); /* takes a worker id (best), not an index in the ctx (best_in_ctx) */ + unsigned memory_node = starpu_worker_get_memory_node(best); + model_best = starpu_task_expected_length(task, perf_arch, selected_impl); + if (da) + transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task); + } + else + { + model_best = local_task_length[best_in_ctx][selected_impl]; + if (da) + transfer_model_best = local_data_penalty[best_in_ctx][selected_impl]; + } + + //_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", selected_impl); + starpu_task_set_implementation(task, selected_impl); + + starpu_sched_task_break(task); + if(!simulate) + { + /* we should now have the best worker in variable "best" */ + return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id); + } + else + { + return exp_end[best_in_ctx][selected_impl] ; + } +} + +static int dmda_push_sorted_decision_task(struct starpu_task *task) +{ + return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 1); +} + +static int dmda_push_sorted_task(struct starpu_task *task) +{ +#ifdef STARPU_DEVEL +#warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks +#endif + return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 0); +} + +static int dm_push_task(struct starpu_task *task) +{ + return _dmda_push_task(task, 0, task->sched_ctx, 0, 0, 0); +} + +static double dm_simulate_push_task(struct starpu_task *task) +{ + return _dmda_push_task(task, 0, task->sched_ctx, 0, 1, 0); +} + +static int 
dmda_push_task(struct starpu_task *task) +{ + STARPU_ASSERT(task); + return _dmda_push_task(task, 0, task->sched_ctx, 1, 0, 0); +} +static double dmda_simulate_push_task(struct starpu_task *task) +{ + STARPU_ASSERT(task); + return _dmda_push_task(task, 0, task->sched_ctx, 1, 1, 0); +} + +static double dmda_simulate_push_sorted_task(struct starpu_task *task) +{ + STARPU_ASSERT(task); + return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 0); +} + +static double dmda_simulate_push_sorted_decision_task(struct starpu_task *task) +{ + STARPU_ASSERT(task); + return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 1); +} + +#ifdef NOTIFY_READY_SOON +static void dmda_notify_ready_soon(void *data STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task, double delay) +{ + if (!task->cl) + return; + /* fprintf(stderr, "task %lu %p %p %s %s will be ready within %f\n", starpu_task_get_job_id(task), task, task->cl, task->cl->name, task->cl->model?task->cl->model->symbol : NULL, delay); */ + /* TODO: do something with it */ +} +#endif + +static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + unsigned i; + for (i = 0; i < nworkers; i++) + { + struct starpu_st_fifo_taskq *q; + int workerid = workerids[i]; + /* if the worker has already belonged to this context + the queue and the synchronization variables have been already initialized */ + q = &dt->queue_array[workerid]; + starpu_st_fifo_taskq_init(q); + /* These are only stats, they can be read with races */ + STARPU_HG_DISABLE_CHECKING(q->exp_start); + STARPU_HG_DISABLE_CHECKING(q->exp_len); + STARPU_HG_DISABLE_CHECKING(q->exp_end); + + if(dt->num_priorities != -1) + { + _STARPU_MALLOC(q->exp_len_per_priority, dt->num_priorities*sizeof(double)); + _STARPU_MALLOC(q->ntasks_per_priority, dt->num_priorities*sizeof(unsigned)); + int j; + for(j = 0; j < dt->num_priorities; j++) + { + 
q->exp_len_per_priority[j] = 0.0; + q->ntasks_per_priority[j] = 0; + } + } + } +} + +static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + unsigned i; + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + if(dt->num_priorities != -1) + { + free(dt->queue_array[workerid].exp_len_per_priority); + free(dt->queue_array[workerid].ntasks_per_priority); + } + } +} + +static void initialize_dmda_policy(unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt; + _STARPU_CALLOC(dt, 1, sizeof(struct _starpu_dmda_data)); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt); + + dt->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); + dt->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); + /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */ +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_getenv("STARPU_SCHED_GAMMA")) + _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); +#endif + dt->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); + /* data->idle_power: Idle power of the whole machine in Watt */ + dt->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); + + if(starpu_sched_ctx_min_priority_is_set(sched_ctx_id) != 0 && starpu_sched_ctx_max_priority_is_set(sched_ctx_id) != 0) + dt->num_priorities = starpu_sched_ctx_get_max_priority(sched_ctx_id) - starpu_sched_ctx_get_min_priority(sched_ctx_id) + 1; + else + dt->num_priorities = -1; + +#ifdef NOTIFY_READY_SOON + starpu_task_notify_ready_soon_register(dmda_notify_ready_soon, dt); +#endif +} + +static void initialize_dmda_sorted_policy(unsigned sched_ctx_id) +{ + 
initialize_dmda_policy(sched_ctx_id); + + /* The application may use any integer */ + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); +} + +static void deinitialize_dmda_policy(unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); +#ifdef STARPU_VERBOSE + { + struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); + long int modelled_task_cnt = dt->total_task_cnt - dt->eager_task_cnt; + _STARPU_DEBUG("%s sched policy (sched_ctx %u): total_task_cnt %ld ready_task_cnt %ld (%.1f%%), modelled_task_cnt = %ld (%.1f%%)%s\n", + sched_ctx->sched_policy?sched_ctx->sched_policy->policy_name:"", + sched_ctx_id, + dt->total_task_cnt, + dt->ready_task_cnt, + (100.0f*dt->ready_task_cnt)/dt->total_task_cnt, + modelled_task_cnt, + (100.0f*modelled_task_cnt)/dt->total_task_cnt, + modelled_task_cnt==0?" *** Check if performance models are enabled and converging on a per-codelet basis, or use an non-modeling scheduling policy. ***":""); + } +#endif + + free(dt); +} + +/* dmda_pre_exec_hook is called right after the data transfer is done and right + * before the computation to begin, it is useful to update more precisely the + * value of the expected start, end, length, etc... */ +static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id) +{ + unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; + const double now = starpu_timing_now(); + + /* Once the task is executing, we can update the predicted amount + * of work. 
*/ + starpu_worker_lock_self(); + + _starpu_fifo_task_started(fifo, task, dt->num_priorities); + + /* Take the opportunity to update start time */ + fifo->exp_start = STARPU_MAX(now + fifo->pipeline_len, fifo->exp_start); + fifo->exp_end = fifo->exp_start + fifo->exp_len; + + starpu_worker_unlock_self(); +} + +static void _dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id, int da) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; + + /* Compute the expected penalty */ + double predicted = starpu_task_worker_expected_length(task, perf_workerid, sched_ctx_id, + starpu_task_get_implementation(task)); + double predicted_transfer = NAN; + + if (da) + predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid); + + double now = starpu_timing_now(); + + /* Update the predictions */ + starpu_worker_lock(workerid); + + fifo->pipeline_ntasks++; + + /* Sometimes workers didn't take the tasks as early as we expected */ + fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); + fifo->exp_end = fifo->exp_start + fifo->exp_len; + + if (da) + { + /* If there is no prediction available, we consider the task has a null length */ + if (!isnan(predicted_transfer)) + { + if (now + predicted_transfer < fifo->exp_end) + { + /* We may hope that the transfer will be finished by + * the start of the task. 
*/ + predicted_transfer = 0; + } + else + { + /* The transfer will not be finished by then, take the + * remainder into account */ + predicted_transfer = (now + predicted_transfer) - fifo->exp_end; + } + task->predicted_transfer = predicted_transfer; + fifo->exp_end += predicted_transfer; + fifo->exp_len += predicted_transfer; + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->exp_len_per_priority[i] += predicted_transfer; + } + + } + } + + /* If there is no prediction available, we consider the task has a null length */ + if (!isnan(predicted)) + { + task->predicted = predicted; + fifo->exp_end += predicted; + fifo->exp_len += predicted; + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->exp_len_per_priority[i] += predicted; + } + + } + if(dt->num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo->ntasks_per_priority[i]++; + } + + fifo->ntasks++; + + starpu_worker_unlock(workerid); +} + +static void dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) +{ + _dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 0); +} + +static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) +{ + _dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 1); +} + +static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id) +{ + struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + unsigned workerid = starpu_worker_get_id_check(); + struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; + 
starpu_worker_lock_self(); + _starpu_fifo_task_finished(fifo, task, dt->num_priorities); + starpu_worker_unlock_self(); +} + +struct starpu_sched_policy _starpu_sched_dm_policy = +{ + .init_sched = initialize_dmda_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dm_push_task, + .simulate_push_task = dm_simulate_push_task, + .push_task_notify = dm_push_task_notify, + .pop_task = dmda_pop_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dm", + .policy_description = "performance model", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy _starpu_sched_dmda_policy = +{ + .init_sched = initialize_dmda_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dmda_push_task, + .simulate_push_task = dmda_simulate_push_task, + .push_task_notify = dmda_push_task_notify, + .pop_task = dmda_pop_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dmda", + .policy_description = "data-aware performance model", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy _starpu_sched_dmda_prio_policy = +{ + .init_sched = initialize_dmda_sorted_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dmda_push_sorted_task, + .simulate_push_task = dmda_simulate_push_sorted_task, + .push_task_notify = dmda_push_task_notify, + .pop_task = dmda_pop_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dmdap", + .policy_description = "data-aware performance model (priority)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = +{ 
+ .init_sched = initialize_dmda_sorted_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dmda_push_sorted_task, + .simulate_push_task = dmda_simulate_push_sorted_task, + .push_task_notify = dmda_push_task_notify, + .pop_task = dmda_pop_ready_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dmdas", + .policy_description = "data-aware performance model (sorted)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy = +{ + .init_sched = initialize_dmda_sorted_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dmda_push_sorted_decision_task, + .simulate_push_task = dmda_simulate_push_sorted_decision_task, + .push_task_notify = dmda_push_task_notify, + .pop_task = dmda_pop_ready_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dmdasd", + .policy_description = "data-aware performance model (sorted decision)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +struct starpu_sched_policy _starpu_sched_dmda_ready_policy = +{ + .init_sched = initialize_dmda_policy, + .deinit_sched = deinitialize_dmda_policy, + .add_workers = dmda_add_workers , + .remove_workers = dmda_remove_workers, + .push_task = dmda_push_task, + .simulate_push_task = dmda_simulate_push_task, + .push_task_notify = dmda_push_task_notify, + .pop_task = dmda_pop_ready_task, + .pre_exec_hook = dmda_pre_exec_hook, + .post_exec_hook = dmda_post_exec_hook, + .policy_name = "dmdar", + .policy_description = "data-aware performance model (ready)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/eager_central_policy.c b/src/sched_policies/eager_central_policy.c new file mode 100644 index 
0000000..03c6bc3 --- /dev/null +++ b/src/sched_policies/eager_central_policy.c @@ -0,0 +1,210 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is just the trivial policy where every worker use the same + * JOB QUEUE. + */ + +#include +#include +#include +#include +#include +#include + +struct _starpu_eager_center_policy_data +{ + struct starpu_st_fifo_taskq fifo; + starpu_pthread_mutex_t policy_mutex; + struct starpu_bitmap waiters; +}; + +static void initialize_eager_center_policy(unsigned sched_ctx_id) +{ + struct _starpu_eager_center_policy_data *data; + _STARPU_MALLOC(data, sizeof(struct _starpu_eager_center_policy_data)); + + /* there is only a single queue in that trivial design */ + starpu_st_fifo_taskq_init(&data->fifo); + starpu_bitmap_init(&data->waiters); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); +} + +static void deinitialize_eager_center_policy(unsigned sched_ctx_id) +{ + struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_fifo_taskq *fifo = &data->fifo; + + STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq)); + + 
/* push_task callback of the "eager" policy.
 *
 * Appends task to the central FIFO of the scheduling context it belongs to
 * (task->sched_ctx), then looks for a worker able to run it so that it gets
 * notified. Always returns 0.
 */
static int push_task_eager_policy(struct starpu_task *task)
{
	unsigned sched_ctx_id = task->sched_ctx;
	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	/* The relaxed state is only kept while blocking on the policy mutex */
	starpu_worker_relax_on();
	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
	starpu_worker_relax_off();
	/* Append directly to the list and maintain the FIFO counters by hand */
	starpu_task_list_push_back(&data->fifo.taskq,task);
	data->fifo.ntasks++;
	data->fifo.nprocessed++;

	/* Per-context task counters are only updated when several scheduling
	 * contexts exist; this takes the context write lock while the policy
	 * mutex is still held. */
	if (_starpu_get_nsched_ctxs() > 1)
	{
		starpu_worker_relax_on();
		_starpu_sched_ctx_lock_write(sched_ctx_id);
		starpu_worker_relax_off();
		starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(task, sched_ctx_id);
		_starpu_sched_ctx_unlock_write(sched_ctx_id);
	}

	starpu_push_task_end(task);

	/* Look for workers waiting for a task and wake one that can run it */
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

	struct starpu_sched_ctx_iterator it;
#ifndef STARPU_NON_BLOCKING_DRIVERS
	/* Candidates are only recorded under the mutex; the actual wake-up is
	 * done after it has been released (see below). */
	char dowake[STARPU_NMAXWORKERS] = { 0 };
#endif

	workers->init_iterator_for_parallel_tasks(workers, &it, task);
	while(workers->has_next(workers, &it))
	{
		unsigned worker = workers->get_next(workers, &it);

#ifdef STARPU_NON_BLOCKING_DRIVERS
		if (!starpu_bitmap_get(&data->waiters, worker))
			/* This worker is not waiting for a task */
			continue;
#endif

		if (starpu_worker_can_execute_task_first_impl(worker, task, NULL))
		{
			/* It can execute this one, tell it! */
#ifdef STARPU_NON_BLOCKING_DRIVERS
			/* Clearing the bit is the notification: pop_task_eager_policy
			 * returns early as long as the bit of its worker is set. */
			starpu_bitmap_unset(&data->waiters, worker);
			/* We really woke at least somebody, no need to wake somebody else */
			break;
#else
			dowake[worker] = 1;
#endif
		}
	}
	/* Release the queue so that the task can be popped */
	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);

	/* NOTE(review): dowake is declared under #ifndef STARPU_NON_BLOCKING_DRIVERS
	 * but this section is also compiled when STARPU_SIMGRID is defined;
	 * presumably both macros are never defined together -- confirm against
	 * the build configuration. */
#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID)
	/* Now that we have a list of potential workers, try to wake one */

	workers->init_iterator_for_parallel_tasks(workers, &it, task);
	while(workers->has_next(workers, &it))
	{
		unsigned worker = workers->get_next(workers, &it);
		if (dowake[worker])
			if (starpu_wake_worker_relax_light(worker))
				break; // wake up a single worker
	}
#endif

	return 0;
}
/* add_workers callback of the "eager" policy: for each of the nworkers
 * entries of workerids, wake the worker unless it is the calling one, and
 * declare that it shares the task list of scheduling context sched_ctx_id. */
static void eager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
{
	unsigned idx = 0;

	while (idx < nworkers)
	{
		int added = workerids[idx++];

		/* the calling worker does not need to be woken */
		if (_starpu_worker_get_id() != added)
			starpu_wake_worker_locked(added);

		starpu_sched_ctx_worker_shares_tasks_lists(added, sched_ctx_id);
	}
}
/* State of the "prio" policy, stored as the policy data of a scheduling
 * context (see initialize_eager_center_priority_policy). */
struct _starpu_eager_central_prio_data
{
	/* The single central queue shared by all workers of the context */
	struct starpu_st_prio_deque taskq;
	/* Locked by the push and pop callbacks around their accesses to taskq
	 * and waiters */
	starpu_pthread_mutex_t policy_mutex;
	/* One bit per worker, set by the pop callback when it found no task
	 * for that worker */
	struct starpu_bitmap waiters;
};
STARPU_HG_DISABLE_CHECKING(data->taskq.ntasks); + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); + + /* The application may use any integer */ + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); +} + +static void deinitialize_eager_center_priority_policy(unsigned sched_ctx_id) +{ + /* TODO check that there is no task left in the queue */ + struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + /* deallocate the job queue */ + starpu_st_prio_deque_destroy(&data->taskq); + + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + free(data); +} + +static int _starpu_priority_push_task(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_prio_deque *taskq = &data->taskq; + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + starpu_st_prio_deque_push_back_task(taskq, task); + + if (_starpu_get_nsched_ctxs() > 1) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(task, sched_ctx_id); + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } + + starpu_push_task_end(task); + + /*if there are no tasks block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + struct starpu_sched_ctx_iterator it; +#ifndef STARPU_NON_BLOCKING_DRIVERS + char dowake[STARPU_NMAXWORKERS] = { 0 }; +#endif + + 
/* pop_task callback of the "prio" policy.
 *
 * Returns a task taken from the central priority queue of scheduling context
 * sched_ctx_id for the calling worker, or NULL when nothing was taken. When
 * no task is returned from the locked section, the worker is recorded in the
 * waiters bitmap.
 */
static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
{
	struct starpu_task *chosen_task;
	unsigned workerid = starpu_worker_get_id_check();
	/* NOTE(review): only written through starpu_st_prio_deque_pop_task_for_worker
	 * below; presumably that call always sets it -- confirm. */
	struct starpu_task *skipped;

	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	struct starpu_st_prio_deque *taskq = &data->taskq;

	/* Here helgrind would shout that this is unprotected, this is just an
	 * integer access, and we hold the sched mutex, so we can not miss any
	 * wake up. */
	/* Both lockless shortcuts are skipped when running under valgrind */
	if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(taskq))
	{
		return NULL;
	}

#ifdef STARPU_NON_BLOCKING_DRIVERS
	/* Our bit is still set: no pusher cleared it since we registered as
	 * waiting */
	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid))
		/* Nobody woke us, avoid bothering the mutex */
	{
		return NULL;
	}
#endif

	/* The relaxed state is only kept while blocking on the policy mutex */
	starpu_worker_relax_on();
	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
	starpu_worker_relax_off();

	chosen_task = starpu_st_prio_deque_pop_task_for_worker(taskq, workerid, &skipped);

	if (!chosen_task && skipped)
	{
		/* Nothing for us, but a task was skipped: notify every other
		 * worker of the context that can execute it */
		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

		struct starpu_sched_ctx_iterator it;
		workers->init_iterator(workers, &it);
		while(workers->has_next(workers, &it))
		{
			unsigned worker = workers->get_next(workers, &it);

			if(worker != workerid && starpu_worker_can_execute_task_first_impl(worker, skipped, NULL))
			{
#ifdef STARPU_NON_BLOCKING_DRIVERS
				/* Clearing the bit lets that worker get past its
				 * lockless waiters check above */
				starpu_bitmap_unset(&data->waiters, worker);
#else
				starpu_wake_worker_relax_light(worker);
#endif
			}
		}

	}

	if (!chosen_task)
		/* Tell pushers that we are waiting for tasks for us */
		starpu_bitmap_set(&data->waiters, workerid);

	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
	/* Per-context task counters are only maintained when several
	 * scheduling contexts exist */
	if(chosen_task &&_starpu_get_nsched_ctxs() > 1)
	{
		starpu_worker_relax_on();
		_starpu_sched_ctx_lock_write(sched_ctx_id);
		starpu_worker_relax_off();
		starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(chosen_task, sched_ctx_id);

		/* In that case the task is not returned to the caller */
		if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, chosen_task))
			chosen_task = NULL;

		_starpu_sched_ctx_unlock_write(sched_ctx_id);
	}

	return chosen_task;
}
curr_workerid) + starpu_wake_worker_locked(workerid); + + starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); + } +} + +struct starpu_sched_policy _starpu_sched_prio_policy = +{ + .add_workers = eager_center_priority_add_workers, + .init_sched = initialize_eager_center_priority_policy, + .deinit_sched = deinitialize_eager_center_priority_policy, + /* we always use priorities in that policy */ + .push_task = _starpu_priority_push_task, + .pop_task = _starpu_priority_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "prio", + .policy_description = "eager (with priorities)", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/fifo_queues.c b/src/sched_policies/fifo_queues.c new file mode 100644 index 0000000..0893ce6 --- /dev/null +++ b/src/sched_policies/fifo_queues.c @@ -0,0 +1,520 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * Copyright (C) 2013-2013 Simon Archipoff + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* FIFO queues, ready for use by schedulers */ + +#include +#include + +#include +#include +#include + +#include + +/* +static int is_sorted_task_list(struct starpu_task * task) +{ + if(!task) + return 1; + struct starpu_task * next = task->next; + if(!next) + return 1; + while(next) + { + if(task->priority < next->priority) + return 0; + task = next; + next = next->next; + } + return 1; +} +*/ + +void starpu_st_fifo_taskq_init(struct starpu_st_fifo_taskq *fifo) +{ + /* note that not all mechanisms (eg. the semaphore) have to be used */ + starpu_task_list_init(&fifo->taskq); + fifo->ntasks = 0; + fifo->pipeline_ntasks = 0; + /* Tell helgrind that it's fine to check for empty fifo in + * pop_task_graph_test_policy without actual mutex (it's just an integer) + */ + STARPU_HG_DISABLE_CHECKING(fifo->ntasks); + fifo->nprocessed = 0; + + fifo->exp_start = starpu_timing_now(); + fifo->exp_len = 0.0; + fifo->exp_end = fifo->exp_start; + fifo->exp_len_per_priority = NULL; + fifo->pipeline_len = 0.0; + STARPU_HG_DISABLE_CHECKING(fifo->exp_start); + STARPU_HG_DISABLE_CHECKING(fifo->exp_len); + STARPU_HG_DISABLE_CHECKING(fifo->exp_end); +} + +struct starpu_st_fifo_taskq *starpu_st_fifo_taskq_create(void) +{ + struct starpu_st_fifo_taskq *fifo; + _STARPU_MALLOC(fifo, sizeof(struct starpu_st_fifo_taskq)); + + starpu_st_fifo_taskq_init(fifo); + + return fifo; +} + +void starpu_st_fifo_taskq_destroy(struct starpu_st_fifo_taskq *fifo) +{ + free(fifo); +} + +int starpu_st_fifo_taskq_empty(struct starpu_st_fifo_taskq *fifo) +{ + return fifo->ntasks == 0; +} + +unsigned starpu_st_fifo_ntasks_get(struct starpu_st_fifo_taskq *fifo) +{ + return fifo->ntasks; +} + +void starpu_st_fifo_ntasks_inc(struct starpu_st_fifo_taskq *fifo, int n) +{ + fifo->ntasks += n; +} + +unsigned *starpu_st_fifo_ntasks_per_priority_get(struct starpu_st_fifo_taskq *fifo) +{ + return fifo->ntasks_per_priority; +} + +unsigned starpu_st_fifo_nprocessed_get(struct starpu_st_fifo_taskq *fifo) +{ + return 
/* Estimate how much work would run before task if it were inserted in
 * fifo_queue according to its priority (same placement rule as
 * starpu_st_fifo_taskq_push_sorted_task).
 *
 * Returns the pipeline length plus the expected lengths, for worker workerid
 * and implementation nimpl, of the queued tasks that would precede task. When
 * task would go at the tail, fifo_queue->exp_len is returned instead.
 *
 * fifo_ntasks receives the matching number of tasks (pipeline included).
 * NOTE(review): *fifo_ntasks is left untouched when the queue is empty or
 * when task would be inserted at the head; presumably callers pre-initialize
 * it -- confirm against the callers.
 */
double starpu_st_fifo_taskq_get_exp_len_prev_task_list(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task, int workerid, int nimpl, int *fifo_ntasks)
{
	struct starpu_task_list *list = &fifo_queue->taskq;
	struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx);
	/* What was already handed over to the worker always comes first */
	double exp_len = fifo_queue->pipeline_len;

	if (list->_head != NULL)
	{
		struct starpu_task *current = list->_head;
		struct starpu_task *prev = NULL;

		/* Shortcut: comparing head and tail is enough to know that the
		 * whole list has the priority of the task */
		if (list->_head->priority == task->priority &&
		    list->_head->priority == list->_tail->priority)
		{
			/* They all have the same priority, the task's place is at the end */
			prev = list->_tail;
			current = NULL;
		}
		else
			/* Find the first queued task with a strictly lower priority */
			while (current)
			{
				if (current->priority < task->priority)
					break;

				prev = current;
				current = current->next;
			}

		/* prev == NULL means the task would become the new head: only
		 * the pipeline precedes it */
		if (prev != NULL)
		{
			if (current)
			{
				/* the task's place is between prev and current */
				struct starpu_task *it;
				*fifo_ntasks = fifo_queue->pipeline_ntasks;
				/* Sum up everything queued before the insertion point */
				for(it = list->_head; it != current; it = it->next)
				{
					exp_len += starpu_task_expected_length(it, perf_arch, nimpl);
					(*fifo_ntasks) ++;
				}
			}
			else
			{
				/* the task's place is at the _tail of the list */
				exp_len = fifo_queue->exp_len;
				*fifo_ntasks = fifo_queue->ntasks + fifo_queue->pipeline_ntasks;
			}
		}
	}


	return exp_len;
}
list->_tail->next = task; + task->next = NULL; + task->prev = list->_tail; + list->_tail = task; + } + } + } + + fifo_queue->ntasks++; + fifo_queue->nprocessed++; + + return 0; +} + +int starpu_st_fifo_taskq_push_task(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task) +{ + if (task->priority > 0) + { + starpu_st_fifo_taskq_push_sorted_task(fifo_queue, task); + } + else + { + starpu_task_list_push_back(&fifo_queue->taskq, task); + + fifo_queue->ntasks++; + fifo_queue->nprocessed++; + } + return 0; +} + +int starpu_st_fifo_taskq_push_back_task(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task) +{ + if (task->priority > 0) + { + starpu_st_fifo_taskq_push_sorted_task(fifo_queue, task); + } + else + { + starpu_task_list_push_front(&fifo_queue->taskq, task); + + fifo_queue->ntasks++; + } + return 0; +} + +int starpu_st_fifo_taskq_pop_this_task(struct starpu_st_fifo_taskq *fifo_queue, int workerid, struct starpu_task *task) +{ + unsigned nimpl = 0; + STARPU_ASSERT(task); +#ifdef STARPU_DEBUG + STARPU_ASSERT(starpu_task_list_ismember(&fifo_queue->taskq, task)); +#endif + + if (workerid < 0 || starpu_worker_can_execute_task_first_impl(workerid, task, &nimpl)) + { + starpu_task_set_implementation(task, nimpl); + starpu_task_list_erase(&fifo_queue->taskq, task); + fifo_queue->ntasks--; + return 1; + } + + return 0; +} + +struct starpu_task *starpu_st_fifo_taskq_pop_task(struct starpu_st_fifo_taskq *fifo_queue, int workerid) +{ + struct starpu_task *task; + + for (task = starpu_task_list_begin(&fifo_queue->taskq); + task != starpu_task_list_end(&fifo_queue->taskq); + task = starpu_task_list_next(task)) + { + if (starpu_st_fifo_taskq_pop_this_task(fifo_queue, workerid, task)) + return task; + } + + return NULL; +} + +struct starpu_task *starpu_st_fifo_taskq_pop_local_task(struct starpu_st_fifo_taskq *fifo_queue) +{ + struct starpu_task *task = NULL; + + if (!starpu_task_list_empty(&fifo_queue->taskq)) + { + task = 
/* Map priority from the [min, max] priority range of scheduling context
 * sched_ctx_id onto the index range [0, num_priorities - 1].
 *
 * The multiplication is done before the division, in 64-bit arithmetic:
 * dividing first truncates the scale factor (down to 0 as soon as
 * max - min exceeds num_priorities - 1), and max - min does not fit in an
 * int for wide ranges such as INT_MIN..INT_MAX.
 *
 * Returns 0 when the context range is empty (max == min), instead of
 * dividing by zero.
 */
int starpu_st_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_id)
{
	long long min = starpu_sched_ctx_get_min_priority(sched_ctx_id);
	long long max = starpu_sched_ctx_get_max_priority(sched_ctx_id);
	long long range = max - min;

	if (range == 0)
		/* a single priority level: everything maps to index 0 */
		return 0;

	return (int)(((long long)(num_priorities - 1) * ((long long)priority - min)) / range);
}
STARPU_TASK_GET_MODE(task, index); + + if ((mode & STARPU_SCRATCH) || (mode & STARPU_REDUX)) + continue; + + int is_valid; + starpu_data_query_status(handle, buffer_node, NULL, &is_valid, NULL); + + if (!is_valid) + cnt++; + } + + return cnt; +} + +struct starpu_task *starpu_st_fifo_taskq_pop_first_ready_task(struct starpu_st_fifo_taskq *fifo_queue, unsigned workerid, int num_priorities) +{ + struct starpu_task *task = NULL, *current; + + if (fifo_queue->ntasks == 0) + return NULL; + + if (fifo_queue->ntasks > 0) + { + fifo_queue->ntasks--; + + task = starpu_task_list_front(&fifo_queue->taskq); + if (STARPU_UNLIKELY(!task)) + return NULL; + + int first_task_priority = task->priority; + + size_t non_ready_best = SIZE_MAX; + size_t non_loading_best = SIZE_MAX; + size_t non_allocated_best = SIZE_MAX; + + for (current = task; current; current = current->next) + { + int priority = current->priority; + + if (priority >= first_task_priority) + { + size_t non_ready, non_loading, non_allocated; + starpu_st_non_ready_buffers_size(current, workerid, &non_ready, &non_loading, &non_allocated); + if (non_ready < non_ready_best) + { + non_ready_best = non_ready; + non_loading_best = non_loading; + non_allocated_best = non_allocated; + task = current; + + if (non_ready == 0 && non_allocated == 0) + break; + } + else if (non_ready == non_ready_best) + { + if (non_loading < non_loading_best) + { + non_loading_best = non_loading; + non_allocated_best = non_allocated; + task = current; + } + else if (non_loading == non_loading_best) + { + if (non_allocated < non_allocated_best) + { + non_allocated_best = non_allocated; + task = current; + } + } + } + } + } + + if(num_priorities != -1) + { + int i; + int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx); + for(i = 0; i <= task_prio; i++) + fifo_queue->ntasks_per_priority[i]--; + } + + starpu_task_list_erase(&fifo_queue->taskq, task); + } + + return task; +} diff --git 
/** FIFO task queue, with the counters and expected-time bookkeeping used by
 * the scheduling policies built on top of it */
struct starpu_st_fifo_taskq
{
	/** the actual list */
	struct starpu_task_list taskq;

	/** the number of tasks currently in the queue */
	unsigned ntasks;

	/** the number of tasks already pushed to the worker */
	unsigned pipeline_ntasks;

	/** the number of tasks currently in the queue corresponding to each priority */
	unsigned *ntasks_per_priority;

	/** the number of tasks that were processed */
	unsigned nprocessed;

	/* The fields below are only meaningful if the queue is only used by a single worker */

	double exp_start; /**< Expected start date of next item to do in the
			   * queue (i.e. not started yet). This is thus updated
			   * when we start it. */
	double exp_end; /**< Expected end date of last task in the queue */
	double exp_len; /**< Expected duration of the set of tasks in the queue */
	double *exp_len_per_priority; /**< Expected duration of the set of tasks in the queue corresponding to each priority */
	double pipeline_len; /**< the expected duration of what is already pushed to the worker */
};
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct _starpu_graph_test_policy_data +{ + struct starpu_st_fifo_taskq fifo; /* Bag of tasks which are ready before do_schedule is called */ + struct starpu_st_prio_deque prio_cpu; + struct starpu_st_prio_deque prio_gpu; + starpu_pthread_mutex_t policy_mutex; + struct starpu_bitmap waiters; + unsigned computed; + unsigned descendants; /* Whether we use descendants, or depths, for priorities */ +}; + +static void initialize_graph_test_policy(unsigned sched_ctx_id) +{ + struct _starpu_graph_test_policy_data *data; + _STARPU_MALLOC(data, sizeof(struct _starpu_graph_test_policy_data)); + + /* there is only a single queue in that trivial design */ + starpu_st_fifo_taskq_init(&data->fifo); + starpu_st_prio_deque_init(&data->prio_cpu); + starpu_st_prio_deque_init(&data->prio_gpu); + starpu_bitmap_init(&data->waiters); + data->computed = 0; + data->descendants = starpu_getenv_number_default("STARPU_SCHED_GRAPH_TEST_DESCENDANTS", 0); + + _starpu_graph_record = 1; + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); +} + +static void deinitialize_graph_test_policy(unsigned sched_ctx_id) +{ + struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_fifo_taskq *fifo = &data->fifo; + + STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq)); + + /* deallocate the job queue */ + starpu_st_prio_deque_destroy(&data->prio_cpu); + starpu_st_prio_deque_destroy(&data->prio_gpu); + + _starpu_graph_record = 0; + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + free(data); +} + +/* Push the given task on CPU or GPU prio list, using a dumb heuristic */ +static struct starpu_st_prio_deque *select_prio(unsigned sched_ctx_id, struct _starpu_graph_test_policy_data *data, struct starpu_task *task) +{ + int cpu_can = 0, gpu_can = 0; + 
double cpu_speed = 0.; + double gpu_speed = 0.; + + /* Compute how fast CPUs can compute it, and how fast GPUs can compute it */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + if (!starpu_worker_can_execute_task(worker, task, 0)) + /* This worker can not execute this task, don't count it */ + continue; + + if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) + /* At least one CPU can run it */ + cpu_can = 1; + else + /* At least one GPU can run it */ + gpu_can = 1; + + /* Get expected task duration for this worker */ + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id); + double length = starpu_task_expected_length(task, perf_arch, 0); + double power; + + if (isnan(length)) + /* We don't have an estimation yet */ + length = 0.; + if (length == 0.) 
+ { + if (!task->cl || task->cl->model == NULL) + { + static unsigned _warned; + STARPU_HG_DISABLE_CHECKING(_warned); + if (STARPU_ATOMIC_ADD(&_warned, 1) == 1) + { + _STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n", + starpu_task_get_name(task)); + } + else + { + (void)STARPU_ATOMIC_ADD(&_warned, -1); + } + } + power = 0.; + } + else + power = 1./length; + + /* Add the computation power to the CPU or GPU pool */ + if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) + cpu_speed += power; + else + gpu_speed += power; + } + + /* Decide to push on CPUs or GPUs depending on the overall computation power */ + if (!gpu_can || (cpu_can && cpu_speed > gpu_speed)) + return &data->prio_cpu; + else + return &data->prio_gpu; + +} + +static void set_priority(void *_data, struct _starpu_graph_node *node) +{ + struct _starpu_graph_test_policy_data *data = _data; + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&node->mutex); + starpu_worker_relax_off(); + struct _starpu_job *job = node->job; + if (job) + { + if (data->descendants) + job->task->priority = node->descendants; + else + job->task->priority = node->depth; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex); +} + +static void do_schedule_graph_test_policy(unsigned sched_ctx_id) +{ + struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + if (data->descendants) + _starpu_graph_compute_descendants(); + else + _starpu_graph_compute_depths(); + if (data->computed == 0) + { + data->computed = 1; + + /* FIXME: if data->computed already == 1, some tasks may already have been pushed to priority stage '0' in + * push_task_graph_test_policy, then if we change the priority here, the stage lookup to remove the task + * will get the wrong stage */ + _starpu_graph_foreach(set_priority, data); + } + + 
/* Now that we have priorities, move tasks from bag to priority queue */ + while(!starpu_st_fifo_taskq_empty(&data->fifo)) + { + struct starpu_task *task = starpu_st_fifo_taskq_pop_task(&data->fifo, -1); + struct starpu_st_prio_deque *prio = select_prio(sched_ctx_id, data, task); + starpu_st_prio_deque_push_back_task(prio, task); + } + + /* And unleash the beast! */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; +#ifdef STARPU_NON_BLOCKING_DRIVERS + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + /* Tell each worker is shouldn't sleep any more */ + unsigned worker = workers->get_next(workers, &it); + starpu_bitmap_unset(&data->waiters, worker); + } +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + /* Wake each worker */ + unsigned worker = workers->get_next(workers, &it); + starpu_wake_worker_relax_light(worker); + } +#endif +} + +static int push_task_graph_test_policy(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + if (!data->computed) + { + /* Priorities are not computed, leave the task in the bag for now */ + starpu_task_list_push_back(&data->fifo.taskq,task); + data->fifo.ntasks++; + data->fifo.nprocessed++; + starpu_push_task_end(task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + return 0; + } + + /* Priorities are computed, we can push to execution */ + struct starpu_st_prio_deque *prio = select_prio(sched_ctx_id, data, task); + starpu_st_prio_deque_push_back_task(prio, task); + + 
starpu_push_task_end(task); + + /*if there are no tasks block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + struct starpu_sched_ctx_iterator it; +#ifndef STARPU_NON_BLOCKING_DRIVERS + char dowake[STARPU_NMAXWORKERS] = { 0 }; +#endif + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (!starpu_bitmap_get(&data->waiters, worker)) + /* This worker is not waiting for a task */ + continue; +#endif + if (prio == &data->prio_cpu && starpu_worker_get_type(worker) != STARPU_CPU_WORKER) + /* This worker doesn't pop from the queue we have filled */ + continue; + if (prio == &data->prio_gpu && starpu_worker_get_type(worker) == STARPU_CPU_WORKER) + /* This worker doesn't pop from the queue we have filled */ + continue; + + if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) + { + /* It can execute this one, tell him! 
*/ +#ifdef STARPU_NON_BLOCKING_DRIVERS + starpu_bitmap_unset(&data->waiters, worker); + /* We really woke at least somebody, no need to wake somebody else */ + break; +#else + dowake[worker] = 1; +#endif + } + } + /* Let the task free */ + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + /* Now that we have a list of potential workers, try to wake one */ + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + if (dowake[worker]) + { + if (starpu_wake_worker_relax_light(worker)) + break; // wake up a single worker + } + } +#endif + + return 0; +} + +static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id) +{ + struct starpu_task *chosen_task = NULL; + unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct starpu_st_prio_deque *prio; + + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + prio = &data->prio_cpu; + else + prio = &data->prio_gpu; + + /* block until some event happens */ + /* Here helgrind would shout that this is unprotected, this is just an + * integer access, and we hold the sched mutex, so we can not miss any + * wake up. 
*/ + if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(prio)) + return NULL; + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (!STARPU_RUNNING_ON_VALGRIND && !data->computed) + /* Not computed yet */ + return NULL; + if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid)) + /* Nobody woke us, avoid bothering the mutex */ + return NULL; +#endif + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + if (!data->computed) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + return NULL; + } + + chosen_task = starpu_st_prio_deque_pop_task_for_worker(prio, workerid, NULL); + if (!chosen_task) + /* Tell pushers that we are waiting for tasks for us */ + starpu_bitmap_set(&data->waiters, workerid); + + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + return chosen_task; +} + +struct starpu_sched_policy _starpu_sched_graph_test_policy = +{ + .init_sched = initialize_graph_test_policy, + .deinit_sched = deinitialize_graph_test_policy, + .do_schedule = do_schedule_graph_test_policy, + .push_task = push_task_graph_test_policy, + .pop_task = pop_task_graph_test_policy, + .policy_name = "graph_test", + .policy_description = "test policy for using graphs in scheduling decisions", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/helper_mct.c b/src/sched_policies/helper_mct.c new file mode 100644 index 0000000..b4db62e --- /dev/null +++ b/src/sched_policies/helper_mct.c @@ -0,0 +1,222 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * Copyright (C) 2020-2020 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "helper_mct.h" +#include + +/* Alpha, Beta and Gamma are MCT-specific values, which allows the + * user to set more precisely the weight of each computing value. + * Beta, for example, controls the weight of communications between + * memories for the computation of the best component to choose. + */ +#define _STARPU_SCHED_ALPHA_DEFAULT 1.0 +#define _STARPU_SCHED_BETA_DEFAULT 1.0 +#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 + +struct _starpu_mct_data *starpu_mct_init_parameters(struct starpu_sched_component_mct_data *params) +{ + struct _starpu_mct_data *data; + _STARPU_MALLOC(data, sizeof(*data)); + if (params) + { + data->alpha = params->alpha; + data->beta = params->beta; + /* data->_gamma: cost of one Joule in us. 
If gamma is set to 10^6, then one Joule cost 1s */ + data->_gamma = params->_gamma; + /* data->idle_power: Idle power of the whole machine in Watt */ + data->idle_power = params->idle_power; + } + else + { + data->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); + data->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_getenv("STARPU_SCHED_GAMMA")) + _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); +#endif + data->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); + data->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); + } + + return data; +} + +/* compute predicted_end by taking into account the case of the predicted transfer and the predicted_end overlap + */ +static double compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer) +{ + STARPU_ASSERT(!isnan(now + predicted_end + predicted_length + predicted_transfer)); + STARPU_ASSERT_MSG(now >= 0.0 && predicted_end >= 0.0 && predicted_length >= 0.0 && predicted_transfer >= 0.0, "now=%lf, predicted_end=%lf, predicted_length=%lf, predicted_transfer=%lf\n", now, predicted_end, predicted_length, predicted_transfer); + + /* TODO: actually schedule transfers */ + /* Compute the transfer time which will not be overlapped */ + /* However, no modification in calling function so that the whole transfer time is counted as a penalty */ + if (now + predicted_transfer < predicted_end) + { + /* We may hope that the transfer will be finished by + * the start of the task. 
*/ + predicted_transfer = 0; + } + else + { + /* The transfer will not be finished by then, take the + * remainder into account */ + predicted_transfer -= (predicted_end - now); + } + + predicted_end += predicted_transfer; + predicted_end += predicted_length; + + return predicted_end; +} + +double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy) +{ + if(isnan(local_energy)) + /* Energy not calibrated yet, but we cannot do this + * automatically anyway, so ignoring this for now */ + local_energy = 0.; + + /* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */ + + /* min_exp_end_of_task is the minimum end time of the task over all workers */ + double fitness = d->alpha * (exp_end - min_exp_end_of_task) + d->beta * transfer_len + d->_gamma * local_energy; + + /* max_exp_end is the maximum end time of the workers. If the total execution time is increased, then an + additional energy penalty must be considered*/ + if(exp_end > max_exp_end_of_workers) + fitness += d->_gamma * d->idle_power * (exp_end - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, + then d->idle_power * (exp_end - max_exp_end) + must be in Joules, thus the / 1000000.0 */ + + return fitness; +} + +unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *component, struct starpu_task *task, + double *estimated_lengths, double *estimated_transfer_length, unsigned *suitable_components) +{ + unsigned nsuitable_components = 0; + + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + struct starpu_sched_component * c = component->children[i]; + + /* Silence static analysis warnings */ + estimated_lengths[i] = NAN; + estimated_transfer_length[i] = NAN; + + if(starpu_sched_component_execute_preds(c, task, estimated_lengths + i)) + { + if(isnan(estimated_lengths[i])) + /* 
The perfmodel had been purged since the task was pushed + * onto the mct component. */ + continue; + STARPU_ASSERT_MSG(estimated_lengths[i]>=0, "component=%p, child[%u]=%p, estimated_lengths[%u]=%lf\n", component, i, c, i, estimated_lengths[i]); + + estimated_transfer_length[i] = starpu_sched_component_transfer_length(c, task); + suitable_components[nsuitable_components++] = i; + } + } + return nsuitable_components; +} + +void starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, + double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, + double *min_exp_end_of_task, double *max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components) +{ + unsigned i; + double now = starpu_timing_now(); + *min_exp_end_of_task = DBL_MAX; + *max_exp_end_of_workers = 0.0; + for(i = 0; i < nsuitable_components; i++) + { + unsigned icomponent = suitable_components[i]; + struct starpu_sched_component * c = component->children[icomponent]; + /* Estimated availability of worker */ + double estimated_end = c->estimated_end(c); + if (estimated_end < now) + estimated_end = now; + estimated_ends_with_task[icomponent] = compute_expected_time(now, + estimated_end, + estimated_lengths[icomponent], + estimated_transfer_length[icomponent]); + + /* estimated_ends_with_task[icomponent]: estimated end of execution on the worker icomponent + estimated_end: estimatated end of the worker + min_exp_end_of_task: minimum estimated execution time of the task over all workers + max_exp_end_of_workers: maximum estimated end of the already-scheduled tasks over all workers + */ + if(estimated_ends_with_task[icomponent] < *min_exp_end_of_task) + *min_exp_end_of_task = estimated_ends_with_task[icomponent]; + if(estimated_end > *max_exp_end_of_workers) + *max_exp_end_of_workers = estimated_end; + } +} + +/* This function retrieves the energy consumption of a task in Joules*/ +void 
starpu_mct_compute_energy(struct starpu_sched_component *component, struct starpu_task *task , double *local_energy, unsigned *suitable_components, unsigned nsuitable_components) +{ + unsigned i; + for(i = 0; i < nsuitable_components; i++) + { + unsigned icomponent = suitable_components[i]; + int nimpl = 0; + local_energy[icomponent] = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl); + for (nimpl = 1; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + double e; + e = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl); + if (e < local_energy[icomponent]) + local_energy[icomponent] = e; + } + } +} + +int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_of_task, double max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components) +{ + double best_fitness = DBL_MAX; + int best_icomponent = -1; + unsigned i; + + for(i = 0; i < nsuitable_components; i++) + { + int icomponent = suitable_components[i]; + double tmp = starpu_mct_compute_fitness(d, + estimated_ends_with_task[icomponent], + min_exp_end_of_task, + max_exp_end_of_workers, + estimated_transfer_length[icomponent], + local_energy[icomponent]); + + if(tmp < best_fitness) + { + best_fitness = tmp; + best_icomponent = icomponent; + } + } + + if (best_icomponent != -1) + { + task->predicted = estimated_lengths[best_icomponent]; + task->predicted_transfer = estimated_transfer_length[best_icomponent]; + } + + return best_icomponent; +} diff --git a/src/sched_policies/helper_mct.h b/src/sched_policies/helper_mct.h new file mode 100644 index 0000000..3fffb41 --- /dev/null +++ b/src/sched_policies/helper_mct.h @@ -0,0 +1,77 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2020 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#pragma GCC visibility push(hidden) + +/** @file */ + +struct _starpu_mct_data +{ + double alpha; + double beta; + double _gamma; + double idle_power; + starpu_pthread_mutex_t scheduling_mutex; +}; + +struct _starpu_mct_data *starpu_mct_init_parameters(struct starpu_sched_component_mct_data *params); + +unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *component, + struct starpu_task *task, + double *estimated_lengths, + double *estimated_transfer_length, + unsigned *suitable_components); + + +void starpu_mct_compute_expected_times(struct starpu_sched_component *component, + struct starpu_task *task, + double *estimated_lengths, + double *estimated_transfer_length, + double *estimated_ends_with_task, + double *min_exp_end_of_task, + double *max_exp_end_of_workers, + unsigned *suitable_components, + unsigned nsuitable_components); + +double starpu_mct_compute_fitness(struct _starpu_mct_data * d, + double exp_end, + double min_exp_end, + double max_exp_end, + double transfer_len, + double local_energy); + +int starpu_mct_get_best_component(struct _starpu_mct_data *d, + struct starpu_task *task, + double *estimated_lengths, + double *estimated_transfer_length, + double *estimated_ends_with_task, + double *local_energy, + double min_exp_end_of_task, + double max_exp_end_of_workers, 
+ unsigned *suitable_components, + unsigned nsuitable_components); + + +void starpu_mct_compute_energy(struct starpu_sched_component *component, + struct starpu_task *task , + double *local_energy, + unsigned *suitable_components, + unsigned nsuitable_components); + +int eager_calibration_push_task(struct starpu_sched_component * component, struct starpu_task * task); + +#pragma GCC visibility pop diff --git a/src/sched_policies/heteroprio.c b/src/sched_policies/heteroprio.c new file mode 100644 index 0000000..fb29791 --- /dev/null +++ b/src/sched_policies/heteroprio.c @@ -0,0 +1,3876 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* Distributed queues using performance modeling to assign tasks */ + +#include +#include +#include +#include +#include + +#include +#include "heteroprio.h" + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#ifndef DBL_MIN +#define DBL_MIN __DBL_MIN__ +#endif + +#ifndef DBL_MAX +#define DBL_MAX __DBL_MAX__ +#endif + +#define STARPU_NB_TYPES STARPU_NARCH + +#define STR_MAX_SIZE 64 + +#define STRINGIFY(x) _STR(x) +#define _STR(x) #x + +/** Push strategy for use_locality */ +enum laheteroprio_push_strategy +{ + PUSH_LS_SDH, + PUSH_LS_SDH2, + PUSH_LS_SDHB, + PUSH_LC_SMWB, + PUSH_NB_AUTO, // Always last to limit auto + PUSH_LcS, + PUSH_WORKER, + PUSH_AUTO +}; + +/** Queue used when use_locality is enabled */ +struct laqueue +{ + unsigned char* data; + long int capacity; + long int current_index; + long int size_of_element; +}; + +static struct laqueue laqueue_init(const long int size_of_element); +static void laqueue_destroy(struct laqueue* q); +//static long int laqueue_size(struct laqueue* q); +static void laqueue_push(struct laqueue* q, void* data); +static void* laqueue_pop(struct laqueue* q); +//static void* laqueue_top(struct laqueue* q); + +struct starpu_laheteroprio_access_item +{ + unsigned prio_idx; + unsigned wgroup_idx; +}; + +static struct laqueue laqueue_init(const long int size_of_element) +{ + struct laqueue q; + q.data = NULL; + q.capacity = 0; + q.current_index = 0; + q.size_of_element = size_of_element; + return q; +} + +static void laqueue_destroy(struct laqueue* q) +{ + STARPU_ASSERT(q->current_index == 0); + free(q->data); +} + +//static long int laqueue_size(struct laqueue* q) +//{ +// return q->capacity; +//} + +static void laqueue_push(struct laqueue* q, void* data) +{ + if(q->current_index == q->capacity) + { + q->capacity = (q->capacity+10)*2; + _STARPU_REALLOC(q->data, q->size_of_element*q->capacity); + } + memcpy(&q->data[(q->current_index++)*q->size_of_element], data, 
q->size_of_element); +} + +static void* laqueue_pop(struct laqueue* q) +{ + STARPU_ASSERT(q->current_index-1 >= 0); + unsigned char* data = &q->data[(q->current_index-1)*q->size_of_element]; + q->current_index -= 1; + return data; +} + +//static void* laqueue_top(struct laqueue* q) +//{ +// STARPU_ASSERT(q->current_index-1 >= 0); +// return &q->data[(q->current_index-1)*q->size_of_element]; +//} + +/** How are codelet grouped by priority */ +enum autoheteroprio_codelet_grouping_strategy +{ + BY_PERF_MODEL_OR_NAME = 0, /** Using perfmodel symbol or codelet's name if no perfmodel */ + BY_NAME_ONLY = 1 /** Based on the codelet's name only */ +}; + +/* A bucket corresponds to a Pair of priorities + * When a task is pushed with a priority X, it will be stored + * into the bucket X. + * All the tasks stored in the fifo should be computable by the arch + * in valid_archs. + * For example if valid_archs = (STARPU_CPU|STARPU_CUDA) + * Then task->task->where should be at least (STARPU_CPU|STARPU_CUDA) + */ +struct _heteroprio_bucket +{ + /* Tasks of the current bucket */ + /* In case data locality is NOT used, only the first element of the array is used */ + /* In case data locality IS used, the element refers to a worker group */ + struct starpu_task_list tasks_queue[LAHETEROPRIO_MAX_WORKER_GROUPS]; + /* The correct arch for the current bucket */ + unsigned valid_archs; + /* The slow factors for any archs */ + float slow_factors_per_index[STARPU_NB_TYPES]; + /* The base arch for the slow factor (the fatest arch for the current task in the bucket */ + unsigned factor_base_arch_index; + + /**** Fields used when use_locality == 1 : ****/ + + /* the number of tasks in all the queues (was previously tasks_queue.ntasks) */ + unsigned tasks_queue_ntasks; + /* to keep track of the mn at push time */ + struct laqueue auto_mn[LAHETEROPRIO_MAX_WORKER_GROUPS]; +}; + +static int use_la_mode = 0; +static int use_auto_mode = 0; + +/* Init a bucket */ +static void 
_heteroprio_bucket_init(struct _heteroprio_bucket* bucket) +{ + if(use_la_mode) + { + unsigned i; + memset(bucket, 0, sizeof(*bucket)); + for(i = 0 ; i < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++i) + { + starpu_task_list_init(&bucket->tasks_queue[i]); + bucket->auto_mn[i] = laqueue_init(sizeof(unsigned)*PUSH_NB_AUTO); + } + } + else + { + memset(bucket, 0, sizeof(*bucket)); + starpu_task_list_init(&bucket->tasks_queue[0]); + } +} + +/* Release a bucket */ +static void _heteroprio_bucket_release(struct _heteroprio_bucket* bucket) +{ + if(use_la_mode) + { + unsigned i; + for(i = 0 ; i < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++i) + { + STARPU_ASSERT(starpu_task_list_empty(&bucket->tasks_queue[i]) != 0); + laqueue_destroy(&bucket->auto_mn[i]); + } + } + else + { + STARPU_ASSERT(starpu_task_list_empty(&bucket->tasks_queue[0]) != 0); + // don't task_lists need to be destroyed ? + } +} + +// Must be manually add to get more stats +//#define LAHETEROPRIO_PRINT_STAT + +static enum laheteroprio_push_strategy getEnvAdvPush() +{ + const char *push = starpu_getenv("STARPU_LAHETEROPRIO_PUSH"); + if (push) + { + if(strcmp(push, "WORKER") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_WORKER\n"); +#endif + return PUSH_WORKER; + } + if(strcmp(push, "LcS") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LcS\n"); +#endif + return PUSH_LcS; + } + if(strcmp(push, "LS_SDH") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDH\n"); +#endif + return PUSH_LS_SDH; + } + if(strcmp(push, "LS_SDH2") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDH2\n"); +#endif + return PUSH_LS_SDH2; + } + if(strcmp(push, "LS_SDHB") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDHB\n"); +#endif + return PUSH_LS_SDHB; + } + if(strcmp(push, "LC_SMWB") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LC_SMWB\n"); 
+#endif + return PUSH_LC_SMWB; + } + if(strcmp(push, "AUTO") == 0) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_AUTO\n"); +#endif + return PUSH_AUTO; + } + _STARPU_MSG("Undefined push strategy %s\n", push); + } +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] Use PUSH_AUTO\n"); +#endif + return PUSH_AUTO; +} + +/* A worker is mainly composed of a fifo for the tasks + * and some direct access to worker properties. + * The fifo is implemented with any array, + * to read a task, access tasks_queue[tasks_queue_index] + * to write a task, access tasks_queue[(tasks_queue_index+tasks_queue_size)%HETEROPRIO_MAX_PREFETCH] + */ +/* ANDRA_MODIF: can use starpu fifo + starpu sched_mutex*/ +struct _heteroprio_worker_wrapper +{ + unsigned arch_type; + unsigned arch_index; + + /** Only used when use_locality==0 : */ + struct starpu_st_prio_deque tasks_queue; +}; + +struct _starpu_heteroprio_data +{ + starpu_pthread_mutex_t policy_mutex; + struct starpu_bitmap waiters; + /* The bucket to store the tasks */ + struct _heteroprio_bucket buckets[HETEROPRIO_MAX_PRIO]; + /* Whether heteroprio should consider data locality or not */ + unsigned use_locality; + /* The number of buckets for each arch */ + unsigned nb_prio_per_arch_index[STARPU_NB_TYPES]; + /* The mapping to the corresponding buckets */ + unsigned prio_mapping_per_arch_index[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + /* The number of available tasks for a given arch (not prefetched) */ + unsigned nb_remaining_tasks_per_arch_index[STARPU_NB_TYPES]; + /* The total number of tasks in the bucket (not prefetched) */ + unsigned total_tasks_in_buckets; + /* The number of workers for a given arch */ + unsigned nb_workers_per_arch_index[STARPU_NB_TYPES]; + + /* Information on all the workers */ + struct _heteroprio_worker_wrapper workers_heteroprio[STARPU_NMAXWORKERS]; + + /*** use_locality==0 specific : */ + + /* The total number of prefetched tasks for a given arch */ + unsigned 
nb_prefetched_tasks_per_arch_index[STARPU_NB_TYPES]; + + /*** use_locality==1 (laheteroprio) specific : */ + + /* Helps ensuring laheteroprio has been correctly initialized */ + unsigned map_wgroup_has_been_called; + /* Helps ensuring laheteroprio has been correctly initialized */ + unsigned warned_change_nb_memory_nodes; + /* Number of memory nodes */ + unsigned nb_memory_nodes; + /* The mapping to the corresponding prio prio_mapping_per_arch_index[x][prio_mapping_per_arch_index[x][y]] = y */ + unsigned bucket_mapping_per_arch_index[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + /* The wgroup for all the workers */ + unsigned workers_laheteroprio_wgroup_index[STARPU_NMAXWORKERS]; + /* Number of wgroups */ + unsigned nb_wgroups; + /* The task queue for the tasks inserted by the master thread */ + unsigned master_tasks_queue_idx; + /* Arch related to each wgroup (for now only one kind of arch per wgroup */ + unsigned arch_of_wgroups[LAHETEROPRIO_MAX_WORKER_GROUPS]; + /* The pop offset per group */ + struct starpu_laheteroprio_access_item wgroup_pop_access_orders[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS*HETEROPRIO_MAX_PRIO]; + /* Size of wgroup_pop_access_orders items */ + unsigned wgroup_pop_access_orders_size[LAHETEROPRIO_MAX_WORKER_GROUPS]; + /* The push strategy */ + enum laheteroprio_push_strategy pushStrategyToUse; + enum laheteroprio_push_strategy pushStrategySet; + int pushStrategyHistory[PUSH_NB_AUTO]; + starpu_pthread_mutex_t push_history_mutex; + + /*** auto-heteroprio specific : */ + + /** Strategy to determine on which base which can assign same priority to codelets */ + enum autoheteroprio_codelet_grouping_strategy codelet_grouping_strategy; + + unsigned use_auto_calibration; + + starpu_pthread_mutex_t auto_calibration_mutex; + + // parameters: + + unsigned autoheteroprio_priority_ordering_policy; + // reorder priority every priority_ordering_interval pushed tasks + int priority_ordering_interval; + // if set to 0: will gather data from 
execution (task time, NOD, etc.) + unsigned freeze_data_gathering; + + + unsigned autoheteroprio_print_prio_after_ordering; + unsigned autoheteroprio_print_data_on_update; + + + // 0 = if a task has no implementation on arch, expected time will be AUTOHETEROPRIO_LONG_TIME + // 1 = if a task has no implementation on arch, expected time will be the shortest time among all archs + unsigned autoheteroprio_time_estimation_policy; + + + // environment hyperparameters + + double NTnodPond; + double NTexpVal; + double BNexpVal; + double URTurt; + double URT2urt; + double URT2prop; + double and2pond; + double and3pond; + double and4pond; + double and5xoffset; + double and5yoffset; + double and9xoffset; + double and9yoffset; + double and10xoffset; + double and10yoffset; + double and11xoffset; + double and11yoffset; + double ANTnodPond; + double ANTexpVal; + + int priority_last_ordering; + + // lightweight time profiling: + + // busy time and free time of each arch for current execution + double current_arch_busy_time[STARPU_NB_TYPES]; + double current_arch_free_time[STARPU_NB_TYPES]; + + // last time a worker executed either pre_exec or post_exec hook + double last_hook_exec_time[STARPU_NMAXWORKERS]; + + // task data: + + unsigned found_codelet_names_length; + char found_codelet_names[HETEROPRIO_MAX_PRIO][CODELET_MAX_NAME_LENGTH]; + unsigned found_codelet_names_on_arch[STARPU_NB_TYPES]; + + // busy time and free time of each arch + double average_arch_busy_time[STARPU_NB_TYPES]; + double average_arch_free_time[STARPU_NB_TYPES]; + + // average prio NOD for each task + double prio_average_NOD[HETEROPRIO_MAX_PRIO]; + // NOD sample size + unsigned prio_average_NOD_count[HETEROPRIO_MAX_PRIO]; + + // average prio URT for each task + double prio_average_URT[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + // URT sample size + unsigned prio_average_URT_count[HETEROPRIO_MAX_PRIO]; + + // average execution time for each arch + double 
prio_average_time_arch[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + // sample size of execution times + unsigned prio_average_time_arch_count[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + // true if we have at least one sample to compute the average execution time + unsigned prio_arch_has_time_info[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + + // proportion of each task during execution (sum of each prio should equal 1) + double prio_overall_proportion[HETEROPRIO_MAX_PRIO]; + // sample size (number of added tasks of a type) + unsigned prio_overall_proportion_count[HETEROPRIO_MAX_PRIO]; + + // actual location of a task execution (~= probability of being executed on an arch) (sum of each arch for a prio should equal 1) + double prio_arch_proportion[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + unsigned prio_arch_proportion_count[HETEROPRIO_MAX_PRIO]; + + // sum of each successor's best time (better arch) + double prio_average_successors_best_time_sum[HETEROPRIO_MAX_PRIO]; + // sample size + unsigned prio_average_successors_best_time_sum_count[HETEROPRIO_MAX_PRIO]; + + // best possible time of a prio (between archs) + double prio_average_best[HETEROPRIO_MAX_PRIO]; + unsigned prio_average_best_count[HETEROPRIO_MAX_PRIO]; +}; + +// declare prototypes +void starpu_heteroprio_map_wgroup_memory_nodes_hp(struct _starpu_heteroprio_data *hp); +static double get_best_autoheteroprio_estimated_time(struct _starpu_heteroprio_data *hp, unsigned priority); + +static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch) +{ + if (arch >= STARPU_NARCH) + return 0; + return STARPU_WORKER_TO_MASK(arch); +} + +static int arch_can_execute_prio(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned prio) +{ + return (hp->buckets[prio].valid_archs&starpu_heteroprio_types_to_arch(arch))!=0; +} + +void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality) +{ + struct _starpu_heteroprio_data *hp = (struct 
_starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_ASSERT(use_locality == 0 || use_locality == 1); + + hp->use_locality = use_locality; +} + +/** Tell how many prio there are for a given arch */ +void starpu_heteroprio_set_nb_prios_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned max_prio) +{ + STARPU_ASSERT(max_prio <= HETEROPRIO_MAX_PRIO); + + hp->nb_prio_per_arch_index[arch] = max_prio; + + if(hp->use_locality) + { + starpu_heteroprio_map_wgroup_memory_nodes_hp(hp); + } +} + +/** Tell how many prio there are for a given arch */ +void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_set_nb_prios_hp(hp, arch, max_prio); +} + +void starpu_heteroprio_set_mapping_hp_without_arch(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) +{ + STARPU_ASSERT(dest_bucket_id < HETEROPRIO_MAX_PRIO); + + hp->prio_mapping_per_arch_index[arch][source_prio] = dest_bucket_id; + + if(hp->use_locality == 1) + { + hp->bucket_mapping_per_arch_index[arch][dest_bucket_id] = source_prio; + } +} + +void starpu_heteroprio_set_mapping_without_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_set_mapping_hp_without_arch(hp, arch, source_prio, dest_bucket_id); +} + +/** Set the mapping for a given arch prio=>bucket */ +void starpu_heteroprio_set_mapping_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) +{ + starpu_heteroprio_set_mapping_hp_without_arch(hp, arch, source_prio, dest_bucket_id); + + 
hp->buckets[dest_bucket_id].valid_archs |= starpu_heteroprio_types_to_arch(arch); + _STARPU_DEBUG("Adding arch %d to bucket %u\n", arch, dest_bucket_id); +} + +inline void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_set_mapping_hp(hp, arch, source_prio, dest_bucket_id); +} + +void starpu_heteroprio_clear_mapping_hp(struct _starpu_heteroprio_data *hp) +{ + // direct mapping for all archs (and overwrite any changes to bucket archs) + unsigned arch; + for(arch=0;archbuckets[prio].valid_archs = 0; + } + } +} + +void starpu_heteroprio_set_faster_arch_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned bucket_id) +{ + STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO); + + hp->buckets[bucket_id].factor_base_arch_index = arch; + + hp->buckets[bucket_id].slow_factors_per_index[arch] = 0; +} + +/** Tell which arch is the faster for the tasks of a bucket (optional) */ +inline void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_set_faster_arch_hp(hp, arch, bucket_id); +} + +void starpu_heteroprio_set_arch_slow_factor_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor) +{ + STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO); + + hp->buckets[bucket_id].slow_factors_per_index[arch] = slow_factor; +} + +/** Tell how slow is a arch for the tasks of a bucket (optional) */ +inline void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor) +{ + struct _starpu_heteroprio_data *hp = (struct 
_starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, bucket_id, slow_factor); +} + +void starpu_heteroprio_set_pop_access_order_hp(struct _starpu_heteroprio_data *hp, unsigned wgroup_id, const struct starpu_laheteroprio_access_item access_items[], const unsigned size) +{ + STARPU_ASSERT(size <= LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); + const unsigned arch_of_wgroup = hp->arch_of_wgroups[wgroup_id]; + const unsigned nb_prios = hp->nb_prio_per_arch_index[arch_of_wgroup]; + const unsigned nb_wgroups = hp->nb_wgroups; + STARPU_ASSERT(size <= nb_wgroups *nb_prios); + memcpy(hp->wgroup_pop_access_orders[wgroup_id], access_items, sizeof(struct starpu_laheteroprio_access_item) *size); + hp->wgroup_pop_access_orders_size[wgroup_id] = size; +} + +void starpu_heteroprio_set_pop_access_order(unsigned sched_ctx_id, unsigned wgroup_id, const struct starpu_laheteroprio_access_item access_items[], const unsigned size) +{ + STARPU_ASSERT(size <= LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); + starpu_heteroprio_set_pop_access_order_hp(hp, wgroup_id, access_items, size); +} + +struct dist +{ + double dist; + unsigned wgroup_idx; +}; + +static int comp_dist(const void *elem1, const void *elem2) +{ + const struct dist *d1 = ((struct dist *) elem1); + const struct dist *d2 = ((struct dist *) elem2); + if (d1->dist > d2->dist) return 1; + if (d1->dist < d2->dist) return -1; + return 0; +} + +void starpu_heteroprio_map_wgroup_memory_nodes_hp(struct _starpu_heteroprio_data *hp) +{ + STARPU_ASSERT_MSG(hp->use_locality == 1, "starpu_heteroprio_map_wgroup_memory_nodes has been called without enabling LA mode\n"); + hp->map_wgroup_has_been_called = 1; // Set flag to 1 + + // Set the number of memory nodes + hp->nb_memory_nodes = starpu_memory_nodes_get_count(); + const 
unsigned current_nb_memory_nodes = hp->nb_memory_nodes; + + hp->warned_change_nb_memory_nodes = 0; + + hp->nb_wgroups = current_nb_memory_nodes; + // Set memory nodes' type + { + unsigned idx_memnode; + for (idx_memnode = 0; idx_memnode < current_nb_memory_nodes; ++idx_memnode) + { + const enum starpu_node_kind memnode_kind = starpu_node_get_kind(idx_memnode); + hp->arch_of_wgroups[idx_memnode] = starpu_memory_node_get_worker_archtype(memnode_kind); + } + } + // Set workers' type + { + unsigned idx_worker; + for (idx_worker = 0; idx_worker < starpu_worker_get_count(); ++idx_worker) + { + hp->workers_laheteroprio_wgroup_index[idx_worker] = starpu_worker_get_memory_node(idx_worker); + } + } + if (starpu_cpu_worker_get_count() != 0) + { + unsigned cpu_0 = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0); + hp->master_tasks_queue_idx = starpu_worker_get_memory_node(cpu_0); + } + else + { + // Consider memory node 0 as the CPU + STARPU_ASSERT(starpu_node_get_kind(0) == STARPU_CPU_RAM); + hp->master_tasks_queue_idx = 0; + } + // Build memory distance matrix + double dist_mem_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; + { + unsigned idx_mem_node1; + unsigned idx_mem_node2; + double max_dist_mem = 0; + for (idx_mem_node1 = 0; idx_mem_node1 < current_nb_memory_nodes; ++idx_mem_node1) + { + for (idx_mem_node2 = 0; idx_mem_node2 < current_nb_memory_nodes; ++idx_mem_node2) + { + if (idx_mem_node1 == idx_mem_node2) + { + dist_mem_matrix[idx_mem_node1][idx_mem_node2] = 0; + } + else + { + dist_mem_matrix[idx_mem_node1][idx_mem_node2] = starpu_transfer_predict(idx_mem_node2, idx_mem_node1, 1024 * 1024 *1024); + max_dist_mem = STARPU_MAX(max_dist_mem, dist_mem_matrix[idx_mem_node1][idx_mem_node2]); + } + } + } + for (idx_mem_node1 = 0; idx_mem_node1 < current_nb_memory_nodes; ++idx_mem_node1) + { + for (idx_mem_node2 = 0; idx_mem_node2 < current_nb_memory_nodes; ++idx_mem_node2) + { + dist_mem_matrix[idx_mem_node1][idx_mem_node2] /= 
max_dist_mem; + } + } + } + // Build priority distance matrix + double dist_prio_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; + { + unsigned idx_prio_node1; + unsigned idx_prio_node2; + for (idx_prio_node1 = 0; idx_prio_node1 < current_nb_memory_nodes; ++idx_prio_node1) + { + for (idx_prio_node2 = 0; idx_prio_node2 < current_nb_memory_nodes; ++idx_prio_node2) + { + if (idx_prio_node1 == idx_prio_node2) + { + dist_prio_matrix[idx_prio_node1][idx_prio_node2] = 0; + } + else + { + const unsigned arch_wgroup1 = hp->arch_of_wgroups[idx_prio_node1]; + const unsigned arch_wgroup2 = hp->arch_of_wgroups[idx_prio_node2]; + double diff = 0; + int cpt1 = 0; + int cpt2 = 0; + unsigned idx; + for(idx = 0; idx < HETEROPRIO_MAX_PRIO; ++idx) + { + diff += fabs((double)(hp->bucket_mapping_per_arch_index[arch_wgroup1][idx] + 1) - (double)(hp->bucket_mapping_per_arch_index[arch_wgroup2][idx] + 1)); + if (hp->bucket_mapping_per_arch_index[arch_wgroup1][idx] != (unsigned)-1) cpt1 += 1; + if (hp->bucket_mapping_per_arch_index[arch_wgroup2][idx] != (unsigned)-1) cpt2 += 1; + } + const int maxcpt = STARPU_MAX(cpt1, cpt2); + diff /= (maxcpt + 1) *(maxcpt + 2) / 2.; + dist_prio_matrix[idx_prio_node1][idx_prio_node2] = diff; + } + } + } + } + // Build final distance matrix + double dist_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; + { + const double alpha = 0.5; + unsigned idx_node1; + unsigned idx_node2; + for (idx_node1 = 0; idx_node1 < current_nb_memory_nodes; ++idx_node1) + { + for (idx_node2 = 0; idx_node2 < current_nb_memory_nodes; ++idx_node2) + { + dist_matrix[idx_node1][idx_node2] = (1 - dist_prio_matrix[idx_node1][idx_node2]) *alpha + dist_mem_matrix[idx_node1][idx_node2] *(1 - alpha); + } + } + } + unsigned nb_closed_nodes[STARPU_NB_TYPES]; + { + char var_name[STR_MAX_SIZE]; + + // Retrieving environment variable STARPU_LAHETEROPRIO_S_* for each architecture + unsigned arch; + for(arch = 0; arch < 
STARPU_NB_TYPES; ++arch) + { + const char *arch_env_name = starpu_worker_get_type_as_env_var(arch); + if(arch_env_name) + { + snprintf(var_name, STR_MAX_SIZE, "STARPU_LAHETEROPRIO_S_%s", + arch_env_name); + unsigned default_value = arch == STARPU_CPU_WORKER ? current_nb_memory_nodes - 1 : 1; + + nb_closed_nodes[arch] = starpu_getenv_number_default(var_name, default_value); + } + } + } + unsigned nb_prio_step[STARPU_NB_TYPES]; + { + char var_name[STR_MAX_SIZE]; + + // Retrieving environment variable STARPU_LAHETEROPRIO_PRIO_STEP_* for each architecture + unsigned arch; + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + const char *arch_env_name = starpu_worker_get_type_as_env_var(arch); + if(arch_env_name) + { + snprintf(var_name, STR_MAX_SIZE, "STARPU_LAHETEROPRIO_PRIO_STEP_%s", + arch_env_name); + unsigned default_value = arch != STARPU_CPU_WORKER ? hp->nb_prio_per_arch_index[arch] : 1; + + nb_prio_step[arch] = starpu_getenv_number_default(var_name, default_value); + } + } + } +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LAHETEROPRIO] nb_closed_nodes[STARPU_CPU_WORKER] %u\n", nb_closed_nodes[STARPU_CPU_WORKER]); + _STARPU_MSG("[LAHETEROPRIO] nb_closed_nodes[STARPU_CUDA_WORKER] %u\n", nb_closed_nodes[STARPU_CUDA_WORKER]); + _STARPU_MSG("[LAHETEROPRIO] nb_prio_step[STARPU_CPU_WORKER] %u\n", nb_prio_step[STARPU_CPU_WORKER]); + _STARPU_MSG("[LAHETEROPRIO] nb_prio_step[STARPU_CUDA_WORKER] %u\n", nb_prio_step[STARPU_CUDA_WORKER]); +#endif + STARPU_ASSERT(hp->nb_wgroups == current_nb_memory_nodes); + unsigned wgroup_idx; + for (wgroup_idx = 0; wgroup_idx < current_nb_memory_nodes; ++wgroup_idx) + { + const unsigned wgroup_arch = hp->arch_of_wgroups[wgroup_idx]; + struct dist others[LAHETEROPRIO_MAX_WORKER_GROUPS]; + unsigned access_wgroup_idx; + for (access_wgroup_idx = 0; access_wgroup_idx < current_nb_memory_nodes; ++access_wgroup_idx) + { + others[access_wgroup_idx].wgroup_idx = access_wgroup_idx; + others[access_wgroup_idx].dist = 
dist_matrix[wgroup_idx][access_wgroup_idx]; + } + { + struct dist tmp = others[wgroup_idx]; + others[wgroup_idx] = others[0]; + others[0] = tmp; + } + qsort(others + 1, current_nb_memory_nodes - 1, sizeof(struct dist), comp_dist); + struct starpu_laheteroprio_access_item buffer_access_items[LAHETEROPRIO_MAX_WORKER_GROUPS *HETEROPRIO_MAX_PRIO]; + const unsigned nb_prio_in_wgroup = hp->nb_prio_per_arch_index[hp->arch_of_wgroups[wgroup_idx]]; + unsigned access_idx = 0; + unsigned prio_block_idx; + for (prio_block_idx = 0; prio_block_idx < nb_prio_in_wgroup; prio_block_idx += nb_prio_step[wgroup_arch]) + { + { + access_wgroup_idx = 0; + unsigned prio_idx; + for (prio_idx = prio_block_idx; prio_idx < STARPU_MIN(prio_block_idx + nb_prio_step[wgroup_arch], nb_prio_in_wgroup); ++prio_idx) + { + buffer_access_items[access_idx].prio_idx = prio_idx; + buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; + access_idx += 1; + } + } + unsigned prio_idx; + for (prio_idx = prio_block_idx; prio_idx < STARPU_MIN(prio_block_idx + nb_prio_step[wgroup_arch], nb_prio_in_wgroup); ++prio_idx) + { + for (access_wgroup_idx = 1; access_wgroup_idx < STARPU_MIN(nb_closed_nodes[wgroup_arch] + 1, current_nb_memory_nodes); ++access_wgroup_idx) + { + buffer_access_items[access_idx].prio_idx = prio_idx; + buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; + access_idx += 1; + } + } + } + unsigned prio_idx; + for (prio_idx = 0; prio_idx < nb_prio_in_wgroup; ++prio_idx) + { + for (access_wgroup_idx = nb_closed_nodes[wgroup_arch] + 1; access_wgroup_idx < current_nb_memory_nodes; ++access_wgroup_idx) + { + buffer_access_items[access_idx].prio_idx = prio_idx; + buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; + access_idx += 1; + } + } + starpu_heteroprio_set_pop_access_order_hp(hp, wgroup_idx, buffer_access_items, access_idx); + } +} + +void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id) +{ 
+ struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); + + starpu_heteroprio_map_wgroup_memory_nodes_hp(hp); +} + +void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); + STARPU_ASSERT_MSG(hp->use_locality == 1, "starpu_heteroprio_print_wgroups has been called without enabling LA mode\n"); + + fprintf(stream, "[STARPU-LAHETEROPRIO] There are %u groups\n", hp->nb_wgroups); + char dest_name[512]; + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + starpu_worker_get_name(worker_id, dest_name, 512); + fprintf(stream, "[STARPU-LAHETEROPRIO] Worker %u => group %u (%s)\n", worker_id, hp->workers_laheteroprio_wgroup_index[worker_id], dest_name); + } + fprintf(stream, "\n"); + unsigned idx_wgroup; + for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) + { + int access_order[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO] = {{ 0 }}; + memset(access_order, -1, sizeof(access_order[0][0]) *LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); + const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; + const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; + const unsigned nb_wgroups = hp->nb_wgroups; + const struct starpu_laheteroprio_access_item *wgroup_access_order = hp->wgroup_pop_access_orders[idx_wgroup]; + const unsigned wgroup_access_order_size = hp->wgroup_pop_access_orders_size[idx_wgroup]; + unsigned idx_access_item; + for (idx_access_item = 0; idx_access_item < wgroup_access_order_size; ++idx_access_item) + { + const unsigned current_wgroupid = wgroup_access_order[idx_access_item].wgroup_idx; + const unsigned current_prio = wgroup_access_order[idx_access_item].prio_idx; + access_order[current_wgroupid][current_prio] = idx_access_item; + } + fprintf(stream, 
"[STARPU-LAHETEROPRIO] Access order for wgroup %u (of arch type %u):\n", idx_wgroup, wgroup_arch); + unsigned idx_prio; + for (idx_prio = nb_prios; idx_prio > 0; --idx_prio) + { + const unsigned current_bucket = hp->prio_mapping_per_arch_index[wgroup_arch][idx_prio - 1]; + fprintf(stream, "[STARPU-LAHETEROPRIO] Prio %3u (Bucket %3u) => ", idx_prio - 1, current_bucket); + unsigned idx_wgroup_prio; + for (idx_wgroup_prio = 0; idx_wgroup_prio < nb_wgroups; ++idx_wgroup_prio) + { + if (access_order[idx_wgroup][idx_prio - 1] == -1) + { + fprintf(stream, "[XX] "); + } + else + { + fprintf(stream, "[%2d] ", access_order[idx_wgroup][idx_prio - 1]); + } + } + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } +} + +/** If the user does not provide an init callback we create a single bucket for all architectures */ +static inline void default_init_sched(unsigned sched_ctx_id) +{ + int min_prio = starpu_sched_ctx_get_min_priority(sched_ctx_id); + int max_prio = starpu_sched_ctx_get_max_priority(sched_ctx_id); + STARPU_ASSERT(min_prio >= 0); + STARPU_ASSERT(max_prio >= 0); + + enum starpu_worker_archtype type; + + // By default each type of devices uses 1 bucket and no slow factor + for (type = 0; type < STARPU_NARCH; type++) + if (starpu_worker_get_count_by_type(type) > 0) + starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1); + + // Direct mapping + int prio; + for(prio=min_prio ; prio<=max_prio ; prio++) + { + // By default each type of devices uses 1 bucket and no slow factor + for (type = 0; type < STARPU_NARCH; type++) + if (starpu_worker_get_count_by_type(type) > 0) + starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio); + } +} + +/** stats of heteroprio when use_locality==1 */ +#ifdef LAHETEROPRIO_PRINT_STAT +struct laheteropriostats +{ + long int nb_tasks; + long int nb_tasks_per_worker[128][HETEROPRIO_MAX_PRIO]; + long int nb_tasks_per_wgroup[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO]; + long int 
task_skipt_due_to_factor_per_worker[128][HETEROPRIO_MAX_PRIO]; + long int task_list_empty_per_worker[128][HETEROPRIO_MAX_PRIO]; + long int task_stolen_per_worker[128][HETEROPRIO_MAX_PRIO]; + long int task_stolen_in_wgroup[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO]; + long int push_redirect[128+1][LAHETEROPRIO_MAX_WORKER_GROUPS]; + long int pop_redirect[128][LAHETEROPRIO_MAX_WORKER_GROUPS]; + long int push_to_use[128][PUSH_NB_AUTO]; +}; +struct laheteropriostats lastats; +#endif + +static void check_heteroprio_mapping(struct _starpu_heteroprio_data *hp) +{ + //return 0; + + unsigned idx_prio; + + /* Ensure that information have been correctly filled */ + unsigned check_all_archs[HETEROPRIO_MAX_PRIO]; + memset(check_all_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO); + unsigned arch_index; + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + STARPU_ASSERT(hp->nb_prio_per_arch_index[arch_index] <= HETEROPRIO_MAX_PRIO); + + unsigned check_archs[HETEROPRIO_MAX_PRIO]; + memset(check_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO); + + for(idx_prio = 0; idx_prio < hp->nb_prio_per_arch_index[arch_index]; ++idx_prio) + { + const unsigned mapped_prio = hp->prio_mapping_per_arch_index[arch_index][idx_prio]; + STARPU_ASSERT(mapped_prio <= HETEROPRIO_MAX_PRIO); + STARPU_ASSERT(hp->buckets[mapped_prio].slow_factors_per_index[arch_index] >= 0.0); + + STARPU_ASSERT(hp->buckets[mapped_prio].valid_archs & starpu_heteroprio_types_to_arch(arch_index)); + + check_archs[mapped_prio] = 1; + check_all_archs[mapped_prio] += 1; + } + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + /* Ensure the current arch use a bucket or someone else can use it */ + STARPU_ASSERT(check_archs[idx_prio] == 1 || hp->buckets[idx_prio].valid_archs == 0 + || (hp->buckets[idx_prio].valid_archs & ~starpu_heteroprio_types_to_arch(arch_index)) != 0); + } + } + /* Ensure that if a valid_archs = (STARPU_CPU|STARPU_CUDA) then check_all_archs[] = 2 for example */ + + 
for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + unsigned nb_arch_on_bucket = 0; + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + if(hp->buckets[idx_prio].valid_archs & starpu_heteroprio_types_to_arch(arch_index)) + { + nb_arch_on_bucket += 1; + } + } + STARPU_ASSERT_MSG(check_all_archs[idx_prio] == nb_arch_on_bucket, "check_all_archs[idx_prio(%u)] = %u != nb_arch_on_bucket = %u\n", idx_prio, check_all_archs[idx_prio], nb_arch_on_bucket); + } +} + +static void starpu_autoheteroprio_add_task(struct _starpu_heteroprio_data *hp, const char name[CODELET_MAX_NAME_LENGTH], unsigned archs[STARPU_NB_TYPES]) +{ + unsigned arch; + for(arch=0;archfound_codelet_names_on_arch[arch], hp->found_codelet_names_length); + ++hp->found_codelet_names_on_arch[arch]; + starpu_heteroprio_set_nb_prios_hp(hp, arch, hp->found_codelet_names_on_arch[arch]); + } + } + + // TODO: remap laheteroprio policy + strncpy(&hp->found_codelet_names[hp->found_codelet_names_length][0], name, CODELET_MAX_NAME_LENGTH); + ++hp->found_codelet_names_length; + + check_heteroprio_mapping(hp); // ensures that priorities are correctly mapped +} + +#define _HETEROPRIO_DIR_MAXLEN 256 +static char _heteroprio_data_dir[_HETEROPRIO_DIR_MAXLEN]; + +/* Try to get the name of the program, to get specific data file for each program */ +#ifdef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME +#define _progname program_invocation_short_name +#else +#define _progname "UNKNOWN_PROGRAM" +#endif + +static char *_starpu_heteroprio_get_data_dir() +{ + static int directory_existence_was_tested = 0; + + if(!directory_existence_was_tested) + { + char *path = starpu_getenv("STARPU_HETEROPRIO_DATA_DIR"); + if(path) + { + snprintf(_heteroprio_data_dir, _HETEROPRIO_DIR_MAXLEN, "%s/", path); + } + else + { + snprintf(_heteroprio_data_dir, _HETEROPRIO_DIR_MAXLEN, "%s/heteroprio/", _starpu_get_perf_model_dir_default()); + } + + _starpu_mkpath_and_check(_heteroprio_data_dir, S_IRWXU); + + 
directory_existence_was_tested = 1; + } + + return _heteroprio_data_dir; +} + +static void starpu_autoheteroprio_fetch_task_data(struct _starpu_heteroprio_data *hp) +{ + const char *custom_path = starpu_getenv("STARPU_HETEROPRIO_DATA_FILE"); + +#ifndef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME + if(!custom_path) + { + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio can't determine the program's name to automatically store performance data. " + "You can specify a path to store program associated data with STARPU_HETEROPRIO_DATA_FILE\n"); + } +#endif + + char path[_HETEROPRIO_DIR_MAXLEN+6]; + if(!custom_path) + { + snprintf(path, _HETEROPRIO_DIR_MAXLEN+6, "%s/%s.data", _starpu_heteroprio_get_data_dir(), + _progname); + } + + FILE *autoheteroprio_file; + int locked; + + autoheteroprio_file = fopen(custom_path ? custom_path : path, "r"); + if(autoheteroprio_file == NULL) + { + // unable to open heteroprio data file + return; + } + locked = _starpu_frdlock(autoheteroprio_file) == 0; + + _starpu_drop_comments(autoheteroprio_file); + + unsigned number_of_archs; + unsigned archs[STARPU_NB_TYPES]; + unsigned arch_ind, arch_type; + int c; + + if(fscanf(autoheteroprio_file, "%u", &number_of_archs) != 1) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing a number of architectures\n"); + return; + } + + // Count number of archs not available in this version + const unsigned ignored_archs = STARPU_MAX(0, (int) (number_of_archs - STARPU_NB_TYPES)); + + const unsigned supported_archs = STARPU_MIN(STARPU_NB_TYPES, number_of_archs); + + // Reading list of supported architectures + for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%u", &arch_type) != 1) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture id\n"); + return; + } + archs[arch_ind] = arch_type; + 
} + for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%u", &arch_type) != 1) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture id\n"); + return; + } + } + if(getc(autoheteroprio_file) != '\n') + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); + return; + } + + _starpu_drop_comments(autoheteroprio_file); + + // Reading architectures average times + double avg_arch_busy_time, avg_arch_free_time; + for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%lf %lf", &avg_arch_busy_time, &avg_arch_free_time) != 2) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture average times id\n"); + return; + } + else if(arch_ind < STARPU_NB_TYPES && archs[arch_ind] < STARPU_NB_TYPES) + { + hp->average_arch_busy_time[archs[arch_ind]] = avg_arch_busy_time; + hp->average_arch_free_time[archs[arch_ind]] = avg_arch_free_time; + } + } + for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%lf %lf", &avg_arch_busy_time, &avg_arch_free_time) != 2) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture average times id\n"); + return; + } + } + if(getc(autoheteroprio_file) != '\n') + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); + return; + } + + _starpu_drop_comments(autoheteroprio_file); + + unsigned codelet_archs[STARPU_NB_TYPES]; + unsigned codelet_exec_archs[STARPU_NB_TYPES]; + unsigned prio = hp->found_codelet_names_length; + char codelet_name[CODELET_MAX_NAME_LENGTH+1]; + unsigned ignored_lines, 
arch_can_execute; + + // Read saved stats for each codelet + while(fscanf(autoheteroprio_file, "%" STRINGIFY(CODELET_MAX_NAME_LENGTH) "s", codelet_name) == 1) + { + memset(codelet_exec_archs, 0, STARPU_NB_TYPES * sizeof(unsigned)); + + // Read compatible architectures + ignored_lines = 0; + for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%u", &arch_can_execute) != 1) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture information for a codelet\n"); + return; + } + else if(arch_ind < STARPU_NB_TYPES) + { + codelet_archs[arch_ind] = arch_can_execute; + if(archs[arch_ind] < STARPU_NB_TYPES) + codelet_exec_archs[archs[arch_ind]] = arch_can_execute; + } + } + for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) + { + if(fscanf(autoheteroprio_file, "%u", &arch_can_execute) != 1) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture information for a codelet\n"); + return; + } + else if(arch_can_execute) + { + ignored_lines += 1; + } + } + + // Read general codelet data + if(fscanf(autoheteroprio_file, "%lf %u %u %lf %u %u %lf %u %lf %u", + &hp->prio_average_NOD[prio], &hp->prio_average_NOD_count[prio], + &hp->prio_average_URT_count[prio], + &hp->prio_overall_proportion[prio], &hp->prio_overall_proportion_count[prio], + &hp->prio_arch_proportion_count[prio], + &hp->prio_average_successors_best_time_sum[prio], &hp->prio_average_successors_best_time_sum_count[prio], + &hp->prio_average_best[prio], &hp->prio_average_best_count[prio] + ) != 10) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); + return; + } + + // Read architecture specific data + for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) + { + if(codelet_archs[arch_ind] && archs[arch_ind] < 
STARPU_NB_TYPES) + { + if(fscanf(autoheteroprio_file, "%lf %lf %u %lf\n", + &hp->prio_average_URT[archs[arch_ind]][prio], + &hp->prio_average_time_arch[archs[arch_ind]][prio], &hp->prio_average_time_arch_count[archs[arch_ind]][prio], + &hp->prio_arch_proportion[archs[arch_ind]][prio] + ) != 4) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); + return; + } + + if(hp->prio_average_time_arch_count[archs[arch_ind]][prio] > 0) + hp->prio_arch_has_time_info[archs[arch_ind]][prio] = 1; + } + else if(codelet_archs[arch_ind] && archs[arch_ind] >= STARPU_NB_TYPES) + { + while((c = getc(autoheteroprio_file)) != '\n') + if(c == EOF) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file ended abruptly\n"); + return; + } + } + } + for(arch_ind = 0; arch_ind < ignored_lines; ++arch_ind) + { + while((c = getc(autoheteroprio_file)) != '\n') + if(c == EOF) + { + fclose(autoheteroprio_file); + _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file ended abruptly\n"); + return; + } + } + + starpu_autoheteroprio_add_task(hp, codelet_name, codelet_exec_archs); + prio = hp->found_codelet_names_length; // update current prio (+1) + + _starpu_drop_comments(autoheteroprio_file); + } + + if(locked) + _starpu_frdunlock(autoheteroprio_file); + fclose(autoheteroprio_file); +} + +static void starpu_autoheteroprio_save_task_data(struct _starpu_heteroprio_data *hp) +{ + const char *custom_path = starpu_getenv("STARPU_HETEROPRIO_DATA_FILE"); + + char path[_HETEROPRIO_DIR_MAXLEN+6]; + if(!custom_path) + { + snprintf(path, _HETEROPRIO_DIR_MAXLEN+6, "%s/%s.data", _starpu_heteroprio_get_data_dir(), + _progname); + } + + FILE *autoheteroprio_file; + int locked; + + autoheteroprio_file = fopen(custom_path ? 
custom_path : path, "w+"); + if(autoheteroprio_file == NULL) + { + _STARPU_MSG("[HETEROPRIO][DEINITIALIZATION] Warning: unable to save task data\n"); + return; + } + locked = _starpu_fwrlock(autoheteroprio_file) == 0; + fseek(autoheteroprio_file, 0, SEEK_SET); + _starpu_fftruncate(autoheteroprio_file, 0); + + unsigned number_of_archs = 0; + unsigned is_arch_used[STARPU_NB_TYPES]; + unsigned arch_ind; + + fprintf(autoheteroprio_file, "##################\n"); + fprintf(autoheteroprio_file, "# Known architectures\n"); + fprintf(autoheteroprio_file, "# number_of_archs arch_ids ("); + for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) + { + if(hp->found_codelet_names_on_arch[arch_ind] > 0) + { + // Architecture was used + is_arch_used[arch_ind] = 1; + number_of_archs += 1; + fprintf(autoheteroprio_file, "%s - %u, ", + starpu_worker_get_type_as_string(arch_ind), arch_ind); + } + else + is_arch_used[arch_ind] = 0; + } + fprintf(autoheteroprio_file, ")\n"); + + // List of used architectures designed by their id + fprintf(autoheteroprio_file, "%u", number_of_archs); + for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) + { + if(is_arch_used[arch_ind]) + fprintf(autoheteroprio_file, " %u", arch_ind); + } + fprintf(autoheteroprio_file, "\n"); + + fprintf(autoheteroprio_file, "##################\n"); + fprintf(autoheteroprio_file, "# Busy/Free proportion per architecture\n"); + fprintf(autoheteroprio_file, "# ARCH1_busy_time ARCH1_free_time ... ARCHn_busy_time ARCHn_free_time\n"); + + // Busy and free proportion per architecture + for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) + { + if(is_arch_used[arch_ind]) + fprintf(autoheteroprio_file, " %lf %lf", + hp->average_arch_busy_time[arch_ind], hp->average_arch_free_time[arch_ind]); + } + fprintf(autoheteroprio_file, "\n"); + + fprintf(autoheteroprio_file, "##################\n"); + fprintf(autoheteroprio_file, "# Codelets specific data\n"); + fprintf(autoheteroprio_file, "# codelet_name arch_1_can_exec ... 
arch_n_can_exec\n"); + fprintf(autoheteroprio_file, "# average_NOD average_NOD_count average_URT_count overall_proportion overall_proportion_count arch_proportion_count avg_best_successor_time avg_best_successor_time_count prio_average_best prio_average_best_count\n"); + fprintf(autoheteroprio_file, "# for each arch which can exec: average_URT_ARCH average_time_ARCH average_time_ARCH_count ARCH_proportion\n"); + fprintf(autoheteroprio_file, "##########\n"); + + unsigned prio; + unsigned codelet_archs[STARPU_NB_TYPES]; + + for(prio = 0; prio < hp->found_codelet_names_length; ++prio) + { + fprintf(autoheteroprio_file, "%s", hp->found_codelet_names[prio]); + + // Indicate if each can execute codelet + for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) + { + if(is_arch_used[arch_ind]) + { + codelet_archs[arch_ind] = arch_can_execute_prio(hp, arch_ind, prio); + fprintf(autoheteroprio_file, " %u", codelet_archs[arch_ind]); + } + else + codelet_archs[arch_ind] = 0; + } + fprintf(autoheteroprio_file, "\n"); + + // Non specific codelet data + fprintf(autoheteroprio_file, "%lf %u %u %lf %u %u %lf %u %lf %u\n", + hp->prio_average_NOD[prio], hp->prio_average_NOD_count[prio], + hp->prio_average_URT_count[prio], + hp->prio_overall_proportion[prio], hp->prio_overall_proportion_count[prio], + hp->prio_arch_proportion_count[prio], + hp->prio_average_successors_best_time_sum[prio], hp->prio_average_successors_best_time_sum_count[prio], + hp->prio_average_best[prio], hp->prio_average_best_count[prio]); + + // Architecture specific data + for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) + { + if(codelet_archs[arch_ind]) + { + fprintf(autoheteroprio_file, "%lf %lf %u %lf\n", + hp->prio_average_URT[arch_ind][prio], + hp->prio_average_time_arch[arch_ind][prio], hp->prio_average_time_arch_count[arch_ind][prio], + hp->prio_arch_proportion[arch_ind][prio]); + } + } + + fprintf(autoheteroprio_file, "#####\n"); + } + + if(locked) + _starpu_fwrunlock(autoheteroprio_file); + 
fclose(autoheteroprio_file); +} + +static void initialize_heteroprio_policy(unsigned sched_ctx_id) +{ +#ifdef LAHETEROPRIO_PRINT_STAT + memset(&lastats, 0, sizeof(lastats)); +#endif + + int max_priority = starpu_sched_ctx_get_max_priority(sched_ctx_id); + if(max_priority < HETEROPRIO_MAX_PRIO-1) + { + starpu_sched_ctx_set_max_priority(sched_ctx_id, HETEROPRIO_MAX_PRIO-1); + _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Max priority has been set to %d\n", HETEROPRIO_MAX_PRIO-1); + } + int min_priority = starpu_sched_ctx_get_min_priority(sched_ctx_id); + if(min_priority > 0) + { + starpu_sched_ctx_set_min_priority(sched_ctx_id, 0); + _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Min priority has been set to 0\n"); + } + + /* Alloc the scheduler data */ + struct _starpu_heteroprio_data *hp; + _STARPU_MALLOC(hp, sizeof(struct _starpu_heteroprio_data)); + memset(hp, 0, sizeof(*hp)); + + hp->use_locality = use_la_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_USE_LA", 0); + _STARPU_DISP("[HETEROPRIO] Data locality : %s\n", hp->use_locality?"ENABLED":"DISABLED"); + + hp->codelet_grouping_strategy = use_auto_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY", 0); + switch(hp->codelet_grouping_strategy) + { + case BY_PERF_MODEL_OR_NAME: + _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : BY_PERF_MODEL_OR_NAME\n"); + break; + case BY_NAME_ONLY: + _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : BY_NAME\n"); + break; + default: + _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : UNKNOWN\n"); + + hp->codelet_grouping_strategy = BY_PERF_MODEL_OR_NAME; // setting to default + } + + hp->use_auto_calibration = use_auto_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_USE_AUTO_CALIBRATION", 1); + _STARPU_DISP("[HETEROPRIO] Auto calibration : %s\n", hp->use_auto_calibration?"ENABLED":"DISABLED"); + if(hp->use_auto_calibration) + { + const int ordering_policy = 
starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY", STARPU_HETEROPRIO_URT_DOT_DIFF_4); + STARPU_ASSERT_MSG(ordering_policy < STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT, "STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY must be < %d.\n", STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT); + STARPU_ASSERT_MSG(ordering_policy >= 0, "STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY must be >= 0.\n"); + hp->autoheteroprio_priority_ordering_policy = ordering_policy; + _STARPU_DISP("[AUTOHETEROPRIO] Priority ordering policy : %s\n", &starpu_autoheteroprio_priority_ordering_policy_names[hp->autoheteroprio_priority_ordering_policy][0]); + + hp->priority_ordering_interval = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL", 32); + + hp->freeze_data_gathering = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_FREEZE_GATHERING", 0); + _STARPU_DISP("[AUTOHETEROPRIO] Data gathering : %s\n", !hp->freeze_data_gathering?"ENABLED":"DISABLED"); + + hp->autoheteroprio_print_prio_after_ordering = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING", 0); + _STARPU_DISP("[AUTOHETEROPRIO] Print after ordering : %s\n", hp->autoheteroprio_print_prio_after_ordering?"ENABLED":"DISABLED"); + + hp->autoheteroprio_print_data_on_update = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE", 0); + _STARPU_DISP("[AUTOHETEROPRIO] Print on update : %s\n", hp->autoheteroprio_print_data_on_update?"ENABLED":"DISABLED"); + + hp->autoheteroprio_time_estimation_policy = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_TIME_ESTIMATION_POLICY", 0); + } + + starpu_bitmap_init(&hp->waiters); + if(hp->use_locality) + { + hp->pushStrategySet = getEnvAdvPush(); + if(hp->pushStrategySet != PUSH_AUTO) + { + hp->pushStrategyToUse = hp->pushStrategySet; + } + else + { + hp->pushStrategyToUse = PUSH_LS_SDHB; + } + } + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hp); + + 
STARPU_PTHREAD_MUTEX_INIT(&hp->policy_mutex, NULL); + if(hp->use_locality) + { + STARPU_PTHREAD_MUTEX_INIT(&hp->push_history_mutex, NULL); + } + if(hp->use_auto_calibration) + { + STARPU_PTHREAD_MUTEX_INIT(&hp->auto_calibration_mutex, NULL); + } + + // get environment hyperparameters + + hp->NTnodPond = starpu_getenv_float_default("STARPU_HETEROPRIO_NOD_TIME_COMBINATION_NOD_MULTIPLIER", 0.3); + hp->NTexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_NOD_TIME_COMBINATION_EXP_SELECTIVITY", 0.5); + hp->BNexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_BEST_NODS_SCORE_EXP_SELECTIVITY", 0.5); + hp->URTurt = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_URT_MULTIPLIER", 0.5); + hp->URT2urt = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_2_URT_MULTIPLIER", 0.5); + hp->URT2prop = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_2_ARCH_NEED_MULTIPLIER", 2.0); + hp->and2pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_2_ARCH_NEED_MULTIPLIER", 1.0); + hp->and3pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_3_ARCH_NEED_MULTIPLIER", 1.0); + hp->and4pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_4_ARCH_NEED_MULTIPLIER", 1.0); + hp->and5xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_5_NOD_OFFSET", 1.3); + hp->and5yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_5_ARCH_DIFF_OFFSET", 1.0); + hp->and9xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_9_NOD_OFFSET", 1.3); + hp->and9yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_9_ARCH_DIFF_OFFSET", 1.0); + hp->and10xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_AURT_DOT_DIFF_10_NOD_OFFSET", 1.3); + hp->and10yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_10_ARCH_DIFF_OFFSET", 1.0); + hp->and11xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_11_NOD_OFFSET", 1.3); + hp->and11yoffset = 
starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_11_ARCH_DIFF_OFFSET", 1.0); + hp->ANTnodPond = starpu_getenv_float_default("STARPU_HETEROPRIO_URTS_TIME_COMBINATION_NOD_MULTIPLIER", 0.3); + hp->ANTexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_URTS_TIME_COMBINATION_EXP_SELECTIVITY", 0.5); + + unsigned idx_prio; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + _heteroprio_bucket_init(&hp->buckets[idx_prio]); + + if(hp->use_locality) + { + hp->nb_wgroups = LAHETEROPRIO_MAX_WORKER_GROUPS; + unsigned idx_wgroup; + for(idx_wgroup = 0 ; idx_wgroup < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++idx_wgroup) + { + hp->arch_of_wgroups[idx_wgroup] = STARPU_ANY_WORKER; // We set STARPU_ANY_WORKER = default (none) value + } + memset(hp->bucket_mapping_per_arch_index, -1, sizeof(unsigned)*STARPU_NB_TYPES*HETEROPRIO_MAX_PRIO); + } + + void (*callback_sched)(unsigned) = starpu_sched_ctx_get_sched_policy_callback(sched_ctx_id); + + if(callback_sched) + { + if(hp->use_auto_calibration) + { + _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Warning: a custom sched init function has been detected while being in auto calibration mode (STARPU_HETEROPRIO_USE_AUTO_CALIBRATION). 
Custom changes to priority mapping will be overwritten.\n"); + } + callback_sched(sched_ctx_id); + } + else + { + default_init_sched(sched_ctx_id); + } + + check_heteroprio_mapping(hp); + + if(hp->use_auto_calibration) + { + unsigned arch; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + hp->prio_average_NOD[idx_prio] = 0.f; + hp->prio_average_NOD_count[idx_prio] = 0; + + hp->prio_average_URT_count[idx_prio] = 0; + + hp->prio_overall_proportion[idx_prio] = 0.f; + hp->prio_overall_proportion_count[idx_prio] = 0; + + hp->prio_arch_proportion_count[idx_prio] = 0; + + hp->prio_average_successors_best_time_sum[idx_prio] = 0.f; + hp->prio_average_successors_best_time_sum_count[idx_prio] = 0; + + hp->prio_average_best[idx_prio] = 0.f; + hp->prio_average_best_count[idx_prio] = 0; + + for(arch=0;archprio_average_URT[arch][idx_prio] = 0.f; + + hp->prio_average_time_arch[arch][idx_prio] = 0.f; + hp->prio_average_time_arch_count[arch][idx_prio] = 0; + + hp->prio_arch_proportion[arch][idx_prio] = 0.f; + + if(arch != STARPU_CPU_WORKER) + { + starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, idx_prio, 1.0f); + } + } + starpu_heteroprio_set_faster_arch_hp(hp, STARPU_CPU_WORKER, idx_prio); + } + + starpu_heteroprio_clear_mapping_hp(hp); + for(arch=0;archfreeze_data_gathering) + { + _starpu_graph_record = 1; // allow starpu graph recording + } + } +} + +static void register_arch_times(struct _starpu_heteroprio_data *hp, unsigned arch, double busy_time, double free_time); + +static void deinitialize_heteroprio_policy(unsigned sched_ctx_id) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + /* Ensure there are no more tasks */ + STARPU_ASSERT(hp->total_tasks_in_buckets == 0); + unsigned arch_index; + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + if(hp->use_locality) + { + STARPU_ASSERT(hp->nb_remaining_tasks_per_arch_index[arch_index] == 0); + } + else + { + 
STARPU_ASSERT(hp->nb_remaining_tasks_per_arch_index[arch_index] == 0); + STARPU_ASSERT(hp->nb_prefetched_tasks_per_arch_index[arch_index] == 0); + } + } + + unsigned idx_prio; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + STARPU_ASSERT(hp->buckets[idx_prio].tasks_queue_ntasks == 0); // potentially not wanted if use_la==0 + _heteroprio_bucket_release(&hp->buckets[idx_prio]); + } + + if(hp->use_locality) + { +#ifdef LAHETEROPRIO_PRINT_STAT + _STARPU_MSG("[LASTATS] nb tasks %ld\n", lastats.nb_tasks); + { + _STARPU_MSG("[LASTATS] Tasks pushed per workers of kind:\n"); + unsigned nb_tasks = 0; + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; + const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.nb_tasks_per_worker[worker_id][idx_prio]); + nb_tasks += lastats.nb_tasks_per_worker[worker_id][idx_prio]; + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] Total tasks pushed per workers of kind: %u\n\n", nb_tasks); + } + { + _STARPU_MSG("[LASTATS] Tasks pushed per workers to mem node:\n"); + unsigned nb_tasks = 0; + _STARPU_MSG("[LASTATS] Master: "); + unsigned idx_mem; + for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) + { + fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.push_redirect[0][idx_mem]); + nb_tasks += lastats.push_redirect[0][idx_mem]; + } + fprintf(stderr, "\n"); + _STARPU_MSG("[LASTATS] Total tasks pushed per workers to mem node: %u\n\n", nb_tasks); + } + { + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + _STARPU_MSG("[LASTATS] %u: ", worker_id); + unsigned idx_mem; + for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) + { + fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.push_redirect[worker_id 
+ 1][idx_mem]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + { + _STARPU_MSG("[LASTATS] Tasks per wgroup:\n"); + unsigned nb_tasks = 0; + unsigned idx_wgroup; + for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) + { + const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; + const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.nb_tasks_per_wgroup[idx_wgroup][idx_prio]); + nb_tasks += lastats.nb_tasks_per_wgroup[idx_wgroup][idx_prio]; + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] Total tasks pushed per wgroup: %u\n\n", nb_tasks); + } + { + _STARPU_MSG("[LASTATS] Tasks skipt per workers:\n"); + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; + const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_skipt_due_to_factor_per_worker[worker_id][idx_prio]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + { + _STARPU_MSG("[LASTATS] Tasks list empty per workers:\n"); + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; + const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_list_empty_per_worker[worker_id][idx_prio]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + { + _STARPU_MSG("[LASTATS] Tasks stolen per workers:\n"); + unsigned nb_tasks = 0; + unsigned worker_id; + for (worker_id = 0; 
worker_id < starpu_worker_get_count(); ++worker_id) + { + const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; + const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_stolen_per_worker[worker_id][idx_prio]); + nb_tasks += lastats.task_stolen_per_worker[worker_id][idx_prio]; + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] Total tasks stolen per worker: %u\n\n", nb_tasks); + } + { + _STARPU_MSG("[LASTATS] Tasks stolen in wgroup:\n"); + unsigned nb_tasks = 0; + unsigned idx_wgroup; + for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) + { + const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; + const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; + _STARPU_MSG("[LASTATS] "); + for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) + { + fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_stolen_in_wgroup[idx_wgroup][idx_prio]); + nb_tasks += lastats.task_stolen_in_wgroup[idx_wgroup][idx_prio]; + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] Total tasks stolen in wgroup: %u\n\n", nb_tasks); + } + { + _STARPU_MSG("[LASTATS] Tasks push/pop different wgroup:\n"); + unsigned nb_tasks = 0; + unsigned worker_id; + for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) + { + _STARPU_MSG("[LASTATS] %u: ", worker_id); + unsigned idx_mem; + for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) + { + fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.pop_redirect[worker_id][idx_mem]); + nb_tasks += lastats.pop_redirect[worker_id][idx_mem]; + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] Total tasks push/pop different wgroup: %u\n\n", nb_tasks); + } + { + _STARPU_MSG("[LASTATS] push strategy used:\n"); + unsigned worker_id; + unsigned counter[PUSH_NB_AUTO] = { 0 }; + unsigned idx_more_used = 0; + for (worker_id = 0; worker_id <= 
starpu_worker_get_count(); ++worker_id) + { + _STARPU_MSG("[LASTATS] %u: ", worker_id); + unsigned idx_strategy; + for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) + { + fprintf(stderr, "[%3u] %9ld ", idx_strategy, lastats.push_to_use[worker_id][idx_strategy]); + counter[idx_strategy] += lastats.push_to_use[worker_id][idx_strategy]; + if (counter[idx_strategy] > counter[idx_more_used]) + { + idx_more_used = idx_strategy; + } + } + fprintf(stderr, "\n"); + } + _STARPU_MSG("[LASTATS] More used push: %u\n\n", idx_more_used); + } + { + _STARPU_MSG("[LASTATS] correct MN pushes:\n"); + unsigned idx_strategy; + for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) + { + _STARPU_MSG("[LASTATS][%u] %u \n", idx_strategy, hp->pushStrategyHistory[idx_strategy]); + } + } +#endif + } + + STARPU_PTHREAD_MUTEX_DESTROY(&hp->policy_mutex); + if(hp->use_locality) + { + STARPU_PTHREAD_MUTEX_DESTROY(&hp->push_history_mutex); + } + if(hp->use_auto_calibration) + { + STARPU_PTHREAD_MUTEX_DESTROY(&hp->auto_calibration_mutex); + } + if(hp->use_auto_calibration && !hp->freeze_data_gathering) + { + // update autoheteroprio data with free and busy worker time + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + register_arch_times(hp, arch_index, hp->current_arch_busy_time[arch_index], hp->current_arch_free_time[arch_index]); + } + + starpu_autoheteroprio_save_task_data(hp); + } + + _starpu_graph_record = 0; // disable starpu graph recording (that may have been activated due to hp->use_auto_calibration) + + free(hp); +} + +static void add_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + // Retrieve current time to set as starting time for each worker + struct timespec tsnow; + _starpu_clock_gettime(&tsnow); + const double now = starpu_timing_timespec_to_us(&tsnow); + + unsigned i; + 
for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + + memset(&hp->workers_heteroprio[workerid], 0, sizeof(hp->workers_heteroprio[workerid])); + if(!hp->use_locality) + { + /* if the worker has already belonged to this context + the queue and the synchronization variables have been already initialized */ + starpu_st_prio_deque_init(&hp->workers_heteroprio[workerid].tasks_queue); + } + + enum starpu_worker_archtype arch_index = starpu_worker_get_type(workerid); + hp->workers_heteroprio[workerid].arch_index = arch_index; + hp->workers_heteroprio[workerid].arch_type = starpu_heteroprio_types_to_arch(arch_index); + hp->nb_workers_per_arch_index[hp->workers_heteroprio[workerid].arch_index]++; + + hp->last_hook_exec_time[workerid] = now; + } +} + +static void remove_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if(!hp->use_locality) + { + unsigned i; + + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + starpu_st_prio_deque_destroy(&hp->workers_heteroprio[workerid].tasks_queue); + } + } +} + +static unsigned get_best_mem_node(struct starpu_task *task, struct _starpu_heteroprio_data *hp, const enum laheteroprio_push_strategy pushStrategy) +{ + const unsigned workerid = starpu_worker_get_id(); + unsigned best_mem_node; + + STARPU_ASSERT(task != NULL); + + if (pushStrategy != PUSH_WORKER) + { + if(!hp->warned_change_nb_memory_nodes && starpu_memory_nodes_get_count() != hp->nb_memory_nodes) + { + _STARPU_MSG("[HETEROPRIO][INITIALIZATION][get_best_mem_node] Warning: current memory node number is different from the one retrieved at initialization.\n\ +This warning will only be displayed once.\n"); + hp->warned_change_nb_memory_nodes = 1; + } + + const unsigned nnodes = hp->nb_memory_nodes; // == starpu_memory_nodes_get_count() if number of mem nodes didn't change during execution + 
+ if (pushStrategy == PUSH_LcS) + { + int node_to_worker[LAHETEROPRIO_MAX_WORKER_GROUPS]; + unsigned idx_worker; + for (idx_worker = 0; idx_worker < starpu_worker_get_count(); ++idx_worker) + { + // overwrite, we simply need one worker per mem node + node_to_worker[starpu_worker_get_memory_node(idx_worker)] = idx_worker; + } + double bestTransferTime = starpu_task_expected_data_transfer_time_for(task, node_to_worker[0]); + best_mem_node = 0; + unsigned idx_node; + for (idx_node = 1; idx_node < nnodes; ++idx_node) + { + const double transferTime = starpu_task_expected_data_transfer_time_for(task, node_to_worker[idx_node]); + if (transferTime < bestTransferTime) + { + bestTransferTime = transferTime; + best_mem_node = idx_node; + } + } + } + else if (pushStrategy == PUSH_LS_SDH || pushStrategy == PUSH_LS_SDH2) + { + size_t max_size_so_far = 0; + unsigned idx_max_size = 0; + const unsigned wgroupid = (workerid == (unsigned)-1 ? hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); + size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); + unsigned idx_data; + for (idx_data = 0; idx_data < STARPU_TASK_GET_NBUFFERS(task); ++idx_data) + { + const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); + const size_t raw_data_size = starpu_data_get_size(handle); + const unsigned is_read = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); + // Easy: + size_t data_size; + if (pushStrategy == PUSH_LS_SDH) + { + data_size = raw_data_size; + } + else + { + assert(pushStrategy == PUSH_LS_SDH2); + data_size = (is_read ? 
raw_data_size : raw_data_size *raw_data_size); + } + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { + if (starpu_data_is_on_node(handle, idx_node)) + { + data_per_mem_node[idx_node] += data_size; + if (max_size_so_far < data_per_mem_node[idx_node] || + (max_size_so_far == data_per_mem_node[idx_node] && + idx_node == wgroupid)) + { + max_size_so_far = data_per_mem_node[idx_node]; + idx_max_size = idx_node; + } + } + } + } + best_mem_node = idx_max_size; + } + else if (pushStrategy == PUSH_LC_SMWB) + { + const unsigned wgroupid = (workerid == (unsigned)-1 ? hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); + assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); + const unsigned N = STARPU_TASK_GET_NBUFFERS(task); + unsigned data_exist_every_where[128] = { 0 }; + unsigned nb_data_exist_every_where = 0; + { + unsigned idx_data; + for (idx_data = 0; idx_data < N; ++idx_data) + { + const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); + data_exist_every_where[idx_data] = 1; + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { + if (starpu_data_is_on_node(handle, idx_node)) + { + // Ok + } + else + { + data_exist_every_where[idx_data] = 0; + break; + } + } + if (data_exist_every_where[idx_data]) + { + nb_data_exist_every_where += 1; + } + } + } + assert(N <= 128); + unsigned data_is_read[128] = { 0 }; + unsigned Nw = 0; + size_t total_size = 0; + size_t total_size_in_read = 0; + size_t total_size_in_write = 0; + size_t data_sizes[128] = { 0 }; + unsigned data_Ri[128] = { 0 }; + size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + size_t data_per_mem_node_in_read[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + size_t data_per_mem_node_in_write[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + unsigned nb_data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + unsigned nb_data_in_w_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + { + unsigned idx_data; + for 
(idx_data = 0; idx_data < N; ++idx_data) + { + if (data_exist_every_where[idx_data] == 0) + { + const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); + data_sizes[idx_data] = starpu_data_get_size(handle); + data_is_read[idx_data] = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); + total_size += data_sizes[idx_data]; + if (data_is_read[idx_data]) + { + total_size_in_read += data_sizes[idx_data]; + } + else + { + total_size_in_write += data_sizes[idx_data]; + Nw += 1; + } + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { + if (starpu_data_is_on_node(handle, idx_node)) + { + data_Ri[idx_data] += 1; + data_per_mem_node[idx_node] += data_sizes[idx_data]; + nb_data_per_mem_node[idx_node] += 1; + if (data_is_read[idx_data]) + { + data_per_mem_node_in_read[idx_node] += data_sizes[idx_data]; + } + else + { + data_per_mem_node_in_write[idx_node] += data_sizes[idx_data]; + nb_data_in_w_per_mem_node[idx_node] += 1; + } + } + } + } + } + } + double max_score_so_far = 0; + unsigned idx_max_score = 0; + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { + double current_score = 0; + current_score = (data_per_mem_node_in_read[idx_node]) + + 1000. *(data_per_mem_node_in_write[idx_node] *nb_data_in_w_per_mem_node[idx_node]); + if (max_score_so_far < current_score || + (max_score_so_far == current_score && + idx_node == wgroupid)) + { + max_score_so_far = current_score; + idx_max_score = idx_node; + } + } + best_mem_node = idx_max_score; + } + else + { + const unsigned wgroupid = (workerid == (unsigned)-1 ? 
hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); + assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); + const unsigned N = STARPU_TASK_GET_NBUFFERS(task); + assert(N <= 128); + unsigned data_is_read[128] = { 0 }; + unsigned Nw = 0; + size_t total_size = 0; + size_t total_size_in_read = 0; + size_t total_size_in_write = 0; + size_t data_sizes[128] = { 0 }; + unsigned data_Ri[128] = { 0 }; + size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + size_t data_per_mem_node_in_read[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + size_t data_per_mem_node_in_write[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + unsigned nb_data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + unsigned nb_data_in_w_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; + { + unsigned idx_data; + for (idx_data = 0; idx_data < N; ++idx_data) + { + const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); + data_sizes[idx_data] = starpu_data_get_size(handle); + data_is_read[idx_data] = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); + total_size += data_sizes[idx_data]; + if (data_is_read[idx_data]) + { + total_size_in_read += data_sizes[idx_data]; + } + else + { + total_size_in_write += data_sizes[idx_data]; + Nw += 1; + } + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { + if (starpu_data_is_on_node(handle, idx_node)) + { + data_Ri[idx_data] += 1; + data_per_mem_node[idx_node] += data_sizes[idx_data]; + nb_data_per_mem_node[idx_node] += 1; + if (data_is_read[idx_data]) + { + data_per_mem_node_in_read[idx_node] += data_sizes[idx_data]; + } + else + { + data_per_mem_node_in_write[idx_node] += data_sizes[idx_data]; + nb_data_in_w_per_mem_node[idx_node] += 1; + } + } + } + } + } + double max_score_so_far = DBL_MAX; + unsigned idx_max_score = 0; + unsigned idx_node; + for (idx_node = 0; idx_node < nnodes; ++idx_node) + { +/* + const enum starpu_node_kind memnode_kind = starpu_node_get_kind(idx_node); + 
if(memnode_kind == STARPU_DISK_RAM) + { + continue; // a disk has no associated worker + } +*/ + + double current_score = 0; + assert(pushStrategy == PUSH_LS_SDHB); + current_score = (total_size_in_read - data_per_mem_node_in_read[idx_node]) + + ((total_size_in_write - data_per_mem_node_in_write[idx_node]) *(2. - (double)(Nw) / (double)(N))); + if (max_score_so_far > current_score || + (max_score_so_far == current_score && + idx_node == wgroupid)) + { + max_score_so_far = current_score; + idx_max_score = idx_node; + } + } + best_mem_node = idx_max_score; + } +#ifdef LAHETEROPRIO_PRINT_STAT + lastats.push_redirect[workerid + 1][best_mem_node] += 1; +#endif // LAHETEROPRIO_PRINT_STAT + } + else + { + if (workerid == (unsigned)-1) + { /*master thread */ + best_mem_node = hp->master_tasks_queue_idx; + } + else + { + const unsigned wgroupid = hp->workers_laheteroprio_wgroup_index[workerid]; + best_mem_node = wgroupid; + } + } + return best_mem_node; +} + +static void print_priorities(struct _starpu_heteroprio_data *hp) +{ + STARPU_ASSERT(hp->autoheteroprio_print_prio_after_ordering); + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); + starpu_worker_relax_off(); + + fprintf(stderr, "Updated task priorities :\n"); + unsigned arch; + for(arch=0;archfound_codelet_names_on_arch[arch];++p) + { + fprintf(stderr, "%s ", hp->found_codelet_names[hp->prio_mapping_per_arch_index[arch][p]]); + } + fprintf(stderr, "\n"); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); +} + + +static double get_autoheteroprio_arch_busy_proportion(struct _starpu_heteroprio_data *hp, unsigned arch) +{ + double total = hp->average_arch_busy_time[arch] + hp->average_arch_free_time[arch]; + + if(total <= 0) + { // if we have no info on workers times, we assume they are never busy (near-arbitrary choice) + return 0; + } + + return hp->average_arch_busy_time[arch]/total; +} + +static double get_autoheteroprio_estimated_time(struct 
_starpu_heteroprio_data *hp, unsigned priority, unsigned arch) +{ + if(hp->prio_arch_has_time_info[arch][priority]) + { + return hp->prio_average_time_arch[arch][priority]; + } + + if(arch_can_execute_prio(hp, arch, priority)) + { // if arch is legit but we have no time information, return a decent arbitrary time + return AUTOHETEROPRIO_FAIR_TIME; + } + + + if(hp->autoheteroprio_time_estimation_policy == 0) + { + return AUTOHETEROPRIO_LONG_TIME; + } + else if(hp->autoheteroprio_time_estimation_policy == 1) + { + // we can't execute this task on this arch, we therefore act as if it would be executed as fast as on the fastest architecture + double bestTime = AUTOHETEROPRIO_EXTREMELY_LONG_TIME; + + unsigned a; + for(a=0;aautoheteroprio_time_estimation_policy == 0 || hp->autoheteroprio_time_estimation_policy == 1); + return 0.; // to get rid of warning + } +} + +static double get_autoheteroprio_prio_proportion(struct _starpu_heteroprio_data *hp, unsigned priority) +{ + if(hp->prio_overall_proportion_count[priority] > 0) + { + return hp->prio_overall_proportion[priority]; + } + + // no prio of this type has ever been recorded + return 0; +} + +// get normalized time (no unit, with average best arch executes tasks in 1.0) +static double get_autoheteroprio_normalized_time(struct _starpu_heteroprio_data *hp, unsigned priority, unsigned arch) +{ + double sum = 0.f; + + unsigned p; + for(p=0;pfound_codelet_names_length;++p) + { + sum += get_autoheteroprio_prio_proportion(hp, p) * get_best_autoheteroprio_estimated_time(hp, p); + } + + if(sum <= 0.f) + { + return 1.0; + } + + return get_autoheteroprio_estimated_time(hp, priority, arch) / sum; +} + +static double get_autoheteroprio_prio_arch_proportion(struct _starpu_heteroprio_data *hp, unsigned priority, unsigned arch) +{ + if(hp->prio_arch_proportion_count[priority] > 0) + { + return hp->prio_arch_proportion[arch][priority]; + } + + // this prio has never been executed on this arch + return 0; +} + +static double 
get_autoheteroprio_successors_best_time_sum(struct _starpu_heteroprio_data *hp, unsigned priority) +{ + if(hp->prio_average_successors_best_time_sum_count[priority] > 0) + { + return hp->prio_average_successors_best_time_sum[priority]; + } + + return AUTOHETEROPRIO_FAIR_TIME; +} + +// best execution time of a prio +//static double get_autoheteroprio_best_time(struct _starpu_heteroprio_data *hp, unsigned priority) +//{ +// if(hp->prio_average_best_count[priority] > 0) +// { +// return hp->prio_average_best[priority]; +// } +// +// return AUTOHETEROPRIO_FAIR_TIME; +//} + +static double get_autoheteroprio_NOD(struct _starpu_heteroprio_data *hp, unsigned priority) +{ + if(hp->prio_average_NOD_count[priority] > 0) + { + return hp->prio_average_NOD[priority]; + } + + return 1.0f; +} + +static double get_autoheteroprio_URT(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned priority) +{ + if(hp->prio_average_URT_count[priority] > 0) + { + return hp->prio_average_URT[arch][priority]; + } + + return AUTOHETEROPRIO_FAIR_TIME; +} + +static double reLU(double x) +{ + if(x<0.0f) + { + return 0.0f; + } + return x; +} + +static double rpg(double x) +{ + if(x > 1.0f) + { + return 1.0f; + } + return sqrt(x)*sqrt(2.0f-x); +} + +struct prio_score +{ + unsigned index; + double score; +}; + +static int compare_prio_scores(const void* elem1, const void* elem2) +{ + if(((const struct prio_score*)elem1)->score > ((const struct prio_score*)elem2)->score) + return -1; + return ((const struct prio_score*)elem1)->score < ((const struct prio_score*)elem2)->score; +} + +static void order_priorities(struct _starpu_heteroprio_data *hp) +{ + STARPU_ASSERT(use_auto_mode); + STARPU_ASSERT(hp->use_auto_calibration); // priorities should only be changed during execution if in auto calibration mode + + struct prio_score prio_arch[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; + unsigned prio_arch_index[STARPU_NB_TYPES] = {0}; + + // lock the global policy mutex + _starpu_worker_relax_on(); + 
STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); + starpu_worker_relax_off(); + + unsigned p, a; + for(p=0;pfound_codelet_names_length;++p) + { + int worst_arch = -1; + double worstTime = -1.0f; + int second_worst_arch = -1; + double secondWorstTime = -1.0f; + + // Find the worst architecture and the second worst if there is one + for(a = 0; a < STARPU_NB_TYPES; ++a) + { + if((hp->buckets[p].valid_archs & starpu_heteroprio_types_to_arch(a)) == 0) + continue; + + const double arch_time = get_autoheteroprio_normalized_time(hp, p, a); + if(worstTime < arch_time) + { + second_worst_arch = worst_arch; + secondWorstTime = worstTime; + + worst_arch = a; + worstTime = arch_time; + } + else if(secondWorstTime < arch_time) + { + second_worst_arch = a; + secondWorstTime = arch_time; + } + } + + // Ensure that there is at least one arch that can execute priority + STARPU_ASSERT(worst_arch != -1); + + const double worstArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, worst_arch); + const double URT_worst = get_autoheteroprio_URT(hp, worst_arch, p); + + double secondWorstArchTaskProportion, URT_secondWorst; + if(second_worst_arch == -1) + { + // If there's no second worst set values to worst possible values + secondWorstTime = AUTOHETEROPRIO_EXTREMELY_LONG_TIME; + secondWorstArchTaskProportion = 0.f; + URT_secondWorst = 0.f; + } + else + { + secondWorstTime = get_autoheteroprio_normalized_time(hp, p, second_worst_arch); + secondWorstArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, second_worst_arch); + URT_secondWorst = get_autoheteroprio_URT(hp, second_worst_arch, p); + } + + // Compute scores + for(a=0;abuckets[p].valid_archs & starpu_heteroprio_types_to_arch(a)) + { + double otherTime, otherArchTaskProportion, URT_other; + unsigned prio = prio_arch_index[a]++; + + if(a == (unsigned) worst_arch) + { + // Compare the worst architecture to the second worst + otherTime = secondWorstTime; + URT_other = URT_secondWorst; + 
otherArchTaskProportion = secondWorstArchTaskProportion; + } + else + { + // Compare to the worst architecture + otherTime = worstTime; + URT_other = URT_worst; + otherArchTaskProportion = worstArchTaskProportion; + } + + const double need_other = 1.0f - otherArchTaskProportion; + + double NOD = get_autoheteroprio_NOD(hp, p); + double sum = get_autoheteroprio_successors_best_time_sum(hp, p); + + double ownTime = get_autoheteroprio_normalized_time(hp, p, a); + double archDiff = otherTime - ownTime; + double archRelDiff = otherTime/ownTime; + + double ownArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, a); + + double URT_own = get_autoheteroprio_URT(hp, a, p); + + double need_own = 1.0f - get_autoheteroprio_arch_busy_proportion(hp, a); + double archNeedDiff = need_own-need_other; + + double URT = (URT_own*need_own + URT_other*need_other); + + prio_arch[a][prio].index = p; + + if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NOD_TIME_COMBINATION) + { + double relDiff = archRelDiff>1.0f?archRelDiff:1.0/archRelDiff; + double multiplier = exp(-hp->NTexpVal*(relDiff-1)*(relDiff-1)); + prio_arch[a][prio].score = archDiff + hp->NTnodPond*multiplier*NOD; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_BEST_NODS_SCORE || hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_BEST_NODS) + { // TODO, implement BEST_NODS + double multiplier = exp(-hp->BNexpVal*(archDiff)*(archDiff)); + if(archDiff > 0.0f) + { // my arch is faster + multiplier = 1.0f; + } + multiplier = 2.0f*multiplier - 1.0f; // bad diff becomes -1, good or equal diff 1 + + prio_arch[a][prio].score = multiplier*NOD; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_PURE) + { + prio_arch[a][prio].score = URT; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT) + { + prio_arch[a][prio].score = hp->URTurt * URT + archDiff; + } + else 
if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_2) + { + prio_arch[a][prio].score = hp->URT2urt * URT + archDiff + hp->URT2prop * reLU(ownArchTaskProportion*otherArchTaskProportion*archNeedDiff); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_PURE) + { + prio_arch[a][prio].score = URT*archDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2) + { + prio_arch[a][prio].score = (1.0f + URT)*archDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE) + { + prio_arch[a][prio].score = URT*archRelDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2) + { + prio_arch[a][prio].score = (1.0f + URT)*archRelDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_2) + { + prio_arch[a][prio].score = (1.0f + URT)*archDiff + hp->and2pond * ownTime * archNeedDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_3) + { + prio_arch[a][prio].score = (1.0f + URT)*archDiff + hp->and3pond * ownTime * reLU(archNeedDiff); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_4) + { + prio_arch[a][prio].score = (1.0f + URT)*archDiff - hp->and4pond * ownTime * reLU(-archNeedDiff); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_5) + { + prio_arch[a][prio].score = (hp->and5xoffset + URT) * (hp->and5yoffset + archDiff); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_6) + { + prio_arch[a][prio].score = (1.0f + URT)*log1p(exp(archDiff)); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_7) + { + prio_arch[a][prio].score = rpg(URT)*(1+URT)*(1+archDiff)+(1-rpg(URT))*(-log1p(exp(-archDiff))); + } + else 
if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_8) + { + prio_arch[a][prio].score = (1/(1+exp(-URT))-0.5)*(1+URT)*(1+archDiff)+(1/(1+exp(-1/URT))-0.5)*(-exp(-archDiff)); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_9) + { + prio_arch[a][prio].score = log(hp->and9xoffset+URT)*atan(archDiff+hp->and9yoffset*URT); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_10) + { + prio_arch[a][prio].score = (hp->and10xoffset+URT)*atan(archDiff) + hp->and10yoffset*URT; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_11) + { + prio_arch[a][prio].score = (hp->and11xoffset+URT)*(archDiff+hp->and11yoffset*URT); + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS) + { + prio_arch[a][prio].score = URT / ownTime; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS_2) + { + prio_arch[a][prio].score = (URT + archDiff) / ownTime; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF) + { + prio_arch[a][prio].score = URT / ownTime + archDiff; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF) + { + prio_arch[a][prio].score = URT*(sum+archDiff)/ownTime; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_TIME_COMBINATION) + { + double relDiff = archRelDiff>1.0f?archRelDiff:1.0/archRelDiff; + double multiplier = exp(-hp->ANTexpVal*(relDiff-1)*(relDiff-1)); + prio_arch[a][prio].score = archDiff + hp->ANTnodPond*multiplier*URT; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_PER_SECOND) + { + prio_arch[a][prio].score = NOD/ownTime; + } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_TIME_RELEASED) + { + prio_arch[a][prio].score = NOD*sum/ownTime; 
+ } + else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF) + { + prio_arch[a][prio].score = NOD*(sum+archDiff)/ownTime; + } + else + { + _STARPU_MSG("[AUTOHETEROPRIO] Warning: unknown ordering policy.\n"); + prio_arch[a][prio].score = 0; + } + + if(!hp->freeze_data_gathering && hp->prio_average_time_arch_count[a][p] < AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE) + { + // if we dont have enough data on execution time, we push execution on it by increasing the score + prio_arch[a][prio].score += 99999999.; + } + } + } + } + + for(a=0;afound_codelet_names_on_arch[a], sizeof(struct prio_score), compare_prio_scores); + } + + starpu_heteroprio_clear_mapping_hp(hp); + + for(a=0;afound_codelet_names_on_arch[a];++p) + { + starpu_heteroprio_set_mapping_hp(hp, a, p, prio_arch[a][p].index); + } + } + +/* // uncomment to print task names ordered by priority (TODO : use environment variable) + printf("priorities sorted:\n"); + printf("CPU:\n"); + for(p=0;pfound_codelet_names_on_arch[STARPU_CPU_WORKER];++p) + { + printf("%d : %s bucket=%d (score = %f)\n", p, hp->found_codelet_names[prio_arch[STARPU_CPU_WORKER][p].index], prio_arch[STARPU_CPU_WORKER][p].index, prio_arch[STARPU_CPU_WORKER][p].score); + } + printf("GPU:\n"); + for(p=0;pfound_codelet_names_on_arch[STARPU_CUDA_WORKER];++p) + { + printf("%d : %s bucket=%d (score = %f)\n", p, hp->found_codelet_names[prio_arch[STARPU_CUDA_WORKER][p].index], prio_arch[STARPU_CUDA_WORKER][p].index, prio_arch[STARPU_CUDA_WORKER][p].score); + } +*/ + + + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); +} + +// used to get the name of a codelet, considering a codelet grouping strategy +static const char *_heteroprio_get_codelet_name(enum autoheteroprio_codelet_grouping_strategy strategy, struct starpu_codelet *cl) +{ + const char *name = NULL; + switch(strategy) + { + case BY_PERF_MODEL_OR_NAME: + name = _starpu_codelet_get_model_name(cl); + break; + + case BY_NAME_ONLY: + name = 
_starpu_codelet_get_name(cl); + break; + } + + return name ? name : AUTOHETEROPRIO_NO_NAME; +} + +// used by get_task_auto_priority for knowing if a submitted codelet equals an other +static int are_same_codelets(struct _starpu_heteroprio_data *hp, const struct starpu_task *task, const char name[CODELET_MAX_NAME_LENGTH], unsigned valid_archs) +{ + unsigned task_valid_archs = task->where >= 0 ? (unsigned) task->where : task->cl->where; + + if(task_valid_archs != valid_archs) + { + // are not same codelet, because different architectures + return 0; + } + + const char *task_name = _heteroprio_get_codelet_name(hp->codelet_grouping_strategy, task->cl); + + return strncmp(name, task_name, CODELET_MAX_NAME_LENGTH) == 0; +} + +static int get_task_auto_priority(struct _starpu_heteroprio_data *hp, const struct starpu_task *task) +{ + STARPU_ASSERT(use_auto_mode); + STARPU_ASSERT(hp->use_auto_calibration); + STARPU_ASSERT(hp->found_codelet_names_length <= HETEROPRIO_MAX_PRIO); + + if(task->cl->where == STARPU_NOWHERE) + { + return -1; + } + + const char *name = _heteroprio_get_codelet_name(hp->codelet_grouping_strategy, task->cl); + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); + starpu_worker_relax_off(); + + unsigned current_priority; + for(current_priority = 0;current_priorityfound_codelet_names_length;++current_priority) + { + if(are_same_codelets(hp, task, &hp->found_codelet_names[current_priority][0], hp->buckets[current_priority].valid_archs)) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); + return current_priority; + } + } + + // codelet's name does not exist in found_codelet_names, add it + + STARPU_ASSERT(hp->found_codelet_names_length < HETEROPRIO_MAX_PRIO); + + const unsigned found_codelet_names_length = hp->found_codelet_names_length; + + if(!task->cl->model) + { // The codelet does not have a perf model + _STARPU_DISP("[HETEROPRIO] Warning: codelet %s does not have a perfmodel. 
This may negatively impact heteroprio's auto prioritizing.\n", name); + } + + unsigned archs[STARPU_NB_TYPES] = {0}; + + unsigned arch; + for(arch=0;archauto_calibration_mutex); + + return found_codelet_names_length; +} + +// checks that auto-heteroprio arrays are correctly set (for debugging purposes) +//static void check_auto_heteroprio_mapping(struct _starpu_heteroprio_data *hp) +//{ +// // may be useful +// (void) hp; +//} + +static double get_job_NOD(struct _starpu_heteroprio_data *hp, struct _starpu_job *job) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + STARPU_ASSERT(_starpu_graph_record == 1); + + double NOD = 0.f; + + //STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); + + /*if(!job->tag) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); + return 0; + }*/ + + //STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); + + _starpu_graph_wrlock(); + + struct _starpu_graph_node *node = job->graph_node; + + if(!node) + { + // No information because the graph isn't available + _starpu_graph_wrunlock(); + return 0.f; + } + + unsigned n; + for(n=0;nn_outgoing;++n) + { + struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here + if(successor) + { // successor may be NULL + NOD += 1.f/(double)successor->n_incoming; + } + } + + _starpu_graph_wrunlock(); + + return NOD; +} + +// get job's NRT (Normalized Released Time) +static double get_job_NRT(struct _starpu_heteroprio_data *hp, struct _starpu_job *job, unsigned arch) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + STARPU_ASSERT(_starpu_graph_record == 1); + + double NOD = 0.f; + + //STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); + + /*if(!job->tag) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); + return 0; + }*/ + + //STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); + + _starpu_graph_wrlock(); + + struct _starpu_graph_node *node = job->graph_node; + + if(!node) + { + // No information because the graph isn't available + 
_starpu_graph_wrunlock(); + return 0.f; + } + + unsigned n; + for(n=0;nn_outgoing;++n) + { + struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here + if(successor) + { + // successor may be NULL + struct _starpu_job *successor_job = successor->job; + STARPU_PTHREAD_MUTEX_LOCK(&successor_job->sync_mutex); + const struct starpu_task *successor_task = successor_job->task; + STARPU_PTHREAD_MUTEX_UNLOCK(&successor_job->sync_mutex); + + if(successor_task->cl) + { + // if a codelet is associated to the task, we can count it in the NOD + int successor_prio = get_task_auto_priority(hp, successor_task); + double successor_arch_time; + if(successor_prio == -1) + { + successor_arch_time = 0.f; + } + else + { + successor_arch_time = get_autoheteroprio_prio_arch_proportion(hp, successor_prio, arch) * get_autoheteroprio_normalized_time(hp, successor_prio, arch); + } + NOD += successor_arch_time/(double)successor->n_incoming; + } + } + } + + _starpu_graph_wrunlock(); + + return NOD; +} + + + + +static void register_arch_times(struct _starpu_heteroprio_data *hp, unsigned arch, double busy_time, double free_time) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + double summed_busy_time = hp->average_arch_busy_time[arch] + busy_time; + double summed_free_time = hp->average_arch_free_time[arch] + free_time; + + double max_time = STARPU_MAX(summed_busy_time, summed_free_time); + double scale_to_apply = 1.0f; + + if(max_time > AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME) + { + scale_to_apply = AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME/max_time; + } + + hp->average_arch_busy_time[arch] = summed_busy_time*scale_to_apply; + hp->average_arch_free_time[arch] = summed_free_time*scale_to_apply; +} + +// gets the lowest expected time between each architectures +static double get_best_autoheteroprio_estimated_time(struct _starpu_heteroprio_data *hp, unsigned priority) +{ + double time = 999999999999999.f; + + 
unsigned arch; + for(arch=0;archtask); + + double time; + + if(task_priority == -1) + { + time = AUTOHETEROPRIO_DEFAULT_TASK_TIME; + } + else + { + time = get_best_autoheteroprio_estimated_time(hp, task_priority); + } + + return time; +} + +static double get_job_successors_best_time_sum(struct _starpu_heteroprio_data *hp, struct _starpu_job *job) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + STARPU_ASSERT(_starpu_graph_record == 1); + + double sum = 0.f; + + _starpu_graph_wrlock(); + + struct _starpu_graph_node *node = job->graph_node; + + if(!node) + { + // No information because the graph isn't available + _starpu_graph_wrunlock(); + return 0.f; + } + + unsigned n; + for(n=0;nn_outgoing;++n) + { + struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here + if(successor && successor->job && successor->job->task->cl) + { + // successor may be NULL + sum += get_job_best_time(hp, successor->job); + } + } + + _starpu_graph_wrunlock(); + + return sum; +} + +static void add_NOD_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double NOD) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_average_NOD_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_average_NOD_count[task_priority]; + } + + const unsigned count = hp->prio_average_NOD_count[task_priority]; + + hp->prio_average_NOD[task_priority] = hp->prio_average_NOD[task_priority] * (double)(count - 1) / (double)count + + NOD / (double)count; +} + +static void add_URTs_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double archs_URTs[STARPU_NARCH]) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_average_URT_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_average_URT_count[task_priority]; + } + + const unsigned count = hp->prio_average_URT_count[task_priority]; + + unsigned arch; + 
for(arch=0;archprio_average_URT[arch][task_priority] = hp->prio_average_URT[arch][task_priority] * (double)(count - 1) / (double)count + + archs_URTs[arch] / (double)count; + } +} + +static void register_execution_time(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned task_priority, double time) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_average_time_arch_count[arch][task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_average_time_arch_count[arch][task_priority]; + } + + const unsigned count = hp->prio_average_time_arch_count[arch][task_priority]; + + hp->prio_average_time_arch[arch][task_priority] = hp->prio_average_time_arch[arch][task_priority] * (double)(count - 1) / (double)count + + time / (double)count; + hp->prio_arch_has_time_info[arch][task_priority] = 1; +} + +static inline unsigned get_total_submitted_task_num(struct _starpu_heteroprio_data *hp) +{ + unsigned total = 0; + + unsigned idx_prio; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + total += hp->prio_overall_proportion_count[idx_prio]; + } + + return total; +} + +static inline double get_sum_task_proportions(struct _starpu_heteroprio_data *hp) +{ + double total = 0.f; + + unsigned idx_prio; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + total += hp->prio_overall_proportion[idx_prio]; + } + + return total; +} + +// noralizes tasks proportions so that their sum equals 1 +static inline void normalize_task_proportions(struct _starpu_heteroprio_data *hp) +{ + const double total_task_proportions = get_sum_task_proportions(hp); + + STARPU_ASSERT(total_task_proportions > 0); + + unsigned idx_prio; + for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) + { + hp->prio_overall_proportion[idx_prio] /= total_task_proportions; + } +} + +static void add_submitted_task_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + 
if(hp->prio_overall_proportion_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_overall_proportion_count[task_priority]; + } + + const unsigned count = get_total_submitted_task_num(hp); + + STARPU_ASSERT(count > 0); + + hp->prio_overall_proportion[task_priority] += 1.f/(double)count; + + // take back task proportions to a valid value (sum = 1) + normalize_task_proportions(hp); +} + +// gets the sum of a task's architecture proportions +static inline double get_sum_task_arch_proportions(struct _starpu_heteroprio_data *hp, unsigned task_priority) +{ + double total = 0.f; + unsigned arch; + + for(arch=0;archprio_arch_proportion[arch][task_priority]; + } + + return total; +} + +// noralizes tasks execution proportions so that the sum of proportions of a task on each arch equals 1 +// EXAMPLE : task A : %CPU = 0.75, %GPU = 0.25 +static inline void normalize_task_arch_proportions(struct _starpu_heteroprio_data *hp, unsigned task_priority) +{ + const double total_task_proportions = get_sum_task_arch_proportions(hp, task_priority); + + STARPU_ASSERT(total_task_proportions > 0); + + unsigned arch; + for(arch=0;archprio_arch_proportion[arch][task_priority] /= total_task_proportions; + } +} + +static void register_task_arch_execution(struct _starpu_heteroprio_data *hp, unsigned task_priority, unsigned arch) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_arch_proportion_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_arch_proportion_count[task_priority]; + } + + unsigned count = hp->prio_arch_proportion_count[task_priority]; + STARPU_ASSERT(count > 0); + if(count >= 2) + { + // to have correct proportions and not divide by zero + count -=1; + } + + hp->prio_arch_proportion[arch][task_priority] += 1.f/(double)count; + + // take back task proportions to a valid value (sum = 1) + normalize_task_arch_proportions(hp, task_priority); +} + + +static void add_successors_best_time_sum_to_data(struct 
_starpu_heteroprio_data *hp, unsigned task_priority, double sum) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_average_successors_best_time_sum_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_average_successors_best_time_sum_count[task_priority]; + } + + const unsigned count = hp->prio_average_successors_best_time_sum_count[task_priority]; + + hp->prio_average_successors_best_time_sum[task_priority] = hp->prio_average_successors_best_time_sum[task_priority] * (double)(count - 1) / (double)count + + sum / (double)count; +} + +static void add_best_time_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double sum) +{ + STARPU_ASSERT(!hp->freeze_data_gathering); + + if(hp->prio_average_best_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) + { + ++hp->prio_average_best_count[task_priority]; + } + + const unsigned count = hp->prio_average_best_count[task_priority]; + + hp->prio_average_best[task_priority] = hp->prio_average_best[task_priority] * (double)(count - 1) / (double)count + + sum / (double)count; +} + +static void autoheteroprio_update_slowdown_data(struct _starpu_heteroprio_data *hp) +{ + unsigned p, arch; + for(p=0;pfound_codelet_names_length;++p) + { + unsigned valid_archs[STARPU_NB_TYPES] = {0}; + double arch_times[STARPU_NB_TYPES] = {0.f}; + + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + valid_archs[arch] = arch_can_execute_prio(hp, arch, p); + if(valid_archs[arch]) + { + double arch_time = get_autoheteroprio_estimated_time(hp, p, arch); + STARPU_ASSERT(arch_time > 0.f); + arch_times[arch] = arch_time; + } + } + + // Assert that at least one architecture can execute priority + for(arch = 0; arch < STARPU_NB_TYPES && !valid_archs[arch]; ++arch) + ; + STARPU_ASSERT(arch < STARPU_NB_TYPES); + + arch = 0; + while(!valid_archs[arch]) + ++arch; + unsigned fastest_arch = arch; + double best_time = arch_times[arch]; + + ++arch; + for(; arch < STARPU_NB_TYPES; ++arch) + { + 
if(valid_archs[arch] && arch_times[arch] < best_time) + { + fastest_arch = arch; + best_time = arch_times[arch]; + } + } + + starpu_heteroprio_set_faster_arch_hp(hp, fastest_arch, p); + + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(valid_archs[arch] && arch != fastest_arch) + starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, p, arch_times[arch]/best_time); + } + } + + check_heteroprio_mapping(hp); +} + +/* Push a new task (simply store it and update counters) */ +static int push_task_heteroprio_policy(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + unsigned computed_best_mem_node = 0; + unsigned best_node_now[PUSH_NB_AUTO] = {0}; + + if(hp->use_locality) + { +#ifdef LAHETEROPRIO_PRINT_STAT + lastats.push_to_use[starpu_worker_get_id()+1][hp->pushStrategyToUse] += 1; +#endif + //unsigned best_node_now[PUSH_NB_AUTO] = {0}; + if(hp->pushStrategySet == PUSH_AUTO) + { + unsigned idx_strategy; + for(idx_strategy = 0 ; idx_strategy < PUSH_NB_AUTO ; ++idx_strategy) + { + best_node_now[idx_strategy] = get_best_mem_node(task, hp, idx_strategy); + } + } + computed_best_mem_node = (hp->pushStrategySet == PUSH_AUTO && hp->pushStrategyToUse < PUSH_NB_AUTO ? + best_node_now[hp->pushStrategyToUse] + : get_best_mem_node(task, hp, hp->pushStrategyToUse)); + + STARPU_ASSERT_MSG(hp->map_wgroup_has_been_called, "starpu_laheteroprio_map_wgroup_memory_nodes \ +has not been called while you are using the heteroprio in LA mode. 
To fix this, you can either turn LA mode off by setting \ +the HETEROPRIO_USE_LA variable to 0, or calling starpu_laheteroprio_map_wgroup_memory_nodes after starpu_laheteroprio_set_nb_prios.\n"); + } + + + const unsigned best_mem_node = computed_best_mem_node; + + /* One worker at a time uses heteroprio */ + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); + starpu_worker_relax_off(); + + /* Get tasks priority (ID) */ + int task_priority; + if(hp->use_auto_calibration) + { + task_priority = get_task_auto_priority(hp, task); + + if(!hp->freeze_data_gathering && hp->priority_last_ordering >= hp->priority_ordering_interval) + { + hp->priority_last_ordering = 0; + } + + if(hp->priority_last_ordering == 0) + { + // first pushed task OR at least "priority_ordering_interval" tasks have been pushed + order_priorities(hp); + if(hp->autoheteroprio_print_prio_after_ordering) + { + print_priorities(hp); + } + autoheteroprio_update_slowdown_data(hp); + } + + ++hp->priority_last_ordering; + + if(!hp->freeze_data_gathering) + { + struct _starpu_job *job = _starpu_get_job_associated_to_task(task); + + if(task_priority != -1) + { + // register that the task has been submitted + add_submitted_task_to_data(hp, task_priority); + + double NOD = get_job_NOD(hp, job); + add_NOD_to_data(hp, task_priority, NOD); + + double archs_NRTs[STARPU_NARCH]; + unsigned arch; + for(arch=0;archautoheteroprio_print_data_on_update) + { + unsigned arch; + char is_arch_used[STARPU_NB_TYPES]; + + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(hp->average_arch_busy_time[arch] + hp->average_arch_free_time[arch] > 0) + is_arch_used[arch] = 1; + else + is_arch_used[arch] = 0; + } + + fprintf(stderr, "Updated values :\n"); + + fprintf(stderr, "Busy proportion :\n\t"); + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(is_arch_used[arch]) + fprintf(stderr, "%s : %f, ", + starpu_worker_get_type_as_string(arch), + get_autoheteroprio_arch_busy_proportion(hp, arch)); + } + 
fprintf(stderr, "\n"); + + unsigned idx_prio; + + fprintf(stderr, "Assumed values for heuristic computation :\n"); + for(idx_prio = 0; idx_prio < hp->found_codelet_names_length; ++idx_prio) + { + fprintf(stderr, "task %s :\n\tNOD = %f", + &hp->found_codelet_names[idx_prio][0], + get_autoheteroprio_NOD(hp, idx_prio)); + + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(is_arch_used[arch]) + fprintf(stderr, ", URT_%s = %f", + starpu_worker_get_type_as_string(arch), + get_autoheteroprio_URT(hp, arch, idx_prio)); + } + + fprintf(stderr, "\n\testimated time : "); + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(is_arch_used[arch]) + fprintf(stderr, "%s : %f, ", + starpu_worker_get_type_as_string(arch), + get_autoheteroprio_estimated_time(hp, idx_prio, arch)); + } + + fprintf(stderr, "\n\tnormalized time : "); + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(is_arch_used[arch]) + fprintf(stderr, "%s : %f, ", + starpu_worker_get_type_as_string(arch), + get_autoheteroprio_normalized_time(hp, idx_prio, arch)); + } + + fprintf(stderr, "\n\tbestsum=%f, proportion=%f", + get_autoheteroprio_successors_best_time_sum(hp, idx_prio), + get_autoheteroprio_prio_proportion(hp, idx_prio)); + for(arch = 0; arch < STARPU_NB_TYPES; ++arch) + { + if(is_arch_used[arch]) + fprintf(stderr, ", prop%s=%f", + starpu_worker_get_type_as_string(arch), + get_autoheteroprio_prio_arch_proportion(hp, idx_prio, arch)); + } + + fprintf(stderr, "\n"); + } + } + } + } + else + { + task_priority = task->priority; + } + + /* Retrieve the correct bucket */ + STARPU_ASSERT(task_priority >= 0); + STARPU_ASSERT(task_priority < HETEROPRIO_MAX_PRIO); + + struct _heteroprio_bucket* bucket = &hp->buckets[task_priority]; + /* Ensure that any worker that check that list can compute the task */ + STARPU_ASSERT_MSG(bucket->valid_archs, "The bucket %d does not have any archs\n", task_priority); + STARPU_ASSERT(((bucket->valid_archs ^ task->where) & bucket->valid_archs) == 0); + + 
if(hp->use_locality) + { + /* save the task */ + starpu_task_list_push_front(&bucket->tasks_queue[best_mem_node], task); + if(hp->pushStrategySet == PUSH_AUTO) + { + laqueue_push(&bucket->auto_mn[best_mem_node], best_node_now); + } +#ifdef LAHETEROPRIO_PRINT_STAT + if(starpu_worker_get_id() != -1) + { + lastats.nb_tasks_per_wgroup[best_mem_node][task_priority] += 1; + lastats.nb_tasks_per_worker[starpu_worker_get_id()][task_priority] += 1; + } +#endif // LAHETEROPRIO_PRINT_STAT + bucket->tasks_queue_ntasks += 1; +#ifdef LAHETEROPRIO_PRINT_STAT + lastats.nb_tasks += 1; +#endif // LAHETEROPRIO_PRINT_STAT + + } + else + { + /* save the task */ + starpu_task_list_push_front(&bucket->tasks_queue[0],task); + /* Increase the total number of tasks */ + bucket->tasks_queue_ntasks += 1; + } + + /* Inc counters */ + unsigned arch_index; + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + /* We test the archs on the bucket and not on task->where since it is restrictive */ + if(bucket->valid_archs & starpu_heteroprio_types_to_arch(arch_index)) + { + hp->nb_remaining_tasks_per_arch_index[arch_index] += 1; + } + } + + hp->total_tasks_in_buckets += 1; + + starpu_push_task_end(task); + + /*if there are no tasks_queue block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + struct starpu_sched_ctx_iterator it; +#ifndef STARPU_NON_BLOCKING_DRIVERS + char dowake[STARPU_NMAXWORKERS] = { 0 }; +#endif + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (!starpu_bitmap_get(&hp->waiters, worker)) + /* This worker is not waiting for a task */ + continue; +#endif + + if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) + { + /* It can execute this one, tell him! 
*/ +#ifdef STARPU_NON_BLOCKING_DRIVERS + starpu_bitmap_unset(&hp->waiters, worker); + /* We really woke at least somebody, no need to wake somebody else */ + break; +#else + dowake[worker] = 1; +#endif + } + } + /* Let the task free */ + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + /* Now that we have a list of potential workers, try to wake one */ + + workers->init_iterator_for_parallel_tasks(workers, &it, task); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + if (dowake[worker]) + if (starpu_wake_worker_relax_light(worker)) + break; // wake up a single worker + } +#endif + + return 0; +} + +static struct starpu_task *pop_task_heteroprio_policy(unsigned sched_ctx_id) +{ + const unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + struct _heteroprio_worker_wrapper* worker = &hp->workers_heteroprio[workerid]; + struct starpu_task* task = NULL; + +#ifdef STARPU_NON_BLOCKING_DRIVERS + /* If no tasks available, no tasks in worker queue or some arch worker queue just return NULL */ + if (!STARPU_RUNNING_ON_VALGRIND + && (hp->total_tasks_in_buckets == 0 || hp->nb_remaining_tasks_per_arch_index[worker->arch_index] == 0) + && (hp->use_locality || (worker->tasks_queue.ntasks == 0 && hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] == 0))) + { + return NULL; + } + + if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&hp->waiters, workerid)) + { + /* Nobody woke us, avoid bothering the mutex */ + return NULL; + } +#endif + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); + starpu_worker_relax_off(); + + // if(hp->use_locality) + // { + // used only with use_locality==1 +#ifdef LAHETEROPRIO_PRINT_STAT + unsigned src_mem_node = (unsigned)-1; +#endif + unsigned best_node_previous[PUSH_NB_AUTO] = 
{0}; + // } + // else + // { + // used only with use_locality==0 + /* keep track of the new added task to perform real prefetch on node */ + unsigned nb_added_tasks = 0; + // } + + if (hp->use_locality) + { + const unsigned wgroupid = hp->workers_laheteroprio_wgroup_index[workerid]; + + if (hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0) + { + const struct starpu_laheteroprio_access_item *wgroup_access_order = hp->wgroup_pop_access_orders[wgroupid]; + const unsigned wgroup_access_order_size = hp->wgroup_pop_access_orders_size[wgroupid]; + + unsigned idx_access_item; + for (idx_access_item = 0; task == NULL && idx_access_item < wgroup_access_order_size; ++idx_access_item) + { + const unsigned current_wgroupid = wgroup_access_order[idx_access_item].wgroup_idx; + /*Retrieve the bucket using the mapping */ + struct _heteroprio_bucket *bucket = &hp->buckets[hp->prio_mapping_per_arch_index[worker->arch_index][wgroup_access_order[idx_access_item].prio_idx]]; + /*Ensure we can compute task from this bucket */ + STARPU_ASSERT(bucket->valid_archs &worker->arch_type); + /*Take one task if possible */ + if (!starpu_task_list_empty(&bucket->tasks_queue[current_wgroupid])) + { + if ((bucket->factor_base_arch_index == 0 || + worker->arch_index == bucket->factor_base_arch_index || + (((float) bucket->tasks_queue_ntasks) / ((float) hp->nb_workers_per_arch_index[bucket->factor_base_arch_index])) >= bucket->slow_factors_per_index[worker->arch_index])) + { + task = starpu_task_list_pop_front(&bucket->tasks_queue[current_wgroupid]); + if(!starpu_worker_can_execute_task(workerid, task, 0)) + { + // Put the task back because worker can't execute it (e.g. 
codelet.can_execute) + starpu_task_list_push_front(&bucket->tasks_queue[0], task); + break; + } + if (hp->pushStrategySet == PUSH_AUTO) + { + memcpy(best_node_previous, laqueue_pop(&bucket->auto_mn[current_wgroupid]), sizeof(unsigned) *PUSH_NB_AUTO); + } + /*Save the task */ + STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); + /*Update general counter */ + hp->total_tasks_in_buckets -= 1; + bucket->tasks_queue_ntasks -= 1; + unsigned arch_index; + for (arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + /*We test the archs on the bucket and not on task->where since it is restrictive */ + if (bucket->valid_archs &starpu_heteroprio_types_to_arch(arch_index)) + { + hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1; + } + } +#ifdef LAHETEROPRIO_PRINT_STAT + if (current_wgroupid != wgroupid) + { + lastats.task_stolen_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; + lastats.task_stolen_in_wgroup[current_wgroupid][wgroup_access_order[idx_access_item].prio_idx] += 1; + } + src_mem_node = current_wgroupid; +#endif + break; + } +#ifdef LAHETEROPRIO_PRINT_STAT + else + { + lastats.task_skipt_due_to_factor_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; + } +#endif + } +#ifdef LAHETEROPRIO_PRINT_STAT + else + { + if (current_wgroupid == wgroupid) + { + lastats.task_list_empty_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; + } + } +#endif + } + } + } + else + { + // !hp->use_locality + /* Check that some tasks are available for the current worker arch */ + if(hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0) + { + /* Ideally we would like to fill the prefetch array */ + unsigned nb_tasks_to_prefetch = (STARPU_HETEROPRIO_MAX_PREFETCH-worker->tasks_queue.ntasks); + /* But there are maybe less tasks than that! 
*/ + if(nb_tasks_to_prefetch > hp->nb_remaining_tasks_per_arch_index[worker->arch_index]) + { + nb_tasks_to_prefetch = hp->nb_remaining_tasks_per_arch_index[worker->arch_index]; + } + /* But in case there are less tasks than worker we take the minimum */ + if(hp->nb_remaining_tasks_per_arch_index[worker->arch_index] < starpu_sched_ctx_get_nworkers(sched_ctx_id)) + { + if(worker->tasks_queue.ntasks == 0) + nb_tasks_to_prefetch = 1; + else + nb_tasks_to_prefetch = 0; + } + + unsigned idx_prio, arch_index; + /* We iterate until we found all the tasks we need */ + for(idx_prio = 0; nb_tasks_to_prefetch && idx_prio < hp->nb_prio_per_arch_index[worker->arch_index]; ++idx_prio) + { + /* Retrieve the bucket using the mapping */ + struct _heteroprio_bucket* bucket = &hp->buckets[hp->prio_mapping_per_arch_index[worker->arch_index][idx_prio]]; + /* Ensure we can compute task from this bucket */ + STARPU_ASSERT(bucket->valid_archs & worker->arch_type); + /* Take nb_tasks_to_prefetch tasks if possible */ + while(!starpu_task_list_empty(&bucket->tasks_queue[0]) && nb_tasks_to_prefetch && + (bucket->factor_base_arch_index == 0 || + worker->arch_index == bucket->factor_base_arch_index || + (((float)bucket->tasks_queue_ntasks)/((float)hp->nb_workers_per_arch_index[bucket->factor_base_arch_index])) >= bucket->slow_factors_per_index[worker->arch_index] + )) + { + task = starpu_task_list_pop_front(&bucket->tasks_queue[0]); + if(!starpu_worker_can_execute_task(workerid, task, 0)) + { + // Put the task back because worker can't execute it (e.g. 
codelet.can_execute) + starpu_task_list_push_front(&bucket->tasks_queue[0], task); + break; + } + /* Save the task */ + STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); + starpu_st_prio_deque_push_front_task(&worker->tasks_queue, task); + + /* Update general counter */ + hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] += 1; + hp->total_tasks_in_buckets -= 1; + bucket->tasks_queue_ntasks -= 1; + + for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) + { + /* We test the archs on the bucket and not on task->where since it is restrictive */ + if(bucket->valid_archs & starpu_heteroprio_types_to_arch(arch_index)) + { + hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1; + } + } + /* Decrease the number of tasks to found */ + nb_tasks_to_prefetch -= 1; + nb_added_tasks += 1; + // TODO starpu_prefetch_task_input_for(task, workerid); + } + } + } + + task = NULL; + + /* The worker has some tasks in its queue */ + if(worker->tasks_queue.ntasks) + { + task = starpu_st_prio_deque_pop_task_for_worker(&worker->tasks_queue, workerid, NULL); + hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] -= 1; + } + /* Otherwise look if we can steal some work */ + else if(hp->nb_prefetched_tasks_per_arch_index[worker->arch_index]) + { + /* If HETEROPRIO_MAX_PREFETCH==1 it should not be possible to steal work */ + STARPU_ASSERT(STARPU_HETEROPRIO_MAX_PREFETCH != 1); + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + unsigned victim; + unsigned current_worker; + + /* Start stealing from just after ourself */ + while(workers->has_next(workers, &it)) + { + current_worker = workers->get_next(workers, &it); + if(current_worker == workerid) + break; + } + + /* circular loop */ + while (1) + { + if (!workers->has_next(workers, &it)) + { + /* End of the list, restart from the beginning */ + 
workers->init_iterator(workers, &it); + } + while(workers->has_next(workers, &it)) + { + victim = workers->get_next(workers, &it); + /* When getting on ourself again, we're done trying to find work */ + if(victim == workerid) + goto done; + + /* If it is the same arch and there is a task to steal */ + if(hp->workers_heteroprio[victim].arch_index == worker->arch_index + && hp->workers_heteroprio[victim].tasks_queue.ntasks) + { + /* ensure the worker is not currently prefetching its data */ + starpu_worker_lock(victim); + + if(hp->workers_heteroprio[victim].arch_index == worker->arch_index + && hp->workers_heteroprio[victim].tasks_queue.ntasks) + { + /* steal the last added task */ + task = starpu_st_prio_deque_pop_task_for_worker(&hp->workers_heteroprio[victim].tasks_queue, workerid, NULL); + /* we steal a task update global counter */ + hp->nb_prefetched_tasks_per_arch_index[hp->workers_heteroprio[victim].arch_index] -= 1; + + starpu_worker_unlock(victim); + goto done; + } + starpu_worker_unlock(victim); + } + } + } +done: ; + } + } + + if (!task) + { + /* Tell pushers that we are waiting for tasks_queue for us */ + starpu_bitmap_set(&hp->waiters, workerid); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); + + if(task &&_starpu_get_nsched_ctxs() > 1) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) + task = NULL; + _starpu_sched_ctx_unlock_write(sched_ctx_id); + + if(hp->use_locality) + { +#ifdef LAHETEROPRIO_PRINT_STAT + { + const unsigned best_node_now = get_best_mem_node(task, hp, hp->pushStrategyToUse); + if (best_node_now != src_mem_node) + { + lastats.pop_redirect[workerid][src_mem_node] += 1; + } + } +#endif + if (hp->pushStrategySet == PUSH_AUTO) + { + unsigned best_node_now[PUSH_NB_AUTO] = { 0 }; + unsigned idx_strategy; + for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) + { + 
best_node_now[idx_strategy] = get_best_mem_node(task, hp, idx_strategy); + } + STARPU_PTHREAD_MUTEX_LOCK(&hp->push_history_mutex); + unsigned idx_best_strategy = 0; + for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) + { + if (best_node_now[idx_strategy] == best_node_previous[idx_strategy]) + { + hp->pushStrategyHistory[idx_strategy] += 1; + } + if (hp->pushStrategyHistory[idx_strategy] >= + hp->pushStrategyHistory[idx_best_strategy]) + { + idx_best_strategy = idx_strategy; + } + } + hp->pushStrategyToUse = idx_best_strategy; + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->push_history_mutex); + } + } + } + + if(!hp->use_locality) + { + /* if we have task (task) me way have some in the queue (worker->tasks_queue_size) that was freshly added (nb_added_tasks) */ + if(task && worker->tasks_queue.ntasks && nb_added_tasks && starpu_get_prefetch_flag()) + { + /* TODO berenger: iterate in the other sense */ + struct starpu_task *task_to_prefetch = NULL; + for (task_to_prefetch = starpu_task_prio_list_begin(&worker->tasks_queue.list); + (task_to_prefetch != starpu_task_prio_list_end(&worker->tasks_queue.list) && + nb_added_tasks && hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0); + task_to_prefetch = starpu_task_prio_list_next(&worker->tasks_queue.list, task_to_prefetch)) + { + /* prefetch from closest to end task */ + if (!task_to_prefetch->prefetched) /* FIXME: it seems we are prefetching several times?? 
*/ + { + starpu_prefetch_task_input_for(task_to_prefetch, workerid); + } + nb_added_tasks -= 1; + } + } + } + + return task; +} + +static void pre_exec_hook_heteroprio_policy(struct starpu_task *task, unsigned sched_ctx_id) +{ + (void) task; + const unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if(hp->freeze_data_gathering || !hp->use_auto_calibration) + return; + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); + starpu_worker_relax_off(); + + struct timespec tsnow; + _starpu_clock_gettime(&tsnow); + const double now = starpu_timing_timespec_to_us(&tsnow); + + // Register free time between the post and pre hook + hp->current_arch_free_time[starpu_worker_get_type(workerid)] += now - hp->last_hook_exec_time[workerid]; + + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); + + hp->last_hook_exec_time[workerid] = now; +} + +static void post_exec_hook_heteroprio_policy(struct starpu_task *task, unsigned sched_ctx_id) +{ + const unsigned workerid = starpu_worker_get_id_check(); + struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if(hp->freeze_data_gathering || !hp->use_auto_calibration) + return; + + struct timespec tsnow; + _starpu_clock_gettime(&tsnow); + const double now = starpu_timing_timespec_to_us(&tsnow); + const double busy_time = now - hp->last_hook_exec_time[workerid]; + + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); + starpu_worker_relax_off(); + + // Register the busy time between the pre and post hook + hp->current_arch_busy_time[starpu_worker_get_type(workerid)] += busy_time; + + // Register task execution + const int prio = get_task_auto_priority(hp, task); + if(prio != -1) + { + register_task_arch_execution(hp, prio, starpu_worker_get_type(workerid)); + register_execution_time(hp, 
starpu_worker_get_type(workerid), prio, busy_time); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); + + hp->last_hook_exec_time[workerid] = now; +} + +struct starpu_sched_policy _starpu_sched_heteroprio_policy = +{ + .init_sched = initialize_heteroprio_policy, + .deinit_sched = deinitialize_heteroprio_policy, + .add_workers = add_workers_heteroprio_policy, + .remove_workers = remove_workers_heteroprio_policy, + .push_task = push_task_heteroprio_policy, + .simulate_push_task = NULL, + .push_task_notify = NULL, + .pop_task = pop_task_heteroprio_policy, + .pre_exec_hook = pre_exec_hook_heteroprio_policy, + .post_exec_hook = post_exec_hook_heteroprio_policy, + .policy_name = "heteroprio", + .policy_description = "heteroprio", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/heteroprio.h b/src/sched_policies/heteroprio.h new file mode 100644 index 0000000..386c29e --- /dev/null +++ b/src/sched_policies/heteroprio.h @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __SCHED_HETEROPRIO_H__ +#define __SCHED_HETEROPRIO_H__ + +#include + +#define CODELET_MAX_NAME_LENGTH 32 +#define HETEROPRIO_MAX_PRIO 100 +#define LAHETEROPRIO_MAX_WORKER_GROUPS 10 + +#define AUTOHETEROPRIO_NO_NAME "NO_NAME" + +// will tend to ignore tasks older than this when measuring values such as NOD, execution time, etc. +// i.e. if there are more than AUTOHETEROPRIO_RELEVANT_TASK_LIFE tasks of the same type +#define AUTOHETEROPRIO_RELEVANT_TASK_LIFE 256 + +#define AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE 16 + +#define AUTOHETEROPRIO_EXTREMELY_LONG_TIME 999999999999999.0 +#define AUTOHETEROPRIO_LONG_TIME 100000000.0 +#define AUTOHETEROPRIO_FAIR_TIME 1000.0 + +#define AUTOHETEROPRIO_DEFAULT_TASK_TIME AUTOHETEROPRIO_FAIR_TIME + +// at the end of the execution, if the sum of all worker profiling times exceeds this, the times will be compressed so that no time exceeds this one +// (probably in us) +#define AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME 1000000000.0 + +#endif // __SCHED_HETEROPRIO_H__ diff --git a/src/sched_policies/hierarchical_heft.c b/src/sched_policies/hierarchical_heft.c new file mode 100644 index 0000000..3897325 --- /dev/null +++ b/src/sched_policies/hierarchical_heft.c @@ -0,0 +1,90 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include + +static struct starpu_sched_component_composed_recipe * recipe_for_worker(enum starpu_worker_archtype a STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create(); + starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_best_implementation_create, NULL); + starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_fifo_create, NULL); + return r; +} + + + + +static void initialize_heft_center_policy(unsigned sched_ctx_id) +{ + struct starpu_sched_component_specs specs; + memset(&specs,0,sizeof(specs)); + + + struct starpu_sched_component_mct_data heft_data = + { + .alpha = 1.0, + .beta = 1.0, + ._gamma = 0.0, + .idle_power = 0.0, + /* + .no_perf_model_component_create = starpu_sched_component_random_create, + .arg_no_perf_model = NULL, + .calibrating_component_create = starpu_sched_component_random_create, + .arg_calibrating_component = NULL, + */ + }; + struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create(); + /* FIXME: add perfmodel_select component */ + starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_heft_create,&heft_data); + specs.hwloc_machine_composed_sched_component = r; + + r = starpu_sched_component_composed_recipe_create(); + starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_best_implementation_create, NULL); + starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_fifo_create ,NULL); + + specs.hwloc_component_composed_sched_component = r; + specs.worker_composed_sched_component = recipe_for_worker; + + struct starpu_sched_tree *t = starpu_sched_component_make_scheduler(sched_ctx_id, specs); + + 
starpu_sched_component_composed_recipe_destroy(specs.hwloc_machine_composed_sched_component); + + + starpu_sched_tree_update_workers(t); + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); +} + + + + + +struct starpu_sched_policy _starpu_sched_tree_heft_hierarchical_policy = +{ + .init_sched = initialize_heft_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-heft-hierarchical", + .policy_description = "hierarchical heft tree policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_eager.c b/src/sched_policies/modular_eager.c new file mode 100644 index 0000000..d224609 --- /dev/null +++ b/src/sched_policies/modular_eager.c @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +static void initialize_eager_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_eager_policy = +{ + .init_sched = initialize_eager_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-eager", + .policy_description = "eager modular policy", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/modular_eager_prefetching.c b/src/sched_policies/modular_eager_prefetching.c new file mode 100644 index 0000000..b4efbf2 --- /dev/null +++ b/src/sched_policies/modular_eager_prefetching.c @@ -0,0 +1,45 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy = +{ + .init_sched = initialize_eager_prefetching_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-eager-prefetching", + .policy_description = "eager with prefetching modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_eager_prio.c b/src/sched_policies/modular_eager_prio.c new file mode 100644 index 0000000..4bf68df --- /dev/null +++ b/src/sched_policies/modular_eager_prio.c @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static void initialize_eager_prio_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_prio_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_eager_prio_policy = +{ + .init_sched = initialize_eager_prio_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-eager-prio", + .policy_description = "eager-prio modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_ez.c b/src/sched_policies/modular_ez.c new file mode 100644 index 0000000..67a8889 --- /dev/null +++ b/src/sched_policies/modular_ez.c @@ -0,0 +1,489 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +/* The scheduling strategy may look like this : + * + * | + * fifo_above + * | + * decision_component <--push-- perfmodel_select_component --push--> eager_component + * | | | | + * fifo fifo fifo | + * | | | | + * eager eager eager | + * | | | | + * >--------------------------------------------------------------< + * | | + * best_impl_component best_impl_component + * | | + * worker_component worker_component + */ + +/* The two thresholds concern the fifo components below, which contain queues + * that can handle the priority of StarPU tasks. You can tune your + * scheduling by benchmarking those values and choosing which one is the + * best for your current application. + * The current value of the ntasks_threshold is the best we found + * so far across several types of applications (cholesky, LU, stencil). + */ +#define _STARPU_SCHED_NTASKS_THRESHOLD_HEFT 30 +#define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 +#define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 + +void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id, unsigned ndecisions, ...)
+{ + struct starpu_sched_tree * t; + struct starpu_sched_component *last = NULL; /* Stores the last created component, from top to bottom */ + unsigned i, j, n; + struct starpu_sched_component *userchoice_component = NULL; + struct starpu_sched_component *pre_decision_component = NULL; + struct starpu_sched_component *last_pre_decision_component = NULL; + struct starpu_sched_component *decision_component = NULL; + struct starpu_sched_component *no_perfmodel_component = NULL; + struct starpu_sched_component *calibrator_component = NULL; + unsigned sched; + va_list varg_list; + unsigned decide_flags; + unsigned flags; + + /* Start building the tree */ + t = starpu_sched_tree_create(sched_ctx_id); + t->root = NULL; + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); + + STARPU_ASSERT(ndecisions >= 1); + + if (ndecisions != 1) + { + /* Take choice between schedulers from user */ + userchoice_component = starpu_sched_component_userchoice_create(t, NULL); + t->root = userchoice_component; + } + + + unsigned nbelow; + unsigned nummaxids; + + va_start(varg_list, ndecisions); + for (sched = 0; sched < ndecisions; sched++) + { + last = userchoice_component; + + starpu_sched_component_create_t create_decision_component = va_arg(varg_list, starpu_sched_component_create_t); + void *data = va_arg(varg_list, void *); + flags = va_arg(varg_list, unsigned); + (void) create_decision_component; + (void) data; + + while ((flags & STARPU_SCHED_SIMPLE_PRE_DECISION) == STARPU_SCHED_SIMPLE_PRE_DECISION) + { + STARPU_ASSERT(flags == STARPU_SCHED_SIMPLE_PRE_DECISION); + (void) va_arg(varg_list, starpu_sched_component_create_t); + (void) va_arg(varg_list, void *); + flags = va_arg(varg_list, unsigned); + } + + int above_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_ABOVE", (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO) ? 1 : 0); + int below_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_BELOW", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO) ? 
1 : 0); + + /* Create combined workers if requested */ + if (flags & STARPU_SCHED_SIMPLE_COMBINED_WORKERS) + starpu_sched_find_all_worker_combinations(); + + /* Components parameters */ + + if (above_prio || below_prio) + { + /* The application may use any integer */ + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); + } + + /* See what the component will decide */ + nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count(); + if (starpu_memory_nodes_get_count() > nummaxids) + nummaxids = starpu_memory_nodes_get_count(); + if (STARPU_NARCH > nummaxids) + nummaxids = STARPU_NARCH; + + if (sched == 0) + decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK; + else + STARPU_ASSERT(decide_flags == (flags & STARPU_SCHED_SIMPLE_DECIDE_MASK)); + } + va_end(varg_list); + + unsigned below_id[nummaxids]; + + switch (decide_flags) + { + case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: + /* Count workers */ + nbelow = starpu_worker_get_count() + starpu_combined_worker_get_count(); + /* and no need for IDs */ + break; + case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: + { + /* Count memory nodes */ + n = starpu_memory_nodes_get_count(); + nbelow = 0; + for(i = 0; i < n; i++) + { + for(j = 0; j < starpu_worker_get_count() + starpu_combined_worker_get_count(); j++) + if (starpu_worker_get_memory_node(j) == i) + break; + if (j >= starpu_worker_get_count() + starpu_combined_worker_get_count()) + /* Don't create a component string for this memory node with no worker */ + continue; + below_id[nbelow] = i; + nbelow++; + } + break; + } + case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: + { + /* Count available architecture types */ + enum starpu_worker_archtype type; + nbelow = 0; + for (type = 0; type < STARPU_NARCH; type++) + { + if (starpu_worker_get_count_by_type(type)) + { + below_id[nbelow] = 
type; + nbelow++; + } + } + break; + } + default: + STARPU_ABORT(); + } + STARPU_ASSERT(nbelow > 0); + + struct starpu_sched_component *last_below[nbelow]; + memset(&last_below, 0, sizeof(last_below)); + + if (ndecisions != 1) + { + /* Will need to stage pulls, create one per choice */ + for (i = 0; i < nbelow; i++) + last_below[i] = starpu_sched_component_stage_create(t, NULL); + } + + va_start(varg_list, ndecisions); + for (sched = 0; sched < ndecisions; sched++) + { + last = userchoice_component; + + starpu_sched_component_create_t create_decision_component = va_arg(varg_list, starpu_sched_component_create_t); + void *data = va_arg(varg_list, void *); + flags = va_arg(varg_list, unsigned); + + while ((flags & STARPU_SCHED_SIMPLE_PRE_DECISION) == STARPU_SCHED_SIMPLE_PRE_DECISION) + { + starpu_sched_component_create_t create_pre_decision_component = va_arg(varg_list, starpu_sched_component_create_t); + void *pre_data = va_arg(varg_list, void *); + flags = va_arg(varg_list, unsigned); + struct starpu_sched_component *component; + + component = create_pre_decision_component(t, pre_data); + + if (pre_decision_component) + /* Connect after previous pre-decision component */ + starpu_sched_component_connect(pre_decision_component, component); + else + /* We are the first pre-decision component */ + pre_decision_component = component; + last_pre_decision_component = component; + } + + int above_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_ABOVE", (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO) ? 1 : 0); + int below_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_BELOW", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO) ? 
1 : 0); + + if (nbelow == 1 && !(flags & STARPU_SCHED_SIMPLE_DECIDE_ALWAYS)) + { + /* Oh, no choice, we don't actually need to decide, just + * use an eager scheduler */ + decision_component = starpu_sched_component_eager_create(t, NULL); + /* But make sure we have a fifo above it, fifos below it would + * possibly refuse tasks out of available room */ + flags |= STARPU_SCHED_SIMPLE_FIFO_ABOVE; + } + else + { + decision_component = create_decision_component(t, data); + } + if (last_pre_decision_component) + starpu_sched_component_connect(last_pre_decision_component, decision_component); + else + pre_decision_component = decision_component; + + /* First, a fifo if requested */ + if (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE) + { + struct starpu_sched_component *fifo_above; + if (above_prio) + { + fifo_above = starpu_sched_component_prio_create(t, NULL); + } + else + { + fifo_above = starpu_sched_component_fifo_create(t, NULL); + } + if (!last) + last = t->root = fifo_above; + else + { + starpu_sched_component_connect(last, fifo_above); + last = fifo_above; + } + } + + /* Then, perfmodel calibration if requested, and plug the scheduling decision-making component to it */ + if (flags & STARPU_SCHED_SIMPLE_PERFMODEL) + { + no_perfmodel_component = starpu_sched_component_eager_create(t, NULL); + calibrator_component = starpu_sched_component_eager_calibration_create(t, NULL); + + if (! 
(flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE)) + { + /* We won't have a fifo above, the eager components do need one */ + struct starpu_sched_component *calibrator_fifo = starpu_sched_component_fifo_create(t, NULL); + struct starpu_sched_component *no_perfmodel_fifo = starpu_sched_component_fifo_create(t, NULL); + starpu_sched_component_connect(calibrator_fifo, calibrator_component); + starpu_sched_component_connect(no_perfmodel_fifo, no_perfmodel_component); + calibrator_component = calibrator_fifo; + no_perfmodel_component = no_perfmodel_fifo; + } + + struct starpu_sched_component_perfmodel_select_data perfmodel_select_data = + { + .calibrator_component = calibrator_component, + .no_perfmodel_component = no_perfmodel_component, + .perfmodel_component = pre_decision_component, + }; + + struct starpu_sched_component * perfmodel_select_component = starpu_sched_component_perfmodel_select_create(t, &perfmodel_select_data); + + if (!last) + last = t->root = perfmodel_select_component; + else + starpu_sched_component_connect(last, perfmodel_select_component); + + starpu_sched_component_connect(perfmodel_select_component, pre_decision_component); + starpu_sched_component_connect(perfmodel_select_component, calibrator_component); + starpu_sched_component_connect(perfmodel_select_component, no_perfmodel_component); + } + else + { + /* No perfmodel calibration */ + if (!last) + /* Plug decision_component directly */ + t->root = pre_decision_component; + else + /* Plug decision components to fifo */ + starpu_sched_component_connect(last, pre_decision_component); + } + + /* Take default ntasks_threshold */ + unsigned ntasks_threshold; + if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT) + { + ntasks_threshold = UINT_MAX; + } + else if (starpu_sched_component_is_heft(decision_component) || + starpu_sched_component_is_mct(decision_component) || + starpu_sched_component_is_heteroprio(decision_component)) + { + /* These need more queueing to allow CPUs to take some share of the work 
*/ + ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_HEFT; + } + else + { + ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT; + } + /* But let user tune it */ + ntasks_threshold = starpu_getenv_number_default("STARPU_NTASKS_THRESHOLD", ntasks_threshold); + + double exp_len_threshold; + if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT) + { + exp_len_threshold = INFINITY; + } + else + { + exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT; + } + /* But let user tune it */ + exp_len_threshold = starpu_getenv_float_default("STARPU_EXP_LEN_THRESHOLD", exp_len_threshold); + + int ready = starpu_getenv_number_default("STARPU_SCHED_READY", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY) ? 1 : 0); + + int exp = (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP) ? 1 : 0; + + struct starpu_sched_component_prio_data prio_data = + { + .ntasks_threshold = ntasks_threshold, + .exp_len_threshold = exp_len_threshold, + .ready = ready, + .exp = exp, + }; + + struct starpu_sched_component_fifo_data fifo_data = + { + .ntasks_threshold = ntasks_threshold, + .exp_len_threshold = exp_len_threshold, + .ready = ready, + .exp = exp, + }; + + /* Create one fifo+eager component pair per choice, below scheduling decision */ + for(i = 0; i < nbelow; i++) + { + last = decision_component; + + if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW + && !(decide_flags == STARPU_SCHED_SIMPLE_DECIDE_WORKERS + && i >= starpu_worker_get_count())) + { + struct starpu_sched_component *fifo_below; + if (below_prio) + { + fifo_below = starpu_sched_component_prio_create(t, &prio_data); + } + else + { + fifo_below = starpu_sched_component_fifo_create(t, &fifo_data); + } + starpu_sched_component_connect(last, fifo_below); + last = fifo_below; + } + switch (decide_flags) + { + case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: + /* 1-1 mapping between choice and worker, no need for an eager component */ + n = 1; + break; + case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: + n = 0; + for (j = 0; j < 
starpu_worker_get_count() + starpu_combined_worker_get_count(); j++) + if (starpu_worker_get_memory_node(j) == below_id[i]) + n++; + break; + case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: + n = starpu_worker_get_count_by_type(i); + break; + default: + STARPU_ABORT(); + } + STARPU_ASSERT(n >= 1); + if (n > 1) + { + /* Several workers for this choice, need to introduce + * a component to distribute the work */ + struct starpu_sched_component *distribute; + if (flags & STARPU_SCHED_SIMPLE_WS_BELOW) + { + distribute = starpu_sched_component_work_stealing_create(t, NULL); + } + else + { + distribute = starpu_sched_component_eager_create(t, NULL); + } + + starpu_sched_component_connect(last, distribute); + last = distribute; + } + + if (ndecisions != 1) + /* Connect to stage component */ + starpu_sched_component_connect(last, last_below[i]); + else + /* Directly let it connected to worker */ + last_below[i] = last; + } + } + va_end(varg_list); + + /* Finish by creating components per worker */ + for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) + { + /* Start from the bottom */ + struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); + struct starpu_sched_component * worker = worker_component; + unsigned id; + + /* Create implementation chooser if requested */ + if (flags & STARPU_SCHED_SIMPLE_IMPL) + { + struct starpu_sched_component * impl_component = starpu_sched_component_best_implementation_create(t, NULL); + starpu_sched_component_connect(impl_component, worker_component); + /* Reroute components above through it */ + worker = impl_component; + } + + switch (decide_flags) + { + case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: + id = i; + break; + case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: + for (id = 0; id < nbelow; id++) + if (below_id[id] == starpu_worker_get_memory_node(i)) + break; + break; + case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: + for (id = 0; id < nbelow; id++) + if (below_id[id] == 
starpu_worker_get_type(i)) + break; + break; + default: + STARPU_ABORT(); + } + STARPU_ASSERT(id < nbelow); + last = last_below[id]; + if (!last) + last = decision_component; + + starpu_sched_component_connect(last, worker); + + /* Plug perfmodel calibrator if requested */ + /* FIXME: this won't work with several scheduling decisions */ + if (flags & STARPU_SCHED_SIMPLE_PERFMODEL) + { + starpu_sched_component_connect(no_perfmodel_component, worker); + /* Calibrator needs to choose the implementation */ + starpu_sched_component_connect(calibrator_component, worker_component); + } + } + + starpu_sched_tree_update_workers(t); + starpu_sched_tree_update_workers_in_ctx(t); +} + +/* Single-decision convenience wrapper: forwards to + * starpu_sched_component_initialize_simple_schedulers() with ndecisions set to 1 + * and (create_decision_component, data, flags) as the only decision triple. + * Note the argument order: sched_ctx_id comes last here but first in the callee. */ +void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 1, create_decision_component, data, flags); +} diff --git a/src/sched_policies/modular_gemm.c b/src/sched_policies/modular_gemm.c new file mode 100644 index 0000000..2fc3dda --- /dev/null +++ b/src/sched_policies/modular_gemm.c @@ -0,0 +1,196 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This scheduler runs only GEMMs on GPUs, and tries to feed them with as many + * GEMMs as possible. */ + +#include +#include + +/* Optionally, it can take memory affinity into account, to avoid too many GPU + * data transfers */ + +#define MEMORY_AFFINITY + +struct child_data +{ + double expected_start; + double predicted; + double predicted_transfer; + double expected_end; + unsigned child; +}; + +static int compar(const void *_a, const void *_b) +{ + const struct child_data *a = _a; + const struct child_data *b = _b; + if (a->expected_end < b->expected_end) + return -1; + if (a->expected_end == b->expected_end) + return 0; + return 1; +} + +static int gemm_push_task(struct starpu_sched_component * component, struct starpu_task * task) +{ + unsigned n = component->nchildren; + unsigned i; + + /* See if it's a GEMM task */ + const char *name = starpu_task_get_model_name(task); + //fprintf(stderr, "it's %s\n", name); + + if (name && (!strcmp(name, "gemm") || + !strcmp(name, "dgemm") || + !strcmp(name, "sgemm") || + !strcmp(name, "chol_model_22") || + !strcmp(name, "starpu_dlu_lu_model_22") || + !strcmp(name, "starpu_slu_lu_model_22"))) + { + /* It's a GEMM, try to push to GPUs */ + + struct child_data child_data[n]; + + for (i = 0; i < n; i++) + { + child_data[i].expected_end = -1; + child_data[i].child = i; + } + + /* Look at GPU availability time */ + for (i = 0; i < n; i++) + { + struct starpu_sched_component *child = component->children[i]; + double predicted; + if (starpu_sched_component_execute_preds(child, task, &predicted)) + { + double expected_start; + child_data[i].expected_start = + expected_start = child->estimated_end(child); + child_data[i].predicted = predicted; + child_data[i].expected_end = expected_start + + predicted; + +#ifdef MEMORY_AFFINITY + double predicted_transfer; + child_data[i].predicted_transfer = + predicted_transfer = starpu_sched_component_transfer_length(child, task); + child_data[i].expected_end += predicted_transfer; 
+#endif + } + } + + /* Sort by increasing expected end */ + qsort(child_data, n, sizeof(*child_data), compar); + + /* Try to push to the GPU with minimum availability time, to balance the load. */ + for (i = 0; i < n; i++) + { + if (child_data[i].expected_end != -1) + { + struct starpu_sched_component *child = component->children[child_data[i].child]; + + /* Note it in the task so that estimated_end() has it */ + task->predicted = child_data[i].predicted; + task->predicted_transfer = child_data[i].predicted_transfer; + + int ret = starpu_sched_component_push_task(component,child,task); + if (!ret) + /* Ok, this GPU took it */ + return 0; + } + } + } + + int workerid; + /* It's not a GEMM, or no GPU wanted to take it, find somebody else */ + for(workerid = starpu_bitmap_first(&component->workers_in_ctx); + workerid != -1; + workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) + { + int nimpl; + for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if(starpu_worker_can_execute_task(workerid,task,nimpl) + || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) + { + for (i = 0; i < n; i++) + { + struct starpu_sched_component *child = component->children[i]; + int idworker; + for(idworker = starpu_bitmap_first(&component->children[i]->workers); + idworker != -1; + idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) + { + if (idworker == workerid) + { + if ((starpu_cpu_worker_get_count() == 0 || + starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + && (starpu_worker_can_execute_task(workerid,task,nimpl) + || starpu_combined_worker_can_execute_task(workerid, task, nimpl))) + { + int ret = starpu_sched_component_push_task(component,child,task); + if (!ret) + return 0; + } + } + } + } + } + } + } + /* FIFOs are full */ + return 1; +} + +struct starpu_sched_component *starpu_sched_component_gemm_create(struct starpu_sched_tree *tree, void *params STARPU_ATTRIBUTE_UNUSED) +{ + struct starpu_sched_component 
*component = starpu_sched_component_create(tree, "gemm"); + + component->push_task = gemm_push_task; + + return component; +} + +static void initialize_gemm_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_gemm_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_MEMNODES | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_gemm_policy = +{ + .init_sched = initialize_gemm_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-gemm", + .policy_description = "gemm modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_heft.c b/src/sched_policies/modular_heft.c new file mode 100644 index 0000000..bea5c39 --- /dev/null +++ b/src/sched_policies/modular_heft.c @@ -0,0 +1,190 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +/* The scheduling strategy looks like this: + * + * | + * window_component + * | + * mct_component <--push-- perfmodel_select_component --push--> eager_component + * | | + * | | + * >----------------------------------------------------< + * | | + * best_impl_component best_impl_component + * | | + * prio_component prio_component + * | | + * worker_component worker_component + * + * A window contains the tasks that failed to be pushed, so that when the prio_components reclaim + * tasks by calling can_push to their parent (classically, just after a successful pop has + * been made by its associated worker_component), this call goes up to the window_component which + * pops a task from its local queue and tries to schedule it by pushing it to the + * decision_component. + * Finally, the task will be pushed to the prio_component which is the direct + * parent in the tree of the worker_component the task has been scheduled on. This + * component will push the task on its local queue if neither of the two thresholds + * has been reached for it, or send a push_error signal to its parent. 
+ */ + +static void initialize_heft_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_heft_policy = +{ + .init_sched = initialize_heft_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-heft", + .policy_description = "heft modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +static void initialize_dmda_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_dmda_policy = +{ + .init_sched = initialize_dmda_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = 
starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-dmda", + .policy_description = "data-aware performance model modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +static void initialize_dmdap_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_dmdap_policy = +{ + .init_sched = initialize_dmdap_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-dmdap", + .policy_description = "data-aware performance model modular policy (priority)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +static void initialize_dmdar_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_dmdar_policy = +{ + .init_sched = 
initialize_dmdar_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-dmdar", + .policy_description = "data-aware performance model modular policy (ready)", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; + +static void initialize_dmdas_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_dmdas_policy = +{ + .init_sched = initialize_dmdas_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-dmdas", + .policy_description = "data-aware performance model (sorted) modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_heft2.c b/src/sched_policies/modular_heft2.c new file mode 100644 index 0000000..5f38eb0 --- /dev/null +++ b/src/sched_policies/modular_heft2.c @@ -0,0 +1,78 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +/* The scheduling strategy looks like this: + * + * | + * window_component + * | + * heft_component <--push-- perfmodel_select_component --push--> eager_component + * | | + * | | + * >----------------------------------------------------< + * | | + * best_impl_component best_impl_component + * | | + * prio_component prio_component + * | | + * worker_component worker_component + * + * A window contains the tasks that failed to be pushed, so that when the prio_components reclaim + * tasks by calling can_push to their parent (classically, just after a successful pop has + * been made by its associated worker_component), this call goes up to the window_component which + * pops a task from its local queue and tries to schedule it by pushing it to the + * decision_component. + * Finally, the task will be pushed to the prio_component which is the direct + * parent in the tree of the worker_component the task has been scheduled on. This + * component will push the task on its local queue if neither of the two thresholds + * has been reached for it, or send a push_error signal to its parent. 
+ */ + +static void initialize_heft2_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_heft_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_heft2_policy = +{ + .init_sched = initialize_heft2_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-heft2", + .policy_description = "heft modular2 policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_heft_prio.c b/src/sched_policies/modular_heft_prio.c new file mode 100644 index 0000000..918755b --- /dev/null +++ b/src/sched_policies/modular_heft_prio.c @@ -0,0 +1,79 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +/* The scheduling strategy looks like this: + * + * | + * window_component + * | + * mct_component <--push-- perfmodel_select_component --push--> eager_component + * | | | | + * prio prio prio | + * | | | | + * eager eager eager | + * | | | | + * >--------------------------------------------------------------< + * | | + * best_impl_component best_impl_component + * | | + * worker_component worker_component + * + * A window contains the tasks that failed to be pushed, so that when the prio_components reclaim + * tasks by calling can_push to their parent (classically, just after a successful pop has + * been made by its associated worker_component), this call goes up to the window_component which + * pops a task from its local queue and tries to schedule it by pushing it to the + * decision_component. + * Finally, the task will be pushed to the prio_component which is the direct + * parent in the tree of the worker_component the task has been scheduled on. This + * component will push the task on its local queue if neither of the two thresholds + * has been reached for it, or send a push_error signal to its parent. 
+ */ + +static void initialize_heft_prio_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_MEMNODES | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_heft_prio_policy = +{ + .init_sched = initialize_heft_prio_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-heft-prio", + .policy_description = "heft+prio modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_heteroprio.c b/src/sched_policies/modular_heteroprio.c new file mode 100644 index 0000000..968bdf4 --- /dev/null +++ b/src/sched_policies/modular_heteroprio.c @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static void initialize_heteroprio_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_heteroprio_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy = +{ + .init_sched = initialize_heteroprio_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-heteroprio", + .policy_description = "heteroprio modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_heteroprio_heft.c b/src/sched_policies/modular_heteroprio_heft.c new file mode 100644 index 0000000..4132e1b --- /dev/null +++ b/src/sched_policies/modular_heteroprio_heft.c @@ -0,0 +1,62 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +static void initialize_heteroprio_heft_center_policy(unsigned sched_ctx_id) +{ + struct starpu_sched_component_heteroprio_data heteroprio_data = + { + .mct = NULL, + .batch = 1, + }; + starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 2, + (starpu_sched_component_create_t) starpu_sched_component_heteroprio_create, &heteroprio_data, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, + (starpu_sched_component_create_t) starpu_sched_component_heft_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL); +} + +struct starpu_sched_policy _starpu_sched_modular_heteroprio_heft_policy = +{ + .init_sched = initialize_heteroprio_heft_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = 
starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-heteroprio-heft", + .policy_description = "heteroprio+heft modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_parallel_heft.c b/src/sched_policies/modular_parallel_heft.c new file mode 100644 index 0000000..bdcf2c9 --- /dev/null +++ b/src/sched_policies/modular_parallel_heft.c @@ -0,0 +1,82 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +/* The scheduling strategy looks like this: + * + * | + * window_component + * | + * mct_component <--push-- perfmodel_select_component --push--> eager_component + * | | + * | | + * >----------------------------------------------------< + * | | + * best_impl_component best_impl_component + * | | + * prio_component prio_component + * | | + * worker_component worker_component + * + * A window contains the tasks that failed to be pushed, so that when the prio_components reclaim + * tasks by calling can_push on their parent (classically, just after a successful pop has + * been made by their associated worker_component), this call goes up to the window_component, which + * pops a task from its local queue and tries to schedule it by pushing it to the + * decision_component. + * Finally, the task will be pushed to the prio_component which is the direct + * parent in the tree of the worker_component the task has been scheduled on. This + * component will push the task on its local queue if neither of the two thresholds + * has been reached for it, or send a push_error signal to its parent.
+ */ + +static void initialize_parallel_heft_center_policy(unsigned sched_ctx_id) +{ + _STARPU_DISP("Warning: the modular-pheft scheduler is mostly a proof of concept and not really very optimized\n"); + + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_COMBINED_WORKERS | + STARPU_SCHED_SIMPLE_PERFMODEL | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_parallel_heft_policy = +{ + .init_sched = initialize_parallel_heft_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-pheft", + .policy_description = "parallel heft modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_parallel_random.c b/src/sched_policies/modular_parallel_random.c new file mode 100644 index 0000000..af4a484 --- /dev/null +++ b/src/sched_policies/modular_parallel_random.c @@ -0,0 +1,76 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +/* Random scheduler with a fifo queue for its scheduling window */ + +static void initialize_parallel_random_fifo_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_COMBINED_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_parallel_random_policy = +{ + .init_sched = initialize_parallel_random_fifo_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-prandom", + .policy_description = "prandom modular policy", + .worker_type = STARPU_WORKER_LIST, +}; + +/* Random scheduler with a priority queue for its scheduling window */ + +static void initialize_parallel_random_prio_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) 
starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_COMBINED_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_parallel_random_prio_policy = +{ + .init_sched = initialize_parallel_random_prio_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-prandom-prio", + .policy_description = "prandom-prio modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_prio.c b/src/sched_policies/modular_prio.c new file mode 100644 index 0000000..dd23b9f --- /dev/null +++ b/src/sched_policies/modular_prio.c @@ -0,0 +1,43 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +static void starpu_initialize_prio_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_prio_policy = +{ + .init_sched = starpu_initialize_prio_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-prio", + .policy_description = "prio modular policy", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/modular_prio_prefetching.c b/src/sched_policies/modular_prio_prefetching.c new file mode 100644 index 0000000..1c63bb1 --- /dev/null +++ b/src/sched_policies/modular_prio_prefetching.c @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +/* Just as a documentation example, here is the detailed equivalent of the + * starpu_sched_component_initialize_simple_scheduler call below */ +#if 0 +static void initialize_prio_prefetching_center_policy(unsigned sched_ctx_id) +{ + struct starpu_sched_tree *t; + struct starpu_sched_component * eager_component; + + t = starpu_sched_tree_create(sched_ctx_id); + t->root = starpu_sched_component_prio_create(t, NULL); + eager_component = starpu_sched_component_eager_create(t, NULL); + + starpu_sched_component_connect(t->root, eager_component); + + struct starpu_sched_component_prio_data prio_data = + { + .ntasks_threshold = starpu_getenv_number_default("STARPU_NTASKS_THRESHOLD", _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT), + .exp_len_threshold = starpu_getenv_float_default("STARPU_EXP_LEN_THRESHOLD", _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT), + }; + + unsigned i; + for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) + { + struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); + struct starpu_sched_component * prio_component = starpu_sched_component_prio_create(t, &prio_data); + + starpu_sched_component_connect(prio_component, worker_component); + starpu_sched_component_connect(eager_component, prio_component); + } + starpu_sched_tree_update_workers(t); + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); + + /* The application may use any integer as a priority: open the full int range unless bounds were already set */ + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); +} +#endif + +static void initialize_prio_prefetching_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | +
STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_prio_prefetching_policy = +{ + .init_sched = initialize_prio_prefetching_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, + .post_exec_hook = starpu_sched_component_worker_post_exec_hook, + .policy_name = "modular-prio-prefetching", + .policy_description = "prio prefetching modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_random.c b/src/sched_policies/modular_random.c new file mode 100644 index 0000000..053eddb --- /dev/null +++ b/src/sched_policies/modular_random.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +/* Random scheduler with a fifo queue for its scheduling window */ + +static void initialize_random_fifo_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_random_policy = +{ + .init_sched = initialize_random_fifo_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-random", + .policy_description = "random modular policy", + .worker_type = STARPU_WORKER_LIST, +}; + +/* Random scheduler with a priority queue for its scheduling window */ + +static void initialize_random_prio_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_random_prio_policy = +{ + .init_sched = initialize_random_prio_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-random-prio", + .policy_description = "random-prio modular policy", + .worker_type = STARPU_WORKER_LIST, +}; diff --git 
a/src/sched_policies/modular_random_prefetching.c b/src/sched_policies/modular_random_prefetching.c new file mode 100644 index 0000000..2825c98 --- /dev/null +++ b/src/sched_policies/modular_random_prefetching.c @@ -0,0 +1,80 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 +#define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 + +/* Random scheduler with fifo queues for its scheduling window and its workers. 
*/ + +static void initialize_random_fifo_prefetching_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_random_prefetching_policy = +{ + .init_sched = initialize_random_fifo_prefetching_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-random-prefetching", + .policy_description = "random prefetching modular policy", + .worker_type = STARPU_WORKER_LIST, +}; + +/* Random scheduler with priority queues for its scheduling window and its workers. 
*/ + +static void initialize_random_prio_prefetching_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_FIFO_ABOVE | + STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | + STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + +struct starpu_sched_policy _starpu_sched_modular_random_prio_prefetching_policy = +{ + .init_sched = initialize_random_prio_prefetching_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-random-prio-prefetching", + .policy_description = "random-prio prefetching modular policy", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/modular_ws.c b/src/sched_policies/modular_ws.c new file mode 100644 index 0000000..e833bcc --- /dev/null +++ b/src/sched_policies/modular_ws.c @@ -0,0 +1,44 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + + +static void initialize_ws_center_policy(unsigned sched_ctx_id) +{ + starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_work_stealing_create, NULL, + STARPU_SCHED_SIMPLE_DECIDE_WORKERS | + STARPU_SCHED_SIMPLE_WS_BELOW | + STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); +} + + +struct starpu_sched_policy _starpu_sched_modular_ws_policy = +{ + .init_sched = initialize_ws_center_policy, + .deinit_sched = starpu_sched_tree_deinitialize, + .add_workers = starpu_sched_tree_add_workers, + .remove_workers = starpu_sched_tree_remove_workers, + .push_task = starpu_sched_tree_work_stealing_push_task, + .pop_task = starpu_sched_tree_pop_task, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "modular-ws", + .policy_description = "work stealing modular policy", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/parallel_eager.c b/src/sched_policies/parallel_eager.c new file mode 100644 index 0000000..ba890e6 --- /dev/null +++ b/src/sched_policies/parallel_eager.c @@ -0,0 +1,369 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +struct _starpu_peager_common_data +{ + int possible_combinations_cnt[STARPU_NMAXWORKERS]; + int *possible_combinations[STARPU_NMAXWORKERS]; + int *possible_combinations_size[STARPU_NMAXWORKERS]; + int max_combination_size[STARPU_NMAXWORKERS]; + int no_combined_workers; + int ref_count; +}; + +static struct _starpu_peager_common_data *_peager_common_data = NULL; + +struct _starpu_peager_data +{ + starpu_pthread_mutex_t policy_mutex; + struct starpu_st_fifo_taskq fifo; + struct starpu_st_fifo_taskq local_fifo[STARPU_NMAXWORKERS]; +}; + +static void initialize_peager_common(void) +{ + if (_peager_common_data == NULL) + { + struct _starpu_peager_common_data *common_data = NULL; + _STARPU_CALLOC(common_data, 1, sizeof(struct _starpu_peager_common_data)); + common_data->ref_count = 1; + _peager_common_data = common_data; + + const unsigned nbasic_workers = starpu_worker_get_count(); + unsigned i; + + starpu_sched_find_all_worker_combinations(); + const unsigned ncombined_workers = starpu_combined_worker_get_count(); + common_data->no_combined_workers = ncombined_workers == 0; + + for(i = 0; i < nbasic_workers; i++) + { + common_data->possible_combinations_cnt[i] = 0; + int cnt = common_data->possible_combinations_cnt[i]++; + /* Allocate ncombined_workers + 1 for the singleton worker itself */ + _STARPU_CALLOC(common_data->possible_combinations[i], 1+ncombined_workers, sizeof(int)); + _STARPU_CALLOC(common_data->possible_combinations_size[i], 1+ncombined_workers, sizeof(int)); + common_data->possible_combinations[i][cnt] = i; + common_data->possible_combinations_size[i][cnt] = 1; + common_data->max_combination_size[i] = 1; + } + + for (i = 0; i < ncombined_workers; i++) + { + unsigned combined_workerid = nbasic_workers + i; + int *workers; + int size; + starpu_combined_worker_get_description(combined_workerid, &size, &workers); + int master = workers[0]; + if (size > common_data->max_combination_size[master]) + { 
+ common_data->max_combination_size[master] = size; + } + int cnt = common_data->possible_combinations_cnt[master]++; + common_data->possible_combinations[master][cnt] = combined_workerid; + common_data->possible_combinations_size[master][cnt] = size; + } + } + else + { + _peager_common_data->ref_count++; + } +} + +static void deinitialize_peager_common(void) +{ + STARPU_ASSERT(_peager_common_data != NULL); + _peager_common_data->ref_count--; + if (_peager_common_data->ref_count == 0) + { + const unsigned nbasic_workers = starpu_worker_get_count(); + unsigned i; + for(i = 0; i < nbasic_workers; i++) + { + free(_peager_common_data->possible_combinations[i]); + _peager_common_data->possible_combinations[i] = NULL; + free(_peager_common_data->possible_combinations_size[i]); + _peager_common_data->possible_combinations_size[i] = NULL; + } + free(_peager_common_data); + _peager_common_data = NULL; + } + +} + +static void peager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + if (sched_ctx_id == 0) + { + /* FIXME Fix scheduling contexts initialization or combined + * worker management, to make the initialize_peager_common() + * call to work right from initialize_peager_policy. For now, + * this fails because it causes combined workers to be generated + * too early. */ + initialize_peager_common(); + } + struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + unsigned i; + + for(i = 0; i < nworkers; i++) + { + unsigned workerid = workerids[i]; + if(starpu_worker_is_combined_worker(workerid)) + { + continue; + } + starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); + + /* slaves pick up tasks from their local queue, their master + * will put tasks directly in that local list when a parallel + * tasks comes. 
*/ + starpu_st_fifo_taskq_init(&data->local_fifo[workerid]); + } +} + +static void peager_remove_workers(unsigned sched_ctx_id, int *workerids STARPU_ATTRIBUTE_UNUSED, unsigned nworkers STARPU_ATTRIBUTE_UNUSED) +{ + if (sched_ctx_id == 0) + { + deinitialize_peager_common(); + } +} + +static void initialize_peager_policy(unsigned sched_ctx_id) +{ + struct _starpu_peager_data *data; + _STARPU_CALLOC(data, 1, sizeof(struct _starpu_peager_data)); + + _STARPU_DISP("Warning: the peager scheduler is mostly a proof of concept and not really very optimized\n"); + + /* masters pick tasks from that queue */ + starpu_st_fifo_taskq_init(&data->fifo); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); + STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); +} + +static void deinitialize_peager_policy(unsigned sched_ctx_id) +{ + /* TODO check that there is no task left in the queue */ + struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); + + free(data); +} + +static int push_task_peager_policy(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + int ret_val; + + struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + ret_val = starpu_st_fifo_taskq_push_task(&data->fifo, task); +#ifndef STARPU_NON_BLOCKING_DRIVERS + int is_parallel_task = task->cl && task->cl->max_parallelism > 1; +#endif + starpu_push_task_end(task); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + struct _starpu_peager_common_data *common_data = _peager_common_data; + /* if there are no tasks block */ + /* wake people waiting for a task */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, 
&it); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + /* If this is not a CPU then the workerid simply grabs tasks from the fifo */ + if (starpu_worker_is_combined_worker(workerid)) + { + continue; + } + if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER) + { + starpu_wake_worker_relax_light(workerid); + continue; + } + if ((!is_parallel_task) /* This is not a parallel task, can wake any workerid */ + || (common_data->no_combined_workers) /* There is no combined workerid */ + || (common_data->max_combination_size[workerid] > 1) /* This is a combined workerid master and the task is parallel */ + ) + { + starpu_wake_worker_relax_light(workerid); + } + } +#endif + + return ret_val; +} + +static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id) +{ + struct _starpu_peager_common_data *common_data = _peager_common_data; + struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + int workerid = starpu_worker_get_id_check(); + + /* If this is not a CPU then the worker simply grabs tasks from the fifo */ + if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER) + { + struct starpu_task *task; + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + task = starpu_st_fifo_taskq_pop_task(&data->fifo, workerid); + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + return task; + } + + struct starpu_task *task; + int slave_task = 0; + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); + starpu_worker_relax_off(); + /* check if a slave task is available in the local queue */ + task = starpu_st_fifo_taskq_pop_task(&data->local_fifo[workerid], workerid); + if (!task) + { + /* no slave task, try to pop a task as master */ + task = starpu_st_fifo_taskq_pop_task(&data->fifo, workerid); + if (task) + { + _STARPU_DEBUG("poping master task %p\n", task); + } + +#if 1 + /* 
Optional heuristic to filter out purely slave workers for parallel tasks */ + if (task && task->cl && task->cl->max_parallelism > 1 && common_data->max_combination_size[workerid] == 1 && !common_data->no_combined_workers) + { + /* task is potentially parallel, leave it for a combined worker master */ + _STARPU_DEBUG("pushing back master task %p\n", task); + starpu_st_fifo_taskq_push_back_task(&data->fifo, task); + task = NULL; + } +#endif + } + else + { + slave_task = 1; + _STARPU_DEBUG("poping slave task %p\n", task); + } + if (!task || slave_task) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + goto ret; + } + /* Find the largest compatible worker combination */ + int best_size = -1; + int best_workerid = -1; + int i; + for (i = 0; i < common_data->possible_combinations_cnt[workerid]; i++) + { + if (common_data->possible_combinations_size[workerid][i] > best_size) + { + int combined_worker = common_data->possible_combinations[workerid][i]; + if (starpu_combined_worker_can_execute_task(combined_worker, task, 0)) + { + best_size = common_data->possible_combinations_size[workerid][i]; + best_workerid = combined_worker; + } + } + } + _STARPU_DEBUG("task %p, best_workerid=%d, best_size=%d\n", task, best_workerid, best_size); + + /* In case nobody can execute this task, we let the master + * worker take it anyway, so that it can discard it afterward. + * */ + if (best_workerid == -1) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + goto ret; + } + + /* Is this a basic worker or a combined worker ? 
*/ + if (best_workerid < (int) starpu_worker_get_count()) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + /* The master is alone */ + goto ret; + } + starpu_parallel_task_barrier_init(task, best_workerid); + int worker_size = 0; + int *combined_workerid; + starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid); + + _STARPU_DEBUG("dispatching task %p on combined worker %d of size %d\n", task, best_workerid, worker_size); + /* Dispatch task aliases to the different slaves */ + for (i = 1; i < worker_size; i++) + { + struct starpu_task *alias = starpu_task_dup(task); + int local_worker = combined_workerid[i]; + alias->destroy = 1; + _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); + starpu_st_fifo_taskq_push_task(&data->local_fifo[local_worker], alias); + } + + /* The master also manipulated an alias */ + struct starpu_task *master_alias = starpu_task_dup(task); + master_alias->destroy = 1; + task = master_alias; + + STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); + + _STARPU_TRACE_JOB_PUSH(master_alias, master_alias->priority > 0); + + for (i = 1; i < worker_size; i++) + { + int local_worker = combined_workerid[i]; + starpu_worker_lock(local_worker); +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + starpu_wake_worker_locked(local_worker); +#endif + starpu_worker_unlock(local_worker); + } + +ret: + return task; +} + +struct starpu_sched_policy _starpu_sched_peager_policy = +{ + .init_sched = initialize_peager_policy, + .deinit_sched = deinitialize_peager_policy, + .add_workers = peager_add_workers, + .remove_workers = peager_remove_workers, + .push_task = push_task_peager_policy, + .pop_task = pop_task_peager_policy, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "peager", + .policy_description = "parallel eager policy", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/parallel_heft.c b/src/sched_policies/parallel_heft.c new file mode 100644 index 
0000000..bd21af3 --- /dev/null +++ b/src/sched_policies/parallel_heft.c @@ -0,0 +1,615 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Distributed queues using performance modeling to assign tasks */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef DBL_MIN +#define DBL_MIN __DBL_MIN__ +#endif + +#ifndef DBL_MAX +#define DBL_MAX __DBL_MAX__ +#endif + +/* if no priority is set when creating the scheduling context, we use the following ones */ +#define DEFAULT_MIN_PRIORITY 0 +#define DEFAULT_MAX_PRIORITY 1 + +//static unsigned ncombinedworkers; +//static enum starpu_perfmodel_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS]; +//static unsigned napplicable_perf_archtypes = 0; + +/* + * Here are the default values of alpha, beta, gamma + */ + +#define _STARPU_SCHED_ALPHA_DEFAULT 1.0 +#define _STARPU_SCHED_BETA_DEFAULT 1.0 +#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 + +struct _starpu_pheft_data +{ + double alpha; + double beta; + double _gamma; + double idle_power; +/* When we push a task on a combined worker we need all the cpu workers it contains + * to be locked at once */ + starpu_pthread_mutex_t global_push_mutex; +}; + +static double worker_exp_start[STARPU_NMAXWORKERS]; +static 
double worker_exp_end[STARPU_NMAXWORKERS]; +static double worker_exp_len[STARPU_NMAXWORKERS]; +static int ntasks[STARPU_NMAXWORKERS]; + +/*!!!!!!! + * It doesn't work with several contexts because the combined workers + * are constructed from the workers available to the program, and not + * to the context !!!!!!!!!!!!!!!!!!!!!!! +*/ + +static void parallel_heft_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ + if (!task->cl || task->execute_on_a_specific_worker) + return; + + unsigned workerid = starpu_worker_get_id_check(); + double model = task->predicted; + double transfer_model = task->predicted_transfer; + const double now = starpu_timing_now(); + + if (isnan(model)) + model = 0.0; + + if (isnan(transfer_model)) + transfer_model = 0.0; + + /* Once we have started the task, we can update the predicted amount + * of work. */ + starpu_worker_lock_self(); + worker_exp_len[workerid] -= model + transfer_model; + worker_exp_start[workerid] = now + model; + worker_exp_end[workerid] = worker_exp_start[workerid] + worker_exp_len[workerid]; + ntasks[workerid]--; + starpu_worker_unlock_self(); +} + +static void parallel_heft_post_exec_hook(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ + unsigned workerid = starpu_worker_get_id_check(); + const double now = starpu_timing_now(); + + /* Once we have executed the task, we can update the predicted amount + * of work. */ + starpu_worker_lock_self(); + worker_exp_start[workerid] = now; + worker_exp_end[workerid] = worker_exp_start[workerid] + worker_exp_len[workerid]; + starpu_worker_unlock_self(); +} + +static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, double exp_start_predicted, double exp_end_predicted, int prio, unsigned sched_ctx_id) +{ + /* make sure someone could execute that task ! 
*/ + STARPU_ASSERT(best_workerid != -1); + + struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + if (starpu_get_prefetch_flag()) + starpu_prefetch_task_input_for(task, best_workerid); + + int ret = 0; + + if (!starpu_worker_is_combined_worker(best_workerid)) + { + starpu_worker_lock(best_workerid); + task->predicted = exp_end_predicted - exp_start_predicted; + /* TODO */ + task->predicted_transfer = 0; + worker_exp_len[best_workerid] += task->predicted; + worker_exp_end[best_workerid] = exp_end_predicted; + worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid]; + + ntasks[best_workerid]++; + starpu_worker_unlock(best_workerid); + + /* We don't want it to interlace its task with a combined + * worker's one */ + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex); + starpu_worker_relax_off(); + + ret = starpu_push_local_task(best_workerid, task, prio); + + STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex); + } + else + { + /* This task doesn't belong to an actual worker, it belongs + * to a combined worker and thus the scheduler doesn't care + * of its predicted values which are insignificant */ + task->predicted = 0; + task->predicted_transfer = 0; + + starpu_parallel_task_barrier_init(task, best_workerid); + int worker_size = 0; + int *combined_workerid; + starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid); + + /* All cpu workers must be locked at once */ + starpu_worker_relax_on(); + STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex); + starpu_worker_relax_off(); + + /* This is a combined worker so we create task aliases */ + int i; + for (i = 0; i < worker_size; i++) + { + struct starpu_task *alias = starpu_task_dup(task); + int local_combined_workerid = combined_workerid[i]; + + alias->predicted = exp_end_predicted - worker_exp_end[local_combined_workerid]; + /* TODO */ + alias->predicted_transfer = 0; + 
alias->destroy = 1; + starpu_worker_lock(local_combined_workerid); + worker_exp_len[local_combined_workerid] += alias->predicted; + worker_exp_end[local_combined_workerid] = exp_end_predicted; + worker_exp_start[local_combined_workerid] = exp_end_predicted - worker_exp_len[local_combined_workerid]; + + ntasks[local_combined_workerid]++; + starpu_worker_unlock(local_combined_workerid); + + _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); + ret |= starpu_push_local_task(local_combined_workerid, alias, prio); + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex); + + } + + return ret; +} + +static double compute_expected_end(double *_worker_exp_end, int workerid) +{ + if (!starpu_worker_is_combined_worker(workerid)) + { + double res; + /* This is a basic worker */ + + res = _worker_exp_end[workerid]; + + return res; + } + else + { + /* This is a combined worker, the expected end is the end for the latest worker */ + int worker_size; + int *combined_workerid; + starpu_combined_worker_get_description(workerid, &worker_size, &combined_workerid); + + double exp_end = DBL_MIN; + + int i; + for (i = 0; i < worker_size; i++) + { + double local_exp_end = _worker_exp_end[combined_workerid[i]]; + exp_end = STARPU_MAX(exp_end, local_exp_end); + } + + return exp_end; + } +} + +static double compute_ntasks_end(int workerid, unsigned sched_ctx_id) +{ + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); + + if (!starpu_worker_is_combined_worker(workerid)) + { + double res; + /* This is a basic worker */ + + /* Here helgrind would shout that this is unprotected, but we + * are fine with getting outdated values, this is just an + * estimation */ + res = ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch); + + return res; + } + else + { + /* This is a combined worker, the expected end is the end for the latest worker */ + int worker_size; + int *combined_workerid; + starpu_combined_worker_get_description(workerid, 
&worker_size, &combined_workerid); + + int ntasks_end=0; + + /* Here helgrind would shout that this is unprotected, but we + * are fine with getting outdated values, this is just an + * estimation */ + int i; + for (i = 0; i < worker_size; i++) + { + /* XXX: this is actually bogus: not all pushed tasks are necessarily parallel... */ + ntasks_end = STARPU_MAX(ntasks_end, (int) ((double) ntasks[combined_workerid[i]] / starpu_worker_get_relative_speedup(perf_arch))); + } + + return ntasks_end; + } +} + +static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id) +{ + struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + unsigned nworkers_ctx = workers->nworkers; + + unsigned workerid, worker_ctx = 0; + int best = -1, best_id_ctx = -1; + + /* this flag is set if the corresponding workerid is selected because + there is no performance prediction available yet */ + int forced_best = -1, forced_best_ctx = -1, forced_nimpl = -1; + + double local_task_length[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double local_data_penalty[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double local_energy[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double local_exp_end[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + + double max_exp_end = 0.0; + + int skip_worker[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; + + double best_exp_start; + double best_exp_end = DBL_MAX; + //double penality_best = 0.0; + + int ntasks_best = -1, ntasks_best_ctx = -1, nimpl_best = -1; + double ntasks_best_end = 0.0; + int calibrating = 0; + + /* A priori, we know all estimations */ + int unknown = 0; + struct starpu_sched_ctx_iterator it; + + double now = starpu_timing_now(); + double _worker_exp_end[nworkers_ctx]; + + memset(skip_worker, 0, 
nworkers_ctx*STARPU_MAXIMPLEMENTATIONS*sizeof(int)); + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + + if(!starpu_worker_is_combined_worker(workerid)) + { + /* Here helgrind would shout that this is unprotected, but we + * are fine with getting outdated values, this is just an + * estimation */ + /* Sometimes workers didn't take the tasks as early as we expected */ + double exp_start = STARPU_MAX(worker_exp_start[workerid], now); + _worker_exp_end[workerid] = exp_start + worker_exp_len[workerid]; + if (_worker_exp_end[workerid] > max_exp_end) + max_exp_end = _worker_exp_end[workerid]; + } + } + + unsigned nimpl; + worker_ctx = 0; + while(workers->has_next(workers, &it)) + { + workerid = workers->get_next(workers, &it); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (!starpu_combined_worker_can_execute_task(workerid, task, nimpl)) + { + /* no one on that queue may execute this task */ + skip_worker[worker_ctx][nimpl] = 1; + continue; + } + else + { + skip_worker[worker_ctx][nimpl] = 0; + } + + + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); + + local_task_length[worker_ctx][nimpl] = starpu_task_expected_length(task, perf_arch,nimpl); + + local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid); + + double ntasks_end = compute_ntasks_end(workerid, sched_ctx_id); + + if (ntasks_best == -1 + || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */ + || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* Not calibrating but this workerid is being calibrated */ + || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this workerid with other non-calibrated */ + ) + { + ntasks_best_end = ntasks_end; + ntasks_best = workerid; + ntasks_best_ctx = worker_ctx; + 
nimpl_best = nimpl; + } + + if (isnan(local_task_length[worker_ctx][nimpl])) + { + static int warned; + STARPU_HG_DISABLE_CHECKING(warned); + if (!warned) + { + warned = 1; + _STARPU_DISP("Warning: performance model for %s not finished calibrating on %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), workerid); + } + /* we are calibrating, we want to speed-up calibration time + * so we privilege non-calibrated tasks (but still + * greedily distribute them to avoid dumb schedules) */ + calibrating = 1; + } + + if (isnan(local_task_length[worker_ctx][nimpl]) + || _STARPU_IS_ZERO(local_task_length[worker_ctx][nimpl])) + /* there is no prediction available for that task + * with that arch yet, so switch to a greedy strategy */ + unknown = 1; + + if (unknown) + continue; + + double task_starting_time = STARPU_MAX( + compute_expected_end(_worker_exp_end, workerid), + now + local_data_penalty[worker_ctx][nimpl]); + + local_exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl]; + + //fprintf(stderr, "WORKER %d -> length %e end %e\n", workerid, local_task_length[worker_ctx][nimpl], local_exp_end[workerid][nimpl]); + + if (local_exp_end[worker_ctx][nimpl] < best_exp_end) + { + /* a better solution was found */ + best_exp_end = local_exp_end[worker_ctx][nimpl]; + nimpl_best = nimpl; + } + + + local_energy[worker_ctx][nimpl] = starpu_task_expected_energy(task, perf_arch,nimpl); + //_STARPU_DEBUG("Scheduler parallel heft: task length (%lf) local energy (%lf) workerid (%u) kernel (%u) \n", local_task_length[workerid],local_energy[workerid],workerid,nimpl); + + if (isnan(local_energy[worker_ctx][nimpl])) + local_energy[worker_ctx][nimpl] = 0.; + + } + worker_ctx++; + } + + if (unknown) + { + forced_best = ntasks_best; + forced_best_ctx = ntasks_best_ctx; + forced_nimpl = nimpl_best; + } + + + if (forced_best == -1) + { + double best_fitness = -1; + worker_ctx = 0; + while(workers->has_next(workers, &it)) + { + workerid = 
workers->get_next(workers, &it); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + if (skip_worker[worker_ctx][nimpl]) + { + /* no one on that queue may execute this task */ + continue; + } + + fitness[worker_ctx][nimpl] = hd->alpha*(local_exp_end[worker_ctx][nimpl] - best_exp_end) + + hd->beta*(local_data_penalty[worker_ctx][nimpl]) + + hd->_gamma*(local_energy[worker_ctx][nimpl]); + + if (local_exp_end[worker_ctx][nimpl] > max_exp_end) + /* This placement will make the computation + * longer, take into account the idle + * consumption of other cpus */ + fitness[worker_ctx][nimpl] += hd->_gamma * hd->idle_power * (local_exp_end[worker_ctx][nimpl] - max_exp_end) / 1000000.0; + + if (best == -1 || fitness[worker_ctx][nimpl] < best_fitness) + { + /* we found a better solution */ + best_fitness = fitness[worker_ctx][nimpl]; + best = workerid; + best_id_ctx = worker_ctx; + nimpl_best = nimpl; + } + + // fprintf(stderr, "FITNESS workerid %d -> %e local_exp_end %e - local_data_penalty %e\n", workerid, fitness[workerid][nimpl], local_exp_end[workerid][nimpl] - best_exp_end, local_data_penalty[workerid][nimpl]); + } + worker_ctx++; + } + } + + STARPU_ASSERT(forced_best != -1 || best != -1); + + if (forced_best != -1) + { + /* there is no prediction available for that task + * with that arch we want to speed-up calibration time + * so we force this measurement */ + best = forced_best; + best_id_ctx = forced_best_ctx; + nimpl_best = forced_nimpl; + //penality_best = 0.0; + best_exp_end = compute_expected_end(_worker_exp_end, best); + } + else + { + //penality_best = local_data_penalty[best_id_ctx][nimpl_best]; + STARPU_ASSERT(best_id_ctx != -1); + STARPU_ASSERT(nimpl_best != -1); + best_exp_end = local_exp_end[best_id_ctx][nimpl_best]; + } + best_exp_start = _worker_exp_end[best]; + + //_STARPU_DEBUG("Scheduler parallel heft: kernel (%u)\n", nimpl_best); + starpu_task_set_implementation(task, nimpl_best); + /* we should now have the best workerid in 
variable "best" */ + starpu_sched_task_break(task); + return push_task_on_best_worker(task, best, best_exp_start, best_exp_end, prio, sched_ctx_id); +} + +static int parallel_heft_push_task(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + int ret_val = -1; + + if (task->priority == STARPU_MAX_PRIO) + { + ret_val = _parallel_heft_push_task(task, 1, sched_ctx_id); + return ret_val; + } + + ret_val = _parallel_heft_push_task(task, 0, sched_ctx_id); + return ret_val; +} + +static void parallel_heft_add_workers(__attribute__((unused)) unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + unsigned i; + double now = starpu_timing_now(); + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + struct _starpu_worker *workerarg = _starpu_get_worker_struct(workerid); + /* init these structures only once for each worker */ + if(!workerarg->has_prev_init) + { + worker_exp_start[workerid] = now; + worker_exp_len[workerid] = 0.0; + worker_exp_end[workerid] = worker_exp_start[workerid]; + ntasks[workerid] = 0; + workerarg->has_prev_init = 1; + } + } + _starpu_sched_find_worker_combinations(workerids, nworkers); + +// start_unclear_part: not very clear where this is used +/* struct _starpu_machine_config *config = _starpu_get_machine_config(); */ +/* ncombinedworkers = config->topology.ncombinedworkers; */ + +/* /\* We pre-compute an array of all the perfmodel archs that are applicable *\/ */ +/* unsigned total_worker_count = nworkers + ncombinedworkers; */ + +/* unsigned used_perf_archtypes[STARPU_NARCH_VARIATIONS]; */ +/* memset(used_perf_archtypes, 0, sizeof(used_perf_archtypes)); */ + +/* for (workerid = 0; workerid < total_worker_count; workerid++) */ +/* { */ +/* enum starpu_perfmodel_archtype perf_archtype = starpu_worker_get_perf_archtype(workerid); */ +/* used_perf_archtypes[perf_archtype] = 1; */ +/* } */ + +// end_unclear_part + +// napplicable_perf_archtypes = 0; + +// int arch; +// for (arch = 0; arch < 
STARPU_NARCH_VARIATIONS; arch++) +// { +// if (used_perf_archtypes[arch]) +// applicable_perf_archtypes[napplicable_perf_archtypes++] = arch; +// } + +} + +static void initialize_parallel_heft_policy(unsigned sched_ctx_id) +{ + struct _starpu_pheft_data *hd; + _STARPU_MALLOC(hd, sizeof(struct _starpu_pheft_data)); + + _STARPU_DISP("Warning: the pheft scheduler is mostly a proof of concept and not really very optimized\n"); + + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, DEFAULT_MIN_PRIORITY); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, DEFAULT_MAX_PRIORITY); + STARPU_ASSERT_MSG(starpu_sched_ctx_get_min_priority(sched_ctx_id) < starpu_sched_ctx_get_max_priority(sched_ctx_id), + "Priority min %d should be lower than priority max %d\n", + starpu_sched_ctx_get_min_priority(sched_ctx_id), starpu_sched_ctx_get_max_priority(sched_ctx_id)); + + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hd); + + hd->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); + hd->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (starpu_getenv("STARPU_SCHED_GAMMA")) + _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); +#endif + hd->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); + hd->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); + + STARPU_PTHREAD_MUTEX_INIT(&hd->global_push_mutex, NULL); + + /* Tell helgrind that we are fine with getting outdated values when + * estimating schedules */ + STARPU_HG_DISABLE_CHECKING(worker_exp_start); + STARPU_HG_DISABLE_CHECKING(worker_exp_end); + STARPU_HG_DISABLE_CHECKING(worker_exp_len); + STARPU_HG_DISABLE_CHECKING(ntasks); +} + +static void 
parallel_heft_deinit(unsigned sched_ctx_id) +{ + struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + STARPU_PTHREAD_MUTEX_DESTROY(&hd->global_push_mutex); + free(hd); +} + +struct starpu_sched_policy _starpu_sched_parallel_heft_policy = +{ + .init_sched = initialize_parallel_heft_policy, + .deinit_sched = parallel_heft_deinit, + .add_workers = parallel_heft_add_workers, + .remove_workers = NULL, + .push_task = parallel_heft_push_task, + .pop_task = NULL, + .pre_exec_hook = parallel_heft_pre_exec_hook, + .post_exec_hook = parallel_heft_post_exec_hook, + .policy_name = "pheft", + .policy_description = "parallel HEFT", + .worker_type = STARPU_WORKER_LIST, + .prefetches = 1, +}; diff --git a/src/sched_policies/prio_deque.c b/src/sched_policies/prio_deque.c new file mode 100644 index 0000000..2b4c7ef --- /dev/null +++ b/src/sched_policies/prio_deque.c @@ -0,0 +1,229 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +void starpu_st_prio_deque_init(struct starpu_st_prio_deque *pdeque) +{ + memset(pdeque,0,sizeof(*pdeque)); + starpu_task_prio_list_init(&pdeque->list); + STARPU_HG_DISABLE_CHECKING(pdeque->exp_start); + STARPU_HG_DISABLE_CHECKING(pdeque->exp_end); + STARPU_HG_DISABLE_CHECKING(pdeque->exp_len); +} + +void starpu_st_prio_deque_destroy(struct starpu_st_prio_deque *pdeque) +{ + starpu_task_prio_list_deinit(&pdeque->list); +} + +int starpu_st_prio_deque_is_empty(struct starpu_st_prio_deque *pdeque) +{ + return pdeque->ntasks == 0; +} + +void starpu_st_prio_deque_erase(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) +{ + starpu_task_prio_list_erase(&pdeque->list, task); +} + +int starpu_st_prio_deque_push_front_task(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) +{ + starpu_task_prio_list_push_front(&pdeque->list, task); + pdeque->ntasks++; + return 0; +} + +int starpu_st_prio_deque_push_back_task(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) +{ + starpu_task_prio_list_push_back(&pdeque->list, task); + pdeque->ntasks++; + return 0; +} + +struct starpu_task *starpu_st_prio_deque_highest_task(struct starpu_st_prio_deque *pdeque) +{ + struct starpu_task *task; + if (starpu_task_prio_list_empty(&pdeque->list)) + return NULL; + task = starpu_task_prio_list_front_highest(&pdeque->list); + return task; +} + +struct starpu_task *starpu_st_prio_deque_pop_task(struct starpu_st_prio_deque *pdeque) +{ + struct starpu_task *task; + if (starpu_task_prio_list_empty(&pdeque->list)) + return NULL; + task = starpu_task_prio_list_pop_front_highest(&pdeque->list); + pdeque->ntasks--; + return task; +} + +struct starpu_task *starpu_st_prio_deque_pop_back_task(struct starpu_st_prio_deque *pdeque) +{ + struct starpu_task *task; + if (starpu_task_prio_list_empty(&pdeque->list)) + return NULL; + task = starpu_task_prio_list_pop_back_lowest(&pdeque->list); + pdeque->ntasks--; + return task; +} + +int 
starpu_st_prio_deque_pop_this_task(struct starpu_st_prio_deque *pdeque, int workerid, struct starpu_task *task) +{ + unsigned nimpl = 0; +#ifdef STARPU_DEBUG + STARPU_ASSERT(starpu_task_prio_list_ismember(&pdeque->list, task)); +#endif + + if (workerid < 0 || starpu_worker_can_execute_task_first_impl(workerid, task, &nimpl)) + { + starpu_task_set_implementation(task, nimpl); + starpu_task_prio_list_erase(&pdeque->list, task); + pdeque->ntasks--; + return 1; + } + + return 0; +} + +static inline int pred_true(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, void *v STARPU_ATTRIBUTE_UNUSED) +{ + (void)t; + (void)v; + return 1; +} + +static inline int pred_can_execute(struct starpu_task * t, void * pworkerid) +{ + int i; + for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) + if(starpu_worker_can_execute_task(*(int*)pworkerid, t,i)) + { + starpu_task_set_implementation(t, i); + return 1; + } + return 0; +} + +#define REMOVE_TASK(pdeque, first_task, next_task, predicate, parg) \ + { \ + struct starpu_task * t; \ + if (skipped) \ + *skipped = NULL; \ + for (t = starpu_task_prio_##first_task(&pdeque->list); \ + t != starpu_task_prio_list_end(&pdeque->list); \ + t = starpu_task_prio_##next_task(&pdeque->list, t)) \ + { \ + if (predicate(t, parg)) \ + { \ + starpu_task_prio_list_erase(&pdeque->list, t); \ + pdeque->ntasks--; \ + return t; \ + } \ + else \ + if (skipped) \ + *skipped = t; \ + } \ + return NULL; \ + } + +struct starpu_task *starpu_st_prio_deque_pop_task_for_worker(struct starpu_st_prio_deque * pdeque, int workerid, struct starpu_task * *skipped) +{ + STARPU_ASSERT(pdeque); + STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < starpu_worker_get_count()); + REMOVE_TASK(pdeque, list_begin, list_next, pred_can_execute, &workerid); +} + +struct starpu_task *starpu_st_prio_deque_deque_task_for_worker(struct starpu_st_prio_deque * pdeque, int workerid, struct starpu_task * *skipped) +{ + STARPU_ASSERT(pdeque); + STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < 
starpu_worker_get_count()); + REMOVE_TASK(pdeque, list_back_highest, list_prev_highest, pred_can_execute, &workerid); +} + +struct starpu_task *starpu_st_prio_deque_deque_first_ready_task(struct starpu_st_prio_deque * pdeque, unsigned workerid) +{ + struct starpu_task *task = NULL, *current; + + if (starpu_task_prio_list_empty(&pdeque->list)) + return NULL; + + if (pdeque->ntasks > 0) + { + pdeque->ntasks--; + + task = starpu_task_prio_list_front_highest(&pdeque->list); + if (STARPU_UNLIKELY(!task)) + return NULL; + + int first_task_priority = task->priority; + + size_t non_ready_best = SIZE_MAX; + size_t non_loading_best = SIZE_MAX; + size_t non_allocated_best = SIZE_MAX; + + for (current = starpu_task_prio_list_begin(&pdeque->list); + current != starpu_task_prio_list_end(&pdeque->list); + current = starpu_task_prio_list_next(&pdeque->list, current)) + { + int priority = current->priority; + + if (priority >= first_task_priority) + { + size_t non_ready, non_loading, non_allocated; + starpu_st_non_ready_buffers_size(current, workerid, &non_ready, &non_loading, &non_allocated); + if (non_ready < non_ready_best) + { + non_ready_best = non_ready; + non_loading_best = non_loading; + non_allocated_best = non_allocated; + task = current; + + if (non_ready == 0 && non_allocated == 0) + break; + } + else if (non_ready == non_ready_best) + { + if (non_loading < non_loading_best) + { + non_loading_best = non_loading; + non_allocated_best = non_allocated; + task = current; + } + else if (non_loading == non_loading_best) + { + if (non_allocated < non_allocated_best) + { + non_allocated_best = non_allocated; + task = current; + } + } + } + } + } + + starpu_task_prio_list_erase(&pdeque->list, task); + } + + return task; +} + diff --git a/src/sched_policies/prio_deque.h b/src/sched_policies/prio_deque.h new file mode 100644 index 0000000..f836864 --- /dev/null +++ b/src/sched_policies/prio_deque.h @@ -0,0 +1,40 @@ +/* StarPU --- Runtime system for heterogeneous multicore 
architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Uppsala University + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __PRIO_DEQUE_H__ +#define __PRIO_DEQUE_H__ + +#include + +/** @file */ + +struct starpu_st_prio_deque +{ + struct starpu_task_prio_list list; + unsigned ntasks; + unsigned nprocessed; + // Assumptions: + // exp_len is the sum of predicted_length + predicted_transfer of all tasks in list + // exp_start is the time at which the first task of list can start + // exp_end = exp_start + exp_len + // Careful: those are NOT maintained by the prio_deque operations + double exp_start, exp_end, exp_len; +}; + + + +#endif /* __PRIO_DEQUE_H__ */ diff --git a/src/sched_policies/random_policy.c b/src/sched_policies/random_policy.c new file mode 100644 index 0000000..5a38bb3 --- /dev/null +++ b/src/sched_policies/random_policy.c @@ -0,0 +1,114 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Policy attributing tasks randomly to workers */ + +#include +#include +#include +#include +#include +#include + +static int _random_push_task(struct starpu_task *task, unsigned prio) +{ + /* pick a worker at random, weighted by its relative speedup; returns -ENODEV if no worker can execute the task */ + double alpha_sum = 0.0; + + unsigned sched_ctx_id = task->sched_ctx; + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + int worker; + int worker_arr[STARPU_NMAXWORKERS]; + double speedup_arr[STARPU_NMAXWORKERS]; + int size = 0; + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + worker = workers->get_next(workers, &it); + unsigned impl; + if(starpu_worker_can_execute_task_first_impl(worker, task, &impl)) + { + struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id); + double speedup = starpu_worker_get_relative_speedup(perf_arch); + alpha_sum += speedup; + speedup_arr[size] = speedup; + worker_arr[size++] = worker; + } + } + + double random = starpu_drand48()*alpha_sum; + //printf("my rand is %e over %e\n", random, alpha_sum); + + if(size == 0) + return -ENODEV; + + unsigned selected = worker_arr[size - 1]; + + double alpha = 0.0; + int i; + for(i = 0; i < size; i++) + { + worker = worker_arr[i]; + double worker_alpha = speedup_arr[i]; + + if (alpha + worker_alpha >= random) + { + /* we found the worker */ + selected = worker; + break; + } + + alpha += worker_alpha; + } + STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), selected); + starpu_sched_task_break(task); + return starpu_push_local_task(selected, task, prio); +} + +static int random_push_task(struct starpu_task *task) +{ + return
_random_push_task(task, !!task->priority); +} + +static void initialize_random_policy(unsigned sched_ctx_id) +{ + (void) sched_ctx_id; + starpu_srand48(time(NULL)); +} + +static void deinitialize_random_policy(unsigned sched_ctx_id) +{ + (void) sched_ctx_id; +} + +struct starpu_sched_policy _starpu_sched_random_policy = +{ + .init_sched = initialize_random_policy, + .add_workers = NULL, + .remove_workers = NULL, + .deinit_sched = deinitialize_random_policy, + .push_task = random_push_task, + .pop_task = NULL, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "random", + .policy_description = "weighted random based on worker overall performance", + .worker_type = STARPU_WORKER_LIST, +}; diff --git a/src/sched_policies/sched_component.h b/src/sched_policies/sched_component.h new file mode 100644 index 0000000..46c1ada --- /dev/null +++ b/src/sched_policies/sched_component.h @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#ifndef __SCHED_COMPONENT_H__ +#define __SCHED_COMPONENT_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +/** lock and unlock drivers for modifying schedulers */ +void _starpu_sched_component_lock_all_workers(void); +void _starpu_sched_component_unlock_all_workers(void); + +void _starpu_sched_component_workers_destroy(void); + +struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component *); + +struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id); + +#pragma GCC visibility pop + +#endif diff --git a/src/sched_policies/scheduler_maker.c b/src/sched_policies/scheduler_maker.c new file mode 100644 index 0000000..6a9ce37 --- /dev/null +++ b/src/sched_policies/scheduler_maker.c @@ -0,0 +1,291 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include + +#ifdef STARPU_HAVE_HWLOC +#include +#ifndef HWLOC_API_VERSION +#define HWLOC_OBJ_PU HWLOC_OBJ_PROC +#endif +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif +#endif + +#include "sched_component.h" + + + +/* The scheduler is built by a recursive function called on the hwloc topology with a starpu_sched_specs structure, + * each call returns a set of starpu_sched_component, not a single one, because you may have a topology like that : + * MACHINE -- MEMORY NODE -- SOCKET + * \- SOCKET + * and you have defined a component for MACHINE, and a component for SOCKET, but not for MEMORY NODE then the recursive call + * on MEMORY NODE will return 2 starpu_sched_component for those 2 sockets + * + * + */ + +struct sched_component_list +{ + struct starpu_sched_component ** arr; + unsigned size; +}; + +static void init_list(struct sched_component_list * list) +{ + memset(list,0,sizeof(*list)); +} +static void destroy_list(struct sched_component_list * list) +{ + free(list->arr); +} +static void add_component(struct sched_component_list *list, struct starpu_sched_component * component) +{ + _STARPU_REALLOC(list->arr, sizeof(*list->arr) * (list->size + 1)); + list->arr[list->size] = component; + list->size++; +} +/* this is the function that actually builds the scheduler, but without workers */ +static struct sched_component_list helper_make_scheduler(struct starpu_sched_tree *tree, hwloc_obj_t obj, struct starpu_sched_component_specs specs, unsigned sched_ctx_id) +{ + STARPU_ASSERT(obj); + + struct starpu_sched_component * component = NULL; + + /*set components for this obj */ +#define CASE(ENUM,spec_member) \ + case ENUM: \ + if(specs.spec_member) \ + component = starpu_sched_component_composed_component_create(tree, specs.spec_member); \ + break + switch(obj->type) + { + CASE(HWLOC_OBJ_MACHINE,hwloc_machine_composed_sched_component); + CASE(HWLOC_OBJ_GROUP,hwloc_component_composed_sched_component); +
CASE(HWLOC_OBJ_NUMANODE,hwloc_component_composed_sched_component); + CASE(HWLOC_OBJ_SOCKET,hwloc_socket_composed_sched_component); +#ifdef HWLOC_OBJ_CACHE + CASE(HWLOC_OBJ_CACHE,hwloc_cache_composed_sched_component); +#endif +#ifdef HWLOC_OBJ_L1CACHE + CASE(HWLOC_OBJ_L1CACHE,hwloc_cache_composed_sched_component); + CASE(HWLOC_OBJ_L2CACHE,hwloc_cache_composed_sched_component); + CASE(HWLOC_OBJ_L3CACHE,hwloc_cache_composed_sched_component); + CASE(HWLOC_OBJ_L4CACHE,hwloc_cache_composed_sched_component); + CASE(HWLOC_OBJ_L5CACHE,hwloc_cache_composed_sched_component); +#endif + default: + break; + } + + struct sched_component_list l; + init_list(&l); + unsigned i; + /* collect the children's components */ + for(i = 0; i < obj->arity; i++) + { + struct sched_component_list lc = helper_make_scheduler(tree, obj->children[i],specs, sched_ctx_id); + unsigned j; + for(j = 0; j < lc.size; j++) + add_component(&l, lc.arr[j]); + destroy_list(&lc); + } + if(!component) + return l; + for(i = 0; i < l.size; i++) + starpu_sched_component_connect(component, l.arr[i]); + destroy_list(&l); + init_list(&l); + component->obj = obj; + add_component(&l, component); + return l; +} +/* return the first component in prefix order such that component->obj == obj, or NULL */ +static struct starpu_sched_component * _find_sched_component_with_obj(struct starpu_sched_component * component, hwloc_obj_t obj) +{ + if(component == NULL) + return NULL; + if(component->obj == obj) + return component; + unsigned i; + for(i = 0; i < component->nchildren; i++) + { + struct starpu_sched_component * tmp = _find_sched_component_with_obj(component->children[i], obj); + if(tmp) + return tmp; + } + return NULL; +} + +/* return true if all workers in the tree have the same perf_arch as w_ref, + * if there is no worker it returns true + */ +static int is_same_kind_of_all(struct starpu_sched_component * root, struct _starpu_worker * w_ref) +{ + if(starpu_sched_component_is_worker(root)) + { + struct _starpu_worker * w = 
root->data; + STARPU_ASSERT(w->perf_arch.ndevices == 1); + return w->perf_arch.devices[0].type == w_ref->perf_arch.devices[0].type; + } + + unsigned i; + for(i = 0;i < root->nchildren; i++) + if(!is_same_kind_of_all(root->children[i], w_ref)) + return 0; + return 1; +} +/* buggy function + * return the starpu_sched_component linked to the supposed memory component of worker_component + */ +static struct starpu_sched_component * find_mem_component(struct starpu_sched_component * root, struct starpu_sched_component * worker_component) +{ + struct starpu_sched_component * component = worker_component; + while(component->obj->type != HWLOC_OBJ_NUMANODE + && component->obj->type != HWLOC_OBJ_GROUP + && component->obj->type != HWLOC_OBJ_MACHINE) + { + hwloc_obj_t tmp = component->obj; + do + { + component = _find_sched_component_with_obj(root,tmp); + tmp = tmp->parent; + } + while(!component); + + } + return component; +} + +static struct starpu_sched_component * where_should_we_plug_this(struct starpu_sched_component *root, struct starpu_sched_component * worker_component, struct starpu_sched_component_specs specs, unsigned sched_ctx_id) +{ + struct starpu_sched_component * mem = find_mem_component(root ,worker_component); + if(specs.mix_heterogeneous_workers || mem->parents[sched_ctx_id] == NULL) + return mem; + hwloc_obj_t obj = mem->obj; + struct starpu_sched_component * parent = mem->parents[sched_ctx_id]; + unsigned i; + for(i = 0; i < parent->nchildren; i++) + { + if(parent->children[i]->obj == obj + && is_same_kind_of_all(parent->children[i], worker_component->data)) + return parent->children[i]; + } + if(obj->type == HWLOC_OBJ_NUMANODE || obj->type == HWLOC_OBJ_GROUP) + { + struct starpu_sched_component * component = starpu_sched_component_composed_component_create(root->tree, specs.hwloc_component_composed_sched_component); + component->obj = obj; + starpu_sched_component_connect(parent, component); + return component; + } + return parent; +} + +static void 
set_worker_leaf(struct starpu_sched_component * root, struct starpu_sched_component * worker_component, unsigned sched_ctx_id, + struct starpu_sched_component_specs specs) +{ + struct _starpu_worker * worker = worker_component->data; + struct starpu_sched_component * component = where_should_we_plug_this(root,worker_component,specs, sched_ctx_id); + struct starpu_sched_component_composed_recipe * recipe = specs.worker_composed_sched_component ? + specs.worker_composed_sched_component(worker->arch):NULL; + STARPU_ASSERT(component); + if(recipe) + { + struct starpu_sched_component * tmp = starpu_sched_component_composed_component_create(root->tree, recipe); +#ifdef STARPU_DEVEL +#warning FIXME component->obj is set to worker_component->obj even for accelerators workers +#endif + tmp->obj = worker_component->obj; + starpu_sched_component_connect(component, tmp); + component = tmp; + } + starpu_sched_component_composed_recipe_destroy(recipe); + starpu_sched_component_connect(component, worker_component); +} + +#ifdef STARPU_DEVEL +static const char * name_hwloc_component(struct starpu_sched_component * component) +{ + return hwloc_obj_type_string(component->obj->type); +} +static const char * name_sched_component(struct starpu_sched_component * component) +{ + if(starpu_sched_component_is_fifo(component)) + return "fifo component"; + if(starpu_sched_component_is_heft(component)) + return "heft component"; + if(starpu_sched_component_is_random(component)) + return "random component"; + if(starpu_sched_component_is_worker(component)) + { + struct _starpu_worker * w = _starpu_sched_component_worker_get_worker(component); +#define SIZE 256 + static char output[SIZE]; + snprintf(output, SIZE,"component worker %d %s",w->workerid,w->name); + return output; + } + if(starpu_sched_component_is_work_stealing(component)) + return "work stealing component"; + + return "unknown"; +} +static void helper_display_scheduler(FILE* out, unsigned depth, struct starpu_sched_component * 
component) +{ + if(!component) + return; + fprintf(out,"%*s-> %s : %s\n", depth * 2 , "", name_sched_component(component), name_hwloc_component(component)); + unsigned i; + for(i = 0; i < component->nchildren; i++) + helper_display_scheduler(out, depth + 1, component->children[i]); +} +#endif //STARPU_DEVEL +struct starpu_sched_tree * starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs specs) +{ + struct starpu_sched_tree * tree = starpu_sched_tree_create(sched_ctx_id); + + struct _starpu_machine_config *config = _starpu_get_machine_config(); + hwloc_topology_t topology = config->topology.hwtopology; + + struct sched_component_list list = helper_make_scheduler(tree, hwloc_get_root_obj(topology), specs, sched_ctx_id); + STARPU_ASSERT(list.size == 1); + + tree->root = list.arr[0]; + destroy_list(&list); + + unsigned i; + for(i = 0; i < starpu_worker_get_count(); i++) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(i); + struct starpu_sched_component *worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); + STARPU_ASSERT(worker); + set_worker_leaf(tree->root,worker_component, sched_ctx_id, specs); + } + + + starpu_sched_tree_update_workers(tree); +#ifdef STARPU_DEVEL + _STARPU_MSG("scheduler created :\n"); + helper_display_scheduler(stderr, 0, tree->root); +#endif + + return tree; + +} diff --git a/src/sched_policies/work_stealing_policy.c b/src/sched_policies/work_stealing_policy.c new file mode 100644 index 0000000..adfe881 --- /dev/null +++ b/src/sched_policies/work_stealing_policy.c @@ -0,0 +1,923 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Work stealing policy */ + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Experimental (dead) code which needs to be tested, fixed... */ +/* #define USE_OVERLOAD */ + +/* + * Experimental code for improving data cache locality: + * + * USE_LOCALITY: + * - for each data, we record on which worker it was last accessed with the + * locality flag. + * + * - when pushing a ready task, we choose the worker which has last accessed the + * most data of the task with the locality flag. + * + * USE_LOCALITY_TASKS: + * - for each worker, we record the locality data that the task used last (i.e. a rough + * estimation of what is contained in the innermost caches). + * + * - for each worker, we have a hash table associating from a data handle to + * all the ready tasks pushed to it that will use it with the locality flag. + * + * - When fetching a task from a queue, pick a task which has the biggest number + * of data estimated to be contained in the cache. 
+ */ + +//#define USE_LOCALITY + + +//#define USE_LOCALITY_TASKS + +/* Maximum number of recorded locality data per task */ +#define MAX_LOCALITY 8 + +/* Entry for queued_tasks_per_data: records that a queued task is accessing the data with locality flag */ +#ifdef USE_LOCALITY_TASKS +struct locality_entry +{ + UT_hash_handle hh; + starpu_data_handle_t data; + struct starpu_task *task; +}; +#endif + +struct _starpu_work_stealing_data_per_worker +{ + char fill1[STARPU_CACHELINE_SIZE]; + /* This is read-mostly, only updated when the queue becomes empty or + * becomes non-empty, to make it generally cheap to check */ + unsigned notask; /* whether the queue is empty */ + char fill2[STARPU_CACHELINE_SIZE]; + + struct starpu_st_prio_deque queue; + int running; + int *proxlist; + int busy; /* Whether this worker is working on a task */ + + /* keep track of the work performed from the beginning of the algorithm to make + * better decisions about which queue to select when deferring work + */ + unsigned last_pop_worker; + +#ifdef USE_LOCALITY_TASKS + /* This records the same as queue, but hashed by data accessed with locality flag. */ + /* FIXME: we record only one task per data, assuming that the access is + * RW, and thus only one task is ready to write to it. Do we really need to handle the R case too?
*/ + struct locality_entry *queued_tasks_per_data; + + /* This records the last data accessed by the worker */ + starpu_data_handle_t last_locality[MAX_LOCALITY]; + int nlast_locality; +#endif +}; + +struct _starpu_work_stealing_data +{ + int (*select_victim)(struct _starpu_work_stealing_data *, unsigned, int); + struct _starpu_work_stealing_data_per_worker *per_worker; + /* keep track of the work performed from the beginning of the algorithm to make + * better decisions about which queue to select when deferring work + */ + unsigned last_push_worker; +}; + +#ifdef USE_OVERLOAD + +/** + * Minimum number of tasks we wait for being processed before we start assuming + * on which worker the computation would be faster. + */ +static int calibration_value = 0; + +#endif /* USE_OVERLOAD */ + + +/** + * Return a worker from which a task can be stolen. + * Selecting a worker is done in a round-robin fashion, unless + * the worker previously selected doesn't own any task, + * then we return the first non-empty worker.
+ */ +static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id) +{ + unsigned workerid = starpu_worker_get_id_check(); + unsigned worker = ws->per_worker[workerid].last_pop_worker; + unsigned nworkers; + int *workerids = NULL; + nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); + unsigned ntasks = 0; + + /* If the worker's queue is empty, let's try + * the next ones */ + while (1) + { + /* Here helgrind would shout that this is unprotected, but we + * are fine with getting outdated values, this is just an + * estimation */ + if (!ws->per_worker[workerids[worker]].notask) + { + if (ws->per_worker[workerids[worker]].busy + || starpu_worker_is_blocked_in_parallel(workerids[worker])) + { + ntasks = 1; + break; + } + } + + worker = (worker + 1) % nworkers; + if (worker == ws->per_worker[workerid].last_pop_worker) + { + /* We got back to the first worker, + * don't go in infinite loop */ + ntasks = 0; + break; + } + } + + ws->per_worker[workerid].last_pop_worker = (worker + 1) % nworkers; + + worker = workerids[worker]; + + if (ntasks) + return worker; + else + return -1; +} + +/** + * Return a worker to which a task should be added. + * Selecting a worker is done in a round-robin fashion.
+ */ +static unsigned select_worker_round_robin(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) +{ + unsigned worker; + unsigned nworkers; + int *workerids; + nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); + + worker = ws->last_push_worker; + do + worker = (worker + 1) % nworkers; + while (!ws->per_worker[workerids[worker]].running || !starpu_worker_can_execute_task_first_impl(workerids[worker], task, NULL)); + + ws->last_push_worker = worker; + + return workerids[worker]; +} + +#ifdef USE_LOCALITY +/* Select a worker according to the locality of the data of the task to be scheduled */ +static unsigned select_worker_locality(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) +{ + unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); + if (nbuffers == 0) + return -1; + + unsigned i, n; + unsigned ndata[STARPU_NMAXWORKERS] = { 0 }; + int best_worker = -1; + + n = 0; + for (i = 0; i < nbuffers; i++) + { + if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + starpu_data_handle_t data = STARPU_TASK_GET_HANDLE(task, i); + int locality = data->last_locality; + if (locality >= 0) + ndata[locality]++; + n++; + } + } + + if (n) + { + /* Some locality buffers, choose worker which has most of them */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + unsigned best_ndata = 0; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + int workerid = workers->get_next(workers, &it); + if (ndata[workerid] > best_ndata && ws->per_worker[workerid].running && ws->per_worker[workerid].busy) + { + best_worker = workerid; + best_ndata = ndata[workerid]; + } + } + } + return best_worker; +} + +/* Record in the data which worker will handle the task with the locality flag */ +static void record_data_locality(struct starpu_task *task, int workerid) +{ + /* Record
in the locality data where the task went */ + unsigned i; + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + STARPU_TASK_GET_HANDLE(task, i)->last_locality = workerid; + } +} +#else +static void record_data_locality(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED) +{ +} +#endif + +#ifdef USE_LOCALITY_TASKS +/* Record in the worker which data it used last with the locality flag */ +static void record_worker_locality(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) +{ + /* Record in the locality data where the task went */ + unsigned i; + struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; + + data->nlast_locality = 0; + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + data->last_locality[data->nlast_locality] = STARPU_TASK_GET_HANDLE(task, i); + data->nlast_locality++; + if (data->nlast_locality == MAX_LOCALITY) + break; + } +} +/* Called when pushing a task to a queue */ +static void locality_pushed_task(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) +{ + struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; + unsigned i; + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + struct locality_entry *entry; + HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry); + if (STARPU_LIKELY(!entry)) + { + _STARPU_MALLOC(entry, sizeof(*entry)); + entry->data = handle; + entry->task = task; + HASH_ADD_PTR(data->queued_tasks_per_data, data, entry); + } + } +} + +/* Pick a task from source's queue, for execution on target */ +static struct starpu_task *ws_pick_task(struct _starpu_work_stealing_data *ws, int source, int
target) +{ + struct _starpu_work_stealing_data_per_worker *data_source = &ws->per_worker[source]; + struct _starpu_work_stealing_data_per_worker *data_target = &ws->per_worker[target]; + unsigned i, j, n = data_target->nlast_locality; + struct starpu_task *(tasks[MAX_LOCALITY]) = { NULL }, *best_task = NULL; + int ntasks[MAX_LOCALITY] = { 0 }, best_n; /* Number of locality data for this worker used by this task */ + /* Look at the last data accessed by this worker */ + STARPU_ASSERT(n <= MAX_LOCALITY); /* record_worker_locality() may fill all MAX_LOCALITY slots */ + for (i = 0; i < n; i++) + { + starpu_data_handle_t handle = data_target->last_locality[i]; + struct locality_entry *entry; + HASH_FIND_PTR(data_source->queued_tasks_per_data, &handle, entry); + if (entry) + { + /* Record task */ + tasks[i] = entry->task; + ntasks[i] = 1; + + /* And increment counter of the same task */ + for (j = 0; j < i; j++) + { + if (tasks[j] == tasks[i]) + { + ntasks[j]++; + break; + } + } + } + } + /* Now find the task with most locality data for this worker */ + best_n = 0; + for (i = 0; i < n; i++) + { + if (ntasks[i] > best_n) + { + best_task = tasks[i]; + best_n = ntasks[i]; + } + } + + if (best_n > 0) + { + /* found an interesting task, try to pick it!
*/ + if (starpu_st_prio_deque_pop_this_task(&data_source->queue, target, best_task)) + { + if (!data_source->queue.ntasks) + { + STARPU_ASSERT(ws->per_worker[source].notask == 0); + ws->per_worker[source].notask = 1; + } + return best_task; + } + } + + /* Didn't find an interesting task, or couldn't run it */ + struct starpu_task *task; + + if (source != target) + task = starpu_st_prio_deque_deque_task_for_worker(&data_source->queue, target, NULL); + else + task = starpu_st_prio_deque_pop_task_for_worker(&data_source->queue, target, NULL); + + if (task && !data_source->queue.ntasks) + { + STARPU_ASSERT(ws->per_worker[source].notask == 0); + ws->per_worker[source].notask = 1; + } + return task; +} + +/* Called when popping a task from a queue */ +static void locality_popped_task(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) +{ + struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; + unsigned i; + for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) + if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) + { + starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); + struct locality_entry *entry; + HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry); + if (STARPU_LIKELY(entry)) + { + if (entry->task == task) + { + HASH_DEL(data->queued_tasks_per_data, entry); + free(entry); + } + } + } +} +#else +static void record_worker_locality(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ +} +/* Called when pushing a task to a queue */ +static void locality_pushed_task(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ +} +/* Pick a task from source's queue, for execution on target */
+static struct starpu_task *ws_pick_task(struct _starpu_work_stealing_data *ws, int source, int target) +{ + struct starpu_task *task; + if (source != target) + task = starpu_st_prio_deque_deque_task_for_worker(&ws->per_worker[source].queue, target, NULL); + else + task = starpu_st_prio_deque_pop_task_for_worker(&ws->per_worker[source].queue, target, NULL); + + if (task && !ws->per_worker[source].queue.ntasks) + { + STARPU_ASSERT(ws->per_worker[source].notask == 0); + ws->per_worker[source].notask = 1; + } + return task; +} +/* Called when popping a task from a queue */ +static void locality_popped_task(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) +{ +} +#endif + +#ifdef USE_OVERLOAD + +/** + * Return a ratio helpful to determine whether a worker is suitable to steal + * tasks from or to put some tasks in its queue. + * + * \return a ratio with a positive or negative value, describing the current state of the worker : + * a smaller value implies a faster worker with a relatively emptier queue : more suitable to put tasks in + * a bigger value implies a slower worker with a relatively more replete queue : more suitable to steal tasks from + */ +static float overload_metric(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, unsigned id) +{ + float execution_ratio = 0.0f; + float current_ratio = 0.0f; + + int nprocessed = _starpu_get_deque_nprocessed(ws->per_worker[id].queue); + unsigned njobs = _starpu_get_deque_njobs(ws->per_worker[id].queue); + + /* Did we get enough information ?
*/ + if (ws->performed_total > 0 && nprocessed > 0) + { + /* How fast or slow is the worker compared to the other workers */ + execution_ratio = (float) nprocessed / ws->performed_total; + /* How replete is its queue */ + current_ratio = (float) njobs / nprocessed; + } + else + { + return 0.0f; + } + + return (current_ratio - execution_ratio); +} + +/** + * Return the most suitable worker from which a task can be stolen. + * The number of previously processed tasks, total and local, + * and the number of tasks currently awaiting to be processed + * by the workers are taken into account to select the most suitable + * worker to steal task from. + */ +static int select_victim_overload(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id) +{ + unsigned best_worker = 0; + float best_ratio = -FLT_MAX; /* lowest float: FLT_MIN is the smallest positive value and would reject negative ratios */ + + /* Don't try to play smart until we get + * enough information. */ + if (ws->performed_total < calibration_value) + return select_victim_round_robin(ws, sched_ctx_id); + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + float worker_ratio = overload_metric(ws, sched_ctx_id, worker); + + if (worker_ratio > best_ratio && ws->per_worker[worker].running && ws->per_worker[worker].busy) + { + best_worker = worker; + best_ratio = worker_ratio; + } + } + + return best_worker; +} + +/** + * Return the most suitable worker to which a task should be added. + * The number of previously processed tasks, total and local, + * and the number of tasks currently awaiting to be processed + * by the workers are taken into account to select the most suitable + * worker to add a task to.
+ */ +static unsigned select_worker_overload(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) +{ + unsigned best_worker = 0; + float best_ratio = FLT_MAX; + + /* Don't try to play smart until we get + * enough information. */ + if (ws->performed_total < calibration_value) + return select_worker_round_robin(ws, task, sched_ctx_id); + + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + { + unsigned worker = workers->get_next(workers, &it); + float worker_ratio = overload_metric(ws, sched_ctx_id, worker); + + if (worker_ratio < best_ratio && ws->per_worker[worker].running && starpu_worker_can_execute_task_first_impl(worker, task, NULL)) + { + best_worker = worker; + best_ratio = worker_ratio; + } + } + + return best_worker; +} + +#endif /* USE_OVERLOAD */ + + +/** + * Return a worker from which a task can be stolen. + * This is a phony function used to call the right + * function depending on the value of USE_OVERLOAD. + */ +static inline int select_victim(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, + int workerid STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef USE_OVERLOAD + return select_victim_overload(ws, sched_ctx_id); +#else + return select_victim_round_robin(ws, sched_ctx_id); +#endif /* USE_OVERLOAD */ +} + +/** + * Return a worker to which a task should be added. + * This is a phony function used to call the right + * function depending on the value of USE_OVERLOAD.
+ */ +static inline unsigned select_worker(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) +{ +#ifdef USE_OVERLOAD + return select_worker_overload(ws, task, sched_ctx_id); +#else + return select_worker_round_robin(ws, task, sched_ctx_id); +#endif /* USE_OVERLOAD */ +} + + +/* Note: this is not scalable work stealing, use lws instead */ +static struct starpu_task *ws_pop_task(unsigned sched_ctx_id) +{ + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + struct starpu_task *task = NULL; + unsigned workerid = starpu_worker_get_id_check(); + + if (ws->per_worker[workerid].busy) + ws->per_worker[workerid].busy = 0; + +#ifdef STARPU_NON_BLOCKING_DRIVERS + if (STARPU_RUNNING_ON_VALGRIND || !starpu_st_prio_deque_is_empty(&ws->per_worker[workerid].queue)) +#endif + { + task = ws_pick_task(ws, workerid, workerid); + if (task) + locality_popped_task(ws, task, workerid, sched_ctx_id); + } + + if(task) + { + /* there was a local task */ + ws->per_worker[workerid].busy = 1; + if (_starpu_get_nsched_ctxs() > 1) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); + if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) + task = NULL; + _starpu_sched_ctx_unlock_write(sched_ctx_id); + } + return task; + } + + /* we need to steal someone's job */ + starpu_worker_relax_on(); + int victim = ws->select_victim(ws, sched_ctx_id, workerid); + starpu_worker_relax_off(); + if (victim == -1) + { + return NULL; + } + + if (_starpu_worker_trylock(victim)) + { + /* victim is busy, don't bother it, come back later */ +#ifdef STARPU_SIMGRID + starpu_sleep(0.000001); + /* Make sure we come back and not block */ + starpu_wake_worker_no_relax(workerid); +#endif + return NULL; + } + if (ws->per_worker[victim].running && 
ws->per_worker[victim].queue.ntasks > 0) + { + task = ws_pick_task(ws, victim, workerid); + } + + if (task) + { + _STARPU_TRACE_WORK_STEALING(workerid, victim); + starpu_sched_task_break(task); + starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, victim); + record_data_locality(task, workerid); + record_worker_locality(ws, task, workerid, sched_ctx_id); + locality_popped_task(ws, task, victim, sched_ctx_id); + } + starpu_worker_unlock(victim); + +#ifndef STARPU_NON_BLOCKING_DRIVERS + /* While stealing, perhaps somebody actually gave us a task, don't miss + * the opportunity to take it before going to sleep. */ + { + struct _starpu_worker *worker = _starpu_get_worker_struct(starpu_worker_get_id()); + if (!task && worker->state_keep_awake) + { + task = ws_pick_task(ws, workerid, workerid); + if (task) + { + /* keep_awake notice taken into account here, clear flag */ + worker->state_keep_awake = 0; + locality_popped_task(ws, task, workerid, sched_ctx_id); + } + } + } +#endif + + if (task &&_starpu_get_nsched_ctxs() > 1) + { + starpu_worker_relax_on(); + _starpu_sched_ctx_lock_write(sched_ctx_id); + starpu_worker_relax_off(); + if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) + task = NULL; + _starpu_sched_ctx_unlock_write(sched_ctx_id); + if (!task) + return NULL; + } + if (ws->per_worker[workerid].busy != !!task) + ws->per_worker[workerid].busy = !!task; + return task; +} + +static +int ws_push_task(struct starpu_task *task) +{ + unsigned sched_ctx_id = task->sched_ctx; + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + int workerid; + +#ifdef USE_LOCALITY + workerid = select_worker_locality(ws, task, sched_ctx_id); +#else + workerid = -1; +#endif + if (workerid == -1) + workerid = starpu_worker_get_id(); + + /* If the current thread is not a worker but + * the main thread (-1) or the current worker is not in the target + * context, we find the
best one and put the task on its queue */ + if (workerid == -1 || !starpu_sched_ctx_contains_worker(workerid, sched_ctx_id) || + !starpu_worker_can_execute_task_first_impl(workerid, task, NULL)) + workerid = select_worker(ws, task, sched_ctx_id); + starpu_worker_lock(workerid); + STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); + starpu_sched_task_break(task); + record_data_locality(task, workerid); + STARPU_ASSERT_MSG(ws->per_worker[workerid].running, "workerid=%d, ws=%p\n", workerid, ws); + starpu_st_prio_deque_push_back_task(&ws->per_worker[workerid].queue, task); + if (ws->per_worker[workerid].queue.ntasks == 1) + { + STARPU_ASSERT(ws->per_worker[workerid].notask == 1); + ws->per_worker[workerid].notask = 0; + } + locality_pushed_task(ws, task, workerid, sched_ctx_id); + + starpu_push_task_end(task); + starpu_worker_unlock(workerid); + starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, workerid); + +#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) + /* TODO: implement fine-grain signaling, similar to what eager does */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_sched_ctx_iterator it; + + workers->init_iterator(workers, &it); + while(workers->has_next(workers, &it)) + starpu_wake_worker_relax_light(workers->get_next(workers, &it)); +#endif + return 0; +} + +static void ws_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) +{ + (void)task; + (void)perf_workerid; + + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + ws->per_worker[workerid].busy = 1; +} + +static void ws_add_workers(unsigned sched_ctx_id, int *workerids,unsigned nworkers) +{ + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + unsigned i; + + for (i = 0; i < nworkers; i++) + { + int
workerid = workerids[i]; + starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); + starpu_st_prio_deque_init(&ws->per_worker[workerid].queue); + ws->per_worker[workerid].notask = 1; + ws->per_worker[workerid].running = 1; + + /* Tell helgrind that we are fine with getting outdated values, + * this is just an estimation */ + STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].notask); + STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].queue.ntasks); + ws->per_worker[workerid].busy = 0; + STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].busy); + } +} + +static void ws_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) +{ + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + unsigned i; + + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + + starpu_st_prio_deque_destroy(&ws->per_worker[workerid].queue); + ws->per_worker[workerid].running = 0; + free(ws->per_worker[workerid].proxlist); + ws->per_worker[workerid].proxlist = NULL; + } +} + +static void initialize_ws_policy(unsigned sched_ctx_id) +{ + struct _starpu_work_stealing_data *ws; + _STARPU_MALLOC(ws, sizeof(struct _starpu_work_stealing_data)); + starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)ws); + + ws->last_push_worker = 0; + STARPU_HG_DISABLE_CHECKING(ws->last_push_worker); + ws->select_victim = select_victim; + + unsigned nw = starpu_worker_get_count(); + _STARPU_CALLOC(ws->per_worker, nw, sizeof(struct _starpu_work_stealing_data_per_worker)); + + /* The application may use any integer */ + if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); + if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) + starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); +} + +static void deinit_ws_policy(unsigned sched_ctx_id) +{ + struct _starpu_work_stealing_data *ws = (struct 
_starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + + free(ws->per_worker); + free(ws); +} + +struct starpu_sched_policy _starpu_sched_ws_policy = +{ + .init_sched = initialize_ws_policy, + .deinit_sched = deinit_ws_policy, + .add_workers = ws_add_workers, + .remove_workers = ws_remove_workers, + .push_task = ws_push_task, + .pop_task = ws_pop_task, + .push_task_notify = ws_push_task_notify, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "ws", + .policy_description = "work stealing", + .worker_type = STARPU_WORKER_LIST, +}; + +/* local work stealing policy */ +/* Return a worker to steal a task from. The worker is selected according to + * the proximity list built using the info on the architecture provided by hwloc + */ +#ifdef STARPU_HAVE_HWLOC +static int lws_select_victim(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, int workerid) +{ + int nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id); + int i; + for (i = 0; i < nworkers; i++) + { + int neighbor = ws->per_worker[workerid].proxlist[i]; + if (ws->per_worker[neighbor].notask) + continue; + /* FIXME: do not keep looking again and again at some worker + * which has tasks, but that can't execute on me */ + if (ws->per_worker[neighbor].busy + || starpu_worker_is_blocked_in_parallel(neighbor)) + return neighbor; + } + return -1; +} +#endif + +static void lws_add_workers(unsigned sched_ctx_id, int *workerids, + unsigned nworkers) +{ + ws_add_workers(sched_ctx_id, workerids, nworkers); + +#ifdef STARPU_HAVE_HWLOC + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); + /* Build a proximity list for every worker. 
It is cheaper to + * build this once and then use it for popping tasks rather + * than traversing the hwloc tree every time a task must be + * stolen */ + struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); + struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private; + unsigned i; + + /* get the complete list of workers (not just the added one) and rebuild the proxlists */ + nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); + for (i = 0; i < nworkers; i++) + { + int workerid = workerids[i]; + if (ws->per_worker[workerid].proxlist == NULL) + _STARPU_CALLOC(ws->per_worker[workerid].proxlist, STARPU_NMAXWORKERS, sizeof(int)); + int bindid; + + struct starpu_sched_ctx_iterator it; + workers->init_iterator(workers, &it); + + bindid = starpu_worker_get_bindid(workerid); + it.value = starpu_tree_get(tree, bindid); + int cnt = 0; + for(;;) + { + struct starpu_tree *neighbour = (struct starpu_tree*)it.value; + int *neigh_workerids; + int neigh_nworkers = starpu_bindid_get_workerids(neighbour->id, &neigh_workerids); + int w; + for(w = 0; w < neigh_nworkers; w++) + { + if(!it.visited[neigh_workerids[w]] && workers->present[neigh_workerids[w]]) + { + ws->per_worker[workerid].proxlist[cnt++] = neigh_workerids[w]; + it.visited[neigh_workerids[w]] = 1; + } + } + if(!workers->has_next(workers, &it)) + break; + it.value = it.possible_value; + it.possible_value = NULL; + } + } +#endif +} + +static void initialize_lws_policy(unsigned sched_ctx_id) +{ + /* lws is loosely based on ws, except that it might use hwloc. */ + initialize_ws_policy(sched_ctx_id); + + if (starpu_worker_get_count() != starpu_cpu_worker_get_count() + || starpu_memory_nodes_get_numa_count() > 1 + ) + { + _STARPU_DISP("Warning: you are running the default lws scheduler, which is not a very smart scheduler, while the system has GPUs or several memory nodes. 
Make sure to read the StarPU documentation about adding performance models in order to be able to use the dmda or dmdas scheduler instead.\n"); + } + +#ifdef STARPU_HAVE_HWLOC + struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data *)starpu_sched_ctx_get_policy_data(sched_ctx_id); + ws->select_victim = lws_select_victim; +#endif +} + +struct starpu_sched_policy _starpu_sched_lws_policy = +{ + .init_sched = initialize_lws_policy, + .deinit_sched = deinit_ws_policy, + .add_workers = lws_add_workers, + .remove_workers = ws_remove_workers, + .push_task = ws_push_task, + .pop_task = ws_pop_task, + .push_task_notify = ws_push_task_notify, + .pre_exec_hook = NULL, + .post_exec_hook = NULL, + .policy_name = "lws", + .policy_description = "locality work stealing", +#ifdef STARPU_HAVE_HWLOC + .worker_type = STARPU_WORKER_TREE, +#else + .worker_type = STARPU_WORKER_LIST, +#endif +}; diff --git a/src/util/execute_on_all.c b/src/util/execute_on_all.c new file mode 100644 index 0000000..b889f2a --- /dev/null +++ b/src/util/execute_on_all.c @@ -0,0 +1,188 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +struct wrapper_func_args +{ + void (*func)(void *); + void *arg; +}; + +static void wrapper_func(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *_args) +{ + struct wrapper_func_args *args = (struct wrapper_func_args *) _args; +#ifdef STARPU_PROF_TOOL + struct starpu_prof_tool_info pi; +#endif + +#ifdef STARPU_PROF_TOOL + int worker = starpu_worker_get_id(); + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); +#endif + + args->func(args->arg); + +#ifdef STARPU_PROF_TOOL + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); +#endif +} + +/** + * Execute func(arg) on the given workers. 
+ */ +void starpu_execute_on_specific_workers(void (*func)(void*), void * arg, unsigned num_workers, unsigned * workers, const char * name) +{ + int ret; + unsigned w; + struct starpu_task *tasks[STARPU_NMAXWORKERS]; + + /* create a wrapper codelet */ + struct starpu_codelet wrapper_cl = + { + .where = 0xFF, + .cuda_funcs = {wrapper_func}, + .hip_funcs = {wrapper_func}, + .cpu_funcs = {wrapper_func}, + .opencl_funcs = {wrapper_func}, + .nbuffers = 0, + .name = name + }; + + struct wrapper_func_args args = + { + .func = func, + .arg = arg + }; + + + for (w = 0; w < num_workers; w++) + { + unsigned worker = workers[w]; + tasks[w] = starpu_task_create(); + tasks[w]->name = name; + + tasks[w]->cl = &wrapper_cl; + tasks[w]->cl_arg = &args; + + tasks[w]->execute_on_a_specific_worker = 1; + tasks[w]->workerid = worker; + + tasks[w]->detach = 0; + tasks[w]->destroy = 0; + + _starpu_exclude_task_from_dag(tasks[w]); + + ret = starpu_task_submit(tasks[w]); + if (ret == -ENODEV) + { + /* if the worker is not able to execute this tasks, we + * don't insist as this means the worker is not + * designated by the "where" bitmap */ + starpu_task_destroy(tasks[w]); + tasks[w] = NULL; + } + } + + for (w= 0; w < num_workers; w++) + { + if (tasks[w]) + { + ret = starpu_task_wait(tasks[w]); + STARPU_ASSERT(!ret); + starpu_task_destroy(tasks[w]); + } + } +} + +/* execute func(arg) on each worker that matches the "where" flag */ +void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char * name) +{ + int ret; + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + struct starpu_task *tasks[STARPU_NMAXWORKERS]; + + STARPU_ASSERT_MSG((where & ~STARPU_CPU & ~STARPU_CUDA & ~STARPU_OPENCL & ~STARPU_HIP) == 0, "This function is implemented only on CPU, CUDA, HIP, OpenCL"); + + /* create a wrapper codelet */ + struct starpu_codelet wrapper_cl = + { + .where = where, + .cuda_funcs = {wrapper_func}, + .hip_funcs = {wrapper_func}, + .cpu_funcs = 
{wrapper_func}, + .opencl_funcs = {wrapper_func}, + .nbuffers = 0, + .name = (name != NULL ? name : "execute_on_all_wrapper") + }; + + struct wrapper_func_args args = + { + .func = func, + .arg = arg + }; + + + for (worker = 0; worker < nworkers; worker++) + { + tasks[worker] = starpu_task_create(); + tasks[worker]->name = wrapper_cl.name; + + tasks[worker]->cl = &wrapper_cl; + tasks[worker]->cl_arg = &args; + + tasks[worker]->execute_on_a_specific_worker = 1; + tasks[worker]->workerid = worker; + + tasks[worker]->detach = 0; + tasks[worker]->destroy = 0; + + _starpu_exclude_task_from_dag(tasks[worker]); + + ret = _starpu_task_submit_internally(tasks[worker]); + if (ret == -ENODEV) + { + /* if the worker is not able to execute this task, we + * don't insist as this means the worker is not + * designated by the "where" bitmap */ + starpu_task_destroy(tasks[worker]); + tasks[worker] = NULL; + } + } + + for (worker = 0; worker < nworkers; worker++) + { + if (tasks[worker]) + { + ret = starpu_task_wait(tasks[worker]); + STARPU_ASSERT(!ret); + starpu_task_destroy(tasks[worker]); + } + } +} + +void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where) +{ + starpu_execute_on_each_worker_ex(func, arg, where, NULL); +} diff --git a/src/util/file.c b/src/util/file.c new file mode 100644 index 0000000..01eff7d --- /dev/null +++ b/src/util/file.c @@ -0,0 +1,47 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
/* Skip blank lines and '#' comment lines at the current position of f.
 *
 * Newline characters are consumed one by one; a '#' causes the rest of its
 * line (including the terminating newline, or up to end of file) to be
 * consumed. The first character that is neither is pushed back with
 * ungetc() so that the caller reads it next. */
void _starpu_drop_comments(FILE *f)
{
	for (;;)
	{
		char chunk[128];
		int ch = getc(f);

		if (ch == '\n')
			continue;

		if (ch != '#')
		{
			/* not ours: give it back and stop */
			ungetc(ch, f);
			return;
		}

		/* A comment line may be longer than the buffer: keep reading
		 * chunks until one contains the newline or input runs out */
		for (;;)
		{
			if (fgets(chunk, sizeof(chunk), f) == NULL)
				break;
			if (strchr(chunk, '\n') != NULL)
				break;
		}
	}
}
+ */ + +#include +#include +#include +#include +#include + +typedef void (*_starpu_callback_func_t)(void *); + +static const intptr_t fstarpu_r = STARPU_R; +static const intptr_t fstarpu_w = STARPU_W; +static const intptr_t fstarpu_rw = STARPU_RW; +static const intptr_t fstarpu_scratch = STARPU_SCRATCH; +static const intptr_t fstarpu_redux = STARPU_REDUX; +static const intptr_t fstarpu_mpi_redux = STARPU_MPI_REDUX; +static const intptr_t fstarpu_commute = STARPU_COMMUTE; +static const intptr_t fstarpu_ssend = STARPU_SSEND; +static const intptr_t fstarpu_locality = STARPU_LOCALITY; +static const intptr_t fstarpu_nofootprint = STARPU_NOFOOTPRINT; + +static const intptr_t fstarpu_data_array = STARPU_DATA_ARRAY; +static const intptr_t fstarpu_data_mode_array = STARPU_DATA_MODE_ARRAY; +static const intptr_t fstarpu_cl_args = STARPU_CL_ARGS; +static const intptr_t fstarpu_cl_args_nfree = STARPU_CL_ARGS_NFREE; +static const intptr_t fstarpu_task_deps_array = STARPU_TASK_DEPS_ARRAY; +static const intptr_t fstarpu_task_end_deps_array = STARPU_TASK_END_DEPS_ARRAY; +static const intptr_t fstarpu_callback = STARPU_CALLBACK; +static const intptr_t fstarpu_callback_with_arg = STARPU_CALLBACK_WITH_ARG; +static const intptr_t fstarpu_callback_with_arg_nfree = STARPU_CALLBACK_WITH_ARG_NFREE; +static const intptr_t fstarpu_callback_arg = STARPU_CALLBACK_ARG; +static const intptr_t fstarpu_callback_arg_nfree= STARPU_CALLBACK_ARG_NFREE; +static const intptr_t fstarpu_prologue_callback = STARPU_PROLOGUE_CALLBACK; +static const intptr_t fstarpu_prologue_callback_arg = STARPU_PROLOGUE_CALLBACK_ARG; +static const intptr_t fstarpu_prologue_callback_arg_nfree = STARPU_PROLOGUE_CALLBACK_ARG_NFREE; +static const intptr_t fstarpu_prologue_callback_pop = STARPU_PROLOGUE_CALLBACK_POP; +static const intptr_t fstarpu_prologue_callback_pop_arg = STARPU_PROLOGUE_CALLBACK_POP_ARG; +static const intptr_t fstarpu_prologue_callback_pop_arg_nfree = STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE; +static const 
intptr_t fstarpu_priority = STARPU_PRIORITY; +static const intptr_t fstarpu_execute_on_node = STARPU_EXECUTE_ON_NODE; +static const intptr_t fstarpu_execute_on_data = STARPU_EXECUTE_ON_DATA; +static const intptr_t fstarpu_execute_where = STARPU_EXECUTE_WHERE; +static const intptr_t fstarpu_execute_on_worker = STARPU_EXECUTE_ON_WORKER; +static const intptr_t fstarpu_worker_order = STARPU_WORKER_ORDER; +static const intptr_t fstarpu_hypervisor_tag = STARPU_HYPERVISOR_TAG; +static const intptr_t fstarpu_possibly_parallel = STARPU_POSSIBLY_PARALLEL; +static const intptr_t fstarpu_flops = STARPU_FLOPS; +static const intptr_t fstarpu_tag = STARPU_TAG; +static const intptr_t fstarpu_tag_only = STARPU_TAG_ONLY; +static const intptr_t fstarpu_name = STARPU_NAME; +static const intptr_t fstarpu_task_color = STARPU_TASK_COLOR; +static const intptr_t fstarpu_handles_sequential_consistency = STARPU_HANDLES_SEQUENTIAL_CONSISTENCY; +static const intptr_t fstarpu_task_end_dep = STARPU_TASK_END_DEP; +static const intptr_t fstarpu_task_synchronous = STARPU_TASK_SYNCHRONOUS; +static const intptr_t fstarpu_node_selection_policy = STARPU_NODE_SELECTION_POLICY; +static const intptr_t fstarpu_task_workerids = STARPU_TASK_WORKERIDS; +static const intptr_t fstarpu_sequential_consistency = STARPU_SEQUENTIAL_CONSISTENCY; +static const intptr_t fstarpu_task_profiling_info = STARPU_TASK_PROFILING_INFO; +static const intptr_t fstarpu_task_no_submitorder = STARPU_TASK_NO_SUBMITORDER; +static const intptr_t fstarpu_task_sched_data = STARPU_TASK_SCHED_DATA; +static const intptr_t fstarpu_task_file = STARPU_TASK_FILE; +static const intptr_t fstarpu_task_line = STARPU_TASK_LINE; + +static const intptr_t fstarpu_value = STARPU_VALUE; +static const intptr_t fstarpu_sched_ctx = STARPU_SCHED_CTX; + +static const intptr_t fstarpu_cpu_worker = STARPU_CPU_WORKER; +static const intptr_t fstarpu_cuda_worker = STARPU_CUDA_WORKER; +static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER; +static 
const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER; +static const intptr_t fstarpu_narch = STARPU_NARCH; + +static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS; + +static const intptr_t fstarpu_sched_ctx_policy_name = STARPU_SCHED_CTX_POLICY_NAME; +static const intptr_t fstarpu_sched_ctx_policy_struct = STARPU_SCHED_CTX_POLICY_STRUCT; +static const intptr_t fstarpu_sched_ctx_policy_min_prio = STARPU_SCHED_CTX_POLICY_MIN_PRIO; +static const intptr_t fstarpu_sched_ctx_policy_max_prio = STARPU_SCHED_CTX_POLICY_MAX_PRIO; +static const intptr_t fstarpu_sched_ctx_hierarchy_level = STARPU_SCHED_CTX_HIERARCHY_LEVEL; +static const intptr_t fstarpu_sched_ctx_nested = STARPU_SCHED_CTX_NESTED; +static const intptr_t fstarpu_sched_ctx_awake_workers = STARPU_SCHED_CTX_AWAKE_WORKERS; +static const intptr_t fstarpu_sched_ctx_policy_init = STARPU_SCHED_CTX_POLICY_INIT; +static const intptr_t fstarpu_sched_ctx_user_data = STARPU_SCHED_CTX_USER_DATA; + +static const intptr_t fstarpu_starpu_nowhere = STARPU_NOWHERE; +static const intptr_t fstarpu_starpu_cpu = STARPU_CPU; +static const intptr_t fstarpu_starpu_cuda = STARPU_CUDA; +static const intptr_t fstarpu_starpu_opencl = STARPU_OPENCL; + +static const intptr_t fstarpu_starpu_codelet_simgrid_execute = STARPU_CODELET_SIMGRID_EXECUTE; +static const intptr_t fstarpu_starpu_codelet_simgrid_execute_and_inject = STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT; +static const intptr_t fstarpu_starpu_cuda_async = STARPU_CUDA_ASYNC; +static const intptr_t fstarpu_starpu_opencl_async = STARPU_OPENCL_ASYNC; + +//static const intptr_t fstarpu_per_worker = STARPU_PER_WORKER; +//static const intptr_t fstarpu_per_arch = STARPU_PER_ARCH; +//static const intptr_t fstarpu_per_common = STARPU_COMMON; +static const intptr_t fstarpu_history_based = STARPU_HISTORY_BASED; +static const intptr_t fstarpu_regression_based = STARPU_REGRESSION_BASED; +static const intptr_t fstarpu_nl_regression_based = STARPU_NL_REGRESSION_BASED; +static const intptr_t 
fstarpu_multiple_regression_based = STARPU_MULTIPLE_REGRESSION_BASED; + +static const intptr_t fstarpu_seq = STARPU_SEQ; +static const intptr_t fstarpu_spmd = STARPU_SPMD; +static const intptr_t fstarpu_forkjoin = STARPU_FORKJOIN; + +static const intptr_t fstarpu_default_prio = STARPU_DEFAULT_PRIO; + +intptr_t fstarpu_get_constant(char *s) +{ + if (!strcmp(s, "FSTARPU_R")) { return fstarpu_r; } + else if (!strcmp(s, "FSTARPU_W")) { return fstarpu_w; } + else if (!strcmp(s, "FSTARPU_RW")) { return fstarpu_rw; } + else if (!strcmp(s, "FSTARPU_SCRATCH")) { return fstarpu_scratch; } + else if (!strcmp(s, "FSTARPU_REDUX")) { return fstarpu_redux; } + else if (!strcmp(s, "FSTARPU_MPI_REDUX")) { return fstarpu_mpi_redux; } + else if (!strcmp(s, "FSTARPU_COMMUTE")) { return fstarpu_commute; } + else if (!strcmp(s, "FSTARPU_SSEND")) { return fstarpu_ssend; } + else if (!strcmp(s, "FSTARPU_LOCALITY")) { return fstarpu_locality; } + else if (!strcmp(s, "FSTARPU_NOFOOTPRINT")) { return fstarpu_nofootprint; } + + + else if (!strcmp(s, "FSTARPU_DATA_ARRAY")) { return fstarpu_data_array; } + else if (!strcmp(s, "FSTARPU_DATA_MODE_ARRAY")) { return fstarpu_data_mode_array; } + else if (!strcmp(s, "FSTARPU_CL_ARGS")) { return fstarpu_cl_args; } + else if (!strcmp(s, "FSTARPU_CL_ARGS_NFREE")) { return fstarpu_cl_args_nfree; } + else if (!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY")) { return fstarpu_task_deps_array; } + else if (!strcmp(s, "FSTARPU_TASK_END_DEPS_ARRAY")) { return fstarpu_task_end_deps_array; } + else if (!strcmp(s, "FSTARPU_CALLBACK")) { return fstarpu_callback; } + else if (!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG")) { return fstarpu_callback_with_arg; } + else if (!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG_NFREE")) { return fstarpu_callback_with_arg_nfree; } + else if (!strcmp(s, "FSTARPU_CALLBACK_ARG")) { return fstarpu_callback_arg; } + else if (!strcmp(s, "FSTARPU_CALLBACK_ARG_NFREE")) { return fstarpu_callback_arg_nfree; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK")) { 
return fstarpu_prologue_callback; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_ARG")) { return fstarpu_prologue_callback_arg; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE")) { return fstarpu_prologue_callback_arg_nfree; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP")) { return fstarpu_prologue_callback_pop; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP_ARG")) { return fstarpu_prologue_callback_pop_arg; } + else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE")) { return fstarpu_prologue_callback_pop_arg_nfree; } + else if (!strcmp(s, "FSTARPU_PRIORITY")) { return fstarpu_priority; } + else if (!strcmp(s, "FSTARPU_EXECUTE_ON_NODE")) { return fstarpu_execute_on_node; } + else if (!strcmp(s, "FSTARPU_EXECUTE_ON_DATA")) { return fstarpu_execute_on_data; } + else if (!strcmp(s, "FSTARPU_EXECUTE_WHERE")) { return fstarpu_execute_where; } + else if (!strcmp(s, "FSTARPU_EXECUTE_ON_WORKER")) { return fstarpu_execute_on_worker; } + else if (!strcmp(s, "FSTARPU_WORKER_ORDER")) { return fstarpu_worker_order; } + else if (!strcmp(s, "FSTARPU_HYPERVISOR_TAG")) { return fstarpu_hypervisor_tag; } + else if (!strcmp(s, "FSTARPU_POSSIBLY_PARALLEL")) { return fstarpu_possibly_parallel; } + else if (!strcmp(s, "FSTARPU_FLOPS")) { return fstarpu_flops; } + else if (!strcmp(s, "FSTARPU_TAG")) { return fstarpu_tag; } + else if (!strcmp(s, "FSTARPU_TAG_ONLY")) { return fstarpu_tag_only; } + else if (!strcmp(s, "FSTARPU_NAME")) { return fstarpu_name; } + else if (!strcmp(s, "FSTARPU_NODE_SELECTION_POLICY")) { return fstarpu_node_selection_policy; } + else if (!strcmp(s, "FSTARPU_VALUE")) { return fstarpu_value; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX")) { return fstarpu_sched_ctx; } + else if (!strcmp(s, "FSTARPU_TASK_COLOR")) { return fstarpu_task_color; } + else if (!strcmp(s, "FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY")) { return fstarpu_handles_sequential_consistency; } + else if (!strcmp(s, "FSTARPU_TASK_END_DEP")) { return 
fstarpu_task_end_dep; } + else if (!strcmp(s, "FSTARPU_TASK_WORKERIDS")) { return fstarpu_task_workerids; } + else if (!strcmp(s, "FSTARPU_TASK_SYNCHRONOUS")) { return fstarpu_task_synchronous; } + else if (!strcmp(s, "FSTARPU_SEQUENTIAL_CONSISTENCY")) { return fstarpu_sequential_consistency; } + else if (!strcmp(s, "FSTARPU_TASK_PROFILING_INFO")) { return fstarpu_task_profiling_info; } + else if (!strcmp(s, "FSTARPU_TASK_NO_SUBMITORDER")) { return fstarpu_task_no_submitorder; } + else if (!strcmp(s, "FSTARPU_TASK_SCHED_DATA")) { return fstarpu_task_sched_data; } + else if (!strcmp(s, "FSTARPU_TASK_FILE")) { return fstarpu_task_file; } + else if (!strcmp(s, "FSTARPU_TASK_LINE")) { return fstarpu_task_line; } + + else if (!strcmp(s, "FSTARPU_CPU_WORKER")) { return fstarpu_cpu_worker; } + else if (!strcmp(s, "FSTARPU_CUDA_WORKER")) { return fstarpu_cuda_worker; } + else if (!strcmp(s, "FSTARPU_OPENCL_WORKER")) { return fstarpu_opencl_worker; } + else if (!strcmp(s, "FSTARPU_ANY_WORKER")) { return fstarpu_any_worker; } + else if (!strcmp(s, "FSTARPU_NARCH")) { return fstarpu_narch; } + + else if (!strcmp(s, "FSTARPU_NMAXBUFS")) { return fstarpu_nmaxbufs; } + + else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_NAME")) { return fstarpu_sched_ctx_policy_name; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_STRUCT")) { return fstarpu_sched_ctx_policy_struct; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_MIN_PRIO")) { return fstarpu_sched_ctx_policy_min_prio; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_MAX_PRIO")) { return fstarpu_sched_ctx_policy_max_prio; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_HIERARCHY_LEVEL")) { return fstarpu_sched_ctx_hierarchy_level; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_NESTED")) { return fstarpu_sched_ctx_nested; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_AWAKE_WORKERS")) { return fstarpu_sched_ctx_awake_workers; } + else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_INIT")) { return fstarpu_sched_ctx_policy_init; } + else if 
(!strcmp(s, "FSTARPU_SCHED_CTX_USER_DATA")) { return fstarpu_sched_ctx_user_data; } + + else if (!strcmp(s, "FSTARPU_NOWHERE")) { return fstarpu_starpu_nowhere; } + else if (!strcmp(s, "FSTARPU_CPU")) { return fstarpu_starpu_cpu; } + else if (!strcmp(s, "FSTARPU_CUDA")) { return fstarpu_starpu_cuda; } + else if (!strcmp(s, "FSTARPU_OPENCL")) { return fstarpu_starpu_opencl; } + + else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE")) { return fstarpu_starpu_codelet_simgrid_execute; } + else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT")) { return fstarpu_starpu_codelet_simgrid_execute_and_inject; } + else if (!strcmp(s, "FSTARPU_CUDA_ASYNC")) { return fstarpu_starpu_cuda_async; } + else if (!strcmp(s, "FSTARPU_OPENCL_ASYNC")) { return fstarpu_starpu_opencl_async; } + +// else if (!strcmp(s, "FSTARPU_PER_WORKER")) { return fstarpu_per_worker; } +// else if (!strcmp(s, "FSTARPU_PER_ARCH")) { return fstarpu_per_arch; } +// else if (!strcmp(s, "FSTARPU_COMMON")) { return fstarpu_per_common; } + else if (!strcmp(s, "FSTARPU_HISTORY_BASED")) { return fstarpu_history_based; } + else if (!strcmp(s, "FSTARPU_REGRESSION_BASED")) { return fstarpu_regression_based; } + else if (!strcmp(s, "FSTARPU_NL_REGRESSION_BASED")) { return fstarpu_nl_regression_based; } + else if (!strcmp(s, "FSTARPU_MULTIPLE_REGRESSION_BASED")) { return fstarpu_multiple_regression_based; } + + else if (!strcmp(s, "FSTARPU_SEQ")) { return fstarpu_seq; } + else if (!strcmp(s, "FSTARPU_SPMD")) { return fstarpu_spmd; } + else if (!strcmp(s, "FSTARPU_FORKJOIN")) { return fstarpu_forkjoin; } + + else if (!strcmp(s, "FSTARPU_DEFAULT_PRIO")) { return fstarpu_default_prio; } + + else { _STARPU_ERROR("unknown constant"); } + return -1; +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_conf *fstarpu_conf_allocate(void) +{ + struct starpu_conf *conf; + _STARPU_MALLOC(conf, sizeof(*conf)); + starpu_conf_init(conf); + return conf; +} + +void fstarpu_conf_free(struct starpu_conf *conf) +{ + memset(conf, 0, 
sizeof(*conf)); + free(conf); +} + +void fstarpu_conf_set_sched_policy_name(struct starpu_conf *conf, const char *sched_policy_name) +{ + conf->sched_policy_name = sched_policy_name; +} + +void fstarpu_conf_set_min_prio(struct starpu_conf *conf, int min_prio) +{ + conf->global_sched_ctx_min_priority = min_prio; +} + +void fstarpu_conf_set_max_prio(struct starpu_conf *conf, int max_prio) +{ + conf->global_sched_ctx_max_priority = max_prio; +} + +void fstarpu_conf_set_ncpu(struct starpu_conf *conf, int ncpu) +{ + STARPU_ASSERT(ncpu >= 0 && ncpu <= STARPU_NMAXWORKERS); + conf->ncpus = ncpu; +} + +void fstarpu_conf_set_ncuda(struct starpu_conf *conf, int ncuda) +{ + STARPU_ASSERT(ncuda >= 0 && ncuda <= STARPU_NMAXWORKERS); + conf->ncuda = ncuda; +} + +void fstarpu_conf_set_nopencl(struct starpu_conf *conf, int nopencl) +{ + STARPU_ASSERT(nopencl >= 0 && nopencl <= STARPU_NMAXWORKERS); + conf->nopencl = nopencl; +} + +void fstarpu_conf_set_calibrate(struct starpu_conf *conf, int calibrate) +{ + STARPU_ASSERT(calibrate == 0 || calibrate == 1); + conf->calibrate = calibrate; +} + +void fstarpu_conf_set_bus_calibrate(struct starpu_conf *conf, int bus_calibrate) +{ + STARPU_ASSERT(bus_calibrate == 0 || bus_calibrate == 1); + conf->bus_calibrate = bus_calibrate; +} + +void fstarpu_topology_print(void) +{ + starpu_topology_print(stderr); +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_codelet *fstarpu_codelet_allocate(void) +{ + struct starpu_codelet *cl; + _STARPU_MALLOC(cl, sizeof(*cl)); + starpu_codelet_init(cl); + return cl; +} + +void fstarpu_codelet_free(struct starpu_codelet *cl) +{ + memset(cl, 0, sizeof(*cl)); + free(cl); +} + +void fstarpu_codelet_set_name(struct starpu_codelet *cl, const char *cl_name) +{ + cl->name = cl_name; +} + +void fstarpu_codelet_set_color(struct starpu_codelet *cl, int cl_color) +{ + STARPU_ASSERT(cl_color >= 0); + cl->color = (unsigned)cl_color; +} + +void fstarpu_codelet_set_model(struct starpu_codelet *cl, struct starpu_perfmodel 
*cl_perfmodel) +{ + cl->model = cl_perfmodel; +} + +void fstarpu_codelet_set_energy_model(struct starpu_codelet *cl, struct starpu_perfmodel *cl_perfmodel) +{ + cl->energy_model = cl_perfmodel; +} + +void fstarpu_codelet_add_cpu_func(struct starpu_codelet *cl, void *f_ptr) +{ + const size_t max_cpu_funcs = sizeof(cl->cpu_funcs)/sizeof(cl->cpu_funcs[0])-1; + size_t i; + for (i = 0; i < max_cpu_funcs; i++) + { + if (cl->cpu_funcs[i] == NULL) + { + cl->cpu_funcs[i] = f_ptr; + return; + } + } + _STARPU_ERROR("fstarpu: too many cpu functions in Fortran codelet"); +} + +void fstarpu_codelet_add_cuda_func(struct starpu_codelet *cl, void *f_ptr) +{ + const size_t max_cuda_funcs = sizeof(cl->cuda_funcs)/sizeof(cl->cuda_funcs[0])-1; + unsigned i; + for (i = 0; i < max_cuda_funcs; i++) + { + if (cl->cuda_funcs[i] == NULL) + { + cl->cuda_funcs[i] = f_ptr; + return; + } + } + _STARPU_ERROR("fstarpu: too many cuda functions in Fortran codelet"); +} + +void fstarpu_codelet_add_cuda_flags(struct starpu_codelet *cl, intptr_t flags) +{ + const size_t max_cuda_flags = sizeof(cl->cuda_flags)/sizeof(cl->cuda_flags[0])-1; + unsigned i; + for (i = 0; i < max_cuda_flags; i++) + { + if (cl->cuda_flags[i] == 0) + { + cl->cuda_flags[i] = (char)flags; + return; + } + } + _STARPU_ERROR("fstarpu: too many cuda flags in Fortran codelet"); +} + +void fstarpu_codelet_add_opencl_func(struct starpu_codelet *cl, void *f_ptr) +{ + const size_t max_opencl_funcs = sizeof(cl->opencl_funcs)/sizeof(cl->opencl_funcs[0])-1; + unsigned i; + for (i = 0; i < max_opencl_funcs; i++) + { + if (cl->opencl_funcs[i] == NULL) + { + cl->opencl_funcs[i] = f_ptr; + return; + } + } + _STARPU_ERROR("fstarpu: too many opencl functions in Fortran codelet"); +} + +void fstarpu_codelet_add_opencl_flags(struct starpu_codelet *cl, intptr_t flags) +{ + const size_t max_opencl_flags = sizeof(cl->opencl_flags)/sizeof(cl->opencl_flags[0])-1; + unsigned i; + for (i = 0; i < max_opencl_flags; i++) + { + if (cl->opencl_flags[i] == 0) + 
{ + cl->opencl_flags[i] = (char)flags; + return; + } + } + _STARPU_ERROR("fstarpu: too many opencl flags in Fortran codelet"); +} + +void fstarpu_codelet_add_buffer(struct starpu_codelet *cl, intptr_t _mode) +{ + + enum starpu_data_access_mode mode = (enum starpu_data_access_mode) _mode; + const size_t max_modes = sizeof(cl->modes)/sizeof(cl->modes[0])-1; + if ((mode & (STARPU_ACCESS_MODE_MAX-1)) != mode) + { + _STARPU_ERROR("fstarpu: invalid data mode"); + } + if (cl->nbuffers < (int) max_modes) + { + cl->modes[cl->nbuffers] = (unsigned int)mode; + cl->nbuffers++; + } + else + { + _STARPU_ERROR("fstarpu: too many buffers in Fortran codelet"); + } +} + +void fstarpu_codelet_set_variable_nbuffers(struct starpu_codelet *cl) +{ + cl->nbuffers = STARPU_VARIABLE_NBUFFERS; +} + +void fstarpu_codelet_set_nbuffers(struct starpu_codelet *cl, int nbuffers) +{ + if (nbuffers >= 0) + { + cl->nbuffers = nbuffers; + } + else + { + _STARPU_ERROR("fstarpu: invalid nbuffers parameter"); + } +} + +void fstarpu_codelet_set_flags(struct starpu_codelet *cl, intptr_t flags) +{ + cl->flags = (int)flags; +} + +void fstarpu_codelet_set_where(struct starpu_codelet *cl, intptr_t where) +{ + STARPU_ASSERT(where >= 0); + cl->where = (uint32_t)where; +} + +void fstarpu_codelet_set_type(struct starpu_codelet *cl, intptr_t type_constant) +{ + STARPU_ASSERT(type_constant == STARPU_SEQ || type_constant == STARPU_SPMD || type_constant == STARPU_FORKJOIN); + cl->type = (int)type_constant; +} + +void fstarpu_codelet_set_max_parallelism(struct starpu_codelet *cl, int max_parallelism) +{ + if (max_parallelism >= 1) + { + cl->max_parallelism = max_parallelism; + } + else + { + _STARPU_ERROR("fstarpu: invalid max_parallelism parameter"); + } +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_perfmodel *fstarpu_perfmodel_allocate(void) +{ + struct starpu_perfmodel *model; + _STARPU_CALLOC(model, 1, sizeof(*model)); + return model; +} + +void fstarpu_perfmodel_free(struct starpu_perfmodel *model) +{ + 
memset(model, 0, sizeof(*model)); + free(model); +} + +void fstarpu_perfmodel_set_symbol(struct starpu_perfmodel *model, const char *model_symbol) +{ + model->symbol = model_symbol; +} + +void fstarpu_perfmodel_set_type(struct starpu_perfmodel *model, intptr_t type) +{ + STARPU_ASSERT(type == fstarpu_history_based || type == fstarpu_regression_based || type == fstarpu_nl_regression_based || type == fstarpu_multiple_regression_based); + model->type = type; +} + +void * fstarpu_variable_get_ptr(void *buffers[], int i) +{ + return (void *)STARPU_VARIABLE_GET_PTR(buffers[i]); +} + +void * fstarpu_vector_get_ptr(void *buffers[], int i) +{ + return (void *)STARPU_VECTOR_GET_PTR(buffers[i]); +} + +int fstarpu_vector_get_nx(void *buffers[], int i) +{ + return STARPU_VECTOR_GET_NX(buffers[i]); +} + +void * fstarpu_matrix_get_ptr(void *buffers[], int i) +{ + return (void *)STARPU_MATRIX_GET_PTR(buffers[i]); +} + +int fstarpu_matrix_get_ld(void *buffers[], int i) +{ + return STARPU_MATRIX_GET_LD(buffers[i]); +} + +int fstarpu_matrix_get_nx(void *buffers[], int i) +{ + return STARPU_MATRIX_GET_NX(buffers[i]); +} + +int fstarpu_matrix_get_ny(void *buffers[], int i) +{ + return STARPU_MATRIX_GET_NY(buffers[i]); +} + +void * fstarpu_block_get_ptr(void *buffers[], int i) +{ + return (void *)STARPU_BLOCK_GET_PTR(buffers[i]); +} + +int fstarpu_block_get_ldy(void *buffers[], int i) +{ + return STARPU_BLOCK_GET_LDY(buffers[i]); +} + +int fstarpu_block_get_ldz(void *buffers[], int i) +{ + return STARPU_BLOCK_GET_LDZ(buffers[i]); +} + +int fstarpu_block_get_nx(void *buffers[], int i) +{ + return STARPU_BLOCK_GET_NX(buffers[i]); +} + +int fstarpu_block_get_ny(void *buffers[], int i) +{ + return STARPU_BLOCK_GET_NY(buffers[i]); +} + +int fstarpu_block_get_nz(void *buffers[], int i) +{ + return STARPU_BLOCK_GET_NZ(buffers[i]); +} + +void fstarpu_data_acquire(starpu_data_handle_t handle, intptr_t mode) +{ + STARPU_ASSERT(mode == fstarpu_r || mode == fstarpu_w || mode == fstarpu_rw); + 
starpu_data_acquire(handle, (int)mode); +} + +void fstarpu_unpack_arg(char *cl_arg, void **buffer_list) +{ + size_t current_arg_offset = 0; + int nargs, arg; + + /* We fill the different pointers with the appropriate arguments */ + memcpy(&nargs, cl_arg, sizeof(nargs)); + current_arg_offset += sizeof(nargs); + + for (arg = 0; arg < nargs; arg++) + { + void *argptr = buffer_list[arg]; + + /* If not reading all cl_args */ + if(argptr == NULL) + break; + + size_t arg_size; + memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); + current_arg_offset += sizeof(arg_size); + + memcpy(argptr, cl_arg+current_arg_offset, arg_size); + current_arg_offset += arg_size; + } +} + +void fstarpu_sched_ctx_display_workers(int ctx) +{ + starpu_sched_ctx_display_workers((unsigned)ctx, stderr); +} + +intptr_t fstarpu_worker_get_type(int workerid) +{ + return (intptr_t)starpu_worker_get_type(workerid); +} + +int fstarpu_worker_get_count_by_type(intptr_t type) +{ + return starpu_worker_get_count_by_type((enum starpu_worker_archtype)type); +} + +unsigned fstarpu_worker_get_ids_by_type(intptr_t type, int *workerids, unsigned maxsize) +{ + return starpu_worker_get_ids_by_type((enum starpu_worker_archtype)type, workerids, maxsize); +} + +int fstarpu_worker_get_by_type(intptr_t type, int num) +{ + return starpu_worker_get_by_type((enum starpu_worker_archtype)type, num); +} + +int fstarpu_worker_get_by_devid(intptr_t type, int devid) +{ + return starpu_worker_get_by_devid((enum starpu_worker_archtype)type, devid); /* look up by device id, not by per-type rank as fstarpu_worker_get_by_type does */ +} + +void fstarpu_worker_get_type_as_string(intptr_t type, char *dst, size_t maxlen) +{ + const char *str = starpu_worker_get_type_as_string((enum starpu_worker_archtype)type); + snprintf(dst, maxlen, "%s", str); +} + +STARPU_ATTRIBUTE_MALLOC +starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb) +{ + void *ptr; + _STARPU_CALLOC(ptr, (size_t)nb, sizeof(starpu_data_handle_t)); + return ptr; +} + +void fstarpu_data_handle_array_free(starpu_data_handle_t *handles) +{ + 
free(handles); +} + +void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle) +{ + handles[i] = handle; +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb) +{ + void *ptr; + _STARPU_CALLOC(ptr, (size_t)nb, sizeof(struct starpu_data_descr)); + return ptr; +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_data_descr *fstarpu_data_descr_alloc(void) +{ + return fstarpu_data_descr_array_alloc(1); +} + +void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs) +{ + free(descrs); +} + +void fstarpu_data_descr_free(struct starpu_data_descr *descr) +{ + fstarpu_data_descr_array_free(descr); +} + +void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode) +{ + descrs[i].handle = handle; + descrs[i].mode = (enum starpu_data_access_mode)mode; +} + +void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode) +{ + fstarpu_data_descr_array_set(descr, 0, handle, mode); /* fstarpu_data_descr_alloc() allocates one element, so the only valid index is 0 */ +} + +STARPU_ATTRIBUTE_MALLOC +struct starpu_data_filter *fstarpu_data_filter_allocate(void) +{ + struct starpu_data_filter *filter; + _STARPU_CALLOC(filter, 1, sizeof(*filter)); + return filter; +} + +/* Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ to fit within the + * Fortran id length limit */ +#define _FSTARPU_DATA_FILTER_ALLOCATOR(name) \ +STARPU_ATTRIBUTE_MALLOC \ +struct starpu_data_filter *fstarpu_df_alloc_##name(void) \ +{ \ + struct starpu_data_filter *filter = fstarpu_data_filter_allocate(); \ + filter->filter_func = starpu_##name; \ + return filter; \ +} + +_FSTARPU_DATA_FILTER_ALLOCATOR(bcsr_filter_canonical_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(csr_filter_vertical_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_block_shadow); +_FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_vertical_block);
+_FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_vertical_block_shadow); +_FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_block_shadow); +_FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_list); +_FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_divide_in_2); +_FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_block_shadow); +_FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_vertical_block); +_FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_vertical_block_shadow); + +#undef _FSTARPU_DATA_FILTER_ALLOCATOR + +void fstarpu_data_filter_free(struct starpu_data_filter *filter) +{ + memset(filter, 0, sizeof(*filter)); + free(filter); +} + +void fstarpu_data_filter_set_filter_func(struct starpu_data_filter *filter, void *f_ptr) +{ + STARPU_ASSERT(f_ptr != NULL); + filter->filter_func = f_ptr; +} + +void fstarpu_data_filter_set_nchildren(struct starpu_data_filter *filter, int nchildren) +{ + STARPU_ASSERT(nchildren >= 0); + filter->nchildren = nchildren; +} + +void fstarpu_data_filter_set_get_nchildren_func(struct starpu_data_filter *filter, void *f_ptr) +{ + filter->get_nchildren = f_ptr; +} + +void fstarpu_data_filter_set_get_child_ops_func(struct starpu_data_filter *filter, void *f_ptr) +{ + filter->get_child_ops = f_ptr; +} + +void fstarpu_data_filter_set_filter_arg(struct starpu_data_filter *filter, int filter_arg) +{ + STARPU_ASSERT(filter_arg >= 0); /* starpu_data_filter.filter_arg is unsigned, but + * Fortran does not support unsigned types */ + filter->filter_arg = (unsigned)filter_arg; +} + +void fstarpu_data_filter_set_filter_arg_ptr(struct starpu_data_filter *filter, void *filter_arg_ptr) +{ + filter->filter_arg_ptr = filter_arg_ptr; +} diff --git a/src/util/misc.c b/src/util/misc.c new file mode 100644 index 0000000..685be9d --- /dev/null +++ b/src/util/misc.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +const char *_starpu_codelet_get_name(struct starpu_codelet *cl) +{ + if (!cl) + return NULL; + + if (cl->name) + return cl->name; + else if (cl->model && cl->model->symbol && cl->model->symbol[0]) + return cl->model->symbol; + else + return NULL; +} + +const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl) +{ + if (!cl) + return NULL; + + if (cl->model && cl->model->symbol && cl->model->symbol[0]) + return cl->model->symbol; + else + return cl->name; +} + +const char *_starpu_job_get_model_name(struct _starpu_job *j) +{ + if (!j) + return NULL; + + struct starpu_task *task = j->task; + if (!task) + return NULL; + + return _starpu_codelet_get_model_name(task->cl); +} + +const char *_starpu_job_get_task_name(struct _starpu_job *j) +{ + if (!j) + return NULL; + + struct starpu_task *task = j->task; + if (!task) + return NULL; + + if (task->name) + return task->name; + else + return _starpu_job_get_model_name(j); +} + +const char *starpu_task_get_model_name(struct starpu_task *task) +{ + if (!task) + return NULL; + + return _starpu_codelet_get_model_name(task->cl); +} + +const char *starpu_task_get_name(struct starpu_task *task) +{ + if (!task) + return NULL; + if (task->name) + return task->name; + else + return starpu_task_get_model_name(task); +} diff --git a/src/util/openmp_runtime_support.c 
b/src/util/openmp_runtime_support.c new file mode 100644 index 0000000..ea06594 --- /dev/null +++ b/src/util/openmp_runtime_support.c @@ -0,0 +1,2794 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#ifdef STARPU_OPENMP +/* + * locally disable -Wdeprecated-declarations to avoid + * lots of deprecated warnings for ucontext related functions + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _STARPU_INITIAL_THREAD_STACKSIZE 2097152 + +static struct starpu_omp_global _global_state; +starpu_pthread_key_t _starpu_omp_thread_key; +starpu_pthread_key_t _starpu_omp_task_key; + +struct starpu_omp_global *_starpu_omp_global_state = NULL; +double _starpu_omp_clock_ref = 0.0; /* clock reference for starpu_omp_get_wtick */ + +/* Entry in the `registered_handles' hash table. 
*/ +struct handle_entry +{ + UT_hash_handle hh; + void *pointer; + starpu_data_handle_t handle; +}; + + +static struct handle_entry *registered_handles; +static struct _starpu_spinlock registered_handles_lock; + +static struct starpu_omp_critical *create_omp_critical_struct(void); +static void destroy_omp_critical_struct(struct starpu_omp_critical *critical); +static struct starpu_omp_device *create_omp_device_struct(void); +static void destroy_omp_device_struct(struct starpu_omp_device *device); +static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device); +static void destroy_omp_region_struct(struct starpu_omp_region *region); +static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region); +static void destroy_omp_thread_struct(struct starpu_omp_thread *thread); +static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task, + struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit); +static void destroy_omp_task_struct(struct starpu_omp_task *task); +static void wake_up_and_unlock_task(struct starpu_omp_task *task); +static void wake_up_barrier(struct starpu_omp_region *parallel_region); +static void starpu_omp_task_preempt(void); + +struct starpu_omp_thread * _starpu_omp_get_thread(void) +{ + struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_thread_key); + return thread; +} + +static inline void _starpu_omp_set_thread(struct starpu_omp_thread *thread) +{ + STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_thread_key, thread); +} + +struct starpu_omp_task *_starpu_omp_get_task(void) +{ + struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_task_key); + return task; +} + +static inline void _starpu_omp_set_task(struct starpu_omp_task *task) +{ + STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_task_key, task); +} + +struct starpu_omp_region 
*_starpu_omp_get_region_at_level(int level) +{ + const struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *parallel_region; + + if (!task) + return NULL; + + parallel_region = task->owner_region; + if (level < 0 || level > parallel_region->icvs.levels_var) + return NULL; + + while (level < parallel_region->icvs.levels_var) + { + parallel_region = parallel_region->parent_region; + } + + return parallel_region; +} + +int _starpu_omp_get_region_thread_num(const struct starpu_omp_region * const region) +{ + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + STARPU_ASSERT(thread != NULL); + if (thread == region->master_thread) + return 0; + int tid = starpu_omp_thread_list_member(&region->thread_list, thread); + if (tid >= 0) + return tid+1; /* number 0 is returned for the master thread above, so list members start at 1 */ + _STARPU_ERROR("unrecognized omp thread\n"); +} + +static void weak_task_lock(struct starpu_omp_task *task) +{ + _starpu_spin_lock(&task->lock); + while (task->transaction_pending) + { + _starpu_spin_unlock(&task->lock); + STARPU_UYIELD(); + _starpu_spin_lock(&task->lock); + } +} + +static void weak_task_unlock(struct starpu_omp_task *task) +{ + _starpu_spin_unlock(&task->lock); +} + +static void wake_up_and_unlock_task(struct starpu_omp_task *task) +{ + STARPU_ASSERT(task->transaction_pending == 0); + if (task->wait_on == 0) + { + weak_task_unlock(task); + int ret = starpu_task_submit(task->starpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + else + { + weak_task_unlock(task); + } +} + +static void transaction_callback(void *_task) +{ + struct starpu_omp_task *task = _task; + _starpu_spin_lock(&task->lock); + STARPU_ASSERT(task->transaction_pending != 0); + task->transaction_pending = 0; + _starpu_spin_unlock(&task->lock); +} + +static void condition_init(struct starpu_omp_condition *condition) +{ + condition->contention_list_head = NULL; +} + +static void condition_exit(struct starpu_omp_condition *condition) +{ + STARPU_ASSERT(condition->contention_list_head == 
NULL); + condition->contention_list_head = NULL; +} + +static void condition_wait(struct starpu_omp_condition *condition, struct _starpu_spinlock *lock, enum starpu_omp_task_wait_on flag) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_task_link link; + _starpu_spin_lock(&task->lock); + task->wait_on |= flag; + link.task = task; + link.next = condition->contention_list_head; + condition->contention_list_head = &link; + task->transaction_pending = 1; + _starpu_spin_unlock(&task->lock); + _starpu_spin_unlock(lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + + /* re-acquire the lock released by the callback */ + _starpu_spin_lock(lock); +} + +#if 0 +/* unused for now */ +static void condition_signal(struct starpu_omp_condition *condition) +{ + if (condition->contention_list_head != NULL) + { + struct starpu_omp_task *next_task = condition->contention_list_head->task; + weak_task_lock(next_task); + condition->contention_list_head = condition->contention_list_head->next; + STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_condition); + next_task->wait_on &= ~starpu_omp_task_wait_on_condition; + wake_up_and_unlock_task(next_task); + } +} +#endif + +static void condition_broadcast(struct starpu_omp_condition *condition, enum starpu_omp_task_wait_on flag) +{ + while (condition->contention_list_head != NULL) + { + struct starpu_omp_task *next_task = condition->contention_list_head->task; + weak_task_lock(next_task); + condition->contention_list_head = condition->contention_list_head->next; + STARPU_ASSERT(next_task->wait_on & flag); + next_task->wait_on &= ~flag; + wake_up_and_unlock_task(next_task); + } +} + +static void register_thread_worker(struct starpu_omp_thread *thread) +{ + STARPU_ASSERT(thread->worker != NULL); + _starpu_spin_lock(&_global_state.hash_workers_lock); + struct _starpu_worker *check = thread->worker; + struct starpu_omp_thread *tmp = NULL; + 
HASH_FIND_PTR(_global_state.hash_workers, &check, tmp); + STARPU_ASSERT(tmp == NULL); + HASH_ADD_PTR(_global_state.hash_workers, worker, thread); + _starpu_spin_unlock(&_global_state.hash_workers_lock); +} +static struct starpu_omp_thread *get_worker_thread(struct _starpu_worker *starpu_worker) +{ + struct starpu_omp_thread *thread = NULL; + _starpu_spin_lock(&_global_state.hash_workers_lock); + HASH_FIND_PTR(_global_state.hash_workers, &starpu_worker, thread); + _starpu_spin_unlock(&_global_state.hash_workers_lock); + return thread; +} +static struct starpu_omp_thread *get_local_thread(void) +{ + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + if (thread == NULL) + { + struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); + STARPU_ASSERT(starpu_worker != NULL); + thread = get_worker_thread(starpu_worker); + + if ( +#ifdef STARPU_DEVEL +#warning Why not just checking for STARPU_CPU_WORKER? +#endif +#ifdef STARPU_USE_CUDA + (starpu_worker->arch != STARPU_CUDA_WORKER) + && +#endif +#ifdef STARPU_USE_OPENCL + (starpu_worker->arch != STARPU_OPENCL_WORKER) + && +#endif + 1 + ) + { + STARPU_ASSERT(thread != NULL); + } + + if (thread != NULL) + { + _starpu_omp_set_thread(thread); + } + } + return thread; +} + +static struct starpu_omp_thread * __attribute__ ((noinline)) _get_local_thread_noinline(void) +{ + return get_local_thread(); +} + +static struct starpu_omp_critical *create_omp_critical_struct(void) +{ + struct starpu_omp_critical *critical; + + _STARPU_CALLOC(critical, 1, sizeof(*critical)); + _starpu_spin_init(&critical->lock); + return critical; +} + +static void destroy_omp_critical_struct(struct starpu_omp_critical *critical) +{ + STARPU_ASSERT(critical->state == 0); + STARPU_ASSERT(critical->contention_list_head == NULL); + _starpu_spin_destroy(&critical->lock); + critical->name = NULL; + free(critical); +} + +static struct starpu_omp_device *create_omp_device_struct(void) +{ + struct starpu_omp_device *device; + + 
_STARPU_CALLOC(device, 1, sizeof(*device)); + _starpu_spin_init(&device->atomic_lock); + return device; +} + +static void destroy_omp_device_struct(struct starpu_omp_device *device) +{ + _starpu_spin_destroy(&device->atomic_lock); + memset(device, 0, sizeof(*device)); + free(device); +} + +static struct starpu_omp_device *get_caller_device(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_device *device; + if (task) + { + STARPU_ASSERT(task->owner_region != NULL); + device = task->owner_region->owner_device; + } + else + { + device = _global_state.initial_device; + } + STARPU_ASSERT(device != NULL); + return device; +} + +static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device) +{ + struct starpu_omp_region *region; + + _STARPU_CALLOC(region, 1, sizeof(*region)); + region->parent_region = parent_region; + region->owner_device = owner_device; + starpu_omp_thread_list_init0(&region->thread_list); + + _starpu_spin_init(&region->lock); + _starpu_spin_init(&region->registered_handles_lock); + region->level = (parent_region != NULL)?parent_region->level+1:0; /* a region without parent is level 0 */ + return region; +} + +static void destroy_omp_region_struct(struct starpu_omp_region *region) +{ + STARPU_ASSERT(region->nb_threads == 0); + STARPU_ASSERT(starpu_omp_thread_list_empty(&region->thread_list)); + STARPU_ASSERT(region->continuation_starpu_task == NULL); + _starpu_spin_destroy(&region->registered_handles_lock); + _starpu_spin_destroy(&region->lock); + memset(region, 0, sizeof(*region)); + free(region); +} + +static void omp_initial_thread_func(void) +{ + struct starpu_omp_thread *initial_thread = _global_state.initial_thread; + struct starpu_omp_task *initial_task = _global_state.initial_task; + while (1) + { + struct starpu_task *continuation_starpu_task = initial_task->nested_region->continuation_starpu_task; + starpu_driver_run_once(&initial_thread->starpu_driver); + + /* + * if we are leaving the first 
nested region we give control back to initial task + * otherwise, we should continue to execute work + */ + if (_starpu_task_test_termination(continuation_starpu_task)) + { + initial_task->nested_region->continuation_starpu_task = NULL; + _starpu_omp_set_task(initial_task); + swapcontext(&initial_thread->ctx, &initial_task->ctx); + } + } +} + +static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region) +{ + struct starpu_omp_thread *thread = starpu_omp_thread_new(); + if (thread == NULL) + _STARPU_ERROR("memory allocation failed"); + memset(thread, 0, sizeof(*thread)); + thread->owner_region = owner_region; + return thread; +} + +static void destroy_omp_thread_struct(struct starpu_omp_thread *thread) +{ + STARPU_ASSERT(thread->current_task == NULL); + memset(thread, 0, sizeof(*thread)); + starpu_omp_thread_delete(thread); +} + +/* Register the mapping from PTR to HANDLE. If PTR is already mapped to + * some handle, the new mapping shadows the previous one. 
*/ +static void register_ram_pointer(starpu_data_handle_t handle, void *ptr) +{ + struct handle_entry *entry; + + _STARPU_MALLOC(entry, sizeof(*entry)); + + entry->pointer = ptr; + entry->handle = handle; + + struct starpu_omp_task *task = _starpu_omp_get_task(); + if (task) + { + if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) + { + struct starpu_omp_region *parallel_region = task->owner_region; + _starpu_spin_lock(¶llel_region->registered_handles_lock); + HASH_ADD_PTR(parallel_region->registered_handles, pointer, entry); + _starpu_spin_unlock(¶llel_region->registered_handles_lock); + } + else + { + HASH_ADD_PTR(task->registered_handles, pointer, entry); + } + } + else + { + struct handle_entry *old_entry; + + _starpu_spin_lock(®istered_handles_lock); + HASH_FIND_PTR(registered_handles, &ptr, old_entry); + if (old_entry) + { + /* Already registered this pointer, avoid undefined + * behavior of duplicate in hash table */ + _starpu_spin_unlock(®istered_handles_lock); + free(entry); + } + else + { + HASH_ADD_PTR(registered_handles, pointer, entry); + _starpu_spin_unlock(®istered_handles_lock); + } + } +} + +void starpu_omp_handle_register(starpu_data_handle_t handle) +{ + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (starpu_node_get_kind(node) != STARPU_CPU_RAM) + continue; + + void *ptr = starpu_data_handle_to_pointer(handle, node); + if (ptr != NULL) + register_ram_pointer(handle, ptr); + } +} + +/* + * Stop monitoring a piece of data + */ +static void unregister_ram_pointer(starpu_data_handle_t handle, unsigned node) +{ + if (starpu_node_get_kind(node) != STARPU_CPU_RAM) + return; + + if (handle->removed_from_context_hash) + return; + const void *ram_ptr = starpu_data_handle_to_pointer(handle, node); + + if (ram_ptr != NULL) + { + /* Remove the PTR -> HANDLE mapping. If a mapping from PTR + * to another handle existed before (e.g., when using + * filters), it becomes visible again. 
*/ + struct handle_entry *entry; + struct starpu_omp_task *task = _starpu_omp_get_task(); + if (task) + { + if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) + { + struct starpu_omp_region *parallel_region = task->owner_region; + _starpu_spin_lock(&parallel_region->registered_handles_lock); + HASH_FIND_PTR(parallel_region->registered_handles, &ram_ptr, entry); + STARPU_ASSERT(entry != NULL); + HASH_DEL(parallel_region->registered_handles, entry); /* delete from the table the entry was found in, not the file-scope one */ + _starpu_spin_unlock(&parallel_region->registered_handles_lock); + } + else + { + HASH_FIND_PTR(task->registered_handles, &ram_ptr, entry); + STARPU_ASSERT(entry != NULL); + HASH_DEL(task->registered_handles, entry); + } + } + else + { + + _starpu_spin_lock(&registered_handles_lock); + HASH_FIND_PTR(registered_handles, &ram_ptr, entry); + if (entry) + { + if (entry->handle == handle) + { + HASH_DEL(registered_handles, entry); + } + else + /* don't free it, it's not ours */ + entry = NULL; + } + _starpu_spin_unlock(&registered_handles_lock); + } + free(entry); + } +} + +void starpu_omp_handle_unregister(starpu_data_handle_t handle) +{ + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct _starpu_data_replicate *local = &handle->per_node[node]; + STARPU_ASSERT(!local->refcnt); + if (local->allocated) + { + unregister_ram_pointer(handle, node); + } + } +} + +static void unregister_region_handles(struct starpu_omp_region *region) +{ + _starpu_spin_lock(&region->registered_handles_lock); + struct handle_entry *entry=NULL, *tmp=NULL; + HASH_ITER(hh, (region->registered_handles), entry, tmp) + { + entry->handle->removed_from_context_hash = 1; + HASH_DEL(region->registered_handles, entry); + starpu_data_unregister(entry->handle); + free(entry); + } + _starpu_spin_unlock(&region->registered_handles_lock); +} + +static void unregister_task_handles(struct starpu_omp_task *task) +{ + struct handle_entry *entry=NULL, *tmp=NULL; + HASH_ITER(hh, task->registered_handles, entry, tmp) + { + entry->handle->removed_from_context_hash = 1; + 
HASH_DEL(task->registered_handles, entry); + starpu_data_unregister(entry->handle); + free(entry); + } +} + +starpu_data_handle_t starpu_omp_data_lookup(const void *ptr) +{ + starpu_data_handle_t result; + + struct starpu_omp_task *task = _starpu_omp_get_task(); + if (task) + { + if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) + { + struct starpu_omp_region *parallel_region = task->owner_region; /* implicit tasks look up in their owner region's table, under its lock */ + _starpu_spin_lock(&parallel_region->registered_handles_lock); + { + struct handle_entry *entry; + + HASH_FIND_PTR(parallel_region->registered_handles, &ptr, entry); + if(STARPU_UNLIKELY(entry == NULL)) + result = NULL; + else + result = entry->handle; + } + _starpu_spin_unlock(&parallel_region->registered_handles_lock); + } + else + { + struct handle_entry *entry; + + HASH_FIND_PTR(task->registered_handles, &ptr, entry); + if(STARPU_UNLIKELY(entry == NULL)) + result = NULL; + else + result = entry->handle; + } + } + else + { + _starpu_spin_lock(&registered_handles_lock); + { + struct handle_entry *entry; + + HASH_FIND_PTR(registered_handles, &ptr, entry); + if(STARPU_UNLIKELY(entry == NULL)) + result = NULL; + else + result = entry->handle; + } + _starpu_spin_unlock(&registered_handles_lock); + } + + return result; +} + +static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task) +{ + STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); + struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); + /* XXX on work */ + if (task->is_loop) + { + starpu_omp_for_inline_first_alt(task->nb_iterations, task->chunk, starpu_omp_sched_static, 1, &task->begin_i, &task->end_i); + } + if (starpu_worker->arch == STARPU_CPU_WORKER) + { + task->cpu_f(task->starpu_buffers, task->starpu_cl_arg); + } +#ifdef STARPU_USE_CUDA + else if (starpu_worker->arch == STARPU_CUDA_WORKER) + { + task->cuda_f(task->starpu_buffers, task->starpu_cl_arg); + } +#endif +#ifdef STARPU_USE_OPENCL + else if (starpu_worker->arch == STARPU_OPENCL_WORKER) + { + 
task->opencl_f(task->starpu_buffers, task->starpu_cl_arg); + } +#endif + else + _STARPU_ERROR("invalid worker architecture"); + /**/ + unregister_task_handles(task); + _starpu_spin_lock(&task->lock); + task->state = starpu_omp_task_state_terminated; + task->transaction_pending=1; + _starpu_spin_unlock(&task->lock); + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + /* + * the task reached the terminated state, definitively give hand back to the worker code. + * + * about to run on the worker stack... + */ + setcontext(&thread->ctx); + STARPU_ASSERT(0); /* unreachable code */ +} + +static void starpu_omp_implicit_task_entry(struct starpu_omp_task *task) +{ + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + task->cpu_f(task->starpu_buffers, task->starpu_cl_arg); + starpu_omp_barrier(); + if (thread == task->owner_region->master_thread) + { + unregister_region_handles(task->owner_region); + } + task->state = starpu_omp_task_state_terminated; + /* + * the task reached the terminated state, definitively give hand back to the worker code. + * + * about to run on the worker stack... + */ + setcontext(&thread->ctx); + STARPU_ASSERT(0); /* unreachable code */ +} + +/* + * stop executing a task that is about to block + * and give hand back to the thread + */ +static void starpu_omp_task_preempt(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + task->state = starpu_omp_task_state_preempted; + + /* + * the task reached a blocked state, give hand back to the worker code. + * + * about to run on the worker stack... 
+ */ + swapcontext(&task->ctx, &thread->ctx); + /* now running on the task stack again */ +} + +/* + * wrap a task function to allow the task to be preempted + */ +static void starpu_omp_implicit_task_exec(void *buffers[], void *cl_arg) +{ + struct starpu_omp_task *task = starpu_task_get_current()->omp_task; + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + _starpu_omp_set_task(task); + + /* get_local_thread() inlining triggers a clobbering warning with some + * versions of GCC, thus we explicitly call the noinline variant */ + struct starpu_omp_thread *thread = _get_local_thread_noinline(); + + if (task->state != starpu_omp_task_state_preempted) + { + task->starpu_buffers = buffers; + task->starpu_cl_arg = cl_arg; + STARPU_ASSERT(task->stack == NULL); + STARPU_ASSERT(task->stacksize > 0); + _STARPU_MALLOC(task->stack, task->stacksize); + getcontext(&task->ctx); + /* + * we do not use uc_link, starpu_omp_task_entry will handle + * the end of the task + */ + task->ctx.uc_link = NULL; + task->ctx.uc_stack.ss_sp = task->stack; + task->ctx.uc_stack.ss_size = task->stacksize; + task->stack_vg_id = VALGRIND_STACK_REGISTER(task->stack, task->stack+task->stacksize); + makecontext(&task->ctx, (void (*) ()) starpu_omp_implicit_task_entry, 1, task); + } + + task->state = starpu_omp_task_state_clear; + + /* + * start the task execution, or restore a previously preempted task. + * about to run on the task stack... 
+ * */ + swapcontext(&thread->ctx, &task->ctx); + /* now running on the worker stack again */ + + STARPU_ASSERT(task->state == starpu_omp_task_state_preempted + || task->state == starpu_omp_task_state_terminated); + _starpu_omp_set_task(NULL); + + /* TODO: analyse the cause of the return and take appropriate steps */ + if (task->state == starpu_omp_task_state_terminated) + { + task->starpu_task->omp_task = NULL; + task->starpu_task = NULL; + VALGRIND_STACK_DEREGISTER(task->stack_vg_id); + task->stack_vg_id = 0; + free(task->stack); + task->stack = NULL; + memset(&task->ctx, 0, sizeof(task->ctx)); + } + else if (task->state != starpu_omp_task_state_preempted) + _STARPU_ERROR("invalid omp task state"); +} +static void starpu_omp_task_completion_accounting(struct starpu_omp_task *task) +{ + struct starpu_omp_task *parent_task = task->parent_task; + struct starpu_omp_region *parallel_region = task->owner_region; + + weak_task_lock(parent_task); + if (STARPU_ATOMIC_ADD(&parent_task->child_task_count, -1) == 0) + { + if (parent_task->state == starpu_omp_task_state_zombie) + { + STARPU_ASSERT(!(parent_task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); + weak_task_unlock(parent_task); + destroy_omp_task_struct(parent_task); + } + else if (parent_task->wait_on & starpu_omp_task_wait_on_task_childs) + { + parent_task->wait_on &= ~starpu_omp_task_wait_on_task_childs; + wake_up_and_unlock_task(parent_task); + } + else + { + weak_task_unlock(parent_task); + } + } + else + { + weak_task_unlock(parent_task); + } + _starpu_spin_lock(¶llel_region->lock); + if (STARPU_ATOMIC_ADD(¶llel_region->bound_explicit_task_count, -1) == 0) + { + struct starpu_omp_task *waiting_task = parallel_region->waiting_task; + _starpu_spin_unlock(¶llel_region->lock); + + if (waiting_task) + { + weak_task_lock(waiting_task); + _starpu_spin_lock(¶llel_region->lock); + parallel_region->waiting_task = NULL; + STARPU_ASSERT(waiting_task->wait_on & starpu_omp_task_wait_on_region_tasks); + waiting_task->wait_on &= 
~starpu_omp_task_wait_on_region_tasks; + _starpu_spin_unlock(¶llel_region->lock); + wake_up_and_unlock_task(waiting_task); + } + } + else + { + _starpu_spin_unlock(¶llel_region->lock); + } + if (task->task_group) + { + struct starpu_omp_task *leader_task = task->task_group->leader_task; + STARPU_ASSERT(leader_task != task); + weak_task_lock(leader_task); + if (STARPU_ATOMIC_ADD(&task->task_group->descendent_task_count, -1) == 0) + { + if (leader_task->wait_on & starpu_omp_task_wait_on_group + && task->task_group == leader_task->task_group) + /* only wake the leader_task if it is actually + * waiting for the current task's task_group */ + { + leader_task->wait_on &= ~starpu_omp_task_wait_on_group; + wake_up_and_unlock_task(leader_task); + } + else + { + weak_task_unlock(leader_task); + } + } + else + { + weak_task_unlock(leader_task); + } + } +} +/* + * wrap a task function to allow the task to be preempted + */ +static void starpu_omp_explicit_task_exec(void *buffers[], void *cl_arg) +{ + struct starpu_omp_task *task = starpu_task_get_current()->omp_task; + STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); + _starpu_omp_set_task(task); + + /* get_local_thread() inlining triggers a clobbering warning with some + * versions of GCC, thus we explicitly call the noinline variant */ + struct starpu_omp_thread *thread = _get_local_thread_noinline(); + + if (task->state != starpu_omp_task_state_preempted) + { + if (thread == NULL) + { + struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); + if (starpu_worker->arch != STARPU_CPU_WORKER) + { + if ( +#ifdef STARPU_USE_CUDA + (starpu_worker->arch != STARPU_CUDA_WORKER) + && +#endif +#ifdef STARPU_USE_OPENCL + (starpu_worker->arch != STARPU_OPENCL_WORKER) + && +#endif + 1 + ) + { + _STARPU_ERROR("invalid worker architecture"); + } + + struct starpu_omp_thread *new_thread; + new_thread = create_omp_thread_struct(NULL); + new_thread->worker = starpu_worker; + register_thread_worker(new_thread); + + 
thread = get_local_thread(); + STARPU_ASSERT(thread == new_thread); + } + else + { + _STARPU_ERROR("orphaned CPU thread"); + } + } + STARPU_ASSERT(thread != NULL); + if (!(task->flags & STARPU_OMP_TASK_FLAGS_UNTIED)) + { + struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); + task->starpu_task->workerid = starpu_worker->workerid; + task->starpu_task->execute_on_a_specific_worker = 1; + } + task->starpu_buffers = buffers; + task->starpu_cl_arg = cl_arg; + STARPU_ASSERT(task->stack == NULL); + STARPU_ASSERT(task->stacksize > 0); + _STARPU_MALLOC(task->stack, task->stacksize); + getcontext(&task->ctx); + /* + * we do not use uc_link, starpu_omp_task_entry will handle + * the end of the task + */ + task->ctx.uc_link = NULL; + task->ctx.uc_stack.ss_sp = task->stack; + task->ctx.uc_stack.ss_size = task->stacksize; + makecontext(&task->ctx, (void (*) ()) starpu_omp_explicit_task_entry, 1, task); + } + task->state = starpu_omp_task_state_clear; + + /* + * start the task execution, or restore a previously preempted task. + * about to run on the task stack... 
+ * */ + swapcontext(&thread->ctx, &task->ctx); + /* now running on the worker stack again */ + + STARPU_ASSERT(task->state == starpu_omp_task_state_preempted + || task->state == starpu_omp_task_state_terminated); + _starpu_omp_set_task(NULL); + /* TODO: analyse the cause of the return and take appropriate steps */ + if (task->state == starpu_omp_task_state_terminated) + { + free(task->stack); + task->stack = NULL; + memset(&task->ctx, 0, sizeof(task->ctx)); + + starpu_omp_task_completion_accounting(task); + } + else if (task->state != starpu_omp_task_state_preempted) + _STARPU_ERROR("invalid omp task state"); +} + +static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task, + struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit) +{ + struct starpu_omp_task *task = starpu_omp_task_new(); + if (task == NULL) + _STARPU_ERROR("memory allocation failed"); + + memset(task, 0, sizeof(*task)); + task->parent_task = parent_task; + task->owner_thread = owner_thread; + task->owner_region = owner_region; + if (is_implicit) + { + task->flags |= STARPU_OMP_TASK_FLAGS_IMPLICIT; + } + _starpu_spin_init(&task->lock); + /* TODO: initialize task->data_env_icvs with proper values */ + memset(&task->data_env_icvs, 0, sizeof(task->data_env_icvs)); + if (is_implicit) + { + /* TODO: initialize task->implicit_task_icvs with proper values */ + memset(&task->implicit_task_icvs, 0, sizeof(task->implicit_task_icvs)); + } + + if (owner_region->level > 0) + { + STARPU_ASSERT(owner_region->owner_device->icvs.stacksize_var > 0); + task->stacksize = owner_region->owner_device->icvs.stacksize_var; + } + + return task; +} + +static void destroy_omp_task_struct(struct starpu_omp_task *task) +{ + STARPU_ASSERT(task->state == starpu_omp_task_state_terminated || (task->state == starpu_omp_task_state_zombie && task->child_task_count == 0) || task->state == starpu_omp_task_state_target); + if (task->state == 
starpu_omp_task_state_target) + { + starpu_omp_task_completion_accounting(task); + } + STARPU_ASSERT(task->nested_region == NULL); + STARPU_ASSERT(task->starpu_task == NULL); + STARPU_ASSERT(task->stack == NULL); + _starpu_spin_destroy(&task->lock); + memset(task, 0, sizeof(*task)); + starpu_omp_task_delete(task); +} + +/* + * setup the main application thread to handle the possible preemption of the initial task + */ +static int omp_initial_thread_setup(void) +{ + struct starpu_omp_thread *initial_thread = _global_state.initial_thread; + struct starpu_omp_task *initial_task = _global_state.initial_task; + /* .current_task */ + initial_thread->current_task = initial_task; + /* .owner_region already set in create_omp_thread_struct */ + /* .initial_thread_stack */ + _STARPU_MALLOC(initial_thread->initial_thread_stack, _STARPU_INITIAL_THREAD_STACKSIZE); + if (initial_thread->initial_thread_stack == NULL) + _STARPU_ERROR("memory allocation failed"); + /* .ctx */ + getcontext(&initial_thread->ctx); + /* + * we do not use uc_link, the initial thread always should give hand back to the initial task + */ + initial_thread->ctx.uc_link = NULL; + initial_thread->ctx.uc_stack.ss_sp = initial_thread->initial_thread_stack; + initial_thread->ctx.uc_stack.ss_size = _STARPU_INITIAL_THREAD_STACKSIZE; + initial_thread->initial_thread_stack_vg_id = VALGRIND_STACK_REGISTER(initial_thread->initial_thread_stack, initial_thread->initial_thread_stack+_STARPU_INITIAL_THREAD_STACKSIZE); + makecontext(&initial_thread->ctx, omp_initial_thread_func, 0); + /* .starpu_driver */ + /* + * we configure starpu to not launch CPU worker 0 + * because we will use the main thread to play the role of worker 0 + */ + struct starpu_conf omp_starpu_conf; + int ret = starpu_conf_init(&omp_starpu_conf); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init"); + initial_thread->starpu_driver.type = STARPU_CPU_WORKER; + initial_thread->starpu_driver.id.cpu_id = 0; + omp_starpu_conf.not_launched_drivers = 
&initial_thread->starpu_driver; + omp_starpu_conf.n_not_launched_drivers = 1; +#ifdef STARPU_DEVEL +#warning setting nhip to 0 should not be necessary +#endif + omp_starpu_conf.nhip = 0; + omp_starpu_conf.nmpi_ms = 0; + omp_starpu_conf.ntcpip_ms = 0; + /* we are now ready to start StarPU */ + ret = starpu_init(&omp_starpu_conf); + int check = _starpu_omp_environment_check(); + if (check == 0) + { + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + ret = starpu_driver_init(&initial_thread->starpu_driver); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init"); + _starpu_omp_set_task(initial_task); + + _global_state.nb_starpu_cpu_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + _STARPU_MALLOC(_global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers * sizeof(int)); + if (_global_state.starpu_cpu_worker_ids == NULL) + _STARPU_ERROR("memory allocation failed"); + unsigned n = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, _global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers); + STARPU_ASSERT(n == _global_state.nb_starpu_cpu_workers); + initial_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[0]); + STARPU_ASSERT(initial_thread->worker); + STARPU_ASSERT(initial_thread->worker->arch == STARPU_CPU_WORKER); + _starpu_omp_set_thread(initial_thread); + register_thread_worker(initial_thread); + } + return check; +} + +static void omp_initial_thread_exit() +{ + struct starpu_omp_thread *initial_thread = _global_state.initial_thread; + int ret = starpu_driver_deinit(&initial_thread->starpu_driver); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit"); + memset(&initial_thread->starpu_driver, 0, sizeof (initial_thread->starpu_driver)); + /* the driver for the main thread is now de-inited, we can shutdown Starpu */ + starpu_shutdown(); + free(_global_state.starpu_cpu_worker_ids); + _global_state.starpu_cpu_worker_ids = NULL; + _global_state.nb_starpu_cpu_workers = 0; + 
VALGRIND_STACK_DEREGISTER(initial_thread->initial_thread_stack_vg_id); + free(initial_thread->initial_thread_stack); + initial_thread->initial_thread_stack = NULL; + memset(&initial_thread->ctx, 0, sizeof (initial_thread->ctx)); + initial_thread->current_task = NULL; +} + +static int omp_initial_region_setup(void) +{ + int ret = omp_initial_thread_setup(); + if (ret != 0) return ret; + + const int max_active_levels = _starpu_omp_initial_icv_values->max_active_levels_var; + const int max_threads = (int)starpu_cpu_worker_get_count(); + + /* implementation specific initial ICV values override */ + if (_starpu_omp_initial_icv_values->nthreads_var[0] == 0) + { + _starpu_omp_initial_icv_values->nthreads_var[0] = max_threads; + _starpu_omp_initial_icv_values->nthreads_var[1] = 0; + } + else + { + int i; + for (i = 0; i < max_active_levels; i++) + { + if (_starpu_omp_initial_icv_values->nthreads_var[i] == 0) + break; + if (_starpu_omp_initial_icv_values->nthreads_var[i] > max_threads) + { + _starpu_omp_initial_icv_values->nthreads_var[i] = max_threads; + } + } + } + _starpu_omp_initial_icv_values->dyn_var = 0; + _starpu_omp_initial_icv_values->nest_var = 0; + + _global_state.initial_device->icvs.max_active_levels_var = max_active_levels; + _global_state.initial_device->icvs.def_sched_var = _starpu_omp_initial_icv_values->def_sched_var; + _global_state.initial_device->icvs.def_sched_chunk_var = _starpu_omp_initial_icv_values->def_sched_chunk_var; + _global_state.initial_device->icvs.stacksize_var = _starpu_omp_initial_icv_values->stacksize_var; + _global_state.initial_device->icvs.wait_policy_var = _starpu_omp_initial_icv_values->wait_policy_var; + + _global_state.initial_region->master_thread = _global_state.initial_thread; + _global_state.initial_region->nb_threads++; + _global_state.initial_region->icvs.dyn_var = _starpu_omp_initial_icv_values->dyn_var; + _global_state.initial_region->icvs.nest_var = _starpu_omp_initial_icv_values->nest_var; + if 
(_starpu_omp_initial_icv_values->nthreads_var[1] != 0) + { + _STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.nthreads_var)); + int i,j; + for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++) + { + _global_state.initial_region->icvs.nthreads_var[j] = _starpu_omp_initial_icv_values->nthreads_var[j]; + } + _global_state.initial_region->icvs.nthreads_var[j] = 0; + } + else + { + _STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, 2 * sizeof(*_global_state.initial_region->icvs.nthreads_var)); + _global_state.initial_region->icvs.nthreads_var[0] = _starpu_omp_initial_icv_values->nthreads_var[0]; + _global_state.initial_region->icvs.nthreads_var[1] = 0; + } + + if (_starpu_omp_initial_icv_values->bind_var[1] != starpu_omp_proc_bind_undefined) + { + _STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.bind_var)); + int i,j; + for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++) + { + _global_state.initial_region->icvs.bind_var[j] = _starpu_omp_initial_icv_values->bind_var[j]; + } + _global_state.initial_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined; + } + else + { + _STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, 2 * sizeof(*_global_state.initial_region->icvs.bind_var)); + _global_state.initial_region->icvs.bind_var[0] = _starpu_omp_initial_icv_values->bind_var[0]; + _global_state.initial_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined; + } + _global_state.initial_region->icvs.thread_limit_var = _starpu_omp_initial_icv_values->thread_limit_var; + _global_state.initial_region->icvs.active_levels_var = 0; + _global_state.initial_region->icvs.levels_var = 0; + _global_state.initial_region->icvs.run_sched_var = 
_starpu_omp_initial_icv_values->run_sched_var; + _global_state.initial_region->icvs.run_sched_chunk_var = _starpu_omp_initial_icv_values->run_sched_chunk_var; + _global_state.initial_region->icvs.default_device_var = _starpu_omp_initial_icv_values->default_device_var; + _global_state.initial_region->icvs.max_task_priority_var = _starpu_omp_initial_icv_values->max_task_priority_var; + _global_state.initial_region->implicit_task_array = &_global_state.initial_task; + return 0; +} + +static void omp_initial_region_exit(void) +{ + omp_initial_thread_exit(); + _global_state.initial_task->state = starpu_omp_task_state_terminated; + _global_state.initial_region->implicit_task_array = NULL; + _global_state.initial_region->master_thread = NULL; + free(_global_state.initial_region->icvs.nthreads_var); + free(_global_state.initial_region->icvs.bind_var); + _global_state.initial_region->nb_threads--; +} + +/* + * If StarPU was compiled with --enable-openmp, but the OpenMP runtime support + * is not in use, starpu_init() may have been called directly instead of + * through starpu_omp_init(). However, some starpu_omp functions may be still + * be called such as _starpu_omp_get_task(). So let's setup a basic environment + * for them. + */ +void _starpu_omp_dummy_init(void) +{ + if (_starpu_omp_global_state != &_global_state) + { + STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_thread_key, NULL); + STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_task_key, NULL); + } +} + +/* + * Free data structures allocated by _starpu_omp_dummy_init(). + */ +void _starpu_omp_dummy_shutdown(void) +{ + if (_starpu_omp_global_state != &_global_state) + { + STARPU_PTHREAD_KEY_DELETE(_starpu_omp_thread_key); + STARPU_PTHREAD_KEY_DELETE(_starpu_omp_task_key); + } +} + +/* + * Entry point to be called by the OpenMP runtime constructor + */ +int starpu_omp_init(void) +{ +#ifdef STARPU_SIMGRID + /* XXX: ideally we'd pass the real argc/argv. 
*/ + /* We have to tell simgrid to avoid cleaning up at exit, since that's before our destructor :/ */ +# if SIMGRID_VERSION >= 32300 + char *argv[] = { "program", "--cfg=debug/clean-atexit:0", NULL }; +# else + char *argv[] = { "program", "--cfg=clean-atexit:0", NULL }; +# endif + int argc = sizeof(argv) / sizeof(argv[0]) - 1; + char **_argv = argv; + /* Initialize simgrid before anything else. */ + _starpu_simgrid_init_early(&argc, &_argv); +#endif + + _starpu_omp_global_state = &_global_state; + + STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_thread_key, NULL); + STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_task_key, NULL); + _global_state.initial_device = create_omp_device_struct(); + _global_state.initial_region = create_omp_region_struct(NULL, _global_state.initial_device); + _global_state.initial_thread = create_omp_thread_struct(_global_state.initial_region); + _global_state.initial_task = create_omp_task_struct(NULL, + _global_state.initial_thread, _global_state.initial_region, 1); + _global_state.default_critical = create_omp_critical_struct(); + _global_state.default_arbiter = starpu_arbiter_create(); + _global_state.named_criticals = NULL; + _starpu_spin_init(&_global_state.named_criticals_lock); + _global_state.hash_workers = NULL; + _starpu_spin_init(&_global_state.hash_workers_lock); + + _starpu_omp_environment_init(); + _global_state.icvs.cancel_var = _starpu_omp_initial_icv_values->cancel_var; + _global_state.environment_valid = -EINVAL; /* in case starpu_init exits (e.g. 
on a slave) */ + _global_state.environment_valid = omp_initial_region_setup(); + + /* init clock reference for starpu_omp_get_wtick */ + _starpu_omp_clock_ref = starpu_timing_now(); + _starpu_spin_init(®istered_handles_lock); + + return _global_state.environment_valid; +} + +void starpu_omp_shutdown(void) +{ + if (_global_state.environment_valid != 0) return; + + omp_initial_region_exit(); + /* TODO: free ICV variables */ + /* TODO: free task/thread/region/device structures */ + destroy_omp_task_struct(_global_state.initial_task); + _global_state.initial_task = NULL; + _global_state.initial_thread = NULL; + destroy_omp_region_struct(_global_state.initial_region); + _global_state.initial_region = NULL; + destroy_omp_device_struct(_global_state.initial_device); + _global_state.initial_device = NULL; + destroy_omp_critical_struct(_global_state.default_critical); + _global_state.default_critical = NULL; + starpu_arbiter_destroy(_global_state.default_arbiter); + _global_state.default_arbiter = NULL; + _starpu_spin_lock(&_global_state.named_criticals_lock); + { + struct starpu_omp_critical *critical=NULL, *tmp=NULL; + HASH_ITER(hh, _global_state.named_criticals, critical, tmp) + { + STARPU_ASSERT(critical != NULL); + HASH_DEL(_global_state.named_criticals, critical); + destroy_omp_critical_struct(critical); + } + } + STARPU_ASSERT(_global_state.named_criticals == NULL); + _starpu_spin_unlock(&_global_state.named_criticals_lock); + _starpu_spin_destroy(&_global_state.named_criticals_lock); + { + struct handle_entry *entry=NULL, *tmp=NULL; + + if (registered_handles) + { + _STARPU_DISP("[warning] The application has not unregistered all data handles.\n"); + } + + _starpu_spin_destroy(®istered_handles_lock); + + HASH_ITER(hh, registered_handles, entry, tmp) + { + HASH_DEL(registered_handles, entry); + free(entry); + } + + registered_handles = NULL; + } + _starpu_spin_lock(&_global_state.hash_workers_lock); + { + struct starpu_omp_thread *thread=NULL, *tmp=NULL; + 
HASH_ITER(hh, _global_state.hash_workers, thread, tmp) + { + STARPU_ASSERT(thread != NULL); + HASH_DEL(_global_state.hash_workers, thread); + destroy_omp_thread_struct(thread); + } + } + STARPU_ASSERT(_global_state.hash_workers == NULL); + _starpu_spin_unlock(&_global_state.hash_workers_lock); + _starpu_spin_destroy(&_global_state.hash_workers_lock); + _starpu_omp_environment_exit(); + STARPU_PTHREAD_KEY_DELETE(_starpu_omp_task_key); + STARPU_PTHREAD_KEY_DELETE(_starpu_omp_thread_key); +#ifdef STARPU_SIMGRID + _starpu_simgrid_deinit_late(); +#endif +} + +static void implicit_task__destroy_callback(void *_task) +{ + struct starpu_omp_task *task = _task; + destroy_omp_task_struct(task); +} + +void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) +{ + struct starpu_omp_thread *master_thread = _starpu_omp_get_thread(); + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *generating_region = task->owner_region; + const int max_active_levels = generating_region->owner_device->icvs.max_active_levels_var; + struct starpu_omp_region *new_region = + create_omp_region_struct(generating_region, _global_state.initial_device); + int ret; + int nb_threads = 1; + + /* TODO: for now, nested parallel sections are not supported, thus we + * open an active parallel section only if the generating region is the + * initial region */ + if (attr->if_clause != 0) + { + const int max_threads = (int)starpu_cpu_worker_get_count(); + if (attr->num_threads > 0) + { + nb_threads = attr->num_threads; + } + else + { + nb_threads = generating_region->icvs.nthreads_var[0]; + } + if (nb_threads > max_threads) + { + nb_threads = max_threads; + } + if (nb_threads > 1 && generating_region->icvs.active_levels_var+1 > max_active_levels) + { + nb_threads = 1; + } + } + STARPU_ASSERT(nb_threads > 0); + + new_region->icvs.dyn_var = generating_region->icvs.dyn_var; + new_region->icvs.nest_var = generating_region->icvs.nest_var; + /* the 
nthreads_var and bind_var arrays do not hold more than + * max_active_levels entries at most, even if some in-between levels + * are inactive */ + if (new_region->level < max_active_levels) + { + if (generating_region->icvs.nthreads_var[1] != 0) + { + _STARPU_MALLOC(new_region->icvs.nthreads_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.nthreads_var)); + int i,j; + for (i = new_region->level, j = 0; i < max_active_levels; i++, j++) + { + new_region->icvs.nthreads_var[j] = generating_region->icvs.nthreads_var[j+1]; + } + new_region->icvs.nthreads_var[j] = 0; + } + else + { + _STARPU_MALLOC(new_region->icvs.nthreads_var, 2 * sizeof(*new_region->icvs.nthreads_var)); + new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0]; + new_region->icvs.nthreads_var[1] = 0; + } + + if (generating_region->icvs.bind_var[1] != starpu_omp_proc_bind_undefined) + { + _STARPU_MALLOC(new_region->icvs.bind_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.bind_var)); + int i,j; + for (i = new_region->level, j = 0; i < max_active_levels; i++, j++) + { + new_region->icvs.bind_var[j] = generating_region->icvs.bind_var[j+1]; + } + new_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined; + } + else + { + _STARPU_MALLOC(new_region->icvs.bind_var, 2 * sizeof(*new_region->icvs.bind_var)); + new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0]; + new_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined; + } + } + else + { + _STARPU_MALLOC(new_region->icvs.nthreads_var, sizeof(*new_region->icvs.nthreads_var)); + new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0]; + + _STARPU_MALLOC(new_region->icvs.bind_var, sizeof(*new_region->icvs.bind_var)); + new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0]; + } + new_region->icvs.thread_limit_var = generating_region->icvs.thread_limit_var; + new_region->icvs.active_levels_var = (nb_threads > 
1)?generating_region->icvs.active_levels_var+1:generating_region->icvs.active_levels_var; + new_region->icvs.levels_var = generating_region->icvs.levels_var+1; + new_region->icvs.run_sched_var = generating_region->icvs.run_sched_var; + new_region->icvs.run_sched_chunk_var = generating_region->icvs.run_sched_chunk_var; + new_region->icvs.default_device_var = generating_region->icvs.default_device_var; + new_region->icvs.max_task_priority_var = generating_region->icvs.max_task_priority_var; + _STARPU_CALLOC(new_region->implicit_task_array, nb_threads, sizeof(*new_region->implicit_task_array)); + + int i; + for (i = 0; i < nb_threads; i++) + { + struct starpu_omp_thread *new_thread; + + if (i == 0) + { + new_thread = master_thread; + new_region->master_thread = master_thread; + } + else + { + /* TODO: specify actual starpu worker */ + + /* TODO: use a less arbitrary thread/worker mapping scheme */ + if (generating_region->level == 0) + { + struct _starpu_worker *worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]); + new_thread = get_worker_thread(worker); + if (new_thread == NULL) + { + new_thread = create_omp_thread_struct(new_region); + new_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]); + register_thread_worker(new_thread); + } + } + else + { + new_thread = master_thread; + } + starpu_omp_thread_list_push_back(&new_region->thread_list, new_thread); + } + + struct starpu_omp_task *new_task = create_omp_task_struct(task, new_thread, new_region, 1); + new_task->rank = new_region->nb_threads; + new_region->nb_threads++; + new_region->implicit_task_array[i] = new_task; + + } + STARPU_ASSERT(new_region->nb_threads == nb_threads); + + /* + * if task == initial_task, create a starpu task as a continuation to all the implicit + * tasks of the new region, else prepare the task for preemption, + * to become itself a continuation to the implicit tasks of the new region + */ + if (task == _global_state.initial_task) + { 
+ new_region->continuation_starpu_task = starpu_task_create(); + /* in that case, the continuation starpu task is only used for synchronisation */ + new_region->continuation_starpu_task->cl = NULL; + new_region->continuation_starpu_task->workerid = master_thread->worker->workerid; + new_region->continuation_starpu_task->execute_on_a_specific_worker = 1; + /* this sync task will be tested for completion in omp_initial_thread_func() */ + new_region->continuation_starpu_task->detach = 0; + + } + else + { + /* through the preemption, the parent starpu task becomes the continuation task */ + _starpu_task_prepare_for_continuation(); + new_region->continuation_starpu_task = task->starpu_task; + } + task->nested_region = new_region; + + /* + * create the starpu tasks for the implicit omp tasks, + * create explicit dependencies between these starpu tasks and the continuation starpu task + */ + for (i = 0; i < nb_threads; i++) + { + struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i]; + implicit_task->cl = attr->cl; + /* + * save pointer to the regions user function from the parallel region codelet + * + * TODO: add support for multiple/heterogeneous implementations + */ + implicit_task->cpu_f = implicit_task->cl.cpu_funcs[0]; + + /* + * plug the task wrapper into the parallel region codelet instead, to support task preemption + */ + implicit_task->cl.cpu_funcs[0] = starpu_omp_implicit_task_exec; + + implicit_task->starpu_task = starpu_task_create(); + _starpu_task_set_omp_cleanup_callback(implicit_task->starpu_task, implicit_task__destroy_callback, implicit_task); + implicit_task->starpu_task->cl = &implicit_task->cl; + { + int j; + for (j = 0; j < implicit_task->cl.nbuffers; j++) + { + implicit_task->starpu_task->handles[j] = attr->handles[j]; + } + } + implicit_task->starpu_task->cl_arg = attr->cl_arg; + implicit_task->starpu_task->cl_arg_size = attr->cl_arg_size; + implicit_task->starpu_task->cl_arg_free = attr->cl_arg_free; + 
implicit_task->starpu_task->omp_task = implicit_task; + implicit_task->starpu_task->workerid = implicit_task->owner_thread->worker->workerid; + implicit_task->starpu_task->execute_on_a_specific_worker = 1; + starpu_task_declare_deps_array(new_region->continuation_starpu_task, 1, &implicit_task->starpu_task); + } + + attr = NULL; + + /* + * submit all the region implicit starpu tasks + */ + for (i = 0; i < nb_threads; i++) + { + struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i]; + ret = starpu_task_submit(implicit_task->starpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* + * submit the region continuation starpu task if task == initial_task + */ + if (task == _global_state.initial_task) + { + ret = _starpu_task_submit_internally(new_region->continuation_starpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "_starpu_task_submit_internally"); + } + + /* + * preempt for completion of the region + */ + starpu_omp_task_preempt(); + if (task == _global_state.initial_task) + { + STARPU_ASSERT(new_region->continuation_starpu_task == NULL); + } + else + { + STARPU_ASSERT(new_region->continuation_starpu_task != NULL); + new_region->continuation_starpu_task = NULL; + } + /* + * TODO: free region resources + */ + for (i = 0; i < nb_threads; i++) + { + if (i == 0) + { + new_region->master_thread = NULL; + } + else + { + starpu_omp_thread_list_pop_front(&new_region->thread_list); + /* TODO: cleanup unused threads */ + } + new_region->nb_threads--; + } + /* implicit tasks will be freed in implicit_task__destroy_callback() */ + free(new_region->implicit_task_array); + STARPU_ASSERT(new_region->nb_threads == 0); + task->nested_region = NULL; + free(new_region->icvs.bind_var); + free(new_region->icvs.nthreads_var); + destroy_omp_region_struct(new_region); +} + +static void wake_up_barrier(struct starpu_omp_region *parallel_region) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + int i; + for (i = 0; i < 
parallel_region->nb_threads; i++) + { + struct starpu_omp_task * implicit_task = parallel_region->implicit_task_array[i]; + if (implicit_task == task) + continue; + weak_task_lock(implicit_task); + STARPU_ASSERT(implicit_task->wait_on & starpu_omp_task_wait_on_barrier); + implicit_task->wait_on &= ~starpu_omp_task_wait_on_barrier; + wake_up_and_unlock_task(implicit_task); + } +} + +void starpu_omp_barrier(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + /* Assume barriers are performed in by the implicit tasks of a parallel_region */ + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + struct starpu_omp_region *parallel_region = task->owner_region; + _starpu_spin_lock(&task->lock); + int inc_barrier_count = STARPU_ATOMIC_ADD(¶llel_region->barrier_count, 1); + + if (inc_barrier_count == parallel_region->nb_threads) + { + /* last task reaching the barrier */ + _starpu_spin_lock(¶llel_region->lock); + ANNOTATE_HAPPENS_AFTER(¶llel_region->barrier_count); + ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(¶llel_region->barrier_count); + parallel_region->barrier_count = 0; + ANNOTATE_HAPPENS_AFTER(¶llel_region->barrier_count); + ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(¶llel_region->barrier_count); + if (parallel_region->bound_explicit_task_count > 0) + { + task->wait_on |= starpu_omp_task_wait_on_region_tasks; + parallel_region->waiting_task = task; + task->transaction_pending = 1; + _starpu_spin_unlock(¶llel_region->lock); + _starpu_spin_unlock(&task->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + } + else + { + _starpu_spin_unlock(¶llel_region->lock); + _starpu_spin_unlock(&task->lock); + } + wake_up_barrier(parallel_region); + } + else + { + ANNOTATE_HAPPENS_BEFORE(¶llel_region->barrier_count); + /* not the last task reaching the barrier + * . prepare for conditional continuation + * . 
sleep + */ + + task->wait_on |= starpu_omp_task_wait_on_barrier; + task->transaction_pending = 1; + _starpu_spin_unlock(&task->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + STARPU_ASSERT(task->child_task_count == 0); + } +} + +void starpu_omp_master(void (*f)(void *arg), void *arg) +{ + if (starpu_omp_master_inline()) + f(arg); +} + +/* variant of omp_master for inlined code + * return !0 for the task that should perform the master section + * return 0 for the tasks that should not perform the master section */ +int starpu_omp_master_inline(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_thread *thread = _starpu_omp_get_thread(); + /* Assume master is performed in by the implicit tasks of a region */ + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + struct starpu_omp_region *region = task->owner_region; + + return thread == region->master_thread; +} + +void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) +{ + if (starpu_omp_single_inline()) + f(arg); + if (!nowait) + starpu_omp_barrier(); +} + +/* variant of omp_single for inlined code + * return !0 for the task that should perform the single section + * return 0 for the tasks that should not perform the single section + * wait/nowait should be handled directly by the calling code using starpu_omp_barrier */ +int starpu_omp_single_inline(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + /* Assume singles are performed in by the implicit tasks of a region */ + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + struct starpu_omp_region *region = task->owner_region; + int first = STARPU_BOOL_COMPARE_AND_SWAP(®ion->single_id, task->single_id, task->single_id+1); + task->single_id++; + + return first; +} + +void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) 
+{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *region = task->owner_region; + int first = starpu_omp_single_inline(); + + if (first) + { + region->copy_private_data = data; + f(arg, data, data_size); + } + starpu_omp_barrier(); + if (!first) + memcpy(data, region->copy_private_data, data_size); + starpu_omp_barrier(); +} + +void *starpu_omp_single_copyprivate_inline_begin(void *data) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *region = task->owner_region; + int first = starpu_omp_single_inline(); + + if (first) + { + task->single_first = 1; + region->copy_private_data = data; + return NULL; + } + + starpu_omp_barrier(); + return region->copy_private_data; +} + +void starpu_omp_single_copyprivate_inline_end(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + /* Assume singles are performed in by the implicit tasks of a region */ + STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); + if (task->single_first) + { + task->single_first = 0; + starpu_omp_barrier(); + } + starpu_omp_barrier(); +} + +void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) +{ + starpu_omp_critical_inline_begin(name); + f(arg); + starpu_omp_critical_inline_end(name); +} + +void starpu_omp_critical_inline_begin(const char *name) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_critical *critical = NULL; + struct starpu_omp_task_link link; + + if (name) + { + _starpu_spin_lock(&_global_state.named_criticals_lock); + HASH_FIND_STR(_global_state.named_criticals, name, critical); + if (critical == NULL) + { + critical = create_omp_critical_struct(); + critical->name = name; + HASH_ADD_STR(_global_state.named_criticals, name, critical); + } + _starpu_spin_unlock(&_global_state.named_criticals_lock); + } + else + { + critical = _global_state.default_critical; + } + + _starpu_spin_lock(&critical->lock); + while (critical->state != 0) + { + 
_starpu_spin_lock(&task->lock); + task->wait_on |= starpu_omp_task_wait_on_critical; + task->transaction_pending = 1; + link.task = task; + link.next = critical->contention_list_head; + critical->contention_list_head = &link; + _starpu_spin_unlock(&task->lock); + _starpu_spin_unlock(&critical->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + + /* re-acquire the spin lock */ + _starpu_spin_lock(&critical->lock); + } + critical->state = 1; + _starpu_spin_unlock(&critical->lock); +} + +void starpu_omp_critical_inline_end(const char *name) +{ + struct starpu_omp_critical *critical = NULL; + + if (name) + { + _starpu_spin_lock(&_global_state.named_criticals_lock); + HASH_FIND_STR(_global_state.named_criticals, name, critical); + _starpu_spin_unlock(&_global_state.named_criticals_lock); + } + else + { + critical = _global_state.default_critical; + } + + STARPU_ASSERT(critical != NULL); + _starpu_spin_lock(&critical->lock); + STARPU_ASSERT(critical->state == 1); + critical->state = 0; + if (critical->contention_list_head != NULL) + { + struct starpu_omp_task *next_task = critical->contention_list_head->task; + weak_task_lock(next_task); + critical->contention_list_head = critical->contention_list_head->next; + STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_critical); + next_task->wait_on &= ~starpu_omp_task_wait_on_critical; + wake_up_and_unlock_task(next_task); + } + _starpu_spin_unlock(&critical->lock); +} + +static void explicit_task__destroy_callback(void *_task) +{ + struct starpu_omp_task *task = _task; + STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); + task->starpu_task->omp_task = NULL; + task->starpu_task = NULL; + _starpu_spin_lock(&task->lock); + if (task->state != starpu_omp_task_state_target) + { + STARPU_ASSERT(task->transaction_pending == 1); + task->transaction_pending = 0; + if (task->child_task_count != 0) + { + task->state = starpu_omp_task_state_zombie; + 
_starpu_spin_unlock(&task->lock); + return; + } + } + _starpu_spin_unlock(&task->lock); + destroy_omp_task_struct(task); +} + +void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) +{ + struct starpu_omp_task *generating_task = _starpu_omp_get_task(); + struct starpu_omp_region *parallel_region = generating_task->owner_region; + int is_undeferred = 0; + int is_final = 0; + int is_included = 0; + int is_merged = 0; + int ret; + + if (generating_task == _global_state.initial_task) + { + is_undeferred = 1; + is_final = 1; + is_included = 1; + } + else + { + if (!attr->if_clause) + { + is_undeferred = 1; + } + if (generating_task->flags & STARPU_OMP_TASK_FLAGS_FINAL) + { + is_final = 1; + is_included = 1; + } + else if (attr->final_clause) + { + is_final = 1; + } + if (is_included) + { + is_undeferred = 1; + } + if ((is_undeferred || is_included) & attr->mergeable_clause) + { + is_merged = 1; + } + } + if (is_merged || is_included) + { + if (is_included) + { + /* TODO: backup current ICVs and setup new ICVs for the included task */ + } + int i; + unsigned n = attr->cl.nbuffers; + if (n == 0) + n = 1; + void *data_interfaces[n]; + for (i = 0; i < attr->cl.nbuffers; i++) + { + starpu_data_handle_t handle = attr->handles[i]; + ret = starpu_data_acquire(handle, attr->cl.modes[i]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + data_interfaces[i] = starpu_data_get_interface_on_node(handle, handle->home_node); + } + void (*f)(void **starpu_buffers, void *starpu_cl_arg) = attr->cl.cpu_funcs[0]; + f(data_interfaces, attr->cl_arg); + for (i = 0; i < attr->cl.nbuffers; i++) + { + starpu_data_release(attr->handles[i]); + } + if (attr->cl_arg_free) + { + free(attr->cl_arg); + } + if (is_included) + { + /* TODO: restore backuped ICVs */ + } + } + else + { + struct starpu_omp_task *generated_task = + create_omp_task_struct(generating_task, NULL, parallel_region, 0); + generated_task->cl = attr->cl; + if (attr->untied_clause) + { + 
generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNTIED; + } + if (is_final) + { + generated_task->flags |= STARPU_OMP_TASK_FLAGS_FINAL; + } + if (is_undeferred) + { + generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNDEFERRED; + } + // XXX taskgroup exist + if (!attr->nogroup_clause) + { + generated_task->task_group = generating_task->task_group; + } + generated_task->rank = -1; + + /* XXX taskloop attributes */ + generated_task->is_loop = attr->is_loop; + generated_task->nb_iterations = attr->nb_iterations; + generated_task->grainsize = attr->grainsize; + generated_task->chunk = attr->chunk; + generated_task->begin_i = attr->begin_i; + generated_task->end_i = attr->end_i; + + /* + * save pointer to the regions user function from the task region codelet + * + * TODO: add support for multiple/heterogeneous implementations + */ + if (generated_task->cl.cpu_funcs[0]) + { + generated_task->cpu_f = generated_task->cl.cpu_funcs[0]; + + /* + * plug the task wrapper into the task region codelet instead, to support task preemption + */ + generated_task->cl.cpu_funcs[0] = starpu_omp_explicit_task_exec; + } +#ifdef STARPU_USE_CUDA + if (generated_task->cl.cuda_funcs[0]) + { + generated_task->cuda_f = generated_task->cl.cuda_funcs[0]; +#if 1 + /* we assume for now that Cuda task won't block, thus we don't need + * to initialize the StarPU OpenMP Runtime Support context for enabling + * continuations on Cuda tasks */ + generated_task->state = starpu_omp_task_state_target; +#else + generated_task->cl.cuda_funcs[0] = starpu_omp_explicit_task_exec; +#endif + } +#endif +#ifdef STARPU_USE_OPENCL + if (generated_task->cl.opencl_funcs[0]) + { + generated_task->opencl_f = generated_task->cl.opencl_funcs[0]; +#if 1 + /* we assume for now that OpenCL task won't block, thus we don't need + * to initialize the StarPU OpenMP Runtime Support context for enabling + * continuations on OpenCL tasks */ + generated_task->state = starpu_omp_task_state_target; +#else + 
generated_task->cl.opencl_funcs[0] = starpu_omp_explicit_task_exec; +#endif + } +#endif + /* TODO: add other accelerator support */ + + generated_task->starpu_task = starpu_task_create(); + generated_task->starpu_task->cl = &generated_task->cl; + generated_task->starpu_task->cl_arg = attr->cl_arg; + generated_task->starpu_task->cl_arg_size = attr->cl_arg_size; + generated_task->starpu_task->cl_arg_free = attr->cl_arg_free; + generated_task->starpu_task->priority = attr->priority; + { + int i; + for (i = 0; i < generated_task->cl.nbuffers; i++) + { + generated_task->starpu_task->handles[i] = attr->handles[i]; + } + } + generated_task->starpu_task->omp_task = generated_task; + _starpu_task_set_omp_cleanup_callback(generated_task->starpu_task, explicit_task__destroy_callback, generated_task); + /* if the task is tied, execute_on_a_specific_worker will be changed to 1 + * upon the first preemption of the generated task, once we know + * which worker thread has been selected */ + generated_task->starpu_task->execute_on_a_specific_worker = 0; + + (void)STARPU_ATOMIC_ADD(&generating_task->child_task_count, 1); + (void)STARPU_ATOMIC_ADD(¶llel_region->bound_explicit_task_count, 1); + if (generated_task->task_group) + { + (void)STARPU_ATOMIC_ADD(&generated_task->task_group->descendent_task_count, 1); + } + + /* do not use the attribute struct afterward as it may become out of scope */ + attr = NULL; + + if (is_undeferred) + { + _starpu_task_prepare_for_continuation(); + starpu_task_declare_deps_array(generating_task->starpu_task, 1, + &generated_task->starpu_task); + } + ret = starpu_task_submit(generated_task->starpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + if (is_undeferred) + { + starpu_omp_task_preempt(); + } + } +} + +void starpu_omp_taskwait(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + _starpu_spin_lock(&task->lock); + if (task->child_task_count > 0) + { + task->wait_on |= starpu_omp_task_wait_on_task_childs; + 
task->transaction_pending = 1; + _starpu_spin_unlock(&task->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + STARPU_ASSERT(task->child_task_count == 0); + } + else + { + _starpu_spin_unlock(&task->lock); + } +} + +void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_task_group task_group; + task_group.p_previous_task_group = task->task_group; + task_group.descendent_task_count = 0; + task_group.leader_task = task; + task->task_group = &task_group; + f(arg); + _starpu_spin_lock(&task->lock); + if (task_group.descendent_task_count > 0) + { + task->wait_on |= starpu_omp_task_wait_on_group; + task->transaction_pending = 1; + _starpu_spin_unlock(&task->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + STARPU_ASSERT(task_group.descendent_task_count == 0); + } + else + { + _starpu_spin_unlock(&task->lock); + } + task->task_group = task_group.p_previous_task_group; +} + +void starpu_omp_taskgroup_inline_begin(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_task_group *p_task_group; + _STARPU_MALLOC(p_task_group, sizeof(*p_task_group)); + p_task_group->p_previous_task_group = task->task_group; + p_task_group->descendent_task_count = 0; + p_task_group->leader_task = task; + task->task_group = p_task_group; +} + +void starpu_omp_taskgroup_inline_end(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + _starpu_spin_lock(&task->lock); + struct starpu_omp_task_group *p_task_group = task->task_group; + if (p_task_group->descendent_task_count > 0) + { + task->wait_on |= starpu_omp_task_wait_on_group; + task->transaction_pending = 1; + _starpu_spin_unlock(&task->lock); + _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); + starpu_omp_task_preempt(); + 
STARPU_ASSERT(p_task_group->descendent_task_count == 0); + } + else + { + _starpu_spin_unlock(&task->lock); + } + task->task_group = p_task_group->p_previous_task_group; + free(p_task_group); +} + +// XXX on work +void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) +{ + if (!attr->nogroup_clause) + { + starpu_omp_taskgroup_inline_begin(); + } + + int nb_subloop; + if (attr->num_tasks) + { + nb_subloop = attr->num_tasks; + } + else if (attr->grainsize) + { + nb_subloop = attr->nb_iterations / attr->grainsize; + } + else + { + nb_subloop = 4; + } + + attr->is_loop = 1; + + int i; + int nb_iter_i = attr->nb_iterations / nb_subloop; + for (i = 0; i < nb_subloop; i++) + { + attr->begin_i = nb_iter_i * i; + attr->end_i = attr->begin_i + nb_iter_i; + attr->end_i += (i+1 != nb_subloop) ? 0 : (attr->nb_iterations % nb_subloop); + attr->chunk = attr->end_i - attr->begin_i; + starpu_omp_task_region(attr); + } +} + +// XXX on work +void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) +{ + if (!attr->nogroup_clause) + { + starpu_omp_taskgroup_inline_end(); + } +} + +static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, + struct starpu_omp_loop *loop, int first_call, + unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) +{ + *_nb_i = 0; + if (schedule == starpu_omp_sched_undefined) + { + schedule = parallel_region->owner_device->icvs.def_sched_var; + chunk = parallel_region->owner_device->icvs.def_sched_chunk_var; + } + else if (schedule == starpu_omp_sched_runtime) + { + schedule = parallel_region->icvs.run_sched_var; + chunk = parallel_region->icvs.run_sched_chunk_var; + } + STARPU_ASSERT(schedule == starpu_omp_sched_static + || schedule == starpu_omp_sched_dynamic + || schedule == starpu_omp_sched_guided + || schedule == starpu_omp_sched_auto); + if (schedule == 
starpu_omp_sched_auto) + { + schedule = starpu_omp_sched_static; + chunk = 0; + } + if (schedule == starpu_omp_sched_static) + { + if (chunk > 0) + { + if (first_call) + { + *_first_i = task->rank * chunk; + } + else + { + *_first_i += parallel_region->nb_threads * chunk; + } + + if (*_first_i < nb_iterations) + { + if (*_first_i + chunk > nb_iterations) + { + *_nb_i = nb_iterations - *_first_i; + } + else + { + *_nb_i = chunk; + } + } + } + else + { + if (first_call) + { + *_nb_i = nb_iterations / parallel_region->nb_threads; + *_first_i = (unsigned)task->rank * (*_nb_i); + unsigned long long remainder = nb_iterations % parallel_region->nb_threads; + + if (remainder > 0) + { + if ((unsigned)task->rank < remainder) + { + (*_nb_i)++; + *_first_i += (unsigned)task->rank; + } + else + { + *_first_i += remainder; + } + } + } + } + } + else if (schedule == starpu_omp_sched_dynamic) + { + if (chunk == 0) + { + chunk = 1; + } + if (first_call) + { + *_first_i = 0; + } + _starpu_spin_lock(¶llel_region->lock); + if (loop->next_iteration < nb_iterations) + { + *_first_i = loop->next_iteration; + if (*_first_i + chunk > nb_iterations) + { + *_nb_i = nb_iterations - *_first_i; + } + else + { + *_nb_i = chunk; + } + loop->next_iteration += *_nb_i; + } + _starpu_spin_unlock(¶llel_region->lock); + } + else if (schedule == starpu_omp_sched_guided) + { + if (chunk == 0) + { + chunk = 1; + } + if (first_call) + { + *_first_i = 0; + } + _starpu_spin_lock(¶llel_region->lock); + if (loop->next_iteration < nb_iterations) + { + *_first_i = loop->next_iteration; + *_nb_i = (nb_iterations - *_first_i)/parallel_region->nb_threads; + if (*_nb_i < chunk) + { + if (*_first_i+chunk > nb_iterations) + { + *_nb_i = nb_iterations - *_first_i; + } + else + { + *_nb_i = chunk; + } + } + loop->next_iteration += *_nb_i; + } + _starpu_spin_unlock(¶llel_region->lock); + } + if (ordered) + { + task->ordered_first_i = *_first_i; + task->ordered_nb_i = *_nb_i; + } +} + +static inline struct starpu_omp_loop 
*_starpu_omp_for_get_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) +{ + struct starpu_omp_loop *loop; + loop = parallel_region->loop_list; + while (loop && loop->id != task->loop_id) + { + loop = loop->next_loop; + } + return loop; +} + +static inline struct starpu_omp_loop *_starpu_omp_for_loop_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, + int ordered) +{ + struct starpu_omp_loop *loop; + _starpu_spin_lock(¶llel_region->lock); + loop = _starpu_omp_for_get_loop(parallel_region, task); + if (!loop) + { + _STARPU_MALLOC(loop, sizeof(*loop)); + loop->id = task->loop_id; + loop->next_iteration = 0; + loop->nb_completed_threads = 0; + loop->next_loop = parallel_region->loop_list; + parallel_region->loop_list = loop; + if (ordered) + { + loop->ordered_iteration = 0; + _starpu_spin_init(&loop->ordered_lock); + condition_init(&loop->ordered_cond); + } + } + _starpu_spin_unlock(¶llel_region->lock); + return loop; +} +static inline void _starpu_omp_for_loop_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, + struct starpu_omp_loop *loop, int ordered) +{ + _starpu_spin_lock(¶llel_region->lock); + loop->nb_completed_threads++; + if (loop->nb_completed_threads == parallel_region->nb_threads) + { + struct starpu_omp_loop **p_loop; + if (ordered) + { + loop->ordered_iteration = 0; + condition_exit(&loop->ordered_cond); + _starpu_spin_destroy(&loop->ordered_lock); + } + STARPU_ASSERT(loop->next_loop == NULL); + p_loop = &(parallel_region->loop_list); + while (*p_loop != loop) + { + p_loop = &((*p_loop)->next_loop); + } + *p_loop = NULL; + free(loop); + } + _starpu_spin_unlock(¶llel_region->lock); + task->loop_id++; +} + +int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region 
/*
 * [begin, end) flavour of starpu_omp_for_inline_first().
 *
 * Forwards to starpu_omp_for_inline_first() and converts the
 * (first index, count) pair it produces into *_begin_i and
 * *_end_i = *_begin_i + count. The return value of
 * starpu_omp_for_inline_first() is passed through unchanged.
 */
int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
{
	unsigned long long count;
	const int has_work = starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, _begin_i, &count);

	*_end_i = *_begin_i + count;
	return has_work;
}
/*
 * Worksharing loop driver using [begin, end) ranges.
 *
 * Calls f(begin, end, arg) once for every range handed to the calling
 * task by starpu_omp_for_inline_first_alt() /
 * starpu_omp_for_inline_next_alt(), then goes through a barrier unless
 * nowait is set.
 */
void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
{
	unsigned long long _begin_i = 0;
	unsigned long long _end_i = 0;
	int more = starpu_omp_for_inline_first_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i);

	while (more)
	{
		f(_begin_i, _end_i, arg);
		more = starpu_omp_for_inline_next_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i);
	}

	if (!nowait)
	{
		starpu_omp_barrier();
	}
}
starpu_omp_task_wait_on_ordered); + _starpu_spin_unlock(&loop->ordered_lock); +} + +static inline struct starpu_omp_sections *_starpu_omp_get_sections(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) +{ + struct starpu_omp_sections *sections; + sections = parallel_region->sections_list; + while (sections && sections->id != task->sections_id) + { + sections = sections->next_sections; + } + return sections; +} + +static inline struct starpu_omp_sections *_starpu_omp_sections_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) +{ + struct starpu_omp_sections *sections; + _starpu_spin_lock(¶llel_region->lock); + sections = _starpu_omp_get_sections(parallel_region, task); + if (!sections) + { + _STARPU_MALLOC(sections, sizeof(*sections)); + sections->id = task->sections_id; + sections->next_section_num = 0; + sections->nb_completed_threads = 0; + sections->next_sections = parallel_region->sections_list; + parallel_region->sections_list = sections; + } + _starpu_spin_unlock(¶llel_region->lock); + return sections; +} +static inline void _starpu_omp_sections_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, + struct starpu_omp_sections *sections) +{ + _starpu_spin_lock(¶llel_region->lock); + sections->nb_completed_threads++; + if (sections->nb_completed_threads == parallel_region->nb_threads) + { + struct starpu_omp_sections **p_sections; + STARPU_ASSERT(sections->next_sections == NULL); + p_sections = &(parallel_region->sections_list); + while (*p_sections != sections) + { + p_sections = &((*p_sections)->next_sections); + } + *p_sections = NULL; + free(sections); + } + _starpu_spin_unlock(¶llel_region->lock); + task->sections_id++; +} + +void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *parallel_region = task->owner_region; + struct 
starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task); + for (;;) + { + void (*f)(void *arg) = NULL; + void *arg = NULL; + _starpu_spin_lock(¶llel_region->lock); + if (sections->next_section_num < nb_sections) + { + f = section_f[sections->next_section_num]; + arg = section_arg[sections->next_section_num]; + sections->next_section_num ++; + } + _starpu_spin_unlock(¶llel_region->lock); + if (f == NULL) + break; + f(arg); + } + _starpu_omp_sections_end(parallel_region, task, sections); + if (!nowait) + { + starpu_omp_barrier(); + } +} + +void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *parallel_region = task->owner_region; + struct starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task); + for (;;) + { + unsigned long long section_num; + void *arg = NULL; + _starpu_spin_lock(¶llel_region->lock); + if (sections->next_section_num < nb_sections) + { + section_num = sections->next_section_num; + arg = section_arg; + sections->next_section_num ++; + } + else + { + _starpu_spin_unlock(¶llel_region->lock); + break; + } + _starpu_spin_unlock(¶llel_region->lock); + section_f(section_num, arg); + } + _starpu_omp_sections_end(parallel_region, task, sections); + if (!nowait) + { + starpu_omp_barrier(); + } +} + +static void _starpu_omp_lock_init(void **_internal) +{ + struct _starpu_omp_lock_internal *_lock; + + _STARPU_CALLOC(_lock, 1, sizeof(*_lock)); + _starpu_spin_init(&_lock->lock); + condition_init(&_lock->cond); + *_internal = _lock; +} + +static void _starpu_omp_lock_destroy(void **_internal) +{ + struct _starpu_omp_lock_internal * const _lock = *_internal; + STARPU_ASSERT(_lock->state == 0); + condition_exit(&_lock->cond); + _starpu_spin_destroy(&_lock->lock); + memset(_lock, 0, sizeof(*_lock)); + free(_lock); + *_internal 
= NULL; +} + +static void _starpu_omp_lock_set(void **_internal) +{ + struct _starpu_omp_lock_internal * const _lock = *_internal; + _starpu_spin_lock(&_lock->lock); + while (_lock->state != 0) + { + condition_wait(&_lock->cond, &_lock->lock, starpu_omp_task_wait_on_lock); + } + _lock->state = 1; + _starpu_spin_unlock(&_lock->lock); +} + +static void _starpu_omp_lock_unset(void **_internal) +{ + struct _starpu_omp_lock_internal * const _lock = *_internal; + _starpu_spin_lock(&_lock->lock); + STARPU_ASSERT(_lock->state == 1); + _lock->state = 0; + condition_broadcast(&_lock->cond, starpu_omp_task_wait_on_lock); + _starpu_spin_unlock(&_lock->lock); +} + +static int _starpu_omp_lock_test(void **_internal) +{ + struct _starpu_omp_lock_internal * const _lock = *_internal; + int ret = 0; + _starpu_spin_lock(&_lock->lock); + if (_lock->state == 0) + { + _lock->state = 1; + ret = 1; + } + _starpu_spin_unlock(&_lock->lock); + return ret; +} + +static void _starpu_omp_nest_lock_init(void **_internal) +{ + struct _starpu_omp_nest_lock_internal *_nest_lock; + + _STARPU_CALLOC(_nest_lock, 1, sizeof(*_nest_lock)); + _starpu_spin_init(&_nest_lock->lock); + condition_init(&_nest_lock->cond); + *_internal = _nest_lock; +} + +static void _starpu_omp_nest_lock_destroy(void **_internal) +{ + struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; + STARPU_ASSERT(_nest_lock->state == 0); + STARPU_ASSERT(_nest_lock->nesting == 0); + STARPU_ASSERT(_nest_lock->owner_task == NULL); + condition_exit(&_nest_lock->cond); + _starpu_spin_destroy(&_nest_lock->lock); + memset(_nest_lock, 0, sizeof(*_nest_lock)); + free(_nest_lock); + *_internal = NULL; +} + +static void _starpu_omp_nest_lock_set(void **_internal) +{ + struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; + struct starpu_omp_task * const task = _starpu_omp_get_task(); + _starpu_spin_lock(&_nest_lock->lock); + if (_nest_lock->owner_task == task) + { + STARPU_ASSERT(_nest_lock->state == 1); + 
STARPU_ASSERT(_nest_lock->nesting > 0); + _nest_lock->nesting++; + } + else + { + while (_nest_lock->state != 0) + { + condition_wait(&_nest_lock->cond, &_nest_lock->lock, starpu_omp_task_wait_on_nest_lock); + } + STARPU_ASSERT(_nest_lock->nesting == 0); + STARPU_ASSERT(_nest_lock->owner_task == NULL); + _nest_lock->state = 1; + _nest_lock->owner_task = task; + _nest_lock->nesting = 1; + } + _starpu_spin_unlock(&_nest_lock->lock); +} + +static void _starpu_omp_nest_lock_unset(void **_internal) +{ + struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; + struct starpu_omp_task * const task = _starpu_omp_get_task(); + _starpu_spin_lock(&_nest_lock->lock); + STARPU_ASSERT(_nest_lock->owner_task == task); + STARPU_ASSERT(_nest_lock->state == 1); + STARPU_ASSERT(_nest_lock->nesting > 0); + _nest_lock->nesting--; + if (_nest_lock->nesting == 0) + { + _nest_lock->state = 0; + _nest_lock->owner_task = NULL; + condition_broadcast(&_nest_lock->cond, starpu_omp_task_wait_on_nest_lock); + } + _starpu_spin_unlock(&_nest_lock->lock); +} + +static int _starpu_omp_nest_lock_test(void **_internal) +{ + struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; + struct starpu_omp_task * const task = _starpu_omp_get_task(); + int ret = 0; + _starpu_spin_lock(&_nest_lock->lock); + if (_nest_lock->state == 0) + { + STARPU_ASSERT(_nest_lock->nesting == 0); + STARPU_ASSERT(_nest_lock->owner_task == NULL); + _nest_lock->state = 1; + _nest_lock->owner_task = task; + _nest_lock->nesting = 1; + ret = _nest_lock->nesting; + } + else if (_nest_lock->owner_task == task) + { + STARPU_ASSERT(_nest_lock->state == 1); + STARPU_ASSERT(_nest_lock->nesting > 0); + _nest_lock->nesting++; + ret = _nest_lock->nesting; + } + _starpu_spin_unlock(&_nest_lock->lock); + return ret; +} + +void starpu_omp_init_lock (starpu_omp_lock_t *lock) +{ + _starpu_omp_lock_init(&lock->internal); +} + +void starpu_omp_destroy_lock (starpu_omp_lock_t *lock) +{ + 
_starpu_omp_lock_destroy(&lock->internal); +} + +void starpu_omp_set_lock (starpu_omp_lock_t *lock) +{ + _starpu_omp_lock_set(&lock->internal); +} + +void starpu_omp_unset_lock (starpu_omp_lock_t *lock) +{ + _starpu_omp_lock_unset(&lock->internal); +} + +int starpu_omp_test_lock (starpu_omp_lock_t *lock) +{ + return _starpu_omp_lock_test(&lock->internal); +} + +void starpu_omp_init_nest_lock (starpu_omp_nest_lock_t *nest_lock) +{ + _starpu_omp_nest_lock_init(&nest_lock->internal); +} + +void starpu_omp_destroy_nest_lock (starpu_omp_nest_lock_t *nest_lock) +{ + _starpu_omp_nest_lock_destroy(&nest_lock->internal); +} + +void starpu_omp_set_nest_lock (starpu_omp_nest_lock_t *nest_lock) +{ + _starpu_omp_nest_lock_set(&nest_lock->internal); +} + +void starpu_omp_unset_nest_lock (starpu_omp_nest_lock_t *nest_lock) +{ + _starpu_omp_nest_lock_unset(&nest_lock->internal); +} + +int starpu_omp_test_nest_lock (starpu_omp_nest_lock_t *nest_lock) +{ + return _starpu_omp_nest_lock_test(&nest_lock->internal); +} + +void starpu_omp_atomic_fallback_inline_begin(void) +{ + struct starpu_omp_device *device = get_caller_device(); + _starpu_spin_lock(&device->atomic_lock); + +} + +void starpu_omp_atomic_fallback_inline_end(void) +{ + struct starpu_omp_device *device = get_caller_device(); + _starpu_spin_unlock(&device->atomic_lock); +} + +void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) +{ + unsigned node; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) + starpu_data_get_interface_on_node(handle, node); + assert(vector_interface->id == STARPU_VECTOR_INTERFACE_ID); + vector_interface->slice_base = slice_base; + } +} + +struct starpu_arbiter *starpu_omp_get_default_arbiter(void) +{ + return _global_state.default_arbiter; +} + +/* + * restore deprecated diagnostics (-Wdeprecated-declarations) + */ +#pragma GCC diagnostic pop +#endif /* STARPU_OPENMP */ diff --git 
a/src/util/openmp_runtime_support.h b/src/util/openmp_runtime_support.h new file mode 100644 index 0000000..68983d8 --- /dev/null +++ b/src/util/openmp_runtime_support.h @@ -0,0 +1,423 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __OPENMP_RUNTIME_SUPPORT_H__ +#define __OPENMP_RUNTIME_SUPPORT_H__ + +/** @file */ + +#include + +#ifdef STARPU_OPENMP +#include +#include +#include + +/** ucontexts have been deprecated as of POSIX 1-2004 + * _XOPEN_SOURCE required at least on OS/X + * + * TODO: add detection in configure.ac + */ +#ifndef _XOPEN_SOURCE +#define _XOPEN_SOURCE +#endif +#include + +#pragma GCC visibility push(hidden) + +extern starpu_pthread_key_t omp_thread_key; +extern starpu_pthread_key_t omp_task_key; + +/** + * Arbitrary limit on the number of nested parallel sections + */ +#define STARPU_OMP_MAX_ACTIVE_LEVELS 1 + +/** + * Possible abstract names for OpenMP places + */ +enum starpu_omp_place_name +{ + starpu_omp_place_undefined = 0, + starpu_omp_place_threads = 1, + starpu_omp_place_cores = 2, + starpu_omp_place_sockets = 3, + starpu_omp_place_numerical = 4 /** place specified numerically */ +}; + +struct starpu_omp_numeric_place +{ + int excluded_place; + int *included_numeric_items; + int nb_included_numeric_items; + int *excluded_numeric_items; + int nb_excluded_numeric_items; +}; + +/** + * 
OpenMP place for thread affinity, defined by the OpenMP spec + */ +struct starpu_omp_place +{ + int abstract_name; + int abstract_excluded; + int abstract_length; + struct starpu_omp_numeric_place *numeric_places; + int nb_numeric_places; +}; + +/** + * Internal Control Variables (ICVs) declared following + * OpenMP 4.0.0 spec section 2.3.1 + */ +struct starpu_omp_data_environment_icvs +{ + /** parallel region icvs */ + int dyn_var; + int nest_var; + int *nthreads_var; /** nthreads_var ICV is a list */ + int thread_limit_var; + + int active_levels_var; + int levels_var; + int *bind_var; /** bind_var ICV is a list */ + + /** loop region icvs */ + int run_sched_var; + unsigned long long run_sched_chunk_var; + + /** program execution icvs */ + int default_device_var; + int max_task_priority_var; +}; + +struct starpu_omp_device_icvs +{ + /** parallel region icvs */ + int max_active_levels_var; + + /** loop region icvs */ + int def_sched_var; + unsigned long long def_sched_chunk_var; + + /** program execution icvs */ + int stacksize_var; + int wait_policy_var; +}; + +struct starpu_omp_implicit_task_icvs +{ + /** parallel region icvs */ + int place_partition_var; +}; + +struct starpu_omp_global_icvs +{ + /** program execution icvs */ + int cancel_var; +}; + +struct starpu_omp_initial_icv_values +{ + int dyn_var; + int nest_var; + int *nthreads_var; + int run_sched_var; + unsigned long long run_sched_chunk_var; + int def_sched_var; + unsigned long long def_sched_chunk_var; + int *bind_var; + int stacksize_var; + int wait_policy_var; + int thread_limit_var; + int max_active_levels_var; + int active_levels_var; + int levels_var; + int place_partition_var; + int cancel_var; + int default_device_var; + int max_task_priority_var; + + /** not a real ICV, but needed to store the contents of OMP_PLACES */ + struct starpu_omp_place places; +}; + +struct starpu_omp_task_group +{ + int descendent_task_count; + struct starpu_omp_task *leader_task; + struct starpu_omp_task_group 
*p_previous_task_group; +}; + +struct starpu_omp_task_link +{ + struct starpu_omp_task *task; + struct starpu_omp_task_link *next; +}; + +struct starpu_omp_condition +{ + struct starpu_omp_task_link *contention_list_head; +}; + +struct starpu_omp_critical +{ + UT_hash_handle hh; + struct _starpu_spinlock lock; + unsigned state; + struct starpu_omp_task_link *contention_list_head; + const char *name; +}; + +enum starpu_omp_task_state +{ + starpu_omp_task_state_clear = 0, + starpu_omp_task_state_preempted = 1, + starpu_omp_task_state_terminated = 2, + starpu_omp_task_state_zombie = 3, + + /** target tasks are non-preemptible tasks, without dedicated stack and OpenMP Runtime Support context */ + starpu_omp_task_state_target = 4, +}; + +enum starpu_omp_task_wait_on +{ + starpu_omp_task_wait_on_task_childs = 1 << 0, + starpu_omp_task_wait_on_region_tasks = 1 << 1, + starpu_omp_task_wait_on_barrier = 1 << 2, + starpu_omp_task_wait_on_group = 1 << 3, + starpu_omp_task_wait_on_critical = 1 << 4, + starpu_omp_task_wait_on_ordered = 1 << 5, + starpu_omp_task_wait_on_lock = 1 << 6, + starpu_omp_task_wait_on_nest_lock = 1 << 7, +}; + +enum starpu_omp_task_flags +{ + STARPU_OMP_TASK_FLAGS_IMPLICIT = 1 << 0, + STARPU_OMP_TASK_FLAGS_UNDEFERRED = 1 << 1, + STARPU_OMP_TASK_FLAGS_FINAL = 1 << 2, + STARPU_OMP_TASK_FLAGS_UNTIED = 1 << 3, +}; + +LIST_TYPE(starpu_omp_task, + struct starpu_omp_implicit_task_icvs icvs; + struct starpu_omp_task *parent_task; + struct starpu_omp_thread *owner_thread; + struct starpu_omp_region *owner_region; + struct starpu_omp_region *nested_region; + int rank; + int child_task_count; + struct starpu_omp_task_group *task_group; + struct _starpu_spinlock lock; + int transaction_pending; + int wait_on; + int barrier_count; + int single_id; + int single_first; + int loop_id; + unsigned long long ordered_first_i; + unsigned long long ordered_nb_i; + int sections_id; + struct starpu_omp_data_environment_icvs data_env_icvs; + struct starpu_omp_implicit_task_icvs 
implicit_task_icvs; + struct handle_entry *registered_handles; + + struct starpu_task *starpu_task; + struct starpu_codelet cl; + void **starpu_buffers; + void *starpu_cl_arg; + + /* Driver porters: adding your driver here is very optional. */ + /** actual task function to be run */ + void (*cpu_f)(void **starpu_buffers, void *starpu_cl_arg); + void (*cuda_f)(void **starpu_buffers, void *starpu_cl_arg); + void (*opencl_f)(void **starpu_buffers, void *starpu_cl_arg); + + enum starpu_omp_task_state state; + enum starpu_omp_task_flags flags; + + /* + * context to store the processing state of the task + * in case of blocking/recursive task operation + */ + ucontext_t ctx; + + /* + * stack to execute the task over, to be able to switch + * in case blocking/recursive task operation + */ + void *stack; + + /* + * Valgrind stack id + */ + int stack_vg_id; + + size_t stacksize; + + /* + * taskloop attribute + * */ + int is_loop; + unsigned long long nb_iterations; + unsigned long long grainsize; + unsigned long long chunk; + unsigned long long begin_i; + unsigned long long end_i; +) + +LIST_TYPE(starpu_omp_thread, + + UT_hash_handle hh; + struct starpu_omp_task *current_task; + struct starpu_omp_region *owner_region; + + /* + * stack to execute the initial thread over + * when preempting the initial task + * note: should not be used for other threads + */ + void *initial_thread_stack; + /* + * Valgrind stack id + */ + int initial_thread_stack_vg_id; + + /* + * context to store the 'scheduler' state of the thread, + * to which the execution of thread comes back upon a + * blocking/recursive task operation + */ + ucontext_t ctx; + + struct starpu_driver starpu_driver; + struct _starpu_worker *worker; +) + +struct _starpu_omp_lock_internal +{ + struct _starpu_spinlock lock; + struct starpu_omp_condition cond; + unsigned state; +}; + +struct _starpu_omp_nest_lock_internal +{ + struct _starpu_spinlock lock; + struct starpu_omp_condition cond; + unsigned state; + struct 
starpu_omp_task *owner_task; + unsigned nesting; +}; + +struct starpu_omp_loop +{ + int id; + unsigned long long next_iteration; + int nb_completed_threads; + struct starpu_omp_loop *next_loop; + struct _starpu_spinlock ordered_lock; + struct starpu_omp_condition ordered_cond; + unsigned long long ordered_iteration; +}; + +struct starpu_omp_sections +{ + int id; + unsigned long long next_section_num; + int nb_completed_threads; + struct starpu_omp_sections *next_sections; +}; + +struct starpu_omp_region +{ + struct starpu_omp_data_environment_icvs icvs; + struct starpu_omp_region *parent_region; + struct starpu_omp_device *owner_device; + struct starpu_omp_thread *master_thread; + /** note: the list of threads does not include the master_thread */ + struct starpu_omp_thread_list thread_list; + /** list of implicit omp tasks created to run the region */ + struct starpu_omp_task **implicit_task_array; + /** include both the master thread and the region own threads */ + int nb_threads; + struct _starpu_spinlock lock; + struct starpu_omp_task *waiting_task; + int barrier_count; + int bound_explicit_task_count; + int single_id; + void *copy_private_data; + int level; + struct starpu_omp_loop *loop_list; + struct starpu_omp_sections *sections_list; + struct starpu_task *continuation_starpu_task; + struct handle_entry *registered_handles; + struct _starpu_spinlock registered_handles_lock; +}; + +struct starpu_omp_device +{ + struct starpu_omp_device_icvs icvs; + + /** atomic fallback implementation lock */ + struct _starpu_spinlock atomic_lock; +}; + +struct starpu_omp_global +{ + struct starpu_omp_global_icvs icvs; + struct starpu_omp_task *initial_task; + struct starpu_omp_thread *initial_thread; + struct starpu_omp_region *initial_region; + struct starpu_omp_device *initial_device; + struct starpu_omp_critical *default_critical; + struct starpu_omp_critical *named_criticals; + struct _starpu_spinlock named_criticals_lock; + struct starpu_omp_thread *hash_workers; + 
struct _starpu_spinlock hash_workers_lock; + struct starpu_arbiter *default_arbiter; + unsigned nb_starpu_cpu_workers; + int *starpu_cpu_worker_ids; + int environment_valid; +}; + +/* + * internal global variables + */ +extern struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values; +extern struct starpu_omp_global *_starpu_omp_global_state; +extern starpu_pthread_key_t _starpu_omp_thread_key; +extern starpu_pthread_key_t _starpu_omp_task_key; +extern double _starpu_omp_clock_ref; + +/* + * internal API + */ +void _starpu_omp_environment_init(void); +void _starpu_omp_environment_exit(void); +int _starpu_omp_environment_check(void); +struct starpu_omp_thread *_starpu_omp_get_thread(void); +struct starpu_omp_region *_starpu_omp_get_region_at_level(int level) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +struct starpu_omp_task *_starpu_omp_get_task(void); +int _starpu_omp_get_region_thread_num(const struct starpu_omp_region *const region) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +void _starpu_omp_dummy_init(void); +void _starpu_omp_dummy_shutdown(void); +#endif // STARPU_OPENMP + +#pragma GCC visibility pop + +#endif // __OPENMP_RUNTIME_SUPPORT_H__ diff --git a/src/util/openmp_runtime_support_environment.c b/src/util/openmp_runtime_support_environment.c new file mode 100644 index 0000000..7f1c8b2 --- /dev/null +++ b/src/util/openmp_runtime_support_environment.c @@ -0,0 +1,795 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#ifdef STARPU_OPENMP +#include +#include +#include +#include +#include +#include + +#define _STARPU_INITIAL_PLACES_LIST_SIZE 4 +#define _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE 4 +#define _STARPU_DEFAULT_STACKSIZE 2097152 + +static struct starpu_omp_initial_icv_values _initial_icv_values = +{ + .dyn_var = 0, + .nest_var = 0, + .nthreads_var = NULL, + .run_sched_var = starpu_omp_sched_static, + .run_sched_chunk_var = 0, + .def_sched_var = starpu_omp_sched_static, + .def_sched_chunk_var = 0, + .bind_var = NULL, + .stacksize_var = _STARPU_DEFAULT_STACKSIZE, + .wait_policy_var = 0, + .max_active_levels_var = STARPU_OMP_MAX_ACTIVE_LEVELS, + .active_levels_var = 0, + .levels_var = 0, + .place_partition_var = 0, + .cancel_var = 0, + .default_device_var = 0, + .max_task_priority_var = 0 +}; + +struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values = NULL; + +static void remove_spaces(char *str) +{ + int i = 0; + int j = 0; + + while (str[j] != '\0') + { + if (isspace(str[j])) + { + j++; + continue; + } + if (j > i) + { + str[i] = str[j]; + } + i++; + j++; + } + if (j > i) + { + str[i] = str[j]; + } +} + +static int stringsn_cmp(const char *strings[], const char *str, size_t n) +{ + int mode = 0; + while (strings[mode]) + { + if (strncasecmp(str, strings[mode], n) == 0) + break; + mode++; + } + if (strings[mode] == NULL) + return -1; + return mode; +} + +static int read_int_var(const char *str, int *dst) +{ + char *endptr; + int val; + long lval; + + if (!str) + return 0; + + errno = 0; /* To distinguish success/failure after call */ + lval = strtol(str, &endptr, 10); + + /* Check for various possible errors */ + if ((errno == ERANGE && (lval == LONG_MAX || lval == LONG_MIN)) 
|| (errno != 0 && lval == 0)) + return 0; + + if (lval < INT_MIN || lval > INT_MAX) + return 0; + + val = (int) lval; + + /* No digits were found. */ + if (str == endptr) + return 0; + + *dst = val; + return 1; +} + +static int _strings_cmp(const char *strings[], const char *str) +{ + int mode = 0; + while (strings[mode]) + { + if (strncasecmp(str, strings[mode], strlen(strings[mode])) == 0) + break; + mode++; + } + if (strings[mode] == NULL) + return -1; + return mode; +} + +static void read_sched_var(const char *var, int *dest, unsigned long long *dest_chunk) +{ + const char *env = starpu_getenv(var); + if (env) + { + char *str = strdup(env); + if (str == NULL) + _STARPU_ERROR("memory allocation failed\n"); + remove_spaces(str); + if (str[0] == '\0') + { + free(str); + return; + } + static const char *strings[] = { "undefined", "static", "dynamic", "guided", "auto", NULL }; + int mode = _strings_cmp(strings, str); + if (mode < 0) + _STARPU_ERROR("parse error in variable %s\n", var); + *dest = mode; + int offset = strlen(strings[mode]); + if (str[offset] == ',') + { + offset++; + errno = 0; + long long v = strtoll(str+offset, NULL, 10); + if (errno != 0) + _STARPU_ERROR("could not parse environment variable %s, strtol failed with error %s\n", var, strerror(errno)); + if (v < 0) + _STARPU_ERROR("invalid negative modifier in environment variable %s\n", var); + unsigned long long uv = (unsigned long long) v; + *dest_chunk = uv; + } + else + { + *dest_chunk = 1; + } + free(str); + } +} + +static int convert_place_name(const char *str, size_t n) +{ + static const char *strings[] = { "threads", "cores", "sockets", NULL }; + int mode = stringsn_cmp(strings, str, n); + if (mode < 0) + _STARPU_ERROR("place abstract name parse error\n"); + return mode+1; /* 0 is for undefined abstract name */ +} + +/* Note: this function modifies the string str */ +static void read_a_place_name(char *str, struct starpu_omp_place *places) +{ + int i = 0; + /* detect exclusion of abstract 
name expressed as '!' prefix */ + if (str[i] == '!') + { + places->abstract_excluded = 1; + i++; + } + else + { + places->abstract_excluded = 0; + } + /* detect length value for abstract name expressed as '(length)' suffix) */ + char *begin_length_spec = strchr(str+i,'('); + if (begin_length_spec != NULL) + { + char *end_length_spec = strrchr(begin_length_spec+1, ')'); + if (end_length_spec == NULL || end_length_spec <= begin_length_spec+1) + _STARPU_ERROR("parse error in places list\n"); + *begin_length_spec = '\0'; + *end_length_spec = '\0'; + errno = 0; + int v = (int)strtol(begin_length_spec+1, NULL, 10); + if (errno != 0) + _STARPU_ERROR("parse error in places list\n"); + places->abstract_length = v; + } + else + { + places->abstract_length = 1; + } + /* convert abstract place name string to corresponding value */ + { + int mode = convert_place_name(str+i, strlen(str+i)); + STARPU_ASSERT(mode >= starpu_omp_place_threads && mode <= starpu_omp_place_sockets); + places->abstract_name = mode; + places->numeric_places = NULL; + places->nb_numeric_places = 0; + } +} + +static void read_a_places_list(const char *str, struct starpu_omp_place *places) +{ + if (str[0] == '\0') + { + places->numeric_places = NULL; + places->nb_numeric_places = 0; + places->abstract_name = starpu_omp_place_undefined; + return; + } + enum + { + state_split, + state_read_brace_prefix, + state_read_opening_brace, + state_read_numeric_prefix, + state_read_numeric, + state_split_numeric, + state_read_closing_brace, + state_read_brace_suffix, + }; + struct starpu_omp_numeric_place *places_list = NULL; + int places_list_size = 0; + int nb_places = 0; + int *included_items_list = NULL; + int included_items_list_size = 0; + int nb_included_items = 0; + int *excluded_items_list = NULL; + int excluded_items_list_size = 0; + int nb_excluded_items = 0; + int exclude_place_flag = 0; + int exclude_item_flag = 0; + int i = 0; + int state = state_read_brace_prefix; + while (1) + { + switch (state) + { + 
/* split a comma separated list of numerical places */ + case state_split: + if (str[i] == '\0') + { + goto eol; + } + else if (str[i] != ',') + _STARPU_ERROR("parse error in places list\n"); + i++; + state = state_read_brace_prefix; + break; + /* read optional exclude flag '!' for numerical place */ + case state_read_brace_prefix: + exclude_place_flag = 0; + if (str[i] == '!') + { + exclude_place_flag = 1; + i++; + } + state = state_read_opening_brace; + break; + /* read place opening brace */ + case state_read_opening_brace: + if (str[i] != '{') + _STARPU_ERROR("parse error in places list\n"); + i++; + state = state_read_numeric_prefix; + break; + /* read optional exclude flag '!' for numerical item */ + case state_read_numeric_prefix: + exclude_item_flag = 0; + if (str[i] == '!') + { + exclude_item_flag = 1; + i++; + } + state = state_read_numeric; + break; + /* read numerical item */ + case state_read_numeric: + { + char *endptr = NULL; + errno = 0; + int v = (int)strtol(str+i, &endptr, 10); + if (errno != 0) + _STARPU_ERROR("parse error in places list, strtol failed with error %s\n", strerror(errno)); + if (exclude_item_flag) + { + if (excluded_items_list_size == 0) + { + excluded_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE; + _STARPU_MALLOC(excluded_items_list, excluded_items_list_size * sizeof(int)); + } + else if (nb_excluded_items == excluded_items_list_size) + { + excluded_items_list_size *= 2; + _STARPU_REALLOC(excluded_items_list, excluded_items_list_size * sizeof(int)); + } + excluded_items_list[nb_excluded_items] = v; + nb_excluded_items++; + } + else + { + if (included_items_list_size == 0) + { + included_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE; + _STARPU_MALLOC(included_items_list, included_items_list_size * sizeof(int)); + } + else if (nb_included_items == included_items_list_size) + { + included_items_list_size *= 2; + _STARPU_REALLOC(included_items_list, included_items_list_size * sizeof(int)); + } + 
included_items_list[nb_included_items] = v; + nb_included_items++; + } + exclude_item_flag = 0; + i = endptr - str; + state = state_split_numeric; + } + break; + /* read comma separated or colon separated numerical item list */ + case state_split_numeric: + if (str[i] == ':') + /* length and stride colon separated arguments not supported for now */ + _STARPU_ERROR("colon support unimplemented in numeric place list"); + if (str[i] == ',') + { + i++; + state = state_read_numeric_prefix; + } + else + { + state = state_read_closing_brace; + } + break; + /* read end of numerical item list */ + case state_read_closing_brace: + if (str[i] != '}') + _STARPU_ERROR("parse error in places list\n"); + if (places_list_size == 0) + { + places_list_size = _STARPU_INITIAL_PLACES_LIST_SIZE; + _STARPU_MALLOC(places_list, places_list_size * sizeof(*places_list)); + } + else if (nb_places == places_list_size) + { + places_list_size *= 2; + _STARPU_REALLOC(places_list, places_list_size * sizeof(*places_list)); + } + places_list[nb_places].excluded_place = exclude_place_flag; + places_list[nb_places].included_numeric_items = included_items_list; + places_list[nb_places].nb_included_numeric_items = nb_included_items; + places_list[nb_places].excluded_numeric_items = excluded_items_list; + places_list[nb_places].nb_excluded_numeric_items = nb_excluded_items; + nb_places++; + exclude_place_flag = 0; + included_items_list = NULL; + included_items_list_size = 0; + nb_included_items = 0; + excluded_items_list = NULL; + excluded_items_list_size = 0; + nb_excluded_items = 0; + i++; + state = state_read_brace_suffix; + break; + /* read optional place colon separated suffix */ + case state_read_brace_suffix: + if (str[i] == ':') + /* length and stride colon separated arguments not supported for now */ + _STARPU_ERROR("colon support unimplemented in numeric place list"); + state = state_split; + break; + default: + _STARPU_ERROR("invalid state in parsing places list\n"); + } + } + +eol: + 
places->numeric_places = places_list; + places->nb_numeric_places = nb_places; + places->abstract_name = starpu_omp_place_numerical; +} + +static void convert_places_string(const char *_str, struct starpu_omp_place *places) +{ + char *str = strdup(_str); + if (str == NULL) + _STARPU_ERROR("memory allocation failed\n"); + remove_spaces(str); + if (str[0] != '\0') + { + /* check whether this is the start of an abstract name */ + if (isalpha(str[0]) || (str[0] == '!' && isalpha(str[1]))) + { + read_a_place_name(str, places); + } + /* else the string must contain a list of braces */ + else + { + read_a_places_list(str, places); + } + } + free(str); +} + +static void free_places(struct starpu_omp_place *places) +{ + int i; + for (i = 0; i < places->nb_numeric_places; i++) + { + if (places->numeric_places[i].nb_included_numeric_items > 0) + { + free(places->numeric_places[i].included_numeric_items); + } + if (places->numeric_places[i].nb_excluded_numeric_items > 0) + { + free(places->numeric_places[i].excluded_numeric_items); + } + } + if (places->nb_numeric_places > 0) + { + free(places->numeric_places); + } +} + +static int _get_env_string_var(const char *str, const char *strings[], int *dst) +{ + int val; + + if (!str) + return 0; + + val = _strings_cmp(strings, str); + if (val < 0) + return 0; + + *dst = val; + return 1; +} + +static void read_proc_bind_var() +{ + const int max_levels = _initial_icv_values.max_active_levels_var + 1; + int *bind_list = NULL; + char *env; + + _STARPU_CALLOC(bind_list, max_levels, sizeof(*bind_list)); + + env = starpu_getenv("OMP_PROC_BIND"); + if (env) + { + static const char *strings[] = { "false", "true", "master", "close", "spread", NULL }; + char *saveptr, *token; + int level = 0; + + token = strtok_r(env, ",", &saveptr); + for (; token != NULL; token = strtok_r(NULL, ",", &saveptr)) + { + int value; + + if (!_get_env_string_var(token, strings, &value)) + { + _STARPU_MSG("StarPU: Invalid value for environment variable 
OMP_PROC_BIND\n"); + break; + } + + bind_list[level++] = value; + } + } + _initial_icv_values.bind_var = bind_list; +} + +static void read_num_threads_var() +{ + const int max_levels = _initial_icv_values.max_active_levels_var + 1; + int *num_threads_list = NULL; + char *env; + + _STARPU_CALLOC(num_threads_list, max_levels, sizeof(*num_threads_list)); + + env = starpu_getenv("OMP_NUM_THREADS"); + if (env) + { + char *saveptr, *token; + int level = 0; + + token = strtok_r(env, ",", &saveptr); + for (; token != NULL; token = strtok_r(NULL, ",", &saveptr)) + { + int value; + + if (!read_int_var(token, &value)) + { + _STARPU_MSG("StarPU: Invalid value for environment variable OMP_NUM_THREADS\n"); + break; + } + + num_threads_list[level++] = value; + } + } + + _initial_icv_values.nthreads_var = num_threads_list; +} + +static void read_omp_environment(void) +{ + const char *boolean_strings[] = { "false", "true", NULL }; + + _initial_icv_values.dyn_var = starpu_getenv_string_var_default("OMP_DYNAMIC", boolean_strings, _initial_icv_values.dyn_var); + _initial_icv_values.nest_var = starpu_getenv_string_var_default("OMP_NESTED", boolean_strings, _initial_icv_values.nest_var); + + read_sched_var("OMP_SCHEDULE", &_initial_icv_values.run_sched_var, &_initial_icv_values.run_sched_chunk_var); + _initial_icv_values.stacksize_var = starpu_getenv_size_default("OMP_STACKSIZE", _initial_icv_values.stacksize_var); + + { + const char *strings[] = { "passive", "active", NULL }; + _initial_icv_values.wait_policy_var = starpu_getenv_string_var_default("OMP_WAIT_POLICY", strings, _initial_icv_values.wait_policy_var); + } + _initial_icv_values.thread_limit_var = starpu_getenv_number_default("OMP_THREAD_LIMIT", _initial_icv_values.thread_limit_var); + _initial_icv_values.max_active_levels_var = starpu_getenv_number_default("OMP_MAX_ACTIVE_LEVELS", _initial_icv_values.max_active_levels_var); + _initial_icv_values.cancel_var = starpu_getenv_string_var_default("OMP_CANCELLATION", 
boolean_strings, _initial_icv_values.cancel_var); + _initial_icv_values.default_device_var = starpu_getenv_number_default("OMP_DEFAULT_DEVICE", _initial_icv_values.default_device_var); + _initial_icv_values.max_task_priority_var = starpu_getenv_number_default("OMP_MAX_TASK_PRIORITY", _initial_icv_values.max_task_priority_var); + + /* Avoid overflow e.g. in num_threads_list allocation */ + STARPU_ASSERT_MSG(_initial_icv_values.max_active_levels_var > 0 && _initial_icv_values.max_active_levels_var < 1000000, "OMP_MAX_ACTIVE_LEVELS should have a reasonable value"); + /* TODO: check others */ + + read_proc_bind_var(); + read_num_threads_var(); + + /* read OMP_PLACES */ + { + memset(&_initial_icv_values.places, 0, sizeof(_initial_icv_values.places)); + _initial_icv_values.places.abstract_name = starpu_omp_place_undefined; + const char *env = starpu_getenv("OMP_PLACES"); + if (env) + { + convert_places_string(env, &_initial_icv_values.places); + } + } + + _starpu_omp_initial_icv_values = &_initial_icv_values; +} + +static void free_omp_environment(void) +{ + /**/ + _starpu_omp_initial_icv_values = NULL; + + /* OMP_DYNAMIC */ + /* OMP_NESTED */ + /* OMP_SCHEDULE */ + /* OMP_STACKSIZE */ + /* OMP_WAIT_POLICY */ + /* OMP_THREAD_LIMIT */ + /* OMP_MAX_ACTIVE_LEVELS */ + /* OMP_CANCELLATION */ + /* OMP_DEFAULT_DEVICE */ + /* OMP_MAX_TASK_PRIORITY */ + + /* OMP_PROC_BIND */ + free(_initial_icv_values.bind_var); + _initial_icv_values.bind_var = NULL; + + /* OMP_NUM_THREADS */ + free(_initial_icv_values.nthreads_var); + _initial_icv_values.nthreads_var = NULL; + + /* OMP_PLACES */ + free_places(&_initial_icv_values.places); +} + +static void display_omp_environment(int verbosity_level) +{ + if (verbosity_level > 0) + { + printf("OPENMP DISPLAY ENVIRONMENT BEGIN\n"); + printf(" _OPENMP = 'xxxxxx'\n"); + printf(" [host] OMP_DYNAMIC = '%s'\n", _starpu_omp_initial_icv_values->dyn_var?"TRUE":"FALSE"); + printf(" [host] OMP_NESTED = '%s'\n", 
_starpu_omp_initial_icv_values->nest_var?"TRUE":"FALSE"); + printf(" [host] OMP_SCHEDULE = '"); + switch (_starpu_omp_initial_icv_values->run_sched_var) + { + case starpu_omp_sched_static: + printf("STATIC, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); + break; + case starpu_omp_sched_dynamic: + printf("DYNAMIC, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); + break; + case starpu_omp_sched_guided: + printf("GUIDED, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); + break; + case starpu_omp_sched_auto: + printf("AUTO, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); + break; + case starpu_omp_sched_undefined: + default: + break; + } + printf("'\n"); + + printf(" [host] OMP_STACKSIZE = '%d'\n", _starpu_omp_initial_icv_values->stacksize_var); + printf(" [host] OMP_WAIT_POLICY = '%s'\n", _starpu_omp_initial_icv_values->wait_policy_var?"ACTIVE":"PASSIVE"); + printf(" [host] OMP_MAX_ACTIVE_LEVELS = '%d'\n", _starpu_omp_initial_icv_values->max_active_levels_var); + printf(" [host] OMP_CANCELLATION = '%s'\n", _starpu_omp_initial_icv_values->cancel_var?"TRUE":"FALSE"); + printf(" [host] OMP_DEFAULT_DEVICE = '%d'\n", _starpu_omp_initial_icv_values->default_device_var); + printf(" [host] OMP_MAX_TASK_PRIORITY = '%d'\n", _starpu_omp_initial_icv_values->max_task_priority_var); + printf(" [host] OMP_PROC_BIND = '"); + { + int level; + for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++) + { + if (level > 0) + { + printf(", "); + } + switch (_starpu_omp_initial_icv_values->bind_var[level]) + { + case starpu_omp_proc_bind_false: + printf("FALSE"); + break; + case starpu_omp_proc_bind_true: + printf("TRUE"); + break; + case starpu_omp_proc_bind_master: + printf("MASTER"); + break; + case starpu_omp_proc_bind_close: + printf("CLOSE"); + break; + case starpu_omp_proc_bind_spread: + printf("SPREAD"); + break; + default: + break; + } + } + } + printf("'\n"); + printf(" [host] OMP_NUM_THREADS = '"); 
+ { + int level; + for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++) + { + if (level > 0) + { + printf(", "); + } + printf("%d", _starpu_omp_initial_icv_values->nthreads_var[level]); + } + } + printf("'\n"); + printf(" [host] OMP_PLACES = '"); + { + struct starpu_omp_place *places = &_starpu_omp_initial_icv_values->places; + if (places->nb_numeric_places > 0) + { + int p; + for (p = 0; p < places->nb_numeric_places; p++) + { + if (p > 0) + { + printf(","); + } + struct starpu_omp_numeric_place *np = &places->numeric_places[p]; + if (np->excluded_place) + { + printf("!"); + } + printf("{"); + int i; + for (i = 0; i < np->nb_included_numeric_items; i++) + { + if (i > 0) + { + printf(","); + } + printf("%d", np->included_numeric_items[i]); + } + for (i = 0; i < np->nb_excluded_numeric_items; i++) + { + if (i > 0 || np->nb_included_numeric_items) + { + printf(","); + } + printf("!%d", np->excluded_numeric_items[i]); + } + printf("}"); + /* TODO: print length/stride suffix */ + } + } + else + { + if (places->abstract_excluded) + { + printf("!"); + } + switch (places->abstract_name) + { + case starpu_omp_place_threads: + printf("THREADS"); + break; + case starpu_omp_place_cores: + printf("CORES"); + break; + case starpu_omp_place_sockets: + printf("SOCKETS"); + break; + case starpu_omp_place_numerical: + printf(""); + break; + case starpu_omp_place_undefined: + default: + break; + } + if (places->abstract_length) + { + printf("(%d)", places->abstract_length); + } + } + } + printf("'\n"); + printf(" [host] OMP_THREAD_LIMIT = '%d'\n", _initial_icv_values.thread_limit_var); + + if (verbosity_level > 1) + { + /* no vendor specific runtime variable */ + } + printf("OPENMP DISPLAY ENVIRONMENT END\n"); + } +} + +void _starpu_omp_environment_init(void) +{ + read_omp_environment(); + + const char *strings[] = { "false", "true", "verbose", NULL }; + int display_env = starpu_getenv_string_var_default("OMP_DISPLAY_ENV", strings, 0); + if 
(display_env > 0) + { + display_omp_environment(display_env); + } +} + +int _starpu_omp_environment_check(void) +{ + if (starpu_cpu_worker_get_count() == 0) + { + _STARPU_DISP("OpenMP support needs at least 1 CPU worker\n"); + return -EINVAL; + } + + int i; + for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) + { + struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i); + if (sched_policy && (strcmp(sched_policy->policy_name, _starpu_sched_graph_test_policy.policy_name) == 0)) + { + _STARPU_DISP("OpenMP support is not compatible with scheduler '%s' ('%s')\n", _starpu_sched_graph_test_policy.policy_name, _starpu_sched_graph_test_policy.policy_description); + return -EINVAL; + } + } + return 0; +} + +void _starpu_omp_environment_exit(void) +{ + free_omp_environment(); +} +#endif /* STARPU_OPENMP */ diff --git a/src/util/openmp_runtime_support_omp_api.c b/src/util/openmp_runtime_support_omp_api.c new file mode 100644 index 0000000..f8fb1ef --- /dev/null +++ b/src/util/openmp_runtime_support_omp_api.c @@ -0,0 +1,303 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#ifdef STARPU_OPENMP +#include + +void starpu_omp_set_num_threads(int threads) +{ + STARPU_ASSERT(threads > 0); + struct starpu_omp_task *task = _starpu_omp_get_task(); + STARPU_ASSERT(task != NULL); + struct starpu_omp_region *region; + region = task->owner_region; + STARPU_ASSERT(region != NULL); + region->icvs.nthreads_var[0] = threads; +} + +int starpu_omp_get_num_threads() +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + struct starpu_omp_region *region; + if (task == NULL) + return 1; + + region = task->owner_region; + return region->nb_threads; +} + +int starpu_omp_get_thread_num() +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + if (task == NULL) + return 0; + return _starpu_omp_get_region_thread_num(task->owner_region); +} + +int starpu_omp_get_max_threads() +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + int max_threads = parallel_region->icvs.nthreads_var[0]; + /* TODO: for now, nested parallel sections are not supported, thus we + * open an active parallel section only if the generating region is the + * initial region */ + if (parallel_region->level > 0) + { + max_threads = 1; + } + + return max_threads; +} + +int starpu_omp_get_num_procs(void) +{ + /* starpu_cpu_worker_get_count defined as topology.nworkers[STARPU_CPU_WORKER] */ + return starpu_cpu_worker_get_count(); +} + +int starpu_omp_in_parallel(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.active_levels_var > 0; +} + +void starpu_omp_set_dynamic(int dynamic_threads) +{ + (void) dynamic_threads; + /* TODO: dynamic adjustment of the number of threads is not supported for now */ +} + +int starpu_omp_get_dynamic(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.dyn_var; +} + +void starpu_omp_set_nested(int nested) +{ + (void) 
nested; + /* TODO: nested parallelism not supported for now */ +} + +int starpu_omp_get_nested(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.nest_var; +} + +int starpu_omp_get_cancellation(void) +{ + return _starpu_omp_global_state->icvs.cancel_var; +} + +void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier) +{ + struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + STARPU_ASSERT(kind == starpu_omp_sched_static + || kind == starpu_omp_sched_dynamic + || kind == starpu_omp_sched_guided + || kind == starpu_omp_sched_auto); + STARPU_ASSERT(modifier >= 0); + parallel_region->icvs.run_sched_var = kind; + parallel_region->icvs.run_sched_chunk_var = (unsigned long long)modifier; +} + +void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + *kind = parallel_region->icvs.run_sched_var; + *modifier = (int)parallel_region->icvs.run_sched_chunk_var; +} + +int starpu_omp_get_thread_limit(void) +{ + return starpu_cpu_worker_get_count(); +} + +void starpu_omp_set_max_active_levels(int max_levels) +{ + struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device; + if (max_levels > 1) + { + /* TODO: nested parallelism not supported for now */ + max_levels = 1; + } + device->icvs.max_active_levels_var = max_levels; +} + +int starpu_omp_get_max_active_levels(void) +{ + const struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device; + return device->icvs.max_active_levels_var; +} + +int starpu_omp_get_level(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.levels_var; +} + +int starpu_omp_get_ancestor_thread_num(int level) +{ + struct starpu_omp_region 
*parallel_region; + + if (level == 0) + return 0; + + parallel_region = _starpu_omp_get_region_at_level(level); + if (!parallel_region) + return -1; + + return _starpu_omp_get_region_thread_num(parallel_region); +} + +int starpu_omp_get_team_size(int level) +{ + struct starpu_omp_region *parallel_region; + + if (level == 0) + return 1; + + parallel_region = _starpu_omp_get_region_at_level(level); + if (!parallel_region) + return -1; + + return parallel_region->nb_threads; +} + +int starpu_omp_get_active_level(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.active_levels_var; +} + +int starpu_omp_in_final(void) +{ + const struct starpu_omp_task *task = _starpu_omp_get_task(); + return task->flags & STARPU_OMP_TASK_FLAGS_FINAL; +} + +enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + int proc_bind = parallel_region->icvs.bind_var[0]; + return proc_bind; +} + +int starpu_omp_get_num_places(void) +{ + struct starpu_omp_place *places = &_starpu_omp_initial_icv_values->places; + return places->nb_numeric_places; +} + +int starpu_omp_get_place_num_procs(int place_num) +{ + (void) place_num; + /* TODO */ + return 0; +} + +void starpu_omp_get_place_proc_ids(int place_num, int *ids) +{ + (void) place_num; + (void) ids; + /* TODO */ +} + +int starpu_omp_get_place_num(void) +{ + /* TODO */ + return -1; +} + +int starpu_omp_get_partition_num_places(void) +{ + /* TODO */ + return 0; +} + +void starpu_omp_get_partition_place_nums(int *place_nums) +{ + (void) place_nums; + /* TODO */ +} + +void starpu_omp_set_default_device(int device_num) +{ + (void) device_num; + /* TODO: set_default_device not supported for now */ +} + +int starpu_omp_get_default_device(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return 
parallel_region->icvs.default_device_var; +} + +int starpu_omp_get_num_devices(void) +{ + /* TODO: get_num_devices not supported for now + * assume 1 device */ + return 1; +} + +int starpu_omp_get_num_teams(void) +{ + /* TODO: num_teams not supported for now + * assume 1 team */ + return 1; +} + +int starpu_omp_get_team_num(void) +{ + /* TODO: team_num not supported for now + * assume team_num 0 */ + return 0; +} + +int starpu_omp_is_initial_device(void) +{ + struct starpu_omp_task *task = _starpu_omp_get_task(); + if (!task) + return 0; + const struct starpu_omp_device * const device = task->owner_region->owner_device; + return device == _starpu_omp_global_state->initial_device; +} + +int starpu_omp_get_initial_device(void) +{ + /* Assume only one device for now. */ + return 0; +} + +int starpu_omp_get_max_task_priority(void) +{ + const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; + return parallel_region->icvs.max_task_priority_var; +} + +double starpu_omp_get_wtime(void) +{ + return 1e-6 * (starpu_timing_now() - _starpu_omp_clock_ref); +} + +double starpu_omp_get_wtick(void) +{ + /* arbitrary precision value */ + return 1e-6; +} +#endif /* STARPU_OPENMP */ diff --git a/src/util/starpu_create_sync_task.c b/src/util/starpu_create_sync_task.c new file mode 100644 index 0000000..a84a080 --- /dev/null +++ b/src/util/starpu_create_sync_task.c @@ -0,0 +1,55 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) +{ + starpu_tag_declare_deps_array(sync_tag, ndeps, deps); + + /* We create an empty task */ + struct starpu_task *sync_task = starpu_task_create(); + sync_task->name = "create_sync_task"; + + sync_task->use_tag = 1; + sync_task->tag_id = sync_tag; + + sync_task->callback_func = callback; + sync_task->callback_arg = callback_arg; + + /* This task does nothing */ + sync_task->cl = NULL; + + int sync_ret = _starpu_task_submit_internally(sync_task); + STARPU_ASSERT(!sync_ret); +} + +void starpu_create_callback_task(void (*callback)(void *), void *callback_arg) +{ + /* We create an empty task */ + struct starpu_task *empty_task = starpu_task_create(); + empty_task->name = "empty_task"; + empty_task->callback_func = callback; + empty_task->callback_arg = callback_arg; + + /* This task does nothing */ + empty_task->cl = NULL; + + int ret = _starpu_task_submit_internally(empty_task); + STARPU_ASSERT(!ret); +} diff --git a/src/util/starpu_data_cpy.c b/src/util/starpu_data_cpy.c new file mode 100644 index 0000000..90f9cf7 --- /dev/null +++ b/src/util/starpu_data_cpy.c @@ -0,0 +1,213 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void common_data_cpy_func(void *descr[], void *cl_arg) +{ + unsigned interface_id = *(unsigned *)cl_arg; + + const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id); + const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods; + + int workerid = starpu_worker_get_id_check(); + enum starpu_worker_archtype type = starpu_worker_get_type(workerid); + unsigned memory_node = starpu_worker_get_memory_node(workerid); + + void *dst_interface = descr[0]; + void *src_interface = descr[1]; + + /* Driver porters: adding your driver here is very optional, any_to_any will be enough. 
*/ + + switch (type) + { + case STARPU_CPU_WORKER: + if (copy_methods->ram_to_ram) + { + copy_methods->ram_to_ram(src_interface, memory_node, dst_interface, memory_node); + return; + } + break; +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_WORKER: + { + cudaStream_t stream = starpu_cuda_get_local_stream(); + if (copy_methods->cuda_to_cuda_async) + { + copy_methods->cuda_to_cuda_async(src_interface, memory_node, dst_interface, memory_node, stream); + return; + } + else if (copy_methods->cuda_to_cuda) + { + copy_methods->cuda_to_cuda(src_interface, memory_node, dst_interface, memory_node); + return; + } + break; + } +#endif +#ifdef STARPU_USE_HIP + case STARPU_HIP_WORKER: + { + hipStream_t stream = starpu_hip_get_local_stream(); + if (copy_methods->hip_to_hip_async) + { + copy_methods->hip_to_hip_async(src_interface, memory_node, dst_interface, memory_node, stream); + return; + } + else if (copy_methods->hip_to_hip) + { + copy_methods->hip_to_hip(src_interface, memory_node, dst_interface, memory_node); + return; + } + break; + } +#endif + case STARPU_OPENCL_WORKER: + if (copy_methods->opencl_to_opencl) + { + copy_methods->opencl_to_opencl(src_interface, memory_node, dst_interface, memory_node); + return; + } + break; + default: + /* unknown architecture */ + STARPU_ABORT(); + } + STARPU_ASSERT(copy_methods->any_to_any); + copy_methods->any_to_any(src_interface, memory_node, dst_interface, memory_node, NULL); + +} + +static struct starpu_perfmodel copy_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "starpu_data_cpy" +}; + +static struct starpu_codelet copy_cl = +{ + .where = STARPU_CPU|STARPU_CUDA|STARPU_HIP|STARPU_OPENCL, + .cpu_funcs = {common_data_cpy_func}, + .cuda_funcs = {common_data_cpy_func}, + .opencl_funcs = {common_data_cpy_func}, + .hip_funcs = {common_data_cpy_func}, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_R}, + .model = ©_model +}; + +int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, + int asynchronous, 
void (*callback_func)(void*), void *callback_arg, + int reduction, struct starpu_task *reduction_dep_task, int priority) +{ + if (dst_handle == src_handle) + { + if (callback_func) + callback_func(callback_arg); + return 0; + } + + struct starpu_task *task = starpu_task_create(); + STARPU_ASSERT(task); + task->name = "data_cpy"; + + struct _starpu_job *j = _starpu_get_job_associated_to_task(task); + if (reduction) + { + j->reduction_task = reduction; + if (reduction_dep_task) + starpu_task_declare_deps_array(task, 1, &reduction_dep_task); + } + + task->cl = ©_cl; + + STARPU_ASSERT(dst_handle->ops->interfaceid == src_handle->ops->interfaceid); + unsigned *interface_id; + _STARPU_MALLOC(interface_id, sizeof(*interface_id)); + *interface_id = dst_handle->ops->interfaceid; + task->cl_arg = interface_id; + task->cl_arg_size = sizeof(*interface_id); + task->cl_arg_free = 1; + task->priority = priority; + task->callback_func = callback_func; + task->callback_arg = callback_arg; + + /* FIXME: priority!! 
*/ + STARPU_TASK_SET_HANDLE(task, dst_handle, 0); + STARPU_TASK_SET_HANDLE(task, src_handle, 1); + + task->synchronous = !asynchronous; + + int ret = _starpu_task_submit_internally(task); + STARPU_ASSERT_MSG(ret != -ENODEV, "Implementation of _starpu_data_cpy is needed for this only available architecture\n"); + STARPU_ASSERT_MSG(!ret, "Task data copy failed with code: %d\n", ret); + + return 0; +} + +int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, + int asynchronous, void (*callback_func)(void*), void *callback_arg) +{ + return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL, STARPU_DEFAULT_PRIO); +} + +int starpu_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, + int asynchronous, void (*callback_func)(void*), void *callback_arg, int priority) +{ + return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL, priority); +} + +/* TODO: implement copy on write, and introduce starpu_data_dup as well */ +int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous) +{ + _starpu_spin_lock(&src_handle->header_lock); + if (src_handle->readonly_dup) + { + /* Already a ro duplicate, just return it with one more ref */ + *dst_handle = src_handle->readonly_dup; + _starpu_spin_unlock(&src_handle->header_lock); + _starpu_spin_lock(&(*dst_handle)->header_lock); + (*dst_handle)->aliases++; + _starpu_spin_unlock(&(*dst_handle)->header_lock); + return 0; + } + if (src_handle->readonly) + { + src_handle->aliases++; + _starpu_spin_unlock(&src_handle->header_lock); + *dst_handle = src_handle; + return 0; + } + _starpu_spin_unlock(&src_handle->header_lock); + + starpu_data_register_same(dst_handle, src_handle); + _starpu_data_cpy(*dst_handle, src_handle, asynchronous, NULL, NULL, 0, NULL, STARPU_DEFAULT_PRIO); + (*dst_handle)->readonly = 1; + + 
_starpu_spin_lock(&src_handle->header_lock); + src_handle->readonly_dup = (*dst_handle); + (*dst_handle)->readonly_dup_of = src_handle; + _starpu_spin_unlock(&src_handle->header_lock); + + return 0; +} diff --git a/src/util/starpu_data_cpy.h b/src/util/starpu_data_cpy.h new file mode 100644 index 0000000..38c7a06 --- /dev/null +++ b/src/util/starpu_data_cpy.h @@ -0,0 +1,33 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_DATA_CPY_H__ +#define __STARPU_DATA_CPY_H__ + +/** @file */ + +#include + +#pragma GCC visibility push(hidden) + +int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, + int asynchronous, void (*callback_func)(void*), void *callback_arg, + int reduction, struct starpu_task *reduction_dep_task, int priority); + +#pragma GCC visibility pop + +#endif // __STARPU_DATA_CPY_H__ + diff --git a/src/util/starpu_task_insert.c b/src/util/starpu_task_insert.c new file mode 100644 index 0000000..8c7a3ed --- /dev/null +++ b/src/util/starpu_task_insert.c @@ -0,0 +1,226 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This file provides an interface that is very similar to that of the Quark + * scheduler from the PLASMA project (see http://icl.cs.utk.edu/plasma/). */ + +#include +#include +#include +#include + +void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...) +{ + struct starpu_codelet_pack_arg_data state; + va_list varg_list; + int arg_type; + + starpu_codelet_pack_arg_init(&state); + + va_start(varg_list, arg_buffer_size); + while((arg_type = va_arg(varg_list, int)) != 0) + { + if (arg_type==STARPU_VALUE) + { + /* We have a constant value: this should be followed by a pointer to the cst value and the size of the constant */ + void *ptr = va_arg(varg_list, void *); + size_t ptr_size = va_arg(varg_list, size_t); + + starpu_codelet_pack_arg(&state, ptr, ptr_size); + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); + } + } + va_end(varg_list); + + starpu_codelet_pack_arg_fini(&state, arg_buffer, arg_buffer_size); +} + +void _starpu_codelet_unpack_args_and_copyleft(char *cl_arg, void *_buffer, size_t buffer_size, va_list varg_list) +{ + size_t current_arg_offset = 0; + int nargs, arg; + + /* We fill the different pointers with the appropriate arguments */ + memcpy(&nargs, cl_arg, sizeof(nargs)); + current_arg_offset += sizeof(nargs); + + for (arg = 0; arg < 
nargs; arg++) + { + void *argptr = va_arg(varg_list, void *); + + /* If not reading all cl_args */ + // NULL was the initial end marker, we now use 0 + // 0 and NULL should be the same value, but we + // keep both equalities for systems on which they could be different + // cppcheck-suppress duplicateExpression + if(argptr == 0 || argptr == NULL) + break; + + size_t arg_size; + memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); + current_arg_offset += sizeof(arg_size); + + memcpy(argptr, cl_arg+current_arg_offset, arg_size); + current_arg_offset += arg_size; + } + + if (buffer_size) + { + int left = nargs-arg; + char *buffer = (char *) _buffer; + int current_buffer_offset = 0; + memcpy(buffer, (int *)&left, sizeof(left)); + current_buffer_offset += sizeof(left); + for (; arg < nargs; arg++) + { + size_t arg_size; + memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); + current_arg_offset += sizeof(arg_size); + memcpy(buffer+current_buffer_offset, &arg_size, sizeof(arg_size)); + current_buffer_offset += sizeof(arg_size); + + memcpy(buffer+current_buffer_offset, cl_arg+current_arg_offset, arg_size); + current_arg_offset += arg_size; + current_buffer_offset += arg_size; + } + } +} + +void starpu_codelet_unpack_args_and_copyleft(void *_cl_arg, void *buffer, size_t buffer_size, ...) +{ + char *cl_arg = (char *) _cl_arg; + va_list varg_list; + + STARPU_ASSERT(cl_arg); + va_start(varg_list, buffer_size); + + _starpu_codelet_unpack_args_and_copyleft(cl_arg, buffer, buffer_size, varg_list); + + va_end(varg_list); +} + +void starpu_codelet_unpack_args(void *_cl_arg, ...) 
+{ + char *cl_arg = (char *) _cl_arg; + va_list varg_list; + + STARPU_ASSERT(cl_arg); + va_start(varg_list, _cl_arg); + + _starpu_codelet_unpack_args_and_copyleft(cl_arg, NULL, 0, varg_list); + + va_end(varg_list); +} + +static +struct starpu_task *_starpu_task_build_v(struct starpu_task *ptask, struct starpu_codelet *cl, const char* task_name, int cl_arg_free, va_list varg_list) +{ + va_list varg_list_copy; + int ret; + + struct starpu_task *task = ptask ? ptask : starpu_task_create(); + task->name = task_name ? task_name : task->name; + task->cl_arg_free = cl_arg_free; + + va_copy(varg_list_copy, varg_list); + ret = _starpu_task_insert_create(cl, task, varg_list_copy); + va_end(varg_list_copy); + + if (ret != 0) + { + task->destroy = 0; + starpu_task_destroy(task); + } + return (ret == 0) ? task : NULL; +} + +#undef starpu_task_submit +int _starpu_task_insert_v(struct starpu_codelet *cl, va_list varg_list) +{ + struct starpu_task *task; + int ret; + + task = _starpu_task_build_v(NULL, cl, NULL, 1, varg_list); + ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", + task, task->cl, + (cl == NULL) ? "none" : + task->cl->name ? task->cl->name : + (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); + + task->destroy = 0; + starpu_task_destroy(task); + } + return ret; +} + +#undef starpu_task_set +int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...) +{ + va_list varg_list; + + va_start(varg_list, cl); + _starpu_task_build_v(task, cl, NULL, 1, varg_list); + va_end(varg_list); + return 0; +} + +#undef starpu_task_insert +int starpu_task_insert(struct starpu_codelet *cl, ...) +{ + va_list varg_list; + int ret; + + va_start(varg_list, cl); + ret = _starpu_task_insert_v(cl, varg_list); + va_end(varg_list); + return ret; +} + +#undef starpu_insert_task +int starpu_insert_task(struct starpu_codelet *cl, ...) 
+{ + va_list varg_list; + int ret; + + va_start(varg_list, cl); + ret = _starpu_task_insert_v(cl, varg_list); + va_end(varg_list); + return ret; +} + +#undef starpu_task_build +struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...) +{ + struct starpu_task *task; + va_list varg_list; + + va_start(varg_list, cl); + task = _starpu_task_build_v(NULL, cl, "task_build", 0, varg_list); + if (task && task->cl_arg) + { + task->cl_arg_free = 1; +} + va_end(varg_list); + + return task; +} diff --git a/src/util/starpu_task_insert_utils.c b/src/util/starpu_task_insert_utils.c new file mode 100644 index 0000000..d47bf9b --- /dev/null +++ b/src/util/starpu_task_insert_utils.c @@ -0,0 +1,995 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include + +void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state) +{ + state->arg_buffer = NULL; + state->arg_buffer_size = 0; + state->arg_buffer_used = 0; + state->current_offset = sizeof(int); + state->nargs = 0; +} + +void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size) +{ + STARPU_ASSERT_MSG(state->current_offset >= sizeof(int), "struct starpu_codelet_pack_arg has to be initialized with starpu_codelet_pack_arg_init"); + if (state->current_offset + sizeof(ptr_size) + ptr_size > state->arg_buffer_size) + { + if (state->arg_buffer_size == 0) + state->arg_buffer_size = 128 + sizeof(ptr_size) + ptr_size; + else + state->arg_buffer_size = 2 * state->arg_buffer_size + sizeof(ptr_size) + ptr_size; + _STARPU_REALLOC(state->arg_buffer, state->arg_buffer_size); + } + memcpy(state->arg_buffer+state->current_offset, (void *)&ptr_size, sizeof(ptr_size)); + state->current_offset += sizeof(ptr_size); + + memcpy(state->arg_buffer+state->current_offset, ptr, ptr_size); + state->current_offset += ptr_size; + STARPU_ASSERT(state->current_offset <= state->arg_buffer_size); + state->arg_buffer_used = state->current_offset; + state->nargs++; +} + +void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size) +{ + if (state->nargs) + { + memcpy(state->arg_buffer, &state->nargs, sizeof(state->nargs)); + } + else + { + free(state->arg_buffer); + state->arg_buffer = NULL; + } + + *cl_arg = state->arg_buffer; + *cl_arg_size = state->arg_buffer_used; +} + +void starpu_codelet_unpack_arg_init(struct starpu_codelet_pack_arg_data *state, void *cl_arg, size_t cl_arg_size) +{ + state->arg_buffer = cl_arg; + state->arg_buffer_size = cl_arg_size; + state->arg_buffer_used = cl_arg_size; + state->current_offset = sizeof(int); + state->nargs = 0; +} + +void starpu_codelet_unpack_arg(struct starpu_codelet_pack_arg_data *state, void 
*ptr, size_t size) +{ + size_t ptr_size; + STARPU_ASSERT_MSG(state->current_offset + sizeof(size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); + memcpy((void *)&ptr_size, state->arg_buffer+state->current_offset, sizeof(ptr_size)); + STARPU_ASSERT_MSG(ptr_size==size, "The given size (%ld) is not the size of the next argument (%ld)\n", size, ptr_size); + state->current_offset += sizeof(size); + + STARPU_ASSERT_MSG(state->current_offset + size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", (long)size, (long)state->arg_buffer_size); + memcpy(ptr, state->arg_buffer+state->current_offset, ptr_size); + state->current_offset += size; + + state->nargs++; +} + +void starpu_codelet_dup_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size) +{ + STARPU_ASSERT_MSG(state->current_offset + sizeof(*size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); + memcpy((void*)size, state->arg_buffer+state->current_offset, sizeof(*size)); + state->current_offset += sizeof(*size); + + STARPU_ASSERT_MSG(state->current_offset + *size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", *size, (long)state->arg_buffer_size); + _STARPU_MALLOC(*ptr, *size); + memcpy(*ptr, state->arg_buffer+state->current_offset, *size); + state->current_offset += *size; + + state->nargs++; +} + +void starpu_codelet_pick_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size) +{ + STARPU_ASSERT_MSG(state->current_offset + sizeof(*size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); + memcpy((void*)size, state->arg_buffer+state->current_offset, sizeof(*size)); + state->current_offset += sizeof(*size); + + 
STARPU_ASSERT_MSG(state->current_offset + *size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", (long)(*size), (long)state->arg_buffer_size); + *ptr = state->arg_buffer+state->current_offset; + state->current_offset += *size; + + state->nargs++; +} + +void starpu_codelet_unpack_arg_fini(struct starpu_codelet_pack_arg_data *state) +{ + if (state->current_offset < state->arg_buffer_size) + { + _STARPU_MSG("Arguments still need to be unpacked from the starpu_codelet_pack_arg_data (offset %ld - buffer_size %ld)\n", state->current_offset, (long)state->arg_buffer_size); + } +} + +void starpu_codelet_unpack_discard_arg(struct starpu_codelet_pack_arg_data *state) +{ + size_t ptr_size; + memcpy((void *)&ptr_size, state->arg_buffer+state->current_offset, sizeof(ptr_size)); + + state->current_offset += sizeof(ptr_size); + state->current_offset += ptr_size; + + state->nargs++; +} + +void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room) +{ + if (current_buffer + room > STARPU_NMAXBUFS) + { + if (*allocated_buffers == 0) + { + int i; + struct starpu_codelet *cl2 = task->cl; + *allocated_buffers = (current_buffer + room) * 2; + _STARPU_MALLOC(task->dyn_handles, *allocated_buffers * sizeof(starpu_data_handle_t)); + for(i=0 ; idyn_handles[i] = task->handles[i]; + } + if (cl2->nbuffers == STARPU_VARIABLE_NBUFFERS || !cl2->dyn_modes) + { + _STARPU_MALLOC(task->dyn_modes, *allocated_buffers * sizeof(enum starpu_data_access_mode)); + for(i=0 ; idyn_modes[i] = task->modes[i]; + } + } + } + else if (current_buffer + room > *allocated_buffers) + { + *allocated_buffers = (current_buffer + room) * 2; + _STARPU_REALLOC(task->dyn_handles, *allocated_buffers * sizeof(starpu_data_handle_t)); + if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || !cl->dyn_modes) + { + _STARPU_REALLOC(task->dyn_modes, *allocated_buffers * sizeof(enum starpu_data_access_mode)); + } + } 
+ } +} + +void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle) +{ + STARPU_ASSERT(cl != NULL); + STARPU_ASSERT_MSG(cl->nbuffers == STARPU_VARIABLE_NBUFFERS || *current_buffer < cl->nbuffers, "Too many data passed to starpu_task_insert"); + + starpu_task_insert_data_make_room(cl, task, allocated_buffers, *current_buffer, 1); + STARPU_TASK_SET_HANDLE(task, handle, *current_buffer); + + enum starpu_data_access_mode arg_mode = (enum starpu_data_access_mode) arg_type & ~STARPU_SSEND & ~STARPU_NOFOOTPRINT; + + /* MPI_REDUX should be interpreted as RW|COMMUTE by the "ground" StarPU layer.*/ + if (arg_mode & STARPU_MPI_REDUX) + { + arg_mode = STARPU_RW|STARPU_COMMUTE; + } + if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (cl->nbuffers > STARPU_NMAXBUFS && !cl->dyn_modes)) + { + STARPU_TASK_SET_MODE(task, arg_mode,* current_buffer); + } + else if (STARPU_CODELET_GET_MODE(cl, *current_buffer)) + { + STARPU_ASSERT_MSG((STARPU_CODELET_GET_MODE(cl, *current_buffer) & ~STARPU_NOFOOTPRINT) == arg_mode, + "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n", + _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer), + *current_buffer, arg_mode); + } + else + { +#ifdef STARPU_DEVEL +# warning shall we print a warning to the user + /* Morse uses it to avoid having to set it in the codelet structure */ +#endif + STARPU_CODELET_SET_MODE(cl, arg_mode, *current_buffer); + } + + (*current_buffer)++; +} + +void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles) +{ + STARPU_ASSERT(cl != NULL); + + starpu_task_insert_data_make_room(cl, task, allocated_buffers, *current_buffer, nb_handles); + + int i; + for(i=0 ; inbuffers == 
STARPU_VARIABLE_NBUFFERS || *current_buffer < cl->nbuffers, "Too many data passed to starpu_task_insert"); + STARPU_TASK_SET_HANDLE(task, descrs[i].handle, *current_buffer); + if (task->dyn_modes) + { + task->dyn_modes[*current_buffer] = descrs[i].mode; + } + else if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (cl->nbuffers > STARPU_NMAXBUFS && !cl->dyn_modes)) + STARPU_TASK_SET_MODE(task, descrs[i].mode, *current_buffer); + else if (STARPU_CODELET_GET_MODE(cl, *current_buffer)) + { + STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode, + "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n", + _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer), + *current_buffer, descrs[i].mode); + } + else + { + STARPU_CODELET_SET_MODE(cl, descrs[i].mode, *current_buffer); + } + + (*current_buffer)++; + } + +} + +int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, va_list varg_list) +{ + int arg_type; + int current_buffer; + int allocated_buffers = 0; + unsigned ndeps = 0; + unsigned nend_deps = 0; + struct starpu_task **task_deps_array = NULL; + struct starpu_task **task_end_deps_array = NULL; + + _STARPU_TRACE_TASK_BUILD_START(); + + task->cl = cl; + current_buffer = 0; + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + + while((arg_type = va_arg(varg_list, int)) != 0) + { + if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) + { + /* We have an access mode : we expect to find a handle */ + starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t); + starpu_task_insert_data_process_arg(cl, task, &allocated_buffers, ¤t_buffer, arg_type, handle); + } + else if (arg_type == STARPU_DATA_ARRAY) + { + // Expect to find a array of handles and its size + starpu_data_handle_t *handles = 
va_arg(varg_list, starpu_data_handle_t *); + int nb_handles = va_arg(varg_list, int); + starpu_task_insert_data_process_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_handles, handles); + } + else if (arg_type==STARPU_DATA_MODE_ARRAY) + { + // Expect to find a array of descr and its size + struct starpu_data_descr *descrs = va_arg(varg_list, struct starpu_data_descr *); + int nb_descrs = va_arg(varg_list, int); + starpu_task_insert_data_process_mode_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_descrs, descrs); + } + else if (arg_type==STARPU_VALUE) + { + void *ptr = va_arg(varg_list, void *); + size_t ptr_size = va_arg(varg_list, size_t); + starpu_codelet_pack_arg(&state, ptr, ptr_size); + } + else if (arg_type==STARPU_CL_ARGS) + { + task->cl_arg = va_arg(varg_list, void *); + task->cl_arg_size = va_arg(varg_list, size_t); + task->cl_arg_free = 1; + } + else if (arg_type==STARPU_CL_ARGS_NFREE) + { + task->cl_arg = va_arg(varg_list, void *); + task->cl_arg_size = va_arg(varg_list, size_t); + task->cl_arg_free = 0; + } + else if (arg_type==STARPU_TASK_DEPS_ARRAY) + { + STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet"); + ndeps = va_arg(varg_list, unsigned); + task_deps_array = va_arg(varg_list, struct starpu_task **); + } + else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) + { + STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet"); + nend_deps = va_arg(varg_list, unsigned); + task_end_deps_array = va_arg(varg_list, struct starpu_task **); + } + else if (arg_type==STARPU_CALLBACK) + { + task->callback_func = va_arg(varg_list, _starpu_callback_func_t); + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG) + { + task->callback_func = va_arg(varg_list, _starpu_callback_func_t); + task->callback_arg = va_arg(varg_list, void *); + task->callback_arg_free = 1; + } + else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) + { + 
task->callback_func = va_arg(varg_list, _starpu_callback_func_t); + task->callback_arg = va_arg(varg_list, void *); + task->callback_arg_free = 0; + } + else if (arg_type==STARPU_CALLBACK_ARG) + { + task->callback_arg = va_arg(varg_list, void *); + task->callback_arg_free = 1; + } + else if (arg_type==STARPU_CALLBACK_ARG_NFREE) + { + task->callback_arg = va_arg(varg_list, void *); + task->callback_arg_free = 0; + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK) + { + task->epilogue_callback_func = va_arg(varg_list, _starpu_callback_func_t); + } + else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) + { + task->epilogue_callback_arg = va_arg(varg_list, void *); + task->epilogue_callback_arg_free = 1; + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK) + { + task->prologue_callback_func = va_arg(varg_list, _starpu_callback_func_t); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) + { + task->prologue_callback_arg = va_arg(varg_list, void *); + task->prologue_callback_arg_free = 1; + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) + { + task->prologue_callback_arg = va_arg(varg_list, void *); + task->prologue_callback_arg_free = 0; + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP) + { + task->prologue_callback_pop_func = va_arg(varg_list, _starpu_callback_func_t); + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) + { + task->prologue_callback_pop_arg = va_arg(varg_list, void *); + task->prologue_callback_pop_arg_free = 1; + } + else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) + { + task->prologue_callback_pop_arg = va_arg(varg_list, void *); + task->prologue_callback_pop_arg_free = 0; + } + else if (arg_type==STARPU_PRIORITY) + { + /* Followed by a priority level */ + int prio = va_arg(varg_list, int); + task->priority = prio; + } + else if (arg_type==STARPU_EXECUTE_ON_NODE) + { + (void)va_arg(varg_list, int); + } + else if (arg_type==STARPU_EXECUTE_ON_DATA) + { + (void)va_arg(varg_list, starpu_data_handle_t); + } + else if 
(arg_type==STARPU_EXECUTE_WHERE) + { + task->where = va_arg(varg_list, unsigned long long); + } + else if (arg_type==STARPU_EXECUTE_ON_WORKER) + { + int worker = va_arg(varg_list, int); + if (worker != -1) + { + task->workerid = worker; + task->execute_on_a_specific_worker = 1; + } + } + else if (arg_type==STARPU_WORKER_ORDER) + { + unsigned order = va_arg(varg_list, unsigned); + if (order != 0) + { + STARPU_ASSERT_MSG(task->execute_on_a_specific_worker, "worker order only makes sense if a workerid is provided"); + task->workerorder = order; + } + } + else if (arg_type==STARPU_SCHED_CTX) + { + unsigned sched_ctx = va_arg(varg_list, unsigned); + task->sched_ctx = sched_ctx; + } + else if (arg_type==STARPU_HYPERVISOR_TAG) + { + int hypervisor_tag = va_arg(varg_list, int); + task->hypervisor_tag = hypervisor_tag; + } + else if (arg_type==STARPU_POSSIBLY_PARALLEL) + { + unsigned possibly_parallel = va_arg(varg_list, unsigned); + task->possibly_parallel = possibly_parallel; + } + else if (arg_type==STARPU_FLOPS) + { + double flops = va_arg(varg_list, double); + task->flops = flops; + } + else if (arg_type==STARPU_TAG) + { + starpu_tag_t tag = va_arg(varg_list, starpu_tag_t); + task->tag_id = tag; + task->use_tag = 1; + } + else if (arg_type==STARPU_TAG_ONLY) + { + starpu_tag_t tag = va_arg(varg_list, starpu_tag_t); + task->tag_id = tag; + } + else if (arg_type==STARPU_NAME) + { + const char *name = va_arg(varg_list, const char *); + task->name = name; + } + else if (arg_type==STARPU_NODE_SELECTION_POLICY) + { + (void)va_arg(varg_list, int); + } + else if (arg_type==STARPU_TASK_COLOR) + { + task->color = va_arg(varg_list, int); + } + else if (arg_type==STARPU_TASK_SYNCHRONOUS) + { + task->synchronous = va_arg(varg_list, int); + } + else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) + { + task->handles_sequential_consistency = va_arg(varg_list, unsigned char *); + } +#ifdef STARPU_BUBBLE + else if (arg_type==STARPU_BUBBLE_FUNC) + { + task->bubble_func = 
va_arg(varg_list, starpu_bubble_func_t); + } + else if (arg_type==STARPU_BUBBLE_FUNC_ARG) + { + task->bubble_func_arg = va_arg(varg_list, void*); + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) + { + task->bubble_gen_dag_func = va_arg(varg_list, starpu_bubble_gen_dag_func_t); + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) + { + task->bubble_gen_dag_func_arg = va_arg(varg_list,void*); + } + else if (arg_type==STARPU_BUBBLE_PARENT) + { + struct starpu_task *parent = va_arg(varg_list, struct starpu_task *); + if (parent) + { + struct _starpu_job *job = _starpu_get_job_associated_to_task(parent); + task->bubble_parent = job->job_id; + } + } +#endif + else if (arg_type==STARPU_TASK_END_DEP) + { + int end_dep = va_arg(varg_list, int); + starpu_task_end_dep_add(task, end_dep); + } + else if (arg_type==STARPU_TASK_WORKERIDS) + { + task->workerids_len = va_arg(varg_list, unsigned); + task->workerids = va_arg(varg_list, uint32_t*); + } + else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) + { + task->sequential_consistency = va_arg(varg_list, unsigned); + } + else if (arg_type==STARPU_TASK_PROFILING_INFO) + { + task->profiling_info = va_arg(varg_list, struct starpu_profiling_task_info *); + } + else if (arg_type==STARPU_TASK_NO_SUBMITORDER) + { + task->no_submitorder = va_arg(varg_list, unsigned); + } + else if (arg_type==STARPU_TASK_SCHED_DATA) + { + task->sched_data = va_arg(varg_list, void *); + } + else if (arg_type==STARPU_TASK_FILE) + { + task->file = va_arg(varg_list, const char *); + } + else if (arg_type==STARPU_TASK_LINE) + { + task->line = va_arg(varg_list, int); + } + else if (arg_type==STARPU_TRANSACTION) + { + STARPU_ASSERT_MSG(task->transaction == NULL, "a transaction has already been set"); + task->transaction = va_arg(varg_list, struct starpu_transaction *); + } + else + { + STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); + } + } + + if (cl) + { + if (cl->nbuffers == 
STARPU_VARIABLE_NBUFFERS) + { + task->nbuffers = current_buffer; + } + else + { + STARPU_ASSERT_MSG(current_buffer == cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%d)", cl->nbuffers, current_buffer); + } + } + + if (state.nargs) + { + if (task->cl_arg != NULL) + { + _STARPU_DISP("Parameters STARPU_CL_ARGS and STARPU_VALUE cannot be used in the same call\n"); + free(state.arg_buffer); + return -EINVAL; + } + starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + } + + if (task_deps_array) + { + starpu_task_declare_deps_array(task, ndeps, task_deps_array); + } + + if (task_end_deps_array) + { + starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); + } + + _STARPU_TRACE_TASK_BUILD_END(); + return 0; +} + +int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, void **arglist) +{ + int arg_i = 0; + int current_buffer = 0; + int allocated_buffers = 0; + unsigned ndeps = 0; + unsigned nend_deps = 0; + struct starpu_task **task_deps_array = NULL; + struct starpu_task **task_end_deps_array = NULL; + + _STARPU_TRACE_TASK_BUILD_START(); + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + + task->cl = cl; + task->name = NULL; + task->cl_arg_free = 1; + while (arglist[arg_i] != NULL) + { + const int arg_type = (int)(intptr_t)arglist[arg_i]; + if (arg_type & STARPU_R + || arg_type & STARPU_W + || arg_type & STARPU_SCRATCH + || arg_type & STARPU_REDUX + || arg_type & STARPU_MPI_REDUX) + { + arg_i++; + starpu_data_handle_t handle = arglist[arg_i]; + starpu_task_insert_data_process_arg(cl, task, &allocated_buffers, ¤t_buffer, arg_type, handle); + } + else if (arg_type == STARPU_DATA_ARRAY) + { + arg_i++; + starpu_data_handle_t *handles = arglist[arg_i]; + arg_i++; + int nb_handles = *(int *)arglist[arg_i]; + starpu_task_insert_data_process_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_handles, handles); + } + else if (arg_type 
== STARPU_DATA_MODE_ARRAY) + { + arg_i++; + struct starpu_data_descr *descrs = arglist[arg_i]; + arg_i++; + int nb_descrs = *(int *)arglist[arg_i]; + starpu_task_insert_data_process_mode_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_descrs, descrs); + } + else if (arg_type == STARPU_VALUE) + { + arg_i++; + void *ptr = arglist[arg_i]; + arg_i++; + size_t ptr_size = (size_t)(intptr_t)arglist[arg_i]; + starpu_codelet_pack_arg(&state, ptr, ptr_size); + } + else if (arg_type == STARPU_CL_ARGS) + { + arg_i++; + task->cl_arg = arglist[arg_i]; + arg_i++; + task->cl_arg_size = (size_t)(intptr_t)arglist[arg_i]; + task->cl_arg_free = 1; + } + else if (arg_type == STARPU_CL_ARGS_NFREE) + { + arg_i++; + task->cl_arg = arglist[arg_i]; + arg_i++; + task->cl_arg_size = (size_t)(intptr_t)arglist[arg_i]; + task->cl_arg_free = 0; + } + else if (arg_type==STARPU_TASK_DEPS_ARRAY) + { + STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet"); + arg_i++; + ndeps = *(unsigned *)arglist[arg_i]; + arg_i++; + task_deps_array = arglist[arg_i]; + } + else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) + { + STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet"); + arg_i++; + nend_deps = *(unsigned *)arglist[arg_i]; + arg_i++; + task_end_deps_array = arglist[arg_i]; + } + else if (arg_type == STARPU_CALLBACK) + { + arg_i++; + task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; + } + else if (arg_type == STARPU_CALLBACK_WITH_ARG) + { + arg_i++; + task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; + arg_i++; + task->callback_arg = arglist[arg_i]; + task->callback_arg_free = 1; + } + else if (arg_type == STARPU_CALLBACK_WITH_ARG_NFREE) + { + arg_i++; + task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; + arg_i++; + task->callback_arg = arglist[arg_i]; + task->callback_arg_free = 0; + } + else if (arg_type == STARPU_CALLBACK_ARG) + { + 
arg_i++; + task->callback_arg = arglist[arg_i]; + task->callback_arg_free = 1; + } + else if (arg_type == STARPU_CALLBACK_ARG_NFREE) + { + arg_i++; + task->callback_arg = arglist[arg_i]; + task->callback_arg_free = 0; + } + else if (arg_type == STARPU_EPILOGUE_CALLBACK) + { + arg_i++; + task->epilogue_callback_func = (_starpu_callback_func_t)arglist[arg_i]; + } + else if (arg_type == STARPU_EPILOGUE_CALLBACK_ARG) + { + arg_i++; + task->epilogue_callback_arg = arglist[arg_i]; + task->epilogue_callback_arg_free = 1; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK) + { + arg_i++; + task->prologue_callback_func = (_starpu_callback_func_t)arglist[arg_i]; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK_ARG) + { + arg_i++; + task->prologue_callback_arg = arglist[arg_i]; + task->prologue_callback_arg_free = 1; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK_ARG_NFREE) + { + arg_i++; + task->prologue_callback_arg = arglist[arg_i]; + task->prologue_callback_arg_free = 0; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP) + { + arg_i++; + task->prologue_callback_pop_func = (_starpu_callback_func_t)arglist[arg_i]; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP_ARG) + { + arg_i++; + task->prologue_callback_pop_arg = arglist[arg_i]; + task->prologue_callback_pop_arg_free = 1; + } + else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) + { + arg_i++; + task->prologue_callback_pop_arg = arglist[arg_i]; + task->prologue_callback_pop_arg_free = 0; + } + else if (arg_type == STARPU_PRIORITY) + { + arg_i++; + task->priority = *(int *)arglist[arg_i]; + } + else if (arg_type == STARPU_EXECUTE_ON_NODE) + { + arg_i++; + (void)arglist[arg_i]; + } + else if (arg_type == STARPU_EXECUTE_ON_DATA) + { + arg_i++; + (void)arglist[arg_i]; + } + else if (arg_type == STARPU_EXECUTE_WHERE) + { + arg_i++; + int32_t where = (int32_t)(intptr_t)arglist[arg_i]; + task->where = where; + } + else if (arg_type == STARPU_EXECUTE_ON_WORKER) + { + arg_i++; + int worker = *(int 
*)arglist[arg_i]; + if (worker != -1) + { + task->workerid = worker; + task->execute_on_a_specific_worker = 1; + } + } + else if (arg_type == STARPU_WORKER_ORDER) + { + arg_i++; + unsigned order = *(unsigned *)arglist[arg_i]; + if (order != 0) + { + STARPU_ASSERT_MSG(task->execute_on_a_specific_worker, "worker order only makes sense if a workerid is provided"); + task->workerorder = order; + } + } + else if (arg_type == STARPU_SCHED_CTX) + { + arg_i++; + task->sched_ctx = *(unsigned *)arglist[arg_i]; + } + else if (arg_type == STARPU_HYPERVISOR_TAG) + { + arg_i++; + task->hypervisor_tag = *(int *)arglist[arg_i]; + } + else if (arg_type == STARPU_POSSIBLY_PARALLEL) + { + arg_i++; + task->possibly_parallel = *(unsigned *)arglist[arg_i]; + } + else if (arg_type == STARPU_FLOPS) + { + arg_i++; + task->flops = *(double *)arglist[arg_i]; + } + else if (arg_type == STARPU_TAG) + { + arg_i++; + task->tag_id = *(starpu_tag_t *)arglist[arg_i]; + task->use_tag = 1; + } + else if (arg_type == STARPU_TAG_ONLY) + { + arg_i++; + task->tag_id = *(starpu_tag_t *)arglist[arg_i]; + } + else if (arg_type == STARPU_NAME) + { + arg_i++; + task->name = arglist[arg_i]; + } + else if (arg_type == STARPU_NODE_SELECTION_POLICY) + { + arg_i++; + (void)arglist[arg_i]; + } + else if (arg_type == STARPU_TASK_COLOR) + { + arg_i++; + task->color = *(int *)arglist[arg_i]; + } + else if (arg_type == STARPU_TASK_SYNCHRONOUS) + { + arg_i++; + task->synchronous = *(int *)arglist[arg_i]; + } + else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) + { + task->handles_sequential_consistency = (unsigned char *)arglist[arg_i]; + } +#ifdef STARPU_BUBBLE + else if (arg_type==STARPU_BUBBLE_FUNC) + { + arg_i++; + task->bubble_func = (starpu_bubble_func_t)arglist[arg_i]; + } + else if (arg_type==STARPU_BUBBLE_FUNC_ARG) + { + arg_i++; + task->bubble_func_arg = (void *)arglist[arg_i]; + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) + { + arg_i++; + task->bubble_gen_dag_func = 
(starpu_bubble_gen_dag_func_t)arglist[arg_i]; + } + else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) + { + arg_i++; + task->bubble_gen_dag_func_arg = (void*)arglist[arg_i]; + } + else if (arg_type==STARPU_BUBBLE_PARENT) + { + arg_i++; + struct starpu_task *parent = (struct starpu_task *)arglist[arg_i]; + struct _starpu_job *job = _starpu_get_job_associated_to_task(parent); + task->bubble_parent = job->job_id; + + } +#endif + else if (arg_type==STARPU_TASK_END_DEP) + { + arg_i++; + starpu_task_end_dep_add(task, *(int*)arglist[arg_i]); + } + else if (arg_type==STARPU_TASK_WORKERIDS) + { + arg_i++; + task->workerids_len = *(unsigned *)arglist[arg_i]; + arg_i++; + task->workerids = (uint32_t *)arglist[arg_i]; + } + else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) + { + arg_i++; + task->sequential_consistency = *(unsigned *)arglist[arg_i]; + } + else if (arg_type==STARPU_TASK_PROFILING_INFO) + { + arg_i++; + task->profiling_info = (struct starpu_profiling_task_info *)arglist[arg_i]; + } + else if (arg_type==STARPU_TASK_NO_SUBMITORDER) + { + arg_i++; + task->no_submitorder = *(unsigned *)arglist[arg_i]; + } + else if (arg_type == STARPU_TASK_SCHED_DATA) + { + arg_i++; + task->sched_data = (void*)arglist[arg_i]; + } + else if (arg_type == STARPU_TASK_FILE) + { + arg_i++; + task->file = arglist[arg_i]; + } + else if (arg_type == STARPU_TASK_LINE) + { + arg_i++; + task->line = *(int *)arglist[arg_i]; + } + else if (arg_type==STARPU_TRANSACTION) + { + STARPU_ASSERT_MSG(task->transaction == NULL, "a transaction has already been set"); + arg_i++; + task->transaction = arglist[arg_i]; + } + else + { + STARPU_ABORT_MSG("unknown/unsupported argument %d, did you perhaps forget to end arguments with 0?", arg_type); + } + arg_i++; + } + + if (cl) + { + if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS) + { + task->nbuffers = current_buffer; + } + else + { + STARPU_ASSERT_MSG(current_buffer == cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%d)", 
cl->nbuffers, current_buffer); + } + } + + if (state.nargs) + { + if (task->cl_arg != NULL) + { + _STARPU_DISP("Parameters STARPU_CL_ARGS and STARPU_VALUE cannot be used in the same call\n"); + free(state.arg_buffer); + return -EINVAL; + } + starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + } + + if (task_deps_array) + { + starpu_task_declare_deps_array(task, ndeps, task_deps_array); + } + + if (task_end_deps_array) + { + starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); + } + + _STARPU_TRACE_TASK_BUILD_END(); + + return 0; +} + +/* Fortran interface to task_insert */ +#undef starpu_task_submit +void fstarpu_task_insert(void **arglist) +{ + struct starpu_codelet *cl = arglist[0]; + if (cl == NULL) + { + STARPU_ABORT_MSG("task without codelet"); + } + struct starpu_task *task = starpu_task_create(); + int ret = _fstarpu_task_insert_create(cl, task, arglist+1); + if (ret != 0) + { + STARPU_ABORT_MSG("task creation failed"); + } + ret = starpu_task_submit(task); + if (ret != 0) + { + STARPU_ABORT_MSG("starpu_task_submit failed"); + } +} + +/* fstarpu_insert_task: aliased to fstarpu_task_insert in fstarpu_mod.f90 */ diff --git a/src/util/starpu_task_insert_utils.h b/src/util/starpu_task_insert_utils.h new file mode 100644 index 0000000..d364f5e --- /dev/null +++ b/src/util/starpu_task_insert_utils.h @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPU_TASK_INSERT_UTILS_H__ +#define __STARPU_TASK_INSERT_UTILS_H__ + +/** @file */ + +#include +#include +#include + +#pragma GCC visibility push(hidden) + +typedef void (*_starpu_callback_func_t)(void *); + +int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, va_list varg_list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, void **arglist) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +#pragma GCC visibility pop + +#endif // __STARPU_TASK_INSERT_UTILS_H__ + diff --git a/src/worker_collection/worker_list.c b/src/worker_collection/worker_list.c new file mode 100644 index 0000000..6c1fbd8 --- /dev/null +++ b/src/worker_collection/worker_list.c @@ -0,0 +1,309 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "core/workers.h" + +static unsigned list_has_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + int nworkers = workers->nunblocked_workers; + STARPU_ASSERT(it != NULL); + + unsigned ret = it->cursor < nworkers ; + + if(!ret) it->cursor = 0; + + return ret; +} + +static int list_get_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + int *workerids = (int *)workers->unblocked_workers; + int nworkers = (int)workers->nunblocked_workers; + + STARPU_ASSERT(it->cursor < nworkers); + + int ret = workerids[it->cursor++]; + + return ret; +} + +static unsigned list_has_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + int nworkers = workers->nmasters; + STARPU_ASSERT(it != NULL); + + unsigned ret = it->cursor < nworkers ; + + if(!ret) it->cursor = 0; + + return ret; +} + +static int list_get_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + int *workerids = (int *)workers->masters; + int nworkers = (int)workers->nmasters; + + STARPU_ASSERT_MSG(it->cursor < nworkers, "cursor %d nworkers %d\n", it->cursor, nworkers); + + int ret = workerids[it->cursor++]; + + return ret; +} + +static unsigned list_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + if(it->possibly_parallel == 1) + return list_has_next_master(workers, it); + else if(it->possibly_parallel == 0) + return list_has_next_unblocked_worker(workers, it); + + int nworkers = workers->nworkers; + STARPU_ASSERT(it != NULL); + + unsigned ret = it->cursor < nworkers ; + + if(!ret) it->cursor = 0; + + return ret; +} + +static int list_get_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) +{ + if(it->possibly_parallel == 1) + return list_get_next_master(workers, it); + else if(it->possibly_parallel == 0) + return 
list_get_next_unblocked_worker(workers, it); + + int *workerids = (int *)workers->workerids; + int nworkers = (int)workers->nworkers; + + STARPU_ASSERT(it->cursor < nworkers); + + int ret = workerids[it->cursor++]; + + return ret; +} + +static unsigned _worker_belongs_to_ctx(struct starpu_worker_collection *workers, int workerid) +{ + int *workerids = (int *)workers->workerids; + unsigned nworkers = workers->nworkers; + + unsigned i; + for(i = 0; i < nworkers; i++) + { + if(workerids[i] == workerid) + return 1; + } + return 0; +} + +static int list_add(struct starpu_worker_collection *workers, int worker) +{ + int *workerids = (int *)workers->workerids; + unsigned *nworkers = &workers->nworkers; + + STARPU_ASSERT(*nworkers < (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS)); + + if(!_worker_belongs_to_ctx(workers, worker)) + { + workerids[(*nworkers)++] = worker; + return worker; + } + else + return -1; +} + +static int _get_first_free_worker(int *workerids, int nworkers) +{ + int i; + for(i = 0; i < nworkers; i++) + if(workerids[i] == -1) + return i; + + return -1; +} + +/* rearange array of workerids in order not to have {-1, -1, 5, -1, 7} + * and have instead {5, 7, -1, -1, -1} + * it is easier afterwards to iterate the array +*/ +static void _rearange_workerids(int *workerids, int old_nworkers) +{ + int first_free_id = -1; + int i; + for(i = 0; i < old_nworkers; i++) + { + if(workerids[i] != -1) + { + first_free_id = _get_first_free_worker(workerids, old_nworkers); + if(first_free_id != -1) + { + workerids[first_free_id] = workerids[i]; + workerids[i] = -1; + } + } + } +} + +static int list_remove(struct starpu_worker_collection *workers, int worker) +{ + int *workerids = (int *)workers->workerids; + unsigned nworkers = workers->nworkers; + + int *unblocked_workers = (int *)workers->unblocked_workers; + unsigned nunblocked_workers = workers->nunblocked_workers; + + int *masters = (int *)workers->masters; + unsigned nmasters = workers->nmasters; + + unsigned i; 
+ int found_worker = -1; + for(i = 0; i < nworkers; i++) + { + if(workerids[i] == worker) + { + workerids[i] = -1; + found_worker = worker; + break; + } + } + + _rearange_workerids(workerids, nworkers); + if(found_worker != -1) + workers->nworkers--; + + int found_unblocked = -1; + for(i = 0; i < nunblocked_workers; i++) + { + if(unblocked_workers[i] == worker) + { + unblocked_workers[i] = -1; + found_unblocked = worker; + break; + } + } + + _rearange_workerids(unblocked_workers, nunblocked_workers); + if(found_unblocked != -1) + workers->nunblocked_workers--; + + int found_master = -1; + for(i = 0; i < nmasters; i++) + { + if(masters[i] == worker) + { + masters[i] = -1; + found_master = worker; + break; + } + } + + _rearange_workerids(masters, nmasters); + if(found_master != -1) + workers->nmasters--; + + return found_worker; +} + +static void _init_workers(int *workerids) +{ + unsigned i; + unsigned nworkers = starpu_worker_get_count(); + for(i = 0; i < nworkers; i++) + workerids[i] = -1; + return; +} + +static void list_init(struct starpu_worker_collection *workers) +{ + int *workerids; + int *unblocked_workers; + int *masters; + + _STARPU_MALLOC(workerids, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); + _STARPU_MALLOC(unblocked_workers, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); + _STARPU_MALLOC(masters, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); + _init_workers(workerids); + _init_workers(unblocked_workers); + _init_workers(masters); + + workers->workerids = (void*)workerids; + workers->nworkers = 0; + workers->unblocked_workers = (void*)unblocked_workers; + workers->nunblocked_workers = 0; + workers->masters = (void*)masters; + workers->nmasters = 0; + + return; +} + +static void list_deinit(struct starpu_worker_collection *workers) +{ + free(workers->workerids); + free(workers->unblocked_workers); + free(workers->masters); +} + +static void list_init_iterator(struct starpu_worker_collection 
*workers, struct starpu_sched_ctx_iterator *it) +{ + (void) workers; + it->cursor = 0; + it->possibly_parallel = -1; /* -1 => we don't care about this field */ + +} + +static void list_init_iterator_for_parallel_tasks(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task) +{ + list_init_iterator(workers, it); + if (_starpu_get_nsched_ctxs() <= 1) + return; + + it->possibly_parallel = task->possibly_parallel; /* 0/1 => this field indicates if we consider masters only or slaves not blocked too */ + + int *workerids = (int *)workers->workerids; + unsigned nworkers = workers->nworkers; + unsigned i; + int nm = 0, nub = 0; + for(i = 0; i < nworkers; i++) + { + if(!starpu_worker_is_blocked_in_parallel(workerids[i])) + { + ((int*)workers->unblocked_workers)[nub++] = workerids[i]; + if(!it->possibly_parallel) /* don't bother filling the table with masters we won't use it anyway */ + continue; + if(!starpu_worker_is_slave_somewhere(workerids[i])) + ((int*)workers->masters)[nm++] = workerids[i]; + } + } + workers->nmasters = nm; + workers->nunblocked_workers = nub; +} + +struct starpu_worker_collection starpu_worker_list = +{ + .has_next = list_has_next, + .get_next = list_get_next, + .add = list_add, + .remove = list_remove, + .init = list_init, + .deinit = list_deinit, + .init_iterator = list_init_iterator, + .init_iterator_for_parallel_tasks = list_init_iterator_for_parallel_tasks, + .type = STARPU_WORKER_LIST +}; diff --git a/src/worker_collection/worker_tree.c b/src/worker_collection/worker_tree.c new file mode 100644 index 0000000..048978c --- /dev/null +++ b/src/worker_collection/worker_tree.c @@ -0,0 +1,372 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the two angle-bracket include targets below were missing in
+ * the reviewed text (bare "#include"); they are restored as <starpu.h> and
+ * <hwloc.h> -- confirm against the upstream file. */
+#include <starpu.h>
+#ifdef STARPU_HAVE_HWLOC
+#include <hwloc.h>
+#include "core/workers.h"
+
+/*
+ * Iterator state used by the tree_has_next*() / tree_get_next*() pairs below:
+ *  - it->visited[id]     set to 1 once worker id has been returned by a
+ *                        get_next function (or skipped because it is blocked);
+ *  - it->value           tree node of the last worker returned or skipped;
+ *  - it->possible_value  node found by a has_next function, consumed and
+ *                        cleared by the following get_next function.
+ * When starpu_tree_get_neighbour() finds no further node, the has_next
+ * functions reset the visited flags, clear both node pointers and return 0.
+ */
+
+/*
+ * has_next for iterators with possibly_parallel == 0: tell whether a present,
+ * unblocked and not yet visited worker remains.
+ *
+ * The first present, unvisited worker bound to the neighbour node decides:
+ * if it is unblocked the node is remembered in it->possible_value and 1 is
+ * returned; if it is blocked it is marked visited, the node becomes
+ * it->value and the search restarts from there.
+ * Returns 0 when the collection is empty or no node is left.
+ */
+static unsigned tree_has_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	STARPU_ASSERT(it != NULL);
+	if(workers->nworkers == 0)
+		return 0;
+
+	struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private;
+	struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present);
+
+	if(!neighbour)
+	{
+		starpu_tree_reset_visited(tree, it->visited);
+		it->value = NULL;
+		it->possible_value = NULL;
+		return 0;
+	}
+	int id = -1;
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	int w;
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->present[workerids[w]])
+		{
+			if(workers->is_unblocked[workerids[w]])
+			{
+				id = workerids[w];
+				it->possible_value = neighbour;
+				break;
+			}
+			else
+			{
+				/* Blocked worker: skip it and look again from this node */
+				it->visited[workerids[w]] = 1;
+				it->value = neighbour;
+
+				return tree_has_next_unblocked_worker(workers, it);
+			}
+		}
+	}
+
+	STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id);
+
+	return 1;
+}
+
+/*
+ * get_next for iterators with possibly_parallel == 0: return the id of the
+ * next present, unblocked, unvisited worker, mark it visited and record its
+ * node in it->value.  Uses the node left in it->possible_value by has_next
+ * when there is one, otherwise asks the tree for a neighbour.  Asserts when
+ * no node or no matching worker is found.
+ */
+static int tree_get_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	int ret = -1;
+
+	struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private;
+	struct starpu_tree *neighbour = NULL;
+	if(it->possible_value)
+	{
+		neighbour = it->possible_value;
+		it->possible_value = NULL;
+	}
+	else
+		neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present);
+
+	STARPU_ASSERT_MSG(neighbour, "no element anymore");
+
+
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	int w;
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->present[workerids[w]] && workers->is_unblocked[workerids[w]])
+		{
+			ret = workerids[w];
+			it->visited[workerids[w]] = 1;
+			it->value = neighbour;
+			break;
+		}
+	}
+	STARPU_ASSERT_MSG(ret != -1, "bind id not correct");
+	return ret;
+}
+
+/*
+ * has_next for iterators with possibly_parallel == 1: tell whether an
+ * unvisited worker flagged in workers->is_master remains.  On success the
+ * node is remembered in it->possible_value and 1 is returned; 0 is returned
+ * when the collection is empty or no node is left.
+ */
+static unsigned tree_has_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	STARPU_ASSERT(it != NULL);
+	if(workers->nworkers == 0)
+		return 0;
+
+	struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private;
+	struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->is_master);
+
+	if(!neighbour)
+	{
+		starpu_tree_reset_visited(tree, it->visited);
+		it->value = NULL;
+		it->possible_value = NULL;
+		return 0;
+	}
+	int id = -1;
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	int w;
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->is_master[workerids[w]])
+		{
+			id = workerids[w];
+			it->possible_value = neighbour;
+			break;
+		}
+	}
+
+	STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id);
+
+	return 1;
+}
+
+/*
+ * get_next for iterators with possibly_parallel == 1: return the id of the
+ * next unvisited worker flagged in workers->is_master, mark it visited and
+ * record its node in it->value.  Asserts when no node or no matching worker
+ * is found.
+ */
+static int tree_get_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	int ret = -1;
+
+	struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private;
+	struct starpu_tree *neighbour = NULL;
+	if(it->possible_value)
+	{
+		neighbour = it->possible_value;
+		it->possible_value = NULL;
+	}
+	else
+		neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->is_master);
+
+	STARPU_ASSERT_MSG(neighbour, "no element anymore");
+
+
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	int w;
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->is_master[workerids[w]])
+		{
+			ret = workerids[w];
+			it->visited[workerids[w]] = 1;
+			it->value = neighbour;
+			break;
+		}
+	}
+	STARPU_ASSERT_MSG(ret != -1, "bind id not correct");
+
+	return ret;
+}
+
+/*
+ * Collection entry point: tell whether the iteration has another worker.
+ *
+ * possibly_parallel == 1 delegates to the masters walk, == 0 to the
+ * unblocked-workers walk; any other value (tree_init_iterator() sets -1)
+ * walks every present worker.  In that last mode the workers still
+ * unvisited on the current node (it->value) are served first, then the
+ * tree is asked for another node.
+ */
+static unsigned tree_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	/* Check the iterator before it is dereferenced for the dispatch */
+	STARPU_ASSERT(it != NULL);
+
+	if(it->possibly_parallel == 1)
+		return tree_has_next_master(workers, it);
+	else if(it->possibly_parallel == 0)
+		return tree_has_next_unblocked_worker(workers, it);
+
+	if(workers->nworkers == 0)
+		return 0;
+
+	struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private;
+	int *workerids;
+	int nworkers;
+	int w;
+
+	if (it->value)
+	{
+		struct starpu_tree *node = it->value;
+		/* Are there workers left to be processed in the current node? */
+		nworkers = starpu_bindid_get_workerids(node->id, &workerids);
+		for(w = 0; w < nworkers; w++)
+		{
+			if(!it->visited[workerids[w]] && workers->present[workerids[w]])
+			{
+				/* Still some! */
+				it->possible_value = node;
+				return 1;
+			}
+		}
+	}
+
+	struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present);
+
+	if(!neighbour)
+	{
+		starpu_tree_reset_visited(tree, it->visited);
+		it->value = NULL;
+		it->possible_value = NULL;
+		return 0;
+	}
+	int id = -1;
+	nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->present[workerids[w]])
+		{
+			id = workerids[w];
+			it->possible_value = neighbour;
+			break;
+		}
+	}
+
+	STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id);
+
+	return 1;
+}
+
+/*
+ * Collection entry point: return the id of the next worker of the iteration.
+ * Dispatches on it->possibly_parallel like tree_has_next(); in the default
+ * mode it returns the next present, unvisited worker, marks it visited and
+ * records its node in it->value.  Asserts when no node or no matching worker
+ * is found.
+ */
+static int tree_get_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	if(it->possibly_parallel == 1)
+		return tree_get_next_master(workers, it);
+	else if(it->possibly_parallel == 0)
+		return tree_get_next_unblocked_worker(workers, it);
+
+	int ret = -1;
+
+	struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private;
+	struct starpu_tree *neighbour = NULL;
+	if(it->possible_value)
+	{
+		neighbour = it->possible_value;
+		it->possible_value = NULL;
+	}
+	else
+		neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present);
+
+	STARPU_ASSERT_MSG(neighbour, "no element anymore");
+
+
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
+	int w;
+	for(w = 0; w < nworkers; w++)
+	{
+		if(!it->visited[workerids[w]] && workers->present[workerids[w]])
+		{
+			ret = workerids[w];
+			it->visited[workerids[w]] = 1;
+			it->value = neighbour;
+			break;
+		}
+	}
+	STARPU_ASSERT_MSG(ret != -1, "bind id not correct");
+
+	return ret;
+}
+
+/*
+ * Add worker to the collection: flag it present and append it to workerids.
+ * Returns worker, or -1 when it was already present.
+ */
+static int tree_add(struct starpu_worker_collection *workers, int worker)
+{
+	if(!workers->present[worker])
+	{
+		workers->present[worker] = 1;
+		workers->workerids[workers->nworkers] = worker;
+		workers->nworkers++;
+		return worker;
+	}
+	else
+		return -1;
+}
+
+
+/*
+ * Remove worker from the collection: close the gap it leaves in workerids,
+ * clear its present / is_unblocked / is_master flags and decrement the count.
+ * Returns worker, or -1 when it was not present.
+ */
+static int tree_remove(struct starpu_worker_collection *workers, int worker)
+{
+	if(workers->present[worker])
+	{
+		unsigned i;
+		for (i = 0; i < workers->nworkers; i++)
+			if (workers->workerids[i] == worker)
+			{
+				/* Shift the following ids down by one slot */
+				memmove(&workers->workerids[i], &workers->workerids[i+1], (workers->nworkers-1-i) * sizeof(workers->workerids[i]));
+				break;
+			}
+		workers->present[worker] = 0;
+		workers->is_unblocked[worker] = 0;
+		workers->is_master[worker] = 0;
+		workers->nworkers--;
+		return worker;
+	}
+	else
+		return -1;
+}
+
+/*
+ * Set up an empty tree collection: allocate workerids with room for
+ * STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS ids, attach the tree
+ * returned by starpu_workers_get_tree() and clear the per-worker tables for
+ * the first starpu_worker_get_count() workers.
+ */
+static void tree_init(struct starpu_worker_collection *workers)
+{
+	_STARPU_MALLOC(workers->workerids, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int));
+	workers->collection_private = (void*)starpu_workers_get_tree();
+	workers->nworkers = 0;
+
+	int i;
+	int nworkers = starpu_worker_get_count();
+	for(i = 0; i < nworkers; i++)
+	{
+		workers->workerids[i] = -1;
+		workers->present[i] = 0;
+		workers->is_unblocked[i] = 0;
+		workers->is_master[i] = 0;
+	}
+}
+
+/*
+ * Release the id array allocated by tree_init().  The pointer is cleared so
+ * the collection does not keep a reference to freed memory.
+ */
+static void tree_deinit(struct starpu_worker_collection *workers)
+{
+	free(workers->workerids);
+	workers->workerids = NULL;
+}
+
+/*
+ * Prepare it for a walk over every present worker: clear both node pointers,
+ * set possibly_parallel to -1 and zero the visited flags of the first
+ * starpu_worker_get_count() workers.  The collection itself is not read.
+ */
+static void tree_init_iterator(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+{
+	(void) workers;
+	it->value = NULL;
+	it->possible_value = NULL;
+	it->possibly_parallel = -1;
+	int nworkers = starpu_worker_get_count();
+	memset(it->visited, 0, nworkers * sizeof(it->visited[0]));
+}
+
+/*
+ * Prepare it for scheduling task.
+ *
+ * With at most one scheduling context (_starpu_get_nsched_ctxs() <= 1) this
+ * is exactly tree_init_iterator().  Otherwise task->possibly_parallel is
+ * copied into the iterator and, for the first starpu_worker_get_count()
+ * workers, is_unblocked[] is recomputed (present and not blocked in
+ * parallel); is_master[] (unblocked and not a slave somewhere) is recomputed
+ * only when possibly_parallel is non-zero.
+ */
+static void tree_init_iterator_for_parallel_tasks(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task)
+{
+	tree_init_iterator(workers, it);
+	if (_starpu_get_nsched_ctxs() <= 1)
+		return;
+
+	it->possibly_parallel = task->possibly_parallel;
+	int i;
+	int nworkers = starpu_worker_get_count();
+	for(i = 0; i < nworkers; i++)
+	{
+		/* Query the blocked state once so both tables agree on it */
+		int unblocked = (workers->present[i] && !starpu_worker_is_blocked_in_parallel(i));
+
+		workers->is_unblocked[i] = unblocked;
+		if(!it->possibly_parallel) /* don't bother filling the table with masters we won't use it anyway */
+			continue;
+		workers->is_master[i] = (unblocked && !starpu_worker_is_slave_somewhere(i));
+	}
+}
+
+/* Worker collection walked through the starpu_tree of the machine; wires up the tree_* functions of this file. */
+struct starpu_worker_collection starpu_worker_tree =
+{
+	.has_next = tree_has_next,
+	.get_next = tree_get_next,
+	.add = tree_add,
+	.remove = tree_remove,
+	.init = tree_init,
+	.deinit = tree_deinit,
+	.init_iterator = tree_init_iterator,
+	.init_iterator_for_parallel_tasks = tree_init_iterator_for_parallel_tasks,
+	.type = STARPU_WORKER_TREE
+};
+
+#endif// STARPU_HAVE_HWLOC
diff --git a/starpu_openmp_llvm/Makefile.am b/starpu_openmp_llvm/Makefile.am
new file mode 100644
index 0000000..f4a7a1d
--- /dev/null
+++ b/starpu_openmp_llvm/Makefile.am
@@ -0,0 +1,19 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/make/starpu-subdirtests.mk
+
+SUBDIRS=src examples
diff --git a/starpu_openmp_llvm/Makefile.in b/starpu_openmp_llvm/Makefile.in
new file mode 100644
index 0000000..9987bfa
--- /dev/null
+++ b/starpu_openmp_llvm/Makefile.in
@@ -0,0 +1,889 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = starpu_openmp_llvm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + 
$(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a 
list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = 
@BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ 
+LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = 
@OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = 
@STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = 
@ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src examples +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) 
$(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpu_openmp_llvm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: diff --git a/starpu_openmp_llvm/examples/Makefile.am b/starpu_openmp_llvm/examples/Makefile.am new file mode 100644 index 0000000..d40284c --- /dev/null +++ b/starpu_openmp_llvm/examples/Makefile.am @@ -0,0 +1,42 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-tests.mk + +LAUNCHER_ENV += OMP_NUM_THREADS=4 + +TESTS = $(STARPU_OPENMP_LLVM_EXAMPLES) +STARPU_OPENMP_LLVM_EXAMPLES = + +# we want to compile the application just with clang -fopenmp +CC = $(PROG_CLANG) +AM_CPPFLAGS = +AM_CFLAGS += -Wall -g -fopenmp +# ideally, we should create a link libomp.so.5 to +# libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.so but because it +# is libtool we use LDADD +LDADD = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la + +examplebindir = $(libdir)/starpu/examples/starpu_openmp_llvm +examplebin_PROGRAMS = $(STARPU_OPENMP_LLVM_EXAMPLES) +check_PROGRAMS = $(LOADER) $(STARPU_OPENMP_LLVM_EXAMPLES) + +STARPU_OPENMP_LLVM_EXAMPLES += hello-task + +exampledir = $(libdir)/starpu/examples/starpu_openmp_llvm +example_DATA = README hello-task.c + +EXTRA_DIST = README diff --git a/starpu_openmp_llvm/examples/Makefile.in b/starpu_openmp_llvm/examples/Makefile.in new file mode 100644 index 0000000..e522751 --- /dev/null +++ b/starpu_openmp_llvm/examples/Makefile.in @@ -0,0
+1,1440 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +TESTS = $(am__EXEEXT_1) +examplebin_PROGRAMS = $(am__EXEEXT_1) +check_PROGRAMS = $(am__EXEEXT_1) +subdir = starpu_openmp_llvm/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = hello-task$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" \ + "$(DESTDIR)$(exampledir)" +PROGRAMS = $(examplebin_PROGRAMS) +hello_task_SOURCES = hello-task.c +hello_task_OBJECTS = hello-task.$(OBJEXT) +hello_task_LDADD = $(LDADD) +hello_task_DEPENDENCIES = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common 
-I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/hello-task.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = hello-task.c +DIST_SOURCES = hello-task.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + 
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +DATA = $(example_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". 
+am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. +am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. 
+am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# they have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the test scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problems with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'.
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk README +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ + +# we want to compile the application just with clang --fopenmp +CC = $(PROG_CLANG) +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = 
@CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = 
@LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ 
+PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ 
+STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = 
@build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = OMP_NUM_THREADS=4 +LAUNCHER = +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -Wall -g -fopenmp +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile +STARPU_OPENMP_LLVM_EXAMPLES = hello-task +AM_CPPFLAGS = +# ideally, we should create a link libomp.so.5 to +# libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.so but because it +# is libtool we use LDADD +LDADD = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la +examplebindir = $(libdir)/starpu/examples/starpu_openmp_llvm +exampledir = $(libdir)/starpu/examples/starpu_openmp_llvm +example_DATA = README hello-task.c +EXTRA_DIST = README +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am 
$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpu_openmp_llvm/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while 
read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +hello-task$(EXEEXT): $(hello_task_OBJECTS) $(hello_task_DEPENDENCIES) $(EXTRA_hello_task_DEPENDENCIES) + @rm -f hello-task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(hello_task_OBJECTS) $(hello_task_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hello-task.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-exampleDATA: $(example_DATA) + @$(NORMAL_INSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) 
'$(DESTDIR)$(exampledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(exampledir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(exampledir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(exampledir)" || exit $$?; \ + done + +uninstall-exampleDATA: + @$(NORMAL_UNINSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(exampledir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should 
ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. +am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | 
wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. 
contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? 
+hello-task.log: hello-task$(EXEEXT) + @p='hello-task$(EXEEXT)'; \ + b='hello-task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) $(DATA) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(exampledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/hello-task.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-exampleDATA install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/hello-task.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-exampleDATA uninstall-examplebinPROGRAMS + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exampleDATA \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am 
recheck tags tags-am \ + uninstall uninstall-am uninstall-exampleDATA \ + uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/starpu_openmp_llvm/examples/README b/starpu_openmp_llvm/examples/README new file mode 100644 index 0000000..f16ef89 --- /dev/null +++ b/starpu_openmp_llvm/examples/README @@ -0,0 +1,46 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +To compile and execute this application outside StarPU: + +$ clang -fopenmp ./hello-task.c +$ ldd ./a.out +... + libomp.so.5 => /usr/lib/x86_64-linux-gnu/libomp.so.5 (0x00007fbf3d42d000) +... +$ ./a.out +Hello from 0 +Hey 0 +Hey 1 +Hey 2 +Hey 3 +Hey there +array: 1, 1, 1, 1, + +To execute the application using the StarPU OpenMP LLVM support, one just needs to create a symbolic link named libomp.so.5 that points to the StarPU OpenMP LLVM library, e.g. + +$ mkdir libs +$ ln -s $STARPU_ROOT/lib/libstarpu_openmp_llvm-1.4.so libs/libomp.so.5 +$ LD_LIBRARY_PATH=./libs:$LD_LIBRARY_PATH ./a.out +[starpu][__kmp_constructor] Initialising the StarPU OpenMP LLVM Support +Hello from 0 +[starpu][__kmpc_omp_task_with_deps] Using the StarPU OpenMP LLVM Support +Hey 0 +Hey 1 +Hey 2 +Hey 3 +Hey there +array: 1, 1, 1, 1, diff --git a/starpu_openmp_llvm/examples/hello-task.c b/starpu_openmp_llvm/examples/hello-task.c new file mode 100644 index 0000000..945e24f --- /dev/null +++ b/starpu_openmp_llvm/examples/hello-task.c @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +int array[] = {1, 2, 3, 4}; + +#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +int main() +{ + int res=0; +#pragma omp parallel +#pragma omp master + { + FPRINTF(stderr, "Hello from %i\n", omp_get_thread_num()); +#pragma omp task + { + sleep(2); + FPRINTF(stderr, "Hey there\n"); + } + for (int i = 0; i < 4; i++) + { +#pragma omp task depend(in: array[i]) depend(inout: array[(i+1)%4]) + { + array[(i+1)%4] = array[i]; + FPRINTF(stderr, "Hey %i\n", i); + } + } + } + FPRINTF(stderr, "array: "); + for (int i = 0; i < 4; i++) + { + FPRINTF(stderr, "%i, ", array[i]); + if (array[i] != 1) + { + FPRINTF(stderr, "\n"); + FPRINTF(stderr, "Incorrect value. Should be 1\n"); + res = 1; + } + } + FPRINTF(stderr, "\n"); + return res; +} diff --git a/starpu_openmp_llvm/src/Makefile.am b/starpu_openmp_llvm/src/Makefile.am new file mode 100644 index 0000000..a151002 --- /dev/null +++ b/starpu_openmp_llvm/src/Makefile.am @@ -0,0 +1,38 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-notests.mk + +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +AM_CFLAGS += $(FXT_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(FXT_LDFLAGS) $(FXT_LIBS) + +libstarpu_openmp_llvm_so_version = $(LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE) + +lib_LTLIBRARIES = libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la + +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = $(AM_CPPFLAGS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(AM_CFLAGS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info $(libstarpu_openmp_llvm_so_version) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(AM_LIBADD) $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + openmp_runtime_support_llvm.c + diff --git a/starpu_openmp_llvm/src/Makefile.in b/starpu_openmp_llvm/src/Makefile.in new file mode 100644 index 0000000..1ab9914 --- /dev/null +++ b/starpu_openmp_llvm/src/Makefile.in @@ -0,0 +1,1111 @@ +# Makefile.in generated by 
automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + 
*$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = starpu_openmp_llvm/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = 
+CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +am__DEPENDENCIES_1 = +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES = \ + $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la \ + $(am__DEPENDENCIES_1) +am_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = $(am_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) \ + $(CFLAGS) \ + $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = 
@CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(FXT_LDFLAGS) $(FXT_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ 
+LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = 
@STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = 
@am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FXT_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which 
we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) +libstarpu_openmp_llvm_so_version = $(LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE) +lib_LTLIBRARIES = libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = $(AM_CPPFLAGS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(AM_CFLAGS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info $(libstarpu_openmp_llvm_so_version) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(AM_LIBADD) $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + openmp_runtime_support_llvm.c + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: 
$(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpu_openmp_llvm/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + 
+uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo: openmp_runtime_support_llvm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS) $(CPPFLAGS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) $(CFLAGS) -MT libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo -MD -MP -MF $(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Tpo -c -o libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo `test -f 'openmp_runtime_support_llvm.c' || echo '$(srcdir)/'`openmp_runtime_support_llvm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Tpo $(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp_runtime_support_llvm.c' object='libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS) $(CPPFLAGS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) $(CFLAGS) -c -o libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo `test -f 'openmp_runtime_support_llvm.c' || echo '$(srcdir)/'`openmp_runtime_support_llvm.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; 
\ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am 
install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/starpu_openmp_llvm/src/openmp_runtime_support_llvm.c b/starpu_openmp_llvm/src/openmp_runtime_support_llvm.c new file mode 100644 index 0000000..3d41e98 --- /dev/null +++ b/starpu_openmp_llvm/src/openmp_runtime_support_llvm.c @@ -0,0 +1,1062 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#ifdef STARPU_OPENMP_LLVM +#include +#include +#include +#include +#include +#include +#include + +typedef struct ident ident_t; +typedef int32_t kmp_int32; +typedef void * kmp_intptr_t; + +typedef void(* kmpc_micro) (kmp_int32 *global_tid, kmp_int32 *bound_tid,...); + +typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *kmp_task); + +typedef struct kmp_depend_info +{ + kmp_intptr_t base_addr; + size_t len; + struct + { + bool in : 1; + bool out : 1; + } flags; + size_t elem_size; +} kmp_depend_info_t; + +typedef union kmp_cmplrdata +{ + kmp_int32 priority; /**< priority specified by user for the task */ + kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ + /* future data */ +} kmp_cmplrdata_t; + +// the LLVM support was first implemented with a compiler supporting variants, however as most compilers do not enable variants, we disable the feature +// by default, variants are not enabled, it is not possible to 
enable them with configure.ac as we do not want users to enable it by mistake +#ifdef _STARPU_OPENMP_LLVM_VARIANT +typedef void *(*kmp_variant_entry_t)(void *, ...); + +typedef enum kmp_variant_kind +{ + VARIANT_CPU, + VARIANT_OPENCL, + VARIANT_CUDA +} kmp_variant_kind_t; + +typedef struct kmp_variant +{ + kmp_variant_entry_t fn; + kmp_variant_kind_t kind; +} kmp_variant_t; +#endif + +typedef struct kmp_task +{ /* GEH: Shouldn't this be aligned somehow? */ + void *shareds; /**< pointer to block of pointers to shared vars */ + kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ + kmp_int32 part_id; /**< part id for the task */ + kmp_cmplrdata_t data1; /* Two known optional additions: destructors and priority */ + kmp_cmplrdata_t data2; /* Process destructors first, priority second */ + /* future data */ +#ifdef _STARPU_OPENMP_LLVM_VARIANT + kmp_variant_t *variants; + kmp_int32 nvariants; +#endif +} kmp_task_t; + +struct s_microtask_wrap +{ + int argc; + void **arg_ptrs; + kmpc_micro microtask; +}; + +enum sched_type /* : kmp_int32 */ +{ + kmp_sch_lower = 32, + kmp_sch_static_chunked = 33, + kmp_sch_static = 34, + kmp_sch_dynamic_chunked = 35, + kmp_sch_guided_chunked = 36, + kmp_sch_runtime = 37, + kmp_sch_auto = 38, + kmp_sch_trapezoidal = 39, + + kmp_sch_static_greedy = 40, + kmp_sch_static_balanced = 41, + + kmp_sch_guided_iterative_chunked = 42, + kmp_sch_guided_analytical_chunked = 43, + + kmp_sch_static_steal = 44, + + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, + + kmp_sch_upper, + + kmp_ord_lower = 64, + kmp_ord_static_chunked = 65, + kmp_ord_static = 66, + kmp_ord_dynamic_chunked = 67, + kmp_ord_guided_chunked = 68, + kmp_ord_runtime = 69, + kmp_ord_auto = 70, + kmp_ord_trapezoidal = 71, + kmp_ord_upper, + + kmp_distribute_static_chunked = 91, + kmp_distribute_static = 92, + + kmp_nm_lower = 160, + + kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + 
kmp_nm_lower),
+	kmp_nm_static = 162,
+	kmp_nm_dynamic_chunked = 163,
+	kmp_nm_guided_chunked = 164,
+	kmp_nm_runtime = 165,
+	kmp_nm_auto = 166,
+	kmp_nm_trapezoidal = 167,
+
+	kmp_nm_static_greedy = 168,
+	kmp_nm_static_balanced = 169,
+	kmp_nm_guided_iterative_chunked = 170,
+	kmp_nm_guided_analytical_chunked = 171,
+	kmp_nm_static_steal = 172,
+
+	kmp_nm_ord_static_chunked = 193,
+	kmp_nm_ord_static = 194,
+	kmp_nm_ord_dynamic_chunked = 195,
+	kmp_nm_ord_guided_chunked = 196,
+	kmp_nm_ord_runtime = 197,
+	kmp_nm_ord_auto = 198,
+	kmp_nm_ord_trapezoidal = 199,
+	kmp_nm_upper,
+
+	kmp_sch_modifier_monotonic = (1 << 29),
+	kmp_sch_modifier_nonmonotonic = (1 << 30),
+
+	kmp_sch_default = kmp_sch_static
+};
+
+typedef kmp_int32 kmp_critical_name[8];
+
+kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+kmp_int32 __kmpc_global_num_threads(ident_t *loc);
+kmp_int32 __kmpc_bound_thread_num(ident_t *loc);
+kmp_int32 __kmpc_bound_num_threads(ident_t *loc);
+
+/*
+ * Codelet entry point used by __kmpc_fork_call() to run a parallel region.
+ *
+ * buffers is unused. args is an array of pointers laid out as:
+ *   [0]   the outlined microtask (a kmpc_micro),
+ *   [1]   argc, the number of pointer arguments that follow,
+ *   [2..] the argc pointer arguments forwarded to the microtask.
+ *
+ * The microtask is variadic, so the call is unrolled by hand for every
+ * supported argument count (0 to 15). Any other count is a fatal error.
+ */
+static void parallel_call(void *buffers[], void *args)
+{
+	(void) buffers;
+	/* The microtask receives the thread ids by address, as kmp_int32 *. */
+	kmp_int32 global_tid = __kmpc_global_thread_num(NULL);
+	kmp_int32 bound_tid = __kmpc_bound_thread_num(NULL);
+	void **arg_ptrs = args;
+	kmpc_micro microtask = (kmpc_micro) *arg_ptrs++;
+	kmp_int32 argc = (intptr_t)*arg_ptrs++;
+	switch (argc)
+	{
+	case 0:
+		microtask(&global_tid, &bound_tid);
+		break;
+
+	case 1:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0]);
+		break;
+
+	case 2:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1]);
+		break;
+
+	case 3:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2]);
+		break;
+
+	case 4:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3]);
+		break;
+
+	case 5:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4]);
+		break;
+
+	case 6:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5]);
+		break;
+
+	case 7:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6]);
+		break;
+
+	case 8:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7]);
+		break;
+
+	case 9:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8]);
+		break;
+
+	case 10:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9]);
+		break;
+
+	case 11:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10]);
+		break;
+
+	case 12:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11]);
+		break;
+
+	case 13:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12]);
+		break;
+
+	case 14:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12], arg_ptrs[13]);
+		break;
+
+	case 15:
+		microtask(&global_tid, &bound_tid, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12], arg_ptrs[13], arg_ptrs[14]);
+		break;
+
+	default:
+		/* Fail loudly even when assertions are compiled out (NDEBUG). */
+		fprintf(stderr, "Unsupported number of arguments in parallel region call.\n");
+		abort();
+	}
+}
+
+/* Deprecated Functions */
+kmp_int32 __kmpc_ok_to_fork(ident_t *loc)
+{
+	(void) loc;
+	return !0;
+}
+
+/* Startup and Shutdown */
+void __kmpc_begin(ident_t *loc, kmp_int32 flags)
+{
+	(void) loc;
+	(void) flags;
+	/* TODO: add auto-init in other lib funcs if kmpc_begin is not called */
+	starpu_omp_init();
+}
+
+void __kmpc_end(ident_t *loc)
+{
+	(void) loc;
+	/* TODO: add support for KMP_IGNORE_MPPEND */
+	
starpu_omp_shutdown(); +} + +/* Parallel (fork/join) */ +void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) +{ + (void) loc; + (void) global_tid; + (void) num_threads; + abort(); +} + +void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) +{ + (void) loc; + va_list vargs; + va_start(vargs, microtask); + void *arg_ptrs[2+argc]; + arg_ptrs[0] = microtask; + arg_ptrs[1] = (void*)(intptr_t)argc; + + int i; + for (i=0; icl.model = &starpu_perfmodel_nop; + attr->cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; +#endif + attr->cl.cpu_funcs[0] = parallel_call; + attr->cl.where = STARPU_CPU; + attr->cl_arg_size = (argc+2)*sizeof(void *); + attr->cl_arg_free = 0; + attr->cl_arg = arg_ptrs; + attr->if_clause = 1; + starpu_omp_parallel_region(attr); + free((void *)attr); + + va_end(vargs); +} + +static void task_call(void *buffers[], void *args) +{ + (void) buffers; + int gtid=__kmpc_global_thread_num(NULL); + void **arg_ptrs = args; + kmp_task_t *task = *arg_ptrs++; + /*typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *kmp_task);*/ + task->routine(gtid, task); +} + +kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 flags, size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry) +{ + (void) loc_ref; + (void) gtid; + (void) flags; + // The initial content of kmp_task_t is: + // - void *shared + // - kmp_routine_entry_t routine + // - kmp_int32 part_id + // But the compiler may need more fields, hence it passes a "sizeof_kmp_task_t" that we should honor. 
+ kmp_task_t *task; + /* FIXME: avoid double malloc by allocating shared+task_t at once */ + /* FIXME: free the things somewhere*/ + _STARPU_MALLOC(task, sizeof_kmp_task_t); + void *shared; + _STARPU_MALLOC(shared, sizeof_shareds); + task->shareds = shared; + task->routine = task_entry; + task->part_id = 0; +#ifdef _STARPU_OPENMP_LLVM_VARIANT + task->variants = 0; + task->nvariants = 0; +#endif + return task; +} + +#define GETDEP(task, i) starpu_data_handle_to_pointer(task->starpu_task->handles[i], STARPU_MAIN_RAM) +#define GET(i) (void*)STARPU_VARIABLE_GET_PTR(buffers[i]) + +static void task_call_variants(void (*fn)(void*, ...), void *buffers[], void *args) +{ + void **arg_ptrs = args; + intptr_t nargs = (intptr_t) arg_ptrs[1]; + // TODO: asm it, as we could do it nicely in a loop + switch (nargs) + { + case 0: + fn(0); + break; + case 1: + fn(GET(0)); + break; + case 2: + fn(GET(0), GET(1)); + break; + case 3: + fn(GET(0), GET(1), GET(2)); + break; + case 4: + fn(GET(0), GET(1), GET(2), GET(3)); + break; + case 5: + fn(GET(0), GET(1), GET(2), GET(3), GET(4)); + break; + case 6: + fn(GET(0), GET(1), GET(2), GET(3), GET(4), GET(5)); + break; + default: + fprintf(stderr, "Unsupported number of dependencies/arguments in task call.\n"); + abort(); + break; + } +} +#undef GETDEP + +#ifdef _STARPU_OPENMP_LLVM_VARIANT +static void task_call_cpu(void *buffers[], void *args) +{ + void **arg_ptrs = args; + task_call_variants((void (*)(void *, ...))arg_ptrs[2], buffers, args); +} + +static void task_call_cuda(void *buffers[], void *args) +{ + void **arg_ptrs = args; + task_call_variants((void (*)(void *, ...))arg_ptrs[3], buffers, args); +} +#endif + +/*TODO: wrapper void *(buffers[], nbuffer) { push push call }*/ + +kmp_task_t *__kmpc_omp_task_alloc_variants(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 flags, + size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry, + kmp_int32 nvariants STARPU_ATTRIBUTE_UNUSED) +{ + kmp_task_t *task = 
__kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry);
+#ifdef _STARPU_OPENMP_LLVM_VARIANT
+	task->nvariants = nvariants;
+	_STARPU_MALLOC(task->variants, nvariants * sizeof(kmp_variant_t));
+#endif
+	return task;
+}
+
+kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid)
+{
+	(void) loc_ref;
+	(void) gtid;
+	starpu_omp_taskwait();
+	return 0;
+}
+
+/*
+ * Translate one dependence list into codelet buffers of attr.
+ *
+ * For each of the ndeps entries of deps, starting at slot first_buffer:
+ *  - the access mode derived from the in/out flags is stored in
+ *    attr->cl.modes[] (in+out: STARPU_RW, in only: STARPU_R, otherwise
+ *    STARPU_W);
+ *  - the data handle is stored in handles[]: the one returned by
+ *    starpu_omp_data_lookup() for the base address when there is one,
+ *    otherwise a newly registered variable (len == 1) or vector handle.
+ *
+ * Returns the index of the first slot left unused.
+ */
+static int kmp_register_depend_list(struct starpu_omp_task_region_attr *attr,
+				    starpu_data_handle_t *handles, int first_buffer,
+				    const kmp_depend_info_t *deps, kmp_int32 ndeps)
+{
+	int current_buffer = first_buffer;
+	for (kmp_int32 i = 0; i < ndeps; i++)
+	{
+		const kmp_depend_info_t *dep = &deps[i];
+
+		if (dep->flags.in && dep->flags.out)
+		{
+			attr->cl.modes[current_buffer] = STARPU_RW;
+		}
+		else if (dep->flags.in)
+		{
+			attr->cl.modes[current_buffer] = STARPU_R;
+		}
+		else
+		{
+			attr->cl.modes[current_buffer] = STARPU_W;
+		}
+
+		starpu_data_handle_t known_handle = starpu_omp_data_lookup(dep->base_addr);
+		if (known_handle)
+		{
+			handles[current_buffer] = known_handle;
+		}
+		else
+		{
+			if (dep->len == 1)
+			{
+				starpu_variable_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep->base_addr, sizeof(kmp_intptr_t));
+			}
+			else
+			{
+				starpu_vector_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep->base_addr, dep->len, dep->elem_size);
+			}
+			starpu_omp_handle_register(handles[current_buffer]);
+		}
+		current_buffer++;
+	}
+	return current_buffer;
+}
+
+/*
+ * Submit new_task as a StarPU OpenMP task region.
+ *
+ * dep_list (ndeps entries) and noalias_dep_list (ndeps_noalias entries) are
+ * both turned into codelet buffers, in that order. Each list is only
+ * accessed through its own count, so a list whose count is 0 may be NULL
+ * (this is how __kmpc_omp_task() calls this function).
+ *
+ * Always returns 0.
+ */
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+				    kmp_task_t * new_task, kmp_int32 ndeps,
+				    kmp_depend_info_t *dep_list,
+				    kmp_int32 ndeps_noalias,
+				    kmp_depend_info_t *noalias_dep_list)
+{
+	(void) loc_ref;
+	(void) gtid;
+
+	/* NOTE: for some reason, just having a static struct and passing its address
+	 * triggered a segfault in the starpu_omp_task_region.
+	 * */
+	static int _msg=0;
+	if (_msg == 0)
+	{
+		_STARPU_MSG("Using the StarPU OpenMP LLVM Support\n");
+		_msg = 1;
+	}
+
+	struct starpu_omp_task_region_attr *attr = calloc(1, sizeof(struct starpu_omp_task_region_attr));
+
+	/* This is freed in starpu_omp_task_region, as attr.cl_arg_free is set to true*/
+	void **arg_ptrs = calloc(4, sizeof(void*));
+	if (attr == NULL || arg_ptrs == NULL)
+	{
+		/* Out of memory: nothing sensible can be submitted. */
+		abort();
+	}
+	arg_ptrs[0] = new_task;
+	arg_ptrs[1] = (void*) (intptr_t) (ndeps + ndeps_noalias);
+
+#ifdef _STARPU_OPENMP_LLVM_VARIANT
+	if (new_task->nvariants == 0)
+#endif
+	{
+		attr->cl.cpu_funcs[0] = task_call;
+		attr->cl.where = STARPU_CPU;
+	}
+#ifdef _STARPU_OPENMP_LLVM_VARIANT
+	else
+	{
+		for (int i = 0; i < new_task->nvariants; ++i)
+		{
+			switch(new_task->variants[i].kind)
+			{
+			case VARIANT_CPU:
+				attr->cl.where |= STARPU_CPU;
+				attr->cl.cpu_funcs[0] = task_call_cpu;
+				arg_ptrs[2] = new_task->variants[i].fn;
+				break;
+			case VARIANT_CUDA:
+				attr->cl.where |= STARPU_CUDA;
+				attr->cl.cuda_funcs[0] = task_call_cuda;
+				arg_ptrs[3] = new_task->variants[i].fn;
+				break;
+			case VARIANT_OPENCL:
+				fprintf(stderr, "variant for opencl detected but not supported: %p, ignoring.\n", new_task->variants[i].fn);
+				break;
+			}
+		}
+	}
+#endif
+
+	attr->cl_arg_size = (4)*sizeof(void *);
+	attr->cl_arg_free = 1;
+	attr->cl_arg = arg_ptrs;
+	attr->if_clause = 1;
+	attr->final_clause = 0;
+	attr->untied_clause = 1;
+	attr->mergeable_clause = 0;
+	attr->cl.nbuffers = ndeps + ndeps_noalias;
+	/* NOTE(review): handles is not released in this function; confirm who
+	 * owns it once it has been handed over through attr->handles. */
+	starpu_data_handle_t *handles = calloc(attr->cl.nbuffers, sizeof(starpu_data_handle_t));
+	if (handles == NULL && attr->cl.nbuffers != 0)
+	{
+		abort();
+	}
+
+	/* Both lists go through the same helper, so the no-alias entries are
+	 * registered from noalias_dep_list itself and never from dep_list. */
+	int current_buffer = 0;
+	current_buffer = kmp_register_depend_list(attr, handles, current_buffer, dep_list, ndeps);
+	current_buffer = kmp_register_depend_list(attr, handles, current_buffer, noalias_dep_list, ndeps_noalias);
+
+	if (current_buffer)
+	{
+		// If we have any deps
+		attr->handles = &handles[0];
+	}
+
+	// thoughts : create starpu_omp_task_region_attr here, fill it with kmp_taskdata
+	// keep an arg to the wrapper with the kmp_task_t
+	starpu_omp_task_region(attr);
+	free(attr);
+	return 0;
+}
+
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task)
+{
+	int retval = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, 0, 0, 0, 0);
+	return retval;
+}
+
+void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads)
+{
+	(void) loc;
+	(void) global_tid;
+	(void) num_teams;
+	(void) num_threads;
+	abort();
+}
+
+void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
+{ + (void) loc; + (void) argc; + (void) microtask; + abort(); +} + +void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + abort(); +} + +void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + abort(); +} + +/* Thread Information */ +kmp_int32 __kmpc_global_thread_num(ident_t *loc) +{ + (void) loc; + struct starpu_omp_region *region; + region = _starpu_omp_get_region_at_level(1); + if (region == NULL) + return 0; + return _starpu_omp_get_region_thread_num(region); +} + +kmp_int32 __kmpc_global_num_threads(ident_t *loc) +{ + (void) loc; + struct starpu_omp_region *region; + region = _starpu_omp_get_region_at_level(1); + if (region == NULL) + return 1; + return region->nb_threads; +} + +kmp_int32 __kmpc_bound_thread_num(ident_t *loc) +{ + (void) loc; + return starpu_omp_get_thread_num(); +} + +kmp_int32 __kmpc_bound_num_threads(ident_t *loc) +{ + (void) loc; + return starpu_omp_get_num_threads(); +} + +kmp_int32 __kmpc_in_parallel(ident_t *loc) +{ + (void) loc; + return starpu_omp_in_parallel(); +} + +/* Work sharing */ +kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + return starpu_omp_master_inline(); +} + +void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + /* nothing */ +} + +void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + starpu_omp_ordered_inline_begin(); +} + +void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + starpu_omp_ordered_inline_end(); +} + +kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + return starpu_omp_single_inline(); +} + +void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + /* nothing */ +} + +void __kmpc_dispatch_init_4(ident_t *loc) +{ + (void) 
loc; + abort(); +} + +void __kmpc_dispatch_next_4(ident_t *loc) +{ + (void) loc; + abort(); +} + +/* Work sharing */ +void __kmpc_flush(ident_t *loc) +{ + (void) loc; + abort(); +} + +void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + starpu_omp_barrier(); +} + +kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + abort(); +} + +void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + abort(); +} + +kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) +{ + (void) loc; + (void) global_tid; + abort(); +} + +void __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) +{ + (void) loc; + (void) global_tid; + (void) num_vars; + (void) reduce_size; + (void) reduce_data; + (void) reduce_func; + (void) lck; + abort(); +} + +void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) +{ + (void) loc; + (void) global_tid; + (void) lck; + abort(); +} + +void __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) +{ + (void) loc; + (void) global_tid; + (void) num_vars; + (void) reduce_size; + (void) reduce_data; + (void) reduce_func; + (void) lck; + abort(); +} + +void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) +{ + (void) loc; + (void) global_tid; + (void) lck; + abort(); +} + +/* lib constructor/destructor */ + +__attribute__((constructor)) +static void __kmp_constructor(void) +{ + static int _msg=0; + if (_msg == 0) + { + _STARPU_MSG("Initialising the StarPU OpenMP LLVM Support\n"); + _msg = 1; + } + + int ret = starpu_omp_init(); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_omp_init"); +} + +__attribute__((destructor)) +static void kmp_destructor(void) +{ + starpu_omp_shutdown(); +} + +/* omp lib API: each omp_* entry point below forwards to the corresponding StarPU function */ + +void omp_set_num_threads(int threads) +{ + starpu_omp_set_num_threads(threads); +} + +int omp_get_num_threads(void) +{ + return starpu_omp_get_num_threads(); +} + +int omp_get_thread_num(void) +{ + return starpu_omp_get_thread_num(); +} + +int omp_get_max_threads(void) +{ + return starpu_omp_get_max_threads(); +} + +int omp_get_num_procs(void) +{ + return starpu_omp_get_num_procs(); +} + +int omp_in_parallel(void) +{ + return starpu_omp_in_parallel(); +} + +void omp_set_dynamic(int dynamic_threads) +{ + starpu_omp_set_dynamic(dynamic_threads); +} + +int omp_get_dynamic(void) +{ + return starpu_omp_get_dynamic(); +} + +void omp_set_nested(int nested) +{ + starpu_omp_set_nested(nested); +} + +int omp_get_nested(void) +{ + return starpu_omp_get_nested(); +} + +int omp_get_cancellation(void) +{ + return starpu_omp_get_cancellation(); +} + +void omp_set_schedule(enum omp_sched_value kind, int modifier) +{ + starpu_omp_set_schedule(kind, modifier); +} + +void omp_get_schedule(enum omp_sched_value *kind, int *modifier) +{ + starpu_omp_get_schedule((enum starpu_omp_sched_value*)kind, modifier); +} + +int omp_get_thread_limit(void) +{ + return starpu_omp_get_thread_limit(); +} + +void omp_set_max_active_levels(int max_levels) +{ + starpu_omp_set_max_active_levels(max_levels); +} + +int omp_get_max_active_levels(void) +{ + return starpu_omp_get_max_active_levels(); +} + +int omp_get_level(void) +{ + return starpu_omp_get_level(); +} + +int omp_get_ancestor_thread_num(int level) +{ + return starpu_omp_get_ancestor_thread_num(level); +} + +int omp_get_team_size(int level) +{ + return starpu_omp_get_team_size(level); +} + +int omp_get_active_level(void) +{ + return starpu_omp_get_active_level(); +} + +int omp_in_final(void) +{ + return starpu_omp_in_final(); +} + +enum omp_proc_bind_value omp_get_proc_bind(void) +{ + return
starpu_omp_get_proc_bind(); +} + +int omp_get_num_places(void) +{ + return starpu_omp_get_num_places(); +} + +int omp_get_place_num_procs(int place_num) +{ + return starpu_omp_get_place_num_procs(place_num); +} + +void omp_get_place_proc_ids(int place_num, int *ids) +{ + starpu_omp_get_place_proc_ids(place_num, ids); +} + +int omp_get_place_num(void) +{ + return starpu_omp_get_place_num(); +} + +int omp_get_partition_num_places(void) +{ + return starpu_omp_get_partition_num_places(); +} + +void omp_get_partition_place_nums(int *place_nums) +{ + starpu_omp_get_partition_place_nums(place_nums); +} + +void omp_set_default_device(int device_num) +{ + starpu_omp_set_default_device(device_num); +} + +int omp_get_default_device(void) +{ + return starpu_omp_get_default_device(); +} + +int omp_get_num_devices(void) +{ + return starpu_omp_get_num_devices(); +} + +int omp_get_num_teams(void) +{ + return starpu_omp_get_num_teams(); +} + +int omp_get_team_num(void) +{ + return starpu_omp_get_team_num(); +} + +int omp_is_initial_device(void) +{ + return starpu_omp_is_initial_device(); +} + +int omp_get_initial_device(void) +{ + return starpu_omp_get_initial_device(); +} + +int omp_get_max_task_priority(void) +{ + return starpu_omp_get_max_task_priority(); +} + +void omp_init_lock(omp_lock_t *lock) +{ + starpu_omp_init_lock(lock); +} + +void omp_destroy_lock(omp_lock_t *lock) +{ + starpu_omp_destroy_lock(lock); +} + +void omp_set_lock(omp_lock_t *lock) +{ + starpu_omp_set_lock(lock); +} + +void omp_unset_lock(omp_lock_t *lock) +{ + starpu_omp_unset_lock(lock); +} + +int omp_test_lock(omp_lock_t *lock) +{ + return starpu_omp_test_lock(lock); +} + +void omp_init_nest_lock(omp_nest_lock_t *lock) +{ + starpu_omp_init_nest_lock(lock); +} + +void omp_destroy_nest_lock(omp_nest_lock_t *lock) +{ + starpu_omp_destroy_nest_lock(lock); +} + +void omp_set_nest_lock(omp_nest_lock_t *lock) +{ + starpu_omp_set_nest_lock(lock); +} + +void omp_unset_nest_lock(omp_nest_lock_t *lock) +{ + 
starpu_omp_unset_nest_lock(lock); +} + +int omp_test_nest_lock(omp_nest_lock_t *lock) +{ + return starpu_omp_test_nest_lock(lock); +} + +double omp_get_wtime(void) +{ + return starpu_omp_get_wtime(); +} + +double omp_get_wtick(void) +{ + return starpu_omp_get_wtick(); +} + +void *omp_get_local_cuda_stream(void) +{ +#ifdef STARPU_USE_CUDA + return starpu_cuda_get_local_stream(); +#else + return 0; +#endif +} + +#endif /* STARPU_OPENMP_LLVM */ diff --git a/starpufft/Makefile.am b/starpufft/Makefile.am new file mode 100644 index 0000000..1c565fe --- /dev/null +++ b/starpufft/Makefile.am @@ -0,0 +1,31 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS=src + +if STARPU_BUILD_STARPUFFT_EXAMPLES +if STARPU_BUILD_TESTS +SUBDIRS += tests +endif +endif + +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpufft.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpufft.pc packages/starpufft-1.0.pc packages/starpufft-1.1.pc packages/starpufft-1.2.pc packages/starpufft-1.3.pc packages/starpufft-1.4.pc diff --git a/starpufft/Makefile.in b/starpufft/Makefile.in new file mode 100644 index 0000000..bf5b54f --- /dev/null +++ b/starpufft/Makefile.in @@ -0,0 +1,977 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE@@STARPU_BUILD_TESTS_TRUE@am__append_1 = tests +subdir = starpufft +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + 
$(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup 
= \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(versincludedir)" +DATA = $(pkgconfig_DATA) +HEADERS = $(versinclude_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = src tests +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS 
= @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = 
@LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = 
@STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ 
+doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +SUBDIRS = src $(am__append_1) +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpufft.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/libstarpufft.pc packages/starpufft-1.0.pc packages/starpufft-1.1.pc packages/starpufft-1.2.pc packages/starpufft-1.3.pc packages/starpufft-1.4.pc +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpufft/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) 
"$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-versincludeHEADERS: $(versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-pkgconfigDATA install-versincludeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-pkgconfigDATA uninstall-versincludeHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic 
distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-pkgconfigDATA install-ps \ + install-ps-am install-strip install-versincludeHEADERS \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-pkgconfigDATA \ + uninstall-versincludeHEADERS + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpufft/include/starpufft.h b/starpufft/include/starpufft.h new file mode 100644 index 0000000..8b9427b --- /dev/null +++ b/starpufft/include/starpufft.h @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +// The documentation for this file is in doc/doxygen/chapters/api/fft_support.doxy + +#ifndef __STARPU_FFT_H__ +#define __STARPU_FFT_H__ + +#include <stdio.h> +#include <complex.h> +#include <starpu.h> +#ifdef STARPU_USE_CUDA +#include <cufft.h> +#define STARPU_CUFFT_REPORT_ERROR(status) STARPUFFT(report_error) (__starpu_func__, __FILE__, __LINE__, status) +#endif /* STARPU_USE_CUDA */ + +#define STARPUFFT_FORWARD -1 +#define STARPUFFT_INVERSE 1 + +#define __STARPUFFT(name) starpufft_##name +#define __STARPUFFTF(name) starpufftf_##name +#define __STARPUFFTL(name) starpufftl_##name + +#define __STARPUFFT_INTERFACE(starpufft, real) \ + typedef real _Complex starpufft(complex); \ + \ + typedef struct starpufft(plan) * starpufft(plan); \ + \ + starpufft(plan) starpufft(plan_dft_1d)(int n, int sign, unsigned flags); \ + starpufft(plan) starpufft(plan_dft_2d)(int n, int m, int sign, unsigned flags); \ + starpufft(plan) starpufft(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags); \ + starpufft(plan) starpufft(plan_dft_r2c_1d)(int n, unsigned flags); \ + starpufft(plan) starpufft(plan_dft_c2r_1d)(int n, unsigned flags); \ + \ + void *starpufft(malloc)(size_t n); \ + void starpufft(free)(void *p, size_t dim); \ + \ + int starpufft(execute)(starpufft(plan) p, void *in, void *out); \ + struct starpu_task *starpufft(start)(starpufft(plan) p, void *in, void *out); \ + \ + int starpufft(execute_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \ + struct starpu_task *starpufft(start_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \ + \ + void starpufft(cleanup)(starpufft(plan) p); \ + void starpufft(destroy_plan)(starpufft(plan) p); \ + \ + void starpufft(startstats)(void); \ + void starpufft(stopstats)(void); \ + void starpufft(showstats)(FILE * out); + +__STARPUFFT_INTERFACE(__STARPUFFT, double) +__STARPUFFT_INTERFACE(__STARPUFFTF, float) +__STARPUFFT_INTERFACE(__STARPUFFTL, long double) + +/* Internal use */ +extern int 
starpufft_last_plan_number; + +#endif // __STARPU_FFT_H__ diff --git a/starpufft/packages/libstarpufft.pc.in b/starpufft/packages/libstarpufft.pc.in new file mode 100644 index 0000000..cf24fdf --- /dev/null +++ b/starpufft/packages/libstarpufft.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/packages/starpufft-1.0.pc.in b/starpufft/packages/starpufft-1.0.pc.in new file mode 100644 index 0000000..2fd26a6 --- /dev/null +++ b/starpufft/packages/starpufft-1.0.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/packages/starpufft-1.1.pc.in b/starpufft/packages/starpufft-1.1.pc.in new file mode 100644 index 0000000..2fd26a6 --- /dev/null +++ b/starpufft/packages/starpufft-1.1.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/packages/starpufft-1.2.pc.in b/starpufft/packages/starpufft-1.2.pc.in new file mode 100644 index 0000000..2fd26a6 --- /dev/null +++ b/starpufft/packages/starpufft-1.2.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/packages/starpufft-1.3.pc.in b/starpufft/packages/starpufft-1.3.pc.in new file mode 100644 index 0000000..2fd26a6 --- /dev/null +++ b/starpufft/packages/starpufft-1.3.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/packages/starpufft-1.4.pc.in b/starpufft/packages/starpufft-1.4.pc.in new file mode 100644 index 0000000..c007571 --- /dev/null +++ b/starpufft/packages/starpufft-1.4.pc.in @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: starpufft +Description: offers support for heterogeneous multicore architecture +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ +Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ +Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ diff --git a/starpufft/src/Makefile.am b/starpufft/src/Makefile.am new file mode 100644 index 0000000..2d94e45 --- /dev/null +++ b/starpufft/src/Makefile.am @@ -0,0 +1,54 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-notests.mk + +AM_CFLAGS += $(FFTWF_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(FFTW_LIBS) $(FFTWF_LIBS) +LIBS += $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS) + +lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la + +EXTRA_DIST = \ + starpufft-float.h \ + starpufft-double.h \ + cudax_kernels.h \ + starpufftx.c \ + starpufftx1d.c \ + starpufftx2d.c \ + starpufftx3d.c \ + cuda_kernels.cu \ + cudaf_kernels.cu \ + cudax_kernels.cu + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE) + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +if STARPU_USE_CUDA +NVCCFLAGS += -Xcompiler -fPIC -Xlinker -fPIC + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD += cudaf_kernels.o + +if STARPU_HAVE_CUFFTDOUBLECOMPLEX +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD += cuda_kernels.o +endif + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBS = $(LIBS) $(STARPU_CUDA_LDFLAGS) +endif diff 
--git a/starpufft/src/Makefile.in b/starpufft/src/Makefile.in new file mode 100644 index 0000000..4cf22ca --- /dev/null +++ b/starpufft/src/Makefile.in @@ -0,0 +1,941 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +@STARPU_USE_CUDA_TRUE@am__append_3 = -Xcompiler -fPIC -Xlinker -fPIC +@STARPU_USE_CUDA_TRUE@am__append_4 = cudaf_kernels.o +@STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__append_5 = cuda_kernels.o +subdir = starpufft/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] 
= "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES = \ + $(am__append_4) $(am__append_5) +am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpufft.lo \ + starpufftf.lo starpufft_common.lo +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/starpufft.Plo \ + ./$(DEPDIR)/starpufft_common.Plo ./$(DEPDIR)/starpufftf.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = 
@DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(FFTW_LIBS) $(FFTWF_LIBS) \ + $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ 
+LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) $(am__append_3) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = 
@PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ 
+STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = 
@build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FFTWF_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) +lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la +EXTRA_DIST = \ + starpufft-float.h \ + starpufft-double.h \ + cudax_kernels.h \ + starpufftx.c \ + starpufftx1d.c \ + starpufftx2d.c \ + starpufftx3d.c \ + cuda_kernels.cu \ + cudaf_kernels.cu \ + cudax_kernels.cu + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ + -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE) + +libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(am__append_4) \ + $(am__append_5) +@STARPU_USE_CUDA_TRUE@libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBS = $(LIBS) $(STARPU_CUDA_LDFLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep 
in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpufft/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ 
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libstarpufft-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufft.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufft_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufftf.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/starpufft.Plo + -rm -f ./$(DEPDIR)/starpufft_common.Plo + -rm -f ./$(DEPDIR)/starpufftf.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/starpufft.Plo + -rm -f ./$(DEPDIR)/starpufft_common.Plo + -rm -f ./$(DEPDIR)/starpufftf.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: all all-am am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am 
uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpufft/src/cuda_kernels.cu b/starpufft/src/cuda_kernels.cu new file mode 100644 index 0000000..a9e9a84 --- /dev/null +++ b/starpufft/src/cuda_kernels.cu @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "starpufft-double.h" +#include "cudax_kernels.cu" diff --git a/starpufft/src/cudaf_kernels.cu b/starpufft/src/cudaf_kernels.cu new file mode 100644 index 0000000..5125f49 --- /dev/null +++ b/starpufft/src/cudaf_kernels.cu @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "starpufft-float.h" +#include "cudax_kernels.cu" diff --git a/starpufft/src/cudax_kernels.cu b/starpufft/src/cudax_kernels.cu new file mode 100644 index 0000000..5be0e19 --- /dev/null +++ b/starpufft/src/cudax_kernels.cu @@ -0,0 +1,159 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#define _externC extern "C" +#include "cudax_kernels.h" + +/* Note: these assume that the sizes are powers of two */ +/* VARS_1d: declares this thread's first index (start) and the total number of launched threads (numthreads) for a 1D launch. */ +#define VARS_1d \ + unsigned start = threadIdx.x + blockIdx.x * blockDim.x; \ + unsigned numthreads = blockDim.x * gridDim.x; +/* DISTRIB_1d(n, func, args): launches func on args, calls STARPU_CUDA_REPORT_ERROR if cudaGetLastError() reports a failure, then synchronizes the local StarPU CUDA stream. dimGrid is n when n is below 128, otherwise dimGrid is n / 128 and dimBlock is 128. The macro declares locals, so it can be expanded only once per scope; its last line ends with a continuation backslash, so the blank line that follows still belongs to the macro. NOTE(review): the launch configuration between the chevrons reads as empty in this copy -- confirm against the upstream file. */ +#define DISTRIB_1d(n, func,args) \ + unsigned threads_per_block = 128; \ +\ + if (n < threads_per_block) \ + { \ + dim3 dimGrid(n); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + else \ + { \ + dim3 dimGrid(n / threads_per_block); \ + dim3 dimBlock(threads_per_block); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); \ + +extern "C" __global__ void STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2) +{ /* Kernel: each thread copies in[i+j*n1] into twisted1[j] for j = start, start+numthreads, ... while j is below n2. */ + unsigned j; + VARS_1d + unsigned end = n2; + + for (j = start; j < end; j += numthreads) + twisted1[j] = in[i+j*n1]; +} +/* Host entry point: runs the cuda_twist1_1d kernel for n2 elements through DISTRIB_1d, which synchronizes the stream before returning. */ +extern "C" void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2) +{ + DISTRIB_1d(n2, STARPUFFT(cuda_twist1_1d), (in, twisted1, i, n1, n2)); +} +/* Kernel: each thread multiplies out[j] in place by roots[i*j] for j = start, start+numthreads, ... while j is below n. */ +extern "C" __global__ void STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned n, unsigned i) +{ + unsigned j; + VARS_1d + unsigned end = n; + + for (j = start; j < end; j += numthreads) + out[j] = _cuCmul(out[j], roots[i*j]); + return; +} +/* Host entry point: runs the cuda_twiddle_1d kernel for n elements through DISTRIB_1d, which synchronizes the stream before returning. */ +extern "C" void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i) +{ + DISTRIB_1d(n, STARPUFFT(cuda_twiddle_1d), (out, roots, n, i)); +} +/* VARS_2d: declares this thread's first x and y indices and the total number of launched threads along x and y for a 2D launch. */ +#define VARS_2d \ + unsigned startx = threadIdx.x + blockIdx.x * blockDim.x; \ + unsigned starty = threadIdx.y + blockIdx.y * blockDim.y; \ + unsigned numthreadsx = blockDim.x * gridDim.x; \ + unsigned numthreadsy = blockDim.y * gridDim.y; + +/*
FIXME: introduce threads_per_dim_n / m instead */ +#define DISTRIB_2d(n, m, func, args) /* Picks dimGrid and dimBlock from n and m compared with 16, launches func on args, calls STARPU_CUDA_REPORT_ERROR if cudaGetLastError() reports a failure, then synchronizes the local StarPU CUDA stream. Declares locals, so expand it only once per scope. NOTE(review): the launch configuration between the chevrons reads as empty in this copy -- confirm against the upstream file. */ \ + unsigned threads_per_dim = 16; \ + if (n < threads_per_dim) \ + { \ + if (m < threads_per_dim) \ + { \ + dim3 dimGrid(n, m); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + else \ + { \ + dim3 dimGrid(1, m / threads_per_dim); \ + dim3 dimBlock(n, threads_per_dim); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + } \ + else \ + { \ + if (m < threads_per_dim) \ + { \ + dim3 dimGrid(n / threads_per_dim, 1); \ + dim3 dimBlock(threads_per_dim, m); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + else \ + { \ + dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \ + dim3 dimBlock(threads_per_dim, threads_per_dim); \ + func <<>> args; \ + cudaError_t status = cudaGetLastError(); \ + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ + } \ + } \ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); \ + +extern "C" __global__ void STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2) +{ /* Kernel: with m = m1*m2, each thread copies in[i*m+j+k*m*n1+l*m1] into twisted1[k*m2+l] for its k below n2 and its l below m2. */ + unsigned k, l; + VARS_2d + unsigned endx = n2; + unsigned endy = m2; + unsigned m = m1*m2; + + for (k = startx; k < endx; k += numthreadsx) + for (l = starty; l < endy; l += numthreadsy) + twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1]; +} +/* Host entry point: runs the cuda_twist1_2d kernel over n2 by m2 indices through DISTRIB_2d, which synchronizes the stream before returning. */ +extern "C" void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2) +{ + DISTRIB_2d(n2, m2, STARPUFFT(cuda_twist1_2d), (in, twisted1, i, j, n1, n2, m1, m2)); +} +/* Kernel: each thread multiplies out[k*m2+l] in place by roots0[i*k] and then by roots1[j*l] for its k below n2 and its l below m2. */ +extern "C" __global__ void STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _cuComplex * roots1,
unsigned n2, unsigned m2, unsigned i, unsigned j) +{ + unsigned k, l; + VARS_2d + unsigned endx = n2; + unsigned endy = m2; + + for (k = startx; k < endx ; k += numthreadsx) + for (l = starty; l < endy ; l += numthreadsy) + out[k*m2 + l] = _cuCmul(_cuCmul(out[k*m2 + l], roots0[i*k]), roots1[j*l]); + return; +} +/* Host entry point: runs the cuda_twiddle_2d kernel over n2 by m2 indices through DISTRIB_2d, which synchronizes the stream before returning. */ +extern "C" void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j) +{ + DISTRIB_2d(n2, m2, STARPUFFT(cuda_twiddle_2d), (out, roots0, roots1, n2, m2, i, j)); +} diff --git a/starpufft/src/cudax_kernels.h b/starpufft/src/cudax_kernels.h new file mode 100644 index 0000000..1de07b2 --- /dev/null +++ b/starpufft/src/cudax_kernels.h @@ -0,0 +1,22 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ +/* Prototypes of the host-side launchers defined in cudax_kernels.cu. The includer must define _externC (cudax_kernels.cu uses extern C linkage, starpufftx.c uses plain extern) and the STARPUFFT() name macro before including this header. NOTE(review): the two include targets below are missing in this copy -- confirm against the upstream file. */ +#include +#include +_externC void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2); +_externC void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i); +_externC void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2); +_externC void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j); diff --git a/starpufft/src/starpufft-double.h b/starpufft/src/starpufft-double.h new file mode 100644 index 0000000..3df10bc --- /dev/null +++ b/starpufft/src/starpufft-double.h @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ +/* Double-precision configuration: selects STARPUFFT_DOUBLE and maps the generic names (real, _fftw_*, _cuComplex, _cufft*, _cuCmul, STARPUFFT(), _FFTW()) to their double variants. NOTE(review): the include targets below are missing in this copy -- confirm against the upstream file. */ +#include + +#if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) +#include +#include +#endif + +#ifdef STARPU_USE_CUDA +#include +#endif + +#undef STARPUFFT_FLOAT +#define STARPUFFT_DOUBLE + +typedef double real; +#if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) +typedef fftw_complex _fftw_complex; +typedef fftw_plan _fftw_plan; +#endif +#ifdef STARPU_USE_CUDA +typedef cuDoubleComplex _cuComplex; +typedef cufftDoubleComplex _cufftComplex; +#define _cufftExecC2C cufftExecZ2Z +#define _cufftExecR2C cufftExecD2Z +#define _cufftExecC2R cufftExecZ2D +#define _CUFFT_C2C CUFFT_Z2Z +#define _CUFFT_R2C CUFFT_D2Z +#define _CUFFT_C2R CUFFT_Z2D +#define _cuCmul(x,y) cuCmul(x,y) +#endif +#define STARPUFFT(name) starpufft_##name +#define _FFTW(name) fftw_##name + +#ifdef STARPU_USE_CUDA +void STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status); +#endif /* STARPU_USE_CUDA */ +/* NOTE(review): TYPE looks like a precision suffix (empty for double); its use is not visible here -- confirm. */ +#define TYPE "" diff --git a/starpufft/src/starpufft-float.h b/starpufft/src/starpufft-float.h new file mode 100644 index 0000000..1a776ec --- /dev/null +++ b/starpufft/src/starpufft-float.h @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ +/* Single-precision configuration: selects STARPUFFT_FLOAT and maps the generic names (real, _fftw_*, _cuComplex, _cufft*, _cuCmul, STARPUFFT(), _FFTW()) to their float variants. NOTE(review): the include targets below are missing in this copy -- confirm against the upstream file. */ +#include + +#if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) +#include +#include +#endif + +#ifdef STARPU_USE_CUDA +#include +#endif + +#undef STARPUFFT_DOUBLE +#define STARPUFFT_FLOAT + +typedef float real; +#if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) +typedef fftwf_complex _fftw_complex; +typedef fftwf_plan _fftw_plan; +#endif +#ifdef STARPU_USE_CUDA +typedef cuComplex _cuComplex; +typedef cufftComplex _cufftComplex; +#define _cufftExecC2C cufftExecC2C +#define _cufftExecR2C cufftExecR2C +#define _cufftExecC2R cufftExecC2R +#define _CUFFT_C2C CUFFT_C2C +#define _CUFFT_R2C CUFFT_R2C +#define _CUFFT_C2R CUFFT_C2R +#define _cuCmul(x,y) cuCmulf(x,y) +#endif +#define STARPUFFT(name) starpufftf_##name +#define _FFTW(name) fftwf_##name + +#ifdef STARPU_USE_CUDA +void STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status); +#endif /* STARPU_USE_CUDA */ +/* NOTE(review): TYPE looks like a precision suffix (f for float); its use is not visible here -- confirm. */ +#define TYPE "f" diff --git a/starpufft/src/starpufft.c b/starpufft/src/starpufft.c new file mode 100644 index 0000000..d633ecd --- /dev/null +++ b/starpufft/src/starpufft.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include "starpufft-double.h" +#include "starpufftx.c" diff --git a/starpufft/src/starpufft_common.c b/starpufft/src/starpufft_common.c new file mode 100644 index 0000000..9056f5e --- /dev/null +++ b/starpufft/src/starpufft_common.c @@ -0,0 +1,20 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "starpufft.h" + +/* Used as an identifier in starpu tags to let plans run concurrently */ +int starpufft_last_plan_number; diff --git a/starpufft/src/starpufftf.c b/starpufft/src/starpufftf.c new file mode 100644 index 0000000..3864721 --- /dev/null +++ b/starpufft/src/starpufftf.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "starpufft-float.h" +#include "starpufftx.c" diff --git a/starpufft/src/starpufftx.c b/starpufft/src/starpufftx.c new file mode 100644 index 0000000..0601b39 --- /dev/null +++ b/starpufft/src/starpufftx.c @@ -0,0 +1,542 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define PARALLEL 0 + +#include +#include +#include + +#include +#include + +#include "starpufft.h" +#ifdef STARPU_USE_CUDA +#define _externC extern +#include "cudax_kernels.h" + +#if (defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)) && !defined(STARPU_COVERITY) +# define __STARPU_USE_CUDA +#else +# undef __STARPU_USE_CUDA +#endif + +#endif + +#define _FFTW_FLAGS FFTW_ESTIMATE + +/* Steps for the parallel variant */ +enum steps +{ + SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END +}; + +#define NUMBER_BITS 5 +#define NUMBER_SHIFT (64 - NUMBER_BITS) +#define STEP_BITS 3 +#define STEP_SHIFT (NUMBER_SHIFT - STEP_BITS) + +/* Tags for the steps of the parallel variant */ +#define _STEP_TAG(plan, step, i) (((starpu_tag_t) plan->number << NUMBER_SHIFT) | ((starpu_tag_t)(step) << STEP_SHIFT) | (starpu_tag_t) (i)) + + +#define I_BITS STEP_SHIFT + +enum type +{ + R2C, + C2R, + C2C +}; + +static unsigned task_per_worker[STARPU_NMAXWORKERS]; +static unsigned samples_per_worker[STARPU_NMAXWORKERS]; +static struct timeval start, 
submit_tasks, end; + +/* + * + * The actual kernels + * + */ + +struct STARPUFFT(plan) +{ + int number; /* uniquely identifies the plan, for starpu tags */ + + int *n; + int *n1; + int *n2; + int totsize; + int totsize1; /* Number of first-round tasks */ + int totsize2; /* Size of first-round tasks */ + int totsize3; /* Number of second-round tasks */ + int totsize4; /* Size of second-round tasks */ + int dim; + enum type type; + int sign; + + STARPUFFT(complex) *roots[2]; + starpu_data_handle_t roots_handle[2]; + + /* For each worker, we need some data */ + struct + { +#ifdef STARPU_USE_CUDA + /* CUFFT plans */ + cufftHandle plan1_cuda, plan2_cuda; + /* Sequential version */ + cufftHandle plan_cuda; +#endif +#ifdef STARPU_HAVE_FFTW + /* FFTW plans */ + _fftw_plan plan1_cpu, plan2_cpu; + /* Sequential version */ + _fftw_plan plan_cpu; +#endif + } plans[STARPU_NMAXWORKERS]; + + /* Buffers for codelets */ + STARPUFFT(complex) *in, *twisted1, *fft1, *twisted2, *fft2, *out; + size_t twisted1_size, twisted2_size, fft1_size, fft2_size; + + /* corresponding starpu DSM handles */ + starpu_data_handle_t in_handle, *twisted1_handle, *fft1_handle, *twisted2_handle, *fft2_handle, out_handle; + + /* Tasks */ + struct starpu_task **twist1_tasks, **fft1_tasks, **twist2_tasks, **fft2_tasks, **twist3_tasks; + struct starpu_task *join_task, *end_task; + + /* Arguments for tasks */ + struct STARPUFFT(args) *fft1_args, *fft2_args; +}; + +struct STARPUFFT(args) +{ + struct STARPUFFT(plan) *plan; + int i, j, jj, kk, ll, *iv, *kkv; +}; + +static void +check_dims(STARPUFFT(plan) plan) +{ + int dim; + for (dim = 0; dim < plan->dim; dim++) + if (plan->n[dim] & (plan->n[dim]-1)) + { + fprintf(stderr,"can't cope with non-power-of-2\n"); + STARPU_ABORT(); + } +} + +static void +compute_roots(STARPUFFT(plan) plan) +{ + int dim, k; + + /* Compute the n-roots and m-roots of unity for twiddling */ + for (dim = 0; dim < plan->dim; dim++) + { + STARPUFFT(complex) exp = (plan->sign * 2. 
* 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim]; + plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots)); + for (k = 0; k < plan->n[dim]; k++) + plan->roots[dim][k] = cexp(exp*k); + starpu_vector_data_register(&plan->roots_handle[dim], STARPU_MAIN_RAM, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots)); + +#ifdef STARPU_USE_CUDA + if (plan->n[dim] > 100000) + { + /* prefetch the big root array on GPUs */ + unsigned worker; + unsigned nworkers = starpu_worker_get_count(); + for (worker = 0; worker < nworkers; worker++) + { + unsigned node = starpu_worker_get_memory_node(worker); + if (starpu_worker_get_type(worker) == STARPU_CUDA_WORKER) + starpu_data_prefetch_on_node(plan->roots_handle[dim], node, 0); + } + } +#endif + } +} + +/* Only CUDA capability >= 1.3 supports doubles, rule old card out. */ +#ifdef STARPUFFT_DOUBLE +static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) { + if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) + return 1; +#ifdef STARPU_USE_CUDA + { + /* Cuda device */ + const struct cudaDeviceProp *props; + props = starpu_cuda_get_device_properties(workerid); + if (props->major >= 2 || props->minor >= 3) + /* At least compute capability 1.3, supports doubles */ + return 1; + /* Old card does not support doubles */ + return 0; + } +#endif + return 0; +} +#define CAN_EXECUTE .can_execute = can_execute, +#else +#define CAN_EXECUTE +#endif + +#include "starpufftx1d.c" +#include "starpufftx2d.c" +#include "starpufftx3d.c" + +struct starpu_task * +STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out) +{ + struct starpu_task *task; + int z; + + plan->in = _in; + plan->out = _out; + + switch (plan->dim) + { + case 1: + { + switch (plan->type) + { + case C2C: + starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); + if (!PARALLEL) + 
starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); + if (PARALLEL) + { + for (z = 0; z < plan->totsize1; z++) + plan->twist1_tasks[z]->handles[0] = plan->in_handle; + } + task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle); + break; + default: + STARPU_ABORT(); + break; + } + break; + } + case 2: + starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); + if (!PARALLEL) + starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); + if (PARALLEL) + { + for (z = 0; z < plan->totsize1; z++) + plan->twist1_tasks[z]->handles[0] = plan->in_handle; + } + task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle); + break; + case 3: + starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); + if (!PARALLEL) + starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); + if (PARALLEL) + { + for (z = 0; z < plan->totsize1; z++) + plan->twist1_tasks[z]->handles[0] = plan->in_handle; + } + task = STARPUFFT(start3dC2C)(plan, plan->in_handle, plan->out_handle); + break; + default: + STARPU_ABORT(); + break; + } + return task; +} + +void +STARPUFFT(cleanup)(STARPUFFT(plan) plan) +{ + if (plan->in_handle) + starpu_data_unregister(plan->in_handle); + if (!PARALLEL) + { + if (plan->out_handle) + starpu_data_unregister(plan->out_handle); + } +} + +struct starpu_task * +STARPUFFT(start_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) +{ + return STARPUFFT(start1dC2C)(plan, in, out); +} + +int +STARPUFFT(execute)(STARPUFFT(plan) plan, void *in, void *out) +{ + int ret; + + memset(task_per_worker, 0, sizeof(task_per_worker)); + memset(samples_per_worker, 0, 
sizeof(samples_per_worker)); /* size of the array actually being cleared */ + + gettimeofday(&start, NULL); + + struct starpu_task *task = STARPUFFT(start)(plan, in, out); + gettimeofday(&submit_tasks, NULL); + if (task) + { + ret = starpu_task_wait(task); + STARPU_ASSERT(ret == 0); + } + + STARPUFFT(cleanup)(plan); + + gettimeofday(&end, NULL); + return (task == NULL ? -1 : 0); +} + +int +STARPUFFT(execute_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) +{ + int ret; + + struct starpu_task *task = STARPUFFT(start_handle)(plan, in, out); + if (!task) return -1; + ret = starpu_task_wait(task); + STARPU_ASSERT(ret == 0); + return 0; +} + +/* Destroy FFTW plans, unregister and free buffers, and free tags */ +void +STARPUFFT(destroy_plan)(STARPUFFT(plan) plan) +{ + unsigned workerid; + int dim, i; + + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + { + switch (starpu_worker_get_type(workerid)) + { + case STARPU_CPU_WORKER: +#ifdef STARPU_HAVE_FFTW + if (PARALLEL) + { + _FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu); + _FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu); + } + else + { + _FFTW(destroy_plan)(plan->plans[workerid].plan_cpu); + } +#endif + break; + case STARPU_CUDA_WORKER: +#ifdef STARPU_USE_CUDA + /* FIXME: Can't deallocate */ +#endif + break; + default: + /* Do not care, we won't be executing anything there. 
*/ + break; + } + } + + if (PARALLEL) + { + for (i = 0; i < plan->totsize1; i++) + { + starpu_data_unregister(plan->twisted1_handle[i]); + free(plan->twist1_tasks[i]); + starpu_data_unregister(plan->fft1_handle[i]); + free(plan->fft1_tasks[i]); + } + + free(plan->twisted1_handle); + free(plan->twist1_tasks); + free(plan->fft1_handle); + free(plan->fft1_tasks); + free(plan->fft1_args); + + free(plan->join_task); + + for (i = 0; i < plan->totsize3; i++) + { + starpu_data_unregister(plan->twisted2_handle[i]); + free(plan->twist2_tasks[i]); + starpu_data_unregister(plan->fft2_handle[i]); + free(plan->fft2_tasks[i]); + free(plan->twist3_tasks[i]); + } + + free(plan->end_task); + + free(plan->twisted2_handle); + free(plan->twist2_tasks); + free(plan->fft2_handle); + free(plan->fft2_tasks); + free(plan->twist3_tasks); + free(plan->fft2_args); + + for (dim = 0; dim < plan->dim; dim++) + { + starpu_data_unregister(plan->roots_handle[dim]); + free(plan->roots[dim]); + } + + switch (plan->dim) + { + case 1: + STARPUFFT(free_1d_tags)(plan); + break; + case 2: + STARPUFFT(free_2d_tags)(plan); + break; + default: + STARPU_ABORT(); + break; + } + + free(plan->n1); + free(plan->n2); + STARPUFFT(free)(plan->twisted1, plan->twisted1_size); + STARPUFFT(free)(plan->fft1, plan->fft1_size); + STARPUFFT(free)(plan->twisted2, plan->twisted2_size); + STARPUFFT(free)(plan->fft2, plan->fft2_size); + } + free(plan->n); + free(plan); +} + +void * +STARPUFFT(malloc)(size_t n) +{ +#ifdef STARPU_USE_CUDA + void *res; + starpu_malloc(&res, n); + return res; +#else +# ifdef STARPU_HAVE_FFTW + return _FFTW(malloc)(n); +# else + return malloc(n); +# endif +#endif +} + +void +STARPUFFT(free)(void *p, size_t dim) +{ +#ifdef STARPU_USE_CUDA + starpu_free_noflag(p, dim); +#else + (void)dim; +# ifdef STARPU_HAVE_FFTW + _FFTW(free)(p); +# else + free(p); +# endif +#endif +} + +void +STARPUFFT(showstats)(FILE *out) +{ + unsigned worker; + unsigned total; + +#define TIMING(begin,end) (double)((end.tv_sec - 
begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)) +#define MSTIMING(begin,end) (TIMING(begin,end)/1000.) + double paratiming = TIMING(start,end); + fprintf(out, "Tasks submission took %2.2f ms\n", MSTIMING(start,submit_tasks)); + fprintf(out, "Tasks termination took %2.2f ms\n", MSTIMING(submit_tasks,end)); + + fprintf(out, "Total %2.2f ms\n", MSTIMING(start,end)); + + for (worker = 0, total = 0; worker < starpu_worker_get_count(); worker++) + total += task_per_worker[worker]; + + if (!total) + return; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + if (task_per_worker[worker]) + { + char name[32]; + starpu_worker_get_name(worker, name, sizeof(name)); + + unsigned long bytes = sizeof(STARPUFFT(complex))*samples_per_worker[worker]; + + fprintf(out, "\t%s -> %2.2f MB\t%2.2f\tMB/s\t%u %2.2f %%\n", name, (1.0*bytes)/(1024*1024), bytes/paratiming, task_per_worker[worker], (100.0*task_per_worker[worker])/total); /* per-worker lines go to the caller's stream, like the summary above */ + } + } +} + +#ifdef STARPU_USE_CUDA +void +STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status) +{ + const char *errormsg; /* only ever points at string literals */ + switch (status) + { + case CUFFT_SUCCESS: + errormsg = "success"; /* It'd be weird to get here. 
*/ + break; + case CUFFT_INVALID_PLAN: + errormsg = "invalid plan"; + break; + case CUFFT_ALLOC_FAILED: + errormsg = "alloc failed"; + break; + case CUFFT_INVALID_TYPE: + errormsg = "invalid type"; + break; + case CUFFT_INVALID_VALUE: + errormsg = "invalid value"; + break; + case CUFFT_INTERNAL_ERROR: + errormsg = "internal error"; + break; + case CUFFT_EXEC_FAILED: + errormsg = "exec failed"; + break; + case CUFFT_SETUP_FAILED: + errormsg = "setup failed"; + break; + case CUFFT_INVALID_SIZE: + errormsg = "invalid size"; + break; + case CUFFT_UNALIGNED_DATA: + errormsg = "unaligned data"; + break; +#if defined(MAX_CUFFT_ERROR) && (MAX_CUFFT_ERROR >= 0xE) + case CUFFT_INCOMPLETE_PARAMETER_LIST: + errormsg = "incomplete parameter list"; + break; + case CUFFT_INVALID_DEVICE: + errormsg = "invalid device"; + break; + case CUFFT_PARSE_ERROR: + errormsg = "parse error"; + break; + case CUFFT_NO_WORKSPACE: + errormsg = "no workspace"; + break; +#endif /* MAX_CUFFT_ERROR >= 0xE */ + default: + errormsg = "unknown error"; + break; + } + fprintf(stderr, "oops in %s (%s:%d)... %d: %s\n", + func, file, line, status, errormsg); + STARPU_ABORT(); +} +#endif /* !STARPU_USE_CUDA */ diff --git a/starpufft/src/starpufftx1d.c b/starpufft/src/starpufftx1d.c new file mode 100644 index 0000000..a9f6ed4 --- /dev/null +++ b/starpufft/src/starpufftx1d.c @@ -0,0 +1,877 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * + * Dumb parallel version + * + */ + +#define DIV_1D 64 + +/* + * Overall strategy for an fft of size n: + * - perform n1 ffts of size n2 + * - twiddle + * - perform n2 ffts of size n1 + * + * - n1 defaults to DIV_1D, thus n2 defaults to n / DIV_1D. + * + * Precise tasks: + * + * - twist1: twist the whole n-element input (called "in") into n1 chunks of + * size n2, by using n1 tasks taking the whole n-element input as a + * R parameter and one n2 output as a W parameter. The result is + * called twisted1. + * - fft1: perform n1 (n2) ffts, by using n1 tasks doing one fft each. Also + * twiddle the result to prepare for the fft2. The result is called + * fft1. + * - join: depends on all the fft1s, to gather the n1 results of size n2 in + * the fft1 vector. + * - twist2: twist the fft1 vector into n2 chunks of size n1, called twisted2. + * since n2 is typically very large, this step is divided in DIV_1D + * tasks, each of them performing n2/DIV_1D of them + * - fft2: perform n2 ffts of size n1. This is divided in DIV_1D tasks of + * n2/DIV_1D ffts, to be performed in batches. The result is called + * fft2. + * - twist3: twist back the result of the fft2s above into the output buffer. + * Only implemented on CPUs for simplicity of the gathering. + * + * The tag space thus uses 3 dimensions: + * - the number of the plan. + * - the step (TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END) + * - an index i between 0 and DIV_1D-1. 
+ */ + +#define STEP_TAG_1D(plan, step, i) _STEP_TAG(plan, step, i) + +#ifdef __STARPU_USE_CUDA +/* twist1: + * + * Twist the full input vector (first parameter) into one chunk of size n2 + * (second parameter) */ +static void +STARPUFFT(twist1_1d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + STARPUFFT(cuda_twist1_1d_host)(in, twisted1, i, n1, n2); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +/* fft1: + * + * Perform one fft of size n2 */ +static void +STARPUFFT(fft1_1d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + int n2 = plan->n2[0]; + int workerid = starpu_worker_get_id_check(); + cufftResult cures; + + cures = cufftPlan1d(&plan->plans[workerid].plan1_cuda, n2, _CUFFT_C2C, 1); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cures = cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream()); /* keep the status so the check below tests this call, not cufftPlan1d again */ + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft1_1d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int n2 = plan->n2[0]; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + const _cufftComplex * restrict roots = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? 
CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + STARPUFFT(cuda_twiddle_1d_host)(out, roots, n2, i); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +/* fft2: + * + * Perform n3 = n2/DIV_1D ffts of size n1 */ +static void +STARPUFFT(fft2_1d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int n3 = n2/DIV_1D; + cufftResult cures; + int workerid = starpu_worker_get_id_check(); + + cures = cufftPlan1d(&plan->plans[workerid].plan2_cuda, n1, _CUFFT_C2C, n3); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cures = cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream()); /* keep the status so the check below tests this call, not cufftPlan1d again */ + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft2_1d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + /* NOTE using batch support */ + cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in, out, plan->sign == -1 ? 
CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +/* twist1: + * + * Twist the full input vector (first parameter) into one chunk of size n2 + * (second parameter) */ +static void +STARPUFFT(twist1_1d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + + STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("twist1 %d %g\n", i, (double) cabs(plan->in[i])); */ + + for (j = 0; j < n2; j++) + twisted1[j] = in[i+j*n1]; +} + +#ifdef STARPU_HAVE_FFTW +/* fft1: + * + * Perform one fft of size n2 */ +static void +STARPUFFT(fft1_1d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j; + int n2 = plan->n2[0]; + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("fft1 %d %g\n", i, (double) cabs(twisted1[0])); */ + + _FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1); + + /* twiddle fft1 buffer */ + for (j = 0; j < n2; j++) + fft1[j] = fft1[j] * plan->roots[0][i*j]; +} +#endif + +/* twist2: + * + * Twist the full vector (results of the fft1s) into one package of n2/DIV_1D + * chunks of size n1 */ +static void +STARPUFFT(twist2_1d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int jj = args->jj; /* between 0 and DIV_1D */ + int jjj; /* between 0 and n3 */ + 
int i; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int n3 = n2/DIV_1D; + + STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + + /* printf("twist2 %d %g\n", jj, (double) cabs(plan->fft1[jj])); */ + + for (jjj = 0; jjj < n3; jjj++) { + int j = jj * n3 + jjj; + for (i = 0; i < n1; i++) + twisted2[jjj*n1+i] = plan->fft1[i*n2+j]; + } +} + +#ifdef STARPU_HAVE_FFTW +/* fft2: + * + * Perform n3 = n2/DIV_1D ffts of size n1 */ +static void +STARPUFFT(fft2_1d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + /* int jj = args->jj; */ + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("fft2 %d %g\n", jj, (double) cabs(twisted2[plan->totsize4-1])); */ + + _FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2); +} +#endif + +/* twist3: + * + * Spread the package of n2/DIV_1D chunks of size n1 into the output vector */ +static void +STARPUFFT(twist3_1d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int jj = args->jj; /* between 0 and DIV_1D */ + int jjj; /* between 0 and n3 */ + int i; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int n3 = n2/DIV_1D; + + const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + + /* printf("twist3 %d %g\n", jj, (double) cabs(fft2[0])); */ + + for (jjj = 0; jjj < n3; jjj++) { + int j = jj * n3 + jjj; + for (i = 0; i < n1; i++) + plan->out[i*n2+j] = fft2[jjj*n1+i]; + } +} + +/* Performance models for the 5 kinds of tasks */ +static struct starpu_perfmodel STARPUFFT(twist1_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist1_1d" +}; + +static 
struct starpu_perfmodel STARPUFFT(fft1_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft1_1d" +}; + +static struct starpu_perfmodel STARPUFFT(twist2_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist2_1d" +}; + +static struct starpu_perfmodel STARPUFFT(fft2_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft2_1d" +}; + +static struct starpu_perfmodel STARPUFFT(twist3_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist3_1d" +}; + +/* codelet pointers for the 5 kinds of tasks */ +static struct starpu_codelet STARPUFFT(twist1_1d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif + STARPU_CPU, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(twist1_1d_kernel_gpu)}, +#endif + .cpu_funcs = {STARPUFFT(twist1_1d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist1_1d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "twist1_1d_codelet" +}; + +static struct starpu_codelet STARPUFFT(fft1_1d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft1_1d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft1_1d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft1_1d_model), + .nbuffers = 3, + .modes = {STARPU_R, STARPU_W, STARPU_R}, + .name = "fft1_1d_codelet" +}; + +static struct starpu_codelet STARPUFFT(twist2_1d_codelet) = { + .where = STARPU_CPU, + .cpu_funcs = {STARPUFFT(twist2_1d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist2_1d_model), + .nbuffers = 1, + .modes = {STARPU_W}, + .name = "twist2_1d_codelet" +}; + +static struct starpu_codelet STARPUFFT(fft2_1d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft2_1d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + 
.cpu_funcs = {STARPUFFT(fft2_1d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft2_1d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "fft2_1d_codelet" +}; + +static struct starpu_codelet STARPUFFT(twist3_1d_codelet) = { + .where = STARPU_CPU, + .cpu_funcs = {STARPUFFT(twist3_1d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist3_1d_model), + .nbuffers = 1, + .modes = {STARPU_R}, + .name = "twist3_1d_codelet" +}; + +/* + * + * Sequential version + * + */ + +#ifdef __STARPU_USE_CUDA +/* Perform one fft of size n */ +static void +STARPUFFT(fft_1d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + int n = plan->n[0]; + int workerid = starpu_worker_get_id_check(); + + cures = cufftPlan1d(&plan->plans[workerid].plan_cuda, n, _CUFFT_C2C, 1); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cures = cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); /* keep the status so the check below tests this call, not cufftPlan1d again */ + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft_1d_kernel_gpu)(void *descr[], void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? 
CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_HAVE_FFTW +/* Perform one fft of size n */ +static void +STARPUFFT(fft_1d_kernel_cpu)(void *descr[], void *_args) +{ + STARPUFFT(plan) plan = _args; + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); +} +#endif + +static struct starpu_perfmodel STARPUFFT(fft_1d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft_1d" +}; + +static struct starpu_codelet STARPUFFT(fft_1d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft_1d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft_1d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft_1d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "fft_1d_codelet" +}; + +/* Planning: + * + * - For each CPU worker, we need to plan the two fftw stages. + * - For GPU workers, we need to do the planning in the CUDA context, so we do + * this lazily through the initialised1 and initialised2 flags ; TODO: use + * starpu_execute_on_each_worker instead (done in the omp branch). + * - We allocate all the temporary buffers and register them to starpu. + * - We create all the tasks, but do not submit them yet. It will be possible + * to reuse them at will to perform several ffts with the same planning. 
+ */ +STARPUFFT(plan) +STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags) +{ + unsigned workerid; + int n1 = DIV_1D; + int n2 = n / n1; + int n3; + int z; + struct starpu_task *task; + +if (PARALLEL) { +#ifdef __STARPU_USE_CUDA + /* cufft 1D limited to 8M elements */ + while (n2 > 8 << 20) { + n1 *= 2; + n2 /= 2; + } +#endif + STARPU_ASSERT(n == n1*n2); + STARPU_ASSERT((unsigned long long) n1 < (1ULL << I_BITS)); + + /* distribute the n2 second ffts into DIV_1D packages */ + n3 = n2 / DIV_1D; + STARPU_ASSERT(n2 == n3*DIV_1D); +} + + /* TODO: flags? Automatically set FFTW_MEASURE on calibration? */ + STARPU_ASSERT(flags == 0); + + STARPUFFT(plan) plan = malloc(sizeof(*plan)); + memset(plan, 0, sizeof(*plan)); + +if (PARALLEL) { + plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1; + + /* The plan number has a limited size */ + STARPU_ASSERT((unsigned long long) plan->number < (1ULL << NUMBER_BITS)); +} + + /* Just one dimension */ + plan->dim = 1; + plan->n = malloc(plan->dim * sizeof(*plan->n)); + plan->n[0] = n; + +if (PARALLEL) { + check_dims(plan); + + plan->n1 = malloc(plan->dim * sizeof(*plan->n1)); + plan->n1[0] = n1; + plan->n2 = malloc(plan->dim * sizeof(*plan->n2)); + plan->n2[0] = n2; +} + + /* Note: this is for coherency with the 2D case */ + plan->totsize = n; + +if (PARALLEL) { + plan->totsize1 = n1; + plan->totsize2 = n2; + plan->totsize3 = DIV_1D; + plan->totsize4 = plan->totsize / plan->totsize3; +} + plan->type = C2C; + plan->sign = sign; + +if (PARALLEL) { + /* Compute the w^k just once. */ + compute_roots(plan); +} + + /* Initialize per-worker working set */ + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { + switch (starpu_worker_get_type(workerid)) { + case STARPU_CPU_WORKER: +#ifdef STARPU_HAVE_FFTW +if (PARALLEL) { + /* first fft plan: one fft of size n2. + * FFTW imposes that buffer pointers are known at + * planning time. 
*/ + plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_1d)(n2, NULL, (void*) 1, sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan1_cpu); + + /* second fft plan: n3 ffts of size n1 */ + plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim, + plan->n1, n3, + NULL, NULL, 1, plan->totsize1, + (void*) 1, NULL, 1, plan->totsize1, + sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan2_cpu); +} else { + /* fft plan: one fft of size n. */ + plan->plans[workerid].plan_cpu = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan_cpu); +} +#else +/* #warning libstarpufft can not work correctly if libfftw3 is not installed */ +#endif + break; + case STARPU_CUDA_WORKER: + break; + default: + /* Do not care, we won't be executing anything there. */ + break; + } + } +#ifdef __STARPU_USE_CUDA +if (PARALLEL) { + starpu_execute_on_each_worker(STARPUFFT(fft1_1d_plan_gpu), plan, STARPU_CUDA); + starpu_execute_on_each_worker(STARPUFFT(fft2_1d_plan_gpu), plan, STARPU_CUDA); +} else { + starpu_execute_on_each_worker(STARPUFFT(fft_1d_plan_gpu), plan, STARPU_CUDA); +} +#endif + +if (PARALLEL) { + /* Allocate buffers. 
*/ + plan->twisted1_size = plan->totsize * sizeof(*plan->twisted1); + plan->twisted1 = STARPUFFT(malloc)(plan->twisted1_size); + memset(plan->twisted1, 0, plan->twisted1_size); + + plan->fft1_size = plan->totsize * sizeof(*plan->fft1); + plan->fft1 = STARPUFFT(malloc)(plan->fft1_size); + memset(plan->fft1, 0, plan->fft1_size); + + plan->twisted2_size = plan->totsize * sizeof(*plan->twisted2); + plan->twisted2 = STARPUFFT(malloc)(plan->twisted2_size); + memset(plan->twisted2, 0, plan->twisted2_size); + + plan->fft2_size = plan->totsize * sizeof(*plan->fft2); + plan->fft2 = STARPUFFT(malloc)(plan->fft2_size); + memset(plan->fft2, 0, plan->fft2_size); + + /* Allocate handle arrays */ + plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle)); + plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle)); + plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle)); + plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle)); + + /* Allocate task arrays */ + plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks)); + plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks)); + plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks)); + plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks)); + plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks)); + + /* Allocate codelet argument arrays */ + plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args)); + plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args)); + + /* Create first-round tasks: DIV_1D tasks of type twist1 and fft1 */ + for (z = 0; z < plan->totsize1; z++) { + int i = z; +#define STEP_TAG(step) STEP_TAG_1D(plan, step, i) + + /* TODO: get rid of tags */ + + plan->fft1_args[z].plan = plan; + plan->fft1_args[z].i = i; + + /* Register the twisted1 buffer of size n2. 
*/ + starpu_vector_data_register(&plan->twisted1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1)); + /* Register the fft1 buffer of size n2. */ + starpu_vector_data_register(&plan->fft1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1)); + + /* We'll need the result of fft1 on the CPU for the second + * twist anyway, so tell starpu to not keep the fft1 buffer in + * the GPU. */ + starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0); + + /* Create twist1 task */ + plan->twist1_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist1_1d_codelet); + /* task->handles[0] = to be filled at execution to point + to the application input. */ + task->handles[1] = plan->twisted1_handle[z]; + task->cl_arg = &plan->fft1_args[z]; + task->tag_id = STEP_TAG(TWIST1); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that fft1 depends on twisted1 */ + starpu_tag_declare_deps(STEP_TAG(FFT1), + 1, STEP_TAG(TWIST1)); + + /* Create FFT1 task */ + plan->fft1_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(fft1_1d_codelet); + task->handles[0] = plan->twisted1_handle[z]; + task->handles[1] = plan->fft1_handle[z]; + task->handles[2] = plan->roots_handle[0]; + task->cl_arg = &plan->fft1_args[z]; + task->tag_id = STEP_TAG(FFT1); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that the join task will depend on the fft1 task. 
*/ + starpu_tag_declare_deps(STEP_TAG_1D(plan, JOIN, 0), + 1, STEP_TAG(FFT1)); +#undef STEP_TAG + } + + /* Create the join task, only serving as a dependency point between + * fft1 and twist2 tasks */ + plan->join_task = task = starpu_task_create(); + task->cl = NULL; + task->tag_id = STEP_TAG_1D(plan, JOIN, 0); + task->use_tag = 1; + task->destroy = 0; + + /* Create second-round tasks: DIV_1D batches of n2/DIV_1D twist2, fft2, + * and twist3 */ + for (z = 0; z < plan->totsize3; z++) { + int jj = z; +#define STEP_TAG(step) STEP_TAG_1D(plan, step, jj) + + plan->fft2_args[z].plan = plan; + plan->fft2_args[z].jj = jj; + + /* Register n3 twisted2 buffers of size n1 */ + starpu_vector_data_register(&plan->twisted2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2)); + starpu_vector_data_register(&plan->fft2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2)); + + /* We'll need the result of fft2 on the CPU for the third + * twist anyway, so tell starpu to not keep the fft2 buffer in + * the GPU. 
*/ + starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0); + + /* Tell that twisted2 depends on the join task */ + starpu_tag_declare_deps(STEP_TAG(TWIST2), + 1, STEP_TAG_1D(plan, JOIN, 0)); + + /* Create twist2 task */ + plan->twist2_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist2_1d_codelet); + task->handles[0] = plan->twisted2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(TWIST2); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that fft2 depends on twisted2 */ + starpu_tag_declare_deps(STEP_TAG(FFT2), + 1, STEP_TAG(TWIST2)); + + /* Create FFT2 task */ + plan->fft2_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(fft2_1d_codelet); + task->handles[0] = plan->twisted2_handle[z]; + task->handles[1] = plan->fft2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(FFT2); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that twist3 depends on fft2 */ + starpu_tag_declare_deps(STEP_TAG(TWIST3), + 1, STEP_TAG(FFT2)); + + /* Create twist3 tasks */ + /* These run only on CPUs and thus write directly into the + * application output buffer. */ + plan->twist3_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist3_1d_codelet); + task->handles[0] = plan->fft2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(TWIST3); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that to be completely finished we need to have finished + * this twisted3 */ + starpu_tag_declare_deps(STEP_TAG_1D(plan, END, 0), + 1, STEP_TAG(TWIST3)); +#undef STEP_TAG + } + + /* Create end task, only serving as a join point. */ + plan->end_task = task = starpu_task_create(); + task->cl = NULL; + task->tag_id = STEP_TAG_1D(plan, END, 0); + task->use_tag = 1; + task->destroy = 0; + task->detach = 0; + +} + + return plan; +} + +/* Actually submit all the tasks. 
*/
/*
 * Submit the tasks of a 1D complex-to-complex plan.
 *
 * PARALLEL build: submits the pre-built twist1/fft1 tasks, the join task,
 * the twist2/fft2/twist3 tasks and finally the end task, all of which are
 * owned by the plan.  `in' and `out' are not used by this branch.
 * NOTE(review): the twist1 tasks' handles[0] is documented at plan creation
 * as "to be filled at execution"; presumably the caller sets it before
 * calling this function -- confirm.
 *
 * Sequential build: creates a single task running fft_1d_codelet on
 * (in, out) with the plan as codelet argument.
 *
 * Returns the task the caller can wait on (the end task, or the single FFT
 * task), or NULL when starpu_task_submit() reports -ENODEV.  Any other
 * submission error is handed to STARPU_CHECK_RETURN_VALUE.
 */
static struct starpu_task *
STARPUFFT(start1dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
{
	STARPU_ASSERT(plan->type == C2C);
	int z;
	int ret;

if (PARALLEL) {
	/* First round: one twist1 + fft1 pair per chunk. */
	for (z=0; z < plan->totsize1; z++) {
		ret = starpu_task_submit(plan->twist1_tasks[z]);
		if (ret == -ENODEV) return NULL;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		ret = starpu_task_submit(plan->fft1_tasks[z]);
		if (ret == -ENODEV) return NULL;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	/* Dependency point between the two rounds. */
	ret = starpu_task_submit(plan->join_task);
	if (ret == -ENODEV) return NULL;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

	/* Second round: twist2, fft2 and twist3 per package. */
	for (z=0; z < plan->totsize3; z++) {
		ret = starpu_task_submit(plan->twist2_tasks[z]);
		if (ret == -ENODEV) return NULL;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		ret = starpu_task_submit(plan->fft2_tasks[z]);
		if (ret == -ENODEV) return NULL;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		ret = starpu_task_submit(plan->twist3_tasks[z]);
		if (ret == -ENODEV) return NULL;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	ret = starpu_task_submit(plan->end_task);
	if (ret == -ENODEV) return NULL;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

	return plan->end_task;
} else /* !PARALLEL */ {
	struct starpu_task *task;

	/* Create FFT task */
	task = starpu_task_create();
	task->detach = 0;
	task->cl = &STARPUFFT(fft_1d_codelet);
	task->handles[0] = in;
	task->handles[1] = out;
	task->cl_arg = plan;

	ret = starpu_task_submit(task);
	if (ret == -ENODEV) {
		/* The task was created just above and nobody else holds a
		 * pointer to it: release it instead of leaking it. */
		starpu_task_destroy(task);
		return NULL;
	}
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	return task;
}
}

/* Free all the tags. The generic code handles freeing the buffers.
*/ +static void +STARPUFFT(free_1d_tags)(STARPUFFT(plan) plan) +{ + int i; + int n1 = plan->n1[0]; + + if (!PARALLEL) + return; + + for (i = 0; i < n1; i++) { + starpu_tag_remove(STEP_TAG_1D(plan, TWIST1, i)); + starpu_tag_remove(STEP_TAG_1D(plan, FFT1, i)); + } + + starpu_tag_remove(STEP_TAG_1D(plan, JOIN, 0)); + + for (i = 0; i < DIV_1D; i++) { + starpu_tag_remove(STEP_TAG_1D(plan, TWIST2, i)); + starpu_tag_remove(STEP_TAG_1D(plan, FFT2, i)); + starpu_tag_remove(STEP_TAG_1D(plan, TWIST3, i)); + } + + starpu_tag_remove(STEP_TAG_1D(plan, END, 0)); +} diff --git a/starpufft/src/starpufftx2d.c b/starpufft/src/starpufftx2d.c new file mode 100644 index 0000000..3f6db89 --- /dev/null +++ b/starpufft/src/starpufftx2d.c @@ -0,0 +1,880 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#define DIV_2D_N 8 +#define DIV_2D_M 8 + +#define I_SHIFT (I_BITS/2) +#define J_BITS I_SHIFT + +#define STEP_TAG_2D(plan, step, i, j) _STEP_TAG(plan, step, ((starpu_tag_t) i << I_SHIFT) | (starpu_tag_t) j) + +#ifdef __STARPU_USE_CUDA +/* Twist the full vector into a n2,m2 chunk */ +static void +STARPUFFT(twist1_2d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j = args->j; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int m1 = plan->n1[1]; + int m2 = plan->n2[1]; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + STARPUFFT(cuda_twist1_2d_host)(in, twisted1, i, j, n1, n2, m1, m2); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +/* fft1: + * + * Perform one fft of size n2,m2 */ +static void +STARPUFFT(fft1_2d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + int n2 = plan->n2[0]; + int m2 = plan->n2[1]; + int workerid = starpu_worker_get_id_check(); + cufftResult cures; + + cures = cufftPlan2d(&plan->plans[workerid].plan1_cuda, n2, m2, _CUFFT_C2C); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream()); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft1_2d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j = args->j; + int n2 = plan->n2[0]; + int m2 = plan->n2[1]; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + const _cufftComplex * restrict roots0 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]); + const _cufftComplex * restrict roots1 = 
(_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[3]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + /* synchronization is done after the twiddling */ + STARPUFFT(cuda_twiddle_2d_host)(out, roots0, roots1, n2, m2, i, j); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +/* fft2: + * + * Perform n3*m3 ffts of size n1,m1 */ +static void +STARPUFFT(fft2_2d_plan_gpu(void *args)) +{ + STARPUFFT(plan) plan = args; + int n1 = plan->n1[0]; + int m1 = plan->n1[1]; + cufftResult cures; + int workerid = starpu_worker_get_id_check(); + + cures = cufftPlan2d(&plan->plans[workerid].plan2_cuda, n1, m1, _CUFFT_C2C); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream()); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft2_2d_kernel_gpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int m1 = plan->n1[1]; + int m2 = plan->n2[1]; + int n3 = n2/DIV_2D_N; + int m3 = m2/DIV_2D_M; + int n; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + for (n = 0; n < n3*m3; n++) { + cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in + n * n1*m1, out + n * n1*m1, plan->sign == -1 ? 
CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + } + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +/* Twist the full vector into a n2,m2 chunk */ +static void +STARPUFFT(twist1_2d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j = args->j; + int k, l; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int m1 = plan->n1[1]; + int m2 = plan->n2[1]; + int m = plan->n[1]; + + STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("twist1 %d %d %g\n", i, j, (double) cabs(plan->in[i+j])); */ + + for (k = 0; k < n2; k++) + for (l = 0; l < m2; l++) + twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1]; +} + +#ifdef STARPU_HAVE_FFTW +/* Perform an n2,m2 fft */ +static void +STARPUFFT(fft1_2d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int i = args->i; + int j = args->j; + int k, l; + int n2 = plan->n2[0]; + int m2 = plan->n2[1]; + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("fft1 %d %d %g\n", i, j, (double) cabs(twisted1[0])); */ + + _FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1); + for (k = 0; k < n2; k++) + for (l = 0; l < m2; l++) + fft1[k*m2 + l] = fft1[k*m2 + l] * plan->roots[0][i*k] * plan->roots[1][j*l]; +} +#endif + +/* Twist the full vector into a package of n2/DIV_2D_N,m2/DIV_2D_M (n1,m1) chunks */ +static void +STARPUFFT(twist2_2d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = 
args->plan; + int kk = args->kk; /* between 0 and DIV_2D_N */ + int ll = args->ll; /* between 0 and DIV_2D_M */ + int kkk, lll; /* between 0,0 and n3,m3 */ + int i, j; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int m1 = plan->n1[1]; + int m2 = plan->n2[1]; + int n3 = n2/DIV_2D_N; + int m3 = m2/DIV_2D_M; + + STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + + /* printf("twist2 %d %d %g\n", kk, ll, (double) cabs(plan->fft1[kk+ll])); */ + + for (kkk = 0; kkk < n3; kkk++) { + int k = kk * n3 + kkk; + for (lll = 0; lll < m3; lll++) { + int l = ll * m3 + lll; + for (i = 0; i < n1; i++) + for (j = 0; j < m1; j++) + twisted2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j] = plan->fft1[i*n1*n2*m2+j*n2*m2+k*m2+l]; + } + } +} + +#ifdef STARPU_HAVE_FFTW +/* Perform (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) ffts */ +static void +STARPUFFT(fft2_2d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + /* int kk = args->kk; */ + /* int ll = args->ll; */ + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + /* printf("fft2 %d %d %g\n", kk, ll, (double) cabs(twisted2[plan->totsize4-1])); */ + + _FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2); +} +#endif + +/* Spread the package of (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) chunks into the full vector */ +static void +STARPUFFT(twist3_2d_kernel_cpu)(void *descr[], void *_args) +{ + struct STARPUFFT(args) *args = _args; + STARPUFFT(plan) plan = args->plan; + int kk = args->kk; /* between 0 and DIV_2D_N */ + int ll = args->ll; /* between 0 and DIV_2D_M */ + int kkk, lll; /* between 0,0 and n3,m3 */ + int i, j; + int n1 = plan->n1[0]; + int n2 = plan->n2[0]; + int m1 = plan->n1[1]; + int m2 = plan->n2[1]; + int n3 = 
n2/DIV_2D_N; + int m3 = m2/DIV_2D_M; + int m = plan->n[1]; + + const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + + /* printf("twist3 %d %d %g\n", kk, ll, (double) cabs(fft2[0])); */ + + for (kkk = 0; kkk < n3; kkk++) { + int k = kk * n3 + kkk; + for (lll = 0; lll < m3; lll++) { + int l = ll * m3 + lll; + for (i = 0; i < n1; i++) + for (j = 0; j < m1; j++) + plan->out[i*n2*m+j*m2+k*m+l] = fft2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j]; + } + } +} + +struct starpu_perfmodel STARPUFFT(twist1_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist1_2d" +}; + +struct starpu_perfmodel STARPUFFT(fft1_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft1_2d" +}; + +struct starpu_perfmodel STARPUFFT(twist2_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist2_2d" +}; + +struct starpu_perfmodel STARPUFFT(fft2_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft2_2d" +}; + +struct starpu_perfmodel STARPUFFT(twist3_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"twist3_2d" +}; + +static struct starpu_codelet STARPUFFT(twist1_2d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif + STARPU_CPU, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(twist1_2d_kernel_gpu)}, +#endif + .cpu_funcs = {STARPUFFT(twist1_2d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist1_2d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "twist1_2d_codelet" +}; + +static struct starpu_codelet STARPUFFT(fft1_2d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft1_2d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft1_2d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft1_2d_model), + .nbuffers = 4, + .modes = {STARPU_R, STARPU_W, STARPU_R, STARPU_R}, + .name = "fft1_2d_codelet" +}; + 
+static struct starpu_codelet STARPUFFT(twist2_2d_codelet) = { + .where = STARPU_CPU, + .cpu_funcs = {STARPUFFT(twist2_2d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist2_2d_model), + .nbuffers = 1, + .modes = {STARPU_W}, + .name = "twist2_2d_codelet" +}; + +static struct starpu_codelet STARPUFFT(fft2_2d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft2_2d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft2_2d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft2_2d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "fft2_2d_codelet" +}; + +static struct starpu_codelet STARPUFFT(twist3_2d_codelet) = { + .where = STARPU_CPU, + .cpu_funcs = {STARPUFFT(twist3_2d_kernel_cpu)}, + CAN_EXECUTE + .model = &STARPUFFT(twist3_2d_model), + .nbuffers = 1, + .modes = {STARPU_R}, + .name = "twist3_2d_codelet" +}; + +/* + * + * Sequential version + * + */ + +#ifdef __STARPU_USE_CUDA +/* Perform one fft of size n,m */ +static void +STARPUFFT(fft_2d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + int n = plan->n[0]; + int m = plan->n[1]; + int workerid = starpu_worker_get_id_check(); + + cures = cufftPlan2d(&plan->plans[workerid].plan_cuda, n, m, _CUFFT_C2C); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft_2d_kernel_gpu)(void *descr[], void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + cures = 
_cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_HAVE_FFTW +/* Perform one fft of size n,m */ +static void +STARPUFFT(fft_2d_kernel_cpu)(void *descr[], void *_args) +{ + STARPUFFT(plan) plan = _args; + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); +} +#endif + +static struct starpu_perfmodel STARPUFFT(fft_2d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft_2d" +}; + +static struct starpu_codelet STARPUFFT(fft_2d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft_2d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft_2d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft_2d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "fft_2d_codelet" +}; + +STARPUFFT(plan) +STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags) +{ + unsigned workerid; + int n1 = DIV_2D_N; + int n2 = n / n1; + int n3; + int m1 = DIV_2D_M; + int m2 = m / m1; + int m3; + int z; + struct starpu_task *task; + +if (PARALLEL) { + /* + * Simple strategy: + * + * - twist1: twist input in n1*m1 (n2,m2) chunks + * - fft1: perform n1*m1 (n2,m2) ffts + * - twist2: twist into n2*m2 (n1,m1) chunks distributed in + * DIV_2D_N*DIV_2D_M groups + * - fft2: perform DIV_2D_N*DIV_2D_M times n3*m3 (n1,m1) ffts + * - twist3: twist back into output + */ + +#ifdef __STARPU_USE_CUDA + /* cufft 2D-3D limited to [2,16384] */ + while 
(n2 > 16384) { + n1 *= 2; + n2 /= 2; + } +#endif + STARPU_ASSERT(n == n1*n2); + STARPU_ASSERT((unsigned long long) n1 < (1ULL << J_BITS)); + + +#ifdef __STARPU_USE_CUDA + /* cufft 2D-3D limited to [2,16384] */ + while (m2 > 16384) { + m1 *= 2; + m2 /= 2; + } +#endif + STARPU_ASSERT(m == m1*m2); + STARPU_ASSERT((unsigned long long) m1 < (1ULL << J_BITS)); + + /* distribute the n2*m2 second ffts into DIV_2D_N*DIV_2D_M packages */ + n3 = n2 / DIV_2D_N; + STARPU_ASSERT(n2 == n3*DIV_2D_N); + m3 = m2 / DIV_2D_M; + STARPU_ASSERT(m2 == m3*DIV_2D_M); +} + + /* TODO: flags? Automatically set FFTW_MEASURE on calibration? */ + STARPU_ASSERT(flags == 0); + + STARPUFFT(plan) plan = malloc(sizeof(*plan)); + memset(plan, 0, sizeof(*plan)); + +if (PARALLEL) { + plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1; + + /* 4bit limitation in the tag space */ + STARPU_ASSERT((unsigned long long) plan->number < (1ULL << NUMBER_BITS)); +} + + plan->dim = 2; + plan->n = malloc(plan->dim * sizeof(*plan->n)); + plan->n[0] = n; + plan->n[1] = m; + +if (PARALLEL) { + check_dims(plan); + + plan->n1 = malloc(plan->dim * sizeof(*plan->n1)); + plan->n1[0] = n1; + plan->n1[1] = m1; + plan->n2 = malloc(plan->dim * sizeof(*plan->n2)); + plan->n2[0] = n2; + plan->n2[1] = m2; +} + + plan->totsize = n * m; + +if (PARALLEL) { + plan->totsize1 = n1 * m1; + plan->totsize2 = n2 * m2; + plan->totsize3 = DIV_2D_N * DIV_2D_M; + plan->totsize4 = plan->totsize / plan->totsize3; +} + plan->type = C2C; + plan->sign = sign; + +if (PARALLEL) { + /* Compute the w^k just once. 
*/ + compute_roots(plan); +} + + /* Initialize per-worker working set */ + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { + switch (starpu_worker_get_type(workerid)) { + case STARPU_CPU_WORKER: +#ifdef STARPU_HAVE_FFTW +if (PARALLEL) { + /* first fft plan: one n2*m2 fft */ + plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_2d)(n2, m2, NULL, (void*) 1, sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan1_cpu); + + /* second fft plan: n3*m3 n1*m1 ffts */ + plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim, + plan->n1, n3*m3, + NULL, NULL, 1, plan->totsize1, + (void*) 1, NULL, 1, plan->totsize1, + sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan2_cpu); +} else { + /* fft plan: one fft of size n, m. */ + plan->plans[workerid].plan_cpu = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan_cpu); +} +#else +/* #warning libstarpufft can not work correctly if libfftw3 is not installed */ +#endif + break; + case STARPU_CUDA_WORKER: + break; + default: + /* Do not care, we won't be executing anything there. */ + break; + } + } +#ifdef __STARPU_USE_CUDA +if (PARALLEL) { + starpu_execute_on_each_worker(STARPUFFT(fft1_2d_plan_gpu), plan, STARPU_CUDA); + starpu_execute_on_each_worker(STARPUFFT(fft2_2d_plan_gpu), plan, STARPU_CUDA); +} else { + starpu_execute_on_each_worker(STARPUFFT(fft_2d_plan_gpu), plan, STARPU_CUDA); +} +#endif + +if (PARALLEL) { + /* Allocate buffers. 
*/ + plan->twisted1_size = plan->totsize * sizeof(*plan->twisted1); + plan->twisted1 = STARPUFFT(malloc)(plan->twisted1_size); + memset(plan->twisted1, 0, plan->twisted1_size); + + plan->fft1_size = plan->totsize * sizeof(*plan->fft1); + plan->fft1 = STARPUFFT(malloc)(plan->fft1_size); + memset(plan->fft1, 0, plan->fft1_size); + + plan->twisted2_size = plan->totsize * sizeof(*plan->twisted2); + plan->twisted2 = STARPUFFT(malloc)(plan->twisted2_size); + memset(plan->twisted2, 0, plan->twisted2_size); + + plan->fft2_size = plan->totsize * sizeof(*plan->fft2); + plan->fft2 = STARPUFFT(malloc)(plan->fft2_size); + memset(plan->fft2, 0, plan->fft2_size); + + /* Allocate handle arrays */ + plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle)); + plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle)); + plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle)); + plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle)); + + /* Allocate task arrays */ + plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks)); + plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks)); + plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks)); + plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks)); + plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks)); + + /* Allocate codelet argument arrays */ + plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args)); + plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args)); + + /* Create first-round tasks */ + for (z = 0; z < plan->totsize1; z++) { + int i = z / m1, j = z % m1; +#define STEP_TAG(step) STEP_TAG_2D(plan, step, i, j) + + /* TODO: get rid of tags */ + + plan->fft1_args[z].plan = plan; + plan->fft1_args[z].i = i; + plan->fft1_args[z].j = j; + + /* Register (n2,m2) chunks */ + starpu_vector_data_register(&plan->twisted1_handle[z], STARPU_MAIN_RAM, 
(uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1)); + starpu_vector_data_register(&plan->fft1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1)); + + /* We'll need it on the CPU for the second twist anyway */ + starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0); + + /* Create twist1 task */ + plan->twist1_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist1_2d_codelet); + /* task->handles[0] = to be filled at execution */ + task->handles[1] = plan->twisted1_handle[z]; + task->cl_arg = &plan->fft1_args[z]; + task->tag_id = STEP_TAG(TWIST1); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that fft1 depends on twisted1 */ + starpu_tag_declare_deps(STEP_TAG(FFT1), + 1, STEP_TAG(TWIST1)); + + /* Create FFT1 task */ + plan->fft1_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(fft1_2d_codelet); + task->handles[0] = plan->twisted1_handle[z]; + task->handles[1] = plan->fft1_handle[z]; + task->handles[2] = plan->roots_handle[0]; + task->handles[3] = plan->roots_handle[1]; + task->cl_arg = &plan->fft1_args[z]; + task->tag_id = STEP_TAG(FFT1); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that to be done with first step we need to have + * finished this fft1 */ + starpu_tag_declare_deps(STEP_TAG_2D(plan, JOIN, 0, 0), + 1, STEP_TAG(FFT1)); +#undef STEP_TAG + } + + /* Create join task */ + plan->join_task = task = starpu_task_create(); + task->cl = NULL; + task->tag_id = STEP_TAG_2D(plan, JOIN, 0, 0); + task->use_tag = 1; + task->destroy = 0; + + /* Create second-round tasks */ + for (z = 0; z < plan->totsize3; z++) { + int kk = z / DIV_2D_M, ll = z % DIV_2D_M; +#define STEP_TAG(step) STEP_TAG_2D(plan, step, kk, ll) + + plan->fft2_args[z].plan = plan; + plan->fft2_args[z].kk = kk; + plan->fft2_args[z].ll = ll; + + /* Register n3*m3 (n1,m1) chunks */ + starpu_vector_data_register(&plan->twisted2_handle[z], STARPU_MAIN_RAM, (uintptr_t) 
&plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2)); + starpu_vector_data_register(&plan->fft2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2)); + + /* We'll need it on the CPU for the last twist anyway */ + starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0); + + /* Tell that twisted2 depends on the whole first step to be + * done */ + starpu_tag_declare_deps(STEP_TAG(TWIST2), + 1, STEP_TAG_2D(plan, JOIN, 0, 0)); + + /* Create twist2 task */ + plan->twist2_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist2_2d_codelet); + task->handles[0] = plan->twisted2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(TWIST2); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that fft2 depends on twisted2 */ + starpu_tag_declare_deps(STEP_TAG(FFT2), + 1, STEP_TAG(TWIST2)); + + /* Create FFT2 task */ + plan->fft2_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(fft2_2d_codelet); + task->handles[0] = plan->twisted2_handle[z]; + task->handles[1] = plan->fft2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(FFT2); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that twist3 depends on fft2 */ + starpu_tag_declare_deps(STEP_TAG(TWIST3), + 1, STEP_TAG(FFT2)); + + /* Create twist3 tasks */ + /* These run only on CPUs and thus write directly into the + * application output buffer. 
*/ + plan->twist3_tasks[z] = task = starpu_task_create(); + task->cl = &STARPUFFT(twist3_2d_codelet); + task->handles[0] = plan->fft2_handle[z]; + task->cl_arg = &plan->fft2_args[z]; + task->tag_id = STEP_TAG(TWIST3); + task->use_tag = 1; + task->destroy = 0; + + /* Tell that to be completely finished we need to have finished this twisted3 */ + starpu_tag_declare_deps(STEP_TAG_2D(plan, END, 0, 0), + 1, STEP_TAG(TWIST3)); +#undef STEP_TAG + } + + /* Create end task */ + plan->end_task = task = starpu_task_create(); + task->cl = NULL; + task->tag_id = STEP_TAG_2D(plan, END, 0, 0); + task->use_tag = 1; + task->destroy = 0; + task->detach = 0; + +} + + return plan; +} + +/* Actually submit all the tasks. */ +static struct starpu_task * +STARPUFFT(start2dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) +{ + STARPU_ASSERT(plan->type == C2C); + int z; + int ret; + +if (PARALLEL) { + for (z=0; z < plan->totsize1; z++) { + ret = starpu_task_submit(plan->twist1_tasks[z]); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(plan->fft1_tasks[z]); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_submit(plan->join_task); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + for (z=0; z < plan->totsize3; z++) { + ret = starpu_task_submit(plan->twist2_tasks[z]); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(plan->fft2_tasks[z]); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(plan->twist3_tasks[z]); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_submit(plan->end_task); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + return 
plan->end_task; +} else /* !PARALLEL */ { + struct starpu_task *task; + + /* Create FFT task */ + task = starpu_task_create(); + task->detach = 0; + task->cl = &STARPUFFT(fft_2d_codelet); + task->handles[0] = in; + task->handles[1] = out; + task->cl_arg = plan; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return task; +} +} + +/* Free all the tags. The generic code handles freeing the buffers. */ +static void +STARPUFFT(free_2d_tags)(STARPUFFT(plan) plan) +{ + int i, j; + int n1 = plan->n1[0]; + int m1 = plan->n1[1]; + + if (!PARALLEL) + return; + + for (i = 0; i < n1; i++) { + for (j = 0; j < m1; j++) { + starpu_tag_remove(STEP_TAG_2D(plan, TWIST1, i, j)); + starpu_tag_remove(STEP_TAG_2D(plan, FFT1, i, j)); + } + } + + starpu_tag_remove(STEP_TAG_2D(plan, JOIN, 0, 0)); + + for (i = 0; i < DIV_2D_N; i++) { + for (j = 0; j < DIV_2D_M; j++) { + starpu_tag_remove(STEP_TAG_2D(plan, TWIST2, i, j)); + starpu_tag_remove(STEP_TAG_2D(plan, FFT2, i, j)); + starpu_tag_remove(STEP_TAG_2D(plan, TWIST3, i, j)); + } + } + + starpu_tag_remove(STEP_TAG_2D(plan, END, 0, 0)); +} diff --git a/starpufft/src/starpufftx3d.c b/starpufft/src/starpufftx3d.c new file mode 100644 index 0000000..584f7d8 --- /dev/null +++ b/starpufft/src/starpufftx3d.c @@ -0,0 +1,188 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * + * Sequential version + * + */ + +#ifdef __STARPU_USE_CUDA +/* Perform one fft of size n,m */ +static void +STARPUFFT(fft_3d_plan_gpu)(void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + int n = plan->n[0]; + int m = plan->n[1]; + int p = plan->n[2]; + int workerid = starpu_worker_get_id_check(); + + cures = cufftPlan3d(&plan->plans[workerid].plan_cuda, n, m, p, _CUFFT_C2C); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); +} + +static void +STARPUFFT(fft_3d_kernel_gpu)(void *descr[], void *args) +{ + STARPUFFT(plan) plan = args; + cufftResult cures; + + _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); + _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); + + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? 
CUFFT_FORWARD : CUFFT_INVERSE); + if (cures != CUFFT_SUCCESS) + STARPU_CUFFT_REPORT_ERROR(cures); + + cudaStreamSynchronize(starpu_cuda_get_local_stream()); /* wait for the work queued on the worker's local stream */ +} +#endif + +#ifdef STARPU_HAVE_FFTW +/* CPU kernel: execute the calling worker's FFTW plan, reading descr[0] and writing descr[1] */ +static void +STARPUFFT(fft_3d_kernel_cpu)(void *descr[], void *_args) +{ + STARPUFFT(plan) plan = _args; + int workerid = starpu_worker_get_id_check(); + + task_per_worker[workerid]++; + + STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); + STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); + + _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); +} +#endif + +static struct starpu_perfmodel STARPUFFT(fft_3d_model) = { + .type = STARPU_HISTORY_BASED, + .symbol = TYPE"fft_3d" +}; + +static struct starpu_codelet STARPUFFT(fft_3d_codelet) = { + .where = +#ifdef __STARPU_USE_CUDA + STARPU_CUDA| +#endif +#ifdef STARPU_HAVE_FFTW + STARPU_CPU| +#endif + 0, +#ifdef __STARPU_USE_CUDA + .cuda_funcs = {STARPUFFT(fft_3d_kernel_gpu)}, +#endif +#ifdef STARPU_HAVE_FFTW + .cpu_funcs = {STARPUFFT(fft_3d_kernel_cpu)}, +#endif + CAN_EXECUTE + .model = &STARPUFFT(fft_3d_model), + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W}, + .name = "fft_3d_codelet" +}; + +STARPUFFT(plan) +STARPUFFT(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags) +{ + unsigned workerid; + +if (PARALLEL) { + /* TODO: the PARALLEL 3D version is not implemented, so this asserts */ + STARPU_ASSERT(0); +} + + /* TODO: flags? Automatically set FFTW_MEASURE on calibration?
*/ + STARPU_ASSERT(flags == 0); + + STARPUFFT(plan) plan = malloc(sizeof(*plan)); + memset(plan, 0, sizeof(*plan)); + + plan->dim = 3; + plan->n = malloc(plan->dim * sizeof(*plan->n)); + plan->n[0] = n; + plan->n[1] = m; + plan->n[2] = p; + + plan->totsize = n * m * p; /* total size of an n x m x p transform: all three dimensions count */ + + plan->type = C2C; + plan->sign = sign; + + + /* Initialize per-worker working set */ + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { + switch (starpu_worker_get_type(workerid)) { + case STARPU_CPU_WORKER: +#ifdef STARPU_HAVE_FFTW + /* fft plan: one fft of size n, m, p. */ + plan->plans[workerid].plan_cpu = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, sign, _FFTW_FLAGS); + STARPU_ASSERT(plan->plans[workerid].plan_cpu); +#else +/* #warning libstarpufft can not work correctly if libfftw3 is not installed */ +#endif + break; + case STARPU_CUDA_WORKER: + break; + default: + /* Do not care, we won't be executing anything there. */ + break; + } + } +#ifdef __STARPU_USE_CUDA + starpu_execute_on_each_worker(STARPUFFT(fft_3d_plan_gpu), plan, STARPU_CUDA); +#endif + + return plan; +} + +/* Submit the single 3D FFT task reading `in` and writing `out`; returns the submitted task, or NULL when starpu_task_submit() returns -ENODEV. */ +static struct starpu_task * +STARPUFFT(start3dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) +{ + STARPU_ASSERT(plan->type == C2C); + int ret; + +if (PARALLEL) { + /* TODO: the PARALLEL 3D version is not implemented, so this asserts */ + STARPU_ASSERT(0); +} else /* !PARALLEL */ { + struct starpu_task *task; + + /* Create FFT task */ + task = starpu_task_create(); + task->detach = 0; + task->cl = &STARPUFFT(fft_3d_codelet); + task->handles[0] = in; + task->handles[1] = out; + task->cl_arg = plan; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) return NULL; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + return task; +} +} diff --git a/starpufft/tests/Makefile.am b/starpufft/tests/Makefile.am new file mode 100644 index 0000000..b263df2 --- /dev/null +++ b/starpufft/tests/Makefile.am @@ -0,0 +1,61 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +CLEANFILES = starpu_idle_microsec.log +examplebindir = $(libdir)/starpu/examples/starpufft + +EXTRA_DIST = \ + testx.c \ + testx_threads.c \ + testf_threads.c \ + test_threads.c + +check_PROGRAMS = $(STARPU_FFT_EXAMPLES) + +AM_CFLAGS += $(APP_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/starpufft/include -I$(top_srcdir)/starpufft/src $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) +LIBS += $(STARPU_CUDA_LDFLAGS) + +examplebin_PROGRAMS = +examplebin_PROGRAMS += \ + testf \ + test +STARPU_FFT_EXAMPLES = testf +testf_LDADD = $(FFTWF_LIBS) + +# If we don't have CUDA, we assume that we have fftw available in double +# precision anyway, we just want to make sure that if CUFFT is used, it also +# supports double precision. 
+if !STARPU_USE_CUDA +STARPU_FFT_EXAMPLES += test +else +if STARPU_HAVE_CUFFTDOUBLECOMPLEX +STARPU_FFT_EXAMPLES += test +endif +endif +test_LDADD = $(FFTW_LIBS) + +TESTS = $(STARPU_FFT_EXAMPLES) + + +#check_PROGRAMS += examples/test_threads examples/testf_threads +#examples_test_threads_LDADD = -lfftw3_threads +#examples_testf_threads_LDADD = -lfftw3f_threads diff --git a/starpufft/tests/Makefile.in b/starpufft/tests/Makefile.in new file mode 100644 index 0000000..b3bbd12 --- /dev/null +++ b/starpufft/tests/Makefile.in @@ -0,0 +1,1563 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_4) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +check_PROGRAMS = $(am__EXEEXT_3) +examplebin_PROGRAMS = testf$(EXEEXT) test$(EXEEXT) + +# If we don't have CUDA, we assume that we have fftw available in double +# precision anyway, we just want to make sure that if CUFFT is used, it also +# supports double precision. 
+@STARPU_USE_CUDA_FALSE@am__append_8 = test +@STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__append_9 = test +TESTS = $(am__EXEEXT_3) +subdir = starpufft/tests +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_USE_CUDA_FALSE@am__EXEEXT_1 = test$(EXEEXT) +@STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__EXEEXT_2 = test$(EXEEXT) +am__EXEEXT_3 = testf$(EXEEXT) $(am__EXEEXT_1) $(am__EXEEXT_2) +am__installdirs = "$(DESTDIR)$(examplebindir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_4 = loader$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +test_SOURCES = test.c +test_OBJECTS = test.$(OBJEXT) +am__DEPENDENCIES_1 = +test_DEPENDENCIES = $(am__DEPENDENCIES_1) +testf_SOURCES = testf.c +testf_OBJECTS = testf.$(OBJEXT) +testf_DEPENDENCIES = $(am__DEPENDENCIES_1) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) 
+am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po ./$(DEPDIR)/test.Po \ + ./$(DEPDIR)/testf.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = loader.c test.c testf.c +DIST_SOURCES = loader.c test.c testf.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS 
= @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + ../src/libstarpufft-@STARPU_EFFECTIVE_VERSION@.la \ + $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ 
+LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ 
+PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ 
+STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions 
of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +CLEANFILES = starpu_idle_microsec.log +examplebindir = $(libdir)/starpu/examples/starpufft +EXTRA_DIST = \ + testx.c \ + testx_threads.c \ + testf_threads.c \ + test_threads.c + +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/starpufft/include -I$(top_srcdir)/starpufft/src $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +STARPU_FFT_EXAMPLES = testf $(am__append_8) $(am__append_9) +testf_LDADD = $(FFTWF_LIBS) +test_LDADD = $(FFTW_LIBS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/tests/Makefile'; \ + $(am__cd) 
$(top_srcdir) && \ + $(AUTOMAKE) --foreign starpufft/tests/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = 
files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) + +test$(EXEEXT): $(test_OBJECTS) $(test_DEPENDENCIES) $(EXTRA_test_DEPENDENCIES) + @rm -f test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS) + +testf$(EXEEXT): $(testf_OBJECTS) $(testf_DEPENDENCIES) 
$(EXTRA_testf_DEPENDENCIES) + @rm -f testf$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(testf_OBJECTS) $(testf_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testf.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: 
$(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +testf.log: testf$(EXEEXT) + @p='testf$(EXEEXT)'; \ + b='testf'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +test.log: test$(EXEEXT) + @p='test$(EXEEXT)'; \ + b='test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" 
$(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/test.Po + -rm -f ./$(DEPDIR)/testf.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/test.Po + -rm -f ./$(DEPDIR)/testf.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic 
\ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. 
But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +#check_PROGRAMS += examples/test_threads examples/testf_threads +#examples_test_threads_LDADD = -lfftw3_threads +#examples_testf_threads_LDADD = -lfftw3f_threads + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpufft/tests/loader.c b/starpufft/tests/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/starpufft/tests/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", 
"starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. */ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
/* Build a newly allocated copy of src in which every non-overlapping
 * occurrence of motif is replaced by value.
 *
 * motif must be non-empty. The replacement text is not scanned again, so a
 * value that itself contains motif cannot make the substitution loop forever.
 * Exits the program when memory cannot be allocated.
 */
static char *substitute_all(const char *src, const char *motif, const char *value)
{
	const size_t motif_len = strlen(motif);
	const size_t value_len = strlen(value);
	size_t result_len = strlen(src);
	const char *cursor;
	const char *match;
	char *result;
	char *out;

	/* First pass: compute the exact size of the result */
	for (match = strstr(src, motif); match != NULL; match = strstr(match + motif_len, motif))
		result_len = result_len - motif_len + value_len;

	result = malloc(result_len + 1);
	if (result == NULL)
	{
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	/* Second pass: copy the text between matches, inserting value */
	out = result;
	cursor = src;
	while ((match = strstr(cursor, motif)) != NULL)
	{
		size_t chunk = (size_t) (match - cursor);

		memcpy(out, cursor, chunk);
		out += chunk;
		memcpy(out, value, value_len);
		out += value_len;
		cursor = match + motif_len;
	}
	strcpy(out, cursor);

	return result;
}

/* Replace every occurrence of the placeholder motif in *src by value.
 *
 * Nothing happens when *src is NULL, when motif is empty, or when *src does
 * not contain motif (value may then be NULL). Otherwise *src is redirected to
 * a newly allocated string; the previous string is left untouched and is not
 * freed here, since this function cannot know who owns it.
 *
 * Exits with EXIT_FAILURE when motif is present but value is NULL.
 */
static void decode(char **src, char *motif, const char *value)
{
	if (*src == NULL || motif[0] == '\0')
		return;

	if (strstr(*src, motif) == NULL)
		return;

	if (value == NULL)
	{
		fprintf(stderr, "error: $%s undefined\n", motif);
		exit(EXIT_FAILURE);
	}

	*src = substitute_all(*src, motif, value);
}
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/starpufft/tests/test.c b/starpufft/tests/test.c new file 
mode 100644 index 0000000..9c13ca8 --- /dev/null +++ b/starpufft/tests/test.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "starpufft-double.h" +#include "testx.c" diff --git a/starpufft/tests/test_threads.c b/starpufft/tests/test_threads.c new file mode 100644 index 0000000..99e843e --- /dev/null +++ b/starpufft/tests/test_threads.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include "starpufft-double.h" +#include "testx_threads.c" diff --git a/starpufft/tests/testf.c b/starpufft/tests/testf.c new file mode 100644 index 0000000..640567d --- /dev/null +++ b/starpufft/tests/testf.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include "starpufft-float.h" +#include "testx.c" diff --git a/starpufft/tests/testf_threads.c b/starpufft/tests/testf_threads.c new file mode 100644 index 0000000..f838333 --- /dev/null +++ b/starpufft/tests/testf_threads.c @@ -0,0 +1,18 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
#ifdef STARPU_HAVE_FFTW
/* Compare the StarPU-FFT result out against the FFTW reference out_fftw,
 * both holding size elements.
 *
 * Prints the maximum and average differences, the norm of the difference and
 * the differences relative to the norm of the reference on stderr, then exits
 * with EXIT_FAILURE when a relative difference exceeds the tolerance selected
 * by TYPE ("f": 1e-7, "": 1e-16).
 */
static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, int size)
{
	int i;
	double max = 0., tot = 0., norm = 0., normdiff = 0.;
	for (i = 0; i < size; i++)
	{
		double diff = cabs(out[i]-out_fftw[i]);
		double diff2 = diff * diff;
		double dsize = cabs(out_fftw[i]);
		double size2 = dsize * dsize;
		if (diff > max)
			max = diff;
		tot += diff;
		normdiff += diff2;
		norm += size2;
	}
	fprintf(stderr, "\nmaximum difference %g\n", max);
	fprintf(stderr, "average difference %g\n", tot / size);
	fprintf(stderr, "difference norm %g\n", sqrt(normdiff));
	double relmaxdiff = max / sqrt(norm);
	fprintf(stderr, "relative maximum difference %g\n", relmaxdiff);
	double relavgdiff = (tot / size) / sqrt(norm);
	fprintf(stderr, "relative average difference %g\n", relavgdiff);
	if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-7 || relavgdiff > 1e-7))
	{
		fprintf(stderr, "Failure: Difference too big (TYPE f)\n");
		exit(EXIT_FAILURE);
	}
	if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16))
	{
		fprintf(stderr, "Failure: Difference too big\n");
		exit(EXIT_FAILURE);
	}
}
#endif

#ifdef STARPU_USE_CUDA
/* Compare the StarPU-FFT result out against the cuFFT reference out_cuda,
 * both holding size elements.
 *
 * Same report as check_fftw(), with the tolerances "f": 1e-8 and "": 1e-16.
 * The previous version read an undeclared out_cuda and never looked at out,
 * so it could not build once STARPU_USE_CUDA was enabled.
 */
static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_cuda, int size)
{
	int i;
	double max = 0., tot = 0., norm = 0., normdiff = 0.;
	for (i = 0; i < size; i++)
	{
		double diff = cabs(out[i]-out_cuda[i]);
		double diff2 = diff * diff;
		/* not named "size", which would shadow the element count */
		double dsize = cabs(out_cuda[i]);
		double size2 = dsize * dsize;
		if (diff > max)
			max = diff;
		tot += diff;
		normdiff += diff2;
		norm += size2;
	}
	fprintf(stderr, "\nmaximum difference %g\n", max);
	fprintf(stderr, "average difference %g\n", tot / size);
	fprintf(stderr, "difference norm %g\n", sqrt(normdiff));
	double relmaxdiff = max / sqrt(norm);
	fprintf(stderr, "relative maximum difference %g\n", relmaxdiff);
	double relavgdiff = (tot / size) / sqrt(norm);
	fprintf(stderr, "relative average difference %g\n", relavgdiff);
	if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-8 || relavgdiff > 1e-8))
	{
		fprintf(stderr, "Failure: Difference too big (TYPE f)\n");
		exit(EXIT_FAILURE);
	}
	if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16))
	{
		fprintf(stderr, "Failure: Difference too big\n");
		exit(EXIT_FAILURE);
	}
}
#endif
starpu_init(&conf); + + ret = starpu_init(NULL); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (argc == 1) + { + n = 42; + /* 1D */ + size = n; + } + else if (argc == 2) + { + n = atoi(argv[1]); + + /* 1D */ + size = n; + } + else if (argc == 3) + { + n = atoi(argv[1]); + m = atoi(argv[2]); + + /* 2D */ + size = n * m; + } + else if (argc == 4) + { + n = atoi(argv[1]); + m = atoi(argv[2]); + p = atoi(argv[3]); + + /* 3D */ + size = n * m * p; + } + else + { + assert(0); + } + +#if defined(STARPU_HAVE_FFTW) || defined(STARPU_USE_CUDA) + bytes = size * sizeof(STARPUFFT(complex)); +#endif + + STARPUFFT(complex) *in_orig = STARPUFFT(malloc)(size * sizeof(*in_orig)); + starpu_srand48(0); + for (i = 0; i < size; i++) + in_orig[i] = starpu_drand48() + I * starpu_drand48(); + + STARPUFFT(complex) *in = STARPUFFT(malloc)(size * sizeof(*in)); + + STARPUFFT(complex) *out = STARPUFFT(malloc)(size * sizeof(*out)); + +#ifdef STARPU_HAVE_FFTW + STARPUFFT(complex) *out_fftw = STARPUFFT(malloc)(size * sizeof(*out_fftw)); +#endif + +#ifdef STARPU_USE_CUDA + STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda)); +#endif + + if (argc <= 2) + { + plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0); +#ifdef STARPU_HAVE_FFTW + fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); +#endif +#ifdef STARPU_USE_CUDA + if (cufftPlan1d(&cuda_plan, n, _CUFFT_C2C, 1) != CUFFT_SUCCESS) + printf("erf\n"); +#endif + + } + else if (argc == 3) + { + plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0); +#ifdef STARPU_HAVE_FFTW + fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); +#endif +#ifdef STARPU_USE_CUDA + STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS); +#endif + } + else if (argc == 4) + { + plan = STARPUFFT(plan_dft_3d)(n, m, p, SIGN, 0); +#ifdef STARPU_HAVE_FFTW + fftw_plan = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); +#endif +#ifdef STARPU_USE_CUDA + 
STARPU_ASSERT(cufftPlan3d(&cuda_plan, n, m, p, _CUFFT_C2C) == CUFFT_SUCCESS); +#endif + } + else + { + assert(0); + } + +#ifdef STARPU_HAVE_FFTW + memcpy(in, in_orig, size * sizeof(*in)); + gettimeofday(&begin, NULL); + _FFTW(execute_dft)(fftw_plan, in, out_fftw); + gettimeofday(&end, NULL); + _FFTW(destroy_plan)(fftw_plan); + timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); + printf("FFTW took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing); +#endif +#ifdef STARPU_USE_CUDA + memcpy(in, in_orig, size * sizeof(*in)); + gettimeofday(&begin, NULL); + if (cufftExecC2C(cuda_plan, (cufftComplex*) in, (cufftComplex*) out_cuda, CUFFT_FORWARD) != CUFFT_SUCCESS) + printf("erf2\n"); + if ((cures = cudaDeviceSynchronize()) != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(cures); + gettimeofday(&end, NULL); + cufftDestroy(cuda_plan); + timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); + printf("CUDA took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing); +#endif + + memcpy(in, in_orig, size * sizeof(*in)); + ret = STARPUFFT(execute)(plan, in, out); + if (ret == -1) return 77; + STARPUFFT(showstats)(stdout); + +#ifdef STARPU_HAVE_FFTW + check_fftw(out, out_fftw, size); +#endif +#ifdef STARPU_USE_CUDA + check_cuda(out, out_cuda, size); +#endif + +#if 1 + memcpy(in, in_orig, size * sizeof(*in)); + starpu_vector_data_register(&in_handle, STARPU_MAIN_RAM, (uintptr_t) in, size, sizeof(*in)); + starpu_vector_data_register(&out_handle, STARPU_MAIN_RAM, (uintptr_t) out, size, sizeof(*out)); + + ret = STARPUFFT(execute_handle)(plan, in_handle, out_handle); + if (ret == -1) return 77; + + starpu_data_unregister(in_handle); + starpu_data_unregister(out_handle); + +#ifdef STARPU_HAVE_FFTW + check_fftw(out, out_fftw, size); +#endif +#ifdef STARPU_USE_CUDA + check_cuda(out, out_cuda, size); +#endif +#endif + + STARPUFFT(showstats)(stdout); + STARPUFFT(destroy_plan)(plan); + + printf("\n"); +#if 0 + for (i = 0; 
i < 16; i++) + printf("(%f,%f) ", cimag(in[i]), creal(in[i])); + printf("\n\n"); + for (i = 0; i < 16; i++) + printf("(%f,%f) ", cimag(out[i]), creal(out[i])); + printf("\n\n"); +#ifdef STARPU_HAVE_FFTW + for (i = 0; i < 16; i++) + printf("(%f,%f) ", cimag(out_fftw[i]), creal(out_fftw[i])); + printf("\n\n"); +#endif +#endif + + STARPUFFT(free)(in_orig, size * sizeof(*in_orig)); + STARPUFFT(free)(in, size * sizeof(*in)); + STARPUFFT(free)(out, size * sizeof(*out)); + +#ifdef STARPU_HAVE_FFTW + STARPUFFT(free)(out_fftw, size * sizeof(*out_fftw)); +#endif + +#ifdef STARPU_USE_CUDA + free(out_cuda); +#endif + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/starpufft/tests/testx_threads.c b/starpufft/tests/testx_threads.c new file mode 100644 index 0000000..edd2040 --- /dev/null +++ b/starpufft/tests/testx_threads.c @@ -0,0 +1,112 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "starpufft.h" + +#include + +#define SIGN (-1) +/* #define SIGN (1) */ + +int main(int argc, char *argv[]) +{ + int i; + struct timeval begin, end; + int size; + size_t bytes; + int n = 0, m = 0; + _FFTW(plan) fftw_plan; + double timing; + char *num; + int num_threads = 1; + + _FFTW(init_threads)(); + + num = getenv("NUM_THREADS"); + if (num) + num_threads = atoi(num); + _FFTW(plan_with_nthreads)(num_threads); + + if (argc < 2 || argc > 3) + { + fprintf(stderr,"need one or two size of vector\n"); + exit(EXIT_FAILURE); + } + + if (argc == 2) + { + n = atoi(argv[1]); + + /* 1D */ + size = n; + } + else if (argc == 3) + { + n = atoi(argv[1]); + m = atoi(argv[2]); + + /* 2D */ + size = n * m; + } + else + { + assert(0); + } + + bytes = size * sizeof(_FFTW(complex)); + + _FFTW(complex) *in = _FFTW(malloc)(size * sizeof(*in)); + starpu_srand48(0); + for (i = 0; i < size; i++) + in[i] = starpu_drand48() + I * starpu_drand48(); + + _FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw)); + + if (argc == 2) + { + fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE); + + } + else if (argc == 3) + { + fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE); + } + else + { + assert(0); + } + + gettimeofday(&begin, NULL); + _FFTW(execute)(fftw_plan); + gettimeofday(&end, NULL); + _FFTW(destroy_plan)(fftw_plan); + timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); + printf("FFTW with %d threads took %2.2f ms (%2.2f MB/s)\n\n", num_threads, timing/1000, bytes/(timing*num_threads)); + + printf("\n"); + + return EXIT_SUCCESS; +} diff --git a/starpupy/Makefile.am b/starpupy/Makefile.am new file mode 100644 index 0000000..dda82d7 --- /dev/null +++ b/starpupy/Makefile.am @@ -0,0 +1,22 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = src +SUBDIRS += examples +SUBDIRS += benchmark + diff --git a/starpupy/Makefile.in b/starpupy/Makefile.in new file mode 100644 index 0000000..d26dcd8 --- /dev/null +++ b/starpupy/Makefile.in @@ -0,0 +1,891 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = 
$(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = starpupy +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = execute.sh +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive 
\ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/execute.sh.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = 
@DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ 
+LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ 
+PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS 
= @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = 
@docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src examples benchmark +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpupy/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +execute.sh: $(top_builddir)/config.status $(srcdir)/execute.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
+ install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: diff --git a/starpupy/benchmark/Makefile.am b/starpupy/benchmark/Makefile.am new file mode 100644 index 0000000..60b70bf --- /dev/null +++ b/starpupy/benchmark/Makefile.am @@ -0,0 +1,41 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-tests.mk + +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +EXTRA_DIST = \ + tasks_size_overhead.py \ + tasks_size_overhead.sh \ + tasks_size_overhead.gp \ + test_handle_perf.py \ + test_handle_perf.sh \ + test_handle_perf_pickle.py \ + test_handle_perf_pickle.sh \ + test_handle_bench.py \ + handle_perf_plot_pickle.py \ + handle_perf_plot.py + +python_sourcesdir = $(libdir)/starpu/python +dist_python_sources_DATA = \ + tasks_size_overhead.py \ + test_handle_perf.py \ + test_handle_perf_pickle.py + +TESTS = diff --git a/starpupy/benchmark/Makefile.in b/starpupy/benchmark/Makefile.in new file mode 100644 index 0000000..fd008f0 --- /dev/null +++ b/starpupy/benchmark/Makefile.in @@ -0,0 +1,1409 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes 
+am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +TESTS = +subdir = starpupy/benchmark +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_python_sources_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = 
$(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(python_sourcesdir)" +DATA = $(dist_python_sources_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# reStructuredText title: frame each input line between two rows of '=' and end with a blank line. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# they have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the test scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problems with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ 
+AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = 
@LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = 
@NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ 
+STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = 
@abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = +LAUNCHER = +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) 
+AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +EXTRA_DIST = \ + tasks_size_overhead.py \ + tasks_size_overhead.sh \ + tasks_size_overhead.gp \ + test_handle_perf.py \ + test_handle_perf.sh \ + test_handle_perf_pickle.py 
\ + test_handle_perf_pickle.sh \ + test_handle_bench.py \ + handle_perf_plot_pickle.py \ + handle_perf_plot.py + +python_sourcesdir = $(libdir)/starpu/python +dist_python_sources_DATA = \ + tasks_size_overhead.py \ + test_handle_perf.py \ + test_handle_perf_pickle.py + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .log .o .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/benchmark/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpupy/benchmark/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-dist_python_sourcesDATA: $(dist_python_sources_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ + if test -n "$$list"; 
then \ + echo " $(MKDIR_P) '$(DESTDIR)$(python_sourcesdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(python_sourcesdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(python_sourcesdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(python_sourcesdir)" || exit $$?; \ + done + +uninstall-dist_python_sourcesDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(python_sourcesdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set 
+e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test 
-d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(DATA) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(python_sourcesdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if 
test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-dist_python_sourcesDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-dist_python_sourcesDATA + +.MAKE: $(am__recursive_targets) check-am install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-TESTS check-am clean clean-generic clean-libtool \ + cscopelist-am ctags ctags-am distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dist_python_sourcesDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am recheck tags tags-am uninstall uninstall-am \ + uninstall-dist_python_sourcesDATA + +.PRECIOUS: Makefile + 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpupy/benchmark/handle_perf_plot.py b/starpupy/benchmark/handle_perf_plot.py new file mode 100644 index 0000000..979eec4 --- /dev/null +++ b/starpupy/benchmark/handle_perf_plot.py @@ -0,0 +1,83 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import json +import matplotlib.pyplot as plt +import test_handle_bench + +file1 = open('handle_perf1.txt', 'r') +js1 = file1.read() +retfut_dict = json.loads(js1) +#print(retfut_dict) +program_submit1 = [x*1000 for x in retfut_dict['program_submit']] +program_await1 = [x*1000 for x in retfut_dict['program_await']] + +file2 = open('handle_perf2.txt', 'r') +js2 = file2.read() +rethandle_dict = json.loads(js2) +#print(rethandle_dict) +program_submit2 = [x*1000 for x in rethandle_dict['program_submit']] +program_await2 = [x*1000 for x in rethandle_dict['program_await']] + +file3 = open('handle_perf3.txt', 'r') +js3 = file3.read() +nostarpu_dict = json.loads(js3) +#print(nostarpu_dict) +program_submit3 = [x*1000 for x in nostarpu_dict['program_submit']] + +file_std = open('handle_perf_std.txt', 'r') +js_std = file_std.read() +dict_std = json.loads(js_std) + +file1.close() +file2.close() +file3.close() +file_std.close() + +std11 = dict_std['list_std11'] +std12 = dict_std['list_std12'] +std21 = dict_std['list_std21'] +std22 = dict_std['list_std22'] +std3 = dict_std['list_std3'] + +plt.subplot(2, 1, 1) +plt.xticks(fontsize=15) +plt.yticks(fontsize=15) +plt.xscale("log") +plt.yscale("log") +plt.errorbar([i for i in test_handle_bench.list_size], program_submit1, yerr=std11, fmt='+-', ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and 
returning future object') +plt.errorbar([i for i in test_handle_bench.list_size], program_submit2, yerr=std21, fmt='+-', ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning handle object') +plt.errorbar([i for i in test_handle_bench.list_size], program_submit3, yerr=std3, fmt='+-',ecolor='y', color='y', elinewidth=1, capsize=3, linewidth=1, label='using numpy.add function') + +plt.legend(loc='upper left', fontsize=15) +plt.xlabel("Numpy array size (# of elements)", fontsize=15) +plt.ylabel("Program execution time (ms)", fontsize=15) + +plt.subplot(2, 1, 2) +plt.xticks(fontsize=15) +plt.yticks(fontsize=15) +plt.xscale("log") +plt.yscale("log") +plt.errorbar([i for i in test_handle_bench.list_size], program_await1, yerr=std12, fmt='+-',ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning future object') +plt.errorbar([i for i in test_handle_bench.list_size], program_await2, yerr=std22, fmt='+-',ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning handle object') + +plt.legend(loc='upper left', fontsize=15) +plt.xlabel("Numpy array size (# of elements)", fontsize=15) +plt.ylabel("Program await time (ms)", fontsize=15) + +plt.show() +#plt.savefig("starpupy_handle_perf.png") +#plt.savefig("starpupy_handle_perf.eps") + diff --git a/starpupy/benchmark/handle_perf_plot_pickle.py b/starpupy/benchmark/handle_perf_plot_pickle.py new file mode 100644 index 0000000..0889d3a --- /dev/null +++ b/starpupy/benchmark/handle_perf_plot_pickle.py @@ -0,0 +1,89 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import json +import matplotlib.pyplot as plt + +num = 1000000 +listX = [10, 100, 1000, 10000, 100000, 1000000] +list_size = [] +for x in listX: + for X in range(x, x*10, x): + list_size.append(X) +list_size.append(10000000) +list_size.append(20000000) +list_size.append(30000000) +list_size.append(40000000) +list_size.append(50000000) +#print(list_size) + +file1 = open('handle_perf1.txt', 'r') +js1 = file1.read() +withhandle_dict = json.loads(js1) +#print(withhandle_dict) +program_submit1 = withhandle_dict['program_submit'] +program_await1 = withhandle_dict['program_await'] + +file2 = open('handle_perf2.txt', 'r') +js2 = file2.read() +nohandle_dict = json.loads(js2) +#print(nohandle_dict) +program_submit2 = nohandle_dict['program_submit'] +program_await2 = nohandle_dict['program_await'] + +file3 = open('handle_perf3.txt', 'r') +js3 = file3.read() +nostarpu_dict = json.loads(js3) +#print(nostarpu_dict) +program_submit3 = nostarpu_dict['program_submit'] + +file_std = open('handle_perf_std.txt', 'r') +js_std = file_std.read() +dict_std = json.loads(js_std) + +std11 = dict_std['list_std11'] +std12 = dict_std['list_std12'] +std21 = dict_std['list_std21'] +std22 = dict_std['list_std22'] +std3 = dict_std['list_std3'] + +plt.subplot(2, 1, 1) +plt.xscale("log") +plt.yscale("log") +plt.errorbar([i/num for i in list_size], program_submit1, yerr=std11, fmt='+-', ecolor='r', 
color='r', elinewidth=1, capsize=3, linewidth=1, label='using virtually shared memory manager') +plt.errorbar([i/num for i in list_size], program_submit2, yerr=std21, fmt='+-', ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='without using virtually shared memory manager') +plt.errorbar([i/num for i in list_size], program_submit3, yerr=std3, fmt='+-',ecolor='y', color='y', elinewidth=1, capsize=3, linewidth=1, label='without using StarPU task submitting') + +plt.legend(loc='upper left') +plt.xlabel("Numpy array size (MB)") +plt.ylabel("Program execution time (s)") + + +plt.subplot(2, 1, 2) +plt.xscale("log") +plt.yscale("log") +plt.errorbar([i/num for i in list_size], program_await1, yerr=std12, fmt='+-',ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using virtually shared memory manager') +plt.errorbar([i/num for i in list_size], program_await2, yerr=std22, fmt='+-',ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='without using virtually shared memory manager') + +plt.legend(loc='upper left') +plt.xlabel("Numpy array size (MB)") +plt.ylabel("Program await time (s)") + +plt.show() + +file1.close() +file2.close() +file3.close() +file_std.close() diff --git a/starpupy/benchmark/tasks_size_overhead.gp b/starpupy/benchmark/tasks_size_overhead.gp new file mode 100755 index 0000000..5aab029 --- /dev/null +++ b/starpupy/benchmark/tasks_size_overhead.gp @@ -0,0 +1,44 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +OUTPUT=tasks_size_overhead.output +VALS=$(sed -n -e '3p' < $OUTPUT) + +PLOTS="" +for x in $(seq 1 11) +do + pos=$((2 * $x + 1)) + double=$((2 * $x)) + value=$(echo "$VALS" | cut -d ' ' -f $pos) + if test -n "$value" + then + PLOTS=",\"$OUTPUT\" using 1:($value)/(\$$pos) with linespoints title columnheader($double) $PLOTS" + fi +done + +[ -n "$TERMINAL" ] || TERMINAL=eps +[ -n "$OUTFILE" ] || OUTFILE=tasks_size_overhead.eps +gnuplot << EOF +set terminal $TERMINAL +set output "$OUTFILE" +set key top left +set xlabel "number of cores" +set ylabel "speedup" +plot \ + x title "linear" $PLOTS +EOF + diff --git a/starpupy/benchmark/tasks_size_overhead.py b/starpupy/benchmark/tasks_size_overhead.py new file mode 100644 index 0000000..ce57584 --- /dev/null +++ b/starpupy/benchmark/tasks_size_overhead.py @@ -0,0 +1,182 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+import starpu
+from starpu import starpupy
+
+import time
+import sys
+import getopt
+import asyncio
+import cProfile
+import sys
+
+mincpus = 1
+maxcpus = starpupy.worker_get_count_by_type(starpu.STARPU_CPU_WORKER)
+cpustep = 1
+
+mintime = 128
+maxtime = 128*1024
+factortime = 2
+
+ntasks = 64
+nbuffers = 0
+total_nbuffers = 0
+
+#################parameters##############
+try:
+    opts, args = getopt.gnu_getopt(sys.argv[1:],"i:b:B:c:C:s:t:T:f:h")  # GNU scanning: options are still parsed when they follow the positional method name
+except getopt.GetoptError:
+    print("Usage:", sys.argv[0], "\n"\
+    "\t[-h help] \n "\
+    "\t[-i ntasks] [-b nbuffers] [-B total_nbuffers] \n"\
+    "\t[-c mincpus] [ -C maxcpus] [-s cpustep]\n"\
+    "\t[-t mintime] [-T maxtime] [-f factortime]")
+    starpupy.shutdown()
+    sys.exit(1)
+for opt, arg in opts:
+    if opt == '-i':
+        ntasks = int(arg)
+    elif opt == '-b':
+        nbuffers = int(arg)
+    elif opt == '-B':
+        total_nbuffers = int(arg)
+    elif opt == '-c':
+        mincpus = int(arg)
+    elif opt == '-C':
+        maxcpus = int(arg)
+    elif opt == '-s':
+        cpustep = int(arg)
+    elif opt == '-t':
+        mintime = int(arg)
+    elif opt == '-T':
+        maxtime = int(arg)
+    elif opt == '-f':
+        factortime = int(arg)
+    elif opt == '-h':
+        print("Usage:", sys.argv[0], "[-h help] \n "\
+        "\t[-i ntasks] [-b nbuffers] [-B total_nbuffers] \n"\
+        "\t[-c mincpus] [ -C maxcpus] [-s cpustep]\n"\
+        "\t[-t mintime] [-T maxtime] [-f factortime]\n")
+        print("runs \'ntasks\' tasks\n"\
+        "- using \'nbuffers\' data each, randomly among \'total_nbuffers\' choices,\n"\
+        "- with varying task durations, from \'mintime\' to \'maxtime\' (using \'factortime\')\n"\
+        "- on varying numbers of cpus, from \'mincpus\' to \'maxcpus\' (using \'cpustep\')\n"\
+        "\n"\
+        "currently selected parameters: ", ntasks, " tasks using ", nbuffers, " buffers among ", total_nbuffers, \
+        ", from ", mintime, "us to ", maxtime, "us (factor ", factortime, "), from ", mincpus, " cpus to ", maxcpus, " cpus (step ", cpustep, ")", sep='')
+        starpupy.shutdown()
+        sys.exit(0)
+
+########################################
+
+# geometric progression: start, start*factor, ... up to and including end
+def range_multi(start, end, factor):
+    val_multi = []
+    val = start
+    while val <= end:
+        val_multi.append(val)
+        val = val * factor
+    return val_multi
+
+# the test function: sleep for t microseconds
+def func_test(t):
+    time.sleep(t/1000000)
+
+#pr = cProfile.Profile()
+
+f = open("tasks_size_overhead.output",'w')
+
+method="handle"
+if args:  # first positional argument left over by the option parser, never an option flag
+    method=args[0]
+
+print("# tasks :", ntasks, "buffers :", nbuffers, "totoal_nbuffers :", total_nbuffers, file=f)
+print("# ncups", end='\t', file=f)
+for size in range_multi(mintime, maxtime, factortime):
+    print(size, "iters(us)\ttotal(s)", end='\t', file=f)
+print(end='\n', file=f)
+
+print("\"seq\"\t", end=' ', file=f)
+for size in range_multi(mintime, maxtime, factortime):
+    #print("time size is", size)
+    dstart=time.time()
+    for i in range(ntasks):
+        func_test(size)
+    dend=time.time()
+    print(int((dend-dstart)/ntasks*1000000), "\t", dend-dstart, end='\t', file=f)
+    #print(size, "\t", dend-dstart, end='\t', file=f)
+print(end='\n', file=f)
+
+#pr.enable()
+
+if method == "handle":
+    # return value is handle
+    for ncpus in range(mincpus, maxcpus+1, cpustep):
+        starpupy.set_ncpu(ncpus)
+        #print("ncpus is", ncpus)
+        print(ncpus, end='\t', file=f)
+        for size in range_multi(mintime, maxtime, factortime):
+            #print("time size is", size)
+            start=time.time()
+            for i in range(ntasks*ncpus):
+                res=starpu.task_submit(ret_handle=True)("func_test", size)
+            starpupy.task_wait_for_all()
+            end=time.time()
+            timing = end-start
+            print(size, "\t", timing/ncpus, end='\t', file=f)
+        print(end='\n', file=f)
+
+elif method == "futur":
+    # return value is future
+    async def main():
+        for ncpus in range(mincpus, maxcpus+1, cpustep):
+            starpupy.set_ncpu(ncpus)
+            #print("ncpus is", ncpus)
+            print(ncpus, end='\t', file=f)
+            for size in range_multi(mintime, maxtime, factortime):
+                #print("time size is", size)
+                start=time.time()
+                for i in range(ntasks*ncpus):
+                    fut=starpu.task_submit(ret_fut=True)("func_test", size)
+                starpupy.task_wait_for_all()
+                end=time.time()
+                timing = end-start
+                print(size, "\t", timing/ncpus, end='\t', file=f)
+            print(end='\n', file=f)
+    asyncio.run(main())
+
+else:
+    # return value is neither future nor handle
+    for ncpus in range(mincpus, maxcpus+1, cpustep):
+        starpupy.set_ncpu(ncpus)
+        #print("ncpus is", ncpus)
+        print(ncpus, end='\t', file=f)
+        for size in range_multi(mintime, maxtime, factortime):
+            #print("time size is", size)
+            start=time.time()
+            for i in range(ntasks*ncpus):
+                fut=starpu.task_submit(ret_fut=False)("func_test", size)
+            starpupy.task_wait_for_all()
+            end=time.time()
+            timing = end-start
+            print(size, "\t", timing/ncpus, end='\t', file=f)
+        print(end='\n', file=f)
+
+#pr.disable()
+
+f.close()
+#pr.print_stats()
+starpupy.shutdown()
diff --git a/starpupy/benchmark/tasks_size_overhead.sh b/starpupy/benchmark/tasks_size_overhead.sh
new file mode 100755
index 0000000..006f887
--- /dev/null
+++ b/starpupy/benchmark/tasks_size_overhead.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+ROOT=${0%.sh}
+for x in handle futur none
+do
+    "$(dirname "$0")/../execute.sh" benchmark/tasks_size_overhead.py "$x" "$@"  # "$@" forwards each option as its own word
+    TERMINAL="png large size 1280,960" OUTFILE="tasks_size_overhead_py_$x.png" "$ROOT.gp"
+    TERMINAL="eps" OUTFILE="tasks_size_overhead_py_$x.eps" "$ROOT.gp"
+done
+#gv tasks_size_overhead.eps
diff --git a/starpupy/benchmark/test_handle_bench.py b/starpupy/benchmark/test_handle_bench.py
new file mode 100644
index 0000000..c696dc5
--- /dev/null
+++ b/starpupy/benchmark/test_handle_bench.py
@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+listX = [10, 100, 1000, 10000, 100000, 1000000]
+#listX = [10, 100]
+list_size = []
+for x in listX:
+    for X in range(x, x*10, x):
+        list_size.append(X)
+list_size.append(10000000)
+list_size.append(20000000)
+list_size.append(30000000)
+list_size.append(40000000)
+list_size.append(50000000)
+#print("list of size is",list_size)
+
diff --git a/starpupy/benchmark/test_handle_perf.py b/starpupy/benchmark/test_handle_perf.py
new file mode 100644
index 0000000..1ed57ab
--- /dev/null
+++ b/starpupy/benchmark/test_handle_perf.py
@@ -0,0 +1,205 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +import starpu +from starpu import starpupy +from starpu import Handle +from starpu import HandleNumpy +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("\n\nCan't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)\n\n") + starpupy.shutdown() + exit(77) +import asyncio +import time +import array +import struct +try: + import nest_asyncio +except ModuleNotFoundError as e: + print("\n\nCan't find \"Python3 nest_asyncio\" module (consider running \"pip3 install nest_asyncio\")\n\n") + starpupy.shutdown() + exit(77) +import json +import sys +import statistics +import test_handle_bench + +#############################perf test#################################### +# Numpy function +@starpu.access(a="RW", b="R") +def add_numpy(a,b): + np.add(a,b,out=a) + +# custom function +@starpu.access(a="RW", b="R") +def add_custom(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + +program_submit1=[] +program_await1=[] + +program_submit2=[] +program_await2=[] + +program_submit3=[] + +num=20 +# calculate the standard deviasion +list_std11 = [] +list_std12 = [] +list_std21 = [] +list_std22 = [] +list_std3 = [] + +# using handle return future +def test_comp_handle_ret_fut(a,b): + async def asy_main(): + start_exec1=0 + end_exec1=0 + start_exec2=0 + end_exec2=0 + list_submit=[] + 
list_await=[] + for t in range(num): + #print("loop", t) + start_exec1=time.time() + res_fut = starpu.task_submit()(add_custom, a, b) + end_exec1=time.time() + + list_submit.append(end_exec1-start_exec1) + + start_exec2=time.time() + res = await res_fut + end_exec2=time.time() + + list_await.append(end_exec2-start_exec2) + + program_submit1.append(statistics.mean(list_submit)) + program_await1.append(statistics.mean(list_await)) + + list_std11.append(statistics.stdev(list_submit)) + list_std12.append(statistics.stdev(list_await)) + + loop=asyncio.get_event_loop() + nest_asyncio.apply() + loop.run_until_complete(asy_main()) + +# using handle return handle +def test_comp_handle_ret_handle(a,b): + async def asy_main(): + start_exec1=0 + end_exec1=0 + start_exec2=0 + end_exec2=0 + list_submit=[] + list_await=[] + for t in range(num): + #print("loop", t) + start_exec1=time.time() + res_handle = starpu.task_submit(ret_handle=True)(add_custom, a, b) + end_exec1=time.time() + + list_submit.append(end_exec1-start_exec1) + + start_exec2=time.time() + starpupy.task_wait_for_all() + end_exec2=time.time() + + list_await.append(end_exec2-start_exec2) + + program_submit2.append(statistics.mean(list_submit)) + program_await2.append(statistics.mean(list_await)) + + list_std21.append(statistics.stdev(list_submit)) + list_std22.append(statistics.stdev(list_await)) + loop=asyncio.get_event_loop() + nest_asyncio.apply() + loop.run_until_complete(asy_main()) + +#without using starpu +def test_numpy(a,b): + async def asy_main(): + start_exec1=0 + end_exec1=0 + list_submit=[] + for t in range(num): + start_exec1=time.time() + add_numpy(a, b) + end_exec1=time.time() + + list_submit.append(end_exec1-start_exec1) + + program_submit3.append(statistics.mean(list_submit)) + + list_std3.append(statistics.stdev(list_submit)) + loop=asyncio.get_event_loop() + nest_asyncio.apply() + loop.run_until_complete(asy_main()) + + +#with handle return future +for i in test_handle_bench.list_size: + 
#print("i with handle return future is", i) + A = np.arange(i) + test_comp_handle_ret_fut(A, A) + starpu.unregister(A) + +#with handle return handle +for i in test_handle_bench.list_size: + #print("i with handle return handle is", i) + A = np.arange(i) + test_comp_handle_ret_handle(A, A) + starpu.unregister(A) + +#without starpu +for i in test_handle_bench.list_size: + A = np.arange(i) + test_numpy(A, A) + +retfut_dict={'program_submit':program_submit1, 'program_await': program_await1} +rethandle_dict={'program_submit':program_submit2, 'program_await': program_await2} +nostarpu_dict={'program_submit':program_submit3} + +# print(retfut_dict) +# print(rethandle_dict) +# print(nostarpu_dict) + +dict_std={'list_std11':list_std11, 'list_std12':list_std12, 'list_std21':list_std21, 'list_std22':list_std22, 'list_std3':list_std3} + +#####write the dict in file##### +js1 = json.dumps(retfut_dict) +file1 = open('handle_perf1.txt', 'w') +file1.write(js1) +file1.close() + +js2 = json.dumps(rethandle_dict) +file2 = open('handle_perf2.txt', 'w') +file2.write(js2) +file2.close() + +js3 = json.dumps(nostarpu_dict) +file3 = open('handle_perf3.txt', 'w') +file3.write(js3) +file3.close() + +js_std = json.dumps(dict_std) +file_std = open('handle_perf_std.txt', 'w') +file_std.write(js_std) +file_std.close() + +starpupy.shutdown() diff --git a/starpupy/benchmark/test_handle_perf.sh b/starpupy/benchmark/test_handle_perf.sh new file mode 100755 index 0000000..e51c5f0 --- /dev/null +++ b/starpupy/benchmark/test_handle_perf.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+"$(dirname "$0")/../execute.sh" benchmark/test_handle_perf.py "$@"
+
+if test $? != 77
+then
+    "$(dirname "$0")/../execute.sh" benchmark/handle_perf_plot.py
+else
+    echo "skip test"
+fi
diff --git a/starpupy/benchmark/test_handle_perf_pickle.py b/starpupy/benchmark/test_handle_perf_pickle.py
new file mode 100644
index 0000000..17dfb83
--- /dev/null
+++ b/starpupy/benchmark/test_handle_perf_pickle.py
@@ -0,0 +1,214 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+from starpu import starpupy
+from starpu import Handle
+from starpu import HandleNumpy
+try:
+    import numpy as np
+except (ModuleNotFoundError, ImportError):
+    print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)")
+    starpupy.shutdown()
+    exit(77)
+import asyncio
+import time
+import array
+import struct
+import nest_asyncio
+import json
+import sys
+import statistics
+
+#############################perf test####################################
+# Numpy function
+@starpu.access(a="RW", b="R")
+def add(a,b):
+    np.add(a,b,out=a)
+
+# custom function
+# @starpu.access(a="RW", b="R")
+# def add(a,b):
+#     for i in range(np.size(a)):
+#         a[i] = a[i] + b[i]
+
+listX = [10, 100, 1000, 10000, 100000, 1000000]
+list_size = []
+for x in listX:
+    for X in range(x, x*10, x):
+        list_size.append(X)
+list_size.append(10000000)
+list_size.append(20000000)
+list_size.append(30000000)
+list_size.append(40000000)
+list_size.append(50000000)
+#print("list of size is",list_size)
+
+program_submit1=[]
+program_await1=[]
+
+program_submit2=[]
+program_await2=[]
+
+program_submit3=[]
+
+num=20
+# calculate the standard deviation
+list_std11 = []
+list_std12 = []
+list_std21 = []
+list_std22 = []
+list_std3 = []
+
+# using handle
+def test_comp_handle(a,b):
+    async def asy_main():
+        start_exec1=0
+        end_exec1=0
+        start_exec2=0
+        end_exec2=0
+        list_submit=[]
+        list_await=[]
+        for t in range(num):
+            #print("loop", t)
+            start_exec1=time.time()
+            res_fut = starpu.task_submit()(add, a, b)
+            end_exec1=time.time()
+
+            list_submit.append(end_exec1-start_exec1)
+
+            start_exec2=time.time()
+            res = await res_fut
+            end_exec2=time.time()
+
+            list_await.append(end_exec2-start_exec2)
+
+        program_submit1.append(statistics.mean(list_submit))
+        program_await1.append(statistics.mean(list_await))
+
+        list_std11.append(statistics.stdev(list_submit))
+        list_std12.append(statistics.stdev(list_await))
+
+    loop=asyncio.get_event_loop()
+    nest_asyncio.apply()
+    loop.run_until_complete(asy_main())
+
+#without using handle
+def test_comp(a,b):
+    async def asy_main():
+        start_exec1=0
+        end_exec1=0
+        start_exec2=0
+        end_exec2=0
+        list_submit=[]
+        list_await=[]
+        for t in range(num):
+            #print("loop", t)
+            start_exec1=time.time()
+            res_fut = starpu.task_submit(arg_handle=False)(add, a, b)
+            end_exec1=time.time()
+
+            list_submit.append(end_exec1-start_exec1)
+
+            start_exec2=time.time()
+            res = await res_fut
+            end_exec2=time.time()
+
+            list_await.append(end_exec2-start_exec2)
+
+        program_submit2.append(statistics.mean(list_submit))
+        program_await2.append(statistics.mean(list_await))
+
+        list_std21.append(statistics.stdev(list_submit))
+        list_std22.append(statistics.stdev(list_await))
+
+    loop=asyncio.get_event_loop()
+    nest_asyncio.apply()
+    loop.run_until_complete(asy_main())
+
+#without using starpu
+def test_numpy(a,b):
+    async def asy_main():
+        start_exec1=0
+        end_exec1=0
+        list_submit=[]
+        for t in range(num):
+            start_exec1=time.time()
+            add(a, b)
+            end_exec1=time.time()
+
+            list_submit.append(end_exec1-start_exec1)
+
+        program_submit3.append(statistics.mean(list_submit))
+
+        list_std3.append(statistics.stdev(list_submit))
+
+    loop=asyncio.get_event_loop()
+    nest_asyncio.apply()
+    loop.run_until_complete(asy_main())
+
+
+#with handle
+for i in list_size:
+    #print("i with handle is", i)
+    A = np.arange(i)
+    test_comp_handle(A, A)
+
+    starpu.unregister(A)
+
+#without handle
+for i in list_size:
+    #print("i without handle is", i)
+    A = np.arange(i)
+    test_comp(A, A)
+
+#without starpu
+for i in list_size:
+    A = np.arange(i)
+    test_numpy(A, A)
+
+
+withhandle_dict={'program_submit':program_submit1, 'program_await': program_await1}
+nohandle_dict={'program_submit':program_submit2, 'program_await': program_await2}
+nostarpu_dict={'program_submit':program_submit3}
+
+# print(withhandle_dict)
+# print(nohandle_dict)
+# print(nostarpu_dict)
+
+dict_std={'list_std11':list_std11, 'list_std12':list_std12, 'list_std21':list_std21, 'list_std22':list_std22, 'list_std3':list_std3}
+
+#####write the dict in file#####
+js1 = json.dumps(withhandle_dict)
+file1 = open('handle_perf1.txt', 'w')
+file1.write(js1)
+file1.close()
+
+js2 = json.dumps(nohandle_dict)
+file2 = open('handle_perf2.txt', 'w')
+file2.write(js2)
+file2.close()
+
+js3 = json.dumps(nostarpu_dict)
+file3 = open('handle_perf3.txt', 'w')
+file3.write(js3)
+file3.close()
+
+js_std = json.dumps(dict_std)
+file_std = open('handle_perf_std.txt', 'w')
+file_std.write(js_std)
+file_std.close()
+
+starpupy.shutdown()
diff --git a/starpupy/benchmark/test_handle_perf_pickle.sh b/starpupy/benchmark/test_handle_perf_pickle.sh
new file mode 100755
index 0000000..39686d3
--- /dev/null
+++ b/starpupy/benchmark/test_handle_perf_pickle.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+"$(dirname "$0")/../execute.sh" benchmark/test_handle_perf_pickle.py "$@"
+python3 "$(dirname "$0")/handle_perf_plot_pickle.py"  # locate the plot script next to this driver, not in the current directory
diff --git a/starpupy/examples/Makefile.am b/starpupy/examples/Makefile.am
new file mode 100644
index 0000000..ab4bea5
--- /dev/null
+++ b/starpupy/examples/Makefile.am
@@ -0,0 +1,100 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo \ + starpu_py.concurrent.py \ + starpu_py_handle.concurrent.py \ + starpu_py_np.concurrent.py \ + starpu_py_partition.concurrent.py \ + starpu_py_perfmodel.concurrent.py \ + starpu_py_numpy.concurrent.py + +if STARPU_USE_MPI_MASTER_SLAVE +TESTS_ENVIRONMENT += LOADER_ARGS="--mpirun" +endif + +%.concurrent.py: %.py + sed -e 's/async //g' -e 's/\ $@ + +all-local: \ + starpu_py.concurrent.py \ + starpu_py_handle.concurrent.py \ + starpu_py_np.concurrent.py \ + starpu_py_partition.concurrent.py \ + starpu_py_perfmodel.concurrent.py \ + starpu_py_numpy.concurrent.py + +TESTS = + +if STARPU_STARPUPY_NUMPY +TESTS += starpu_py_perfmodel.sh +TESTS += starpu_py_perfmodel.concurrent.sh +endif + +if !STARPU_SIMGRID +TESTS += starpu_py.sh +TESTS += starpu_py.concurrent.sh + +TESTS += starpu_py_parallel.sh +TESTS += starpu_py_handle.sh +TESTS += starpu_py_handle.concurrent.sh + +if STARPU_STARPUPY_NUMPY +TESTS += starpu_py_numpy.sh +TESTS += starpu_py_numpy.concurrent.sh +TESTS += starpu_py_np.sh +TESTS += starpu_py_np.concurrent.sh +TESTS += starpu_py_partition.sh +TESTS += starpu_py_partition.concurrent.sh +endif +endif + +EXTRA_DIST = \ + starpu_py.concurrent.sh \ + starpu_py_handle.concurrent.sh \ + starpu_py_handle.py \ + 
starpu_py_handle.sh \ + starpu_py_np.concurrent.sh \ + starpu_py_np.py \ + starpu_py_np.sh \ + starpu_py_numpy.concurrent.sh \ + starpu_py_numpy.py \ + starpu_py_numpy.sh \ + starpu_py_parallel.py \ + starpu_py_parallel.sh \ + starpu_py_partition.concurrent.sh \ + starpu_py_partition.py \ + starpu_py_partition.sh \ + starpu_py_perfmodel.concurrent.sh \ + starpu_py_perfmodel.py \ + starpu_py_perfmodel.sh \ + starpu_py.py \ + starpu_py.sh + +python_sourcesdir = $(libdir)/starpu/python +dist_python_sources_DATA = \ + starpu_py_handle.py \ + starpu_py_np.py \ + starpu_py_numpy.py \ + starpu_py_parallel.py \ + starpu_py_partition.py \ + starpu_py_perfmodel.py \ + starpu_py.py diff --git a/starpupy/examples/Makefile.in b/starpupy/examples/Makefile.in new file mode 100644 index 0000000..664f332 --- /dev/null +++ b/starpupy/examples/Makefile.in @@ -0,0 +1,1739 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_1) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_8 = LOADER_ARGS="--mpirun" +@STARPU_STARPUPY_NUMPY_TRUE@am__append_9 = starpu_py_perfmodel.sh \ +@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_perfmodel.concurrent.sh +@STARPU_SIMGRID_FALSE@am__append_10 = starpu_py.sh \ +@STARPU_SIMGRID_FALSE@ starpu_py.concurrent.sh \ +@STARPU_SIMGRID_FALSE@ starpu_py_parallel.sh 
\ +@STARPU_SIMGRID_FALSE@ starpu_py_handle.sh \ +@STARPU_SIMGRID_FALSE@ starpu_py_handle.concurrent.sh +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@am__append_11 = starpu_py_numpy.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_numpy.concurrent.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_np.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_np.concurrent.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_partition.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_partition.concurrent.sh +subdir = starpupy/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_python_sources_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_1 = loader$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; 
+am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = loader.c +DIST_SOURCES = loader.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; 
+am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(python_sourcesdir)" +DATA = $(dist_python_sources_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# they have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ 
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ 
+INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ 
+MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ 
+STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ 
+STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = 
@target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform 
$(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) \ +@STARPU_HAVE_AM111_FALSE@ top_builddir="$(abs_top_builddir)" \ +@STARPU_HAVE_AM111_FALSE@ top_srcdir="$(abs_top_srcdir)" \ +@STARPU_HAVE_AM111_FALSE@ $(LOADER_BIN) $(am__append_8) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) \ +@STARPU_HAVE_AM111_TRUE@ top_builddir="$(abs_top_builddir)" \ +@STARPU_HAVE_AM111_TRUE@ top_srcdir="$(abs_top_srcdir)" \ +@STARPU_HAVE_AM111_TRUE@ $(am__append_8) +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo \ + starpu_py.concurrent.py \ + starpu_py_handle.concurrent.py \ + starpu_py_np.concurrent.py \ + starpu_py_partition.concurrent.py \ + starpu_py_perfmodel.concurrent.py \ + starpu_py_numpy.concurrent.py + +TESTS = $(am__append_9) $(am__append_10) $(am__append_11) +EXTRA_DIST = \ + starpu_py.concurrent.sh \ + starpu_py_handle.concurrent.sh \ + starpu_py_handle.py \ + starpu_py_handle.sh \ + starpu_py_np.concurrent.sh \ + starpu_py_np.py \ + 
starpu_py_np.sh \ + starpu_py_numpy.concurrent.sh \ + starpu_py_numpy.py \ + starpu_py_numpy.sh \ + starpu_py_parallel.py \ + starpu_py_parallel.sh \ + starpu_py_partition.concurrent.sh \ + starpu_py_partition.py \ + starpu_py_partition.sh \ + starpu_py_perfmodel.concurrent.sh \ + starpu_py_perfmodel.py \ + starpu_py_perfmodel.sh \ + starpu_py.py \ + starpu_py.sh + +python_sourcesdir = $(libdir)/starpu/python +dist_python_sources_DATA = \ + starpu_py_handle.py \ + starpu_py_np.py \ + starpu_py_numpy.py \ + starpu_py_parallel.py \ + starpu_py_partition.py \ + starpu_py_perfmodel.py \ + starpu_py.py + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpupy/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-dist_python_sourcesDATA: $(dist_python_sources_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(python_sourcesdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(python_sourcesdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(python_sourcesdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(python_sourcesdir)" || exit $$?; \ + done + +uninstall-dist_python_sourcesDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(python_sourcesdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through 
this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set 
+e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +starpu_py_perfmodel.sh.log: starpu_py_perfmodel.sh + @p='starpu_py_perfmodel.sh'; \ + b='starpu_py_perfmodel.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_perfmodel.concurrent.sh.log: starpu_py_perfmodel.concurrent.sh + @p='starpu_py_perfmodel.concurrent.sh'; \ + b='starpu_py_perfmodel.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py.sh.log: starpu_py.sh + @p='starpu_py.sh'; \ + b='starpu_py.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py.concurrent.sh.log: starpu_py.concurrent.sh + @p='starpu_py.concurrent.sh'; \ + b='starpu_py.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_parallel.sh.log: starpu_py_parallel.sh + @p='starpu_py_parallel.sh'; \ + b='starpu_py_parallel.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_handle.sh.log: starpu_py_handle.sh + @p='starpu_py_handle.sh'; \ + b='starpu_py_handle.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_handle.concurrent.sh.log: starpu_py_handle.concurrent.sh + @p='starpu_py_handle.concurrent.sh'; \ + b='starpu_py_handle.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_numpy.sh.log: starpu_py_numpy.sh + @p='starpu_py_numpy.sh'; \ + b='starpu_py_numpy.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_numpy.concurrent.sh.log: starpu_py_numpy.concurrent.sh + @p='starpu_py_numpy.concurrent.sh'; \ + b='starpu_py_numpy.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_np.sh.log: starpu_py_np.sh + @p='starpu_py_np.sh'; \ + b='starpu_py_np.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_np.concurrent.sh.log: starpu_py_np.concurrent.sh + @p='starpu_py_np.concurrent.sh'; \ + b='starpu_py_np.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_partition.sh.log: starpu_py_partition.sh + @p='starpu_py_partition.sh'; \ + b='starpu_py_partition.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_py_partition.concurrent.sh.log: starpu_py_partition.concurrent.sh + @p='starpu_py_partition.concurrent.sh'; \ + b='starpu_py_partition.concurrent.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + 
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + 
$(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(PROGRAMS) $(DATA) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(python_sourcesdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-dist_python_sourcesDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-dist_python_sourcesDATA + +.MAKE: $(am__recursive_targets) check-am install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-TESTS check-am clean clean-generic \ + clean-libtool clean-noinstPROGRAMS cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dist_python_sourcesDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean 
mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-dist_python_sourcesDATA + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! 
grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. 
But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +%.concurrent.py: %.py + sed -e 's/async //g' -e 's/\ $@ + +all-local: \ + starpu_py.concurrent.py \ + starpu_py_handle.concurrent.py \ + starpu_py_np.concurrent.py \ + starpu_py_partition.concurrent.py \ + starpu_py_perfmodel.concurrent.py \ + starpu_py_numpy.concurrent.py + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpupy/examples/loader.c b/starpupy/examples/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/starpupy/examples/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#include +#else +#include +#endif + +#ifdef STARPU_QUICK_CHECK +/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s + add some extra times for tests which run with all schedulers +*/ +#define DEFAULT_TIMEOUT 100 +#elif !defined(STARPU_LONG_CHECK) +/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ +#define DEFAULT_TIMEOUT 300 +#else +/* Long checks can be very long */ +#define DEFAULT_TIMEOUT 1000 +#endif +#define AUTOTEST_SKIPPED_TEST 77 + +static pid_t child_pid = 0; +static int timeout; + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +static int mygettimeofday(struct timeval *tv, void *tz) +{ + if (tv) + { + FILETIME ft; + unsigned long long res; + GetSystemTimeAsFileTime(&ft); + /* 100-nanosecond intervals since January 1, 1601 */ + res = ft.dwHighDateTime; + res <<= 32; + res |= ft.dwLowDateTime; + res /= 10; + /* Now we have microseconds */ + res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; + /* Now we are based on epoch */ + tv->tv_sec = res / 1000000ULL; + tv->tv_usec = res % 1000000ULL; + } +} +#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", 
"starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. */ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/starpupy/examples/starpu_py.concurrent.sh 
b/starpupy/examples/starpu_py.concurrent.sh new file mode 100755 index 0000000..cc90a59 --- /dev/null +++ b/starpupy/examples/starpu_py.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh starpu_py.concurrent.py $* + diff --git a/starpupy/examples/starpu_py.py b/starpupy/examples/starpu_py.py new file mode 100644 index 0000000..eac7e31 --- /dev/null +++ b/starpupy/examples/starpu_py.py @@ -0,0 +1,161 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +from math import sqrt +import starpu +from starpu import starpupy +import time +import asyncio + +def await_fut(fut): + return fut.result() + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +############################################################################ +#function no input no output print hello world +def hello(): + print ("Example 1:") + print ("Hello, world!") + +############################################################################# + +#function no input no output +def func1(): + print ("Example 2:") + print ("This is a function no input no output") + +############################################################################## + +#using decorator wrap the function no input no output +@starpu.delayed +def func1_deco(): + #time.sleep(1) + print ("Example 3:") + print ("This is a function no input no output wrapped by the decorator function") + +############################################################################## + +#function no input return a value +def func2(): + print ("Example 4:") + return 12 + +############################################################################### + +#function has 2 int inputs and 1 int output +def multi(a,b): + print ("Example 5:") + return a*b +#print(multi(2, 3)) + +############################################################################### + +#function has 4 float inputs and 1 float output +def add(a,b,c,d): + print ("Example 6:") + return a+b+c+d +#print(add(1.2, 2.5, 3.6, 4.9)) + +############################################################################### + +#function has 2 int inputs 1 float input and 1 float output 1 int output +def sub(a,b,c): + print ("Example 7:") + return a-b-c, a-b +#print(sub(6, 2, 5.9)) + +############################################################################### + +#using decorator wrap the function with input +@starpu.delayed(name="test") +def add_deco(a,b,c): + #time.sleep(1) + print ("Example 8:") + print ("This is a function 
with input and output wrapped by the decorator function:") + return a+b+c + +############################################################################### + +#using decorator wrap the function with input +@starpu.delayed(color=1) +def sub_deco(x,a): + print ("Example 9:") + print ("This is a function with input and output wrapped by the decorator function:") + return x-a + +############################################################################### + +async def main(): + #submit function "hello" + fut = starpu.task_submit()(hello) + await(fut) + + #submit function "func1" + fut1 = starpu.task_submit()(func1) + await(fut1) + + #apply starpu.delayed(func1_deco()) + await(func1_deco()) + + #submit function "func2" + fut2 = starpu.task_submit()(func2) + res2 = await(fut2) + #print the result of function + print("This is a function no input and the return value is", res2) + + #submit function "multi" + fut3 = starpu.task_submit()(multi, 2, 3) + res3 = await(fut3) + print("The result of function multi is :", res3) + + #submit function "add" + fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9) + res4 = await(fut4) + print("The result of function add is :", res4) + + #submit function "sub" but only provide function name + fut5 = starpu.task_submit()(sub, 6, 2, 5.9) + res5 = await(fut5) + print("The result of function sub is:", res5) + + #apply starpu.delayed(add_deco) + fut6 = add_deco(1,2,3) + #res6 = await(fut6) + #print("The result of function is", res6) + + #apply starpu.delayed(sub_deco) + fut7 = sub_deco(fut6, 1) + res7 = await(fut7) + print("The first argument of this function is the result of Example 8") + print("The result of function is", res7) + + fut8 = starpu.task_submit()("sqrt", 4) + res8 = await(fut8) + print("The result of function sqrt is:", res8) + +try: + asyncio.run(main()) +except starpupy.error as e: + print("No worker to execute the job") + starpu.shutdown() + exit(77) + +starpu.shutdown() +#starpu.task_wait_for_all() diff --git 
a/starpupy/examples/starpu_py.sh b/starpupy/examples/starpu_py.sh new file mode 100755 index 0000000..ac8538f --- /dev/null +++ b/starpupy/examples/starpu_py.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py.py $* + diff --git a/starpupy/examples/starpu_py_handle.concurrent.sh b/starpupy/examples/starpu_py_handle.concurrent.sh new file mode 100755 index 0000000..cff9c5a --- /dev/null +++ b/starpupy/examples/starpu_py_handle.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +$(dirname $0)/../execute.sh starpu_py_handle.concurrent.py $* + diff --git a/starpupy/examples/starpu_py_handle.py b/starpupy/examples/starpu_py_handle.py new file mode 100644 index 0000000..3e1b842 --- /dev/null +++ b/starpupy/examples/starpu_py_handle.py @@ -0,0 +1,577 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + np = None + +import starpu +from starpu import starpupy +from starpu import Handle +from starpu import HandleNumpy +import asyncio +import time +import array +import struct + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +def await_fut(fut): + return fut.result() + +if starpupy.worker_get_count_by_type(starpu.STARPU_MPI_MS_WORKER) >= 1 or starpupy.worker_get_count_by_type(starpu.STARPU_TCPIP_MS_WORKER) >= 1: + print("This program does not work in MS mode") + starpu.shutdown() + exit(77) + +def show(x, y): + print("Function printing:", x, y) + +def add(x, y): + print ("Example add(x, y):") + return x + y + +# create Handle objects +x=2 +y=3 +x_h = Handle(x) +y_h = Handle(y) + +print("*************************") +print("constant handle:") +print("*************************") +# show function returns 
Handle +ret_h1 = starpu.task_submit(ret_handle=True)(show, "first argument is:", x_h) +print("show function returns:", ret_h1.get()) + +# return value is Handle +res1 = starpu.task_submit(ret_handle=True)(add, x_h, y_h) +print("result of Handle(2)+Handle(3) is:", res1.get()) + +# return value is Handle +res2 = starpu.task_submit(ret_handle=True)(add, res1, y_h) +print("result of res1+Handle(3) is:", res2.get()) + +# show function returns Handle +ret_h2 = starpu.task_submit(ret_handle=True)(show, res1, res2) + +print("*************************************") +print("constant handle return in parameter:") +print("*************************************") + +ret = Handle(0) +print("before calling function, ret value is:", ret.get()) +# return value as parameter +ret_n = starpu.task_submit(ret_param=True)(add, ret, x_h, y_h) +print("result of Handle(2)+Handle(3) is:", ret.get()) +print("return value of task_submit is:", ret_n) +assert ret.get() == x+y + +x_h.unregister() +y_h.unregister() + +ret_h1.unregister() +ret_h2.unregister() +res2.unregister() +ret.unregister() + +if np is not None: + ############################################################################################## + print("*************************") + print("Numpy array handle:") + print("*************************") + def scal(x, t): + for i in range(len(t)): + t[i] = t[i] * x + print ("Example scal(scalar, array):") + + t = np.arange(10) + + # create Handle object for Numpy array + t_h = Handle(t) + + # return value is Handle + res3 = starpu.task_submit(ret_handle=True)(scal, 2, t_h) + print("result of scal(2, Handle(np.arange(10)) is:", t_h.get()) + + # show function returns Future + async def main(): + res_fut1 = starpu.task_submit()(show, res1, t_h) + await(res_fut1) + asyncio.run(main()) + + t_h.unregister() + res1.unregister() + res3.unregister() + + ###################### + def arr_add(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + + a = np.array([1, 2, 3]) + b = np.array([4, 5, 
6]) + + # create Handle objects + a_h = Handle(a) + b_h = Handle(b) + + # two array element addition + res4 = starpu.task_submit(ret_handle=True)(arr_add, a_h, b_h) + print("result of adding two Handle(numpy.array) is:", a_h.get()) + + a_h.unregister() + b_h.unregister() + + res4.unregister() + + ####################### + def multi(x, y): + print ("Example multi(x, y):") + np.multiply(x, y, out=x) + + c = np.array([[1, 2], [3, 4]]) + d = np.array([[2, 2], [2, 2]]) + + # create Handle objects + c_h = Handle(c) + d_h = Handle(d) + + # two array element multiplication + res5 = starpu.task_submit(ret_handle=True)(multi, c_h, d_h) + print("result of multiplying two Handle(numpy.array) is:", c_h.get()) + + ######################## + @starpu.access(x="RW") + def matrix_multi(x, y): + print ("Example matrix_multi(x, y):") + np.dot(x, y, out=x) + + # two array matrix multiplication + res6 = starpu.task_submit(ret_handle=True)(matrix_multi, c_h, d_h) + print("result of matrix multiplying two Handle(numpy.array) is:", c_h.get()) + + # two array matrix multiplication (inverse order) + res7 = starpu.task_submit(ret_handle=True)(matrix_multi, d_h, c_h) + print("result of matrix multiplying two Handle(numpy.array) is:", d_h.get()) + + c_h.unregister() + d_h.unregister() + + res5.unregister() + res6.unregister() + res7.unregister() + + ###################################empty Numpy array handle##################################### + print("*************************") + print("empty Numpy array handle:") + print("*************************") + a1 = np.array([1, 2, 3, 4]) + a2 = np.array([[1, 2, 3], [4, 5, 6]]) + a3 = np.array([[[1, 2, 3], [4, 5, 6]],[[7, 8, 9], [10, 11, 12]]]) + + # create Handle objects + a1_h = Handle(a1) + a2_h = Handle(a2) + a3_h = Handle(a3) + + a1_r = a1_h.acquire(mode='R') + print("original 1-dimension array is:", a1_r) + a1_h.release() + a2_r = a2_h.acquire(mode='R') + print("original 2-dimension array is:", a2_r) + a2_h.release() + a3_r = 
a3_h.acquire(mode='R') + print("original 3-dimension array is:", a3_r) + a3_h.release() + + @starpu.access(b="W") + def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i] = a[i] + + @starpu.access(b="W") + def assign2(a,b): + for i in range(min(np.size(a,0), np.size(b,0))): + for j in range(min(np.size(a,1), np.size(b,1))): + b[i][j] = a[i][j] + + @starpu.access(b="W") + def assign3(a,b): + for i in range(min(np.size(a,0), np.size(b,0))): + for j in range(min(np.size(a,1), np.size(b,1))): + for k in range(min(np.size(a,2), np.size(b,2))): + b[i][j][k] = a[i][j][k] + + # generate empty arrays Handle object using HandleNumpy + # 1-dimension + e1_h = HandleNumpy(a1.shape, a1.dtype) + + res8 = starpu.task_submit(ret_handle=True)(assign, a1_h, e1_h) + e1_r = e1_h.acquire(mode='RW') + print("assigned 1-dimension array is:", e1_r) + # e1_h is writeable, we modify the first element + e1_r[0] = 100 + print("the first element of 1-dimension array is modified to 100:", e1_r) + e1_h.release() + + # 2-dimension + e2_h = HandleNumpy(a2.shape, a2.dtype) + res9 = starpu.task_submit(ret_handle=True)(assign2, a2_h, e2_h) + e2_r = e2_h.acquire(mode='R') + print("assigned 2-dimension array is", e2_r) + e2_h.release() + + # 3-dimension + e3_h = HandleNumpy(a3.shape, a3.dtype) + res10 = starpu.task_submit(ret_handle=True)(assign3, a3_h, e3_h) + e3_r = e3_h.acquire(mode='R') + print("assigned 3-dimension array is", e3_r) + e3_h.release() + + a1_h.unregister() + a2_h.unregister() + a3_h.unregister() + e1_h.unregister() + e2_h.unregister() + e3_h.unregister() + + res8.unregister() + res9.unregister() + res10.unregister() + +##################################bytes handle############################################ +print("*************************") +print("bytes handle:") +print("*************************") +bt1 = bytes([1,2]) +bt2 = bytes([3,4]) + +bt1_h = Handle(bt1) +bt2_h = Handle(bt2) + +bt1_r = bt1_h.acquire(mode='R') +print("first bytes object is", bt1_r) 
+bt1_h.release() + +bt2_r = bt2_h.acquire(mode='R') +print("second bytes object is", bt2_r) +bt2_h.release() + +ret_bt1 = starpu.task_submit(ret_handle=True)(add, bt1_h, bt2_h) +print("result of appending two bytes: ", ret_bt1.get()) + +def bytes_add(x, y): + z = bytearray(len(x)) + for i in range (len(x)): + z[i] = x[i] + y[i] + return bytes(z) + +ret_bt2 = starpu.task_submit(ret_handle=True)(bytes_add, bt1_h, bt2_h) +print("result of adding two bytes elements: ", ret_bt2.get()) + +bt1_h.unregister() +bt2_h.unregister() + +ret_bt1.unregister() +ret_bt2.unregister() + +####################################bytearray handle######################################### +print("*************************") +print("bytearray handle:") +print("*************************") +bta1 = bytearray([1,2]) +bta2 = bytearray([3,4]) + +bta1_h = Handle(bta1) +bta2_h = Handle(bta2) + +bta1_r = bta1_h.acquire(mode='RW') +print("first bytearray object is", bta1_r) +bta1[0] = 0 +bta1_h.release() +bta11_r = bta1_h.acquire(mode='R') +print("first bytearray object is modified", bta11_r) +bta1_h.release() + +bta2_r = bta2_h.acquire(mode='R') +print("second bytearray object is", bta2_r) +bta2_h.release() + +def bytearray_add(x, y): + z = bytearray(len(x)) + for i in range (len(x)): + z[i] = x[i] + y[i] + return z + +ret_bta1 = starpu.task_submit(ret_handle=True)(bytearray_add, bta1_h, bta2_h) +print("result of adding two bytearray elements: ", ret_bta1.get()) + +bta1_h.unregister() +bta2_h.unregister() + +ret_bta1.unregister() + +##################################array.array handle########################################## +print("*************************") +print("array.array handle:") +print("*************************") +arr1 = array.array('i', [1, 2, 3, 4]) +arr2 = array.array('i', [2, 2, 2, 2]) +arr3 = array.array('f', [4.5, 5.5, 6.5]) + +arr4 = array.array('u', 'hello') + +def arrarr_add(x, y): + for i in range (len(x)): + x[i] = x[i] + y[i] + #time.sleep(1) + return x + +def arrarr_multi(x, 
y): + for i in range (len(x)): + x[i] = x[i] * y[i] + return x + +def arrarr_scal(x, s): + for i in range (len(x)): + x[i] = x[i] * s + return x + +arr1_h = Handle(arr1) +arr1_r = arr1_h.acquire(mode='RW') +print("first array.array object is", arr1_r) +arr1[0] = 0 +arr1_h.release() +arr11_r = arr1_h.acquire(mode='R') +print("first array.array object is modified", arr11_r) +arr1_h.release() + +arr2_h = Handle(arr2) +arr2_r = arr2_h.acquire(mode='R') +print("second array.array object is", arr2_r) +arr2_h.release() + +arr3_h = Handle(arr3) +arr3_r = arr3_h.acquire(mode='R') +print("third array.array object is", arr3_r) +arr3_h.release() + +arr4_h = Handle(arr4) +arr4_r = arr4_h.acquire(mode='R') +print("fourth array.array object is", arr4_r) +arr4_h.release() + +ret_arr1 = starpu.task_submit(ret_handle=True)(arrarr_add, arr1_h, arr2_h) +print("result of adding two array.array elements: ", ret_arr1.get()) + +ret_arr2 = starpu.task_submit(ret_handle=True)(arrarr_multi, arr1_h, arr2_h) +print("result of multiplying two array.array elements: ", ret_arr2.get()) + +ret_arr3 = starpu.task_submit(ret_handle=True)(arrarr_scal, arr3_h, 2) +print("result of multiplying array.array element by a scalar: ", ret_arr3.get()) + +arr1_h.unregister() +arr2_h.unregister() +arr3_h.unregister() +arr4_h.unregister() + +ret_arr1.unregister() +ret_arr2.unregister() +ret_arr3.unregister() + +##################################memoryview handle########################################### +print("*************************") +print("memoryview handle:") +print("*************************") +m1 = memoryview(bytearray("hello", 'utf-8')) +m1_tb = m1.tobytes() +print("m1 to bytes is", m1_tb) + +m2 = memoryview(array.array('i', [1, 2, 3, 4])) +m2_tl = m2.tolist() +print("m2 to list is", m2_tl) + +m3 = memoryview(array.array('u', 'hello')) + +m1_h = Handle(m1) +print("m1 is", m1_h.acquire(mode='RW')) +m1[0] = 100 +m1_h.release() + +print("m1 to bytes after modifying is", m1_tb) +print("m1 after modifying 
is", m1_h.acquire(mode='RW')) +m1_h.release() + +m2_h = Handle(m2) +print("m2 is", m2_h.acquire(mode='R')) +m2_h.release() + +m3_h = Handle(m3) +print("m3 is", m3_h.acquire(mode='R')) +m3_h.release() + +# multi dimension +def mem_show(x): + print("memory is", x) + +buf = struct.pack("L"*12, *list(range(12))) +x = memoryview(buf) +# 2-dimension +y = x.cast('L', shape=[3,4]) +# 3-dimension +z = x.cast('L', shape=[2,3,2]) +print(y.tolist()) +print(z.tolist()) + +y_h = Handle(y) +ret_m1 = starpu.task_submit(ret_handle=True)(mem_show, y_h) + +print("y is", y_h.acquire(mode='R')) +y_h.release() + +z_h = Handle(z) +ret_m2 = starpu.task_submit(ret_handle=True)(mem_show, z_h) + +print("z is", z_h.acquire(mode='R')) +z_h.release() + +m1_h.unregister() +m2_h.unregister() +m3_h.unregister() + +y_h.unregister() +z_h.unregister() + +ret_m1.unregister() +ret_m2.unregister() + +if np is not None: + #####################################access mode annotation################################### + print("*************************") + print("access mode annotation:") + print("*************************") + a = np.array([1, 2, 3, 4]) + a_h = Handle(a) + e_h = HandleNumpy(a.shape, a.dtype) + + a_r = a_h.acquire(mode='R') + print("original array is:", a_r) + a_h.release() + + ######access##### + print("------------------") + print("access decorator:") + print("------------------") + @starpu.access(a="R", b="W") + def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] + + res11 = starpu.task_submit(ret_handle=True)(assign, a_h, e_h) + + e_r = e_h.acquire(mode='RW') + print("assigned 1-dimension array is:", e_r) + e_h.release() + + ######delayed####### + print("------------------") + print("delayed decorator:") + print("------------------") + @starpu.delayed(ret_handle=True, a="R", b="W") + def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] + + res12 = assign(a_h, e_h) + + e_r = e_h.acquire(mode='RW') + print("assigned 1-dimension array is:", 
e_r) + e_h.release() + + ######set access###### + print("------------------") + print("access function:") + print("------------------") + def assign(a,b): + for i in range(min(np.size(a), np.size(b))): + b[i]=a[i] + + assign_access=starpu.set_access(assign, a="R", b="W") + res13 = starpu.task_submit(ret_handle=True)(assign_access, a_h, e_h) + + e_r = e_h.acquire(mode='RW') + print("assigned 1-dimension array is:", e_r) + e_h.release() + + a_h.unregister() + e_h.unregister() + + res11.unregister() + res12.unregister() + res13.unregister() + + #######################Numpy without explicit handle############################ + print("*******************************") + print("Numpy without explicit handle:") + print("*******************************") + arrh1 = np.array([1, 2, 3]) + arrh2 = np.array([4, 5, 6]) + + @starpu.access(a="RW", b="R") + def np_add(a, b): + #time.sleep(2) + for i in range(np.size(a)): + a[i] = a[i] + b[i] + + print("First argument before task submitting is", starpu.acquire(arrh1, mode='R')) + #a[0]=100 + starpu.release(arrh1) + # without explicit handle + res14 = starpu.task_submit(ret_handle=True)(np_add, arrh1, arrh2) + + print("First argument after task submitting is", starpu.acquire(arrh1, mode='R')) + starpu.release(arrh1) + + # it's recommended to call unregister once the argument no longer needs to be accessed, but it's not obligatory: calling starpupy.shutdown() at the end is enough, since it unregisters all non-explicit handles + starpu.unregister(arrh1) + + res14.unregister() + + #######################Numpy without using handle############################### + print("*******************************") + print("Numpy without using handle:") + print("*******************************") + npa1 = np.array([1, 2, 3]) + npa2 = np.array([4, 5, 6]) + + print("First argument before task submitting is", npa1) + # without using handle, set option arg_handle to False + res15 = starpu.task_submit(arg_handle=False, ret_handle=True)(np_add, npa1, npa2) +
print("First argument after task submitting is", npa1) + #print("The addition result is", res15.get()) + + res15.unregister() + +######################### + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_handle.sh b/starpupy/examples/starpu_py_handle.sh new file mode 100755 index 0000000..09885f4 --- /dev/null +++ b/starpupy/examples/starpu_py_handle.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py_handle.py $* + diff --git a/starpupy/examples/starpu_py_np.concurrent.sh b/starpupy/examples/starpu_py_np.concurrent.sh new file mode 100755 index 0000000..fb9823a --- /dev/null +++ b/starpupy/examples/starpu_py_np.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh starpu_py_np.concurrent.py $* + diff --git a/starpupy/examples/starpu_py_np.py b/starpupy/examples/starpu_py_np.py new file mode 100644 index 0000000..2717b93 --- /dev/null +++ b/starpupy/examples/starpu_py_np.py @@ -0,0 +1,97 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + exit(77) + +import starpu +from starpu import starpupy +import asyncio + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +def await_fut(fut): + return fut.result() + +############################################################################### + +def scal(x, t): + for i in range(len(t)): + t[i] = t[i] * x + print ("Example scal(scalar, array):") + return t + +def add(x, y): + print ("Example add(array, array):") + return x + y + +def multi(x, y): + print ("Example multi(array, array):") + return x * y + +def matrix_multi(x, y): + print ("Example matrix_multi(array, array):") + return x @ y + +t = np.arange(10) + +a = np.array([1, 2, 3]) +b = np.array([4, 5, 6]) + +c = np.array([[1, 2], [3, 4]]) +d = np.array([[2, 2], [2, 2]]) + +async def main(): + fut1 = starpu.task_submit()(scal, 2, t) + res1 = await(fut1) + print("The result is", res1) + + # two array element addition + fut2 = starpu.task_submit()(add, a, b) + res2 = await(fut2) + print("The result is", res2) + + # two array element multiplication + fut3 = starpu.task_submit()(multi, c, d) + res3 = await(fut3) + print("The result is", res3) + + # two array matrix multiplication + fut4 = starpu.task_submit()(matrix_multi, c, d) + res4 = await(fut4) + print("The result is", res4) + + # two array matrix multiplication (inverse order) + fut5 = starpu.task_submit()(matrix_multi, d, c) + res5 = await(fut5) + print("The result is", res5) + + +try: + asyncio.run(main()) +except starpupy.error as e: + print("No worker to execute the job") + starpupy.shutdown() + exit(77) + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_np.sh b/starpupy/examples/starpu_py_np.sh new file mode 100755 index 0000000..bd02b95 --- /dev/null +++ b/starpupy/examples/starpu_py_np.sh @@ -0,0 +1,19 @@ 
+#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py_np.py $* + diff --git a/starpupy/examples/starpu_py_numpy.concurrent.sh b/starpupy/examples/starpu_py_numpy.concurrent.sh new file mode 100755 index 0000000..74841b5 --- /dev/null +++ b/starpupy/examples/starpu_py_numpy.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +$(dirname $0)/../execute.sh starpu_py_numpy.concurrent.py $* + diff --git a/starpupy/examples/starpu_py_numpy.py b/starpupy/examples/starpu_py_numpy.py new file mode 100644 index 0000000..991be64 --- /dev/null +++ b/starpupy/examples/starpu_py_numpy.py @@ -0,0 +1,48 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + exit(77) + +import starpu +from starpu import Handle +import asyncio + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +@starpu.access(a="RW",b="R") +def arr_add(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + +a_h = Handle(np.array([1, 2, 3, 4])) +b_h = Handle(np.array([5, 6, 7, 8])) + +starpu.task_submit(ret_fut=False)(arr_add, a_h, b_h) + +print("Array is", a_h.get()) + +a_h.unregister() +b_h.unregister() + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_numpy.sh b/starpupy/examples/starpu_py_numpy.sh new file mode 100755 index 0000000..890774d --- /dev/null +++ b/starpupy/examples/starpu_py_numpy.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py_numpy.py $* + diff --git a/starpupy/examples/starpu_py_parallel.py b/starpupy/examples/starpu_py_parallel.py new file mode 100644 index 0000000..d63ffa9 --- /dev/null +++ b/starpupy/examples/starpu_py_parallel.py @@ -0,0 +1,403 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + np = None + +import starpu +try: + import starpu.joblib +except (ModuleNotFoundError, ImportError): + print("Can't find starpu.joblib\" module (consider running \"pip3 install joblib\")") + exit(77) +from starpu import starpupy +import time +import asyncio +from math import sqrt +from math import log10 +import sys + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +def await_fut(fut): + return fut.result() + +#generate a list to store functions +g_func=[] + +#function no input no output print hello world +def hello(): + print ("Example 1: Hello, world!") +g_func.append(starpu.joblib.delayed(hello)()) + +#function no input no output +def func1(): + print ("Example 2: This is a function no input no output") +g_func.append(starpu.joblib.delayed(func1)()) + +#function no input return a value +def func2(): + print ("Example 3:") + return 12 +g_func.append(starpu.joblib.delayed(func2)()) + +#function has 2 int inputs and 1 int output +def exp(a,b): + res_exp=a**b + print("Example 4: The result of ",a,"^",b,"is",res_exp) + return res_exp +g_func.append(starpu.joblib.delayed(exp)(2, 3)) + +#function has 4 float inputs and 1 float output +def add(a,b,c,d): + res_add=a+b+c+d + print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add) + return res_add +g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9)) + +#function has 2 int inputs 1 float input and 1 float output 1 int output +def sub(a,b,c): + res_sub1=a-b-c + res_sub2=a-b + print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2) + return res_sub1, res_sub2 +g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9)) + +##########functions of array calculation############### +def scal(a, t): + for i in range(len(t)): + t[i]=t[i]*a + 
return t + +@starpu.access(t="RW") +def scal_np(a, t): + for i in range(len(t)): + t[i]=t[i]*a + +@starpu.access(t1="RW") +def add_scal(a, t1, t2): + for i in range(len(t1)): + t1[i]=t1[i]*a+t2[i] + #return t1 + +@starpu.access(t="RW") +def scal_arr(a, t): + for i in range(len(t)): + t[i]=t[i]*a[i] + +def multi(a,b): + res_multi=a*b + return res_multi + +def multi_2arr(a, b): + for i in range(len(a)): + a[i]=a[i]*b[i] + return a + +@starpu.access(a="RW") +def multi_2np(a, b): + for i in range(len(a)): + a[i]=a[i]*b[i] + +def multi_list(l): + res = [] + for (a,b) in l: + res.append(a*b) + return res + +@starpu.access(t="RW") +def log10_arr(t): + for i in range(len(t)): + t[i]=log10(t[i]) + +######################################################## + +displayPlot=False +listX=[10, 100] +for arg in sys.argv[1:]: + if arg == "-long": + listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000] + if arg == "-plot": + displayPlot=True + +if np is not None: + #################scikit test################### + # DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__ + # class MyBackend(DEFAULT_JOBLIB_BACKEND): # type: ignore + # def __init__(self, *args, **kwargs): + # self.count = 0 + # super().__init__(*args, **kwargs) + + # def start_call(self): + # self.count += 1 + # return super().start_call() + + # starpu.joblib.register_parallel_backend('testing', MyBackend) + + # with starpu.joblib.parallel_backend("testing") as (ba, n_jobs): + # print("backend and n_jobs is", ba, n_jobs) + ############################################### + + N=100 + # A=np.arange(N) + # B=np.arange(N) + # a=np.arange(N) + # b=np.arange(N, 2*N, 1) + + for x in listX: + for X in range(x, x*10, x): + #print("X=",X) + try : + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X)) + A=np.arange(1,X+1,1) + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A)) + except 
starpupy.error as e: + print("No worker to execute the job") + exit(77) + + print("************************") + print("parallel Normal version:") + print("************************") + print("--(sqrt)(i**2)for i in range(N)") + start_exec1=time.time() + start_cpu1=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N)) + end_exec1=time.time() + end_cpu1=time.process_time() + print("the program execution time is", end_exec1-start_exec1) + print("the cpu execution time is", end_cpu1-start_cpu1) + + print("--(multi)(i,j) for i,j in zip(a,b)") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + start_exec2=time.time() + start_cpu2=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b)) + end_exec2=time.time() + end_cpu2=time.process_time() + print("the program execution time is", end_exec2-start_exec2) + print("the cpu execution time is", end_cpu2-start_cpu2) + + print("--(scal_arr)((i for i in b), A)") + A=np.arange(N) + b=np.arange(N, 2*N, 1) + print("The input array is", A) + start_exec3=time.time() + start_cpu3=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A)) + end_exec3=time.time() + end_cpu3=time.process_time() + print("The return array is", A) + print("the program execution time is", end_exec3-start_exec3) + print("the cpu execution time is", end_cpu3-start_cpu3) + + print("--(multi_list)((i,j) for i,j in zip(a,b))") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + start_exec4=time.time() + start_cpu4=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b))) + end_exec4=time.time() + end_cpu4=time.process_time() + print("the program execution time is", end_exec4-start_exec4) + print("the cpu execution time is", 
end_cpu4-start_cpu4) + + print("--(multi_2arr)((i for i in a), (j for j in b))") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + start_exec5=time.time() + start_cpu5=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b))) + end_exec5=time.time() + end_cpu5=time.process_time() + print("the program execution time is", end_exec5-start_exec5) + print("the cpu execution time is", end_cpu5-start_cpu5) + + print("--(multi_2np)(A, B)") + # A=np.arange(N) + # B=np.arange(N, 2*N, 1) + n, m = 4, 5 + A = np.arange(n*m).reshape(n, m) + B = np.arange(n*m, 2*n*m, 1).reshape(n, m) + print("The input arrays are A", A, "B", B) + start_exec6=time.time() + start_cpu6=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2np)(A, B)) + end_exec6=time.time() + end_cpu6=time.process_time() + print("The return array is", A) + print("the program execution time is", end_exec6-start_exec6) + print("the cpu execution time is", end_cpu6-start_cpu6) + + print("--(scal)(2, t=(j for j in a))") + a=np.arange(N) + start_exec7=time.time() + start_cpu7=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a))) + end_exec7=time.time() + end_cpu7=time.process_time() + print("the program execution time is", end_exec7-start_exec7) + print("the cpu execution time is", end_cpu7-start_cpu7) + + print("--(scal_np)(2,A)") + A=np.arange(N) + print("The input is", A) + start_exec8=time.time() + start_cpu8=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal_np)(2,A)) + end_exec8=time.time() + end_cpu8=time.process_time() + print("The return array is", A) + print("the program execution time is", end_exec8-start_exec8) + print("the cpu execution time is", end_cpu8-start_cpu8) + + 
print("--(add_scal)(t1=A,t2=B,a=2)") + A=np.arange(N) + B=np.arange(N) + print("The input arrays are t1", A, "t2", B) + start_exec9=time.time() + start_cpu9=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2)) + end_exec9=time.time() + end_cpu9=time.process_time() + print("The return array is", A) + print("the program execution time is", end_exec9-start_exec9) + print("the cpu execution time is", end_cpu9-start_cpu9) + + + print("--input is iterable function list") + start_exec10=time.time() + start_cpu10=time.process_time() + starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func) + end_exec10=time.time() + end_cpu10=time.process_time() + print("the program execution time is", end_exec10-start_exec10) + print("the cpu execution time is", end_cpu10-start_cpu10) + + # def producer(): + # for i in range(6): + # print('Produced %s' % i) + # yield i + #starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer()) + + print("************************") + print("parallel Future version:") + print("************************") + async def main(): + + print("--(sqrt)(i**2)for i in range(N)") + fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N)) + res1=await(fut1) + print("The result is", sum(res1,[])) + + print("--(multi)(i,j) for i,j in zip(a,b)") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + print("The inputs are a", a, "b", b) + fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b)) + res2=await(fut2) + print("The result is", sum(res2,[])) + + print("--(scal_arr)((i for i in b), A)") + A=np.arange(N) + b=np.arange(N, 2*N, 1) + print("The input arrays are A", A, "b", b) + fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A)) + 
res3=await(fut3) + #print("The return array is", np.concatenate(res3)) + print("The return array is", A) + + print("--(multi_list)((i,j) for i,j in zip(a,b))") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + print("The input lists are a", a, "b", b) + fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b))) + res4=await(fut4) + print("The result is", sum(res4,[])) + + print("--(multi_2arr)((i for i in a), (j for j in b))") + a=np.arange(N) + b=np.arange(N, 2*N, 1) + print("The input lists are a", a, "b", b) + fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b))) + res5=await(fut5) + print("The result is", sum(res5,[])) + + print("--(multi_2np)(b=B, a=A)") + A=np.arange(N) + B=np.arange(N, 2*N, 1) + print("The input arrays are A", A, "B", B) + fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2np)(b=B, a=A)) + res6=await(fut6) + #print("The return array is", np.concatenate(res6)) + print("The return array is", A) + + + print("--(scal)(2, (j for j in a))") + a=np.arange(N) + print("The input list is a", a) + fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a))) + res7=await(fut7) + print("The result is", sum(res7,[])) + + print("--(scal_np)(2,t=A)") + A=np.arange(N) + print("The input array is", A) + fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal_np)(2,t=A)) + res8=await(fut8) + #print("The return array is", np.concatenate(res8)) + print("The return array is", A) + + print("--(scal)(2,A,B)") + A=np.arange(N) + B=np.arange(N) + print("The input arrays are A", A, "B", B) + fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B)) + res9=await(fut9) + #print("The return array 
is", np.concatenate(res9)) + print("The return array is", A) + + print("--input is iterable function list") + fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="func")(g_func) + res10=await(fut10) + #print(res9) + + try: + asyncio.run(main()) + except starpupy.error as e: + starpu.shutdown() + exit(77) + + starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot) + starpu.perfmodel_plot(perfmodel="multi",view=displayPlot) + starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot) + starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot) + starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot) + starpu.perfmodel_plot(perfmodel="scal",view=displayPlot) + starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot) + starpu.perfmodel_plot(perfmodel="func",view=displayPlot) + + starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot) + starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot) + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_parallel.sh b/starpupy/examples/starpu_py_parallel.sh new file mode 100755 index 0000000..b7c4593 --- /dev/null +++ b/starpupy/examples/starpu_py_parallel.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +STARPU_CALIBRATE=1 $(dirname $0)/../execute.sh examples/starpu_py_parallel.py $* + diff --git a/starpupy/examples/starpu_py_partition.concurrent.sh b/starpupy/examples/starpu_py_partition.concurrent.sh new file mode 100755 index 0000000..b036e01 --- /dev/null +++ b/starpupy/examples/starpu_py_partition.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh starpu_py_partition.concurrent.py $* + diff --git a/starpupy/examples/starpu_py_partition.py b/starpupy/examples/starpu_py_partition.py new file mode 100644 index 0000000..610120d --- /dev/null +++ b/starpupy/examples/starpu_py_partition.py @@ -0,0 +1,91 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + exit(77) + +import starpu +from starpu import Handle +from starpu import HandleNumpy +import asyncio +import time +import array +import struct + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +def await_fut(fut): + return fut.result() + +# 1-dimension +# arr = np.arange(20) + +# 2-dimension +# n, m = 20, 10 +# arr = np.arange(n*m).reshape(n, m) + +# 3-dimension +# x, y, z = 10, 15, 20 +# arr = np.arange(x*y*z).reshape(x, y, z) + +# 4-dimension +x, y, z, t = 10, 5, 10, 20 +arr = np.arange(x*y*z*t).reshape(x, y, z, t) +print("input array is", arr) + +arr_h = Handle(arr) + +# split the handle into split_num sub-handles +split_num = 3 +#arr_h_list = arr_h.partition(split_num, 0, [6,6,8]) +arr_h_list = arr_h.partition(split_num, 0, [3,2,5]) + +n_arr = arr_h.get_partition_size(arr_h_list) + +print("partition size is", n_arr) + +def show(x): + print("Function printing:", x) + +@starpu.access(a="RW") +def add(a,b): + np.add(a,b,out=a) + +for i in range(split_num): + starpu.task_submit(ret_handle=False,ret_fut=False)(add, arr_h_list[i], arr_h_list[i]) + +# async def main(): +# for i in range(split_num): +# res=starpu.task_submit()(add, arr_h_list[i], arr_h_list[i]) +# res1=await(res) +# asyncio.run(main()) + +arr_r = arr_h.acquire(mode='RW') +print("output array is:", arr_r) +arr_h.release() + +arr_h.unpartition(arr_h_list, split_num) + +arr_h.unregister() + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_partition.sh b/starpupy/examples/starpu_py_partition.sh new file mode 100755 index 0000000..c3bf7d0 --- /dev/null +++ b/starpupy/examples/starpu_py_partition.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py_partition.py $* + diff --git a/starpupy/examples/starpu_py_perfmodel.concurrent.sh b/starpupy/examples/starpu_py_perfmodel.concurrent.sh new file mode 100755 index 0000000..051097b --- /dev/null +++ b/starpupy/examples/starpu_py_perfmodel.concurrent.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh starpu_py_perfmodel.concurrent.py $* + diff --git a/starpupy/examples/starpu_py_perfmodel.py b/starpupy/examples/starpu_py_perfmodel.py new file mode 100644 index 0000000..dc76f63 --- /dev/null +++ b/starpupy/examples/starpu_py_perfmodel.py @@ -0,0 +1,48 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +try: + import numpy as np +except (ModuleNotFoundError, ImportError): + print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") + exit(77) + +import starpu +from starpu import Handle +import asyncio + +try: + starpu.init() +except Exception as e: + print(e) + exit(77) + +@starpu.access(a="RW",b="R") +def arr_add(a,b): + for i in range(np.size(a)): + a[i] = a[i] + b[i] + +a_h = Handle(np.arange(1000)) +b_h = Handle(np.arange(1000)) + +starpu.task_submit(ret_fut=False, perfmodel="arr_add")(arr_add, a_h, b_h) + +print("Array is", a_h.get()) + +a_h.unregister() +b_h.unregister() + +starpu.shutdown() diff --git a/starpupy/examples/starpu_py_perfmodel.sh b/starpupy/examples/starpu_py_perfmodel.sh new file mode 100755 index 0000000..d2657de --- /dev/null +++ b/starpupy/examples/starpu_py_perfmodel.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +$(dirname $0)/../execute.sh examples/starpu_py_perfmodel.py $* + diff --git a/starpupy/execute.sh.in b/starpupy/execute.sh.in new file mode 100755 index 0000000..66a8954 --- /dev/null +++ b/starpupy/execute.sh.in @@ -0,0 +1,133 @@ +#!@REALBASH@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +PROGNAME=$0 + +usage() +{ + echo "Tool to launch StarPUPY and examples" + echo "" + echo " Typical usage:" + echo " $PROGNAME [python_script.py]" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo " --valgrind launch executable with valgrind" + echo " --gdb launch executable with gdb" + echo " --mpirun launch executable with MPI using 2 processes" + exit 0 +} + +starpupy_path=@STARPU_SRC_DIR@/starpupy +modpath=@STARPU_BUILD_DIR@/src/.libs${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} +pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH + +LOADER="@STARPU_BUILD_DIR@/starpupy/examples/loader" +PYTHON=@PYTHON@ +# python produces a lot of "possibly lost" false positives +export STARPU_VALGRIND_OPTIONS="--errors-for-leak-kinds=definite,indirect --show-leak-kinds=definite,indirect" + +MPI_LAUNCHER="$LAUNCHER" +if test -z "$MPI_LAUNCHER" +then + MPI_LAUNCHER="mpiexec -np 2" +fi +mpi="" +gdb="" +MPI_LOADER="" +valgrind="" + +EXEC_ARGS="" + +for x in $LOADER_ARGS $* +do + if [ "$x" = "-v" ] || [ "$x" = "--version" ] + then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 + fi + + if [ "$x" = "-h" ] || [ "$x" = "--help" ] + then + usage + exit 0 + fi + + if test "$x" == "--valgrind" + then + valgrind="valgrind" + export PYTHONMALLOC=malloc + LOADER="$LOADER valgrind --track-origins=yes" + elif test "$x" == "--gdb" + then + gdb="gdb" + LOADER="gdb --args" + elif test "$x" == "--mpirun" + then + mpi="mpi" + else + EXEC_ARGS="$EXEC_ARGS $x" + fi +done + +if test "$mpi" == "mpi" +then + if test -n "$gdb" + then + MPI_LOADER="$MPI_LAUNCHER xterm $XTERM_PARAMS -sl 10000 -e" + elif test -n "$valgrind" + then + MPI_LOADER="$MPI_LAUNCHER xterm $XTERM_PARAMS -sl 10000 -hold -e" + else + MPI_LOADER="$MPI_LAUNCHER" + fi +fi + +# set arguments with the cleaned list +set -- $EXEC_ARGS + +examplefile=$1 +shift +if test -n "$examplefile" +then + if test -f $examplefile + then + 
pythonscript=$examplefile + elif test -f $starpupy_path/$examplefile + then + pythonscript=$starpupy_path/$examplefile + else + echo "Error. Python script $examplefile not found in current directory or in $starpupy_path" + exit 1 + fi +else + # Interactive use + if ! test -n "$gdb" + then + LOADER="" + fi +fi + +set -x +if [ -n "$STARPU_LD_PRELOAD" ] +then + export LD_PRELOAD=$STARPU_LD_PRELOAD${LD_PRELOAD:+:$LD_PRELOAD} +fi +export PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath + +exec $MPI_LOADER $LOADER $PYTHON $pythonscript $* diff --git a/starpupy/src/Makefile.am b/starpupy/src/Makefile.am new file mode 100644 index 0000000..18c1d24 --- /dev/null +++ b/starpupy/src/Makefile.am @@ -0,0 +1,95 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-notests.mk + +SUBDIRS = + +PYTHON_PY_SRC = $(wildcard $(top_srcdir)/starpupy/src/*py) +PYTHON_PY_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC))) + +PYTHON_C_SRC = $(wildcard $(top_srcdir)/starpupy/src/*c) +PYTHON_C_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC))) + +PYTHON_H_SRC = $(wildcard $(top_srcdir)/starpupy/src/*h) +PYTHON_H_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_H_SRC))) + +$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py + $(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ +$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c + @$(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ +$(top_builddir)/starpupy/src/starpu/%.h: $(abs_top_srcdir)/starpupy/src/%.h + @$(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ + +all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD) $(PYTHON_H_BUILD) + LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS) + +# FIXME and enable +dist-hook: + nm -n build/starpu/starpupy.cpython-*.so | grep -v " [Ua-z] " | grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|_fini\|_edata\|__bss_start\|_end\|PyInit_starpupy\|__gcov_\|mangle_path\)" | (! grep .) 
+ +check: all + +PYTHONPATH=$(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages + +install-exec-local: all + LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py clean + @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ + then \ + chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ + fi + @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ + then \ + chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ + fi + $(MKDIR_P) $(PYTHONPATH)/starpu + chmod u+w $(PYTHONPATH)/starpu + $(PYTHON) setup.py install --prefix '$(prefix)' --root '$(DESTDIR)$(prefix)' + rm -fr build/bdist* + +if STARPU_BUILD_STARPUPY +clean-local: + rm -rf lib dist build starpupy.egg-info + $(PYTHON) setup.py clean -a +endif + +distclean-local: + rm -rf build dist lib + +uninstall-local: clean-local + rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/starpu* + rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/tmp/starpu* + +EXTRA_DIST = \ + delay.py \ + __init__.py \ + intermedia.py \ + joblib.py \ + handle_access.py \ + starpu_task_wrapper.c \ + starpupy_cloudpickle.h \ + starpupy_interface.c \ + starpupy_interface.h \ + starpupy_buffer_interface.c \ + starpupy_buffer_interface.h \ + starpupy_numpy_filters.c \ + starpupy_numpy_filters.h \ + starpupy_handle.c \ + starpupy_handle.h \ + starpupy_private.h diff --git a/starpupy/src/Makefile.in b/starpupy/src/Makefile.in new file mode 100644 index 0000000..dac6baf --- /dev/null +++ b/starpupy/src/Makefile.in @@ -0,0 +1,1014 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes 
+am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +subdir = starpupy/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = setup.cfg setup.py +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = 
$(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/setup.cfg.in \ + $(srcdir)/setup.py.in $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = 
@CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ 
+LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ 
+PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = 
@STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = 
@build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +SUBDIRS = +PYTHON_PY_SRC = $(wildcard $(top_srcdir)/starpupy/src/*py) +PYTHON_PY_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC))) +PYTHON_C_SRC = $(wildcard $(top_srcdir)/starpupy/src/*c) +PYTHON_C_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC))) +PYTHON_H_SRC = $(wildcard $(top_srcdir)/starpupy/src/*h) +PYTHON_H_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_H_SRC))) +PYTHONPATH = $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages +EXTRA_DIST = \ + delay.py \ + __init__.py \ + intermedia.py \ + joblib.py \ + handle_access.py \ + starpu_task_wrapper.c \ + starpupy_cloudpickle.h \ + starpupy_interface.c \ + starpupy_interface.h \ + starpupy_buffer_interface.c \ + starpupy_buffer_interface.h \ + starpupy_numpy_filters.c \ + starpupy_numpy_filters.h \ + starpupy_handle.c \ + starpupy_handle.h \ + starpupy_private.h + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cu .cubin .hip .o +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep 
in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpupy/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +setup.cfg: $(top_builddir)/config.status $(srcdir)/setup.cfg.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +setup.py: $(top_builddir)/config.status $(srcdir)/setup.py.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-hook +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + 
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_BUILD_STARPUPY_FALSE@clean-local: +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-local mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-local \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-exec-local + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-local + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool clean-local \ + cscopelist-am ctags ctags-am dist-hook distclean \ + distclean-generic distclean-libtool distclean-local \ + distclean-tags distdir dvi dvi-am 
html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-local \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-local + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py + $(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ +$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c + @$(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ +$(top_builddir)/starpupy/src/starpu/%.h: $(abs_top_srcdir)/starpupy/src/%.h + @$(MKDIR_P) starpu + $(V_ln) $(LN_S) $< $@ + +all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD) $(PYTHON_H_BUILD) + LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS) + +# FIXME and enable +dist-hook: + nm -n 
build/starpu/starpupy.cpython-*.so | grep -v " [Ua-z] " | grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|_fini\|_edata\|__bss_start\|_end\|PyInit_starpupy\|__gcov_\|mangle_path\)" | (! grep .) + +check: all + +install-exec-local: all + LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py clean + @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ + then \ + chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ + fi + @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ + then \ + chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ + fi + $(MKDIR_P) $(PYTHONPATH)/starpu + chmod u+w $(PYTHONPATH)/starpu + $(PYTHON) setup.py install --prefix '$(prefix)' --root '$(DESTDIR)$(prefix)' + rm -fr build/bdist* + +@STARPU_BUILD_STARPUPY_TRUE@clean-local: +@STARPU_BUILD_STARPUPY_TRUE@ rm -rf lib dist build starpupy.egg-info +@STARPU_BUILD_STARPUPY_TRUE@ $(PYTHON) setup.py clean -a + +distclean-local: + rm -rf build dist lib + +uninstall-local: clean-local + rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/starpu* + rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/tmp/starpu* + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpupy/src/__init__.py b/starpupy/src/__init__.py new file mode 100644 index 0000000..ac12c57 --- /dev/null +++ b/starpupy/src/__init__.py @@ -0,0 +1,116 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +from . import starpupy +from .delay import * +from .handle_access import * +#from . import joblib +from .intermedia import * + +import asyncio +import concurrent.futures +try: + import numpy as np + has_numpy=True +except: + has_numpy=False + +async def wait_for_fut(fut): + return await fut + +async def asyncio_wait_for_fut(fut): + return await fut + +def concurrent_futures_wait_for_fut(fut): + return fut.result() + +#class handle +class Handle(object): + + def __init__(self, obj, retval=False): + self.obj=obj + self.obj_id=id(self.obj) + self.retval=retval + self.handle_cap=starpupy.starpupy_data_register(self.obj, self) + + def get_capsule(self): + return self.handle_cap + + def get_obj_id(self): + return self.obj_id + + def get_retval(self): + return self.retval + + # get PyObject + def get(self): + return starpupy.starpupy_get_object(self.handle_cap) + + # get array object + def acquire(self, mode='R'): + return starpupy.starpupy_acquire_handle(self.handle_cap, mode) + + # release + def release(self): + return starpupy.starpupy_release_handle(self.handle_cap) + + # unregister + def unregister(self): + return starpupy.starpupy_data_unregister(self) + + # unregister_submit + def unregister_submit(self): + return starpupy.starpupy_data_unregister_submit(self) + + # partition + def partition(self, nchildren, dim, chunks_list=[]): + return starpupy.starpupy_data_partition(self.handle_cap, nchildren, dim, chunks_list) + + # get partition size + def get_partition_size(self, handle_list): + return starpupy.starpupy_get_partition_size(self.handle_cap, handle_list) + + # unpartition + def unpartition(self, handle_list, nchildren): + return starpupy.starpupy_data_unpartition(self.handle_cap, 
handle_list, nchildren) + +def new_empty_numpy(shape, dtype): + return np.empty(shape, dtype) + +#class handle +class HandleNumpy(Handle): + if has_numpy: + def __init__(self, shape, dtype=np.dtype('float64'), retval=False): + self.dtype=dtype + self.retval=retval + self.obj=new_empty_numpy(shape, self.dtype) + self.obj_id=id(self.obj) + self.handle_cap=starpupy.starpupy_data_register(self.obj, self) + + +#detect class handle +class Handle_token(object): + pass + +#this dict contains all handle objects of mutable Python objects +handle_dict={} +def handle_dict_set_item(obj, handle): + assert handle_dict.get(id(obj))==None + handle_dict[id(obj)]=handle + return handle_dict + +#this set contains all handle objects of immutable Python objects +handle_set=set() diff --git a/starpupy/src/delay.py b/starpupy/src/delay.py new file mode 100644 index 0000000..aca6715 --- /dev/null +++ b/starpupy/src/delay.py @@ -0,0 +1,39 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +from starpu import starpupy +import starpu +import asyncio +from functools import partial +import inspect + +def delayed(f=None, **kwargs): + # add options of task_submit + if f is None: + return partial(delayed, **kwargs) + def submit(*args): + # set the access right + access_mode={} + f_args = inspect.getfullargspec(f).args + # check the access right of argument is set in mode or not + for i in range(len(f_args)): + if f_args[i] in kwargs.keys(): + # record the access mode, stored in the f.starpu_access attribute below + access_mode[f_args[i]]=kwargs[f_args[i]] + setattr(f, "starpu_access", access_mode) + + fut = starpu.task_submit(**kwargs)(f, *args) + return fut + return submit diff --git a/starpupy/src/handle_access.py b/starpupy/src/handle_access.py new file mode 100644 index 0000000..125a99c --- /dev/null +++ b/starpupy/src/handle_access.py @@ -0,0 +1,49 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +from starpu import starpupy +import starpu +import inspect + +def access(**kwargs): + def access_decorator(func): + # write access modes in the func.starpu_access attribute + setattr(func,'starpu_access', kwargs) + return func + return access_decorator + +# set mode as **kwargs of func +def set_access(func, **kwargs): + # write access modes in the func.starpu_access attribute + setattr(func,'starpu_access', kwargs) + return func + + + + + + + + + + + + + + + + + + diff --git a/starpupy/src/intermedia.py b/starpupy/src/intermedia.py new file mode 100644 index 0000000..f0c0c5d --- /dev/null +++ b/starpupy/src/intermedia.py @@ -0,0 +1,113 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# + +import starpu +from starpu import starpupy +import os +import inspect +import array + +# define the different architecture +STARPU_CPU_WORKER = 0 +STARPU_CUDA_WORKER = 1 +STARPU_OPENCL_WORKER = 2 +STARPU_MAX_FPGA_WORKER = 4 +STARPU_MPI_MS_WORKER = 5 +STARPU_TCPIP_MS_WORKER = 6 +STARPU_HIP_WORKER = 7 +STARPU_NARCH = 8 +STARPU_ANY_WORKER = 255 + +#class perfmodel +class Perfmodel(object): + def __init__(self, symbol): + self.symbol=symbol + self.pstruct=starpupy.init_perfmodel(self.symbol) + + def get_struct(self): + return self.pstruct + + def __del__(self): + #def free_struct(self): + starpupy.free_perfmodel(self.pstruct) + +# generate the dictionary which contains the perfmodel symbol and its struct pointer +dict_perf={} +def dict_perf_generator(perfsymbol): + if dict_perf.get(perfsymbol)==None: + p=Perfmodel(perfsymbol) + dict_perf[perfsymbol]=p + else: + p=dict_perf[perfsymbol] + return p + +# add options in function task_submit +def task_submit(**kwargs): + # set perfmodel + perf=None + if kwargs.__contains__("perfmodel") and kwargs['perfmodel']!=None: + perf=dict_perf_generator(kwargs['perfmodel']) + kwargs['perfmodel']=perf + + def call_task_submit(f, *args): + modes={} + # if there is access mode defined + if hasattr(f,"starpu_access"): + # the starpu_access attribute of f is the access mode + access_mode=f.starpu_access + # get the name of formal arguments of f + f_args = inspect.getfullargspec(f).args + # check the access right of argument is set in mode or not + for i in range(len(f_args)): + if f_args[i] in access_mode.keys(): + # set access mode in modes option + modes[id(args[i])]=access_mode[f_args[i]] + kwargs['modes']=modes + + res=starpupy._task_submit(f, *args, kwargs) + return res + return call_task_submit + +# dump performance model and show the plot +def perfmodel_plot(perfmodel, view=True): + p=dict_perf[perfmodel] + starpupy.save_history_based_model(p) + if view == True: + os.system('starpu_perfmodel_plot -s "' + perfmodel +'"') + 
os.system('gnuplot starpu_'+perfmodel+'.gp') + os.system('gv starpu_'+perfmodel+'.eps') + +# acquire object +def acquire(obj, mode='R'): + return starpupy.starpupy_acquire_object(obj, mode) + +# release object +def release(obj): + return starpupy.starpupy_release_object(obj) + +# unregister object +def unregister(obj): + return starpupy.starpupy_data_unregister_object(obj) + +# submit unregistration of object +def unregister_submit(obj): + return starpupy.starpupy_data_unregister_submit_object(obj) + +def init(): + return starpupy.init() + +def shutdown(): + return starpupy.shutdown() diff --git a/starpupy/src/joblib.py b/starpupy/src/joblib.py new file mode 100644 index 0000000..b7a23eb --- /dev/null +++ b/starpupy/src/joblib.py @@ -0,0 +1,379 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# + +import sys +import types +import joblib as jl +from joblib import logger +from joblib._parallel_backends import ParallelBackendBase +from starpu import starpupy +from starpu import Handle +import starpu +import asyncio +import math +import functools +try: + import numpy as np + has_numpy=True +except: + has_numpy=False +import inspect +import threading + +loop = asyncio.get_event_loop() +if (loop.is_running()): + try: + import nest_asyncio + nest_asyncio.apply() + has_nest=True + except (ModuleNotFoundError, ImportError): + has_nest=False + +BACKENDS={ + #'loky': LokyBackend, +} +_backend = threading.local() + +# get the number of CPUs controlled by StarPU +def cpu_count(): + n_cpus=starpupy.worker_get_count_by_type(starpu.STARPU_CPU_WORKER) + return n_cpus + +# split a list ls into n_block sub-lists +def partition(ls, n_block): + if len(ls)>=n_block: + # there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0) + q1=math.ceil(len(ls)/n_block) + q2=math.floor(len(ls)/n_block) + n1=len(ls)%n_block + #n2=n_block-n1 + # generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2 + L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)] + L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)] + + L=L1+L2 + else: + # if the block number is larger than the length of list, each element in the list is a sub-list + L=[ls[i:i+1] for i in range (len(ls))] + return L + +# split a two-dimensional numpy array into n_block sub-matrices +def array2d_split(a, n_block): + # factor n_block into two integers c and r with c*r == n_block + c_tmp=math.floor(math.sqrt(n_block)) + for i in range (c_tmp,0,-1): + if n_block%i==0: + c=i + r=int(n_block/c) + break + # split along axis 0 into c blocks + arr_split_c=np.array_split(a,c,0) + arr_split=[] + # split each block along axis 1 into r sub-matrices + for i in range(c): + arr_split_r=np.array_split(arr_split_c[i],r,1) + for j in range(r): + arr_split.append(arr_split_r[j].copy(order='C')) + return arr_split + + +def
future_generator(iterable, n_jobs, dict_task): + # iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)] + #print("iterable type is ", type(iterable)) + #print("iterable is", iterable) + # get the number of block + if n_jobs<-cpu_count()-1: + raise SystemExit('Error: n_jobs is out of range, number of CPUs is', cpu_count()) + elif n_jobs<0: + n_block=cpu_count()+1+n_jobs + else: + n_block=n_jobs + if (n_block <= 0): + n_block = 1 + + # if arguments is tuple format + if type(iterable) is tuple: + # the function is always the first element + f=iterable[0] + # get the name of formal arguments of f + formal_args=inspect.getfullargspec(f).args + # get the arguments list + args=[] + # argument is arbitrary in iterable[1] + args=list(iterable[1]) + # argument is keyword argument in iterable[2] + for i in range(len(formal_args)): + for j in iterable[2].keys(): + if j==formal_args[i]: + args.append(iterable[2][j]) + # check whether all arrays have the same size + l_arr=[] + # list of Future result + L_fut=[] + # split the vector + args_split=[] + # handle list + arg_h=[] + for i in range(len(args)): + args_split.append([]) + # if the array is an numpy array + if has_numpy and type(args[i]) is np.ndarray: + # check whether the arg is already registered + handle_dict = starpu.handle_dict + if handle_dict.get(id(args[i]))==None: + arr_h = Handle(args[i]) + arg_h.append(arr_h) + args_split[i] = arr_h.partition(n_block, 0) + else: + arr_h = handle_dict.get(id(args[i])) + arg_h.append(arr_h) + args_split[i] = arr_h.partition(n_block, 0) + # if the array is a generator + elif isinstance(args[i],types.GeneratorType): + # split generator + args_split[i]=partition(list(args[i]),n_block) + arg_h.append(None) + # get the length of generator + l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i])))) + else: + arg_h.append(None) + if len(set(l_arr))>1: + raise SystemExit('Error: all arrays should have the 
same size') + #print("args list is", args_split) + for i in range(n_block): + # generate the argument list + L_args=[] + sizebase=0 + for j in range(len(args)): + if (has_numpy and type(args[j]) is np.ndarray): + L_args.append(args_split[j][i]) + n_arr = arg_h[j].get_partition_size(args_split[j]) + if sizebase==0: + sizebase=n_arr[i] + elif sizebase==n_arr[i]: + continue + else: + raise SystemExit('Error: all arrays should be split into equal size') + elif isinstance(args[j],types.GeneratorType): + L_args.append(args_split[j][i]) + if sizebase==0: + sizebase=len(args_split[j][i]) + elif sizebase==len(args_split[j][i]): + continue + else: + raise SystemExit('Error: all arrays should be split into equal size') + else: + L_args.append(args[j]) + #print("L_args is", L_args) + fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\ + color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'], sizebase=sizebase,\ + ret_handle=dict_task['ret_handle'], ret_fut=dict_task['ret_fut'], arg_handle=dict_task['arg_handle'], modes=dict_task['modes'])\ + (f, *L_args) + L_fut.append(fut) + # unpartition and unregister the numpy array + for i in range(len(args)): + if (has_numpy and type(args[i]) is np.ndarray): + arg_h[i].unpartition(args_split[i], n_block) + arg_h[i].unregister() + return L_fut + + # if iterable is a generator or a list of function + else: + L=list(iterable) + #print(L) + # generate a list of function according to iterable + def lf(ls): + L_func=[] + for i in range(len(ls)): + # the first element is the function + f=ls[i][0] + # the second element is the args list of a type tuple + L_args=list(ls[i][1]) + # generate a list of function + L_func.append(f(*L_args)) + return L_func + + # generate the split function list + L_split=partition(L,n_block) + # operation in each split list + L_fut=[] + for i in range(len(L_split)): + sizebase=len(L_split[i]) + 
fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\ + color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'], sizebase=sizebase,\ + ret_handle=dict_task['ret_handle'], ret_fut=dict_task['ret_fut'], arg_handle=dict_task['arg_handle'], modes=dict_task['modes'])\ + (lf, L_split[i]) + L_fut.append(fut) + return L_fut + +class Parallel(object): + def __init__(self, mode="normal", perfmodel=None, end_msg=None,\ + name=None, synchronous=0, priority=0, color=None, flops=None,\ + ret_handle=False, ret_fut=True, arg_handle=True, modes=None,\ + n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\ + batch_size='auto', temp_folder=None, max_nbytes='1M',\ + mmap_mode='r', prefer=None, require=None): + #active_backend= get_active_backend() + # nesting_level = active_backend.nesting_level + + # if backend is None: + # backend = active_backend + + # else: + # try: + # backend_factory = BACKENDS[backend] + # except KeyError as e: + # raise ValueError("Invalid backend: %s, expected one of %r" + # % (backend, sorted(BACKENDS.keys()))) from e + # backend = backend_factory(nesting_level=nesting_level) + + if n_jobs is None: + n_jobs = 1 + + self.mode=mode + self.perfmodel=perfmodel + self.end_msg=end_msg + self.name=name + self.synchronous=synchronous + self.priority=priority + self.color=color + self.flops=flops + self.ret_handle=ret_handle + self.ret_fut=ret_fut + self.arg_handle=arg_handle + self.modes=modes + self.n_jobs=n_jobs + self._backend=backend + + def print_progress(self): + #todo + print("", starpupy.task_nsubmitted()) + + def __call__(self,iterable): + #generate the dictionary of task_submit + dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel, 'ret_handle': self.ret_handle, 'ret_fut': self.ret_fut, 'arg_handle': self.arg_handle, 'modes': 
self.modes} + if hasattr(self._backend, 'start_call'): + self._backend.start_call() + # the mode normal, user can call the function directly without using async + if self.mode=="normal": + async def asy_main(): + L_fut=future_generator(iterable, self.n_jobs, dict_task) + res=[] + for i in range(len(L_fut)): + L_res=await L_fut[i] + if L_res is None: + res=None + else: + res.extend(L_res) + #print(res) + #print("type of result is", type(res)) + return res + #asyncio.run(asy_main()) + #retVal=asy_main + #loop = asyncio.get_event_loop() + if(loop.is_running() and not has_nest): + raise starpupy.error("Can't find \'nest_asyncio\' module (consider running \"pip3 install nest_asyncio\" or try to remove \"-m asyncio\" when starting Python interpreter)") + + results = loop.run_until_complete(asy_main()) + retVal = results + # the mode future, user needs to use asyncio module and await the Future result in main function + elif self.mode=="future": + L_fut=future_generator(iterable, self.n_jobs, dict_task) + fut=asyncio.gather(*L_fut) + if self.end_msg!=None: + fut.add_done_callback(functools.partial(print, self.end_msg)) + retVal=fut + if hasattr(self._backend, 'stop_call'): + self._backend.stop_call() + return retVal + +def delayed(function): + def delayed_function(*args, **kwargs): + return function, args, kwargs + return delayed_function + + +###################################################################### +__version__ = jl.__version__ + +class Memory(jl.Memory): + def __init__(self,location=None, backend='local', cachedir=None, + mmap_mode=None, compress=False, verbose=1, bytes_limit=None, + backend_options=None): + super(Memory, self).__init__(location=None, backend='local', cachedir=None, + mmap_mode=None, compress=False, verbose=1, bytes_limit=None, + backend_options=None) + + +def dump(value, filename, compress=0, protocol=None, cache_size=None): + return jl.dump(value, filename, compress, protocol, cache_size) + +def load(filename, mmap_mode=None): + return 
jl.load(filename, mmap_mode) + +def hash(obj, hash_name='md5', coerce_mmap=False): + return jl.hash(obj, hash_name, coerce_mmap) + +def register_compressor(compressor_name, compressor, force=False): + return jl.register_compressor(compressor_name, compressor, force) + +def effective_n_jobs(n_jobs=-1): + return cpu_count() + +def get_active_backend(): + backend_and_jobs = getattr(_backend, 'backend_and_jobs', None) + if backend_and_jobs is not None: + backend,n_jobs=backend_and_jobs + return backend + backend = BACKENDS[loky](nesting_level=0) + return backend + +class parallel_backend(object): + def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None, + **backend_params): + if isinstance(backend, str): + backend = BACKENDS[backend](**backend_params) + + current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None) + if backend.nesting_level is None: + if current_backend_and_jobs is None: + nesting_level = 0 + else: + nesting_level = current_backend_and_jobs[0].nesting_level + + backend.nesting_level = nesting_level + + # Save the backends info and set the active backend + self.old_backend_and_jobs = current_backend_and_jobs + self.new_backend_and_jobs = (backend, n_jobs) + + _backend.backend_and_jobs = (backend, n_jobs) + + def __enter__(self): + return self.new_backend_and_jobs + + def __exit__(self, type, value, traceback): + self.unregister() + + def unregister(self): + if self.old_backend_and_jobs is None: + if getattr(_backend, 'backend_and_jobs', None) is not None: + del _backend.backend_and_jobs + else: + _backend.backend_and_jobs = self.old_backend_and_jobs + +def register_parallel_backend(name, factory): + BACKENDS[name] = factory diff --git a/starpupy/src/setup.cfg.in b/starpupy/src/setup.cfg.in new file mode 100644 index 0000000..ed6fca0 --- /dev/null +++ b/starpupy/src/setup.cfg.in @@ -0,0 +1,24 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +[build] +build_platlib=build +build_temp=build/tmp + +[install] +root=@prefix@ +install_lib=/lib/python@PYTHON_VERSION@/site-packages + + diff --git a/starpupy/src/setup.py.in b/starpupy/src/setup.py.in new file mode 100644 index 0000000..e4e4cfd --- /dev/null +++ b/starpupy/src/setup.py.in @@ -0,0 +1,56 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +from setuptools import setup, Extension +import sys + +numpy_dir = '@PYTHON_NUMPY_DIR@' +if numpy_dir != '': + numpy_include_dir = [numpy_dir] +else: + numpy_include_dir = [] + +cppflags = '@STARPU_H_CPPFLAGS@' +am_cflags = '@GLOBAL_AM_CFLAGS@' +cflags = '@CFLAGS@' +compile_args = cppflags.split(' ') + am_cflags.split(' ') + cflags.split(' ') +extra_compile_args = [] +for f in compile_args: + if f: + extra_compile_args.append(f) + +ver = sys.version_info +libpython = 'python%s.%s%s' % (ver.major, ver.minor, sys.abiflags) + +starpupy = Extension('starpu.starpupy', + include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include', '@STARPU_SRC_DIR@/starpupy/src', '@STARPU_SRC_DIR@/src', '@STARPU_BUILD_DIR@/src'] + numpy_include_dir, + libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@', libpython], + extra_compile_args = extra_compile_args, + extra_link_args = ['-Wl,-rpath,$ORIGIN/../../../', '-shared', @STARPUPY_EXTRA_LINK_ARGS@], + library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'], + sources = ['starpu/starpu_task_wrapper.c', 'starpu/starpupy_handle.c', 'starpu/starpupy_interface.c', 'starpu/starpupy_buffer_interface.c', 'starpu/starpupy_numpy_filters.c']) + +setup( + name = 'starpupy', + version = '0.5', + description = 'Python bindings for StarPU', + author = 'StarPU team', + author_email = 'starpu-devel@inria.fr', + url = 'https://starpu.gitlabpages.inria.fr/', + license = 'GPL', + platforms = 'posix', + ext_modules = [starpupy], + packages = ['starpu'], + ) diff --git a/starpupy/src/starpu_task_wrapper.c b/starpupy/src/starpu_task_wrapper.c new file mode 100644 index 0000000..945d68d --- /dev/null +++ b/starpupy/src/starpu_task_wrapper.c @@ -0,0 +1,2075 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Python C extension reference count special cases: + * 1. Stolen reference: When you pass an object reference into these functions, + * they take over ownership of the item passed to them, even if they fail (except PyModule_AddObject()). + * PyErr_SetExcInfo() + * PyException_SetContext() + * PyException_SetCause() + * PyTuple_SetItem() + * PyTuple_SET_ITEM() + * PyStructSequence_SetItem() + * PyStructSequence_SET_ITEM() + * PyList_SetItem() + * PyList_SET_ITEM() + * PyModule_AddObject(): Unlike other functions that steal references, this function only decrements + * the reference count of value on success. The new PyModule_AddObjectRef() function + * is recommended for Python version >= 3.10 + * 2. Borrowed reference: return references that you borrow from the tuple, list or dictionary etc. + * The borrowed reference’s lifetime is guaranteed until the function returns. It does not modify the + * object reference count. It becomes a dangling pointer if the object is destroyed. + * Calling Py_INCREF() on the borrowed reference is recommended to convert it to a strong reference + * inplace, except when the object cannot be destroyed before the last usage of the borrowed reference. 
+ * PyErr_Occurred() + * PySys_GetObject() + * PySys_GetXOptions() + * PyImport_AddModuleObject() + * PyImport_AddModule() + * PyImport_GetModuleDict() + * PyEval_GetBuiltins() + * PyEval_GetLocals() + * PyEval_GetGlobals() + * PyEval_GetFrame() + * PySequence_Fast_GET_ITEM() + * PyTuple_GetItem() + * PyTuple_GET_ITEM() + * PyStructSequence_GetItem() + * PyStructSequence_GET_ITEM() + * PyList_GetItem() + * PyList_GET_ITEM() + * PyDict_GetItem() + * PyDict_GetItemWithError() + * PyDict_GetItemString() + * PyDict_SetDefault() + * PyFunction_GetCode() + * PyFunction_GetGlobals() + * PyFunction_GetModule() + * PyFunction_GetDefaults() + * PyFunction_GetClosure() + * PyFunction_GetAnnotations() + * PyInstanceMethod_Function() + * PyInstanceMethod_GET_FUNCTION() + * PyMethod_Function() + * PyMethod_GET_FUNCTION() + * PyMethod_Self() + * PyMethod_GET_SELF() + * PyCell_GET() + * PyModule_GetDict() + * PyModuleDef_Init() + * PyState_FindModule() + * PyWeakref_GetObject() + * PyWeakref_GET_OBJECT() + * PyThreadState_GetDict() + * PyObject_Init() + * PyObject_InitVar() + * Py_TYPE() + * +*/ +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include "starpupy_cloudpickle.h" +#include "starpupy_handle.h" +#include "starpupy_interface.h" +#include "starpupy_buffer_interface.h" +#include "starpupy_numpy_filters.h" + +#define PY_SSIZE_T_CLEAN +#include + +static void STARPU_ATTRIBUTE_NORETURN print_exception(const char *msg, ...) 
+{ + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + PyObject *str = PyObject_CallMethod(value, "__str__", NULL); + Py_UCS4 *wstr = PyUnicode_AsUCS4Copy(str); + va_list ap; + va_start(ap, msg); + vfprintf(stderr, msg, ap); + va_end(ap); + fprintf(stderr, "got exception %ls\n", wstr); + STARPU_ASSERT(0); +} + +/*********************Functions passed in task_submit wrapper***********************/ + +static int active_multi_interpreter = 0; /*active multi-interpreter */ +static PyObject *StarpupyError; /*starpupy error exception*/ +static PyObject *asyncio_module; /*python asyncio module*/ +static PyObject *concurrent_futures_future_class; /*python concurrent.futures.Future class*/ +static PyObject *cloudpickle_module; /*cloudpickle module*/ +static PyObject *pickle_module; /*pickle module*/ +static PyObject *asyncio_wait_method = Py_None; /*method asyncio_wait_for_fut*/ +static PyObject *concurrent_futures_wait_method = Py_None; /*method concurrent_futures_wait_for_fut*/ +static PyObject *Handle_class = Py_None; /*Handle class*/ +static PyObject *Token_class = Py_None; /*Handle_token class*/ + +static pthread_t main_thread; + +/* Asyncio futures */ +static PyObject *cb_loop = Py_None; /*another event loop besides main running loop*/ +/* concurrent.futures */ +static PyObject *cb_executor = Py_None; /*executor for callbacks*/ +static pthread_t thread_id; + +static PyThreadState *orig_thread_states[STARPU_NMAXWORKERS]; +static PyThreadState *new_thread_states[STARPU_NMAXWORKERS]; + +/*********************************************************************************************/ + +static uint32_t where_inter = STARPU_CPU; + +/* prologue_callback_func*/ +void starpupy_prologue_cb_func(void *cl_arg) +{ + (void)cl_arg; + PyObject *func_data; + size_t func_data_size; + PyObject *func_py; + PyObject *argList; + PyObject *fut; + PyObject *loop; + int h_flag; + PyObject *perfmodel; + int sb; + + /*make sure we own the GIL*/ + PyGILState_STATE 
state = PyGILState_Ensure(); + + struct starpu_task *task = starpu_task_get_current(); + /*Initialize struct starpu_codelet_unpack_arg_data*/ + struct starpu_codelet_pack_arg_data data_org; + starpu_codelet_unpack_arg_init(&data_org, task->cl_arg, task->cl_arg_size); + + if(active_multi_interpreter) + { + /*get func_py char**/ + starpu_codelet_pick_arg(&data_org, (void**)&func_data, &func_data_size); + } + else + { + /*get func_py*/ + starpu_codelet_unpack_arg(&data_org, &func_py, sizeof(func_py)); + } + + /*get argList*/ + starpu_codelet_unpack_arg(&data_org, &argList, sizeof(argList)); + /*get fut*/ + starpu_codelet_unpack_arg(&data_org, &fut, sizeof(fut)); + /*get loop*/ + starpu_codelet_unpack_arg(&data_org, &loop, sizeof(loop)); + /*get h_flag*/ + starpu_codelet_unpack_arg(&data_org, &h_flag, sizeof(h_flag)); + /*get perfmodel*/ + starpu_codelet_unpack_arg(&data_org, &perfmodel, sizeof(perfmodel)); + /*get sb*/ + starpu_codelet_unpack_arg(&data_org, &sb, sizeof(sb)); + + starpu_codelet_unpack_arg_fini(&data_org); + + /*check if there is Future in argList, if so, get the Future result*/ + int i; + int fut_flag = 0; + + for(i=0; i < PyTuple_Size(argList); i++) + { + PyObject *obj=PyTuple_GetItem(argList, i); + /*protect borrowed reference, decremented in the end of the loop*/ + Py_INCREF(obj); + const char* tp = Py_TYPE(obj)->tp_name; + if(strcmp(tp, "_asyncio.Future") == 0 || + strcmp(tp, "Future") == 0) + { + fut_flag = 1; + PyObject *done = PyObject_CallMethod(obj, "done", NULL); + /*if the argument is Future and future object is not finished, we will await its result in cb_loop, since the main loop may be occupied to await the final result of function*/ + if (!PyObject_IsTrue(done)) + { + /*if the future object is not finished, get its corresponding arg_fut*/ + PyObject *cb_obj = PyObject_GetAttrString(obj, "arg_fut"); + + if(strcmp(tp, "_asyncio.Future") == 0) + { + /* asyncio */ + + /*call the method asyncio_wait_for_fut to await obj*/ + if 
(asyncio_wait_method == Py_None) + asyncio_wait_method = PyDict_GetItemString(starpu_dict, "asyncio_wait_for_fut"); + + PyObject *wait_obj = PyObject_CallFunctionObjArgs(asyncio_wait_method, cb_obj, NULL); + + /*decrement the reference obtained before if{}, then get the new reference*/ + Py_DECREF(cb_obj); + + /*call obj = asyncio.run_coroutine_threadsafe(wait_for_fut(cb_obj), cb_loop)*/ + cb_obj = PyObject_CallMethod(asyncio_module, "run_coroutine_threadsafe", "O,O", wait_obj, cb_loop); + + Py_DECREF(wait_obj); + } + else + { + /* concurrent.futures */ + + /*call the method concurrent_futures_wait_for_fut to await obj*/ + if (concurrent_futures_wait_method == Py_None) + concurrent_futures_wait_method = PyDict_GetItemString(starpu_dict, "concurrent_futures_wait_for_fut"); + + /*call obj = executor.submit(wait_for_fut, cb_obj)*/ + PyObject *new_obj = PyObject_CallMethod(cb_executor, "submit", "O,O", concurrent_futures_wait_method, cb_obj); + + /*decrement the reference obtained before if{}, then get the new reference*/ + Py_DECREF(cb_obj); + + cb_obj = new_obj; + } + + Py_DECREF(obj); + obj = cb_obj; + } + + /*if one of arguments is Future, get its result*/ + PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL); + /*replace the Future argument to its result*/ + PyTuple_SetItem(argList, i, fut_result); + + Py_DECREF(done); + } + Py_DECREF(obj); + } + + int pack_flag = 0; + if(active_multi_interpreter||fut_flag) + pack_flag = 1; + + /*if the argument is changed in arglist or program runs with multi-interpreter, repack the data*/ + if(pack_flag == 1) + { + /*Initialize struct starpu_codelet_pack_arg_data*/ + struct starpu_codelet_pack_arg_data data; + starpu_codelet_pack_arg_init(&data); + + if(active_multi_interpreter) + { + /*repack func_data*/ + starpu_codelet_pack_arg(&data, func_data, func_data_size); + /*use cloudpickle to dump argList*/ + Py_ssize_t arg_data_size; + char* arg_data; + PyObject *arg_bytes = starpu_cloudpickle_dumps(argList, &arg_data, 
&arg_data_size); + starpu_codelet_pack_arg(&data, arg_data, arg_data_size); + Py_DECREF(arg_bytes); + Py_DECREF(argList); + } + else if (fut_flag) + { + /*repack func_py*/ + starpu_codelet_pack_arg(&data, &func_py, sizeof(func_py)); + /*repack arglist*/ + starpu_codelet_pack_arg(&data, &argList, sizeof(argList)); + } + + /*repack fut*/ + starpu_codelet_pack_arg(&data, &fut, sizeof(fut)); + /*repack loop*/ + starpu_codelet_pack_arg(&data, &loop, sizeof(loop)); + /*repack h_flag*/ + starpu_codelet_pack_arg(&data, &h_flag, sizeof(h_flag)); + /*repack perfmodel*/ + starpu_codelet_pack_arg(&data, &perfmodel, sizeof(perfmodel)); + /*repack sb*/ + starpu_codelet_pack_arg(&data, &sb, sizeof(sb)); + /*free the pointer precedent*/ + free(task->cl_arg); + /*finish repacking data and store the struct in cl_arg*/ + starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size); + } + free((void*)task->name); + + /*restore previous GIL state*/ + PyGILState_Release(state); +} + +/*function passed to starpu_codelet.cpu_func*/ +void starpupy_codelet_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + PyObject *func_py; /*the python function passed in*/ + PyObject *pFunc; + PyObject *argList; /*argument list of python function passed in*/ + int h_flag; /*detect return value is handle or not*/ + + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + struct starpu_task *task = starpu_task_get_current(); + /*Initialize struct starpu_codelet_unpack_arg_data*/ + struct starpu_codelet_pack_arg_data data; + starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); + + if(active_multi_interpreter) + { + char* func_data; + size_t func_data_size; + char* arg_data; + size_t arg_data_size; + /*get func_py char**/ + starpu_codelet_pick_arg(&data, (void**)&func_data, &func_data_size); + /*use cloudpickle to load function (maybe only function name), return a new reference*/ + pFunc=starpu_cloudpickle_loads(func_data, func_data_size); + if 
(!pFunc) + print_exception("cloudpickle could not unpack the function from the main interpreter"); + /*get argList char**/ + starpu_codelet_pick_arg(&data, (void**)&arg_data, &arg_data_size); + /*use cloudpickle to load argList*/ + argList=starpu_cloudpickle_loads(arg_data, arg_data_size); + if (!argList) + print_exception("cloudpickle could not unpack the argument list from the main interpreter"); + } + else + { + /*get func_py*/ + starpu_codelet_unpack_arg(&data, &pFunc, sizeof(pFunc)); + /*get argList*/ + starpu_codelet_unpack_arg(&data, &argList, sizeof(argList)); + } + + /*skip fut*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip loop*/ + starpu_codelet_unpack_discard_arg(&data); + /*get h_flag*/ + starpu_codelet_unpack_arg(&data, &h_flag, sizeof(h_flag)); + /*skip perfmodel*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip sb*/ + starpu_codelet_unpack_discard_arg(&data); + + starpu_codelet_unpack_arg_fini(&data); + + /* if the function name is passed in*/ + const char* tp_func = Py_TYPE(pFunc)->tp_name; + if (strcmp(tp_func, "str")==0) + { + /*getattr(sys.modules[__name__], "")*/ + /*get sys.modules*/ + PyObject *sys_modules = PyImport_GetModuleDict(); + /*protect borrowed reference, decrement after being called by the function*/ + Py_INCREF(sys_modules); + /*get sys.modules[__name__]*/ + PyObject *sys_modules_name=PyDict_GetItemString(sys_modules,"__main__"); + /*protect borrowed reference, decrement after being called by the function*/ + Py_INCREF(sys_modules_name); + /*get function object*/ + func_py=PyObject_GetAttr(sys_modules_name,pFunc); + Py_DECREF(sys_modules); + Py_DECREF(sys_modules_name); + + /*decrement the reference obtained from unpack*/ + Py_DECREF(pFunc); + } + else + { + /*transfer the ref of pFunc to func_py*/ + func_py=pFunc; + } + + /*check if there is Handle in argList, if so, get the object*/ + int h_index= (h_flag ? 
1 : 0); + int i; + /*if there is the return Handle in argList, length of argList minus 1*/ + Py_ssize_t pArglist_len = (h_flag == 2) ? PyTuple_Size(argList)-1 : PyTuple_Size(argList); + /*new tuple contains all function arguments, decrement after calling function*/ + PyObject *pArglist = PyTuple_New(pArglist_len); + for(i=0; i < pArglist_len; i++) + { + /*if there is the return Handle in argList, start with the second argument*/ + PyObject *obj= (h_flag == 2) ? PyTuple_GetItem(argList, i+1) : PyTuple_GetItem(argList, i); + /*protect borrowed reference, is decremented in the end of the loop*/ + Py_INCREF(obj); + const char* tp = Py_TYPE(obj)->tp_name; + if(strcmp(tp, "Handle_token") == 0) + { + /*if one of arguments is Handle, replace the Handle argument to the object*/ + if ((task->handles[h_index] && STARPUPY_PYOBJ_CHECK(task->handles[h_index])) || STARPUPY_PYOBJ_CHECK_INTERFACE(descr[h_index])) + { + PyObject *obj_handle = STARPUPY_GET_PYOBJECT(descr[h_index]); + PyTuple_SetItem(pArglist, i, obj_handle); + } + else if ((task->handles[h_index] && STARPUPY_BUF_CHECK(task->handles[h_index])) || STARPUPY_BUF_CHECK_INTERFACE(descr[h_index])) + { + PyObject *buf_handle = STARPUPY_BUF_GET_PYOBJECT(descr[h_index]); + PyTuple_SetItem(pArglist, i, buf_handle); + } + else + { + STARPU_ASSERT_MSG(0, "unexpected object %d\n", ((struct starpupyobject_interface *)(descr[h_index]))->id); + } + + h_index++; + } + else + { + Py_INCREF(obj); + PyTuple_SetItem(pArglist, i, obj); + } + Py_DECREF(obj); + } + + // printf("arglist before applying is "); + // PyObject_Print(pArglist, stdout, 0); + // printf("\n"); + + /*verify that the function is a proper callable*/ + if (!PyCallable_Check(func_py)) + { + PyErr_Format(StarpupyError, "Expected a callable function"); + } + + /*call the python function get the return value rv, it's a new reference*/ + PyObject *rv = PyObject_CallObject(func_py, pArglist); + if (!rv) + PyErr_PrintEx(1); + + // printf("arglist after applying is "); + // 
PyObject_Print(pArglist, stdout, 0); + // printf("\n"); + + // printf("rv after call function is "); + // PyObject_Print(rv, stdout, 0); + // printf("\n"); + + /*if return handle*/ + if(h_flag) + { + STARPU_ASSERT(STARPUPY_PYOBJ_CHECK(task->handles[0])); + /*pass ref to descr[0]*/ + STARPUPY_SET_PYOBJECT(descr[0], rv); + } + else + { + /*Initialize struct starpu_codelet_pack_arg_data for return value*/ + struct starpu_codelet_pack_arg_data data_ret; + starpu_codelet_pack_arg_init(&data_ret); + + /*if the result is None type, pack NULL without using cloudpickle*/ + if (rv==Py_None) + { + char* rv_data=NULL; + Py_ssize_t rv_data_size=0; + starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); + starpu_codelet_pack_arg(&data_ret, &rv_data, sizeof(rv_data)); + /*decrement the ref obtained from callobject*/ + Py_DECREF(rv); + } + else + { + if(active_multi_interpreter) + { + /*else use cloudpickle to dump rv*/ + Py_ssize_t rv_data_size; + char* rv_data; + PyObject *rv_bytes = starpu_cloudpickle_dumps(rv, &rv_data, &rv_data_size); + starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); + starpu_codelet_pack_arg(&data_ret, rv_data, rv_data_size); + Py_DECREF(rv_bytes); + Py_DECREF(rv); + } + else + { + /*if the result is not None type, we set rv_data_size to 1, it does not mean that the data size is 1, but only for determine statements*/ + size_t rv_data_size=1; + starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); + /*pack rv*/ + starpu_codelet_pack_arg(&data_ret, &rv, sizeof(rv)); + } + } + + /*store the return value in task->cl_ret*/ + starpu_codelet_pack_arg_fini(&data_ret, &task->cl_ret, &task->cl_ret_size); + + task->cl_ret_free = 1; + } + + /*decrement the ref obtained from pFunc*/ + Py_DECREF(func_py); + /*decrement the ref obtained by unpack*/ + Py_DECREF(argList); + /*decrement the ref obtains by PyTuple_New*/ + Py_DECREF(pArglist); + + /*restore previous GIL state*/ + PyGILState_Release(state); +} + 
+/*function passed to starpu_task.epilogue_callback_func*/ +void starpupy_epilogue_cb_func(void *v) +{ + (void)v; + PyObject *fut; /*asyncio.Future*/ + PyObject *loop; /*asyncio.Eventloop*/ + int h_flag; + PyObject *perfmodel; + char* rv_data; + size_t rv_data_size; + PyObject *rv; /*return value when using PyObject_CallObject call the function f*/ + + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + struct starpu_task *task = starpu_task_get_current(); + + /*Initialize struct starpu_codelet_unpack_arg_data data*/ + struct starpu_codelet_pack_arg_data data; + starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); + + /*skip func_py*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip argList*/ + starpu_codelet_unpack_discard_arg(&data); + /*get fut*/ + starpu_codelet_unpack_arg(&data, &fut, sizeof(fut)); + /*get loop*/ + starpu_codelet_unpack_arg(&data, &loop, sizeof(loop)); + /*get h_flag*/ + starpu_codelet_unpack_arg(&data, &h_flag, sizeof(h_flag)); + /*get perfmodel*/ + starpu_codelet_unpack_arg(&data, &perfmodel, sizeof(perfmodel)); + /*skip sb*/ + starpu_codelet_unpack_discard_arg(&data); + + starpu_codelet_unpack_arg_fini(&data); + + /*if return value is not handle, unpack from cl_ret*/ + if(!h_flag) + { + /*Initialize struct starpu_codelet_unpack_arg_data data*/ + struct starpu_codelet_pack_arg_data data_ret; + starpu_codelet_unpack_arg_init(&data_ret, task->cl_ret, task->cl_ret_size); + /*get rv_data_size*/ + starpu_codelet_unpack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); + + /*if the rv_data_size is 0, the result is None type*/ + if (rv_data_size==0) + { + starpu_codelet_unpack_discard_arg(&data_ret); + rv=Py_None; + Py_INCREF(rv); + } + /*else use cloudpickle to load rv*/ + else if(active_multi_interpreter) + { + /*get rv char**/ + starpu_codelet_pick_arg(&data_ret, (void**)&rv_data, &rv_data_size); + /*use cloudpickle to load rv*/ + rv=starpu_cloudpickle_loads(rv_data, rv_data_size); + } + 
else + { + /*unpack rv*/ + starpu_codelet_unpack_arg(&data_ret, &rv, sizeof(rv)); + } + + starpu_codelet_unpack_arg_fini(&data_ret); + + /*set the Future result and mark the Future as done*/ + if(fut!=Py_None) + { + PyObject *cb_fut = PyObject_GetAttrString(fut, "arg_fut"); + if (!cb_fut) + PyErr_PrintEx(1); + PyObject *cb_set_result = PyObject_GetAttrString(cb_fut, "set_result"); + if (!cb_set_result) + PyErr_PrintEx(1); + PyObject *set_result = PyObject_GetAttrString(fut, "set_result"); + if (!set_result) + PyErr_PrintEx(1); + + const char* tp = Py_TYPE(fut)->tp_name; + + if(strcmp(tp, "_asyncio.Future") == 0) + { + /* asyncio */ + + /*set the Future result in cb_loop*/ + PyObject *cb_loop_callback = PyObject_CallMethod(cb_loop, "call_soon_threadsafe", "(O,O)", cb_set_result, rv); + if (!cb_loop_callback) + PyErr_PrintEx(1); + Py_DECREF(cb_loop_callback); + + /*set the Future result in main running loop*/ + PyObject *loop_callback = PyObject_CallMethod(loop, "call_soon_threadsafe", "(O,O)", set_result, rv); + if (!loop_callback) + PyErr_PrintEx(1); + Py_DECREF(loop_callback); + } + else + { + /* concurrent.futures */ + + /*set the Future result in cb_loop*/ + PyObject *cb_loop_callback = PyObject_CallMethod(cb_executor, "submit", "(O,O)", cb_set_result, rv); + if (!cb_loop_callback) + PyErr_PrintEx(1); + Py_DECREF(cb_loop_callback); + + /*set the Future result in main running loop*/ + PyObject *loop_callback = PyObject_CallMethod(cb_executor, "submit", "(O,O)", set_result, rv); + if (!loop_callback) + PyErr_PrintEx(1); + Py_DECREF(loop_callback); + } + + Py_DECREF(cb_set_result); + Py_DECREF(cb_fut); + Py_DECREF(set_result); + } + + /*decrement the refs obtained from upack*/ + Py_DECREF(rv); + } + + Py_DECREF(fut); + Py_DECREF(loop); + + struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl; + if (func_cl->model != NULL) + { + Py_DECREF(perfmodel); + } + + /*restore previous GIL state*/ + PyGILState_Release(state); +} + +void starpupy_cb_func(void *v) 
+{ + (void)v; + struct starpu_task *task = starpu_task_get_current(); + + /*deallocate task*/ + free(task->cl); +} + +/***********************************************************************************/ +/*PyObject*->struct starpu_task**/ +static struct starpu_task *PyTask_AsTask(PyObject *obj) +{ + return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task"); +} + +/* destructor function for task */ +static void del_Task(PyObject *obj) +{ + struct starpu_task *obj_task=PyTask_AsTask(obj); + starpu_task_set_destroy(obj_task); +} + +/*struct starpu_task*->PyObject**/ +static PyObject *PyTask_FromTask(struct starpu_task *task) +{ + PyObject * task_cap = PyCapsule_New(task, "Task", del_Task); + return task_cap; +} + +/***********************************************************************************/ +static size_t sizebase (struct starpu_task *task, unsigned nimpl) +{ + (void)nimpl; + int sb; + + /*Initialize struct starpu_codelet_unpack_arg_data*/ + struct starpu_codelet_pack_arg_data data; + starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); + + /*skip func_py*/ + //starpu_codelet_unpack_discard_arg(&data); + starpu_codelet_unpack_discard_arg(&data); + /*skip argList*/ + //starpu_codelet_unpack_discard_arg(&data); + starpu_codelet_unpack_discard_arg(&data); + /*skip fut*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip loop*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip h_flag*/ + starpu_codelet_unpack_discard_arg(&data); + /*skip perfmodel*/ + starpu_codelet_unpack_discard_arg(&data); + /*get sb*/ + starpu_codelet_unpack_arg(&data, &sb, sizeof(sb)); + + starpu_codelet_unpack_arg_fini(&data); + + return sb; +} + +/*initialization of perfmodel*/ +static PyObject* init_perfmodel(PyObject *self, PyObject *args) +{ + (void)self; + char *sym; + + if (!PyArg_ParseTuple(args, "s", &sym)) + return NULL; + + /*allocate a perfmodel structure*/ + struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct 
starpu_perfmodel)); + + /*get the perfmodel symbol*/ + char *p =strdup(sym); + perf->symbol=p; + perf->type=STARPU_HISTORY_BASED; + perf->size_base=&sizebase; + + /*struct perfmodel*->PyObject**/ + PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL); + + return perfmodel; +} + +/*free perfmodel*/ +static PyObject* free_perfmodel(PyObject *self, PyObject *args) +{ + (void)self; + PyObject *perfmodel; + if (!PyArg_ParseTuple(args, "O", &perfmodel)) + return NULL; + + /*PyObject*->struct perfmodel**/ + struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf"); + + Py_BEGIN_ALLOW_THREADS; +#ifndef STARPU_SIMGRID + starpu_save_history_based_model(perf); +#endif + //starpu_perfmodel_unload_model(perf); + starpu_perfmodel_deinit(perf); + Py_END_ALLOW_THREADS; + free((void*)perf->symbol); + free(perf); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +#ifndef STARPU_SIMGRID +static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + PyObject *perfmodel; + if (!PyArg_ParseTuple(args, "O", &perfmodel)) + return NULL; + + /*call the method get_struct*/ + const char *tp_perfmodel = Py_TYPE(perfmodel)->tp_name; + if (strcmp(tp_perfmodel, "Perfmodel") != 0) + { + /*the argument should be the object of class Perfmodel*/ + PyErr_Format(StarpupyError, "Expected a Perfmodel object"); + return NULL; + } + + PyObject *perfmodel_capsule = PyObject_CallMethod(perfmodel, "get_struct", NULL); + + /*PyObject*->struct perfmodel**/ + const char *tp_perf = Py_TYPE(perfmodel_capsule)->tp_name; + if (strcmp(tp_perf, "PyCapsule") != 0) + { + /*the argument should be the PyCapsule object*/ + PyErr_Format(StarpupyError, "Expected a PyCapsule object"); + return NULL; + } + /*PyObject*->struct perfmodel**/ + struct starpu_perfmodel *perf = PyCapsule_GetPointer(perfmodel_capsule, "Perf"); + + Py_BEGIN_ALLOW_THREADS; + starpu_save_history_based_model(perf); + Py_END_ALLOW_THREADS; + + /*decrement the capsule object 
obtained from Perfmodel class*/ + Py_DECREF(perfmodel_capsule); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} +#endif + +/*****************************Wrappers of StarPU methods****************************/ +/*wrapper submit method*/ +static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + /*first argument in args is always the python function passed in*/ + PyObject *func_py = PyTuple_GetItem(args, 0); + /*protect borrowed reference, used in codelet pack, in case multi-interpreter, decremented after cloudpickle_dumps, otherwise decremented in starpupy_codelet_func*/ + Py_INCREF(func_py); + + /*Initialize struct starpu_codelet_pack_arg_data*/ + struct starpu_codelet_pack_arg_data data; + starpu_codelet_pack_arg_init(&data); + + if(active_multi_interpreter) + { + /*use cloudpickle to dump func_py*/ + Py_ssize_t func_data_size; + char* func_data; + PyObject *func_bytes = starpu_cloudpickle_dumps(func_py, &func_data, &func_data_size); + starpu_codelet_pack_arg(&data, func_data, func_data_size); + Py_DECREF(func_bytes); + /*decrement the ref obtained from args passed in*/ + Py_DECREF(func_py); + } + else + { + /*if there is no multi interpreter only pack func_py*/ + starpu_codelet_pack_arg(&data, &func_py, sizeof(func_py)); + } + + PyObject *loop; + PyObject *fut; + + /*allocate a task structure and initialize it with default values*/ + struct starpu_task *task = starpu_task_create(); + + /*allocate a codelet structure*/ + struct starpu_codelet *func_cl = (struct starpu_codelet*)malloc(sizeof(struct starpu_codelet)); + /*initialize func_cl with default values*/ + starpu_codelet_init(func_cl); + func_cl->cpu_funcs[0] = &starpupy_codelet_func; + func_cl->cpu_funcs_name[0] = "starpupy_codelet_func"; + func_cl->flags = STARPU_CODELET_SIMGRID_EXECUTE; + + int h_index = 0, h_flag = 0; + int nbuffer = 0; + /*the last argument is the option dictionary*/ + PyObject *dict_option = PyTuple_GetItem(args, 
PyTuple_Size(args)-1); + /*protect borrowed reference*/ + Py_INCREF(dict_option); + /*check whether the return value is handle*/ + PyObject *ret_handle = PyDict_GetItemString(dict_option, "ret_handle"); + /*set the default value*/ + if(ret_handle == NULL) + { + ret_handle = Py_False; + } + /*check whether the return value is fut*/ + PyObject *ret_fut = PyDict_GetItemString(dict_option, "ret_fut"); + /*set the default value*/ + if(ret_fut == NULL) + { + ret_fut = Py_True; + } + + /*check whether to store the return value as a parameter*/ + PyObject *ret_param = PyDict_GetItemString(dict_option, "ret_param"); + /*set the default value*/ + if(ret_param == NULL) + { + ret_param = Py_False; + } + /*if return value is a parameter, then we will not return a future nor handle object even ret_fut/ret_handle has been set to true*/ + else if(PyObject_IsTrue(ret_param)) + { + h_flag = 2; + ret_fut = Py_False; + ret_handle = Py_False; + } + + /*if return value is handle*/ + PyObject *r_handle_obj = NULL; + if(PyObject_IsTrue(ret_handle)) + { + h_flag = 1; + /*return value is handle there are no loop and fut*/ + loop = Py_None; + fut = Py_None; + + /* these are decremented in starpupy_epilogue_cb_func */ + Py_INCREF(loop); + Py_INCREF(fut); + + /*create Handle object Handle(None)*/ + /*import Handle class*/ + if (Handle_class == Py_None) + { + Handle_class = PyDict_GetItemString(starpu_dict, "Handle"); + } + + /*get the constructor, decremented after being called*/ + PyObject *pInstanceHandle = PyInstanceMethod_New(Handle_class); + + /*create a Null Handle object, decremented in the end of this if{}*/ + PyObject *handle_arg = PyTuple_New(2); + /*Py_None is used for PyTuple_SetItem(handle_arg), once handle_arg is decremented, Py_None is decremented as well*/ + Py_INCREF(Py_None); + PyTuple_SetItem(handle_arg, 0, Py_None); + PyTuple_SetItem(handle_arg, 1, Py_True); + + /*r_handle_obj will be the return value of this function starpu_task_submit_wrapper*/ + r_handle_obj = 
PyObject_CallObject(pInstanceHandle,handle_arg); + + /*get the Handle capsule object, decremented in the end of this if{}*/ + PyObject *r_handle_cap = PyObject_CallMethod(r_handle_obj, "get_capsule", NULL); + /*get Handle*/ + starpu_data_handle_t r_handle = (starpu_data_handle_t) PyCapsule_GetPointer(r_handle_cap, "Handle"); + + if (r_handle == (void*)-1) + { + PyErr_Format(StarpupyError, "Handle has already been unregistered"); + return NULL; + } + + task->handles[0] = r_handle; + func_cl->modes[0] = STARPU_W; + + h_index++; + nbuffer = h_index; + + Py_DECREF(pInstanceHandle); + Py_DECREF(handle_arg); + Py_DECREF(r_handle_cap); + } + else if(PyObject_IsTrue(ret_fut)) + { + PyObject *cb_fut; + + /*get the running asyncio Event loop, decremented in starpupy_epilogue_cb_func*/ + loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL); + + if (loop) + { + /*create a asyncio.Future object, decremented in starpupy_epilogue_cb_func*/ + fut = PyObject_CallMethod(loop, "create_future", NULL); + + if (fut == NULL) + { + PyErr_Format(StarpupyError, "Can't create future for loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); + return NULL; + } + + /*create a asyncio.Future object attached to cb_loop*/ + cb_fut = PyObject_CallMethod(cb_loop, "create_future", NULL); + + if (cb_fut == NULL) + { + PyErr_Format(StarpupyError, "Can't create future for cb_loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); + return NULL; + } + } + else + { + PyErr_Clear(); + + loop = Py_None; + /* this is decremented in starpupy_epilogue_cb_func */ + Py_INCREF(loop); + + /*create a concurrent.futures.Future object, decremented in starpupy_epilogue_cb_func*/ + PyObject *fut_instance = PyInstanceMethod_New(concurrent_futures_future_class); + fut = PyObject_CallObject(fut_instance, NULL); + + if (fut == NULL) + { + PyErr_Format(StarpupyError, "Can't create future from concurrent.futures module"); + return NULL; + } + 
+ /*create a concurrent.futures.Future object for cb_executor*/ + cb_fut = PyObject_CallObject(fut_instance, NULL); + + if (cb_fut == NULL) + { + PyErr_Format(StarpupyError, "Can't create future from concurrent.futures module"); + return NULL; + } + } + + int ret; + + /*set one of fut attribute to cb_fut*/ + ret = PyObject_SetAttrString(fut, "arg_fut", cb_fut); + if (ret) + { + PyErr_Format(StarpupyError, "Can't set arg_fut in fut"); + return NULL; + } + + Py_DECREF(cb_fut); + + task->destroy = 0; + PyObject *PyTask = PyTask_FromTask(task); + + /*set one of fut attribute to the task pointer*/ + ret = PyObject_SetAttrString(fut, "starpu_task", PyTask); + if (ret) + { + PyErr_Format(StarpupyError, "Can't set starpu_task in fut"); + return NULL; + } + + /*fut is the return value of this function*/ + Py_INCREF(fut); + + Py_DECREF(PyTask); + } + else + { + /* return value is not fut or handle there are no loop and fut*/ + loop = Py_None; + fut = Py_None; + + /* these are decremented in starpupy_epilogue_cb_func */ + Py_INCREF(loop); + Py_INCREF(fut); + } + + /*check the arguments of python function passed in*/ + int i; + for(i = 1; i < PyTuple_Size(args)-1; i++) + { + PyObject *obj = PyTuple_GetItem(args, i); + /*protect borrowed reference*/ + Py_INCREF(obj); + const char* tp = Py_TYPE(obj)->tp_name; + if(strcmp(tp, "_asyncio.Future") == 0 || + strcmp(tp, "Future") == 0) + { + /*if one of arguments is Future, get its corresponding task*/ + PyObject *fut_task = PyObject_GetAttrString(obj, "starpu_task"); + /*declare task dependencies between the current task and the corresponding task of Future argument*/ + starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task)); + + Py_DECREF(fut_task); + } + /*decrement the reference which is obtained at the beginning of the loop*/ + Py_DECREF(obj); + } + + /*check whether the option perfmodel is None*/ + PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel"); + /*protect borrowed reference, pack in cl_arg, decrement 
in starpupy_epilogue_cb_func*/ + Py_INCREF(perfmodel); + + /*call the method get_struct*/ + PyObject *perfmodel_capsule; + const char *tp_perfmodel = Py_TYPE(perfmodel)->tp_name; + if (strcmp(tp_perfmodel, "Perfmodel") == 0) + { + perfmodel_capsule = PyObject_CallMethod(perfmodel, "get_struct", NULL); + } + else + { + Py_INCREF(Py_None); + perfmodel_capsule = Py_None; + } + + const char *tp_perf = Py_TYPE(perfmodel_capsule)->tp_name; + if (strcmp(tp_perf, "PyCapsule") == 0) + { + /*PyObject*->struct perfmodel**/ + struct starpu_perfmodel *perf = PyCapsule_GetPointer(perfmodel_capsule, "Perf"); + func_cl->model = perf; + } + /*decrement the capsule object obtained from Perfmodel class*/ + Py_DECREF(perfmodel_capsule); + + /*create Handle object Handle(None)*/ + /*import Handle_token class*/ + if (Token_class == Py_None) + { + Token_class = PyDict_GetItemString(starpu_dict, "Handle_token"); + } + + /*get the constructor, decremented after passing args in argList*/ + PyObject *pInstanceToken = PyInstanceMethod_New(Token_class); + + /*check whether the argument is explicit handle*/ + PyObject *arg_handle = PyDict_GetItemString(dict_option, "arg_handle"); + /*set the default value*/ + if(arg_handle == NULL) + { + arg_handle = Py_True; + } + + /*argument list of python function passed in*/ + PyObject *argList; + + /*pass args in argList, argList is decremented in starpupy_codelet_func*/ + if (PyTuple_Size(args) == 2)/*function no arguments*/ + argList = PyTuple_New(0); + else + { + /*function has arguments*/ + argList = PyTuple_New(PyTuple_Size(args)-2); + int j; + for(j=0; jtp_name; + //printf("arg type is %s\n", tp_arg); + if (strcmp(tp_arg, "Handle") == 0 || strcmp(tp_arg, "HandleNumpy") == 0) + { + /*create the Handle_token object to replace the Handle Capsule*/ + PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); + PyTuple_SetItem(argList, j, token_obj); + + /*get Handle capsule object, decremented in the end of this if{}*/ + PyObject *tmp_cap = 
PyObject_CallMethod(tmp, "get_capsule", NULL); + + /*get Handle*/ + starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp_cap, "Handle"); + + if (tmp_handle == (void*)-1) + { + PyErr_Format(StarpupyError, "Handle has already been unregistered"); + return NULL; + } + + /*if the function result will be returned in parameter, the first argument will be the handle of return value, but this object should not be the Python object supporting buffer protocol*/ + if(PyObject_IsTrue(ret_param) && i==0 && STARPUPY_BUF_CHECK(tmp_handle)) + { + PyErr_Format(StarpupyError, "Return value as parameter should not be the Python object supporting buffer protocol"); + return NULL; + } + + task->handles[h_index] = tmp_handle; + /*set access mode*/ + /*mode is STARPU_R*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) + { + func_cl->modes[h_index] = STARPU_R; + } + /*mode is STARPU_W*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "W") == 0) + { + func_cl->modes[h_index] = STARPU_W; + } + /*mode is STARPU_RW*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) + { + func_cl->modes[h_index] = STARPU_RW; + } + /*access mode is not defined for Handle object, and this object is not the return value*/ + if(tmp_mode_py == NULL && strcmp(tp_arg, "Handle") == 0 && (!PyObject_IsTrue(ret_param) || (PyObject_IsTrue(ret_param) && j != 0))) + { + func_cl->modes[h_index] = STARPU_R; + } + /*access mode is not defined for Handle object, and this object is the return value*/ + if(tmp_mode_py == NULL && strcmp(tp_arg, "Handle") == 0 && PyObject_IsTrue(ret_param) && j == 0) + { + func_cl->modes[h_index] = STARPU_W; + } + /*access mode is not defined for HandleNumpy object*/ + if(tmp_mode_py == NULL && strcmp(tp_arg, "HandleNumpy") == 0) + { + PyErr_Format(StarpupyError, "access mode should be set as STARPU_W"); + return NULL; + } + + h_index++; + nbuffer = h_index; + + Py_DECREF(tmp_cap); + Py_DECREF(tmp); + } + /*check if the arg is buffer protocol*/ + else 
if((PyObject_IsTrue(arg_handle)) && (strcmp(tp_arg, "numpy.ndarray")==0 || strcmp(tp_arg, "bytes")==0 || strcmp(tp_arg, "bytearray")==0 || strcmp(tp_arg, "array.array")==0 || strcmp(tp_arg, "memoryview")==0)) + { + /*get the corresponding handle of the obj, return a new reference, decremented in the end of this else if{}*/ + PyObject *tmp_cap = starpupy_handle_dict_check(tmp, tmp_mode, "register"); + + /*create the Handle_token object to replace the Handle Capsule*/ + PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); + PyTuple_SetItem(argList, j, token_obj); + + /*get Handle*/ + starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp_cap, "Handle"); + + task->handles[h_index] = tmp_handle; + + /*set access mode*/ + /*mode is STARPU_R*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) + { + func_cl->modes[h_index] = STARPU_R; + } + /*mode is STARPU_W*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "W") == 0) + { + func_cl->modes[h_index] = STARPU_W; + } + /*mode is STARPU_RW*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) + { + func_cl->modes[h_index] = STARPU_RW; + } + /*access mode is not defined*/ + if(tmp_mode_py == NULL) + { + func_cl->modes[h_index] = STARPU_R; + } + + h_index++; + nbuffer = h_index; + + Py_DECREF(tmp_cap); + Py_DECREF(tmp); + } + /* check if the arg is the sub handle*/ + else if(strcmp(tp_arg, "PyCapsule")==0) + { + //printf("it's the sub handles\n"); + /*create the Handle_token object to replace the Handle Capsule*/ + PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); + PyTuple_SetItem(argList, j, token_obj); + + /*get Handle*/ + starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp, "Handle"); + + task->handles[h_index] = tmp_handle; + + /*set access mode*/ + /*mode is STARPU_R*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) + { + func_cl->modes[h_index] = STARPU_R; + } + /*mode is STARPU_W*/ + if(tmp_mode_py != NULL && 
strcmp(tmp_mode, "W") == 0) + { + func_cl->modes[h_index] = STARPU_W; + } + /*mode is STARPU_RW*/ + if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) + { + func_cl->modes[h_index] = STARPU_RW; + } + /*access mode is not defined*/ + if(tmp_mode_py == NULL) + { + func_cl->modes[h_index] = STARPU_R; + } + + h_index++; + nbuffer = h_index; + + Py_DECREF(tmp); + } + else + { + PyTuple_SetItem(argList, j, tmp); + } + + if(tmp_mode_py != NULL) + { + free(tmp_mode); + } + + Py_DECREF(PyModes); + Py_DECREF(arg_id); + } + //printf("nbuffer is %d\n", nbuffer); + } + + /*decrement the references which are obtained before generating the argList*/ + Py_DECREF(pInstanceToken); + func_cl->nbuffers = nbuffer; + + /*pack argList*/ + starpu_codelet_pack_arg(&data, &argList, sizeof(argList)); + /*pack fut*/ + starpu_codelet_pack_arg(&data, &fut, sizeof(fut)); + /*pack loop*/ + starpu_codelet_pack_arg(&data, &loop, sizeof(loop)); + /*pack h_flag*/ + starpu_codelet_pack_arg(&data, &h_flag, sizeof(h_flag)); + /*pack perfmodel*/ + starpu_codelet_pack_arg(&data, &perfmodel, sizeof(perfmodel)); + + task->cl=func_cl; + + /*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None, sizebase=0*/ + /*const char * name*/ + PyObject *PyName = PyDict_GetItemString(dict_option, "name"); + if (PyName!=NULL && PyName!=Py_None) + { + const char* name_str = PyUnicode_AsUTF8(PyName); + char* name = strdup(name_str); + //printf("name is %s\n", name); + task->name=name; + } + + /*unsigned synchronous:1*/ + PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous"); + if (PySync!=NULL) + { + unsigned sync=PyLong_AsUnsignedLong(PySync); + //printf("sync is %u\n", sync); + task->synchronous=sync; + } + + /*int priority*/ + PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority"); + if (PyPrio!=NULL) + { + int prio=PyLong_AsLong(PyPrio); + //printf("prio is %d\n", prio); + task->priority=prio; + } + + /*unsigned color*/ + PyObject *PyColor = 
PyDict_GetItemString(dict_option, "color"); + if (PyColor!=NULL && PyColor!=Py_None) + { + unsigned color=PyLong_AsUnsignedLong(PyColor); + //printf("color is %u\n", color); + task->color=color; + } + + /*double flops*/ + PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops"); + if (PyFlops!=NULL && PyFlops!=Py_None) + { + double flops=PyFloat_AsDouble(PyFlops); + //printf("flops is %f\n", flops); + task->flops=flops; + } + + /*int sizebase*/ + PyObject *PySB = PyDict_GetItemString(dict_option, "sizebase"); + int sb; + if (PySB!=NULL) + { + sb=PyLong_AsLong(PySB); + } + else + { + sb=0; + } + + //printf("pack sizebase is %d\n", sb); + /*pack sb*/ + starpu_codelet_pack_arg(&data, &sb, sizeof(sb)); + + /*finish packing data and store the struct in cl_arg*/ + starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size); + task->cl_arg_free = 1; + + task->prologue_callback_func=&starpupy_prologue_cb_func; + task->epilogue_callback_func=&starpupy_epilogue_cb_func; + task->callback_func=&starpupy_cb_func; + + /*call starpu_task_submit method*/ + int ret; + Py_BEGIN_ALLOW_THREADS; + ret = starpu_task_submit(task); + Py_END_ALLOW_THREADS; + if (ret!=0) + { + PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_task_submit", ret); + return NULL; + } + + /*decrement the ref obtained at the beginning of this function*/ + Py_DECREF(dict_option); + + //printf("the number of reference is %ld\n", Py_REFCNT(func_py)); + //printf("fut %ld\n", Py_REFCNT(fut)); + /*if return value is handle*/ + if(PyObject_IsTrue(ret_handle)) + { + return r_handle_obj; + } + else if(PyObject_IsTrue(ret_fut)) + { + return fut; + } + else + { + Py_INCREF(Py_None); + return Py_None; + } +} + +/*wrapper wait for all method*/ +static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + + /*call starpu_task_wait_for_all method*/ + Py_BEGIN_ALLOW_THREADS; + starpu_task_wait_for_all(); + Py_END_ALLOW_THREADS; + + 
/*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*wrapper pause method*/ +static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + + /*call starpu_pause method*/ + starpu_pause(); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*wrapper resume method*/ +static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + /*call starpu_resume method*/ + starpu_resume(); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*wrapper worker_get_count_by_type method*/ +static PyObject* starpu_worker_get_count_by_type_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + int type; + + if (!PyArg_ParseTuple(args, "I", &type)) + return NULL; + + if (!((type >= STARPU_CPU_WORKER && type <= STARPU_NARCH) || type == STARPU_ANY_WORKER)) + RETURN_EXCEPT("Parameter %d invalid", type); + + int num_worker=starpu_worker_get_count_by_type(type); + + /*return type is unsigned*/ + return Py_BuildValue("I", num_worker); +} + +/*wrapper get min priority method*/ +static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + /*call starpu_sched_get_min_priority*/ + int min_prio=starpu_sched_get_min_priority(); + + /*return type is int*/ + return Py_BuildValue("i", min_prio); +} + +/*wrapper get max priority method*/ +static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + /*call starpu_sched_get_max_priority*/ + int max_prio=starpu_sched_get_max_priority(); + + /*return type is int*/ + return Py_BuildValue("i", max_prio); +} + +/*wrapper get the number of no completed submitted tasks method*/ +static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + /*call starpu_task_nsubmitted*/ + int num_task=starpu_task_nsubmitted(); + + /*Return the number of submitted 
tasks which have not completed yet */ + return Py_BuildValue("i", num_task); +} + +/*generate new sub-interpreters*/ +static void new_inter(void* arg) +{ + (void)arg; + unsigned workerid = starpu_worker_get_id_check(); + PyThreadState *new_thread_state; + PyGILState_STATE state; + + state = PyGILState_Ensure(); // take the GIL + STARPU_ASSERT(state == PyGILState_UNLOCKED); + orig_thread_states[workerid] = PyThreadState_GET(); + + if (starpu_getenv_number_default("STARPUPY_OWN_GIL", 0)) + { +#ifdef PyInterpreterConfig_OWN_GIL + /* https://peps.nogil.dev/pep-0684/ */ + PyInterpreterConfig config = { + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, + }; + Py_NewInterpreterFromConfig(&new_thread_state, &config); +#else + fprintf(stderr, "STARPUPY_OWN_GIL is only supported starting from python 3.12\n"); + exit(1); +#endif + } + else + new_thread_state = Py_NewInterpreter(); + + PyThreadState_Swap(new_thread_state); + new_thread_states[workerid] = new_thread_state; + PyEval_SaveThread(); // releases the GIL +} + +/*delete sub-interpreters*/ +static void del_inter(void* arg) +{ + (void)arg; + unsigned workerid = starpu_worker_get_id_check(); + PyThreadState *new_thread_state = new_thread_states[workerid]; + + PyEval_RestoreThread(new_thread_state); // reacquires the GIL + Py_EndInterpreter(new_thread_state); + + PyThreadState_Swap(orig_thread_states[workerid]); + PyGILState_Release(PyGILState_UNLOCKED); +} + +void _starpupy_data_register_ops(void) +{ + _starpupy_interface_pyobject_ops.interfaceid = starpu_data_interface_get_next_id(); + _starpupy_interface_pybuffer_ops.interfaceid = starpu_data_interface_get_next_id(); + _starpupy_interface_pybuffer_bytes_ops.interfaceid = starpu_data_interface_get_next_id(); + starpu_data_register_ops(&_starpupy_interface_pyobject_ops); + starpu_data_register_ops(&_starpupy_interface_pybuffer_ops); + starpu_data_register_ops(&_starpupy_interface_pybuffer_bytes_ops); +} + +/*wrapper init method*/ +static 
PyObject* starpu_init_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + + /*starpu initialization*/ + int ret; + + _starpupy_data_register_ops(); + struct starpu_conf conf; + Py_BEGIN_ALLOW_THREADS; + starpu_conf_init(&conf); + ret = starpu_init(&conf); + Py_END_ALLOW_THREADS; + if (ret!=0) + { + PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_init", ret); + return NULL; + } + + if (conf.sched_policy_name && !strcmp(conf.sched_policy_name, "graph_test")) + { + /* FIXME: should call starpu_do_schedule when appropriate, the graph_test scheduler needs it. */ + fprintf(stderr,"TODO: The graph_test scheduler needs starpu_do_schedule calls\n"); + exit(77); + } + + if (active_multi_interpreter) + { + /*generate new interpreter on each worker*/ + Py_BEGIN_ALLOW_THREADS; + starpu_execute_on_each_worker_ex(new_inter, NULL, where_inter, "new_inter"); + Py_END_ALLOW_THREADS; + } + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*wrapper shutdown method*/ +static PyObject* starpu_shutdown_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + (void)args; + //printf("it's starpu_shutdown function\n"); + /*unregister the rest of handle in handle_dict*/ + /*get handle_dict, decrement after using*/ + PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); + + /*obj_id is the key in dict, handle_obj is the value in dict*/ + PyObject *obj_id, *handle_obj; + Py_ssize_t handle_pos = 0; + + while(PyDict_Next(handle_dict, &handle_pos, &obj_id, &handle_obj)) + { + /*PyObject *->handle*/ + PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle != (void*)-1) + { + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + } + + /*remove this handle from 
handle_dict*/ + PyDict_DelItem(handle_dict, obj_id); + + Py_DECREF(handle_cap); + } + + Py_DECREF(handle_dict); + + /*unregister the rest of handle in handle_set*/ + /*get handle_set, decrement after using*/ + PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); + /*treat set as an iterator, decrement after using*/ + PyObject *handle_set_iterator = PyObject_GetIter(handle_set); + + while((handle_obj=PyIter_Next(handle_set_iterator))) + { + /*PyObject *->handle*/ + PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle != (void*)-1) + { + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + } + + /*remove this handle from handle_set*/ + PySet_Discard(handle_set, handle_obj); + Py_DECREF(handle_set_iterator); + handle_set_iterator = PyObject_GetIter(handle_set); + + Py_DECREF(handle_cap); + /*release ref obtained by PyInter_Next*/ + Py_DECREF(handle_obj); + } + + Py_DECREF(handle_set_iterator); + Py_DECREF(handle_set); + + /*clean all perfmodel which are saved in dict_perf*/ + /*get dict_perf, decrement after using*/ + PyObject *perf_dict = PyObject_GetAttrString(starpu_module, "dict_perf"); + + PyObject *perf_key, *perf_value; + Py_ssize_t perf_pos = 0; + + while(PyDict_Next(perf_dict, &perf_pos, &perf_key, &perf_value)) + { + PyDict_DelItem(perf_dict, perf_key); + } + + Py_DECREF(perf_dict); + + /*gc module import*/ + PyObject *gc_module = PyImport_ImportModule("gc"); + if (gc_module == NULL) + { + PyErr_Format(StarpupyError, "can't find gc module"); + Py_XDECREF(gc_module); + return NULL; + } + PyObject *gc_collect = PyObject_CallMethod(gc_module, "collect", NULL); + PyObject *gc_garbage = PyObject_GetAttrString(gc_module, "garbage"); + + Py_DECREF(gc_collect); + Py_DECREF(gc_garbage); + 
Py_DECREF(gc_module); + + /*stop the cb_loop*/ + if (cb_loop) + { + PyObject * cb_loop_stop = PyObject_CallMethod(cb_loop, "stop", NULL); + Py_DECREF(cb_loop_stop); + } + + /*call starpu_shutdown method*/ + Py_BEGIN_ALLOW_THREADS; + starpu_task_wait_for_all(); + if(active_multi_interpreter) + { + /*delete interpreter on each worker*/ + starpu_execute_on_each_worker_ex(del_inter, NULL, where_inter, "del_inter"); + } + starpu_shutdown(); + Py_END_ALLOW_THREADS; + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*set ncpu*/ +static PyObject* starpu_set_ncpu(PyObject *self, PyObject *args) +{ + (void)self; + int ncpu; + + if (!PyArg_ParseTuple(args, "I", &ncpu)) + return NULL; + + Py_BEGIN_ALLOW_THREADS; + starpu_task_wait_for_all(); + + if(active_multi_interpreter) + { + /*delete interpreter on each worker*/ + starpu_execute_on_each_worker_ex(del_inter, NULL, where_inter, "del_inter"); + } + + starpu_shutdown(); + + if (starpu_getenv("STARPU_NCPU") || + starpu_getenv("STARPU_NCPUS")) + fprintf(stderr, "warning: starpupy.set_ncpu is ineffective when the STARPU_NCPU or STARPU_NCPUS environment variable is defined"); + + int ret; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncpus = ncpu; + + ret = starpu_init(&conf); + if (ret!=0) + { + PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_init", ret); + return NULL; + } + + if (active_multi_interpreter) + { + /* generate new interpreter on each worker*/ + starpu_execute_on_each_worker_ex(new_inter, NULL, where_inter, "new_inter"); + } + + Py_END_ALLOW_THREADS; + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/***********************************************************************************/ + +/***************The module’s method table and initialization function**************/ +/*method table*/ +static PyMethodDef starpupyMethods[] = +{ + {"init", starpu_init_wrapper, METH_VARARGS, "initialize StarPU"}, /* init method*/ + {"_task_submit", 
starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/ + {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/ + {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/ + {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/ + {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/ + {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/ +#ifndef STARPU_SIMGRID + {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/ +#endif + {"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/ + {"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/ + {"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/ + {"shutdown", starpu_shutdown_wrapper, METH_VARARGS, "shutdown starpu"}, /*shutdown starpu*/ + {"starpupy_data_register", starpupy_data_register_wrapper, METH_VARARGS, "register PyObject in a handle"}, /*register PyObject in a handle*/ + {"starpupy_numpy_register", starpupy_numpy_register_wrapper, METH_VARARGS, "register empty Numpy array in a handle"}, /*register PyObject in a handle*/ + {"starpupy_get_object", starpupy_get_object_wrapper, METH_VARARGS, "get PyObject from handle"}, /*get PyObject from handle*/ + {"starpupy_acquire_handle", starpupy_acquire_handle_wrapper, METH_VARARGS, "acquire handle"}, /*acquire handle*/ + {"starpupy_release_handle", 
starpupy_release_handle_wrapper, METH_VARARGS, "release handle"}, /*release handle*/ + {"starpupy_data_unregister", starpupy_data_unregister_wrapper, METH_VARARGS, "unregister handle"}, /*unregister handle*/ + {"starpupy_data_unregister_submit", starpupy_data_unregister_submit_wrapper, METH_VARARGS, "unregister handle and object"}, /*unregister handle and object*/ + {"starpupy_acquire_object", starpupy_acquire_object_wrapper, METH_VARARGS, "acquire PyObject handle"}, /*acquire handle*/ + {"starpupy_release_object", starpupy_release_object_wrapper, METH_VARARGS, "release PyObject handle"}, /*release handle*/ + {"starpupy_data_unregister_object", starpupy_data_unregister_object_wrapper, METH_VARARGS, "unregister PyObject handle"}, /*unregister handle*/ + {"starpupy_data_unregister_submit_object", starpupy_data_unregister_submit_object_wrapper, METH_VARARGS, "unregister PyObject handle and object"}, /*unregister handle and object*/ + {"starpupy_data_partition", starpu_data_partition_wrapper, METH_VARARGS, "handle partition into sub handles"}, + {"starpupy_data_unpartition", starpu_data_unpartition_wrapper, METH_VARARGS, "handle unpartition sub handles"}, + {"starpupy_get_partition_size", starpupy_get_partition_size_wrapper, METH_VARARGS, "get the array size from each sub handle"}, + {"set_ncpu", starpu_set_ncpu, METH_VARARGS,"reinitialize starpu with given number of CPU"}, + {"worker_get_count_by_type", starpu_worker_get_count_by_type_wrapper, METH_VARARGS, "get the number of workers for a given type"}, + {NULL, NULL,0,NULL} +}; + +/*function of slot type Py_mod_exec */ +static int my_exec(PyObject *m) +{ + PyModule_AddStringConstant(m, "starpupy", "starpupy"); + + /* Add an exception type */ + if (StarpupyError == NULL) + { + StarpupyError = PyErr_NewException("starpupy.error", NULL, NULL); + } + + if (PyModule_AddObject(m, "error", StarpupyError) < 0) + { + Py_XDECREF(StarpupyError); + return -1; + } + + return 0; +} + +/*m_slots member of the module*/ +static 
PyModuleDef_Slot mySlots[] = +{ + {Py_mod_exec, my_exec}, + {0, NULL} +}; + +/*deallocation function*/ +static void starpupyFree(void *self) +{ + (void)self; + //printf("it's the free function\n"); + Py_XDECREF(asyncio_module); + Py_XDECREF(concurrent_futures_future_class); + Py_XDECREF(cloudpickle_module); + Py_XDECREF(dumps); + Py_XDECREF(pickle_module); + Py_XDECREF(loads); + Py_XDECREF(starpu_module); + Py_XDECREF(starpu_dict); + Py_XDECREF(cb_loop); +} + +/*module definition structure*/ +static struct PyModuleDef starpupymodule = +{ + PyModuleDef_HEAD_INIT, + .m_name = "starpupy", + .m_doc = NULL, + .m_methods = starpupyMethods, + .m_size = 0, + .m_slots = mySlots, + .m_traverse = NULL, + .m_clear = NULL, + .m_free = starpupyFree +}; + +static void* set_cb_loop(void* arg) +{ + (void)arg; + PyGILState_STATE state = PyGILState_Ensure(); + /*second loop will run until we stop it in starpu_shutdown*/ + PyObject * cb_loop_run = PyObject_CallMethod(cb_loop, "run_forever", NULL); + Py_DECREF(cb_loop_run); + PyGILState_Release(state); + return NULL; +} + +/*initialization function*/ +PyMODINIT_FUNC PyInit_starpupy(void) +{ +#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 9) + PyEval_InitThreads(); +#endif + +#if defined(STARPU_USE_MPI_MASTER_SLAVE) + active_multi_interpreter = 1; +#else + if (starpu_getenv_number_default("STARPUPY_MULTI_INTERPRETER", 0) + || starpu_getenv_number("STARPU_TCPIP_MS_SLAVES") > 0) + active_multi_interpreter = 1; +#endif + + main_thread = pthread_self(); + + /*python asyncio import*/ + asyncio_module = PyImport_ImportModule("asyncio"); + if (asyncio_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find asyncio module"); + starpupyFree(NULL); + return NULL; + } + + /*cloudpickle import*/ + if (active_multi_interpreter) + { + cloudpickle_module = PyImport_ImportModule("cloudpickle"); + if (cloudpickle_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find cloudpickle module"); + 
starpupyFree(NULL); + return NULL; + } + /*dumps method*/ + dumps = PyObject_GetAttrString(cloudpickle_module, "dumps"); + } + + /*pickle import*/ + if (active_multi_interpreter) + { + pickle_module = PyImport_ImportModule("pickle"); + if (pickle_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find pickle module"); + starpupyFree(NULL); + return NULL; + } + /*loads method*/ + loads = PyObject_GetAttrString(pickle_module, "loads"); + } + + /*starpu import*/ + starpu_module = PyImport_ImportModule("starpu"); + if (starpu_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find starpu module"); + starpupyFree(NULL); + return NULL; + } + starpu_dict = PyModule_GetDict(starpu_module); + /*protect borrowed reference, decremented in starpupyFree*/ + Py_INCREF(starpu_dict); + + /* Prepare for running asyncio futures */ + + /*create a new event loop in another thread, in case the main loop is occupied*/ + cb_loop = PyObject_CallMethod(asyncio_module, "new_event_loop", NULL); + if (cb_loop == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't create cb_loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); + starpupyFree(NULL); + return NULL; + } + + int pc = pthread_create(&thread_id, NULL, set_cb_loop, NULL); + if (pc) + { + PyErr_Format(PyExc_RuntimeError, "Fail to create thread\n"); + starpupyFree(NULL); + return NULL; + } + + /* Prepare for running concurrent.futures futures */ + + /*python concurrent.futures import*/ + PyObject *concurrent_futures_module = PyImport_ImportModule("concurrent.futures"); + if (concurrent_futures_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find concurrent.futures module"); + starpupyFree(NULL); + return NULL; + } + + PyObject *concurrent_futures_module_dict = PyModule_GetDict(concurrent_futures_module); /* borrowed */ + Py_DECREF(concurrent_futures_module); + if (concurrent_futures_module_dict == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't get 
concurrent.futures dict"); + starpupyFree(NULL); + return NULL; + } + concurrent_futures_future_class = PyDict_GetItemString(concurrent_futures_module_dict, "Future"); + Py_DECREF(concurrent_futures_module_dict); + if (concurrent_futures_future_class == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find Future class"); + starpupyFree(NULL); + return NULL; + } + + PyObject *concurrent_futures_thread_module = PyImport_ImportModule("concurrent.futures.thread"); + if (concurrent_futures_thread_module == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find concurrent.futures.thread module"); + starpupyFree(NULL); + return NULL; + } + + PyObject *concurrent_futures_thread_module_dict = PyModule_GetDict(concurrent_futures_thread_module); /* borrowed */ + Py_DECREF(concurrent_futures_thread_module); + if (concurrent_futures_thread_module_dict == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't get concurrent.futures.thread dict"); + Py_DECREF(concurrent_futures_thread_module); + starpupyFree(NULL); + return NULL; + } + + PyObject *executor_class = PyDict_GetItemString(concurrent_futures_thread_module_dict, "ThreadPoolExecutor"); + Py_DECREF(concurrent_futures_thread_module_dict); + if (executor_class == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't find ThreadPoolExecutor class"); + starpupyFree(NULL); + return NULL; + } + + PyObject *cb_executor_instance = PyInstanceMethod_New(executor_class); + Py_DECREF(executor_class); + if (cb_executor_instance == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't create concurrent.futures executor"); + starpupyFree(NULL); + return NULL; + } + + cb_executor = PyObject_CallObject(cb_executor_instance, NULL); + Py_DECREF(cb_executor_instance); + if (cb_executor == NULL) + { + PyErr_Format(PyExc_RuntimeError, "can't create concurrent.futures executor"); + starpupyFree(NULL); + return NULL; + } + + /*module import multi-phase initialization*/ + return PyModuleDef_Init(&starpupymodule); +} 
+/***********************************************************************************/ diff --git a/starpupy/src/starpupy_buffer_interface.c b/starpupy/src/starpupy_buffer_interface.c new file mode 100644 index 0000000..858310f --- /dev/null +++ b/starpupy/src/starpupy_buffer_interface.c @@ -0,0 +1,883 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#undef NDEBUG +#include +#include +#include +#define PY_SSIZE_T_CLEAN +#include + +#ifdef STARPU_PYTHON_HAVE_NUMPY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#endif + +#include "starpupy_buffer_interface.h" + +PyObject* starpupy_buffer_get_numpy(struct starpupy_buffer_interface *pybuffer_interface) +{ +#ifdef STARPU_PYTHON_HAVE_NUMPY + char* pybuf = pybuffer_interface->py_buffer; + Py_ssize_t nbuf = pybuffer_interface->buffer_size; + int arr_type = pybuffer_interface->array_type; + size_t nitem = pybuffer_interface->item_size; + npy_intp narray = nbuf/nitem; + npy_intp* get_dim = pybuffer_interface->array_dim; + int get_ndim = pybuffer_interface->dim_size; + + /*store the dim array in a tuple*/ + PyObject* dim_tup = PyTuple_New(get_ndim); + + int i; + for (i=0; itypecode; + char* pybuf = pybuffer_interface->py_buffer; + Py_ssize_t nbuf = pybuffer_interface->buffer_size; + size_t nitem = pybuffer_interface->item_size; + /*get size of array*/ + int narray = nbuf/nitem; + + /*create the new array.array*/ + PyObject *arr_module = PyImport_ImportModule("array"); + PyObject *arr_dict = PyModule_GetDict(arr_module); + + /*get array.array class*/ + PyObject *arr_class = PyDict_GetItemString(arr_dict, "array"); + + /*create an instance of array.array, decrement in the end of the function*/ + PyObject *arr_instance = PyInstanceMethod_New(arr_class); + + /*get the buffer bytes, decrement in the end of the function*/ + PyObject *pybt=PyBytes_FromStringAndSize(pybuf, nbuf); + + /*get the array elements, reference is stolen by PyTuple_SetItem*/ + PyObject *arr_list = NULL; + + /*if the element is not unicode character*/ + if (arr_typecode!='u') + { + char type_str[narray+1]; + memset(type_str, arr_typecode, narray); + type_str[narray] = 0; + + /*get the array element list using struct module*/ + PyObject *struct_module = PyImport_ImportModule("struct"); + arr_list = PyObject_CallMethod(struct_module, "unpack", "sO", type_str, pybt); + + 
Py_DECREF(struct_module); + } + /*if the element is unicode character*/ + else + { + /*decode buffer bytes to unicode*/ + PyObject* pyuni = PyUnicode_DecodeUTF32(PyBytes_AsString(pybt), PyBytes_Size(pybt), "can't decode", NULL); + /*convert unicode to wide char*/ + wchar_t* uni_str = PyUnicode_AsWideCharString(pyuni, NULL); + + if(uni_str != NULL) + { + arr_list = Py_BuildValue("u", uni_str); + PyMem_Free(uni_str); + } + + Py_DECREF(pyuni); + } + + /*initialize the instance*/ + PyObject *arr_args=PyTuple_New(2); + + char arr_type[]={arr_typecode, 0}; + PyTuple_SetItem(arr_args, 0, Py_BuildValue("s", arr_type)); + PyTuple_SetItem(arr_args, 1, arr_list); + + PyObject *arr_obj = PyObject_CallObject(arr_instance,arr_args); + + Py_DECREF(pybt); + Py_DECREF(arr_module); + Py_DECREF(arr_instance); + Py_DECREF(arr_args); + + return arr_obj; +} + +PyObject* starpupy_buffer_get_memview(struct starpupy_buffer_interface *pybuffer_interface) +{ + char* pybuf = pybuffer_interface->py_buffer; + Py_ssize_t nbuf = pybuffer_interface->buffer_size; + char mem_format = pybuffer_interface->typecode; + size_t nitem = pybuffer_interface->item_size; + int ndim = pybuffer_interface->dim_size; + int* mem_shape = pybuffer_interface->shape; + int narray = nbuf/nitem; + + /*decrement in each if{}*/ + PyObject *pybt=PyBytes_FromStringAndSize(pybuf, nbuf); + + /*return value of the function*/ + PyObject *memview_obj = NULL; + if(mem_format=='B') + { + memview_obj = pybt; + } + /*if the element is not unicode character of array.array*/ + else if(mem_format!='w') + { + /* We have a flat array, split it into ndim-dimension lists of lists according to mem_shape */ + char type_str[narray+1]; + memset(type_str, mem_format, narray); + type_str[narray] = 0; + + /*get the array element list using struct module, decrement after used*/ + PyObject *struct_module = PyImport_ImportModule("struct"); + PyObject *m_obj = PyObject_CallMethod(struct_module, "unpack", "sO", type_str, pybt); + + 
Py_DECREF(struct_module); + Py_DECREF(pybt); + /*reshape the list in case the original array is multi dimension*/ + /*get the index of each element in new multi dimension array*/ + int ind[narray][ndim]; + int d; + int i; + for (i = 0; i < narray; i++) + { + int n = narray; + int ii = i; + for (d = 0; d < ndim; d++) + { + n = n / mem_shape[d]; + ind[i][d] = ii / n; + ii = ii % n; + } + } + + /*put the element of one dimension array into the multi dimension array according to the index*/ + PyObject* list_obj[ndim]; + memset(&list_obj, 0, sizeof(list_obj)); + for (i = 0; i < narray; i++) + { + for (d = ndim-1; d >=0; d--) + { + /*in the innermost nested list, we set the element in the current list*/ + if (d==ndim-1) + { + /*if i is the first element of this list, we need to initialize the list*/ + if(ind[i][d]==0) + { + if(list_obj[d] != NULL) + Py_DECREF(list_obj[d]); + list_obj[d] = PyList_New(mem_shape[d]); + } + + PyObject *m_obj_item = PyTuple_GetItem(m_obj, i); + /*protect borrowed reference, give it to PyList_SetItem*/ + Py_INCREF(m_obj_item); + PyList_SetItem(list_obj[d], ind[i][d], m_obj_item); + } + /*in the rest of nested list, we set the inner list in the current list, once we have the nested list, one element of inner list is changed, current list is changes as well*/ + else + { + /*if the index of element in all inner list is 0, we are the first, we have to add this new list to the upper dimension list*/ + int flag=1; + int dd; + for(dd=ndim-1; dd>=d+1; dd--) + { + if(ind[i][dd]!=0) + flag=0; + } + if(flag==1) + { + /*if i is the first element of this list and also the first element of all inner list, we need to initialize this list*/ + if (ind[i][d]==0) + { + if(list_obj[d] != NULL) + Py_DECREF(list_obj[d]); + list_obj[d] = PyList_New(mem_shape[d]); + } + /*if i is the first element of all inner list, we set the last inner list in the current list*/ + /*reference is stolen by PyList_SetItem*/ + Py_INCREF(list_obj[d+1]); + 
PyList_SetItem(list_obj[d],ind[i][d],list_obj[d+1]); + } + } + } + } + + Py_DECREF(m_obj); + + memview_obj = list_obj[0]; + + for(i=1; idim_size; + + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); + if (node == home_node) + { + if(pybuffer_interface->object != NULL) + { + Py_INCREF(pybuffer_interface->object); + local_interface->object = pybuffer_interface->object; + } + else + { + local_interface->object = NULL; + } + local_interface->py_buffer = pybuffer_interface->py_buffer; + } + else + { + local_interface->object = NULL; + local_interface->py_buffer = NULL; + } + local_interface->id = pybuffer_interface->id; + local_interface->buffer_type = pybuffer_interface->buffer_type; + local_interface->buffer_size = pybuffer_interface->buffer_size; + local_interface->dim_size = pybuffer_interface->dim_size; + +#ifdef STARPU_PYTHON_HAVE_NUMPY + npy_intp* arr_dim = pybuffer_interface->array_dim; + npy_intp* a_dim; + if (arr_dim!=NULL) + { + a_dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); + memcpy(a_dim, arr_dim, ndim*sizeof(npy_intp)); + } + else + a_dim = NULL; + + local_interface->array_dim = a_dim; +#endif + local_interface->array_type = pybuffer_interface->array_type; + local_interface->item_size = pybuffer_interface->item_size; + local_interface->typecode = pybuffer_interface->typecode; + + int* mem_shape = pybuffer_interface->shape; + int* m_shape; + if (mem_shape!=NULL) + { + m_shape = (int*)malloc(ndim*sizeof(int)); + memcpy(m_shape, mem_shape, ndim*sizeof(int)); + } + else + m_shape = NULL; + + local_interface->shape = m_shape; + } +} + +static void pybuffer_unregister_data_handle(starpu_data_handle_t handle) +{ + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + unsigned home_node = starpu_data_get_home_node(handle); + unsigned node; + for (node = 0; node < STARPU_MAXNODES; 
node++) + { + struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); + if(node == home_node) + { + if(local_interface->object!=NULL) + { + Py_DECREF(local_interface->object); + local_interface->object = NULL; + local_interface->py_buffer = NULL; + } + } + else + { + STARPU_ASSERT(local_interface->object == NULL); + STARPU_ASSERT(local_interface->py_buffer == NULL); + } + + +#ifdef STARPU_PYTHON_HAVE_NUMPY + free(local_interface->array_dim); + local_interface->array_dim = NULL; +#endif + free(local_interface->shape); + local_interface->shape = NULL; + } + + /* release GIL */ + PyGILState_Release(state); +} + +static starpu_ssize_t pybuffer_allocate_data_on_node(void *data_interface, unsigned node) +{ + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; + starpu_ssize_t requested_memory = pybuffer_interface->buffer_size; + + pybuffer_interface->py_buffer = (char*)starpu_malloc_on_node(node, requested_memory); + + if (!pybuffer_interface->py_buffer) + return -ENOMEM; + + return requested_memory; +} + +static starpu_ssize_t pybuffer_allocate_bytes_data_on_node(void *data_interface, unsigned node) +{ + (void)node; + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; + char* pybuf = pybuffer_interface->py_buffer; + Py_ssize_t nbuf = pybuffer_interface->buffer_size; + + STARPU_ASSERT(pybuf == NULL); + PyObject *pybt=PyBytes_FromStringAndSize(NULL, nbuf); + + pybuffer_interface->object = pybt; + + pybuffer_interface->py_buffer = PyBytes_AsString(pybt); + + if (!pybuffer_interface->py_buffer) + return -ENOMEM; + + /* release GIL */ + PyGILState_Release(state); + + return nbuf; +} + +static void pybuffer_free_data_on_node(void *data_interface, unsigned node) +{ + struct starpupy_buffer_interface 
*pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; + starpu_ssize_t requested_memory = pybuffer_interface->buffer_size; + + starpu_free_on_node(node, (uintptr_t) pybuffer_interface->py_buffer, requested_memory); + + pybuffer_interface->py_buffer = NULL; +} + +static void pybuffer_free_bytes_data_on_node(void *data_interface, unsigned node) +{ + (void)node; + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; + + if (pybuffer_interface->object != NULL) + { + Py_DECREF(pybuffer_interface->object); + } + + pybuffer_interface->object = NULL; + pybuffer_interface->py_buffer = NULL; + + /* release GIL */ + PyGILState_Release(state); +} + +static void pybuffer_cache_data_on_node(void *cached_interface, void *src_data_interface, unsigned node) +{ + (void)node; + struct starpupy_buffer_interface *cached_pybuffer_interface = (struct starpupy_buffer_interface *) cached_interface; + struct starpupy_buffer_interface *src_pybuffer_interface = (struct starpupy_buffer_interface *) src_data_interface; + + cached_pybuffer_interface->object = src_pybuffer_interface->object; + src_pybuffer_interface->object = NULL; + cached_pybuffer_interface->py_buffer = src_pybuffer_interface->py_buffer; + src_pybuffer_interface->py_buffer = NULL; + cached_pybuffer_interface->buffer_size = src_pybuffer_interface->buffer_size; +} + +static void pybuffer_reuse_data_on_node(void *dst_data_interface, const void *cached_interface, unsigned node) +{ + (void)node; + struct starpupy_buffer_interface *dst_pybuffer_interface = (struct starpupy_buffer_interface *) dst_data_interface; + const struct starpupy_buffer_interface *cached_pybuffer_interface = (const struct starpupy_buffer_interface *) cached_interface; + + dst_pybuffer_interface->object = cached_pybuffer_interface->object; + dst_pybuffer_interface->py_buffer = 
cached_pybuffer_interface->py_buffer; +} + +static int pybuffer_map_data(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + struct starpupy_buffer_interface *src_pybuf = src_interface; + struct starpupy_buffer_interface *dst_pybuf = dst_interface; + int ret; + uintptr_t mapped; + + mapped = starpu_interface_map((uintptr_t )src_pybuf->py_buffer, 0, src_node, dst_node, (size_t)src_pybuf->buffer_size, &ret); + if (mapped) + { + dst_pybuf->py_buffer = (char*)mapped; + return 0; + } + return ret; +} + +static int pybuffer_unmap_data(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + struct starpupy_buffer_interface *src_pybuf = src_interface; + struct starpupy_buffer_interface *dst_pybuf = dst_interface; + + int ret = starpu_interface_unmap((uintptr_t)src_pybuf->py_buffer, 0, src_node, (uintptr_t)dst_pybuf->py_buffer, dst_node, (size_t)src_pybuf->buffer_size); + dst_pybuf->py_buffer = 0; + + return ret; +} + +static int pybuffer_update_map(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + struct starpupy_buffer_interface *src_pybuf = src_interface; + struct starpupy_buffer_interface *dst_pybuf = dst_interface; + + return starpu_interface_update_map((uintptr_t)src_pybuf->py_buffer, 0, src_node, (uintptr_t)dst_pybuf->py_buffer, 0, dst_node, (size_t)src_pybuf->buffer_size); +} + +static size_t pybuffer_get_size(starpu_data_handle_t handle) +{ + size_t size; + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + size = pybuffer_interface->buffer_size; + return size; +} + +static int pybuffer_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); + + char* pybuf = pybuffer_interface->py_buffer; + 
Py_ssize_t nbuf = pybuffer_interface->buffer_size; + + char *data; + data = (void*)starpu_malloc_on_node_flags(node, nbuf, 0); + + memcpy(data, pybuf, nbuf); + + *ptr = data; + *count = nbuf; + return 0; +} + +static int pybuffer_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + (void)count; + + char *data = ptr; + + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); + pybuffer_interface->id = _starpupy_interface_pybuffer_ops.interfaceid; + + memcpy(pybuffer_interface->py_buffer, data, pybuffer_interface->buffer_size); + + return 0; +} + +static int pybuffer_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + pybuffer_peek_data(handle, node, ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} + +static int pybuffer_meta_size(struct starpupy_buffer_interface *pybuffer_interface) +{ + starpu_ssize_t count; + + count = sizeof(pybuffer_interface->buffer_type) + + /* sizeof(pybuffer_interface->object) + => built on the fly */ + sizeof(pybuffer_interface->py_buffer) + + sizeof(pybuffer_interface->buffer_size) + + sizeof(pybuffer_interface->dim_size) + + sizeof(pybuffer_interface->array_type) + + sizeof(pybuffer_interface->item_size) + + sizeof(pybuffer_interface->typecode) + sizeof(int); +#ifdef STARPU_PYTHON_HAVE_NUMPY + count += sizeof(int); +#endif + count += pybuffer_interface->dim_size * ( +#ifdef STARPU_PYTHON_HAVE_NUMPY + sizeof(pybuffer_interface->array_dim[0]) + +#endif + sizeof(pybuffer_interface->shape[0])); + + return count; +} + +#define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while (0) +static int pybuffer_pack_meta(void *data_interface, void **ptr, starpu_ssize_t *count) +{ + struct starpupy_buffer_interface *pybuffer_interface = data_interface; + *count = pybuffer_meta_size(pybuffer_interface); + _STARPU_CALLOC(*ptr, *count, 
1); + char *cur = *ptr; + + _pack(cur, pybuffer_interface->buffer_type); + _pack(cur, pybuffer_interface->py_buffer); + _pack(cur, pybuffer_interface->buffer_size); + _pack(cur, pybuffer_interface->dim_size); + _pack(cur, pybuffer_interface->array_type); + _pack(cur, pybuffer_interface->item_size); + _pack(cur, pybuffer_interface->typecode); + +#ifdef STARPU_PYTHON_HAVE_NUMPY + int array_dim = pybuffer_interface->array_dim ? 1 : 0; + _pack(cur, array_dim); + if (pybuffer_interface->array_dim) + { + memcpy(cur, pybuffer_interface->array_dim, + pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); + cur += pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0]); + } +#endif + int shape = pybuffer_interface->shape ? 1 : 0; + _pack(cur, shape); + if (pybuffer_interface->shape) + memcpy(cur, pybuffer_interface->shape, + pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); + return 0; +} + +#define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) +static int pybuffer_unpack_meta(void **data_interface, void *ptr, starpu_ssize_t *count) +{ + _STARPU_CALLOC(*data_interface, 1, sizeof(struct starpupy_buffer_interface)); + struct starpupy_buffer_interface *pybuffer_interface = (*data_interface); + char *cur = ptr; + + pybuffer_interface->id = _starpupy_interface_pybuffer_ops.interfaceid; + _unpack(pybuffer_interface->buffer_type, cur); + _unpack(pybuffer_interface->py_buffer, cur); + _unpack(pybuffer_interface->buffer_size, cur); + _unpack(pybuffer_interface->dim_size, cur); + _unpack(pybuffer_interface->array_type, cur); + _unpack(pybuffer_interface->item_size, cur); + _unpack(pybuffer_interface->typecode, cur); + +#ifdef STARPU_PYTHON_HAVE_NUMPY + int array_dim; + _unpack(array_dim, cur); + if (array_dim) + { + _STARPU_MALLOC(pybuffer_interface->array_dim, + pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); + memcpy(pybuffer_interface->array_dim, cur, + 
pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); + cur += pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0]); + } + else + pybuffer_interface->array_dim = NULL; +#endif + int shape; + _unpack(shape, cur); + if (shape) + { + _STARPU_MALLOC(pybuffer_interface->shape, + pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); + memcpy(pybuffer_interface->shape, cur, + pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); + } + else + pybuffer_interface->shape = NULL; + + *count = pybuffer_meta_size(pybuffer_interface); + + return 0; +} + +static int pybuffer_free_meta(void *data_interface) +{ + struct starpupy_buffer_interface *pybuffer_interface = data_interface; + +#ifdef STARPU_PYTHON_HAVE_NUMPY + free(pybuffer_interface->array_dim); + pybuffer_interface->array_dim = NULL; +#endif + free(pybuffer_interface->shape); + pybuffer_interface->shape = NULL; + + return 0; +} + +static uint32_t starpupy_buffer_footprint(starpu_data_handle_t handle) +{ + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + int buf_type = pybuffer_interface->buffer_type; + Py_ssize_t nbuf = pybuffer_interface->buffer_size; + int ndim = pybuffer_interface->dim_size; + int arr_type = pybuffer_interface->array_type; + size_t nitem = pybuffer_interface->item_size; + size_t narray = 0; + if(pybuffer_interface->buffer_type != starpupy_bytes_interface && pybuffer_interface->buffer_type != starpupy_bytearray_interface) + { + narray = nbuf/nitem; + } + + uint32_t crc = 0; + + crc=starpu_hash_crc32c_be(buf_type, crc); + crc=starpu_hash_crc32c_be(nbuf, crc); + crc=starpu_hash_crc32c_be(ndim, crc); + crc=starpu_hash_crc32c_be(arr_type, crc); + crc=starpu_hash_crc32c_be(narray, crc); + crc=starpu_hash_crc32c_be(nitem, crc); + + return crc; +} + +static void pybuffer_display(starpu_data_handle_t handle, FILE *f) +{ + struct 
starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%u\t", pybuffer_interface->dim_size); +} + +static int pybuffer_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpupy_buffer_interface *a = (struct starpupy_buffer_interface *) data_interface_a; + struct starpupy_buffer_interface *b = (struct starpupy_buffer_interface *) data_interface_b; + + /* FIXME: compare content of shape or array_dim */ + return ((a->array_type == b->array_type) && (a->item_size == b->item_size) && (a->dim_size == b->dim_size)); +} + +static int pybuffer_alloc_compare(void *data_interface_a, void *data_interface_b) +{ + struct starpupy_buffer_interface *a = (struct starpupy_buffer_interface *) data_interface_a; + struct starpupy_buffer_interface *b = (struct starpupy_buffer_interface *) data_interface_b; + + return a->buffer_size == b->buffer_size; +} + +static int pybuffer_copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct starpupy_buffer_interface *src = (struct starpupy_buffer_interface *) src_interface; + struct starpupy_buffer_interface *dst = (struct starpupy_buffer_interface *) dst_interface; + + starpu_interface_copy((uintptr_t) src->py_buffer, 0, src_node, + (uintptr_t) dst->py_buffer, 0, dst_node, + src->buffer_size, async_data); + starpu_interface_data_copy(src_node, dst_node, src->buffer_size); + return 0; +} + +static int pybuffer_copy_bytes_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + struct starpupy_buffer_interface *src = (struct starpupy_buffer_interface *) src_interface; + struct starpupy_buffer_interface *dst = (struct starpupy_buffer_interface *) dst_interface; + + starpu_interface_copy((uintptr_t) src->py_buffer, 0, src_node, + (uintptr_t) dst->py_buffer, 0, dst_node, + src->buffer_size, NULL); + 
starpu_interface_data_copy(src_node, dst_node, src->buffer_size); + return 0; +} + +static const struct starpu_data_copy_methods pybuffer_copy_data_methods_s = +{ + .any_to_any = pybuffer_copy_any_to_any, +}; + +static const struct starpu_data_copy_methods pybuffer_bytes_copy_data_methods_s = +{ + .ram_to_ram = pybuffer_copy_bytes_ram_to_ram, +}; + +struct starpu_data_interface_ops _starpupy_interface_pybuffer_ops = +{ + .register_data_handle = pybuffer_register_data_handle, + .unregister_data_handle = pybuffer_unregister_data_handle, + .allocate_data_on_node = pybuffer_allocate_data_on_node, + .free_data_on_node = pybuffer_free_data_on_node, + .cache_data_on_node = pybuffer_cache_data_on_node, + .reuse_data_on_node = pybuffer_reuse_data_on_node, + .map_data = pybuffer_map_data, + .unmap_data = pybuffer_unmap_data, + .update_map = pybuffer_update_map, + .get_size = pybuffer_get_size, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpupy_buffer_interface), + .footprint = starpupy_buffer_footprint, + .pack_data = pybuffer_pack_data, + .peek_data = pybuffer_peek_data, + .unpack_data = pybuffer_unpack_data, + .pack_meta = pybuffer_pack_meta, + .unpack_meta = pybuffer_unpack_meta, + .free_meta = pybuffer_free_meta, + .dontcache = 0, + .display = pybuffer_display, + .compare = pybuffer_compare, + .alloc_compare = pybuffer_alloc_compare, + .name = "STARPUPY_BUFFER_INTERFACE", + .copy_methods = &pybuffer_copy_data_methods_s, +}; + +/* we need another interface for bytes, bytearray, array.array, since we have to copy these objects between processes. 
+* some more explanations are here: https://discuss.python.org/t/adding-pybytes-frombuffer-and-similar-for-array-array/21717 +*/ +struct starpu_data_interface_ops _starpupy_interface_pybuffer_bytes_ops = +{ + .register_data_handle = pybuffer_register_data_handle, + .unregister_data_handle = pybuffer_unregister_data_handle, + .allocate_data_on_node = pybuffer_allocate_bytes_data_on_node, + .free_data_on_node = pybuffer_free_bytes_data_on_node, + .cache_data_on_node = pybuffer_cache_data_on_node, + .reuse_data_on_node = pybuffer_reuse_data_on_node, + .get_size = pybuffer_get_size, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpupy_buffer_interface), + .footprint = starpupy_buffer_footprint, + .pack_data = pybuffer_pack_data, + .peek_data = pybuffer_peek_data, + .unpack_data = pybuffer_unpack_data, + .dontcache = 0, + .display = pybuffer_display, + .compare = pybuffer_compare, + .alloc_compare = pybuffer_alloc_compare, + .name = "STARPUPY_BUFFER_BYTES_INTERFACE", + .copy_methods = &pybuffer_bytes_copy_data_methods_s, +}; + +#ifdef STARPU_PYTHON_HAVE_NUMPY +void starpupy_buffer_numpy_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, int ndim, npy_intp* arr_dim, int arr_type, size_t nitem) +{ + struct starpupy_buffer_interface pybuffer_interface = + { + .id = _starpupy_interface_pybuffer_ops.interfaceid, + .buffer_type = buf_type, + .py_buffer = pybuf, + .buffer_size = nbuf, + .dim_size = ndim, + .array_dim = arr_dim, + .array_type = arr_type, + .item_size = nitem + }; + + starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_ops); +} +#endif + +void starpupy_buffer_bytes_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, PyObject *obj) +{ + struct starpupy_buffer_interface pybuffer_interface = + { + .id = _starpupy_interface_pybuffer_ops.interfaceid, + .object = obj, + .buffer_type = buf_type, 
+ .py_buffer = pybuf, + .buffer_size = nbuf + }; + + starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_bytes_ops); +} + +void starpupy_buffer_array_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char arr_typecode, size_t nitem, PyObject *obj) +{ + struct starpupy_buffer_interface pybuffer_interface = + { + .id = _starpupy_interface_pybuffer_ops.interfaceid, + .object = obj, + .buffer_type = buf_type, + .py_buffer = pybuf, + .buffer_size = nbuf, + .typecode = arr_typecode, + .item_size = nitem + }; + + starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_bytes_ops); +} + +void starpupy_buffer_memview_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char mem_format, size_t nitem, int ndim, int* mem_shape) +{ + struct starpupy_buffer_interface pybuffer_interface = + { + .id = _starpupy_interface_pybuffer_ops.interfaceid, + .buffer_type = buf_type, + .py_buffer = pybuf, + .buffer_size = nbuf, + .typecode = mem_format, + .item_size = nitem, + .dim_size = ndim, + .shape = mem_shape + }; + + starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_ops); +} + +int starpupy_check_buffer_interface_id(starpu_data_handle_t handle) +{ + int interfaceid = (int)starpu_data_get_interface_id(handle); + return (interfaceid == _starpupy_interface_pybuffer_ops.interfaceid || interfaceid == _starpupy_interface_pybuffer_bytes_ops.interfaceid); +} diff --git a/starpupy/src/starpupy_buffer_interface.h b/starpupy/src/starpupy_buffer_interface.h new file mode 100644 index 0000000..babba97 --- /dev/null +++ b/starpupy/src/starpupy_buffer_interface.h @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include +#define PY_SSIZE_T_CLEAN +#include + +#ifdef STARPU_PYTHON_HAVE_NUMPY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#endif + +#include + +extern struct starpu_data_interface_ops _starpupy_interface_pybuffer_ops; +extern struct starpu_data_interface_ops _starpupy_interface_pybuffer_bytes_ops; + +struct starpupy_buffer_interface +{ + int id; /**< Identifier of the interface */ + enum BufType {starpupy_numpy_interface, starpupy_bytes_interface, starpupy_bytearray_interface, starpupy_array_interface, starpupy_memoryview_interface}buffer_type; + PyObject* object; /* For bytes, bytearray, array.array, object corresponding py_buffer */ + char* py_buffer; /* The buffer actually allocated to store the data */ + Py_ssize_t buffer_size; /* The size of py_buffer */ + int dim_size; /* For numpy objects, the dimension */ +#ifdef STARPU_PYTHON_HAVE_NUMPY + npy_intp* array_dim; /* For numpy objects, the shapes of the different dimensions */ +#endif + int array_type; /* The type of elements */ + size_t item_size; /* The size of elements */ + char typecode; /* For array.array, the type of elements */ + int* shape; /* For memoryview, the shape of each dimension */ +}; + +#ifdef STARPU_PYTHON_HAVE_NUMPY +void starpupy_buffer_numpy_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, int 
ndim, npy_intp* arr_dim, int arr_type, size_t nitem); +#endif + +void starpupy_buffer_bytes_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, PyObject* obj); + +void starpupy_buffer_array_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char arr_typecode, size_t nitem, PyObject* obj); + +void starpupy_buffer_memview_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char mem_format, size_t nitem, int ndim, int* mem_shape); + +int starpupy_check_buffer_interface_id(starpu_data_handle_t handle); + +PyObject* starpupy_buffer_get_numpy(struct starpupy_buffer_interface *pybuffer_interface); + +PyObject* starpupy_buffer_get_arrarr(struct starpupy_buffer_interface *pybuffer_interface); + +PyObject* starpupy_buffer_get_memview(struct starpupy_buffer_interface *pybuffer_interface); + +#define STARPUPY_BUF_CHECK(handle) (starpupy_check_buffer_interface_id(handle)) +#define STARPUPY_BUF_CHECK_INTERFACE(interface) (((struct starpupy_buffer_interface *)(interface))->id == _starpupy_interface_pybuffer_ops.interfaceid) + +#define STARPUPY_BUF_GET_TYPE(interface) (((struct starpupy_buffer_interface *)(interface))->buffer_type) +#define STARPUPY_BUF_GET_OBJ(interface) (Py_INCREF(((struct starpupy_buffer_interface *)(interface))->object), ((struct starpupy_buffer_interface *)(interface))->object) +#define STARPUPY_BUF_GET_PYBUF(interface) (((struct starpupy_buffer_interface *)(interface))->py_buffer) +#define STARPUPY_BUF_GET_NBUF(interface) (((struct starpupy_buffer_interface *)(interface))->buffer_size) +#define STARPUPY_BUF_GET_NDIM(interface) (((struct starpupy_buffer_interface *)(interface))->dim_size) +#define STARPUPY_BUF_GET_DIM(interface) (((struct starpupy_buffer_interface *)(interface))->array_dim) +#define STARPUPY_BUF_GET_ARRTYPE(interface) (((struct starpupy_buffer_interface *)(interface))->array_type) +#define 
STARPUPY_BUF_GET_NITEM(interface) (((struct starpupy_buffer_interface *)(interface))->item_size) +#define STARPUPY_BUF_GET_TYPECODE(interface) (((struct starpupy_buffer_interface *)(interface))->typecode) +#define STARPUPY_BUF_GET_SHAPE(interface) (((struct starpupy_buffer_interface *)(interface))->shape) + +#define STARPUPY_BUF_GET_PYNUMPY(interface) (starpupy_buffer_get_numpy(interface)) + +#define STARPUPY_BUF_GET_PYBYTES(interface) (PyBytes_FromStringAndSize(STARPUPY_BUF_GET_PYBUF(interface), STARPUPY_BUF_GET_NBUF(interface))) + +#define STARPUPY_BUF_GET_PYARRAY(interface) (starpupy_buffer_get_arrarr(interface)) + +#define STARPUPY_BUF_GET_PYMEMVIEW(interface) (starpupy_buffer_get_memview(interface)) + +#define STARPUPY_BUF_GET_PYOBJECT(interface)\ + (STARPUPY_BUF_GET_TYPE(interface)==starpupy_numpy_interface ? STARPUPY_BUF_GET_PYNUMPY(interface) \ + : STARPUPY_BUF_GET_TYPE(interface)==starpupy_bytes_interface || STARPUPY_BUF_GET_TYPE(interface)==starpupy_bytearray_interface || STARPUPY_BUF_GET_TYPE(interface)==starpupy_array_interface ? STARPUPY_BUF_GET_OBJ(interface) \ + : STARPUPY_BUF_GET_TYPE(interface)==starpupy_memoryview_interface ? STARPUPY_BUF_GET_PYMEMVIEW(interface) \ + : NULL) diff --git a/starpupy/src/starpupy_cloudpickle.h b/starpupy/src/starpupy_cloudpickle.h new file mode 100644 index 0000000..9ff974d --- /dev/null +++ b/starpupy/src/starpupy_cloudpickle.h @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#define PY_SSIZE_T_CLEAN +#include + +static PyObject *dumps; /*cloudpickle.dumps method*/ +static PyObject *loads; /*pickle.loads method*/ + +/*return the reference of PyBytes which must be kept while using obj_data. See documentation of PyBytes_AsStringAndSize()*/ +static inline PyObject* starpu_cloudpickle_dumps(PyObject *obj, char **obj_data, Py_ssize_t *obj_data_size) +{ + PyObject *obj_bytes= PyObject_CallFunctionObjArgs(dumps, obj, NULL); + + PyBytes_AsStringAndSize(obj_bytes, obj_data, obj_data_size); + + return obj_bytes; +} + +static inline PyObject* starpu_cloudpickle_loads(char* pyString, Py_ssize_t pyString_size) +{ + PyObject *obj_bytes_str = PyBytes_FromStringAndSize(pyString, pyString_size); + PyObject *obj = PyObject_CallFunctionObjArgs(loads, obj_bytes_str, NULL); + + Py_DECREF(obj_bytes_str); + + return obj; +} diff --git a/starpupy/src/starpupy_handle.c b/starpupy/src/starpupy_handle.c new file mode 100644 index 0000000..cae1b62 --- /dev/null +++ b/starpupy/src/starpupy_handle.c @@ -0,0 +1,809 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#undef NDEBUG +#include +#include "starpupy_interface.h" +#include "starpupy_buffer_interface.h" + +#define PY_SSIZE_T_CLEAN +#include + +#ifdef STARPU_PYTHON_HAVE_NUMPY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#endif + +#include "starpupy_handle.h" + +PyObject *starpu_module; /*starpu __init__ module*/ +PyObject *starpu_dict; /*starpu __init__ dictionary*/ + +/*register buffer protocol PyObject*/ +static PyObject* starpupy_object_register(PyObject *obj, PyObject *retval, char* mode) +{ + starpu_data_handle_t handle; + int home_node = 0; + + const char *tp = Py_TYPE(obj)->tp_name; + //printf("the type of object is %s\n", tp); + + /*if we are in master slave mode and the object is not a numpy array and a return value, it cannot work */ + if ((starpu_tcpip_ms_worker_get_count() >= 1 || starpu_mpi_ms_worker_get_count() >= 1) && strcmp(tp, "numpy.ndarray") != 0 && !PyObject_IsTrue(retval)) + { + RETURN_EXCEPTION("in master-slave mode, data handles are supported only for numpy arrays for now"); + } + + /*if the object is bytes*/ + if (strcmp(tp, "bytes")==0) + { + /*bytes size*/ + Py_ssize_t nbytes; + char* buf_bytes; + + PyBytes_AsStringAndSize(obj, &buf_bytes, &nbytes); + + /*register the buffer*/ + starpupy_buffer_bytes_register(&handle, home_node, starpupy_bytes_interface, buf_bytes, nbytes, obj); + } +#ifdef STARPU_PYTHON_HAVE_NUMPY + /*if the object is a numpy array*/ + else if (strcmp(tp, "numpy.ndarray")==0) + { + import_array(); + /*if array is not contiguous, treat it as a normal Python object*/ + if (!PyArray_IS_C_CONTIGUOUS((const PyArrayObject *)obj)&&!PyArray_IS_F_CONTIGUOUS((const PyArrayObject *)obj)) + { + if(mode != NULL && strcmp(mode, "R")!=0) + { + RETURN_EXCEPTION("The mode of object should not be other than R"); + } + else + { + starpupy_data_register(&handle, home_node, obj); + } + } + /*otherwise treat it as Python object supporting 
buffer protocol*/ + else + { + /*get number of dimension*/ + int ndim = PyArray_NDIM((const PyArrayObject *)obj); + /*get array dim*/ + npy_intp* arr_dim = PyArray_DIMS((PyArrayObject *)obj); + /*get the item size*/ + int nitem = PyArray_ITEMSIZE((const PyArrayObject *)obj); + /*get the array type*/ + int arr_type = PyArray_TYPE((const PyArrayObject *)obj); + + /*generate buffer of the array*/ + Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); + PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); + + /*register the buffer*/ + starpupy_buffer_numpy_register(&handle, home_node, starpupy_numpy_interface, view->buf, view->len, ndim, arr_dim, arr_type, nitem); + + PyBuffer_Release(view); + free(view); + } + } +#endif + /*if the object is bytearray*/ + else if (strcmp(tp, "bytearray")==0) + { + /*generate buffer of the array*/ + Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); + PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); + + /*register the buffer*/ + starpupy_buffer_bytes_register(&handle, home_node, starpupy_bytearray_interface, view->buf, view->len, obj); + + PyBuffer_Release(view); + free(view); + } + /*if the object is array.array*/ + else if (strcmp(tp, "array.array")==0) + { + /*get the arraytype*/ + PyObject* PyArrtype=PyObject_GetAttrString(obj,"typecode"); + + const char* type_str = PyUnicode_AsUTF8(PyArrtype); + char arr_type = type_str[0]; + + /*generate buffer of the array*/ + Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); + PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); + + /*register the buffer*/ + starpupy_buffer_array_register(&handle, home_node, starpupy_array_interface, view->buf, view->len, arr_type, view->itemsize, obj); + + Py_DECREF(PyArrtype); + PyBuffer_Release(view); + free(view); + } + /*if the object is memoryview*/ + else if (strcmp(tp, "memoryview")==0) + { + /*generate buffer of the memoryview*/ + Py_buffer *view = PyMemoryView_GET_BUFFER(obj); + + /*get the format of memoryview*/ + PyObject* 
PyFormat=PyObject_GetAttrString(obj,"format"); + + const char* format_str = PyUnicode_AsUTF8(PyFormat); + char mem_format = format_str[0]; + + PyObject* PyShape=PyObject_GetAttrString(obj,"shape"); + + int ndim = PyTuple_Size(PyShape); + int* mem_shape; + mem_shape = (int*)malloc(ndim*sizeof(int)); + int i; + for(i=0; ibuf, view->len, mem_format, view->itemsize, ndim, mem_shape); + + Py_DECREF(PyFormat); + Py_DECREF(PyShape); + free(mem_shape); + } + /*if the object is PyObject*/ + else + { + if(mode != NULL && strcmp(mode, "R")!=0) + { + RETURN_EXCEPTION("The mode of object should not be other than R"); + } + else + { + starpupy_data_register(&handle, home_node, obj); + } + } + + PyObject *handle_cap=PyCapsule_New(handle, "Handle", NULL); + + return handle_cap; +} + +/*register PyObject in a handle*/ +PyObject* starpupy_data_register_wrapper(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *handle_obj; + + if (!PyArg_ParseTuple(args, "OO", &obj, &handle_obj)) + return NULL; + + PyObject *retval = PyObject_CallMethod(handle_obj, "get_retval", NULL); + + /*register the python object*/ + PyObject *handle_cap = starpupy_object_register(obj, retval, NULL); + if (!handle_cap) + return handle_cap; + + const char *tp = Py_TYPE(obj)->tp_name; + //printf("the type of object is %s\n", tp); + /*if the object is immutable, store the obj_id and handle_obj in handle_set, and registering the same python object several times is authorised*/ + if (strcmp(tp, "int")==0 || strcmp(tp, "float")==0 || strcmp(tp, "str")==0 || strcmp(tp, "bool")==0 || strcmp(tp, "tuple")==0 || strcmp(tp, "range")==0 || strcmp(tp, "complex")==0 || strcmp(tp, "decimal.Decimal")==0 || strcmp(tp, "NoneType")==0) + { + /*set handle_obj in handle_set*/ + /*get handle_set*/ + PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); + + /*add new handle object in set*/ + PySet_Add(handle_set, handle_obj); + + Py_DECREF(handle_set); + } + /*if the object is mutable, store the 
obj_id and handle_obj in handle_dict, and should not register the same python object more than twice*/ + else + { + /*set the obj_id and handle_obj in handle_dict*/ + /*get handle_dict*/ + PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); + + /*get object id*/ + PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); + + if(PyDict_GetItem(handle_dict, obj_id)!=NULL) + { + RETURN_EXCEPT("Should not register the same mutable python object once more."); + } + + PyDict_SetItem(handle_dict, obj_id, handle_obj); + + Py_DECREF(handle_dict); + Py_DECREF(obj_id); + } + + return handle_cap; +} + +/*generate empty Numpy array*/ +PyObject* starpupy_numpy_register_wrapper(PyObject *self, PyObject *args) +{ +#ifdef STARPU_PYTHON_HAVE_NUMPY + /*get the first argument*/ + PyObject *dimobj = PyTuple_GetItem(args, 0); + /*protect borrowed reference, decrement after check*/ + Py_INCREF(dimobj); + /*detect whether user provides dtype or not*/ + int ndim; + npy_intp *dim; + /*if the first argument is integer, it's an array one dimension*/ + if(PyLong_Check(dimobj)) + { + ndim = 1; + dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); + dim[0] = PyLong_AsLong(dimobj); + } + /*if the first argument is a tuple, it contains information of dimension*/ + else if(PyTuple_Check(dimobj)) + { + ndim = PyTuple_Size(dimobj); + dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); + int i; + for (i=0; ihandle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + int ret; + /*call starpu_data_acquire*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_R); + Py_END_ALLOW_THREADS + if (ret!=0) + { + RETURN_EXCEPT("Unexpected value %d returned for starpu_data_acquire", ret); + } + + PyObject *obj = NULL; + if (STARPUPY_PYOBJ_CHECK(handle)) + { + struct starpupyobject_interface *pyobject_interface = (struct 
starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj = STARPUPY_GET_PYOBJECT(pyobject_interface); + } + + if (STARPUPY_BUF_CHECK(handle)) + { + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); + } + + /*call starpu_data_release method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_release(handle); + Py_END_ALLOW_THREADS + + if(obj == NULL) + { + RETURN_EXCEPT("Unexpected PyObject value NULL returned for get()"); + } + + return obj; +} + +PyObject *starpupy_handle_dict_check(PyObject *obj, char* mode, char* op) +{ + (void)mode; + /*get handle_dict*/ + PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); + /*get the arg id*/ + PyObject *obj_id = PyLong_FromVoidPtr(obj); //XXX in CPython, the pointer of object can be treated as it's id, in other implementation, it may be realised by other ways + + PyObject *handle_obj = NULL; + if (strcmp(op, "register") == 0) + { + /*check whether the arg is already registered*/ + if(PyDict_GetItem(handle_dict, obj_id)==NULL) + { + PyObject *Handle_class = PyDict_GetItemString(starpu_dict, "Handle"); + + /*get the constructor, decremented after being called*/ + PyObject *pInstanceHandle = PyInstanceMethod_New(Handle_class); + + /*create a Null Handle object, decremented in the end of this if{}*/ + PyObject *handle_arg = PyTuple_New(1); + /*obj is used for PyTuple_SetItem(handle_arg), once handle_arg is decremented, obj is decremented as well*/ + Py_INCREF(obj); + PyTuple_SetItem(handle_arg, 0, obj); + + /*generate the handle object, decremented in the end of this function*/ + handle_obj = PyObject_CallObject(pInstanceHandle,handle_arg); + + /*set the arg_id and handle in handle_dict*/ + PyDict_SetItem(handle_dict, obj_id, handle_obj); + + Py_DECREF(pInstanceHandle); + Py_DECREF(handle_arg); + } + else + { + 
handle_obj = PyDict_GetItem(handle_dict, obj_id); + /*protect borrowed reference, decremented in the end of this function*/ + Py_INCREF(handle_obj); + } + } + else if (strcmp(op, "exception") == 0) + { + /*check in handle_dict whether this arg is already registered*/ + if(!PyDict_Contains(handle_dict, obj_id)) + { + RETURN_EXCEPTION("Argument does not have registered handle"); + } + + /*get the corresponding handle of the obj*/ + handle_obj = PyDict_GetItem(handle_dict, obj_id); + /*protect borrowed reference, decremented in the end of this function*/ + Py_INCREF(handle_obj); + } + + Py_DECREF(handle_dict); + Py_DECREF(obj_id); + + /*get Handle capsule object, which is the return value of this function*/ + PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); + + Py_DECREF(handle_obj); + return handle_cap; +} + +/*acquire Handle*/ +PyObject *starpupy_acquire_handle_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_cap; + PyObject *pyMode; + + if (!PyArg_ParseTuple(args, "OO", &handle_cap, &pyMode)) + return NULL; + + const char* mode_str = PyUnicode_AsUTF8(pyMode); + char* obj_mode = strdup(mode_str); + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + int ret=0; + if(strcmp(obj_mode, "R") == 0) + { + /*call starpu_data_acquire(STARPU_R)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_R); + Py_END_ALLOW_THREADS + } + if(strcmp(obj_mode, "W") == 0) + { + /*call starpu_data_acquire(STARPU_W)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_W); + Py_END_ALLOW_THREADS + } + if(strcmp(obj_mode, "RW") == 0) + { + /*call starpu_data_acquire(STARPU_RW)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_RW); + Py_END_ALLOW_THREADS + } + + free(obj_mode); + + if (ret!=0) + { + RETURN_EXCEPT("Unexpected value 
returned for starpu_data_acquire"); + } + + PyObject *obj = NULL; + if (STARPUPY_PYOBJ_CHECK(handle)) + { + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj = STARPUPY_GET_PYOBJECT(pyobject_interface); + } + + if (STARPUPY_BUF_CHECK(handle)) + { + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); + } + + return obj; +} + +/*acquire PyObject Handle*/ +PyObject *starpupy_acquire_object_wrapper(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *pyMode; + + if (!PyArg_ParseTuple(args, "OO", &obj, &pyMode)) + return NULL; + + const char* mode_str = PyUnicode_AsUTF8(pyMode); + char* obj_mode = strdup(mode_str); + + /*get the corresponding handle capsule of the obj*/ + PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "register"); + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + Py_DECREF(handle_cap); + + int ret=0; + if(strcmp(obj_mode, "R") == 0) + { + /*call starpu_data_acquire(STARPU_R)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_R); + Py_END_ALLOW_THREADS + } + + if(strcmp(obj_mode, "W") == 0) + { + /*call starpu_data_acquire(STARPU_W)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_W); + Py_END_ALLOW_THREADS + } + + if(strcmp(obj_mode, "RW") == 0) + { + /*call starpu_data_acquire(STARPU_RW)*/ + Py_BEGIN_ALLOW_THREADS + ret= starpu_data_acquire(handle, STARPU_RW); + Py_END_ALLOW_THREADS + } + + free(obj_mode); + + if (ret!=0) + { + RETURN_EXCEPT("Unexpected value returned for starpu_data_acquire"); + } + + PyObject *obj_get = NULL; + if (STARPUPY_PYOBJ_CHECK(handle)) + { + struct starpupyobject_interface *pyobject_interface = (struct 
starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj_get = STARPUPY_GET_PYOBJECT(pyobject_interface); + } + + if (STARPUPY_BUF_CHECK(handle)) + { + struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + obj_get = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); + } + + return obj_get; +} + +/*release Handle*/ +PyObject *starpupy_release_handle_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_cap; + + if (!PyArg_ParseTuple(args, "O", &handle_cap)) + return NULL; + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + if (!STARPUPY_BUF_CHECK(handle)) + { + RETURN_EXCEPT("Wrong interface is used"); + } + + /*call starpu_data_release method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_release(handle); + Py_END_ALLOW_THREADS + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/*release PyObejct Handle*/ +PyObject *starpupy_release_object_wrapper(PyObject *self, PyObject *args) +{ + (void)self; + PyObject *obj; + + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + /*get the corresponding handle capsule of the obj*/ + PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); + + if(handle_cap == NULL) + { + Py_XDECREF(handle_cap); + return NULL; + } + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + Py_DECREF(handle_cap); + + /*call starpu_data_release method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_release(handle); + Py_END_ALLOW_THREADS + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +static void starpupy_remove_handle_from_dict(PyObject *obj_id) +{ + /*delete object from handle_dict*/ + PyObject 
*handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); + + if(PyDict_GetItem(handle_dict, obj_id) != NULL) + { + PyDict_DelItem(handle_dict, obj_id); + } + + Py_DECREF(handle_dict); +} + +static void starpupy_remove_handle_from_set(PyObject *handle_obj) +{ + /*delete object from handle_set*/ + PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); + + PySet_Discard(handle_set, handle_obj); + + Py_DECREF(handle_set); +} + +/* unregister handle*/ +PyObject *starpupy_data_unregister_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_obj; + + if (!PyArg_ParseTuple(args, "O", &handle_obj)) + return NULL; + + /*get the handle capsule*/ + PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); + /*get the id of arg*/ + PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + + starpupy_remove_handle_from_dict(obj_id); + starpupy_remove_handle_from_set(handle_obj); + + Py_DECREF(handle_cap); + Py_DECREF(obj_id); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/* unregister PyObject handle*/ +PyObject *starpupy_data_unregister_object_wrapper(PyObject *self, PyObject *args) +{ + PyObject *obj; + + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + /*get the corresponding handle capsule of the obj*/ + PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); + /*get the id of obj*/ + PyObject *obj_id = PyLong_FromVoidPtr(obj); + + if(handle_cap == NULL) + { + Py_XDECREF(handle_cap); + return NULL; + } + + /*PyObject *->handle*/ + starpu_data_handle_t handle 
= (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + + starpupy_remove_handle_from_dict(obj_id); + + Py_DECREF(handle_cap); + Py_DECREF(obj_id); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/* unregister_submit handle*/ +PyObject *starpupy_data_unregister_submit_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_obj; + + if (!PyArg_ParseTuple(args, "O", &handle_obj)) + return NULL; + + /*get the handle capsule*/ + PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); + /*get the id of arg*/ + PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister_submit(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + + starpupy_remove_handle_from_dict(obj_id); + starpupy_remove_handle_from_set(handle_obj); + + Py_DECREF(handle_cap); + Py_DECREF(obj_id); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} + +/* unregister_submit PyObject handle*/ +PyObject *starpupy_data_unregister_submit_object_wrapper(PyObject *self, PyObject *args) +{ + PyObject *obj; + + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + /*get the corresponding handle capsule of the obj*/ + PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); + /*get the id of obj*/ + PyObject *obj_id = PyLong_FromVoidPtr(obj); + + if(handle_cap == NULL) + { + 
Py_XDECREF(handle_cap); + return NULL; + } + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); + + if (handle == (void*)-1) + { + PyErr_Format(PyObject_GetAttrString(self, "error"), "Handle has already been unregistered"); + return NULL; + } + + /*call starpu_data_unregister method*/ + Py_BEGIN_ALLOW_THREADS + starpu_data_unregister_submit(handle); + Py_END_ALLOW_THREADS + + PyCapsule_SetPointer(handle_cap, (void*)-1); + + starpupy_remove_handle_from_dict(obj_id); + + Py_DECREF(handle_cap); + Py_DECREF(obj_id); + + /*return type is void*/ + Py_INCREF(Py_None); + return Py_None; +} diff --git a/starpupy/src/starpupy_handle.h b/starpupy/src/starpupy_handle.h new file mode 100644 index 0000000..f749278 --- /dev/null +++ b/starpupy/src/starpupy_handle.h @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#define PY_SSIZE_T_CLEAN +#include + +extern PyObject *starpu_module; /*starpu __init__ module*/ +extern PyObject *starpu_dict; /*starpu __init__ dictionary*/ + +PyObject *starpupy_handle_dict_check(PyObject *obj, char* mode, char* op); + +PyObject *starpupy_data_register_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_numpy_register_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_get_object_wrapper(PyObject *self, PyObject *args); + +PyObject *starpupy_acquire_handle_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_acquire_object_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_release_handle_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_release_object_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_data_unregister_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_data_unregister_object_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_data_unregister_submit_wrapper(PyObject *self, PyObject *args); +PyObject *starpupy_data_unregister_submit_object_wrapper(PyObject *self, PyObject *args); + diff --git a/starpupy/src/starpupy_interface.c b/starpupy/src/starpupy_interface.c new file mode 100644 index 0000000..8207b15 --- /dev/null +++ b/starpupy/src/starpupy_interface.c @@ -0,0 +1,334 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#undef NDEBUG +#include +#define PY_SSIZE_T_CLEAN +#include + +#ifdef STARPU_PYTHON_HAVE_NUMPY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#endif + +#include "starpupy_interface.h" + +void starpupy_set_pyobject(struct starpupyobject_interface *pyobject_interface, PyObject *value) +{ + if (pyobject_interface->object != NULL) + Py_DECREF(pyobject_interface->object); + + pyobject_interface->object = value; +} + +static void pyobject_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) data_interface; + + int node; + for (node =0; node < STARPU_MAXNODES; node++) + { + struct starpupyobject_interface *local_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); + + local_interface->id = pyobject_interface->id; + if (node == home_node) + { + Py_INCREF(pyobject_interface->object); + local_interface->object = pyobject_interface->object; + } + else + { + local_interface->object = NULL; + } + } +} + +static void pyobject_unregister_data_handle(starpu_data_handle_t handle) +{ + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + int node = starpu_data_get_home_node(handle); + if (node >= 0) + { + struct starpupyobject_interface *local_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); + + Py_DECREF(local_interface->object); + local_interface->object = NULL; + } + + /* release GIL */ + PyGILState_Release(state); +} + +static starpu_ssize_t pyobject_allocate_data_on_node(void *data_interface, unsigned node) +{ + (void)node; + (void)data_interface; + return 0; +} + +static void pyobject_free_data_on_node(void *data_interface, unsigned node) +{ + (void)node; + /*make sure we own the GIL*/ + PyGILState_STATE state = 
PyGILState_Ensure(); + + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) data_interface; + + if (pyobject_interface->object != NULL) + { + Py_DECREF(pyobject_interface->object); + } + pyobject_interface->object = NULL; + + /* release GIL */ + PyGILState_Release(state); +} + +static size_t pyobject_get_size(starpu_data_handle_t handle) +{ +#ifdef STARPU_DEVEL +#warning this operation is needed for fxt tracing when calling starpu_data_register(), using the cloudpickle as below does not seem to work +#endif + (void)handle; + return sizeof(struct starpupyobject_interface); +} + +/*return the reference of PyBytes which must be kept while using obj_data. See documentation of PyBytes_AsStringAndSize()*/ +static PyObject * _pyobject_pack_data(struct starpupyobject_interface *pyobject_interface, char **obj_data, Py_ssize_t *obj_data_size) +{ + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + /*borrow the reference from the interface*/ + PyObject *obj = pyobject_interface->object; + + PyObject *cloudpickle_module = PyImport_ImportModule("cloudpickle"); + if (cloudpickle_module == NULL) + { + printf("can't find cloudpickle module\n"); + Py_XDECREF(cloudpickle_module); + exit(1); + } + PyObject *dumps = PyObject_GetAttrString(cloudpickle_module, "dumps"); + PyObject *obj_bytes = PyObject_CallFunctionObjArgs(dumps, obj, NULL); + + PyBytes_AsStringAndSize(obj_bytes, obj_data, obj_data_size); + + Py_DECREF(cloudpickle_module); + Py_DECREF(dumps); + + /*restore previous GIL state*/ + PyGILState_Release(state); + + return obj_bytes; +} + +static int pyobject_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) +{ + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); + PyObject *obj_bytes; + char *obj_data; + Py_ssize_t obj_data_size; + + /*make sure we own the GIL*/ + PyGILState_STATE 
state = PyGILState_Ensure(); + + obj_bytes = _pyobject_pack_data(pyobject_interface, &obj_data, &obj_data_size); + + char *data; + data = (void*)starpu_malloc_on_node_flags(node, obj_data_size, 0); + + memcpy(data, obj_data, obj_data_size); + + *ptr = data; + *count = obj_data_size; + + Py_DECREF(obj_bytes); + + /* release GIL */ + PyGILState_Release(state); + + return 0; +} + +static int _pyobject_peek_data(struct starpupyobject_interface *pyobject_interface, unsigned node, void *ptr, size_t count) +{ + (void)node; + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + char *data = ptr; + PyObject *pickle_module = PyImport_ImportModule("pickle"); + if (pickle_module == NULL) + { + printf("can't find pickle module\n"); + Py_XDECREF(pickle_module); + exit(1); + } + PyObject *loads = PyObject_GetAttrString(pickle_module, "loads"); + /* TODO: should tell python that we want allocation to happen on node \p node */ + PyObject *obj_bytes_str = PyBytes_FromStringAndSize(data, count); + PyObject *obj= PyObject_CallFunctionObjArgs(loads, obj_bytes_str, NULL); + if(pyobject_interface->object != NULL) + Py_DECREF(pyobject_interface->object); + pyobject_interface->object = obj; + pyobject_interface->id = _starpupy_interface_pyobject_ops.interfaceid; + + Py_DECREF(pickle_module); + Py_DECREF(loads); + Py_DECREF(obj_bytes_str); + + /*restore previous GIL state*/ + PyGILState_Release(state); + + return 0; +} + +static int pyobject_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); + + if (pyobject_interface->object != NULL) + { + Py_DECREF(pyobject_interface->object); + } + + return _pyobject_peek_data(pyobject_interface, node, ptr, count); +} + +static int pyobject_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) +{ + pyobject_peek_data(handle, node, 
ptr, count); + + starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); + + return 0; +} + +static uint32_t starpupy_footprint(starpu_data_handle_t handle) +{ + struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + /*borrow the reference from the interface*/ + PyObject *obj = pyobject_interface->object; + + /*fet obj.__class__*/ + PyObject *obj_class=PyObject_GetAttrString(obj,"__class__"); + // PyObject_Print(obj_class, stdout, 0); + // printf("\n"); + + uint32_t crc = 0; + crc=starpu_hash_crc32c_be_ptr(obj_class, crc); + + Py_DECREF(obj_class); + +#ifdef STARPU_PYTHON_HAVE_NUMPY + const char *tp = Py_TYPE(obj)->tp_name; + /*if the object is a numpy array*/ + if (strcmp(tp, "numpy.ndarray")==0) + { + import_array1(0); + /*get the array size*/ + int n1 = PyArray_SIZE((PyArrayObject *)obj); + /*get the item size*/ + int n2 = PyArray_ITEMSIZE((const PyArrayObject *)obj); + + crc=starpu_hash_crc32c_be(n1, crc); + crc=starpu_hash_crc32c_be(n2, crc); + } + else +#endif + { + crc=starpu_hash_crc32c_be_ptr(obj, crc); + } + + /*restore previous GIL state*/ + PyGILState_Release(state); + + return crc; +} + +static int pyobject_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + (void)src_node; + struct starpupyobject_interface *src = (struct starpupyobject_interface *) src_interface; + struct starpupyobject_interface *dst = (struct starpupyobject_interface *) dst_interface; + + PyObject *obj_bytes; + char *obj_data; + Py_ssize_t obj_data_size; + + /*make sure we own the GIL*/ + PyGILState_STATE state = PyGILState_Ensure(); + + obj_bytes = _pyobject_pack_data(src, &obj_data, &obj_data_size); + + _pyobject_peek_data(dst, dst_node, obj_data, obj_data_size); + + Py_DECREF(obj_bytes); + + /* release GIL */ + PyGILState_Release(state); + + return 0; 
+} + +static const struct starpu_data_copy_methods pyobject_copy_data_methods_s = +{ + .ram_to_ram = pyobject_copy_ram_to_ram, +}; + +struct starpu_data_interface_ops _starpupy_interface_pyobject_ops = +{ + .register_data_handle = pyobject_register_data_handle, + .unregister_data_handle = pyobject_unregister_data_handle, + .allocate_data_on_node = pyobject_allocate_data_on_node, + .free_data_on_node = pyobject_free_data_on_node, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct starpupyobject_interface), + .footprint = starpupy_footprint, + .pack_data = pyobject_pack_data, + .peek_data = pyobject_peek_data, + .unpack_data = pyobject_unpack_data, + .get_size = pyobject_get_size, + .dontcache = 1, + .name = "STARPUPY_OBJECT_INTERFACE", + .copy_methods = &pyobject_copy_data_methods_s, +}; + +void starpupy_data_register(starpu_data_handle_t *handleptr, unsigned home_node, PyObject *obj) +{ + assert(_starpupy_interface_pyobject_ops.interfaceid != STARPU_UNKNOWN_INTERFACE_ID); + struct starpupyobject_interface pyobject_interface = + { + .id = _starpupy_interface_pyobject_ops.interfaceid, + .object = obj + }; + + starpu_data_register(handleptr, home_node, &pyobject_interface, &_starpupy_interface_pyobject_ops); + +} + +int starpupy_check_pyobject_interface_id(starpu_data_handle_t handle) +{ + int interfaceid = (int)starpu_data_get_interface_id(handle); + return interfaceid == _starpupy_interface_pyobject_ops.interfaceid; +} diff --git a/starpupy/src/starpupy_interface.h b/starpupy/src/starpupy_interface.h new file mode 100644 index 0000000..d73215e --- /dev/null +++ b/starpupy/src/starpupy_interface.h @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#define PY_SSIZE_T_CLEAN +#include +#include + +extern struct starpu_data_interface_ops _starpupy_interface_pyobject_ops; + +struct starpupyobject_interface +{ + int id; /**< Identifier of the interface */ + PyObject *object; +}; + +void starpupy_data_register(starpu_data_handle_t *handleptr, unsigned home_node, PyObject *obj); + +int starpupy_check_pyobject_interface_id(starpu_data_handle_t handle); + +/* Steals a reference to value */ +void starpupy_set_pyobject(struct starpupyobject_interface *pyobject_interface, PyObject *value); + +#define STARPUPY_PYOBJ_CHECK(handle) (starpupy_check_pyobject_interface_id(handle)) +#define STARPUPY_PYOBJ_CHECK_INTERFACE(interface) (((struct starpupyobject_interface *)(interface))->id == _starpupy_interface_pyobject_ops.interfaceid) + +#define STARPUPY_GET_PYOBJECT(interface) (Py_INCREF(((struct starpupyobject_interface *)(interface))->object), ((struct starpupyobject_interface *)(interface))->object) + +#define STARPUPY_SET_PYOBJECT(interface, value) (starpupy_set_pyobject(interface, value)) diff --git a/starpupy/src/starpupy_numpy_filters.c b/starpupy/src/starpupy_numpy_filters.c new file mode 100644 index 0000000..5a771b4 --- /dev/null +++ b/starpupy/src/starpupy_numpy_filters.c @@ -0,0 +1,297 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#undef NDEBUG +#include +#define PY_SSIZE_T_CLEAN +#include + +#ifdef STARPU_PYTHON_HAVE_NUMPY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#endif + +#include "starpupy_buffer_interface.h" +#include "starpupy_numpy_filters.h" + +static void starpupy_numpy_filter(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) +{ + struct starpupy_buffer_interface *buffer_father = (struct starpupy_buffer_interface *) father_interface; + struct starpupy_buffer_interface *buffer_child = (struct starpupy_buffer_interface *) child_interface; + + size_t elemsize = buffer_father->item_size; + + STARPU_ASSERT_MSG(buffer_father->id == _starpupy_interface_pybuffer_ops.interfaceid, "%s can only be applied on a vector data", __func__); + + /*get the ndim*/ + int ndim = buffer_father->dim_size; + +#ifdef STARPU_PYTHON_HAVE_NUMPY + Py_ssize_t nbuf = buffer_father->buffer_size; + int narr = nbuf/elemsize; + + int child_narr; + size_t offset; + + int dim = f->filter_arg; + + unsigned ni[ndim]; + int i; + for (i=0; iarray_dim[i]; + } + + unsigned nn = ni[dim]; + unsigned ld; + + if (dim == 0 && ndim != 1) + { + ld = ni[1]; + } + else if (dim == 1 || ndim == 1) + { + ld = 1; + } + else + { + ld = 1; + for (i=0; ifilter_arg_ptr; + + if (chunks_list != NULL) + { + child_nn = 
chunks_list[id]; + unsigned chunk_nn = 0; + unsigned j = 0; + while(j < id) + { + chunk_nn = chunk_nn + chunks_list[j]; + j++; + } + offset = chunk_nn * ld * elemsize; + } + else + { + starpu_filter_nparts_compute_chunk_size_and_offset(nn, nchunks, elemsize, id, ld, &child_nn, &offset); + } + + child_narr = narr/nn*child_nn; + + if(buffer_father->py_buffer) + buffer_child->py_buffer = buffer_father->py_buffer + offset; + + buffer_child->buffer_size = child_narr * elemsize; + + npy_intp *child_dim; + child_dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); + for (i=0; iarray_dim = child_dim; +#endif + buffer_child->id = buffer_father->id; + buffer_child->buffer_type = buffer_father->buffer_type; + buffer_child->dim_size = ndim; + buffer_child->array_type = buffer_father->array_type; + buffer_child->item_size = elemsize; + +} + +/*wrapper data partition*/ +PyObject* starpu_data_partition_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_obj; + int nparts; + int dim; + PyObject *chunks_list; + + if (!PyArg_ParseTuple(args, "OIIO", &handle_obj, &nparts, &dim, &chunks_list)) + return NULL; + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + int node = starpu_data_get_home_node(handle); + struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); + + int ndim = local_interface->dim_size; + + if (ndim <= 0) + { + RETURN_EXCEPT("Dimension size %d must be greater than 0.", ndim); + } + + if (dim < 0) + { + RETURN_EXCEPT("The given dimension dim %d must not be less than 0.", dim); + } + + if (dim >= ndim) + { + RETURN_EXCEPT("dim %d must be less than dimension size %d.", dim, ndim); + } + + int i; + int dim_len = 0; + int nlist = PyList_Size(chunks_list); + int nchunks[nparts]; + + if(nlist != 0) + { + if (nlist != 
nparts) + { + RETURN_EXCEPT("The chunk list size %d does not correspond to the required split size %d.", nlist, nparts); + } + + for (i=0; iarray_dim[dim]) + { + RETURN_EXCEPT("The total length of segments in chunk list %d must be equal to the length of selected dimension %d.", dim_len, local_interface->array_dim[dim]); + } +#endif + } + + /*filter func*/ + struct starpu_data_filter f; + starpu_data_handle_t handles[nparts]; + + f.filter_func = starpupy_numpy_filter; + f.nchildren = nparts; + f.get_nchildren = 0; + f.get_child_ops = 0; + f.filter_arg_ptr = (nlist==0) ? NULL : nchunks; + /* partition along the given dimension */ + f.filter_arg = dim; + + Py_BEGIN_ALLOW_THREADS + starpu_data_partition_plan(handle, &f, handles); + Py_END_ALLOW_THREADS + + PyObject *handle_list = PyList_New(nparts); + for(i=0; ihandle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + PyObject *arr_size = PyList_New(nparts); + + int i; + for(i=0; ibuffer_size/local_interface->item_size; + + PyList_SetItem(arr_size, i, Py_BuildValue("I", narr)); + + Py_DECREF(handles_cap); + } + + return arr_size; +} + +/*wrapper data unpartition*/ +PyObject* starpu_data_unpartition_wrapper(PyObject *self, PyObject *args) +{ + PyObject *handle_obj; + PyObject *handle_list; + int nparts; + + if (!PyArg_ParseTuple(args, "OOI", &handle_obj, &handle_list, &nparts)) + return NULL; + + /*PyObject *->handle*/ + starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); + + if (handle == (void*)-1) + { + RETURN_EXCEPT("Handle has already been unregistered"); + } + + starpu_data_handle_t handles[nparts]; + + int i; + for(i=0; i +#define PY_SSIZE_T_CLEAN +#include + +PyObject* starpu_data_partition_wrapper(PyObject *self, PyObject *args); +PyObject* starpupy_get_partition_size_wrapper(PyObject *self, PyObject *args); +PyObject* 
starpu_data_unpartition_wrapper(PyObject *self, PyObject *args); diff --git a/starpupy/src/starpupy_private.h b/starpupy/src/starpupy_private.h new file mode 100644 index 0000000..969232b --- /dev/null +++ b/starpupy/src/starpupy_private.h @@ -0,0 +1,36 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPUPY__PRIVATE_H +#define __STARPUPY__PRIVATE_H + +#define RETURN_EXCEPT(...) do{ \ + PyObject *starpupy_err = PyObject_GetAttrString(self, "error"); \ + PyErr_Format(starpupy_err, __VA_ARGS__); \ + Py_DECREF(starpupy_err); \ + return NULL;\ +}while(0) + +#define RETURN_EXCEPTION(...) do{ \ + PyObject *starpupy_module = PyObject_GetAttrString(starpu_module, "starpupy"); \ + PyObject *starpupy_err = PyObject_GetAttrString(starpupy_module, "error"); \ + PyErr_Format(starpupy_err, __VA_ARGS__); \ + Py_DECREF(starpupy_module); \ + Py_DECREF(starpupy_err); \ + return NULL;\ +}while(0) + +#endif // __STARPUPY__PRIVATE_H diff --git a/starpurm/Makefile.am b/starpurm/Makefile.am new file mode 100644 index 0000000..aa098af --- /dev/null +++ b/starpurm/Makefile.am @@ -0,0 +1,34 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +include $(top_srcdir)/make/starpu-subdirtests.mk + +SUBDIRS = src +SUBDIRS += tests + +if STARPU_BUILD_STARPURM_EXAMPLES +SUBDIRS += examples +endif + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/starpurm-1.3.pc + +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpurm.h + +nodist_versinclude_HEADERS = \ + include/starpurm_config.h diff --git a/starpurm/Makefile.in b/starpurm/Makefile.in new file mode 100644 index 0000000..fd9a7a8 --- /dev/null +++ b/starpurm/Makefile.in @@ -0,0 +1,1004 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_BUILD_STARPURM_EXAMPLES_TRUE@am__append_1 = examples +subdir = starpurm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES 
= +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)" +DATA = $(pkgconfig_DATA) +HEADERS = $(nodist_versinclude_HEADERS) $(versinclude_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = src tests examples +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/make/starpu-subdirtests.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ 
+DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE 
= @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ 
+PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = 
@STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ 
+doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src tests $(am__append_1) +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = packages/starpurm-1.3.pc +versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) +versinclude_HEADERS = \ + include/starpurm.h + +nodist_versinclude_HEADERS = \ + include/starpurm_config.h + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/Makefile'; \ + $(am__cd) 
$(top_srcdir) && \ + $(AUTOMAKE) --foreign starpurm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-nodist_versincludeHEADERS: $(nodist_versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) 
'$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-nodist_versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) +install-versincludeHEADERS: $(versinclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ + done + +uninstall-versincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-nodist_versincludeHEADERS \ + install-pkgconfigDATA install-versincludeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-nodist_versincludeHEADERS \ + uninstall-pkgconfigDATA uninstall-versincludeHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic 
clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-nodist_versincludeHEADERS install-pdf install-pdf-am \ + install-pkgconfigDATA install-ps install-ps-am install-strip \ + install-versincludeHEADERS installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-nodist_versincludeHEADERS \ + uninstall-pkgconfigDATA uninstall-versincludeHEADERS + +.PRECIOUS: Makefile + + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +recheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i recheck || RET=1 ; \ + done ; \ + exit $$RET + +showcheckfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + $(MAKE) $(AM_MAKEFLAGS) -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpurm/examples/Makefile.am b/starpurm/examples/Makefile.am new file mode 100644 index 0000000..c2326e2 --- /dev/null +++ b/starpurm/examples/Makefile.am @@ -0,0 +1,42 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+# +include $(top_srcdir)/make/starpu-tests.mk +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +AM_CFLAGS += $(DLB_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include +AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(HWLOC_LIBS) $(DLB_LIBS) + +EXTRA_DIST = \ + chameleon/dgemm.c \ + cuda_vector_scale/vector_scale.c \ + cuda_vector_scale/vs_cuda_kernel.cu + +examplebindir = $(libdir)/starpu/examples/starpurm + +examplebin_PROGRAMS = $(STARPU_EXAMPLES) +TESTS = $(STARPU_EXAMPLES) + +STARPU_EXAMPLES = \ + async_spawn \ + spawn \ + vector_scale \ + block_test/block_test diff --git a/starpurm/examples/Makefile.in b/starpurm/examples/Makefile.in new file mode 100644 index 0000000..9cb3f2e --- /dev/null +++ b/starpurm/examples/Makefile.in @@ -0,0 +1,1597 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +examplebin_PROGRAMS = $(am__EXEEXT_1) +TESTS = $(am__EXEEXT_1) +subdir = starpurm/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = async_spawn$(EXEEXT) spawn$(EXEEXT) \ + vector_scale$(EXEEXT) block_test/block_test$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" +PROGRAMS = $(examplebin_PROGRAMS) +async_spawn_SOURCES = async_spawn.c +async_spawn_OBJECTS = async_spawn.$(OBJEXT) +async_spawn_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +block_test_block_test_SOURCES = block_test/block_test.c +am__dirstamp = $(am__leading_dot)dirstamp +block_test_block_test_OBJECTS = block_test/block_test.$(OBJEXT) +block_test_block_test_LDADD = $(LDADD) +spawn_SOURCES = spawn.c +spawn_OBJECTS = spawn.$(OBJEXT) +spawn_LDADD = $(LDADD) +vector_scale_SOURCES = vector_scale.c +vector_scale_OBJECTS = vector_scale.$(OBJEXT) +vector_scale_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : 
+AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/async_spawn.Po ./$(DEPDIR)/spawn.Po \ + ./$(DEPDIR)/vector_scale.Po block_test/$(DEPDIR)/block_test.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = async_spawn.c block_test/block_test.c spawn.c vector_scale.c +DIST_SOURCES = async_spawn.c block_test/block_test.c spawn.c \ + vector_scale.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) 
(install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ 
+APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = 
@INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ \ + $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la \ + $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ + $(HWLOC_LIBS) $(DLB_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO 
= @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ 
+STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = 
@STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ 
+target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = +LAUNCHER = +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(DLB_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = 
$(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ + -I$(top_builddir)/src -I$(top_builddir)/include \ + -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ + -I$(top_builddir)/starpurm/src \ + -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +EXTRA_DIST = \ + chameleon/dgemm.c \ + cuda_vector_scale/vector_scale.c \ + cuda_vector_scale/vs_cuda_kernel.cu + +examplebindir = $(libdir)/starpu/examples/starpurm +STARPU_EXAMPLES = \ + async_spawn \ + spawn \ + vector_scale \ + block_test/block_test + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign 
starpurm/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpurm/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +async_spawn$(EXEEXT): $(async_spawn_OBJECTS) $(async_spawn_DEPENDENCIES) $(EXTRA_async_spawn_DEPENDENCIES) + @rm -f async_spawn$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(async_spawn_OBJECTS) $(async_spawn_LDADD) $(LIBS) +block_test/$(am__dirstamp): + @$(MKDIR_P) block_test + @: > block_test/$(am__dirstamp) +block_test/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) block_test/$(DEPDIR) + @: > block_test/$(DEPDIR)/$(am__dirstamp) +block_test/block_test.$(OBJEXT): block_test/$(am__dirstamp) \ + block_test/$(DEPDIR)/$(am__dirstamp) + +block_test/block_test$(EXEEXT): $(block_test_block_test_OBJECTS) $(block_test_block_test_DEPENDENCIES) $(EXTRA_block_test_block_test_DEPENDENCIES) block_test/$(am__dirstamp) + @rm -f block_test/block_test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(block_test_block_test_OBJECTS) $(block_test_block_test_LDADD) $(LIBS) + +spawn$(EXEEXT): $(spawn_OBJECTS) $(spawn_DEPENDENCIES) $(EXTRA_spawn_DEPENDENCIES) + @rm -f spawn$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(spawn_OBJECTS) 
$(spawn_LDADD) $(LIBS) + +vector_scale$(EXEEXT): $(vector_scale_OBJECTS) $(vector_scale_DEPENDENCIES) $(EXTRA_vector_scale_DEPENDENCIES) + @rm -f vector_scale$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(vector_scale_OBJECTS) $(vector_scale_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f block_test/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/async_spawn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spawn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vector_scale.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@block_test/$(DEPDIR)/block_test.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 
's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf block_test/.libs block_test/_libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + 
include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set 
+e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +async_spawn.log: async_spawn$(EXEEXT) + @p='async_spawn$(EXEEXT)'; \ + b='async_spawn'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +spawn.log: spawn$(EXEEXT) + @p='spawn$(EXEEXT)'; \ + b='spawn'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +vector_scale.log: vector_scale$(EXEEXT) + @p='vector_scale$(EXEEXT)'; \ + b='vector_scale'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +block_test/block_test.log: block_test/block_test$(EXEEXT) + @p='block_test/block_test$(EXEEXT)'; \ + b='block_test/block_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + 
$(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(PROGRAMS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(examplebindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi 
+mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f block_test/$(DEPDIR)/$(am__dirstamp) + -rm -f block_test/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-examplebinPROGRAMS clean-generic clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/async_spawn.Po + -rm -f ./$(DEPDIR)/spawn.Po + -rm -f ./$(DEPDIR)/vector_scale.Po + -rm -f block_test/$(DEPDIR)/block_test.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-examplebinPROGRAMS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/async_spawn.Po + -rm -f ./$(DEPDIR)/spawn.Po + -rm -f ./$(DEPDIR)/vector_scale.Po + -rm -f block_test/$(DEPDIR)/block_test.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + 
+pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS + +.MAKE: $(am__recursive_targets) check-am install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-TESTS check-am clean \ + clean-examplebinPROGRAMS clean-generic clean-libtool \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + recheck tags tags-am uninstall uninstall-am \ + uninstall-examplebinPROGRAMS + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed 
|| RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/starpurm/examples/async_spawn.c b/starpurm/examples/async_spawn.c new file mode 100644 index 0000000..a3dcd3c --- /dev/null +++ b/starpurm/examples/async_spawn.c @@ -0,0 +1,317 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM, + * making use of both the main RM API and the spawn_kernel_on_cpus API func */ + +#include +#include +#include +#include +#include +#include +#include + +static int rm_cpu_type_id = -1; +static int rm_nb_cpu_units = 0; + +static void usage(void); +static void test1(const int N); +static void test2(const int N, const int task_mult); +static void init_rm_infos(void); + +static unsigned spawn_pending = 0; +static pthread_mutex_t spawn_pending_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t spawn_pending_cond; + +static void _inc_spawn_pending(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + assert(spawn_pending < UINT_MAX); + spawn_pending++; + pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void _dec_spawn_pending(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + assert(spawn_pending > 0); + spawn_pending--; + if (spawn_pending == 0) + pthread_cond_broadcast(&spawn_pending_cond); + pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void _wait_pending_spawns(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + while (spawn_pending > 0) + pthread_cond_wait(&spawn_pending_cond, &spawn_pending_mutex); + pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void spawn_callback(void *_arg) +{ + assert(42 == (uintptr_t)_arg); + _dec_spawn_pending(); +} + +/* vector scale codelet */ +static void vector_scale_func(void *cl_buffers[], void *cl_arg) +{ + double scalar = -1.0; + int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); + int i; + starpu_codelet_unpack_args(cl_arg, &scalar); + + int workerid = starpu_worker_get_id(); + hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); + printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, 
worker cpuset = %s\n", workerid, vector, n, scalar, str1); + } + hwloc_bitmap_free(worker_cpuset); + + for (i = 0; i < n; i++) + { + vector[i] *= scalar; + } +} + +static struct starpu_codelet vector_scale_cl = +{ + .cpu_funcs = {vector_scale_func}, + .nbuffers = 1 +}; + +/* main routines */ +static void usage(void) +{ + fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); + exit(1); +} + +static void test1(const int N) +{ + double *vector = NULL; + const double scalar = 2.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + starpu_task_wait_for_all(); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void test2(const int N, const int task_mult) +{ + double *vector = NULL; + const double scalar = 3.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + struct starpu_data_filter partition_filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = rm_nb_cpu_units * task_mult + }; + + starpu_data_partition(vector_handle, &partition_filter); + + { + int i; + for (i = 0; i < rm_nb_cpu_units*task_mult; i++) + { + starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); + ret = 
starpu_task_insert(&vector_scale_cl, + STARPU_RW, sub_vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + } + } + starpu_task_wait_for_all(); + starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void init_rm_infos(void) +{ + int cpu_type = starpurm_get_device_type_id("cpu"); + int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); + if (nb_cpu_units < 1) + { + /* No CPU unit available. */ + exit(77); + } + + rm_cpu_type_id = cpu_type; + rm_nb_cpu_units = nb_cpu_units; +} + +static void kernel_to_spawn(void *args) +{ + int param_N = *(int*)args; + //test1(param_N); + test2(param_N, 1); + //test2(param_N, 10); + //test2(param_N, 100); +} + +int main(int argc, char *argv[]) +{ + pthread_cond_init(&spawn_pending_cond, NULL); + int param_N = 1000000; + int drs_enabled; + if (argc > 1) + { + param_N = atoi(argv[1]); + if (param_N < 1) + { + usage(); + } + } + + hwloc_cpuset_t init_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(init_cpuset, 0, 7); + starpurm_initialize_with_cpuset(init_cpuset); + hwloc_bitmap_free(init_cpuset); + init_rm_infos(); + if (rm_nb_cpu_units > 1) + { + const int half_nb_cpus = rm_nb_cpu_units/2; + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled != 0); + + int repeat; + for (repeat=0; repeat < 20; repeat++) + { + hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); + printf("all cpus cpuset = %s\n", str1); + } + int first_idx = hwloc_bitmap_first(cpu_cpuset); + int last_idx = hwloc_bitmap_last(cpu_cpuset); + hwloc_cpuset_t 
sel_cpuset = hwloc_bitmap_alloc(); + assert(sel_cpuset != NULL); + int count = 0; + int idx = first_idx; + while (idx != -1 && idx <= last_idx && count < half_nb_cpus) + { + if (hwloc_bitmap_isset(cpu_cpuset, idx)) + { + hwloc_bitmap_set(sel_cpuset, idx); + count ++; + } + idx = hwloc_bitmap_next(cpu_cpuset, idx); + } + assert(count == half_nb_cpus); + + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, sel_cpuset); + printf("spawning a kernel on cpuset = %s\n", str1); + } + _inc_spawn_pending(); + starpurm_spawn_kernel_on_cpus_callback(NULL, kernel_to_spawn, &param_N, sel_cpuset, spawn_callback, (void*)(uintptr_t)42); + + hwloc_bitmap_free(sel_cpuset); + hwloc_bitmap_free(cpu_cpuset); + } + _wait_pending_spawns(); + + printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); + starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + printf("assigning %d cpus to StarPU\n", half_nb_cpus); + starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + starpurm_set_drs_disable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled == 0); + } + + starpurm_shutdown(); + pthread_cond_destroy(&spawn_pending_cond); + return 0; +} diff --git a/starpurm/examples/block_test/block_test.c b/starpurm/examples/block_test/block_test.c new file mode 100644 index 0000000..69b53c8 --- /dev/null +++ b/starpurm/examples/block_test/block_test.c @@ -0,0 +1,269 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ + +#include +#include +#include +#include +#include + +#ifdef STARPU_NON_BLOCKING_DRIVERS +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + return 77; +} +#else +static int rm_cpu_type_id = -1; +static int rm_nb_cpu_units = 0; + +#if defined (STARPU_QUICK_CHECK) +static int global_nb_tasks_1 = 20; +static const int nb_random_tests_1 = 5; +static int global_nb_tasks_2 = 10; +static const int nb_random_tests_2 = 2; +#elif defined (STARPU_LONG_CHECK) +static int global_nb_tasks_1 = 200; +static const int nb_random_tests_1 = 20; +static int global_nb_tasks_2 = 100; +static const int nb_random_tests_2 = 10; +#else +static int global_nb_tasks_1 = 50; +static const int nb_random_tests_1 = 5; +static int global_nb_tasks_2 = 10; +static const int nb_random_tests_2 = 8; +#endif + +/* vector scale codelet */ +static void work_func(void *cl_buffers[], void *cl_arg) +{ + (void)cl_buffers; + (void)cl_arg; + + double timestamp = starpu_timing_now(); + double timestamp2; + do + { + timestamp2 = starpu_timing_now(); + } + while ((timestamp2 - timestamp) < 1e6); +} + +static struct starpu_codelet work_cl = +{ + .cpu_funcs = {work_func}, +}; + +/* main routines */ +static void test_1() +{ + int i; + for (i=0; i 1) + { + global_nb_tasks_1 = 
atoi(argv[1]); + if (argc > 2) + { + global_nb_tasks_2 = atoi(argv[2]); + } + else + { + global_nb_tasks_2 = global_nb_tasks_1 / 10; + if (global_nb_tasks_2 < 1) + { + global_nb_tasks_2 = 1; + } + } + } + starpurm_initialize(); + init_rm_infos(); + printf("using default units\n"); + disp_selected_cpuset(); + test_1(); + + if (rm_nb_cpu_units > 1) + { + const int nb_cpus = rm_nb_cpu_units; + const int half_nb_cpus = nb_cpus/2; + printf("nb_cpu_units = %d\n", nb_cpus); + + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled != 0); + + printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); + starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); + disp_selected_cpuset(); + test_1(); + + printf("assigning %d cpus to StarPU\n", half_nb_cpus); + starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); + disp_selected_cpuset(); + test_1(); + + int i; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include + +#define CHECK + +static int rm_cpu_type_id = -1; +static int rm_cuda_type_id = -1; +static int rm_nb_cpu_units = 0; +static int rm_nb_cuda_units = 0; +static const int nb_random_tests = 10; + +static unsigned spawn_pending = 0; +static pthread_mutex_t spawn_pending_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t spawn_pending_cond; + +static void _inc_spawn_pending(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + assert(spawn_pending < UINT_MAX); + spawn_pending++; + pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void _dec_spawn_pending(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + assert(spawn_pending > 0); + spawn_pending--; + if (spawn_pending == 0) + pthread_cond_broadcast(&spawn_pending_cond); + pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void _wait_pending_spawns(void) +{ + pthread_mutex_lock(&spawn_pending_mutex); + while (spawn_pending > 0) + pthread_cond_wait(&spawn_pending_cond, &spawn_pending_mutex); + 
pthread_mutex_unlock(&spawn_pending_mutex); +} + +static void spawn_callback(void *_arg) +{ + assert(42 == (uintptr_t)_arg); + _dec_spawn_pending(); +} + +static void usage(void) +{ + fprintf(stderr, "dgemm: M N K \n"); + exit(EXIT_FAILURE); +} + +static void init_rm_infos(void) +{ + int cpu_type = starpurm_get_device_type_id("cpu"); + int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); + if (nb_cpu_units < 1) + { + /* No CPU unit available. */ + exit(77); + } + + int cuda_type = starpurm_get_device_type_id("cuda"); + int nb_cuda_units = starpurm_get_nb_devices_by_type(cuda_type); + + rm_cpu_type_id = cpu_type; + rm_cuda_type_id = cuda_type; + rm_nb_cpu_units = nb_cpu_units; + rm_nb_cuda_units = nb_cuda_units; +} + + +static void disp_cpuset(hwloc_cpuset_t selected_cpuset) +{ + //hwloc_cpuset_t selected_cpuset = starpurm_get_selected_cpuset(); + int strl = hwloc_bitmap_snprintf(NULL, 0, selected_cpuset); + char str[strl+1]; + hwloc_bitmap_snprintf(str, strl+1, selected_cpuset); + printf("%llx: selected cpuset = %s\n", (unsigned long long)pthread_self(), str); +} + +struct s_test_args +{ + const int m; + const int n; + const int k; + int transA; + int transB; +}; + +static void test(void *_args) +{ + struct s_test_args *args = _args; + const int m = args->m; + const int n = args->n; + const int k = args->k; + int transA = args->transA; + int transB = args->transB; + unsigned rand_seed = (unsigned)time(NULL); + double *A = malloc(m * k * sizeof(double)); + double *B = malloc(k * n * sizeof(double)); + double *C = calloc(m * n, sizeof(double)); + double *C_test = calloc(m * n, sizeof(double)); + + const double alpha = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); + const double beta = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); + + int i; + /* A holds m*k values and B holds k*n values: fill each buffer over its own extent */ + for (i = 0; i < m * k; i++) + { + A[i] = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); + } + for (i = 0; i < k * n; i++) + { + B[i] =
(double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); + } + + MORSE_dgemm(transA, transB, m, n, k, alpha, A, k, B, n, beta, C, n); +#ifdef CHECK + /* Check */ + cblas_dgemm(CblasColMajor, + (CBLAS_TRANSPOSE) transA, + (CBLAS_TRANSPOSE) transB, + m, n, k, + alpha, A, k, + B, n, + beta, C_test, n); + + double C_test_inorm = LAPACKE_dlange(CblasColMajor, 'I', m, n, C_test, n); + cblas_daxpy(m*n, -1, C, 1, C_test, 1); + double inorm = LAPACKE_dlange(CblasColMajor, 'I', m, n, C_test, n); + printf("%llx: ||C_test-C||_I / ||C_test||_I = %e\n", (unsigned long long)pthread_self(), inorm/C_test_inorm); +#endif + free(A); + free(B); + free(C); + free(C_test); +} + +static void select_units(hwloc_cpuset_t selected_cpuset, hwloc_cpuset_t available_cpuset, int offset, int nb) +{ + int first_idx = hwloc_bitmap_first(available_cpuset); + int last_idx = hwloc_bitmap_last(available_cpuset); + int count = 0; + int idx = first_idx; + while (idx != -1 && idx <= last_idx && count < offset+nb) + { + if (hwloc_bitmap_isset(available_cpuset, idx)) + { + if (count >= offset) + { + hwloc_bitmap_set(selected_cpuset, idx); + } + count ++; + } + idx = hwloc_bitmap_next(available_cpuset, idx); + } + assert(count == offset+nb); +} + +void spawn_tests(int cpu_offset, int cpu_nb, int cuda_offset, int cuda_nb, void *args) +{ + if (cpu_offset + cpu_nb > rm_nb_cpu_units) + exit(77); + if (cuda_offset + cuda_nb > rm_nb_cuda_units) + exit(77); + hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); + hwloc_cpuset_t cuda_cpuset = starpurm_get_all_device_workers_cpuset_by_type(rm_cuda_type_id); + hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc(); + assert(sel_cpuset != NULL); + + select_units(sel_cpuset, cpu_cpuset, cpu_offset, cpu_nb); + select_units(sel_cpuset, cuda_cpuset, cuda_offset, cuda_nb); + + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); + + int strl2 =
hwloc_bitmap_snprintf(NULL, 0, cuda_cpuset); + char str2[strl2+1]; + hwloc_bitmap_snprintf(str2, strl2+1, cuda_cpuset); + printf("all cpus cpuset = %s\n", str1); + + int strl3 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); + char str3[strl3+1]; + hwloc_bitmap_snprintf(str3, strl3+1, sel_cpuset); /* bound must be the size of str3 */ + printf("spawn on selected cpuset = %s (avail cpu %s, avail cuda %s)\n", str3, str1, str2); + } + + _inc_spawn_pending(); + starpurm_spawn_kernel_on_cpus_callback(NULL, test, args, sel_cpuset, spawn_callback, (void*)(uintptr_t)42); + + hwloc_bitmap_free(sel_cpuset); + hwloc_bitmap_free(cpu_cpuset); + hwloc_bitmap_free(cuda_cpuset); +} + +int main(int argc, char const *argv[]) +{ + pthread_cond_init(&spawn_pending_cond, NULL); + + int transA = MorseTrans; + int transB = MorseTrans; + + if (argc < 6 || argc > 6) + usage(); + + int m = atoi(argv[1]); + if (m < 1) + usage(); + int n = atoi(argv[2]); + if (n < 1) + usage(); + int k = atoi(argv[3]); + if (k < 1) + usage(); + + if (strcmp(argv[4], "T") == 0) + transA = MorseTrans; + else if (strcmp(argv[4], "N") == 0) + transA = MorseNoTrans; + else + usage(); + + if (strcmp(argv[5], "T") == 0) + transB = MorseTrans; + else if (strcmp(argv[5], "N") == 0) + transB = MorseNoTrans; + else + usage(); + + srand(time(NULL)); + + struct s_test_args test_args = { .m = m, .n = n, .k = k, .transA = transA, .transB = transB }; + + /* Test case */ + starpurm_initialize(); + starpurm_set_drs_enable(NULL); + init_rm_infos(); + printf("cpu units: %d\n", rm_nb_cpu_units); + printf("cuda units: %d\n", rm_nb_cuda_units); + printf("using default units\n"); + disp_cpuset(starpurm_get_selected_cpuset()); + + MORSE_Init(rm_nb_cpu_units, rm_nb_cuda_units); + test(&test_args); + { + int cpu_offset = 0; + int cpu_nb = rm_nb_cpu_units/2; + if (cpu_nb == 0 && rm_nb_cpu_units > 0) + { + cpu_nb = 1; + } + int cuda_offset = 0; + int cuda_nb = rm_nb_cuda_units/2; + if (cuda_nb == 0 && rm_nb_cuda_units > 0) + { + cuda_nb = 1; + } + spawn_tests(cpu_offset,
cpu_nb, cuda_offset, cuda_nb, &test_args); + } + { + int cpu_offset = rm_nb_cpu_units/2; + int cpu_nb = cpu_offset; + if (cpu_nb == 0 && rm_nb_cpu_units > 0) + { + cpu_nb = 1; + } + int cuda_offset = rm_nb_cuda_units/2; + int cuda_nb = rm_nb_cuda_units - cuda_offset; + spawn_tests(cpu_offset, cpu_nb, cuda_offset, cuda_nb, &test_args); + } + _wait_pending_spawns(); + MORSE_Finalize(); + + starpurm_shutdown(); + pthread_cond_destroy(&spawn_pending_cond); + + return 0; + +} diff --git a/starpurm/examples/cuda_vector_scale/vector_scale.c b/starpurm/examples/cuda_vector_scale/vector_scale.c new file mode 100644 index 0000000..aaf9a0b --- /dev/null +++ b/starpurm/examples/cuda_vector_scale/vector_scale.c @@ -0,0 +1,289 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ + +#include +#include +#include +#include +#include + +static int rm_cpu_type_id = -1; +static int rm_cuda_type_id = -1; +static int rm_nb_cpu_units = 0; +static int rm_nb_cuda_units = 0; + +static void usage(void); +static void test1(const int N); +static void test2(const int N, const int task_mult); +static void init_rm_infos(void); + +/* vector scale codelet */ +static void vector_scale_func(void *cl_buffers[], void *cl_arg) +{ + float scalar = -1.0; + int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); + int i; + starpu_codelet_unpack_args(cl_arg, &scalar); + + { + int workerid = starpu_worker_get_id(); + hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); + hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); +#if 0 + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); + int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); + char str2[strl2+1]; + hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); + printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); + } +#endif + hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); + assert(!hwloc_bitmap_iszero(check_cpuset)); + hwloc_bitmap_free(check_cpuset); + hwloc_bitmap_free(worker_cpuset); + } + + for (i = 0; i < n; i++) + { + vector[i] *= scalar; + } +} + +extern void vector_scale_cuda_func(void *cl_buffers[], void *cl_arg); + +static struct starpu_codelet vector_scale_cl = +{ + .cpu_funcs = {vector_scale_func}, + .cuda_funcs = {vector_scale_cuda_func}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 1 +}; + +/* main routines */ +static void usage(void) +{ + fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); + exit(1); +} 
+ +static void test1(const int N) +{ + float *vector = NULL; + const float scalar = 2.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + starpu_task_wait_for_all(); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + float d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed, vector[%d]: %f != %f\n", __func__, i, vector[i], d_i*scalar); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void test2(const int N, const int task_mult) +{ + float *vector = NULL; + const float scalar = 3.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + const int nparts = (rm_nb_cpu_units+rm_nb_cuda_units) * task_mult; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + struct starpu_data_filter partition_filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = nparts + }; + + starpu_data_partition(vector_handle, &partition_filter); + + { + int i; + for (i = 0; i < nparts; i++) + { + starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, sub_vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + } + } + starpu_task_wait_for_all(); + starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + float d_i = i; + if (vector[i] != 
d_i*scalar) + { + fprintf(stderr, "%s: check_failed, vector[%d]: %f != %f\n", __func__, i, vector[i], d_i*scalar); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void init_rm_infos(void) +{ + int cpu_type = starpurm_get_device_type_id("cpu"); + int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); + if (nb_cpu_units < 1) + { + /* No CPU unit available. */ + exit(77); + } + + int cuda_type = starpurm_get_device_type_id("cuda"); + int nb_cuda_units = starpurm_get_nb_devices_by_type(cuda_type); + if (nb_cuda_units < 1) + { + /* No CUDA unit available. */ + exit(77); + } + + rm_cpu_type_id = cpu_type; + rm_cuda_type_id = cuda_type; + rm_nb_cpu_units = nb_cpu_units; + rm_nb_cuda_units = nb_cuda_units; +} + +static void disp_selected_cpuset(void) +{ + hwloc_cpuset_t selected_cpuset = starpurm_get_selected_cpuset(); + int strl = hwloc_bitmap_snprintf(NULL, 0, selected_cpuset); + char str[strl+1]; + hwloc_bitmap_snprintf(str, strl+1, selected_cpuset); + printf("selected cpuset = %s\n", str); hwloc_bitmap_free(selected_cpuset); /* the returned bitmap is owned by the caller, as in vector_scale_func */ +} + +int main(int argc, char *argv[]) +{ + int param_N = 1000000; + int drs_enabled; + if (argc > 1) + { + param_N = atoi(argv[1]); + if (param_N < 1) + { + usage(); + } + } + + starpurm_initialize(); + init_rm_infos(); + printf("using default units\n"); + disp_selected_cpuset(); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + if (rm_nb_cpu_units > 1 && rm_nb_cuda_units > 1) + { + int nb_cpus = rm_nb_cpu_units; + const int nb_cudas = rm_nb_cuda_units; + const int cuda_type = rm_cuda_type_id; + printf("nb_cpu_units = %d\n", nb_cpus); + printf("nb_cuda_units = %d\n", nb_cudas); + + /* Keep at least one CPU core */ + nb_cpus--; + + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled != 0); + + printf("withdrawing %d cpus from StarPU\n", nb_cpus); + starpurm_withdraw_cpus_from_starpu(NULL, nb_cpus); + disp_selected_cpuset(); + test2(param_N, 1); + 
test2(param_N, 10); + test2(param_N, 100); + + printf("assigning %d cpus to StarPU\n", nb_cpus); + starpurm_assign_cpus_to_starpu(NULL, nb_cpus); + disp_selected_cpuset(); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + printf("withdrawing %d cuda devices from StarPU\n", nb_cudas); + starpurm_withdraw_devices_from_starpu(NULL, cuda_type, nb_cudas); + disp_selected_cpuset(); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + printf("lending %d cuda devices to StarPU\n", nb_cudas); + starpurm_assign_devices_to_starpu(NULL, cuda_type, nb_cudas); + disp_selected_cpuset(); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + starpurm_set_drs_disable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled == 0); + } + + starpurm_shutdown(); + return 0; +} diff --git a/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu b/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu new file mode 100644 index 0000000..303710b --- /dev/null +++ b/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu @@ -0,0 +1,64 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ + +#include +#include + +static __global__ void vector_scale_cuda_kernel(float *vector, unsigned n, float scalar) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + if (i < n) + { + vector[i] *= scalar; + } +} + +extern "C" void vector_scale_cuda_func(void *cl_buffers[], void *cl_arg) +{ + float scalar = -1.0; + unsigned n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); + starpu_codelet_unpack_args(cl_arg, &scalar); + + { + int workerid = starpu_worker_get_id(); + hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); + hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); +#if 0 + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); + int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); + char str2[strl2+1]; + hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); + printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); + } +#endif + hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); + assert(!hwloc_bitmap_iszero(check_cpuset)); + hwloc_bitmap_free(check_cpuset); + hwloc_bitmap_free(worker_cpuset); + } + + unsigned nb_threads_per_block = 64; + unsigned nb_blocks = (n + nb_threads_per_block-1) / nb_threads_per_block; + vector_scale_cuda_kernel<<<nb_blocks, nb_threads_per_block, 0, starpu_cuda_get_local_stream()>>>(vector, n, scalar); /* launched on the worker's own stream, as the codelet's STARPU_CUDA_ASYNC flag expects */ + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/starpurm/examples/spawn.c b/starpurm/examples/spawn.c new file mode 100644 index 0000000..f89cbcb --- /dev/null +++ b/starpurm/examples/spawn.c @@ -0,0 +1,270 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM, + * making use of both the main RM API and the spawn_kernel_on_cpus API func */ + +#include +#include +#include +#include +#include + +static int rm_cpu_type_id = -1; +static int rm_nb_cpu_units = 0; + +static void usage(void); +static void test1(const int N); +static void test2(const int N, const int task_mult); +static void init_rm_infos(void); + +/* vector scale codelet */ +static void vector_scale_func(void *cl_buffers[], void *cl_arg) +{ + double scalar = -1.0; + int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); + int i; + starpu_codelet_unpack_args(cl_arg, &scalar); + + int workerid = starpu_worker_get_id(); + hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); + printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); + } + hwloc_bitmap_free(worker_cpuset); + + for (i = 0; i < n; i++) + { + vector[i] *= scalar; + } +} + +static struct starpu_codelet vector_scale_cl = +{ + .cpu_funcs = {vector_scale_func}, + .nbuffers = 1 +}; + +/* main routines */ +static void usage(void) +{ + 
fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); + exit(1); +} + +static void test1(const int N) +{ + double *vector = NULL; + const double scalar = 2.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + starpu_task_wait_for_all(); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void test2(const int N, const int task_mult) +{ + double *vector = NULL; + const double scalar = 3.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + struct starpu_data_filter partition_filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = rm_nb_cpu_units * task_mult + }; + + starpu_data_partition(vector_handle, &partition_filter); + + { + int i; + for (i = 0; i < rm_nb_cpu_units*task_mult; i++) + { + starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, sub_vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + } + } + starpu_task_wait_for_all(); + starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != 
d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void init_rm_infos(void) +{ + int cpu_type = starpurm_get_device_type_id("cpu"); + int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); + if (nb_cpu_units < 1) + { + /* No CPU unit available. */ + exit(77); + } + + rm_cpu_type_id = cpu_type; + rm_nb_cpu_units = nb_cpu_units; +} + +static void kernel_to_spawn(void *args) +{ + int param_N = *(int*)args; + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); +} + +int main(int argc, char *argv[]) +{ + int param_N = 1000000; + int drs_enabled; + if (argc > 1) + { + param_N = atoi(argv[1]); + if (param_N < 1) + { + usage(); + } + } + + starpurm_initialize(); + init_rm_infos(); + if (rm_nb_cpu_units > 1) + { + const int half_nb_cpus = rm_nb_cpu_units/2; + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled != 0); + + { + hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); + printf("all cpus cpuset = %s\n", str1); + } + int first_idx = hwloc_bitmap_first(cpu_cpuset); + int last_idx = hwloc_bitmap_last(cpu_cpuset); + hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc(); + assert(sel_cpuset != NULL); + int count = 0; + int idx = first_idx; + while (idx != -1 && idx <= last_idx && count < half_nb_cpus) + { + if (hwloc_bitmap_isset(cpu_cpuset, idx)) + { + hwloc_bitmap_set(sel_cpuset, idx); + count ++; + } + idx = hwloc_bitmap_next(cpu_cpuset, idx); + } + assert(count == half_nb_cpus); + + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, sel_cpuset); + printf("spawning a kernel on cpuset = %s\n", str1); + } + starpurm_spawn_kernel_on_cpus(NULL, kernel_to_spawn, &param_N, sel_cpuset); /* kernel_to_spawn reads its argument as int* */ + 
+ hwloc_bitmap_free(sel_cpuset); + hwloc_bitmap_free(cpu_cpuset); + } + + printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); + starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + printf("assigning %d cpus to StarPU\n", half_nb_cpus); + starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + starpurm_set_drs_disable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled == 0); + } + + starpurm_shutdown(); + return 0; +} diff --git a/starpurm/examples/vector_scale.c b/starpurm/examples/vector_scale.c new file mode 100644 index 0000000..e35e8fe --- /dev/null +++ b/starpurm/examples/vector_scale.c @@ -0,0 +1,248 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* This example shows a basic StarPU vector scale app on top of StarPURM */ + +#include +#include +#include +#include +#include + +static int rm_cpu_type_id = -1; +static int rm_nb_cpu_units = 0; + +static void usage(void); +static void test1(const int N); +static void test2(const int N, const int task_mult); +static void init_rm_infos(void); + +/* vector scale codelet */ +static void vector_scale_func(void *cl_buffers[], void *cl_arg) +{ + double scalar = -1.0; + int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); + int i; + starpu_codelet_unpack_args(cl_arg, &scalar); + + int workerid = starpu_worker_get_id(); + hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); + hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); + { + int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); + char str1[strl1+1]; + hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); + int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); + char str2[strl2+1]; + hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); + printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); + } + hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); + assert(!hwloc_bitmap_iszero(check_cpuset)); + hwloc_bitmap_free(check_cpuset); + hwloc_bitmap_free(worker_cpuset); + + for (i = 0; i < n; i++) + { + vector[i] *= scalar; + } +} + +static struct starpu_codelet vector_scale_cl = +{ + .cpu_funcs = {vector_scale_func}, + .nbuffers = 1 +}; + +/* main routines */ +static void usage(void) +{ + fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); + exit(1); +} + +static void test1(const int N) +{ + double *vector = NULL; + const double scalar = 2.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } 
+ starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + starpu_task_wait_for_all(); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void test2(const int N, const int task_mult) +{ + double *vector = NULL; + const double scalar = 3.0; + starpu_data_handle_t vector_handle; + int ret; + + starpu_malloc((void **)&vector, N * sizeof(*vector)); + { + int i; + for (i = 0; i < N; i++) + { + vector[i] = i; + } + } + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); + struct starpu_data_filter partition_filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = rm_nb_cpu_units * task_mult + }; + + starpu_data_partition(vector_handle, &partition_filter); + + { + int i; + for (i = 0; i < rm_nb_cpu_units*task_mult; i++) + { + starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); + ret = starpu_task_insert(&vector_scale_cl, + STARPU_RW, sub_vector_handle, + STARPU_VALUE, &scalar, sizeof(scalar), + 0); + assert(ret == 0); + } + } + starpu_task_wait_for_all(); + starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(vector_handle); + { + int i; + for (i = 0; i < N; i++) + { + double d_i = i; + if (vector[i] != d_i*scalar) + { + fprintf(stderr, "%s: check_failed\n", __func__); + exit(1); + } + } + } + starpu_free_noflag(vector, N * sizeof(*vector)); +} + +static void init_rm_infos(void) +{ + int cpu_type = starpurm_get_device_type_id("cpu"); + int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); + if (nb_cpu_units < 1) + { + /* No CPU 
unit available. */ + exit(77); + } + + rm_cpu_type_id = cpu_type; + rm_nb_cpu_units = nb_cpu_units; +} + +int main(int argc, char *argv[]) +{ + int param_N = 1000000; + int drs_enabled; + if (argc > 1) + { + param_N = atoi(argv[1]); + if (param_N < 1) + { + usage(); + } + } + + starpurm_initialize(); + init_rm_infos(); + test1(param_N); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + if (rm_nb_cpu_units > 1) + { + const int half_nb_cpus = rm_nb_cpu_units/2; + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled != 0); + + printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); + starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + printf("assigning %d cpus to StarPU\n", half_nb_cpus); + starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + int i; + for (i = rm_nb_cpu_units-1; i > 0; i--) + { + starpurm_set_max_parallelism(NULL, i); + test2(param_N, 10); + } + + printf("withdrawing all cpus from StarPU\n"); + starpurm_withdraw_all_cpus_from_starpu(NULL); + printf("assigning %d cpus to StarPU\n", rm_nb_cpu_units); + starpurm_assign_cpus_to_starpu(NULL, rm_nb_cpu_units); + test2(param_N, 1); + test2(param_N, 10); + test2(param_N, 100); + + starpurm_set_drs_disable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + assert(drs_enabled == 0); + } + + starpurm_shutdown(); + return 0; +} diff --git a/starpurm/include/starpurm.h b/starpurm/include/starpurm.h new file mode 100644 index 0000000..a7acc68 --- /dev/null +++ b/starpurm/include/starpurm.h @@ -0,0 +1,487 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPURM_H +#define __STARPURM_H +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup API_Interop_Support Interoperability Support + @brief API to interoperate with other runtime systems. + @{ +*/ + +/** + StarPU Resource Manager return type. +*/ +enum e_starpurm_drs_ret +{ + starpurm_DRS_SUCCESS = 0, /**< Dynamic resource sharing operation succeeded. */ + starpurm_DRS_DISABLD = -1, /**< Dynamic resource sharing is disabled. */ + starpurm_DRS_PERM = -2, /**< Dynamic resource sharing operation is not authorized or implemented. */ + starpurm_DRS_EINVAL = -3 /**< Dynamic resource sharing operation has been called with one or more invalid parameters. */ +#if 0 + /* Unused for now */ + starpurm_DRS_NOTED, + starpurm_DRS_REQST +#endif +}; +typedef int starpurm_drs_ret_t; +typedef void *starpurm_drs_desc_t; +typedef void *starpurm_drs_cbs_t; +typedef void (*starpurm_drs_cb_t)(void *); +typedef void *starpurm_block_cond_t; +typedef int (*starpurm_polling_t)(void *); + +/** + @name Initialisation + @{ +*/ + +/** + Resource enforcement +*/ +void starpurm_initialize_with_cpuset(hwloc_cpuset_t initially_owned_cpuset); + +/** + Initialize StarPU and the StarPU-RM resource management module. The + starpu_init() function should not have been called before the call + to starpurm_initialize(). 
The starpurm_initialize() function will + take care of this. +*/ +void starpurm_initialize(void); + +/** + Shutdown StarPU-RM and StarPU. The starpu_shutdown() function + should not be called before. The starpurm_shutdown() function will + take care of this. +*/ +void starpurm_shutdown(void); + +/** @} */ + +/** + @name Spawn + @{ +*/ + +/** + Allocate a temporary context spanning the units selected in the + \p cpuset bitmap, set it as the default context for the current + thread, and call user function \p f. Upon the return of user + function \p f, the temporary context is freed and the previous + default context for the current thread is restored. +*/ +void starpurm_spawn_kernel_on_cpus(void *data, void (*f)(void *), void *args, hwloc_cpuset_t cpuset); + +/** + Spawn a POSIX thread and return immediately. The spawned thread + will allocate a temporary context spanning the units selected in + the \p cpuset bitmap, set it as the default context for the current + thread, and call user function \p f. Upon the return of user + function \p f, the temporary context will be freed and the previous + default context for the current thread restored. A user-specified + callback \p cb_f will be called just before the termination of the + thread. +*/ +void starpurm_spawn_kernel_on_cpus_callback(void *data, void (*f)(void *), void *args, hwloc_cpuset_t cpuset, void (*cb_f)(void *), void *cb_args); + +void starpurm_spawn_kernel_callback(void *data, void (*f)(void *), void *args, void (*cb_f)(void *), void *cb_args); + +/** @} */ + +/** + @name DynamicResourceSharing + @{ +*/ + +/** + Turn on dynamic resource sharing support. +*/ +starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd); + +/** + Turn off dynamic resource sharing support. +*/ +starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd); + +/** + Return the state of the dynamic resource sharing support (non-zero: + enabled, zero: disabled). 
+*/ +int starpurm_drs_enabled_p(void); + +/** + Set the maximum number of CPU computing units available for StarPU + computations to \p max. This number cannot exceed the maximum + number of StarPU's CPU workers allocated at start-up time. +*/ +starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int max); + +#if 0 +/* Unused for now */ +starpurm_drs_ret_t starpurm_callback_set(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t callback); +starpurm_drs_ret_t starpurm_callback_get(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t *callback); +#endif + +/** + Extend StarPU's default scheduling context to execute tasks on + the worker corresponding to logical unit \p cpuid. If StarPU does not + have a worker thread initialized for logical unit \p cpuid, do + nothing. +*/ +starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid); + +/** + Extend StarPU's default scheduling context to execute tasks on \p + ncpus more workers, up to the number of StarPU worker threads + initialized. +*/ +starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus); + +/** + Extend StarPU's default scheduling context to execute tasks on the + additional logical units selected in \p mask. Logical units of \p + mask for which no StarPU worker is initialized are silently ignored. +*/ +starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Set StarPU's default scheduling context to execute tasks on all + available logical units for which a StarPU worker has been + initialized. +*/ +starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd); + +/** + Shrink StarPU's default scheduling context so as to not execute + tasks on the worker corresponding to logical unit \p cpuid. If StarPU + does not have a worker thread initialized for logical unit \p + cpuid, do nothing. 
+*/ +starpurm_drs_ret_t starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid); + +/** + Shrink StarPU's default scheduling context to execute tasks on \p + ncpus less workers. +*/ +starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus); + +/** + Shrink StarPU's default scheduling context so as to not execute + tasks on the logical units selected in \p mask. Logical units of \p + mask for which no StarPU worker is initialized are silently ignored. +*/ +starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Shrink StarPU's default scheduling context so as to remove all + logical units. +*/ +starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd); + +/* --- */ + +/** + Synonym for starpurm_assign_all_cpus_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd); + +/** + Synonym for starpurm_assign_cpu_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid); + +/** + Synonym for starpurm_assign_cpus_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus); + +/** + Synonym for starpurm_assign_cpu_mask_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_withdraw_all_cpus_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd); + +/** + Synonym for starpurm_withdraw_cpu_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid); + +/** + Synonym for starpurm_withdraw_cpus_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus); + +/** + Synonym for starpurm_withdraw_cpu_mask_from_starpu(). 
+*/ +starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_withdraw_all_cpus_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd); + +/** + Synonym for starpurm_withdraw_cpu_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid); + +/** + Synonym for starpurm_withdraw_cpus_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus); + +/** + Synonym for starpurm_withdraw_cpu_mask_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_assign_all_cpus_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd); + +/** + Synonym for starpurm_assign_cpu_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid); + +#if 0 +/* Pause/resume (not implemented) */ +starpurm_drs_ret_t starpurm_create_block_condition(starpurm_block_cond_t *cond); +void starpurm_block_current_task(starpurm_block_cond_t *cond); +void starpurm_signal_block_condition(starpurm_block_cond_t *cond); + +void starpurm_register_polling_service(const char *service_name, starpurm_polling_t function, void *data); +void starpurm_unregister_polling_service(const char *service_name, starpurm_polling_t function, void *data); +#endif + +/** @} */ + +/** + @name Devices + @{ +*/ + +/** + Return the device type ID constant associated to the device type name. + Valid names for \p type_str are: + - \c "cpu": regular CPU unit; + - \c "opencl": OpenCL device unit; + - \c "cuda": nVidia CUDA device unit; +*/ +int starpurm_get_device_type_id(const char *type_str); + +/** + Return the device type name associated to the device type ID + constant. 
+*/ +const char *starpurm_get_device_type_name(int type_id); + +/** + Return the number of initialized StarPU workers for the device type + \p type_id. +*/ +int starpurm_get_nb_devices_by_type(int type_id); + +/** + Return the unique ID assigned to the \p device_rank nth device of + type \p type_id. +*/ +int starpurm_get_device_id(int type_id, int device_rank); + +/** + Extend StarPU's default scheduling context to use the \p unit_rank nth + device of type \p type_id. +*/ +starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** + Extend StarPU's default scheduling context to use \p ndevices more + devices of type \p type_id, up to the number of StarPU workers + initialized for such device type. + */ +starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices); + +/** + Extend StarPU's default scheduling context to use additional + devices as designated by their corresponding StarPU worker + thread(s) CPU-set \p mask. + */ +starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Extend StarPU's default scheduling context to use all devices of + type \p type_id for which it has a worker thread initialized. +*/ +starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id); + +/** + Shrink StarPU's default scheduling context to not use the \p unit_rank + nth device of type \p type_id. + */ +starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** + Shrink StarPU's default scheduling context to use \p ndevices fewer + devices of type \p type_id. +*/ +starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices); + +/** + Shrink StarPU's default scheduling context to not use devices + designated by their corresponding StarPU worker thread(s) CPU-set + \p mask.
+*/ +starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Shrink StarPU's default scheduling context to use no devices of + type \p type_id. +*/ +starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id); + +/* --- */ + +/** + Synonym for starpurm_assign_device_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** + Synonym for starpurm_assign_devices_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); + +/** + Synonym for starpurm_assign_device_mask_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_assign_all_devices_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id); + +/** + Synonym for starpurm_withdraw_device_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** + Synonym for starpurm_withdraw_devices_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); + +/** + Synonym for starpurm_withdraw_device_mask_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_withdraw_all_devices_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id); + +/** + Synonym for starpurm_withdraw_device_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** + Synonym for starpurm_withdraw_devices_from_starpu(). 
+*/ +starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); + +/** + Synonym for starpurm_withdraw_device_mask_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); + +/** + Synonym for starpurm_withdraw_all_devices_from_starpu(). +*/ +starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id); + +/** + Synonym for starpurm_assign_all_devices_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id); + +/** + Synonym for starpurm_assign_device_to_starpu(). +*/ +starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); + +/** @} */ + +/** + @name CpusetsQueries + @{ +*/ + +/** + Return the CPU-set of the StarPU worker associated to the \p + unit_rank nth unit of type \p type_id. +*/ +hwloc_cpuset_t starpurm_get_device_worker_cpuset(int type_id, int unit_rank); + +/** + Return the cumulated CPU-set of all StarPU worker threads. +*/ +hwloc_cpuset_t starpurm_get_global_cpuset(void); + +/** + Return the CPU-set of the StarPU worker threads currently selected + in StarPU's default scheduling context. + */ +hwloc_cpuset_t starpurm_get_selected_cpuset(void); + +/** + Return the cumulated CPU-set of all CPU StarPU worker threads. +*/ +hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void); + +/** + Return the cumulated CPU-set of all "non-CPU" StarPU worker + threads. + */ +hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void); + +/** + Return the cumulated CPU-set of all StarPU worker threads for + devices of type \p type_id.
+*/ +hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int type_id); + +/** @} */ +/** @} */ + +#ifdef __cplusplus +} +#endif +#endif /* __STARPURM_H */ diff --git a/starpurm/include/starpurm_config.h.in b/starpurm/include/starpurm_config.h.in new file mode 100644 index 0000000..c5a7b14 --- /dev/null +++ b/starpurm/include/starpurm_config.h.in @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPURM_CONFIG_H__ +#define __STARPURM_CONFIG_H__ + +/* Major version number of StarPU RM. */ +#undef STARPURM_MAJOR_VERSION + +/* Minor version number of StarPU RM. */ +#undef STARPURM_MINOR_VERSION + +/* Release version number of StarPU RM. */ +#undef STARPURM_RELEASE_VERSION + +#endif diff --git a/starpurm/packages/starpurm-1.3.pc.in b/starpurm/packages/starpurm-1.3.pc.in new file mode 100644 index 0000000..5e7c07b --- /dev/null +++ b/starpurm/packages/starpurm-1.3.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ + +Name: starpurm +Description: resource management layer on top of StarPU +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpurm/@STARPU_EFFECTIVE_VERSION@ @DLB_CFLAGS@ +Libs: -L${libdir} -lstarpurm-@STARPU_EFFECTIVE_VERSION@ @DLB_LIBS@ +Libs.private: @LDFLAGS@ @LIBS@ +Requires: starpu-1.3 hwloc diff --git a/starpurm/packages/starpurm-1.4.pc.in b/starpurm/packages/starpurm-1.4.pc.in new file mode 100644 index 0000000..5bb6355 --- /dev/null +++ b/starpurm/packages/starpurm-1.4.pc.in @@ -0,0 +1,28 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +pkglibdir=@pkglibdir@ +includedir=@includedir@ + +Name: starpurm +Description: resource management layer on top of StarPU +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/starpurm/@STARPU_EFFECTIVE_VERSION@ @DLB_CFLAGS@ +Libs: -L${libdir} -lstarpurm-@STARPU_EFFECTIVE_VERSION@ @DLB_LIBS@ +Libs.private: @LDFLAGS@ @LIBS@ +Requires: starpu-1.4 hwloc diff --git a/starpurm/src/Makefile.am b/starpurm/src/Makefile.am new file mode 100644 index 0000000..7f169dd --- /dev/null +++ b/starpurm/src/Makefile.am @@ -0,0 +1,41 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +include $(top_srcdir)/make/starpu-notests.mk + +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include +AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) + +libstarpurm_so_version = $(LIBSTARPURM_INTERFACE_CURRENT):$(LIBSTARPURM_INTERFACE_REVISION):$(LIBSTARPURM_INTERFACE_AGE) + +lib_LTLIBRARIES = libstarpurm-@STARPU_EFFECTIVE_VERSION@.la + +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpurm_so_version) +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ + starpurm.c + +if STARPURM_HAVE_DLB +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ + starpurm_dlb.c +endif + +noinst_HEADERS = \ + starpurm_private.h diff --git a/starpurm/src/Makefile.in b/starpurm/src/Makefile.in new file mode 100644 index 0000000..90b48b5 --- /dev/null +++ b/starpurm/src/Makefile.in @@ -0,0 +1,1118 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = 
: +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +@STARPURM_HAVE_DLB_TRUE@am__append_3 = \ +@STARPURM_HAVE_DLB_TRUE@ starpurm_dlb.c + +subdir = starpurm/src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| 
|;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = +am__libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ + starpurm.c starpurm_dlb.c +@STARPURM_HAVE_DLB_TRUE@am__objects_1 = starpurm_dlb.lo +am_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpurm.lo \ + $(am__objects_1) +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ + $(am_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles 
+am__depfiles_remade = ./$(DEPDIR)/starpurm.Plo \ + ./$(DEPDIR)/starpurm_dlb.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) +DIST_SOURCES = \ + $(am__libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of 
newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/make/starpu-notests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = 
@AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + 
$(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ 
+NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = 
@STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ 
+abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = 
$(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ + -I$(top_builddir)/src -I$(top_builddir)/include \ + -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ + -I$(top_builddir)/starpurm/src \ + -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +libstarpurm_so_version = $(LIBSTARPURM_INTERFACE_CURRENT):$(LIBSTARPURM_INTERFACE_REVISION):$(LIBSTARPURM_INTERFACE_AGE) +lib_LTLIBRARIES = libstarpurm-@STARPU_EFFECTIVE_VERSION@.la +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpurm_so_version) +libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpurm.c \ + $(am__append_3) +noinst_HEADERS = \ + 
starpurm_private.h + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpurm/src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libstarpurm-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpurm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpurm_dlb.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ 
+ install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/starpurm.Plo + -rm -f ./$(DEPDIR)/starpurm_dlb.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/starpurm.Plo + -rm -f ./$(DEPDIR)/starpurm_dlb.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS 
all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +recheck: + -cat /dev/null + +showcheckfailed: + @-cat /dev/null + +showfailed: + @-cat /dev/null + +showcheck: + -cat /dev/null + +showsuite: + -cat /dev/null + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/starpurm/src/starpurm.c b/starpurm/src/starpurm.c new file mode 100644 index 0000000..30cf09b --- /dev/null +++ b/starpurm/src/starpurm.c @@ -0,0 +1,1700 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * #define _DEBUG + */ + +struct s_starpurm_unit +{ + /* Opaque unit id. + * + * For StarPU-RM, this id is used as an index to array starpurm->units[]. + */ + int id; + + /* Id of the unit type. */ + int type; + + /* Boolean indicating whether the device is currently selected for use by the runtime system. */ + int selected; + + /* StarPU id of the worker driving the device. */ + int workerid; + + /* Cpuset of the StarPU worker. */ + hwloc_cpuset_t worker_cpuset; + + /* Condition variable to notify that a unit is now available to driver a worker waking up. 
*/ + starpu_pthread_cond_t unit_available_cond; +}; + +static struct s_starpurm *_starpurm = NULL; + +#if 0 +static char *bitmap_to_str(hwloc_bitmap_t bitmap) +{ + int strl = hwloc_bitmap_snprintf(NULL, 0, bitmap); + char *str = malloc(strl+1); + hwloc_bitmap_snprintf(str, strl+1, bitmap); + return str; +} +#endif + +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS +enum e_starpurm_event +{ + starpurm_event_code_min = 0, + + starpurm_event_exit = 0, + starpurm_event_worker_going_to_sleep = 1, + starpurm_event_worker_waking_up = 2, + starpurm_event_unit_available = 3, + + starpurm_event_code_max = 3 +}; + +const char *_starpurm_event_to_str(int event_code) +{ + const char *s = NULL; + switch (event_code) + { + case starpurm_event_exit: + s = "starpurm_event_exit"; + break; + case starpurm_event_worker_going_to_sleep: + s = "starpurm_event_worker_going_to_sleep"; + break; + case starpurm_event_worker_waking_up: + s = "starpurm_event_worker_waking_up"; + break; + case starpurm_event_unit_available: + s = "starpurm_event_unit_available"; + break; + default: + s = ""; + break; + } + return s; +} + +struct s_starpurm_event +{ + struct s_starpurm_event *next; + struct s_starpurm_event *prev; + enum e_starpurm_event code; + int workerid; +}; + +static void _enqueue_event(struct s_starpurm_event *event) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + assert(event->next == NULL); + assert(event->prev == NULL); + assert(event->code >= starpurm_event_code_min && event->code <= starpurm_event_code_max); + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + if (rm->event_processing_ended) + { + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + return; + } + assert((rm->event_list_head == NULL && rm->event_list_tail == NULL) + || (rm->event_list_head != NULL && rm->event_list_tail != NULL)); + if (rm->event_list_head == NULL) + { + rm->event_list_tail = event; + } + else + { + rm->event_list_head->prev = 
event; + } + event->next = rm->event_list_head; + rm->event_list_head = event; + if (event->code == starpurm_event_exit) + { + rm->event_processing_ended = 1; + int i; + for (i=0; inunits; i++) + { + STARPU_PTHREAD_COND_BROADCAST(&rm->units[i].unit_available_cond); + } + } +#ifdef STARPURM_VERBOSE + if (event->code != starpurm_event_worker_waking_up) + fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); +#endif + STARPU_PTHREAD_COND_BROADCAST(&rm->event_list_cond); +#ifdef STARPURM_HAVE_DLB + if (event->code == starpurm_event_worker_waking_up) + { + int unit_id = rm->worker_unit_ids[event->workerid]; + /* if DLB is in use, wait for the unit to become available from the point of view of DLB, before using it */ +#ifdef STARPURM_VERBOSE + fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u - waiting\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); +#endif + STARPU_PTHREAD_COND_WAIT(&rm->units[unit_id].unit_available_cond, &rm->event_list_mutex); +#ifdef STARPURM_VERBOSE + fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u - wakeup\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); +#endif + } +#endif + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); +} + +static struct s_starpurm_event *_dequeue_event_no_lock(void) +{ + struct s_starpurm *rm = _starpurm; + struct s_starpurm_event *event = NULL; + if (rm->event_list_tail != NULL) + { + event = rm->event_list_tail; + if (event->prev == NULL) + { + rm->event_list_head = NULL; + rm->event_list_tail = NULL; + } + else + { + event->prev->next = NULL; + rm->event_list_tail = event->prev; + } + event->prev = NULL; + event->next = NULL; + } + return event; +} + +static struct s_starpurm_event *_wait_event_no_lock(void) +{ + struct s_starpurm *rm = _starpurm; + while (rm->event_list_head == NULL) + { + STARPU_PTHREAD_COND_WAIT(&rm->event_list_cond, 
&rm->event_list_mutex); + } + struct s_starpurm_event *event = _dequeue_event_no_lock(); + return event; +} + +/* unused */ +static struct s_starpurm_event *_dequeue_event(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + struct s_starpurm_event *event = _dequeue_event_no_lock(); + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + return event; +} + +/* unused */ +static struct s_starpurm_event *_wait_event(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + struct s_starpurm_event *event = _wait_event_no_lock(); + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + return event; +} + +static void _enqueue_exit_event(void) +{ + struct s_starpurm_event *event = calloc(1, sizeof(*event)); + event->code = starpurm_event_exit; + event->workerid = 0; + _enqueue_event(event); +} + +static void callback_worker_going_to_sleep(int workerid) +{ + struct s_starpurm_event *event = calloc(1, sizeof(*event)); + event->code = starpurm_event_worker_going_to_sleep; + event->workerid = workerid; + _enqueue_event(event); +} + +static void callback_worker_waking_up(int workerid) +{ + struct s_starpurm_event *event = calloc(1, sizeof(*event)); + event->code = starpurm_event_worker_waking_up; + event->workerid = workerid; + _enqueue_event(event); +} + +void starpurm_enqueue_event_cpu_unit_available(int unit_id) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + assert(unit_id >= 0); + /* + * unit_id may exceed the number of CPU units actually used by StarPU, + * if some CPU cores are not used. 
+ * + * //assert(unit_id < rm->nunits_by_type[starpurm_unit_cpu]); + */ + int workerid = rm->units[unit_id].workerid; struct + s_starpurm_event *event = calloc(1, sizeof(*event)); + event->code = starpurm_event_unit_available; event->workerid = + workerid; _enqueue_event(event); } + +static void *event_thread_func(void *_arg) +{ + (void)_arg; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + int need_refresh = 0; + + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + while (rm->event_processing_enabled == 0) + { + STARPU_PTHREAD_COND_WAIT(&rm->event_processing_cond, &rm->event_list_mutex); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + hwloc_cpuset_t owned_cpuset = hwloc_bitmap_dup(rm->global_cpuset); + hwloc_cpuset_t to_reclaim_cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_t to_lend_cpuset = hwloc_bitmap_alloc(); + while (1) + { + struct s_starpurm_event *event = _dequeue_event(); +#ifdef STARPURM_HAVE_DLB + if ((event == NULL || event->code == starpurm_event_exit) || need_refresh) +#else + if ((event == NULL || event->code == starpurm_event_exit) && need_refresh) +#endif + { + int did_lend_cpuset = 1; +#ifdef STARPURM_HAVE_DLB + /* notify DLB about changes */ + if (!hwloc_bitmap_iszero(to_reclaim_cpuset)) + { + starpurm_dlb_notify_starpu_worker_mask_waking_up(to_reclaim_cpuset); + } + did_lend_cpuset = 0; + if (!hwloc_bitmap_iszero(to_lend_cpuset)) + { + did_lend_cpuset = starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(to_lend_cpuset); + } +#endif + /* if DLB is not initialized, ignore lend operations */ + if (did_lend_cpuset) + { + hwloc_bitmap_andnot(owned_cpuset, owned_cpuset, to_lend_cpuset); + } + hwloc_bitmap_or(owned_cpuset, owned_cpuset, to_reclaim_cpuset); + +#if 0 + { + char *to_lend_str = bitmap_to_str(to_lend_cpuset); + char *to_reclaim_str = bitmap_to_str(to_reclaim_cpuset); + free(to_lend_str); + free(to_reclaim_str); + } +#endif + + need_refresh = 0; + 
hwloc_bitmap_zero(to_lend_cpuset); + hwloc_bitmap_zero(to_reclaim_cpuset); + } + if (event == NULL) + { + event = _wait_event(); + } + if (event->code == starpurm_event_exit) + { + free(event); + break; + } + + /* TODO: accumulate state change */ + switch (event->code) + { + case starpurm_event_worker_going_to_sleep: + { + if (event->workerid < rm->nunits) + { + int unit_id = rm->worker_unit_ids[event->workerid]; + hwloc_bitmap_or(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); + hwloc_bitmap_andnot(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); + } + } + break; + case starpurm_event_worker_waking_up: + { + if (event->workerid < rm->nunits) + { + int unit_id = rm->worker_unit_ids[event->workerid]; + hwloc_bitmap_andnot(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); +#ifdef STARPURM_HAVE_DLB + if (rm->units[unit_id].type == starpurm_unit_cpu && !hwloc_bitmap_intersects(rm->units[unit_id].worker_cpuset, owned_cpuset)) + { + /* Only reclaim the unit from DLB if StarPU does not own it already. 
*/ + hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); + } + else + { + STARPU_PTHREAD_COND_BROADCAST(&rm->units[unit_id].unit_available_cond); + } +#else + hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); +#endif + } + } + break; +#ifdef STARPURM_HAVE_DLB + case starpurm_event_unit_available: + { + if (event->workerid < rm->nunits) + { + /* a reclaimed unit is now available from DLB, unlock the corresponding worker waking up */ + int unit_id = rm->worker_unit_ids[event->workerid]; + STARPU_PTHREAD_COND_BROADCAST(&rm->units[unit_id].unit_available_cond); + } + } + break; +#endif + default: + /* unknown event code */ + assert(0); + break; + } + free(event); + need_refresh = 1; + } + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + /* exit event should be last */ + assert(rm->event_list_head == NULL); + assert(rm->event_list_tail == NULL); + hwloc_bitmap_free(owned_cpuset); + hwloc_bitmap_free(to_reclaim_cpuset); + hwloc_bitmap_free(to_lend_cpuset); + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + return NULL; +} +#endif /* STARPURM_STARPU_HAVE_WORKER_CALLBACKS */ + +/* Resource enforcement */ +static starpurm_drs_ret_t _starpurm_update_cpuset(hwloc_cpuset_t cpuset) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (hwloc_bitmap_isequal(cpuset, rm->selected_cpuset)) + { + return starpurm_DRS_SUCCESS; + } + STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); + if (rm->starpu_in_pause) + { + starpu_resume(); + rm->starpu_in_pause = 0; + } + int workers_to_remove[_starpurm->nunits]; + int nworkers_to_remove = 0; + int workers_to_add[_starpurm->nunits]; + int nworkers_to_add = 0; + int i; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); + int new_selected_ncpus = 0; + for (i=0; inunits; i++) + { + struct s_starpurm_unit *unit = &rm->units[i]; + hwloc_bitmap_and(temp_cpuset, unit->worker_cpuset, cpuset); 
+ if (hwloc_bitmap_iszero(temp_cpuset)) + { + workers_to_remove[nworkers_to_remove] = unit->workerid; + unit->selected = 0; + nworkers_to_remove++; + } + else + { + workers_to_add[nworkers_to_add] = unit->workerid; + unit->selected = 1; + nworkers_to_add++; + if (unit->type == starpurm_unit_cpu) + { + new_selected_ncpus++; + } + } + } + hwloc_bitmap_free(temp_cpuset); + rm->selected_nworkers = nworkers_to_add; + rm->selected_ncpus = new_selected_ncpus; + hwloc_bitmap_free(rm->selected_cpuset); + rm->selected_cpuset = hwloc_bitmap_dup(cpuset); + + if (nworkers_to_add > 0) + { +#if defined(STARPURM_HAVE_DLB) && !defined(STARPURM_STARPU_HAVE_WORKER_CALLBACKS) + { + /* if StarPU worker callbacks are not enabled, we still + * notify DLB about resource usage changes, but we do + * not wait for the formal DLB go to use the units */ + hwloc_cpuset_t to_reclaim_cpuset = hwloc_bitmap_alloc(); + for (i=0; iworker_unit_ids[workers_to_add[i]]; + hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); + } + starpurm_dlb_notify_starpu_worker_mask_waking_up(to_reclaim_cpuset); + hwloc_bitmap_free(to_reclaim_cpuset); + } +#endif + starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, rm->sched_ctx_id); + } + if (nworkers_to_remove > 0) + { + starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, rm->sched_ctx_id); +#if defined(STARPURM_HAVE_DLB) && !defined(STARPURM_STARPU_HAVE_WORKER_CALLBACKS) + { + /* if StarPU worker callbacks are not enabled, we still + * notify DLB about resource usage changes, but we do + * not wait for the workers to become idle */ + hwloc_cpuset_t to_lend_cpuset = hwloc_bitmap_alloc(); + for (i=0; iworker_unit_ids[workers_to_remove[i]]; + hwloc_bitmap_or(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); + } + starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(to_lend_cpuset); + hwloc_bitmap_free(to_lend_cpuset); + } +#endif + } +#ifdef _DEBUG + 
starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); +#endif /* _DEBUG */ + if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) + { + rm->starpu_in_pause = 1; + starpu_pause(); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); + return starpurm_DRS_SUCCESS; +} + +static unsigned _starpurm_temporary_context_alloc(hwloc_cpuset_t cpuset) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + assert(_starpurm->max_temporary_ctxs > 0); + struct s_starpurm *rm = _starpurm; + STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); + while(rm->avail_temporary_ctxs == 0) + { + STARPU_PTHREAD_COND_WAIT(&rm->temporary_ctxs_cond, &rm->temporary_ctxs_mutex); + } + assert(rm->avail_temporary_ctxs > 0); + rm->avail_temporary_ctxs--; + if (rm->starpu_in_pause) + { + starpu_resume(); + rm->starpu_in_pause = 0; + } + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); + unsigned sched_ctx_id = starpu_sched_ctx_create(NULL, -1, "starpurm_temp", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0); + assert(sched_ctx_id != STARPU_NMAX_SCHED_CTXS); + int workers_to_remove[_starpurm->nunits]; + int nworkers_to_remove = 0; + int workers_to_add[_starpurm->nunits]; + int nworkers_to_add = 0; + int i; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); + for (i=0; inunits; i++) + { + struct s_starpurm_unit *unit = &rm->units[i]; + hwloc_bitmap_and(temp_cpuset, unit->worker_cpuset, cpuset); + if (hwloc_bitmap_iszero(temp_cpuset)) + { + workers_to_remove[nworkers_to_remove] = unit->workerid; + nworkers_to_remove++; + } + else + { + workers_to_add[nworkers_to_add] = unit->workerid; + nworkers_to_add++; + } + } + hwloc_bitmap_free(temp_cpuset); + + if (nworkers_to_add > 0) + starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx_id); + if (nworkers_to_remove > 0) + starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx_id); +#ifdef _DEBUG + 
starpu_sched_ctx_display_workers(sched_ctx_id, stderr); +#endif /* _DEBUG */ + return sched_ctx_id; +} + +static void _starpurm_temporary_context_free(unsigned ctx) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + assert(_starpurm->max_temporary_ctxs > 0); + struct s_starpurm *rm = _starpurm; + starpu_sched_ctx_delete(ctx); + STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); + rm->avail_temporary_ctxs++; + STARPU_PTHREAD_COND_SIGNAL(&rm->temporary_ctxs_cond); + if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) + { + rm->starpu_in_pause = 1; + starpu_pause(); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); +} + +static starpurm_drs_ret_t _starpurm_set_ncpus(int ncpus) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + int i; + if (ncpus > rm->nunits_by_type[starpurm_unit_cpu]) + { + ncpus = rm->nunits_by_type[starpurm_unit_cpu]; + } + if (ncpus == rm->selected_ncpus) + { + return starpurm_DRS_SUCCESS; + } + STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); + if (rm->starpu_in_pause) + { + starpu_resume(); + rm->starpu_in_pause = 0; + } + int workers_to_remove[_starpurm->nunits]; + int nworkers_to_remove = 0; + int workers_to_add[_starpurm->nunits]; + int nworkers_to_add = 0; + for (i=0; inunits; i++) + { + struct s_starpurm_unit *unit = &rm->units[i]; + if (unit->type != starpurm_unit_cpu) + continue; + if (nworkers_to_add < ncpus) + { + workers_to_add[nworkers_to_add] = unit->workerid; + unit->selected = 1; + nworkers_to_add++; + hwloc_bitmap_or(rm->selected_cpuset, rm->selected_cpuset, unit->worker_cpuset); + } + else + { + workers_to_remove[nworkers_to_remove] = unit->workerid; + unit->selected = 0; + hwloc_bitmap_andnot(rm->selected_cpuset, rm->selected_cpuset, unit->worker_cpuset); + nworkers_to_remove++; + } + } + + rm->selected_nworkers = nworkers_to_add; + rm->selected_ncpus = nworkers_to_add; + 
+ if (nworkers_to_add > 0) + starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, rm->sched_ctx_id); + if (nworkers_to_remove > 0) + starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, rm->sched_ctx_id); +#ifdef _DEBUG + starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); +#endif /* _DEBUG */ + if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) + { + rm->starpu_in_pause = 1; + starpu_pause(); + } + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); + return starpurm_DRS_SUCCESS; +} + +/* Initialize rm state for StarPU */ +void starpurm_initialize_with_cpuset(const hwloc_cpuset_t initially_owned_cpuset) +{ + int ret; + assert(_starpurm == NULL); + + struct s_starpurm *rm = calloc(1, sizeof(*rm)); + STARPU_PTHREAD_MUTEX_INIT(&rm->temporary_ctxs_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&rm->temporary_ctxs_cond, NULL); + rm->state = state_init; + + /* init hwloc objects */ + ret = hwloc_topology_init(&rm->topology); + STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); + ret = hwloc_topology_load(rm->topology); + STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); + rm->global_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(rm->global_cpuset); + + rm->initially_owned_cpuset_mask = hwloc_bitmap_dup(initially_owned_cpuset); + + rm->all_cpu_workers_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(rm->all_cpu_workers_cpuset); + + rm->all_opencl_device_workers_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(rm->all_opencl_device_workers_cpuset); + + rm->all_cuda_device_workers_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(rm->all_cuda_device_workers_cpuset); + + rm->all_device_workers_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(rm->all_device_workers_cpuset); + + /* init event list, before StarPU is initialized */ + STARPU_PTHREAD_MUTEX_INIT(&rm->event_list_mutex, NULL); + 
STARPU_PTHREAD_COND_INIT(&rm->event_list_cond, NULL); + STARPU_PTHREAD_COND_INIT(&rm->event_processing_cond, NULL); + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + rm->event_processing_enabled = 0; + rm->event_processing_ended = 0; + rm->event_list_head = NULL; + rm->event_list_tail = NULL; + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + + /* set _starpurm here since StarPU's callbacks may reference it once starpu_init is called */ + _starpurm = rm; + +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS + /* launch event thread */ + ret = pthread_create(&rm->event_thread, NULL, event_thread_func, rm); + assert(ret == 0); +#endif + + /* init StarPU */ + struct starpu_conf starpu_conf; + ret = starpu_conf_init(&starpu_conf); + assert(ret == 0); + +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS + starpu_conf.callback_worker_going_to_sleep = callback_worker_going_to_sleep; + starpu_conf.callback_worker_waking_up = callback_worker_waking_up; +#endif + + ret = starpu_init(&starpu_conf); + assert(ret == 0); + + /* init any worker objects */ + rm->nunits = starpu_worker_get_count_by_type(STARPU_ANY_WORKER); + + /* init device worker objects */ + rm->unit_ntypes = starpurm_unit_ntypes; + rm->nunits_by_type = calloc(rm->unit_ntypes, sizeof(*rm->nunits_by_type)); + rm->unit_offsets_by_type = calloc(rm->unit_ntypes, sizeof(*rm->unit_offsets_by_type)); + + const int cpu_nunits = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + rm->nunits_by_type[starpurm_unit_cpu] = cpu_nunits; + + const int opencl_nunits = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + rm->nunits_by_type[starpurm_unit_opencl] = opencl_nunits; + + const int cuda_nunits = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); + rm->nunits_by_type[starpurm_unit_cuda] = cuda_nunits; + + const int nunits = cpu_nunits + opencl_nunits + cuda_nunits; + rm->nunits = nunits; + rm->units = calloc(nunits, sizeof(*rm->units)); + + int unitid = 0; + + int cpu_workerids[cpu_nunits]; + 
starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, cpu_workerids, cpu_nunits); + rm->unit_offsets_by_type[starpurm_unit_cpu] = unitid; + int max_worker_id = 0; + int i; + for (i = 0; i < cpu_nunits; i++) + { + rm->units[unitid].id = unitid; + rm->units[unitid].type = starpurm_unit_cpu; + rm->units[unitid].selected = 1; /* enabled by default */ + rm->units[unitid].workerid = cpu_workerids[i]; + if (max_worker_id < rm->units[unitid].workerid) + { + max_worker_id = rm->units[unitid].workerid; + } + rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); + STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); + hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); + hwloc_bitmap_or(rm->all_cpu_workers_cpuset, rm->all_cpu_workers_cpuset, rm->units[unitid].worker_cpuset);; +#ifdef STARPURM_VERBOSE + { + char * s_unit = NULL; + hwloc_bitmap_asprintf(&s_unit, rm->units[unitid].worker_cpuset); + fprintf(stderr, "%s: 'cpu', unitid=%d, cpuset=0x%s, workerid=%d\n", __func__, unitid, s_unit, rm->units[unitid].workerid); + free(s_unit); + } +#endif + unitid++; + } + + int opencl_workerids[opencl_nunits]; + starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, opencl_workerids, opencl_nunits); + rm->unit_offsets_by_type[starpurm_unit_opencl] = unitid; + for (i = 0; i < opencl_nunits; i++) + { + rm->units[unitid].id = unitid; + rm->units[unitid].type = starpurm_unit_opencl; + rm->units[unitid].selected = 1; /* enabled by default */ + rm->units[unitid].workerid = opencl_workerids[i]; + if (max_worker_id < rm->units[unitid].workerid) + { + max_worker_id = rm->units[unitid].workerid; + } + rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); + STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); + hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); + hwloc_bitmap_or(rm->all_opencl_device_workers_cpuset, 
rm->all_opencl_device_workers_cpuset, rm->units[unitid].worker_cpuset); + hwloc_bitmap_or(rm->all_device_workers_cpuset, rm->all_device_workers_cpuset, rm->units[unitid].worker_cpuset); + unitid++; + } + + int cuda_workerids[opencl_nunits]; + starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, cuda_workerids, cuda_nunits); + rm->unit_offsets_by_type[starpurm_unit_cuda] = unitid; + for (i = 0; i < cuda_nunits; i++) + { + rm->units[unitid].id = unitid; + rm->units[unitid].type = starpurm_unit_cuda; + rm->units[unitid].selected = 1; /* enabled by default */ + rm->units[unitid].workerid = cuda_workerids[i]; + if (max_worker_id < rm->units[unitid].workerid) + { + max_worker_id = rm->units[unitid].workerid; + } + rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); + STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); + hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); + hwloc_bitmap_or(rm->all_cuda_device_workers_cpuset, rm->all_cuda_device_workers_cpuset, rm->units[unitid].worker_cpuset); + hwloc_bitmap_or(rm->all_device_workers_cpuset, rm->all_device_workers_cpuset, rm->units[unitid].worker_cpuset); + unitid++; + } + + rm->max_worker_id = max_worker_id; + { + int *worker_unit_ids = malloc((max_worker_id+1) * sizeof(*worker_unit_ids)); + for (i = 0; i < max_worker_id+1; i++) + { + worker_unit_ids[i] = -1; + } + for (i=0; inunits; i++) + { + worker_unit_ids[rm->units[i].workerid] = i; + } + rm->worker_unit_ids = worker_unit_ids; + } + + /* create StarPU sched_ctx for RM instance */ + { + int workerids[rm->nunits]; + starpu_worker_get_ids_by_type(STARPU_ANY_WORKER, workerids, rm->nunits); + /* TODO: make sched_ctx policy configurable */ + rm->sched_ctx_id = starpu_sched_ctx_create(workerids, rm->nunits, "starpurm", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0); +#ifdef _DEBUG + starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); +#endif /* _DEBUG */ + } + + 
starpu_sched_ctx_set_context(&rm->sched_ctx_id); + + /* number selected workers (total) */ + rm->selected_nworkers = rm->nunits; + + /* number of selected CPUs workers */ + rm->selected_ncpus = rm->nunits_by_type[starpurm_unit_cpu]; + + /* cpuset of all currently selected workers */ + rm->selected_cpuset = hwloc_bitmap_dup(rm->global_cpuset); + + if (STARPU_NMAX_SCHED_CTXS > 2) + { + /* account for main ctx (0) and default rm ctx (1) + * TODO: check that no other ctxs are allocated by external codes */ + rm->max_temporary_ctxs = STARPU_NMAX_SCHED_CTXS - 2; + } + else + { + rm->max_temporary_ctxs = 0; + } + rm->avail_temporary_ctxs = rm->max_temporary_ctxs; + if (rm->selected_nworkers == 0) + { + rm->starpu_in_pause = 1; + starpu_pause(); + } + else + { + rm->starpu_in_pause = 0; + } + +#ifdef STARPURM_HAVE_DLB + starpurm_dlb_init(rm); +#endif + STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); + rm->event_processing_enabled = 1; + STARPU_PTHREAD_COND_BROADCAST(&rm->event_processing_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); + _starpurm = rm; + +} + +void starpurm_initialize() +{ + hwloc_cpuset_t full_cpuset = hwloc_bitmap_alloc_full(); + starpurm_initialize_with_cpuset(full_cpuset); + hwloc_bitmap_free(full_cpuset); +} + +/* Free rm struct for StarPU */ +void starpurm_shutdown(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + if (rm->starpu_in_pause) + { + starpu_resume(); + rm->starpu_in_pause = 0; + } + + starpu_sched_ctx_delete(rm->sched_ctx_id); +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS + _enqueue_exit_event(); +#endif + starpu_shutdown(); +#ifdef STARPURM_HAVE_DLB + starpurm_dlb_exit(); +#endif + hwloc_topology_destroy(rm->topology); +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS + STARPU_PTHREAD_JOIN(rm->event_thread, NULL); +#endif + assert(rm->event_list_head == NULL); + assert(rm->event_list_tail == NULL); + 
STARPU_PTHREAD_COND_DESTROY(&rm->event_list_cond); + STARPU_PTHREAD_MUTEX_DESTROY(&rm->event_list_mutex); + + rm->state = state_uninitialized; + + hwloc_bitmap_free(rm->global_cpuset); + hwloc_bitmap_free(rm->all_cpu_workers_cpuset); + hwloc_bitmap_free(rm->all_opencl_device_workers_cpuset); + hwloc_bitmap_free(rm->all_cuda_device_workers_cpuset); + hwloc_bitmap_free(rm->all_device_workers_cpuset); + hwloc_bitmap_free(rm->selected_cpuset); + hwloc_bitmap_free(rm->initially_owned_cpuset_mask); + + int i; + for (i=0; inunits; i++) + { + STARPU_PTHREAD_COND_DESTROY(&rm->units[i].unit_available_cond); + } + free(rm->units); + rm->units = NULL; + + free(rm->nunits_by_type); + rm->nunits_by_type = NULL; + + free(rm->unit_offsets_by_type); + rm->unit_offsets_by_type = NULL; + + free(rm); + _starpurm = NULL; +} + + +void starpurm_spawn_kernel_on_cpus(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset) +{ + (void) data; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + unsigned ctx = _starpurm_temporary_context_alloc(cpuset); + starpu_sched_ctx_set_context(&ctx); + f(args); + starpu_sched_ctx_set_context(&rm->sched_ctx_id); + _starpurm_temporary_context_free(ctx); +} + +struct s_starpurm__spawn_args +{ + void(*f)(void *); + void *args; + void(*cb_f)(void *); + void *cb_args; + hwloc_cpuset_t cpuset; +}; + +static void *_starpurm_spawn_kernel_thread(void *_spawn_args) +{ + struct s_starpurm__spawn_args *spawn_args = _spawn_args; + unsigned ctx = _starpurm_temporary_context_alloc(spawn_args->cpuset); + starpu_sched_ctx_set_context(&ctx); + spawn_args->f(spawn_args->args); + struct s_starpurm *rm = _starpurm; + starpu_sched_ctx_set_context(&rm->sched_ctx_id); + _starpurm_temporary_context_free(ctx); + spawn_args->cb_f(spawn_args->cb_args); + hwloc_bitmap_free(spawn_args->cpuset); + free(spawn_args); + return NULL; +} + +void starpurm_spawn_kernel_on_cpus_callback(void *data, void(*f)(void *), 
void *args, hwloc_cpuset_t cpuset, void(*cb_f)(void *), void *cb_args) +{ + (void) data; + struct s_starpurm__spawn_args *spawn_args = calloc(1, sizeof(*spawn_args)); + spawn_args->f = f; + spawn_args->args = args; + spawn_args->cb_f = cb_f; + spawn_args->cb_args = cb_args; + spawn_args->cpuset = hwloc_bitmap_dup(cpuset); + pthread_attr_t attr; + int ret; + ret = pthread_attr_init(&attr); + assert(ret == 0); + ret = pthread_attr_setdetachstate(&attr, 1); + assert(ret == 0); + pthread_t t; + ret = pthread_create(&t, &attr, _starpurm_spawn_kernel_thread, spawn_args); + assert(ret == 0); + +} + +static void *_starpurm_spawn_kernel_in_default_context_thread(void *_spawn_args) +{ + struct s_starpurm__spawn_args *spawn_args = _spawn_args; + struct s_starpurm *rm = _starpurm; + starpu_sched_ctx_set_context(&rm->sched_ctx_id); + spawn_args->f(spawn_args->args); + spawn_args->cb_f(spawn_args->cb_args); + free(spawn_args); + return NULL; +} + +void starpurm_spawn_kernel_callback(void *data, void(*f)(void *), void *args, void(*cb_f)(void *), void *cb_args) +{ + (void) data; + struct s_starpurm__spawn_args *spawn_args = calloc(1, sizeof(*spawn_args)); + spawn_args->f = f; + spawn_args->args = args; + spawn_args->cb_f = cb_f; + spawn_args->cb_args = cb_args; + pthread_attr_t attr; + int ret; + ret = pthread_attr_init(&attr); + assert(ret == 0); + ret = pthread_attr_setdetachstate(&attr, 1); + assert(ret == 0); + pthread_t t; + ret = pthread_create(&t, &attr, _starpurm_spawn_kernel_in_default_context_thread, spawn_args); + assert(ret == 0); + +} + +hwloc_cpuset_t starpurm_get_unit_cpuset(int unitid) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + assert(unitid >= 0 && unitid < rm->nunits); + return hwloc_bitmap_dup(rm->units[unitid].worker_cpuset); +} + +hwloc_cpuset_t starpurm_get_cpu_worker_cpuset(int unit_rank) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct 
s_starpurm *rm = _starpurm; + + assert(unit_rank >= 0 && unit_rank < rm->nunits_by_type[starpurm_unit_cpu]); + return hwloc_bitmap_dup(rm->units[rm->unit_offsets_by_type[starpurm_unit_cpu] + unit_rank].worker_cpuset); +} + +/* Dynamic resource sharing */ +starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd) +{ + (void)spd; + + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + rm->dynamic_resource_sharing = 1; + return starpurm_DRS_SUCCESS; +} + +starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd) +{ + (void)spd; + + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + rm->dynamic_resource_sharing = 0; + return starpurm_DRS_SUCCESS; +} + +int starpurm_drs_enabled_p(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + return rm->dynamic_resource_sharing; +} + + +starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int ncpus) +{ + (void)spd; + + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (ncpus > rm->nunits_by_type[starpurm_unit_cpu]) + { + ncpus = rm->nunits_by_type[starpurm_unit_cpu]; + } + rm->max_ncpus = ncpus; + if (rm->selected_ncpus > ncpus) + { + return _starpurm_set_ncpus(ncpus); + } + return starpurm_DRS_SUCCESS; +} + + +starpurm_drs_ret_t starpurm_callback_set(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t callback) +{ + (void)spd; + (void)which; + (void)callback; + /* unimplemented */ + assert(0); + return starpurm_DRS_PERM; +} + +starpurm_drs_ret_t starpurm_callback_get(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t *callback) +{ + (void)spd; + (void)which; + (void)callback; + /* unimplemented */ + assert(0); + 
return starpurm_DRS_PERM; +} + +starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + starpurm_drs_ret_t ret = 0; + assert(hwloc_bitmap_isset(rm->global_cpuset, cpuid)); + if (!hwloc_bitmap_isset(rm->selected_cpuset, cpuid)) + { + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_set(temp_cpuset, cpuid); + ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + } + return ret; +} + +starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + /* add ncpus more CPUs to the CPUs pool */ + return _starpurm_set_ncpus(rm->selected_ncpus+ncpus); +} + +starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_or(temp_cpuset, temp_cpuset, mask); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + return starpurm_assign_cpus_to_starpu(spd, rm->nunits_by_type[starpurm_unit_cpu]); +} + +starpurm_drs_ret_t 
starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + starpurm_drs_ret_t ret = 0; + assert(hwloc_bitmap_isset(rm->global_cpuset, cpuid)); + if (hwloc_bitmap_isset(rm->selected_cpuset, cpuid)) + { + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_clr(temp_cpuset, cpuid); + ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + } + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + /* add ncpus more CPUs to the CPUs pool */ + starpurm_drs_ret_t ret = 0; + if (ncpus <= rm->nunits_by_type[starpurm_unit_cpu]) + { + ret = _starpurm_set_ncpus(rm->nunits_by_type[starpurm_unit_cpu]-ncpus); + } + else + { + ret = _starpurm_set_ncpus(0); + } + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, mask); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + 
return starpurm_DRS_DISABLD; + return starpurm_withdraw_cpus_from_starpu(spd, rm->nunits_by_type[starpurm_unit_cpu]); +} + +/* --- */ + +starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid) +{ + return starpurm_assign_cpu_to_starpu(spd, cpuid); +} + +starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus) +{ + return starpurm_assign_cpus_to_starpu(spd, ncpus); +} + +starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return starpurm_assign_cpu_mask_to_starpu(spd, mask); +} + +starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd) +{ + return starpurm_assign_all_cpus_to_starpu(spd); +} + + +starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid) +{ + return starpurm_withdraw_cpu_from_starpu(spd, cpuid); +} + +starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus) +{ + return starpurm_withdraw_cpus_from_starpu(spd, ncpus); +} + +starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return starpurm_withdraw_cpu_mask_from_starpu(spd, mask); +} + +starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd) +{ + return starpurm_withdraw_all_cpus_from_starpu(spd); +} + +starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd) +{ + return starpurm_withdraw_all_cpus_from_starpu(spd); +} + +starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid) +{ + return starpurm_withdraw_cpu_from_starpu(spd, cpuid); +} + +starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus) +{ + return starpurm_withdraw_cpus_from_starpu(spd, ncpus); +} + +starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return starpurm_withdraw_cpu_mask_from_starpu(spd, mask); +} + + +starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd) +{ + return starpurm_assign_all_cpus_to_starpu(spd); +} + +starpurm_drs_ret_t 
starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid) +{ + return starpurm_assign_cpu_to_starpu(spd, cpuid); +} + + +/* Pause/resume */ +starpurm_drs_ret_t starpurm_create_block_condition(starpurm_block_cond_t *cond) +{ + /* unimplemented */ + (void)cond; + assert(0); + return starpurm_DRS_PERM; +} + +void starpurm_block_current_task(starpurm_block_cond_t *cond) +{ + /* unimplemented */ + (void)cond; + assert(0); +} + +void starpurm_signal_block_condition(starpurm_block_cond_t *cond) +{ + /* unimplemented */ + (void)cond; + assert(0); +} + + +void starpurm_register_polling_service(const char *service_name, starpurm_polling_t function, void *data) +{ + /* unimplemented */ + (void)service_name; + (void)function; + (void)data; + assert(0); +} + +void starpurm_unregister_polling_service(const char *service_name, starpurm_polling_t function, void *data) +{ + /* unimplemented */ + (void)service_name; + (void)function; + (void)data; + assert(0); +} + +/* devices */ +int starpurm_get_device_type_id(const char *type_str) +{ + if (strcmp(type_str, "cpu") == 0) + return starpurm_unit_cpu; + if (strcmp(type_str, "opencl") == 0) + return starpurm_unit_opencl; + if (strcmp(type_str, "cuda") == 0) + return starpurm_unit_cuda; + return -1; +} + +const char *starpurm_get_device_type_name(int type_id) +{ + if (type_id == starpurm_unit_cpu) + return "cpu"; + if (type_id == starpurm_unit_opencl) + return "opencl"; + if (type_id == starpurm_unit_cuda) + return "cuda"; + return NULL; +} + +int starpurm_get_nb_devices_by_type(int type_id) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return -1; + return rm->nunits_by_type[type_id]; +} + +int starpurm_get_device_id(int type_id, int unit_rank) +{ + + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + if (type_id < 0 || type_id >= 
starpurm_unit_ntypes) + return -1; + if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) + return -1; + return rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].id; +} + +starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return starpurm_DRS_EINVAL; + if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) + return starpurm_DRS_EINVAL; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_or(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return starpurm_DRS_EINVAL; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + if (ndevices > rm->nunits_by_type[type_id]) + { + ndevices = rm->nunits_by_type[type_id]; + } + int i; + for (i = 0; i < ndevices; i++) + { + hwloc_bitmap_or(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + i].worker_cpuset); + } + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != 
state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_or(temp_cpuset, temp_cpuset, mask); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return starpurm_DRS_EINVAL; + return starpurm_assign_devices_to_starpu(spd, type_id, rm->nunits_by_type[type_id]); +} + +starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return starpurm_DRS_EINVAL; + if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) + return starpurm_DRS_EINVAL; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + 
return starpurm_DRS_EINVAL; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + if (ndevices > rm->nunits_by_type[type_id]) + { + ndevices = rm->nunits_by_type[type_id]; + } + int i; + for (i = 0; i < ndevices; i++) + { + hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + i].worker_cpuset); + } + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + (void)spd; + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); + hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, mask); + starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); + hwloc_bitmap_free(temp_cpuset); + return ret; +} + +starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + if (!rm->dynamic_resource_sharing) + return starpurm_DRS_DISABLD; + if (type_id < 0 || type_id >= starpurm_unit_ntypes) + return starpurm_DRS_EINVAL; + return starpurm_withdraw_devices_from_starpu(spd, type_id, rm->nunits_by_type[type_id]); +} + +/* --- */ + +starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + return starpurm_assign_device_to_starpu(spd, type_id, unit_rank); +} + +starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) +{ + return starpurm_assign_devices_to_starpu(spd, type_id, ndevices); +} + +starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return 
starpurm_assign_device_mask_to_starpu(spd, mask); +} + +starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id) +{ + return starpurm_assign_all_devices_to_starpu(spd, type_id); +} + + +starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + return starpurm_withdraw_device_from_starpu(spd, type_id, unit_rank); +} + +starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) +{ + return starpurm_withdraw_devices_from_starpu(spd, type_id, ndevices); +} + +starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return starpurm_withdraw_device_mask_from_starpu(spd, mask); +} + +starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id) +{ + return starpurm_withdraw_all_devices_from_starpu(spd, type_id); +} + +starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id) +{ + return starpurm_withdraw_all_devices_from_starpu(spd, type_id); +} + +starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + return starpurm_withdraw_device_from_starpu(spd, type_id, unit_rank); +} + +starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) +{ + return starpurm_withdraw_devices_from_starpu(spd, type_id, ndevices); +} + +starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) +{ + return starpurm_withdraw_device_mask_from_starpu(spd, mask); +} + + +starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id) +{ + return starpurm_assign_all_devices_to_starpu(spd, type_id); +} + +starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) +{ + return starpurm_assign_device_to_starpu(spd, type_id, unit_rank); +} + +/* cpusets */ +hwloc_cpuset_t starpurm_get_device_worker_cpuset(int 
type_id, int unit_rank) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + assert(type_id >= 0 && type_id < starpurm_unit_ntypes); + assert(unit_rank >= 0 && unit_rank < rm->nunits_by_type[type_id]); + return hwloc_bitmap_dup(rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); +} + +hwloc_cpuset_t starpurm_get_global_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->global_cpuset); +} + +hwloc_cpuset_t starpurm_get_selected_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->selected_cpuset); +} + +hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->all_cpu_workers_cpuset); +} + +static hwloc_cpuset_t starpurm_get_all_opencl_device_workers_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->all_opencl_device_workers_cpuset); +} + +static hwloc_cpuset_t starpurm_get_all_cuda_device_workers_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->all_cuda_device_workers_cpuset); +} + +hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + + return hwloc_bitmap_dup(rm->all_device_workers_cpuset); +} + +hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int typeid) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); 
+ assert(typeid != starpurm_unit_cpu); + if (typeid == starpurm_unit_opencl) + return starpurm_get_all_opencl_device_workers_cpuset(); + if (typeid == starpurm_unit_cuda) + return starpurm_get_all_cuda_device_workers_cpuset(); + hwloc_cpuset_t empty_bitmap = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(empty_bitmap); + return empty_bitmap; +} diff --git a/starpurm/src/starpurm_dlb.c b/starpurm/src/starpurm_dlb.c new file mode 100644 index 0000000..5bca401 --- /dev/null +++ b/starpurm/src/starpurm_dlb.c @@ -0,0 +1,397 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* CPUSET routines */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include + +#include +#include +#include +#include +#include + +#include +#ifdef HAVE_HWLOC_GLIBC_SCHED_H +#include +#endif +#include +#include +#include +#include + +#ifndef STARPURM_HAVE_DLB +#error "STARPU-RM DLB support not enabled" +#endif + +#include +#include + +/* + * DLB interfacing + */ + +static dlb_handler_t dlb_handle; +static cpu_set_t starpurm_process_mask; +static hwloc_cpuset_t starpurm_process_cpuset; +static struct s_starpurm *_starpurm = NULL; +static pthread_mutex_t dlb_handle_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glibc_cpuid_to_unitid[CPU_SETSIZE]; +static int *unitid_to_glibc_cpuid = NULL; + +static const char * _dlb_error_str(int error_code) +{ + const char *s = NULL; + switch (error_code) + { + case DLB_NOUPDT: + s="DLB_NOUPDT"; + break; + case DLB_NOTED: + s="DLB_NOTED"; + break; + case DLB_SUCCESS: + s="DLB_SUCCESS"; + break; + case DLB_ERR_UNKNOWN: + s="DLB_ERR_UNKNOWN"; + break; + case DLB_ERR_NOINIT: + s="DLB_ERR_NOINIT"; + break; + case DLB_ERR_INIT: + s="DLB_ERR_INIT"; + break; + case DLB_ERR_DISBLD: + s="DLB_ERR_DISBLD"; + break; + case DLB_ERR_NOSHMEM: + s="DLB_ERR_NOSHMEM"; + break; + case DLB_ERR_NOPROC: + s="DLB_ERR_NOPROC"; + break; + case DLB_ERR_PDIRTY: + s="DLB_ERR_PDIRTY"; + break; + case DLB_ERR_PERM: + s="DLB_ERR_PERM"; + break; + case DLB_ERR_TIMEOUT: + s="DLB_ERR_TIMEOUT"; + break; + case DLB_ERR_NOCBK: + s="DLB_ERR_NOCBK"; + break; + case DLB_ERR_NOENT: + s="DLB_ERR_NOENT"; + break; + case DLB_ERR_NOCOMP: + s="DLB_ERR_NOCOMP"; + break; + case DLB_ERR_REQST: + s="DLB_ERR_REQST"; + break; + case DLB_ERR_NOMEM: + s="DLB_ERR_NOMEM"; + break; + case DLB_ERR_NOPOL: + s="DLB_ERR_NOPOL"; + break; + + default: + s = ""; + break; + } + return s; +} + +#define _dlb_check(s,r) do { if ((r) != DLB_SUCCESS) {fprintf(stderr, "%s:%d, %s - DLB call '%s' %s %d (%s)\n",__FILE__, __LINE__, __func__, (s), (r)>0?"returned warning code":"failed with 
error code", (r), _dlb_error_str((r))); assert(dlb_ret >= DLB_SUCCESS); }} while (0) + +#if 0 +/* unused for now */ +static void _glibc_cpuset_to_hwloc_cpuset(const cpu_set_t *glibc_cpuset, hwloc_cpuset_t *hwloc_cpuset) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + int status = hwloc_cpuset_from_glibc_sched_affinity(rm->topology, *hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t)); + assert(status == 0); +} +#endif + +static void _hwloc_cpuset_to_glibc_cpuset(const hwloc_cpuset_t hwloc_cpuset, cpu_set_t *glibc_cpuset) +{ + assert(_starpurm != NULL); + assert(_starpurm->state != state_uninitialized); + struct s_starpurm *rm = _starpurm; + int status = hwloc_cpuset_to_glibc_sched_affinity(rm->topology, hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t)); + assert(status == 0); +} + +int starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(const hwloc_cpuset_t hwloc_workers_cpuset) +{ + int status = 0; + pthread_mutex_lock(&dlb_handle_mutex); + if (dlb_handle != NULL) + { + hwloc_cpuset_t hwloc_to_lend_cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_t hwloc_to_return_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(hwloc_to_lend_cpuset); + hwloc_bitmap_zero(hwloc_to_return_cpuset); + hwloc_bitmap_and(hwloc_to_lend_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); + hwloc_bitmap_andnot(hwloc_to_return_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); +#ifdef STARPURM_DLB_VERBOSE + { + char * s_to_lend = NULL; + char * s_to_return = NULL; + hwloc_bitmap_asprintf(&s_to_lend, hwloc_to_lend_cpuset); + hwloc_bitmap_asprintf(&s_to_return, hwloc_to_return_cpuset); + fprintf(stderr, "%s: to_lend='%s', to_return='%s'\n", __func__, s_to_lend, s_to_return); + free(s_to_lend); + free(s_to_return); + } +#endif + if (!hwloc_bitmap_iszero(hwloc_to_lend_cpuset)) + { + cpu_set_t glibc_to_lend_cpuset; + CPU_ZERO(&glibc_to_lend_cpuset); + _hwloc_cpuset_to_glibc_cpuset(hwloc_to_lend_cpuset, 
&glibc_to_lend_cpuset); + int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_lend_cpuset); + _dlb_check("DLB_LendCpuMask_sp", dlb_ret); + } + if (!hwloc_bitmap_iszero(hwloc_to_return_cpuset)) + { + cpu_set_t glibc_to_return_cpuset; + CPU_ZERO(&glibc_to_return_cpuset); + _hwloc_cpuset_to_glibc_cpuset(hwloc_to_return_cpuset, &glibc_to_return_cpuset); + /* Use DLB_Lend for returning borrowed units. DLB_Return seems to require that + * a reclaim has previously been emitted by the unit owning runtime system */ +#if 0 + int dlb_ret = DLB_ReturnCpuMask_sp(dlb_handle, &glibc_to_return_cpuset); + _dlb_check("DLB_ReturnCpuMask_sp", dlb_ret); +#else + int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_return_cpuset); + _dlb_check("DLB_LendCpuMask_sp", dlb_ret); +#endif + } + hwloc_bitmap_free(hwloc_to_lend_cpuset); + hwloc_bitmap_free(hwloc_to_return_cpuset); + status = 1; + } + pthread_mutex_unlock(&dlb_handle_mutex); + return status; +} + +int starpurm_dlb_notify_starpu_worker_mask_waking_up(const hwloc_cpuset_t hwloc_workers_cpuset) +{ + int status = 0; + pthread_mutex_lock(&dlb_handle_mutex); + if (dlb_handle != NULL) + { + hwloc_cpuset_t hwloc_to_reclaim_cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_t hwloc_to_borrow_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(hwloc_to_reclaim_cpuset); + hwloc_bitmap_zero(hwloc_to_borrow_cpuset); + hwloc_bitmap_and(hwloc_to_reclaim_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); + hwloc_bitmap_andnot(hwloc_to_borrow_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); +#ifdef STARPURM_DLB_VERBOSE + { + char * s_to_reclaim = NULL; + char * s_to_borrow = NULL; + hwloc_bitmap_asprintf(&s_to_reclaim, hwloc_to_reclaim_cpuset); + hwloc_bitmap_asprintf(&s_to_borrow, hwloc_to_borrow_cpuset); + fprintf(stderr, "%s: to_reclaim='%s', to_borrow='%s'\n", __func__, s_to_reclaim, s_to_borrow); + free(s_to_reclaim); + free(s_to_borrow); + } +#endif + if (!hwloc_bitmap_iszero(hwloc_to_reclaim_cpuset)) + { + cpu_set_t 
glibc_to_reclaim_cpuset; + CPU_ZERO(&glibc_to_reclaim_cpuset); + _hwloc_cpuset_to_glibc_cpuset(hwloc_to_reclaim_cpuset, &glibc_to_reclaim_cpuset); + int dlb_ret = DLB_ReclaimCpuMask_sp(dlb_handle, &glibc_to_reclaim_cpuset); + _dlb_check("DLB_ReclaimCpuMask_sp", dlb_ret); + } + if (!hwloc_bitmap_iszero(hwloc_to_borrow_cpuset)) + { + cpu_set_t glibc_to_borrow_cpuset; + CPU_ZERO(&glibc_to_borrow_cpuset); + _hwloc_cpuset_to_glibc_cpuset(hwloc_to_borrow_cpuset, &glibc_to_borrow_cpuset); + int dlb_ret = DLB_BorrowCpuMask_sp(dlb_handle, &glibc_to_borrow_cpuset); + _dlb_check("DLB_BorrowCpuMask_sp", dlb_ret); + } + hwloc_bitmap_free(hwloc_to_reclaim_cpuset); + hwloc_bitmap_free(hwloc_to_borrow_cpuset); + status = 1; + } + pthread_mutex_unlock(&dlb_handle_mutex); + return status; +} + +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS +#ifdef STARPURM_HAVE_DLB_CALLBACK_ARG +static void _dlb_callback_enable_cpu(int cpuid, void *arg) +#else +static void _dlb_callback_enable_cpu(int cpuid) +#endif +{ +#ifdef STARPURM_HAVE_DLB_CALLBACK_ARG + (void) arg; +#endif + int unitid = glibc_cpuid_to_unitid[cpuid]; +#ifdef STARPURM_DLB_VERBOSE + fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid); +#endif + if (unitid != -1) + { + starpurm_enqueue_event_cpu_unit_available(unitid); + } +} + +#ifdef STARPURM_HAVE_DLB_CALLBACK_ARG +static void _dlb_callback_disable_cpu(int cpuid, void *arg) +#else +static void _dlb_callback_disable_cpu(int cpuid) +#endif +{ +#ifdef STARPURM_HAVE_DLB_CALLBACK_ARG + (void) arg; +#endif + int unitid = glibc_cpuid_to_unitid[cpuid]; +#ifdef STARPURM_DLB_VERBOSE + fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid); +#endif + if (unitid != -1) + { + /* nothing */ + } +} +#endif + +void starpurm_dlb_init(struct s_starpurm *rm) +{ + _starpurm = rm; + + { + int unitid; + int cpuid; + unitid_to_glibc_cpuid = malloc(rm->nunits * sizeof(*unitid_to_glibc_cpuid)); + for (cpuid = 0; cpuidnunits; unitid++) + { + hwloc_cpuset_t unit_cpuset = 
starpurm_get_unit_cpuset(unitid); + cpu_set_t unit_mask; + CPU_ZERO(&unit_mask); + _hwloc_cpuset_to_glibc_cpuset(unit_cpuset, &unit_mask); + unitid_to_glibc_cpuid[unitid] = -1; + for (cpuid = 0; cpuidselected_cpuset); + hwloc_bitmap_and(starpurm_process_cpuset, starpurm_process_cpuset, rm->initially_owned_cpuset_mask); + _hwloc_cpuset_to_glibc_cpuset(starpurm_process_cpuset, &starpurm_process_mask); +#ifdef STARPURM_DLB_VERBOSE + { + char * s_reachable = NULL; + char * s_initially_owned = NULL; + hwloc_bitmap_asprintf(&s_reachable, rm->selected_cpuset); + hwloc_bitmap_asprintf(&s_initially_owned, starpurm_process_cpuset); + fprintf(stderr, "%s: StarPU reachable units='%s', StarPU initially owned units='%s'\n", __func__, s_reachable, s_initially_owned); + free(s_reachable); + free(s_initially_owned); + } +#endif + + pthread_mutex_lock(&dlb_handle_mutex); + + /* TODO: autodetect DLB policy according to DLB version */ +#if 1 + dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--lewi=yes --drom=no --mode=async"); +#else + dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--policy=new --drom=no --mode=async"); +#endif + + /* cpu-based callbacks are mutually exclusive with mask-based callbacks, + * we only register cpu-based callbacks */ + int dlb_ret; +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS +#ifdef STARPURM_HAVE_DLB_CALLBACK_ARG + dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, NULL); + _dlb_check("DLB_CallbackSet_sp", dlb_ret); + dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu, NULL); + _dlb_check("DLB_CallbackSet_sp", dlb_ret); +#else + dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu); + _dlb_check("DLB_CallbackSet_sp", dlb_ret); + dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu); + _dlb_check("DLB_CallbackSet_sp", 
dlb_ret); +#endif +#endif + + dlb_ret = DLB_Enable_sp(dlb_handle); + _dlb_check("DLB_Enable_sp", dlb_ret); + pthread_mutex_unlock(&dlb_handle_mutex); + +} + +void starpurm_dlb_exit(void) +{ + pthread_mutex_lock(&dlb_handle_mutex); + dlb_handler_t dlb_handle_save = dlb_handle; + dlb_handle = 0; + pthread_mutex_unlock(&dlb_handle_mutex); + + /* lend every resource that StarPU may still have */ + DLB_Lend_sp(dlb_handle_save); + DLB_Return_sp(dlb_handle_save); + + pthread_mutex_lock(&dlb_handle_mutex); + DLB_Disable_sp(dlb_handle_save); + DLB_Finalize_sp(dlb_handle_save); + hwloc_bitmap_free(starpurm_process_cpuset); + free(unitid_to_glibc_cpuid); + pthread_mutex_unlock(&dlb_handle_mutex); +} diff --git a/starpurm/src/starpurm_private.h b/starpurm/src/starpurm_private.h new file mode 100644 index 0000000..c140a61 --- /dev/null +++ b/starpurm/src/starpurm_private.h @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef __STARPURM_PRIVATE_H +#define __STARPURM_PRIVATE_H + +/** @file */ + +enum e_state +{ + state_uninitialized = 0, + state_init +}; + +enum e_starpurm_unit_type +{ + starpurm_unit_cpu = 0, + starpurm_unit_opencl = 1, + starpurm_unit_cuda = 2, + starpurm_unit_ntypes = 3 +}; + +struct s_starpurm +{ + /** Machine topology as detected by hwloc. 
*/ + hwloc_topology_t topology; + + /** Current upper bound on the number of CPU cores selectable for computing with the runtime system. */ + int max_ncpus; + + /** Number of currently selected CPU workers */ + int selected_ncpus; + + /** Number of currently selected workers (CPU+devices) */ + int selected_nworkers; + + /** Initialization state of the RM instance. */ + int state; + + /** Boolean indicating the state of the dynamic resource sharing layer. + * + * !0 indicates that dynamic resource sharing is enabled. + * 0 indicates that dynamic resource sharing is disabled. + */ + int dynamic_resource_sharing; + + /** Id of the StarPU's sched_ctx used by the RM instance. */ + unsigned sched_ctx_id; + + /** Number of unit types supported by this RM instance. */ + int unit_ntypes; + + /** Number of units available for each type. */ + int *nunits_by_type; + + /** Number of units. */ + int nunits; + + /** Offset of unit numbering for each type. */ + int *unit_offsets_by_type; + + /** Array of units. */ + struct s_starpurm_unit *units; + + /** Cpuset of all the StarPU's workers (CPU+devices). */ + hwloc_cpuset_t global_cpuset; + + /** Cpuset of all StarPU CPU workers. */ + hwloc_cpuset_t all_cpu_workers_cpuset; + + /** Cpuset of all StarPU OpenCL workers. */ + hwloc_cpuset_t all_opencl_device_workers_cpuset; + + /** Cpuset of all StarPU CUDA workers. */ + hwloc_cpuset_t all_cuda_device_workers_cpuset; + + /** Cpuset of all StarPU device workers. */ + hwloc_cpuset_t all_device_workers_cpuset; + + /** Cpuset of all selected workers (CPU+devices). */ + hwloc_cpuset_t selected_cpuset; + + /** Cpuset mask of initially owned cpuset or full if not used. */ + hwloc_cpuset_t initially_owned_cpuset_mask; + + /** maximum value among worker ids */ + int max_worker_id; + + /** worker id to unit id table */ + int *worker_unit_ids; + + /** Temporary contexts accounting. 
*/ + unsigned int max_temporary_ctxs; + unsigned int avail_temporary_ctxs; + starpu_pthread_mutex_t temporary_ctxs_mutex; + starpu_pthread_cond_t temporary_ctxs_cond; + + /** Global StarPU pause state */ + int starpu_in_pause; + + /** Event list. */ + pthread_t event_thread; + starpu_pthread_mutex_t event_list_mutex; + starpu_pthread_cond_t event_list_cond; + starpu_pthread_cond_t event_processing_cond; + int event_processing_enabled; + int event_processing_ended; + struct s_starpurm_event *event_list_head; + struct s_starpurm_event *event_list_tail; +}; + + +#ifdef STARPURM_HAVE_DLB +void starpurm_dlb_init(struct s_starpurm *rm); +void starpurm_dlb_exit(void); +int starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(const hwloc_cpuset_t hwloc_workers_cpuset); +int starpurm_dlb_notify_starpu_worker_mask_waking_up(const hwloc_cpuset_t hwloc_workers_cpuset); +#ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS +void starpurm_enqueue_event_cpu_unit_available(int cpuid); +#endif +#endif + +#endif /* __STARPURM_PRIVATE_H */ diff --git a/starpurm/tests/01_init_exit.c b/starpurm/tests/01_init_exit.c new file mode 100644 index 0000000..49d3c55 --- /dev/null +++ b/starpurm/tests/01_init_exit.c @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example tests the proper initialization and shutdown of StarPURM. 
*/ + +#include +#include + +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + starpurm_initialize(); + starpurm_shutdown(); + return 0; +} diff --git a/starpurm/tests/02_list_units.c b/starpurm/tests/02_list_units.c new file mode 100644 index 0000000..69c8646 --- /dev/null +++ b/starpurm/tests/02_list_units.c @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example lists the CPU and device units detected and managed by + * StarPURM. */ + +#include +#include + +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + int ids[3]; + int i; + starpurm_initialize(); + ids[0] = starpurm_get_device_type_id("cpu"); + ids[1] = starpurm_get_device_type_id("opencl"); + ids[2] = starpurm_get_device_type_id("cuda"); + + for (i=0; i<3; i++) + { + const int id = ids[i]; + if (id == -1) + continue; + const int nb_units = starpurm_get_nb_devices_by_type(id); + printf("%s: %d units\n", starpurm_get_device_type_name(id), nb_units); + int j; + for (j=0; j +#include +#include + +static void disp_cpuset(const char * name, hwloc_cpuset_t cpuset) +{ + int strl = hwloc_bitmap_snprintf(NULL, 0, cpuset); + char str[strl+1]; + hwloc_bitmap_snprintf(str, strl+1, cpuset); + printf(". 
%s: %s\n", name, str); +} + +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + starpurm_initialize(); + int cpu_id = starpurm_get_device_type_id("cpu"); + const int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_id); + if (nb_cpu_units < 1) + { + starpurm_shutdown(); + return 77; + } + hwloc_cpuset_t cpuset; + cpuset = starpurm_get_device_worker_cpuset(cpu_id, 0); + disp_cpuset("worker cpuset", cpuset); + hwloc_bitmap_free(cpuset); + + cpuset = starpurm_get_global_cpuset(); + disp_cpuset("global cpuset", cpuset); + hwloc_bitmap_free(cpuset); + + cpuset = starpurm_get_selected_cpuset(); + disp_cpuset("selected cpuset", cpuset); + hwloc_bitmap_free(cpuset); + + cpuset = starpurm_get_all_cpu_workers_cpuset(); + disp_cpuset("all cpu workers cpuset", cpuset); + hwloc_bitmap_free(cpuset); + + cpuset = starpurm_get_all_device_workers_cpuset(); + disp_cpuset("all device workers cpuset", cpuset); + hwloc_bitmap_free(cpuset); + + starpurm_shutdown(); + + return 0; +} diff --git a/starpurm/tests/04_drs_enable.c b/starpurm/tests/04_drs_enable.c new file mode 100644 index 0000000..99c934e --- /dev/null +++ b/starpurm/tests/04_drs_enable.c @@ -0,0 +1,40 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This example tests enabling and disabling dynamic resource sharing (DRS) in StarPURM. 
*/ + +#include +#include + +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + int drs_enabled; + starpurm_initialize(); + drs_enabled = starpurm_drs_enabled_p(); + printf("drs enabled at startup: %d\n", drs_enabled); + + starpurm_set_drs_enable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + printf("drs state after explicit enable: %d\n", drs_enabled); + + starpurm_set_drs_disable(NULL); + drs_enabled = starpurm_drs_enabled_p(); + printf("drs state after explicit disable: %d\n", drs_enabled); + starpurm_shutdown(); + return 0; +} diff --git a/starpurm/tests/Makefile.am b/starpurm/tests/Makefile.am new file mode 100644 index 0000000..f39feea --- /dev/null +++ b/starpurm/tests/Makefile.am @@ -0,0 +1,39 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk +SUBDIRS = + +CLEANFILES = *.gcno *.gcda *.linkinfo + +AM_CFLAGS += $(DLB_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include +AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(HWLOC_LIBS) $(DLB_LIBS) + +noinst_PROGRAMS = +noinst_PROGRAMS += $(myPROGRAMS) + +check_PROGRAMS = $(myPROGRAMS) + +TESTS = $(myPROGRAMS) + +myPROGRAMS = +myPROGRAMS += 01_init_exit +myPROGRAMS += 02_list_units +myPROGRAMS += 03_cpusets +myPROGRAMS += 04_drs_enable diff --git a/starpurm/tests/Makefile.in b/starpurm/tests/Makefile.in new file mode 100644 index 0000000..ba46753 --- /dev/null +++ b/starpurm/tests/Makefile.in @@ -0,0 +1,1540 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_1) +check_PROGRAMS = $(am__EXEEXT_1) +TESTS = $(am__EXEEXT_1) +subdir = starpurm/tests +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__EXEEXT_1 = 01_init_exit$(EXEEXT) 02_list_units$(EXEEXT) \ + 03_cpusets$(EXEEXT) 04_drs_enable$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) +01_init_exit_SOURCES = 01_init_exit.c +01_init_exit_OBJECTS = 01_init_exit.$(OBJEXT) +01_init_exit_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +02_list_units_SOURCES = 02_list_units.c +02_list_units_OBJECTS = 02_list_units.$(OBJEXT) +02_list_units_LDADD = $(LDADD) +03_cpusets_SOURCES = 03_cpusets.c +03_cpusets_OBJECTS = 03_cpusets.$(OBJEXT) +03_cpusets_LDADD = $(LDADD) +04_drs_enable_SOURCES = 04_drs_enable.c +04_drs_enable_OBJECTS = 04_drs_enable.$(OBJEXT) +04_drs_enable_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) 
+am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/01_init_exit.Po \ + ./$(DEPDIR)/02_list_units.Po ./$(DEPDIR)/03_cpusets.Po \ + ./$(DEPDIR)/04_drs_enable.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = 01_init_exit.c 02_list_units.c 03_cpusets.c 04_drs_enable.c +DIST_SOURCES = 01_init_exit.c 02_list_units.c 03_cpusets.c \ + 04_drs_enable.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac 
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p 
in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ 
+APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = 
@INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ \ + $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la \ + $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ + $(HWLOC_LIBS) $(DLB_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO 
= @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ 
+STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = 
@STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ 
+target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = +LAUNCHER = +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(DLB_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = 
$(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +SUBDIRS = +CLEANFILES = *.gcno *.gcda *.linkinfo +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ + -I$(top_builddir)/src -I$(top_builddir)/include \ + -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ + -I$(top_builddir)/starpurm/src \ + -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +myPROGRAMS = 01_init_exit 02_list_units 03_cpusets 04_drs_enable +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/tests/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign starpurm/tests/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +01_init_exit$(EXEEXT): $(01_init_exit_OBJECTS) $(01_init_exit_DEPENDENCIES) $(EXTRA_01_init_exit_DEPENDENCIES) + @rm -f 01_init_exit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(01_init_exit_OBJECTS) $(01_init_exit_LDADD) $(LIBS) + +02_list_units$(EXEEXT): $(02_list_units_OBJECTS) $(02_list_units_DEPENDENCIES) $(EXTRA_02_list_units_DEPENDENCIES) + @rm -f 02_list_units$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(02_list_units_OBJECTS) $(02_list_units_LDADD) $(LIBS) + +03_cpusets$(EXEEXT): $(03_cpusets_OBJECTS) $(03_cpusets_DEPENDENCIES) $(EXTRA_03_cpusets_DEPENDENCIES) + @rm -f 03_cpusets$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(03_cpusets_OBJECTS) $(03_cpusets_LDADD) $(LIBS) + 
+04_drs_enable$(EXEEXT): $(04_drs_enable_OBJECTS) $(04_drs_enable_DEPENDENCIES) $(EXTRA_04_drs_enable_DEPENDENCIES) + @rm -f 04_drs_enable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(04_drs_enable_OBJECTS) $(04_drs_enable_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/01_init_exit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/02_list_units.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/03_cpusets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/04_drs_enable.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ 
$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + 
list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +01_init_exit.log: 01_init_exit$(EXEEXT) + @p='01_init_exit$(EXEEXT)'; \ + b='01_init_exit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +02_list_units.log: 02_list_units$(EXEEXT) + @p='02_list_units$(EXEEXT)'; \ + b='02_list_units'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +03_cpusets.log: 03_cpusets$(EXEEXT) + @p='03_cpusets$(EXEEXT)'; \ + b='03_cpusets'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +04_drs_enable.log: 04_drs_enable$(EXEEXT) + @p='04_drs_enable$(EXEEXT)'; \ + b='04_drs_enable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(PROGRAMS) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f 
$(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/01_init_exit.Po + -rm -f ./$(DEPDIR)/02_list_units.Po + -rm -f ./$(DEPDIR)/03_cpusets.Po + -rm -f ./$(DEPDIR)/04_drs_enable.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/01_init_exit.Po + -rm -f ./$(DEPDIR)/02_list_units.Po + -rm -f ./$(DEPDIR)/03_cpusets.Po + -rm -f ./$(DEPDIR)/04_drs_enable.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) check-am install-am install-strip + +.PHONY: 
$(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! 
grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/tests/Makefile.am b/tests/Makefile.am new file mode 100644 index 0000000..0b64052 --- /dev/null +++ b/tests/Makefile.am @@ -0,0 +1,1261 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +include $(top_srcdir)/make/starpu-tests.mk +include $(top_srcdir)/make/starpu-loader.mk + +AM_CFLAGS += $(APP_CFLAGS) +AM_CXXFLAGS += $(APP_CXXFLAGS) +AM_FFLAGS += $(APP_FFLAGS) +AM_FCFLAGS += $(APP_FCFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) +LIBS += $(HWLOC_LIBS) +LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) +LIBS += $(STARPU_MAX_FPGA_LDFLAGS) + +EXTRA_DIST = \ + helper.h \ + datawizard/locality.sh \ + overlap/overlap.sh \ + datawizard/scal.h \ + regression/profiles.in \ + regression/regression.sh.in \ + regression/profiles.build.only.in \ + microbenchs/tasks_data_overhead.sh \ + microbenchs/sync_tasks_data_overhead.sh \ + microbenchs/async_tasks_data_overhead.sh \ + microbenchs/tasks_size_overhead.sh \ + microbenchs/tasks_size_overhead_sched.sh \ + microbenchs/tasks_size_overhead_scheds.sh \ + microbenchs/tasks_size_overhead.gp \ + microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ + microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ + microbenchs/parallel_independent_heterogeneous_tasks.sh \ + microbenchs/parallel_independent_homogeneous_tasks_data.sh \ + microbenchs/parallel_independent_homogeneous_tasks.sh \ + microbenchs/bandwidth_scheds.sh \ + microbenchs/starpu_check.sh \ + energy/static.sh \ + energy/dynamic.sh \ + energy/perfs.gp \ + datawizard/scratch_opencl_kernel.cl \ + datawizard/sync_and_notify_data_opencl_codelet.cl\ + coverage/coverage.sh \ + variable/increment.h \ + variable/increment_opencl_kernel.cl \ + variable/redux_opencl_kernel.cl \ + variable/neutral_opencl_kernel.cl \ + datawizard/interfaces/test_interfaces.h \ + datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ + datawizard/interfaces/coo/coo_opencl_kernel.cl \ + datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ + 
datawizard/interfaces/variable/variable_opencl_kernel.cl \ + datawizard/interfaces/vector/vector_opencl_kernel.cl \ + datawizard/interfaces/multiformat/multiformat_types.h \ + datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ + datawizard/interfaces/multiformat/advanced/generic.h \ + datawizard/interfaces/csr/csr_opencl_kernel.cl \ + datawizard/interfaces/block/block_opencl_kernel.cl \ + datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ + datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ + perfmodels/opencl_memset_kernel.cl \ + $(MICROBENCHS:=.sh) \ + microbenchs/microbench.sh \ + memory/memstress2.gp \ + memory/memstress2.sh \ + memory/memstress.gp \ + memory/memstress.sh \ + maxfpga/LMemLoopbackCpuCode.c \ + maxfpga/MyTasksManager.maxj \ + maxfpga/MyTasksMuxManager.maxj \ + maxfpga/README.txt \ + maxfpga/StreamFMACpuCode.cpp \ + maxfpga/Task1.maxj \ + maxfpga/Task2.maxj \ + maxfpga/Task3.maxj \ + datawizard/interfaces/test_interfaces.sh + +CLEANFILES = \ + *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg *.csv *.md *.Rmd *.pdf *.html + +clean-local: + -rm -rf overlap/overlap.traces datawizard/locality.traces + +BUILT_SOURCES = +SUBDIRS = + +examplebindir = $(libdir)/starpu/examples + +if STARPU_USE_OPENCL +nobase_STARPU_OPENCL_DATA_DATA = +endif + +testbindir = $(libdir)/starpu/tests + +##################################### +# What to install and what to check # +##################################### + +TESTS = $(SHELL_TESTS) $(myPROGRAMS) + +SHELL_TESTS = + +if STARPU_COVERAGE_ENABLED +SHELL_TESTS += coverage/coverage.sh +endif + +XFAIL_TESTS = \ + errorcheck/invalid_blocking_calls + +myPROGRAMS = + +myPROGRAMS += \ + main/callback \ + main/bind \ + main/mkdtemp \ + main/execute_schedule \ + 
main/insert_task_pack \ + main/insert_task_nullcodelet \ + main/insert_task_where \ + main/multithreaded_init \ + main/empty_task \ + main/empty_task_chain \ + main/starpu_worker_exists \ + main/codelet_null_callback \ + datawizard/allocate \ + datawizard/acquire_cb \ + datawizard/deps \ + datawizard/user_interaction_implicit \ + datawizard/interfaces/copy_interfaces \ + datawizard/numa_overflow \ + datawizard/locality \ + datawizard/variable_size \ + errorcheck/starpu_init_noworker \ + errorcheck/invalid_tasks \ + helper/cublas_init \ + helper/cublasLt_init \ + helper/cusparse_init \ + helper/hipblas_init \ + helper/pinned_memory \ + helper/execute_on_all \ + microbenchs/display_structures_size \ + microbenchs/local_pingpong \ + overlap/overlap \ + sched_ctx/sched_ctx_list \ + sched_ctx/sched_ctx_policy_data \ + openmp/init_exit_01 \ + openmp/init_exit_02 \ + openmp/environment \ + openmp/api_01 \ + openmp/parallel_01 \ + openmp/parallel_02 \ + openmp/parallel_03 \ + openmp/parallel_barrier_01 \ + openmp/parallel_master_01 \ + openmp/parallel_master_inline_01 \ + openmp/parallel_single_wait_01 \ + openmp/parallel_single_nowait_01 \ + openmp/parallel_single_inline_01 \ + openmp/parallel_single_copyprivate_01 \ + openmp/parallel_single_copyprivate_inline_01 \ + openmp/parallel_critical_01 \ + openmp/parallel_critical_inline_01 \ + openmp/parallel_critical_named_01 \ + openmp/parallel_critical_named_inline_01\ + openmp/parallel_simple_lock_01 \ + openmp/parallel_nested_lock_01 \ + openmp/parallel_for_01 \ + openmp/parallel_for_02 \ + openmp/parallel_for_ordered_01 \ + openmp/parallel_sections_01 \ + openmp/parallel_sections_combined_01 \ + openmp/task_01 \ + openmp/task_02 \ + openmp/task_03 \ + openmp/taskloop \ + openmp/taskwait_01 \ + openmp/taskgroup_01 \ + openmp/taskgroup_02 \ + openmp/array_slice_01 \ + openmp/cuda_task_01 \ + perfmodels/value_nan \ + sched_policies/workerids + +if STARPU_SIMGRID +myPROGRAMS += \ + energy/energy_efficiency \ + 
datawizard/simgrid-locality +endif + +if !STARPU_SIMGRID +myPROGRAMS += \ + main/deprecated_func \ + main/driver_api/init_run_deinit \ + main/driver_api/run_driver \ + main/deploop \ + main/display_binding \ + main/execute_on_a_specific_worker \ + main/insert_task \ + main/insert_task_value \ + main/insert_task_dyn_handles \ + main/insert_task_array \ + main/insert_task_many \ + main/job \ + main/multithreaded \ + main/starpu_task_bundle \ + main/starpu_task_wait_for_all \ + main/starpu_task_wait \ + main/static_restartable \ + main/static_restartable_using_initializer\ + main/static_restartable_tag \ + main/regenerate \ + main/regenerate_pipeline \ + main/restart \ + main/wait_all_regenerable_tasks \ + main/subgraph_repeat \ + main/subgraph_repeat_tag \ + main/subgraph_repeat_regenerate \ + main/subgraph_repeat_regenerate_tag \ + main/subgraph_repeat_regenerate_tag_cycle \ + main/empty_task_sync_point \ + main/empty_task_sync_point_tasks \ + main/tag_wait_api \ + main/tag_get_task \ + main/task_wait_api \ + main/declare_deps_in_callback \ + main/declare_deps_after_submission \ + main/declare_deps_after_submission_synchronous \ + main/get_current_task \ + main/starpu_init \ + main/submit \ + main/const_codelet \ + main/pause_resume \ + main/pack \ + main/get_children_tasks \ + main/hwloc_cpuset \ + main/task_end_dep \ + datawizard/acquire_cb_insert \ + datawizard/acquire_release \ + datawizard/acquire_release2 \ + datawizard/acquire_release_to \ + datawizard/acquire_try \ + datawizard/bcsr \ + datawizard/cache \ + datawizard/commute \ + datawizard/commute2 \ + datawizard/copy \ + datawizard/data_implicit_deps \ + datawizard/data_register \ + datawizard/scratch \ + datawizard/scratch_reuse \ + datawizard/sync_and_notify_data \ + datawizard/sync_and_notify_data_implicit\ + datawizard/dsm_stress \ + datawizard/double_parameter \ + datawizard/write_only_tmp_buffer \ + datawizard/data_invalidation \ + datawizard/data_deinitialize \ + datawizard/dining_philosophers \ + 
datawizard/manual_reduction \ + datawizard/readers_and_writers \ + datawizard/unpartition \ + datawizard/sync_with_data_with_mem \ + datawizard/sync_with_data_with_mem_non_blocking\ + datawizard/sync_with_data_with_mem_non_blocking_implicit\ + datawizard/mpi_like \ + datawizard/mpi_like_async \ + datawizard/critical_section_with_void_interface\ + datawizard/increment_init \ + datawizard/increment_redux \ + datawizard/increment_redux_partition \ + datawizard/increment_redux_v2 \ + datawizard/increment_redux_with_args \ + datawizard/increment_redux_lazy \ + datawizard/handle_to_pointer \ + datawizard/lazy_allocation \ + datawizard/no_unregister \ + datawizard/noreclaim \ + datawizard/nowhere \ + datawizard/interfaces/block/block_interface \ + datawizard/interfaces/bcsr/bcsr_interface \ + datawizard/interfaces/coo/coo_interface \ + datawizard/interfaces/csr/csr_interface \ + datawizard/interfaces/matrix/matrix_interface \ + datawizard/interfaces/multiformat/multiformat_interface \ + datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl \ + datawizard/interfaces/multiformat/advanced/multiformat_data_release \ + datawizard/interfaces/multiformat/advanced/multiformat_worker \ + datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion \ + datawizard/interfaces/multiformat/advanced/same_handle \ + datawizard/interfaces/tensor/tensor_interface \ + datawizard/interfaces/ndim/ndim_interface \ + datawizard/interfaces/variable/variable_interface \ + datawizard/interfaces/vector/vector_interface \ + datawizard/interfaces/void/void_interface \ + datawizard/in_place_partition \ + datawizard/partition_dep \ + datawizard/partition_lazy \ + datawizard/partition_init \ + datawizard/partition_wontuse \ + datawizard/gpu_register \ + datawizard/gpu_ptr_register \ + datawizard/variable_parameters \ + datawizard/wt_host \ + datawizard/wt_broadcast \ + datawizard/readonly \ + datawizard/specific_node \ + datawizard/specific_node_same \ + 
datawizard/task_with_multiple_time_the_same_handle \ + datawizard/test_arbiter \ + datawizard/invalidate_pending_requests \ + datawizard/deinitialize_pending_requests \ + datawizard/temporary_partition \ + datawizard/partitioned_initialization \ + datawizard/partitioned_acquire \ + datawizard/temporary_partition_implicit \ + datawizard/temporary_partition_read \ + datawizard/redux_acquire \ + disk/disk_copy \ + disk/disk_copy_unpack \ + disk/disk_copy_to_disk \ + disk/disk_compute \ + disk/disk_pack \ + disk/mem_reclaim \ + errorcheck/invalid_blocking_calls \ + errorcheck/workers_cpuid \ + fault-tolerance/retry \ + helper/starpu_data_cpy \ + helper/starpu_data_dup_ro \ + helper/starpu_create_sync_task \ + microbenchs/async_tasks_overhead \ + microbenchs/sync_tasks_overhead \ + microbenchs/tasks_overhead \ + microbenchs/tasks_size_overhead \ + microbenchs/prefetch_data_on_node \ + microbenchs/redundant_buffer \ + microbenchs/matrix_as_vector \ + microbenchs/bandwidth \ + overlap/gpu_concurrency \ + parallel_tasks/swap \ + parallel_tasks/combined_worker_assign_workerid \ + parallel_tasks/explicit_combined_worker \ + parallel_tasks/parallel_kernels \ + parallel_tasks/parallel_kernels_trivial \ + parallel_tasks/parallel_kernels_spmd \ + parallel_tasks/spmd_peager \ + parallel_tasks/cuda_only \ + perfmodels/regression_based_memset \ + perfmodels/regression_based_check \ + perfmodels/regression_based_multiimpl \ + perfmodels/regression_based_energy \ + perfmodels/regression_based_gpu \ + perfmodels/non_linear_regression_based \ + perfmodels/feed \ + perfmodels/user_base \ + perfmodels/valid_model \ + perfmodels/path \ + perfmodels/memory \ + sched_policies/data_locality \ + sched_policies/execute_all_tasks \ + sched_policies/prio \ + sched_policies/simple_deps \ + sched_policies/simple_cpu_gpu_sched \ + sched_ctx/sched_ctx_hierarchy + +noinst_PROGRAMS += \ + datawizard/allocate_many_numa_nodes + +if STARPU_USE_MAX_FPGA +myPROGRAMS += \ + maxfpga/max_fpga_basic_static \ + 
maxfpga/max_fpga_advanced_static \ + maxfpga/max_fpga_dynamic \ + maxfpga/max_fpga_mux +endif +endif + +MICROBENCHS = \ + microbenchs/parallel_independent_homogeneous_tasks \ + microbenchs/parallel_independent_heterogeneous_tasks \ + microbenchs/parallel_independent_homogeneous_tasks_data \ + microbenchs/parallel_independent_heterogeneous_tasks_data \ + microbenchs/parallel_redux_homogeneous_tasks_data \ + microbenchs/parallel_redux_heterogeneous_tasks_data \ + microbenchs/parallel_dependent_homogeneous_tasks_data + +if STARPU_HAVE_FC +if !STARPU_SANITIZE +if !STARPU_SIMGRID +myPROGRAMS += \ + fortran90/init_01 +endif +endif +endif + +if !STARPU_SIMGRID +if STARPU_LONG_CHECK +myPROGRAMS += \ + main/tag_task_data_deps \ + datawizard/reclaim +endif +endif + +examplebin_PROGRAMS = \ + main/deadlock \ + microbenchs/async_tasks_overhead \ + microbenchs/sync_tasks_overhead \ + microbenchs/tasks_overhead \ + microbenchs/tasks_size_overhead \ + microbenchs/local_pingpong +examplebin_SCRIPTS = \ + microbenchs/tasks_data_overhead.sh \ + microbenchs/sync_tasks_data_overhead.sh \ + microbenchs/async_tasks_data_overhead.sh \ + microbenchs/tasks_size_overhead.gp \ + microbenchs/tasks_size_overhead.sh +if !STARPU_SIMGRID +if !STARPU_USE_MPI_MASTER_SLAVE +examplebin_PROGRAMS += \ + microbenchs/bandwidth +SHELL_TESTS += \ + microbenchs/tasks_data_overhead.sh \ + microbenchs/sync_tasks_data_overhead.sh \ + microbenchs/async_tasks_data_overhead.sh \ + microbenchs/tasks_size_overhead_scheds.sh +endif +endif + +if STARPU_HAVE_WINDOWS +check_PROGRAMS = $(myPROGRAMS) +else +check_PROGRAMS = $(LOADER) $(myPROGRAMS) +endif +noinst_PROGRAMS += $(myPROGRAMS) +noinst_PROGRAMS += $(MICROBENCHS) + +if STARPU_SIMGRID +SHELL_TESTS += $(MICROBENCHS:=.sh) +endif + +SHELL_TESTS += \ + datawizard/locality.sh \ + microbenchs/bandwidth_scheds.sh + +if STARPU_USE_FXT +SHELL_TESTS += \ + overlap/overlap.sh +endif + +################################ +# Simgrid Model Checking tests # 
+################################ + +if STARPU_SIMGRID_MC +SUBDIRS += model-checking +endif + +####################### +# Source files # +####################### + +datawizard_acquire_release_SOURCES = \ + datawizard/acquire_release.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_acquire_release_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_acquire_release_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_acquire_release_SOURCES += \ + variable/increment_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + variable/increment_opencl_kernel.cl +endif + +datawizard_acquire_release2_SOURCES = \ + datawizard/acquire_release2.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_acquire_release2_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_acquire_release2_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_acquire_release2_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_acquire_release_to_SOURCES = \ + datawizard/acquire_release_to.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_acquire_release_to_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_acquire_release_to_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_acquire_release_to_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_wt_host_SOURCES = \ + datawizard/wt_host.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_wt_host_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_wt_host_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_wt_host_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_wt_broadcast_SOURCES = \ + datawizard/wt_broadcast.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_wt_broadcast_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP 
+datawizard_wt_broadcast_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_wt_broadcast_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_redux_lazy_SOURCES = \ + datawizard/increment_redux_lazy.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_increment_redux_lazy_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_redux_lazy_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_redux_lazy_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_redux_SOURCES = \ + datawizard/increment_redux.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_increment_redux_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_redux_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_redux_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_redux_partition_SOURCES = \ + datawizard/increment_redux_partition.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_increment_redux_partition_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_redux_partition_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_redux_partition_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_redux_v2_SOURCES = \ + datawizard/increment_redux_v2.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_increment_redux_v2_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_redux_v2_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_redux_v2_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_redux_with_args_SOURCES = \ + datawizard/increment_redux_with_args.c \ + variable/increment.c +if STARPU_USE_CUDA 
+datawizard_increment_redux_with_args_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_redux_with_args_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_redux_with_args_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_increment_init_SOURCES = \ + datawizard/increment_init.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_increment_init_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_increment_init_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_increment_init_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_scratch_SOURCES = \ + datawizard/scratch.c +if STARPU_USE_CUDA +datawizard_scratch_SOURCES += \ + datawizard/scratch_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_scratch_SOURCES += \ + datawizard/scratch_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/scratch_opencl_kernel.cl +endif + +datawizard_mpi_like_SOURCES = \ + datawizard/mpi_like.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_mpi_like_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_mpi_like_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_mpi_like_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_mpi_like_async_SOURCES = \ + datawizard/mpi_like_async.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_mpi_like_async_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_mpi_like_async_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_mpi_like_async_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_sync_and_notify_data_SOURCES = \ + datawizard/sync_and_notify_data.c +if STARPU_USE_CUDA +datawizard_sync_and_notify_data_SOURCES += \ + datawizard/sync_and_notify_data_kernels.cu +endif +if STARPU_USE_OPENCL 
+datawizard_sync_and_notify_data_SOURCES += \ + datawizard/sync_and_notify_data_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/sync_and_notify_data_opencl_codelet.cl +endif + +datawizard_sync_and_notify_data_implicit_SOURCES = \ + datawizard/sync_and_notify_data_implicit.c +if STARPU_USE_CUDA +datawizard_sync_and_notify_data_implicit_SOURCES += \ + datawizard/sync_and_notify_data_kernels.cu +endif +if STARPU_USE_OPENCL +datawizard_sync_and_notify_data_implicit_SOURCES += \ + datawizard/sync_and_notify_data_opencl.c +endif + +datawizard_in_place_partition_SOURCES = \ + datawizard/in_place_partition.c \ + datawizard/scal.c +if STARPU_USE_CUDA +datawizard_in_place_partition_SOURCES += \ + datawizard/scal_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_in_place_partition_SOURCES += \ + datawizard/scal_opencl.cl +endif + +datawizard_partition_dep_SOURCES = \ + datawizard/partition_dep.c \ + datawizard/scal.c +if STARPU_USE_CUDA +datawizard_partition_dep_SOURCES += \ + datawizard/scal_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_partition_dep_SOURCES += \ + datawizard/scal_opencl.cl +endif + +datawizard_partition_lazy_SOURCES = \ + datawizard/partition_lazy.c \ + datawizard/scal.c +if STARPU_USE_CUDA +datawizard_partition_lazy_SOURCES += \ + datawizard/scal_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_partition_lazy_SOURCES += \ + datawizard/scal_opencl.cl +endif + +datawizard_gpu_register_SOURCES = \ + datawizard/gpu_register.c \ + datawizard/scal.c +if STARPU_USE_CUDA +datawizard_gpu_register_SOURCES += \ + datawizard/scal_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_gpu_register_SOURCES += \ + datawizard/scal_opencl.cl +endif + +datawizard_gpu_ptr_register_SOURCES = \ + datawizard/gpu_ptr_register.c \ + datawizard/scal.c +if STARPU_USE_CUDA +datawizard_gpu_ptr_register_SOURCES += \ + datawizard/scal_cuda.cu +endif +if STARPU_USE_OPENCL +datawizard_gpu_ptr_register_SOURCES += \ + datawizard/scal_opencl.cl +endif + 
+datawizard_specific_node_SOURCES = \ + datawizard/specific_node.c \ + variable/increment.c +if STARPU_USE_CUDA +datawizard_specific_node_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +datawizard_specific_node_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +datawizard_specific_node_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_test_arbiter_SOURCES = \ + datawizard/test_arbiter.cpp + +main_starpu_worker_exists_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations + +main_insert_task_where_SOURCES = \ + main/insert_task_where.c \ + variable/increment.c +if STARPU_USE_CUDA +main_insert_task_where_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_insert_task_where_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_insert_task_where_SOURCES += \ + variable/increment_opencl.c +endif + +main_subgraph_repeat_SOURCES = \ + main/subgraph_repeat.c \ + variable/increment.c +if STARPU_USE_CUDA +main_subgraph_repeat_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_subgraph_repeat_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_subgraph_repeat_SOURCES += \ + variable/increment_opencl.c +endif + +main_subgraph_repeat_tag_SOURCES = \ + main/subgraph_repeat_tag.c \ + variable/increment.c +if STARPU_USE_CUDA +main_subgraph_repeat_tag_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_subgraph_repeat_tag_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_subgraph_repeat_tag_SOURCES += \ + variable/increment_opencl.c +endif + +main_subgraph_repeat_regenerate_SOURCES = \ + main/subgraph_repeat_regenerate.c \ + variable/increment.c +if STARPU_USE_CUDA +main_subgraph_repeat_regenerate_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_subgraph_repeat_regenerate_SOURCES += \ + 
variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_subgraph_repeat_regenerate_SOURCES += \ + variable/increment_opencl.c +endif + +main_subgraph_repeat_regenerate_tag_SOURCES = \ + main/subgraph_repeat_regenerate_tag.c \ + variable/increment.c +if STARPU_USE_CUDA +main_subgraph_repeat_regenerate_tag_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_subgraph_repeat_regenerate_tag_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_subgraph_repeat_regenerate_tag_SOURCES += \ + variable/increment_opencl.c +endif + +main_subgraph_repeat_regenerate_tag_cycle_SOURCES = \ + main/subgraph_repeat_regenerate_tag_cycle.c \ + variable/increment.c +if STARPU_USE_CUDA +main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ + variable/increment_opencl.c +endif + + +if STARPU_HAVE_FC +fortran90_init_01_SOURCES = \ + fortran90/starpu_mod.f90 \ + fortran90/init_01.f90 +endif + +helper_starpu_data_dup_ro_SOURCES = \ + helper/starpu_data_dup_ro.c \ + variable/increment.c +if STARPU_USE_CUDA +helper_starpu_data_dup_ro_SOURCES += \ + variable/increment_cuda.cu +endif +if STARPU_USE_HIP +helper_starpu_data_dup_ro_SOURCES += \ + variable/increment_hip.hip +endif +if STARPU_USE_OPENCL +helper_starpu_data_dup_ro_SOURCES += \ + variable/increment_opencl.c +endif + +datawizard_interfaces_copy_interfaces_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +datawizard_data_register_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################### +# Block interface # +################### +datawizard_interfaces_block_block_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/block/block_interface.c + +datawizard_interfaces_block_block_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA 
+datawizard_interfaces_block_block_interface_SOURCES+= \ + datawizard/interfaces/block/block_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_block_block_interface_SOURCES+= \ + datawizard/interfaces/block/block_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/block/block_opencl_kernel.cl +endif + +################### +# Tensor interface # +################### +datawizard_interfaces_tensor_tensor_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/tensor/tensor_interface.c + +datawizard_interfaces_tensor_tensor_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_tensor_tensor_interface_SOURCES+= \ + datawizard/interfaces/tensor/tensor_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_tensor_tensor_interface_SOURCES+= \ + datawizard/interfaces/tensor/tensor_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/tensor/tensor_opencl_kernel.cl +endif + +################### +# Ndim interface # +################### +datawizard_interfaces_ndim_ndim_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/ndim/ndim_interface.c + +datawizard_interfaces_ndim_ndim_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_ndim_ndim_interface_SOURCES+= \ + datawizard/interfaces/ndim/ndim_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_ndim_ndim_interface_SOURCES+= \ + datawizard/interfaces/ndim/ndim_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/ndim/ndim_opencl_kernel.cl +endif + +################## +# BCSR interface # +################## +datawizard_interfaces_bcsr_bcsr_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/bcsr/bcsr_interface.c + +datawizard_interfaces_bcsr_bcsr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \ +
datawizard/interfaces/bcsr/bcsr_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \ + datawizard/interfaces/bcsr/bcsr_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl +endif + +################# +# COO interface # +################# +datawizard_interfaces_coo_coo_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/coo/coo_interface.c + +datawizard_interfaces_coo_coo_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_coo_coo_interface_SOURCES+= \ + datawizard/interfaces/coo/coo_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_coo_coo_interface_SOURCES+= \ + datawizard/interfaces/coo/coo_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/coo/coo_opencl_kernel.cl +endif + +################# +# CSR interface # +################# +datawizard_interfaces_csr_csr_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/csr/csr_interface.c + +datawizard_interfaces_csr_csr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_csr_csr_interface_SOURCES+= \ + datawizard/interfaces/csr/csr_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_csr_csr_interface_SOURCES+= \ + datawizard/interfaces/csr/csr_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/csr/csr_opencl_kernel.cl +endif + + +#################### +# Vector interface # +#################### +datawizard_interfaces_vector_vector_interface_SOURCES = \ + datawizard/interfaces/vector/vector_interface.c \ + datawizard/interfaces/test_interfaces.c + +datawizard_interfaces_vector_vector_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_vector_vector_interface_SOURCES += \ + datawizard/interfaces/vector/vector_cuda.cu +endif + +if STARPU_USE_OPENCL 
+datawizard_interfaces_vector_vector_interface_SOURCES += \ + datawizard/interfaces/vector/vector_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/vector/vector_opencl_kernel.cl +endif + +#################### +# Matrix interface # +#################### +datawizard_interfaces_matrix_matrix_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/matrix/matrix_interface.c + +datawizard_interfaces_matrix_matrix_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_matrix_matrix_interface_SOURCES+= \ + datawizard/interfaces/matrix/matrix_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_matrix_matrix_interface_SOURCES+= \ + datawizard/interfaces/matrix/matrix_opencl.c +nobase_STARPU_OPENCL_DATA_DATA+= \ + datawizard/interfaces/matrix/matrix_opencl_kernel.cl +endif + + +######################### +# Multiformat interface # +######################### +datawizard_interfaces_multiformat_multiformat_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/multiformat/multiformat_interface.c \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets.c + +datawizard_interfaces_multiformat_multiformat_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_multiformat_multiformat_interface_SOURCES+= \ + datawizard/interfaces/multiformat/multiformat_cuda.cu \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_multiformat_multiformat_interface_SOURCES+= \ + datawizard/interfaces/multiformat/multiformat_opencl.c \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl +endif + 
+datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES=\ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c + +datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_data_release.c + +datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES=\ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_worker.c + +datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c + +datawizard_interfaces_multiformat_advanced_same_handle_SOURCES= \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/same_handle.c + + +###################### +# Variable interface # +###################### +datawizard_interfaces_variable_variable_interface_SOURCES= \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/variable/variable_interface.c + +datawizard_interfaces_variable_variable_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +if STARPU_USE_CUDA +datawizard_interfaces_variable_variable_interface_SOURCES+= \ + datawizard/interfaces/variable/variable_cuda.cu +endif + +if STARPU_USE_OPENCL +datawizard_interfaces_variable_variable_interface_SOURCES+= \ + datawizard/interfaces/variable/variable_opencl.c +nobase_STARPU_OPENCL_DATA_DATA += \ + datawizard/interfaces/variable/variable_opencl_kernel.cl +endif + +################## +# Void interface # +################## +datawizard_interfaces_void_void_interface_SOURCES=\ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/void/void_interface.c + +datawizard_interfaces_void_void_interface_CFLAGS = 
$(AM_CFLAGS) $(FXT_CFLAGS) + +overlap_gpu_concurrency_SOURCES=\ + overlap/gpu_concurrency.c +if STARPU_USE_CUDA +overlap_gpu_concurrency_SOURCES+=\ + overlap/long_kernel.cu +endif + +perfmodels_regression_based_memset_SOURCES=\ + perfmodels/regression_based_memset.c + +perfmodels_regression_based_gpu_SOURCES=\ + perfmodels/regression_based_gpu.c + +maxfpga_max_fpga_basic_static_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o +maxfpga/max_fpga_basic_static.o: maxfpga/MyTasks.max + +maxfpga_max_fpga_advanced_static_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o +maxfpga/max_fpga_advanced_static.o: maxfpga/MyTasks.max + +maxfpga_max_fpga_dynamic_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o +maxfpga/max_fpga_dynamic.o: maxfpga/MyTasks.max + +maxfpga_max_fpga_mux_LDADD = $(LDADD) \ + maxfpga/slic_MyTasksMux.o +maxfpga/max_fpga_mux.o: maxfpga/MyTasksMux.max + +if STARPU_USE_OPENCL +perfmodels_regression_based_memset_SOURCES+=\ + perfmodels/opencl_memset.c + +perfmodels_regression_based_gpu_SOURCES+=\ + perfmodels/opencl_memset.c + +nobase_STARPU_OPENCL_DATA_DATA += \ + perfmodels/opencl_memset_kernel.cl +endif + +perfmodels_non_linear_regression_based_SOURCES=\ + perfmodels/non_linear_regression_based.c + +if STARPU_USE_OPENCL +perfmodels_non_linear_regression_based_SOURCES+=\ + perfmodels/opencl_memset.c +endif + +sched_policies_execute_all_tasks_LDFLAGS = $(AM_LDFLAGS) -lm +sched_policies_execute_all_tasks_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +# Fortran90 tests + +# - link over source file to build our own object +fortran90/starpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ + +if STARPU_HAVE_FC +# - express the creation of .mod along .o +starpu_mod.mod: fortran90/starpu_mod.o + +# - list explicit dependences to control proper module files dependencies +fortran90/init_01.o: starpu_mod.mod +endif + +# Maxeler compiler +if STARPU_USE_MAX_FPGA +# or DFE for hardware execution +MAX_TARGET ?= DFE_SIM +MAX_DFE = MAX5C_$(MAX_TARGET) +
+MAXJ=$(wildcard maxfpga/*.maxj) +CLASS=$(MAXJ:.maxj=.class) +$(CLASS) &: $(MAXJ) + maxjc -1.7 -cp $$MAXCLASSPATH $(dir $<) + +%.max: %Manager.class + CLASSPATH=$$CLASSPATH:. maxJavaRun $(shell dirname $*).$(notdir $*)Manager DFEModel=MAIA maxFileName=$(notdir $*) target=$(MAX_TARGET) + cp $(notdir $*)_$(MAX_DFE)/results/$(notdir $*).{max,h} $(dir $@) + +slic_%.o: %.max + sliccompile $< $@ + +CLEANFILES += */*.max */*.class maxfpga/*.h +endif + diff --git a/tests/Makefile.in b/tests/Makefile.in new file mode 100644 index 0000000..80ee399 --- /dev/null +++ b/tests/Makefile.in @@ -0,0 +1,10313 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) 
+@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_8) $(am__EXEEXT_9) $(am__EXEEXT_6) \ + $(am__EXEEXT_10) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +TESTS = $(am__EXEEXT_13) $(am__EXEEXT_6) +@STARPU_COVERAGE_ENABLED_TRUE@am__append_8 = coverage/coverage.sh +XFAIL_TESTS = errorcheck/invalid_blocking_calls$(EXEEXT) +@STARPU_SIMGRID_TRUE@am__append_9 = \ +@STARPU_SIMGRID_TRUE@ energy/energy_efficiency \ +@STARPU_SIMGRID_TRUE@ datawizard/simgrid-locality + +@STARPU_SIMGRID_FALSE@am__append_10 = \ +@STARPU_SIMGRID_FALSE@ main/deprecated_func \ +@STARPU_SIMGRID_FALSE@ main/driver_api/init_run_deinit \ +@STARPU_SIMGRID_FALSE@ main/driver_api/run_driver \ +@STARPU_SIMGRID_FALSE@ main/deploop \ +@STARPU_SIMGRID_FALSE@ main/display_binding \ +@STARPU_SIMGRID_FALSE@ main/execute_on_a_specific_worker \ +@STARPU_SIMGRID_FALSE@ main/insert_task \ +@STARPU_SIMGRID_FALSE@ main/insert_task_value \ +@STARPU_SIMGRID_FALSE@ main/insert_task_dyn_handles \ +@STARPU_SIMGRID_FALSE@ main/insert_task_array \ +@STARPU_SIMGRID_FALSE@ main/insert_task_many \ +@STARPU_SIMGRID_FALSE@ main/job \ +@STARPU_SIMGRID_FALSE@ main/multithreaded \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_bundle \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_wait_for_all \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_wait \ +@STARPU_SIMGRID_FALSE@ main/static_restartable \ +@STARPU_SIMGRID_FALSE@ 
main/static_restartable_using_initializer\ +@STARPU_SIMGRID_FALSE@ main/static_restartable_tag \ +@STARPU_SIMGRID_FALSE@ main/regenerate \ +@STARPU_SIMGRID_FALSE@ main/regenerate_pipeline \ +@STARPU_SIMGRID_FALSE@ main/restart \ +@STARPU_SIMGRID_FALSE@ main/wait_all_regenerable_tasks \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_tag \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag_cycle \ +@STARPU_SIMGRID_FALSE@ main/empty_task_sync_point \ +@STARPU_SIMGRID_FALSE@ main/empty_task_sync_point_tasks \ +@STARPU_SIMGRID_FALSE@ main/tag_wait_api \ +@STARPU_SIMGRID_FALSE@ main/tag_get_task \ +@STARPU_SIMGRID_FALSE@ main/task_wait_api \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_in_callback \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission_synchronous \ +@STARPU_SIMGRID_FALSE@ main/get_current_task \ +@STARPU_SIMGRID_FALSE@ main/starpu_init \ +@STARPU_SIMGRID_FALSE@ main/submit \ +@STARPU_SIMGRID_FALSE@ main/const_codelet \ +@STARPU_SIMGRID_FALSE@ main/pause_resume \ +@STARPU_SIMGRID_FALSE@ main/pack \ +@STARPU_SIMGRID_FALSE@ main/get_children_tasks \ +@STARPU_SIMGRID_FALSE@ main/hwloc_cpuset \ +@STARPU_SIMGRID_FALSE@ main/task_end_dep \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_cb_insert \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release2 \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release_to \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_try \ +@STARPU_SIMGRID_FALSE@ datawizard/bcsr \ +@STARPU_SIMGRID_FALSE@ datawizard/cache \ +@STARPU_SIMGRID_FALSE@ datawizard/commute \ +@STARPU_SIMGRID_FALSE@ datawizard/commute2 \ +@STARPU_SIMGRID_FALSE@ datawizard/copy \ +@STARPU_SIMGRID_FALSE@ datawizard/data_implicit_deps \ +@STARPU_SIMGRID_FALSE@ datawizard/data_register \ 
+@STARPU_SIMGRID_FALSE@ datawizard/scratch \ +@STARPU_SIMGRID_FALSE@ datawizard/scratch_reuse \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data_implicit\ +@STARPU_SIMGRID_FALSE@ datawizard/dsm_stress \ +@STARPU_SIMGRID_FALSE@ datawizard/double_parameter \ +@STARPU_SIMGRID_FALSE@ datawizard/write_only_tmp_buffer \ +@STARPU_SIMGRID_FALSE@ datawizard/data_invalidation \ +@STARPU_SIMGRID_FALSE@ datawizard/data_deinitialize \ +@STARPU_SIMGRID_FALSE@ datawizard/dining_philosophers \ +@STARPU_SIMGRID_FALSE@ datawizard/manual_reduction \ +@STARPU_SIMGRID_FALSE@ datawizard/readers_and_writers \ +@STARPU_SIMGRID_FALSE@ datawizard/unpartition \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking\ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking_implicit\ +@STARPU_SIMGRID_FALSE@ datawizard/mpi_like \ +@STARPU_SIMGRID_FALSE@ datawizard/mpi_like_async \ +@STARPU_SIMGRID_FALSE@ datawizard/critical_section_with_void_interface\ +@STARPU_SIMGRID_FALSE@ datawizard/increment_init \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_partition \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_v2 \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_with_args \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_lazy \ +@STARPU_SIMGRID_FALSE@ datawizard/handle_to_pointer \ +@STARPU_SIMGRID_FALSE@ datawizard/lazy_allocation \ +@STARPU_SIMGRID_FALSE@ datawizard/no_unregister \ +@STARPU_SIMGRID_FALSE@ datawizard/noreclaim \ +@STARPU_SIMGRID_FALSE@ datawizard/nowhere \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/block/block_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/bcsr/bcsr_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/coo/coo_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/csr/csr_interface \ +@STARPU_SIMGRID_FALSE@ 
datawizard/interfaces/matrix/matrix_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/multiformat_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_data_release \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_worker \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/same_handle \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/tensor/tensor_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/ndim/ndim_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/variable/variable_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/vector/vector_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/void/void_interface \ +@STARPU_SIMGRID_FALSE@ datawizard/in_place_partition \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_dep \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_lazy \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_init \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_wontuse \ +@STARPU_SIMGRID_FALSE@ datawizard/gpu_register \ +@STARPU_SIMGRID_FALSE@ datawizard/gpu_ptr_register \ +@STARPU_SIMGRID_FALSE@ datawizard/variable_parameters \ +@STARPU_SIMGRID_FALSE@ datawizard/wt_host \ +@STARPU_SIMGRID_FALSE@ datawizard/wt_broadcast \ +@STARPU_SIMGRID_FALSE@ datawizard/readonly \ +@STARPU_SIMGRID_FALSE@ datawizard/specific_node \ +@STARPU_SIMGRID_FALSE@ datawizard/specific_node_same \ +@STARPU_SIMGRID_FALSE@ datawizard/task_with_multiple_time_the_same_handle \ +@STARPU_SIMGRID_FALSE@ datawizard/test_arbiter \ +@STARPU_SIMGRID_FALSE@ datawizard/invalidate_pending_requests \ +@STARPU_SIMGRID_FALSE@ datawizard/deinitialize_pending_requests \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition \ +@STARPU_SIMGRID_FALSE@ datawizard/partitioned_initialization \ 
+@STARPU_SIMGRID_FALSE@ datawizard/partitioned_acquire \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_implicit \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_read \ +@STARPU_SIMGRID_FALSE@ datawizard/redux_acquire \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy_unpack \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy_to_disk \ +@STARPU_SIMGRID_FALSE@ disk/disk_compute \ +@STARPU_SIMGRID_FALSE@ disk/disk_pack \ +@STARPU_SIMGRID_FALSE@ disk/mem_reclaim \ +@STARPU_SIMGRID_FALSE@ errorcheck/invalid_blocking_calls \ +@STARPU_SIMGRID_FALSE@ errorcheck/workers_cpuid \ +@STARPU_SIMGRID_FALSE@ fault-tolerance/retry \ +@STARPU_SIMGRID_FALSE@ helper/starpu_data_cpy \ +@STARPU_SIMGRID_FALSE@ helper/starpu_data_dup_ro \ +@STARPU_SIMGRID_FALSE@ helper/starpu_create_sync_task \ +@STARPU_SIMGRID_FALSE@ microbenchs/async_tasks_overhead \ +@STARPU_SIMGRID_FALSE@ microbenchs/sync_tasks_overhead \ +@STARPU_SIMGRID_FALSE@ microbenchs/tasks_overhead \ +@STARPU_SIMGRID_FALSE@ microbenchs/tasks_size_overhead \ +@STARPU_SIMGRID_FALSE@ microbenchs/prefetch_data_on_node \ +@STARPU_SIMGRID_FALSE@ microbenchs/redundant_buffer \ +@STARPU_SIMGRID_FALSE@ microbenchs/matrix_as_vector \ +@STARPU_SIMGRID_FALSE@ microbenchs/bandwidth \ +@STARPU_SIMGRID_FALSE@ overlap/gpu_concurrency \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/swap \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/combined_worker_assign_workerid \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/explicit_combined_worker \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_trivial \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_spmd \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/spmd_peager \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/cuda_only \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_memset \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_check \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_multiimpl \ +@STARPU_SIMGRID_FALSE@ 
perfmodels/regression_based_energy \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_gpu \ +@STARPU_SIMGRID_FALSE@ perfmodels/non_linear_regression_based \ +@STARPU_SIMGRID_FALSE@ perfmodels/feed \ +@STARPU_SIMGRID_FALSE@ perfmodels/user_base \ +@STARPU_SIMGRID_FALSE@ perfmodels/valid_model \ +@STARPU_SIMGRID_FALSE@ perfmodels/path \ +@STARPU_SIMGRID_FALSE@ perfmodels/memory \ +@STARPU_SIMGRID_FALSE@ sched_policies/data_locality \ +@STARPU_SIMGRID_FALSE@ sched_policies/execute_all_tasks \ +@STARPU_SIMGRID_FALSE@ sched_policies/prio \ +@STARPU_SIMGRID_FALSE@ sched_policies/simple_deps \ +@STARPU_SIMGRID_FALSE@ sched_policies/simple_cpu_gpu_sched \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_hierarchy + +@STARPU_SIMGRID_FALSE@am__append_11 = \ +@STARPU_SIMGRID_FALSE@ datawizard/allocate_many_numa_nodes + +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@am__append_12 = \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_basic_static \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_advanced_static \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_dynamic \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_mux + +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_13 = \ +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ fortran90/init_01 + +@STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@am__append_14 = \ +@STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ main/tag_task_data_deps \ +@STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ datawizard/reclaim + +examplebin_PROGRAMS = main/deadlock$(EXEEXT) \ + microbenchs/async_tasks_overhead$(EXEEXT) \ + microbenchs/sync_tasks_overhead$(EXEEXT) \ + microbenchs/tasks_overhead$(EXEEXT) \ + microbenchs/tasks_size_overhead$(EXEEXT) \ + microbenchs/local_pingpong$(EXEEXT) $(am__EXEEXT_7) +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__append_15 = \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ 
microbenchs/bandwidth + +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__append_16 = \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/tasks_data_overhead.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/sync_tasks_data_overhead.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/async_tasks_data_overhead.sh \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/tasks_size_overhead_scheds.sh + +@STARPU_HAVE_WINDOWS_FALSE@check_PROGRAMS = $(am__EXEEXT_6) +@STARPU_HAVE_WINDOWS_TRUE@check_PROGRAMS = $(am__EXEEXT_6) +@STARPU_SIMGRID_TRUE@am__append_17 = $(MICROBENCHS:=.sh) +@STARPU_USE_FXT_TRUE@am__append_18 = \ +@STARPU_USE_FXT_TRUE@ overlap/overlap.sh + + +################################ +# Simgrid Model Checking tests # +################################ +@STARPU_SIMGRID_MC_TRUE@am__append_19 = model-checking +@STARPU_USE_CUDA_TRUE@am__append_20 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_21 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_22 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_23 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_24 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_25 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_26 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_27 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_28 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_29 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_30 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + 
+@STARPU_USE_OPENCL_TRUE@am__append_31 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_32 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_33 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_34 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_35 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_36 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_37 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_38 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_39 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_40 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_41 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_42 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_43 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_44 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_45 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_46 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_47 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_48 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_49 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_50 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_51 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + 
+@STARPU_USE_OPENCL_TRUE@am__append_52 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_53 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scratch_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_54 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_55 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_56 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_57 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_58 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_59 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_60 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_61 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/sync_and_notify_data_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_62 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_63 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/sync_and_notify_data_kernels.cu + +@STARPU_USE_OPENCL_TRUE@am__append_64 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_65 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_66 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl + +@STARPU_USE_CUDA_TRUE@am__append_67 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_68 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl + +@STARPU_USE_CUDA_TRUE@am__append_69 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_70 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl + +@STARPU_USE_CUDA_TRUE@am__append_71 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_72 
= \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl + +@STARPU_USE_CUDA_TRUE@am__append_73 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_74 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl + +@STARPU_USE_CUDA_TRUE@am__append_75 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_76 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_77 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_78 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_79 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_80 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_81 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_82 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_83 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_84 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_85 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_86 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_87 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_88 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_89 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_90 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_91 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_92 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_93 = \ 
+@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_94 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_95 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_96 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu + +@STARPU_USE_HIP_TRUE@am__append_97 = \ +@STARPU_USE_HIP_TRUE@ variable/increment_hip.hip + +@STARPU_USE_OPENCL_TRUE@am__append_98 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_99 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/block/block_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_100 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/block/block_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_101 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/tensor/tensor_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_102 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/tensor/tensor_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_103 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/ndim/ndim_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_104 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/ndim/ndim_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_105 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/bcsr/bcsr_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_106 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/bcsr/bcsr_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_107 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/coo/coo_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_108 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/coo/coo_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_109 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/csr/csr_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_110 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/csr/csr_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_111 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/vector/vector_cuda.cu + 
+@STARPU_USE_OPENCL_TRUE@am__append_112 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/vector/vector_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_113 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/matrix/matrix_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_114 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/matrix/matrix_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_115 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_cuda.cu \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_116 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_opencl.c \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_117 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/variable/variable_cuda.cu + +@STARPU_USE_OPENCL_TRUE@am__append_118 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/variable/variable_opencl.c + +@STARPU_USE_CUDA_TRUE@am__append_119 = \ +@STARPU_USE_CUDA_TRUE@ overlap/long_kernel.cu + +@STARPU_USE_OPENCL_TRUE@am__append_120 = \ +@STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c + +@STARPU_USE_OPENCL_TRUE@am__append_121 = \ +@STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c + +@STARPU_USE_OPENCL_TRUE@am__append_122 = \ +@STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c + +@STARPU_USE_MAX_FPGA_TRUE@am__append_123 = */*.max */*.class max_fpga/*.h +subdir = tests +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) 
$(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@STARPU_SIMGRID_TRUE@am__EXEEXT_1 = energy/energy_efficiency$(EXEEXT) \ +@STARPU_SIMGRID_TRUE@ datawizard/simgrid-locality$(EXEEXT) +@STARPU_SIMGRID_FALSE@am__EXEEXT_2 = main/deprecated_func$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/driver_api/init_run_deinit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/driver_api/run_driver$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/deploop$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/display_binding$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/execute_on_a_specific_worker$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/insert_task$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/insert_task_value$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/insert_task_dyn_handles$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/insert_task_array$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/insert_task_many$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/job$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/multithreaded$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_bundle$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_wait_for_all$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/starpu_task_wait$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/static_restartable$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/static_restartable_using_initializer$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/static_restartable_tag$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/regenerate$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/regenerate_pipeline$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/restart$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/wait_all_regenerable_tasks$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_tag$(EXEEXT) \ 
+@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/empty_task_sync_point$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/empty_task_sync_point_tasks$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/tag_wait_api$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/tag_get_task$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/task_wait_api$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_in_callback$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission_synchronous$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/get_current_task$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/starpu_init$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/submit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/const_codelet$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/pause_resume$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/pack$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/get_children_tasks$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/hwloc_cpuset$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ main/task_end_dep$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_cb_insert$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release2$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_release_to$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/acquire_try$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/bcsr$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/cache$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/commute$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/commute2$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/copy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/data_implicit_deps$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/data_register$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/scratch$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ 
datawizard/scratch_reuse$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data_implicit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/dsm_stress$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/double_parameter$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/write_only_tmp_buffer$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/data_invalidation$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/data_deinitialize$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/dining_philosophers$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/manual_reduction$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/readers_and_writers$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/unpartition$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/mpi_like$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/mpi_like_async$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/critical_section_with_void_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_init$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_partition$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_v2$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_with_args$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/increment_redux_lazy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/handle_to_pointer$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/lazy_allocation$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/no_unregister$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/noreclaim$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/nowhere$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/block/block_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ 
datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/coo/coo_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/csr/csr_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/matrix/matrix_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/tensor/tensor_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/ndim/ndim_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/variable/variable_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/vector/vector_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/interfaces/void/void_interface$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/in_place_partition$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_dep$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_lazy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_init$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partition_wontuse$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/gpu_register$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/gpu_ptr_register$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/variable_parameters$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/wt_host$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/wt_broadcast$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/readonly$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ 
datawizard/specific_node$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/specific_node_same$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/test_arbiter$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/invalidate_pending_requests$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/deinitialize_pending_requests$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partitioned_initialization$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/partitioned_acquire$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_implicit$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_read$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ datawizard/redux_acquire$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy_unpack$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/disk_copy_to_disk$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/disk_compute$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/disk_pack$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ disk/mem_reclaim$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ errorcheck/invalid_blocking_calls$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ errorcheck/workers_cpuid$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ fault-tolerance/retry$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ helper/starpu_data_cpy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ helper/starpu_data_dup_ro$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ helper/starpu_create_sync_task$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/async_tasks_overhead$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/sync_tasks_overhead$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/tasks_overhead$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/tasks_size_overhead$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/prefetch_data_on_node$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/redundant_buffer$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ microbenchs/matrix_as_vector$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ 
microbenchs/bandwidth$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ overlap/gpu_concurrency$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/swap$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/combined_worker_assign_workerid$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/explicit_combined_worker$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_trivial$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_spmd$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/spmd_peager$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ parallel_tasks/cuda_only$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_memset$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_check$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_multiimpl$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_energy$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/regression_based_gpu$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/non_linear_regression_based$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/feed$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/user_base$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/valid_model$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/path$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ perfmodels/memory$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_policies/data_locality$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_policies/execute_all_tasks$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_policies/prio$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_policies/simple_deps$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_policies/simple_cpu_gpu_sched$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_hierarchy$(EXEEXT) +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@am__EXEEXT_3 = maxfpga/max_fpga_basic_static$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_advanced_static$(EXEEXT) \ +@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_dynamic$(EXEEXT) \ 
+@STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_mux$(EXEEXT) +@STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_4 = fortran90/init_01$(EXEEXT) +@STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = main/tag_task_data_deps$(EXEEXT) \ +@STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ datawizard/reclaim$(EXEEXT) +am__EXEEXT_6 = main/callback$(EXEEXT) main/bind$(EXEEXT) \ + main/mkdtemp$(EXEEXT) main/execute_schedule$(EXEEXT) \ + main/insert_task_pack$(EXEEXT) \ + main/insert_task_nullcodelet$(EXEEXT) \ + main/insert_task_where$(EXEEXT) \ + main/multithreaded_init$(EXEEXT) main/empty_task$(EXEEXT) \ + main/empty_task_chain$(EXEEXT) \ + main/starpu_worker_exists$(EXEEXT) \ + main/codelet_null_callback$(EXEEXT) \ + datawizard/allocate$(EXEEXT) datawizard/acquire_cb$(EXEEXT) \ + datawizard/deps$(EXEEXT) \ + datawizard/user_interaction_implicit$(EXEEXT) \ + datawizard/interfaces/copy_interfaces$(EXEEXT) \ + datawizard/numa_overflow$(EXEEXT) datawizard/locality$(EXEEXT) \ + datawizard/variable_size$(EXEEXT) \ + errorcheck/starpu_init_noworker$(EXEEXT) \ + errorcheck/invalid_tasks$(EXEEXT) helper/cublas_init$(EXEEXT) \ + helper/cublasLt_init$(EXEEXT) helper/cusparse_init$(EXEEXT) \ + helper/hipblas_init$(EXEEXT) helper/pinned_memory$(EXEEXT) \ + helper/execute_on_all$(EXEEXT) \ + microbenchs/display_structures_size$(EXEEXT) \ + microbenchs/local_pingpong$(EXEEXT) overlap/overlap$(EXEEXT) \ + sched_ctx/sched_ctx_list$(EXEEXT) \ + sched_ctx/sched_ctx_policy_data$(EXEEXT) \ + openmp/init_exit_01$(EXEEXT) openmp/init_exit_02$(EXEEXT) \ + openmp/environment$(EXEEXT) openmp/api_01$(EXEEXT) \ + openmp/parallel_01$(EXEEXT) openmp/parallel_02$(EXEEXT) \ + openmp/parallel_03$(EXEEXT) \ + openmp/parallel_barrier_01$(EXEEXT) \ + openmp/parallel_master_01$(EXEEXT) \ + openmp/parallel_master_inline_01$(EXEEXT) \ + openmp/parallel_single_wait_01$(EXEEXT) \ + openmp/parallel_single_nowait_01$(EXEEXT) \ + 
openmp/parallel_single_inline_01$(EXEEXT) \ + openmp/parallel_single_copyprivate_01$(EXEEXT) \ + openmp/parallel_single_copyprivate_inline_01$(EXEEXT) \ + openmp/parallel_critical_01$(EXEEXT) \ + openmp/parallel_critical_inline_01$(EXEEXT) \ + openmp/parallel_critical_named_01$(EXEEXT) \ + openmp/parallel_critical_named_inline_01$(EXEEXT) \ + openmp/parallel_simple_lock_01$(EXEEXT) \ + openmp/parallel_nested_lock_01$(EXEEXT) \ + openmp/parallel_for_01$(EXEEXT) \ + openmp/parallel_for_02$(EXEEXT) \ + openmp/parallel_for_ordered_01$(EXEEXT) \ + openmp/parallel_sections_01$(EXEEXT) \ + openmp/parallel_sections_combined_01$(EXEEXT) \ + openmp/task_01$(EXEEXT) openmp/task_02$(EXEEXT) \ + openmp/task_03$(EXEEXT) openmp/taskloop$(EXEEXT) \ + openmp/taskwait_01$(EXEEXT) openmp/taskgroup_01$(EXEEXT) \ + openmp/taskgroup_02$(EXEEXT) openmp/array_slice_01$(EXEEXT) \ + openmp/cuda_task_01$(EXEEXT) perfmodels/value_nan$(EXEEXT) \ + sched_policies/workerids$(EXEEXT) $(am__EXEEXT_1) \ + $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_5) +@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__EXEEXT_7 = microbenchs/bandwidth$(EXEEXT) +am__installdirs = "$(DESTDIR)$(examplebindir)" \ + "$(DESTDIR)$(examplebindir)" \ + "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_8 = loader$(EXEEXT) +@STARPU_SIMGRID_FALSE@am__EXEEXT_9 = datawizard/allocate_many_numa_nodes$(EXEEXT) +am__EXEEXT_10 = \ + microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT) \ + microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT) \ + microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT) \ + microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT) \ + microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT) \ + microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT) \ + microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT) +PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) +datawizard_acquire_cb_SOURCES = 
datawizard/acquire_cb.c +am__dirstamp = $(am__leading_dot)dirstamp +datawizard_acquire_cb_OBJECTS = datawizard/acquire_cb.$(OBJEXT) +datawizard_acquire_cb_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +datawizard_acquire_cb_insert_SOURCES = datawizard/acquire_cb_insert.c +datawizard_acquire_cb_insert_OBJECTS = \ + datawizard/acquire_cb_insert.$(OBJEXT) +datawizard_acquire_cb_insert_LDADD = $(LDADD) +am__datawizard_acquire_release_SOURCES_DIST = \ + datawizard/acquire_release.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_1 = \ +@STARPU_USE_CUDA_TRUE@ variable/increment_cuda.$(OBJEXT) +@STARPU_USE_HIP_TRUE@am__objects_2 = variable/increment_hip.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_3 = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.$(OBJEXT) +am_datawizard_acquire_release_OBJECTS = \ + datawizard/acquire_release.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_acquire_release_OBJECTS = \ + $(am_datawizard_acquire_release_OBJECTS) +datawizard_acquire_release_LDADD = $(LDADD) +am__datawizard_acquire_release2_SOURCES_DIST = \ + datawizard/acquire_release2.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_acquire_release2_OBJECTS = \ + datawizard/acquire_release2.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_acquire_release2_OBJECTS = \ + $(am_datawizard_acquire_release2_OBJECTS) +datawizard_acquire_release2_LDADD = $(LDADD) +am__datawizard_acquire_release_to_SOURCES_DIST = \ + datawizard/acquire_release_to.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_acquire_release_to_OBJECTS = \ + 
datawizard/acquire_release_to.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_acquire_release_to_OBJECTS = \ + $(am_datawizard_acquire_release_to_OBJECTS) +datawizard_acquire_release_to_LDADD = $(LDADD) +datawizard_acquire_try_SOURCES = datawizard/acquire_try.c +datawizard_acquire_try_OBJECTS = datawizard/acquire_try.$(OBJEXT) +datawizard_acquire_try_LDADD = $(LDADD) +datawizard_allocate_SOURCES = datawizard/allocate.c +datawizard_allocate_OBJECTS = datawizard/allocate.$(OBJEXT) +datawizard_allocate_LDADD = $(LDADD) +datawizard_allocate_many_numa_nodes_SOURCES = \ + datawizard/allocate_many_numa_nodes.c +datawizard_allocate_many_numa_nodes_OBJECTS = \ + datawizard/allocate_many_numa_nodes.$(OBJEXT) +datawizard_allocate_many_numa_nodes_LDADD = $(LDADD) +datawizard_bcsr_SOURCES = datawizard/bcsr.c +datawizard_bcsr_OBJECTS = datawizard/bcsr.$(OBJEXT) +datawizard_bcsr_LDADD = $(LDADD) +datawizard_cache_SOURCES = datawizard/cache.c +datawizard_cache_OBJECTS = datawizard/cache.$(OBJEXT) +datawizard_cache_LDADD = $(LDADD) +datawizard_commute_SOURCES = datawizard/commute.c +datawizard_commute_OBJECTS = datawizard/commute.$(OBJEXT) +datawizard_commute_LDADD = $(LDADD) +datawizard_commute2_SOURCES = datawizard/commute2.c +datawizard_commute2_OBJECTS = datawizard/commute2.$(OBJEXT) +datawizard_commute2_LDADD = $(LDADD) +datawizard_copy_SOURCES = datawizard/copy.c +datawizard_copy_OBJECTS = datawizard/copy.$(OBJEXT) +datawizard_copy_LDADD = $(LDADD) +datawizard_critical_section_with_void_interface_SOURCES = \ + datawizard/critical_section_with_void_interface.c +datawizard_critical_section_with_void_interface_OBJECTS = \ + datawizard/critical_section_with_void_interface.$(OBJEXT) +datawizard_critical_section_with_void_interface_LDADD = $(LDADD) +datawizard_data_deinitialize_SOURCES = datawizard/data_deinitialize.c +datawizard_data_deinitialize_OBJECTS = \ + datawizard/data_deinitialize.$(OBJEXT) 
+datawizard_data_deinitialize_LDADD = $(LDADD) +datawizard_data_implicit_deps_SOURCES = \ + datawizard/data_implicit_deps.c +datawizard_data_implicit_deps_OBJECTS = \ + datawizard/data_implicit_deps.$(OBJEXT) +datawizard_data_implicit_deps_LDADD = $(LDADD) +datawizard_data_invalidation_SOURCES = datawizard/data_invalidation.c +datawizard_data_invalidation_OBJECTS = \ + datawizard/data_invalidation.$(OBJEXT) +datawizard_data_invalidation_LDADD = $(LDADD) +datawizard_data_register_SOURCES = datawizard/data_register.c +datawizard_data_register_OBJECTS = \ + datawizard/data_register-data_register.$(OBJEXT) +datawizard_data_register_LDADD = $(LDADD) +datawizard_data_register_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(datawizard_data_register_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +datawizard_deinitialize_pending_requests_SOURCES = \ + datawizard/deinitialize_pending_requests.c +datawizard_deinitialize_pending_requests_OBJECTS = \ + datawizard/deinitialize_pending_requests.$(OBJEXT) +datawizard_deinitialize_pending_requests_LDADD = $(LDADD) +datawizard_deps_SOURCES = datawizard/deps.c +datawizard_deps_OBJECTS = datawizard/deps.$(OBJEXT) +datawizard_deps_LDADD = $(LDADD) +datawizard_dining_philosophers_SOURCES = \ + datawizard/dining_philosophers.c +datawizard_dining_philosophers_OBJECTS = \ + datawizard/dining_philosophers.$(OBJEXT) +datawizard_dining_philosophers_LDADD = $(LDADD) +datawizard_double_parameter_SOURCES = datawizard/double_parameter.c +datawizard_double_parameter_OBJECTS = \ + datawizard/double_parameter.$(OBJEXT) +datawizard_double_parameter_LDADD = $(LDADD) +datawizard_dsm_stress_SOURCES = datawizard/dsm_stress.c +datawizard_dsm_stress_OBJECTS = datawizard/dsm_stress.$(OBJEXT) +datawizard_dsm_stress_LDADD = $(LDADD) +am__datawizard_gpu_ptr_register_SOURCES_DIST = \ + datawizard/gpu_ptr_register.c datawizard/scal.c \ + datawizard/scal_cuda.cu datawizard/scal_opencl.cl 
+@STARPU_USE_CUDA_TRUE@am__objects_4 = datawizard/scal_cuda.$(OBJEXT) +am__objects_5 = +am_datawizard_gpu_ptr_register_OBJECTS = \ + datawizard/gpu_ptr_register.$(OBJEXT) \ + datawizard/scal.$(OBJEXT) $(am__objects_4) $(am__objects_5) +datawizard_gpu_ptr_register_OBJECTS = \ + $(am_datawizard_gpu_ptr_register_OBJECTS) +datawizard_gpu_ptr_register_LDADD = $(LDADD) +am__datawizard_gpu_register_SOURCES_DIST = datawizard/gpu_register.c \ + datawizard/scal.c datawizard/scal_cuda.cu \ + datawizard/scal_opencl.cl +am_datawizard_gpu_register_OBJECTS = \ + datawizard/gpu_register.$(OBJEXT) datawizard/scal.$(OBJEXT) \ + $(am__objects_4) $(am__objects_5) +datawizard_gpu_register_OBJECTS = \ + $(am_datawizard_gpu_register_OBJECTS) +datawizard_gpu_register_LDADD = $(LDADD) +datawizard_handle_to_pointer_SOURCES = datawizard/handle_to_pointer.c +datawizard_handle_to_pointer_OBJECTS = \ + datawizard/handle_to_pointer.$(OBJEXT) +datawizard_handle_to_pointer_LDADD = $(LDADD) +am__datawizard_in_place_partition_SOURCES_DIST = \ + datawizard/in_place_partition.c datawizard/scal.c \ + datawizard/scal_cuda.cu datawizard/scal_opencl.cl +am_datawizard_in_place_partition_OBJECTS = \ + datawizard/in_place_partition.$(OBJEXT) \ + datawizard/scal.$(OBJEXT) $(am__objects_4) $(am__objects_5) +datawizard_in_place_partition_OBJECTS = \ + $(am_datawizard_in_place_partition_OBJECTS) +datawizard_in_place_partition_LDADD = $(LDADD) +am__datawizard_increment_init_SOURCES_DIST = \ + datawizard/increment_init.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_init_OBJECTS = \ + datawizard/increment_init.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_init_OBJECTS = \ + $(am_datawizard_increment_init_OBJECTS) +datawizard_increment_init_LDADD = $(LDADD) +am__datawizard_increment_redux_SOURCES_DIST = \ + datawizard/increment_redux.c 
variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_redux_OBJECTS = \ + datawizard/increment_redux.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_redux_OBJECTS = \ + $(am_datawizard_increment_redux_OBJECTS) +datawizard_increment_redux_LDADD = $(LDADD) +am__datawizard_increment_redux_lazy_SOURCES_DIST = \ + datawizard/increment_redux_lazy.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_redux_lazy_OBJECTS = \ + datawizard/increment_redux_lazy.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_redux_lazy_OBJECTS = \ + $(am_datawizard_increment_redux_lazy_OBJECTS) +datawizard_increment_redux_lazy_LDADD = $(LDADD) +am__datawizard_increment_redux_partition_SOURCES_DIST = \ + datawizard/increment_redux_partition.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_redux_partition_OBJECTS = \ + datawizard/increment_redux_partition.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_redux_partition_OBJECTS = \ + $(am_datawizard_increment_redux_partition_OBJECTS) +datawizard_increment_redux_partition_LDADD = $(LDADD) +am__datawizard_increment_redux_v2_SOURCES_DIST = \ + datawizard/increment_redux_v2.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_redux_v2_OBJECTS = \ + datawizard/increment_redux_v2.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_redux_v2_OBJECTS = \ + $(am_datawizard_increment_redux_v2_OBJECTS) +datawizard_increment_redux_v2_LDADD = 
$(LDADD) +am__datawizard_increment_redux_with_args_SOURCES_DIST = \ + datawizard/increment_redux_with_args.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_increment_redux_with_args_OBJECTS = \ + datawizard/increment_redux_with_args.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_increment_redux_with_args_OBJECTS = \ + $(am_datawizard_increment_redux_with_args_OBJECTS) +datawizard_increment_redux_with_args_LDADD = $(LDADD) +am__datawizard_interfaces_bcsr_bcsr_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/bcsr/bcsr_interface.c \ + datawizard/interfaces/bcsr/bcsr_cuda.cu \ + datawizard/interfaces/bcsr/bcsr_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_6 = datawizard/interfaces/bcsr/bcsr_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_7 = datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.$(OBJEXT) +am_datawizard_interfaces_bcsr_bcsr_interface_OBJECTS = datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.$(OBJEXT) \ + $(am__objects_6) $(am__objects_7) +datawizard_interfaces_bcsr_bcsr_interface_OBJECTS = \ + $(am_datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) +datawizard_interfaces_bcsr_bcsr_interface_LDADD = $(LDADD) +datawizard_interfaces_bcsr_bcsr_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_block_block_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/block/block_interface.c \ + datawizard/interfaces/block/block_cuda.cu \ + datawizard/interfaces/block/block_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_8 = datawizard/interfaces/block/block_cuda.$(OBJEXT) 
+@STARPU_USE_OPENCL_TRUE@am__objects_9 = datawizard/interfaces/block/block_interface-block_opencl.$(OBJEXT) +am_datawizard_interfaces_block_block_interface_OBJECTS = datawizard/interfaces/block_block_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/block/block_interface-block_interface.$(OBJEXT) \ + $(am__objects_8) $(am__objects_9) +datawizard_interfaces_block_block_interface_OBJECTS = \ + $(am_datawizard_interfaces_block_block_interface_OBJECTS) +datawizard_interfaces_block_block_interface_LDADD = $(LDADD) +datawizard_interfaces_block_block_interface_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(datawizard_interfaces_block_block_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_coo_coo_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/coo/coo_interface.c \ + datawizard/interfaces/coo/coo_cuda.cu \ + datawizard/interfaces/coo/coo_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_10 = datawizard/interfaces/coo/coo_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_11 = datawizard/interfaces/coo/coo_interface-coo_opencl.$(OBJEXT) +am_datawizard_interfaces_coo_coo_interface_OBJECTS = datawizard/interfaces/coo_coo_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/coo/coo_interface-coo_interface.$(OBJEXT) \ + $(am__objects_10) $(am__objects_11) +datawizard_interfaces_coo_coo_interface_OBJECTS = \ + $(am_datawizard_interfaces_coo_coo_interface_OBJECTS) +datawizard_interfaces_coo_coo_interface_LDADD = $(LDADD) +datawizard_interfaces_coo_coo_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_coo_coo_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +datawizard_interfaces_copy_interfaces_SOURCES = \ + datawizard/interfaces/copy_interfaces.c +datawizard_interfaces_copy_interfaces_OBJECTS = 
datawizard/interfaces/copy_interfaces-copy_interfaces.$(OBJEXT) +datawizard_interfaces_copy_interfaces_LDADD = $(LDADD) +datawizard_interfaces_copy_interfaces_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_copy_interfaces_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_csr_csr_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/csr/csr_interface.c \ + datawizard/interfaces/csr/csr_cuda.cu \ + datawizard/interfaces/csr/csr_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_12 = datawizard/interfaces/csr/csr_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_13 = datawizard/interfaces/csr/csr_interface-csr_opencl.$(OBJEXT) +am_datawizard_interfaces_csr_csr_interface_OBJECTS = datawizard/interfaces/csr_csr_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/csr/csr_interface-csr_interface.$(OBJEXT) \ + $(am__objects_12) $(am__objects_13) +datawizard_interfaces_csr_csr_interface_OBJECTS = \ + $(am_datawizard_interfaces_csr_csr_interface_OBJECTS) +datawizard_interfaces_csr_csr_interface_LDADD = $(LDADD) +datawizard_interfaces_csr_csr_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_csr_csr_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_matrix_matrix_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/matrix/matrix_interface.c \ + datawizard/interfaces/matrix/matrix_cuda.cu \ + datawizard/interfaces/matrix/matrix_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_14 = datawizard/interfaces/matrix/matrix_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_15 = datawizard/interfaces/matrix/matrix_interface-matrix_opencl.$(OBJEXT) +am_datawizard_interfaces_matrix_matrix_interface_OBJECTS = 
datawizard/interfaces/matrix_matrix_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/matrix/matrix_interface-matrix_interface.$(OBJEXT) \ + $(am__objects_14) $(am__objects_15) +datawizard_interfaces_matrix_matrix_interface_OBJECTS = \ + $(am_datawizard_interfaces_matrix_matrix_interface_OBJECTS) +datawizard_interfaces_matrix_matrix_interface_LDADD = $(LDADD) +datawizard_interfaces_matrix_matrix_interface_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS = \ + datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ + datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.$(OBJEXT) +datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) +datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_LDADD = \ + $(LDADD) +am_datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS = \ + datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ + datawizard/interfaces/multiformat/advanced/multiformat_data_release.$(OBJEXT) +datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) +datawizard_interfaces_multiformat_advanced_multiformat_data_release_LDADD = \ + $(LDADD) +am_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS = \ + datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ + datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.$(OBJEXT) +datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) 
+datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_LDADD = \ + $(LDADD) +am_datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS = \ + datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ + datawizard/interfaces/multiformat/advanced/multiformat_worker.$(OBJEXT) +datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) +datawizard_interfaces_multiformat_advanced_multiformat_worker_LDADD = \ + $(LDADD) +am_datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS = \ + datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ + datawizard/interfaces/multiformat/advanced/same_handle.$(OBJEXT) +datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) +datawizard_interfaces_multiformat_advanced_same_handle_LDADD = \ + $(LDADD) +am__datawizard_interfaces_multiformat_multiformat_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/multiformat/multiformat_interface.c \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets.c \ + datawizard/interfaces/multiformat/multiformat_cuda.cu \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu \ + datawizard/interfaces/multiformat/multiformat_opencl.c \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_16 = datawizard/interfaces/multiformat/multiformat_cuda.$(OBJEXT) \ +@STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_17 = datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.$(OBJEXT) \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.$(OBJEXT) 
+am_datawizard_interfaces_multiformat_multiformat_interface_OBJECTS = datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.$(OBJEXT) \ + datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.$(OBJEXT) \ + $(am__objects_16) $(am__objects_17) +datawizard_interfaces_multiformat_multiformat_interface_OBJECTS = $(am_datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) +datawizard_interfaces_multiformat_multiformat_interface_LDADD = \ + $(LDADD) +datawizard_interfaces_multiformat_multiformat_interface_LINK = \ + $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_ndim_ndim_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/ndim/ndim_interface.c \ + datawizard/interfaces/ndim/ndim_cuda.cu \ + datawizard/interfaces/ndim/ndim_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_18 = datawizard/interfaces/ndim/ndim_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_19 = datawizard/interfaces/ndim/ndim_interface-ndim_opencl.$(OBJEXT) +am_datawizard_interfaces_ndim_ndim_interface_OBJECTS = datawizard/interfaces/ndim_ndim_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/ndim/ndim_interface-ndim_interface.$(OBJEXT) \ + $(am__objects_18) $(am__objects_19) +datawizard_interfaces_ndim_ndim_interface_OBJECTS = \ + $(am_datawizard_interfaces_ndim_ndim_interface_OBJECTS) +datawizard_interfaces_ndim_ndim_interface_LDADD = $(LDADD) +datawizard_interfaces_ndim_ndim_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ 
+am__datawizard_interfaces_tensor_tensor_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/tensor/tensor_interface.c \ + datawizard/interfaces/tensor/tensor_cuda.cu \ + datawizard/interfaces/tensor/tensor_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_20 = datawizard/interfaces/tensor/tensor_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_21 = datawizard/interfaces/tensor/tensor_interface-tensor_opencl.$(OBJEXT) +am_datawizard_interfaces_tensor_tensor_interface_OBJECTS = datawizard/interfaces/tensor_tensor_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/tensor/tensor_interface-tensor_interface.$(OBJEXT) \ + $(am__objects_20) $(am__objects_21) +datawizard_interfaces_tensor_tensor_interface_OBJECTS = \ + $(am_datawizard_interfaces_tensor_tensor_interface_OBJECTS) +datawizard_interfaces_tensor_tensor_interface_LDADD = $(LDADD) +datawizard_interfaces_tensor_tensor_interface_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_variable_variable_interface_SOURCES_DIST = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/variable/variable_interface.c \ + datawizard/interfaces/variable/variable_cuda.cu \ + datawizard/interfaces/variable/variable_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_22 = datawizard/interfaces/variable/variable_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_23 = datawizard/interfaces/variable/variable_interface-variable_opencl.$(OBJEXT) +am_datawizard_interfaces_variable_variable_interface_OBJECTS = datawizard/interfaces/variable_variable_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/variable/variable_interface-variable_interface.$(OBJEXT) \ + $(am__objects_22) $(am__objects_23) +datawizard_interfaces_variable_variable_interface_OBJECTS = 
$(am_datawizard_interfaces_variable_variable_interface_OBJECTS) +datawizard_interfaces_variable_variable_interface_LDADD = $(LDADD) +datawizard_interfaces_variable_variable_interface_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(datawizard_interfaces_variable_variable_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__datawizard_interfaces_vector_vector_interface_SOURCES_DIST = \ + datawizard/interfaces/vector/vector_interface.c \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/vector/vector_cuda.cu \ + datawizard/interfaces/vector/vector_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_24 = datawizard/interfaces/vector/vector_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_25 = datawizard/interfaces/vector/vector_interface-vector_opencl.$(OBJEXT) +am_datawizard_interfaces_vector_vector_interface_OBJECTS = datawizard/interfaces/vector/vector_interface-vector_interface.$(OBJEXT) \ + datawizard/interfaces/vector_vector_interface-test_interfaces.$(OBJEXT) \ + $(am__objects_24) $(am__objects_25) +datawizard_interfaces_vector_vector_interface_OBJECTS = \ + $(am_datawizard_interfaces_vector_vector_interface_OBJECTS) +datawizard_interfaces_vector_vector_interface_LDADD = $(LDADD) +datawizard_interfaces_vector_vector_interface_LINK = $(LIBTOOL) \ + $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) \ + $(datawizard_interfaces_vector_vector_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_datawizard_interfaces_void_void_interface_OBJECTS = datawizard/interfaces/void_void_interface-test_interfaces.$(OBJEXT) \ + datawizard/interfaces/void/void_interface-void_interface.$(OBJEXT) +datawizard_interfaces_void_void_interface_OBJECTS = \ + $(am_datawizard_interfaces_void_void_interface_OBJECTS) +datawizard_interfaces_void_void_interface_LDADD = $(LDADD) +datawizard_interfaces_void_void_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(datawizard_interfaces_void_void_interface_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +datawizard_invalidate_pending_requests_SOURCES = \ + datawizard/invalidate_pending_requests.c +datawizard_invalidate_pending_requests_OBJECTS = \ + datawizard/invalidate_pending_requests.$(OBJEXT) +datawizard_invalidate_pending_requests_LDADD = $(LDADD) +datawizard_lazy_allocation_SOURCES = datawizard/lazy_allocation.c +datawizard_lazy_allocation_OBJECTS = \ + datawizard/lazy_allocation.$(OBJEXT) +datawizard_lazy_allocation_LDADD = $(LDADD) +datawizard_locality_SOURCES = datawizard/locality.c +datawizard_locality_OBJECTS = datawizard/locality.$(OBJEXT) +datawizard_locality_LDADD = $(LDADD) +datawizard_manual_reduction_SOURCES = datawizard/manual_reduction.c +datawizard_manual_reduction_OBJECTS = \ + datawizard/manual_reduction.$(OBJEXT) +datawizard_manual_reduction_LDADD = $(LDADD) +am__datawizard_mpi_like_SOURCES_DIST = datawizard/mpi_like.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_datawizard_mpi_like_OBJECTS = datawizard/mpi_like.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_mpi_like_OBJECTS = $(am_datawizard_mpi_like_OBJECTS) +datawizard_mpi_like_LDADD = $(LDADD) +am__datawizard_mpi_like_async_SOURCES_DIST = \ + datawizard/mpi_like_async.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_datawizard_mpi_like_async_OBJECTS = \ + datawizard/mpi_like_async.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_mpi_like_async_OBJECTS = \ + $(am_datawizard_mpi_like_async_OBJECTS) +datawizard_mpi_like_async_LDADD = $(LDADD) +datawizard_no_unregister_SOURCES = datawizard/no_unregister.c +datawizard_no_unregister_OBJECTS = datawizard/no_unregister.$(OBJEXT) 
+datawizard_no_unregister_LDADD = $(LDADD) +datawizard_noreclaim_SOURCES = datawizard/noreclaim.c +datawizard_noreclaim_OBJECTS = datawizard/noreclaim.$(OBJEXT) +datawizard_noreclaim_LDADD = $(LDADD) +datawizard_nowhere_SOURCES = datawizard/nowhere.c +datawizard_nowhere_OBJECTS = datawizard/nowhere.$(OBJEXT) +datawizard_nowhere_LDADD = $(LDADD) +datawizard_numa_overflow_SOURCES = datawizard/numa_overflow.c +datawizard_numa_overflow_OBJECTS = datawizard/numa_overflow.$(OBJEXT) +datawizard_numa_overflow_LDADD = $(LDADD) +am__datawizard_partition_dep_SOURCES_DIST = \ + datawizard/partition_dep.c datawizard/scal.c \ + datawizard/scal_cuda.cu datawizard/scal_opencl.cl +am_datawizard_partition_dep_OBJECTS = \ + datawizard/partition_dep.$(OBJEXT) datawizard/scal.$(OBJEXT) \ + $(am__objects_4) $(am__objects_5) +datawizard_partition_dep_OBJECTS = \ + $(am_datawizard_partition_dep_OBJECTS) +datawizard_partition_dep_LDADD = $(LDADD) +datawizard_partition_init_SOURCES = datawizard/partition_init.c +datawizard_partition_init_OBJECTS = \ + datawizard/partition_init.$(OBJEXT) +datawizard_partition_init_LDADD = $(LDADD) +am__datawizard_partition_lazy_SOURCES_DIST = \ + datawizard/partition_lazy.c datawizard/scal.c \ + datawizard/scal_cuda.cu datawizard/scal_opencl.cl +am_datawizard_partition_lazy_OBJECTS = \ + datawizard/partition_lazy.$(OBJEXT) datawizard/scal.$(OBJEXT) \ + $(am__objects_4) $(am__objects_5) +datawizard_partition_lazy_OBJECTS = \ + $(am_datawizard_partition_lazy_OBJECTS) +datawizard_partition_lazy_LDADD = $(LDADD) +datawizard_partition_wontuse_SOURCES = datawizard/partition_wontuse.c +datawizard_partition_wontuse_OBJECTS = \ + datawizard/partition_wontuse.$(OBJEXT) +datawizard_partition_wontuse_LDADD = $(LDADD) +datawizard_partitioned_acquire_SOURCES = \ + datawizard/partitioned_acquire.c +datawizard_partitioned_acquire_OBJECTS = \ + datawizard/partitioned_acquire.$(OBJEXT) +datawizard_partitioned_acquire_LDADD = $(LDADD) 
+datawizard_partitioned_initialization_SOURCES = \ + datawizard/partitioned_initialization.c +datawizard_partitioned_initialization_OBJECTS = \ + datawizard/partitioned_initialization.$(OBJEXT) +datawizard_partitioned_initialization_LDADD = $(LDADD) +datawizard_readers_and_writers_SOURCES = \ + datawizard/readers_and_writers.c +datawizard_readers_and_writers_OBJECTS = \ + datawizard/readers_and_writers.$(OBJEXT) +datawizard_readers_and_writers_LDADD = $(LDADD) +datawizard_readonly_SOURCES = datawizard/readonly.c +datawizard_readonly_OBJECTS = datawizard/readonly.$(OBJEXT) +datawizard_readonly_LDADD = $(LDADD) +datawizard_reclaim_SOURCES = datawizard/reclaim.c +datawizard_reclaim_OBJECTS = datawizard/reclaim.$(OBJEXT) +datawizard_reclaim_LDADD = $(LDADD) +datawizard_redux_acquire_SOURCES = datawizard/redux_acquire.c +datawizard_redux_acquire_OBJECTS = datawizard/redux_acquire.$(OBJEXT) +datawizard_redux_acquire_LDADD = $(LDADD) +am__datawizard_scratch_SOURCES_DIST = datawizard/scratch.c \ + datawizard/scratch_cuda.cu datawizard/scratch_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_26 = \ +@STARPU_USE_CUDA_TRUE@ datawizard/scratch_cuda.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_27 = \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl.$(OBJEXT) +am_datawizard_scratch_OBJECTS = datawizard/scratch.$(OBJEXT) \ + $(am__objects_26) $(am__objects_27) +datawizard_scratch_OBJECTS = $(am_datawizard_scratch_OBJECTS) +datawizard_scratch_LDADD = $(LDADD) +datawizard_scratch_reuse_SOURCES = datawizard/scratch_reuse.c +datawizard_scratch_reuse_OBJECTS = datawizard/scratch_reuse.$(OBJEXT) +datawizard_scratch_reuse_LDADD = $(LDADD) +datawizard_simgrid_locality_SOURCES = datawizard/simgrid-locality.c +datawizard_simgrid_locality_OBJECTS = \ + datawizard/simgrid-locality.$(OBJEXT) +datawizard_simgrid_locality_LDADD = $(LDADD) +am__datawizard_specific_node_SOURCES_DIST = \ + datawizard/specific_node.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + 
variable/increment_opencl.c +am_datawizard_specific_node_OBJECTS = \ + datawizard/specific_node.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_specific_node_OBJECTS = \ + $(am_datawizard_specific_node_OBJECTS) +datawizard_specific_node_LDADD = $(LDADD) +datawizard_specific_node_same_SOURCES = \ + datawizard/specific_node_same.c +datawizard_specific_node_same_OBJECTS = \ + datawizard/specific_node_same.$(OBJEXT) +datawizard_specific_node_same_LDADD = $(LDADD) +am__datawizard_sync_and_notify_data_SOURCES_DIST = \ + datawizard/sync_and_notify_data.c \ + datawizard/sync_and_notify_data_kernels.cu \ + datawizard/sync_and_notify_data_opencl.c +@STARPU_USE_CUDA_TRUE@am__objects_28 = datawizard/sync_and_notify_data_kernels.$(OBJEXT) +@STARPU_USE_OPENCL_TRUE@am__objects_29 = datawizard/sync_and_notify_data_opencl.$(OBJEXT) +am_datawizard_sync_and_notify_data_OBJECTS = \ + datawizard/sync_and_notify_data.$(OBJEXT) $(am__objects_28) \ + $(am__objects_29) +datawizard_sync_and_notify_data_OBJECTS = \ + $(am_datawizard_sync_and_notify_data_OBJECTS) +datawizard_sync_and_notify_data_LDADD = $(LDADD) +am__datawizard_sync_and_notify_data_implicit_SOURCES_DIST = \ + datawizard/sync_and_notify_data_implicit.c \ + datawizard/sync_and_notify_data_kernels.cu \ + datawizard/sync_and_notify_data_opencl.c +am_datawizard_sync_and_notify_data_implicit_OBJECTS = \ + datawizard/sync_and_notify_data_implicit.$(OBJEXT) \ + $(am__objects_28) $(am__objects_29) +datawizard_sync_and_notify_data_implicit_OBJECTS = \ + $(am_datawizard_sync_and_notify_data_implicit_OBJECTS) +datawizard_sync_and_notify_data_implicit_LDADD = $(LDADD) +datawizard_sync_with_data_with_mem_SOURCES = \ + datawizard/sync_with_data_with_mem.c +datawizard_sync_with_data_with_mem_OBJECTS = \ + datawizard/sync_with_data_with_mem.$(OBJEXT) +datawizard_sync_with_data_with_mem_LDADD = $(LDADD) +datawizard_sync_with_data_with_mem_non_blocking_SOURCES = \ + 
datawizard/sync_with_data_with_mem_non_blocking.c +datawizard_sync_with_data_with_mem_non_blocking_OBJECTS = \ + datawizard/sync_with_data_with_mem_non_blocking.$(OBJEXT) +datawizard_sync_with_data_with_mem_non_blocking_LDADD = $(LDADD) +datawizard_sync_with_data_with_mem_non_blocking_implicit_SOURCES = \ + datawizard/sync_with_data_with_mem_non_blocking_implicit.c +datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS = datawizard/sync_with_data_with_mem_non_blocking_implicit.$(OBJEXT) +datawizard_sync_with_data_with_mem_non_blocking_implicit_LDADD = \ + $(LDADD) +datawizard_task_with_multiple_time_the_same_handle_SOURCES = \ + datawizard/task_with_multiple_time_the_same_handle.c +datawizard_task_with_multiple_time_the_same_handle_OBJECTS = \ + datawizard/task_with_multiple_time_the_same_handle.$(OBJEXT) +datawizard_task_with_multiple_time_the_same_handle_LDADD = $(LDADD) +datawizard_temporary_partition_SOURCES = \ + datawizard/temporary_partition.c +datawizard_temporary_partition_OBJECTS = \ + datawizard/temporary_partition.$(OBJEXT) +datawizard_temporary_partition_LDADD = $(LDADD) +datawizard_temporary_partition_implicit_SOURCES = \ + datawizard/temporary_partition_implicit.c +datawizard_temporary_partition_implicit_OBJECTS = \ + datawizard/temporary_partition_implicit.$(OBJEXT) +datawizard_temporary_partition_implicit_LDADD = $(LDADD) +datawizard_temporary_partition_read_SOURCES = \ + datawizard/temporary_partition_read.c +datawizard_temporary_partition_read_OBJECTS = \ + datawizard/temporary_partition_read.$(OBJEXT) +datawizard_temporary_partition_read_LDADD = $(LDADD) +am_datawizard_test_arbiter_OBJECTS = \ + datawizard/test_arbiter.$(OBJEXT) +datawizard_test_arbiter_OBJECTS = \ + $(am_datawizard_test_arbiter_OBJECTS) +datawizard_test_arbiter_LDADD = $(LDADD) +datawizard_unpartition_SOURCES = datawizard/unpartition.c +datawizard_unpartition_OBJECTS = datawizard/unpartition.$(OBJEXT) +datawizard_unpartition_LDADD = $(LDADD) 
+datawizard_user_interaction_implicit_SOURCES = \ + datawizard/user_interaction_implicit.c +datawizard_user_interaction_implicit_OBJECTS = \ + datawizard/user_interaction_implicit.$(OBJEXT) +datawizard_user_interaction_implicit_LDADD = $(LDADD) +datawizard_variable_parameters_SOURCES = \ + datawizard/variable_parameters.c +datawizard_variable_parameters_OBJECTS = \ + datawizard/variable_parameters.$(OBJEXT) +datawizard_variable_parameters_LDADD = $(LDADD) +datawizard_variable_size_SOURCES = datawizard/variable_size.c +datawizard_variable_size_OBJECTS = datawizard/variable_size.$(OBJEXT) +datawizard_variable_size_LDADD = $(LDADD) +datawizard_write_only_tmp_buffer_SOURCES = \ + datawizard/write_only_tmp_buffer.c +datawizard_write_only_tmp_buffer_OBJECTS = \ + datawizard/write_only_tmp_buffer.$(OBJEXT) +datawizard_write_only_tmp_buffer_LDADD = $(LDADD) +am__datawizard_wt_broadcast_SOURCES_DIST = datawizard/wt_broadcast.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_datawizard_wt_broadcast_OBJECTS = \ + datawizard/wt_broadcast.$(OBJEXT) variable/increment.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) $(am__objects_3) +datawizard_wt_broadcast_OBJECTS = \ + $(am_datawizard_wt_broadcast_OBJECTS) +datawizard_wt_broadcast_LDADD = $(LDADD) +am__datawizard_wt_host_SOURCES_DIST = datawizard/wt_host.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_datawizard_wt_host_OBJECTS = datawizard/wt_host.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +datawizard_wt_host_OBJECTS = $(am_datawizard_wt_host_OBJECTS) +datawizard_wt_host_LDADD = $(LDADD) +disk_disk_compute_SOURCES = disk/disk_compute.c +disk_disk_compute_OBJECTS = disk/disk_compute.$(OBJEXT) +disk_disk_compute_LDADD = $(LDADD) +disk_disk_copy_SOURCES = disk/disk_copy.c +disk_disk_copy_OBJECTS = disk/disk_copy.$(OBJEXT) 
+disk_disk_copy_LDADD = $(LDADD) +disk_disk_copy_to_disk_SOURCES = disk/disk_copy_to_disk.c +disk_disk_copy_to_disk_OBJECTS = disk/disk_copy_to_disk.$(OBJEXT) +disk_disk_copy_to_disk_LDADD = $(LDADD) +disk_disk_copy_unpack_SOURCES = disk/disk_copy_unpack.c +disk_disk_copy_unpack_OBJECTS = disk/disk_copy_unpack.$(OBJEXT) +disk_disk_copy_unpack_LDADD = $(LDADD) +disk_disk_pack_SOURCES = disk/disk_pack.c +disk_disk_pack_OBJECTS = disk/disk_pack.$(OBJEXT) +disk_disk_pack_LDADD = $(LDADD) +disk_mem_reclaim_SOURCES = disk/mem_reclaim.c +disk_mem_reclaim_OBJECTS = disk/mem_reclaim.$(OBJEXT) +disk_mem_reclaim_LDADD = $(LDADD) +energy_energy_efficiency_SOURCES = energy/energy_efficiency.c +energy_energy_efficiency_OBJECTS = energy/energy_efficiency.$(OBJEXT) +energy_energy_efficiency_LDADD = $(LDADD) +errorcheck_invalid_blocking_calls_SOURCES = \ + errorcheck/invalid_blocking_calls.c +errorcheck_invalid_blocking_calls_OBJECTS = \ + errorcheck/invalid_blocking_calls.$(OBJEXT) +errorcheck_invalid_blocking_calls_LDADD = $(LDADD) +errorcheck_invalid_tasks_SOURCES = errorcheck/invalid_tasks.c +errorcheck_invalid_tasks_OBJECTS = errorcheck/invalid_tasks.$(OBJEXT) +errorcheck_invalid_tasks_LDADD = $(LDADD) +errorcheck_starpu_init_noworker_SOURCES = \ + errorcheck/starpu_init_noworker.c +errorcheck_starpu_init_noworker_OBJECTS = \ + errorcheck/starpu_init_noworker.$(OBJEXT) +errorcheck_starpu_init_noworker_LDADD = $(LDADD) +errorcheck_workers_cpuid_SOURCES = errorcheck/workers_cpuid.c +errorcheck_workers_cpuid_OBJECTS = errorcheck/workers_cpuid.$(OBJEXT) +errorcheck_workers_cpuid_LDADD = $(LDADD) +fault_tolerance_retry_SOURCES = fault-tolerance/retry.c +fault_tolerance_retry_OBJECTS = fault-tolerance/retry.$(OBJEXT) +fault_tolerance_retry_LDADD = $(LDADD) +am__fortran90_init_01_SOURCES_DIST = fortran90/starpu_mod.f90 \ + fortran90/init_01.f90 +@STARPU_HAVE_FC_TRUE@am_fortran90_init_01_OBJECTS = \ +@STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.$(OBJEXT) \ +@STARPU_HAVE_FC_TRUE@ 
fortran90/init_01.$(OBJEXT) +fortran90_init_01_OBJECTS = $(am_fortran90_init_01_OBJECTS) +fortran90_init_01_LDADD = $(LDADD) +helper_cublasLt_init_SOURCES = helper/cublasLt_init.c +helper_cublasLt_init_OBJECTS = helper/cublasLt_init.$(OBJEXT) +helper_cublasLt_init_LDADD = $(LDADD) +helper_cublas_init_SOURCES = helper/cublas_init.c +helper_cublas_init_OBJECTS = helper/cublas_init.$(OBJEXT) +helper_cublas_init_LDADD = $(LDADD) +helper_cusparse_init_SOURCES = helper/cusparse_init.c +helper_cusparse_init_OBJECTS = helper/cusparse_init.$(OBJEXT) +helper_cusparse_init_LDADD = $(LDADD) +helper_execute_on_all_SOURCES = helper/execute_on_all.c +helper_execute_on_all_OBJECTS = helper/execute_on_all.$(OBJEXT) +helper_execute_on_all_LDADD = $(LDADD) +helper_hipblas_init_SOURCES = helper/hipblas_init.c +helper_hipblas_init_OBJECTS = helper/hipblas_init.$(OBJEXT) +helper_hipblas_init_LDADD = $(LDADD) +helper_pinned_memory_SOURCES = helper/pinned_memory.c +helper_pinned_memory_OBJECTS = helper/pinned_memory.$(OBJEXT) +helper_pinned_memory_LDADD = $(LDADD) +helper_starpu_create_sync_task_SOURCES = \ + helper/starpu_create_sync_task.c +helper_starpu_create_sync_task_OBJECTS = \ + helper/starpu_create_sync_task.$(OBJEXT) +helper_starpu_create_sync_task_LDADD = $(LDADD) +helper_starpu_data_cpy_SOURCES = helper/starpu_data_cpy.c +helper_starpu_data_cpy_OBJECTS = helper/starpu_data_cpy.$(OBJEXT) +helper_starpu_data_cpy_LDADD = $(LDADD) +am__helper_starpu_data_dup_ro_SOURCES_DIST = \ + helper/starpu_data_dup_ro.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_helper_starpu_data_dup_ro_OBJECTS = \ + helper/starpu_data_dup_ro.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +helper_starpu_data_dup_ro_OBJECTS = \ + $(am_helper_starpu_data_dup_ro_OBJECTS) +helper_starpu_data_dup_ro_LDADD = $(LDADD) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) 
+loader_LDADD = $(LDADD) +main_bind_SOURCES = main/bind.c +main_bind_OBJECTS = main/bind.$(OBJEXT) +main_bind_LDADD = $(LDADD) +main_callback_SOURCES = main/callback.c +main_callback_OBJECTS = main/callback.$(OBJEXT) +main_callback_LDADD = $(LDADD) +main_codelet_null_callback_SOURCES = main/codelet_null_callback.c +main_codelet_null_callback_OBJECTS = \ + main/codelet_null_callback.$(OBJEXT) +main_codelet_null_callback_LDADD = $(LDADD) +main_const_codelet_SOURCES = main/const_codelet.c +main_const_codelet_OBJECTS = main/const_codelet.$(OBJEXT) +main_const_codelet_LDADD = $(LDADD) +main_deadlock_SOURCES = main/deadlock.c +main_deadlock_OBJECTS = main/deadlock.$(OBJEXT) +main_deadlock_LDADD = $(LDADD) +main_declare_deps_after_submission_SOURCES = \ + main/declare_deps_after_submission.c +main_declare_deps_after_submission_OBJECTS = \ + main/declare_deps_after_submission.$(OBJEXT) +main_declare_deps_after_submission_LDADD = $(LDADD) +main_declare_deps_after_submission_synchronous_SOURCES = \ + main/declare_deps_after_submission_synchronous.c +main_declare_deps_after_submission_synchronous_OBJECTS = \ + main/declare_deps_after_submission_synchronous.$(OBJEXT) +main_declare_deps_after_submission_synchronous_LDADD = $(LDADD) +main_declare_deps_in_callback_SOURCES = \ + main/declare_deps_in_callback.c +main_declare_deps_in_callback_OBJECTS = \ + main/declare_deps_in_callback.$(OBJEXT) +main_declare_deps_in_callback_LDADD = $(LDADD) +main_deploop_SOURCES = main/deploop.c +main_deploop_OBJECTS = main/deploop.$(OBJEXT) +main_deploop_LDADD = $(LDADD) +main_deprecated_func_SOURCES = main/deprecated_func.c +main_deprecated_func_OBJECTS = \ + main/deprecated_func-deprecated_func.$(OBJEXT) +main_deprecated_func_LDADD = $(LDADD) +main_deprecated_func_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(main_deprecated_func_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +main_display_binding_SOURCES = main/display_binding.c 
+main_display_binding_OBJECTS = main/display_binding.$(OBJEXT) +main_display_binding_LDADD = $(LDADD) +main_driver_api_init_run_deinit_SOURCES = \ + main/driver_api/init_run_deinit.c +main_driver_api_init_run_deinit_OBJECTS = \ + main/driver_api/init_run_deinit.$(OBJEXT) +main_driver_api_init_run_deinit_LDADD = $(LDADD) +main_driver_api_run_driver_SOURCES = main/driver_api/run_driver.c +main_driver_api_run_driver_OBJECTS = \ + main/driver_api/run_driver.$(OBJEXT) +main_driver_api_run_driver_LDADD = $(LDADD) +main_empty_task_SOURCES = main/empty_task.c +main_empty_task_OBJECTS = main/empty_task.$(OBJEXT) +main_empty_task_LDADD = $(LDADD) +main_empty_task_chain_SOURCES = main/empty_task_chain.c +main_empty_task_chain_OBJECTS = main/empty_task_chain.$(OBJEXT) +main_empty_task_chain_LDADD = $(LDADD) +main_empty_task_sync_point_SOURCES = main/empty_task_sync_point.c +main_empty_task_sync_point_OBJECTS = \ + main/empty_task_sync_point.$(OBJEXT) +main_empty_task_sync_point_LDADD = $(LDADD) +main_empty_task_sync_point_tasks_SOURCES = \ + main/empty_task_sync_point_tasks.c +main_empty_task_sync_point_tasks_OBJECTS = \ + main/empty_task_sync_point_tasks.$(OBJEXT) +main_empty_task_sync_point_tasks_LDADD = $(LDADD) +main_execute_on_a_specific_worker_SOURCES = \ + main/execute_on_a_specific_worker.c +main_execute_on_a_specific_worker_OBJECTS = \ + main/execute_on_a_specific_worker.$(OBJEXT) +main_execute_on_a_specific_worker_LDADD = $(LDADD) +main_execute_schedule_SOURCES = main/execute_schedule.c +main_execute_schedule_OBJECTS = main/execute_schedule.$(OBJEXT) +main_execute_schedule_LDADD = $(LDADD) +main_get_children_tasks_SOURCES = main/get_children_tasks.c +main_get_children_tasks_OBJECTS = main/get_children_tasks.$(OBJEXT) +main_get_children_tasks_LDADD = $(LDADD) +main_get_current_task_SOURCES = main/get_current_task.c +main_get_current_task_OBJECTS = main/get_current_task.$(OBJEXT) +main_get_current_task_LDADD = $(LDADD) +main_hwloc_cpuset_SOURCES = main/hwloc_cpuset.c 
+main_hwloc_cpuset_OBJECTS = main/hwloc_cpuset.$(OBJEXT) +main_hwloc_cpuset_LDADD = $(LDADD) +main_insert_task_SOURCES = main/insert_task.c +main_insert_task_OBJECTS = main/insert_task.$(OBJEXT) +main_insert_task_LDADD = $(LDADD) +main_insert_task_array_SOURCES = main/insert_task_array.c +main_insert_task_array_OBJECTS = main/insert_task_array.$(OBJEXT) +main_insert_task_array_LDADD = $(LDADD) +main_insert_task_dyn_handles_SOURCES = main/insert_task_dyn_handles.c +main_insert_task_dyn_handles_OBJECTS = \ + main/insert_task_dyn_handles.$(OBJEXT) +main_insert_task_dyn_handles_LDADD = $(LDADD) +main_insert_task_many_SOURCES = main/insert_task_many.c +main_insert_task_many_OBJECTS = main/insert_task_many.$(OBJEXT) +main_insert_task_many_LDADD = $(LDADD) +main_insert_task_nullcodelet_SOURCES = main/insert_task_nullcodelet.c +main_insert_task_nullcodelet_OBJECTS = \ + main/insert_task_nullcodelet.$(OBJEXT) +main_insert_task_nullcodelet_LDADD = $(LDADD) +main_insert_task_pack_SOURCES = main/insert_task_pack.c +main_insert_task_pack_OBJECTS = main/insert_task_pack.$(OBJEXT) +main_insert_task_pack_LDADD = $(LDADD) +main_insert_task_value_SOURCES = main/insert_task_value.c +main_insert_task_value_OBJECTS = main/insert_task_value.$(OBJEXT) +main_insert_task_value_LDADD = $(LDADD) +am__main_insert_task_where_SOURCES_DIST = main/insert_task_where.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_main_insert_task_where_OBJECTS = main/insert_task_where.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_insert_task_where_OBJECTS = $(am_main_insert_task_where_OBJECTS) +main_insert_task_where_LDADD = $(LDADD) +main_job_SOURCES = main/job.c +main_job_OBJECTS = main/job.$(OBJEXT) +main_job_LDADD = $(LDADD) +main_mkdtemp_SOURCES = main/mkdtemp.c +main_mkdtemp_OBJECTS = main/mkdtemp.$(OBJEXT) +main_mkdtemp_LDADD = $(LDADD) +main_multithreaded_SOURCES = 
main/multithreaded.c +main_multithreaded_OBJECTS = main/multithreaded.$(OBJEXT) +main_multithreaded_LDADD = $(LDADD) +main_multithreaded_init_SOURCES = main/multithreaded_init.c +main_multithreaded_init_OBJECTS = main/multithreaded_init.$(OBJEXT) +main_multithreaded_init_LDADD = $(LDADD) +main_pack_SOURCES = main/pack.c +main_pack_OBJECTS = main/pack.$(OBJEXT) +main_pack_LDADD = $(LDADD) +main_pause_resume_SOURCES = main/pause_resume.c +main_pause_resume_OBJECTS = main/pause_resume.$(OBJEXT) +main_pause_resume_LDADD = $(LDADD) +main_regenerate_SOURCES = main/regenerate.c +main_regenerate_OBJECTS = main/regenerate.$(OBJEXT) +main_regenerate_LDADD = $(LDADD) +main_regenerate_pipeline_SOURCES = main/regenerate_pipeline.c +main_regenerate_pipeline_OBJECTS = main/regenerate_pipeline.$(OBJEXT) +main_regenerate_pipeline_LDADD = $(LDADD) +main_restart_SOURCES = main/restart.c +main_restart_OBJECTS = main/restart.$(OBJEXT) +main_restart_LDADD = $(LDADD) +main_starpu_init_SOURCES = main/starpu_init.c +main_starpu_init_OBJECTS = main/starpu_init.$(OBJEXT) +main_starpu_init_LDADD = $(LDADD) +main_starpu_task_bundle_SOURCES = main/starpu_task_bundle.c +main_starpu_task_bundle_OBJECTS = main/starpu_task_bundle.$(OBJEXT) +main_starpu_task_bundle_LDADD = $(LDADD) +main_starpu_task_wait_SOURCES = main/starpu_task_wait.c +main_starpu_task_wait_OBJECTS = main/starpu_task_wait.$(OBJEXT) +main_starpu_task_wait_LDADD = $(LDADD) +main_starpu_task_wait_for_all_SOURCES = \ + main/starpu_task_wait_for_all.c +main_starpu_task_wait_for_all_OBJECTS = \ + main/starpu_task_wait_for_all.$(OBJEXT) +main_starpu_task_wait_for_all_LDADD = $(LDADD) +main_starpu_worker_exists_SOURCES = main/starpu_worker_exists.c +main_starpu_worker_exists_OBJECTS = \ + main/starpu_worker_exists-starpu_worker_exists.$(OBJEXT) +main_starpu_worker_exists_LDADD = $(LDADD) +main_starpu_worker_exists_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + 
$(main_starpu_worker_exists_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +main_static_restartable_SOURCES = main/static_restartable.c +main_static_restartable_OBJECTS = main/static_restartable.$(OBJEXT) +main_static_restartable_LDADD = $(LDADD) +main_static_restartable_tag_SOURCES = main/static_restartable_tag.c +main_static_restartable_tag_OBJECTS = \ + main/static_restartable_tag.$(OBJEXT) +main_static_restartable_tag_LDADD = $(LDADD) +main_static_restartable_using_initializer_SOURCES = \ + main/static_restartable_using_initializer.c +main_static_restartable_using_initializer_OBJECTS = \ + main/static_restartable_using_initializer.$(OBJEXT) +main_static_restartable_using_initializer_LDADD = $(LDADD) +am__main_subgraph_repeat_SOURCES_DIST = main/subgraph_repeat.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_main_subgraph_repeat_OBJECTS = main/subgraph_repeat.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_subgraph_repeat_OBJECTS = $(am_main_subgraph_repeat_OBJECTS) +main_subgraph_repeat_LDADD = $(LDADD) +am__main_subgraph_repeat_regenerate_SOURCES_DIST = \ + main/subgraph_repeat_regenerate.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_main_subgraph_repeat_regenerate_OBJECTS = \ + main/subgraph_repeat_regenerate.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_subgraph_repeat_regenerate_OBJECTS = \ + $(am_main_subgraph_repeat_regenerate_OBJECTS) +main_subgraph_repeat_regenerate_LDADD = $(LDADD) +am__main_subgraph_repeat_regenerate_tag_SOURCES_DIST = \ + main/subgraph_repeat_regenerate_tag.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_main_subgraph_repeat_regenerate_tag_OBJECTS = \ + main/subgraph_repeat_regenerate_tag.$(OBJEXT) \ + 
variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_subgraph_repeat_regenerate_tag_OBJECTS = \ + $(am_main_subgraph_repeat_regenerate_tag_OBJECTS) +main_subgraph_repeat_regenerate_tag_LDADD = $(LDADD) +am__main_subgraph_repeat_regenerate_tag_cycle_SOURCES_DIST = \ + main/subgraph_repeat_regenerate_tag_cycle.c \ + variable/increment.c variable/increment_cuda.cu \ + variable/increment_hip.hip variable/increment_opencl.c +am_main_subgraph_repeat_regenerate_tag_cycle_OBJECTS = \ + main/subgraph_repeat_regenerate_tag_cycle.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_subgraph_repeat_regenerate_tag_cycle_OBJECTS = \ + $(am_main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) +main_subgraph_repeat_regenerate_tag_cycle_LDADD = $(LDADD) +am__main_subgraph_repeat_tag_SOURCES_DIST = \ + main/subgraph_repeat_tag.c variable/increment.c \ + variable/increment_cuda.cu variable/increment_hip.hip \ + variable/increment_opencl.c +am_main_subgraph_repeat_tag_OBJECTS = \ + main/subgraph_repeat_tag.$(OBJEXT) \ + variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +main_subgraph_repeat_tag_OBJECTS = \ + $(am_main_subgraph_repeat_tag_OBJECTS) +main_subgraph_repeat_tag_LDADD = $(LDADD) +main_submit_SOURCES = main/submit.c +main_submit_OBJECTS = main/submit.$(OBJEXT) +main_submit_LDADD = $(LDADD) +main_tag_get_task_SOURCES = main/tag_get_task.c +main_tag_get_task_OBJECTS = main/tag_get_task.$(OBJEXT) +main_tag_get_task_LDADD = $(LDADD) +main_tag_task_data_deps_SOURCES = main/tag_task_data_deps.c +main_tag_task_data_deps_OBJECTS = main/tag_task_data_deps.$(OBJEXT) +main_tag_task_data_deps_LDADD = $(LDADD) +main_tag_wait_api_SOURCES = main/tag_wait_api.c +main_tag_wait_api_OBJECTS = main/tag_wait_api.$(OBJEXT) +main_tag_wait_api_LDADD = $(LDADD) +main_task_end_dep_SOURCES = main/task_end_dep.c +main_task_end_dep_OBJECTS = main/task_end_dep.$(OBJEXT) 
+main_task_end_dep_LDADD = $(LDADD) +main_task_wait_api_SOURCES = main/task_wait_api.c +main_task_wait_api_OBJECTS = main/task_wait_api.$(OBJEXT) +main_task_wait_api_LDADD = $(LDADD) +main_wait_all_regenerable_tasks_SOURCES = \ + main/wait_all_regenerable_tasks.c +main_wait_all_regenerable_tasks_OBJECTS = \ + main/wait_all_regenerable_tasks.$(OBJEXT) +main_wait_all_regenerable_tasks_LDADD = $(LDADD) +maxfpga_max_fpga_advanced_static_SOURCES = \ + maxfpga/max_fpga_advanced_static.c +maxfpga_max_fpga_advanced_static_OBJECTS = \ + maxfpga/max_fpga_advanced_static.$(OBJEXT) +maxfpga_max_fpga_advanced_static_DEPENDENCIES = \ + maxfpga/slic_MyTasks.o +maxfpga_max_fpga_basic_static_SOURCES = \ + maxfpga/max_fpga_basic_static.c +maxfpga_max_fpga_basic_static_OBJECTS = \ + maxfpga/max_fpga_basic_static.$(OBJEXT) +maxfpga_max_fpga_basic_static_DEPENDENCIES = maxfpga/slic_MyTasks.o +maxfpga_max_fpga_dynamic_SOURCES = maxfpga/max_fpga_dynamic.c +maxfpga_max_fpga_dynamic_OBJECTS = maxfpga/max_fpga_dynamic.$(OBJEXT) +maxfpga_max_fpga_dynamic_DEPENDENCIES = maxfpga/slic_MyTasks.o +maxfpga_max_fpga_mux_SOURCES = maxfpga/max_fpga_mux.c +maxfpga_max_fpga_mux_OBJECTS = maxfpga/max_fpga_mux.$(OBJEXT) +maxfpga_max_fpga_mux_DEPENDENCIES = maxfpga/slic_MyTasksMux.o +microbenchs_async_tasks_overhead_SOURCES = \ + microbenchs/async_tasks_overhead.c +microbenchs_async_tasks_overhead_OBJECTS = \ + microbenchs/async_tasks_overhead.$(OBJEXT) +microbenchs_async_tasks_overhead_LDADD = $(LDADD) +microbenchs_bandwidth_SOURCES = microbenchs/bandwidth.c +microbenchs_bandwidth_OBJECTS = microbenchs/bandwidth.$(OBJEXT) +microbenchs_bandwidth_LDADD = $(LDADD) +microbenchs_display_structures_size_SOURCES = \ + microbenchs/display_structures_size.c +microbenchs_display_structures_size_OBJECTS = \ + microbenchs/display_structures_size.$(OBJEXT) +microbenchs_display_structures_size_LDADD = $(LDADD) +microbenchs_local_pingpong_SOURCES = microbenchs/local_pingpong.c +microbenchs_local_pingpong_OBJECTS = \ + 
microbenchs/local_pingpong.$(OBJEXT) +microbenchs_local_pingpong_LDADD = $(LDADD) +microbenchs_matrix_as_vector_SOURCES = microbenchs/matrix_as_vector.c +microbenchs_matrix_as_vector_OBJECTS = \ + microbenchs/matrix_as_vector.$(OBJEXT) +microbenchs_matrix_as_vector_LDADD = $(LDADD) +microbenchs_parallel_dependent_homogeneous_tasks_data_SOURCES = \ + microbenchs/parallel_dependent_homogeneous_tasks_data.c +microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS = microbenchs/parallel_dependent_homogeneous_tasks_data.$(OBJEXT) +microbenchs_parallel_dependent_homogeneous_tasks_data_LDADD = \ + $(LDADD) +microbenchs_parallel_independent_heterogeneous_tasks_SOURCES = \ + microbenchs/parallel_independent_heterogeneous_tasks.c +microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS = microbenchs/parallel_independent_heterogeneous_tasks.$(OBJEXT) +microbenchs_parallel_independent_heterogeneous_tasks_LDADD = $(LDADD) +microbenchs_parallel_independent_heterogeneous_tasks_data_SOURCES = \ + microbenchs/parallel_independent_heterogeneous_tasks_data.c +microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS = microbenchs/parallel_independent_heterogeneous_tasks_data.$(OBJEXT) +microbenchs_parallel_independent_heterogeneous_tasks_data_LDADD = \ + $(LDADD) +microbenchs_parallel_independent_homogeneous_tasks_SOURCES = \ + microbenchs/parallel_independent_homogeneous_tasks.c +microbenchs_parallel_independent_homogeneous_tasks_OBJECTS = \ + microbenchs/parallel_independent_homogeneous_tasks.$(OBJEXT) +microbenchs_parallel_independent_homogeneous_tasks_LDADD = $(LDADD) +microbenchs_parallel_independent_homogeneous_tasks_data_SOURCES = \ + microbenchs/parallel_independent_homogeneous_tasks_data.c +microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS = microbenchs/parallel_independent_homogeneous_tasks_data.$(OBJEXT) +microbenchs_parallel_independent_homogeneous_tasks_data_LDADD = \ + $(LDADD) +microbenchs_parallel_redux_heterogeneous_tasks_data_SOURCES 
= \ + microbenchs/parallel_redux_heterogeneous_tasks_data.c +microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS = \ + microbenchs/parallel_redux_heterogeneous_tasks_data.$(OBJEXT) +microbenchs_parallel_redux_heterogeneous_tasks_data_LDADD = $(LDADD) +microbenchs_parallel_redux_homogeneous_tasks_data_SOURCES = \ + microbenchs/parallel_redux_homogeneous_tasks_data.c +microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS = \ + microbenchs/parallel_redux_homogeneous_tasks_data.$(OBJEXT) +microbenchs_parallel_redux_homogeneous_tasks_data_LDADD = $(LDADD) +microbenchs_prefetch_data_on_node_SOURCES = \ + microbenchs/prefetch_data_on_node.c +microbenchs_prefetch_data_on_node_OBJECTS = \ + microbenchs/prefetch_data_on_node.$(OBJEXT) +microbenchs_prefetch_data_on_node_LDADD = $(LDADD) +microbenchs_redundant_buffer_SOURCES = microbenchs/redundant_buffer.c +microbenchs_redundant_buffer_OBJECTS = \ + microbenchs/redundant_buffer.$(OBJEXT) +microbenchs_redundant_buffer_LDADD = $(LDADD) +microbenchs_sync_tasks_overhead_SOURCES = \ + microbenchs/sync_tasks_overhead.c +microbenchs_sync_tasks_overhead_OBJECTS = \ + microbenchs/sync_tasks_overhead.$(OBJEXT) +microbenchs_sync_tasks_overhead_LDADD = $(LDADD) +microbenchs_tasks_overhead_SOURCES = microbenchs/tasks_overhead.c +microbenchs_tasks_overhead_OBJECTS = \ + microbenchs/tasks_overhead.$(OBJEXT) +microbenchs_tasks_overhead_LDADD = $(LDADD) +microbenchs_tasks_size_overhead_SOURCES = \ + microbenchs/tasks_size_overhead.c +microbenchs_tasks_size_overhead_OBJECTS = \ + microbenchs/tasks_size_overhead.$(OBJEXT) +microbenchs_tasks_size_overhead_LDADD = $(LDADD) +openmp_api_01_SOURCES = openmp/api_01.c +openmp_api_01_OBJECTS = openmp/api_01.$(OBJEXT) +openmp_api_01_LDADD = $(LDADD) +openmp_array_slice_01_SOURCES = openmp/array_slice_01.c +openmp_array_slice_01_OBJECTS = openmp/array_slice_01.$(OBJEXT) +openmp_array_slice_01_LDADD = $(LDADD) +openmp_cuda_task_01_SOURCES = openmp/cuda_task_01.c +openmp_cuda_task_01_OBJECTS = 
openmp/cuda_task_01.$(OBJEXT) +openmp_cuda_task_01_LDADD = $(LDADD) +openmp_environment_SOURCES = openmp/environment.c +openmp_environment_OBJECTS = openmp/environment.$(OBJEXT) +openmp_environment_LDADD = $(LDADD) +openmp_init_exit_01_SOURCES = openmp/init_exit_01.c +openmp_init_exit_01_OBJECTS = openmp/init_exit_01.$(OBJEXT) +openmp_init_exit_01_LDADD = $(LDADD) +openmp_init_exit_02_SOURCES = openmp/init_exit_02.c +openmp_init_exit_02_OBJECTS = openmp/init_exit_02.$(OBJEXT) +openmp_init_exit_02_LDADD = $(LDADD) +openmp_parallel_01_SOURCES = openmp/parallel_01.c +openmp_parallel_01_OBJECTS = openmp/parallel_01.$(OBJEXT) +openmp_parallel_01_LDADD = $(LDADD) +openmp_parallel_02_SOURCES = openmp/parallel_02.c +openmp_parallel_02_OBJECTS = openmp/parallel_02.$(OBJEXT) +openmp_parallel_02_LDADD = $(LDADD) +openmp_parallel_03_SOURCES = openmp/parallel_03.c +openmp_parallel_03_OBJECTS = openmp/parallel_03.$(OBJEXT) +openmp_parallel_03_LDADD = $(LDADD) +openmp_parallel_barrier_01_SOURCES = openmp/parallel_barrier_01.c +openmp_parallel_barrier_01_OBJECTS = \ + openmp/parallel_barrier_01.$(OBJEXT) +openmp_parallel_barrier_01_LDADD = $(LDADD) +openmp_parallel_critical_01_SOURCES = openmp/parallel_critical_01.c +openmp_parallel_critical_01_OBJECTS = \ + openmp/parallel_critical_01.$(OBJEXT) +openmp_parallel_critical_01_LDADD = $(LDADD) +openmp_parallel_critical_inline_01_SOURCES = \ + openmp/parallel_critical_inline_01.c +openmp_parallel_critical_inline_01_OBJECTS = \ + openmp/parallel_critical_inline_01.$(OBJEXT) +openmp_parallel_critical_inline_01_LDADD = $(LDADD) +openmp_parallel_critical_named_01_SOURCES = \ + openmp/parallel_critical_named_01.c +openmp_parallel_critical_named_01_OBJECTS = \ + openmp/parallel_critical_named_01.$(OBJEXT) +openmp_parallel_critical_named_01_LDADD = $(LDADD) +openmp_parallel_critical_named_inline_01_SOURCES = \ + openmp/parallel_critical_named_inline_01.c +openmp_parallel_critical_named_inline_01_OBJECTS = \ + 
openmp/parallel_critical_named_inline_01.$(OBJEXT) +openmp_parallel_critical_named_inline_01_LDADD = $(LDADD) +openmp_parallel_for_01_SOURCES = openmp/parallel_for_01.c +openmp_parallel_for_01_OBJECTS = openmp/parallel_for_01.$(OBJEXT) +openmp_parallel_for_01_LDADD = $(LDADD) +openmp_parallel_for_02_SOURCES = openmp/parallel_for_02.c +openmp_parallel_for_02_OBJECTS = openmp/parallel_for_02.$(OBJEXT) +openmp_parallel_for_02_LDADD = $(LDADD) +openmp_parallel_for_ordered_01_SOURCES = \ + openmp/parallel_for_ordered_01.c +openmp_parallel_for_ordered_01_OBJECTS = \ + openmp/parallel_for_ordered_01.$(OBJEXT) +openmp_parallel_for_ordered_01_LDADD = $(LDADD) +openmp_parallel_master_01_SOURCES = openmp/parallel_master_01.c +openmp_parallel_master_01_OBJECTS = \ + openmp/parallel_master_01.$(OBJEXT) +openmp_parallel_master_01_LDADD = $(LDADD) +openmp_parallel_master_inline_01_SOURCES = \ + openmp/parallel_master_inline_01.c +openmp_parallel_master_inline_01_OBJECTS = \ + openmp/parallel_master_inline_01.$(OBJEXT) +openmp_parallel_master_inline_01_LDADD = $(LDADD) +openmp_parallel_nested_lock_01_SOURCES = \ + openmp/parallel_nested_lock_01.c +openmp_parallel_nested_lock_01_OBJECTS = \ + openmp/parallel_nested_lock_01.$(OBJEXT) +openmp_parallel_nested_lock_01_LDADD = $(LDADD) +openmp_parallel_sections_01_SOURCES = openmp/parallel_sections_01.c +openmp_parallel_sections_01_OBJECTS = \ + openmp/parallel_sections_01.$(OBJEXT) +openmp_parallel_sections_01_LDADD = $(LDADD) +openmp_parallel_sections_combined_01_SOURCES = \ + openmp/parallel_sections_combined_01.c +openmp_parallel_sections_combined_01_OBJECTS = \ + openmp/parallel_sections_combined_01.$(OBJEXT) +openmp_parallel_sections_combined_01_LDADD = $(LDADD) +openmp_parallel_simple_lock_01_SOURCES = \ + openmp/parallel_simple_lock_01.c +openmp_parallel_simple_lock_01_OBJECTS = \ + openmp/parallel_simple_lock_01.$(OBJEXT) +openmp_parallel_simple_lock_01_LDADD = $(LDADD) +openmp_parallel_single_copyprivate_01_SOURCES = \ + 
openmp/parallel_single_copyprivate_01.c +openmp_parallel_single_copyprivate_01_OBJECTS = \ + openmp/parallel_single_copyprivate_01.$(OBJEXT) +openmp_parallel_single_copyprivate_01_LDADD = $(LDADD) +openmp_parallel_single_copyprivate_inline_01_SOURCES = \ + openmp/parallel_single_copyprivate_inline_01.c +openmp_parallel_single_copyprivate_inline_01_OBJECTS = \ + openmp/parallel_single_copyprivate_inline_01.$(OBJEXT) +openmp_parallel_single_copyprivate_inline_01_LDADD = $(LDADD) +openmp_parallel_single_inline_01_SOURCES = \ + openmp/parallel_single_inline_01.c +openmp_parallel_single_inline_01_OBJECTS = \ + openmp/parallel_single_inline_01.$(OBJEXT) +openmp_parallel_single_inline_01_LDADD = $(LDADD) +openmp_parallel_single_nowait_01_SOURCES = \ + openmp/parallel_single_nowait_01.c +openmp_parallel_single_nowait_01_OBJECTS = \ + openmp/parallel_single_nowait_01.$(OBJEXT) +openmp_parallel_single_nowait_01_LDADD = $(LDADD) +openmp_parallel_single_wait_01_SOURCES = \ + openmp/parallel_single_wait_01.c +openmp_parallel_single_wait_01_OBJECTS = \ + openmp/parallel_single_wait_01.$(OBJEXT) +openmp_parallel_single_wait_01_LDADD = $(LDADD) +openmp_task_01_SOURCES = openmp/task_01.c +openmp_task_01_OBJECTS = openmp/task_01.$(OBJEXT) +openmp_task_01_LDADD = $(LDADD) +openmp_task_02_SOURCES = openmp/task_02.c +openmp_task_02_OBJECTS = openmp/task_02.$(OBJEXT) +openmp_task_02_LDADD = $(LDADD) +openmp_task_03_SOURCES = openmp/task_03.c +openmp_task_03_OBJECTS = openmp/task_03.$(OBJEXT) +openmp_task_03_LDADD = $(LDADD) +openmp_taskgroup_01_SOURCES = openmp/taskgroup_01.c +openmp_taskgroup_01_OBJECTS = openmp/taskgroup_01.$(OBJEXT) +openmp_taskgroup_01_LDADD = $(LDADD) +openmp_taskgroup_02_SOURCES = openmp/taskgroup_02.c +openmp_taskgroup_02_OBJECTS = openmp/taskgroup_02.$(OBJEXT) +openmp_taskgroup_02_LDADD = $(LDADD) +openmp_taskloop_SOURCES = openmp/taskloop.c +openmp_taskloop_OBJECTS = openmp/taskloop.$(OBJEXT) +openmp_taskloop_LDADD = $(LDADD) +openmp_taskwait_01_SOURCES = 
openmp/taskwait_01.c +openmp_taskwait_01_OBJECTS = openmp/taskwait_01.$(OBJEXT) +openmp_taskwait_01_LDADD = $(LDADD) +am__overlap_gpu_concurrency_SOURCES_DIST = overlap/gpu_concurrency.c \ + overlap/long_kernel.cu +@STARPU_USE_CUDA_TRUE@am__objects_30 = overlap/long_kernel.$(OBJEXT) +am_overlap_gpu_concurrency_OBJECTS = \ + overlap/gpu_concurrency.$(OBJEXT) $(am__objects_30) +overlap_gpu_concurrency_OBJECTS = \ + $(am_overlap_gpu_concurrency_OBJECTS) +overlap_gpu_concurrency_LDADD = $(LDADD) +overlap_overlap_SOURCES = overlap/overlap.c +overlap_overlap_OBJECTS = overlap/overlap.$(OBJEXT) +overlap_overlap_LDADD = $(LDADD) +parallel_tasks_combined_worker_assign_workerid_SOURCES = \ + parallel_tasks/combined_worker_assign_workerid.c +parallel_tasks_combined_worker_assign_workerid_OBJECTS = \ + parallel_tasks/combined_worker_assign_workerid.$(OBJEXT) +parallel_tasks_combined_worker_assign_workerid_LDADD = $(LDADD) +parallel_tasks_cuda_only_SOURCES = parallel_tasks/cuda_only.c +parallel_tasks_cuda_only_OBJECTS = parallel_tasks/cuda_only.$(OBJEXT) +parallel_tasks_cuda_only_LDADD = $(LDADD) +parallel_tasks_explicit_combined_worker_SOURCES = \ + parallel_tasks/explicit_combined_worker.c +parallel_tasks_explicit_combined_worker_OBJECTS = \ + parallel_tasks/explicit_combined_worker.$(OBJEXT) +parallel_tasks_explicit_combined_worker_LDADD = $(LDADD) +parallel_tasks_parallel_kernels_SOURCES = \ + parallel_tasks/parallel_kernels.c +parallel_tasks_parallel_kernels_OBJECTS = \ + parallel_tasks/parallel_kernels.$(OBJEXT) +parallel_tasks_parallel_kernels_LDADD = $(LDADD) +parallel_tasks_parallel_kernels_spmd_SOURCES = \ + parallel_tasks/parallel_kernels_spmd.c +parallel_tasks_parallel_kernels_spmd_OBJECTS = \ + parallel_tasks/parallel_kernels_spmd.$(OBJEXT) +parallel_tasks_parallel_kernels_spmd_LDADD = $(LDADD) +parallel_tasks_parallel_kernels_trivial_SOURCES = \ + parallel_tasks/parallel_kernels_trivial.c +parallel_tasks_parallel_kernels_trivial_OBJECTS = \ + 
parallel_tasks/parallel_kernels_trivial.$(OBJEXT) +parallel_tasks_parallel_kernels_trivial_LDADD = $(LDADD) +parallel_tasks_spmd_peager_SOURCES = parallel_tasks/spmd_peager.c +parallel_tasks_spmd_peager_OBJECTS = \ + parallel_tasks/spmd_peager.$(OBJEXT) +parallel_tasks_spmd_peager_LDADD = $(LDADD) +parallel_tasks_swap_SOURCES = parallel_tasks/swap.c +parallel_tasks_swap_OBJECTS = parallel_tasks/swap.$(OBJEXT) +parallel_tasks_swap_LDADD = $(LDADD) +perfmodels_feed_SOURCES = perfmodels/feed.c +perfmodels_feed_OBJECTS = perfmodels/feed.$(OBJEXT) +perfmodels_feed_LDADD = $(LDADD) +perfmodels_memory_SOURCES = perfmodels/memory.c +perfmodels_memory_OBJECTS = perfmodels/memory.$(OBJEXT) +perfmodels_memory_LDADD = $(LDADD) +am__perfmodels_non_linear_regression_based_SOURCES_DIST = \ + perfmodels/non_linear_regression_based.c \ + perfmodels/opencl_memset.c +@STARPU_USE_OPENCL_TRUE@am__objects_31 = \ +@STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.$(OBJEXT) +am_perfmodels_non_linear_regression_based_OBJECTS = \ + perfmodels/non_linear_regression_based.$(OBJEXT) \ + $(am__objects_31) +perfmodels_non_linear_regression_based_OBJECTS = \ + $(am_perfmodels_non_linear_regression_based_OBJECTS) +perfmodels_non_linear_regression_based_LDADD = $(LDADD) +perfmodels_path_SOURCES = perfmodels/path.c +perfmodels_path_OBJECTS = perfmodels/path.$(OBJEXT) +perfmodels_path_LDADD = $(LDADD) +perfmodels_regression_based_check_SOURCES = \ + perfmodels/regression_based_check.c +perfmodels_regression_based_check_OBJECTS = \ + perfmodels/regression_based_check.$(OBJEXT) +perfmodels_regression_based_check_LDADD = $(LDADD) +perfmodels_regression_based_energy_SOURCES = \ + perfmodels/regression_based_energy.c +perfmodels_regression_based_energy_OBJECTS = \ + perfmodels/regression_based_energy.$(OBJEXT) +perfmodels_regression_based_energy_LDADD = $(LDADD) +am__perfmodels_regression_based_gpu_SOURCES_DIST = \ + perfmodels/regression_based_gpu.c perfmodels/opencl_memset.c 
+am_perfmodels_regression_based_gpu_OBJECTS = \ + perfmodels/regression_based_gpu.$(OBJEXT) $(am__objects_31) +perfmodels_regression_based_gpu_OBJECTS = \ + $(am_perfmodels_regression_based_gpu_OBJECTS) +perfmodels_regression_based_gpu_LDADD = $(LDADD) +am__perfmodels_regression_based_memset_SOURCES_DIST = \ + perfmodels/regression_based_memset.c \ + perfmodels/opencl_memset.c +am_perfmodels_regression_based_memset_OBJECTS = \ + perfmodels/regression_based_memset.$(OBJEXT) $(am__objects_31) +perfmodels_regression_based_memset_OBJECTS = \ + $(am_perfmodels_regression_based_memset_OBJECTS) +perfmodels_regression_based_memset_LDADD = $(LDADD) +perfmodels_regression_based_multiimpl_SOURCES = \ + perfmodels/regression_based_multiimpl.c +perfmodels_regression_based_multiimpl_OBJECTS = \ + perfmodels/regression_based_multiimpl.$(OBJEXT) +perfmodels_regression_based_multiimpl_LDADD = $(LDADD) +perfmodels_user_base_SOURCES = perfmodels/user_base.c +perfmodels_user_base_OBJECTS = perfmodels/user_base.$(OBJEXT) +perfmodels_user_base_LDADD = $(LDADD) +perfmodels_valid_model_SOURCES = perfmodels/valid_model.c +perfmodels_valid_model_OBJECTS = perfmodels/valid_model.$(OBJEXT) +perfmodels_valid_model_LDADD = $(LDADD) +perfmodels_value_nan_SOURCES = perfmodels/value_nan.c +perfmodels_value_nan_OBJECTS = perfmodels/value_nan.$(OBJEXT) +perfmodels_value_nan_LDADD = $(LDADD) +sched_ctx_sched_ctx_hierarchy_SOURCES = \ + sched_ctx/sched_ctx_hierarchy.c +sched_ctx_sched_ctx_hierarchy_OBJECTS = \ + sched_ctx/sched_ctx_hierarchy.$(OBJEXT) +sched_ctx_sched_ctx_hierarchy_LDADD = $(LDADD) +sched_ctx_sched_ctx_list_SOURCES = sched_ctx/sched_ctx_list.c +sched_ctx_sched_ctx_list_OBJECTS = sched_ctx/sched_ctx_list.$(OBJEXT) +sched_ctx_sched_ctx_list_LDADD = $(LDADD) +sched_ctx_sched_ctx_policy_data_SOURCES = \ + sched_ctx/sched_ctx_policy_data.c +sched_ctx_sched_ctx_policy_data_OBJECTS = \ + sched_ctx/sched_ctx_policy_data.$(OBJEXT) +sched_ctx_sched_ctx_policy_data_LDADD = $(LDADD) 
+sched_policies_data_locality_SOURCES = sched_policies/data_locality.c +sched_policies_data_locality_OBJECTS = \ + sched_policies/data_locality.$(OBJEXT) +sched_policies_data_locality_LDADD = $(LDADD) +sched_policies_execute_all_tasks_SOURCES = \ + sched_policies/execute_all_tasks.c +sched_policies_execute_all_tasks_OBJECTS = \ + sched_policies/execute_all_tasks-execute_all_tasks.$(OBJEXT) +sched_policies_execute_all_tasks_LDADD = $(LDADD) +sched_policies_execute_all_tasks_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) \ + $(sched_policies_execute_all_tasks_LDFLAGS) $(LDFLAGS) -o $@ +sched_policies_prio_SOURCES = sched_policies/prio.c +sched_policies_prio_OBJECTS = sched_policies/prio.$(OBJEXT) +sched_policies_prio_LDADD = $(LDADD) +sched_policies_simple_cpu_gpu_sched_SOURCES = \ + sched_policies/simple_cpu_gpu_sched.c +sched_policies_simple_cpu_gpu_sched_OBJECTS = \ + sched_policies/simple_cpu_gpu_sched.$(OBJEXT) +sched_policies_simple_cpu_gpu_sched_LDADD = $(LDADD) +sched_policies_simple_deps_SOURCES = sched_policies/simple_deps.c +sched_policies_simple_deps_OBJECTS = \ + sched_policies/simple_deps.$(OBJEXT) +sched_policies_simple_deps_LDADD = $(LDADD) +sched_policies_workerids_SOURCES = sched_policies/workerids.c +sched_policies_workerids_OBJECTS = sched_policies/workerids.$(OBJEXT) +sched_policies_workerids_LDADD = $(LDADD) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + 
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +SCRIPTS = $(examplebin_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + datawizard/$(DEPDIR)/acquire_cb.Po \ + datawizard/$(DEPDIR)/acquire_cb_insert.Po \ + datawizard/$(DEPDIR)/acquire_release.Po \ + datawizard/$(DEPDIR)/acquire_release2.Po \ + datawizard/$(DEPDIR)/acquire_release_to.Po \ + datawizard/$(DEPDIR)/acquire_try.Po \ + datawizard/$(DEPDIR)/allocate.Po \ + datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po \ + datawizard/$(DEPDIR)/bcsr.Po datawizard/$(DEPDIR)/cache.Po \ + datawizard/$(DEPDIR)/commute.Po \ + datawizard/$(DEPDIR)/commute2.Po datawizard/$(DEPDIR)/copy.Po \ + datawizard/$(DEPDIR)/critical_section_with_void_interface.Po \ + datawizard/$(DEPDIR)/data_deinitialize.Po \ + datawizard/$(DEPDIR)/data_implicit_deps.Po \ + datawizard/$(DEPDIR)/data_invalidation.Po \ + 
datawizard/$(DEPDIR)/data_register-data_register.Po \ + datawizard/$(DEPDIR)/deinitialize_pending_requests.Po \ + datawizard/$(DEPDIR)/deps.Po \ + datawizard/$(DEPDIR)/dining_philosophers.Po \ + datawizard/$(DEPDIR)/double_parameter.Po \ + datawizard/$(DEPDIR)/dsm_stress.Po \ + datawizard/$(DEPDIR)/gpu_ptr_register.Po \ + datawizard/$(DEPDIR)/gpu_register.Po \ + datawizard/$(DEPDIR)/handle_to_pointer.Po \ + datawizard/$(DEPDIR)/in_place_partition.Po \ + datawizard/$(DEPDIR)/increment_init.Po \ + datawizard/$(DEPDIR)/increment_redux.Po \ + datawizard/$(DEPDIR)/increment_redux_lazy.Po \ + datawizard/$(DEPDIR)/increment_redux_partition.Po \ + datawizard/$(DEPDIR)/increment_redux_v2.Po \ + datawizard/$(DEPDIR)/increment_redux_with_args.Po \ + datawizard/$(DEPDIR)/invalidate_pending_requests.Po \ + datawizard/$(DEPDIR)/lazy_allocation.Po \ + datawizard/$(DEPDIR)/locality.Po \ + datawizard/$(DEPDIR)/manual_reduction.Po \ + datawizard/$(DEPDIR)/mpi_like.Po \ + datawizard/$(DEPDIR)/mpi_like_async.Po \ + datawizard/$(DEPDIR)/no_unregister.Po \ + datawizard/$(DEPDIR)/noreclaim.Po \ + datawizard/$(DEPDIR)/nowhere.Po \ + datawizard/$(DEPDIR)/numa_overflow.Po \ + datawizard/$(DEPDIR)/partition_dep.Po \ + datawizard/$(DEPDIR)/partition_init.Po \ + datawizard/$(DEPDIR)/partition_lazy.Po \ + datawizard/$(DEPDIR)/partition_wontuse.Po \ + datawizard/$(DEPDIR)/partitioned_acquire.Po \ + datawizard/$(DEPDIR)/partitioned_initialization.Po \ + datawizard/$(DEPDIR)/readers_and_writers.Po \ + datawizard/$(DEPDIR)/readonly.Po \ + datawizard/$(DEPDIR)/reclaim.Po \ + datawizard/$(DEPDIR)/redux_acquire.Po \ + datawizard/$(DEPDIR)/scal.Po datawizard/$(DEPDIR)/scratch.Po \ + datawizard/$(DEPDIR)/scratch_opencl.Po \ + datawizard/$(DEPDIR)/scratch_reuse.Po \ + datawizard/$(DEPDIR)/simgrid-locality.Po \ + datawizard/$(DEPDIR)/specific_node.Po \ + datawizard/$(DEPDIR)/specific_node_same.Po \ + datawizard/$(DEPDIR)/sync_and_notify_data.Po \ + datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po \ + 
datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po \ + datawizard/$(DEPDIR)/sync_with_data_with_mem.Po \ + datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po \ + datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po \ + datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po \ + datawizard/$(DEPDIR)/temporary_partition.Po \ + datawizard/$(DEPDIR)/temporary_partition_implicit.Po \ + datawizard/$(DEPDIR)/temporary_partition_read.Po \ + datawizard/$(DEPDIR)/test_arbiter.Po \ + datawizard/$(DEPDIR)/unpartition.Po \ + datawizard/$(DEPDIR)/user_interaction_implicit.Po \ + datawizard/$(DEPDIR)/variable_parameters.Po \ + datawizard/$(DEPDIR)/variable_size.Po \ + datawizard/$(DEPDIR)/write_only_tmp_buffer.Po \ + datawizard/$(DEPDIR)/wt_broadcast.Po \ + datawizard/$(DEPDIR)/wt_host.Po \ + datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po \ + datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po \ + datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po \ + datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po \ + datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po \ + 
datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po \ + datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po \ + datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po \ + datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po \ + datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po \ + datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po \ + datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po \ + datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po \ + datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po \ + datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po \ + datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po \ + datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po \ + datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po \ + datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po \ + datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po \ + datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po \ + datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po \ + datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po \ + 
datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po \ + datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po \ + disk/$(DEPDIR)/disk_compute.Po disk/$(DEPDIR)/disk_copy.Po \ + disk/$(DEPDIR)/disk_copy_to_disk.Po \ + disk/$(DEPDIR)/disk_copy_unpack.Po disk/$(DEPDIR)/disk_pack.Po \ + disk/$(DEPDIR)/mem_reclaim.Po \ + energy/$(DEPDIR)/energy_efficiency.Po \ + errorcheck/$(DEPDIR)/invalid_blocking_calls.Po \ + errorcheck/$(DEPDIR)/invalid_tasks.Po \ + errorcheck/$(DEPDIR)/starpu_init_noworker.Po \ + errorcheck/$(DEPDIR)/workers_cpuid.Po \ + fault-tolerance/$(DEPDIR)/retry.Po \ + helper/$(DEPDIR)/cublasLt_init.Po \ + helper/$(DEPDIR)/cublas_init.Po \ + helper/$(DEPDIR)/cusparse_init.Po \ + helper/$(DEPDIR)/execute_on_all.Po \ + helper/$(DEPDIR)/hipblas_init.Po \ + helper/$(DEPDIR)/pinned_memory.Po \ + helper/$(DEPDIR)/starpu_create_sync_task.Po \ + helper/$(DEPDIR)/starpu_data_cpy.Po \ + helper/$(DEPDIR)/starpu_data_dup_ro.Po main/$(DEPDIR)/bind.Po \ + main/$(DEPDIR)/callback.Po \ + main/$(DEPDIR)/codelet_null_callback.Po \ + main/$(DEPDIR)/const_codelet.Po main/$(DEPDIR)/deadlock.Po \ + main/$(DEPDIR)/declare_deps_after_submission.Po \ + main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po \ + main/$(DEPDIR)/declare_deps_in_callback.Po \ + main/$(DEPDIR)/deploop.Po \ + main/$(DEPDIR)/deprecated_func-deprecated_func.Po \ + main/$(DEPDIR)/display_binding.Po main/$(DEPDIR)/empty_task.Po \ + main/$(DEPDIR)/empty_task_chain.Po \ + main/$(DEPDIR)/empty_task_sync_point.Po \ + main/$(DEPDIR)/empty_task_sync_point_tasks.Po \ + main/$(DEPDIR)/execute_on_a_specific_worker.Po \ + main/$(DEPDIR)/execute_schedule.Po \ + main/$(DEPDIR)/get_children_tasks.Po \ + main/$(DEPDIR)/get_current_task.Po \ + main/$(DEPDIR)/hwloc_cpuset.Po main/$(DEPDIR)/insert_task.Po \ + main/$(DEPDIR)/insert_task_array.Po \ + main/$(DEPDIR)/insert_task_dyn_handles.Po \ + main/$(DEPDIR)/insert_task_many.Po \ + main/$(DEPDIR)/insert_task_nullcodelet.Po \ + 
main/$(DEPDIR)/insert_task_pack.Po \ + main/$(DEPDIR)/insert_task_value.Po \ + main/$(DEPDIR)/insert_task_where.Po main/$(DEPDIR)/job.Po \ + main/$(DEPDIR)/mkdtemp.Po main/$(DEPDIR)/multithreaded.Po \ + main/$(DEPDIR)/multithreaded_init.Po main/$(DEPDIR)/pack.Po \ + main/$(DEPDIR)/pause_resume.Po main/$(DEPDIR)/regenerate.Po \ + main/$(DEPDIR)/regenerate_pipeline.Po \ + main/$(DEPDIR)/restart.Po main/$(DEPDIR)/starpu_init.Po \ + main/$(DEPDIR)/starpu_task_bundle.Po \ + main/$(DEPDIR)/starpu_task_wait.Po \ + main/$(DEPDIR)/starpu_task_wait_for_all.Po \ + main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po \ + main/$(DEPDIR)/static_restartable.Po \ + main/$(DEPDIR)/static_restartable_tag.Po \ + main/$(DEPDIR)/static_restartable_using_initializer.Po \ + main/$(DEPDIR)/subgraph_repeat.Po \ + main/$(DEPDIR)/subgraph_repeat_regenerate.Po \ + main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po \ + main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po \ + main/$(DEPDIR)/subgraph_repeat_tag.Po main/$(DEPDIR)/submit.Po \ + main/$(DEPDIR)/tag_get_task.Po \ + main/$(DEPDIR)/tag_task_data_deps.Po \ + main/$(DEPDIR)/tag_wait_api.Po main/$(DEPDIR)/task_end_dep.Po \ + main/$(DEPDIR)/task_wait_api.Po \ + main/$(DEPDIR)/wait_all_regenerable_tasks.Po \ + main/driver_api/$(DEPDIR)/init_run_deinit.Po \ + main/driver_api/$(DEPDIR)/run_driver.Po \ + maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po \ + maxfpga/$(DEPDIR)/max_fpga_basic_static.Po \ + maxfpga/$(DEPDIR)/max_fpga_dynamic.Po \ + maxfpga/$(DEPDIR)/max_fpga_mux.Po \ + microbenchs/$(DEPDIR)/async_tasks_overhead.Po \ + microbenchs/$(DEPDIR)/bandwidth.Po \ + microbenchs/$(DEPDIR)/display_structures_size.Po \ + microbenchs/$(DEPDIR)/local_pingpong.Po \ + microbenchs/$(DEPDIR)/matrix_as_vector.Po \ + microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po \ + microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po \ + microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po \ + 
microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po \ + microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po \ + microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po \ + microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po \ + microbenchs/$(DEPDIR)/prefetch_data_on_node.Po \ + microbenchs/$(DEPDIR)/redundant_buffer.Po \ + microbenchs/$(DEPDIR)/sync_tasks_overhead.Po \ + microbenchs/$(DEPDIR)/tasks_overhead.Po \ + microbenchs/$(DEPDIR)/tasks_size_overhead.Po \ + openmp/$(DEPDIR)/api_01.Po openmp/$(DEPDIR)/array_slice_01.Po \ + openmp/$(DEPDIR)/cuda_task_01.Po \ + openmp/$(DEPDIR)/environment.Po \ + openmp/$(DEPDIR)/init_exit_01.Po \ + openmp/$(DEPDIR)/init_exit_02.Po \ + openmp/$(DEPDIR)/parallel_01.Po \ + openmp/$(DEPDIR)/parallel_02.Po \ + openmp/$(DEPDIR)/parallel_03.Po \ + openmp/$(DEPDIR)/parallel_barrier_01.Po \ + openmp/$(DEPDIR)/parallel_critical_01.Po \ + openmp/$(DEPDIR)/parallel_critical_inline_01.Po \ + openmp/$(DEPDIR)/parallel_critical_named_01.Po \ + openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po \ + openmp/$(DEPDIR)/parallel_for_01.Po \ + openmp/$(DEPDIR)/parallel_for_02.Po \ + openmp/$(DEPDIR)/parallel_for_ordered_01.Po \ + openmp/$(DEPDIR)/parallel_master_01.Po \ + openmp/$(DEPDIR)/parallel_master_inline_01.Po \ + openmp/$(DEPDIR)/parallel_nested_lock_01.Po \ + openmp/$(DEPDIR)/parallel_sections_01.Po \ + openmp/$(DEPDIR)/parallel_sections_combined_01.Po \ + openmp/$(DEPDIR)/parallel_simple_lock_01.Po \ + openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po \ + openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po \ + openmp/$(DEPDIR)/parallel_single_inline_01.Po \ + openmp/$(DEPDIR)/parallel_single_nowait_01.Po \ + openmp/$(DEPDIR)/parallel_single_wait_01.Po \ + openmp/$(DEPDIR)/task_01.Po openmp/$(DEPDIR)/task_02.Po \ + openmp/$(DEPDIR)/task_03.Po openmp/$(DEPDIR)/taskgroup_01.Po \ + openmp/$(DEPDIR)/taskgroup_02.Po openmp/$(DEPDIR)/taskloop.Po \ + openmp/$(DEPDIR)/taskwait_01.Po \ + 
overlap/$(DEPDIR)/gpu_concurrency.Po \ + overlap/$(DEPDIR)/overlap.Po \ + parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po \ + parallel_tasks/$(DEPDIR)/cuda_only.Po \ + parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po \ + parallel_tasks/$(DEPDIR)/parallel_kernels.Po \ + parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po \ + parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po \ + parallel_tasks/$(DEPDIR)/spmd_peager.Po \ + parallel_tasks/$(DEPDIR)/swap.Po perfmodels/$(DEPDIR)/feed.Po \ + perfmodels/$(DEPDIR)/memory.Po \ + perfmodels/$(DEPDIR)/non_linear_regression_based.Po \ + perfmodels/$(DEPDIR)/opencl_memset.Po \ + perfmodels/$(DEPDIR)/path.Po \ + perfmodels/$(DEPDIR)/regression_based_check.Po \ + perfmodels/$(DEPDIR)/regression_based_energy.Po \ + perfmodels/$(DEPDIR)/regression_based_gpu.Po \ + perfmodels/$(DEPDIR)/regression_based_memset.Po \ + perfmodels/$(DEPDIR)/regression_based_multiimpl.Po \ + perfmodels/$(DEPDIR)/user_base.Po \ + perfmodels/$(DEPDIR)/valid_model.Po \ + perfmodels/$(DEPDIR)/value_nan.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_list.Po \ + sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po \ + sched_policies/$(DEPDIR)/data_locality.Po \ + sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po \ + sched_policies/$(DEPDIR)/prio.Po \ + sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po \ + sched_policies/$(DEPDIR)/simple_deps.Po \ + sched_policies/$(DEPDIR)/workerids.Po \ + variable/$(DEPDIR)/increment.Po \ + variable/$(DEPDIR)/increment_opencl.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = 
+CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) +LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) +AM_V_FC = $(am__v_FC_@AM_V@) +am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) +am__v_FC_0 = @echo " FC " $@; +am__v_FC_1 = +FCLD = $(FC) +FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_FCLD = $(am__v_FCLD_@AM_V@) +am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) +am__v_FCLD_0 = @echo " FCLD " $@; +am__v_FCLD_1 = +SOURCES = datawizard/acquire_cb.c datawizard/acquire_cb_insert.c \ + $(datawizard_acquire_release_SOURCES) \ + $(datawizard_acquire_release2_SOURCES) \ + $(datawizard_acquire_release_to_SOURCES) \ + datawizard/acquire_try.c datawizard/allocate.c \ + datawizard/allocate_many_numa_nodes.c datawizard/bcsr.c \ + datawizard/cache.c 
datawizard/commute.c datawizard/commute2.c \ + datawizard/copy.c \ + datawizard/critical_section_with_void_interface.c \ + datawizard/data_deinitialize.c datawizard/data_implicit_deps.c \ + datawizard/data_invalidation.c datawizard/data_register.c \ + datawizard/deinitialize_pending_requests.c datawizard/deps.c \ + datawizard/dining_philosophers.c datawizard/double_parameter.c \ + datawizard/dsm_stress.c $(datawizard_gpu_ptr_register_SOURCES) \ + $(datawizard_gpu_register_SOURCES) \ + datawizard/handle_to_pointer.c \ + $(datawizard_in_place_partition_SOURCES) \ + $(datawizard_increment_init_SOURCES) \ + $(datawizard_increment_redux_SOURCES) \ + $(datawizard_increment_redux_lazy_SOURCES) \ + $(datawizard_increment_redux_partition_SOURCES) \ + $(datawizard_increment_redux_v2_SOURCES) \ + $(datawizard_increment_redux_with_args_SOURCES) \ + $(datawizard_interfaces_bcsr_bcsr_interface_SOURCES) \ + $(datawizard_interfaces_block_block_interface_SOURCES) \ + $(datawizard_interfaces_coo_coo_interface_SOURCES) \ + datawizard/interfaces/copy_interfaces.c \ + $(datawizard_interfaces_csr_csr_interface_SOURCES) \ + $(datawizard_interfaces_matrix_matrix_interface_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_same_handle_SOURCES) \ + $(datawizard_interfaces_multiformat_multiformat_interface_SOURCES) \ + $(datawizard_interfaces_ndim_ndim_interface_SOURCES) \ + $(datawizard_interfaces_tensor_tensor_interface_SOURCES) \ + $(datawizard_interfaces_variable_variable_interface_SOURCES) \ + $(datawizard_interfaces_vector_vector_interface_SOURCES) \ + $(datawizard_interfaces_void_void_interface_SOURCES) \ + datawizard/invalidate_pending_requests.c \ + 
datawizard/lazy_allocation.c datawizard/locality.c \ + datawizard/manual_reduction.c $(datawizard_mpi_like_SOURCES) \ + $(datawizard_mpi_like_async_SOURCES) \ + datawizard/no_unregister.c datawizard/noreclaim.c \ + datawizard/nowhere.c datawizard/numa_overflow.c \ + $(datawizard_partition_dep_SOURCES) \ + datawizard/partition_init.c \ + $(datawizard_partition_lazy_SOURCES) \ + datawizard/partition_wontuse.c \ + datawizard/partitioned_acquire.c \ + datawizard/partitioned_initialization.c \ + datawizard/readers_and_writers.c datawizard/readonly.c \ + datawizard/reclaim.c datawizard/redux_acquire.c \ + $(datawizard_scratch_SOURCES) datawizard/scratch_reuse.c \ + datawizard/simgrid-locality.c \ + $(datawizard_specific_node_SOURCES) \ + datawizard/specific_node_same.c \ + $(datawizard_sync_and_notify_data_SOURCES) \ + $(datawizard_sync_and_notify_data_implicit_SOURCES) \ + datawizard/sync_with_data_with_mem.c \ + datawizard/sync_with_data_with_mem_non_blocking.c \ + datawizard/sync_with_data_with_mem_non_blocking_implicit.c \ + datawizard/task_with_multiple_time_the_same_handle.c \ + datawizard/temporary_partition.c \ + datawizard/temporary_partition_implicit.c \ + datawizard/temporary_partition_read.c \ + $(datawizard_test_arbiter_SOURCES) datawizard/unpartition.c \ + datawizard/user_interaction_implicit.c \ + datawizard/variable_parameters.c datawizard/variable_size.c \ + datawizard/write_only_tmp_buffer.c \ + $(datawizard_wt_broadcast_SOURCES) \ + $(datawizard_wt_host_SOURCES) disk/disk_compute.c \ + disk/disk_copy.c disk/disk_copy_to_disk.c \ + disk/disk_copy_unpack.c disk/disk_pack.c disk/mem_reclaim.c \ + energy/energy_efficiency.c errorcheck/invalid_blocking_calls.c \ + errorcheck/invalid_tasks.c errorcheck/starpu_init_noworker.c \ + errorcheck/workers_cpuid.c fault-tolerance/retry.c \ + $(fortran90_init_01_SOURCES) helper/cublasLt_init.c \ + helper/cublas_init.c helper/cusparse_init.c \ + helper/execute_on_all.c helper/hipblas_init.c \ + helper/pinned_memory.c 
helper/starpu_create_sync_task.c \ + helper/starpu_data_cpy.c $(helper_starpu_data_dup_ro_SOURCES) \ + loader.c main/bind.c main/callback.c \ + main/codelet_null_callback.c main/const_codelet.c \ + main/deadlock.c main/declare_deps_after_submission.c \ + main/declare_deps_after_submission_synchronous.c \ + main/declare_deps_in_callback.c main/deploop.c \ + main/deprecated_func.c main/display_binding.c \ + main/driver_api/init_run_deinit.c main/driver_api/run_driver.c \ + main/empty_task.c main/empty_task_chain.c \ + main/empty_task_sync_point.c \ + main/empty_task_sync_point_tasks.c \ + main/execute_on_a_specific_worker.c main/execute_schedule.c \ + main/get_children_tasks.c main/get_current_task.c \ + main/hwloc_cpuset.c main/insert_task.c \ + main/insert_task_array.c main/insert_task_dyn_handles.c \ + main/insert_task_many.c main/insert_task_nullcodelet.c \ + main/insert_task_pack.c main/insert_task_value.c \ + $(main_insert_task_where_SOURCES) main/job.c main/mkdtemp.c \ + main/multithreaded.c main/multithreaded_init.c main/pack.c \ + main/pause_resume.c main/regenerate.c \ + main/regenerate_pipeline.c main/restart.c main/starpu_init.c \ + main/starpu_task_bundle.c main/starpu_task_wait.c \ + main/starpu_task_wait_for_all.c main/starpu_worker_exists.c \ + main/static_restartable.c main/static_restartable_tag.c \ + main/static_restartable_using_initializer.c \ + $(main_subgraph_repeat_SOURCES) \ + $(main_subgraph_repeat_regenerate_SOURCES) \ + $(main_subgraph_repeat_regenerate_tag_SOURCES) \ + $(main_subgraph_repeat_regenerate_tag_cycle_SOURCES) \ + $(main_subgraph_repeat_tag_SOURCES) main/submit.c \ + main/tag_get_task.c main/tag_task_data_deps.c \ + main/tag_wait_api.c main/task_end_dep.c main/task_wait_api.c \ + main/wait_all_regenerable_tasks.c \ + maxfpga/max_fpga_advanced_static.c \ + maxfpga/max_fpga_basic_static.c maxfpga/max_fpga_dynamic.c \ + maxfpga/max_fpga_mux.c microbenchs/async_tasks_overhead.c \ + microbenchs/bandwidth.c 
microbenchs/display_structures_size.c \ + microbenchs/local_pingpong.c microbenchs/matrix_as_vector.c \ + microbenchs/parallel_dependent_homogeneous_tasks_data.c \ + microbenchs/parallel_independent_heterogeneous_tasks.c \ + microbenchs/parallel_independent_heterogeneous_tasks_data.c \ + microbenchs/parallel_independent_homogeneous_tasks.c \ + microbenchs/parallel_independent_homogeneous_tasks_data.c \ + microbenchs/parallel_redux_heterogeneous_tasks_data.c \ + microbenchs/parallel_redux_homogeneous_tasks_data.c \ + microbenchs/prefetch_data_on_node.c \ + microbenchs/redundant_buffer.c \ + microbenchs/sync_tasks_overhead.c microbenchs/tasks_overhead.c \ + microbenchs/tasks_size_overhead.c openmp/api_01.c \ + openmp/array_slice_01.c openmp/cuda_task_01.c \ + openmp/environment.c openmp/init_exit_01.c \ + openmp/init_exit_02.c openmp/parallel_01.c \ + openmp/parallel_02.c openmp/parallel_03.c \ + openmp/parallel_barrier_01.c openmp/parallel_critical_01.c \ + openmp/parallel_critical_inline_01.c \ + openmp/parallel_critical_named_01.c \ + openmp/parallel_critical_named_inline_01.c \ + openmp/parallel_for_01.c openmp/parallel_for_02.c \ + openmp/parallel_for_ordered_01.c openmp/parallel_master_01.c \ + openmp/parallel_master_inline_01.c \ + openmp/parallel_nested_lock_01.c openmp/parallel_sections_01.c \ + openmp/parallel_sections_combined_01.c \ + openmp/parallel_simple_lock_01.c \ + openmp/parallel_single_copyprivate_01.c \ + openmp/parallel_single_copyprivate_inline_01.c \ + openmp/parallel_single_inline_01.c \ + openmp/parallel_single_nowait_01.c \ + openmp/parallel_single_wait_01.c openmp/task_01.c \ + openmp/task_02.c openmp/task_03.c openmp/taskgroup_01.c \ + openmp/taskgroup_02.c openmp/taskloop.c openmp/taskwait_01.c \ + $(overlap_gpu_concurrency_SOURCES) overlap/overlap.c \ + parallel_tasks/combined_worker_assign_workerid.c \ + parallel_tasks/cuda_only.c \ + parallel_tasks/explicit_combined_worker.c \ + parallel_tasks/parallel_kernels.c \ + 
parallel_tasks/parallel_kernels_spmd.c \ + parallel_tasks/parallel_kernels_trivial.c \ + parallel_tasks/spmd_peager.c parallel_tasks/swap.c \ + perfmodels/feed.c perfmodels/memory.c \ + $(perfmodels_non_linear_regression_based_SOURCES) \ + perfmodels/path.c perfmodels/regression_based_check.c \ + perfmodels/regression_based_energy.c \ + $(perfmodels_regression_based_gpu_SOURCES) \ + $(perfmodels_regression_based_memset_SOURCES) \ + perfmodels/regression_based_multiimpl.c perfmodels/user_base.c \ + perfmodels/valid_model.c perfmodels/value_nan.c \ + sched_ctx/sched_ctx_hierarchy.c sched_ctx/sched_ctx_list.c \ + sched_ctx/sched_ctx_policy_data.c \ + sched_policies/data_locality.c \ + sched_policies/execute_all_tasks.c sched_policies/prio.c \ + sched_policies/simple_cpu_gpu_sched.c \ + sched_policies/simple_deps.c sched_policies/workerids.c +DIST_SOURCES = datawizard/acquire_cb.c datawizard/acquire_cb_insert.c \ + $(am__datawizard_acquire_release_SOURCES_DIST) \ + $(am__datawizard_acquire_release2_SOURCES_DIST) \ + $(am__datawizard_acquire_release_to_SOURCES_DIST) \ + datawizard/acquire_try.c datawizard/allocate.c \ + datawizard/allocate_many_numa_nodes.c datawizard/bcsr.c \ + datawizard/cache.c datawizard/commute.c datawizard/commute2.c \ + datawizard/copy.c \ + datawizard/critical_section_with_void_interface.c \ + datawizard/data_deinitialize.c datawizard/data_implicit_deps.c \ + datawizard/data_invalidation.c datawizard/data_register.c \ + datawizard/deinitialize_pending_requests.c datawizard/deps.c \ + datawizard/dining_philosophers.c datawizard/double_parameter.c \ + datawizard/dsm_stress.c \ + $(am__datawizard_gpu_ptr_register_SOURCES_DIST) \ + $(am__datawizard_gpu_register_SOURCES_DIST) \ + datawizard/handle_to_pointer.c \ + $(am__datawizard_in_place_partition_SOURCES_DIST) \ + $(am__datawizard_increment_init_SOURCES_DIST) \ + $(am__datawizard_increment_redux_SOURCES_DIST) \ + $(am__datawizard_increment_redux_lazy_SOURCES_DIST) \ + 
$(am__datawizard_increment_redux_partition_SOURCES_DIST) \ + $(am__datawizard_increment_redux_v2_SOURCES_DIST) \ + $(am__datawizard_increment_redux_with_args_SOURCES_DIST) \ + $(am__datawizard_interfaces_bcsr_bcsr_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_block_block_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_coo_coo_interface_SOURCES_DIST) \ + datawizard/interfaces/copy_interfaces.c \ + $(am__datawizard_interfaces_csr_csr_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_matrix_matrix_interface_SOURCES_DIST) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES) \ + $(datawizard_interfaces_multiformat_advanced_same_handle_SOURCES) \ + $(am__datawizard_interfaces_multiformat_multiformat_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_ndim_ndim_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_tensor_tensor_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_variable_variable_interface_SOURCES_DIST) \ + $(am__datawizard_interfaces_vector_vector_interface_SOURCES_DIST) \ + $(datawizard_interfaces_void_void_interface_SOURCES) \ + datawizard/invalidate_pending_requests.c \ + datawizard/lazy_allocation.c datawizard/locality.c \ + datawizard/manual_reduction.c \ + $(am__datawizard_mpi_like_SOURCES_DIST) \ + $(am__datawizard_mpi_like_async_SOURCES_DIST) \ + datawizard/no_unregister.c datawizard/noreclaim.c \ + datawizard/nowhere.c datawizard/numa_overflow.c \ + $(am__datawizard_partition_dep_SOURCES_DIST) \ + datawizard/partition_init.c \ + $(am__datawizard_partition_lazy_SOURCES_DIST) \ + datawizard/partition_wontuse.c \ + datawizard/partitioned_acquire.c \ + datawizard/partitioned_initialization.c \ + datawizard/readers_and_writers.c 
datawizard/readonly.c \ + datawizard/reclaim.c datawizard/redux_acquire.c \ + $(am__datawizard_scratch_SOURCES_DIST) \ + datawizard/scratch_reuse.c datawizard/simgrid-locality.c \ + $(am__datawizard_specific_node_SOURCES_DIST) \ + datawizard/specific_node_same.c \ + $(am__datawizard_sync_and_notify_data_SOURCES_DIST) \ + $(am__datawizard_sync_and_notify_data_implicit_SOURCES_DIST) \ + datawizard/sync_with_data_with_mem.c \ + datawizard/sync_with_data_with_mem_non_blocking.c \ + datawizard/sync_with_data_with_mem_non_blocking_implicit.c \ + datawizard/task_with_multiple_time_the_same_handle.c \ + datawizard/temporary_partition.c \ + datawizard/temporary_partition_implicit.c \ + datawizard/temporary_partition_read.c \ + $(datawizard_test_arbiter_SOURCES) datawizard/unpartition.c \ + datawizard/user_interaction_implicit.c \ + datawizard/variable_parameters.c datawizard/variable_size.c \ + datawizard/write_only_tmp_buffer.c \ + $(am__datawizard_wt_broadcast_SOURCES_DIST) \ + $(am__datawizard_wt_host_SOURCES_DIST) disk/disk_compute.c \ + disk/disk_copy.c disk/disk_copy_to_disk.c \ + disk/disk_copy_unpack.c disk/disk_pack.c disk/mem_reclaim.c \ + energy/energy_efficiency.c errorcheck/invalid_blocking_calls.c \ + errorcheck/invalid_tasks.c errorcheck/starpu_init_noworker.c \ + errorcheck/workers_cpuid.c fault-tolerance/retry.c \ + $(am__fortran90_init_01_SOURCES_DIST) helper/cublasLt_init.c \ + helper/cublas_init.c helper/cusparse_init.c \ + helper/execute_on_all.c helper/hipblas_init.c \ + helper/pinned_memory.c helper/starpu_create_sync_task.c \ + helper/starpu_data_cpy.c \ + $(am__helper_starpu_data_dup_ro_SOURCES_DIST) loader.c \ + main/bind.c main/callback.c main/codelet_null_callback.c \ + main/const_codelet.c main/deadlock.c \ + main/declare_deps_after_submission.c \ + main/declare_deps_after_submission_synchronous.c \ + main/declare_deps_in_callback.c main/deploop.c \ + main/deprecated_func.c main/display_binding.c \ + main/driver_api/init_run_deinit.c 
main/driver_api/run_driver.c \ + main/empty_task.c main/empty_task_chain.c \ + main/empty_task_sync_point.c \ + main/empty_task_sync_point_tasks.c \ + main/execute_on_a_specific_worker.c main/execute_schedule.c \ + main/get_children_tasks.c main/get_current_task.c \ + main/hwloc_cpuset.c main/insert_task.c \ + main/insert_task_array.c main/insert_task_dyn_handles.c \ + main/insert_task_many.c main/insert_task_nullcodelet.c \ + main/insert_task_pack.c main/insert_task_value.c \ + $(am__main_insert_task_where_SOURCES_DIST) main/job.c \ + main/mkdtemp.c main/multithreaded.c main/multithreaded_init.c \ + main/pack.c main/pause_resume.c main/regenerate.c \ + main/regenerate_pipeline.c main/restart.c main/starpu_init.c \ + main/starpu_task_bundle.c main/starpu_task_wait.c \ + main/starpu_task_wait_for_all.c main/starpu_worker_exists.c \ + main/static_restartable.c main/static_restartable_tag.c \ + main/static_restartable_using_initializer.c \ + $(am__main_subgraph_repeat_SOURCES_DIST) \ + $(am__main_subgraph_repeat_regenerate_SOURCES_DIST) \ + $(am__main_subgraph_repeat_regenerate_tag_SOURCES_DIST) \ + $(am__main_subgraph_repeat_regenerate_tag_cycle_SOURCES_DIST) \ + $(am__main_subgraph_repeat_tag_SOURCES_DIST) main/submit.c \ + main/tag_get_task.c main/tag_task_data_deps.c \ + main/tag_wait_api.c main/task_end_dep.c main/task_wait_api.c \ + main/wait_all_regenerable_tasks.c \ + maxfpga/max_fpga_advanced_static.c \ + maxfpga/max_fpga_basic_static.c maxfpga/max_fpga_dynamic.c \ + maxfpga/max_fpga_mux.c microbenchs/async_tasks_overhead.c \ + microbenchs/bandwidth.c microbenchs/display_structures_size.c \ + microbenchs/local_pingpong.c microbenchs/matrix_as_vector.c \ + microbenchs/parallel_dependent_homogeneous_tasks_data.c \ + microbenchs/parallel_independent_heterogeneous_tasks.c \ + microbenchs/parallel_independent_heterogeneous_tasks_data.c \ + microbenchs/parallel_independent_homogeneous_tasks.c \ + microbenchs/parallel_independent_homogeneous_tasks_data.c \ + 
microbenchs/parallel_redux_heterogeneous_tasks_data.c \ + microbenchs/parallel_redux_homogeneous_tasks_data.c \ + microbenchs/prefetch_data_on_node.c \ + microbenchs/redundant_buffer.c \ + microbenchs/sync_tasks_overhead.c microbenchs/tasks_overhead.c \ + microbenchs/tasks_size_overhead.c openmp/api_01.c \ + openmp/array_slice_01.c openmp/cuda_task_01.c \ + openmp/environment.c openmp/init_exit_01.c \ + openmp/init_exit_02.c openmp/parallel_01.c \ + openmp/parallel_02.c openmp/parallel_03.c \ + openmp/parallel_barrier_01.c openmp/parallel_critical_01.c \ + openmp/parallel_critical_inline_01.c \ + openmp/parallel_critical_named_01.c \ + openmp/parallel_critical_named_inline_01.c \ + openmp/parallel_for_01.c openmp/parallel_for_02.c \ + openmp/parallel_for_ordered_01.c openmp/parallel_master_01.c \ + openmp/parallel_master_inline_01.c \ + openmp/parallel_nested_lock_01.c openmp/parallel_sections_01.c \ + openmp/parallel_sections_combined_01.c \ + openmp/parallel_simple_lock_01.c \ + openmp/parallel_single_copyprivate_01.c \ + openmp/parallel_single_copyprivate_inline_01.c \ + openmp/parallel_single_inline_01.c \ + openmp/parallel_single_nowait_01.c \ + openmp/parallel_single_wait_01.c openmp/task_01.c \ + openmp/task_02.c openmp/task_03.c openmp/taskgroup_01.c \ + openmp/taskgroup_02.c openmp/taskloop.c openmp/taskwait_01.c \ + $(am__overlap_gpu_concurrency_SOURCES_DIST) overlap/overlap.c \ + parallel_tasks/combined_worker_assign_workerid.c \ + parallel_tasks/cuda_only.c \ + parallel_tasks/explicit_combined_worker.c \ + parallel_tasks/parallel_kernels.c \ + parallel_tasks/parallel_kernels_spmd.c \ + parallel_tasks/parallel_kernels_trivial.c \ + parallel_tasks/spmd_peager.c parallel_tasks/swap.c \ + perfmodels/feed.c perfmodels/memory.c \ + $(am__perfmodels_non_linear_regression_based_SOURCES_DIST) \ + perfmodels/path.c perfmodels/regression_based_check.c \ + perfmodels/regression_based_energy.c \ + $(am__perfmodels_regression_based_gpu_SOURCES_DIST) \ + 
$(am__perfmodels_regression_based_memset_SOURCES_DIST) \ + perfmodels/regression_based_multiimpl.c perfmodels/user_base.c \ + perfmodels/valid_model.c perfmodels/value_nan.c \ + sched_ctx/sched_ctx_hierarchy.c sched_ctx/sched_ctx_list.c \ + sched_ctx/sched_ctx_policy_data.c \ + sched_policies/data_locality.c \ + sched_policies/execute_all_tasks.c sched_policies/prio.c \ + sched_policies/simple_cpu_gpu_sched.c \ + sched_policies/simple_deps.c sched_policies/workerids.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(nobase_STARPU_OPENCL_DATA_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +am__EXEEXT_11 = microbenchs/parallel_independent_homogeneous_tasks.sh \ + microbenchs/parallel_independent_heterogeneous_tasks.sh \ + microbenchs/parallel_independent_homogeneous_tasks_data.sh \ + microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ + microbenchs/parallel_redux_homogeneous_tasks_data.sh \ + microbenchs/parallel_redux_heterogeneous_tasks_data.sh \ + microbenchs/parallel_dependent_homogeneous_tasks_data.sh +@STARPU_SIMGRID_TRUE@am__EXEEXT_12 = $(am__EXEEXT_11) +am__EXEEXT_13 = $(am__append_8) $(am__append_16) $(am__EXEEXT_12) \ + datawizard/locality.sh microbenchs/bandwidth_scheds.sh \ + $(am__append_18) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = model-checking +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + 
while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ 
+FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) \ + $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) \ + $(STARPU_MAX_FPGA_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ 
+LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = 
@PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = 
@STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = 
@hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to 
make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. +@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +EXTRA_DIST = \ + helper.h \ + datawizard/locality.sh \ + overlap/overlap.sh \ + datawizard/scal.h \ + regression/profiles.in \ + regression/regression.sh.in \ + regression/profiles.build.only.in \ + microbenchs/tasks_data_overhead.sh \ + microbenchs/sync_tasks_data_overhead.sh \ + microbenchs/async_tasks_data_overhead.sh \ + microbenchs/tasks_size_overhead.sh \ + microbenchs/tasks_size_overhead_sched.sh \ + microbenchs/tasks_size_overhead_scheds.sh \ + microbenchs/tasks_size_overhead.gp \ + microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ + microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ + microbenchs/parallel_independent_heterogeneous_tasks.sh \ + 
microbenchs/parallel_independent_homogeneous_tasks_data.sh \ + microbenchs/parallel_independent_homogeneous_tasks.sh \ + microbenchs/bandwidth_scheds.sh \ + microbenchs/starpu_check.sh \ + energy/static.sh \ + energy/dynamic.sh \ + energy/perfs.gp \ + datawizard/scratch_opencl_kernel.cl \ + datawizard/sync_and_notify_data_opencl_codelet.cl\ + coverage/coverage.sh \ + variable/increment.h \ + variable/increment_opencl_kernel.cl \ + variable/redux_opencl_kernel.cl \ + variable/neutral_opencl_kernel.cl \ + datawizard/interfaces/test_interfaces.h \ + datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ + datawizard/interfaces/coo/coo_opencl_kernel.cl \ + datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ + datawizard/interfaces/variable/variable_opencl_kernel.cl \ + datawizard/interfaces/vector/vector_opencl_kernel.cl \ + datawizard/interfaces/multiformat/multiformat_types.h \ + datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ + datawizard/interfaces/multiformat/advanced/generic.h \ + datawizard/interfaces/csr/csr_opencl_kernel.cl \ + datawizard/interfaces/block/block_opencl_kernel.cl \ + datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ + datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ + perfmodels/opencl_memset_kernel.cl \ + $(MICROBENCHS:=.sh) \ + microbenchs/microbench.sh \ + memory/memstress2.gp \ + memory/memstress2.sh \ + memory/memstress.gp \ + memory/memstress.sh \ + maxfpga/LMemLoopbackCpuCode.c \ + maxfpga/MyTasksManager.maxj \ + maxfpga/MyTasksMuxManager.maxj \ + maxfpga/README.txt \ + maxfpga/StreamFMACpuCode.cpp \ + maxfpga/Task1.maxj \ + maxfpga/Task2.maxj \ + maxfpga/Task3.maxj \ + datawizard/interfaces/test_interfaces.sh + +CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log \ + *.mod *.png *.output tasks.rec perfs.rec */perfs.rec \ + */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 \ + bandwidth-*.dat bandwidth.gp bandwidth.eps 
bandwidth.svg *.csv \ + *.md *.Rmd *.pdf *.html $(am__append_123) +BUILT_SOURCES = +SUBDIRS = $(am__append_19) +examplebindir = $(libdir)/starpu/examples +@STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = \ +@STARPU_USE_OPENCL_TRUE@ variable/increment_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl_codelet.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/block/block_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/coo/coo_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/csr/csr_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/vector/vector_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/variable/variable_opencl_kernel.cl \ +@STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset_kernel.cl +testbindir = $(libdir)/starpu/tests +SHELL_TESTS = $(am__append_8) $(am__append_16) $(am__append_17) \ + datawizard/locality.sh microbenchs/bandwidth_scheds.sh \ + $(am__append_18) +myPROGRAMS = main/callback main/bind main/mkdtemp \ + main/execute_schedule main/insert_task_pack \ + main/insert_task_nullcodelet main/insert_task_where \ + main/multithreaded_init main/empty_task main/empty_task_chain \ + main/starpu_worker_exists main/codelet_null_callback \ + datawizard/allocate datawizard/acquire_cb datawizard/deps \ + datawizard/user_interaction_implicit \ + datawizard/interfaces/copy_interfaces 
datawizard/numa_overflow \ + datawizard/locality datawizard/variable_size \ + errorcheck/starpu_init_noworker errorcheck/invalid_tasks \ + helper/cublas_init helper/cublasLt_init helper/cusparse_init \ + helper/hipblas_init helper/pinned_memory helper/execute_on_all \ + microbenchs/display_structures_size microbenchs/local_pingpong \ + overlap/overlap sched_ctx/sched_ctx_list \ + sched_ctx/sched_ctx_policy_data openmp/init_exit_01 \ + openmp/init_exit_02 openmp/environment openmp/api_01 \ + openmp/parallel_01 openmp/parallel_02 openmp/parallel_03 \ + openmp/parallel_barrier_01 openmp/parallel_master_01 \ + openmp/parallel_master_inline_01 \ + openmp/parallel_single_wait_01 \ + openmp/parallel_single_nowait_01 \ + openmp/parallel_single_inline_01 \ + openmp/parallel_single_copyprivate_01 \ + openmp/parallel_single_copyprivate_inline_01 \ + openmp/parallel_critical_01 openmp/parallel_critical_inline_01 \ + openmp/parallel_critical_named_01 \ + openmp/parallel_critical_named_inline_01 \ + openmp/parallel_simple_lock_01 openmp/parallel_nested_lock_01 \ + openmp/parallel_for_01 openmp/parallel_for_02 \ + openmp/parallel_for_ordered_01 openmp/parallel_sections_01 \ + openmp/parallel_sections_combined_01 openmp/task_01 \ + openmp/task_02 openmp/task_03 openmp/taskloop \ + openmp/taskwait_01 openmp/taskgroup_01 openmp/taskgroup_02 \ + openmp/array_slice_01 openmp/cuda_task_01 perfmodels/value_nan \ + sched_policies/workerids $(am__append_9) $(am__append_10) \ + $(am__append_12) $(am__append_13) $(am__append_14) +MICROBENCHS = \ + microbenchs/parallel_independent_homogeneous_tasks \ + microbenchs/parallel_independent_heterogeneous_tasks \ + microbenchs/parallel_independent_homogeneous_tasks_data \ + microbenchs/parallel_independent_heterogeneous_tasks_data \ + microbenchs/parallel_redux_homogeneous_tasks_data \ + microbenchs/parallel_redux_heterogeneous_tasks_data \ + microbenchs/parallel_dependent_homogeneous_tasks_data + +examplebin_SCRIPTS = \ + 
microbenchs/tasks_data_overhead.sh \ + microbenchs/sync_tasks_data_overhead.sh \ + microbenchs/async_tasks_data_overhead.sh \ + microbenchs/tasks_size_overhead.gp \ + microbenchs/tasks_size_overhead.sh + + +####################### +# Source files # +####################### +datawizard_acquire_release_SOURCES = datawizard/acquire_release.c \ + variable/increment.c $(am__append_20) $(am__append_21) \ + $(am__append_22) +datawizard_acquire_release2_SOURCES = datawizard/acquire_release2.c \ + variable/increment.c $(am__append_23) $(am__append_24) \ + $(am__append_25) +datawizard_acquire_release_to_SOURCES = \ + datawizard/acquire_release_to.c variable/increment.c \ + $(am__append_26) $(am__append_27) $(am__append_28) +datawizard_wt_host_SOURCES = datawizard/wt_host.c variable/increment.c \ + $(am__append_29) $(am__append_30) $(am__append_31) +datawizard_wt_broadcast_SOURCES = datawizard/wt_broadcast.c \ + variable/increment.c $(am__append_32) $(am__append_33) \ + $(am__append_34) +datawizard_increment_redux_lazy_SOURCES = \ + datawizard/increment_redux_lazy.c variable/increment.c \ + $(am__append_35) $(am__append_36) $(am__append_37) +datawizard_increment_redux_SOURCES = datawizard/increment_redux.c \ + variable/increment.c $(am__append_38) $(am__append_39) \ + $(am__append_40) +datawizard_increment_redux_partition_SOURCES = \ + datawizard/increment_redux_partition.c variable/increment.c \ + $(am__append_41) $(am__append_42) $(am__append_43) +datawizard_increment_redux_v2_SOURCES = \ + datawizard/increment_redux_v2.c variable/increment.c \ + $(am__append_44) $(am__append_45) $(am__append_46) +datawizard_increment_redux_with_args_SOURCES = \ + datawizard/increment_redux_with_args.c variable/increment.c \ + $(am__append_47) $(am__append_48) $(am__append_49) +datawizard_increment_init_SOURCES = datawizard/increment_init.c \ + variable/increment.c $(am__append_50) $(am__append_51) \ + $(am__append_52) +datawizard_scratch_SOURCES = datawizard/scratch.c $(am__append_53) \ + 
$(am__append_54) +datawizard_mpi_like_SOURCES = datawizard/mpi_like.c \ + variable/increment.c $(am__append_55) $(am__append_56) \ + $(am__append_57) +datawizard_mpi_like_async_SOURCES = datawizard/mpi_like_async.c \ + variable/increment.c $(am__append_58) $(am__append_59) \ + $(am__append_60) +datawizard_sync_and_notify_data_SOURCES = \ + datawizard/sync_and_notify_data.c $(am__append_61) \ + $(am__append_62) +datawizard_sync_and_notify_data_implicit_SOURCES = \ + datawizard/sync_and_notify_data_implicit.c $(am__append_63) \ + $(am__append_64) +datawizard_in_place_partition_SOURCES = \ + datawizard/in_place_partition.c datawizard/scal.c \ + $(am__append_65) $(am__append_66) +datawizard_partition_dep_SOURCES = datawizard/partition_dep.c \ + datawizard/scal.c $(am__append_67) $(am__append_68) +datawizard_partition_lazy_SOURCES = datawizard/partition_lazy.c \ + datawizard/scal.c $(am__append_69) $(am__append_70) +datawizard_gpu_register_SOURCES = datawizard/gpu_register.c \ + datawizard/scal.c $(am__append_71) $(am__append_72) +datawizard_gpu_ptr_register_SOURCES = datawizard/gpu_ptr_register.c \ + datawizard/scal.c $(am__append_73) $(am__append_74) +datawizard_specific_node_SOURCES = datawizard/specific_node.c \ + variable/increment.c $(am__append_75) $(am__append_76) \ + $(am__append_77) +datawizard_test_arbiter_SOURCES = \ + datawizard/test_arbiter.cpp + +main_starpu_worker_exists_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations +main_insert_task_where_SOURCES = main/insert_task_where.c \ + variable/increment.c $(am__append_78) $(am__append_79) \ + $(am__append_80) +main_subgraph_repeat_SOURCES = main/subgraph_repeat.c \ + variable/increment.c $(am__append_81) $(am__append_82) \ + $(am__append_83) +main_subgraph_repeat_tag_SOURCES = main/subgraph_repeat_tag.c \ + variable/increment.c $(am__append_84) $(am__append_85) \ + $(am__append_86) +main_subgraph_repeat_regenerate_SOURCES = \ + 
main/subgraph_repeat_regenerate.c variable/increment.c \ + $(am__append_87) $(am__append_88) $(am__append_89) +main_subgraph_repeat_regenerate_tag_SOURCES = \ + main/subgraph_repeat_regenerate_tag.c variable/increment.c \ + $(am__append_90) $(am__append_91) $(am__append_92) +main_subgraph_repeat_regenerate_tag_cycle_SOURCES = \ + main/subgraph_repeat_regenerate_tag_cycle.c \ + variable/increment.c $(am__append_93) $(am__append_94) \ + $(am__append_95) +@STARPU_HAVE_FC_TRUE@fortran90_init_01_SOURCES = \ +@STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.f90 \ +@STARPU_HAVE_FC_TRUE@ fortran90/init_01.f90 + +helper_starpu_data_dup_ro_SOURCES = helper/starpu_data_dup_ro.c \ + variable/increment.c $(am__append_96) $(am__append_97) \ + $(am__append_98) +datawizard_interfaces_copy_interfaces_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +datawizard_data_register_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################### +# Block interface # +################### +datawizard_interfaces_block_block_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/block/block_interface.c $(am__append_99) \ + $(am__append_100) +datawizard_interfaces_block_block_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################### +# Tensor interface # +################### +datawizard_interfaces_tensor_tensor_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/tensor/tensor_interface.c \ + $(am__append_101) $(am__append_102) +datawizard_interfaces_tensor_tensor_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################### +# Ndim interface # +################### +datawizard_interfaces_ndim_ndim_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/ndim/ndim_interface.c $(am__append_103) \ + $(am__append_104) +datawizard_interfaces_ndim_ndim_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################## +# BCSR interface # +################## 
+datawizard_interfaces_bcsr_bcsr_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/bcsr/bcsr_interface.c $(am__append_105) \ + $(am__append_106) +datawizard_interfaces_bcsr_bcsr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################# +# COO interface # +################# +datawizard_interfaces_coo_coo_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/coo/coo_interface.c $(am__append_107) \ + $(am__append_108) +datawizard_interfaces_coo_coo_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################# +# CSR interface # +################# +datawizard_interfaces_csr_csr_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/csr/csr_interface.c $(am__append_109) \ + $(am__append_110) +datawizard_interfaces_csr_csr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +#################### +# Vector interface # +#################### +datawizard_interfaces_vector_vector_interface_SOURCES = \ + datawizard/interfaces/vector/vector_interface.c \ + datawizard/interfaces/test_interfaces.c $(am__append_111) \ + $(am__append_112) +datawizard_interfaces_vector_vector_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +#################### +# Matrix interface # +#################### +datawizard_interfaces_matrix_matrix_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/matrix/matrix_interface.c \ + $(am__append_113) $(am__append_114) +datawizard_interfaces_matrix_matrix_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +######################### +# Multiformat interface # +######################### +datawizard_interfaces_multiformat_multiformat_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/multiformat/multiformat_interface.c \ + datawizard/interfaces/multiformat/multiformat_conversion_codelets.c \ + $(am__append_115) $(am__append_116) 
+datawizard_interfaces_multiformat_multiformat_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c + +datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_data_release.c + +datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_worker.c + +datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c + +datawizard_interfaces_multiformat_advanced_same_handle_SOURCES = \ + datawizard/interfaces/multiformat/advanced/generic.c \ + datawizard/interfaces/multiformat/advanced/same_handle.c + + +###################### +# Variable interface # +###################### +datawizard_interfaces_variable_variable_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/variable/variable_interface.c \ + $(am__append_117) $(am__append_118) +datawizard_interfaces_variable_variable_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) + +################## +# Void interface # +################## +datawizard_interfaces_void_void_interface_SOURCES = \ + datawizard/interfaces/test_interfaces.c \ + datawizard/interfaces/void/void_interface.c + +datawizard_interfaces_void_void_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +overlap_gpu_concurrency_SOURCES = overlap/gpu_concurrency.c \ + $(am__append_119) +perfmodels_regression_based_memset_SOURCES = \ + perfmodels/regression_based_memset.c $(am__append_120) 
+perfmodels_regression_based_gpu_SOURCES = \ + perfmodels/regression_based_gpu.c $(am__append_121) +maxfpga_max_fpga_basic_static_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o + +maxfpga_max_fpga_advanced_static_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o + +maxfpga_max_fpga_dynamic_LDADD = $(LDADD) \ + maxfpga/slic_MyTasks.o + +maxfpga_max_fpga_mux_LDADD = $(LDADD) \ + maxfpga/slic_MyTasksMux.o + +perfmodels_non_linear_regression_based_SOURCES = \ + perfmodels/non_linear_regression_based.c $(am__append_122) +sched_policies_execute_all_tasks_LDFLAGS = $(AM_LDFLAGS) -lm +sched_policies_execute_all_tasks_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) +@STARPU_USE_MAX_FPGA_TRUE@MAX_DFE = MAX5C_$(MAX_TARGET) +@STARPU_USE_MAX_FPGA_TRUE@MAXJ = $(wildcard maxfpga/*.maxj) +@STARPU_USE_MAX_FPGA_TRUE@CLASS = $(MAXJ:.maxj=.class) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cpp .cu .cubin .f90 .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign tests/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +install-examplebinPROGRAMS: $(examplebin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir 
files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(examplebindir)" && rm -f $$files + +clean-examplebinPROGRAMS: + @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +datawizard/$(am__dirstamp): + @$(MKDIR_P) datawizard + @: > datawizard/$(am__dirstamp) +datawizard/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/$(DEPDIR) + @: > datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/acquire_cb.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_cb$(EXEEXT): $(datawizard_acquire_cb_OBJECTS) $(datawizard_acquire_cb_DEPENDENCIES) $(EXTRA_datawizard_acquire_cb_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_cb$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(datawizard_acquire_cb_OBJECTS) $(datawizard_acquire_cb_LDADD) $(LIBS) +datawizard/acquire_cb_insert.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_cb_insert$(EXEEXT): $(datawizard_acquire_cb_insert_OBJECTS) $(datawizard_acquire_cb_insert_DEPENDENCIES) $(EXTRA_datawizard_acquire_cb_insert_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_cb_insert$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_acquire_cb_insert_OBJECTS) $(datawizard_acquire_cb_insert_LDADD) $(LIBS) +datawizard/acquire_release.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +variable/$(am__dirstamp): + @$(MKDIR_P) variable + @: > variable/$(am__dirstamp) +variable/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) variable/$(DEPDIR) + @: > variable/$(DEPDIR)/$(am__dirstamp) +variable/increment.$(OBJEXT): variable/$(am__dirstamp) \ + variable/$(DEPDIR)/$(am__dirstamp) +variable/increment_cuda.$(OBJEXT): variable/$(am__dirstamp) \ + variable/$(DEPDIR)/$(am__dirstamp) +variable/increment_hip.$(OBJEXT): variable/$(am__dirstamp) \ + variable/$(DEPDIR)/$(am__dirstamp) +variable/increment_opencl.$(OBJEXT): variable/$(am__dirstamp) \ + variable/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_release$(EXEEXT): $(datawizard_acquire_release_OBJECTS) $(datawizard_acquire_release_DEPENDENCIES) $(EXTRA_datawizard_acquire_release_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_release$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release_OBJECTS) $(datawizard_acquire_release_LDADD) $(LIBS) +datawizard/acquire_release2.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_release2$(EXEEXT): $(datawizard_acquire_release2_OBJECTS) $(datawizard_acquire_release2_DEPENDENCIES) $(EXTRA_datawizard_acquire_release2_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_release2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release2_OBJECTS) 
$(datawizard_acquire_release2_LDADD) $(LIBS) +datawizard/acquire_release_to.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_release_to$(EXEEXT): $(datawizard_acquire_release_to_OBJECTS) $(datawizard_acquire_release_to_DEPENDENCIES) $(EXTRA_datawizard_acquire_release_to_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_release_to$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release_to_OBJECTS) $(datawizard_acquire_release_to_LDADD) $(LIBS) +datawizard/acquire_try.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/acquire_try$(EXEEXT): $(datawizard_acquire_try_OBJECTS) $(datawizard_acquire_try_DEPENDENCIES) $(EXTRA_datawizard_acquire_try_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/acquire_try$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_acquire_try_OBJECTS) $(datawizard_acquire_try_LDADD) $(LIBS) +datawizard/allocate.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/allocate$(EXEEXT): $(datawizard_allocate_OBJECTS) $(datawizard_allocate_DEPENDENCIES) $(EXTRA_datawizard_allocate_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/allocate$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_allocate_OBJECTS) $(datawizard_allocate_LDADD) $(LIBS) +datawizard/allocate_many_numa_nodes.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/allocate_many_numa_nodes$(EXEEXT): $(datawizard_allocate_many_numa_nodes_OBJECTS) $(datawizard_allocate_many_numa_nodes_DEPENDENCIES) $(EXTRA_datawizard_allocate_many_numa_nodes_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/allocate_many_numa_nodes$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_allocate_many_numa_nodes_OBJECTS) $(datawizard_allocate_many_numa_nodes_LDADD) $(LIBS) +datawizard/bcsr.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/bcsr$(EXEEXT): 
$(datawizard_bcsr_OBJECTS) $(datawizard_bcsr_DEPENDENCIES) $(EXTRA_datawizard_bcsr_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/bcsr$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_bcsr_OBJECTS) $(datawizard_bcsr_LDADD) $(LIBS) +datawizard/cache.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/cache$(EXEEXT): $(datawizard_cache_OBJECTS) $(datawizard_cache_DEPENDENCIES) $(EXTRA_datawizard_cache_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/cache$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_cache_OBJECTS) $(datawizard_cache_LDADD) $(LIBS) +datawizard/commute.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/commute$(EXEEXT): $(datawizard_commute_OBJECTS) $(datawizard_commute_DEPENDENCIES) $(EXTRA_datawizard_commute_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/commute$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_commute_OBJECTS) $(datawizard_commute_LDADD) $(LIBS) +datawizard/commute2.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/commute2$(EXEEXT): $(datawizard_commute2_OBJECTS) $(datawizard_commute2_DEPENDENCIES) $(EXTRA_datawizard_commute2_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/commute2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_commute2_OBJECTS) $(datawizard_commute2_LDADD) $(LIBS) +datawizard/copy.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/copy$(EXEEXT): $(datawizard_copy_OBJECTS) $(datawizard_copy_DEPENDENCIES) $(EXTRA_datawizard_copy_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/copy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_copy_OBJECTS) $(datawizard_copy_LDADD) $(LIBS) +datawizard/critical_section_with_void_interface.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/critical_section_with_void_interface$(EXEEXT): 
$(datawizard_critical_section_with_void_interface_OBJECTS) $(datawizard_critical_section_with_void_interface_DEPENDENCIES) $(EXTRA_datawizard_critical_section_with_void_interface_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/critical_section_with_void_interface$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_critical_section_with_void_interface_OBJECTS) $(datawizard_critical_section_with_void_interface_LDADD) $(LIBS) +datawizard/data_deinitialize.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/data_deinitialize$(EXEEXT): $(datawizard_data_deinitialize_OBJECTS) $(datawizard_data_deinitialize_DEPENDENCIES) $(EXTRA_datawizard_data_deinitialize_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/data_deinitialize$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_data_deinitialize_OBJECTS) $(datawizard_data_deinitialize_LDADD) $(LIBS) +datawizard/data_implicit_deps.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/data_implicit_deps$(EXEEXT): $(datawizard_data_implicit_deps_OBJECTS) $(datawizard_data_implicit_deps_DEPENDENCIES) $(EXTRA_datawizard_data_implicit_deps_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/data_implicit_deps$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_data_implicit_deps_OBJECTS) $(datawizard_data_implicit_deps_LDADD) $(LIBS) +datawizard/data_invalidation.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/data_invalidation$(EXEEXT): $(datawizard_data_invalidation_OBJECTS) $(datawizard_data_invalidation_DEPENDENCIES) $(EXTRA_datawizard_data_invalidation_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/data_invalidation$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_data_invalidation_OBJECTS) $(datawizard_data_invalidation_LDADD) $(LIBS) +datawizard/data_register-data_register.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + 
+datawizard/data_register$(EXEEXT): $(datawizard_data_register_OBJECTS) $(datawizard_data_register_DEPENDENCIES) $(EXTRA_datawizard_data_register_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/data_register$(EXEEXT) + $(AM_V_CCLD)$(datawizard_data_register_LINK) $(datawizard_data_register_OBJECTS) $(datawizard_data_register_LDADD) $(LIBS) +datawizard/deinitialize_pending_requests.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/deinitialize_pending_requests$(EXEEXT): $(datawizard_deinitialize_pending_requests_OBJECTS) $(datawizard_deinitialize_pending_requests_DEPENDENCIES) $(EXTRA_datawizard_deinitialize_pending_requests_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/deinitialize_pending_requests$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_deinitialize_pending_requests_OBJECTS) $(datawizard_deinitialize_pending_requests_LDADD) $(LIBS) +datawizard/deps.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/deps$(EXEEXT): $(datawizard_deps_OBJECTS) $(datawizard_deps_DEPENDENCIES) $(EXTRA_datawizard_deps_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/deps$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_deps_OBJECTS) $(datawizard_deps_LDADD) $(LIBS) +datawizard/dining_philosophers.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/dining_philosophers$(EXEEXT): $(datawizard_dining_philosophers_OBJECTS) $(datawizard_dining_philosophers_DEPENDENCIES) $(EXTRA_datawizard_dining_philosophers_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/dining_philosophers$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_dining_philosophers_OBJECTS) $(datawizard_dining_philosophers_LDADD) $(LIBS) +datawizard/double_parameter.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/double_parameter$(EXEEXT): $(datawizard_double_parameter_OBJECTS) 
$(datawizard_double_parameter_DEPENDENCIES) $(EXTRA_datawizard_double_parameter_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/double_parameter$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_double_parameter_OBJECTS) $(datawizard_double_parameter_LDADD) $(LIBS) +datawizard/dsm_stress.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/dsm_stress$(EXEEXT): $(datawizard_dsm_stress_OBJECTS) $(datawizard_dsm_stress_DEPENDENCIES) $(EXTRA_datawizard_dsm_stress_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/dsm_stress$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_dsm_stress_OBJECTS) $(datawizard_dsm_stress_LDADD) $(LIBS) +datawizard/gpu_ptr_register.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/scal.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/scal_cuda.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/gpu_ptr_register$(EXEEXT): $(datawizard_gpu_ptr_register_OBJECTS) $(datawizard_gpu_ptr_register_DEPENDENCIES) $(EXTRA_datawizard_gpu_ptr_register_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/gpu_ptr_register$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_gpu_ptr_register_OBJECTS) $(datawizard_gpu_ptr_register_LDADD) $(LIBS) +datawizard/gpu_register.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/gpu_register$(EXEEXT): $(datawizard_gpu_register_OBJECTS) $(datawizard_gpu_register_DEPENDENCIES) $(EXTRA_datawizard_gpu_register_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/gpu_register$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_gpu_register_OBJECTS) $(datawizard_gpu_register_LDADD) $(LIBS) +datawizard/handle_to_pointer.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/handle_to_pointer$(EXEEXT): $(datawizard_handle_to_pointer_OBJECTS) 
$(datawizard_handle_to_pointer_DEPENDENCIES) $(EXTRA_datawizard_handle_to_pointer_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/handle_to_pointer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_handle_to_pointer_OBJECTS) $(datawizard_handle_to_pointer_LDADD) $(LIBS) +datawizard/in_place_partition.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/in_place_partition$(EXEEXT): $(datawizard_in_place_partition_OBJECTS) $(datawizard_in_place_partition_DEPENDENCIES) $(EXTRA_datawizard_in_place_partition_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/in_place_partition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_in_place_partition_OBJECTS) $(datawizard_in_place_partition_LDADD) $(LIBS) +datawizard/increment_init.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_init$(EXEEXT): $(datawizard_increment_init_OBJECTS) $(datawizard_increment_init_DEPENDENCIES) $(EXTRA_datawizard_increment_init_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/increment_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_init_OBJECTS) $(datawizard_increment_init_LDADD) $(LIBS) +datawizard/increment_redux.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_redux$(EXEEXT): $(datawizard_increment_redux_OBJECTS) $(datawizard_increment_redux_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/increment_redux$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_OBJECTS) $(datawizard_increment_redux_LDADD) $(LIBS) +datawizard/increment_redux_lazy.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_redux_lazy$(EXEEXT): $(datawizard_increment_redux_lazy_OBJECTS) $(datawizard_increment_redux_lazy_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_lazy_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f 
datawizard/increment_redux_lazy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_lazy_OBJECTS) $(datawizard_increment_redux_lazy_LDADD) $(LIBS) +datawizard/increment_redux_partition.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_redux_partition$(EXEEXT): $(datawizard_increment_redux_partition_OBJECTS) $(datawizard_increment_redux_partition_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_partition_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/increment_redux_partition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_partition_OBJECTS) $(datawizard_increment_redux_partition_LDADD) $(LIBS) +datawizard/increment_redux_v2.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_redux_v2$(EXEEXT): $(datawizard_increment_redux_v2_OBJECTS) $(datawizard_increment_redux_v2_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_v2_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/increment_redux_v2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_v2_OBJECTS) $(datawizard_increment_redux_v2_LDADD) $(LIBS) +datawizard/increment_redux_with_args.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/increment_redux_with_args$(EXEEXT): $(datawizard_increment_redux_with_args_OBJECTS) $(datawizard_increment_redux_with_args_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_with_args_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/increment_redux_with_args$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_with_args_OBJECTS) $(datawizard_increment_redux_with_args_LDADD) $(LIBS) +datawizard/interfaces/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces + @: > datawizard/interfaces/$(am__dirstamp) +datawizard/interfaces/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/$(DEPDIR) + @: > datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) 
+datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/bcsr + @: > datawizard/interfaces/bcsr/$(am__dirstamp) +datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/bcsr/$(DEPDIR) + @: > datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.$(OBJEXT): \ + datawizard/interfaces/bcsr/$(am__dirstamp) \ + datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr/bcsr_cuda.$(OBJEXT): \ + datawizard/interfaces/bcsr/$(am__dirstamp) \ + datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.$(OBJEXT): \ + datawizard/interfaces/bcsr/$(am__dirstamp) \ + datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT): $(datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) $(datawizard_interfaces_bcsr_bcsr_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_bcsr_bcsr_interface_DEPENDENCIES) datawizard/interfaces/bcsr/$(am__dirstamp) + @rm -f datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_bcsr_bcsr_interface_LINK) $(datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) $(datawizard_interfaces_bcsr_bcsr_interface_LDADD) $(LIBS) +datawizard/interfaces/block_block_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/block/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/block + @: > datawizard/interfaces/block/$(am__dirstamp) +datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/block/$(DEPDIR) + @: > datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) 
+datawizard/interfaces/block/block_interface-block_interface.$(OBJEXT): \ + datawizard/interfaces/block/$(am__dirstamp) \ + datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/block/block_cuda.$(OBJEXT): \ + datawizard/interfaces/block/$(am__dirstamp) \ + datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/block/block_interface-block_opencl.$(OBJEXT): \ + datawizard/interfaces/block/$(am__dirstamp) \ + datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/block/block_interface$(EXEEXT): $(datawizard_interfaces_block_block_interface_OBJECTS) $(datawizard_interfaces_block_block_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_block_block_interface_DEPENDENCIES) datawizard/interfaces/block/$(am__dirstamp) + @rm -f datawizard/interfaces/block/block_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_block_block_interface_LINK) $(datawizard_interfaces_block_block_interface_OBJECTS) $(datawizard_interfaces_block_block_interface_LDADD) $(LIBS) +datawizard/interfaces/coo_coo_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/coo/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/coo + @: > datawizard/interfaces/coo/$(am__dirstamp) +datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/coo/$(DEPDIR) + @: > datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/coo/coo_interface-coo_interface.$(OBJEXT): \ + datawizard/interfaces/coo/$(am__dirstamp) \ + datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/coo/coo_cuda.$(OBJEXT): \ + datawizard/interfaces/coo/$(am__dirstamp) \ + datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/coo/coo_interface-coo_opencl.$(OBJEXT): \ + datawizard/interfaces/coo/$(am__dirstamp) \ + datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) + 
+datawizard/interfaces/coo/coo_interface$(EXEEXT): $(datawizard_interfaces_coo_coo_interface_OBJECTS) $(datawizard_interfaces_coo_coo_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_coo_coo_interface_DEPENDENCIES) datawizard/interfaces/coo/$(am__dirstamp) + @rm -f datawizard/interfaces/coo/coo_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_coo_coo_interface_LINK) $(datawizard_interfaces_coo_coo_interface_OBJECTS) $(datawizard_interfaces_coo_coo_interface_LDADD) $(LIBS) +datawizard/interfaces/copy_interfaces-copy_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/copy_interfaces$(EXEEXT): $(datawizard_interfaces_copy_interfaces_OBJECTS) $(datawizard_interfaces_copy_interfaces_DEPENDENCIES) $(EXTRA_datawizard_interfaces_copy_interfaces_DEPENDENCIES) datawizard/interfaces/$(am__dirstamp) + @rm -f datawizard/interfaces/copy_interfaces$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_copy_interfaces_LINK) $(datawizard_interfaces_copy_interfaces_OBJECTS) $(datawizard_interfaces_copy_interfaces_LDADD) $(LIBS) +datawizard/interfaces/csr_csr_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/csr + @: > datawizard/interfaces/csr/$(am__dirstamp) +datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/csr/$(DEPDIR) + @: > datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr/csr_interface-csr_interface.$(OBJEXT): \ + datawizard/interfaces/csr/$(am__dirstamp) \ + datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr/csr_cuda.$(OBJEXT): \ + datawizard/interfaces/csr/$(am__dirstamp) \ + datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/csr/csr_interface-csr_opencl.$(OBJEXT): \ + 
datawizard/interfaces/csr/$(am__dirstamp) \ + datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/csr/csr_interface$(EXEEXT): $(datawizard_interfaces_csr_csr_interface_OBJECTS) $(datawizard_interfaces_csr_csr_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_csr_csr_interface_DEPENDENCIES) datawizard/interfaces/csr/$(am__dirstamp) + @rm -f datawizard/interfaces/csr/csr_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_csr_csr_interface_LINK) $(datawizard_interfaces_csr_csr_interface_OBJECTS) $(datawizard_interfaces_csr_csr_interface_LDADD) $(LIBS) +datawizard/interfaces/matrix_matrix_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/matrix + @: > datawizard/interfaces/matrix/$(am__dirstamp) +datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/matrix/$(DEPDIR) + @: > datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix/matrix_interface-matrix_interface.$(OBJEXT): \ + datawizard/interfaces/matrix/$(am__dirstamp) \ + datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix/matrix_cuda.$(OBJEXT): \ + datawizard/interfaces/matrix/$(am__dirstamp) \ + datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/matrix/matrix_interface-matrix_opencl.$(OBJEXT): \ + datawizard/interfaces/matrix/$(am__dirstamp) \ + datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/matrix/matrix_interface$(EXEEXT): $(datawizard_interfaces_matrix_matrix_interface_OBJECTS) $(datawizard_interfaces_matrix_matrix_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_matrix_matrix_interface_DEPENDENCIES) datawizard/interfaces/matrix/$(am__dirstamp) + @rm -f datawizard/interfaces/matrix/matrix_interface$(EXEEXT) + 
$(AM_V_CCLD)$(datawizard_interfaces_matrix_matrix_interface_LINK) $(datawizard_interfaces_matrix_matrix_interface_OBJECTS) $(datawizard_interfaces_matrix_matrix_interface_LDADD) $(LIBS) +datawizard/interfaces/multiformat/advanced/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/multiformat/advanced + @: > datawizard/interfaces/multiformat/advanced/$(am__dirstamp) +datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/multiformat/advanced/$(DEPDIR) + @: > datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_LDADD) $(LIBS) +datawizard/interfaces/multiformat/advanced/multiformat_data_release.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT): 
$(datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_data_release_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_LDADD) $(LIBS) +datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_LDADD) $(LIBS) +datawizard/interfaces/multiformat/advanced/multiformat_worker.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_DEPENDENCIES) 
$(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_worker_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_LDADD) $(LIBS) +datawizard/interfaces/multiformat/advanced/same_handle.$(OBJEXT): \ + datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ + datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) $(datawizard_interfaces_multiformat_advanced_same_handle_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_same_handle_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) $(datawizard_interfaces_multiformat_advanced_same_handle_LDADD) $(LIBS) +datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/multiformat + @: > datawizard/interfaces/multiformat/$(am__dirstamp) +datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/multiformat/$(DEPDIR) + @: > datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) 
+datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/multiformat_cuda.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.$(OBJEXT): \ + datawizard/interfaces/multiformat/$(am__dirstamp) \ + datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT): $(datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) $(datawizard_interfaces_multiformat_multiformat_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_multiformat_interface_DEPENDENCIES) datawizard/interfaces/multiformat/$(am__dirstamp) + @rm -f datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_multiformat_multiformat_interface_LINK) $(datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) $(datawizard_interfaces_multiformat_multiformat_interface_LDADD) $(LIBS) +datawizard/interfaces/ndim_ndim_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/ndim + @: > datawizard/interfaces/ndim/$(am__dirstamp) 
+datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/ndim/$(DEPDIR) + @: > datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim/ndim_interface-ndim_interface.$(OBJEXT): \ + datawizard/interfaces/ndim/$(am__dirstamp) \ + datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim/ndim_cuda.$(OBJEXT): \ + datawizard/interfaces/ndim/$(am__dirstamp) \ + datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/ndim/ndim_interface-ndim_opencl.$(OBJEXT): \ + datawizard/interfaces/ndim/$(am__dirstamp) \ + datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/ndim/ndim_interface$(EXEEXT): $(datawizard_interfaces_ndim_ndim_interface_OBJECTS) $(datawizard_interfaces_ndim_ndim_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_ndim_ndim_interface_DEPENDENCIES) datawizard/interfaces/ndim/$(am__dirstamp) + @rm -f datawizard/interfaces/ndim/ndim_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_ndim_ndim_interface_LINK) $(datawizard_interfaces_ndim_ndim_interface_OBJECTS) $(datawizard_interfaces_ndim_ndim_interface_LDADD) $(LIBS) +datawizard/interfaces/tensor_tensor_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/tensor/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/tensor + @: > datawizard/interfaces/tensor/$(am__dirstamp) +datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/tensor/$(DEPDIR) + @: > datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/tensor/tensor_interface-tensor_interface.$(OBJEXT): \ + datawizard/interfaces/tensor/$(am__dirstamp) \ + datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/tensor/tensor_cuda.$(OBJEXT): \ + datawizard/interfaces/tensor/$(am__dirstamp) \ + datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) 
+datawizard/interfaces/tensor/tensor_interface-tensor_opencl.$(OBJEXT): \ + datawizard/interfaces/tensor/$(am__dirstamp) \ + datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/tensor/tensor_interface$(EXEEXT): $(datawizard_interfaces_tensor_tensor_interface_OBJECTS) $(datawizard_interfaces_tensor_tensor_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_tensor_tensor_interface_DEPENDENCIES) datawizard/interfaces/tensor/$(am__dirstamp) + @rm -f datawizard/interfaces/tensor/tensor_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_tensor_tensor_interface_LINK) $(datawizard_interfaces_tensor_tensor_interface_OBJECTS) $(datawizard_interfaces_tensor_tensor_interface_LDADD) $(LIBS) +datawizard/interfaces/variable_variable_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/variable/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/variable + @: > datawizard/interfaces/variable/$(am__dirstamp) +datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/variable/$(DEPDIR) + @: > datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/variable/variable_interface-variable_interface.$(OBJEXT): \ + datawizard/interfaces/variable/$(am__dirstamp) \ + datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/variable/variable_cuda.$(OBJEXT): \ + datawizard/interfaces/variable/$(am__dirstamp) \ + datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/variable/variable_interface-variable_opencl.$(OBJEXT): \ + datawizard/interfaces/variable/$(am__dirstamp) \ + datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/variable/variable_interface$(EXEEXT): $(datawizard_interfaces_variable_variable_interface_OBJECTS) $(datawizard_interfaces_variable_variable_interface_DEPENDENCIES) 
$(EXTRA_datawizard_interfaces_variable_variable_interface_DEPENDENCIES) datawizard/interfaces/variable/$(am__dirstamp) + @rm -f datawizard/interfaces/variable/variable_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_variable_variable_interface_LINK) $(datawizard_interfaces_variable_variable_interface_OBJECTS) $(datawizard_interfaces_variable_variable_interface_LDADD) $(LIBS) +datawizard/interfaces/vector/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/vector + @: > datawizard/interfaces/vector/$(am__dirstamp) +datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/vector/$(DEPDIR) + @: > datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector/vector_interface-vector_interface.$(OBJEXT): \ + datawizard/interfaces/vector/$(am__dirstamp) \ + datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector_vector_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector/vector_cuda.$(OBJEXT): \ + datawizard/interfaces/vector/$(am__dirstamp) \ + datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/vector/vector_interface-vector_opencl.$(OBJEXT): \ + datawizard/interfaces/vector/$(am__dirstamp) \ + datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/vector/vector_interface$(EXEEXT): $(datawizard_interfaces_vector_vector_interface_OBJECTS) $(datawizard_interfaces_vector_vector_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_vector_vector_interface_DEPENDENCIES) datawizard/interfaces/vector/$(am__dirstamp) + @rm -f datawizard/interfaces/vector/vector_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_vector_vector_interface_LINK) $(datawizard_interfaces_vector_vector_interface_OBJECTS) $(datawizard_interfaces_vector_vector_interface_LDADD) $(LIBS) 
+datawizard/interfaces/void_void_interface-test_interfaces.$(OBJEXT): \ + datawizard/interfaces/$(am__dirstamp) \ + datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/void/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/void + @: > datawizard/interfaces/void/$(am__dirstamp) +datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datawizard/interfaces/void/$(DEPDIR) + @: > datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) +datawizard/interfaces/void/void_interface-void_interface.$(OBJEXT): \ + datawizard/interfaces/void/$(am__dirstamp) \ + datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) + +datawizard/interfaces/void/void_interface$(EXEEXT): $(datawizard_interfaces_void_void_interface_OBJECTS) $(datawizard_interfaces_void_void_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_void_void_interface_DEPENDENCIES) datawizard/interfaces/void/$(am__dirstamp) + @rm -f datawizard/interfaces/void/void_interface$(EXEEXT) + $(AM_V_CCLD)$(datawizard_interfaces_void_void_interface_LINK) $(datawizard_interfaces_void_void_interface_OBJECTS) $(datawizard_interfaces_void_void_interface_LDADD) $(LIBS) +datawizard/invalidate_pending_requests.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/invalidate_pending_requests$(EXEEXT): $(datawizard_invalidate_pending_requests_OBJECTS) $(datawizard_invalidate_pending_requests_DEPENDENCIES) $(EXTRA_datawizard_invalidate_pending_requests_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/invalidate_pending_requests$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_invalidate_pending_requests_OBJECTS) $(datawizard_invalidate_pending_requests_LDADD) $(LIBS) +datawizard/lazy_allocation.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/lazy_allocation$(EXEEXT): $(datawizard_lazy_allocation_OBJECTS) $(datawizard_lazy_allocation_DEPENDENCIES) $(EXTRA_datawizard_lazy_allocation_DEPENDENCIES) 
datawizard/$(am__dirstamp) + @rm -f datawizard/lazy_allocation$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_lazy_allocation_OBJECTS) $(datawizard_lazy_allocation_LDADD) $(LIBS) +datawizard/locality.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/locality$(EXEEXT): $(datawizard_locality_OBJECTS) $(datawizard_locality_DEPENDENCIES) $(EXTRA_datawizard_locality_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/locality$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_locality_OBJECTS) $(datawizard_locality_LDADD) $(LIBS) +datawizard/manual_reduction.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/manual_reduction$(EXEEXT): $(datawizard_manual_reduction_OBJECTS) $(datawizard_manual_reduction_DEPENDENCIES) $(EXTRA_datawizard_manual_reduction_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/manual_reduction$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_manual_reduction_OBJECTS) $(datawizard_manual_reduction_LDADD) $(LIBS) +datawizard/mpi_like.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/mpi_like$(EXEEXT): $(datawizard_mpi_like_OBJECTS) $(datawizard_mpi_like_DEPENDENCIES) $(EXTRA_datawizard_mpi_like_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/mpi_like$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_mpi_like_OBJECTS) $(datawizard_mpi_like_LDADD) $(LIBS) +datawizard/mpi_like_async.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/mpi_like_async$(EXEEXT): $(datawizard_mpi_like_async_OBJECTS) $(datawizard_mpi_like_async_DEPENDENCIES) $(EXTRA_datawizard_mpi_like_async_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/mpi_like_async$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_mpi_like_async_OBJECTS) $(datawizard_mpi_like_async_LDADD) $(LIBS) +datawizard/no_unregister.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + 
+datawizard/no_unregister$(EXEEXT): $(datawizard_no_unregister_OBJECTS) $(datawizard_no_unregister_DEPENDENCIES) $(EXTRA_datawizard_no_unregister_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/no_unregister$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_no_unregister_OBJECTS) $(datawizard_no_unregister_LDADD) $(LIBS) +datawizard/noreclaim.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/noreclaim$(EXEEXT): $(datawizard_noreclaim_OBJECTS) $(datawizard_noreclaim_DEPENDENCIES) $(EXTRA_datawizard_noreclaim_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/noreclaim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_noreclaim_OBJECTS) $(datawizard_noreclaim_LDADD) $(LIBS) +datawizard/nowhere.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/nowhere$(EXEEXT): $(datawizard_nowhere_OBJECTS) $(datawizard_nowhere_DEPENDENCIES) $(EXTRA_datawizard_nowhere_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/nowhere$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_nowhere_OBJECTS) $(datawizard_nowhere_LDADD) $(LIBS) +datawizard/numa_overflow.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/numa_overflow$(EXEEXT): $(datawizard_numa_overflow_OBJECTS) $(datawizard_numa_overflow_DEPENDENCIES) $(EXTRA_datawizard_numa_overflow_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/numa_overflow$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_numa_overflow_OBJECTS) $(datawizard_numa_overflow_LDADD) $(LIBS) +datawizard/partition_dep.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/partition_dep$(EXEEXT): $(datawizard_partition_dep_OBJECTS) $(datawizard_partition_dep_DEPENDENCIES) $(EXTRA_datawizard_partition_dep_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partition_dep$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partition_dep_OBJECTS) $(datawizard_partition_dep_LDADD) $(LIBS) 
+datawizard/partition_init.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/partition_init$(EXEEXT): $(datawizard_partition_init_OBJECTS) $(datawizard_partition_init_DEPENDENCIES) $(EXTRA_datawizard_partition_init_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partition_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partition_init_OBJECTS) $(datawizard_partition_init_LDADD) $(LIBS) +datawizard/partition_lazy.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/partition_lazy$(EXEEXT): $(datawizard_partition_lazy_OBJECTS) $(datawizard_partition_lazy_DEPENDENCIES) $(EXTRA_datawizard_partition_lazy_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partition_lazy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partition_lazy_OBJECTS) $(datawizard_partition_lazy_LDADD) $(LIBS) +datawizard/partition_wontuse.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/partition_wontuse$(EXEEXT): $(datawizard_partition_wontuse_OBJECTS) $(datawizard_partition_wontuse_DEPENDENCIES) $(EXTRA_datawizard_partition_wontuse_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partition_wontuse$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partition_wontuse_OBJECTS) $(datawizard_partition_wontuse_LDADD) $(LIBS) +datawizard/partitioned_acquire.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/partitioned_acquire$(EXEEXT): $(datawizard_partitioned_acquire_OBJECTS) $(datawizard_partitioned_acquire_DEPENDENCIES) $(EXTRA_datawizard_partitioned_acquire_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partitioned_acquire$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partitioned_acquire_OBJECTS) $(datawizard_partitioned_acquire_LDADD) $(LIBS) +datawizard/partitioned_initialization.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + 
+datawizard/partitioned_initialization$(EXEEXT): $(datawizard_partitioned_initialization_OBJECTS) $(datawizard_partitioned_initialization_DEPENDENCIES) $(EXTRA_datawizard_partitioned_initialization_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/partitioned_initialization$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_partitioned_initialization_OBJECTS) $(datawizard_partitioned_initialization_LDADD) $(LIBS) +datawizard/readers_and_writers.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/readers_and_writers$(EXEEXT): $(datawizard_readers_and_writers_OBJECTS) $(datawizard_readers_and_writers_DEPENDENCIES) $(EXTRA_datawizard_readers_and_writers_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/readers_and_writers$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_readers_and_writers_OBJECTS) $(datawizard_readers_and_writers_LDADD) $(LIBS) +datawizard/readonly.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/readonly$(EXEEXT): $(datawizard_readonly_OBJECTS) $(datawizard_readonly_DEPENDENCIES) $(EXTRA_datawizard_readonly_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/readonly$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_readonly_OBJECTS) $(datawizard_readonly_LDADD) $(LIBS) +datawizard/reclaim.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/reclaim$(EXEEXT): $(datawizard_reclaim_OBJECTS) $(datawizard_reclaim_DEPENDENCIES) $(EXTRA_datawizard_reclaim_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/reclaim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_reclaim_OBJECTS) $(datawizard_reclaim_LDADD) $(LIBS) +datawizard/redux_acquire.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/redux_acquire$(EXEEXT): $(datawizard_redux_acquire_OBJECTS) $(datawizard_redux_acquire_DEPENDENCIES) $(EXTRA_datawizard_redux_acquire_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f 
datawizard/redux_acquire$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_redux_acquire_OBJECTS) $(datawizard_redux_acquire_LDADD) $(LIBS) +datawizard/scratch.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/scratch_cuda.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/scratch_opencl.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/scratch$(EXEEXT): $(datawizard_scratch_OBJECTS) $(datawizard_scratch_DEPENDENCIES) $(EXTRA_datawizard_scratch_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/scratch$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_scratch_OBJECTS) $(datawizard_scratch_LDADD) $(LIBS) +datawizard/scratch_reuse.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/scratch_reuse$(EXEEXT): $(datawizard_scratch_reuse_OBJECTS) $(datawizard_scratch_reuse_DEPENDENCIES) $(EXTRA_datawizard_scratch_reuse_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/scratch_reuse$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_scratch_reuse_OBJECTS) $(datawizard_scratch_reuse_LDADD) $(LIBS) +datawizard/simgrid-locality.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/simgrid-locality$(EXEEXT): $(datawizard_simgrid_locality_OBJECTS) $(datawizard_simgrid_locality_DEPENDENCIES) $(EXTRA_datawizard_simgrid_locality_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/simgrid-locality$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_simgrid_locality_OBJECTS) $(datawizard_simgrid_locality_LDADD) $(LIBS) +datawizard/specific_node.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/specific_node$(EXEEXT): $(datawizard_specific_node_OBJECTS) $(datawizard_specific_node_DEPENDENCIES) $(EXTRA_datawizard_specific_node_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/specific_node$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(datawizard_specific_node_OBJECTS) $(datawizard_specific_node_LDADD) $(LIBS) +datawizard/specific_node_same.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/specific_node_same$(EXEEXT): $(datawizard_specific_node_same_OBJECTS) $(datawizard_specific_node_same_DEPENDENCIES) $(EXTRA_datawizard_specific_node_same_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/specific_node_same$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_specific_node_same_OBJECTS) $(datawizard_specific_node_same_LDADD) $(LIBS) +datawizard/sync_and_notify_data.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/sync_and_notify_data_kernels.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) +datawizard/sync_and_notify_data_opencl.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/sync_and_notify_data$(EXEEXT): $(datawizard_sync_and_notify_data_OBJECTS) $(datawizard_sync_and_notify_data_DEPENDENCIES) $(EXTRA_datawizard_sync_and_notify_data_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/sync_and_notify_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_sync_and_notify_data_OBJECTS) $(datawizard_sync_and_notify_data_LDADD) $(LIBS) +datawizard/sync_and_notify_data_implicit.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/sync_and_notify_data_implicit$(EXEEXT): $(datawizard_sync_and_notify_data_implicit_OBJECTS) $(datawizard_sync_and_notify_data_implicit_DEPENDENCIES) $(EXTRA_datawizard_sync_and_notify_data_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/sync_and_notify_data_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_sync_and_notify_data_implicit_OBJECTS) $(datawizard_sync_and_notify_data_implicit_LDADD) $(LIBS) +datawizard/sync_with_data_with_mem.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + 
+datawizard/sync_with_data_with_mem$(EXEEXT): $(datawizard_sync_with_data_with_mem_OBJECTS) $(datawizard_sync_with_data_with_mem_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/sync_with_data_with_mem$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_OBJECTS) $(datawizard_sync_with_data_with_mem_LDADD) $(LIBS) +datawizard/sync_with_data_with_mem_non_blocking.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT): $(datawizard_sync_with_data_with_mem_non_blocking_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_non_blocking_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_non_blocking_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_LDADD) $(LIBS) +datawizard/sync_with_data_with_mem_non_blocking_implicit.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT): $(datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_non_blocking_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_LDADD) $(LIBS) +datawizard/task_with_multiple_time_the_same_handle.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/task_with_multiple_time_the_same_handle$(EXEEXT): $(datawizard_task_with_multiple_time_the_same_handle_OBJECTS) 
$(datawizard_task_with_multiple_time_the_same_handle_DEPENDENCIES) $(EXTRA_datawizard_task_with_multiple_time_the_same_handle_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_task_with_multiple_time_the_same_handle_OBJECTS) $(datawizard_task_with_multiple_time_the_same_handle_LDADD) $(LIBS) +datawizard/temporary_partition.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/temporary_partition$(EXEEXT): $(datawizard_temporary_partition_OBJECTS) $(datawizard_temporary_partition_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/temporary_partition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_OBJECTS) $(datawizard_temporary_partition_LDADD) $(LIBS) +datawizard/temporary_partition_implicit.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/temporary_partition_implicit$(EXEEXT): $(datawizard_temporary_partition_implicit_OBJECTS) $(datawizard_temporary_partition_implicit_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/temporary_partition_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_implicit_OBJECTS) $(datawizard_temporary_partition_implicit_LDADD) $(LIBS) +datawizard/temporary_partition_read.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/temporary_partition_read$(EXEEXT): $(datawizard_temporary_partition_read_OBJECTS) $(datawizard_temporary_partition_read_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_read_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/temporary_partition_read$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_read_OBJECTS) $(datawizard_temporary_partition_read_LDADD) $(LIBS) +datawizard/test_arbiter.$(OBJEXT): 
datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/test_arbiter$(EXEEXT): $(datawizard_test_arbiter_OBJECTS) $(datawizard_test_arbiter_DEPENDENCIES) $(EXTRA_datawizard_test_arbiter_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/test_arbiter$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(datawizard_test_arbiter_OBJECTS) $(datawizard_test_arbiter_LDADD) $(LIBS) +datawizard/unpartition.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/unpartition$(EXEEXT): $(datawizard_unpartition_OBJECTS) $(datawizard_unpartition_DEPENDENCIES) $(EXTRA_datawizard_unpartition_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/unpartition$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_unpartition_OBJECTS) $(datawizard_unpartition_LDADD) $(LIBS) +datawizard/user_interaction_implicit.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/user_interaction_implicit$(EXEEXT): $(datawizard_user_interaction_implicit_OBJECTS) $(datawizard_user_interaction_implicit_DEPENDENCIES) $(EXTRA_datawizard_user_interaction_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/user_interaction_implicit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_user_interaction_implicit_OBJECTS) $(datawizard_user_interaction_implicit_LDADD) $(LIBS) +datawizard/variable_parameters.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/variable_parameters$(EXEEXT): $(datawizard_variable_parameters_OBJECTS) $(datawizard_variable_parameters_DEPENDENCIES) $(EXTRA_datawizard_variable_parameters_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/variable_parameters$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_variable_parameters_OBJECTS) $(datawizard_variable_parameters_LDADD) $(LIBS) +datawizard/variable_size.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/variable_size$(EXEEXT): 
$(datawizard_variable_size_OBJECTS) $(datawizard_variable_size_DEPENDENCIES) $(EXTRA_datawizard_variable_size_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/variable_size$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_variable_size_OBJECTS) $(datawizard_variable_size_LDADD) $(LIBS) +datawizard/write_only_tmp_buffer.$(OBJEXT): \ + datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/write_only_tmp_buffer$(EXEEXT): $(datawizard_write_only_tmp_buffer_OBJECTS) $(datawizard_write_only_tmp_buffer_DEPENDENCIES) $(EXTRA_datawizard_write_only_tmp_buffer_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/write_only_tmp_buffer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_write_only_tmp_buffer_OBJECTS) $(datawizard_write_only_tmp_buffer_LDADD) $(LIBS) +datawizard/wt_broadcast.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/wt_broadcast$(EXEEXT): $(datawizard_wt_broadcast_OBJECTS) $(datawizard_wt_broadcast_DEPENDENCIES) $(EXTRA_datawizard_wt_broadcast_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/wt_broadcast$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_wt_broadcast_OBJECTS) $(datawizard_wt_broadcast_LDADD) $(LIBS) +datawizard/wt_host.$(OBJEXT): datawizard/$(am__dirstamp) \ + datawizard/$(DEPDIR)/$(am__dirstamp) + +datawizard/wt_host$(EXEEXT): $(datawizard_wt_host_OBJECTS) $(datawizard_wt_host_DEPENDENCIES) $(EXTRA_datawizard_wt_host_DEPENDENCIES) datawizard/$(am__dirstamp) + @rm -f datawizard/wt_host$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(datawizard_wt_host_OBJECTS) $(datawizard_wt_host_LDADD) $(LIBS) +disk/$(am__dirstamp): + @$(MKDIR_P) disk + @: > disk/$(am__dirstamp) +disk/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) disk/$(DEPDIR) + @: > disk/$(DEPDIR)/$(am__dirstamp) +disk/disk_compute.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/disk_compute$(EXEEXT): $(disk_disk_compute_OBJECTS) $(disk_disk_compute_DEPENDENCIES) 
$(EXTRA_disk_disk_compute_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/disk_compute$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_disk_compute_OBJECTS) $(disk_disk_compute_LDADD) $(LIBS) +disk/disk_copy.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/disk_copy$(EXEEXT): $(disk_disk_copy_OBJECTS) $(disk_disk_copy_DEPENDENCIES) $(EXTRA_disk_disk_copy_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/disk_copy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_disk_copy_OBJECTS) $(disk_disk_copy_LDADD) $(LIBS) +disk/disk_copy_to_disk.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/disk_copy_to_disk$(EXEEXT): $(disk_disk_copy_to_disk_OBJECTS) $(disk_disk_copy_to_disk_DEPENDENCIES) $(EXTRA_disk_disk_copy_to_disk_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/disk_copy_to_disk$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_disk_copy_to_disk_OBJECTS) $(disk_disk_copy_to_disk_LDADD) $(LIBS) +disk/disk_copy_unpack.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/disk_copy_unpack$(EXEEXT): $(disk_disk_copy_unpack_OBJECTS) $(disk_disk_copy_unpack_DEPENDENCIES) $(EXTRA_disk_disk_copy_unpack_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/disk_copy_unpack$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_disk_copy_unpack_OBJECTS) $(disk_disk_copy_unpack_LDADD) $(LIBS) +disk/disk_pack.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/disk_pack$(EXEEXT): $(disk_disk_pack_OBJECTS) $(disk_disk_pack_DEPENDENCIES) $(EXTRA_disk_disk_pack_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/disk_pack$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_disk_pack_OBJECTS) $(disk_disk_pack_LDADD) $(LIBS) +disk/mem_reclaim.$(OBJEXT): disk/$(am__dirstamp) \ + disk/$(DEPDIR)/$(am__dirstamp) + +disk/mem_reclaim$(EXEEXT): $(disk_mem_reclaim_OBJECTS) $(disk_mem_reclaim_DEPENDENCIES) $(EXTRA_disk_mem_reclaim_DEPENDENCIES) disk/$(am__dirstamp) + @rm -f disk/mem_reclaim$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(disk_mem_reclaim_OBJECTS) 
$(disk_mem_reclaim_LDADD) $(LIBS) +energy/$(am__dirstamp): + @$(MKDIR_P) energy + @: > energy/$(am__dirstamp) +energy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) energy/$(DEPDIR) + @: > energy/$(DEPDIR)/$(am__dirstamp) +energy/energy_efficiency.$(OBJEXT): energy/$(am__dirstamp) \ + energy/$(DEPDIR)/$(am__dirstamp) + +energy/energy_efficiency$(EXEEXT): $(energy_energy_efficiency_OBJECTS) $(energy_energy_efficiency_DEPENDENCIES) $(EXTRA_energy_energy_efficiency_DEPENDENCIES) energy/$(am__dirstamp) + @rm -f energy/energy_efficiency$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(energy_energy_efficiency_OBJECTS) $(energy_energy_efficiency_LDADD) $(LIBS) +errorcheck/$(am__dirstamp): + @$(MKDIR_P) errorcheck + @: > errorcheck/$(am__dirstamp) +errorcheck/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) errorcheck/$(DEPDIR) + @: > errorcheck/$(DEPDIR)/$(am__dirstamp) +errorcheck/invalid_blocking_calls.$(OBJEXT): \ + errorcheck/$(am__dirstamp) \ + errorcheck/$(DEPDIR)/$(am__dirstamp) + +errorcheck/invalid_blocking_calls$(EXEEXT): $(errorcheck_invalid_blocking_calls_OBJECTS) $(errorcheck_invalid_blocking_calls_DEPENDENCIES) $(EXTRA_errorcheck_invalid_blocking_calls_DEPENDENCIES) errorcheck/$(am__dirstamp) + @rm -f errorcheck/invalid_blocking_calls$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(errorcheck_invalid_blocking_calls_OBJECTS) $(errorcheck_invalid_blocking_calls_LDADD) $(LIBS) +errorcheck/invalid_tasks.$(OBJEXT): errorcheck/$(am__dirstamp) \ + errorcheck/$(DEPDIR)/$(am__dirstamp) + +errorcheck/invalid_tasks$(EXEEXT): $(errorcheck_invalid_tasks_OBJECTS) $(errorcheck_invalid_tasks_DEPENDENCIES) $(EXTRA_errorcheck_invalid_tasks_DEPENDENCIES) errorcheck/$(am__dirstamp) + @rm -f errorcheck/invalid_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(errorcheck_invalid_tasks_OBJECTS) $(errorcheck_invalid_tasks_LDADD) $(LIBS) +errorcheck/starpu_init_noworker.$(OBJEXT): errorcheck/$(am__dirstamp) \ + errorcheck/$(DEPDIR)/$(am__dirstamp) + +errorcheck/starpu_init_noworker$(EXEEXT): $(errorcheck_starpu_init_noworker_OBJECTS) 
$(errorcheck_starpu_init_noworker_DEPENDENCIES) $(EXTRA_errorcheck_starpu_init_noworker_DEPENDENCIES) errorcheck/$(am__dirstamp) + @rm -f errorcheck/starpu_init_noworker$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(errorcheck_starpu_init_noworker_OBJECTS) $(errorcheck_starpu_init_noworker_LDADD) $(LIBS) +errorcheck/workers_cpuid.$(OBJEXT): errorcheck/$(am__dirstamp) \ + errorcheck/$(DEPDIR)/$(am__dirstamp) + +errorcheck/workers_cpuid$(EXEEXT): $(errorcheck_workers_cpuid_OBJECTS) $(errorcheck_workers_cpuid_DEPENDENCIES) $(EXTRA_errorcheck_workers_cpuid_DEPENDENCIES) errorcheck/$(am__dirstamp) + @rm -f errorcheck/workers_cpuid$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(errorcheck_workers_cpuid_OBJECTS) $(errorcheck_workers_cpuid_LDADD) $(LIBS) +fault-tolerance/$(am__dirstamp): + @$(MKDIR_P) fault-tolerance + @: > fault-tolerance/$(am__dirstamp) +fault-tolerance/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fault-tolerance/$(DEPDIR) + @: > fault-tolerance/$(DEPDIR)/$(am__dirstamp) +fault-tolerance/retry.$(OBJEXT): fault-tolerance/$(am__dirstamp) \ + fault-tolerance/$(DEPDIR)/$(am__dirstamp) + +fault-tolerance/retry$(EXEEXT): $(fault_tolerance_retry_OBJECTS) $(fault_tolerance_retry_DEPENDENCIES) $(EXTRA_fault_tolerance_retry_DEPENDENCIES) fault-tolerance/$(am__dirstamp) + @rm -f fault-tolerance/retry$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(fault_tolerance_retry_OBJECTS) $(fault_tolerance_retry_LDADD) $(LIBS) +fortran90/$(am__dirstamp): + @$(MKDIR_P) fortran90 + @: > fortran90/$(am__dirstamp) +fortran90/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fortran90/$(DEPDIR) + @: > fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/starpu_mod.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) +fortran90/init_01.$(OBJEXT): fortran90/$(am__dirstamp) \ + fortran90/$(DEPDIR)/$(am__dirstamp) + +fortran90/init_01$(EXEEXT): $(fortran90_init_01_OBJECTS) $(fortran90_init_01_DEPENDENCIES) $(EXTRA_fortran90_init_01_DEPENDENCIES) fortran90/$(am__dirstamp) + @rm -f fortran90/init_01$(EXEEXT) + 
$(AM_V_FCLD)$(FCLINK) $(fortran90_init_01_OBJECTS) $(fortran90_init_01_LDADD) $(LIBS) +helper/$(am__dirstamp): + @$(MKDIR_P) helper + @: > helper/$(am__dirstamp) +helper/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) helper/$(DEPDIR) + @: > helper/$(DEPDIR)/$(am__dirstamp) +helper/cublasLt_init.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/cublasLt_init$(EXEEXT): $(helper_cublasLt_init_OBJECTS) $(helper_cublasLt_init_DEPENDENCIES) $(EXTRA_helper_cublasLt_init_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/cublasLt_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_cublasLt_init_OBJECTS) $(helper_cublasLt_init_LDADD) $(LIBS) +helper/cublas_init.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/cublas_init$(EXEEXT): $(helper_cublas_init_OBJECTS) $(helper_cublas_init_DEPENDENCIES) $(EXTRA_helper_cublas_init_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/cublas_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_cublas_init_OBJECTS) $(helper_cublas_init_LDADD) $(LIBS) +helper/cusparse_init.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/cusparse_init$(EXEEXT): $(helper_cusparse_init_OBJECTS) $(helper_cusparse_init_DEPENDENCIES) $(EXTRA_helper_cusparse_init_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/cusparse_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_cusparse_init_OBJECTS) $(helper_cusparse_init_LDADD) $(LIBS) +helper/execute_on_all.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/execute_on_all$(EXEEXT): $(helper_execute_on_all_OBJECTS) $(helper_execute_on_all_DEPENDENCIES) $(EXTRA_helper_execute_on_all_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/execute_on_all$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_execute_on_all_OBJECTS) $(helper_execute_on_all_LDADD) $(LIBS) +helper/hipblas_init.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/hipblas_init$(EXEEXT): $(helper_hipblas_init_OBJECTS) 
$(helper_hipblas_init_DEPENDENCIES) $(EXTRA_helper_hipblas_init_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/hipblas_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_hipblas_init_OBJECTS) $(helper_hipblas_init_LDADD) $(LIBS) +helper/pinned_memory.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/pinned_memory$(EXEEXT): $(helper_pinned_memory_OBJECTS) $(helper_pinned_memory_DEPENDENCIES) $(EXTRA_helper_pinned_memory_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/pinned_memory$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_pinned_memory_OBJECTS) $(helper_pinned_memory_LDADD) $(LIBS) +helper/starpu_create_sync_task.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/starpu_create_sync_task$(EXEEXT): $(helper_starpu_create_sync_task_OBJECTS) $(helper_starpu_create_sync_task_DEPENDENCIES) $(EXTRA_helper_starpu_create_sync_task_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/starpu_create_sync_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_starpu_create_sync_task_OBJECTS) $(helper_starpu_create_sync_task_LDADD) $(LIBS) +helper/starpu_data_cpy.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/starpu_data_cpy$(EXEEXT): $(helper_starpu_data_cpy_OBJECTS) $(helper_starpu_data_cpy_DEPENDENCIES) $(EXTRA_helper_starpu_data_cpy_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/starpu_data_cpy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_starpu_data_cpy_OBJECTS) $(helper_starpu_data_cpy_LDADD) $(LIBS) +helper/starpu_data_dup_ro.$(OBJEXT): helper/$(am__dirstamp) \ + helper/$(DEPDIR)/$(am__dirstamp) + +helper/starpu_data_dup_ro$(EXEEXT): $(helper_starpu_data_dup_ro_OBJECTS) $(helper_starpu_data_dup_ro_DEPENDENCIES) $(EXTRA_helper_starpu_data_dup_ro_DEPENDENCIES) helper/$(am__dirstamp) + @rm -f helper/starpu_data_dup_ro$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(helper_starpu_data_dup_ro_OBJECTS) $(helper_starpu_data_dup_ro_LDADD) $(LIBS) + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) 
$(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) +main/$(am__dirstamp): + @$(MKDIR_P) main + @: > main/$(am__dirstamp) +main/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) main/$(DEPDIR) + @: > main/$(DEPDIR)/$(am__dirstamp) +main/bind.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/bind$(EXEEXT): $(main_bind_OBJECTS) $(main_bind_DEPENDENCIES) $(EXTRA_main_bind_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/bind$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_bind_OBJECTS) $(main_bind_LDADD) $(LIBS) +main/callback.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/callback$(EXEEXT): $(main_callback_OBJECTS) $(main_callback_DEPENDENCIES) $(EXTRA_main_callback_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_callback_OBJECTS) $(main_callback_LDADD) $(LIBS) +main/codelet_null_callback.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/codelet_null_callback$(EXEEXT): $(main_codelet_null_callback_OBJECTS) $(main_codelet_null_callback_DEPENDENCIES) $(EXTRA_main_codelet_null_callback_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/codelet_null_callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_codelet_null_callback_OBJECTS) $(main_codelet_null_callback_LDADD) $(LIBS) +main/const_codelet.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/const_codelet$(EXEEXT): $(main_const_codelet_OBJECTS) $(main_const_codelet_DEPENDENCIES) $(EXTRA_main_const_codelet_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/const_codelet$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_const_codelet_OBJECTS) $(main_const_codelet_LDADD) $(LIBS) +main/deadlock.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/deadlock$(EXEEXT): $(main_deadlock_OBJECTS) $(main_deadlock_DEPENDENCIES) $(EXTRA_main_deadlock_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/deadlock$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(main_deadlock_OBJECTS) $(main_deadlock_LDADD) $(LIBS) +main/declare_deps_after_submission.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/declare_deps_after_submission$(EXEEXT): $(main_declare_deps_after_submission_OBJECTS) $(main_declare_deps_after_submission_DEPENDENCIES) $(EXTRA_main_declare_deps_after_submission_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/declare_deps_after_submission$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_declare_deps_after_submission_OBJECTS) $(main_declare_deps_after_submission_LDADD) $(LIBS) +main/declare_deps_after_submission_synchronous.$(OBJEXT): \ + main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) + +main/declare_deps_after_submission_synchronous$(EXEEXT): $(main_declare_deps_after_submission_synchronous_OBJECTS) $(main_declare_deps_after_submission_synchronous_DEPENDENCIES) $(EXTRA_main_declare_deps_after_submission_synchronous_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/declare_deps_after_submission_synchronous$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_declare_deps_after_submission_synchronous_OBJECTS) $(main_declare_deps_after_submission_synchronous_LDADD) $(LIBS) +main/declare_deps_in_callback.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/declare_deps_in_callback$(EXEEXT): $(main_declare_deps_in_callback_OBJECTS) $(main_declare_deps_in_callback_DEPENDENCIES) $(EXTRA_main_declare_deps_in_callback_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/declare_deps_in_callback$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_declare_deps_in_callback_OBJECTS) $(main_declare_deps_in_callback_LDADD) $(LIBS) +main/deploop.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/deploop$(EXEEXT): $(main_deploop_OBJECTS) $(main_deploop_DEPENDENCIES) $(EXTRA_main_deploop_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/deploop$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_deploop_OBJECTS) $(main_deploop_LDADD) $(LIBS) +main/deprecated_func-deprecated_func.$(OBJEXT): 
main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/deprecated_func$(EXEEXT): $(main_deprecated_func_OBJECTS) $(main_deprecated_func_DEPENDENCIES) $(EXTRA_main_deprecated_func_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/deprecated_func$(EXEEXT) + $(AM_V_CCLD)$(main_deprecated_func_LINK) $(main_deprecated_func_OBJECTS) $(main_deprecated_func_LDADD) $(LIBS) +main/display_binding.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/display_binding$(EXEEXT): $(main_display_binding_OBJECTS) $(main_display_binding_DEPENDENCIES) $(EXTRA_main_display_binding_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/display_binding$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_display_binding_OBJECTS) $(main_display_binding_LDADD) $(LIBS) +main/driver_api/$(am__dirstamp): + @$(MKDIR_P) main/driver_api + @: > main/driver_api/$(am__dirstamp) +main/driver_api/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) main/driver_api/$(DEPDIR) + @: > main/driver_api/$(DEPDIR)/$(am__dirstamp) +main/driver_api/init_run_deinit.$(OBJEXT): \ + main/driver_api/$(am__dirstamp) \ + main/driver_api/$(DEPDIR)/$(am__dirstamp) + +main/driver_api/init_run_deinit$(EXEEXT): $(main_driver_api_init_run_deinit_OBJECTS) $(main_driver_api_init_run_deinit_DEPENDENCIES) $(EXTRA_main_driver_api_init_run_deinit_DEPENDENCIES) main/driver_api/$(am__dirstamp) + @rm -f main/driver_api/init_run_deinit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_driver_api_init_run_deinit_OBJECTS) $(main_driver_api_init_run_deinit_LDADD) $(LIBS) +main/driver_api/run_driver.$(OBJEXT): main/driver_api/$(am__dirstamp) \ + main/driver_api/$(DEPDIR)/$(am__dirstamp) + +main/driver_api/run_driver$(EXEEXT): $(main_driver_api_run_driver_OBJECTS) $(main_driver_api_run_driver_DEPENDENCIES) $(EXTRA_main_driver_api_run_driver_DEPENDENCIES) main/driver_api/$(am__dirstamp) + @rm -f main/driver_api/run_driver$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_driver_api_run_driver_OBJECTS) $(main_driver_api_run_driver_LDADD) $(LIBS) 
+main/empty_task.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/empty_task$(EXEEXT): $(main_empty_task_OBJECTS) $(main_empty_task_DEPENDENCIES) $(EXTRA_main_empty_task_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/empty_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_empty_task_OBJECTS) $(main_empty_task_LDADD) $(LIBS) +main/empty_task_chain.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/empty_task_chain$(EXEEXT): $(main_empty_task_chain_OBJECTS) $(main_empty_task_chain_DEPENDENCIES) $(EXTRA_main_empty_task_chain_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/empty_task_chain$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_empty_task_chain_OBJECTS) $(main_empty_task_chain_LDADD) $(LIBS) +main/empty_task_sync_point.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/empty_task_sync_point$(EXEEXT): $(main_empty_task_sync_point_OBJECTS) $(main_empty_task_sync_point_DEPENDENCIES) $(EXTRA_main_empty_task_sync_point_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/empty_task_sync_point$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_empty_task_sync_point_OBJECTS) $(main_empty_task_sync_point_LDADD) $(LIBS) +main/empty_task_sync_point_tasks.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/empty_task_sync_point_tasks$(EXEEXT): $(main_empty_task_sync_point_tasks_OBJECTS) $(main_empty_task_sync_point_tasks_DEPENDENCIES) $(EXTRA_main_empty_task_sync_point_tasks_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/empty_task_sync_point_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_empty_task_sync_point_tasks_OBJECTS) $(main_empty_task_sync_point_tasks_LDADD) $(LIBS) +main/execute_on_a_specific_worker.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/execute_on_a_specific_worker$(EXEEXT): $(main_execute_on_a_specific_worker_OBJECTS) $(main_execute_on_a_specific_worker_DEPENDENCIES) $(EXTRA_main_execute_on_a_specific_worker_DEPENDENCIES) main/$(am__dirstamp) + @rm -f 
main/execute_on_a_specific_worker$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_execute_on_a_specific_worker_OBJECTS) $(main_execute_on_a_specific_worker_LDADD) $(LIBS) +main/execute_schedule.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/execute_schedule$(EXEEXT): $(main_execute_schedule_OBJECTS) $(main_execute_schedule_DEPENDENCIES) $(EXTRA_main_execute_schedule_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/execute_schedule$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_execute_schedule_OBJECTS) $(main_execute_schedule_LDADD) $(LIBS) +main/get_children_tasks.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/get_children_tasks$(EXEEXT): $(main_get_children_tasks_OBJECTS) $(main_get_children_tasks_DEPENDENCIES) $(EXTRA_main_get_children_tasks_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/get_children_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_get_children_tasks_OBJECTS) $(main_get_children_tasks_LDADD) $(LIBS) +main/get_current_task.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/get_current_task$(EXEEXT): $(main_get_current_task_OBJECTS) $(main_get_current_task_DEPENDENCIES) $(EXTRA_main_get_current_task_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/get_current_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_get_current_task_OBJECTS) $(main_get_current_task_LDADD) $(LIBS) +main/hwloc_cpuset.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/hwloc_cpuset$(EXEEXT): $(main_hwloc_cpuset_OBJECTS) $(main_hwloc_cpuset_DEPENDENCIES) $(EXTRA_main_hwloc_cpuset_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/hwloc_cpuset$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_hwloc_cpuset_OBJECTS) $(main_hwloc_cpuset_LDADD) $(LIBS) +main/insert_task.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task$(EXEEXT): $(main_insert_task_OBJECTS) $(main_insert_task_DEPENDENCIES) $(EXTRA_main_insert_task_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task$(EXEEXT) + 
$(AM_V_CCLD)$(LINK) $(main_insert_task_OBJECTS) $(main_insert_task_LDADD) $(LIBS) +main/insert_task_array.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_array$(EXEEXT): $(main_insert_task_array_OBJECTS) $(main_insert_task_array_DEPENDENCIES) $(EXTRA_main_insert_task_array_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_array$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_array_OBJECTS) $(main_insert_task_array_LDADD) $(LIBS) +main/insert_task_dyn_handles.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_dyn_handles$(EXEEXT): $(main_insert_task_dyn_handles_OBJECTS) $(main_insert_task_dyn_handles_DEPENDENCIES) $(EXTRA_main_insert_task_dyn_handles_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_dyn_handles$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_dyn_handles_OBJECTS) $(main_insert_task_dyn_handles_LDADD) $(LIBS) +main/insert_task_many.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_many$(EXEEXT): $(main_insert_task_many_OBJECTS) $(main_insert_task_many_DEPENDENCIES) $(EXTRA_main_insert_task_many_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_many$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_many_OBJECTS) $(main_insert_task_many_LDADD) $(LIBS) +main/insert_task_nullcodelet.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_nullcodelet$(EXEEXT): $(main_insert_task_nullcodelet_OBJECTS) $(main_insert_task_nullcodelet_DEPENDENCIES) $(EXTRA_main_insert_task_nullcodelet_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_nullcodelet$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_nullcodelet_OBJECTS) $(main_insert_task_nullcodelet_LDADD) $(LIBS) +main/insert_task_pack.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_pack$(EXEEXT): $(main_insert_task_pack_OBJECTS) $(main_insert_task_pack_DEPENDENCIES) 
$(EXTRA_main_insert_task_pack_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_pack$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_pack_OBJECTS) $(main_insert_task_pack_LDADD) $(LIBS) +main/insert_task_value.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_value$(EXEEXT): $(main_insert_task_value_OBJECTS) $(main_insert_task_value_DEPENDENCIES) $(EXTRA_main_insert_task_value_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_value$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_value_OBJECTS) $(main_insert_task_value_LDADD) $(LIBS) +main/insert_task_where.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/insert_task_where$(EXEEXT): $(main_insert_task_where_OBJECTS) $(main_insert_task_where_DEPENDENCIES) $(EXTRA_main_insert_task_where_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/insert_task_where$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_insert_task_where_OBJECTS) $(main_insert_task_where_LDADD) $(LIBS) +main/job.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/job$(EXEEXT): $(main_job_OBJECTS) $(main_job_DEPENDENCIES) $(EXTRA_main_job_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/job$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_job_OBJECTS) $(main_job_LDADD) $(LIBS) +main/mkdtemp.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/mkdtemp$(EXEEXT): $(main_mkdtemp_OBJECTS) $(main_mkdtemp_DEPENDENCIES) $(EXTRA_main_mkdtemp_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/mkdtemp$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_mkdtemp_OBJECTS) $(main_mkdtemp_LDADD) $(LIBS) +main/multithreaded.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/multithreaded$(EXEEXT): $(main_multithreaded_OBJECTS) $(main_multithreaded_DEPENDENCIES) $(EXTRA_main_multithreaded_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/multithreaded$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_multithreaded_OBJECTS) $(main_multithreaded_LDADD) $(LIBS) 
+main/multithreaded_init.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/multithreaded_init$(EXEEXT): $(main_multithreaded_init_OBJECTS) $(main_multithreaded_init_DEPENDENCIES) $(EXTRA_main_multithreaded_init_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/multithreaded_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_multithreaded_init_OBJECTS) $(main_multithreaded_init_LDADD) $(LIBS) +main/pack.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/pack$(EXEEXT): $(main_pack_OBJECTS) $(main_pack_DEPENDENCIES) $(EXTRA_main_pack_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/pack$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_pack_OBJECTS) $(main_pack_LDADD) $(LIBS) +main/pause_resume.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/pause_resume$(EXEEXT): $(main_pause_resume_OBJECTS) $(main_pause_resume_DEPENDENCIES) $(EXTRA_main_pause_resume_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/pause_resume$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_pause_resume_OBJECTS) $(main_pause_resume_LDADD) $(LIBS) +main/regenerate.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/regenerate$(EXEEXT): $(main_regenerate_OBJECTS) $(main_regenerate_DEPENDENCIES) $(EXTRA_main_regenerate_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/regenerate$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_regenerate_OBJECTS) $(main_regenerate_LDADD) $(LIBS) +main/regenerate_pipeline.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/regenerate_pipeline$(EXEEXT): $(main_regenerate_pipeline_OBJECTS) $(main_regenerate_pipeline_DEPENDENCIES) $(EXTRA_main_regenerate_pipeline_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/regenerate_pipeline$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_regenerate_pipeline_OBJECTS) $(main_regenerate_pipeline_LDADD) $(LIBS) +main/restart.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/restart$(EXEEXT): $(main_restart_OBJECTS) $(main_restart_DEPENDENCIES) 
$(EXTRA_main_restart_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/restart$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_restart_OBJECTS) $(main_restart_LDADD) $(LIBS) +main/starpu_init.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/starpu_init$(EXEEXT): $(main_starpu_init_OBJECTS) $(main_starpu_init_DEPENDENCIES) $(EXTRA_main_starpu_init_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/starpu_init$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_starpu_init_OBJECTS) $(main_starpu_init_LDADD) $(LIBS) +main/starpu_task_bundle.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/starpu_task_bundle$(EXEEXT): $(main_starpu_task_bundle_OBJECTS) $(main_starpu_task_bundle_DEPENDENCIES) $(EXTRA_main_starpu_task_bundle_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/starpu_task_bundle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_starpu_task_bundle_OBJECTS) $(main_starpu_task_bundle_LDADD) $(LIBS) +main/starpu_task_wait.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/starpu_task_wait$(EXEEXT): $(main_starpu_task_wait_OBJECTS) $(main_starpu_task_wait_DEPENDENCIES) $(EXTRA_main_starpu_task_wait_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/starpu_task_wait$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_starpu_task_wait_OBJECTS) $(main_starpu_task_wait_LDADD) $(LIBS) +main/starpu_task_wait_for_all.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/starpu_task_wait_for_all$(EXEEXT): $(main_starpu_task_wait_for_all_OBJECTS) $(main_starpu_task_wait_for_all_DEPENDENCIES) $(EXTRA_main_starpu_task_wait_for_all_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/starpu_task_wait_for_all$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_starpu_task_wait_for_all_OBJECTS) $(main_starpu_task_wait_for_all_LDADD) $(LIBS) +main/starpu_worker_exists-starpu_worker_exists.$(OBJEXT): \ + main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) + +main/starpu_worker_exists$(EXEEXT): $(main_starpu_worker_exists_OBJECTS) 
$(main_starpu_worker_exists_DEPENDENCIES) $(EXTRA_main_starpu_worker_exists_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/starpu_worker_exists$(EXEEXT) + $(AM_V_CCLD)$(main_starpu_worker_exists_LINK) $(main_starpu_worker_exists_OBJECTS) $(main_starpu_worker_exists_LDADD) $(LIBS) +main/static_restartable.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/static_restartable$(EXEEXT): $(main_static_restartable_OBJECTS) $(main_static_restartable_DEPENDENCIES) $(EXTRA_main_static_restartable_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/static_restartable$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_static_restartable_OBJECTS) $(main_static_restartable_LDADD) $(LIBS) +main/static_restartable_tag.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/static_restartable_tag$(EXEEXT): $(main_static_restartable_tag_OBJECTS) $(main_static_restartable_tag_DEPENDENCIES) $(EXTRA_main_static_restartable_tag_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/static_restartable_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_static_restartable_tag_OBJECTS) $(main_static_restartable_tag_LDADD) $(LIBS) +main/static_restartable_using_initializer.$(OBJEXT): \ + main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) + +main/static_restartable_using_initializer$(EXEEXT): $(main_static_restartable_using_initializer_OBJECTS) $(main_static_restartable_using_initializer_DEPENDENCIES) $(EXTRA_main_static_restartable_using_initializer_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/static_restartable_using_initializer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_static_restartable_using_initializer_OBJECTS) $(main_static_restartable_using_initializer_LDADD) $(LIBS) +main/subgraph_repeat.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/subgraph_repeat$(EXEEXT): $(main_subgraph_repeat_OBJECTS) $(main_subgraph_repeat_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/subgraph_repeat$(EXEEXT) + 
$(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_OBJECTS) $(main_subgraph_repeat_LDADD) $(LIBS) +main/subgraph_repeat_regenerate.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/subgraph_repeat_regenerate$(EXEEXT): $(main_subgraph_repeat_regenerate_OBJECTS) $(main_subgraph_repeat_regenerate_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/subgraph_repeat_regenerate$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_OBJECTS) $(main_subgraph_repeat_regenerate_LDADD) $(LIBS) +main/subgraph_repeat_regenerate_tag.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/subgraph_repeat_regenerate_tag$(EXEEXT): $(main_subgraph_repeat_regenerate_tag_OBJECTS) $(main_subgraph_repeat_regenerate_tag_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_tag_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/subgraph_repeat_regenerate_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_tag_OBJECTS) $(main_subgraph_repeat_regenerate_tag_LDADD) $(LIBS) +main/subgraph_repeat_regenerate_tag_cycle.$(OBJEXT): \ + main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) + +main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT): $(main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) $(main_subgraph_repeat_regenerate_tag_cycle_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_tag_cycle_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) $(main_subgraph_repeat_regenerate_tag_cycle_LDADD) $(LIBS) +main/subgraph_repeat_tag.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/subgraph_repeat_tag$(EXEEXT): $(main_subgraph_repeat_tag_OBJECTS) $(main_subgraph_repeat_tag_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_tag_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/subgraph_repeat_tag$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(main_subgraph_repeat_tag_OBJECTS) $(main_subgraph_repeat_tag_LDADD) $(LIBS) +main/submit.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/submit$(EXEEXT): $(main_submit_OBJECTS) $(main_submit_DEPENDENCIES) $(EXTRA_main_submit_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/submit$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_submit_OBJECTS) $(main_submit_LDADD) $(LIBS) +main/tag_get_task.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/tag_get_task$(EXEEXT): $(main_tag_get_task_OBJECTS) $(main_tag_get_task_DEPENDENCIES) $(EXTRA_main_tag_get_task_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/tag_get_task$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_tag_get_task_OBJECTS) $(main_tag_get_task_LDADD) $(LIBS) +main/tag_task_data_deps.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/tag_task_data_deps$(EXEEXT): $(main_tag_task_data_deps_OBJECTS) $(main_tag_task_data_deps_DEPENDENCIES) $(EXTRA_main_tag_task_data_deps_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/tag_task_data_deps$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_tag_task_data_deps_OBJECTS) $(main_tag_task_data_deps_LDADD) $(LIBS) +main/tag_wait_api.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/tag_wait_api$(EXEEXT): $(main_tag_wait_api_OBJECTS) $(main_tag_wait_api_DEPENDENCIES) $(EXTRA_main_tag_wait_api_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/tag_wait_api$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_tag_wait_api_OBJECTS) $(main_tag_wait_api_LDADD) $(LIBS) +main/task_end_dep.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/task_end_dep$(EXEEXT): $(main_task_end_dep_OBJECTS) $(main_task_end_dep_DEPENDENCIES) $(EXTRA_main_task_end_dep_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/task_end_dep$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_task_end_dep_OBJECTS) $(main_task_end_dep_LDADD) $(LIBS) +main/task_wait_api.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + 
+main/task_wait_api$(EXEEXT): $(main_task_wait_api_OBJECTS) $(main_task_wait_api_DEPENDENCIES) $(EXTRA_main_task_wait_api_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/task_wait_api$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_task_wait_api_OBJECTS) $(main_task_wait_api_LDADD) $(LIBS) +main/wait_all_regenerable_tasks.$(OBJEXT): main/$(am__dirstamp) \ + main/$(DEPDIR)/$(am__dirstamp) + +main/wait_all_regenerable_tasks$(EXEEXT): $(main_wait_all_regenerable_tasks_OBJECTS) $(main_wait_all_regenerable_tasks_DEPENDENCIES) $(EXTRA_main_wait_all_regenerable_tasks_DEPENDENCIES) main/$(am__dirstamp) + @rm -f main/wait_all_regenerable_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(main_wait_all_regenerable_tasks_OBJECTS) $(main_wait_all_regenerable_tasks_LDADD) $(LIBS) +maxfpga/$(am__dirstamp): + @$(MKDIR_P) maxfpga + @: > maxfpga/$(am__dirstamp) +maxfpga/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) maxfpga/$(DEPDIR) + @: > maxfpga/$(DEPDIR)/$(am__dirstamp) +maxfpga/max_fpga_advanced_static.$(OBJEXT): maxfpga/$(am__dirstamp) \ + maxfpga/$(DEPDIR)/$(am__dirstamp) + +maxfpga/max_fpga_advanced_static$(EXEEXT): $(maxfpga_max_fpga_advanced_static_OBJECTS) $(maxfpga_max_fpga_advanced_static_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_advanced_static_DEPENDENCIES) maxfpga/$(am__dirstamp) + @rm -f maxfpga/max_fpga_advanced_static$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_advanced_static_OBJECTS) $(maxfpga_max_fpga_advanced_static_LDADD) $(LIBS) +maxfpga/max_fpga_basic_static.$(OBJEXT): maxfpga/$(am__dirstamp) \ + maxfpga/$(DEPDIR)/$(am__dirstamp) + +maxfpga/max_fpga_basic_static$(EXEEXT): $(maxfpga_max_fpga_basic_static_OBJECTS) $(maxfpga_max_fpga_basic_static_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_basic_static_DEPENDENCIES) maxfpga/$(am__dirstamp) + @rm -f maxfpga/max_fpga_basic_static$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_basic_static_OBJECTS) $(maxfpga_max_fpga_basic_static_LDADD) $(LIBS) +maxfpga/max_fpga_dynamic.$(OBJEXT): maxfpga/$(am__dirstamp) \ + 
maxfpga/$(DEPDIR)/$(am__dirstamp) + +maxfpga/max_fpga_dynamic$(EXEEXT): $(maxfpga_max_fpga_dynamic_OBJECTS) $(maxfpga_max_fpga_dynamic_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_dynamic_DEPENDENCIES) maxfpga/$(am__dirstamp) + @rm -f maxfpga/max_fpga_dynamic$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_dynamic_OBJECTS) $(maxfpga_max_fpga_dynamic_LDADD) $(LIBS) +maxfpga/max_fpga_mux.$(OBJEXT): maxfpga/$(am__dirstamp) \ + maxfpga/$(DEPDIR)/$(am__dirstamp) + +maxfpga/max_fpga_mux$(EXEEXT): $(maxfpga_max_fpga_mux_OBJECTS) $(maxfpga_max_fpga_mux_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_mux_DEPENDENCIES) maxfpga/$(am__dirstamp) + @rm -f maxfpga/max_fpga_mux$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_mux_OBJECTS) $(maxfpga_max_fpga_mux_LDADD) $(LIBS) +microbenchs/$(am__dirstamp): + @$(MKDIR_P) microbenchs + @: > microbenchs/$(am__dirstamp) +microbenchs/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) microbenchs/$(DEPDIR) + @: > microbenchs/$(DEPDIR)/$(am__dirstamp) +microbenchs/async_tasks_overhead.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/async_tasks_overhead$(EXEEXT): $(microbenchs_async_tasks_overhead_OBJECTS) $(microbenchs_async_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_async_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/async_tasks_overhead$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_async_tasks_overhead_OBJECTS) $(microbenchs_async_tasks_overhead_LDADD) $(LIBS) +microbenchs/bandwidth.$(OBJEXT): microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/bandwidth$(EXEEXT): $(microbenchs_bandwidth_OBJECTS) $(microbenchs_bandwidth_DEPENDENCIES) $(EXTRA_microbenchs_bandwidth_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/bandwidth$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_bandwidth_OBJECTS) $(microbenchs_bandwidth_LDADD) $(LIBS) +microbenchs/display_structures_size.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + 
microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/display_structures_size$(EXEEXT): $(microbenchs_display_structures_size_OBJECTS) $(microbenchs_display_structures_size_DEPENDENCIES) $(EXTRA_microbenchs_display_structures_size_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/display_structures_size$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_display_structures_size_OBJECTS) $(microbenchs_display_structures_size_LDADD) $(LIBS) +microbenchs/local_pingpong.$(OBJEXT): microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/local_pingpong$(EXEEXT): $(microbenchs_local_pingpong_OBJECTS) $(microbenchs_local_pingpong_DEPENDENCIES) $(EXTRA_microbenchs_local_pingpong_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/local_pingpong$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_local_pingpong_OBJECTS) $(microbenchs_local_pingpong_LDADD) $(LIBS) +microbenchs/matrix_as_vector.$(OBJEXT): microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/matrix_as_vector$(EXEEXT): $(microbenchs_matrix_as_vector_OBJECTS) $(microbenchs_matrix_as_vector_DEPENDENCIES) $(EXTRA_microbenchs_matrix_as_vector_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/matrix_as_vector$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_matrix_as_vector_OBJECTS) $(microbenchs_matrix_as_vector_LDADD) $(LIBS) +microbenchs/parallel_dependent_homogeneous_tasks_data.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_dependent_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_dependent_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS) 
$(microbenchs_parallel_dependent_homogeneous_tasks_data_LDADD) $(LIBS) +microbenchs/parallel_independent_heterogeneous_tasks.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT): $(microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_heterogeneous_tasks_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_LDADD) $(LIBS) +microbenchs/parallel_independent_heterogeneous_tasks_data.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_heterogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_data_LDADD) $(LIBS) +microbenchs/parallel_independent_homogeneous_tasks.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT): $(microbenchs_parallel_independent_homogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_homogeneous_tasks_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(microbenchs_parallel_independent_homogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_LDADD) $(LIBS) +microbenchs/parallel_independent_homogeneous_tasks_data.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_data_LDADD) $(LIBS) +microbenchs/parallel_redux_heterogeneous_tasks_data.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_heterogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_redux_heterogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_heterogeneous_tasks_data_LDADD) $(LIBS) +microbenchs/parallel_redux_homogeneous_tasks_data.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_redux_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_homogeneous_tasks_data_LDADD) $(LIBS) +microbenchs/prefetch_data_on_node.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/prefetch_data_on_node$(EXEEXT): $(microbenchs_prefetch_data_on_node_OBJECTS) $(microbenchs_prefetch_data_on_node_DEPENDENCIES) $(EXTRA_microbenchs_prefetch_data_on_node_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/prefetch_data_on_node$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_prefetch_data_on_node_OBJECTS) $(microbenchs_prefetch_data_on_node_LDADD) $(LIBS) +microbenchs/redundant_buffer.$(OBJEXT): microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/redundant_buffer$(EXEEXT): $(microbenchs_redundant_buffer_OBJECTS) $(microbenchs_redundant_buffer_DEPENDENCIES) $(EXTRA_microbenchs_redundant_buffer_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/redundant_buffer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_redundant_buffer_OBJECTS) $(microbenchs_redundant_buffer_LDADD) $(LIBS) +microbenchs/sync_tasks_overhead.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/sync_tasks_overhead$(EXEEXT): $(microbenchs_sync_tasks_overhead_OBJECTS) $(microbenchs_sync_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_sync_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/sync_tasks_overhead$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_sync_tasks_overhead_OBJECTS) $(microbenchs_sync_tasks_overhead_LDADD) $(LIBS) +microbenchs/tasks_overhead.$(OBJEXT): microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/tasks_overhead$(EXEEXT): $(microbenchs_tasks_overhead_OBJECTS) $(microbenchs_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/tasks_overhead$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(microbenchs_tasks_overhead_OBJECTS) $(microbenchs_tasks_overhead_LDADD) $(LIBS) +microbenchs/tasks_size_overhead.$(OBJEXT): \ + microbenchs/$(am__dirstamp) \ + microbenchs/$(DEPDIR)/$(am__dirstamp) + +microbenchs/tasks_size_overhead$(EXEEXT): $(microbenchs_tasks_size_overhead_OBJECTS) $(microbenchs_tasks_size_overhead_DEPENDENCIES) $(EXTRA_microbenchs_tasks_size_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) + @rm -f microbenchs/tasks_size_overhead$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(microbenchs_tasks_size_overhead_OBJECTS) $(microbenchs_tasks_size_overhead_LDADD) $(LIBS) +openmp/$(am__dirstamp): + @$(MKDIR_P) openmp + @: > openmp/$(am__dirstamp) +openmp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) openmp/$(DEPDIR) + @: > openmp/$(DEPDIR)/$(am__dirstamp) +openmp/api_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/api_01$(EXEEXT): $(openmp_api_01_OBJECTS) $(openmp_api_01_DEPENDENCIES) $(EXTRA_openmp_api_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/api_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_api_01_OBJECTS) $(openmp_api_01_LDADD) $(LIBS) +openmp/array_slice_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/array_slice_01$(EXEEXT): $(openmp_array_slice_01_OBJECTS) $(openmp_array_slice_01_DEPENDENCIES) $(EXTRA_openmp_array_slice_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/array_slice_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_array_slice_01_OBJECTS) $(openmp_array_slice_01_LDADD) $(LIBS) +openmp/cuda_task_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/cuda_task_01$(EXEEXT): $(openmp_cuda_task_01_OBJECTS) $(openmp_cuda_task_01_DEPENDENCIES) $(EXTRA_openmp_cuda_task_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/cuda_task_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_cuda_task_01_OBJECTS) $(openmp_cuda_task_01_LDADD) $(LIBS) +openmp/environment.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + 
+openmp/environment$(EXEEXT): $(openmp_environment_OBJECTS) $(openmp_environment_DEPENDENCIES) $(EXTRA_openmp_environment_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/environment$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_environment_OBJECTS) $(openmp_environment_LDADD) $(LIBS) +openmp/init_exit_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/init_exit_01$(EXEEXT): $(openmp_init_exit_01_OBJECTS) $(openmp_init_exit_01_DEPENDENCIES) $(EXTRA_openmp_init_exit_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/init_exit_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_init_exit_01_OBJECTS) $(openmp_init_exit_01_LDADD) $(LIBS) +openmp/init_exit_02.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/init_exit_02$(EXEEXT): $(openmp_init_exit_02_OBJECTS) $(openmp_init_exit_02_DEPENDENCIES) $(EXTRA_openmp_init_exit_02_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/init_exit_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_init_exit_02_OBJECTS) $(openmp_init_exit_02_LDADD) $(LIBS) +openmp/parallel_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_01$(EXEEXT): $(openmp_parallel_01_OBJECTS) $(openmp_parallel_01_DEPENDENCIES) $(EXTRA_openmp_parallel_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_01_OBJECTS) $(openmp_parallel_01_LDADD) $(LIBS) +openmp/parallel_02.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_02$(EXEEXT): $(openmp_parallel_02_OBJECTS) $(openmp_parallel_02_DEPENDENCIES) $(EXTRA_openmp_parallel_02_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_02_OBJECTS) $(openmp_parallel_02_LDADD) $(LIBS) +openmp/parallel_03.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_03$(EXEEXT): $(openmp_parallel_03_OBJECTS) $(openmp_parallel_03_DEPENDENCIES) 
$(EXTRA_openmp_parallel_03_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_03$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_03_OBJECTS) $(openmp_parallel_03_LDADD) $(LIBS) +openmp/parallel_barrier_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_barrier_01$(EXEEXT): $(openmp_parallel_barrier_01_OBJECTS) $(openmp_parallel_barrier_01_DEPENDENCIES) $(EXTRA_openmp_parallel_barrier_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_barrier_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_barrier_01_OBJECTS) $(openmp_parallel_barrier_01_LDADD) $(LIBS) +openmp/parallel_critical_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_critical_01$(EXEEXT): $(openmp_parallel_critical_01_OBJECTS) $(openmp_parallel_critical_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_critical_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_01_OBJECTS) $(openmp_parallel_critical_01_LDADD) $(LIBS) +openmp/parallel_critical_inline_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_critical_inline_01$(EXEEXT): $(openmp_parallel_critical_inline_01_OBJECTS) $(openmp_parallel_critical_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_critical_inline_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_inline_01_OBJECTS) $(openmp_parallel_critical_inline_01_LDADD) $(LIBS) +openmp/parallel_critical_named_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_critical_named_01$(EXEEXT): $(openmp_parallel_critical_named_01_OBJECTS) $(openmp_parallel_critical_named_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_named_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_critical_named_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(openmp_parallel_critical_named_01_OBJECTS) $(openmp_parallel_critical_named_01_LDADD) $(LIBS) +openmp/parallel_critical_named_inline_01.$(OBJEXT): \ + openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_critical_named_inline_01$(EXEEXT): $(openmp_parallel_critical_named_inline_01_OBJECTS) $(openmp_parallel_critical_named_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_named_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_critical_named_inline_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_named_inline_01_OBJECTS) $(openmp_parallel_critical_named_inline_01_LDADD) $(LIBS) +openmp/parallel_for_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_for_01$(EXEEXT): $(openmp_parallel_for_01_OBJECTS) $(openmp_parallel_for_01_DEPENDENCIES) $(EXTRA_openmp_parallel_for_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_for_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_01_OBJECTS) $(openmp_parallel_for_01_LDADD) $(LIBS) +openmp/parallel_for_02.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_for_02$(EXEEXT): $(openmp_parallel_for_02_OBJECTS) $(openmp_parallel_for_02_DEPENDENCIES) $(EXTRA_openmp_parallel_for_02_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_for_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_02_OBJECTS) $(openmp_parallel_for_02_LDADD) $(LIBS) +openmp/parallel_for_ordered_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_for_ordered_01$(EXEEXT): $(openmp_parallel_for_ordered_01_OBJECTS) $(openmp_parallel_for_ordered_01_DEPENDENCIES) $(EXTRA_openmp_parallel_for_ordered_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_for_ordered_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_ordered_01_OBJECTS) $(openmp_parallel_for_ordered_01_LDADD) $(LIBS) +openmp/parallel_master_01.$(OBJEXT): 
openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_master_01$(EXEEXT): $(openmp_parallel_master_01_OBJECTS) $(openmp_parallel_master_01_DEPENDENCIES) $(EXTRA_openmp_parallel_master_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_master_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_master_01_OBJECTS) $(openmp_parallel_master_01_LDADD) $(LIBS) +openmp/parallel_master_inline_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_master_inline_01$(EXEEXT): $(openmp_parallel_master_inline_01_OBJECTS) $(openmp_parallel_master_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_master_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_master_inline_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_master_inline_01_OBJECTS) $(openmp_parallel_master_inline_01_LDADD) $(LIBS) +openmp/parallel_nested_lock_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_nested_lock_01$(EXEEXT): $(openmp_parallel_nested_lock_01_OBJECTS) $(openmp_parallel_nested_lock_01_DEPENDENCIES) $(EXTRA_openmp_parallel_nested_lock_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_nested_lock_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_nested_lock_01_OBJECTS) $(openmp_parallel_nested_lock_01_LDADD) $(LIBS) +openmp/parallel_sections_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_sections_01$(EXEEXT): $(openmp_parallel_sections_01_OBJECTS) $(openmp_parallel_sections_01_DEPENDENCIES) $(EXTRA_openmp_parallel_sections_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_sections_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_sections_01_OBJECTS) $(openmp_parallel_sections_01_LDADD) $(LIBS) +openmp/parallel_sections_combined_01.$(OBJEXT): \ + openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_sections_combined_01$(EXEEXT): 
$(openmp_parallel_sections_combined_01_OBJECTS) $(openmp_parallel_sections_combined_01_DEPENDENCIES) $(EXTRA_openmp_parallel_sections_combined_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_sections_combined_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_sections_combined_01_OBJECTS) $(openmp_parallel_sections_combined_01_LDADD) $(LIBS) +openmp/parallel_simple_lock_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_simple_lock_01$(EXEEXT): $(openmp_parallel_simple_lock_01_OBJECTS) $(openmp_parallel_simple_lock_01_DEPENDENCIES) $(EXTRA_openmp_parallel_simple_lock_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_simple_lock_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_simple_lock_01_OBJECTS) $(openmp_parallel_simple_lock_01_LDADD) $(LIBS) +openmp/parallel_single_copyprivate_01.$(OBJEXT): \ + openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_single_copyprivate_01$(EXEEXT): $(openmp_parallel_single_copyprivate_01_OBJECTS) $(openmp_parallel_single_copyprivate_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_copyprivate_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_single_copyprivate_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_copyprivate_01_OBJECTS) $(openmp_parallel_single_copyprivate_01_LDADD) $(LIBS) +openmp/parallel_single_copyprivate_inline_01.$(OBJEXT): \ + openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_single_copyprivate_inline_01$(EXEEXT): $(openmp_parallel_single_copyprivate_inline_01_OBJECTS) $(openmp_parallel_single_copyprivate_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_copyprivate_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_single_copyprivate_inline_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_copyprivate_inline_01_OBJECTS) $(openmp_parallel_single_copyprivate_inline_01_LDADD) $(LIBS) +openmp/parallel_single_inline_01.$(OBJEXT): 
openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_single_inline_01$(EXEEXT): $(openmp_parallel_single_inline_01_OBJECTS) $(openmp_parallel_single_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_single_inline_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_inline_01_OBJECTS) $(openmp_parallel_single_inline_01_LDADD) $(LIBS) +openmp/parallel_single_nowait_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_single_nowait_01$(EXEEXT): $(openmp_parallel_single_nowait_01_OBJECTS) $(openmp_parallel_single_nowait_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_nowait_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_single_nowait_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_nowait_01_OBJECTS) $(openmp_parallel_single_nowait_01_LDADD) $(LIBS) +openmp/parallel_single_wait_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/parallel_single_wait_01$(EXEEXT): $(openmp_parallel_single_wait_01_OBJECTS) $(openmp_parallel_single_wait_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_wait_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/parallel_single_wait_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_wait_01_OBJECTS) $(openmp_parallel_single_wait_01_LDADD) $(LIBS) +openmp/task_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/task_01$(EXEEXT): $(openmp_task_01_OBJECTS) $(openmp_task_01_DEPENDENCIES) $(EXTRA_openmp_task_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/task_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_task_01_OBJECTS) $(openmp_task_01_LDADD) $(LIBS) +openmp/task_02.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/task_02$(EXEEXT): $(openmp_task_02_OBJECTS) $(openmp_task_02_DEPENDENCIES) $(EXTRA_openmp_task_02_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f 
openmp/task_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_task_02_OBJECTS) $(openmp_task_02_LDADD) $(LIBS) +openmp/task_03.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/task_03$(EXEEXT): $(openmp_task_03_OBJECTS) $(openmp_task_03_DEPENDENCIES) $(EXTRA_openmp_task_03_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/task_03$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_task_03_OBJECTS) $(openmp_task_03_LDADD) $(LIBS) +openmp/taskgroup_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/taskgroup_01$(EXEEXT): $(openmp_taskgroup_01_OBJECTS) $(openmp_taskgroup_01_DEPENDENCIES) $(EXTRA_openmp_taskgroup_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/taskgroup_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_taskgroup_01_OBJECTS) $(openmp_taskgroup_01_LDADD) $(LIBS) +openmp/taskgroup_02.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/taskgroup_02$(EXEEXT): $(openmp_taskgroup_02_OBJECTS) $(openmp_taskgroup_02_DEPENDENCIES) $(EXTRA_openmp_taskgroup_02_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/taskgroup_02$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_taskgroup_02_OBJECTS) $(openmp_taskgroup_02_LDADD) $(LIBS) +openmp/taskloop.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/taskloop$(EXEEXT): $(openmp_taskloop_OBJECTS) $(openmp_taskloop_DEPENDENCIES) $(EXTRA_openmp_taskloop_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/taskloop$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_taskloop_OBJECTS) $(openmp_taskloop_LDADD) $(LIBS) +openmp/taskwait_01.$(OBJEXT): openmp/$(am__dirstamp) \ + openmp/$(DEPDIR)/$(am__dirstamp) + +openmp/taskwait_01$(EXEEXT): $(openmp_taskwait_01_OBJECTS) $(openmp_taskwait_01_DEPENDENCIES) $(EXTRA_openmp_taskwait_01_DEPENDENCIES) openmp/$(am__dirstamp) + @rm -f openmp/taskwait_01$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(openmp_taskwait_01_OBJECTS) $(openmp_taskwait_01_LDADD) $(LIBS) +overlap/$(am__dirstamp): + @$(MKDIR_P) overlap + 
@: > overlap/$(am__dirstamp) +overlap/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) overlap/$(DEPDIR) + @: > overlap/$(DEPDIR)/$(am__dirstamp) +overlap/gpu_concurrency.$(OBJEXT): overlap/$(am__dirstamp) \ + overlap/$(DEPDIR)/$(am__dirstamp) +overlap/long_kernel.$(OBJEXT): overlap/$(am__dirstamp) \ + overlap/$(DEPDIR)/$(am__dirstamp) + +overlap/gpu_concurrency$(EXEEXT): $(overlap_gpu_concurrency_OBJECTS) $(overlap_gpu_concurrency_DEPENDENCIES) $(EXTRA_overlap_gpu_concurrency_DEPENDENCIES) overlap/$(am__dirstamp) + @rm -f overlap/gpu_concurrency$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(overlap_gpu_concurrency_OBJECTS) $(overlap_gpu_concurrency_LDADD) $(LIBS) +overlap/overlap.$(OBJEXT): overlap/$(am__dirstamp) \ + overlap/$(DEPDIR)/$(am__dirstamp) + +overlap/overlap$(EXEEXT): $(overlap_overlap_OBJECTS) $(overlap_overlap_DEPENDENCIES) $(EXTRA_overlap_overlap_DEPENDENCIES) overlap/$(am__dirstamp) + @rm -f overlap/overlap$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(overlap_overlap_OBJECTS) $(overlap_overlap_LDADD) $(LIBS) +parallel_tasks/$(am__dirstamp): + @$(MKDIR_P) parallel_tasks + @: > parallel_tasks/$(am__dirstamp) +parallel_tasks/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) parallel_tasks/$(DEPDIR) + @: > parallel_tasks/$(DEPDIR)/$(am__dirstamp) +parallel_tasks/combined_worker_assign_workerid.$(OBJEXT): \ + parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/combined_worker_assign_workerid$(EXEEXT): $(parallel_tasks_combined_worker_assign_workerid_OBJECTS) $(parallel_tasks_combined_worker_assign_workerid_DEPENDENCIES) $(EXTRA_parallel_tasks_combined_worker_assign_workerid_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/combined_worker_assign_workerid$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_combined_worker_assign_workerid_OBJECTS) $(parallel_tasks_combined_worker_assign_workerid_LDADD) $(LIBS) +parallel_tasks/cuda_only.$(OBJEXT): parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + 
+parallel_tasks/cuda_only$(EXEEXT): $(parallel_tasks_cuda_only_OBJECTS) $(parallel_tasks_cuda_only_DEPENDENCIES) $(EXTRA_parallel_tasks_cuda_only_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/cuda_only$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_cuda_only_OBJECTS) $(parallel_tasks_cuda_only_LDADD) $(LIBS) +parallel_tasks/explicit_combined_worker.$(OBJEXT): \ + parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/explicit_combined_worker$(EXEEXT): $(parallel_tasks_explicit_combined_worker_OBJECTS) $(parallel_tasks_explicit_combined_worker_DEPENDENCIES) $(EXTRA_parallel_tasks_explicit_combined_worker_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/explicit_combined_worker$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_explicit_combined_worker_OBJECTS) $(parallel_tasks_explicit_combined_worker_LDADD) $(LIBS) +parallel_tasks/parallel_kernels.$(OBJEXT): \ + parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/parallel_kernels$(EXEEXT): $(parallel_tasks_parallel_kernels_OBJECTS) $(parallel_tasks_parallel_kernels_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/parallel_kernels$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_OBJECTS) $(parallel_tasks_parallel_kernels_LDADD) $(LIBS) +parallel_tasks/parallel_kernels_spmd.$(OBJEXT): \ + parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/parallel_kernels_spmd$(EXEEXT): $(parallel_tasks_parallel_kernels_spmd_OBJECTS) $(parallel_tasks_parallel_kernels_spmd_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_spmd_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/parallel_kernels_spmd$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_spmd_OBJECTS) $(parallel_tasks_parallel_kernels_spmd_LDADD) $(LIBS) 
+parallel_tasks/parallel_kernels_trivial.$(OBJEXT): \ + parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/parallel_kernels_trivial$(EXEEXT): $(parallel_tasks_parallel_kernels_trivial_OBJECTS) $(parallel_tasks_parallel_kernels_trivial_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_trivial_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/parallel_kernels_trivial$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_trivial_OBJECTS) $(parallel_tasks_parallel_kernels_trivial_LDADD) $(LIBS) +parallel_tasks/spmd_peager.$(OBJEXT): parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/spmd_peager$(EXEEXT): $(parallel_tasks_spmd_peager_OBJECTS) $(parallel_tasks_spmd_peager_DEPENDENCIES) $(EXTRA_parallel_tasks_spmd_peager_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/spmd_peager$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_spmd_peager_OBJECTS) $(parallel_tasks_spmd_peager_LDADD) $(LIBS) +parallel_tasks/swap.$(OBJEXT): parallel_tasks/$(am__dirstamp) \ + parallel_tasks/$(DEPDIR)/$(am__dirstamp) + +parallel_tasks/swap$(EXEEXT): $(parallel_tasks_swap_OBJECTS) $(parallel_tasks_swap_DEPENDENCIES) $(EXTRA_parallel_tasks_swap_DEPENDENCIES) parallel_tasks/$(am__dirstamp) + @rm -f parallel_tasks/swap$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(parallel_tasks_swap_OBJECTS) $(parallel_tasks_swap_LDADD) $(LIBS) +perfmodels/$(am__dirstamp): + @$(MKDIR_P) perfmodels + @: > perfmodels/$(am__dirstamp) +perfmodels/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) perfmodels/$(DEPDIR) + @: > perfmodels/$(DEPDIR)/$(am__dirstamp) +perfmodels/feed.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/feed$(EXEEXT): $(perfmodels_feed_OBJECTS) $(perfmodels_feed_DEPENDENCIES) $(EXTRA_perfmodels_feed_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/feed$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_feed_OBJECTS) 
$(perfmodels_feed_LDADD) $(LIBS) +perfmodels/memory.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/memory$(EXEEXT): $(perfmodels_memory_OBJECTS) $(perfmodels_memory_DEPENDENCIES) $(EXTRA_perfmodels_memory_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/memory$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_memory_OBJECTS) $(perfmodels_memory_LDADD) $(LIBS) +perfmodels/non_linear_regression_based.$(OBJEXT): \ + perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) +perfmodels/opencl_memset.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/non_linear_regression_based$(EXEEXT): $(perfmodels_non_linear_regression_based_OBJECTS) $(perfmodels_non_linear_regression_based_DEPENDENCIES) $(EXTRA_perfmodels_non_linear_regression_based_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/non_linear_regression_based$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_non_linear_regression_based_OBJECTS) $(perfmodels_non_linear_regression_based_LDADD) $(LIBS) +perfmodels/path.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/path$(EXEEXT): $(perfmodels_path_OBJECTS) $(perfmodels_path_DEPENDENCIES) $(EXTRA_perfmodels_path_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/path$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_path_OBJECTS) $(perfmodels_path_LDADD) $(LIBS) +perfmodels/regression_based_check.$(OBJEXT): \ + perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/regression_based_check$(EXEEXT): $(perfmodels_regression_based_check_OBJECTS) $(perfmodels_regression_based_check_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_check_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/regression_based_check$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_check_OBJECTS) $(perfmodels_regression_based_check_LDADD) $(LIBS) +perfmodels/regression_based_energy.$(OBJEXT): \ 
+ perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/regression_based_energy$(EXEEXT): $(perfmodels_regression_based_energy_OBJECTS) $(perfmodels_regression_based_energy_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_energy_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/regression_based_energy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_energy_OBJECTS) $(perfmodels_regression_based_energy_LDADD) $(LIBS) +perfmodels/regression_based_gpu.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/regression_based_gpu$(EXEEXT): $(perfmodels_regression_based_gpu_OBJECTS) $(perfmodels_regression_based_gpu_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_gpu_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/regression_based_gpu$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_gpu_OBJECTS) $(perfmodels_regression_based_gpu_LDADD) $(LIBS) +perfmodels/regression_based_memset.$(OBJEXT): \ + perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/regression_based_memset$(EXEEXT): $(perfmodels_regression_based_memset_OBJECTS) $(perfmodels_regression_based_memset_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_memset_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/regression_based_memset$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_memset_OBJECTS) $(perfmodels_regression_based_memset_LDADD) $(LIBS) +perfmodels/regression_based_multiimpl.$(OBJEXT): \ + perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/regression_based_multiimpl$(EXEEXT): $(perfmodels_regression_based_multiimpl_OBJECTS) $(perfmodels_regression_based_multiimpl_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_multiimpl_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/regression_based_multiimpl$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_multiimpl_OBJECTS) 
$(perfmodels_regression_based_multiimpl_LDADD) $(LIBS) +perfmodels/user_base.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/user_base$(EXEEXT): $(perfmodels_user_base_OBJECTS) $(perfmodels_user_base_DEPENDENCIES) $(EXTRA_perfmodels_user_base_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/user_base$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_user_base_OBJECTS) $(perfmodels_user_base_LDADD) $(LIBS) +perfmodels/valid_model.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/valid_model$(EXEEXT): $(perfmodels_valid_model_OBJECTS) $(perfmodels_valid_model_DEPENDENCIES) $(EXTRA_perfmodels_valid_model_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/valid_model$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_valid_model_OBJECTS) $(perfmodels_valid_model_LDADD) $(LIBS) +perfmodels/value_nan.$(OBJEXT): perfmodels/$(am__dirstamp) \ + perfmodels/$(DEPDIR)/$(am__dirstamp) + +perfmodels/value_nan$(EXEEXT): $(perfmodels_value_nan_OBJECTS) $(perfmodels_value_nan_DEPENDENCIES) $(EXTRA_perfmodels_value_nan_DEPENDENCIES) perfmodels/$(am__dirstamp) + @rm -f perfmodels/value_nan$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(perfmodels_value_nan_OBJECTS) $(perfmodels_value_nan_LDADD) $(LIBS) +sched_ctx/$(am__dirstamp): + @$(MKDIR_P) sched_ctx + @: > sched_ctx/$(am__dirstamp) +sched_ctx/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_ctx/$(DEPDIR) + @: > sched_ctx/$(DEPDIR)/$(am__dirstamp) +sched_ctx/sched_ctx_hierarchy.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_hierarchy$(EXEEXT): $(sched_ctx_sched_ctx_hierarchy_OBJECTS) $(sched_ctx_sched_ctx_hierarchy_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_hierarchy_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_hierarchy$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_hierarchy_OBJECTS) $(sched_ctx_sched_ctx_hierarchy_LDADD) $(LIBS) +sched_ctx/sched_ctx_list.$(OBJEXT): 
sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_list$(EXEEXT): $(sched_ctx_sched_ctx_list_OBJECTS) $(sched_ctx_sched_ctx_list_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_list_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_list$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_list_OBJECTS) $(sched_ctx_sched_ctx_list_LDADD) $(LIBS) +sched_ctx/sched_ctx_policy_data.$(OBJEXT): sched_ctx/$(am__dirstamp) \ + sched_ctx/$(DEPDIR)/$(am__dirstamp) + +sched_ctx/sched_ctx_policy_data$(EXEEXT): $(sched_ctx_sched_ctx_policy_data_OBJECTS) $(sched_ctx_sched_ctx_policy_data_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_policy_data_DEPENDENCIES) sched_ctx/$(am__dirstamp) + @rm -f sched_ctx/sched_ctx_policy_data$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_policy_data_OBJECTS) $(sched_ctx_sched_ctx_policy_data_LDADD) $(LIBS) +sched_policies/$(am__dirstamp): + @$(MKDIR_P) sched_policies + @: > sched_policies/$(am__dirstamp) +sched_policies/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sched_policies/$(DEPDIR) + @: > sched_policies/$(DEPDIR)/$(am__dirstamp) +sched_policies/data_locality.$(OBJEXT): \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/data_locality$(EXEEXT): $(sched_policies_data_locality_OBJECTS) $(sched_policies_data_locality_DEPENDENCIES) $(EXTRA_sched_policies_data_locality_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f sched_policies/data_locality$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_policies_data_locality_OBJECTS) $(sched_policies_data_locality_LDADD) $(LIBS) +sched_policies/execute_all_tasks-execute_all_tasks.$(OBJEXT): \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/execute_all_tasks$(EXEEXT): $(sched_policies_execute_all_tasks_OBJECTS) $(sched_policies_execute_all_tasks_DEPENDENCIES) $(EXTRA_sched_policies_execute_all_tasks_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f 
sched_policies/execute_all_tasks$(EXEEXT) + $(AM_V_CCLD)$(sched_policies_execute_all_tasks_LINK) $(sched_policies_execute_all_tasks_OBJECTS) $(sched_policies_execute_all_tasks_LDADD) $(LIBS) +sched_policies/prio.$(OBJEXT): sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/prio$(EXEEXT): $(sched_policies_prio_OBJECTS) $(sched_policies_prio_DEPENDENCIES) $(EXTRA_sched_policies_prio_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f sched_policies/prio$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_policies_prio_OBJECTS) $(sched_policies_prio_LDADD) $(LIBS) +sched_policies/simple_cpu_gpu_sched.$(OBJEXT): \ + sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/simple_cpu_gpu_sched$(EXEEXT): $(sched_policies_simple_cpu_gpu_sched_OBJECTS) $(sched_policies_simple_cpu_gpu_sched_DEPENDENCIES) $(EXTRA_sched_policies_simple_cpu_gpu_sched_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f sched_policies/simple_cpu_gpu_sched$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_policies_simple_cpu_gpu_sched_OBJECTS) $(sched_policies_simple_cpu_gpu_sched_LDADD) $(LIBS) +sched_policies/simple_deps.$(OBJEXT): sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/simple_deps$(EXEEXT): $(sched_policies_simple_deps_OBJECTS) $(sched_policies_simple_deps_DEPENDENCIES) $(EXTRA_sched_policies_simple_deps_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f sched_policies/simple_deps$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(sched_policies_simple_deps_OBJECTS) $(sched_policies_simple_deps_LDADD) $(LIBS) +sched_policies/workerids.$(OBJEXT): sched_policies/$(am__dirstamp) \ + sched_policies/$(DEPDIR)/$(am__dirstamp) + +sched_policies/workerids$(EXEEXT): $(sched_policies_workerids_OBJECTS) $(sched_policies_workerids_DEPENDENCIES) $(EXTRA_sched_policies_workerids_DEPENDENCIES) sched_policies/$(am__dirstamp) + @rm -f sched_policies/workerids$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(sched_policies_workerids_OBJECTS) $(sched_policies_workerids_LDADD) $(LIBS) +install-examplebinSCRIPTS: $(examplebin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(examplebin_SCRIPTS)'; test -n "$(examplebindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-examplebinSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(examplebin_SCRIPTS)'; test -n "$(examplebindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(examplebindir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f datawizard/*.$(OBJEXT) + -rm -f datawizard/interfaces/*.$(OBJEXT) + -rm -f datawizard/interfaces/bcsr/*.$(OBJEXT) + -rm -f datawizard/interfaces/block/*.$(OBJEXT) + -rm -f datawizard/interfaces/coo/*.$(OBJEXT) + -rm -f datawizard/interfaces/csr/*.$(OBJEXT) + -rm -f datawizard/interfaces/matrix/*.$(OBJEXT) + -rm -f datawizard/interfaces/multiformat/*.$(OBJEXT) + -rm -f 
datawizard/interfaces/multiformat/advanced/*.$(OBJEXT) + -rm -f datawizard/interfaces/ndim/*.$(OBJEXT) + -rm -f datawizard/interfaces/tensor/*.$(OBJEXT) + -rm -f datawizard/interfaces/variable/*.$(OBJEXT) + -rm -f datawizard/interfaces/vector/*.$(OBJEXT) + -rm -f datawizard/interfaces/void/*.$(OBJEXT) + -rm -f disk/*.$(OBJEXT) + -rm -f energy/*.$(OBJEXT) + -rm -f errorcheck/*.$(OBJEXT) + -rm -f fault-tolerance/*.$(OBJEXT) + -rm -f fortran90/*.$(OBJEXT) + -rm -f helper/*.$(OBJEXT) + -rm -f main/*.$(OBJEXT) + -rm -f main/driver_api/*.$(OBJEXT) + -rm -f maxfpga/*.$(OBJEXT) + -rm -f microbenchs/*.$(OBJEXT) + -rm -f openmp/*.$(OBJEXT) + -rm -f overlap/*.$(OBJEXT) + -rm -f parallel_tasks/*.$(OBJEXT) + -rm -f perfmodels/*.$(OBJEXT) + -rm -f sched_ctx/*.$(OBJEXT) + -rm -f sched_policies/*.$(OBJEXT) + -rm -f variable/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_cb.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_cb_insert.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release_to.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_try.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/allocate.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/bcsr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/$(DEPDIR)/cache.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/commute.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/commute2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/copy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/critical_section_with_void_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_deinitialize.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_implicit_deps.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_invalidation.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_register-data_register.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/deinitialize_pending_requests.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/deps.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/dining_philosophers.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/double_parameter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/dsm_stress.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/gpu_ptr_register.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/gpu_register.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/handle_to_pointer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/in_place_partition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/$(DEPDIR)/increment_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_lazy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_partition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_v2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_with_args.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/invalidate_pending_requests.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/lazy_allocation.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/locality.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/manual_reduction.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/mpi_like.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/mpi_like_async.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/no_unregister.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/noreclaim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/nowhere.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/numa_overflow.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_dep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/$(DEPDIR)/partition_lazy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_wontuse.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partitioned_acquire.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partitioned_initialization.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/readers_and_writers.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/readonly.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/reclaim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/redux_acquire.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scal.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch_reuse.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/simgrid-locality.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/specific_node.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/specific_node_same.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition_read.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/test_arbiter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/unpartition.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/user_interaction_implicit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/variable_parameters.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/variable_size.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/write_only_tmp_buffer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/wt_broadcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/wt_host.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_compute.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy_to_disk.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy_unpack.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_pack.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/mem_reclaim.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@energy/$(DEPDIR)/energy_efficiency.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/invalid_blocking_calls.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/invalid_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/starpu_init_noworker.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/workers_cpuid.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@fault-tolerance/$(DEPDIR)/retry.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cublasLt_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cublas_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cusparse_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/execute_on_all.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/hipblas_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/pinned_memory.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_create_sync_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_data_cpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_data_dup_ro.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@main/$(DEPDIR)/bind.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/codelet_null_callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/const_codelet.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deadlock.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_after_submission.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_in_callback.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deploop.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deprecated_func-deprecated_func.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/display_binding.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_chain.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_sync_point.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_sync_point_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/execute_on_a_specific_worker.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/execute_schedule.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/get_children_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/get_current_task.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/hwloc_cpuset.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_array.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_dyn_handles.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_many.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_nullcodelet.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_pack.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_value.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_where.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/job.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/mkdtemp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/multithreaded.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/multithreaded_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/pack.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/pause_resume.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/regenerate.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/regenerate_pipeline.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/restart.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@main/$(DEPDIR)/starpu_task_bundle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_task_wait.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_task_wait_for_all.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable_using_initializer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/submit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_get_task.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_task_data_deps.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_wait_api.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/task_end_dep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/task_wait_api.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@main/$(DEPDIR)/wait_all_regenerable_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/driver_api/$(DEPDIR)/init_run_deinit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@main/driver_api/$(DEPDIR)/run_driver.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_basic_static.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_dynamic.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_mux.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/async_tasks_overhead.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/bandwidth.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/display_structures_size.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/local_pingpong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/matrix_as_vector.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/prefetch_data_on_node.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/redundant_buffer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/sync_tasks_overhead.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/tasks_overhead.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/tasks_size_overhead.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/api_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/array_slice_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/cuda_task_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/environment.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/init_exit_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/init_exit_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_03.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_barrier_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_01.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_inline_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_named_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_ordered_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_master_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_master_inline_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_nested_lock_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_sections_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_sections_combined_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_simple_lock_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_inline_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_nowait_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_wait_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@openmp/$(DEPDIR)/task_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/task_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/task_03.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskgroup_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskgroup_02.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskloop.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskwait_01.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@overlap/$(DEPDIR)/gpu_concurrency.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@overlap/$(DEPDIR)/overlap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/cuda_only.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/spmd_peager.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/swap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/feed.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/memory.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/non_linear_regression_based.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/opencl_memset.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/path.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_check.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_energy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_gpu.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_memset.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_multiimpl.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/user_base.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/valid_model.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/value_nan.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_list.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/data_locality.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/prio.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/simple_deps.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/workerids.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@variable/$(DEPDIR)/increment.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@variable/$(DEPDIR)/increment_opencl.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' 
object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +datawizard/data_register-data_register.o: datawizard/data_register.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -MT datawizard/data_register-data_register.o -MD -MP -MF datawizard/$(DEPDIR)/data_register-data_register.Tpo -c -o datawizard/data_register-data_register.o `test -f 'datawizard/data_register.c' || echo '$(srcdir)/'`datawizard/data_register.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/$(DEPDIR)/data_register-data_register.Tpo datawizard/$(DEPDIR)/data_register-data_register.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/data_register.c' object='datawizard/data_register-data_register.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -c -o datawizard/data_register-data_register.o `test -f 'datawizard/data_register.c' || echo '$(srcdir)/'`datawizard/data_register.c + +datawizard/data_register-data_register.obj: datawizard/data_register.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -MT datawizard/data_register-data_register.obj -MD -MP -MF datawizard/$(DEPDIR)/data_register-data_register.Tpo -c -o datawizard/data_register-data_register.obj `if test -f 'datawizard/data_register.c'; then $(CYGPATH_W) 'datawizard/data_register.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/data_register.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/$(DEPDIR)/data_register-data_register.Tpo 
datawizard/$(DEPDIR)/data_register-data_register.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/data_register.c' object='datawizard/data_register-data_register.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -c -o datawizard/data_register-data_register.obj `if test -f 'datawizard/data_register.c'; then $(CYGPATH_W) 'datawizard/data_register.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/data_register.c'; fi` + +datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o `test -f 
'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o: datawizard/interfaces/bcsr/bcsr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT 
datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o `test -f 'datawizard/interfaces/bcsr/bcsr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_interface.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o `test -f 'datawizard/interfaces/bcsr/bcsr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_interface.c + +datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj: datawizard/interfaces/bcsr/bcsr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo 
datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_interface.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_interface.c'; fi` + +datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o: datawizard/interfaces/bcsr/bcsr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o `test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_opencl.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o `test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_opencl.c + +datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj: datawizard/interfaces/bcsr/bcsr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_opencl.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_opencl.c'; fi` + +datawizard/interfaces/block_block_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block_block_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo -c -o datawizard/interfaces/block_block_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/block_block_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block_block_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/block_block_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block_block_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo -c -o datawizard/interfaces/block_block_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/block_block_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block_block_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/block/block_interface-block_interface.o: datawizard/interfaces/block/block_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_interface.o -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo -c -o datawizard/interfaces/block/block_interface-block_interface.o `test -f 'datawizard/interfaces/block/block_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_interface.c' object='datawizard/interfaces/block/block_interface-block_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_interface.o `test -f 'datawizard/interfaces/block/block_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_interface.c + +datawizard/interfaces/block/block_interface-block_interface.obj: datawizard/interfaces/block/block_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_interface.obj -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo -c -o datawizard/interfaces/block/block_interface-block_interface.obj `if test -f 'datawizard/interfaces/block/block_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_interface.c' object='datawizard/interfaces/block/block_interface-block_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_interface.obj `if test -f 'datawizard/interfaces/block/block_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_interface.c'; else $(CYGPATH_W) 
'$(srcdir)/datawizard/interfaces/block/block_interface.c'; fi` + +datawizard/interfaces/block/block_interface-block_opencl.o: datawizard/interfaces/block/block_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_opencl.o -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo -c -o datawizard/interfaces/block/block_interface-block_opencl.o `test -f 'datawizard/interfaces/block/block_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_opencl.c' object='datawizard/interfaces/block/block_interface-block_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_opencl.o `test -f 'datawizard/interfaces/block/block_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_opencl.c + +datawizard/interfaces/block/block_interface-block_opencl.obj: datawizard/interfaces/block/block_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_opencl.obj -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo -c -o datawizard/interfaces/block/block_interface-block_opencl.obj `if 
test -f 'datawizard/interfaces/block/block_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_opencl.c' object='datawizard/interfaces/block/block_interface-block_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_opencl.obj `if test -f 'datawizard/interfaces/block/block_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_opencl.c'; fi` + +datawizard/interfaces/coo_coo_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo_coo_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' 
object='datawizard/interfaces/coo_coo_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/coo_coo_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo_coo_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/coo_coo_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 
'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/coo/coo_interface-coo_interface.o: datawizard/interfaces/coo/coo_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_interface.o -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_interface.o `test -f 'datawizard/interfaces/coo/coo_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_interface.c' object='datawizard/interfaces/coo/coo_interface-coo_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_interface.o `test -f 'datawizard/interfaces/coo/coo_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_interface.c + +datawizard/interfaces/coo/coo_interface-coo_interface.obj: datawizard/interfaces/coo/coo_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_interface.obj -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_interface.obj `if 
test -f 'datawizard/interfaces/coo/coo_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_interface.c' object='datawizard/interfaces/coo/coo_interface-coo_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_interface.obj `if test -f 'datawizard/interfaces/coo/coo_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_interface.c'; fi` + +datawizard/interfaces/coo/coo_interface-coo_opencl.o: datawizard/interfaces/coo/coo_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_opencl.o -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.o `test -f 'datawizard/interfaces/coo/coo_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_opencl.c' object='datawizard/interfaces/coo/coo_interface-coo_opencl.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.o `test -f 'datawizard/interfaces/coo/coo_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_opencl.c + +datawizard/interfaces/coo/coo_interface-coo_opencl.obj: datawizard/interfaces/coo/coo_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_opencl.obj -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.obj `if test -f 'datawizard/interfaces/coo/coo_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_opencl.c' object='datawizard/interfaces/coo/coo_interface-coo_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.obj `if test -f 'datawizard/interfaces/coo/coo_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_opencl.c'; fi` + 
+datawizard/interfaces/copy_interfaces-copy_interfaces.o: datawizard/interfaces/copy_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/copy_interfaces-copy_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.o `test -f 'datawizard/interfaces/copy_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/copy_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/copy_interfaces.c' object='datawizard/interfaces/copy_interfaces-copy_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.o `test -f 'datawizard/interfaces/copy_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/copy_interfaces.c + +datawizard/interfaces/copy_interfaces-copy_interfaces.obj: datawizard/interfaces/copy_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/copy_interfaces-copy_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.obj `if test -f 'datawizard/interfaces/copy_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/copy_interfaces.c'; else $(CYGPATH_W) 
'$(srcdir)/datawizard/interfaces/copy_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/copy_interfaces.c' object='datawizard/interfaces/copy_interfaces-copy_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.obj `if test -f 'datawizard/interfaces/copy_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/copy_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/copy_interfaces.c'; fi` + +datawizard/interfaces/csr_csr_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr_csr_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/csr_csr_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/csr_csr_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr_csr_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/csr_csr_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/csr/csr_interface-csr_interface.o: 
datawizard/interfaces/csr/csr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_interface.o -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_interface.o `test -f 'datawizard/interfaces/csr/csr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_interface.c' object='datawizard/interfaces/csr/csr_interface-csr_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_interface.o `test -f 'datawizard/interfaces/csr/csr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_interface.c + +datawizard/interfaces/csr/csr_interface-csr_interface.obj: datawizard/interfaces/csr/csr_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_interface.obj -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_interface.obj `if test -f 'datawizard/interfaces/csr/csr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_interface.c'; else $(CYGPATH_W) 
'$(srcdir)/datawizard/interfaces/csr/csr_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_interface.c' object='datawizard/interfaces/csr/csr_interface-csr_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_interface.obj `if test -f 'datawizard/interfaces/csr/csr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_interface.c'; fi` + +datawizard/interfaces/csr/csr_interface-csr_opencl.o: datawizard/interfaces/csr/csr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_opencl.o -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.o `test -f 'datawizard/interfaces/csr/csr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_opencl.c' object='datawizard/interfaces/csr/csr_interface-csr_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.o `test -f 'datawizard/interfaces/csr/csr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_opencl.c + +datawizard/interfaces/csr/csr_interface-csr_opencl.obj: datawizard/interfaces/csr/csr_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_opencl.obj -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.obj `if test -f 'datawizard/interfaces/csr/csr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_opencl.c' object='datawizard/interfaces/csr/csr_interface-csr_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.obj `if test -f 'datawizard/interfaces/csr/csr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_opencl.c'; fi` + +datawizard/interfaces/matrix_matrix_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix_matrix_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/matrix_matrix_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) 
'$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/matrix/matrix_interface-matrix_interface.o: datawizard/interfaces/matrix/matrix_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_interface.o -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.o `test -f 'datawizard/interfaces/matrix/matrix_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_interface.c' 
object='datawizard/interfaces/matrix/matrix_interface-matrix_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.o `test -f 'datawizard/interfaces/matrix/matrix_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_interface.c + +datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj: datawizard/interfaces/matrix/matrix_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj `if test -f 'datawizard/interfaces/matrix/matrix_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_interface.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o 
datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj `if test -f 'datawizard/interfaces/matrix/matrix_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_interface.c'; fi` + +datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o: datawizard/interfaces/matrix/matrix_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o `test -f 'datawizard/interfaces/matrix/matrix_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_opencl.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o `test -f 'datawizard/interfaces/matrix/matrix_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_opencl.c + +datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj: datawizard/interfaces/matrix/matrix_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj `if test -f 'datawizard/interfaces/matrix/matrix_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_opencl.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj `if test -f 'datawizard/interfaces/matrix/matrix_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_opencl.c'; fi` + +datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o `test 
-f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o: datawizard/interfaces/multiformat/multiformat_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o `test -f 'datawizard/interfaces/multiformat/multiformat_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_interface.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o `test -f 'datawizard/interfaces/multiformat/multiformat_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_interface.c + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj: datawizard/interfaces/multiformat/multiformat_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_interface.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_interface.c'; fi` + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o: datawizard/interfaces/multiformat/multiformat_conversion_codelets.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) 
$(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets.c + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj: datawizard/interfaces/multiformat/multiformat_conversion_codelets.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o 
datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; fi` + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o: datawizard/interfaces/multiformat/multiformat_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c' || echo 
'$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_opencl.c + +datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj: datawizard/interfaces/multiformat/multiformat_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_opencl.c'; fi` + 
+datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o: datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c + 
+datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj: datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; then 
$(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; fi` + +datawizard/interfaces/ndim_ndim_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim_ndim_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/ndim_ndim_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj -MD -MP -MF 
datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/ndim/ndim_interface-ndim_interface.o: datawizard/interfaces/ndim/ndim_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_interface.o -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.o `test -f 'datawizard/interfaces/ndim/ndim_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo 
datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_interface.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.o `test -f 'datawizard/interfaces/ndim/ndim_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_interface.c + +datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj: datawizard/interfaces/ndim/ndim_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj `if test -f 'datawizard/interfaces/ndim/ndim_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_interface.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj `if test -f 'datawizard/interfaces/ndim/ndim_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_interface.c'; fi` + +datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o: datawizard/interfaces/ndim/ndim_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o `test -f 'datawizard/interfaces/ndim/ndim_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_opencl.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o `test -f 'datawizard/interfaces/ndim/ndim_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_opencl.c + +datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj: datawizard/interfaces/ndim/ndim_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj `if test -f 'datawizard/interfaces/ndim/ndim_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_opencl.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj `if test -f 'datawizard/interfaces/ndim/ndim_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_opencl.c'; fi` + +datawizard/interfaces/tensor_tensor_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor_tensor_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/tensor_tensor_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/tensor/tensor_interface-tensor_interface.o: datawizard/interfaces/tensor/tensor_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_interface.o -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.o `test -f 'datawizard/interfaces/tensor/tensor_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_interface.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.o `test -f 'datawizard/interfaces/tensor/tensor_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_interface.c + 
+datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj: datawizard/interfaces/tensor/tensor_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj `if test -f 'datawizard/interfaces/tensor/tensor_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_interface.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj `if test -f 'datawizard/interfaces/tensor/tensor_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_interface.c'; fi` + +datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o: datawizard/interfaces/tensor/tensor_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT 
datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o `test -f 'datawizard/interfaces/tensor/tensor_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_opencl.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o `test -f 'datawizard/interfaces/tensor/tensor_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_opencl.c + +datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj: datawizard/interfaces/tensor/tensor_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj `if test -f 'datawizard/interfaces/tensor/tensor_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_opencl.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj `if test -f 'datawizard/interfaces/tensor/tensor_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_opencl.c'; fi` + +datawizard/interfaces/variable_variable_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable_variable_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/variable_variable_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/variable_variable_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable_variable_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/variable_variable_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 
'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/variable/variable_interface-variable_interface.o: datawizard/interfaces/variable/variable_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_interface.o -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_interface.o `test -f 'datawizard/interfaces/variable/variable_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_interface.c' object='datawizard/interfaces/variable/variable_interface-variable_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_interface.o `test -f 'datawizard/interfaces/variable/variable_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_interface.c + +datawizard/interfaces/variable/variable_interface-variable_interface.obj: datawizard/interfaces/variable/variable_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_interface.obj -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_interface.obj `if test -f 'datawizard/interfaces/variable/variable_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_interface.c' object='datawizard/interfaces/variable/variable_interface-variable_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_interface.obj `if test -f 'datawizard/interfaces/variable/variable_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_interface.c'; fi` + +datawizard/interfaces/variable/variable_interface-variable_opencl.o: datawizard/interfaces/variable/variable_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_opencl.o -MD -MP -MF 
datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.o `test -f 'datawizard/interfaces/variable/variable_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_opencl.c' object='datawizard/interfaces/variable/variable_interface-variable_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.o `test -f 'datawizard/interfaces/variable/variable_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_opencl.c + +datawizard/interfaces/variable/variable_interface-variable_opencl.obj: datawizard/interfaces/variable/variable_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_opencl.obj -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.obj `if test -f 'datawizard/interfaces/variable/variable_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_opencl.c' object='datawizard/interfaces/variable/variable_interface-variable_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.obj `if test -f 'datawizard/interfaces/variable/variable_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_opencl.c'; fi` + +datawizard/interfaces/vector/vector_interface-vector_interface.o: datawizard/interfaces/vector/vector_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_interface.o -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_interface.o `test -f 'datawizard/interfaces/vector/vector_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_interface.c' object='datawizard/interfaces/vector/vector_interface-vector_interface.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_interface.o `test -f 'datawizard/interfaces/vector/vector_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_interface.c + +datawizard/interfaces/vector/vector_interface-vector_interface.obj: datawizard/interfaces/vector/vector_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_interface.obj -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_interface.obj `if test -f 'datawizard/interfaces/vector/vector_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_interface.c' object='datawizard/interfaces/vector/vector_interface-vector_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_interface.obj `if test -f 
'datawizard/interfaces/vector/vector_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_interface.c'; fi` + +datawizard/interfaces/vector_vector_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector_vector_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/vector_vector_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/vector_vector_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT 
datawizard/interfaces/vector_vector_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/vector_vector_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/vector/vector_interface-vector_opencl.o: datawizard/interfaces/vector/vector_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_opencl.o -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.o `test -f 'datawizard/interfaces/vector/vector_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_opencl.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_opencl.c' object='datawizard/interfaces/vector/vector_interface-vector_opencl.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.o `test -f 'datawizard/interfaces/vector/vector_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_opencl.c + +datawizard/interfaces/vector/vector_interface-vector_opencl.obj: datawizard/interfaces/vector/vector_opencl.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_opencl.obj -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.obj `if test -f 'datawizard/interfaces/vector/vector_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_opencl.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_opencl.c' object='datawizard/interfaces/vector/vector_interface-vector_opencl.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.obj `if test -f 'datawizard/interfaces/vector/vector_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_opencl.c'; fi` + +datawizard/interfaces/void_void_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void_void_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo -c -o datawizard/interfaces/void_void_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/void_void_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void_void_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c + +datawizard/interfaces/void_void_interface-test_interfaces.obj: 
datawizard/interfaces/test_interfaces.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void_void_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo -c -o datawizard/interfaces/void_void_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/void_void_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void_void_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` + +datawizard/interfaces/void/void_interface-void_interface.o: datawizard/interfaces/void/void_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void/void_interface-void_interface.o -MD -MP -MF datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo -c -o 
datawizard/interfaces/void/void_interface-void_interface.o `test -f 'datawizard/interfaces/void/void_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/void/void_interface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/void/void_interface.c' object='datawizard/interfaces/void/void_interface-void_interface.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void/void_interface-void_interface.o `test -f 'datawizard/interfaces/void/void_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/void/void_interface.c + +datawizard/interfaces/void/void_interface-void_interface.obj: datawizard/interfaces/void/void_interface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void/void_interface-void_interface.obj -MD -MP -MF datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo -c -o datawizard/interfaces/void/void_interface-void_interface.obj `if test -f 'datawizard/interfaces/void/void_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/void/void_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/void/void_interface.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/void/void_interface.c' 
object='datawizard/interfaces/void/void_interface-void_interface.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void/void_interface-void_interface.obj `if test -f 'datawizard/interfaces/void/void_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/void/void_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/void/void_interface.c'; fi` + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +main/deprecated_func-deprecated_func.o: main/deprecated_func.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -MT main/deprecated_func-deprecated_func.o -MD -MP -MF main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo -c -o main/deprecated_func-deprecated_func.o `test -f 'main/deprecated_func.c' || echo '$(srcdir)/'`main/deprecated_func.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo main/$(DEPDIR)/deprecated_func-deprecated_func.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/deprecated_func.c' object='main/deprecated_func-deprecated_func.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -c -o main/deprecated_func-deprecated_func.o `test -f 'main/deprecated_func.c' || echo '$(srcdir)/'`main/deprecated_func.c + +main/deprecated_func-deprecated_func.obj: main/deprecated_func.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -MT main/deprecated_func-deprecated_func.obj -MD -MP -MF main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo -c -o main/deprecated_func-deprecated_func.obj `if test -f 'main/deprecated_func.c'; then $(CYGPATH_W) 'main/deprecated_func.c'; else $(CYGPATH_W) '$(srcdir)/main/deprecated_func.c'; fi` 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo main/$(DEPDIR)/deprecated_func-deprecated_func.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/deprecated_func.c' object='main/deprecated_func-deprecated_func.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -c -o main/deprecated_func-deprecated_func.obj `if test -f 'main/deprecated_func.c'; then $(CYGPATH_W) 'main/deprecated_func.c'; else $(CYGPATH_W) '$(srcdir)/main/deprecated_func.c'; fi` + +main/starpu_worker_exists-starpu_worker_exists.o: main/starpu_worker_exists.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -MT main/starpu_worker_exists-starpu_worker_exists.o -MD -MP -MF main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo -c -o main/starpu_worker_exists-starpu_worker_exists.o `test -f 'main/starpu_worker_exists.c' || echo '$(srcdir)/'`main/starpu_worker_exists.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/starpu_worker_exists.c' object='main/starpu_worker_exists-starpu_worker_exists.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -c -o main/starpu_worker_exists-starpu_worker_exists.o `test -f 'main/starpu_worker_exists.c' || echo '$(srcdir)/'`main/starpu_worker_exists.c + 
+main/starpu_worker_exists-starpu_worker_exists.obj: main/starpu_worker_exists.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -MT main/starpu_worker_exists-starpu_worker_exists.obj -MD -MP -MF main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo -c -o main/starpu_worker_exists-starpu_worker_exists.obj `if test -f 'main/starpu_worker_exists.c'; then $(CYGPATH_W) 'main/starpu_worker_exists.c'; else $(CYGPATH_W) '$(srcdir)/main/starpu_worker_exists.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/starpu_worker_exists.c' object='main/starpu_worker_exists-starpu_worker_exists.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -c -o main/starpu_worker_exists-starpu_worker_exists.obj `if test -f 'main/starpu_worker_exists.c'; then $(CYGPATH_W) 'main/starpu_worker_exists.c'; else $(CYGPATH_W) '$(srcdir)/main/starpu_worker_exists.c'; fi` + +sched_policies/execute_all_tasks-execute_all_tasks.o: sched_policies/execute_all_tasks.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -MT sched_policies/execute_all_tasks-execute_all_tasks.o -MD -MP -MF sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo -c -o sched_policies/execute_all_tasks-execute_all_tasks.o `test -f 'sched_policies/execute_all_tasks.c' || echo '$(srcdir)/'`sched_policies/execute_all_tasks.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_policies/execute_all_tasks.c' object='sched_policies/execute_all_tasks-execute_all_tasks.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -c -o sched_policies/execute_all_tasks-execute_all_tasks.o `test -f 'sched_policies/execute_all_tasks.c' || echo '$(srcdir)/'`sched_policies/execute_all_tasks.c + +sched_policies/execute_all_tasks-execute_all_tasks.obj: sched_policies/execute_all_tasks.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -MT sched_policies/execute_all_tasks-execute_all_tasks.obj -MD -MP -MF sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo -c -o sched_policies/execute_all_tasks-execute_all_tasks.obj `if test -f 'sched_policies/execute_all_tasks.c'; then $(CYGPATH_W) 'sched_policies/execute_all_tasks.c'; else $(CYGPATH_W) '$(srcdir)/sched_policies/execute_all_tasks.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_policies/execute_all_tasks.c' object='sched_policies/execute_all_tasks-execute_all_tasks.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -c -o 
sched_policies/execute_all_tasks-execute_all_tasks.obj `if test -f 'sched_policies/execute_all_tasks.c'; then $(CYGPATH_W) 'sched_policies/execute_all_tasks.c'; else $(CYGPATH_W) '$(srcdir)/sched_policies/execute_all_tasks.c'; fi` + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +.f90.o: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< + +.f90.obj: + $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.f90.lo: + $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< + 
+mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf datawizard/.libs datawizard/_libs + -rm -rf datawizard/interfaces/.libs datawizard/interfaces/_libs + -rm -rf datawizard/interfaces/bcsr/.libs datawizard/interfaces/bcsr/_libs + -rm -rf datawizard/interfaces/block/.libs datawizard/interfaces/block/_libs + -rm -rf datawizard/interfaces/coo/.libs datawizard/interfaces/coo/_libs + -rm -rf datawizard/interfaces/csr/.libs datawizard/interfaces/csr/_libs + -rm -rf datawizard/interfaces/matrix/.libs datawizard/interfaces/matrix/_libs + -rm -rf datawizard/interfaces/multiformat/.libs datawizard/interfaces/multiformat/_libs + -rm -rf datawizard/interfaces/multiformat/advanced/.libs datawizard/interfaces/multiformat/advanced/_libs + -rm -rf datawizard/interfaces/ndim/.libs datawizard/interfaces/ndim/_libs + -rm -rf datawizard/interfaces/tensor/.libs datawizard/interfaces/tensor/_libs + -rm -rf datawizard/interfaces/variable/.libs datawizard/interfaces/variable/_libs + -rm -rf datawizard/interfaces/vector/.libs datawizard/interfaces/vector/_libs + -rm -rf datawizard/interfaces/void/.libs datawizard/interfaces/void/_libs + -rm -rf disk/.libs disk/_libs + -rm -rf energy/.libs energy/_libs + -rm -rf errorcheck/.libs errorcheck/_libs + -rm -rf fault-tolerance/.libs fault-tolerance/_libs + -rm -rf fortran90/.libs fortran90/_libs + -rm -rf helper/.libs helper/_libs + -rm -rf main/.libs main/_libs + -rm -rf main/driver_api/.libs main/driver_api/_libs + -rm -rf maxfpga/.libs maxfpga/_libs + -rm -rf microbenchs/.libs microbenchs/_libs + -rm -rf openmp/.libs openmp/_libs + -rm -rf overlap/.libs overlap/_libs + -rm -rf parallel_tasks/.libs parallel_tasks/_libs + -rm -rf perfmodels/.libs perfmodels/_libs + -rm -rf sched_ctx/.libs sched_ctx/_libs + -rm -rf sched_policies/.libs sched_policies/_libs +install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) + @$(NORMAL_INSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n 
"$(STARPU_OPENCL_DATAdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ + echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ + $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_STARPU_OPENCL_DATADATA: + @$(NORMAL_UNINSTALL) + @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +coverage/coverage.sh.log: coverage/coverage.sh + @p='coverage/coverage.sh'; \ + b='coverage/coverage.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/tasks_data_overhead.sh.log: microbenchs/tasks_data_overhead.sh + @p='microbenchs/tasks_data_overhead.sh'; \ + b='microbenchs/tasks_data_overhead.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/sync_tasks_data_overhead.sh.log: microbenchs/sync_tasks_data_overhead.sh + @p='microbenchs/sync_tasks_data_overhead.sh'; \ + b='microbenchs/sync_tasks_data_overhead.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/async_tasks_data_overhead.sh.log: microbenchs/async_tasks_data_overhead.sh + @p='microbenchs/async_tasks_data_overhead.sh'; 
\ + b='microbenchs/async_tasks_data_overhead.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/tasks_size_overhead_scheds.sh.log: microbenchs/tasks_size_overhead_scheds.sh + @p='microbenchs/tasks_size_overhead_scheds.sh'; \ + b='microbenchs/tasks_size_overhead_scheds.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_independent_homogeneous_tasks.sh.log: microbenchs/parallel_independent_homogeneous_tasks.sh + @p='microbenchs/parallel_independent_homogeneous_tasks.sh'; \ + b='microbenchs/parallel_independent_homogeneous_tasks.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_independent_heterogeneous_tasks.sh.log: microbenchs/parallel_independent_heterogeneous_tasks.sh + @p='microbenchs/parallel_independent_heterogeneous_tasks.sh'; \ + b='microbenchs/parallel_independent_heterogeneous_tasks.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_independent_homogeneous_tasks_data.sh.log: microbenchs/parallel_independent_homogeneous_tasks_data.sh + @p='microbenchs/parallel_independent_homogeneous_tasks_data.sh'; \ + b='microbenchs/parallel_independent_homogeneous_tasks_data.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_independent_heterogeneous_tasks_data.sh.log: microbenchs/parallel_independent_heterogeneous_tasks_data.sh + @p='microbenchs/parallel_independent_heterogeneous_tasks_data.sh'; \ + b='microbenchs/parallel_independent_heterogeneous_tasks_data.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_redux_homogeneous_tasks_data.sh.log: microbenchs/parallel_redux_homogeneous_tasks_data.sh + @p='microbenchs/parallel_redux_homogeneous_tasks_data.sh'; \ + b='microbenchs/parallel_redux_homogeneous_tasks_data.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_redux_heterogeneous_tasks_data.sh.log: microbenchs/parallel_redux_heterogeneous_tasks_data.sh + @p='microbenchs/parallel_redux_heterogeneous_tasks_data.sh'; \ + b='microbenchs/parallel_redux_heterogeneous_tasks_data.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/parallel_dependent_homogeneous_tasks_data.sh.log: microbenchs/parallel_dependent_homogeneous_tasks_data.sh + @p='microbenchs/parallel_dependent_homogeneous_tasks_data.sh'; \ + b='microbenchs/parallel_dependent_homogeneous_tasks_data.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +datawizard/locality.sh.log: datawizard/locality.sh + @p='datawizard/locality.sh'; \ + b='datawizard/locality.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/bandwidth_scheds.sh.log: microbenchs/bandwidth_scheds.sh + @p='microbenchs/bandwidth_scheds.sh'; \ + b='microbenchs/bandwidth_scheds.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +overlap/overlap.sh.log: overlap/overlap.sh + @p='overlap/overlap.sh'; \ + b='overlap/overlap.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/callback.log: main/callback$(EXEEXT) + @p='main/callback$(EXEEXT)'; \ + b='main/callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/bind.log: main/bind$(EXEEXT) + @p='main/bind$(EXEEXT)'; \ + b='main/bind'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/mkdtemp.log: main/mkdtemp$(EXEEXT) + @p='main/mkdtemp$(EXEEXT)'; \ + b='main/mkdtemp'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+main/execute_schedule.log: main/execute_schedule$(EXEEXT) + @p='main/execute_schedule$(EXEEXT)'; \ + b='main/execute_schedule'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_pack.log: main/insert_task_pack$(EXEEXT) + @p='main/insert_task_pack$(EXEEXT)'; \ + b='main/insert_task_pack'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_nullcodelet.log: main/insert_task_nullcodelet$(EXEEXT) + @p='main/insert_task_nullcodelet$(EXEEXT)'; \ + b='main/insert_task_nullcodelet'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_where.log: main/insert_task_where$(EXEEXT) + @p='main/insert_task_where$(EXEEXT)'; \ + b='main/insert_task_where'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/multithreaded_init.log: main/multithreaded_init$(EXEEXT) + @p='main/multithreaded_init$(EXEEXT)'; \ + b='main/multithreaded_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/empty_task.log: main/empty_task$(EXEEXT) + @p='main/empty_task$(EXEEXT)'; \ + b='main/empty_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs 
\ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/empty_task_chain.log: main/empty_task_chain$(EXEEXT) + @p='main/empty_task_chain$(EXEEXT)'; \ + b='main/empty_task_chain'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/starpu_worker_exists.log: main/starpu_worker_exists$(EXEEXT) + @p='main/starpu_worker_exists$(EXEEXT)'; \ + b='main/starpu_worker_exists'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/codelet_null_callback.log: main/codelet_null_callback$(EXEEXT) + @p='main/codelet_null_callback$(EXEEXT)'; \ + b='main/codelet_null_callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/allocate.log: datawizard/allocate$(EXEEXT) + @p='datawizard/allocate$(EXEEXT)'; \ + b='datawizard/allocate'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_cb.log: datawizard/acquire_cb$(EXEEXT) + @p='datawizard/acquire_cb$(EXEEXT)'; \ + b='datawizard/acquire_cb'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/deps.log: datawizard/deps$(EXEEXT) + @p='datawizard/deps$(EXEEXT)'; \ + 
b='datawizard/deps'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/user_interaction_implicit.log: datawizard/user_interaction_implicit$(EXEEXT) + @p='datawizard/user_interaction_implicit$(EXEEXT)'; \ + b='datawizard/user_interaction_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/copy_interfaces.log: datawizard/interfaces/copy_interfaces$(EXEEXT) + @p='datawizard/interfaces/copy_interfaces$(EXEEXT)'; \ + b='datawizard/interfaces/copy_interfaces'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/numa_overflow.log: datawizard/numa_overflow$(EXEEXT) + @p='datawizard/numa_overflow$(EXEEXT)'; \ + b='datawizard/numa_overflow'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/locality.log: datawizard/locality$(EXEEXT) + @p='datawizard/locality$(EXEEXT)'; \ + b='datawizard/locality'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/variable_size.log: datawizard/variable_size$(EXEEXT) + @p='datawizard/variable_size$(EXEEXT)'; \ + b='datawizard/variable_size'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log 
--trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +errorcheck/starpu_init_noworker.log: errorcheck/starpu_init_noworker$(EXEEXT) + @p='errorcheck/starpu_init_noworker$(EXEEXT)'; \ + b='errorcheck/starpu_init_noworker'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +errorcheck/invalid_tasks.log: errorcheck/invalid_tasks$(EXEEXT) + @p='errorcheck/invalid_tasks$(EXEEXT)'; \ + b='errorcheck/invalid_tasks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/cublas_init.log: helper/cublas_init$(EXEEXT) + @p='helper/cublas_init$(EXEEXT)'; \ + b='helper/cublas_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/cublasLt_init.log: helper/cublasLt_init$(EXEEXT) + @p='helper/cublasLt_init$(EXEEXT)'; \ + b='helper/cublasLt_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/cusparse_init.log: helper/cusparse_init$(EXEEXT) + @p='helper/cusparse_init$(EXEEXT)'; \ + b='helper/cusparse_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/hipblas_init.log: helper/hipblas_init$(EXEEXT) + 
@p='helper/hipblas_init$(EXEEXT)'; \ + b='helper/hipblas_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/pinned_memory.log: helper/pinned_memory$(EXEEXT) + @p='helper/pinned_memory$(EXEEXT)'; \ + b='helper/pinned_memory'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/execute_on_all.log: helper/execute_on_all$(EXEEXT) + @p='helper/execute_on_all$(EXEEXT)'; \ + b='helper/execute_on_all'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/display_structures_size.log: microbenchs/display_structures_size$(EXEEXT) + @p='microbenchs/display_structures_size$(EXEEXT)'; \ + b='microbenchs/display_structures_size'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/local_pingpong.log: microbenchs/local_pingpong$(EXEEXT) + @p='microbenchs/local_pingpong$(EXEEXT)'; \ + b='microbenchs/local_pingpong'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +overlap/overlap.log: overlap/overlap$(EXEEXT) + @p='overlap/overlap$(EXEEXT)'; \ + b='overlap/overlap'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_list.log: sched_ctx/sched_ctx_list$(EXEEXT) + @p='sched_ctx/sched_ctx_list$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_list'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_policy_data.log: sched_ctx/sched_ctx_policy_data$(EXEEXT) + @p='sched_ctx/sched_ctx_policy_data$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_policy_data'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/init_exit_01.log: openmp/init_exit_01$(EXEEXT) + @p='openmp/init_exit_01$(EXEEXT)'; \ + b='openmp/init_exit_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/init_exit_02.log: openmp/init_exit_02$(EXEEXT) + @p='openmp/init_exit_02$(EXEEXT)'; \ + b='openmp/init_exit_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/environment.log: openmp/environment$(EXEEXT) + @p='openmp/environment$(EXEEXT)'; \ + b='openmp/environment'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/api_01.log: openmp/api_01$(EXEEXT) + @p='openmp/api_01$(EXEEXT)'; \ + b='openmp/api_01'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_01.log: openmp/parallel_01$(EXEEXT) + @p='openmp/parallel_01$(EXEEXT)'; \ + b='openmp/parallel_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_02.log: openmp/parallel_02$(EXEEXT) + @p='openmp/parallel_02$(EXEEXT)'; \ + b='openmp/parallel_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_03.log: openmp/parallel_03$(EXEEXT) + @p='openmp/parallel_03$(EXEEXT)'; \ + b='openmp/parallel_03'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_barrier_01.log: openmp/parallel_barrier_01$(EXEEXT) + @p='openmp/parallel_barrier_01$(EXEEXT)'; \ + b='openmp/parallel_barrier_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_master_01.log: openmp/parallel_master_01$(EXEEXT) + @p='openmp/parallel_master_01$(EXEEXT)'; \ + b='openmp/parallel_master_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+openmp/parallel_master_inline_01.log: openmp/parallel_master_inline_01$(EXEEXT) + @p='openmp/parallel_master_inline_01$(EXEEXT)'; \ + b='openmp/parallel_master_inline_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_single_wait_01.log: openmp/parallel_single_wait_01$(EXEEXT) + @p='openmp/parallel_single_wait_01$(EXEEXT)'; \ + b='openmp/parallel_single_wait_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_single_nowait_01.log: openmp/parallel_single_nowait_01$(EXEEXT) + @p='openmp/parallel_single_nowait_01$(EXEEXT)'; \ + b='openmp/parallel_single_nowait_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_single_inline_01.log: openmp/parallel_single_inline_01$(EXEEXT) + @p='openmp/parallel_single_inline_01$(EXEEXT)'; \ + b='openmp/parallel_single_inline_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_single_copyprivate_01.log: openmp/parallel_single_copyprivate_01$(EXEEXT) + @p='openmp/parallel_single_copyprivate_01$(EXEEXT)'; \ + b='openmp/parallel_single_copyprivate_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+openmp/parallel_single_copyprivate_inline_01.log: openmp/parallel_single_copyprivate_inline_01$(EXEEXT) + @p='openmp/parallel_single_copyprivate_inline_01$(EXEEXT)'; \ + b='openmp/parallel_single_copyprivate_inline_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_critical_01.log: openmp/parallel_critical_01$(EXEEXT) + @p='openmp/parallel_critical_01$(EXEEXT)'; \ + b='openmp/parallel_critical_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_critical_inline_01.log: openmp/parallel_critical_inline_01$(EXEEXT) + @p='openmp/parallel_critical_inline_01$(EXEEXT)'; \ + b='openmp/parallel_critical_inline_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_critical_named_01.log: openmp/parallel_critical_named_01$(EXEEXT) + @p='openmp/parallel_critical_named_01$(EXEEXT)'; \ + b='openmp/parallel_critical_named_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_critical_named_inline_01.log: openmp/parallel_critical_named_inline_01$(EXEEXT) + @p='openmp/parallel_critical_named_inline_01$(EXEEXT)'; \ + b='openmp/parallel_critical_named_inline_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) 
-- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_simple_lock_01.log: openmp/parallel_simple_lock_01$(EXEEXT) + @p='openmp/parallel_simple_lock_01$(EXEEXT)'; \ + b='openmp/parallel_simple_lock_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_nested_lock_01.log: openmp/parallel_nested_lock_01$(EXEEXT) + @p='openmp/parallel_nested_lock_01$(EXEEXT)'; \ + b='openmp/parallel_nested_lock_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_for_01.log: openmp/parallel_for_01$(EXEEXT) + @p='openmp/parallel_for_01$(EXEEXT)'; \ + b='openmp/parallel_for_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_for_02.log: openmp/parallel_for_02$(EXEEXT) + @p='openmp/parallel_for_02$(EXEEXT)'; \ + b='openmp/parallel_for_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_for_ordered_01.log: openmp/parallel_for_ordered_01$(EXEEXT) + @p='openmp/parallel_for_ordered_01$(EXEEXT)'; \ + b='openmp/parallel_for_ordered_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_sections_01.log: 
openmp/parallel_sections_01$(EXEEXT) + @p='openmp/parallel_sections_01$(EXEEXT)'; \ + b='openmp/parallel_sections_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/parallel_sections_combined_01.log: openmp/parallel_sections_combined_01$(EXEEXT) + @p='openmp/parallel_sections_combined_01$(EXEEXT)'; \ + b='openmp/parallel_sections_combined_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/task_01.log: openmp/task_01$(EXEEXT) + @p='openmp/task_01$(EXEEXT)'; \ + b='openmp/task_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/task_02.log: openmp/task_02$(EXEEXT) + @p='openmp/task_02$(EXEEXT)'; \ + b='openmp/task_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/task_03.log: openmp/task_03$(EXEEXT) + @p='openmp/task_03$(EXEEXT)'; \ + b='openmp/task_03'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/taskloop.log: openmp/taskloop$(EXEEXT) + @p='openmp/taskloop$(EXEEXT)'; \ + b='openmp/taskloop'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) 
-- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/taskwait_01.log: openmp/taskwait_01$(EXEEXT) + @p='openmp/taskwait_01$(EXEEXT)'; \ + b='openmp/taskwait_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/taskgroup_01.log: openmp/taskgroup_01$(EXEEXT) + @p='openmp/taskgroup_01$(EXEEXT)'; \ + b='openmp/taskgroup_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/taskgroup_02.log: openmp/taskgroup_02$(EXEEXT) + @p='openmp/taskgroup_02$(EXEEXT)'; \ + b='openmp/taskgroup_02'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/array_slice_01.log: openmp/array_slice_01$(EXEEXT) + @p='openmp/array_slice_01$(EXEEXT)'; \ + b='openmp/array_slice_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +openmp/cuda_task_01.log: openmp/cuda_task_01$(EXEEXT) + @p='openmp/cuda_task_01$(EXEEXT)'; \ + b='openmp/cuda_task_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/value_nan.log: perfmodels/value_nan$(EXEEXT) + @p='perfmodels/value_nan$(EXEEXT)'; \ + b='perfmodels/value_nan'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ 
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/workerids.log: sched_policies/workerids$(EXEEXT) + @p='sched_policies/workerids$(EXEEXT)'; \ + b='sched_policies/workerids'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +energy/energy_efficiency.log: energy/energy_efficiency$(EXEEXT) + @p='energy/energy_efficiency$(EXEEXT)'; \ + b='energy/energy_efficiency'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/simgrid-locality.log: datawizard/simgrid-locality$(EXEEXT) + @p='datawizard/simgrid-locality$(EXEEXT)'; \ + b='datawizard/simgrid-locality'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/deprecated_func.log: main/deprecated_func$(EXEEXT) + @p='main/deprecated_func$(EXEEXT)'; \ + b='main/deprecated_func'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/driver_api/init_run_deinit.log: main/driver_api/init_run_deinit$(EXEEXT) + @p='main/driver_api/init_run_deinit$(EXEEXT)'; \ + b='main/driver_api/init_run_deinit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+main/driver_api/run_driver.log: main/driver_api/run_driver$(EXEEXT) + @p='main/driver_api/run_driver$(EXEEXT)'; \ + b='main/driver_api/run_driver'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/deploop.log: main/deploop$(EXEEXT) + @p='main/deploop$(EXEEXT)'; \ + b='main/deploop'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/display_binding.log: main/display_binding$(EXEEXT) + @p='main/display_binding$(EXEEXT)'; \ + b='main/display_binding'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/execute_on_a_specific_worker.log: main/execute_on_a_specific_worker$(EXEEXT) + @p='main/execute_on_a_specific_worker$(EXEEXT)'; \ + b='main/execute_on_a_specific_worker'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task.log: main/insert_task$(EXEEXT) + @p='main/insert_task$(EXEEXT)'; \ + b='main/insert_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_value.log: main/insert_task_value$(EXEEXT) + @p='main/insert_task_value$(EXEEXT)'; \ + b='main/insert_task_value'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_dyn_handles.log: main/insert_task_dyn_handles$(EXEEXT) + @p='main/insert_task_dyn_handles$(EXEEXT)'; \ + b='main/insert_task_dyn_handles'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_array.log: main/insert_task_array$(EXEEXT) + @p='main/insert_task_array$(EXEEXT)'; \ + b='main/insert_task_array'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/insert_task_many.log: main/insert_task_many$(EXEEXT) + @p='main/insert_task_many$(EXEEXT)'; \ + b='main/insert_task_many'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/job.log: main/job$(EXEEXT) + @p='main/job$(EXEEXT)'; \ + b='main/job'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/multithreaded.log: main/multithreaded$(EXEEXT) + @p='main/multithreaded$(EXEEXT)'; \ + b='main/multithreaded'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/starpu_task_bundle.log: main/starpu_task_bundle$(EXEEXT) + @p='main/starpu_task_bundle$(EXEEXT)'; \ + b='main/starpu_task_bundle'; \ + 
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/starpu_task_wait_for_all.log: main/starpu_task_wait_for_all$(EXEEXT) + @p='main/starpu_task_wait_for_all$(EXEEXT)'; \ + b='main/starpu_task_wait_for_all'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/starpu_task_wait.log: main/starpu_task_wait$(EXEEXT) + @p='main/starpu_task_wait$(EXEEXT)'; \ + b='main/starpu_task_wait'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/static_restartable.log: main/static_restartable$(EXEEXT) + @p='main/static_restartable$(EXEEXT)'; \ + b='main/static_restartable'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/static_restartable_using_initializer.log: main/static_restartable_using_initializer$(EXEEXT) + @p='main/static_restartable_using_initializer$(EXEEXT)'; \ + b='main/static_restartable_using_initializer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/static_restartable_tag.log: main/static_restartable_tag$(EXEEXT) + @p='main/static_restartable_tag$(EXEEXT)'; \ + b='main/static_restartable_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/regenerate.log: main/regenerate$(EXEEXT) + @p='main/regenerate$(EXEEXT)'; \ + b='main/regenerate'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/regenerate_pipeline.log: main/regenerate_pipeline$(EXEEXT) + @p='main/regenerate_pipeline$(EXEEXT)'; \ + b='main/regenerate_pipeline'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/restart.log: main/restart$(EXEEXT) + @p='main/restart$(EXEEXT)'; \ + b='main/restart'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/wait_all_regenerable_tasks.log: main/wait_all_regenerable_tasks$(EXEEXT) + @p='main/wait_all_regenerable_tasks$(EXEEXT)'; \ + b='main/wait_all_regenerable_tasks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/subgraph_repeat.log: main/subgraph_repeat$(EXEEXT) + @p='main/subgraph_repeat$(EXEEXT)'; \ + b='main/subgraph_repeat'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/subgraph_repeat_tag.log: main/subgraph_repeat_tag$(EXEEXT) + @p='main/subgraph_repeat_tag$(EXEEXT)'; \ + 
b='main/subgraph_repeat_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/subgraph_repeat_regenerate.log: main/subgraph_repeat_regenerate$(EXEEXT) + @p='main/subgraph_repeat_regenerate$(EXEEXT)'; \ + b='main/subgraph_repeat_regenerate'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/subgraph_repeat_regenerate_tag.log: main/subgraph_repeat_regenerate_tag$(EXEEXT) + @p='main/subgraph_repeat_regenerate_tag$(EXEEXT)'; \ + b='main/subgraph_repeat_regenerate_tag'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/subgraph_repeat_regenerate_tag_cycle.log: main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) + @p='main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT)'; \ + b='main/subgraph_repeat_regenerate_tag_cycle'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/empty_task_sync_point.log: main/empty_task_sync_point$(EXEEXT) + @p='main/empty_task_sync_point$(EXEEXT)'; \ + b='main/empty_task_sync_point'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/empty_task_sync_point_tasks.log: main/empty_task_sync_point_tasks$(EXEEXT) + @p='main/empty_task_sync_point_tasks$(EXEEXT)'; \ + 
b='main/empty_task_sync_point_tasks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/tag_wait_api.log: main/tag_wait_api$(EXEEXT) + @p='main/tag_wait_api$(EXEEXT)'; \ + b='main/tag_wait_api'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/tag_get_task.log: main/tag_get_task$(EXEEXT) + @p='main/tag_get_task$(EXEEXT)'; \ + b='main/tag_get_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/task_wait_api.log: main/task_wait_api$(EXEEXT) + @p='main/task_wait_api$(EXEEXT)'; \ + b='main/task_wait_api'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/declare_deps_in_callback.log: main/declare_deps_in_callback$(EXEEXT) + @p='main/declare_deps_in_callback$(EXEEXT)'; \ + b='main/declare_deps_in_callback'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/declare_deps_after_submission.log: main/declare_deps_after_submission$(EXEEXT) + @p='main/declare_deps_after_submission$(EXEEXT)'; \ + b='main/declare_deps_after_submission'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/declare_deps_after_submission_synchronous.log: main/declare_deps_after_submission_synchronous$(EXEEXT) + @p='main/declare_deps_after_submission_synchronous$(EXEEXT)'; \ + b='main/declare_deps_after_submission_synchronous'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/get_current_task.log: main/get_current_task$(EXEEXT) + @p='main/get_current_task$(EXEEXT)'; \ + b='main/get_current_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/starpu_init.log: main/starpu_init$(EXEEXT) + @p='main/starpu_init$(EXEEXT)'; \ + b='main/starpu_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/submit.log: main/submit$(EXEEXT) + @p='main/submit$(EXEEXT)'; \ + b='main/submit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/const_codelet.log: main/const_codelet$(EXEEXT) + @p='main/const_codelet$(EXEEXT)'; \ + b='main/const_codelet'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/pause_resume.log: main/pause_resume$(EXEEXT) + @p='main/pause_resume$(EXEEXT)'; \ + b='main/pause_resume'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/pack.log: main/pack$(EXEEXT) + @p='main/pack$(EXEEXT)'; \ + b='main/pack'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/get_children_tasks.log: main/get_children_tasks$(EXEEXT) + @p='main/get_children_tasks$(EXEEXT)'; \ + b='main/get_children_tasks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/hwloc_cpuset.log: main/hwloc_cpuset$(EXEEXT) + @p='main/hwloc_cpuset$(EXEEXT)'; \ + b='main/hwloc_cpuset'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/task_end_dep.log: main/task_end_dep$(EXEEXT) + @p='main/task_end_dep$(EXEEXT)'; \ + b='main/task_end_dep'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_cb_insert.log: datawizard/acquire_cb_insert$(EXEEXT) + @p='datawizard/acquire_cb_insert$(EXEEXT)'; \ + b='datawizard/acquire_cb_insert'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_release.log: datawizard/acquire_release$(EXEEXT) + 
@p='datawizard/acquire_release$(EXEEXT)'; \ + b='datawizard/acquire_release'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_release2.log: datawizard/acquire_release2$(EXEEXT) + @p='datawizard/acquire_release2$(EXEEXT)'; \ + b='datawizard/acquire_release2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_release_to.log: datawizard/acquire_release_to$(EXEEXT) + @p='datawizard/acquire_release_to$(EXEEXT)'; \ + b='datawizard/acquire_release_to'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/acquire_try.log: datawizard/acquire_try$(EXEEXT) + @p='datawizard/acquire_try$(EXEEXT)'; \ + b='datawizard/acquire_try'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/bcsr.log: datawizard/bcsr$(EXEEXT) + @p='datawizard/bcsr$(EXEEXT)'; \ + b='datawizard/bcsr'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/cache.log: datawizard/cache$(EXEEXT) + @p='datawizard/cache$(EXEEXT)'; \ + b='datawizard/cache'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) 
$(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/commute.log: datawizard/commute$(EXEEXT) + @p='datawizard/commute$(EXEEXT)'; \ + b='datawizard/commute'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/commute2.log: datawizard/commute2$(EXEEXT) + @p='datawizard/commute2$(EXEEXT)'; \ + b='datawizard/commute2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/copy.log: datawizard/copy$(EXEEXT) + @p='datawizard/copy$(EXEEXT)'; \ + b='datawizard/copy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/data_implicit_deps.log: datawizard/data_implicit_deps$(EXEEXT) + @p='datawizard/data_implicit_deps$(EXEEXT)'; \ + b='datawizard/data_implicit_deps'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/data_register.log: datawizard/data_register$(EXEEXT) + @p='datawizard/data_register$(EXEEXT)'; \ + b='datawizard/data_register'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/scratch.log: datawizard/scratch$(EXEEXT) + @p='datawizard/scratch$(EXEEXT)'; \ + b='datawizard/scratch'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/scratch_reuse.log: datawizard/scratch_reuse$(EXEEXT) + @p='datawizard/scratch_reuse$(EXEEXT)'; \ + b='datawizard/scratch_reuse'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/sync_and_notify_data.log: datawizard/sync_and_notify_data$(EXEEXT) + @p='datawizard/sync_and_notify_data$(EXEEXT)'; \ + b='datawizard/sync_and_notify_data'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/sync_and_notify_data_implicit.log: datawizard/sync_and_notify_data_implicit$(EXEEXT) + @p='datawizard/sync_and_notify_data_implicit$(EXEEXT)'; \ + b='datawizard/sync_and_notify_data_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/dsm_stress.log: datawizard/dsm_stress$(EXEEXT) + @p='datawizard/dsm_stress$(EXEEXT)'; \ + b='datawizard/dsm_stress'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/double_parameter.log: datawizard/double_parameter$(EXEEXT) + @p='datawizard/double_parameter$(EXEEXT)'; \ + b='datawizard/double_parameter'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/write_only_tmp_buffer.log: datawizard/write_only_tmp_buffer$(EXEEXT) + @p='datawizard/write_only_tmp_buffer$(EXEEXT)'; \ + b='datawizard/write_only_tmp_buffer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/data_invalidation.log: datawizard/data_invalidation$(EXEEXT) + @p='datawizard/data_invalidation$(EXEEXT)'; \ + b='datawizard/data_invalidation'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/data_deinitialize.log: datawizard/data_deinitialize$(EXEEXT) + @p='datawizard/data_deinitialize$(EXEEXT)'; \ + b='datawizard/data_deinitialize'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/dining_philosophers.log: datawizard/dining_philosophers$(EXEEXT) + @p='datawizard/dining_philosophers$(EXEEXT)'; \ + b='datawizard/dining_philosophers'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/manual_reduction.log: datawizard/manual_reduction$(EXEEXT) + @p='datawizard/manual_reduction$(EXEEXT)'; \ + b='datawizard/manual_reduction'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- 
$(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/readers_and_writers.log: datawizard/readers_and_writers$(EXEEXT) + @p='datawizard/readers_and_writers$(EXEEXT)'; \ + b='datawizard/readers_and_writers'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/unpartition.log: datawizard/unpartition$(EXEEXT) + @p='datawizard/unpartition$(EXEEXT)'; \ + b='datawizard/unpartition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/sync_with_data_with_mem.log: datawizard/sync_with_data_with_mem$(EXEEXT) + @p='datawizard/sync_with_data_with_mem$(EXEEXT)'; \ + b='datawizard/sync_with_data_with_mem'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/sync_with_data_with_mem_non_blocking.log: datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) + @p='datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT)'; \ + b='datawizard/sync_with_data_with_mem_non_blocking'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/sync_with_data_with_mem_non_blocking_implicit.log: datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) + @p='datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT)'; \ + b='datawizard/sync_with_data_with_mem_non_blocking_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log 
--trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/mpi_like.log: datawizard/mpi_like$(EXEEXT) + @p='datawizard/mpi_like$(EXEEXT)'; \ + b='datawizard/mpi_like'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/mpi_like_async.log: datawizard/mpi_like_async$(EXEEXT) + @p='datawizard/mpi_like_async$(EXEEXT)'; \ + b='datawizard/mpi_like_async'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/critical_section_with_void_interface.log: datawizard/critical_section_with_void_interface$(EXEEXT) + @p='datawizard/critical_section_with_void_interface$(EXEEXT)'; \ + b='datawizard/critical_section_with_void_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_init.log: datawizard/increment_init$(EXEEXT) + @p='datawizard/increment_init$(EXEEXT)'; \ + b='datawizard/increment_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_redux.log: datawizard/increment_redux$(EXEEXT) + @p='datawizard/increment_redux$(EXEEXT)'; \ + b='datawizard/increment_redux'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) 
$(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_redux_partition.log: datawizard/increment_redux_partition$(EXEEXT) + @p='datawizard/increment_redux_partition$(EXEEXT)'; \ + b='datawizard/increment_redux_partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_redux_v2.log: datawizard/increment_redux_v2$(EXEEXT) + @p='datawizard/increment_redux_v2$(EXEEXT)'; \ + b='datawizard/increment_redux_v2'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_redux_with_args.log: datawizard/increment_redux_with_args$(EXEEXT) + @p='datawizard/increment_redux_with_args$(EXEEXT)'; \ + b='datawizard/increment_redux_with_args'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/increment_redux_lazy.log: datawizard/increment_redux_lazy$(EXEEXT) + @p='datawizard/increment_redux_lazy$(EXEEXT)'; \ + b='datawizard/increment_redux_lazy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/handle_to_pointer.log: datawizard/handle_to_pointer$(EXEEXT) + @p='datawizard/handle_to_pointer$(EXEEXT)'; \ + b='datawizard/handle_to_pointer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) 
-- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/lazy_allocation.log: datawizard/lazy_allocation$(EXEEXT) + @p='datawizard/lazy_allocation$(EXEEXT)'; \ + b='datawizard/lazy_allocation'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/no_unregister.log: datawizard/no_unregister$(EXEEXT) + @p='datawizard/no_unregister$(EXEEXT)'; \ + b='datawizard/no_unregister'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/noreclaim.log: datawizard/noreclaim$(EXEEXT) + @p='datawizard/noreclaim$(EXEEXT)'; \ + b='datawizard/noreclaim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/nowhere.log: datawizard/nowhere$(EXEEXT) + @p='datawizard/nowhere$(EXEEXT)'; \ + b='datawizard/nowhere'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/block/block_interface.log: datawizard/interfaces/block/block_interface$(EXEEXT) + @p='datawizard/interfaces/block/block_interface$(EXEEXT)'; \ + b='datawizard/interfaces/block/block_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/bcsr/bcsr_interface.log: 
datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) + @p='datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT)'; \ + b='datawizard/interfaces/bcsr/bcsr_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/coo/coo_interface.log: datawizard/interfaces/coo/coo_interface$(EXEEXT) + @p='datawizard/interfaces/coo/coo_interface$(EXEEXT)'; \ + b='datawizard/interfaces/coo/coo_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/csr/csr_interface.log: datawizard/interfaces/csr/csr_interface$(EXEEXT) + @p='datawizard/interfaces/csr/csr_interface$(EXEEXT)'; \ + b='datawizard/interfaces/csr/csr_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/matrix/matrix_interface.log: datawizard/interfaces/matrix/matrix_interface$(EXEEXT) + @p='datawizard/interfaces/matrix/matrix_interface$(EXEEXT)'; \ + b='datawizard/interfaces/matrix/matrix_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/multiformat_interface.log: datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) + @p='datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT)'; \ + b='datawizard/interfaces/multiformat/multiformat_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ 
+ --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.log: datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) + @p='datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT)'; \ + b='datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/advanced/multiformat_data_release.log: datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) + @p='datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT)'; \ + b='datawizard/interfaces/multiformat/advanced/multiformat_data_release'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/advanced/multiformat_worker.log: datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) + @p='datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT)'; \ + b='datawizard/interfaces/multiformat/advanced/multiformat_worker'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.log: datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) + @p='datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT)'; 
\ + b='datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/multiformat/advanced/same_handle.log: datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) + @p='datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT)'; \ + b='datawizard/interfaces/multiformat/advanced/same_handle'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/tensor/tensor_interface.log: datawizard/interfaces/tensor/tensor_interface$(EXEEXT) + @p='datawizard/interfaces/tensor/tensor_interface$(EXEEXT)'; \ + b='datawizard/interfaces/tensor/tensor_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/ndim/ndim_interface.log: datawizard/interfaces/ndim/ndim_interface$(EXEEXT) + @p='datawizard/interfaces/ndim/ndim_interface$(EXEEXT)'; \ + b='datawizard/interfaces/ndim/ndim_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/variable/variable_interface.log: datawizard/interfaces/variable/variable_interface$(EXEEXT) + @p='datawizard/interfaces/variable/variable_interface$(EXEEXT)'; \ + b='datawizard/interfaces/variable/variable_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file 
$$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/vector/vector_interface.log: datawizard/interfaces/vector/vector_interface$(EXEEXT) + @p='datawizard/interfaces/vector/vector_interface$(EXEEXT)'; \ + b='datawizard/interfaces/vector/vector_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/interfaces/void/void_interface.log: datawizard/interfaces/void/void_interface$(EXEEXT) + @p='datawizard/interfaces/void/void_interface$(EXEEXT)'; \ + b='datawizard/interfaces/void/void_interface'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/in_place_partition.log: datawizard/in_place_partition$(EXEEXT) + @p='datawizard/in_place_partition$(EXEEXT)'; \ + b='datawizard/in_place_partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partition_dep.log: datawizard/partition_dep$(EXEEXT) + @p='datawizard/partition_dep$(EXEEXT)'; \ + b='datawizard/partition_dep'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partition_lazy.log: datawizard/partition_lazy$(EXEEXT) + @p='datawizard/partition_lazy$(EXEEXT)'; \ + b='datawizard/partition_lazy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file 
$$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partition_init.log: datawizard/partition_init$(EXEEXT) + @p='datawizard/partition_init$(EXEEXT)'; \ + b='datawizard/partition_init'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partition_wontuse.log: datawizard/partition_wontuse$(EXEEXT) + @p='datawizard/partition_wontuse$(EXEEXT)'; \ + b='datawizard/partition_wontuse'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/gpu_register.log: datawizard/gpu_register$(EXEEXT) + @p='datawizard/gpu_register$(EXEEXT)'; \ + b='datawizard/gpu_register'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/gpu_ptr_register.log: datawizard/gpu_ptr_register$(EXEEXT) + @p='datawizard/gpu_ptr_register$(EXEEXT)'; \ + b='datawizard/gpu_ptr_register'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/variable_parameters.log: datawizard/variable_parameters$(EXEEXT) + @p='datawizard/variable_parameters$(EXEEXT)'; \ + b='datawizard/variable_parameters'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +datawizard/wt_host.log: datawizard/wt_host$(EXEEXT) + @p='datawizard/wt_host$(EXEEXT)'; \ + b='datawizard/wt_host'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/wt_broadcast.log: datawizard/wt_broadcast$(EXEEXT) + @p='datawizard/wt_broadcast$(EXEEXT)'; \ + b='datawizard/wt_broadcast'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/readonly.log: datawizard/readonly$(EXEEXT) + @p='datawizard/readonly$(EXEEXT)'; \ + b='datawizard/readonly'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/specific_node.log: datawizard/specific_node$(EXEEXT) + @p='datawizard/specific_node$(EXEEXT)'; \ + b='datawizard/specific_node'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/specific_node_same.log: datawizard/specific_node_same$(EXEEXT) + @p='datawizard/specific_node_same$(EXEEXT)'; \ + b='datawizard/specific_node_same'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/task_with_multiple_time_the_same_handle.log: datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) + 
@p='datawizard/task_with_multiple_time_the_same_handle$(EXEEXT)'; \ + b='datawizard/task_with_multiple_time_the_same_handle'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/test_arbiter.log: datawizard/test_arbiter$(EXEEXT) + @p='datawizard/test_arbiter$(EXEEXT)'; \ + b='datawizard/test_arbiter'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/invalidate_pending_requests.log: datawizard/invalidate_pending_requests$(EXEEXT) + @p='datawizard/invalidate_pending_requests$(EXEEXT)'; \ + b='datawizard/invalidate_pending_requests'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/deinitialize_pending_requests.log: datawizard/deinitialize_pending_requests$(EXEEXT) + @p='datawizard/deinitialize_pending_requests$(EXEEXT)'; \ + b='datawizard/deinitialize_pending_requests'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/temporary_partition.log: datawizard/temporary_partition$(EXEEXT) + @p='datawizard/temporary_partition$(EXEEXT)'; \ + b='datawizard/temporary_partition'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partitioned_initialization.log: 
datawizard/partitioned_initialization$(EXEEXT) + @p='datawizard/partitioned_initialization$(EXEEXT)'; \ + b='datawizard/partitioned_initialization'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/partitioned_acquire.log: datawizard/partitioned_acquire$(EXEEXT) + @p='datawizard/partitioned_acquire$(EXEEXT)'; \ + b='datawizard/partitioned_acquire'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/temporary_partition_implicit.log: datawizard/temporary_partition_implicit$(EXEEXT) + @p='datawizard/temporary_partition_implicit$(EXEEXT)'; \ + b='datawizard/temporary_partition_implicit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/temporary_partition_read.log: datawizard/temporary_partition_read$(EXEEXT) + @p='datawizard/temporary_partition_read$(EXEEXT)'; \ + b='datawizard/temporary_partition_read'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/redux_acquire.log: datawizard/redux_acquire$(EXEEXT) + @p='datawizard/redux_acquire$(EXEEXT)'; \ + b='datawizard/redux_acquire'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/disk_copy.log: 
disk/disk_copy$(EXEEXT) + @p='disk/disk_copy$(EXEEXT)'; \ + b='disk/disk_copy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/disk_copy_unpack.log: disk/disk_copy_unpack$(EXEEXT) + @p='disk/disk_copy_unpack$(EXEEXT)'; \ + b='disk/disk_copy_unpack'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/disk_copy_to_disk.log: disk/disk_copy_to_disk$(EXEEXT) + @p='disk/disk_copy_to_disk$(EXEEXT)'; \ + b='disk/disk_copy_to_disk'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/disk_compute.log: disk/disk_compute$(EXEEXT) + @p='disk/disk_compute$(EXEEXT)'; \ + b='disk/disk_compute'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/disk_pack.log: disk/disk_pack$(EXEEXT) + @p='disk/disk_pack$(EXEEXT)'; \ + b='disk/disk_pack'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +disk/mem_reclaim.log: disk/mem_reclaim$(EXEEXT) + @p='disk/mem_reclaim$(EXEEXT)'; \ + b='disk/mem_reclaim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +errorcheck/invalid_blocking_calls.log: errorcheck/invalid_blocking_calls$(EXEEXT) + @p='errorcheck/invalid_blocking_calls$(EXEEXT)'; \ + b='errorcheck/invalid_blocking_calls'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +errorcheck/workers_cpuid.log: errorcheck/workers_cpuid$(EXEEXT) + @p='errorcheck/workers_cpuid$(EXEEXT)'; \ + b='errorcheck/workers_cpuid'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +fault-tolerance/retry.log: fault-tolerance/retry$(EXEEXT) + @p='fault-tolerance/retry$(EXEEXT)'; \ + b='fault-tolerance/retry'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/starpu_data_cpy.log: helper/starpu_data_cpy$(EXEEXT) + @p='helper/starpu_data_cpy$(EXEEXT)'; \ + b='helper/starpu_data_cpy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/starpu_data_dup_ro.log: helper/starpu_data_dup_ro$(EXEEXT) + @p='helper/starpu_data_dup_ro$(EXEEXT)'; \ + b='helper/starpu_data_dup_ro'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +helper/starpu_create_sync_task.log: helper/starpu_create_sync_task$(EXEEXT) + @p='helper/starpu_create_sync_task$(EXEEXT)'; \ + 
b='helper/starpu_create_sync_task'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/async_tasks_overhead.log: microbenchs/async_tasks_overhead$(EXEEXT) + @p='microbenchs/async_tasks_overhead$(EXEEXT)'; \ + b='microbenchs/async_tasks_overhead'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/sync_tasks_overhead.log: microbenchs/sync_tasks_overhead$(EXEEXT) + @p='microbenchs/sync_tasks_overhead$(EXEEXT)'; \ + b='microbenchs/sync_tasks_overhead'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/tasks_overhead.log: microbenchs/tasks_overhead$(EXEEXT) + @p='microbenchs/tasks_overhead$(EXEEXT)'; \ + b='microbenchs/tasks_overhead'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/tasks_size_overhead.log: microbenchs/tasks_size_overhead$(EXEEXT) + @p='microbenchs/tasks_size_overhead$(EXEEXT)'; \ + b='microbenchs/tasks_size_overhead'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/prefetch_data_on_node.log: microbenchs/prefetch_data_on_node$(EXEEXT) + @p='microbenchs/prefetch_data_on_node$(EXEEXT)'; \ + b='microbenchs/prefetch_data_on_node'; \ + 
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/redundant_buffer.log: microbenchs/redundant_buffer$(EXEEXT) + @p='microbenchs/redundant_buffer$(EXEEXT)'; \ + b='microbenchs/redundant_buffer'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/matrix_as_vector.log: microbenchs/matrix_as_vector$(EXEEXT) + @p='microbenchs/matrix_as_vector$(EXEEXT)'; \ + b='microbenchs/matrix_as_vector'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +microbenchs/bandwidth.log: microbenchs/bandwidth$(EXEEXT) + @p='microbenchs/bandwidth$(EXEEXT)'; \ + b='microbenchs/bandwidth'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +overlap/gpu_concurrency.log: overlap/gpu_concurrency$(EXEEXT) + @p='overlap/gpu_concurrency$(EXEEXT)'; \ + b='overlap/gpu_concurrency'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/swap.log: parallel_tasks/swap$(EXEEXT) + @p='parallel_tasks/swap$(EXEEXT)'; \ + b='parallel_tasks/swap'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) 
\ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/combined_worker_assign_workerid.log: parallel_tasks/combined_worker_assign_workerid$(EXEEXT) + @p='parallel_tasks/combined_worker_assign_workerid$(EXEEXT)'; \ + b='parallel_tasks/combined_worker_assign_workerid'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/explicit_combined_worker.log: parallel_tasks/explicit_combined_worker$(EXEEXT) + @p='parallel_tasks/explicit_combined_worker$(EXEEXT)'; \ + b='parallel_tasks/explicit_combined_worker'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/parallel_kernels.log: parallel_tasks/parallel_kernels$(EXEEXT) + @p='parallel_tasks/parallel_kernels$(EXEEXT)'; \ + b='parallel_tasks/parallel_kernels'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/parallel_kernels_trivial.log: parallel_tasks/parallel_kernels_trivial$(EXEEXT) + @p='parallel_tasks/parallel_kernels_trivial$(EXEEXT)'; \ + b='parallel_tasks/parallel_kernels_trivial'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/parallel_kernels_spmd.log: parallel_tasks/parallel_kernels_spmd$(EXEEXT) + @p='parallel_tasks/parallel_kernels_spmd$(EXEEXT)'; \ + b='parallel_tasks/parallel_kernels_spmd'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log 
--trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/spmd_peager.log: parallel_tasks/spmd_peager$(EXEEXT) + @p='parallel_tasks/spmd_peager$(EXEEXT)'; \ + b='parallel_tasks/spmd_peager'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +parallel_tasks/cuda_only.log: parallel_tasks/cuda_only$(EXEEXT) + @p='parallel_tasks/cuda_only$(EXEEXT)'; \ + b='parallel_tasks/cuda_only'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/regression_based_memset.log: perfmodels/regression_based_memset$(EXEEXT) + @p='perfmodels/regression_based_memset$(EXEEXT)'; \ + b='perfmodels/regression_based_memset'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/regression_based_check.log: perfmodels/regression_based_check$(EXEEXT) + @p='perfmodels/regression_based_check$(EXEEXT)'; \ + b='perfmodels/regression_based_check'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/regression_based_multiimpl.log: perfmodels/regression_based_multiimpl$(EXEEXT) + @p='perfmodels/regression_based_multiimpl$(EXEEXT)'; \ + b='perfmodels/regression_based_multiimpl'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/regression_based_energy.log: perfmodels/regression_based_energy$(EXEEXT) + @p='perfmodels/regression_based_energy$(EXEEXT)'; \ + b='perfmodels/regression_based_energy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/regression_based_gpu.log: perfmodels/regression_based_gpu$(EXEEXT) + @p='perfmodels/regression_based_gpu$(EXEEXT)'; \ + b='perfmodels/regression_based_gpu'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/non_linear_regression_based.log: perfmodels/non_linear_regression_based$(EXEEXT) + @p='perfmodels/non_linear_regression_based$(EXEEXT)'; \ + b='perfmodels/non_linear_regression_based'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/feed.log: perfmodels/feed$(EXEEXT) + @p='perfmodels/feed$(EXEEXT)'; \ + b='perfmodels/feed'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/user_base.log: perfmodels/user_base$(EXEEXT) + @p='perfmodels/user_base$(EXEEXT)'; \ + b='perfmodels/user_base'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" 
$(AM_TESTS_FD_REDIRECT) +perfmodels/valid_model.log: perfmodels/valid_model$(EXEEXT) + @p='perfmodels/valid_model$(EXEEXT)'; \ + b='perfmodels/valid_model'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/path.log: perfmodels/path$(EXEEXT) + @p='perfmodels/path$(EXEEXT)'; \ + b='perfmodels/path'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +perfmodels/memory.log: perfmodels/memory$(EXEEXT) + @p='perfmodels/memory$(EXEEXT)'; \ + b='perfmodels/memory'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/data_locality.log: sched_policies/data_locality$(EXEEXT) + @p='sched_policies/data_locality$(EXEEXT)'; \ + b='sched_policies/data_locality'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/execute_all_tasks.log: sched_policies/execute_all_tasks$(EXEEXT) + @p='sched_policies/execute_all_tasks$(EXEEXT)'; \ + b='sched_policies/execute_all_tasks'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/prio.log: sched_policies/prio$(EXEEXT) + @p='sched_policies/prio$(EXEEXT)'; \ + b='sched_policies/prio'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + 
--log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/simple_deps.log: sched_policies/simple_deps$(EXEEXT) + @p='sched_policies/simple_deps$(EXEEXT)'; \ + b='sched_policies/simple_deps'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_policies/simple_cpu_gpu_sched.log: sched_policies/simple_cpu_gpu_sched$(EXEEXT) + @p='sched_policies/simple_cpu_gpu_sched$(EXEEXT)'; \ + b='sched_policies/simple_cpu_gpu_sched'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +sched_ctx/sched_ctx_hierarchy.log: sched_ctx/sched_ctx_hierarchy$(EXEEXT) + @p='sched_ctx/sched_ctx_hierarchy$(EXEEXT)'; \ + b='sched_ctx/sched_ctx_hierarchy'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +maxfpga/max_fpga_basic_static.log: maxfpga/max_fpga_basic_static$(EXEEXT) + @p='maxfpga/max_fpga_basic_static$(EXEEXT)'; \ + b='maxfpga/max_fpga_basic_static'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +maxfpga/max_fpga_advanced_static.log: maxfpga/max_fpga_advanced_static$(EXEEXT) + @p='maxfpga/max_fpga_advanced_static$(EXEEXT)'; \ + b='maxfpga/max_fpga_advanced_static'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +maxfpga/max_fpga_dynamic.log: maxfpga/max_fpga_dynamic$(EXEEXT) + @p='maxfpga/max_fpga_dynamic$(EXEEXT)'; \ + b='maxfpga/max_fpga_dynamic'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +maxfpga/max_fpga_mux.log: maxfpga/max_fpga_mux$(EXEEXT) + @p='maxfpga/max_fpga_mux$(EXEEXT)'; \ + b='maxfpga/max_fpga_mux'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +fortran90/init_01.log: fortran90/init_01$(EXEEXT) + @p='fortran90/init_01$(EXEEXT)'; \ + b='fortran90/init_01'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +main/tag_task_data_deps.log: main/tag_task_data_deps$(EXEEXT) + @p='main/tag_task_data_deps$(EXEEXT)'; \ + b='main/tag_task_data_deps'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +datawizard/reclaim.log: datawizard/reclaim$(EXEEXT) + @p='datawizard/reclaim$(EXEEXT)'; \ + b='datawizard/reclaim'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log 
--trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-recursive +all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-recursive +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" 
INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f datawizard/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/$(am__dirstamp) + -rm -f datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/$(am__dirstamp) + -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/bcsr/$(am__dirstamp) + -rm -f datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/block/$(am__dirstamp) + -rm -f datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/coo/$(am__dirstamp) + -rm -f datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/csr/$(am__dirstamp) + -rm -f datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/matrix/$(am__dirstamp) + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/multiformat/$(am__dirstamp) + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/multiformat/advanced/$(am__dirstamp) + -rm -f datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/ndim/$(am__dirstamp) + -rm -f datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/tensor/$(am__dirstamp) + -rm -f 
datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/variable/$(am__dirstamp) + -rm -f datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/vector/$(am__dirstamp) + -rm -f datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) + -rm -f datawizard/interfaces/void/$(am__dirstamp) + -rm -f disk/$(DEPDIR)/$(am__dirstamp) + -rm -f disk/$(am__dirstamp) + -rm -f energy/$(DEPDIR)/$(am__dirstamp) + -rm -f energy/$(am__dirstamp) + -rm -f errorcheck/$(DEPDIR)/$(am__dirstamp) + -rm -f errorcheck/$(am__dirstamp) + -rm -f fault-tolerance/$(DEPDIR)/$(am__dirstamp) + -rm -f fault-tolerance/$(am__dirstamp) + -rm -f fortran90/$(DEPDIR)/$(am__dirstamp) + -rm -f fortran90/$(am__dirstamp) + -rm -f helper/$(DEPDIR)/$(am__dirstamp) + -rm -f helper/$(am__dirstamp) + -rm -f main/$(DEPDIR)/$(am__dirstamp) + -rm -f main/$(am__dirstamp) + -rm -f main/driver_api/$(DEPDIR)/$(am__dirstamp) + -rm -f main/driver_api/$(am__dirstamp) + -rm -f maxfpga/$(DEPDIR)/$(am__dirstamp) + -rm -f maxfpga/$(am__dirstamp) + -rm -f microbenchs/$(DEPDIR)/$(am__dirstamp) + -rm -f microbenchs/$(am__dirstamp) + -rm -f openmp/$(DEPDIR)/$(am__dirstamp) + -rm -f openmp/$(am__dirstamp) + -rm -f overlap/$(DEPDIR)/$(am__dirstamp) + -rm -f overlap/$(am__dirstamp) + -rm -f parallel_tasks/$(DEPDIR)/$(am__dirstamp) + -rm -f parallel_tasks/$(am__dirstamp) + -rm -f perfmodels/$(DEPDIR)/$(am__dirstamp) + -rm -f perfmodels/$(am__dirstamp) + -rm -f sched_ctx/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_ctx/$(am__dirstamp) + -rm -f sched_policies/$(DEPDIR)/$(am__dirstamp) + -rm -f sched_policies/$(am__dirstamp) + -rm -f variable/$(DEPDIR)/$(am__dirstamp) + -rm -f variable/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-recursive + +clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f datawizard/$(DEPDIR)/acquire_cb.Po + -rm -f datawizard/$(DEPDIR)/acquire_cb_insert.Po + -rm -f datawizard/$(DEPDIR)/acquire_release.Po + -rm -f datawizard/$(DEPDIR)/acquire_release2.Po + -rm -f datawizard/$(DEPDIR)/acquire_release_to.Po + -rm -f datawizard/$(DEPDIR)/acquire_try.Po + -rm -f datawizard/$(DEPDIR)/allocate.Po + -rm -f datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po + -rm -f datawizard/$(DEPDIR)/bcsr.Po + -rm -f datawizard/$(DEPDIR)/cache.Po + -rm -f datawizard/$(DEPDIR)/commute.Po + -rm -f datawizard/$(DEPDIR)/commute2.Po + -rm -f datawizard/$(DEPDIR)/copy.Po + -rm -f datawizard/$(DEPDIR)/critical_section_with_void_interface.Po + -rm -f datawizard/$(DEPDIR)/data_deinitialize.Po + -rm -f datawizard/$(DEPDIR)/data_implicit_deps.Po + -rm -f datawizard/$(DEPDIR)/data_invalidation.Po + -rm -f datawizard/$(DEPDIR)/data_register-data_register.Po + -rm -f datawizard/$(DEPDIR)/deinitialize_pending_requests.Po + -rm -f datawizard/$(DEPDIR)/deps.Po + -rm -f datawizard/$(DEPDIR)/dining_philosophers.Po + -rm -f datawizard/$(DEPDIR)/double_parameter.Po + -rm -f datawizard/$(DEPDIR)/dsm_stress.Po + -rm -f datawizard/$(DEPDIR)/gpu_ptr_register.Po + -rm -f datawizard/$(DEPDIR)/gpu_register.Po + -rm -f datawizard/$(DEPDIR)/handle_to_pointer.Po + -rm -f datawizard/$(DEPDIR)/in_place_partition.Po + -rm -f datawizard/$(DEPDIR)/increment_init.Po + -rm -f datawizard/$(DEPDIR)/increment_redux.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_lazy.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_partition.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_v2.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_with_args.Po + -rm -f datawizard/$(DEPDIR)/invalidate_pending_requests.Po + -rm 
-f datawizard/$(DEPDIR)/lazy_allocation.Po + -rm -f datawizard/$(DEPDIR)/locality.Po + -rm -f datawizard/$(DEPDIR)/manual_reduction.Po + -rm -f datawizard/$(DEPDIR)/mpi_like.Po + -rm -f datawizard/$(DEPDIR)/mpi_like_async.Po + -rm -f datawizard/$(DEPDIR)/no_unregister.Po + -rm -f datawizard/$(DEPDIR)/noreclaim.Po + -rm -f datawizard/$(DEPDIR)/nowhere.Po + -rm -f datawizard/$(DEPDIR)/numa_overflow.Po + -rm -f datawizard/$(DEPDIR)/partition_dep.Po + -rm -f datawizard/$(DEPDIR)/partition_init.Po + -rm -f datawizard/$(DEPDIR)/partition_lazy.Po + -rm -f datawizard/$(DEPDIR)/partition_wontuse.Po + -rm -f datawizard/$(DEPDIR)/partitioned_acquire.Po + -rm -f datawizard/$(DEPDIR)/partitioned_initialization.Po + -rm -f datawizard/$(DEPDIR)/readers_and_writers.Po + -rm -f datawizard/$(DEPDIR)/readonly.Po + -rm -f datawizard/$(DEPDIR)/reclaim.Po + -rm -f datawizard/$(DEPDIR)/redux_acquire.Po + -rm -f datawizard/$(DEPDIR)/scal.Po + -rm -f datawizard/$(DEPDIR)/scratch.Po + -rm -f datawizard/$(DEPDIR)/scratch_opencl.Po + -rm -f datawizard/$(DEPDIR)/scratch_reuse.Po + -rm -f datawizard/$(DEPDIR)/simgrid-locality.Po + -rm -f datawizard/$(DEPDIR)/specific_node.Po + -rm -f datawizard/$(DEPDIR)/specific_node_same.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po + -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem.Po + -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po + -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po + -rm -f datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition_implicit.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition_read.Po + -rm -f datawizard/$(DEPDIR)/test_arbiter.Po + -rm -f datawizard/$(DEPDIR)/unpartition.Po + -rm -f datawizard/$(DEPDIR)/user_interaction_implicit.Po + 
-rm -f datawizard/$(DEPDIR)/variable_parameters.Po + -rm -f datawizard/$(DEPDIR)/variable_size.Po + -rm -f datawizard/$(DEPDIR)/write_only_tmp_buffer.Po + -rm -f datawizard/$(DEPDIR)/wt_broadcast.Po + -rm -f datawizard/$(DEPDIR)/wt_host.Po + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po + -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po + -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po + -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po + -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po + -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po + -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po + -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po + -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po + -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po + -rm -f 
datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po + -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po + -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po + -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po + -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po + -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po + -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po + -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po + -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po + -rm -f datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po + -rm -f disk/$(DEPDIR)/disk_compute.Po + -rm -f disk/$(DEPDIR)/disk_copy.Po + -rm -f disk/$(DEPDIR)/disk_copy_to_disk.Po + -rm -f disk/$(DEPDIR)/disk_copy_unpack.Po + -rm -f disk/$(DEPDIR)/disk_pack.Po + -rm -f disk/$(DEPDIR)/mem_reclaim.Po + -rm 
-f energy/$(DEPDIR)/energy_efficiency.Po + -rm -f errorcheck/$(DEPDIR)/invalid_blocking_calls.Po + -rm -f errorcheck/$(DEPDIR)/invalid_tasks.Po + -rm -f errorcheck/$(DEPDIR)/starpu_init_noworker.Po + -rm -f errorcheck/$(DEPDIR)/workers_cpuid.Po + -rm -f fault-tolerance/$(DEPDIR)/retry.Po + -rm -f helper/$(DEPDIR)/cublasLt_init.Po + -rm -f helper/$(DEPDIR)/cublas_init.Po + -rm -f helper/$(DEPDIR)/cusparse_init.Po + -rm -f helper/$(DEPDIR)/execute_on_all.Po + -rm -f helper/$(DEPDIR)/hipblas_init.Po + -rm -f helper/$(DEPDIR)/pinned_memory.Po + -rm -f helper/$(DEPDIR)/starpu_create_sync_task.Po + -rm -f helper/$(DEPDIR)/starpu_data_cpy.Po + -rm -f helper/$(DEPDIR)/starpu_data_dup_ro.Po + -rm -f main/$(DEPDIR)/bind.Po + -rm -f main/$(DEPDIR)/callback.Po + -rm -f main/$(DEPDIR)/codelet_null_callback.Po + -rm -f main/$(DEPDIR)/const_codelet.Po + -rm -f main/$(DEPDIR)/deadlock.Po + -rm -f main/$(DEPDIR)/declare_deps_after_submission.Po + -rm -f main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po + -rm -f main/$(DEPDIR)/declare_deps_in_callback.Po + -rm -f main/$(DEPDIR)/deploop.Po + -rm -f main/$(DEPDIR)/deprecated_func-deprecated_func.Po + -rm -f main/$(DEPDIR)/display_binding.Po + -rm -f main/$(DEPDIR)/empty_task.Po + -rm -f main/$(DEPDIR)/empty_task_chain.Po + -rm -f main/$(DEPDIR)/empty_task_sync_point.Po + -rm -f main/$(DEPDIR)/empty_task_sync_point_tasks.Po + -rm -f main/$(DEPDIR)/execute_on_a_specific_worker.Po + -rm -f main/$(DEPDIR)/execute_schedule.Po + -rm -f main/$(DEPDIR)/get_children_tasks.Po + -rm -f main/$(DEPDIR)/get_current_task.Po + -rm -f main/$(DEPDIR)/hwloc_cpuset.Po + -rm -f main/$(DEPDIR)/insert_task.Po + -rm -f main/$(DEPDIR)/insert_task_array.Po + -rm -f main/$(DEPDIR)/insert_task_dyn_handles.Po + -rm -f main/$(DEPDIR)/insert_task_many.Po + -rm -f main/$(DEPDIR)/insert_task_nullcodelet.Po + -rm -f main/$(DEPDIR)/insert_task_pack.Po + -rm -f main/$(DEPDIR)/insert_task_value.Po + -rm -f main/$(DEPDIR)/insert_task_where.Po + -rm -f 
main/$(DEPDIR)/job.Po + -rm -f main/$(DEPDIR)/mkdtemp.Po + -rm -f main/$(DEPDIR)/multithreaded.Po + -rm -f main/$(DEPDIR)/multithreaded_init.Po + -rm -f main/$(DEPDIR)/pack.Po + -rm -f main/$(DEPDIR)/pause_resume.Po + -rm -f main/$(DEPDIR)/regenerate.Po + -rm -f main/$(DEPDIR)/regenerate_pipeline.Po + -rm -f main/$(DEPDIR)/restart.Po + -rm -f main/$(DEPDIR)/starpu_init.Po + -rm -f main/$(DEPDIR)/starpu_task_bundle.Po + -rm -f main/$(DEPDIR)/starpu_task_wait.Po + -rm -f main/$(DEPDIR)/starpu_task_wait_for_all.Po + -rm -f main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po + -rm -f main/$(DEPDIR)/static_restartable.Po + -rm -f main/$(DEPDIR)/static_restartable_tag.Po + -rm -f main/$(DEPDIR)/static_restartable_using_initializer.Po + -rm -f main/$(DEPDIR)/subgraph_repeat.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_tag.Po + -rm -f main/$(DEPDIR)/submit.Po + -rm -f main/$(DEPDIR)/tag_get_task.Po + -rm -f main/$(DEPDIR)/tag_task_data_deps.Po + -rm -f main/$(DEPDIR)/tag_wait_api.Po + -rm -f main/$(DEPDIR)/task_end_dep.Po + -rm -f main/$(DEPDIR)/task_wait_api.Po + -rm -f main/$(DEPDIR)/wait_all_regenerable_tasks.Po + -rm -f main/driver_api/$(DEPDIR)/init_run_deinit.Po + -rm -f main/driver_api/$(DEPDIR)/run_driver.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_basic_static.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_dynamic.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_mux.Po + -rm -f microbenchs/$(DEPDIR)/async_tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/bandwidth.Po + -rm -f microbenchs/$(DEPDIR)/display_structures_size.Po + -rm -f microbenchs/$(DEPDIR)/local_pingpong.Po + -rm -f microbenchs/$(DEPDIR)/matrix_as_vector.Po + -rm -f microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po + -rm -f 
microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/prefetch_data_on_node.Po + -rm -f microbenchs/$(DEPDIR)/redundant_buffer.Po + -rm -f microbenchs/$(DEPDIR)/sync_tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/tasks_size_overhead.Po + -rm -f openmp/$(DEPDIR)/api_01.Po + -rm -f openmp/$(DEPDIR)/array_slice_01.Po + -rm -f openmp/$(DEPDIR)/cuda_task_01.Po + -rm -f openmp/$(DEPDIR)/environment.Po + -rm -f openmp/$(DEPDIR)/init_exit_01.Po + -rm -f openmp/$(DEPDIR)/init_exit_02.Po + -rm -f openmp/$(DEPDIR)/parallel_01.Po + -rm -f openmp/$(DEPDIR)/parallel_02.Po + -rm -f openmp/$(DEPDIR)/parallel_03.Po + -rm -f openmp/$(DEPDIR)/parallel_barrier_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_named_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_for_01.Po + -rm -f openmp/$(DEPDIR)/parallel_for_02.Po + -rm -f openmp/$(DEPDIR)/parallel_for_ordered_01.Po + -rm -f openmp/$(DEPDIR)/parallel_master_01.Po + -rm -f openmp/$(DEPDIR)/parallel_master_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_nested_lock_01.Po + -rm -f openmp/$(DEPDIR)/parallel_sections_01.Po + -rm -f openmp/$(DEPDIR)/parallel_sections_combined_01.Po + -rm -f openmp/$(DEPDIR)/parallel_simple_lock_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_inline_01.Po + 
-rm -f openmp/$(DEPDIR)/parallel_single_nowait_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_wait_01.Po + -rm -f openmp/$(DEPDIR)/task_01.Po + -rm -f openmp/$(DEPDIR)/task_02.Po + -rm -f openmp/$(DEPDIR)/task_03.Po + -rm -f openmp/$(DEPDIR)/taskgroup_01.Po + -rm -f openmp/$(DEPDIR)/taskgroup_02.Po + -rm -f openmp/$(DEPDIR)/taskloop.Po + -rm -f openmp/$(DEPDIR)/taskwait_01.Po + -rm -f overlap/$(DEPDIR)/gpu_concurrency.Po + -rm -f overlap/$(DEPDIR)/overlap.Po + -rm -f parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po + -rm -f parallel_tasks/$(DEPDIR)/cuda_only.Po + -rm -f parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po + -rm -f parallel_tasks/$(DEPDIR)/spmd_peager.Po + -rm -f parallel_tasks/$(DEPDIR)/swap.Po + -rm -f perfmodels/$(DEPDIR)/feed.Po + -rm -f perfmodels/$(DEPDIR)/memory.Po + -rm -f perfmodels/$(DEPDIR)/non_linear_regression_based.Po + -rm -f perfmodels/$(DEPDIR)/opencl_memset.Po + -rm -f perfmodels/$(DEPDIR)/path.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_check.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_energy.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_gpu.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_memset.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_multiimpl.Po + -rm -f perfmodels/$(DEPDIR)/user_base.Po + -rm -f perfmodels/$(DEPDIR)/valid_model.Po + -rm -f perfmodels/$(DEPDIR)/value_nan.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_list.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po + -rm -f sched_policies/$(DEPDIR)/data_locality.Po + -rm -f sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po + -rm -f sched_policies/$(DEPDIR)/prio.Po + -rm -f sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po + -rm -f sched_policies/$(DEPDIR)/simple_deps.Po + -rm -f 
sched_policies/$(DEPDIR)/workerids.Po + -rm -f variable/$(DEPDIR)/increment.Po + -rm -f variable/$(DEPDIR)/increment_opencl.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-examplebinPROGRAMS install-examplebinSCRIPTS \ + install-nobase_STARPU_OPENCL_DATADATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f datawizard/$(DEPDIR)/acquire_cb.Po + -rm -f datawizard/$(DEPDIR)/acquire_cb_insert.Po + -rm -f datawizard/$(DEPDIR)/acquire_release.Po + -rm -f datawizard/$(DEPDIR)/acquire_release2.Po + -rm -f datawizard/$(DEPDIR)/acquire_release_to.Po + -rm -f datawizard/$(DEPDIR)/acquire_try.Po + -rm -f datawizard/$(DEPDIR)/allocate.Po + -rm -f datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po + -rm -f datawizard/$(DEPDIR)/bcsr.Po + -rm -f datawizard/$(DEPDIR)/cache.Po + -rm -f datawizard/$(DEPDIR)/commute.Po + -rm -f datawizard/$(DEPDIR)/commute2.Po + -rm -f datawizard/$(DEPDIR)/copy.Po + -rm -f datawizard/$(DEPDIR)/critical_section_with_void_interface.Po + -rm -f datawizard/$(DEPDIR)/data_deinitialize.Po + -rm -f datawizard/$(DEPDIR)/data_implicit_deps.Po + -rm -f datawizard/$(DEPDIR)/data_invalidation.Po + -rm -f datawizard/$(DEPDIR)/data_register-data_register.Po + -rm -f datawizard/$(DEPDIR)/deinitialize_pending_requests.Po + -rm -f datawizard/$(DEPDIR)/deps.Po + -rm -f datawizard/$(DEPDIR)/dining_philosophers.Po + -rm -f datawizard/$(DEPDIR)/double_parameter.Po + -rm -f datawizard/$(DEPDIR)/dsm_stress.Po + 
-rm -f datawizard/$(DEPDIR)/gpu_ptr_register.Po + -rm -f datawizard/$(DEPDIR)/gpu_register.Po + -rm -f datawizard/$(DEPDIR)/handle_to_pointer.Po + -rm -f datawizard/$(DEPDIR)/in_place_partition.Po + -rm -f datawizard/$(DEPDIR)/increment_init.Po + -rm -f datawizard/$(DEPDIR)/increment_redux.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_lazy.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_partition.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_v2.Po + -rm -f datawizard/$(DEPDIR)/increment_redux_with_args.Po + -rm -f datawizard/$(DEPDIR)/invalidate_pending_requests.Po + -rm -f datawizard/$(DEPDIR)/lazy_allocation.Po + -rm -f datawizard/$(DEPDIR)/locality.Po + -rm -f datawizard/$(DEPDIR)/manual_reduction.Po + -rm -f datawizard/$(DEPDIR)/mpi_like.Po + -rm -f datawizard/$(DEPDIR)/mpi_like_async.Po + -rm -f datawizard/$(DEPDIR)/no_unregister.Po + -rm -f datawizard/$(DEPDIR)/noreclaim.Po + -rm -f datawizard/$(DEPDIR)/nowhere.Po + -rm -f datawizard/$(DEPDIR)/numa_overflow.Po + -rm -f datawizard/$(DEPDIR)/partition_dep.Po + -rm -f datawizard/$(DEPDIR)/partition_init.Po + -rm -f datawizard/$(DEPDIR)/partition_lazy.Po + -rm -f datawizard/$(DEPDIR)/partition_wontuse.Po + -rm -f datawizard/$(DEPDIR)/partitioned_acquire.Po + -rm -f datawizard/$(DEPDIR)/partitioned_initialization.Po + -rm -f datawizard/$(DEPDIR)/readers_and_writers.Po + -rm -f datawizard/$(DEPDIR)/readonly.Po + -rm -f datawizard/$(DEPDIR)/reclaim.Po + -rm -f datawizard/$(DEPDIR)/redux_acquire.Po + -rm -f datawizard/$(DEPDIR)/scal.Po + -rm -f datawizard/$(DEPDIR)/scratch.Po + -rm -f datawizard/$(DEPDIR)/scratch_opencl.Po + -rm -f datawizard/$(DEPDIR)/scratch_reuse.Po + -rm -f datawizard/$(DEPDIR)/simgrid-locality.Po + -rm -f datawizard/$(DEPDIR)/specific_node.Po + -rm -f datawizard/$(DEPDIR)/specific_node_same.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po + -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po + -rm -f 
datawizard/$(DEPDIR)/sync_with_data_with_mem.Po + -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po + -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po + -rm -f datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition_implicit.Po + -rm -f datawizard/$(DEPDIR)/temporary_partition_read.Po + -rm -f datawizard/$(DEPDIR)/test_arbiter.Po + -rm -f datawizard/$(DEPDIR)/unpartition.Po + -rm -f datawizard/$(DEPDIR)/user_interaction_implicit.Po + -rm -f datawizard/$(DEPDIR)/variable_parameters.Po + -rm -f datawizard/$(DEPDIR)/variable_size.Po + -rm -f datawizard/$(DEPDIR)/write_only_tmp_buffer.Po + -rm -f datawizard/$(DEPDIR)/wt_broadcast.Po + -rm -f datawizard/$(DEPDIR)/wt_host.Po + -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po + -rm -f datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po + -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po + -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po + -rm -f 
datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po + -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po + -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po + -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po + -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po + -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po + -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po + -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po + -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po + -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po + -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po + -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po + -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po + -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po + -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po + -rm -f 
datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po + -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po + -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po + -rm -f datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po + -rm -f disk/$(DEPDIR)/disk_compute.Po + -rm -f disk/$(DEPDIR)/disk_copy.Po + -rm -f disk/$(DEPDIR)/disk_copy_to_disk.Po + -rm -f disk/$(DEPDIR)/disk_copy_unpack.Po + -rm -f disk/$(DEPDIR)/disk_pack.Po + -rm -f disk/$(DEPDIR)/mem_reclaim.Po + -rm -f energy/$(DEPDIR)/energy_efficiency.Po + -rm -f errorcheck/$(DEPDIR)/invalid_blocking_calls.Po + -rm -f errorcheck/$(DEPDIR)/invalid_tasks.Po + -rm -f errorcheck/$(DEPDIR)/starpu_init_noworker.Po + -rm -f errorcheck/$(DEPDIR)/workers_cpuid.Po + -rm -f fault-tolerance/$(DEPDIR)/retry.Po + -rm -f helper/$(DEPDIR)/cublasLt_init.Po + -rm -f helper/$(DEPDIR)/cublas_init.Po + -rm -f helper/$(DEPDIR)/cusparse_init.Po + -rm -f helper/$(DEPDIR)/execute_on_all.Po + -rm -f helper/$(DEPDIR)/hipblas_init.Po + -rm -f helper/$(DEPDIR)/pinned_memory.Po + -rm -f helper/$(DEPDIR)/starpu_create_sync_task.Po + -rm -f helper/$(DEPDIR)/starpu_data_cpy.Po + -rm -f helper/$(DEPDIR)/starpu_data_dup_ro.Po + -rm -f main/$(DEPDIR)/bind.Po + -rm -f main/$(DEPDIR)/callback.Po + -rm -f main/$(DEPDIR)/codelet_null_callback.Po + -rm -f main/$(DEPDIR)/const_codelet.Po + -rm -f main/$(DEPDIR)/deadlock.Po + -rm -f main/$(DEPDIR)/declare_deps_after_submission.Po + -rm -f main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po + -rm -f main/$(DEPDIR)/declare_deps_in_callback.Po + -rm -f main/$(DEPDIR)/deploop.Po + -rm -f main/$(DEPDIR)/deprecated_func-deprecated_func.Po + -rm -f main/$(DEPDIR)/display_binding.Po + -rm -f main/$(DEPDIR)/empty_task.Po + -rm -f main/$(DEPDIR)/empty_task_chain.Po + -rm -f main/$(DEPDIR)/empty_task_sync_point.Po + -rm -f main/$(DEPDIR)/empty_task_sync_point_tasks.Po + -rm -f 
main/$(DEPDIR)/execute_on_a_specific_worker.Po + -rm -f main/$(DEPDIR)/execute_schedule.Po + -rm -f main/$(DEPDIR)/get_children_tasks.Po + -rm -f main/$(DEPDIR)/get_current_task.Po + -rm -f main/$(DEPDIR)/hwloc_cpuset.Po + -rm -f main/$(DEPDIR)/insert_task.Po + -rm -f main/$(DEPDIR)/insert_task_array.Po + -rm -f main/$(DEPDIR)/insert_task_dyn_handles.Po + -rm -f main/$(DEPDIR)/insert_task_many.Po + -rm -f main/$(DEPDIR)/insert_task_nullcodelet.Po + -rm -f main/$(DEPDIR)/insert_task_pack.Po + -rm -f main/$(DEPDIR)/insert_task_value.Po + -rm -f main/$(DEPDIR)/insert_task_where.Po + -rm -f main/$(DEPDIR)/job.Po + -rm -f main/$(DEPDIR)/mkdtemp.Po + -rm -f main/$(DEPDIR)/multithreaded.Po + -rm -f main/$(DEPDIR)/multithreaded_init.Po + -rm -f main/$(DEPDIR)/pack.Po + -rm -f main/$(DEPDIR)/pause_resume.Po + -rm -f main/$(DEPDIR)/regenerate.Po + -rm -f main/$(DEPDIR)/regenerate_pipeline.Po + -rm -f main/$(DEPDIR)/restart.Po + -rm -f main/$(DEPDIR)/starpu_init.Po + -rm -f main/$(DEPDIR)/starpu_task_bundle.Po + -rm -f main/$(DEPDIR)/starpu_task_wait.Po + -rm -f main/$(DEPDIR)/starpu_task_wait_for_all.Po + -rm -f main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po + -rm -f main/$(DEPDIR)/static_restartable.Po + -rm -f main/$(DEPDIR)/static_restartable_tag.Po + -rm -f main/$(DEPDIR)/static_restartable_using_initializer.Po + -rm -f main/$(DEPDIR)/subgraph_repeat.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po + -rm -f main/$(DEPDIR)/subgraph_repeat_tag.Po + -rm -f main/$(DEPDIR)/submit.Po + -rm -f main/$(DEPDIR)/tag_get_task.Po + -rm -f main/$(DEPDIR)/tag_task_data_deps.Po + -rm -f main/$(DEPDIR)/tag_wait_api.Po + -rm -f main/$(DEPDIR)/task_end_dep.Po + -rm -f main/$(DEPDIR)/task_wait_api.Po + -rm -f main/$(DEPDIR)/wait_all_regenerable_tasks.Po + -rm -f main/driver_api/$(DEPDIR)/init_run_deinit.Po + -rm -f 
main/driver_api/$(DEPDIR)/run_driver.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_basic_static.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_dynamic.Po + -rm -f maxfpga/$(DEPDIR)/max_fpga_mux.Po + -rm -f microbenchs/$(DEPDIR)/async_tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/bandwidth.Po + -rm -f microbenchs/$(DEPDIR)/display_structures_size.Po + -rm -f microbenchs/$(DEPDIR)/local_pingpong.Po + -rm -f microbenchs/$(DEPDIR)/matrix_as_vector.Po + -rm -f microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po + -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po + -rm -f microbenchs/$(DEPDIR)/prefetch_data_on_node.Po + -rm -f microbenchs/$(DEPDIR)/redundant_buffer.Po + -rm -f microbenchs/$(DEPDIR)/sync_tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/tasks_overhead.Po + -rm -f microbenchs/$(DEPDIR)/tasks_size_overhead.Po + -rm -f openmp/$(DEPDIR)/api_01.Po + -rm -f openmp/$(DEPDIR)/array_slice_01.Po + -rm -f openmp/$(DEPDIR)/cuda_task_01.Po + -rm -f openmp/$(DEPDIR)/environment.Po + -rm -f openmp/$(DEPDIR)/init_exit_01.Po + -rm -f openmp/$(DEPDIR)/init_exit_02.Po + -rm -f openmp/$(DEPDIR)/parallel_01.Po + -rm -f openmp/$(DEPDIR)/parallel_02.Po + -rm -f openmp/$(DEPDIR)/parallel_03.Po + -rm -f openmp/$(DEPDIR)/parallel_barrier_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_named_01.Po + -rm -f openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_for_01.Po + -rm -f 
openmp/$(DEPDIR)/parallel_for_02.Po + -rm -f openmp/$(DEPDIR)/parallel_for_ordered_01.Po + -rm -f openmp/$(DEPDIR)/parallel_master_01.Po + -rm -f openmp/$(DEPDIR)/parallel_master_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_nested_lock_01.Po + -rm -f openmp/$(DEPDIR)/parallel_sections_01.Po + -rm -f openmp/$(DEPDIR)/parallel_sections_combined_01.Po + -rm -f openmp/$(DEPDIR)/parallel_simple_lock_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_inline_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_nowait_01.Po + -rm -f openmp/$(DEPDIR)/parallel_single_wait_01.Po + -rm -f openmp/$(DEPDIR)/task_01.Po + -rm -f openmp/$(DEPDIR)/task_02.Po + -rm -f openmp/$(DEPDIR)/task_03.Po + -rm -f openmp/$(DEPDIR)/taskgroup_01.Po + -rm -f openmp/$(DEPDIR)/taskgroup_02.Po + -rm -f openmp/$(DEPDIR)/taskloop.Po + -rm -f openmp/$(DEPDIR)/taskwait_01.Po + -rm -f overlap/$(DEPDIR)/gpu_concurrency.Po + -rm -f overlap/$(DEPDIR)/overlap.Po + -rm -f parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po + -rm -f parallel_tasks/$(DEPDIR)/cuda_only.Po + -rm -f parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po + -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po + -rm -f parallel_tasks/$(DEPDIR)/spmd_peager.Po + -rm -f parallel_tasks/$(DEPDIR)/swap.Po + -rm -f perfmodels/$(DEPDIR)/feed.Po + -rm -f perfmodels/$(DEPDIR)/memory.Po + -rm -f perfmodels/$(DEPDIR)/non_linear_regression_based.Po + -rm -f perfmodels/$(DEPDIR)/opencl_memset.Po + -rm -f perfmodels/$(DEPDIR)/path.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_check.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_energy.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_gpu.Po + -rm -f perfmodels/$(DEPDIR)/regression_based_memset.Po + -rm -f 
perfmodels/$(DEPDIR)/regression_based_multiimpl.Po + -rm -f perfmodels/$(DEPDIR)/user_base.Po + -rm -f perfmodels/$(DEPDIR)/valid_model.Po + -rm -f perfmodels/$(DEPDIR)/value_nan.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_list.Po + -rm -f sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po + -rm -f sched_policies/$(DEPDIR)/data_locality.Po + -rm -f sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po + -rm -f sched_policies/$(DEPDIR)/prio.Po + -rm -f sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po + -rm -f sched_policies/$(DEPDIR)/simple_deps.Po + -rm -f sched_policies/$(DEPDIR)/workerids.Po + -rm -f variable/$(DEPDIR)/increment.Po + -rm -f variable/$(DEPDIR)/increment_opencl.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-examplebinPROGRAMS uninstall-examplebinSCRIPTS \ + uninstall-nobase_STARPU_OPENCL_DATADATA + +.MAKE: $(am__recursive_targets) all check check-am install install-am \ + install-exec install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am \ + install-examplebinPROGRAMS install-examplebinSCRIPTS \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man \ + install-nobase_STARPU_OPENCL_DATADATA install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck 
installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am recheck tags tags-am uninstall \
+	uninstall-am uninstall-examplebinPROGRAMS \
+	uninstall-examplebinSCRIPTS \
+	uninstall-nobase_STARPU_OPENCL_DATADATA
+
+.PRECIOUS: Makefile
+
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o:
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@	@$(MKDIR_P) `dirname $@`
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@	$(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c
+
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin:
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@	$(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS)
+
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o:
+@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@	$(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS)
+@STARPU_USE_HIP_TRUE@.hip.o:
+@STARPU_USE_HIP_TRUE@	$(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS)
+
+STARPU_MPI_NP ?= 4
+
+showcheckfailed:
+	@ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done
+	@RET=0 ; \
+	for i in $(SUBDIRS) ; do \
+		$(MAKE) -C $$i showcheckfailed || RET=1 ; \
+	done ; \
+	exit $$RET
+
+showfailed:
+	@! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null
+	@RET=0 ; \
+	for i in $(SUBDIRS) ; do \
+		$(MAKE) -s -C $$i showfailed || RET=1 ; \
+	done ; \
+	exit $$RET
+
+showcheck:
+	-cat $(TEST_LOGS) /dev/null
+	@! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null
+	@! grep -q " runtime error: " $(TEST_LOGS) /dev/null
+	RET=0 ; \
+	for i in $(SUBDIRS) ; do \
+		$(MAKE) -C $$i showcheck || RET=1 ; \
+	done ; \
+	exit $$RET
+
+showsuite:
+	-cat $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null
+	@! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null
+	RET=0 ; \
+	for i in $(SUBDIRS) ; do \
+		$(MAKE) -C $$i showsuite || RET=1 ; \
+	done ; \
+	exit $$RET
+
+@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
+@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage
+@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0
+
+@STARPU_SIMGRID_TRUE@env:
+@STARPU_SIMGRID_TRUE@	@echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR)
+@STARPU_SIMGRID_TRUE@	@echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME)
+@STARPU_SIMGRID_TRUE@	@echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_)
+
+@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1
+
+@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1
+
+@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1
+
+#
+# Test loading goes through a lot of launchers:
+#
+# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e.
+#   either mpirun or starpu_tcpipexec
+#
+# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running
+#   gdb, etc. But if it detects that the test is a .sh script, it just executes
+#   it
+#
+# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader
+#   to run the program through e.g. valgrind.sh
+#
+# When the program is a shell script, additionally:
+#
+# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below)
+#
+# - $(MS_LAUNCHER) is called to run the test through starpu_msexec
+#
+# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program
+#   through it.
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +clean-local: + -rm -rf overlap/overlap.traces datawizard/locality.traces +maxfpga/max_fpga_basic_static.o: maxfpga/MyTasks.max +maxfpga/max_fpga_advanced_static.o: maxfpga/MyTasks.max +maxfpga/max_fpga_dynamic.o: maxfpga/MyTasks.max +maxfpga/max_fpga_mux.o: maxfpga/MyTasksMux.max + +# Fortran90 tests + +# - link over source file to build our own object +fortran90/starpu_mod.f90: + @$(MKDIR_P) $(dir $@) + $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ + +# - express the creation of .mod along .o +@STARPU_HAVE_FC_TRUE@starpu_mod.mod: fortran90/starpu_mod.o + +# - list explicit dependences to control proper module files dependencies +@STARPU_HAVE_FC_TRUE@fortran90/init_01.o: starpu_mod.mod + +# Maxeler compiler +# or DFE for hardware execution +@STARPU_USE_MAX_FPGA_TRUE@MAX_TARGET ?= DFE_SIM +@STARPU_USE_MAX_FPGA_TRUE@$(CLASS) &: $(MAXJ) +@STARPU_USE_MAX_FPGA_TRUE@ maxjc -1.7 -cp $$MAXCLASSPATH $(dir $<) + +@STARPU_USE_MAX_FPGA_TRUE@%.max: %Manager.class +@STARPU_USE_MAX_FPGA_TRUE@ CLASSPATH=$$CLASSPATH:. maxJavaRun $(shell dirname $*).$(notdir $*)Manager DFEModel=MAIA maxFileName=$(notdir $*) target=$(MAX_TARGET) +@STARPU_USE_MAX_FPGA_TRUE@ cp $(notdir $*)_$(MAX_DFE)/results/$(notdir $*).{max,h} $(dir $@) + +@STARPU_USE_MAX_FPGA_TRUE@slic_%.o: %.max +@STARPU_USE_MAX_FPGA_TRUE@ sliccompile $< $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/tests/coverage/coverage.sh b/tests/coverage/coverage.sh new file mode 100755 index 0000000..8d7c585 --- /dev/null +++ b/tests/coverage/coverage.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +exampledir=../examples/ + +check_success() +{ + if [ $1 -eq 77 ] ; then + echo "skip" + exit $1 + elif [ $1 != 0 ] ; then + echo "failure" + exit $1 + fi +} + +apps() +{ + if [ -f $exampledir/basic_examples/block ] ; then + echo "block opencl" + STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/block + check_success $? + fi + + if [ -f $exampledir/basic_examples/variable ] ; then + echo "variable opencl" + STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/variable 100 + check_success $? + + echo "variable no worker" + STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/variable + check_success $? + fi + + if [ -f $exampledir/incrementer/incrementer ] ; then + echo "incrementer opencl" + STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/incrementer/incrementer 10 + check_success $? + + echo "incrementer no worker" + STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/incrementer/incrementer + check_success $? 
+ fi + + if [ -f $exampledir/tag_example/tag_example ] ; then + echo "tag_example" + $MS_LAUNCHER $STARPU_LAUNCH $exampledir/tag_example/tag_example -iter 64 -i 128 -j 24 + check_success $? + fi + + if [ -f $exampledir/tag_example/tag_example2 ] ; then + echo "tag_example2" + $MS_LAUNCHER $STARPU_LAUNCH $exampledir/tag_example/tag_example2 -iter 64 -i 128 + check_success $? + fi + + if [ -f $exampledir/cholesky/dw_cholesky ] ; then + echo "chol.dm" + STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin + check_success $? + + echo "chol.dmda" + STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin + check_success $? + + echo "chol.cpu" + STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin + check_success $? + + echo "chol.gpu" + STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin + check_success $? + fi + + if [ -f $exampledir/heat/heat ] ; then + echo "heat.dm.4k.calibrate.v2" + STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin + check_success $? + + echo "heat.dm.8k.calibrate.v2" + STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin + check_success $? + + echo "heat.dm.8k.no.pin.v2" + STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 + check_success $? + +# echo "heat.dm.8k.v2.no.prio" +# STARPU_SCHED="no-prio" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 +# check_success $? + + echo "heat.dm.8k.v2.random" + STARPU_SCHED="random" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 + check_success $? 
+ + echo "heat.dm.8k.v2" + STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 + check_success $? + + echo "heat.greedy.8k.v2" + STARPU_SCHED="greedy" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 + check_success $? + + echo "heat.8k.cg" + $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg + check_success $? + + echo "heat.dm.8k.cg" + STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg + check_success $? + fi + + if [ -f $exampledir/mult/dw_mult_no_stride ] ; then + echo "mult.dm.common" + STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model + check_success $? + + echo "mult.dm" + STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin + check_success $? + + echo "mult.dmda" + STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin + check_success $? + fi +} + +apps; diff --git a/tests/datawizard/acquire_cb.c b/tests/datawizard/acquire_cb.c new file mode 100644 index 0000000..c2eacb1 --- /dev/null +++ b/tests/datawizard/acquire_cb.c @@ -0,0 +1,107 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+/*
+ * Test that when using starpu_data_acquire_cb, the callback_w is properly called
+ */
+
+unsigned token = 0;
+starpu_data_handle_t token_handle;
+
+static
+void callback_w(void *arg)
+{
+	(void)arg;
+	token = 42;
+	starpu_data_release(token_handle);
+}
+
+static
+void callback_r(void *arg)
+{
+	(void)arg;
+	starpu_data_release(token_handle);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_unregister(token_handle);
+	STARPU_ASSERT(token == 42);
+
+	token = 0;
+
+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
+	starpu_data_acquire(token_handle, STARPU_W);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_release(token_handle);
+	starpu_data_unregister(token_handle);
+
+	token = 0;
+
+	starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned));
+	/* These are getting executed immediately */
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+
+	starpu_data_acquire(token_handle, STARPU_W);
+	/* These will wait for our release */
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_release(token_handle);
+
+	starpu_data_unregister(token_handle);
+
+	FPRINTF(stderr, "Token: %u\n", token);
+
+	starpu_shutdown();
+
+	return (token == 42) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tests/datawizard/acquire_cb_insert.c b/tests/datawizard/acquire_cb_insert.c
new file mode 100644
index 0000000..8fd54f0
--- /dev/null
+++ b/tests/datawizard/acquire_cb_insert.c
@@ -0,0 +1,169 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013  Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+/*
+ * Test inserting a task from the callback of a starpu_data_acquire_cb
+ * call, with a proper dependency on an already-submitted task
+ */
+
+#define N 16
+#define M 4
+#define X 2
+
+void which_index_cpu(void *descr[], void *_args)
+{
+	(void)_args;
+
+	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	/* A real case would actually compute something */
+	*x0 = X;
+}
+
+struct starpu_codelet which_index =
+{
+	.cpu_funcs = {which_index_cpu},
+	.cpu_funcs_name = {"which_index_cpu"},
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+void work_cpu(void *descr[], void *_args)
+{
+	int i, n = STARPU_VECTOR_GET_NX(descr[0]);
+	float *x0 = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	(void)_args;
+
+	for (i = 0; i < n; i++)
+		x0[i] = i + 1;
+}
+
+struct starpu_codelet work =
+{
+	.cpu_funcs = {work_cpu},
+	.cpu_funcs_name = {"work_cpu"},
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+static int x;
+static starpu_data_handle_t x_handle, f_handle;
+
+static void callback(void *arg)
+{
+	(void)arg;
+	int ret;
+	ret = starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	starpu_data_release(x_handle);
+}
+
+int main(int argc, char **argv)
+{
+	int i, ret;
+	float *f;
+	struct
starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if(starpu_cpu_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + /* Declare x */ + starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + + /* Allocate and Declare f */ + ret = starpu_malloc((void**)&f, N * sizeof(*f)); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + memset(f, 0, N * sizeof(*f)); + starpu_vector_data_register(&f_handle, STARPU_MAIN_RAM, (uintptr_t)f, N, sizeof(*f)); + + /* Partition f */ + struct starpu_data_filter filter = + { + .filter_func = starpu_vector_filter_block, + .nchildren = M, + }; + starpu_data_partition(f_handle, &filter); + + /* Compute which portion we will work on */ + ret = starpu_task_insert(&which_index, STARPU_W, x_handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* And submit the corresponding task */ +#ifdef __GCC__ + STARPU_DATA_ACQUIRE_CB( + x_handle, + STARPU_R, + starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0) + ); +#else + starpu_data_acquire_cb(x_handle, STARPU_R, callback, NULL); +#endif + + /* Wait for acquisition (and thus insertion) */ + starpu_data_acquire(x_handle, STARPU_W); + starpu_data_release(x_handle); + + /* Now wait for the inserted task */ + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + /* Can now clean */ + starpu_data_unpartition(f_handle, STARPU_MAIN_RAM); + starpu_data_unregister(f_handle); + starpu_data_unregister(x_handle); + + FPRINTF(stderr, "VALUES: %d", x); + for(i=0 ; i +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Call acquire/release in competition with inserting 
task working on the same data + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 10; +#else +static unsigned ntasks = 10000; +#endif + +unsigned token = 0; +starpu_data_handle_t token_handle; + +static int increment_token(void) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + task->synchronous = 1; + task->cl = &increment_cl; + task->handles[0] = token_handle; + ret = starpu_task_submit(task); + return ret; +} + +static void callback(void *arg) +{ + (void)arg; + token++; + starpu_data_release(token_handle); +} + +int main(int argc, char **argv) +{ + unsigned i; + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); + + FPRINTF(stderr, "Token: %u\n", token); + + for(i=0; i +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Call acquire/release in competition with inserting task working on the same data + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 40; +#elif !defined(STARPU_LONG_CHECK) +static unsigned ntasks = 4000; +#else +static unsigned ntasks = 40000; +#endif + +unsigned token = 0; +starpu_data_handle_t token_handle; + +static int increment_token(int synchronous) +{ + struct starpu_task *task = starpu_task_create(); + task->synchronous = synchronous; + task->cl = &increment_cl; + task->handles[0] = token_handle; + return starpu_task_submit(task); +} + +static void callback(void *arg) +{ + (void)arg; + starpu_data_release(token_handle); +} + +#ifdef STARPU_DEVEL +# warning TODO add threads +#endif + +int main(int argc, char **argv) +{ + unsigned i; + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + 
starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); + + FPRINTF(stderr, "Token: %u\n", token); + + for(i=0; i +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that _release_to correctly interacts with tasks working on the same data + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 10; +#else +static unsigned ntasks = 1000; +#endif + +void check_cpu(void *descr[], void *arg) +{ + unsigned *val = arg; + unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + STARPU_ASSERT(*tokenptr == *val); +} + +static struct starpu_codelet check_cl = +{ + .modes = { STARPU_R }, + .cpu_funcs = {check_cpu}, + .cpu_funcs_name = {"check_cpu"}, + .nbuffers = 1 +}; + +unsigned token = 0; +starpu_data_handle_t token_handle; + +static int increment_token(void) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + task->cl = &increment_cl; + task->handles[0] = token_handle; + ret = starpu_task_submit(task); + return ret; +} + +static int check_token(unsigned value) +{ + unsigned *value_p; + int ret; + struct starpu_task *task = starpu_task_create(); + task->cl = &check_cl; + task->handles[0] = token_handle; + task->cl_arg = value_p = malloc(sizeof(*value_p)); + task->cl_arg_size = sizeof(*value_p); + task->cl_arg_free = 1; + *value_p = value; + ret = starpu_task_submit(task); + return ret; +} + +static void callback(void *arg) +{ + (void)arg; + token++; + starpu_data_release_to(token_handle, STARPU_W); + starpu_sleep(0.001); + starpu_data_release_to(token_handle, STARPU_R); + starpu_sleep(0.001); + starpu_data_release(token_handle); +} + +int main(int argc, char **argv) +{ + unsigned i; + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); + + 
FPRINTF(stderr, "Token: %u\n", token); + + for(i=0; i +#include "../helper.h" + +/* + * Try to use data_acquire_try in parallel with tasks + */ + +void func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + starpu_sleep(0.01); +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {func}, + .cuda_funcs = {func}, + .opencl_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1 +}; + +unsigned token = 0; +starpu_data_handle_t token_handle; + +static +void callback(void *arg) +{ + (void)arg; + starpu_data_release(token_handle); +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); + + ret = starpu_task_insert(&cl, STARPU_RW, token_handle, 0); + if (ret == -ENODEV) + goto enodev; + ret = starpu_data_acquire_try(token_handle, STARPU_R); + STARPU_ASSERT(ret != 0); + + starpu_do_schedule(); + while ((ret = starpu_data_acquire_try(token_handle, STARPU_R)) != 0) + { + starpu_sleep(0.001); + } + + ret = starpu_task_insert(&cl, STARPU_RW, token_handle, 0); + if (ret == -ENODEV) + goto enodev; + + starpu_data_release(token_handle); + + starpu_task_wait_for_all(); + + ret = starpu_data_acquire_try(token_handle, STARPU_R); + STARPU_ASSERT(ret == 0); + starpu_data_release(token_handle); + + starpu_data_unregister(token_handle); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_data_unregister(token_handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/allocate.c b/tests/datawizard/allocate.c new file mode 100644 index 0000000..0655d41 --- 
/dev/null +++ b/tests/datawizard/allocate.c @@ -0,0 +1,253 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include +#include + +/* Stress data allocation on a GPU, triggering eviction */ + +#define SIZE_LIMIT 128 +#define STR_LIMIT "128" +#define SIZE_ALLOC 128 + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static +int test_prefetch(unsigned memnodes) +{ + float *buffers[4]; + starpu_data_handle_t handles[4]; + unsigned i, j; + starpu_ssize_t available_size; + + if (starpu_getenv_number_default("STARPU_DIDUSE_BARRIER", 0)) + /* This would hang */ + return STARPU_TEST_SKIPPED; + + buffers[0] = malloc(SIZE_ALLOC*1024*512); + STARPU_ASSERT(buffers[0]); + + /* Prefetch half the memory */ + starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)buffers[0], SIZE_ALLOC*1024*512); + for(i=1 ; i +#include +#include +#include "../helper.h" + +/* Allocate a big buffer not fitting in a single NUMA node, to see what + * happens, especially if NUMA nodes are correctly reported in traces. */ + +#if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) +#warning unsetenv or setenv are not defined. Or CPU are not enabled. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#elif !defined(STARPU_HAVE_HWLOC) // We need hwloc to know the size of NUMA nodes +#warning hwloc is not used. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#endif + +static void nop(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = { nop }, + .nbuffers = 1, + .modes = { STARPU_RW }, +}; + +int main(int argc, char **argv) +{ + int ret; + starpu_data_handle_t handle; + int worker; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + unsetenv("STARPU_NCUDA"); + conf.ncpus = -1; + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker, 1) == 0) + { + fprintf(stderr, "Could not find enough workers\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + size_t numa_node_mem; + hwloc_topology_t topo = starpu_get_hwloc_topology(); + + /* This test should work also when NUMA support isn't enabled in + * StarPU, so we can't rely on starpu_memory_nodes_get_numa_count(). */ + if (hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE) < 2) + { + /* Requires at least 2 NUMA nodes, to avoid overflowing memory + * if there is only one NUMA node. 
*/ + fprintf(stderr, "Could not find enough NUMA nodes\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + hwloc_obj_t numa_node = hwloc_get_obj_by_type(topo, HWLOC_OBJ_NUMANODE, 0); + if (!numa_node) + { + fprintf(stderr, "Can't find NUMA node 0\n"); + starpu_shutdown(); + return EXIT_FAILURE; + } +#if HWLOC_API_VERSION >= 0x00020000 + numa_node_mem = numa_node->attr->numanode.local_memory; +#else + numa_node_mem = numa_node->memory.local_memory; +#endif + + size_t buffer_size = numa_node_mem * 1.5; + + printf("NUMA node 0 has %lu MB, the buffer will use %lu MB\n", numa_node_mem / 1024 / 1024, buffer_size / 1024 / 1024); + + uintptr_t buffer = starpu_malloc_on_node(STARPU_MAIN_RAM, buffer_size); + if (!buffer) + { + fprintf(stderr, "Refuses to allocate that much, too bad\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + memset((void*) buffer, 0, buffer_size); + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, buffer, buffer_size); + + ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + +enodev: + starpu_data_unregister(handle); + starpu_free_on_node(STARPU_MAIN_RAM, buffer, buffer_size); + starpu_shutdown(); + return EXIT_SUCCESS; +} +#endif diff --git a/tests/datawizard/bcsr.c b/tests/datawizard/bcsr.c new file mode 100644 index 0000000..d464a9e --- /dev/null +++ b/tests/datawizard/bcsr.c @@ -0,0 +1,177 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +static starpu_data_handle_t bcsr_handle; + +void cpu_show_bcsr(void *descr[], void *arg) +{ + (void)arg; + struct starpu_bcsr_interface *iface = descr[0]; + uint32_t nnz = STARPU_BCSR_GET_NNZ(iface); + uint32_t nrow = STARPU_BCSR_GET_NROW(iface); + int *nzval = (int *)STARPU_BCSR_GET_NZVAL(iface); + uint32_t *colind = STARPU_BCSR_GET_COLIND(iface); + uint32_t *rowptr = STARPU_BCSR_GET_ROWPTR(iface); + STARPU_ASSERT(colind == STARPU_BCSR_GET_RAM_COLIND(iface)); + STARPU_ASSERT(rowptr == STARPU_BCSR_GET_RAM_ROWPTR(iface)); + + uint32_t firstentry = STARPU_BCSR_GET_FIRSTENTRY(iface); + uint32_t r = STARPU_BCSR_GET_R(iface); + uint32_t c = STARPU_BCSR_GET_C(iface); + uint32_t elemsize = STARPU_BCSR_GET_ELEMSIZE(iface); + + uint32_t i, j, y, x; + static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + printf("\nnnz %u elemsize %u\n", nnz, elemsize); + + for (i = 0; i < nrow; i++) + { + uint32_t row_start = rowptr[i] - firstentry; + uint32_t row_end = rowptr[i+1] - firstentry; + + printf("row %u\n", i); + + for (j = row_start; j < row_end; j++) + { + int *block = nzval + j * r*c; + + printf(" column %u\n", colind[j]); + + for (y = 0; y < r; y++) + { + for (x = 0; x < c; x++) + printf(" %d", block[y*c+x]); + printf("\n"); + } + } + } + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + + +struct starpu_codelet show_cl = +{ + .cpu_funcs = { cpu_show_bcsr }, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +/* + * In this test, we use the following matrix: + * + * +----------------+ + * | 0 1 0 0 | + * | 2 3 0 0 | + * | 4 5 8 9 | + * | 6 7 10 11 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * +----------------+ + * + * 
nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] + * colind = [0, 0, 1] (column index of each non-zero block) + * rowptr = [0, 1, 3] (index of first non-zero block for each row) + * r = c = 2 + */ + +/* Size of the blocks */ +#define R 2 +#define C 2 + +#define NNZ_BLOCKS 3 /* out of 6 */ +#define NZVAL_SIZE (R*C*NNZ_BLOCKS) + +#define NROWS 3 + +static int nzval[NZVAL_SIZE] = +{ + 0, 1, 2, 3, /* First block */ + 4, 5, 6, 7, /* Second block */ + 8, 9, 10, 11 /* Third block */ +}; +static uint32_t colind[NNZ_BLOCKS] = { 0, 0, 1 }; + +static uint32_t rowptr[NROWS+1] = { 0, 1, NNZ_BLOCKS, NNZ_BLOCKS }; + +int main(int argc, char **argv) +{ + int ret; + struct starpu_conf conf; + starpu_conf_init(&conf); + + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + if (starpu_initialize(&conf, &argc, &argv) == -ENODEV) + return STARPU_TEST_SKIPPED; + + if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + starpu_bcsr_data_register(&bcsr_handle, + STARPU_MAIN_RAM, + NNZ_BLOCKS, + NROWS, + (uintptr_t) nzval, + colind, + rowptr, + 0, /* firstentry */ + R, + C, + sizeof(nzval[0])); + + ret = starpu_task_insert(&show_cl, STARPU_R, bcsr_handle, 0); + if (ret == -ENODEV) + { + starpu_data_unregister(bcsr_handle); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + struct starpu_data_filter filter = + { + .filter_func = starpu_bcsr_filter_vertical_block, + .nchildren = 3, + }; + starpu_data_partition(bcsr_handle, &filter); + + ret = starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 0), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 1), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&show_cl, STARPU_R, 
starpu_data_get_sub_data(bcsr_handle, 1, 2), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unpartition(bcsr_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(bcsr_handle); + + starpu_shutdown(); + + return 0; +} diff --git a/tests/datawizard/cache.c b/tests/datawizard/cache.c new file mode 100644 index 0000000..91a644e --- /dev/null +++ b/tests/datawizard/cache.c @@ -0,0 +1,98 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Trigger re-using a buffer allocation on GPUs + */ + +#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) +static void codelet(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + FPRINTF(stderr, "%lx\n", (unsigned long) STARPU_VARIABLE_GET_PTR(descr[0])); + FPRINTF(stderr, "codelet\n"); +} +#endif + +#ifdef STARPU_USE_CUDA +static struct starpu_codelet cuda_cl = +{ + .cuda_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; +#endif + +#ifdef STARPU_USE_OPENCL +static struct starpu_codelet opencl_cl = +{ + .opencl_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; +#endif + +void dotest(struct starpu_codelet *cl) +{ + int ret; + int var = 42; + starpu_data_handle_t handle; + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + ret = starpu_task_insert(cl, STARPU_R, handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + + starpu_data_unregister(handle); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); + + ret = starpu_task_insert(cl, STARPU_R, handle, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + +enodev: + starpu_data_unregister(handle); +} + +int main() +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_CUDA + dotest(&cuda_cl); +#endif +#ifdef STARPU_USE_OPENCL + dotest(&opencl_cl); +#endif + + starpu_shutdown(); + + return 0; +} diff --git a/tests/datawizard/commute.c b/tests/datawizard/commute.c new file mode 100644 index 0000000..93e0674 --- /dev/null +++ b/tests/datawizard/commute.c @@ -0,0 +1,193 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Trigger various STARPU_R / STARPU_RW / STARPU_RW|COMMUTE patterns + */ + +void begin(void *descr[], void *arg) +{ + (void)arg; + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *x = 0; +} + +static struct starpu_codelet codelet_begin = +{ + .cpu_funcs = {begin}, + .cpu_funcs_name = {"begin"}, + .nbuffers = 1, + .name = "begin", +}; + +void commute1(void *descr[], void *arg) +{ + (void)arg; + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *x = 1; +} + +static struct starpu_codelet codelet_commute1 = +{ + .cpu_funcs = {commute1}, + .cpu_funcs_name = {"commute1"}, + .nbuffers = 1, + .modes = {STARPU_RW | STARPU_COMMUTE}, + .name = "commute1", +}; + +void commute2(void *descr[], void *arg) +{ + (void)arg; + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *x = 2; +} + +static struct starpu_codelet codelet_commute2 = +{ + .cpu_funcs = {commute2}, + .cpu_funcs_name = {"commute2"}, + .nbuffers = 1, + .modes = {STARPU_W | STARPU_COMMUTE}, + .name = "commute2", +}; + +void commute3(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet codelet_commute3 = +{ + .cpu_funcs = {commute3}, + .cpu_funcs_name = {"commute3"}, + .nbuffers = 1, + .modes = {STARPU_RW | STARPU_COMMUTE}, + .name = "commute3", +}; + +static 
struct starpu_codelet codelet_end; +void end(void *descr[], void *_args) +{ + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + enum starpu_data_access_mode end_mode = *(enum starpu_data_access_mode*) _args; + + if (end_mode & STARPU_W) + (*x)++; +} + +static struct starpu_codelet codelet_end = +{ + .cpu_funcs = {end}, + .cpu_funcs_name = {"end"}, + .nbuffers = 1, + .name = "end", +}; + +static int x; +static starpu_data_handle_t x_handle, f_handle; + +static void test(enum starpu_data_access_mode begin_mode, enum starpu_data_access_mode end_mode, int order) +{ + struct starpu_task *begin_t, *commute1_t, *commute2_t, *end_t; + int ret; + + codelet_begin.modes[0] = begin_mode; + codelet_end.modes[0] = end_mode; + + begin_t = starpu_task_create(); + begin_t->cl = &codelet_begin; + begin_t->handles[0] = x_handle; + begin_t->use_tag = 1; + begin_t->tag_id = (order<<20) + (begin_mode<<10) + end_mode; + + commute1_t = starpu_task_create(); + commute1_t->cl = &codelet_commute1; + commute1_t->handles[0] = x_handle; + + commute2_t = starpu_task_create(); + commute2_t->cl = &codelet_commute2; + commute2_t->handles[0] = x_handle; + + if (order) + starpu_task_declare_deps_array(commute2_t, 1, &commute1_t); + else + starpu_task_declare_deps_array(commute1_t, 1, &commute2_t); + + end_t = starpu_task_create(); + end_t->cl = &codelet_end; + end_t->handles[0] = x_handle; + end_t->detach = 0; + end_t->cl_arg = &end_mode; + end_t->cl_arg_size = sizeof(end_mode); + + if (starpu_task_submit(begin_t) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + if (starpu_task_submit(commute1_t) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + if (starpu_task_submit(commute2_t) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + starpu_task_insert(&codelet_commute3, STARPU_RW|STARPU_COMMUTE, x_handle, 0); + if (starpu_task_submit(end_t) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + + ret = starpu_task_wait(end_t); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + starpu_data_acquire(x_handle, STARPU_R); + if (x 
!= 1 + order + !!(end_mode & STARPU_W)) + exit(EXIT_FAILURE); + starpu_data_release(x_handle); +} + +int main(int argc, char **argv) +{ + int i, ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Declare x */ + starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + + for (i = 0; i <= 1; i++) + { + test(STARPU_R, STARPU_R, i); + test(STARPU_W, STARPU_R, i); + test(STARPU_W, STARPU_RW, i); + test(STARPU_R, STARPU_RW, i); + } + + starpu_data_unregister(x_handle); + starpu_shutdown(); + STARPU_RETURN(0); +} diff --git a/tests/datawizard/commute2.c b/tests/datawizard/commute2.c new file mode 100644 index 0000000..f7b3c47 --- /dev/null +++ b/tests/datawizard/commute2.c @@ -0,0 +1,128 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test that STARPU_RW vs STARPU_RW|STARPU_COMMUTE get proper dependency + */ + +static unsigned cnt; + +static void cpu_memcpy(void *descr[], void *cl_arg) +{ + int me = (uintptr_t)cl_arg; + int res; + + (void)descr; + + FPRINTF(stderr,"%d\n", me); + + if (me == 0) + { + /* let commute tasks potentially happen */ + starpu_usleep(100000); + res = STARPU_ATOMIC_ADD(&cnt,1); + STARPU_ASSERT(res == 1); + } + else + { + res = STARPU_ATOMIC_ADD(&cnt,1); + STARPU_ASSERT(res != 1); + } +} + +static struct starpu_codelet my_cl = +{ + .where = STARPU_CPU, + .cpu_funcs = {cpu_memcpy}, + .nbuffers = STARPU_VARIABLE_NBUFFERS +}; + +int main(void) +{ + double *res, *a; + unsigned n=100000, i; + starpu_data_handle_t res_handle, a_handle; + unsigned nb_tasks = 10; + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void**)&res, n*sizeof(double)); + starpu_malloc((void**)&a, n*sizeof(double)); + + for(i=0; i < n; i++) + res[i] = a[i] = 1.0; + + starpu_vector_data_register(&res_handle, 0, (uintptr_t)res, (uint32_t)n, sizeof(double)); + starpu_vector_data_register(&a_handle, 0, (uintptr_t)a, (uint32_t)n, sizeof(double)); + + starpu_data_acquire(a_handle, STARPU_RW); + for (i = 0; i < nb_tasks; i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl=&my_cl; + task->nbuffers = i == 0 ? 
2 : 1; + task->handles[0] = res_handle; + + if (i == 0) + task->modes[0] = STARPU_RW; + else + task->modes[0] = STARPU_RW | STARPU_COMMUTE; + + task->handles[1] = a_handle; + task->modes[1] = STARPU_R; + task->cl_arg = (void*)(uintptr_t)i; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + { + starpu_data_release(a_handle); + goto enodev; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* let commute tasks potentially happen */ + starpu_usleep(100000); + starpu_data_release(a_handle); + + starpu_task_wait_for_all (); + +enodev: + starpu_data_unregister(res_handle); + starpu_data_unregister(a_handle); + + starpu_free_noflag(res, n*sizeof(double)); + starpu_free_noflag(a, n*sizeof(double)); + + starpu_shutdown(); + return ret == -ENODEV ? STARPU_TEST_SKIPPED : EXIT_SUCCESS; +} diff --git a/tests/datawizard/copy.c b/tests/datawizard/copy.c new file mode 100644 index 0000000..eb0c145 --- /dev/null +++ b/tests/datawizard/copy.c @@ -0,0 +1,115 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Trigger a lot of transfers of a single variable between CPUs and GPUs + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned nloops = 10; +#else +static unsigned nloops = 1000; +#endif + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet cpu_codelet = +{ + .cpu_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +static struct starpu_codelet gpu_codelet = +{ + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + + +int main(int argc, char **argv) +{ + float foo; + starpu_data_handle_t float_array_handle; + unsigned i; + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0 && + starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER) == 0) + { + FPRINTF(stderr, "This application requires a CUDA , OpenCL Worker\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + foo = 0.0f; + starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&foo, sizeof(foo)); + + for (i = 0; i < nloops; i++) + { + struct starpu_task *task_cpu, *task_gpu; + + task_cpu = starpu_task_create(); + task_gpu = starpu_task_create(); + + task_cpu->cl = &cpu_codelet; + task_cpu->callback_func = NULL; + task_cpu->handles[0] = float_array_handle; + + task_gpu->cl = &gpu_codelet; + task_gpu->callback_func = NULL; + task_gpu->handles[0] = float_array_handle; + + ret = starpu_task_submit(task_cpu); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_submit(task_gpu); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + starpu_data_unregister(float_array_handle); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(float_array_handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/critical_section_with_void_interface.c b/tests/datawizard/critical_section_with_void_interface.c new file mode 100644 index 0000000..906c39d --- /dev/null +++ b/tests/datawizard/critical_section_with_void_interface.c @@ -0,0 +1,96 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Use a void interface to protect the access to a variable that is not declared to StarPU + */ + +starpu_data_handle_t void_handle; + +int critical_var; + +void critical_section(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + + /* We do not protect this variable because it is only accessed when the + * "void_handle" piece of data is accessed. 
*/ + critical_var++; +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = {critical_section}, + .cuda_funcs = {critical_section}, + .opencl_funcs = {critical_section}, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +int main(void) +{ +#ifdef STARPU_QUICK_CHECK + int ntasks = 10; +#else + int ntasks = 1000; +#endif + + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + critical_var = 0; + + /* Create a void data which will be used as an exclusion mechanism. */ + starpu_void_data_register(&void_handle); + + int i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = void_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(void_handle); + + ret = (critical_var == ntasks) ? EXIT_SUCCESS : EXIT_FAILURE; + + starpu_shutdown(); + + return ret; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/data_deinitialize.c b/tests/datawizard/data_deinitialize.c new file mode 100644 index 0000000..09f12d3 --- /dev/null +++ b/tests/datawizard/data_deinitialize.c @@ -0,0 +1,238 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try to mix starpu_data_deinitialize and starpu_data_deinitialize_submit + * calls with task insertions + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned nloops=100; +#else +static unsigned nloops=1000; +#endif +#define VECTORSIZE 1024 + +static starpu_data_handle_t v_handle; + +/* + * Memset + */ + +#ifdef STARPU_USE_CUDA +static void cuda_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +static void opencl_memset_codelet(void *buffers[], void *args) +{ + (void) args; + STARPU_SKIP_IF_VALGRIND; + + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + + cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); + char *v = malloc(length); + STARPU_ASSERT(v != NULL); + memset(v, 42, length); + + cl_int err; + err = clEnqueueWriteBuffer(queue, + buffer, + CL_FALSE, + 0, /* offset */ + length, /* sizeof (char) */ + v, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL /* event */); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); +} +#endif /* !STARPU_USE_OPENCL */ + +void cpu_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = 
STARPU_VECTOR_GET_NX(descr[0]); + + memset(buf, 42, length * sizeof(*buf)); +} + +static struct starpu_codelet memset_cl = +{ + .cpu_funcs = {cpu_memset_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_memset_codelet}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_memset_codelet}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_memset_codelet"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +/* + * Check content + */ + +void cpu_check_content_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + unsigned i; + for (i = 0; i < length; i++) + { + if (buf[i] != 42) + { + FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, buf[i], 42); + exit(-1); + } + } +} + +static struct starpu_codelet check_content_cl = +{ + .cpu_funcs = {cpu_check_content_codelet}, + .cpu_funcs_name = {"cpu_check_content_codelet"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if(starpu_cpu_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) nloops = 2; +#endif + + /* The buffer should never be explicitly allocated */ + starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); + + unsigned loop; + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *memset_task; + struct starpu_task *check_content_task; + + memset_task = starpu_task_create(); + memset_task->cl = &memset_cl; + memset_task->handles[0] = v_handle; + memset_task->detach = 0; + + ret = starpu_task_submit(memset_task); + if (ret == -ENODEV) goto enodev; + 
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(memset_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + check_content_task = starpu_task_create(); + check_content_task->cl = &check_content_cl; + check_content_task->handles[0] = v_handle; + check_content_task->detach = 0; + + ret = starpu_task_submit(check_content_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(check_content_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + starpu_data_deinitialize(v_handle); + } + + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *memset_task; + struct starpu_task *check_content_task; + + memset_task = starpu_task_create(); + memset_task->cl = &memset_cl; + memset_task->handles[0] = v_handle; + + ret = starpu_task_submit(memset_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + check_content_task = starpu_task_create(); + check_content_task->cl = &check_content_cl; + check_content_task->handles[0] = v_handle; + + ret = starpu_task_submit(check_content_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_data_deinitialize_submit(v_handle); + } + + /* this should get rid of automatically allocated buffers */ + starpu_data_unregister(v_handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/data_implicit_deps.c b/tests/datawizard/data_implicit_deps.c new file mode 100644 index 0000000..f7e9935 --- /dev/null +++ b/tests/datawizard/data_implicit_deps.c @@ -0,0 +1,274 @@ +/* StarPU --- Runtime 
system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test that implicit dependencies get properly computed + */ + +#define VECTORSIZE 1024 + +static unsigned *A, *B, *C, *D; +starpu_data_handle_t A_handle, B_handle, C_handle, D_handle; + +static unsigned var = 0; +starpu_data_handle_t var_handle; + +void func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + + STARPU_SKIP_IF_VALGRIND; + + starpu_usleep(200000); +} + +static struct starpu_codelet cl_f = +{ + .modes = { STARPU_RW, STARPU_R, STARPU_RW }, + .cpu_funcs = {func}, + .cuda_funcs = {func}, + .opencl_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 3, +}; + +void g_cpu(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); + + starpu_usleep(100000); + *val = 42; +} + +#ifdef STARPU_USE_CUDA +void g_cuda(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned value = 42; + + starpu_usleep(100000); + cudaMemcpyAsync(val, &value, sizeof(value), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +void 
g_opencl(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem val = (cl_mem) STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned value = 42; + + starpu_usleep(100000); + cl_command_queue queue; + starpu_opencl_get_current_queue(&queue); + + cl_int err; + err = clEnqueueWriteBuffer(queue, val, CL_TRUE, 0, sizeof(unsigned), (void *)&value, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); +} +#endif + +static struct starpu_codelet cl_g = +{ + .modes = { STARPU_RW, STARPU_R, STARPU_RW }, + .cpu_funcs = {g_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {g_cuda}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {g_opencl}, +#endif + .cpu_funcs_name = {"g_cpu"}, + .nbuffers = 3, +}; + +void h_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); + + FPRINTF(stderr, "VAR %u (should be 42)\n", *val); + STARPU_ASSERT(*val == 42); +} + +#ifdef STARPU_USE_CUDA +void h_cuda(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned value; + + cudaMemcpyAsync(&value, val, sizeof(value), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + FPRINTF(stderr, "VAR %u (should be 42)\n", value); + STARPU_ASSERT(value == 42); +} +#endif + +#ifdef STARPU_USE_OPENCL +void h_opencl(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem val = (cl_mem) STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned value = 0; + + cl_command_queue queue; + starpu_opencl_get_current_queue(&queue); + + cl_int err; + err = clEnqueueReadBuffer(queue, val, CL_TRUE, 0, sizeof(unsigned), (void *)&value, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + + FPRINTF(stderr, "VAR %u (should be 42)\n", value); + 
STARPU_ASSERT(value == 42); +} +#endif + +static struct starpu_codelet cl_h = +{ + .modes = { STARPU_RW, STARPU_R, STARPU_RW }, + .cpu_funcs = {h_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {h_cuda}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {h_opencl}, +#endif + .cpu_funcs_name = {"h_cpu"}, + .nbuffers = 3 +}; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&A, VECTORSIZE*sizeof(unsigned)); + starpu_malloc((void **)&B, VECTORSIZE*sizeof(unsigned)); + starpu_malloc((void **)&C, VECTORSIZE*sizeof(unsigned)); + starpu_malloc((void **)&D, VECTORSIZE*sizeof(unsigned)); + + starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned)); + starpu_vector_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, VECTORSIZE, sizeof(unsigned)); + starpu_vector_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, VECTORSIZE, sizeof(unsigned)); + starpu_vector_data_register(&D_handle, STARPU_MAIN_RAM, (uintptr_t)D, VECTORSIZE, sizeof(unsigned)); + + starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)(&var), sizeof(var)); + +#if 0 + starpu_data_set_sequential_consistency_flag(A_handle, 0); + starpu_data_set_sequential_consistency_flag(B_handle, 0); + starpu_data_set_sequential_consistency_flag(C_handle, 0); + starpu_data_set_sequential_consistency_flag(D_handle, 0); +#endif + + /* f(Ar, Brw): sleep + * g(Br; Crw); sleep, var = 42 + * h(Cr; Drw); check that var == 42 + */ + struct starpu_task *task_f = starpu_task_create(); + task_f->cl = &cl_f; + task_f->handles[0] = var_handle; + task_f->handles[1] = A_handle; + task_f->handles[2] = B_handle; + ret = starpu_task_submit(task_f); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + struct starpu_task *task_g = starpu_task_create(); + task_g->cl = 
&cl_g; + task_g->handles[0] = var_handle; + task_g->handles[1] = B_handle; + task_g->handles[2] = C_handle; + ret = starpu_task_submit(task_g); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + struct starpu_task *task_h = starpu_task_create(); + task_h->cl = &cl_h; + task_h->handles[0] = var_handle; + task_h->handles[1] = C_handle; + task_h->handles[2] = D_handle; + ret = starpu_task_submit(task_h); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); + starpu_data_unregister(C_handle); + starpu_data_unregister(D_handle); + + starpu_data_unregister(var_handle); + + starpu_free_noflag(A, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(B, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(C, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(D, VECTORSIZE*sizeof(unsigned)); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); + starpu_data_unregister(C_handle); + starpu_data_unregister(D_handle); + + starpu_data_unregister(var_handle); + + starpu_free_noflag(A, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(B, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(C, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(D, VECTORSIZE*sizeof(unsigned)); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/data_invalidation.c b/tests/datawizard/data_invalidation.c new file mode 100644 index 0000000..398f820 --- /dev/null +++ b/tests/datawizard/data_invalidation.c @@ -0,0 +1,238 @@ +/* StarPU --- Runtime 
system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try to mix starpu_data_invalidate and starpu_data_invalidate_submit + * calls with task insertions + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned nloops=100; +#else +static unsigned nloops=1000; +#endif +#define VECTORSIZE 1024 + +static starpu_data_handle_t v_handle; + +/* + * Memset + */ + +#ifdef STARPU_USE_CUDA +static void cuda_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +static void opencl_memset_codelet(void *buffers[], void *args) +{ + (void) args; + STARPU_SKIP_IF_VALGRIND; + + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + + cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); + char *v = malloc(length); + STARPU_ASSERT(v != NULL); + memset(v, 42, length); + + cl_int err; + err = clEnqueueWriteBuffer(queue, + buffer, + CL_FALSE, + 0, /* 
offset */ + length, /* sizeof (char) */ + v, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL /* event */); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); +} +#endif /* !STARPU_USE_OPENCL */ + +void cpu_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + memset(buf, 42, length * sizeof(*buf)); +} + +static struct starpu_codelet memset_cl = +{ + .cpu_funcs = {cpu_memset_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_memset_codelet}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_memset_codelet}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_memset_codelet"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +/* + * Check content + */ + +void cpu_check_content_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + unsigned i; + for (i = 0; i < length; i++) + { + if (buf[i] != 42) + { + FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, buf[i], 42); + exit(-1); + } + } +} + +static struct starpu_codelet check_content_cl = +{ + .cpu_funcs = {cpu_check_content_codelet}, + .cpu_funcs_name = {"cpu_check_content_codelet"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if(starpu_cpu_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) nloops = 2; +#endif + + /* The buffer should never be explicitly allocated */ + starpu_vector_data_register(&v_handle, (uint32_t)-1, 
(uintptr_t)NULL, VECTORSIZE, sizeof(char)); + + unsigned loop; + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *memset_task; + struct starpu_task *check_content_task; + + memset_task = starpu_task_create(); + memset_task->cl = &memset_cl; + memset_task->handles[0] = v_handle; + memset_task->detach = 0; + + ret = starpu_task_submit(memset_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(memset_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + check_content_task = starpu_task_create(); + check_content_task->cl = &check_content_cl; + check_content_task->handles[0] = v_handle; + check_content_task->detach = 0; + + ret = starpu_task_submit(check_content_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(check_content_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + starpu_data_invalidate(v_handle); + } + + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *memset_task; + struct starpu_task *check_content_task; + + memset_task = starpu_task_create(); + memset_task->cl = &memset_cl; + memset_task->handles[0] = v_handle; + + ret = starpu_task_submit(memset_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + check_content_task = starpu_task_create(); + check_content_task->cl = &check_content_cl; + check_content_task->handles[0] = v_handle; + + ret = starpu_task_submit(check_content_task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_data_invalidate_submit(v_handle); + } + + /* this should get rid of automatically allocated buffers */ + starpu_data_unregister(v_handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the 
computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/data_register.c b/tests/datawizard/data_register.c new file mode 100644 index 0000000..3fd6b11 --- /dev/null +++ b/tests/datawizard/data_register.c @@ -0,0 +1,108 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include + +struct my_interface +{ + enum starpu_data_interface_id id; + /* Just a integer */ + int x; +}; + +static struct starpu_data_interface_ops starpu_interface_my_ops; + +static void register_my(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + (void) home_node; + struct my_interface *my_interface = data_interface; + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct my_interface *local_interface = starpu_data_get_interface_on_node(handle, node); + local_interface->x = my_interface->x; + local_interface->id = my_interface->id; + } +} + +static size_t my_get_size(starpu_data_handle_t handle) +{ + struct my_interface *my_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return my_interface->x; +} + +static uint32_t my_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(my_get_size(handle), 0); +} + +static struct starpu_data_interface_ops starpu_interface_my_ops = +{ + .register_data_handle = register_my, + .allocate_data_on_node = NULL, + .free_data_on_node = NULL, + .copy_methods = NULL, + .get_size = my_get_size, + .get_max_size = NULL, + .footprint = my_footprint, + .compare = NULL, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct my_interface), + .display = NULL, + .pack_data = NULL, + .peek_data = NULL, + .unpack_data = NULL, + .describe = NULL, +}; + +#define N 42 +int main(void) +{ + int ret; + int x; + starpu_data_handle_t handles[N]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + for (x = 0; x < N; x++) + { + starpu_interface_my_ops.interfaceid = starpu_data_interface_get_next_id(); + struct my_interface my_interface = + { + .id = 
starpu_interface_my_ops.interfaceid, + }; + starpu_data_register(&handles[x], -1, &my_interface, &starpu_interface_my_ops); + STARPU_ASSERT(_starpu_data_interface_get_ops(my_interface.id) == &starpu_interface_my_ops); + } + + for (x = 0; x < N; x++) + starpu_data_unregister(handles[x]); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/datawizard/deinitialize_pending_requests.c b/tests/datawizard/deinitialize_pending_requests.c new file mode 100644 index 0000000..5f016dc --- /dev/null +++ b/tests/datawizard/deinitialize_pending_requests.c @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Try deinitializing a variable which is pending a request + */ +#define SIZE (100<<20) /* 100 MiB */ + +int main(void) +{ + int ret; + char *var = NULL; + starpu_data_handle_t handle; + + ret = starpu_init(NULL); + if (ret == -ENODEV) goto skip; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0) + goto enodev; + + var = malloc(SIZE); /* NOTE(review): allocation result is not checked before registration */ + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)var, SIZE); + + /* Let a request fly */ + starpu_fxt_trace_user_event_string("requesting"); + starpu_data_fetch_on_node(handle, 1, 1); + starpu_fxt_trace_user_event_string("requested"); + /* But suddenly deinitialize the data while it's on the fly! */ + starpu_data_deinitialize_submit(handle); + starpu_fxt_trace_user_event_string("deinitialized"); + + starpu_data_unregister(handle); + free(var); + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); +skip: + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/deps.c b/tests/datawizard/deps.c new file mode 100644 index 0000000..9dd1d01 --- /dev/null +++ b/tests/datawizard/deps.c @@ -0,0 +1,121 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "../helper.h" + +#define N 10 +#define LOOPS 4 + +void null_cpu_func(void *buffers[], void *arg) +{ + (void)arg; + (void)buffers; +} + +void prod_cpu_func(void *buffers[], void *arg) +{ + int *data = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + int n = STARPU_VECTOR_GET_NX(buffers[0]); + int i; + int factor; + + starpu_codelet_unpack_args(arg, &factor); + + FPRINTF(stderr, "Multiplying by %d\n", factor); + for(i=0 ; i +#include "../helper.h" + +/* + * Test Dijkstra's Dining Philosophers problem + */ + +/* TODO: try to use an arbiter and check improved concurrency */ + +/* number of philosophers */ +#define N 16 + +starpu_data_handle_t fork_handles[N]; +unsigned forks[N]; + +void eat_kernel(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet eating_cl = +{ + .modes = { STARPU_RW, STARPU_RW }, + .cuda_funcs = {eat_kernel}, + .cpu_funcs = {eat_kernel}, + .opencl_funcs = {eat_kernel}, + .cpu_funcs_name = {"eat_kernel"}, + .nbuffers = 2 +}; + +static +int submit_one_task(unsigned p) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &eating_cl; + + unsigned left = p; + unsigned right = (p+1)%N; + + task->handles[0] = fork_handles[left]; + task->handles[1] = fork_handles[right]; + + int ret = starpu_task_submit(task); + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* initialize the forks */ + unsigned f; + for (f = 0; f < N; f++) + { + forks[f] = 0; + + starpu_vector_data_register(&fork_handles[f], STARPU_MAIN_RAM, (uintptr_t)&forks[f], 1, sizeof(unsigned)); + starpu_data_set_sequential_consistency_flag(fork_handles[f], 0); + } + + unsigned ntasks = 1024; +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) ntasks = 3; +#endif + + unsigned t; + for (t = 0; t < ntasks; t++) + { + /* select one philosopher randomly */ 
+ unsigned philosopher = rand() % N; + ret = submit_one_task(philosopher); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + FPRINTF(stderr, "waiting done\n"); + for (f = 0; f < N; f++) + { + starpu_data_unregister(fork_handles[f]); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + for (f = 0; f < N; f++) + { + starpu_data_unregister(fork_handles[f]); + } + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/double_parameter.c b/tests/datawizard/double_parameter.c new file mode 100644 index 0000000..8b1666f --- /dev/null +++ b/tests/datawizard/double_parameter.c @@ -0,0 +1,186 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Try passing the same parameter twice, with various access modes + */ + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet codelet_R_R = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R} +}; + +static struct starpu_codelet codelet_R_W = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} +}; + +static struct starpu_codelet codelet_R_RW = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW} +}; + +static struct starpu_codelet codelet_W_R = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_R} +}; + +static struct starpu_codelet codelet_W_W = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_W} +}; + +static struct starpu_codelet codelet_W_RW = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_RW} +}; + +static struct starpu_codelet codelet_RW_R = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R} +}; + +static struct starpu_codelet codelet_RW_W = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_W} +}; + +static struct starpu_codelet codelet_RW_RW = +{ + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW} +}; + +int main(int argc, char **argv) +{ + float foo = 0.0f; + starpu_data_handle_t handle; + int ret; + struct 
starpu_task *task;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&foo, sizeof(foo));
+
+#define SUBMIT(mode0, mode1) /* submit one task that uses `handle` as both buffers, with modes mode0 and mode1 */ \
+	{ \
+		task = starpu_task_create(); \
+		task->handles[0] = handle; \
+		task->handles[1] = handle; \
+		enum starpu_data_access_mode smode0 = STARPU_##mode0; \
+		enum starpu_data_access_mode smode1 = STARPU_##mode1; /* was STARPU_##mode0: mixed-mode codelets were never selected */ \
+		if (smode0 == STARPU_R && smode1 == STARPU_R) \
+			task->cl = &codelet_R_R; \
+		else if (smode0 == STARPU_R && smode1 == STARPU_W) \
+			task->cl = &codelet_R_W; \
+		else if (smode0 == STARPU_R && smode1 == STARPU_RW) \
+			task->cl = &codelet_R_RW; \
+		else if (smode0 == STARPU_W && smode1 == STARPU_R) \
+			task->cl = &codelet_W_R; \
+		else if (smode0 == STARPU_W && smode1 == STARPU_W) \
+			task->cl = &codelet_W_W; \
+		else if (smode0 == STARPU_W && smode1 == STARPU_RW) \
+			task->cl = &codelet_W_RW; \
+		else if (smode0 == STARPU_RW && smode1 == STARPU_R) \
+			task->cl = &codelet_RW_R; \
+		else if (smode0 == STARPU_RW && smode1 == STARPU_W) \
+			task->cl = &codelet_RW_W; \
+		else if (smode0 == STARPU_RW && smode1 == STARPU_RW) \
+			task->cl = &codelet_RW_RW; \
+		\
+		ret = starpu_task_submit(task); \
+		if (ret == -ENODEV) goto enodev; \
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); \
+	}
+
+	SUBMIT(R,R);
+	SUBMIT(R,W);
+	SUBMIT(R,RW);
+	SUBMIT(W,R);
+	SUBMIT(W,W);
+	SUBMIT(W,RW);
+	SUBMIT(RW,R);
+	SUBMIT(RW,W);
+	SUBMIT(RW,RW);
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/*
yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/dsm_stress.c b/tests/datawizard/dsm_stress.c new file mode 100644 index 0000000..63215d4 --- /dev/null +++ b/tests/datawizard/dsm_stress.c @@ -0,0 +1,270 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Trigger various combinations of access modes + */ + +#ifdef STARPU_QUICK_CHECK +# define N 100 +#else +# define N 10000 +#endif + +#define VECTORSIZE 1024 + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +static unsigned finished = 0; + +static unsigned cnt = N; + +starpu_data_handle_t v_handle, v_handle2; +static unsigned *v; +static unsigned *v2; + +static void callback(void *arg) +{ + (void)arg; + + unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); + ANNOTATE_HAPPENS_BEFORE(&cnt); + + if (res == 0) + { + ANNOTATE_HAPPENS_AFTER(&cnt); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + finished = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } +} + +static void cuda_codelet_null(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static void opencl_codelet_null(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +void cpu_codelet_null(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static enum starpu_data_access_mode select_random_mode(void) +{ + int r = rand(); + + switch (r % 3) + { + case 0: + return STARPU_R; + case 1: + return STARPU_W; + case 2: + return STARPU_RW; + }; + return STARPU_RW; +} + +static struct starpu_codelet cl_r_r = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R} +}; + +static struct starpu_codelet cl_r_w = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_W} +}; + +static struct starpu_codelet cl_r_rw = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = 
{cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_RW} +}; + +static struct starpu_codelet cl_w_r = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_R} +}; + +static struct starpu_codelet cl_w_w = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_W} +}; + +static struct starpu_codelet cl_w_rw = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_W, STARPU_RW} +}; + +static struct starpu_codelet cl_rw_r = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R} +}; + +static struct starpu_codelet cl_rw_w = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_W} +}; + +static struct starpu_codelet cl_rw_rw = +{ + .cpu_funcs = {cpu_codelet_null}, + .cuda_funcs = {cuda_codelet_null}, + .opencl_funcs = {opencl_codelet_null}, + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW} +}; + + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + 
memset(v, 0, VECTORSIZE*sizeof(unsigned));
+	ret = starpu_malloc((void **)&v2, VECTORSIZE*sizeof(unsigned));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	memset(v2, 0, VECTORSIZE*sizeof(unsigned));
+
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+	starpu_vector_data_register(&v_handle2, STARPU_MAIN_RAM, (uintptr_t)v2, VECTORSIZE, sizeof(unsigned));
+
+	unsigned iter;
+	for (iter = 0; iter < N; iter++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->handles[0] = v_handle;
+		task->handles[1] = v_handle2;
+
+		enum starpu_data_access_mode mode0 = select_random_mode();
+		enum starpu_data_access_mode mode1 = select_random_mode();
+
+		if (mode0 == STARPU_R && mode1 == STARPU_R)
+			task->cl = &cl_r_r;
+		else if (mode0 == STARPU_R && mode1 == STARPU_W)
+			task->cl = &cl_r_w;
+		else if (mode0 == STARPU_R && mode1 == STARPU_RW)
+			task->cl = &cl_r_rw;
+		else if (mode0 == STARPU_W && mode1 == STARPU_R)
+			task->cl = &cl_w_r;
+		else if (mode0 == STARPU_W && mode1 == STARPU_W)
+			task->cl = &cl_w_w;
+		else if (mode0 == STARPU_W && mode1 == STARPU_RW)
+			task->cl = &cl_w_rw;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_R)
+			task->cl = &cl_rw_r;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_W)
+			task->cl = &cl_rw_w;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_RW)
+			task->cl = &cl_rw_rw;
+
+		task->callback_func = callback;
+		task->callback_arg = NULL;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	starpu_do_schedule();
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (!finished) /* re-check the predicate: a condition wait may wake up spuriously */
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_unregister(v_handle);
+	starpu_data_unregister(v_handle2);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_free_noflag(v2, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	
starpu_data_unregister(v_handle); + starpu_data_unregister(v_handle2); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + starpu_free_noflag(v2, VECTORSIZE*sizeof(unsigned)); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/gpu_ptr_register.c b/tests/datawizard/gpu_ptr_register.c new file mode 100644 index 0000000..e22f5e6 --- /dev/null +++ b/tests/datawizard/gpu_ptr_register.c @@ -0,0 +1,298 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "scal.h" + +/* + * Register the GPU buffer to be used for an existing data, and perform + * a partitioned operation + */ + +#if ! 
(defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA)) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static int +submit_tasks(starpu_data_handle_t handle, int pieces, int n) +{ + int i, ret; + + for (i = 0; i < pieces; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl = &scal_codelet; + task->execute_on_a_specific_worker = 1; + task->workerid = i%n; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + return -ENODEV; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + return 0; +} + +static int +find_a_worker(enum starpu_worker_archtype type) +{ + int worker[STARPU_NMAXWORKERS]; + int ret = starpu_worker_get_ids_by_type(type, worker, STARPU_NMAXWORKERS); + if (ret == 0) + return -ENODEV; + if (ret == -ERANGE) + return worker[STARPU_NMAXWORKERS-1]; + return worker[ret-1]; +} + +static int +check_result(unsigned *t, size_t size) +{ + unsigned i; + for (i = 0; i < size; i++) + { + if (t[i] != i*2) + { + FPRINTF(stderr,"t[%u] is %u instead of %u\n", i, t[i], 2*i); + return 1; + } + } + return 0; +} + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +static int +test_cuda(void) +{ + int ret; + unsigned *foo_gpu; + unsigned *foo; + int n, i, size, pieces; + int devid; + int chosen; + cudaError_t cures; + starpu_data_handle_t handle; + + /* Find a CUDA worker */ + chosen = find_a_worker(STARPU_CUDA_WORKER); + if (chosen == -ENODEV) + return -ENODEV; + + n = starpu_worker_get_count(); + size = 10 * n; + + devid = starpu_worker_get_devid(chosen); + foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(*foo_gpu)); + + foo = calloc(size, sizeof(*foo)); + for (i = 0; i < size; i++) + foo[i] = i; + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); + 
starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0); + + /* Broadcast the data to force in-place partitioning */ + for (i = 0; i < n; i++) + starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); + + /* Even with just one worker, split in at least two */ + if (n == 1) + pieces = 2; + else + pieces = n; + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = pieces, + }; + + starpu_data_partition(handle, &f); + + ret = submit_tasks(handle, pieces, n); + if (ret == -ENODEV) + return -ENODEV; + + starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); + starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0); + starpu_data_unregister(handle); + + starpu_cuda_set_device(devid); + cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + return check_result(foo, size); +} +#endif +#endif + +#ifdef STARPU_USE_OPENCL +static int +test_opencl(void) +{ + int i; + int ret; + int chosen; + int n; + int size; + int pieces; + cl_mem foo_gpu; + starpu_data_handle_t handle; + + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + /* Find an OpenCL worker */ + chosen = find_a_worker(STARPU_OPENCL_WORKER); + if (chosen == -ENODEV) + return -ENODEV; + + n = starpu_worker_get_count(); + size = 10 * n; + + int devid; + cl_int err; + cl_context context; + cl_command_queue queue; + + devid = starpu_worker_get_devid(chosen); + + starpu_opencl_get_context(devid, &context); + starpu_opencl_get_queue(devid, &queue); + + foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(int)); + + unsigned int *foo = 
malloc(size*sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	starpu_vector_data_register(&handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t)foo,
+				    size,
+				    sizeof(int));
+
+	starpu_vector_ptr_register(handle,
+				   starpu_worker_get_memory_node(chosen),
+				   (uintptr_t)foo_gpu,
+				   (uintptr_t)foo_gpu,
+				   0);
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	/* Even with just one worker, split in at least two */
+	if (n == 1)
+		pieces = 2;
+	else
+		pieces = n;
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_vector_filter_block,
+		.nchildren = pieces,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	ret = submit_tasks(handle, pieces, n);
+	if (ret == -ENODEV)
+		return -ENODEV;
+
+	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
+	starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0);
+	starpu_data_unregister(handle);
+
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); /* label now names the call being checked */
+
+	err = clEnqueueReadBuffer(queue,
+				  foo_gpu,
+				  CL_FALSE,
+				  0,
+				  size*sizeof(*foo),
+				  foo,
+				  0,
+				  NULL,
+				  NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	clFinish(queue); /* the read above is non-blocking (CL_FALSE): wait before inspecting foo */
+	return check_result(foo, size);
+}
+#endif /* STARPU_USE_OPENCL */
+
+int main(int argc, char **argv)
+{
+	int skipped_cuda = 1, skipped_opencl = 1;
+	int ret;
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
+#ifdef STARPU_USE_CUDA
+#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
+	ret =
test_cuda(); + if (ret == 1) + goto fail; + else if (ret == 0) + skipped_cuda = 0; +#endif +#endif + +#ifdef STARPU_USE_OPENCL + ret = test_opencl(); + if (ret == 1) + goto fail; + else if (ret == 0) + skipped_opencl = 0; +#endif + + starpu_shutdown(); + + if (skipped_cuda == 1 && skipped_opencl == 1) + return STARPU_TEST_SKIPPED; + + return EXIT_SUCCESS; + +fail: + starpu_shutdown(); + return EXIT_FAILURE; +} + +#endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */ diff --git a/tests/datawizard/gpu_register.c b/tests/datawizard/gpu_register.c new file mode 100644 index 0000000..8ef00c8 --- /dev/null +++ b/tests/datawizard/gpu_register.c @@ -0,0 +1,322 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "scal.h" + +/* + * Register a handle from a GPU buffer, and performs a partitioned operation + */ + +#if ! 
(defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA)) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static int +submit_tasks(starpu_data_handle_t handle, int pieces, int n) +{ + int i, ret; + + for (i = 0; i < pieces; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl = &scal_codelet; + task->execute_on_a_specific_worker = 1; + task->workerid = i%n; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) + return -ENODEV; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + return 0; +} + +static int +find_a_worker(enum starpu_worker_archtype type) +{ + int worker[STARPU_NMAXWORKERS]; + int ret = starpu_worker_get_ids_by_type(type, worker, STARPU_NMAXWORKERS); + if (ret == 0) + return -ENODEV; + if (ret == -ERANGE) + return worker[STARPU_NMAXWORKERS-1]; + return worker[ret-1]; +} + +static int +check_result(unsigned *t, size_t size) +{ + unsigned i; + for (i = 0; i < size; i++) + { + if (t[i] != i*2) + { + FPRINTF(stderr,"t[%u] is %u instead of %u\n", i, t[i], 2*i); + return 1; + } + } + return 0; +} + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER +static int +test_cuda(void) +{ + int ret; + unsigned *foo_gpu; + unsigned *foo; + int n, i, size, pieces; + int devid; + int chosen; + cudaError_t cures; + starpu_data_handle_t handle; + + /* Find a CUDA worker */ + chosen = find_a_worker(STARPU_CUDA_WORKER); + if (chosen == -ENODEV) + return -ENODEV; + + n = starpu_worker_get_count(); + size = 10 * n; + + devid = starpu_worker_get_devid(chosen); + foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(*foo_gpu)); + + foo = calloc(size, sizeof(*foo)); + for (i = 0; i < size; i++) + foo[i] = i; + + cures = cudaMemcpy(foo_gpu, foo, size * sizeof(*foo_gpu), cudaMemcpyHostToDevice); + if (!cures) + cures = 
cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + STARPU_CUDA_REPORT_ERROR(cures); + + starpu_vector_data_register(&handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, size, sizeof(*foo_gpu)); + + /* Broadcast the data to force in-place partitioning */ + for (i = 0; i < n; i++) + starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); + + /* Even with just one worker, split in at least two */ + if (n == 1) + pieces = 2; + else + pieces = n; + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = pieces, + }; + + starpu_data_partition(handle, &f); + + ret = submit_tasks(handle, pieces, n); + if (ret == -ENODEV) + { + starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); + free(foo); + return -ENODEV; + } + + starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); + starpu_data_unregister(handle); + + starpu_cuda_set_device(devid); + cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost); + if (!cures) + cures = cudaDeviceSynchronize(); + if (STARPU_UNLIKELY(cures)) + { + starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); + free(foo); + STARPU_CUDA_REPORT_ERROR(cures); + return 1; + } + + ret = check_result(foo, size); + starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); + free(foo); + return ret; +} +#endif +#endif + +#ifdef STARPU_USE_OPENCL +static int +test_opencl(void) +{ + int i; + int ret; + int chosen; + int n; + int size; + int pieces; + cl_mem foo_gpu; + starpu_data_handle_t handle; + + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + /* Find an OpenCL worker */ + chosen = find_a_worker(STARPU_OPENCL_WORKER); + if (chosen == -ENODEV) + return 
-ENODEV;
+
+	n = starpu_worker_get_count();
+	size = 10 * n;
+
+	int devid;
+	cl_int err;
+	cl_context context;
+	cl_command_queue queue;
+
+	devid = starpu_worker_get_devid(chosen);
+
+	starpu_opencl_get_context(devid, &context);
+	starpu_opencl_get_queue(devid, &queue);
+
+	foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(int));
+
+	unsigned int *foo = malloc(size*sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	err = clEnqueueWriteBuffer(queue,
+				   foo_gpu,
+				   CL_FALSE,
+				   0,
+				   size*sizeof(int),
+				   foo,
+				   0,
+				   NULL,
+				   NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	clFinish(queue); /* the write above is non-blocking (CL_FALSE): wait before registering the buffer */
+
+	starpu_vector_data_register(&handle,
+				    starpu_worker_get_memory_node(chosen),
+				    (uintptr_t)foo_gpu,
+				    size,
+				    sizeof(int));
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	/* Even with just one worker, split in at least two */
+	if (n == 1)
+		pieces = 2;
+	else
+		pieces = n;
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_vector_filter_block,
+		.nchildren = pieces,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	ret = submit_tasks(handle, pieces, n);
+	if (ret == -ENODEV)
+		return -ENODEV;
+
+	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
+	starpu_data_unregister(handle);
+
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); /* label now names the call being checked */
+
+	err = clEnqueueReadBuffer(queue,
+				  foo_gpu,
+				  CL_FALSE,
+				  0,
+				  size*sizeof(*foo),
+				  foo,
+				  0,
+				  NULL,
+				  NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	clFinish(queue); /* the read above is non-blocking (CL_FALSE): wait before inspecting foo */
+	ret = check_result(foo, size);
+	starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(int));
+	free(foo);
+	return ret;
+}
+#endif
/* !STARPU_USE_OPENCL */ + +int main(int argc, char **argv) +{ + int skipped_cuda = 1, skipped_opencl = 1; + int ret; + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + +#ifdef STARPU_USE_CUDA +#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER + ret = test_cuda(); + if (ret == 1) + goto fail; + else if (ret == 0) + skipped_cuda = 0; +#endif +#endif + +#ifdef STARPU_USE_OPENCL + ret = test_opencl(); + if (ret == 1) + goto fail; + else if (ret == 0) + skipped_opencl = 0; +#endif + + starpu_shutdown(); + + if (skipped_cuda == 1 && skipped_opencl == 1) + return STARPU_TEST_SKIPPED; + + return EXIT_SUCCESS; + +fail: + starpu_shutdown(); + return EXIT_FAILURE; +} + +#endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */ diff --git a/tests/datawizard/handle_to_pointer.c b/tests/datawizard/handle_to_pointer.c new file mode 100644 index 0000000..19d02d1 --- /dev/null +++ b/tests/datawizard/handle_to_pointer.c @@ -0,0 +1,187 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#undef NDEBUG +#include + +#include +#include +#include "../helper.h" + +/* + * Test the value returned by starpu_handle_to_pointer + */ + +void cpu_task(void **buffers, void *args) +{ + int *numbers; + int i; + int size; + + numbers = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); + starpu_codelet_unpack_args (args, &size); + + for(i = 0; i < (int)size; i++) + { + numbers[i] = i; + } +} + +#ifdef STARPU_USE_CUDA +static void cuda_task(void **buffers, void *args) +{ + int *numbers; + int i; + int size; + + numbers = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + starpu_codelet_unpack_args (args, &size); + + for(i = 0; i < (int)size; i++) + { + cudaMemcpyAsync(&numbers[i], &i, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + } +} +#endif + +#ifdef STARPU_USE_OPENCL +static void opencl_task(void *buffers[], void *args) +{ + (void)args; + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + + cl_mem numbers = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned size = STARPU_VECTOR_GET_NX(buffers[0]); + + unsigned i; + for (i = 0; i < size; i++) + { + cl_int err; + err = clEnqueueWriteBuffer(queue, + numbers, + CL_TRUE, + i*sizeof(int), /* offset */ + sizeof(int), + &i, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL /* event */); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + } +} +#endif + +static struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_task}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_task}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_task}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_task"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +int main(int argc, char *argv[]) +{ + int err, ret; + int *pointer; + starpu_data_handle_t handle; + static const int count = 123; + + ret = 
starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + + err = starpu_malloc((void **)&pointer, count * sizeof(int)); + STARPU_ASSERT((err == 0) && (pointer != NULL)); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, + sizeof(int)); + STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); + starpu_data_unregister(handle); + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, + count, sizeof(int)); + STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); + starpu_data_unregister(handle); + + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, count, + count, 1, sizeof(int)); + STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); + starpu_data_unregister(handle); + + starpu_free_noflag(pointer, count * sizeof(int)); + pointer = NULL; + + /* Lazy allocation. */ + starpu_vector_data_register(&handle, -1, 0 /* NULL */, + count, sizeof(int)); + STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == NULL); + + /* Pass the handle to a task. */ + err = starpu_task_insert(&cl, + STARPU_W, handle, + STARPU_VALUE, &count, sizeof(count), + 0); + if (err == -ENODEV) + { + ret = STARPU_TEST_SKIPPED; + goto out; + } + + /* Acquire the handle, forcing a local allocation. */ + starpu_data_acquire(handle, STARPU_R); + + /* Make sure we have a local pointer to it. 
*/ + ret = EXIT_SUCCESS; + pointer = (int *) starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM); + if (pointer == NULL) + { + FPRINTF(stderr, "pointer should be non NULL\n"); + ret = EXIT_FAILURE; + } + else + { + int i; + for(i = 0; i < count; i++) + { + int *numbers = (int *)pointer; + if (numbers[i] != i) + { + FPRINTF(stderr, "Incorrect value numbers[%d] == %d should be %d\n", (int)i, numbers[i], (int)i); + ret = EXIT_FAILURE; + } + } + } + + starpu_data_release(handle); + +out: + starpu_data_unregister(handle); + + starpu_shutdown(); + + return ret; +} diff --git a/tests/datawizard/in_place_partition.c b/tests/datawizard/in_place_partition.c new file mode 100644 index 0000000..d38a4ae --- /dev/null +++ b/tests/datawizard/in_place_partition.c @@ -0,0 +1,120 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "scal.h" + +/* + * Trigger in-place partitioning by prefetching the whole data before + * partitioning + */ + +int main(int argc, char **argv) +{ + unsigned *foo; + starpu_data_handle_t handle; + int ret; + unsigned n, i, size; + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + + conf.ncpus = -1; + conf.ncuda = -1; + conf.nopencl = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + n = starpu_worker_get_count(); + if (n == 1) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + size = 10 * n; + + foo = (unsigned *) calloc(size, sizeof(*foo)); + for (i = 0; i < size; i++) + foo[i] = i; + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); + + /* Broadcast the data to force in-place partitioning */ + for (i = 0; i < n; i++) + starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = n, + }; + + starpu_data_partition(handle, &f); + + for (i = 0; i < f.nchildren; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl = &scal_codelet; + task->execute_on_a_specific_worker = 1; + task->workerid = i; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + 
starpu_data_unregister(handle); + starpu_shutdown(); + + ret = EXIT_SUCCESS; + for (i = 0; i < size; i++) + { + if (foo[i] != i*2) + { + FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i); + ret = EXIT_FAILURE; + } + } + free(foo); + + return ret; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/increment_init.c b/tests/datawizard/increment_init.c new file mode 100644 index 0000000..0fcb870 --- /dev/null +++ b/tests/datawizard/increment_init.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that the initializer passed to starpu_data_set_reduction_methods + * is used to initialize a handle when it is registered from NULL, and when + * starpu_data_invalidate is called + */ + +int main(void) +{ + unsigned *pvar = NULL; + int ret; + starpu_data_handle_t handle; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&handle, -1, 0, sizeof(unsigned)); + + starpu_data_set_reduction_methods(handle, NULL, &neutral_cl); + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; + unsigned nloops = 4; +#else + unsigned ntasks = 1024; + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_data_acquire(handle, STARPU_R); + pvar = starpu_data_handle_to_pointer(handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (*pvar != ntasks) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", *pvar, ntasks); + starpu_data_release(handle); + starpu_data_unregister(handle); + goto err; + } + starpu_data_release(handle); + starpu_data_invalidate(handle); + } + + starpu_data_unregister(handle); + increment_unload_opencl(); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + +err: + 
starpu_shutdown(); + return EXIT_FAILURE; +} diff --git a/tests/datawizard/increment_redux.c b/tests/datawizard/increment_redux.c new file mode 100644 index 0000000..ad8c115 --- /dev/null +++ b/tests/datawizard/increment_redux.c @@ -0,0 +1,106 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that STARPU_REDUX works with a mere incrementation + */ +int main(int argc, char **argv) +{ + int ret; + unsigned var = 0; + starpu_data_handle_t handle; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return STARPU_TEST_SKIPPED; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); + + starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; + unsigned nloops = 4; +#else + unsigned ntasks = 1024; + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = 
&increment_redux_cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_data_acquire(handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var != ntasks * (loop+1)) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var, ntasks * (loop+1)); + starpu_data_release(handle); + starpu_data_unregister(handle); + goto err; + } + starpu_data_release(handle); + } + + starpu_data_unregister(handle); + if (var != ntasks * nloops) + { + FPRINTF(stderr, "Value %u != Expected value %u\n", var, ntasks * (loop+1)); + goto err; + } + + increment_unload_opencl(); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + +err: + starpu_shutdown(); + STARPU_RETURN(EXIT_FAILURE); + +} diff --git a/tests/datawizard/increment_redux_lazy.c b/tests/datawizard/increment_redux_lazy.c new file mode 100644 index 0000000..14ca3f7 --- /dev/null +++ b/tests/datawizard/increment_redux_lazy.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that STARPU_REDUX works with a mere incrementation, but without + * initializing the variable + */ + +int main(int argc, char **argv) +{ + int ret; + unsigned *var; + starpu_data_handle_t handle; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return STARPU_TEST_SKIPPED; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() + starpu_hip_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + increment_load_opencl(); + + starpu_variable_data_register(&handle, -1, (uintptr_t)NULL, sizeof(unsigned)); + + starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; + unsigned nloops = 4; +#else + unsigned ntasks = 1024; + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_redux_cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_data_acquire(handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + var = (unsigned*) starpu_variable_get_local_ptr(handle); + starpu_data_release(handle); + + if (*var != ntasks*(loop + 1)) + { + ret = EXIT_FAILURE; + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", *var, ntasks * (loop+1)); + goto err; + } + } + + ret = starpu_data_acquire(handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_data_acquire"); + var = (unsigned*) starpu_variable_get_local_ptr(handle); + + if (*var != ntasks*nloops) + { + ret = EXIT_FAILURE; + FPRINTF(stderr, "Value %u != Expected value %u\n", *var, ntasks * (loop+1)); + goto err; + } + + starpu_data_release(handle); + starpu_data_unregister(handle); + + increment_unload_opencl(); + +err: + starpu_shutdown(); + STARPU_RETURN(ret); + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + STARPU_RETURN(STARPU_TEST_SKIPPED); +} diff --git a/tests/datawizard/increment_redux_partition.c b/tests/datawizard/increment_redux_partition.c new file mode 100644 index 0000000..be8128f --- /dev/null +++ b/tests/datawizard/increment_redux_partition.c @@ -0,0 +1,123 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that STARPU_REDUX works with a mere incrementation and on partitioned data + */ +#define N 8 +int main(int argc, char **argv) +{ + int ret; + unsigned vec[N] = {}; + unsigned i; + int status; + starpu_data_handle_t handle; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return STARPU_TEST_SKIPPED; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + for (i = 0; i < N; i++) + vec[i] = i; + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&vec, N, sizeof(unsigned)); + + /* Partition the vector in PARTS sub-variables */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_pick_variable, + .filter_arg_ptr = (void*)(uintptr_t) 0, + .nchildren = N, + /* the children use a variable interface*/ + .get_child_ops = starpu_vector_filter_pick_variable_child_ops + }; + starpu_data_partition(handle, &f); + + for (i = 0; i < N; i++) + { + starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i); + starpu_data_set_reduction_methods(sub_handle, &redux_cl, &neutral_cl); + } + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; +#else + unsigned ntasks = 1024; +#endif + + unsigned t; + + for (i = 0; i < N; i++) + { + starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i); + + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_redux_cl; + task->handles[0] = sub_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + + status = EXIT_SUCCESS; + starpu_data_acquire(handle, STARPU_R); + for (i = 0; i < N; i++) + { + if (vec[i] != i + ntasks) + { + 
FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", vec[i], ntasks); + status = EXIT_FAILURE; + } + } + starpu_data_release(handle); + + starpu_data_unregister(handle); + + increment_unload_opencl(); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + +err: + starpu_shutdown(); + STARPU_RETURN(EXIT_FAILURE); + +} diff --git a/tests/datawizard/increment_redux_v2.c b/tests/datawizard/increment_redux_v2.c new file mode 100644 index 0000000..9183194 --- /dev/null +++ b/tests/datawizard/increment_redux_v2.c @@ -0,0 +1,115 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Check that STARPU_REDUX works with a mere incrementation, but + * intermixing with non-REDUX accesses + */ + +int main(int argc, char **argv) +{ + int ret; + unsigned var = 0; + starpu_data_handle_t handle; + + /* Not supported yet */ + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + return STARPU_TEST_SKIPPED; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); + + starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; + unsigned nloops = 4; +#else + unsigned ntasks = 1024; + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + if (t % 10 == 0) + { + task->cl = &increment_cl; + } + else + { + task->cl = &increment_redux_cl; + } + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_data_acquire(handle, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var != ntasks * (loop+1)) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var, ntasks * (loop+1)); + starpu_data_release(handle); + starpu_data_unregister(handle); + goto err; + } + starpu_data_release(handle); + } + + starpu_data_unregister(handle); + if (var != ntasks * nloops) + { + FPRINTF(stderr, "Value %u != Expected value %u\n", var, ntasks * (loop+1)); + goto err; + } + + increment_unload_opencl(); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, 
"WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + +err: + starpu_shutdown(); + STARPU_RETURN(EXIT_FAILURE); + +} diff --git a/tests/datawizard/increment_redux_with_args.c b/tests/datawizard/increment_redux_with_args.c new file mode 100644 index 0000000..5c98920 --- /dev/null +++ b/tests/datawizard/increment_redux_with_args.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
/*
 * CPU implementation of the reduction codelet taking an extra argument:
 * folds the contribution descr[1] into the accumulator descr[0] and adds the
 * integer pointed to by arg on top of it.
 */
void redux_with_args_cpu(void *descr[], void *arg)
{
	unsigned *accumulator = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
	unsigned *contribution = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
	int *extra = (int *)arg;

	*accumulator = *accumulator + *contribution + *extra;
}
/*
 * CUDA implementation of the BCSR interface test.
 *
 * Launches bcsr_cuda over the nnz * r * c stored values of buffers[0]; the
 * kernel checks each value against index * factor and negates it. The error
 * flag is round-tripped through a device buffer and written back into
 * bcsr_config.copy_failed.
 *
 * args points to the int factor.
 */
extern "C" void test_bcsr_cuda_func(void *buffers[], void *args)
{
	int factor;
	int *ret;
	int *val;
	cudaError_t error;
	uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]);
	uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r;
	uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c;
	/* nnz counts blocks: turn it into a number of scalar values */
	nnz *= (r*c);
	unsigned threads_per_block = 64;
	unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block;

	factor = *(int *) args;
	val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]);

	error = cudaMalloc(&ret, sizeof(int));
	if (error != cudaSuccess)
		STARPU_CUDA_REPORT_ERROR(error);

	error = cudaMemcpyAsync(ret,
				&bcsr_config.copy_failed,
				sizeof(int),
				cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
	if (error != cudaSuccess)
		STARPU_CUDA_REPORT_ERROR(error);

	/* One thread per value, on the worker's own stream so that the kernel
	 * is ordered with the two asynchronous copies around it */
	bcsr_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>
		(val, nnz, ret, factor);
	error = cudaGetLastError();
	if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error);

	error = cudaMemcpyAsync(&bcsr_config.copy_failed,
				ret,
				sizeof(int),
				cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
	if (error != cudaSuccess)
		STARPU_CUDA_REPORT_ERROR(error);

	/* Make sure the copy-back has completed before the device buffer is
	 * released and before the caller reads copy_failed */
	error = cudaStreamSynchronize(starpu_cuda_get_local_stream());
	if (error != cudaSuccess)
		STARPU_CUDA_REPORT_ERROR(error);

	cudaFree(ret);
}
+ */ + +#include +#include "../test_interfaces.h" +#include "../../../helper.h" + +/* + * In this test, we use the following matrix: + * + * +----------------+ + * | 0 1 0 0 | + * | 2 3 0 0 | + * | 4 5 8 9 | + * | 6 7 10 11 | + * +----------------+ + * + * nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] + * colind = [0, 0, 1] + * rowptr = [0, 1, 3 ] + * r = c = 2 + */ + +/* Size of the blocks */ +#define R 2 +#define C 2 + +#define NNZ_BLOCKS 3 /* out of 4 */ +#define NZVAL_SIZE (R*C*NNZ_BLOCKS) + +#define NROWS 2 + +#ifdef STARPU_USE_CPU +void test_bcsr_cpu_func(void *buffers[], void *args); +#endif /* !STARPU_USE_CPU */ +#ifdef STARPU_USE_CUDA +extern void test_bcsr_cuda_func(void *buffers[], void *_args); +#endif /* !STARPU_USE_CUDA */ +#ifdef STARPU_USE_OPENCL +extern void test_bcsr_opencl_func(void *buffers[], void *args); +#endif /* !STARPU_USE_OPENCL */ + + +static int nzval[NZVAL_SIZE] = +{ + 0, 1, 2, 3, /* First block */ + 4, 5, 6, 7, /* Second block */ + 8, 9, 10, 11 /* Third block */ +}; +static int nzval2[NZVAL_SIZE]; + +static uint32_t colind[NNZ_BLOCKS] = { 0, 0, 1 }; +static uint32_t colind2[NNZ_BLOCKS]; + +static uint32_t rowptr[NROWS+1] = { 0, 1, NNZ_BLOCKS }; +static uint32_t rowptr2[NROWS+1] = { 0, 0, NNZ_BLOCKS }; + +static starpu_data_handle_t bcsr_handle; +static starpu_data_handle_t bcsr2_handle; + + +struct test_config bcsr_config = +{ +#ifdef STARPU_USE_CPU + .cpu_func = test_bcsr_cpu_func, +#endif /* !STARPU_USE_CPU */ +#ifdef STARPU_USE_CUDA + .cuda_func = test_bcsr_cuda_func, +#endif /* !STARPU_USE_CUDA */ +#ifdef STARPU_USE_OPENCL + .opencl_func = test_bcsr_opencl_func, +#endif /* !STARPU_USE_OPENCL */ + .handle = &bcsr_handle, + .ptr = nzval, + .dummy_handle = &bcsr2_handle, + .dummy_ptr = nzval2, + .copy_failed = SUCCESS, + .name = "bcsr_interface" +}; + +static void +register_data(void) +{ + starpu_bcsr_data_register(&bcsr_handle, + STARPU_MAIN_RAM, + NNZ_BLOCKS, + NROWS, + (uintptr_t) nzval, + colind, + rowptr, + 0, /* 
firstentry */ + R, + C, + sizeof(nzval[0])); + + starpu_bcsr_data_register(&bcsr2_handle, + STARPU_MAIN_RAM, + NNZ_BLOCKS, + NROWS, + (uintptr_t) nzval2, + colind2, + rowptr2, + 0, /* firstentry */ + R, + C, + sizeof(nzval2[0])); +} + +static void +unregister_data(void) +{ + starpu_data_unregister(bcsr_handle); + starpu_data_unregister(bcsr2_handle); +} + +void +test_bcsr_cpu_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int *val; + int factor; + int i; + + uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]); + uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r; + uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c; + if (r != R || c != C) + { + bcsr_config.copy_failed = FAILURE; + return; + } + nnz *= (r*c); + + val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]); + factor = *(int *) args; + + for (i = 0; i < (int)nnz; i++) + { + if (val[i] != i * factor) + { + bcsr_config.copy_failed = FAILURE; + return; + } + val[i] *= -1; + } + +#if 0 + /* TODO */ + /* Check colind */ + uint32_t *col = STARPU_BCSR_GET_COLIND(buffers[0]); + for (i = 0; i < NNZ_BLOCKS; i++) + if (col[i] != colind[i]) + bcsr_config.copy_failed = FAILURE; + + /* Check rowptr */ + uint32_t *row = STARPU_BCSR_GET_ROWPTR(buffers[0]); + for (i = 0; i < 1 + WIDTH/R; i++) + if (row[i] != rowptr[i]) + bcsr_config.copy_failed = FAILURE; +#endif +} + +int +main(int argc, char **argv) +{ + struct data_interface_test_summary summary; + struct starpu_conf conf; + starpu_conf_init(&conf); + + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + ret = run_tests(&bcsr_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + if (ret) data_interface_test_summary_print(stderr, &summary); + + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/interfaces/bcsr/bcsr_opencl.c 
b/tests/datawizard/interfaces/bcsr/bcsr_opencl.c new file mode 100644 index 0000000..e4225c6 --- /dev/null +++ b/tests/datawizard/interfaces/bcsr/bcsr_opencl.c @@ -0,0 +1,134 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl" +extern struct test_config bcsr_config; +static struct starpu_opencl_program opencl_program; + +void +test_bcsr_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, ret; + int factor = *(int *) args; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]); + uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r; + uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c; + nnz *= (r*c); + cl_mem nzval = (cl_mem)STARPU_BCSR_GET_NZVAL(buffers[0]); + + cl_context context; + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &bcsr_config.copy_failed, &err); + + 
if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "test_bcsr_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + int nargs; + nargs = starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(nzval), &nzval, + sizeof(nnz), &nnz, + sizeof(fail), &fail, + sizeof(factor), &factor, + 0); + + if (nargs != 4) + { + fprintf(stderr, "Failed to set argument #%d\n", err); + STARPU_OPENCL_REPORT_ERROR(err); + } + + { + size_t global = nnz; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &bcsr_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} diff --git a/tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl b/tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl new file mode 100644 index 0000000..2934c3e --- /dev/null +++ b/tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
/*
 * Each work-item checks one value of the BCSR nzval array: a value equal to
 * its index times factor is negated in place, any other value raises *err.
 * Work-items whose index is not below nx do nothing.
 */
__kernel void test_bcsr_opencl(__global int *val,
			       unsigned int nx,
			       __global int *err,
			       int factor)
{
	const int i = get_global_id(0);

	if (i < nx)
	{
		if (val[i] == i * factor)
			val[i] = - val[i];
		else
			*err = 1;
	}
}
+ */ +#include <starpu.h> +#include "../test_interfaces.h" + +extern struct test_config block_config; + +/* Single-thread check kernel: walks the nx*ny*nz block in (k, j, i) order using the ldy/ldz strides. Each element must equal factor * val, where val counts the elements visited so far. A matching element is negated in place; the first mismatch stores 1 into *err and stops. factor is an int, like the value handed over by the host wrapper. */ +static __global__ void block_cuda(int *block, + int nx, int ny, int nz, + unsigned ldy, unsigned ldz, + int factor, int *err) +{ + int i, j, k; + int val = 0; + + for (k = 0; k < nz ;k++) + { + for (j = 0; j < ny ;j++) + { + for(i = 0; i < nx ;i++) + { + if (block[(k*ldz)+(j*ldy)+i] != factor * val) + { + *err = 1; + return; + } + else + { + block[(k*ldz)+(j*ldy)+i] *= -1; + val++; + } + } + } + } +} + +/* CUDA test entry point: copies block_config.copy_failed to a device int, runs block_cuda on buffers[0] with the factor read from args, then copies the device int back into block_config.copy_failed. */ +extern "C" void test_block_cuda_func(void *buffers[], void *args) +{ + cudaError_t error; + int *ret; + + error = cudaMalloc(&ret, sizeof(int)); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(ret, &block_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + int nx = STARPU_BLOCK_GET_NX(buffers[0]); + int ny = STARPU_BLOCK_GET_NY(buffers[0]); + int nz = STARPU_BLOCK_GET_NZ(buffers[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]); + int factor = *(int*) args; + + block_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> + (block, nx, ny, nz, ldy, ldz, factor, ret); + error = cudaGetLastError(); + if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); + error = cudaMemcpyAsync(&block_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + /* drain the stream first so the read-back from ret has completed before ret is freed */ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + cudaFree(ret); +} diff --git a/tests/datawizard/interfaces/block/block_interface.c b/tests/datawizard/interfaces/block/block_interface.c new file mode 100644 index 0000000..f0b273c --- /dev/null +++ b/tests/datawizard/interfaces/block/block_interface.c @@ -0,0 +1,158 @@ +/* StarPU --- Runtime system for heterogeneous
multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "../test_interfaces.h" +#include "../../../helper.h" + +#define NX 16 +#define NY NX +#define NZ NX + +/* Prototypes */ +static void register_data(void); +static void unregister_data(void); +void test_block_cpu_func(void *buffers[], void *args); +#ifdef STARPU_USE_CUDA +extern void test_block_cuda_func(void *buffers[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void test_block_opencl_func(void *buffers[], void *args); +#endif + +static starpu_data_handle_t _block_handle; +static starpu_data_handle_t _block2_handle; + +static int _block[NX*NY*NZ]; +static int _block2[NX*NY*NZ]; + +/* Test description handed to run_tests(): per-device check functions plus the two registered blocks. */ +struct test_config block_config = +{ + .cpu_func = test_block_cpu_func, +#ifdef STARPU_USE_CUDA + .cuda_func = test_block_cuda_func, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_func = test_block_opencl_func, +#endif + .handle = &_block_handle, + .ptr = _block, + .dummy_handle = &_block2_handle, + .dummy_ptr = _block2, + .copy_failed = SUCCESS, + .name = "block_interface" +}; + +/* Fills _block with 0, 1, 2, ... in (k, j, i) order and registers _block and _block2 as NX x NY x NZ blocks with ldy = NX and ldz = NX * NY. */ +static void +register_data(void) +{ + /* Initializing data */ + int val = 0; + int i, j, k; + for (k = 0; k < NZ; k++) + for (j = 0; j < NY; j++) + for (i = 0; i < NX; i++) + _block[(k*NX*NY)+(j*NX)+i] = val++; + + /* Registering data */ + starpu_block_data_register(&_block_handle, + STARPU_MAIN_RAM, + (uintptr_t)_block, + NX, + NX * NY, +
NX, + NY, + NZ, + sizeof(_block[0])); + starpu_block_data_register(&_block2_handle, + STARPU_MAIN_RAM, + (uintptr_t)_block2, + NX, + NX * NY, + NX, + NY, + NZ, + sizeof(_block2[0])); +} + +/* Unregisters both block handles. */ +static void +unregister_data(void) +{ + starpu_data_unregister(_block_handle); + starpu_data_unregister(_block2_handle); +} + +/* CPU check: every element visited in (k, j, i) order must equal factor * val, with val counting from 0. Matching elements are negated in place; the first mismatch sets copy_failed to FAILURE and returns. */ +void test_block_cpu_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int factor = *(int*)args; + int nx = STARPU_BLOCK_GET_NX(buffers[0]); + int ny = STARPU_BLOCK_GET_NY(buffers[0]); + int nz = STARPU_BLOCK_GET_NZ(buffers[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]); + int i, j, k; + int val = 0; + block_config.copy_failed = SUCCESS; + for (k = 0; k < nz; k++) + { + for (j = 0; j < ny; j++) + { + for (i = 0; i < nx; i++) + { + if (block[(k*ldz)+(j*ldy)+i] != factor * val) + { + block_config.copy_failed = FAILURE; + return; + } + else + { + block[(k*ldz)+(j*ldy)+i] *= -1; + val++; + } + } + } + } +} + +/* Initializes StarPU (the test is skipped on -ENODEV), runs the interface tests on block_config and returns the summary status. */ +int +main(int argc, char **argv) +{ + struct data_interface_test_summary summary; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + ret = run_tests(&block_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + if (ret) data_interface_test_summary_print(stderr, &summary); + + return data_interface_test_summary_success(&summary); +} + diff --git a/tests/datawizard/interfaces/block/block_opencl.c b/tests/datawizard/interfaces/block/block_opencl.c new file mode 100644 index 0000000..af9564c --- /dev/null +++ b/tests/datawizard/interfaces/block/block_opencl.c @@ -0,0 +1,120 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/block/block_opencl_kernel.cl" +extern struct test_config block_config; +static struct starpu_opencl_program opencl_program; + +/* OpenCL test entry point: loads the block_opencl kernel, runs it over an nx x ny x nz range on buffers[0] with the factor read from args, and reads the device-side failure flag back into block_config.copy_failed. */ +void +test_block_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, ret; + int factor = *(int *) args; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + int nx = STARPU_BLOCK_GET_NX(buffers[0]); + int ny = STARPU_BLOCK_GET_NY(buffers[0]); + int nz = STARPU_BLOCK_GET_NZ(buffers[0]); + unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + cl_mem block = (cl_mem) STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]); + + cl_context context; + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + /* device-side copy of the failure flag, initialized from the host value */ + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(int), &block_config.copy_failed, &err); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "block_opencl", + devid); + if (err != CL_SUCCESS) +
STARPU_OPENCL_REPORT_ERROR(err); + + int nargs; + nargs = starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(block), &block, + sizeof(nx), &nx, + sizeof(ny), &ny, + sizeof(nz), &nz, + sizeof(ldy), &ldy, + sizeof(ldz), &ldz, + sizeof(factor), &factor, + sizeof(fail), &fail, + 0); + + if (nargs != 8) + { + fprintf(stderr, "Failed to set argument #%d\n", nargs); + STARPU_OPENCL_REPORT_ERROR(err); + } + + { + size_t global[3] = {nx, ny, nz}; + err = clEnqueueNDRangeKernel(queue, + kernel, + 3, + NULL, + global, + NULL, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &block_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + /* the flag buffer was created by this call and must be released here */ + clReleaseMemObject(fail); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} diff --git a/tests/datawizard/interfaces/block/block_opencl_kernel.cl b/tests/datawizard/interfaces/block/block_opencl_kernel.cl new file mode 100644 index 0000000..3b05f10 --- /dev/null +++ b/tests/datawizard/interfaces/block/block_opencl_kernel.cl @@ -0,0 +1,38 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +/* Check kernel over a 3-D range: work-item (idx, idy, idz) expects block[idz*ldz + idy*ldy + idx] == factor * (idz*ny*nx + idy*nx + idx). On a mismatch it stores 1 into *err, otherwise it negates the element in place. */ __kernel void block_opencl(__global int *block, + int nx, int ny, int nz, + int ldy, int ldz, + int factor, __global int *err) +{ + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + if (idx >= nx) /* work-items outside the nx x ny x nz box do nothing */ + return; + if (idy >= ny) + return; + if (idz >= nz) + return; + + int val = idz*ny*nx+idy*nx+idx; /* expected value before scaling by factor */ + int i = (idz*ldz)+(idy*ldy)+idx; /* offset of the element, using the ldy/ldz strides */ + + if (block[i] != factor * val) + *err = 1; + else + block[i] *= -1; +} diff --git a/tests/datawizard/interfaces/coo/coo_cuda.cu b/tests/datawizard/interfaces/coo/coo_cuda.cu new file mode 100644 index 0000000..53a40f8 --- /dev/null +++ b/tests/datawizard/interfaces/coo/coo_cuda.cu @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ +#include <starpu.h> +#include "../test_interfaces.h" + +extern struct test_config coo_config; + +/* Check kernel: thread i expects val[i] == i * factor. On a mismatch it stores 1 into *err, otherwise it negates val[i]. Threads with i >= n do nothing. */ +__global__ void coo_cuda(int *val, uint32_t n, int *err, int factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i >= n) + return; + + if (val[i] != i * factor) + *err = 1; + else + val[i] *= -1; +} + +/* CUDA test entry point: copies coo_config.copy_failed to a device int, runs coo_cuda over the values of buffers[0] with the factor read from args, then copies the device int back into coo_config.copy_failed. */ +extern "C" void test_coo_cuda_func(void *buffers[], void *args) +{ + int factor; + int *ret; + int *val; + cudaError_t error; + uint32_t nvalues = STARPU_COO_GET_NVALUES(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (nvalues + threads_per_block-1) / threads_per_block; + + factor = *(int *) args; + val = (int *) STARPU_COO_GET_VALUES(buffers[0]); + + error = cudaMalloc(&ret, sizeof(int)); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(ret, + &coo_config.copy_failed, + sizeof(int), + cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + /* one thread per value, launched on the same stream as the copies */ + coo_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>> + (val, nvalues, ret, factor); + error = cudaGetLastError(); + if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(&coo_config.copy_failed, + ret, + sizeof(int), + cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + /* drain the stream first so the read-back from ret has completed before ret is freed */ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + cudaFree(ret); +} diff --git a/tests/datawizard/interfaces/coo/coo_interface.c b/tests/datawizard/interfaces/coo/coo_interface.c new file mode 100644 index 0000000..709c0fc --- /dev/null +++ b/tests/datawizard/interfaces/coo/coo_interface.c @@ -0,0 +1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include <starpu.h> +#include "../test_interfaces.h" + +#define NX 2 +#define NY 2 +#define MATRIX_SIZE (NX*NY) + +#if defined(STARPU_USE_CPU) +void test_coo_cpu_func(void *buffers[], void *args); +#endif +#ifdef STARPU_USE_CUDA +extern void test_coo_cuda_func(void *buffers[], void *args); +#endif +#ifdef STARPU_USE_OPENCL +extern void test_coo_opencl_func(void *buffers[], void *args); +#endif + +static starpu_data_handle_t coo_handle, coo2_handle; + +static uint32_t columns[MATRIX_SIZE]; +static uint32_t rows[MATRIX_SIZE]; +static int values[MATRIX_SIZE]; +static uint32_t columns2[MATRIX_SIZE]; +static uint32_t rows2[MATRIX_SIZE]; +static int values2[MATRIX_SIZE]; + +/* Test description handed to run_tests(): per-device check functions plus the two registered COO matrices. */ +struct test_config coo_config = +{ +#ifdef STARPU_USE_CPU + .cpu_func = test_coo_cpu_func, +#endif /* !
STARPU_USE_CPU */ +#ifdef STARPU_USE_CUDA + .cuda_func = test_coo_cuda_func, +#endif /* !STARPU_USE_CUDA */ +#ifdef STARPU_USE_OPENCL + .opencl_func = test_coo_opencl_func, +#endif /* !STARPU_USE_OPENCL */ + .handle = &coo_handle, + .ptr = values, + .dummy_handle = &coo2_handle, + .dummy_ptr = values2, + .copy_failed = SUCCESS, + .name = "coo_interface" +}; + +/* CPU check: vals[i] must equal i * factor. Matching values are negated in place; the first mismatch sets copy_failed to FAILURE and returns. */ +void +test_coo_cpu_func(void *buffers[], void *args) +{ + int factor = *(int *) args; + int *vals = (int *) STARPU_COO_GET_VALUES(buffers[0]); + unsigned size = STARPU_COO_GET_NVALUES(buffers[0]); + + int i; + for (i = 0; i < (int)size; i++) + { + if (vals[i] != i * factor) + { + coo_config.copy_failed = FAILURE; + return; + } + vals[i] *= -1; + } +} + + +/* Fills the first COO matrix with the values 0..3, fills the second one with -1, and registers both. */ +static void +register_data(void) +{ + /* + We use the following matrix : + + +---+---+ + | 0 | 1 | + +---+---+ + | 2 | 3 | + +---+---+ + + Of course, we're not supposed to register the zeros, but it does not + matter for this test. + */ + + columns[0] = 0; + rows[0] = 0; + values[0] = 0; + + columns[1] = 1; + rows[1] = 0; + values[1] = 1; + + columns[2] = 0; + rows[2] = 1; + values[2] = 2; + + columns[3] = 1; + rows[3] = 1; + values[3] = 3; + + + int i; + for (i = 0; i < MATRIX_SIZE; i++) + { + columns2[i] = -1; + rows2[i] = -1; + values2[i] = -1; + } + + starpu_coo_data_register(&coo_handle, + STARPU_MAIN_RAM, + NX, + NY, + MATRIX_SIZE, + columns, + rows, + (uintptr_t) values, + sizeof(values[0])); + starpu_coo_data_register(&coo2_handle, + STARPU_MAIN_RAM, + NX, + NY, + MATRIX_SIZE, + columns2, + rows2, + (uintptr_t) values2, + sizeof(values2[0])); +} + +/* Unregisters both COO handles. */ +static void +unregister_data(void) +{ + starpu_data_unregister(coo_handle); + starpu_data_unregister(coo2_handle); +} + +/* Initializes StarPU (the test is skipped on -ENODEV), runs the interface tests on coo_config and returns the summary status. */ +int +main(int argc, char **argv) +{ + struct starpu_conf conf; + struct data_interface_test_summary summary; + + starpu_conf_init(&conf); + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; +
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + ret = run_tests(&coo_config, &summary); + + unregister_data(); + + if (ret) data_interface_test_summary_print(stderr, &summary); + + starpu_shutdown(); + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/interfaces/coo/coo_opencl.c b/tests/datawizard/interfaces/coo/coo_opencl.c new file mode 100644 index 0000000..9aabb55 --- /dev/null +++ b/tests/datawizard/interfaces/coo/coo_opencl.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include <starpu.h> +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/coo/coo_opencl_kernel.cl" + +extern struct test_config coo_config; +static struct starpu_opencl_program coo_program; + +/* OpenCL test entry point: loads the test_coo_opencl kernel, runs it over the values of buffers[0] with the factor read from args, and reads the device-side failure flag back into coo_config.copy_failed. */ +void +test_coo_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, factor, ret; + unsigned int n; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + cl_context context; + cl_mem val, fail; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, + &coo_program, + NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + factor = *(int *)args; + n = STARPU_COO_GET_NVALUES(buffers[0]); + val = (cl_mem) STARPU_COO_GET_VALUES(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &coo_program, + "test_coo_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* device-side copy of the failure flag, initialized from the host value */ + fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(int), &coo_config.copy_failed, &err); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* Setting args */ + int nargs; + nargs = starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(val), &val, + sizeof(n), &n, + sizeof(fail), &fail, + sizeof(factor), &factor, + 0); + if (nargs != 4) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* clamp the work-group size to the problem size, otherwise round the global size up to a multiple of it */ + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) +
STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &coo_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + /* copy_failed is only meaningful once the read-back itself has succeeded */ + if (coo_config.copy_failed != SUCCESS) + { + fprintf(stderr, "coo_opencl: copy_failed %d\n", + coo_config.copy_failed); + } + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + /* the flag buffer was created by this call and must be released here */ + clReleaseMemObject(fail); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&coo_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} diff --git a/tests/datawizard/interfaces/coo/coo_opencl_kernel.cl b/tests/datawizard/interfaces/coo/coo_opencl_kernel.cl new file mode 100644 index 0000000..c807223 --- /dev/null +++ b/tests/datawizard/interfaces/coo/coo_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ +/* Check kernel: work-item i expects val[i] == i * factor. On a mismatch it stores 1 into *err, otherwise it negates val[i]. Work-items with i >= nx do nothing. */ +__kernel void test_coo_opencl(__global int *val, + unsigned int nx, + __global int *err, + int factor) +{ + const int i = get_global_id(0); + if (i >= nx) + return; + + /* store a constant non-zero flag: the former val[1] could itself be 0 and hide the failure */ + if (val[i] != i * factor) + *err = 1; + else + val[i] = - val[i]; +} diff --git a/tests/datawizard/interfaces/copy_interfaces.c b/tests/datawizard/interfaces/copy_interfaces.c new file mode 100644 index 0000000..775fc89 --- /dev/null +++ b/tests/datawizard/interfaces/copy_interfaces.c @@ -0,0 +1,144 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + /* NOTE(review): the header names after the bare #include directives below appear to be missing from this text; confirm against the upstream file. */ +#include +#include "../../helper.h" +#include + +/* Registers a second handle from `handle` with starpu_data_register_same(), optionally displays it, compares the STARPU_MAIN_RAM interfaces of both handles with ops->compare, then unregisters BOTH handles. Returns 1 when compare reports 0, otherwise 0. */ static int check_copy(starpu_data_handle_t handle, char *header) +{ + void *old_interface, *new_interface; + starpu_data_handle_t new_handle; + int ret=0; + + starpu_data_register_same(&new_handle, handle); + + if (!getenv("STARPU_SSILENT")) /* display is skipped when STARPU_SSILENT is set */ + { + if (new_handle->ops->display) + { + fprintf(stderr, "%s: ", header); + new_handle->ops->display(new_handle, stderr); + fprintf(stderr, "\n"); + } + else + { + fprintf(stderr, "%s does not define a display ops\n", header); + } + } + + old_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + new_interface = starpu_data_get_interface_on_node(new_handle, STARPU_MAIN_RAM); + + if (new_handle->ops->compare(old_interface, new_interface) == 0) /* a result of 0 is treated as a failed copy */ + { + FPRINTF(stderr, "Error when copying %s data\n", header); + ret = 1; + } + starpu_data_unregister(handle); + starpu_data_unregister(new_handle); + return ret; +} + +/* Registers one handle per interface type (matrix, block, vector, csr, bcsr, variable, tensor, ndim) and runs check_copy() on each; the chain stops at the first failure because every step is guarded by ret == 0. */ int main(int argc, char **argv) +{ + int ret; + starpu_data_handle_t handle; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (ret == 0) + { + int NX=3; + int NY=2; + int matrix[NX][NY]; + starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0])); + ret = check_copy(handle, "matrix"); + } + + if (ret == 0) + { + int NX=3; + int NY=2; + int NZ=4; + int block[NX*NY*NZ]; + starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(block[0])); + ret = check_copy(handle, "block"); + } + + if (ret == 0) + { + int xx[] = {12, 23, 45}; + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)xx, 3, sizeof(xx[0])); + ret = check_copy(handle, "vector"); + } + + if (ret == 0) + { + uint32_t nnz = 2; + unsigned nrow = 5; + float nzvalA[nnz]; + uint32_t colind[nnz]; + uint32_t rowptr[nrow+1]; + starpu_csr_data_register(&handle, STARPU_MAIN_RAM, nnz, nrow,
(uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float)); + ret = check_copy(handle, "csr"); + } + + if (ret == 0) + { + uint32_t nnz = 2; + unsigned nrow = 5; + float nzvalA[nnz]; + uint32_t colind[nnz]; + uint32_t rowptr[nrow+1]; + starpu_bcsr_data_register(&handle, STARPU_MAIN_RAM, nnz, nrow, (uintptr_t)nzvalA, colind, rowptr, 0, 1, 1, sizeof(float)); + ret = check_copy(handle, "bcsr"); + } + + if (ret == 0) + { + int x=42; + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + ret = check_copy(handle, "variable"); + } + + if (ret == 0) + { + int NX=3; + int NY=2; + int NZ=4; + int NT=3; + int tensor[NX*NY*NZ*NT]; + starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(tensor[0])); + ret = check_copy(handle, "tensor"); + } + + if (ret == 0) + { + int NX=3; + int NY=2; + int array2d[NX*NY]; + unsigned nn[2] = {NX, NY}; + unsigned ldn[2] = {1, NX}; + starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)array2d, ldn, nn, 2, sizeof(int)); + ret = check_copy(handle, "ndim"); + } + + starpu_shutdown(); + return ret; +} diff --git a/tests/datawizard/interfaces/csr/csr_cuda.cu b/tests/datawizard/interfaces/csr/csr_cuda.cu new file mode 100644 index 0000000..495db15 --- /dev/null +++ b/tests/datawizard/interfaces/csr/csr_cuda.cu @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include <starpu.h> +#include "../test_interfaces.h" + +extern struct test_config csr_config; + +/* Check kernel: thread i expects nzval[i] == (i+1) * factor. On a mismatch it stores 1 into *err, otherwise it negates nzval[i]. Threads with i >= nnz do nothing. */ +__global__ void csr_cuda(int *nzval, uint32_t nnz, int *err, int factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i >= nnz) + return; + + if (nzval[i] != (i+1)*factor) + *err = 1; + else + nzval[i] = -nzval[i]; +} + +/* CUDA test entry point: copies csr_config.copy_failed to a device int, runs csr_cuda over the non-zero values of buffers[0] with the factor read from args, then copies the device int back into csr_config.copy_failed. */ +extern "C" void test_csr_cuda_func(void *buffers[], void *args) +{ + int factor; + int *ret; + int *val; + cudaError_t error; + uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block; + + factor = *(int *) args; + val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]); + + error = cudaMalloc(&ret, sizeof(int)); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(ret, + &csr_config.copy_failed, + sizeof(int), + cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + /* one thread per non-zero value, launched on the same stream as the copies */ + csr_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>> (val, nnz, ret, factor); + error = cudaGetLastError(); + if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(&csr_config.copy_failed, + ret, + sizeof(int), + cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + /* drain the stream first so the read-back from ret has completed before ret is freed */ + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + cudaFree(ret); +} diff --git a/tests/datawizard/interfaces/csr/csr_interface.c b/tests/datawizard/interfaces/csr/csr_interface.c new file mode 100644 index 0000000..3217352 --- /dev/null +++ b/tests/datawizard/interfaces/csr/csr_interface.c @@ -0,0 +1,166 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "../test_interfaces.h" +#include "../../../helper.h" + +#define WIDTH 8 +#define HEIGHT 4 +#define SIZE (WIDTH * HEIGHT) +#define NNZ (SIZE-1) + +#ifdef STARPU_USE_CPU +void test_csr_cpu_func(void *buffers[], void *args); +#endif /* !STARPU_USE_CPU */ +#ifdef STARPU_USE_CUDA +extern void test_csr_cuda_func(void *buffers[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void test_csr_opencl_func(void *buffers[], void *args); +#endif + + +static int nzval[NNZ]; +static int nzval2[NNZ]; + +static uint32_t colind[NNZ]; +static uint32_t colind2[NNZ]; + +static uint32_t rowptr[HEIGHT+1]; +static uint32_t rowptr2[HEIGHT+1]; + +static starpu_data_handle_t csr_handle; +static starpu_data_handle_t csr2_handle; + +/* Test description handed to run_tests(): per-device check functions plus the two registered CSR matrices. */ +struct test_config csr_config = +{ +#ifdef STARPU_USE_CPU + .cpu_func = test_csr_cpu_func, +#endif /* !
STARPU_USE_CPU */ +#ifdef STARPU_USE_CUDA + .cuda_func = test_csr_cuda_func, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_func = test_csr_opencl_func, +#endif + .handle = &csr_handle, + .ptr = nzval, + .dummy_handle = &csr2_handle, + .dummy_ptr = nzval2, + .copy_failed = SUCCESS, + .name = "csr_interface" +}; + +/* Fills nzval with 1..NNZ (nzval2 with 42), builds matching column indices and row pointers, and registers both CSR matrices. */ +static void +register_data(void) +{ + int i; + for (i = 1; i < SIZE; i++) + { + nzval[i-1] = i; + nzval2[i-1] = 42; + + colind[i-1] = i % WIDTH; + colind2[i-1] = colind[i-1]; + } + + rowptr[0] = 1; + rowptr2[0] = 1; + for (i = 1; i < HEIGHT; i++) + { + rowptr[i] = i * WIDTH; + rowptr2[i] = rowptr[i]; + } + rowptr[HEIGHT] = NNZ + 1; + rowptr2[HEIGHT] = rowptr[HEIGHT]; + + starpu_csr_data_register(&csr_handle, + STARPU_MAIN_RAM, + NNZ, + HEIGHT, + (uintptr_t) nzval, + colind, + rowptr, + 0, + sizeof(nzval[0])); + starpu_csr_data_register(&csr2_handle, + STARPU_MAIN_RAM, + NNZ, + HEIGHT, + (uintptr_t) nzval2, + colind2, + rowptr2, + 0, + sizeof(nzval2[0])); +} + +/* Unregisters both CSR handles. */ +static void +unregister_data(void) +{ + starpu_data_unregister(csr_handle); + starpu_data_unregister(csr2_handle); +} + +/* CPU check: val[i] must equal (i+1) * factor. Matching values are negated in place; the first mismatch sets copy_failed to FAILURE and returns. */ +void +test_csr_cpu_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int *val; + int factor; + int i; + + uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); + val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]); + factor = *(int *) args; + + for (i = 0; i < (int)nnz; i++) + { + if (val[i] != (i+1) * factor) + { + csr_config.copy_failed = FAILURE; + return; + } + val[i] *= -1; + } +} + +/* Initializes StarPU (the test is skipped on -ENODEV), runs the interface tests on csr_config and returns the summary status. */ +int main(int argc, char **argv) +{ + struct data_interface_test_summary summary; + struct starpu_conf conf; + starpu_conf_init(&conf); + + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + ret = run_tests(&csr_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + if (ret) data_interface_test_summary_print(stderr,
&summary); + + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/interfaces/csr/csr_opencl.c b/tests/datawizard/interfaces/csr/csr_opencl.c new file mode 100644 index 0000000..0e9c0aa --- /dev/null +++ b/tests/datawizard/interfaces/csr/csr_opencl.c @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include <starpu.h> +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/csr/csr_opencl_kernel.cl" +extern struct test_config csr_config; +static struct starpu_opencl_program opencl_program; + +/* OpenCL test entry point: loads the test_csr_opencl kernel, runs it over the non-zero values of buffers[0] with the factor read from args, and reads the device-side failure flag back into csr_config.copy_failed. */ +void +test_csr_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, ret; + int factor = *(int *) args; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); + cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(buffers[0]); + + cl_context context; + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + /* device-side copy of the failure flag, initialized from the host value */ + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(int), &csr_config.copy_failed, &err); + + if (err != CL_SUCCESS)
+ STARPU_OPENCL_REPORT_ERROR(err); + + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "test_csr_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + int nargs; + nargs = starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(nzval), &nzval, + sizeof(nnz), &nnz, + sizeof(fail), &fail, + sizeof(factor), &factor, + 0); + + if (nargs != 4) + { + /* nargs is the argument index to report; err is the OpenCL error code handled by the macro below */ + fprintf(stderr, "Failed to set argument #%d\n", nargs); + STARPU_OPENCL_REPORT_ERROR(err); + } + + { + size_t global = nnz; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + /* clamp the work-group size to the problem size, otherwise round the global size up to a multiple of it */ + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &csr_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + /* the flag buffer was created by this call and must be released here */ + clReleaseMemObject(fail); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} diff --git a/tests/datawizard/interfaces/csr/csr_opencl_kernel.cl b/tests/datawizard/interfaces/csr/csr_opencl_kernel.cl new file mode 100644 index 0000000..f5b6896 --- /dev/null +++ b/tests/datawizard/interfaces/csr/csr_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+__kernel void test_csr_opencl(__global int *val,	/* values to check and negate */
+			      unsigned int nx,		/* number of entries in val */
+			      __global int *err,	/* set to 1 when a mismatch is found */
+			      int factor)		/* multiplier of the expected values */
+{
+	const int i = get_global_id(0);
+	if (i >= nx)	/* work-items beyond the nx entries have nothing to do */
+		return;
+
+	if (val[i] != (i+1) * factor)	/* entry i is expected to hold (i+1) * factor */
+		*err = 1;
+	else
+		val[i] = - val[i];	/* matching entries are negated in place */
+}
diff --git a/tests/datawizard/interfaces/matrix/matrix_cuda.cu b/tests/datawizard/interfaces/matrix/matrix_cuda.cu
new file mode 100644
index 0000000..dcf68cf
--- /dev/null
+++ b/tests/datawizard/interfaces/matrix/matrix_cuda.cu
@@ -0,0 +1,74 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+extern struct test_config matrix_config;
+
+__global__ void matrix_cuda(int *val, unsigned n, int *err, int factor)
+{
+	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= n)	/* threads past the n elements have nothing to check */
+		return;
+
+	if (val[i] != i*factor)	/* element i is expected to hold i * factor */
+		*err = 1;
+	else
+		val[i] = -val[i];	/* matching elements are negated in place */
+}
+
+extern "C" void test_matrix_cuda_func(void *buffers[], void *args)
+{
+	int factor;
+	int *ret;	/* device-side copy of matrix_config.copy_failed */
+	int *val;
+	cudaError_t error;
+	unsigned int nx, ny, n;
+
+	nx = STARPU_MATRIX_GET_NX(buffers[0]);
+	ny = STARPU_MATRIX_GET_NY(buffers[0]);
+	n = nx * ny;
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;	/* ceil(n / threads_per_block) */
+	factor = *(int *) args;
+	val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]);
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(ret,
+				&matrix_config.copy_failed,
+				sizeof(int),
+				cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	matrix_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(val, n, ret, factor);
+	error = cudaGetLastError();
+	if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(&matrix_config.copy_failed,
+				ret,
+				sizeof(int),
+				cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());	/* let the copy back finish before freeing its source */
+	cudaFree(ret);
+}
diff --git a/tests/datawizard/interfaces/matrix/matrix_interface.c b/tests/datawizard/interfaces/matrix/matrix_interface.c
new file mode 100644
index 0000000..9b29f8e
--- /dev/null
+++ b/tests/datawizard/interfaces/matrix/matrix_interface.c
@@ -0,0 +1,140 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define WIDTH 16
+#define HEIGHT 16
+
+#ifdef STARPU_USE_CPU
+void test_matrix_cpu_func(void *buffers[], void *args);
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+extern void test_matrix_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_matrix_opencl_func(void *buffers[], void *args);
+#endif
+
+/* Handles and backing storage for the tested matrix and for the dummy one. */
+static starpu_data_handle_t matrix_handle;
+static starpu_data_handle_t matrix2_handle;
+
+static int matrix[WIDTH * HEIGHT];
+static int matrix2[WIDTH * HEIGHT];
+
+struct test_config matrix_config =
+{
+#ifdef STARPU_USE_CPU
+	.cpu_func = test_matrix_cpu_func,
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	.cuda_func = test_matrix_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func = test_matrix_opencl_func,
+#endif
+	.handle = &matrix_handle,
+	.ptr = matrix,
+	.dummy_handle = &matrix2_handle,
+	.dummy_ptr = matrix2,
+	.copy_failed = SUCCESS,
+	.name = "matrix_interface"
+};
+
+static void
+register_data(void)
+{
+	int i;
+	int size = WIDTH * HEIGHT;
+	for (i = 0; i < size; i++)
+		matrix[i] = i;	/* element i holds i, the value the check functions expect for factor 1 */
+
+	starpu_matrix_data_register(&matrix_handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t) matrix,
+				    WIDTH, /* ld */
+				    WIDTH,
+				    HEIGHT,
+				    sizeof(matrix[0]));
+	starpu_matrix_data_register(&matrix2_handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t) matrix2,
+				    WIDTH, /* ld */
+				    WIDTH,
+				    HEIGHT,
+				    sizeof(matrix[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(matrix_handle);
+	starpu_data_unregister(matrix2_handle);
+}
+
+void
+test_matrix_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int *val;
+	int factor;
+	int i;
+	int nx, ny;
+
+	nx = STARPU_MATRIX_GET_NX(buffers[0]);
+	ny = STARPU_MATRIX_GET_NY(buffers[0]);
+	val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < nx*ny; i++)
+	{
+		if (val[i] != i * factor)	/* element i is expected to hold i * factor */
+		{
+			matrix_config.copy_failed = FAILURE;
+			return;
+		}
+		val[i] *= -1;	/* matching elements are negated in place */
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct data_interface_test_summary summary;
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.ncuda = 2;
+	conf.nopencl = 1;
+
+	int ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	register_data();
+
+	ret = run_tests(&matrix_config, &summary);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	if (ret) data_interface_test_summary_print(stderr, &summary);
+
+	return data_interface_test_summary_success(&summary);
+}
diff --git a/tests/datawizard/interfaces/matrix/matrix_opencl.c
b/tests/datawizard/interfaces/matrix/matrix_opencl.c
new file mode 100644
index 0000000..cb7f667
--- /dev/null
+++ b/tests/datawizard/interfaces/matrix/matrix_opencl.c
@@ -0,0 +1,139 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl"
+
+extern struct test_config matrix_config;
+static struct starpu_opencl_program matrix_program;
+
+/*
+ * OpenCL side of the matrix interface test: run the "matrix_opencl" kernel
+ * on the nx * ny elements of buffers[0], passing the int pointed to by args as
+ * the kernel's factor, and read the error flag into matrix_config.copy_failed.
+ */
+void test_matrix_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, factor, ret;
+	unsigned int n;
+
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+	cl_context context;
+	cl_mem val, fail;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+						  &matrix_program,
+						  NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	factor = *(int *)args;
+	n = STARPU_MATRIX_GET_NX(buffers[0]);
+	n*= STARPU_MATRIX_GET_NY(buffers[0]);
+	val = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&matrix_program,
+					"matrix_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &matrix_config.copy_failed, &err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* Setting args */
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(val), &val,
+					      sizeof(n), &n,
+					      sizeof(fail), &fail,
+					      sizeof(factor), &factor,
+					      0);
+	if (nargs != 4)
+		STARPU_OPENCL_REPORT_ERROR(err);
+	{
+		size_t global=n;
+		size_t local;
+		size_t s;
+		cl_device_id device;
+
+		starpu_opencl_get_device(devid, &device);
+
+		err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+		/* Clamp the work-group size to the problem size, or pad the global size to a multiple of it. */
+		if (local > global)
+			local = global;
+		else
+			global = (global + local-1) / local * local;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0,
+				  sizeof(int),
+				  &matrix_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+	/* The error-flag buffer is created on every call, so it must be released here. */
+	clReleaseMemObject(fail);
+
+	starpu_opencl_release_kernel(kernel);
+	ret = starpu_opencl_unload_opencl(&matrix_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
diff --git a/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl b/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl
new file mode 100644
index 0000000..a97bf4f
--- /dev/null
+++ b/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl
@@ -0,0 +1,31 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void matrix_opencl(__global int *val,	/* elements to check and negate */
+			    unsigned int nx,	/* number of elements in val */
+			    __global int *err,	/* set to 1 when a mismatch is found */
+			    int factor)		/* multiplier of the expected values */
+{
+	const int i = get_global_id(0);
+	if (i >= nx)	/* work-items beyond the nx elements have nothing to do */
+		return;
+
+	if (val[i] != i * factor)	/* element i is expected to hold i * factor */
+		*err = 1;	/* a fixed non-zero flag: storing i would write 0 for a mismatch at index 0 */
+	else
+		val[i] *= -1;
+}
+
diff --git a/tests/datawizard/interfaces/multiformat/advanced/generic.c b/tests/datawizard/interfaces/multiformat/advanced/generic.c
new file mode 100644
index 0000000..7a7d6bc
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/generic.c
@@ -0,0 +1,208 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "generic.h"
+#include "../../../../helper.h"
+
+struct stats global_stats;	/* per-kind call counters, incremented by the functions below */
+
+#ifdef STARPU_USE_CPU
+void cpu_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.cpu++;
+}
+#endif /* STARPU_USE_CPU */
+
+#ifdef STARPU_USE_CUDA
+void cuda_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.cuda++;
+}
+
+void cpu_to_cuda_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.cpu_to_cuda++;
+}
+
+void cuda_to_cpu_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.cuda_to_cpu++;
+}
+
+struct starpu_codelet cpu_to_cuda_cl =
+{
+	.cuda_funcs = {cpu_to_cuda_func},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+
+struct starpu_codelet cuda_to_cpu_cl =
+{
+	.cpu_funcs = {cuda_to_cpu_func},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+#endif /* STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+void opencl_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.opencl++;
+}
+
+static
+void cpu_to_opencl_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.cpu_to_opencl++;
+}
+
+static
+void opencl_to_cpu_func(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	global_stats.opencl_to_cpu++;
+}
+
+struct starpu_codelet cpu_to_opencl_cl =
+{
+	.opencl_funcs = {cpu_to_opencl_func},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+
+struct starpu_codelet opencl_to_cpu_cl =
+{
+	.cpu_funcs = {opencl_to_cpu_func},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+#endif /* STARPU_USE_OPENCL */
+
+struct starpu_multiformat_data_interface_ops ops =
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = sizeof(int),
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_elemsize = sizeof(int),
+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(int)
+};
+
+void
+print_stats(struct stats *s)	/* print every counter compiled into struct stats to stderr */
+{
+#ifdef STARPU_USE_CPU
+	FPRINTF(stderr, "cpu : %u\n", s->cpu);
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	FPRINTF(stderr, "cuda : %u\n"
+			"cpu->cuda : %u\n"
+			"cuda->cpu : %u\n",
+			s->cuda,
+			s->cpu_to_cuda,
+			s->cuda_to_cpu);
+#endif /* STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	FPRINTF(stderr, "opencl : %u\n"
+			"cpu->opencl : %u\n"
+			"opencl->cpu : %u\n",
+			s->opencl,
+			s->cpu_to_opencl,
+			s->opencl_to_cpu);
+#endif /* STARPU_USE_OPENCL */
+}
+
+void reset_stats(struct stats *s)	/* set every counter compiled into struct stats to 0 */
+{
+#ifdef STARPU_USE_CPU
+	s->cpu = 0;
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	s->cuda = 0;
+	s->cpu_to_cuda = 0;
+	s->cuda_to_cpu = 0;
+#endif /* STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	s->opencl = 0;
+	s->cpu_to_opencl = 0;
+	s->opencl_to_cpu = 0;
+#endif /* STARPU_USE_OPENCL */
+}
+
+int
+compare_stats(struct stats *s1, struct stats *s2)	/* returns 0 when all counters match, 1 otherwise */
+{
+	if (
+#ifdef STARPU_USE_CPU
+	    s1->cpu == s2->cpu &&
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	    s1->cuda == s2->cuda &&
+	    s1->cpu_to_cuda == s2->cpu_to_cuda &&
+	    s1->cuda_to_cpu == s2->cuda_to_cpu &&
+#endif /* STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	    s1->opencl == s2->opencl &&
+	    s1->cpu_to_opencl == s2->cpu_to_opencl &&
+	    s1->opencl_to_cpu == s2->opencl_to_cpu &&
+#endif /* STARPU_USE_OPENCL */
+	    1 /* Just so the build does not fail if we disable EVERYTHING */
+	)
+		return 0;
+	else
+		return 1;
+
+}
diff --git a/tests/datawizard/interfaces/multiformat/advanced/generic.h b/tests/datawizard/interfaces/multiformat/advanced/generic.h
new file mode 100644
index 0000000..c871227
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/generic.h
@@ -0,0 +1,56 @@
+/* StarPU --- Runtime system for heterogeneous
multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef MULTIFORMAT_GENERIC_H
+#define MULTIFORMAT_GENERIC_H
+
+#define NX 16	/* number of elements in the test vectors */
+
+#ifdef STARPU_USE_CPU
+void cpu_func(void *buffers[], void *args);
+#endif /* STARPU_USE_CPU */
+
+#ifdef STARPU_USE_CUDA
+void cuda_func(void *buffers[], void *args);
+#endif /* STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+void opencl_func(void *buffers[], void *args);
+#endif /* STARPU_USE_OPENCL */
+
+extern struct starpu_multiformat_data_interface_ops ops;
+/* Counting the calls to the codelets */
+struct stats
+{
+#ifdef STARPU_USE_CPU
+	unsigned int cpu;
+#endif
+#ifdef STARPU_USE_CUDA
+	unsigned int cuda;
+	unsigned int cpu_to_cuda;
+	unsigned int cuda_to_cpu;
+#endif
+#ifdef STARPU_USE_OPENCL
+	unsigned int opencl;
+	unsigned int cpu_to_opencl;
+	unsigned int opencl_to_cpu;
+#endif
+};
+
+void print_stats(struct stats *);
+void reset_stats(struct stats *);
+int compare_stats(struct stats *, struct stats *);
+
+#endif /* MULTIFORMAT_GENERIC_H */
diff --git a/tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c b/tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c
new file mode 100644
index 0000000..af27898
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c
@@ -0,0 +1,172 @@
+/* StarPU --- Runtime system for heterogeneous multicore
architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+
+#include "generic.h"
+#include "../../../../helper.h"
+
+#if defined(STARPU_USE_CUDA) && defined(STARPU_USE_OPENCL)
+
+extern struct stats global_stats;
+static int vector[NX];
+static starpu_data_handle_t handle;
+
+/*
+ * Initially, our vector should be in RAM. It is then used on a CUDA device,
+ * then on an OpenCL device, and finally, on a CUDA device again.
+ * The following operations should be performed, in this specific order :
+ * - CPU -> CUDA conversion
+ * - CUDA kernel execution
+ * - OpenCL kernel execution
+ * - CUDA kernel execution
+ * - CUDA -> CPU conversion
+ *
+ * Note that we will not run any conversion between CUDA and OpenCL, because
+ * StarPU assumes that the data structures used on CUDA and OpenCL devices are
+ * the same.
+ */
+static int
+test(void)	/* returns 0 when all three tasks were submitted, 1 as soon as one submission fails */
+{
+	int ret;
+	struct starpu_task *task_cuda, *task_cuda2, *task_opencl;
+
+	static struct starpu_codelet cl_cuda =
+	{
+		.cuda_funcs = {cuda_func},
+		.nbuffers = 1,
+		.modes = {STARPU_RW}
+	};
+
+	task_cuda = starpu_task_create();
+	task_cuda->cl = &cl_cuda;
+	task_cuda->handles[0] = handle;
+	ret = starpu_task_submit(task_cuda);
+	if (ret != 0)
+	{
+		task_cuda->destroy = 0;
+		starpu_task_destroy(task_cuda);
+		return 1;
+	}
+
+	static struct starpu_codelet cl_opencl =
+	{
+		.opencl_funcs = {opencl_func},
+		.nbuffers = 1,
+		.modes = {STARPU_RW}
+	};
+
+	task_opencl = starpu_task_create();
+	task_opencl->cl = &cl_opencl;
+	task_opencl->handles[0] = handle;
+	ret = starpu_task_submit(task_opencl);
+	if (ret != 0)
+	{
+		task_opencl->destroy = 0;
+		starpu_task_destroy(task_opencl);
+		return 1;
+	}
+
+	task_cuda2 = starpu_task_create();
+	task_cuda2->cl = &cl_cuda;
+	task_cuda2->handles[0] = handle;
+	ret = starpu_task_submit(task_cuda2);
+	if (ret != 0)
+	{
+		task_cuda2->destroy = 0;
+		starpu_task_destroy(task_cuda2);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void
+register_handle(void)
+{
+	int i;
+	for (i = 0; i < NX; i++)
+		vector[i] = i;
+	starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops);
+}
+
+static void
+unregister_handle(void)
+{
+	starpu_data_unregister(handle);
+}
+#endif /* STARPU_USE_CUDA && STARPU_USE_OPENCL */
+
+int
+main(void)
+{
+#if defined(STARPU_USE_CUDA) && defined(STARPU_USE_OPENCL)
+	int ret;
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)
+	conf.ncuda = 0;
+#else
+	conf.ncuda = 1;
+#endif
+	conf.nopencl = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV)
+		goto enodev;
+
+	reset_stats(&global_stats);
+	register_handle();
+	ret = test();
+	unregister_handle();
+	starpu_shutdown();
+
+	if (ret != 0)	/* a failed task submission is reported as a skipped test */
+		return STARPU_TEST_SKIPPED;
+
+	struct stats expected_stats =
+	{
+#ifdef STARPU_USE_CPU
+		.cpu = 0,
+#endif
+#ifdef STARPU_USE_CUDA
+		.cuda = 2,
+		.cpu_to_cuda = 1,
+		.cuda_to_cpu = 1,
+#endif
+#ifdef STARPU_USE_OPENCL
+		.opencl = 1,
+		.cpu_to_opencl = 0,
+		.opencl_to_cpu = 0
+#endif
+	};
+
+	ret = compare_stats(&global_stats, &expected_stats);
+	if (ret != 0)
+	{
+		print_stats(&global_stats);
+		print_stats(&expected_stats);
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+enodev:
+	return STARPU_TEST_SKIPPED;
+#else
+	return STARPU_TEST_SKIPPED;
+#endif
+}
diff --git a/tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c b/tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c
new file mode 100644
index 0000000..167d132
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c
@@ -0,0 +1,160 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#include "generic.h"
+#include "../../../../helper.h"
+
+static int vector[NX];
+static starpu_data_handle_t handle;
+
+#define ENTER() do { FPRINTF(stderr, "Entering %s\n", __starpu_func__); } while (0)
+
+extern struct stats global_stats;
+
+
+static void
+register_handle(void)
+{
+	int i;
+	for (i = 0; i < NX; i++)
+		vector[i] = i;
+	starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops);
+}
+
+static void
+unregister_handle(void)
+{
+	starpu_data_unregister(handle);
+}
+
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+static void
+create_and_submit(int where)	/* run one synchronous RW task on handle, restricted to the given worker kind */
+{
+	static struct starpu_codelet cl =
+	{
+		.modes = { STARPU_RW },
+#ifdef STARPU_USE_CUDA
+		.cuda_funcs = {cuda_func},
+#endif
+#ifdef STARPU_USE_OPENCL
+		.opencl_funcs = {opencl_func},
+#endif
+		.nbuffers = 1
+	};
+	cl.where = where;
+
+	struct starpu_task *task = starpu_task_create();
+	task->cl = &cl;
+	task->handles[0] = handle;
+
+	/* We need to be sure the data has been copied to the GPU at the end
+	 * of this function */
+	task->synchronous = 1;
+	if (starpu_task_submit(task) == -ENODEV)
+		exit(STARPU_TEST_SKIPPED);
+}
+#endif
+
+static int
+test(void)
+{
+	struct stats expected_stats;
+	memset(&expected_stats, 0, sizeof(expected_stats));
+
+#ifdef STARPU_USE_CUDA
+	create_and_submit(STARPU_CUDA);
+	starpu_data_acquire(handle, STARPU_RW);
+
+	expected_stats.cuda = 1;
+	expected_stats.cpu_to_cuda = 1;
+	expected_stats.cuda_to_cpu = 1;
+
+	starpu_data_release(handle);
+	if (compare_stats(&global_stats, &expected_stats) != 0)
+	{
+		FPRINTF(stderr, "CUDA failed\n");
+		print_stats(&global_stats);
+		FPRINTF(stderr ,"\n");
+		print_stats(&expected_stats);
+		return -ENODEV;	/* NOTE(review): main() maps -ENODEV to STARPU_TEST_SKIPPED, so this mismatch is not reported as a failure -- confirm intended */
+	}
+#endif /* STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+	create_and_submit(STARPU_OPENCL);
+	starpu_data_acquire(handle, STARPU_RW);
+	expected_stats.opencl = 1;
+	expected_stats.cpu_to_opencl = 1;
+	expected_stats.opencl_to_cpu = 1;
+
+	starpu_data_release(handle);
+	if (compare_stats(&global_stats, &expected_stats) != 0)
+	{
+		FPRINTF(stderr, "OPENCL failed\n");
+		print_stats(&global_stats);
+		FPRINTF(stderr ,"\n");
+		print_stats(&expected_stats);
+		return -ENODEV;	/* NOTE(review): same as the CUDA branch, reported as skipped rather than failed -- confirm intended */
+	}
+#endif /* STARPU_USE_OPENCL */
+
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+#ifdef STARPU_USE_CPU
+	int ret;
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)
+	conf.ncuda = 0;
+#else
+	conf.ncuda = 1;
+#endif
+	conf.nopencl = 1;
+	memset(&global_stats, 0, sizeof(global_stats));
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV || starpu_cpu_worker_get_count() == 0) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	register_handle();
+
+	int err = test();
+
+	unregister_handle();
+	starpu_shutdown();
+
+	switch (err)
+	{
+		case -ENODEV:
+			return STARPU_TEST_SKIPPED;
+		case 0:
+			return EXIT_SUCCESS;
+		default:	/* NOTE(review): test() only returns 0 or -ENODEV, so this branch is currently unreachable */
+			return EXIT_FAILURE;
+	}
+#else /* ! STARPU_USE_CPU */
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+	return STARPU_TEST_SKIPPED;
+#endif
+}
diff --git a/tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c b/tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c
new file mode 100644
index 0000000..7a59cbd
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c
@@ -0,0 +1,260 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#include "generic.h"
+#include "../../../../helper.h"
+
+#define DEBUG 0
+
+#if DEBUG
+#define SYNCHRONOUS 1 /* Easier to debug with synchronous tasks */
+#define ENTER() do { FPRINTF(stderr, "Entering %s\n", __starpu_func__); } while (0)
+#else
+#define SYNCHRONOUS 0
+#define ENTER()
+#endif
+
+extern struct stats global_stats;
+
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+static void
+create_and_submit_tasks(int where, starpu_data_handle_t handles[])	/* task 1: handles[0] on `where`; task 2: handles[1] on CPU; task 3: both on `where` */
+{
+	int ret;
+	FPRINTF(stderr, "***** Starting Task 1\n");
+	struct starpu_codelet cl =
+	{
+		.modes = { STARPU_RW },
+		.nbuffers = 1,
+		.where = where
+	};
+
+#ifdef STARPU_USE_CUDA
+	if (where & STARPU_CUDA)
+		cl.cuda_funcs[0] = cuda_func;
+#endif
+#ifdef STARPU_USE_OPENCL
+	if (where & STARPU_OPENCL)
+		cl.opencl_funcs[0] = opencl_func;
+#endif
+
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = SYNCHRONOUS;
+	task->cl = &cl;
+	task->handles[0] = handles[0];
+	ret = starpu_task_submit(task);
+	assert(ret == 0);
+
+#ifdef STARPU_USE_CPU
+	FPRINTF(stderr, "***** Starting Task 2\n");
+	struct starpu_codelet cl2 =
+	{
+		.modes = { STARPU_RW },
+		.cpu_funcs = {cpu_func},
+		.nbuffers = 1,
+		.where = STARPU_CPU,
+	};
+
+	struct starpu_task *task2 = starpu_task_create();
+	task2->synchronous = SYNCHRONOUS;
+	task2->cl = &cl2;
+	task2->handles[0] = handles[1];
+	ret = starpu_task_submit(task2);
+	assert(ret == 0);
+#endif /* STARPU_USE_CPU */
+
+	FPRINTF(stderr, "***** Starting Task 3\n");
+	struct starpu_codelet cl3 =
+	{
+		.modes = { STARPU_RW, STARPU_RW },
+		.nbuffers = 2,
+		.where = where
+	};
+
+#ifdef STARPU_USE_CUDA
+	if (where & STARPU_CUDA)
+		cl3.cuda_funcs[0] = cuda_func;
+#endif
+#ifdef STARPU_USE_OPENCL
+	if (where & STARPU_OPENCL)
+		cl3.opencl_funcs[0] = opencl_func;
+#endif
+
+	struct starpu_task *task3 = starpu_task_create();
+	task3->synchronous = SYNCHRONOUS;
+	task3->cl = &cl3;
+	task3->handles[0] = handles[0];
+	task3->handles[1] = handles[1];
+	ret = starpu_task_submit(task3);
+	assert(ret == 0);
+
+	ret = starpu_task_wait_for_all();	/* the codelets above are locals, so all tasks must finish before returning */
+	assert(ret == 0);
+
+	FPRINTF(stderr, "***** End of all tasks\n");
+	return;
+}
+#endif
+
+/* XXX Just a little bit of copy/pasta here... */
+#ifdef STARPU_USE_CUDA
+static int
+test_cuda(void)	/* returns the result of compare_stats() against the expected counters */
+{
+	int i;
+	int vector1[NX];
+	int vector2[NX];
+	starpu_data_handle_t handles[2];
+
+	for (i = 0; i < NX; i++)
+	{
+		vector1[i] = i;
+		vector2[i] = i;
+	}
+
+	starpu_multiformat_data_register(&handles[0], STARPU_MAIN_RAM, vector1, NX, &ops);
+	starpu_multiformat_data_register(&handles[1], STARPU_MAIN_RAM, vector2, NX, &ops);
+
+	memset(&global_stats, 0, sizeof(global_stats));
+	create_and_submit_tasks(STARPU_CUDA, handles);
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+
+#if DEBUG
+	print_stats(&global_stats);
+#endif
+
+	struct stats expected_stats;
+#ifdef STARPU_USE_CPU
+	expected_stats.cpu = 1;
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_OPENCL
+	expected_stats.opencl = 0;
+	expected_stats.cpu_to_opencl = 0;
+	expected_stats.opencl_to_cpu = 0;
+#endif /* STARPU_USE_OPENCL */
+	expected_stats.cuda = 2;
+	expected_stats.cpu_to_cuda = 2;
+	expected_stats.cuda_to_cpu = 2;
+
+	return compare_stats(&expected_stats, &global_stats);
+}
+#endif /* STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static int
+test_opencl(void)	/* returns the result of compare_stats() against the expected counters */
+{
+	int i;
+	int vector1[NX];
+	int vector2[NX];
+	starpu_data_handle_t handles[2];
+
+	for (i = 0; i < NX; i++)
+	{
+		vector1[i] = i;
+		vector2[i] = i;
+	}
+
+	starpu_multiformat_data_register(&handles[0], STARPU_MAIN_RAM, vector1, NX, &ops);
+	starpu_multiformat_data_register(&handles[1], STARPU_MAIN_RAM, vector2, NX, &ops);
+
+	memset(&global_stats, 0, sizeof(global_stats));
+	create_and_submit_tasks(STARPU_OPENCL, handles);
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+
+#if DEBUG
+	print_stats(&global_stats);
+#endif
+
+	struct stats expected_stats;
+#ifdef STARPU_USE_CPU
+	expected_stats.cpu = 1;
+#endif /* STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	expected_stats.cuda = 0;
+	expected_stats.cpu_to_cuda = 0;
+	expected_stats.cuda_to_cpu = 0;
+#endif /* STARPU_USE_CUDA */
+	expected_stats.opencl = 2;
+	expected_stats.cpu_to_opencl = 2;
+	expected_stats.opencl_to_cpu = 2;
+
+	return compare_stats(&expected_stats, &global_stats);
+}
+#endif /* STARPU_USE_OPENCL */
+
+int
+main(int argc, char **argv)
+{
+#ifdef STARPU_USE_CPU
+	int ret;
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)
+	conf.ncuda = 0;
+#else
+	conf.ncuda = 2;
+#endif
+	conf.nopencl = 1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	unsigned int ncpu = starpu_cpu_worker_get_count();
+	if (ncpu == 0)
+	{
+		FPRINTF(stderr, "No CPUS, cannot run this test.\n");
+		return STARPU_TEST_SKIPPED;	/* NOTE(review): returns without calling starpu_shutdown() */
+	}
+	unsigned int ncuda = starpu_cuda_worker_get_count();
+	unsigned int nopencl = starpu_opencl_worker_get_count();
+
+#ifdef STARPU_USE_OPENCL
+	if (nopencl > 0 && test_opencl() != 0)
+	{
+		FPRINTF(stderr, "OPENCL FAILED\n");
+		return EXIT_FAILURE;	/* NOTE(review): returns without calling starpu_shutdown() */
+	}
+#endif
+#ifdef STARPU_USE_CUDA
+	if (ncuda > 0 && test_cuda() != 0)
+	{
+		FPRINTF(stderr, "CUDA FAILED \n");
+		return EXIT_FAILURE;	/* NOTE(review): returns without calling starpu_shutdown() */
+	}
+#endif
+
+	starpu_shutdown();
+
+	if (ncuda == 0 && nopencl == 0)
+		return STARPU_TEST_SKIPPED;
+	else
+		return EXIT_SUCCESS;
+#else /* !STARPU_USE_CPU */
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+
+	return STARPU_TEST_SKIPPED;
+#endif
+}
diff --git a/tests/datawizard/interfaces/multiformat/advanced/multiformat_worker.c b/tests/datawizard/interfaces/multiformat/advanced/multiformat_worker.c
new file mode 100644
index 0000000..d596adb
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/multiformat_worker.c
@@ -0,0 +1,156 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+
+#include "generic.h"
+#include "../../../../helper.h"
+
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+extern struct stats global_stats;
+static int vector[NX];
+static starpu_data_handle_t handle;
+#endif
+
+#ifdef STARPU_USE_CUDA
+static int ncuda;
+static int cuda_worker;
+#endif
+#ifdef STARPU_USE_OPENCL
+static int nopencl;
+static int opencl_worker;
+#endif
+
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+/* Codelet with a single RW buffer and only device implementations. */
+static struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cuda_func },
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = { opencl_func },
+#endif
+	.nbuffers = 1,
+};
+
+/* Fill `vector' with 0..NX-1 and register it as a multiformat handle. */
+static void
+register_handle(void)
+{
+	int i;
+	for (i = 0; i < NX; i++)
+		vector[i] = i;
+	starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops);
+}
+
+static void
+unregister_handle(void)
+{
+	starpu_data_unregister(handle);
+}
+
+/*
+ * Submit one task pinned to a specific worker: the CUDA worker when one was
+ * found, otherwise the OpenCL worker.
+ * Returns -ENODEV when no such worker exists, otherwise the value returned
+ * by starpu_task_submit().
+ */
+static int
+create_and_submit_tasks(void)
+{
+	int ret;
+	struct starpu_task *task;
+
+	task = starpu_task_create();
+	task->cl = &cl;
+	task->handles[0] = handle;
+	task->execute_on_a_specific_worker = 1;
+
+#ifdef STARPU_USE_CUDA
+	if (ncuda > 0)
+	{
+		task->workerid = cuda_worker;
+	}
+	else
+#endif
+#ifdef STARPU_USE_OPENCL
+	if (nopencl > 0)
+	{
+		task->workerid = opencl_worker;
+	}
+	else
+#endif
+	{
+		task->destroy = 0;
+		starpu_task_destroy(task);
+		return -ENODEV;
+	}
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV)
+	{
+		/* The task was refused: it is still ours, release it. */
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+
+	return ret;
+}
+#endif
+
+/*
+ * The test succeeds when exactly one device task ran and exactly one
+ * conversion happened in each direction for that device type.
+ */
+int
+main(int argc STARPU_ATTRIBUTE_UNUSED, char **argv STARPU_ATTRIBUTE_UNUSED)
+{
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+	int err;
+	err = starpu_initialize(NULL, &argc, &argv);
+	if (err == -ENODEV)
+		goto enodev;
+	STARPU_CHECK_RETURN_VALUE(err, "starpu_initialize");
+
+#if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1))
+	ncuda = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER,
+					      &cuda_worker, 1);
+	/* A negative value is treated as "at least one worker". */
+	if (ncuda < 0)
+		ncuda = 1;
+#endif
+#ifdef STARPU_USE_OPENCL
+	nopencl = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER,
+						&opencl_worker, 1);
+	/* A negative value is treated as "at least one worker". */
+	if (nopencl < 0)
+		nopencl = 1;
+#endif
+
+	reset_stats(&global_stats);
+	register_handle();
+	err = create_and_submit_tasks();
+	unregister_handle();
+	starpu_shutdown();
+
+	if (err == -ENODEV)
+		goto enodev;
+
+#if defined(STARPU_USE_CUDA)
+	if (global_stats.cuda == 1)
+	{
+		if (global_stats.cpu_to_cuda == 1 &&
+		    global_stats.cuda_to_cpu == 1)
+			return EXIT_SUCCESS;
+		else
+			return EXIT_FAILURE;
+	}
+#endif /* !STARPU_USE_CUDA */
+#if defined(STARPU_USE_OPENCL)
+	if (global_stats.opencl == 1)
+	{
+		if (global_stats.cpu_to_opencl == 1 &&
+		    global_stats.opencl_to_cpu == 1)
+			return EXIT_SUCCESS;
+		else
+			return EXIT_FAILURE;
+
+	}
+#endif /* !STARPU_USE_OPENCL */
+
+	/* We should not get here */
+	return EXIT_FAILURE;
+
+enodev:
+#endif
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/datawizard/interfaces/multiformat/advanced/same_handle.c b/tests/datawizard/interfaces/multiformat/advanced/same_handle.c
new file mode 100644
index 0000000..0ce91c1
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/advanced/same_handle.c
@@ -0,0 +1,142 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+
+#include "generic.h"
+#include "../../../../helper.h"
+
+/*
+ * A single handle can be given twice to a given kernel. In this case, it
+ * should only be converted once.
+ */
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+extern struct stats global_stats;
+static int vector[NX];
+static starpu_data_handle_t handle;
+
+/* Codelet taking two RW buffers; the test passes the same handle twice. */
+static struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW, STARPU_RW },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cuda_func },
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = { opencl_func },
+#endif
+	.nbuffers = 2,
+};
+
+/* Fill `vector' with 0..NX-1 and register it as a multiformat handle. */
+static void
+register_handle(void)
+{
+	int i;
+	for (i = 0; i < NX; i++)
+		vector[i] = i;
+	starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops);
+}
+
+static void
+unregister_handle(void)
+{
+	starpu_data_unregister(handle);
+}
+
+/*
+ * Submit one task that receives `handle' in both buffer slots.
+ * Returns -ENODEV when the task cannot be executed by any worker, 0 on
+ * success. Any other submission error is reported through
+ * STARPU_CHECK_RETURN_VALUE instead of being silently ignored.
+ */
+static int
+create_and_submit_tasks(void)
+{
+	int ret;
+	struct starpu_task *task;
+
+	cl.where = 0;
+#ifdef STARPU_USE_CUDA
+	cl.where |= STARPU_CUDA;
+#endif
+#ifdef STARPU_USE_OPENCL
+	cl.where |= STARPU_OPENCL;
+#endif
+
+	task = starpu_task_create();
+	task->cl = &cl;
+	task->handles[0] = handle;
+	task->handles[1] = handle;
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV)
+	{
+		/* The task was refused: it is still ours, release it. */
+		task->destroy = 0;
+		starpu_task_destroy(task);
+		return -ENODEV;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	return 0;
+}
+#endif
+
+/*
+ * The test succeeds when exactly one device task ran and the handle was
+ * converted exactly once in each direction.
+ */
+int
+main(int argc, char **argv)
+{
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+	int err;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)
+	conf.ncuda = 0;
+#endif
+	err = starpu_initialize(&conf, &argc, &argv);
+	if (err == -ENODEV)
+		goto enodev;
+	STARPU_CHECK_RETURN_VALUE(err, "starpu_initialize");
+
+	reset_stats(&global_stats);
+	register_handle();
+	err = create_and_submit_tasks();
+	unregister_handle();
+	starpu_shutdown();
+
+	if (err == -ENODEV)
+		goto enodev;
+
+#ifdef STARPU_USE_CUDA
+	if (global_stats.cuda == 1)
+	{
+		if (global_stats.cpu_to_cuda == 1 &&
+		    global_stats.cuda_to_cpu == 1)
+			return EXIT_SUCCESS;
+		else
+			return EXIT_FAILURE;
+	}
+	else
+#endif
+#if defined(STARPU_USE_OPENCL)
+	if (global_stats.opencl == 1)
+	{
+		if (global_stats.cpu_to_opencl == 1 &&
+		    global_stats.opencl_to_cpu == 1)
+			return EXIT_SUCCESS;
+		else
+			return EXIT_FAILURE;
+
+	}
+	else
+#endif
+	{
+		/* We should not get here */
+		return EXIT_FAILURE;
+	}
+
+enodev:
+#endif
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
new file mode 100644
index 0000000..dbbae76
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
@@ -0,0 +1,88 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "multiformat_types.h"
+#include "../../../helper.h"
+
+#ifdef STARPU_USE_CUDA
+/*
+ * CPU function of cuda_to_cpu_cl: copy the structure-of-arrays stored behind
+ * the CUDA pointer of the handle into the array of points behind its CPU
+ * pointer.
+ */
+void cuda_to_cpu(void *buffers[], void *arg)
+{
+	(void)arg;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	FPRINTF(stderr, "Entering %s\n", __starpu_func__);
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	int i;
+	for (i = 0; i < n; i++)
+	{
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
+	}
+}
+
+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
+/* Conversion CPU format -> CUDA format, executed by a CUDA function. */
+struct starpu_codelet cpu_to_cuda_cl =
+{
+	.cuda_funcs = {cpu_to_cuda_cuda_func},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+
+/* Conversion CUDA format -> CPU format, executed by a CPU function. */
+struct starpu_codelet cuda_to_cpu_cl =
+{
+	.cpu_funcs = {cuda_to_cpu},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+#endif
+
+#ifdef STARPU_USE_OPENCL
+/*
+ * CPU function of opencl_to_cpu_cl: copy the structure-of-arrays stored
+ * behind the OpenCL pointer of the handle into the array of points behind
+ * its CPU pointer.
+ */
+void opencl_to_cpu(void *buffers[], void *arg)
+{
+	(void)arg;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	int i;
+	for (i = 0; i < n; i++)
+	{
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
+	}
+}
+
+extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
+/* Conversion CPU format -> OpenCL format, executed by an OpenCL function. */
+struct starpu_codelet cpu_to_opencl_cl =
+{
+	.opencl_funcs = {cpu_to_opencl_opencl_func},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+
+/* Conversion OpenCL format -> CPU format, executed by a CPU function. */
+struct starpu_codelet opencl_to_cpu_cl =
+{
+	.cpu_funcs = {opencl_to_cpu},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+};
+#endif
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
new file mode 100644
index 0000000..194d2c2
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
@@ -0,0 +1,50 @@
+/*
StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "multiformat_types.h"
+#include "../../../helper.h"
+
+/* Each thread converts one point of `src' into the x/y arrays of `dst'. */
+static __global__ void cpu_to_cuda_cuda(struct point *src,
+	struct struct_of_arrays *dst, unsigned n)
+{
+	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i < n)
+	{
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
+	}
+
+}
+
+/*
+ * CUDA implementation of the CPU -> CUDA conversion: launch the conversion
+ * kernel on StarPU's local stream with 64 threads per block and enough
+ * blocks to cover the n elements of the handle. The function does not wait
+ * for the kernel to complete.
+ */
+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	FPRINTF(stderr, "Entering %s\n", __starpu_func__);
+	struct point *src;
+	struct struct_of_arrays *dst;
+
+	src = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
+	dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+	cpu_to_cuda_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(src, dst, n);
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl
new file mode 100644
index 0000000..fce3244
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl
@@ -0,0 +1,27 @@
+/* StarPU ---
Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include "multiformat_types.h"
+/* Each work-item converts one point of `src' into the x/y arrays of `dst'. */
+__kernel void cpu_to_opencl_opencl(__global struct point *src,
+				   __global struct struct_of_arrays *dst,
+				   unsigned int n)
+{
+	const unsigned int idx = get_global_id(0);
+
+	/* Work-items beyond the last element have nothing to do. */
+	if (idx >= n)
+		return;
+
+	dst->x[idx] = src[idx].x;
+	dst->y[idx] = src[idx].y;
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
new file mode 100644
index 0000000..5ca8a80
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
@@ -0,0 +1,111 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../../../helper.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl"
+static struct starpu_opencl_program opencl_conversion_program;
+
+/*
+ * OpenCL implementation of the CPU -> OpenCL conversion.
+ * The program is loaded from KERNEL_LOCATION and unloaded again on every
+ * call; the function returns only after the command queue has finished.
+ */
+void cpu_to_opencl_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	(void) args;
+	int id, devid, ret;
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
+	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+						  &opencl_conversion_program,
+						  NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_conversion_program,
+					"cpu_to_opencl_opencl",
+					devid);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	{
+		size_t global=n;
+		size_t local;
+		size_t s;
+		cl_device_id device;
+
+		starpu_opencl_get_device(devid, &device);
+
+		err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+		/* Shrink the work-group to the problem size, or round the
+		 * global size up to a multiple of the work-group size. */
+		if (local > global)
+			local = global;
+		else
+			global = (global + local-1) / local * local;
+
+		err = clEnqueueNDRangeKernel(queue,
+					     kernel,
+					     1,
+					     NULL,
+					     &global,
+					     &local,
+					     0,
+					     NULL,
+					     &event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clFinish(queue);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+	ret = starpu_opencl_unload_opencl(&opencl_conversion_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu b/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu
new file mode 100644
index 0000000..af47e26
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu
@@ -0,0 +1,80 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "multiformat_types.h"
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+extern struct test_config multiformat_config;
+
+/*
+ * Each thread checks that element i of both arrays equals i * factor.
+ * On mismatch *err is set to 1, otherwise both values are negated.
+ */
+static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n,
+					int *err, int factor)
+{
+	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= n)
+		return;
+
+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
+	{
+		*err = 1;
+	}
+	else
+	{
+		soa->x[i] = -soa->x[i];
+		soa->y[i] = -soa->y[i];
+	}
+}
+
+/*
+ * CUDA test function for the multiformat interface. `args' points to the
+ * int factor expected in the data. multiformat_config.copy_failed is copied
+ * to the device, possibly updated by the kernel, and copied back.
+ */
+extern "C" void test_multiformat_cuda_func(void *buffers[], void *args)
+{
+	FPRINTF(stderr, "Entering %s\n", __starpu_func__);
+	int factor;
+	int *ret;
+	cudaError_t error;
+	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	struct struct_of_arrays *soa;
+
+	soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+	factor = *(int *) args;
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(ret,
+				&multiformat_config.copy_failed,
+				sizeof(int),
+				cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	multiformat_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(soa, n, ret, factor);
+	error = cudaGetLastError();
+	if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(&multiformat_config.copy_failed,
+				ret,
+				sizeof(int),
+				cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	/* Wait for the kernel and the copy back before releasing `ret'. */
+	error = cudaStreamSynchronize(starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	cudaFree(ret);
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_interface.c b/tests/datawizard/interfaces/multiformat/multiformat_interface.c
new file mode 100644
index 0000000..ffac625
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_interface.c
@@ -0,0 +1,168
@@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "multiformat_types.h"
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+static void test_multiformat_cpu_func(void *buffers[], void *args);
+#ifdef STARPU_USE_CUDA
+extern void test_multiformat_cuda_func(void *buffers[], void *args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_multiformat_opencl_func(void *buffers[], void *args);
+#endif
+
+static struct point array_of_structs[N_ELEMENTS];
+static struct point array_of_structs_dummy[N_ELEMENTS];
+
+static starpu_data_handle_t multiformat_handle;
+static starpu_data_handle_t multiformat_dummy_handle;
+
+struct test_config multiformat_config =
+{
+	.cpu_func      = test_multiformat_cpu_func,
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_multiformat_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_multiformat_opencl_func,
+#endif
+	.handle        = &multiformat_handle,
+	.ptr           = array_of_structs,
+	.dummy_handle  = &multiformat_dummy_handle,
+	.dummy_ptr     = array_of_structs_dummy,
+	.copy_failed   = SUCCESS,
+	.name          = "multiformat_interface"
+};
+
+/*
+ * CPU test function: `args' points to the int factor expected in the data.
+ * Every point must hold (i * factor, i * factor); a mismatch sets
+ * multiformat_config.copy_failed to FAILURE. All coordinates are negated.
+ */
+static void
+test_multiformat_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	struct point *aos;
+	int n, i;
+	int factor;
+
+	aos = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < n; i++)
+	{
+		FPRINTF(stderr, "(%d %d) [%d]", aos[i].x, aos[i].y, factor);
+		if (aos[i].x != i * factor || aos[i].y != i * factor)
+		{
+			multiformat_config.copy_failed = FAILURE;
+		}
+		aos[i].x = -aos[i].x;
+		aos[i].y = -aos[i].y;
+	}
+	FPRINTF(stderr, "\n");
+}
+
+#ifdef STARPU_USE_CUDA
+extern struct starpu_codelet cpu_to_cuda_cl;
+extern struct starpu_codelet cuda_to_cpu_cl;
+#endif
+
+#ifdef STARPU_USE_OPENCL
+extern struct starpu_codelet cpu_to_opencl_cl;
+extern struct starpu_codelet opencl_to_cpu_cl;
+#endif
+
+/*
+ * On the devices the data is a struct struct_of_arrays, i.e. two ints (one
+ * x and one y) per element.
+ */
+struct starpu_multiformat_data_interface_ops format_ops =
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = 2 * sizeof(int),
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_elemsize = 2 * sizeof(int),
+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(struct point),
+};
+
+/* Initialize point i to (i, i) and register the main and dummy handles. */
+static void
+register_data(void)
+{
+	int i;
+
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
+		array_of_structs[i].x = i;
+		array_of_structs[i].y = i;
+	}
+	starpu_multiformat_data_register(&multiformat_handle,
+					 STARPU_MAIN_RAM,
+					 &array_of_structs,
+					 N_ELEMENTS,
+					 &format_ops);
+	starpu_multiformat_data_register(&multiformat_dummy_handle,
+					 STARPU_MAIN_RAM,
+					 &array_of_structs_dummy,
+					 N_ELEMENTS,
+					 &format_ops);
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(multiformat_handle);
+	starpu_data_unregister(multiformat_dummy_handle);
+}
+
+int main(int argc, char **argv)
+{
+#ifdef STARPU_USE_CPU
+	int ret;
+	struct data_interface_test_summary summary;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+#if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)
+	conf.ncuda = 0;
+#else
+	conf.ncuda = 2;
+#endif
+	conf.nopencl = 1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	register_data();
+
+	ret = run_tests(&multiformat_config, &summary);
+
+	/* The summary is only printed when run_tests() returns non-zero. */
+	if (ret) data_interface_test_summary_print(stderr, &summary);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	return data_interface_test_summary_success(&summary);
+#else
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+	return STARPU_TEST_SKIPPED;
+#endif
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_opencl.c b/tests/datawizard/interfaces/multiformat/multiformat_opencl.c
new file mode 100644
index 0000000..a2b3f0c
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_opencl.c
@@ -0,0 +1,137 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl"
+
+extern struct test_config multiformat_config;
+static struct starpu_opencl_program multiformat_program;
+
+/*
+ * OpenCL test function for the multiformat interface. `args' points to the
+ * int factor expected in the data. multiformat_config.copy_failed is copied
+ * into a device buffer, possibly updated by the kernel, and read back.
+ * The program is loaded from KERNEL_LOCATION and unloaded on every call.
+ */
+void test_multiformat_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, factor, ret;
+	unsigned int n;
+
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+	cl_context context;
+	cl_mem val, fail;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+						  &multiformat_program,
+						  NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	factor = *(int *)args;
+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&multiformat_program,
+					"multiformat_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+			      sizeof(int), &multiformat_config.copy_failed, &err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* Setting args */
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 1, sizeof(n), &n);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 2, sizeof(fail), &fail);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 3, sizeof(factor), &factor);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+		size_t s;
+		cl_device_id device;
+
+		starpu_opencl_get_device(devid, &device);
+
+		err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+		/* Shrink the work-group to the problem size, or round the
+		 * global size up to a multiple of the work-group size. */
+		if (local > global)
+			local = global;
+		else
+			global = (global + local-1) / local * local;
+
+		err = clEnqueueNDRangeKernel(queue,
+					     kernel,
+					     1,
+					     NULL,
+					     &global,
+					     &local,
+					     0,
+					     NULL,
+					     &event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	/* Blocking read (CL_TRUE) of the error flag written by the kernel. */
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0,
+				  sizeof(int),
+				  &multiformat_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	/* The flag buffer was created by this call: release it here. */
+	err = clReleaseMemObject(fail);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	starpu_opencl_release_kernel(kernel);
+	ret = starpu_opencl_unload_opencl(&multiformat_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl b/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl
new file mode 100644
index 0000000..e78c8dd
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl
@@ -0,0 +1,36 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "multiformat_types.h"
+/*
+ * Each work-item checks that element i of both arrays equals i * factor.
+ * On mismatch *err is set to 1, otherwise both values are negated.
+ */
+__kernel void multiformat_opencl(__global struct struct_of_arrays *soa,
+				 unsigned int nx,
+				 __global int *err,
+				 int factor)
+{
+	const int i = get_global_id(0);
+	if (i >= nx)
+		return;
+
+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
+	{
+		/* Writing the index would report 0 for a mismatch on element 0. */
+		*err = 1;
+	}
+	else
+	{
+		soa->x[i] = -soa->x[i];
+		soa->y[i] = -soa->y[i];
+	}
+}
diff --git a/tests/datawizard/interfaces/multiformat/multiformat_types.h b/tests/datawizard/interfaces/multiformat/multiformat_types.h
new file mode 100644
index 0000000..9b4d32f
--- /dev/null
+++ b/tests/datawizard/interfaces/multiformat/multiformat_types.h
@@ -0,0 +1,33 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef TEST_MULTIFORMAT_TYPES_H
+#define TEST_MULTIFORMAT_TYPES_H
+
+#define N_ELEMENTS 2
+
+struct struct_of_arrays
+{
+	int x[N_ELEMENTS];
+	int y[N_ELEMENTS];
+};
+
+struct point
+{
+	int x, y;
+};
+
+
+#endif
diff --git a/tests/datawizard/interfaces/ndim/ndim_cuda.cu b/tests/datawizard/interfaces/ndim/ndim_cuda.cu
new file mode 100644
index 0000000..a85bf1d
--- /dev/null
+++ b/tests/datawizard/interfaces/ndim/ndim_cuda.cu
@@ -0,0 +1,88 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include
+#include "../test_interfaces.h"
+
+extern struct test_config arr4d_config;
+
+/*
+ * Visits the nx*ny*nz*nt elements with x as the fastest dimension, using
+ * ldy/ldz/ldt as the element strides of the y/z/t dimensions.
+ * The n-th visited element (n counted from 0) must be equal to factor * n; a
+ * matching element is negated in place. On the first mismatch *err is set to
+ * 1 and the kernel returns.
+ * The loops are sequential: the host launches this kernel with one thread.
+ */
+static __global__ void arr4d_cuda(int *arr4d,
+				  int nx, int ny, int nz, int nt,
+				  unsigned ldy, unsigned ldz, unsigned ldt,
+				  int factor, int *err)
+{
+	int i, j, k, l;
+	int val = 0;
+
+	for (l = 0; l < nt ;l++)
+	{
+		for (k = 0; k < nz ;k++)
+		{
+			for (j = 0; j < ny ;j++)
+			{
+				for(i = 0; i < nx ;i++)
+				{
+					if (arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val)
+					{
+						*err = 1;
+						return;
+					}
+					else
+					{
+						arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1;
+						val++;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * CUDA implementation of the ndim test codelet.
+ * buffers[0] is the ndim interface to check and args points to the int
+ * factor. A device-side flag is seeded from arr4d_config.copy_failed, given
+ * to the kernel, and copied back into arr4d_config.copy_failed.
+ */
+extern "C" void test_arr4d_cuda_func(void *buffers[], void *args)
+{
+	cudaError_t error;
+	int *ret;
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(ret, &arr4d_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]);
+	unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]);
+	int nx = nn[0];
+	int ny = nn[1];
+	int nz = nn[2];
+	int nt = nn[3];
+	unsigned ldy = ldn[1];
+	unsigned ldz = ldn[2];
+	unsigned ldt = ldn[3];
+	int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]);
+	int factor = *(int*) args;
+
+	arr4d_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>
+		(arr4d, nx, ny, nz, nt, ldy, ldz, ldt, factor, ret);
+	error = cudaGetLastError();
+	if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error);
+	error = cudaMemcpyAsync(&arr4d_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	/* Let the kernel and the copy back complete before the flag is freed,
+	 * and do not ignore a failure reported at that point. */
+	error = cudaStreamSynchronize(starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaFree(ret);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+}
diff --git a/tests/datawizard/interfaces/ndim/ndim_interface.c b/tests/datawizard/interfaces/ndim/ndim_interface.c
new file mode 100644
index 0000000..4ac5e3d
--- /dev/null
+++ b/tests/datawizard/interfaces/ndim/ndim_interface.c
@@ -0,0 +1,151 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define NX 4
+#define NY NX
+#define NZ NX
+#define NT NX
+
+/* Prototypes */
+static void register_data(void);
+static void unregister_data(void);
+void test_arr4d_cpu_func(void *buffers[], void *args);
+#ifdef STARPU_USE_CUDA
+extern void test_arr4d_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_arr4d_opencl_func(void *buffers[], void *args);
+#endif
+
+static starpu_data_handle_t _arr4d_handle;
+static starpu_data_handle_t _arr4d2_handle;
+
+static int _arr4d[NX*NY*NZ*NT];
+static int _arr4d2[NX*NY*NZ*NT];
+
+struct test_config arr4d_config =
+{
+	.cpu_func = test_arr4d_cpu_func,
+#ifdef STARPU_USE_CUDA
+	.cuda_func = test_arr4d_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func = test_arr4d_opencl_func,
+#endif
+	.handle = &_arr4d_handle,
+	.ptr = _arr4d,
+	.dummy_handle = &_arr4d2_handle,
+	.dummy_ptr = _arr4d2,
+	.copy_failed = SUCCESS,
+	.name = "ndim_interface"
+};
+
+/*
+ * Fills _arr4d and registers _arr4d and _arr4d2 as 4-dimensional ndim data
+ * living in main RAM.
+ */
+static void register_data(void)
+{
+	/*
+	 * The array is contiguous with x as the fastest dimension, so the
+	 * linear offset of an element is also the value it has to hold.
+	 */
+	int n;
+	for (n = 0; n < NX*NY*NZ*NT; n++)
+		_arr4d[n] = n;
+
+	/* Sizes and element strides of the four dimensions */
+	unsigned nn[4] = {NX, NY, NZ, NT};
+	unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ};
+
+	starpu_ndim_data_register(&_arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d, ldn, nn, 4, sizeof(_arr4d[0]));
+	starpu_ndim_data_register(&_arr4d2_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d2, ldn, nn, 4, sizeof(_arr4d2[0]));
+}
+
+/* Unregisters the two handles created by register_data() */
+static void unregister_data(void)
+{
+	starpu_data_unregister(_arr4d_handle);
+	starpu_data_unregister(_arr4d2_handle);
+}
+
+/*
+ * CPU implementation of the test codelet.
+ * buffers[0] is the ndim interface to check and args points to the int
+ * factor. The n-th element in x-fastest order must be equal to factor * n;
+ * each matching element is negated in place. arr4d_config.copy_failed is set
+ * to SUCCESS on entry and to FAILURE as soon as an element does not match.
+ */
+void test_arr4d_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	const int factor = *(int*)args;
+	int *dims = (int *)STARPU_NDIM_GET_NN(buffers[0]);
+	unsigned *strides = STARPU_NDIM_GET_LDN(buffers[0]);
+	const int nx = dims[0], ny = dims[1], nz = dims[2], nt = dims[3];
+	const unsigned ldy = strides[1], ldz = strides[2], ldt = strides[3];
+	int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]);
+	int x, y, z, t;
+	int count = 0;
+
+	arr4d_config.copy_failed = SUCCESS;
+	for (t = 0; t < nt; t++)
+	{
+		for (z = 0; z < nz; z++)
+		{
+			for (y = 0; y < ny; y++)
+			{
+				for (x = 0; x < nx; x++)
+				{
+					int *elem = &arr4d[(t*ldt)+(z*ldz)+(y*ldy)+x];
+
+					if (*elem != factor * count)
+					{
+						arr4d_config.copy_failed = FAILURE;
+						return;
+					}
+					*elem *= -1;
+					count++;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Runs the generic data interface tests on the ndim interface, with at most
+ * two CUDA devices and one OpenCL device.
+ */
+int main(int argc, char **argv)
+{
+	struct data_interface_test_summary summary;
+	struct starpu_conf conf;
+	int ret;
+
+	starpu_conf_init(&conf);
+	conf.ncuda = 2;
+	conf.nopencl = 1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	register_data();
+	ret = run_tests(&arr4d_config, &summary);
+	unregister_data();
+
+	starpu_shutdown();
+
+	if (ret)
+		data_interface_test_summary_print(stderr, &summary);
+
+	return data_interface_test_summary_success(&summary);
+}
+
diff --git a/tests/datawizard/interfaces/ndim/ndim_opencl.c b/tests/datawizard/interfaces/ndim/ndim_opencl.c
new file mode 100644
index 0000000..8fe915e
--- /dev/null
+++ b/tests/datawizard/interfaces/ndim/ndim_opencl.c
@@ -0,0 +1,126 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl"
+extern struct test_config arr4d_config;
+static struct starpu_opencl_program opencl_program;
+
+/*
+ * OpenCL implementation of the ndim test codelet.
+ * buffers[0] is the ndim interface to check and args points to the int
+ * factor. The "arr4d_opencl" kernel is loaded from KERNEL_LOCATION and run
+ * over an (nx, ny, nz*nt) range; its error flag is seeded from and read back
+ * into arr4d_config.copy_failed.
+ */
+void
+test_arr4d_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, ret;
+	int factor = *(int *) args;
+
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]);
+	unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]);
+	int nx = nn[0];
+	int ny = nn[1];
+	int nz = nn[2];
+	int nt = nn[3];
+	unsigned ldy = ldn[1];
+	unsigned ldz = ldn[2];
+	unsigned ldt = ldn[3];
+	cl_mem arr4d = (cl_mem) STARPU_NDIM_GET_DEV_HANDLE(buffers[0]);
+
+	cl_context context;
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	/* Device-side copy of the error flag, initialised from the host value */
+	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &arr4d_config.copy_failed, &err);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_program,
+					"arr4d_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(arr4d), &arr4d,
+					      sizeof(nx), &nx,
+					      sizeof(ny), &ny,
+					      sizeof(nz), &nz,
+					      sizeof(nt), &nt,
+					      sizeof(ldy), &ldy,
+					      sizeof(ldz), &ldz,
+					      sizeof(ldt), &ldt,
+					      sizeof(factor), &factor,
+					      sizeof(fail), &fail,
+					      0);
+
+	if (nargs != 10)
+	{
+		fprintf(stderr, "Failed to set argument #%d\n", nargs);
+		STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	{
+		/* z and t share the third dimension of the range */
+		size_t global[3] = {nx, ny, nz*nt};
+		err = clEnqueueNDRangeKernel(queue,
+					     kernel,
+					     3,
+					     NULL,
+					     global,
+					     NULL,
+					     0,
+					     NULL,
+					     &event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	/* Blocking read of the flag written by the kernel */
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0,
+				  sizeof(int),
+				  &arr4d_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	/* The flag buffer is created on every call, so release it on every
+	 * call as well instead of leaking it. */
+	err = clReleaseMemObject(fail);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	starpu_opencl_release_kernel(kernel);
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
diff --git a/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl b/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl
new file mode 100644
index 0000000..230c0f9
--- /dev/null
+++ b/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl
@@ -0,0 +1,41 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/*
+ * One work-item per element. Dimensions 0 and 1 of the range are x and y;
+ * dimension 2 carries both z (index % nz) and t (index / nz).
+ * The element at (x, y, z, t) must be equal to factor times its rank in a
+ * contiguous x-fastest layout; a matching element is negated in place,
+ * otherwise *err is set to 1. Work-items outside the array do nothing.
+ */
+__kernel void arr4d_opencl(__global int *arr4d,
+			   int nx, int ny, int nz, int nt,
+			   int ldy, int ldz, int ldt,
+			   int factor, __global int *err)
+{
+	const int gx = get_global_id(0);
+	const int gy = get_global_id(1);
+	const int gz = get_global_id(2) % nz;
+	const int gt = get_global_id(2) / nz;
+
+	if (gx >= nx || gy >= ny || gz >= nz || gt >= nt)
+		return;
+
+	/* Rank of the element, and its offset once the strides are applied */
+	const int rank = gt*nz*ny*nx+gz*ny*nx+gy*nx+gx;
+	const int offset = (gt*ldt)+(gz*ldz)+(gy*ldy)+gx;
+
+	if (arr4d[offset] == factor * rank)
+		arr4d[offset] *= -1;
+	else
+		*err = 1;
+}
diff --git a/tests/datawizard/interfaces/tensor/tensor_cuda.cu b/tests/datawizard/interfaces/tensor/tensor_cuda.cu
new file mode 100644
index 0000000..0e2265a
--- /dev/null
+++ b/tests/datawizard/interfaces/tensor/tensor_cuda.cu
@@ -0,0 +1,86 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include
+#include "../test_interfaces.h"
+
+extern struct test_config tensor_config;
+
+/*
+ * Visits the nx*ny*nz*nt elements with x as the fastest dimension, using
+ * ldy/ldz/ldt as the element strides of the y/z/t dimensions.
+ * The n-th visited element (n counted from 0) must be equal to factor * n; a
+ * matching element is negated in place. On the first mismatch *err is set to
+ * 1 and the kernel returns.
+ * The loops are sequential: the host launches this kernel with one thread.
+ */
+static __global__ void tensor_cuda(int *tensor,
+				   int nx, int ny, int nz, int nt,
+				   unsigned ldy, unsigned ldz, unsigned ldt,
+				   int factor, int *err)
+{
+	int i, j, k, l;
+	int val = 0;
+
+	for (l = 0; l < nt ;l++)
+	{
+		for (k = 0; k < nz ;k++)
+		{
+			for (j = 0; j < ny ;j++)
+			{
+				for(i = 0; i < nx ;i++)
+				{
+					if (tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val)
+					{
+						*err = 1;
+						return;
+					}
+					else
+					{
+						tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1;
+						val++;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * CUDA implementation of the tensor test codelet.
+ * buffers[0] is the tensor interface to check and args points to the int
+ * factor. A device-side flag is seeded from tensor_config.copy_failed, given
+ * to the kernel, and copied back into tensor_config.copy_failed.
+ */
+extern "C" void test_tensor_cuda_func(void *buffers[], void *args)
+{
+	cudaError_t error;
+	int *ret;
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpyAsync(ret, &tensor_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	int nx = STARPU_TENSOR_GET_NX(buffers[0]);
+	int ny = STARPU_TENSOR_GET_NY(buffers[0]);
+	int nz = STARPU_TENSOR_GET_NZ(buffers[0]);
+	int nt = STARPU_TENSOR_GET_NT(buffers[0]);
+	unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+	unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+	unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+	int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]);
+	int factor = *(int*) args;
+
+	tensor_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>
+		(tensor, nx, ny, nz, nt, ldy, ldz, ldt, factor, ret);
+	error = cudaGetLastError();
+	if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error);
+	error = cudaMemcpyAsync(&tensor_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	/* Let the kernel and the copy back complete before the flag is freed,
+	 * and do not ignore a failure reported at that point. */
+	error = cudaStreamSynchronize(starpu_cuda_get_local_stream());
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaFree(ret);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+}
diff --git a/tests/datawizard/interfaces/tensor/tensor_interface.c
b/tests/datawizard/interfaces/tensor/tensor_interface.c
new file mode 100644
index 0000000..20a4f54
--- /dev/null
+++ b/tests/datawizard/interfaces/tensor/tensor_interface.c
@@ -0,0 +1,169 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define NX 4
+#define NY NX
+#define NZ NX
+#define NT NX
+
+/* Prototypes */
+static void register_data(void);
+static void unregister_data(void);
+void test_tensor_cpu_func(void *buffers[], void *args);
+#ifdef STARPU_USE_CUDA
+extern void test_tensor_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_tensor_opencl_func(void *buffers[], void *args);
+#endif
+
+static starpu_data_handle_t _tensor_handle;
+static starpu_data_handle_t _tensor2_handle;
+
+static int _tensor[NX*NY*NZ*NT];
+static int _tensor2[NX*NY*NZ*NT];
+
+struct test_config tensor_config =
+{
+	.cpu_func = test_tensor_cpu_func,
+#ifdef STARPU_USE_CUDA
+	.cuda_func = test_tensor_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func = test_tensor_opencl_func,
+#endif
+	.handle = &_tensor_handle,
+	.ptr = _tensor,
+	.dummy_handle = &_tensor2_handle,
+	.dummy_ptr = _tensor2,
+	.copy_failed = SUCCESS,
+	.name = "tensor_interface"
+};
+
+/*
+ * Fills _tensor and registers _tensor and _tensor2 as tensor data living in
+ * main RAM.
+ */
+static void
+register_data(void)
+{
+	/*
+	 * The array is contiguous with x as the fastest dimension, so the
+	 * linear offset of an element is also the value it has to hold.
+	 */
+	int n;
+	for (n = 0; n < NX*NY*NZ*NT; n++)
+		_tensor[n] = n;
+
+	/* Registering data: strides of y, z and t, then the four sizes */
+	starpu_tensor_data_register(&_tensor_handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t)_tensor,
+				    NX,
+				    NX * NY,
+				    NX * NY * NZ,
+				    NX,
+				    NY,
+				    NZ,
+				    NT,
+				    sizeof(_tensor[0]));
+	starpu_tensor_data_register(&_tensor2_handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t)_tensor2,
+				    NX,
+				    NX * NY,
+				    NX * NY * NZ,
+				    NX,
+				    NY,
+				    NZ,
+				    NT,
+				    sizeof(_tensor2[0]));
+}
+
+/* Unregisters the two handles created by register_data() */
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(_tensor_handle);
+	starpu_data_unregister(_tensor2_handle);
+}
+
+/*
+ * CPU implementation of the test codelet.
+ * buffers[0] is the tensor interface to check and args points to the int
+ * factor. The n-th element in x-fastest order must be equal to factor * n;
+ * each matching element is negated in place. tensor_config.copy_failed is
+ * set to SUCCESS on entry and to FAILURE as soon as an element does not match.
+ */
+void test_tensor_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	const int factor = *(int*)args;
+	const int nx = STARPU_TENSOR_GET_NX(buffers[0]);
+	const int ny = STARPU_TENSOR_GET_NY(buffers[0]);
+	const int nz = STARPU_TENSOR_GET_NZ(buffers[0]);
+	const int nt = STARPU_TENSOR_GET_NT(buffers[0]);
+	const unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+	const unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+	const unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+	int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]);
+	int x, y, z, t;
+	int count = 0;
+
+	tensor_config.copy_failed = SUCCESS;
+	for (t = 0; t < nt; t++)
+	{
+		for (z = 0; z < nz; z++)
+		{
+			for (y = 0; y < ny; y++)
+			{
+				for (x = 0; x < nx; x++)
+				{
+					int *elem = &tensor[(t*ldt)+(z*ldz)+(y*ldy)+x];
+
+					if (*elem != factor * count)
+					{
+						tensor_config.copy_failed = FAILURE;
+						return;
+					}
+					*elem *= -1;
+					count++;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Runs the generic data interface tests on the tensor interface, with at
+ * most two CUDA devices and one OpenCL device.
+ */
+int
+main(int argc, char **argv)
+{
+	struct data_interface_test_summary summary;
+	struct starpu_conf conf;
+	int ret;
+
+	starpu_conf_init(&conf);
+	conf.ncuda = 2;
+	conf.nopencl = 1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	register_data();
+	ret = run_tests(&tensor_config, &summary);
+	unregister_data();
+
+	starpu_shutdown();
+
+	if (ret)
+		data_interface_test_summary_print(stderr, &summary);
+
+	return data_interface_test_summary_success(&summary);
+}
+
diff --git a/tests/datawizard/interfaces/tensor/tensor_opencl.c b/tests/datawizard/interfaces/tensor/tensor_opencl.c
new file mode 100644
index 0000000..618ba13
--- /dev/null
+++ b/tests/datawizard/interfaces/tensor/tensor_opencl.c
@@ -0,0 +1,124 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl"
+extern struct test_config tensor_config;
+static struct starpu_opencl_program opencl_program;
+
+/*
+ * OpenCL implementation of the tensor test codelet.
+ * buffers[0] is the tensor interface to check and args points to the int
+ * factor. The "tensor_opencl" kernel is loaded from KERNEL_LOCATION and run
+ * over an (nx, ny, nz*nt) range; its error flag is seeded from and read back
+ * into tensor_config.copy_failed.
+ */
+void
+test_tensor_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, ret;
+	int factor = *(int *) args;
+
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	int nx = STARPU_TENSOR_GET_NX(buffers[0]);
+	int ny = STARPU_TENSOR_GET_NY(buffers[0]);
+	int nz = STARPU_TENSOR_GET_NZ(buffers[0]);
+	int nt = STARPU_TENSOR_GET_NT(buffers[0]);
+	unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+	unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+	unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+	cl_mem tensor = (cl_mem) STARPU_TENSOR_GET_DEV_HANDLE(buffers[0]);
+
+	cl_context context;
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	/* Device-side copy of the error flag, initialised from the host value */
+	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &tensor_config.copy_failed, &err);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_program,
+					"tensor_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(tensor), &tensor,
+					      sizeof(nx), &nx,
+					      sizeof(ny), &ny,
+					      sizeof(nz), &nz,
+					      sizeof(nt), &nt,
+					      sizeof(ldy), &ldy,
+					      sizeof(ldz), &ldz,
+					      sizeof(ldt), &ldt,
+					      sizeof(factor), &factor,
+					      sizeof(fail), &fail,
+					      0);
+
+	if (nargs != 10)
+	{
+		fprintf(stderr, "Failed to set argument #%d\n", nargs);
+		STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	{
+		/* z and t share the third dimension of the range */
+		size_t global[3] = {nx, ny, nz*nt};
+		err = clEnqueueNDRangeKernel(queue,
+					     kernel,
+					     3,
+					     NULL,
+					     global,
+					     NULL,
+					     0,
+					     NULL,
+					     &event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	/* Blocking read of the flag written by the kernel */
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0,
+				  sizeof(int),
+				  &tensor_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	/* The flag buffer is created on every call, so release it on every
+	 * call as well instead of leaking it. */
+	err = clReleaseMemObject(fail);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	starpu_opencl_release_kernel(kernel);
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
diff --git a/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl b/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl
new file mode 100644
index 0000000..76324a7
--- /dev/null
+++ b/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl
@@ -0,0 +1,41 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/*
+ * One work-item per element. Dimensions 0 and 1 of the range are x and y;
+ * dimension 2 carries both z (index % nz) and t (index / nz).
+ * The element at (x, y, z, t) must be equal to factor times its rank in a
+ * contiguous x-fastest layout; a matching element is negated in place,
+ * otherwise *err is set to 1. Work-items outside the tensor do nothing.
+ */
+__kernel void tensor_opencl(__global int *tensor,
+			    int nx, int ny, int nz, int nt,
+			    int ldy, int ldz, int ldt,
+			    int factor, __global int *err)
+{
+	const int gx = get_global_id(0);
+	const int gy = get_global_id(1);
+	const int gz = get_global_id(2) % nz;
+	const int gt = get_global_id(2) / nz;
+
+	if (gx >= nx || gy >= ny || gz >= nz || gt >= nt)
+		return;
+
+	/* Rank of the element, and its offset once the strides are applied */
+	const int rank = gt*nz*ny*nx+gz*ny*nx+gy*nx+gx;
+	const int offset = (gt*ldt)+(gz*ldz)+(gy*ldy)+gx;
+
+	if (tensor[offset] == factor * rank)
+		tensor[offset] *= -1;
+	else
+		*err = 1;
+}
diff --git a/tests/datawizard/interfaces/test_interfaces.c b/tests/datawizard/interfaces/test_interfaces.c
new file mode 100644
index 0000000..e75289b
--- /dev/null
+++ b/tests/datawizard/interfaces/test_interfaces.c
@@ -0,0 +1,557 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+
+#include
+
+#include
+
+#include "test_interfaces.h"
+#include "../../helper.h"
+
+/*
+ * This is definitely not thread-safe.
+ */
+static struct test_config *current_config;
+
+/* TODO :
+- OpenCL to OpenCL support
+*/
+
+/*
+ * Returns a human-readable, statically allocated description of a test
+ * result code. Unknown codes trigger an assertion; when assertions are
+ * compiled out, "Unknown" is returned instead of falling off the end of the
+ * function.
+ */
+static char *enum_to_string(int exit_code)
+{
+	switch (exit_code)
+	{
+		case SUCCESS:
+			return "Success";
+		case FAILURE:
+			return "Failure";
+		case UNTESTED:
+			return "Untested";
+		case NO_DEVICE:
+		case -ENODEV:
+			return "No device available";
+		case TASK_SUBMISSION_FAILURE:
+			return "Task submission failed";
+		default:
+			STARPU_ASSERT_MSG(0, "unknown error code %d\n", exit_code);
+	}
+	return "Unknown";
+}
+
+/*
+ * Prints every field of the summary s on f (stderr when f is NULL), prefixed
+ * with the name of the configuration currently under test.
+ */
+void data_interface_test_summary_print(FILE *f, struct data_interface_test_summary *s)
+{
+	if (!f)
+		f = stderr;
+
+	FPRINTF(f, "%s : %s\n", current_config->name, enum_to_string(s->success));
+	FPRINTF(f, "Asynchronous :\n");
+	FPRINTF(f, "\tCPU -> CUDA : %s\n", enum_to_string(s->cpu_to_cuda_async));
+	FPRINTF(f, "\tCUDA -> CUDA : %s\n", enum_to_string(s->cuda_to_cuda_async));
+	FPRINTF(f, "\tCUDA -> CPU : %s\n", enum_to_string(s->cuda_to_cpu_async));
+	FPRINTF(f, "\n");
+	FPRINTF(f, "\tCPU -> OpenCL : %s\n", enum_to_string(s->cpu_to_opencl_async));
+	FPRINTF(f, "\tOpenCL -> CPU : %s\n", enum_to_string(s->opencl_to_cpu_async));
+	FPRINTF(f, "\n");
+
+	FPRINTF(f, "Synchronous :\n");
+	FPRINTF(f, "\tCPU -> CUDA : %s\n", enum_to_string(s->cpu_to_cuda));
+	FPRINTF(f, "\tCUDA -> CUDA : %s\n", enum_to_string(s->cuda_to_cuda));
+	FPRINTF(f, "\tCUDA -> CPU : %s\n", enum_to_string(s->cuda_to_cpu));
+	FPRINTF(f, "\n");
+	FPRINTF(f, "\tCPU -> OpenCL : %s\n", enum_to_string(s->cpu_to_opencl));
+	FPRINTF(f, "\tOpenCL -> CPU : %s\n", enum_to_string(s->opencl_to_cpu));
+	FPRINTF(f, "\n");
+
+	FPRINTF(f, "\n");
+	FPRINTF(f, "CPU -> CPU : %s\n", enum_to_string(s->cpu_to_cpu));
+	FPRINTF(f, "to_pointer() : %s\n", enum_to_string(s->to_pointer));
+	FPRINTF(f, "compare() : %s\n", enum_to_string(s->compare));
+	FPRINTF(f, "pack_unpack() : %s\n", enum_to_string(s->pack));
+}
+
+/* Returns the global result stored in the summary */
+int data_interface_test_summary_success(struct data_interface_test_summary *s)
+{
+	return s->success;
+}
+
+/*
+ * Stores ret in *field, and also marks the whole summary as failed when ret
+ * is FAILURE.
+ */
+static void set_field(struct data_interface_test_summary *s, int *field, int ret)
+{
+	*field = ret;
+	if (ret == FAILURE) s->success = ret;
+}
+
+/* Marks every individual test as UNTESTED and the global result as SUCCESS */
+static void summary_init(struct data_interface_test_summary *s)
+{
+	s->cpu_to_cpu = UNTESTED;
+	s->compare = UNTESTED;
+	s->cpu_to_cuda = UNTESTED;
+	s->cuda_to_cuda = UNTESTED;
+	s->cuda_to_cpu = UNTESTED;
+	s->cpu_to_cuda_async = UNTESTED;
+	s->cuda_to_cpu_async = UNTESTED;
+	s->cuda_to_cuda_async = UNTESTED;
+	s->cpu_to_opencl = UNTESTED;
+	s->opencl_to_cpu = UNTESTED;
+	s->cpu_to_opencl_async = UNTESTED;
+	s->opencl_to_cpu_async = UNTESTED;
+	s->to_pointer = UNTESTED;
+	s->pack = UNTESTED;
+	s->success = SUCCESS;
+}
+
+/*
+ * This variable has to be either -1 or 1.
+ * The kernels should check that the ith value stored in the data interface is
+ * equal to i, if factor == 1, or -i, if factor == -1.
+ */
+static int factor = -1;
+
+/*
+ * Creates a complete task, only knowing on what device it should be executed.
+ * Note that the global variables current_config and factor are heavily used
+ * here: the codelet functions and the handle come from current_config, and
+ * factor is negated on every successful call and passed as the codelet
+ * argument.
+ * Arguments :
+ *	- taskp : where the address of the new task is stored
+ *	- type : STARPU_{CPU,CUDA,OPENCL}_WORKER.
+ *	- id : index of the worker to use among the workers of that type, or
+ *	  -1 not to bind the task to a specific worker
+ * Return values :
+ *	-ENODEV : no worker of that type, id out of range, or unknown type
+ *	0 : success.
+ */
+static int create_task(struct starpu_task **taskp, enum starpu_worker_archtype type, int id)
+{
+	static int cpu_workers[STARPU_MAXCPUS];
+	static int cuda_workers[STARPU_MAXCUDADEVS];
+	static int opencl_workers[STARPU_MAXOPENCLDEVS];
+
+	static int n_cpus = -1;
+	static int n_cudas = -1;
+	static int n_opencls = -1;
+
+	if (n_cpus == -1) /* First time here */
+	{
+		/* We do not check the return values of the calls to
+		 * starpu_worker_get_ids_by_type now, because it is simpler to
+		 * detect a problem in the if/else chain that comes right
+		 * after this block of code. */
+		n_cpus = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, cpu_workers, STARPU_MAXCPUS);
+		n_cudas = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, cuda_workers, STARPU_MAXCUDADEVS);
+		n_opencls = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, opencl_workers, STARPU_MAXOPENCLDEVS);
+	}
+
+	int *workers;
+	static struct starpu_codelet cl;
+	starpu_codelet_init(&cl);
+	cl.nbuffers = 1;
+	cl.modes[0] = STARPU_RW;
+
+	if (type == STARPU_CPU_WORKER)
+	{
+		if (n_cpus == 0) return -ENODEV;
+		if (id != -1 && id >= n_cpus)
+		{
+			FPRINTF(stderr, "Not enough CPU workers\n");
+			return -ENODEV;
+		}
+		workers = cpu_workers;
+		cl.cpu_funcs[0] = current_config->cpu_func;
+	}
+	else if (type == STARPU_CUDA_WORKER)
+	{
+		if (n_cudas == 0) return -ENODEV;
+		if (id != -1 && id >= n_cudas)
+		{
+			FPRINTF(stderr, "Not enough CUDA workers\n");
+			return -ENODEV;
+		}
+		workers = cuda_workers;
+		cl.cuda_funcs[0] = current_config->cuda_func;
+	}
+	else if (type == STARPU_OPENCL_WORKER)
+	{
+		if (n_opencls == 0) return -ENODEV;
+		if (id != -1 && id >= n_opencls)
+		{
+			FPRINTF(stderr, "Not enough OpenCL workers\n");
+			return -ENODEV;
+		}
+		workers = opencl_workers;
+		cl.opencl_funcs[0] = current_config->opencl_func;
+	}
+	else
+	{
+		return -ENODEV;
+	}
+
+	/* Each task negates the data, so the expected sign alternates */
+	factor = -factor;
+
+	struct starpu_task *task;
+	task = starpu_task_build(&cl,
+				 STARPU_RW, *current_config->handle,
+				 STARPU_TASK_SYNCHRONOUS, 1,
+				 0);
+	task->cl_arg = &factor;
+	task->cl_arg_size = sizeof(factor);
+	if (id != -1)
+	{
+		task->execute_on_a_specific_worker = 1;
+		task->workerid = workers[id];
+	}
+	*taskp = task;
+	return 0;
+}
+
+/*
+ * <device1>_to_<device2> functions.
+ * They all create and submit a task that has to be executed on <device2>,
+ * forcing a copy between <device1> and <device2>.
+ */
+
+/*
+ * Shared body of the *_to_* functions below: creates a task for a worker of
+ * the given type (id is passed as is to create_task()), submits it and
+ * reports the outcome.
+ * caller is the name of the calling function; it is only used in the trace
+ * printed on stderr.
+ * Returns NO_DEVICE when the task could not be created,
+ * TASK_SUBMISSION_FAILURE when starpu_task_submit() failed, and otherwise
+ * the value found in current_config->copy_failed.
+ */
+static enum exit_code submit_copy_task(enum starpu_worker_archtype type, int id, const char *caller)
+{
+	struct starpu_task *task;
+
+	if (create_task(&task, type, id) != 0)
+		return NO_DEVICE;
+
+	if (starpu_task_submit(task) != 0)
+		return TASK_SUBMISSION_FAILURE;
+
+	FPRINTF(stderr, "[%s] : %d\n", caller, current_config->copy_failed);
+	return current_config->copy_failed;
+}
+
+/* Runs the codelet on the first CUDA worker */
+static enum exit_code ram_to_cuda(void)
+{
+	return submit_copy_task(STARPU_CUDA_WORKER, 0, __starpu_func__);
+}
+
+/* Runs the codelet on the second CUDA worker */
+static enum exit_code cuda_to_cuda(void)
+{
+	return submit_copy_task(STARPU_CUDA_WORKER, 1, __starpu_func__);
+}
+
+/* Runs the codelet on a CPU worker, without choosing which one */
+static enum exit_code cuda_to_ram(void)
+{
+	return submit_copy_task(STARPU_CPU_WORKER, -1, __starpu_func__);
+}
+
+/* Runs the codelet on an OpenCL worker, without choosing which one */
+static enum exit_code ram_to_opencl(void)
+{
+	return submit_copy_task(STARPU_OPENCL_WORKER, -1, __starpu_func__);
+}
+
+/* Runs the codelet on a CPU worker, without choosing which one */
+static enum exit_code opencl_to_ram(void)
+{
+	return submit_copy_task(STARPU_CPU_WORKER, -1, __starpu_func__);
+}
+/* End of the <device1>_to_<device2> functions.
*/ + +static void run_cuda(int async, struct data_interface_test_summary *s) +{ + /* RAM -> CUDA (-> CUDA) -> RAM */ + int err; + err = ram_to_cuda(); + set_field(s, async==1?&s->cpu_to_cuda_async:&s->cpu_to_cuda, err); + /* If this failed, there is no point in continuing. */ + if (err != SUCCESS) + return; + + if (starpu_cuda_worker_get_count() >= 2) + { + err = cuda_to_cuda(); + } + else + { + err = UNTESTED; + } + set_field(s, async==1?&s->cuda_to_cuda_async:&s->cuda_to_cuda, err); + /* Even if cuda_to_cuda() failed, a valid copy is left on the first + * cuda device, which means we can safely test cuda_to_ram() */ + + err = cuda_to_ram(); + set_field(s, async==1?&s->cuda_to_cpu_async:&s->cuda_to_cpu, err); +} + +static void run_opencl(int async, struct data_interface_test_summary *s) +{ + /* RAM -> OpenCL -> RAM */ + int err; + + err = ram_to_opencl(); + set_field(s, async==1?&s->cpu_to_opencl_async:&s->cpu_to_opencl, err); + if (err != SUCCESS) + return; + + err = opencl_to_ram(); + set_field(s, async==1?&s->opencl_to_cpu_async:&s->opencl_to_cpu, err); +} + +static void ram_to_ram(struct data_interface_test_summary *s) +{ + int err; + struct starpu_task *task; + starpu_data_handle_t src, dst; + void *src_interface, *dst_interface; + + src = *current_config->handle; + dst = *current_config->dummy_handle; + + /* We do not care about the nodes */ + src_interface = starpu_data_get_interface_on_node(src, STARPU_MAIN_RAM); + dst_interface = starpu_data_get_interface_on_node(dst, STARPU_MAIN_RAM); + if (src->ops->copy_methods->ram_to_ram) + src->ops->copy_methods->ram_to_ram(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM); + else + src->ops->copy_methods->any_to_any(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM, NULL); + + err = create_task(&task, STARPU_CPU_WORKER, -1); + if (err != 0) + goto out; + + task->handles[0] = dst; + err = starpu_task_submit(task); + + if (err != 0) + { + err = TASK_SUBMISSION_FAILURE; + goto out; + } + + 
FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); + err = current_config->copy_failed; + +out: + set_field(s, &s->cpu_to_cpu, err); +} + +static void run_async(struct data_interface_test_summary *s) +{ + int async = starpu_asynchronous_copy_disabled(); + if (async == 1) + { + FPRINTF(stderr, "Asynchronous copies have been disabled\n"); + return; + } + run_cuda(1, s); + run_opencl(1, s); +} + +static void run_sync(struct data_interface_test_summary *s) +{ + starpu_data_handle_t handle = *current_config->handle; + struct starpu_data_copy_methods new_copy_methods; + struct starpu_data_copy_methods *old_copy_methods; + + old_copy_methods = (struct starpu_data_copy_methods *) handle->ops->copy_methods; + + memcpy(&new_copy_methods, old_copy_methods, sizeof(struct starpu_data_copy_methods)); + + new_copy_methods.ram_to_cuda_async = NULL; + new_copy_methods.cuda_to_cuda_async = NULL; + new_copy_methods.cuda_to_ram_async = NULL; + new_copy_methods.ram_to_opencl_async = NULL; + new_copy_methods.opencl_to_ram_async = NULL; + + handle->ops->copy_methods = &new_copy_methods; + + run_cuda(0, s); + run_opencl(0, s); + + handle->ops->copy_methods = old_copy_methods; +} + +static void compare(struct data_interface_test_summary *s) +{ + int err; + void *interface_a, *interface_b; + starpu_data_handle_t handle, dummy_handle; + + handle = *current_config->handle; + dummy_handle = *current_config->dummy_handle; + interface_a = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + interface_b = starpu_data_get_interface_on_node(dummy_handle, STARPU_MAIN_RAM); + + err = handle->ops->compare(interface_a, interface_b); + s->compare = (err == 0) ? 
FAILURE : SUCCESS; + + set_field(s, &s->compare, s->compare); +} + +static void to_pointer(struct data_interface_test_summary *s) +{ + starpu_data_handle_t handle; + + s->to_pointer = UNTESTED; + handle = *current_config->handle; + if (handle->ops->to_pointer) + { + unsigned int node; + unsigned int tests = 0; + for (node = 0; node < STARPU_MAXNODES; node++) + { + if (starpu_node_get_kind(node) != STARPU_CPU_RAM) + continue; + if (!starpu_data_test_if_allocated_on_node(handle, node)) + continue; + + void *data_interface = starpu_data_get_interface_on_node(handle, node); + void *ptr = handle->ops->to_pointer(data_interface, node); + if (ptr != current_config->ptr) + { + s->to_pointer = FAILURE; + break; + } + tests++; + } + if (tests > 0) + s->to_pointer = SUCCESS; + } + set_field(s, &s->to_pointer, s->to_pointer); +} + +static void pack_unpack(struct data_interface_test_summary *s) +{ + starpu_data_handle_t handle; + starpu_data_handle_t dummy_handle; + int err = UNTESTED; + + handle = *current_config->handle; + dummy_handle = *current_config->dummy_handle; + if (handle->ops->pack_data && handle->ops->unpack_data) + { + void *ptr = NULL; + starpu_ssize_t size = 0; + starpu_data_pack(handle, &ptr, &size); + if (size != 0) + { + struct starpu_task *task; + void *mem = (void *)starpu_malloc_on_node_flags(STARPU_MAIN_RAM, size, 0); + + starpu_data_acquire(dummy_handle, STARPU_W); + starpu_data_unpack(dummy_handle, mem, size); + starpu_data_unpack(dummy_handle, ptr, size); + starpu_data_release(dummy_handle); + + factor = -factor; + err = create_task(&task, STARPU_CPU_WORKER, -1); + if (err != SUCCESS) goto out; + + task->handles[0] = dummy_handle; + err = starpu_task_submit(task); + if (err != 0) + { + err = TASK_SUBMISSION_FAILURE; + goto out; + } + + FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); + err = current_config->copy_failed; + } + } + + out: + set_field(s, &s->pack, err); +} + +static int load_conf(struct test_config *config) +{ 
+ if (!config || +#ifdef STARPU_USE_CPU + !config->cpu_func || + !config->dummy_handle || +#endif +#ifdef STARPU_USE_CUDA + !config->cuda_func || +#endif +#ifdef STARPU_USE_OPENCL + !config->opencl_func || +#endif + !config->handle) + { + return 1; + } + + current_config = config; + return 0; +} + +int run_tests(struct test_config *conf, struct data_interface_test_summary *s) +{ + summary_init(s); + + if (load_conf(conf) == 1) + { + FPRINTF(stderr, "Failed to load conf.\n"); + return 0; + } + + run_async(s); + run_sync(s); + + ram_to_ram(s); + compare(s); + to_pointer(s); + + pack_unpack(s); + + return 1; +} diff --git a/tests/datawizard/interfaces/test_interfaces.h b/tests/datawizard/interfaces/test_interfaces.h new file mode 100644 index 0000000..8feb46b --- /dev/null +++ b/tests/datawizard/interfaces/test_interfaces.h @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#ifndef TEST_INTERFACES_H +#define TEST_INTERFACES_H + +#include "../../helper.h" + +/* + * Users do not know about this enum. They only know that SUCCESS is 0, and + * FAILURE is 1. Therefore, the values of SUCCESS and FAILURE shall not be + * changed. 
+ */ +enum exit_code +{ + SUCCESS = 0, + FAILURE = 1, + UNTESTED = 2, + NO_DEVICE = 3, + TASK_SUBMISSION_FAILURE = 4 +}; + +struct test_config +{ + /** we use pointers as we want to allow static initializations in the main application */ + /* A pointer to a registered handle */ + starpu_data_handle_t *handle; + void *ptr; + + /* A pointer to a registered handle, that will be used to test + * RAM to RAM copy. The values it points to should be different from + * the ones pointed to by the previous handle. */ + starpu_data_handle_t *dummy_handle; + void *dummy_ptr; + + /* StarPU codelets. The following functions should : + * 1) Check that the values are correct + * 2) Negate every element + */ + starpu_cpu_func_t cpu_func; + starpu_cuda_func_t cuda_func; + starpu_hip_func_t hip_func; + starpu_opencl_func_t opencl_func; + + /* The previous codelets must update this field at the end of their + * execution. copy_failed must be FAILURE if the copy failed, SUCCESS otherwise. */ + enum exit_code copy_failed; + + /* A human-readable name for the test */ + const char *name; +}; + +struct data_interface_test_summary +{ + int success; + + /* Copy methods */ + int cpu_to_cpu; + int cpu_to_cuda; + int cuda_to_cuda; + int cuda_to_cpu; + int cpu_to_cuda_async; + int cuda_to_cpu_async; + int cuda_to_cuda_async; + int cpu_to_hip; + int hip_to_hip; + int hip_to_cpu; + int cpu_to_hip_async; + int hip_to_cpu_async; + int hip_to_hip_async; + int cpu_to_opencl; + int opencl_to_cpu; + int cpu_to_opencl_async; + int opencl_to_cpu_async; + + /* Other stuff */ + int compare; + int to_pointer; + int pack; +}; + +void data_interface_test_summary_print(FILE *f, struct data_interface_test_summary *summary); +int data_interface_test_summary_success(struct data_interface_test_summary *summary); + +int run_tests(struct test_config*, struct data_interface_test_summary *summary); + +#endif /* !TEST_INTERFACES_H */ diff --git a/tests/datawizard/interfaces/test_interfaces.sh 
b/tests/datawizard/interfaces/test_interfaces.sh new file mode 100755 index 0000000..3c52f80 --- /dev/null +++ b/tests/datawizard/interfaces/test_interfaces.sh @@ -0,0 +1,28 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +for i in bcsr block coo csr matrix multiformat ndim tensor variable vector void +do + $MS_LAUNCHER $STARPU_LAUNCH ./tests/datawizard/interfaces/$i/${i}_interface + ret=$? + if test "$ret" = "0" + then + echo "Interface $i: success" + else + echo "Interface $i: failure" + fi +done diff --git a/tests/datawizard/interfaces/variable/variable_cuda.cu b/tests/datawizard/interfaces/variable/variable_cuda.cu new file mode 100644 index 0000000..7282a91 --- /dev/null +++ b/tests/datawizard/interfaces/variable/variable_cuda.cu @@ -0,0 +1,69 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../test_interfaces.h" + +extern struct test_config variable_config; + +static __global__ void variable_cuda(int *val, int *err, int factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i > 0) + return; + + if (*val != 42 * factor) + *err = 1; + else + *val *= -1; +} + +extern "C" void test_variable_cuda_func(void *buffers[], void *args) +{ + cudaError_t error; + int *ret; + + error = cudaMalloc(&ret, sizeof(int)); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + error = cudaMemcpyAsync(ret, + &variable_config.copy_failed, + sizeof(int), + cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); + int factor = *(int*) args; + + unsigned threads_per_block = 64; + unsigned nblocks = 1; + + variable_cuda<<>>(val, ret, factor); + error = cudaGetLastError(); + if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); + error = cudaMemcpyAsync(&variable_config.copy_failed, + ret, + sizeof(int), + cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + STARPU_CUDA_REPORT_ERROR(error); + + cudaFree(ret); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} diff --git a/tests/datawizard/interfaces/variable/variable_interface.c b/tests/datawizard/interfaces/variable/variable_interface.c new file mode 100644 index 0000000..2edc4e2 --- /dev/null +++ b/tests/datawizard/interfaces/variable/variable_interface.c @@ -0,0 +1,110 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../test_interfaces.h" + +static int variable; +static int variable2; +static starpu_data_handle_t variable_handle; +static starpu_data_handle_t variable2_handle; + +/* Codelets */ +void test_variable_cpu_func(void *buffers[], void *args); +#ifdef STARPU_USE_CUDA +extern void test_variable_cuda_func(void *buffers[], void *args); +#endif +#ifdef STARPU_USE_OPENCL +extern void test_variable_opencl_func(void *buffers[], void *args); +#endif + +struct test_config variable_config = +{ + .cpu_func = test_variable_cpu_func, +#ifdef STARPU_USE_CUDA + .cuda_func = test_variable_cuda_func, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_func = test_variable_opencl_func, +#endif + .handle = &variable_handle, + .ptr = &variable, + .dummy_handle = &variable2_handle, + .dummy_ptr = &variable2, + .copy_failed = SUCCESS, + .name = "variable_interface" +}; + +void test_variable_cpu_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int *val; + int factor; + + val = (int *) STARPU_VARIABLE_GET_PTR(buffers[0]); + factor = *(int *)args; + + if (*val != 42 * factor) + variable_config.copy_failed = FAILURE; + else + *val *= -1; +} + +static void register_data(void) +{ + variable = 42; + variable2 = 12; + + starpu_variable_data_register(&variable_handle, STARPU_MAIN_RAM, + (uintptr_t) &variable, sizeof(variable)); + 
starpu_variable_data_register(&variable2_handle, STARPU_MAIN_RAM, + (uintptr_t) &variable2, sizeof(variable2)); +} + +static void unregister_data(void) +{ + starpu_data_unregister(variable_handle); + starpu_data_unregister(variable2_handle); +} + +int main(int argc, char **argv) +{ + int ret; + struct data_interface_test_summary summary; + + struct starpu_conf conf; + starpu_conf_init(&conf); + + conf.ncuda = 2; + conf.nopencl = 1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + run_tests(&variable_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + data_interface_test_summary_print(stderr, &summary); + + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/interfaces/variable/variable_opencl.c b/tests/datawizard/interfaces/variable/variable_opencl.c new file mode 100644 index 0000000..d9621ac --- /dev/null +++ b/tests/datawizard/interfaces/variable/variable_opencl.c @@ -0,0 +1,115 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/variable/variable_opencl_kernel.cl" + +extern struct test_config variable_config; +static struct starpu_opencl_program opencl_program; + +void test_variable_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, ret; + int factor = *(int *) args; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); + + cl_context context; + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(int), &variable_config.copy_failed, &err); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "variable_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 1, sizeof(fail), &fail); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 2, sizeof(factor), &factor); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global = 1; + size_t local = 1; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &variable_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + 
clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); + return; +} diff --git a/tests/datawizard/interfaces/variable/variable_opencl_kernel.cl b/tests/datawizard/interfaces/variable/variable_opencl_kernel.cl new file mode 100644 index 0000000..645bb9b --- /dev/null +++ b/tests/datawizard/interfaces/variable/variable_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +__kernel void variable_opencl(__global int *val, + __global int *err, + int factor) +{ + const int i = get_global_id(0); + if (i > 0) + return; + + if (*val != 42 * factor) + *err = 1; + else + *val *= -1; +} + diff --git a/tests/datawizard/interfaces/vector/vector_cuda.cu b/tests/datawizard/interfaces/vector/vector_cuda.cu new file mode 100644 index 0000000..660ebdc --- /dev/null +++ b/tests/datawizard/interfaces/vector/vector_cuda.cu @@ -0,0 +1,68 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include "../test_interfaces.h" + +extern struct test_config vector_config; + +__global__ void framework_cuda(int *val, unsigned n, int *err, int factor) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i >= n) + return; + + if (val[i] != i*factor) + *err = 1; + else + val[i] = -val[i]; +} + +extern "C" void test_vector_cuda_func(void *buffers[], void *args) +{ + cudaError_t error; + int *ret; + + error = cudaMalloc(&ret, sizeof(int)); + if (error != cudaSuccess) + { + fprintf(stderr, "cudaMalloc failed...\n"); + return; + } + + error = cudaMemcpyAsync(ret, &vector_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + return; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + int factor = *(int*) args; + + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; + + framework_cuda<<>>(val, n, ret, factor); + error = cudaGetLastError(); + if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); + error = cudaMemcpyAsync(&vector_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (error != cudaSuccess) + { + return; + } + + cudaFree(ret); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} diff --git 
a/tests/datawizard/interfaces/vector/vector_interface.c b/tests/datawizard/interfaces/vector/vector_interface.c new file mode 100644 index 0000000..6920301 --- /dev/null +++ b/tests/datawizard/interfaces/vector/vector_interface.c @@ -0,0 +1,124 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../test_interfaces.h" +#include "../../../helper.h" + +/* Prototypes */ +static void register_data(void); +static void unregister_data(void); +void test_vector_cpu_func(void *buffers[], void *args); +#ifdef STARPU_USE_CUDA +extern void test_vector_cuda_func(void *buffers[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void test_vector_opencl_func(void *buffers[], void *args); +#endif + +starpu_data_handle_t vector_handle; +starpu_data_handle_t vector2_handle; + +#define VECTOR_SIZE 123 +static int vector[VECTOR_SIZE]; +static int vector2[VECTOR_SIZE]; + +struct test_config vector_config = +{ + .cpu_func = test_vector_cpu_func, +#ifdef STARPU_USE_CUDA + .cuda_func = test_vector_cuda_func, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_func = test_vector_opencl_func, +#endif + .handle = &vector_handle, + .ptr = vector, + .dummy_handle = &vector2_handle, + .dummy_ptr = vector2, + .copy_failed = SUCCESS, + .name = "vector_interface" +}; + +static void register_data(void) +{ + /* Initializing data */ + int i; 
+ for (i = 0; i < VECTOR_SIZE; i++) + vector[i] = i; + + /* Registering data */ + starpu_vector_data_register(&vector_handle, + STARPU_MAIN_RAM, + (uintptr_t)vector, + VECTOR_SIZE, + sizeof(int)); + starpu_vector_data_register(&vector2_handle, + STARPU_MAIN_RAM, + (uintptr_t)vector2, + VECTOR_SIZE, + sizeof(int)); +} + +static void unregister_data(void) +{ + starpu_data_unregister(vector_handle); + starpu_data_unregister(vector2_handle); +} + +void test_vector_cpu_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + int *val = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); + int factor = *(int*)args; + unsigned int i; + for (i = 0; i < n; i++) + { + if (val[i] != (int)i*factor) + { + vector_config.copy_failed = FAILURE; + return; + } + val[i] = -val[i]; + } +} + +int main(int argc, char **argv) +{ + struct data_interface_test_summary summary; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + run_tests(&vector_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + data_interface_test_summary_print(stderr, &summary); + + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/interfaces/vector/vector_opencl.c b/tests/datawizard/interfaces/vector/vector_opencl.c new file mode 100644 index 0000000..0ba8ec2 --- /dev/null +++ b/tests/datawizard/interfaces/vector/vector_opencl.c @@ -0,0 +1,130 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../test_interfaces.h" + +#define KERNEL_LOCATION "tests/datawizard/interfaces/vector/vector_opencl_kernel.cl" +extern struct test_config vector_config; +static struct starpu_opencl_program opencl_program; + +void +test_vector_opencl_func(void *buffers[], void *args) +{ + STARPU_SKIP_IF_VALGRIND; + + int id, devid, ret; + int factor = *(int *) args; + + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_event event; + + ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + + cl_context context; + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + starpu_opencl_get_context(devid, &context); + + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &vector_config.copy_failed, &err); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = starpu_opencl_load_kernel(&kernel, + &queue, + &opencl_program, + "test_vector_opencl", + devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + int nargs; + nargs = starpu_opencl_set_kernel_args(&err, &kernel, + sizeof(val), &val, + sizeof(n), &n, + sizeof(fail), &fail, + sizeof(factor), 
&factor, + 0); + + if (nargs != 4) + { + fprintf(stderr, "Failed to set argument #%d\n", err); + STARPU_OPENCL_REPORT_ERROR(err); + } + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, + device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + if (local > global) + local = global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); + + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + + err = clEnqueueReadBuffer(queue, + fail, + CL_TRUE, + 0, + sizeof(int), + &vector_config.copy_failed, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + clFinish(queue); + starpu_opencl_collect_stats(event); + clReleaseEvent(event); + + starpu_opencl_release_kernel(kernel); + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +} diff --git a/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl b/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl new file mode 100644 index 0000000..20ed95b --- /dev/null +++ b/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +__kernel void test_vector_opencl(__global int *val, + unsigned int nx, + __global int *err, + int factor) +{ + const int i = get_global_id(0); + if (i >= nx) + return; + + if (val[i] != i * factor) + *err = 1; + else + val[i] = - val[i]; +} diff --git a/tests/datawizard/interfaces/void/void_interface.c b/tests/datawizard/interfaces/void/void_interface.c new file mode 100644 index 0000000..537e475 --- /dev/null +++ b/tests/datawizard/interfaces/void/void_interface.c @@ -0,0 +1,79 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include +#include "../test_interfaces.h" +#include "../../../helper.h" + +void fake_func(void *buffers[], void *arg) +{ + (void) buffers; + (void) arg; +} + +static starpu_data_handle_t void_handle; +static starpu_data_handle_t void2_handle; + +struct test_config void_config = +{ + .cpu_func = fake_func, +#ifdef STARPU_USE_CUDA + .cuda_func = fake_func, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_func = fake_func, +#endif + .handle = &void_handle, + .dummy_handle = &void2_handle, + .copy_failed = SUCCESS, + .name = "void_interface" +}; + +static void register_data(void) +{ + starpu_void_data_register(&void_handle); + starpu_void_data_register(&void2_handle); +} + +static void unregister_data(void) +{ + starpu_data_unregister(void_handle); + starpu_data_unregister(void2_handle); +} + +int main(int argc, char **argv) +{ + struct data_interface_test_summary summary; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncuda = 2; + conf.nopencl = 1; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + register_data(); + + run_tests(&void_config, &summary); + + unregister_data(); + + starpu_shutdown(); + + data_interface_test_summary_print(stderr, &summary); + + return data_interface_test_summary_success(&summary); +} diff --git a/tests/datawizard/invalidate_pending_requests.c b/tests/datawizard/invalidate_pending_requests.c new file mode 100644 index 0000000..166be64 --- /dev/null +++ b/tests/datawizard/invalidate_pending_requests.c @@ -0,0 +1,60 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Try invalidating a variable which is pending a request + */ +#define SIZE (100<<20) + +int main(void) +{ + int ret; + char *var = NULL; + starpu_data_handle_t handle; + + ret = starpu_init(NULL); + if (ret == -ENODEV) goto skip; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0) + goto enodev; + + var = malloc(SIZE); + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)var, SIZE); + + /* Let a request fly */ + starpu_fxt_trace_user_event_string("requesting"); + starpu_data_fetch_on_node(handle, 1, 1); + starpu_fxt_trace_user_event_string("requested"); + /* But suddenly invalidate the data while it's on the fly! */ + starpu_data_invalidate_submit(handle); + starpu_fxt_trace_user_event_string("invalidated"); + + starpu_data_unregister(handle); + free(var); + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); +skip: + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/lazy_allocation.c b/tests/datawizard/lazy_allocation.c new file mode 100644 index 0000000..1f0cba2 --- /dev/null +++ b/tests/datawizard/lazy_allocation.c @@ -0,0 +1,238 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Trigger lazy allocation by registering NULL, then setting a value, and + * then checking it + */ + +#define VECTORSIZE 1024 + +static starpu_data_handle_t v_handle; + +/* + * Memset + */ + +#ifdef STARPU_USE_CUDA +static void cuda_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +static void opencl_memset_codelet(void *buffers[], void *args) +{ + (void) args; + + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + + cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); + char *v = malloc(length); + STARPU_ASSERT(v != NULL); + memset(v, 42, length); + + cl_int err; + err = clEnqueueWriteBuffer(queue, + buffer, + CL_FALSE, + 0, /* offset */ + length, /* sizeof (char) */ + v, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL /* event */); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); +} +#endif + +void 
cpu_memset_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + memset(buf, 42, length * sizeof(*buf)); +} + +static struct starpu_codelet memset_cl = +{ + .cpu_funcs = {cpu_memset_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_memset_codelet}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_memset_codelet}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_memset_codelet"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +/* + * Check content + */ + +void cpu_check_content_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + unsigned i; + for (i = 0; i < length; i++) + { + if (buf[i] != 42) + { + FPRINTF(stderr, "buf[%u] is %c while it should be %c\n", i, buf[i], 42); + exit(-1); + } + } +} + +#ifdef STARPU_USE_CUDA +static void cuda_check_content_codelet(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + + unsigned i; + for (i = 0; i < length; i++) + { + char dst; + cudaMemcpyAsync(&dst, &buf[i], sizeof(char), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + if (dst != 42) + { + FPRINTF(stderr, "buf[%u] is %c while it should be %c\n", i, dst, 42); + exit(-1); + } + } +} +#endif +#ifdef STARPU_USE_OPENCL +static void opencl_check_content_codelet(void *buffers[], void *args) +{ + (void)args; + + STARPU_SKIP_IF_VALGRIND; + + cl_command_queue queue; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + starpu_opencl_get_queue(devid, &queue); + + cl_mem buf = (cl_mem) 
STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); + + unsigned i; + for (i = 0; i < length; i++) + { + char dst; + cl_int err; + + err = clEnqueueReadBuffer(queue, + buf, + CL_FALSE, + i * sizeof(dst), + sizeof(dst), + &dst, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL /* event */); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + + clFinish(queue); + if (dst != 42) + { + FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, dst, 42); + exit(-1); + } + } +} +#endif + +static struct starpu_codelet check_content_cl = +{ + .cpu_funcs = {cpu_check_content_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_check_content_codelet}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_check_content_codelet}, +#endif + .cpu_funcs_name = {"cpu_check_content_codelet"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); + + ret = starpu_task_insert(&memset_cl, STARPU_W, v_handle, 0); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + ret = starpu_task_insert(&check_content_cl, STARPU_R, v_handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(v_handle); + + starpu_shutdown(); + return EXIT_SUCCESS; +} diff --git a/tests/datawizard/locality.c b/tests/datawizard/locality.c new file mode 100644 index 0000000..34d076c --- /dev/null +++ b/tests/datawizard/locality.c @@ -0,0 
+1,174 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This is a dumb sample of stencil application + * + * Dumb domain split in N pieces: + * + * 0 | 1 | ... | N-1 + * + * for each simulation iteration, a task works on some adjacent pieces + * + * Locality is thus set on the central piece. + */ + +#include +#include "../helper.h" + +#ifdef STARPU_USE_FXT +#define N 5 +#define ITER 5 +#else +#define N 50 +#define ITER 50 +#endif + +int task_worker[N][ITER]; +int worker_task[STARPU_NMAXWORKERS][N*ITER]; +unsigned worker_ntask[STARPU_NMAXWORKERS]; + +void cpu_f(void *descr[], void *_args) +{ + (void)descr; + unsigned i, loop, worker = starpu_worker_get_id(); + enum starpu_worker_archtype worker_type = starpu_worker_get_type(worker); + + starpu_codelet_unpack_args(_args, &loop, &i); + task_worker[i][loop] = worker; + worker_task[worker][worker_ntask[worker]++] = i; + if (worker_type == STARPU_CPU_WORKER) + starpu_sleep(0.001); + else + starpu_sleep(0.0001); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; (void) i; + STARPU_ASSERT(a->ndevices == 1); + if (a->devices[0].type == STARPU_CPU_WORKER) + { + STARPU_ASSERT(a->devices[0].ncores == 1); + return 1000; + } + else + return 100; +} + +static struct starpu_perfmodel perf_model = +{ + .type = 
STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { cpu_f }, + .cpu_funcs_name = { "cpu_f" }, + .cuda_funcs = { cpu_f }, + .opencl_funcs = { cpu_f }, + .nbuffers = 4, + .modes = + { + STARPU_RW, + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, + }, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ + int ret; + starpu_data_handle_t A[N]; + starpu_data_handle_t B[N]; + unsigned i, loop, finished; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Get most parallelism by using an arbiter */ + starpu_arbiter_t arbiter = starpu_arbiter_create(); + for (i = 0; i < N; i++) + { + starpu_void_data_register(&A[i]); + starpu_void_data_register(&B[i]); + starpu_data_assign_arbiter(A[i], arbiter); + } + + for (loop = 0; loop < ITER; loop++) + { + for (i = 1; i < N-1; i++) + { + ret = starpu_task_insert(&cl, + STARPU_RW, B[i], + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i-1], + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i], + STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i+1], + STARPU_VALUE, &loop, sizeof(loop), + STARPU_VALUE, &i, sizeof(i), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + } + + starpu_task_wait_for_all(); + + for (i = 0; i < N; i++) + { + starpu_data_unregister(A[i]); + starpu_data_unregister(B[i]); + } + starpu_arbiter_destroy(arbiter); + + printf("worker where each domain piece was computed, over time\n"); + for (loop = 0; loop < ITER; loop++) + { + for (i = 1; i < N-1; i++) + { + printf("%02d ", task_worker[i][loop]); + } + printf("\n"); + } + printf("\n"); + + printf("domain piece that each worker has computed, over time\n"); + loop = 0; + do + { + finished = 1; + for (i = 0; i < starpu_worker_get_count(); 
i++) + { + if (loop < worker_ntask[i]) + { + printf("%02d ", worker_task[i][loop]); + finished = 0; + } + else + printf(" "); + } + loop++; + printf("\n"); + } + while (!finished && loop < 100); + + starpu_shutdown(); + return EXIT_SUCCESS; +} diff --git a/tests/datawizard/locality.sh b/tests/datawizard/locality.sh new file mode 100755 index 0000000..e8f19dc --- /dev/null +++ b/tests/datawizard/locality.sh @@ -0,0 +1,41 @@ +#!/bin/sh -x +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Test generation of FxT traces + +# Testing another specific scheduler, no need to run this +[ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = modular-eager ] || exit 77 + +set -e + +PREFIX=$(dirname $0) +rm -rf $PREFIX/locality.traces +mkdir -p $PREFIX/locality.traces + +test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77 + +export STARPU_FXT_PREFIX=$PREFIX/locality.traces +STARPU_FXT_TRACE=1 STARPU_SCHED=modular-eager $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/locality +$STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 + +# Check that they are approved by Grenoble :) + +if type pj_dump > /dev/null 2> /dev/null +then + $PREFIX/../../tools/starpu_paje_sort $STARPU_FXT_PREFIX/paje.trace + pj_dump -e 0 $STARPU_FXT_PREFIX/paje.trace +fi diff --git a/tests/datawizard/manual_reduction.c b/tests/datawizard/manual_reduction.c new file mode 100644 index 0000000..cced6ac --- /dev/null +++ b/tests/datawizard/manual_reduction.c @@ -0,0 +1,321 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Allocate one buffer per worker, doing computations with it, and + * eventually reducing it into a single buffer + */ + +#define INIT_VALUE 42 +#define NTASKS 10000 + +static unsigned variable; +static starpu_data_handle_t variable_handle; + +static uintptr_t per_worker[STARPU_NMAXWORKERS]; +static starpu_data_handle_t per_worker_handle[STARPU_NMAXWORKERS]; + +static unsigned ndone; + +/* Create per-worker handles */ +static void initialize_per_worker_handle(void *arg) +{ + (void)arg; + int workerid = starpu_worker_get_id_check(); + + /* Allocate memory on the worker, and initialize it to 0 */ + switch (starpu_worker_get_type(workerid)) + { + case STARPU_CPU_WORKER: + per_worker[workerid] = (uintptr_t)calloc(1, sizeof(variable)); + break; +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_WORKER: + { + cl_context context; + cl_command_queue queue; + cl_int err; + + starpu_opencl_get_current_context(&context); + starpu_opencl_get_current_queue(&queue); + + cl_mem ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(variable), NULL, NULL); + /* Poor's man memset */ + unsigned zero = 0; + err = clEnqueueWriteBuffer(queue, ptr, CL_FALSE, 0, sizeof(variable), (void *)&zero, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + per_worker[workerid] = (uintptr_t)ptr; + } + + break; +#endif +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_WORKER: + { + cudaError_t status; + status = cudaMalloc((void **)&per_worker[workerid], sizeof(variable)); + if (!per_worker[workerid] || (status != cudaSuccess)) + { + STARPU_CUDA_REPORT_ERROR(status); + } + status = cudaMemsetAsync((void *)per_worker[workerid], 0, sizeof(variable), starpu_cuda_get_local_stream()); + if (!status) + status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + break; + } +#endif + default: + STARPU_ABORT(); + break; + } + FPRINTF(stderr, "worker %d got 
data %lx\n", workerid, (unsigned long) per_worker[workerid]); + + STARPU_ASSERT(per_worker[workerid]); +} + +/* + * Implement reduction method + */ + +void cpu_redux_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned *a = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *b = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + + FPRINTF(stderr, "%u = %u + %u\n", *a + *b, *a, *b); + + *a = *a + *b; +} + +static struct starpu_codelet reduction_codelet = +{ + .cpu_funcs = {cpu_redux_func}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_R}, + .model = NULL +}; + +/* + * Use per-worker local copy + */ + +void cpu_func_incr(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + *val = *val + 1; + STARPU_ATOMIC_ADD(&ndone, 1); +} + +#ifdef STARPU_USE_CUDA +/* dummy CUDA implementation */ +static void cuda_func_incr(void *descr[], void *cl_arg) +{ + (void)cl_arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + + unsigned h_val, h_val2; + cudaError_t status; + status = cudaMemcpyAsync(&h_val, val, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + h_val++; + status = cudaMemcpyAsync(val, &h_val, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + + status = cudaMemcpyAsync(&h_val2, val, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); + if (status) + STARPU_CUDA_REPORT_ERROR(status); + STARPU_ASSERT_MSG(h_val2 == h_val, "%lx should be 
%u, not %u, I have just written it ?!\n", (unsigned long)(uintptr_t) val, h_val, h_val2); + + STARPU_ATOMIC_ADD(&ndone, 1); +} +#endif + +#ifdef STARPU_USE_OPENCL +/* dummy OpenCL implementation */ +static void opencl_func_incr(void *descr[], void *cl_arg) +{ + (void)cl_arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem d_val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned h_val; + + cl_int err; + cl_command_queue queue; + + starpu_opencl_get_current_queue(&queue); + + err = clEnqueueReadBuffer(queue, d_val, CL_FALSE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + h_val++; + err = clEnqueueWriteBuffer(queue, d_val, CL_FALSE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + STARPU_ATOMIC_ADD(&ndone, 1); +} +#endif + +static struct starpu_codelet use_data_on_worker_codelet = +{ + .cpu_funcs = {cpu_func_incr}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_func_incr}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_func_incr}, +#endif + .nbuffers = 1, + .modes = {STARPU_RW}, + .model = NULL +}; + +int main(int argc, char **argv) +{ + unsigned worker; + unsigned i; + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.ncuda = -1; + conf.nopencl = -1; + + variable = INIT_VALUE; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nworkers = starpu_worker_get_count(); + + starpu_variable_data_register(&variable_handle, STARPU_MAIN_RAM, (uintptr_t)&variable, sizeof(unsigned)); + + /* Allocate a per-worker handle on each worker (and initialize it to 0) */ + starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, 
STARPU_CPU|STARPU_CUDA|STARPU_OPENCL); + + /* Register all per-worker handles */ + for (worker = 0; worker < nworkers; worker++) + { + STARPU_ASSERT(per_worker[worker]); + + unsigned memory_node = starpu_worker_get_memory_node(worker); + starpu_variable_data_register(&per_worker_handle[worker], memory_node, + per_worker[worker], sizeof(variable)); + } + + /* Submit NTASKS tasks to the different worker to simulate the usage of a data in reduction */ + for (i = 0; i < NTASKS; i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &use_data_on_worker_codelet; + + int workerid = (i % nworkers); + task->handles[0] = per_worker_handle[workerid]; + + task->execute_on_a_specific_worker = 1; + task->workerid = (unsigned)workerid; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + + /* Perform the reduction of all per-worker handles into the variable_handle */ + for (worker = 0; worker < nworkers; worker++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &reduction_codelet; + + task->handles[0] = variable_handle; + task->handles[1] = per_worker_handle[worker]; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(variable_handle); + + /* Destroy all per-worker handles */ + for (worker = 0; worker < nworkers; worker++) + { + starpu_data_unregister_no_coherency(per_worker_handle[worker]); + switch(starpu_worker_get_type(worker)) + { + case STARPU_CPU_WORKER: + free((void*)per_worker[worker]); + break; +#ifdef STARPU_USE_CUDA + case STARPU_CUDA_WORKER: + cudaFree((void*)per_worker[worker]); + break; +#endif /* !STARPU_USE_CUDA */ +#ifdef STARPU_USE_OPENCL + case STARPU_OPENCL_WORKER: + clReleaseMemObject((void*)per_worker[worker]); + break; +#endif /* !STARPU_USE_OPENCL */ + default: + STARPU_ABORT(); + } + } + + starpu_shutdown(); + + if (variable == 
INIT_VALUE + NTASKS) + ret = EXIT_SUCCESS; + else + { + FPRINTF(stderr, "%u != %d + %d\n", variable, INIT_VALUE, NTASKS); + FPRINTF(stderr, "ndone: %u\n", ndone); + ret = EXIT_FAILURE; + } + STARPU_RETURN(ret); + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + starpu_task_wait_for_all(); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/mpi_like.c b/tests/datawizard/mpi_like.c new file mode 100644 index 0000000..6bc2da5 --- /dev/null +++ b/tests/datawizard/mpi_like.c @@ -0,0 +1,205 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Mimic the behavior of libstarpumpi, tested by a ring of threads which + * increment the same variable one after the other. + * This is the synchronous version: the threads wait for completion of each + * step before continuing. 
+ */ + +#define NTHREADS 4 +#define NITER 2 + +//static starpu_pthread_cond_t cond; +//static starpu_pthread_mutex_t mutex; + +struct thread_data +{ + unsigned index; + unsigned val; + starpu_data_handle_t handle; + starpu_pthread_t thread; + + starpu_pthread_cond_t recv_cond; + starpu_pthread_mutex_t recv_mutex; + unsigned recv_flag; // set when a message is received + unsigned recv_buf; + struct thread_data *neighbour; +}; + +static struct thread_data problem_data[NTHREADS]; + +/* We implement some ring transfer, every thread will try to receive a piece of + * data from its neighbour and increment it before transmitting it to its + * successor. */ + +static void increment_handle(struct thread_data *thread_data) +{ + struct starpu_task *task = starpu_task_create(); + task->cl = &increment_cl; + + task->handles[0] = thread_data->handle; + + task->cl_arg = thread_data; + task->cl_arg_size = sizeof(thread_data); + + task->destroy = 1; + task->detach = 0; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_ASSERT(!ret); + + ret = starpu_task_wait(task); + STARPU_ASSERT(!ret); +} + +static void recv_handle(struct thread_data *thread_data) +{ + starpu_data_acquire(thread_data->handle, STARPU_W); + STARPU_PTHREAD_MUTEX_LOCK(&thread_data->recv_mutex); + + /* We wait for the previous thread to notify that the data is available */ + while (!thread_data->recv_flag) + STARPU_PTHREAD_COND_WAIT(&thread_data->recv_cond, &thread_data->recv_mutex); + + /* We overwrite thread's data with the received value */ + thread_data->val = thread_data->recv_buf; + + /* Notify that we read the value */ + thread_data->recv_flag = 0; + STARPU_PTHREAD_COND_SIGNAL(&thread_data->recv_cond); + +// FPRINTF(stderr, "Thread %d received value %d from thread %d\n", thread_data->index, thread_data->val, (thread_data->index - 1)%NTHREADS); + + STARPU_PTHREAD_MUTEX_UNLOCK(&thread_data->recv_mutex); + starpu_data_release(thread_data->handle); +} + +static void 
send_handle(struct thread_data *thread_data) +{ + struct thread_data *neighbour_data = thread_data->neighbour; + + starpu_data_acquire(thread_data->handle, STARPU_R); + +// FPRINTF(stderr, "Thread %d sends value %d to thread %d\n", thread_data->index, thread_data->val, neighbour_data->index); + /* send the message */ + STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); + neighbour_data->recv_buf = thread_data->val; + neighbour_data->recv_flag = 1; + STARPU_PTHREAD_COND_SIGNAL(&neighbour_data->recv_cond); + + /* wait until it's received (ie. neighbour's recv_flag is set back to 0) */ + while (neighbour_data->recv_flag) + STARPU_PTHREAD_COND_WAIT(&neighbour_data->recv_cond, &neighbour_data->recv_mutex); + + STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); + + starpu_data_release(thread_data->handle); +} + +static void *thread_func(void *arg) +{ + unsigned iter; + struct thread_data *thread_data = (struct thread_data *) arg; + unsigned index = thread_data->index; + + starpu_variable_data_register(&thread_data->handle, STARPU_MAIN_RAM, (uintptr_t)&thread_data->val, sizeof(unsigned)); + + for (iter = 0; iter < NITER; iter++) + { + /* The first thread initiates the first transfer */ + if (!((index == 0) && (iter == 0))) + { + recv_handle(thread_data); + } + + increment_handle(thread_data); + + if (!((index == (NTHREADS - 1)) && (iter == (NITER - 1)))) + { + send_handle(thread_data); + } + } + + return NULL; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + unsigned t; + for (t = 0; t < NTHREADS; t++) + { + problem_data[t].index = t; + problem_data[t].val = 0; + STARPU_PTHREAD_COND_INIT(&problem_data[t].recv_cond, NULL); + STARPU_PTHREAD_MUTEX_INIT(&problem_data[t].recv_mutex, NULL); + problem_data[t].recv_flag = 0; + problem_data[t].neighbour = &problem_data[(t+1)%NTHREADS]; + 
} + + for (t = 0; t < NTHREADS; t++) + { + STARPU_PTHREAD_CREATE(&problem_data[t].thread, NULL, thread_func, &problem_data[t]); + } + + for (t = 0; t < NTHREADS; t++) + { + void *retval; + STARPU_PTHREAD_JOIN(problem_data[t].thread, &retval); + STARPU_ASSERT(retval == NULL); + } + + /* We check that the value in the "last" thread is valid */ + starpu_data_handle_t last_handle = problem_data[NTHREADS - 1].handle; + starpu_data_acquire(last_handle, STARPU_R); + starpu_data_release(last_handle); + + for (t = 0; t < NTHREADS; t++) + { + starpu_data_unregister(problem_data[t].handle); + } + + increment_unload_opencl(); + starpu_shutdown(); + + ret = EXIT_SUCCESS; + if (problem_data[NTHREADS - 1].val != (NTHREADS * NITER)) + { + FPRINTF(stderr, "Final value : %u should be %d\n", problem_data[NTHREADS - 1].val, (NTHREADS * NITER)); + ret = EXIT_FAILURE; + } + else + FPRINTF(stderr, "Final value : %u (niter %u nthread %u)\n", problem_data[NTHREADS - 1].val, NITER, NTHREADS); + + return ret; +} diff --git a/tests/datawizard/mpi_like_async.c b/tests/datawizard/mpi_like_async.c new file mode 100644 index 0000000..e26808a --- /dev/null +++ b/tests/datawizard/mpi_like_async.c @@ -0,0 +1,363 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include +#include "../variable/increment.h" + +/* + * Mimic the behavior of libstarpumpi, tested by a ring of threads which + * increment the same variable one after the other. + * This is the asynchronous version: the threads submit the series of + * synchronizations and tasks. + */ + +#ifdef STARPU_QUICK_CHECK +# define NTHREADS_DEFAULT 4 +# define NITER_DEFAULT 8 +#else +# define NTHREADS_DEFAULT 16 +# define NITER_DEFAULT 128 +#endif + +static unsigned nthreads = NTHREADS_DEFAULT; +static unsigned niter = NITER_DEFAULT; + +//#define DEBUG_MESSAGES 1 + +//static starpu_pthread_cond_t cond; +//static starpu_pthread_mutex_t mutex; + +struct thread_data +{ + unsigned index; + unsigned val; + starpu_data_handle_t handle; + starpu_pthread_t thread; + + starpu_pthread_mutex_t recv_mutex; + unsigned recv_flag; // set when a message is received + unsigned recv_buf; + struct thread_data *neighbour; +}; + +struct data_req +{ + int (*test_func)(void *); + void *test_arg; + struct data_req *next; +}; + +static starpu_pthread_mutex_t data_req_mutex; +static starpu_pthread_cond_t data_req_cond; +struct data_req *data_req_list; +unsigned progress_thread_running; + +static struct thread_data problem_data[NTHREADS_DEFAULT]; + +/* We implement some ring transfer, every thread will try to receive a piece of + * data from its neighbour and increment it before transmitting it to its + * successor. 
*/ + +static void increment_handle_async(struct thread_data *thread_data) +{ + struct starpu_task *task = starpu_task_create(); + task->cl = &increment_cl; + + task->handles[0] = thread_data->handle; + + task->detach = 1; + task->destroy = 1; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_ASSERT(!ret); +} + +static int test_recv_handle_async(void *arg) +{ +// FPRINTF(stderr, "test_recv_handle_async\n"); + + int ret; + struct thread_data *thread_data = (struct thread_data *) arg; + + STARPU_PTHREAD_MUTEX_LOCK(&thread_data->recv_mutex); + + ret = (thread_data->recv_flag == 1); + + if (ret) + { + thread_data->recv_flag = 0; + thread_data->val = thread_data->recv_buf; + } + + STARPU_PTHREAD_MUTEX_UNLOCK(&thread_data->recv_mutex); + + if (ret) + { +#ifdef DEBUG_MESSAGES + FPRINTF(stderr, "Thread %u received value %u from thread %d\n", + thread_data->index, thread_data->val, (thread_data->index - 1)%nthreads); +#endif + starpu_data_release(thread_data->handle); + } + + return ret; +} + +static void recv_handle_async(void *_thread_data) +{ + struct thread_data *thread_data = (struct thread_data *) _thread_data; + + struct data_req *req = (struct data_req *) malloc(sizeof(struct data_req)); + req->test_func = test_recv_handle_async; + req->test_arg = thread_data; + + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + req->next = data_req_list; + data_req_list = req; + STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); +} + +static int test_send_handle_async(void *arg) +{ + int ret; + struct thread_data *thread_data = (struct thread_data *) arg; + struct thread_data *neighbour_data = thread_data->neighbour; + + STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); + ret = (neighbour_data->recv_flag == 0); + STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); + + if (ret) + { +#ifdef DEBUG_MESSAGES + FPRINTF(stderr, "Thread %u sends value %u to thread %u\n", thread_data->index, 
thread_data->val, neighbour_data->index); +#endif + starpu_data_release(thread_data->handle); + } + + return ret; +} + +static void send_handle_async(void *_thread_data) +{ + struct thread_data *thread_data = (struct thread_data *) _thread_data; + struct thread_data *neighbour_data = thread_data->neighbour; + +// FPRINTF(stderr, "send_handle_async\n"); + + /* send the message */ + STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); + neighbour_data->recv_buf = thread_data->val; + neighbour_data->recv_flag = 1; + STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); + + struct data_req *req = (struct data_req *) malloc(sizeof(struct data_req)); + req->test_func = test_send_handle_async; + req->test_arg = thread_data; + + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + req->next = data_req_list; + data_req_list = req; + STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); +} + +static void *progress_func(void *arg) +{ + (void)arg; + + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + + progress_thread_running = 1; + STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); + + while (progress_thread_running || data_req_list) + { + struct data_req *req; + + if (data_req_list == NULL) + STARPU_PTHREAD_COND_WAIT(&data_req_cond, &data_req_mutex); + + req = data_req_list; + + if (req) + { + data_req_list = req->next; + req->next = NULL; + + STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); + + STARPU_VALGRIND_YIELD(); + int ret = req->test_func(req->test_arg); + + if (ret) + { + free(req); + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + } + else + { + /* ret = 0 : the request is not finished, we put it back at the end of the list */ + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + + struct data_req *req_aux = data_req_list; + if (!req_aux) + { + /* The list is empty */ + data_req_list = req; + } + else + { + while (req_aux) + { + if (req_aux->next == NULL) + { + req_aux->next = req; + break; + } + + req_aux = req_aux->next; + } + } + } + } + } + 
STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); + + return NULL; +} + +static void *thread_func(void *arg) +{ + unsigned iter; + struct thread_data *thread_data = (struct thread_data *) arg; + unsigned index = thread_data->index; + int ret; + + starpu_variable_data_register(&thread_data->handle, STARPU_MAIN_RAM, (uintptr_t)&thread_data->val, sizeof(unsigned)); + + for (iter = 0; iter < niter; iter++) + { + /* The first thread initiates the first transfer */ + if (!((index == 0) && (iter == 0))) + { + starpu_data_acquire_cb( + thread_data->handle, STARPU_W, + recv_handle_async, thread_data + ); + } + + increment_handle_async(thread_data); + + if (!((index == (nthreads - 1)) && (iter == (niter - 1)))) + { + starpu_data_acquire_cb( + thread_data->handle, STARPU_R, + send_handle_async, thread_data + ); + } + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + return NULL; +} + +int main(int argc, char **argv) +{ + int ret; + void *retval; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + /* Create a thread to perform blocking calls */ + starpu_pthread_t progress_thread; + STARPU_PTHREAD_MUTEX_INIT(&data_req_mutex, NULL); + STARPU_PTHREAD_COND_INIT(&data_req_cond, NULL); + data_req_list = NULL; + progress_thread_running = 0; + + unsigned t; + for (t = 0; t < nthreads; t++) + { + problem_data[t].index = t; + problem_data[t].val = 0; + STARPU_PTHREAD_MUTEX_INIT(&problem_data[t].recv_mutex, NULL); + problem_data[t].recv_flag = 0; + problem_data[t].neighbour = &problem_data[(t+1)%nthreads]; + } + + STARPU_PTHREAD_CREATE(&progress_thread, NULL, progress_func, NULL); + + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + while (!progress_thread_running) + STARPU_PTHREAD_COND_WAIT(&data_req_cond, &data_req_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); + + for (t = 0; t < nthreads; t++) + { + 
STARPU_PTHREAD_CREATE(&problem_data[t].thread, NULL, thread_func, &problem_data[t]); + } + + for (t = 0; t < nthreads; t++) + { + STARPU_PTHREAD_JOIN(problem_data[t].thread, &retval); + STARPU_ASSERT(retval == NULL); + } + + STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); + progress_thread_running = 0; + STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); + + STARPU_PTHREAD_JOIN(progress_thread, &retval); + STARPU_ASSERT(retval == NULL); + + /* We check that the value in the "last" thread is valid */ + starpu_data_handle_t last_handle = problem_data[nthreads - 1].handle; + starpu_data_acquire(last_handle, STARPU_R); + + ret = EXIT_SUCCESS; + if (problem_data[nthreads - 1].val != (nthreads * niter)) + { + FPRINTF(stderr, "Final value : %u should be %u\n", problem_data[nthreads - 1].val, (nthreads * niter)); + ret = EXIT_FAILURE; + } + else + FPRINTF(stderr, "Final value : %u (niter %u nthread %u)\n", problem_data[nthreads - 1].val, niter, nthreads); + + starpu_data_release(last_handle); + + for (t = 0; t < nthreads; t++) + { + starpu_data_unregister(problem_data[t].handle); + } + + increment_unload_opencl(); /* teardown: undo the increment_load_opencl() done at startup instead of loading again */ + starpu_shutdown(); + + return ret; +} diff --git a/tests/datawizard/no_unregister.c b/tests/datawizard/no_unregister.c new file mode 100644 index 0000000..b4ac874 --- /dev/null +++ b/tests/datawizard/no_unregister.c @@ -0,0 +1,96 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include "../helper.h" + +/* + * Check that not unregistering a data is not too crashy + */ + +void dummy_func(void ** buffers, void * args) +{ + (void) buffers; + (void) args; +} + +static struct starpu_codelet dummy_cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = { dummy_func }, + .cpu_funcs_name = { "dummy_func" }, + .nbuffers = 1 +}; + +int main(void) +{ + int ret; + int buffer[1024]; + starpu_data_handle_t handle; + struct starpu_task *t1,*t2; + struct starpu_conf conf; + +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; +#endif +#ifdef STARPU_SANITIZE_LEAK + return STARPU_TEST_SKIPPED; +#endif + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)buffer, 1024*sizeof(int)); + + t1 = starpu_task_create(); + + t2 = starpu_task_create(); + t2->cl = &dummy_cl; + t2->detach = 0; + t2->handles[0] = handle; + + starpu_task_declare_deps_array(t2, 1, &t1); + + ret = starpu_task_submit(t2); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_submit(t1); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(t2); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git 
a/tests/datawizard/noreclaim.c b/tests/datawizard/noreclaim.c new file mode 100644 index 0000000..fa9beaf --- /dev/null +++ b/tests/datawizard/noreclaim.c @@ -0,0 +1,139 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* + * Stress the memory allocation system and force StarPU to reclaim memory from + * time to time. + */ + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void dummy_func(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet dummy_cl = +{ + .cpu_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static void emit_task(starpu_data_handle_t handle) +{ + struct starpu_task *task = starpu_task_create(); + int ret; + task->cl = &dummy_cl; + task->handles[0] = handle; + ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); +} + +static struct starpu_codelet empty_cl = +{ + .cpu_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .nbuffers = 0, +}; + +static void emit_empty_task(void) +{ + struct starpu_task *task = starpu_task_create(); + int ret; + task->cl = &empty_cl; + ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); +} + +#define TOTAL "100" +#define FILL (99*1024*1024) + +int main(int argc, char **argv) +{ + int ret; + struct starpu_conf conf; + starpu_data_handle_t handle; + void *allocated; + + setenv("STARPU_LIMIT_CPU_NUMA_MEM", TOTAL, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = 1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + starpu_variable_data_register(&handle, -1, 0, FILL); + + /* This makes the data allocated */ + emit_task(handle); + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT); + /* Room should be busy due to data */ + STARPU_ASSERT(ret == -ENOMEM); + + ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT|STARPU_MALLOC_NORECLAIM); + /* But we should be able to tell we don't care */ + 
STARPU_ASSERT(ret == 0); + ((char*)allocated)[FILL-1] = 0; + starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT); + + /* Release the automatically allocated data */ + starpu_data_unregister(handle); + + /* Memory may not be available immediately, make sure the driver has + * the opportunity to release it */ + emit_empty_task(); + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + emit_empty_task(); + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT); + /* Room should now be available */ + STARPU_ASSERT(ret == 0); + starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} +#endif diff --git a/tests/datawizard/nowhere.c b/tests/datawizard/nowhere.c new file mode 100644 index 0000000..412cff7 --- /dev/null +++ b/tests/datawizard/nowhere.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try the NOWHERE flag + */ + +static int x, y; + +static void prod(void *descr[], void *arg) +{ + (void)arg; + int *v = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + *v = 1; +} + +static struct starpu_codelet cl_prod = +{ + .cpu_funcs = { prod }, + .nbuffers = 1, + .modes = { STARPU_W }, +}; + +static void callback0(void *callback_arg) +{ + (void)callback_arg; + STARPU_ASSERT(x==0); + STARPU_ASSERT(y==0); +} + +static void callback(void *callback_arg) +{ + (void)callback_arg; + STARPU_ASSERT(x>=1); + STARPU_ASSERT(y>=1); +} + +static struct starpu_codelet cl_nowhere = +{ + .where = STARPU_NOWHERE, + .nbuffers = 2, + .modes = { STARPU_R, STARPU_R }, +}; + +static void cons(void *descr[], void *_args) +{ + (void)_args; + + int *v = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + STARPU_ASSERT(*v == 1); + *v = 2; +} + +static struct starpu_codelet cl_cons = +{ + .cpu_funcs = { cons }, + .nbuffers = 1, + .modes = { STARPU_RW }, +}; + +int main(int argc, char **argv) +{ + starpu_data_handle_t handle_x, handle_y; + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_memory_nodes_get_numa_count() > 1) + { + /* FIXME: assumes only one RAM node */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); + + ret = starpu_task_insert(&cl_nowhere, STARPU_R, handle_x, STARPU_R, handle_y, STARPU_CALLBACK, callback0, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl_prod, STARPU_W, handle_x, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl_prod, 
STARPU_W, handle_y, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl_nowhere, STARPU_R, handle_x, STARPU_R, handle_y, STARPU_CALLBACK, callback, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl_cons, STARPU_RW, handle_x, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_insert(&cl_cons, STARPU_RW, handle_y, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(handle_x); + starpu_data_unregister(handle_y); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle_x); + starpu_data_unregister(handle_y); + + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/numa_overflow.c b/tests/datawizard/numa_overflow.c new file mode 100644 index 0000000..1c4c362 --- /dev/null +++ b/tests/datawizard/numa_overflow.c @@ -0,0 +1,142 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +#ifdef STARPU_SANITIZE_ADDRESS +#define ITER 2 +#define N 4 +#else +#define ITER 10 +#define N 10 +#endif +#define SIZE (10*1024*1024) + +/* + * Check that when overflowing a NUMA node we manage to revert to other nodes. + */ + +static void nop(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet cl_r = +{ + .cpu_funcs = { nop }, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +static struct starpu_codelet cl_rw = +{ + .cpu_funcs = { nop }, + .nbuffers = 1, + .modes = { STARPU_RW }, +}; + +int main(int argc, char **argv) +{ + starpu_data_handle_t handles[N]; + uintptr_t data[N]; + int ret; + unsigned i, j; + char s[16]; + int worker; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + snprintf(s, sizeof(s), "%u", (N*3/4)*SIZE/(1024*1024)); + /* We make NUMA nodes not big enough for all data */ + setenv("STARPU_LIMIT_CPU_NUMA_MEM", s, 1); + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker, 1) == 0 + || starpu_memory_nodes_get_numa_count() <= 1) + { + /* We need several NUMA nodes */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + /* We distribute some data on both NUMA nodes */ + for (i = 0; i < N; i++) + { + data[i] = starpu_malloc_on_node(i%2, SIZE); + 
memset((void*) data[i], 0, SIZE); + starpu_variable_data_register(&handles[i], i%2, data[i], SIZE); + } + + /* And now we try to execute all tasks on worker 0, that will fail if + * StarPU doesn't manage to evict some memory */ + for (j = 0; j < ITER; j++) + for (i = 0; i < N; i++) + { + if (rand() % 2 == 0) + ret = starpu_task_insert(&cl_r, STARPU_R, handles[i], STARPU_EXECUTE_ON_WORKER, worker, 0); + else + ret = starpu_task_insert(&cl_rw, STARPU_RW, handles[i], STARPU_EXECUTE_ON_WORKER, worker, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + for (i = 0; i < N; i++) + { + starpu_data_unregister(handles[i]); + starpu_free_on_node(i%2, data[i], SIZE); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + for (i = 0; i < N; i++) + { + starpu_data_unregister(handles[i]); + starpu_free_on_node(i%2, data[i], SIZE); + } + + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} + +#endif diff --git a/tests/datawizard/partition_dep.c b/tests/datawizard/partition_dep.c new file mode 100644 index 0000000..30a27b0 --- /dev/null +++ b/tests/datawizard/partition_dep.c @@ -0,0 +1,109 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "scal.h" + +/* + * Test scaling a partitioned vector + */ + +int main(int argc, char **argv) +{ + unsigned *foo; + starpu_data_handle_t handle; + int ret; + unsigned n, i, size; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + n = starpu_worker_get_count(); + if (n == 1) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + size = 10 * n; + + foo = (unsigned *) calloc(size, sizeof(*foo)); + for (i = 0; i < size; i++) + foo[i] = i; + + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); + + ret = starpu_task_insert(&scal_codelet, STARPU_RW, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = n, + }; + + starpu_data_partition(handle, &f); + + for (i = 0; i < f.nchildren; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = starpu_data_get_sub_data(handle, 1, i); + task->cl = &scal_codelet; + task->execute_on_a_specific_worker = 1; + task->workerid = i; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_shutdown(); + + ret = EXIT_SUCCESS; + for (i = 0; i < size; i++) + { + if (foo[i] != i*2*2) /* every element went through scal_codelet twice: whole vector, then its sub-vector */ + { + FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], i*2*2); + 
ret = EXIT_FAILURE; + } + } + free(foo); + + return ret; + +enodev: + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/partition_init.c b/tests/datawizard/partition_init.c new file mode 100644 index 0000000..2896c63 --- /dev/null +++ b/tests/datawizard/partition_init.c @@ -0,0 +1,110 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +void my_func(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + unsigned nb = STARPU_VECTOR_GET_NX(buffers[0]); + int *v = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); + + unsigned i; + for(i=0 ; i +#include "../helper.h" +#include "scal.h" + +/* + * Test partitioning an uninitialized vector + */ + +struct starpu_codelet mycodelet = +{ + .cpu_funcs = { scal_func_cpu }, +#ifdef STARPU_USE_OPENCL + .opencl_funcs = { scal_func_opencl }, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif +#ifdef STARPU_USE_CUDA + .cuda_funcs = { scal_func_cuda }, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .cpu_funcs_name = {"scal_func_cpu"}, + .modes = { STARPU_W }, + .model = NULL, + .nbuffers = 1 +}; + +int main(int argc, char **argv) +{ + unsigned *foo; + starpu_data_handle_t handle; + int ret; + int n, size; + unsigned i; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + n = starpu_worker_get_count(); + size = 10 * n; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(*foo)); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = n > 1 ? 
n : 2, + }; + + starpu_data_partition(handle, &f); + + for (i = 0; i < f.nchildren; i++) + { + ret = starpu_task_insert(&mycodelet, + STARPU_W, + starpu_data_get_sub_data(handle, 1, i), + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_shutdown(); + + return 0; + +enodev: + for (i = 0; i < f.nchildren; i++) + { + starpu_data_handle_t h = starpu_data_get_sub_data(handle, 1, i); + starpu_data_acquire(h, STARPU_W); + starpu_data_release(h); + } + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/partition_wontuse.c b/tests/datawizard/partition_wontuse.c new file mode 100644 index 0000000..ae762f0 --- /dev/null +++ b/tests/datawizard/partition_wontuse.c @@ -0,0 +1,46 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +int main(int argc, char **argv) +{ + int ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = 2 + }; + + int v[10]; + memset(v, 0, 10*sizeof(int)); + starpu_data_handle_t array_handle; + starpu_vector_data_register(&array_handle, STARPU_MAIN_RAM, (uintptr_t)&v, 10, sizeof(int)); + + starpu_data_partition(array_handle, &f); + starpu_data_wont_use(array_handle); + starpu_data_unpartition(array_handle, STARPU_MAIN_RAM); + + starpu_data_unregister(array_handle); + starpu_shutdown(); + + return 0; +} diff --git a/tests/datawizard/partitioned_acquire.c b/tests/datawizard/partitioned_acquire.c new file mode 100644 index 0000000..8c546c9 --- /dev/null +++ b/tests/datawizard/partitioned_acquire.c @@ -0,0 +1,121 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +#define SIZE (1<<20) +#define NPARTS 16 + +/* + * Test asynchronous partitioning on a temporary data. 
+ */ + +static void codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet clw = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet clr = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle, handles[NPARTS]; + int i; + char d[SIZE]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + memset(d, 0, SIZE*sizeof(char)); + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &d, SIZE, sizeof(char)); + + /* Fork */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS + }; + starpu_data_partition_plan(handle, &f, handles); + + /* Read in parallel */ + for (i = 0; i < NPARTS; i++) + { + starpu_data_acquire(handles[i], STARPU_R); + } + + /* Release in parallel */ + for (i = 0; i < NPARTS; i++) + { + starpu_data_release(handles[i]); + } + + starpu_data_invalidate(handle); + + /* Acquire in parallel */ + for (i = 0; i < NPARTS; i++) + { + starpu_data_acquire(handles[i], STARPU_W); + } + + /* Release in parallel */ + for (i = 0; i < NPARTS; i++) + { + starpu_data_release(handles[i]); + } + + starpu_data_acquire(handle, STARPU_R); + starpu_data_release(handle); + + /* Read result */ + ret = starpu_task_insert(&clr, STARPU_R, handle, 0); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + /* otherwise let's say nevermind */ + + /* Clean */ + starpu_data_partition_clean(handle, NPARTS, handles); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; +} diff --git a/tests/datawizard/partitioned_initialization.c 
b/tests/datawizard/partitioned_initialization.c new file mode 100644 index 0000000..f0d1afa --- /dev/null +++ b/tests/datawizard/partitioned_initialization.c @@ -0,0 +1,113 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +#define SIZE (1<<20) +#define NPARTS 16 + +/* + * Test asynchronous partitioning on a temporary data. 
+ */ + +static void codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet clw = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet clr = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle, handles[NPARTS]; + int i; + char d[SIZE]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + memset(d, 0, SIZE*sizeof(char)); + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &d, SIZE, sizeof(char)); + starpu_data_invalidate(handle); + + /* Fork */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS + }; + starpu_data_partition_plan(handle, &f, handles); + + /* Process in parallel */ + for (i = 0; i < NPARTS; i++) + { + ret = starpu_task_insert(&clw, + STARPU_W, handles[i], + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_data_acquire(handle, STARPU_R); + starpu_data_release(handle); + + /* Read result */ + ret = starpu_task_insert(&clr, STARPU_R, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Clean */ + starpu_data_partition_clean(handle, NPARTS, handles); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_data_partition_clean(handle, NPARTS, handles); + starpu_data_unregister(handle); + starpu_shutdown(); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + fprintf(stderr, "WARNING: No one can execute this task\n"); + return 
STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/readers_and_writers.c b/tests/datawizard/readers_and_writers.c new file mode 100644 index 0000000..d4515fe --- /dev/null +++ b/tests/datawizard/readers_and_writers.c @@ -0,0 +1,105 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Try mixing readers and writers on the same variable + */ + +static unsigned book = 0; +static starpu_data_handle_t book_handle; + +void dummy_kernel(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet r_cl = +{ + .cuda_funcs = {dummy_kernel}, + .cpu_funcs = {dummy_kernel}, + .opencl_funcs = {dummy_kernel}, + .cpu_funcs_name = {"dummy_kernel"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +static struct starpu_codelet w_cl = +{ + .cuda_funcs = {dummy_kernel}, + .cpu_funcs = {dummy_kernel}, + .opencl_funcs = {dummy_kernel}, + .cpu_funcs_name = {"dummy_kernel"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* initialize the resource */ + starpu_vector_data_register(&book_handle, STARPU_MAIN_RAM, (uintptr_t)&book, 1, sizeof(unsigned)); + +#ifdef STARPU_QUICK_CHECK + 
unsigned ntasks = 16; +#else + unsigned ntasks = 16*1024; +#endif + + unsigned t; + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = book_handle; + + /* we randomly select either a reader or a writer (give 10 + * times more chances to be a reader) */ + enum starpu_data_access_mode mode = ((rand() % 10)==0)?STARPU_W:STARPU_R; + if (mode == STARPU_W) + task->cl = &w_cl; + else + task->cl = &r_cl; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(book_handle); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(book_handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/readonly.c b/tests/datawizard/readonly.c new file mode 100644 index 0000000..423b58c --- /dev/null +++ b/tests/datawizard/readonly.c @@ -0,0 +1,77 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include "../helper.h"
+
+/*
+ * Try accessing a variable in read-only mode
+ */
+
+#ifdef STARPU_USE_OPENCL
+static void codelet(void *descr[], void *_args)
+{
+	(void)descr;
+	(void)_args;
+	FPRINTF(stderr, "codelet\n");
+}
+#endif
+
+static struct starpu_codelet cl =
+{
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {codelet},
+#endif
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+int main(void)
+{
+	int ret;
+	int var = 42;
+	starpu_data_handle_t handle;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	int copy = starpu_asynchronous_copy_disabled();
+	FPRINTF(stderr, "copy %d\n", copy);
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
+
+	ret = starpu_task_insert(&cl,
+				 STARPU_R, handle,
+				 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(handle);
+
+	starpu_shutdown();
+
+	return 0;
+
+enodev:
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/datawizard/reclaim.c b/tests/datawizard/reclaim.c
new file mode 100644
index 0000000..a29bf73
--- /dev/null
+++ b/tests/datawizard/reclaim.c
@@ -0,0 +1,218 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#ifdef STARPU_HAVE_HWLOC
+#include
+#endif
+#include "../helper.h"
+
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+/*
+ * Stress the memory allocation system and force StarPU to reclaim memory from
+ * time to time.
+ */
+
+#ifdef STARPU_QUICK_CHECK
+# define BLOCK_SIZE (64*1024)
+static unsigned ntasks = 250;
+#else
+# define BLOCK_SIZE (64*1024*1024)
+static unsigned ntasks = 1000;
+#endif
+
+
+#ifdef STARPU_HAVE_HWLOC
+static uint64_t get_total_memory_size(void)
+{
+	uint64_t size;
+	hwloc_topology_t hwtopology;
+	int err;
+
+	err = hwloc_topology_init(&hwtopology);
+	STARPU_ASSERT_MSG(err == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno));
+	err = hwloc_topology_load(hwtopology);
+	STARPU_ASSERT_MSG(err == 0, "Could not load Hwloc topology (%s)\n", strerror(errno));
+
+	hwloc_obj_t root = hwloc_get_root_obj(hwtopology);
+#if HWLOC_API_VERSION >= 0x00020000
+	size = root->total_memory;
+#else
+	size = root->memory.total_memory;
+#endif
+	hwloc_topology_destroy(hwtopology);
+	return size;
+}
+#endif
+
+void dummy_func(void *descr[], void *_args)
+{
+}
+
+static unsigned int i = 0;
+void func(void *arg)
+{
+	printf("%u\n", ++i);
+}
+
+static struct starpu_codelet dummy_cl =
+{
+	.cpu_funcs = {dummy_func},
+	.cuda_funcs = {dummy_func},
+	.opencl_funcs = {dummy_func},
+	.cpu_funcs_name = {"dummy_func"},
+	.nbuffers = 3,
+	.modes = {STARPU_RW, STARPU_R, STARPU_R}
+};
+
+/* Number of chunks */
+static unsigned mb = 16;
+
+int main(int argc, char **argv)
+{
+	unsigned j, taskid;
+	int ret;
+
+#ifdef STARPU_HAVE_HWLOC
+	/* We allocate 50% of the memory */
+	uint64_t total_size = get_total_memory_size();
+
+	/* On x86_64-freebsd8.2, hwloc 1.3 returns 0 as the total memory
+	 * size, so sanity-check what we have. */
+	if (total_size > 0)
+		mb = (int)((0.50 * total_size)/(BLOCK_SIZE));
+#endif
+
+	setenv("STARPU_LIMIT_OPENCL_MEM", "1000", 1);
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* An optional argument indicates the number of MB to allocate */
+	if (argc > 1)
+		mb = atoi(argv[1]);
+
+	if (2*mb > ntasks)
+		ntasks = 2*mb;
+
+#ifdef STARPU_QUICK_CHECK
+	mb /= 100;
+#endif
+	/* mb is the modulus used to pick the handles below and the calloc()
+	 * element count: make sure it is never 0, in every build flavour */
+	if (mb == 0)
+		mb = 1;
+
+	FPRINTF(stderr, "Allocate %u buffers of size %d and create %u tasks\n", mb, BLOCK_SIZE, ntasks);
+
+	float **host_ptr_array;
+	starpu_data_handle_t *handle_array;
+
+	host_ptr_array = calloc(mb, sizeof(float *));
+	STARPU_ASSERT(host_ptr_array);
+	handle_array = calloc(mb, sizeof(starpu_data_handle_t));
+	STARPU_ASSERT(handle_array);
+
+	/* Register mb buffers of random size, at most BLOCK_SIZE bytes each */
+	for (j = 0; j < mb; j++)
+	{
+		size_t size = starpu_lrand48()%BLOCK_SIZE + 1;
+		host_ptr_array[j] = calloc(size, 1);
+		if (host_ptr_array[j] == NULL)
+		{
+			mb = j;
+			FPRINTF(stderr, "Cannot allocate more than %u buffers\n", mb);
+			break;
+		}
+		starpu_variable_data_register(&handle_array[j], STARPU_MAIN_RAM, (uintptr_t)host_ptr_array[j], size);
+		STARPU_ASSERT(handle_array[j]);
+	}
+
+	if (mb == 0)
+	{
+		/* Not even one buffer could be allocated: there is nothing to
+		 * test, and taskid%mb below would divide by zero */
+		free(host_ptr_array);
+		free(handle_array);
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	for (taskid = 0; taskid < ntasks; taskid++)
+	{
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &dummy_cl;
+		task->handles[0] = handle_array[taskid%mb];
+		task->handles[1] = handle_array[(taskid+1)%mb];
+		task->handles[2] = handle_array[(taskid+2)%mb];
+		task->callback_func = func;
+		task->callback_arg = NULL;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	for (j = 0; j < mb; j++)
+	{
+		if (j%20 == 0)
+			starpu_data_unregister_submit(handle_array[j]);
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	for (j = 0; j < mb; j++)
+	{
+		if (j%20 != 0)
+			starpu_data_unregister(handle_array[j]);
+		free(host_ptr_array[j]);
+	}
+
+	free(host_ptr_array);
+	free(handle_array);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	/* Release what was registered and allocated above, as the normal exit
+	 * path does */
+	for (j = 0; j < mb; j++)
+	{
+		starpu_data_unregister(handle_array[j]);
+		free(host_ptr_array[j]);
+	}
+	free(host_ptr_array);
+	free(handle_array);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
+#endif
diff --git a/tests/datawizard/redux_acquire.c b/tests/datawizard/redux_acquire.c
new file mode 100644
index 0000000..7c1d69b
--- /dev/null
+++ b/tests/datawizard/redux_acquire.c
@@ -0,0 +1,99 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include "helper.h" + +void init_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); + *dot = 42; +} + +void redux_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); + long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); + + *dota = *dota + *dotb; +} + +static struct starpu_codelet init_codelet = +{ + .cpu_funcs = {init_cpu_func}, + .nbuffers = 1, + .modes = {STARPU_W}, + .name = "init_codelet" +}; + +static struct starpu_codelet redux_codelet = +{ + .cpu_funcs = {redux_cpu_func}, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, + .name = "redux_codelet" +}; + +static void check_dot(void *dot_handle) +{ + long int *x = starpu_data_get_local_ptr(dot_handle); + STARPU_ASSERT_MSG(*x == 42, "Incorrect value %ld", *x); + starpu_data_release(dot_handle); +} + +int main(void) +{ + starpu_data_handle_t dot_handle; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + int ret = starpu_init(&conf); + if (ret == -ENODEV) + goto skip; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() == 0) + goto enodev; + + starpu_variable_data_register(&dot_handle, -1, (uintptr_t)NULL, sizeof(long int)); + starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet); + starpu_data_acquire(dot_handle, STARPU_R); + long int *x = starpu_data_get_local_ptr(dot_handle); + STARPU_ASSERT_MSG(*x == 42, "Incorrect value %ld", *x); + starpu_data_release(dot_handle); + starpu_data_unregister(dot_handle); + + starpu_variable_data_register(&dot_handle, -1, (uintptr_t)NULL, sizeof(long int)); + starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet); + starpu_data_acquire_cb(dot_handle, STARPU_R, check_dot, dot_handle); + 
starpu_data_unregister(dot_handle); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); +skip: + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/scal.c b/tests/datawizard/scal.c new file mode 100644 index 0000000..49a87b1 --- /dev/null +++ b/tests/datawizard/scal.c @@ -0,0 +1,102 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "scal.h" +#include "helper.h" + +/* + * Implement a kernel that just multiplies a vector by 2 + */ + +void scal_func_cpu(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + unsigned i; + + struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; + unsigned *val = (unsigned *) STARPU_VECTOR_GET_PTR(vector); + unsigned n = STARPU_VECTOR_GET_NX(vector); + + /* scale the vector */ + for (i = 0; i < n; i++) + val[i] *= 2; +} + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; + +void scal_func_opencl(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + unsigned offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "vector_mult_opencl", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + err |= clSetKernelArg(kernel, 1, sizeof(offset), &offset); + err |= clSetKernelArg(kernel, 2, sizeof(n), &n); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) local=global; + else global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} +#endif + +struct starpu_codelet scal_codelet = +{ + + .cpu_funcs = { scal_func_cpu }, +#ifdef 
STARPU_USE_OPENCL + .opencl_funcs = { scal_func_opencl }, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif +#ifdef STARPU_USE_CUDA + .cuda_funcs = { scal_func_cuda }, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif + .cpu_funcs_name = {"scal_func_cpu"}, + .modes = { STARPU_RW }, + .model = NULL, + .nbuffers = 1 +}; + diff --git a/tests/datawizard/scal.h b/tests/datawizard/scal.h new file mode 100644 index 0000000..998ed17 --- /dev/null +++ b/tests/datawizard/scal.h @@ -0,0 +1,31 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +extern struct starpu_codelet scal_codelet; +extern struct starpu_opencl_program opencl_program; + +void scal_func_cpu(void *buffers[], void *cl_arg); + +#ifdef STARPU_USE_CUDA +void scal_func_cuda(void *buffers[], void *cl_arg); +#endif + +#ifdef STARPU_USE_OPENCL +void scal_func_opencl(void *buffers[], void *_args); +#endif + diff --git a/tests/datawizard/scal_cuda.cu b/tests/datawizard/scal_cuda.cu new file mode 100644 index 0000000..3e2b6dd --- /dev/null +++ b/tests/datawizard/scal_cuda.cu @@ -0,0 +1,37 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +static __global__ void vector_mult_cuda(unsigned *val, unsigned n) +{ + unsigned i = blockIdx.x*blockDim.x + threadIdx.x; + + if (i < n) + val[i] *= 2; +} + +extern "C" void scal_func_cuda(void *buffers[], void *_args) +{ + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(buffers[0]); + unsigned threads_per_block = 64; + unsigned nblocks = (n + threads_per_block-1) / threads_per_block; + + vector_mult_cuda<<>>(val, n); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/tests/datawizard/scal_opencl.cl b/tests/datawizard/scal_opencl.cl new file mode 100644 index 0000000..1892983 --- /dev/null +++ b/tests/datawizard/scal_opencl.cl @@ -0,0 +1,25 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +__kernel void vector_mult_opencl(__global unsigned* val, unsigned offset, unsigned nx) +{ + const int i = get_global_id(0); + val = (__global char*) val + offset; + if (i < nx) + { + val[i] *= 2; + } +} diff --git a/tests/datawizard/scratch.c b/tests/datawizard/scratch.c new file mode 100644 index 0000000..ef31b09 --- /dev/null +++ b/tests/datawizard/scratch.c @@ -0,0 +1,158 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test using a scratch data, using it just for temporary storage + */ + +#ifdef STARPU_QUICK_CHECK +# define NLOOPS 8 +# define VECTORSIZE 128 +#else +# define NLOOPS 128 +# define VECTORSIZE 1024 +#endif + +static unsigned *A; +starpu_data_handle_t A_handle, B_handle; + +//static unsigned var = 0; + +#ifdef STARPU_USE_CUDA +extern void cuda_f(void *descr[], void *_args); +#endif +#ifdef STARPU_USE_OPENCL +extern void opencl_f(void *buffers[], void *args); +#endif + +void cpu_f(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); + + memcpy(tmp, v, nx*elemsize); + + unsigned i; + for (i = 0; i < nx; i++) + { + v[i] = tmp[i] + 1; + } +} + +static struct starpu_codelet cl_f = +{ + .cpu_funcs = {cpu_f}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_f}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_f}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_f"}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_SCRATCH} +}; + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned)); + + starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, 
sizeof(unsigned)); + starpu_vector_data_register(&B_handle, -1, (uintptr_t)NULL, VECTORSIZE, sizeof(unsigned)); + + unsigned loop; + for (loop = 0; loop < NLOOPS; loop++) + { + struct starpu_task *task_f = starpu_task_create(); + task_f->cl = &cl_f; + task_f->handles[0] = A_handle; + task_f->handles[1] = B_handle; + + ret = starpu_task_submit(task_f); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + + /* Check result */ + unsigned i; + ret = EXIT_SUCCESS; + for (i = 0; i < VECTORSIZE; i++) + { + if (A[i] != NLOOPS) + { + FPRINTF(stderr, "Error: Incorrect value A[%u] = %u != %d\n", i, A[i], NLOOPS); + ret = EXIT_FAILURE; + break; + } + } + + free(A); + STARPU_RETURN(ret); + +enodev: + starpu_data_unregister(A_handle); + starpu_data_unregister(B_handle); +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + fprintf(stderr, "WARNING: No one can execute this task\n"); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/scratch_cuda.cu b/tests/datawizard/scratch_cuda.cu new file mode 100644 index 0000000..0504fca --- /dev/null +++ b/tests/datawizard/scratch_cuda.cu @@ -0,0 +1,51 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#define MAXNBLOCKS 32 +#define MAXTHREADSPERBLOCK 128 + +static __global__ void increment_vector(unsigned *v, unsigned *tmp, int nx) +{ + const int tid = threadIdx.x + blockIdx.x*blockDim.x; + const int nthreads = gridDim.x * blockDim.x; + + int i; + for (i = tid; i < nx; i += nthreads) + { + v[i] = tmp[i] + 1; + } +} + +extern "C" void cuda_f(void *descr[], void *_args) +{ + unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); + + unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); + + cudaMemcpyAsync(tmp, v, nx*elemsize, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); + + unsigned nblocks = 128; + unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (nx / nblocks)); + + increment_vector<<>>(v, tmp, nx); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/tests/datawizard/scratch_opencl.c b/tests/datawizard/scratch_opencl.c new file mode 100644 index 0000000..c7f1a92 --- /dev/null +++ b/tests/datawizard/scratch_opencl.c @@ -0,0 +1,84 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +/* + * Queue an OpenCL kernel for the scratch test. + */ + +extern struct starpu_opencl_program opencl_program; + +void opencl_f(void *buffers[], void *args) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + unsigned elemsize = STARPU_VECTOR_GET_ELEMSIZE(buffers[0]); + cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + cl_mem tmp = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "increment_vector_opencl", devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clEnqueueCopyBuffer(queue, + val, + tmp, + 0, /* offset in val */ + 0, /* offset in tmp */ + n * elemsize, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL); /* event */ + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + err|= clSetKernelArg(kernel, 1, sizeof(tmp), &tmp); + err|= clSetKernelArg(kernel, 2, sizeof(n), &n); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = 
clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+		if (local > global)
+			local=global;
+		else
+			global = (global + local-1) / local * local;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+	starpu_opencl_release_kernel(kernel);
+}
diff --git a/tests/datawizard/scratch_opencl_kernel.cl b/tests/datawizard/scratch_opencl_kernel.cl
new file mode 100644
index 0000000..8623294
--- /dev/null
+++ b/tests/datawizard/scratch_opencl_kernel.cl
@@ -0,0 +1,25 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void increment_vector_opencl(__global unsigned *val,
+				      __global unsigned *tmp,
+				      unsigned nx)
+{
+	const int id = get_global_id(0);
+
+	if (id < nx)
+		val[id] = tmp[id] + 1;
+}
diff --git a/tests/datawizard/scratch_reuse.c b/tests/datawizard/scratch_reuse.c
new file mode 100644
index 0000000..9be2d9f
--- /dev/null
+++ b/tests/datawizard/scratch_reuse.c
@@ -0,0 +1,91 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../helper.h"
+
+#if !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC)
+#warning setenv is not defined or no cpu are available. Skipping test
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+#ifdef STARPU_QUICK_CHECK
+#define ITER 32
+#else
+#define ITER 128
+#endif
+
+static void kernel(void *buffers[], void *cl_args)
+{
+	(void)cl_args;
+	STARPU_ASSERT(STARPU_MATRIX_GET_PTR(buffers[0]) != 0);
+}
+
+static struct starpu_codelet codelet =
+{
+	.name = "codelet",
+	.cuda_funcs = { kernel },
+	.nbuffers = 1,
+	.modes = { STARPU_SCRATCH },
+};
+
+int main(int argc, char *argv[])
+{
+	setenv("STARPU_LIMIT_CUDA_MEM", "50", 1);
+
+	int ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cuda_worker_get_count() == 0)
+	{
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	starpu_data_handle_t handle[ITER];
+
+	int i;
+	for (i = 0; i < ITER; i++)
+	{
+		starpu_matrix_data_register(&handle[i], -1, 0, 1024, 1024, 1024, sizeof(float));
+		ret = starpu_task_insert(&codelet, STARPU_SCRATCH, handle[i], 0);
+		if (ret == -ENODEV)
+		{
+			/* Skipping must not leak the handles registered so far
+			 * (handle[i] included) nor leave StarPU running */
+			int k;
+			for (k = 0; k <= i; k++)
+				starpu_data_unregister(handle[k]);
+			starpu_shutdown();
+			return STARPU_TEST_SKIPPED;
+		}
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	}
+
+	starpu_task_wait_for_all();
+
+	for (i = 0; i < ITER; i++)
+
starpu_data_unregister(handle[i]);
+
+	starpu_shutdown();
+
+	return 0;
+}
+#endif
diff --git a/tests/datawizard/simgrid-locality.c b/tests/datawizard/simgrid-locality.c
new file mode 100644
index 0000000..d20f84d
--- /dev/null
+++ b/tests/datawizard/simgrid-locality.c
@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Check that defining a main makes starpu use MSG_process_attach. */
+#include "locality.c"
+#include
+
+/* The override is only compiled when the SimGrid headers advertise
+ * MSG_process_attach, for SIMGRID_VERSION >= 31500 other than 31559. */
+#if defined(SIMGRID_VERSION) && ((defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach)) && SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559)
+#undef main
+int main(int argc, char *argv[])
+{
+	return starpu_main(argc, argv);
+}
+#endif
diff --git a/tests/datawizard/specific_node.c b/tests/datawizard/specific_node.c
new file mode 100644
index 0000000..c138bcb
--- /dev/null
+++ b/tests/datawizard/specific_node.c
@@ -0,0 +1,266 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Test using the specific_nodes field by forcing the data to main memory + * even if the task is run on a GPU (and actually doing the computation from + * the CPU driving the GPU). It mixes such accesses and normal accesses from + * the GPU + */ + +unsigned data, data2; + +void specific3_kernel(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED) +{ + (void)arg; +} + +static struct starpu_codelet specific3_cl = +{ + .cpu_funcs = {specific3_kernel}, + .cuda_funcs = {specific3_kernel}, + .opencl_funcs = {specific3_kernel}, + .hip_funcs = {specific3_kernel}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_NONE, STARPU_SPECIFIC_NODE_NONE}, +}; + +void specific2_kernel(void *descr[], void *arg) +{ + (void)arg; + int node = starpu_task_get_current_data_node(0); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); + unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data); + + (*dataptr)++; + + node = starpu_task_get_current_data_node(1); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM + || (unsigned) node == starpu_worker_get_local_memory_node()); + dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[1]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data || dataptr == &data2); +} + +static struct starpu_codelet specific2_cl = +{ + .cpu_funcs = {specific2_kernel}, + .cuda_funcs = {specific2_kernel}, + .opencl_funcs = 
{specific2_kernel}, + .hip_funcs = {specific2_kernel}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, +}; + +void specific2_ro_kernel(void *descr[], void *arg) +{ + (void)arg; + int node = starpu_task_get_current_data_node(0); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); + unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data); + + node = starpu_task_get_current_data_node(1); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM + || (unsigned) node == starpu_worker_get_local_memory_node()); + dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[1]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data || dataptr == &data2); +} + +static struct starpu_codelet specific2_cl_ro = +{ + .cpu_funcs = {specific2_ro_kernel}, + .cuda_funcs = {specific2_ro_kernel}, + .opencl_funcs = {specific2_ro_kernel}, + .hip_funcs = {specific2_ro_kernel}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, +}; + +void specific_kernel(void *descr[], void *arg) +{ + (void)arg; + int node = starpu_task_get_current_data_node(0); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); + unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data); + + (*dataptr)++; + + node = starpu_task_get_current_data_node(1); + STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); +} + +void specific_ro_kernel(void *descr[], void *arg) +{ + (void)arg; + int node = starpu_task_get_current_data_node(0); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); + unsigned *dataptr = 
(unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data); + + node = starpu_task_get_current_data_node(1); + STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); +} + +static struct starpu_codelet specific_cl = +{ + .cpu_funcs = {specific_kernel}, + .cuda_funcs = {specific_kernel}, + .opencl_funcs = {specific_kernel}, + .hip_funcs = {specific_kernel}, + .nbuffers = 2, + .modes = {STARPU_RW, STARPU_RW}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +static struct starpu_codelet specific_cl_ro = +{ + .cpu_funcs = {specific_ro_kernel}, + .cuda_funcs = {specific_ro_kernel}, + .opencl_funcs = {specific_ro_kernel}, + .hip_funcs = {specific_ro_kernel}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +int main(void) +{ + starpu_data_handle_t data_handle, data_handle2; + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 16; +#else + unsigned ntasks = 1024; +#endif + + int ret; + + /* Disable prefetching, it makes the test work just by luck */ +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_PREFETCH", "0", 1); +#endif + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + data = 0; + data2 = 0; + + /* Register both variables; data is the counter verified at the end.
*/ + starpu_variable_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) &data, sizeof(data)); + starpu_variable_data_register(&data_handle2, STARPU_MAIN_RAM, (uintptr_t) &data2, sizeof(data2)); + + unsigned i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + switch (i%8) { + case 0: + task->cl = &specific_cl; + break; + case 1: + task->cl = &specific2_cl; + break; + case 2: + task->cl = &specific3_cl; + break; + case 3: + task->cl = &increment_cl; + break; + case 4: + task->cl = &specific_cl_ro; + break; + case 5: + task->cl = &specific2_cl_ro; + break; + case 6: + task->cl = &specific3_cl; + break; + case 7: + task->cl = &increment_cl; + break; + } + + task->handles[0] = data_handle; + if (i % 8 >= 4) + task->handles[1] = data_handle; + else + task->handles[1] = data_handle2; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(data_handle); + starpu_data_unregister(data_handle2); + + ret = (data == (ntasks*4) / 8) ? EXIT_SUCCESS : EXIT_FAILURE; + + increment_unload_opencl(); + starpu_shutdown(); + + return ret; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_data_unregister(data_handle); + starpu_data_unregister(data_handle2); + increment_unload_opencl(); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/specific_node_same.c b/tests/datawizard/specific_node_same.c new file mode 100644 index 0000000..0a3b66d --- /dev/null +++ b/tests/datawizard/specific_node_same.c @@ -0,0 +1,113 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test using the specific_nodes field with loading the same data several + * times on different nodes. + */ + +unsigned data; + +void specific_ro_kernel(void *descr[], void *arg) +{ + (void)arg; + int node = starpu_task_get_current_data_node(0); + STARPU_ASSERT(node >= 0); + STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); + unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); + + if (node == STARPU_MAIN_RAM) + STARPU_ASSERT(dataptr == &data); + + node = starpu_task_get_current_data_node(1); + STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); +} + +static struct starpu_codelet specific_cl_ro = +{ + .cpu_funcs = {specific_ro_kernel}, + .cuda_funcs = {specific_ro_kernel}, + .opencl_funcs = {specific_ro_kernel}, + .hip_funcs = {specific_ro_kernel}, + .nbuffers = 2, + .modes = {STARPU_R, STARPU_R}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +int main(void) +{ + starpu_data_handle_t data_handle; + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 16; +#else + unsigned ntasks = 1024; +#endif + + int ret; + + /* Disable prefetching, it makes the test work just by luck */ +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_PREFETCH", "0", 1); +#endif + + ret = starpu_init(NULL); + if 
(ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + data = 0; + + /* Register the variable that each task reads through both of its buffers. */ + starpu_variable_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) &data, sizeof(data)); + + unsigned i; + for (i = 0 ; i < starpu_worker_get_count(); i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &specific_cl_ro; + task->execute_on_a_specific_worker = 1; + task->workerid = i; + + task->handles[0] = data_handle; + task->handles[1] = data_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(data_handle); + + starpu_shutdown(); + + return ret; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_data_unregister(data_handle); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/sync_and_notify_data.c b/tests/datawizard/sync_and_notify_data.c new file mode 100644 index 0000000..414b365 --- /dev/null +++ b/tests/datawizard/sync_and_notify_data.c @@ -0,0 +1,208 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include +#include + +#include "../helper.h" + +/* + * Mix synchronous tasks and data acquisitions + */ + +#define N_DEF 100 +#define K_DEF 256 + +static unsigned n=N_DEF; +static unsigned k=K_DEF; + +/* + * In this test, we maintain a vector v = (a,b,c). + * + * Each iteration consists of: + * - increment a n times + * - sync v in ram + * - increment b + * - notify the modification of v + * - increment c n times + * - sync v + * + * At the end, we have to make sure that if we did k iterations, + * v == (kn, k, kn) + */ + +#ifdef STARPU_USE_CUDA +void cuda_codelet_incA(void *descr[], void *_args); +void cuda_codelet_incC(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +void opencl_codelet_incA(void *descr[], void *_args); +void opencl_codelet_incC(void *descr[], void *_args); +struct starpu_opencl_program opencl_code; +#endif + +#define VECTORSIZE 16 + +starpu_data_handle_t v_handle; +static unsigned v[VECTORSIZE] STARPU_ATTRIBUTE_ALIGNED(128) = {0, 0, 0, 0}; + +void cpu_codelet_incA(void *descr[], void *arg) +{ + (void)arg; + unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + val[0]++; +} + +void cpu_codelet_incC(void *descr[], void *arg) +{ + (void)arg; + unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + val[2]++; +} + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_QUICK_CHECK + n /= 10; +#endif +#ifndef STARPU_LONG_CHECK + k /= 16; +#endif + +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) n = 4; + if(RUNNING_ON_VALGRIND) k = 2; +#endif + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/sync_and_notify_data_opencl_codelet.cl", &opencl_code, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + starpu_vector_data_register(&v_handle,
STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); + + unsigned iter; + for (iter = 0; iter < k; iter++) + { + unsigned ind; + for (ind = 0; ind < n; ind++) + { + /* increment a = v[0] */ + struct starpu_codelet cl_inc_a = + { + .cpu_funcs = {cpu_codelet_incA}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet_incA}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet_incA}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_codelet_incA"}, + .nbuffers = 1, + .modes = {STARPU_RW} + }; + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_inc_a; + task->handles[0] = v_handle; + + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* synchronize v in RAM */ + ret = starpu_data_acquire(v_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + /* increment b */ + v[1]++; + + starpu_data_release(v_handle); + + for (ind = 0; ind < n; ind++) + { + /* increment c = v[2] */ + struct starpu_codelet cl_inc_c = + { + .cpu_funcs = {cpu_codelet_incC}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet_incC}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet_incC}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_codelet_incC"}, + .nbuffers = 1, + .modes = {STARPU_RW} + + }; + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_inc_c; + + task->handles[0] = v_handle; + + task->synchronous = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + } + + ret = starpu_data_acquire(v_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + FPRINTF(stderr, "V = {%u, %u, %u}\n", v[0], v[1], v[2]); + + starpu_data_release(v_handle); + 
starpu_data_unregister(v_handle); + + starpu_shutdown(); + + if ((v[0] != n*k) || (v[1] != k) || (v[2] != n*k)) + { + FPRINTF(stderr, "Incorrect result\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + STARPU_RETURN(STARPU_TEST_SKIPPED); +} diff --git a/tests/datawizard/sync_and_notify_data_implicit.c b/tests/datawizard/sync_and_notify_data_implicit.c new file mode 100644 index 0000000..f0fa3f4 --- /dev/null +++ b/tests/datawizard/sync_and_notify_data_implicit.c @@ -0,0 +1,201 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#include "../helper.h" + +/* + * Mix tasks with implicit dependencies and data acquisitions + */ + +#define N_DEF 100 +#define K_DEF 256 + +static unsigned n=N_DEF; +static unsigned k=K_DEF; + +/* + * In this test, we maintain a vector v = (a,b,c). 
+ * + * Each iteration consists of: + * - increment a n times + * - sync v in ram + * - increment b + * - notify the modification of v + * - increment c n times + * - sync v + * + * At the end, we have to make sure that if we did k iterations, + * v == (kn, k, kn) + */ + +#ifdef STARPU_USE_CUDA +void cuda_codelet_incA(void *descr[], void *_args); +void cuda_codelet_incC(void *descr[], void *_args); +#endif + +#ifdef STARPU_USE_OPENCL +void opencl_codelet_incA(void *descr[], void *_args); +void opencl_codelet_incC(void *descr[], void *_args); +struct starpu_opencl_program opencl_code; +#endif + +#define VECTORSIZE 16 + +starpu_data_handle_t v_handle; +static unsigned v[VECTORSIZE] STARPU_ATTRIBUTE_ALIGNED(128) = {0, 0, 0, 0}; + +void cpu_codelet_incA(void *descr[], void *arg) +{ + (void)arg; + unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + val[0]++; +} + +void cpu_codelet_incC(void *descr[], void *arg) +{ + (void)arg; + unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + val[2]++; +} + +/* increment a = v[0] */ +static struct starpu_codelet cl_inc_a = +{ + .cpu_funcs = {cpu_codelet_incA}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet_incA}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet_incA}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_codelet_incA"}, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +/* increment c = v[2] */ +struct starpu_codelet cl_inc_c = +{ + .cpu_funcs = {cpu_codelet_incC}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet_incC}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet_incC}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_codelet_incC"}, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_QUICK_CHECK + n /= 10; +#endif +#ifndef STARPU_LONG_CHECK + k /= 8; +#endif +
+#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) n = 4; + if(RUNNING_ON_VALGRIND) k = 2; +#endif + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/datawizard/sync_and_notify_data_opencl_codelet.cl", &opencl_code, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); + + unsigned iter; + for (iter = 0; iter < k; iter++) + { + unsigned ind; + for (ind = 0; ind < n; ind++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_inc_a; + task->handles[0] = v_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* synchronize v in RAM */ + ret = starpu_data_acquire(v_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + /* increment b */ + v[1]++; + + starpu_data_release(v_handle); + + for (ind = 0; ind < n; ind++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &cl_inc_c; + task->handles[0] = v_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + } + + ret = starpu_data_acquire(v_handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + FPRINTF(stderr, "V = {%u, %u, %u}\n", v[0], v[1], v[2]); + + starpu_data_release(v_handle); + starpu_data_unregister(v_handle); + starpu_shutdown(); + + ret = EXIT_SUCCESS; + if ((v[0] != n*k) || (v[1] != k) || (v[2] != n*k)) + { + FPRINTF(stderr, "Incorrect result\n"); + ret = EXIT_FAILURE; + } + return ret; + +enodev: + starpu_data_unregister(v_handle); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not 
perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/sync_and_notify_data_kernels.cu b/tests/datawizard/sync_and_notify_data_kernels.cu new file mode 100644 index 0000000..08f22ac --- /dev/null +++ b/tests/datawizard/sync_and_notify_data_kernels.cu @@ -0,0 +1,53 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +/* + * increment a (val[0]) + */ + +static __global__ void _cuda_incA(unsigned *v) +{ + v[0]++; +} + +extern "C" void cuda_codelet_incA(void *descr[], void *_args) +{ + unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + + _cuda_incA<<<1,1, 0, starpu_cuda_get_local_stream()>>>(v); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} + +/* + * increment c (val[2]) + */ + +static __global__ void _cuda_incC(unsigned *v) +{ + v[2]++; +} + +extern "C" void cuda_codelet_incC(void *descr[], void *_args) +{ + unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + + _cuda_incC<<<1,1, 0, starpu_cuda_get_local_stream()>>>(v); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/tests/datawizard/sync_and_notify_data_opencl.c b/tests/datawizard/sync_and_notify_data_opencl.c new file mode 100644 index 0000000..34f2e7b --- /dev/null +++ b/tests/datawizard/sync_and_notify_data_opencl.c @@ -0,0 +1,80 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Queue kernels which just increment the vector + */ + +extern struct starpu_opencl_program opencl_code; + +void opencl_codelet_incA(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); + cl_kernel kernel; + cl_command_queue queue; + int id, devid, err; + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incA", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=100; + size_t local=100; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} + +void opencl_codelet_incC(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); + cl_kernel kernel; + cl_command_queue queue; + int id, devid, err; + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incC", devid); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + if (err) STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=100; + size_t local=100; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} diff --git a/tests/datawizard/sync_and_notify_data_opencl_codelet.cl b/tests/datawizard/sync_and_notify_data_opencl_codelet.cl new file mode 100644 index 0000000..3f82c3d --- /dev/null +++ b/tests/datawizard/sync_and_notify_data_opencl_codelet.cl @@ -0,0 +1,30 @@ +/* 
StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +__kernel void incA(__global unsigned* input) +{ + const int i = get_global_id(0); + if (i == 0) + input[i] ++; +} + +__kernel void incC(__global unsigned* input) +{ + const int i = get_global_id(0); + if (i == 2) + input[i] ++; +} + diff --git a/tests/datawizard/sync_with_data_with_mem.c b/tests/datawizard/sync_with_data_with_mem.c new file mode 100644 index 0000000..1b79899 --- /dev/null +++ b/tests/datawizard/sync_with_data_with_mem.c @@ -0,0 +1,142 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Mix submitting tasks and synchronously acquiring the corresponding data. + */ + +#define NBUFFERS_DEF 64 +#define NITER_DEF 128 +#define VECTORSIZE_DEF 1024 + +static int nbuffers = NBUFFERS_DEF; +static int niter = NITER_DEF; +static int vectorsize = VECTORSIZE_DEF; + +float *buffer[NBUFFERS_DEF]; + +starpu_data_handle_t v_handle[NBUFFERS_DEF]; + +void dummy_codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {dummy_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_codelet}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_codelet}, +#endif + .cpu_funcs_name = {"dummy_codelet"}, + .nbuffers = 1 +}; + +static +int use_handle(starpu_data_handle_t handle) +{ + int ret; + struct starpu_task *task; + + task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_QUICK_CHECK + nbuffers /= 4; + niter /= 4; + vectorsize /= 8; +#endif + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Allocate all buffers and register them to StarPU */ + int b; + for (b = 0; b < nbuffers; b++) + { + ret = starpu_malloc((void **)&buffer[b], vectorsize); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, + (uintptr_t)buffer[b], vectorsize, sizeof(char)); + } + + int iter; + for (iter = 0; iter < niter; iter++) + { + /* Use the buffers on the different workers so that it may not + * be in main memory anymore */ + for (b = 0; b < nbuffers; b++) + { + ret = use_handle(v_handle[b]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = 
starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + /* Grab the different pieces of data into main memory */ + for (b = 0; b < nbuffers; b++) + { + ret = starpu_data_acquire(v_handle[b], STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + } + + /* Release them */ + for (b = 0; b < nbuffers; b++) + starpu_data_release(v_handle[b]); + } + + /* do some cleanup */ + for (b = 0; b < nbuffers; b++) + { + starpu_data_unregister(v_handle[b]); + starpu_free_noflag(buffer[b], vectorsize); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/sync_with_data_with_mem_non_blocking.c b/tests/datawizard/sync_with_data_with_mem_non_blocking.c new file mode 100644 index 0000000..796ffb9 --- /dev/null +++ b/tests/datawizard/sync_with_data_with_mem_non_blocking.c @@ -0,0 +1,175 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Mix submitting tasks and asynchronously acquiring the corresponding data. + */ + +#define NBUFFERS_DEF 64 +#define NITER_DEF 128 +#define VECTORSIZE_DEF 1024 + +static unsigned nbuffers = NBUFFERS_DEF; +static unsigned niter = NITER_DEF; +static unsigned vectorsize = VECTORSIZE_DEF; + +float *buffer[NBUFFERS_DEF]; + +starpu_data_handle_t v_handle[NBUFFERS_DEF]; + +void dummy_codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {dummy_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_codelet}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_codelet}, +#endif + .cpu_funcs_name = {"dummy_codelet"}, + .nbuffers = 1 +}; + +static +int use_handle(starpu_data_handle_t handle) +{ + int ret; + struct starpu_task *task; + + task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + return ret; +} + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; +static unsigned n_synced_buffers; + +static +void callback_sync_data(void *arg) +{ + (void)arg; + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + n_synced_buffers++; + + if (n_synced_buffers == nbuffers) + STARPU_PTHREAD_COND_SIGNAL(&cond); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_QUICK_CHECK + nbuffers /= 4; + niter /= 4; + vectorsize /= 8; +#endif + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Allocate all buffers and register them to StarPU */ + unsigned b; + for (b = 0; b < nbuffers; b++) + { + ret = starpu_malloc((void **)&buffer[b], vectorsize); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + 
starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, + (uintptr_t)buffer[b], vectorsize, sizeof(char)); + starpu_data_set_sequential_consistency_flag(v_handle[b], 0); + } + + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + /* Use the buffers on the different workers so that it may not + * be in main memory anymore */ + for (b = 0; b < nbuffers; b++) + { + ret = use_handle(v_handle[b]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + n_synced_buffers = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + /* Grab the different pieces of data into main memory */ + for (b = 0; b < nbuffers; b++) + { + ret = starpu_data_acquire_cb(v_handle[b], STARPU_RW, + callback_sync_data, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); + } + + /* Wait for all buffers to be available */ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + while (n_synced_buffers != nbuffers) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + /* Release them */ + for (b = 0; b < nbuffers; b++) + starpu_data_release(v_handle[b]); + } + + /* do some cleanup */ + for (b = 0; b < nbuffers; b++) + { + starpu_data_unregister(v_handle[b]); + starpu_free_noflag(buffer[b], vectorsize); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/sync_with_data_with_mem_non_blocking_implicit.c b/tests/datawizard/sync_with_data_with_mem_non_blocking_implicit.c new file mode 100644 index 0000000..3f1f77d --- /dev/null +++ b/tests/datawizard/sync_with_data_with_mem_non_blocking_implicit.c 
@@ -0,0 +1,176 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Mix submitting tasks and asynchronously acquiring the corresponding + * data, relying on implicit dependencies: the handles keep their default sequential consistency and the tasks are not waited for before acquiring. + */ + +#define NBUFFERS_DEF 64 +#define NITER_DEF 128 +#define VECTORSIZE_DEF 1024 + +static unsigned nbuffers = NBUFFERS_DEF; +static unsigned niter = NITER_DEF; +static unsigned vectorsize = VECTORSIZE_DEF; + +float *buffer[NBUFFERS_DEF]; + +starpu_data_handle_t v_handle[NBUFFERS_DEF]; +/* No-op kernel: ignores its buffers and its argument. */ +void dummy_codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet cl = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {dummy_codelet}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {dummy_codelet}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {dummy_codelet}, +#endif + .cpu_funcs_name = {"dummy_codelet"}, + .nbuffers = 1 +}; +/* Create one task running cl on handle and submit it; returns the value of starpu_task_submit(). */ +static +int use_handle(starpu_data_handle_t handle) +{ + int ret; + struct starpu_task *task; + + task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + return ret; +} + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; +static unsigned 
n_synced_buffers; +/* Acquisition callback: counts one more acquired buffer under mutex and signals cond once all nbuffers are acquired. */ +static +void callback_sync_data(void *arg) +{ + (void)arg; + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + n_synced_buffers++; + + if (n_synced_buffers == nbuffers) + STARPU_PTHREAD_COND_SIGNAL(&cond); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); +} + +int main(int argc, char **argv) +{ + int ret; + +#ifdef STARPU_QUICK_CHECK + nbuffers /= 4; + niter /= 4; + vectorsize /= 8; +#endif + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Allocate all buffers and register them to StarPU */ + unsigned b; + for (b = 0; b < nbuffers; b++) + { + ret = starpu_malloc((void **)&buffer[b], vectorsize); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, + (uintptr_t)buffer[b], vectorsize, sizeof(char)); + } + + unsigned iter; + for (iter = 0; iter < niter; iter++) + { + /* Use the buffers on the different workers so that they may not + * be in main memory anymore */ + for (b = 0; b < nbuffers; b++) + { + ret = use_handle(v_handle[b]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + n_synced_buffers = 0; + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + /* Grab the different pieces of data into main memory */ + for (b = 0; b < nbuffers; b++) + { + ret = starpu_data_acquire_cb(v_handle[b], STARPU_RW, + callback_sync_data, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); + } + + starpu_do_schedule(); + /* Wait for all buffers to be available */ + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + + while (n_synced_buffers != nbuffers) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + /* Release them */ + for (b = 0; b < nbuffers; b++) + starpu_data_release(v_handle[b]); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + /* do some 
cleanup */ + for (b = 0; b < nbuffers; b++) + { + starpu_data_unregister(v_handle[b]); + starpu_free_noflag(buffer[b], vectorsize); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/task_with_multiple_time_the_same_handle.c b/tests/datawizard/task_with_multiple_time_the_same_handle.c new file mode 100644 index 0000000..32a5417 --- /dev/null +++ b/tests/datawizard/task_with_multiple_time_the_same_handle.c @@ -0,0 +1,149 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Test passing the same handle several times to the same task + */ +/* descr[0] (RW) and descr[1] (R) must alias: doubles the first element. */ +void sum_cpu(void * descr[], void *cl_arg) +{ + (void)cl_arg; + double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]); + double * v_src = (double *) STARPU_VECTOR_GET_PTR(descr[1]); + STARPU_ASSERT(v_dst == v_src); + *v_dst+=*v_src; +} +/* All three buffers must alias: triples the first element. */ +void sum3_cpu(void * descr[], void *cl_arg) +{ + (void)cl_arg; + double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[0]); + double * v_src2 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); + double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[2]); + STARPU_ASSERT(v_dst == v_src1); + STARPU_ASSERT(v_dst == v_src2); + *v_dst+=*v_src1+*v_src2; +} +/* descr[0]/descr[2] must alias and descr[1]/descr[3] must alias: dst1 += src1 + src2, then dst2 = dst1. */ +void sum4_cpu(void * descr[], void *cl_arg) +{ + (void)cl_arg; + double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[0]); + double * v_src2 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); + double * v_dst1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]); + double * v_dst2 = (double *) STARPU_VECTOR_GET_PTR(descr[3]); + STARPU_ASSERT(v_src1 == v_dst1); + STARPU_ASSERT(v_src2 == v_dst2); + *v_dst2 = (*v_dst1+=*v_src1+*v_src2); +} + +static struct starpu_codelet sum_cl = +{ + .cpu_funcs = {sum_cpu}, + .cpu_funcs_name = {"sum_cpu"}, + .nbuffers = 2, + .modes={STARPU_RW,STARPU_R} +}; + +static struct starpu_codelet sum3_cl = +{ + .cpu_funcs = {sum3_cpu}, + .cpu_funcs_name = {"sum3_cpu"}, + .nbuffers = 3, + .modes={STARPU_R,STARPU_R,STARPU_RW} +}; + +static struct starpu_codelet sum4_cl = +{ + .cpu_funcs = {sum4_cpu}, + .cpu_funcs_name = {"sum4_cpu"}, + .nbuffers = 4, + .modes={STARPU_R,STARPU_R,STARPU_RW,STARPU_RW} +}; +/* Run sum_cl then sum3_cl twice on one handle (value[0]: 1 -> 2 -> 6 -> 12 -> 36), check the result, then run sum4_cl on the two halves of the partitioned vector. Returns EXIT_FAILURE on a wrong value, STARPU_TEST_SKIPPED on -ENODEV. */ +#define N 10 +int main(void) +{ + starpu_data_handle_t handle; + int ret = 0; + double value[N] = { 1.0 }; + int i; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret=starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* abort on any other initialization failure, as the sibling tests do */ + 
starpu_vector_data_register(&handle,0,(uintptr_t)&value,N,sizeof(double)); + + for (i=0; i<2; i++) + { + ret = starpu_task_insert(&sum_cl, + STARPU_RW, handle, + STARPU_R, handle, + 0); + if (ret == -ENODEV) goto enodev; + ret = starpu_task_insert(&sum3_cl, + STARPU_R, handle, + STARPU_R, handle, + STARPU_RW, handle, + 0); + if (ret == -ENODEV) goto enodev; + } + + starpu_data_acquire(handle, STARPU_R); + if (value[0] != 36) + { + FPRINTF(stderr, "value is %f instead of %f\n", value[0], 36.); + ret = EXIT_FAILURE; + } + starpu_data_release(handle); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = 2, + }; + starpu_data_partition(handle, &f); + /* The return value is deliberately not stored in ret, which may already hold EXIT_FAILURE */ + starpu_task_insert(&sum4_cl, + STARPU_R,starpu_data_get_sub_data(handle,1,0), + STARPU_R,starpu_data_get_sub_data(handle,1,1), + STARPU_RW,starpu_data_get_sub_data(handle,1,0), + STARPU_RW,starpu_data_get_sub_data(handle,1,1), + 0); + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + + starpu_task_wait_for_all(); + starpu_data_unregister(handle); + + starpu_shutdown(); + return ret; + +enodev: + starpu_data_unregister(handle); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/temporary_partition.c b/tests/datawizard/temporary_partition.c new file mode 100644 index 0000000..6cf1a86 --- /dev/null +++ b/tests/datawizard/temporary_partition.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +#define SIZE (1<<20) +#define NPARTS 16 + +/* + * Test asynchronous partitioning of temporary data (the vector is registered with home node -1 and a null pointer). + */ +/* No-op kernel shared by the write (clw) and read (clr) codelets. */ +static void codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet clw = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet clr = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle, handles[NPARTS]; + int i; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); + + /* Fork: plan NPARTS block sub-handles, then submit the partitioning */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS + }; + starpu_data_partition_plan(handle, &f, handles); + starpu_data_partition_submit(handle, NPARTS, handles); + + /* Process in parallel: one write task per sub-handle */ + for (i = 0; i < NPARTS; i++) + { + ret = starpu_task_insert(&clw, + STARPU_W, handles[i], + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + /* Invalidate one arbitrary piece whose coherency we do not care about */ + starpu_data_invalidate_submit(handles[NPARTS/2]); + + /* Hint that the whole data will not be used in the near future */ + starpu_data_wont_use(handle); + + /* Join: submit the unpartitioning, then drop the partition plan */ + starpu_data_unpartition_submit(handle, NPARTS, handles, -1); + starpu_data_partition_clean(handle, NPARTS, handles); + 
+ /* Read result */ + ret = starpu_task_insert(&clr, STARPU_R, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_data_unpartition_submit(handle, NPARTS, handles, -1); + starpu_data_partition_clean(handle, NPARTS, handles); + starpu_data_unregister(handle); + starpu_shutdown(); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + fprintf(stderr, "WARNING: No one can execute this task\n"); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/temporary_partition_implicit.c b/tests/datawizard/temporary_partition_implicit.c new file mode 100644 index 0000000..888ec73 --- /dev/null +++ b/tests/datawizard/temporary_partition_implicit.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +#define SIZE (1<<20) +#define NPARTS 16 + +/* + * Test asynchronous partitioning of temporary data without explicitly submitting + * partitioning/unpartitioning. 
+ */ +/* No-op kernel shared by the write (clw) and read (clr) codelets. */ +static void codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet clw = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet clr = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle, handles[NPARTS]; + int i; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); + + /* Fork: only plan the partition, without an explicit starpu_data_partition_submit() */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS + }; + starpu_data_partition_plan(handle, &f, handles); + + /* Process in parallel: one write task per sub-handle */ + for (i = 0; i < NPARTS; i++) + { + ret = starpu_task_insert(&clw, + STARPU_W, handles[i], + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + /* Invalidate one arbitrary piece whose coherency we do not care about */ + starpu_data_invalidate_submit(handles[NPARTS/2]); + + /* Hint that the whole data will not be used in the near future */ + starpu_data_wont_use(handle); + + /* Clean: the unpartitioning is still submitted explicitly before dropping the plan */ + starpu_data_unpartition_submit(handle, NPARTS, handles, -1); + starpu_data_partition_clean(handle, NPARTS, handles); + + /* Read result */ + ret = starpu_task_insert(&clr, STARPU_R, handle, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_data_unpartition_submit(handle, NPARTS, handles, -1); + starpu_data_partition_clean(handle, NPARTS, handles); + starpu_data_unregister(handle); + starpu_shutdown(); + /* yes, we do not perform the computation but we did detect that no one + * could 
perform the kernel, so this is not an error from StarPU */ + fprintf(stderr, "WARNING: No one can execute this task\n"); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/temporary_partition_read.c b/tests/datawizard/temporary_partition_read.c new file mode 100644 index 0000000..7a7a10b --- /dev/null +++ b/tests/datawizard/temporary_partition_read.c @@ -0,0 +1,104 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +#define SIZE (1<<20) +#define NPARTS 16 + +/* + * Test asynchronous read partitioning of non-initialized temporary + * data without explicitly submitting partitioning/unpartitioning. 
+ */ +/* No-op kernel shared by the write (clw) and read (clr) codelets. */ +static void codelet(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet clw = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet clr = +{ + .where = STARPU_CPU, + .cpu_funcs = {codelet}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle, handles[NPARTS]; + int i; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); + starpu_data_set_reduction_methods(handle, NULL, &clw); /* clw is passed as the init codelet (third argument); presumably it lets the never-written data be initialized before being read -- TODO confirm */ + + /* Fork: only plan the partition, nothing is submitted explicitly */ + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NPARTS + }; + starpu_data_partition_plan(handle, &f, handles); + + /* Process in parallel: one read task per sub-handle */ + for (i = 0; i < NPARTS; i++) + { + ret = starpu_task_insert(&clr, + STARPU_R, handles[i], + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_task_wait_for_all(); + starpu_data_partition_clean_node(handle, NPARTS, handles, -1); + starpu_data_unregister(handle); + starpu_shutdown(); + + return 0; + +enodev: + starpu_task_wait_for_all(); + starpu_data_partition_clean_node(handle, NPARTS, handles, -1); + starpu_data_unregister(handle); + starpu_shutdown(); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + fprintf(stderr, "WARNING: No one can execute this task\n"); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/test_arbiter.cpp b/tests/datawizard/test_arbiter.cpp new file mode 100644 index 0000000..dd42483 --- /dev/null +++ b/tests/datawizard/test_arbiter.cpp @@ 
-0,0 +1,216 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* Test commuting accesses on handles assigned to arbiters. Submission pattern (run once with one arbiter, then again with two): + * for i from 0 to nbA + * insert task handles[i] in STARPU_RW|STARPU_COMMUTE + * for j from 0 to nbA + * if i != j + insert task handles[i] in STARPU_RW|STARPU_COMMUTE, and handles[j] in STARPU_RW|STARPU_COMMUTE + */ + + +// @FUSE_STARPU + + +#include +#include "../helper.h" + +#include +#include + +#ifdef STARPU_QUICK_CHECK +#define SLEEP_SLOW 6000 +#define SLEEP_FAST 1000 +#elif !defined(STARPU_LONG_CHECK) +#define SLEEP_SLOW 60000 +#define SLEEP_FAST 10000 +#else +#define SLEEP_SLOW 600000 +#define SLEEP_FAST 100000 +#endif + +static unsigned nb, nb_slow; +/* Two-buffer kernel: atomically bumps nb around a SLEEP_FAST pause and logs the counter on entry and exit. */ +void callback(void * /*buffers*/[], void * /*cl_arg*/) +{ + unsigned val; + val = STARPU_ATOMIC_ADD(&nb, 1); + FPRINTF(stdout,"callback in (%u)\n", val); fflush(stdout); + starpu_usleep(SLEEP_FAST); + val = STARPU_ATOMIC_ADD(&nb, -1); + FPRINTF(stdout,"callback out (%u)\n", val); fflush(stdout); +} +/* One-buffer kernel: same as callback, but with nb_slow and a SLEEP_SLOW pause. */ +void callback_slow(void * /*buffers*/[], void * /*cl_arg*/) +{ + unsigned val; + val = STARPU_ATOMIC_ADD(&nb_slow, 1); + FPRINTF(stdout,"callback_slow in (%u)\n", val); fflush(stdout); + starpu_usleep(SLEEP_SLOW); + val = STARPU_ATOMIC_ADD(&nb_slow, -1); + FPRINTF(stdout,"callback_slow out (%u)\n", val); fflush(stdout); +} + + +int main(int /*argc*/, char** /*argv*/) +{ + int ret; + struct starpu_conf 
conf; + starpu_arbiter_t arbiter, arbiter2; + ret = starpu_conf_init(&conf); + STARPU_ASSERT(ret == 0); + //conf.ncpus = 1;//// 4 + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_ASSERT(ret == 0); + + FPRINTF(stdout, "Max Thread %u\n", starpu_worker_get_count()); + + ////////////////////////////////////////////////////// + + starpu_codelet normalCodelete; + { + memset(&normalCodelete, 0, sizeof(normalCodelete)); + normalCodelete.where = STARPU_CPU; + normalCodelete.cpu_funcs[0] = callback; + normalCodelete.nbuffers = 2; + normalCodelete.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); + normalCodelete.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); + normalCodelete.name = "normalCodelete"; + } + starpu_codelet slowCodelete; + { + memset(&slowCodelete, 0, sizeof(slowCodelete)); + slowCodelete.where = STARPU_CPU; + slowCodelete.cpu_funcs[0] = callback_slow; + slowCodelete.nbuffers = 1; + slowCodelete.modes[0] = starpu_data_access_mode (STARPU_RW|STARPU_COMMUTE); + slowCodelete.name = "slowCodelete"; + } + + ////////////////////////////////////////////////////// + ////////////////////////////////////////////////////// + + ///const int nbA = 3; + const int nbA = 10; + FPRINTF(stdout, "Nb A = %d\n", nbA); + + std::vector handleA(nbA); + std::vector dataA(nbA); + arbiter = starpu_arbiter_create(); + arbiter2 = starpu_arbiter_create(); + for(int idx = 0 ; idx < nbA ; ++idx) + { + dataA[idx] = idx; + } + for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) + { + starpu_variable_data_register(&handleA[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); + starpu_data_assign_arbiter(handleA[idxHandle], arbiter); + } + + ////////////////////////////////////////////////////// + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Submit tasks\n"); + + for(int idxHandleA1 = 0 ; idxHandleA1 < nbA ; ++idxHandleA1) + { + ret = starpu_task_insert(&slowCodelete, + 
(STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], + 0); + if (ret == -ENODEV) goto out; + for(int idxHandleA2 = 0 ; idxHandleA2 < nbA ; ++idxHandleA2) + { + if(idxHandleA1 != idxHandleA2) + { + ret = starpu_task_insert(&normalCodelete, + (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], + (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA2], + 0); + if (ret == -ENODEV) goto out; + } + } + } + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Wait task\n"); + + starpu_task_wait_for_all(); + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Release data\n"); + + for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) + { + starpu_data_unregister(handleA[idxHandle]); + } + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Proceed gain, with several arbiters\n"); + + for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) + { + starpu_variable_data_register(&handleA[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); + starpu_data_assign_arbiter(handleA[idxHandle], (idxHandle%2)?arbiter:arbiter2); + } + + ////////////////////////////////////////////////////// + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Submit tasks\n"); + + for(int idxHandleA1 = 0 ; idxHandleA1 < nbA ; ++idxHandleA1) + { + ret = starpu_task_insert(&slowCodelete, + (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], + 0); + if (ret == -ENODEV) goto out; + for(int idxHandleA2 = 0 ; idxHandleA2 < nbA ; ++idxHandleA2) + { + if(idxHandleA1 != idxHandleA2) + { + ret = starpu_task_insert(&normalCodelete, + (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], + (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA2], + 0); + if (ret == -ENODEV) goto out; + } + } + } + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Wait task\n"); + +out: + starpu_task_wait_for_all(); + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Release data\n"); + + for(int 
idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) + { + starpu_data_unregister(handleA[idxHandle]); + } + starpu_arbiter_destroy(arbiter); + starpu_arbiter_destroy(arbiter2); + + ////////////////////////////////////////////////////// + FPRINTF(stdout,"Shutdown\n"); + + starpu_shutdown(); + + return 0; +} diff --git a/tests/datawizard/unpartition.c b/tests/datawizard/unpartition.c new file mode 100644 index 0000000..ce3049f --- /dev/null +++ b/tests/datawizard/unpartition.c @@ -0,0 +1,127 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test running a task on a partitioned data, then on the unpartitioned + * data, etc. 
in a loop + */ + +#ifdef STARPU_QUICK_CHECK +#define NITER 100 +#else +#define NITER 1000 +#endif +#define VECTORSIZE 1024 + +float *buffer; + +starpu_data_handle_t v_handle; +/* Build, without submitting it, a non-detached no-op task accessing handle in RW mode. Note that every call reconfigures the externally defined starpu_codelet_nop (nbuffers and modes[0]). */ +static struct starpu_task* create_task(starpu_data_handle_t handle) +{ + starpu_codelet_nop.nbuffers = 1; + starpu_codelet_nop.modes[0] = STARPU_RW; + + struct starpu_task *task; + + task = starpu_task_create(); + task->cl = &starpu_codelet_nop; + task->handles[0] = handle; + task->detach = 0; + + return task; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_malloc((void **)&buffer, VECTORSIZE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)buffer, VECTORSIZE, sizeof(char)); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_divide_in_2, + /* there are only 2 children */ + .nchildren = 2, + /* the length of the first part */ + .filter_arg = VECTORSIZE/2 + }; + + unsigned iter; + for (iter = 0; iter < NITER; iter++) + { + struct starpu_task *tasks[3]; + /* Partition, run one task per half, unpartition, then run one task on the whole vector */ + starpu_data_map_filters(v_handle, 1, &f); + + tasks[0] = create_task(starpu_data_get_sub_data(v_handle, 1, 0)); + ret = starpu_task_submit(tasks[0]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + tasks[1] = create_task(starpu_data_get_sub_data(v_handle, 1, 1)); + ret = starpu_task_submit(tasks[1]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unpartition(v_handle, STARPU_MAIN_RAM); + + tasks[2] = create_task(v_handle); + ret = starpu_task_submit(tasks[2]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = 
starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + /* The tasks were created with detach = 0, so they are destroyed explicitly here */ + starpu_task_destroy(tasks[0]); + starpu_task_destroy(tasks[1]); + starpu_task_destroy(tasks[2]); + } + + starpu_data_unregister(v_handle); + starpu_free_noflag(buffer, VECTORSIZE); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + starpu_free_noflag(buffer, VECTORSIZE); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/user_interaction_implicit.c b/tests/datawizard/user_interaction_implicit.c new file mode 100644 index 0000000..ba5ab61 --- /dev/null +++ b/tests/datawizard/user_interaction_implicit.c @@ -0,0 +1,101 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test modifying the data in the callback of starpu_data_acquire_cb + */ + +#ifdef STARPU_QUICK_CHECK +# define NBUFFERS 4 +# define NITER 16 +#else +# define NBUFFERS 16 +# define NITER 128 +#endif +/* One registered variable (val) together with its position in buffers[] and its StarPU handle. */ +struct data +{ + unsigned index; + unsigned val; + starpu_data_handle_t handle; +}; + +struct data buffers[NBUFFERS]; +/* Acquisition callback: increments the acquired value, then releases the handle itself. arg points to the struct data entry. */ +static +void callback_sync_data(void *arg) +{ + struct data *data = (struct data *) arg; + + data->val++; + + starpu_data_release(data->handle); +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned b; + for (b = 0; b < NBUFFERS; b++) + { + buffers[b].index = b; + starpu_variable_data_register(&buffers[b].handle, STARPU_MAIN_RAM, (uintptr_t)&buffers[b].val, sizeof(unsigned)); + } + /* Request NITER asynchronous RW acquisitions of every buffer; each one runs callback_sync_data */ + unsigned iter; + for (iter = 0; iter < NITER; iter++) + { + for (b = 0; b < NBUFFERS; b++) + { + ret = starpu_data_acquire_cb(buffers[b].handle, STARPU_RW, + callback_sync_data, &buffers[b]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); + } + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + /* Clean up and verify that every buffer was incremented once per acquisition (NITER times) */ + ret = EXIT_SUCCESS; + for (b = 0; b < NBUFFERS; b++) + { + starpu_data_unregister(buffers[b].handle); + + /* check result */ + if (buffers[b].val != NITER) + { + FPRINTF(stderr, "buffer[%u] = %u should be %d\n", b, buffers[b].val, NITER); + ret = EXIT_FAILURE; + } + } + + starpu_shutdown(); + + return ret; +} diff --git a/tests/datawizard/variable_parameters.c b/tests/datawizard/variable_parameters.c new file mode 100644 index 0000000..6e8a4e2 --- /dev/null +++ b/tests/datawizard/variable_parameters.c @@ -0,0 +1,244 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Test the variable interface + */ + +static starpu_data_handle_t handle1, handle2, handle3, handle4; + +/* + * Increment codelet + */ + +#ifdef STARPU_USE_OPENCL +/* dummy OpenCL implementation */ +static void increment_opencl_kernel(void *descr[], void *cl_arg) +{ + (void)cl_arg; + int num = starpu_task_get_current()->nbuffers; + int i; + + for (i = 0; i < num; i++) + { + cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[i]); + unsigned h_token; + + cl_int err; + cl_command_queue queue; + starpu_opencl_get_current_queue(&queue); + + err = clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + h_token++; + err = clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); + clFinish(queue); + } +} +#endif + +#ifdef STARPU_USE_CUDA +static void increment_cuda_kernel(void *descr[], void *cl_arg) +{ + (void)cl_arg; + int num = starpu_task_get_current()->nbuffers; + int i; + + for (i = 0; i < num; i++) + { + unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]); + unsigned host_token; + + /* This is a dummy technique of course */ + cudaMemcpyAsync(&host_token, 
tokenptr, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); + + host_token++; + + cudaMemcpyAsync(tokenptr, &host_token, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + } + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} +#endif + +void increment_cpu_kernel(void *descr[], void *cl_arg) +{ + (void)cl_arg; + int num = starpu_task_get_current()->nbuffers; + int i; + + for (i = 0; i < num; i++) + { + unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]); + *tokenptr = *tokenptr + 1; + } +} + +static struct starpu_codelet increment_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda_kernel}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {increment_opencl_kernel}, +#endif + .cpu_funcs = {increment_cpu_kernel}, + + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /*.cpu_funcs_name = {"increment_cpu_kernel"},*/ + .nbuffers = STARPU_VARIABLE_NBUFFERS, +}; + +int main(void) +{ + unsigned *pvar = NULL; + int ret; + unsigned var1 = 0, var2 = 0, var3 = 0, var4 = 0; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(unsigned)); + starpu_variable_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)&var2, sizeof(unsigned)); + starpu_variable_data_register(&handle3, STARPU_MAIN_RAM, (uintptr_t)&var3, sizeof(unsigned)); + starpu_variable_data_register(&handle4, STARPU_MAIN_RAM, (uintptr_t)&var4, sizeof(unsigned)); + +#ifdef STARPU_QUICK_CHECK + unsigned nloops = 4; +#else + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t <= 4; t++) + { + struct starpu_task *task = starpu_task_create(); + unsigned i; + + task->cl = &increment_cl; + task->handles[0] = handle1; + task->handles[1] = 
handle2; + task->handles[2] = handle3; + task->handles[3] = handle4; + for (i = 0; i < t; i++) + task->modes[i] = STARPU_RW; + task->nbuffers = t; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_insert(&increment_cl, + STARPU_RW, handle1, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&increment_cl, + STARPU_RW, handle1, + STARPU_RW, handle2, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&increment_cl, + STARPU_RW, handle1, + STARPU_RW, handle2, + STARPU_RW, handle3, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&increment_cl, + STARPU_RW, handle1, + STARPU_RW, handle2, + STARPU_RW, handle3, + STARPU_RW, handle4, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + ret = starpu_data_acquire(handle1, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var1 != 8*nloops) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var1, 8*nloops); + starpu_data_release(handle1); + goto err; + } + starpu_data_release(handle1); + + ret = starpu_data_acquire(handle2, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var2 != 6*nloops) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var2, 6*nloops); + starpu_data_release(handle2); + goto err; + } + starpu_data_release(handle2); + + ret = starpu_data_acquire(handle3, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var3 != 4*nloops) + { + FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var3, 4*nloops); + starpu_data_release(handle3); + goto err; + } + starpu_data_release(handle3); + + ret = starpu_data_acquire(handle4, STARPU_R); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + if (var4 != 2*nloops) + { + FPRINTF(stderr, "[end of loop] Value %u != 
Expected value %u\n", var4, 2*nloops); + starpu_data_release(handle4); + goto err; + } + starpu_data_release(handle4); + + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_data_unregister(handle3); + starpu_data_unregister(handle4); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_data_unregister(handle3); + starpu_data_unregister(handle4); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + +err: + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_data_unregister(handle3); + starpu_data_unregister(handle4); + starpu_shutdown(); + return EXIT_FAILURE; +} diff --git a/tests/datawizard/variable_size.c b/tests/datawizard/variable_size.c new file mode 100644 index 0000000..9e6e862 --- /dev/null +++ b/tests/datawizard/variable_size.c @@ -0,0 +1,372 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * This is a dumb test for variable size + * We defined a dumb interface for data whose size increase over kernel execution + */ + +#ifdef STARPU_HAVE_MEMCHECK_H +#include +#else +#define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, size) (void)0 +#endif + +#include + +#define FULLSIZE (5*1024*1024ULL) +#define INCREASE 0.80 +#ifdef STARPU_QUICK_CHECK +#define N 5 +#define LIMIT "60" +#else +#define N 10 +#define LIMIT "250" +#endif + +/* Define the interface */ + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. Skipping test +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#elif STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +/* Sample Data interface with variable size */ +struct variable_size_interface +{ + enum starpu_data_interface_id id; + + /* Just a buffer of a given size */ + uintptr_t ptr; + size_t size; + + /* Coordinates of the represented object, just for modeling growth */ + unsigned x, y; +}; + +static struct starpu_data_interface_ops starpu_interface_variable_size_ops; + +static void register_variable_size(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + struct variable_size_interface *variable_size_interface = data_interface; + int node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct variable_size_interface *local_interface = + starpu_data_get_interface_on_node(handle, node); + + if (node == home_node) + local_interface->ptr = variable_size_interface->ptr; + local_interface->size = variable_size_interface->size; + + local_interface->id = variable_size_interface->id; + local_interface->x = variable_size_interface->x; + local_interface->y = variable_size_interface->y; + } +} + +void variable_size_data_register(starpu_data_handle_t *handleptr, unsigned x, unsigned y) +{ + struct variable_size_interface vsinterface = + { + .id = 
starpu_interface_variable_size_ops.interfaceid, + .x = x, + .y = y, + }; + + /* Simulate that tiles close to the diagonal are more dense */ + vsinterface.size = FULLSIZE * (starpu_lrand48() % 1024 + 1024) / 2048. * (N-sqrt(abs((int)x-(int)y)*N)) / N; + /* Round to page size */ + vsinterface.size -= vsinterface.size & (65536-1); + + _starpu_simgrid_data_new(vsinterface.size); + + starpu_data_register(handleptr, -1, &vsinterface, &starpu_interface_variable_size_ops); +} + +static size_t variable_size_get_size(starpu_data_handle_t handle) +{ + struct variable_size_interface *vsinterface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return vsinterface->size; +} + +static size_t variable_size_get_max_size(starpu_data_handle_t handle) +{ + (void)handle; + return FULLSIZE; +} + +static uint32_t variable_size_footprint(starpu_data_handle_t handle) +{ + return starpu_hash_crc32c_be(variable_size_get_size(handle), 0); +} + +static int variable_size_compare(void *data_interface_a, void *data_interface_b) +{ + struct variable_size_interface *variable_a = data_interface_a; + struct variable_size_interface *variable_b = data_interface_b; + + /* Two variables are considered compatible if they have the same size */ + return variable_a->size == variable_b->size; +} + +static void display_variable_size(starpu_data_handle_t handle, FILE *f) +{ + struct variable_size_interface *variable_interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%lu\t", (unsigned long) variable_interface->size); +} + +static starpu_ssize_t describe_variable_size(void *data_interface, char *buf, size_t size) +{ + struct variable_size_interface *variable_interface = data_interface; + return snprintf(buf, size, "vv%lu\t", (unsigned long) variable_interface->size); +} + +/* returns the size of the allocated area */ +static starpu_ssize_t allocate_variable_size_on_node(void *data_interface, unsigned dst_node) +{ + struct variable_size_interface 
*variable_interface = data_interface; + variable_interface->ptr = starpu_malloc_on_node_flags(dst_node, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); + if (dst_node == STARPU_MAIN_RAM) + _starpu_simgrid_data_alloc(variable_interface->size); + STARPU_ASSERT(variable_interface->ptr); + return 0; +} + +static void free_variable_size_on_node(void *data_interface, unsigned node) +{ + struct variable_size_interface *variable_interface = data_interface; + starpu_free_on_node(node, variable_interface->ptr, variable_interface->size); + if (node == STARPU_MAIN_RAM) + _starpu_simgrid_data_free(variable_interface->size); + variable_interface->ptr = 0; +} + +static int variable_size_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + struct variable_size_interface *src = src_interface; + struct variable_size_interface *dst = dst_interface; + + if (src->size != dst->size) + { + /* size has been changed by the application in the meantime */ + starpu_free_on_node(dst_node, dst->ptr, dst->size); + dst->ptr = starpu_malloc_on_node_flags(dst_node, src->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); + dst->size = src->size; + } + + return starpu_interface_copy(src->ptr, 0, src_node, + dst->ptr, 0, dst_node, + src->size, async_data); +} + +static const struct starpu_data_copy_methods variable_size_copy_data_methods = +{ + .any_to_any = variable_size_copy, +}; + +static struct starpu_data_interface_ops starpu_interface_variable_size_ops = +{ + .register_data_handle = register_variable_size, + .allocate_data_on_node = allocate_variable_size_on_node, + .free_data_on_node = free_variable_size_on_node, + .copy_methods = &variable_size_copy_data_methods, + .get_size = variable_size_get_size, + .get_max_size = variable_size_get_max_size, + .footprint = variable_size_footprint, + .compare = variable_size_compare, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + 
.interface_size = sizeof(struct variable_size_interface), + .display = display_variable_size, + .pack_data = NULL, + .peek_data = NULL, + .unpack_data = NULL, + .describe = describe_variable_size, + + /* We want to observe actual allocations/deallocations */ + .dontcache = 1, +}; + + +static void kernel(void *descr[], void *cl_arg) +{ + struct variable_size_interface *variable_interface = descr[0]; + unsigned workerid = starpu_worker_get_id_check(); + uintptr_t old = variable_interface->ptr; + unsigned dst_node = starpu_worker_get_memory_node(workerid); + + (void) cl_arg; + + /* Simulate that tiles close to the diagonal fill up faster */ + size_t increase = (FULLSIZE - variable_interface->size) * (starpu_lrand48() % 1024 + 1024) / 2048. * INCREASE; + /* Round to page size */ + increase -= increase & (65536-1); + + /* Allocation increase */ + variable_interface->ptr = starpu_malloc_on_node_flags(dst_node, variable_interface->size + increase, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); + VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE((void*) variable_interface->ptr, variable_interface->size + increase); + STARPU_ASSERT(variable_interface->ptr); + /* fprintf(stderr,"increase from %lu by %lu\n", variable_interface->size, increase); */ + starpu_free_on_node_flags(dst_node, old, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); + variable_interface->size += increase; + + /* These are only simulation bits */ + if (increase) + _starpu_simgrid_data_increase(increase); + starpu_sleep(0.010); +} + +static double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void)t; (void)a; (void)i; + return 10000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = {kernel}, + + /* dynamic size doesn't work on MIC */ + /*.cpu_funcs_name = {"kernel"},*/ + 
.nbuffers = 1, + .modes = {STARPU_RW}, + .model = &perf_model, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +static void init(void *descr[], void *cl_arg) +{ + (void)cl_arg; + struct variable_size_interface *variable_interface = descr[0]; + VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE((void*) variable_interface->ptr, variable_interface->size); +} + +static struct starpu_codelet cl_init = +{ + .cpu_funcs = {init}, + + /* dynamic size doesn't work on MIC */ + /*.cpu_funcs_name = {"kernel"},*/ + .nbuffers = 1, + .modes = {STARPU_W}, + .model = &starpu_perfmodel_nop, +}; + +int main(void) +{ + int ret; + int i; + int x, y; + starpu_data_handle_t handles[N][N]; + char s[128]; + + snprintf(s, sizeof(s), "/tmp/%s-variable_size", getenv("USER")); + + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); + setenv("STARPU_LIMIT_CPU_MEM", LIMIT, 1); + setenv("STARPU_DISK_SWAP", s, 0); + setenv("STARPU_DISK_SWAP_SIZE", "100000", 1); +#if 0 //def STARPU_LINUX_SYS + setenv("STARPU_DISK_SWAP_BACKEND", "unistd_o_direct", 0); +#else + setenv("STARPU_DISK_SWAP_BACKEND", "unistd", 0); +#endif + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + for (x = 0; x < N; x++) + for (y = 0; y < N; y++) + { + variable_size_data_register(&handles[x][y], x, y); + + ret = starpu_task_insert(&cl_init, STARPU_W, handles[x][y], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +#ifdef STARPU_SIMGRID + starpu_sleep(0.0005); +#endif + } + + starpu_task_wait_for_all(); + + /* Cholesky-like accesses */ + for (i = 0; i < N; i++) + for (x = i; x < N; x++) + for (y = x; y < N; y++) + { + ret = starpu_task_insert(&cl, STARPU_RW, handles[x][y], STARPU_PRIORITY, (2*N-x-y), 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + starpu_task_wait_for_all(); + +#if 0 + /* Look at the 
values */ + for (x = 0; x < N; x++) + for (y = 0; y < N; y++) + { + starpu_data_acquire(handles[x][y], STARPU_R); + starpu_data_release(handles[x][y]); + } +#endif + + for (x = 0; x < N; x++) + for (y = 0; y < N; y++) + starpu_data_unregister(handles[x][y]); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + for (x = 0; x < N; x++) + for (y = 0; y < N; y++) + starpu_data_unregister(handles[x][y]); + + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +#endif diff --git a/tests/datawizard/write_only_tmp_buffer.c b/tests/datawizard/write_only_tmp_buffer.c new file mode 100644 index 0000000..49631e1 --- /dev/null +++ b/tests/datawizard/write_only_tmp_buffer.c @@ -0,0 +1,157 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test initializing a buffer with a task, then printing it with another task + */ + +#define VECTORSIZE 1024 + +starpu_data_handle_t v_handle; + +#ifdef STARPU_USE_OPENCL +static void opencl_codelet_null(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + cl_mem buf = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); + char ptr = 42; + cl_command_queue queue; + cl_int err; + int id = starpu_worker_get_id_check(); + int devid = starpu_worker_get_devid(id); + + starpu_opencl_get_queue(devid, &queue); + err = clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, sizeof(char), &ptr, 0, NULL, NULL); + if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); +} +#endif + +#ifdef STARPU_USE_CUDA +static void cuda_codelet_null(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + + cudaMemsetAsync(buf, 42, 1, starpu_cuda_get_local_stream()); +} +#endif + +void cpu_codelet_null(void *descr[], void *arg) +{ + (void)arg; + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + + *buf = 42; +} + +void display_var(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); + if (*buf != 42) + { + FPRINTF(stderr, "Value = <%c> (should be <%c>)\n", *buf, 42); + exit(-1); + } +} + +static struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_codelet_null}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {cuda_codelet_null}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {opencl_codelet_null}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"cpu_codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet display_cl = +{ + .cpu_funcs = {display_var}, + .cpu_funcs_name = {"display_var"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + + +int main(int 
argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* The buffer should never be explicitly allocated */ + starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); + + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + task->handles[0] = v_handle; + task->detach = 0; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + task = starpu_task_create(); + task->cl = &display_cl; + task->handles[0] = v_handle; + task->detach = 0; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + /* this should get rid of automatically allocated buffers */ + starpu_data_unregister(v_handle); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/wt_broadcast.c b/tests/datawizard/wt_broadcast.c new file mode 100644 index 0000000..422cc08 --- /dev/null +++ b/tests/datawizard/wt_broadcast.c @@ -0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Test using starpu_data_set_wt_mask(handle, ~0);, i.e. broadcasting the + * result on all devices as soon as it is available. + */ + +static unsigned var = 0; +static starpu_data_handle_t handle; + +int main(void) +{ + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + + conf.ncpus = -1; + conf.ncuda = -1; + conf.nopencl = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); + + /* Create a mask with all the memory nodes, so that we can ask StarPU + * to broadcast the handle whenever it is modified. 
*/ + starpu_data_set_wt_mask(handle, ~0); + +#ifdef STARPU_QUICK_CHECK + unsigned ntasks = 32; + unsigned nloops = 4; +#else + unsigned ntasks = 1024; + unsigned nloops = 16; +#endif + + unsigned loop; + unsigned t; + + for (loop = 0; loop < nloops; loop++) + { + for (t = 0; t < ntasks; t++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &increment_cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_data_unregister(handle); + + ret = EXIT_SUCCESS; + if (var != ntasks*nloops) + { + FPRINTF(stderr, "VAR is %u should be %u\n", var, ntasks); + ret = EXIT_FAILURE; + } + + increment_unload_opencl(); + + starpu_shutdown(); + + return ret; + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/datawizard/wt_host.c b/tests/datawizard/wt_host.c new file mode 100644 index 0000000..eefd6b0 --- /dev/null +++ b/tests/datawizard/wt_host.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +/* + * Test writing back the result into main memory as soon as it is available + */ + +static unsigned var = 0; +static starpu_data_handle_t handle; + +int main(void) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + increment_load_opencl(); + + starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); + + /* Copy the handle in main memory every time it is modified */ + uint32_t wt_mask = (1<cl = &increment_cl; + task->handles[0] = handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + starpu_data_unregister(handle); + + ret = EXIT_SUCCESS; + if (var != ntasks*nloops) + { + ret = EXIT_FAILURE; + FPRINTF(stderr, "VAR is %u should be %u\n", var, ntasks); + } + + increment_unload_opencl(); + + starpu_shutdown(); + + STARPU_RETURN(ret); + +enodev: + starpu_data_unregister(handle); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/disk/disk_compute.c b/tests/disk/disk_compute.c new file mode 100644 index 0000000..199d50b --- /dev/null +++ b/tests/disk/disk_compute.c @@ -0,0 +1,457 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +#ifdef STARPU_HAVE_HDF5 +#include +#endif + +#if STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else +/* + * Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + * Here we just simulate performing a dumb computation C=A+0, i.e. a mere copy + * actually + */ + +#define NX (16*1024) + +int dotest(struct starpu_disk_ops *ops, char *base) +{ + int *A, *C; + + /* Initialize StarPU with default configuration */ + /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ + // Ignore environment variables as we want to force the exact number of workers + struct starpu_conf conf; + int ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return EXIT_FAILURE; + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = 1; + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + ret = starpu_init(&conf); + if (ret == -ENODEV) goto enodev; + + /* Initialize path and name */ + const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; + const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; + + char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); + strcpy(path_file_start, base); + strcat(path_file_start, "/"); + strcat(path_file_start, name_file_start); + + char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); + strcpy(path_file_end, base); + strcat(path_file_end, "/"); + strcat(path_file_end, name_file_end); + + /* register a disk */ + int new_dd = 
starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + unsigned dd = (unsigned) new_dd; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* you register them in a vector */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* you create a file to store the vector ON the disk */ + FILE * f = fopen(path_file_start, "wb+"); + if (f == NULL) + goto enoent2; + + /* store it in the file */ + fwrite(A, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + int descriptor = open(path_file_start, O_RDWR); + if (descriptor < 0) + goto enoent2; +#ifdef STARPU_HAVE_WINDOWS + _commit(descriptor); +#else + fsync(descriptor); +#endif + close(descriptor); + + /* create a file to store result */ + f = fopen(path_file_end, "wb+"); + if (f == NULL) + goto enoent2; + + /* replace all data by 0 */ + fwrite(C, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + descriptor = open(path_file_end, O_RDWR); +#ifdef STARPU_HAVE_WINDOWS + _commit(descriptor); +#else + fsync(descriptor); +#endif + close(descriptor); + + /* And now, you want to use your data in StarPU */ + /* Open the file ON the disk */ + void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); + void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); + + starpu_data_handle_t vector_handleA, vector_handleC; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); + + /* and do what you want with it, here we copy it into an other vector */ + starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); + + starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); + + /* free 
them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleC); + + /* close them in StarPU */ + starpu_disk_close(dd, data, NX*sizeof(int)); + starpu_disk_close(dd, data_result, NX*sizeof(int)); + + /* check results */ + f = fopen(path_file_end, "rb+"); + if (f == NULL) + goto enoent2; + /* take data */ + size_t read = fread(C, sizeof(int), NX, f); + STARPU_ASSERT(read == NX); + + /* close the file */ + fclose(f); + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return try ? EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + return STARPU_TEST_SKIPPED; +enoent2: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +enoent: + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} + +#ifdef STARPU_HAVE_HDF5 +int dotest_hdf5(struct starpu_disk_ops *ops, char *base) +{ + int *A, *C; + herr_t status; + + /* Open and close file, just to create an empty file */ + FILE * f = fopen(base, "wb+"); + if (!f) + goto h5fail2; + fclose(f); + + /* Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + + if (ret == -ENODEV) goto h5enodev; + + /* Initialize path */ + const char *path_obj_start = "STARPU_DISK_COMPUTE_DATA_"; + const char *path_obj_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; + + /* register a disk */ + int new_dd = 
starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto h5enoent; + + unsigned dd = (unsigned) new_dd; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* A holds the reference pattern, C starts zeroed and later receives the data read back */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* Open HDF5 file to store data */ + hid_t file = H5Fopen(base, H5F_ACC_RDWR, H5P_DEFAULT); + if (file < 0) + goto h5enoent2; + + /* store initial data in the file */ + hsize_t dims[1] = {NX}; + hid_t dataspace = H5Screate_simple(1, dims, NULL); + if (dataspace < 0) + { + H5Fclose(file); + goto h5fail; + } + + hid_t dataset = H5Dcreate2(file, path_obj_start, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dataset < 0) + { + H5Sclose(dataspace); + H5Fclose(file); + goto h5fail; + } + + status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, A); + + /* close the dataset before checking the write; the dataspace is still needed for the result dataset */ + H5Dclose(dataset); + if (status < 0) + { + H5Sclose(dataspace); + H5Fclose(file); + goto h5fail; + } + + /* initialize the result dataset with zeros (C), so that the final A/C comparison can only pass if the StarPU copy really happened */ + dataset = H5Dcreate2(file, path_obj_end, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dataset < 0) + { + H5Sclose(dataspace); + H5Fclose(file); + goto h5fail; + } + + status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, C); + + /* close the resources before checking the writing */ + H5Dclose(dataset); + H5Sclose(dataspace); + H5Fclose(file); + + if (status < 0) + goto h5fail; + + /* And now, you want to use your data in StarPU */ + /* Open the file ON the disk */ + void * data = starpu_disk_open(dd, (void *) path_obj_start, NX*sizeof(int)); + void * data_result = starpu_disk_open(dd, (void *) path_obj_end, NX*sizeof(int)); + + starpu_data_handle_t 
vector_handleA, vector_handleC; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); + + /* and do what you want with it, here we copy it into an other vector */ + starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); + + starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); + + /* free them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleC); + + /* close them in StarPU */ + starpu_disk_close(dd, data, NX*sizeof(int)); + starpu_disk_close(dd, data_result, NX*sizeof(int)); + + /* check results */ + file = H5Fopen(base, H5F_ACC_RDWR, H5P_DEFAULT); + if (file < 0) + goto h5enoent2; + + dataset = H5Dopen2(file, path_obj_end, H5P_DEFAULT); + if (dataset < 0) + { + H5Fclose(file); + goto h5fail; + } + + status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, C); + + /* close the resources before checking the writing */ + H5Dclose(dataset); + H5Fclose(file); + + if (status < 0) + goto h5fail; + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + unlink(base); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return (try ? 
EXIT_SUCCESS : EXIT_FAILURE); + +h5enodev: + unlink(base); + return STARPU_TEST_SKIPPED; +h5enoent2: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +h5enoent: + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + unlink(base); + return STARPU_TEST_SKIPPED; +h5fail: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + starpu_shutdown(); + unlink(base); +h5fail2: + FPRINTF(stderr, "Something goes wrong with HDF5 dataset/dataspace/write \n"); + return EXIT_FAILURE; + +} +#endif +/* Fold the status `new` of one more run into the accumulated status `old`: EXIT_FAILURE on either side is sticky so that a later success or skip cannot hide it, an accumulated 0 is kept, otherwise `new` is returned. */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (old == 0) + return 0; + return new; +} +/* Create a scratch directory under /tmp, run dotest()/dotest_hdf5() there once per available disk backend, remove the directory and return the merged status. */ +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + ptr = getenv("USER"); /* NULL when USER is unset, which must not reach %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", ptr ? ptr : "starpu"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory '%s'\n", s); + return STARPU_TEST_SKIPPED; + } + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); +#ifdef STARPU_LINUX_SYS + if ((NX * sizeof(int)) % getpagesize() == 0) /* the O_DIRECT backend is only run when the vector size is a whole number of pages */ + { + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); + } + else + { + ret = merge_result(ret, STARPU_TEST_SKIPPED); + } +#endif +#ifdef STARPU_HAVE_HDF5 + char hdf5_base[sizeof(s) + sizeof("/STARPU_HDF5_file.h5")]; /* always large enough for s plus the file name */ + strcpy(hdf5_base, s); + strcat(hdf5_base, "/STARPU_HDF5_file.h5"); + + ret = merge_result(ret, dotest_hdf5(&starpu_disk_hdf5_ops, hdf5_base)); +#endif + + ret2 = rmdir(s); + if (ret2 < 0) + STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); + return ret; +} +#endif diff --git a/tests/disk/disk_copy.c b/tests/disk/disk_copy.c new file mode 100644 index 0000000..018673f --- /dev/null +++ b/tests/disk/disk_copy.c @@ -0,0 +1,200 @@ +/* StarPU --- 
Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + * Here we make copies between buffers, that StarPU has to evict while + * progressing because there is not enough room for all of them. + */ + +/* RAM is not enough to hold 6 times NX + * DISK is just enough to hold 6 times NX */ + +/* size of one vector */ +#ifdef STARPU_QUICK_CHECK +# define RAM "1" +# define DISK 64 +# define NX (256*1024/sizeof(double)) +#else +# define NX (32*1048576/sizeof(double)) +# define RAM "160" +# define DISK 200 +#endif + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#elif STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +int dotest(struct starpu_disk_ops *ops, void *param) +{ + double *A,*F; + int ret; + + /* limit main ram to force to push in disk */ + setenv("STARPU_LIMIT_CPU_NUMA_MEM", RAM, 1); + + /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ + // Ignore environment variables as we want to force the exact number of workers + struct starpu_conf conf; + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return EXIT_FAILURE; + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = 1; + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + ret = starpu_init(&conf); + if (ret == -ENODEV) goto enodev; + + /* register a disk */ + int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* initialization with bad values */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + F[j] = -j; + } + + starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double)); + starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, 
sizeof(double)); + starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double)); + + /* copy vector A->B, B->C... */ + starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL); + starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL); + starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL); + starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL); + starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL); + + /* StarPU does not need to manipulate the array anymore so we can stop + * monitoring it */ + + /* free them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleB); + starpu_data_unregister(vector_handleC); + starpu_data_unregister(vector_handleD); + starpu_data_unregister(vector_handleE); + starpu_data_unregister(vector_handleF); + + /* check if computation is correct */ + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != F[j]) + { + FPRINTF(stderr, "Fail A %f != F %f \n", A[j], F[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT); + starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return try ? 
EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + return STARPU_TEST_SKIPPED; +enoent: + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +/* Fold the status `new` of one more run into the accumulated status `old`: EXIT_FAILURE on either side is sticky so that a later success or skip cannot hide it, an accumulated 0 is kept, otherwise `new` is returned. */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (old == 0) + return 0; + return new; +} +/* Create a scratch directory under /tmp, run dotest() there once per available disk backend, remove the directory and return the merged status. */ +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + ptr = getenv("USER"); /* NULL when USER is unset, which must not reach %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", ptr ? ptr : "starpu"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory <%s>\n", s); + return STARPU_TEST_SKIPPED; + } + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); +#ifdef STARPU_LINUX_SYS + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); +#endif +#ifdef STARPU_HAVE_HDF5 + ret = merge_result(ret, dotest(&starpu_disk_hdf5_ops, s)); +#endif + + ret2 = rmdir(s); + if (ret2 < 0) + STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); + return ret; +} +#endif diff --git a/tests/disk/disk_copy_to_disk.c b/tests/disk/disk_copy_to_disk.c new file mode 100644 index 0000000..9d4097a --- /dev/null +++ b/tests/disk/disk_copy_to_disk.c @@ -0,0 +1,423 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +#ifdef STARPU_HAVE_HDF5 +#include +#endif + +#if STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else +/* + * Try to write into disk memory + * Use mechanism to push data from disk to disk. + */ + +#define NX (16*1024) + +int dotest(struct starpu_disk_ops *ops, char *base) +{ + int *A, *C; + + /* Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + + /* Initialize path and name */ + const char *name_file_start = "STARPU_DISK_COMPUTE_DATA"; + const char *name_dir_src = "src"; + const char *name_dir_dst = "dst"; + + char * path_file_start = malloc(strlen(base) + 1 + strlen(name_dir_src) + 1 + strlen(name_file_start) + 1); + strcpy(path_file_start, base); + strcat(path_file_start, "/"); + strcat(path_file_start, name_dir_src); + strcat(path_file_start, "/"); + strcat(path_file_start, name_file_start); + + char * base_src = malloc(strlen(base) + 1 + strlen(name_dir_src) + 1); + strcpy(base_src, base); + strcat(base_src, "/"); + strcat(base_src, name_dir_src); + + char * base_dst = malloc(strlen(base) + 1 + strlen(name_dir_dst) + 1); + strcpy(base_dst, base); + strcat(base_dst, "/"); + strcat(base_dst, name_dir_dst); + + /* register a disks */ + int disk_src = starpu_disk_register(ops, (void *) base_src, STARPU_DISK_SIZE_MIN); + if (disk_src == -ENOENT) goto enoent; + + int disk_dst = starpu_disk_register(ops, (void *) base_dst, STARPU_DISK_SIZE_MIN); + if (disk_dst == -ENOENT) goto enoent; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int 
j; + /* you register them in a vector */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* you create a file to store the vector ON the disk */ + FILE * f = fopen(path_file_start, "wb+"); + if (f == NULL) + goto enoent2; + + /* store it in the file */ + fwrite(A, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + int descriptor = open(path_file_start, O_RDWR); + if (descriptor < 0) + goto enoent2; +#ifdef STARPU_HAVE_WINDOWS + _commit(descriptor); +#else + fsync(descriptor); +#endif + close(descriptor); + + /* And now, you want to use your data in StarPU */ + /* Open the file ON the disk */ + void * data = starpu_disk_open(disk_src, (void *) name_file_start, NX*sizeof(int)); + STARPU_ASSERT(data); + + starpu_data_handle_t vector_handleA; + starpu_vector_data_register(&vector_handleA, disk_src, (uintptr_t) data, NX, sizeof(int)); + + /* Move and invalidate copy to an other disk */ + starpu_data_acquire_on_node(vector_handleA, disk_dst, STARPU_RW); + starpu_data_release_on_node(vector_handleA, disk_dst); + + starpu_data_acquire_on_node(vector_handleA, disk_src, STARPU_RW); + starpu_data_release_on_node(vector_handleA, disk_src); + + /* free them */ + starpu_data_unregister(vector_handleA); + + /* close them in StarPU */ + starpu_disk_close(disk_src, data, NX*sizeof(int)); + + /* check results */ + f = fopen(path_file_start, "rb+"); + if (f == NULL) + goto enoent2; + /* take data */ + size_t read = fread(C, sizeof(int), NX, f); + STARPU_ASSERT(read == NX); + + /* close the file */ + fclose(f); + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + unlink(path_file_start); + rmdir(base_src); + + free(base_src); + free(base_dst); + free(path_file_start); + + 
if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return try ? EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + return STARPU_TEST_SKIPPED; +enoent2: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +enoent: + free(base_src); + free(base_dst); + free(path_file_start); + + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} + +#ifdef STARPU_HAVE_HDF5 +int dotest_hdf5(struct starpu_disk_ops *ops, char *base) +{ + int *A, *C; + herr_t status; + + + /* Initialize path */ + const char *path_obj_start = "STARPU_DISK_COMPUTE_DATA"; + const char *name_hdf5_start = "STARPU_HDF5_src_file.h5"; + const char *name_hdf5_end = "STARPU_HDF5_dst_file.h5"; + + char * hdf5_base_src = malloc(strlen(base) + 1 + strlen(name_hdf5_start) + 1); + strcpy(hdf5_base_src, base); + strcat(hdf5_base_src, "/"); + strcat(hdf5_base_src, name_hdf5_start); + + char * hdf5_base_dst = malloc(strlen(base) + 1 + strlen(name_hdf5_end) + 1); + strcpy(hdf5_base_dst, base); + strcat(hdf5_base_dst, "/"); + strcat(hdf5_base_dst, name_hdf5_end); + + /* Open and close files, just to create empty files */ + FILE * file_src = fopen(hdf5_base_src, "wb+"); + if (!file_src) + goto h5fail2; + fclose(file_src); + + FILE * file_dst = fopen(hdf5_base_dst, "wb+"); + if (!file_dst) + { + goto h5fail2; + } + fclose(file_dst); + + /* Initialize StarPU with default configuration */ + int ret = starpu_init(NULL); + if (ret == -ENODEV) goto h5enodev; + + /* register disks */ + int disk_src = starpu_disk_register(ops, (void *) hdf5_base_src, STARPU_DISK_SIZE_MIN); + if (disk_src == -ENOENT) goto h5enoent; + + int disk_dst = starpu_disk_register(ops, (void *) hdf5_base_dst, STARPU_DISK_SIZE_MIN); + if (disk_dst == -ENOENT) goto h5enoent; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void 
**)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* you register them in a vector */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* Open HDF5 file to store data */ + hid_t file = H5Fopen(hdf5_base_src, H5F_ACC_RDWR, H5P_DEFAULT); + if (file < 0) + goto h5fail; + + /* store initial data in the file */ + hsize_t dims[1] = {NX}; + hid_t dataspace = H5Screate_simple(1, dims, NULL); + if (dataspace < 0) + { + H5Fclose(file); + goto h5fail; + } + + hid_t dataset = H5Dcreate2(file, path_obj_start, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dataset < 0) + { + H5Sclose(dataspace); + H5Fclose(file); + goto h5fail; + } + + status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, A); + + /* close the resources before checking the writing */ + H5Dclose(dataset); + + if (status < 0) + { + H5Fclose(file); + goto h5fail; + } + + H5Sclose(dataspace); + H5Fclose(file); + + /* Open the file ON the disk */ + void * data = starpu_disk_open(disk_src, (void *) path_obj_start, NX*sizeof(int)); + + starpu_data_handle_t vector_handleA; + starpu_vector_data_register(&vector_handleA, disk_src, (uintptr_t) data, NX, sizeof(int)); + + /* Move and invalidate copy to an other disk */ + starpu_data_acquire_on_node(vector_handleA, disk_dst, STARPU_RW); + starpu_data_release_on_node(vector_handleA, disk_dst); + + starpu_data_acquire_on_node(vector_handleA, disk_src, STARPU_RW); + starpu_data_release_on_node(vector_handleA, disk_src); + + starpu_data_unregister(vector_handleA); + + /* close them in StarPU */ + starpu_disk_close(disk_src, data, NX*sizeof(int)); + + /* check results */ + file = H5Fopen(hdf5_base_src, H5F_ACC_RDWR, H5P_DEFAULT); + if (file < 0) + goto h5fail; + + dataset = H5Dopen2(file, path_obj_start, H5P_DEFAULT); + if (dataset < 0) + { + H5Fclose(file); + goto h5fail; + } + + status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, C); + + /* 
close the resources before checking the writing */ + H5Dclose(dataset); + H5Fclose(file); + + if (status < 0) + goto h5fail; + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + unlink(hdf5_base_src); + unlink(hdf5_base_dst); + + free(hdf5_base_src); + free(hdf5_base_dst); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return (try ? EXIT_SUCCESS : EXIT_FAILURE); + +h5fail: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +h5enoent: + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); +h5enodev: + unlink(hdf5_base_src); + unlink(hdf5_base_dst); + free(hdf5_base_src); + free(hdf5_base_dst); + return STARPU_TEST_SKIPPED; +h5fail2: + unlink(hdf5_base_src); /* the source file may already exist when creating the destination file failed */ + free(hdf5_base_src); + free(hdf5_base_dst); + FPRINTF(stderr, "Something goes wrong with HDF5 dataset/dataspace/write \n"); + return EXIT_FAILURE; +} +#endif +/* Fold the status `new` of one more run into the accumulated status `old`: EXIT_FAILURE on either side is sticky so that a later success or skip cannot hide it, an accumulated 0 is kept, otherwise `new` is returned. */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (old == 0) + return 0; + return new; +} +/* Create a scratch directory under /tmp, run the test there once per available disk backend, remove the directory and return the merged status. */ +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + ptr = getenv("USER"); /* NULL when USER is unset, which must not reach %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", ptr ? ptr : "starpu"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory '%s'\n", s); + return STARPU_TEST_SKIPPED; + } + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); +#ifdef STARPU_LINUX_SYS + if ((NX * sizeof(int)) % getpagesize() == 0) + { + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, 
s)); + } + else + { + ret = merge_result(ret, STARPU_TEST_SKIPPED); + } +#endif +#ifdef STARPU_HAVE_HDF5 + ret = merge_result(ret, dotest_hdf5(&starpu_disk_hdf5_ops, s)); +#endif + + ret2 = rmdir(s); + if (ret2 < 0) + STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); + return ret; +} +#endif diff --git a/tests/disk/disk_copy_unpack.c b/tests/disk/disk_copy_unpack.c new file mode 100644 index 0000000..65e4538 --- /dev/null +++ b/tests/disk/disk_copy_unpack.c @@ -0,0 +1,179 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test pack / unpack methods before pushing data on disk with async read/write. + */ + +/* size of one vector */ +#ifdef STARPU_QUICK_CHECK +# define DISK 64 +# define NX (256*1024/sizeof(double)) +#else +# define NX (32*1048576/sizeof(double)) +# define DISK 200 +#endif + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#elif STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +int dotest(struct starpu_disk_ops *ops, void *param) +{ + unsigned *A; + int ret; + + /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ + // Ignore environment variables as we want to force the exact number of workers + struct starpu_conf conf; + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return EXIT_FAILURE; + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + ret = starpu_init(&conf); + if (ret == -ENODEV) goto enodev; + + /* register a disk */ + int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(unsigned), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* initialization with bad values */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + } + + starpu_data_handle_t vector_handleA; + + static const struct starpu_data_copy_methods my_vector_copy_data_methods_s = + { + .any_to_any = NULL, + }; + + starpu_interface_vector_ops.copy_methods = &my_vector_copy_data_methods_s; + + /* register vector in starpu */ + starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(unsigned)); + + /* Move and invalidate copy to an other disk */ + starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW); + starpu_data_release_on_node(vector_handleA, new_dd); + + starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW); + starpu_data_release_on_node(vector_handleA, new_dd); + + /* free them */ + starpu_data_unregister(vector_handleA); + + /* check if computation is correct */ + int try = 1; + for 
(j = 0; j < NX; ++j) + if (A[j] != j) + { + FPRINTF(stderr, "Fail A %u != %u \n", A[j], j); + try = 0; + } + + starpu_free_flags(A, NX*sizeof(unsigned), STARPU_MALLOC_COUNT); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return try ? EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + return STARPU_TEST_SKIPPED; +enoent: + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +/* Fold the status `new` of one more run into the accumulated status `old`: EXIT_FAILURE on either side is sticky so that a later success or skip cannot hide it, an accumulated 0 is kept, otherwise `new` is returned. */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (old == 0) + return 0; + return new; +} +/* Create a scratch directory under /tmp, run dotest() there once per available disk backend, remove the directory and return the merged status. */ +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + ptr = getenv("USER"); /* NULL when USER is unset, which must not reach %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", ptr ? ptr : "starpu"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory <%s>\n", s); + return STARPU_TEST_SKIPPED; + } + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); +#ifdef STARPU_LINUX_SYS + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); +#endif +#ifdef STARPU_HAVE_HDF5 + ret = merge_result(ret, dotest(&starpu_disk_hdf5_ops, s)); +#endif + + ret2 = rmdir(s); + if (ret2 < 0) + STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); + return ret; +} +#endif diff --git a/tests/disk/disk_pack.c b/tests/disk/disk_pack.c new file mode 100644 index 0000000..4f1e130 --- /dev/null +++ b/tests/disk/disk_pack.c @@ -0,0 +1,288 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +#if STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else +/* + * Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + * Here we force using the pack/unpack mechanism + */ + +#define NX (16*1024) + +const struct starpu_data_copy_methods my_vector_copy_data_methods_s; +struct starpu_data_interface_ops starpu_interface_my_vector_ops; + +void starpu_my_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_node, + uintptr_t ptr, uint32_t nx, size_t elemsize) +{ + struct starpu_vector_interface vector = + { + .id = STARPU_VECTOR_INTERFACE_ID, + .ptr = ptr, + .nx = nx, + .elemsize = elemsize, + .dev_handle = ptr, + .slice_base = 0, + .offset = 0, + .allocsize = nx * elemsize, + }; + + starpu_data_register(handleptr, home_node, &vector, &starpu_interface_my_vector_ops); +} + +int dotest(struct starpu_disk_ops *ops, char *base) +{ + int *A, *C; + + /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ + // Ignore environment variables as we want to force the exact number of workers + struct starpu_conf conf; + int ret = 
starpu_conf_init(&conf); + if (ret == -EINVAL) + return EXIT_FAILURE; + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + ret = starpu_init(&conf); + if (ret == -ENODEV) goto enodev; + + if (starpu_cpu_worker_get_count() == 0) + { + FPRINTF(stderr, "We need at least 1 CPU worker.\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + /* Initialize path and name */ + const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; + const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; + + char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); + strcpy(path_file_start, base); + strcat(path_file_start, "/"); + strcat(path_file_start, name_file_start); + + char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); + strcpy(path_file_end, base); + strcat(path_file_end, "/"); + strcat(path_file_end, name_file_end); + + /* register a disk */ + int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + unsigned dd = (unsigned) new_dd; + + /* allocate two memory spaces */ + starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + FPRINTF(stderr, "TEST DISK MEMORY \n"); + + unsigned int j; + /* you register them in a vector */ + for(j = 0; j < NX; ++j) + { + A[j] = j; + C[j] = 0; + } + + /* you create a file to store the vector ON the disk */ + FILE * f = fopen(path_file_start, "wb+"); + if (f == NULL) + goto enoent2; + + /* store it in the file */ + fwrite(A, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + int descriptor = open(path_file_start, O_RDWR); + if (descriptor < 0) + goto enoent2; + +#ifdef STARPU_HAVE_WINDOWS + _commit(descriptor); +#else + fsync(descriptor); +#endif + close(descriptor); + + /* create a file to store 
result */ + f = fopen(path_file_end, "wb+"); + if (f == NULL) + goto enoent2; + + /* replace all data by 0 */ + fwrite(C, sizeof(int), NX, f); + + /* close the file */ + fclose(f); + + descriptor = open(path_file_end, O_RDWR); + if (descriptor < 0) + goto enoent2; +#ifdef STARPU_HAVE_WINDOWS + _commit(descriptor); +#else + fsync(descriptor); +#endif + close(descriptor); + + /* And now, you want to use your data in StarPU */ + /* Open the file ON the disk */ + void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); + void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); + + starpu_data_handle_t vector_handleA, vector_handleC; + + /* Build an vector-like interface which doesn't have the any_to_any helper, to force making use of pack/unpack */ + memcpy(&starpu_interface_my_vector_ops, &starpu_interface_vector_ops, sizeof(starpu_interface_my_vector_ops)); + starpu_interface_my_vector_ops.copy_methods = &my_vector_copy_data_methods_s; + + /* register vector in starpu */ + starpu_my_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); + + /* and do what you want with it, here we copy it into an other vector */ + starpu_my_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); + + starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); + + /* free them */ + starpu_data_unregister(vector_handleA); + starpu_data_unregister(vector_handleC); + + /* close them in StarPU */ + starpu_disk_close(dd, data, NX*sizeof(int)); + starpu_disk_close(dd, data_result, NX*sizeof(int)); + + /* check results */ + f = fopen(path_file_end, "rb+"); + if (f == NULL) + goto enoent2; + /* take data */ + size_t read = fread(C, sizeof(int), NX, f); + STARPU_ASSERT(read == NX); + + /* close the file */ + fclose(f); + + int try = 1; + for (j = 0; j < NX; ++j) + if (A[j] != C[j]) + { + FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); + try = 0; + } + + starpu_free_flags(A, 
NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); + + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + if(try) + FPRINTF(stderr, "TEST SUCCESS\n"); + else + FPRINTF(stderr, "TEST FAIL\n"); + return try ? EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + return STARPU_TEST_SKIPPED; +enoent2: + starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); + starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); +enoent: + unlink(path_file_start); + unlink(path_file_end); + + free(path_file_start); + free(path_file_end); + + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +/* Fold the result of one more dotest() run into the overall result: a failure, whether already recorded in old or reported by new, is never overwritten by a later success */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (old == 0) + return 0; + return new; +} + +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + /* USER may be unset, in which case getenv() returns NULL: never hand NULL to %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER") ? getenv("USER") : "unknown"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory <%s>\n", s); + return STARPU_TEST_SKIPPED; + } + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); +#ifdef STARPU_LINUX_SYS + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); +#endif + + ret2 = rmdir(s); + if (ret2 < 0) + STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); + return ret; +} +#endif diff --git a/tests/disk/mem_reclaim.c b/tests/disk/mem_reclaim.c new file mode 100644 index 0000000..d52d8be --- /dev/null +++ b/tests/disk/mem_reclaim.c @@ -0,0 +1,295 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Corentin Salingue + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try to write into disk memory + * Use mechanism to push data from main ram to disk ram + * Here we stress the memory with more tasks than what the RAM can fit. + */ + +#ifdef STARPU_HAVE_MEMCHECK_H +#include +#else +#define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, size) (void)0 +#endif + +#ifdef STARPU_QUICK_CHECK +# define NDATA 4 +# define NITER 8 +#elif !defined(STARPU_LONG_CHECK) +# define NDATA 32 +# define NITER 128 +#else +# define NDATA 128 +# define NITER 512 +#endif +# define MEMSIZE 1 +# define MEMSIZE_STR "1" + +#if !defined(STARPU_HAVE_SETENV) +#warning setenv is not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#elif STARPU_MAXNODES == 1 +/* Cannot register a disk */ +int main(int argc, char **argv) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); + +/* We need a ram-to-ram copy for NUMA machine, use any_to_any for that */ +static int ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) +{ + return any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); +} + +const struct starpu_data_copy_methods my_vector_copy_data_methods_s = +{ + .ram_to_ram = ram_to_ram +}; +struct starpu_data_interface_ops starpu_interface_my_vector_ops; + +void starpu_my_vector_data_register(starpu_data_handle_t *handleptr, int home_node, + uintptr_t ptr, uint32_t nx, size_t elemsize) +{ + struct starpu_vector_interface vector = + { + .id = STARPU_VECTOR_INTERFACE_ID, + .ptr = ptr, + .nx = nx, + .elemsize = elemsize, + .dev_handle = ptr, + .slice_base = 0, + .offset = 0, + .allocsize = nx * elemsize, + }; + + starpu_data_register(handleptr, home_node, &vector, &starpu_interface_my_vector_ops); +} + +static unsigned values[NDATA]; + +static void zero(void *buffers[], void *args) +{ + (void)args; + struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; + unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); + *val = 0; + VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(val, STARPU_VECTOR_GET_NX(vector) * STARPU_VECTOR_GET_ELEMSIZE(vector)); +} + +static void inc(void *buffers[], void *args) +{ + struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; + unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); + unsigned i; + starpu_codelet_unpack_args(args, &i); + (*val)++; + STARPU_ATOMIC_ADD(&values[i], 1); +} + +static void check(void *buffers[], void *args) +{ + struct starpu_vector_interface *vector = 
(struct starpu_vector_interface *) buffers[0]; + unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); + unsigned i; + starpu_codelet_unpack_args(args, &i); + STARPU_ASSERT_MSG(*val == values[i], "Incorrect value. Value %u should be %u (index %u)", *val, values[i], i); +} + +static struct starpu_codelet zero_cl = +{ + .cpu_funcs = { zero }, + .nbuffers = 1, + .modes = { STARPU_W }, +}; + +static struct starpu_codelet inc_cl = +{ + .cpu_funcs = { inc }, + .nbuffers = 1, + .modes = { STARPU_RW }, +}; + +static struct starpu_codelet check_cl = +{ + .cpu_funcs = { check }, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +int dotest(struct starpu_disk_ops *ops, char *base, void (*vector_data_register)(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize), const char *text) +{ + starpu_data_handle_t handles[NDATA]; + + if (starpu_getenv_number_default("STARPU_DIDUSE_BARRIER", 0)) + /* This would hang */ + return STARPU_TEST_SKIPPED; + + FPRINTF(stderr, "Testing <%s>\n", text); + /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ + // Ignore environment variables as we want to force the exact number of workers + struct starpu_conf conf; + int ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return EXIT_FAILURE; + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + + /* Initialize path and name */ + /* register swap disk */ + int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); + /* can't write on /tmp/ */ + if (new_dd == -ENOENT) goto enoent; + + unsigned int i, j; + + /* Initialize twice as much data as available memory */ + for (i = 0; i < NDATA; i++) + { + vector_data_register(&handles[i], -1, 0, (MEMSIZE*1024*1024*2) / NDATA, sizeof(char)); + ret = starpu_task_insert(&zero_cl, 
STARPU_W, handles[i], 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + memset(values, 0, sizeof(values)); + + /* Work out of core */ + for (i = 0; i < NITER; i++) + { + j = rand()%NDATA; + ret = starpu_task_insert(&inc_cl, STARPU_RW, handles[j], STARPU_VALUE, &j, sizeof(j), 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + starpu_task_wait_for_all(); + + /* forcibly evict some data, just for fun */ + for (i = 0; i < NDATA; i++) + { + if ((rand() % 2) == 0) + starpu_data_evict_from_node(handles[i], STARPU_MAIN_RAM); + } + + /* And work out of core again */ + for (i = 0; i < NITER; i++) + { + j = rand()%NDATA; + ret = starpu_task_insert(&inc_cl, STARPU_RW, handles[j], STARPU_VALUE, &j, sizeof(j), 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + /* Check and free data */ + for (i = 0; i < NDATA; i++) + { + ret = starpu_task_insert(&check_cl, STARPU_R, handles[i], STARPU_VALUE, &i, sizeof(i), 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_data_unregister(handles[i]); + } + + /* terminate StarPU, no task can be submitted after */ + starpu_shutdown(); + + return EXIT_SUCCESS; + +enoent: + FPRINTF(stderr, "Couldn't write data: ENOENT\n"); +enodev: + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +/* Fold the result of one more dotest() run into the overall result: a failure, whether already recorded in old or reported by new, is never overwritten by a later success or skip */ +static int merge_result(int old, int new) +{ + if (old == EXIT_FAILURE || new == EXIT_FAILURE) + return EXIT_FAILURE; + if (new == STARPU_TEST_SKIPPED || old != 0) + return new; + return 0; +} + +int main(void) +{ + int ret = 0; + int ret2; + char s[128]; + char *ptr; + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); +#endif + /* USER may be unset, in which case getenv() returns NULL: never hand NULL to %s */ + snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER") ? getenv("USER") : "unknown"); + ptr = _starpu_mkdtemp(s); + if (!ptr) + { + FPRINTF(stderr, "Cannot make directory '%s'\n", s); + return STARPU_TEST_SKIPPED; + } + + setenv("STARPU_LIMIT_CPU_MEM", MEMSIZE_STR,
1); + + /* Build an vector-like interface which doesn't have the any_to_any helper, to force making use of pack/unpack */ + any_to_any = starpu_interface_vector_ops.copy_methods->any_to_any; + memcpy(&starpu_interface_my_vector_ops, &starpu_interface_vector_ops, sizeof(starpu_interface_my_vector_ops)); + starpu_interface_my_vector_ops.copy_methods = &my_vector_copy_data_methods_s; + + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s, starpu_vector_data_register, "Stdio with read/write vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; + ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s, starpu_my_vector_data_register, "Stdio with pack/unpack vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s, starpu_vector_data_register, "unistd with read/write vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; + ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s, starpu_my_vector_data_register, "unistd with pack/unpack vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; +#ifdef STARPU_LINUX_SYS + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_vector_data_register, "unistd_direct with read/write vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; + ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_my_vector_data_register, "unistd_direct with pack/unpack vector ops")); + if (ret == STARPU_TEST_SKIPPED) goto skipped; +#endif + +skipped: + ret2 = rmdir(s); + STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s); + + return ret; +} +#endif diff --git a/tests/energy/dynamic.sh b/tests/energy/dynamic.sh new file mode 100755 index 0000000..aae9b2d --- /dev/null +++ b/tests/energy/dynamic.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# To have 24 cores +export STARPU_HOSTNAME=sirocco + +# To avoid slowing down simulation +export MALLOC_PERTURB_=0 + +# You can play with these (matrix size in tiles and number of iterations, passed to energy_efficiency) +export N=40 +export NITER=30 +# Values of STARPU_SCHED_GAMMA to sweep, one dynamic.<gamma>.dat file is produced for each +GAMMAS="1000000 100000 76000 10000 0" +# For each gamma, run all slow frequencies in parallel, keep the result line and sort on its second column +for gamma in $GAMMAS +do + (for freq_slow in $(seq 1200 200 3500) ; do + STARPU_SCHED_GAMMA=$gamma STARPU_FREQ_SLOW=$freq_slow \ + ./energy_efficiency $N $NITER | grep "^$(($N * 512)) " & + done) | sort -n -k 2 > dynamic.$gamma.dat +done + +cat > dynamic.gp << EOF +set output "dynamic.eps" +set term postscript eps enhanced color font ",20" +set key bottom right +set xlabel "performance (GFlop/s)" +set ylabel "energy (J)" + +plot \\ +EOF +for gamma in $GAMMAS; do + cat >> dynamic.gp << EOF + "dynamic.$gamma.dat" using 5:7:6:8 with xyerrorlines lw 2 title "$gamma", \\ +EOF +done + +cat >> dynamic.gp << EOF + +set output "dynamic-time.eps" +set xlabel "time (ms)" +set ylabel "energy (J)" + +plot \\ +EOF +for gamma in $GAMMAS; do + cat >> dynamic.gp << EOF + "dynamic.$gamma.dat" using 3:7:4:8 with xyerrorlines lw 2 title "$gamma", \\ +EOF +done + + +gnuplot dynamic.gp +gv dynamic.eps & +gv dynamic-time.eps & diff --git a/tests/energy/energy_efficiency.c b/tests/energy/energy_efficiency.c new file mode 100644 index 0000000..9468cce --- /dev/null +++ b/tests/energy/energy_efficiency.c @@ -0,0 +1,550 @@ +/* StarPU --- Runtime system for heterogeneous
multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * This tries to run kernels with different efficiency depending on the core + * frequency. + * + * This is based on the Cholesky factorization, which is made to exhibit three + * caricatural cases as follows: + * + * - gemm: always get faster with higher frequency + * - trsm: gets faster with higher frequency, but efficiency gets lower and + * lower + * - potrf: reaches a maximum performance, after which there is no point in + * running it at higher frequency. + * + * We here assume that the power use is the same for the different kernels + * (which wouldn't be true for real kernels, measurements would be needed, to + * feed the performance models). 
+ */ + + +/* These are the different frequency and power parameters, as measured and + * provided to this program */ +static float freq_min, freq_fast; +static float power_min, power_fast; + +/* + * This returns the dynamic power used by a CPU core in W at a given frequency + * in MHz + * This assumes C.V^2.F with V being proportional to F, thus C.F^3 + * + * freq_min = 1200 + * freq_fast = 3500 + * power_min = 2 + * power_fast = 8.2 + * + * freq_min3 = freq_min * freq_min * freq_min + * freq_fast3 = freq_fast * freq_fast * freq_fast + * alpha = (power_fast - power_min) / (freq_fast3 - freq_min3) + * power(frequency) = power_min + alpha * (frequency*frequency*frequency - freq_min3) + * plot [frequency=freq_min:freq_fast] power(frequency) lw 2 + * + */ +static float power(float frequency) +{ + double freq_min3 = freq_min * freq_min * freq_min; + double freq_fast3 = freq_fast * freq_fast * freq_fast; + double alpha = (power_fast - power_min) / (freq_fast3 - freq_min3); + return power_min + alpha * (frequency*frequency*frequency - freq_min3); +} + +/* + * This returns the frequency of the given worker and implementation in MHz. + * This is where we can tune either a given number of cores at a low frequency, + * or which implementation uses which frequency. 
*/ + +/* These are the chosen parameters: how many cores get slowed down, at which + * frequency */ +static int ncpu_slow = -1; +static float freq_slow; + +static float frequency(int worker, unsigned i) +{ + if (ncpu_slow == -1) + { + /* Version that allows the runtime to switch speed between + * tasks, by exposing two implementations with different time + * and energy */ + if (i == 0) + /* Slow implementation */ + return freq_slow; + else + /* Fast implementation */ + return freq_fast; + } + else + { + /* Version that assumes that ncpu_slow workers are running at + * slow speed */ + if (worker < ncpu_slow) + return freq_slow; + else + return freq_fast; + } +} + + +/* This is from magma + + -- Innovative Computing Laboratory + -- Electrical Engineering and Computer Science Department + -- University of Tennessee + -- (C) Copyright 2009 + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of Tennessee, Knoxville nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) + +#define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) + +#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +#define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) +#define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? 
FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) +/* TRSM has the same operation counts as TRMM: multiplications and additions each come from their own TRMM formula */ +#define FMULS_TRSM FMULS_TRMM +#define FADDS_TRSM FADDS_TRMM + +#define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) + + +#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) + +#define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) + + + +#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) +#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) + +#define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) + + + +/* Tags for spotting tasks in the trace */ +#define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) +#define TAG_TRSM(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ + | (unsigned long long)(j)))) +#define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ + | ((unsigned long long)(i)<<16) \ + | (unsigned long long)(j)))) + +/* Arbitrary tile size */ +#define TILE_SIZE 512 + + +/* + * Kernel time performance models, would normally be provided by measurements + */ + +/* We assume that GEMM/SYRK scale perfectly with frequency */ +#define GEMM_GFLOPS 50. /* At full speed */ +#define GEMM_FLOPS(N) FLOPS_SGEMM(N, N, N) +#define GEMM_TIME(N) (GEMM_FLOPS(TILE_SIZE) / (GEMM_GFLOPS * 1000000000.)) /* in seconds; the argument is ignored, TILE_SIZE is always used */ +static double _gemm_time(float frequency) +{ + double ret; + + /* Fix according to real frequency, linear */ + ret = GEMM_TIME(N) / (frequency / freq_fast); + return ret * 1000000.; /* seconds -> microseconds */ +} +/* Cost function of the gemm codelet: duration at the frequency selected for this worker and implementation */ +static double gemm_time(struct starpu_task *t, unsigned workerid, unsigned i) +{ + (void)t; + return _gemm_time(frequency(workerid, i)); +} +#define SYRK_GFLOPS 50.
/* At full speed */ +#define SYRK_FLOPS(N) FLOPS_SSYRK(N, N) +#define SYRK_TIME(N) (SYRK_FLOPS(TILE_SIZE) / (SYRK_GFLOPS * 1000000000.)) +static double _syrk_time(float frequency) +{ + double ret; + + /* Fix according to real frequency, linear */ + ret = SYRK_TIME(N) / (frequency / freq_fast); + return ret * 1000000.; +} + +static double syrk_time(struct starpu_task *t, unsigned workerid, unsigned i) +{ + (void)t; + return _syrk_time(frequency(workerid, i)); +} + +/* We assume that TRSM decays a bit with frequency */ +#define TRSM_DECAY 0.5 +#define TRSM_FLOPS(N) FLOPS_STRSM(N, N) +static double _trsm_time(float frequency) +{ + double ret = GEMM_TIME(N)*0.7; /* as typically observed */ + + /* Fix according to real frequency, root */ + ret = ret / (pow(frequency - freq_min/2, TRSM_DECAY) / pow(freq_fast - freq_min/2, TRSM_DECAY)); + return ret * 1000000.; +} + +static double trsm_time(struct starpu_task *t, unsigned workerid, unsigned i) +{ + (void)t; + return _trsm_time(frequency(workerid, i)); +} + +/* We assume that POTRF decays strongly with frequency */ +#define POTRF_DECAY 0.5 +#define POTRF_FLOPS(N) FLOPS_SPOTRF(N) +static double _potrf_time(float frequency) +{ + double ret = GEMM_TIME(N)*1.2; /* as typically observed */ + + /* Fix according to real frequency, asymptote */ + ret = ret / (1. - POTRF_DECAY * ((freq_min/(frequency-freq_min/2)) - (freq_min/(freq_fast-freq_min/2)))); + return ret * 1000000.; +} +static double potrf_time(struct starpu_task *t, unsigned workerid, unsigned i) +{ + (void)t; + return _potrf_time(frequency(workerid, i)); +} + + +/* stub for kernel, shouldn't be getting called in simgrid mode */ +void dummy_func(void *descr[], void *_args) +{ + (void)descr; (void)_args; + fprintf(stderr, "?? shouldn't be called\n"); +} + +/* Define the codelets */ +#define CODELET(kernel, nb, ...) 
\ +static double kernel##_energy(struct starpu_task *t, unsigned workerid, unsigned i) \ +{ \ + double time = kernel##_time(t, workerid, i); \ + return power(frequency(workerid, i)) * time / 1000000.; \ +} \ +\ +static struct starpu_perfmodel kernel##_perf_model = \ +{ \ + .symbol = #kernel, \ + .type = STARPU_PER_WORKER, \ + .worker_cost_function = kernel##_time, \ +}; \ +\ +static struct starpu_perfmodel kernel##_energy_model = \ +{ \ + .symbol = #kernel "_energy", \ + .type = STARPU_PER_WORKER, \ + .worker_cost_function = kernel##_energy, \ +}; \ +\ +static struct starpu_codelet kernel##_cl = \ +{ \ + .cpu_funcs = { dummy_func }, \ + .nbuffers = nb, \ + .modes = {__VA_ARGS__}, \ + .model = &kernel##_perf_model, \ + .energy_model = &kernel##_energy_model, \ +}; + +CODELET(potrf, 1, STARPU_RW) +CODELET(trsm, 2, STARPU_R, STARPU_RW) +CODELET(syrk, 2, STARPU_R, STARPU_RW) +CODELET(gemm, 3, STARPU_R, STARPU_R, STARPU_RW) + +int main(int argc, char *argv[]) +{ + /* Initialize environment variables */ + + if (!getenv("STARPU_IDLE_POWER")) + setenv("STARPU_IDLE_POWER", "30", 1); + const char *hostname = getenv("STARPU_HOSTNAME"); + if (!hostname || strcmp(hostname, "sirocco")) + { + printf("Warning: This is expected to be run with export STARPU_HOSTNAME=sirocco\n"); + } + + freq_min = starpu_getenv_number_default("STARPU_FREQ_MIN", 1200); + freq_slow = starpu_getenv_number_default("STARPU_FREQ_SLOW", 1200); + freq_fast = starpu_getenv_number_default("STARPU_FREQ_FAST", 3500); + + power_min = starpu_getenv_float_default("STARPU_POWER_MIN", 2); + power_fast = starpu_getenv_float_default("STARPU_POWER_FAST", 8.2); + + /* Number of slow CPU cores */ + ncpu_slow = starpu_getenv_number_default("STARPU_NCPU_SLOW", -1); + if (ncpu_slow == -1) + { + /* Enable second implementation. 
*/ + potrf_cl.cpu_funcs[1] = dummy_func; + trsm_cl.cpu_funcs[1] = dummy_func; + gemm_cl.cpu_funcs[1] = dummy_func; + syrk_cl.cpu_funcs[1] = dummy_func; + } + + /* Initialize StarPU */ + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + + if (!getenv("STARPU_SCHED")) + conf.sched_policy_name = "dmdas"; + + int ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned N, k, m, n, iter, NITER; + if (argc < 2) +#ifdef STARPU_QUICK_CHECK + N = 10; +#else + N = 40; +#endif + else + N = atoi(argv[1]); + if (argc < 3) +#ifdef STARPU_QUICK_CHECK + NITER = 3; +#else + NITER = 10; +#endif + else + NITER = atoi(argv[2]); + if (N == 0) + { + starpu_shutdown(); + return 0; + } + + /* Give parameter summary to user */ + + printf("freqs (MHz):\n"); + printf("%f %f %f\n", freq_min, freq_slow, freq_fast); + printf("\n"); + + printf("per-core power (W):\n"); + printf("%f %f\n", power_min, power_fast); + printf("%f %f %f\n", power(freq_min), power(freq_slow), power(freq_fast)); + printf("\n"); + + printf("kernel perfs in GFlops (min, slow, fast):\n"); + printf("gemm:\t%f %f %f\n", + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_min) / 1000, + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_slow) / 1000, + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_fast) / 1000); + + printf("syrk:\t%f %f %f\n", + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_min) / 1000, + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_slow) / 1000, + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_fast) / 1000); + + printf("trsm:\t%f %f %f\n", + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_min) / 1000, + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_slow) / 1000, + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_fast) / 1000); + + printf("potrf:\t%f %f %f\n", + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_min) / 1000, + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_slow) / 1000, + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_fast) / 
1000); + printf("\n"); + + printf("kernel efficiency in GFlops/W (min, slow, fast):\n"); + printf("gemm:\t%f %f %f\n", + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_min) / 1000 / power(freq_min), + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_slow) / 1000 / power(freq_slow), + GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_fast) / 1000 / power(freq_fast)); + + printf("syrk:\t%f %f %f\n", + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_min) / 1000 / power(freq_min), + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_slow) / 1000 / power(freq_slow), + SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_fast) / 1000 / power(freq_fast)); + + printf("trsm:\t%f %f %f\n", + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_min) / 1000 / power(freq_min), + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_slow) / 1000 / power(freq_slow), + TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_fast) / 1000 / power(freq_fast)); + + printf("potrf:\t%f %f %f\n", + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_min) / 1000 / power(freq_min), + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_slow) / 1000 / power(freq_slow), + POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_fast) / 1000 / power(freq_fast)); + printf("\n"); + + + /* Now compute */ + + starpu_data_handle_t A[N][N]; + + for (m = 0; m < N; m++) + for (n = 0; n < N; n++) + starpu_void_data_register(&A[m][n]); + + unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; + + double timing_sum = 0.; + double energy_sum = 0.; + double timing_sum2 = 0.; + double energy_sum2 = 0.; + + for (iter = 0; iter < NITER; iter++) + { + double start = starpu_timing_now(); + double start_energy = starpu_energy_used(); + + for (k = 0; k < N; k++) + { + starpu_iteration_push(k); + ret = starpu_task_insert(&potrf_cl, + STARPU_PRIORITY, unbound_prio ? 
(int)(2*N - 2*k) : STARPU_MAX_PRIO, + STARPU_RW, A[k][k], + STARPU_FLOPS, (double) FLOPS_SPOTRF(TILE_SIZE), + STARPU_TAG_ONLY, TAG_POTRF(k), + 0); + if (ret == -ENODEV) return 77; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + for (m = k+1; m= 0) + toprint_slow = ncpu_slow; + else + toprint_slow = freq_slow; + + printf("# size\t%s\tms +-\tGFlop/s +-\ten. (J) +-\tGF/W\n", + ncpu_slow >= 0 ? "nslow" : "fslow"); + printf("%u\t%u\t%.0f %.1f\t%.1f %.1f\t%.1f %.1f\t%.2f\n", + TILE_SIZE * N, + toprint_slow, + timing_avg/1000, + timing_dev/1000, + (flop/timing_avg/1000.0f), + (flop/(timing_avg*timing_avg)/1000.f)*timing_dev, + energy_avg, energy_dev, + flop/1000000000./energy_avg); + + for (m = 0; m < N; m++) + for (n = 0; n < N; n++) + starpu_data_unregister(A[m][n]); + + starpu_shutdown(); + return 0; +} diff --git a/tests/energy/perfs.gp b/tests/energy/perfs.gp new file mode 100644 index 0000000..527b611 --- /dev/null +++ b/tests/energy/perfs.gp @@ -0,0 +1,76 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +set term postscript eps enhanced color font ",20" +set key top left +set xlabel "frequency (MHz)" + +freq_min = 1200 +freq_fast = 3500 +power_min = 2 +power_fast = 8.2 +TRSM_DECAY = 0.5 +POTRF_DECAY = 0.5 + + +# Plot the power according to frequency (cubic curve) + +freq_min3 = freq_min * freq_min * freq_min +freq_fast3 = freq_fast * freq_fast * freq_fast +alpha = (power_fast - power_min) / (freq_fast3 - freq_min3) +power(frequency) = power_min + alpha * (frequency*frequency*frequency - freq_min3) + +set output "power.eps" +set ylabel "power (W)" + +plot [frequency=freq_min:freq_fast] [y=0:] power(frequency) lw 2 notitle + + +# Plot the kernel performance according to frequency + +set output "perfs.eps" +set ylabel "performance (GFlop/s)" + +gemm_max_perf = 50 +trsm_max_perf = 35.784040 +potrf_max_perf = 6.964803 + +gemm_factor(frequency) = frequency / freq_fast +trsm_factor(frequency) = (frequency - freq_min/2) ** TRSM_DECAY / (freq_fast - freq_min/2) ** TRSM_DECAY +potrf_factor(frequency) = 1 - POTRF_DECAY * ((freq_min/(frequency-freq_min/2)) - (freq_min/(freq_fast-freq_min/2))) + +plot [frequency=freq_min:freq_fast] \ + gemm_max_perf * gemm_factor(frequency) lw 2 title "gemm", \ + trsm_max_perf * trsm_factor(frequency) lw 2 title "trsm", \ + potrf_max_perf * potrf_factor(frequency) lw 2 title "potrf" + + +# Plot the kernel efficiency according to frequency + +set output "efficiency.eps" +set key top right +set ylabel "efficiency (GFlop/W)" + +gemm_max_efficiency = 6.097561 +trsm_max_efficiency = 4.363907 +potrf_max_efficiency = 0.849366 + +power_factor(frequency) = power(frequency) / power(freq_fast) + +plot [frequency=freq_min:freq_fast] \ + gemm_max_efficiency * gemm_factor(frequency) / power_factor(frequency) lw 2 title "gemm", \ + trsm_max_efficiency * trsm_factor(frequency) / power_factor(frequency) lw 2 title "trsm", \ + potrf_max_efficiency * potrf_factor(frequency) / power_factor(frequency) lw 2 title "potrf" + diff --git a/tests/energy/static.sh 
b/tests/energy/static.sh new file mode 100755 index 0000000..220ee4c --- /dev/null +++ b/tests/energy/static.sh @@ -0,0 +1,72 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# To have 24 cores +export STARPU_HOSTNAME=sirocco + +# To avoid slowing down simulation +export MALLOC_PERTURB_=0 + +# You can play with these (energy_efficiency reads the per-core powers from STARPU_POWER_MIN and STARPU_POWER_FAST) +export STARPU_FREQ_SLOW=1200 +export STARPU_POWER_MIN=2 +export STARPU_POWER_FAST=8.2 +export N=40 +export NITER=30 + +GAMMAS="1000000 100000 10000 0" + +for gamma in $GAMMAS; do + (for ncpu_slow in $(seq 0 24) ; do + STARPU_SCHED_GAMMA=$gamma STARPU_NCPU_SLOW=$ncpu_slow \ + ./energy_efficiency $N $NITER | grep "^$(($N * 512)) " & + done) | sort -n -k 2 > static.$gamma.dat +done + +cat > static.gp << EOF +set output "static.eps" +set term postscript eps enhanced color font ",20" +set key top center +set xlabel "performance (GFlop/s)" +set ylabel "energy (J)" + +plot \\ +EOF +for gamma in $GAMMAS; do + cat >> static.gp << EOF + "static.$gamma.dat" using 5:7:6:8 with xyerrorlines title "$gamma", \\ +EOF +done + +cat >> static.gp << EOF + +set output "static-time.eps" +set xlabel "time (ms)" +set ylabel "energy (J)" + +plot \\ +EOF +for gamma in $GAMMAS; do + cat >> static.gp << EOF + "static.$gamma.dat" using 3:7:4:8 with xyerrorlines title "$gamma", \\ +EOF +done + + +gnuplot static.gp +gv static.eps & +gv
static-time.eps & diff --git a/tests/errorcheck/invalid_blocking_calls.c b/tests/errorcheck/invalid_blocking_calls.c new file mode 100644 index 0000000..d6cc82a --- /dev/null +++ b/tests/errorcheck/invalid_blocking_calls.c @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Check that we catch calling tag_wait, i.e. a blocking call, from the + * codelet function, which is invalid. This test is thus expected to fail. + */ + +/* mpirun may not exit if it fails, skip the test for master-slave */ +#if defined(STARPU_NO_ASSERT) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +#define TAG 0x42 + +static starpu_data_handle_t handle; +static unsigned *data; + +void wrong_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + /* The function is expected to fail. 
This is indicated in tests/Makefile.am */ + /* try to fetch data in the RAM while we are in a codelet, such a + * blocking call is forbidden */ + starpu_data_acquire(handle, STARPU_RW); + starpu_tag_wait(TAG); +} + +static struct starpu_codelet wrong_codelet = +{ + .modes = { STARPU_RW }, + .cpu_funcs = {wrong_func}, + .cuda_funcs = {wrong_func}, + .opencl_funcs = {wrong_func}, + .model = NULL, + .nbuffers = 0 +}; + +static void wrong_callback(void *arg) +{ + (void)arg; + /* The function is expected to fail. This is indicated in tests/Makefile.am */ + starpu_data_acquire(handle, STARPU_RW); + starpu_tag_wait(TAG); +} + +int main(int argc, char **argv) +{ + int ret; + + if (RUNNING_ON_VALGRIND) + return STARPU_TEST_SKIPPED; + + disable_coredump(); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void**)&data, sizeof(*data)); + *data = 42; + + /* register a piece of data */ + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)data, + 1, sizeof(unsigned)); + + struct starpu_task *task = starpu_task_create(); + + task->cl = &wrong_codelet; + + task->handles[0] = handle; + + task->use_tag = 1; + task->tag_id = TAG; + + task->callback_func = wrong_callback; + task->detach = 0; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_tag_wait(TAG); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); + + /* This call is valid as it is done by the application outside a + * callback */ + ret = starpu_data_acquire(handle, STARPU_RW); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); + + starpu_data_release(handle); + + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + starpu_data_unregister(handle); + + starpu_free_noflag(data, sizeof(*data)); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + 
fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} +#endif diff --git a/tests/errorcheck/invalid_tasks.c b/tests/errorcheck/invalid_tasks.c new file mode 100644 index 0000000..6cb3d81 --- /dev/null +++ b/tests/errorcheck/invalid_tasks.c @@ -0,0 +1,89 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Check that we detect that with only a CPU we can't submit a GPU-only task + */ + +#if !defined(STARPU_USE_CPU) +#warning no cpu are available. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet gpu_only_cl = +{ + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 0 +}; + +int main(void) +{ + int ret; + + /* We force StarPU to use 1 CPU only */ + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = 1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task *task = starpu_task_create(); + task->cl = &gpu_only_cl; + + /* Only a GPU device could execute that task ! */ + ret = starpu_task_submit(task); + STARPU_ASSERT(ret == -ENODEV); + + task->destroy = 0; + starpu_task_destroy(task); + + struct starpu_task *task_specific = starpu_task_create(); + task_specific->cl = &gpu_only_cl; + task_specific->execute_on_a_specific_worker = 1; + task_specific->workerid = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0); + + /* Only a CUDA device could execute that task ! */ + ret = starpu_task_submit(task_specific); + STARPU_ASSERT(ret == -ENODEV); + + task_specific->destroy = 0; + starpu_task_destroy(task_specific); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} +#endif diff --git a/tests/errorcheck/starpu_init_noworker.c b/tests/errorcheck/starpu_init_noworker.c new file mode 100644 index 0000000..9fee0a2 --- /dev/null +++ b/tests/errorcheck/starpu_init_noworker.c @@ -0,0 +1,56 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test that starpu_initialize returns -ENODEV when no worker is available; any other return value makes the test print the detected worker counts and fail + */ + +int main(int argc, char **argv) +{ + int ret; + + /* We try to initialize StarPU without any worker */ + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + + /* starpu_initialize should return -ENODEV */ + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) + return EXIT_SUCCESS; + else + { + unsigned ncpu = starpu_cpu_worker_get_count(); + unsigned ncuda = starpu_cuda_worker_get_count(); + unsigned nopencl = starpu_opencl_worker_get_count(); + unsigned nmpi_ms = starpu_mpi_ms_worker_get_count(); + FPRINTF(stderr, "StarPU has found :\n"); + FPRINTF(stderr, "\t%u CPU cores\n", ncpu); + FPRINTF(stderr, "\t%u CUDA devices\n", ncuda); + FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl); + FPRINTF(stderr, "\t%u MPI Master-Slaves devices\n", nmpi_ms); + return EXIT_FAILURE; + } + +} diff --git a/tests/errorcheck/workers_cpuid.c b/tests/errorcheck/workers_cpuid.c new file mode 100644 index 0000000..8cb0e34 --- /dev/null +++ b/tests/errorcheck/workers_cpuid.c @@ -0,0 +1,208 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Try various values for STARPU_WORKERS_CPUID, checking that the + * expected binding does happen + */ + +#if !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC) || !defined(STARPU_HAVE_SETENV) +#warning no cpu are available. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +#include + +#ifdef STARPU_QUICK_CHECK +#define CPUSTEP 8 +#define NB_TESTS 1 +#else +#define CPUSTEP 1 +#define NB_TESTS 5 +#endif + +int nhwpus; +long workers_cpuid[STARPU_NMAXWORKERS]; +int workers_id[STARPU_NMAXWORKERS]; + +static int check_workers_mapping(long *cpuid, int *workerids, int nb_workers) +{ + int i; + for (i=0; i STARPU_NMAXWORKERS) + nhwpus = STARPU_NMAXWORKERS; + + for (i=0; i +#include "../helper.h" + +/* This task fakes some repeated errors */ +static int retry; +void cpu_increment(void *descr[], void *arg) +{ + (void)arg; + unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *var2 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + FPRINTF(stderr,"computing\n"); + *var2 = *var + 1; + if (retry < 10) + { + FPRINTF(stderr,"failing\n"); + retry++; + /* Fake failure */ + starpu_task_ft_failed(starpu_task_get_current()); + } + else + FPRINTF(stderr,"succeed\n"); +} + +static struct starpu_codelet my_codelet = +{ + .cpu_funcs = {cpu_increment}, + //.cpu_funcs_name = 
{"cpu_increment"}, + .modes = { STARPU_R, STARPU_W }, + .nbuffers = 2 +}; + +/* This implements the retry strategy + * (Identical to the default implementation: just retry) */ +static void check_ft(void *arg) +{ + struct starpu_task *meta_task = arg; + struct starpu_task *current_task = starpu_task_get_current(); + struct starpu_task *new_task; + int ret; + + if (!current_task->failed) + { + FPRINTF(stderr,"didn't fail, release main task\n"); + starpu_task_ft_success(meta_task); + return; + } + + FPRINTF(stderr,"failed, try again\n"); + + new_task = starpu_task_ft_create_retry(meta_task, current_task, check_ft); + + /* Here we could e.g. force the task to use only a CPU implementation + * known to be failsafe */ + + ret = starpu_task_submit_nodeps(new_task); + STARPU_ASSERT(!ret); +} + +int main(void) +{ + int x = 12; + int y = 1; + starpu_data_handle_t h_x, h_y; + int ret, ret1; + + if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) + /* TODO _submit_job_take_data_deps */ + return STARPU_TEST_SKIPPED; + + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&h_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + starpu_variable_data_register(&h_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); + + retry = 0; + ret1 = starpu_task_insert(&my_codelet, + STARPU_PROLOGUE_CALLBACK, starpu_task_ft_prologue, + STARPU_PROLOGUE_CALLBACK_ARG_NFREE, check_ft, + STARPU_R, h_x, + STARPU_W, h_y, + 0); + if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert"); + starpu_task_wait_for_all(); + + starpu_data_unregister(h_x); + starpu_data_unregister(h_y); + + starpu_shutdown(); + + if (x != 12) + ret = 1; + FPRINTF(stderr, "Value x = %d (expected 12)\n", x); + + if (ret1 != -ENODEV) + { + if (y != 13) + 
ret = 1; + FPRINTF(stderr, "Value y = %d (expected 13)\n", y); + } + + STARPU_RETURN(ret); +} diff --git a/tests/fortran90/init_01.f90 b/tests/fortran90/init_01.f90 new file mode 100644 index 0000000..1dbe4fc --- /dev/null +++ b/tests/fortran90/init_01.f90 @@ -0,0 +1,30 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +PROGRAM init_01 + + USE starpu_mod + USE iso_c_binding + + IMPLICIT NONE + + INTEGER(KIND=C_INT) :: res + + res = starpu_init(C_NULL_PTR) + IF (res /= 0) THEN + STOP 77 + END IF + CALL starpu_shutdown() +END PROGRAM init_01 diff --git a/tests/fortran90/starpu_mod.f90 b/tests/fortran90/starpu_mod.f90 new file mode 100644 index 0000000..9cce981 --- /dev/null +++ b/tests/fortran90/starpu_mod.f90 @@ -0,0 +1,145 @@ +! StarPU --- Runtime system for heterogeneous multicore architectures. +! +! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +! +! StarPU is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at +! your option) any later version. +! +! StarPU is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +! +! 
See the GNU Lesser General Public License in COPYING.LGPL for more details. +! +MODULE starpu_mod + ! == starpu.h == + + ! starpu_conf_init + INTERFACE + SUBROUTINE starpu_conf_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + END SUBROUTINE starpu_conf_init + END INTERFACE + + ! starpu_init + INTERFACE + FUNCTION starpu_init(conf) BIND(C) + USE iso_c_binding + TYPE(C_PTR), VALUE :: conf + INTEGER(KIND=C_INT) :: starpu_init + END FUNCTION starpu_init + END INTERFACE + + ! starpu_initialize + + ! starpu_pause + INTERFACE + SUBROUTINE starpu_pause() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_pause + END INTERFACE + + ! starpu_resume + INTERFACE + SUBROUTINE starpu_resume() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_resume + END INTERFACE + + ! starpu_shutdown + INTERFACE + SUBROUTINE starpu_shutdown() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_shutdown + END INTERFACE + + ! starpu_topology_print + + ! starpu_asynchronous_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_copy_disabled + END INTERFACE + + ! starpu_asynchronous_cuda_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_cuda_copy_disabled + END INTERFACE + + ! starpu_asynchronous_opencl_copy_disabled + INTERFACE + SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_asynchronous_opencl_copy_disabled + END INTERFACE + + ! starpu_display_stats + INTERFACE + SUBROUTINE starpu_display_stats() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_display_stats + END INTERFACE + + ! starpu_get_version + INTERFACE + SUBROUTINE starpu_get_version(major,minor,release) BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release + END SUBROUTINE starpu_get_version + END INTERFACE + + ! 
starpu_cpu_worker_get_count + INTERFACE + FUNCTION starpu_cpu_worker_get_count() BIND(C) + USE iso_c_binding + INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count + END FUNCTION starpu_cpu_worker_get_count + END INTERFACE + + ! == starpu_task.h == + + ! starpu_tag_declare_deps + ! starpu_tag_declare_deps_array + ! starpu_task_declare_deps_array + ! starpu_tag_wait + ! starpu_tag_wait_array + ! starpu_tag_notify_from_apps + ! starpu_tag_restart + ! starpu_tag_remove + ! starpu_task_init + ! starpu_task_clean + ! starpu_task_create + ! starpu_task_destroy + ! starpu_task_set_destroy + ! starpu_task_submit + ! starpu_task_submit_to_ctx + ! starpu_task_finished + ! starpu_task_wait + ! starpu_task_wait_for_all + INTERFACE + SUBROUTINE starpu_task_wait_for_all() BIND(C) + USE iso_c_binding + END SUBROUTINE starpu_task_wait_for_all + END INTERFACE + ! starpu_task_wait_for_n_submitted + ! starpu_task_wait_for_all_in_ctx + ! starpu_task_wait_for_n_submitted_in_ctx + ! starpu_task_wait_for_no_ready + ! starpu_task_nready + ! starpu_task_nsubmitted + ! starpu_codelet_init + ! starpu_codelet_display_stats + ! starpu_task_get_current + ! starpu_parallel_task_barrier_init + ! starpu_parallel_task_barrier_init_n + ! starpu_task_dup + ! starpu_task_set_implementation + ! starpu_task_get_implementation + +END MODULE starpu_mod diff --git a/tests/helper.h b/tests/helper.h new file mode 100644 index 0000000..481cb37 --- /dev/null +++ b/tests/helper.h @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#ifndef _TESTS_HELPER_H +#define _TESTS_HELPER_H + +#include +#include +#include + +#ifdef HAVE_GETRLIMIT +#include +#endif + +#ifdef STARPU_HAVE_VALGRIND_H +#include +#endif + +#ifdef STARPU_HAVE_HELGRIND_H +#include +#endif + +#define STARPU_TEST_SKIPPED 77 + +//void *ALL_IS_OK = (void *)123456789L; +//void *ALL_IS_NOT_OK = (void *)987654321L; +// +//#define STARPU_CHECK_MALLOC(ptr) {if (!ptr) { fprintf(stderr, "starpu_malloc failed\n"); return 1; }} +//#define STARPU_CHECK_MALLOC_HAS_FAILED(ptr) {if (ptr) { fprintf(stderr, "starpu_malloc should have failed\n"); return 1; }} + +//#define STARPU_CHECK_MALLOC_THREAD(ptr) {if (!ptr) { fprintf(stderr, "starpu_malloc failed\n"); return ALL_IS_NOT_OK; }} +//#define STARPU_CHECK_MALLOC_HAS_FAILED_THREAD(ptr) {if (ptr) { fprintf(stderr, "starpu_malloc should have failed\n"); return ALL_IS_NOT_OK; }} +//#define STARPU_CHECK_RETURN_VALUE_THREAD(err, message) {if (err < 0) { perror(message); return ALL_IS_NOT_OK; }} +//#define STARPU_CHECK_RETURN_VALUE_IS_THREAD(err, value, message) {if (err >= 0 || errno != value) { perror(message); return ALL_IS_NOT_OK; }} + +//#define STARPU_TEST_OUTPUT +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); fflush(ofile); }} while(0) + +#if defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) +static int _starpu_valgrind_print_once STARPU_ATTRIBUTE_UNUSED = 0; + +# define STARPU_SKIP_IF_VALGRIND \ + do \ + { \ + if(STARPU_RUNNING_ON_VALGRIND) \ + { \ + STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ + if (!_starpu_valgrind_print_once) \ + { \ + FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ + _starpu_valgrind_print_once = 1; \ + } \ + return; \ + } \ + } while(0) + +# define STARPU_SKIP_IF_VALGRIND_RETURN_ZERO \ + do \ + { \ + if(STARPU_RUNNING_ON_VALGRIND) \ + { \ + STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ + if (!_starpu_valgrind_print_once) \ + { \ + FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ + _starpu_valgrind_print_once = 1; \ + } \ + return 0; \ + } \ + } while(0) + +# define STARPU_SKIP_IF_VALGRIND_RETURN_SKIP \ + do \ + { \ + if(STARPU_RUNNING_ON_VALGRIND) \ + { \ + STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ + if (!_starpu_valgrind_print_once) \ + { \ + FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ + _starpu_valgrind_print_once = 1; \ + } \ + return STARPU_TEST_SKIPPED; \ + } \ + } while(0) + +# define STARPU_RETURN(ret) \ + do \ + { \ + if(STARPU_RUNNING_ON_VALGRIND) \ + { \ + FPRINTF(stderr, "Running on valgrind, ignoring return value\n"); \ + return 0; \ + } \ + else return ret; \ + } while(0) + +#else /* defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) */ +# define STARPU_RETURN(ret) return ret +# define STARPU_SKIP_IF_VALGRIND +# define STARPU_SKIP_IF_VALGRIND_RETURN_ZERO +# define STARPU_SKIP_IF_VALGRIND_RETURN_SKIP +#endif /* defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) */ + +#ifndef ANNOTATE_HAPPENS_BEFORE +#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) +#endif +#ifndef 
ANNOTATE_HAPPENS_BEFORE_FORGET_ALL +#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) ((void)0) +#endif +#ifndef ANNOTATE_HAPPENS_AFTER +#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) +#endif + +static inline void disable_coredump(void) +{ +#ifdef HAVE_GETRLIMIT + struct rlimit rlim = { 0, 0 }; + setrlimit(RLIMIT_CORE, &rlim); +#endif +} + +#endif /* _TESTS_HELPER_H */ diff --git a/tests/helper/cublasLt_init.c b/tests/helper/cublasLt_init.c new file mode 100644 index 0000000..dbaa4d6 --- /dev/null +++ b/tests/helper/cublasLt_init.c @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. +* + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test initializing cublasLt, and how much time that takes: starpu_cublasLt_init() and starpu_cublasLt_shutdown() are timed separately with starpu_timing_now(), and the totals are also printed divided by the CUDA worker count when it is non-zero + */ + +static double start; +static double end; + +//static float *data = NULL; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned ngpus = starpu_cuda_worker_get_count(); + + double init_timing; + double shutdown_timing; + + start = starpu_timing_now(); + starpu_cublasLt_init(); + end = starpu_timing_now(); + init_timing = end - start; + + start = starpu_timing_now(); + starpu_cublasLt_shutdown(); + end = starpu_timing_now(); + shutdown_timing = end - start; + + FPRINTF(stderr, "Total:\n"); + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000)); + + if (ngpus != 0) + { + FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); + + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus)); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/helper/cublas_init.c b/tests/helper/cublas_init.c new file mode 100644 index 0000000..effbbb9 --- /dev/null +++ b/tests/helper/cublas_init.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test initializing cublas, and how much time that takes: starpu_cublas_init() and starpu_cublas_shutdown() are timed separately with starpu_timing_now(); the differences are divided by 1000 before printing, so they are labelled ms like in the cublasLt and cusparse variants of this test + */ + +static double start; +static double end; + +//static float *data = NULL; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned ngpus = starpu_cuda_worker_get_count(); + + double init_timing; + double shutdown_timing; + + start = starpu_timing_now(); + starpu_cublas_init(); + end = starpu_timing_now(); + init_timing = end - start; + + start = starpu_timing_now(); + starpu_cublas_shutdown(); + end = starpu_timing_now(); + shutdown_timing = end - start; + + FPRINTF(stderr, "Total:\n"); + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000)); + + if (ngpus != 0) + { + FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); + + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus)); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/helper/cusparse_init.c b/tests/helper/cusparse_init.c new file mode 100644 index 0000000..6b02cbc --- /dev/null +++ b/tests/helper/cusparse_init.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test initializing cusparse, and how much time that takes: starpu_cusparse_init() and starpu_cusparse_shutdown() are timed separately with starpu_timing_now(), and the totals are also printed divided by the CUDA worker count when it is non-zero + */ + +static double start; +static double end; + +//static float *data = NULL; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned ngpus = starpu_cuda_worker_get_count(); + + double init_timing; + double shutdown_timing; + + start = starpu_timing_now(); + starpu_cusparse_init(); + end = starpu_timing_now(); + init_timing = end - start; + + start = starpu_timing_now(); + starpu_cusparse_shutdown(); + end = starpu_timing_now(); + shutdown_timing = end - start; + + FPRINTF(stderr, "Total:\n"); + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000)); + + if (ngpus != 0) + { + FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); + + FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus)); + FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus)); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/helper/execute_on_all.c b/tests/helper/execute_on_all.c new file mode 100644 index 0000000..5c9b562 --- /dev/null +++ b/tests/helper/execute_on_all.c
@@ -0,0 +1,55 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Test executing a function on all workers + */ + +void func(void *arg) +{ + int *ptr = (int *) arg; + STARPU_ASSERT(*ptr == 0x42); +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int arg = 0x42; + + starpu_execute_on_each_worker(func, &arg, STARPU_CPU|STARPU_CUDA|STARPU_OPENCL); + + starpu_execute_on_each_worker(func, &arg, STARPU_CPU); + + starpu_execute_on_each_worker(func, &arg, STARPU_CUDA); + + starpu_execute_on_each_worker(func, &arg, STARPU_OPENCL); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/helper/hipblas_init.c b/tests/helper/hipblas_init.c new file mode 100644 index 0000000..0da6cf9 --- /dev/null +++ b/tests/helper/hipblas_init.c @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Test initializing hipblas, and how much time that takes (starpu_timing_now() deltas are in microseconds; they are printed divided by 1000, i.e. in ms)
+ */
+
+static double start;
+static double end;
+
+//static float *data = NULL;
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	unsigned ngpus = starpu_cuda_worker_get_count(); /* NOTE(review): CUDA worker count used in a hipblas test -- confirm whether the HIP worker count was intended */
+
+	double init_timing;
+	double shutdown_timing;
+
+	start = starpu_timing_now();
+	starpu_hipblas_init();
+	end = starpu_timing_now();
+	init_timing = end - start;
+
+	start = starpu_timing_now();
+	starpu_hipblas_shutdown();
+	end = starpu_timing_now();
+	shutdown_timing = end - start;
+
+	FPRINTF(stderr, "Total:\n");
+	FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000));
+	FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000));
+
+	if (ngpus != 0)
+	{
+		FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus);
+
+		FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus));
+		FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus));
+	}
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/helper/pinned_memory.c b/tests/helper/pinned_memory.c
new file mode 100644
index 0000000..363f711
--- /dev/null
+++
b/tests/helper/pinned_memory.c @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* + * Test calling starpu_malloc, i.e. allocating pinned memory + */ + +#define NITER 10 +#define SIZE (4*1024*1024*sizeof(float)) + +static float *data = NULL; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned iter; + for (iter = 0; iter < NITER; iter++) + { + ret = starpu_malloc((void **)&data, SIZE); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + starpu_free_noflag(data, SIZE); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/helper/starpu_create_sync_task.c b/tests/helper/starpu_create_sync_task.c new file mode 100644 index 0000000..71616d7 --- /dev/null +++ b/tests/helper/starpu_create_sync_task.c @@ -0,0 +1,86 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* + * Test starpu_create_sync_task + */ + +#define NITER 10 + +static int create_dummy_task(starpu_tag_t tag) +{ + struct starpu_task *task = starpu_task_create(); + + task->use_tag = 1; + task->tag_id = tag; + task->cl = &starpu_codelet_nop; + + int ret = starpu_task_submit(task); + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_tag_t sync_tags[NITER]; + + unsigned iter; + for (iter = 0; iter < NITER; iter++) + { + starpu_tag_t sync_tag = (starpu_tag_t)iter*100; + + sync_tags[iter] = sync_tag; + + unsigned ndeps = 10; + starpu_tag_t deps[ndeps]; + + unsigned d; + for (d = 0; d < ndeps; d++) + { + deps[d] = sync_tag + d + 1; + + ret = create_dummy_task(deps[d]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_create_sync_task(sync_tag, ndeps, deps, NULL, NULL); + } + + /* Wait all the synchronization tasks */ + ret = starpu_tag_wait_array(NITER, sync_tags); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the 
computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/helper/starpu_data_cpy.c b/tests/helper/starpu_data_cpy.c new file mode 100644 index 0000000..62e94aa --- /dev/null +++ b/tests/helper/starpu_data_cpy.c @@ -0,0 +1,62 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Test starpu_data_cpy + */ + +int main(int argc, char **argv) +{ + int ret; + int var1, var2; + starpu_data_handle_t var1_handle, var2_handle; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + var1 = 42; + var2 = 12; + + starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1)); + starpu_variable_data_register(&var2_handle, STARPU_MAIN_RAM, (uintptr_t)&var2, sizeof(var2)); + + ret = starpu_data_cpy(var2_handle, var1_handle, 0, NULL, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_cpy"); + + starpu_data_acquire(var2_handle, STARPU_R); + ret = EXIT_SUCCESS; + if (var2 != var1) + { + FPRINTF(stderr, "var2 is %d but it should be %d\n", var2, var1); + ret = EXIT_FAILURE; + } + starpu_data_release(var2_handle); + + starpu_data_unregister(var1_handle); + starpu_data_unregister(var2_handle); + starpu_shutdown(); + + STARPU_RETURN(ret); +} diff --git a/tests/helper/starpu_data_dup_ro.c b/tests/helper/starpu_data_dup_ro.c new file mode 100644 index 0000000..00c6a76 --- /dev/null +++ b/tests/helper/starpu_data_dup_ro.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../variable/increment.h" +#include "../helper.h" + +/* + * Test starpu_data_dup_ro + */ + +int main(int argc, char **argv) +{ + int ret; + unsigned var1, *var; + starpu_data_handle_t var1_handle, var2_handle, var3_handle, var4_handle, var5_handle; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + increment_load_opencl(); + + var1 = 42; + starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1)); + + /* Make a duplicate of the original data */ + ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); + + /* Free it */ + starpu_data_unregister(var2_handle); + + /* Make another duplicate of the original data */ + ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); + + /* Free it through submit */ + starpu_data_unregister_submit(var2_handle); + + /* Make another duplicate of the original data */ + ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); + + /* Make a second duplicate of the original data */ + ret = starpu_data_dup_ro(&var3_handle, var1_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); + STARPU_ASSERT(var3_handle == var2_handle); + + /* Make a duplicate of a duplicate */ + ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_data_dup_ro"); + STARPU_ASSERT(var4_handle == var2_handle); + + ret = starpu_task_insert(&increment_cl, STARPU_RW, var1_handle, 0); + if (ret == -ENODEV) + { + starpu_data_unregister(var1_handle); + starpu_data_unregister(var2_handle); + starpu_data_unregister(var3_handle); + starpu_data_unregister(var4_handle); + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + /* Make a duplicate of the new value */ + ret = starpu_data_dup_ro(&var5_handle, var1_handle, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); + + starpu_data_acquire(var2_handle, STARPU_R); + var = starpu_data_get_local_ptr(var2_handle); + ret = EXIT_SUCCESS; + if (*var != 42) + { + FPRINTF(stderr, "var2 is %u but it should be %d\n", *var, 42); + ret = EXIT_FAILURE; + } + starpu_data_release(var2_handle); + + starpu_data_acquire(var3_handle, STARPU_R); + var = starpu_data_get_local_ptr(var3_handle); + if (*var != 42) + { + FPRINTF(stderr, "var3 is %u but it should be %d\n", *var, 42); + ret = EXIT_FAILURE; + } + starpu_data_release(var3_handle); + + starpu_data_acquire(var4_handle, STARPU_R); + var = starpu_data_get_local_ptr(var4_handle); + if (*var != 42) + { + FPRINTF(stderr, "var4 is %u but it should be %d\n", *var, 42); + ret = EXIT_FAILURE; + } + starpu_data_release(var4_handle); + + starpu_data_acquire(var5_handle, STARPU_R); + var = starpu_data_get_local_ptr(var5_handle); + if (*var != 43) + { + FPRINTF(stderr, "var5 is %u but it should be %d\n", *var, 43); + ret = EXIT_FAILURE; + } + starpu_data_release(var5_handle); + + starpu_data_unregister(var1_handle); + starpu_data_unregister(var2_handle); + starpu_data_unregister(var3_handle); + starpu_data_unregister(var4_handle); + starpu_data_unregister(var5_handle); + + increment_unload_opencl(); + + starpu_shutdown(); + + STARPU_RETURN(ret); +} diff --git a/tests/loader.c b/tests/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/tests/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#include
+#else
+#include
+#endif
+
+#ifdef STARPU_QUICK_CHECK
+/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s
+   add some extra times for tests which run with all schedulers
+*/
+#define DEFAULT_TIMEOUT 100
+#elif !defined(STARPU_LONG_CHECK)
+/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */
+#define DEFAULT_TIMEOUT 300
+#else
+/* Long checks can be very long */
+#define DEFAULT_TIMEOUT 1000
+#endif
+#define AUTOTEST_SKIPPED_TEST 77
+
+static pid_t child_pid = 0;
+static int timeout;
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+/* Native-Windows stand-in for gettimeofday(): stores the current time in *tv when tv is non-NULL; tz is not used. Always returns 0. */
+static int mygettimeofday(struct timeval *tv, void *tz)
+{
+	if (tv)
+	{
+		FILETIME ft;
+		unsigned long long res;
+		GetSystemTimeAsFileTime(&ft);
+		/* 100-nanosecond intervals since January 1, 1601 */
+		res = ((unsigned long long) ft.dwHighDateTime << 32) | ft.dwLowDateTime;
+		res /= 10;
+		/* Now we have microseconds */
+		res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
+		/* Now we are based on epoch */
+		tv->tv_sec = res / 1000000ULL;
+		tv->tv_usec = res % 1000000ULL;
+	}
+	return 0;
+}
+#else +#define mygettimeofday(tv,tz) gettimeofday(tv,tz) +#endif + +#ifdef STARPU_GDB_PATH +static int try_launch_gdb(const char *exe, const char *core) +{ +# define GDB_COMMANDS \ + "-ex", "py-list", \ + "-ex", "starpu-tasks", \ + "-ex", "starpu-workers", \ + "-ex", "starpu-print-datas-summary", \ + "-ex", "starpu-memusage", \ + "-ex", "starpu-print-archs", \ + "-ex", "starpu-print-registered-models", \ + "-ex", "bt full", \ + "-ex", "py-bt", \ + "-ex", "thread apply all bt full", \ + "-ex", "thread apply all py-bt", \ + + int err; + pid_t pid; + struct stat st; + const char *top_builddir; + char *gdb; + + err = stat(core, &st); + if (err != 0) + { + fprintf(stderr, "while looking for core file of %s: %s: %m\n", + exe, core); + return -1; + } + + if (!(st.st_mode & S_IFREG)) + { + fprintf(stderr, "%s: not a regular file\n", core); + return -1; + } + + top_builddir = getenv("top_builddir"); + + pid = fork(); + switch (pid) + { + case 0: /* kid */ + if (top_builddir != NULL) + { + /* Run gdb with Libtool. 
*/ + gdb = alloca(strlen(top_builddir) + + sizeof("/libtool") + 1); + strcpy(gdb, top_builddir); + strcat(gdb, "/libtool"); + err = execl(gdb, "gdb", "--mode=execute", + STARPU_GDB_PATH, "--batch", + GDB_COMMANDS + exe, core, NULL); + } + else + { + /* Run gdb directly */ + gdb = STARPU_GDB_PATH; + err = execl(gdb, "gdb", "--batch", + GDB_COMMANDS + exe, core, NULL); + } + if (err != 0) + { + fprintf(stderr, "while launching `%s': %m\n", gdb); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + break; + + case -1: + fprintf(stderr, "fork: %m\n"); + return -1; + + default: /* parent */ + { + pid_t who; + int status; + who = waitpid(pid, &status, 0); + if (who != pid) + fprintf(stderr, "while waiting for gdb " + "process %d: %m\n", pid); + } + } + return 0; +# undef GDB_COMMANDS +} +#endif /* STARPU_GDB_PATH */ + +static void launch_gdb(const char *exe) +{ +#ifdef STARPU_GDB_PATH + char s[32]; + snprintf(s, sizeof(s), "core.%d", child_pid); + if (try_launch_gdb(exe, s) < 0) + try_launch_gdb(exe, "core"); +#endif /* STARPU_GDB_PATH */ +} + +static char *test_name; + +static void test_cleaner(int sig) +{ + pid_t child_gid; + int status; + (void) sig; + + // send signal to all loader family members + fprintf(stderr, "[error] test %s has been blocked for %d seconds. 
Mark it as failed\n", test_name, timeout); + child_gid = getpgid(child_pid); + kill(-child_gid, SIGQUIT); + waitpid(child_pid, &status, 0); + launch_gdb(test_name); + raise(SIGALRM); + exit(EXIT_FAILURE); +} + +static void forwardsig(int sig) +{ + pid_t child_gid; + child_gid = getpgid(child_pid); + kill(-child_gid, sig); +} + +static int _decode(char **src, char *motif, const char *value) +{ + char *found; + + found = strstr(*src, motif); + if (found == NULL) return 0; + + char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); + + strncpy(new_src, *src, found - *src); + strcat(new_src, value); + strcat(new_src, found+strlen(motif)); + + *src = new_src; + return 1; +} + +static void decode(char **src, char *motif, const char *value) +{ + if (*src) + { + if (strstr(*src, motif) && value == NULL) + { + fprintf(stderr, "error: $%s undefined\n", motif); + exit(EXIT_FAILURE); + } + int d = _decode(src, motif, value); + while (d) + d = _decode(src, motif, value); + } +} + +int main(int argc, char *argv[]) +{ + int child_exit_status; + char *test_args; + char *launcher; + char *launcher_args; + char *libtool; + char *cflags; + const char *top_builddir = getenv("top_builddir"); + struct sigaction sa; + int ret; + struct timeval start; + struct timeval end; + double timing; + int x=1; + int asan = 0, lsan = 0, tsan = 0, usan = 0; + + (void) argc; + test_args = NULL; + timeout = 0; + + launcher=getenv("STARPU_CHECK_LAUNCHER"); + launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); + cflags = getenv("CFLAGS"); + if (cflags) + { + if (strstr(cflags, "-fsanitize=address")) + asan = 1; + if (strstr(cflags, "-fsanitize=leak")) + lsan = 1; + if (strstr(cflags, "-fsanitize=thread")) + tsan = 1; + if (strstr(cflags, "-fsanitize=undefined")) + usan = 1; + } + + if (argv[x] && strcmp(argv[x], "-t") == 0) + { + timeout = strtol(argv[x+1], NULL, 10); + x += 2; + } + else if (getenv("STARPU_TIMEOUT_ENV")) + { + /* get user-defined iter_max value */ + timeout = 
strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); + } + else if (timeout <= 0) + { + timeout = DEFAULT_TIMEOUT; + if ((launcher && strstr(launcher, "valgrind")) || + (launcher && strstr(launcher, "helgrind")) || + tsan) + timeout *= 20; + if (asan || usan || lsan || + (launcher && strstr(launcher, "compute-sanitizer"))) + timeout *= 5; + + if (timeout > 1750) + timeout = 1750; + } + +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + timeout *= 20; +#endif +#endif + +#ifdef STARPU_USE_MPI_MASTER_SLAVE + /* compare values between the 2 values of timeout */ + if (getenv("MPIEXEC_TIMEOUT")) + { + int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); + if (mpiexec_timeout != timeout) + fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); + } +#endif + + if (argv[x] && strcmp(argv[x], "-p") == 0) + { + test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); + sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); + x += 3; + } + else + { + test_name = argv[x]; + x += 1; + } + + if (!test_name) + { + fprintf(stderr, "[error] Need name of program to start\n"); + exit(EXIT_FAILURE); + } + + size_t len = strlen(test_name); + if (len >= 3 && + test_name[len-3] == '.' && + test_name[len-2] == 's' && + test_name[len-1] == 'h') + { + /* This is a shell script, don't run ourself on bash, but make + * the script call us for each program invocation */ + + char *launch = NULL; + if (top_builddir == NULL) + // this may fail if .libs is in the directory path + setenv("STARPU_LAUNCH", argv[0], 1); + else + { + launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); + strcpy(launch, top_builddir); + strcat(launch, "/tests/loader"); + setenv("STARPU_LAUNCH", launch, 1); + } + + execvp(test_name, argv+x-1); + + fprintf(stderr, "[error] '%s' failed to exec. 
test marked as failed\n", test_name); + free(launch); + exit(EXIT_FAILURE); + } + + if (strstr(test_name, "spmv/dw_block_spmv")) + { + test_args = (char *) calloc(512, sizeof(char)); + snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); + } + else if (strstr(test_name, "starpu_perfmodel_display")) + { + if (x >= argc) + test_args = strdup("-l"); + } + else if (strstr(test_name, "starpu_perfmodel_plot")) + { + if (x >= argc) + test_args = strdup("-l"); + } + + /* get launcher program */ + if (launcher_args) + launcher_args=strdup(launcher_args); + + if (top_builddir == NULL) + { + fprintf(stderr, + "warning: $top_builddir undefined, " + "so $STARPU_CHECK_LAUNCHER ignored\n"); + launcher = NULL; + launcher_args = NULL; + libtool = NULL; + } + else + { + libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); + strcpy(libtool, top_builddir); + strcat(libtool, "/libtool"); + } + + if (launcher) + { + const char *top_srcdir = getenv("top_srcdir"); + decode(&launcher, "@top_srcdir@", top_srcdir); + decode(&launcher_args, "@top_srcdir@", top_srcdir); + } + + setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); + + /* set SIGALARM handler */ + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sa.sa_handler = test_cleaner; + if (-1 == sigaction(SIGALRM, &sa, NULL)) + perror("sigaction"); + + signal(SIGINT, forwardsig); + signal(SIGHUP, forwardsig); + signal(SIGPIPE, forwardsig); + signal(SIGTERM, forwardsig); + + child_pid = fork(); + if (child_pid == 0) + { + char *launcher_argv[100]; + int i=0; + + setpgid(0, 0); + + /* "Launchers" such as Valgrind need to be inserted + * after the Libtool-generated wrapper scripts, hence + * this special-case. 
*/ + if (launcher && top_builddir != NULL) + { + launcher_argv[i++] = libtool; + launcher_argv[i++] = "--mode=execute"; + launcher_argv[i++] = launcher; + if (launcher_args) + { + launcher_argv[i++] = strtok(launcher_args, " "); + while (launcher_argv[i-1]) + { + launcher_argv[i++] = strtok(NULL, " "); + } + } + } + + launcher_argv[i++] = test_name; + if (test_args) + launcher_argv[i++] = test_args; + else while (argv[x]) + { + launcher_argv[i++] = argv[x++]; + } +#ifdef STARPU_SIMGRID +#ifdef STARPU_DEBUG + launcher_argv[i++] = "--cfg=contexts/factory:thread"; +#endif +#endif + launcher_argv[i++] = NULL; + execvp(*launcher_argv, launcher_argv); + + fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); + exit(EXIT_FAILURE); + } + if (child_pid == -1) + { + fprintf(stderr, "[error] fork. test marked as failed\n"); + exit(EXIT_FAILURE); + } + free(test_args); + free(libtool); + + ret = EXIT_SUCCESS; + gettimeofday(&start, NULL); + alarm(timeout); + if (child_pid == waitpid(child_pid, &child_exit_status, 0)) + { + if (WIFEXITED(child_exit_status)) + { + int status = WEXITSTATUS(child_exit_status); + if (status == EXIT_SUCCESS) + { + alarm(0); + } + else + { + if (status != AUTOTEST_SKIPPED_TEST) + fprintf(stdout, "`%s' exited with return code %d\n", + test_name, status); + ret = status; + } + } + else if (WIFSIGNALED(child_exit_status)) + { + fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", + test_name, WTERMSIG(child_exit_status)); + launch_gdb(test_name); + ret = EXIT_FAILURE; + } + else + { + fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", + test_name); + ret = EXIT_FAILURE; + } + } + + gettimeofday(&end, NULL); + timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); + fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); + + return ret; +} diff --git a/tests/main/bind.c b/tests/main/bind.c new file mode 
100644
index 0000000..572e9f7
--- /dev/null
+++ b/tests/main/bind.c
@@ -0,0 +1,84 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Test reserving CPU cores (conf.reserve_ncpus = 2) and binding the main thread
+ * to bindids obtained from StarPU, which leaves two fewer CPU workers to StarPU.
+ */
+
+int main(void)
+{
+	int ret;
+	struct starpu_conf conf;
+	int ncpus;
+	unsigned active_bindid;
+	unsigned passive_bindid1;
+	unsigned passive_bindid2;
+
+	/* First run: initialize only to count the CPU workers, then shut down again */
+	starpu_conf_init(&conf);
+	conf.nmpi_ms = 0;
+	conf.ntcpip_ms = 0;
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ncpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
+	starpu_shutdown();
+
+	/* Skip unless there are more than two CPU workers, since two get reserved below */
+	if (ncpus <= 2) return STARPU_TEST_SKIPPED;
+
+	/* Now re-initialize with two cores fewer (NOTE(review): nmpi_ms/ntcpip_ms are not reset to 0 for this second run -- confirm intended) */
+	starpu_conf_init(&conf);
+	conf.reserve_ncpus = 2;
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Make sure StarPU uses two cores fewer (the check is waived when the first run already reported STARPU_MAXCPUS workers) */
+	STARPU_ASSERT_MSG(ncpus == STARPU_MAXCPUS || starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == ncpus-2, "Expected %d CPUs, got %d\n", ncpus-2, starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
+	FPRINTF(stderr, "CPUS: %d as expected\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
+
+	/* Check we can grab a whole core */
+	active_bindid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);
+	starpu_bind_thread_on(active_bindid, STARPU_THREAD_ACTIVE, "main");
+
+	/* Check we can request an additional shared core: both flag-less requests must return the same bindid, distinct from the active one */
+	passive_bindid1 = starpu_get_next_bindid(0, NULL, 0);
+	passive_bindid2 = starpu_get_next_bindid(0, NULL, 0);
+	STARPU_ASSERT(passive_bindid1 != active_bindid);
+	STARPU_ASSERT(passive_bindid1 == passive_bindid2);
+	starpu_bind_thread_on(passive_bindid1, 0, "main");
+	starpu_bind_thread_on(passive_bindid2, 0, "main");
+
+	/* Try to bind on a worker (worker id 0) */
+	starpu_bind_thread_on_worker(0);
+
+	/* Try to bind back to the reserved core */
+	starpu_bind_thread_on_cpu(active_bindid);
+
+	/* Try to bind back to the main core, if any */
+	starpu_bind_thread_on_main();
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/main/callback.c b/tests/main/callback.c
new file mode 100644
index 0000000..322e804
--- /dev/null
+++ b/tests/main/callback.c
@@ -0,0 +1,91 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +void codelet_callback_func(void *arg) +{ + if (arg) + { + int *x = (int *)arg; + FPRINTF(stderr, "calling callback codelet arg %d\n", *x); + } + else + FPRINTF(stderr, "calling callback codelet arg %p\n", arg); +} + +void task_callback_func(void *arg) +{ + FPRINTF(stderr, "\ncalling callback task arg %p\n", arg); + if (starpu_task_get_current()->cl->callback_func) + starpu_task_get_current()->cl->callback_func(arg); +} + +struct starpu_codelet mycodelet = +{ + .where = STARPU_NOWHERE, + .callback_func = codelet_callback_func +}; + +struct starpu_codelet mycodelet2 = +{ + .where = STARPU_NOWHERE, +}; + +int main(void) +{ + int ret; + int value=12; + int value2=24; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_task_insert(&mycodelet, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&mycodelet, + STARPU_CALLBACK_ARG_NFREE, &value, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&mycodelet, + STARPU_CALLBACK, &task_callback_func, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&mycodelet, + STARPU_CALLBACK, &task_callback_func, + STARPU_CALLBACK_ARG_NFREE, &value2, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&mycodelet2, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(&mycodelet2, + STARPU_CALLBACK, &task_callback_func, + STARPU_CALLBACK_ARG_NFREE, &value, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_shutdown(); + return 0; +} diff --git a/tests/main/codelet_null_callback.c b/tests/main/codelet_null_callback.c new file mode 100644 index 0000000..9b953b5 --- /dev/null +++ 
b/tests/main/codelet_null_callback.c @@ -0,0 +1,106 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Test passing a NULL codelet, but callbacks + */ + +static +void callback(void *ptr) +{ + int *x = (int *)ptr; + FPRINTF(stderr, "x=%d\n", *x); + STARPU_ASSERT_MSG(*x == 40, "%d != %d\n", *x, 40); + (*x)++; +} + +static +void callback2(void *ptr) +{ + int *x2 = (int *)ptr; + FPRINTF(stderr, "x2=%d\n", *x2); + STARPU_ASSERT_MSG(*x2 == 41, "%d != %d\n", *x2, 41); + (*x2)++; +} + +static +void prologue_callback(void *ptr) +{ + int *y = (int *)ptr; + FPRINTF(stderr, "y=%d\n", *y); + STARPU_ASSERT_MSG(*y == 12, "%d != %d\n", *y, 12); + (*y)++; +} + +static +void prologue_callback_pop(void *ptr) +{ + int *z = (int *)ptr; + FPRINTF(stderr, "z=%d\n", *z); + STARPU_ASSERT_MSG(*z == 32, "%d != %d\n", *z, 32); + (*z)++; +} + +int main(int argc, char **argv) +{ + int ret; + int x=40; + int x2=41; + int y=12; + int z=32; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_task_insert(NULL, + STARPU_CALLBACK_WITH_ARG_NFREE, callback, &x, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_insert(NULL, + STARPU_CALLBACK, callback2, + 
STARPU_CALLBACK_ARG_NFREE, &x2,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	ret = starpu_task_insert(NULL, /* prologue callback with its argument */
+				 STARPU_PROLOGUE_CALLBACK, prologue_callback,
+				 STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &y,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	ret = starpu_task_insert(NULL, /* "pop" prologue callback with its argument */
+				 STARPU_PROLOGUE_CALLBACK_POP, prologue_callback_pop,
+				 STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, &z,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_task_wait_for_all(); /* wait before checking that each callback incremented its counter exactly once */
+
+	STARPU_ASSERT_MSG(x == 41, "x should be equal to %d and not %d\n", 41, x);
+	STARPU_ASSERT_MSG(x2 == 42, "x2 should be equal to %d and not %d\n", 42, x2);
+	STARPU_ASSERT_MSG(y == 13, "y should be equal to %d and not %d\n", 13, y);
+	STARPU_ASSERT_MSG(z == 33, "z should be equal to %d and not %d\n", 33, z);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
+
diff --git a/tests/main/const_codelet.c b/tests/main/const_codelet.c
new file mode 100644
index 0000000..96a930c
--- /dev/null
+++ b/tests/main/const_codelet.c
@@ -0,0 +1,132 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+#include
+#include "../helper.h"
+/* Without setenv the test cannot set STARPU_CODELET_PROFILING, so it compiles to a stub that reports "skipped". */
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined.
Skipping test
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+/*
+ * Test task submission
+ */
+/* i counts kernel executions (incremented with STARPU_ATOMIC_ADD); j receives its final value in main. */
+static int i = 0, j;
+/* Kernel used by both const codelets below: atomically adds 1 to i and prints the result plus one. */
+void dummy_func(void *descr[], void *arg)
+{
+	(void)descr;
+	(void)arg;
+	int old_i = STARPU_ATOMIC_ADD(&i, 1);
+	FPRINTF(stdout, "called third task, i = %d\n", old_i+1);
+}
+/* const codelet of the third task, which callback() submits; .checked is preset to 1 in the initializer. */
+static const struct starpu_codelet dummy_codelet =
+{
+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
+	.cpu_funcs = {dummy_func},
+	.cuda_funcs = {dummy_func},
+	.opencl_funcs = {dummy_func},
+	.model = NULL,
+	.nbuffers = 0,
+	.checked = 1
+};
+/* Callback of the second task: submits a detached third task; exits with STARPU_TEST_SKIPPED on -ENODEV. */
+static void callback(void *arg)
+{
+	(void)arg;
+	struct starpu_task *task = starpu_task_create();
+	task->cl = (struct starpu_codelet *) &dummy_codelet; /* const qualifier is cast away to fit task->cl */
+	task->detach = 1;
+	if (starpu_task_submit(task) == -ENODEV)
+		exit(STARPU_TEST_SKIPPED);
+	FPRINTF(stdout, "submitted third task, i = %d\n", i);
+}
+/* const codelet of the second task, which task_submit_func() submits; it also runs dummy_func. */
+static const struct starpu_codelet callback_submit_codelet =
+{
+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
+	.cpu_funcs = {dummy_func},
+	.cuda_funcs = {dummy_func},
+	.opencl_funcs = {dummy_func},
+	.model = NULL,
+	.nbuffers = 0,
+	.checked = 1
+};
+/* Kernel of the first task: submits the detached second task with callback() attached, then adds 1 to i. */
+static void task_submit_func(void *descr[], void *arg)
+{
+	(void)descr;
+	(void)arg;
+	struct starpu_task *task = starpu_task_create();
+	task->cl = (struct starpu_codelet *) &callback_submit_codelet; /* const qualifier is cast away to fit task->cl */
+	task->callback_func = callback;
+	task->detach = 1;
+	if (starpu_task_submit(task) == -ENODEV)
+		exit(STARPU_TEST_SKIPPED);
+	int old_i = STARPU_ATOMIC_ADD(&i, 1);
+	FPRINTF(stdout, "submitted second task, i = %d\n", old_i + 1);
+}
+/* Non-const codelet of the first task, which main submits. */
+static struct starpu_codelet task_submit_codelet =
+{
+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
+	.cpu_funcs = {task_submit_func},
+	.cuda_funcs = {task_submit_func},
+	.opencl_funcs = {task_submit_func},
+	.model = NULL,
+	.nbuffers = 0
+};
+/* Sets STARPU_CODELET_PROFILING=0 before starpu_init, then submits the first task of the chain. */
+int main(void)
+{
+	int ret;
+
+	setenv("STARPU_CODELET_PROFILING", "0", 1);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &task_submit_codelet;
+	task->detach = 1;
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_task_wait_for_all();
+	j = i; /* snapshot of the execution counter, taken after the wait */
+
+	starpu_shutdown();
+
+	return j == 3 ? EXIT_SUCCESS : EXIT_FAILURE; /* three increments expected: task_submit_func once, dummy_func twice */
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+#endif
diff --git a/tests/main/deadlock.c b/tests/main/deadlock.c
new file mode 100644
index 0000000..b52c6de
--- /dev/null
+++ b/tests/main/deadlock.c
@@ -0,0 +1,71 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header name missing on this bare #include in this copy -- restore from upstream */
+#include "../helper.h"
+
+/*
+ * Create a cycle of tasks with NULL codelet, using manual dependencies.
+ * This is meant to try debugging tools with such a deadlock case.
+ */ + +#define N 4 + +int main(int argc, char **argv) +{ + int i, ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task **tasks = (struct starpu_task **) malloc(N*sizeof(struct starpu_task *)); + + for (i = 0; i < N; i++) + { + tasks[i] = starpu_task_create(); + tasks[i]->cl = NULL; + } + + for (i = 0; i < N; i++) + { + if (i > 0) + starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]); + else + starpu_task_declare_deps_array(tasks[i], 1, &tasks[N-1]); + } + + for (i = 0; i < N; i++) + { + ret = starpu_task_submit(tasks[i]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + + starpu_shutdown(); + free(tasks); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/declare_deps_after_submission.c b/tests/main/declare_deps_after_submission.c new file mode 100644 index 0000000..dc64d72 --- /dev/null +++ b/tests/main/declare_deps_after_submission.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that we can declare a dependency after submitting a non-auto-destroy task
+ */
+/* Iteration count: 4 when STARPU_QUICK_CHECK is defined, 128 otherwise. */
+#ifdef STARPU_QUICK_CHECK
+  #define NLOOPS 4
+#else
+  #define NLOOPS 128
+#endif
+/* Returns a new task whose codelet is starpu_codelet_nop and whose cl_arg is NULL. */
+static struct starpu_task *create_dummy_task(void)
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &starpu_codelet_nop;
+	task->cl_arg = NULL;
+
+	return task;
+}
+/* Per iteration: submit taskA, declare it as a dependency of taskB afterwards, submit taskB, then wait for both. */
+int main(int argc, char **argv)
+{
+	int ret;
+	unsigned loop, nloops = NLOOPS;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		struct starpu_task *taskA, *taskB;
+
+		taskA = create_dummy_task();
+		taskB = create_dummy_task();
+
+		/* By default, dynamically allocated tasks are destroyed at
+		 * termination, we cannot declare a dependency on something
+		 * that does not exist anymore.
*/ + taskA->destroy = 0; + taskA->detach = 0; + + /* we wait for the tasks explicitly */ + taskB->detach = 0; + + ret = starpu_task_submit(taskA); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_declare_deps_array(taskB, 1, &taskA); + + ret = starpu_task_submit(taskB); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(taskB); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(taskA); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_destroy(taskA); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/declare_deps_after_submission_synchronous.c b/tests/main/declare_deps_after_submission_synchronous.c new file mode 100644 index 0000000..283704a --- /dev/null +++ b/tests/main/declare_deps_after_submission_synchronous.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that we can declare a dependency after submitting a non-auto-destroy synchronous task
+ */
+/* Iteration count: 4 when STARPU_QUICK_CHECK is defined, 128 otherwise. */
+#ifdef STARPU_QUICK_CHECK
+  #define NLOOPS 4
+#else
+  #define NLOOPS 128
+#endif
+/* Returns a new task whose codelet is starpu_codelet_nop and whose cl_arg is NULL. */
+static struct starpu_task *create_dummy_task(void)
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &starpu_codelet_nop;
+	task->cl_arg = NULL;
+
+	return task;
+}
+/* Per iteration: submit synchronous taskA, declare it as a dependency of taskB afterwards, submit synchronous taskB. */
+int main(int argc, char **argv)
+{
+	int ret;
+	unsigned loop, nloops=NLOOPS;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_task *taskA, *taskB;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		taskA = create_dummy_task();
+		taskB = create_dummy_task();
+
+		/* By default, dynamically allocated tasks are destroyed at
+		 * termination, we cannot declare a dependency on something
+		 * that does not exist anymore. */
+		taskA->destroy = 0;
+		taskA->synchronous = 1;
+
+		ret = starpu_task_submit(taskA);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		/* the dependency on taskA is declared only after taskA has been submitted */
+		starpu_task_declare_deps_array(taskB, 1, &taskA);
+
+		taskB->synchronous = 1;
+
+		ret = starpu_task_submit(taskB);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		starpu_task_destroy(taskA); /* taskA->destroy is 0, so it is destroyed by hand */
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/declare_deps_in_callback.c b/tests/main/declare_deps_in_callback.c
new file mode 100644
index 0000000..1e2e8ce
--- /dev/null
+++
b/tests/main/declare_deps_in_callback.c
@@ -0,0 +1,93 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that we can declare deps from the callback of the task
+ */
+/* Iteration count: 4 when STARPU_QUICK_CHECK is defined, 128 otherwise. */
+#ifdef STARPU_QUICK_CHECK
+  #define NLOOPS 4
+#else
+  #define NLOOPS 128
+#endif
+/* Callback of taskA; arg is taskB. Makes taskB depend on the current task (taskA), then submits taskB. */
+static void callback(void *arg)
+{
+	struct starpu_task *taskA, *taskB;
+	int ret;
+
+	taskA = starpu_task_get_current();
+	taskB = (struct starpu_task *) arg;
+
+	starpu_task_declare_deps_array(taskB, 1, &taskA);
+	ret = starpu_task_submit(taskB);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+/* Returns a new task whose codelet is starpu_codelet_nop and whose cl_arg is NULL. */
+static struct starpu_task *create_dummy_task(void)
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &starpu_codelet_nop;
+	task->cl_arg = NULL;
+
+	return task;
+}
+/* Per iteration: create taskA and taskB, attach callback() to taskA with taskB as argument, submit only taskA. */
+int main(int argc, char **argv)
+{
+	int ret;
+	unsigned loop;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_task *taskA, *taskB;
+
+	for (loop = 0; loop < NLOOPS; loop++)
+	{
+		taskA = create_dummy_task();
+		taskB = create_dummy_task();
+
+		taskA->callback_func = callback;
+		taskA->callback_arg = taskB; /* taskB itself is submitted from callback(), not from here */
+
+		ret = starpu_task_submit(taskA);
+		if (ret == -ENODEV) goto enodev;
+
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all(); /* the taskB instances are submitted from callback(), so wait on everything rather than on taskA */
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/deploop.c b/tests/main/deploop.c
new file mode 100644
index 0000000..2749e66
--- /dev/null
+++ b/tests/main/deploop.c
@@ -0,0 +1,94 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Create task A and B such that
+ * - B depends on A by tag dependency.
+ * - A would depend on B by data dependency, but we disable that by disabling
+ *   sequential consistency.
+ */ + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + FPRINTF(stderr,"executing task %p\n", starpu_task_get_current()); +} + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 1, + .modes = { STARPU_RW } +}; + +int main(void) +{ + int ret; + starpu_data_handle_t handle; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_void_data_register(&handle); + + struct starpu_task *taskA, *taskB; + + /* Make B depend on A */ + starpu_tag_declare_deps(1, 1, (starpu_tag_t) 0); + + taskA = starpu_task_create(); + taskA->cl = &dummy_codelet; + taskA->tag_id = 0; + taskA->use_tag = 1; + taskA->handles[0] = handle; + taskA->sequential_consistency = 0; + FPRINTF(stderr,"A is %p\n", taskA); + + taskB = starpu_task_create(); + taskB->cl = &dummy_codelet; + taskB->tag_id = 1; + taskB->use_tag = 1; + taskB->handles[0] = handle; + FPRINTF(stderr,"B is %p\n", taskB); + + ret = starpu_task_submit(taskB); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + ret = starpu_task_submit(taskA); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/deprecated_func.c b/tests/main/deprecated_func.c new file mode 100644 index 0000000..f903656 --- /dev/null +++ b/tests/main/deprecated_func.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header name missing on this bare #include in this copy -- restore from upstream */
+#include "../helper.h"
+
+/*
+ * Test that we support the cpu_func and where deprecated field
+ */
+/* Kernel: copies the int variable in descr[0] into the one in descr[1]. */
+void cpu_codelet(void *descr[], void *_args)
+{
+	(void)_args;
+	int *valin = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *valout = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	*valout = *valin;
+}
+/* Kernel: writes twice the int variable in descr[0] into the one in descr[1]. */
+void cpu2_codelet(void *descr[], void *_args)
+{
+	(void)_args;
+	int *valin = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *valout = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	*valout = *valin*2;
+}
+/* Variant 1: only the cpu_funcs array is set. */
+struct starpu_codelet cl_cpu_funcs =
+{
+	.where = STARPU_CPU,
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 2,
+	.name = "cpu_funcs",
+};
+/* Variant 2: only the cpu_func field is set. */
+struct starpu_codelet cl_cpu_func =
+{
+	.where = STARPU_CPU,
+	.cpu_func = cpu_codelet,
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 2,
+	.name = "cpu_func",
+};
+/* Variant 3: cpu_func holds STARPU_MULTIPLE_CPU_IMPLEMENTATIONS and the kernel is in cpu_funcs. */
+struct starpu_codelet cl_cpu_multiple =
+{
+	.where = STARPU_CPU,
+	.cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS,
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 2,
+	.name = "cpu_multiple",
+};
+/* Variant 4: cpu_func (doubling kernel) and cpu_funcs (copying kernel) disagree; submit_codelet() passes only if the output equals the input. */
+struct starpu_codelet cl_cpu_func_funcs =
+{
+	.where = STARPU_CPU,
+	.cpu_func = cpu2_codelet,
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 2,
+	.name = "cpu_func_funcs",
+};
+/* Runs a by-value copy of cl with its where field overridden; returns -ENODEV, or nonzero when the output differs from the input. */
+static
+int submit_codelet(struct
starpu_codelet cl, int where) +{ + int x=42, y=14; + starpu_data_handle_t handles[2]; + int ret; + + starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); + + cl.where = where; + ret = starpu_task_insert(&cl, + STARPU_R, handles[0], + STARPU_W, handles[1], + 0); + if (ret == -ENODEV) + { + FPRINTF(stderr, "cannot execute codelet <%s> with where=%d\n", cl.name, where); + starpu_data_unregister(handles[0]); + starpu_data_unregister(handles[1]); + return ret; + } + + starpu_task_wait_for_all(); + + starpu_data_unregister(handles[0]); + starpu_data_unregister(handles[1]); + + if (x != y) + { + FPRINTF(stderr, "error when executing codelet <%s> with where=%d\n", cl.name, where); + } + else + { + FPRINTF(stderr, "success when executing codelet <%s> with where=%d\n", cl.name, where); + } + return x != y; +} + +int main(void) +{ + int ret; + unsigned where; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + for(where=0 ; where<=STARPU_CPU ; where+=STARPU_CPU) + { + ret = submit_codelet(cl_cpu_func, where); + if (ret == -ENODEV) + { + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + return STARPU_TEST_SKIPPED; + } + + if (!ret) + { + ret = submit_codelet(cl_cpu_funcs, where); + } + if (!ret) + { + ret = submit_codelet(cl_cpu_multiple, where); + } + if (!ret) + { + ret = submit_codelet(cl_cpu_func_funcs, where); + } + } + + starpu_shutdown(); + + STARPU_RETURN(ret); +} diff --git a/tests/main/display_binding.c b/tests/main/display_binding.c new file mode 100644 index 0000000..110e80e --- /dev/null +++ b/tests/main/display_binding.c @@ -0,0 +1,42 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header names missing on the bare #include lines in this copy -- restore from upstream */
+#include
+#include
+#include "../helper.h"
+
+/* Without setenv the variable cannot be set, so the test compiles to a stub that reports "skipped". */
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+int main(void) /* sets STARPU_DISPLAY_BINDINGS=1, then only initializes and shuts StarPU down */
+{
+	setenv("STARPU_DISPLAY_BINDINGS", "1", 1);
+
+	int ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
+#endif
diff --git a/tests/main/driver_api/init_run_deinit.c b/tests/main/driver_api/init_run_deinit.c
new file mode 100644
index 0000000..5de011f
--- /dev/null
+++ b/tests/main/driver_api/init_run_deinit.c
@@ -0,0 +1,274 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include /* NOTE(review): header name missing on this bare #include in this copy -- restore from upstream */
+
+#include "../../helper.h"
+
+#define NTASKS 8 /* number of tasks pushed through the manually driven worker */
+/* dummy(): kernel used for every device type; increments the int that args points to. */
+#if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_HIP)
+void dummy(void *buffers[], void *args)
+{
+	(void) buffers;
+	(*(int *)args)++;
+}
+/* Shared codelet; test_driver() sets its where field before each submission. */
+static struct starpu_codelet cl =
+{
+	.cpu_funcs = { dummy },
+	.cuda_funcs = { dummy },
+	.opencl_funcs = { dummy },
+	.hip_funcs = { dummy },
+	.nbuffers = 0
+};
+/* Calls starpu_driver_init on d and checks the result with STARPU_CHECK_RETURN_VALUE. */
+static void init_driver(struct starpu_driver *d)
+{
+	int ret;
+	ret = starpu_driver_init(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init");
+}
+/* Submits task, then calls starpu_driver_run_once on d until the task is finished, and finally waits for it. */
+static void run(struct starpu_task *task, struct starpu_driver *d)
+{
+	int ret;
+	ret = starpu_task_submit(task);
+	starpu_do_schedule();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	while (!starpu_task_finished(task))
+	{
+		ret = starpu_driver_run_once(d);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run_once");
+	}
+	ret = starpu_task_wait(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+}
+/* Calls starpu_driver_deinit on d and checks the result with STARPU_CHECK_RETURN_VALUE. */
+static void deinit_driver(struct starpu_driver *d)
+{
+	int ret;
+	ret = starpu_driver_deinit(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit");
+}
+/* Signature of the per-device worker counting functions handed to test_driver(). */
+typedef unsigned (*worker_get_count)(void);
+/* Initializes StarPU with conf, drives d by hand through NTASKS tasks; returns 0 on success, 1 on a wrong counter, STARPU_TEST_SKIPPED without a worker. */
+static int test_driver(struct starpu_conf *conf, struct starpu_driver *d, const char *name_driver, worker_get_count worker_get_count_func, int32_t where_driver)
+{
+	int var = 0, ret, nworker;
+
+	ret = starpu_init(conf);
+	if (ret == -ENODEV)
+	{
+		FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver);
+		return STARPU_TEST_SKIPPED;
+	}
+
+	nworker = worker_get_count_func();
+	if (nworker == 0)
+	{
+		FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver);
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	init_driver(d);
+	int i;
+	for (i = 0; i < NTASKS; i++)
+	{
+		struct starpu_task *task;
+		task = starpu_task_create();
+		cl.where = where_driver;
+
task->cl = &cl;
+		task->cl_arg = &var; /* dummy() increments var once per task */
+		task->detach = 0;
+
+		run(task, d);
+	}
+	deinit_driver(d);
+
+	starpu_task_wait_for_all();
+	starpu_shutdown();
+
+	FPRINTF(stderr, "[%s] Var is %d (expected value: %d)\n", name_driver, var, NTASKS);
+	return !!(var != NTASKS);
+}
+#endif /* STARPU_USE_CPU || STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_HIP*/
+/* test_cpu(): one CPU worker (cpu_id 0), listed in not_launched_drivers and handed to test_driver(). */
+#ifdef STARPU_USE_CPU
+static int test_cpu(void)
+{
+	int ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_CPU_WORKER,
+		.id.cpu_id = 0
+	};
+
+	conf.precedence_over_environment_variables = 1;
+	starpu_conf_noworker(&conf);
+	conf.ncpus = 1;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	return test_driver(&conf, &d, "CPU", starpu_cpu_worker_get_count, STARPU_CPU);
+}
+#endif /* STARPU_USE_CPU */
+/* test_cuda(): same for one CUDA worker; the device id is read from STARPU_WORKERS_CUDAID when set, else 0. */
+#ifdef STARPU_USE_CUDA
+static int test_cuda(void)
+{
+	int ret;
+	struct starpu_conf conf;
+	int cudaid = 0;
+	char *cudaid_str = getenv("STARPU_WORKERS_CUDAID");
+
+	if (cudaid_str)
+		cudaid = atoi(cudaid_str);
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_CUDA_WORKER,
+		.id.cuda_id = cudaid
+	};
+
+	conf.precedence_over_environment_variables = 1;
+	starpu_conf_noworker(&conf);
+	conf.ncuda = 1;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	return test_driver(&conf, &d, "CUDA", starpu_cuda_worker_get_count, STARPU_CUDA);
+}
+#endif /* STARPU_USE_CUDA */
+/* test_hip(): same for one HIP worker (hip_id 0). */
+#ifdef STARPU_USE_HIP
+static int test_hip(void)
+{
+	int ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_HIP_WORKER,
+		.id.hip_id = 0
+	};
+
+	conf.precedence_over_environment_variables = 1;
+	starpu_conf_noworker(&conf);
+	conf.nhip = 1;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	return test_driver(&conf,
&d, "HIP", starpu_hip_worker_get_count, STARPU_HIP);
+}
+#endif /* STARPU_USE_HIP */
+/* test_opencl(): picks the first device of the first OpenCL platform and drives it through test_driver(). */
+#ifdef STARPU_USE_OPENCL
+static int test_opencl(void)
+{
+	cl_int err;
+	cl_platform_id platform;
+	cl_uint pdummy;
+	int nopencl; /* NOTE(review): never read or written in this function */
+
+	err = clGetPlatformIDs(1, &platform, &pdummy);
+	if (err != CL_SUCCESS)
+	{
+		FPRINTF(stderr, "WARNING: No OpenCL platform found\n");
+		return STARPU_TEST_SKIPPED;
+	}
+	/* GPU and accelerator devices by default; the two environment variables below add or force CPU devices */
+	cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+	if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0)
+		device_type |= CL_DEVICE_TYPE_CPU;
+	if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
+		device_type = CL_DEVICE_TYPE_CPU;
+
+	cl_device_id device_id;
+	err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL);
+	if (err != CL_SUCCESS)
+	{
+		FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n");
+		return STARPU_TEST_SKIPPED;
+	}
+
+	int var = 0, ret; /* NOTE(review): var is not used below; test_driver() keeps its own counter */
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_OPENCL_WORKER,
+		.id.opencl_id = device_id
+	};
+
+	conf.precedence_over_environment_variables = 1;
+	starpu_conf_noworker(&conf);
+	conf.nopencl = 1;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	return test_driver(&conf, &d, "OpenCL", starpu_opencl_worker_get_count, STARPU_OPENCL);
+}
+#endif /* STARPU_USE_OPENCL */
+/* Runs each compiled-in device test in turn; stops at the first one returning 1, otherwise returns the result of the last test run. */
+int main(void)
+{
+	int ret = STARPU_TEST_SKIPPED;
+
+#ifdef STARPU_USE_CPU
+	ret = test_cpu();
+	if (ret == 1)
+		return ret;
+#endif
+#if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1))
+	ret = test_cuda();
+	if (ret == 1)
+		return ret;
+#endif
+#ifdef STARPU_USE_OPENCL
+	ret = test_opencl();
+	if (ret == 1)
+		return ret;
+#endif
+#ifdef STARPU_USE_HIP
+	ret = test_hip();
+	if (ret == 1)
+		return ret;
+#endif
+
+	return ret;
+}
+
diff --git a/tests/main/driver_api/run_driver.c b/tests/main/driver_api/run_driver.c
new file mode 100644
index
0000000..d622aae
--- /dev/null
+++ b/tests/main/driver_api/run_driver.c
@@ -0,0 +1,273 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include "../../helper.h"
+
+/*
+ * Users can directly control drivers by using the starpu_driver* functions.
+ *
+ * This test makes sure that the starpu_driver_run function works for CPU, CUDA,
+ * OpenCL and HIP drivers, and that the starpu_drivers_request_termination function
+ * correctly shuts down all drivers.
+ *
+ * The test_* functions can return:
+ * - 0 (success)
+ * - 1 (failure)
+ * - STARPU_TEST_SKIPPED (non-critical errors)
+ */
+/* dummy(): kernel used for every device type; increments the int that args points to, then sleeps via starpu_usleep(100000). */
+#if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_HIP)
+static void dummy(void *buffers[], void *args)
+{
+	(void) buffers;
+	(*(int *)args)++;
+	starpu_usleep(100000);
+}
+/* Shared codelet; test_driver() sets its where field before submitting. */
+static struct starpu_codelet cl =
+{
+	.cpu_funcs = { dummy },
+	.cuda_funcs = { dummy },
+	.opencl_funcs = { dummy },
+	.hip_funcs = { dummy },
+	.nbuffers = 0
+};
+/* Thread entry point: runs starpu_driver_run on the driver passed as arg and checks its return value. */
+static void *run_driver(void *arg)
+{
+	struct starpu_driver *d = (struct starpu_driver *) arg;
+	int ret = starpu_driver_run(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run");
+	return NULL;
+}
+/* Signature of the per-device worker counting functions handed to test_driver(). */
+typedef unsigned (*worker_get_count)(void);
+/* Initializes StarPU with conf, runs d in its own thread, submits one synchronous task; returns 0 when var became 1, 1 on failure, STARPU_TEST_SKIPPED otherwise. */
+static int test_driver(struct starpu_conf *conf, struct starpu_driver *d, const char *name_driver, worker_get_count worker_get_count_func, int32_t where_driver)
+{
+	int ret, var = 0;
+	static starpu_pthread_t driver_thread;
+
+	ret = starpu_init(conf);
+	if (ret == -ENODEV || worker_get_count_func() == 0)
+	{
+		FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver);
+		if (ret == 0) /* init succeeded but no worker of this kind exists: undo the init */
+			starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = starpu_pthread_create(&driver_thread, NULL, run_driver, d);
+	if (ret != 0)
+	{
+		ret = 1;
+		goto out2; /* no driver thread to terminate or join */
+	}
+
+	struct starpu_task *task;
+	task = starpu_task_create();
+	cl.where = where_driver;
+	task->cl = &cl;
+	task->cl_arg = &var;
+	task->synchronous = 1;
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV)
+	{
+		FPRINTF(stderr, "WARNING: No worker can execute this task\n");
+		ret = STARPU_TEST_SKIPPED;
+		goto out;
+	}
+
+	FPRINTF(stderr, "[%s] Var = %d (expected value: 1)\n", name_driver, var);
+	ret = !!(var != 1);
+out:
+	starpu_drivers_request_termination();
+	if (starpu_pthread_join(driver_thread, NULL) != 0)
+		return 1; /* NOTE(review): this path returns without calling starpu_shutdown() */
+out2:
+	starpu_shutdown();
+	return ret;
+}
+#endif /* STARPU_USE_CPU || STARPU_USE_CUDA ||
STARPU_USE_OPENCL || STARPU_USE_HIP */ + +#ifdef STARPU_USE_CPU +static int test_cpu(void) +{ + int ret; + struct starpu_conf conf; + + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return 1; + + struct starpu_driver d = + { + .type = STARPU_CPU_WORKER, + .id.cpu_id = 0 + }; + + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = 1; + conf.not_launched_drivers = &d; + conf.n_not_launched_drivers = 1; + + return test_driver(&conf, &d, "CPU", starpu_cpu_worker_get_count, STARPU_CPU); +} +#endif /* STARPU_USE_CPU */ + +#ifdef STARPU_USE_CUDA +static int test_cuda(void) +{ + int ret; + struct starpu_conf conf; + int cudaid = 0; + char *cudaid_str = getenv("STARPU_WORKERS_CUDAID"); + + if (cudaid_str) + cudaid = atoi(cudaid_str); + + /* FIXME: starpu_driver would need another field to specify which stream we're driving */ + if (starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1) != 1 && + starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1) > 0) + return STARPU_TEST_SKIPPED; + + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return 1; + + struct starpu_driver d = + { + .type = STARPU_CUDA_WORKER, + .id.cuda_id = cudaid + }; + + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncuda = 1; + conf.not_launched_drivers = &d; + conf.n_not_launched_drivers = 1; + + return test_driver(&conf, &d, "CUDA", starpu_cuda_worker_get_count, STARPU_CUDA); +} +#endif /* STARPU_USE_CUDA */ + +#ifdef STARPU_USE_HIP +static int test_hip(void) +{ + int ret; + struct starpu_conf conf; + + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return 1; + + struct starpu_driver d = + { + .type = STARPU_HIP_WORKER, + .id.hip_id = 0 + }; + + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.nhip = 1; + conf.not_launched_drivers = &d; + conf.n_not_launched_drivers = 1; + + return test_driver(&conf, &d, "HIP", starpu_hip_worker_get_count, 
STARPU_HIP); +} +#endif /* STARPU_USE_HIP */ + +#ifdef STARPU_USE_OPENCL +static int test_opencl(void) +{ + int ret; + + cl_int err; + cl_uint pdummy; + cl_platform_id platform; + err = clGetPlatformIDs(1, &platform, &pdummy); + if (err != CL_SUCCESS) + { + FPRINTF(stderr, "WARNING: No OpenCL platform found\n"); + return STARPU_TEST_SKIPPED; + } + + cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; + if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0) + device_type |= CL_DEVICE_TYPE_CPU; + if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) + device_type = CL_DEVICE_TYPE_CPU; + + cl_device_id device_id; + err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL); + if (err != CL_SUCCESS) + { + FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n"); + return STARPU_TEST_SKIPPED; + } + + struct starpu_conf conf; + ret = starpu_conf_init(&conf); + if (ret == -EINVAL) + return 1; + + struct starpu_driver d = + { + .type = STARPU_OPENCL_WORKER, + .id.opencl_id = device_id + }; + + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.nopencl = 1; + conf.not_launched_drivers = &d; + conf.n_not_launched_drivers = 1; + + return test_driver(&conf, &d, "OpenCL", starpu_opencl_worker_get_count, STARPU_OPENCL); +} +#endif /* STARPU_USE_OPENCL */ + +int main(void) +{ + int ret = STARPU_TEST_SKIPPED; + +#ifdef STARPU_USE_CPU + ret = test_cpu(); + if (ret == 1) + return 1; +#endif +#if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)) + ret = test_cuda(); + if (ret == 1) + return 1; +#endif +#ifdef STARPU_USE_OPENCL + ret = test_opencl(); + if (ret == 1) + return 1; +#endif +#ifdef STARPU_USE_HIP + ret = test_hip(); + if (ret == 1) + return 1; +#endif + return ret; +} diff --git a/tests/main/empty_task.c b/tests/main/empty_task.c new file mode 100644 index 0000000..9f12498 --- /dev/null +++ b/tests/main/empty_task.c @@ -0,0 +1,106 @@ +/* StarPU --- 
Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Measure the cost of a task with a NULL codelet + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 65536; +#endif + +static void usage(char **argv) +{ + FPRINTF(stderr, "%s [-i ntasks] [-h]\n", argv[0]); + exit(-1); +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:t:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 'h': + usage(argv); + break; + } +} + +int main(int argc, char **argv) +{ + int ret; + double timing; + double start; + double end; + + parse_args(argc, argv); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "#tasks : %u\n", ntasks); + + start = starpu_timing_now(); + + unsigned i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = NULL; + + task->detach = 0; + task->destroy = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + } + + end = starpu_timing_now(); 
+ + timing = end - start; + + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/empty_task_chain.c b/tests/main/empty_task_chain.c new file mode 100644 index 0000000..c28f4ec --- /dev/null +++ b/tests/main/empty_task_chain.c @@ -0,0 +1,75 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include "../helper.h"
+
+/*
+ * Create a chain of tasks with NULL codelet, using manual dependencies
+ */
+
+#define N 4
+
+/*
+ * Builds N tasks with a NULL codelet where task i depends on task i-1,
+ * submits tasks 1..N-1 first and task 0 last, then waits for the last task
+ * of the chain.
+ *
+ * Returns EXIT_SUCCESS on success, EXIT_FAILURE when the task array cannot
+ * be allocated, and STARPU_TEST_SKIPPED when StarPU reports -ENODEV.
+ */
+int main(int argc, char **argv)
+{
+	int i, ret;
+	struct starpu_task **tasks;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	tasks = malloc(N * sizeof(*tasks));
+	if (tasks == NULL)
+	{
+		/* Do not dereference a NULL array below */
+		fprintf(stderr, "ERROR: cannot allocate the task array\n");
+		starpu_shutdown();
+		return EXIT_FAILURE;
+	}
+
+	for (i = 0; i < N; i++)
+	{
+		tasks[i] = starpu_task_create();
+		tasks[i]->cl = NULL;
+
+		if (i > 0)
+		{
+			starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]);
+		}
+
+		/* Only the last task is waited for explicitly */
+		if (i == (N-1))
+			tasks[i]->detach = 0;
+	}
+
+	/* Submit the dependent tasks first, the head of the chain last */
+	for (i = 1; i < N; i++)
+	{
+		ret = starpu_task_submit(tasks[i]);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_submit(tasks[0]);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	ret = starpu_task_wait(tasks[N-1]);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+	starpu_shutdown();
+	free(tasks);
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	/* The pointer array was leaked on this path */
+	free(tasks);
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/empty_task_sync_point.c b/tests/main/empty_task_sync_point.c
new file mode 100644
index 0000000..abf8ecd
--- /dev/null
+++ b/tests/main/empty_task_sync_point.c
@@ -0,0 +1,90 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Test using a task with NULL codelet as a synchronization task through tag dependencies + */ + +static starpu_tag_t tagA = 0x0042; +static starpu_tag_t tagB = 0x1042; +static starpu_tag_t tagC = 0x2042; +static starpu_tag_t tagD = 0x3042; +static starpu_tag_t tagE = 0x4042; +static starpu_tag_t tagF = 0x5042; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* {A,B,C} -> D -> {E,F}, D is empty */ + struct starpu_task *taskA = starpu_task_create(); + taskA->cl = &starpu_codelet_nop; + taskA->use_tag = 1; + taskA->tag_id = tagA; + + struct starpu_task *taskB = starpu_task_create(); + taskB->cl = &starpu_codelet_nop; + taskB->use_tag = 1; + taskB->tag_id = tagB; + + struct starpu_task *taskC = starpu_task_create(); + taskC->cl = &starpu_codelet_nop; + taskC->use_tag = 1; + taskC->tag_id = tagC; + + struct starpu_task *taskD = starpu_task_create(); + taskD->cl = NULL; + taskD->use_tag = 1; + taskD->tag_id = tagD; + starpu_tag_declare_deps(tagD, 3, tagA, tagB, tagC); + + struct starpu_task *taskE = starpu_task_create(); + taskE->cl = &starpu_codelet_nop; + taskE->use_tag = 1; + taskE->tag_id = tagE; + starpu_tag_declare_deps(tagE, 1, tagD); 
+ + struct starpu_task *taskF = starpu_task_create(); + taskF->cl = &starpu_codelet_nop; + taskF->use_tag = 1; + taskF->tag_id = tagF; + starpu_tag_declare_deps(tagF, 1, tagD); + + ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_tag_t tag_array[2] = {tagE, tagF}; + ret = starpu_tag_wait_array(2, tag_array); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/empty_task_sync_point_tasks.c b/tests/main/empty_task_sync_point_tasks.c new file mode 100644 index 0000000..15cb581 --- /dev/null +++ b/tests/main/empty_task_sync_point_tasks.c @@ -0,0 +1,68 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include "../helper.h"
+
+/*
+ * Test using a task with NULL codelet as a synchronization task through task dependencies
+ */
+
+/*
+ * Creates six tasks: A, B, C, E and F use starpu_codelet_nop, D has a NULL
+ * codelet. D depends on A, B and C; E and F each depend on D. All tasks are
+ * submitted in the order A..F and the test waits for all of them.
+ *
+ * Returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when starpu_initialize()
+ * reports -ENODEV.
+ */
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* {A,B,C} -> D -> {E,F}, D is empty */
+	struct starpu_task *taskA = starpu_task_create();
+	taskA->cl = &starpu_codelet_nop;
+
+	struct starpu_task *taskB = starpu_task_create();
+	taskB->cl = &starpu_codelet_nop;
+
+	struct starpu_task *taskC = starpu_task_create();
+	taskC->cl = &starpu_codelet_nop;
+
+	/* The synchronization point: a task without any codelet */
+	struct starpu_task *taskD = starpu_task_create();
+	taskD->cl = NULL;
+
+	struct starpu_task *taskE = starpu_task_create();
+	taskE->cl = &starpu_codelet_nop;
+
+	struct starpu_task *taskF = starpu_task_create();
+	taskF->cl = &starpu_codelet_nop;
+
+	/* All dependencies are declared before any task is submitted */
+	starpu_task_declare_deps(taskD, 3, taskA, taskB, taskC);
+	starpu_task_declare_deps_array(taskE, 1, &taskD);
+	starpu_task_declare_deps_array(taskF, 1, &taskD);
+
+	ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/main/execute_on_a_specific_worker.c b/tests/main/execute_on_a_specific_worker.c
new file mode 100644
index 0000000..dc258bb
--- /dev/null
+++ b/tests/main/execute_on_a_specific_worker.c
@@ -0,0 +1,180 @@
+/* StarPU --- Runtime
system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Test binding tasks on specific workers + */ + +#ifdef STARPU_QUICK_CHECK + #define N 10 +#elif !defined(STARPU_LONG_CHECK) + #define N 100 +#else + #define N 1000 +#endif + +#define VECTORSIZE 1024 + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +static unsigned finished = 0; + +static unsigned cnt; + +starpu_data_handle_t v_handle; +static unsigned *v; + +static void callback(void *arg) +{ + (void)arg; + + unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); + ANNOTATE_HAPPENS_BEFORE(&cnt); + + if (res == 0) + { + ANNOTATE_HAPPENS_AFTER(&cnt); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + finished = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } +} + +void codelet_null(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +// int id = starpu_worker_get_id(); +// FPRINTF(stderr, "worker #%d\n", id); +} + +static struct starpu_codelet cl_r = +{ + .cpu_funcs = {codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +static struct starpu_codelet cl_w = +{ + .cpu_funcs = 
{codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet cl_rw = +{ + .cpu_funcs = {codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +static struct starpu_codelet *select_codelet_with_random_mode(void) +{ + int r = rand(); + + switch (r % 3) + { + case 0: + return &cl_r; + case 1: + return &cl_w; + case 2: + return &cl_rw; + }; + return &cl_rw; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + memset(v, 0, VECTORSIZE*sizeof(unsigned)); + + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); + + unsigned nworker = starpu_worker_get_count(); + + cnt = nworker*N; + + unsigned iter, worker; + for (iter = 0; iter < N; iter++) + { + for (worker = 0; worker < nworker; worker++) + { + /* execute a task on that worker */ + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = v_handle; + task->cl = select_codelet_with_random_mode(); + + task->callback_func = callback; + task->callback_arg = NULL; + + task->execute_on_a_specific_worker = 1; + task->workerid = worker; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!finished) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + starpu_data_unregister(v_handle); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + 
starpu_data_unregister(v_handle); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/execute_schedule.c b/tests/main/execute_schedule.c new file mode 100644 index 0000000..c6453d0 --- /dev/null +++ b/tests/main/execute_schedule.c @@ -0,0 +1,160 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "../helper.h"
+#include
+
+/*
+ * Test binding tasks on specific workers and in a specific order
+ */
+
+#ifdef STARPU_QUICK_CHECK
+  #define K 2
+#else
+  #define K 16
+#endif
+
+#define N 64
+
+/* Sequence number the next executed task is expected to carry */
+static unsigned current = 1;
+
+/*
+ * Task body: _args carries the sequence number given to the task at
+ * submission time; it must match the global counter, which is then advanced.
+ */
+void codelet(void *descr[], void *_args)
+{
+	(void)descr;
+	uintptr_t me = (uintptr_t) _args;
+	STARPU_ASSERT(current == me);
+	current++;
+}
+
+/* Constant cost reported to the performance model for every task */
+static double cost_function(struct starpu_task *task, unsigned nimpl)
+{
+	(void) task;
+	(void) nimpl;
+	return 1000;
+}
+
+static struct starpu_perfmodel model =
+{
+	.type = STARPU_COMMON,
+	.cost_function = cost_function,
+	.symbol = "cost"
+};
+
+static struct starpu_codelet cl =
+{
+	.cpu_funcs = {codelet},
+	.cuda_funcs = {codelet},
+	.opencl_funcs = {codelet},
+	.nbuffers = 1,
+	.modes = {STARPU_R},
+	.model = &model,
+};
+
+/*
+ * Runs K rounds. Each round submits N tasks pinned to worker 0 with an
+ * explicit workerorder that is the reverse of the submission order; every
+ * task is held back by its own NULL-codelet dependency task. The dependency
+ * tasks are then released in random order, and codelet() asserts that the
+ * pinned tasks still execute in workerorder.
+ *
+ * Returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when StarPU reports -ENODEV.
+ */
+int main(int argc, char **argv)
+{
+	int ret;
+	struct starpu_task *dep_task[N] = { NULL };
+	int *t[N];
+	starpu_data_handle_t h[N];
+
+	unsigned n, i, k;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	for (n = 0; n < N; n++)
+	{
+		t[n] = malloc((1<<20) * sizeof(*(t[n])));
+		/* Do not register a NULL buffer */
+		STARPU_ASSERT(t[n] != NULL);
+		starpu_variable_data_register(&h[n], STARPU_MAIN_RAM, (uintptr_t) t[n], (1<<20) * sizeof(*(t[n])));
+	}
+
+	for (k = 0; k < K; k++)
+	{
+		for (n = 0; n < N; n++)
+		{
+			struct starpu_task *task;
+
+			dep_task[n] = starpu_task_create();
+
+			dep_task[n]->cl = NULL;
+
+			task = starpu_task_create();
+
+			task->cl = &cl;
+
+			task->execute_on_a_specific_worker = 1;
+			task->workerid = 0;
+			/* We request for running the tasks in the opposite order of the submission order */
+			task->workerorder = k*N + (N-n);
+			task->cl_arg = (void*) (uintptr_t) (k*N + (N-n));
+			task->handles[0] = h[n];
+
+			starpu_task_declare_deps_array(task, 1, &dep_task[n]);
+
+			ret = starpu_task_submit(task);
+			if (ret == -ENODEV)
+			{
+				task->destroy = 0;
+				starpu_task_destroy(task);
+				goto enodev;
+			}
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+
+		for (n = 0; n < N; n++)
+		{
+			/* Pick one of the N-n dependency tasks still pending.
+			 * The cast must apply to the scaled value: casting
+			 * starpu_drand48() alone always yields 0. */
+			i = (unsigned)(starpu_drand48() * (N-n));
+			ret = starpu_task_submit(dep_task[i]);
+			/* Check before compacting so that a task whose
+			 * submission failed is still seen by the cleanup */
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			memmove(&dep_task[i], &dep_task[i+1], (N-i-1)*sizeof(dep_task[i]));
+			/* The pending set shrank by one: clear the vacated
+			 * slot so no stale pointer is destroyed in enodev */
+			dep_task[N-n-1] = NULL;
+		}
+	}
+
+	starpu_task_wait_for_all();
+	for (n = 0; n < N; n++)
+	{
+		starpu_data_unregister(h[n]);
+		free(t[n]);
+	}
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	for (n = 0; n < N; n++)
+	{
+		/* Only dependency tasks that were never submitted remain here */
+		if (dep_task[n])
+		{
+			dep_task[n]->destroy = 0;
+			starpu_task_destroy(dep_task[n]);
+		}
+		starpu_data_unregister(h[n]);
+		free(t[n]);
+	}
+	starpu_shutdown();
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/get_children_tasks.c b/tests/main/get_children_tasks.c
new file mode 100644
index 0000000..79df0b1
--- /dev/null
+++ b/tests/main/get_children_tasks.c
@@ -0,0 +1,99 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "../helper.h" + +/* + * Check that starpu_task_get_task_succs returns the set of children tasks + */ + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +struct starpu_codelet codelet_w = +{ + .modes = { STARPU_W }, + .cpu_funcs = {func_cpu}, + .cpu_funcs_name = {"func_cpu"}, + .nbuffers = 1 +}; + +struct starpu_codelet codelet_r = +{ + .modes = { STARPU_R }, + .cpu_funcs = {func_cpu}, + .cpu_funcs_name = {"func_cpu"}, + .nbuffers = 1 +}; + +int main(void) +{ + int ret; + starpu_data_handle_t h; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_void_data_register(&h); + + starpu_tag_t tag_init = 0; + + starpu_tag_declare_deps_array((starpu_tag_t) 1, 1, &tag_init); + + struct starpu_task *task1 = starpu_task_build(&codelet_w, STARPU_W, h, STARPU_TAG, (starpu_tag_t) 1, 0); + struct starpu_task *task2 = starpu_task_build(&codelet_r, STARPU_R, h, 0); + struct starpu_task *task3 = starpu_task_build(&codelet_r, STARPU_R, h, 0); + ret = starpu_task_submit(task1); + if (ret == -ENODEV) goto enodev; + ret = starpu_task_submit(task2); + if (ret == -ENODEV) goto enodev; + ret = starpu_task_submit(task3); + if (ret == -ENODEV) goto enodev; + + struct starpu_task *tasks[4]; + + ret = starpu_task_get_task_succs(task1, sizeof(tasks)/sizeof(*tasks), tasks); + STARPU_ASSERT(ret == 2); + STARPU_ASSERT(tasks[0] == task2 || tasks[1] == task2); + STARPU_ASSERT(tasks[0] == task3 || tasks[1] == task3); + + starpu_tag_notify_from_apps(0); + + starpu_data_unregister(h); + + starpu_shutdown(); + + STARPU_RETURN(ret?0:1); + +enodev: + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could 
perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/get_current_task.c b/tests/main/get_current_task.c new file mode 100644 index 0000000..384fd5b --- /dev/null +++ b/tests/main/get_current_task.c @@ -0,0 +1,122 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Check that starpu_task_get_current provides the proper task pointer + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 65536; +#endif + +void check_task_func(void *descr[], void *arg) +{ + (void)descr; + /* We check that the returned task is valid from the callback */ + struct starpu_task *task = (struct starpu_task *) arg; + STARPU_ASSERT(task == starpu_task_get_current()); +} + +static void check_task_callback(void *arg) +{ + /* We check that the returned task is valid from the callback */ + struct starpu_task *task = (struct starpu_task *) arg; + STARPU_ASSERT(task == starpu_task_get_current()); +} + +static struct starpu_codelet dummy_cl = +{ + .cuda_funcs = {check_task_func}, + .cpu_funcs = {check_task_func}, + .opencl_funcs = {check_task_func}, + /* starpu_task_get_current()) is not working on MPI Master Slave mode */ + /* .cpu_funcs_name = {"check_task_func"}, */ + .model = NULL, + 
.nbuffers = 0 +}; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "#tasks : %u\n", ntasks); + + unsigned i; + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + /* We check if the function is valid from the codelet or from + * the callback */ + task->cl = &dummy_cl; + task->cl_arg = task; + task->cl_arg_size = sizeof(task); + + task->callback_func = check_task_callback; + task->callback_arg = task; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + FPRINTF(stderr, "#empty tasks : %u\n", ntasks); + + /* We repeat the same experiment with null codelets */ + + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = NULL; + + /* We check if the function is valid from the callback */ + task->callback_func = check_task_callback; + task->callback_arg = task; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/hwloc_cpuset.c b/tests/main/hwloc_cpuset.c new file mode 100644 index 0000000..26170d6 --- /dev/null +++ b/tests/main/hwloc_cpuset.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Test workers hwloc cpusets + */ + +int main(void) +{ + int status = 0; + +#ifdef STARPU_HAVE_HWLOC + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + int ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + if (nworkers != 0) + { + hwloc_cpuset_t accumulator_cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_zero(accumulator_cpuset); + status = 0; + int workerids[nworkers]; + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workerids, nworkers); + int i; + for (i=0; i +#include "../helper.h" + +/* + * Try the starpu_task_insert interface in various ways + */ + +static int _ifactor = 12; +static float _ffactor = 10.0; + +void func_cpu_args(void *descr[], void *_args) +{ + int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + int ifactor; + float ffactor; + + starpu_codelet_unpack_args(_args, &ifactor, &ffactor); + + *x0 = *x0 * ifactor; + *x1 = *x1 * ffactor; +} + +void func_cpu_noargs(void *descr[], void *_args) +{ + int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *x1 = (float 
*)STARPU_VARIABLE_GET_PTR(descr[1]); + + (void)_args; + + *x0 = *x0 * _ifactor; + *x1 = *x1 * _ffactor; +} + +struct starpu_codelet mycodelet_args = +{ + .modes = { STARPU_RW, STARPU_RW }, + .cpu_funcs = {func_cpu_args}, + .cpu_funcs_name = {"func_cpu_args"}, + .nbuffers = 2 +}; + +/* This one cheats by getting the factor through a global variable, which cannot + * work in master-slave mode, so we can only run it on a local CPU */ +struct starpu_codelet mycodelet_noargs = +{ + .modes = { STARPU_RW, STARPU_RW }, + .cpu_funcs = {func_cpu_noargs}, + .nbuffers = 2 +}; + +static +int test_codelet(struct starpu_codelet *codelet, int task_insert, int args, int x, float f) +{ + starpu_data_handle_t data_handles[2]; + int xx = x; + float ff = f; + int i, ret; + + starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&xx, sizeof(xx)); + starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&ff, sizeof(ff)); + + FPRINTF(stderr, "values: %d (%d) %f (%f)\n", xx, _ifactor, ff, _ffactor); + + if (task_insert) + { + if (args) + ret = starpu_task_insert(codelet, + STARPU_VALUE, &_ifactor, sizeof(_ifactor), + STARPU_VALUE, &_ffactor, sizeof(_ffactor), + STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], + 0); + else + ret = starpu_task_insert(codelet, + STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + else + { + struct starpu_task *task; + if (args) + task = starpu_task_build(codelet, + STARPU_VALUE, &_ifactor, sizeof(_ifactor), + STARPU_VALUE, &_ffactor, sizeof(_ffactor), + STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], + 0); + else + task = starpu_task_build(codelet, + STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], + 0); + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + +enodev: + for(i=0 ; i<2 ; i++) + { + 
starpu_data_unregister(data_handles[i]); + } + + FPRINTF(stderr, "values: %d (should be %d) %f (should be %f)\n\n", xx, x*_ifactor, ff, f*_ffactor); + return ret == -ENODEV ? ret : xx == x*_ifactor && ff == f*_ffactor; +} + +int main(void) +{ + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "Testing codelet with task_insert and with arguments\n"); + ret = test_codelet(&mycodelet_args, 1, 1, 4, 2.0); + if (ret == -ENODEV) goto enodev; + if (ret) + { + FPRINTF(stderr, "Testing codelet with task_insert and without arguments\n"); + ret = test_codelet(&mycodelet_noargs, 1, 0, 9, 7.0); + } + if (ret == -ENODEV) goto enodev; + if (ret) + { + FPRINTF(stderr, "Testing codelet with task_build and with arguments\n"); + ret = test_codelet(&mycodelet_args, 0, 1, 5, 3.0); + } + if (ret == -ENODEV) goto enodev; + if (ret) + { + FPRINTF(stderr, "Testing codelet with task_build and without arguments\n"); + ret = test_codelet(&mycodelet_noargs, 0, 0, 7, 5.0); + } + if (ret == -ENODEV) goto enodev; + + starpu_shutdown(); + + STARPU_RETURN(ret?0:1); + +enodev: + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/insert_task_array.c b/tests/main/insert_task_array.c new file mode 100644 index 0000000..0dce95a --- /dev/null +++ b/tests/main/insert_task_array.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Test STARPU_DATA_ARRAY + */ + +void func_cpu(void *descr[], void *_args) +{ + int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); + int factor; + + starpu_codelet_unpack_args(_args, &factor); + + *x0 = *x0 * factor; + *x1 = *x1 * (float)factor; +} + +struct starpu_codelet mycodelet = +{ + .modes = { STARPU_RW, STARPU_RW }, + .cpu_funcs = {func_cpu}, + .cpu_funcs_name = {"func_cpu"}, + .nbuffers = 2 +}; + +int main(void) +{ + int x; float f; + int factor=12; + int i, ret; + starpu_data_handle_t data_handles[2]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + x = 1; + starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + f = 2.0; + starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&f, sizeof(f)); + + ret = starpu_task_insert(&mycodelet, + STARPU_DATA_ARRAY, data_handles, 2, + STARPU_VALUE, &factor, sizeof(factor), + STARPU_PRIORITY, 1, + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = 
starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + +enodev: + for(i=0 ; i<2 ; i++) + { + starpu_data_unregister(data_handles[i]); + } + + starpu_shutdown(); + + if (ret == -ENODEV) + { + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; + } + else + { + FPRINTF(stderr, "VALUES: %d %f\n", x, f); + ret = !(x == 12 && f == 24.0); + return ret; + } +} diff --git a/tests/main/insert_task_dyn_handles.c b/tests/main/insert_task_dyn_handles.c new file mode 100644 index 0000000..16e35f1 --- /dev/null +++ b/tests/main/insert_task_dyn_handles.c @@ -0,0 +1,358 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* + * Try the starpu_task_insert interface in various ways, and notably + * triggering the use of dyn_handles + */ + +void func_cpu(void *descr[], void *_args) +{ + int num = STARPU_TASK_GET_NBUFFERS(starpu_task_get_current()); + int i; + + (void)_args; + + for (i = 0; i < num; i++) + { + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); + + *x = *x + 1; + } +} + +struct starpu_codelet codelet = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_VARIABLE_NBUFFERS, +}; + +struct starpu_codelet codelet_minus1 = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_NMAXBUFS-1, +}; + +struct starpu_codelet codelet_exactly = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_NMAXBUFS, +}; + +struct starpu_codelet codelet_plus1 = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_NMAXBUFS+1, +}; + +struct starpu_codelet codelet_plus5 = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_NMAXBUFS+5, +}; + +starpu_data_handle_t *data_handles; +struct starpu_data_descr *descrs; +int *expected; + +int test(int n, struct starpu_codelet *static_codelet) +{ + int i, ret; + + for (i = 0; i < n; i++) + expected[i]++; + ret = starpu_task_insert(&codelet, + STARPU_DATA_MODE_ARRAY, descrs, n, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Same with static number of buffers in codelet */ + for (i = 0; i < n; i++) + expected[i]++; + ret = 
starpu_task_insert(static_codelet, + STARPU_DATA_MODE_ARRAY, descrs, n, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + /* Test a whole array after one data */ + expected[0]++; + for (i = 1; i < n; i++) + expected[i]++; + ret = starpu_task_insert(&codelet, + STARPU_RW, data_handles[0], + STARPU_DATA_MODE_ARRAY, &descrs[1], n-1, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + if (n > 1) + { + /* Same with static number of buffers in codelet */ + expected[0]++; + for (i = 1; i < n; i++) + expected[i]++; + ret = starpu_task_insert(static_codelet, + STARPU_RW, data_handles[0], + STARPU_DATA_MODE_ARRAY, &descrs[1], n-1, + 0); + if (ret == -ENODEV) return ret; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + return 0; +} + +int main(void) +{ + int *x; + int i, ret, loop; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + +#ifdef STARPU_QUICK_CHECK + int nloops = 4; +#else + int nloops = 16; +#endif + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + x = calloc(STARPU_NMAXBUFS+5, sizeof(*x)); + data_handles = malloc((STARPU_NMAXBUFS+5) * sizeof(*data_handles)); + descrs = malloc((STARPU_NMAXBUFS+5) * sizeof(*descrs)); + expected = calloc(STARPU_NMAXBUFS+5, sizeof(*expected)); + for(i=0 ; i 2 + STARPU_RW, data_handles[1], +#endif +#if STARPU_NMAXBUFS > 3 + STARPU_RW, data_handles[2], +#endif +#if STARPU_NMAXBUFS > 4 + STARPU_RW, data_handles[3], +#endif +#if STARPU_NMAXBUFS > 5 + STARPU_RW, data_handles[4], +#endif +#if STARPU_NMAXBUFS > 6 + STARPU_RW, data_handles[5], +#endif +#if STARPU_NMAXBUFS > 7 + STARPU_RW, data_handles[6], +#endif +#if STARPU_NMAXBUFS > 8 + STARPU_RW, data_handles[7], +#endif +#if STARPU_NMAXBUFS > 9 + STARPU_RW, data_handles[8], +#endif +#if 
STARPU_NMAXBUFS > 10 + STARPU_RW, data_handles[9], +#endif + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + +#if STARPU_NMAXBUFS > 1 && STARPU_NMAXBUFS <= 8 + /* Same with static number of buffers in codelet */ + expected[0]++; + for (i = 1; i < STARPU_NMAXBUFS-1 && i < 7; i++) + expected[i]++; + ret = starpu_task_insert(&codelet_minus1, + STARPU_RW, data_handles[0], +#if STARPU_NMAXBUFS > 2 + STARPU_RW, data_handles[1], +#endif +#if STARPU_NMAXBUFS > 3 + STARPU_RW, data_handles[2], +#endif +#if STARPU_NMAXBUFS > 4 + STARPU_RW, data_handles[3], +#endif +#if STARPU_NMAXBUFS > 5 + STARPU_RW, data_handles[4], +#endif +#if STARPU_NMAXBUFS > 6 + STARPU_RW, data_handles[5], +#endif +#if STARPU_NMAXBUFS > 7 + STARPU_RW, data_handles[6], +#endif + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +#endif + + /* Test data one after the other, but more than NMAXBUFS */ + for (i = 0; i < STARPU_NMAXBUFS+5 && i < 10; i++) + expected[i]++; + ret = starpu_task_insert(&codelet, + STARPU_RW, data_handles[0], + STARPU_RW, data_handles[1], + STARPU_RW, data_handles[2], + STARPU_RW, data_handles[3], + STARPU_RW, data_handles[4], + STARPU_RW, data_handles[5], +#if STARPU_NMAXBUFS > 1 + STARPU_RW, data_handles[6], +#endif +#if STARPU_NMAXBUFS > 2 + STARPU_RW, data_handles[7], +#endif +#if STARPU_NMAXBUFS > 3 + STARPU_RW, data_handles[8], +#endif +#if STARPU_NMAXBUFS > 4 + STARPU_RW, data_handles[9], +#endif + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + +#if STARPU_NMAXBUFS > 1 && STARPU_NMAXBUFS <= 8 + /* Same with static number of buffers in codelet*/ + for (i = 0; i < STARPU_NMAXBUFS+5 && i < 13; i++) + expected[i]++; + ret = starpu_task_insert(&codelet_plus5, + STARPU_RW, data_handles[0], + STARPU_RW, data_handles[1], + STARPU_RW, data_handles[2], + STARPU_RW, data_handles[3], + STARPU_RW, data_handles[4], + STARPU_RW, data_handles[5], 
+#if STARPU_NMAXBUFS > 1 + STARPU_RW, data_handles[6], +#endif +#if STARPU_NMAXBUFS > 2 + STARPU_RW, data_handles[7], +#endif +#if STARPU_NMAXBUFS > 3 + STARPU_RW, data_handles[8], +#endif +#if STARPU_NMAXBUFS > 4 + STARPU_RW, data_handles[9], +#endif +#if STARPU_NMAXBUFS > 5 + STARPU_RW, data_handles[10], +#endif +#if STARPU_NMAXBUFS > 6 + STARPU_RW, data_handles[11], +#endif +#if STARPU_NMAXBUFS > 7 + STARPU_RW, data_handles[12], +#endif + 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +#endif + + } + +enodev: + for(i=0 ; i +#include +#include "../helper.h" + +/* + * Try to pass many parameters to a task, testing the various codelet + * declarations + */ + +#define NPARAMS 15 + +void func_cpu(void *descr[], void *_args) +{ + (void)_args; + struct starpu_task *task = starpu_task_get_current(); + int num = STARPU_TASK_GET_NBUFFERS(task); + int i; + + for (i = 0; i < num; i++) + if ((STARPU_TASK_GET_MODE(task, i) & STARPU_W) + || (STARPU_TASK_GET_MODE(task, i) & STARPU_SCRATCH)) + { + int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); + + *x = *x + 1; + } +} + +/* We will fill this one with dyn_modes */ +struct starpu_codelet codelet_dyn = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = NPARAMS, +}; + +/* When maxbuffers is less than NPARAMS we will miss some access modes. + * That is on purpose: we here check that we still behave correctly in that case. + * We are just not able to check the parameter access modes. 
*/ +struct starpu_codelet codelet_toomany = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = NPARAMS, + .modes = + { + STARPU_R, + STARPU_R, + STARPU_RW|STARPU_COMMUTE, + STARPU_RW|STARPU_COMMUTE, + STARPU_R, + STARPU_RW, + STARPU_R, + STARPU_RW|STARPU_COMMUTE, +#if STARPU_NMAXBUFS >= 9 + STARPU_R, +#endif +#if STARPU_NMAXBUFS >= 10 + STARPU_RW|STARPU_COMMUTE, +#endif +#if STARPU_NMAXBUFS >= 11 + STARPU_R, +#endif +#if STARPU_NMAXBUFS >= 12 + STARPU_R, +#endif +#if STARPU_NMAXBUFS >= 13 + STARPU_SCRATCH, +#endif +#if STARPU_NMAXBUFS >= 14 + STARPU_SCRATCH, +#endif +#if STARPU_NMAXBUFS >= 15 + STARPU_SCRATCH, +#endif + } +}; + +struct starpu_codelet codelet_variable = +{ + .cpu_funcs = {func_cpu}, + /* starpu_task_get_current() doesn't work on MPI Master Slave */ + /* .cpu_funcs_name = {"func_cpu"}, */ + .nbuffers = STARPU_VARIABLE_NBUFFERS, +}; + +int main(void) +{ + int *x; + int i, ret, loop; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + +#ifdef STARPU_QUICK_CHECK + int nloops = 4; +#else + int nloops = 16; +#endif + int val_int = 42; + double val_double = 42.; + starpu_data_handle_t *data_handles; + int *expected; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + codelet_dyn.dyn_modes = malloc(NPARAMS * sizeof(*(codelet_dyn.modes))); + codelet_dyn.dyn_modes[0] = STARPU_R, + codelet_dyn.dyn_modes[1] = STARPU_R, + codelet_dyn.dyn_modes[2] = STARPU_RW|STARPU_COMMUTE, + codelet_dyn.dyn_modes[3] = STARPU_RW|STARPU_COMMUTE, + codelet_dyn.dyn_modes[4] = STARPU_R, + codelet_dyn.dyn_modes[5] = STARPU_RW, + codelet_dyn.dyn_modes[6] = STARPU_R, + codelet_dyn.dyn_modes[7] = STARPU_RW|STARPU_COMMUTE, + codelet_dyn.dyn_modes[8] = STARPU_R, + codelet_dyn.dyn_modes[9] = 
STARPU_RW|STARPU_COMMUTE, + codelet_dyn.dyn_modes[10] = STARPU_R, + codelet_dyn.dyn_modes[11] = STARPU_R, + codelet_dyn.dyn_modes[12] = STARPU_SCRATCH, + codelet_dyn.dyn_modes[13] = STARPU_SCRATCH, + codelet_dyn.dyn_modes[14] = STARPU_SCRATCH, + + x = calloc(NPARAMS, sizeof(*x)); + data_handles = malloc(NPARAMS * sizeof(*data_handles)); + expected = calloc(NPARAMS, sizeof(*expected)); + for(i=0 ; i +#include "../helper.h" + +/* + * Try starpu_task_insert with a NULL codelet + */ + +int main(void) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + ret = starpu_task_insert(NULL, 0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/insert_task_pack.c b/tests/main/insert_task_pack.c new file mode 100644 index 0000000..6f253a2 --- /dev/null +++ b/tests/main/insert_task_pack.c @@ -0,0 +1,64 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +void func_cpu(void *descr[], void *_args) +{ + (void) descr; + (void) _args; +} + +struct starpu_codelet codelet = +{ + .cpu_funcs = { func_cpu }, + .cpu_funcs_name = { "func_cpu" } +}; + +int main(int argc, char **argv) +{ + int ret; + void *cl_arg = NULL; + size_t cl_arg_size = 0; + struct starpu_task *task; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + (void)argv; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_codelet_pack_args(&cl_arg, &cl_arg_size, + STARPU_VALUE, &argc, sizeof(argc), + 0); + + task = starpu_task_build(&codelet, + STARPU_CL_ARGS, cl_arg, cl_arg_size, + STARPU_VALUE, &argc, sizeof(argc), + 0); + starpu_shutdown(); + + FPRINTF(stderr, "Task %p\n", task); + return (task==NULL)?0:1; +} diff --git a/tests/main/insert_task_value.c b/tests/main/insert_task_value.c new file mode 100644 index 0000000..0dee0b5 --- /dev/null +++ b/tests/main/insert_task_value.c @@ -0,0 +1,339 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +/* + * Test passing values to tasks in different ways + */ + +#define IFACTOR 42 +#define FFACTOR 12.00 + +void func_cpu_int_float(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + (void) descr; + + starpu_codelet_unpack_args(_args, ifactor, &ffactor); + + FPRINTF(stderr, "[func_cpu_int_float ] Values %d - %3.2f\n", ifactor[0], ffactor); + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); +} + +void func_cpu_int_float_multiple_unpack(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + (void) descr; + + starpu_codelet_unpack_args(_args, ifactor, 0); + starpu_codelet_unpack_args(_args, ifactor, &ffactor); + + FPRINTF(stderr, "[func_cpu_int_float_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor); + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); +} + +void func_cpu_int_float_unpack_copyleft(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + void *buffer; + size_t buffer_size; + (void) descr; + + buffer_size = sizeof(int)+sizeof(float)+sizeof(size_t); + buffer = calloc(buffer_size, 1); + starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, ifactor, 0); + starpu_codelet_unpack_args(buffer, &ffactor); + + FPRINTF(stderr, "[func_cpu_int_float_unpack_copyleft] Values %d - %3.2f\n", ifactor[0], ffactor); + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); + free(buffer); +} + +void func_cpu_float_int(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + (void) descr; + + starpu_codelet_unpack_args(_args, &ffactor, ifactor); + + FPRINTF(stderr, "[func_cpu_float_int ] Values %d - %3.2f\n", ifactor[0], ffactor); + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); +} + +void func_cpu_float_int_multiple_unpack(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + (void) descr; + + starpu_codelet_unpack_args(_args, &ffactor, 0); + starpu_codelet_unpack_args(_args, &ffactor, ifactor); + + FPRINTF(stderr, 
"[func_cpu_float_int_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor); + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); +} + +void func_cpu_float_int_unpack_copyleft(void *descr[], void *_args) +{ + int ifactor[2048]; + float ffactor; + void *buffer; + size_t buffer_size; + (void) descr; /* this kernel only reads its packed arguments */ + + buffer_size = sizeof(int)+2048*sizeof(int)+sizeof(size_t); + buffer = calloc(buffer_size, 1); + starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, &ffactor, 0); /* unpack ffactor only; the unread arguments are copied into buffer */ + starpu_codelet_unpack_args(buffer, ifactor); /* then unpack the int array from that leftover copy */ + + FPRINTF(stderr, "[func_cpu_float_int_unpack_copyleft] Values %d - %3.2f\n", ifactor[0], ffactor); /* label must name this kernel so traces are attributable */ + assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); + free(buffer); +} + +void do_test_int_float_task_insert(starpu_cpu_func_t func, char* func_name) +{ + int *ifactor; + float ffactor=FFACTOR; + int ret; + struct starpu_codelet codelet; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + ret = starpu_task_insert(&codelet, + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + STARPU_VALUE, &ffactor, sizeof(ffactor), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + free(ifactor); +} + +void do_test_int_float_task_insert_pack(starpu_cpu_func_t func, char* func_name) +{ + int *ifactor; + float ffactor=FFACTOR; + int ret; + struct starpu_codelet codelet; + void *cl_arg = NULL; + size_t cl_arg_size = 0; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + starpu_codelet_pack_args(&cl_arg, &cl_arg_size, + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + STARPU_VALUE, &ffactor, sizeof(ffactor), + 0); + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + ret = starpu_task_insert(&codelet, + STARPU_CL_ARGS, 
cl_arg, cl_arg_size, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + free(ifactor); +} + +void do_test_float_int_task_insert(starpu_cpu_func_t func, char* func_name) +{ + int *ifactor; + float ffactor=FFACTOR; + int ret; + struct starpu_codelet codelet; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + ret = starpu_task_insert(&codelet, + STARPU_VALUE, &ffactor, sizeof(ffactor), + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + free(ifactor); +} + +void do_test_float_int_task_insert_pack(starpu_cpu_func_t func, char* func_name) +{ + int *ifactor; + float ffactor=FFACTOR; + int ret; + struct starpu_codelet codelet; + void *cl_arg = NULL; + size_t cl_arg_size = 0; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + starpu_codelet_pack_args(&cl_arg, &cl_arg_size, + STARPU_VALUE, &ffactor, sizeof(ffactor), + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + 0); + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + ret = starpu_task_insert(&codelet, + STARPU_CL_ARGS, cl_arg, cl_arg_size, + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + starpu_task_wait_for_all(); + free(ifactor); +} + +void do_test_int_float_pack(starpu_cpu_func_t func, char* func_name) +{ + struct starpu_task *task; + struct starpu_codelet codelet; + int ret; + int *ifactor; + float ffactor=FFACTOR; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + task = starpu_task_create(); + 
task->synchronous = 1; + task->cl = &codelet; + task->cl_arg_free = 1; + starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + STARPU_VALUE, &ffactor, sizeof(ffactor), + 0); + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + starpu_task_wait_for_all(); + free(ifactor); +} + +void do_test_float_int_pack(starpu_cpu_func_t func, char* func_name) +{ + struct starpu_task *task; + struct starpu_codelet codelet; + int ret; + int *ifactor; + float ffactor=FFACTOR; + + FPRINTF(stderr, "\nTesting %s\n", __func__); + + ifactor = calloc(2048, sizeof(int)); + ifactor[0] = IFACTOR; + + starpu_codelet_init(&codelet); + codelet.cpu_funcs[0] = func; + codelet.cpu_funcs_name[0] = func_name; + + task = starpu_task_create(); + task->synchronous = 1; + task->cl = &codelet; + task->cl_arg_free = 1; + starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, + STARPU_VALUE, &ffactor, sizeof(ffactor), + STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), + 0); + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + starpu_task_wait_for_all(); + + free(ifactor); +} + +int main(void) +{ + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == 0) + goto enodev; + + do_test_int_float_task_insert(func_cpu_int_float, "func_cpu_int_float"); + do_test_int_float_task_insert(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); + do_test_int_float_task_insert(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); + + do_test_int_float_task_insert_pack(func_cpu_int_float, "func_cpu_int_float"); + 
do_test_int_float_task_insert_pack(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); + do_test_int_float_task_insert_pack(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); + + do_test_float_int_task_insert(func_cpu_float_int, "func_cpu_float_int"); + do_test_float_int_task_insert(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); + do_test_float_int_task_insert(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); + + do_test_float_int_task_insert_pack(func_cpu_float_int, "func_cpu_float_int"); + do_test_float_int_task_insert_pack(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); + do_test_float_int_task_insert_pack(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); + + do_test_int_float_pack(func_cpu_int_float, "func_cpu_int_float"); + do_test_int_float_pack(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); + do_test_int_float_pack(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); + + do_test_float_int_pack(func_cpu_float_int, "func_cpu_float_int"); + do_test_float_int_pack(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); + do_test_float_int_pack(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); + + starpu_shutdown(); + + return 0; + +enodev: + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/insert_task_where.c b/tests/main/insert_task_where.c new file mode 100644 index 0000000..c25d615 --- /dev/null +++ b/tests/main/insert_task_where.c @@ -0,0 +1,89 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include "../variable/increment.h" + +void cpu_increment(void *descr[], void *arg) +{ + (void)arg; + unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + (*var) += 2; +} + +/* Also test that the application can provide its own main function */ +#undef main + +int main(void) +{ + starpu_data_handle_t data_handles[2]; + int x = 12; + int y = 12; + int ret, ret1, ret2; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; +#ifndef STARPU_SIMGRID + conf.ncuda = -1; +#endif + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); + starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); + + // We change the cpu function to have a different computation + increment_cl.cpu_funcs[0] = cpu_increment; + + ret1 = starpu_task_insert(&increment_cl, + STARPU_EXECUTE_WHERE, STARPU_CPU, + STARPU_RW, data_handles[0], + 0); + if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert"); + + ret2 = starpu_task_insert(&increment_cl, + STARPU_EXECUTE_WHERE, STARPU_CUDA, + STARPU_RW, data_handles[1], + 0); + if (ret2 
!= -ENODEV) STARPU_CHECK_RETURN_VALUE(ret2, "starpu_task_insert"); + + starpu_data_unregister(data_handles[0]); + starpu_data_unregister(data_handles[1]); + + starpu_shutdown(); + + if (ret1 != -ENODEV) + { + if (x != 14) + ret = 1; + FPRINTF(stderr, "Value x = %d (expected 14)\n", x); + } + if (ret2 != -ENODEV) + { + if (y != 13) + ret = 1; + FPRINTF(stderr, "Value y = %d (expected 13)\n", y); + } + + STARPU_RETURN(ret); +} diff --git a/tests/main/job.c b/tests/main/job.c new file mode 100644 index 0000000..3887cfa --- /dev/null +++ b/tests/main/job.c @@ -0,0 +1,98 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include "../helper.h" + +/* + * Test that job creation is threadsafe + */ + +#define N 1000 + +static struct starpu_task *tasks[N]; + +void dummy_func(void *arg) +{ + unsigned worker, i; + int worker0; + (void) arg; + + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker0, 1); + if ((int) starpu_worker_get_id_check() == worker0) + /* One worker creates the tasks */ + for (i = 0; i < N; i++) + { + struct starpu_task *task = starpu_task_create(); + task->destroy = 0; + STARPU_WMB(); + tasks[i] = task; + } + else + /* While others eagerly wait for it before trying to get their id */ + for (i = 0; i < N; i++) + { + struct starpu_task *task; + while (!(task = tasks[i])) + { + STARPU_UYIELD(); + STARPU_SYNCHRONIZE(); + } + STARPU_RMB(); + starpu_task_get_job_id(task); + } +} + +int main(void) +{ + int ret; + unsigned i; + struct starpu_conf conf; + + starpu_conf_init(&conf); + conf.precedence_over_environment_variables = 1; + starpu_conf_noworker(&conf); + conf.ncpus = -1; + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + STARPU_HG_DISABLE_CHECKING(tasks); + starpu_execute_on_each_worker(dummy_func, NULL, STARPU_CPU); + + for (i = 0; i < N; i++) + { + starpu_task_destroy(tasks[i]); + } + + struct starpu_task *task = starpu_task_create(); + unsigned long id; + task->destroy = 0; + id = starpu_task_get_job_id(task); + starpu_task_destroy(task); + + FPRINTF(stderr, "jobid %lu for %u tasks and %u workers\n", + id, N, starpu_worker_get_count()); + + /* We are not supposed to have created more than one jobid for each + * worker (for execute_on_each) and for each of the N user tasks. 
+ */ + ret = id > starpu_worker_get_count() + N + 1; + + starpu_shutdown(); + return ret; +} diff --git a/tests/main/mkdtemp.c b/tests/main/mkdtemp.c new file mode 100644 index 0000000..b7b7416 --- /dev/null +++ b/tests/main/mkdtemp.c @@ -0,0 +1,63 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +int do_test(char *(*func)(char *tmpl)) +{ + int ret; + char *path; + char dirname[128]; + char *ptr; + struct stat sb; + + path = starpu_getenv("TMPDIR"); + if (!path) + path = starpu_getenv("TEMP"); + if (!path) + path = starpu_getenv("TMP"); + if (!path) + path = "/tmp"; + snprintf(dirname, sizeof(dirname), "%s/abcdef_XXXXXX", path); + ptr = func(dirname); + FPRINTF(stderr, "Directory '%s' (res '%s')\n", dirname, ptr ? ptr : "(null)"); + + // use stat to check that the directory really exists (this also catches a NULL result from func) + ret = stat(dirname, &sb); + if (ret != 0 || !S_ISDIR(sb.st_mode)) + { + FPRINTF(stderr, "Directory '%s' has not been created\n", dirname); + return 1; + } + + ret = rmdir(dirname); + STARPU_CHECK_RETURN_VALUE(ret, "rmdir '%s'\n", dirname); + + return ret; +} + +int main(void) +{ + int ret, ret2; + + ret = do_test(_starpu_mkdtemp); + ret2 = do_test(_starpu_mkdtemp_internal); + + return ret + ret2; +} diff --git a/tests/main/multithreaded.c b/tests/main/multithreaded.c new file mode 100644 index 0000000..43bfdd1 --- /dev/null +++
b/tests/main/multithreaded.c @@ -0,0 +1,124 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Try submitting tasks from different threads + */ + +starpu_pthread_t threads[16]; + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 65536; +#endif +static unsigned nthreads = 2; + +static void *thread_func(void *arg) +{ + int ret; + unsigned i; + (void)arg; + + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + task->callback_func = NULL; + task->callback_arg = NULL; + + ret = starpu_task_submit(task); + STARPU_ASSERT_MSG(!ret, "task submission failed with error code %d", ret); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + return NULL; +} + +static void usage(char **argv) +{ + FPRINTF(stderr, "%s [-i ntasks] [-t nthreads] [-h]\n", argv[0]); + exit(-1); +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:t:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 't': + nthreads = atoi(optarg); + break; + case 'h': + usage(argv); + break; + } +} + +int main(int argc, char **argv) +{ + // unsigned i; + 
double timing; + double start; + double end; + int ret; + + parse_args(argc, argv); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "#tasks : %u\n", ntasks); + + start = starpu_timing_now(); + + unsigned t; + for (t = 0; t < nthreads; t++) + { + STARPU_PTHREAD_CREATE(&threads[t], NULL, thread_func, NULL); + } + + for (t = 0; t < nthreads; t++) + { + STARPU_PTHREAD_JOIN(threads[t], NULL); + } + + end = starpu_timing_now(); + + timing = end - start; + + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per task: %f usecs\n", timing/(nthreads*ntasks)); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/multithreaded_init.c b/tests/main/multithreaded_init.c new file mode 100644 index 0000000..bdca7bd --- /dev/null +++ b/tests/main/multithreaded_init.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* + * Try calling starpu_initialize from different threads in parallel + */ + +#define NUM_THREADS 5 + +int *glob_argc; +char ***glob_argv; + +static +void *launch_starpu(void *unused) +{ + int ret; + (void) unused; + ret = starpu_initialize(NULL, glob_argc, glob_argv); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + return NULL; +} + +static +void *shutdown_starpu(void *unused) +{ + (void) unused; + starpu_shutdown(); + return NULL; +} + +int main(int argc, char **argv) +{ + unsigned i; + double timing; + double start; + double end; + + glob_argc = &argc; + glob_argv = &argv; + + starpu_pthread_t threads[NUM_THREADS]; + + start = starpu_timing_now(); + + for (i = 0; i < NUM_THREADS; ++i) + { + STARPU_PTHREAD_CREATE(&threads[i], NULL, launch_starpu, NULL); + } + + for (i = 0; i < NUM_THREADS; ++i) + { + STARPU_PTHREAD_JOIN(threads[i], NULL); + } + + end = starpu_timing_now(); + + timing = end - start; + + FPRINTF(stderr, "Success : %d threads launching simultaneously starpu_init\n", NUM_THREADS); + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per task: %f usecs\n", timing/NUM_THREADS); + + for (i = 0; i < NUM_THREADS; i++) + { + STARPU_PTHREAD_CREATE(&threads[i], NULL, shutdown_starpu, NULL); + } + + for (i = 0; i < NUM_THREADS; i++) + { + STARPU_PTHREAD_JOIN(threads[i], NULL); + } + + return EXIT_SUCCESS; +} diff --git a/tests/main/pack.c b/tests/main/pack.c new file mode 100644 index 0000000..8655399 --- /dev/null +++ b/tests/main/pack.c @@ -0,0 +1,258 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Test starpu_codelet_pack_args and starpu_codelet_unpack_args + */ + +void func_unpack_args(void *descr[], void *_args) +{ + int factor; + char c; + int x; + + (void)descr; + + starpu_codelet_unpack_args(_args, &factor, &c, &x); + + FPRINTF(stderr, "[codelet unpack_args] values: %d %c %d\n", factor, c, x); + assert(factor == 12 && c == 'n' && x == 42); +} + +struct starpu_codelet mycodelet_unpack_args = +{ + .cpu_funcs = {func_unpack_args}, + .cpu_funcs_name = {"func_unpack_args"}, + .nbuffers = 0 +}; + +void func_unpack_arg(void *descr[], void *_args) +{ + int factor; + char c; + int x; + + (void)descr; + + size_t size = sizeof(int) + 3*sizeof(size_t) + sizeof(int) + sizeof(char) + sizeof(int); + struct starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, size); + starpu_codelet_unpack_arg(&state, (void**)&factor, sizeof(factor)); + starpu_codelet_unpack_arg(&state, (void**)&c, sizeof(c)); + starpu_codelet_unpack_arg(&state, (void**)&x, sizeof(x)); + starpu_codelet_unpack_arg_fini(&state); + + FPRINTF(stderr, "[codelet unpack_arg] values: %d %c %d\n", factor, c, x); + assert(factor == 12 && c == 'n' && x == 42); +} + +struct starpu_codelet mycodelet_unpack_arg = +{ + .cpu_funcs = {func_unpack_arg}, + .cpu_funcs_name = {"func_unpack_arg"}, + .nbuffers = 0 +}; + +void 
func_dup_arg(void *descr[], void *_args) +{ + int *factor; + char *c; + int *x; + size_t size; + + (void)descr; + + size_t psize = sizeof(int) + 3*sizeof(size_t) + sizeof(int) + sizeof(char) + sizeof(int); + struct starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, psize); + starpu_codelet_dup_arg(&state, (void**)&factor, &size); + assert(size == sizeof(*factor)); + starpu_codelet_dup_arg(&state, (void**)&c, &size); + assert(size == sizeof(*c)); + starpu_codelet_dup_arg(&state, (void**)&x, &size); + assert(size == sizeof(*x)); + starpu_codelet_unpack_arg_fini(&state); + + FPRINTF(stderr, "[codelet dup_arg] values: %d %c %d\n", *factor, *c, *x); + assert(*factor == 12 && *c == 'n' && *x == 42); + free(factor); + free(c); + free(x); +} + +struct starpu_codelet mycodelet_dup_arg = +{ + .cpu_funcs = {func_dup_arg}, + .cpu_funcs_name = {"func_dup_arg"}, + .nbuffers = 0 +}; + +void func_pick_arg(void *descr[], void *_args) +{ + int *factor; + char *c; + int *x; + size_t size; + + (void)descr; + + size_t psize = sizeof(int) + 6*sizeof(size_t) + sizeof(int) + 4*sizeof(char) + sizeof(int); + struct starpu_codelet_pack_arg_data state; + starpu_codelet_unpack_arg_init(&state, _args, psize); + starpu_codelet_pick_arg(&state, (void**)&factor, &size); + assert(size == sizeof(*factor)); + starpu_codelet_pick_arg(&state, (void**)&c, &size); + assert(size == sizeof(*c)); + starpu_codelet_pick_arg(&state, (void**)&c, &size); + assert(size == sizeof(*c)); + starpu_codelet_pick_arg(&state, (void**)&c, &size); + assert(size == sizeof(*c)); + starpu_codelet_pick_arg(&state, (void**)&c, &size); + assert(size == sizeof(*c)); + starpu_codelet_pick_arg(&state, (void**)&x, &size); + assert(size == sizeof(*x)); + starpu_codelet_unpack_arg_fini(&state); + + FPRINTF(stderr, "[codelet pick_arg] values: %d %c %d\n", *factor, *c, *x); + assert(*factor == 12 && *c == 'n' && *x == 42); +} + +struct starpu_codelet mycodelet_pick_arg = +{ + .cpu_funcs = 
{func_pick_arg}, + .cpu_funcs_name = {"func_pick_arg"}, + .nbuffers = 0 +}; + +int main(void) +{ + int ret; + int x=42; + int factor=12; + char c='n'; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "[init] values: %d %c %d\n", factor, c, x); + + { + struct starpu_task *task = starpu_task_build(&mycodelet_unpack_args, STARPU_TASK_SYNCHRONOUS, 1, 0); + task->cl_arg_free = 1; + + starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, + STARPU_VALUE, &factor, sizeof(factor), + STARPU_VALUE, &c, sizeof(c), + STARPU_VALUE, &x, sizeof(x), + 0); + ret = starpu_task_submit(task); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Test with starpu_codelet_unpack_args */ + { + struct starpu_task *task = starpu_task_build(&mycodelet_unpack_args, STARPU_TASK_SYNCHRONOUS, 1, 0); + task->cl_arg_free = 1; + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &x, sizeof(x)); + starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Test with starpu_codelet_unpack_arg */ + { + struct starpu_task *task = starpu_task_build(&mycodelet_unpack_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); + task->cl_arg_free = 1; + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &x, sizeof(x)); + starpu_codelet_pack_arg_fini(&state, 
&task->cl_arg, &task->cl_arg_size); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Test with starpu_codelet_dup_arg */ + { + struct starpu_task *task = starpu_task_build(&mycodelet_dup_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); + task->cl_arg_free = 1; + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &x, sizeof(x)); + starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* Test with starpu_codelet_pick_arg */ + { + struct starpu_task *task = starpu_task_build(&mycodelet_pick_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); + task->cl_arg_free = 1; + + struct starpu_codelet_pack_arg_data state; + starpu_codelet_pack_arg_init(&state); + starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &c, sizeof(c)); + starpu_codelet_pack_arg(&state, &x, sizeof(x)); + starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); + + ret = starpu_task_submit(task); + if (ret != -ENODEV) + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_shutdown(); + if (ret == -ENODEV) + { + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; + } + else + return 0; +} diff --git a/tests/main/pause_resume.c b/tests/main/pause_resume.c new file mode 100644 index 0000000..ac6ae02 --- /dev/null +++ b/tests/main/pause_resume.c @@ 
-0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Try starpu_pause/resume + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#elif !defined(STARPU_LONG_CHECK) +static unsigned ntasks = 1000; +#else +static unsigned ntasks = 50000; +#endif + +int main(void) +{ + double timing; + double start; + double end; + int ret; + +#ifdef STARPU_HAVE_VALGRIND_H + if(RUNNING_ON_VALGRIND) ntasks = 5; +#endif + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Check that we can submit tasks to a "paused" StarPU and then have + * it run normally. + */ + starpu_pause(); + unsigned i; + for (i = 0; i < ntasks; i++) + { + ret = starpu_task_insert(&starpu_codelet_nop, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + + start = starpu_timing_now(); + starpu_resume(); + starpu_task_wait_for_all(); + end = starpu_timing_now(); + timing = end - start; + + FPRINTF(stderr, "Without interruptions:\n\tTotal: %f secs\n", timing/1000000); + FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks); + + /* Do the same thing, but with a lot of interruptions to see if there + * is any overhead associated with the pause/resume calls.
+ */ + starpu_pause(); + for (i = 0; i < ntasks; i++) + { + ret = starpu_task_insert(&starpu_codelet_nop, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } + starpu_resume(); + + start = starpu_timing_now(); + for (i = 0; i < 100; i++) + { + starpu_pause(); + starpu_resume(); + } + starpu_task_wait_for_all(); + end = starpu_timing_now(); + timing = end - start; + + FPRINTF(stderr, "With 100 interruptions:\n\tTotal: %f secs\n", timing/1000000); + FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks); + + /* Finally, check that the nesting of pause/resume calls works. */ + starpu_pause(); + starpu_pause(); + starpu_resume(); + starpu_resume(); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/regenerate.c b/tests/main/regenerate.c new file mode 100644 index 0000000..e9628f6 --- /dev/null +++ b/tests/main/regenerate.c @@ -0,0 +1,132 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include "../helper.h" +#include + +/* + * Run one task with regenerate=1, and thus completes several times + * before we reset regenerate to 0 in the callback + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 65536; +#endif +static unsigned cnt = 0; + +static unsigned completed = 0; +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +static +void callback(void *arg) +{ + (void)arg; + struct starpu_task *task = starpu_task_get_current(); + + cnt++; + + if (cnt == ntasks) + { + task->regenerate = 0; + FPRINTF(stderr, "Stop !\n"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + completed = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + } +} + +int main(int argc, char **argv) +{ + // unsigned i; + double timing; + double start; + double end; + int ret; + + parse_args(argc, argv); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task task; + + starpu_task_init(&task); + + task.cl = &starpu_codelet_nop; + task.regenerate = 1; + task.detach = 1; + + task.callback_func = callback; + + FPRINTF(stderr, "#tasks : %u\n", ntasks); + + start = starpu_timing_now(); + + ret = starpu_task_submit(&task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_do_schedule(); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!completed) /* re-check the predicate: a condition wait may return spuriously */ + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + end = starpu_timing_now(); + + timing = end - start; + + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per
task: %f usecs\n", timing/ntasks); + + starpu_task_wait_for_all(); + starpu_task_clean(&task); + + starpu_shutdown(); + + /* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */ + starpu_task_clean(&task); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/regenerate_pipeline.c b/tests/main/regenerate_pipeline.c new file mode 100644 index 0000000..742a722 --- /dev/null +++ b/tests/main/regenerate_pipeline.c @@ -0,0 +1,163 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" +#include + +/* + * Create a pipeline of regenerated tasks, i.e. 
a sort of data flow graph + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 65536; +#endif +static unsigned cntA = 0; +static unsigned cntB = 0; +static unsigned cntC = 0; + +static unsigned completed = 0; +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +static +void callback(void *arg) +{ + struct starpu_task *task = starpu_task_get_current(); + unsigned *cnt = arg; + unsigned res; + + res = STARPU_ATOMIC_ADD(cnt, 1); + ANNOTATE_HAPPENS_BEFORE(&cnt); + + if (res == ntasks) + { + ANNOTATE_HAPPENS_AFTER(&cnt); + task->regenerate = 0; + FPRINTF(stderr, "Stop !\n"); + + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + completed++; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + } +} + +int main(int argc, char **argv) +{ + // unsigned i; + double timing; + double start; + double end; + int ret; + + parse_args(argc, argv); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task taskA, taskB, taskC; + struct starpu_task *taskAp = &taskA; + struct starpu_task *taskBp = &taskB; + + starpu_task_init(&taskA); + taskA.cl = &starpu_codelet_nop; + taskA.regenerate = 1; + taskA.detach = 1; + taskA.callback_func = callback; + taskA.callback_arg = &cntA; + + starpu_task_init(&taskB); + taskB.cl = &starpu_codelet_nop; + taskB.regenerate = 1; + taskB.detach = 1; + taskB.callback_func = callback; + taskB.callback_arg = &cntB; + + starpu_task_declare_deps_array(&taskB, 1, &taskAp); + + starpu_task_init(&taskC); + taskC.cl = &starpu_codelet_nop; + taskC.regenerate = 1; + taskC.detach = 1; + taskC.callback_func = callback; + 
taskC.callback_arg = &cntC; + starpu_task_declare_deps_array(&taskC, 1, &taskBp); + + FPRINTF(stderr, "#tasks : %u\n", ntasks); + + start = starpu_timing_now(); + + ret = starpu_task_submit(&taskA); + if (ret == -ENODEV) goto enodev; + ret = starpu_task_submit(&taskB); + if (ret == -ENODEV) goto enodev; + ret = starpu_task_submit(&taskC); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_do_schedule(); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (completed < 3) + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + end = starpu_timing_now(); + + timing = end - start; + + FPRINTF(stderr, "cntA : %u\n", cntA); + FPRINTF(stderr, "cntB : %u\n", cntB); + FPRINTF(stderr, "cntC : %u\n", cntC); + STARPU_ASSERT(cntA == ntasks); + STARPU_ASSERT(cntB == ntasks); + STARPU_ASSERT(cntC == ntasks); + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per task: %f usecs\n", timing/(ntasks*3)); + + starpu_task_wait_for_all(); + starpu_task_clean(&taskA); + starpu_task_clean(&taskB); + starpu_task_clean(&taskC); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/restart.c b/tests/main/restart.c new file mode 100644 index 0000000..60af5ae --- /dev/null +++ b/tests/main/restart.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#include "../helper.h" + +/* + * Try initializing/shutting down starpu several times + */ + +#ifdef STARPU_QUICK_CHECK + #define N 2 +#else + #define N 10 +#endif + +static double start; +static double end; + +int main(int argc, char **argv) +{ + unsigned iter; + + double init_timing = 0.0; + double shutdown_timing = 0.0; + int ret; + + for (iter = 0; iter < N; iter++) + { + start = starpu_timing_now(); + /* Initialize StarPU */ + ret = starpu_initialize(NULL, &argc, &argv); + end = starpu_timing_now(); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + init_timing += end - start; + + start = starpu_timing_now(); + /* Shutdown StarPU */ + starpu_shutdown(); + end = starpu_timing_now(); + shutdown_timing += end - start; + } + + FPRINTF(stderr, "starpu_init: %2.2f seconds\n", init_timing/(N*1000000)); + FPRINTF(stderr, "starpu_shutdown: %2.2f seconds\n", shutdown_timing/(N*1000000)); + + return EXIT_SUCCESS; + +enodev: + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/starpu_init.c b/tests/main/starpu_init.c new file mode 100644 index 0000000..c744935 --- /dev/null +++ b/tests/main/starpu_init.c @@ -0,0 +1,146 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include + +/* + * Try initializing starpu with various CPU parameters + */ + +#if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) +#warning unsetenv or setenv are not defined. Or CPU are not enabled. Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +static int check_cpu(int env_cpu, int conf_cpu, int precedence_over_env, int expected_cpu, int *cpu) +{ + int ret; + + FPRINTF(stderr, "\nTesting with env=%d - conf=%d - expected %d (ignore env %d)\n", env_cpu, conf_cpu, expected_cpu, precedence_over_env); + + if (env_cpu != -1) + { + char string[11]; + snprintf(string, sizeof(string), "%d", env_cpu); + setenv("STARPU_NCPUS", string, 1); + } + + struct starpu_conf user_conf; + starpu_conf_init(&user_conf); + user_conf.nmpi_ms = 0; + user_conf.ntcpip_ms = 0; + user_conf.precedence_over_environment_variables = precedence_over_env; + + if (conf_cpu != -1) + { + user_conf.ncpus = conf_cpu; + } + ret = starpu_init(&user_conf); + + if (env_cpu != -1) + { + unsetenv("STARPU_NCPUS"); + } + + if (ret == -ENODEV) + { + return STARPU_TEST_SKIPPED; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + *cpu = starpu_cpu_worker_get_count(); + starpu_shutdown(); + + if (expected_cpu == -1) + { + FPRINTF(stderr, "Number 
of CPUS: %3d\n", *cpu); + return 0; + } + else + { + FPRINTF(stderr, "Number of CPUS: %3d -- Number of expected CPUs: %3d --> %s\n", *cpu, expected_cpu, *cpu==expected_cpu?"SUCCESS":"FAILURE"); + return *cpu != expected_cpu; + } +} + +int main(void) +{ + int ret; + int cpu, cpu_init; + int cpu_test1, cpu_test2, cpu_test3; + + unsetenv("STARPU_NCPUS"); + unsetenv("STARPU_NCPU"); + + ret = check_cpu(-1, -1, 0, -1, &cpu_init); + if (ret) return ret; + if (cpu_init <= 1) return STARPU_TEST_SKIPPED; + + if (cpu_init >= STARPU_MAXCPUS-5) + { + cpu_test1 = cpu_init-1; + cpu_test2 = cpu_init-2; + cpu_test3 = cpu_init-3; + } + else + { + cpu_test1 = cpu_init+1; + cpu_test2 = cpu_init+2; + cpu_test3 = cpu_init+3; + } + + ret = check_cpu(cpu_test1, -1, 0, cpu_test1, &cpu); + if (ret) return ret; + + // Do not set anything --> default value + ret = check_cpu(-1, -1, 0, -1, &cpu); + if (ret) return ret; + if (cpu != cpu_init) + { + FPRINTF(stderr, "The number of CPUs is incorrect\n"); + return 1; + } + + // Do not set environment variable, set starpu_conf::ncpus --> starpu_conf::ncpus + ret = check_cpu(-1, cpu_test2, 0, cpu_test2, &cpu); + if (ret) return ret; + + // Set environment variable, and do not set starpu_conf::ncpus --> environment variable + ret = check_cpu(cpu_test2, -1, 0, cpu_test2, &cpu); + if (ret) return ret; + + // Set both environment variable and starpu_conf::ncpus --> environment variable + ret = check_cpu(cpu_test3, cpu_test1, 0, cpu_test3, &cpu); + if (ret) return ret; + + // Set both environment variable and starpu_conf::ncpus AND prefer starpu_conf over env --> starpu_conf::ncpus + ret = check_cpu(cpu_test3, cpu_test1, 1, cpu_test1, &cpu); + if (ret) return ret; + + // Set environment variable, and do not set starpu_conf, AND prefer starpu_conf over env --> environment variable + ret = check_cpu(cpu_test2, -1, 1, cpu_test2, &cpu); + if (ret) return ret; + + return 0; +} + + #endif diff --git a/tests/main/starpu_task_bundle.c
b/tests/main/starpu_task_bundle.c
new file mode 100644
index 0000000..b7044d7
--- /dev/null
+++ b/tests/main/starpu_task_bundle.c
@@ -0,0 +1,148 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../helper.h"
+
+/*
+ * Test the bundle interface, putting tasks working on the same data in
+ * the same bundle (one bundle per handle, NB_ITERATION tasks per bundle)
+ */
+
+#define NB_BUNDLE 10
+#define NB_ITERATION 5
+/* CPU kernel: multiply the float variable in descr[0] by the float factor passed through args */
+void func_cpu(void *descr[], void *args)
+{
+	float *x = (float *) STARPU_VARIABLE_GET_PTR(descr[0]);
+	float factor;
+
+	factor = *(float *) args;
+	*x *= factor;
+}
+
+struct starpu_codelet codelet =
+{
+	.modes = {STARPU_RW},
+	.cpu_funcs = {func_cpu},
+	.cpu_funcs_name = {"func_cpu"},
+	.nbuffers = 1
+};
+
+int main(int argc, char **argv)
+{
+	int i, j, ret;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+	conf.nmpi_ms = -1;
+	conf.ntcpip_ms = -1;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize");
+
+	float *data;
+	starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE);
+	float factors[NB_BUNDLE];
+	starpu_data_handle_t handles[NB_BUNDLE];
+
+	struct starpu_task *task[NB_ITERATION];
+
+	starpu_task_bundle_t bundles[NB_BUNDLE];
+
+	for (i = 0; i < NB_BUNDLE; i++)
+	{
+		data[i] = i + 1;
+		factors[i] = NB_BUNDLE - i;
+	}
+
+	for (i = 0; i < NB_BUNDLE; i++)
+		starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float));
+
+	FPRINTF(stderr, "VALUES:");
+	for (i = 0; i < NB_BUNDLE; i++)
+		FPRINTF(stderr, " %f (%f)", data[i], factors[i]);
+	FPRINTF(stderr, "\n");
+
+	for (i = 0; i < NB_BUNDLE; i++)
+	{
+		starpu_task_bundle_create(&bundles[i]);
+
+		for (j = 0; j < NB_ITERATION; j++)
+		{
+			task[j] = starpu_task_create();
+
+			task[j]->cl = &codelet;
+
+			task[j]->cl_arg = &factors[i];
+			task[j]->cl_arg_size = sizeof(float);
+
+			task[j]->handles[0] = handles[i];
+
+			ret = starpu_task_bundle_insert(bundles[i], task[j]);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_bundle_insert"); /* label names the call actually checked */
+		}
+
+		/* Put one aside, just for fun */
+		ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_bundle_remove");
+
+		for (j = 0; j < NB_ITERATION; j++)
+		{
+			ret = starpu_task_submit(task[j]);
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+
+		starpu_task_bundle_close(bundles[i]);
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	for(i = 0; i < NB_BUNDLE ; i++)
+	{
+		ret = starpu_data_acquire(handles[i], STARPU_R);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	}
+
+	FPRINTF(stderr, "VALUES:");
+	for (i = 0; i < NB_BUNDLE; i++)
+		FPRINTF(stderr, " %f (%f)", data[i], factors[i]);
+	FPRINTF(stderr, "\n");
+
+	for(i = 0; i < NB_BUNDLE ; i++)
+	{
+		starpu_data_release(handles[i]);
+		starpu_data_unregister(handles[i]);
+	}
+
+	starpu_free_noflag(data, sizeof(*data) * NB_BUNDLE);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_shutdown();
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/starpu_task_wait.c b/tests/main/starpu_task_wait.c
new file mode 100644
index 0000000..0d9b78d
--- /dev/null
+++ b/tests/main/starpu_task_wait.c
@@ -0,0 +1,115 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test waiting for a task: create, submit, wait for and destroy ntasks tasks one at a time
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 65536;
+#endif
+/* Print the command-line synopsis on stderr and exit with status -1 */
+static void usage(char **argv)
+{
+	FPRINTF(stderr, "%s [-i ntasks] [-h]\n", argv[0]);
+	exit(-1);
+}
+/* Parse the command line: -i sets ntasks, -h prints the usage; -t takes an argument but has no case below */
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	while ((c = getopt(argc, argv, "i:t:h")) != -1)
+	switch(c)
+	{
+		case 'i':
+			ntasks = atoi(optarg);
+			break;
+		case 'h':
+			usage(argv);
+			break;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	double timing;
+	double start;
+	double end;
+	int ret;
+
+	parse_args(argc, argv);
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if(RUNNING_ON_VALGRIND) ntasks = 5;
+#endif
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	FPRINTF(stderr, "#tasks : %u\n", ntasks);
+
+	start = starpu_timing_now();
+
+	unsigned i;
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &starpu_codelet_nop;
+		task->cl_arg = NULL;
+		task->callback_func = NULL;
+		task->callback_arg = NULL;
+
+		task->detach = 0;
+		task->destroy = 0; /* the task is destroyed explicitly after the wait below */
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = starpu_task_wait(task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+		starpu_task_destroy(task);
+	}
+
+	end = starpu_timing_now();
+
+	timing = end - start;
+
+	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/starpu_task_wait_for_all.c b/tests/main/starpu_task_wait_for_all.c
new file mode 100644
index 0000000..8da8cb8
--- /dev/null
+++ b/tests/main/starpu_task_wait_for_all.c
@@ -0,0 +1,116 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Test waiting for all tasks: submit ntasks no-op tasks, then wait once with starpu_task_wait_for_all()
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 65536;
+#endif
+/* Create and submit one task running starpu_codelet_nop; returns the starpu_task_submit() result */
+static int inject_one_task(void)
+{
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &starpu_codelet_nop;
+	task->cl_arg = NULL;
+	task->callback_func = NULL;
+	task->callback_arg = NULL;
+
+	int ret = starpu_task_submit(task);
+	return ret;
+}
+/* Print the command-line synopsis on stderr and exit with status -1 */
+static void usage(char **argv)
+{
+	FPRINTF(stderr, "%s [-i ntasks] [-p sched_policy] [-h]\n", argv[0]);
+	exit(-1);
+}
+/* Parse the command line: -i sets ntasks, -p sets conf->sched_policy_name, -h prints the usage */
+static void parse_args(int argc, char **argv, struct starpu_conf *conf)
+{
+	int c;
+	while ((c = getopt(argc, argv, "i:p:h")) != -1)
+	switch(c)
+	{
+		case 'i':
+			ntasks = atoi(optarg);
+			break;
+		case 'p':
+			conf->sched_policy_name = optarg;
+			break;
+		case 'h':
+			usage(argv);
+			break;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	unsigned i;
+	double timing;
+	double start;
+	double end;
+	int ret;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+
+	parse_args(argc, argv, &conf);
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	FPRINTF(stderr, "#tasks : %u\n", ntasks);
+
+	start = starpu_timing_now();
+	for (i = 0; i < ntasks; i++)
+	{
+		ret = inject_one_task();
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	end = starpu_timing_now();
+
+	timing = end - start;
+
+	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/starpu_worker_exists.c b/tests/main/starpu_worker_exists.c
new file mode 100644
index 0000000..2ae1e9d
--- /dev/null
+++ b/tests/main/starpu_worker_exists.c
@@ -0,0 +1,90 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define BUILDING_STARPU
+#include
+#include "core/workers.h"
+#include "../helper.h"
+
+/*
+ * Test that _starpu_worker_exists works appropriately
+ */
+/* can_execute hook that accepts every (worker, task, implementation) combination */
+static int can_always_execute(unsigned workerid,
+			      struct starpu_task *task,
+			      unsigned nimpl)
+{
+	(void) workerid;
+	(void) task;
+	(void) nimpl;
+
+	return 1;
+}
+/* can_execute hook that rejects every (worker, task, implementation) combination */
+static int can_never_execute(unsigned workerid,
+			     struct starpu_task *task,
+			     unsigned nimpl)
+{
+	(void) workerid;
+	(void) task;
+	(void) nimpl;
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	struct starpu_task *task;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	task = starpu_task_create();
+	task->cl = &starpu_codelet_nop;
+	task->destroy = 0;
+	task->sched_ctx = 0;
+
+	starpu_codelet_nop.can_execute = NULL;
+	ret = _starpu_worker_exists(task);
+	if (!ret)
+	{
+		FPRINTF(stderr, "failure with can_execute=NULL\n");
+		return EXIT_FAILURE;
+	}
+
+	starpu_codelet_nop.can_execute = can_always_execute;
+	ret = _starpu_worker_exists(task);
+	if (!ret)
+	{
+		FPRINTF(stderr, "failure with can_always_execute\n");
+		return EXIT_FAILURE;
+	}
+
+	starpu_codelet_nop.can_execute = can_never_execute;
+	ret = _starpu_worker_exists(task);
+	if (ret)
+	{
+		FPRINTF(stderr, "failure with can_never_execute\n");
+		return EXIT_FAILURE;
+	}
+
+	starpu_task_destroy(task);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/main/static_restartable.c b/tests/main/static_restartable.c
new file mode 100644
index 0000000..185817a
--- /dev/null
+++ b/tests/main/static_restartable.c
@@ -0,0 +1,102 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that one can submit+wait the same task several times
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 65536;
+#endif
+/* Parse the command line: -i sets the number of submit+wait rounds (ntasks) */
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	while ((c = getopt(argc, argv, "i:")) != -1)
+	switch(c)
+	{
+		case 'i':
+			ntasks = atoi(optarg);
+			break;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	unsigned i;
+	double timing;
+	double start;
+	double end;
+	int ret;
+
+	parse_args(argc, argv);
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if(RUNNING_ON_VALGRIND) ntasks = 5;
+#endif
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_task task;
+
+	starpu_task_init(&task);
+
+	task.cl = &starpu_codelet_nop;
+	task.detach = 0; /* the loop below waits on this task explicitly */
+
+	FPRINTF(stderr, "#tasks : %u\n", ntasks);
+
+	start = starpu_timing_now();
+
+	for (i = 0; i < ntasks; i++)
+	{
+		ret = starpu_task_submit(&task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = starpu_task_wait(&task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+	}
+
+	end = starpu_timing_now();
+
+	timing = end - start;
+
+	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
+
+	starpu_task_clean(&task);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_task_clean(&task); /* release what starpu_task_init() set up, as on the success path */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/static_restartable_tag.c b/tests/main/static_restartable_tag.c
new file mode 100644
index 0000000..a4110de
--- /dev/null
+++ b/tests/main/static_restartable_tag.c
@@ -0,0 +1,111 @@
+/* StarPU --- Runtime system for
heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that one can submit+wait_tag the same task several times
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 65536;
+#endif
+static starpu_tag_t tag = 0x32;
+/* Parse the command line: -i sets the number of submit+wait rounds (ntasks) */
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	while ((c = getopt(argc, argv, "i:")) != -1)
+	switch(c)
+	{
+		case 'i':
+			ntasks = atoi(optarg);
+			break;
+	}
+}
+
+
+int main(int argc, char **argv)
+{
+	unsigned i;
+	double timing;
+	double start;
+	double end;
+	int ret;
+
+	parse_args(argc, argv);
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if(RUNNING_ON_VALGRIND) ntasks = 5;
+#endif
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_task task;
+
+	starpu_task_init(&task);
+
+	task.cl = &starpu_codelet_nop;
+
+	task.use_tag = 1;
+	task.tag_id = tag; /* the loop below waits on this tag rather than on the task */
+
+	FPRINTF(stderr, "#tasks : %u\n", ntasks);
+
+	start = starpu_timing_now();
+
+	for (i = 0; i < ntasks; i++)
+	{
+		ret = starpu_task_submit(&task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = starpu_tag_wait(tag);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait");
+	}
+
+	end = starpu_timing_now();
+
+	timing = end - start;
+
+	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&task);
+
+	starpu_shutdown();
+
+	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
+	starpu_task_clean(&task);
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_task_clean(&task); /* release what starpu_task_init() set up before leaving */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/static_restartable_using_initializer.c b/tests/main/static_restartable_using_initializer.c
new file mode 100644
index 0000000..64e59ff
--- /dev/null
+++ b/tests/main/static_restartable_using_initializer.c
@@ -0,0 +1,104 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+
+#include
+#include "../helper.h"
+
+/*
+ * Test that one can submit+wait the same task several times, using a static
+ * initialization
+ */
+
+/* This is equivalent to calling starpu_task_init later on */
+struct starpu_task task = STARPU_TASK_INITIALIZER;
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 65536;
+#endif
+/* Parse the command line: -i sets the number of submit+wait rounds (ntasks) */
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	while ((c = getopt(argc, argv, "i:")) != -1)
+	switch(c)
+	{
+		case 'i':
+			ntasks = atoi(optarg);
+			break;
+	}
+
+}
+/* Submit the statically initialized task ntasks times, waiting for it after each submission, and print timings */
+int main(int argc, char **argv)
+{
+	unsigned i;
+	double timing;
+	double start;
+	double end;
+	int ret;
+
+	parse_args(argc, argv);
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if(RUNNING_ON_VALGRIND) ntasks = 5;
+#endif
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	task.cl = &starpu_codelet_nop;
+	task.detach = 0; /* the loop below waits on this task explicitly */
+
+	FPRINTF(stderr, "#tasks : %u\n", ntasks);
+
+	start = starpu_timing_now();
+
+	for (i = 0; i < ntasks; i++)
+	{
+		ret = starpu_task_submit(&task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = starpu_task_wait(&task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+	}
+
+	end = starpu_timing_now();
+
+	timing = end - start;
+
+	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
+
+	starpu_task_clean(&task);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_task_clean(&task);
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/subgraph_repeat.c b/tests/main/subgraph_repeat.c
new file mode 100644
index 0000000..08486ae
---
/dev/null
+++ b/tests/main/subgraph_repeat.c
@@ -0,0 +1,162 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "../variable/increment.h"
+#include "../helper.h"
+
+/*
+ * Test that one can resubmit a whole task graph repeatedly
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64;
+#else
+static unsigned niter = 16384;
+#endif
+
+/*
+ *
+ *		    /-->B--\
+ *		    |      |
+ *	     -----> A      D---\--->
+ *		^   |      |   |
+ *		|   \-->C--/   |
+ *		|              |
+ *		\--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- A, B, C and D are resubmitted at the end of the loop (or not)
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD;
+
+static unsigned loop_cnt = 0;
+static unsigned *check_cnt;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+/* Callback of task D: count one finished iteration, then either wake up main() or resubmit A, B, C and D */
+static void callback_task_D(void *arg)
+{
+	(void)arg;
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	loop_cnt++;
+
+	if (loop_cnt == niter)
+	{
+		/* We are done */
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+	}
+	else
+	{
+		int ret;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		/* Let's go for another iteration */
+		ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(&taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(&taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(&taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+}
+
+int main(int argc, char **argv)
+{
+//	unsigned i;
+//	double timing;
+//	double start;
+//	double end;
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	*check_cnt = 0;
+
+	increment_load_opencl();
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_cl;
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_cl;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_cl;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_cl;
+	taskD.callback_func = callback_task_D;
+	taskD.handles[0] = check_data;
+
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
+
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops; re-check the predicate after every wakeup */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cnt < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*loop_cnt)); /* A, B, C and D each run increment_cl once per iteration */
+
+	starpu_data_unregister(check_data); /* unregister first: the registered buffer must still be valid here */
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	increment_unload_opencl();
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	increment_unload_opencl();
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/main/subgraph_repeat_regenerate.c b/tests/main/subgraph_repeat_regenerate.c
new file mode 100644
index 0000000..ab4318f
--- /dev/null
+++ b/tests/main/subgraph_repeat_regenerate.c
@@ -0,0 +1,184 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "../variable/increment.h"
+#include "../helper.h"
+
+/*
+ * Test that one can let a whole task graph repeatedly regenerate itself
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64;
+#else
+static unsigned niter = 16384;
+#endif
+
+/*
+ *
+ *		    /-->B--\
+ *		    |      |
+ *	     -----> A      D---\--->
+ *		^   |      |   |
+ *		|   \-->C--/   |
+ *		|              |
+ *		\--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- A is resubmitted from D's callback; B, C and D regenerate themselves (or not, on the last iteration)
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD;
+
+static unsigned loop_cntB = 0;
+static unsigned loop_cntC = 0;
+static unsigned loop_cntD = 0;
+static unsigned *check_cnt;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+/* Callback of task B: stop regenerating B once it has run niter times */
+static void callback_task_B(void *arg)
+{
+	(void)arg;
+	if (++loop_cntB == niter)
+		taskB.regenerate = 0;
+}
+/* Callback of task C: stop regenerating C once it has run niter times */
+static void callback_task_C(void *arg)
+{
+	(void)arg;
+	if (++loop_cntC == niter)
+		taskC.regenerate = 0;
+}
+/* Callback of task D: count one finished iteration, then either stop and wake up main() or resubmit A */
+static void callback_task_D(void *arg)
+{
+	(void)arg;
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	loop_cntD++;
+
+	if (loop_cntD == niter)
+	{
+		/* We are done */
+		taskD.regenerate = 0;
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+	}
+	else
+	{
+		int ret;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		/* Let's go for another iteration */
+		ret = starpu_task_submit(&taskA);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+}
+
+int main(int argc, char **argv)
+{
+//	unsigned i;
+//	double timing;
+//	double start;
+//	double end;
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	increment_load_opencl();
+
+	/* Implicit data dependencies and regeneratable tasks are not compatible */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	*check_cnt = 0;
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_cl;
+	taskA.regenerate = 0; /* this task will be explicitly resubmitted if needed */
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_cl;
+	taskB.callback_func = callback_task_B;
+	taskB.regenerate = 1;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_cl;
+	taskC.callback_func = callback_task_C;
+	taskC.regenerate = 1;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_cl;
+	taskD.callback_func = callback_task_D;
+	taskD.regenerate = 1;
+	taskD.handles[0] = check_data;
+
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
+
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cntD < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*niter)); /* A, B, C and D each run increment_cl once per iteration */
+
+	starpu_data_unregister(check_data); /* unregister first: the registered buffer must still be valid here */
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	increment_unload_opencl();
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	increment_unload_opencl();
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
diff --git a/tests/main/subgraph_repeat_regenerate_tag.c b/tests/main/subgraph_repeat_regenerate_tag.c
new file mode 100644
index 0000000..0b44c5b
--- /dev/null
+++ b/tests/main/subgraph_repeat_regenerate_tag.c
@@ -0,0 +1,231 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "../variable/increment.h"
+#include "../helper.h"
+
+/*
+ * Test that one can let a whole task graph repeatedly regenerate itself, using
+ * tag dependencies
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64;
+#else
+static unsigned niter = 16384;
+#endif
+
+#define TAG_START 0
+#define TAG_A 1
+#define TAG_B 2
+#define TAG_C 3
+#define TAG_D 4
+
+/*
+ *
+ *		    /-->B--\
+ *		    |      |
+ *	     -----> A      D---\--->
+ *		^   |      |   |
+ *		|   \-->C--/   |
+ *		|              |
+ *		\--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- A, B, C and D are resubmitted at the end of the loop (or not)
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD;
+
+static unsigned loop_cnt = 0;
+static unsigned loop_cnt_A = 0;
+static unsigned loop_cnt_B = 0;
+static unsigned loop_cnt_C = 0;
+static unsigned *check_cnt;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+/* Callback of task A: stop regenerating A once it has run niter times */
+static void callback_task_A(void *arg)
+{
+	(void)arg;
+
+	loop_cnt_A++;
+
+	if (loop_cnt_A == niter)
+	{
+		/* We are done */
+		taskA.regenerate = 0;
+	}
+}
+/* Callback of task B: stop regenerating B once it has run niter times */
+static void callback_task_B(void *arg)
+{
+	(void)arg;
+
+	loop_cnt_B++;
+
+	if (loop_cnt_B == niter)
+	{
+		/* We are done */
+		taskB.regenerate = 0;
+	}
+}
+/* Callback of task C: stop regenerating C once it has run niter times */
+static void callback_task_C(void *arg)
+{
+	(void)arg;
+
+	loop_cnt_C++;
+
+	if (loop_cnt_C == niter)
+	{
+		/* We are done */
+		taskC.regenerate = 0;
+	}
+}
+/* Callback of task D: count one finished iteration, then either stop and wake up main() or re-trigger TAG_START */
+static void callback_task_D(void *arg)
+{
+	(void)arg;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	loop_cnt++;
+
+	if (loop_cnt == niter)
+	{
+		/* We are done */
+		taskD.regenerate = 0;
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+	}
+	else
+	{
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		/* Let's go for another iteration */
+		starpu_tag_restart((starpu_tag_t) TAG_START);
+		starpu_tag_notify_from_apps((starpu_tag_t)TAG_START);
+	}
+}
+
+int main(int argc, char **argv)
+{
+//	unsigned i;
+//	double timing;
+//	double start;
+//	double end;
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Implicit data dependencies and regeneratable tasks are not compatible */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	*check_cnt = 0;
+
+	increment_load_opencl();
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_cl;
+	taskA.regenerate = 1; /* regenerates itself; each new round is released through TAG_START */
+	taskA.use_tag = 1;
+	taskA.tag_id = TAG_A;
+	taskA.callback_func = callback_task_A;
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_cl;
+	taskB.regenerate = 1;
+	taskB.use_tag = 1;
+	taskB.tag_id = TAG_B;
+	taskB.callback_func = callback_task_B;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_cl;
+	taskC.regenerate = 1;
+	taskC.use_tag = 1;
+	taskC.tag_id = TAG_C;
+	taskC.callback_func = callback_task_C;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_cl;
+	taskD.callback_func = callback_task_D;
+	taskD.regenerate = 1;
+	taskD.use_tag = 1;
+	taskD.tag_id = TAG_D;
+	taskD.handles[0] = check_data;
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_A, 1, (starpu_tag_t) TAG_START);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_B, 1, (starpu_tag_t) TAG_A);
+	starpu_tag_declare_deps((starpu_tag_t) TAG_C, 1, (starpu_tag_t) TAG_A);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_D, 2, (starpu_tag_t) TAG_B, (starpu_tag_t) TAG_C);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_tag_notify_from_apps((starpu_tag_t) TAG_START);
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops; re-check the predicate after every wakeup */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cnt < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*loop_cnt)); /* A, B, C and D each run increment_cl once per iteration */
+
+	starpu_data_unregister(check_data); /* unregister first: the registered buffer must still be valid here */
+
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	increment_unload_opencl();
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	increment_unload_opencl();
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
diff --git a/tests/main/subgraph_repeat_regenerate_tag_cycle.c b/tests/main/subgraph_repeat_regenerate_tag_cycle.c
new file mode 100644
index 0000000..5240b09
--- /dev/null
+++ b/tests/main/subgraph_repeat_regenerate_tag_cycle.c
@@ -0,0 +1,229 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include "../variable/increment.h"
+#include "../helper.h"
+
+/*
+ * Test that one can let a whole task graph repeatedly regenerate itself, using
+ * tag dependencies, with a complete cycle.
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64;
+#else
+static unsigned niter = 16384;
+#endif
+
+#define TAG_A 1
+#define TAG_B 2
+#define TAG_C 3
+#define TAG_D 4
+
+/*
+ *
+ *             /-->B--\
+ *             |      |
+ *      -----> A      D---\--->
+ *         ^   |      |   |
+ *         |   \-->C--/   |
+ *         |              |
+ *         \--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- A depends on D, which closes the cycle
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD;
+
+static unsigned loop_cnt = 0;
+static unsigned loop_cnt_A = 0;
+static unsigned loop_cnt_B = 0;
+static unsigned loop_cnt_C = 0;
+static unsigned *check_cnt;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+
+static void callback_task_A(void *arg)
+{
+	(void)arg;
+	/* Count the completed instances of A (arg is unused) */
+	loop_cnt_A++;
+
+	if (loop_cnt_A == niter)
+	{
+		/* Last iteration reached: stop regenerating A */
+		taskA.regenerate = 0;
+	}
+}
+
+static void callback_task_B(void *arg)
+{
+	(void)arg;
+	/* Count the completed instances of B (arg is unused) */
+	loop_cnt_B++;
+
+	if (loop_cnt_B == niter)
+	{
+		/* Last iteration reached: stop regenerating B */
+		taskB.regenerate = 0;
+	}
+}
+
+static void callback_task_C(void *arg)
+{
+	(void)arg;
+	/* Count the completed instances of C (arg is unused) */
+	loop_cnt_C++;
+
+	if (loop_cnt_C == niter)
+	{
+		/* Last iteration reached: stop regenerating C */
+		taskC.regenerate = 0;
+	}
+}
+
+static void callback_task_D(void *arg)
+{
+	(void)arg;
+	/* loop_cnt is the predicate main() waits on, hence the mutex */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	loop_cnt++;
+
+	if (loop_cnt == niter)
+	{
+		/* Last iteration reached: stop regenerating D and wake up main() */
+		taskD.regenerate = 0;
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+	}
+	else
+	{
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		/* Nothing to resubmit here: TAG_A was declared to depend on TAG_D */
+	}
+}
+
+/* Build the A -> {B, C} -> D -> A cycle, start it by notifying TAG_D by hand and
+ * wait until D has completed niter times; check_cnt must then hold 4 increments per loop. */
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Implicit data dependencies and regeneratable tasks are not compatible */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	ret = starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	*check_cnt = 0;
+
+	increment_load_opencl();
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_cl;
+	taskA.regenerate = 1; /* regenerated after each run; the next run waits for TAG_D */
+	taskA.use_tag = 1;
+	taskA.tag_id = TAG_A;
+	taskA.callback_func = callback_task_A;
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_cl;
+	taskB.regenerate = 1;
+	taskB.use_tag = 1;
+	taskB.tag_id = TAG_B;
+	taskB.callback_func = callback_task_B;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_cl;
+	taskC.regenerate = 1;
+	taskC.use_tag = 1;
+	taskC.tag_id = TAG_C;
+	taskC.callback_func = callback_task_C;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_cl;
+	taskD.callback_func = callback_task_D;
+	taskD.regenerate = 1;
+	taskD.use_tag = 1;
+	taskD.tag_id = TAG_D;
+	taskD.handles[0] = check_data;
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_B, 1, (starpu_tag_t) TAG_A);
+	starpu_tag_declare_deps((starpu_tag_t) TAG_C, 1, (starpu_tag_t) TAG_A);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_D, 2, (starpu_tag_t) TAG_B, (starpu_tag_t) TAG_C);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_A, 1, (starpu_tag_t) TAG_D);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* Break the loop: A waits on TAG_D, so TAG_D is notified once from the application */
+	starpu_tag_notify_restart_from_apps((starpu_tag_t) TAG_D);
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops; re-check the predicate after every wakeup */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cnt < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*loop_cnt));
+
+	starpu_data_unregister(check_data);
+
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	increment_unload_opencl();
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+	increment_unload_opencl();
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
diff --git a/tests/main/subgraph_repeat_tag.c b/tests/main/subgraph_repeat_tag.c
new file mode
100644
index 0000000..0061165
--- /dev/null
+++ b/tests/main/subgraph_repeat_tag.c
@@ -0,0 +1,194 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../variable/increment.h"
+#include "../helper.h"
+
+/*
+ * Test that one can resubmit a whole task graph repeatedly, using explicit task dependencies (no tag is set here)
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64;
+#else
+static unsigned niter = 16384;
+#endif
+
+/*
+ *
+ *             /-->B--\
+ *             |      |
+ *      -----> A      D---\--->
+ *         ^   |      |   |
+ *         |   \-->C--/   |
+ *         |              |
+ *         \--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- B, C and D regenerate themselves; A is resubmitted from D's callback
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD;
+
+static unsigned loop_cnt = 0;
+static unsigned loop_cnt_B = 0;
+static unsigned loop_cnt_C = 0;
+static unsigned *check_cnt;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+
+static void callback_task_B(void *arg)
+{
+	(void)arg;
+	loop_cnt_B++;
+
+	if (loop_cnt_B == niter)
+	{
+		/* Last iteration reached: stop regenerating B */
+		taskB.regenerate = 0;
+	}
+}
+
+static void callback_task_C(void *arg)
+{
+	(void)arg;
+	loop_cnt_C++;
+
+	if (loop_cnt_C == niter)
+	{
+		/* Last iteration reached: stop regenerating C */
+		taskC.regenerate = 0;
+	}
+}
+
+static void callback_task_D(void *arg)
+{
+	(void)arg;
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	loop_cnt++;
+
+	if (loop_cnt == niter)
+	{
+		/* Last iteration reached: stop regenerating D and wake up main() */
+		taskD.regenerate = 0;
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+	}
+	else
+	{
+		int ret;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		/* Let's go for another iteration: A does not regenerate, submit it again */
+		ret = starpu_task_submit(&taskA);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+}
+
+/* Build the A -> {B, C} -> D diamond with task dependencies and wait until D has
+ * completed niter times; check_cnt must then hold 4 increments per loop. */
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Implicit data dependencies and regeneratable tasks are not compatible */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	ret = starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	*check_cnt = 0;
+
+	increment_load_opencl();
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_cl;
+	taskA.regenerate = 0; /* this task will be explicitly resubmitted if needed */
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_cl;
+	taskB.regenerate = 1;
+	taskB.callback_func = callback_task_B;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_cl;
+	taskC.regenerate = 1;
+	taskC.callback_func = callback_task_C;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_cl;
+	taskD.callback_func = callback_task_D;
+	taskD.regenerate = 1;
+	taskD.handles[0] = check_data;
+
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
+
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops; re-check the predicate after every wakeup */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cnt < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*loop_cnt));
+
+	starpu_data_unregister(check_data);
+
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	increment_unload_opencl();
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	starpu_free_noflag(check_cnt, sizeof(*check_cnt));
+	increment_unload_opencl();
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
diff --git a/tests/main/submit.c b/tests/main/submit.c
new file mode 100644
index 0000000..e852210
--- /dev/null
+++ b/tests/main/submit.c
@@ -0,0 +1,118 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Test task submission + */ + +static int i = 0, j; + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + int old_i = STARPU_ATOMIC_ADD(&i, 1); + FPRINTF(stdout, "called third task, i = %d\n", old_i+1); +} + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 0 +}; + +static void callback(void *arg) +{ + (void)arg; + struct starpu_task *task = starpu_task_create(); + task->cl = &dummy_codelet; + task->detach = 1; + if (starpu_task_submit(task) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + FPRINTF(stdout, "submitted third task, i = %d\n", i); +} + +static struct starpu_codelet callback_submit_codelet = +{ + .cpu_funcs = {dummy_func}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 0 +}; + +static void task_submit_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + struct starpu_task *task = starpu_task_create(); + task->cl = &callback_submit_codelet; + task->callback_func = callback; + task->detach = 1; + if (starpu_task_submit(task) == -ENODEV) + exit(STARPU_TEST_SKIPPED); + int old_i = STARPU_ATOMIC_ADD(&i, 1); + FPRINTF(stdout, "submitted second task, i = %d\n", old_i + 1); +} + +static struct starpu_codelet task_submit_codelet = +{ + .cpu_funcs = {task_submit_func}, + .cuda_funcs = {task_submit_func}, + .opencl_funcs = {task_submit_func}, + .model = NULL, + .nbuffers = 0 +}; + +int main(void) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); 
+ + struct starpu_task *task = starpu_task_create(); + + task->cl = &task_submit_codelet; + task->detach = 1; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_wait_for_all(); + j = i; + + starpu_shutdown(); + + return j == 3 ? EXIT_SUCCESS : EXIT_FAILURE; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/tag_get_task.c b/tests/main/tag_get_task.c new file mode 100644 index 0000000..b5c205a --- /dev/null +++ b/tests/main/tag_get_task.c @@ -0,0 +1,69 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */
+
+#include
+#include
+
+#include
+#include "../helper.h"
+
+/* Test that starpu_tag_get_task() returns the task submitted with that tag */
+
+/* Completion callback: callback_arg carries the tag value itself (cast to a
+ * pointer), so it is only printed, never dereferenced. */
+static void callback(void *tag)
+{
+	fflush(stderr);
+	FPRINTF(stderr, "Callback for tag %p\n", tag);
+	fflush(stderr);
+}
+
+int main(int argc, char **argv)
+{
+	struct starpu_task *task;
+	starpu_tag_t tag = 0x42;
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* create a new dummy task with a tag */
+	task = starpu_task_create();
+	task->callback_func = callback;
+	task->callback_arg = (void *)(uintptr_t)tag; /* go through uintptr_t: the tag is an integer, not a pointer */
+	task->cl = &starpu_codelet_nop;
+	task->cl_arg = NULL;
+	task->destroy = 0; /* keep the task alive after it ran: its address is compared below */
+	task->use_tag = 1;
+	task->tag_id = tag;
+
+	/* execute the task */
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	/* check that starpu_tag_get_task() returns the correct task (ret != 0 means mismatch) */
+	ret = (starpu_tag_get_task(task->tag_id) != task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_get_task");
+
+	starpu_task_destroy(task);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/main/tag_task_data_deps.c b/tests/main/tag_task_data_deps.c
new file mode 100644
index 0000000..6a6d8e8
--- /dev/null
+++ b/tests/main/tag_task_data_deps.c
@@ -0,0 +1,268 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Test combinations of various tag/task/data dependencies + */ + +void dummy_func(void *descr[], void *arg) +{ + unsigned duration = (uintptr_t) arg; + if (duration) + starpu_usleep(duration); +} + +static struct starpu_codelet dummy_Rcodelet = +{ + .cpu_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +static struct starpu_codelet dummy_Wcodelet = +{ + .cpu_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .model = NULL, + .nbuffers = 0, +}; + +static struct starpu_task *create_dummy_task(int write, int data, unsigned duration, starpu_data_handle_t handle) +{ + struct starpu_task *task = starpu_task_create(); + + if (data) + { + if (write) + task->cl = &dummy_Wcodelet; + else + task->cl = &dummy_Rcodelet; + task->handles[0] = handle; + } + else + task->cl = &dummy_codelet; + task->cl_arg = (void*) (uintptr_t) duration; + + return task; +} + +int main(void) +{ + int ret; + /* We have 17 toggles to try below, thus 2^17 possibilities */ + unsigned loop, nloops = 128*1024; + unsigned duration = 100; + + starpu_data_handle_t handle1, handle2; + +#ifdef STARPU_QUICK_CHECK + return STARPU_TEST_SKIPPED; +#endif + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_void_data_register(&handle1); + starpu_void_data_register(&handle2); + 
starpu_data_set_sequential_consistency_flag(handle2, 0); + +#if 1 + for (loop = 0; loop < nloops; loop++) + { +#else + loop = 0x258; + do + { +#endif + int durationA = (loop & 1) ? duration:0; + int durationB = (loop & 2) ? duration:0; + int durationC = (loop & 4) ? duration:0; + int writeA, dataA; + int writeB, dataB; + int writeC, dataC; + starpu_data_handle_t handleA, handleB, handleC; + struct starpu_task *taskA, *taskB, *taskC; + + handleA = handle1; + writeA = !!(loop & 8); + dataA = !!(loop & 16); + if (!dataA && writeA) + { + handleA = handle2; + dataA = 1; + } + handleB = handle1; + writeB = !!(loop & 32); + dataB = !!(loop & 64); + if (!dataB && writeB) + { + handleB = handle2; + dataB = 1; + } + handleC = handle1; + writeC = !!(loop & 128); + dataC = !!(loop & 256); + if (!dataC && writeC) + { + handleC = handle2; + dataC = 1; + } + + FPRINTF(stderr,"\r%u", loop); +#if 0 + if (durationA) + FPRINTF(stderr, " longA "); + if (durationB) + FPRINTF(stderr, " longB "); + if (durationC) + FPRINTF(stderr, " longC "); + if (dataA) + { + if (writeA) + FPRINTF(stderr, " WA"); + else + FPRINTF(stderr, " RA"); + } + else if (writeA) + FPRINTF(stderr, " wA"); + if (dataB) + { + if (writeB) + FPRINTF(stderr, " WB"); + else + FPRINTF(stderr, " RB"); + } + else if (writeB) + FPRINTF(stderr, " wB"); + if (dataC) + { + if (writeC) + FPRINTF(stderr, " WC"); + else + FPRINTF(stderr, " RC"); + } + else if (writeC) + FPRINTF(stderr, " wC"); + if (loop & 512) + FPRINTF(stderr, " Tag AB"); + if (loop & 1024) + FPRINTF(stderr, " Tag AC"); + if (loop & 2048) + FPRINTF(stderr, " Tag BC"); + if (loop & 4096) + FPRINTF(stderr, " Task AB"); + if (loop & 8192) + FPRINTF(stderr, " Task AC"); + if (loop & 16384) + FPRINTF(stderr, " Task BC"); + if (loop & 32768) + FPRINTF(stderr, " delayB"); + if (loop & 65536) + FPRINTF(stderr, " delayC"); + FPRINTF(stderr," "); +#endif + fflush(stderr); + + taskA = create_dummy_task(writeA, dataA, durationA, handleA); + taskB = 
create_dummy_task(writeB, dataB, durationB, handleB); + taskC = create_dummy_task(writeC, dataC, durationC, handleC); + + taskA->tag_id = 3*loop; + taskA->use_tag = 1; + taskB->tag_id = 3*loop+1; + taskB->use_tag = 1; + taskC->tag_id = 3*loop+2; + taskC->use_tag = 1; + + if (loop & 512) + starpu_tag_declare_deps(taskB->tag_id, 1, taskA->tag_id); + if (loop & 1024) + starpu_tag_declare_deps(taskC->tag_id, 1, taskA->tag_id); + if (loop & 2048) + starpu_tag_declare_deps(taskC->tag_id, 1, taskB->tag_id); + + if (loop & 4096) + starpu_task_declare_deps_array(taskB, 1, &taskA); + if (loop & 8192) + starpu_task_declare_deps_array(taskC, 1, &taskA); + if (loop & 16384) + starpu_task_declare_deps_array(taskC, 1, &taskB); + + taskA->detach = 0; + taskB->detach = 0; + taskC->detach = 0; + + ret = starpu_task_submit(taskA); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + if (loop & 32768) + starpu_usleep(duration); + + ret = starpu_task_submit(taskB); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + if (loop & 65536) + starpu_usleep(duration); + + ret = starpu_task_submit(taskC); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(taskA); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + ret = starpu_task_wait(taskB); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + ret = starpu_task_wait(taskC); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + } + while(0); + +enodev: + starpu_data_unregister(handle1); + starpu_data_unregister(handle2); + starpu_shutdown(); + + return ret == -ENODEV ? STARPU_TEST_SKIPPED : EXIT_SUCCESS; +} diff --git a/tests/main/tag_wait_api.c b/tests/main/tag_wait_api.c new file mode 100644 index 0000000..aebfeec --- /dev/null +++ b/tests/main/tag_wait_api.c @@ -0,0 +1,137 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include +#include "../helper.h" + +/* + * Test tag dependencies and waiting for a tag + */ + +static void callback(void *tag) +{ + fflush(stderr); + FPRINTF(stderr, "Callback for tag %p\n", tag); + fflush(stderr); +} + +static struct starpu_task *create_dummy_task(starpu_tag_t tag) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + task->callback_func = callback; + task->callback_arg = (void *)(uintptr_t)tag; + + task->use_tag = 1; + task->tag_id = tag; + + return task; +} + +#define tagA ((starpu_tag_t)0x42) +#define tagB ((starpu_tag_t)0x12300) + +#define tagC ((starpu_tag_t)0x32) +#define tagD ((starpu_tag_t)0x52) +#define tagE ((starpu_tag_t)0x19999) +#define tagF ((starpu_tag_t)0x2312) +#define tagG ((starpu_tag_t)0x1985) + +#define tagH ((starpu_tag_t)0x32234) +#define tagI ((starpu_tag_t)0x5234) +#define tagJ ((starpu_tag_t)0x199) +#define tagK ((starpu_tag_t)0x231234) +#define tagL ((starpu_tag_t)0x2345) + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "{ A } -> { B }\n"); + fflush(stderr); + + struct starpu_task *taskA, *taskB; + + taskA = create_dummy_task(tagA); + taskB = 
create_dummy_task(tagB); + + /* B depends on A */ + starpu_tag_declare_deps(tagB, 1, tagA); + + ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + + ret = starpu_tag_wait(tagB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_wait"); + + FPRINTF(stderr, "{ C, D, E, F } -> { G }\n"); + + struct starpu_task *taskC, *taskD, *taskE, *taskF, *taskG; + + taskC = create_dummy_task(tagC); + taskD = create_dummy_task(tagD); + taskE = create_dummy_task(tagE); + taskF = create_dummy_task(tagF); + taskG = create_dummy_task(tagG); + + /* NB: we could have used starpu_tag_declare_deps_array instead */ + starpu_tag_declare_deps(tagG, 4, tagC, tagD, tagE, tagF); + + ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + + ret = starpu_tag_wait(tagG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); + + FPRINTF(stderr, "{ H, I } -> { J, K, L }\n"); + + struct starpu_task *taskH, *taskI, *taskJ, *taskK, *taskL; + + taskH = create_dummy_task(tagH); + taskI = create_dummy_task(tagI); + taskJ = create_dummy_task(tagJ); + taskK = create_dummy_task(tagK); + taskL = create_dummy_task(tagL); + + starpu_tag_declare_deps(tagJ, 2, tagH, tagI); + starpu_tag_declare_deps(tagK, 2, tagH, tagI); + starpu_tag_declare_deps(tagL, 2, tagH, tagI); + + starpu_tag_t tagJKL[3] = {tagJ, tagK, tagL}; + + ret = starpu_task_submit(taskH); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskI); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskJ); STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_submit"); + ret = starpu_task_submit(taskK); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + ret = starpu_task_submit(taskL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); + + ret = starpu_tag_wait_array(3, tagJKL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/main/task_end_dep.c b/tests/main/task_end_dep.c new file mode 100644 index 0000000..d207d32 --- /dev/null +++ b/tests/main/task_end_dep.c @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* This checks that adding an end dependency for an already-terminated task + * works */ + +#include + +#define FPRINTF(ofile, fmt, ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) + +#define INIT 12 + +void cpu_codelet2(void *descr[], void *args) +{ + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + (void)args; + STARPU_ASSERT(*val == INIT); + starpu_sleep(0.1); + STARPU_ASSERT(*val == INIT); + *val *= 2; +} + +struct starpu_codelet cl2 = +{ + .cpu_funcs = {cpu_codelet2}, + .cpu_funcs_name = {"cpu_codelet2"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet2" +}; + +void cpu_codelet(void *descr[], void *args) +{ + (void)args; + int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); + + STARPU_ASSERT(*val == 2*INIT); + starpu_sleep(0.1); + STARPU_ASSERT(*val == 2*INIT); + *val *= 2; +} + +struct starpu_codelet cl = +{ + .cpu_funcs = {cpu_codelet}, + .cpu_funcs_name = {"cpu_codelet"}, + .nbuffers = 1, + .modes = {STARPU_RW}, + .name = "codelet" +}; + +int main(void) +{ + int value=INIT; + int ret; + starpu_data_handle_t value_handle; + struct starpu_conf conf; + struct starpu_task *task, *task2; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = 0; + conf.ntcpip_ms = 0; + + ret = starpu_init(&conf); + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + return 77; + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_cpu_worker_get_count() < 1 && starpu_mpi_ms_worker_get_count() < 1) + { + FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); + starpu_shutdown(); + return 77; + } + + starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); + + task = starpu_task_build(&cl, + STARPU_RW, value_handle, + 0); + STARPU_ASSERT(task); + task->detach = 0; + + task2 = starpu_task_build(&cl2, + STARPU_RW, value_handle, + 0); + STARPU_ASSERT(task2); + task2->detach = 0; + task2->destroy = 0; + + ret = starpu_task_submit(task2); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_wait(task2); + STARPU_CHECK_RETURN_VALUE(ret, 
"starpu_task_wait"); + + starpu_task_declare_end_deps(task, 1, task2); + starpu_task_destroy(task2); + + ret = starpu_task_submit(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + starpu_data_unregister(value_handle); + + STARPU_ASSERT(value == 2*2*INIT); + + starpu_shutdown(); + + FPRINTF(stderr, "Value = %d\n", value); + + return ret; +} diff --git a/tests/main/task_wait_api.c b/tests/main/task_wait_api.c new file mode 100644 index 0000000..d36015e --- /dev/null +++ b/tests/main/task_wait_api.c @@ -0,0 +1,131 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Test task dependencies and waiting for a task + */ + +static struct starpu_task *create_dummy_task(void) +{ + struct starpu_task *task = starpu_task_create(); + + task->cl = &starpu_codelet_nop; + task->cl_arg = NULL; + task->detach = 0; + + return task; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + FPRINTF(stderr, "{ A } -> { B }\n"); + fflush(stderr); + + struct starpu_task *taskA, *taskB; + + taskA = create_dummy_task(); + taskB = create_dummy_task(); + + /* B depends on A */ + starpu_task_declare_deps_array(taskB, 1, &taskA); + + ret = starpu_task_submit(taskB); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + FPRINTF(stderr, "{ C, D, E, F } -> { G }\n"); + + struct starpu_task *taskC, *taskD, *taskE, *taskF, *taskG; + + taskC = create_dummy_task(); + taskD = create_dummy_task(); + taskE = create_dummy_task(); + taskF = create_dummy_task(); + taskG = create_dummy_task(); + + starpu_task_declare_deps(taskG, 4, taskC, taskD, taskE, taskF); + + ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + ret = starpu_task_wait(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + + FPRINTF(stderr, "{ H, I } -> { J, K, L }\n"); + + 
struct starpu_task *taskH, *taskI, *taskJ, *taskK, *taskL; + + taskH = create_dummy_task(); + taskI = create_dummy_task(); + taskJ = create_dummy_task(); + taskK = create_dummy_task(); + taskL = create_dummy_task(); + + struct starpu_task *tasksHI[2] = {taskH, taskI}; + + starpu_task_declare_deps_array(taskJ, 2, tasksHI); + starpu_task_declare_deps_array(taskK, 2, tasksHI); + starpu_task_declare_deps_array(taskL, 2, tasksHI); + + ret = starpu_task_submit(taskH); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskI); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskJ); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskK); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(taskL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + struct starpu_task *tasksJKL[3] = {taskJ, taskK, taskL}; + + ret = starpu_task_wait_array(tasksJKL, 3); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_array"); + + starpu_task_wait_for_all(); + + /* Destroy all the tasks that were not detached */ + starpu_task_destroy(taskA); + starpu_task_destroy(taskC); + starpu_task_destroy(taskD); + starpu_task_destroy(taskE); + starpu_task_destroy(taskF); + starpu_task_destroy(taskH); + starpu_task_destroy(taskI); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/main/wait_all_regenerable_tasks.c b/tests/main/wait_all_regenerable_tasks.c new file mode 100644 index 0000000..c1b753c --- /dev/null +++ b/tests/main/wait_all_regenerable_tasks.c @@ -0,0 +1,129 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Test that starpu_task_wait_for_all can work with a regenerating task + */ + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 64; +#else +static unsigned ntasks = 1024; +#endif + +static void callback(void *arg) +{ + struct starpu_task *task = starpu_task_get_current(); + + unsigned *cnt = (unsigned *) arg; + + (*cnt)++; + + if (*cnt == ntasks) + task->regenerate = 0; +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + } +} + +#define K 128 + +int main(int argc, char **argv) +{ + int ret; + double timing; + double start; + double end; + + parse_args(argc, argv); + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + struct starpu_task task[K]; + unsigned cnt[K]; + + int i; + for (i = 0; i < K; i++) + { + starpu_task_init(&task[i]); + cnt[i] = 0; + + task[i].cl = &starpu_codelet_nop; + task[i].regenerate = 1; + task[i].detach = 1; + + task[i].callback_func = callback; + task[i].callback_arg = &cnt[i]; + } + + FPRINTF(stderr, "#tasks : %d x %u tasks\n", K, ntasks); + + start = starpu_timing_now(); + + for (i = 0; i < K; i++) + { + ret = starpu_task_submit(&task[i]); + 
if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + for (i = 0; i < K; i++) + starpu_task_clean(&task[i]); + + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + end = starpu_timing_now(); + + /* Check that all the tasks have been properly executed */ + unsigned total_cnt = 0; + for (i = 0; i < K; i++) + total_cnt += cnt[i]; + + STARPU_ASSERT(total_cnt == K*ntasks); + + timing = end - start; + + FPRINTF(stderr, "Total: %f secs\n", timing/1000000); + FPRINTF(stderr, "Per task: %f usecs\n", timing/(K*ntasks)); + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/maxfpga/LMemLoopbackCpuCode.c b/tests/maxfpga/LMemLoopbackCpuCode.c new file mode 100644 index 0000000..d2938e2 --- /dev/null +++ b/tests/maxfpga/LMemLoopbackCpuCode.c @@ -0,0 +1,62 @@ +/** + * Document: MaxCompiler Tutorial (maxcompiler-tutorial.pdf) + * Chapter: 13 Example: 2 Name: LMem Loopback + * MaxFile name: LMemLoopback + * Summary: + * Adds two LMem input streams and writes the result to LMem. 
+ */ + +#include +#include +#include + +#include "LMemLoopback.h" +#include + +int check(int size, int32_t *outData, int32_t *inA, int32_t *inB) +{ + int status = 0; + for (int i = 0; i < size; i++) + { + if (outData[i] != inA[i] + inB[i]) + { + fprintf(stderr, "[%d] Verification error, out: %u != expected: %u\n", + i, outData[i], inA[i] + inB[i]); + status = 1; + } + } + return status; +} + +int main() +{ + const int size = 384; + int sizeBytes = size * sizeof(int32_t); + int32_t *inA = (int32_t*) malloc(sizeBytes); + int32_t *inB = (int32_t*) malloc(sizeBytes); + + for (int i = 0; i < size; i++) + { + inA[i] = i; + inB[i] = size - i; + } + + printf("Loading DFE memory.\n"); + LMemLoopback_writeLMem(size, 0, inA); + LMemLoopback_writeLMem(size, size, inB); + + printf("Running DFE.\n"); + LMemLoopback(size); + + printf("Reading DFE memory.\n"); + int32_t *outData = (int32_t*) malloc(sizeBytes); + LMemLoopback_readLMem(size, 2 * size, outData); + + int status = check(size, outData, inA, inB); + if (status) + printf("Test failed.\n"); + else + printf("Test passed OK!\n"); + + return status; +} diff --git a/tests/maxfpga/MyTasksManager.maxj b/tests/maxfpga/MyTasksManager.maxj new file mode 100644 index 0000000..a174490 --- /dev/null +++ b/tests/maxfpga/MyTasksManager.maxj @@ -0,0 +1,152 @@ +package maxfpga; + +import com.maxeler.maxcompiler.v2.build.EngineParameters; +//import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.managers.custom.DFELink; +import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; +import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemCommandGroup; +import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemInterface; +import com.maxeler.maxcompiler.v2.managers.engine_interfaces.CPUTypes; +import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface; +import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface.Direction; +import 
com.maxeler.maxcompiler.v2.managers.engine_interfaces.InterfaceParam; +import com.maxeler.platform.max5.manager.MAX5CManager; + +public class MyTasksManager extends MAX5CManager +{ + private static final CPUTypes TYPE = CPUTypes.INT32; + + public MyTasksManager(EngineParameters params) + { + super(params); + KernelBlock kernel1 = addKernel(new Task1(makeKernelParameters("Task1"))); + KernelBlock kernel2 = addKernel(new Task2(makeKernelParameters("Task2"))); + KernelBlock kernel3 = addKernel(new Task3(makeKernelParameters("Task3"))); + + LMemInterface iface = addLMemInterface(); + + + kernel1.getInput("inAT1") <== addStreamFromCPU("inAT1"); + + kernel1.getInput("inBT1") <== addStreamFromCPU("inBT1"); + + DFELink outCT1 = iface.addStreamToLMem("outCT1", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + outCT1 <== kernel1.getOutput("outCT1"); + + + DFELink inAT2 = iface.addStreamFromLMem("inAT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + kernel2.getInput("inAT2") <== inAT2; + + DFELink inBT2 = iface.addStreamFromLMem("inBT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + kernel2.getInput("inBT2") <== inBT2; + + DFELink outCT2 = iface.addStreamToLMem("outCT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + outCT2 <== kernel2.getOutput("outCT2"); + + + DFELink inAT3 = iface.addStreamFromLMem("inAT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + kernel3.getInput("inAT3") <== inAT3; + + DFELink inBT3 = iface.addStreamFromLMem("inBT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + kernel3.getInput("inBT3") <== inBT3; + + addStreamToCPU("outCT3") <== kernel3.getOutput("outCT3"); + + + createSlicInterface(interfaceT1("interfaceT1")); + createSlicInterface(interfaceT2("interfaceT2")); + createSlicInterface(interfaceT3("interfaceT3")); + } + + public static void main(String[] args) + { + MyTasksManager manager = new MyTasksManager(new EngineParameters(args)); + + manager.build(); + } + + private static EngineInterface interfaceT1(String name) + { 
+ EngineInterface ei = new EngineInterface(name); + + InterfaceParam ptrC1; + InterfaceParam N; + InterfaceParam sizeInBytes; + + N = ei.addParam("N", TYPE); + sizeInBytes = N * TYPE.sizeInBytes(); + ptrC1 = ei.addParam("ptrC1", TYPE); + + ei.setTicks("Task1", N); + + ei.setStream("inAT1", TYPE, sizeInBytes); + ei.setStream("inBT1", TYPE, sizeInBytes); + ei.setLMemLinear("outCT1", ptrC1, sizeInBytes); + + ei.ignoreAll(Direction.IN_OUT); + + ei.ignoreKernel("Task2"); + ei.ignoreKernel("Task3"); + + return ei; + } + + + private static EngineInterface interfaceT2(String name) + { + EngineInterface ei = new EngineInterface(name); + + InterfaceParam ptrA2; + InterfaceParam ptrB2; + InterfaceParam ptrC2; + InterfaceParam N; + InterfaceParam sizeInBytes; + + N = ei.addParam("N", TYPE); + sizeInBytes = N * TYPE.sizeInBytes(); + ptrA2 = ei.addParam("ptrA2", TYPE); + ptrB2 = ei.addParam("ptrB2", TYPE); + ptrC2 = ei.addParam("ptrC2", TYPE); + + ei.setTicks("Task2", N); + + ei.setLMemLinear("inAT2", ptrA2, sizeInBytes); + ei.setLMemLinear("inBT2", ptrB2, sizeInBytes); + ei.setLMemLinear("outCT2", ptrC2, sizeInBytes); + + ei.ignoreAll(Direction.IN_OUT); + + ei.ignoreKernel("Task1"); + ei.ignoreKernel("Task3"); + + return ei; + } + + private static EngineInterface interfaceT3(String name) + { + EngineInterface ei = new EngineInterface(name); + + InterfaceParam ptrA3; + InterfaceParam ptrB3; + InterfaceParam N; + InterfaceParam sizeInBytes; + + N = ei.addParam("N", TYPE); + sizeInBytes = N * TYPE.sizeInBytes(); + ptrA3 = ei.addParam("ptrA3", TYPE); + ptrB3 = ei.addParam("ptrB3", TYPE); + + ei.setTicks("Task3", N); + + ei.setLMemLinear("inAT3", ptrA3, sizeInBytes); + ei.setLMemLinear("inBT3", ptrB3, sizeInBytes); + ei.setStream("outCT3", TYPE, sizeInBytes); + + ei.ignoreAll(Direction.IN_OUT); + + ei.ignoreKernel("Task1"); + ei.ignoreKernel("Task2"); + + return ei; + } +} diff --git a/tests/maxfpga/MyTasksMuxManager.maxj b/tests/maxfpga/MyTasksMuxManager.maxj new file mode 100644 
index 0000000..d368e07 --- /dev/null +++ b/tests/maxfpga/MyTasksMuxManager.maxj @@ -0,0 +1,96 @@ +package maxfpga; + +import com.maxeler.maxcompiler.v2.build.EngineParameters; +//import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.managers.custom.DFELink; +import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; +import com.maxeler.maxcompiler.v2.managers.custom.blocks.Mux; +import com.maxeler.maxcompiler.v2.managers.custom.blocks.Demux; +import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemCommandGroup; +import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemInterface; +import com.maxeler.platform.max5.manager.MAX5CManager; + +public class MyTasksMuxManager extends MAX5CManager +{ + public MyTasksMuxManager(EngineParameters params) + { + super(params); + KernelBlock kernel1 = addKernel(new Task1(makeKernelParameters("Task1"))); + KernelBlock kernel2 = addKernel(new Task2(makeKernelParameters("Task2"))); + KernelBlock kernel3 = addKernel(new Task3(makeKernelParameters("Task3"))); + + LMemInterface iface = addLMemInterface(); + + + + Mux joinInAT1 = mux("joinInAT1"); + Mux joinInBT1 = mux("joinInBT1"); + + joinInAT1.addInput("inCPU") <== addStreamFromCPU("inAT1CPU"); + joinInAT1.addInput("inLMem") <== iface.addStreamFromLMem("inAT1LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + joinInBT1.addInput("inCPU") <== addStreamFromCPU("inBT1CPU"); + joinInBT1.addInput("inLMem") <== iface.addStreamFromLMem("inBT1LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + kernel1.getInput("inAT1") <== joinInAT1.getOutput(); + kernel1.getInput("inBT1") <== joinInBT1.getOutput(); + + Demux forkOutCT1 = demux("forkOutCT1"); + DFELink outCT1CPU = forkOutCT1.addOutput("outCPU"); + DFELink outCT1LMem = forkOutCT1.addOutput("outLMem"); + + forkOutCT1.getInput() <== kernel1.getOutput("outCT1"); + addStreamToCPU("outCT1CPU") <== outCT1CPU; + iface.addStreamToLMem("outCT1LMem", 
LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT1LMem; + + + Mux joinInAT2 = mux("joinInAT2"); + Mux joinInBT2 = mux("joinInBT2"); + + joinInAT2.addInput("inCPU") <== addStreamFromCPU("inAT2CPU"); + joinInAT2.addInput("inLMem") <== iface.addStreamFromLMem("inAT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + joinInBT2.addInput("inCPU") <== addStreamFromCPU("inBT2CPU"); + joinInBT2.addInput("inLMem") <== iface.addStreamFromLMem("inBT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + kernel2.getInput("inAT2") <== joinInAT2.getOutput(); + kernel2.getInput("inBT2") <== joinInBT2.getOutput(); + + Demux forkOutCT2 = demux("forkOutCT2"); + DFELink outCT2CPU = forkOutCT2.addOutput("outCPU"); + DFELink outCT2LMem = forkOutCT2.addOutput("outLMem"); + + forkOutCT2.getInput() <== kernel2.getOutput("outCT2"); + addStreamToCPU("outCT2CPU") <== outCT2CPU; + iface.addStreamToLMem("outCT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT2LMem; + + + + Mux joinInAT3 = mux("joinInAT3"); + Mux joinInBT3 = mux("joinInBT3"); + + joinInAT3.addInput("inCPU") <== addStreamFromCPU("inAT3CPU"); + joinInAT3.addInput("inLMem") <== iface.addStreamFromLMem("inAT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + joinInBT3.addInput("inCPU") <== addStreamFromCPU("inBT3CPU"); + joinInBT3.addInput("inLMem") <== iface.addStreamFromLMem("inBT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); + + kernel3.getInput("inAT3") <== joinInAT3.getOutput(); + kernel3.getInput("inBT3") <== joinInBT3.getOutput(); + + Demux forkOutCT3 = demux("forkOutCT3"); + DFELink outCT3CPU = forkOutCT3.addOutput("outCPU"); + DFELink outCT3LMem = forkOutCT3.addOutput("outLMem"); + + forkOutCT3.getInput() <== kernel3.getOutput("outCT3"); + addStreamToCPU("outCT3CPU") <== outCT3CPU; + iface.addStreamToLMem("outCT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT3LMem; + } + + public static void main(String[] args) + { + MyTasksMuxManager manager = new 
MyTasksMuxManager(new EngineParameters(args)); + + manager.build(); + } +} diff --git a/tests/maxfpga/README.txt b/tests/maxfpga/README.txt new file mode 100644 index 0000000..352d3ba --- /dev/null +++ b/tests/maxfpga/README.txt @@ -0,0 +1,94 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + +###################### +##### Examples ###### +###################### + +4 examples are provided to show the use of the different SLiC interfaces: + +- max_fpga_basic_static.c lets SLiC initialize the maxeler stack itself. This + is a very simple interface but does not allow for multiple fpga support. + +- max_fpga_advanced_static.c loads the maxeler design itself. This is a bit + more complex to call, but allows for multiple fpga support. + +- max_fpga_dynamic.c achieves the same as max_fpga_advanced_static.c, but using + the dynamic interface. + +- max_fpga_mux.c goes one step further by making input/output on the CPU or + local memory at will. + + +###################### +##### Maxeler ###### +###################### +$ export XILINXD_LICENSE_FILE=2100@jumax +$ module load vivado maxcompiler +$ module load devtoolset/8 + + +The Makefiles then build the program automatically. 
They do the equivalent of +the following, written here only for information: + +Building the JAVA program: (for kernel and Manager (.maxj)) + +$ cd starpu/tests/ +$ maxjc -1.7 -cp $MAXCLASSPATH maxfpga + +Running the Java program to generate a DFE implementation (a .max file) +that can be called from a StarPU/FPGA application and slic headers +(.h) for simulation: + +$ java -XX:+UseSerialGC -Xmx2048m -cp $MAXCLASSPATH:. maxfpga.MyTasksManager DFEModel=MAIA maxFileName=MyTasks target=DFE_SIM + +$ cp MyTasks_MAX5C_DFE_SIM/results/*{.max,.h} maxfpga + +$ cd maxfpga + +Building the slic object file (simulation): + +$ sliccompile MyTasks.max + + + +Once built, to start the simulation: + +$ maxcompilersim -c LIMA -n $USER-MyTasks restart +$ export LD_LIBRARY_PATH=$MAXELEROSDIR/lib:$LD_LIBRARY_PATH +$ export SLIC_CONF="use_simulation=$USER-MyTasks" + +PS: To stop simulation + +$ maxcompilersim -c LIMA -n $USER-MyTasks stop + + +################################# +##### StarPU with Maxeler ###### +################################# + +$ ./autogen.sh +$ ../configure --prefix=$PWD/../install +$ make + +By default they are built for simulation (target DFE_SIM).
To build for native +execution, use instead: + +make MAX_TARGET=DFE + +To test the code (.c): +$ ./tests/fpga/max_fpga diff --git a/tests/maxfpga/StreamFMACpuCode.cpp b/tests/maxfpga/StreamFMACpuCode.cpp new file mode 100644 index 0000000..e6a995b --- /dev/null +++ b/tests/maxfpga/StreamFMACpuCode.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include + +#include "StreamFMA.h" +#include "MaxSLiCInterface.h" + +int main() +{ + const int size = 400; + int sizeBytes = size * sizeof(int32_t); + int32_t *a = (int32_t*) malloc(sizeBytes); + int32_t *b = (int32_t*) malloc(sizeBytes); + int32_t *c = (int32_t*) malloc(sizeBytes); + + // TODO Generate input data + for(int i = 0; i < size; ++i) + { + a[i] = random() % 100; + b[i] = random() % 100; + } + max_file_t *maxfile = StreamFMA_init(); + max_engine_t *engine = max_load(maxfile, "*"); + + max_actions_t* act = max_actions_init(maxfile, NULL); + + max_set_ticks (act, "StreamFMAKernel", size); + max_queue_input(act, "a", a, size * sizeof(int32_t)); + max_queue_input(act, "b", b, size * sizeof(int32_t)); + max_queue_output(act, "output", c, size * sizeof(int32_t)); + max_run(engine, act); + + max_actions_free(act); + max_unload(engine); + + int ret = 0; + // TODO Use result data + for(std::size_t i = 0; i < size; ++i) + { + int32_t ref =a[i] + b[i]; + if (c[i] != ref) + { + std::cout << "Invalid Output at index " << i << ": " << std::endl; + std::cout << " reference: " << ref << std::endl; + std::cout << " value: " << c[i] << std::endl; + ret = 1; + break; + } + } + + if(0 == ret) + { + std::cout << "All " << size << " values calculated correctly on the DFE!" << std::endl; + } + + std::cout << "Done." 
<< std::endl; + return ret; +} diff --git a/tests/maxfpga/Task1.maxj b/tests/maxfpga/Task1.maxj new file mode 100644 index 0000000..696a827 --- /dev/null +++ b/tests/maxfpga/Task1.maxj @@ -0,0 +1,24 @@ +package maxfpga; + +import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; + +class Task1 extends Kernel +{ + private static final DFEType type = dfeInt(32); + + protected Task1(KernelParameters parameters) + { + super(parameters); + + DFEVar inAT1 = io.input("inAT1", type); + DFEVar inBT1 = io.input("inBT1", type); + DFEVar outCT1; + + outCT1 = inAT1+inBT1; + + io.output("outCT1", outCT1, type); + } +} diff --git a/tests/maxfpga/Task2.maxj b/tests/maxfpga/Task2.maxj new file mode 100644 index 0000000..0fd72a5 --- /dev/null +++ b/tests/maxfpga/Task2.maxj @@ -0,0 +1,24 @@ +package maxfpga; + +import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; + +class Task2 extends Kernel +{ + private static final DFEType type = dfeInt(32); + + protected Task2(KernelParameters parameters) + { + super(parameters); + + DFEVar inAT2 = io.input("inAT2", type); + DFEVar inBT2 = io.input("inBT2", type); + DFEVar outCT2; + + outCT2 = inAT2*inBT2; + + io.output("outCT2", outCT2, type); + } +} diff --git a/tests/maxfpga/Task3.maxj b/tests/maxfpga/Task3.maxj new file mode 100644 index 0000000..946526e --- /dev/null +++ b/tests/maxfpga/Task3.maxj @@ -0,0 +1,24 @@ +package maxfpga; + +import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; +import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; +import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; 
+import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; + +class Task3 extends Kernel +{ + private static final DFEType type = dfeInt(32); + + protected Task3(KernelParameters parameters) + { + super(parameters); + + DFEVar inAT3 = io.input("inAT3", type); + DFEVar inBT3 = io.input("inBT3", type); + DFEVar outCT3; + + outCT3 = inAT3+inBT3; + + io.output("outCT3", outCT3, type); + } +} diff --git a/tests/maxfpga/max_fpga_advanced_static.c b/tests/maxfpga/max_fpga_advanced_static.c new file mode 100644 index 0000000..f246722 --- /dev/null +++ b/tests/maxfpga/max_fpga_advanced_static.c @@ -0,0 +1,284 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include +#include +#include +#include +#include "../helper.h" + +/* This example shows the case of determining statically whether data is in CPU + * memory or DFE memory, and using the advanced Maxeler interface */ + +#include "MyTasks.h" +#include +#define SIZE (192/sizeof(int32_t)) + +void fpga_impl(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + // XXX: would rather use a scratch buffer + size_t ptrCT1 = 0x00000000000000c0; + + size_t ptrAT2 = ptrCT1; + size_t ptrBT2 = ptrCT1; + size_t ptrCT2 = 0x0000000000000180; + + size_t ptrAT3 = ptrCT2; + size_t ptrBT3 = ptrCT2; + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("Loading DFE memory.\n"); + + /* C = A+B */ + MyTasks_interfaceT1_actions_t actT1 = + { + .param_N = size, + .param_ptrC1 = ptrCT1, + .instream_inAT1 = ptrA, + .instream_inBT1 = ptrB, + }; + MyTasks_interfaceT1_run(engine, &actT1); + printf("T1 finished\n"); + + /* C = A*B */ + MyTasks_interfaceT2_actions_t actT2 = + { + .param_N = size, + .param_ptrA2 = ptrAT2, + .param_ptrB2 = ptrBT2, + .param_ptrC2 = ptrCT2, + }; + MyTasks_interfaceT2_run(engine, &actT2); + printf("T2 finished\n"); + + /* C = A+B */ + MyTasks_interfaceT3_actions_t actT3 = + { + .param_N = size, + .param_ptrA3 = ptrAT3, + .param_ptrB3 = ptrBT3, + .outstream_outCT3 = ptrC, + }; + MyTasks_interfaceT3_run(engine, &actT3); + printf("T3 finished\n"); + + printf("Running DFE.\n"); +} + +static struct starpu_codelet cl = +{ + .max_fpga_funcs = {fpga_impl}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU}, +}; + +void fpga_impl1(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t
*ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); + /* C = A+B */ + MyTasks_interfaceT1_actions_t act = + { + .param_N = size, + .param_ptrC1 = ptrC, + .instream_inAT1 = ptrA, + .instream_inBT1 = ptrB, + }; + MyTasks_interfaceT1_run(engine, &act); + printf("T1 finished\n"); +} + +static struct starpu_codelet cl1 = +{ + .max_fpga_funcs = {fpga_impl1}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +void fpga_impl2(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); + /* C = A*B */ + MyTasks_interfaceT2_actions_t act = + { + .param_N = size, + .param_ptrA2 = ptrA, + .param_ptrB2 = ptrB, + .param_ptrC2 = ptrC, + }; + MyTasks_interfaceT2_run(engine, &act); + printf("T2 finished\n"); +} + +static struct starpu_codelet cl2 = +{ + .max_fpga_funcs = {fpga_impl2}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W} + /* local by default */ +}; + +void fpga_impl3(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = 
STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); + /* C = A+B */ + MyTasks_interfaceT3_actions_t act = + { + .param_N = size, + .param_ptrA3 = ptrA, + .param_ptrB3 = ptrB, + .outstream_outCT3 = ptrC, + }; + MyTasks_interfaceT3_run(engine, &act); + printf("T3 finished\n"); +} + +static struct starpu_codelet cl3 = +{ + .max_fpga_funcs = {fpga_impl3}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, +}; + +int main(int argc, char **argv) +{ + struct starpu_conf conf; + starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; + int ret; + + struct starpu_max_load load[2]; + load[0].file = MyTasks_init(); + load[0].engine_id_pattern = "*"; + load[1].file = NULL; + load[1].engine_id_pattern = NULL; + + starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 0; + conf.max_fpga_load = load; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + int32_t a[SIZE]; + int32_t b[SIZE]; + int32_t c[SIZE]; + + int i; + for(i = 0; i < SIZE; ++i) + { + a[i] = random() % 100; + b[i] = random() % 100; + c[i] = 0; + } + + starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); + starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); + + starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); + starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); + + starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); + +#if 0 + ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, 
handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0); + fprintf(stderr,"task submitted %d\n", ret); +#else + ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); + fprintf(stderr,"task submitted %d\n", ret); +#endif + + starpu_data_unregister(handle_a); + starpu_data_unregister(handle_b); + starpu_data_unregister(handle_c); + + ret = EXIT_SUCCESS; + + for (i = 0; i < SIZE; ++i) + { + int ct1 = a[i] + b[i]; + int ct2 = ct1 * ct1; + int ct3 = ct2 + ct2; + + if (c[i] != ct3) + ret = EXIT_FAILURE; + + if (i < 10) + { + printf("%d == %d\n", c[i], ct3); + if (c[i] != ct3) + printf("OOOPS\n"); + } + } + + starpu_shutdown(); + + if (ret == EXIT_SUCCESS) + printf("OK!\n"); + + return ret; +} diff --git a/tests/maxfpga/max_fpga_basic_static.c b/tests/maxfpga/max_fpga_basic_static.c new file mode 100644 index 0000000..e022664 --- /dev/null +++ b/tests/maxfpga/max_fpga_basic_static.c @@ -0,0 +1,227 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include +#include +#include +#include +#include "../helper.h" + +/* This example shows the case of determining statically whether data is in CPU + * memory or DFE memory, and using the basic Maxeler interface */ + +#include "MyTasks.h" +#include +#define SIZE (192/sizeof(int32_t)) + +void fpga_impl(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + // XXX: would rather use a scratch buffer + size_t ptrCT1 = 0x00000000000000c0; + + size_t ptrAT2 = ptrCT1; + size_t ptrBT2 = ptrCT1; + size_t ptrCT2 = 0x0000000000000180; + + size_t ptrAT3 = ptrCT2; + size_t ptrBT3 = ptrCT2; + + printf("Loading DFE memory.\n"); + + /* C = A+B */ + MyTasks_interfaceT1(size, ptrCT1, ptrA, ptrB); + printf("T1 finished\n"); + + /* C = A*B */ + MyTasks_interfaceT2(size, ptrAT2, ptrBT2, ptrCT2); + printf("T2 finished\n"); + + /* C = A+B */ + MyTasks_interfaceT3(size, ptrAT3, ptrBT3, ptrC); + printf("T3 finished\n"); + + printf("Running DFE.\n"); +} + +static struct starpu_codelet cl = +{ + .max_fpga_funcs = {fpga_impl}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU}, +}; + +void fpga_impl1(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); + /* C = A+B */ + MyTasks_interfaceT1(size, ptrC, ptrA, ptrB); + printf("T1 finished\n"); +} + +static struct starpu_codelet cl1 = +{ + .max_fpga_funcs = {fpga_impl1}, + .nbuffers = 3, +
.modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +void fpga_impl2(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); + /* C = A*B */ + MyTasks_interfaceT2(size, ptrA, ptrB, ptrC); + printf("T2 finished\n"); +} + +static struct starpu_codelet cl2 = +{ + .max_fpga_funcs = {fpga_impl2}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W} + /* local by default */ +}; + +void fpga_impl3(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); + /* C = A+B */ + MyTasks_interfaceT3(size, ptrA, ptrB, ptrC); + printf("T3 finished\n"); +} + +static struct starpu_codelet cl3 = +{ + .max_fpga_funcs = {fpga_impl3}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, +}; + +int main(int argc, char **argv) +{ + struct starpu_conf conf; + starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; + int ret; + + starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 0; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Enable profiling */ + 
starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + int32_t a[SIZE]; + int32_t b[SIZE]; + int32_t c[SIZE]; + + int i; + for(i = 0; i < SIZE; ++i) + { + a[i] = random() % 100; + b[i] = random() % 100; + c[i] = 0; + } + + starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); + starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); + + starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); + starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); + + starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); + +#if 0 + ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0); + fprintf(stderr,"task submitted %d\n", ret); +#else + ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); + fprintf(stderr,"task submitted %d\n", ret); +#endif + + starpu_data_unregister(handle_a); + starpu_data_unregister(handle_b); + starpu_data_unregister(handle_c); + + ret = EXIT_SUCCESS; + + for (i = 0; i < SIZE; ++i) + { + int ct1 = a[i] + b[i]; + int ct2 = ct1 * ct1; + int ct3 = ct2 + ct2; + + if (c[i] != ct3) + ret = EXIT_FAILURE; + + if (i < 10) + { + printf("%d == %d\n", c[i], ct3); + if (c[i] != ct3) + printf("OOOPS\n"); + } + } + + starpu_shutdown(); + + if (ret == EXIT_SUCCESS) + printf("OK!\n"); + + return ret; +} diff --git a/tests/maxfpga/max_fpga_dynamic.c b/tests/maxfpga/max_fpga_dynamic.c new file mode 100644 index 0000000..a7d67b8 --- /dev/null +++ b/tests/maxfpga/max_fpga_dynamic.c @@ -0,0 +1,253 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include +#include +#include +#include "../helper.h" + +/* This examples shows the case of determining statically whether data is in CPU + * memory or DFE memory, and using the dynamic Maxeler interface */ + +#include "MyTasks.h" +#include +#define SIZE (192/sizeof(int32_t)) + +static max_file_t *maxfile; + +void fpga_impl1(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); + /* C = A+B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_set_ticks(acts, "Task1", size); + max_ignore_scalar(acts, "Task2", "run_cycle_count"); + max_ignore_scalar(acts, "Task3", "run_cycle_count"); + + max_queue_input(acts, "inAT1", ptrA, size * sizeof(uint32_t)); + max_queue_input(acts, "inBT1", ptrB, size * sizeof(uint32_t)); + max_memctl_linear(acts, "MemoryControllerPro0", "outCT1", ptrC, size * sizeof(uint32_t)); + + max_ignore_memctl(acts, "MemoryControllerPro0", "inAT2"); + max_ignore_memctl(acts, "MemoryControllerPro0", "inBT2"); + 
max_ignore_memctl(acts, "MemoryControllerPro0", "outCT2"); + + max_ignore_memctl(acts, "MemoryControllerPro0", "inAT3"); + max_ignore_memctl(acts, "MemoryControllerPro0", "inBT3"); + max_ignore_stream(acts, "outCT3"); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T1 finished\n"); +} + +static struct starpu_codelet cl1 = +{ + .max_fpga_funcs = {fpga_impl1}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +void fpga_impl2(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); + /* C = A*B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_ignore_scalar(acts, "Task1", "run_cycle_count"); + max_set_ticks(acts, "Task2", size); + max_ignore_scalar(acts, "Task3", "run_cycle_count"); + + max_ignore_stream(acts, "inAT1"); + max_ignore_stream(acts, "inBT1"); + max_ignore_memctl(acts, "MemoryControllerPro0", "outCT1"); + + max_memctl_linear(acts, "MemoryControllerPro0", "inAT2", ptrA, size * sizeof(uint32_t)); + max_memctl_linear(acts, "MemoryControllerPro0", "inBT2", ptrB, size * sizeof(uint32_t)); + max_memctl_linear(acts, "MemoryControllerPro0", "outCT2", ptrC, size * sizeof(uint32_t)); + + max_ignore_memctl(acts, "MemoryControllerPro0", "inAT3"); + max_ignore_memctl(acts, "MemoryControllerPro0", "inBT3"); + max_ignore_stream(acts, "outCT3"); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T2 finished\n"); +} + +static struct starpu_codelet cl2 = +{ + .max_fpga_funcs = {fpga_impl2}, + .nbuffers = 3, + .modes = 
{STARPU_R, STARPU_R, STARPU_W} + /* local by default */ +}; + +void fpga_impl3(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ + size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ + int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); + /* C = A+B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_ignore_scalar(acts, "Task1", "run_cycle_count"); + max_ignore_scalar(acts, "Task2", "run_cycle_count"); + max_set_ticks(acts, "Task3", size); + + max_ignore_stream(acts, "inAT1"); + max_ignore_stream(acts, "inBT1"); + max_ignore_memctl(acts, "MemoryControllerPro0", "outCT1"); + + max_ignore_memctl(acts, "MemoryControllerPro0", "inAT2"); + max_ignore_memctl(acts, "MemoryControllerPro0", "inBT2"); + max_ignore_memctl(acts, "MemoryControllerPro0", "outCT2"); + + max_memctl_linear(acts, "MemoryControllerPro0", "inAT3", ptrA, size * sizeof(uint32_t)); + max_memctl_linear(acts, "MemoryControllerPro0", "inBT3", ptrB, size * sizeof(uint32_t)); + max_queue_output(acts, "outCT3", ptrC, size * sizeof(uint32_t)); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T3 finished\n"); +} + +static struct starpu_codelet cl3 = +{ + .max_fpga_funcs = {fpga_impl3}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, +}; + +int main(int argc, char **argv) +{ + struct starpu_conf conf; + starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; + int ret; + + maxfile = MyTasks_init(); + + struct starpu_max_load load[2]; + load[0].file = maxfile; + load[0].engine_id_pattern = "*"; + load[1].file = NULL; + load[1].engine_id_pattern = NULL; + + 
starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 0; + conf.max_fpga_load = load; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + int32_t a[SIZE]; + int32_t b[SIZE]; + int32_t c[SIZE]; + + int i; + for(i = 0; i < SIZE; ++i) + { + a[i] = random() % 100; + b[i] = random() % 100; + c[i] = 0; + } + + starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); + starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); + + starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); + starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); + + starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); + + ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); + fprintf(stderr,"task submitted %d\n", ret); + + starpu_data_unregister(handle_a); + starpu_data_unregister(handle_b); + starpu_data_unregister(handle_c); + + ret = EXIT_SUCCESS; + + for (i = 0; i < SIZE; ++i) + { + int ct1 = a[i] + b[i]; + int ct2 = ct1 * ct1; + int ct3 = ct2 + ct2; + + if (c[i] != ct3) + ret = EXIT_FAILURE; + + if (i < 10) + { + printf("%d == %d\n", c[i], ct3); + if (c[i] != ct3) + printf("OOOPS\n"); + } + } + + starpu_shutdown(); + + if (ret == EXIT_SUCCESS) + printf("OK!\n"); + + return ret; +} diff --git a/tests/maxfpga/max_fpga_mux.c b/tests/maxfpga/max_fpga_mux.c new file mode 100644 index 0000000..d8cb17b --- /dev/null +++ 
b/tests/maxfpga/max_fpga_mux.c @@ -0,0 +1,312 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include +#include +#include +#include "../helper.h" + +/* This example shows the case of letting the runtime determine whether data + * should be in CPU memory or DFE memory, by integrating a multiplexer in a + * design that can be toggled dynamically. 
*/ + +#include "MyTasksMux.h" +#include +#define SIZE (192/sizeof(int32_t)) + +static max_file_t *maxfile; + +/* + * Dynamically configure multiplexer and streaming from CPU or from LMem (ignoring the other) + */ +#define setupReadData(name) do { \ + if (kind##name == STARPU_CPU_RAM) { \ + max_route(acts, "joinIn"#name".inCPU", "joinIn"#name".join"); \ + max_queue_input(acts, "in"#name"CPU", ptr##name, size * sizeof(uint32_t)); \ + max_ignore_memctl(acts, "MemoryControllerPro0", "in"#name"LMem"); \ + } else { \ + max_route(acts, "joinIn"#name".inLMem", "joinIn"#name".join"); \ + max_ignore_stream(acts, "in"#name"CPU"); \ + max_memctl_linear(acts, "MemoryControllerPro0", "in"#name"LMem", (size_t) ptr##name, size * sizeof(int32_t)); \ + } \ +} while (0) + +/* + * Ignore data from unused input + */ +#define ignoreReadData(name) do { \ + max_route(acts, "joinIn"#name".inLMem", "joinIn"#name".join"); \ + max_ignore_stream(acts, "in"#name"CPU"); \ + max_ignore_memctl(acts, "MemoryControllerPro0", "in"#name"LMem"); \ +} while (0) + +/* + * Configure demultiplexer and streaming to CPU or to LMem (ignoring the other) + */ +#define setupWriteData(name) do { \ + if (kind##name == STARPU_CPU_RAM) { \ + max_route(acts, "forkOut"#name, "outCPU"); \ + max_queue_output(acts, "out"#name"CPU", ptr##name, size * sizeof(uint32_t)); \ + max_ignore_memctl(acts, "MemoryControllerPro0", "out"#name"LMem"); \ + } else { \ + max_route(acts, "forkOut"#name, "outLMem"); \ + max_ignore_stream(acts, "out"#name"CPU"); \ + max_memctl_linear(acts, "MemoryControllerPro0", "out"#name"LMem", (size_t) ptr##name, size * sizeof(uint32_t)); \ + } \ +} while (0) + +/* + * Ignore data from unused output + */ +#define ignoreWriteData(name) do { \ + max_route(acts, "forkOut"#name, "outLMem"); \ + max_ignore_stream(acts, "out"#name"CPU"); \ + max_ignore_memctl(acts, "MemoryControllerPro0", "out"#name"LMem"); \ +} while (0) + +void fpga_impl1(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrAT1 
= (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrBT1 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + int32_t *ptrCT1 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + enum starpu_node_kind kindAT1 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); + enum starpu_node_kind kindBT1 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); + enum starpu_node_kind kindCT1 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T1 with %p %p %p\n", ptrAT1, ptrBT1, ptrCT1); + /* C = A+B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_set_ticks(acts, "Task1", size); + max_ignore_scalar(acts, "Task2", "run_cycle_count"); + max_ignore_scalar(acts, "Task3", "run_cycle_count"); + + setupReadData(AT1); + setupReadData(BT1); + setupWriteData(CT1); + + ignoreReadData(AT2); + ignoreReadData(BT2); + ignoreWriteData(CT2); + + ignoreReadData(AT3); + ignoreReadData(BT3); + ignoreWriteData(CT3); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T1 finished\n"); +} + +static struct starpu_codelet cl1 = +{ + .max_fpga_funcs = {fpga_impl1}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +void fpga_impl2(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrAT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrBT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + int32_t *ptrCT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + enum starpu_node_kind kindAT2 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); + enum starpu_node_kind kindBT2 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); + enum starpu_node_kind kindCT2 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); + + int size = 
STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T2 with %p %p %p\n", ptrAT2, ptrBT2, ptrCT2); + /* C = A*B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_ignore_scalar(acts, "Task1", "run_cycle_count"); + max_set_ticks(acts, "Task2", size); + max_ignore_scalar(acts, "Task3", "run_cycle_count"); + + setupReadData(AT2); + setupReadData(BT2); + setupWriteData(CT2); + + ignoreReadData(AT1); + ignoreReadData(BT1); + ignoreWriteData(CT1); + + ignoreReadData(AT3); + ignoreReadData(BT3); + ignoreWriteData(CT3); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T2 finished\n"); +} + +static struct starpu_codelet cl2 = +{ + .max_fpga_funcs = {fpga_impl2}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL}, +}; + +void fpga_impl3(void *buffers[], void *cl_arg) +{ + (void)cl_arg; + + int32_t *ptrAT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); + int32_t *ptrBT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); + int32_t *ptrCT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); + enum starpu_node_kind kindAT3 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); + enum starpu_node_kind kindBT3 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); + enum starpu_node_kind kindCT3 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); + + int size = STARPU_VECTOR_GET_NX(buffers[0]); + + max_engine_t *engine = starpu_max_fpga_get_local_engine();; + + printf("T3 with %p %p %p\n", ptrAT3, ptrBT3, ptrCT3); + /* C = A+B */ + + max_actions_t *acts = max_actions_init(maxfile, NULL); + max_ignore_scalar(acts, "Task1", "run_cycle_count"); + max_ignore_scalar(acts, "Task2", "run_cycle_count"); + max_set_ticks(acts, "Task3", size); + + setupReadData(AT3); + setupReadData(BT3); + setupWriteData(CT3); + + ignoreReadData(AT1); + 
ignoreReadData(BT1); + ignoreWriteData(CT1); + + ignoreReadData(AT2); + ignoreReadData(BT2); + ignoreWriteData(CT2); + + max_run(engine, acts); + max_actions_free(acts); + + printf("T3 finished\n"); +} + +static struct starpu_codelet cl3 = +{ + .max_fpga_funcs = {fpga_impl3}, + .nbuffers = 3, + .modes = {STARPU_R, STARPU_R, STARPU_W}, + .specific_nodes = 1, + .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_CPU}, +}; + +int main(int argc, char **argv) +{ + struct starpu_conf conf; + starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; + int ret; + + maxfile = MyTasksMux_init(); + + struct starpu_max_load load[2]; + load[0].file = maxfile; + load[0].engine_id_pattern = "*"; + load[1].file = NULL; + load[1].engine_id_pattern = NULL; + + starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 0; + conf.max_fpga_load = load; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + int32_t a[SIZE]; + int32_t b[SIZE]; + int32_t c[SIZE]; + + int i; + for(i = 0; i < SIZE; ++i) + { + a[i] = random() % 100; + b[i] = random() % 100; + c[i] = 0; + } + + starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); + starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); + + starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); + starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); + + starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); + + ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, 
handle_ct2, 0); + fprintf(stderr,"task submitted %d\n", ret); + ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); + fprintf(stderr,"task submitted %d\n", ret); + + starpu_data_unregister(handle_a); + starpu_data_unregister(handle_b); + starpu_data_unregister(handle_c); + + ret = EXIT_SUCCESS; + + for (i = 0; i < SIZE; ++i) + { + int ct1 = a[i] + b[i]; + int ct2 = ct1 * ct1; + int ct3 = ct2 + ct2; + + if (c[i] != ct3) + ret = EXIT_FAILURE; + + if (i < 10) + { + printf("%d == %d\n", c[i], ct3); + if (c[i] != ct3) + printf("OOOPS\n"); + } + } + + starpu_shutdown(); + + if (ret == EXIT_SUCCESS) + printf("OK!\n"); + + return ret; +} diff --git a/tests/memory/memstress.gp b/tests/memory/memstress.gp new file mode 100644 index 0000000..9e2081a --- /dev/null +++ b/tests/memory/memstress.gp @@ -0,0 +1,26 @@ +#!/usr/bin/gnuplot -persist +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +set term postscript landscape color 22 +set output "memstress.ps" +set xlabel "Memory Pressure (MB)" +set ylabel "execution time degradation (%)" +set grid y +set key left top box +set datafile missing 'x' +plot "timings/memstress.data" usi 1:(( 100*(($2 / 2130) - 1))) with linespoint title "matrix size : 4096" ,\ + "timings/memstress.data" usi 1:(( 100*(($3 / 16420) - 1) )) with linespoint title "8192" diff --git a/tests/memory/memstress.sh b/tests/memory/memstress.sh new file mode 100755 index 0000000..4e976d1 --- /dev/null +++ b/tests/memory/memstress.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DIR=$PWD +ROOTDIR=$DIR/../.. 
+TIMINGDIR=$DIR/timings/ +mkdir -p "$TIMINGDIR" +filename=$TIMINGDIR/memstress.data # one row per stress level, one column per size; plotted by memstress.gp + +sizelist="4096 8192" +stresslist="0 50 100 150 200 250 300 350 400 450 500 550 600 650 655 660 665 670 675" +#stresslist="672" + +trace_stress() # $1: value exported as STRESS_MEM; appends one row to $filename +{ + memstress=$1 + + export STARPU_NCPUS=0 + export STARPU_NCUDA=1 + export STRESS_MEM=$memstress + + line="$memstress" + + for size in $sizelist + do + nblocks=$(($size / 1024)) + echo "Computing size $size with $memstress MB of memory LESS" + + echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null" + timing=$($MS_LAUNCHER $STARPU_LAUNCH "$ROOTDIR/examples/mult/dw_mult" -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null) + + echo "size : $size memstress $memstress => $timing us" + + line="$line ${timing:-x}" # keep columns aligned: 'x' is the missing-sample marker set in memstress.gp + + done + + echo "$line" >> "$filename" +} + +cd "$ROOTDIR" || exit 1 # never run "make clean" from an unexpected directory + +make clean 1> /dev/null 2> /dev/null +make examples STARPU_ATLAS=1 CUDA=1 CPUS=3 1> /dev/null #2> /dev/null + +cd "$DIR" || exit 1 + +echo "#memstress $sizelist " > "$filename" + +for memstress in $stresslist +do + trace_stress $memstress; +done diff --git a/tests/memory/memstress2.gp b/tests/memory/memstress2.gp new file mode 100644 index 0000000..b037301 --- /dev/null +++ b/tests/memory/memstress2.gp @@ -0,0 +1,26 @@ +#!/usr/bin/gnuplot -persist +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +set term postscript landscape color 22 +set output "memstress2.ps" +set xlabel "Problem size" +set ylabel "execution time" +set logscale x +set key left top +set datafile missing 'x' +plot "timings/memstress2.data" usi 1:2 with lines title "reference" ,\ + "timings/memstress2.data" usi 1:3 with lines title "350 MB" diff --git a/tests/memory/memstress2.sh b/tests/memory/memstress2.sh new file mode 100755 index 0000000..eb4d49b --- /dev/null +++ b/tests/memory/memstress2.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DIR=$PWD +ROOTDIR=$DIR/../.. 
+TIMINGDIR=$DIR/timings/ +mkdir -p "$TIMINGDIR" +filename=$TIMINGDIR/memstress2.data # one row per size, one column per stress level; plotted by memstress2.gp + +sizelist="512 1024 2048 4096 8192 16384" +stresslist="0 350" +#stresslist="672" + +trace_stress() # $1: problem size; appends one row to $filename +{ + size=$1 + + line="$size" + + for stress in $stresslist + do + export STRESS_MEM=$stress + + nblocks=$(( size < 1024 ? 1 : size / 1024 )) # size 512 used to give 0 blocks; NOTE(review): assumes dw_mult needs at least 1 + echo "Computing size $size with $stress MB of memory LESS" + + + echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null" + timing=$($MS_LAUNCHER $STARPU_LAUNCH "$ROOTDIR/examples/mult/dw_mult" -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null) + + echo "size : $size memstress $stress => $timing us" + + line="$line ${timing:-x}" # keep columns aligned: 'x' is the missing-sample marker set in memstress2.gp + + done + + echo "$line" >> "$filename" +} + +cd "$ROOTDIR" || exit 1 # never run "make clean" from an unexpected directory + +make clean 1> /dev/null 2> /dev/null +make examples STARPU_ATLAS=1 CUDA=1 CPUS=0 1> /dev/null 2> /dev/null + +cd "$DIR" || exit 1 + +echo "#size $stresslist " > "$filename" + +for size in $sizelist +do + trace_stress $size; +done diff --git a/tests/microbenchs/async_tasks_data_overhead.sh b/tests/microbenchs/async_tasks_data_overhead.sh new file mode 100755 index 0000000..75e77eb --- /dev/null +++ b/tests/microbenchs/async_tasks_data_overhead.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi # 77: reported as SKIP by the Automake test driver + +ROOT=${0%.sh} # path of this script without its .sh suffix +ROOT=$(echo "$ROOT" | sed 's/tasks_data_overhead/tasks_overhead/') # name of the async_tasks_overhead program +exec $MS_LAUNCHER $STARPU_LAUNCH "$ROOT" -b 1 "$@" diff --git a/tests/microbenchs/async_tasks_overhead.c b/tests/microbenchs/async_tasks_overhead.c new file mode 100644 index 0000000..b2c6e28 --- /dev/null +++ b/tests/microbenchs/async_tasks_overhead.c @@ -0,0 +1,222 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* + * Measure the cost of submitting asynchronous tasks + */ + +starpu_data_handle_t data_handles[8]; /* one handle per codelet buffer; 8 matches the length of dummy_codelet.modes */ +float *buffers[8]; + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 128; +#else +static unsigned ntasks = 65536; +#endif +static unsigned nbuffers = 0; + +#define BUFFERSIZE 16 + +//static unsigned finished = 0; + +static double cumulated = 0.0; +static double cumulated_push = 0.0; +static double cumulated_pop = 0.0; + +void dummy_func(void *descr[], void *arg) /* empty kernel: the task itself does no work */ +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 0, + .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} +}; + +static void usage(char **argv) /* print the option summary on stderr and exit with EXIT_FAILURE */ +{ + fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char **argv, struct starpu_conf *conf) /* fills ntasks, nbuffers, dummy_codelet.nbuffers and conf->sched_policy_name */ +{ + int c; + while ((c = getopt(argc, argv, "i:b:p:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 'b': + dummy_codelet.nbuffers = nbuffers = atoi(optarg); + if (nbuffers > sizeof(data_handles)/sizeof(data_handles[0])) usage(argv); /* reject counts (or negative input, which wraps) that would overrun data_handles[]/buffers[] */ + break; + case 'p': + conf->sched_policy_name = optarg; + break; + case 'h': + usage(argv); + break; + } +} + +int main(int argc, char **argv) +{ + int ret; + unsigned i; + double timing; + double start; + double end; + + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncpus = 2; + + parse_args(argc, argv, &conf); + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned buffer; + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); + starpu_vector_data_register(&data_handles[buffer], 
STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); + } + + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); + + /* Create an array of tasks */ + struct starpu_task **tasks = (struct starpu_task **) malloc(ntasks*sizeof(struct starpu_task *)); + STARPU_ASSERT(tasks || ntasks == 0); /* malloc(0) may legitimately return NULL */ + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &dummy_codelet; + task->detach = 0; + + /* we have 8 buffers at most */ + for (buffer = 0; buffer < nbuffers; buffer++) + { + task->handles[buffer] = data_handles[buffer]; + } + + tasks[i] = task; + } + + start = starpu_timing_now(); + for (i = 0; i < ntasks; i++) + { + ret = starpu_task_submit(tasks[i]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + end = starpu_timing_now(); + + /* Read profiling feedback */ + for (i = 0; i < ntasks; i++) + { + struct starpu_profiling_task_info *info; + info = tasks[i]->profiling_info; + + double queued = starpu_timing_timespec_delay_us(&info->push_end_time, &info->pop_end_time); + double length = starpu_timing_timespec_delay_us(&info->submit_time, &info->end_time); + double push_duration = starpu_timing_timespec_delay_us(&info->push_start_time, &info->push_end_time); + double pop_duration = starpu_timing_timespec_delay_us(&info->pop_start_time, &info->pop_end_time); + starpu_task_destroy(tasks[i]); + cumulated += (length - queued); + cumulated_push += push_duration; + cumulated_pop += pop_duration; + } + + timing = end - start; + + fprintf(stderr, "Total: %f secs\n", timing/1000000); + fprintf(stderr, "Per task: %f usecs\n", timing/ntasks); + fprintf(stderr, "Per task (except scheduler): %f usecs\n", cumulated/ntasks); + fprintf(stderr, "Per task (push): %f usecs\n", cumulated_push/ntasks); + fprintf(stderr, "Per task (pop): %f usecs\n", 
cumulated_pop/ntasks); + + { + char *output_dir = getenv("STARPU_BENCH_DIR"); + char *bench_id = getenv("STARPU_BENCH_ID"); + + if (output_dir && bench_id) + { + char number[1+sizeof(nbuffers)*3+1]; + const char *numberp; + char file[1024]; + FILE *f; + + if (nbuffers) + { + snprintf(number, sizeof(number), "_%u", nbuffers); + numberp = number; + } + else + numberp = ""; + + snprintf(file, sizeof(file), "%s/async_tasks_overhead_total%s.dat", output_dir, numberp); + f = fopen(file, "a"); + if (f) fprintf(f, "%s\t%f\n", bench_id, timing/1000000); else perror(file); /* an unwritable bench dir must not crash the run */ + if (f) fclose(f); + + snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task%s.dat", output_dir, numberp); + f = fopen(file, "a"); + if (f) fprintf(f, "%s\t%f\n", bench_id, timing/ntasks); else perror(file); + if (f) fclose(f); + } + } + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_unregister(data_handles[buffer]); + starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); + } + + starpu_shutdown(); + free(tasks); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + free(tasks); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/microbenchs/bandwidth.c b/tests/microbenchs/bandwidth.c new file mode 100644 index 0000000..c96dcc4 --- /dev/null +++ b/tests/microbenchs/bandwidth.c @@ -0,0 +1,360 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Measure the memory bandwidth available to kernels depending on the number of + * kernels and number of idle workers. + */ + +#if defined(STARPU_QUICK_CHECK) || defined(STARPU_SANITIZE_LEAK) || defined(STARPU_SANITIZE_ADDRESS) || defined(STARPU_SANITIZE_UNDEFINED) +static size_t size = 1024; +#else +/* Must be bigger than available cache size per core, 64MiB should be enough */ +static size_t size = 64UL << 20; +#endif +static unsigned cpustep = 0; + +static unsigned noalone = 0; +static unsigned iter = 30; +static unsigned total_ncpus; +static starpu_pthread_barrier_t barrier_begin, barrier_end; +static float *result; +static void **buffers; /* Indexed by logical core number */ +static char padding1[STARPU_CACHELINE_SIZE]; +static volatile char finished; +static char padding2[STARPU_CACHELINE_SIZE]; + +static unsigned interleave(unsigned i); + +/* Initialize the buffer locally */ +void initialize_buffer(void *foo) +{ + (void) foo; + unsigned id = starpu_worker_get_id(); +#ifdef STARPU_HAVE_POSIX_MEMALIGN + int ret = posix_memalign(&buffers[id], getpagesize(), 2*size); + STARPU_ASSERT(ret == 0); +#else + buffers[id] = malloc(2*size); +#endif + memset(buffers[id], 0, 2*size); +} + +/* Actual transfer codelet */ +void bw_func(void *descr[], void *arg) +{ + (void)descr; + int id = (uintptr_t) arg; + void *src = buffers[id]; + void *dst = (void*) ((uintptr_t)src + size); + unsigned i; + double start, stop; + + STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); + start = starpu_timing_now(); + for (i = 0; i < iter; i++) + { + memcpy(dst, src, size); + STARPU_SYNCHRONIZE(); + } + stop = starpu_timing_now(); + 
STARPU_PTHREAD_BARRIER_WAIT(&barrier_end); + finished = 1; + + result[id] = (size*iter) / (stop - start); +} + +static struct starpu_codelet bw_codelet = +{ + .cpu_funcs = {bw_func}, + .model = NULL, + .nbuffers = 0, +}; + +/* Codelet that waits for completion while doing lots of cpu yields (nop). */ +void nop_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); + while (!finished) + { + unsigned i; + for (i = 0; i < 1000000; i++) + STARPU_UYIELD(); + STARPU_SYNCHRONIZE(); + } +} + +static struct starpu_codelet nop_codelet = +{ + .cpu_funcs = {nop_func}, + .model = NULL, + .nbuffers = 0, +}; + +/* Codelet that waits for completion while aggressively reading the finished variable. */ +void sync_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); + while (!finished) + { + STARPU_VALGRIND_YIELD(); + STARPU_SYNCHRONIZE(); + } +} + +static struct starpu_codelet sync_codelet = +{ + .cpu_funcs = {sync_func}, + .model = NULL, + .nbuffers = 0, +}; + +static void usage(char **argv) +{ + fprintf(stderr, "Usage: %s [-n niter] [-s size (MB)] [-c cpustep] [-a]\n", argv[0]); + fprintf(stderr, "\t-n niter\tNumber of iterations\n"); + fprintf(stderr, "\t-s size\tBuffer size in MB\n"); + fprintf(stderr, "\t-c cpustep\tCpu number increment\n"); + fprintf(stderr, "\t-a Do not run the alone test\n"); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "n:s:c:ah")) != -1) + switch(c) + { + case 'n': + iter = atoi(optarg); + break; + case 's': + size = (long)atoi(optarg) << 20; + break; + case 'c': + cpustep = atoi(optarg); + break; + case 'a': + noalone = 1; + break; + case 'h': + usage(argv); + break; + } +} + +static unsigned interleave(unsigned i) +{ + /* TODO: rather distribute over hierarchy */ + if (total_ncpus > 1) + return (i % (total_ncpus/2))*2 + i / (total_ncpus/2); + else + return 0; +} + +enum 
/*
 * Run one bandwidth measurement and return the sum of the per-task results.
 *
 * argc, argv: forwarded to starpu_initialize().
 * nbusy: number of bw_codelet (memcpy) tasks submitted.
 * ncpus: number of CPU workers StarPU is started with.
 * intl: when non-zero, buffer/result indexes (and, depending on the mode,
 *       worker ids or worker bindings) go through interleave().
 * sleep: what the other workers do; for NOP and SYNC one waiting task is
 *        pinned on each remaining worker, for PAUSE and SCHED none is.
 *
 * StarPU is initialized and shut down inside this function, so it must be
 * called while StarPU is not running.
 *
 * NOTE(review): on -ENODEV this returns STARPU_TEST_SKIPPED converted to
 * float, which the caller cannot tell apart from a measured value.
 */
static float bench(int *argc, char ***argv, unsigned nbusy, unsigned ncpus, int intl, enum sleep_type sleep)
{
	int ret;
	unsigned i;
	struct starpu_conf conf;
	float bw;

	starpu_conf_init(&conf);
	conf.precedence_over_environment_variables = 1;
	starpu_conf_noworker(&conf);
	conf.ncpus = ncpus;

	if (intl && sleep == PAUSE)
	{
		/* Interleave through the worker binding rather than through
		 * the worker ids chosen for the tasks below */
		conf.use_explicit_workers_bindid = 1;
		for (i = 0; i < ncpus; i++)
			conf.workers_bindid[i] = interleave(i);
	}

	ret = starpu_initialize(&conf, argc, argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* barrier_begin is waited on by every submitted task (busy and
	 * waiting ones), barrier_end only by the busy ones */
	if (sleep == PAUSE || sleep == SCHED)
		/* In these cases we don't have a task on each cpu */
		STARPU_PTHREAD_BARRIER_INIT(&barrier_begin, NULL, nbusy);
	else
		STARPU_PTHREAD_BARRIER_INIT(&barrier_begin, NULL, ncpus);

	STARPU_PTHREAD_BARRIER_INIT(&barrier_end, NULL, nbusy);

	/* Reset the shared state written by the codelets */
	finished = 0;
	for (i = 0; i < ncpus; i++)
		result[i] = NAN;

	for (i = 0; i < nbusy; i++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &bw_codelet;

		/* cl_arg is the index bw_func uses into buffers[] and result[] */
		if (intl)
			task->cl_arg = (void*) (uintptr_t) interleave(i);
		else
			task->cl_arg = (void*) (uintptr_t) i;

		task->execute_on_a_specific_worker = 1;
		if (intl && sleep != PAUSE) /* In the pause case we interleaved above */
			task->workerid = interleave(i);
		else
			task->workerid = i;

		ret = starpu_task_submit(task);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	if (sleep != PAUSE && sleep != SCHED)
	{
		/* Add waiting tasks */
		/* i continues from nbusy: one waiting task per remaining worker */
		for (; i < ncpus; i++)
		{
			struct starpu_task *task = starpu_task_create();
			switch (sleep)
			{
			case NOP:
				task->cl = &nop_codelet;
				break;
			case SYNC:
				task->cl = &sync_codelet;
				break;
			default:
				STARPU_ASSERT(0);
			}
			task->execute_on_a_specific_worker = 1;
			/* NOTE(review): interleave() is applied here even when intl is 0 */
			task->workerid = interleave(i);
			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}
	}

	starpu_task_wait_for_all();
	starpu_shutdown();

	STARPU_PTHREAD_BARRIER_DESTROY(&barrier_begin);
	STARPU_PTHREAD_BARRIER_DESTROY(&barrier_end);

	/* Sum the results at the same indexes that were passed as cl_arg */
	for (bw = 0., i = 0; i < nbusy; i++)
	{
		if (intl)
			bw += result[interleave(i)];
		else
			bw += result[i];
	}
	return bw;
}
printf("%u\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", + n, + alone/1000, + sched/1000, sched*100/alone, + alone_int/1000, + alone_int_nop/1000, + alone_int_sync/1000, + sched_int/1000, sched_int*100/alone_int_nop); + fflush(stdout); + } + + free(result); + + for (n = 0; n < total_ncpus; n++) + free(buffers[n]); + free(buffers); + + return EXIT_SUCCESS; +} diff --git a/tests/microbenchs/bandwidth_scheds.sh b/tests/microbenchs/bandwidth_scheds.sh new file mode 100755 index 0000000..4d0f364 --- /dev/null +++ b/tests/microbenchs/bandwidth_scheds.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
# Run the bandwidth benchmark under one scheduler and register its curves.
#   $1: scheduler name, exported to the benchmark as STARPU_SCHED
#   $2: gnuplot line/point type used for this scheduler's curves
# The benchmark output is shown and saved to bandwidth-<sched>.dat, and two
# plot entries (columns 3 and 8) are appended to bandwidth.gp.
# Note that sched, type and extra are not local: they overwrite the
# caller's variables of the same name.
run()
{
	sched=$1
	type=$2

	# -a makes the benchmark skip its "alone" measurements; they are only
	# kept for eager, or when a single scheduler was requested
	if [ "$sched" != eager -a "$sched" != "$SCHEDS" ]; then
		extra=-a
	else
		extra=
	fi

	# NOTE(review): "$@" still holds $1 and $2 here, so the scheduler name
	# and the plot type are also passed to the benchmark as trailing
	# positional arguments -- confirm this is intended.
	STARPU_BACKOFF_MIN=0 STARPU_BACKOFF_MAX=0 STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $DIR/bandwidth $fast $extra "$@" | tee bandwidth-$sched.dat
	echo "\"bandwidth-$sched.dat\" using 1:3 with linespoints lt $type pt $type title \"$sched\", \\" >> bandwidth.gp
	echo "\"bandwidth-$sched.dat\" using 1:8 with linespoints lt $type pt $type notitle, \\" >> bandwidth.gp
}
/*
 * Print the sizes of StarPU's internal data structures on stderr.
 * Command-line arguments are accepted but not interpreted.
 */
int main(int argc, char **argv)
{
	/* Nothing is read from the command line */
	(void)argv;
	(void)argc;

	_starpu_debug_display_structures_size(stderr);
	return EXIT_SUCCESS;
}
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Trigger a ping-pong test between two CUDA GPUs + */ + +static size_t vector_size = 1; + +#ifdef STARPU_QUICK_CHECK +static int niter = 16; +#else +static int niter = 1000; +#endif +//static unsigned cnt; + +//static unsigned finished = 0; + +starpu_data_handle_t v_handle; +static unsigned *v; + +static char worker_0_name[128]; +static char worker_1_name[128]; +static unsigned memory_node_0; +static unsigned memory_node_1; + +double start; +double end; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Create a piece of data */ + ret = starpu_malloc((void **)&v, vector_size); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, vector_size, 1); + + /* Find a pair of memory nodes */ + if (starpu_cuda_worker_get_count() > 1) + { + /* Take the two devices that come first */ + int nworkers = (int)starpu_worker_get_count(); + + unsigned found_node_0 = 0; + + int w; + for (w = 0; w < nworkers; w++) + { + if (starpu_worker_get_type(w) == STARPU_CUDA_WORKER) + { + if (!found_node_0) + { + memory_node_0 = starpu_worker_get_memory_node(w); + 
starpu_worker_get_name(w, worker_0_name, 128); + found_node_0 = 1; + } + else + { + memory_node_1 = starpu_worker_get_memory_node(w); + starpu_worker_get_name(w, worker_1_name, 128); + break; + } + } + } + + fprintf(stderr, "Ping-pong will be done between %s (node %u) and %s (node %u)\n", + worker_0_name, memory_node_0, worker_1_name, memory_node_1); + } + + // unsigned iter; + + /* warm up */ + // unsigned nwarmupiter = 128; + _starpu_benchmark_ping_pong(v_handle, memory_node_0, memory_node_1, 128); + + start = starpu_timing_now(); + + _starpu_benchmark_ping_pong(v_handle, memory_node_0, memory_node_1, niter); + + end = starpu_timing_now(); + + double timing = end - start; + + fprintf(stderr, "Took %f ms\n", timing/1000); + fprintf(stderr, "Avg. transfer time : %f us\n", timing/(2*niter)); + + starpu_data_unregister(v_handle); + starpu_free_noflag(v, vector_size); + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/microbenchs/matrix_as_vector.c b/tests/microbenchs/matrix_as_vector.c new file mode 100644 index 0000000..d8acfc8 --- /dev/null +++ b/tests/microbenchs/matrix_as_vector.c @@ -0,0 +1,289 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +#ifdef STARPU_USE_CUDA +# include +#endif + +/* + * Compare the efficiency of matrix and vector interfaces + */ + +#ifdef STARPU_QUICK_CHECK +#define LOOPS 5 +#elif !defined(STARPU_LONG_CHECK) +#define LOOPS 30 +#else +#define LOOPS 100 +#endif + +void vector_cpu_func(void *descr[], void *cl_arg) +{ + (void)cl_arg; + STARPU_SKIP_IF_VALGRIND; + + float *matrix = (float *)STARPU_VECTOR_GET_PTR(descr[0]); + int nx = STARPU_VECTOR_GET_NX(descr[0]); + int i; + float sum=0; + + for(i=0 ; i mean=%7f != %7f\n", nx, matrix[0], mean); + ret = EXIT_FAILURE; + } +end: + if (ret == -ENODEV) + fprintf(stderr, "# Uh, ENODEV?!"); + starpu_free_noflag(matrix, nx*sizeof(matrix[0])); + starpu_task_wait_for_all(); + return ret; +} + +#define NX_MIN 1024 +#ifdef STARPU_QUICK_CHECK +#define NX_MAX 1024*32 +#else +#define NX_MAX 1024*1024 +#endif + +static +int check_size_on_device(uint32_t where, char *device_name) +{ + int nx, ret; + struct starpu_codelet vector_codelet; + struct starpu_codelet matrix_codelet; + + fprintf(stderr, "# Device: %s\n", device_name); + fprintf(stderr, "# nx vector_timing matrix_timing\n"); + starpu_codelet_init(&vector_codelet); + vector_codelet.modes[0] = STARPU_RW; + vector_codelet.nbuffers = 1; + if (where == STARPU_CPU) vector_codelet.cpu_funcs[0] = vector_cpu_func; +#ifdef STARPU_USE_CUDA + if (where == STARPU_CUDA) + { + vector_codelet.cuda_funcs[0] = vector_cuda_func; + vector_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; + } +#endif +// if (where == STARPU_OPENCL) vector_codelet.opencl_funcs[0] = vector_opencl_func; + + starpu_codelet_init(&matrix_codelet); + matrix_codelet.modes[0] = STARPU_RW; + matrix_codelet.nbuffers = 1; + if (where == STARPU_CPU) matrix_codelet.cpu_funcs[0] = matrix_cpu_func; +#ifdef STARPU_USE_CUDA + if (where == STARPU_CUDA) + { + matrix_codelet.cuda_funcs[0] = matrix_cuda_func; + matrix_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; + } +#endif +// if (where == STARPU_OPENCL) 
matrix_codelet.opencl_funcs[0] = matrix_opencl_func; + + for(nx=NX_MIN ; nx<=NX_MAX ; nx*=2) + { + ret = check_size(nx, &vector_codelet, &matrix_codelet, device_name); + if (ret != EXIT_SUCCESS) break; + } + return ret; +} + +int main(void) +{ + int ret; + unsigned devices; +#ifdef STARPU_USE_CUDA + int cublas_version; +#endif + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + devices = starpu_cpu_worker_get_count(); + if (devices) + { + ret = check_size_on_device(STARPU_CPU, "STARPU_CPU"); + if (ret) goto error; + } + +#ifdef STARPU_USE_CUDA + devices = starpu_cuda_worker_get_count(); + if (devices) + { + cublasHandle_t handle; + cublasCreate(&handle); + cublasGetVersion(handle, &cublas_version); + cublasDestroy(handle); + + if (cublas_version >= 7050) + { + starpu_cublas_init(); + ret = check_size_on_device(STARPU_CUDA, "STARPU_CUDA"); + if (ret) goto error; + starpu_cublas_shutdown(); + } + } +#endif +#if 0 + devices = starpu_opencl_worker_get_count(); + if (devices) + { + ret = check_size_on_device(STARPU_OPENCL, "STARPU_OPENCL"); + if (ret) goto error; + } +#endif + +error: + if (ret == -ENODEV) ret=STARPU_TEST_SKIPPED; + + starpu_shutdown(); + STARPU_RETURN(ret); +} diff --git a/tests/microbenchs/microbench.sh b/tests/microbenchs/microbench.sh new file mode 100755 index 0000000..64ad411 --- /dev/null +++ b/tests/microbenchs/microbench.sh @@ -0,0 +1,118 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
# Run $TEST under one scheduler and classify the outcome.
#   $1: scheduler name, exported to the test as STARPU_SCHED
# Prints SUCCESS/SKIP/FAIL/XFAIL and returns 1 only for an unexpected
# failure: a scheduler listed in XSUCCESS that failed or, when XSUCCESS is
# empty, a scheduler not listed in XFAIL that failed. Returns 0 otherwise.
run()
{
	sched=$1
	# The verdict is kept in a function-local variable: test_scheds
	# accumulates its final status in the global RESULT, and resetting
	# that one here would erase an earlier unexpected failure as soon as
	# a later scheduler fails expectedly.
	local result=0

	# NOTE(review): "$@" still contains $1, so the scheduler name is also
	# passed to the test as a positional argument -- confirm this is
	# intended.
	set +e
	STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $(dirname $0)/$TEST "$@"
	ret=$?
	set -e
	if test $ret = 0
	then
		# Descriptor 9 may not be open; that is not an error
		( echo PASS: STARPU_SCHED=$sched ./microbenchs/$TEST >&9 ) 2> /dev/null || true
		echo "SUCCESS: STARPU_SCHED=$sched ./microbenchs/$TEST"
		return 0
	fi
	if test $ret = 77
	then
		echo "SKIP: STARPU_SCHED=$sched ./microbenchs/$TEST"
		return 0
	fi

	if [ -n "$XSUCCESS" ]
	then
		# We have a list of schedulers that are expected to
		# succeed, others are allowed to fail
		case " $XSUCCESS " in
			*\ $sched\ *)
				echo "FAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" | ( tee /dev/tty || true )
				result=1
				;;
			*)
				echo "XFAIL: STARPU_SCHED=$sched ./microbenchs/$TEST"
				;;
		esac
	else
		# We have a list of schedulers that are expected to
		# fail, others are expected to succeed
		case " $XFAIL " in
			*\ $sched\ *)
				echo "XFAIL: STARPU_SCHED=$sched ./microbenchs/$TEST"
				;;
			*)
				echo "FAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" | ( tee /dev/tty || true )
				result=1
				;;
		esac
	fi
	return $result
}
$sched & + done + while true + do + set +e + wait -n + RET=$? + set -e + if [ $RET = 127 ] ; then break ; fi + if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi + done + else + for sched in $SCHEDS + do + set +e + run $sched + RET=$? + set -e + if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi + done + fi + exit $RESULT +} diff --git a/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.c b/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.c new file mode 100644 index 0000000..1c4d8f7 --- /dev/null +++ b/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.c @@ -0,0 +1,161 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* Run a series of tasks with homogeneous execution time and share data to some extent */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 5 +#define DATA_COEFFICIENT 5.5 +#define MARGIN 0.15 +#else +#define TASK_COEFFICIENT 10 +#define DATA_COEFFICIENT 10.5 +#define MARGIN 0.05 +#endif +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 +#define NB_FLOAT 4000000 + +void wait_homogeneous(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void)t; (void)a; (void)i; + return TIME * 1000000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 1, + .modes = {STARPU_RW}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_data, nb_workers; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = nb_workers*TASK_COEFFICIENT*DATA_COEFFICIENT; + nb_data = nb_workers*DATA_COEFFICIENT; + + /* We consider a vector of float that is initialized just as any of C + * data */ + float *vector[nb_data]; + starpu_data_handle_t vector_handle[nb_data]; + unsigned i,j; + for (j = 0; j < nb_data; 
j++) + { + vector[j] = malloc(NB_FLOAT * sizeof(float)); +#ifndef STARPU_SIMGRID + for (i = 0; i < NB_FLOAT; i++) + vector[j][i] = (i+1.0f); +#endif + + /* Tell StaPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it. + * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element. + */ + starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); + } + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + FPRINTF(stderr, "measured time = %f seconds\n", time_m); + FPRINTF(stderr, "sequential time = %f seconds\n", time_s); + FPRINTF(stderr, "speed up = %f\n", speed_up); + FPRINTF(stderr, "number of workers = %u\n", nb_workers); + FPRINTF(stderr, "number of tasks = %u\n", nb_tasks); + FPRINTF(stderr, "expected speed up = %f\n", expected_speed_up); + FPRINTF(stderr, "percentage of expected speed up %.2f%%\n", percentage_expected_speed_up); + + starpu_shutdown(); + for (j = 0; j < nb_data; j++) + free(vector[j]); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh b/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh new 
file mode 100755 index 0000000..a3447a2 --- /dev/null +++ b/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas" + +test_scheds parallel_dependent_homogeneous_tasks_data diff --git a/tests/microbenchs/parallel_independent_heterogeneous_tasks.c b/tests/microbenchs/parallel_independent_heterogeneous_tasks.c new file mode 100644 index 0000000..10db6d3 --- /dev/null +++ b/tests/microbenchs/parallel_independent_heterogeneous_tasks.c @@ -0,0 +1,147 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* Run a series of independent tasks with heterogeneous execution time */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.20 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.10 +#endif +#define TIME_CUDA_COEFFICIENT 10 +#define TIME_OPENCL_COEFFICIENT 5 +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 + +void wait_CPU(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +void wait_CUDA(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); +} + +void wait_OPENCL(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; (void) i; + STARPU_ASSERT(a->ndevices == 1); + if (a->devices[0].type == STARPU_CPU_WORKER) + { + STARPU_ASSERT(a->devices[0].ncores == 1); + return TIME * 1000000; + } + else if (a->devices[0].type == STARPU_CUDA_WORKER) + { + return TIME/TIME_CUDA_COEFFICIENT * 1000000; + } + else if (a->devices[0].type == STARPU_OPENCL_WORKER) + { + return TIME/TIME_OPENCL_COEFFICIENT * 1000000; + } + STARPU_ASSERT(0); + return 0.0; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_CPU }, + .cuda_funcs = { wait_CUDA }, + .opencl_funcs = { wait_OPENCL }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 0, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ 
+ int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, i; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); + nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= (1 - MARGIN) * expected_speed_up; + check_sup = speed_up <= (1 + MARGIN) * expected_speed_up; + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up = %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_independent_heterogeneous_tasks.sh b/tests/microbenchs/parallel_independent_heterogeneous_tasks.sh new file mode 100755 index 0000000..444a324 --- /dev/null +++ b/tests/microbenchs/parallel_independent_heterogeneous_tasks.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XFAIL="lws ws eager prio modular-prio modular-eager modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test" + +test_scheds parallel_independent_heterogeneous_tasks diff --git a/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.c b/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.c new file mode 100644 index 0000000..1e82932 --- /dev/null +++ b/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.c @@ -0,0 +1,186 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* Run a series of independent tasks with heterogeneous execution time and independent data */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.20 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.10 +#endif +#define TIME_CUDA_COEFFICIENT 10 +#define TIME_OPENCL_COEFFICIENT 5 +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 +#define NB_FLOAT 400000 + +void wait_CPU(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +void wait_CUDA(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); +} + +void wait_OPENCL(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; (void) i; + STARPU_ASSERT(a->ndevices == 1); + if (a->devices[0].type == STARPU_CPU_WORKER) + { + STARPU_ASSERT(a->devices[0].ncores == 1); + return TIME * 1000000; + } + else if (a->devices[0].type == STARPU_CUDA_WORKER) + { + return TIME/TIME_CUDA_COEFFICIENT * 1000000; + } + else if (a->devices[0].type == STARPU_OPENCL_WORKER) + { + return TIME/TIME_OPENCL_COEFFICIENT * 1000000; + } + STARPU_ASSERT(0); + return 0.0; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_CPU }, + .cuda_funcs = { wait_CUDA }, + .opencl_funcs = { wait_OPENCL }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 1, + .modes = {STARPU_RW}, + .flags = 
STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); + nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; + + /* We consider a vector of float that is initialized just like any other C + * data */ + float *vector[nb_tasks]; + starpu_data_handle_t vector_handle[nb_tasks]; + unsigned i,j; + for (j = 0; j < nb_tasks; j++) + { + starpu_malloc((void **)&vector[j], NB_FLOAT * sizeof(float)); +#ifndef STARPU_SIMGRID + for (i = 0; i < NB_FLOAT; i++) + vector[j][i] = (i+1.0f); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it. + * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element.
+ */ + starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); + } + + + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + for (j = 0; j < nb_tasks; j++) + starpu_free_noflag(vector[j], NB_FLOAT * sizeof(float)); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh b/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh new file mode 100755 index 0000000..e397a4b --- /dev/null +++ b/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" + +test_scheds parallel_independent_heterogeneous_tasks_data diff --git a/tests/microbenchs/parallel_independent_homogeneous_tasks.c b/tests/microbenchs/parallel_independent_homogeneous_tasks.c new file mode 100644 index 0000000..b9f8fa0 --- /dev/null +++ b/tests/microbenchs/parallel_independent_homogeneous_tasks.c @@ -0,0 +1,116 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* Run a series of independent tasks with homogeneous execution time */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.15 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.05 +#endif +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 + +void wait_homogeneous(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void)t; + (void)a; + (void)i; + return TIME * 1000000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 0, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers, i; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = nb_workers*TASK_COEFFICIENT; + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up = %.2f%%\n", time_m, 
time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_independent_homogeneous_tasks.sh b/tests/microbenchs/parallel_independent_homogeneous_tasks.sh new file mode 100755 index 0000000..6b08ee5 --- /dev/null +++ b/tests/microbenchs/parallel_independent_homogeneous_tasks.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test" + +test_scheds parallel_independent_homogeneous_tasks diff --git a/tests/microbenchs/parallel_independent_homogeneous_tasks_data.c b/tests/microbenchs/parallel_independent_homogeneous_tasks_data.c new file mode 100644 index 0000000..66e5f2a --- /dev/null +++ b/tests/microbenchs/parallel_independent_homogeneous_tasks_data.c @@ -0,0 +1,153 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* Run a series of independent tasks with homogeneous execution time and independent data */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.20 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.10 +#endif +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 +#define NB_FLOAT 4000000 + +void wait_homogeneous(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void)t; (void)a; (void)i; + return TIME * 1000000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 1, + .modes = {STARPU_RW}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = nb_workers*TASK_COEFFICIENT; + + /* We consider a vector of float that is initialized just as any of C + * data */ + float *vector[nb_tasks]; + starpu_data_handle_t vector_handle[nb_tasks]; + unsigned i,j; + for (j = 0; j < nb_tasks; j++) + { + vector[j] = malloc(NB_FLOAT * sizeof(float)); +#ifndef STARPU_SIMGRID + for (i = 0; i < NB_FLOAT; i++) + 
vector[j][i] = (i+1.0f); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it. + * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element. + */ + starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); + } + + + + begin_time = starpu_timing_now(); + + /*run the tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + for (j = 0; j < nb_tasks; j++) + free(vector[j]); + + //test passed or test failed + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh b/tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh new file mode 100755 index 0000000..47b6d90 --- /dev/null +++ b/tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" + +test_scheds parallel_independent_homogeneous_tasks_data diff --git a/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c b/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c new file mode 100644 index 0000000..3188000 --- /dev/null +++ b/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c @@ -0,0 +1,219 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* Run a series of tasks with heterogeneous execution time and redux data */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.20 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.10 +#endif +#define TIME_CUDA_COEFFICIENT 10 +#define TIME_OPENCL_COEFFICIENT 5 +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 +#define NB_FLOAT 400000 + +void wait_CPU(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +void wait_CUDA(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); +} + +void wait_OPENCL(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; (void) i; + STARPU_ASSERT(a->ndevices == 1); + if (a->devices[0].type == STARPU_CPU_WORKER) + { + STARPU_ASSERT(a->devices[0].ncores == 1); + return TIME * 1000000; + } + else if (a->devices[0].type == STARPU_CUDA_WORKER) + { + return TIME/TIME_CUDA_COEFFICIENT * 1000000; + } + else if (a->devices[0].type == STARPU_OPENCL_WORKER) + { + return TIME/TIME_OPENCL_COEFFICIENT * 1000000; + } + STARPU_ASSERT(0); + return 0.0; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_CPU }, + .cuda_funcs = { wait_CUDA }, + .opencl_funcs = { wait_OPENCL }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 1, + .modes = {STARPU_REDUX}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, + .name = "cl", +}; + +static struct starpu_perfmodel perf_model_init = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl_init = +{ + .cpu_funcs = { wait_CPU }, + .cuda_funcs = { wait_CUDA }, + 
.opencl_funcs = { wait_OPENCL }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 1, + .modes = {STARPU_RW}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model_init, + .name = "init", +}; + +static struct starpu_perfmodel perf_model_redux = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl_redux = +{ + .cpu_funcs = { wait_CPU }, + .cuda_funcs = { wait_CUDA }, + .opencl_funcs = { wait_OPENCL }, + .cpu_funcs_name = { "wait_CPU" }, + .nbuffers = 2, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model_redux, + .name = "redux", +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); + nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; + + /* We consider a vector of float that is initialized just like any other C + * data */ + float *vector; + starpu_data_handle_t vector_handle; + unsigned i; + vector = calloc(NB_FLOAT, sizeof(float)); +#ifndef STARPU_SIMGRID + for (i = 0; i < NB_FLOAT; i++) + vector[i] = (i+1.0f); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it.
+ * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element. + */ + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NB_FLOAT, sizeof(vector[0])); + starpu_data_set_reduction_methods(vector_handle, &cl_redux, &cl_init); + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + free(vector); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh b/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh new file mode 100755 index 0000000..e397a4b --- /dev/null +++ b/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source $(dirname $0)/microbench.sh + +XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" + +test_scheds parallel_redux_heterogeneous_tasks_data diff --git a/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c b/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c new file mode 100644 index 0000000..119e1ad --- /dev/null +++ b/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c @@ -0,0 +1,186 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2016-2016 Bérangère Subervie + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include "../helper.h" + +/* Run a series of tasks with homogeneous execution time and redux data */ + +#define TIME 0.010 +#ifdef STARPU_QUICK_CHECK +#define TASK_COEFFICIENT 20 +#define MARGIN 0.20 +#else +#define TASK_COEFFICIENT 100 +#define MARGIN 0.10 +#endif +#define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 +#define NB_FLOAT 4000000 + +void wait_homogeneous(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + starpu_sleep(TIME); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void)t; (void)a; (void)i; + return TIME * 1000000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 1, + .modes = {STARPU_REDUX}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model, + .name = "cl", +}; + +static struct starpu_perfmodel perf_model_init = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl_init = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 1, + .modes = {STARPU_RW}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model = &perf_model_init, + .name = "init", +}; + +static struct starpu_perfmodel perf_model_redux = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet cl_redux = +{ + .cpu_funcs = { wait_homogeneous }, + .cuda_funcs = { wait_homogeneous }, + .opencl_funcs = { wait_homogeneous }, + .cpu_funcs_name = { "wait_homogeneous" }, + .nbuffers = 2, + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, + .model 
= &perf_model_redux, + .name = "redux", +}; + +int main(int argc, char *argv[]) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned nb_tasks, nb_workers; + double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; + bool check, check_sup; + + nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); + nb_tasks = nb_workers*TASK_COEFFICIENT; + + /* We consider a vector of float that is initialized just like any other C + * data */ + float *vector; + starpu_data_handle_t vector_handle; + unsigned i; + vector = calloc(NB_FLOAT, sizeof(float)); +#ifndef STARPU_SIMGRID + for (i = 0; i < NB_FLOAT; i++) + vector[i] = (i+1.0f); +#endif + + /* Tell StarPU to associate the "vector" vector with the "vector_handle" + * identifier. When a task needs to access a piece of data, it should + * refer to the handle that is associated to it. + * In the case of the "vector" data interface: + * - the first argument of the registration method is a pointer to the + * handle that should describe the data + * - the second argument is the memory node where the data (ie. "vector") + * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as + * opposed to an address on a GPU for instance. + * - the third argument is the address of the vector in RAM + * - the fourth argument is the number of elements in the vector + * - the fifth argument is the size of each element.
+ */ + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NB_FLOAT, sizeof(vector[0])); + starpu_data_set_reduction_methods(vector_handle, &cl_redux, &cl_init); + + begin_time = starpu_timing_now(); + + /*execution des tasks*/ + + for (i=0; i= ((1 - MARGIN) * expected_speed_up); + check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); + + printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); + + starpu_shutdown(); + free(vector); + + //test reussi ou test echoue + if (check && check_sup) + { + return EXIT_SUCCESS; + } + else + { + return EXIT_FAILURE; + } +} diff --git a/tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh b/tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh new file mode 100755 index 0000000..47b6d90 --- /dev/null +++ b/tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +source "$(dirname "$0")/microbench.sh" + +XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" + +test_scheds parallel_redux_homogeneous_tasks_data diff --git a/tests/microbenchs/prefetch_data_on_node.c b/tests/microbenchs/prefetch_data_on_node.c new file mode 100644 index 0000000..3ae1d03 --- /dev/null +++ b/tests/microbenchs/prefetch_data_on_node.c @@ -0,0 +1,192 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try calling starpu_data_prefetch_on_node before running a task there + */ + +#ifdef STARPU_QUICK_CHECK +#define N 10 +#elif !defined(STARPU_LONG_CHECK) +#define N 100 +#else +#define N 1000 +#endif + +#define VECTORSIZE 1024 + +starpu_data_handle_t v_handle; +static unsigned *v; + +static +void callback(void *arg) +{ + unsigned node = (unsigned)(uintptr_t) arg; + + starpu_data_prefetch_on_node(v_handle, node, 1); +} + +void codelet_null(void *descr[], void *_args) +{ + (void)descr; + (void)_args; +} + +static struct starpu_codelet cl_r = +{ + .cpu_funcs = {codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_R} +}; + +static struct starpu_codelet cl_w = +{ + .cpu_funcs = {codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static struct starpu_codelet cl_rw = +{ + .cpu_funcs = {codelet_null}, + .cuda_funcs = {codelet_null}, + .opencl_funcs = {codelet_null}, + .cpu_funcs_name = {"codelet_null"}, + .nbuffers = 1, + .modes = {STARPU_RW} +}; + +static struct starpu_codelet *select_codelet_with_random_mode(void) +{ + int r = rand(); + + switch (r % 3) + { + case 0: + return &cl_r; + case 1: + return &cl_w; + case 2: + return &cl_rw; + }; + return &cl_rw; +} + +int main(int argc, char **argv) +{ + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + + conf.ncpus = -1; + conf.ncuda = -1; + conf.nopencl = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, 
VECTORSIZE, sizeof(unsigned)); + + unsigned nworker = starpu_worker_get_count(); + + unsigned iter, worker; + for (iter = 0; iter < N; iter++) + { + for (worker = 0; worker < nworker; worker++) + { + /* synchronous prefetch */ + unsigned node = starpu_worker_get_memory_node(worker); + ret = starpu_data_prefetch_on_node(v_handle, node, 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node"); + + /* execute a task */ + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = v_handle; + task->cl = select_codelet_with_random_mode(); + task->synchronous = 1; + task->execute_on_a_specific_worker = 1; + task->workerid = worker; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + for (iter = 0; iter < N; iter++) + { + for (worker = 0; worker < nworker; worker++) + { + /* asynchronous prefetch */ + unsigned node = starpu_worker_get_memory_node(worker); + ret = starpu_data_prefetch_on_node(v_handle, node, 1); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node"); + + /* execute a task */ + struct starpu_task *task = starpu_task_create(); + + task->handles[0] = v_handle; + task->cl = select_codelet_with_random_mode(); + task->callback_func = callback; + task->callback_arg = (void*)(uintptr_t) starpu_worker_get_memory_node((worker+1)%nworker); + task->execute_on_a_specific_worker = 1; + task->workerid = worker; + + task->synchronous = 0; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(v_handle); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not 
perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/microbenchs/redundant_buffer.c b/tests/microbenchs/redundant_buffer.c new file mode 100644 index 0000000..2a61da8 --- /dev/null +++ b/tests/microbenchs/redundant_buffer.c @@ -0,0 +1,82 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include +#include "../helper.h" + +/* + * Try passing the same buffer twice to the same task + */ + +#define N 10000 + +#define VECTORSIZE 1024 + +starpu_data_handle_t v_handle; +static unsigned *v; + +int main(int argc, char **argv) +{ + int ret; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); + + starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); + + starpu_codelet_nop.nbuffers = 2; + starpu_codelet_nop.modes[0] = STARPU_R; + starpu_codelet_nop.modes[1] = STARPU_R; + + unsigned iter; + for (iter = 0; iter < N; iter++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &starpu_codelet_nop; + + task->handles[0] = v_handle; + task->handles[1] = v_handle; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_data_unregister(v_handle); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + starpu_data_unregister(v_handle); + starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/microbenchs/starpu_check.sh b/tests/microbenchs/starpu_check.sh new file mode 100755 index 0000000..8651c7d --- /dev/null +++ b/tests/microbenchs/starpu_check.sh @@ -0,0 +1,113 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +TEST_DIR=$PWD + +ntests=0 +nfailed=0 +nsuccess=0 + +print_summary() +{ + if test $nfailed = 0; then + echo "**** All tests are successful ****" + else + echo "$nfailed test(s) failed out of $ntests" + fi +} + +test_with_timeout() +{ + timeout=$1 + application=$2 + + ntests=$(($ntests + 1)) + + echo "$application" + + $MS_LAUNCHER $STARPU_LAUNCH $application > /dev/null 2> /dev/null & _pid_appli=$!; + (sleep $timeout ; kill -9 $_pid_appli 2> /dev/null) & _pid_killer=$! + wait $_pid_appli + ret=$?
+ kill $_pid_killer 2> /dev/null + if test $ret = 0; then + echo " SUCCESS" + nsuccess=$(($nsuccess + 1)) + else + case $ret in + 137) # sigkill + echo " TEST TIMEOUT" + ;; + 139) # sigsegv + echo " TEST FAILED: SIGSEGV" + ;; + *) + echo " TEST FAILED (ret = $ret)" + esac + nfailed=$(($nfailed + 1)) + fi +} + +echo +echo "**********************" +echo "TEST synchronous tasks" +echo "**********************" +echo + +test_with_timeout 10 "./sync_tasks_overhead -i 10000" 2> /dev/null + +echo +echo "***********************" +echo "TEST asynchronous tasks" +echo "***********************" +echo + +test_with_timeout 10 "./async_tasks_overhead -i 20000" 2> /dev/null + +echo +echo "**************" +echo "TEST increment" +echo "**************" +echo + +test_with_timeout 10 "../../examples/incrementer/incrementer" 2> /dev/null + +echo +echo "**********" +echo "TEST tag 1" +echo "**********" +echo + +test_with_timeout 60 "../../examples/tag_example/tag_example -iter 1000" 2> /dev/null + +echo +echo "**********" +echo "TEST tag 2" +echo "**********" +echo + +test_with_timeout 10 "../../examples/tag_example/tag_example2 -iter 100" 2> /dev/null + + + +echo +echo "*******" +echo "SUMMARY" +echo "*******" +echo + +print_summary diff --git a/tests/microbenchs/sync_tasks_data_overhead.sh b/tests/microbenchs/sync_tasks_data_overhead.sh new file mode 100755 index 0000000..75e77eb --- /dev/null +++ b/tests/microbenchs/sync_tasks_data_overhead.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version.
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +ROOT=${0%.sh} +ROOT=$(echo $ROOT | sed 's/tasks_data_overhead/tasks_overhead/') +exec $MS_LAUNCHER $STARPU_LAUNCH $ROOT -b 1 "$@" diff --git a/tests/microbenchs/sync_tasks_overhead.c b/tests/microbenchs/sync_tasks_overhead.c new file mode 100644 index 0000000..ff375d1 --- /dev/null +++ b/tests/microbenchs/sync_tasks_overhead.c @@ -0,0 +1,197 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Measure the cost of submitting synchronous tasks + */ + +starpu_data_handle_t data_handles[8]; +float *buffers[8]; + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 128; +#else +static unsigned ntasks = 65536; +#endif +static unsigned nbuffers = 0; + +#define BUFFERSIZE 16 + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 0, + .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} +}; + +static int inject_one_task(void) +{ + int ret; + struct starpu_task *task = starpu_task_create(); + + task->cl = &dummy_codelet; + task->cl_arg = NULL; + task->callback_func = NULL; + task->synchronous = 1; + + ret = starpu_task_submit(task); + return ret; + +} + +static void usage(char **argv) +{ + fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char **argv, struct starpu_conf *conf) +{ + int c; + while ((c = getopt(argc, argv, "i:b:p:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 'b': + nbuffers = atoi(optarg); + dummy_codelet.nbuffers = nbuffers; + break; + case 'p': + conf->sched_policy_name = optarg; + break; + case 'h': + usage(argv); + break; + } +} + +int main(int argc, char **argv) +{ + int ret; + unsigned i; + double timing; + double start; + double end; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncpus = 2; + + parse_args(argc, argv, &conf); + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned buffer; + for (buffer = 0; buffer < nbuffers; buffer++) + { + 
starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); + memset(buffers[buffer], 0, BUFFERSIZE*sizeof(float)); + starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); + } + + fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); + + start = starpu_timing_now(); + for (i = 0; i < ntasks; i++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &dummy_codelet; + task->synchronous = 1; + + /* we have 8 buffers at most */ + for (buffer = 0; buffer < nbuffers; buffer++) + { + task->handles[buffer] = data_handles[buffer]; + } + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + end = starpu_timing_now(); + + timing = end - start; + + fprintf(stderr, "Total: %f secs\n", timing/1000000); + fprintf(stderr, "Per task: %f usecs\n", timing/ntasks); + + { + char *output_dir = getenv("STARPU_BENCH_DIR"); + char *bench_id = getenv("STARPU_BENCH_ID"); + + if (output_dir && bench_id) + { + char number[1+sizeof(nbuffers)*3+1]; + const char *numberp; + char file[1024]; + FILE *f; + + if (nbuffers) + { + snprintf(number, sizeof(number), "_%u", nbuffers); + numberp = number; + } + else + numberp = ""; + + snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing/1000000); + fclose(f); + + snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing/ntasks); + fclose(f); + } + } + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_unregister(data_handles[buffer]); + starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the 
computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/microbenchs/tasks_data_overhead.sh b/tests/microbenchs/tasks_data_overhead.sh new file mode 100755 index 0000000..75e77eb --- /dev/null +++ b/tests/microbenchs/tasks_data_overhead.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +ROOT=${0%.sh} +ROOT=$(echo $ROOT | sed 's/tasks_data_overhead/tasks_overhead/') +exec $MS_LAUNCHER $STARPU_LAUNCH $ROOT -b 1 "$@" diff --git a/tests/microbenchs/tasks_overhead.c b/tests/microbenchs/tasks_overhead.c new file mode 100644 index 0000000..50b04e2 --- /dev/null +++ b/tests/microbenchs/tasks_overhead.c @@ -0,0 +1,258 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include +#include "../helper.h" + +/* + * Measure the submission time and execution time of asynchronous tasks + */ + +starpu_data_handle_t data_handles[8]; +float *buffers[8]; + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 128; +#else +static unsigned ntasks = 65536; +#endif +static unsigned nbuffers = 0; + +#define BUFFERSIZE 16 + +struct starpu_task *tasks; + +void dummy_func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_codelet dummy_codelet = +{ + .cpu_funcs = {dummy_func}, + .cuda_funcs = {dummy_func}, + .opencl_funcs = {dummy_func}, + .cpu_funcs_name = {"dummy_func"}, + .model = NULL, + .nbuffers = 0, + .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} +}; + +static void usage(char **argv) +{ + fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char **argv, struct starpu_conf *conf) +{ + int c; + while ((c = getopt(argc, argv, "i:b:p:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 'b': + nbuffers = atoi(optarg); + dummy_codelet.nbuffers = nbuffers; + break; + case 'p': + conf->sched_policy_name = optarg; + break; + case 'h': + usage(argv); + break; + } +} + +int main(int argc, char **argv) +{ + int ret; + unsigned i; + + double timing_submit; + double start_submit; + double end_submit; + + double timing_exec; + double start_exec; + double end_exec; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.ncpus = 2; + + parse_args(argc, argv, &conf); + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return
STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + unsigned buffer; + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); + starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); + } + + fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); + + /* submit tasks (but don't execute them yet !) */ + tasks = calloc(ntasks, sizeof(*tasks)); /* calloc(n, size) checks the n*size product for overflow */ + + for (i = 0; i < ntasks; i++) + { + starpu_task_init(&tasks[i]); + tasks[i].cl = &dummy_codelet; + tasks[i].synchronous = 0; + tasks[i].use_tag = 1; + tasks[i].tag_id = (starpu_tag_t)i; + + /* we have 8 buffers at most */ + for (buffer = 0; buffer < nbuffers; buffer++) + { + tasks[i].handles[buffer] = data_handles[buffer]; + } + } + tasks[ntasks-1].detach = 0; /* the last task is made non-detached: it is the one waited on below */ + + start_submit = starpu_timing_now(); + if (nbuffers) + { + /* Data dependency, just submit them all */ + for (i = 0; i < ntasks; i++) + { + ret = starpu_task_submit(&tasks[i]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + } + else + { + /* No data dependency, we have to introduce dependencies by hand */ + for (i = 1; i < ntasks; i++) + { + starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1)); + + ret = starpu_task_submit(&tasks[i]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + /* submit the first task */ + ret = starpu_task_submit(&tasks[0]); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + end_submit = starpu_timing_now(); + + /* wait for the execution of the tasks */ + start_exec = starpu_timing_now(); + ret = starpu_task_wait(&tasks[ntasks-1]); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); + end_exec = starpu_timing_now(); + + starpu_task_wait_for_all(); + + for (i = 0; i < ntasks; i++) +
starpu_task_clean(&tasks[i]); + + timing_submit = end_submit - start_submit; + timing_exec = end_exec - start_exec; + + fprintf(stderr, "Total submit: %f secs\n", timing_submit/1000000); + fprintf(stderr, "Per task submit: %f usecs\n", timing_submit/ntasks); + fprintf(stderr, "\n"); + fprintf(stderr, "Total execution: %f secs\n", timing_exec/1000000); + fprintf(stderr, "Per task execution: %f usecs\n", timing_exec/ntasks); + fprintf(stderr, "\n"); + fprintf(stderr, "Total: %f secs\n", (timing_submit+timing_exec)/1000000); + fprintf(stderr, "Per task: %f usecs\n", (timing_submit+timing_exec)/ntasks); + + { + char *output_dir = getenv("STARPU_BENCH_DIR"); + char *bench_id = getenv("STARPU_BENCH_ID"); + + if (output_dir && bench_id) + { + char number[1+sizeof(nbuffers)*3+1]; + const char *numberp; + char file[1024]; + FILE *f; + + if (nbuffers) + { + snprintf(number, sizeof(number), "_%u", nbuffers); + numberp = number; + } + else + numberp = ""; + + snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing_submit/1000000); + fclose(f); + + snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing_submit/ntasks); + fclose(f); + + snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing_exec/1000000); + fclose(f); + + snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, timing_exec/ntasks); + fclose(f); + + snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/1000000); + fclose(f); + + snprintf(file, sizeof(file),
"%s/tasks_overhead_per_task_submit_execution%s.dat", output_dir, numberp); + f = fopen(file, "a"); + fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/ntasks); + fclose(f); + } + } + + for (buffer = 0; buffer < nbuffers; buffer++) + { + starpu_data_unregister(data_handles[buffer]); + starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); + } + + starpu_shutdown(); + free(tasks); + return EXIT_SUCCESS; + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + free(tasks); + return STARPU_TEST_SKIPPED; +} diff --git a/tests/microbenchs/tasks_size_overhead.c b/tests/microbenchs/tasks_size_overhead.c new file mode 100644 index 0000000..6456d1b --- /dev/null +++ b/tests/microbenchs/tasks_size_overhead.c @@ -0,0 +1,345 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +#include +#include "../helper.h" + +/* + * This benchmark creates a thousand tasks of the same (small) duration, with + * various number of cpus and various durations. + * + * Use ./tasks_size_overhead.sh to generate a plot of the result. + * + * Thanks Martin Tillenius for the idea.
+ */ + +#define START 4 +#define STOP 4096 +#ifdef STARPU_QUICK_CHECK +#define FACTOR 64 +#else +#define FACTOR 2 +#endif + +#ifdef STARPU_QUICK_CHECK +static unsigned ntasks = 1; +#elif !defined(STARPU_LONG_CHECK) +static unsigned ntasks = 64; +#else +static unsigned ntasks = 256; +#endif + +static unsigned nbuffers = 0; +static unsigned total_nbuffers = 0; + +static unsigned mincpus = 1, maxcpus, cpustep; +static unsigned mintime = START, maxtime = STOP, factortime = FACTOR; + +struct starpu_task *tasks; + +void func(void *descr[], void *arg) +{ + (void)descr; + unsigned n = (uintptr_t)arg; + long usec = 0; + double tv1 = starpu_timing_now(); + do + { + double tv2 = starpu_timing_now(); + usec = tv2 - tv1; + } + while (usec < (long) n); +} + +double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; (void) i; (void) a; + unsigned n = (uintptr_t) t->cl_arg; + return n; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet codelet = +{ + .cpu_funcs = {func}, + .nbuffers = 0, + .modes = {STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, + .model = &perf_model, +}; + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "i:b:B:c:C:s:t:T:f:h")) != -1) + switch(c) + { + case 'i': + ntasks = atoi(optarg); + break; + case 'b': + nbuffers = atoi(optarg); + codelet.nbuffers = nbuffers; + break; + case 'B': + total_nbuffers = atoi(optarg); + break; + case 'c': + mincpus = atoi(optarg); + break; + case 'C': + maxcpus = atoi(optarg); + break; + case 's': + cpustep = atoi(optarg); + break; + case 't': + mintime = atoi(optarg); + break; + case 'T': + maxtime = atoi(optarg); + break; + case 'f': + factortime = atoi(optarg); + break; + case 'h': + fprintf(stderr, "\ +Usage: %s [-h]\n\ + [-i ntasks] [-b nbuffers] [-B total_nbuffers]\n\ + [-c mincpus] [ -C maxcpus] [-s 
cpustep]\n\
+ [-t mintime] [-T maxtime] [-f factortime]\n\n", argv[0]);
+			fprintf(stderr,"\
+runs 'ntasks' tasks\n\
+- using 'nbuffers' data each, randomly among 'total_nbuffers' choices,\n\
+- with varying task durations, from 'mintime' to 'maxtime' (using 'factortime')\n\
+- on varying numbers of cpus, from 'mincpus' to 'maxcpus' (using 'cpustep')\n\
+\n\
+currently selected parameters: %u tasks using %u buffers among %u, from %uus to %uus (factor %u), from %u cpus to %u cpus (step %u)\n\
+", ntasks, nbuffers, total_nbuffers, mintime, maxtime, factortime, mincpus, maxcpus, cpustep);
+			exit(EXIT_SUCCESS);
+			break;
+		}
+}
+
+/* Benchmark entry point.
+ *
+ * For every CPU count from 'mincpus' to 'maxcpus' (stepping by 'cpustep') and
+ * every task duration from 'mintime' to 'maxtime' (multiplied by 'factortime'
+ * at each step), submit ntasks*ncpus tasks, wait for them and print the
+ * elapsed time.  A sequential reference row ("seq") is printed first by
+ * calling func() directly.
+ *
+ * Returns EXIT_SUCCESS when the benchmark ran, STARPU_TEST_SKIPPED when it is
+ * disabled through STARPU_MICROBENCHS_DISABLED, when no worker can run it or
+ * when the buffer counts are inconsistent, and EXIT_FAILURE when the duration
+ * parameters cannot make progress or an allocation fails. */
+int main(int argc, char **argv)
+{
+	int ret;
+	unsigned i;
+	unsigned size;
+	unsigned ncpus;
+	unsigned buffer;
+	int alloc_failed = 0;
+	size_t max_tasks;
+
+	double timing;
+	double start;
+	double end;
+
+	struct starpu_conf conf;
+
+	if (getenv("STARPU_MICROBENCHS_DISABLED")) return STARPU_TEST_SKIPPED;
+
+	/* Get number of CPUs */
+	starpu_conf_init(&conf);
+	starpu_conf_noworker(&conf);
+	conf.ncpus = -1;
+#ifdef STARPU_SIMGRID
+	/* This will get serialized, avoid spending too much time on it. */
+	maxcpus = 2;
+#else
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	maxcpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
+	starpu_shutdown();
+#endif
+
+#ifdef STARPU_HAVE_UNSETENV
+	/* That was useful to force the max number of cpus to use, but now we
+	 * want to make it vary */
+	unsetenv("STARPU_NCPUS");
+	unsetenv("STARPU_NCPU");
+#endif
+
+	/* Default step, computed before parse_args() so that the usage message
+	 * reports it and the command line can still override it. */
+	cpustep = sqrt(maxcpus)/2;
+#ifdef STARPU_QUICK_CHECK
+	cpustep *= 8;
+#endif
+
+	if (STARPU_RUNNING_ON_VALGRIND)
+	{
+		factortime *= 4;
+		cpustep *= 4;
+	}
+
+	if (cpustep >= maxcpus/2)
+		cpustep = maxcpus/2;
+	if (cpustep == 0)
+		cpustep = 1;
+
+	parse_args(argc, argv);
+
+	/* The command line may have changed the stepping parameters: make
+	 * sure that every loop below still makes progress.  A zero step, a
+	 * zero starting duration or a factor below 2 would loop forever. */
+	if (cpustep == 0)
+		cpustep = 1;
+	if (mintime == 0 || factortime < 2)
+	{
+		fprintf(stderr,"mintime must be at least 1 and factortime at least 2 (got %u and %u)\n", mintime, factortime);
+		return EXIT_FAILURE;
+	}
+
+	float *buffers[total_nbuffers?total_nbuffers:1];
+
+	/* Allocate data */
+	for (buffer = 0; buffer < total_nbuffers; buffer++)
+	{
+		buffers[buffer] = (float *) calloc(16, sizeof(float));
+		if (!buffers[buffer])
+			alloc_failed = 1;
+	}
+
+	/* Let calloc() check the element count times element size for
+	 * overflow instead of multiplying by hand. */
+	max_tasks = (size_t) ntasks * maxcpus;
+	if (!starpu_getenv("STARPU_SINK"))
+	{
+		tasks = (struct starpu_task *) calloc(max_tasks, sizeof(struct starpu_task));
+		/* calloc() may legitimately return NULL for a zero count */
+		if (!tasks && max_tasks)
+			alloc_failed = 1;
+	}
+
+	if (alloc_failed)
+	{
+		fprintf(stderr,"could not allocate the benchmark buffers\n");
+		free(tasks);
+		for (buffer = 0; buffer < total_nbuffers; buffer++)
+			free(buffers[buffer]);
+		return EXIT_FAILURE;
+	}
+
+	/* Emit headers and compute raw tasks speed */
+	FPRINTF(stdout, "# tasks : %u buffers : %u total_nbuffers : %u\n", ntasks, nbuffers, total_nbuffers);
+	FPRINTF(stdout, "# ncpus\t");
+	for (size = mintime; size <= maxtime; size *= factortime)
+		FPRINTF(stdout, "%u iters(us)\ttotal(s)\t", size);
+	FPRINTF(stdout, "\n");
+	FPRINTF(stdout, "\"seq\"\t");
+	for (size = mintime; size <= maxtime; size *= factortime)
+	{
+		double dstart, dend;
+		dstart = starpu_timing_now();
+		for (i = 0; i < ntasks; i++)
+			func(NULL, (void*) (uintptr_t) size);
+		dend = starpu_timing_now();
+		FPRINTF(stdout, "%.0f \t%f\t", (dend-dstart)/ntasks, (dend-dstart)/1000000);
+	}
+	FPRINTF(stdout, "\n");
+	fflush(stdout);
+
+	starpu_data_handle_t data_handles[total_nbuffers?total_nbuffers:1];
+
+	/* Tasks pick their handles modulo total_nbuffers, which must thus not
+	 * be zero as soon as a task uses at least one buffer. */
+	if (nbuffers && !total_nbuffers)
+	{
+		fprintf(stderr,"can not have %u buffers with %u total buffers\n", nbuffers, total_nbuffers);
+		goto error;
+	}
+
+	if (mincpus == 0)
+		mincpus = 1;
+	/* For each number of cpus, benchmark */
+	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
+	{
+		FPRINTF(stdout, "%u\t", ncpus);
+		fflush(stdout);
+
+		conf.ncpus = ncpus;
+		ret = starpu_init(&conf);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+		for (buffer = 0; buffer < total_nbuffers; buffer++)
+			starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], 16, sizeof(float));
+
+		for (size = mintime; size <= maxtime; size *= factortime)
+		{
+			/* submit tasks */
+			start = starpu_timing_now();
+			for (i = 0; i < ntasks * ncpus; i++)
+			{
+				starpu_data_handle_t *handles;
+				starpu_task_init(&tasks[i]);
+				tasks[i].callback_func = NULL;
+				tasks[i].cl = &codelet;
+				tasks[i].cl_arg = (void*) (uintptr_t) size;
+				tasks[i].synchronous = 0;
+
+				/* Above STARPU_NMAXBUFS the handles and modes
+				 * are stored in dynamically allocated arrays */
+				if (nbuffers > STARPU_NMAXBUFS)
+				{
+					tasks[i].dyn_handles = malloc(nbuffers * sizeof(*data_handles));
+					handles = tasks[i].dyn_handles;
+					tasks[i].dyn_modes = malloc(nbuffers * sizeof(tasks[i].dyn_modes[0]));
+					for (buffer = 0; buffer < nbuffers; buffer++)
+						tasks[i].dyn_modes[buffer] = STARPU_R;
+				}
+				else
+					handles = tasks[i].handles;
+
+				/* Use the handles in order when every one of
+				 * them is needed, pick them randomly otherwise */
+				if (nbuffers >= total_nbuffers)
+					for (buffer = 0; buffer < nbuffers; buffer++)
+						handles[buffer] = data_handles[buffer%total_nbuffers];
+				else
+					for (buffer = 0; buffer < nbuffers; buffer++)
+						handles[buffer] = data_handles[starpu_lrand48()%total_nbuffers];
+
+				ret = starpu_task_submit(&tasks[i]);
+				if (ret == -ENODEV) goto enodev;
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task");
+			}
+			ret = starpu_task_wait_for_all();
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+			end = starpu_timing_now();
+
+			for (i = 0; i < ntasks * ncpus; i++)
+				starpu_task_clean(&tasks[i]);
+
+			timing = end - start;
+
+			FPRINTF(stdout, "%u\t%f\t", size, timing/ncpus/1000000);
+			fflush(stdout);
+
+			/* Optionally append the measurement to a per-scheduler
+			 * data file in STARPU_BENCH_DIR */
+			{
+				char *output_dir = getenv("STARPU_BENCH_DIR");
+				char *bench_id = getenv("STARPU_BENCH_ID");
+				char *sched = getenv("STARPU_SCHED");
+
+				if (output_dir && bench_id)
+				{
+					char file[1024];
+					FILE *f;
+
+					snprintf(file, sizeof(file), "%s/tasks_size_overhead_total%s%s.dat", output_dir, sched?"_":"", sched?sched:"");
+					f = fopen(file, "a");
+					if (f)
+					{
+						fprintf(f, "%s\t%u\t%u\t%f\n", bench_id, ncpus, size, timing/1000000 /(ntasks*ncpus) *1000);
+						fclose(f);
+					}
+					else
+						/* Recording is best-effort: report the
+						 * problem and keep benchmarking */
+						perror(file);
+				}
+			}
+		}
+
+		for (buffer = 0; buffer < total_nbuffers; buffer++)
+		{
+			starpu_data_unregister(data_handles[buffer]);
+		}
+
+		starpu_shutdown();
+
+		FPRINTF(stdout, "\n");
+		fflush(stdout);
+	}
+
+	free(tasks);
+	for (buffer = 0; buffer < total_nbuffers; buffer++)
+		free(buffers[buffer]);
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+error:
+	free(tasks);
+	for (buffer = 0; buffer < total_nbuffers; buffer++)
+		free(buffers[buffer]);
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/microbenchs/tasks_size_overhead.gp b/tests/microbenchs/tasks_size_overhead.gp
new file mode 100755
index 0000000..a0619dd
--- /dev/null
+++ b/tests/microbenchs/tasks_size_overhead.gp
@@ -0,0 +1,44 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Plot the content of tasks_size_overhead.output with gnuplot, as speedup
+# curves against the number of cores.
+# TERMINAL and OUTFILE can be set in the environment to select the gnuplot
+# terminal and the output file; they default to eps and
+# tasks_size_overhead.eps.
+
+OUTPUT=tasks_size_overhead.output
+# Third line of the output file, used below as the reference values.
+VALS=$(sed -n -e '3p' < $OUTPUT)
+
+# Build one gnuplot curve for each of up to 11 measured columns.
+PLOTS=""
+for x in 1 2 3 4 5 6 7 8 9 10 11
+do
+	# pos is the column holding the plotted value, double the column
+	# whose header is used as the curve title.
+	pos=$((2 * $x + 1))
+	double=$((2 * $x))
+	# NOTE(review): the benchmark separates its columns with tabs; confirm
+	# that the delimiter given to cut here is a tab and not a space.
+	value=$(echo "$VALS" | cut -d ' ' -f $pos)
+	if test -n "$value"
+	then
+		# Plot reference value / measured value; columns missing from
+		# the output are skipped.
+		PLOTS=",\"$OUTPUT\" using 1:($value)/(\$$pos) with linespoints title columnheader($double) $PLOTS"
+	fi
+done
+
+[ -n "$TERMINAL" ] || TERMINAL=eps
+[ -n "$OUTFILE" ] || OUTFILE=tasks_size_overhead.eps
+gnuplot << EOF
+set terminal $TERMINAL
+set output "$OUTFILE"
+set key top left
+set xlabel "number of cores"
+set ylabel "speedup"
+plot \
+	x title "linear" $PLOTS
+EOF
+
diff --git a/tests/microbenchs/tasks_size_overhead.sh b/tests/microbenchs/tasks_size_overhead.sh
new file mode 100755
index 0000000..f00a276
--- /dev/null
+++ b/tests/microbenchs/tasks_size_overhead.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Run the tasks_size_overhead benchmark, plot its output and display the plot.
+
+# Exit status 77 makes the Automake test driver report the test as skipped.
+if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi
+
+# The benchmark program is this script's path without the .sh suffix.
+ROOT=${0%.sh}
+$MS_LAUNCHER $STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
+# Plot with the companion .gp script, then open the result with gv.
+$ROOT.gp
+gv tasks_size_overhead.eps
diff --git a/tests/microbenchs/tasks_size_overhead_sched.sh b/tests/microbenchs/tasks_size_overhead_sched.sh
new file mode 100755
index 0000000..4af2de2
--- /dev/null
+++ b/tests/microbenchs/tasks_size_overhead_sched.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Run the tasks_size_overhead benchmark once and, when possible, plot the
+# result as a PNG named after $STARPU_SCHED.  The exit status is the
+# benchmark's, or gnuplot script's when the plot was attempted.
+
+# Exit status 77 makes the Automake test driver report the test as skipped.
+if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi
+
+# Strip both the .sh suffix and the _sched suffix to get the benchmark program.
+ROOT=${0%.sh}
+ROOT=${ROOT%_sched}
+unset STARPU_SSILENT
+# The launcher prefixes are read from the underscore-prefixed variables as well.
+$_MS_LAUNCHER $STARPU_LAUNCH $_STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
+ret=$?
+# The sed expression prints the job count of a -jN option found in MAKEFLAGS,
+# so the test below is true only when no such option is present.
+if test "$ret" = "0" && [ -z "$(echo $MAKEFLAGS | sed -ne 's/.*-j\([0-9]\+\).*/\1/p')" ]
+then
+	# if the program was successful and we are not running in parallel, try to run gnuplot
+	DIR=
+	[ -z "$STARPU_BENCH_DIR" ] || DIR="$STARPU_BENCH_DIR/"
+	# TERMINAL and OUTFILE are read by the .gp script
+	export TERMINAL=png
+	export OUTFILE=${DIR}tasks_size_overhead_${STARPU_SCHED}.png
+	gnuplot_av=$(command -v gnuplot)
+	if test -n "$gnuplot_av"
+	then
+		# If gnuplot is available, plot the result
+		$ROOT.gp
+		ret=$?
+	fi
+fi
+
+exit $ret
diff --git a/tests/microbenchs/tasks_size_overhead_scheds.sh b/tests/microbenchs/tasks_size_overhead_scheds.sh
new file mode 100755
index 0000000..44aa097
--- /dev/null
+++ b/tests/microbenchs/tasks_size_overhead_scheds.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Run tasks_size_overhead_sched.sh through the test_scheds helper.
+
+# Exit status 77 makes the Automake test driver report the test as skipped.
+if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi
+
+# Presumably provides test_scheds, which is not defined in this script --
+# TODO confirm against microbench.sh.
+. $(dirname $0)/microbench.sh
+
+# NOTE(review): looks like the list of schedulers expected to fail, consumed
+# by test_scheds -- confirm against microbench.sh.
+XFAIL="heteroprio"
+
+# Pass extra options to the benchmark when no STARPU_BENCH_DIR is set
+# (presumably a smaller task count for a quicker run -- TODO confirm).
+if [ -z "$STARPU_BENCH_DIR" ]
+then
+	FAST="-i 8"
+fi
+
+# Move the launcher prefixes to underscore-prefixed variables, which
+# tasks_size_overhead_sched.sh puts in front of the benchmark command, and
+# clear the original ones.
+_STARPU_LAUNCH="$STARPU_LAUNCH"
+unset STARPU_LAUNCH
+_MS_LAUNCHER="$MS_LAUNCHER"
+unset MS_LAUNCHER
+test_scheds tasks_size_overhead_sched.sh $FAST
diff --git a/tests/model-checking/Makefile.am b/tests/model-checking/Makefile.am
new file mode 100644
index 0000000..587217b
--- /dev/null
+++ b/tests/model-checking/Makefile.am
@@ -0,0 +1,77 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+include $(top_srcdir)/make/starpu-tests.mk
+
+# Files shipped in the distribution tarball in addition to the sources
+EXTRA_DIST = \
+	platform.xml \
+	prio_list.sh \
+	barrier.sh \
+	starpu-mc.sh.in
+
+AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_builddir)/include -I$(top_srcdir)/include $(SIMGRID_CFLAGS)
+AM_LDFLAGS = -Wl,-znorelro -Wl,-znoseparate-code
+LIBS += $(SIMGRID_LDFLAGS)
+
+# Programs that are built but not installed
+noinst_PROGRAMS = \
+	prio_list \
+	prio_list2 \
+	prio_list3 \
+	starpu_barrier
+
+# Only the shell wrappers listed in SHELL_TESTS are run by "make check"
+TESTS = $(SHELL_TESTS)
+SHELL_TESTS =
+
+# takes 1s
+SHELL_TESTS += prio_list.sh
+
+# https://github.com/simgrid/simgrid/issues/166
+#SHELL_TESTS += barrier.sh
+
+if !STARPU_QUICK_CHECK
+# takes 7m
+SHELL_TESTS += prio_list2.sh
+endif
+
+if STARPU_LONG_CHECK
+# takes 25m
+SHELL_TESTS += prio_list3.sh
+endif
+
+# Optional flags for the manual simgrid-mc targets below; uncomment as needed
+#MC_FLAGS=--cfg=model-check/reduction:none
+
+#MC_FLAGS+=--cfg=contexts/factory:ucontext
+#MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes
+#MC_FLAGS+=--cfg=model-check/visited:1000
+
+# To record the failing trace
+#MC_FLAGS+=--cfg=model-check/record:1
+## And replay it without simgrid-mc
+#MC_FLAGS+=--cfg=model-check/replay:'1;3;4'
+
+# To see which simix calls are made
+#MC_FLAGS+=--log=simix_popping.thres:debug
+
+# Manual targets: run a test program directly under simgrid-mc; the debug
+# variants additionally enable the mc_safety debug log.
+test: prio_list
+	simgrid-mc ./prio_list platform.xml MAIN $(MC_FLAGS)
+
+debug: prio_list
+	simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS)
+
+test-barrier: starpu_barrier
+	simgrid-mc ./starpu_barrier platform.xml MAIN $(MC_FLAGS)
+
+debug-barrier: starpu_barrier
+	simgrid-mc ./starpu_barrier platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS)
diff --git a/tests/model-checking/Makefile.in b/tests/model-checking/Makefile.in
new file mode 100644
index 0000000..f0fda7f
--- /dev/null
+++ b/tests/model-checking/Makefile.in
@@ -0,0
+1,1439 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : 
+POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = prio_list$(EXEEXT) prio_list2$(EXEEXT) \ + prio_list3$(EXEEXT) starpu_barrier$(EXEEXT) + +# https://github.com/simgrid/simgrid/issues/166 +#SHELL_TESTS += barrier.sh + +# takes 7m +@STARPU_QUICK_CHECK_FALSE@am__append_3 = prio_list2.sh + +# takes 25m +@STARPU_LONG_CHECK_TRUE@am__append_4 = prio_list3.sh +subdir = tests/model-checking +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = starpu-mc.sh +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +prio_list_SOURCES = prio_list.c +prio_list_OBJECTS = prio_list.$(OBJEXT) +prio_list_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +prio_list2_SOURCES = prio_list2.c +prio_list2_OBJECTS = prio_list2.$(OBJEXT) 
+prio_list2_LDADD = $(LDADD) +prio_list3_SOURCES = prio_list3.c +prio_list3_OBJECTS = prio_list3.$(OBJEXT) +prio_list3_LDADD = $(LDADD) +starpu_barrier_SOURCES = starpu_barrier.c +starpu_barrier_OBJECTS = starpu_barrier.$(OBJEXT) +starpu_barrier_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/prio_list.Po \ + ./$(DEPDIR)/prio_list2.Po ./$(DEPDIR)/prio_list3.Po \ + ./$(DEPDIR)/starpu_barrier.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = prio_list.c prio_list2.c prio_list3.c starpu_barrier.c +DIST_SOURCES = prio_list.c prio_list2.c prio_list3.c starpu_barrier.c +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) 
$(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; 
files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/starpu-mc.sh.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ 
+DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = @FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(SIMGRID_LDFLAGS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ 
+LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ 
+PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = @SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = 
@STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = 
@build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = +LAUNCHER = +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +EXTRA_DIST = \ + platform.xml \ + prio_list.sh \ + barrier.sh \ + starpu-mc.sh.in + +AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_builddir)/include -I$(top_srcdir)/include $(SIMGRID_CFLAGS) +AM_LDFLAGS = -Wl,-znorelro -Wl,-znoseparate-code +TESTS = $(SHELL_TESTS) + +# takes 1s +SHELL_TESTS = prio_list.sh $(am__append_3) $(am__append_4) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/model-checking/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign tests/model-checking/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +starpu-mc.sh: $(top_builddir)/config.status $(srcdir)/starpu-mc.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +prio_list$(EXEEXT): $(prio_list_OBJECTS) $(prio_list_DEPENDENCIES) $(EXTRA_prio_list_DEPENDENCIES) + @rm -f prio_list$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(prio_list_OBJECTS) $(prio_list_LDADD) $(LIBS) + +prio_list2$(EXEEXT): $(prio_list2_OBJECTS) $(prio_list2_DEPENDENCIES) $(EXTRA_prio_list2_DEPENDENCIES) + @rm -f prio_list2$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(prio_list2_OBJECTS) $(prio_list2_LDADD) $(LIBS) + +prio_list3$(EXEEXT): $(prio_list3_OBJECTS) $(prio_list3_DEPENDENCIES) $(EXTRA_prio_list3_DEPENDENCIES) + @rm -f prio_list3$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(prio_list3_OBJECTS) $(prio_list3_LDADD) $(LIBS) + +starpu_barrier$(EXEEXT): $(starpu_barrier_OBJECTS) $(starpu_barrier_DEPENDENCIES) $(EXTRA_starpu_barrier_DEPENDENCIES) + @rm -f starpu_barrier$(EXEEXT) + $(AM_V_CCLD)$(LINK) 
$(starpu_barrier_OBJECTS) $(starpu_barrier_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list3.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_barrier.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". 
+.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. +am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep 
"^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. 
contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? 
+prio_list.sh.log: prio_list.sh + @p='prio_list.sh'; \ + b='prio_list.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +prio_list2.sh.log: prio_list2.sh + @p='prio_list2.sh'; \ + b='prio_list2.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +prio_list3.sh.log: prio_list3.sh + @p='prio_list3.sh'; \ + b='prio_list3.sh'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; 
\ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/prio_list.Po + -rm -f ./$(DEPDIR)/prio_list2.Po + -rm -f ./$(DEPDIR)/prio_list3.Po + -rm -f ./$(DEPDIR)/starpu_barrier.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/prio_list.Po + -rm -f ./$(DEPDIR)/prio_list2.Po + -rm -f ./$(DEPDIR)/prio_list3.Po + -rm -f ./$(DEPDIR)/starpu_barrier.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-generic clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am 
install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am recheck tags tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! 
grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +#MC_FLAGS=--cfg=model-check/reduction:none + +#MC_FLAGS+=--cfg=contexts/factory:ucontext +#MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes +#MC_FLAGS+=--cfg=model-check/visited:1000 + +# To record the failing trace +#MC_FLAGS+=--cfg=model-check/record:1 +#MC_FLAGS+=--cfg=model-check/reply:'1;3;4' + +# To see which simix calls are made +#MC_FLAGS+=--log=simix_popping.thres:debug + +test: prio_list + simgrid-mc ./prio_list platform.xml MAIN $(MC_FLAGS) + +debug: prio_list + simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) + +test-barrier: starpu_barrier + simgrid-mc ./starpu_barrier platform.xml MAIN $(MC_FLAGS) + +debug-barrier: starpu_barrier + simgrid-mc ./starpu_barrier platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/tests/model-checking/barrier.sh b/tests/model-checking/barrier.sh new file mode 100755 index 0000000..f0f3a87 --- /dev/null +++ b/tests/model-checking/barrier.sh @@ -0,0 +1,18 @@ +#!/bin/bash -x +# StarPU --- Runtime system for heterogeneous multicore architectures. 
# Load the shared model-checking helpers that sit next to this script.
# Both expansions are quoted so that a checkout path containing whitespace
# or glob characters is not word-split before being handed to `source`.
source "$(dirname "$0")/starpu-mc.sh"

# NOTE(review): `test` is presumably a shell function defined by
# starpu-mc.sh (shadowing the builtin) that runs the named model-checking
# program; if the helper could not be sourced, the builtin `test` would
# succeed on this non-empty string -- confirm against starpu-mc.sh.in.
test starpu_barrier
+ */ + +#define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0) +#define _STARPU_CALLOC(p, n, s) do {p = calloc(n, s);} while (0) +#define _STARPU_MALLOC_CAST(p, s, t) do {p = (t) malloc(s);} while (0) + +#ifndef NOCONFIG +#include +#else +#define _GNU_SOURCE 1 +// Assuming recent simgrid +#endif +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#ifndef NLISTS +#define NLISTS 1 +#endif +#ifndef NTHREADS +#define NTHREADS 2 +#endif +#ifndef NELEMENTS +#define NELEMENTS 6 +#endif +#ifndef NITERS +#define NITERS 1 +#endif + +// MC_ignore + +sg_mutex_t mutex[NLISTS]; + +LIST_TYPE(foo, + unsigned prio; + unsigned back; /* Push at back instead of front? */ + ); +PRIO_LIST_TYPE(foo, prio); + +struct foo_prio_list mylist[NLISTS]; + +void check_list_prio(struct foo_prio_list *list) +{ + struct foo *cur; + unsigned lastprio = UINT_MAX; + unsigned back = 0; + for (cur = foo_prio_list_begin(list); + cur != foo_prio_list_end(list); + cur = foo_prio_list_next(list, cur)) + { + if (cur->prio == lastprio) + /* For same prio, back elements should never get before + * front elements */ + MC_assert(!(back && !cur->back)); + else + MC_assert(lastprio > cur->prio); + lastprio = cur->prio; + back = cur->back; + } +} + +void worker(int argc, char *argv[]) +{ + unsigned myrank = atoi(argv[0]); + unsigned i, n, l, iter; + struct foo *elem; + struct drand48_data buffer; + long res; + + srand48_r(myrank, &buffer); + + l = myrank%NLISTS; + + for (iter = 0; iter < NITERS; iter++) + { + for (i = 0; i < NELEMENTS; i++) + { + elem = malloc(sizeof(*elem)); + lrand48_r(&buffer, &res); + elem->prio = res%10; + lrand48_r(&buffer, &res); + elem->back = res%2; + sg_mutex_lock(mutex[l]); + if (elem->back) + foo_prio_list_push_back(&mylist[l], elem); + else + foo_prio_list_push_front(&mylist[l], elem); + check_list_prio(&mylist[l]); + sg_mutex_unlock(mutex[l]); + } + + for (i = 0; i < 
NELEMENTS; i++) + { + lrand48_r(&buffer, &res); + n = res%(NELEMENTS-i); + + sg_mutex_lock(mutex[l]); + for (elem = foo_prio_list_begin(&mylist[l]); + n--; + elem = foo_prio_list_next(&mylist[l], elem)) + ; + foo_prio_list_erase(&mylist[l], elem); + check_list_prio(&mylist[l]); + sg_mutex_unlock(mutex[l]); + } + + /* horrible way to wait for list getting empty */ + sg_actor_sleep_for(1000); + } +} + +void master(int argc, char *argv[]) +{ +} + +int main(int argc, char *argv[]) +{ + unsigned l, i; + + if (argc < 3) + { + fprintf(stderr,"usage: %s platform.xml host\n", argv[0]); + exit(EXIT_FAILURE); + } + + printf("Running with:\n- %d threads\n- %d lists\n- %d elements\n- %d iterations\n", NTHREADS, NLISTS, NELEMENTS, NITERS); + + srand48(0); + simgrid_init(&argc, argv); + sg_cfg_set_int("contexts/stack-size", 128); + simgrid_load_platform(argv[1]); + + for (l = 0; l < NLISTS; l++) + { + mutex[l] = sg_mutex_init(); + foo_prio_list_init(&mylist[l]); + } + + for (i = 0; i < NTHREADS; i++) + { + char *s; + asprintf(&s, "%d\n", i); + char **args = malloc(sizeof(char*)*2); + args[0] = s; + args[1] = NULL; + sg_actor_create("test", sg_host_by_name(argv[2]), worker, 1, args); + } + + simgrid_run(); + return 0; +} diff --git a/tests/model-checking/prio_list.sh b/tests/model-checking/prio_list.sh new file mode 100755 index 0000000..a2592bc --- /dev/null +++ b/tests/model-checking/prio_list.sh @@ -0,0 +1,18 @@ +#!/bin/bash -x +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +source $(dirname $0)/starpu-mc.sh +test prio_list diff --git a/tests/model-checking/prio_list2.c b/tests/model-checking/prio_list2.c new file mode 100644 index 0000000..28dcb5e --- /dev/null +++ b/tests/model-checking/prio_list2.c @@ -0,0 +1,21 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define NLISTS 2 +#define NTHREADS 2 +#define NELEMENTS 4 +#define NITERS 1 +#include "prio_list.c" diff --git a/tests/model-checking/prio_list3.c b/tests/model-checking/prio_list3.c new file mode 100644 index 0000000..37e601f --- /dev/null +++ b/tests/model-checking/prio_list3.c @@ -0,0 +1,21 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#define NLISTS 1 +#define NTHREADS 3 +#define NELEMENTS 4 +#define NITERS 1 +#include "prio_list.c" diff --git a/tests/model-checking/starpu-mc.sh.in b/tests/model-checking/starpu-mc.sh.in new file mode 100755 index 0000000..f437b8c --- /dev/null +++ b/tests/model-checking/starpu-mc.sh.in @@ -0,0 +1,36 @@ +#!/bin/bash -x +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+#
+# Test a model-checking program with simgrid model checker
+
+SIMGRID_MC=@SIMGRID_MC@
+abs_top_srcdir=@abs_top_srcdir@
+abs_builddir=@abs_builddir@
+
+set -e
+
+[ -x "$SIMGRID_MC" ] || exit 77
+
+#MC_FLAGS=--cfg=model-check/reduction:none
+
+# makes it much longer actually
+#MC_FLAGS+=--cfg=contexts/factory:ucontext
+#MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes
+#MC_FLAGS+=--cfg=model-check/visited:1000
+# test PROGRAM: run $abs_builddir/PROGRAM under $SIMGRID_MC with platform.xml and host MAIN (shadows the shell builtin "test"); MC_FLAGS stays unquoted so it may hold several flags.
+test() {
+	time "$SIMGRID_MC" "$abs_builddir/$1" "$abs_top_srcdir/tests/model-checking/platform.xml" MAIN $MC_FLAGS
+}
diff --git a/tests/model-checking/starpu_barrier.c b/tests/model-checking/starpu_barrier.c
new file mode 100644
index 0000000..8e06a41
--- /dev/null
+++ b/tests/model-checking/starpu_barrier.c
@@ -0,0 +1,149 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define __COMMON_UTILS_H__
+#define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0)
+#define _STARPU_CALLOC(p, n, s) do {p = calloc(n, s);} while (0)
+#define _STARPU_REALLOC(p, s) do {p = realloc(p, s);} while (0)
+#define STARPU_HG_DISABLE_CHECKING(v) ((void) 0)
+#define STARPU_HG_ENABLE_CHECKING(v) ((void) 0)
+#define ANNOTATE_HAPPENS_AFTER(v) ((void) 0)
+#define ANNOTATE_HAPPENS_BEFORE(v) ((void) 0)
+
+#define STARPU_DEBUG_PREFIX "[starpu]"
+#ifdef STARPU_VERBOSE
+# define _STARPU_DEBUG(fmt, ...)
do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) +#else +# define _STARPU_DEBUG(fmt, ...) do { } while (0) +#endif + +#define STARPU_UYIELD() ((void)0) + +#ifndef NOCONFIG +#include +#else +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +// Assuming recent simgrid +#endif +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* common/thread.c references these, but doesn't need to have them working anyway */ +starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; +starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; + +void _starpu_simgrid_thread_start(int argc, char *argv[]) +{ +} + +size_t _starpu_default_stack_size = 8192; + +void +_starpu_simgrid_set_stack_size(size_t stack_size) +{ +} + +starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name) +{ + return NULL; +} + +static void _starpu_clock_gettime(struct timespec *ts) +{ + double now = simgrid_get_clock(); + ts->tv_sec = floor(now); + ts->tv_nsec = floor((now - ts->tv_sec) * 1000000000); +} + +void starpu_sleep(float nb_sec) +{ + sg_actor_sleep_for(nb_sec); +} + +#include +#undef STARPU_DEBUG +int starpu_worker_get_id(void) { return 0; } +static inline unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_t *mutex) { return 0; } +#include + +#ifndef NTHREADS +#define NTHREADS 2 +#endif + +#ifndef NITERS +#define NITERS 1 +#endif + +struct _starpu_barrier barrier; + +void worker(int argc, char *argv[]) +{ + unsigned iter; + + for (iter = 0; iter < NITERS; iter++) + { + MC_assert(barrier.count <= NTHREADS); + _starpu_barrier_wait(&barrier); + } +} + +#undef main +int main(int argc, char *argv[]) +{ + unsigned i; + + if (argc < 3) + { + fprintf(stderr,"usage: %s platform.xml host\n", argv[0]); + exit(EXIT_FAILURE); + } + srand48(0); + simgrid_init(&argc, argv); + 
sg_cfg_set_int("contexts/stack-size", 128); + simgrid_load_platform(argv[1]); + + _starpu_barrier_init(&barrier, NTHREADS); + + for (i = 0; i < NTHREADS; i++) + { + char *s; + asprintf(&s, "%d\n", i); + char **args = malloc(sizeof(char*)*2); + args[0] = s; + args[1] = NULL; + sg_actor_create("test", sg_host_by_name(argv[2]), worker, 1, args); + } + + simgrid_run(); + return 0; +} diff --git a/tests/openmp/api_01.c b/tests/openmp/api_01.c new file mode 100644 index 0000000..fcc2d6c --- /dev/null +++ b/tests/openmp/api_01.c @@ -0,0 +1,142 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include +#include + +/* + * Check the OpenMP API getters return proper default results. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret; + /* we clear the whole OMP environment for this test, to check the + * default behaviour of API functions */ + unsetenv("OMP_DYNAMIC"); + unsetenv("OMP_NESTED"); + unsetenv("OMP_SCHEDULE"); + unsetenv("OMP_STACKSIZE"); + unsetenv("OMP_WAIT_POLICY"); + unsetenv("OMP_THREAD_LIMIT"); + unsetenv("OMP_MAX_ACTIVE_LEVELS"); + unsetenv("OMP_CANCELLATION"); + unsetenv("OMP_DEFAULT_DEVICE"); + unsetenv("OMP_MAX_TASK_PRIORITY"); + unsetenv("OMP_PROC_BIND"); + unsetenv("OMP_NUM_THREADS"); + unsetenv("OMP_PLACES"); + unsetenv("OMP_DISPLAY_ENV"); + ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +#define check_omp_func(f,_tv) \ +{ \ + const int v = (f()); \ + const int tv = (_tv); \ + printf(#f ": %d (should be %d)\n", v, tv); \ + STARPU_ASSERT(v == tv); \ +} + +const char * get_sched_name(int sched_value) +{ + const char *sched_name = NULL; + + switch (sched_value) + { + case starpu_omp_sched_undefined: sched_name = ""; break; + case starpu_omp_sched_static: sched_name = "static"; break; + case starpu_omp_sched_dynamic: sched_name = "dynamic"; break; + case starpu_omp_sched_guided: sched_name = "guided"; break; + case starpu_omp_sched_auto: sched_name = "auto"; break; + case starpu_omp_sched_runtime: sched_name = "runtime"; break; + default: _STARPU_ERROR("invalid omp schedule value"); + } + return sched_name; +} + +int main(void) +{ + const int nb_cpus = starpu_cpu_worker_get_count(); + + check_omp_func(starpu_omp_get_num_threads, 1); + check_omp_func(starpu_omp_get_thread_num, 0); + /* since OMP_NUM_THREADS is cleared, starpu_omp_get_max_threads() should return nb_cpus */ + check_omp_func(starpu_omp_get_max_threads, nb_cpus); + 
check_omp_func(starpu_omp_get_num_procs, nb_cpus); + check_omp_func(starpu_omp_in_parallel, 0); + check_omp_func(starpu_omp_get_dynamic, 0); + check_omp_func(starpu_omp_get_nested, 0); + check_omp_func(starpu_omp_get_cancellation, 0); + { + const enum starpu_omp_sched_value target_kind = starpu_omp_sched_static; + const int target_modifier = 0; + enum starpu_omp_sched_value kind; + int modifier; + const char *sched_name; + const char *target_sched_name; + starpu_omp_get_schedule(&kind, &modifier); + sched_name = get_sched_name(kind); + target_sched_name = get_sched_name(target_kind); + printf("starpu_omp_get_schedule: %s,%d (should be %s,%d)\n", sched_name, modifier, target_sched_name, target_modifier); + STARPU_ASSERT(kind == target_kind && modifier == target_modifier); + } + check_omp_func(starpu_omp_get_thread_limit, nb_cpus); + check_omp_func(starpu_omp_get_max_active_levels, 1); + check_omp_func(starpu_omp_get_level, 0); + { + const int tv = 0; + const int v = starpu_omp_get_ancestor_thread_num(0); + printf("starpu_omp_get_ancestor_thread_num(0): %d (should be %d)\n", v, tv); + STARPU_ASSERT(v == tv); + } + { + const int tv = 1; + const int v = starpu_omp_get_team_size(0); + printf("starpu_omp_get_team_size(0): %d (should be %d)\n", v, tv); + STARPU_ASSERT(v == tv); + } + check_omp_func(starpu_omp_get_active_level, 0); + check_omp_func(starpu_omp_in_final, 0); + check_omp_func(starpu_omp_get_proc_bind, starpu_omp_proc_bind_false); + check_omp_func(starpu_omp_get_default_device, 0); + /* TODO: support more than one device */ + check_omp_func(starpu_omp_get_num_devices, 1); + check_omp_func(starpu_omp_get_num_teams, 1); + check_omp_func(starpu_omp_get_team_num, 0); + check_omp_func(starpu_omp_is_initial_device, 1); + check_omp_func(starpu_omp_get_initial_device, 0); + check_omp_func(starpu_omp_get_max_task_priority, 0); + return 0; +} +#endif diff --git a/tests/openmp/array_slice_01.c b/tests/openmp/array_slice_01.c new file mode 100644 index 0000000..5dcca4b 
--- /dev/null +++ b/tests/openmp/array_slice_01.c @@ -0,0 +1,255 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Test recursive OpenMP tasks, data dependences, data slice dependences. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +#define NX 64 +int global_vector[NX]; + +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_h(void *buffers[], void *_args) +{ + void **args = _args; + struct starpu_vector_interface *_vector = buffers[0]; + int nx = STARPU_VECTOR_GET_NX(_vector); + int elemsize = STARPU_VECTOR_GET_ELEMSIZE(_vector); + int slice_base = STARPU_VECTOR_GET_SLICE_BASE(_vector); + int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); + int f = (int)(intptr_t)args[0]; + int imin = (int)(intptr_t)args[1]; + int imax = (int)(intptr_t)args[2]; + int i; + + assert(elemsize == sizeof(v[0])); + + printf("depth 2 task, entry: vector ptr = %p, slice_base = %d, imin = %d, imax = %d\n", v, slice_base, imin, imax); + + for (i = imin; i < imax; i++) + { + 
assert(i-slice_base>=0); + assert(i-slice_base= 2); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/cuda_task_01.c b/tests/openmp/cuda_task_01.c new file mode 100644 index 0000000..cdb5784 --- /dev/null +++ b/tests/openmp/cuda_task_01.c @@ -0,0 +1,205 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check executing a CUDA target task. 
+ */
+
+#if !defined(STARPU_OPENMP) || !defined(STARPU_USE_CUDA)
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+#define NX 64
+int global_vector_1[NX];
+int global_vector_2[NX];
+
+__attribute__((constructor))
+static void omp_constructor(void)
+{
+	int ret = starpu_omp_init();
+	if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
+}
+
+__attribute__((destructor))
+static void omp_destructor(void)
+{
+	starpu_omp_shutdown();
+}
+/* CUDA codelet (see master_g2): device-to-device copy of vector_1 (buffers[0], STARPU_R) into vector_2 (buffers[1], STARPU_W). */
+void task_region_g(void *buffers[], void *args)
+{
+	struct starpu_vector_interface *_vector_1 = buffers[0];
+	int nx1 = STARPU_VECTOR_GET_NX(_vector_1);
+	int *v1 = (int *)STARPU_VECTOR_GET_PTR(_vector_1);
+
+	struct starpu_vector_interface *_vector_2 = buffers[1];
+	int nx2 = STARPU_VECTOR_GET_NX(_vector_2);
+	int *v2 = (int *)STARPU_VECTOR_GET_PTR(_vector_2);
+
+	int f = (int)(intptr_t)args;
+
+	STARPU_ASSERT(nx1 == nx2);
+
+	printf("depth 1 task, entry: vector_1 ptr = %p\n", v1);
+	printf("depth 1 task, entry: vector_2 ptr = %p\n", v2);
+	printf("depth 1 task, entry: f = %d\n", f);
+	/* The byte count is nx1 elements of the pointee type (int), not nx1 vector-interface structs. */
+	fprintf(stderr, "cudaMemcpy: -->\n");
+	cudaMemcpyAsync(v2,v1,nx1*sizeof(*v1), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
+	fprintf(stderr, "cudaMemcpy: <--\n");
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+
+void master_g1(void *arg)
+{
+	(void)arg;
+	{
+		starpu_data_handle_t region_vector_handle;
+		int i;
+
+		printf("master_g1: vector ptr = %p\n", global_vector_1);
+		for (i = 0; i < NX; i++)
+		{
+			global_vector_1[i] = 1;
+		}
+
+		starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector_1, NX, sizeof(global_vector_1[0]));
+		starpu_omp_handle_register(region_vector_handle);
+		printf("master_g1: region_vector_handle = %p\n", region_vector_handle);
+	}
+	{
+		starpu_data_handle_t region_vector_handle;
+		int i;
+
+		printf("master_g1: vector ptr = %p\n", global_vector_2);
+		for (i = 0; i < NX; i++)
+		{
+			global_vector_2[i] = 0;
+		}
+
+		starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector_2, NX, sizeof(global_vector_2[0]));
+		starpu_omp_handle_register(region_vector_handle);
+		printf("master_g1: region_vector_handle = %p\n", region_vector_handle);
+	}
+}
+
+void master_g2(void *arg)
+{
+	(void)arg;
+	starpu_data_handle_t region_vector_handles[2];
+	struct starpu_omp_task_region_attr attr;
+	int i;
+
+	region_vector_handles[0] = starpu_omp_data_lookup(global_vector_1);
+	printf("master_g2: region_vector_handles[0] = %p\n", region_vector_handles[0]);
+	region_vector_handles[1] = starpu_omp_data_lookup(global_vector_2);
+	printf("master_g2: region_vector_handles[1] = %p\n", region_vector_handles[1]);
+
+	memset(&attr, 0, sizeof(attr));
+#ifdef STARPU_SIMGRID
+	attr.cl.model = &starpu_perfmodel_nop;
+	attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE;
+#endif
+	attr.cl.cpu_funcs[0] = NULL;
+	attr.cl.cuda_funcs[0] = task_region_g;
+	attr.cl.where = STARPU_CUDA;
+	attr.cl.nbuffers = 2;
+	attr.cl.modes[0] = STARPU_R;
+	attr.cl.modes[1] = STARPU_W;
+	attr.handles = region_vector_handles;
+	attr.cl_arg_size = sizeof(void *);
+	attr.cl_arg_free = 0;
+	attr.if_clause = 1;
+	attr.final_clause = 0;
+	attr.untied_clause = 1;
+	attr.mergeable_clause = 0;
+
+	i = 0;
+
+	attr.cl_arg = (void *)(intptr_t)i;
+	starpu_omp_task_region(&attr);
+}
+
+void parallel_region_f(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+	starpu_omp_master(master_g1, NULL);
+	starpu_omp_barrier();
+	{
+		starpu_data_handle_t region_vector_handle_1;
+		region_vector_handle_1 = starpu_omp_data_lookup(global_vector_1);
+		printf("parallel_region block 1: region_vector_handle_1 = %p\n", region_vector_handle_1);
+	}
+	{
+		starpu_data_handle_t region_vector_handle_2;
+		region_vector_handle_2 = starpu_omp_data_lookup(global_vector_2);
+		printf("parallel_region block 1: region_vector_handle_2 = %p\n", region_vector_handle_2);
+	}
+	starpu_omp_barrier();
+	starpu_omp_master(master_g2, NULL);
+
starpu_omp_barrier(); + { + starpu_data_handle_t region_vector_handle_1; + region_vector_handle_1 = starpu_omp_data_lookup(global_vector_1); + printf("parallel_region block 2: region_vector_handle_1 = %p\n", region_vector_handle_1); + } + { + starpu_data_handle_t region_vector_handle_2; + region_vector_handle_2 = starpu_omp_data_lookup(global_vector_2); + printf("parallel_region block 2: region_vector_handle_2 = %p\n", region_vector_handle_2); + } +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + + if (starpu_cuda_worker_get_count() < 1) + { + return STARPU_TEST_SKIPPED; + } + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + + int i; + for (i = 0; i < NX; i++) + { + if (global_vector_1[i] != global_vector_2[i]) + { + fprintf(stderr, "check failed: global_vector_1[%d] = %d, global_vector_2[%d] = %d\n", i, global_vector_1[i], i, global_vector_2[i]); + return EXIT_FAILURE; + } + } + return 0; +} +#endif diff --git a/tests/openmp/environment.c b/tests/openmp/environment.c new file mode 100644 index 0000000..d0e5159 --- /dev/null +++ b/tests/openmp/environment.c @@ -0,0 +1,54 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include +#include + +/* + * Check OpenMP environment variables are properly parsed. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +int main(void) +{ + setenv("OMP_DYNAMIC","false", 1); + setenv("OMP_NESTED","false", 1); + setenv("OMP_SCHEDULE","auto", 1); + setenv("OMP_STACKSIZE","2M", 1); + setenv("OMP_WAIT_POLICY","passive", 1); + setenv("OMP_THREAD_LIMIT","0", 1); + setenv("OMP_MAX_ACTIVE_LEVELS","4", 1); + setenv("OMP_CANCELLATION","false", 1); + setenv("OMP_DEFAULT_DEVICE","0", 1); + setenv("OMP_MAX_TASK_PRIORITY", "20", 1); + setenv("OMP_PROC_BIND","spread, spread, close", 1); + setenv("OMP_NUM_THREADS","4, 16, 2", 1); + setenv("OMP_PLACES","{1,2,3,4},{5,6,7,8}", 1); + setenv("OMP_DISPLAY_ENV","verbose", 1); + int ret = starpu_omp_init(); + if (ret == -EINVAL) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); + starpu_omp_shutdown(); + return 0; +} +#endif diff --git a/tests/openmp/init_exit_01.c b/tests/openmp/init_exit_01.c new file mode 100644 index 0000000..6ff67bf --- /dev/null +++ b/tests/openmp/init_exit_01.c @@ -0,0 +1,39 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include + +/* + * Check the starpu_omp_init/shutdown calls. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +int main(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); + starpu_omp_shutdown(); + return 0; +} +#endif diff --git a/tests/openmp/init_exit_02.c b/tests/openmp/init_exit_02.c new file mode 100644 index 0000000..496c1a0 --- /dev/null +++ b/tests/openmp/init_exit_02.c @@ -0,0 +1,49 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include + +/* + * Check the starpu_omp_init/shutdown calls when called from constructor/destructor. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +int main(void) +{ + return 0; +} +#endif diff --git a/tests/openmp/parallel_01.c b/tests/openmp/parallel_01.c new file mode 100644 index 0000000..a7c04a2 --- /dev/null +++ b/tests/openmp/parallel_01.c @@ -0,0 +1,71 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP parallel region support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_02.c b/tests/openmp/parallel_02.c new file mode 100644 index 0000000..6695ef0 --- /dev/null +++ b/tests/openmp/parallel_02.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the nested OpenMP parallel regions support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_2_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] parallel region 2: task thread = %d\n", (void *)tid, worker_id); +} + +void parallel_region_1_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + struct starpu_omp_parallel_region_attr attr; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] parallel region 1: task thread = %d\n", (void *)tid, worker_id); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_2_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_1_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_03.c b/tests/openmp/parallel_03.c new file mode 100644 index 0000000..09f8f02 --- /dev/null +++ b/tests/openmp/parallel_03.c @@ -0,0 +1,72 @@ +/* StarPU --- Runtime system for 
heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check running multiple OpenMP parallel regions one at a time. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_barrier_01.c b/tests/openmp/parallel_barrier_01.c new file mode 100644 index 0000000..b25c2da --- 
/dev/null +++ b/tests/openmp/parallel_barrier_01.c @@ -0,0 +1,80 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP parallel barrier support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- barrier 1\n", (void *)tid, worker_id); + starpu_omp_barrier(); + printf("[tid %p] task thread = %d -- barrier 2\n", (void *)tid, worker_id); + starpu_omp_barrier(); + printf("[tid %p] task thread = %d -- barrier 3\n", (void *)tid, worker_id); + starpu_omp_barrier(); + printf("[tid %p] task thread = %d -- barrier 4\n", (void *)tid, worker_id); + starpu_omp_barrier(); +} + +int main(void) +{ + pthread_t tid; + struct starpu_omp_parallel_region_attr attr; + tid = pthread_self(); + memset(&attr, 0, 
sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_critical_01.c b/tests/openmp/parallel_critical_01.c new file mode 100644 index 0000000..259c75d --- /dev/null +++ b/tests/openmp/parallel_critical_01.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP critical support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void critical_g(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_critical(critical_g, NULL, NULL); + starpu_omp_critical(critical_g, NULL, NULL); + starpu_omp_critical(critical_g, NULL, NULL); + starpu_omp_critical(critical_g, NULL, NULL); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_critical_inline_01.c b/tests/openmp/parallel_critical_inline_01.c new file mode 100644 index 0000000..93ce40a --- /dev/null +++ b/tests/openmp/parallel_critical_inline_01.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the inline OpenMP critical support + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + + starpu_omp_critical_inline_begin(NULL); + printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end(NULL); + + starpu_omp_critical_inline_begin(NULL); + printf("[tid %p] task thread = %d -- critical\n", (void *)tid, 
worker_id); + starpu_omp_critical_inline_end(NULL); + + starpu_omp_critical_inline_begin(NULL); + printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end(NULL); + + starpu_omp_critical_inline_begin(NULL); + printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end(NULL); + + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_critical_named_01.c b/tests/openmp/parallel_critical_named_01.c new file mode 100644 index 0000000..0950649 --- /dev/null +++ b/tests/openmp/parallel_critical_named_01.c @@ -0,0 +1,102 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP named critical support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void critical_g(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); +} + +void critical_h(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_critical(critical_g, NULL, "g"); + starpu_omp_critical(critical_h, NULL, "h"); + starpu_omp_critical(critical_g, NULL, "g"); + starpu_omp_critical(critical_h, NULL, "h"); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_critical_named_inline_01.c b/tests/openmp/parallel_critical_named_inline_01.c new file mode 100644 index 0000000..24ce98a --- /dev/null +++ b/tests/openmp/parallel_critical_named_inline_01.c @@ -0,0 +1,95 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the inline OpenMP named critical support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + + starpu_omp_critical_inline_begin("g"); + printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end("g"); + + starpu_omp_critical_inline_begin("h"); + printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end("h"); + + starpu_omp_critical_inline_begin("g"); + printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end("g"); + + starpu_omp_critical_inline_begin("h"); + printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); + starpu_omp_critical_inline_end("h"); + + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_for_01.c b/tests/openmp/parallel_for_01.c new file mode 100644 index 0000000..b0706ed --- /dev/null +++ b/tests/openmp/parallel_for_01.c @@ -0,0 +1,198 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP parallel for support, with multiple schedule and chunk settings. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +#define NB_ITERS 256 +#define CHUNK 16 +unsigned long long array[NB_ITERS]; + +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void for_g(unsigned long long i, unsigned long long nb_i, void *arg) +{ + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); + for (; nb_i > 0; i++, nb_i--) + { + array[i] = 1; + } +} + +void parallel_region_1_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0); +} + +void parallel_region_2_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 0); +} + +void parallel_region_3_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0); +} + +void parallel_region_4_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int 
worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 0); +} + +void parallel_region_5_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0); +} + +void parallel_region_6_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0); +} + +static void clear_array(void) +{ + memset(array, 0, NB_ITERS*sizeof(unsigned long long)); +} + +static void check_array(void) +{ + unsigned long long i; + unsigned long long s = 0; + for (i = 0; i < NB_ITERS; i++) + { + s += array[i]; + } + if (s != NB_ITERS) + { + printf("missing iterations\n"); + exit(1); + } +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_1_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_2_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_3_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_4_f; + 
starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_5_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_6_f; + starpu_omp_parallel_region(&attr); + check_array(); + return 0; +} +#endif diff --git a/tests/openmp/parallel_for_02.c b/tests/openmp/parallel_for_02.c new file mode 100644 index 0000000..e41fdf7 --- /dev/null +++ b/tests/openmp/parallel_for_02.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check multiple OpenMP parallel for support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +#define NB_ITERS 4321 +#define CHUNK 42 +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void for_g(unsigned long long i, unsigned long long nb_i, void *arg) +{ + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); + for (; nb_i > 0; i++, nb_i--) + { + printf("[tid %p] task thread = %d, for [%s] iteration %llu\n", (void *)tid, worker_id, (const char *)arg, i); + } +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 1); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 1); + + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 1); + + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 1); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided 
nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 1); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_for_ordered_01.c b/tests/openmp/parallel_for_ordered_01.c new file mode 100644 index 0000000..dfa95bc --- /dev/null +++ b/tests/openmp/parallel_for_ordered_01.c @@ -0,0 +1,216 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP ordered parallel for support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +#define NB_ITERS 256 +#define CHUNK 16 +unsigned long long array[NB_ITERS]; + +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +struct s_ordered_arg +{ + const char *msg; + unsigned long long i; +}; + +void ordered_f(void *_arg) +{ + struct s_ordered_arg *arg = _arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, arg->msg, arg->i); +} + +void for_g(unsigned long long i, unsigned long long nb_i, void *arg) +{ + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); + for (; nb_i > 0; i++, nb_i--) + { + struct s_ordered_arg ordered_arg = { arg, i }; + array[i] = 1; + starpu_omp_ordered(ordered_f, &ordered_arg); + } +} + +void parallel_region_1_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 1, 0); +} + +void parallel_region_2_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 1, 0); +} + 
+void parallel_region_3_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 1, 0); +} + +void parallel_region_4_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 1, 0); +} + +void parallel_region_5_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0); +} + +void parallel_region_6_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0); +} + +static void clear_array(void) +{ + memset(array, 0, NB_ITERS*sizeof(unsigned long long)); +} + +static void check_array(void) +{ + unsigned long long i; + unsigned long long s = 0; + for (i = 0; i < NB_ITERS; i++) + { + s += array[i]; + } + if (s != NB_ITERS) + { + printf("missing iterations\n"); + exit(1); + } +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.where = STARPU_CPU; + attr.if_clause 
= 1; + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_1_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_2_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_3_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_4_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_5_f; + starpu_omp_parallel_region(&attr); + check_array(); + + clear_array(); + attr.cl.cpu_funcs[0] = parallel_region_6_f; + starpu_omp_parallel_region(&attr); + check_array(); + return 0; +} +#endif diff --git a/tests/openmp/parallel_master_01.c b/tests/openmp/parallel_master_01.c new file mode 100644 index 0000000..7dea89f --- /dev/null +++ b/tests/openmp/parallel_master_01.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP master support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void master_g(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_master(master_g, NULL); + starpu_omp_master(master_g, NULL); + starpu_omp_master(master_g, NULL); + starpu_omp_master(master_g, NULL); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_master_inline_01.c b/tests/openmp/parallel_master_inline_01.c new file mode 100644 index 0000000..3d3c639 --- /dev/null +++ b/tests/openmp/parallel_master_inline_01.c @@ -0,0 +1,86 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP inline master support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + if (starpu_omp_master_inline()) + printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); + if (starpu_omp_master_inline()) + printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); + if (starpu_omp_master_inline()) + printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); + if (starpu_omp_master_inline()) + printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_nested_lock_01.c b/tests/openmp/parallel_nested_lock_01.c new file mode 100644 index 0000000..b08a9af --- /dev/null +++ b/tests/openmp/parallel_nested_lock_01.c @@ -0,0 +1,128 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP nested lock support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +starpu_omp_nest_lock_t omp_nest_lock; + +void locked_func_n2(void) +{ + const int worker_id = starpu_worker_get_id(); + const pthread_t tid = pthread_self(); + printf("[tid %p] task thread = %d -- locked function n2\n", (void *)tid, worker_id); +} +void locked_func_n1(void) +{ + const int worker_id = starpu_worker_get_id(); + const pthread_t tid = pthread_self(); + printf("[tid %p] task thread = %d -- locked function n1 -->\n", (void *)tid, worker_id); + starpu_omp_set_nest_lock(&omp_nest_lock); + locked_func_n2(); + starpu_omp_unset_nest_lock(&omp_nest_lock); + printf("[tid %p] task thread = %d -- locked function n1 <--\n", (void *)tid, worker_id); +} + +void master_g1(void *arg) +{ + (void)arg; + starpu_omp_init_nest_lock(&omp_nest_lock); +} + +void master_g2(void *arg) +{ + (void)arg; + starpu_omp_destroy_nest_lock(&omp_nest_lock); +} + +void parallel_region_f(void *buffers[], void *args) +{ + const int worker_id = starpu_worker_get_id(); + const pthread_t tid = pthread_self(); + (void) buffers; + (void) args; + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_master(master_g1, NULL); + starpu_omp_barrier(); + + starpu_omp_set_nest_lock(&omp_nest_lock); + locked_func_n1(); + starpu_omp_unset_nest_lock(&omp_nest_lock); + + starpu_omp_set_nest_lock(&omp_nest_lock); + locked_func_n1(); + starpu_omp_unset_nest_lock(&omp_nest_lock); + + starpu_omp_set_nest_lock(&omp_nest_lock); + locked_func_n1(); + starpu_omp_unset_nest_lock(&omp_nest_lock); + + starpu_omp_set_nest_lock(&omp_nest_lock); + locked_func_n1(); + 
starpu_omp_unset_nest_lock(&omp_nest_lock); + + starpu_omp_barrier(); + starpu_omp_master(master_g2, NULL); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_sections_01.c b/tests/openmp/parallel_sections_01.c new file mode 100644 index 0000000..a279db4 --- /dev/null +++ b/tests/openmp/parallel_sections_01.c @@ -0,0 +1,115 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP parallel sections support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void func(void *arg) +{ + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, section [%s]\n", (void *)tid, worker_id, (const char *)arg); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + void (*section_f[4])(void *); + void *section_args[4]; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + + section_f[0] = func; + section_f[1] = func; + section_f[2] = func; + section_f[3] = func; + + section_args[0] = (void *)"A"; + section_args[1] = (void *)"B"; + section_args[2] = (void *)"C"; + section_args[3] = (void *)"D"; + + starpu_omp_sections(4, section_f, section_args, 0); + + section_args[0] = (void *)"E"; + section_args[1] = (void *)"F"; + section_args[2] = (void *)"G"; + section_args[3] = (void *)"H"; + + starpu_omp_sections(4, section_f, section_args, 0); + + section_args[0] = (void *)"I"; + section_args[1] = (void *)"J"; + section_args[2] = (void *)"K"; + section_args[3] = (void *)"L"; + + starpu_omp_sections(4, section_f, section_args, 0); + + section_args[0] = (void *)"M"; + section_args[1] = (void *)"N"; + section_args[2] = (void *)"O"; + section_args[3] = (void *)"P"; + + starpu_omp_sections(4, section_f, section_args, 0); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = 
STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_sections_combined_01.c b/tests/openmp/parallel_sections_combined_01.c new file mode 100644 index 0000000..1626be4 --- /dev/null +++ b/tests/openmp/parallel_sections_combined_01.c @@ -0,0 +1,109 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP combined parallel sections support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void func(unsigned long long section_num, void *arg) +{ + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d, section [%llu: %s]\n", (void *)tid, worker_id, section_num, (const char *)arg); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + void *section_args[4]; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); + + section_args[0] = (void *)"A"; + section_args[1] = (void *)"B"; + section_args[2] = (void *)"C"; + section_args[3] = (void *)"D"; + + starpu_omp_sections_combined(4, func, section_args, 0); + + section_args[0] = (void *)"E"; + section_args[1] = (void *)"F"; + section_args[2] = (void *)"G"; + section_args[3] = (void *)"H"; + + starpu_omp_sections_combined(4, func, section_args, 0); + + section_args[0] = (void *)"I"; + section_args[1] = (void *)"J"; + section_args[2] = (void *)"K"; + section_args[3] = (void *)"L"; + + starpu_omp_sections_combined(4, func, section_args, 0); + + section_args[0] = (void *)"M"; + section_args[1] = (void *)"N"; + section_args[2] = (void *)"O"; + section_args[3] = (void *)"P"; + + starpu_omp_sections_combined(4, func, section_args, 0); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + 
attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/parallel_simple_lock_01.c b/tests/openmp/parallel_simple_lock_01.c new file mode 100644 index 0000000..0c17f3d --- /dev/null +++ b/tests/openmp/parallel_simple_lock_01.c @@ -0,0 +1,118 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP simple lock support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +starpu_omp_lock_t omp_lock; + +void locked_func(void) +{ + const int worker_id = starpu_worker_get_id(); + const pthread_t tid = pthread_self(); + printf("[tid %p] task thread = %d -- locked function\n", (void *)tid, worker_id); +} + +void master_g1(void *arg) +{ + (void)arg; + starpu_omp_init_lock(&omp_lock); +} + +void master_g2(void *arg) +{ + (void)arg; + starpu_omp_destroy_lock(&omp_lock); +} + +void parallel_region_f(void *buffers[], void *args) +{ + const int worker_id = starpu_worker_get_id(); + const pthread_t tid = pthread_self(); + (void) buffers; + (void) args; + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_master(master_g1, NULL); + starpu_omp_barrier(); + + starpu_omp_set_lock(&omp_lock); + locked_func(); + starpu_omp_unset_lock(&omp_lock); + + starpu_omp_set_lock(&omp_lock); + locked_func(); + starpu_omp_unset_lock(&omp_lock); + + starpu_omp_set_lock(&omp_lock); + locked_func(); + starpu_omp_unset_lock(&omp_lock); + + starpu_omp_set_lock(&omp_lock); + locked_func(); + starpu_omp_unset_lock(&omp_lock); + + starpu_omp_barrier(); + starpu_omp_master(master_g2, NULL); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + printf("
    \n"); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_single_copyprivate_01.c b/tests/openmp/parallel_single_copyprivate_01.c new file mode 100644 index 0000000..6a8afc5 --- /dev/null +++ b/tests/openmp/parallel_single_copyprivate_01.c @@ -0,0 +1,100 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP single with copyprivate support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void single_g(void *arg, void *_data, unsigned long long data_size) +{ + (void) arg; + int *data = _data; + STARPU_ASSERT(data_size >= sizeof(*data)); + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + *data = worker_id; + printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + int single_worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); + printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); + starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); + printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); + starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); + printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); + starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); + printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) 
+{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_single_copyprivate_inline_01.c b/tests/openmp/parallel_single_copyprivate_inline_01.c new file mode 100644 index 0000000..5559e38 --- /dev/null +++ b/tests/openmp/parallel_single_copyprivate_inline_01.c @@ -0,0 +1,99 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP inline single with copyprivate support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + int single_worker_id; + int i; + + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + + for (i=0; i<4; i++) + { + int *single_data; + + if ((single_data = starpu_omp_single_copyprivate_inline_begin(&single_worker_id)) == NULL) + { + printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); + single_worker_id = worker_id; + } + else + { + memcpy(&single_worker_id, single_data, sizeof(single_worker_id)); + } + starpu_omp_single_copyprivate_inline_end(); + printf("[tid %p] task thread = %d -- single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); + } + + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_single_inline_01.c b/tests/openmp/parallel_single_inline_01.c new file mode 100644 index 0000000..a883ae0 --- /dev/null +++ b/tests/openmp/parallel_single_inline_01.c @@ -0,0 +1,103 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP inline single support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + + /* nowait = 0 */ + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + starpu_omp_barrier(); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + starpu_omp_barrier(); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + starpu_omp_barrier(); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + starpu_omp_barrier(); + + /* nowait = 1 */ + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + if (starpu_omp_single_inline()) + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); + + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = 
&starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_single_nowait_01.c b/tests/openmp/parallel_single_nowait_01.c new file mode 100644 index 0000000..6c98008 --- /dev/null +++ b/tests/openmp/parallel_single_nowait_01.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP single nowait support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void single_g(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_single(single_g, NULL, 1); + starpu_omp_single(single_g, NULL, 1); + starpu_omp_single(single_g, NULL, 1); + starpu_omp_single(single_g, NULL, 1); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/parallel_single_wait_01.c b/tests/openmp/parallel_single_wait_01.c new file mode 100644 index 0000000..b69445d --- /dev/null +++ b/tests/openmp/parallel_single_wait_01.c @@ -0,0 +1,92 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP single wait support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void single_g(void *arg) +{ + (void) arg; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); + starpu_omp_single(single_g, NULL, 0); + starpu_omp_single(single_g, NULL, 0); + starpu_omp_single(single_g, NULL, 0); + starpu_omp_single(single_g, NULL, 0); + printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + pthread_t tid; + tid = pthread_self(); + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + starpu_omp_parallel_region(&attr); + printf("
    \n"); + return 0; +} +#endif diff --git a/tests/openmp/task_01.c b/tests/openmp/task_01.c new file mode 100644 index 0000000..3e12b26 --- /dev/null +++ b/tests/openmp/task_01.c @@ -0,0 +1,100 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP task support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + struct starpu_omp_task_region_attr attr; + + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); + + memset(&attr, 
0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/task_02.c b/tests/openmp/task_02.c new file mode 100644 index 0000000..afe8666 --- /dev/null +++ b/tests/openmp/task_02.c @@ -0,0 +1,219 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP nested task support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +#define NX 64 +int global_vector[NX]; + +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_h(void *buffers[], void *args) +{ + struct starpu_vector_interface *_vector = buffers[0]; + int nx = STARPU_VECTOR_GET_NX(_vector); + int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); + int f = (int)(intptr_t)args; + int i; + + printf("depth 2 task, entry: vector ptr = %p\n", v); + + for (i = 0; i < nx; i++) + { + v[i] += f; + } + + printf("depth 2 task ending\n"); +} + +void task_region_g(void *buffers[], void *args) +{ + struct starpu_vector_interface *_vector = buffers[0]; + + int nx = STARPU_VECTOR_GET_NX(_vector); + int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); + int f = (int)(intptr_t)args; + + printf("depth 1 task, entry: vector ptr = %p\n", v); + + { + starpu_data_handle_t task_vector_handle; + int i; + + for (i = 0; i < nx; i++) + { + v[i] += f; + } + + starpu_vector_data_register(&task_vector_handle, STARPU_MAIN_RAM, (uintptr_t)v, NX, sizeof(v[0])); + starpu_omp_handle_register(task_vector_handle); + printf("depth 1 task, block 1: task_vector_handle = %p\n", task_vector_handle); + } + + { + starpu_data_handle_t task_vector_handle; + struct starpu_omp_task_region_attr attr; + int i; + + task_vector_handle = starpu_omp_data_lookup(v); + printf("depth 1 task, block 2: task_vector_handle = %p\n", task_vector_handle); + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_h; + attr.cl.where = STARPU_CPU; + attr.cl.nbuffers = 1; + attr.cl.modes[0] = STARPU_RW; + attr.handles = 
&task_vector_handle; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + i = 0; + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + } + + starpu_omp_taskwait(); +} + +void master_g1(void *arg) /* fills global_vector with 1 and registers it as a vector handle */ +{ + (void)arg; + starpu_data_handle_t region_vector_handle; + int i; + + printf("master_g1: vector ptr = %p\n", global_vector); + for (i = 0; i < NX; i++) + { + global_vector[i] = 1; + } + + starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector, NX, sizeof(global_vector[0])); + starpu_omp_handle_register(region_vector_handle); + printf("master_g1: region_vector_handle = %p\n", region_vector_handle); +} + +void master_g2(void *arg) /* looks the handle up by address and submits four RW task_region_g tasks on it */ +{ + (void)arg; + starpu_data_handle_t region_vector_handle; + struct starpu_omp_task_region_attr attr; + int i; + + region_vector_handle = starpu_omp_data_lookup(global_vector); + printf("master_g2: region_vector_handle = %p\n", region_vector_handle); + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl.nbuffers = 1; + attr.cl.modes[0] = STARPU_RW; + attr.handles = &region_vector_handle; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + i = 0; + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void)buffers; + 
(void)args; + starpu_omp_master(master_g1, NULL); + starpu_omp_barrier(); + { + starpu_data_handle_t region_vector_handle; + region_vector_handle = starpu_omp_data_lookup(global_vector); + printf("parallel_region block 1: region_vector_handle = %p\n", region_vector_handle); + } + starpu_omp_barrier(); + starpu_omp_master(master_g2, NULL); + starpu_omp_barrier(); + { + starpu_data_handle_t region_vector_handle; + region_vector_handle = starpu_omp_data_lookup(global_vector); + printf("parallel_region block 2: region_vector_handle = %p\n", region_vector_handle); + } +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/task_03.c b/tests/openmp/task_03.c new file mode 100644 index 0000000..7eac15a --- /dev/null +++ b/tests/openmp/task_03.c @@ -0,0 +1,77 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP orphaned task support. 
+ */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"f\"\n", (void *)tid, worker_id); +} + +int main(void) +{ + struct starpu_omp_task_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + starpu_omp_task_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/taskgroup_01.c b/tests/openmp/taskgroup_01.c new file mode 100644 index 0000000..d1607e7 --- /dev/null +++ b/tests/openmp/taskgroup_01.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP taskgroup support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + int i = (int)(intptr_t) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); +} + +void taskgroup_f(void *arg) +{ + struct starpu_omp_task_region_attr attr; + int *p_i = (int *)arg; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + attr.cl_arg = (void *)(intptr_t)(*p_i)++; + starpu_omp_task_region(&attr); + + attr.cl_arg = (void *)(intptr_t)(*p_i)++; + starpu_omp_task_region(&attr); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + struct starpu_omp_task_region_attr attr; + int i = 0; + + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread 
= %d: implicit task \"f\"\n", (void *)tid, worker_id); + + starpu_omp_taskgroup(taskgroup_f, (void *)&i); + printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); + + starpu_omp_taskgroup(taskgroup_f, (void *)&i); + printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/taskgroup_02.c b/tests/openmp/taskgroup_02.c new file mode 100644 index 0000000..c2e5048 --- /dev/null +++ b/tests/openmp/taskgroup_02.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP multiple taskgroup support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + int i = (int)(intptr_t) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); +} + +void taskgroup_f(void *arg) +{ + struct starpu_omp_task_region_attr attr; + int *p_i = (int *)arg; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + attr.cl_arg = (void *)(intptr_t)(*p_i)++; + starpu_omp_task_region(&attr); + + attr.cl_arg = (void *)(intptr_t)(*p_i)++; + starpu_omp_task_region(&attr); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + struct starpu_omp_task_region_attr attr; + int i = 0; + + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] 
task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); + + starpu_omp_taskgroup_inline_begin(); + taskgroup_f((void *)&i); + starpu_omp_taskgroup_inline_end(); + printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); + + starpu_omp_taskgroup_inline_begin(); + taskgroup_f((void *)&i); + starpu_omp_taskgroup_inline_end(); + printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/openmp/taskloop.c b/tests/openmp/taskloop.c new file mode 100644 index 0000000..566e59d --- /dev/null +++ b/tests/openmp/taskloop.c @@ -0,0 +1,87 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ +#include +#include +#include +#include "../helper.h" + +/* + * Check the OpenMP taskloop support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void taskloop_callback(unsigned long long begin_i, unsigned long long end_i) +{ + int worker_id; /* NOTE(review): worker_id and tid are assigned below but never read */ + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf ("begin = %llu , end = %llu, %p\n", begin_i, end_i, (void *)starpu_task_get_current()); +} + +void taskloop_callback_wrapper(void *buffers[], void *_args) +{ + (void) buffers; + struct starpu_omp_task_region_attr * args = _args; /* the codelet argument is interpreted as a task-region attr */ + taskloop_callback(args->begin_i, args->end_i); +} + +int main(void) +{ + struct starpu_omp_task_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = taskloop_callback_wrapper; + attr.cl_arg = &attr; /* the attr itself is the argument; the wrapper reads begin_i/end_i from what it receives */ + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + attr.nogroup_clause = 0; + attr.is_loop = 0; + attr.collapse = 0; + attr.num_tasks = 5; + attr.nb_iterations = 400; + attr.grainsize = 130; + + starpu_omp_taskloop_inline_begin(&attr); + starpu_omp_taskloop_inline_end(&attr); + return 0; +} +#endif diff --git a/tests/openmp/taskwait_01.c b/tests/openmp/taskwait_01.c new 
file mode 100644 index 0000000..1a1874c --- /dev/null +++ b/tests/openmp/taskwait_01.c @@ -0,0 +1,118 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Check the OpenMP taskwait support. + */ + +#if !defined(STARPU_OPENMP) +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else +__attribute__((constructor)) +static void omp_constructor(void) +{ + int ret = starpu_omp_init(); + if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); +} + +__attribute__((destructor)) +static void omp_destructor(void) +{ + starpu_omp_shutdown(); +} + +void task_region_g(void *buffers[], void *args) +{ + (void) buffers; + int i = (int)(intptr_t) args; + int worker_id; + pthread_t tid; + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); +} + +void parallel_region_f(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + int worker_id; + pthread_t tid; + struct starpu_omp_task_region_attr attr; + int i = 0; + + tid = pthread_self(); + worker_id = starpu_worker_get_id(); + printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); + + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + 
attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = task_region_g; + attr.cl.where = STARPU_CPU; + attr.cl_arg_size = sizeof(void *); + attr.cl_arg_free = 0; + attr.if_clause = 1; + attr.final_clause = 0; + attr.untied_clause = 1; + attr.mergeable_clause = 0; + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + starpu_omp_taskwait(); + printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id); + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + starpu_omp_taskwait(); + printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id); + + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); + attr.cl_arg = (void *)(intptr_t)i++; + starpu_omp_task_region(&attr); +} + +int main(void) +{ + struct starpu_omp_parallel_region_attr attr; + memset(&attr, 0, sizeof(attr)); +#ifdef STARPU_SIMGRID + attr.cl.model = &starpu_perfmodel_nop; +#endif + attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; + attr.cl.cpu_funcs[0] = parallel_region_f; + attr.cl.where = STARPU_CPU; + attr.if_clause = 1; + starpu_omp_parallel_region(&attr); + return 0; +} +#endif diff --git a/tests/overlap/gpu_concurrency.c b/tests/overlap/gpu_concurrency.c new file mode 100644 index 0000000..7705fc8 --- /dev/null +++ b/tests/overlap/gpu_concurrency.c @@ -0,0 +1,125 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Check that concurrency does happen when using multi-stream CUDA. + */ + +#ifdef STARPU_QUICK_CHECK +#define NITERS 100000 +#else +#define NITERS 1000000 +#endif +#define NTASKS 64 +#define SYNC 16 + +#ifdef STARPU_USE_CUDA +extern void long_kernel_cuda(unsigned long niters); + +void codelet_long_kernel_async(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + long_kernel_cuda(NITERS); +} + +void codelet_long_kernel_sync(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + long_kernel_cuda(NITERS); + cudaStreamSynchronize(starpu_cuda_get_local_stream()); +} + +static struct starpu_perfmodel model_async = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "long_kernel_async", +}; + +static struct starpu_perfmodel model_sync = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "long_kernel_sync", +}; + +static struct starpu_codelet cl_async = +{ + .cuda_funcs = {codelet_long_kernel_async}, + .cuda_flags = {STARPU_CUDA_ASYNC}, + .nbuffers = 0, + .model = &model_async, +}; + +static struct starpu_codelet cl = +{ + .cuda_funcs = {codelet_long_kernel_sync}, + .nbuffers = 0, + .model = &model_sync, +}; +#endif + +int main(int argc STARPU_ATTRIBUTE_UNUSED, char **argv STARPU_ATTRIBUTE_UNUSED) +{ +#ifndef STARPU_USE_CUDA + return STARPU_TEST_SKIPPED; +#else + 
setenv("STARPU_NWORKER_PER_CUDA", "4", 1); + int ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + if (starpu_cuda_worker_get_count() == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + unsigned iter; + for (iter = 0; iter < NTASKS; iter++) + { + struct starpu_task *task = starpu_task_create(); + + if (!(iter % SYNC)) + /* Insert a synchronous task, just for fun */ + task->cl = &cl; + else + task->cl = &cl_async; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_shutdown(); + + STARPU_RETURN(EXIT_SUCCESS); + +enodev: + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + STARPU_RETURN(STARPU_TEST_SKIPPED); +#endif +} diff --git a/tests/overlap/long_kernel.cu b/tests/overlap/long_kernel.cu new file mode 100644 index 0000000..d67d693 --- /dev/null +++ b/tests/overlap/long_kernel.cu @@ -0,0 +1,34 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include <starpu.h> + +extern "C" __global__ +void long_kernel(unsigned long niters) +{ + unsigned long i; + for (i = 0; i < niters; i++) + __syncthreads(); +} + +extern "C" void long_kernel_cuda(unsigned long niters) +{ + dim3 dimBlock(1,1); + dim3 dimGrid(1,1); + long_kernel<<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>>(niters); /* launched on the worker's local stream, the one the synchronous codelet waits on */ + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/tests/overlap/overlap.c b/tests/overlap/overlap.c new file mode 100644 index 0000000..461f77b --- /dev/null +++ b/tests/overlap/overlap.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2010 Mehdi Juhoor + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include "../helper.h" +#include + +/* + * Check that working on a partitioned vector gets overlapping of prefetches etc. 
+ */ + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 100 +#else +#define NTASKS 10000 +#endif +#define VECTORSIZE 1024 +#define TASKDURATION 24U + +#define SYMBOL "sleep" + +static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; +static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; + +static unsigned finished = 0; +static unsigned cnt = NTASKS; + +static void callback(void *arg) +{ + (void)arg; + + unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); + ANNOTATE_HAPPENS_BEFORE(&cnt); + + if (res == 0) + { + ANNOTATE_HAPPENS_AFTER(&cnt); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + finished = 1; + STARPU_PTHREAD_COND_SIGNAL(&cond); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + } +} + +void codelet_sleep(void *descr[], void *_args) +{ + (void)descr; + (void)_args; + + STARPU_SKIP_IF_VALGRIND; + + starpu_usleep(TASKDURATION); +} + +static struct starpu_perfmodel model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = NULL /* to be defined later */ +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = {codelet_sleep}, + .cuda_funcs = {codelet_sleep}, +#ifndef STARPU_SIMGRID + .opencl_funcs = {codelet_sleep}, +#endif + .cpu_funcs_name = {"codelet_sleep"}, + .nbuffers = 1, + .modes = {STARPU_R}, + .model = &model +}; + +static char symbolname[128]; + +int main(int argc, char **argv) +{ + int ret; + starpu_data_handle_t handle; + float *buffer; + + ret = starpu_initialize(NULL, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* create data */ + starpu_malloc((void **)&buffer, NTASKS*VECTORSIZE*sizeof(char)); + + /* declare data to StarPU */ + starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)buffer, + NTASKS*VECTORSIZE, sizeof(char)); + + struct starpu_data_filter f = + { + .filter_func = starpu_vector_filter_block, + .nchildren = NTASKS + }; + + starpu_data_partition(handle, &f); + + snprintf(symbolname, sizeof(symbolname), "overlap_sleep_%d_%u", VECTORSIZE, TASKDURATION); + 
+ model.symbol = symbolname; + + unsigned iter; + for (iter = 0; iter < NTASKS; iter++) + { + struct starpu_task *task = starpu_task_create(); + task->cl = &cl; + + task->handles[0] = starpu_data_get_sub_data(handle, 1, iter); + + task->callback_func = callback; + task->callback_arg = NULL; + + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + STARPU_PTHREAD_MUTEX_LOCK(&mutex); + while (!finished) /* re-check the predicate: a condition wait may wake up spuriously */ + STARPU_PTHREAD_COND_WAIT(&cond, &mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); + + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_free_noflag(buffer, NTASKS*VECTORSIZE*sizeof(char)); + starpu_shutdown(); + + STARPU_RETURN(EXIT_SUCCESS); + +enodev: + starpu_data_unpartition(handle, STARPU_MAIN_RAM); + starpu_data_unregister(handle); + starpu_free_noflag(buffer, NTASKS*VECTORSIZE*sizeof(char)); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + starpu_shutdown(); + STARPU_RETURN(STARPU_TEST_SKIPPED); +} diff --git a/tests/overlap/overlap.sh b/tests/overlap/overlap.sh new file mode 100755 index 0000000..ab9ca7b --- /dev/null +++ b/tests/overlap/overlap.sh @@ -0,0 +1,76 @@ +#!/bin/sh -x +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Test parsing of FxT traces + +if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi + +# Testing another specific scheduler, no need to run this +[ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dmdas ] || exit 77 + +# XXX: Also see examples/mult/sgemm.sh + +set -e + +PREFIX=$(dirname $0) +rm -rf $PREFIX/overlap.traces +mkdir -p $PREFIX/overlap.traces + +export STARPU_FXT_PREFIX=$PREFIX/overlap.traces + +STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/overlap +if [ -x $PREFIX/../../tools/starpu_fxt_tool ]; +then + $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s overlap_sleep_1024_24 -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 + [ -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24.gp -a -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24.data -a -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24_avg.data ] + + # Generate paje, dag, data, etc. + $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 + + $PREFIX/../../tools/starpu_paje_sort $STARPU_FXT_PREFIX/paje.trace + ! 
type pj_dump || pj_dump -e 0 < $STARPU_FXT_PREFIX/paje.trace + + $PREFIX/../../tools/starpu_codelet_profile $STARPU_FXT_PREFIX/distrib.data overlap_sleep_1024_24 + [ -f $STARPU_FXT_PREFIX/distrib.data.gp -a \( -f $STARPU_FXT_PREFIX/distrib.data.0 -o -f $STARPU_FXT_PREFIX/distrib.data.1 -o -f $STARPU_FXT_PREFIX/distrib.data.2 -o -f $STARPU_FXT_PREFIX/distrib.data.3 -o -f $STARPU_FXT_PREFIX/distrib.data.4 -o -f $STARPU_FXT_PREFIX/distrib.data.5 -o -f $STARPU_FXT_PREFIX/distrib.data.6 \) ] + + $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_data_trace -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/prof_file_${USER}_0 overlap_sleep_1024_24 + [ -f $STARPU_FXT_PREFIX/data_trace.gp ] + + $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_stats -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 + $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_tasks_rec_complete $STARPU_FXT_PREFIX/tasks.rec $STARPU_FXT_PREFIX/tasks2.rec + python3 $PREFIX/../../tools/starpu_trace_state_stats.py $STARPU_FXT_PREFIX/trace.rec + ! type gnuplot || ( $PREFIX/../../tools/starpu_workers_activity -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/activity.data && [ -f $STARPU_FXT_PREFIX/activity.eps ] ) + + # needs some R packages + $PREFIX/../../tools/starpu_paje_draw_histogram $STARPU_FXT_PREFIX/paje.trace || true + $PREFIX/../../tools/starpu_paje_state_stats $STARPU_FXT_PREFIX/paje.trace || true + $PREFIX/../../tools/starpu_paje_summary $STARPU_FXT_PREFIX/paje.trace || true + $PREFIX/../../tools/starpu_codelet_histo_profile $STARPU_FXT_PREFIX/distrib.data || true + [ -f $STARPU_FXT_PREFIX/distrib.data.overlap_sleep_1024_24.0.a3d3725e.1024.pdf ] || true + + if [ -x $PREFIX/../../tools/starpu_replay ]; then + $STARPU_LAUNCH $PREFIX/../../tools/starpu_replay $STARPU_FXT_PREFIX/tasks.rec + fi + + [ ! 
-x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump $STARPU_FXT_PREFIX/tasks.rec -o $STARPU_FXT_PREFIX/perfs2.rec + [ -f $STARPU_FXT_PREFIX/perfs2.rec ] +fi + +[ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -s overlap_sleep_1024_24 +[ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -x -s overlap_sleep_1024_24 +[ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump -o $STARPU_FXT_PREFIX/perfs.rec +[ -f $STARPU_FXT_PREFIX/perfs.rec ] diff --git a/tests/parallel_tasks/combined_worker_assign_workerid.c b/tests/parallel_tasks/combined_worker_assign_workerid.c new file mode 100644 index 0000000..3dd464c --- /dev/null +++ b/tests/parallel_tasks/combined_worker_assign_workerid.c @@ -0,0 +1,157 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Check that one create a combined worker by hand and run tasks on it. 
+ */
+
+#ifndef STARPU_QUICK_CHECK
+#define N	1000
+#else
+#define N	100
+#endif
+#define VECTORSIZE	1024
+
+static int combined_workerid; /* id of the hand-made combined worker, set in main() */
+static int combined_ncpus; /* number of CPU workers put in that combined worker */
+
+void codelet_null(void *descr[], void *_args) /* kernel: checks where it runs, then sleeps */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	STARPU_ASSERT(starpu_combined_worker_get_id() == combined_workerid); /* must run on our combined worker */
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size == combined_ncpus);
+	starpu_usleep(1000./worker_size);
+#if 1
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size);
+#endif
+}
+
+static struct starpu_codelet cl =
+{
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.cuda_funcs = {codelet_null},
+	.opencl_funcs = {codelet_null},
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when the test cannot run */
+{
+	starpu_data_handle_t v_handle;
+	unsigned *v;
+	int ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init");
+	conf.sched_policy_name = "pheft";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_sched_policy *policy = starpu_sched_get_sched_policy();
+	if (strcmp(conf.sched_policy_name, policy->policy_name)) goto shutdown; /* another scheduler is active: skip */
+
+	combined_ncpus = starpu_cpu_worker_get_count();
+	if (combined_ncpus < 4) goto shutdown; /* skip when fewer than 4 CPU workers */
+
+	int *workerids = malloc(sizeof(int) * combined_ncpus);
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workerids, combined_ncpus);
+	combined_ncpus /= 2; /* combine only the first half of the CPU workers */
+
+	unsigned ctx_id = starpu_sched_ctx_get_context();
+	if (ctx_id == STARPU_NMAX_SCHED_CTXS)
+		ctx_id = 0;
+
+	combined_workerid = starpu_combined_worker_assign_workerid(combined_ncpus, workerids);
+	free(workerids);
+
+	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(ctx_id);
+	workers->add(workers, combined_workerid);
+
+	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	/* Allow tasks only on this combined worker: one bit per worker id, so bit combined_workerid needs combined_workerid/32 + 1 words */
+	int nuint32 = combined_workerid / 32 + 1; /* (id + 31) / 32 was one word short whenever id % 32 == 0 */
+	uint32_t *forced_workerids = calloc(nuint32, sizeof(uint32_t)); /* zero-filled bitmap */
+	STARPU_ASSERT(forced_workerids);
+	forced_workerids[combined_workerid / 32] |= 1U << (combined_workerid%32);
+
+	unsigned iter;
+	for (iter = 0; iter < N; iter++)
+	{
+		/* execute a task on that worker */
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &cl;
+
+		task->handles[0] = v_handle;
+
+		if (iter % 2) /* alternate between the two ways of forcing the worker */
+		{
+			task->workerids = forced_workerids;
+			task->workerids_len = nuint32;
+		}
+		else
+		{
+			task->execute_on_a_specific_worker = 1;
+			task->workerid = combined_workerid;
+		}
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; }
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	free(forced_workerids);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	free(forced_workerids);
+	fprintf(stderr, "WARNING: No one can execute the task on workerid %d\n", combined_workerid);
+shutdown:
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/parallel_tasks/cuda_only.c b/tests/parallel_tasks/cuda_only.c
new file mode 100644
index 0000000..32dbc76
--- /dev/null
+++
b/tests/parallel_tasks/cuda_only.c
@@ -0,0 +1,114 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Check that pheft works with only GPUs
+ */
+
+void codelet_null(void *descr[], void *_args) /* empty kernel: does no work at all */
+{
+	(void)descr;
+	(void)_args;
+}
+
+struct starpu_perfmodel model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "test"
+};
+
+static struct starpu_codelet cl = /* CUDA-only codelet accessing its buffer in read mode */
+{
+	.cuda_funcs = {codelet_null},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+struct starpu_perfmodel model2 =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "test2"
+};
+
+static struct starpu_codelet cl2 = /* CUDA-only codelet accessing its buffer in write mode */
+{
+	.cuda_funcs = {codelet_null},
+	.model = &model2,
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+	starpu_data_handle_t handle;
+	unsigned data = 0; /* initialized: the first task accesses it in STARPU_R mode */
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "pheft";
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&data, sizeof(data));
+
+	unsigned iter;
+	struct starpu_task *task; /* declared outside the loop: the enodev path destroys it */
+	for (iter = 0; iter < 100; iter++)
+	{
+		task = starpu_task_create();
+		task->cl = &cl;
+		task->handles[0] = handle;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		task = starpu_task_create();
+		task->cl = &cl2;
+		task->handles[0] = handle;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	task->destroy = 0; /* the rejected task was never queued: release it by hand */
+	starpu_task_destroy(task);
+	starpu_data_unregister(handle);
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}
diff --git a/tests/parallel_tasks/explicit_combined_worker.c b/tests/parallel_tasks/explicit_combined_worker.c
new file mode 100644
index 0000000..3de3265
--- /dev/null
+++ b/tests/parallel_tasks/explicit_combined_worker.c
@@ -0,0 +1,122 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Check that one can bind a parallel task on a parallel worker
+ */
+
+#ifndef STARPU_QUICK_CHECK
+#define N	1000
+#else
+#define N	100
+#endif
+#define VECTORSIZE	1024
+
+void codelet_null(void *descr[], void *_args) /* kernel: sleeps 1000/worker_size, then prints where it ran */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+	starpu_usleep(1000./worker_size);
+#if 1
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size);
+#endif
+}
+
+static struct starpu_codelet cl = /* fork-join codelet with the same kernel for CPU, CUDA and OpenCL */
+{
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.cuda_funcs = {codelet_null},
+	.opencl_funcs = {codelet_null},
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	starpu_data_handle_t v_handle;
+	unsigned *v;
+	int ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init");
+	conf.sched_policy_name = "pheft";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); /* basic plus combined workers */
+
+	unsigned iter, worker;
+	for (iter = 0; iter < N; iter++)
+	{
+		for (worker = 0; worker < nworker; worker++)
+		{
+			/* execute a task on that worker */
+			struct starpu_task *task = starpu_task_create();
+			task->cl = &cl;
+
+			task->handles[0] = v_handle;
+
+			task->execute_on_a_specific_worker = 1;
+			task->workerid = worker;
+
+			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; }
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	fprintf(stderr, "WARNING: No one can execute the task on workerid %u\n", worker);
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/parallel_tasks/parallel_kernels.c b/tests/parallel_tasks/parallel_kernels.c
new file mode 100644
index 0000000..e47734f
--- /dev/null
+++ b/tests/parallel_tasks/parallel_kernels.c
@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Submit dumb parallel forkjoin tasks
+ */
+
+#ifndef STARPU_QUICK_CHECK
+#define N	1000
+#else
+#define N	10
+#endif
+#define VECTORSIZE	1024
+
+void codelet_null(void *descr[], void *_args) /* kernel: sleeps 1000/worker_size */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+	starpu_usleep(1000./worker_size);
+#if 0
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size);
+#endif
+}
+
+struct starpu_perfmodel model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "parallel_kernel_test"
+};
+
+static struct starpu_codelet cl = /* fork-join codelet with the same kernel for CPU, CUDA and OpenCL */
+{
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cuda_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.opencl_funcs = {codelet_null},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+	starpu_data_handle_t v_handle;
+	unsigned *v;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "pheft";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); /* basic plus combined workers */
+
+	unsigned iter, worker, n;
+	n = N;
+	if (STARPU_RUNNING_ON_VALGRIND)
+	{
+		n /= 300; /* far fewer iterations under valgrind */
+	}
+	for (iter = 0; iter < n; iter++)
+	{
+		for (worker = 0; worker < nworker; worker++)
+		{
+			/* submit one task per worker, without binding it to that worker */
+			struct starpu_task *task = starpu_task_create();
+			task->cl = &cl;
+
+			task->handles[0] = v_handle;
+
+			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } /* do not leak the rejected task */
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}
diff --git a/tests/parallel_tasks/parallel_kernels_spmd.c b/tests/parallel_tasks/parallel_kernels_spmd.c
new file mode 100644
index 0000000..36b6197
--- /dev/null
+++ b/tests/parallel_tasks/parallel_kernels_spmd.c
@@ -0,0 +1,130 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Submit dumb parallel spmd tasks
+ */
+
+#ifndef STARPU_QUICK_CHECK
+#define N	1000
+#else
+#define N	100
+#endif
+#define VECTORSIZE	1024
+
+void codelet_null(void *descr[], void *_args) /* kernel: sleeps 1000/worker_size */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+
+	starpu_usleep(1000./worker_size);
+#if 0
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	int rank = starpu_combined_worker_get_rank();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d - SPMD rank %d\n", id, combined_id, worker_size, rank);
+#endif
+}
+
+struct starpu_perfmodel model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "parallel_kernel_test_spmd"
+};
+
+static struct starpu_codelet cl = /* SPMD codelet with the same kernel for CPU, CUDA and OpenCL */
+{
+	.type = STARPU_SPMD,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.cuda_funcs = {codelet_null},
+	.opencl_funcs = {codelet_null},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+	starpu_data_handle_t v_handle;
+	unsigned *v;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "pheft";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); /* basic plus combined workers */
+
+	unsigned iter, worker, n;
+	n = N;
+	if (STARPU_RUNNING_ON_VALGRIND)
+	{
+		n /= 300; /* far fewer iterations under valgrind */
+	}
+	for (iter = 0; iter < n; iter++)
+	{
+		for (worker = 0; worker < nworker; worker++)
+		{
+			/* submit one task per worker, without binding it to that worker */
+			struct starpu_task *task = starpu_task_create();
+			task->cl = &cl;
+
+			task->handles[0] = v_handle;
+
+			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } /* do not leak the rejected task */
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}
diff --git a/tests/parallel_tasks/parallel_kernels_trivial.c b/tests/parallel_tasks/parallel_kernels_trivial.c
new file mode 100644
index 0000000..9e7a9ab
--- /dev/null
+++ b/tests/parallel_tasks/parallel_kernels_trivial.c
@@ -0,0 +1,128 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Submit a simple testcase for parallel tasks.
+ */
+
+#define VECTORSIZE	1024
+
+void codelet_null(void *descr[], void *_args) /* kernel: sleeps 1000/worker_size */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+	starpu_usleep(1000./worker_size);
+#if 0
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size);
+#endif
+}
+
+struct starpu_perfmodel model = /* shared by both codelets below */
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "parallel_kernel_test"
+};
+
+static struct starpu_codelet cl = /* fork-join (parallel) variant */
+{
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cuda_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.opencl_funcs = {codelet_null},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+static struct starpu_codelet cl_seq = /* same kernel, without .type / .max_parallelism */
+{
+	.cpu_funcs = {codelet_null},
+	.cuda_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.opencl_funcs = {codelet_null},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+	starpu_data_handle_t v_handle;
+	unsigned *v;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.ncpus = 2;
+	conf.sched_policy_name = "pheft";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	/* First submit a sequential task */
+	ret = starpu_task_insert(&cl_seq, STARPU_R, v_handle, 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* Then a parallel task, which is not interesting to run in parallel when we have only two cpus */
+	ret = starpu_task_insert(&cl, STARPU_R, v_handle, 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* Then another parallel task, which is interesting to run in parallel
+	   since the two cpus are now finishing at the same time. */
+	ret = starpu_task_insert(&cl, STARPU_R, v_handle, 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}
diff --git a/tests/parallel_tasks/spmd_peager.c b/tests/parallel_tasks/spmd_peager.c
new file mode 100644
index 0000000..e2276fa
--- /dev/null
+++ b/tests/parallel_tasks/spmd_peager.c
@@ -0,0 +1,118 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include
+#include
+#include "../helper.h"
+
+/*
+ * Submit dumb parallel spmd tasks with the peager scheduler
+ */
+
+#ifndef STARPU_QUICK_CHECK
+#define N	1000
+#else
+#define N	100
+#endif
+#define VECTORSIZE	1024
+
+starpu_data_handle_t v_handle;
+static unsigned *v;
+
+void codelet_null(void *descr[], void *_args) /* kernel: sleeps 1000/worker_size */
+{
+	(void)descr;
+	(void)_args;
+
+	STARPU_SKIP_IF_VALGRIND;
+
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+
+//	FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size);
+
+	starpu_usleep(1000./worker_size);
+#if 0
+	int id = starpu_worker_get_id();
+	int combined_id = starpu_combined_worker_get_id();
+	int rank = starpu_combined_worker_get_rank();
+	FPRINTF(stderr, "worker id %d - combined id %d - worker size %d - SPMD rank %d\n", id, combined_id, worker_size, rank);
+#endif
+}
+
+static struct starpu_codelet cl = /* SPMD codelet with the same kernel for CPU, CUDA and OpenCL */
+{
+	.type = STARPU_SPMD,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {codelet_null},
+	.cpu_funcs_name = {"codelet_null"},
+	.cuda_funcs = {codelet_null},
+	.opencl_funcs = {codelet_null},
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+
+int main(void) /* returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "peager";
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
+
+	unsigned iter;//, worker;
+	for (iter = 0; iter < N; iter++)
+	{
+		/* submit a task without binding it to a particular worker */
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &cl;
+
+		task->handles[0] = v_handle;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } /* do not leak the rejected task */
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(v_handle);
+	starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned));
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
diff --git a/tests/parallel_tasks/swap.c b/tests/parallel_tasks/swap.c
new file mode 100644
index 0000000..c6e80be
--- /dev/null
+++ b/tests/parallel_tasks/swap.c
@@ -0,0 +1,91 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2025-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include
+#include "../helper.h"
+
+double *pU, *pV, *pA; /* buffers registered in main(); the kernel compares against them */
+
+// This test checks that buffers are passed to the task in the correct order.
+// It is based on https://github.com/starpu-runtime/starpu/issues/78
+
+void scal_cpu_func(void* buffers[], void* _args) /* asserts buffers[0..2] are A, U, V in that order */
+{
+	double* A = (double*)STARPU_VARIABLE_GET_PTR(buffers[0]);
+	double* U = (double*)STARPU_VARIABLE_GET_PTR(buffers[1]);
+	double* V = (double*)STARPU_VARIABLE_GET_PTR(buffers[2]);
+	int nthreads = starpu_combined_worker_get_size();
+	FPRINTF(stderr, "nthreads = %d\nA = %p, U = %p, V = %p\n", nthreads, A, U, V);
+	STARPU_ASSERT_MSG(U == pU, "Pointers are different %p %p\n", U, pU);
+	STARPU_ASSERT_MSG(V == pV, "Pointers are different %p %p\n", V, pV);
+	STARPU_ASSERT_MSG(A == pA, "Pointers are different %p %p\n", A, pA);
+}
+
+static struct starpu_codelet cl = /* CPU-only fork-join codelet reading three buffers */
+{
+	.modes = {STARPU_R, STARPU_R, STARPU_R},
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {scal_cpu_func},
+	.cpu_funcs_name = {"scal_cpu_func"},
+	.nbuffers = 3
+};
+
+int main() /* returns 0, or STARPU_TEST_SKIPPED on -ENODEV */
+{
+	int ret;
+
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.ncpus = 2;
+	conf.sched_policy_name = "peager";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	pU = malloc(sizeof(double)); /* NOTE(review): the three malloc results are not checked */
+	pV = malloc(sizeof(double));
+	pA = malloc(sizeof(double));
+
+	starpu_data_handle_t U;
+	starpu_data_handle_t V;
+	starpu_data_handle_t A;
+	starpu_variable_data_register(&U, STARPU_MAIN_RAM, (uintptr_t)pU, sizeof(double));
+	starpu_variable_data_register(&V, STARPU_MAIN_RAM, (uintptr_t)pV, sizeof(double));
+	starpu_variable_data_register(&A, STARPU_MAIN_RAM, (uintptr_t)pA, sizeof(double));
+
+	FPRINTF(stderr, "A = %p, U = %p, V = %p\n", pA, pU, pV);
+	ret = starpu_task_insert(&cl,
+				 STARPU_R, A,
+				 STARPU_R, U,
+				 STARPU_R, V,
+				 0);
+	if (ret == -ENODEV)
+		ret = STARPU_TEST_SKIPPED; /* fall through to the common cleanup below */
+	else
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	starpu_data_unregister(A);
+	starpu_data_unregister(U);
+	starpu_data_unregister(V);
+
+	free(pU);
+	free(pV);
+	free(pA);
+	starpu_shutdown();
+	STARPU_RETURN(ret);
+}
diff --git a/tests/perfmodels/feed.c b/tests/perfmodels/feed.c
new file mode 100644
index 0000000..4070724
--- /dev/null
+++ b/tests/perfmodels/feed.c
@@ -0,0 +1,96 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013-2013 Thibaut Lambert
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */ + +#include +#include "../helper.h" + +/* + * Test the starpu_perfmodel_update_history function + */ + +static struct starpu_perfmodel model = +{ + .type = STARPU_REGRESSION_BASED, + .symbol = "feed" +}; + +static struct starpu_perfmodel nl_model = +{ + .type = STARPU_NL_REGRESSION_BASED, + .symbol = "nlfeed" +}; + +static struct starpu_codelet cl = +{ + .model = &model, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +int main(void) +{ + struct starpu_task task; + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) < 2) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + starpu_task_init(&task); + task.cl = &cl; + + int size; + for (size = 1024; size < 16777216; size *= 2) + { + float measured_fast, measured_slow; + starpu_data_handle_t handle; + starpu_vector_data_register(&handle, -1, 0, size, sizeof(float)); + task.handles[0] = handle; + + /* Simulate Fast GPU. 
In real applications this would be + * replaced by fetching from actual measurement */ + measured_fast = 0.002+size*0.00000001; + measured_slow = 0.001+size*0.0000001; + + struct starpu_perfmodel_arch arch; + arch.ndevices = 1; + arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device)); + arch.devices[0].type = STARPU_CUDA_WORKER; + arch.devices[0].ncores = 0; + /* Simulate Fast GPU */ + arch.devices[0].devid = 0; + starpu_perfmodel_update_history(&model, &task, &arch, 0, 0, measured_fast); + starpu_perfmodel_update_history(&nl_model, &task, &arch, 0, 0, measured_fast); + + /* Simulate Slow GPU */ + arch.devices[0].devid = 1; + starpu_perfmodel_update_history(&model, &task, &arch, 0, 0, measured_slow); + starpu_perfmodel_update_history(&nl_model, &task, &arch, 0, 0, measured_slow); + starpu_task_clean(&task); + starpu_data_unregister(handle); + } + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/memory.c b/tests/perfmodels/memory.c new file mode 100644 index 0000000..e502d96 --- /dev/null +++ b/tests/perfmodels/memory.c @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* + * Test providing the memory perfmodel function + */ + +void func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; +} + +static struct starpu_perfmodel my_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "my_model", +}; + +static struct starpu_codelet my_codelet = +{ + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .model = &my_model +}; + +double cuda_cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) +{ + (void) t; + (void) a; + return (double)i; +} + +int main(void) +{ + int ret; + + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + starpu_perfmodel_init(&my_model); + starpu_perfmodel_set_per_devices_cost_function(&my_model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1); + + ret = starpu_task_insert(&my_codelet, 0); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_wait_for_all(); + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/non_linear_regression_based.c b/tests/perfmodels/non_linear_regression_based.c new file mode 100644 index 0000000..9b007d9 --- /dev/null +++ b/tests/perfmodels/non_linear_regression_based.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +/* + * Benchmark memset with a non-linear regression + */ + +#define START_LOG 0 +#ifdef STARPU_QUICK_CHECK +#define END_LOG 20 +#else +#define END_LOG 25 +#endif + +#ifdef STARPU_USE_CUDA +static void memset_cuda(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream()); +} +#endif + +void memset_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + memset(ptr, 42, n * sizeof(*ptr)); +} + +static struct starpu_perfmodel model = +{ + .type = STARPU_NL_REGRESSION_BASED, + .symbol = "non_linear_memset_regression_based" +}; + +#ifdef STARPU_USE_OPENCL +extern void memset_opencl(void *buffers[], void *args); +#endif + +static struct starpu_codelet memset_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {memset_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {memset_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {memset_cpu}, + .cpu_funcs_name = {"memset_cpu"}, + .model = &model, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static void test_memset(int nelems) +{ + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); + + int nloops = 200; + int loop; + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &memset_cl; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_data_unregister(handle); +} + +#ifdef 
STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +int main(int argc, char **argv) +{ + int ret; + + struct starpu_conf conf; + starpu_conf_init(&conf); + + conf.sched_policy_name = "eager"; + conf.calibrate = 2; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + int slog; + for (slog = START_LOG; slog < END_LOG; slog++) + { + int size = 1 << slog; + test_memset(size); + } + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/opencl_memset.c b/tests/perfmodels/opencl_memset.c new file mode 100644 index 0000000..52674b5 --- /dev/null +++ b/tests/perfmodels/opencl_memset.c @@ -0,0 +1,81 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include + +/* + * Enqueue an OpenCL kernel which just does a memset + */ + +extern struct starpu_opencl_program opencl_program; + +void _memset_opencl(void *buffers[], void *args, const char *name) +{ + (void) args; + int id, devid; + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + + unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); + + id = starpu_worker_get_id_check(); + devid = starpu_worker_get_devid(id); + + err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, name, devid); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + + err = clSetKernelArg(kernel, 0, sizeof(val), &val); + err|= clSetKernelArg(kernel, 1, sizeof(n), &n); + if (err) + STARPU_OPENCL_REPORT_ERROR(err); + + { + size_t global=n; + size_t local; + size_t s; + cl_device_id device; + + starpu_opencl_get_device(devid, &device); + + err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + if (local > global) + local=global; + else + global = (global + local-1) / local * local; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err != CL_SUCCESS) + STARPU_OPENCL_REPORT_ERROR(err); + } + starpu_opencl_release_kernel(kernel); +} + +void memset_opencl(void *buffers[], void *args, const char *kernel) +{ + (void)kernel; + _memset_opencl(buffers, args, "_memset_opencl"); +} + +void memset0_opencl(void *buffers[], void *args, const char *kernel) +{ + (void)kernel; + _memset_opencl(buffers, args, "_memset0_opencl"); +} diff --git a/tests/perfmodels/opencl_memset_kernel.cl b/tests/perfmodels/opencl_memset_kernel.cl new file mode 100644 index 0000000..4202ea3 --- /dev/null +++ b/tests/perfmodels/opencl_memset_kernel.cl @@ -0,0 +1,29 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + + __kernel void _memset0_opencl(__global int *val, int nx) +{ + const int i = get_global_id(0); + if (i < nx) + val[0] += i; +} + +__kernel void _memset_opencl(__global int *val, int nx) +{ + const int i = get_global_id(0); + if (i < nx) + val[i] = 42; +} diff --git a/tests/perfmodels/path.c b/tests/perfmodels/path.c new file mode 100644 index 0000000..c0ff6f5 --- /dev/null +++ b/tests/perfmodels/path.c @@ -0,0 +1,181 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" +#include + +#if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) +#warning unsetenv or setenv are not defined. 
Skipping test +int main(void) +{ + return STARPU_TEST_SKIPPED; +} +#else + +void *_set_sampling_dir(char *sampling_dir, size_t s) +{ + char *tpath = starpu_getenv("TMPDIR"); + if (!tpath) + tpath = starpu_getenv("TEMP"); + if (!tpath) + tpath = starpu_getenv("TMP"); + if (!tpath) + tpath = "/tmp"; + snprintf(sampling_dir, s, "%s/starpu_sampling_XXXXXX", tpath); + return _starpu_mkdtemp(sampling_dir); +} + +void randomstring(char *name, int nb) +{ + int n; + static char charset[] = "abcdefghijklmnopqrstuvwxyz"; + + for(n = 0 ;n < nb-1 ; n++) + { + int key = starpu_lrand48() % (int)(sizeof(charset) -1); + name[n] = charset[key]; + } + name[nb-1]='\0'; +} + +int do_test(const char *test_name, const char *bus_dir, const char *codelet_dir, const char *model_name) +{ + int ret; + char hostname[10]; + struct starpu_conf conf; + + FPRINTF(stderr, "\nTesting %s with <%s> and <%s>\n", test_name, bus_dir, codelet_dir); + + starpu_srand48((long int)time(NULL)); + randomstring(hostname, 10); + setenv("STARPU_HOSTNAME", hostname, 1); + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + { + char filename[1024]; + struct stat statbuf; + snprintf(filename, 1024, "%s/bus/%s.config", bus_dir, hostname); + ret = stat(filename, &statbuf); + if (ret != 0) + { + FPRINTF(stderr, "Performance model file <%s> for bus benchmarking is not available\n", filename); + starpu_shutdown(); + return 1; + } + else + { + FPRINTF(stderr, "Performance model file <%s> for bus benchmarking is valid\n", filename); + } + } + + // create performance model file for codelet + char _codelet_dir[512]; + snprintf(_codelet_dir, 512, "%s/codelets/%d", codelet_dir, _STARPU_PERFMODEL_VERSION); + _starpu_mkpath_and_check(_codelet_dir, S_IRWXU); + char codelet_model[1024]; + snprintf(codelet_model, 1024, "%s/%s.%s", 
_codelet_dir, model_name, hostname); + FILE *output = fopen(codelet_model, "w"); + if (output == NULL) + { + FPRINTF(stderr, "Cannot create performance model file <%s> for codelet <%s>\n", codelet_model, model_name); + starpu_shutdown(); + return 1; + } + + fprintf(output, "##################\n"); + fprintf(output, "# Performance Model Version\n"); + fprintf(output, "45\n"); + fclose(output); + + char path[256]; + starpu_perfmodel_get_model_path(model_name, path, 256); + if (strlen(path) == 0) + { + FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is not available\n", path, model_name); + starpu_shutdown(); + return 1; + } + else + { + if (strcmp(path, codelet_model) != 0) + { + FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is not at expected location <%s>\n", path, model_name, codelet_model); + starpu_shutdown(); + return 1; + } + } + + FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is valid\n", path, model_name); + starpu_shutdown(); + return 0; +} + +int main(void) +{ + char sampling_dir[256]; + int global_ret = 0; + int ret; + + unsetenv("STARPU_PERF_MODEL_DIR"); + unsetenv("STARPU_PERF_MODEL_PATH"); + + _set_sampling_dir(sampling_dir, sizeof(sampling_dir)); + + { + char perf_model_dir[512]; + snprintf(perf_model_dir, 512, "%s/sampling", sampling_dir); + setenv("STARPU_PERF_MODEL_DIR", perf_model_dir, 1); + + ret = do_test("STARPU_PERF_MODEL_DIR", perf_model_dir, perf_model_dir, "mymodel"); + if (ret == STARPU_TEST_SKIPPED) return ret; + global_ret += ret; + unsetenv("STARPU_PERF_MODEL_DIR"); + } + + char starpu_home[512]; + + { + snprintf(starpu_home, 512, "%s/.starpu/sampling", sampling_dir); + setenv("STARPU_HOME", sampling_dir, 1); + + ret = do_test("STARPU_HOME", starpu_home, starpu_home, "mymodel"); + if (ret == STARPU_TEST_SKIPPED) return ret; + global_ret += ret; + } + + { + char perf_model_path[512]; + snprintf(perf_model_path, 512, "%s/sampling", sampling_dir); + setenv("STARPU_PERF_MODEL_PATH", 
perf_model_path, 1); + + ret = do_test("STARPU_PERF_MODEL_PATH", starpu_home, perf_model_path, "mymodel2"); + if (ret == STARPU_TEST_SKIPPED) return ret; + global_ret += ret; + } + + return global_ret; +} +#endif diff --git a/tests/perfmodels/regression_based_check.c b/tests/perfmodels/regression_based_check.c new file mode 100644 index 0000000..f76975f --- /dev/null +++ b/tests/perfmodels/regression_based_check.c @@ -0,0 +1,271 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include "../helper.h" + +/* + * - Calibrate the linear model only for large sizes: STARTline 1048576 + * - Separate the test_memset loop in two loops: + * - linear: start from 1048576 + * - non-linear: keep start at 1024 + */ + +#define STARTlin 131072 +#define START 1024 +#ifdef STARPU_QUICK_CHECK +#define END 1048576 +#else +#define END 16777216 +#endif + + +void memset_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + unsigned i; + + starpu_usleep(1000); + + for (i=0; icl = codelet; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); +} + +static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) +{ + unsigned i; + unsigned niter = 100; + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *tasks[niter]; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + task->synchronous = 1; + + /* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. */ + task->destroy = 0; + + tasks[i] = task; + + int ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + } + + starpu_data_unregister(handle); + + starpu_task_wait_for_all(); + + double length_sum = 0.0; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = tasks[i]; + struct starpu_profiling_task_info *info = task->profiling_info; + + + /* How long was the task execution ? 
*/ + length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + + /* We don't need the task structure anymore */ + starpu_task_destroy(task); + } + + + /* Display the occupancy of all workers during the test */ + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct starpu_profiling_worker_info worker_info; + int ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + char workername[128]; + starpu_worker_get_name(worker, workername, sizeof(workername)); + unsigned nimpl; + + + if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) + { + FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + + FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter)); + + } + } + } + + +} + + +int main(int argc, char **argv) +{ + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + struct starpu_conf conf; + starpu_data_handle_t handle; + int ret; + + starpu_conf_init(&conf); + + conf.sched_policy_name = "eager"; + conf.calibrate = 2; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int size; + for (size = STARTlin; size < END; size *= 2) + { + /* Use a linear regression */ + test_memset(size, &memset_cl); + } + + for (size = START; size < END; size *= 2) + { + /* Use a non-linear regression */ + test_memset(size, &nl_memset_cl); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + + /* Test Phase */ + starpu_conf_init(&conf); + + conf.sched_policy_name = "eager"; + conf.calibrate = 0; + + ret = starpu_initialize(&conf, 
&argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Now create a dummy task just to estimate its duration according to the regression */ + + size = 1234567; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *task = starpu_task_create(); + task->cl = &memset_cl; + task->handles[0] = handle; + task->destroy = 0; + + FPRINTF(stdout, "\n ////linear regression results////\n"); + compare_performance(size, &memset_cl, task); + + task->cl = &nl_memset_cl; + + FPRINTF(stdout, "\n ////non linear regression results////\n"); + + compare_performance(size, &nl_memset_cl, task); + + starpu_task_destroy(task); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/regression_based_energy.c b/tests/perfmodels/regression_based_energy.c new file mode 100644 index 0000000..906d97a --- /dev/null +++ b/tests/perfmodels/regression_based_energy.c @@ -0,0 +1,301 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include "../helper.h" + +/* + * A multi-implementation benchmark with dmda scheduler + * we aim to test the energy model with the different size of gamma + * for large size of gamma, dmda choose the second implementation which consumes less energy + * otherwise, it choose the first implementation which minimizes the execution time + */ + +#define STARTlin 131072 +#define START 1024 +#ifdef STARPU_QUICK_CHECK +#define END 1048576 +#else +#define END 16777216 +#endif + +// first implementation with an initial delay (100 us) +void memset0_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + unsigned i; + + starpu_usleep(100); + + for (i=0; icl = codelet; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); +} + +static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) +{ + unsigned i; + unsigned niter = 100; + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *tasks[niter]; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + task->synchronous = 1; + + /* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. 
*/ + task->destroy = 0; + + tasks[i] = task; + + int ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + } + + starpu_data_unregister(handle); + + starpu_task_wait_for_all(); + + double length_sum = 0.0; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = tasks[i]; + struct starpu_profiling_task_info *info = task->profiling_info; + + + /* How long was the task execution ? */ + length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + + /* We don't need the task structure anymore */ + starpu_task_destroy(task); + } + + + /* Display the occupancy of all workers during the test */ + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct starpu_profiling_worker_info worker_info; + int ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + char workername[128]; + starpu_worker_get_name(worker, workername, sizeof(workername)); + unsigned nimpl; + + if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) + { + FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + + FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter), + starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl)); + } + } + } + + +} + + +int main(int argc, char **argv) +{ + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + struct starpu_conf conf; + starpu_data_handle_t handle; + int ret; + + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 2; + + ret = starpu_initialize(&conf, &argc, &argv); + if 
(ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int size; + for (size = STARTlin; size < END; size *= 2) + { + /* Use a linear regression */ + test_memset(size, &memset_cl); + } + + for (size = START; size < END; size *= 2) + { + /* Use a non-linear regression */ + test_memset(size, &nl_memset_cl); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + + /* Test Phase */ + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 0; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Now create a dummy task just to estimate its duration according to the regression */ + + size = 1234567; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *task = starpu_task_create(); + task->handles[0] = handle; + task->destroy = 0; + + FPRINTF(stdout, "\n ////non linear regression results////\n"); + task->cl = &nl_memset_cl; + compare_performance(size, &nl_memset_cl, task); + + starpu_task_destroy(task); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/regression_based_gpu.c b/tests/perfmodels/regression_based_gpu.c new file mode 100644 index 0000000..a235651 --- /dev/null +++ b/tests/perfmodels/regression_based_gpu.c @@ -0,0 +1,399 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* + * A multi-implementation benchmark with dmda scheduler + * we aim to test OPENCL workers and calculate the estimated time for each type of worker (CPU or OPENCL or CUDA) + * dmda choose OPENCL workers for lage size (variable size of compare_performance) size=1234567 + * dmda choose CPU workers for small size (size=1234) + */ + +#define STARTlin 131072 +#define START 1024 +#ifdef STARPU_QUICK_CHECK +#define END 1048576 +#else +#define END 16777216 +#endif + +#ifdef STARPU_USE_CUDA +static void memset_cuda(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream()); +} +#endif + +#ifdef STARPU_USE_OPENCL +extern void memset0_opencl(void *buffers[], void *args); +extern void memset_opencl(void *buffers[], void *args); +#endif + +void memset0_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + unsigned i; + + //starpu_usleep(100); + + for (i = 0; i < n; i++) + + ptr[0] += i; +} + +void memset_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + + //starpu_usleep(10); + memset(ptr, 42, n * sizeof(*ptr)); +} + +static struct starpu_perfmodel model = +{ + .type = STARPU_REGRESSION_BASED, + .symbol = "memset_regression_based" +}; + +static struct starpu_perfmodel nl_model = +{ + .type = 
STARPU_NL_REGRESSION_BASED, + .symbol = "non_linear_memset_regression_based" +}; + +static struct starpu_codelet memset_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {memset_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {memset0_opencl, memset_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {memset0_cpu, memset_cpu}, + .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, + .model = &model, + .nbuffers = 1, + .modes = {STARPU_SCRATCH} +}; + +static struct starpu_codelet nl_memset_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {memset_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {memset0_opencl, memset_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {memset0_cpu, memset_cpu}, + .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, + .model = &nl_model, + .nbuffers = 1, + .modes = {STARPU_SCRATCH} +}; + +static void test_memset(int nelems, struct starpu_codelet *codelet) +{ +#ifdef STARPU_QUICK_CHECK + int nloops = 10; +#else + int nloops = 100; +#endif + int loop; + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); +} + +static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) +{ + unsigned i; +#ifdef STARPU_QUICK_CHECK + unsigned niter = 10; +#else + unsigned niter = 100; +#endif + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *tasks[niter]; + + for (i = 0; i < niter; i++) + 
{ + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + task->synchronous = 1; + + /* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. */ + task->destroy = 0; + + tasks[i] = task; + + int ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + } + + starpu_data_unregister(handle); + + starpu_task_wait_for_all(); + + double length_cpu_sum = 0.0; + double length_gpu_sum = 0.0; + + enum starpu_worker_archtype archi; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = tasks[i]; + struct starpu_profiling_task_info *info = task->profiling_info; + + //archi=starpu_worker_get_type(0); + archi=starpu_worker_get_type(info->workerid); + + switch (archi) + { + case STARPU_CPU_WORKER: + FPRINTF(stdout, "cpuuu\n"); + /* How long was the task execution ? */ + length_cpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + break; + + case STARPU_OPENCL_WORKER: + + FPRINTF(stdout, "openclllllll\n"); + /* How long was the task execution ? */ + length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + break; + + case STARPU_CUDA_WORKER: + + FPRINTF(stdout, "cudaaaaaa\n"); + /* How long was the task execution ? 
*/ + length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + break; + + + default: + FPRINTF(stdout, "unsupported!\n"); + break; + } + + /* We don't need the task structure anymore */ + starpu_task_destroy(task); + + } + + unsigned worker; + + /* Display the occupancy of all workers during the test */ + unsigned ncpus = starpu_cpu_worker_get_count(); + unsigned ngpus = starpu_opencl_worker_get_count()+starpu_cuda_worker_get_count(); + //unsigned ncpu= starpu_worker_get_count_by_type(STARPU_CPU_WORKER); + + FPRINTF(stderr, "ncpus %u \n", ncpus); + FPRINTF(stderr, "ngpus %u \n", ngpus); + for (worker= 0; worker< starpu_worker_get_count(); worker++) + { + + struct starpu_profiling_worker_info worker_info; + int ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + char workername[128]; + starpu_worker_get_name(worker, workername, sizeof(workername)); + unsigned nimpl; + + FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + switch (starpu_worker_get_type(worker)) + + { + case STARPU_CPU_WORKER: + + FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f \n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_cpu_sum)/niter)); + + break; + + case STARPU_OPENCL_WORKER: + + FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_gpu_sum)/niter)); + + break; + + case STARPU_CUDA_WORKER: + + FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_gpu_sum)/niter)); + 
+ break; + + default: + FPRINTF(stdout, "unsupported!\n"); + break; + } + } + } + + +} + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program opencl_program; +#endif + +int main(int argc, char **argv) +{ + /* Enable profiling */ + starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + struct starpu_conf conf; + starpu_data_handle_t handle; + int ret; + + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 2; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + int size; + for (size = STARTlin; size < END; size *= 2) + { + /* Use a linear regression */ + test_memset(size, &memset_cl); + } + + for (size = START*1.5; size < END; size *= 2) + { + /* Use a non-linear regression */ + test_memset(size, &nl_memset_cl); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + + + /* Test Phase */ + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 0; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", + &opencl_program, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); +#endif + + /* Now create a dummy task just to estimate its duration according to the regression */ + + size = 1234567; + + starpu_vector_data_register(&handle, -1, 
(uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *task = starpu_task_create(); + task->handles[0] = handle; + task->destroy = 0; + + //FPRINTF(stdout, "\n ////linear regression results////\n"); + //task->cl = &memset_cl; + //compare_performance(size, &memset_cl, task); + + FPRINTF(stdout, "\n ////non linear regression results////\n"); + task->cl = &nl_memset_cl; + compare_performance(size, &nl_memset_cl, task); + + starpu_task_destroy(task); + + starpu_data_unregister(handle); + +#ifdef STARPU_USE_OPENCL + ret = starpu_opencl_unload_opencl(&opencl_program); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); +#endif + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/regression_based_memset.c b/tests/perfmodels/regression_based_memset.c new file mode 100644 index 0000000..16875f7 --- /dev/null +++ b/tests/perfmodels/regression_based_memset.c @@ -0,0 +1,380 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
#ifdef STARPU_USE_CUDA
/*
 * CUDA implementation of the memset kernel: asynchronously fills the vector
 * in descr[0] with the byte value 42, on the stream returned by
 * starpu_cuda_get_local_stream(). `arg` is unused.
 */
static void memset_cuda(void *descr[], void *arg)
{
	(void)arg;
	STARPU_SKIP_IF_VALGRIND;

	unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	/* Do not let a failed launch pass silently: the calibration would
	 * otherwise record timings for work that never happened. */
	cudaError_t status = cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream());
	if (STARPU_UNLIKELY(status != cudaSuccess))
		STARPU_CUDA_REPORT_ERROR(status);
}
#endif
{memset_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {memset_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {memset0_cpu, memset_cpu}, + .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, + .model = &model, + .energy_model = &energy_model, + .nbuffers = 1, + .modes = {STARPU_SCRATCH} +}; + +static struct starpu_codelet nl_memset_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {memset_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {memset_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {memset0_cpu, memset_cpu}, + .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, + .model = &nl_model, + .energy_model = &nl_energy_model, + .nbuffers = 1, + .modes = {STARPU_SCRATCH} +}; + +static void test_memset(int nelems, struct starpu_codelet *codelet) +{ + int nloops = 100; + int loop; + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); +} + +static int test_memset_energy(int nelems, int workerid, int where, enum starpu_worker_archtype archtype, int impl, struct starpu_codelet *codelet) +{ + (void)impl; + int nloops; + int loop; + starpu_data_handle_t handle; + + nloops = NENERGY; + if (workerid == -1) + nloops *= starpu_worker_get_count_by_type(archtype); + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); + + for (loop = 0; loop < nloops; loop++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = codelet; + task->where = where; + task->handles[0] = handle; + 
task->flops = nelems; + if (workerid != -1) + { + task->execute_on_a_specific_worker = 1; + task->workerid = workerid; + } + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); + + return nloops; +} + +static int bench_energy(int workerid, int where, enum starpu_worker_archtype archtype, int impl, struct starpu_codelet *codelet) +{ + int size; + int retval; + int ntasks; + + for (size = STARTlin; size < END; size *= 2) + { + starpu_data_handle_t handle; + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + if ((retval = starpu_energy_start(workerid, archtype)) != 0) + { + starpu_data_unregister(handle); + _STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype)); + return -1; + } + + /* Use a linear regression */ + ntasks = test_memset_energy(size, workerid, where, archtype, impl, codelet); + + struct starpu_task *task = starpu_task_create(); + task->cl = codelet; + task->handles[0] = handle; + task->synchronous = 1; + task->destroy = 0; + task->flops = size; + + retval = starpu_energy_stop(codelet->energy_model, task, impl, ntasks, workerid, archtype); + + starpu_task_destroy (task); + starpu_data_unregister(handle); + + if (retval != 0) + ERROR_RETURN(retval); + } + return 0; +} + +static void show_task_perfs(int size, struct starpu_task *task) +{ + unsigned workerid; + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + { + char name[32]; + starpu_worker_get_name(workerid, name, sizeof(name)); + + unsigned nimpl; + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + FPRINTF(stdout, "Expected time for %d on %s (impl %u):\t%f\n", + size, name, nimpl, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid, task->sched_ctx), nimpl)); + } + } +} + +#ifdef STARPU_USE_OPENCL 
/*
 * Test entry point, in two StarPU sessions:
 *  1. "dmda" scheduler with calibrate = 2: run the memset codelets over a
 *     range of sizes to feed the linear and non-linear time models, then
 *     print the expected duration of a dummy task for both models.
 *  2. "eager" scheduler with calibrate = 1: run the energy benchmarks on the
 *     CPU workers (one implementation at a time) and on each CUDA worker.
 *
 * Returns STARPU_TEST_SKIPPED if either initialisation reports -ENODEV,
 * EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
	struct starpu_conf conf;
	starpu_data_handle_t handle;
	int ret;
	unsigned i;

	starpu_conf_init(&conf);

	conf.sched_policy_name = "dmda";
	conf.calibrate = 2;

	ret = starpu_initialize(&conf, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	int size;
	for (size = STARTlin; size < END; size *= 2)
	{
		/* Use a linear regression */
		test_memset(size, &memset_cl);
	}

	for (size = START; size < END; size *= 2)
	{
		/* Use a non-linear regression */
		test_memset(size, &nl_memset_cl);
	}

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	/* Now create a dummy task just to estimate its duration according to the regression */

	size = 12345;

	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));

	/* This task is never submitted; it is only handed to show_task_perfs(),
	 * first with the linear codelet, then with the non-linear one. */
	struct starpu_task *task = starpu_task_create();
	task->cl = &memset_cl;
	task->handles[0] = handle;
	task->destroy = 0;

	show_task_perfs(size, task);

	task->cl = &nl_memset_cl;

	show_task_perfs(size, task);

	starpu_task_destroy(task);

	starpu_data_unregister(handle);

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_unload_opencl(&opencl_program);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	/* Second session: energy benchmarks. */
	starpu_conf_init(&conf);

	/* Use a scheduler which doesn't choose the implementation */
#ifdef STARPU_HAVE_UNSETENV
	unsetenv("STARPU_SCHED");
#endif
	conf.sched_policy_name = "eager";
	conf.calibrate = 1;

	ret = starpu_initialize(&conf, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	/* NOTE(review): the return values of bench_energy() are ignored below --
	 * presumably because energy measurement may be unsupported; confirm. */
	if (starpu_cpu_worker_get_count() > 0)
	{
		/* Keep only CPU implementation 0 in the codelet while measuring it. */
		memset_cl.cpu_funcs[1] = NULL;
		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
		/* NOTE(review): implementation 1 is only benchmarked when unsetenv()
		 * is available -- presumably so that STARPU_SCHED cannot override
		 * the "eager" policy; confirm. */
#ifdef STARPU_HAVE_UNSETENV
		/* Swap: only implementation 1 remains; slot 0 is not restored afterwards. */
		memset_cl.cpu_funcs[1] = memset_cpu;
		memset_cl.cpu_funcs[0] = NULL;
		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
#endif

		/* Same sequence for the non-linear codelet. */
		nl_memset_cl.cpu_funcs[1] = NULL;
		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
#ifdef STARPU_HAVE_UNSETENV
		nl_memset_cl.cpu_funcs[1] = memset_cpu;
		nl_memset_cl.cpu_funcs[0] = NULL;
		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
#endif
	}

	/* CUDA workers are benchmarked one by one, each pinned by worker id. */
	for (i = 0; i < starpu_cuda_worker_get_count(); i++)
	{
		int workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, i);
		bench_energy(workerid, STARPU_CUDA, STARPU_CUDA_WORKER, 0, &memset_cl);
		bench_energy(workerid, STARPU_CUDA, STARPU_CUDA_WORKER, 0, &nl_memset_cl);
	}

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_unload_opencl(&opencl_program);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	return EXIT_SUCCESS;
}
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" + +/* + * A multi-implementation benchmark with dmda scheduler + * we aim to test the dmda behavior when we have two implementations + * dmda choose the implementation which minimises the execution time + */ + +#define STARTlin 131072 +#define START 1024 +#ifdef STARPU_QUICK_CHECK +#define END 1048576 +#else +#define END 16777216 +#endif + +// first implementation with an initial delay (100 us) +void memset0_cpu(void *descr[], void *arg) +{ + (void)arg; + STARPU_SKIP_IF_VALGRIND; + + unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); + unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + unsigned i; + + starpu_usleep(100); + + for (i=0; icl = codelet; + task->handles[0] = handle; + + int ret = starpu_task_submit(task); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_do_schedule(); + starpu_data_unregister(handle); +} + +static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) +{ + unsigned i; + unsigned niter = 100; + starpu_data_handle_t handle; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *tasks[niter]; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = 
starpu_task_create(); + + task->cl = codelet; + task->handles[0] = handle; + + task->synchronous = 1; + + /* We will destroy the task structure by hand so that we can + * query the profiling info before the task is destroyed. */ + task->destroy = 0; + + tasks[i] = task; + + int ret = starpu_task_submit(task); + + if (STARPU_UNLIKELY(ret == -ENODEV)) + { + FPRINTF(stderr, "No worker may execute this task\n"); + exit(0); + } + } + + starpu_data_unregister(handle); + + starpu_task_wait_for_all(); + + double length_sum = 0.0; + + for (i = 0; i < niter; i++) + { + struct starpu_task *task = tasks[i]; + struct starpu_profiling_task_info *info = task->profiling_info; + + + /* How long was the task execution ? */ + length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); + + /* We don't need the task structure anymore */ + starpu_task_destroy(task); + } + + + /* Display the occupancy of all workers during the test */ + unsigned worker; + for (worker = 0; worker < starpu_worker_get_count(); worker++) + { + struct starpu_profiling_worker_info worker_info; + int ret = starpu_profiling_worker_get_info(worker, &worker_info); + STARPU_ASSERT(!ret); + + char workername[128]; + starpu_worker_get_name(worker, workername, sizeof(workername)); + unsigned nimpl; + + if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) + { + FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); + + for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) + { + + FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n", + size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter), + starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl)); + } + } + } + + +} + + +int main(int argc, char **argv) +{ + /* Enable profiling */ + 
starpu_profiling_status_set(STARPU_PROFILING_ENABLE); + + struct starpu_conf conf; + starpu_data_handle_t handle; + int ret; + + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 2; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + int size; +#if 0 + for (size = STARTlin; size < END; size *= 2) + { + /* Use a linear regression */ + test_memset(size, &memset_cl); + } +#endif + + for (size = START; size < END; size *= 2) + { + /* Use a non-linear regression */ + test_memset(size, &nl_memset_cl); + } + + ret = starpu_task_wait_for_all(); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); + + starpu_shutdown(); + + + /* Test Phase */ + starpu_conf_init(&conf); + + conf.sched_policy_name = "dmda"; + conf.calibrate = 0; + + ret = starpu_initialize(&conf, &argc, &argv); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + /* Now create a dummy task just to estimate its duration according to the regression */ + + size = 1234567; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); + + struct starpu_task *task = starpu_task_create(); + task->handles[0] = handle; + task->destroy = 0; + + FPRINTF(stdout, "\n ////non linear regression results////\n"); + task->cl = &nl_memset_cl; + compare_performance(size, &nl_memset_cl, task); + + starpu_task_destroy(task); + + starpu_data_unregister(handle); + + starpu_shutdown(); + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/user_base.c b/tests/perfmodels/user_base.c new file mode 100644 index 0000000..435f8c8 --- /dev/null +++ b/tests/perfmodels/user_base.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
/*
 * User-provided size base for the performance models of this test: every
 * task reports the constant size 3, whatever the task or implementation.
 */
size_t get_size_base(struct starpu_task *task, unsigned nimpl)
{
	(void)task;
	(void)nimpl;
	return 3;
}
{"func"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model) +{ + int nloops = 123; + int loop; + starpu_data_handle_t handle; + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + codelet->model = model; + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int)); + for (loop = 0; loop < nloops; loop++) + { + ret = starpu_task_insert(codelet, STARPU_W, handle, 0); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + starpu_data_unregister(handle); + starpu_shutdown(); + return EXIT_SUCCESS; +} + +int main(void) +{ + int ret; + + /* Use a linear regression model */ + ret = submit(&mycodelet, &rb_model); + if (ret) return ret; + + /* Use a non-linear regression model */ + ret = submit(&mycodelet, &nlrb_model); + if (ret) return ret; + + /* Use a history model */ + ret = submit(&mycodelet, &hb_model); + if (ret) return ret; + + /* Use a history model with footprints*/ + ret = submit(&mycodelet, &hb_model_foot); + if (ret) return ret; + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/valid_model.c b/tests/perfmodels/valid_model.c new file mode 100644 index 0000000..ca548b7 --- /dev/null +++ b/tests/perfmodels/valid_model.c @@ -0,0 +1,181 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Check that measurements get recorded in the performance model + */ + +void func(void *descr[], void *arg) +{ + (void)descr; + (void)arg; + starpu_usleep(1000); +} + +static struct starpu_perfmodel rb_model = +{ + .type = STARPU_REGRESSION_BASED, + .symbol = "valid_model_regression_based" +}; + +static struct starpu_perfmodel nlrb_model = +{ + .type = STARPU_NL_REGRESSION_BASED, + .symbol = "valid_model_non_linear_regression_based" +}; + +#if 0 +static struct starpu_perfmodel hb_model = +{ + .type = STARPU_HISTORY_BASED, + .symbol = "valid_model_history_based" +}; +#endif + +static struct starpu_codelet mycodelet = +{ + .cuda_funcs = {func}, + .opencl_funcs = {func}, + .cpu_funcs = {func}, + .cpu_funcs_name = {"func"}, + .nbuffers = 1, + .modes = {STARPU_W} +}; + +static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model) +{ + int nloops = 123; + int loop; + starpu_data_handle_t handle; + struct starpu_perfmodel lmodel; + int ret; + int old_nsamples, new_nsamples; + struct starpu_conf conf; + + starpu_conf_init(&conf); + conf.sched_policy_name = "eager"; + conf.calibrate = 1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + codelet->model = model; + + old_nsamples = 0; + memset(&lmodel, 0, sizeof(struct starpu_perfmodel)); + lmodel.type = model->type; + ret = starpu_perfmodel_load_symbol(codelet->model->symbol, &lmodel); + if (ret != 1) + { + int i, impl; + for(i = 0; i < lmodel.state->ncombs; i++) + { + int comb = lmodel.state->combs[i]; + for(impl = 0; impl < lmodel.state->nimpls[comb]; impl++) + old_nsamples += 
lmodel.state->per_arch[comb][impl].regression.nsample; + } + } + + starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int)); + for (loop = 0; loop < nloops; loop++) + { + ret = starpu_task_insert(codelet, STARPU_W, handle, 0); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + starpu_data_unregister(handle); + starpu_perfmodel_unload_model(&lmodel); + starpu_shutdown(); // To force dumping perf models on disk + + // We need to call starpu_init again to initialise values used by perfmodels + ret = starpu_init(NULL); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + char path[256]; + starpu_perfmodel_get_model_path(codelet->model->symbol, path, 256); + FPRINTF(stderr, "Perfmodel File <%s>\n", path); + ret = starpu_perfmodel_load_file(path, &lmodel); + + if (ret == 1) + { + FPRINTF(stderr, "The performance model for the symbol <%s> could not be loaded\n", codelet->model->symbol); + starpu_shutdown(); + return 1; + } + else + { + int i; + new_nsamples = 0; + for(i = 0; i < lmodel.state->ncombs; i++) + { + int comb = lmodel.state->combs[i]; + int impl; + for(impl = 0; impl < lmodel.state->nimpls[comb]; impl++) + new_nsamples += lmodel.state->per_arch[comb][impl].regression.nsample; + } + } + + ret = starpu_perfmodel_unload_model(&lmodel); + starpu_shutdown(); + if (ret == 1) + { + FPRINTF(stderr, "The performance model for the symbol <%s> could not be UNloaded\n", codelet->model->symbol); + return 1; + } + + if (old_nsamples + nloops == new_nsamples) + { + FPRINTF(stderr, "Sampling for <%s> OK %d + %d == %d\n", codelet->model->symbol, old_nsamples, nloops, new_nsamples); + return EXIT_SUCCESS; + } + else + { + FPRINTF(stderr, "Sampling for <%s> failed %d + %d != %d\n", codelet->model->symbol, old_nsamples, nloops, new_nsamples); + return EXIT_FAILURE; + } +} + +int main(void) +{ + int ret; + + /* Use a linear regression model */ + ret = 
submit(&mycodelet, &rb_model); + if (ret) return ret; + + /* Use a non-linear regression model */ + ret = submit(&mycodelet, &nlrb_model); + if (ret) return ret; + +#ifdef STARPU_DEVEL +# warning history based model cannot be validated with regression.nsample +#endif +#if 0 + /* Use a history model */ + ret = submit(&mycodelet, &hb_model); + if (ret) return ret; +#endif + + return EXIT_SUCCESS; +} diff --git a/tests/perfmodels/value_nan.c b/tests/perfmodels/value_nan.c new file mode 100644 index 0000000..ed8b078 --- /dev/null +++ b/tests/perfmodels/value_nan.c @@ -0,0 +1,106 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
/*
 * Test that _starpu_write_double and _starpu_read_double properly manipulate
 * NaN values
 */

#define STRING "booh"

/*
 * Round-trip one double through a temporary file.
 *
 * The value is written with _starpu_write_double() followed by STRING, read
 * back with _starpu_read_double() and fscanf(), and both are compared with
 * what was written.
 *
 * val:      the value to write
 * checknan: when non-zero, both val and the value read back must be NaN;
 *           otherwise their integer parts must be equal
 *
 * Returns 0 on success and 1 on any failure (temporary file creation, file
 * opening, or mismatch).
 */
static
int _check_number(double val, int checknan)
{
	char filename[100];
	FILE *f;
	double lat = 0.;
	char str[10] = "";
	int x, y;
	int pass;

	strcpy(filename, "starpu_XXXXXX");

#ifdef STARPU_HAVE_WINDOWS
	_mktemp(filename);
#else
	{
		int id = mkstemp(filename);
		/* fail */
		if (id < 0)
		{
			FPRINTF(stderr, "Error when creating temp file\n");
			return 1;
		}
		/* The file is re-opened by name below, so the descriptor
		 * returned by mkstemp() is not needed: close it instead of
		 * leaking it. */
		close(id);
	}
#endif

	/* write the double value in the file followed by a predefined string */
	f = fopen(filename, "w");
	if (!f)
	{
		FPRINTF(stderr, "Error when opening file %s\n", filename);
		/* do not leave the temporary file behind */
		unlink(filename);
		return 1;
	}
	// A double is written with the format %e ...
	_starpu_write_double(f, "%e", val);
	fprintf(f, " %s\n", STRING);
	fclose(f);

	/* read the double value and the string back from the file */
	f = fopen(filename, "r");
	if (!f)
	{
		FPRINTF(stderr, "Error when opening file %s\n", filename);
		/* do not leave the temporary file behind */
		unlink(filename);
		return 1;
	}
	// ... but is read with the format %le
	x = _starpu_read_double(f, "%le", &lat);
	y = fscanf(f, " %9s", str);
	fclose(f);
	unlink(filename);

	/* check that what has been read is identical to what has been written */
	pass = (x == 1) && (y == 1);
	pass = pass && strcmp(str, STRING) == 0;
	if (checknan)
		pass = pass && isnan(val) && isnan(lat);
	else
		pass = pass && (int)lat == (int)val;
	return pass?0:1;
}

/*
 * Run the round-trip check once with a regular value and once with NaN.
 * The exit status is the number of failed checks (0 when both pass).
 */
int main(void)
{
	int ret1, ret2;
	double nanvalue = nan("");

	ret1 = _check_number(42.0, 0);
	FPRINTF(stderr, "%s when reading %e\n", ret1==0?"Success":"Error", 42.0);

	ret2 = _check_number(nanvalue, 1);
	FPRINTF(stderr, "%s when reading %e\n", ret2==0?"Success":"Error", nanvalue);

	return ret1+ret2;
}
+# +# Build configuration + +# Build configuration +--enable-coverage +# Build configuration +--disable-opencl +# Build configuration +--disable-cuda +# Build configuration +--disable-cuda --disable-opencl +# Build configuration +--enable-cuda --disable-opencl +# Build configuration +--disable-cuda --enable-opencl +# Build configuration +--with-fxt diff --git a/tests/regression/profiles.in b/tests/regression/profiles.in new file mode 100644 index 0000000..2110281 --- /dev/null +++ b/tests/regression/profiles.in @@ -0,0 +1,62 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Build configuration +--enable-coverage +# Execution configuration + +# Build configuration +--enable-debug --enable-verbose +# Execution configuration +STARPU_LOGFILENAME=/tmp/starpu_run.log +# +# Build configuration + +# Execution configuration +STARPU_NCUDA=0 +# Execution configuration +STARPU_NCUDA=1 +# Execution configuration +STARPU_SCHED=ws +# Execution configuration +STARPU_SCHED=lws +# Execution configuration +STARPU_SCHED=prio +# Execution configuration +STARPU_SCHED=no-prio +# Execution configuration +STARPU_SCHED=dm +# Execution configuration +STARPU_SCHED=dmda +# Execution configuration +STARPU_SCHED=random +# Execution configuration +STARPU_SCHED=eager +# Execution configuration +STARPU_SCHED=dmda STARPU_SCHED_ALPHA=10 STARPU_SCHED_BETA=15 +# Execution configuration +STARPU_CALIBRATE=1 +# Execution configuration +STARPU_PREFETCH=1 +# +# Build configuration +--disable-cuda +# Execution configuration + +# +# Build configuration +--disable-opencl +# Execution configuration + diff --git a/tests/regression/regression.sh.in b/tests/regression/regression.sh.in new file mode 100755 index 0000000..944ec0c --- /dev/null +++ b/tests/regression/regression.sh.in @@ -0,0 +1,140 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
SRCDIR=@STARPU_SRC_DIR@
MAKE="${MAKE:-make -j3}"

##################################################

# Print the command line syntax
usage()
{
	echo "Syntax $0 [ --abort-on-error ] <profile file> ..."
}

# Report the outcome of the current profile, and stop the whole run when
# --abort-on-error was given and the profile failed.
# arg: returned status from the previous command
check_exec()
{
	PROFILE=$PROFILE_NUM
	if [ $SUBPROFILE_NUM -ne 0 ] ; then
		PROFILE="${PROFILE}.${SUBPROFILE_NUM}"
	fi
	if [ $1 -eq 0 ]; then
		echo "PASS: Profile $PROFILE"
	else
		echo "FAIL: Profile $PROFILE"
		if [ ${ABORT_ON_ERROR} -eq 1 ]; then
			echo "Aborting ..."
			# Tell the user where the logs are, as done at the
			# end of a complete run
			echo $WORKDIR
			exit 1;
		fi
	fi
}

# Configure and build StarPU in ${WORKDIR}/build.
# args: the options to give to configure
# Sets code_build to the status of configure, or of make when configure
# succeeded.
do_build()
{
	PROFILE_NUM=`expr ${PROFILE_NUM} + 1`
	echo ">>> Build configuration ${PROFILE_NUM}: <$@>"

	rm -rf ${WORKDIR}/build/*

	# configure is run in a subshell so that the working directory of the
	# script is left untouched and $? is the status of configure itself.
	# (Reading $? after a "cd -" gives the status of cd, which hides
	# configure failures.)
	(
		cd ${WORKDIR}/build && ${SRCDIR}/configure "$@"
	) > $WORKDIR/logs/profile.${PROFILE_NUM} 2>&1
	code_build=$?

	if [ $code_build -ne 0 ]; then
		check_exec $code_build
	else
		${MAKE} -C ${WORKDIR}/build >> $WORKDIR/logs/profile.${PROFILE_NUM} 2>&1
		code_build=$?
		check_exec $code_build
	fi
}

# Run "make check" on the current build.
# args: VAR=value settings to export in the environment of the tests
do_test()
{
	SUBPROFILE_NUM=`expr ${SUBPROFILE_NUM} + 1`
	echo ">>>> Execution configuration ${PROFILE_NUM}.${SUBPROFILE_NUM} : <$@>"

	(
		# Left unquoted on purpose: one profile line holds several
		# space-separated VAR=value settings
		export $* ;
		${MAKE} -C ${WORKDIR}/build check
	) > $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM} 2>&1
	code_check=$?
	check_exec $code_check

	if [ $code_check -ne 0 ] ; then
		grep FAIL: $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM}
	fi

	coverage=$(find ${WORKDIR}/build -name "*.gcda" 2>/dev/null)
	if [ -n "$coverage" ] ; then
		lcov -c -d ${WORKDIR}/build -o ${WORKDIR}/cov/profile_${PROFILE_NUM}.${SUBPROFILE_NUM}.lcov >> $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM} 2>&1
	fi
}

##################################################

ABORT_ON_ERROR=0
while [ $# -ne 0 ]; do
	case $1 in
		--abort-on-error)
			ABORT_ON_ERROR=1
			shift ;;
		--help)
			echo
			usage
			echo
			exit 0 ;;
		*)
			break ;;
	esac
done

if [ -z "$1" ] ; then
	# A missing profile file is an error: report it as such
	echo "Error. $(usage)" >&2
	exit 1
fi

#################################################

## Create the workdir (only once the command line is known to be valid)
WORKDIR=`mktemp -d` || exit 1
mkdir ${WORKDIR}/build ; mkdir ${WORKDIR}/cov ; mkdir ${WORKDIR}/html ; mkdir ${WORKDIR}/logs

PROFILE_NUM=0
SUBPROFILE_NUM=0
for file in "$@" ; do
	# No build is available yet for this profile file
	code_build=1
	# The file is read in the current shell, not in a subshell: PROFILE_NUM
	# keeps increasing from one profile file to the next (so logs do not
	# overwrite each other) and the exit of --abort-on-error really stops
	# the script.
	while read -r line ; do
		if [ "$line" == "# Build configuration" ] ; then
			read -r line
			SUBPROFILE_NUM=0
			do_build $line
		elif [ "$line" == "# Execution configuration" ] ; then
			read -r line
			if [ $code_build -eq 0 ] ; then
				do_test $line
			fi
		fi
	done < "$file"
done
echo $WORKDIR

### End of script

coverage=$(ls ${WORKDIR}/cov/*.lcov 2>/dev/null)
if [ -n "${coverage}" ] ; then
	genhtml --function-coverage --legend ${WORKDIR}/cov/*.lcov -o ${WORKDIR}/html -t "StarPU coverage test results" > ${WORKDIR}/logs/genhtml.log
	echo "The coverage report is located at : ${WORKDIR}/html"
fi

echo "Tests done"
+ */ + +#include +#include "../helper.h" + +struct starpu_codelet mycodelet_bis; + +void func_cpu_bis(void *descr[], void *_args) +{ + (void)descr; + char msg; + char worker_name[256]; + int worker_id = starpu_worker_get_id_check(); + int worker_id_expected; + int ntasks; + + starpu_worker_get_name(worker_id, worker_name, 256); + starpu_codelet_unpack_args(_args, &msg, &ntasks, &worker_id_expected); + + STARPU_ASSERT(worker_id == worker_id_expected); + + FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [tasks %d]\n", msg, worker_id, worker_name, ntasks); + if (ntasks > 0) + { + int ret; + int nntasks = ntasks - 1; + ret = starpu_task_insert(&mycodelet_bis, + STARPU_VALUE, &msg, sizeof(msg), + STARPU_VALUE, &nntasks, sizeof(ntasks), + STARPU_VALUE, &worker_id, sizeof(worker_id), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } +} + +void func_cpu(void *descr[], void *_args) +{ + (void)descr; + char msg; + char worker_name[256]; + int worker_id = starpu_worker_get_id_check(); + int worker_id_expected; + int ntasks; + unsigned sched_ctx_id; + unsigned *sched_ctx_id_p; + + starpu_worker_get_name(worker_id, worker_name, 256); + starpu_codelet_unpack_args(_args, &msg, &ntasks, &sched_ctx_id, &worker_id_expected, &sched_ctx_id_p); + + STARPU_ASSERT(worker_id == worker_id_expected); + + *sched_ctx_id_p = sched_ctx_id; + starpu_sched_ctx_set_context(sched_ctx_id_p); + + FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [sched_ctx_id %u] [tasks %d] [buffer %p]\n", msg, worker_id, worker_name, sched_ctx_id, ntasks, sched_ctx_id_p); + if (ntasks > 0) + { + int ret; + int nntasks = ntasks - 1; + ret = starpu_task_insert(&mycodelet_bis, + STARPU_VALUE, &msg, sizeof(msg), + STARPU_VALUE, &nntasks, sizeof(nntasks), + STARPU_VALUE, &worker_id, sizeof(worker_id), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + } +} + +struct starpu_codelet mycodelet_bis = +{ + .cpu_funcs = {func_cpu_bis}, + .cpu_funcs_name = {"func_cpu_bis"}, +}; 
+ +struct starpu_codelet mycodelet = +{ + .cpu_funcs = {func_cpu}, + .cpu_funcs_name = {"func_cpu"}, +}; + +int main(void) +{ + int i, ret; + int nprocs, nprocs_per_context=1; + int procs[STARPU_NMAXWORKERS]; + int ntasks=10; + char msg[3] = "ab"; + unsigned *buffer[2]; + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + nprocs = starpu_cpu_worker_get_count(); + if (nprocs < 2) goto enodev; + + nprocs_per_context = 1; + FPRINTF(stderr, "# Workers = %d -> %d worker for each sched context\n", nprocs, nprocs_per_context); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); + + unsigned sched_ctx_0 = starpu_sched_ctx_create(procs, nprocs_per_context, "ctx_0", 0); + unsigned sched_ctx_1 = starpu_sched_ctx_create(&procs[nprocs_per_context], nprocs_per_context, "ctx_1", 0); + + if (!getenv("STARPU_SSILENT")) + { + char name0[256]; + char name1[256]; + + starpu_worker_get_name(procs[0], name0, 256); + starpu_worker_get_name(procs[1], name1, 256); + + FPRINTF(stderr, "Creating first sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[0], name0); + FPRINTF(stderr, "Creating second sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[1], name1); + + starpu_sched_ctx_display_workers(sched_ctx_0, stderr); + starpu_sched_ctx_display_workers(sched_ctx_1, stderr); + } + + buffer[0] = malloc(sizeof(unsigned)); + buffer[1] = malloc(sizeof(unsigned)); + FPRINTF(stderr, "allocating %p and %p\n", buffer[0], buffer[1]); + + ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_0, + STARPU_VALUE, &msg[0], sizeof(msg[0]), + STARPU_VALUE, &ntasks, sizeof(ntasks), + STARPU_VALUE, &sched_ctx_0, sizeof(sched_ctx_0), + STARPU_VALUE, &procs[0], sizeof(procs[0]), + STARPU_VALUE, &buffer[0], 
sizeof(buffer[0]), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_1, + STARPU_VALUE, &msg[1], sizeof(msg[1]), + STARPU_VALUE, &ntasks, sizeof(ntasks), + STARPU_VALUE, &sched_ctx_1, sizeof(sched_ctx_1), + STARPU_VALUE, &procs[1], sizeof(procs[1]), + STARPU_VALUE, &buffer[1], sizeof(buffer[1]), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); + + starpu_task_wait_for_all(); + starpu_sched_ctx_delete(sched_ctx_0); + starpu_sched_ctx_delete(sched_ctx_1); + starpu_shutdown(); + free(buffer[0]); + free(buffer[1]); + return 0; + +enodev: + starpu_shutdown(); + fprintf(stderr, "WARNING: No one can execute this task\n"); + /* yes, we do not perform the computation but we did detect that no one + * could perform the kernel, so this is not an error from StarPU */ + return STARPU_TEST_SKIPPED; +} diff --git a/tests/sched_ctx/sched_ctx_list.c b/tests/sched_ctx/sched_ctx_list.c new file mode 100644 index 0000000..ad9d399 --- /dev/null +++ b/tests/sched_ctx/sched_ctx_list.c @@ -0,0 +1,195 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "../../src/core/sched_ctx_list.h" + +int main(void) +{ + struct _starpu_sched_ctx_list *ctx_list = NULL, *found_list; + struct _starpu_sched_ctx_elt *elt; + struct _starpu_sched_ctx_list_iterator it; + int ret=1, global=1; + + /* Check prio list addition */ + ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 50, 0) != NULL); + ret &= (ctx_list->priority == 50); + ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 999, 2) != NULL); + ret &= (ctx_list->priority == 999); + ret &= (ctx_list->next->priority == 50); + ret &= !_starpu_sched_ctx_list_add(&ctx_list, 1); + ret &= (ctx_list->next->next->priority == 0); + + /* Check elements added */ + ret &= (ctx_list->head->sched_ctx == 2); + ret &= (ctx_list->next->head->sched_ctx == 0); + ret &= (ctx_list->next->next->head->sched_ctx == 1); + + /* Check singleton status */ + ret &= (ctx_list->next->head->prev->sched_ctx == 0); + ret &= (ctx_list->next->head->next->sched_ctx == 0); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_add"); + + /* Check addition to existing list */ + ret = 1; + _starpu_sched_ctx_elt_add(ctx_list->next, 3); + ret &= (ctx_list->next->head->next->sched_ctx == 3); + ret &= (ctx_list->next->head->prev->sched_ctx == 3); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_add"); + + /* Find element */ + ret = 1; + elt = _starpu_sched_ctx_elt_find(ctx_list, 3); + ret &= (elt != NULL && elt->sched_ctx == 3); + elt = _starpu_sched_ctx_elt_find(ctx_list, 5); + ret &= (elt == NULL); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_find"); + + /* Find list */ + ret = 1; + found_list = _starpu_sched_ctx_list_find(ctx_list, 0); + ret &= (found_list->priority == 0); + ret &= (found_list->prev->priority == 50); + found_list = _starpu_sched_ctx_list_find(ctx_list, 999); + ret &= (found_list->priority==999); + found_list = _starpu_sched_ctx_list_find(ctx_list, 42); + ret &= 
(found_list == NULL); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_find"); + + /* List exists */ + ret = 1; + ret &= _starpu_sched_ctx_list_exists(ctx_list, 999); + ret &= _starpu_sched_ctx_list_exists(ctx_list, 50); + ret &= _starpu_sched_ctx_list_exists(ctx_list, 0); + ret &= !_starpu_sched_ctx_list_exists(ctx_list, 42); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_exists"); + + /* Iterator */ + ret = 1; + ret &= !_starpu_sched_ctx_list_iterator_init(ctx_list, &it); + ret &= _starpu_sched_ctx_list_iterator_has_next(&it); + elt = _starpu_sched_ctx_list_iterator_get_next(&it); + ret &= (elt->sched_ctx == 2); + ret &= _starpu_sched_ctx_list_iterator_has_next(&it); + elt = _starpu_sched_ctx_list_iterator_get_next(&it); + ret &= (elt->sched_ctx == 0); + ret &= _starpu_sched_ctx_list_iterator_has_next(&it); + elt = _starpu_sched_ctx_list_iterator_get_next(&it); + ret &= (elt->sched_ctx == 3); + ret &= _starpu_sched_ctx_list_iterator_has_next(&it); + elt = _starpu_sched_ctx_list_iterator_get_next(&it); + ret &= (elt->sched_ctx == 1); + ret &= !_starpu_sched_ctx_list_iterator_has_next(&it); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_iterator"); + + /* Add element before head */ + ret = 1; + _starpu_sched_ctx_elt_add_before(ctx_list->next, 4); + ret &= (ctx_list->next->head->prev->sched_ctx == 4); + ret &= (ctx_list->next->head->next->next->sched_ctx == 4); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_add_before"); + + /* Let's move it */ + ret = 1; + ret &= !_starpu_sched_ctx_list_move(&ctx_list, 4, 1002); + ret &= (ctx_list->priority == 1002); + ret &= (ctx_list->head->sched_ctx == 4); + ret &= (ctx_list->head->next->sched_ctx == 4); + ret &= (ctx_list->next->next->head->prev->sched_ctx != 4); + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_move"); + + /* Let's remove it */ + ret = 1; + elt = 
_starpu_sched_ctx_elt_find(ctx_list, 4); + _starpu_sched_ctx_list_remove_elt(&ctx_list, elt); + //ret &= (elt == NULL); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 4) == NULL); + ret &= (ctx_list->next->head->next->sched_ctx == 3); + ret &= (ctx_list->next->head->prev->sched_ctx == 3); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_remove"); + + /* Let's remove head of that same ctx */ + ret = 1; + ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 0); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 0) == NULL); + ret &= (ctx_list->next->head->sched_ctx == 3); + ret &= (ctx_list->next->head->next->sched_ctx == 3); + ret &= (ctx_list->next->head->prev->sched_ctx == 3); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); + + /* Remove the last one of this list, we get an empty ctx */ + ret = 1; + ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 3); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 3) == NULL); + found_list = _starpu_sched_ctx_list_find(ctx_list, 50); + ret &= (found_list == NULL && ctx_list->priority != 50); + ret &= (ctx_list->next->priority == 0); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); + + /* Add an element to a new prio then remove it to ensure prio list is cleaned correctly */ + ret = 1; + ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 100000, 75) != NULL); + ret &= (ctx_list->priority == 100000); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 75) != NULL); + ret &= (ctx_list->head->sched_ctx == 75); + ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 75); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 75) == NULL); + found_list = _starpu_sched_ctx_list_find(ctx_list, 100000); + ret &= (found_list == NULL && ctx_list->priority != 100000); + ret &= (ctx_list->priority == 999); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); + + /* Delete this list, the function is internal only so we 
need to modify the list pointers too */ + ret = 1; + found_list = ctx_list->next; + found_list->prev = ctx_list->prev; + _starpu_sched_ctx_list_remove_all(ctx_list); + ctx_list = found_list; + found_list = _starpu_sched_ctx_list_find(ctx_list, 999); + ret &= (found_list == NULL && ctx_list->priority != 999); + ret &= (_starpu_sched_ctx_elt_find(ctx_list, 2) == NULL); + ret &= (ctx_list->priority == 0); + ret &= (ctx_list->head->sched_ctx == 1); //as before + ret &= (ctx_list->head->next->sched_ctx == 1); + ret &= (ctx_list->head->prev->sched_ctx == 1); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove_all"); + + /* Let's add some things again then clean everything */ + ret = 1; + ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 1000, 42) != NULL); + ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 1000, 43) != NULL); + _starpu_sched_ctx_list_delete(&ctx_list); + ret &= (ctx_list == NULL); + global &= ret; + STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_delete"); + + STARPU_CHECK_RETURN_VALUE_IS(global, 1, "_starpu_sched_ctx_(list|elt) global status"); + + return 0; +} diff --git a/tests/sched_ctx/sched_ctx_policy_data.c b/tests/sched_ctx/sched_ctx_policy_data.c new file mode 100644 index 0000000..e45e740 --- /dev/null +++ b/tests/sched_ctx/sched_ctx_policy_data.c @@ -0,0 +1,76 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +int main(void) +{ + int ret; + int nprocs; + int *procs; + + unsigned sched_ctx; + unsigned main_sched_ctx; + int *ptr; + int *main_ptr; + + struct starpu_conf conf; + + starpu_conf_init(&conf); + starpu_conf_noworker(&conf); + conf.ncpus = -1; + conf.nmpi_ms = -1; + conf.ntcpip_ms = -1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return STARPU_TEST_SKIPPED; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + nprocs = starpu_cpu_worker_get_count(); + if (nprocs == 0) + { + starpu_shutdown(); + return STARPU_TEST_SKIPPED; + } + + procs = (int*)malloc(nprocs*sizeof(int)); + starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); + + sched_ctx = starpu_sched_ctx_create(procs, nprocs, "my_context", 0); + ptr = starpu_sched_ctx_get_policy_data(sched_ctx); + STARPU_ASSERT_MSG(ptr == NULL, "The policy data for the sched ctx should be NULL\n"); + + starpu_sched_ctx_set_policy_data(sched_ctx, procs); + ptr = starpu_sched_ctx_get_policy_data(sched_ctx); + FPRINTF(stderr, "sched_ctx %u : data %p (procs %p)\n", sched_ctx, ptr, procs); + STARPU_ASSERT_MSG(ptr == procs, "The policy data for the sched ctx is incorrect\n"); + + main_sched_ctx = starpu_sched_ctx_get_context(); + main_ptr = starpu_sched_ctx_get_policy_data(main_sched_ctx); + STARPU_ASSERT_MSG(main_ptr == NULL, "The policy data for the sched ctx should be NULL\n"); + + starpu_sched_ctx_set_policy_data(main_sched_ctx, procs); + main_ptr = starpu_sched_ctx_get_policy_data(sched_ctx); + FPRINTF(stderr, "sched_ctx %u : data %p (procs %p)\n", main_sched_ctx, main_ptr, procs); + STARPU_ASSERT_MSG(main_ptr == procs, "The policy data for the sched ctx is incorrect\n"); + + starpu_sched_ctx_delete(sched_ctx); + free(procs); + starpu_shutdown(); + + return (ptr == procs) ? 
0 : 1; +} diff --git a/tests/sched_policies/data_locality.c b/tests/sched_policies/data_locality.c new file mode 100644 index 0000000..bc2ba29 --- /dev/null +++ b/tests/sched_policies/data_locality.c @@ -0,0 +1,220 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include + +#include "../helper.h" + +/* + * Check that scheduling policies tend to put tasks on the worker which has a + * copy of the data + */ + +#define NTASKS 8 + +/* + * It is very inefficient to keep moving data between memory nodes. This + * test makes sure the scheduler will take account of the data locality + * when scheduling tasks. + * + * Applies to : dmda, pheft. + */ + +void dummy(void *buffers[], void *args) +{ + (void) buffers; + (void) args; +} + +/* + * Dummy cost function, used to make sure the scheduler does schedule the + * task, instead of getting rid of it as soon as possible because it doesn't + * know its expected length. 
+ */ +static double +cost_function(struct starpu_task *task, unsigned nimpl) +{ + (void) task; + (void) nimpl; + return 1.0; +} + +static struct starpu_perfmodel model = +{ + .type = STARPU_COMMON, + .cost_function = cost_function +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { dummy }, + .cuda_funcs = { dummy }, + .opencl_funcs = { dummy }, + .modes = { STARPU_RW }, + .model = &model, + .nbuffers = 1 +}; + +static int var = 42; +static starpu_data_handle_t rw_handle; + +static void +init_data(void) +{ + starpu_variable_data_register(&rw_handle, STARPU_MAIN_RAM, (uintptr_t) &var, + sizeof(var)); +} + +static void +free_data(void) +{ + starpu_data_unregister(rw_handle); +} + +static int +run(struct starpu_sched_policy *policy) +{ + int ret; + struct starpu_conf conf; + + starpu_conf_init(&conf); + conf.sched_policy = policy; + + ret = starpu_init(&conf); + if (ret == -ENODEV) + { + FPRINTF(stderr, "No device found\n"); + return -ENODEV; + } + + if (starpu_cpu_worker_get_count() == 0 || (starpu_cuda_worker_get_count() == 0 && starpu_opencl_worker_get_count() == 0)) + goto enodev; + + starpu_profiling_status_set(1); + init_data(); + + /* Send the handle to a GPU. */ + cl.where = STARPU_CUDA | STARPU_OPENCL; + struct starpu_task *tasks[NTASKS]; + tasks[0] = starpu_task_create(); + tasks[0]->cl = &cl; + tasks[0]->synchronous = 1; + tasks[0]->handles[0] = rw_handle; + tasks[0]->destroy = 0; + ret = starpu_task_submit(tasks[0]); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + + /* Now, run multiple tasks using this handle. 
*/ + cl.where |= STARPU_CPU; + int i; + for (i = 1; i < NTASKS; i++) + { + tasks[i] = starpu_task_create(); + tasks[i]->cl = &cl; + tasks[i]->handles[0] = rw_handle; + tasks[i]->destroy = 0; + ret = starpu_task_submit(tasks[i]); + if (ret == -ENODEV) + goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + starpu_task_wait_for_all(); + + /* All tasks should have been executed on the same GPU. */ + ret = 0; + unsigned nodeid = starpu_worker_get_memory_node(tasks[0]->profiling_info->workerid); + for (i = 0; i < NTASKS; i++) + { + if (starpu_worker_get_memory_node(tasks[i]->profiling_info->workerid) != nodeid) + { + FPRINTF(stderr, "Error for task %d. Worker id %d node id %d different from expected node id %d\n", i, tasks[i]->profiling_info->workerid, starpu_worker_get_memory_node(tasks[i]->profiling_info->workerid), nodeid); + ret = 1; + break; + } + starpu_task_destroy(tasks[i]); + } + + /* Clean everything up. */ + for (; i < NTASKS; i++) + starpu_task_destroy(tasks[i]); + + free_data(); + starpu_shutdown(); + + return ret; + +enodev: + FPRINTF(stderr, "No device found\n"); + starpu_shutdown(); + return -ENODEV; +} + +/* XXX: Does this test apply to other schedulers ? 
*/ +//extern struct starpu_sched_policy _starpu_sched_ws_policy; +//extern struct starpu_sched_policy _starpu_sched_prio_policy; +//extern struct starpu_sched_policy _starpu_sched_random_policy; +//extern struct starpu_sched_policy _starpu_sched_dm_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_policy; +//extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; +//extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; +//extern struct starpu_sched_policy _starpu_sched_eager_policy; +extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; +//extern struct starpu_sched_policy _starpu_sched_peager_policy; + +static struct starpu_sched_policy *policies[] = +{ + //&_starpu_sched_ws_policy, + //&_starpu_sched_prio_policy, + //&_starpu_sched_dm_policy, + &_starpu_sched_dmda_policy, + //&_starpu_sched_dmda_ready_policy, + //&_starpu_sched_dmda_sorted_policy, + //&_starpu_sched_random_policy, + //&_starpu_sched_eager_policy, + &_starpu_sched_parallel_heft_policy, + //&_starpu_sched_peager_policy +}; + +int main(void) +{ + int i; + int n_policies = sizeof(policies)/sizeof(policies[0]); + int global_ret = 0; + + char *sched = getenv("STARPU_SCHED"); + + for (i = 0; i < n_policies; ++i) + { + struct starpu_sched_policy *policy = policies[i]; + + if (sched && strcmp(sched, policy->policy_name)) + /* Testing another specific scheduler, no need to run this */ + continue; + + FPRINTF(stdout, "Running with policy %s.\n", policy->policy_name); + int ret = run(policy); + if (ret == -ENODEV && global_ret == 0) + global_ret = STARPU_TEST_SKIPPED; + if (ret == 1 && global_ret == 0) + global_ret = ret; + } + + return global_ret; +} diff --git a/tests/sched_policies/execute_all_tasks.c b/tests/sched_policies/execute_all_tasks.c new file mode 100644 index 0000000..b28bcd1 --- /dev/null +++ b/tests/sched_policies/execute_all_tasks.c @@ -0,0 +1,104 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * All tasks submitted by StarPU should be executed once. + * Applies to: all schedulers. + */ + +#define NTASKS 8 + +static int run(struct starpu_sched_policy *p) +{ + int ret; + struct starpu_conf conf; + + (void) starpu_conf_init(&conf); + conf.sched_policy = p; + + ret = starpu_init(&conf); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + + struct starpu_task *tasks[NTASKS] = { NULL }; + int i; + for (i = 0; i < NTASKS; i++) + { + struct starpu_task *task = starpu_task_create(); + tasks[i] = task; + task->cl = &starpu_codelet_nop; + task->synchronous = 1; + task->destroy = 0; + ret = starpu_task_submit(task); + if (ret != 0) + { + FPRINTF(stderr,"task submission returned %d\n", ret); + return 1; + } + } + + starpu_task_wait_for_all(); + + ret = 0; + for (i = 0; i < NTASKS; i++) + { + struct _starpu_job *j = tasks[i]->starpu_private; + if (j == NULL || j->terminated == 0) + { + FPRINTF(stderr, "Error with policy %s.\n", p->policy_name); + ret = 1; + break; + } + } + + for (i = 0; i < NTASKS; i++) + { + starpu_task_destroy(tasks[i]); + } + + starpu_shutdown(); + return ret; +} + +int main(void) +{ + struct starpu_sched_policy **policies; + struct starpu_sched_policy **policy; + + char *sched = getenv("STARPU_SCHED"); + + policies = 
starpu_sched_get_predefined_policies(); + for(policy=policies ; *policy!=NULL ; policy++) + { + if (sched && strcmp(sched, (*policy)->policy_name)) + /* Testing another specific scheduler, no need to run this */ + continue; + + FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); + int ret; + ret = run(*policy); + if (ret == 1) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/sched_policies/prio.c b/tests/sched_policies/prio.c new file mode 100644 index 0000000..e210721 --- /dev/null +++ b/tests/sched_policies/prio.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Submit a random mix of minimum-priority tasks (A) and maximum-priority tasks (B) and print the order in which they run (the order is not asserted). + * Applies to : all schedulers.
+ */ + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 10 +#elif !defined(STARPU_LONG_CHECK) +#define NTASKS 100 +#else +#define NTASKS 1000 +#endif + +void funcA(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + FPRINTF(stdout,"A"); + starpu_usleep(1000); +} + +void funcB(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + FPRINTF(stdout,"B"); + starpu_usleep(1000); +} + +static int run(struct starpu_sched_policy *policy) +{ + int ret; + struct starpu_conf conf; + int i; + + starpu_conf_init(&conf); + conf.sched_policy = policy; + ret = starpu_init(&conf); + if (ret != 0) + exit(STARPU_TEST_SKIPPED); + starpu_profiling_status_set(1); + + struct starpu_codelet clA = + { + .cpu_funcs = {funcA}, + .cpu_funcs_name = {"funcA"}, + .opencl_funcs = {funcA}, + .cuda_funcs = {funcA}, + .max_fpga_funcs = {funcA}, + .hip_funcs = {funcA}, + .nbuffers = 0 + }; + + struct starpu_codelet clB = + { + .cpu_funcs = {funcB}, + .cpu_funcs_name = {"funcB"}, + .opencl_funcs = {funcB}, + .cuda_funcs = {funcB}, + .max_fpga_funcs = {funcB}, + .hip_funcs = {funcB}, + .nbuffers = 0 + }; + + starpu_srand48(0); + + for (i = 0; i < NTASKS; i++) + { + struct starpu_task *task = starpu_task_create(); + + if (((int)(starpu_drand48()*2))%2) + { + task->cl = &clA; + task->priority=STARPU_MIN_PRIO; + } + else + { + task->cl = &clB; + task->priority=STARPU_MAX_PRIO; + } + task->detach=1; + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + FPRINTF(stdout,"\n"); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return -ENODEV; +} + +int main(void) +{ + struct starpu_sched_policy **policies; + struct starpu_sched_policy **policy; + + char *sched = getenv("STARPU_SCHED"); + + policies = starpu_sched_get_predefined_policies(); + for(policy=policies ; *policy!=NULL ; policy++) + { + int ret; + + if (sched && strcmp(sched, (*policy)->policy_name)) + /* 
Testing another specific scheduler, no need to run this */ + continue; + + FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); + ret = run(*policy); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + if (ret == 1) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/sched_policies/simple_cpu_gpu_sched.c b/tests/sched_policies/simple_cpu_gpu_sched.c new file mode 100644 index 0000000..18269b8 --- /dev/null +++ b/tests/sched_policies/simple_cpu_gpu_sched.c @@ -0,0 +1,290 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include "../helper.h" +#include + +/* + * Schedulers that are aware of the expected task length provided by the + * perfmodels must make sure that : + * - cpu_task is scheduled on a CPU. + * - gpu_task is scheduled on a GPU. + * + * Applies to : dmda and to what other schedulers ? + */ + +void dummy(void *buffers[], void *args) +{ + (void) buffers; + (void) args; +} + +/* + * Fake cost functions.
+ */ +static double +cpu_task_cpu(struct starpu_task *task, + struct starpu_perfmodel_arch* arch, + unsigned nimpl) +{ + (void) task; + (void) arch; + (void) nimpl; + return 1.0; +} + +static double +cpu_task_gpu(struct starpu_task *task, + struct starpu_perfmodel_arch* arch, + unsigned nimpl) +{ + (void) task; + (void) arch; + (void) nimpl; + + return 10000000.0; +} + +static double +gpu_task_cpu(struct starpu_task *task, + struct starpu_perfmodel_arch* arch, + unsigned nimpl) +{ + (void) task; + (void) arch; + (void) nimpl; + + return 10000000.0; +} + +static double +gpu_task_gpu(struct starpu_task *task, + struct starpu_perfmodel_arch* arch, + unsigned nimpl) +{ + (void) task; + (void) arch; + (void) nimpl; + + return 1.0; +} + +static struct starpu_perfmodel model_cpu_task = +{ + .type = STARPU_PER_ARCH, + .symbol = "model_cpu_task" +}; + +static struct starpu_perfmodel model_gpu_task = +{ + .type = STARPU_PER_ARCH, + .symbol = "model_gpu_task" +}; + +static void +init_perfmodels_gpu(int gpu_type) +{ + int nb_worker_gpu = starpu_worker_get_count_by_type(gpu_type); + int *worker_gpu_ids = malloc(nb_worker_gpu * sizeof(int)); + int worker_gpu; + + starpu_worker_get_ids_by_type(gpu_type, worker_gpu_ids, nb_worker_gpu); + for(worker_gpu = 0 ; worker_gpu < nb_worker_gpu ; worker_gpu ++) + { + starpu_perfmodel_set_per_devices_cost_function(&model_cpu_task, 0, cpu_task_gpu, + gpu_type, starpu_worker_get_devid(worker_gpu_ids[worker_gpu]), 1, + -1); + + starpu_perfmodel_set_per_devices_cost_function(&model_gpu_task, 0, gpu_task_gpu, + gpu_type, starpu_worker_get_devid(worker_gpu_ids[worker_gpu]), 1, + -1); + } + free(worker_gpu_ids); +} + +static void +init_perfmodels(void) +{ + starpu_perfmodel_init(&model_cpu_task); + starpu_perfmodel_init(&model_gpu_task); + + starpu_perfmodel_set_per_devices_cost_function(&model_cpu_task, 0, cpu_task_cpu, STARPU_CPU_WORKER, 0, 1, -1); + starpu_perfmodel_set_per_devices_cost_function(&model_gpu_task, 0, gpu_task_cpu, 
STARPU_CPU_WORKER, 0, 1, -1); + + // We need to set the cost function for each combination with a CUDA or an OpenCL worker + init_perfmodels_gpu(STARPU_CUDA_WORKER); + init_perfmodels_gpu(STARPU_OPENCL_WORKER); +} + +/* + * Dummy codelets. + */ +static struct starpu_codelet cpu_cl = +{ + .cpu_funcs = { dummy }, + .cuda_funcs = { dummy }, + .opencl_funcs = { dummy }, + .max_fpga_funcs = { dummy }, + .nbuffers = 0, + .model = &model_cpu_task +}; + +static struct starpu_codelet gpu_cl = +{ + .cpu_funcs = { dummy }, + .cuda_funcs = { dummy }, + .opencl_funcs = { dummy }, + .max_fpga_funcs = { dummy }, + .nbuffers = 0, + .model = &model_gpu_task +}; + +static int +run(struct starpu_sched_policy *policy) +{ + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.sched_policy = policy; + + int ret = starpu_init(&conf); + if (ret == -ENODEV) + exit(STARPU_TEST_SKIPPED); + + /* At least 1 CPU and 1 GPU are needed. */ + if (starpu_cpu_worker_get_count() == 0) + { + starpu_shutdown(); + exit(STARPU_TEST_SKIPPED); + } + if (starpu_cuda_worker_get_count() == 0 && starpu_opencl_worker_get_count() == 0) + { + starpu_shutdown(); + exit(STARPU_TEST_SKIPPED); + } + + starpu_profiling_status_set(1); + init_perfmodels(); + + struct starpu_task *cpu_task = starpu_task_create(); + cpu_task->cl = &cpu_cl; + cpu_task->destroy = 0; + + struct starpu_task *gpu_task = starpu_task_create(); + gpu_task->cl = &gpu_cl; + gpu_task->destroy = 0; + + ret = starpu_task_submit(cpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(gpu_task); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_wait_for_all(); + + enum starpu_worker_archtype cpu_task_worker, gpu_task_worker; + cpu_task_worker = starpu_worker_get_type(cpu_task->profiling_info->workerid); + gpu_task_worker = starpu_worker_get_type(gpu_task->profiling_info->workerid); + if (cpu_task_worker != STARPU_CPU_WORKER || (gpu_task_worker != STARPU_CUDA_WORKER && gpu_task_worker !=
STARPU_OPENCL_WORKER)) + { + FPRINTF(stderr, "Tasks did not execute on expected worker\n"); + if (cpu_task_worker != STARPU_CPU_WORKER) + { + FPRINTF(stderr, "The CPU task did not run on a CPU worker\n"); + } + if (gpu_task_worker != STARPU_CUDA_WORKER && gpu_task_worker != STARPU_OPENCL_WORKER) + { + FPRINTF(stderr, "The GPU task did not run on a Cuda or OpenCL worker\n"); + } + + ret = 1; + } + else + { + FPRINTF(stderr, "Tasks DID execute on expected worker\n"); + ret = 0; + } + + starpu_task_destroy(cpu_task); + starpu_task_destroy(gpu_task); + starpu_shutdown(); + return ret; +} + +/* +extern struct starpu_sched_policy _starpu_sched_ws_policy; +extern struct starpu_sched_policy _starpu_sched_prio_policy; +extern struct starpu_sched_policy _starpu_sched_random_policy; +extern struct starpu_sched_policy _starpu_sched_dm_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; +extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; +extern struct starpu_sched_policy _starpu_sched_eager_policy; +extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; +extern struct starpu_sched_policy _starpu_sched_peager_policy; +*/ +extern struct starpu_sched_policy _starpu_sched_dmda_policy; + +/* XXX: what policies are we interested in ? */ +static struct starpu_sched_policy *policies[] = +{ + //&_starpu_sched_ws_policy, + //&_starpu_sched_prio_policy, + //&_starpu_sched_dm_policy, + &_starpu_sched_dmda_policy, + //&_starpu_sched_dmda_ready_policy, + //&_starpu_sched_dmda_sorted_policy, + //&_starpu_sched_random_policy, + //&_starpu_sched_eager_policy, + //&_starpu_sched_parallel_heft_policy, + //&_starpu_sched_peager_policy +}; + +int main(void) +{ +#ifndef STARPU_HAVE_SETENV +/* XXX: is this macro used by all the schedulers we are interested in ? 
*/ +#warning "setenv() is not available, skipping this test" + return STARPU_TEST_SKIPPED; +#else + setenv("STARPU_SCHED_BETA", "0", 1); + + char *sched = getenv("STARPU_SCHED"); + + if (starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1) != 1) + return STARPU_TEST_SKIPPED; + + int i; + int n_policies = sizeof(policies)/sizeof(policies[0]); + for (i = 0; i < n_policies; ++i) + { + struct starpu_sched_policy *policy = policies[i]; + + if (sched && strcmp(sched, policy->policy_name)) + /* Testing another specific scheduler, no need to run this */ + continue; + + FPRINTF(stdout, "Running with policy %s.\n", + policy->policy_name); + int ret; + ret = run(policy); + if (ret == 1) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +#endif +} diff --git a/tests/sched_policies/simple_deps.c b/tests/sched_policies/simple_deps.c new file mode 100644 index 0000000..9b946c9 --- /dev/null +++ b/tests/sched_policies/simple_deps.c @@ -0,0 +1,119 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Task1 must be executed before task0, even if task0 is submitted first. + * Applies to : all schedulers. 
+ */ + +void dummy(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + starpu_usleep(10000); +} + +static int +run(struct starpu_sched_policy *policy) +{ + int ret; + struct starpu_conf conf; + starpu_conf_init(&conf); + conf.sched_policy = policy; + ret = starpu_init(&conf); + if (ret != 0) + exit(STARPU_TEST_SKIPPED); + starpu_profiling_status_set(1); + + struct starpu_codelet cl = + { + .cpu_funcs = {dummy}, + .cpu_funcs_name = {"dummy"}, + .opencl_funcs = {dummy}, + .cuda_funcs = {dummy}, + .max_fpga_funcs = {dummy}, + .nbuffers = 0 + }; + + struct starpu_task *task0 = starpu_task_create(); + task0->cl = &cl; + task0->destroy = 0; + + struct starpu_task *task1 = starpu_task_create(); + task1->cl = &cl; + task1->destroy = 0; + + starpu_task_declare_deps_array(task0, 1, &task1); + + ret = starpu_task_submit(task0); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + ret = starpu_task_submit(task1); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + + starpu_task_wait_for_all(); + + double task1_end, task0_start; + task1_end = starpu_timing_timespec_to_us(&task1->profiling_info->end_time); + task0_start = starpu_timing_timespec_to_us(&task0->profiling_info->start_time); + + starpu_task_destroy(task0); + starpu_task_destroy(task1); + starpu_shutdown(); + + return !!(task1_end > task0_start); + +enodev: + starpu_shutdown(); + return -ENODEV; +} + +int main(void) +{ + struct starpu_sched_policy **policies; + struct starpu_sched_policy **policy; + + char *sched = getenv("STARPU_SCHED"); + + policies = starpu_sched_get_predefined_policies(); + for(policy=policies ; *policy!=NULL ; policy++) + { + int ret; + + if (sched && strcmp(sched, (*policy)->policy_name)) + /* Testing another specific scheduler, no need to run this */ + continue; + + if (!strcmp("heteroprio", (*policy)->policy_name)) + /* https://gitlab.inria.fr/starpu/starpu/-/issues/18 */ + continue; + + 
FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); + ret = run(*policy); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + if (ret == 1) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/sched_policies/workerids.c b/tests/sched_policies/workerids.c new file mode 100644 index 0000000..0e4e6b2 --- /dev/null +++ b/tests/sched_policies/workerids.c @@ -0,0 +1,136 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Simon Archipoff + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include "../helper.h" + +/* + * Check that the starpu_task::workerids field is respected by schedulers. 
+ */ + +#ifdef STARPU_QUICK_CHECK +#define NTASKS 10 +#elif !defined(STARPU_LONG_CHECK) +#define NTASKS 100 +#else +#define NTASKS 1000 +#endif + +void funcA(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + STARPU_ASSERT(starpu_worker_get_id() == 0); + starpu_usleep(1000); +} + +double cost_function(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch *a STARPU_ATTRIBUTE_UNUSED, unsigned i STARPU_ATTRIBUTE_UNUSED) +{ + return 1000; +} + +static struct starpu_perfmodel perf_model = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = cost_function, +}; + +static struct starpu_codelet clA = +{ + .cpu_funcs = {funcA}, + .cpu_funcs_name = {"funcA"}, + .opencl_funcs = {funcA}, + .cuda_funcs = {funcA}, + .hip_funcs = {funcA}, + .max_fpga_funcs = {funcA}, + .nbuffers = 0, + .model = &perf_model, +}; + +static int run(struct starpu_sched_policy *policy) +{ + int ret; + struct starpu_conf conf; + int i; + + starpu_conf_init(&conf); + conf.sched_policy = policy; + ret = starpu_init(&conf); + if (ret != 0) + exit(STARPU_TEST_SKIPPED); + + uint32_t zeromask = 1; + for (i = 0; i < NTASKS; i++) + { + struct starpu_task *task = starpu_task_create(); + + task->cl = &clA; + task->workerids = &zeromask; + task->workerids_len = 1; + ret = starpu_task_submit(task); + if (ret == -ENODEV) goto enodev; + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); + } + + starpu_task_wait_for_all(); + FPRINTF(stdout,"\n"); + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return -ENODEV; +} + +int main(void) +{ + struct starpu_sched_policy **policies; + struct starpu_sched_policy **policy; + + char *sched = getenv("STARPU_SCHED"); + + policies = starpu_sched_get_predefined_policies(); + for(policy=policies ; *policy!=NULL ; policy++) + { + int ret; + + if (strcmp((*policy)->policy_name, "lws") == 0 + || strcmp((*policy)->policy_name, "ws") == 0 + || strcmp((*policy)->policy_name, "heteroprio") == 0 + || strcmp((*policy)->policy_name, 
"modular-gemm") == 0) +#ifdef STARPU_DEVEL +#warning FIXME performance for ws +#endif + continue; + + if (sched && strcmp(sched, (*policy)->policy_name)) + /* Testing another specific scheduler, no need to run this */ + continue; + + FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); + ret = run(*policy); + if (ret == -ENODEV) + return STARPU_TEST_SKIPPED; + if (ret == 1) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/variable/increment.c b/tests/variable/increment.c new file mode 100644 index 0000000..6dacb4d --- /dev/null +++ b/tests/variable/increment.c @@ -0,0 +1,156 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" +#include "increment.h" + +#ifdef STARPU_USE_CUDA +extern void increment_cuda(void *descr[], void *_args); +extern void redux_cuda(void *descr[], void *arg); +extern void neutral_cuda(void *descr[], void *arg); +#endif +#ifdef STARPU_USE_HIP +extern void increment_hip(void *descr[], void *_args); +extern void redux_hip(void *descr[], void *arg); +extern void neutral_hip(void *descr[], void *arg); +#endif +#ifdef STARPU_USE_OPENCL +extern void increment_opencl(void *buffers[], void *args); +extern void redux_opencl(void *descr[], void *arg); +extern void neutral_opencl(void *descr[], void *arg); +#endif + +void increment_cpu(void *descr[], void *arg) +{ + (void)arg; + unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + (*tokenptr)++; +} + +struct starpu_codelet increment_cl = +{ + .modes = {STARPU_RW}, + .cpu_funcs = {increment_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {increment_hip}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {increment_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"increment_cpu"}, + .nbuffers = 1, + + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +struct starpu_codelet increment_redux_cl = +{ + .modes = {STARPU_REDUX}, + .cpu_funcs = {increment_cpu}, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {increment_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {increment_hip}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {increment_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs_name = {"increment_cpu"}, + .nbuffers = 1, + + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +void redux_cpu(void *descr[], void *arg) +{ + (void)arg; + + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *src = 
(unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + *dst = *dst + *src; +} + +struct starpu_codelet redux_cl = +{ + .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, + .nbuffers = 2, +#ifdef STARPU_USE_CUDA + .cuda_funcs = {redux_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {redux_hip}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {redux_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {redux_cpu}, + .cpu_funcs_name = {"redux_cpu"}, + + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +void neutral_cpu(void *descr[], void *arg) +{ + (void)arg; + + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + *dst = 0; +} + +struct starpu_codelet neutral_cl = +{ +#ifdef STARPU_USE_CUDA + .cuda_funcs = {neutral_cuda}, + .cuda_flags = {STARPU_CUDA_ASYNC}, +#endif +#ifdef STARPU_USE_HIP + .hip_funcs = {neutral_hip}, + .hip_flags = {STARPU_HIP_ASYNC}, +#endif +#ifdef STARPU_USE_OPENCL + .opencl_funcs = {neutral_opencl}, + .opencl_flags = {STARPU_OPENCL_ASYNC}, +#endif + .cpu_funcs = {neutral_cpu}, + .cpu_funcs_name = {"neutral_cpu"}, + .modes = {STARPU_W}, + .nbuffers = 1, + + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + +#ifndef STARPU_USE_OPENCL +void increment_load_opencl() +{ +} +void increment_unload_opencl() +{ +} +#endif diff --git a/tests/variable/increment.h b/tests/variable/increment.h new file mode 100644 index 0000000..7186b06 --- /dev/null +++ b/tests/variable/increment.h @@ -0,0 +1,26 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include + +extern struct starpu_codelet increment_cl; +extern struct starpu_codelet increment_redux_cl; +extern struct starpu_codelet redux_cl; +extern struct starpu_codelet neutral_cl; + +void increment_load_opencl(); +void increment_unload_opencl(); + diff --git a/tests/variable/increment_cuda.cu b/tests/variable/increment_cuda.cu new file mode 100644 index 0000000..18d8191 --- /dev/null +++ b/tests/variable/increment_cuda.cu @@ -0,0 +1,67 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include "../helper.h" + +static __global__ void _increment_cuda(unsigned *val) +{ + val[0]++; +} + +extern "C" void increment_cuda(void *descr[], void *cl_arg) +{ + unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + + STARPU_SKIP_IF_VALGRIND; + + _increment_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} + +static __global__ void _redux_cuda(unsigned *dst, unsigned *src) +{ + dst[0] += src[0]; +} + +extern "C" void redux_cuda(void *descr[], void *arg) +{ + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + + STARPU_SKIP_IF_VALGRIND; + + _redux_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dst, src); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} + +static __global__ void _neutral_cuda(unsigned *dst) +{ + dst[0] = 0; +} + +extern "C" void neutral_cuda(void *descr[], void *arg) +{ + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + + STARPU_SKIP_IF_VALGRIND; + + _neutral_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dst); + cudaError_t status = cudaGetLastError(); + if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); +} diff --git a/tests/variable/increment_hip.hip b/tests/variable/increment_hip.hip new file mode 100644 index 0000000..25c856e --- /dev/null +++ b/tests/variable/increment_hip.hip @@ -0,0 +1,70 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "../helper.h" + +static __global__ void _increment_hip(unsigned *val) +{ + val[0]++; +} + +extern "C" void increment_hip(void *descr[], void *cl_arg) +{ + (void)cl_arg; + unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + + hipLaunchKernelGGL(_increment_hip, 1, 1, 0, starpu_hip_get_local_stream(), val); + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); +} + +static __global__ void _redux_hip(unsigned *dst, unsigned *src) +{ + dst[0] += src[0]; +} + +extern "C" void redux_hip(void *descr[], void *cl_arg) +{ + (void)cl_arg; + + STARPU_SKIP_IF_VALGRIND; + + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); + + hipLaunchKernelGGL(_redux_hip, 1, 1, 0, starpu_hip_get_local_stream(), dst, src); + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); +} + +static __global__ void _neutral_hip(unsigned *dst) +{ + dst[0] = 0; +} + +extern "C" void neutral_hip(void *descr[], void *cl_arg) +{ + (void)cl_arg; + + STARPU_SKIP_IF_VALGRIND; + + unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); + + hipLaunchKernelGGL(_neutral_hip, 1, 1, 0, starpu_hip_get_local_stream(), dst); + hipError_t status = hipGetLastError(); + if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); +} diff --git a/tests/variable/increment_opencl.c b/tests/variable/increment_opencl.c new file mode 100644 index 0000000..6cf20ba --- /dev/null +++ b/tests/variable/increment_opencl.c @@ -0,0 +1,140 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ *
+ * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+/*
+ * Queue OpenCL kernels that increment, accumulate (+=) or reset a variable
+ */
+
+struct starpu_opencl_program opencl_increment_program;
+struct starpu_opencl_program opencl_redux_program;
+struct starpu_opencl_program opencl_neutral_program;
+/* Load the three kernel source files into the program objects above; each call's result is checked. */
+void increment_load_opencl(void)
+{
+	int ret = starpu_opencl_load_opencl_from_file("tests/variable/increment_opencl_kernel.cl", &opencl_increment_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+	ret = starpu_opencl_load_opencl_from_file("tests/variable/redux_opencl_kernel.cl", &opencl_redux_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+	ret = starpu_opencl_load_opencl_from_file("tests/variable/neutral_opencl_kernel.cl", &opencl_neutral_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+}
+/* Unload the three programs loaded by increment_load_opencl(); each call's result is checked. */
+void increment_unload_opencl(void)
+{
+	int ret = starpu_opencl_unload_opencl(&opencl_increment_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+	ret = starpu_opencl_unload_opencl(&opencl_redux_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+	ret = starpu_opencl_unload_opencl(&opencl_neutral_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}
+/* Enqueue _increment_opencl on the variable held in buffers[0]; args is unused. */
+void increment_opencl(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+
+	cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_increment_program, "_increment_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1; /* one work-item: the kernel only touches val[0] */
+		size_t local=1;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+	starpu_opencl_release_kernel(kernel);
+}
+/* Enqueue _redux_opencl with buffers[0] as dst and buffers[1] as src; args is unused. */
+void redux_opencl(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+
+	cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);
+	cl_mem src = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[1]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_redux_program, "_redux_opencl", devid);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	err = clSetKernelArg(kernel, 1, sizeof(src), &src);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1; /* one work-item: the kernel only touches dst[0] and src[0] */
+		size_t local=1;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	}
+	starpu_opencl_release_kernel(kernel);
+}
+/* Enqueue _neutral_opencl on the variable held in buffers[0]; args is unused. */
+void neutral_opencl(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+	cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+
+	cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);
+
+	id = starpu_worker_get_id_check();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_neutral_program, "_neutral_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1; /* one work-item: the kernel only touches dst[0] */
+		size_t local=1;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	}
+	starpu_opencl_release_kernel(kernel);
+}
diff --git a/tests/variable/increment_opencl_kernel.cl b/tests/variable/increment_opencl_kernel.cl
new file mode 100644
index 0000000..0b47762
--- /dev/null
+++ b/tests/variable/increment_opencl_kernel.cl
@@ -0,0 +1,20 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void _increment_opencl(__global unsigned *val)
+{
+	val[0]++;
+}
diff --git a/tests/variable/neutral_opencl_kernel.cl b/tests/variable/neutral_opencl_kernel.cl
new file mode 100644
index 0000000..49d82e5
--- /dev/null
+++ b/tests/variable/neutral_opencl_kernel.cl
@@ -0,0 +1,20 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/* Store 0 in dst[0], i.e. the value that leaves a later "+=" accumulation unchanged. */
+__kernel void _neutral_opencl(__global unsigned *dst)
+{
+	dst[0] = 0;
+}
diff --git a/tests/variable/redux_opencl_kernel.cl b/tests/variable/redux_opencl_kernel.cl
new file mode 100644
index 0000000..c709d03
--- /dev/null
+++ b/tests/variable/redux_opencl_kernel.cl
@@ -0,0 +1,20 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/* Add src[0] into dst[0]; src is only read. */
+__kernel void _redux_opencl(__global unsigned *dst, __global unsigned *src)
+{
+	dst[0] += src[0];
+}
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 0000000..eb8873e
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,563 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2013-2013 Thibaut Lambert
+# Copyright (C) 2013-2013 Joris Pablo
+# Copyright (C) 2017-2017 Erwan Leria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+include $(top_srcdir)/make/starpu-tests.mk
+include $(top_srcdir)/make/starpu-loader.mk
+
+SUBDIRS =
+## The tools compile against the in-tree headers and link against the libstarpu built in $(top_builddir)/src.
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src $(STARPU_H_CPPFLAGS)
+AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
+LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
+## Both lists start empty and are extended with += further down, partly under configure conditionals.
+bin_PROGRAMS =
+dist_bin_SCRIPTS =
+
+dist_pkgdata_DATA = gdbinit
+## Installation directories for the sample bus and codelet performance models listed below.
+pkgdata_perfmodels_sampling_busdir = $(datarootdir)/starpu/perfmodels/sampling/bus
+pkgdata_perfmodels_sampling_codeletsdir = $(datarootdir)/starpu/perfmodels/sampling/codelets/45
+
+dist_pkgdata_perfmodels_sampling_bus_DATA = \
+	perfmodels/sampling/bus/attila.affinity \
+	perfmodels/sampling/bus/attila.bandwidth \
+	perfmodels/sampling/bus/attila.config \
+	perfmodels/sampling/bus/attila.latency \
+	perfmodels/sampling/bus/attila.platform.xml \
+	perfmodels/sampling/bus/attila.platform.v4.xml \
+	perfmodels/sampling/bus/hannibal.affinity \
+	perfmodels/sampling/bus/hannibal.bandwidth \
+	perfmodels/sampling/bus/hannibal.config \
+	perfmodels/sampling/bus/hannibal.latency \
+	perfmodels/sampling/bus/hannibal.platform.xml \
+	perfmodels/sampling/bus/hannibal.platform.v4.xml \
+
perfmodels/sampling/bus/hannibal-pitch.affinity \ + perfmodels/sampling/bus/hannibal-pitch.bandwidth \ + perfmodels/sampling/bus/hannibal-pitch.config \ + perfmodels/sampling/bus/hannibal-pitch.latency \ + perfmodels/sampling/bus/hannibal-pitch.platform.xml \ + perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml \ + perfmodels/sampling/bus/idgraf.affinity \ + perfmodels/sampling/bus/idgraf.bandwidth \ + perfmodels/sampling/bus/idgraf.config \ + perfmodels/sampling/bus/idgraf.latency \ + perfmodels/sampling/bus/idgraf.platform.xml \ + perfmodels/sampling/bus/idgraf.platform.v4.xml \ + perfmodels/sampling/bus/mirage.affinity \ + perfmodels/sampling/bus/mirage.bandwidth \ + perfmodels/sampling/bus/mirage.config \ + perfmodels/sampling/bus/mirage.latency \ + perfmodels/sampling/bus/mirage.platform.xml \ + perfmodels/sampling/bus/mirage.platform.v4.xml \ + perfmodels/sampling/bus/sirocco.affinity \ + perfmodels/sampling/bus/sirocco.bandwidth \ + perfmodels/sampling/bus/sirocco.config \ + perfmodels/sampling/bus/sirocco.latency \ + perfmodels/sampling/bus/sirocco.platform.xml \ + perfmodels/sampling/bus/sirocco.platform.v4.xml + +dist_pkgdata_perfmodels_sampling_codelets_DATA = \ + perfmodels/sampling/codelets/45/chol_model_potrf.attila \ + perfmodels/sampling/codelets/45/chol_model_trsm.attila \ + perfmodels/sampling/codelets/45/chol_model_syrk.attila \ + perfmodels/sampling/codelets/45/chol_model_gemm.attila \ + perfmodels/sampling/codelets/45/cl_update.attila \ + perfmodels/sampling/codelets/45/save_cl_bottom.attila \ + perfmodels/sampling/codelets/45/save_cl_top.attila \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila \ 
+ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.hannibal \ + perfmodels/sampling/codelets/45/chol_model_trsm.hannibal \ + perfmodels/sampling/codelets/45/chol_model_syrk.hannibal \ + perfmodels/sampling/codelets/45/chol_model_gemm.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch \ + 
perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.idgraf \ + perfmodels/sampling/codelets/45/chol_model_trsm.idgraf \ + perfmodels/sampling/codelets/45/chol_model_syrk.idgraf \ + perfmodels/sampling/codelets/45/chol_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/cl_update.idgraf \ + perfmodels/sampling/codelets/45/save_cl_bottom.idgraf \ + perfmodels/sampling/codelets/45/save_cl_top.idgraf \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf \ + 
perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.mirage \ + perfmodels/sampling/codelets/45/chol_model_trsm.mirage \ + perfmodels/sampling/codelets/45/chol_model_syrk.mirage \ + perfmodels/sampling/codelets/45/chol_model_gemm.mirage \ + perfmodels/sampling/codelets/45/cl_update.mirage \ + perfmodels/sampling/codelets/45/save_cl_bottom.mirage \ + perfmodels/sampling/codelets/45/save_cl_top.mirage \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage \ + perfmodels/sampling/codelets/45/add_scal.mirage \ + perfmodels/sampling/codelets/45/func.mirage \ + perfmodels/sampling/codelets/45/log_arr.mirage \ + perfmodels/sampling/codelets/45/log_list.mirage \ + perfmodels/sampling/codelets/45/multi.mirage \ + perfmodels/sampling/codelets/45/multi_2arr.mirage \ + perfmodels/sampling/codelets/45/multi_list.mirage \ + perfmodels/sampling/codelets/45/scal.mirage \ + perfmodels/sampling/codelets/45/scal_arr.mirage \ + perfmodels/sampling/codelets/45/sqrt.mirage \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.sirocco \ + perfmodels/sampling/codelets/45/chol_model_trsm.sirocco \ + 
perfmodels/sampling/codelets/45/chol_model_syrk.sirocco \ + perfmodels/sampling/codelets/45/chol_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/cl_update.sirocco \ + perfmodels/sampling/codelets/45/save_cl_bottom.sirocco \ + perfmodels/sampling/codelets/45/save_cl_top.sirocco \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco \ +\ + perfmodels/sampling/codelets/45/null.idgraf \ + perfmodels/sampling/codelets/45/null.sirocco + +EXTRA_DIST = \ + dev/checker/rename.sed \ + dev/checker/rename.sh \ + dev/cppcheck/suppressions.txt \ + dev/valgrind/blas.suppr \ + dev/valgrind/nvidia.suppr \ + dev/valgrind/bash.suppr \ + dev/valgrind/fxt.suppr \ + dev/valgrind/glpk.suppr \ + dev/valgrind/hdf5.suppr \ + dev/valgrind/hwloc.suppr \ + dev/valgrind/libc.suppr \ + dev/valgrind/libgomp.suppr \ + dev/valgrind/libnuma.suppr \ + dev/valgrind/madmpi.suppr \ + dev/valgrind/opencl.suppr \ + dev/valgrind/openmpi.suppr \ + dev/valgrind/openmp.suppr \ + dev/valgrind/p11-kit.suppr \ + dev/valgrind/padico.suppr \ + dev/valgrind/papi.suppr \ + dev/valgrind/pthread.suppr \ + dev/valgrind/starpu.suppr \ + dev/valgrind/starpu_pw.suppr \ + dev/valgrind/starpupy.suppr \ + dev/valgrind/valgrind.suppr \ + dev/valgrind/valgrind.sh \ + dev/valgrind/valgrind_xml.sh \ + dev/valgrind/helgrind.sh \ + dev/tsan/starpu.suppr \ + dev/lsan/suppressions \ + perfmodels/README \ + perfmodels/cluster.xml \ + 
perfmodels/hostfile \
+	perfmodels/sampling/codelets/tmp/mlr_init.out \
+	msvc/starpu_clean.bat \
+	msvc/starpu_open.bat \
+	msvc/starpu_exec.bat \
+	msvc/starpu_var.bat \
+	msvc/starpu.sln \
+	msvc/starpu/starpu.vcxproj \
+	release/Makefile \
+	release/README.md \
+	patch-ayudame \
+	perfs/bench_sgemm.sh \
+	perfs/error_model.gp \
+	perfs/error_model.sh \
+	distrib/distrib.r \
+	distrib/distrib.sh \
+	starpu_msexec
+
+CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log figure/* mlr_*
+
+#####################################
+# What to install and what to check #
+#####################################
+
+STARPU_TOOLS =
+TESTS = $(STARPU_TOOLS)
+
+check_PROGRAMS = $(STARPU_TOOLS)
+## The FxT trace tools are only built, installed and checked when FxT support is enabled.
+if STARPU_USE_FXT
+bin_PROGRAMS += \
+	starpu_fxt_tool \
+	starpu_fxt_stats \
+	starpu_fxt_data_trace
+
+STARPU_TOOLS += \
+	starpu_fxt_tool \
+	starpu_fxt_stats \
+	starpu_fxt_data_trace
+
+starpu_fxt_tool_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS)
+starpu_fxt_tool_LDADD = $(FXT_LIBS)
+starpu_fxt_tool_LDFLAGS = $(FXT_LDFLAGS)
+
+starpu_fxt_stats_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS)
+starpu_fxt_stats_LDADD = $(FXT_LIBS)
+starpu_fxt_stats_LDFLAGS = $(FXT_LDFLAGS)
+
+starpu_fxt_data_trace_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS)
+starpu_fxt_data_trace_LDADD = $(FXT_LIBS)
+starpu_fxt_data_trace_LDFLAGS = $(FXT_LDFLAGS)
+endif
+
+bin_PROGRAMS += \
+	starpu_perfmodel_display \
+	starpu_perfmodel_plot \
+	starpu_calibrate_bus \
+	starpu_machine_display \
+	starpu_sched_display \
+	starpu_tasks_rec_complete \
+	starpu_lp2paje \
+	starpu_perfmodel_recdump
+
+if STARPU_SIMGRID
+bin_PROGRAMS += \
+	starpu_replay
+
+starpu_replay_SOURCES = \
+	starpu_replay.c \
+	starpu_replay_sched.c
+
+endif
+
+starpu_perfmodel_plot_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS)
+
+if STARPU_LONG_CHECK
+STARPU_TOOLS += \
+	starpu_calibrate_bus
+endif
+
+STARPU_TOOLS += \
+	starpu_machine_display \
+	starpu_sched_display
+
+if !STARPU_HAVE_WINDOWS
+STARPU_TOOLS += \
+	starpu_perfmodel_display \
+	starpu_perfmodel_plot
+endif
+
+dist_bin_SCRIPTS += \
+	starpu_workers_activity \
+	starpu_codelet_histo_profile \
+	starpu_codelet_profile \
+	starpu_env \
+	starpu_config \
+	starpu_mpi_comm_matrix.py \
+	starpu_fxt_number_events_to_names.py \
+	starpu_paje_draw_histogram \
+	starpu_paje_draw_histogram.R \
+	starpu_paje_summary \
+	starpu_paje_summary.Rmd \
+	starpu_mlr_analysis \
+	starpu_mlr_analysis.Rmd \
+	starpu_paje_state_stats \
+	starpu_paje_state_stats.R \
+	starpu_send_recv_data_use.py \
+	starpu_trace_state_stats.py
+
+if STARPU_USE_AYUDAME2
+dist_bin_SCRIPTS += \
+	starpu_temanejo2.sh
+dist_pkgdata_DATA += \
+	ayudame.cfg
+endif
+
+if STARPU_HAVE_WINDOWS
+STARPU_MSVC_dir = $(bindir)
+nobase_STARPU_MSVC__DATA = \
+	msvc/starpu_clean.bat \
+	msvc/starpu_open.bat \
+	msvc/starpu_exec.bat \
+	msvc/starpu_var.bat \
+	msvc/starpu.sln \
+	msvc/starpu/starpu.vcxproj
+endif
+## Man pages: help2man runs each built tool or script (scripts are made executable first) and writes the .1 file.
+if STARPU_HAVE_HELP2MAN
+starpu_calibrate_bus.1: starpu_calibrate_bus$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Force StarPU bus calibration" --output=$@ ./$<
+starpu_machine_display.1: starpu_machine_display$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display machine StarPU information" --output=$@ ./$<
+starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU performance model" --output=$@ ./$<
+starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Plot StarPU performance model" --output=$@ ./$<
+starpu_tasks_rec_complete.1: starpu_tasks_rec_complete$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Complete StarPU tasks.rec file" --output=$@ ./$<
+starpu_lp2paje.1: starpu_lp2paje$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert lp StarPU schedule into Paje format" --output=$@ ./$<
+starpu_workers_activity.1: starpu_workers_activity
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU workers activity" --output=$@ ./$<
+starpu_codelet_profile.1: starpu_codelet_profile
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet profile" --output=$@ ./$<
+starpu_env.1: starpu_env
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Set StarPU environment variables" --output=$@ ./$<
+starpu_codelet_histo_profile.1: starpu_codelet_histo_profile
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet histogram" --output=$@ ./$<
+starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU MPI communications matrix" --output=$@ ./$<
+starpu_fxt_number_events_to_names.1: starpu_fxt_number_events_to_names.py
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert events in StarPU traces" --output=$@ ./$<
+starpu_paje_draw_histogram.1: starpu_paje_draw_histogram
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU trace histogram" --output=$@ ./$<
+starpu_paje_state_stats.1: starpu_paje_state_stats
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from StarPU trace" --output=$@ ./$<
+starpu_config.1: starpu_config
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU configuration" --output=$@ ./$<
+
+if STARPU_USE_FXT
+starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert raw StarPU FxT trace to various traces" --output=$@ ./$<
+starpu_fxt_stats.1: starpu_fxt_stats$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from raw StarPU FxT trace" --output=$@ ./$<
+starpu_fxt_data_trace.1: starpu_fxt_data_trace$(EXEEXT)
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print data trace from raw StarPU FxT trace" --output=$@ ./$<
+endif
+
+if STARPU_USE_TCPIP_MASTER_SLAVE
+starpu_tcpipexec.1: starpu_tcpipexec
+	@chmod +x $<
+	$(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Execute TCPIP application" --output=$@ ./$<
+endif
+
+dist_man1_MANS = \
+	starpu_calibrate_bus.1 \
+	starpu_machine_display.1 \
+	starpu_perfmodel_display.1 \
+	starpu_perfmodel_plot.1 \
+	starpu_tasks_rec_complete.1 \
+	starpu_lp2paje.1 \
+	starpu_workers_activity.1 \
+	starpu_codelet_profile.1 \
+	starpu_codelet_histo_profile.1 \
+	starpu_env.1 \
+	starpu_mpi_comm_matrix.1 \
+	starpu_fxt_number_events_to_names.1 \
+	starpu_paje_draw_histogram.1 \
+	starpu_paje_state_stats.1 \
+	starpu_config.1
+
+if STARPU_USE_FXT
+dist_man1_MANS +=\
+	starpu_fxt_tool.1 \
+	starpu_fxt_stats.1 \
+	starpu_fxt_data_trace.1
+endif
+
+if STARPU_USE_TCPIP_MASTER_SLAVE
+dist_man1_MANS +=\
+	starpu_tcpipexec.1
+endif
+
+clean-local:
+	$(RM) $(dist_man1_MANS) starpu_config.cfg
+
+endif
+
+if STARPU_SIMGRID
+dist_pkgdata_DATA += starpu_smpi.xslt
+dist_bin_SCRIPTS += starpu_smpirun
+endif
+
+if STARPU_USE_TCPIP_MASTER_SLAVE
+dist_bin_SCRIPTS += starpu_tcpipexec
+endif
+## starpu_config.cfg keeps the lines of config.h that contain both "STARPU" and a '#'.
+dist_pkgdata_DATA += starpu_config.cfg
+starpu_config.cfg: $(top_builddir)/src/common/config.h
+	grep STARPU $< | grep '#' > $@
+
diff --git a/tools/Makefile.in b/tools/Makefile.in
new file mode 100644
index 0000000..39d3b0a
--- /dev/null
+++ b/tools/Makefile.in
@@ -0,0 +1,2642 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) +@STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ +noinst_PROGRAMS = $(am__EXEEXT_6) +# Make tests run through mpiexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec +# switch off local socket usage +#MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 +@STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader +bin_PROGRAMS = $(am__EXEEXT_1) starpu_perfmodel_display$(EXEEXT) \ + starpu_perfmodel_plot$(EXEEXT) starpu_calibrate_bus$(EXEEXT) \ + starpu_machine_display$(EXEEXT) starpu_sched_display$(EXEEXT) \ + starpu_tasks_rec_complete$(EXEEXT) starpu_lp2paje$(EXEEXT) \ + starpu_perfmodel_recdump$(EXEEXT) $(am__EXEEXT_2) +TESTS = $(am__EXEEXT_5) +check_PROGRAMS = 
$(am__EXEEXT_5) +@STARPU_USE_FXT_TRUE@am__append_8 = \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_tool \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_stats \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace + +@STARPU_USE_FXT_TRUE@am__append_9 = \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_tool \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_stats \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace + +@STARPU_SIMGRID_TRUE@am__append_10 = \ +@STARPU_SIMGRID_TRUE@ starpu_replay + +@STARPU_LONG_CHECK_TRUE@am__append_11 = \ +@STARPU_LONG_CHECK_TRUE@ starpu_calibrate_bus + +@STARPU_HAVE_WINDOWS_FALSE@am__append_12 = \ +@STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_display \ +@STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_plot + +@STARPU_USE_AYUDAME2_TRUE@am__append_13 = \ +@STARPU_USE_AYUDAME2_TRUE@ starpu_temanejo2.sh + +@STARPU_USE_AYUDAME2_TRUE@am__append_14 = \ +@STARPU_USE_AYUDAME2_TRUE@ ayudame.cfg + +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@am__append_15 = \ +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_tool.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_stats.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace.1 + +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_16 = \ +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ starpu_tcpipexec.1 + +@STARPU_SIMGRID_TRUE@am__append_17 = starpu_smpi.xslt +@STARPU_SIMGRID_TRUE@am__append_18 = starpu_smpirun +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_19 = starpu_tcpipexec +subdir = tools +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ + $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ + $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ + $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) 
$(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__dist_bin_SCRIPTS_DIST) \ + $(am__dist_pkgdata_DATA_DIST) \ + $(dist_pkgdata_perfmodels_sampling_bus_DATA) \ + $(dist_pkgdata_perfmodels_sampling_codelets_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/common/config.h \ + $(top_builddir)/src/common/config-src-build.h \ + $(top_builddir)/include/starpu_config.h \ + $(top_builddir)/starpurm/include/starpurm_config.h +CONFIG_CLEAN_FILES = starpu_env starpu_codelet_profile \ + starpu_codelet_histo_profile starpu_mpi_comm_matrix.py \ + starpu_fxt_number_events_to_names.py starpu_workers_activity \ + starpu_paje_draw_histogram starpu_paje_state_stats \ + starpu_paje_summary starpu_config starpu_mlr_analysis \ + starpu_paje_sort starpu_smpirun starpu_tcpipexec +CONFIG_CLEAN_VPATH_FILES = +@STARPU_USE_FXT_TRUE@am__EXEEXT_1 = starpu_fxt_tool$(EXEEXT) \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_stats$(EXEEXT) \ +@STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace$(EXEEXT) +@STARPU_SIMGRID_TRUE@am__EXEEXT_2 = starpu_replay$(EXEEXT) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" \ + "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(pkgdatadir)" \ + "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" \ + "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" \ + "$(DESTDIR)$(STARPU_MSVC_dir)" +@STARPU_LONG_CHECK_TRUE@am__EXEEXT_3 = starpu_calibrate_bus$(EXEEXT) +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_4 = \ +@STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_display$(EXEEXT) \ +@STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_plot$(EXEEXT) +am__EXEEXT_5 = $(am__EXEEXT_1) $(am__EXEEXT_3) \ + starpu_machine_display$(EXEEXT) starpu_sched_display$(EXEEXT) \ + $(am__EXEEXT_4) +@STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_6 = loader$(EXEEXT) +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +loader_SOURCES = loader.c +loader_OBJECTS = loader-loader.$(OBJEXT) +loader_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = 
$(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +starpu_calibrate_bus_SOURCES = starpu_calibrate_bus.c +starpu_calibrate_bus_OBJECTS = starpu_calibrate_bus.$(OBJEXT) +starpu_calibrate_bus_LDADD = $(LDADD) +starpu_fxt_data_trace_SOURCES = starpu_fxt_data_trace.c +starpu_fxt_data_trace_OBJECTS = \ + starpu_fxt_data_trace-starpu_fxt_data_trace.$(OBJEXT) +am__DEPENDENCIES_1 = +@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_DEPENDENCIES = \ +@STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) +starpu_fxt_data_trace_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_data_trace_LDFLAGS) \ + $(LDFLAGS) -o $@ +starpu_fxt_stats_SOURCES = starpu_fxt_stats.c +starpu_fxt_stats_OBJECTS = \ + starpu_fxt_stats-starpu_fxt_stats.$(OBJEXT) +@STARPU_USE_FXT_TRUE@starpu_fxt_stats_DEPENDENCIES = \ +@STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) +starpu_fxt_stats_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_stats_LDFLAGS) $(LDFLAGS) \ + -o $@ +starpu_fxt_tool_SOURCES = starpu_fxt_tool.c +starpu_fxt_tool_OBJECTS = starpu_fxt_tool-starpu_fxt_tool.$(OBJEXT) +@STARPU_USE_FXT_TRUE@starpu_fxt_tool_DEPENDENCIES = \ +@STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) +starpu_fxt_tool_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_tool_LDFLAGS) $(LDFLAGS) \ + -o $@ +starpu_lp2paje_SOURCES = starpu_lp2paje.c +starpu_lp2paje_OBJECTS = starpu_lp2paje.$(OBJEXT) +starpu_lp2paje_LDADD = $(LDADD) +starpu_machine_display_SOURCES = starpu_machine_display.c +starpu_machine_display_OBJECTS = starpu_machine_display.$(OBJEXT) +starpu_machine_display_LDADD = $(LDADD) +starpu_perfmodel_display_SOURCES = starpu_perfmodel_display.c +starpu_perfmodel_display_OBJECTS = starpu_perfmodel_display.$(OBJEXT) +starpu_perfmodel_display_LDADD = $(LDADD) 
+starpu_perfmodel_plot_SOURCES = starpu_perfmodel_plot.c +starpu_perfmodel_plot_OBJECTS = \ + starpu_perfmodel_plot-starpu_perfmodel_plot.$(OBJEXT) +starpu_perfmodel_plot_LDADD = $(LDADD) +starpu_perfmodel_recdump_SOURCES = starpu_perfmodel_recdump.c +starpu_perfmodel_recdump_OBJECTS = starpu_perfmodel_recdump.$(OBJEXT) +starpu_perfmodel_recdump_LDADD = $(LDADD) +am__starpu_replay_SOURCES_DIST = starpu_replay.c starpu_replay_sched.c +@STARPU_SIMGRID_TRUE@am_starpu_replay_OBJECTS = \ +@STARPU_SIMGRID_TRUE@ starpu_replay.$(OBJEXT) \ +@STARPU_SIMGRID_TRUE@ starpu_replay_sched.$(OBJEXT) +starpu_replay_OBJECTS = $(am_starpu_replay_OBJECTS) +starpu_replay_LDADD = $(LDADD) +starpu_sched_display_SOURCES = starpu_sched_display.c +starpu_sched_display_OBJECTS = starpu_sched_display.$(OBJEXT) +starpu_sched_display_LDADD = $(LDADD) +starpu_tasks_rec_complete_SOURCES = starpu_tasks_rec_complete.c +starpu_tasks_rec_complete_OBJECTS = \ + starpu_tasks_rec_complete.$(OBJEXT) +starpu_tasks_rec_complete_LDADD = $(LDADD) +am__dist_bin_SCRIPTS_DIST = starpu_workers_activity \ + starpu_codelet_histo_profile starpu_codelet_profile starpu_env \ + starpu_config starpu_mpi_comm_matrix.py \ + starpu_fxt_number_events_to_names.py \ + starpu_paje_draw_histogram starpu_paje_draw_histogram.R \ + starpu_paje_summary starpu_paje_summary.Rmd \ + starpu_mlr_analysis starpu_mlr_analysis.Rmd \ + starpu_paje_state_stats starpu_paje_state_stats.R \ + starpu_send_recv_data_use.py starpu_trace_state_stats.py \ + starpu_temanejo2.sh starpu_smpirun starpu_tcpipexec +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" 
+am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +SCRIPTS = $(dist_bin_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ + ./$(DEPDIR)/starpu_calibrate_bus.Po \ + ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po \ + ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po \ + ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po \ + ./$(DEPDIR)/starpu_lp2paje.Po \ + ./$(DEPDIR)/starpu_machine_display.Po \ + ./$(DEPDIR)/starpu_perfmodel_display.Po \ + ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po \ + ./$(DEPDIR)/starpu_perfmodel_recdump.Po \ + ./$(DEPDIR)/starpu_replay.Po \ + ./$(DEPDIR)/starpu_replay_sched.Po \ + ./$(DEPDIR)/starpu_sched_display.Po \ + ./$(DEPDIR)/starpu_tasks_rec_complete.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = loader.c starpu_calibrate_bus.c starpu_fxt_data_trace.c \ + starpu_fxt_stats.c starpu_fxt_tool.c starpu_lp2paje.c \ + starpu_machine_display.c starpu_perfmodel_display.c \ + starpu_perfmodel_plot.c starpu_perfmodel_recdump.c \ + $(starpu_replay_SOURCES) starpu_sched_display.c \ + starpu_tasks_rec_complete.c +DIST_SOURCES = loader.c starpu_calibrate_bus.c starpu_fxt_data_trace.c \ + starpu_fxt_stats.c starpu_fxt_tool.c starpu_lp2paje.c \ + starpu_machine_display.c starpu_perfmodel_display.c \ + starpu_perfmodel_plot.c starpu_perfmodel_recdump.c \ + $(am__starpu_replay_SOURCES_DIST) starpu_sched_display.c \ + starpu_tasks_rec_complete.c +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = 
$(dist_man1_MANS) +am__dist_pkgdata_DATA_DIST = gdbinit ayudame.cfg starpu_smpi.xslt \ + starpu_config.cfg +DATA = $(dist_pkgdata_DATA) \ + $(dist_pkgdata_perfmodels_sampling_bus_DATA) \ + $(dist_pkgdata_perfmodels_sampling_codelets_DATA) \ + $(nobase_STARPU_MSVC__DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + check recheck distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(dist_man1_MANS) $(srcdir)/Makefile.in \ + $(srcdir)/starpu_codelet_histo_profile.in \ + $(srcdir)/starpu_codelet_profile.in $(srcdir)/starpu_config.in \ + $(srcdir)/starpu_env.in \ + $(srcdir)/starpu_fxt_number_events_to_names.py.in \ + $(srcdir)/starpu_mlr_analysis.in \ + $(srcdir)/starpu_mpi_comm_matrix.py.in \ + $(srcdir)/starpu_paje_draw_histogram.in \ + $(srcdir)/starpu_paje_sort.in \ + $(srcdir)/starpu_paje_state_stats.in \ + $(srcdir)/starpu_paje_summary.in $(srcdir)/starpu_smpirun.in \ + $(srcdir)/starpu_tcpipexec.in \ + $(srcdir)/starpu_workers_activity.in \ + $(top_srcdir)/build-aux/depcomp \ + $(top_srcdir)/build-aux/test-driver \ + $(top_srcdir)/make/starpu-loader.mk \ + $(top_srcdir)/make/starpu-tests.mk \ + $(top_srcdir)/make/starpu.mk +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test 
"$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +pkglibdir = @pkglibdir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APP_CFLAGS = @APP_CFLAGS@ +APP_CXXFLAGS = @APP_CXXFLAGS@ +APP_FCFLAGS = @APP_FCFLAGS@ +APP_FFLAGS = @APP_FFLAGS@ +AR = @AR@ +AS = @AS@ +ATLASDIR = @ATLASDIR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIB = @BLAS_LIB@ +BLAS_LIBS = @BLAS_LIBS@ +BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ +BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_OR_MPICC = @CC_OR_MPICC@ +CC_OR_NVCC = @CC_OR_NVCC@ +CFLAGS = @CFLAGS@ +COVERAGE = @COVERAGE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DGELS_LIBS = @DGELS_LIBS@ +DLB_CFLAGS = @DLB_CFLAGS@ +DLB_LIBS = @DLB_LIBS@ +DLLTOOL = @DLLTOOL@ +DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ECLIPSE = @ECLIPSE@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FFLAGS = @FFLAGS@ +FFTWF_CFLAGS = @FFTWF_CFLAGS@ +FFTWF_LIBS = @FFTWF_LIBS@ +FFTWL_CFLAGS = @FFTWL_CFLAGS@ +FFTWL_LIBS = @FFTWL_LIBS@ +FFTW_CFLAGS = @FFTW_CFLAGS@ +FFTW_LIBS = @FFTW_LIBS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +FXTDIR = @FXTDIR@ +FXT_CFLAGS = @FXT_CFLAGS@ +FXT_LDFLAGS = @FXT_LDFLAGS@ +FXT_LIBS = 
@FXT_LIBS@ +GDB = @GDB@ +GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ +GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ +GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ +GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ +GOTODIR = @GOTODIR@ +GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ +HAVE_FFTWFL = @HAVE_FFTWFL@ +HELP2MAN = @HELP2MAN@ +HIPCC = @HIPCC@ +HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) +HIPCONFIG = @HIPCONFIG@ +HWLOC_CFLAGS = @HWLOC_CFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +HWLOC_REQUIRES = @HWLOC_REQUIRES@ +ICC = @ICC@ +ICC_ARGS = @ICC_ARGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JULIA = @JULIA@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ + $(STARPU_EXPORTED_LIBS) +LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ +LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ +LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ +LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ +LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ +LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ +LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ +LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ +LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ +LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ +LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ +LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ +LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ +LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ +LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ +LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ +LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ +LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ +LIBSTARPU_LDFLAGS = 
@LIBSTARPU_LDFLAGS@ +LIBSTARPU_LINK = @LIBSTARPU_LINK@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ +LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAGMA_CFLAGS = @MAGMA_CFLAGS@ +MAGMA_LIBS = @MAGMA_LIBS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPICC_LDFLAGS = @MPICC_LDFLAGS@ +MPICXX = @MPICXX@ +MPIEXEC = @MPIEXEC@ +MPIEXEC_ARGS = @MPIEXEC_ARGS@ +MPIFORT = @MPIFORT@ +MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ +MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ +NM = @NM@ +NMAD_CFLAGS = @NMAD_CFLAGS@ +NMAD_LIBS = @NMAD_LIBS@ +NMEDIT = @NMEDIT@ +NVCC = @NVCC@ +NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) +NVCC_CC = @NVCC_CC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ +OPENBLAS_LIBS = @OPENBLAS_LIBS@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAPI_CFLAGS = @PAPI_CFLAGS@ +PAPI_LIBS = @PAPI_LIBS@ +PARALLEL = @PARALLEL@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POTI_CFLAGS = @POTI_CFLAGS@ +POTI_LIBS = @POTI_LIBS@ +PROG_CLANG = @PROG_CLANG@ +PROG_DATE = @PROG_DATE@ +PROG_FIND = @PROG_FIND@ +PROG_STAT = @PROG_STAT@ +PYTHON = @PYTHON@ +PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ +PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +REALBASH = @REALBASH@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ +SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ +SIMGRID_LIBS = 
@SIMGRID_LIBS@ +SIMGRID_MC = @SIMGRID_MC@ +SLIC_CONFIG = @SLIC_CONFIG@ +SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ +SOCL_VENDORS = @SOCL_VENDORS@ +STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ +STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ +STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ +STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ +STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ +STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ +STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ +STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ +STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ +STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ +STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ +STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ +STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ +STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ +STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ +STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ +STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ +STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ +STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ +STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ +STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ +STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ +STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ +STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ +STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ +STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ +STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ +STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ +STARPU_LIB_PATH = @STARPU_LIB_PATH@ +STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ +STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ +STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ +STARPU_MS_LIB = @STARPU_MS_LIB@ +STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ +STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ +STARPU_OPENBLAS = @STARPU_OPENBLAS@ +STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ +STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ +STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ +STARPU_OPENCL_LDFLAGS = 
@STARPU_OPENCL_LDFLAGS@ +STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ +STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ +STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ +STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ +STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ +STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ +STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ +STARPU_SRC_DIR = @STARPU_SRC_DIR@ +STARPU_USE_CPU = @STARPU_USE_CPU@ +STARPU_USE_CUDA = @STARPU_USE_CUDA@ +STARPU_USE_FXT = @STARPU_USE_FXT@ +STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ +STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ +STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +doxygencommand = @doxygencommand@ +dvidir = @dvidir@ +eclipsepath = @eclipsepath@ +epstopdfcommand = @epstopdfcommand@ +exec_prefix = @exec_prefix@ +gitcommand = @gitcommand@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +hwloccalccommand = @hwloccalccommand@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +juliapath = @juliapath@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = 
@localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +mpicc_path = @mpicc_path@ +mpicxx_path = @mpicxx_path@ +mpiexec_path = @mpiexec_path@ +mpifort_path = @mpifort_path@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pdflatexcommand = @pdflatexcommand@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +LAUNCHER_ENV = $(am__append_4) $(am__append_6) +LAUNCHER = $(am__append_3) $(am__append_5) +AM_CFLAGS = $(GLOBAL_AM_CFLAGS) +AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) +AM_FFLAGS = $(GLOBAL_AM_FFLAGS) +AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) +@STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; +@STARPU_USE_CUDA_TRUE@V_nvcc_1 = +@STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) + +# Avoid using nvcc when making a coverity build, nvcc produces millions of +# lines of code which we don't want to analyze. Instead, build dumb .o files +# containing empty functions. 
+@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) +@STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) +@STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; +@STARPU_USE_HIP_TRUE@V_hipcc_1 = +@STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) +V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) +V_icc_0 = @echo " ICC " $@; +V_icc_1 = +V_icc = $(V_icc_$(V)) +V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) +V_ln_0 = @echo " LN " $@; +V_ln_1 = +V_ln = $(V_ln_$(V)) +V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) +V_help2man_0 = @echo " HELP2MAN" $@; +V_help2man_1 = +V_help2man = $(V_help2man_$(V)) +# These are always defined, both for starpu-mpi and for mpi-ms +# For MPI tests we don't want to oversubscribe the system +MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 +@STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) +@STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile + +# When GNU parallel is available and -j is passed to make, run tests through +# parallel, using a "starpu" semaphore. +# Also make test shell scripts run its tests through parallel, using a +# "substarpu" semaphore. This brings some overload, but only one level. 
+@HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') +@STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) +@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 +@STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) +@STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ +@STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) +@STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" +@STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) +AM_TESTS_FD_REDIRECT = 9>&2 + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2013-2013 Thibaut Lambert +# Copyright (C) 2013-2013 Joris Pablo +# Copyright (C) 2017-2017 Erwan Leria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +SUBDIRS = +AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src $(STARPU_H_CPPFLAGS) +AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ +dist_bin_SCRIPTS = starpu_workers_activity \ + starpu_codelet_histo_profile starpu_codelet_profile starpu_env \ + starpu_config starpu_mpi_comm_matrix.py \ + starpu_fxt_number_events_to_names.py \ + starpu_paje_draw_histogram starpu_paje_draw_histogram.R \ + starpu_paje_summary starpu_paje_summary.Rmd \ + starpu_mlr_analysis starpu_mlr_analysis.Rmd \ + starpu_paje_state_stats starpu_paje_state_stats.R \ + starpu_send_recv_data_use.py starpu_trace_state_stats.py \ + $(am__append_13) $(am__append_18) $(am__append_19) +dist_pkgdata_DATA = gdbinit $(am__append_14) $(am__append_17) \ + starpu_config.cfg +pkgdata_perfmodels_sampling_busdir = $(datarootdir)/starpu/perfmodels/sampling/bus +pkgdata_perfmodels_sampling_codeletsdir = $(datarootdir)/starpu/perfmodels/sampling/codelets/45 +dist_pkgdata_perfmodels_sampling_bus_DATA = \ + perfmodels/sampling/bus/attila.affinity \ + perfmodels/sampling/bus/attila.bandwidth \ + perfmodels/sampling/bus/attila.config \ + perfmodels/sampling/bus/attila.latency \ + perfmodels/sampling/bus/attila.platform.xml \ + perfmodels/sampling/bus/attila.platform.v4.xml \ + perfmodels/sampling/bus/hannibal.affinity \ + perfmodels/sampling/bus/hannibal.bandwidth \ + perfmodels/sampling/bus/hannibal.config \ + perfmodels/sampling/bus/hannibal.latency \ + perfmodels/sampling/bus/hannibal.platform.xml \ + perfmodels/sampling/bus/hannibal.platform.v4.xml \ + perfmodels/sampling/bus/hannibal-pitch.affinity \ + perfmodels/sampling/bus/hannibal-pitch.bandwidth \ + perfmodels/sampling/bus/hannibal-pitch.config \ + perfmodels/sampling/bus/hannibal-pitch.latency \ + perfmodels/sampling/bus/hannibal-pitch.platform.xml \ + perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml \ + perfmodels/sampling/bus/idgraf.affinity \ + perfmodels/sampling/bus/idgraf.bandwidth 
\ + perfmodels/sampling/bus/idgraf.config \ + perfmodels/sampling/bus/idgraf.latency \ + perfmodels/sampling/bus/idgraf.platform.xml \ + perfmodels/sampling/bus/idgraf.platform.v4.xml \ + perfmodels/sampling/bus/mirage.affinity \ + perfmodels/sampling/bus/mirage.bandwidth \ + perfmodels/sampling/bus/mirage.config \ + perfmodels/sampling/bus/mirage.latency \ + perfmodels/sampling/bus/mirage.platform.xml \ + perfmodels/sampling/bus/mirage.platform.v4.xml \ + perfmodels/sampling/bus/sirocco.affinity \ + perfmodels/sampling/bus/sirocco.bandwidth \ + perfmodels/sampling/bus/sirocco.config \ + perfmodels/sampling/bus/sirocco.latency \ + perfmodels/sampling/bus/sirocco.platform.xml \ + perfmodels/sampling/bus/sirocco.platform.v4.xml + +dist_pkgdata_perfmodels_sampling_codelets_DATA = \ + perfmodels/sampling/codelets/45/chol_model_potrf.attila \ + perfmodels/sampling/codelets/45/chol_model_trsm.attila \ + perfmodels/sampling/codelets/45/chol_model_syrk.attila \ + perfmodels/sampling/codelets/45/chol_model_gemm.attila \ + perfmodels/sampling/codelets/45/cl_update.attila \ + perfmodels/sampling/codelets/45/save_cl_bottom.attila \ + perfmodels/sampling/codelets/45/save_cl_top.attila \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila \ + 
perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.hannibal \ + perfmodels/sampling/codelets/45/chol_model_trsm.hannibal \ + 
perfmodels/sampling/codelets/45/chol_model_syrk.hannibal \ + perfmodels/sampling/codelets/45/chol_model_gemm.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch \ + perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch \ + 
perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.idgraf \ + perfmodels/sampling/codelets/45/chol_model_trsm.idgraf \ + perfmodels/sampling/codelets/45/chol_model_syrk.idgraf \ + perfmodels/sampling/codelets/45/chol_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/cl_update.idgraf \ + perfmodels/sampling/codelets/45/save_cl_bottom.idgraf \ + perfmodels/sampling/codelets/45/save_cl_top.idgraf \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf \ +\ + 
perfmodels/sampling/codelets/45/chol_model_potrf.mirage \ + perfmodels/sampling/codelets/45/chol_model_trsm.mirage \ + perfmodels/sampling/codelets/45/chol_model_syrk.mirage \ + perfmodels/sampling/codelets/45/chol_model_gemm.mirage \ + perfmodels/sampling/codelets/45/cl_update.mirage \ + perfmodels/sampling/codelets/45/save_cl_bottom.mirage \ + perfmodels/sampling/codelets/45/save_cl_top.mirage \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage \ + 
perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage \ + perfmodels/sampling/codelets/45/add_scal.mirage \ + perfmodels/sampling/codelets/45/func.mirage \ + perfmodels/sampling/codelets/45/log_arr.mirage \ + perfmodels/sampling/codelets/45/log_list.mirage \ + perfmodels/sampling/codelets/45/multi.mirage \ + perfmodels/sampling/codelets/45/multi_2arr.mirage \ + perfmodels/sampling/codelets/45/multi_list.mirage \ + perfmodels/sampling/codelets/45/scal.mirage \ + perfmodels/sampling/codelets/45/scal_arr.mirage \ + perfmodels/sampling/codelets/45/sqrt.mirage \ +\ + perfmodels/sampling/codelets/45/chol_model_potrf.sirocco \ + perfmodels/sampling/codelets/45/chol_model_trsm.sirocco \ + perfmodels/sampling/codelets/45/chol_model_syrk.sirocco \ + perfmodels/sampling/codelets/45/chol_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/cl_update.sirocco \ + perfmodels/sampling/codelets/45/save_cl_bottom.sirocco \ + perfmodels/sampling/codelets/45/save_cl_top.sirocco \ + perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco \ + 
perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco \ + 
perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco \ + perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco \ + perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco \ +\ + perfmodels/sampling/codelets/45/null.idgraf \ + perfmodels/sampling/codelets/45/null.sirocco + +EXTRA_DIST = \ + dev/checker/rename.sed \ + dev/checker/rename.sh \ + dev/cppcheck/suppressions.txt \ + dev/valgrind/blas.suppr \ + dev/valgrind/nvidia.suppr \ + dev/valgrind/bash.suppr \ + dev/valgrind/fxt.suppr \ + dev/valgrind/glpk.suppr \ + dev/valgrind/hdf5.suppr \ + dev/valgrind/hwloc.suppr \ + dev/valgrind/libc.suppr \ + dev/valgrind/libgomp.suppr \ + dev/valgrind/libnuma.suppr \ + dev/valgrind/madmpi.suppr \ + dev/valgrind/opencl.suppr \ + dev/valgrind/openmpi.suppr \ + dev/valgrind/openmp.suppr \ + dev/valgrind/p11-kit.suppr \ + dev/valgrind/padico.suppr \ + dev/valgrind/papi.suppr \ + dev/valgrind/pthread.suppr \ + dev/valgrind/starpu.suppr \ + dev/valgrind/starpu_pw.suppr \ + dev/valgrind/starpupy.suppr \ + dev/valgrind/valgrind.suppr \ + dev/valgrind/valgrind.sh \ + dev/valgrind/valgrind_xml.sh \ + dev/valgrind/helgrind.sh \ + dev/tsan/starpu.suppr \ + dev/lsan/suppressions \ + perfmodels/README \ + perfmodels/cluster.xml \ + perfmodels/hostfile \ + perfmodels/sampling/codelets/tmp/mlr_init.out \ + msvc/starpu_clean.bat \ + msvc/starpu_open.bat \ + msvc/starpu_exec.bat \ + msvc/starpu_var.bat \ + msvc/starpu.sln \ + msvc/starpu/starpu.vcxproj \ + release/Makefile \ + release/README.md \ + patch-ayudame \ + perfs/bench_sgemm.sh \ + 
perfs/error_model.gp \ + perfs/error_model.sh \ + distrib/distrib.r \ + distrib/distrib.sh \ + starpu_msexec + +CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log figure/* mlr_* + +##################################### +# What to install and what to check # +##################################### +STARPU_TOOLS = $(am__append_9) $(am__append_11) starpu_machine_display \ + starpu_sched_display $(am__append_12) +@STARPU_USE_FXT_TRUE@starpu_fxt_tool_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) +@STARPU_USE_FXT_TRUE@starpu_fxt_tool_LDADD = $(FXT_LIBS) +@STARPU_USE_FXT_TRUE@starpu_fxt_tool_LDFLAGS = $(FXT_LDFLAGS) +@STARPU_USE_FXT_TRUE@starpu_fxt_stats_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) +@STARPU_USE_FXT_TRUE@starpu_fxt_stats_LDADD = $(FXT_LIBS) +@STARPU_USE_FXT_TRUE@starpu_fxt_stats_LDFLAGS = $(FXT_LDFLAGS) +@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) +@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_LDADD = $(FXT_LIBS) +@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_LDFLAGS = $(FXT_LDFLAGS) +@STARPU_SIMGRID_TRUE@starpu_replay_SOURCES = \ +@STARPU_SIMGRID_TRUE@ starpu_replay.c \ +@STARPU_SIMGRID_TRUE@ starpu_replay_sched.c + +starpu_perfmodel_plot_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) +@STARPU_HAVE_WINDOWS_TRUE@STARPU_MSVC_dir = $(bindir) +@STARPU_HAVE_WINDOWS_TRUE@nobase_STARPU_MSVC__DATA = \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_clean.bat \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_open.bat \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_exec.bat \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_var.bat \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu.sln \ +@STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu/starpu.vcxproj + +@STARPU_HAVE_HELP2MAN_TRUE@dist_man1_MANS = starpu_calibrate_bus.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_machine_display.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_perfmodel_display.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_perfmodel_plot.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_tasks_rec_complete.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ 
starpu_lp2paje.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_workers_activity.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_codelet_profile.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_codelet_histo_profile.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_env.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_mpi_comm_matrix.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_fxt_number_events_to_names.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_paje_draw_histogram.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_paje_state_stats.1 \ +@STARPU_HAVE_HELP2MAN_TRUE@ starpu_config.1 $(am__append_15) \ +@STARPU_HAVE_HELP2MAN_TRUE@ $(am__append_16) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tools/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign tools/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +starpu_env: $(top_builddir)/config.status $(srcdir)/starpu_env.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_codelet_profile: $(top_builddir)/config.status $(srcdir)/starpu_codelet_profile.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_codelet_histo_profile: $(top_builddir)/config.status $(srcdir)/starpu_codelet_histo_profile.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_mpi_comm_matrix.py: $(top_builddir)/config.status $(srcdir)/starpu_mpi_comm_matrix.py.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_fxt_number_events_to_names.py: $(top_builddir)/config.status $(srcdir)/starpu_fxt_number_events_to_names.py.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_workers_activity: $(top_builddir)/config.status $(srcdir)/starpu_workers_activity.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_paje_draw_histogram: $(top_builddir)/config.status $(srcdir)/starpu_paje_draw_histogram.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_paje_state_stats: $(top_builddir)/config.status $(srcdir)/starpu_paje_state_stats.in + cd 
$(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_paje_summary: $(top_builddir)/config.status $(srcdir)/starpu_paje_summary.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_config: $(top_builddir)/config.status $(srcdir)/starpu_config.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_mlr_analysis: $(top_builddir)/config.status $(srcdir)/starpu_mlr_analysis.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_paje_sort: $(top_builddir)/config.status $(srcdir)/starpu_paje_sort.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_smpirun: $(top_builddir)/config.status $(srcdir)/starpu_smpirun.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +starpu_tcpipexec: $(top_builddir)/config.status $(srcdir)/starpu_tcpipexec.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files 
'$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) + @rm -f loader$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) + +starpu_calibrate_bus$(EXEEXT): $(starpu_calibrate_bus_OBJECTS) $(starpu_calibrate_bus_DEPENDENCIES) $(EXTRA_starpu_calibrate_bus_DEPENDENCIES) + @rm -f starpu_calibrate_bus$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_calibrate_bus_OBJECTS) $(starpu_calibrate_bus_LDADD) $(LIBS) + +starpu_fxt_data_trace$(EXEEXT): $(starpu_fxt_data_trace_OBJECTS) 
$(starpu_fxt_data_trace_DEPENDENCIES) $(EXTRA_starpu_fxt_data_trace_DEPENDENCIES) + @rm -f starpu_fxt_data_trace$(EXEEXT) + $(AM_V_CCLD)$(starpu_fxt_data_trace_LINK) $(starpu_fxt_data_trace_OBJECTS) $(starpu_fxt_data_trace_LDADD) $(LIBS) + +starpu_fxt_stats$(EXEEXT): $(starpu_fxt_stats_OBJECTS) $(starpu_fxt_stats_DEPENDENCIES) $(EXTRA_starpu_fxt_stats_DEPENDENCIES) + @rm -f starpu_fxt_stats$(EXEEXT) + $(AM_V_CCLD)$(starpu_fxt_stats_LINK) $(starpu_fxt_stats_OBJECTS) $(starpu_fxt_stats_LDADD) $(LIBS) + +starpu_fxt_tool$(EXEEXT): $(starpu_fxt_tool_OBJECTS) $(starpu_fxt_tool_DEPENDENCIES) $(EXTRA_starpu_fxt_tool_DEPENDENCIES) + @rm -f starpu_fxt_tool$(EXEEXT) + $(AM_V_CCLD)$(starpu_fxt_tool_LINK) $(starpu_fxt_tool_OBJECTS) $(starpu_fxt_tool_LDADD) $(LIBS) + +starpu_lp2paje$(EXEEXT): $(starpu_lp2paje_OBJECTS) $(starpu_lp2paje_DEPENDENCIES) $(EXTRA_starpu_lp2paje_DEPENDENCIES) + @rm -f starpu_lp2paje$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_lp2paje_OBJECTS) $(starpu_lp2paje_LDADD) $(LIBS) + +starpu_machine_display$(EXEEXT): $(starpu_machine_display_OBJECTS) $(starpu_machine_display_DEPENDENCIES) $(EXTRA_starpu_machine_display_DEPENDENCIES) + @rm -f starpu_machine_display$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_machine_display_OBJECTS) $(starpu_machine_display_LDADD) $(LIBS) + +starpu_perfmodel_display$(EXEEXT): $(starpu_perfmodel_display_OBJECTS) $(starpu_perfmodel_display_DEPENDENCIES) $(EXTRA_starpu_perfmodel_display_DEPENDENCIES) + @rm -f starpu_perfmodel_display$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_display_OBJECTS) $(starpu_perfmodel_display_LDADD) $(LIBS) + +starpu_perfmodel_plot$(EXEEXT): $(starpu_perfmodel_plot_OBJECTS) $(starpu_perfmodel_plot_DEPENDENCIES) $(EXTRA_starpu_perfmodel_plot_DEPENDENCIES) + @rm -f starpu_perfmodel_plot$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_plot_OBJECTS) $(starpu_perfmodel_plot_LDADD) $(LIBS) + +starpu_perfmodel_recdump$(EXEEXT): $(starpu_perfmodel_recdump_OBJECTS) $(starpu_perfmodel_recdump_DEPENDENCIES) 
$(EXTRA_starpu_perfmodel_recdump_DEPENDENCIES) + @rm -f starpu_perfmodel_recdump$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_recdump_OBJECTS) $(starpu_perfmodel_recdump_LDADD) $(LIBS) + +starpu_replay$(EXEEXT): $(starpu_replay_OBJECTS) $(starpu_replay_DEPENDENCIES) $(EXTRA_starpu_replay_DEPENDENCIES) + @rm -f starpu_replay$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_replay_OBJECTS) $(starpu_replay_LDADD) $(LIBS) + +starpu_sched_display$(EXEEXT): $(starpu_sched_display_OBJECTS) $(starpu_sched_display_DEPENDENCIES) $(EXTRA_starpu_sched_display_DEPENDENCIES) + @rm -f starpu_sched_display$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_sched_display_OBJECTS) $(starpu_sched_display_LDADD) $(LIBS) + +starpu_tasks_rec_complete$(EXEEXT): $(starpu_tasks_rec_complete_OBJECTS) $(starpu_tasks_rec_complete_DEPENDENCIES) $(EXTRA_starpu_tasks_rec_complete_DEPENDENCIES) + @rm -f starpu_tasks_rec_complete$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(starpu_tasks_rec_complete_OBJECTS) $(starpu_tasks_rec_complete_LDADD) $(LIBS) +install-dist_binSCRIPTS: $(dist_bin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z 
"$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-dist_binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_calibrate_bus.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_lp2paje.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_machine_display.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_display.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_recdump.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay_sched.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_sched_display.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/starpu_tasks_rec_complete.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +loader-loader.o: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o 
loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c + +loader-loader.obj: loader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` + +starpu_fxt_data_trace-starpu_fxt_data_trace.o: starpu_fxt_data_trace.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_data_trace-starpu_fxt_data_trace.o -MD -MP -MF $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.o `test -f 'starpu_fxt_data_trace.c' || echo '$(srcdir)/'`starpu_fxt_data_trace.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_data_trace.c' object='starpu_fxt_data_trace-starpu_fxt_data_trace.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.o `test -f 'starpu_fxt_data_trace.c' || echo '$(srcdir)/'`starpu_fxt_data_trace.c + +starpu_fxt_data_trace-starpu_fxt_data_trace.obj: starpu_fxt_data_trace.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_data_trace-starpu_fxt_data_trace.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.obj `if test -f 'starpu_fxt_data_trace.c'; then $(CYGPATH_W) 'starpu_fxt_data_trace.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_data_trace.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_data_trace.c' object='starpu_fxt_data_trace-starpu_fxt_data_trace.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.obj `if test -f 'starpu_fxt_data_trace.c'; then $(CYGPATH_W) 'starpu_fxt_data_trace.c'; else $(CYGPATH_W) 
'$(srcdir)/starpu_fxt_data_trace.c'; fi` + +starpu_fxt_stats-starpu_fxt_stats.o: starpu_fxt_stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_stats-starpu_fxt_stats.o -MD -MP -MF $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo -c -o starpu_fxt_stats-starpu_fxt_stats.o `test -f 'starpu_fxt_stats.c' || echo '$(srcdir)/'`starpu_fxt_stats.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_stats.c' object='starpu_fxt_stats-starpu_fxt_stats.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_stats-starpu_fxt_stats.o `test -f 'starpu_fxt_stats.c' || echo '$(srcdir)/'`starpu_fxt_stats.c + +starpu_fxt_stats-starpu_fxt_stats.obj: starpu_fxt_stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_stats-starpu_fxt_stats.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo -c -o starpu_fxt_stats-starpu_fxt_stats.obj `if test -f 'starpu_fxt_stats.c'; then $(CYGPATH_W) 'starpu_fxt_stats.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_stats.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_stats.c' object='starpu_fxt_stats-starpu_fxt_stats.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_stats-starpu_fxt_stats.obj `if test -f 'starpu_fxt_stats.c'; then $(CYGPATH_W) 'starpu_fxt_stats.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_stats.c'; fi` + +starpu_fxt_tool-starpu_fxt_tool.o: starpu_fxt_tool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_tool-starpu_fxt_tool.o -MD -MP -MF $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo -c -o starpu_fxt_tool-starpu_fxt_tool.o `test -f 'starpu_fxt_tool.c' || echo '$(srcdir)/'`starpu_fxt_tool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_tool.c' object='starpu_fxt_tool-starpu_fxt_tool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_tool-starpu_fxt_tool.o `test -f 'starpu_fxt_tool.c' || echo '$(srcdir)/'`starpu_fxt_tool.c + +starpu_fxt_tool-starpu_fxt_tool.obj: starpu_fxt_tool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_tool-starpu_fxt_tool.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo -c -o starpu_fxt_tool-starpu_fxt_tool.obj `if test -f 'starpu_fxt_tool.c'; then $(CYGPATH_W) 'starpu_fxt_tool.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_tool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='starpu_fxt_tool.c' object='starpu_fxt_tool-starpu_fxt_tool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_tool-starpu_fxt_tool.obj `if test -f 'starpu_fxt_tool.c'; then $(CYGPATH_W) 'starpu_fxt_tool.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_tool.c'; fi` + +starpu_perfmodel_plot-starpu_perfmodel_plot.o: starpu_perfmodel_plot.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_perfmodel_plot-starpu_perfmodel_plot.o -MD -MP -MF $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.o `test -f 'starpu_perfmodel_plot.c' || echo '$(srcdir)/'`starpu_perfmodel_plot.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_perfmodel_plot.c' object='starpu_perfmodel_plot-starpu_perfmodel_plot.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.o `test -f 'starpu_perfmodel_plot.c' || echo '$(srcdir)/'`starpu_perfmodel_plot.c + +starpu_perfmodel_plot-starpu_perfmodel_plot.obj: starpu_perfmodel_plot.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_perfmodel_plot-starpu_perfmodel_plot.obj -MD -MP -MF 
$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.obj `if test -f 'starpu_perfmodel_plot.c'; then $(CYGPATH_W) 'starpu_perfmodel_plot.c'; else $(CYGPATH_W) '$(srcdir)/starpu_perfmodel_plot.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_perfmodel_plot.c' object='starpu_perfmodel_plot-starpu_perfmodel_plot.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.obj `if test -f 'starpu_perfmodel_plot.c'; then $(CYGPATH_W) 'starpu_perfmodel_plot.c'; else $(CYGPATH_W) '$(srcdir)/starpu_perfmodel_plot.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-man1: $(dist_man1_MANS) + @$(NORMAL_INSTALL) + @list1='$(dist_man1_MANS)'; \ + list2=''; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" 
"$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list='$(dist_man1_MANS)'; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +install-dist_pkgdataDATA: $(dist_pkgdata_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ + done + +uninstall-dist_pkgdataDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) +install-dist_pkgdata_perfmodels_sampling_busDATA: $(dist_pkgdata_perfmodels_sampling_bus_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_pkgdata_perfmodels_sampling_bus_DATA)'; test -n "$(pkgdata_perfmodels_sampling_busdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | 
$(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" || exit $$?; \ + done + +uninstall-dist_pkgdata_perfmodels_sampling_busDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_pkgdata_perfmodels_sampling_bus_DATA)'; test -n "$(pkgdata_perfmodels_sampling_busdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'; $(am__uninstall_files_from_dir) +install-dist_pkgdata_perfmodels_sampling_codeletsDATA: $(dist_pkgdata_perfmodels_sampling_codelets_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_pkgdata_perfmodels_sampling_codelets_DATA)'; test -n "$(pkgdata_perfmodels_sampling_codeletsdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" || exit $$?; \ + done + +uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_pkgdata_perfmodels_sampling_codelets_DATA)'; test -n "$(pkgdata_perfmodels_sampling_codeletsdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'; $(am__uninstall_files_from_dir) +install-nobase_STARPU_MSVC_DATA: $(nobase_STARPU_MSVC__DATA) + @$(NORMAL_INSTALL) + @list='$(nobase_STARPU_MSVC__DATA)'; test -n "$(STARPU_MSVC_dir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_MSVC_dir)'"; 
\ + $(MKDIR_P) "$(DESTDIR)$(STARPU_MSVC_dir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_MSVC_dir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(STARPU_MSVC_dir)/$$dir"; }; \ + echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_MSVC_dir)/$$dir'"; \ + $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_MSVC_dir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_STARPU_MSVC_DATA: + @$(NORMAL_UNINSTALL) + @list='$(nobase_STARPU_MSVC__DATA)'; test -n "$(STARPU_MSVC_dir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(STARPU_MSVC_dir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_PROGRAMS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +starpu_fxt_tool.log: starpu_fxt_tool$(EXEEXT) + @p='starpu_fxt_tool$(EXEEXT)'; \ + b='starpu_fxt_tool'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_fxt_stats.log: starpu_fxt_stats$(EXEEXT) + @p='starpu_fxt_stats$(EXEEXT)'; \ + b='starpu_fxt_stats'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_fxt_data_trace.log: starpu_fxt_data_trace$(EXEEXT) + @p='starpu_fxt_data_trace$(EXEEXT)'; \ + b='starpu_fxt_data_trace'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_calibrate_bus.log: starpu_calibrate_bus$(EXEEXT) + @p='starpu_calibrate_bus$(EXEEXT)'; \ + b='starpu_calibrate_bus'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + 
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_machine_display.log: starpu_machine_display$(EXEEXT) + @p='starpu_machine_display$(EXEEXT)'; \ + b='starpu_machine_display'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_sched_display.log: starpu_sched_display$(EXEEXT) + @p='starpu_sched_display$(EXEEXT)'; \ + b='starpu_sched_display'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_perfmodel_display.log: starpu_perfmodel_display$(EXEEXT) + @p='starpu_perfmodel_display$(EXEEXT)'; \ + b='starpu_perfmodel_display'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +starpu_perfmodel_plot.log: starpu_perfmodel_plot$(EXEEXT) + @p='starpu_perfmodel_plot$(EXEEXT)'; \ + b='starpu_perfmodel_plot'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name 
"$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(MANS) $(DATA) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(pkgdatadir)" "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" "$(DESTDIR)$(STARPU_MSVC_dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +@STARPU_HAVE_HELP2MAN_FALSE@clean-local: +clean: clean-recursive + +clean-am: clean-binPROGRAMS clean-checkPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/starpu_calibrate_bus.Po + -rm -f ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po + -rm -f ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po + -rm -f ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po + -rm -f ./$(DEPDIR)/starpu_lp2paje.Po + -rm -f ./$(DEPDIR)/starpu_machine_display.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_display.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_recdump.Po + -rm -f ./$(DEPDIR)/starpu_replay.Po + -rm -f ./$(DEPDIR)/starpu_replay_sched.Po + -rm -f ./$(DEPDIR)/starpu_sched_display.Po + -rm -f ./$(DEPDIR)/starpu_tasks_rec_complete.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + 
+info: info-recursive + +info-am: + +install-data-am: install-dist_pkgdataDATA \ + install-dist_pkgdata_perfmodels_sampling_busDATA \ + install-dist_pkgdata_perfmodels_sampling_codeletsDATA \ + install-man install-nobase_STARPU_MSVC_DATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: install-man1 + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/loader-loader.Po + -rm -f ./$(DEPDIR)/starpu_calibrate_bus.Po + -rm -f ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po + -rm -f ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po + -rm -f ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po + -rm -f ./$(DEPDIR)/starpu_lp2paje.Po + -rm -f ./$(DEPDIR)/starpu_machine_display.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_display.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po + -rm -f ./$(DEPDIR)/starpu_perfmodel_recdump.Po + -rm -f ./$(DEPDIR)/starpu_replay.Po + -rm -f ./$(DEPDIR)/starpu_replay_sched.Po + -rm -f ./$(DEPDIR)/starpu_sched_display.Po + -rm -f ./$(DEPDIR)/starpu_tasks_rec_complete.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS \ + uninstall-dist_pkgdataDATA \ + uninstall-dist_pkgdata_perfmodels_sampling_busDATA \ + uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA \ + uninstall-man uninstall-nobase_STARPU_MSVC_DATA + +uninstall-man: uninstall-man1 + +.MAKE: $(am__recursive_targets) check-am install-am 
install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-TESTS check-am clean \ + clean-binPROGRAMS clean-checkPROGRAMS clean-generic \ + clean-libtool clean-local clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dist_binSCRIPTS \ + install-dist_pkgdataDATA \ + install-dist_pkgdata_perfmodels_sampling_busDATA \ + install-dist_pkgdata_perfmodels_sampling_codeletsDATA \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-man1 install-nobase_STARPU_MSVC_DATA \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-dist_binSCRIPTS uninstall-dist_pkgdataDATA \ + uninstall-dist_pkgdata_perfmodels_sampling_busDATA \ + uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA \ + uninstall-man uninstall-man1 uninstall-nobase_STARPU_MSVC_DATA + +.PRECIOUS: Makefile + +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` +@STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) + +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: +@STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ 
$(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) +@STARPU_USE_HIP_TRUE@.hip.o: +@STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) + +STARPU_MPI_NP ?= 4 + +showcheckfailed: + @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheckfailed || RET=1 ; \ + done ; \ + exit $$RET + +showfailed: + @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null + @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null + @RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -s -C $$i showfailed || RET=1 ; \ + done ; \ + exit $$RET + +showcheck: + -cat $(TEST_LOGS) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null + @! grep -q " runtime error: " $(TEST_LOGS) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showcheck || RET=1 ; \ + done ; \ + exit $$RET + +showsuite: + -cat $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! 
grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null + @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null + RET=0 ; \ + for i in $(SUBDIRS) ; do \ + make -C $$i showsuite || RET=1 ; \ + done ; \ + exit $$RET + +@STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling +@STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage +@STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 + +@STARPU_SIMGRID_TRUE@env: +@STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) +@STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) +@STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) + +@STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 + +@STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 + +@STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 + +# +# Test loading goes through a lot of launchers: +# +# - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. +# either mpirun or starpu_tcpipexec +# +# - $(LOADER), i.e. tests/loader, is then called to implement timeout, running +# gdb, etc. But if it detects that the test is a .sh script, it just executes +# it +# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# to run the program through e.g. valgrind.sh +# +# When the program is a shell script, additionally: +# +# - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) +# +# - $(MS_LAUNCHER) is called to run the test through starpu_msexec +# +# - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program +# through it. 
+# +# - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader +# + +export LAUNCHER +@HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL + +export MS_LAUNCHER + +LAUNCHER ?= +MS_LAUNCHER ?= +@STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader + +LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions +TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr +export LSAN_OPTIONS +export TSAN_OPTIONS + +@STARPU_HAVE_HELP2MAN_TRUE@starpu_calibrate_bus.1: starpu_calibrate_bus$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Force StarPU bus calibration" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_machine_display.1: starpu_machine_display$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display machine StarPU information" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU performance model" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Plot StarPU performance model" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_tasks_rec_complete.1: starpu_tasks_rec_complete$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Complete StarPU tasks.rec file" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_lp2paje.1: starpu_lp2paje$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert lp StarPU schedule into Paje format" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_workers_activity.1: starpu_workers_activity +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N 
-n "Display StarPU workers activity" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_codelet_profile.1: starpu_codelet_profile +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet profile" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_env.1: starpu_env +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Set StarPU environment variables" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_codelet_histo_profile.1: starpu_codelet_histo_profile +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet histogram" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU MPI communications matrix" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_fxt_number_events_to_names.1: starpu_fxt_number_events_to_names.py +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert events in StarPU traces" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_paje_draw_histogram.1: starpu_paje_draw_histogram +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU trace histogram" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_paje_state_stats.1: starpu_paje_state_stats +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from StarPU trace" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@starpu_config.1: starpu_config +@STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< 
+@STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU configuration" --output=$@ ./$< + +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert raw StarPU FxT trace to various traces" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_stats.1: starpu_fxt_stats$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from raw StarPU FxT trace" --output=$@ ./$< +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace.1: starpu_fxt_data_trace$(EXEEXT) +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print data trace from raw StarPU FxT trace" --output=$@ ./$< + +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@starpu_tcpipexec.1: starpu_tcpipexec +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ @chmod +x $< +@STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Execute TCPIP application" --output=$@ ./$< + +@STARPU_HAVE_HELP2MAN_TRUE@clean-local: +@STARPU_HAVE_HELP2MAN_TRUE@ $(RM) $(dist_man1_MANS) starpu_config.cfg +starpu_config.cfg: $(top_builddir)/src/common/config.h + grep STARPU $< | grep '#' > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/tools/ayudame.cfg b/tools/ayudame.cfg new file mode 100644 index 0000000..8451ab4 --- /dev/null +++ b/tools/ayudame.cfg @@ -0,0 +1,37 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# default configuration file for Ayudame2 + +connect { + stdout_human = "true"; + stdout_raw = "false"; + temanejo1 = "false"; + temanejo = "true"; + dot = "false"; + dot_filename = "ayudame.dot"; + xml = "true"; + xml_filename = "ayudame.xml"; + ayu_port = "8888"; + ayu_host = "localhost"; +} + +logging { + error = "true"; + warning = "true"; + info = "true"; + verbosity_level = "3"; + debug = "true"; +} diff --git a/tools/dev/checker/rename.sed b/tools/dev/checker/rename.sed new file mode 100644 index 0000000..4dab42c --- /dev/null +++ b/tools/dev/checker/rename.sed @@ -0,0 +1,182 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +s/\bstruct starpu_per_arch_perfmodel\b/struct starpu_perfmodel_per_arch/g +s/\bstruct starpu_regression_model\b/struct starpu_perfmodel_regression_model/g +s/\bstruct starpu_history_table\b/struct starpu_perfmodel_history_table/g +s/\bstruct starpu_history_entry\b/struct starpu_perfmodel_history_entry/g +s/\bstruct starpu_history_list\b/struct starpu_perfmodel_history_list/g +s/\bstarpu_list_models\b/starpu_perfmodel_list/g +s/\bstruct starpu_model_list\b/struct _starpu_perfmodel_list/g +s/\bstarpu_load_history_debug\b/starpu_perfmodel_load_symbol/g +s/\bstarpu_access_mode\b/enum starpu_access_mode/g +s/\bstruct starpu_codelet_t\b/struct starpu_codelet/g +s/\bstarpu_codelet\b/struct starpu_codelet/g +s/\bstarpu_codelet_t\b/struct starpu_codelet/g +s/\bstarpu_data_handle\b/starpu_data_handle_t/g +s/\bstarpu_block_interface_t\b/struct starpu_block_interface/g +s/\bstarpu_matrix_interface_t\b/struct starpu_matrix_interface/g +s/\bstarpu_vector_interface_t\b/struct starpu_vector_interface/g +s/\bstarpu_variable_interface_t\b/struct starpu_variable_interface/g +s/\bstarpu_csr_interface_t\b/struct starpu_csr_interface/g +s/\bstarpu_bcsr_interface_t\b/struct starpu_bcsr_interface/g +s/\bstarpu_multiformat_interface_t\b/struct starpu_multiformat_interface/g +s/\bstarpu_machine_topology_s\b/starpu_machine_topology/g +s/\bstarpu_htbl32_node_s\b/starpu_htbl32_node/g +s/\bstarpu_history_list_t\b/starpu_history_list/g +s/\bstarpu_buffer_descr_t\b/starpu_buffer_descr/g +s/\bstarpu_history_entry_t\b/starpu_history_entry/g +s/\bstarpu_history_list_t\b/starpu_history_list/g +s/\bstarpu_model_list_t\b/starpu_model_list/g +s/\bstarpu_regression_model_t\b/starpu_regression_model/g +s/\bstarpu_per_arch_perfmodel_t\b/starpu_per_arch_perfmodel/g +#s/\bstarpu_buffer_descr\b/struct starpu_buffer_descr/g +s/\bstarpu_perfmodel_t\b/starpu_perfmodel/g +s/\bstarpu_sched_policy_s\b/starpu_sched_policy/g +s/\bstarpu_data_interface_ops_t\b/starpu_data_interface_ops/g 
+s/\bstarpu_submit_task\b/starpu_task_submit/g +s/\bstarpu_wait_task\b/starpu_task_wait/g +s/\bstarpu_helper_init_cublas\b/starpu_helper_cublas_init/g +s/\bstarpu_helper_shutdown_cublas\b/starpu_helper_cublas_shutdown/g +s/\bstarpu_deregister_progression_hook\b/starpu_progression_hook_deregister/g +s/\bstarpu_register_progression_hook\b/starpu_progression_hook_register/g +s/\bstarpu_get_worker_id\b/starpu_worker_get_id/g +s/\bstarpu_get_worker_devid\b/starpu_worker_get_devid/g +s/\bstarpu_get_worker_memory_node\b/starpu_worker_get_memory_node/g +s/\bstarpu_get_worker_name\b/starpu_worker_get_name/g +s/\bstarpu_get_worker_type\b/starpu_worker_get_type/g +s/\bstarpu_get_worker_count\b/starpu_worker_get_count/g +s/\bstarpu_get_cpu_worker_count\b/starpu_cpu_worker_get_count/g +s/\bstarpu_get_spu_worker_count\b/starpu_spu_worker_get_count/g +s/\bstarpu_get_opencl_worker_count\b/starpu_opencl_worker_get_count/g +s/\bstarpu_get_cuda_worker_count\b/starpu_cuda_worker_get_count/g +s/\bstarpu_get_local_cuda_stream\b/starpu_cuda_get_local_stream/g +s/\bstarpu_wait_all_tasks\b/starpu_task_wait_for_all/g +s/\bstarpu_delete_data\b/starpu_data_unregister/g +s/\bstarpu_malloc_pinned_if_possible\b/starpu_data_malloc_pinned_if_possible/g +s/\bstarpu_free_pinned_if_possible\b/starpu_data_free_pinned_if_possible/g +s/\bstarpu_sync_data_with_mem\b/starpu_data_acquire/g +s/\bstarpu_data_sync_with_mem\b/starpu_data_acquire/g +s/\bstarpu_sync_data_with_mem_non_blocking\b/starpu_data_acquire_cb/g +s/\bstarpu_data_sync_with_mem_non_blocking\b/starpu_data_acquire_cb/g +s/\bstarpu_release_data_from_mem\b/starpu_data_release/g +s/\bstarpu_data_release_from_mem\b/starpu_data_release/g +s/\bstarpu_advise_if_data_is_important\b/starpu_data_advise_as_important/g +s/\bstarpu_request_data_allocation\b/starpu_data_request_allocation/g +s/\bstarpu_prefetch_data_on_node\b/starpu_data_prefetch_on_node/g +s/\bstarpu_get_sub_data\b/starpu_data_get_sub_data/g 
+s/\bstarpu_partition_data\b/starpu_data_partition/g +s/\bstarpu_unpartition_data\b/starpu_data_unpartition/g +s/\bstarpu_map_filters\b/starpu_data_map_filters/g +s/\bstarpu_test_if_data_is_allocated_on_node\b/starpu_data_test_if_allocated_on_node/g +s/\bstarpu_get_block_elemsize\b/starpu_block_get_elemsize/g +s/\bstarpu_get_block_local_ldy\b/starpu_block_get_local_ldy/g +s/\bstarpu_get_block_local_ldz\b/starpu_block_get_local_ldz/g +s/\bstarpu_get_block_local_ptr\b/starpu_block_get_local_ptr/g +s/\bstarpu_get_block_nx\b/starpu_block_get_nx/g +s/\bstarpu_get_block_ny\b/starpu_block_get_ny/g +s/\bstarpu_get_block_nz\b/starpu_block_get_nz/g +s/\bstarpu_register_block_data\b/starpu_block_data_register/g +s/\bstarpu_get_bcsr_c\b/starpu_bcsr_get_c/g +s/\bstarpu_get_bcsr_elemsize\b/starpu_bcsr_get_elemsize/g +s/\bstarpu_get_bcsr_firstentry\b/starpu_bcsr_get_firstentry/g +s/\bstarpu_get_bcsr_local_colind\b/starpu_bcsr_get_local_colind/g +s/\bstarpu_get_bcsr_local_nzval\b/starpu_bcsr_get_local_nzval/g +s/\bstarpu_get_bcsr_local_rowptr\b/starpu_bcsr_get_local_rowptr/g +s/\bstarpu_get_bcsr_nnz\b/starpu_bcsr_get_nnz/g +s/\bstarpu_get_bcsr_nrow\b/starpu_bcsr_get_nrow/g +s/\bstarpu_get_bcsr_r\b/starpu_bcsr_get_r/g +s/\bstarpu_register_bcsr_data\b/starpu_bcsr_data_register/g +s/\bstarpu_get_csr_elemsize\b/starpu_csr_get_elemsize/g +s/\bstarpu_get_csr_firstentry\b/starpu_csr_get_firstentry/g +s/\bstarpu_get_csr_local_colind\b/starpu_csr_get_local_colind/g +s/\bstarpu_get_csr_local_nzval\b/starpu_csr_get_local_nzval/g +s/\bstarpu_get_csr_local_rowptr\b/starpu_csr_get_local_rowptr/g +s/\bstarpu_get_csr_nnz\b/starpu_csr_get_nnz/g +s/\bstarpu_get_csr_nrow\b/starpu_csr_get_nrow/g +s/\bstarpu_register_csr_data\b/starpu_csr_data_register/g +s/\bstarpu_get_matrix_elemsize\b/starpu_matrix_get_elemsize/g +s/\bstarpu_get_matrix_local_ld\b/starpu_matrix_get_local_ld/g +s/\bstarpu_get_matrix_local_ptr\b/starpu_matrix_get_local_ptr/g +s/\bstarpu_get_matrix_nx\b/starpu_matrix_get_nx/g 
+s/\bstarpu_get_matrix_ny\b/starpu_matrix_get_ny/g +s/\bstarpu_register_matrix_data\b/starpu_matrix_data_register/g +s/\bstarpu_divide_in_2_filter_func_vector\b/starpu_vector_divide_in_2_filter_func/g +s/\bstarpu_register_vector_data\b/starpu_vector_data_register/g +s/\bstarpu_get_vector_elemsize\b/starpu_vector_get_elemsize/g +s/\bstarpu_get_vector_local_ptr\b/starpu_vector_get_local_ptr/g +s/\bstarpu_get_vector_nx\b/starpu_vector_get_nx/g +s/\bstarpu_data_set_wb_mask\b/starpu_data_set_wt_mask/g +s/\bstarpu_list_filter_func_vector\b/starpu_vector_list_filter_func/g +s/\bSTARPU_GET_MATRIX_PTR\b/STARPU_MATRIX_GET_PTR/g +s/\bSTARPU_GET_MATRIX_NX\b/STARPU_MATRIX_GET_NX/g +s/\bSTARPU_GET_MATRIX_NY\b/STARPU_MATRIX_GET_NY/g +s/\bSTARPU_GET_MATRIX_LD\b/STARPU_MATRIX_GET_LD/g +s/\bSTARPU_GET_MATRIX_ELEMSIZE\b/STARPU_MATRIX_GET_ELEMSIZE/g +s/\bSTARPU_GET_BLOCK_PTR\b/STARPU_BLOCK_GET_PTR/g +s/\bSTARPU_GET_BLOCK_NX\b/STARPU_BLOCK_GET_NX/g +s/\bSTARPU_GET_BLOCK_NY\b/STARPU_BLOCK_GET_NY/g +s/\bSTARPU_GET_BLOCK_NZ\b/STARPU_BLOCK_GET_NZ/g +s/\bSTARPU_GET_BLOCK_LDY\b/STARPU_BLOCK_GET_LDY/g +s/\bSTARPU_GET_BLOCK_LDZ\b/STARPU_BLOCK_GET_LDZ/g +s/\bSTARPU_GET_BLOCK_ELEMSIZE\b/STARPU_BLOCK_GET_ELEMSIZE/g +s/\bSTARPU_GET_VECTOR_PTR\b/STARPU_VECTOR_GET_PTR/g +s/\bSTARPU_GET_VECTOR_NX\b/STARPU_VECTOR_GET_NX/g +s/\bSTARPU_GET_VECTOR_ELEMSIZE\b/STARPU_VECTOR_GET_ELEMSIZE/g +s/\bSTARPU_GET_VARIABLE_PTR\b/STARPU_VARIABLE_GET_PTR/g +s/\bSTARPU_GET_VARIABLE_ELEMSIZE\b/STARPU_VARIABLE_GET_ELEMSIZE/g +s/\bSTARPU_GET_CSR_NNZ\b/STARPU_CSR_GET_NNZ/g +s/\bSTARPU_GET_CSR_NROW\b/STARPU_CSR_GET_NROW/g +s/\bSTARPU_GET_CSR_NZVAL\b/STARPU_CSR_GET_NZVAL/g +s/\bSTARPU_GET_CSR_COLIND\b/STARPU_CSR_GET_COLIND/g +s/\bSTARPU_GET_CSR_ROWPTR\b/STARPU_CSR_GET_ROWPTR/g +s/\bSTARPU_GET_CSR_FIRSTENTRY\b/STARPU_CSR_GET_FIRSTENTRY/g +s/\bSTARPU_GET_CSR_ELEMSIZE\b/STARPU_CSR_GET_ELEMSIZE/g +s/\bstarpu_print_bus_bandwidth\b/starpu_bus_print_bandwidth/g +s/\bstarpu_get_handle_interface_id\b/starpu_handle_get_interface_id/g 
+s/\bstarpu_get_current_task\b/starpu_task_get_current/g +s/\bstarpu_pack_cl_args\b/starpu_codelet_pack_args/g +s/\bstarpu_unpack_cl_args\b/starpu_codelet_unpack_args/g +s/\bstarpu_task_deinit\b/starpu_task_clean/g + +s/\bstarpu_helper_cublas_init\b/starpu_cublas_init/g +s/\bstarpu_helper_cublas_shutdown\b/starpu_cublas_shutdown/g + +s/\bstarpu_allocate_buffer_on_node\b/starpu_malloc_on_node/g +s/\bstarpu_free_buffer_on_node\b/starpu_free_on_node/g + +s/\benum starpu_access_mode\b/enum starpu_data_access_mode/g +s/\bstruct starpu_buffer_descr\b/struct starpu_data_descr/g +s/\bstarpu_memory_display_stats\b/starpu_data_display_memory_stats/g +s/\bstarpu_handle_to_pointer\b/starpu_data_handle_to_pointer/g +s/\bstarpu_handle_get_local_ptr\b/starpu_data_get_local_ptr/g +s/\bstarpu_crc32_be_n\b/starpu_hash_crc32c_be_n/g +s/\bstarpu_crc32_be\b/starpu_hash_crc32c_be/g +s/\bstarpu_crc32_string\b/starpu_hash_crc32c_string/g +s/\benum starpu_perf_archtype\b/enum starpu_perfmodel_archtype/g +s/\bstarpu_history_based_expected_perf\b/starpu_permodel_history_based_expected_perf/g +s/\bstruct starpu_task_profiling_info\b/struct starpu_profiling_task_info/g +s/\bstruct starpu_worker_profiling_info\b/struct starpu_profiling_worker_info/g +s/\bstruct starpu_bus_profiling_info\b/struct starpu_profiling_bus_info/g +s/\bstarpu_set_profiling_id\b/starpu_profiling_set_id/g +s/\bstarpu_worker_get_profiling_info\b/starpu_profiling_worker_get_info/g +s/\bstarpu_bus_profiling_helper_display_summary\b/starpu_profiling_bus_helper_display_summary/g +s/\bstarpu_worker_profiling_helper_display_summary\b/starpu_profiling_worker_helper_display_summary/g +s/\benum starpu_archtype\b/enum starpu_worker_archtype/g + +s/\bstarpu_handle_get_interface_id\b/starpu_data_get_interface_id/g +s/\bstarpu_handle_get_size\b/starpu_data_get_size/g +s/\bstarpu_handle_pack_data\b/starpu_data_pack/g +s/\bstarpu_handle_unpack_data\b/starpu_data_unpack/g diff --git a/tools/dev/checker/rename.sh 
b/tools/dev/checker/rename.sh new file mode 100755 index 0000000..3be0044 --- /dev/null +++ b/tools/dev/checker/rename.sh @@ -0,0 +1,16 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +find . -type f -not -name rename.sed -print0 | xargs -0 sed -i -f "$(dirname "$0")/rename.sed" diff --git a/tools/dev/cppcheck/suppressions.txt b/tools/dev/cppcheck/suppressions.txt new file mode 100644 index 0000000..8e69328 --- /dev/null +++ b/tools/dev/cppcheck/suppressions.txt @@ -0,0 +1,146 @@ +// StarPU --- Runtime system for heterogeneous multicore architectures. +// +// Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +// +// StarPU is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation; either version 2.1 of the License, or (at +// your option) any later version. +// +// StarPU is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+// +knownConditionTrueFalse +variableScope +ConfigurationNotChecked +shadowVariable +unusedFunction +unmatchedSuppression +unusedStructMember +missingInclude +constArgument +nullPointer +nullPointerRedundantCheck +cstyleCast +constParameter +unknownMacro + +*:build*/* + +*:starpufft/* +*:min-dgels/* +*:socl/src/CL/* +*:tools/* + +// TODO. We should cppcheck the code +*:sc_hypervisor/* +varFuncNullUB:examples/sched_ctx/two_cpu_contexts.c:76 +negativeIndex:examples/stencil/stencil-tasks.c +constStatement:examples/stencil/* +shiftTooManyBitsSigned:examples/pi/SobolQRNG/sobol_gold.c:88 + +unreadVariable:tests/openmp/* +unusedLabel:tests/datawizard/gpu_register.c +unusedLabel:tests/datawizard/gpu_ptr_register.c +redundantAssignment:tests/datawizard/interfaces/test_interfaces.c:757 +redundantAssignment:tests/datawizard/mpi_like_async.c:165 +redundantAssignment:tests/datawizard/mpi_like_async.c:211 +unusedPrivateFunction:tests/main/combined_workers/bfs/timer.h +cstyleCast:tests/main/combined_workers/bfs/bfs.cpp +redundantAssignment:tests/main/driver_api/init_run_deinit.c +redundantAssignment:tests/main/driver_api/run_driver.c +unreadVariable:tests/datawizard/variable_size.c + +uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:171 +unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:641 +unusedVariable:mpi/src/mpi/starpu_mpi_mpi.c:715 +unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:716 +unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:704 +unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:738 +unreadVariable:mpi/src/load_balancer/policy/load_heat_propagation.c:597 + +signConversion:mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c:195 + +nullPointer:src/common/utils.c:554 +redundantAssignment:src/common/utils.c:547 +nullPointerRedundantCheck:src/common/rbtree.c + +negativeIndex:include/starpu_bitmap.h:275 + +AssignmentIntegerToAddress:src/core/jobs.c:148 +redundantAssignment:src/core/workers.c +uselessAssignmentPtrArg:src/core/sched_ctx_list.c:144 
+wrongPrintfScanfArgNum:src/core/simgrid.c:1296 +unreadVariable:src/core/task.c:657 +unusedVariable:src/core/task.c:658 +uninitvar:src/core/sched_ctx.c:387 + +invalidPointerCast:src/core/perfmodel/perfmodel_nan.c:74 +unreadVariable:src/core/perfmodel/energy_model.c:179 +unusedLabel:src/core/perfmodel/perfmodel_bus.c:757 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1399 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1739 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1420 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1761 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1435 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1775 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1458 +unreadVariable:src/core/perfmodel/perfmodel_bus.c:1799 +unsignedPositive:src/core/perfmodel/perfmodel_bus.c:1390 +unsignedPositive:src/core/perfmodel/perfmodel_bus.c:1730 + +unreadVariable:src/core/dependencies/tags.c:120 + +nullPointerRedundantCheck:src/datawizard/data_request.c:228 +nullPointerRedundantCheck:src/datawizard/data_request.c:230 +nullPointerRedundantCheck:src/datawizard/copy_driver.c:682 +unreadVariable:src/datawizard/interfaces/* + +unreadVariable:src/drivers/driver_common/driver_common.c:493 + +redundantAssignment:src/drivers/hip/driver_hip.c:1188 +unreadVariable:src/drivers/hip/driver_hip.c:346 +unreadVariable:src/drivers/hip/driver_hip.c:343 +unreadVariable:src/drivers/hip/driver_hip.c:1147 +unreadVariable:src/drivers/hip/driver_hip.c:1095 + +unreadVariable:src/drivers/opencl/driver_opencl.c:767 +redundantAssignment:src/drivers/opencl/driver_opencl.c:831 +clarifyCondition:src/drivers/opencl/driver_opencl.c:945 +unreadVariable:src/drivers/opencl/driver_opencl.c:1489 +unreadVariable:src/drivers/opencl/driver_opencl.c:1406 +redundantAssignment:src/drivers/opencl/driver_opencl.c:1534 + +clarifyCondition:src/drivers/cuda/driver_cuda.c:498 +unreadVariable:src/drivers/cuda/driver_cuda.c:2151 +unreadVariable:src/drivers/cuda/driver_cuda.c:2041 
+unreadVariable:src/drivers/cuda/driver_cuda.c:1696 +unreadVariable:src/drivers/cuda/driver_cuda0.c:249 +redundantAssignment:src/drivers/cuda/driver_cuda1.c:1236 +unreadVariable:src/drivers/cuda/driver_cuda1.c:357 +unreadVariable:src/drivers/cuda/driver_cuda1.c:1195 +unreadVariable:src/drivers/cuda/driver_cuda1.c:1143 + +nullPointerRedundantCheck:src/sched_policies/fifo_queues.c:507 +nullPointerRedundantCheck:src/sched_policies/deque_modeling_policy_data_aware.c:207 +redundantAssignment:src/sched_policies/component_eager.c:109 +varFuncNullUB:src/sched_policies/modular_heteroprio_heft.c:37 +nullPointerRedundantCheck:src/sched_policies/prio_deque.c:176 +negativeIndex:src/sched_policies/parallel_heft.c:478 + +sizeofDereferencedVoidPointer:src/util/fstarpu.c +duplicateExpression:src/util/starpu_task_insert.c:52 + +pointerSize:socl/src/cl_getcontextinfo.c:33 +unreadVariable:socl/src/gc.c:193 +// the following warnings are weird and not understandable, let's ignore them +ignoredReturnValue:socl/src/cl_createkernel.c:170 +leakReturnValNotUsed:socl/src/cl_createkernel.c:170 +ignoredReturnValue:socl/src/cl_createprogramwithsource.c:136 +leakReturnValNotUsed:socl/src/cl_createprogramwithsource.c:136 + +varFuncNullUB:bubble/tests/basic/gemm_dag.c:213 +varFuncNullUB:bubble/tests/basic/gemm_dag.c:208 diff --git a/tools/dev/lsan/suppressions b/tools/dev/lsan/suppressions new file mode 100644 index 0000000..8f66af9 --- /dev/null +++ b/tools/dev/lsan/suppressions @@ -0,0 +1,43 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +# libc +leak:lt__malloc + +# openmpi +leak:ompi_free_list_grow +leak:opal_dss_unpack_value +leak:opal_dss_copy_value +leak:mca_bml_base_btl_array_reserve +leak:opal_hash_table_init + +# hwloc +leak:hwloc_topology_load +leak:hwloc_topology_set_xml +leak:hwloc_components_init +leak:hwloc_plugins_init +leak:hwloc_plugins_exit + +# papi +leak:_pe_libpfm4_init +leak:allocate_thread +leak:pfmlib_build_fstr + +# starpupy +leak:/numpy/random/ +leak:_abc__abc_init +leak:marshal_loads +# leak:PyTuple_New diff --git a/tools/dev/tsan/starpu.suppr b/tools/dev/tsan/starpu.suppr new file mode 100644 index 0000000..372304b --- /dev/null +++ b/tools/dev/tsan/starpu.suppr @@ -0,0 +1,114 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Disabled checking +race:^active_hook_cnt$ +race:^worker_exp_start$ +race:^worker_exp_end$ +race:^worker_exp_len$ +race:^ntasks$ +race:^mc_cache_size$ +race:^mc_nb$ +race:^mc_clean_nb$ +race:^prefetch_out_of_memory$ +race:^data_requests$ +race:^prefetch_requests$ +race:^idle_requests$ +race:^data_requests_npending$ +race:^used_size$ +race:^hit_cnt$ +race:^miss_cnt$ +race:^alloc_cache_hit_cnt$ +race:^alloc_cnt$ +race:^comm_amount$ +race:^_starpu_valgrind_print_once$ +race:^_starpu_silent$ +race:^keys_initialized$ +race:^tidying$ +race:^reclaiming$ +race:^_starpu_worker_drives_memory$ +race:^starpu_memory_get_total$ +race:^starpu_unistd_opened_files$ + +# don't care about cache hit stats +race:^_starpu_msi_cache_hit$ +race:^_starpu_msi_cache_miss$ + +# This is racy, but since we'll always put the same values, this is not a problem. +race:^_starpu_codelet_check_deprecated_fields$ + +# This is racy, but we don't care, it's only a statistic +race:^starpu_task_nsubmitted$ +race:^starpu_task_nready$ +race:^_starpu_bus_update_profiling_info$ +race:^lws_select_victim$ +race:^select_worker_round_robin$ + +# The config.running/pause_depth state is only protected by memory barriers +race:^_starpu_machine_is_running$ +race:^_starpu_kill_all_workers$ +race:^starpu_pause$ +race:^_starpu_may_pause$ +race:^starpu_resume$ +race:^is_running$ +race:^_starpu_sink_deinit$ + +# worker_is_initialized is not actually racy since deinit happens only after main set running to 0 +race:^_starpu_opencl_driver_deinit$ +race:^_starpu_cuda_driver_deinit$ +race:^_starpu_cpu_driver_deinit$ + +# The integer access is atomic, and we use the sched mutex to avoid missing wake ups +race:^starpu_st_fifo_taskq_empty$ +race:^push_task_eager_policy$ + +# These are just statistics +race:^starpu_memory_get_available$ +race:^_starpu_profiling$ +race:^_starpu_history_based_job_expected_perf$ +race:^compute_ntasks_end$ +race:^compute_expected_end$ +race:^compute_all_performance_predictions$ + +# There is 
actually no race with busy_count, see comment +race:_starpu_data_unregister + +# ignore other libraries' races +called_from_lib:^libmpi.so$ +called_from_lib:^libhwloc*.so$ + +# see valgrind/starpu.suppr +deadlock:starpu_pthread_mutex_lock_sched +deadlock:_starpu_sched_component_lock_worker +deadlock:_starpu_sched_component_worker_lock_scheduling +deadlock:simple_worker_pull_task + +# the assert on lock_write_owner to check for recursive write lock is inherently racy, but fine +# for the intended purpose +race:^_starpu_sched_ctx_lock_write$ +race:^_starpu_sched_ctx_lock_read$ + +# only protected by memory barrier +race:^_starpu_keys_initialized$ + +# disable race detection on cg->ntags, see comment in code +race:^_starpu_notify_cg$ + +# this does not need to be safe +race:^evictable$ + +# don't care about data status query race, this is just a hint +race:^starpu_data_query_status$ +race:^starpu_data_query_status2$ diff --git a/tools/dev/valgrind/bash.suppr b/tools/dev/valgrind/bash.suppr new file mode 100644 index 0000000..ad97a1e --- /dev/null +++ b/tools/dev/valgrind/bash.suppr @@ -0,0 +1,26 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Free + ... 
+ obj:/bin/bash + fun:execute_command_internal + fun:execute_command + fun:reader_loop + fun:main +} diff --git a/tools/dev/valgrind/blas.suppr b/tools/dev/valgrind/blas.suppr new file mode 100644 index 0000000..1e578b4 --- /dev/null +++ b/tools/dev/valgrind/blas.suppr @@ -0,0 +1,23 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + Ignore blas internal races + Helgrind:Race + ... + fun:blas_memory_alloc + ... +} diff --git a/tools/dev/valgrind/fxt.suppr b/tools/dev/valgrind/fxt.suppr new file mode 100644 index 0000000..9a0c0e8 --- /dev/null +++ b/tools/dev/valgrind/fxt.suppr @@ -0,0 +1,129 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:fxt_setinfos + fun:fut_setup +} + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:fxt_get_cpu_info + fun:fxt_setinfos + fun:fut_setup +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:fut_set_filename +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:fxt_setinfos + fun:fut_setup +} + +{ + + Memcheck:Cond + fun:fxt_get_cpu_info + fun:fxt_setinfos + fun:fut_setup +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:realloc + fun:fxt_next_ev +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:fxt_blockev_enter +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:fxt_fdopen +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:fxt_next_ev +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:fxt_load_time + fun:fxt_fdopen +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:fkt_load_pids + fun:fxt_fdopen +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:fxt_fdopen +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:fdopen@@GLIBC_2.2.5 + fun:fxt_fdopen +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:fut_setup + fun:_starpu_fxt_init_profiling + fun:starpu_initialize + fun:main +} diff --git a/tools/dev/valgrind/glpk.suppr b/tools/dev/valgrind/glpk.suppr new file mode 100644 index 0000000..2e85c5f --- /dev/null +++ b/tools/dev/valgrind/glpk.suppr @@ -0,0 +1,23 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:glp_init_env +} diff --git a/tools/dev/valgrind/hdf5.suppr b/tools/dev/valgrind/hdf5.suppr new file mode 100644 index 0000000..e36f04d --- /dev/null +++ b/tools/dev/valgrind/hdf5.suppr @@ -0,0 +1,61 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + thread-specific value + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:H5E_get_stack + ... +} + +{ + thread-specific value + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:H5E__get_stack + ... +} + +{ + thread-specific value + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:H5TS_thread_id + ... +} + +{ + thread-specific value + Memcheck:Leak + match-leak-kinds: reachable + ... 
+ fun:H5TS_cancel_count_inc + ... +} + +{ + thread-specific value + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:H5CX_push + ... +} + diff --git a/tools/dev/valgrind/helgrind.sh b/tools/dev/valgrind/helgrind.sh new file mode 100755 index 0000000..d15e0b0 --- /dev/null +++ b/tools/dev/valgrind/helgrind.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +EXEC=$(basename $0 .sh) +DIRNAME=$(dirname $0) + +CLIMIT=$(ulimit -c) +if [ "$CLIMIT" = unlimited ] +then + # valgrind cores are often *huge*, 100MB will already be quite big... 
+ ulimit -c 100000 +fi + +if test "$EXEC" == "valgrind" +then + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" +elif test "$EXEC" == "valgrind_xml" +then + mkdir -p ${DIRNAME}/../../../valgrind + XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" +else + RUN="valgrind --tool=$EXEC --error-exitcode=42" +fi +SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) + +exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS "$@" diff --git a/tools/dev/valgrind/hwloc.suppr b/tools/dev/valgrind/hwloc.suppr new file mode 100644 index 0000000..c5ae9d9 --- /dev/null +++ b/tools/dev/valgrind/hwloc.suppr @@ -0,0 +1,145 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:hwloc_components_init +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + ...
+ fun:hwloc_components_init +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:lt__malloc + fun:lt__zalloc + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + fun:lt_dlopenadvise + obj:* + obj:* + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + fun:hwloc_topology_init +} +{ + + Memcheck:Leak + fun:malloc + ... + fun:tls_get_addr_tail + ... + fun:hwloc_backends_notify_new_object + ... +} +{ + + Memcheck:Leak + fun:memalign + fun:tls_get_addr_tail + ... + fun:hwloc_backends_notify_new_object + ... +} +{ + + Memcheck:Leak + fun:malloc + ... + fun:hwloc_topology_set_xml + ... +} +{ + + Memcheck:Leak + fun:calloc + ... + fun:hwloc_topology_set_xml + ... +} +{ + + Memcheck:Leak + fun:realloc + ... + fun:hwloc_topology_set_xml + ... +} +{ + + Memcheck:Leak + fun:malloc + ... + fun:hwloc_topology_load + ... +} +{ + + Memcheck:Leak + fun:calloc + ... + fun:hwloc_topology_load + ... +} +{ + + Memcheck:Leak + fun:realloc + ... + fun:hwloc_topology_load + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:memalign + fun:tls_get_addr_tail + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:clGetPlatformIDs + obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_opencl.so + fun:hwloc_backends_notify_new_object + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.7.3 + fun:hwloc_insert_pci_device_list + obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_pci.so + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} + +{ + + Helgrind:Race + fun:hwloc_linux_set_area_membind + ... 
+} diff --git a/tools/dev/valgrind/libc.suppr b/tools/dev/valgrind/libc.suppr new file mode 100644 index 0000000..7df3ea7 --- /dev/null +++ b/tools/dev/valgrind/libc.suppr @@ -0,0 +1,385 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + Ignore libc aio-internal races + Helgrind:Race + ... + fun:aio_read + ... +} + +{ + Ignore libc aio-internal cond oddity + Helgrind:Misc + ... + fun:aio_read + ... +} + +{ + Ignore libc aio-internal races + Helgrind:Race + ... + fun:aio_write + ... +} + +{ + Ignore libc aio-internal cond oddity + Helgrind:Misc + ... + fun:aio_write + ... +} + +{ + Ignore libc aio-internal races + Helgrind:Race + ... + fun:__aio_notify + ... +} + +{ + Ignore libc aio-internal races + Helgrind:Race + ... + fun:aio_suspend + ... +} + +{ + Ignore libc aio errors + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:__aio_enqueue_request + ... +} + +{ + Ignore libc aio-internal leak + Memcheck:Leak + match-leak-kinds: possible + ... + fun:__aio_create_helper_thread + ... +} + +{ + Ignore libc printf races + Helgrind:Race + ... + fun:__vfprintf_internal + ... +} + +{ + Ignore libc printf races + Helgrind:Race + ... + fun:_IO_file_xsputn + ... +} + +{ + + Memcheck:Leak + ... + fun:dlopen@@GLIBC_2.2.5 + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:lt__malloc + fun:lt__zalloc + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.22 + fun:call_init.part.0 + fun:_dl_init + obj:/lib/x86_64-linux-gnu/ld-2.21.so +} + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:openaux + fun:_dl_catch_error + fun:_dl_map_object_deps + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:openaux + fun:_dl_catch_error + fun:_dl_map_object_deps + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:_dl_load_cache_lookup + fun:_dl_map_object + fun:openaux + fun:_dl_catch_error + fun:_dl_map_object_deps + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + fun:call_init.part.0 + fun:call_init + fun:_dl_init + obj:/lib/x86_64-linux-gnu/ld-2.26.so +} +{ + + Memcheck:Value8 + obj:/lib/x86_64-linux-gnu/libc-2.28.so + ... +} +{ + + Memcheck:Cond + obj:/lib/x86_64-linux-gnu/libc-2.28.so + ... +} +{ + + Memcheck:Cond + obj:/lib/x86_64-linux-gnu/libc-2.28.so + ... 
+} +{ + + Memcheck:Value8 + obj:/lib/x86_64-linux-gnu/ld-2.28.so + obj:* + obj:* + obj:* +} +{ + + Memcheck:Cond + obj:/lib/x86_64-linux-gnu/ld-2.28.so + obj:* +} +{ + + Memcheck:Value8 + obj:/lib/x86_64-linux-gnu/ld-2.28.so + obj:* +} +{ + + Memcheck:Param + openat(filename) + obj:/lib/x86_64-linux-gnu/libc-2.28.so + obj:* +} +{ + + Memcheck:Value8 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Cond + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Param + openat(filename) + obj:/lib/x86_64-linux-gnu/ld-2.28.so + ... +} +{ + + Memcheck:Cond + obj:/lib/x86_64-linux-gnu/libdl-2.28.so +} +{ + + Memcheck:Param + read(count) + obj:/lib/x86_64-linux-gnu/libc-2.28.so +} +{ + + Memcheck:Param + read(buf) + obj:/lib/x86_64-linux-gnu/libc-2.28.so +} +{ + + Memcheck:Param + lseek(offset) + obj:/lib/x86_64-linux-gnu/libc-2.28.so + obj:/lib/x86_64-linux-gnu/libc-2.28.so +} +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 + obj:* +} +{ + + Memcheck:Value8 + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 + obj:* +} +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:_dl_close + fun:_dl_catch_exception + fun:_dl_catch_error + fun:dlerror_run + fun:free_mem + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} +{ + + Memcheck:Addr8 + fun:strncmp + fun:is_dst + fun:_dl_dst_count + fun:expand_dynamic_string_token + fun:fillin_rpath.isra.0 + ... +} +{ + + Memcheck:Addr8 + fun:strncmp + fun:is_dst + fun:_dl_dst_substitute + fun:fillin_rpath.isra.0 + ... 
+} +{ + + Memcheck:Cond + fun:free + fun:free_res + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} +{ + + Memcheck:Free + fun:free + fun:free_res + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:malloc + fun:_dl_close_worker + fun:_dl_close + fun:_dl_catch_exception + fun:_dl_catch_error + fun:_dlerror_run + ... +} +{ + + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_dl_allocate_tls + fun:allocate_stack + fun:pthread_create@@GLIBC_2.34 + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:allocate_dtv_entry + fun:allocate_and_init + fun:tls_get_addr_tail + fun:__tls_get_addr + ... +} diff --git a/tools/dev/valgrind/libgomp.suppr b/tools/dev/valgrind/libgomp.suppr new file mode 100644 index 0000000..e932f69 --- /dev/null +++ b/tools/dev/valgrind/libgomp.suppr @@ -0,0 +1,80 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + gomp2 + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:gomp_malloc + fun:gomp_init_num_threads + fun:initialize_env + fun:call_init.part.0 + fun:call_init + fun:_dl_init + ... 
+} + +{ + gomp3 + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21 + fun:call_init.part.0 + fun:_dl_init + ... +} + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + fun:call_init + fun:call_init + fun:_dl_init + obj:/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + fun:call_init.part.0 + fun:call_init + fun:_dl_init + obj:/usr/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 + fun:call_init + fun:call_init + fun:_dl_init + obj:/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 +} diff --git a/tools/dev/valgrind/libnuma.suppr b/tools/dev/valgrind/libnuma.suppr new file mode 100644 index 0000000..f484316 --- /dev/null +++ b/tools/dev/valgrind/libnuma.suppr @@ -0,0 +1,40 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +{ + libnuma error + Memcheck:Cond + ... + fun:numa_node_size64 + ... +} + +{ + libnuma error + Memcheck:Value8 + ... + fun:numa_node_size64 + ... +} +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0 + obj:* + obj:/usr/lib/x86_64-linux-gnu/libquadmath.so.0.0.0 + obj:* +} diff --git a/tools/dev/valgrind/madmpi.suppr b/tools/dev/valgrind/madmpi.suppr new file mode 100644 index 0000000..f9370ed --- /dev/null +++ b/tools/dev/valgrind/madmpi.suppr @@ -0,0 +1,62 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:hwloc_bitmap_alloc + fun:piom_topo_current_obj + fun:piom_ltask_schedule + fun:nm_schedule + fun:nm_sr_progress + fun:nm_sr_stest + fun:MPI_Test +} +{ + + Memcheck:Leak + match-leak-kinds: definite + ... + fun:nm_schedule + fun:nm_sr_progress + fun:nm_sr_stest + fun:MPI_Test +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:hwloc_bitmap_alloc + fun:piom_topo_current_obj + fun:piom_ltask_schedule + fun:nm_schedule + fun:nm_sr_progress + fun:nm_sr_rtest + fun:MPI_Test +} +{ + + Memcheck:Leak + match-leak-kinds: definite + ... 
+ fun:nm_schedule + fun:nm_sr_progress + fun:nm_sr_rtest + fun:MPI_Test +} diff --git a/tools/dev/valgrind/nvidia.suppr b/tools/dev/valgrind/nvidia.suppr new file mode 100644 index 0000000..2714e28 --- /dev/null +++ b/tools/dev/valgrind/nvidia.suppr @@ -0,0 +1,84 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + ... + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Value8 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + ... 
+ obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Param + readlink(path) + obj:/lib/x86_64-linux-gnu/libc-2.28.so + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Param + lstat(file_name) + obj:/lib/x86_64-linux-gnu/libc-2.28.so + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Value8 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Value8 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} +{ + + Memcheck:Cond + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/lib/x86_64-linux-gnu/ld-2.28.so +} diff --git a/tools/dev/valgrind/opencl.suppr b/tools/dev/valgrind/opencl.suppr new file mode 100644 index 0000000..f95057e --- /dev/null +++ b/tools/dev/valgrind/opencl.suppr @@ -0,0 +1,760 @@ +# StarPU --- Runtime system for heterogeneous multicore 
architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:__alloc_dir + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dl_check_map_versions + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + 
obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dl_check_map_versions + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:_dl_new_object + fun:_dl_map_object_from_fd + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:_dl_load_cache_lookup + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + ... + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:_dl_load_cache_lookup + fun:_dl_map_object + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + 
Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 + obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Helgrind:UnlockUnlocked + fun:mutex_unlock_WRK + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + fun:aclCompilerInit + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + fun:clIcdGetPlatformIDsKHR + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... 
+} +{ + + Helgrind:PthAPIerror + fun:mutex_unlock_WRK + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + fun:aclCompilerInit + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + fun:clIcdGetPlatformIDsKHR + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... +} +{ + + Helgrind:Race + ... + fun:init_one_static_tls + fun:__pthread_init_static_tls + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... +} +{ + + Memcheck:Cond + ... + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + ... + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... +} +{ + + Memcheck:Value8 + ... + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + ... + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... +} +{ + + Memcheck:Leak + ... + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + ... + fun:call_init.part.0 + fun:_dl_init + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open + fun:dlopen_doit + fun:_dl_catch_error + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 + ... 
+} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:_dlerror_run + fun:dlclose + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + fun:__run_exit_handlers + fun:exit + ... +} + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so + ... +} diff --git a/tools/dev/valgrind/openmp.suppr b/tools/dev/valgrind/openmp.suppr new file mode 100644 index 0000000..c7536bd --- /dev/null +++ b/tools/dev/valgrind/openmp.suppr @@ -0,0 +1,215 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:pci_device_get_device_name + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:pci_device_get_device_name + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:pci_device_get_device_name + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 
+ fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 + fun:hwloc_look_pci + fun:hwloc_discover + fun:hwloc_topology_load + fun:_starpu_init_topology + fun:_starpu_topology_get_nhwcpu + fun:check_bus_config_file + fun:_starpu_load_bus_performance_files + fun:starpu_initialize + fun:starpu_init + fun:omp_initial_thread_setup + 
fun:omp_initial_region_setup + fun:starpu_omp_init + fun:omp_constructor + fun:__libc_csu_init + fun:(below main) +} + +{ + ignore GOMP barrier race + Helgrind:Race + ... + fun:gomp_barrier_wait + ... +} + +{ + ignore GOMP barrier race + Helgrind:Race + ... + fun:gomp_barrier_wait_start + ... +} + +{ + ignore GOMP barrier race + Helgrind:Race + ... + fun:gomp_barrier_wait_end + ... +} + +{ + ignore GOMP barrier race + Helgrind:Race + ... + fun:gomp_barrier_wait_final + ... +} diff --git a/tools/dev/valgrind/openmpi.suppr b/tools/dev/valgrind/openmpi.suppr new file mode 100644 index 0000000..8794a3c --- /dev/null +++ b/tools/dev/valgrind/openmpi.suppr @@ -0,0 +1,673 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + suppr1 + Memcheck:Param + writev(vector[...]) + ... + fun:ompi_mpi_init + ... +} + +{ + suppr2 + Memcheck:Addr4 + ... + fun:orte_init + ... +} + +{ + suppr3 + Memcheck:Param + sched_setaffinity(mask) + ... + fun:orte_init + ... +} + +{ + suppr4 + Memcheck:Addr8 + ... + fun:orte_init +} + +{ + suppr5 + Memcheck:Leak + ... + fun:ompi_mpi_init +} + +{ + suppr5 + Helgrind:Race + ... + fun:ompi_mpi_init +} + +{ + suppr6 + Memcheck:Leak + ... + fun:mca_pml_base_open +} + +{ + suppr7 + Memcheck:Leak + ... + fun:orte_init +} + +{ + suppr7 + Memcheck:Leak + ... + fun:orte_progress_thread_engine +} + +{ + suppr7 + Helgrind:Race + ... 
+ fun:orte_progress_thread_engine +} + +{ + suppr7 + Helgrind:Race + ... + fun:event_base_loop + fun:progress_engine + ... +} + +{ + suppr8 + Memcheck:Leak + ... + fun:orte_ess_base_app_setup +} + +{ + suppr9 + Memcheck:Leak + ... + fun:opal_paffinity_base_open +} + +{ + suppr10 + Memcheck:Leak + ... + fun:ompi_mpi_finalize +} + +{ + suppr10 + Helgrind:Race + ... + fun:ompi_mpi_finalize +} + +{ + suppr10 + Helgrind:Misc + ... + fun:ompi_mpi_finalize +} + +{ + suppr10 + Helgrind:PthAPIerror + ... + fun:ompi_mpi_finalize +} + +{ + suppr11 + Memcheck:Leak + ... + fun:mca_base_components_open +} + +{ + suppr12 + Memcheck:Param + writev(vector[...]) + ... + fun:PMPI_Init_thread +} + +{ + suppr13 + Memcheck:Param + writev(vector[...]) + ... + fun:PMPI_Init_thread +} + +{ + suppr14 + Memcheck:Param + sched_setaffinity(mask) + ... + fun:PMPI_Init_thread +} + +{ + suppr15 + Memcheck:Leak + fun:malloc + fun:ompi_free_list_grow + ... + fun:opal_progress + fun:ompi_request_default_test + fun:PMPI_Test +} + +{ + suppr15 + Memcheck:Leak + fun:malloc + fun:opal_free_list_grow + ... + fun:opal_progress + fun:ompi_request_default_test + fun:PMPI_Test +} + +{ + suppr16 + Memcheck:Leak + fun:malloc + fun:ompi_ddt_set_args + fun:PMPI_Type_vector +} + +{ + suppr17 + Memcheck:Leak + fun:malloc + fun:ompi_ddt_optimize_short.constprop.0 + fun:ompi_ddt_commit + fun:PMPI_Type_commit +} + +{ + suppr18 + Memcheck:Leak + fun:calloc + fun:ompi_ddt_create + fun:ompi_ddt_create_vector + fun:PMPI_Type_vector +} + +{ + suppr19 + Memcheck:Leak + fun:malloc + fun:ompi_ddt_create + fun:ompi_ddt_create_vector + fun:PMPI_Type_vector +} + +{ + suppr20 + Memcheck:Leak + fun:malloc + fun:ompi_free_list_grow + ... + fun:PMPI_Isend +} + +{ + suppr20 + Memcheck:Leak + fun:malloc + fun:opal_free_list_grow + ... + fun:PMPI_Isend +} + +{ + suppr20 + Memcheck:Leak + fun:malloc + fun:ompi_free_list_grow + ... + fun:PMPI_Barrier +} + +{ + suppr20 + Memcheck:Leak + fun:malloc + fun:opal_free_list_grow + ... 
+ fun:PMPI_Barrier +} + +{ + suppr21 + Memcheck:Leak + ... + fun:hwloc_topology_set_xmlbuffer + fun:opal_hwloc_unpack + fun:opal_dss_unpack_buffer +} + +{ + suppr22 + Memcheck:Leak + ... + fun:hwloc_topology_set_xmlbuffer + fun:opal_hwloc_unpack +} + +{ + suppr23 + Memcheck:Leak + ... + fun:hwloc_topology_load + fun:opal_hwloc_unpack +} + + +{ + suppr24 + Memcheck:Leak + fun:malloc + ... + fun:xmlParseElement +} + +{ + suppr25 + Memcheck:Leak + match-leak-kinds: indirect + ... + fun:ompi_datatype_commit + fun:PMPI_Type_commit +} + +{ + suppr26 + Memcheck:Leak + match-leak-kinds: definite + ... + fun:ompi_datatype_create_vector + fun:PMPI_Type_vector +} + +{ + suppr27 + Memcheck:Leak + match-leak-kinds: indirect + ... + fun:ompi_datatype_create_vector + fun:PMPI_Type_vector +} + +{ + suppr28 + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:ompi_datatype_set_args + fun:PMPI_Type_vector +} + +{ + suppr29 + Memcheck:Leak + ... + fun:PMPI_Comm_split + fun:main +} + +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:orte_grpcomm_base_update_modex_entries + fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop + fun:opal_progress + obj:* + fun:ompi_modex_recv_key_value +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + obj:/usr/lib/openmpi/lib/libmpi.so.1.0.8 + fun:orte_grpcomm_base_update_modex_entries + fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop + fun:opal_progress + obj:* +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:orte_grpcomm_base_update_modex_entries + fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop + fun:opal_progress + obj:* + fun:ompi_modex_recv_key_value +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + obj:/usr/lib/openmpi/lib/libmpi.so.1.0.8 + fun:orte_grpcomm_base_update_modex_entries + 
fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop + fun:opal_progress + obj:* +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:lt__malloc + fun:lt__zalloc + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + fun:lt_dlopenadvise + obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_cuda.so + obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_cuda.so + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + fun:hwloc_topology_init + fun:opal_hwloc_unpack + fun:opal_dss_unpack_buffer + fun:opal_dss_unpack +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + fun:lt_dlforeachfile + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + fun:hwloc_topology_init + fun:opal_hwloc_unpack + fun:opal_dss_unpack_buffer + fun:opal_dss_unpack +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 + fun:lt_dlforeachfile + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 + fun:hwloc_topology_init + fun:opal_hwloc_unpack + fun:opal_dss_unpack_buffer + fun:opal_dss_unpack + fun:orte_util_nidmap_init +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:opal_dss_unpack_string + fun:opal_dss_unpack_buffer + fun:opal_dss_unpack + fun:orte_grpcomm_base_update_modex_entries + fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:strdup + fun:orte_grpcomm_base_update_modex_entries + fun:orte_grpcomm_base_modex_unpack + obj:* + obj:* + obj:* + obj:* + obj:* + fun:opal_event_base_loop 
+ fun:opal_progress + obj:* +} + +# the following suppression occurs on bertha with openmpi 1.10.2 +# no idea why it is failing +# Invalid read of size 8 +# at 0x4C2E726: memcpy@@GLIBC_2.14 (vg_replace_strmem.c:1018) +# by 0x77F31C0: opal_convertor_pack (in /usr/lib/openmpi/lib/libopen-pal.so.13.0.2) +# by 0x105D8C41: mca_btl_vader_sendi (in /usr/lib/openmpi/lib/openmpi/mca_btl_vader.so) +# by 0x10C16F05: mca_pml_ob1_send_inline (in /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so) +# by 0x10C17662: mca_pml_ob1_isend (in /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so) +# by 0x5B6471D: PMPI_Isend (in /usr/lib/openmpi/lib/libmpi.so.12.0.2) +# by 0x4E3F08E: _starpu_mpi_isend_size_func (starpu_mpi.c:384) +# by 0x4E4602D: _starpu_mpi_handle_ready_request (starpu_mpi.c:1132) +# by 0x4E47C3C: _starpu_mpi_progress_thread_func (starpu_mpi.c:1342) +# by 0x58E6283: start_thread (pthread_create.c:333) +# by 0x60F5A4C: clone (in /lib/x86_64-linux-gnu/libc-2.21.so) +# Address 0x1ab67c40 is 16 bytes inside a block of size 24 alloc'd +# at 0x4C2BBD5: calloc (vg_replace_malloc.c:711) +# by 0x4E3EDBA: _starpu_mpi_isend_size_func (starpu_mpi.c:369) +# by 0x4E4602D: _starpu_mpi_handle_ready_request (starpu_mpi.c:1132) +# by 0x4E47C3C: _starpu_mpi_progress_thread_func (starpu_mpi.c:1342) +# by 0x58E6283: start_thread (pthread_create.c:333) +# by 0x60F5A4C: clone (in /lib/x86_64-linux-gnu/libc-2.21.so) +{ + + Memcheck:Addr8 + fun:memcpy@@GLIBC_2.14 + fun:opal_convertor_pack + fun:mca_btl_vader_sendi + fun:mca_pml_ob1_send_inline + fun:mca_pml_ob1_isend + fun:PMPI_Isend + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr2 + fun:memcpy@@GLIBC_2.14 + fun:opal_convertor_pack + fun:mca_btl_vader_sendi + fun:mca_pml_ob1_send_inline + fun:mca_pml_ob1_isend + fun:PMPI_Isend + fun:_starpu_mpi_isend_data_func + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + 
fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr8 + fun:memcpy@@GLIBC_2.14 + fun:vader_prepare_src + fun:mca_pml_ob1_send_request_start_prepare + fun:mca_pml_ob1_isend + fun:PMPI_Isend + fun:_starpu_mpi_isend_data_func + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr8 + fun:memcpy@@GLIBC_2.14 + fun:mca_btl_vader_sendi + fun:mca_pml_ob1_send_inline + fun:mca_pml_ob1_isend + fun:PMPI_Isend + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr8 + fun:memcpy@@GLIBC_2.14 + fun:opal_convertor_pack + ... + fun:PMPI_Isend + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr2 + fun:memcpy@@GLIBC_2.14 + fun:opal_convertor_pack + ... + fun:PMPI_Isend + fun:_starpu_mpi_isend_data_func + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} +{ + + Memcheck:Addr8 + fun:_starpu_mpi_isend_size_func + fun:_starpu_mpi_handle_ready_request + fun:_starpu_mpi_progress_thread_func + fun:start_thread +} + +# that happens at initialization +{ + + Memcheck:Param + writev(vector[...]) + fun:__writev + fun:writev + fun:send_msg + fun:pmix_ptl_base_send_handler + fun:event_persist_closure + fun:event_process_active_single_queue + fun:event_process_active + fun:event_base_loop + fun:progress_engine + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:fetch_nodeinfo + fun:hash_fetch + fun:_getnb_cbfunc + fun:pmix_ptl_base_process_msg + fun:event_process_active_single_queue + fun:event_process_active + fun:event_base_loop + fun:progress_engine + fun:start_thread + fun:clone +} + + +{ + + Helgrind:LockOrder + ... 
+ fun:mca_bml_r2_add_procs + ... +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:calloc + fun:ompi_group_allocate + fun:ompi_group_incl_plist + fun:PMPI_Group_incl + fun:PMPI_Group_incl +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:calloc + fun:UnknownInlinedFun + fun:ompi_group_allocate_plist_w_procs + fun:ompi_group_allocate + fun:ompi_group_incl_plist + fun:PMPI_Group_incl + fun:PMPI_Group_incl +} diff --git a/tools/dev/valgrind/p11-kit.suppr b/tools/dev/valgrind/p11-kit.suppr new file mode 100644 index 0000000..ec3ea84 --- /dev/null +++ b/tools/dev/valgrind/p11-kit.suppr @@ -0,0 +1,22 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Helgrind:Misc + fun:mutex_destroy_WRK + fun:p11_library_uninit +} diff --git a/tools/dev/valgrind/padico.suppr b/tools/dev/valgrind/padico.suppr new file mode 100644 index 0000000..77f7eb8 --- /dev/null +++ b/tools/dev/valgrind/padico.suppr @@ -0,0 +1,579 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Param + match-leak-kinds: reachable + ... + fun:padico_tasklet_worker.2516.2614 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:padico_tasklet_worker.2516.2614 +} +{ + + Memcheck:Param + ioctl(SIOCETHTOOL) + ... + fun:padico_tasklet_worker.2516.2614 +} +{ + + Memcheck:Param + write(buf) + ... + fun:nm_launcher_init +} +{ + + Memcheck:Param + match-leak-kinds: reachable + ... + fun:nm_launcher_init +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:nm_launcher_init +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:puk_xml_parse_file +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:puk_xml_end_handler.3579 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:puk_xml_parse_buffer +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + ... + fun:padico_tasklet_worker.2516.2614 +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:puk_mod_builtin_declare + fun:call_init.part.0 + fun:_dl_init + ... +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:hwloc_bitmap_alloc + fun:piom_topo_current_obj + fun:__piom_ltask_timer_worker.3325 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... 
+ fun:_dl_close + fun:_dl_catch_error + fun:dlerror_run + fun:free_mem + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} +{ + + Memcheck:Leak + match-leak-kinds: indirect + fun:malloc + fun:strdup + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:sysio_control_start + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:piom_topo_current_obj +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:hwloc_bitmap_alloc + fun:piom_topo_current_obj + fun:piom_ltask_schedule + fun:__piom_ltask_idle_worker.3322 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_addrdb_publish_handler + fun:puk_xml_start_handler.3574 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:_puk_component_instantiate + fun:sysio_control_bootstrap_acceptor + fun:sfplain_acceptor_finalizer + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + 
fun:puk_xml_parse_buffer + fun:sysio_control_start + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:sysio_control_instantiate + fun:_puk_component_instantiate + fun:sysio_control_bootstrap_acceptor + fun:sfplain_acceptor_finalizer + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:padico_tasklet_schedule + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:padico_control_deliver_message + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:padico_tasklet_schedule + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:puk_iface_register + fun:puk_component_declare2 + fun:padico_module_init + fun:unit_binary_load.5249.2496 + fun:padico_puk_mod_load + fun:_puk_component_resolve + fun:padico_control_send_msg + fun:padico_control_send_ext + fun:control_router_event_listener + fun:padico_tasklet_flush + fun:control_router_event_listener + fun:padico_tasklet_flush + fun:sysio_control_start + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:realloc + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:sysio_control_start + 
fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + fun:puk_iface_register + fun:puk_component_declare2 + fun:padico_module_init + fun:unit_binary_load.5249.2496 + fun:padico_puk_mod_load + fun:_puk_component_resolve + fun:padico_control_send_msg + fun:padico_control_send_ext + fun:control_router_event_listener + fun:padico_tasklet_flush + fun:control_router_event_listener + fun:padico_tasklet_flush + fun:sysio_control_start + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:padico_module_init + fun:unit_binary_load.5249.2496 + fun:padico_puk_mod_load + fun:_puk_component_resolve + fun:padico_control_send_msg + fun:padico_control_send_ext + fun:control_router_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_tm_tasks_process.2519.2436 + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_addrdb_publish_handler + fun:puk_xml_start_handler.3574 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tm_req_wait + fun:padico_tm_mod_action_args + fun:run_end_handler.2909.2464 + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + 
obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:padico_module_init + fun:unit_binary_load.5249.2496 + fun:padico_puk_mod_load + fun:_puk_component_resolve + fun:padico_control_send_msg + fun:padico_control_send_ext + fun:control_router_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_tm_tasks_process.2519.2436 + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_addrdb_publish_handler + fun:puk_xml_start_handler.3574 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tm_req_wait + fun:padico_tm_mod_action_args + fun:run_end_handler.2909.2464 + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + 
obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:padico_module_init + fun:unit_binary_load.5249.2496 + fun:padico_puk_mod_load + fun:_puk_component_resolve + fun:padico_control_send_msg + fun:padico_control_send_ext + fun:control_router_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_tm_tasks_process.2519.2436 + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:padico_addrdb_publish_handler + fun:puk_xml_start_handler.3574 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tm_req_wait + fun:padico_tm_mod_action_args + fun:run_end_handler.2909.2464 + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + 
fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:sysio_control_read_callback + fun:na_sysio_thread + fun:start_thread + fun:clone +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:topo_property_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_flush + fun:router_update_end_handler + fun:puk_xml_end_handler.3579 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 + fun:XML_ParseBuffer + fun:puk_xml_parse_buffer + fun:na_control_event_listener + fun:padico_tasklet_worker.2516.2614 + fun:start_thread + fun:clone +} diff --git a/tools/dev/valgrind/papi.suppr b/tools/dev/valgrind/papi.suppr new file mode 100644 index 0000000..30fb6d6 --- /dev/null +++ b/tools/dev/valgrind/papi.suppr @@ -0,0 +1,51 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + fun:malloc + ... 
+ fun:_pe_libpfm4_init + ... +} + +# This happens in multithreaded_init: papi does not support getting initialized in one thread and shut down in another thread. +{ + + Memcheck:Leak + ... + fun:allocate_thread + ... +} + +# This seems like a leak in papi +{ + + Memcheck:Leak + match-leak-kinds: definite + ... + fun:pfmlib_build_fstr.part.0 + fun:pfmlib_perf_event_encode + fun:allocate_native_event + fun:_pe_libpfm4_ntv_name_to_code + fun:_papi_hwi_native_name_to_code + fun:PAPI_event_name_to_code + fun:PAPI_event_name_to_code + fun:PAPI_add_named_event + fun:add_event + ... +} diff --git a/tools/dev/valgrind/pthread.suppr b/tools/dev/valgrind/pthread.suppr new file mode 100644 index 0000000..a437eb9 --- /dev/null +++ b/tools/dev/valgrind/pthread.suppr @@ -0,0 +1,44 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + pthread_create + Memcheck:Leak + ... + fun:pthread_create@@GLIBC_2.2.5 + ... +} + +{ + name + Memcheck:Leak + ... + fun:pthread_cancel_init + ... +} + +{ + Helgrind 3.9 wrongly compares to-be-destroyed mutex with init value + Helgrind:Race + fun:my_memcmp + fun:pthread_mutex_destroy + ... 
+} +{ + + Memcheck:Value8 + obj:/lib/x86_64-linux-gnu/libpthread-2.28.so +} diff --git a/tools/dev/valgrind/starpu.suppr b/tools/dev/valgrind/starpu.suppr new file mode 100644 index 0000000..8a53b42 --- /dev/null +++ b/tools/dev/valgrind/starpu.suppr @@ -0,0 +1,220 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + don't care about cache hit stats + Helgrind:Race + fun:_starpu_msi_cache_hit + ... +} + +{ + don't care about cache miss stats + Helgrind:Race + fun:_starpu_msi_cache_miss + ... +} + +{ + don't care about data status query race, this is just a hint + Helgrind:Race + fun:starpu_data_query_status + ... +} + +{ + don't care about data status query race, this is just a hint + Helgrind:Race + fun:starpu_data_query_status2 + ... +} + +{ + known race, but not problematic in practice, see comment in _starpu_tag_clear + Helgrind:LockOrder + ... + fun:_starpu_tag_free + fun:_starpu_htbl_clear_tags + ... + fun:_starpu_tag_clear + fun:starpu_shutdown + ... +} + + +{ + There is actually no race on current_mode, because the mode can not change unexpectedly, until _starpu_notify_data_dependencies() is called further down. Valgrind can not know about such software rwlock. + Helgrind:Race + fun:_starpu_release_data_on_node + fun:_starpu_push_task_output + ... 
+} + +{ + We do not care about races on profiling statistics + Helgrind:Race + fun:_starpu_worker_get_status + fun:_starpu_worker_reset_profiling_info_with_lock + ... +} + +{ + This is racy, but since we'll always put the same values, this is not a problem. + Helgrind:Race + fun:_starpu_codelet_check_deprecated_fields + ... +} + +{ + This is racy, but we don't care, it's only a statistic + Helgrind:Race + fun:starpu_task_nsubmitted + ... +} + +{ + This is racy, but we don't care, it's only a statistic + Helgrind:Race + fun:starpu_task_nready + ... +} + +{ + This is racy, but we don't care, it's only a statistic + Helgrind:Race + fun:_starpu_bus_update_profiling_info + ... +} + +{ + fscanf error + Memcheck:Cond + ... + fun:fscanf + fun:_starpu_load_bus_performance_files + ... +} + +{ + locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 + Helgrind:LockOrder + fun:mutex_trylock_WRK + ... +} + +{ + locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 + Helgrind:LockOrder + fun:pthread_rwlock_trywrlock_WRK + ... +} + +{ + locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 + Helgrind:LockOrder + fun:pthread_rwlock_tryrdlock_WRK + ... +} + +{ + mc / handle locking order1 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:try_to_free_mem_chunk + ... +} + +{ + mc / handle locking order1 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:try_to_throw_mem_chunk + ... +} + +{ + mc / handle locking order2 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:try_to_find_reusable_mem_chunk + ... +} + +{ + mc / handle locking order2 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:try_to_find_reusable_mc + ... +} + +{ + mc / handle locking order3 + Helgrind:LockOrder + ... 
+ fun:__starpu_spin_lock + fun:free_potentially_in_use_mc + ... +} + +{ + mc / handle locking order4 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:free_potentially_in_use_mc + ... +} + +{ + mc / handle locking order5 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:register_mem_chunk + ... +} + +{ + mc / handle locking order6 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:_starpu_request_mem_chunk_removal + ... +} + +{ + mc / handle locking order7 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:_starpu_allocate_interface + ... +} + +{ + mc / handle locking order8 + Helgrind:LockOrder + ... + fun:__starpu_spin_lock + fun:_starpu_memchunk_recently_used + ... +} diff --git a/tools/dev/valgrind/starpu_pw.suppr b/tools/dev/valgrind/starpu_pw.suppr new file mode 100644 index 0000000..bf28d5d --- /dev/null +++ b/tools/dev/valgrind/starpu_pw.suppr @@ -0,0 +1,62 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +# the code needs to be fixed so that these suppressions are no longer needed + +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:_starpu_parallel_worker_init + fun:_starpu_parallel_worker + fun:_starpu_parallel_worker_group + fun:_starpu_parallel_worker_topology + fun:_starpu_parallel_worker_config + fun:_starpu_parallel_worker_init_varg + fun:starpu_parallel_worker_init +} + +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:calloc + fun:_starpu_parallel_worker_init_varg + fun:starpu_parallel_worker_init +} + +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:calloc + fun:_starpu_parallel_worker_init_varg + fun:starpu_cluster_machine +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:_starpu_parallel_worker_init + fun:_starpu_parallel_worker + fun:_starpu_parallel_worker_group + fun:_starpu_parallel_worker_topology + fun:_starpu_parallel_worker_config + fun:_starpu_parallel_worker_init_varg + fun:starpu_cluster_machine +} diff --git a/tools/dev/valgrind/starpupy.suppr b/tools/dev/valgrind/starpupy.suppr new file mode 100644 index 0000000..92195c8 --- /dev/null +++ b/tools/dev/valgrind/starpupy.suppr @@ -0,0 +1,75 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + + Memcheck:Leak + fun:malloc + ... + obj:*/numpy/random/* + ... +} + +{ + + Memcheck:Leak + fun:malloc + ...
+ fun:_abc__abc_init.lto_priv.0 + ... +} + +{ + + Memcheck:Leak + fun:malloc + ... + fun:PyTuple_New + ... + fun:marshal_loads + ... +} + +{ + + Memcheck:Leak + fun:malloc + ... + fun:PyTuple_New + ... + fun:PyMarshal_ReadObjectFromString + ... +} + +{ + + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl + fun:pyepoll_internal_ctl.lto_priv.0 + ... +} + +{ + + Memcheck:Addr8 + fun:strncmp + fun:is_dst + ... + fun:dlopen@@GLIBC_2.2.5 + fun:_PyImport_FindSharedFuncptr + ... +} diff --git a/tools/dev/valgrind/valgrind.sh b/tools/dev/valgrind/valgrind.sh new file mode 100755 index 0000000..d15e0b0 --- /dev/null +++ b/tools/dev/valgrind/valgrind.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +EXEC=$(basename $0 .sh) +DIRNAME=$(dirname $0) + +CLIMIT=$(ulimit -c) +if [ "$CLIMIT" = unlimited ] +then + # valgrind cores are often *huge*, 100MB will already be quite big... 
+ ulimit -c 100000 +fi + +if test "$EXEC" == "valgrind" +then + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" +elif test "$EXEC" == "valgrind_xml" +then + mkdir -p ${DIRNAME}/../../../valgrind + XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" +else + RUN="valgrind --tool=$EXEC --error-exitcode=42" +fi +SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) + +exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $* diff --git a/tools/dev/valgrind/valgrind.suppr b/tools/dev/valgrind/valgrind.suppr new file mode 100644 index 0000000..2572f26 --- /dev/null +++ b/tools/dev/valgrind/valgrind.suppr @@ -0,0 +1,35 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +{ + valgrind_suppr1 + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:ensure_sReq_space + fun:add_shadow_Request + fun:PMPI_Irecv +} + +{ + _dl_init_suppr_01 + Memcheck:Leak + fun:malloc + ... 
+ fun:call_init.part.0 + fun:_dl_init + ... +} diff --git a/tools/dev/valgrind/valgrind_xml.sh b/tools/dev/valgrind/valgrind_xml.sh new file mode 100755 index 0000000..d15e0b0 --- /dev/null +++ b/tools/dev/valgrind/valgrind_xml.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +EXEC=$(basename $0 .sh) +DIRNAME=$(dirname $0) + +CLIMIT=$(ulimit -c) +if [ "$CLIMIT" = unlimited ] +then + # valgrind cores are often *huge*, 100MB will already be quite big... 
+ ulimit -c 100000 +fi + +if test "$EXEC" == "valgrind" +then + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" +elif test "$EXEC" == "valgrind_xml" +then + mkdir -p ${DIRNAME}/../../../valgrind + XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) + RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" +else + RUN="valgrind --tool=$EXEC --error-exitcode=42" +fi +SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) + +exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $* diff --git a/tools/distrib/distrib.r b/tools/distrib/distrib.r new file mode 100755 index 0000000..27f429e --- /dev/null +++ b/tools/distrib/distrib.r @@ -0,0 +1,63 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +t<-read.table("output.update_block"); + +plot.multi.dens <- function(s) +{ + junk.x = NULL + junk.y = NULL + for(i in 1:length(s)) + { + junk.x = c(junk.x, density(s[[i]])$x) + junk.y = c(junk.y, density(s[[i]])$y) + } + xr <- range(junk.x) + yr <- range(junk.y) + + plot(density(s[[1]]), xlim = xr, ylim = yr, main="") + for(i in 1:length(s)) + { + lines(density(s[[i]]), xlim = xr, ylim = yr, col = i) + } +} + +cpus <- seq(1, 8); + +per_cpu <- function(cpuid) +{ + t[4][t[2] == cpuid]; +} + +density_cpu <- function(cpuid) +{ + density(per_cpu(cpuid)) +} + +l <- list() +leg <- c() + +for (cpu in 1:4) +{ + l <- c(l, list(per_cpu(cpu))) + leg <- c(leg, cpu) +} + + +library(Hmisc) + +plot.multi.dens( l); +le <- largest.empty(per_cpu(0), per_cpu(1), 0.1, 0.1) +legend(le,legend=leg, col=(1:8), lwd=2, lty = 1) diff --git a/tools/distrib/distrib.sh b/tools/distrib/distrib.sh new file mode 100755 index 0000000..d56be92 --- /dev/null +++ b/tools/distrib/distrib.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +filename=$1 + +symbol_list=`cut -f1 $filename|sort -u` + +for symbol in $symbol_list +do + echo $symbol + + grep "^$symbol" $filename > output.$symbol + + +done diff --git a/tools/gdbinit b/tools/gdbinit new file mode 100644 index 0000000..86b86e7 --- /dev/null +++ b/tools/gdbinit @@ -0,0 +1,1238 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# To set a breakpoint when starting gdb with option "-ex run", +# here what you need to do: +#set breakpoint pending on +#break starpu_mpi.c:419 + +define starpu-print-job + set language c + set $job = (struct _starpu_job *)$arg0 + + printf "StarPU Job (%p)\n", $job + if ($job != 0) + printf "\ttask:\t\t\t\t<%p>\n", $job->task + printf "\tsubmitted:\t\t\t<%d>\n", $job->submitted + printf "\tterminated:\t\t\t<%d>\n", $job->terminated + printf "\tjob_id:\t\t\t\t<%d>\n", $job->job_id + if $job->task && $job->task->name + printf "\tname:\t\t\t\t<%s>\n", $job->task->name + end + end +end + +define starpu-print-task + set language c + set $task = (struct starpu_task *)$arg0 + set $job = (struct _starpu_job *)$task->starpu_private + set $status=0 + + printf "StarPU Task (%p)\n", $task + if $task->name + printf "\tname:\t\t\t\t<%s>\n", $task->name + end + printf "\tcodelet:\t\t\t<%p>\n", $task->cl + if $task->cl && $task->cl->name + printf "\tcodelet name:\t\t\t<%s>\n", $task->cl->name + end + set 
$nbuffers = $task->nbuffers + if $task->cl && $task->cl->nbuffers != -1 + set $nbuffers = $task->cl->nbuffers + end + printf "\tnbuffers:\t\t\t<%d>\n", $nbuffers + printf "\tcallback:\t\t\t<%p>\n", $task->callback_func + printf "\tcl_arg:\t\t\t\t<%p>\n", $task->cl_arg + printf "\tsynchronous:\t\t\t<%d>\n", $task->synchronous + printf "\texecute_on_a_specific_worker:\t<%d>\n", $task->execute_on_a_specific_worker + printf "\tworkerid:\t\t\t<%d>\n", $task->workerid + printf "\tpriority:\t\t\t<%d>\n", $task->priority + printf "\tdetach:\t\t\t\t<%d>\n", $task->detach + printf "\tdestroy:\t\t\t<%d>\n", $task->destroy + printf "\tregenerate:\t\t\t<%d>\n", $task->regenerate + printf "\tstatus:\t\t\t\t" + if $task->status == 0 + printf "STARPU_TASK_INIT" + end + if $task->status == 1 + printf "STARPU_TASK_BLOCKED" + end + if $task->status == 2 + printf "STARPU_TASK_READY" + end + if $task->status == 3 + printf "STARPU_TASK_RUNNING" + end + if $task->status == 4 + printf "STARPU_TASK_FINISHED" + end + if $task->status == 5 + printf "STARPU_TASK_BLOCKED_ON_TAG" + end + if $task->status == 6 + printf "STARPU_TASK_BLOCKED_ON_TASK" + end + if $task->status == 7 + printf "STARPU_TASK_BLOCKED_ON_DATA" + end + if $task->status == 8 + printf "STARPU_TASK_STOPPED" + end + printf "\n" + printf "\tjob:\t\t\t\t<%p>\n", $job + printf "\ttag_id:\t\t\t\t<%d>\n", $task->tag_id + printf "\tndeps:\t\t\t\t<%u>\n", $job->job_successors->ndeps + printf "\tndeps_remaining:\t\t<%u>\n", $job->job_successors->ndeps - $job->job_successors->ndeps_completed + if _starpu_debug + set $n = 0 + while $n < $job->job_successors->ndeps + if ! $job->job_successors->done[$n] + set $cg = $job->job_successors->deps[$n] + set $m = 0 + printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps + while $m < $cg->ndeps + if ! 
$cg->done[$m] + set $depj = (struct _starpu_job *) $cg->deps[$m] + printf "\t\t\ttask %p\n", $depj->task + end + set $m = $m + 1 + end + end + set $n = $n + 1 + end + end + printf "\tndeps_completed:\t\t<%u>\n", $job->job_successors->ndeps_completed + printf "\tnsuccs:\t\t\t\t<%u>\n", $job->job_successors->nsuccs + printf "\tparent:\t\t\t\t<%lu>\n", $task->bubble_parent + if $job + starpu-print-job $job + end +end +# Print the task given as $arg0, then one line per successor of its job (application waiter, tag, or dependent task) +define starpu-print-task-and-successor + set language c + set $t = (struct starpu_task *) ($arg0) + starpu-print-task $t + set $j = (struct _starpu_job *) $t->starpu_private + set $nsuccs = $j->job_successors.nsuccs + set $i = 0 + while $i < $nsuccs + set $cg = $j->job_successors.succ[$i] + if ($cg->cg_type == 1) + # STARPU_CG_APPS + printf "waited for by application\n" + end + if ($cg->cg_type == 2) + # STARPU_CG_TAG + printf "will produce tag %x\n", $cg->succ.tag + end + if ($cg->cg_type == 4) + # STARPU_CG_TASK + printf "dep of job %p task %p\n", $cg->succ.job, $cg->succ.job->task + starpu-print-task $cg->succ.job->task + end + set $i = $i + 1 + end +end + +define starpu-print-tasks-treelist + set $list = $arg0 + set $task = $list->_head + while $task != 0 + starpu-print-task $task + set $task = $task->_next + end +end + +define starpu-print-tasks-tree + if $arg0 + starpu-print-tasks-tree $arg0->children[0] + set $stage = (struct _starpu_task_prio_list_stage *) $arg0 + starpu-print-tasks-treelist (&($stage->list)) + starpu-print-tasks-tree $arg0->children[1] + end +end + +define starpu-print-tasks-list + if _starpu_debug + starpu-print-tasks-treelist &$arg0.list + else + starpu-print-tasks-tree $arg0.tree.root + end +end + +define starpu-tasks-on-worker + set language c + set $worker=&_starpu_config->workers[$arg0] + starpu-print-tasks-list $worker->local_tasks +end + +define starpu-tasks-on-workers + set language c + set $num=0 + while $num<_starpu_config->topology->nworkers + printf "Worker %s\n", _starpu_config->workers[$num].name +
starpu-tasks-on-worker $num + set $num = $num + 1 + end +end + +define starpu-workers + set language c + set $num=0 + printf "[Id] Name Arch Mask Devid Bindid Workerid Isrunning Isinitialized Status\n" + while $num<_starpu_config->topology->nworkers + set $worker=&_starpu_config->workers[$num] + printf "[%2d] %-40s %-4d %-4d %-5d %-6d %-8d %-9d %-13d", $num, $worker->name, $worker->arch, $worker->worker_mask, \ + $worker->devid, $worker->bindid, $worker->workerid, $worker->worker_is_running, $worker->worker_is_initialized + if $worker->status == STATUS_INVALID + printf " INVALID" + end + if $worker->status == STATUS_UNKNOWN + printf " UNKNOWN" + end + if $worker->status & STATUS_INITIALIZING + printf " INITIALIZING" + end + if $worker->status & STATUS_EXECUTING + printf " EXECUTING" + end + if $worker->status & STATUS_CALLBACK + printf " CALLBACK" + end + if $worker->status & STATUS_SCHEDULING + printf " SCHEDULING" + end + if $worker->status & STATUS_WAITING + printf " WAITING" + end + if $worker->status & STATUS_SLEEPING + printf " SLEEPING" + end + printf "\n" + set $num = $num + 1 + end +end + +define starpu-print-tag + set language c + set $tag_struct = (struct _starpu_tag *)_gettag_struct($arg0) + printf "tag %d ((struct _starpu_tag *) %p)\n", $arg0, $tag_struct + printf "\tstate " + if $tag_struct->state == STARPU_INVALID_STATE + printf "STARPU_INVALID_STATE" + end + if $tag_struct->state == STARPU_ASSOCIATED + printf "STARPU_ASSOCIATED" + end + if $tag_struct->state == STARPU_BLOCKED + printf "STARPU_BLOCKED" + end + if $tag_struct->state == STARPU_READY + printf "STARPU_READY" + end + if $tag_struct->state == STARPU_DONE + printf "STARPU_DONE" + end + printf "\n" + printf "\tdeps %d\n", $tag_struct->tag_successors.ndeps + printf "\tcompleted %d\n", $tag_struct->tag_successors.ndeps_completed + printf "\tndeps_remaining:\t\t<%u>\n", $tag_struct->tag_successors->ndeps - $tag_struct->tag_successors->ndeps_completed + if _starpu_debug + set $n = 0 + while $n < 
$tag_struct->tag_successors->ndeps + if ! $tag_struct->tag_successors->done[$n] + set $cg = $tag_struct->tag_successors->deps[$n] + set $m = 0 + printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps + while $m < $cg->ndeps + if ! $cg->done[$m] + set $dept = (starpu_tag_t) $cg->deps[$m] + printf "\t\t\ttag %u\n", $dept + end + set $m = $m + 1 + end + end + set $n = $n + 1 + end + end + printf "\tndeps_completed:\t\t<%u>\n", $tag_struct->tag_successors->ndeps_completed + printf "\tnsuccs:\t\t\t\t<%u>\n", $tag_struct->tag_successors->nsuccs +end + +define starpu-tags + set language c + printf "tags htbl %p\n", tag_htbl + set $tags = tag_htbl + while $tags + starpu-print-tag $tags->id + set $tags = (struct _starpu_tag_table *) $tags.hh.next + end +end +# Print submitted/ready counters summed over the scheduling contexts, then the tasks each worker currently holds +define starpu-tasks + set language c + set $num=0 + set $nsubmitted=0 + set $nready=0 + while $num<_starpu_config->topology->nsched_ctxs + set $nsubmitted = $nsubmitted + _starpu_config->sched_ctxs[$num]->tasks_barrier->barrier->reached_start + set $nready = $nready + _starpu_config->sched_ctxs[$num]->ready_tasks_barrier->barrier->reached_start + set $num = $num + 1 + end + printf "%d submitted tasks\n", $nsubmitted + printf "%d ready tasks\n", $nready + printf "Tasks being run:\n" + set $n = 0 + while $n < _starpu_config.topology.nworkers + set $w = &_starpu_config.workers[$n] + printf "worker %d %s:\n", $n, $w->short_name + if $w->pipeline_length > 0 + set $m = 0 + while $m < $w->ntasks + set $t = $w->current_tasks[($w->first_task + $m) % (sizeof($w->current_tasks)/sizeof($w->current_tasks[0]))] + printf " task %p\n", $t + set $m = $m + 1 + end + end + set $task = $w->current_task + if ($task) + printf " task %p\n", $task + end + set $n = $n + 1 + end + if (tag_htbl) + printf "TODO: tags\n" + end + printf "TODO: complete\n" +end + +define starpu-print-all-tasks + set language c + if !
_starpu_debug + printf "you need to configure with --enable-debug to get starpu-print-all-tasks working\n" + else + set $l = all_jobs_list->next + while $l != &all_jobs_list + set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted)) + printf "task %p\n", $j->task + starpu-print-task $j->task + set $l = $l->next + end + end +end + +define starpu-all-tasks + set language c + if ! _starpu_debug + printf "you need to configure with --enable-debug to get starpu-all-tasks working\n" + else + set $l = all_jobs_list->next + while $l != &all_jobs_list + set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted)) + set $task = $j->task + if $task->name + printf "task %p %s\n", $task, $task->name + else + printf "task %p\n", $task + end + set $l = $l->next + end + end +end + +define starpu + printf "Here I am...\n" +end + +define starpu-print-mode + if ($arg0 & 1) + printf "R" + end + if ($arg0 & 2) + printf "W" + end + if ($arg0 & 4) + printf " SCRATCH" + end + if ($arg0 & 8) + printf " REDUX" + end +end + +define starpu-print-data + set language c + set $data = (starpu_data_handle_t) $arg0 + printf "Data handle %p\n", $data + if $data->ops->interfaceid == 0 + printf "Matrix\n" + end + if $data->ops->interfaceid == 1 + printf "Block\n" + end + if $data->ops->interfaceid == 2 + printf "Vector\n" + end + if $data->ops->interfaceid == 3 + printf "CSR\n" + end + if $data->ops->interfaceid == 4 + printf "BCSR\n" + end + if $data->ops->interfaceid == 5 + printf "Variable\n" + end + if $data->ops->interfaceid == 6 + printf "Void\n" + end + if $data->ops->interfaceid == 7 + printf "Multiformat\n" + end + if $data->ops->interfaceid == 8 + printf "COO\n" + end + if $data->ops->interfaceid == 9 + printf "Tensor\n" + end + if $data->ops->interfaceid > 9 + printf "Interface id %d\n", $data->ops->interfaceid + end + printf "Home node %d\n", $data->home_node + printf "RWlock 
refs %d\n", $data->refcnt + printf "Busy count %d\n", $data->busy_count + printf "Initialization submitted %d\n", $data->initialized + printf "Current mode " + starpu-print-mode $data->current_mode + printf "\n" + if $data->mpi_data + printf "TAG %ld\n",((struct _starpu_mpi_data *) ($data->mpi_data))->node_tag.data_tag + end + if $data->current_mode & (4|8) + set $n = 0 + while $n < _starpu_config.topology.nworkers + set $replicate = $data->per_worker[$n] + printf "Worker %2d %10s:", $n, _starpu_config->workers[$n]->name + if $replicate.state == 0 + printf " OWNER" + end + if $replicate.state == 1 + printf " SHARED" + end + if $replicate.state == 2 + printf " INVALID" + end + if $replicate.initialized + printf " initialized" + end + printf "\n" + set $n = $n + 1 + end + else + set $n = 0 + while $n < _starpu_descr.nnodes + set $replicate = &$data->per_node[$n] + printf "Node %2d (%2d):", $n, $replicate->refcnt + if $replicate.state == 0 + printf " OWNER" + end + if $replicate.state == 1 + printf " SHARED" + end + if $replicate.state == 2 + printf " INVALID" + end + if $replicate.initialized + printf " initialized" + end + printf "\n" + set $m = 0 + while $m < _starpu_descr.nnodes + if $replicate->request[$m] + printf " request %p from %d\n", $replicate->request[$m], $m + end + set $m = $m + 1 + end + set $n = $n + 1 + end + set $r = $data->write_invalidation_req + if $r + printf "w_req %p for %d\n", $r, $r->dst_replicate->memory_node + end + end + printf "Post sync tasks\n" + set $tasklist = $data->post_sync_tasks + while $tasklist != 0x0 + starpu-print-task $tasklist->task + set $tasklist = $tasklist->next + end + if _starpu_debug + printf "Requester tasks\n" + set $requesterlist = $data->req_list.list._head + while $requesterlist != 0x0 + printf "mode: " + starpu-print-mode $requesterlist->mode + printf "\n" + starpu-print-job $requesterlist->j + set $requesterlist = $requesterlist->_next + end + printf "Arbitered requester tasks\n" + set $requesterlist = 
$data->arbitered_req_list.list._head + while $requesterlist != 0x0 + printf "mode: " + starpu-print-mode $requesterlist->mode + printf "\n" + starpu-print-job $requesterlist->j + set $requesterlist = $requesterlist->_next + end + else + printf "Printing requester tasks not supported yet without --enable-debug, sorry!\n" + end + if ($data->nchildren) + printf "%d children\n", $data->nchildren + end +end + +define starpu-print-datas + set $entry = registered_handles + while $entry + starpu-print-data $entry->handle + printf "\n" + set $entry = (struct handle_entry *) $entry.hh.next + end +end + +define starpu-print-datas-summary + set language c + set $entry = registered_handles + + set $data_n = 0 + set $pw_data_n = 0 + set $data_n_allocated = 0 + set $replicate_n_owners = 0 + set $replicate_n_shared = 0 + set $replicate_n_invalid = 0 + set $replicate_n_initialized = 0 + set $replicate_n_allocated = 0 + set $pw_replicate_n_owners = 0 + set $pw_replicate_n_shared = 0 + set $pw_replicate_n_invalid = 0 + set $pw_replicate_n_initialized = 0 + set $pw_replicate_n_allocated = 0 + # $data_allocated is a per-handle flag: it is cleared for each handle so one allocated handle is not counted for all later ones + while $entry + set $data = (starpu_data_handle_t) $entry->handle + set $data_allocated = 0 + if $data->current_mode & (4|8) + set $pw_data_n = $pw_data_n + 1 + set $n = 0 + while $n < _starpu_config.topology.nworkers + set $replicate = $data->per_worker[$n] + if $replicate.state == 0 + set $pw_replicate_n_owners = $pw_replicate_n_owners + 1 + end + if $replicate.state == 1 + set $pw_replicate_n_shared = $pw_replicate_n_shared + 1 + end + if $replicate.state == 2 + set $pw_replicate_n_invalid = $pw_replicate_n_invalid + 1 + end + if $replicate.initialized + set $pw_replicate_n_initialized = $pw_replicate_n_initialized + 1 + end + if $replicate.allocated + set $pw_replicate_n_allocated = $pw_replicate_n_allocated + 1 + end + set $n = $n + 1 + end + else + set $data_n = $data_n + 1 + set $n = 0 + while $n < _starpu_descr.nnodes + set $replicate = &$data->per_node[$n] + if $replicate.state == 0 + set $replicate_n_owners =
$replicate_n_owners + 1 + end + if $replicate.state == 1 + set $replicate_n_shared = $replicate_n_shared + 1 + end + if $replicate.state == 2 + set $replicate_n_invalid = $replicate_n_invalid + 1 + end + if $replicate.initialized + set $replicate_n_initialized = $replicate_n_initialized + 1 + end + if $replicate.allocated + set $replicate_n_allocated = $replicate_n_allocated + 1 + set $data_allocated = 1 + end + set $n = $n + 1 + end + if $data_allocated + set $data_n_allocated = $data_n_allocated + 1 + end + end + set $entry = (struct handle_entry *) $entry.hh.next + end + printf "Number of handles: %d\n", $data_n + printf "Number of allocated handles: %d\n", $data_n_allocated + printf "Number of OWNER replicates: %d\n", $replicate_n_owners + printf "Number of SHARED replicates: %d\n", $replicate_n_shared + printf "Number of INVALID replicates: %d\n", $replicate_n_invalid + printf "Number of initialized replicates: %d\n", $replicate_n_initialized + printf "Number of allocated replicates: %d\n", $replicate_n_allocated + + printf "Number of per-worker handles: %d\n", $pw_data_n + printf "Number of OWNER per-worker replicates: %d\n", $pw_replicate_n_owners + printf "Number of SHARED per-worker replicates: %d\n", $pw_replicate_n_shared + printf "Number of INVALID per-worker replicates: %d\n", $pw_replicate_n_invalid + printf "Number of initialized per-worker replicates: %d\n", $pw_replicate_n_initialized + printf "Number of allocated per-worker replicates: %d\n", $pw_replicate_n_allocated +end + +define starpu-print-replicate + set $repl = (struct _starpu_data_replicate*) $arg0 + printf "Handle %p\n", $repl->handle + printf "Interface %p\n", $repl->data_interface + printf "Refcnt %u\n", $repl->refcnt + printf "Node %u\n", $repl->memory_node + if $repl->state == 0 + printf "Owner\n" + end + if $repl->state == 1 + printf "Shared\n" + end + if $repl->state == 2 + printf "Invalid\n" + end + if $repl->relaxed_coherency + printf " relaxed_coherency" + end + if 
$repl->initialized + printf " initialized" + end + if $repl->allocated + printf " allocated" + end + if $repl->automatically_allocated + printf " automatically allocated" + end + printf "\n" + printf "MC %p\n", $repl->mc +end + +define starpu-print-request + set $r = (struct _starpu_data_request *)$arg0 + printf "Request %p\n", $r + printf "Origin %s\n", $r->origin + printf "Refcnt %d\n", $r->refcnt + printf "Handle %p\n", $r->handle + printf "src_replicate %p", $r->src_replicate + if $r->src_replicate + printf " (%d)", $r->src_replicate->memory_node + end + printf "\n" + printf "dst_replicate %p", $r->dst_replicate + if $r->dst_replicate + printf " (%d)", $r->dst_replicate->memory_node + end + printf "\n" + printf "handling_node %d\n", $r->handling_node + if ($r->mode & 1) + printf "R" + end + if ($r->mode & 2) + printf "W" + end + if ($r->mode & 4) + printf "S" + end + if ($r->mode & 8) + printf "X" + end + printf "\n" + printf "completed: %d\n", $r->completed + printf "prefetch: %d\n", $r->prefetch + printf "retval: %d\n", $r->retval + printf "ndeps: %d\n", $r->ndeps + printf "next_req_count: %d\n", $r->next_req_count + set $c = 0 + while $c < $r->next_req_count + printf " %p\n", $r->next_req[$c] + set $c = $c + 1 + end + printf "comid: %u\n", $r->com_id + set $c = $r->callbacks + while $c != 0 + printf "callback: %p %p\n", $c->callback_func, $c->callback_arg + set $c = $c->next + end +end + +define starpu-print-requests-treelist + set $list = $arg0 + set $request = $list->_head + while $request != 0 + printf " Request %p: handle %p ", $request, $request->handle + starpu-print-mode $request->mode + printf "\n" + set $request = $request->_next + end +end + +define starpu-print-requests-tree + if $arg0 + starpu-print-requests-tree $arg0->children[0] + set $stage = (struct _starpu_data_request_prio_list_stage *) $arg0 + starpu-print-requests-treelist (&($stage->list)) + starpu-print-requests-tree $arg0->children[1] + end +end + +define starpu-print-requests-list + 
if _starpu_debug + starpu-print-requests-treelist &$arg0.list + else + starpu-print-requests-tree $arg0.tree.root + end +end + +define starpu-print-requests + set $node = 0 + while $node < _starpu_descr.nnodes + printf "Node %u:\n", $node + set $node2 = 0 + while $node2 < _starpu_descr.nnodes + starpu-print-requests-list _starpu_config.nodes[$node].data_requests[$node2][0] + starpu-print-requests-list _starpu_config.nodes[$node].data_requests[$node2][1] + set $node2 = $node2 + 1 + end + set $node = $node + 1 + end +end + +define starpu-print-prequests + set $node = 0 + while $node < _starpu_descr.nnodes + printf "Node %u:\n", $node + set $node2 = 0 + while $node2 < _starpu_descr.nnodes + if _starpu_config.nodes[$node].data_requests_npending[$node2][0] + printf "%u pending requests from %u\n", _starpu_config.nodes[$node].data_requests_npending[$node2][0], $node2 + end + starpu-print-requests-list _starpu_config.nodes[$node].data_requests_pending[$node2][0] + if _starpu_config.nodes[$node].data_requests_npending[$node2][1] + printf "%u pending requests to %u\n", _starpu_config.nodes[$node].data_requests_npending[$node2][1], $node2 + end + starpu-print-requests-list _starpu_config.nodes[$node].data_requests_pending[$node2][1] + set $node2 = $node2 + 1 + end + set $node = $node + 1 + end +end + +define starpu-print-arch + set $arch = (struct starpu_perfmodel_arch *)$arg0 + set $device = 0 + while $device < $arch->ndevices + printf " Device type %d - devid: %d - ncores: %d\n", $arch->devices[$device].type, $arch->devices[$device].devid, $arch->devices[$device].ncores + set $device = $device + 1 + end +end + +define starpu-print-archs + set $comb = 0 + while $comb < current_arch_comb + printf "Combination %d with %d devices\n", $comb, arch_combs[$comb]->ndevices + starpu-print-arch arch_combs[$comb] + set $comb = $comb + 1 + end +end + +define starpu-print-frequests + set $node = 0 + while $node < _starpu_descr.nnodes + printf "Node %u:\n", $node + set $node2 = 0 + while 
$node2 < _starpu_descr.nnodes
+      starpu-print-requests-list _starpu_config.nodes[$node].prefetch_requests[$node2][0]
+      starpu-print-requests-list _starpu_config.nodes[$node].prefetch_requests[$node2][1]
+      set $node2 = $node2 + 1
+    end
+    set $node = $node + 1
+  end
+end
+
+define starpu-print-irequests
+  set $node = 0
+  while $node < _starpu_descr.nnodes
+    printf "Node %u:\n", $node
+    set $node2 = 0
+    while $node2 < _starpu_descr.nnodes
+      starpu-print-requests-list _starpu_config.nodes[$node].idle_requests[$node2][0]
+      starpu-print-requests-list _starpu_config.nodes[$node].idle_requests[$node2][1]
+      set $node2 = $node2 + 1
+    end
+    set $node = $node + 1
+  end
+end
+
+# Print, for every memory node, how many memory chunks of its mc_list fall
+# into each category (write-through, home, OOC, prefetch, redux, relaxed,
+# normal and its sub-states, in-flight reads/writes) together with their
+# cumulated size in MiB, followed by the size of the node's chunk cache.
+# This calls _starpu_data_get_size() and may_free_subtree() in the inferior.
+define starpu-memusage
+  # NOTE(review): scheduler-locking is switched on here and never restored.
+  set scheduler-locking on
+  set $node = 0
+  while $node < _starpu_descr.nnodes
+    printf "\n\nNode %u (%dMiB/%dMiB):\n", $node, _starpu_config.nodes[$node].used_size >> 20, _starpu_config.nodes[$node].global_size >> 20
+    # Each category has a chunk counter and a byte counter (suffix _b).
+    set $total = 0
+    set $total_b = 0
+    set $wt = 0
+    set $wt_b = 0
+    set $home = 0
+    set $home_b = 0
+    set $ooc = 0
+    set $ooc_b = 0
+    set $prefetch = 0
+    set $prefetch_b = 0
+    set $redux = 0
+    set $redux_b = 0
+    set $relax = 0
+    set $relax_b = 0
+    set $noref = 0
+    set $noref_b = 0
+    set $normal = 0
+    set $normal_b = 0
+    set $owner = 0
+    set $owner_b = 0
+    set $shared = 0
+    set $shared_b = 0
+    set $invalid = 0
+    set $invalid_b = 0
+    set $nodataref = 0
+    set $nodataref_b = 0
+    set $nosubdataref = 0
+    set $nosubdataref_b = 0
+    set $reading = 0
+    set $reading_b = 0
+    set $writing = 0
+    set $writing_b = 0
+    set $overwriting = 0
+    set $overwriting_b = 0
+    set $mc = _starpu_config.nodes[$node].mc_list->_head
+    # The progress line below is rewritten in place; the pager would break it.
+    set pagination off
+    while $mc != 0
+      set $handle = $mc->data
+      set $replicate = &$handle->per_node[$node]
+      set $size = _starpu_data_get_size($handle)
+      set $total = $total + 1
+      if $total % 100 == 0
+        printf "\rinspected %d data...", $total
+      end
+      set $total_b = $total_b + $size
+      # Only test the write-through bit when $node fits in the mask width.
+      if $node < sizeof($handle->wt_mask) * 8 && $handle->wt_mask & (1 << $node)
+        set $wt = $wt + 1
+        set $wt_b = $wt_b + $size
+      end
+      if $node == $handle->home_node
+        set $home = $home + 1
+        set $home_b = $home_b + $size
+      end
+      if $handle->ooc
+        set $ooc = $ooc + 1
+        set $ooc_b = $ooc_b + $size
+      end
+      if $replicate->nb_tasks_prefetch
+        set $prefetch = $prefetch + 1
+        set $prefetch_b = $prefetch_b + $size
+      end
+      if $mc->relaxed_coherency == 2
+        set $redux = $redux + 1
+        set $redux_b = $redux_b + $size
+      end
+      if $mc->relaxed_coherency == 1
+        set $relax = $relax + 1
+        set $relax_b = $relax_b + $size
+        if $mc->replicate
+          if $mc->replicate->refcnt == 0
+            set $noref = $noref + 1
+            set $noref_b = $noref_b + $size
+          end
+        end
+      end
+      if $mc->relaxed_coherency == 0
+        set $normal = $normal + 1
+        set $normal_b = $normal_b + $size
+        if $replicate->state == STARPU_OWNER
+          set $owner = $owner + 1
+          set $owner_b = $owner_b + $size
+        end
+        if $replicate->state == STARPU_SHARED
+          set $shared = $shared + 1
+          set $shared_b = $shared_b + $size
+        end
+        if $replicate->state == STARPU_INVALID
+          set $invalid = $invalid + 1
+          set $invalid_b = $invalid_b + $size
+        end
+        if (may_free_subtree($handle,$node))
+          set $nosubdataref = $nosubdataref + 1
+          set $nosubdataref_b = $nosubdataref_b + $size
+        end
+        if $replicate->refcnt == 0
+          set $nodataref = $nodataref + 1
+          set $nodataref_b = $nodataref_b + $size
+        else
+          # Count the requests between this node and every other node.
+          set $r = 0
+          set $w = 0
+          set $_node = 0
+          while $_node < _starpu_descr.nnodes
+            if $_node != $node
+              if $handle->per_node[$_node].request[$node] != 0
+                set $r = $r + 1
+              end
+              if $handle->per_node[$node].request[$_node] != 0
+                set $w = $w + 1
+              end
+            end
+            # Increment last, so that node 0 is examined and index nnodes
+            # (one past the last node) is never read.
+            set $_node = $_node + 1
+          end
+          if $r != 0
+            set $reading = $reading + 1
+            set $reading_b = $reading_b + $size
+          end
+          if $w != 0
+            set $writing = $writing + 1
+            set $writing_b = $writing_b + $size
+          end
+          if $replicate->request[$node] != 0
+            set $overwriting = $overwriting + 1
+            set $overwriting_b = $overwriting_b + $size
+          end
+        end
+      end
+      set $mc = $mc->_next
+    end
+    set pagination on
+    printf "\r \r"
+    printf " Total used: %u, %uMiB\n", $total, $total_b / 1048576
+    printf " WT: %u, %uMiB\n", $wt, $wt_b / 1048576
+    printf " home: %u, %uMiB\n", $home, $home_b / 1048576
+    printf " OOC: %u, %uMiB\n", $ooc, $ooc_b / 1048576
+    printf " prefetch: %u, %uMiB\n", $prefetch, $prefetch_b / 1048576
+    printf " redux: %u, %uMiB\n", $redux, $redux_b / 1048576
+    printf " relax: %u, %uMiB\n", $relax, $relax_b / 1048576
+    printf " noref: %u, %uMiB\n", $noref, $noref_b / 1048576
+    printf " normal: %u, %uMiB\n", $normal, $normal_b / 1048576
+    printf " owner: %u, %uMiB\n", $owner, $owner_b / 1048576
+    printf " shared: %u, %uMiB\n", $shared, $shared_b / 1048576
+    printf " invalid: %u, %uMiB\n", $invalid, $invalid_b / 1048576
+    printf " nosubdataref: %u, %uMiB\n", $nosubdataref, $nosubdataref_b / 1048576
+    printf " nodataref: %u, %uMiB\n", $nodataref, $nodataref_b / 1048576
+    printf " reading: %u, %uMiB\n", $reading, $reading_b / 1048576
+    printf " writing: %u, %uMiB\n", $writing, $writing_b / 1048576
+    printf " overwriting: %u, %uMiB\n", $overwriting, $overwriting_b / 1048576
+    printf "\n cached: %u, %uMiB\n", _starpu_config.nodes[$node].mc_cache_nb, _starpu_config.nodes[$node].mc_cache_size / 1048576
+    set $node = $node + 1
+  end
+end
+
+define starpu-print-model
+  set $model = (struct starpu_perfmodel *)$arg0
+  printf "Model %p type %d symbol ", $model, $model->type
+  if $model->symbol
+    printf "%s", $model->symbol
+  else
+    printf "NULL"
+  end
+  printf "\n"
+end
+
+define starpu-print-registered-models
+  set $node = registered_models._head
+  while $node
+    starpu-print-model $node->model
+    set $node = $node->_next
+  end
+end
+
+define starpu-sched-data
+  print _starpu_config.sched_ctxs[$arg0]->policy_data
+end
+
+define starpu-print-spaces
+  set $j = 0
+  while $j < $arg0
+    printf " "
+    set $j = $j + 1
+  end
+end
+
+define starpu-sched-print-component
+  set $c = (struct starpu_sched_component *) $arg1
+  starpu-print-spaces $arg0
+  printf "%s %c %c (struct starpu_sched_component *)
%p\n", $c->name, $c->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS ? 'o':'e', $c->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE ? 's' : 'm', $c + if $c->push_task == fifo_push_task + set $f = &((struct _starpu_fifo_data *) $c->data)->fifo + starpu-print-spaces $arg0 + printf "%d tasks start %f len %f end %f processed %d\n", $f->ntasks, $f->exp_start, $f->exp_len, $f->exp_end, $f->nprocessed + end + if $c->push_task == prio_push_task + set $q = &((struct _starpu_prio_data *) $c->data)->prio + starpu-print-spaces $arg0 + printf "%d tasks start %f len %f end %f processed %d\n", $q->ntasks, $q->exp_start, $q->exp_len, $q->exp_end, $q->nprocessed + end + if $c->push_task == simple_worker_push_task + set $d = (struct _starpu_worker_component_data *) $c->data + set $l = $d->list + starpu-print-spaces $arg0 + printf "%d tasks pipeline %f start %f len %f end %f\n", $l->ntasks, $l->pipeline_len, $l->exp_start, $l->exp_len, $l->exp_end + end +end + +define starpu-sched-print-recur-component + starpu-sched-print-component $arg0 $arg1 + set $i[$arg0] = 0 + while $i[$arg0] < $arg1->nchildren + starpu-sched-print-recur-component ($arg0+1) $arg1->children[$i[$arg0]] + set $i[$arg0] = $i[$arg0] + 1 + end +end + +define starpu-sched-print-modular + set $t = (struct starpu_sched_tree *) _starpu_config.sched_ctxs[$arg0]->policy_data + set $i = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0 } + starpu-sched-print-recur-component 0 $t->root +end + +define starpu-sched-print-lws + set $ws = (struct _starpu_work_stealing_data *) _starpu_config.sched_ctxs[$arg0]->policy_data + set $num = 0 + while $num < _starpu_config->topology->nworkers + printf "Worker %s:", _starpu_config->workers[$num].name + printf " %d tasks", $ws->per_worker[$num].queue.ntasks + if $ws->per_worker[$num].notask + printf " notask" + end + if $ws->per_worker[$num].running + printf " running" + end + if $ws->per_worker[$num].busy + printf " busy" + end + printf " lastpop %d", $ws->per_worker[$num].last_pop_worker + printf 
"\n" + set $num = $num + 1 + end +end + +define starpu-sched-print-dmda + set $dt = (struct _starpu_dmda_data *) _starpu_config.sched_ctxs[$arg0]->policy_data + set $num = 0 + printf "alpha %f beta %f gamma %f power %fW\n", $dt->alpha, $dt->beta, $dt->_gamma, $dt->idle_power + while $num < _starpu_config->topology->nworkers + printf "Worker %s:", _starpu_config->workers[$num].name + printf " %d tasks", $dt->queue_array[$num].ntasks + printf " start %f", $dt->queue_array[$num].exp_start + printf " len %f", $dt->queue_array[$num].exp_len + printf " end %f", $dt->queue_array[$num].exp_end + printf " piped %f", $dt->queue_array[$num].pipeline_len + printf "\n" + set $num = $num + 1 + end +end + +define starpu-mpi-print-request + set $request = (struct _starpu_mpi_req *)$arg0 + printf "Request (struct _starpu_mpi_req *) %p data %p tag %ld to MPI node %d type ", $request, $request->data_handle, $request->data_handle && $request->data_handle->mpi_data ? ((struct _starpu_mpi_data *) ($request->data_handle->mpi_data))->node_tag.data_tag : -1, $request->node_tag.node.rank, + if $request->request_type == SEND_REQ + printf "SEND_REQ" + end + if $request->request_type == RECV_REQ + printf "RECV_REQ" + end + if $request->request_type == WAIT_REQ + printf "WAIT_REQ" + end + if $request->request_type == TEST_REQ + printf "TEST_REQ" + end + if $request->request_type == BARRIER_REQ + printf "BARRIER_REQ" + end + if $request->request_type == PROBE_REQ + printf "PROBE_REQ" + end + if $request->request_type == UNKNOWN_REQ + printf "UNKNOWN_REQ" + end + printf " submitted %d completed %d posted %d detached %d\n", $request->submitted, $request->completed, $request->posted, $request->detached +end + +define starpu-mpi-print-ready-recv-requests + set $list = (struct _starpu_mpi_req_list) ready_recv_requests + if $list + set $request = $list.list._head + while $request + starpu-mpi-print-request $request + set $request = $request->_next + end + else + printf "No ready recv requests\n" + end 
+end + +define starpu-mpi-print-requests-list + set $list = $arg0 + set $request = $list._head + while $request + starpu-mpi-print-request $request + set $request = $request->_next + end +end + +define starpu-mpi-print-requests-tree + if $arg0 + starpu-mpi-print-requests-tree $arg0->children[0] + set $stage = (struct _starpu_mpi_req_prio_list_stage *) $arg0 + starpu-mpi-print-requests-list (&($stage->list)) + starpu-mpi-print-requests-tree $arg0->children[1] + end +end + +define starpu-mpi-print-ready-send-requests + set $prio_list = (struct _starpu_mpi_req_prio_list) ready_send_requests + if _starpu_debug + if $prio_list + starpu-mpi-print-requests-list $prio_list.list + else + printf "No ready send requests\n" + end + else + if $prio_list.empty == 0 + starpu-mpi-print-requests-tree $prio_list.tree.root + else + printf "No ready send requests\n" + end + end +end + +define starpu-mpi-print-detached-requests + set $list = (struct _starpu_mpi_req_list) detached_requests + if $list + set $request = $list._head + while $request + starpu-mpi-print-request $request + set $request = $request->_next + end + else + printf "No detached requests\n" + end +end + +define starpu-mpi-print-early-data + set $hash = (struct _starpu_mpi_early_data_handle_hashlist *)_starpu_mpi_early_data_handle_hashmap + if $hash + while $hash + printf "Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag + set $list = (struct _starpu_mpi_early_data_handle_list *) $hash->list + if $list + set $data = (struct _starpu_mpi_early_data_handle *)$list->_head + while $data + starpu-mpi-print-request $data->req + set $data = $data->_next + end + end + set $hash = (struct _starpu_mpi_early_data_handle_hashlist *) $hash->hh.next + end + else + printf "No early data\n" + end +end + +define starpu-mpi-print-early-requests + set $hash = (struct _starpu_mpi_early_request_hashlist *)_starpu_mpi_early_request_hash + if $hash + while $hash + printf 
"Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag + set $list = (struct _starpu_mpi_req_list*) $hash->list + if $list + set $request = $list->_head + while $request + starpu-mpi-print-request $request + set $request = $request->_next + end + end + set $hash = (struct _starpu_mpi_early_request_hashlist *) $hash->hh.next + end + else + printf "No early request\n" + end +end + +define starpu-mpi-print-sync-data + set $hash = (struct _starpu_mpi_sync_data_handle_hashlist *)_starpu_mpi_sync_data_handle_hashmap + if $hash + while $hash + printf "Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag + set $list = (struct _starpu_mpi_req_list *) $hash->list + if $list + set $request = $list->_head + while $request + starpu-mpi-print-request $request + set $request = $request->_next + end + end + set $hash = (struct _starpu_mpi_sync_data_handle_hashlist *) $hash->hh.next + end + else + printf "No sync data\n" + end +end + +document starpu +List of StarPU-specific gdb functions: +starpu-workers prints a list of the StarPU workers +starpu-tasks-on-workers prints a list of the tasks queued on workers +starpu-tasks-on-worker prints a list of the tasks queued on the given worker +starpu-print-job prints a StarPU job +starpu-print-task prints a StarPU task +starpu-print-all-tasks prints all StarPU tasks +starpu-print-task-and-successor prints a StarPU task and its successors +starpu-print-data prints a StarPU data handle +starpu-print-datas prints all StarPU data handles +starpu-print-datas-summary prints a summary of data handles +starpu-print-replicate prints a StarPU data replicate +starpu-print-request prints a StarPU data request +starpu-print-prequests prints all pending StarPU data requests +starpu-print-requests prints all queued StarPU data requests +starpu-print-frequests prints all queued StarPU prefetch data requests +starpu-print-irequests prints 
all queued StarPU idle data requests +starpu-tasks prints a summary of the tasks flowing in StarPU +starpu-all-tasks prints a list of all the tasks flowing in StarPU +starpu-tags prints a list of the tags known to StarPU +starpu-print-tag prints a given tag +starpu-memusage prints the memory node usage +starpu-print-archs prints all known arch combinations +starpu-print-arch prints a given arch combination +starpu-print-registered-models prints all registered performance models +starpu-print-model prints a given performance model +starpu-sched-data prints the data of the given scheduler +starpu-sched-print-modular prints the hierarchy of modular scheduling components +starpu-sched-print-lws prints the state of the lws scheduler +starpu-sched-print-dmda prints the state of any of the dm* schedulers +starpu-mpi-print-ready-recv-requests prints all MPI ready recv requests +starpu-mpi-print-ready-send-requests prints all MPI ready send requests +starpu-mpi-print-detached-requests prints all MPI detached requests +starpu-mpi-print-early-data prints all MPI early received data +starpu-mpi-print-early-requests prints all MPI early requests +starpu-mpi-print-sync-data prints all MPI sync data +end diff --git a/tools/loader.c b/tools/loader.c new file mode 100644 index 0000000..804797d --- /dev/null +++ b/tools/loader.c @@ -0,0 +1,505 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* NOTE(review): the header names of the #include directives were lost when
+ * this patch was flattened. The list below is reconstructed from the
+ * identifiers used in this file; <common/config.h> is presumed to be the
+ * configure-generated header providing STARPU_SRC_DIR, STARPU_GDB_PATH, etc.
+ * Confirm against the upstream tarball. */
+#include <common/config.h>
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
+#ifdef STARPU_QUICK_CHECK
+/* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s
+   add some extra times for tests which run with all schedulers
+*/
+#define DEFAULT_TIMEOUT 100
+#elif !defined(STARPU_LONG_CHECK)
+/* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */
+#define DEFAULT_TIMEOUT 300
+#else
+/* Long checks can be very long */
+#define DEFAULT_TIMEOUT 1000
+#endif
+#define AUTOTEST_SKIPPED_TEST 77
+
+/* Capacity, terminating NULL included, of the argv built for the test */
+#define LAUNCHER_ARGV_MAX 100
+
+static pid_t child_pid = 0;
+static int timeout;
+
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+/* gettimeofday() replacement built on GetSystemTimeAsFileTime().
+ * tz is ignored. Always returns 0. */
+static int mygettimeofday(struct timeval *tv, void *tz)
+{
+	(void) tz;
+	if (tv)
+	{
+		FILETIME ft;
+		unsigned long long res;
+		GetSystemTimeAsFileTime(&ft);
+		/* 100-nanosecond intervals since January 1, 1601 */
+		res = ft.dwHighDateTime;
+		res <<= 32;
+		res |= ft.dwLowDateTime;
+		res /= 10;
+		/* Now we have microseconds */
+		res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
+		/* Now we are based on epoch */
+		tv->tv_sec = res / 1000000ULL;
+		tv->tv_usec = res % 1000000ULL;
+	}
+	return 0;
+}
+#else
+#define mygettimeofday(tv,tz) gettimeofday(tv,tz)
+#endif
+
+#ifdef STARPU_GDB_PATH
+/* Run gdb in batch mode on the core file `core' of program `exe' and wait
+ * for it to finish. gdb is run through `$top_builddir/libtool
+ * --mode=execute' when $top_builddir is set, directly otherwise.
+ * Returns -1 when the core file cannot be used or fork() fails, 0 otherwise. */
+static int try_launch_gdb(const char *exe, const char *core)
+{
+# define GDB_COMMANDS \
+	"-ex", "py-list", \
+	"-ex", "starpu-tasks", \
+	"-ex", "starpu-workers", \
+	"-ex", "starpu-print-datas-summary", \
+	"-ex", "starpu-memusage", \
+	"-ex", "starpu-print-archs", \
+	"-ex", "starpu-print-registered-models", \
+	"-ex", "bt full", \
+	"-ex", "py-bt", \
+	"-ex", "thread apply all bt full", \
+	"-ex", "thread apply all py-bt", \
+
+	int err;
+	pid_t pid;
+	struct stat st;
+	const char *top_builddir;
+	char *gdb;
+
+	err = stat(core, &st);
+	if (err != 0)
+	{
+		fprintf(stderr, "while looking for core file of %s: %s: %m\n",
+			exe, core);
+		return -1;
+	}
+
+	/* S_IFREG is one value of the S_IFMT field, not a flag: testing the
+	 * bit alone would also accept other file types, e.g. sockets. */
+	if (!S_ISREG(st.st_mode))
+	{
+		fprintf(stderr, "%s: not a regular file\n", core);
+		return -1;
+	}
+
+	top_builddir = getenv("top_builddir");
+
+	pid = fork();
+	switch (pid)
+	{
+	case 0:					  /* kid */
+		if (top_builddir != NULL)
+		{
+			/* Run gdb with Libtool. */
+			gdb = alloca(strlen(top_builddir)
+				     + sizeof("/libtool") + 1);
+			strcpy(gdb, top_builddir);
+			strcat(gdb, "/libtool");
+			err = execl(gdb, "gdb", "--mode=execute",
+				    STARPU_GDB_PATH, "--batch",
+				    GDB_COMMANDS
+				    exe, core, NULL);
+		}
+		else
+		{
+			/* Run gdb directly */
+			gdb = STARPU_GDB_PATH;
+			err = execl(gdb, "gdb", "--batch",
+				    GDB_COMMANDS
+				    exe, core, NULL);
+		}
+		/* execl() only returns on failure */
+		if (err != 0)
+		{
+			fprintf(stderr, "while launching `%s': %m\n", gdb);
+			exit(EXIT_FAILURE);
+		}
+		exit(EXIT_SUCCESS);
+		break;
+
+	case -1:
+		fprintf(stderr, "fork: %m\n");
+		return -1;
+
+	default:				  /* parent */
+	{
+		pid_t who;
+		int status;
+		who = waitpid(pid, &status, 0);
+		if (who != pid)
+			fprintf(stderr, "while waiting for gdb "
+				"process %d: %m\n", pid);
+	}
+	}
+	return 0;
+# undef GDB_COMMANDS
+}
+#endif	/* STARPU_GDB_PATH */
+
+/* Inspect the core dump left by the child with gdb: first try
+ * `core.<child pid>', then plain `core'. No-op without STARPU_GDB_PATH. */
+static void launch_gdb(const char *exe)
+{
+#ifdef STARPU_GDB_PATH
+	char s[32];
+	snprintf(s, sizeof(s), "core.%d", (int) child_pid);
+	if (try_launch_gdb(exe, s) < 0)
+		try_launch_gdb(exe, "core");
+#else
+	(void) exe;
+#endif	/* STARPU_GDB_PATH */
+}
+
+static char *test_name;
+
+/* SIGALRM handler: the test ran longer than `timeout' seconds. Send SIGQUIT
+ * to the child's process group, reap the child, run gdb on its core file,
+ * then re-raise SIGALRM (the handler is installed with SA_RESETHAND, so the
+ * default action applies) and exit with failure. */
+static void test_cleaner(int sig)
+{
+	pid_t child_gid;
+	int status;
+	(void) sig;
+
+	// send signal to all loader family members
+	fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout);
+	child_gid = getpgid(child_pid);
+	kill(-child_gid, SIGQUIT);
+	waitpid(child_pid, &status, 0);
+	launch_gdb(test_name);
+	raise(SIGALRM);
+	exit(EXIT_FAILURE);
+}
+
+/* Handler forwarding signal `sig' to the child's process group */
+static void forwardsig(int sig)
+{
+	pid_t child_gid;
+	child_gid = getpgid(child_pid);
+	kill(-child_gid, sig);
+}
+
+/* Replace the first occurrence of `motif' in *src by `value'.
+ * On a match, *src is pointed to a newly allocated string (the previous
+ * string is not freed) and 1 is returned; otherwise *src is left untouched
+ * and 0 is returned. */
+static int _decode(char **src, char *motif, const char *value)
+{
+	char *found;
+
+	found = strstr(*src, motif);
+	if (found == NULL) return 0;
+
+	/* calloc() zero-fills the buffer, so the prefix copied by strncpy()
+	 * is NUL-terminated for the strcat() calls that follow */
+	char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1);
+
+	strncpy(new_src, *src, found - *src);
+	strcat(new_src, value);
+	strcat(new_src, found+strlen(motif));
+
+	*src = new_src;
+	return 1;
+}
+
+/* Replace every occurrence of `motif' in *src by `value'. Does nothing when
+ * *src is NULL; exits with an error when `motif' is present but `value' is
+ * NULL.
+ * NOTE(review): this never terminates if `value' itself contains `motif'. */
+static void decode(char **src, char *motif, const char *value)
+{
+	if (*src)
+	{
+		if (strstr(*src, motif) && value == NULL)
+		{
+			fprintf(stderr, "error: $%s undefined\n", motif);
+			exit(EXIT_FAILURE);
+		}
+		int d = _decode(src, motif, value);
+		while (d)
+			d = _decode(src, motif, value);
+	}
+}
+
+/* Append `arg' to the argv being built in `args' (capacity
+ * LAUNCHER_ARGV_MAX), or exit with an error when it is already full. */
+static void push_arg(char **args, int *nargs, char *arg)
+{
+	if (*nargs >= LAUNCHER_ARGV_MAX)
+	{
+		fprintf(stderr, "[error] too many arguments (more than %d) to launch '%s'\n",
+			LAUNCHER_ARGV_MAX - 1, test_name);
+		exit(EXIT_FAILURE);
+	}
+	args[(*nargs)++] = arg;
+}
+
+/* Usage: loader [-t timeout] (-p dir name | program) [args...]
+ *
+ * Run the given test program in its own process group, optionally through
+ * `$top_builddir/libtool --mode=execute $STARPU_CHECK_LAUNCHER
+ * $STARPU_CHECK_LAUNCHER_ARGS', and kill it once the timeout expires.
+ * Shell scripts (*.sh) are exec'ed directly with STARPU_LAUNCH pointing
+ * back to the loader. Returns the exit status of the test, or EXIT_FAILURE
+ * when it was killed by a signal. */
+int main(int argc, char *argv[])
+{
+	int child_exit_status;
+	char *test_args;
+	char *launcher;
+	char *launcher_args;
+	char *libtool;
+	char *cflags;
+	const char *top_builddir = getenv("top_builddir");
+	struct sigaction sa;
+	int ret;
+	struct timeval start;
+	struct timeval end;
+	double timing;
+	int x=1;
+	int asan = 0, lsan = 0, tsan = 0, usan = 0;
+
+	test_args = NULL;
+	timeout = 0;
+
+	launcher=getenv("STARPU_CHECK_LAUNCHER");
+	launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS");
+	cflags = getenv("CFLAGS");
+	if (cflags)
+	{
+		if (strstr(cflags, "-fsanitize=address"))
+			asan = 1;
+		if (strstr(cflags, "-fsanitize=leak"))
+			lsan = 1;
+		if (strstr(cflags, "-fsanitize=thread"))
+			tsan = 1;
+		if (strstr(cflags, "-fsanitize=undefined"))
+			usan = 1;
+	}
+
+	if (argv[x] && strcmp(argv[x], "-t") == 0)
+	{
+		if (!argv[x+1])
+		{
+			fprintf(stderr, "[error] Option -t needs a timeout value\n");
+			exit(EXIT_FAILURE);
+		}
+		timeout = strtol(argv[x+1], NULL, 10);
+		x += 2;
+	}
+	else if (getenv("STARPU_TIMEOUT_ENV"))
+	{
+		/* get user-defined timeout value */
+		timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
+	}
+	else if (timeout <= 0)
+	{
+		timeout = DEFAULT_TIMEOUT;
+		if ((launcher && strstr(launcher, "valgrind")) ||
+		    (launcher && strstr(launcher, "helgrind")) ||
+		    tsan)
+			timeout *= 20;
+		if (asan || usan || lsan ||
+		    (launcher && strstr(launcher, "compute-sanitizer")))
+			timeout *= 5;
+
+		if (timeout > 1750)
+			timeout = 1750;
+	}
+
+#ifdef STARPU_SIMGRID
+#ifdef STARPU_DEBUG
+	timeout *= 20;
+#endif
+#endif
+
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
+	/* compare values between the 2 values of timeout */
+	if (getenv("MPIEXEC_TIMEOUT"))
+	{
+		int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
+		if (mpiexec_timeout != timeout)
+			fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
+	}
+#endif
+
+	if (argv[x] && strcmp(argv[x], "-p") == 0)
+	{
+		size_t name_size;
+
+		if (!argv[x+1] || !argv[x+2])
+		{
+			fprintf(stderr, "[error] Option -p needs a directory and a program name\n");
+			exit(EXIT_FAILURE);
+		}
+		name_size = strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1;
+		test_name = malloc(name_size);
+		if (!test_name)
+		{
+			perror("malloc");
+			exit(EXIT_FAILURE);
+		}
+		snprintf(test_name, name_size, "%s/%s", argv[x+1], argv[x+2]);
+		x += 3;
+	}
+	else
+	{
+		test_name = argv[x];
+		x += 1;
+	}
+
+	if (!test_name)
+	{
+		fprintf(stderr, "[error] Need name of program to start\n");
+		exit(EXIT_FAILURE);
+	}
+
+	size_t len = strlen(test_name);
+	if (len >= 3 &&
+	    test_name[len-3] == '.' &&
+	    test_name[len-2] == 's' &&
+	    test_name[len-1] == 'h')
+	{
+		/* This is a shell script, don't run ourself on bash, but make
+		 * the script call us for each program invocation */
+
+		char *launch = NULL;
+		if (top_builddir == NULL)
+			// this may fail if .libs is in the directory path
+			setenv("STARPU_LAUNCH", argv[0], 1);
+		else
+		{
+			launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1);
+			strcpy(launch, top_builddir);
+			strcat(launch, "/tests/loader");
+			setenv("STARPU_LAUNCH", launch, 1);
+		}
+
+		execvp(test_name, argv+x-1);
+
+		fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
+		free(launch);
+		exit(EXIT_FAILURE);
+	}
+
+	if (strstr(test_name, "spmv/dw_block_spmv"))
+	{
+		test_args = (char *) calloc(512, sizeof(char));
+		snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR);
+	}
+	else if (strstr(test_name, "starpu_perfmodel_display"))
+	{
+		if (x >= argc)
+			test_args = strdup("-l");
+	}
+	else if (strstr(test_name, "starpu_perfmodel_plot"))
+	{
+		if (x >= argc)
+			test_args = strdup("-l");
+	}
+
+	/* get launcher program */
+	if (launcher_args)
+		launcher_args=strdup(launcher_args);
+
+	if (top_builddir == NULL)
+	{
+		fprintf(stderr,
+			"warning: $top_builddir undefined, "
+			"so $STARPU_CHECK_LAUNCHER ignored\n");
+		launcher = NULL;
+		launcher_args = NULL;
+		libtool = NULL;
+	}
+	else
+	{
+		libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1);
+		strcpy(libtool, top_builddir);
+		strcat(libtool, "/libtool");
+	}
+
+	if (launcher)
+	{
+		const char *top_srcdir = getenv("top_srcdir");
+		decode(&launcher, "@top_srcdir@", top_srcdir);
+		decode(&launcher_args, "@top_srcdir@", top_srcdir);
+	}
+
+	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
+
+	/* set SIGALARM handler */
+	sa.sa_flags = SA_RESETHAND | SA_NODEFER;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_handler = test_cleaner;
+	if (-1 == sigaction(SIGALRM, &sa, NULL))
+		perror("sigaction");
+
+	signal(SIGINT, forwardsig);
+	signal(SIGHUP, forwardsig);
+	signal(SIGPIPE, forwardsig);
+	signal(SIGTERM, forwardsig);
+
+	child_pid = fork();
+	if (child_pid == 0)
+	{
+		char *launcher_argv[LAUNCHER_ARGV_MAX];
+		int i=0;
+
+		setpgid(0, 0);
+
+		/* "Launchers" such as Valgrind need to be inserted
+		 * after the Libtool-generated wrapper scripts, hence
+		 * this special-case.  */
+		if (launcher && top_builddir != NULL)
+		{
+			push_arg(launcher_argv, &i, libtool);
+			push_arg(launcher_argv, &i, "--mode=execute");
+			push_arg(launcher_argv, &i, launcher);
+			if (launcher_args)
+			{
+				/* Only store the actual tokens: storing the
+				 * final NULL returned by strtok() would
+				 * terminate the argv before the test name */
+				char *tok;
+				for (tok = strtok(launcher_args, " "); tok != NULL; tok = strtok(NULL, " "))
+					push_arg(launcher_argv, &i, tok);
+			}
+		}
+
+		push_arg(launcher_argv, &i, test_name);
+		if (test_args)
+			push_arg(launcher_argv, &i, test_args);
+		else while (argv[x])
+		{
+			push_arg(launcher_argv, &i, argv[x++]);
+		}
+#ifdef STARPU_SIMGRID
+#ifdef STARPU_DEBUG
+		push_arg(launcher_argv, &i, "--cfg=contexts/factory:thread");
+#endif
+#endif
+		push_arg(launcher_argv, &i, NULL);
+		execvp(*launcher_argv, launcher_argv);
+
+		fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
+		exit(EXIT_FAILURE);
+	}
+	if (child_pid == -1)
+	{
+		fprintf(stderr, "[error] fork. test marked as failed\n");
+		exit(EXIT_FAILURE);
+	}
+	free(test_args);
+	free(libtool);
+
+	ret = EXIT_SUCCESS;
+	mygettimeofday(&start, NULL);
+	alarm(timeout);
+	if (child_pid == waitpid(child_pid, &child_exit_status, 0))
+	{
+		if (WIFEXITED(child_exit_status))
+		{
+			int status = WEXITSTATUS(child_exit_status);
+			if (status == EXIT_SUCCESS)
+			{
+				alarm(0);
+			}
+			else
+			{
+				if (status != AUTOTEST_SKIPPED_TEST)
+					fprintf(stdout, "`%s' exited with return code %d\n",
+						test_name, status);
+				ret = status;
+			}
+		}
+		else if (WIFSIGNALED(child_exit_status))
+		{
+			fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n",
+				test_name, WTERMSIG(child_exit_status));
+			launch_gdb(test_name);
+			ret = EXIT_FAILURE;
+		}
+		else
+		{
+			fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n",
+				test_name);
+			ret = EXIT_FAILURE;
+		}
+	}
+
+	mygettimeofday(&end, NULL);
+	/* Computed in floating point so that long runs cannot overflow a 32-bit long */
+	timing = (double)(end.tv_sec - start.tv_sec) * 1000000 + (double)(end.tv_usec - start.tv_usec);
+	fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name);
+
+	return ret;
+}
diff --git a/tools/msvc/starpu.sln b/tools/msvc/starpu.sln
new file
mode 100755 index 0000000..92a64cb --- /dev/null +++ b/tools/msvc/starpu.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual C++ Express 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "starpu", "starpu\starpu.vcxproj", "{1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Debug|Win32.ActiveCfg = Debug|Win32 + {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Debug|Win32.Build.0 = Debug|Win32 + {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Release|Win32.ActiveCfg = Release|Win32 + {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/tools/msvc/starpu/starpu.vcxproj b/tools/msvc/starpu/starpu.vcxproj new file mode 100755 index 0000000..fa4055c --- /dev/null +++ b/tools/msvc/starpu/starpu.vcxproj @@ -0,0 +1,83 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3} + Win32Proj + starpu + + + + Application + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + true + + + false + + + + NotUsing + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(STARPU_PATH)\include\starpu\1.3;%(AdditionalIncludeDirectories) + + + Console + true + $(STARPU_PATH)\lib;%(AdditionalLibraryDirectories) + libstarpu-1.3.lib;%(AdditionalDependencies) + + + + + Level3 + Use + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + + + + diff --git a/tools/msvc/starpu_clean.bat b/tools/msvc/starpu_clean.bat new file mode 100644 index 0000000..8833ca4 --- /dev/null +++ b/tools/msvc/starpu_clean.bat @@ 
-0,0 +1,24 @@ +@ECHO OFF +REM StarPU --- Runtime system for heterogeneous multicore architectures. +REM +REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +REM +REM StarPU is free software; you can redistribute it and/or modify +REM it under the terms of the GNU Lesser General Public License as published by +REM the Free Software Foundation; either version 2.1 of the License, or (at +REM your option) any later version. +REM +REM StarPU is distributed in the hope that it will be useful, but +REM WITHOUT ANY WARRANTY; without even the implied warranty of +REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +REM +REM See the GNU Lesser General Public License in COPYING.LGPL for more details. +REM +TITLE MSVC StarPU Cleaning +ECHO. +ECHO MSVC StarPU Cleaning +ECHO. + +FOR %%d in (debug starpu\debug ipch) DO IF EXIST %%d RMDIR /S /Q %%d +FOR %%f in (starpu.sdf starpu.suo) DO IF EXIST %%f DEL %%f + diff --git a/tools/msvc/starpu_exec.bat b/tools/msvc/starpu_exec.bat new file mode 100644 index 0000000..65ed529 --- /dev/null +++ b/tools/msvc/starpu_exec.bat @@ -0,0 +1,46 @@ +@ECHO OFF +REM StarPU --- Runtime system for heterogeneous multicore architectures. +REM +REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +REM +REM StarPU is free software; you can redistribute it and/or modify +REM it under the terms of the GNU Lesser General Public License as published by +REM the Free Software Foundation; either version 2.1 of the License, or (at +REM your option) any later version. +REM +REM StarPU is distributed in the hope that it will be useful, but +REM WITHOUT ANY WARRANTY; without even the implied warranty of +REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +REM +REM See the GNU Lesser General Public License in COPYING.LGPL for more details. +REM +TITLE MSVC StarPU Execution +ECHO. 
+ECHO MSVC StarPU Execution + +IF "%1" == "" GOTO invalidparam +IF NOT EXIST %1 GOTO invalidparam + +call .\starpu_var.bat + +mkdir starpu +FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu\%%~nF +FOR %%F IN (%HWLOC%\bin\*dll) DO COPY %%F starpu + +set STARPU_OLDPATH=%PATH% +call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x86 +cl %1 %STARPU_CFLAGS% %STARPU_LDFLAGS% + +set PATH=starpu;c:\MinGW\bin;%PATH% +.\%~n1.exe + +set PATH=%STARPU_OLDPATH% +GOTO end + +:invalidparam + ECHO. + ECHO Syntax error. You need to give the name of a StarPU application + EXIT /B 2 + GOTO end + +:end diff --git a/tools/msvc/starpu_open.bat b/tools/msvc/starpu_open.bat new file mode 100644 index 0000000..2019afa --- /dev/null +++ b/tools/msvc/starpu_open.bat @@ -0,0 +1,50 @@ +@ECHO OFF +REM StarPU --- Runtime system for heterogeneous multicore architectures. +REM +REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +REM +REM StarPU is free software; you can redistribute it and/or modify +REM it under the terms of the GNU Lesser General Public License as published by +REM the Free Software Foundation; either version 2.1 of the License, or (at +REM your option) any later version. +REM +REM StarPU is distributed in the hope that it will be useful, but +REM WITHOUT ANY WARRANTY; without even the implied warranty of +REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +REM +REM See the GNU Lesser General Public License in COPYING.LGPL for more details. +REM +TITLE MSVC StarPU Execution +ECHO. +ECHO MSVC StarPU Execution + +IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound + +ECHO. 
+ECHO %STARPU_PATH% + +IF "%~1" == "" GOTO invalidparam +IF NOT EXIST "%~1" GOTO invalidparam + +COPY "%~1" starpu\starpu_appli.c +FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu\%%~nF +FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu +COPY c:\MinGW\bin\pthreadGC2.dll starpu +IF EXIST Debug RMDIR /S /Q Debug +IF EXIST starpu\Debug RMDIR /S /Q starpu\Debug + +"C:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE\VCExpress.exe" starpu.sln + +GOTO end + +:invalidparam + ECHO. + ECHO Syntax error. You need to give the name of a StarPU application + GOTO end + +:starpunotfound + ECHO. + ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory + GOTO end + +:end diff --git a/tools/msvc/starpu_var.bat b/tools/msvc/starpu_var.bat new file mode 100644 index 0000000..cec858f --- /dev/null +++ b/tools/msvc/starpu_var.bat @@ -0,0 +1,39 @@ +@ECHO OFF +REM StarPU --- Runtime system for heterogeneous multicore architectures. +REM +REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +REM +REM StarPU is free software; you can redistribute it and/or modify +REM it under the terms of the GNU Lesser General Public License as published by +REM the Free Software Foundation; either version 2.1 of the License, or (at +REM your option) any later version. +REM +REM StarPU is distributed in the hope that it will be useful, but +REM WITHOUT ANY WARRANTY; without even the implied warranty of +REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +REM +REM See the GNU Lesser General Public License in COPYING.LGPL for more details. +REM +TITLE MSVC StarPU Environment +ECHO. +ECHO MSVC StarPU Environment + +IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound + +ECHO. 
+ECHO Setting environment from %STARPU_PATH% + +set STARPU_LIBDIR=%STARPU_PATH%/lib +set STARPU_INCLUDEDIR=%STARPU_PATH%/include +set STARPU_CFLAGS=/I%STARPU_INCLUDEDIR%\starpu\1.4 /I%HWLOC%\include +set STARPU_LDFLAGS=/link %STARPU_PATH%\lib\libstarpu-1.4.lib + +GOTO end + +:starpunotfound + ECHO. + ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory + exit /B 1 + GOTO end + +:end diff --git a/tools/msvc/starpu_var.bat.in b/tools/msvc/starpu_var.bat.in new file mode 100644 index 0000000..b48068a --- /dev/null +++ b/tools/msvc/starpu_var.bat.in @@ -0,0 +1,39 @@ +@ECHO OFF +REM StarPU --- Runtime system for heterogeneous multicore architectures. +REM +REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +REM +REM StarPU is free software; you can redistribute it and/or modify +REM it under the terms of the GNU Lesser General Public License as published by +REM the Free Software Foundation; either version 2.1 of the License, or (at +REM your option) any later version. +REM +REM StarPU is distributed in the hope that it will be useful, but +REM WITHOUT ANY WARRANTY; without even the implied warranty of +REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +REM +REM See the GNU Lesser General Public License in COPYING.LGPL for more details. +REM +TITLE MSVC StarPU Environment +ECHO. +ECHO MSVC StarPU Environment + +IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound + +ECHO. +ECHO Setting environment from %STARPU_PATH% + +set STARPU_LIBDIR=%STARPU_PATH%/lib +set STARPU_INCLUDEDIR=%STARPU_PATH%/include +set STARPU_CFLAGS=/I%STARPU_INCLUDEDIR%\starpu\@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ /I%HWLOC%\include +set STARPU_LDFLAGS=/link %STARPU_PATH%\lib\libstarpu-@STARPU_EFFECTIVE_VERSION@.lib + +GOTO end + +:starpunotfound + ECHO. 
+ ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory + exit /B 1 + GOTO end + +:end diff --git a/tools/patch-ayudame b/tools/patch-ayudame new file mode 100644 index 0000000..8e8f1c2 --- /dev/null +++ b/tools/patch-ayudame @@ -0,0 +1,48 @@ +--- Ayudame.h 2012-10-31 10:29:19.000000000 -0600 ++++ Ayudame.h 2012-11-16 17:34:04.963495624 -0700 +@@ -14,6 +14,7 @@ + + #include + #include ++#include + + //#define _REENTRANT // defined by default by gcc + +@@ -68,7 +69,7 @@ + // --- + + +-const size_t AYU_buf_size=8; // size of message buffer ++#define AYU_buf_size ((size_t)8) // size of message buffer + extern unsigned long AYU_n_threads; // current number of threads + extern unsigned long AYU_max_threads; // maximum number of threads + +@@ -202,7 +203,9 @@ + }; + /******/ + ++#ifdef __cplusplus + extern "C" { ++#endif + + /****f* AYUDAME/AYU_event + * NAME +@@ -224,7 +227,7 @@ + * void + * SOURCE + */ +- void AYU_event(ayu_event_t event, const int64_t taskId, void *p) ++ void AYU_event(enum ayu_event_t event, const int64_t taskId, void *p) + __attribute__ ((weak)); + /******/ + +@@ -342,7 +345,9 @@ + void AYU_registerTask(void*) __attribute__ ((weak)); + /******/ + ++#ifdef __cplusplus + } ++#endif + + #ifndef AYU_MASTER_TASKID + #define AYU_MASTER_TASKID 0 diff --git a/tools/perfmodels/README b/tools/perfmodels/README new file mode 100644 index 0000000..64c80c3 --- /dev/null +++ b/tools/perfmodels/README @@ -0,0 +1,54 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +This directory contains performance models for given architectures and +examples. + +The architecture 'mirage' is composed of: +- 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores + - MKL 11.1.3 +- 3 NVIDIA GF100 Tesla M2070, thus 3 GPUs + - CUDA 6.0 + - Magma 1.6.0 + +The architecture 'attila' is composed of: +- 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores + - OpenBLAS 0.2.12-1 +- 3 NVIDIA GF100 Tesla C2050, thus 3 GPUs + - CUDA 6.0 + +The architecture 'idgraf' is composed of: +- 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores + - MKL 11.1.0 +- 8 NVIDIA GF100 Tesla C2050, thus 8 GPUs + - CUDA 6.0 + +The architecture 'sirocco' is composed of: +- 2 Intel Xeon E5-2680 @2.50GHz, thus 24 CPU cores + - MKL 11.2 +- 4 NVIDIA GK110BGL Tesla K40m, thus 4 GPUs + - CUDA 6.5 + +To use performance models stored in this directory, one needs to set +the environment variable 'STARPU_PERF_MODEL_DIR' to the location of +the directory, e.g.: + +export STARPU_PERF_MODEL_DIR=.../tools/perfmodels/sampling + +and then select the desired architecture: + +export STARPU_HOSTNAME=mirage + diff --git a/tools/perfmodels/cluster.xml b/tools/perfmodels/cluster.xml new file mode 100644 index 0000000..830cb7f --- /dev/null +++ b/tools/perfmodels/cluster.xml @@ -0,0 +1,11 @@ + + + + + + + diff --git a/tools/perfmodels/hostfile b/tools/perfmodels/hostfile new file mode 100644 index 0000000..a7a0c62 --- /dev/null +++ b/tools/perfmodels/hostfile @@ -0,0 +1,4 @@ +mirage01.plafrim.cluster +mirage02.plafrim.cluster +mirage03.plafrim.cluster +mirage04.plafrim.cluster diff --git a/tools/perfmodels/sampling/bus/attila.affinity b/tools/perfmodels/sampling/bus/attila.affinity new file mode 100644 index 0000000..df5b219 --- /dev/null +++ 
b/tools/perfmodels/sampling/bus/attila.affinity @@ -0,0 +1,7 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 +0 0 1 2 3 4 5 6 7 8 9 10 11 +1 0 1 2 3 4 5 6 7 8 9 10 11 +2 0 1 2 3 4 5 6 7 8 9 10 11 +0 0 1 2 3 4 5 6 7 8 9 10 11 +1 0 1 2 3 4 5 6 7 8 9 10 11 +2 0 1 2 3 4 5 6 7 8 9 10 11 diff --git a/tools/perfmodels/sampling/bus/attila.bandwidth b/tools/perfmodels/sampling/bus/attila.bandwidth new file mode 100644 index 0000000..39ce459 --- /dev/null +++ b/tools/perfmodels/sampling/bus/attila.bandwidth @@ -0,0 +1,33 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 +0.000000 6008.942513 6015.054829 5191.265149 3549.286897 4151.586883 4150.380415 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +6526.606701 0.000000 5296.854371 3827.552062 2299.031785 2537.486754 2537.035995 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +6526.729932 5297.158728 0.000000 3836.700007 2299.047076 2537.505381 2537.054615 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4533.335723 3081.579529 3082.603074 0.000000 1990.703988 2167.035675 2166.706914 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +3931.561374 2376.592429 2377.547977 2237.220832 0.000000 2019.289776 2019.004313 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4431.559719 2550.546612 2551.647193 2390.711859 1970.828109 0.000000 2143.181889 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4121.971523 2444.862313 2445.873558 2297.616593 1907.126408 2068.363099 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/attila.config b/tools/perfmodels/sampling/bus/attila.config new file mode 100644 index 0000000..2e898b5 --- /dev/null +++ b/tools/perfmodels/sampling/bus/attila.config @@ -0,0 +1,5 @@ +# Current configuration +12 # Number of CPUs +3 # Number of CUDA devices +3 # Number of OpenCL devices +0 # Number of MIC devices diff --git a/tools/perfmodels/sampling/bus/attila.latency b/tools/perfmodels/sampling/bus/attila.latency new file mode 100644 index 0000000..b84310e --- /dev/null +++ b/tools/perfmodels/sampling/bus/attila.latency @@ -0,0 +1,33 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 
31 +0.000000 9.500836 9.473047 10.237367 9.863812 9.678141 10.473812 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +10.286836 0.000000 14.883266 24.716781 20.150648 19.964977 20.760648 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +10.005258 14.426969 0.000000 24.677023 19.869070 19.683398 20.479070 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +11.149320 20.650156 20.622367 0.000000 21.013133 20.827461 21.623133 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +14.257031 23.757867 23.730078 24.494398 0.000000 23.935172 24.730844 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +13.983570 23.484406 23.456617 24.220938 23.847383 0.000000 24.457383 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +14.954109 24.454945 24.427156 25.191477 24.817922 24.632250 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/attila.platform.v4.xml b/tools/perfmodels/sampling/bus/attila.platform.v4.xml new file mode 100644 index 0000000..48d2a19 --- /dev/null +++ b/tools/perfmodels/sampling/bus/attila.platform.v4.xml @@ -0,0 +1,277 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/attila.platform.xml b/tools/perfmodels/sampling/bus/attila.platform.xml new file mode 100644 index 0000000..aefdf7b --- /dev/null +++ b/tools/perfmodels/sampling/bus/attila.platform.xml @@ -0,0 +1,275 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.affinity b/tools/perfmodels/sampling/bus/hannibal-pitch.affinity new file mode 100644 index 0000000..bf5296e --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.affinity @@ -0,0 +1,7 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 +0 0 1 2 3 4 5 6 7 +1 4 5 6 7 0 1 2 3 +2 4 5 6 7 0 1 2 3 +0 0 1 2 3 4 5 6 7 +1 4 5 6 7 0 1 2 3 +2 4 5 6 7 0 1 2 3 diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth b/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth new file mode 100644 index 0000000..444a274 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 5988.779905 3149.675860 5988.971975 3975.378655 2636.838726 3992.447567 nan nan nan nan nan nan nan nan nan +3599.738919 0.000000 1679.850942 2248.345554 1889.122528 1521.977521 1892.968372 nan nan nan nan nan nan nan nan nan +3352.127736 2149.165370 0.000000 2149.190105 1818.623736 1475.884075 1822.187624 nan nan nan nan nan nan nan nan nan +3554.530216 2230.599117 1669.939421 0.000000 1876.596887 1513.836926 1880.391850 nan nan nan nan nan nan nan nan nan +2937.163572 1970.662958 1519.854976 1970.683755 0.000000 1389.455231 1692.226493 nan nan nan nan nan nan nan nan nan +2610.203571 1817.881699 1427.338068 1817.899396 1575.646193 0.000000 1578.320689 nan nan nan nan nan nan nan nan nan +2812.550617 1913.772761 1485.791058 1913.792375 1647.181820 1360.930908 0.000000 nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.config b/tools/perfmodels/sampling/bus/hannibal-pitch.config new file mode 100644 index 0000000..39e3554 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.config @@ -0,0 +1,4 @@ +# Current configuration +8 # Number of CPUs +3 # Number of CUDA devices +3 # Number of OpenCL devices diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.latency b/tools/perfmodels/sampling/bus/hannibal-pitch.latency new file mode 100644 index 0000000..91d00c3 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.latency @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 12.460938 12.570312 12.468750 20.000000 20.328125 19.593750 nan nan nan nan nan nan nan nan nan +12.476562 0.000000 25.046875 24.945312 32.476562 32.804688 32.070312 nan nan nan nan nan nan nan nan nan +12.593750 25.054688 0.000000 25.062500 32.593750 32.921875 32.187500 nan nan nan nan nan nan nan nan nan +12.539062 25.000000 25.109375 0.000000 32.539062 32.867188 32.132812 nan nan nan nan nan nan nan nan nan +63.601562 76.062500 76.171875 76.070312 0.000000 83.929688 83.195312 nan nan nan nan nan nan nan nan nan +35.992188 48.453125 48.562500 48.460938 55.992188 0.000000 55.585938 nan nan nan nan nan nan nan nan nan +36.765625 49.226562 49.335938 49.234375 56.765625 57.093750 0.000000 nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml new file mode 100644 index 0000000..cc8de7a --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml new file mode 100644 index 0000000..8dab880 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/hannibal.affinity b/tools/perfmodels/sampling/bus/hannibal.affinity new file mode 100644 index 0000000..bf5296e --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.affinity @@ -0,0 +1,7 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 +0 0 1 2 3 4 5 6 7 +1 4 5 6 7 0 1 2 3 +2 4 5 6 7 0 1 2 3 +0 0 1 2 3 4 5 6 7 +1 4 5 6 7 0 1 2 3 +2 4 5 6 7 0 1 2 3 diff --git a/tools/perfmodels/sampling/bus/hannibal.bandwidth b/tools/perfmodels/sampling/bus/hannibal.bandwidth new file mode 100644 index 0000000..444a274 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.bandwidth @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 5988.779905 
3149.675860 5988.971975 3975.378655 2636.838726 3992.447567 nan nan nan nan nan nan nan nan nan +3599.738919 0.000000 1679.850942 2248.345554 1889.122528 1521.977521 1892.968372 nan nan nan nan nan nan nan nan nan +3352.127736 2149.165370 0.000000 2149.190105 1818.623736 1475.884075 1822.187624 nan nan nan nan nan nan nan nan nan +3554.530216 2230.599117 1669.939421 0.000000 1876.596887 1513.836926 1880.391850 nan nan nan nan nan nan nan nan nan +2937.163572 1970.662958 1519.854976 1970.683755 0.000000 1389.455231 1692.226493 nan nan nan nan nan nan nan nan nan +2610.203571 1817.881699 1427.338068 1817.899396 1575.646193 0.000000 1578.320689 nan nan nan nan nan nan nan nan nan +2812.550617 1913.772761 1485.791058 1913.792375 1647.181820 1360.930908 0.000000 nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/hannibal.config b/tools/perfmodels/sampling/bus/hannibal.config new file mode 100644 index 0000000..39e3554 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.config @@ -0,0 +1,4 @@ +# Current configuration +8 # Number of CPUs +3 # Number of CUDA devices +3 # Number of OpenCL devices diff --git a/tools/perfmodels/sampling/bus/hannibal.latency b/tools/perfmodels/sampling/bus/hannibal.latency new file mode 100644 index 0000000..91d00c3 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.latency @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 
to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 12.460938 12.570312 12.468750 20.000000 20.328125 19.593750 nan nan nan nan nan nan nan nan nan +12.476562 0.000000 25.046875 24.945312 32.476562 32.804688 32.070312 nan nan nan nan nan nan nan nan nan +12.593750 25.054688 0.000000 25.062500 32.593750 32.921875 32.187500 nan nan nan nan nan nan nan nan nan +12.539062 25.000000 25.109375 0.000000 32.539062 32.867188 32.132812 nan nan nan nan nan nan nan nan nan +63.601562 76.062500 76.171875 76.070312 0.000000 83.929688 83.195312 nan nan nan nan nan nan nan nan nan +35.992188 48.453125 48.562500 48.460938 55.992188 0.000000 55.585938 nan nan nan nan nan nan nan nan nan +36.765625 49.226562 49.335938 49.234375 56.765625 57.093750 0.000000 nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml b/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml new file mode 100644 index 0000000..8d28603 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/hannibal.platform.xml b/tools/perfmodels/sampling/bus/hannibal.platform.xml new file 
mode 100644 index 0000000..3ce9e34 --- /dev/null +++ b/tools/perfmodels/sampling/bus/hannibal.platform.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/idgraf.affinity b/tools/perfmodels/sampling/bus/idgraf.affinity new file mode 100644 index 0000000..d7a32f6 --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.affinity @@ -0,0 +1,9 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 +0 0 1 2 3 4 5 6 7 8 9 10 11 +1 0 1 2 3 4 5 6 7 8 9 10 11 +2 0 1 2 3 4 5 6 7 8 9 10 11 +3 0 1 2 3 4 5 6 7 8 9 10 11 +4 6 7 8 9 10 11 0 1 2 3 4 5 +5 6 7 8 9 10 11 0 1 2 3 4 5 +6 6 7 8 9 10 11 0 1 2 3 4 5 +7 6 7 8 9 10 11 0 1 2 3 4 5 diff --git a/tools/perfmodels/sampling/bus/idgraf.bandwidth b/tools/perfmodels/sampling/bus/idgraf.bandwidth new file mode 100644 index 0000000..ba08fe1 --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.bandwidth @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 5985.150529 5982.867814 6004.601450 6002.864228 6017.068835 6017.279500 6014.519783 6025.792854 nan nan nan nan nan nan nan +6518.853316 0.000000 6634.627873 5290.661415 5290.656130 3070.759552 3077.363038 3081.101044 3071.316544 nan nan nan nan nan nan nan +6507.263283 6634.402756 0.000000 5290.497370 5290.566592 3070.780185 3077.514743 3081.028777 3071.425106 nan nan nan nan nan nan nan +6468.888505 5290.537572 5290.537715 0.000000 6634.508569 3071.001714 3077.753708 3081.299378 3071.319606 nan nan nan nan nan nan nan +6379.975977 5290.504747 5290.535878 6634.456149 0.000000 3070.823130 3077.536005 3081.267119 3071.334077 nan nan nan nan nan nan nan +6520.767791 3803.989690 3804.097536 3799.684659 3800.145340 0.000000 6635.277188 5293.782380 5293.769441 nan nan 
nan nan nan nan nan +6520.979807 3803.774735 3804.546566 3799.981880 3800.522991 6635.252627 0.000000 5293.483381 5293.507488 nan nan nan nan nan nan nan +6520.981045 3803.433709 3804.330189 3799.708364 3800.220748 5293.757566 5293.607121 0.000000 6635.079661 nan nan nan nan nan nan nan +6518.969813 3803.670471 3803.908300 3799.582824 3800.130361 5293.416171 5293.465355 6635.049331 0.000000 nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/idgraf.config b/tools/perfmodels/sampling/bus/idgraf.config new file mode 100644 index 0000000..d3daa16 --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.config @@ -0,0 +1,5 @@ +# Current configuration +12 # Number of CPUs +8 # Number of CUDA devices +0 # Number of OpenCL devices +0 # Number of MIC devices diff --git a/tools/perfmodels/sampling/bus/idgraf.latency b/tools/perfmodels/sampling/bus/idgraf.latency new file mode 100644 index 0000000..4539bf3 --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.latency @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000 11.018609 10.962211 10.954469 11.007844 11.005922 11.098250 11.109961 11.063805 nan nan nan nan nan nan nan +11.927148 0.000000 18.151891 18.879328 18.799250 28.088742 27.684023 27.682789 27.425391 nan nan nan nan nan nan nan +11.955969 18.258547 0.000000 18.841516 18.742984 27.801375 27.632695 27.804492 27.925742 nan nan nan nan nan nan nan +11.869312 18.796500 18.733070 0.000000 18.166664 27.588664 27.297711 27.356961 27.379555 
nan nan nan nan nan nan nan +11.895391 18.823414 18.725125 18.160563 0.000000 27.561664 27.485000 27.345914 27.047594 nan nan nan nan nan nan nan +12.104258 27.886172 28.017508 27.861555 27.938109 0.000000 17.586773 18.179898 18.207852 nan nan nan nan nan nan nan +12.097562 28.033187 27.980211 27.902445 28.026523 17.499156 0.000000 18.177344 18.203297 nan nan nan nan nan nan nan +12.132922 27.677352 27.910406 27.592461 27.664289 18.172375 18.238891 0.000000 17.600648 nan nan nan nan nan nan nan +12.117094 23.135703 23.079305 23.071563 23.124938 23.123016 23.215344 23.227055 0.000000 nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/idgraf.platform.v4.xml b/tools/perfmodels/sampling/bus/idgraf.platform.v4.xml new file mode 100644 index 0000000..f2c68ec --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.platform.v4.xml @@ -0,0 +1,1534 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/idgraf.platform.xml b/tools/perfmodels/sampling/bus/idgraf.platform.xml new file mode 100644 index 0000000..a7a6846 --- /dev/null +++ b/tools/perfmodels/sampling/bus/idgraf.platform.xml @@ -0,0 +1,1532 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/mirage.affinity b/tools/perfmodels/sampling/bus/mirage.affinity new file mode 100644 index 0000000..93f9f9d --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.affinity @@ -0,0 +1,7 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 
CPU10 CPU11 +0 0 1 2 3 4 5 6 7 8 9 10 11 +1 6 7 8 9 10 11 0 1 2 3 4 5 +2 6 7 8 9 10 11 0 1 2 3 4 5 +0 0 1 2 3 4 5 6 7 8 9 10 11 +1 6 7 8 9 10 11 0 1 2 3 4 5 +2 6 7 8 9 10 11 0 1 2 3 4 5 diff --git a/tools/perfmodels/sampling/bus/mirage.bandwidth b/tools/perfmodels/sampling/bus/mirage.bandwidth new file mode 100644 index 0000000..2780f56 --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.bandwidth @@ -0,0 +1,33 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 +0.000000 6030.996807 6011.099701 6023.264949 4533.752864 4530.361672 4457.700383 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +6517.591026 0.000000 3074.666060 3073.669260 2673.805763 2672.625905 2647.170533 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +6517.940403 3834.443072 0.000000 5296.205823 2673.864562 2672.684652 2647.228166 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +6517.196551 3831.214832 5296.294945 0.000000 2673.739370 2672.559571 2647.105457 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4380.832064 2537.573804 2534.044575 2536.203977 0.000000 2227.171158 2209.465982 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4385.089408 2539.001663 2535.468464 2537.630294 2229.091070 0.000000 2210.548390 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +4367.520334 2533.101675 2529.584866 2531.736671 2224.542196 2223.725456 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/mirage.config b/tools/perfmodels/sampling/bus/mirage.config new file mode 100644 index 0000000..2e898b5 --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.config @@ -0,0 +1,5 @@ +# Current configuration +12 # Number of CPUs +3 # Number of CUDA devices +3 # Number of OpenCL devices +0 # Number of MIC devices diff --git a/tools/perfmodels/sampling/bus/mirage.latency b/tools/perfmodels/sampling/bus/mirage.latency new file mode 100644 index 0000000..deb1140 --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.latency @@ -0,0 +1,33 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 +0.000000 9.836008 9.800445 9.824289 11.355898 11.248469 10.962445 nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan +10.565859 0.000000 24.149859 24.293578 21.921758 21.814328 21.528305 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +12.277023 25.184922 0.000000 17.178188 23.632922 23.525492 23.239469 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +10.580664 20.416672 20.381109 0.000000 21.936563 21.829133 21.543109 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +15.931570 25.767578 25.732016 25.755859 0.000000 27.180039 26.894016 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +15.837656 25.673664 25.638102 25.661945 27.193555 0.000000 26.800102 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +15.669383 25.505391 25.469828 25.493672 27.025281 26.917852 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/mirage.platform.v4.xml b/tools/perfmodels/sampling/bus/mirage.platform.v4.xml new file mode 100644 index 0000000..a5b524c --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.platform.v4.xml @@ -0,0 +1,277 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/mirage.platform.xml b/tools/perfmodels/sampling/bus/mirage.platform.xml new file mode 100644 index 0000000..256aa77 --- /dev/null +++ b/tools/perfmodels/sampling/bus/mirage.platform.xml @@ -0,0 +1,275 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/sirocco.affinity 
b/tools/perfmodels/sampling/bus/sirocco.affinity new file mode 100644 index 0000000..228fd31 --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.affinity @@ -0,0 +1,9 @@ +# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12 CPU13 CPU14 CPU15 CPU16 CPU17 CPU18 CPU19 CPU20 CPU21 CPU22 CPU23 +0 6 7 8 9 10 11 0 1 2 3 4 5 18 19 20 21 22 23 12 13 14 15 16 17 +1 6 7 8 9 10 11 0 1 2 3 4 5 18 19 20 21 22 23 12 13 14 15 16 17 +2 18 19 20 21 22 23 12 13 14 15 16 17 0 1 2 3 4 5 6 7 8 9 10 11 +3 18 19 20 21 22 23 12 13 14 15 16 17 0 1 2 3 4 5 6 7 8 9 10 11 +0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 +1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 +2 12 13 14 15 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7 8 9 10 11 +3 12 13 14 15 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7 8 9 10 11 diff --git a/tools/perfmodels/sampling/bus/sirocco.bandwidth b/tools/perfmodels/sampling/bus/sirocco.bandwidth new file mode 100644 index 0000000..1bdf281 --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.bandwidth @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000e+00 1.051768e+04 1.051743e+04 1.051732e+04 1.051718e+04 7.997534e+03 7.978223e+03 8.025122e+03 8.002101e+03 nan nan nan nan nan nan nan +1.052170e+04 0.000000e+00 1.024409e+04 7.662719e+03 8.527736e+03 4.543798e+03 4.537558e+03 4.552690e+03 4.545272e+03 nan nan nan nan nan nan nan +1.052123e+04 1.024068e+04 0.000000e+00 7.630370e+03 8.542254e+03 4.543711e+03 4.537471e+03 4.552602e+03 4.545185e+03 nan nan nan nan nan nan nan +1.052183e+04 8.504225e+03 8.517476e+03 0.000000e+00 1.023200e+04 4.543822e+03 4.537582e+03 4.552715e+03 4.545296e+03 nan nan nan nan nan nan nan +1.052172e+04 8.496221e+03 8.514240e+03 1.024287e+04 0.000000e+00 4.543801e+03 4.537561e+03 4.552693e+03 4.545275e+03 nan nan nan nan nan nan nan +7.434276e+03 4.355589e+03 4.355546e+03 4.355527e+03 4.355503e+03 0.000000e+00 3.848326e+03 
3.859204e+03 3.853873e+03 nan nan nan nan nan nan nan +7.232140e+03 4.285414e+03 4.285373e+03 4.285355e+03 4.285331e+03 3.797802e+03 0.000000e+00 3.804012e+03 3.798832e+03 nan nan nan nan nan nan nan +7.300126e+03 4.309194e+03 4.309152e+03 4.309134e+03 4.309110e+03 3.816466e+03 3.812063e+03 0.000000e+00 3.817506e+03 nan nan nan nan nan nan nan +7.333166e+03 4.320685e+03 4.320643e+03 4.320625e+03 4.320601e+03 3.825477e+03 3.821053e+03 3.831778e+03 0.000000e+00 nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/sirocco.config b/tools/perfmodels/sampling/bus/sirocco.config new file mode 100644 index 0000000..8ef44e6 --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.config @@ -0,0 +1,5 @@ +# Current configuration +24 # Number of CPUs +4 # Number of CUDA devices +4 # Number of OpenCL devices +0 # Number of MIC devices diff --git a/tools/perfmodels/sampling/bus/sirocco.latency b/tools/perfmodels/sampling/bus/sirocco.latency new file mode 100644 index 0000000..d4182e0 --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.latency @@ -0,0 +1,17 @@ +# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 +0.000000e+00 1.029027e+01 1.031898e+01 9.529422e+00 1.039846e+01 9.643953e+00 1.113670e+01 1.055939e+01 1.004796e+01 nan nan nan nan nan nan nan +1.085040e+01 0.000000e+00 1.152573e+01 2.350899e+01 2.337711e+01 2.049435e+01 2.198709e+01 2.140979e+01 2.089836e+01 nan nan nan nan nan nan nan +9.920578e+00 1.167180e+01 0.000000e+00 2.304539e+01 2.330630e+01 
1.956453e+01 2.105727e+01 2.047997e+01 1.996854e+01 nan nan nan nan nan nan nan +1.093016e+01 2.380006e+01 2.358666e+01 0.000000e+00 1.101548e+01 2.057412e+01 2.206686e+01 2.148955e+01 2.097812e+01 nan nan nan nan nan nan nan +1.097311e+01 2.126338e+01 2.129209e+01 2.050253e+01 0.000000e+00 2.061706e+01 2.210980e+01 2.153250e+01 2.102107e+01 nan nan nan nan nan nan nan +1.162996e+01 2.192023e+01 2.194894e+01 2.115938e+01 2.202842e+01 0.000000e+00 2.276666e+01 2.218935e+01 2.167792e+01 nan nan nan nan nan nan nan +1.359506e+01 2.388534e+01 2.391404e+01 2.312448e+01 2.399352e+01 2.323902e+01 0.000000e+00 2.415445e+01 2.364302e+01 nan nan nan nan nan nan nan +1.245815e+01 2.274842e+01 2.277712e+01 2.198757e+01 2.285661e+01 2.210210e+01 2.359484e+01 0.000000e+00 2.250611e+01 nan nan nan nan nan nan nan +1.236026e+01 2.265053e+01 2.267923e+01 2.188968e+01 2.275872e+01 2.200421e+01 2.349695e+01 2.291965e+01 0.000000e+00 nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/tools/perfmodels/sampling/bus/sirocco.platform.v4.xml b/tools/perfmodels/sampling/bus/sirocco.platform.v4.xml new file mode 100644 index 0000000..39dba1f --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.platform.v4.xml @@ -0,0 +1,241 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/bus/sirocco.platform.xml b/tools/perfmodels/sampling/bus/sirocco.platform.xml new file mode 100644 index 0000000..4b0cb98 --- /dev/null +++ b/tools/perfmodels/sampling/bus/sirocco.platform.xml @@ -0,0 +1,239 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/perfmodels/sampling/codelets/45/add_scal.mirage b/tools/perfmodels/sampling/codelets/45/add_scal.mirage new file mode 100644 index 0000000..32d6cb3 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/add_scal.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 1.540344e+02 3.339691e+01 1.632765e+04 2.633248e+06 106 + diff --git 
a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.attila b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.attila new file mode 100644 index 0000000..5372d6e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 8.407559e+04 3.415249e+03 1.399859e+08 1.178881e+13 1665 +f0ac7beb 4915200 5.242880e+08 2.610119e+04 1.422415e+03 4.251883e+07 1.113088e+12 1629 +d46431bb 1228800 6.553600e+07 3.432588e+03 1.640071e+02 9.130685e+06 3.141343e+10 2660 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.795670e+03 5.624760e+01 1.818024e+07 5.084653e+10 6503 +f0ac7beb 4915200 5.242880e+08 8.880682e+02 3.243424e+01 5.760010e+06 
5.122105e+09 6486 +d46431bb 1228800 6.553600e+07 2.022322e+02 1.071833e+01 1.116119e+06 2.263493e+08 5519 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.815870e+03 4.694553e+01 1.827781e+07 5.148226e+10 6491 +f0ac7beb 4915200 5.242880e+08 8.961392e+02 3.565427e+01 5.741564e+06 5.153386e+09 6407 +d46431bb 1228800 6.553600e+07 2.020566e+02 9.551669e+00 1.107876e+06 2.243540e+08 5483 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.810209e+03 3.946230e+01 1.806121e+07 5.076578e+10 6427 +f0ac7beb 4915200 5.242880e+08 8.833768e+02 3.092949e+01 5.707497e+06 5.048051e+09 6461 +d46431bb 1228800 6.553600e+07 1.637484e+02 6.969807e+00 1.084015e+06 1.778273e+08 6620 diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal new file mode 100644 index 
0000000..743d1e1 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 5.763709e+03 3.768350e+03 4.501024e+09 3.703209e+13 780925 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 5.889910e+03 4.485232e+03 4.352661e+09 4.050353e+13 739003 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny 
alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 5.782569e+03 3.939612e+03 4.412291e+09 3.735706e+13 763033 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch new file mode 100644 index 0000000..743d1e1 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 5.763709e+03 3.768350e+03 4.501024e+09 3.703209e+13 780925 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 
11059200 1.769472e+09 5.889910e+03 4.485232e+03 4.352661e+09 4.050353e+13 739003 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 5.782569e+03 3.939612e+03 4.412291e+09 3.735706e+13 763033 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.idgraf b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.idgraf new file mode 100644 index 0000000..3c5f94e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 8.987520e+04 9.682708e+02 2.085105e+07 1.874210e+12 232 +d46431bb 1228800 6.553600e+07 3.465410e+03 7.427679e+01 2.737674e+06 9.491521e+09 790 +f0ac7beb 4915200 5.242880e+08 2.744657e+04 5.713498e+02 7.575252e+06 
2.080048e+11 276 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.825219e+03 1.227364e+02 1.106356e+07 3.131597e+10 3916 +d46431bb 1228800 6.553600e+07 2.060677e+02 2.909125e+01 5.497887e+05 1.155516e+08 2668 +f0ac7beb 4915200 5.242880e+08 9.076961e+02 5.977819e+01 3.441076e+06 3.136998e+09 3791 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.823818e+03 1.560928e+02 1.063450e+07 3.012164e+10 3766 +d46431bb 1228800 6.553600e+07 1.632767e+02 2.137437e+01 5.224853e+05 8.677162e+07 3200 +f0ac7beb 4915200 5.242880e+08 9.226606e+02 6.080500e+01 3.410153e+06 3.160079e+09 3696 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## 
+# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.821988e+03 1.631366e+02 1.046111e+07 2.961977e+10 3707 +d46431bb 1228800 6.553600e+07 1.661504e+02 2.199315e+01 5.172263e+05 8.744312e+07 3113 +f0ac7beb 4915200 5.242880e+08 9.172785e+02 5.967474e+01 3.492079e+06 3.216766e+09 3807 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.817019e+03 1.506473e+02 1.119202e+07 3.161828e+10 3973 +d46431bb 1228800 6.553600e+07 2.042642e+02 2.707555e+01 5.498791e+05 1.142941e+08 2692 +f0ac7beb 4915200 5.242880e+08 9.044446e+02 5.780357e+01 3.477590e+06 3.158134e+09 3845 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.816710e+03 1.414989e+02 1.032888e+07 2.916687e+10 3667 +d46431bb 1228800 6.553600e+07 1.639497e+02 2.257894e+01 4.474187e+05 7.474542e+07 2729 +f0ac7beb 4915200 5.242880e+08 9.331501e+02 5.611510e+01 3.235231e+06 3.029874e+09 3467 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.815225e+03 1.445443e+02 1.009821e+07 2.850368e+10 3587 +d46431bb 1228800 6.553600e+07 1.659035e+02 2.475202e+01 4.006569e+05 6.794997e+07 2415 +f0ac7beb 4915200 5.242880e+08 9.137585e+02 6.301297e+01 3.125968e+06 2.869963e+09 3421 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.807699e+03 1.292512e+02 1.006279e+07 2.831317e+10 3584 +d46431bb 1228800 6.553600e+07 1.680450e+02 2.634123e+01 3.922170e+05 6.752957e+07 2334 +f0ac7beb 4915200 5.242880e+08 8.912551e+02 5.629783e+01 
3.090873e+06 2.765747e+09 3468 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 1.769472e+09 2.827622e+03 1.304764e+02 1.027841e+07 2.912533e+10 3635 +d46431bb 1228800 6.553600e+07 1.666216e+02 2.357918e+01 4.083895e+05 6.940921e+07 2451 +f0ac7beb 4915200 5.242880e+08 9.077285e+02 5.688987e+01 3.089908e+06 2.815814e+09 3404 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.mirage b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.mirage new file mode 100644 index 0000000..ec0fbc8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.mirage @@ -0,0 +1,148 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 6.553600e+07 3.262643e+03 7.810468e+01 1.532463e+07 5.002746e+10 4697 +6e96d715 12288 6.553600e+04 
3.262643e+00 7.810468e-02 1.532463e+04 5.002746e+6 4697 +24c84a50 11059200 1.769472e+09 8.778020e+04 9.598441e+02 2.889724e+08 2.536909e+13 3292 +f0ac7beb 4915200 5.242880e+08 2.647095e+04 4.073263e+02 4.762124e+07 1.260878e+12 1799 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 6.553600e+07 2.011248e+02 1.498037e+01 1.633334e+06 3.303264e+08 8121 +6e96d715 12288 6.553600e+04 2.011248e-01 1.498037e-02 1.633334e+03 3.303264e+04 8121 +24c84a50 11059200 1.769472e+09 2.805491e+03 6.931361e+01 3.683329e+07 1.033985e+11 13129 +f0ac7beb 4915200 5.242880e+08 8.944354e+02 3.828761e+01 5.687714e+06 5.096615e+09 6359 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 6.553600e+07 2.004836e+02 1.337883e+01 1.620709e+06 3.263726e+08 8084 +6e96d715 12288 6.553600e+04 2.004836e-01 1.337883e-02 1.620709e+03 3.263726e+04 8084 +24c84a50 11059200 1.769472e+09 2.828590e+03 
6.702999e+01 3.650578e+07 1.033178e+11 12906 +f0ac7beb 4915200 5.242880e+08 9.090465e+02 3.898196e+01 5.691540e+06 5.183389e+09 6261 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 6.553600e+07 1.603761e+02 1.178063e+01 1.522450e+06 2.454821e+08 9493 +6e96d715 12288 6.553600e+04 1.603761e-01 1.178063e-02 1.522450e+03 2.454821e+04 9493 +24c84a50 11059200 1.769472e+09 2.828209e+03 7.003112e+01 3.675541e+07 1.040157e+11 12996 +f0ac7beb 4915200 5.242880e+08 8.858930e+02 3.323656e+01 5.675030e+06 5.034546e+09 6406 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_gemm.sirocco b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.sirocco new file mode 100644 index 0000000..c5a1f79 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_gemm.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 5.971968e+09 2.632193e+03 3.963412e+02 2.414774e+07 6.500262e+10 9174 +24c84a50 11059200 1.769472e+09 7.545142e+02 4.622075e+01 1.174100e+07 8.891991e+09 15561 +f0ac7beb 4915200 5.242880e+08 2.651541e+02 2.896639e+01 2.197862e+06 5.897272e+08 8289 +d46431bb 1228800 6.553600e+07 5.633559e+01 1.027680e+01 7.345034e+05 4.275566e+07 13038 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 5.971968e+09 2.577659e+03 3.627579e+02 2.443621e+07 6.423574e+10 9480 +24c84a50 11059200 1.769472e+09 7.434516e+02 4.620554e+01 1.184541e+07 8.840509e+09 15933 +f0ac7beb 4915200 5.242880e+08 2.633265e+02 2.997768e+01 2.185610e+06 5.829880e+08 8300 +d46431bb 1228800 6.553600e+07 5.716015e+01 1.157773e+01 6.223597e+05 3.703364e+07 10888 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 
5.971968e+09 2.575702e+03 3.709810e+02 2.479886e+07 6.519954e+10 9628 +24c84a50 11059200 1.769472e+09 7.437036e+02 4.816754e+01 1.192752e+07 8.907749e+09 16038 +f0ac7beb 4915200 5.242880e+08 2.593699e+02 2.791728e+01 2.302427e+06 6.040986e+08 8877 +d46431bb 1228800 6.553600e+07 5.656092e+01 1.160148e+01 5.523739e+05 3.255722e+07 9766 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 5.971968e+09 2.620269e+03 3.685780e+02 2.431872e+07 6.498239e+10 9281 +24c84a50 11059200 1.769472e+09 7.558763e+02 4.610795e+01 1.204791e+07 9.140616e+09 15939 +f0ac7beb 4915200 5.242880e+08 2.625144e+02 2.860172e+01 2.207221e+06 5.863054e+08 8408 +d46431bb 1228800 6.553600e+07 5.829194e+01 1.220705e+01 7.805874e+05 4.749737e+07 13391 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 5.971968e+09 1.120418e+05 2.694960e+04 3.755640e+08 4.451334e+13 3352 +24c84a50 11059200 1.769472e+09 
3.494026e+04 6.587010e+03 1.697048e+08 6.140270e+12 4857 +f0ac7beb 4915200 5.242880e+08 1.147661e+04 2.242393e+03 3.799907e+07 4.527495e+11 3311 +d46431bb 1228800 6.553600e+07 1.593513e+03 3.073908e+02 2.396962e+07 3.961722e+10 15042 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.attila b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.attila new file mode 100644 index 0000000..7d01cc8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 7.593370e+04 4.030251e+03 2.353945e+06 1.792473e+11 31 +afdd228b 1638400 8.758624e+07 2.346245e+04 1.988237e+03 1.032348e+06 2.439534e+10 44 +cea37d6d 409600 1.097392e+07 3.401100e+03 3.819888e+02 1.564506e+05 5.388163e+08 46 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 4.425357e+04 2.763302e+03 7.523107e+05 3.342224e+10 17 +afdd228b 1638400 8.758624e+07 2.425311e+04 2.094515e+03 2.910373e+05 7.111204e+09 12 +cea37d6d 409600 1.097392e+07 1.130795e+04 5.745206e+02 1.130795e+05 1.281997e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 4.555777e+04 5.907293e+03 7.744821e+05 3.587692e+10 17 +afdd228b 1638400 8.758624e+07 2.509024e+04 3.469720e+03 2.509024e+05 6.415590e+09 10 +cea37d6d 409600 1.097392e+07 1.082278e+04 2.477308e+02 1.082278e+05 1.171939e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 4.828333e+04 3.379151e+03 5.311166e+05 2.576968e+10 11 +afdd228b 1638400 8.758624e+07 2.737149e+04 3.217773e+03 2.737149e+05 7.595526e+09 10 +cea37d6d 409600 1.097392e+07 1.143954e+04 
1.654563e+02 1.143954e+05 1.308904e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal new file mode 100644 index 0000000..276c2a9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.701016e+05 7.229737e+03 4.082438e+06 6.956835e+11 24 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.188776e+05 9.331204e+02 2.113643e+08 2.512803e+13 1778 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# 
device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.205438e+05 2.044578e+03 2.189075e+08 2.639552e+13 1816 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch new file mode 100644 index 0000000..276c2a9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.701016e+05 7.229737e+03 4.082438e+06 6.956835e+11 24 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.188776e+05 9.331204e+02 2.113643e+08 2.512803e+13 1778 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 1.205438e+05 2.044578e+03 2.189075e+08 2.639552e+13 1816 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.idgraf b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.idgraf new file mode 100644 index 0000000..5ef8ba9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 2.069576e+04 
7.440388e+01 4.346110e+05 8.994720e+09 21 +cea37d6d 409600 1.097392e+07 1.068290e+03 2.074934e+01 3.098041e+04 3.310855e+07 29 +afdd228b 1638400 8.758624e+07 6.632886e+03 6.634864e+01 1.392906e+05 9.239911e+08 21 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.998499e+04 6.600211e+03 5.998499e+05 3.641761e+10 10 +cea37d6d 409600 1.097392e+07 1.500406e+04 5.740284e+02 1.500406e+05 2.254514e+09 10 +afdd228b 1638400 8.758624e+07 3.368249e+04 5.947857e+03 3.368249e+05 1.169887e+10 10 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.745646e+04 7.363450e+03 5.745646e+05 3.355466e+10 10 +cea37d6d 409600 1.097392e+07 1.559370e+04 1.137871e+03 1.559370e+05 2.444583e+09 10 +afdd228b 1638400 8.758624e+07 3.216379e+04 4.954206e+03 3.216379e+05 1.059054e+10 10 + +#################### +# COMB_5 +# number of types devices +1 +#################### 
+# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.761859e+04 8.603827e+03 5.761859e+05 3.393928e+10 10 +cea37d6d 409600 1.097392e+07 1.498399e+04 7.885417e+02 1.498399e+05 2.251416e+09 10 +afdd228b 1638400 8.758624e+07 3.033086e+04 2.968298e+03 3.033086e+05 9.287718e+09 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.207197e+04 2.780602e+03 5.207197e+05 2.719222e+10 10 +cea37d6d 409600 1.097392e+07 1.609271e+04 2.681035e+03 1.609271e+05 2.661633e+09 10 +afdd228b 1638400 8.758624e+07 3.107603e+04 1.620445e+03 3.107603e+05 9.683455e+09 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.062446e+04 2.429137e+03 5.062446e+05 2.568736e+10 10 +cea37d6d 409600 1.097392e+07 1.506158e+04 8.561331e+02 1.506158e+05 2.275840e+09 10 +afdd228b 1638400 8.758624e+07 3.034398e+04 4.027845e+03 3.034398e+05 9.369809e+09 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.174953e+04 6.183695e+03 5.174953e+05 2.716252e+10 10 +cea37d6d 409600 1.097392e+07 1.518996e+04 9.403764e+02 1.518996e+05 2.316193e+09 10 +afdd228b 1638400 8.758624e+07 3.100983e+04 5.124047e+03 3.100983e+05 9.878653e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.475211e+04 6.512121e+03 5.475211e+05 3.040202e+10 10 
+cea37d6d 409600 1.097392e+07 1.504708e+04 5.064339e+02 1.504708e+05 2.266711e+09 10 +afdd228b 1638400 8.758624e+07 2.918927e+04 4.035680e+03 2.918927e+05 8.683004e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 1.097392e+07 1.467875e+04 2.659893e+02 1.467875e+05 2.155366e+09 10 +afdd228b 1638400 8.758624e+07 3.186232e+04 5.396938e+03 3.186232e+05 1.044334e+10 10 +617e5fe6 3686400 2.953730e+08 5.896762e+04 1.233845e+04 5.896762e+05 3.629418e+10 10 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.mirage b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.mirage new file mode 100644 index 0000000..bdab99a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.mirage @@ -0,0 +1,148 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) 
sum sum2 n +cea37d6d 409600 1.097392e+07 9.221097e+02 4.521529e+01 7.930143e+04 7.330044e+07 86 +e5a07e31 4096 1.097392e+04 9.221097e-01 4.521529e-02 7.930143e+01 7.330044e+03 86 +617e5fe6 3686400 2.953730e+08 1.884969e+04 7.370619e+02 2.111165e+06 3.985565e+10 112 +afdd228b 1638400 8.758624e+07 5.940712e+03 2.434960e+02 2.376285e+05 1.414054e+09 40 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.366740e+03 9.457424e+01 1.556354e+05 8.355143e+08 29 +cea37d6d 409600 1.097392e+07 2.003232e+03 2.738589e+01 5.809372e+04 1.163969e+08 29 +e5a07e31 4096 1.097392e+04 2.003232e+00 2.738589e-02 5.809372e+01 1.163969e+04 29 +afdd228b 1638400 8.758624e+07 3.522008e+03 5.701992e+01 1.021382e+05 3.598259e+08 29 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 8.758624e+07 3.775579e+03 6.170997e+01 1.094918e+05 4.135053e+08 29 +cea37d6d 409600 1.097392e+07 
2.237869e+03 3.535637e+01 6.489820e+04 1.452699e+08 29 +e5a07e31 4096 1.097392e+04 2.237869e+00 3.535637e-02 6.489820e+01 1.452699e+04 29 +617e5fe6 3686400 2.953730e+08 5.557011e+03 6.001156e+01 1.611533e+05 8.956353e+08 29 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 2.953730e+08 5.500099e+03 6.565775e+01 1.595029e+05 8.774066e+08 29 +cea37d6d 409600 1.097392e+07 2.220120e+03 2.867670e+01 6.438348e+04 1.429629e+08 29 +e5a07e31 4096 1.097392e+04 2.220120e+00 2.867670e-02 6.438348e+01 1.429629e+04 29 +afdd228b 1638400 8.758624e+07 3.742877e+03 5.898784e+01 1.085434e+05 4.063656e+08 29 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_potrf.sirocco b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.sirocco new file mode 100644 index 0000000..0d4d30d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_potrf.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 9.963650e+08 1.842284e+04 2.612920e+03 1.271176e+06 2.388975e+10 69 +617e5fe6 3686400 2.953730e+08 7.379027e+03 1.089221e+03 5.903221e+05 4.450915e+09 80 +afdd228b 1638400 8.758624e+07 2.799281e+03 5.546704e+02 2.323403e+05 6.759213e+08 83 +cea37d6d 409600 1.097392e+07 4.391691e+02 4.300491e+01 7.597626e+04 3.368638e+07 173 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 9.963650e+08 4.852293e+04 1.266847e+04 4.852293e+05 2.514965e+10 10 +617e5fe6 3686400 2.953730e+08 7.699799e+03 1.513211e+03 9.239759e+04 7.389205e+08 12 +afdd228b 1638400 8.758624e+07 5.010966e+03 7.544427e+02 5.010965e+04 2.567896e+08 10 +cea37d6d 409600 1.097392e+07 3.343709e+03 3.943178e+02 3.343709e+04 1.133588e+08 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size 
flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 9.963650e+08 3.188534e+04 1.123169e+04 3.188534e+05 1.142826e+10 10 +617e5fe6 3686400 2.953730e+08 8.514691e+03 1.568765e+03 9.366160e+04 8.245709e+08 11 +afdd228b 1638400 8.758624e+07 5.436465e+03 1.325711e+03 5.436465e+04 3.131266e+08 10 +cea37d6d 409600 1.097392e+07 3.336739e+03 3.113015e+02 3.336739e+04 1.123074e+08 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 9.963650e+08 3.051372e+04 1.107281e+04 3.051372e+05 1.053694e+10 10 +617e5fe6 3686400 2.953730e+08 8.456328e+03 1.411439e+03 1.014759e+05 8.820196e+08 12 +afdd228b 1638400 8.758624e+07 4.996835e+03 9.396038e+02 4.996835e+04 2.585122e+08 10 +cea37d6d 409600 1.097392e+07 3.060839e+03 4.968177e+01 3.060839e+04 9.371202e+07 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 9.963650e+08 1.332935e+04 3.063746e+03 2.132696e+05 2.992929e+09 16 
+617e5fe6 3686400 2.953730e+08 8.333388e+03 1.108400e+03 1.000007e+05 8.480868e+08 12 +afdd228b 1638400 8.758624e+07 5.517925e+03 1.047059e+03 5.517925e+04 3.154382e+08 10 +cea37d6d 409600 1.097392e+07 3.435367e+03 2.405829e+02 3.435367e+04 1.185962e+08 10 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.attila b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.attila new file mode 100644 index 0000000..19d4e2a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 4.203779e+04 3.415249e+03 6.999295e+07 1.178881e+13 1665 +d39bff17 3276800 2.625536e+08 1.305059e+04 1.422415e+03 2.125942e+07 1.113088e+12 1629 +2c1922b7 819200 3.287040e+07 1.716294e+03 1.640071e+02 4.565342e+06 3.141343e+10 2660 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# 
a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.397835e+03 5.624760e+01 9.090120e+06 5.084653e+10 6503 +d39bff17 3276800 2.625536e+08 4.440341e+02 3.243424e+01 2.880005e+06 5.122105e+09 6486 +2c1922b7 819200 3.287040e+07 1.011161e+02 1.071833e+01 5.580595e+05 2.263493e+08 5519 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.407935e+03 4.694553e+01 9.138905e+06 5.148226e+10 6491 +d39bff17 3276800 2.625536e+08 4.480696e+02 3.565427e+01 2.870782e+06 5.153386e+09 6407 +2c1922b7 819200 3.287040e+07 1.010283e+02 9.551669e+00 5.539380e+05 2.243540e+08 5483 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.405104e+03 3.946230e+01 9.030605e+06 5.076578e+10 6427 +d39bff17 3276800 2.625536e+08 4.416884e+02 3.092949e+01 2.853748e+06 5.048051e+09 6461 
+2c1922b7 819200 3.287040e+07 8.187420e+01 6.969807e+00 5.420075e+05 1.778273e+08 6620 diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal new file mode 100644 index 0000000..280ee8e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.881854e+03 3.768350e+03 2.250512e+09 3.703209e+13 780925 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.944955e+03 4.485232e+03 2.176330e+09 4.050353e+13 739003 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 
3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.891285e+03 3.939612e+03 2.206146e+09 3.735706e+13 763033 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch new file mode 100644 index 0000000..280ee8e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.881854e+03 3.768350e+03 2.250512e+09 3.703209e+13 780925 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 
+# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.944955e+03 4.485232e+03 2.176330e+09 4.050353e+13 739003 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 2.891285e+03 3.939612e+03 2.206146e+09 3.735706e+13 763033 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.idgraf b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.idgraf new file mode 100644 index 0000000..6127165 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+ff82dda0 7372800 8.856576e+08 4.493760e+04 9.682708e+02 1.042552e+07 1.874210e+12 232 +2c1922b7 819200 3.287040e+07 1.732705e+03 7.427679e+01 1.368837e+06 9.491521e+09 790 +d39bff17 3276800 2.625536e+08 1.372328e+04 5.713498e+02 3.787626e+06 2.080048e+11 276 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.412610e+03 1.227364e+02 5.531780e+06 3.131597e+10 3916 +2c1922b7 819200 3.287040e+07 1.030339e+02 2.909125e+01 2.748943e+05 1.155516e+08 2668 +d39bff17 3276800 2.625536e+08 4.538481e+02 5.977819e+01 1.720538e+06 3.136998e+09 3791 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.411909e+03 1.560928e+02 5.317250e+06 3.012164e+10 3766 +2c1922b7 819200 3.287040e+07 8.163835e+01 2.137437e+01 2.612426e+05 8.677162e+07 3200 +d39bff17 3276800 2.625536e+08 4.613303e+02 6.080500e+01 1.705076e+06 3.160079e+09 3696 + +#################### +# 
COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.410994e+03 1.631366e+02 5.230555e+06 2.961977e+10 3707 +2c1922b7 819200 3.287040e+07 8.307520e+01 2.199315e+01 2.586131e+05 8.744312e+07 3113 +d39bff17 3276800 2.625536e+08 4.586393e+02 5.967474e+01 1.746040e+06 3.216766e+09 3807 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.408509e+03 1.506473e+02 5.596010e+06 3.161828e+10 3973 +2c1922b7 819200 3.287040e+07 1.021321e+02 2.707555e+01 2.749395e+05 1.142941e+08 2692 +d39bff17 3276800 2.625536e+08 4.522223e+02 5.780357e+01 1.738795e+06 3.158134e+09 3845 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model 
for cuda4_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.408355e+03 1.414989e+02 5.164440e+06 2.916687e+10 3667 +2c1922b7 819200 3.287040e+07 8.197485e+01 2.257894e+01 2.237094e+05 7.474542e+07 2729 +d39bff17 3276800 2.625536e+08 4.665750e+02 5.611510e+01 1.617616e+06 3.029874e+09 3467 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.407612e+03 1.445443e+02 5.049105e+06 2.850368e+10 3587 +2c1922b7 819200 3.287040e+07 8.295175e+01 2.475202e+01 2.003285e+05 6.794997e+07 2415 +d39bff17 3276800 2.625536e+08 4.568793e+02 6.301297e+01 1.562984e+06 2.869963e+09 3421 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+ff82dda0 7372800 8.856576e+08 1.403850e+03 1.292512e+02 5.031395e+06 2.831317e+10 3584 +2c1922b7 819200 3.287040e+07 8.402250e+01 2.634123e+01 1.961085e+05 6.752957e+07 2334 +d39bff17 3276800 2.625536e+08 4.456275e+02 5.629783e+01 1.545436e+06 2.765747e+09 3468 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.413811e+03 1.304764e+02 5.139205e+06 2.912533e+10 3635 +2c1922b7 819200 3.287040e+07 8.331080e+01 2.357918e+01 2.041948e+05 6.940921e+07 2451 +d39bff17 3276800 2.625536e+08 4.538643e+02 5.688987e+01 1.544954e+06 2.815814e+09 3404 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.mirage b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.mirage new file mode 100644 index 0000000..2f24374 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.mirage @@ -0,0 +1,148 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 
+# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 1.631322e+03 7.810468e+01 7.662315e+06 5.002746e+10 4697 +1827caf5 8192 3.287040e+04 1.631322e+00 7.810468e-02 7.662315e+03 5.002746e+6 4697 +ff82dda0 7372800 8.856576e+08 4.389010e+04 9.598441e+02 1.444862e+08 2.536909e+13 3292 +d39bff17 3276800 2.625536e+08 1.323548e+04 4.073263e+02 2.381062e+07 1.260878e+12 1799 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 1.005624e+02 1.498037e+01 8.166670e+05 3.303264e+08 8121 +1827caf5 8192 3.287040e+04 1.005624e-01 1.498037e-02 8.166670e+02 3.303264e+04 8121 +ff82dda0 7372800 8.856576e+08 1.402745e+03 6.931361e+01 1.841664e+07 1.033985e+11 13129 +d39bff17 3276800 2.625536e+08 4.472177e+02 3.828761e+01 2.843857e+06 5.096615e+09 6359 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 
819200 3.287040e+07 1.002418e+02 1.337883e+01 8.103545e+05 3.263726e+08 8084 +1827caf5 8192 3.287040e+04 1.002418e-01 1.337883e-02 8.103545e+02 3.263726e+04 8084 +ff82dda0 7372800 8.856576e+08 1.414295e+03 6.702999e+01 1.825289e+07 1.033178e+11 12906 +d39bff17 3276800 2.625536e+08 4.545233e+02 3.898196e+01 2.845770e+06 5.183389e+09 6261 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 8.018805e+01 1.178063e+01 7.612250e+05 2.454821e+08 9493 +1827caf5 8192 3.287040e+04 8.018805e-02 1.178063e-02 7.612250e+02 2.454821e+04 9493 +ff82dda0 7372800 8.856576e+08 1.414104e+03 7.003112e+01 1.837770e+07 1.040157e+11 12996 +d39bff17 3276800 2.625536e+08 4.429465e+02 3.323656e+01 2.837515e+06 5.034546e+09 6406 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_syrk.sirocco b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.sirocco new file mode 100644 index 0000000..ba41f1a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_syrk.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 1.316097e+03 3.963412e+02 1.207387e+07 6.500262e+10 9174 +ff82dda0 7372800 8.856576e+08 3.772571e+02 4.622075e+01 5.870500e+06 8.891991e+09 15561 +d39bff17 3276800 2.625536e+08 1.325771e+02 2.896639e+01 1.098931e+06 5.897272e+08 8289 +2c1922b7 819200 3.287040e+07 2.816780e+01 1.027680e+01 3.672517e+05 4.275566e+07 13038 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 1.288830e+03 3.627579e+02 1.221810e+07 6.423574e+10 9480 +ff82dda0 7372800 8.856576e+08 3.717258e+02 4.620554e+01 5.922705e+06 8.840509e+09 15933 +d39bff17 3276800 2.625536e+08 1.316633e+02 2.997768e+01 1.092805e+06 5.829880e+08 8300 +2c1922b7 819200 3.287040e+07 2.858008e+01 1.157773e+01 3.111798e+05 3.703364e+07 10888 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 1.287851e+03 3.709810e+02 1.239943e+07 6.519954e+10 9628 +ff82dda0 7372800 8.856576e+08 3.718518e+02 4.816754e+01 5.963760e+06 8.907749e+09 16038 +d39bff17 3276800 2.625536e+08 1.296849e+02 2.791728e+01 1.151214e+06 6.040986e+08 8877 +2c1922b7 819200 3.287040e+07 2.828046e+01 1.160148e+01 2.761870e+05 3.255722e+07 9766 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 1.310134e+03 3.685780e+02 1.215936e+07 6.498239e+10 9281 +ff82dda0 7372800 8.856576e+08 3.779382e+02 4.610795e+01 6.023955e+06 9.140616e+09 15939 +d39bff17 3276800 2.625536e+08 1.312572e+02 2.860172e+01 1.103610e+06 5.863054e+08 8408 +2c1922b7 819200 3.287040e+07 2.914597e+01 1.220705e+01 3.902937e+05 4.749737e+07 13391 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 5.602090e+04 2.694960e+04 1.877820e+08 4.451334e+13 3352 +ff82dda0 7372800 8.856576e+08 1.747013e+04 6.587010e+03 8.485240e+07 6.140270e+12 4857 +d39bff17 3276800 2.625536e+08 5.738305e+03 2.242393e+03 1.899954e+07 4.527495e+11 3311 +2c1922b7 819200 3.287040e+07 7.967565e+02 3.073908e+02 1.198481e+07 3.961722e+10 15042 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.attila b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.attila new file mode 100644 index 0000000..ba37cb9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 4.361594e+04 1.992990e+03 1.840593e+07 8.044680e+11 422 +d39bff17 3276800 2.625536e+08 1.391260e+04 9.732436e+02 6.552836e+06 9.161314e+10 471 +2c1922b7 819200 3.287040e+07 2.026126e+03 2.243730e+02 1.355478e+06 2.780050e+09 669 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.302421e+03 2.284702e+02 1.739468e+06 1.097727e+10 276 +d39bff17 3276800 2.625536e+08 2.553136e+03 1.599096e+02 6.714747e+05 1.721091e+09 263 +2c1922b7 819200 3.287040e+07 6.889531e+02 1.162392e+02 1.198778e+05 8.494121e+07 174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.325409e+03 1.859324e+02 1.688884e+06 1.069211e+10 267 +d39bff17 3276800 2.625536e+08 2.539349e+03 1.556256e+02 6.297585e+05 1.605183e+09 248 +2c1922b7 819200 3.287040e+07 6.837878e+02 1.012279e+02 1.319710e+05 9.221787e+07 193 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 
8.856576e+08 6.297128e+03 2.218514e+02 1.542796e+06 9.727245e+09 245 +d39bff17 3276800 2.625536e+08 2.528040e+03 1.085312e+02 7.255475e+05 1.837594e+09 287 +2c1922b7 819200 3.287040e+07 6.470080e+02 5.924722e+01 1.598110e+05 1.042660e+08 247 diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal new file mode 100644 index 0000000..384b2b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.551780e+04 9.258624e+03 5.415867e+08 1.139602e+13 34901 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.787309e+04 1.121893e+04 5.782658e+08 1.440761e+13 
32354 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.675795e+04 1.012077e+04 5.931309e+08 1.356507e+13 35394 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch new file mode 100644 index 0000000..384b2b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.551780e+04 9.258624e+03 5.415867e+08 1.139602e+13 34901 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.787309e+04 1.121893e+04 5.782658e+08 1.440761e+13 32354 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 1.675795e+04 1.012077e+04 5.931309e+08 1.356507e+13 35394 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.idgraf b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.idgraf new file mode 100644 index 0000000..0a678cb --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 4.711469e+04 4.337925e+02 3.203799e+06 1.509588e+11 68 +2c1922b7 819200 3.287040e+07 1.979166e+03 8.798869e+01 6.828124e+05 1.354070e+09 345 +d39bff17 3276800 2.625536e+08 1.482664e+04 2.506296e+02 2.298130e+06 3.408328e+10 155 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.573848e+03 6.169449e+02 1.360787e+06 9.024393e+09 207 +2c1922b7 819200 3.287040e+07 6.955196e+02 8.976154e+01 1.286711e+05 9.098386e+07 185 +d39bff17 3276800 2.625536e+08 2.647434e+03 2.520462e+02 4.685958e+05 1.251821e+09 177 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.555664e+03 6.950469e+02 1.252132e+06 8.300825e+09 191 +2c1922b7 819200 3.287040e+07 6.812499e+02 
8.342802e+01 1.273937e+05 8.808853e+07 187 +d39bff17 3276800 2.625536e+08 2.596800e+03 1.668067e+02 5.894736e+05 1.537061e+09 227 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.446442e+03 5.413553e+02 1.276395e+06 8.286236e+09 198 +2c1922b7 819200 3.287040e+07 6.941204e+02 8.002896e+01 1.277182e+05 8.983023e+07 184 +d39bff17 3276800 2.625536e+08 2.630763e+03 2.300111e+02 4.603835e+05 1.220418e+09 175 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.554622e+03 7.028631e+02 1.238824e+06 8.213390e+09 189 +2c1922b7 819200 3.287040e+07 6.905951e+02 7.284704e+01 1.353566e+05 9.451674e+07 196 +d39bff17 3276800 2.625536e+08 2.623425e+03 2.211699e+02 4.905805e+05 1.296149e+09 187 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.504271e+03 6.367049e+02 9.951534e+05 6.534773e+09 153 +2c1922b7 819200 3.287040e+07 7.029111e+02 9.289767e+01 7.169693e+04 5.127683e+07 102 +d39bff17 3276800 2.625536e+08 2.684586e+03 3.481310e+02 4.080571e+05 1.113886e+09 152 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.618862e+03 8.843940e+02 8.405955e+05 5.663119e+09 127 +2c1922b7 819200 3.287040e+07 7.079333e+02 9.356613e+01 6.796160e+04 4.895273e+07 96 +d39bff17 3276800 2.625536e+08 2.800887e+03 4.371231e+02 3.221020e+05 9.241450e+08 115 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.576395e+03 7.489644e+02 8.878133e+05 5.914339e+09 135 +2c1922b7 819200 3.287040e+07 7.050156e+02 1.025857e+02 8.037177e+04 5.786307e+07 114 +d39bff17 3276800 2.625536e+08 2.645162e+03 2.750078e+02 4.205807e+05 1.124529e+09 159 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 8.856576e+08 6.544427e+03 6.576164e+02 9.358531e+05 6.186464e+09 143 +2c1922b7 819200 3.287040e+07 7.150712e+02 1.054194e+02 8.223319e+04 6.008061e+07 115 +d39bff17 3276800 2.625536e+08 2.613530e+03 2.505172e+02 3.972565e+05 1.047781e+09 152 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.mirage b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.mirage new file mode 100644 index 0000000..7e7d4c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.mirage @@ -0,0 +1,148 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# 
number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 1.868713e+03 9.872163e+01 2.535843e+06 4.751987e+09 1357 +1827caf5 8192 3.287040e+04 1.868713e+00 9.872163e-02 2.535843e+03 4.751987e+05 1357 +ff82dda0 7372800 8.856576e+08 4.564676e+04 7.031596e+02 5.687586e+07 2.596815e+12 1246 +d39bff17 3276800 2.625536e+08 1.417018e+04 5.255501e+02 8.232874e+06 1.168218e+11 581 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 6.829247e+02 7.632991e+01 1.420483e+05 9.822018e+07 208 +1827caf5 8192 3.287040e+04 6.829247e-01 7.632991e-02 1.420483e+02 9.822018e+03 208 +ff82dda0 7372800 8.856576e+08 6.393638e+03 3.362739e+02 3.650767e+06 2.340625e+10 571 +d39bff17 3276800 2.625536e+08 2.602204e+03 1.876810e+02 6.895839e+05 1.803772e+09 265 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 6.842707e+02 6.860830e+01 1.211159e+05 8.370922e+07 177 +1827caf5 8192 3.287040e+04 6.842707e-01 6.860830e-02 1.211159e+02 8.370922e+03 177 +ff82dda0 7372800 8.856576e+08 6.416842e+03 2.746569e+02 2.951747e+06 1.897560e+10 460 +d39bff17 3276800 2.625536e+08 2.601751e+03 2.008905e+02 6.114115e+05 1.600224e+09 235 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 3.287040e+07 6.827700e+02 7.214633e+01 1.297263e+05 8.956219e+07 190 +1827caf5 8192 3.287040e+04 6.827700e-01 7.214633e-02 1.297263e+02 8.956219e+03 190 +ff82dda0 7372800 8.856576e+08 6.409443e+03 3.434222e+02 3.243178e+06 2.084664e+10 506 +d39bff17 3276800 2.625536e+08 2.604891e+03 2.068197e+02 6.069396e+05 1.590978e+09 233 + diff --git a/tools/perfmodels/sampling/codelets/45/chol_model_trsm.sirocco b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.sirocco new file mode 100644 index 0000000..c6d1f39 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/chol_model_trsm.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# 
device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 6.085177e+04 1.761936e+04 4.551712e+07 3.002008e+12 748 +ff82dda0 7372800 8.856576e+08 1.775772e+04 3.736007e+03 2.386637e+07 4.425714e+11 1344 +d39bff17 3276800 2.625536e+08 5.276862e+03 9.789431e+02 7.070995e+06 3.859682e+10 1340 +2c1922b7 819200 3.287040e+07 7.675336e+02 1.464194e+02 2.842177e+06 2.260854e+09 3703 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 5.422549e+03 1.109859e+03 2.917331e+06 1.648207e+10 538 +ff82dda0 7372800 8.856576e+08 2.018325e+03 2.870643e+02 1.687320e+06 3.474450e+09 836 +d39bff17 3276800 2.625536e+08 1.179394e+03 1.705358e+02 3.538181e+05 4.260157e+08 300 +2c1922b7 819200 3.287040e+07 4.644748e+02 7.687001e+01 3.297771e+04 1.573685e+07 71 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### 
+# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 5.480822e+03 1.130650e+03 2.899355e+06 1.656711e+10 529 +ff82dda0 7372800 8.856576e+08 2.005118e+03 2.787124e+02 1.836689e+06 3.753933e+09 916 +d39bff17 3276800 2.625536e+08 1.227664e+03 1.874122e+02 2.970946e+05 3.732321e+08 242 +2c1922b7 819200 3.287040e+07 4.209987e+02 9.547071e+01 6.441281e+04 2.851225e+07 153 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 5.510320e+03 1.138149e+03 2.992104e+06 1.719084e+10 543 +ff82dda0 7372800 8.856576e+08 2.005118e+03 2.787124e+02 1.836689e+06 3.753933e+09 916 +d39bff17 3276800 2.625536e+08 1.227664e+03 1.874122e+02 2.970946e+05 3.732321e+08 242 +2c1922b7 819200 3.287040e+07 4.209987e+02 9.547071e+01 6.441281e+04 2.851225e+07 153 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 2.988058e+09 5.534879e+03 1.226333e+03 3.210230e+06 1.864049e+10 580 +ff82dda0 7372800 8.856576e+08 2.051755e+03 2.742098e+02 1.811700e+06 3.783559e+09 883 +d39bff17 3276800 2.625536e+08 1.153240e+03 1.913332e+02 3.194475e+05 3.785401e+08 277 +2c1922b7 819200 3.287040e+07 4.950127e+02 6.747714e+01 5.445140e+04 2.745498e+07 110 + diff --git a/tools/perfmodels/sampling/codelets/45/cl_update.attila b/tools/perfmodels/sampling/codelets/45/cl_update.attila new file mode 100644 index 0000000..1aad3b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/cl_update.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +49ec0825 34613280 0.000000e+00 7.774706e+04 9.102018e+02 4.664824e+06 3.627260e+11 60 +6d78e48f 4461600 0.000000e+00 9.929947e+03 1.596124e+02 1.797320e+06 1.785191e+10 181 +8ec75d42 14753312 0.000000e+00 3.310870e+04 5.189822e+02 1.920304e+06 6.359440e+10 58 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of 
cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +49ec0825 34613280 0.000000e+00 5.051327e+03 6.437605e+02 2.692357e+06 1.382086e+10 533 +6d78e48f 4461600 0.000000e+00 1.009230e+03 9.724548e+01 3.835076e+05 3.906410e+08 380 +8ec75d42 14753312 0.000000e+00 1.883088e+03 3.340290e+02 1.069594e+06 2.077513e+09 568 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +49ec0825 34613280 0.000000e+00 5.105276e+03 6.922431e+02 2.807902e+06 1.459867e+10 550 +6d78e48f 4461600 0.000000e+00 1.012651e+03 9.766669e+01 3.686049e+05 3.767403e+08 364 +8ec75d42 14753312 0.000000e+00 2.097710e+03 2.383227e+02 9.880215e+05 2.099334e+09 471 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +49ec0825 34613280 0.000000e+00 4.938766e+03 7.348710e+02 2.780525e+06 1.403640e+10 563 +6d78e48f 4461600 0.000000e+00 1.022015e+03 1.088844e+02 3.740577e+05 3.866319e+08 366 +8ec75d42 14753312 0.000000e+00 1.829845e+03 3.604651e+02 8.893047e+05 1.690438e+09 486 + diff --git a/tools/perfmodels/sampling/codelets/45/cl_update.idgraf b/tools/perfmodels/sampling/codelets/45/cl_update.idgraf new file mode 100644 index 0000000..12052f7 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/cl_update.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.774670e+03 3.622348e+02 1.348749e+06 2.493306e+09 760 +6d78e48f 4461600 0.000000e+00 1.036351e+03 9.390524e+01 2.839601e+05 2.966985e+08 274 +49ec0825 34613280 0.000000e+00 4.962997e+03 6.650844e+02 5.096998e+06 2.575067e+10 1027 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.813689e+03 3.729019e+02 1.331248e+06 2.516537e+09 734 +6d78e48f 4461600 0.000000e+00 1.023951e+03 1.005326e+02 3.553110e+05 3.673281e+08 347 +49ec0825 34613280 0.000000e+00 5.017264e+03 7.095917e+02 4.365019e+06 2.233852e+10 870 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 4.692078e+04 5.010795e+02 1.501465e+06 7.045793e+10 32 +6d78e48f 4461600 0.000000e+00 1.405585e+04 1.896523e+02 7.590156e+05 1.067055e+10 54 +49ec0825 34613280 0.000000e+00 1.108029e+05 1.348959e+03 3.545692e+06 3.929311e+11 32 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.859413e+03 3.366203e+02 1.309027e+06 2.513795e+09 704 +6d78e48f 4461600 0.000000e+00 
1.027564e+03 1.046018e+02 3.483442e+05 3.616551e+08 339 +49ec0825 34613280 0.000000e+00 5.060000e+03 7.405627e+02 4.164380e+06 2.152312e+10 823 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.862800e+03 3.783314e+02 8.438483e+05 1.636760e+09 453 +6d78e48f 4461600 0.000000e+00 9.567271e+02 3.148502e+01 5.606421e+05 5.369624e+08 586 +49ec0825 34613280 0.000000e+00 4.965851e+03 6.509733e+02 5.810046e+06 2.934763e+10 1170 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.889366e+03 3.958521e+02 1.186522e+06 2.340180e+09 628 +6d78e48f 4461600 0.000000e+00 1.028680e+03 8.044529e+01 2.880303e+05 2.981029e+08 280 +49ec0825 34613280 0.000000e+00 5.035634e+03 7.113130e+02 4.899672e+06 2.516526e+10 973 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, 
MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.814024e+03 3.173708e+02 1.186372e+06 2.217980e+09 654 +6d78e48f 4461600 0.000000e+00 1.025445e+03 7.185494e+01 3.466003e+05 3.571646e+08 338 +49ec0825 34613280 0.000000e+00 5.092715e+03 7.051028e+02 3.513973e+06 1.823871e+10 690 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.793350e+03 3.531620e+02 1.388053e+06 2.585801e+09 774 +6d78e48f 4461600 0.000000e+00 1.033622e+03 1.055186e+02 3.783058e+05 3.951004e+08 366 +49ec0825 34613280 0.000000e+00 4.986601e+03 7.025210e+02 5.345636e+06 2.718562e+10 1072 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.781570e+03 3.261441e+02 1.501864e+06 2.765346e+09 843 +6d78e48f 4461600 0.000000e+00 1.022184e+03 1.018115e+02 3.751415e+05 3.872679e+08 367 +49ec0825 34613280 0.000000e+00 5.102994e+03 7.050225e+02 4.327339e+06 2.250389e+10 848 + diff --git a/tools/perfmodels/sampling/codelets/45/cl_update.mirage b/tools/perfmodels/sampling/codelets/45/cl_update.mirage new file mode 100644 index 0000000..d6f5236 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/cl_update.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6d78e48f 4461600 0.000000e+00 6.670318e+03 3.279077e+02 6.103341e+06 4.080961e+10 915 +8ec75d42 14753312 0.000000e+00 2.178007e+04 1.559694e+03 1.008417e+07 2.207603e+11 463 +49ec0825 34613280 0.000000e+00 5.101465e+04 2.613713e+03 2.443602e+07 1.249867e+12 479 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 
(Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6d78e48f 4461600 0.000000e+00 1.028619e+03 1.201323e+02 5.626547e+05 5.866515e+08 547 +8ec75d42 14753312 0.000000e+00 1.871093e+03 3.437894e+02 1.981488e+06 3.832713e+09 1059 +49ec0825 34613280 0.000000e+00 5.018828e+03 7.664203e+02 4.672528e+06 2.399748e+10 931 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6d78e48f 4461600 0.000000e+00 1.024201e+03 1.096599e+02 6.452464e+05 6.684377e+08 630 +8ec75d42 14753312 0.000000e+00 1.877457e+03 3.608958e+02 1.907496e+06 3.713572e+09 1016 +49ec0825 34613280 0.000000e+00 5.018101e+03 7.255196e+02 5.314169e+06 2.722447e+10 1059 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +6d78e48f 4461600 
0.000000e+00 1.010004e+03 1.090743e+02 5.383321e+05 5.500588e+08 533 +8ec75d42 14753312 0.000000e+00 1.986058e+03 3.264552e+02 1.288952e+06 2.629100e+09 649 +49ec0825 34613280 0.000000e+00 5.064765e+03 7.492118e+02 4.948276e+06 2.561026e+10 977 + diff --git a/tools/perfmodels/sampling/codelets/45/cl_update.sirocco b/tools/perfmodels/sampling/codelets/45/cl_update.sirocco new file mode 100644 index 0000000..2b0f064 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/cl_update.sirocco @@ -0,0 +1,178 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.292737e+03 8.111826e+01 1.783977e+06 2.315293e+09 1380 +6d78e48f 4461600 0.000000e+00 7.254397e+02 8.693801e+01 2.717497e+06 1.999693e+09 3746 +49ec0825 34613280 0.000000e+00 2.847204e+03 1.159244e+02 5.255939e+06 1.498954e+10 1846 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 2.010356e+04 1.739800e+03 4.201644e+06 8.510064e+10 209 +6d78e48f 4461600 0.000000e+00 6.471465e+03 9.708551e+02 2.344612e+07 1.551456e+11 3623 +49ec0825 34613280 0.000000e+00 4.705100e+04 5.067137e+03 1.383299e+07 6.584049e+11 294 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.333639e+03 8.095890e+01 1.871095e+06 2.504561e+09 1403 +6d78e48f 4461600 0.000000e+00 7.466797e+02 9.599725e+01 2.594712e+06 1.969443e+09 3475 +49ec0825 34613280 0.000000e+00 2.914989e+03 1.085303e+02 5.037101e+06 1.470345e+10 1728 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.319437e+03 8.470023e+01 1.921100e+06 2.545216e+09 1456 +6d78e48f 4461600 0.000000e+00 7.342135e+02 9.435309e+01 2.608661e+06 1.946944e+09 3553 +49ec0825 34613280 
0.000000e+00 2.843027e+03 1.019747e+02 5.080489e+06 1.446255e+10 1787 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8ec75d42 14753312 0.000000e+00 1.324805e+03 7.460278e+01 1.748742e+06 2.324088e+09 1320 +6d78e48f 4461600 0.000000e+00 7.321696e+02 8.668478e+01 2.571380e+06 1.909076e+09 3512 +49ec0825 34613280 0.000000e+00 2.873920e+03 1.173279e+02 5.566783e+06 1.602515e+10 1937 + diff --git a/tools/perfmodels/sampling/codelets/45/func.mirage b/tools/perfmodels/sampling/codelets/45/func.mirage new file mode 100644 index 0000000..aff7d32 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/func.mirage @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +2669802c 1 0.000000e+00 2.118378e+02 3.509374e+01 7.202485e+03 
1.567632e+06 34 +4cd30058 2 0.000000e+00 3.688151e+02 7.632401e+01 1.143327e+04 4.397349e+06 31 + diff --git a/tools/perfmodels/sampling/codelets/45/log_arr.mirage b/tools/perfmodels/sampling/codelets/45/log_arr.mirage new file mode 100644 index 0000000..0b33584 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/log_arr.mirage @@ -0,0 +1,62 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +23 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +06c91a73 225 0.000000e+00 2.857770e+02 5.674783e+01 1.428885e+04 4.244441e+06 50 +bb714cfa 200 0.000000e+00 2.519470e+02 5.692391e+01 1.385708e+04 3.669469e+06 55 +8c736af3 175 0.000000e+00 2.361612e+02 4.646855e+01 1.346119e+04 3.302092e+06 57 +6aebe038 150 0.000000e+00 9.788850e+01 2.193830e+01 2.545101e+03 2.616496e+05 26 +0d619432 125 0.000000e+00 1.261692e+02 2.840256e+01 6.939307e+03 9.198957e+05 55 +5db8a67d 100 0.000000e+00 1.912077e+02 4.355993e+01 8.986764e+03 1.807520e+06 47 +3575f01c 75 0.000000e+00 1.486505e+02 3.245366e+01 8.027128e+03 1.250112e+06 54 +a1b2649e 50 0.000000e+00 1.601281e+02 2.952993e+01 8.486791e+03 1.405191e+06 53 +50d9324f 25 0.000000e+00 5.679895e+01 1.315071e+01 1.135979e+03 6.798124e+04 20 +8e55dcaa 22 0.000000e+00 1.354191e+02 2.833115e+01 1.218772e+03 1.722689e+05 9 +a83c5c86 23 0.000000e+00 1.611343e+02 3.529197e+01 3.544955e+03 
5.986154e+05 22 +c286dcf2 20 0.000000e+00 1.669332e+02 3.870204e+01 8.179726e+03 1.438862e+06 49 +7d495c6e 17 0.000000e+00 1.598129e+02 3.241270e+01 3.036446e+03 5.052245e+05 19 +17f3dc1a 18 0.000000e+00 1.605213e+02 3.804432e+01 5.618246e+03 9.525062e+05 35 +de8ceee5 15 0.000000e+00 8.465237e+01 1.742428e+01 5.079142e+03 4.481777e+05 60 +b4366e91 12 0.000000e+00 1.584736e+02 3.363140e+01 3.169472e+03 5.248991e+05 20 +925feebd 13 0.000000e+00 1.141695e+02 1.417130e+01 9.133560e+02 1.058840e+05 8 +61436e79 10 0.000000e+00 1.475159e+02 3.333011e+01 8.555921e+03 1.326566e+06 58 +f31c80c4 7 0.000000e+00 1.253070e+02 3.041314e+01 3.007369e+03 3.990435e+05 24 +2d906e21 8 0.000000e+00 8.466750e+01 1.652881e+01 2.540025e+03 2.232536e+05 30 +bfcf809c 5 0.000000e+00 8.161995e+01 1.779563e+01 3.264798e+03 2.791400e+05 40 +4cd30058 2 0.000000e+00 1.732845e+02 3.197810e+01 4.332113e+03 7.762531e+05 25 +6aba8074 3 0.000000e+00 1.697346e+02 2.859508e+01 3.564427e+03 6.221779e+05 21 + diff --git a/tools/perfmodels/sampling/codelets/45/log_list.mirage b/tools/perfmodels/sampling/codelets/45/log_list.mirage new file mode 100644 index 0000000..7dc6547 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/log_list.mirage @@ -0,0 +1,62 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +23 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 
n +06c91a73 225 0.000000e+00 2.019197e+02 4.410081e+01 1.029790e+04 2.178539e+06 51 +bb714cfa 200 0.000000e+00 2.154911e+02 4.319096e+01 1.077456e+04 2.415094e+06 50 +8c736af3 175 0.000000e+00 1.504727e+02 3.433867e+01 7.674106e+03 1.214880e+06 51 +6aebe038 150 0.000000e+00 1.811410e+02 3.948222e+01 9.238192e+03 1.752917e+06 51 +0d619432 125 0.000000e+00 1.395840e+02 2.983957e+01 6.979199e+03 1.018704e+06 50 +5db8a67d 100 0.000000e+00 1.437386e+02 3.298439e+01 8.624317e+03 1.304926e+06 60 +3575f01c 75 0.000000e+00 1.142060e+02 2.420593e+01 5.824505e+03 6.950756e+05 51 +a1b2649e 50 0.000000e+00 5.032226e+01 1.023252e+01 1.559990e+03 8.174806e+04 31 +50d9324f 25 0.000000e+00 1.034515e+02 2.275354e+01 6.413990e+03 6.956354e+05 62 +a83c5c86 23 0.000000e+00 1.001690e+02 2.097800e+01 1.402366e+03 1.466347e+05 14 +8e55dcaa 22 0.000000e+00 1.677354e+02 3.762885e+01 3.690179e+03 6.501242e+05 22 +c286dcf2 20 0.000000e+00 8.453644e+01 2.071754e+01 4.564968e+03 4.090839e+05 54 +17f3dc1a 18 0.000000e+00 9.082029e+01 1.856443e+01 2.542968e+03 2.406029e+05 28 +7d495c6e 17 0.000000e+00 1.579351e+02 3.138662e+01 3.632508e+03 5.963584e+05 23 +de8ceee5 15 0.000000e+00 6.749869e+01 1.578830e+01 3.712428e+03 2.642939e+05 55 +b4366e91 12 0.000000e+00 1.045285e+02 2.047968e+01 2.195099e+03 2.382582e+05 21 +925feebd 13 0.000000e+00 5.843426e+01 1.319529e+01 1.577725e+03 9.689431e+04 27 +61436e79 10 0.000000e+00 8.635067e+01 1.606126e+01 3.367676e+03 3.008617e+05 39 +f31c80c4 7 0.000000e+00 8.667993e+01 1.877054e+01 2.340358e+03 2.123751e+05 27 +2d906e21 8 0.000000e+00 9.561275e+01 1.242246e+01 1.147353e+03 1.115534e+05 12 +bfcf809c 5 0.000000e+00 9.516219e+01 2.051119e+01 4.091974e+03 4.074917e+05 43 +4cd30058 2 0.000000e+00 3.056131e+02 6.553774e+01 4.278584e+03 1.367724e+06 14 +6aba8074 3 0.000000e+00 5.585311e+01 1.149143e+01 1.005356e+03 5.852921e+04 18 + diff --git a/tools/perfmodels/sampling/codelets/45/multi.mirage b/tools/perfmodels/sampling/codelets/45/multi.mirage new file mode 
100644 index 0000000..51dadc7 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/multi.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 5.623175e+01 1.128390e+01 4.723467e+03 2.763042e+05 84 + diff --git a/tools/perfmodels/sampling/codelets/45/multi_2arr.mirage b/tools/perfmodels/sampling/codelets/45/multi_2arr.mirage new file mode 100644 index 0000000..acdc43b --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/multi_2arr.mirage @@ -0,0 +1,41 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +2 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +bfcf809c 5 
0.000000e+00 9.442641e+01 1.743873e+01 5.476732e+03 5.347865e+05 58 +50d9324f 25 0.000000e+00 9.340223e+01 2.207403e+01 1.550477e+04 1.529066e+06 166 + diff --git a/tools/perfmodels/sampling/codelets/45/multi_list.mirage b/tools/perfmodels/sampling/codelets/45/multi_list.mirage new file mode 100644 index 0000000..cc18c78 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/multi_list.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 1.154125e+02 2.553199e+01 1.131042e+04 1.369248e+06 98 + diff --git a/tools/perfmodels/sampling/codelets/45/null.idgraf b/tools/perfmodels/sampling/codelets/45/null.idgraf new file mode 100644 index 0000000..61ff8f4 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/null.idgraf @@ -0,0 +1,8 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +0 diff --git a/tools/perfmodels/sampling/codelets/45/null.sirocco b/tools/perfmodels/sampling/codelets/45/null.sirocco new file mode 100644 index 0000000..61ff8f4 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/null.sirocco @@ -0,0 +1,8 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +0 diff --git 
a/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila new file mode 100644 index 0000000..ce43951 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila @@ -0,0 +1,136 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 + diff --git a/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage new file mode 100644 index 0000000..ce43951 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage @@ -0,0 +1,136 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n 
minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# 
number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 + diff --git a/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco new file mode 100644 index 0000000..99555b8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco @@ -0,0 +1,168 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb4) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_bottom.attila b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.attila new file mode 100644 index 0000000..8e61330 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 4.419323e+01 9.394629e+00 1.825180e+04 8.430572e+05 413 +fb4b8624 4427800 0.000000e+00 1.267467e+01 2.411186e+00 5.754301e+03 7.557335e+04 454 +4af260f6 14678040 0.000000e+00 2.442142e+01 5.135780e+00 1.394463e+04 3.556084e+05 571 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash 
size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 3.910144e+01 6.371108e+00 1.329449e+03 5.336347e+04 34 +fb4b8624 4427800 0.000000e+00 3.998483e+01 8.150933e+00 2.519044e+03 1.049091e+05 63 +4af260f6 14678040 0.000000e+00 3.398450e+01 5.156207e+00 8.156280e+02 2.835679e+04 24 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 3.173112e+01 3.816078e+00 7.615470e+02 2.451424e+04 24 +fb4b8624 4427800 0.000000e+00 2.860497e+01 5.248990e+00 1.029779e+03 3.044867e+04 36 +4af260f6 14678040 0.000000e+00 3.652883e+01 8.229435e+00 1.716855e+03 6.589771e+04 47 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 3.719045e+01 7.851419e+00 2.566141e+03 9.968943e+04 69 +fb4b8624 4427800 0.000000e+00 4.509905e+01 6.110617e+00 9.470800e+02 4.349654e+04 21 +4af260f6 14678040 0.000000e+00 2.634116e+01 3.746211e+00 6.479926e+03 
1.741412e+05 246 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_bottom.idgraf b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.idgraf new file mode 100644 index 0000000..8fe36cd --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.985465e+01 6.158621e+00 3.552703e+03 1.105782e+05 119 +fb4b8624 4427800 0.000000e+00 1.132689e+01 2.249702e+00 2.423954e+03 2.853894e+04 214 +f2ff9ae5 34480152 0.000000e+00 5.622304e+01 1.121739e+01 9.276802e+03 5.423319e+05 165 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.632587e+01 4.244468e+00 1.174134e+04 3.171360e+05 446 +fb4b8624 4427800 0.000000e+00 2.560067e+01 
2.946464e+00 6.809779e+03 1.766442e+05 266 +f2ff9ae5 34480152 0.000000e+00 2.687395e+01 3.041318e+00 1.378634e+04 3.752385e+05 513 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.682825e+01 3.688517e+00 9.309402e+03 2.544759e+05 347 +fb4b8624 4427800 0.000000e+00 2.601287e+01 3.034296e+00 6.711320e+03 1.769561e+05 258 +f2ff9ae5 34480152 0.000000e+00 2.650277e+01 3.250317e+00 1.327789e+04 3.571937e+05 501 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.844841e+01 4.987346e+00 1.118023e+04 3.278350e+05 393 +fb4b8624 4427800 0.000000e+00 2.545228e+01 3.038424e+00 7.533874e+03 1.944869e+05 296 +f2ff9ae5 34480152 0.000000e+00 2.671593e+01 2.977811e+00 9.510870e+03 2.572485e+05 356 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.683154e+01 3.643920e+00 1.145707e+04 3.130806e+05 427 +fb4b8624 4427800 0.000000e+00 2.439165e+01 2.519213e+00 6.951620e+03 1.713702e+05 285 +f2ff9ae5 34480152 0.000000e+00 2.686670e+01 3.337051e+00 1.332588e+04 3.635460e+05 496 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.743422e+01 5.313897e+00 8.504608e+03 2.420709e+05 310 +fb4b8624 4427800 0.000000e+00 2.591823e+01 3.009457e+00 7.879143e+03 2.069667e+05 304 +f2ff9ae5 34480152 0.000000e+00 2.671837e+01 2.963498e+00 1.282482e+04 3.468736e+05 480 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.572873e+01 2.716127e+00 1.232406e+04 3.206163e+05 479 +fb4b8624 4427800 0.000000e+00 2.555023e+01 2.989409e+00 6.362008e+03 1.647760e+05 249 +f2ff9ae5 34480152 0.000000e+00 2.648407e+01 2.593556e+00 1.504295e+04 4.022192e+05 568 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.535931e+01 2.801001e+00 7.303480e+03 1.874707e+05 288 +fb4b8624 4427800 0.000000e+00 2.706983e+01 4.764143e+00 1.228970e+04 3.429847e+05 454 +f2ff9ae5 34480152 0.000000e+00 2.645378e+01 3.704623e+00 1.251264e+04 3.374982e+05 473 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.565071e+01 2.881053e+00 1.064505e+04 2.764977e+05 415 +fb4b8624 4427800 0.000000e+00 
2.622930e+01 4.388391e+00 8.262230e+03 2.227788e+05 315 +f2ff9ae5 34480152 0.000000e+00 2.628917e+01 2.974884e+00 1.204044e+04 3.205863e+05 458 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_bottom.mirage b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.mirage new file mode 100644 index 0000000..26e4b4e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 1.291871e+01 2.707056e+00 1.452063e+04 1.958246e+05 1124 +4af260f6 14678040 0.000000e+00 2.737328e+01 5.740626e+00 2.241872e+04 6.406639e+05 819 +f2ff9ae5 34480152 0.000000e+00 4.727593e+01 1.006422e+01 4.179193e+04 2.065291e+06 884 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 
4427800 0.000000e+00 4.529760e+01 1.106616e+01 3.940891e+03 1.891669e+05 87 +4af260f6 14678040 0.000000e+00 5.186998e+01 9.163546e+00 2.386019e+03 1.276254e+05 46 +f2ff9ae5 34480152 0.000000e+00 5.555996e+01 1.123960e+01 2.777998e+03 1.606619e+05 50 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 4.174505e+01 7.717897e+00 1.711547e+03 7.389082e+04 41 +4af260f6 14678040 0.000000e+00 4.338701e+01 9.950636e+00 8.677401e+03 3.962895e+05 200 +f2ff9ae5 34480152 0.000000e+00 5.523574e+01 1.243105e+01 2.154194e+03 1.250152e+05 39 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 5.145250e+01 1.035300e+01 1.955195e+03 1.046727e+05 38 +4af260f6 14678040 0.000000e+00 4.717138e+01 8.365012e+00 2.217055e+03 1.078703e+05 47 +f2ff9ae5 34480152 0.000000e+00 4.374296e+01 8.852326e+00 3.455694e+03 1.573530e+05 79 + diff --git 
a/tools/perfmodels/sampling/codelets/45/save_cl_bottom.sirocco b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.sirocco new file mode 100644 index 0000000..5873348 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_bottom.sirocco @@ -0,0 +1,178 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.447447e+01 7.398265e+00 3.237153e+04 1.167387e+06 939 +fb4b8624 4427800 0.000000e+00 5.439097e+01 1.253425e+01 3.094846e+05 1.772711e+07 5690 +f2ff9ae5 34480152 0.000000e+00 5.041329e+01 1.085485e+01 6.226042e+04 3.284270e+06 1235 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.176283e+01 6.812714e+00 1.673901e+04 5.561382e+05 527 +fb4b8624 4427800 0.000000e+00 5.311651e+01 1.346481e+01 1.290731e+04 
7.296474e+05 243 +f2ff9ae5 34480152 0.000000e+00 4.192896e+01 9.759572e+00 3.207566e+04 1.417765e+06 765 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.786531e+01 9.328071e+00 1.991715e+04 7.999380e+05 526 +fb4b8624 4427800 0.000000e+00 5.555598e+01 1.303330e+01 9.444517e+03 5.535768e+05 170 +f2ff9ae5 34480152 0.000000e+00 4.359390e+01 1.022197e+01 2.218929e+04 1.020503e+06 509 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 4.035980e+01 9.947105e+00 1.989738e+04 8.518341e+05 493 +fb4b8624 4427800 0.000000e+00 8.863692e+01 1.909792e+01 1.161144e+04 1.076982e+06 131 +f2ff9ae5 34480152 0.000000e+00 3.838146e+01 9.359960e+00 2.890124e+04 1.175241e+06 753 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# 
DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.505264e+01 8.845541e+00 1.945422e+04 7.253469e+05 555 +fb4b8624 4427800 0.000000e+00 4.717545e+01 1.027132e+01 6.227160e+03 3.076951e+05 132 +f2ff9ae5 34480152 0.000000e+00 3.110432e+01 6.170515e+00 2.370149e+04 7.662320e+05 762 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_top.attila b/tools/perfmodels/sampling/codelets/45/save_cl_top.attila new file mode 100644 index 0000000..0318005 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_top.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 4.346555e+01 9.370422e+00 2.103733e+04 9.568966e+05 484 +fb4b8624 4427800 0.000000e+00 1.080055e+01 2.408554e+00 6.631537e+03 7.518614e+04 614 +4af260f6 14678040 0.000000e+00 2.045608e+01 4.186697e+00 1.294870e+04 2.759751e+05 633 + +#################### +# COMB_2 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 3.184284e+01 5.707419e+00 4.840111e+03 1.590742e+05 152 +fb4b8624 4427800 0.000000e+00 3.194475e+01 5.964283e+00 1.150011e+03 3.801743e+04 36 +4af260f6 14678040 0.000000e+00 3.430576e+01 6.297323e+00 5.523228e+03 1.958632e+05 161 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 2.735681e+01 5.823931e+00 3.063963e+03 8.761910e+04 112 +fb4b8624 4427800 0.000000e+00 3.161427e+01 5.733859e+00 2.212999e+03 7.226375e+04 70 +4af260f6 14678040 0.000000e+00 3.666193e+01 6.692591e+00 6.819119e+03 2.583331e+05 186 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of 
entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f2ff9ae5 34480152 0.000000e+00 3.900993e+01 8.465923e+00 7.489907e+03 3.059418e+05 192 +fb4b8624 4427800 0.000000e+00 3.364966e+01 7.354940e+00 1.278687e+03 4.508300e+04 38 +4af260f6 14678040 0.000000e+00 2.853135e+01 5.469952e+00 8.730594e+03 2.582513e+05 306 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_top.idgraf b/tools/perfmodels/sampling/codelets/45/save_cl_top.idgraf new file mode 100644 index 0000000..d390332 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_top.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.246622e+01 6.891153e+00 3.928413e+03 1.332868e+05 121 +fb4b8624 4427800 0.000000e+00 1.139753e+01 2.243693e+00 2.644226e+03 3.130556e+04 232 +f2ff9ae5 34480152 0.000000e+00 5.591168e+01 1.328211e+01 1.241239e+04 7.331618e+05 222 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.604823e+01 3.286196e+00 1.130493e+04 2.991603e+05 434 +fb4b8624 4427800 0.000000e+00 2.652276e+01 4.354433e+00 7.850738e+03 2.138358e+05 296 +f2ff9ae5 34480152 0.000000e+00 2.714414e+01 3.836601e+00 1.555359e+04 4.306232e+05 573 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.599288e+01 3.808778e+00 1.115095e+04 2.960687e+05 429 +fb4b8624 4427800 0.000000e+00 2.539365e+01 2.861737e+00 6.678529e+03 1.717461e+05 263 +f2ff9ae5 34480152 0.000000e+00 2.629746e+01 2.517281e+00 1.159718e+04 3.077710e+05 441 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base 
+0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.561750e+01 2.633232e+00 1.155349e+04 2.990988e+05 451 +fb4b8624 4427800 0.000000e+00 2.673210e+01 4.378492e+00 1.031859e+04 2.832378e+05 386 +f2ff9ae5 34480152 0.000000e+00 2.631930e+01 2.903449e+00 1.339652e+04 3.568781e+05 509 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.607685e+01 3.121182e+00 7.979517e+03 2.110617e+05 306 +fb4b8624 4427800 0.000000e+00 2.621449e+01 3.800716e+00 7.654632e+03 2.048804e+05 292 +f2ff9ae5 34480152 0.000000e+00 2.661811e+01 2.706929e+00 1.810031e+04 4.867788e+05 680 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.759081e+01 5.463486e+00 5.435390e+03 1.558472e+05 197 +fb4b8624 4427800 0.000000e+00 2.575898e+01 3.723342e+00 7.779212e+03 2.045713e+05 302 +f2ff9ae5 34480152 0.000000e+00 2.684177e+01 3.098778e+00 
1.181038e+04 3.212366e+05 440 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.613306e+01 2.901462e+00 1.100202e+04 2.910606e+05 421 +fb4b8624 4427800 0.000000e+00 2.615768e+01 3.461177e+00 7.010257e+03 1.865826e+05 268 +f2ff9ae5 34480152 0.000000e+00 2.749333e+01 3.923485e+00 1.492888e+04 4.188033e+05 543 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.628755e+01 3.829892e+00 1.025215e+04 2.752243e+05 390 +fb4b8624 4427800 0.000000e+00 2.540957e+01 3.333356e+00 8.258109e+03 2.134461e+05 325 +f2ff9ae5 34480152 0.000000e+00 2.728087e+01 3.903560e+00 1.404965e+04 3.911340e+05 515 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 2.705248e+01 4.049710e+00 1.163257e+04 3.217418e+05 430 +fb4b8624 4427800 0.000000e+00 2.626990e+01 3.774104e+00 6.908983e+03 1.852444e+05 263 +f2ff9ae5 34480152 0.000000e+00 2.670502e+01 3.597311e+00 1.303205e+04 3.543362e+05 488 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_top.mirage b/tools/perfmodels/sampling/codelets/45/save_cl_top.mirage new file mode 100644 index 0000000..618db91 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_top.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 1.161593e+01 2.312881e+00 1.684310e+04 2.034051e+05 1450 +4af260f6 14678040 0.000000e+00 2.793439e+01 6.208645e+00 2.807406e+04 8.229715e+05 1005 +f2ff9ae5 34480152 0.000000e+00 5.388292e+01 1.191766e+01 4.930288e+04 2.786541e+06 915 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 3.349897e+01 6.495369e+00 7.939257e+03 2.759560e+05 237 +4af260f6 14678040 0.000000e+00 3.814493e+01 8.460348e+00 1.609716e+04 6.442306e+05 422 +f2ff9ae5 34480152 0.000000e+00 3.894616e+01 8.022125e+00 1.339748e+04 5.439182e+05 344 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 4.505725e+01 1.044174e+01 7.209160e+02 3.422697e+04 16 +4af260f6 14678040 0.000000e+00 3.820932e+01 8.787776e+00 1.138638e+04 4.580788e+05 298 +f2ff9ae5 34480152 0.000000e+00 4.714002e+01 1.060923e+01 1.343491e+04 6.654002e+05 285 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +fb4b8624 4427800 0.000000e+00 2.397521e+01 2.109607e+00 5.754050e+02 1.390227e+04 24 +4af260f6 14678040 0.000000e+00 3.827520e+01 8.943097e+00 9.453975e+03 3.816076e+05 247 +f2ff9ae5 34480152 0.000000e+00 5.567087e+01 1.159966e+01 8.127947e+03 4.721345e+05 146 + diff --git a/tools/perfmodels/sampling/codelets/45/save_cl_top.sirocco b/tools/perfmodels/sampling/codelets/45/save_cl_top.sirocco new file mode 100644 index 0000000..4544970 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/save_cl_top.sirocco @@ -0,0 +1,178 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.501046e+01 7.466097e+00 2.835847e+04 1.037995e+06 810 +fb4b8624 4427800 0.000000e+00 2.773216e+01 6.482940e+00 1.680846e+05 4.916085e+06 6061 +f2ff9ae5 34480152 0.000000e+00 5.337509e+01 1.160081e+01 6.591824e+04 3.684596e+06 1235 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 
(Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.362211e+01 7.569501e+00 1.795421e+04 6.342550e+05 534 +fb4b8624 4427800 0.000000e+00 3.231969e+01 5.862640e+00 1.877774e+04 6.268602e+05 581 +f2ff9ae5 34480152 0.000000e+00 3.995777e+01 9.699452e+00 3.180638e+04 1.345799e+06 796 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.654429e+01 8.110997e+00 1.710273e+04 6.557958e+05 468 +fb4b8624 4427800 0.000000e+00 3.606370e+01 8.402269e+00 2.171034e+04 8.254553e+05 602 +f2ff9ae5 34480152 0.000000e+00 3.192218e+01 6.956874e+00 1.695068e+04 5.668019e+05 531 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 
0.000000e+00 3.551068e+01 7.784366e+00 1.278384e+04 4.757777e+05 360 +fb4b8624 4427800 0.000000e+00 3.460669e+01 7.284566e+00 1.996806e+04 7.216470e+05 577 +f2ff9ae5 34480152 0.000000e+00 3.547098e+01 8.430109e+00 2.805755e+04 1.051443e+06 791 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +4af260f6 14678040 0.000000e+00 3.712899e+01 8.969310e+00 2.030956e+04 7.980787e+05 547 +fb4b8624 4427800 0.000000e+00 3.619911e+01 8.162351e+00 2.287784e+04 8.702638e+05 632 +f2ff9ae5 34480152 0.000000e+00 3.132201e+01 6.179930e+00 2.584066e+04 8.408892e+05 825 + diff --git a/tools/perfmodels/sampling/codelets/45/scal.mirage b/tools/perfmodels/sampling/codelets/45/scal.mirage new file mode 100644 index 0000000..6679bb3 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/scal.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c 
+nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 1.236737e+02 2.505671e+01 2.597148e+04 3.343836e+06 210 + diff --git a/tools/perfmodels/sampling/codelets/45/scal_arr.mirage b/tools/perfmodels/sampling/codelets/45/scal_arr.mirage new file mode 100644 index 0000000..c98a8c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/scal_arr.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 1.147772e+02 2.771885e+01 1.296983e+04 1.575463e+06 113 + diff --git a/tools/perfmodels/sampling/codelets/45/sqrt.mirage b/tools/perfmodels/sampling/codelets/45/sqrt.mirage new file mode 100644 index 0000000..c1550fc --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/sqrt.mirage @@ -0,0 +1,40 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +1 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model 
for cpu0_cores1_impl0 (Comb0) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us or J) dev (us or J) sum sum2 n +50d9324f 25 0.000000e+00 1.164091e+02 2.495470e+01 1.268860e+04 1.544947e+06 109 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila new file mode 100644 index 0000000..5d13801 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 6.739553e+05 1.180373e+04 5.391643e+07 3.634841e+13 80 +0b0b0ce8 7372800 2.621440e+08 2.919185e+04 3.106016e+03 2.335348e+06 6.894489e+10 80 +4220e23d 29491200 2.097152e+09 2.058556e+05 4.934163e+03 1.646845e+07 3.392071e+12 80 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# 
sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 2.313178e+04 3.749098e+01 2.451969e+06 5.671856e+10 106 +0b0b0ce8 7372800 2.621440e+08 1.039822e+03 4.572723e+01 9.982293e+04 1.039988e+08 96 +4220e23d 29491200 2.097152e+09 7.017044e+03 9.150160e+00 7.367896e+05 5.170094e+09 105 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 2.311999e+04 3.792305e+01 2.427599e+06 5.612623e+10 105 +0b0b0ce8 7372800 2.621440e+08 1.036523e+03 7.589914e+00 8.810443e+04 9.132713e+07 85 +4220e23d 29491200 2.097152e+09 7.017425e+03 3.431116e+01 7.298122e+05 5.121525e+09 104 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 2.312215e+04 3.532854e+01 
2.427826e+06 5.613668e+10 105 +0b0b0ce8 7372800 2.621440e+08 1.036998e+03 4.343669e+01 1.078478e+05 1.120342e+08 104 +4220e23d 29491200 2.097152e+09 7.010229e+03 3.598880e+01 7.360740e+05 5.160183e+09 105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf new file mode 100644 index 0000000..59cce02 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.052061e+03 3.198115e+01 6.838395e+04 7.201055e+07 65 +4220e23d 29491200 2.097152e+09 7.092203e+03 4.667104e+02 6.028372e+05 4.293959e+09 85 +492beed5 66355200 7.077888e+09 2.348390e+04 1.879558e+03 2.230970e+06 5.272750e+10 95 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base 
+0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.052063e+03 4.974434e+01 7.680058e+04 8.097966e+07 73 +4220e23d 29491200 2.097152e+09 7.169429e+03 6.510141e+02 6.165709e+05 4.456910e+09 86 +492beed5 66355200 7.077888e+09 2.369721e+04 2.666656e+03 2.203840e+06 5.288620e+10 93 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.073351e+03 1.039589e+02 5.796097e+04 6.279609e+07 54 +4220e23d 29491200 2.097152e+09 7.178253e+03 6.674450e+02 6.245080e+05 4.521634e+09 87 +492beed5 66355200 7.077888e+09 2.322028e+04 3.606800e+02 2.252367e+06 5.231319e+10 97 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.047275e+03 4.810046e+01 6.074194e+04 6.374769e+07 58 +4220e23d 29491200 2.097152e+09 7.215871e+03 7.571281e+02 6.277808e+05 4.579858e+09 87 +492beed5 66355200 7.077888e+09 2.323291e+04 1.169036e+03 
2.230359e+06 5.194892e+10 96 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.045464e+03 2.548321e+01 6.168239e+04 6.452506e+07 59 +4220e23d 29491200 2.097152e+09 7.130284e+03 4.158059e+02 5.632924e+05 4.030093e+09 79 +492beed5 66355200 7.077888e+09 2.322391e+04 7.530407e+02 2.090152e+06 4.859253e+10 90 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.058842e+03 8.984549e+01 6.353054e+04 6.775316e+07 60 +4220e23d 29491200 2.097152e+09 7.197321e+03 6.902584e+02 6.549562e+05 4.757287e+09 91 +492beed5 66355200 7.077888e+09 2.322727e+04 1.128695e+03 2.253045e+06 5.245566e+10 97 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.063382e+03 9.562944e+01 5.529587e+04 5.927619e+07 52 +4220e23d 29491200 2.097152e+09 7.227464e+03 8.541890e+02 6.287894e+05 4.608031e+09 87 +492beed5 66355200 7.077888e+09 2.322877e+04 9.079114e+02 2.253191e+06 5.241882e+10 97 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.057961e+03 6.059722e+01 5.289807e+04 5.614771e+07 50 +4220e23d 29491200 2.097152e+09 7.169935e+03 6.166650e+02 5.592549e+05 4.039483e+09 78 +492beed5 66355200 7.077888e+09 2.322622e+04 8.447450e+02 2.090360e+06 4.861539e+10 90 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 3.132122e+04 3.995607e+03 8.456730e+05 2.691857e+10 27 +4220e23d 29491200 2.097152e+09 2.241875e+05 7.780157e+03 6.053063e+06 1.358656e+12 27 +492beed5 66355200 7.077888e+09 7.222063e+05 7.344712e+03 1.661074e+07 1.199762e+13 23 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage new file mode 100644 index 0000000..8b0643f --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 2.783376e+04 1.016266e+03 2.254534e+06 6.283582e+10 81 +492beed5 66355200 7.077888e+09 7.068870e+05 1.582112e+04 5.725785e+07 4.049511e+13 81 +4220e23d 29491200 2.097152e+09 2.135531e+05 4.787239e+03 1.729780e+07 3.695855e+12 81 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.040745e+03 1.710737e+01 1.040745e+05 1.083442e+08 100 +492beed5 66355200 7.077888e+09 2.322675e+04 6.514638e+01 2.438809e+06 5.664606e+10 105 +4220e23d 29491200 2.097152e+09 7.042883e+03 4.736092e+01 7.395027e+05 5.208467e+09 105 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.057967e+03 4.209841e+01 1.057967e+05 1.121067e+08 100 +492beed5 66355200 7.077888e+09 2.322865e+04 8.861437e+01 2.439008e+06 5.665569e+10 105 +4220e23d 29491200 2.097152e+09 7.053091e+03 5.410169e+01 7.405746e+05 5.223647e+09 105 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 7372800 2.621440e+08 1.050834e+03 7.708100e+01 1.019309e+05 1.076889e+08 97 +492beed5 66355200 7.077888e+09 
2.323864e+04 5.619683e+01 2.440057e+06 5.670394e+10 105 +4220e23d 29491200 2.097152e+09 7.040571e+03 3.296604e+01 7.392600e+05 5.204926e+09 105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco new file mode 100644 index 0000000..c856004 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 6.467396e+03 5.820387e+02 1.403425e+06 9.150018e+09 217 +0b0b0ce8 7372800 2.621440e+08 2.828637e+02 4.132770e+01 2.376055e+04 6.864469e+06 84 +4220e23d 29491200 2.097152e+09 2.091138e+03 2.430963e+02 3.764048e+05 7.977516e+08 180 +87a7dc42 149299200 2.388787e+10 2.171545e+04 9.979353e+02 3.431041e+06 7.466394e+10 158 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan 
nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 6.530201e+03 6.982602e+02 1.214617e+06 8.022384e+09 186 +0b0b0ce8 7372800 2.621440e+08 2.596160e+02 3.720670e+01 2.907699e+04 7.703898e+06 112 +4220e23d 29491200 2.097152e+09 2.068075e+03 2.561461e+02 4.156832e+05 8.728519e+08 201 +87a7dc42 149299200 2.388787e+10 2.178854e+04 1.485331e+03 3.355435e+06 7.344977e+10 154 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 6.594324e+03 6.341124e+02 1.384808e+06 9.216313e+09 210 +0b0b0ce8 7372800 2.621440e+08 2.592059e+02 3.728165e+01 2.773503e+04 7.337807e+06 107 +4220e23d 29491200 2.097152e+09 2.149687e+03 2.853500e+02 3.847940e+05 8.417616e+08 179 +87a7dc42 149299200 2.388787e+10 2.210351e+04 9.525598e+02 3.426044e+06 7.586825e+10 155 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 
7.077888e+09 6.615698e+03 6.959563e+02 1.210673e+06 8.098082e+09 183 +0b0b0ce8 7372800 2.621440e+08 2.665077e+02 3.721734e+01 3.278045e+04 8.906615e+06 123 +4220e23d 29491200 2.097152e+09 2.090283e+03 2.730830e+02 4.285080e+05 9.109906e+08 205 +87a7dc42 149299200 2.388787e+10 2.206407e+04 1.175820e+03 3.397867e+06 7.518367e+10 154 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 66355200 7.077888e+09 2.793361e+05 4.545353e+04 2.039154e+07 5.846913e+12 73 +0b0b0ce8 7372800 2.621440e+08 1.003329e+04 9.763114e+02 3.471519e+06 3.516056e+10 346 +4220e23d 29491200 2.097152e+09 8.266143e+04 1.577004e+04 6.860899e+06 5.877733e+11 83 +87a7dc42 149299200 2.388787e+10 9.422627e+05 1.729617e+05 1.517043e+08 1.477617e+14 161 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila new file mode 100644 index 0000000..e9b7aeb --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 +f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 +d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 +f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 +d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) 
sum sum2 n +24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 +f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 +d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 +f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 +d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf new file mode 100644 index 0000000..cda393e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 +f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 +d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 +f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 +d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 +f0ac7beb 9830400 0.000000e+00 
1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 +d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 +f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 +d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 +f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 +d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 
2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 +f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 +d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 +f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 +d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +# not multiple-regression-base +0 +nan nan nan +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 +f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 +d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 +f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 +d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage new file mode 100644 index 0000000..208f9d0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 +24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 +d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 +24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 +d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops 
mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 +24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 +d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 +24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 +d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco new file mode 100644 index 0000000..dc2e940 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 +d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 +f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 +8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 +d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 +f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 +8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 +d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 +f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 +8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 +d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 +f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 +8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 
0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 +d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 +f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 +8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila new file mode 100644 index 0000000..e9b7aeb --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 +f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 +d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# 
sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 +f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 +d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 +f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 +d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.840318e+03 
1.756302e+02 2.921911e+07 1.708032e+11 5003 +f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 +d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf new file mode 100644 index 0000000..cda393e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 +f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 +d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 
0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 +f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 +d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 +f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 +d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 +f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 
4162 +d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 +f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 +d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 +f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 +d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device 
id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 +f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 +d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +# not multiple-regression-base +0 +nan nan nan +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 +f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 +d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 +f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 +d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage new file mode 100644 index 0000000..208f9d0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 +24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 +d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 +24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 +d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 +24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 +d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 +24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 +d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco new file mode 100644 index 0000000..dc2e940 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 +d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 +f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 +8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# 
number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 +d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 +f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 +8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 +d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 +f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 +8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 +d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 +f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 +8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 +d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 +f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 +8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila new file mode 100644 index 0000000..e9b7aeb --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type 
(CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 +f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 +d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 +f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 +d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny 
sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 +f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 +d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 +f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 +d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf new file mode 100644 index 0000000..cda393e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 
+# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 +f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 +d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 +f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 +d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 +f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 +d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 +f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 +d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 +f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 +d46431bb 2457600 
0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 +f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 +d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 +f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 +d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +# not multiple-regression-base +0 +nan nan nan +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 +f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 +d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 +f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 +d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage new file mode 100644 index 0000000..208f9d0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 +24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 +d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 +24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 +d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 
(Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 +24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 +d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 +24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 +d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco new file mode 100644 index 0000000..dc2e940 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 +d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 +f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 +8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 +d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 +f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 +8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 +d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 +f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 +8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 +d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 +f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 +8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries 
+4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 +d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 +f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 +8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila new file mode 100644 index 0000000..e9b7aeb --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 +f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 +d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# 
device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 +f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 +d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 +f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 +d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 +f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 +d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf new file mode 100644 index 0000000..cda393e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 +f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 +d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 +f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 +d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 +f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 +d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 
0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 +f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 +d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 +f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 +d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 +f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 
+d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 +f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 +d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 +f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 +d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device
id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 +f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 +d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage new file mode 100644 index 0000000..208f9d0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 +24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 +d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 + +#################### +# 
COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 +24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 +d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 +24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 +d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# 
Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 +24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 +d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco new file mode 100644 index 0000000..dc2e940 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 +d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 +f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 +8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 + +#################### +# COMB_3 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 +d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 +f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 +8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 +d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 +f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 +8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 
+0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 +d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 +f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 +8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 +d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 +f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 +8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila new file mode 100644 index 0000000..8d368b2 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 +afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 +cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 +afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 +cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 +afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 +cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 +afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 +cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf new file mode 100644 index 0000000..4de4adf --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# 
COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 +afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 +cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 +afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 +cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## 
+# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 +cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 +617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 +afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 +cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops 
mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 +afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 +cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 +afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 +cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 +afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 +cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 + 
+#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 +cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 +afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 +cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 +afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage new file mode 100644 index 0000000..9bcc4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage @@ -0,0 
+1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 +617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 +cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 +617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 +cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### 
+# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 +617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 +cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 +617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 +cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco new file mode 100644 index 0000000..5c3b9e8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 +cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 +afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 +25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 +cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 +afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 +25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 +cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 +afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 +25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 +cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 +afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 +25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 +cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 +afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 +25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila new file mode 100644 index 0000000..8d368b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 +afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 +cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 + +#################### +# COMB_1 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 +afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 +cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 +afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 +cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# 
number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 +afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 +cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf new file mode 100644 index 0000000..4de4adf --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 +afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 +cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id 
+0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 +afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 +cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 +cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 +617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c 
+nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 +afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 +cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 +afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 +cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 +afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 +cea37d6d 819200 0.000000e+00 
1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 +afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 +cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 +cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 +afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of 
cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 +cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 +afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage new file mode 100644 index 0000000..9bcc4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 +617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 +cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# 
device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 +617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 +cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 +617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 +cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny 
alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 +617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 +cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco new file mode 100644 index 0000000..5c3b9e8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 +cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 +afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 +25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 +cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 +afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 +25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 +cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 +afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 +25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 +cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 +afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 +25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 +cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 +afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 +25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila new file mode 100644 index 0000000..8d368b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### 
+# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 +afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 +cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 +afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 +cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 +afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 +cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 +afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 +cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf new file mode 100644 index 0000000..4de4adf --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type 
(CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 +afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 +cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 +afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 +cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n 
minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 +cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 +617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 +afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 +cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 +afdd228b 3276800 0.000000e+00 
3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 +cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 +afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 +cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 +afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 +cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 +cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 +afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 +cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 +afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage new file mode 100644 index 0000000..9bcc4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 
+#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 +617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 +cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 +617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 +cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 +617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 +cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 +617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 +cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco new file mode 100644 index 0000000..5c3b9e8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 +cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 +afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 +25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 +cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 +afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 +25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 +cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 +afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 +25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 +cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 +afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 +25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 +cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 +afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 +25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila new file mode 100644 index 0000000..8d368b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 +afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 +cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 +afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 +cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 +afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 +cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 +afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 +cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf new file mode 100644 index 0000000..4de4adf --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 +afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 +cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations 
+1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 +afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 +cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 +cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 +617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 +afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 +cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 +afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 +cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 +afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 +cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 + +#################### +# COMB_6 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 +afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 +cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 +cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 +afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 
(Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 +cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 +afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage new file mode 100644 index 0000000..9bcc4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 +617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 +cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# 
DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 +617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 +cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 +617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 +cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 +617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 +cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco new file mode 100644 index 0000000..5c3b9e8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 +cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 +afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 +25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 +cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 +afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 +25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 +cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 +afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 +25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries 
+4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 +cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 +afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 +25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 +cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 +afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 +25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila new file mode 100644 index 0000000..0e198a0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 +d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 +2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 +d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 +2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) 
+# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 +d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 +2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 +d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 +2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf new file mode 100644 index 0000000..911975c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device 
id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 +d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 +2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 +d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 +2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 
+# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 +d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 +2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 +d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 +2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 +d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 +2c1922b7 
1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 +d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 +2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 +d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 +2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 +d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 +2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 +d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 +2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage new file mode 100644 index 0000000..cafd3f6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices 
+1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 +ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 +2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 +ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 +2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of 
entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 +ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 +2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 +ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 +2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco new file mode 100644 index 0000000..62dfb06 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 +2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 +d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 +0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 +2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 +d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 +0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 +2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 +d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 +0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 +2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 +d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 +0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 +2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 +d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 +0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila new file mode 100644 index 0000000..0e198a0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 +d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 +2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 +d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 +2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 +d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 +2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 +d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 +2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf new file mode 100644 index 0000000..911975c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 +d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 +2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number 
of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 +d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 +2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 +d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 +2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean 
(us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 +d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 +2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 +d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 +2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 +d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 +2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 + 
+#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 +d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 +2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 +d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 +2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations 
+1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 +d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 +2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage new file mode 100644 index 0000000..cafd3f6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 +ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 +2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL 
- 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 +ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 +2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 +ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 +2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 +ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 +2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco new file mode 100644 index 0000000..62dfb06 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 +2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 +d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 +0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 +2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 +d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 +0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 +2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 +d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 +0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## 
+# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 +2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 +d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 +0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 +2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 +d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 +0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila new file mode 100644 index 0000000..0e198a0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila @@ -0,0 +1,145 @@ +################## +# Performance Model 
Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 +d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 +2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 +d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 +2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 +d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 +2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 +d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 +2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf new file mode 100644 index 0000000..911975c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number 
of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 +d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 +2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 +d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 +2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 
(Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 +d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 +2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 +d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 +2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 
0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 +d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 +2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 +d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 +2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 +d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 +2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 + +#################### +# COMB_6 +# number of types 
devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 +d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 +2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 +d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 +2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage new file mode 100644 index 0000000..cafd3f6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage @@ -0,0 +1,144 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 +ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 +2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 +ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 +2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### 
+# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 +ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 +2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 +ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 +2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco new file mode 100644 index 0000000..62dfb06 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 
+#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 +2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 +d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 +0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 +2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 +d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 +0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 
0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 +2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 +d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 +0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 +2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 +d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 +0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# 
DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 +2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 +d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 +0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila new file mode 100644 index 0000000..0e198a0 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 +d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 +2c1922b7 1638400 0.000000e+00 4.006489e+03 
3.502972e+02 8.493756e+05 3.429028e+09 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 +d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 +2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 +d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 +2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of 
cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 +d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 +2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf new file mode 100644 index 0000000..911975c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 +d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 +2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 + +#################### +# COMB_1 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 +d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 +2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 +d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 +2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of 
entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 +d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 +2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 +d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 +2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.125894e+03 
1.170430e+03 6.769599e+05 4.954085e+09 95 +d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 +2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 +d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 +2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 +d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 +2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 + +#################### +# COMB_4 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 +d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 +2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage new file mode 100644 index 0000000..cafd3f6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 +ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 
1.129114e+12 135 +2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 +ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 +2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 +ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 +2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 
+# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 +ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 +2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco new file mode 100644 index 0000000..62dfb06 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 +2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 +d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 +0e8bce2b 
33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 +2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 +d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 +0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 +2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 +d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 +0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 + +#################### +# COMB_3 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 +2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 +d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 +0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 +2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 +d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 +0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila new file mode 100644 index 0000000..9b8b222 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 +d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 +2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 +d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 +2c1922b7 1638400 0.000000e+00 
5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 +d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 +2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 +d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 +2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf new file mode 100644 index 0000000..5a63d6a --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 +d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 +2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 +d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 +2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 + +#################### +# COMB_0 +# number of types devices +1 +#################### 
+# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 +d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 +2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 +d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 +2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 +d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 +2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 +d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 +2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 
1.243326e+09 35 +d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 +2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 +d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 +2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 +d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 +2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage new file mode 100644 index 0000000..dd00181 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 +ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 +2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 +ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 +2c1922b7 1638400 0.000000e+00 
6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 +ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 +2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 +ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 +2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco new file mode 100644 index 0000000..b2d32ed --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 +2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 +d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 +0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 +2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 +d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 
175 +0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 +2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 +d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 +0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 +2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 +d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 +0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 + 
+#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 +2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 +d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 +0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila new file mode 100644 index 0000000..9b8b222 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 
14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 +d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 +2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 +d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 +2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 +d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 +2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 + +#################### +# COMB_2 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 +d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 +2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf new file mode 100644 index 0000000..5a63d6a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 +d39bff17 6553600 0.000000e+00 2.067623e+03 
3.658691e+02 3.825102e+05 8.156510e+08 185 +2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 +d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 +2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 +d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 +2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 +d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 +2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 +d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 +2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 +d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 +2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 +d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 +2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 +d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 
1.872717e+05 3.955042e+08 91 +2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 +d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 +2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage new file mode 100644 index 0000000..dd00181 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) 
sum sum2 n +d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 +ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 +2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 +ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 +2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 +ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 +2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 + 
+#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 +ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 +2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco new file mode 100644 index 0000000..b2d32ed --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 +2c1922b7 
1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 +d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 +0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 +2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 +d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 +0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 +2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 +d39bff17 6553600 0.000000e+00 
9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 +0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 +2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 +d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 +0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 +2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 +d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 +0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 
9.113758e+05 5.363209e+09 157 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila new file mode 100644 index 0000000..9b8b222 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 +d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 +2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 
5.838253e+09 121 +d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 +2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 +d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 +2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 +d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 +2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf 
b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf new file mode 100644 index 0000000..5a63d6a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 +d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 +2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 +d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 +2c1922b7 1638400 
0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 +d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 +2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 +d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 +2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### 
+# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 +d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 +2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 +d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 +2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 +d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 +2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 +d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 +2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 +d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 +2c1922b7 1638400 0.000000e+00 6.374873e+02 
1.360140e+02 9.498561e+04 6.330859e+07 149 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage new file mode 100644 index 0000000..dd00181 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 +ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 +2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 
5.856171e+05 1.271070e+09 278 +ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 +2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 +ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 +2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 +ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 +2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 + diff --git 
a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco new file mode 100644 index 0000000..b2d32ed --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 +2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 +d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 +0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 
2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 +2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 +d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 +0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 +2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 +d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 +0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 +2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 
3.227676e+04 1.221734e+07 88 +d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 +0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 +2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 +d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 +0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila new file mode 100644 index 0000000..9b8b222 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny 
sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 +d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 +2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 +d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 +2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 
172 +d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 +2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 +d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 +2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf new file mode 100644 index 0000000..5a63d6a --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan 
+# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 +d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 +2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 +d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 +2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 +d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 +2c1922b7 1638400 0.000000e+00 
5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 +d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 +2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 +d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 +2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 +d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 +2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 +d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 +2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 +d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 +2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 +d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 +2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage new file mode 100644 index 0000000..dd00181 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of 
entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 +ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 +2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 +ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 +2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.072463e+03 
3.421274e+02 6.590431e+05 1.403064e+09 318 +ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 +2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 +ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 +2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco new file mode 100644 index 0000000..b2d32ed --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 +2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 +d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 +0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 +2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 +d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 +0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev 
(us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 +2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 +d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 +0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 +2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 +d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 +0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 +2c1922b7 
1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 +d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 +0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila new file mode 100644 index 0000000..a9a9d84 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila @@ -0,0 +1,165 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 +9c6670ef 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 +c00cf6b7 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 +78a2cc08 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 +a7cdf15b 44236800 1.415578e+10 6.657450e+05 2.371804e+04 5.126238e+07 3.417099e+13 77 +24c84a50 11059200 1.769472e+09 8.321812e+04 2.964755e+03 6.407798e+06 5.339217e+11 77 +0b0b0ce8 3686400 2.621440e+08 1.421718e+04 3.409134e+02 9.098993e+05 1.294364e+10 64 +4220e23d 14745600 2.097152e+09 1.008105e+05 2.361630e+03 8.064841e+06 8.134670e+11 80 + +#################### +# COMB_0 +# number of types 
devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 +9c6670ef 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 +c00cf6b7 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 +78a2cc08 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 +a7cdf15b 44236800 1.415578e+10 2.246998e+04 1.357113e+02 2.381818e+06 5.352132e+10 106 +24c84a50 11059200 1.769472e+09 2.808747e+03 1.696392e+01 2.977272e+05 8.362706e+08 106 +0b0b0ce8 3686400 2.621440e+08 6.738679e+02 4.393713e+01 6.873452e+04 4.651489e+07 102 +4220e23d 14745600 2.097152e+09 5.557425e+03 3.241733e+02 5.835297e+05 3.253957e+09 105 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 +9c6670ef 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 
1.324463e+10 105 +c00cf6b7 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 +78a2cc08 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 +a7cdf15b 44236800 1.415578e+10 2.246154e+04 1.900893e+02 2.358462e+06 5.297852e+10 105 +24c84a50 11059200 1.769472e+09 2.807693e+03 2.376116e+01 2.948078e+05 8.277894e+08 105 +0b0b0ce8 3686400 2.621440e+08 6.672056e+02 3.376608e+01 6.805497e+04 4.552295e+07 102 +4220e23d 14745600 2.097152e+09 5.553764e+03 3.500896e+02 5.831453e+05 3.251521e+09 105 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 +9c6670ef 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 +c00cf6b7 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 +78a2cc08 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 +a7cdf15b 44236800 1.415578e+10 2.248348e+04 5.259920e+01 2.360766e+06 5.307852e+10 105 +24c84a50 11059200 1.769472e+09 2.810435e+03 6.574900e+00 2.950958e+05 8.293519e+08 105 +0b0b0ce8 3686400 2.621440e+08 6.002221e+02 2.259043e+01 6.242310e+04 3.752080e+07 104 +4220e23d 14745600 2.097152e+09 5.577722e+03 1.615194e+02 5.912385e+05 3.300529e+09 106 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf new file mode 
100644 index 0000000..3b86b0e --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf @@ -0,0 +1,359 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.801013e+02 7.013561e+01 4.760709e+04 3.272198e+07 70 +4220e23d 14745600 2.097152e+09 5.623635e+03 5.419920e+02 4.442672e+05 2.521603e+09 79 +492beed5 33177600 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 +9c6670ef 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 +c00cf6b7 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 +78a2cc08 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 +a7cdf15b 44236800 1.415578e+10 2.300722e+04 1.176963e+03 2.001628e+06 4.617240e+10 87 +24c84a50 11059200 1.769472e+09 2.875903e+03 1.471204e+02 2.502035e+05 7.214438e+08 87 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb4) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.717051e+02 6.137607e+01 4.500424e+04 3.048197e+07 67 +4220e23d 14745600 2.097152e+09 5.648275e+03 4.677390e+02 4.575103e+05 2.601865e+09 81 +492beed5 33177600 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 +9c6670ef 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 +c00cf6b7 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 +78a2cc08 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 +a7cdf15b 44236800 1.415578e+10 2.314040e+04 1.304205e+03 2.036356e+06 4.727180e+10 88 +24c84a50 11059200 1.769472e+09 2.892550e+03 1.630257e+02 2.545445e+05 7.386219e+08 88 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb6) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.265559e+02 5.536840e+01 4.824481e+04 3.046412e+07 77 +4220e23d 14745600 2.097152e+09 5.631203e+03 4.767455e+02 4.561275e+05 2.586957e+09 81 +492beed5 33177600 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 +9c6670ef 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 +c00cf6b7 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 +78a2cc08 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 +a7cdf15b 44236800 1.415578e+10 2.325652e+04 1.351460e+03 
2.046572e+06 4.775688e+10 88 +24c84a50 11059200 1.769472e+09 2.907065e+03 1.689325e+02 2.558215e+05 7.462012e+08 88 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb7) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.780899e+02 4.241206e+01 4.543202e+04 3.092751e+07 67 +4220e23d 14745600 2.097152e+09 5.857201e+03 8.346836e+02 4.744333e+05 2.835284e+09 81 +492beed5 33177600 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 +9c6670ef 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 +c00cf6b7 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 +78a2cc08 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 +a7cdf15b 44236800 1.415578e+10 2.300996e+04 8.508186e+02 1.978857e+06 4.559568e+10 86 +24c84a50 11059200 1.769472e+09 2.876245e+03 1.063523e+02 2.473571e+05 7.124325e+08 86 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+0b0b0ce8 3686400 2.621440e+08 6.759139e+02 4.092799e+01 4.190666e+04 2.842915e+07 62 +4220e23d 14745600 2.097152e+09 5.527477e+03 2.733928e+02 4.421982e+05 2.450220e+09 80 +492beed5 33177600 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 +9c6670ef 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 +c00cf6b7 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 +78a2cc08 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 +a7cdf15b 44236800 1.415578e+10 2.293540e+04 3.537818e+02 2.201798e+06 5.051112e+10 96 +24c84a50 11059200 1.769472e+09 2.866925e+03 4.422272e+01 2.752248e+05 7.892362e+08 96 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb1) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.339465e+02 7.125158e+01 4.184047e+04 2.685969e+07 66 +4220e23d 14745600 2.097152e+09 5.624130e+03 4.755864e+02 4.668028e+05 2.644133e+09 83 +492beed5 33177600 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 +9c6670ef 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 +c00cf6b7 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 +78a2cc08 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 +a7cdf15b 44236800 1.415578e+10 2.298204e+04 1.075038e+03 2.229258e+06 5.134500e+10 97 +24c84a50 11059200 1.769472e+09 2.872755e+03 1.343797e+02 2.786572e+05 8.022656e+08 97 + 
+#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb3) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.389750e+02 8.615382e+01 4.728415e+04 3.076266e+07 74 +4220e23d 14745600 2.097152e+09 5.648331e+03 5.220897e+02 4.631632e+05 2.638450e+09 82 +492beed5 33177600 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 +9c6670ef 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 +c00cf6b7 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 +78a2cc08 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 +a7cdf15b 44236800 1.415578e+10 2.310138e+04 1.132169e+03 2.217732e+06 5.135572e+10 96 +24c84a50 11059200 1.769472e+09 2.887673e+03 1.415212e+02 2.772165e+05 8.024331e+08 96 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb5) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.386625e+02 8.094896e+01 4.342905e+04 2.818209e+07 68 +4220e23d 14745600 2.097152e+09 
5.638657e+03 3.709019e+02 4.454539e+05 2.522630e+09 79 +492beed5 33177600 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 +9c6670ef 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 +c00cf6b7 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 +78a2cc08 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 +a7cdf15b 44236800 1.415578e+10 2.288024e+04 5.062216e+02 2.219382e+06 5.080488e+10 97 +24c84a50 11059200 1.769472e+09 2.860030e+03 6.327770e+01 2.774228e+05 7.938262e+08 97 + +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 1.414338e+04 6.441210e+02 3.535844e+05 5.011251e+09 25 +4220e23d 14745600 2.097152e+09 1.091117e+05 2.701159e+03 3.382462e+06 3.692924e+11 31 +492beed5 33177600 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 +9c6670ef 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 +c00cf6b7 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 +78a2cc08 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 +a7cdf15b 44236800 1.415578e+10 7.242712e+05 1.552922e+04 1.665824e+07 1.207063e+13 23 +24c84a50 11059200 1.769472e+09 9.053390e+04 1.941152e+03 2.082280e+06 1.886036e+11 23 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage 
b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage new file mode 100644 index 0000000..da97b05 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage @@ -0,0 +1,165 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 1.352609e+04 3.616534e+02 1.082087e+06 1.464687e+10 80 +492beed5 33177600 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 +9c6670ef 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 +c00cf6b7 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 +78a2cc08 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 +a7cdf15b 44236800 1.415578e+10 7.100792e+05 1.789999e+04 5.680634e+07 4.036264e+13 80 +24c84a50 11059200 1.769472e+09 8.875990e+04 2.237499e+03 7.100792e+06 6.306662e+11 80 +4220e23d 14745600 2.097152e+09 1.078112e+05 1.983800e+03 8.624897e+06 9.301755e+11 80 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +8 +# sumlnx sumlnx2 sumlny 
sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.589631e+02 8.406511e+00 6.787320e+04 4.473321e+07 103 +492beed5 33177600 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 +9c6670ef 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 +c00cf6b7 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 +78a2cc08 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 +a7cdf15b 44236800 1.415578e+10 2.302796e+04 1.810023e+02 2.440964e+06 5.621392e+10 106 +24c84a50 11059200 1.769472e+09 2.878495e+03 2.262529e+01 3.051205e+05 8.783425e+08 106 +4220e23d 14745600 2.097152e+09 5.574713e+03 3.353004e+02 5.909196e+05 3.306125e+09 106 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.663664e+02 8.616537e+01 6.796937e+04 4.604980e+07 102 +492beed5 33177600 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 +9c6670ef 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 +c00cf6b7 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 +78a2cc08 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 +a7cdf15b 44236800 1.415578e+10 2.300072e+04 1.680905e+02 
2.415076e+06 5.555144e+10 105 +24c84a50 11059200 1.769472e+09 2.875090e+03 2.101132e+01 3.018845e+05 8.679912e+08 105 +4220e23d 14745600 2.097152e+09 5.579034e+03 3.672012e+02 5.857985e+05 3.282348e+09 105 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb2) +# number of entries +8 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0b0b0ce8 3686400 2.621440e+08 6.181769e+02 5.174143e+01 6.181769e+04 3.848198e+07 100 +492beed5 33177600 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 +9c6670ef 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 +c00cf6b7 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 +78a2cc08 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 +a7cdf15b 44236800 1.415578e+10 2.296192e+04 1.457883e+02 2.411002e+06 5.536344e+10 105 +24c84a50 11059200 1.769472e+09 2.870240e+03 1.822354e+01 3.013752e+05 8.650538e+08 105 +4220e23d 14745600 2.097152e+09 5.580581e+03 3.970717e+02 5.859610e+05 3.286558e+09 105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco new file mode 100644 index 0000000..d438efe --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco @@ -0,0 +1,208 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_2 +# number of types devices +1 +#################### +# 
DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +9 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 +9c6670ef 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 +c00cf6b7 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 +78a2cc08 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 +a7cdf15b 44236800 1.415578e+10 5.491156e+03 6.128382e+02 1.323369e+06 7.357340e+09 241 +24c84a50 11059200 1.769472e+09 6.863945e+02 7.660478e+01 1.654211e+05 1.149584e+08 241 +0b0b0ce8 3686400 2.621440e+08 1.582927e+02 3.333442e+01 3.434951e+04 5.678402e+06 217 +4220e23d 14745600 2.097152e+09 8.206871e+02 1.017181e+02 1.148962e+05 9.574235e+07 140 +87a7dc42 74649600 2.388787e+10 9.813897e+03 7.998509e+02 1.570224e+06 1.551237e+10 160 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb1) +# number of entries +9 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 +9c6670ef 29491200 
7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 +c00cf6b7 29491200 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 +78a2cc08 29491200 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 +a7cdf15b 44236800 1.415578e+10 5.372856e+03 4.004430e+02 1.343214e+06 7.256988e+09 251 +24c84a50 11059200 1.769472e+09 6.716070e+02 5.005537e+01 1.679018e+05 1.133904e+08 250 +0b0b0ce8 3686400 2.621440e+08 1.630480e+02 3.438768e+01 3.097912e+04 5.275762e+06 190 +4220e23d 14745600 2.097152e+09 8.448030e+02 7.773742e+01 2.433033e+05 2.072837e+08 288 +87a7dc42 74649600 2.388787e+10 9.873153e+03 8.026227e+02 1.579704e+06 1.569974e+10 160 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb0) +# number of entries +9 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 +9c6670ef 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 +c00cf6b7 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 +78a2cc08 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 +a7cdf15b 44236800 1.415578e+10 5.582196e+03 6.295422e+02 1.300652e+06 7.352836e+09 233 +24c84a50 11059200 1.769472e+09 6.977745e+02 7.869277e+01 1.625815e+05 1.148881e+08 233 +0b0b0ce8 3686400 2.621440e+08 1.624855e+02 3.298013e+01 2.940987e+04 4.975550e+06 181 +4220e23d 14745600 2.097152e+09 8.152506e+02 1.017614e+02 1.173961e+05 9.719839e+07 144 +87a7dc42 74649600 
2.388787e+10 1.001360e+04 7.827579e+02 1.582149e+06 1.593981e+10 158 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +9 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 +9c6670ef 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 +c00cf6b7 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 +78a2cc08 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 +a7cdf15b 44236800 1.415578e+10 5.508406e+03 5.364654e+02 1.366084e+06 7.596320e+09 248 +24c84a50 11059200 1.769472e+09 6.885507e+02 6.705818e+01 1.707605e+05 1.186925e+08 248 +0b0b0ce8 3686400 2.621440e+08 1.622246e+02 3.553894e+01 3.714942e+04 6.315779e+06 229 +4220e23d 14745600 2.097152e+09 8.611626e+02 9.290485e+01 2.411255e+05 2.100651e+08 280 +87a7dc42 74649600 2.388787e+10 9.935915e+03 7.366769e+02 1.569875e+06 1.568389e+10 158 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +9 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +492beed5 33177600 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 +9c6670ef 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 +c00cf6b7 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 +78a2cc08 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 +a7cdf15b 44236800 1.415578e+10 3.424156e+05 8.326094e+04 5.547134e+07 2.011730e+13 162 +24c84a50 11059200 1.769472e+09 4.280195e+04 1.040762e+04 6.933918e+06 3.143329e+11 162 +0b0b0ce8 3686400 2.621440e+08 6.441655e+03 1.152866e+03 3.220827e+05 2.141201e+09 50 +4220e23d 14745600 2.097152e+09 4.927734e+04 1.166029e+04 5.913281e+06 3.077063e+11 120 +87a7dc42 74649600 2.388787e+10 5.091210e+05 1.022002e+05 6.974957e+07 3.694192e+13 137 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila new file mode 100644 index 0000000..c4772b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 +24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 
1.244340e+08 1.024406e+13 1514 +d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 +24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 +d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 +24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 +d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 +24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 +d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# 
DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# 
sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf new file mode 100644 index 0000000..47cf9d6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf @@ -0,0 +1,314 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 +f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 +24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 +f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 +24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 +f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 +24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 +f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 +24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 +f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 +24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 +f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 +24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 +f0ac7beb 4915200 0.000000e+00 
9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 +24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 +f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 +24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 +f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 +24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage new file mode 100644 index 0000000..f145897 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 +24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 +d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 +24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 +d46431bb 1228800 0.000000e+00 
1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 +24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 +d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 +24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 +d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco new file mode 100644 index 0000000..a326d3c --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 +f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 +d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 +24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 +f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 +d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 
2.772170e+07 7153 +24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 +f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 +d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 +24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 +f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 +d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 +24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 
5247 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 +f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 +d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 +24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila new file mode 100644 index 0000000..c4772b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 
4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 +24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 +d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 +24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 +d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 +24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 +d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 + +#################### +# 
COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 +24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 +d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf new file mode 100644 index 0000000..47cf9d6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 +f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 +24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 +f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 +24c84a50 11059200 
0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 +f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 +24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 +f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 +24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 +f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 +24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 +f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 +24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 
0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 +f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 +24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 +f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 +24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 +f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 
+24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage new file mode 100644 index 0000000..f145897 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 +24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 +d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 
0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 +24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 +d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 +24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 +d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 +24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 +d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 + diff --git 
a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco new file mode 100644 index 0000000..a326d3c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 +f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 +d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 +24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 
0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 +f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 +d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 +24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 +f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 +d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 +24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 +f0ac7beb 4915200 0.000000e+00 
2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 +d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 +24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 +f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 +d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 +24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila new file mode 100644 index 0000000..c4772b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 
sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 +24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 +d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 +24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 +d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 
6.070301e+09 7802 +24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 +d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 +24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 +d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan 
nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of 
combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 
0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf new file mode 100644 index 0000000..47cf9d6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 +f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 +24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 
1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 +f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 +24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 +f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 +24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 +f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 +24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 + +#################### +# COMB_3 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 +f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 +24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 +f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 +24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 
(Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 +f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 +24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 +f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 +24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 
0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 +f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 +24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage new file mode 100644 index 0000000..f145897 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 +24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 +d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 +24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 +d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 +24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 +d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 +24c84a50 11059200 0.000000e+00 3.002393e+03 
6.861698e+01 2.339765e+07 7.028562e+10 7793 +d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco new file mode 100644 index 0000000..a326d3c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 +f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 +d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 +24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 
0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 +f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 +d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 +24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 +f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 +d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 +24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum 
sum2 n +8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 +f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 +d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 +24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 +f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 +d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 +24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila new file mode 100644 index 0000000..c4772b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 +24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 +d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 +24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 +d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 +24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 +d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 +24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 +d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# 
number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch new file mode 100644 index 0000000..ddae6ee --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n 
minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf new file mode 100644 index 0000000..47cf9d6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 +f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 +24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 +f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 +24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 +f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 +24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 +f0ac7beb 4915200 
0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 +24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 +f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 +24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 +f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 +24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA 
- 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 +f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 +24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 +f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 +24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta 
n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 +f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 +24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage new file mode 100644 index 0000000..f145897 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 +24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 +d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number 
of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 +24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 +d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 +24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 +d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 +24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 +d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco new file mode 100644 index 0000000..a326d3c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 +f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 +d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 +24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 +f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 +d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 +24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 +f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 +d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 +24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries 
+4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 +f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 +d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 +24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 +f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 +d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 +24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila new file mode 100644 index 0000000..3e5a1e9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 +617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 +cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 +617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 +cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) 
+# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 +617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 +cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 +617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 +cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf new file mode 100644 index 0000000..e05e4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 +afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 +617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 +afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 +617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 +cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 +617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 
0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 +cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 +afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 +afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 +617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 +cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 +afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 + +#################### +# COMB_6 +# number of types devices 
+1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 +cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 +617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 +cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 +617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries 
+3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 +afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 +617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage new file mode 100644 index 0000000..d13ea20 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 +617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 +cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 +617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 +cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 +617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 +cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 +617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 +cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco new file mode 100644 index 0000000..4406acd --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 +afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 +cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 +617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 +afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 +617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 +25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 +afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 +cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 +617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 +afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 +cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 +617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 +afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 +cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 +617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila new file mode 100644 index 0000000..3e5a1e9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device 
type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 +617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 +cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 +617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 +cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 +617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 +cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 +617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 +cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch new file mode 100644 index 
0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny 
sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf new file mode 100644 index 0000000..e05e4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 +afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 +617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx 
maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 +afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 +617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 +cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 +617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 +cea37d6d 409600 0.000000e+00 
1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 +afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 +afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 +617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 +cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 +afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 +cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 +617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 +cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 +617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 +afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 +617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage new file mode 100644 index 0000000..d13ea20 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 +617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 +cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### 
+# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 +617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 +cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 +617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 +cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 
1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 +617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 +cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco new file mode 100644 index 0000000..4406acd --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 +afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 +cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 +617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx 
sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 +afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 +617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 +25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 +afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 +cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 +617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c 
+nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 +afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 +cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 +617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 +afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 +cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 +617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila new file mode 100644 index 0000000..3e5a1e9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# 
DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 +617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 +cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 +617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 +cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 +617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 +cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 +617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 +cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 
(Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf new file mode 100644 index 0000000..e05e4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 +afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 +617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 +afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 +617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 +cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 +617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 +cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 
1.144166e+05 1.322613e+09 10 +afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 +afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 +617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 +cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 +afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 
+# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 +cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 +617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 +cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 +617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 
0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 +afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 +617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage new file mode 100644 index 0000000..d13ea20 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 +617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 +cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# 
number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 +617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 +cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 +617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 +cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.939722e+04 
4.040622e+03 4.409582e+05 1.320784e+10 15 +617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 +cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco new file mode 100644 index 0000000..4406acd --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 +afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 +cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 +617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta 
n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 +afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 +617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 +25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 +afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 +cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 +617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 +afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 +cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 +617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 +afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 +cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 +617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila new file mode 100644 index 0000000..3e5a1e9 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# 
device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 +617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 +cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 +617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 +cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 
+# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 +617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 +cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 +617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 +cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch new file mode 100644 index 0000000..6f3a8b6 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf new file mode 100644 index 0000000..e05e4b2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 +afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 +617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 +afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 +617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 +cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 +617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 +cea37d6d 409600 0.000000e+00 1.144166e+04 
1.161786e+03 1.144166e+05 1.322613e+09 10 +afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 +afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 +617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 +cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 +afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 +cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 +617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 +cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 +617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 +afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 +617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage new file mode 100644 index 0000000..d13ea20 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 +617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 +cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations 
+1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 +617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 +cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 +617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 +cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n 
+afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 +617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 +cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco new file mode 100644 index 0000000..4406acd --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 +afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 +cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 +617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of 
entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 +afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 +617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 +25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 +afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 +cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 +617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 +afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 +cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 +617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 +afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 +cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 +617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila new file mode 100644 index 0000000..3301f40 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 
+#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 +ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 +2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 +ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 +2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 +ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 +2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 +ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 +2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### 
+# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch new file mode 100644 index 0000000..3545904 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 
+0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf new file mode 100644 index 0000000..0c556c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 +d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 +ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 +d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 +ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 +d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 +ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 +d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 
1.899617e+05 5.499903e+08 67 +ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 +d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 +ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 +d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 +ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# 
DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 +d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 +ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 +d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 +ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan 
nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 +d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 +ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage new file mode 100644 index 0000000..a9327f2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 +ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 +2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# 
number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 +ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 +2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 +ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 +2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 
2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 +ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 +2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco new file mode 100644 index 0000000..113921f --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 +d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 +2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 +ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny 
alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 +d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 +2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 +ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 +d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 +2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 +ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# 
not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 +d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 +2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 +ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 +d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 +2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 +ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila new file mode 100644 index 0000000..3301f40 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# 
DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 +ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 +2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 +ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 +2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 
nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 +ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 +2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 +ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 +2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model 
for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch new file mode 100644 index 0000000..3545904 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx 
maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf new file mode 100644 index 0000000..0c556c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 +d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 +ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 +d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 +ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 +d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 +ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 +d39bff17 3276800 0.000000e+00 2.835249e+03 
4.125186e+02 1.899617e+05 5.499903e+08 67 +ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 +d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 +ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 +d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 +ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 +d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 +ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 +d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 +ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 +d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 +ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage new file mode 100644 index 0000000..a9327f2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 +ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 +2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 +ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 +2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 +ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 +2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) 
dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 +ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 +2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco new file mode 100644 index 0000000..113921f --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 +d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 +2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 +ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for 
cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 +d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 +2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 +ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 +d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 +2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 +ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 
0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 +d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 +2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 +ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 +d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 +2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 +ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila new file mode 100644 index 0000000..3301f40 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# 
device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 +ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 +2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 +ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 +2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny 
sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 +ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 +2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 +ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 +2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 
+# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch 
new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of 
entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf new file mode 100644 index 0000000..0c556c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 +d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 +ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx 
sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 +d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 +ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 +d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 +ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 
4.686092e+07 89 +d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 +ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 +d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 +ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 +d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 +ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device 
type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 +d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 +ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 +d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 +ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha 
beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 +d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 +ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage new file mode 100644 index 0000000..a9327f2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 +ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 +2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores 
+1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 +ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 +2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 +ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 +2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 
+# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 +ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 +2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco new file mode 100644 index 0000000..113921f --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 +d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 +2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 +ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 +d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 +2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 +ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 +d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 +2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 +ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx 
maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 +d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 +2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 +ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 +d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 +2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 +ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila new file mode 100644 index 0000000..3301f40 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 +ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 +2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 +ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 +2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries 
+3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 +ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 +2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 +ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 +2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# 
device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch new file mode 100644 index 0000000..3545904 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf new file mode 100644 index 0000000..0c556c8 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 +d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 +ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 +d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 +ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 +d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 +ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# 
hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 +d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 +ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 +d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 +ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 +d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 +ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 
3.833978e+09 83 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 +d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 +ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 +d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 +ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 +d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 +ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage new file mode 100644 index 0000000..a9327f2 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 +ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 +2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU 
- 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 +ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 +2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 +ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 +2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta 
n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 +ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 +2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco new file mode 100644 index 0000000..113921f --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 +d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 +2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 +ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 +d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 +2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 +ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 +d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 +2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 +ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# 
number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 +d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 +2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 +ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 +d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 +2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 +ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila new file mode 100644 index 0000000..2fb4146 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + 
+#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 +ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 +2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 +ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 +2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 +ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 +2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 +ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 +2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git 
a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device 
id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf new file mode 100644 index 0000000..f3bfa2d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 +d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 +ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 
+# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 +d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 +ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 +d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 +ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 +d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 +ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 +d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 +ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 +d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 +ff82dda0 7372800 0.000000e+00 4.288060e+03 
7.671104e+02 2.744359e+05 1.214459e+09 64 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 +d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 +ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 +d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 +ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores 
+1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 +d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 +ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage new file mode 100644 index 0000000..2694bef --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 +ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 +2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 
0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 +ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 +2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 +ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 +2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n 
minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 +ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 +2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco new file mode 100644 index 0000000..4f7965d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 +d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 +2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 +ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 
+# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 +d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 +2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 +ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 +d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 +2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 +ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 
+##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 +d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 +2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 +ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 +d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 +2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 +ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila new file mode 100644 index 0000000..2fb4146 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# 
COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 +ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 +2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 +ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 +2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 
+########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 +ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 +2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 +ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 +2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git 
a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### 
+# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf new file mode 100644 index 0000000..f3bfa2d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 +d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 +ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device 
id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 +d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 +ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 +d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 +ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# 
a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 +d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 +ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 +d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 +ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 +d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 +ff82dda0 7372800 
0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 +d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 +ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 +d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 +ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# 
DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 +d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 +ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage new file mode 100644 index 0000000..2694bef --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 +ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 +2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 + +#################### +# COMB_0 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 +ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 +2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 +ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 +2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries 
+3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 +ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 +2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco new file mode 100644 index 0000000..4f7965d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 +d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 +2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 +ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 
0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 +d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 +2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 +ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 +d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 +2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 +ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# 
number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 +d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 +2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 +ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 +d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 +2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 +ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila new file mode 100644 index 0000000..2fb4146 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila @@ -0,0 +1,145 @@ +################## 
+# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 +ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 +2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 +ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 +2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 +ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 +2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 +ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 +2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of 
types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + 
diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 
+#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf new file mode 100644 index 0000000..f3bfa2d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 +d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 +ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### 
+# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 +d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 +ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 +d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 +ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 +d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 +ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 +d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 +ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 +d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 
2.386758e+08 85 +ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 +d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 +ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 +d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 +ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 + +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# 
device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 +d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 +ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage new file mode 100644 index 0000000..2694bef --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 +ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 +2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 + +#################### +# COMB_0 +# 
number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 +ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 +2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 +ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 +2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 
(Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 +ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 +2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco new file mode 100644 index 0000000..4f7965d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 +d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 +2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 +ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# 
DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 +d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 +2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 +ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 +d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 +2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 +ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 
+#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 +d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 +2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 +ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 +d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 +2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 +ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila new file mode 100644 index 0000000..2fb4146 --- /dev/null +++ 
b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila @@ -0,0 +1,145 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 +ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 +2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 +ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 +2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 + +#################### +# COMB_1 +# number of types devices +1 
+#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 +ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 +2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 +ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 +2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal @@ -0,0 +1,104 @@ 
+################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not 
multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch new file mode 100644 index 0000000..988cb19 --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch @@ -0,0 +1,104 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +3 +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 
1.280490e+12 6572 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb3) +# number of entries +1 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf new file mode 100644 index 0000000..f3bfa2d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf @@ -0,0 +1,314 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +9 +#################### +# COMB_8 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb8) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 +d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 +ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 
2.283426e+11 106 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +4 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda4_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 +d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 +ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 +d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 +ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +6 +#################### +# DEV_0 +# number of cores +1 +########## +# number of 
implementations +1 +##### +# Model for cuda6_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 +d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 +ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 + +#################### +# COMB_7 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb7) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 +d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 +ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +5 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda5_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) 
dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 +d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 +ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 + +#################### +# COMB_5 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb5) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 +d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 +ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 + +#################### +# COMB_6 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb6) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 +d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 +ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 + 
+#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +7 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda7_impl0 (Comb4) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 +d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 +ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage new file mode 100644 index 0000000..2694bef --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage @@ -0,0 +1,144 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +4 +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb3) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 +ff82dda0 
7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 +2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb0) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 +ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 +2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 +ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 +2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, 
OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb1) +# number of entries +3 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 +ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 +2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 + diff --git a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco new file mode 100644 index 0000000..4f7965d --- /dev/null +++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco @@ -0,0 +1,183 @@ +################## +# Performance Model Version +45 + +#################### +# COMBs +# number of combinations +5 +#################### +# COMB_4 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +0 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cpu0_impl0 (Comb4) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 +d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 +2c1922b7 819200 0.000000e+00 3.523655e+03 
5.077738e+02 5.990214e+04 2.154576e+08 17 +ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 + +#################### +# COMB_1 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +0 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda0_impl0 (Comb1) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 +d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 +2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 +ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 + +#################### +# COMB_0 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +2 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda2_impl0 (Comb0) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 +d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 +2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 +ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 
6.417117e+08 255 + +#################### +# COMB_3 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +3 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda3_impl0 (Comb3) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 +d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 +2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 +ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 + +#################### +# COMB_2 +# number of types devices +1 +#################### +# DEV_0 +# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) +1 +#################### +# DEV_0 +# device id +1 +#################### +# DEV_0 +# number of cores +1 +########## +# number of implementations +1 +##### +# Model for cuda1_impl0 (Comb2) +# number of entries +4 +# sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx +0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 +# a b c +nan nan nan +# not multiple-regression-base +0 +# hash size flops mean (us) dev (us) sum sum2 n +0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 +d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 +2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 +ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 + diff --git a/tools/perfmodels/sampling/codelets/tmp/mlr_init.out 
b/tools/perfmodels/sampling/codelets/tmp/mlr_init.out new file mode 100644 index 0000000..4826e3c --- /dev/null +++ b/tools/perfmodels/sampling/codelets/tmp/mlr_init.out @@ -0,0 +1,1765 @@ +Duration, M, N, K +0.422000, 6.000000, 5.000000, 4.000000 +0.474000, 6.000000, 5.000000, 4.000000 +0.673000, 6.000000, 5.000000, 4.000000 +0.482000, 6.000000, 5.000000, 4.000000 +0.721000, 6.000000, 5.000000, 4.000000 +0.441000, 6.000000, 5.000000, 4.000000 +0.459000, 6.000000, 5.000000, 4.000000 +0.694000, 6.000000, 5.000000, 4.000000 +0.458000, 6.000000, 5.000000, 4.000000 +0.468000, 6.000000, 5.000000, 4.000000 +0.684000, 6.000000, 5.000000, 4.000000 +0.470000, 6.000000, 5.000000, 4.000000 +0.469000, 6.000000, 5.000000, 4.000000 +0.667000, 6.000000, 5.000000, 4.000000 +0.422000, 6.000000, 5.000000, 4.000000 +0.432000, 6.000000, 5.000000, 4.000000 +0.469000, 6.000000, 5.000000, 4.000000 +0.691000, 6.000000, 5.000000, 4.000000 +0.450000, 6.000000, 5.000000, 4.000000 +0.629000, 6.000000, 5.000000, 4.000000 +0.451000, 6.000000, 5.000000, 4.000000 +0.505000, 6.000000, 5.000000, 4.000000 +0.686000, 6.000000, 5.000000, 4.000000 +0.435000, 6.000000, 5.000000, 4.000000 +0.478000, 6.000000, 5.000000, 4.000000 +0.686000, 6.000000, 5.000000, 4.000000 +0.418000, 6.000000, 5.000000, 4.000000 +0.462000, 6.000000, 5.000000, 4.000000 +0.680000, 6.000000, 5.000000, 4.000000 +0.523000, 6.000000, 5.000000, 4.000000 +0.604000, 6.000000, 5.000000, 4.000000 +0.485000, 6.000000, 5.000000, 4.000000 +0.692000, 6.000000, 5.000000, 4.000000 +0.422000, 6.000000, 5.000000, 4.000000 +0.460000, 6.000000, 5.000000, 4.000000 +0.685000, 6.000000, 5.000000, 4.000000 +0.420000, 6.000000, 5.000000, 4.000000 +0.453000, 6.000000, 5.000000, 4.000000 +0.661000, 6.000000, 5.000000, 4.000000 +0.495000, 6.000000, 5.000000, 4.000000 +0.670000, 6.000000, 5.000000, 4.000000 +0.427000, 6.000000, 5.000000, 4.000000 +1.951000, 4.000000, 8.000000, 10.000000 +2.683000, 4.000000, 8.000000, 10.000000 +1.936000, 4.000000, 
8.000000, 10.000000 +1.949000, 4.000000, 8.000000, 10.000000 +2.724000, 4.000000, 8.000000, 10.000000 +1.906000, 4.000000, 8.000000, 10.000000 +1.951000, 4.000000, 8.000000, 10.000000 +2.631000, 4.000000, 8.000000, 10.000000 +1.989000, 4.000000, 8.000000, 10.000000 +2.679000, 4.000000, 8.000000, 10.000000 +1.897000, 4.000000, 8.000000, 10.000000 +1.951000, 4.000000, 8.000000, 10.000000 +2.894000, 4.000000, 8.000000, 10.000000 +1.903000, 4.000000, 8.000000, 10.000000 +1.948000, 4.000000, 8.000000, 10.000000 +2.666000, 4.000000, 8.000000, 10.000000 +1.922000, 4.000000, 8.000000, 10.000000 +1.951000, 4.000000, 8.000000, 10.000000 +2.769000, 4.000000, 8.000000, 10.000000 +1.937000, 4.000000, 8.000000, 10.000000 +2.739000, 4.000000, 8.000000, 10.000000 +1.912000, 4.000000, 8.000000, 10.000000 +1.942000, 4.000000, 8.000000, 10.000000 +2.694000, 4.000000, 8.000000, 10.000000 +1.903000, 4.000000, 8.000000, 10.000000 +1.956000, 4.000000, 8.000000, 10.000000 +2.784000, 4.000000, 8.000000, 10.000000 +1.894000, 4.000000, 8.000000, 10.000000 +1.946000, 4.000000, 8.000000, 10.000000 +2.704000, 4.000000, 8.000000, 10.000000 +1.899000, 4.000000, 8.000000, 10.000000 +1.923000, 4.000000, 8.000000, 10.000000 +2.748000, 4.000000, 8.000000, 10.000000 +1.903000, 4.000000, 8.000000, 10.000000 +1.941000, 4.000000, 8.000000, 10.000000 +2.818000, 4.000000, 8.000000, 10.000000 +1.926000, 4.000000, 8.000000, 10.000000 +1.927000, 4.000000, 8.000000, 10.000000 +2.793000, 4.000000, 8.000000, 10.000000 +1.914000, 4.000000, 8.000000, 10.000000 +1.948000, 4.000000, 8.000000, 10.000000 +2.701000, 4.000000, 8.000000, 10.000000 +0.244000, 8.000000, 2.000000, 2.000000 +0.269000, 8.000000, 2.000000, 2.000000 +0.380000, 8.000000, 2.000000, 2.000000 +0.229000, 8.000000, 2.000000, 2.000000 +0.281000, 8.000000, 2.000000, 2.000000 +0.395000, 8.000000, 2.000000, 2.000000 +0.235000, 8.000000, 2.000000, 2.000000 +0.275000, 8.000000, 2.000000, 2.000000 +0.383000, 8.000000, 2.000000, 2.000000 +0.268000, 8.000000, 
2.000000, 2.000000 +0.285000, 8.000000, 2.000000, 2.000000 +0.345000, 8.000000, 2.000000, 2.000000 +0.243000, 8.000000, 2.000000, 2.000000 +0.268000, 8.000000, 2.000000, 2.000000 +0.424000, 8.000000, 2.000000, 2.000000 +0.242000, 8.000000, 2.000000, 2.000000 +0.303000, 8.000000, 2.000000, 2.000000 +0.404000, 8.000000, 2.000000, 2.000000 +0.250000, 8.000000, 2.000000, 2.000000 +0.277000, 8.000000, 2.000000, 2.000000 +0.369000, 8.000000, 2.000000, 2.000000 +0.253000, 8.000000, 2.000000, 2.000000 +0.278000, 8.000000, 2.000000, 2.000000 +0.441000, 8.000000, 2.000000, 2.000000 +0.270000, 8.000000, 2.000000, 2.000000 +0.246000, 8.000000, 2.000000, 2.000000 +0.284000, 8.000000, 2.000000, 2.000000 +0.400000, 8.000000, 2.000000, 2.000000 +0.240000, 8.000000, 2.000000, 2.000000 +0.252000, 8.000000, 2.000000, 2.000000 +0.295000, 8.000000, 2.000000, 2.000000 +0.395000, 8.000000, 2.000000, 2.000000 +0.248000, 8.000000, 2.000000, 2.000000 +0.324000, 8.000000, 2.000000, 2.000000 +0.408000, 8.000000, 2.000000, 2.000000 +0.312000, 8.000000, 2.000000, 2.000000 +0.420000, 8.000000, 2.000000, 2.000000 +0.240000, 8.000000, 2.000000, 2.000000 +0.266000, 8.000000, 2.000000, 2.000000 +0.404000, 8.000000, 2.000000, 2.000000 +0.273000, 8.000000, 2.000000, 2.000000 +0.334000, 8.000000, 2.000000, 2.000000 +0.452000, 1.000000, 5.000000, 2.000000 +0.423000, 1.000000, 5.000000, 2.000000 +0.474000, 1.000000, 5.000000, 2.000000 +0.268000, 1.000000, 5.000000, 2.000000 +0.337000, 1.000000, 5.000000, 2.000000 +0.420000, 1.000000, 5.000000, 2.000000 +0.323000, 1.000000, 5.000000, 2.000000 +0.301000, 1.000000, 5.000000, 2.000000 +0.428000, 1.000000, 5.000000, 2.000000 +0.318000, 1.000000, 5.000000, 2.000000 +0.291000, 1.000000, 5.000000, 2.000000 +0.436000, 1.000000, 5.000000, 2.000000 +0.323000, 1.000000, 5.000000, 2.000000 +0.351000, 1.000000, 5.000000, 2.000000 +0.456000, 1.000000, 5.000000, 2.000000 +0.309000, 1.000000, 5.000000, 2.000000 +0.329000, 1.000000, 5.000000, 2.000000 +0.514000, 1.000000, 
5.000000, 2.000000 +0.286000, 1.000000, 5.000000, 2.000000 +0.358000, 1.000000, 5.000000, 2.000000 +0.485000, 1.000000, 5.000000, 2.000000 +0.261000, 1.000000, 5.000000, 2.000000 +0.374000, 1.000000, 5.000000, 2.000000 +0.470000, 1.000000, 5.000000, 2.000000 +0.269000, 1.000000, 5.000000, 2.000000 +0.347000, 1.000000, 5.000000, 2.000000 +0.402000, 1.000000, 5.000000, 2.000000 +0.274000, 1.000000, 5.000000, 2.000000 +0.338000, 1.000000, 5.000000, 2.000000 +0.510000, 1.000000, 5.000000, 2.000000 +0.281000, 1.000000, 5.000000, 2.000000 +0.331000, 1.000000, 5.000000, 2.000000 +0.311000, 1.000000, 5.000000, 2.000000 +0.509000, 1.000000, 5.000000, 2.000000 +0.316000, 1.000000, 5.000000, 2.000000 +0.342000, 1.000000, 5.000000, 2.000000 +0.353000, 1.000000, 5.000000, 2.000000 +0.448000, 1.000000, 5.000000, 2.000000 +0.308000, 1.000000, 5.000000, 2.000000 +0.349000, 1.000000, 5.000000, 2.000000 +0.383000, 1.000000, 5.000000, 2.000000 +0.320000, 1.000000, 5.000000, 2.000000 +1.197000, 6.000000, 8.000000, 5.000000 +1.719000, 6.000000, 8.000000, 5.000000 +1.152000, 6.000000, 8.000000, 5.000000 +1.214000, 6.000000, 8.000000, 5.000000 +1.732000, 6.000000, 8.000000, 5.000000 +1.175000, 6.000000, 8.000000, 5.000000 +1.157000, 6.000000, 8.000000, 5.000000 +1.739000, 6.000000, 8.000000, 5.000000 +1.181000, 6.000000, 8.000000, 5.000000 +1.872000, 6.000000, 8.000000, 5.000000 +1.163000, 6.000000, 8.000000, 5.000000 +1.217000, 6.000000, 8.000000, 5.000000 +1.717000, 6.000000, 8.000000, 5.000000 +1.130000, 6.000000, 8.000000, 5.000000 +1.194000, 6.000000, 8.000000, 5.000000 +1.723000, 6.000000, 8.000000, 5.000000 +1.133000, 6.000000, 8.000000, 5.000000 +1.177000, 6.000000, 8.000000, 5.000000 +1.803000, 6.000000, 8.000000, 5.000000 +1.140000, 6.000000, 8.000000, 5.000000 +1.218000, 6.000000, 8.000000, 5.000000 +1.742000, 6.000000, 8.000000, 5.000000 +1.127000, 6.000000, 8.000000, 5.000000 +1.185000, 6.000000, 8.000000, 5.000000 +1.778000, 6.000000, 8.000000, 5.000000 +1.132000, 6.000000, 
8.000000, 5.000000 +1.184000, 6.000000, 8.000000, 5.000000 +1.781000, 6.000000, 8.000000, 5.000000 +1.131000, 6.000000, 8.000000, 5.000000 +1.210000, 6.000000, 8.000000, 5.000000 +1.791000, 6.000000, 8.000000, 5.000000 +1.179000, 6.000000, 8.000000, 5.000000 +1.858000, 6.000000, 8.000000, 5.000000 +1.121000, 6.000000, 8.000000, 5.000000 +1.188000, 6.000000, 8.000000, 5.000000 +1.742000, 6.000000, 8.000000, 5.000000 +1.167000, 6.000000, 8.000000, 5.000000 +1.198000, 6.000000, 8.000000, 5.000000 +1.751000, 6.000000, 8.000000, 5.000000 +1.127000, 6.000000, 8.000000, 5.000000 +1.204000, 6.000000, 8.000000, 5.000000 +1.583000, 6.000000, 8.000000, 5.000000 +0.386000, 8.000000, 4.000000, 3.000000 +0.428000, 8.000000, 4.000000, 3.000000 +0.555000, 8.000000, 4.000000, 3.000000 +0.369000, 8.000000, 4.000000, 3.000000 +0.355000, 8.000000, 4.000000, 3.000000 +0.378000, 8.000000, 4.000000, 3.000000 +0.545000, 8.000000, 4.000000, 3.000000 +0.389000, 8.000000, 4.000000, 3.000000 +0.365000, 8.000000, 4.000000, 3.000000 +0.402000, 8.000000, 4.000000, 3.000000 +0.561000, 8.000000, 4.000000, 3.000000 +0.339000, 8.000000, 4.000000, 3.000000 +0.385000, 8.000000, 4.000000, 3.000000 +0.582000, 8.000000, 4.000000, 3.000000 +0.341000, 8.000000, 4.000000, 3.000000 +0.417000, 8.000000, 4.000000, 3.000000 +0.578000, 8.000000, 4.000000, 3.000000 +0.341000, 8.000000, 4.000000, 3.000000 +0.400000, 8.000000, 4.000000, 3.000000 +0.614000, 8.000000, 4.000000, 3.000000 +0.339000, 8.000000, 4.000000, 3.000000 +0.416000, 8.000000, 4.000000, 3.000000 +0.619000, 8.000000, 4.000000, 3.000000 +0.335000, 8.000000, 4.000000, 3.000000 +0.418000, 8.000000, 4.000000, 3.000000 +0.576000, 8.000000, 4.000000, 3.000000 +0.341000, 8.000000, 4.000000, 3.000000 +0.404000, 8.000000, 4.000000, 3.000000 +0.552000, 8.000000, 4.000000, 3.000000 +0.343000, 8.000000, 4.000000, 3.000000 +0.446000, 8.000000, 4.000000, 3.000000 +0.529000, 8.000000, 4.000000, 3.000000 +0.382000, 8.000000, 4.000000, 3.000000 +0.389000, 8.000000, 
4.000000, 3.000000 +0.610000, 8.000000, 4.000000, 3.000000 +0.345000, 8.000000, 4.000000, 3.000000 +0.406000, 8.000000, 4.000000, 3.000000 +0.560000, 8.000000, 4.000000, 3.000000 +0.380000, 8.000000, 4.000000, 3.000000 +0.364000, 8.000000, 4.000000, 3.000000 +0.391000, 8.000000, 4.000000, 3.000000 +0.387000, 8.000000, 4.000000, 3.000000 +0.329000, 1.000000, 1.000000, 6.000000 +0.244000, 1.000000, 1.000000, 6.000000 +0.371000, 1.000000, 1.000000, 6.000000 +0.193000, 1.000000, 1.000000, 6.000000 +0.250000, 1.000000, 1.000000, 6.000000 +0.293000, 1.000000, 1.000000, 6.000000 +0.209000, 1.000000, 1.000000, 6.000000 +0.243000, 1.000000, 1.000000, 6.000000 +0.309000, 1.000000, 1.000000, 6.000000 +0.219000, 1.000000, 1.000000, 6.000000 +0.233000, 1.000000, 1.000000, 6.000000 +0.355000, 1.000000, 1.000000, 6.000000 +0.196000, 1.000000, 1.000000, 6.000000 +0.235000, 1.000000, 1.000000, 6.000000 +0.202000, 1.000000, 1.000000, 6.000000 +0.241000, 1.000000, 1.000000, 6.000000 +0.341000, 1.000000, 1.000000, 6.000000 +0.174000, 1.000000, 1.000000, 6.000000 +0.251000, 1.000000, 1.000000, 6.000000 +0.322000, 1.000000, 1.000000, 6.000000 +0.220000, 1.000000, 1.000000, 6.000000 +0.224000, 1.000000, 1.000000, 6.000000 +0.326000, 1.000000, 1.000000, 6.000000 +0.171000, 1.000000, 1.000000, 6.000000 +0.256000, 1.000000, 1.000000, 6.000000 +0.310000, 1.000000, 1.000000, 6.000000 +0.222000, 1.000000, 1.000000, 6.000000 +0.241000, 1.000000, 1.000000, 6.000000 +0.342000, 1.000000, 1.000000, 6.000000 +0.181000, 1.000000, 1.000000, 6.000000 +0.252000, 1.000000, 1.000000, 6.000000 +0.364000, 1.000000, 1.000000, 6.000000 +0.188000, 1.000000, 1.000000, 6.000000 +0.245000, 1.000000, 1.000000, 6.000000 +0.176000, 1.000000, 1.000000, 6.000000 +0.245000, 1.000000, 1.000000, 6.000000 +0.330000, 1.000000, 1.000000, 6.000000 +0.239000, 1.000000, 1.000000, 6.000000 +0.358000, 1.000000, 1.000000, 6.000000 +0.198000, 1.000000, 1.000000, 6.000000 +0.260000, 1.000000, 1.000000, 6.000000 +0.322000, 1.000000, 
1.000000, 6.000000 +0.480000, 3.000000, 7.000000, 2.000000 +0.512000, 3.000000, 7.000000, 2.000000 +0.682000, 3.000000, 7.000000, 2.000000 +0.440000, 3.000000, 7.000000, 2.000000 +0.515000, 3.000000, 7.000000, 2.000000 +0.711000, 3.000000, 7.000000, 2.000000 +0.438000, 3.000000, 7.000000, 2.000000 +0.514000, 3.000000, 7.000000, 2.000000 +0.759000, 3.000000, 7.000000, 2.000000 +0.439000, 3.000000, 7.000000, 2.000000 +0.510000, 3.000000, 7.000000, 2.000000 +0.719000, 3.000000, 7.000000, 2.000000 +0.491000, 3.000000, 7.000000, 2.000000 +0.530000, 3.000000, 7.000000, 2.000000 +0.683000, 3.000000, 7.000000, 2.000000 +0.440000, 3.000000, 7.000000, 2.000000 +0.518000, 3.000000, 7.000000, 2.000000 +0.711000, 3.000000, 7.000000, 2.000000 +0.434000, 3.000000, 7.000000, 2.000000 +0.546000, 3.000000, 7.000000, 2.000000 +0.746000, 3.000000, 7.000000, 2.000000 +0.439000, 3.000000, 7.000000, 2.000000 +0.528000, 3.000000, 7.000000, 2.000000 +0.708000, 3.000000, 7.000000, 2.000000 +0.441000, 3.000000, 7.000000, 2.000000 +0.506000, 3.000000, 7.000000, 2.000000 +0.733000, 3.000000, 7.000000, 2.000000 +0.444000, 3.000000, 7.000000, 2.000000 +0.495000, 3.000000, 7.000000, 2.000000 +0.727000, 3.000000, 7.000000, 2.000000 +0.445000, 3.000000, 7.000000, 2.000000 +0.490000, 3.000000, 7.000000, 2.000000 +0.696000, 3.000000, 7.000000, 2.000000 +0.444000, 3.000000, 7.000000, 2.000000 +0.469000, 3.000000, 7.000000, 2.000000 +0.488000, 3.000000, 7.000000, 2.000000 +0.689000, 3.000000, 7.000000, 2.000000 +0.482000, 3.000000, 7.000000, 2.000000 +0.442000, 3.000000, 7.000000, 2.000000 +0.521000, 3.000000, 7.000000, 2.000000 +0.708000, 3.000000, 7.000000, 2.000000 +0.437000, 3.000000, 7.000000, 2.000000 +2.273000, 4.000000, 10.000000, 6.000000 +3.163000, 4.000000, 10.000000, 6.000000 +2.216000, 4.000000, 10.000000, 6.000000 +2.265000, 4.000000, 10.000000, 6.000000 +3.135000, 4.000000, 10.000000, 6.000000 +2.283000, 4.000000, 10.000000, 6.000000 +3.072000, 4.000000, 10.000000, 6.000000 +2.236000, 
4.000000, 10.000000, 6.000000 +2.312000, 4.000000, 10.000000, 6.000000 +3.145000, 4.000000, 10.000000, 6.000000 +2.211000, 4.000000, 10.000000, 6.000000 +2.221000, 4.000000, 10.000000, 6.000000 +2.270000, 4.000000, 10.000000, 6.000000 +3.336000, 4.000000, 10.000000, 6.000000 +2.216000, 4.000000, 10.000000, 6.000000 +2.281000, 4.000000, 10.000000, 6.000000 +3.408000, 4.000000, 10.000000, 6.000000 +2.236000, 4.000000, 10.000000, 6.000000 +2.271000, 4.000000, 10.000000, 6.000000 +3.160000, 4.000000, 10.000000, 6.000000 +2.248000, 4.000000, 10.000000, 6.000000 +2.247000, 4.000000, 10.000000, 6.000000 +3.170000, 4.000000, 10.000000, 6.000000 +2.195000, 4.000000, 10.000000, 6.000000 +2.253000, 4.000000, 10.000000, 6.000000 +3.419000, 4.000000, 10.000000, 6.000000 +2.281000, 4.000000, 10.000000, 6.000000 +2.265000, 4.000000, 10.000000, 6.000000 +3.303000, 4.000000, 10.000000, 6.000000 +2.251000, 4.000000, 10.000000, 6.000000 +2.266000, 4.000000, 10.000000, 6.000000 +3.209000, 4.000000, 10.000000, 6.000000 +2.241000, 4.000000, 10.000000, 6.000000 +2.281000, 4.000000, 10.000000, 6.000000 +3.321000, 4.000000, 10.000000, 6.000000 +2.263000, 4.000000, 10.000000, 6.000000 +3.158000, 4.000000, 10.000000, 6.000000 +2.234000, 4.000000, 10.000000, 6.000000 +2.275000, 4.000000, 10.000000, 6.000000 +3.164000, 4.000000, 10.000000, 6.000000 +2.209000, 4.000000, 10.000000, 6.000000 +2.213000, 4.000000, 10.000000, 6.000000 +0.469000, 6.000000, 6.000000, 2.000000 +0.673000, 6.000000, 6.000000, 2.000000 +0.482000, 6.000000, 6.000000, 2.000000 +0.668000, 6.000000, 6.000000, 2.000000 +0.426000, 6.000000, 6.000000, 2.000000 +0.411000, 6.000000, 6.000000, 2.000000 +0.471000, 6.000000, 6.000000, 2.000000 +0.653000, 6.000000, 6.000000, 2.000000 +0.430000, 6.000000, 6.000000, 2.000000 +0.487000, 6.000000, 6.000000, 2.000000 +0.644000, 6.000000, 6.000000, 2.000000 +0.419000, 6.000000, 6.000000, 2.000000 +0.501000, 6.000000, 6.000000, 2.000000 +0.664000, 6.000000, 6.000000, 2.000000 +0.451000, 
6.000000, 6.000000, 2.000000 +0.485000, 6.000000, 6.000000, 2.000000 +0.635000, 6.000000, 6.000000, 2.000000 +0.414000, 6.000000, 6.000000, 2.000000 +0.474000, 6.000000, 6.000000, 2.000000 +0.624000, 6.000000, 6.000000, 2.000000 +0.410000, 6.000000, 6.000000, 2.000000 +0.514000, 6.000000, 6.000000, 2.000000 +0.680000, 6.000000, 6.000000, 2.000000 +0.416000, 6.000000, 6.000000, 2.000000 +0.464000, 6.000000, 6.000000, 2.000000 +0.644000, 6.000000, 6.000000, 2.000000 +0.440000, 6.000000, 6.000000, 2.000000 +0.471000, 6.000000, 6.000000, 2.000000 +0.660000, 6.000000, 6.000000, 2.000000 +0.411000, 6.000000, 6.000000, 2.000000 +0.504000, 6.000000, 6.000000, 2.000000 +0.655000, 6.000000, 6.000000, 2.000000 +0.415000, 6.000000, 6.000000, 2.000000 +0.462000, 6.000000, 6.000000, 2.000000 +0.632000, 6.000000, 6.000000, 2.000000 +0.409000, 6.000000, 6.000000, 2.000000 +0.515000, 6.000000, 6.000000, 2.000000 +0.664000, 6.000000, 6.000000, 2.000000 +0.414000, 6.000000, 6.000000, 2.000000 +0.481000, 6.000000, 6.000000, 2.000000 +0.681000, 6.000000, 6.000000, 2.000000 +0.418000, 6.000000, 6.000000, 2.000000 +0.368000, 6.000000, 3.000000, 4.000000 +0.418000, 6.000000, 3.000000, 4.000000 +0.270000, 6.000000, 3.000000, 4.000000 +0.352000, 6.000000, 3.000000, 4.000000 +0.437000, 6.000000, 3.000000, 4.000000 +0.276000, 6.000000, 3.000000, 4.000000 +0.341000, 6.000000, 3.000000, 4.000000 +0.441000, 6.000000, 3.000000, 4.000000 +0.271000, 6.000000, 3.000000, 4.000000 +0.348000, 6.000000, 3.000000, 4.000000 +0.474000, 6.000000, 3.000000, 4.000000 +0.309000, 6.000000, 3.000000, 4.000000 +0.328000, 6.000000, 3.000000, 4.000000 +0.441000, 6.000000, 3.000000, 4.000000 +0.272000, 6.000000, 3.000000, 4.000000 +0.362000, 6.000000, 3.000000, 4.000000 +0.410000, 6.000000, 3.000000, 4.000000 +0.302000, 6.000000, 3.000000, 4.000000 +0.324000, 6.000000, 3.000000, 4.000000 +0.267000, 6.000000, 3.000000, 4.000000 +0.313000, 6.000000, 3.000000, 4.000000 +0.497000, 6.000000, 3.000000, 4.000000 +0.268000, 
6.000000, 3.000000, 4.000000 +0.376000, 6.000000, 3.000000, 4.000000 +0.451000, 6.000000, 3.000000, 4.000000 +0.271000, 6.000000, 3.000000, 4.000000 +0.329000, 6.000000, 3.000000, 4.000000 +0.447000, 6.000000, 3.000000, 4.000000 +0.317000, 6.000000, 3.000000, 4.000000 +0.271000, 6.000000, 3.000000, 4.000000 +0.322000, 6.000000, 3.000000, 4.000000 +0.475000, 6.000000, 3.000000, 4.000000 +0.298000, 6.000000, 3.000000, 4.000000 +0.280000, 6.000000, 3.000000, 4.000000 +0.548000, 6.000000, 3.000000, 4.000000 +0.262000, 6.000000, 3.000000, 4.000000 +0.320000, 6.000000, 3.000000, 4.000000 +0.441000, 6.000000, 3.000000, 4.000000 +0.271000, 6.000000, 3.000000, 4.000000 +0.331000, 6.000000, 3.000000, 4.000000 +0.465000, 6.000000, 3.000000, 4.000000 +0.269000, 6.000000, 3.000000, 4.000000 +0.544000, 5.000000, 7.000000, 2.000000 +0.757000, 5.000000, 7.000000, 2.000000 +0.476000, 5.000000, 7.000000, 2.000000 +0.545000, 5.000000, 7.000000, 2.000000 +0.737000, 5.000000, 7.000000, 2.000000 +0.477000, 5.000000, 7.000000, 2.000000 +0.545000, 5.000000, 7.000000, 2.000000 +0.845000, 5.000000, 7.000000, 2.000000 +0.477000, 5.000000, 7.000000, 2.000000 +0.547000, 5.000000, 7.000000, 2.000000 +0.812000, 5.000000, 7.000000, 2.000000 +0.529000, 5.000000, 7.000000, 2.000000 +0.504000, 5.000000, 7.000000, 2.000000 +0.506000, 5.000000, 7.000000, 2.000000 +0.533000, 5.000000, 7.000000, 2.000000 +0.657000, 5.000000, 7.000000, 2.000000 +0.539000, 5.000000, 7.000000, 2.000000 +0.765000, 5.000000, 7.000000, 2.000000 +0.552000, 5.000000, 7.000000, 2.000000 +0.789000, 5.000000, 7.000000, 2.000000 +0.475000, 5.000000, 7.000000, 2.000000 +0.556000, 5.000000, 7.000000, 2.000000 +0.738000, 5.000000, 7.000000, 2.000000 +0.512000, 5.000000, 7.000000, 2.000000 +0.487000, 5.000000, 7.000000, 2.000000 +0.546000, 5.000000, 7.000000, 2.000000 +0.796000, 5.000000, 7.000000, 2.000000 +0.484000, 5.000000, 7.000000, 2.000000 +0.549000, 5.000000, 7.000000, 2.000000 +0.766000, 5.000000, 7.000000, 2.000000 +0.518000, 
5.000000, 7.000000, 2.000000 +0.521000, 5.000000, 7.000000, 2.000000 +0.799000, 5.000000, 7.000000, 2.000000 +0.486000, 5.000000, 7.000000, 2.000000 +0.562000, 5.000000, 7.000000, 2.000000 +0.813000, 5.000000, 7.000000, 2.000000 +0.476000, 5.000000, 7.000000, 2.000000 +0.532000, 5.000000, 7.000000, 2.000000 +0.741000, 5.000000, 7.000000, 2.000000 +0.509000, 5.000000, 7.000000, 2.000000 +0.555000, 5.000000, 7.000000, 2.000000 +0.780000, 5.000000, 7.000000, 2.000000 +2.039000, 8.000000, 10.000000, 5.000000 +2.102000, 8.000000, 10.000000, 5.000000 +3.005000, 8.000000, 10.000000, 5.000000 +2.058000, 8.000000, 10.000000, 5.000000 +2.091000, 8.000000, 10.000000, 5.000000 +2.928000, 8.000000, 10.000000, 5.000000 +2.092000, 8.000000, 10.000000, 5.000000 +2.067000, 8.000000, 10.000000, 5.000000 +3.010000, 8.000000, 10.000000, 5.000000 +2.026000, 8.000000, 10.000000, 5.000000 +2.147000, 8.000000, 10.000000, 5.000000 +2.996000, 8.000000, 10.000000, 5.000000 +2.111000, 8.000000, 10.000000, 5.000000 +2.902000, 8.000000, 10.000000, 5.000000 +2.034000, 8.000000, 10.000000, 5.000000 +2.079000, 8.000000, 10.000000, 5.000000 +2.858000, 8.000000, 10.000000, 5.000000 +2.060000, 8.000000, 10.000000, 5.000000 +2.078000, 8.000000, 10.000000, 5.000000 +2.292000, 8.000000, 10.000000, 5.000000 +3.105000, 8.000000, 10.000000, 5.000000 +2.028000, 8.000000, 10.000000, 5.000000 +2.098000, 8.000000, 10.000000, 5.000000 +3.042000, 8.000000, 10.000000, 5.000000 +2.034000, 8.000000, 10.000000, 5.000000 +2.112000, 8.000000, 10.000000, 5.000000 +2.800000, 8.000000, 10.000000, 5.000000 +2.037000, 8.000000, 10.000000, 5.000000 +2.062000, 8.000000, 10.000000, 5.000000 +2.866000, 8.000000, 10.000000, 5.000000 +2.059000, 8.000000, 10.000000, 5.000000 +2.061000, 8.000000, 10.000000, 5.000000 +2.920000, 8.000000, 10.000000, 5.000000 +2.041000, 8.000000, 10.000000, 5.000000 +2.073000, 8.000000, 10.000000, 5.000000 +2.959000, 8.000000, 10.000000, 5.000000 +2.066000, 8.000000, 10.000000, 5.000000 +2.063000, 
8.000000, 10.000000, 5.000000 +2.905000, 8.000000, 10.000000, 5.000000 +2.028000, 8.000000, 10.000000, 5.000000 +2.058000, 8.000000, 10.000000, 5.000000 +2.989000, 8.000000, 10.000000, 5.000000 +1.629000, 6.000000, 8.000000, 8.000000 +1.675000, 6.000000, 8.000000, 8.000000 +2.372000, 6.000000, 8.000000, 8.000000 +1.627000, 6.000000, 8.000000, 8.000000 +1.665000, 6.000000, 8.000000, 8.000000 +2.568000, 6.000000, 8.000000, 8.000000 +1.639000, 6.000000, 8.000000, 8.000000 +1.684000, 6.000000, 8.000000, 8.000000 +2.432000, 6.000000, 8.000000, 8.000000 +1.659000, 6.000000, 8.000000, 8.000000 +2.617000, 6.000000, 8.000000, 8.000000 +2.096000, 6.000000, 8.000000, 8.000000 +2.645000, 6.000000, 8.000000, 8.000000 +2.291000, 6.000000, 8.000000, 8.000000 +2.590000, 6.000000, 8.000000, 8.000000 +8.075000, 6.000000, 8.000000, 8.000000 +2.076000, 6.000000, 8.000000, 8.000000 +1.842000, 6.000000, 8.000000, 8.000000 +2.526000, 6.000000, 8.000000, 8.000000 +2.011000, 6.000000, 8.000000, 8.000000 +2.423000, 6.000000, 8.000000, 8.000000 +1.651000, 6.000000, 8.000000, 8.000000 +1.744000, 6.000000, 8.000000, 8.000000 +2.338000, 6.000000, 8.000000, 8.000000 +1.672000, 6.000000, 8.000000, 8.000000 +1.669000, 6.000000, 8.000000, 8.000000 +2.845000, 6.000000, 8.000000, 8.000000 +1.731000, 6.000000, 8.000000, 8.000000 +2.267000, 6.000000, 8.000000, 8.000000 +1.654000, 6.000000, 8.000000, 8.000000 +1.739000, 6.000000, 8.000000, 8.000000 +2.585000, 6.000000, 8.000000, 8.000000 +1.710000, 6.000000, 8.000000, 8.000000 +1.763000, 6.000000, 8.000000, 8.000000 +2.461000, 6.000000, 8.000000, 8.000000 +1.681000, 6.000000, 8.000000, 8.000000 +1.756000, 6.000000, 8.000000, 8.000000 +2.449000, 6.000000, 8.000000, 8.000000 +1.644000, 6.000000, 8.000000, 8.000000 +2.466000, 6.000000, 8.000000, 8.000000 +1.680000, 6.000000, 8.000000, 8.000000 +1.866000, 5.000000, 10.000000, 3.000000 +2.054000, 6.000000, 8.000000, 8.000000 +2.002000, 5.000000, 10.000000, 3.000000 +1.352000, 5.000000, 10.000000, 3.000000 
+1.569000, 5.000000, 10.000000, 3.000000 +1.298000, 5.000000, 10.000000, 3.000000 +2.725000, 5.000000, 10.000000, 3.000000 +1.386000, 5.000000, 10.000000, 3.000000 +2.134000, 5.000000, 10.000000, 3.000000 +1.318000, 5.000000, 10.000000, 3.000000 +1.829000, 5.000000, 10.000000, 3.000000 +1.402000, 5.000000, 10.000000, 3.000000 +1.981000, 5.000000, 10.000000, 3.000000 +1.385000, 5.000000, 10.000000, 3.000000 +1.273000, 5.000000, 10.000000, 3.000000 +1.246000, 5.000000, 10.000000, 3.000000 +1.942000, 5.000000, 10.000000, 3.000000 +1.326000, 5.000000, 10.000000, 3.000000 +1.370000, 5.000000, 10.000000, 3.000000 +1.644000, 5.000000, 10.000000, 3.000000 +1.717000, 5.000000, 10.000000, 3.000000 +1.654000, 5.000000, 10.000000, 3.000000 +1.672000, 5.000000, 10.000000, 3.000000 +1.379000, 5.000000, 10.000000, 3.000000 +1.848000, 5.000000, 10.000000, 3.000000 +1.283000, 5.000000, 10.000000, 3.000000 +1.391000, 5.000000, 10.000000, 3.000000 +1.927000, 5.000000, 10.000000, 3.000000 +1.671000, 5.000000, 10.000000, 3.000000 +1.388000, 5.000000, 10.000000, 3.000000 +1.627000, 5.000000, 10.000000, 3.000000 +1.487000, 5.000000, 10.000000, 3.000000 +1.626000, 5.000000, 10.000000, 3.000000 +1.345000, 5.000000, 10.000000, 3.000000 +2.669000, 2.000000, 8.000000, 9.000000 +1.373000, 5.000000, 10.000000, 3.000000 +2.108000, 2.000000, 8.000000, 9.000000 +1.383000, 5.000000, 10.000000, 3.000000 +2.529000, 2.000000, 8.000000, 9.000000 +1.329000, 5.000000, 10.000000, 3.000000 +2.625000, 2.000000, 8.000000, 9.000000 +1.353000, 5.000000, 10.000000, 3.000000 +2.606000, 2.000000, 8.000000, 9.000000 +1.563000, 5.000000, 10.000000, 3.000000 +3.374000, 2.000000, 8.000000, 9.000000 +1.380000, 5.000000, 10.000000, 3.000000 +1.445000, 5.000000, 10.000000, 3.000000 +1.383000, 5.000000, 10.000000, 3.000000 +1.317000, 5.000000, 10.000000, 3.000000 +1.771000, 2.000000, 8.000000, 9.000000 +1.765000, 2.000000, 8.000000, 9.000000 +1.864000, 2.000000, 8.000000, 9.000000 +1.880000, 2.000000, 8.000000, 9.000000 
+1.848000, 2.000000, 8.000000, 9.000000 +1.708000, 2.000000, 8.000000, 9.000000 +1.712000, 2.000000, 8.000000, 9.000000 +1.773000, 2.000000, 8.000000, 9.000000 +1.691000, 2.000000, 8.000000, 9.000000 +1.845000, 2.000000, 8.000000, 9.000000 +1.771000, 2.000000, 8.000000, 9.000000 +1.782000, 2.000000, 8.000000, 9.000000 +1.773000, 2.000000, 8.000000, 9.000000 +1.786000, 2.000000, 8.000000, 9.000000 +1.825000, 2.000000, 8.000000, 9.000000 +1.801000, 2.000000, 8.000000, 9.000000 +1.789000, 2.000000, 8.000000, 9.000000 +1.793000, 2.000000, 8.000000, 9.000000 +1.782000, 2.000000, 8.000000, 9.000000 +2.515000, 2.000000, 8.000000, 9.000000 +1.788000, 2.000000, 8.000000, 9.000000 +2.425000, 2.000000, 8.000000, 9.000000 +1.783000, 2.000000, 8.000000, 9.000000 +2.576000, 2.000000, 8.000000, 9.000000 +1.814000, 2.000000, 8.000000, 9.000000 +2.500000, 2.000000, 8.000000, 9.000000 +1.761000, 2.000000, 8.000000, 9.000000 +2.568000, 2.000000, 8.000000, 9.000000 +1.768000, 2.000000, 8.000000, 9.000000 +2.505000, 2.000000, 8.000000, 9.000000 +1.791000, 2.000000, 8.000000, 9.000000 +2.602000, 2.000000, 8.000000, 9.000000 +1.833000, 2.000000, 8.000000, 9.000000 +2.475000, 2.000000, 8.000000, 9.000000 +1.848000, 2.000000, 8.000000, 9.000000 +2.618000, 2.000000, 8.000000, 9.000000 +0.443000, 3.000000, 5.000000, 5.000000 +0.452000, 3.000000, 5.000000, 5.000000 +0.516000, 3.000000, 5.000000, 5.000000 +1.051000, 3.000000, 5.000000, 5.000000 +0.753000, 3.000000, 5.000000, 5.000000 +0.745000, 3.000000, 5.000000, 5.000000 +0.479000, 3.000000, 5.000000, 5.000000 +0.742000, 3.000000, 5.000000, 5.000000 +0.473000, 3.000000, 5.000000, 5.000000 +0.624000, 3.000000, 5.000000, 5.000000 +0.471000, 3.000000, 5.000000, 5.000000 +0.498000, 3.000000, 5.000000, 5.000000 +1.867000, 3.000000, 5.000000, 5.000000 +0.519000, 3.000000, 5.000000, 5.000000 +0.500000, 3.000000, 5.000000, 5.000000 +0.431000, 3.000000, 5.000000, 5.000000 +0.602000, 3.000000, 5.000000, 5.000000 +0.577000, 3.000000, 5.000000, 5.000000 
+0.442000, 3.000000, 5.000000, 5.000000 +0.519000, 3.000000, 5.000000, 5.000000 +0.701000, 3.000000, 5.000000, 5.000000 +0.479000, 3.000000, 5.000000, 5.000000 +0.741000, 3.000000, 5.000000, 5.000000 +0.538000, 3.000000, 5.000000, 5.000000 +0.657000, 3.000000, 5.000000, 5.000000 +0.526000, 3.000000, 5.000000, 5.000000 +0.625000, 3.000000, 5.000000, 5.000000 +0.505000, 3.000000, 5.000000, 5.000000 +0.795000, 3.000000, 5.000000, 5.000000 +0.444000, 3.000000, 5.000000, 5.000000 +0.453000, 3.000000, 5.000000, 5.000000 +0.488000, 3.000000, 5.000000, 5.000000 +0.674000, 3.000000, 5.000000, 5.000000 +0.430000, 3.000000, 5.000000, 5.000000 +1.041000, 3.000000, 5.000000, 5.000000 +0.637000, 3.000000, 5.000000, 5.000000 +0.408000, 3.000000, 5.000000, 5.000000 +0.467000, 3.000000, 5.000000, 5.000000 +0.692000, 3.000000, 5.000000, 5.000000 +0.555000, 3.000000, 5.000000, 5.000000 +0.694000, 3.000000, 5.000000, 5.000000 +0.460000, 3.000000, 5.000000, 5.000000 +0.819000, 8.000000, 5.000000, 9.000000 +0.854000, 8.000000, 5.000000, 9.000000 +0.795000, 8.000000, 5.000000, 9.000000 +1.014000, 8.000000, 5.000000, 9.000000 +0.773000, 8.000000, 5.000000, 9.000000 +0.963000, 8.000000, 5.000000, 9.000000 +0.700000, 8.000000, 5.000000, 9.000000 +0.715000, 8.000000, 5.000000, 9.000000 +0.993000, 8.000000, 5.000000, 9.000000 +0.679000, 8.000000, 5.000000, 9.000000 +0.815000, 8.000000, 5.000000, 9.000000 +1.104000, 8.000000, 5.000000, 9.000000 +0.739000, 8.000000, 5.000000, 9.000000 +1.126000, 8.000000, 5.000000, 9.000000 +1.082000, 8.000000, 5.000000, 9.000000 +0.729000, 8.000000, 5.000000, 9.000000 +0.852000, 8.000000, 5.000000, 9.000000 +1.015000, 8.000000, 5.000000, 9.000000 +0.673000, 8.000000, 5.000000, 9.000000 +0.745000, 8.000000, 5.000000, 9.000000 +0.959000, 8.000000, 5.000000, 9.000000 +0.722000, 8.000000, 5.000000, 9.000000 +0.732000, 8.000000, 5.000000, 9.000000 +1.037000, 8.000000, 5.000000, 9.000000 +0.839000, 8.000000, 5.000000, 9.000000 +1.040000, 8.000000, 5.000000, 9.000000 
+0.740000, 8.000000, 5.000000, 9.000000 +0.748000, 8.000000, 5.000000, 9.000000 +0.801000, 8.000000, 5.000000, 9.000000 +0.980000, 8.000000, 5.000000, 9.000000 +0.696000, 8.000000, 5.000000, 9.000000 +0.739000, 8.000000, 5.000000, 9.000000 +0.736000, 8.000000, 5.000000, 9.000000 +1.078000, 8.000000, 5.000000, 9.000000 +0.752000, 8.000000, 5.000000, 9.000000 +1.116000, 8.000000, 5.000000, 9.000000 +0.711000, 8.000000, 5.000000, 9.000000 +0.789000, 8.000000, 5.000000, 9.000000 +1.081000, 8.000000, 5.000000, 9.000000 +0.688000, 8.000000, 5.000000, 9.000000 +0.743000, 8.000000, 5.000000, 9.000000 +0.913000, 8.000000, 5.000000, 9.000000 +0.446000, 2.000000, 4.000000, 8.000000 +0.669000, 2.000000, 4.000000, 8.000000 +0.419000, 2.000000, 4.000000, 8.000000 +0.697000, 2.000000, 4.000000, 8.000000 +0.395000, 2.000000, 4.000000, 8.000000 +0.713000, 2.000000, 4.000000, 8.000000 +0.869000, 2.000000, 4.000000, 8.000000 +0.476000, 2.000000, 4.000000, 8.000000 +0.544000, 2.000000, 4.000000, 8.000000 +0.577000, 2.000000, 4.000000, 8.000000 +0.434000, 2.000000, 4.000000, 8.000000 +0.498000, 2.000000, 4.000000, 8.000000 +0.579000, 2.000000, 4.000000, 8.000000 +0.383000, 2.000000, 4.000000, 8.000000 +0.462000, 2.000000, 4.000000, 8.000000 +0.388000, 2.000000, 4.000000, 8.000000 +0.582000, 2.000000, 4.000000, 8.000000 +0.373000, 2.000000, 4.000000, 8.000000 +0.463000, 2.000000, 4.000000, 8.000000 +0.564000, 2.000000, 4.000000, 8.000000 +0.431000, 2.000000, 4.000000, 8.000000 +0.447000, 2.000000, 4.000000, 8.000000 +0.681000, 2.000000, 4.000000, 8.000000 +0.390000, 2.000000, 4.000000, 8.000000 +0.423000, 2.000000, 4.000000, 8.000000 +0.696000, 2.000000, 4.000000, 8.000000 +0.712000, 2.000000, 4.000000, 8.000000 +0.421000, 2.000000, 4.000000, 8.000000 +0.574000, 2.000000, 4.000000, 8.000000 +0.596000, 2.000000, 4.000000, 8.000000 +0.715000, 2.000000, 4.000000, 8.000000 +0.565000, 2.000000, 4.000000, 8.000000 +0.441000, 2.000000, 4.000000, 8.000000 +0.562000, 2.000000, 4.000000, 8.000000 
+0.376000, 2.000000, 4.000000, 8.000000 +0.400000, 2.000000, 4.000000, 8.000000 +0.574000, 2.000000, 4.000000, 8.000000 +0.433000, 2.000000, 4.000000, 8.000000 +0.423000, 2.000000, 4.000000, 8.000000 +0.697000, 2.000000, 4.000000, 8.000000 +0.413000, 2.000000, 4.000000, 8.000000 +0.371000, 2.000000, 4.000000, 8.000000 +1.093000, 2.000000, 7.000000, 8.000000 +1.085000, 2.000000, 7.000000, 8.000000 +1.099000, 2.000000, 7.000000, 8.000000 +1.746000, 2.000000, 7.000000, 8.000000 +1.086000, 2.000000, 7.000000, 8.000000 +1.123000, 2.000000, 7.000000, 8.000000 +1.677000, 2.000000, 7.000000, 8.000000 +1.089000, 2.000000, 7.000000, 8.000000 +1.081000, 2.000000, 7.000000, 8.000000 +1.078000, 2.000000, 7.000000, 8.000000 +1.142000, 2.000000, 7.000000, 8.000000 +1.621000, 2.000000, 7.000000, 8.000000 +1.155000, 2.000000, 7.000000, 8.000000 +1.696000, 2.000000, 7.000000, 8.000000 +1.106000, 2.000000, 7.000000, 8.000000 +1.415000, 2.000000, 7.000000, 8.000000 +1.617000, 2.000000, 7.000000, 8.000000 +1.512000, 2.000000, 7.000000, 8.000000 +1.485000, 2.000000, 7.000000, 8.000000 +1.483000, 2.000000, 7.000000, 8.000000 +1.614000, 2.000000, 7.000000, 8.000000 +1.354000, 2.000000, 7.000000, 8.000000 +1.726000, 2.000000, 7.000000, 8.000000 +1.400000, 2.000000, 7.000000, 8.000000 +1.220000, 2.000000, 7.000000, 8.000000 +1.757000, 2.000000, 7.000000, 8.000000 +1.251000, 2.000000, 7.000000, 8.000000 +1.289000, 2.000000, 7.000000, 8.000000 +1.612000, 2.000000, 7.000000, 8.000000 +1.190000, 2.000000, 7.000000, 8.000000 +1.480000, 2.000000, 7.000000, 8.000000 +1.597000, 2.000000, 7.000000, 8.000000 +1.380000, 2.000000, 7.000000, 8.000000 +1.567000, 2.000000, 7.000000, 8.000000 +1.693000, 2.000000, 7.000000, 8.000000 +1.563000, 2.000000, 7.000000, 8.000000 +1.742000, 2.000000, 7.000000, 8.000000 +1.662000, 2.000000, 7.000000, 8.000000 +1.201000, 2.000000, 7.000000, 8.000000 +1.433000, 2.000000, 7.000000, 8.000000 +1.669000, 2.000000, 7.000000, 8.000000 +1.411000, 2.000000, 7.000000, 8.000000 
+0.420000, 7.000000, 2.000000, 9.000000 +0.436000, 7.000000, 2.000000, 9.000000 +0.345000, 7.000000, 2.000000, 9.000000 +0.546000, 7.000000, 2.000000, 9.000000 +0.436000, 7.000000, 2.000000, 9.000000 +0.462000, 7.000000, 2.000000, 9.000000 +0.369000, 7.000000, 2.000000, 9.000000 +0.442000, 7.000000, 2.000000, 9.000000 +0.334000, 7.000000, 2.000000, 9.000000 +0.450000, 7.000000, 2.000000, 9.000000 +0.425000, 7.000000, 2.000000, 9.000000 +0.463000, 7.000000, 2.000000, 9.000000 +0.456000, 7.000000, 2.000000, 9.000000 +0.494000, 7.000000, 2.000000, 9.000000 +0.535000, 7.000000, 2.000000, 9.000000 +0.406000, 7.000000, 2.000000, 9.000000 +0.450000, 7.000000, 2.000000, 9.000000 +0.458000, 7.000000, 2.000000, 9.000000 +0.703000, 7.000000, 2.000000, 9.000000 +0.453000, 7.000000, 2.000000, 9.000000 +0.521000, 7.000000, 2.000000, 9.000000 +0.568000, 7.000000, 2.000000, 9.000000 +0.395000, 7.000000, 2.000000, 9.000000 +0.511000, 7.000000, 2.000000, 9.000000 +0.467000, 7.000000, 2.000000, 9.000000 +0.493000, 7.000000, 2.000000, 9.000000 +0.377000, 7.000000, 2.000000, 9.000000 +0.427000, 7.000000, 2.000000, 9.000000 +0.534000, 7.000000, 2.000000, 9.000000 +0.409000, 7.000000, 2.000000, 9.000000 +0.385000, 7.000000, 2.000000, 9.000000 +0.511000, 7.000000, 2.000000, 9.000000 +0.444000, 7.000000, 2.000000, 9.000000 +0.491000, 7.000000, 2.000000, 9.000000 +0.407000, 7.000000, 2.000000, 9.000000 +0.360000, 7.000000, 2.000000, 9.000000 +0.444000, 7.000000, 2.000000, 9.000000 +0.501000, 7.000000, 2.000000, 9.000000 +0.403000, 7.000000, 2.000000, 9.000000 +0.428000, 7.000000, 2.000000, 9.000000 +0.419000, 7.000000, 2.000000, 9.000000 +0.342000, 7.000000, 2.000000, 9.000000 +0.955000, 7.000000, 8.000000, 2.000000 +1.092000, 7.000000, 8.000000, 2.000000 +0.838000, 7.000000, 8.000000, 2.000000 +0.957000, 7.000000, 8.000000, 2.000000 +0.964000, 7.000000, 8.000000, 2.000000 +1.008000, 7.000000, 8.000000, 2.000000 +1.092000, 7.000000, 8.000000, 2.000000 +0.967000, 7.000000, 8.000000, 2.000000 
+1.118000, 7.000000, 8.000000, 2.000000 +1.030000, 7.000000, 8.000000, 2.000000 +1.059000, 7.000000, 8.000000, 2.000000 +1.173000, 7.000000, 8.000000, 2.000000 +0.964000, 7.000000, 8.000000, 2.000000 +1.232000, 7.000000, 8.000000, 2.000000 +1.165000, 7.000000, 8.000000, 2.000000 +1.073000, 7.000000, 8.000000, 2.000000 +1.187000, 7.000000, 8.000000, 2.000000 +1.045000, 7.000000, 8.000000, 2.000000 +1.104000, 7.000000, 8.000000, 2.000000 +1.196000, 7.000000, 8.000000, 2.000000 +0.996000, 7.000000, 8.000000, 2.000000 +1.128000, 7.000000, 8.000000, 2.000000 +1.018000, 7.000000, 8.000000, 2.000000 +1.007000, 7.000000, 8.000000, 2.000000 +1.048000, 7.000000, 8.000000, 2.000000 +0.911000, 7.000000, 8.000000, 2.000000 +0.966000, 7.000000, 8.000000, 2.000000 +1.066000, 7.000000, 8.000000, 2.000000 +0.961000, 7.000000, 8.000000, 2.000000 +1.014000, 7.000000, 8.000000, 2.000000 +1.030000, 7.000000, 8.000000, 2.000000 +1.008000, 7.000000, 8.000000, 2.000000 +0.953000, 7.000000, 8.000000, 2.000000 +0.885000, 7.000000, 8.000000, 2.000000 +1.052000, 7.000000, 8.000000, 2.000000 +1.050000, 7.000000, 8.000000, 2.000000 +0.986000, 7.000000, 8.000000, 2.000000 +0.988000, 7.000000, 8.000000, 2.000000 +1.309000, 7.000000, 8.000000, 2.000000 +1.384000, 7.000000, 8.000000, 2.000000 +1.036000, 7.000000, 8.000000, 2.000000 +1.032000, 7.000000, 8.000000, 2.000000 +1.398000, 3.000000, 7.000000, 6.000000 +1.322000, 3.000000, 7.000000, 6.000000 +1.450000, 3.000000, 7.000000, 6.000000 +1.361000, 3.000000, 7.000000, 6.000000 +1.249000, 3.000000, 7.000000, 6.000000 +1.399000, 3.000000, 7.000000, 6.000000 +1.049000, 3.000000, 7.000000, 6.000000 +1.308000, 3.000000, 7.000000, 6.000000 +1.408000, 3.000000, 7.000000, 6.000000 +1.226000, 3.000000, 7.000000, 6.000000 +1.345000, 3.000000, 7.000000, 6.000000 +1.387000, 3.000000, 7.000000, 6.000000 +1.372000, 3.000000, 7.000000, 6.000000 +1.408000, 3.000000, 7.000000, 6.000000 +1.351000, 3.000000, 7.000000, 6.000000 +1.330000, 3.000000, 7.000000, 6.000000 
+1.350000, 3.000000, 7.000000, 6.000000 +1.221000, 3.000000, 7.000000, 6.000000 +1.314000, 3.000000, 7.000000, 6.000000 +1.285000, 3.000000, 7.000000, 6.000000 +1.418000, 3.000000, 7.000000, 6.000000 +1.576000, 3.000000, 7.000000, 6.000000 +1.321000, 3.000000, 7.000000, 6.000000 +1.422000, 3.000000, 7.000000, 6.000000 +1.424000, 3.000000, 7.000000, 6.000000 +1.245000, 3.000000, 7.000000, 6.000000 +1.409000, 3.000000, 7.000000, 6.000000 +1.401000, 3.000000, 7.000000, 6.000000 +1.299000, 3.000000, 7.000000, 6.000000 +1.473000, 3.000000, 7.000000, 6.000000 +1.409000, 3.000000, 7.000000, 6.000000 +1.212000, 3.000000, 7.000000, 6.000000 +1.338000, 3.000000, 7.000000, 6.000000 +1.372000, 3.000000, 7.000000, 6.000000 +1.149000, 3.000000, 7.000000, 6.000000 +1.430000, 3.000000, 7.000000, 6.000000 +1.233000, 3.000000, 7.000000, 6.000000 +1.377000, 3.000000, 7.000000, 6.000000 +1.357000, 3.000000, 7.000000, 6.000000 +1.348000, 3.000000, 7.000000, 6.000000 +0.955000, 3.000000, 7.000000, 6.000000 +1.098000, 3.000000, 7.000000, 6.000000 +0.465000, 7.000000, 4.000000, 1.000000 +0.386000, 7.000000, 4.000000, 1.000000 +0.549000, 7.000000, 4.000000, 1.000000 +0.466000, 7.000000, 4.000000, 1.000000 +0.451000, 7.000000, 4.000000, 1.000000 +0.455000, 7.000000, 4.000000, 1.000000 +0.467000, 7.000000, 4.000000, 1.000000 +0.533000, 7.000000, 4.000000, 1.000000 +0.480000, 7.000000, 4.000000, 1.000000 +0.449000, 7.000000, 4.000000, 1.000000 +0.490000, 7.000000, 4.000000, 1.000000 +0.468000, 7.000000, 4.000000, 1.000000 +0.446000, 7.000000, 4.000000, 1.000000 +0.514000, 7.000000, 4.000000, 1.000000 +0.488000, 7.000000, 4.000000, 1.000000 +0.464000, 7.000000, 4.000000, 1.000000 +0.497000, 7.000000, 4.000000, 1.000000 +0.480000, 7.000000, 4.000000, 1.000000 +0.447000, 7.000000, 4.000000, 1.000000 +0.521000, 7.000000, 4.000000, 1.000000 +0.464000, 7.000000, 4.000000, 1.000000 +0.496000, 7.000000, 4.000000, 1.000000 +0.483000, 7.000000, 4.000000, 1.000000 +0.429000, 7.000000, 4.000000, 1.000000 
+0.446000, 7.000000, 4.000000, 1.000000 +0.449000, 7.000000, 4.000000, 1.000000 +0.475000, 7.000000, 4.000000, 1.000000 +0.470000, 7.000000, 4.000000, 1.000000 +0.466000, 7.000000, 4.000000, 1.000000 +0.436000, 7.000000, 4.000000, 1.000000 +0.560000, 7.000000, 4.000000, 1.000000 +0.428000, 7.000000, 4.000000, 1.000000 +0.457000, 7.000000, 4.000000, 1.000000 +0.443000, 7.000000, 4.000000, 1.000000 +0.423000, 7.000000, 4.000000, 1.000000 +0.453000, 7.000000, 4.000000, 1.000000 +0.499000, 7.000000, 4.000000, 1.000000 +0.491000, 7.000000, 4.000000, 1.000000 +0.413000, 7.000000, 4.000000, 1.000000 +0.433000, 7.000000, 4.000000, 1.000000 +0.440000, 7.000000, 4.000000, 1.000000 +0.447000, 7.000000, 4.000000, 1.000000 +1.178000, 3.000000, 5.000000, 9.000000 +1.125000, 3.000000, 5.000000, 9.000000 +1.165000, 3.000000, 5.000000, 9.000000 +1.082000, 3.000000, 5.000000, 9.000000 +0.954000, 3.000000, 5.000000, 9.000000 +0.876000, 3.000000, 5.000000, 9.000000 +0.923000, 3.000000, 5.000000, 9.000000 +1.087000, 3.000000, 5.000000, 9.000000 +1.120000, 3.000000, 5.000000, 9.000000 +1.068000, 3.000000, 5.000000, 9.000000 +0.697000, 3.000000, 5.000000, 9.000000 +1.114000, 3.000000, 5.000000, 9.000000 +0.809000, 3.000000, 5.000000, 9.000000 +1.085000, 3.000000, 5.000000, 9.000000 +0.685000, 3.000000, 5.000000, 9.000000 +0.814000, 3.000000, 5.000000, 9.000000 +1.071000, 3.000000, 5.000000, 9.000000 +0.717000, 3.000000, 5.000000, 9.000000 +0.669000, 3.000000, 5.000000, 9.000000 +0.720000, 3.000000, 5.000000, 9.000000 +1.067000, 3.000000, 5.000000, 9.000000 +1.208000, 3.000000, 5.000000, 9.000000 +1.135000, 3.000000, 5.000000, 9.000000 +1.076000, 3.000000, 5.000000, 9.000000 +0.897000, 3.000000, 5.000000, 9.000000 +1.085000, 3.000000, 5.000000, 9.000000 +0.884000, 3.000000, 5.000000, 9.000000 +1.121000, 3.000000, 5.000000, 9.000000 +1.141000, 3.000000, 5.000000, 9.000000 +1.092000, 3.000000, 5.000000, 9.000000 +0.866000, 3.000000, 5.000000, 9.000000 +0.746000, 3.000000, 5.000000, 9.000000 
+1.057000, 3.000000, 5.000000, 9.000000 +1.127000, 3.000000, 5.000000, 9.000000 +1.072000, 3.000000, 5.000000, 9.000000 +0.816000, 3.000000, 5.000000, 9.000000 +1.061000, 3.000000, 5.000000, 9.000000 +1.050000, 3.000000, 5.000000, 9.000000 +1.076000, 3.000000, 5.000000, 9.000000 +1.100000, 3.000000, 5.000000, 9.000000 +0.934000, 3.000000, 5.000000, 9.000000 +0.914000, 3.000000, 5.000000, 9.000000 +0.459000, 3.000000, 5.000000, 2.000000 +0.520000, 3.000000, 5.000000, 2.000000 +0.487000, 3.000000, 5.000000, 2.000000 +0.495000, 3.000000, 5.000000, 2.000000 +0.482000, 3.000000, 5.000000, 2.000000 +0.503000, 3.000000, 5.000000, 2.000000 +0.488000, 3.000000, 5.000000, 2.000000 +0.562000, 3.000000, 5.000000, 2.000000 +0.503000, 3.000000, 5.000000, 2.000000 +0.496000, 3.000000, 5.000000, 2.000000 +0.570000, 3.000000, 5.000000, 2.000000 +0.502000, 3.000000, 5.000000, 2.000000 +0.488000, 3.000000, 5.000000, 2.000000 +0.608000, 3.000000, 5.000000, 2.000000 +0.471000, 3.000000, 5.000000, 2.000000 +0.602000, 3.000000, 5.000000, 2.000000 +0.542000, 3.000000, 5.000000, 2.000000 +0.484000, 3.000000, 5.000000, 2.000000 +0.417000, 3.000000, 5.000000, 2.000000 +0.506000, 3.000000, 5.000000, 2.000000 +0.465000, 3.000000, 5.000000, 2.000000 +0.591000, 3.000000, 5.000000, 2.000000 +0.561000, 3.000000, 5.000000, 2.000000 +0.515000, 3.000000, 5.000000, 2.000000 +0.479000, 3.000000, 5.000000, 2.000000 +0.505000, 3.000000, 5.000000, 2.000000 +0.469000, 3.000000, 5.000000, 2.000000 +0.347000, 3.000000, 5.000000, 2.000000 +0.581000, 3.000000, 5.000000, 2.000000 +0.457000, 3.000000, 5.000000, 2.000000 +0.466000, 3.000000, 5.000000, 2.000000 +0.542000, 3.000000, 5.000000, 2.000000 +0.487000, 3.000000, 5.000000, 2.000000 +0.366000, 3.000000, 5.000000, 2.000000 +0.436000, 3.000000, 5.000000, 2.000000 +0.482000, 3.000000, 5.000000, 2.000000 +0.329000, 3.000000, 5.000000, 2.000000 +0.441000, 3.000000, 5.000000, 2.000000 +0.453000, 3.000000, 5.000000, 2.000000 +0.410000, 3.000000, 5.000000, 2.000000 
+0.448000, 3.000000, 5.000000, 2.000000 +0.442000, 3.000000, 5.000000, 2.000000 +1.679000, 9.000000, 7.000000, 10.000000 +2.330000, 9.000000, 7.000000, 10.000000 +1.743000, 9.000000, 7.000000, 10.000000 +1.731000, 9.000000, 7.000000, 10.000000 +2.290000, 9.000000, 7.000000, 10.000000 +1.796000, 9.000000, 7.000000, 10.000000 +1.707000, 9.000000, 7.000000, 10.000000 +2.153000, 9.000000, 7.000000, 10.000000 +1.701000, 9.000000, 7.000000, 10.000000 +2.221000, 9.000000, 7.000000, 10.000000 +2.096000, 9.000000, 7.000000, 10.000000 +2.336000, 9.000000, 7.000000, 10.000000 +2.146000, 9.000000, 7.000000, 10.000000 +1.731000, 9.000000, 7.000000, 10.000000 +2.210000, 9.000000, 7.000000, 10.000000 +1.650000, 9.000000, 7.000000, 10.000000 +2.422000, 9.000000, 7.000000, 10.000000 +1.920000, 9.000000, 7.000000, 10.000000 +2.336000, 9.000000, 7.000000, 10.000000 +2.095000, 9.000000, 7.000000, 10.000000 +2.173000, 9.000000, 7.000000, 10.000000 +2.138000, 9.000000, 7.000000, 10.000000 +2.020000, 9.000000, 7.000000, 10.000000 +1.945000, 9.000000, 7.000000, 10.000000 +2.239000, 9.000000, 7.000000, 10.000000 +1.865000, 9.000000, 7.000000, 10.000000 +2.184000, 9.000000, 7.000000, 10.000000 +2.116000, 9.000000, 7.000000, 10.000000 +2.211000, 9.000000, 7.000000, 10.000000 +1.847000, 9.000000, 7.000000, 10.000000 +2.030000, 9.000000, 7.000000, 10.000000 +2.346000, 9.000000, 7.000000, 10.000000 +2.497000, 9.000000, 7.000000, 10.000000 +2.510000, 9.000000, 7.000000, 10.000000 +2.294000, 9.000000, 7.000000, 10.000000 +2.353000, 9.000000, 7.000000, 10.000000 +2.517000, 9.000000, 7.000000, 10.000000 +2.307000, 9.000000, 7.000000, 10.000000 +2.259000, 9.000000, 7.000000, 10.000000 +2.089000, 9.000000, 7.000000, 10.000000 +2.190000, 9.000000, 7.000000, 10.000000 +1.871000, 9.000000, 7.000000, 10.000000 +2.536000, 1.000000, 9.000000, 6.000000 +1.880000, 1.000000, 9.000000, 6.000000 +2.211000, 1.000000, 9.000000, 6.000000 +1.821000, 1.000000, 9.000000, 6.000000 +1.792000, 1.000000, 9.000000, 
6.000000 +2.414000, 1.000000, 9.000000, 6.000000 +1.771000, 1.000000, 9.000000, 6.000000 +1.792000, 1.000000, 9.000000, 6.000000 +2.404000, 1.000000, 9.000000, 6.000000 +1.795000, 1.000000, 9.000000, 6.000000 +2.383000, 1.000000, 9.000000, 6.000000 +1.917000, 1.000000, 9.000000, 6.000000 +2.274000, 1.000000, 9.000000, 6.000000 +2.344000, 1.000000, 9.000000, 6.000000 +1.592000, 1.000000, 9.000000, 6.000000 +1.628000, 1.000000, 9.000000, 6.000000 +2.399000, 1.000000, 9.000000, 6.000000 +1.657000, 1.000000, 9.000000, 6.000000 +2.356000, 1.000000, 9.000000, 6.000000 +1.657000, 1.000000, 9.000000, 6.000000 +2.552000, 1.000000, 9.000000, 6.000000 +1.636000, 1.000000, 9.000000, 6.000000 +2.389000, 1.000000, 9.000000, 6.000000 +1.618000, 1.000000, 9.000000, 6.000000 +1.653000, 1.000000, 9.000000, 6.000000 +2.476000, 1.000000, 9.000000, 6.000000 +1.598000, 1.000000, 9.000000, 6.000000 +1.647000, 1.000000, 9.000000, 6.000000 +2.460000, 1.000000, 9.000000, 6.000000 +1.593000, 1.000000, 9.000000, 6.000000 +1.650000, 1.000000, 9.000000, 6.000000 +2.284000, 1.000000, 9.000000, 6.000000 +1.642000, 1.000000, 9.000000, 6.000000 +1.621000, 1.000000, 9.000000, 6.000000 +2.441000, 1.000000, 9.000000, 6.000000 +1.637000, 1.000000, 9.000000, 6.000000 +2.473000, 1.000000, 9.000000, 6.000000 +1.602000, 1.000000, 9.000000, 6.000000 +1.646000, 1.000000, 9.000000, 6.000000 +2.284000, 1.000000, 9.000000, 6.000000 +1.614000, 1.000000, 9.000000, 6.000000 +1.620000, 1.000000, 9.000000, 6.000000 +0.241000, 6.000000, 1.000000, 6.000000 +0.372000, 6.000000, 1.000000, 6.000000 +0.202000, 6.000000, 1.000000, 6.000000 +0.222000, 6.000000, 1.000000, 6.000000 +0.341000, 6.000000, 1.000000, 6.000000 +0.170000, 6.000000, 1.000000, 6.000000 +0.227000, 6.000000, 1.000000, 6.000000 +0.338000, 6.000000, 1.000000, 6.000000 +0.179000, 6.000000, 1.000000, 6.000000 +0.227000, 6.000000, 1.000000, 6.000000 +0.399000, 6.000000, 1.000000, 6.000000 +0.189000, 6.000000, 1.000000, 6.000000 +0.196000, 6.000000, 1.000000, 
6.000000 +0.191000, 6.000000, 1.000000, 6.000000 +0.213000, 6.000000, 1.000000, 6.000000 +0.282000, 6.000000, 1.000000, 6.000000 +0.362000, 6.000000, 1.000000, 6.000000 +0.350000, 6.000000, 1.000000, 6.000000 +0.171000, 6.000000, 1.000000, 6.000000 +0.234000, 6.000000, 1.000000, 6.000000 +0.340000, 6.000000, 1.000000, 6.000000 +0.209000, 6.000000, 1.000000, 6.000000 +0.226000, 6.000000, 1.000000, 6.000000 +0.340000, 6.000000, 1.000000, 6.000000 +0.241000, 6.000000, 1.000000, 6.000000 +0.313000, 6.000000, 1.000000, 6.000000 +0.202000, 6.000000, 1.000000, 6.000000 +0.235000, 6.000000, 1.000000, 6.000000 +0.472000, 6.000000, 1.000000, 6.000000 +0.170000, 6.000000, 1.000000, 6.000000 +0.228000, 6.000000, 1.000000, 6.000000 +0.357000, 6.000000, 1.000000, 6.000000 +0.189000, 6.000000, 1.000000, 6.000000 +0.189000, 6.000000, 1.000000, 6.000000 +0.211000, 6.000000, 1.000000, 6.000000 +0.189000, 6.000000, 1.000000, 6.000000 +0.224000, 6.000000, 1.000000, 6.000000 +0.321000, 6.000000, 1.000000, 6.000000 +0.185000, 6.000000, 1.000000, 6.000000 +0.226000, 6.000000, 1.000000, 6.000000 +0.347000, 6.000000, 1.000000, 6.000000 +0.217000, 6.000000, 1.000000, 6.000000 +4.750000, 6.000000, 10.000000, 9.000000 +3.281000, 6.000000, 10.000000, 9.000000 +5.009000, 6.000000, 10.000000, 9.000000 +3.320000, 6.000000, 10.000000, 9.000000 +3.246000, 6.000000, 10.000000, 9.000000 +5.181000, 6.000000, 10.000000, 9.000000 +3.264000, 6.000000, 10.000000, 9.000000 +4.548000, 6.000000, 10.000000, 9.000000 +3.266000, 6.000000, 10.000000, 9.000000 +4.732000, 6.000000, 10.000000, 9.000000 +3.270000, 6.000000, 10.000000, 9.000000 +5.188000, 6.000000, 10.000000, 9.000000 +3.290000, 6.000000, 10.000000, 9.000000 +4.882000, 6.000000, 10.000000, 9.000000 +3.293000, 6.000000, 10.000000, 9.000000 +5.173000, 6.000000, 10.000000, 9.000000 +3.300000, 6.000000, 10.000000, 9.000000 +3.258000, 6.000000, 10.000000, 9.000000 +5.130000, 6.000000, 10.000000, 9.000000 +3.247000, 6.000000, 10.000000, 9.000000 +3.304000, 
6.000000, 10.000000, 9.000000 +4.678000, 6.000000, 10.000000, 9.000000 +3.281000, 6.000000, 10.000000, 9.000000 +4.766000, 6.000000, 10.000000, 9.000000 +3.298000, 6.000000, 10.000000, 9.000000 +3.287000, 6.000000, 10.000000, 9.000000 +5.051000, 6.000000, 10.000000, 9.000000 +3.280000, 6.000000, 10.000000, 9.000000 +5.257000, 6.000000, 10.000000, 9.000000 +3.289000, 6.000000, 10.000000, 9.000000 +4.834000, 6.000000, 10.000000, 9.000000 +3.299000, 6.000000, 10.000000, 9.000000 +4.629000, 6.000000, 10.000000, 9.000000 +3.265000, 6.000000, 10.000000, 9.000000 +4.915000, 6.000000, 10.000000, 9.000000 +3.256000, 6.000000, 10.000000, 9.000000 +3.282000, 6.000000, 10.000000, 9.000000 +5.059000, 6.000000, 10.000000, 9.000000 +3.250000, 6.000000, 10.000000, 9.000000 +3.281000, 6.000000, 10.000000, 9.000000 +5.043000, 6.000000, 10.000000, 9.000000 +3.236000, 6.000000, 10.000000, 9.000000 +0.688000, 5.000000, 6.000000, 6.000000 +1.074000, 5.000000, 6.000000, 6.000000 +0.661000, 5.000000, 6.000000, 6.000000 +0.691000, 5.000000, 6.000000, 6.000000 +0.692000, 5.000000, 6.000000, 6.000000 +1.025000, 5.000000, 6.000000, 6.000000 +0.687000, 5.000000, 6.000000, 6.000000 +0.646000, 5.000000, 6.000000, 6.000000 +0.700000, 5.000000, 6.000000, 6.000000 +1.062000, 5.000000, 6.000000, 6.000000 +0.646000, 5.000000, 6.000000, 6.000000 +0.714000, 5.000000, 6.000000, 6.000000 +1.039000, 5.000000, 6.000000, 6.000000 +0.692000, 5.000000, 6.000000, 6.000000 +0.679000, 5.000000, 6.000000, 6.000000 +0.688000, 5.000000, 6.000000, 6.000000 +0.682000, 5.000000, 6.000000, 6.000000 +0.706000, 5.000000, 6.000000, 6.000000 +1.078000, 5.000000, 6.000000, 6.000000 +0.647000, 5.000000, 6.000000, 6.000000 +0.689000, 5.000000, 6.000000, 6.000000 +1.067000, 5.000000, 6.000000, 6.000000 +0.649000, 5.000000, 6.000000, 6.000000 +0.711000, 5.000000, 6.000000, 6.000000 +1.126000, 5.000000, 6.000000, 6.000000 +0.682000, 5.000000, 6.000000, 6.000000 +0.702000, 5.000000, 6.000000, 6.000000 +1.054000, 5.000000, 
6.000000, 6.000000 +0.727000, 5.000000, 6.000000, 6.000000 +0.973000, 5.000000, 6.000000, 6.000000 +0.670000, 5.000000, 6.000000, 6.000000 +0.661000, 5.000000, 6.000000, 6.000000 +0.687000, 5.000000, 6.000000, 6.000000 +1.057000, 5.000000, 6.000000, 6.000000 +0.661000, 5.000000, 6.000000, 6.000000 +0.694000, 5.000000, 6.000000, 6.000000 +1.055000, 5.000000, 6.000000, 6.000000 +0.674000, 5.000000, 6.000000, 6.000000 +0.649000, 5.000000, 6.000000, 6.000000 +0.646000, 5.000000, 6.000000, 6.000000 +0.714000, 5.000000, 6.000000, 6.000000 +1.044000, 5.000000, 6.000000, 6.000000 +0.758000, 10.000000, 5.000000, 10.000000 +0.811000, 10.000000, 5.000000, 10.000000 +1.226000, 10.000000, 5.000000, 10.000000 +0.750000, 10.000000, 5.000000, 10.000000 +0.807000, 10.000000, 5.000000, 10.000000 +1.208000, 10.000000, 5.000000, 10.000000 +0.780000, 10.000000, 5.000000, 10.000000 +0.805000, 10.000000, 5.000000, 10.000000 +1.215000, 10.000000, 5.000000, 10.000000 +0.748000, 10.000000, 5.000000, 10.000000 +0.748000, 10.000000, 5.000000, 10.000000 +0.795000, 10.000000, 5.000000, 10.000000 +1.232000, 10.000000, 5.000000, 10.000000 +0.750000, 10.000000, 5.000000, 10.000000 +0.813000, 10.000000, 5.000000, 10.000000 +1.232000, 10.000000, 5.000000, 10.000000 +0.769000, 10.000000, 5.000000, 10.000000 +0.797000, 10.000000, 5.000000, 10.000000 +1.176000, 10.000000, 5.000000, 10.000000 +0.766000, 10.000000, 5.000000, 10.000000 +0.794000, 10.000000, 5.000000, 10.000000 +1.220000, 10.000000, 5.000000, 10.000000 +0.753000, 10.000000, 5.000000, 10.000000 +0.764000, 10.000000, 5.000000, 10.000000 +0.769000, 10.000000, 5.000000, 10.000000 +0.804000, 10.000000, 5.000000, 10.000000 +1.182000, 10.000000, 5.000000, 10.000000 +0.770000, 10.000000, 5.000000, 10.000000 +0.790000, 10.000000, 5.000000, 10.000000 +0.770000, 10.000000, 5.000000, 10.000000 +0.795000, 10.000000, 5.000000, 10.000000 +1.129000, 10.000000, 5.000000, 10.000000 +0.752000, 10.000000, 5.000000, 10.000000 +0.748000, 10.000000, 5.000000, 
10.000000 +0.789000, 10.000000, 5.000000, 10.000000 +1.198000, 10.000000, 5.000000, 10.000000 +0.762000, 10.000000, 5.000000, 10.000000 +0.795000, 10.000000, 5.000000, 10.000000 +1.226000, 10.000000, 5.000000, 10.000000 +0.803000, 10.000000, 5.000000, 10.000000 +1.254000, 10.000000, 5.000000, 10.000000 +0.766000, 10.000000, 5.000000, 10.000000 +0.298000, 4.000000, 4.000000, 3.000000 +0.455000, 4.000000, 4.000000, 3.000000 +0.291000, 4.000000, 4.000000, 3.000000 +0.311000, 4.000000, 4.000000, 3.000000 +0.490000, 4.000000, 4.000000, 3.000000 +0.314000, 4.000000, 4.000000, 3.000000 +0.331000, 4.000000, 4.000000, 3.000000 +0.480000, 4.000000, 4.000000, 3.000000 +0.476000, 4.000000, 4.000000, 3.000000 +0.432000, 4.000000, 4.000000, 3.000000 +0.434000, 4.000000, 4.000000, 3.000000 +0.439000, 4.000000, 4.000000, 3.000000 +0.476000, 4.000000, 4.000000, 3.000000 +0.509000, 4.000000, 4.000000, 3.000000 +0.394000, 4.000000, 4.000000, 3.000000 +0.441000, 4.000000, 4.000000, 3.000000 +0.573000, 4.000000, 4.000000, 3.000000 +0.445000, 4.000000, 4.000000, 3.000000 +0.428000, 4.000000, 4.000000, 3.000000 +0.450000, 4.000000, 4.000000, 3.000000 +0.464000, 4.000000, 4.000000, 3.000000 +0.487000, 4.000000, 4.000000, 3.000000 +0.465000, 4.000000, 4.000000, 3.000000 +0.384000, 4.000000, 4.000000, 3.000000 +0.544000, 4.000000, 4.000000, 3.000000 +0.470000, 4.000000, 4.000000, 3.000000 +0.360000, 4.000000, 4.000000, 3.000000 +0.521000, 4.000000, 4.000000, 3.000000 +0.480000, 4.000000, 4.000000, 3.000000 +0.358000, 4.000000, 4.000000, 3.000000 +0.509000, 4.000000, 4.000000, 3.000000 +0.377000, 4.000000, 4.000000, 3.000000 +0.475000, 4.000000, 4.000000, 3.000000 +0.529000, 4.000000, 4.000000, 3.000000 +0.481000, 4.000000, 4.000000, 3.000000 +0.340000, 4.000000, 4.000000, 3.000000 +0.409000, 4.000000, 4.000000, 3.000000 +0.455000, 4.000000, 4.000000, 3.000000 +0.332000, 4.000000, 4.000000, 3.000000 +0.477000, 4.000000, 4.000000, 3.000000 +0.450000, 4.000000, 4.000000, 3.000000 +0.438000, 
4.000000, 4.000000, 3.000000 +0.442000, 10.000000, 2.000000, 7.000000 +0.461000, 10.000000, 2.000000, 7.000000 +0.480000, 10.000000, 2.000000, 7.000000 +0.536000, 10.000000, 2.000000, 7.000000 +0.503000, 10.000000, 2.000000, 7.000000 +0.537000, 10.000000, 2.000000, 7.000000 +0.458000, 10.000000, 2.000000, 7.000000 +0.515000, 10.000000, 2.000000, 7.000000 +0.427000, 10.000000, 2.000000, 7.000000 +0.504000, 10.000000, 2.000000, 7.000000 +0.439000, 10.000000, 2.000000, 7.000000 +0.482000, 10.000000, 2.000000, 7.000000 +0.601000, 10.000000, 2.000000, 7.000000 +0.457000, 10.000000, 2.000000, 7.000000 +0.515000, 10.000000, 2.000000, 7.000000 +0.455000, 10.000000, 2.000000, 7.000000 +0.585000, 10.000000, 2.000000, 7.000000 +0.451000, 10.000000, 2.000000, 7.000000 +0.506000, 10.000000, 2.000000, 7.000000 +0.437000, 10.000000, 2.000000, 7.000000 +0.448000, 10.000000, 2.000000, 7.000000 +0.439000, 10.000000, 2.000000, 7.000000 +0.462000, 10.000000, 2.000000, 7.000000 +0.426000, 10.000000, 2.000000, 7.000000 +0.445000, 10.000000, 2.000000, 7.000000 +0.543000, 10.000000, 2.000000, 7.000000 +0.441000, 10.000000, 2.000000, 7.000000 +0.490000, 10.000000, 2.000000, 7.000000 +0.415000, 10.000000, 2.000000, 7.000000 +0.519000, 10.000000, 2.000000, 7.000000 +0.403000, 10.000000, 2.000000, 7.000000 +0.453000, 10.000000, 2.000000, 7.000000 +0.445000, 10.000000, 2.000000, 7.000000 +0.531000, 10.000000, 2.000000, 7.000000 +0.423000, 10.000000, 2.000000, 7.000000 +0.461000, 10.000000, 2.000000, 7.000000 +0.496000, 10.000000, 2.000000, 7.000000 +0.444000, 10.000000, 2.000000, 7.000000 +0.555000, 10.000000, 2.000000, 7.000000 +0.411000, 10.000000, 2.000000, 7.000000 +0.429000, 10.000000, 2.000000, 7.000000 +0.572000, 10.000000, 2.000000, 7.000000 +0.773000, 1.000000, 8.000000, 2.000000 +0.698000, 1.000000, 8.000000, 2.000000 +0.845000, 1.000000, 8.000000, 2.000000 +0.787000, 1.000000, 8.000000, 2.000000 +0.879000, 1.000000, 8.000000, 2.000000 +0.862000, 1.000000, 8.000000, 2.000000 
+0.836000, 1.000000, 8.000000, 2.000000 +0.763000, 1.000000, 8.000000, 2.000000 +0.859000, 1.000000, 8.000000, 2.000000 +0.792000, 1.000000, 8.000000, 2.000000 +0.835000, 1.000000, 8.000000, 2.000000 +0.860000, 1.000000, 8.000000, 2.000000 +0.953000, 1.000000, 8.000000, 2.000000 +0.760000, 1.000000, 8.000000, 2.000000 +0.788000, 1.000000, 8.000000, 2.000000 +0.863000, 1.000000, 8.000000, 2.000000 +0.820000, 1.000000, 8.000000, 2.000000 +0.858000, 1.000000, 8.000000, 2.000000 +0.736000, 1.000000, 8.000000, 2.000000 +0.894000, 1.000000, 8.000000, 2.000000 +0.823000, 1.000000, 8.000000, 2.000000 +0.721000, 1.000000, 8.000000, 2.000000 +0.806000, 1.000000, 8.000000, 2.000000 +0.852000, 1.000000, 8.000000, 2.000000 +0.708000, 1.000000, 8.000000, 2.000000 +0.748000, 1.000000, 8.000000, 2.000000 +0.778000, 1.000000, 8.000000, 2.000000 +0.866000, 1.000000, 8.000000, 2.000000 +0.643000, 1.000000, 8.000000, 2.000000 +0.786000, 1.000000, 8.000000, 2.000000 +0.824000, 1.000000, 8.000000, 2.000000 +0.640000, 1.000000, 8.000000, 2.000000 +0.726000, 1.000000, 8.000000, 2.000000 +0.840000, 1.000000, 8.000000, 2.000000 +0.743000, 1.000000, 8.000000, 2.000000 +0.845000, 1.000000, 8.000000, 2.000000 +0.840000, 1.000000, 8.000000, 2.000000 +0.810000, 1.000000, 8.000000, 2.000000 +0.846000, 1.000000, 8.000000, 2.000000 +0.710000, 1.000000, 8.000000, 2.000000 +0.711000, 1.000000, 8.000000, 2.000000 +0.876000, 1.000000, 8.000000, 2.000000 +0.415000, 6.000000, 3.000000, 1.000000 +0.392000, 6.000000, 3.000000, 1.000000 +0.430000, 6.000000, 3.000000, 1.000000 +0.283000, 6.000000, 3.000000, 1.000000 +0.406000, 6.000000, 3.000000, 1.000000 +0.238000, 6.000000, 3.000000, 1.000000 +0.280000, 6.000000, 3.000000, 1.000000 +0.388000, 6.000000, 3.000000, 1.000000 +0.247000, 6.000000, 3.000000, 1.000000 +0.283000, 6.000000, 3.000000, 1.000000 +0.404000, 6.000000, 3.000000, 1.000000 +0.266000, 6.000000, 3.000000, 1.000000 +0.229000, 6.000000, 3.000000, 1.000000 +0.264000, 6.000000, 3.000000, 1.000000 
+0.343000, 6.000000, 3.000000, 1.000000 +0.287000, 6.000000, 3.000000, 1.000000 +0.361000, 6.000000, 3.000000, 1.000000 +0.237000, 6.000000, 3.000000, 1.000000 +0.281000, 6.000000, 3.000000, 1.000000 +0.404000, 6.000000, 3.000000, 1.000000 +0.250000, 6.000000, 3.000000, 1.000000 +0.277000, 6.000000, 3.000000, 1.000000 +0.406000, 6.000000, 3.000000, 1.000000 +0.208000, 6.000000, 3.000000, 1.000000 +0.284000, 6.000000, 3.000000, 1.000000 +0.407000, 6.000000, 3.000000, 1.000000 +0.279000, 6.000000, 3.000000, 1.000000 +0.253000, 6.000000, 3.000000, 1.000000 +0.406000, 6.000000, 3.000000, 1.000000 +0.279000, 6.000000, 3.000000, 1.000000 +0.379000, 6.000000, 3.000000, 1.000000 +0.292000, 6.000000, 3.000000, 1.000000 +0.378000, 6.000000, 3.000000, 1.000000 +0.279000, 6.000000, 3.000000, 1.000000 +0.386000, 6.000000, 3.000000, 1.000000 +0.293000, 6.000000, 3.000000, 1.000000 +0.387000, 6.000000, 3.000000, 1.000000 +0.250000, 6.000000, 3.000000, 1.000000 +0.276000, 6.000000, 3.000000, 1.000000 +0.424000, 6.000000, 3.000000, 1.000000 +0.223000, 6.000000, 3.000000, 1.000000 +0.262000, 6.000000, 3.000000, 1.000000 +0.287000, 7.000000, 3.000000, 3.000000 +0.316000, 7.000000, 3.000000, 3.000000 +0.489000, 7.000000, 3.000000, 3.000000 +0.281000, 7.000000, 3.000000, 3.000000 +0.358000, 7.000000, 3.000000, 3.000000 +0.456000, 7.000000, 3.000000, 3.000000 +0.253000, 7.000000, 3.000000, 3.000000 +0.295000, 7.000000, 3.000000, 3.000000 +0.258000, 7.000000, 3.000000, 3.000000 +0.302000, 7.000000, 3.000000, 3.000000 +0.486000, 7.000000, 3.000000, 3.000000 +0.330000, 7.000000, 3.000000, 3.000000 +0.337000, 7.000000, 3.000000, 3.000000 +0.461000, 7.000000, 3.000000, 3.000000 +0.258000, 7.000000, 3.000000, 3.000000 +0.307000, 7.000000, 3.000000, 3.000000 +0.451000, 7.000000, 3.000000, 3.000000 +0.282000, 7.000000, 3.000000, 3.000000 +0.270000, 7.000000, 3.000000, 3.000000 +0.315000, 7.000000, 3.000000, 3.000000 +0.430000, 7.000000, 3.000000, 3.000000 +0.256000, 7.000000, 3.000000, 3.000000 
+0.341000, 7.000000, 3.000000, 3.000000 +0.473000, 7.000000, 3.000000, 3.000000 +0.253000, 7.000000, 3.000000, 3.000000 +0.323000, 7.000000, 3.000000, 3.000000 +0.446000, 7.000000, 3.000000, 3.000000 +0.257000, 7.000000, 3.000000, 3.000000 +0.349000, 7.000000, 3.000000, 3.000000 +0.425000, 7.000000, 3.000000, 3.000000 +0.327000, 7.000000, 3.000000, 3.000000 +0.322000, 7.000000, 3.000000, 3.000000 +0.408000, 7.000000, 3.000000, 3.000000 +0.322000, 7.000000, 3.000000, 3.000000 +0.399000, 7.000000, 3.000000, 3.000000 +0.271000, 7.000000, 3.000000, 3.000000 +0.434000, 7.000000, 3.000000, 3.000000 +0.276000, 7.000000, 3.000000, 3.000000 +0.340000, 7.000000, 3.000000, 3.000000 +0.411000, 7.000000, 3.000000, 3.000000 +5.125000, 9.000000, 10.000000, 9.000000 +0.307000, 7.000000, 3.000000, 3.000000 +0.301000, 7.000000, 3.000000, 3.000000 +5.368000, 9.000000, 10.000000, 9.000000 +3.378000, 9.000000, 10.000000, 9.000000 +3.406000, 9.000000, 10.000000, 9.000000 +5.017000, 9.000000, 10.000000, 9.000000 +3.472000, 9.000000, 10.000000, 9.000000 +3.432000, 9.000000, 10.000000, 9.000000 +5.309000, 9.000000, 10.000000, 9.000000 +3.419000, 9.000000, 10.000000, 9.000000 +5.282000, 9.000000, 10.000000, 9.000000 +5.424000, 9.000000, 10.000000, 9.000000 +3.427000, 9.000000, 10.000000, 9.000000 +3.431000, 9.000000, 10.000000, 9.000000 +5.356000, 9.000000, 10.000000, 9.000000 +3.426000, 9.000000, 10.000000, 9.000000 +4.954000, 9.000000, 10.000000, 9.000000 +3.465000, 9.000000, 10.000000, 9.000000 +5.028000, 9.000000, 10.000000, 9.000000 +3.413000, 9.000000, 10.000000, 9.000000 +3.448000, 9.000000, 10.000000, 9.000000 +5.344000, 9.000000, 10.000000, 9.000000 +3.424000, 9.000000, 10.000000, 9.000000 +3.450000, 9.000000, 10.000000, 9.000000 +5.401000, 9.000000, 10.000000, 9.000000 +3.433000, 9.000000, 10.000000, 9.000000 +5.119000, 9.000000, 10.000000, 9.000000 +3.407000, 9.000000, 10.000000, 9.000000 +3.418000, 9.000000, 10.000000, 9.000000 +5.321000, 9.000000, 10.000000, 9.000000 +3.431000, 
9.000000, 10.000000, 9.000000 +5.465000, 9.000000, 10.000000, 9.000000 +3.433000, 9.000000, 10.000000, 9.000000 +5.473000, 9.000000, 10.000000, 9.000000 +3.419000, 9.000000, 10.000000, 9.000000 +5.302000, 9.000000, 10.000000, 9.000000 +3.435000, 9.000000, 10.000000, 9.000000 +5.428000, 9.000000, 10.000000, 9.000000 +3.422000, 9.000000, 10.000000, 9.000000 +3.425000, 9.000000, 10.000000, 9.000000 +5.529000, 9.000000, 10.000000, 9.000000 +3.383000, 9.000000, 10.000000, 9.000000 +3.436000, 9.000000, 10.000000, 9.000000 +1.000000, 10.000000, 6.000000, 4.000000 +0.657000, 10.000000, 6.000000, 4.000000 +0.748000, 10.000000, 6.000000, 4.000000 +1.059000, 10.000000, 6.000000, 4.000000 +0.658000, 10.000000, 6.000000, 4.000000 +0.718000, 10.000000, 6.000000, 4.000000 +1.066000, 10.000000, 6.000000, 4.000000 +0.660000, 10.000000, 6.000000, 4.000000 +0.697000, 10.000000, 6.000000, 4.000000 +1.040000, 10.000000, 6.000000, 4.000000 +0.660000, 10.000000, 6.000000, 4.000000 +0.755000, 10.000000, 6.000000, 4.000000 +1.084000, 10.000000, 6.000000, 4.000000 +0.655000, 10.000000, 6.000000, 4.000000 +0.714000, 10.000000, 6.000000, 4.000000 +1.024000, 10.000000, 6.000000, 4.000000 +0.674000, 10.000000, 6.000000, 4.000000 +0.772000, 10.000000, 6.000000, 4.000000 +1.032000, 10.000000, 6.000000, 4.000000 +0.677000, 10.000000, 6.000000, 4.000000 +0.662000, 10.000000, 6.000000, 4.000000 +0.736000, 10.000000, 6.000000, 4.000000 +1.037000, 10.000000, 6.000000, 4.000000 +0.693000, 10.000000, 6.000000, 4.000000 +0.742000, 10.000000, 6.000000, 4.000000 +1.064000, 10.000000, 6.000000, 4.000000 +0.654000, 10.000000, 6.000000, 4.000000 +0.662000, 10.000000, 6.000000, 4.000000 +0.728000, 10.000000, 6.000000, 4.000000 +1.067000, 10.000000, 6.000000, 4.000000 +0.657000, 10.000000, 6.000000, 4.000000 +0.724000, 10.000000, 6.000000, 4.000000 +1.066000, 10.000000, 6.000000, 4.000000 +0.651000, 10.000000, 6.000000, 4.000000 +0.705000, 10.000000, 6.000000, 4.000000 +0.985000, 10.000000, 6.000000, 4.000000 
+0.660000, 10.000000, 6.000000, 4.000000 +0.712000, 10.000000, 6.000000, 4.000000 +1.095000, 10.000000, 6.000000, 4.000000 +0.663000, 10.000000, 6.000000, 4.000000 +0.717000, 10.000000, 6.000000, 4.000000 +1.080000, 10.000000, 6.000000, 4.000000 +0.195000, 2.000000, 1.000000, 1.000000 +0.230000, 2.000000, 1.000000, 1.000000 +0.323000, 2.000000, 1.000000, 1.000000 +0.181000, 2.000000, 1.000000, 1.000000 +0.216000, 2.000000, 1.000000, 1.000000 +0.328000, 2.000000, 1.000000, 1.000000 +0.173000, 2.000000, 1.000000, 1.000000 +0.230000, 2.000000, 1.000000, 1.000000 +0.294000, 2.000000, 1.000000, 1.000000 +0.200000, 2.000000, 1.000000, 1.000000 +0.206000, 2.000000, 1.000000, 1.000000 +0.295000, 2.000000, 1.000000, 1.000000 +0.221000, 2.000000, 1.000000, 1.000000 +0.272000, 2.000000, 1.000000, 1.000000 +0.157000, 2.000000, 1.000000, 1.000000 +0.240000, 2.000000, 1.000000, 1.000000 +0.309000, 2.000000, 1.000000, 1.000000 +0.226000, 2.000000, 1.000000, 1.000000 +0.306000, 2.000000, 1.000000, 1.000000 +0.217000, 2.000000, 1.000000, 1.000000 +0.308000, 2.000000, 1.000000, 1.000000 +0.200000, 2.000000, 1.000000, 1.000000 +0.221000, 2.000000, 1.000000, 1.000000 +0.353000, 2.000000, 1.000000, 1.000000 +0.156000, 2.000000, 1.000000, 1.000000 +0.220000, 2.000000, 1.000000, 1.000000 +0.318000, 2.000000, 1.000000, 1.000000 +0.217000, 2.000000, 1.000000, 1.000000 +0.217000, 2.000000, 1.000000, 1.000000 +0.302000, 2.000000, 1.000000, 1.000000 +0.205000, 2.000000, 1.000000, 1.000000 +0.220000, 2.000000, 1.000000, 1.000000 +0.338000, 2.000000, 1.000000, 1.000000 +0.241000, 2.000000, 1.000000, 1.000000 +0.310000, 2.000000, 1.000000, 1.000000 +0.195000, 2.000000, 1.000000, 1.000000 +0.266000, 2.000000, 1.000000, 1.000000 +0.357000, 2.000000, 1.000000, 1.000000 +0.234000, 2.000000, 1.000000, 1.000000 +0.335000, 2.000000, 1.000000, 1.000000 +0.221000, 2.000000, 1.000000, 1.000000 +0.239000, 2.000000, 1.000000, 1.000000 +0.698000, 2.000000, 7.000000, 2.000000 +0.492000, 2.000000, 7.000000, 
2.000000 +0.401000, 2.000000, 7.000000, 2.000000 +0.458000, 2.000000, 7.000000, 2.000000 +0.715000, 2.000000, 7.000000, 2.000000 +0.403000, 2.000000, 7.000000, 2.000000 +0.469000, 2.000000, 7.000000, 2.000000 +0.703000, 2.000000, 7.000000, 2.000000 +0.466000, 2.000000, 7.000000, 2.000000 +0.721000, 2.000000, 7.000000, 2.000000 +0.401000, 2.000000, 7.000000, 2.000000 +0.482000, 2.000000, 7.000000, 2.000000 +0.705000, 2.000000, 7.000000, 2.000000 +0.404000, 2.000000, 7.000000, 2.000000 +0.463000, 2.000000, 7.000000, 2.000000 +0.706000, 2.000000, 7.000000, 2.000000 +0.401000, 2.000000, 7.000000, 2.000000 +0.456000, 2.000000, 7.000000, 2.000000 +0.684000, 2.000000, 7.000000, 2.000000 +0.400000, 2.000000, 7.000000, 2.000000 +0.468000, 2.000000, 7.000000, 2.000000 +0.659000, 2.000000, 7.000000, 2.000000 +0.442000, 2.000000, 7.000000, 2.000000 +0.419000, 2.000000, 7.000000, 2.000000 +0.479000, 2.000000, 7.000000, 2.000000 +0.448000, 2.000000, 7.000000, 2.000000 +0.694000, 2.000000, 7.000000, 2.000000 +0.472000, 2.000000, 7.000000, 2.000000 +0.666000, 2.000000, 7.000000, 2.000000 +0.440000, 2.000000, 7.000000, 2.000000 +0.404000, 2.000000, 7.000000, 2.000000 +0.471000, 2.000000, 7.000000, 2.000000 +0.679000, 2.000000, 7.000000, 2.000000 +0.429000, 2.000000, 7.000000, 2.000000 +0.468000, 2.000000, 7.000000, 2.000000 +0.707000, 2.000000, 7.000000, 2.000000 +0.401000, 2.000000, 7.000000, 2.000000 +0.493000, 2.000000, 7.000000, 2.000000 +0.711000, 2.000000, 7.000000, 2.000000 +0.423000, 2.000000, 7.000000, 2.000000 +0.462000, 2.000000, 7.000000, 2.000000 +0.655000, 2.000000, 7.000000, 2.000000 +0.660000, 10.000000, 6.000000, 4.000000 +0.734000, 10.000000, 6.000000, 4.000000 +1.087000, 10.000000, 6.000000, 4.000000 +0.664000, 10.000000, 6.000000, 4.000000 +0.718000, 10.000000, 6.000000, 4.000000 +1.057000, 10.000000, 6.000000, 4.000000 +0.650000, 10.000000, 6.000000, 4.000000 +0.723000, 10.000000, 6.000000, 4.000000 +1.042000, 10.000000, 6.000000, 4.000000 +0.679000, 10.000000, 
6.000000, 4.000000 +0.694000, 10.000000, 6.000000, 4.000000 +1.084000, 10.000000, 6.000000, 4.000000 +0.656000, 10.000000, 6.000000, 4.000000 +0.652000, 10.000000, 6.000000, 4.000000 +0.705000, 10.000000, 6.000000, 4.000000 +1.108000, 10.000000, 6.000000, 4.000000 +0.673000, 10.000000, 6.000000, 4.000000 +0.710000, 10.000000, 6.000000, 4.000000 +1.017000, 10.000000, 6.000000, 4.000000 +0.676000, 10.000000, 6.000000, 4.000000 +0.725000, 10.000000, 6.000000, 4.000000 +1.117000, 10.000000, 6.000000, 4.000000 +0.653000, 10.000000, 6.000000, 4.000000 +0.723000, 10.000000, 6.000000, 4.000000 +1.047000, 10.000000, 6.000000, 4.000000 +0.680000, 10.000000, 6.000000, 4.000000 +0.727000, 10.000000, 6.000000, 4.000000 +1.086000, 10.000000, 6.000000, 4.000000 +0.674000, 10.000000, 6.000000, 4.000000 +0.770000, 10.000000, 6.000000, 4.000000 +0.964000, 10.000000, 6.000000, 4.000000 +0.696000, 10.000000, 6.000000, 4.000000 +0.727000, 10.000000, 6.000000, 4.000000 +1.025000, 10.000000, 6.000000, 4.000000 +0.701000, 10.000000, 6.000000, 4.000000 +0.685000, 10.000000, 6.000000, 4.000000 +1.063000, 10.000000, 6.000000, 4.000000 +0.716000, 10.000000, 6.000000, 4.000000 +1.109000, 10.000000, 6.000000, 4.000000 +0.771000, 10.000000, 6.000000, 4.000000 +1.713000, 10.000000, 6.000000, 4.000000 +3.440000, 10.000000, 6.000000, 4.000000 \ No newline at end of file diff --git a/tools/perfs/bench_sgemm.sh b/tools/perfs/bench_sgemm.sh new file mode 100755 index 0000000..b8a0aac --- /dev/null +++ b/tools/perfs/bench_sgemm.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +DIR=$PWD +ROOTDIR=$DIR/../.. +BUILDDIR=$PWD/build/ +INSTALLDIR=$PWD/local/ +PERFDIR=$DIR/sampling/ + +# Testing another specific scheduler, no need to run this +[ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dm ] || exit 77 + +make -C ../../ distclean + +mkdir -p $PERFDIR +mkdir -p $BUILDDIR +cd $BUILDDIR +$DIR/../../configure -C --prefix=$INSTALLDIR --with-goto-dir=/home/gonnet/These/Libs/GotoBLAS/GotoBLAS/ --enable-verbose + +make -j 10 +make install + +sizelist="16 32 48 64 96 128 192 256 384 512 1024 2048 4096" + +cpu_output=$DIR/output.cpu +gpu_output=$DIR/output.gpu + +rm -f $cpu_output +rm -f $gpu_output + +export STARPU_WORKERS_CPUID="2" +export STARPU_CALIBRATE=1 +export STARPU_SCHED="dm" + +# benchmark GotoBLAS +for size in $sizelist; +do + niter=1000 + if test $size -ge 512; then + niter=20 + fi + if test $size -ge 2048; then + niter=5 + fi + + echo "GotoBLAS -> size $size niter $niter" + timing=`STARPU_NCPUS=1 STARPU_NCUDA=0 $MS_LAUNCHER $STARPU_LAUNCH $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2> /dev/null` + echo "$size $timing $niter" >> $cpu_output +done + +# benchmark CUBLAS +for size in $sizelist; +do + niter=2500 + if test $size -ge 512; then + niter=250 + fi + + if test $size -ge 2048; then + niter=25 + fi + + echo "CUBLAS -> size $size niter $niter" + timing=`STARPU_NCPUS=0 STARPU_NCUDA=1 $MS_LAUNCHER $STARPU_LAUNCH $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2 -pin 2> /dev/null` + echo "$size $timing $niter" >> $gpu_output +done + +gnuplot > /dev/null << EOF + +set term postscript eps enhanced color +set output "bench_sgemm.eps" + +set logscale x +set logscale y + 
+plot "$cpu_output" usi 1:(\$2/\$3) with linespoint, \ + "$gpu_output" usi 1:(\$2/\$3) with linespoint + +EOF + diff --git a/tools/perfs/error_model.gp b/tools/perfs/error_model.gp new file mode 100755 index 0000000..8e96b6b --- /dev/null +++ b/tools/perfs/error_model.gp @@ -0,0 +1,46 @@ +#!/usr/bin/gnuplot -persist +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +set term postscript eps enhanced color +set output "model_error.eps" + +set yrange [0.05:100] +set xrange [4:10000] + +set grid y +set grid x + +set logscale y +set logscale x + +#set title "Cholesky on Cell (PS3)" +set xlabel "Number of samples" +set ylabel "Prediction error" +set grid + + +set ytics (0.01, 0.1, 1, 5,10,25,50,100) +set xtics (10, 100, 1000, 10000) + +set format y "%.1f %%" +set format x "10^{%L}" + +set key title "Execution time Prediction Error (%)" + +set size 0.75 + +plot "gnuplot.data" usi 1:($2*100) with linespoint pt -1 lt 1 lw 3 title "CPUs" ,\ + "gnuplot.data" usi 3:($4*100) with linespoint pt -1 lt 2 lw 3 title "GPU" diff --git a/tools/perfs/error_model.sh b/tools/perfs/error_model.sh new file mode 100755 index 0000000..8dd03d5 --- /dev/null +++ b/tools/perfs/error_model.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# for bc +scale=8 + +#nblockslist="2 4 8 16 16 16 16 16 16 16 16 16 16 16 16" +nblockslist="4 8 16 16 16 16 16 16 16 16 16" +niter=5 + +#nblockslist="4 4" +#niter=2 + +# Testing another specific scheduler, no need to run this +[ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dm ] || exit 77 + + +rm -f log + +echo "#iter cpu0 (#tasks0) cpu1 (#tasks1) cpu2 (#tasks2) gpu0 (#tasksgpu0) #totaltask gflops" > gnuplot.data + +i=0 +for nblocks in $nblockslist +do + i=$(($i + 1)) + + sumcpu[$i]='0' + ntaskcpu[$i]='0' + sumcuda[$i]='0' + ntaskcuda[$i]='0' + cpu_ntasktotal[$i]='0' + gpu_ntasktotal[$i]='0' + sumgflops[$i]='0' +done + +for iter in `seq 1 $niter` +do +cpu_taskcnt=0 +gpu_taskcnt=0 +i=0 +rm -f ../../.sampling/* +for nblocks in $nblockslist +do + i=$(($i + 1)) + + ntheta=$(($((32 * $nblocks)) + 2)) + + echo "ITER $iter -> I $i NBLOCKS $nblocks" + + STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH ../../examples/heat/heat -nblocks $nblocks -nthick 34 -ntheta $ntheta -pin 2> output.log.err > output.log + gflops=`grep "Synthetic GFlops :" output.log.err| sed -e "s/Synthetic GFlops ://"` + + sumgflops[$i]=$(echo "${sumgflops[$i]} + $gflops"|bc -l) + + # retrieve ratio for cpu 0, 1 and 2 + avgcpu0=`grep "MODEL ERROR: CPU 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` + avgcpu1=`grep "MODEL ERROR: CPU 1" starpu.log | sed -e "s/^.*RATIO \(.*\) 
NTASKS\(.*\)$/\1/"` + avgcpu2=`grep "MODEL ERROR: CPU 2" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` + avgcuda0=`grep "MODEL ERROR: CUDA 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` + + ntaskcpu0=`grep "MODEL ERROR: CPU 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` + ntaskcpu1=`grep "MODEL ERROR: CPU 1" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` + ntaskcpu2=`grep "MODEL ERROR: CPU 2" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` + ntaskcuda0=`grep "MODEL ERROR: CUDA 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` + + sumcpu[$i]=$(echo "${sumcpu[$i]} + ( $avgcpu0 * $ntaskcpu0 ) + ( $avgcpu1 * $ntaskcpu1 ) + ( $avgcpu2 * $ntaskcpu2 )"| bc -l) + ntaskcpu[$i]=$(echo "${ntaskcpu[$i]} + $ntaskcpu0 + $ntaskcpu1 + $ntaskcpu2"|bc -l) + sumcuda[$i]=$(echo "${sumcuda[$i]} + ( $avgcuda0 * $ntaskcuda0 )"| bc -l) + ntaskcuda[$i]=$(echo "${ntaskcuda[$i]} + $ntaskcuda0"|bc -l) + + cpu_taskcnt=$(($cpu_taskcnt + $ntaskcpu0 + $ntaskcpu1 + $ntaskcpu2 )) + gpu_taskcnt=$(($gpu_taskcnt + $ntaskcuda0)) + + cpu_ntasktotal[$i]=$( echo "$cpu_taskcnt + ${cpu_ntasktotal[$i]}" | bc -l) + gpu_ntasktotal[$i]=$( echo "$gpu_taskcnt + ${gpu_ntasktotal[$i]}" | bc -l) +done +done + +i=0 +echo "#ntaskscpu #avg. error cpu #ntaskgpu #avg. error gpu #avg. 
gflops" > gnuplot.data +for nblocks in $nblockslist +do + i=$(($i + 1)) + + avggflops=$(echo "${sumgflops[$i]}/$niter"|bc -l) + + cpu_ntasks=$(echo "${cpu_ntasktotal[$i]}/$niter" | bc -l) + gpu_ntasks=$(echo "${gpu_ntasktotal[$i]}/$niter" | bc -l) + + avgcpu=$(echo "${sumcpu[$i]}/${ntaskcpu[$i]}"|bc -l) + avgcuda=$(echo "${sumcuda[$i]}/${ntaskcuda[$i]}"|bc -l) + + echo "$cpu_ntasks $avgcpu $gpu_ntasks $avgcuda $avggflops" >> gnuplot.data +done + +./error_model.gp diff --git a/tools/release/Makefile b/tools/release/Makefile new file mode 100644 index 0000000..6d23599 --- /dev/null +++ b/tools/release/Makefile @@ -0,0 +1,113 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +STARPU ?= undefined +EXAMPLE ?= undefined + +TARGETS = +TARGETS += hello_world +TARGETS += block +TARGETS += mult +TARGETS += variable +TARGETS += incrementer + +ifeq ($(STARPU),undefined) +all: + @echo + @echo "ERROR. You need to set the variable STARPU to the name of the pkg-config StarPU package" + @echo +clean:; rm -f $(TARGETS) *.o + +else +ifeq ($(EXAMPLE),undefined) +all: + @echo + @echo "ERROR. 
You need to set the variable EXAMPLE to the directory hosting the example sources" + @echo +clean:; rm -f $(TARGETS) *.o + +else + +CFLAGS += $$(pkg-config --cflags $(STARPU)) +LDFLAGS += $$(pkg-config --libs $(STARPU)) + +HAS_CUDA = $(shell starpu_config -d | grep -c STARPU_USE_CUDA) +NVCC ?= nvcc +HAS_OPENCL = $(shell starpu_config -d | grep -c STARPU_USE_OPENCL) + +ifneq ($(strip $(HAS_CUDA)),0) +LDFLAGS += -lcudart +endif + +ifneq ($(strip $(HAS_OPENCL)),0) +LDFLAGS += -lOpenCL +endif + +%: %.o + $(CC) $< $(LDFLAGS) -o $@ +%.o: $(EXAMPLE)/basic_examples/%.cu + $(NVCC) -std=c++11 $(CFLAGS) $< -c +%.o: $(EXAMPLE)/basic_examples/%.c + $(CC) $(CFLAGS) $< -c + +%.o: $(EXAMPLE)/incrementer/%.cu + $(NVCC) -std=c++11 $(CFLAGS) $< -c +%.o: $(EXAMPLE)/incrementer/%.c + $(CC) $(CFLAGS) $< -c + +all: $(TARGETS) + +BLOCK_PREREQUISITES = block.o block_cpu.o +ifneq ($(strip $(HAS_CUDA)),0) +BLOCK_PREREQUISITES += block_cuda.o +endif +ifneq ($(strip $(HAS_OPENCL)),0) +BLOCK_PREREQUISITES += block_opencl.o +endif +block: $(BLOCK_PREREQUISITES) + $(CC) $^ $(LDFLAGS) -o $@ +# HAS_CUDA/HAS_OPENCL hold a "grep -c" count (never empty), so test against 0 +VARIABLE_PREREQUISITES = variable.o variable_kernels_cpu.o +ifneq ($(strip $(HAS_CUDA)),0) +VARIABLE_PREREQUISITES += variable_kernels.o +endif +ifneq ($(strip $(HAS_OPENCL)),0) +VARIABLE_PREREQUISITES += variable_kernels_opencl.o +endif +variable: $(VARIABLE_PREREQUISITES) + $(CC) $^ $(LDFLAGS) -o $@ + +INCREMENTER_PREREQUISITES = incrementer.o +ifneq ($(strip $(HAS_CUDA)),0) +INCREMENTER_PREREQUISITES += incrementer_kernels.o +endif +ifneq ($(strip $(HAS_OPENCL)),0) +INCREMENTER_PREREQUISITES += incrementer_kernels_opencl.o +endif +incrementer: $(INCREMENTER_PREREQUISITES) + $(CC) $^ $(LDFLAGS) -o $@ + +MULT_PREREQUISITES = mult.o +ifneq ($(strip $(HAS_CUDA)),0) +MULT_PREREQUISITES += mult_cuda.o +endif +mult: $(MULT_PREREQUISITES) + $(CC) $^ $(LDFLAGS) -o $@ + +clean:; rm -f $(TARGETS) *.o + + +endif +endif diff --git a/tools/release/README.md b/tools/release/README.md new file mode 100644 index
0000000..c136fbc --- /dev/null +++ b/tools/release/README.md @@ -0,0 +1,50 @@ + + +The makefile in this directory should be used to test the compilation +and execution of StarPU examples against an installed version of +StarPU. + +For example, if StarPU is installed in + +``` +STARPU_INST=$HOME/softs/starpu-1.4 +``` + +and the source code of StarPU is in + +``` +STARPU_SRC=$HOME/src/starpu/master +``` + +one first needs to call the following script + +``` +source $STARPU_INST/bin/starpu_env +``` + +and then call + +``` +make STARPU=starpu-1.4 EXAMPLE=$STARPU_SRC/examples +``` + +to produce the executables. + +Examples using an old StarPU API can also be tested, for example the branch 1.0 + +``` +make STARPU=starpu-1.0 EXAMPLE=$HOME/src/starpu/branches/starpu-1.0/examples/ +``` + +Note that the variable STARPU is set to starpu-1.0 to use the 1.0 API. diff --git a/tools/starpu_calibrate_bus.1 b/tools/starpu_calibrate_bus.1 new file mode 100644 index 0000000..d1c4c2a --- /dev/null +++ b/tools/starpu_calibrate_bus.1 @@ -0,0 +1,18 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_CALIBRATE_BUS "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_calibrate_bus \- Force StarPU bus calibration +.SH SYNOPSIS +.B starpu_calibrate_bus +[\fI\,OPTION\/\fR] +.SH DESCRIPTION +Force a bus calibration. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_calibrate_bus.c b/tools/starpu_calibrate_bus.c new file mode 100644 index 0000000..06ec2ed --- /dev/null +++ b/tools/starpu_calibrate_bus.c @@ -0,0 +1,88 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +#define PROGNAME "starpu_calibrate_bus" +/* Print the command-line help text on stdout. */ +static void usage(void) +{ + (void) fprintf(stdout, +"Force a bus calibration.\n\ +\n\ +Usage: %s [OPTION]\n\ +\n\ +Options:\n\ + -h, --help display this help and exit\n\ + -v, --version output version information and exit\n\ +\n\ +Report bugs to <%s>.\n", PROGNAME, PACKAGE_BUGREPORT); +} +/* Handle the single optional argument; returns only when none is given, every other path exits. */ +static void parse_args(int argc, char **argv) +{ + if (argc == 1) + return; + + if (argc > 2) + { + usage(); + exit(EXIT_FAILURE); + } + + if (strcmp(argv[1], "-h") == 0 || + strcmp(argv[1], "--help") == 0) + { + usage(); + exit(EXIT_SUCCESS); + } + else if (strcmp(argv[1], "-v") == 0 || + strcmp(argv[1], "--version") == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); /* NOTE(review): version goes to stderr while the help text goes to stdout */ + exit(EXIT_SUCCESS); + } + else + { + (void) fprintf(stderr, "Unknown arg %s\n", argv[1]); + exit(EXIT_FAILURE); + } + +} +/* Initialize StarPU with conf.bus_calibrate set, then shut it down again. */ +int main(int argc, char **argv) +{ + int ret; + struct starpu_conf conf; + + parse_args(argc, argv); + + starpu_conf_init(&conf); + conf.bus_calibrate = 1; + + ret = starpu_init(&conf); + if (ret == -ENODEV) return 77; /* presumably the "skipped test" exit status -- TODO confirm */ + if (ret != 0) return ret; + + starpu_shutdown(); + + return 0; +} diff --git a/tools/starpu_codelet_histo_profile b/tools/starpu_codelet_histo_profile new file mode 100755 index
0000000..24193bd --- /dev/null +++ b/tools/starpu_codelet_histo_profile @@ -0,0 +1,100 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +PROGNAME=$0 + +usage() +{ + echo "Offline tool to draw codelet profile histogram over a traced execution" + echo "" + echo "Usage: $PROGNAME distrib.data" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo "" + echo "Report bugs to " + exit 1 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + usage +fi + +create_histograms() +{ + +inputfile=$1 + +R --no-save > /dev/null << EOF + +handle_hash <- function (codelet, arch, hash) +{ + +mytable <- table +mytable <- mytable[mytable[,1]==codelet,] +mytable <- mytable[mytable[,2]==arch,] +mytable <- mytable[mytable[,4]==hash,] + +val <- mytable[,5] + + +# there is certainly a better way to do this ! 
+size <- unique(mytable[,3]) + +pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep=".")); + +try ( { h <- hist(val[val > quantile(val,0.01) & val diff --git a/tools/starpu_codelet_histo_profile.in b/tools/starpu_codelet_histo_profile.in new file mode 100755 index 0000000..50344fc --- /dev/null +++ b/tools/starpu_codelet_histo_profile.in @@ -0,0 +1,100 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +PROGNAME=$0 + +usage() +{ + echo "Offline tool to draw codelet profile histogram over a traced execution" + echo "" + echo "Usage: $PROGNAME distrib.data" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo "" + echo "Report bugs to <@PACKAGE_BUGREPORT@>" + exit 1 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + usage +fi + +create_histograms() +{ + +inputfile=$1 + +R --no-save > /dev/null << EOF + +handle_hash <- function (codelet, arch, hash) +{ + +mytable <- table +mytable <- mytable[mytable[,1]==codelet,] +mytable <- mytable[mytable[,2]==arch,] +mytable <- mytable[mytable[,4]==hash,] + +val <- mytable[,5] + + +# there is certainly a better way to do this ! 
+size <- unique(mytable[,3]) + +pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep=".")); + +try ( { h <- hist(val[val > quantile(val,0.01) & val" + exit 1 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$2" = "" ] ; then + usage +fi + +inputfile=$1 +codelet_name=$2 + +archlist=`< $inputfile grep "^$codelet_name " | cut -f 2 | sort | uniq | xargs` + +# extract subfiles from the history file +for arch in $archlist +do + echo "Arch $arch" + grep "^$codelet_name $arch" $inputfile > $inputfile.$arch +done + +# create the gnuplot file + +gpfile=$inputfile.gp + +echo "#!/usr/bin/gnuplot -persist" > $gpfile +echo "set term postscript eps enhanced color" >> $gpfile +echo "set logscale x" >> $gpfile +echo "set logscale y" >> $gpfile +echo "set output \"$inputfile.eps\"" >> $gpfile +echo "set key top left" >> $gpfile +echo "set xlabel \"Total data size\"" >> $gpfile +echo "set ylabel \"Execution time (ms)\"" >> $gpfile + +echo -n "plot " >> $gpfile + +first=1 + +for arch in $archlist +do + if [ $first = 0 ] + then + echo -n " , " >> $gpfile + else + first=0 + fi + + echo -n " \"$inputfile.$arch\" using 3:5 title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile +done diff --git a/tools/starpu_codelet_profile.1 b/tools/starpu_codelet_profile.1 new file mode 100644 index 0000000..4a6d819 --- /dev/null +++ b/tools/starpu_codelet_profile.1 @@ -0,0 +1,18 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. 
+.TH STARPU_CODELET_PROFILE "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_codelet_profile \- Draw StarPU codelet profile +.SH SYNOPSIS +.B starpu_codelet_profile +\fI\,distrib.data codelet_name\/\fR +.SH DESCRIPTION +Offline tool to draw codelet profile over a traced execution +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_codelet_profile.in b/tools/starpu_codelet_profile.in new file mode 100755 index 0000000..e1979f7 --- /dev/null +++ b/tools/starpu_codelet_profile.in @@ -0,0 +1,81 @@ +#!@REALBASH@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +PROGNAME=$0 + +usage() +{ + echo "Offline tool to draw codelet profile over a traced execution" + echo "" + echo "Usage: $PROGNAME distrib.data codelet_name" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo "" + echo "Report bugs to <@PACKAGE_BUGREPORT@>" + exit 1 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$2" = "" ] ; then + usage +fi + +inputfile=$1 +codelet_name=$2 + +archlist=`< $inputfile grep "^$codelet_name " | cut -f 2 | sort | uniq | xargs` + +# extract subfiles from the history file +for arch in $archlist +do + echo "Arch $arch" + grep "^$codelet_name $arch" $inputfile > $inputfile.$arch +done + +# create the gnuplot file + +gpfile=$inputfile.gp + +echo "#!/usr/bin/gnuplot -persist" > $gpfile +echo "set term postscript eps enhanced color" >> $gpfile +echo "set logscale x" >> $gpfile +echo "set logscale y" >> $gpfile +echo "set output \"$inputfile.eps\"" >> $gpfile +echo "set key top left" >> $gpfile +echo "set xlabel \"Total data size\"" >> $gpfile +echo "set ylabel \"Execution time (ms)\"" >> $gpfile + +echo -n "plot " >> $gpfile + +first=1 + +for arch in $archlist +do + if [ $first = 0 ] + then + echo -n " , " >> $gpfile + else + first=0 + fi + + echo -n " \"$inputfile.$arch\" using 3:5 title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile +done diff --git a/tools/starpu_config b/tools/starpu_config new file mode 100755 index 0000000..0b5e650 --- /dev/null +++ b/tools/starpu_config @@ -0,0 +1,86 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +PROGNAME=$0 + +set -e + +usage() +{ + echo "Show the configuration used by StarPU." + echo "" + echo "Usage: $PROGNAME " + echo "" + echo "" + echo " The starpu_config utility shows all the configuration parameters used when installing StarPU" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo " -d only shows define parameters" + echo " -u only shows undefined parameters" + echo "" + echo " if parameters are given, only configuration parameters with the given name are displayed" + echo "" + echo "Report bugs to " + exit 0 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then + usage +fi + +prefix=$(realpath /usr/local) +if test -d $prefix +then + starpu_datarootdir=$(realpath ${prefix}/share) + config_file="$starpu_datarootdir/starpu/starpu_config.cfg" + if test ! -f "$config_file" + then + config_file=$(realpath $(dirname $0))/starpu_config.cfg + fi +else + config_file=$(realpath $(dirname $0))/starpu_config.cfg +fi +if test ! 
-f "$config_file" +then + echo "Configuration file unavailable" + exit 1 +fi + +echo "processing $config_file" + +if test "$1" == "-d" +then + grep 'define' $config_file +elif test "$1" == "-u" +then + grep 'undef' $config_file +elif test "$1" +then + for x in $* + do + grep $x $config_file + done +else + sort $config_file +fi + diff --git a/tools/starpu_config.1 b/tools/starpu_config.1 new file mode 100644 index 0000000..80478a7 --- /dev/null +++ b/tools/starpu_config.1 @@ -0,0 +1,28 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_CONFIG "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_config \- Display StarPU configuration +.SH SYNOPSIS +.B starpu_config +\fI\,\/\fR +.SH DESCRIPTION +Show the configuration used by StarPU. +.IP +The starpu_config utility shows all the configuration parameters used when installing StarPU +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-d\fR +only shows define parameters +.TP +\fB\-u\fR +only shows undefined parameters +.IP +if parameters are given, only configuration parameters with the given name are displayed +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_config.cfg b/tools/starpu_config.cfg new file mode 100644 index 0000000..2db1639 --- /dev/null +++ b/tools/starpu_config.cfg @@ -0,0 +1,203 @@ +/* #undef STARPURM_DLB_VERBOSE */ +/* #undef STARPURM_HAVE_DLB */ +/* #undef STARPURM_HAVE_DLB_CALLBACK_ARG */ +/* #undef STARPURM_STARPU_HAVE_WORKER_CALLBACKS */ +/* #undef STARPURM_VERBOSE */ +/* #undef STARPU_ARMPL */ +/* #undef STARPU_ATLAS */ +/* #undef STARPU_BUBBLE */ +/* #undef STARPU_BUBBLE_VERBOSE */ +/* #undef STARPU_BUILT_IN_MIN_DGELS */ +/* #undef STARPU_COVERITY */ +/* #undef STARPU_DATA_LOCALITY_ENFORCE */ +#define STARPU_DEBUG 1 +/* #undef STARPU_DEVEL */ +/* #undef STARPU_DISABLE_ASYNCHRONOUS_COPY */ +/* #undef 
STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY */ +/* #undef STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY */ +/* #undef STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY */ +/* #undef STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY */ +/* #undef STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY */ +/* #undef STARPU_EXTRA_VERBOSE */ +/* #undef STARPU_FXT_LOCK_TRACES */ +#define STARPU_FXT_MAX_FILES 64 +#define STARPU_GDB_PATH "/usr/bin/gdb" +/* #undef STARPU_GOTO */ +#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N 1 +#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 1 +#define STARPU_HAVE_ATOMIC_EXCHANGE_N 1 +#define STARPU_HAVE_ATOMIC_EXCHANGE_N_8 1 +#define STARPU_HAVE_ATOMIC_FETCH_ADD 1 +#define STARPU_HAVE_ATOMIC_FETCH_ADD_8 1 +#define STARPU_HAVE_ATOMIC_FETCH_OR 1 +#define STARPU_HAVE_ATOMIC_FETCH_OR_8 1 +#define STARPU_HAVE_ATOMIC_TEST_AND_SET 1 +#define STARPU_HAVE_BLAS 1 +#define STARPU_HAVE_BUSID 1 +#define STARPU_HAVE_CBLAS_H 1 +#define STARPU_HAVE_CUDA_CANMAPHOST 1 +#define STARPU_HAVE_CUDA_MEMCPY_PEER 1 +#define STARPU_HAVE_CUDA_MNGMEM 1 +#define STARPU_HAVE_CUDA_PAGEABLEMEM 1 +#define STARPU_HAVE_CUDA_POINTER_TYPE 1 +#define STARPU_HAVE_CUDA_UNIFIEDADDR 1 +#define STARPU_HAVE_CUFFTDOUBLECOMPLEX 1 +#define STARPU_HAVE_CURAND 1 +#define STARPU_HAVE_CXX11 1 +/* #undef STARPU_HAVE_DARWIN */ +#define STARPU_HAVE_DOMAINID 1 +/* #undef STARPU_HAVE_F77_H */ +#define STARPU_HAVE_FC 1 +#define STARPU_HAVE_FFTW 1 +#define STARPU_HAVE_FFTWF 1 +#define STARPU_HAVE_FFTWL 1 +#define STARPU_HAVE_GLPK_H 1 +/* #undef STARPU_HAVE_HDF5 */ +#define STARPU_HAVE_HELGRIND_H 1 +/* #undef STARPU_HAVE_HIP_MEMCPY_PEER */ +#define STARPU_HAVE_HWLOC 1 +/* #undef STARPU_HAVE_ICC */ +/* #undef STARPU_HAVE_LEVELDB */ +#define STARPU_HAVE_LIBCUBLASLT 1 +#define STARPU_HAVE_LIBCUSOLVER 1 +#define STARPU_HAVE_LIBCUSPARSE 1 +#define STARPU_HAVE_LIBNUMA 1 +/* #undef STARPU_HAVE_MAGMA */ +#define STARPU_HAVE_MALLOC_H 1 +#define STARPU_HAVE_MEMALIGN 1 +#define STARPU_HAVE_MEMCHECK_H 1 +#define STARPU_HAVE_MPI_COMM_CREATE_GROUP 1 
+#define STARPU_HAVE_MPI_EXT 1 +/* #undef STARPU_HAVE_MPI_SYNC_CLOCKS */ +/* #undef STARPU_HAVE_MSG_MSG_H */ +#define STARPU_HAVE_NEARBYINTF 1 +#define STARPU_HAVE_NVML_H 1 +#define STARPU_HAVE_POSIX_MEMALIGN 1 +/* #undef STARPU_HAVE_POTI */ +#define STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME 1 +#define STARPU_HAVE_PTHREAD_BARRIER 1 +#define STARPU_HAVE_PTHREAD_SETNAME_NP 1 +#define STARPU_HAVE_PTHREAD_SPIN_LOCK 1 +#define STARPU_HAVE_RINTF 1 +/* #undef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB */ +#define STARPU_HAVE_SCHED_YIELD 1 +#define STARPU_HAVE_SETENV 1 +/* #undef STARPU_HAVE_SIMGRID_ACTOR_H */ +/* #undef STARPU_HAVE_SIMGRID_BARRIER_H */ +/* #undef STARPU_HAVE_SIMGRID_COND_H */ +/* #undef STARPU_HAVE_SIMGRID_ENGINE_H */ +/* #undef STARPU_HAVE_SIMGRID_HOST_H */ +/* #undef STARPU_HAVE_SIMGRID_LINK_H */ +/* #undef STARPU_HAVE_SIMGRID_MSG_H */ +/* #undef STARPU_HAVE_SIMGRID_MUTEX_H */ +/* #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H */ +/* #undef STARPU_HAVE_SIMGRID_SIMDAG_H */ +/* #undef STARPU_HAVE_SIMGRID_VERSION_H */ +/* #undef STARPU_HAVE_SIMGRID_ZONE_H */ +/* #undef STARPU_HAVE_SMX_ACTOR_T */ +#define STARPU_HAVE_STATEMENT_EXPRESSIONS 1 +#define STARPU_HAVE_STRERROR_R 1 +#define STARPU_HAVE_STRUCT_TIMESPEC 1 +#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP 1 +#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 1 +#define STARPU_HAVE_SYNC_FETCH_AND_ADD 1 +#define STARPU_HAVE_SYNC_FETCH_AND_ADD_8 1 +#define STARPU_HAVE_SYNC_FETCH_AND_OR 1 +#define STARPU_HAVE_SYNC_FETCH_AND_OR_8 1 +#define STARPU_HAVE_SYNC_LOCK_TEST_AND_SET 1 +#define STARPU_HAVE_SYNC_SYNCHRONIZE 1 +#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 +#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 1 +#define STARPU_HAVE_UNISTD_H 1 +#define STARPU_HAVE_UNSETENV 1 +#define STARPU_HAVE_VALGRIND_H 1 +/* #undef STARPU_HAVE_WINDOWS */ +#define STARPU_HAVE_X11 1 +/* #undef STARPU_HAVE_XBT_BASE_H */ +/* #undef STARPU_HAVE_XBT_CONFIG_H */ +/* #undef STARPU_HAVE_XBT_SYNCHRO_H */ +#define STARPU_HISTORYMAXERROR 50 
+#define STARPU_LINUX_SYS 1 +/* #undef STARPU_LONG_CHECK */ +#define STARPU_MAJOR_VERSION 1 +#define STARPU_MAXCPUS 128 +#define STARPU_MAXCUDADEVS 4 +#define STARPU_MAXHIPDEVS 8 +#define STARPU_MAXIMPLEMENTATIONS 4 +#define STARPU_MAXMAXFPGADEVS 12 +#define STARPU_MAXMPIDEVS 0 +#define STARPU_MAXNODES 16 +#define STARPU_MAXNUMANODES 2 +#define STARPU_MAXOPENCLDEVS 8 +#define STARPU_MAXTCPIPDEVS 0 +/* #undef STARPU_MEMORY_STATS */ +#define STARPU_MINOR_VERSION 4 +/* #undef STARPU_MKL */ +/* #undef STARPU_MLR_MODEL */ +/* #undef STARPU_MODEL_DEBUG */ +/* #undef STARPU_MPI_EXTRA_VERBOSE */ +/* #undef STARPU_MPI_PEDANTIC_ISEND */ +/* #undef STARPU_MPI_VERBOSE */ +/* #undef STARPU_NATIVE_WINTHREADS */ +/* #undef STARPU_NEW_CHECK */ +#define STARPU_NMAXBUFS 8 +#define STARPU_NMAXDEVS 8 +#define STARPU_NMAXWORKERS 160 +#define STARPU_NMAX_COMBINEDWORKERS 128 +#define STARPU_NMAX_SCHED_CTXS 10 +#define STARPU_NON_BLOCKING_DRIVERS 1 +/* #undef STARPU_NO_ASSERT */ +/* #undef STARPU_OPENBLAS */ +/* #undef STARPU_OPENBSD_SYS */ +/* #undef STARPU_OPENCL_SIMULATOR */ +/* #undef STARPU_OPENGL_RENDER */ +#define STARPU_OPENMP 1 +/* #undef STARPU_OPENMP_LLVM */ +/* #undef STARPU_PAPI */ +#define STARPU_PARALLEL_WORKER 1 +/* #undef STARPU_PERF_DEBUG */ +/* #undef STARPU_PERF_MODEL_DIR */ +#define STARPU_PROF_TOOL 1 +#define STARPU_PTHREAD_COND_INITIALIZER_ZERO 1 +#define STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO 1 +#define STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO 1 +#define STARPU_PYTHON_HAVE_CLOUDPICKLE 1 +#define STARPU_PYTHON_HAVE_JOBLIB 1 +#define STARPU_PYTHON_HAVE_NUMPY 1 +#define STARPU_QUICK_CHECK 1 +#define STARPU_RELEASE_VERSION 10 +/* #undef STARPU_SC_HYPERVISOR_DEBUG */ +/* #undef STARPU_SIMGRID */ +/* #undef STARPU_SIMGRID_HAVE_SIMGRID_INIT */ +/* #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT */ +/* #undef STARPU_SIMGRID_MC */ +#define STARPU_SPINLOCK_CHECK 1 +/* #undef STARPU_STATIC_ONLY */ +#define STARPU_SYSTEM_BLAS 1 +#define STARPU_USE_ALLOCATION_CACHE 1 +/* #undef 
STARPU_USE_AYUDAME1 */ +/* #undef STARPU_USE_AYUDAME2 */ +#define STARPU_USE_CPU 1 +#define STARPU_USE_CUDA 1 +/* #undef STARPU_USE_CUDA0 */ +/* #undef STARPU_USE_CUDA1 */ +#define STARPU_USE_CUDA_MAP 1 +#define STARPU_USE_DRAND48 1 +#define STARPU_USE_ERAND48_R 1 +#define STARPU_USE_FXT 1 +/* #undef STARPU_USE_HIP */ +/* #undef STARPU_USE_HIPBLAS */ +/* #undef STARPU_USE_MAX_FPGA */ +/* #undef STARPU_USE_MP */ +#define STARPU_USE_MPI 1 +/* #undef STARPU_USE_MPI_FT */ +/* #undef STARPU_USE_MPI_FT_STATS */ +/* #undef STARPU_USE_MPI_MASTER_SLAVE */ +#define STARPU_USE_MPI_MPI 1 +/* #undef STARPU_USE_MPI_NMAD */ +#define STARPU_USE_OPENCL 1 +/* #undef STARPU_USE_SC_HYPERVISOR */ +/* #undef STARPU_USE_TCPIP_MASTER_SLAVE */ +/* #undef STARPU_VALGRIND_FULL */ +#define STARPU_VERBOSE 1 +/* #undef STARPU_WORKER_CALLBACKS */ diff --git a/tools/starpu_config.in b/tools/starpu_config.in new file mode 100644 index 0000000..223620f --- /dev/null +++ b/tools/starpu_config.in @@ -0,0 +1,86 @@ +#!@REALBASH@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +PROGNAME=$0 + +set -e + +usage() +{ + echo "Show the configuration used by StarPU." 
+ echo "" + echo "Usage: $PROGNAME " + echo "" + echo "" + echo " The starpu_config utility shows all the configuration parameters used when installing StarPU" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo " -d only shows define parameters" + echo " -u only shows undefined parameters" + echo "" + echo " if parameters are given, only configuration parameters with the given name are displayed" + echo "" + echo "Report bugs to <@PACKAGE_BUGREPORT@>" + exit 0 +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then + usage +fi + +prefix=$(realpath @prefix@) +if test -d $prefix +then + starpu_datarootdir=$(realpath @datarootdir@) + config_file="$starpu_datarootdir/starpu/starpu_config.cfg" + if test ! -f "$config_file" + then + config_file=$(realpath $(dirname $0))/starpu_config.cfg + fi +else + config_file=$(realpath $(dirname $0))/starpu_config.cfg +fi +if test ! -f "$config_file" +then + echo "Configuration file unavailable" + exit 1 +fi + +echo "processing $config_file" + +if test "$1" == "-d" +then + grep 'define' $config_file +elif test "$1" == "-u" +then + grep 'undef' $config_file +elif test "$1" +then + for x in $* + do + grep $x $config_file + done +else + sort $config_file +fi + diff --git a/tools/starpu_env b/tools/starpu_env new file mode 100755 index 0000000..152e779 --- /dev/null +++ b/tools/starpu_env @@ -0,0 +1,93 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +PROGNAME=starpu_env + +usage() +{ + echo "Tool to set StarPU environment variables" + echo "" + echo "Usage: source $PROGNAME" + echo "" + echo "" + echo "Options:" + echo " -h, --help display this help and exit" + echo " -v, --version output version information and exit" + echo " -d directory overwrite StarPU installation directory" + echo "" + echo "Report bugs to " +} + +if [ "$1" = "-v" ] || [ "$1" = "--version" ] +then + echo "$PROGNAME (StarPU) 1.4.10" +elif [ "$1" = "-h" ] || [ "$1" = "--help" ] +then + usage +elif [ "$1" = "-d" ] +then + if [ "$2" != "" ] + then + # if a argument is given for the StarPU root directory, suppose sub-directories are in standard location + prefix=$(realpath $2) + exec_prefix=$prefix + starpu_bindir=${exec_prefix}/bin + starpu_libdir=${exec_prefix}/lib + starpu_datarootdir=${exec_prefix}/share + fi +else + prefix=$(realpath /usr/local) + exec_prefix=$(realpath ${prefix}) + starpu_bindir=$(realpath ${exec_prefix}/bin) + starpu_libdir=$(realpath ${exec_prefix}/lib) + starpu_datarootdir=$(realpath ${prefix}/share) +fi + +if test -f $starpu_bindir/starpu_machine_display -a -f $starpu_libdir/pkgconfig/libstarpu.pc -a -d $starpu_bindir -a -d $starpu_libdir -a -d $starpu_datarootdir +then + echo "Setting StarPU environment for $prefix" + export STARPU_ROOT=$prefix + export PKG_CONFIG_PATH=$starpu_libdir/pkgconfig:$PKG_CONFIG_PATH + export LD_LIBRARY_PATH=$starpu_libdir:$LD_LIBRARY_PATH + export PATH=$starpu_bindir:$PATH + export MANPATH=$starpu_datarootdir/man:$MANPATH + if [ -n "python3" ] + then + for d in $starpu_libdir/python3*/site-packages ; do export PYTHONPATH=$d:$PYTHONPATH ; done + fi +else + echo "[Error] '$prefix' is not a valid StarPU installation 
directory ..."
+ if test ! -f $starpu_bindir/starpu_machine_display
+ then
+ echo " ... cannot find executable '$starpu_bindir/starpu_machine_display'"
+ fi
+ if test ! -f $starpu_libdir/pkgconfig/libstarpu.pc
+ then
+ echo " ... cannot find file '$starpu_libdir/pkgconfig/libstarpu.pc'"
+ fi
+ if test ! -d $starpu_bindir
+ then
+ echo " ... cannot find directory '$starpu_bindir'"
+ fi
+ if test ! -d $starpu_libdir
+ then
+ echo " ... cannot find directory '$starpu_libdir'"
+ fi
+ if test ! -d $starpu_datarootdir
+ then
+ echo " ... cannot find directory '$starpu_datarootdir'"
+ fi
+fi
diff --git a/tools/starpu_env.1 b/tools/starpu_env.1
new file mode 100644
index 0000000..31b3cd2
--- /dev/null
+++ b/tools/starpu_env.1
@@ -0,0 +1,32 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3.
+.TH STARPU_ENV "1" "December 2025" "StarPU 1.4.10" "User Commands"
+.SH NAME
+starpu_env \- Set StarPU environment variables
+.SH SYNOPSIS
+.B source
+\fI\,starpu_env\/\fR
+.SH DESCRIPTION
+Tool to set StarPU environment variables
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-d\fR directory
+overwrite StarPU installation directory
+.SH "REPORTING BUGS"
+Report bugs to
+\&./starpu_env: 59: test: \fB\-a\fR: unexpected operator
+[Error] '' is not a valid StarPU installation directory ...
+.IP
+\&... cannot find executable '/starpu_machine_display'
+\&... cannot find file '/pkgconfig/libstarpu.pc'
+.PP
+\&./starpu_env: 59: test: \fB\-a\fR: unexpected operator
+[Error] '' is not a valid StarPU installation directory ...
+.IP
+\&... cannot find executable '/starpu_machine_display'
+\&... cannot find file '/pkgconfig/libstarpu.pc'
diff --git a/tools/starpu_env.in b/tools/starpu_env.in
new file mode 100755
index 0000000..75676e4
--- /dev/null
+++ b/tools/starpu_env.in
@@ -0,0 +1,118 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+PROGNAME=starpu_env
+
+usage()
+{
+	echo "Tool to set StarPU environment variables"
+	echo ""
+	echo "Usage: source $PROGNAME"
+	echo ""
+	echo ""
+	echo "Options:"
+	echo " -h, --help display this help and exit"
+	echo " -v, --version output version information and exit"
+	echo " -d directory overwrite StarPU installation directory"
+	echo ""
+	echo "Report bugs to <@PACKAGE_BUGREPORT@>"
+}
+
+# This file is meant to be sourced, so it cannot stop early with exit:
+# starpu_env_setup tells whether an installation directory was selected,
+# i.e. whether the environment has to be checked and set below.
+starpu_env_setup=0
+
+if [ "$1" = "-v" ] || [ "$1" = "--version" ]
+then
+	echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@"
+elif [ "$1" = "-h" ] || [ "$1" = "--help" ]
+then
+	usage
+elif [ "$1" = "-d" ]
+then
+	if [ "$2" != "" ]
+	then
+		# if a argument is given for the StarPU root directory, suppose sub-directories are in standard location
+		prefix=$(realpath "$2")
+		exec_prefix=$prefix
+		starpu_bindir=${exec_prefix}/bin
+		starpu_libdir=${exec_prefix}/lib
+		starpu_datarootdir=${exec_prefix}/share
+		starpu_env_setup=1
+	else
+		echo "[Error] option -d requires a directory"
+		usage
+	fi
+else
+	prefix=$(realpath "@prefix@")
+	exec_prefix=$(realpath "@exec_prefix@")
+	starpu_bindir=$(realpath "@bindir@")
+	starpu_libdir=$(realpath "@libdir@")
+	starpu_datarootdir=$(realpath "@datarootdir@")
+	starpu_env_setup=1
+fi
+
+if [ "$starpu_env_setup" = 0 ]
+then
+	# Help, version or usage error: leave the environment untouched
+	:
+elif [ -f "$starpu_bindir/starpu_machine_display" ] && [ -f "$starpu_libdir/pkgconfig/libstarpu.pc" ] && [ -d "$starpu_bindir" ] && [ -d "$starpu_libdir" ] && [ -d "$starpu_datarootdir" ]
+then
+	echo "Setting StarPU environment for $prefix"
+	export STARPU_ROOT="$prefix"
+	# An empty entry in PATH or LD_LIBRARY_PATH stands for the current
+	# directory: do not leave a trailing ':' when the variable was empty
+	export PKG_CONFIG_PATH="$starpu_libdir/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
+	export LD_LIBRARY_PATH="$starpu_libdir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+	export PATH="$starpu_bindir${PATH:+:$PATH}"
+	# The trailing ':' is kept here: man appends its default search path there
+	export MANPATH="$starpu_datarootdir/man:$MANPATH"
+	if [ -n "@PYTHON@" ]
+	then
+		for d in "$starpu_libdir"/@PYTHON@*/site-packages
+		do
+			# The pattern is left as is when nothing matches it
+			if [ -d "$d" ]
+			then
+				export PYTHONPATH="$d${PYTHONPATH:+:$PYTHONPATH}"
+			fi
+		done
+	fi
+else
+	echo "[Error] '$prefix' is not a valid StarPU installation directory ..."
+	if [ ! -f "$starpu_bindir/starpu_machine_display" ]
+	then
+		echo " ... cannot find executable '$starpu_bindir/starpu_machine_display'"
+	fi
+	if [ ! -f "$starpu_libdir/pkgconfig/libstarpu.pc" ]
+	then
+		echo " ... cannot find file '$starpu_libdir/pkgconfig/libstarpu.pc'"
+	fi
+	if [ ! -d "$starpu_bindir" ]
+	then
+		echo " ... cannot find directory '$starpu_bindir'"
+	fi
+	if [ ! -d "$starpu_libdir" ]
+	then
+		echo " ... cannot find directory '$starpu_libdir'"
+	fi
+	if [ ! -d "$starpu_datarootdir" ]
+	then
+		echo " ... cannot find directory '$starpu_datarootdir'"
+	fi
+fi
+unset starpu_env_setup
diff --git a/tools/starpu_fxt_data_trace.1 b/tools/starpu_fxt_data_trace.1
new file mode 100644
index 0000000..646c4d2
--- /dev/null
+++ b/tools/starpu_fxt_data_trace.1
@@ -0,0 +1,26 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3.
+.TH STARPU_FXT_DATA_TRACE "1" "December 2025" "StarPU 1.4.10" "User Commands"
+.SH NAME
+starpu_fxt_data_trace \- Print data trace from raw StarPU FxT trace
+.SH SYNOPSIS
+.B starpu_fxt_data_trace
+[ \fI\,options \/\fR] \fI\, \/\fR[\fI\, \/\fR.... \fI\,\/\fR]
+.SH DESCRIPTION
+Get statistics about tasks lengths and data size
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+output version information and exit
+.HP
+\fB\-d\fR directory where to save output files (by default current directory)
+.TP
+filename
+specify the FxT trace input file.
+.TP
+codeletX
+specify the codelet name to profile (by default, all codelets are profiled)
+.SH "REPORTING BUGS"
+Report bugs to .
diff --git a/tools/starpu_fxt_data_trace.c b/tools/starpu_fxt_data_trace.c new file mode 100644 index 0000000..e003c5b --- /dev/null +++ b/tools/starpu_fxt_data_trace.c @@ -0,0 +1,199 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013 Joris Pablo + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include + +#define PROGNAME "starpu_fxt_data_trace" +#define MAX_LINE_SIZE 100 + +static void usage() +{ + fprintf(stderr, "Get statistics about tasks lengths and data size\n\n"); + fprintf(stderr, "Usage: %s [ options ] [ .... 
]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -h, --help display this help and exit\n"); + fprintf(stderr, " -v, --version output version information and exit\n\n"); + fprintf(stderr, " -d directory where to save output files (by default current directory)\n"); + fprintf(stderr, " filename specify the FxT trace input file.\n"); + fprintf(stderr, " codeletX specify the codelet name to profile (by default, all codelets are profiled)\n"); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); +} + +static int parse_args(int argc, char **argv, int *pos, char **directory) +{ + int i; + + if(argc < 2) + { + fprintf(stderr, "Incorrect usage, aborting\n"); + usage(); + return 77; + } + + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + usage(); + exit(EXIT_FAILURE); + } + + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + exit(EXIT_FAILURE); + } + + if (strcmp(argv[i], "-d") == 0) + { + free(*directory); + *directory = strdup(argv[++i]); + *pos += 2; + continue; + } + + } + return 0; +} + +static void write_gp(char *dir, int argc, char **argv) +{ + char codelet_filename[256]; + snprintf(codelet_filename, sizeof(codelet_filename), "%s/codelet_list", dir); + FILE *codelet_list = fopen(codelet_filename, "r"); + if(!codelet_list) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", codelet_filename, strerror(errno)); + exit(-1); + } + char codelet_name[MAX_LINE_SIZE]; + char file_name[256]; + snprintf(file_name, sizeof(file_name), "%s/data_trace.gp", dir); + FILE *plt = fopen(file_name, "w+"); + if(!plt) + { + STARPU_ABORT_MSG("Failed to open '%s' (err %s)", file_name, strerror(errno)); + exit(-1); + } + + fprintf(plt, "#!/usr/bin/gnuplot -persist\n\n"); + fprintf(plt, "set term postscript eps enhanced color\n"); + fprintf(plt, "set output 
\"%s/data_trace.eps\"\n", dir); + fprintf(plt, "set title \"Data trace\"\n"); + fprintf(plt, "set logscale x\n"); + fprintf(plt, "set logscale y\n"); + fprintf(plt, "set xlabel \"data size (B)\"\n"); + fprintf(plt, "set ylabel \"tasks size (ms)\"\n"); + fprintf(plt, "plot "); + int c_iter; + char *v_iter; + int begin = 1; + while(fgets(codelet_name, MAX_LINE_SIZE, codelet_list) != NULL) + { + if(argc == 0) + { + if(begin) + begin = 0; + else + fprintf(plt, ", "); + } + int size = strlen(codelet_name); + if(size > 0) + codelet_name[size-1] = '\0'; + if(argc != 0) + { + for(c_iter = 0, v_iter = argv[c_iter]; + c_iter < argc; + c_iter++, v_iter = argv[c_iter]) + { + if(!strcmp(v_iter, codelet_name)) + { + if(begin) + begin = 0; + else + fprintf(plt, ", "); + fprintf(plt, "\"%s\" using 2:1 with dots lw 1 title \"%s\"", codelet_name, codelet_name); + } + } + } + else + { + fprintf(plt, "\"%s/%s\" using 2:1 with dots lw 1 title \"%s\"", dir, codelet_name, codelet_name); + } + } + fprintf(plt, "\n"); + + if(fclose(codelet_list)) + { + perror("close failed :"); + exit(-1); + } + + if(fclose(plt)) + { + perror("close failed :"); + exit(-1); + } + + struct stat sb; + int ret = stat(file_name, &sb); + if (ret) + { + perror("stat"); + STARPU_ABORT(); + } + + /* Make the gnuplot script executable for the owner */ + ret = chmod(file_name, sb.st_mode|S_IXUSR +#ifdef S_IXGRP + |S_IXGRP +#endif +#ifdef S_IXOTH + |S_IXOTH +#endif + ); + + if (ret) + { + perror("chmod"); + STARPU_ABORT(); + } + fprintf(stdout, "Gnuplot file <%s/data_trace.gp> has been successfully created.\n", dir); +} + +int main(int argc, char **argv) +{ + char *directory = strdup("."); + int pos=0; + int ret = parse_args(argc, argv, &pos, &directory); + if (ret) + { + free(directory); + return ret; + } + starpu_fxt_write_data_trace_in_dir(argv[1+pos], directory); + write_gp(directory, argc - (2 + pos), argv + 2 + pos); + starpu_perfmodel_free_sampling(); + free(directory); + return 0; +} diff --git 
a/tools/starpu_fxt_number_events_to_names.1 b/tools/starpu_fxt_number_events_to_names.1 new file mode 100644 index 0000000..e349ae3 --- /dev/null +++ b/tools/starpu_fxt_number_events_to_names.1 @@ -0,0 +1,18 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_FXT_NUMBER_EVENTS_TO_NAMES.PY "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_fxt_number_events_to_names.py \- Convert events in StarPU traces +.SH SYNOPSIS +.B starpu_fxt_number_events_to_names.py +\fI\,\/\fR +.SH DESCRIPTION +Convert event keys in number_events.data to event names +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_fxt_number_events_to_names.py b/tools/starpu_fxt_number_events_to_names.py new file mode 100755 index 0000000..08fdf73 --- /dev/null +++ b/tools/starpu_fxt_number_events_to_names.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +import sys + +""" +Convert event keys into event names +Running starpu_fxt_tool with the option -number-events produces a file number_events.data +This file contains the number of events for each event type. +Events are represented with their key. 
+To convert event keys to event names, call starpu_fxt_number_events_to_names.py +""" + +# STARPU_FXT_EVENT_DEFINES is generated by configure and is the output of +# the following command: +# grep -E "#define\s+_STARPU_(MPI_)?FUT_" src/common/fxt.h mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 |cut -d : -f 2 + +fxt_codes_raw = """ +#define _STARPU_FUT_WORKER_INIT_START 0x5100 +#define _STARPU_FUT_WORKER_INIT_END 0x5101 +#define _STARPU_FUT_START_CODELET_BODY 0x5102 +#define _STARPU_FUT_END_CODELET_BODY 0x5103 +#define _STARPU_FUT_JOB_PUSH 0x5104 +#define _STARPU_FUT_JOB_POP 0x5105 +#define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 +#define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 +#define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 +#define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 +#define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 +#define _STARPU_FUT_TAG 0x5111 +#define _STARPU_FUT_TAG_DEPS 0x5112 +#define _STARPU_FUT_TASK_DEPS 0x5113 +#define _STARPU_FUT_DATA_COPY 0x5114 +#define _STARPU_FUT_WORK_STEALING 0x5115 +#define _STARPU_FUT_WORKER_DEINIT_START 0x5116 +#define _STARPU_FUT_WORKER_DEINIT_END 0x5117 +#define _STARPU_FUT_WORKER_SLEEP_START 0x5118 +#define _STARPU_FUT_WORKER_SLEEP_END 0x5119 +#define _STARPU_FUT_TASK_SUBMIT 0x511a +#define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b +#define _STARPU_FUT_MODEL_NAME 0x511c +#define _STARPU_FUT_DATA_NAME 0x511d +#define _STARPU_FUT_DATA_COORDINATES 0x511e +#define _STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f +#define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 +#define _STARPU_FUT_NEW_MEM_NODE 0x5122 +#define _STARPU_FUT_START_CALLBACK 0x5123 +#define _STARPU_FUT_END_CALLBACK 0x5124 +#define _STARPU_FUT_TASK_DONE 0x5125 +#define _STARPU_FUT_TAG_DONE 0x5126 +#define _STARPU_FUT_START_ALLOC 0x5127 +#define _STARPU_FUT_END_ALLOC 0x5128 +#define _STARPU_FUT_START_ALLOC_REUSE 0x5129 +#define _STARPU_FUT_END_ALLOC_REUSE 0x5130 +#define _STARPU_FUT_USED_MEM 0x512a +#define _STARPU_FUT_TASK_NAME 0x512b +#define 
_STARPU_FUT_DATA_WONT_USE 0x512c +#define _STARPU_FUT_TASK_COLOR 0x512d +#define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e +#define _STARPU_FUT_TASK_LINE 0x512f +#define _STARPU_FUT_START_MEMRECLAIM 0x5131 +#define _STARPU_FUT_END_MEMRECLAIM 0x5132 +#define _STARPU_FUT_START_DRIVER_COPY 0x5133 +#define _STARPU_FUT_END_DRIVER_COPY 0x5134 +#define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 +#define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 +#define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 +#define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 +#define _STARPU_FUT_USER_EVENT 0x5139 +#define _STARPU_FUT_SET_PROFILING 0x513a +#define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b +#define _STARPU_FUT_EVENT 0x513c +#define _STARPU_FUT_THREAD_EVENT 0x513d +#define _STARPU_FUT_CODELET_DETAILS 0x513e +#define _STARPU_FUT_CODELET_DATA 0x513f +#define _STARPU_FUT_LOCKING_MUTEX 0x5140 +#define _STARPU_FUT_MUTEX_LOCKED 0x5141 +#define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 +#define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 +#define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 +#define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 +#define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 +#define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 +#define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 +#define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 +#define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a +#define _STARPU_FUT_LOCKING_SPINLOCK 0x514b +#define _STARPU_FUT_SPINLOCK_LOCKED 0x514c +#define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d +#define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e +#define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f +#define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 +#define _STARPU_FUT_COND_WAIT_END 0x5151 +#define _STARPU_FUT_MEMORY_FULL 0x5152 +#define _STARPU_FUT_DATA_LOAD 0x5153 +#define _STARPU_FUT_START_UNPARTITION_ON_TID 0x5154 +#define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 +#define _STARPU_FUT_START_FREE 0x5156 +#define _STARPU_FUT_END_FREE 0x5157 +#define _STARPU_FUT_START_WRITEBACK 0x5158 +#define _STARPU_FUT_END_WRITEBACK 0x5159 +#define 
_STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a +#define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b +#define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c +#define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d +#define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 +#define _STARPU_FUT_HYPERVISOR_END 0x5161 +#define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 +#define _STARPU_FUT_BARRIER_WAIT_END 0x5163 +#define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 +#define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 +#define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 +#define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 +#define _STARPU_FUT_START_EXECUTING 0x5168 +#define _STARPU_FUT_END_EXECUTING 0x5169 +#define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a +#define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b +#define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c +#define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d +#define _STARPU_FUT_TASK_SUBMIT_START 0x516e +#define _STARPU_FUT_TASK_SUBMIT_END 0x516f +#define _STARPU_FUT_TASK_BUILD_START 0x5170 +#define _STARPU_FUT_TASK_BUILD_END 0x5171 +#define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 +#define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 +#define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 +#define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 +#define _STARPU_FUT_TASK_MPI_POST_START 0x5176 +#define _STARPU_FUT_TASK_MPI_POST_END 0x5177 +#define _STARPU_FUT_TASK_WAIT_START 0x5178 +#define _STARPU_FUT_TASK_WAIT_END 0x5179 +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b +#define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c +#define _STARPU_FUT_START_FETCH_INPUT 0x517e +#define _STARPU_FUT_END_FETCH_INPUT 0x517f +#define _STARPU_FUT_TASK_THROTTLE_START 0x5180 +#define _STARPU_FUT_TASK_THROTTLE_END 0x5181 +#define _STARPU_FUT_DATA_STATE_INVALID 0x5182 +#define _STARPU_FUT_DATA_STATE_OWNER 0x5183 +#define _STARPU_FUT_DATA_STATE_SHARED 0x5184 +#define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 +#define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 +#define 
_STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 +#define _STARPU_FUT_TASK_END_DEP 0x5188 +#define _STARPU_FUT_TASK_BUBBLE 0x5189 +#define _STARPU_FUT_START_PARALLEL_SYNC 0x518a +#define _STARPU_FUT_END_PARALLEL_SYNC 0x518b +#define _STARPU_MPI_FUT_START 0x5201 +#define _STARPU_MPI_FUT_STOP 0x5202 +#define _STARPU_MPI_FUT_BARRIER 0x5203 +#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN 0x5204 +#define _STARPU_MPI_FUT_ISEND_SUBMIT_END 0x5205 +#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN 0x5206 +#define _STARPU_MPI_FUT_IRECV_SUBMIT_END 0x5207 +#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN 0x5208 +#define _STARPU_MPI_FUT_ISEND_COMPLETE_END 0x5209 +#define _STARPU_MPI_FUT_DATA_SET_RANK 0x521a +#define _STARPU_MPI_FUT_IRECV_TERMINATED 0x521b +#define _STARPU_MPI_FUT_ISEND_TERMINATED 0x521c +#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN 0x521d +#define _STARPU_MPI_FUT_TESTING_DETACHED_END 0x521e +#define _STARPU_MPI_FUT_TEST_BEGIN 0x521f +#define _STARPU_MPI_FUT_TEST_END 0x5220 +#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN 0x520a +#define _STARPU_MPI_FUT_IRECV_COMPLETE_END 0x520b +#define _STARPU_MPI_FUT_SLEEP_BEGIN 0x520c +#define _STARPU_MPI_FUT_SLEEP_END 0x520d +#define _STARPU_MPI_FUT_DTESTING_BEGIN 0x520e +#define _STARPU_MPI_FUT_DTESTING_END 0x520f +#define _STARPU_MPI_FUT_UTESTING_BEGIN 0x5210 +#define _STARPU_MPI_FUT_UTESTING_END 0x5211 +#define _STARPU_MPI_FUT_UWAIT_BEGIN 0x5212 +#define _STARPU_MPI_FUT_UWAIT_END 0x5213 +#define _STARPU_MPI_FUT_POLLING_BEGIN 0x5214 +#define _STARPU_MPI_FUT_POLLING_END 0x5215 +#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN 0x5216 +#define _STARPU_MPI_FUT_DRIVER_RUN_END 0x5217 +#define _STARPU_MPI_FUT_DATA_SET_TAG 0x5218 +#define _STARPU_MPI_FUT_IRECV_NUMA_NODE 0x5219 +#define _STARPU_MPI_FUT_ISEND_NUMA_NODE 0x5221 +#define _STARPU_MPI_FUT_CHECKPOINT_BEGIN 0x5222 +#define _STARPU_MPI_FUT_CHECKPOINT_END 0x5223 +""" + +PROGNAME=sys.argv[0] + +number_events_path = None + +def usage(): + print("Convert event keys in number_events.data to event names") + 
print("") + print("Usage: %s " % PROGNAME) + print("") + print("Options:") + print(" -h, --help display this help and exit") + print(" -v, --version output version information and exit") + print("") + print("Report bugs to ") + sys.exit(1) + +if len(sys.argv) == 2: + if sys.argv[1] == '-v' or sys.argv[1] == '--version': + print("%s (StarPU) 1.4.10" % PROGNAME) + sys.exit(0) + elif sys.argv[1] == '-h' or sys.argv[1] == '--help': + usage() + else: + number_events_path = sys.argv[1] +else: + usage() + +# Process fxt_code_raw content to ease the conversion: +fxt_codes = dict() +for line in fxt_codes_raw.split("\n"): + elements = line.split() + + if len(elements) == 3: + key = int(elements[2][2:], 16) + assert key not in fxt_codes + + fxt_codes[key] = elements[1] + + +# Convert content of the file: +nb_events = 0 + +with open(number_events_path, 'r') as f: + for line in f: + elements = line.split() + if len(elements) == 2: + key = int(elements[0][2:], 16) + nb = int(elements[1]) + nb_events += nb + if key in fxt_codes: + print("%12d %s" % (nb, fxt_codes[key])) + else: + print("%12d %s" % (nb, elements[0])) + +print(" TOTAL: %d" % nb_events) diff --git a/tools/starpu_fxt_number_events_to_names.py.in b/tools/starpu_fxt_number_events_to_names.py.in new file mode 100644 index 0000000..037774e --- /dev/null +++ b/tools/starpu_fxt_number_events_to_names.py.in @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
+# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +import sys + +""" +Convert event keys into event names +Running starpu_fxt_tool with the option -number-events produces a file number_events.data +This file contains the number of events for each event type. +Events are represented with their key. +To convert event keys to event names, call starpu_fxt_number_events_to_names.py +""" + +# STARPU_FXT_EVENT_DEFINES is generated by configure and is the output of +# the following command: +# grep -E "#define\s+_STARPU_(MPI_)?FUT_" src/common/fxt.h mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 |cut -d : -f 2 + +fxt_codes_raw = """ +@STARPU_FXT_EVENT_DEFINES@ +""" + +PROGNAME=sys.argv[0] + +number_events_path = None + +def usage(): + print("Convert event keys in number_events.data to event names") + print("") + print("Usage: %s " % PROGNAME) + print("") + print("Options:") + print(" -h, --help display this help and exit") + print(" -v, --version output version information and exit") + print("") + print("Report bugs to <@PACKAGE_BUGREPORT@>") + sys.exit(1) + +if len(sys.argv) == 2: + if sys.argv[1] == '-v' or sys.argv[1] == '--version': + print("%s (@PACKAGE_NAME@) @PACKAGE_VERSION@" % PROGNAME) + sys.exit(0) + elif sys.argv[1] == '-h' or sys.argv[1] == '--help': + usage() + else: + number_events_path = sys.argv[1] +else: + usage() + +# Process fxt_code_raw content to ease the conversion: +fxt_codes = dict() +for line in fxt_codes_raw.split("\n"): + elements = line.split() + + if len(elements) == 3: + key = int(elements[2][2:], 16) + assert key not in fxt_codes + + fxt_codes[key] = elements[1] + + +# Convert content of the file: +nb_events = 0 + +with open(number_events_path, 'r') as f: + for line in f: + elements = line.split() + if len(elements) == 
2: + key = int(elements[0][2:], 16) + nb = int(elements[1]) + nb_events += nb + if key in fxt_codes: + print("%12d %s" % (nb, fxt_codes[key])) + else: + print("%12d %s" % (nb, elements[0])) + +print(" TOTAL: %d" % nb_events) diff --git a/tools/starpu_fxt_stats.1 b/tools/starpu_fxt_stats.1 new file mode 100644 index 0000000..6ecb199 --- /dev/null +++ b/tools/starpu_fxt_stats.1 @@ -0,0 +1,27 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_FXT_STAT "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_fxt_stat \- Print statistics from raw StarPU FxT trace +.SH SYNOPSIS +.B starpu_fxt_stat +[ \fI\,options \/\fR] +.SH DESCRIPTION +Parse the log generated by FxT +.SH OPTIONS +.TP +\fB\-i\fR +specify the input file. +.TP +\fB\-o\fR +specify the output file +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to . +open failed :: Bad address +.PP +open failed :: Bad address diff --git a/tools/starpu_fxt_stats.c b/tools/starpu_fxt_stats.c new file mode 100644 index 0000000..c37cdf0 --- /dev/null +++ b/tools/starpu_fxt_stats.c @@ -0,0 +1,220 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +//#include "fxt_tool.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static fxt_t fut; +struct fxt_ev_64 ev; + +static uint64_t transfers[16][16]; + +#define PROGNAME "starpu_fxt_stat" + +static void usage() +{ + fprintf(stderr, "Parse the log generated by FxT\n\n"); + fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -i specify the input file.\n"); + fprintf(stderr, " -o specify the output file\n"); + fprintf(stderr, " -h, --help display this help and exit\n"); + fprintf(stderr, " -v, --version output version information and exit\n\n"); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); +} + +static int parse_args(int argc, char **argv, char **fin, char **fout) +{ + int i; + + *fin = NULL; + *fout = NULL; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-o") == 0) + { + *fout = argv[++i]; + continue; + } + + if (strcmp(argv[i], "-i") == 0) + { + *fin = argv[++i]; + continue; + } + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + usage(); + return EXIT_SUCCESS; + } + + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + return EXIT_SUCCESS; + } + } + + if (!*fin) + { + fprintf(stderr, "Incorrect usage, aborting\n"); + usage(); + return 77; + } + return 0; +} + +static void handle_data_copy(void) +{ + unsigned src = ev.param[0]; + unsigned dst = ev.param[1]; + unsigned size = ev.param[2]; + + transfers[src][dst] += size; + +// printf("transfer %d -> %d : %d \n", src, dst, size); +} + +/* + * This program should be used to parse the log generated by FxT + */ +int main(int argc, char **argv) +{ + char *fin, *fout; + int ret; + int fd_in; + FILE *fd_out; + + ret = parse_args(argc, argv, &fin, &fout); + if (ret) return ret; + + fd_in = open(fin, 
O_RDONLY); + if (fd_in < 0) + { + perror("open failed :"); + exit(-1); + } + + fut = fxt_fdopen(fd_in); + if (!fut) + { + perror("fxt_fdopen :"); + exit(-1); + } + + if (!fout) + { + fd_out = stdout; + } + else + { + fd_out = fopen(fout, "w"); + if (fd_out == NULL) + { + perror("open failed :"); + exit(-1); + } + } + + fxt_blockev_t block; + block = fxt_blockev_enter(fut); + + unsigned njob = 0; + unsigned nws = 0; + + double start_time = 10e30; + double end_time = -10e30; + + while(1) + { + ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); + if (ret != FXT_EV_OK) + { + fprintf(stderr, "no more block ...\n"); + break; + } + + end_time = STARPU_MAX(end_time, ev.time); + start_time = STARPU_MIN(start_time, ev.time); + + STARPU_ATTRIBUTE_UNUSED int nbparam = ev.nb_params; + + switch (ev.code) + { + case _STARPU_FUT_DATA_COPY: + handle_data_copy(); + break; + case _STARPU_FUT_JOB_POP: + njob++; + break; + case _STARPU_FUT_WORK_STEALING: + nws++; + break; + default: + break; + } + } + +#ifdef HAVE_FXT_BLOCKEV_LEAVE + fxt_blockev_leave(block); +#endif + +#ifdef HAVE_FXT_CLOSE + fxt_close(fut); +#else + if (close(fd_in)) + { + perror("close failed :"); + exit(-1); + } +#endif + + fprintf(fd_out, "Start : start time %e end time %e length %e\n", start_time, end_time, end_time - start_time); + + unsigned src, dst; + for (src = 0; src < 16; src++) + { + for (dst = 0; dst < 16; dst++) + { + if (transfers[src][dst] != 0) + { + fprintf(fd_out, "%u -> %u \t %lu MB\n", src, dst, (unsigned long)(transfers[src][dst]/(1024*1024))); + } + } + } + + fprintf(fd_out, "There was %u tasks and %u work stealing\n", njob, nws); + if (fd_out != stdout) + fclose(fd_out); + + return 0; +} diff --git a/tools/starpu_fxt_tool.1 b/tools/starpu_fxt_tool.1 new file mode 100644 index 0000000..4f5d359 --- /dev/null +++ b/tools/starpu_fxt_tool.1 @@ -0,0 +1,65 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. 
+.TH STARPU_FXT_TOOL "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_fxt_tool \- Convert raw StarPU FxT trace to various traces +.SH SYNOPSIS +.B starpu_fxt_tool +[ \fI\,options \/\fR] +.SH DESCRIPTION +Generate a trace in the Paje format +.SH OPTIONS +.TP +\fB\-i\fR +specify the input file[s]. Several files can be provided, +or the option specified several times for MPI execution +case +.TP +\fB\-o\fR +specify the paje output filename +.TP +\fB\-d\fR +specify the directory in which to save files +.TP +\fB\-c\fR +use a different colour for every type of task +.TP +\fB\-no\-events\fR +do not show events +.TP +\fB\-no\-counter\fR +do not show scheduler counters +.TP +\fB\-no\-bus\fR +do not show PCI bus transfers +.TP +\fB\-no\-flops\fR +do not show flops +.TP +\fB\-no\-smooth\fR +avoid smoothing values for gflops etc. +.TP +\fB\-no\-acquire\fR +do not show application data acquisitions tasks in DAG +.TP +\fB\-label\-deps\fR +add label on dependencies. +.TP +\fB\-memory\-states\fR +show detailed memory states of handles +.TP +\fB\-internal\fR +show StarPU\-internal tasks in DAG +.TP +\fB\-number\-events\fR +generate a file counting FxT events by type +.TP +\fB\-use\-task\-color\fR +propagate the specified task color to the contexts +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_fxt_tool.c b/tools/starpu_fxt_tool.c new file mode 100644 index 0000000..114c413 --- /dev/null +++ b/tools/starpu_fxt_tool.c @@ -0,0 +1,145 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This program should be used to parse the log generated by FxT + */ + +#include +#include +#include + +#define PROGNAME "starpu_fxt_tool" + +static void usage() +{ + fprintf(stderr, "Generate a trace in the Paje format\n\n"); + fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -i specify the input file[s]. 
Several files can be provided,\n"); + fprintf(stderr, " or the option specified several times for MPI execution\n"); + fprintf(stderr, " case\n"); + fprintf(stderr, " -o specify the paje output filename\n"); + fprintf(stderr, " -d specify the directory in which to save files\n"); + fprintf(stderr, " -c use a different colour for every type of task\n"); + fprintf(stderr, " -no-events do not show events\n"); + fprintf(stderr, " -no-counter do not show scheduler counters\n"); + fprintf(stderr, " -no-bus do not show PCI bus transfers\n"); + fprintf(stderr, " -no-flops do not show flops\n"); + fprintf(stderr, " -no-smooth avoid smoothing values for gflops etc.\n"); + fprintf(stderr, " -no-acquire do not show application data acquisitions tasks in DAG\n"); + fprintf(stderr, " -label-deps add label on dependencies.\n"); + fprintf(stderr, " -memory-states show detailed memory states of handles\n"); + fprintf(stderr, " -internal show StarPU-internal tasks in DAG\n"); + fprintf(stderr, " -number-events generate a file counting FxT events by type\n"); + fprintf(stderr, " -use-task-color propagate the specified task color to the contexts\n"); + fprintf(stderr, " -h, --help display this help and exit\n"); + fprintf(stderr, " -v, --version output version information and exit\n\n"); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); +} + +static struct starpu_fxt_options options; + +static int parse_args(int argc, char **argv) +{ + /* Default options */ + starpu_fxt_options_init(&options); + + /* We want to support arguments such as "fxt_tool -i trace_*" */ + unsigned reading_input_filenames = 0; + + int i; + for (i = 1; i < argc; i++) + { + int ret = _starpu_generate_paje_trace_read_option(argv[i], &options); + if (ret == 0) + { + reading_input_filenames = 0; + } + else if (strcmp(argv[i], "-o") == 0) + { + free(options.out_paje_path); + options.out_paje_path = strdup(argv[++i]); + reading_input_filenames = 0; + } + else if (strcmp(argv[i], 
"-d") == 0) + { + options.dir = argv[++i]; + reading_input_filenames = 0; + } + else if (strcmp(argv[i], "-i") == 0) + { + if (options.ninputfiles >= STARPU_FXT_MAX_FILES) + { + fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES); + return 7; + } + options.filenames[options.ninputfiles++] = argv[++i]; + reading_input_filenames = 1; + } + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) + { + usage(); + return 77; + } + else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + return 77; + } + + /* That's pretty dirty: if the reading_input_filenames flag is + * set, and that the argument does not match an option, we + * assume this may be another filename */ + else if (reading_input_filenames) + { + if (options.ninputfiles >= STARPU_FXT_MAX_FILES) + { + fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES); + return 7; + } + options.filenames[options.ninputfiles++] = argv[i]; + } + } + + if (!options.ninputfiles) + { + fprintf(stderr, "Incorrect usage, aborting\n"); + usage(); + return 77; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int ret = parse_args(argc, argv); + if (ret) + { + starpu_fxt_options_shutdown(&options); + return ret; + } + + starpu_fxt_generate_trace(&options); + + starpu_fxt_options_shutdown(&options); + + return 0; +} diff --git a/tools/starpu_lp2paje.1 b/tools/starpu_lp2paje.1 new file mode 100644 index 0000000..c8c889e --- /dev/null +++ b/tools/starpu_lp2paje.1 @@ -0,0 +1,11 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. 
+.TH STARPU_LP2PAJE "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_lp2paje \- Convert lp StarPU schedule into Paje format +.SH SYNOPSIS +.B lp_solve +\fI\,file.lp | starpu_lp2paje > paje.trace\/\fR +.SH DESCRIPTION +Convert schedule optimized by lp into the Paje format +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_lp2paje.c b/tools/starpu_lp2paje.c new file mode 100644 index 0000000..9bdc506 --- /dev/null +++ b/tools/starpu_lp2paje.c @@ -0,0 +1,162 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +#define PROGNAME "starpu_lp2paje" + +struct task +{ + double start; + double stop; + int num; + int worker; +}; + +int main(int argc, char *argv[]) +{ + int nw, nt; + double tmax; + int i, w, ww, t, tt; + int foo; + double bar; + + if (argc != 1) + { + if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) + { + fprintf(stderr, "%s (%s) %s\n", PROGNAME, PACKAGE_NAME, PACKAGE_VERSION); + exit(EXIT_SUCCESS); + } + fprintf(stderr, "Convert schedule optimized by lp into the Paje format\n\n"); + fprintf(stderr, "Usage: lp_solve file.lp | %s > paje.trace\n", PROGNAME); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); + exit(EXIT_SUCCESS); + } + assert(scanf("Suboptimal solution\n") == 0); + assert(scanf("\nValue of objective function: %lf\n", &tmax) == 1); + + assert(scanf("Actual values of the variables:\n") == 0); + assert(scanf("tmax %lf\n", &tmax) == 1); + assert(scanf("nt %d\n", &nt) == 1); + assert(nt >= 0); + assert(scanf("nw %d\n", &nw) == 1); + assert(nw >= 0); + printf( +"%%EventDef PajeDefineContainerType 1\n" +"%% Alias string\n" +"%% ContainerType string\n" +"%% Name string\n" +"%%EndEventDef\n" +"%%EventDef PajeCreateContainer 2\n" +"%% Time date\n" +"%% Alias string\n" +"%% Type string\n" +"%% Container string\n" +"%% Name string\n" +"%%EndEventDef\n" +"%%EventDef PajeDefineStateType 3\n" +"%% Alias string\n" +"%% ContainerType string\n" +"%% Name string\n" +"%%EndEventDef\n" +"%%EventDef PajeDestroyContainer 4\n" +"%% Time date\n" +"%% Name string\n" +"%% Type string\n" +"%%EndEventDef\n" +"%%EventDef PajeDefineEntityValue 5\n" +"%% Alias string\n" +"%% EntityType string\n" +"%% Name string\n" +"%% Color color\n" +"%%EndEventDef\n" +"%%EventDef PajeSetState 6\n" +"%% Time date\n" +"%% Type string\n" +"%% Container string\n" +"%% Value string\n" +"%%EndEventDef\n" +"1 W 0 Worker\n" +); + printf("3 S W \"Worker State\"\n"); + for (t = 0; t < nt; 
t++) + printf("5 R%d S Running_%d \"0.0 1.0 0.0\"\n", t, t); + printf("5 F S Idle \"1.0 0.0 0.0\"\n"); + for (i = 0; i < nw; i++) + printf("2 0 W%d W 0 \"%d\"\n", i, i); + + for (w = 0; w < nw; w++) + printf("4 %f W%d W\n", tmax, w); + + fprintf(stderr,"%d workers, %d tasks\n", nw, nt); + { + struct task task[nt]; + memset(&task, 0, sizeof(task)); + for (t = nt-1; t >= 0; t--) + { + assert(scanf("c%d %lf\n", &foo, &task[t].stop) == 2); + } + + for (t = nt-1; t >= 0; t--) + for (w = 0; w < nw; w++) + { + assert(scanf("t%dw%d %lf\n", &tt, &ww, &bar) == 3); + assert(ww == w); + + if (bar > 0.5) + { + task[t].num = tt; + task[t].worker = w; + } + } + for (t = nt-1; t >= 0; t--) + { + assert(scanf("s%d %lf\n", &tt, &task[t].start) == 2); + fprintf(stderr,"%d: task %d on %d: %f - %f\n", nt-1-t, tt, task[t].worker, task[t].start, task[t].stop); + assert(tt == task[t].num); + } + + for (t = 0; t < nt; t++) + { + printf("6 %f S W%d R%d\n", task[t].start, task[t].worker, t); + printf("6 %f S W%d F\n", task[t].stop, task[t].worker); + } + + for (t = 0; t < nt; t++) + { + int t2; + for (t2 = 0; t2 < nt; t2++) + { + if (t != t2 && task[t].worker == task[t2].worker) + { + if (!(task[t].start >= task[t2].stop + || task[t2].start >= task[t].stop)) + { + fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", task[t].num, task[t2].num, task[t].worker); + } + } + } + } + } + + return 0; +} diff --git a/tools/starpu_machine_display.1 b/tools/starpu_machine_display.1 new file mode 100644 index 0000000..539c90b --- /dev/null +++ b/tools/starpu_machine_display.1 @@ -0,0 +1,33 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_MACHINE_DISPLAY "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_machine_display \- Display machine StarPU information +.SH SYNOPSIS +.B starpu_machine_display +[\fI\,OPTION\/\fR] +.SH DESCRIPTION +Show the processing units that StarPU can use, +and the bandwidth and affinity measured between the memory nodes. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-i\fR, \fB\-\-info\fR +display the name of the files containing the information +.TP +\fB\-f\fR, \fB\-\-force\fR +force bus sampling and show measures +.HP +\fB\-w\fR, \fB\-\-worker\fR only show workers of the given type +.TP +\fB\-c\fR, \fB\-\-count\fR +only display the number of workers +.TP +\fB\-n\fR, \fB\-\-notopology\fR +do not display the bandwidth and affinity +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_machine_display.c b/tools/starpu_machine_display.c new file mode 100644 index 0000000..70410df --- /dev/null +++ b/tools/starpu_machine_display.c @@ -0,0 +1,259 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#include +#include +#include +#include +#include + +#define PROGNAME "starpu_machine_display" + +static void usage() +{ + fprintf(stderr, "Show the processing units that StarPU can use,\n"); + fprintf(stderr, "and the bandwidth and affinity measured between the memory nodes.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: %s [OPTION]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-h, --help display this help and exit\n"); + fprintf(stderr, "\t-v, --version output version information and exit\n"); + fprintf(stderr, "\t-i, --info display the name of the files containing the information\n"); + fprintf(stderr, "\t-f, --force force bus sampling and show measures \n"); + fprintf(stderr, "\t-w, --worker only show workers of the given type\n"); + fprintf(stderr, "\t-c, --count only display the number of workers\n"); + fprintf(stderr, "\t-n, --notopology do not display the bandwidth and affinity\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Report bugs to <%s>.\n", PACKAGE_BUGREPORT); +} + +static void display_combined_worker(unsigned workerid) +{ + int worker_size; + int *combined_workerid; + starpu_combined_worker_get_description(workerid, &worker_size, &combined_workerid); + + fprintf(stdout, "\t\t"); + + int i; + for (i = 0; i < worker_size; i++) + { + char name[256]; + + starpu_worker_get_name(combined_workerid[i], name, 256); + + fprintf(stdout, "%s\t", name); + } + + fprintf(stdout, "\n"); +} + +static void display_all_combined_workers(void) +{ + unsigned ncombined_workers = starpu_combined_worker_get_count(); + + if (ncombined_workers == 0) + return; + + unsigned nworkers = starpu_worker_get_count(); + + fprintf(stdout, "\t%u Combined workers\n", ncombined_workers); + + unsigned i; + for (i = 0; i < ncombined_workers; i++) + display_combined_worker(nworkers + i); +} + +static void parse_args(int argc, char **argv, int *force, int *info, int *count, int *topology, char **worker_type) +{ + int i; + + if 
(argc == 1) + return; + + for (i = 1; i < argc; i++) + { + if (strncmp(argv[i], "--force", 7) == 0 || strncmp(argv[i], "-f", 2) == 0) + { + *force = 1; + } + else if (strncmp(argv[i], "--info", 6) == 0 || strncmp(argv[i], "-i", 2) == 0) + { + *info = 1; + } + else if (strncmp(argv[i], "--help", 6) == 0 || strncmp(argv[i], "-h", 2) == 0) + { + usage(); + exit(EXIT_FAILURE); + } + else if (strncmp(argv[i], "--version", 9) == 0 || strncmp(argv[i], "-v", 2) == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + exit(EXIT_FAILURE); + } + else if (strncmp(argv[i], "--count", 7) == 0 || strncmp(argv[i], "-c", 2) == 0) + { + *count = 1; + } + else if (strncmp(argv[i], "--worker", 8) == 0 || strncmp(argv[i], "-w", 2) == 0) + { + *worker_type = strdup(argv[++i]); + } + else if (strncmp(argv[i], "--notopology", 12) == 0 || strncmp(argv[i], "-n", 2) == 0) + { + *topology = 0; + } + else + { + fprintf(stderr, "Unknown arg %s\n", argv[1]); + usage(); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) +{ + int ret; + int force = 0; + int info = 0; + int count = 0; + int topology = 1; + char *worker_type = NULL; + struct starpu_conf conf; + + parse_args(argc, argv, &force, &info, &count, &topology, &worker_type); + + starpu_conf_init(&conf); + if (force) + conf.bus_calibrate = 1; + + /* Even if starpu_init returns -ENODEV, we should go on : we will just + * print that we found no device. 
*/ + ret = starpu_init(&conf); + if (ret != 0 && ret != -ENODEV) + { + return ret; + } + starpu_worker_wait_for_initialisation(); + + if (info) + { + starpu_bus_print_filenames(stdout); + starpu_shutdown(); + return 0; + } + + char real_hostname[128]; + char starpu_hostname[128]; + gethostname(real_hostname, sizeof(real_hostname)); + _starpu_gethostname(starpu_hostname, sizeof(starpu_hostname)); + fprintf(stdout, "Real hostname: %s (StarPU hostname: %s)\n", real_hostname, starpu_hostname); + + const char *env[] = + { + "STARPU_NCPU", + "STARPU_NCPUS", + "STARPU_NCUDA", + "STARPU_NHIP", + "STARPU_NOPENCL", + "STARPU_NMAX_FPGA", + "STARPU_NMPI_MS", + "STARPU_NTCPIP_MS", + + "STARPU_WORKERS_CPUID", + "STARPU_WORKERS_COREID", + "STARPU_NTHREADS_PER_CORE", + "STARPU_RESERVE_NCPU", + "STARPU_MAIN_THREAD_BIND", + "STARPU_MAIN_THREAD_CPUID", + "STARPU_MAIN_THREAD_COREID", + + "STARPU_WORKERS_CUDAID", + "STARPU_CUDA_THREAD_PER_WORKER", + "STARPU_CUDA_THREAD_PER_DEV", + + "STARPU_WORKERS_OPENCLID", + "STARPU_WORKERS_MAX_FPGAID", + + "STARPU_MPI_MS_MULTIPLE_THREAD", + "STARPU_NMPIMSTHREADS", + "STARPU_TCPIP_MS_MULTIPLE_THREAD", + "STARPU_NTCPIPMSTHREADS", + + "STARPU_MPI_HOSTNAMES", + "STARPU_HOSTNAME", + NULL + }; + + int i; + static int message=0; + for (i = 0; env[i]; i++) + { + const char *e = getenv(env[i]); + if (e) + { + if (!message) + { + fprintf(stdout, "Environment variables\n"); + message=1; + } + fprintf(stdout, "\t%s=%s\n", env[i], e); + } + } + if (message) + fprintf(stdout,"\n"); + + void (*func)(FILE *output, enum starpu_worker_archtype type) = &starpu_worker_display_names; + if (count == 1) + func = &starpu_worker_display_count; + + enum starpu_worker_archtype type; + + if (worker_type) + { + type = starpu_worker_get_type_from_string(worker_type); + if (type == STARPU_UNKNOWN_WORKER) + fprintf(stderr, "Unknown worker type '%s'\n", worker_type); + else + func(stdout, type); + } + else + { + fprintf(stdout, "StarPU has found :\n"); + + for (type = 0; type < 
STARPU_NARCH; type++) + func(stdout, type); + + display_all_combined_workers(); + } + + if (ret != -ENODEV) + { + if (topology == 1) + { + fprintf(stdout, "\ntopology ... (hwloc logical indexes)\n"); + starpu_topology_print(stdout); + + fprintf(stdout, "\nbandwidth (MB/s) and latency (us)...\n"); + starpu_bus_print_bandwidth(stdout); + } + starpu_shutdown(); + } + + return 0; +} diff --git a/tools/starpu_mlr_analysis b/tools/starpu_mlr_analysis new file mode 100755 index 0000000..4b9521c --- /dev/null +++ b/tools/starpu_mlr_analysis @@ -0,0 +1,87 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast + +# File names +SOURCE_DIR=$(dirname $0) + +outputfile="mlr_analysis.html" +analysis_script="$SOURCE_DIR/starpu_mlr_analysis.Rmd" + +# Command line arguments +inputfile="" + +help_script() +{ +cat << EOF +Give an example of the trace analysis for computing multiple linear regression model + +Options: + -h Show this message + +Examples: +$0 .starpu/sampling/codelets/tmp/test_mlr.out + +Report bugs to +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then + help_script + exit 0 +fi + +while getopts "h" opt; do + case $opt in + \?) 
+ echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfile=$1 +# Error if there is more than one input file +if [[ $# < 1 || $# > 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +if [ ! -s $inputfile ] + then + echo "Error: file $inputfile does not exist!" + exit 5 +fi + +##################################### +# Running analysis file to get actual results +in="$(cd "$(dirname "$inputfile")"; pwd)/$(basename "$inputfile")" + +Rscript -e "library(knitr); input_trace = '$in' ; outputhtml='$outputfile';\ + outputRmd = gsub('.html\$','.Rmd',outputhtml);\ + knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" diff --git a/tools/starpu_mlr_analysis.Rmd b/tools/starpu_mlr_analysis.Rmd new file mode 100644 index 0000000..b27c18c --- /dev/null +++ b/tools/starpu_mlr_analysis.Rmd @@ -0,0 +1,256 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +```{r Setup, echo=FALSE} +opts_chunk$set(echo=FALSE) +``` + +```{r Load_R_files_and_functions} +print_codelet <- function(reg,codelet){ + cat(paste("/* ############################################ */", "\n")) + cat(paste("/*\t Automatically generated code */", "\n")) + cat(paste("\t Check for potential errors and be sure parameter value are written in good order (alphabetical one by default)", "\n")) + cat(paste("\t Adjusted R-squared: ", summary(reg)$adj.r.squared, "*/\n\n")) + + ncomb <- reg$rank - 1 + cat(paste("\t ", codelet, ".model->ncombinations = ", ncomb, ";\n", sep="")) + + cat(paste("\t ", codelet, ".model->combinations = (unsigned **) malloc(", codelet, ".model->ncombinations*sizeof(unsigned *))", ";\n\n", sep="")) + + cat(paste("\t if (", codelet, ".model->combinations)", "\n", "\t {\n", sep="")) + cat(paste("\t for (unsigned i = 0; i < ", codelet, ".model->ncombinations; i++)", "\n", "\t {\n", sep="")) + cat(paste("\t ", codelet, ".model->combinations[i] = (unsigned *) malloc(", codelet, ".model->nparameters*sizeof(unsigned))", ";\n", "\t }\n", "\t }\n\n", sep="")) + + # Computing combinations + df <- data.frame(attr(reg$terms, "factors")) + df <- df/2 + df$Params <- row.names(df) + df <-df[c(2:nrow(df)),] + + i=1 + options(warn=-1) + for(i in (1:nrow(df))) + { + name <- df[i,]$Params + if (grepl("I\\(*", name)) + { + exp <- as.numeric(gsub("(.*?)\\^(.*?)\\)", "\\2", name)) + df[i,] <- as.numeric(df[i,]) * exp + df[i,]$Params <- as.character(gsub("I\\((.*?)\\^(.*?)\\)", "\\1", name)) + } + } + df <- aggregate(. 
~ Params, transform(df, Params), sum) + options(warn=0) + + i=1 + j=1 + for(j in (2:length(df))) + { + for(i in (1:nrow(df))) + { + cat(paste("\t ", codelet, ".model->combinations[", j-2, "][", i-1, "] = ", as.numeric(df[i,j]), ";\n", sep="")) + } + } + + cat(paste("/* ############################################ */", "\n")) +} + +df<-read.csv(input_trace, header=TRUE) + +opts_chunk$set(echo=TRUE) +``` + +# Multiple Linear Regression Model Example + +## Introduction + +This document demonstrates the type of the analysis needed to compute +the multiple linear regression model of the task. It relies on the +input data benchmarked by the StarPU (or any other tool, but following +the same format). The input data used in this example is generated by +the task "mlr_init", from the "examples/mlr/mlr.c". + +This document can be used as an template for the analysis of any other +task. + +### How to compile + + ./starpu_mlr_analysis .starpu/sampling/codelets/tmp/mlr_init.out + +### Software dependencies + +In order to run the analysis you need to have R installed: + + sudo apt-get install r-base + +In order to compile this document, you need *knitr* (although you can +perfectly only use the R code from this document without knitr). If +you decided that you want to generate this document, then start R +(e.g., from terminal) and install knitr package: + + R> install.packages("knitr") + +No additional R packages are needed. + +## First glimpse at the data + +First, we show the relations between all parameters in a single plot. + +```{r InitPlot} +plot(df) +``` + +For this example, all three parameters M, N, K have some influence, +but their relation is not easy to understand. + +In general, this type of plots can typically show if there are +outliers. It can also show if there is a group of parameters which are +mutually perfectly correlated, in which case only a one parameter from +the group should be kept for the further analysis. 
Additionally, the plot +can show the parameters that have a constant value, and since these +cannot have an influence on the model, they should also be ignored. + +However, making conclusions based solely on the visual analysis can be +treacherous and it is better to rely on the statistical tools. The +multiple linear regression methods used in the following sections will +also be able to detect and ignore these irrelevant +parameters. Therefore, this initial visual look should only be used to +get a basic idea about the model, but all the parameters should be +kept for now. + +## Initial model + +At this point, an initial model is computed, using all the parameters, +but not taking into account their exponents or the relations between +them. + +```{r Model1} +model1 <- lm(data=df, Duration ~ M+N+K) +summary(model1) +``` + +For each parameter and the constant in the first column, an estimation +of the corresponding coefficient is provided along with the 95% +confidence interval. If there are any parameters with NA value, which +suggests that the parameters are correlated to another parameter or +that their value is constant, these parameters should not be used in +the following model computations. The stars in the last column +indicate the significance of each parameter. However, having the maximum of +three stars for each parameter does not necessarily mean that the +model is perfect and we should always inspect the adjusted R^2 value +(the closer it is to 1, the better the model is). For users who +are not familiar with multiple linear regression analysis and R tools, +we suggest consulting the R documentation. Some explanations are also provided +in the following article: https://hal.inria.fr/hal-01180272. + +In this example, all parameters M, N, K are very important. However, +it is not clear if there are some relations between them or if some of +these parameters should be used with an exponent.
Moreover, the adjusted +R^2 value is not extremely high and we hope we can get a better +one. Thus, we proceed to the more advanced analysis. + +## Refining the model + +Now, we can look for relations between the parameters. Note that +trying all the possible combinations for the cases with a huge number +of parameters can be prohibitively long. Thus, it may be better to first +get rid of the parameters which seem to have very small influence +(typically the ones with no stars from the table in the previous +section). + +```{r Model2} +model2 <- lm(data=df, Duration ~ M*N*K) +summary(model2) +``` + +This model is more accurate, as the R^2 value increased. We can also +try some of these parameters with the exponents. + +```{r Model3} +model3 <- lm(data=df, Duration ~ I(M^2)+I(M^3)+I(N^2)+I(N^3)+I(K^2)+I(K^3)) +summary(model3) +``` + +It seems like some parameters are important. Now we combine these and +try to find the optimal combination (here we go directly to the final +solution, although this process typically takes several iterations of +trying different combinations). + +```{r Model4} +model4 <- lm(data=df, Duration ~ I(M^2):N+I(N^3):K) +summary(model4) +``` + +This seems to be the most accurate model, with a high R^2 value. We +can proceed to its validation. + +## Validation + +Once the model has been computed, we should validate it. Apart from +a low adjusted R^2 value, the weakness of a model can be observed +even better when inspecting the residuals. The results on the two +following plots (and thus the accuracy of the model) will greatly +depend on the measurement variability and the design of experiments. + +```{r Validation} +par(mfrow=c(1,2)) +plot(model4, which=c(1:2)) +``` + +Generally speaking, if there are some structures on the left plot, +this can indicate that there are certain phenomena not explained by +the model.
Many points on the same horizontal line represent +repetitive occurrences of the task with the same parameter values, +which is typical for a single experiment run with homogeneous +data. The fact that there is some variability is common, as executing +exactly the same code on a real machine will always take a slightly +different duration. However, having a huge variability means that the +benchmarks were very noisy, thus deriving accurate models from them +will be hard. + +The plot on the right may show that the residuals do not follow the normal +distribution. Therefore, such a model would overall have a limited +predictive power. + +If we are not satisfied with the accuracy of the observed models, we +should go back to the previous section and try to find a better +one. In some cases, the benchmarked data is just too noisy or the +choice of the parameters is not appropriate, and thus the experiments +should be redesigned and rerun. + +When we are finally satisfied with the model accuracy, we should +modify our task code, so that StarPU knows which parameter +combinations are used in the model. + +## Generating C code + +Depending on the way the task codelet is programmed, this section may +be somewhat useful. This is a simple helper to generate C code for the +parameter combinations and it should be copied to the task +description in the application. The function generating the code is +not so robust, so make sure that the generated code correctly +corresponds to the computed model (e.g., parameters are considered in +alphabetical order). + +```{r Code} +print_codelet(model4, "mlr_cl") +``` + +## Conclusion + +We have computed the model for our benchmarked data using multiple +linear regression. After encoding this model into the task code, +StarPU will be able to automatically compute the coefficients and use +the model to predict task duration.
diff --git a/tools/starpu_mlr_analysis.in b/tools/starpu_mlr_analysis.in new file mode 100644 index 0000000..f060e8f --- /dev/null +++ b/tools/starpu_mlr_analysis.in @@ -0,0 +1,87 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast + +# File names +SOURCE_DIR=$(dirname $0) + +outputfile="mlr_analysis.html" +analysis_script="$SOURCE_DIR/starpu_mlr_analysis.Rmd" + +# Command line arguments +inputfile="" + +help_script() +{ +cat << EOF +Give an example of the trace analysis for computing multiple linear regression model + +Options: + -h Show this message + +Examples: +$0 .starpu/sampling/codelets/tmp/test_mlr.out + +Report bugs to <@PACKAGE_BUGREPORT@> +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then + help_script + exit 0 +fi + +while getopts "h" opt; do + case $opt in + \?) + echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfile=$1 +# Error if there is more than one input file +if [[ $# < 1 || $# > 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +if [ ! -s $inputfile ] + then + echo "Error: file $inputfile does not exist!" 
+ exit 5 +fi + +##################################### +# Running analysis file to get actual results +in="$(cd "$(dirname "$inputfile")"; pwd)/$(basename "$inputfile")" + +Rscript -e "library(knitr); input_trace = '$in' ; outputhtml='$outputfile';\ + outputRmd = gsub('.html\$','.Rmd',outputhtml);\ + knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" diff --git a/tools/starpu_mpi_comm_matrix.1 b/tools/starpu_mpi_comm_matrix.1 new file mode 100644 index 0000000..422581a --- /dev/null +++ b/tools/starpu_mpi_comm_matrix.1 @@ -0,0 +1,21 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_MPI_COMM_MATRIX.PY "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_mpi_comm_matrix.py \- Draw StarPU MPI communications matrix +.SH SYNOPSIS +.B starpu_mpi_comm_matrix.py +\fI\,\/\fR +.SH DESCRIPTION +Offline tool to draw a communication matrix +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-png\fR +produce plots in png format (default is pdf) +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_mpi_comm_matrix.py b/tools/starpu_mpi_comm_matrix.py new file mode 100755 index 0000000..5c59378 --- /dev/null +++ b/tools/starpu_mpi_comm_matrix.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +# coding=utf-8 +# +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +""" +Plot statistics produced when running an application with STARPU_MPI_STATS=1 +""" + +import sys +import re +import os + +PROGNAME=sys.argv[0] + +def usage(): + print("Offline tool to draw a communication matrix") + print("") + print("Usage: %s " % PROGNAME) + print("") + print("Options:") + print(" -h, --help display this help and exit") + print(" -v, --version output version information and exit") + print(" -png produce plots in png format (default is pdf)") + print("") + print("Report bugs to ") + sys.exit(1) + +if len(sys.argv) >= 2: + if sys.argv[1] == '-v' or sys.argv[1] == '--version': + print("%s (StarPU) 1.4.10" % PROGNAME) + sys.exit(0) + if sys.argv[1] == '-h' or sys.argv[1] == '--help': + usage() +if len(sys.argv) == 1: + usage() + +if len(sys.argv) >= 2 and sys.argv[1] == '-png': + outputformat='png' + outputext='png' + outputfile=sys.argv[2] +else: + outputformat='pdf color' + outputext='pdf' + outputfile=sys.argv[1] + +# find the number of nodes +nodes=0 +file = open(outputfile, "r") +for line in file.readlines(): + if re.search('TOTAL', line): + (node,stuff)=line.split(sep="[")[2].split("]") + if int(node) > nodes: + nodes=int(node) +file.close() +nodes=nodes+1 + +# extract volume of comm and bandwidth between all pair of nodes +volumes = [[0 for _ in range(nodes)] for _ in range(nodes)] +bandwidth = [[0 for _ in range(nodes)] for _ in range(nodes)] +file = open(outputfile, "r") +for line in file.readlines(): + if re.search(r'\[starpu_comm_stats]', line) and not re.search('TOTAL', line) and re.search('MB/s', line): + (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split() + (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":") + volumes[int(src)][int(dst)] = float(volB) + bandwidth[int(src)][int(dst)] = float(bwB) +file.close() + +def write_data(filename, nodes, data): + ofile=open(filename, "w") + for dst in range(nodes): + for src in range(nodes): + 
ofile.write("%f "% data[src][dst]) + ofile.write("\n") + ofile.close() + +def generate_gnuplot_script(filename, datafilename, outputfile, nodes): + ofile=open(filename, "w") + srctics="" + dsttics="" + for node in range(nodes-1): + srctics += "\"src%d\" %d, " % (node, node) + dsttics += "\"dst%d\" %d, " % (node, node) + ofile.write("set term %s\n" % outputformat) + ofile.write("set output \"%s.%s\"\n" % (outputfile, outputext)) + ofile.write("set view map scale 1\nset style data lines\n") + ofile.write("set palette gray\n") + ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1)) + ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1)) + ofile.write("plot '%s' matrix with image\n" % datafilename) + ofile.close() + +# generate gnuplot volume data and script file +write_data(outputfile+"_volume.data", nodes, volumes) +generate_gnuplot_script(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap", nodes) +os.system("gnuplot " + outputfile+"_volume.gp") +print("Generated file \"%s.%s\"" % (outputfile+"_volume.data", outputext)) + +# generate gnuplot bandwidth data and script file +write_data(outputfile+"_bw.data", nodes, bandwidth) +generate_gnuplot_script(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap", nodes) +os.system("gnuplot " + outputfile+"_bw.gp") +print("Generated file \"%s.%s\"" % (outputfile+"_bw.data", outputext)) diff --git a/tools/starpu_mpi_comm_matrix.py.in b/tools/starpu_mpi_comm_matrix.py.in new file mode 100755 index 0000000..761f494 --- /dev/null +++ b/tools/starpu_mpi_comm_matrix.py.in @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +# coding=utf-8 +# +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +""" +Plot statistics produced when running an application with STARPU_MPI_STATS=1 +""" + +import sys +import re +import os + +PROGNAME=sys.argv[0] + +def usage(): + print("Offline tool to draw a communication matrix") + print("") + print("Usage: %s " % PROGNAME) + print("") + print("Options:") + print(" -h, --help display this help and exit") + print(" -v, --version output version information and exit") + print(" -png produce plots in png format (default is pdf)") + print("") + print("Report bugs to <@PACKAGE_BUGREPORT@>") + sys.exit(1) + +if len(sys.argv) >= 2: + if sys.argv[1] == '-v' or sys.argv[1] == '--version': + print("%s (@PACKAGE_NAME@) @PACKAGE_VERSION@" % PROGNAME) + sys.exit(0) + if sys.argv[1] == '-h' or sys.argv[1] == '--help': + usage() +if len(sys.argv) == 1: + usage() + +if len(sys.argv) >= 2 and sys.argv[1] == '-png': + outputformat='png' + outputext='png' + outputfile=sys.argv[2] +else: + outputformat='pdf color' + outputext='pdf' + outputfile=sys.argv[1] + +# find the number of nodes +nodes=0 +file = open(outputfile, "r") +for line in file.readlines(): + if re.search('TOTAL', line): + (node,stuff)=line.split(sep="[")[2].split("]") + if int(node) > nodes: + nodes=int(node) +file.close() +nodes=nodes+1 + +# extract volume of comm and bandwidth between all pair of nodes +volumes = [[0 for _ in range(nodes)] for _ in range(nodes)] +bandwidth = 
[[0 for _ in range(nodes)] for _ in range(nodes)] +file = open(outputfile, "r") +for line in file.readlines(): + if re.search(r'\[starpu_comm_stats]', line) and not re.search('TOTAL', line) and re.search('MB/s', line): + (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split() + (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":") + volumes[int(src)][int(dst)] = float(volB) + bandwidth[int(src)][int(dst)] = float(bwB) +file.close() + +def write_data(filename, nodes, data): + ofile=open(filename, "w") + for dst in range(nodes): + for src in range(nodes): + ofile.write("%f "% data[src][dst]) + ofile.write("\n") + ofile.close() + +def generate_gnuplot_script(filename, datafilename, outputfile, nodes): + ofile=open(filename, "w") + srctics="" + dsttics="" + for node in range(nodes-1): + srctics += "\"src%d\" %d, " % (node, node) + dsttics += "\"dst%d\" %d, " % (node, node) + ofile.write("set term %s\n" % outputformat) + ofile.write("set output \"%s.%s\"\n" % (outputfile, outputext)) + ofile.write("set view map scale 1\nset style data lines\n") + ofile.write("set palette gray\n") + ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1)) + ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1)) + ofile.write("plot '%s' matrix with image\n" % datafilename) + ofile.close() + +# generate gnuplot volume data and script file +write_data(outputfile+"_volume.data", nodes, volumes) +generate_gnuplot_script(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap", nodes) +os.system("gnuplot " + outputfile+"_volume.gp") +print("Generated file \"%s.%s\"" % (outputfile+"_volume.data", outputext)) + +# generate gnuplot bandwidth data and script file +write_data(outputfile+"_bw.data", nodes, bandwidth) +generate_gnuplot_script(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap", nodes) +os.system("gnuplot " + outputfile+"_bw.gp") +print("Generated file \"%s.%s\"" % (outputfile+"_bw.data", outputext)) 
diff --git a/tools/starpu_msexec b/tools/starpu_msexec new file mode 100755 index 0000000..fa0f6dc --- /dev/null +++ b/tools/starpu_msexec @@ -0,0 +1,28 @@ +#! /bin/bash + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Usually run program through $MS_LAUNCHER, unless it is a shell script, in +# which case it is the shell script that will run the program through $MS_LAUNCHER + +case "$2" in + *.sh) + exec "$@" + ;; + *) + exec $MS_LAUNCHER "$@" + ;; +esac diff --git a/tools/starpu_paje_draw_histogram b/tools/starpu_paje_draw_histogram new file mode 100755 index 0000000..41771d0 --- /dev/null +++ b/tools/starpu_paje_draw_histogram @@ -0,0 +1,146 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast +PROGNAME=$0 + +# File names +r_script="$(dirname $(command -v $0))/starpu_paje_draw_histogram.R" +r_input="" + +# Command line arguments +range="0:-1" +name="All" +verbose=0 +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +$0 [ options ] paje.trace [paje.trace2 ...] + +Options: + -r To fix range x1:x2 ("-1" for infinity) + -n To choose a certain state + -v Print output to command line + -h Show this message + +Examples: + +$0 -n chol_model_22 example.native.trace + +$0 -r 100:300 -n FetchingInput,Overhead -v example.native.trace example.simgrid.trace + +Report bugs to +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "r:n:vh" opt; do + case $opt in + r) + range="$OPTARG" + ;; + n) + name="$OPTARG" + ;; + v) + verbose=1 + ;; + h) + help_script + exit 4 + ;; + \?) + echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +if [[ $# < 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +# Getting range +range1=$(eval echo $range | cut -d: -f1) +range2=$(eval echo $range | cut -d: -f2) + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" 
+ exit 5 + fi + dir=$(dirname $file) + # Sorting traces + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + if grep -q start_profiling $dir/endSorted.trace + then + echo Using start_profiling/stop_profiling trace selection. + sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace + else + cp $dir/endSorted.trace $dir/endSorted2.trace + fi + cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace + + # Transferring to .csv + pj_dump -n $dir/outputSorted.trace > $file.csv + perl -i -ne 'print if /^State/' $file.csv + + r_input=$(eval echo "$r_input $file.csv") + + # Cleanup: delete temporary files + rm -f $dir/outputSorted.trace + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace + rm -f $dir/endSorted2.trace +done + +##################################### +# Running R file to get actual results +Rscript $r_script $range1 $range2 $name $r_input + +# Directly opening .pdf result +if [[ $verbose == 1 ]]; then + evince Rplots.pdf +fi + diff --git a/tools/starpu_paje_draw_histogram.1 b/tools/starpu_paje_draw_histogram.1 new file mode 100644 index 0000000..45975aa --- /dev/null +++ b/tools/starpu_paje_draw_histogram.1 @@ -0,0 +1,27 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_PAJE_DRAW_HISTOGRAM "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_paje_draw_histogram \- Draw StarPU trace histogram +.SH DESCRIPTION +Give statistical analysis of the paje trace +.PP +\&./starpu_paje_draw_histogram [ options ] paje.trace [paje.trace2 ...] 
+.SH OPTIONS +.TP +\fB\-r\fR +To fix range x1:x2 ("\-1" for infinity) +.TP +\fB\-n\fR +To choose a certain state +.TP +\fB\-v\fR +Print output to command line +.TP +\fB\-h\fR +Show this message +.SH EXAMPLES +\&./starpu_paje_draw_histogram \-n chol_model_22 example.native.trace +.PP +\&./starpu_paje_draw_histogram \-r 100:300 \-n FetchingInput,Overhead \-v example.native.trace example.simgrid.trace +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_paje_draw_histogram.R b/tools/starpu_paje_draw_histogram.R new file mode 100755 index 0000000..c370cd1 --- /dev/null +++ b/tools/starpu_paje_draw_histogram.R @@ -0,0 +1,125 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +# R script that is giving statistical analysis of the paje trace + +# Can be called from the command line with: +# Rscript $this_script $range1 $range2 $name $outputfile $inputfiles + +# Package containing ddply function +library(plyr) +library(ggplot2) +library(data.table) + +# Function for reading .csv file +read_df <- function(file,range1,range2) { + df<-read.csv(file, header=FALSE, strip.white=TRUE) + names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") + df = df[!(names(df) %in% c("Nature","Type", "Depth"))] + df$Origin<-file + +# Changing names if needed: + df$Value <- as.character(df$Value) + df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value)) + df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) + df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) + df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) + df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) + df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) + df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) + +# Considering only the states with 
a given name + if (name != "All") + df<-df[df$Value %in% name[[1]],] + +# Aligning to begin time from 0 + m <- min(df$Start) + df$Start <- df$Start - m + df$End <- df$Start+df$Duration + +# Taking only the states inside a given range + df <- df[df$Start>=range1 & df$End<=range2,] + +# Return data frame + df +} + +######################################### +######################################### +# Main +######################################### +# Reading command line arguments +args <- commandArgs(trailingOnly = TRUE) +range1<-as.numeric(args[1]) +if (range1==-1) + range1<-Inf +range2<-as.numeric(args[2]) +if (range2==-1) + range2<-Inf +name<-strsplit(args[3], ",") + +# Reading first file +filename<-args[4] +df<-read_df(filename,range1,range2) + +i=5 +while (i <= length(args)) + { +# Reading next input file + filename<-args[i] + dft<-read_df(filename,range1,range2) + + df<-rbindlist(list(df,dft)) + + i <- i+1 + } + +# Error: if there is no results for a given range and state +if (nrow(df)==0) + stop("Result is empty!") + +# Plotting histograms +plot <- ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count.., fill=..count..),binwidth = diff(range(df$Duration))/30) +plot <- plot + theme_bw() + scale_fill_gradient(high = "#132B43", low = "#56B1F7") + ggtitle("Histograms for state distribution") + ylab("Count") + xlab("Time [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_y") + +# Adding text for total duration +ad<-ggplot_build(plot)$data[[1]] +al<-ggplot_build(plot)$panel$layout +ad<-merge(ad,al) +anno1 <- ddply(ad, .(ROW), summarise, x = max(x)*0.7, y = max(y)*0.9) +anno1<-merge(anno1,al) +anno2 <- ddply(df, .(Origin,Value), summarise, tot=as.integer(sum(Duration))) +anno2$PANEL <- row.names(anno2) +anno2$lab <- sprintf("Total duration: \n%ims",anno2$tot) +anno <- merge(anno1,anno2) +plot <- plot + geom_text(data = anno, aes(x=x, y=y, label=lab, colour="red")) + +# Printing plot +plot + +# End +write("Done producing a histogram 
plot. Open Rplots.pdf located in this folder to see the results", stdout()) diff --git a/tools/starpu_paje_draw_histogram.in b/tools/starpu_paje_draw_histogram.in new file mode 100755 index 0000000..fa5371e --- /dev/null +++ b/tools/starpu_paje_draw_histogram.in @@ -0,0 +1,146 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast +PROGNAME=$0 + +# File names +r_script="$(dirname $(command -v $0))/starpu_paje_draw_histogram.R" +r_input="" + +# Command line arguments +range="0:-1" +name="All" +verbose=0 +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +$0 [ options ] paje.trace [paje.trace2 ...] 
+ +Options: + -r To fix range x1:x2 ("-1" for infinity) + -n To choose a certain state + -v Print output to command line + -h Show this message + +Examples: + +$0 -n chol_model_22 example.native.trace + +$0 -r 100:300 -n FetchingInput,Overhead -v example.native.trace example.simgrid.trace + +Report bugs to <@PACKAGE_BUGREPORT@> +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "r:n:vh" opt; do + case $opt in + r) + range="$OPTARG" + ;; + n) + name="$OPTARG" + ;; + v) + verbose=1 + ;; + h) + help_script + exit 4 + ;; + \?) + echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +if [[ $# < 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +# Getting range +range1=$(eval echo $range | cut -d: -f1) +range2=$(eval echo $range | cut -d: -f2) + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" + exit 5 + fi + dir=$(dirname $file) + # Sorting traces + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + if grep -q start_profiling $dir/endSorted.trace + then + echo Using start_profiling/stop_profiling trace selection. 
+ sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace + else + cp $dir/endSorted.trace $dir/endSorted2.trace + fi + cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace + + # Transferring to .csv + pj_dump -n $dir/outputSorted.trace > $file.csv + perl -i -ne 'print if /^State/' $file.csv + + r_input=$(eval echo "$r_input $file.csv") + + # Cleanup: delete temporary files + rm -f $dir/outputSorted.trace + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace + rm -f $dir/endSorted2.trace +done + +##################################### +# Running R file to get actual results +Rscript $r_script $range1 $range2 $name $r_input + +# Directly opening .pdf result +if [[ $verbose == 1 ]]; then + evince Rplots.pdf +fi + diff --git a/tools/starpu_paje_sort.in b/tools/starpu_paje_sort.in new file mode 100755 index 0000000..461b572 --- /dev/null +++ b/tools/starpu_paje_sort.in @@ -0,0 +1,108 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for sorting paje traces + +set -e # fail fast + +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +$0 [ options ] paje.trace [paje.trace2 ...] 
+ +Options: + -h Show this message + +Examples: + +$0 example.trace + +Report bugs to <@PACKAGE_BUGREPORT@> +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "h" opt; do + case $opt in + h) + help_script + exit 4 + ;; + \?) + echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +if [[ $# < 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +get_event_num() { + grep "^%EventDef[ ]$2" $1 | sed -e "s/.*$2[ ]*//" +} + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" + exit 5 + fi + dir=$(dirname $file) + DefCont="$(get_event_num $file PajeDefineContainerType) " + DefEvent="$(get_event_num $file PajeDefineEventType) " + DefState="$(get_event_num $file PajeDefineStateType) " + DefVar="$(get_event_num $file PajeDefineVariableType) " + DefLink="$(get_event_num $file PajeDefineLinkType) " + DefEnt="$(get_event_num $file PajeDefineEntityValue) " + CreateCont="$(get_event_num $file PajeCreateContainer) " + AddVar="$(get_event_num $file PajeAddVariable) " + grepstr="^\\(%\\|$DefCont\\|$DefEvent\\|$DefState\\|$DefVar\\|$DefLink\\|$DefEnt\\|$CreateCont\\|$AddVar\\)" + grepstr=${grepstr//[ ]/[ ]} + # Sorting traces + grep -e "$grepstr" $file > $dir/start.trace + grep -e "$grepstr" -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + cat $dir/start.trace $dir/endSorted.trace > $file + + # Cleanup: delete temporary files + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace +done diff --git a/tools/starpu_paje_state_stats b/tools/starpu_paje_state_stats new file mode 100755 index 0000000..4f81543 --- /dev/null +++ 
b/tools/starpu_paje_state_stats @@ -0,0 +1,146 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast + +# File names +outputfile="starpu_paje_state_stats.csv" +r_script="$(dirname $(command -v $0))/starpu_paje_state_stats.R" +r_input="" + +# Command line arguments +range="0:-1" +name="All" +verbose=0 +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +$0 [ options ] paje.trace [paje.trace2 ...] + +Options: + -r To fix range x1:x2 ("-1" for infinity) + -n To choose a certain state + -v Print output to command line + -h Show this message + +Examples: + +$0 example.native.trace + +$0 -r 100:300 -n FetchingInput -v example.native.trace example.simgrid.trace + +Report bugs to +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "r:n:vh" opt; do + case $opt in + r) + range="$OPTARG" + ;; + n) + name="$OPTARG" + ;; + v) + verbose=1 + ;; + h) + help_script + exit 4 + ;; + \?) 
+ echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +if [[ $# < 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +# Getting range +range1=$(eval echo $range | cut -d: -f1) +range2=$(eval echo $range | cut -d: -f2) + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" + exit 5 + fi + dir=$(dirname $file) + # Sorting traces + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + if grep -q start_profiling $dir/endSorted.trace + then + echo Using start_profiling/stop_profiling trace selection. + sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace + else + cp $dir/endSorted.trace $dir/endSorted2.trace + fi + cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace + + # Transferring to .csv + pj_dump -n $dir/outputSorted.trace > $file.csv + perl -i -ne 'print if /^State/' $file.csv + + r_input=$(eval echo "$r_input $file.csv") + + # Cleanup: delete temporary files + rm -f $dir/outputSorted.trace + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace + rm -f $dir/endSorted2.trace +done + +##################################### +# Running R file to get actual results +Rscript $r_script $range1 $range2 $name $outputfile $r_input + +# If verbose then write results to stdout +if [[ $verbose == 1 ]]; then + column -s, -t $outputfile +fi + diff --git a/tools/starpu_paje_state_stats.1 b/tools/starpu_paje_state_stats.1 new file mode 100644 index 0000000..c301a90 --- /dev/null +++ b/tools/starpu_paje_state_stats.1 @@ -0,0 +1,27 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. 
+.TH STARPU_PAJE_STATE_STATS "1" "December 2025" "starpu_paje_state_stats (StarPU) 1.4.10" "User Commands" +.SH NAME +starpu_paje_state_stats \- Print statistics from StarPU trace +.SH DESCRIPTION +Give statistical analysis of the paje trace +.PP +\&./starpu_paje_state_stats [ options ] paje.trace [paje.trace2 ...] +.SH OPTIONS +.TP +\fB\-r\fR +To fix range x1:x2 ("\-1" for infinity) +.TP +\fB\-n\fR +To choose a certain state +.TP +\fB\-v\fR +Print output to command line +.TP +\fB\-h\fR +Show this message +.SH EXAMPLES +\&./starpu_paje_state_stats example.native.trace +.PP +\&./starpu_paje_state_stats \-r 100:300 \-n FetchingInput \-v example.native.trace example.simgrid.trace +.SH "REPORTING BUGS" +Report bugs to diff --git a/tools/starpu_paje_state_stats.R b/tools/starpu_paje_state_stats.R new file mode 100755 index 0000000..f203f2b --- /dev/null +++ b/tools/starpu_paje_state_stats.R @@ -0,0 +1,125 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +# R script that is giving statistical analysis of the paje trace + +# Can be called from the command line with: +# Rscript $this_script $range1 $range2 $name $outputfile $inputfiles + +# Package containing ddply function +library(plyr) + +# Function for reading .csv file +read_df <- function(file,range1,range2) { + df<-read.csv(file, header=FALSE, strip.white=TRUE) + names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") + df = df[!(names(df) %in% c("Nature","Type", "Depth"))] + +# Changing names if needed: + df$Value <- as.character(df$Value) + df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value)) + df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) + df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) + df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) + df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) + df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) + df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Su", "SubmittingTask", as.character(df$Value)) + +# 
Considering only the states with a given name + if (name != "All") + df<-df[df$Value %in% name[[1]],] + +# Aligning to begin time from 0 + m <- min(df$Start) + df$Start <- df$Start - m + df$End <- df$Start+df$Duration + +# Taking only the states inside a given range + df <- df[df$Start>=range1 & df$End<=range2,] + +# Return data frame + df +} + +######################################### +######################################### +# Main +######################################### +# Reading command line arguments +args <- commandArgs(trailingOnly = TRUE) +range1<-as.numeric(args[1]) +if (range1==-1) + range1<-Inf +range2<-as.numeric(args[2]) +if (range2==-1) + range2<-Inf +name<-strsplit(args[3], ",") +outputfile<-args[4] + +# Reading first file +filename<-args[5] +df<-read_df(filename,range1,range2) + +# Getting summary of the first file +dfout<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration))) +names(dfout)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename)) + +i=6 +while (i <= length(args)) + { +# Reading next input file + filename<-args[i] + df<-read_df(filename,range1,range2) + +# Getting summary of the next file + dp<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration))) + names(dp)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename)) + +# Merging results into one single data frame + if (nrow(dp)>0) + { + if (nrow(dfout)>0) + dfout<-merge(dfout,dp, by = "Value", all=TRUE) + else + dfout<-dp + } + + i <- i+1 + } + +# Cosmetics: change NA to 0 +dfout[is.na(dfout)] <- 0 + +# Error: if there is no results for a given range and state +if (nrow(dfout)==0) + stop("Result is empty!") + +# Write results into the new .csv file +write.table(dfout, file=outputfile, row.names=FALSE, sep = ", ") + + diff --git a/tools/starpu_paje_state_stats.in b/tools/starpu_paje_state_stats.in new file mode 100755 index 
0000000..94a4ecd --- /dev/null +++ b/tools/starpu_paje_state_stats.in @@ -0,0 +1,146 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast + +# File names +outputfile="starpu_paje_state_stats.csv" +r_script="$(dirname $(command -v $0))/starpu_paje_state_stats.R" +r_input="" + +# Command line arguments +range="0:-1" +name="All" +verbose=0 +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +$0 [ options ] paje.trace [paje.trace2 ...] + +Options: + -r To fix range x1:x2 ("-1" for infinity) + -n To choose a certain state + -v Print output to command line + -h Show this message + +Examples: + +$0 example.native.trace + +$0 -r 100:300 -n FetchingInput -v example.native.trace example.simgrid.trace + +Report bugs to <@PACKAGE_BUGREPORT@> +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$0 (@PACKAGE_NAME@) @PACKAGE_VERSION@" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "r:n:vh" opt; do + case $opt in + r) + range="$OPTARG" + ;; + n) + name="$OPTARG" + ;; + v) + verbose=1 + ;; + h) + help_script + exit 4 + ;; + \?)
+ echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +if [[ $# -lt 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +# Getting range: split x1:x2 without eval, so the option text is never executed as shell code +range1=$(echo "$range" | cut -d: -f1) +range2=$(echo "$range" | cut -d: -f2) + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" + exit 5 + fi + dir=$(dirname $file) + # Sorting traces + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + if grep -q start_profiling $dir/endSorted.trace + then + echo Using start_profiling/stop_profiling trace selection. + sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace + else + cp $dir/endSorted.trace $dir/endSorted2.trace + fi + cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace + + # Transferring to .csv + pj_dump -n $dir/outputSorted.trace > $file.csv + perl -i -ne 'print if /^State/' $file.csv + + r_input="$r_input $file.csv" + + # Cleanup: delete temporary files + rm -f $dir/outputSorted.trace + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace + rm -f $dir/endSorted2.trace +done + +##################################### +# Running R file to get actual results +Rscript $r_script $range1 $range2 $name $outputfile $r_input + +# If verbose then write results to stdout +if [[ $verbose == 1 ]]; then + column -s, -t $outputfile +fi + diff --git a/tools/starpu_paje_summary b/tools/starpu_paje_summary new file mode 100755 index 0000000..d479278 --- /dev/null +++ b/tools/starpu_paje_summary @@ -0,0 +1,111 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures.
+# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for giving statistical analysis of the paje trace + +set -e # fail fast + +# File names +SOURCE_DIR=$(dirname $0) + +outputfile="summary.html" +analysis_script="$SOURCE_DIR/starpu_paje_summary.Rmd" +analysis_input="" + +# Command line arguments +inputfiles="" + +help_script() +{ +cat << EOF +Give statistical analysis of the paje trace + +Options: + -h Show this message + +Examples: +$0 example.native.trace +$0 example.native.trace example.simgrid.trace + +Report bugs to +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +while getopts "h" opt; do + case $opt in + \?) + echo "Invalid option: -$OPTARG" + help_script + exit 3 + ;; + esac +done + +# Reading files that need to be analyzed +shift $((OPTIND - 1)) +inputfiles=$@ +# Error if there is no input files specified +if [[ $# < 1 ]]; then + echo "Error!" + help_script + exit 2 +fi + +##################################### +# Transforming input files into .csv +for file in $inputfiles; do + if [ ! -s $file ] + then + echo "Error: file $file does not exist!" 
+ exit 5 + fi + dir=$(dirname $file) + # Sorting traces + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' $file > $dir/start.trace + grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v $file > $dir/end.trace + sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace + cat $dir/start.trace $dir/endSorted.trace > $dir/outputSorted.trace + + # Transferring to .csv + pj_dump -n $dir/outputSorted.trace > $file.csv + perl -i -ne 'print if /^State/' $file.csv + + # Cleanup: delete temporary files + rm -f $dir/outputSorted.trace + rm -f $dir/start.trace + rm -f $dir/end.trace + rm -f $dir/endSorted.trace +done + +analysis_input=`echo \"$inputfiles".csv\"" | sed 's/ */.csv", "/g'` + +##################################### +# Running analysis file to get actual results +Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\ + outputRmd = gsub('.html\$','.Rmd',outputhtml);\ + knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" + diff --git a/tools/starpu_paje_summary.Rmd b/tools/starpu_paje_summary.Rmd new file mode 100644 index 0000000..2aafafb --- /dev/null +++ b/tools/starpu_paje_summary.Rmd @@ -0,0 +1,299 @@ +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# + +```{r Setup, echo=FALSE} +opts_chunk$set(echo=FALSE) +``` + + + +```{r Install_R_libraries} +InstalledPackage <- function(package) +{ + available <- suppressMessages(suppressWarnings(sapply(package, require, quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE))) + missing <- package[!available] + if (length(missing) > 0) return(FALSE) + return(TRUE) +} + +CRANChoosen <- function() +{ + return(getOption("repos")["CRAN"] != "@CRAN@") +} + +UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org") +{ + if(!InstalledPackage(package)) + { + if(!CRANChoosen()) + { + chooseCRANmirror() + if(!CRANChoosen()) + { + options(repos = c(CRAN = defaultCRANmirror)) + } + } + + suppressMessages(suppressWarnings(install.packages(package))) + if(!InstalledPackage(package)) return(FALSE) + } + return(TRUE) +} + +# Now install desired libraries +libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer") +for(libr in libraries) +{ + if(!UsePackage(libr)) + { + stop("Error!", libr) + } +} +``` + +```{r Load_R_files} +# Load ggplot and plyr just for the following cases + library(ggplot2) + library(plyr) + library(data.table) + library(RColorBrewer) + +# Defining non-computation states: +def_states<-c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing") + +# Function for reading .csv file +read_df <- function(file,range1,range2) { + df<-read.csv(file, header=FALSE, strip.white=TRUE) + names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") + df = df[!(names(df) %in% c("Nature","Type", "Depth"))] + df$Origin<-as.factor(as.character(file)) + +# Changing names if needed: + df$Value <- as.character(df$Value) + df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) + df$Value <- ifelse(df$Value 
== "A", "Allocating", as.character(df$Value)) + df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) + df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) + df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) + df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Sc", "Scheduling", as.character(df$Value)) + df$Value <- ifelse(df$Value == "E", "Executing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) + df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) + df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) + df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) + df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) + df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) + +# Small cleanup +df$Start<-round(df$Start,digit=1) +df$End<-round(df$End,digit=1) +df$ResourceId<-as.factor(df$ResourceId) +df$Value<-as.factor(df$Value) + +# Start from zero + m <- min(df$Start) + df$Start <- df$Start - m + df$End <- df$Start+df$Duration + +# Return data frame + df +} +``` + +```{r Load_traces} +df<-data.frame() +if( !exists("input_traces") ) + input_traces<-c("example.native.trace.csv", "example.simgrid.trace.csv") + +for (i in 1:length(input_traces)){ + dfs<-read_df(input_traces[i]) + df<-rbindlist(list(df,dfs)) +} + +# Color palettes +colourCount = length(unique(df$Value)) +getPalette = 
colorRampPalette(brewer.pal(9, "Set1")) + +# Order of Value so we can have good colors +ker_states<-as.character(unique(df[!(df$Value %in% def_states),Value])) +ordered_states<-append(sort(ker_states), def_states) +df$Value <- factor(df$Value, levels=ordered_states) + +# Order of ResourceId so we can have y-axis +df$ResourceId <- factor(df$ResourceId, levels=sort(as.character(unique(df$ResourceId)))) +``` + +# Introduction + +This document presents a basic analysis of multiple StarPU +traces. First, paje *traces* are converted into *.csv* files, which +are then analyzed with **R**. This summary is a first step that +should help researchers verify their hypotheses or find problematic +areas that require more exhaustive investigation. + +Be cautious, as the following results are only a brief analysis of +the traces and many important phenomena could still be hidden. Also, +be very careful when comparing different states or traces. Even +though some large discrepancies can be irrelevant, in other cases +even the smallest differences can be essential in understanding what +exactly happened during the StarPU execution. + +### How to compile + + ./starpu_paje_summary example.native.trace example.simgrid.trace + +### Software dependencies + +In order to run this analysis you need to have R installed: + + sudo apt-get install r-base + +The easiest way to transform *paje* traces generated by StarPU into *.csv* is to use the *pj_dump* program (https://github.com/schnorr/pajeng), so we encourage users to install it. + +When R is installed, one will need to start R (e.g., from a terminal) and install the *knitr* package: + + R> install.packages("knitr") + +Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*). + +# Gantt Charts of the whole Trace + +First, we show a simple Gantt chart of every trace.
The x-axis is a +simple timeline of the execution, *Resources* on the y-axis correspond +to the different CPUs/GPUs that were used, and finally different colors +represent different *States* of the application. + +This kind of figure can often point to idle time or +synchronization problems. A small disadvantage is that in most cases +there are too many states, thus it is impossible to display them all +on a single plot without aggregation. Therefore, for any strange +behavior at a certain part of the trace, we strongly suggest zooming +in on the interval where it occurred. + +```{r Gantt1} +ggplot(df,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + + geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + + facet_wrap(~Origin,ncol=1,scale="free_y") +``` + +Second, we will concentrate only on computation kernel states, to +get rid of visualization artifacts that can be introduced by other +(sometimes irrelevant) states. Normally, this plot should not be too +different from the previous one. 
+ +```{r Gantt2} +# Select only computation kernels + df1 <- df[!(df$Value %in% c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")),] + +# Start from zero + m <- min(df1$Start) + df1$Start <- df1$Start - m + df1$End <- df1$Start+df1$Duration + +# Plot + ggplot(df1,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + + geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + + facet_wrap(~Origin,ncol=1,scale="free_y") +``` + +# Table Summary + +Here we present how much time the application spent in each state +(OverallDuration), how many times it was in that state (Count), +the mean and median values of the duration (Mean and Median), and finally +the standard deviation (StandDev). + +The general information provided by this table can sometimes give +application experts an idea of which parts of the code are not working as +desired. Be aware that this kind of table hides many important +things, such as outliers, multiple modes, etc. + +```{r Table} +options(width=120) +ddply(df,.(Value,Origin), summarize, OverallDuration=sum(Duration), Count=length(Duration), Mean=mean(Duration), Median=median(Duration), StandDev=sd(Duration)) +``` + +# State Duration during the Execution Time + +Now, we show how the duration of each state changed during the +execution. This can display the general behavior of a state: show whether +there are outliers or multiple modes, whether some events occur in +groups, etc. It can also suggest a strange behavior of a state +during a certain time interval, which should be later investigated +more carefully. 
+ + However, since each event is represented by a single point (and +there is no "alpha" factor), those events that happen almost +simultaneously are overplotted. Therefore, the density of events along +the execution time may not be easy to read. + +```{r Dur} +ggplot(df,aes(x=Start,y=Duration)) + geom_point(aes(color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + ggtitle("State Duration during the Execution Time") + theme(legend.position="none") + ylab("Duration [ms]") + xlab("Time [ms]") + facet_grid(Value~Origin, scale="free_y") +``` + +# Distribution Histograms + +Finally, we show the distribution of *Duration* for each state in the form +of histograms. The x-axis is partitioned into bins with equidistant time +intervals in milliseconds, while the y-axis represents the number of +occurrences inside such intervals for a certain state. Note that for +the first plot the y-axis is not fixed, meaning that the scale changes +from one row to another. This plot allows one not only to see what +was the most frequent duration of a state, but also to compare +durations between different states. + +```{r Hist1} +ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Value~Origin,scales = "free_y") +``` + +Similar to the previous figure, only now the traces are shown vertically +instead of horizontally. Note that for this plot the x-axis is not fixed, +meaning that the scale changes from one column to another. This plot +allows one to compare the frequency of different states and, in the case of +multiple traces, to easily compare the duration distribution for each +state. 
+ +```{r Hist2} +ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_x") +``` diff --git a/tools/starpu_paje_summary.in b/tools/starpu_paje_summary.in new file mode 100755 index 0000000..c556b56 --- /dev/null +++ b/tools/starpu_paje_summary.in @@ -0,0 +1,111 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2014-2014 Université Joseph Fourier +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
#
# Script for giving statistical analysis of the paje trace
#
# Each input trace is reordered, converted to <trace>.csv with pj_dump, and
# the resulting .csv files are handed to the R Markdown analysis script that
# sits next to this script.

set -e # fail fast

# Name printed by --version (it used to be referenced without being set).
PROGNAME=$(basename "$0")

# File names
SOURCE_DIR=$(dirname "$0")

outputfile="summary.html"
analysis_script="$SOURCE_DIR/starpu_paje_summary.Rmd"
# R vector body listing the generated .csv files: "a.csv", "b.csv", ...
analysis_input=""

# Print the usage message on stdout.
help_script()
{
cat << EOF
Give statistical analysis of the paje trace

Options:
   -h      Show this message

Examples:
$0 example.native.trace
$0 example.native.trace example.simgrid.trace

Report bugs to <@PACKAGE_BUGREPORT@>
EOF
}

if [ "$1" = "--version" ] ; then
    echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@"
    exit 0
fi

if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
    help_script
    exit 0
fi

# The leading ':' selects getopts' silent mode, so that $OPTARG holds the
# offending option letter in the error message below.
while getopts ":h" opt; do
    case $opt in
	h)
	    help_script
	    exit 0
	    ;;
	\?)
	    echo "Invalid option: -$OPTARG"
	    help_script
	    exit 3
	    ;;
    esac
done

# Reading files that need to be analyzed
shift $((OPTIND - 1))
# Error if there is no input files specified
# (-lt compares numerically; '<' inside [[ ]] compares strings)
if [ $# -lt 1 ]; then
    echo "Error!"
    help_script
    exit 2
fi

#####################################
# Transforming input files into .csv
for file in "$@"; do
    # -s is false for a missing file and for an empty one
    if [ ! -s "$file" ]
    then
	echo "Error: file $file does not exist or is empty!"
	exit 5
    fi
    dir=$(dirname "$file")
    # Sorting traces
    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' "$file" > "$dir/start.trace"
    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v "$file" > "$dir/end.trace"
    sort -s -V --key=2,2 "$dir/end.trace" > "$dir/endSorted.trace"
    cat "$dir/start.trace" "$dir/endSorted.trace" > "$dir/outputSorted.trace"

    # Transferring to .csv
    pj_dump -n "$dir/outputSorted.trace" > "$file.csv"
    perl -i -ne 'print if /^State/' "$file.csv"

    # Cleanup: delete temporary files
    rm -f "$dir/outputSorted.trace"
    rm -f "$dir/start.trace"
    rm -f "$dir/end.trace"
    rm -f "$dir/endSorted.trace"

    # Append "<file>.csv" to the comma-separated list given to R
    analysis_input="${analysis_input:+$analysis_input, }\"$file.csv\""
done

#####################################
# Running analysis file to get actual results
Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\
	    outputRmd = gsub('.html\$','.Rmd',outputhtml);\
	    knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"
cpu, cpu:k, cuda) +.TP +\fB\-f\fR +display the history\-based model for the specified footprint +.TP +\fB\-d\fR +display the directory storing performance models +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_perfmodel_display.c b/tools/starpu_perfmodel_display.c new file mode 100644 index 0000000..564bf76 --- /dev/null +++ b/tools/starpu_perfmodel_display.c @@ -0,0 +1,197 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include + +#include +#include + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +#define PROGNAME "starpu_perfmodel_display" + +/* XML format */ +static int xml = 0; +/* display all available models */ +static int plist = 0; +/* display directory */ +static int pdirectory = 0; +/* what kernel ? */ +static char *psymbol = NULL; +/* what parameter should be displayed ? (NULL = all) */ +static char *pparameter = NULL; +/* which architecture ? (NULL = all)*/ +static char *parch = NULL; +/* should we display a specific footprint ? 
*/ +static unsigned pdisplay_specific_footprint; +static uint32_t pspecific_footprint; + +static void usage() +{ + fprintf(stderr, "Display a given perfmodel\n\n"); + fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -l display all available models\n"); + fprintf(stderr, " -s specify the symbol\n"); + fprintf(stderr, " -x display output in XML format\n"); + fprintf(stderr, " -p specify the parameter (e.g. a, b, c, mean, stddev)\n"); + fprintf(stderr, " -a specify the architecture (e.g. cpu, cpu:k, cuda)\n"); + fprintf(stderr, " -f display the history-based model for the specified footprint\n"); + fprintf(stderr, " -d display the directory storing performance models\n"); + fprintf(stderr, " -h, --help display this help and exit\n"); + fprintf(stderr, " -v, --version output version information and exit\n\n"); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); +} + +static void parse_args(int argc, char **argv) +{ + int c; + int res; + + static struct option long_options[] = + { + {"arch", required_argument, NULL, 'a'}, + {"footprint", required_argument, NULL, 'f'}, + {"help", no_argument, NULL, 'h'}, + /* XXX Would be cleaner to set a flag */ + {"list", no_argument, NULL, 'l'}, + {"dir", no_argument, NULL, 'd'}, + {"parameter", required_argument, NULL, 'p'}, + {"symbol", required_argument, NULL, 's'}, + {"version", no_argument, NULL, 'v'}, + {0, 0, 0, 0} + }; + + int option_index; + while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1) + { + switch (c) + { + case 'l': + /* list all models */ + plist = 1; + break; + + case 's': + /* symbol */ + psymbol = optarg; + break; + + case 'p': + /* parameter (eg. 
a, b, c, mean, stddev) */ + pparameter = optarg; + break; + + case 'a': + /* architecture (cpu, cuda) */ + parch = optarg; + break; + + case 'f': + /* footprint */ + pdisplay_specific_footprint = 1; + res = sscanf(optarg, "%08x", &pspecific_footprint); + STARPU_ASSERT(res==1); + break; + + case 'd': + /* directory */ + pdirectory = 1; + break; + + case 'x': + /* symbol */ + xml = 1; + break; + + case 'h': + usage(); + exit(EXIT_SUCCESS); + + case 'v': + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + exit(EXIT_SUCCESS); + + case '?': + default: + fprintf(stderr, "Unrecognized option: -%c\n", optopt); + } + } + + if (!psymbol && !plist && !pdirectory) + { + fprintf(stderr, "Incorrect usage, aborting\n"); + usage(); + exit(-1); + } +} + +int main(int argc, char **argv) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + WSADATA wsadata; + WSAStartup(MAKEWORD(1,0), &wsadata); +#endif + + parse_args(argc, argv); + starpu_drivers_preinit(); + starpu_perfmodel_initialize(); + + if (plist) + { + starpu_perfmodel_list(stdout); + } + else if (pdirectory) + { + starpu_perfmodel_directory(stdout); + } + else + { + struct starpu_perfmodel model = { .type = STARPU_PERFMODEL_INVALID }; + int ret = starpu_perfmodel_load_symbol(psymbol, &model); + if (ret == 1) + { + fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol); + return 1; + } + if (xml) + { + starpu_perfmodel_dump_xml(stdout, &model); + } + else + { + uint32_t *footprint = NULL; + if (pdisplay_specific_footprint == 1) + { + footprint = &pspecific_footprint; + } + starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout); + } + starpu_perfmodel_unload_model(&model); + } + + starpu_perfmodel_free_sampling(); + return 0; +} diff --git a/tools/starpu_perfmodel_plot.1 b/tools/starpu_perfmodel_plot.1 new file mode 100644 index 0000000..51aa177 --- /dev/null +++ b/tools/starpu_perfmodel_plot.1 @@ -0,0 +1,50 @@ +.\" DO NOT MODIFY THIS FILE! 
It was generated by help2man 1.49.3. +.TH STARPU_PERFMODEL_PLOT "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_perfmodel_plot \- Plot StarPU performance model +.SH SYNOPSIS +.B starpu_perfmodel_plot +[ \fI\,options \/\fR] +.SH DESCRIPTION +Draw a graph corresponding to the execution time of a given perfmodel +.PP +One must specify a symbol with the \fB\-s\fR option or use \fB\-l\fR or \fB\-d\fR +Options: +.TP +\fB\-d\fR +display the directory storing performance models +.TP +\fB\-l\fR +display all available models +.TP +\fB\-s\fR +specify the symbol +.TP +\fB\-e\fR +display perfmodel as energy instead of time +.TP +\fB\-se\fR +specify both a time symbol and an energy symbol +.TP +\fB\-f\fR +draw GFlop/s instead of time +.TP +\fB\-i\fR +input FxT files generated by StarPU +.TP +\fB\-lc\fR +display all combinations of a given model +.TP +\fB\-c\fR +specify the combination (use the option \fB\-lc\fR to list all combinations of a given model) +.TP +\fB\-o\fR +specify directory in which to create output files (current directory by default) +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +output version information and exit +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_perfmodel_plot.c b/tools/starpu_perfmodel_plot.c new file mode 100644 index 0000000..7df0d04 --- /dev/null +++ b/tools/starpu_perfmodel_plot.c @@ -0,0 +1,699 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2013-2013 Thibaut Lambert + * Copyright (C) 2011-2011 Télécom Sud Paris + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#ifdef STARPU_USE_FXT +#include +#endif +#include + +#include +#include // we need to browse the list associated to history-based models + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +#define PROGNAME "starpu_perfmodel_plot" + +struct _perfmodel_plot_options +{ + /* display all available models */ + int list; + /* display directory */ + int directory; + /* what kernel ? */ + char *symbol; + /* what energy model ? */ + char *energy_symbol; + /* which combination */ + int comb_is_set; + int comb; + /* display all available combinations of a specific model */ + int list_combs; + int gflops; + int energy; + /* Unless a FxT file is specified, we just display the model */ + int with_fxt_file; + + char avg_file_name[256]; + +#ifdef STARPU_USE_FXT + struct starpu_fxt_codelet_event *dumped_codelets; + struct starpu_fxt_options fxt_options; + char data_file_name[256]; +#endif +}; + +static void usage() +{ + fprintf(stderr, "Draw a graph corresponding to the execution time of a given perfmodel\n"); + fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "One must specify a symbol with the -s option or use -l or -d\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -d display the directory storing performance models\n"); + fprintf(stderr, " -l display all available models\n"); + fprintf(stderr, " -s specify the symbol\n"); + fprintf(stderr, " -e display perfmodel as energy instead of time\n"); + fprintf(stderr, " -se \n"); + fprintf(stderr, " specify both a time symbol and an energy symbol\n"); + fprintf(stderr, " -f draw GFlop/s instead of time\n"); + fprintf(stderr, " 
-i input FxT files generated by StarPU\n"); + fprintf(stderr, " -lc display all combinations of a given model\n"); + fprintf(stderr, " -c specify the combination (use the option -lc to list all combinations of a given model)\n"); + fprintf(stderr, " -o specify directory in which to create output files (current directory by default)\n"); + fprintf(stderr, " -h, --help display this help and exit\n"); + fprintf(stderr, " -v, --version output version information and exit\n\n"); + fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); + fprintf(stderr, "\n"); +} + +static void parse_args(int argc, char **argv, struct _perfmodel_plot_options *options, char **directory) +{ + int correct_usage = 0; + memset(options, 0, sizeof(struct _perfmodel_plot_options)); + +#ifdef STARPU_USE_FXT + /* Default options */ + starpu_fxt_options_init(&options->fxt_options); + + free(options->fxt_options.out_paje_path); + options->fxt_options.out_paje_path = NULL; + free(options->fxt_options.activity_path); + options->fxt_options.activity_path = NULL; + free(options->fxt_options.distrib_time_path); + options->fxt_options.distrib_time_path = NULL; + free(options->fxt_options.dag_path); + options->fxt_options.dag_path = NULL; + + options->fxt_options.dumped_codelets = &options->dumped_codelets; +#endif + + /* We want to support arguments such as "-i trace_*" */ + unsigned reading_input_filenames = 0; + + int i; + for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-s") == 0) + { + if (i >= argc-1) + { + fprintf(stderr,"-s requires an argument\n"); + usage(); + exit(EXIT_FAILURE); + } + options->symbol = argv[++i]; + correct_usage = 1; + continue; + } + + if (strcmp(argv[i], "-se") == 0) + { + if (i >= argc-2) + { + fprintf(stderr,"-se requires two arguments\n"); + usage(); + exit(EXIT_FAILURE); + } + options->symbol = argv[++i]; + options->energy_symbol = argv[++i]; + correct_usage = 1; + continue; + } + + if (strcmp(argv[i], "-o") == 0) + { + free(*directory); + *directory = 
strdup(argv[++i]); +#ifdef STARPU_USE_FXT + options->fxt_options.dir = strdup(*directory); +#endif + continue; + } + + if (strcmp(argv[i], "-i") == 0) + { + if (i >= argc-1) + { + fprintf(stderr,"-i requires an argument\n"); + usage(); + exit(EXIT_FAILURE); + } + reading_input_filenames = 1; +#ifdef STARPU_USE_FXT + options->fxt_options.filenames[options->fxt_options.ninputfiles++] = argv[++i]; + options->with_fxt_file = 1; +#else + fprintf(stderr, "Warning: FxT support was not enabled in StarPU: FxT traces will thus be ignored!\n"); +#endif + continue; + } + + if (strcmp(argv[i], "-l") == 0) + { + options->list = 1; + correct_usage = 1; + continue; + } + + if (strcmp(argv[i], "-lc") == 0) + { + options->list_combs = 1; + continue; + } + + if (strcmp(argv[i], "-f") == 0) + { + options->gflops = 1; + continue; + } + + if (strcmp(argv[i], "-e") == 0) + { + options->energy = 1; + continue; + } + + if (strcmp(argv[i], "-c") == 0) + { + if (i >= argc-1) + { + fprintf(stderr,"-c requires an argument\n"); + usage(); + exit(EXIT_FAILURE); + } + options->comb_is_set = 1; + options->comb = atoi(argv[++i]); + continue; + } + + if (strcmp(argv[i], "-d") == 0) + { + options->directory = 1; + correct_usage = 1; + continue; + } + + if (strcmp(argv[i], "-h") == 0 || + strcmp(argv[i], "--help") == 0) + { + usage(); + exit(EXIT_SUCCESS); + } + + if (strcmp(argv[i], "-v") == 0 || + strcmp(argv[i], "--version") == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + exit(EXIT_SUCCESS); + } + + /* If the reading_input_filenames flag is set, and that the + * argument does not match an option, we assume this may be + * another filename */ + if (reading_input_filenames) + { +#ifdef STARPU_USE_FXT + options->fxt_options.filenames[options->fxt_options.ninputfiles++] = argv[i]; +#endif + continue; + } + } + + if (correct_usage == 0) + { + fprintf(stderr, "Incorrect usage, aborting\n"); + usage(); + exit(-1); + } +} + +static char *replace_char(char *str, char old, 
/* Return a heap-allocated copy of str in which every occurrence of old has
 * been replaced by new. str itself is left untouched; the caller must free()
 * the result. */
static char *replace_char(char *str, char old, char new)
{
	char *copy = strdup(str);
	size_t pos;

	for (pos = 0; copy[pos] != '\0'; pos++)
	{
		if (copy[pos] == old)
			copy[pos] = new;
	}
	return copy;
}

/* Separate successive entries of the gnuplot "plot" command: the first call
 * only clears *first, every later call writes a comma, a backslash line
 * continuation and a tab. */
static void print_comma(FILE *gnuplot_file, int *first)
{
	if (!*first)
	{
		fputs(",\\\n\t", gnuplot_file);
		return;
	}
	*first = 0;
}
arch_model->regression.c * 0.001); + + fprintf(gnuplot_file, "%s%g * x ** %g + %s%g title \"Non-Linear Regression %s\"", factor, + arch_model->regression.a, arch_model->regression.b, factor, arch_model->regression.c, arch_name); + } +} + +static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_perfmodel *model, struct starpu_perfmodel *energy_model, int *first, struct _perfmodel_plot_options *options) +{ + FILE *datafile; + struct starpu_perfmodel_history_list *ptr; + char arch_name[32]; + int col; + unsigned long minimum = 0; + + datafile = fopen(options->avg_file_name, "w"); + col = 2; + + int i; + for(i = 0; i < model->state->ncombs; i++) + { + int comb = model->state->combs[i]; + if (options->comb_is_set == 0 || options->comb == comb) + { + struct starpu_perfmodel_arch *arch; + int impl; + + arch = starpu_perfmodel_arch_comb_fetch(comb); + for(impl = 0; impl < model->state->nimpls[comb]; impl++) + { + struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; + starpu_perfmodel_get_arch_name(arch, arch_name, 32, impl); + + if (arch_model->list) + { + char *arch_name2 = replace_char(arch_name, '_', '-'); + print_comma(gnuplot_file, first); + fprintf(gnuplot_file, "\"%s\" using 1:%d:%d with errorlines title \"Average %s\"", options->avg_file_name, col, col+1, arch_name2); + col += 2; + free(arch_name2); + } + } + } + } + + /* Dump entries in size order */ + while (1) + { + unsigned long last = minimum; + + minimum = ULONG_MAX; + /* Get the next minimum */ + for(i = 0; i < model->state->ncombs; i++) + { + int comb = model->state->combs[i]; + if (options->comb_is_set == 0 || options->comb == comb) + { + int impl; + for(impl = 0; impl < model->state->nimpls[comb]; impl++) + { + struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; + for (ptr = arch_model->list; ptr; ptr = ptr->next) + { + unsigned long size = ptr->entry->size; + if (size > last && size < minimum) + minimum = size; + } + } 
+ } + } + if (minimum == ULONG_MAX) + break; + + fprintf(stderr, "%lu ", minimum); + fprintf(datafile, "%-15lu ", minimum); + /* Find that minimum */ + for(i = 0; i < model->state->ncombs; i++) + { + int comb = model->state->combs[i]; + if (options->comb_is_set == 0 || options->comb == comb) + { + int impl; + + for(impl = 0; impl < model->state->nimpls[comb]; impl++) + { + int found = 0; + struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; + for (ptr = arch_model->list; ptr; ptr = ptr->next) + { + struct starpu_perfmodel_history_entry *entry = ptr->entry; + if (entry->size == minimum) + { + if (options->energy_symbol) + { + /* Look for the same in the energy model */ + + if (impl >= energy_model->state->nimpls[comb]) + /* Doesn't have measurements for this impl */ + break; + + struct starpu_perfmodel_per_arch *arch_model2 = &energy_model->state->per_arch[comb][impl]; + struct starpu_perfmodel_history_list *ptr2; + for (ptr2 = arch_model2->list; ptr2; ptr2 = ptr2->next) + { + struct starpu_perfmodel_history_entry *entry2 = ptr2->entry; + if (entry2->size == minimum) + { + /* Found the same size, can print */ + + double rel_delta = sqrt( + (entry2->deviation * entry2->deviation) / (entry2->mean * entry2->mean) + + (entry->deviation * entry->deviation) / (entry->mean * entry->mean)); + + fprintf(datafile, "\t%-15le\t%-15le", entry2->mean / (entry->mean / 1000000), + entry2->mean / (entry->mean / 1000000) * rel_delta); + found = 1; + break; + } + } + + } + else + { + if (options->gflops) + if (options->energy) + fprintf(datafile, "\t%-15le\t%-15le", entry->flops / entry->mean / 1000000000, + entry->flops * entry->deviation / (entry->mean * entry->mean) / 1000000000 + ); + else + fprintf(datafile, "\t%-15le\t%-15le", entry->flops / (entry->mean * 1000), + entry->flops * entry->deviation / (entry->mean * entry->mean * 1000) + ); + else + if (options->energy) + fprintf(datafile, "\t%-15le\t%-15le", entry->mean, entry->deviation); + else + 
fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation); + found = 1; + } + break; + } + } + if (!found && arch_model->list) + /* No value for this arch. */ + fprintf(datafile, "\t\"\"\t\"\""); + } + } + } + fprintf(datafile, "\n"); + } + fprintf(stderr, "\n"); + + fclose(datafile); +} + +static void display_all_perf_models(FILE *gnuplot_file, struct starpu_perfmodel *model, int *first, struct _perfmodel_plot_options *options) +{ + int i; + for(i = 0; i < model->state->ncombs; i++) + { + int comb = model->state->combs[i]; + if (options->comb_is_set == 0 || options->comb == comb) + { + struct starpu_perfmodel_arch *arch; + int impl; + + arch = starpu_perfmodel_arch_comb_fetch(comb); + for(impl = 0; impl < model->state->nimpls[comb]; impl++) + { + struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[comb][impl]; + display_perf_model(gnuplot_file, arch, archmodel, impl, first, options); + } + } + } +} + +#ifdef STARPU_USE_FXT +static void dump_data_file(FILE *data_file, struct _perfmodel_plot_options *options) +{ + int i; + for (i = 0; i < options->fxt_options.dumped_codelets_count; i++) + { + /* Dump only if the codelet symbol matches user's request (with or without the machine name) */ + char *tmp = strdup(options->symbol); + char *dot = strchr(tmp, '.'); + if (dot) tmp[strlen(tmp)-strlen(dot)] = '\0'; + if ((strncmp(options->dumped_codelets[i].symbol, options->symbol, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long)-1) == 0) + || (strncmp(options->dumped_codelets[i].symbol, tmp, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long)-1) == 0)) + { + char *archname = options->dumped_codelets[i].perfmodel_archname; + size_t size = options->dumped_codelets[i].size; + float time = options->dumped_codelets[i].time; + + fprintf(data_file, "%s %f %f\n", archname, (float)size, time); + } + free(tmp); + } + free(options->dumped_codelets); +} +#endif + +static void display_selected_models(FILE *gnuplot_file, struct starpu_perfmodel *model, struct 
starpu_perfmodel *energy_model, struct _perfmodel_plot_options *options) +{ + char hostname[64]; + char *symbol = replace_char(options->symbol, '_', '-'); + + _starpu_gethostname(hostname, sizeof(hostname)); + fprintf(gnuplot_file, "#!/usr/bin/gnuplot -persist\n"); + fprintf(gnuplot_file, "\n"); + fprintf(gnuplot_file, "set term postscript eps enhanced color\n"); + fprintf(gnuplot_file, "set output \"starpu_%s%s.eps\"\n", options->energy_symbol?"power_":options->gflops?"gflops_":"", options->symbol); + fprintf(gnuplot_file, "set title \"Model for codelet %s on %s\"\n", symbol, hostname); + fprintf(gnuplot_file, "set xlabel \"Total data size\"\n"); + if (options->energy_symbol) + fprintf(gnuplot_file, "set ylabel \"Power (W)\"\n"); + else if (options->gflops) + if (options->energy) + fprintf(gnuplot_file, "set ylabel \"GFlop/s/W\"\n"); + else + fprintf(gnuplot_file, "set ylabel \"GFlop/s\"\n"); + else + if (options->energy) + fprintf(gnuplot_file, "set ylabel \"Energy (J)\"\n"); + else + fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n"); + fprintf(gnuplot_file, "\n"); + fprintf(gnuplot_file, "set key top left\n"); + fprintf(gnuplot_file, "set logscale x\n"); + fprintf(gnuplot_file, "set logscale y\n"); + fprintf(gnuplot_file, "\n"); + + /* If no input data is given to gnuplot, we at least need to specify an + * arbitrary range. 
*/ + if (options->with_fxt_file == 0 || options->gflops) + fprintf(gnuplot_file, "set xrange [1 < * < 10**9 : 1 < * < 10**9]\n\n"); + + int first = 1; + fprintf(gnuplot_file, "plot\t"); + + /* display all or selected combinations */ + if (!options->energy_symbol) + display_all_perf_models(gnuplot_file, model, &first, options); + display_history_based_perf_models(gnuplot_file, model, energy_model, &first, options); + + fprintf(gnuplot_file, "\nset term png\n"); + fprintf(gnuplot_file, "set output \"starpu_%s%s.png\"\n", options->energy_symbol?"power_":options->gflops?"gflops_":"", options->symbol); + fprintf(gnuplot_file, "replot\n"); + free(symbol); +} + +int main(int argc, char **argv) +{ + int ret = 0; + struct starpu_perfmodel model = { .type = STARPU_PERFMODEL_INVALID }; + struct starpu_perfmodel energy_model = { .type = STARPU_PERFMODEL_INVALID }; + char gnuplot_file_name[256]; + struct _perfmodel_plot_options options; + char *directory = strdup("./"); + +#if defined(_WIN32) && !defined(__CYGWIN__) + WSADATA wsadata; + WSAStartup(MAKEWORD(1,0), &wsadata); +#endif + + parse_args(argc, argv, &options, &directory); + starpu_drivers_preinit(); + starpu_perfmodel_initialize(); + + if (options.directory) + { + starpu_perfmodel_directory(stdout); + } + else if (options.list) + { + ret = starpu_perfmodel_list(stdout); + if (ret) + { + _STARPU_DISP("The performance model directory is invalid\n"); + } + } + else + { + /* Load the performance model associated to the symbol */ + ret = starpu_perfmodel_load_symbol(options.symbol, &model); + if (options.energy_symbol) + ret = starpu_perfmodel_load_symbol(options.energy_symbol, &energy_model); + if (ret) + { + _STARPU_DISP("The performance model for the symbol <%s> could not be loaded\n", options.symbol); + } + else if (options.list_combs) + { + ret = starpu_perfmodel_list_combs(stdout, &model); + if (ret) + { + fprintf(stderr, "Error when listing combinations for model <%s>\n", options.symbol); + } + } + else + { + /* If 
some FxT input was specified, we put the points on the graph */ +#ifdef STARPU_USE_FXT + if (options.with_fxt_file) + { + starpu_fxt_generate_trace(&options.fxt_options); + + snprintf(options.data_file_name, sizeof(options.data_file_name), "%s/starpu_%s.data", directory, options.symbol); + + FILE *data_file = fopen(options.data_file_name, "w+"); + STARPU_ASSERT(data_file); + dump_data_file(data_file, &options); + fclose(data_file); + } +#endif + + if (options.energy_symbol) + { + snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_power_%s.gp", directory, options.symbol); + snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_power_%s_avg.data", directory, options.symbol); + } + else if (options.gflops) + { + snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_gflops_%s.gp", directory, options.symbol); + snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_gflops_%s_avg.data", directory, options.symbol); + } + else + { + snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_%s.gp", directory, options.symbol); + snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_%s_avg.data", directory, options.symbol); + } + + FILE *gnuplot_file = fopen(gnuplot_file_name, "w+"); + STARPU_ASSERT_MSG(gnuplot_file, "Cannot create file <%s>\n", gnuplot_file_name); + display_selected_models(gnuplot_file, &model, &energy_model, &options); + fprintf(gnuplot_file,"\n"); + fclose(gnuplot_file); + + /* Retrieve the current mode of the gnuplot executable */ + struct stat sb; + ret = stat(gnuplot_file_name, &sb); + if (ret) + { + perror("stat"); + STARPU_ABORT(); + } + + /* Make the gnuplot script executable */ + ret = chmod(gnuplot_file_name, sb.st_mode|S_IXUSR +#ifdef S_IXGRP + |S_IXGRP +#endif +#ifdef S_IXOTH + |S_IXOTH +#endif + ); + if (ret) + { + perror("chmod"); + STARPU_ABORT(); + } + _STARPU_DISP("Gnuplot file <%s> generated\n", gnuplot_file_name); + } + 
starpu_perfmodel_unload_model(&model); + if (options.energy_symbol) + starpu_perfmodel_unload_model(&energy_model); + } + starpu_perfmodel_free_sampling(); + free(directory); +#ifdef STARPU_USE_FXT + free(options.fxt_options.dir); + starpu_fxt_options_shutdown(&options.fxt_options); +#endif + return ret; +} diff --git a/tools/starpu_perfmodel_recdump.c b/tools/starpu_perfmodel_recdump.c new file mode 100644 index 0000000..7c714de --- /dev/null +++ b/tools/starpu_perfmodel_recdump.c @@ -0,0 +1,486 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2011-2011 Télécom Sud Paris + * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) +#include +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +// we need to browse the list associated to history-based models +// just like in starpu_perfmodel_plot +#include + +#define STRHEADCMP(s, head) strncmp(s, head, strlen(head)) + + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#endif + +#define PROGNAME "starpu_perfmodel_recdump" + +struct _footprint_list +{ + struct _footprint_list* next; + uint32_t footprint; +}; + +struct _footprint_list* add_footprint(struct _footprint_list* list, uint32_t footprint) +{ + struct _footprint_list * l = list; + while(l) + { + if(l->footprint == footprint) break; + l = l->next; + } + if(l) return list; + else + { + struct _footprint_list *res; + _STARPU_MALLOC(res, sizeof(struct _footprint_list)); + res->footprint = footprint; + res->next = list; + return res; + } +} + +static struct model +{ + UT_hash_handle hh; + char *name; + struct starpu_perfmodel model; + struct _footprint_list* footprints; +} *models; + +void get_comb_name(int comb, char* name, int name_size) +{ + struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); + STARPU_ASSERT_MSG(arch_comb->ndevices == 1, "Cannot work with multi-device workers\n"); + snprintf(name, name_size, "%s%d", starpu_perfmodel_get_archtype_name(arch_comb->devices[0].type), arch_comb->devices[0].devid); +} + +void print_archs(FILE* output) +{ + int nb_workers = 0; + unsigned workerid, node, src, dst; int comb, old_comb = -1; + + fprintf(output, "%%rec: worker_count\n\n"); + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + { + struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); + comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if (comb < 0) continue; // Ignore architecture which is not present in any perfmodel + + if(comb != 
old_comb) + { + if(nb_workers > 0) + { + char name[32]; + get_comb_name(old_comb, name, 32); + fprintf(output, "Architecture: %s\n", name); + fprintf(output, "NbWorkers: %d\n\n", nb_workers); + } + old_comb = comb; + nb_workers = 1; + } + else + { + nb_workers += 1; + } + } + + if(nb_workers > 0) + { + char name[32]; + get_comb_name(old_comb, name, 32); + fprintf(output, "Architecture: %s\n", name); + fprintf(output, "NbWorkers: %d\n\n", nb_workers); + } + + fprintf(output, "%%rec: memory_workers\n\n"); + for (node = 0; node < starpu_memory_nodes_get_count(); node++) + { + unsigned printed = 0; + char name[32]; + fprintf(output, "MemoryNode: %u\n", node); + starpu_memory_node_get_name(node, name, sizeof(name)); + fprintf(output, "Name: %s\n", name); + fprintf(output, "Size: %ld\n", (long) starpu_memory_get_total(node)); + for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) + { + if (starpu_worker_get_memory_node(workerid) == node) + { + if (!printed) + { + fprintf(output, "Workers:"); + printed = 1; + } + fprintf(output, " %u", workerid); + } + } + if (printed) + fprintf(output, "\n"); + fprintf(output, "\n"); + } + fprintf(output, "%%rec: memory_performance\n\n"); + for (src = 0; src < starpu_memory_nodes_get_count(); src++) + { + for (dst = 0; dst < starpu_memory_nodes_get_count(); dst++) + { + if (src != dst) + { + fprintf(output, "MemoryNodeSrc: %u\n", src); + fprintf(output, "MemoryNodeDst: %u\n", dst); + fprintf(output, "Bandwidth: %f\n", starpu_transfer_bandwidth(src, dst)); + fprintf(output, "Latency: %f\n", starpu_transfer_latency(src, dst)); + fprintf(output, "\n"); + } + } + } +} + +/* output file name */ +static char* poutput = NULL; +static char* pinput = NULL; + +static void usage() +{ + fprintf(stderr, "Dumps perfmodels to a rec file\n\n"); + fprintf(stderr, "Usage: %s [ input-file ] [ -o output-file ]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "If input or output file names are not given, stdin and stdout are used."); 
+ fprintf(stderr, "\n"); + fprintf(stderr, "Report bugs to <"PACKAGE_BUGREPORT">."); + fprintf(stderr, "\n"); +} + +static void print_entry(const char *name, const char *archname, FILE *output, struct starpu_perfmodel_history_entry *entry) +{ + fprintf(output, "Model: %s\n", name); + fprintf(output, "Architecture: %s\n", archname); + fprintf(output, "Footprint: %08x\n", entry->footprint); + fprintf(output, "Size: %lu\n", (unsigned long) entry->size); + if (!isnan(entry->flops)) + fprintf(output, "Flops: %-15e\n", entry->flops); + fprintf(output, "Mean: %-15e\nStddev: %-15e\n", + entry->mean, entry->deviation); + fprintf(output, "Samples: %u\n", entry->nsample); + fprintf(output, "\n"); +} + +static void parse_args(int argc, char **argv) +{ + int c; + + static struct option long_options[] = + { + {"help", no_argument, NULL, 'h'}, + {"output", required_argument, NULL, 'o'}, + {0, 0, 0, 0} + }; + + int option_index; + while ((c = getopt_long(argc, argv, "ho:", long_options, &option_index)) != -1) + { + switch (c) + { + case 'h': /* display help */ + usage(); + exit(EXIT_SUCCESS); + break; + + case 'o': + poutput = optarg; + break; + case '?': + default: + fprintf(stderr, "Unrecognized option: -%c\n", optopt); + } + } + + if(optind < argc) + { + pinput = argv[optind++]; + if(optind < argc) + { + fprintf(stderr, "Unrecognized argument: %s\n", argv[optind]); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MINGW32__) + WSADATA wsadata; + WSAStartup(MAKEWORD(1,0), &wsadata); + _STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n"); + return 1; +#else + FILE* output; + parse_args(argc, argv); + + if(poutput != NULL) + { + output = fopen(poutput, "w+"); + if (!output) + { + fprintf(stderr, "couldn't open %s for write: %s\n", poutput, strerror(errno)); + exit(EXIT_FAILURE); + } + } + else + { + output = stdout; + } + + if (starpu_init(NULL) != 0) + { + fprintf(stderr, "StarPU 
initialization failure\n"); + exit(EXIT_FAILURE); + } + starpu_pause(); + + if(pinput) + { + FILE* input = fopen(pinput, "r"); + char s[1024], *c; + struct model *model, *tmp=NULL; + uint32_t footprint = 0; + char *model_name = NULL; + int ret; + + if (!input) + { + fprintf(stderr, "couldn't open %s for read: %s\n", pinput, strerror(errno)); + exit(EXIT_FAILURE); + } + + while (fgets(s, sizeof(s), input)) + { + if (strlen(s) == sizeof(s) - 1) + { + fprintf(stderr, "oops, very long line '%s', it's odd\n", s); + exit(EXIT_FAILURE); + } + + if (s[0] == '\n') + { + /* empty line, end of task */ + if (model_name) + { + /* Try to get already-loaded model */ + HASH_FIND_STR(models, model_name, model); + if (model == NULL) + { + _STARPU_MALLOC(model, sizeof(*model)); + model->name = model_name; + model->footprints = NULL; + memset(&model->model, 0, sizeof(model->model)); + model->model.type = STARPU_PERFMODEL_INVALID; + ret = starpu_perfmodel_load_symbol(model_name, &model->model); + if (ret == 1) + { + fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", model_name); + exit(EXIT_FAILURE); + } + HASH_ADD_STR(models, name, model); + } + else + { + free(model_name); + } + model->footprints = add_footprint(model->footprints, footprint); + model_name = NULL; + } + continue; + } + + /* Get rec field name */ + c = strchr(s, ':'); + if (!c) + { + fprintf(stderr, "odd line '%s'\n", s); + exit(EXIT_FAILURE); + } + + if (!STRHEADCMP(s, "Footprint: ")) + { + footprint = strtoul(s + strlen("Footprint: "), NULL, 16); + } + else if (!STRHEADCMP(s, "Model: ")) + { + model_name = strdup(s + strlen("Model: ")); + model_name[strlen(model_name) - 1] = '\0'; /* Drop '\n' */ + } + } + + /* All models loaded */ + { + print_archs(output); + + fprintf(output, "%%rec: timing\n\n"); + + int nb_combs = starpu_perfmodel_get_narch_combs(); + + HASH_ITER(hh, models, model, tmp) + { + struct _footprint_list* lf = model->footprints, *ltmp; + int comb; + while(lf) + { + 
for(comb = 0; comb < nb_combs; comb++) + { + char archname[32]; + get_comb_name(comb, archname, 32); + + if(!model->model.state || model->model.state->nimpls[comb] == 0) + { + _STARPU_DISP("Symbol %s does not have any implementation on comb %d, not dumping\n", model->name, comb); + continue; + } + + if(model->model.state->nimpls[comb] > 1) + _STARPU_DISP("Warning, more than one implementations in comb %d of symbol %s, using only the first one\n", comb, model->name); + + struct starpu_perfmodel_per_arch *arch_model = &model->model.state->per_arch[comb][0]; + struct starpu_perfmodel_history_list *ptr; + + ptr = arch_model->list; + if(!ptr) + _STARPU_DISP("Implementation %d of symbol %s does not have history based model, not dumping\n", comb, model->name); + else while(ptr) + { + struct starpu_perfmodel_history_entry *entry = ptr->entry; + if(entry->footprint == lf->footprint) + { + print_entry(model->name, archname, output, entry); + break; + } + ptr=ptr->next; + } + } + ltmp = lf->next; + free(lf); + lf = ltmp; + } + + starpu_perfmodel_unload_model(&model->model); + free(model->name); + HASH_DEL(models, model); + free(model); + } + } + fclose(input); + } + else + { + fprintf(output, "%%rec: timing\n\n"); + + char **paths; + DIR *dp; + struct dirent *ep; + int i; + + paths = _starpu_get_perf_model_dirs_codelet(); + for(i=0 ; paths[i] != NULL ; i++) + { + _STARPU_DISP("Processing directory %s\n", paths[i]); + dp = opendir(paths[i]); + if (dp != NULL) + { + while ((ep = readdir(dp))) + { + if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, "..")) + { + int comb, nb_combs; + char* symbol = strdup(ep->d_name); + char *dot = strrchr(symbol, '.'); + struct starpu_perfmodel model = {.type = STARPU_PERFMODEL_INVALID }; + + if(dot) *dot = '\0'; + if (starpu_perfmodel_load_symbol(symbol, &model) != 0) + { + free(symbol); + continue; + } + if(model.state == NULL) + { + free(symbol); + continue; + } + + _STARPU_DISP("Dumping %s\n", symbol); + + nb_combs = 
starpu_perfmodel_get_narch_combs(); + for(comb = 0; comb < nb_combs; ++comb) + { + char name[32]; + get_comb_name(comb, name, 32); + + if(!model.state || model.state->nimpls[comb] == 0) + { + _STARPU_DISP("Symbol %s does not have any implementation on comb %d, not dumping\n", symbol, comb); + fprintf(output, "\n"); + continue; + } + + struct starpu_perfmodel_per_arch *arch_model = &model.state->per_arch[comb][0]; + struct starpu_perfmodel_history_list *ptr; + + ptr = arch_model->list; + if(!ptr) + _STARPU_DISP("Symbol %s for comb %d does not have history based model, not dumping\n", symbol, comb); + else + { + while(ptr) + { + print_entry(symbol, name, output, ptr->entry); + ptr=ptr->next; + } + } + } + starpu_perfmodel_unload_model(&model); + free(symbol); + } + } + closedir(dp); + } + else + { + _STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", paths[i], strerror(errno)); + } + } + print_archs(output); + } + + starpu_resume(); + starpu_shutdown(); + + fclose(output); + return 0; +#endif +} diff --git a/tools/starpu_replay.c b/tools/starpu_replay.c new file mode 100644 index 0000000..1e63873 --- /dev/null +++ b/tools/starpu_replay.c @@ -0,0 +1,1198 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +/* + * This reads a tasks.rec file and replays the recorded task graph. + * Currently, this version is done to run with simgrid. + * + * For further information, contact erwan.leria@inria.fr + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + + +#define REPLAY_NMAX_DEPENDENCIES 8 + +#define ARRAY_DUP(in, out, n) memcpy(out, in, n * sizeof(*out)) +#define ARRAY_INIT(array, n) memset(array, 0, n * sizeof(*array)) + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Declarations of global variables, structures, pointers, ... * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static int static_workerid; + +/* TODO: move to core header while moving starpu_replay_sched to core */ +extern void schedRecInit(const char * filename); +extern void applySchedRec(struct starpu_task * starpu_task, long submit_order); + +/* Enum for normal and "wontuse" tasks */ +enum task_type {NormalTask, WontUseTask}; + +typedef unsigned long jobid_t; + +enum task_type control; +static char *name = NULL; +static char *model = NULL; +static jobid_t jobid; +static jobid_t *dependson; +static long submitorder = -1; +static starpu_tag_t tag; +static int workerid; +static uint32_t footprint; +static double flops, total_flops = 0.; + +static double startTime; //start time (The instant when the task starts) +static double endTime; //end time (The instant when the task ends) + +static int iteration = -1; + +static starpu_data_handle_t handles[STARPU_NMAXBUFS]; +static enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; +static char normal_reg_signal[STARPU_NMAXBUFS]; + +/* Use the following arrays when the number of data is greater than STARPU_NMAXBUFS */ + +starpu_data_handle_t * handles_ptr; +enum starpu_data_access_mode * modes_ptr; +size_t * sizes_set; + +static size_t 
dependson_size; +static size_t ndependson; + +static unsigned nb_parameters = 0; /* Number of parameters */ +static int alloc_mode; /* If alloc_mode is 1, the handles are stored in the static handles array, else they are in dynamically allocated arrays */ + +static int priority = 0; + +char * reg_signal = NULL; /* The register signal (0 or 1, coded on 8 bits) is used to know which handles of the task have to be registered in StarPU (in fact, to avoid registering a handle twice) */ + +/* Record all tasks, hashed by jobid. */ +static struct task +{ + struct starpu_rbtree_node node; + UT_hash_handle hh; + jobid_t jobid; + int iteration; + long submit_order; + jobid_t *deps; + size_t ndependson; + struct starpu_task task; + enum task_type type; + int reg_signal; +} *tasks; + +/* Record handles */ +static struct handle +{ + UT_hash_handle hh; + starpu_data_handle_t mem_ptr; /* This value should be the registered handle */ + starpu_data_handle_t handle; /* The key is the original value of the handle in the file */ +} * handles_hash; + +/* Record models */ + +static struct perfmodel +{ + UT_hash_handle hh; + struct starpu_perfmodel perfmodel; + char * model_name; +} * model_hash; + + + +/* + * Replay data interface + * We don't care about many things anyway, essentially only sizes. 
+ */ + +struct replay_interface +{ + enum starpu_data_interface_id id; + starpu_data_handle_t orig_handle; + size_t size; + size_t alloc_size; + size_t max_size; +}; + +static struct starpu_data_interface_ops replay_interface_ops; +static void register_replay(starpu_data_handle_t handle, int home_node, void *data_interface) +{ + (void) home_node; + struct replay_interface *replay_interface = data_interface; + unsigned node; + for (node = 0; node < STARPU_MAXNODES; node++) + { + struct replay_interface *local_interface = + starpu_data_get_interface_on_node(handle, node); + + local_interface->id = replay_interface->id; + local_interface->orig_handle = replay_interface->orig_handle; + local_interface->size = replay_interface->size; + local_interface->alloc_size = replay_interface->alloc_size; + local_interface->max_size = replay_interface->max_size; + } +} + +static void replay_data_register(starpu_data_handle_t *handleptr, starpu_data_handle_t orig_handle, int home_node, size_t size, size_t alloc_size, size_t max_size) +{ + struct replay_interface interface = + { + .id = replay_interface_ops.interfaceid, + .orig_handle = orig_handle, + .size = size, + .alloc_size = alloc_size, + .max_size = max_size, + }; + + starpu_data_register(handleptr, home_node, &interface, &replay_interface_ops); +} + +static size_t replay_get_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->size; +} + +static size_t replay_get_alloc_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->alloc_size; +} + +static size_t replay_get_max_size(starpu_data_handle_t handle) +{ + struct replay_interface *interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + return interface->max_size; +} + +static uint32_t replay_footprint(starpu_data_handle_t handle) +{ + return 
starpu_hash_crc32c_be(replay_get_size(handle), 0); +} + +static int replay_compare(void *data_interface_a, void *data_interface_b) +{ + struct replay_interface *replay_a = data_interface_a; + struct replay_interface *replay_b = data_interface_b; + + /* Two variables are considered compatible if they have the same size */ + return replay_a->size == replay_b->size; +} + +static void display_replay(starpu_data_handle_t handle, FILE *f) +{ + struct replay_interface *replay_interface = + starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + + fprintf(f, "%lu/%lu/%lu\t", + (unsigned long) replay_interface->size, + (unsigned long) replay_interface->alloc_size, + (unsigned long) replay_interface->max_size); +} + +static starpu_ssize_t describe_replay(void *data_interface, char *buf, size_t size) +{ + struct replay_interface *replay_interface = data_interface; + return snprintf(buf, size, "r%lu/%lu/%lu\t", + (unsigned long) replay_interface->size, + (unsigned long) replay_interface->alloc_size, + (unsigned long) replay_interface->max_size); +} + +static starpu_ssize_t allocate_replay_on_node(void *data_interface, unsigned dst_node) +{ + struct replay_interface *replay_interface = data_interface; + starpu_memory_allocate(dst_node, replay_interface->alloc_size, STARPU_MEMORY_OVERFLOW); + return 0; +} + +static void free_replay_on_node(void *data_interface, unsigned dst_node) +{ + struct replay_interface *replay_interface = data_interface; + starpu_memory_deallocate(dst_node, replay_interface->alloc_size); +} + +static int replay_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) +{ + (void) dst_interface; + struct replay_interface *src = src_interface; + + /* We don't care about pointers */ + return starpu_interface_copy(1, 0, src_node, 1, 0, dst_node, src->size, async_data); +} + +static const struct starpu_data_copy_methods replay_copy_data_methods = +{ + .any_to_any = replay_copy, +}; + +static struct 
starpu_data_interface_ops replay_interface_ops = +{ + .register_data_handle = register_replay, + .allocate_data_on_node = allocate_replay_on_node, + .free_data_on_node = free_replay_on_node, + .copy_methods = &replay_copy_data_methods, + .get_size = replay_get_size, + .get_alloc_size = replay_get_alloc_size, + .get_max_size = replay_get_max_size, + .footprint = replay_footprint, + .compare = replay_compare, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(struct replay_interface), + .display = display_replay, + .pack_data = NULL, + .peek_data = NULL, + .unpack_data = NULL, + .describe = describe_replay, + + /* We want to observe actual allocations/deallocations */ + .dontcache = 1, +}; + + +/* [SUBMITORDER] The tree of the submit order */ + +static struct starpu_rbtree tree = STARPU_RBTREE_INITIALIZER; + +/* the cmp_fn arg for rb_tree_insert() */ +unsigned int diff(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) +{ + long oleft = ((struct task *) left_elm)->submit_order; + long oright = ((struct task *) right_elm)->submit_order; + if (oleft == -1 && oright == -1) + { + if (left_elm < right_elm) + return -1; + else + return 1; + } + return oleft - oright; +} + +/* Settings for the perfmodel */ +struct task_arg +{ + uint32_t footprint; + unsigned narch; + double perf[]; +}; + +uint32_t get_footprint(struct starpu_task * task) +{ + return ((struct task_arg*) (task->cl_arg))->footprint; +} + +double arch_cost_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) +{ + int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + STARPU_ASSERT(device != -1); + (void) nimpl; + + /* Then, get the pointer to the value of the expected time */ + struct task_arg *arg = task->cl_arg; + if (device < (int) arg->narch) + { + double val = arg->perf[device]; + + if (!(val == 0 || isnan(val))) + return val; + } + + fprintf(stderr, "[starpu] Error, expected_time is 0 or lower 
(replay.c line : %d)", __LINE__- 6); + + return 0.0; +} + +/* End of settings */ + +static unsigned long nexecuted_tasks; +void dumb_kernel(void *buffers[], void *args) +{ + (void) buffers; + (void) args; + nexecuted_tasks++; + if (!(nexecuted_tasks % 1000)) + { + fprintf(stderr, "\rExecuted task %lu...", nexecuted_tasks); + fflush(stdout); + } + + unsigned this_worker = starpu_worker_get_id_check(); + struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(this_worker, STARPU_NMAX_SCHED_CTXS); + + struct starpu_task *task = starpu_task_get_current(); + unsigned impl = starpu_task_get_implementation(task); + + double length = starpu_task_expected_length(task, perf_arch, impl); + + STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), + "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated", + starpu_task_get_name(task)); + + starpu_sleep(length / 1000000); +} + +/* [CODELET] Initialization of an unique codelet for all the tasks*/ +static int can_execute(unsigned worker_id, struct starpu_task *task, unsigned nimpl) +{ + struct starpu_perfmodel_arch * arch = starpu_worker_get_perf_archtype(worker_id, STARPU_NMAX_SCHED_CTXS); + int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); + if (device == -1) + /* Doesn't exist yet, thus unknown, assuming it can not work there. 
*/ + return 0; + (void) nimpl; + + /* Then, get the pointer to the value of the expected time */ + struct task_arg *arg = task->cl_arg; + if (device < (int) arg->narch) + { + double val = arg->perf[device]; + + if (!(val == 0 || isnan(val))) + return 1; + } + + return 0; +} + +static struct starpu_perfmodel myperfmodel = +{ + .type = STARPU_PER_ARCH, + .arch_cost_function = arch_cost_function, + .footprint = get_footprint, +}; + +static struct starpu_codelet cl = +{ + .cpu_funcs = { dumb_kernel }, + .cpu_funcs_name = { "dumb_kernel" }, + .cuda_funcs = { dumb_kernel }, + .opencl_funcs = { dumb_kernel }, + .nbuffers = STARPU_VARIABLE_NBUFFERS, + .can_execute = can_execute, + .model = &myperfmodel, + .flags = STARPU_CODELET_SIMGRID_EXECUTE, +}; + + +/* * * * * * * * * * * * * * +* * * * * Functions * * * * * +* * * * * * * * * * * * * * */ + + +/* The following function checks if the program has to use static or dynamic arrays*/ +static int set_alloc_mode(int total_parameters) +{ + return total_parameters <= STARPU_NMAXBUFS; +} + +/* According to the allocation mode, modify handles_ptr and modes_ptr in static or dynamic */ +static void arrays_managing(int mode) +{ + if (mode) + { + handles_ptr = &handles[0]; + modes_ptr = &modes[0]; + reg_signal = &normal_reg_signal[0]; + } + else + { + _STARPU_MALLOC(handles_ptr, sizeof(*handles_ptr) * nb_parameters); + _STARPU_MALLOC(modes_ptr, sizeof(*modes_ptr) * nb_parameters); + _STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char)); + + } +} + +static unsigned count_number_tokens(const char* buffer, const char* delim) +{ + char* dup = strdup(buffer); + int result = 0; + char* token = strtok(dup, delim); + while(token != NULL) + { + ++result; + token = strtok(NULL, delim); + } + free(dup); + return result; +} + +/* Check if a handle hasn't been registered yet */ +static void variable_data_register_check(size_t * array_of_size, int nb_handles) +{ + int h, i; + starpu_data_handle_t orig_handles[nb_handles]; + + 
ARRAY_DUP(handles_ptr, orig_handles, nb_handles); + + for (h = 0 ; h < nb_handles ; h++) + { + if(reg_signal[h]) /* Get the register signal, if it's 1 do ... */ + { + struct handle * handles_cell; + + for (i = 0; i < h; i++) + { + /* Maybe we just registered it in this very h loop */ + if (handles_ptr[h] == orig_handles[i]) + { + handles_ptr[h] = handles_ptr[i]; + break; + } + } + + if (i == h) + { + _STARPU_MALLOC(handles_cell, sizeof(*handles_cell)); + STARPU_ASSERT(handles_cell != NULL); + + handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/ + + replay_data_register(handles_ptr+h, handles_ptr[h], + modes_ptr[h] & STARPU_R ? STARPU_MAIN_RAM : -1, + array_of_size[h], array_of_size[h], array_of_size[h]); + + handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */ + + HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell); + } + } + } +} + +void reset(void) +{ + control = NormalTask; + + if (name != NULL) + { + free(name); + name = NULL; + } + + if (model != NULL) + { + free(model); + model = NULL; + } + + if (sizes_set != NULL) + { + free(sizes_set); + sizes_set = NULL; + } + + if (reg_signal != NULL) + { + if (!alloc_mode) + { + free(reg_signal); + reg_signal = NULL; + } + else + { + ARRAY_INIT(reg_signal, nb_parameters); + } + } + + jobid = 0; + ndependson = 0; + tag = -1; + workerid = -1; + footprint = 0; + startTime = 0.0; + endTime = 0.0; + + if (submitorder != -1) + submitorder = -1; + + iteration = -1; + nb_parameters = 0; + alloc_mode = 1; +} + +void fix_wontuse_handle(struct task * wontuseTask) +{ + STARPU_ASSERT(wontuseTask); + + if (!wontuseTask->reg_signal) + /* Data was already registered when we created this task, so it's already a handle */ + return; + + struct handle *handle_tmp; + + /* Data was not registered when we created this task, so this is the application pointer, look it up now */ + HASH_FIND(hh, handles_hash, 
&wontuseTask->task.handles[0], sizeof(wontuseTask->task.handles[0]), handle_tmp); + + if (handle_tmp) + wontuseTask->task.handles[0] = handle_tmp->mem_ptr; + else + /* This data wasn't actually used, don't care about it */ + wontuseTask->task.handles[0] = NULL; +} + +/* Function that submits all the tasks (used when the program reaches EOF) */ +int submit_tasks(void) +{ + /* Add dependencies */ + + const struct starpu_rbtree * tmptree = &tree; + struct starpu_rbtree_node * currentNode = starpu_rbtree_first(tmptree); + long last_submitorder = 0; + + while (currentNode != NULL) + { + struct task * currentTask = (struct task *) currentNode; + + if (currentTask->type == NormalTask) + { + if (currentTask->submit_order != -1) + { + STARPU_ASSERT(currentTask->submit_order >= last_submitorder + 1); + + while (currentTask->submit_order > last_submitorder + 1) + { + /* Oops, some tasks were not submitted by original application, fake some */ + struct starpu_task *task = starpu_task_create(); + int ret; + task->cl = NULL; + task->name = "fake task for submit order"; + ret = starpu_task_submit(task); + STARPU_ASSERT(ret == 0); + last_submitorder++; + } + } + + if (currentTask->ndependson > 0) + { + struct starpu_task * taskdeps[currentTask->ndependson]; + unsigned i, j = 0; + + for (i = 0; i < currentTask->ndependson; i++) + { + struct task * taskdep; + + /* Get the ith jobid of deps_jobid */ + HASH_FIND(hh, tasks, &currentTask->deps[i], sizeof(jobid), taskdep); + + if(taskdep) + { + taskdeps[j] = &taskdep->task; + j ++; + } + } + + starpu_task_declare_deps_array(&currentTask->task, j, taskdeps); + } + + if (!(currentTask->iteration == -1)) + starpu_iteration_push(currentTask->iteration); + + applySchedRec(&currentTask->task, currentTask->submit_order); + if (currentTask->submit_order == -1) + currentTask->task.no_submitorder = 1; + int ret_val = starpu_task_submit(&currentTask->task); + + if (!(currentTask->iteration == -1)) + starpu_iteration_pop(); + + if (ret_val != 0) + { + fprintf(stderr, 
"\nWhile submitting task %ld (%s): return %d\n", + currentTask->submit_order, + currentTask->task.name? currentTask->task.name : "unknown", + ret_val); + return -1; + } + + + //fprintf(stderr, "submitting task %s (%lu, %llu)\n", currentTask->task.name?currentTask->task.name:"anonymous", currentTask->jobid, (unsigned long long) currentTask->task.tag_id); + if (!(currentTask->submit_order % 1000)) + { + fprintf(stderr, "\rSubmitted task order %ld...", currentTask->submit_order); + fflush(stdout); + } + if (currentTask->submit_order != -1) + last_submitorder++; + } + + else + { + fix_wontuse_handle(currentTask); /* Add the handle in the wontuse task */ + if (currentTask->task.handles[0]) + { + starpu_data_wont_use(currentTask->task.handles[0]); + last_submitorder++; + } + } + + currentNode = starpu_rbtree_next(currentNode); + + } + fprintf(stderr, " done.\n"); + + return 1; +} + + +/* * * * * * * * * * * * * * * */ +/* * * * * * MAIN * * * * * * */ +/* * * * * * * * * * * * * * */ + +static void usage(const char *program) +{ + fprintf(stderr,"Usage: %s [--static-workerid] tasks.rec [sched.rec]\n", program); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + FILE *rec; + char *s; + const char *tasks_rec = NULL; + const char *sched_rec = NULL; + unsigned i; + size_t s_allocated = 128; + + unsigned long nread_tasks = 0; + + /* FIXME: we do not support data with sequential consistency disabled */ + + _STARPU_MALLOC(s, s_allocated); + dependson_size = REPLAY_NMAX_DEPENDENCIES; /* Change the value of REPLAY_NMAX_DEPENCIES to modify the number of dependencies */ + _STARPU_MALLOC(dependson, dependson_size * sizeof (* dependson)); + alloc_mode = 1; + + for (i = 1; i < (unsigned) argc; i++) + { + if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + usage(argv[0]); + } + else if (!strcmp(argv[i], "--static-workerid")) + { + static_workerid = 1; + } + else + { + if (!tasks_rec) + tasks_rec = argv[i]; + else if (!sched_rec) + sched_rec = argv[i]; + else + 
usage(argv[0]); + } + } + + if (!tasks_rec) + usage(argv[0]); + + if (sched_rec) + schedRecInit(sched_rec); + + rec = fopen(tasks_rec, "r"); + if (!rec) + { + fprintf(stderr,"unable to open file %s: %s\n", tasks_rec, strerror(errno)); + exit(EXIT_FAILURE); + } + + int ret = starpu_init(NULL); + if (ret == -ENODEV) goto enodev; + + /* Read line by line, and on empty line submit the task with the accumulated information */ + reset(); + + double start = starpu_timing_now(); + int linenum = 0; + + while(1) + { + char *ln; + + if (!fgets(s, s_allocated, rec)) + { + fprintf(stderr, " done.\n"); + int submitted = submit_tasks(); + + if (submitted == -1) + { + goto enodev; + } + + goto eof; + } + + while (!(ln = strchr(s, '\n'))) + { + /* fprintf(stderr,"buffer size %d too small, doubling it\n", s_allocated); */ + _STARPU_REALLOC(s, s_allocated * 2); + + if (!fgets(s + s_allocated-1, s_allocated+1, rec)) + { + fprintf(stderr, "\n"); + int submitted = submit_tasks(); + + if (submitted == -1) + { + goto enodev; + } + + goto eof; + } + + s_allocated *= 2; + } + + linenum++; + + if (ln == s) + { + /* Empty line, do task */ + + struct task * task; + _STARPU_MALLOC(task, sizeof(*task)); + + starpu_task_init(&task->task); + task->deps = NULL; + + task->submit_order = submitorder; + + starpu_rbtree_node_init(&task->node); + starpu_rbtree_insert(&tree, &task->node, diff); + + + task->jobid = jobid; + task->iteration = iteration; + + if (name != NULL) + task->task.name = strdup(name); + + task->type = control; + + if (control == NormalTask) + { + if (workerid >= 0) + { + task->task.priority = priority; + task->task.cl = &cl; + if (static_workerid) + { + task->task.workerid = workerid; + task->task.execute_on_a_specific_worker = 1; + } + + if (alloc_mode) + { + /* Duplicating the handles stored (and registered in the current context) into the task */ + + ARRAY_DUP(modes_ptr, task->task.modes, nb_parameters); + ARRAY_DUP(modes_ptr, task->task.cl->modes, nb_parameters); + 
variable_data_register_check(sizes_set, nb_parameters); + ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); + } + else + { + task->task.dyn_modes = modes_ptr; + _STARPU_MALLOC(task->task.cl->dyn_modes, (sizeof(*task->task.cl->dyn_modes) * nb_parameters)); + ARRAY_DUP(modes_ptr, task->task.cl->dyn_modes, nb_parameters); + variable_data_register_check(sizes_set, nb_parameters); + task->task.dyn_handles = handles_ptr; + } + + task->task.nbuffers = nb_parameters; + + struct perfmodel * realmodel; + + HASH_FIND_STR(model_hash, model, realmodel); + + if (realmodel == NULL) + { + int len = strlen(model); + _STARPU_CALLOC(realmodel, 1, sizeof(struct perfmodel)); + + _STARPU_MALLOC(realmodel->model_name, sizeof(char) * (len+1)); + realmodel->model_name = strcpy(realmodel->model_name, model); + + starpu_perfmodel_init(&realmodel->perfmodel); + + int error = starpu_perfmodel_load_symbol(model, &realmodel->perfmodel); + + if (!error) + { + HASH_ADD_STR(model_hash, model_name, realmodel); + } + else + { + + fprintf(stderr, "[starpu][Warning] Error loading perfmodel symbol %s\n", model); + fprintf(stderr, "[starpu][Warning] Taking only measurements from the given execution, and forcing execution on worker %d\n", workerid); + starpu_perfmodel_unload_model(&realmodel->perfmodel); + free(realmodel->model_name); + free(realmodel); + realmodel = NULL; + } + + } + + struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0); + + unsigned comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); + unsigned narch = starpu_perfmodel_get_narch_combs(); + + struct task_arg *arg; + _STARPU_MALLOC(arg, sizeof(struct task_arg) + sizeof(double) * narch); + arg->footprint = footprint; + arg->narch = narch; + double * perfTime = arg->perf; + + if (realmodel == NULL) + { + /* Erf, do without perfmodel, for execution there */ + task->task.workerid = workerid; + task->task.execute_on_a_specific_worker = 1; + for (i = 0; i < narch ; i++) + { + if (i == 
comb) + perfTime[i] = endTime - startTime; + else + perfTime[i] = NAN; + } + } + else + { + int one = 0; + for (i = 0; i < narch ; i++) + { + arch = starpu_perfmodel_arch_comb_fetch(i); + perfTime[i] = starpu_perfmodel_history_based_expected_perf(&realmodel->perfmodel, arch, footprint); + if (!(perfTime[i] == 0 || isnan(perfTime[i]))) + one = 1; + } + if (!one) + { + fprintf(stderr, "We do not have any performance measurement for symbol '%s' for footprint %x, we can not execute this", model, footprint); + exit(EXIT_FAILURE); + } + } + + task->task.cl_arg = arg; + task->task.flops = flops; + total_flops += flops; + } + + task->task.cl_arg_size = 0; + task->task.tag_id = tag; + task->task.use_tag = 1; + + task->ndependson = ndependson; + if (ndependson > 0) + { + _STARPU_MALLOC(task->deps, ndependson * sizeof (* task->deps)); + ARRAY_DUP(dependson, task->deps, ndependson); + } + } + + else + { + STARPU_ASSERT(nb_parameters == 1); + task->reg_signal = reg_signal[0]; + ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); + } + + /* Add this task to task hash */ + HASH_ADD(hh, tasks, jobid, sizeof(jobid), task); + + nread_tasks++; + if (!(nread_tasks % 1000)) + { + fprintf(stderr, "\rRead task %lu...", nread_tasks); + fflush(stdout); + } + + reset(); + } + + /* Record various information */ +#define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) + + else if(TEST("Control")) + { + char * c = s+9; + + if(!strncmp(c, "WontUse", 7)) + { + control = WontUseTask; + nb_parameters = 1; + alloc_mode = set_alloc_mode(nb_parameters); + arrays_managing(alloc_mode); + } + else + control = NormalTask; + } + else if (TEST("Name")) + { + *ln = 0; + name = strdup(s+6); + } + else if (TEST("Model")) + { + *ln = 0; + model = strdup(s+7); + } + else if (TEST("JobId")) + jobid = atol(s+7); + else if(TEST("SubmitOrder")) + submitorder = atoi(s+13); + else if (TEST("DependsOn")) + { + char *c = s + 11; + + for (ndependson = 0; *c != '\n'; ndependson++) + { + if (ndependson >= 
dependson_size) + { + dependson_size *= 2; + _STARPU_REALLOC(dependson, dependson_size * sizeof(*dependson)); + } + dependson[ndependson] = strtol(c, &c, 10); + } + } + else if (TEST("Tag")) + { + tag = strtol(s+5, NULL, 16); + } + else if (TEST("WorkerId")) + { + workerid = atoi(s+10); + } + else if (TEST("Footprint")) + { + footprint = strtoul(s+11, NULL, 16); + } + else if (TEST("Parameters")) + { + /* Nothing to do */ + } + else if (TEST("Handles")) + { + *ln = 0; + char *buffer = s + 9; + const char *delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + + if(nb_parameters == 0) + { + nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + char* token = strtok(buffer, delim); + for (i = 0 ; i < nb_parameters ; i++) + { + STARPU_ASSERT(token); + struct handle *handles_cell; /* A cell of the hash table for the handles */ + starpu_data_handle_t handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */ + + HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */ + + /* If it wasn't, then add it to the hash table */ + if (handles_cell == NULL) + { + /* Hide the initial handle from the file into the handles array to find it when necessary */ + handles_ptr[i] = handle_value; + reg_signal[i] = 1; + } + else + { + handles_ptr[i] = handles_cell->mem_ptr; + reg_signal[i] = 0; + } + + token = strtok(NULL, delim); + } + } + else if (TEST("Modes")) + { + *ln = 0; + char * buffer = s + 7; + unsigned mode_i = 0; + const char * delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + + if(nb_parameters == 0) + { + nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + 
char* token = strtok(buffer, delim); + + while (token != NULL && mode_i < nb_parameters) + { + /* Subject to the names of starpu modes enumerator are not modified */ + if (!strncmp(token, "RW", 2)) + { + *(modes_ptr+mode_i) = STARPU_RW; + mode_i++; + } + else if (!strncmp(token, "R", 1)) + { + *(modes_ptr+mode_i) = STARPU_R; + mode_i++; + } + else if (!strncmp(token, "W", 1)) + { + *(modes_ptr+mode_i) = STARPU_W; + mode_i++; + } + /* Other cases produce a warning*/ + else + { + fprintf(stderr, "[Warning] A mode is different from R/W (jobid task : %lu)", jobid); + } + token = strtok(NULL, delim); + } + } + else if (TEST("Sizes")) + { + *ln = 0; + char * buffer = s + 7; + const char * delim = " "; + unsigned nb_parameters_line = count_number_tokens(buffer, delim); + unsigned k = 0; + + if(nb_parameters == 0) + { + nb_parameters = nb_parameters_line; + arrays_managing(set_alloc_mode(nb_parameters)); + } + else + STARPU_ASSERT(nb_parameters == nb_parameters_line); + + _STARPU_MALLOC(sizes_set, nb_parameters * sizeof(size_t)); + + char * token = strtok(buffer, delim); + while (token != NULL && k < nb_parameters) + { + sizes_set[k] = strtol(token, NULL, 10); + token = strtok(NULL, delim); + + k++; + } + } + else if (TEST("StartTime")) + { + startTime = strtod(s+11, NULL); + } + else if (TEST("EndTime")) + { + endTime = strtod(s+9, NULL); + } + else if (TEST("GFlop")) + { + flops = 1000000000 * strtod(s+7, NULL); + } + else if (TEST("Iteration")) + { + iteration = (unsigned) strtol(s+11, NULL, 10); + } + else if (TEST("Priority")) + { + priority = strtol(s + 10, NULL, 10); + } + } + +eof: + + starpu_task_wait_for_all(); + fprintf(stderr, " done.\n"); + + printf("%g ms", (starpu_timing_now() - start) / 1000.); + if (total_flops != 0.) 
+ printf("\t%g GF/s", (total_flops / (starpu_timing_now() - start)) / 1000.); + printf("\n"); + + /* FREE allocated memory */ + + free(dependson); + free(s); + + /* End of FREE */ + + struct handle *handle=NULL, *handletmp=NULL; + HASH_ITER(hh, handles_hash, handle, handletmp) + { + starpu_data_unregister(handle->mem_ptr); + HASH_DEL(handles_hash, handle); + free(handle); + } + + struct perfmodel *model_s=NULL, *modeltmp=NULL; + HASH_ITER(hh, model_hash, model_s, modeltmp) + { + starpu_perfmodel_unload_model(&model_s->perfmodel); + HASH_DEL(model_hash, model_s); + free(model_s->model_name); + free(model_s); + } + + struct task *task=NULL, *tasktmp=NULL; + HASH_ITER(hh, tasks, task, tasktmp) + { + free(task->task.cl_arg); + free((char*)task->task.name); + + if (task->task.dyn_handles != NULL) + { + free(task->task.dyn_handles); + free(task->task.dyn_modes); + } + + HASH_DEL(tasks, task); + starpu_task_clean(&task->task); + free(task->deps); + starpu_rbtree_remove(&tree, &task->node); + free(task); + } + + starpu_shutdown(); + return 0; + +enodev: + starpu_shutdown(); + return 77; +} diff --git a/tools/starpu_replay_sched.c b/tools/starpu_replay_sched.c new file mode 100644 index 0000000..3b5c674 --- /dev/null +++ b/tools/starpu_replay_sched.c @@ -0,0 +1,439 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2017-2017 Erwan Leria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/* + * This reads a sched.rec file and mangles submitted tasks according to the hint + * from that file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +// +// sched.rec files look like this: +// +// SubmitOrder: 1234 +// Priority: 12 +// SpecificWorker: 1 +// Workers: 0 1 2 +// DependsOn: 1235 +// +// Prefetch: 1234 +// DependsOn: 1233 +// MemoryNode: 1 +// Parameters: 1 + +#define CPY(src, dst, n) memcpy(dst, src, n * sizeof(*dst)) + +#if 0 +#define debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#else +#define debug(fmt, ...) (void)0 +#endif + +static unsigned long submitorder; /* Also use as prefetchtag */ +static int priority; +static int eosw; +static unsigned workerorder; +static int memnode; +/* FIXME: MAXs */ +static uint32_t workers[STARPU_NMAXWORKERS/32]; +static unsigned nworkers; +static unsigned dependson[STARPU_NMAXBUFS]; +static unsigned ndependson; +static unsigned params[STARPU_NMAXBUFS]; +static unsigned nparams; + +static enum sched_type +{ + NormalTask, + PrefetchTask, +} sched_type; + +static struct starpu_codelet cl_prefetch = +{ + .where = STARPU_NOWHERE, + .nbuffers = 1, + .modes = { STARPU_R }, +}; + +static struct task +{ + UT_hash_handle hh; + + unsigned long submitorder; + int priority; + int memnode; + unsigned dependson[STARPU_NMAXBUFS]; + unsigned ndependson; + struct starpu_task *depends_tasks[STARPU_NMAXBUFS]; + + /* For real tasks */ + int eosw; + unsigned workerorder; + uint32_t workers[STARPU_NMAXWORKERS/32]; + unsigned nworkers; + + /* For prefetch tasks */ + unsigned params[STARPU_NMAXBUFS]; + unsigned nparams; + struct starpu_task *pref_task; /* Actual prefetch task */ +} *mangled_tasks, *prefetch_tasks; + +LIST_TYPE(dep, + struct task *task; + unsigned i; +); + +struct deps +{ + UT_hash_handle hh; + unsigned long submitorder; + struct dep_list list; +} *dependencies = NULL; + +static void 
reset(void) +{ + submitorder = 0; + priority = INT_MIN; + eosw = -1; + memset(&workers, 0, sizeof(workers)); + nworkers = 0; + ndependson = 0; + sched_type = NormalTask; + nparams = 0; + memnode = -1; + workerorder = 0; +} + +/* TODO : respecter l'ordre de soumission des tâches SubmitOrder */ + + +static void checkField(char * s) +{ + /* Record various information */ +#define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) + + if (TEST("SubmitOrder")) + { + s = s + strlen("SubmitOrder: "); + submitorder = strtol(s, NULL, 10); + } + + else if (TEST("Priority")) + { + s = s + strlen("Priority: "); + priority = strtol(s, NULL, 10); + } + + else if (TEST("SpecificWorker")) + { + s = s + strlen("SpecificWorker: "); + eosw = strtol(s, NULL, 10); + } + + else if (TEST("Workers")) + { + s = s + strlen("Workers: "); + char * delim = " "; + char * token = strtok(s, delim); + int i = 0; + + while (token != NULL) + { + int k = strtol(token, NULL, 10); + STARPU_ASSERT_MSG(k < STARPU_NMAXWORKERS, "%d is bigger than maximum %d\n", k, STARPU_NMAXWORKERS); + workers[k/(sizeof(*workers)*8)] |= (1 << (k%(sizeof(*workers)*8))); + i++; + token = strtok(NULL, delim); + } + + nworkers = i; + } + + else if (TEST("DependsOn")) + { + /* NOTE : dependsons (in the sched.rec) should be the submit orders of the dependencies, + otherwise it can occur an undefined behaviour + (contrary to the tasks.rec where dependencies are jobids */ + unsigned i = 0; + char * delim = " "; + char * token = strtok(s+strlen("DependsOn: "), delim); + + while (token != NULL) + { + dependson[i] = strtol(token, NULL, 10); + i++; + token = strtok(NULL, delim); + } + ndependson = i; + } + + else if (TEST("Prefetch")) + { + s = s + strlen("Prefetch: "); + submitorder = strtol(s, NULL, 10); + sched_type = PrefetchTask; + } + + else if (TEST("Parameters")) + { + s = s + strlen("Parameters: "); + char * delim = " "; + char * token = strtok(s, delim); + int i = 0; + + while (token != NULL) + { + params[i] = 
strtol(token, NULL, 10); + i++; + token = strtok(NULL, delim); + } + nparams = i; + } + + else if (TEST("MemoryNode")) + { + s = s + strlen("MemoryNode: "); + memnode = strtol(s, NULL, 10); + } + + else if (TEST("Workerorder")) + { + s = s + strlen("Workerorder: "); + workerorder = strtol(s, NULL, 10); + } +} + + +void schedRecInit(const char * filename) +{ + FILE * f = fopen(filename, "r"); + + if(f == NULL) + { + fprintf(stderr,"unable to open file %s: %s\n", filename, strerror(errno)); + return; + } + + size_t lnsize = 128; + char *s; + _STARPU_MALLOC(s, sizeof(*s) * lnsize); + int eof = 0; + + reset(); + + while(!eof && !feof(f)) + { + char *ln; + + /* Get the line */ + if (!fgets(s, lnsize, f)) + { + eof = 1; + } + while (!(ln = strchr(s, '\n'))) + { + _STARPU_REALLOC(s, lnsize * 2); + if (!fgets(s + lnsize-1, lnsize+1, f)) + { + eof = 1; + break; + } + lnsize *= 2; + } + + if ((ln == s || eof) && submitorder) + { + /* Empty line, doit */ + struct task * task; + unsigned i; + + _STARPU_MALLOC(task, sizeof(*task)); + task->submitorder = submitorder; + task->priority = priority; + task->memnode = memnode; + CPY(dependson, task->dependson, ndependson); + task->ndependson = ndependson; + + /* Also record submitorder of tasks that this one will need to depend on */ + for (i = 0; i < ndependson; i++) + { + struct dep *dep; + struct starpu_task *starpu_task; + _STARPU_MALLOC(dep, sizeof(*dep)); + dep->task = task; + dep->i = i; + + struct deps *deps; + HASH_FIND(hh, dependencies, &task->dependson[i], sizeof(submitorder), deps); + if (!deps) + { + /* No task depends on this one yet, add a cell for it */ + _STARPU_MALLOC(deps, sizeof(*deps)); + dep_list_init(&deps->list); + deps->submitorder = task->dependson[i]; + HASH_ADD(hh, dependencies, submitorder, sizeof(submitorder), deps); + } + dep_list_push_back(&deps->list, dep); + + /* Create the intermediate task */ + starpu_task = dep->task->depends_tasks[i] = starpu_task_create(); + starpu_task->cl = NULL; + 
starpu_task->destroy = 0; + starpu_task->no_submitorder = 1; + } + + switch (sched_type) + { + case NormalTask: + /* A new task to mangle, record what needs to be done */ + task->eosw = eosw; + task->workerorder = workerorder; + CPY(workers, task->workers, STARPU_NMAXWORKERS/32); + task->nworkers = nworkers; + STARPU_ASSERT(nparams == 0); + + debug("adding mangled task %lu\n", submitorder); + HASH_ADD(hh, mangled_tasks, submitorder, sizeof(submitorder), task); + break; + + case PrefetchTask: + STARPU_ASSERT(memnode >= 0); + STARPU_ASSERT(eosw == -1); + STARPU_ASSERT(workerorder == 0); + STARPU_ASSERT(nworkers == 0); + CPY(params, task->params, nparams); + task->nparams = nparams; + /* TODO: more params */ + STARPU_ASSERT_MSG(nparams == 1, "only supports one parameter at a time"); + + debug("adding prefetch task for %lu\n", submitorder); + HASH_ADD(hh, prefetch_tasks, submitorder, sizeof(submitorder), task); + break; + default: + STARPU_ASSERT(0); + break; + } + + reset(); + } + else checkField(s); + } + + fclose(f); + + free(s); +} + +static void do_prefetch(void *arg) +{ + unsigned node = (uintptr_t) arg; + starpu_data_idle_prefetch_on_node(starpu_task_get_current()->handles[0], node, 1); +} + +void applySchedRec(struct starpu_task *starpu_task, unsigned long submit_order) +{ + struct task *task; + struct deps *deps; + int ret; + + HASH_FIND(hh, dependencies, &submit_order, sizeof(submit_order), deps); + if (deps) + { + struct dep *dep; + for (dep = dep_list_begin(&deps->list); + dep != dep_list_end(&deps->list); + dep = dep_list_next(dep)) + { + debug("task %lu is %d-th dep for %lu\n", submit_order, dep->i, dep->task->submitorder); + /* Some task will depend on this one, make the dependency */ + starpu_task_declare_deps_array(dep->task->depends_tasks[dep->i], 1, &starpu_task); + ret = starpu_task_submit(dep->task->depends_tasks[dep->i]); + STARPU_ASSERT(ret == 0); + } + } + + HASH_FIND(hh, prefetch_tasks, &submit_order, sizeof(submit_order), task); + if (task) + 
{ + /* We want to submit a prefetch for this task */ + debug("task %lu has a prefetch for parameter %d to node %d\n", submit_order, task->params[0], task->memnode); + struct starpu_task *pref_task; + pref_task = task->pref_task = starpu_task_create(); + pref_task->cl = &cl_prefetch; + pref_task->destroy = 1; + pref_task->no_submitorder = 1; + pref_task->callback_arg = (void*)(uintptr_t) task->memnode; + pref_task->callback_func = do_prefetch; + + /* TODO: more params */ + pref_task->handles[0] = starpu_task->handles[task->params[0]]; + /* Make it depend on intermediate tasks */ + if (task->ndependson) + { + debug("%u dependencies\n", task->ndependson); + starpu_task_declare_deps_array(pref_task, task->ndependson, task->depends_tasks); + } + ret = starpu_task_submit(pref_task); + STARPU_ASSERT(ret == 0); + } + + HASH_FIND(hh, mangled_tasks, &submit_order, sizeof(submit_order), task); + if (task == NULL) + /* Nothing to do for this */ + return; + + debug("mangling task %lu\n", submit_order); + if (task->eosw >= 0) + { + debug("execute on a specific worker %d\n", task->eosw); + starpu_task->workerid = task->eosw; + starpu_task->execute_on_a_specific_worker = 1; + } + if (task->workerorder > 0) + { + debug("workerorder %d\n", task->workerorder); + starpu_task->workerorder = task->workerorder; + } + if (task->priority != INT_MIN) + { + debug("priority %d\n", task->priority); + starpu_task->priority = task->priority; + } + if (task->nworkers) + { + debug("%u workers %x\n", task->nworkers, task->workers[0]); + starpu_task->workerids_len = sizeof(task->workers) / sizeof(task->workers[0]); + _STARPU_MALLOC(starpu_task->workerids, task->nworkers * sizeof(*starpu_task->workerids)); + CPY(task->workers, starpu_task->workerids, STARPU_NMAXWORKERS/32); + } + + if (task->ndependson) + { + debug("%u dependencies\n", task->ndependson); + starpu_task_declare_deps_array(starpu_task, task->ndependson, task->depends_tasks); + } + + /* And now, let it go! 
*/ +} diff --git a/tools/starpu_sched_display.c b/tools/starpu_sched_display.c new file mode 100644 index 0000000..42fa66e --- /dev/null +++ b/tools/starpu_sched_display.c @@ -0,0 +1,32 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include + +int +main(void) +{ + struct starpu_sched_policy **policies; + struct starpu_sched_policy **policy; + + policies = starpu_sched_get_predefined_policies(); + for(policy=policies ; *policy!=NULL ; policy++) + printf("%s\n", (*policy)->policy_name); + + return EXIT_SUCCESS; +} diff --git a/tools/starpu_send_recv_data_use.py b/tools/starpu_send_recv_data_use.py new file mode 100755 index 0000000..e4ad77f --- /dev/null +++ b/tools/starpu_send_recv_data_use.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# coding=utf-8 +# +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. 
def convert_rec_file(filename):
    """Parse a ``.rec`` file into a list of dictionaries, one per record.

    Records are separated by empty lines and every other line has the form
    ``Key: value``. Keys are lower-cased. A value made only of digits is
    stored as ``int``, a ``digits.digits`` value as ``float``, and anything
    else as the stripped string. When a key appears twice in a record, the
    first value is kept and a warning is printed.

    Lines without a ``:`` are skipped with a warning, and a last record that
    is not followed by an empty line is still returned.
    """
    records = []
    item = dict()

    with open(filename, "r") as f:
        for l in f:
            if l == "\n":
                # Empty line: end of the current record
                records.append(item)
                item = dict()
                continue

            # Split on the first colon only, the value may contain colons
            key, sep, value = l.partition(":")
            if not sep:
                if l.strip():
                    print("Warning: ignoring malformed line '" + l.strip() + "'")
                continue

            key = key.lower()
            value = value.strip()

            if key in item:
                print("Warning: duplicated key '" + key + "'")
            elif re.match(r'^\d+$', value) is not None:
                item[key] = int(value)
            elif re.match(r'^\d+\.\d+$', value) is not None:
                item[key] = float(value)
            else:
                item[key] = value

    # The file may end without the blank line that closes the last record
    if item:
        records.append(item)

    return records
+ + for c in comms: + t_matched = None + for t in tasks: + if match(t, c): + t_matched = t + break + + if t_matched is not None: + worker = str(t_matched['mpirank']) + "-" + str(t_matched['workerid']) + if worker not in workers: + workers[worker] = [] + + eps = t["starttime"] - c[comm_time_key] + assert eps > 0 + durations.append(eps) + workers[worker].append((c[comm_time_key], eps)) + + if min_time == 0 or c[comm_time_key] < min_time: + min_time = c[comm_time_key] + if max_time == 0 or c[comm_time_key] > max_time: + max_time = c[comm_time_key] + + fig = plt.figure(constrained_layout=True) + + gs = GridSpec(2, 2, figure=fig) + axs = [fig.add_subplot(gs[0, :-1]), fig.add_subplot(gs[1, :-1]), fig.add_subplot(gs[0:, -1])] + i = 0 + for y, x in workers.items(): + # print(y, x) + axs[0].broken_barh(x, [i*10, 8], facecolors=(0.1, 0.2, 0.5, 0.2)) + i += 1 + + i = 0 + for y, x in workers.items(): + for xx in x: + axs[1].broken_barh([xx], [i, 1]) + i += 1 + + axs[0].set_yticks([i*10+4 for i in range(len(workers))]) + axs[0].set_yticklabels(list(workers)) + axs[0].set(xlabel="Time (ms) - Duration: " + str(max_time - min_time) + "ms", + ylabel="Worker [mpi]-[*pu]", title=title) + + if len(durations) != 0: + axs[2].hist(durations, bins=np.logspace(np.log10(1), np.log10(max(durations)), 50), rwidth=0.8) + axs[2].set_xscale("log") + axs[2].set(xlabel=xlabel, ylabel="Number of occurrences", title="Histogramm") + + fig.set_size_inches(15, 9) + + plt.savefig(os.path.join(working_directory, filename), dpi=100) + plt.show() + +plot_graph("recvtime", lambda t, + c: (t["mpirank"] == c["dst"] and t["starttime"] >= c["recvtime"] and str(c["recvhandle"]) in t["handles"]), + "recv_use.png", "Elapsed time between recv and use (ms)", "Time between data reception and its use by a task") +plot_graph("sendtime", lambda t, + c: (t["mpirank"] == c["src"] and t["starttime"] >= c["sendtime"] and str(c["sendhandle"]) in t["handles"]), + "send_use.png", "Elapsed time between send and use (ms)", "Time 
between data sending and its use by a task") diff --git a/tools/starpu_smpi.xslt b/tools/starpu_smpi.xslt new file mode 100644 index 0000000..c711c3c --- /dev/null +++ b/tools/starpu_smpi.xslt @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + - + + + - + + + - + + + - + + + - + + + + + diff --git a/tools/starpu_smpirun b/tools/starpu_smpirun new file mode 100755 index 0000000..e2d1af1 --- /dev/null +++ b/tools/starpu_smpirun @@ -0,0 +1,178 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+# +# Script for running starpu-mpi application in simgrid mode + +prefix=/usr/local +SMPIRUN=/usr/bin/mpiexec +STARPU_DATADIR=${prefix}/share +STARPU_XSLTDIR=$STARPU_DATADIR/starpu +SOURCE_DATADIR=/home/benchmarks/builds/t3_Pvo1xe/0/starpu/starpu/tools +BUILDDIR=/home/benchmarks/builds/t3_Pvo1xe/0/starpu/starpu/tools + +SMPI_VERSION=$($SMPIRUN -version | grep " version " | sed -e 's/.* \([0-9]*\.[0-9]*\).*/\1/') +SMPI_MAJOR=${SMPI_VERSION%.*} +SMPI_MINOR=${SMPI_VERSION#*.} + +if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 13 \) ] +then + DTD=http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd + V=4 + VF=.v4 + DASH=- +else + DTD=http://simgrid.gforge.inria.fr/simgrid.dtd + V=3 + VF="" + DASH=_ +fi + +EXTRA_OPT="" + +if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 16 \) ] +then + EXTRA_OPT+=" --cfg=smpi/privatization:yes" +else + EXTRA_OPT+=" --cfg=smpi/privatize${DASH}global${DASH}variables:yes" +fi + +if [ -n "$TEST_LOGS" ] +then + # Testsuite, use our loader + WRAPPER="-wrapper $BUILDDIR/../tests/loader" +fi + +# When executed from source, take xslt from source +[ "$0" -ef $BUILDDIR/starpu_smpirun ] && STARPU_XSLTDIR=$SOURCE_DATADIR + +MPI_PLATFORM="" +MPI_HOSTFILE="" +NP="" +GDB="" +HOSTFILE_PLATFORM_DETECT="" +while true; do + case "$1" in + "-help"|"-h"|"--help") + echo "$0 [OPTIONS] program" + echo + echo "Available options are": + echo " -platform FILE specify the simgrid cluster file to be used" + echo " -hostfile FILE specify the list of machines to be used": + echo " -np N specify the number of nodes to run": + echo " -hostfile-platform use performance models of each host specified in the" + echo " hostfile" + echo " -gdb run through gdb" + exit 0 + ;; + "-platform") + MPI_PLATFORM=$2 + if [ ! -r "$MPI_PLATFORM" ]; then + echo "$MPI_PLATFORM can't be read" + exit 1 + fi + shift 2 + ;; + "-hostfile") + MPI_HOSTFILE=$2 + if [ ! 
-r "$MPI_HOSTFILE" ]; then + echo "$MPI_HOSTFILE can't be read" + exit 1 + fi + shift 2 + ;; + "-np") + NP=$2 + shift 2 + ;; + "-hostfile-platform") + HOSTFILE_PLATFORM_DETECT=1 + shift 1 + ;; + "-gdb") + GDB="-gdb" + shift 1 + ;; + *) + break + ;; + esac +done + +if [ -z "$MPI_PLATFORM" ] || [ -z "$MPI_HOSTFILE" ]; then + echo "$0 -platform PLATFORM -hostfile HOSTFILE [ -np N ] [ -gdb ] [ ... ] program [ args ]" + exit 2 +fi + +PLATFORM=$(mktemp /tmp/StarPU-MPI-platform-XXXXXXXX.xml) + +[ -n "$STARPU_HOME" ] || STARPU_HOME=$HOME +[ -n "$STARPU_PERF_MODEL_DIR" ] || STARPU_PERF_MODEL_DIR=$STARPU_HOME/.starpu/sampling +[ -n "$STARPU_HOSTNAME" ] || STARPU_HOSTNAME=$(hostname) +NODE_PLATFORM=$STARPU_PERF_MODEL_DIR/bus/${STARPU_HOSTNAME}.platform$VF.xml + +[ -n "$NP" ] || NP=$(grep -v "^$" $MPI_HOSTFILE | wc -l) + +if ! type xsltproc > /dev/null 2> /dev/null +then + echo xsltproc is needed for starpu simgrid mpi. + exit 1 +fi + +if [ -n "$HOSTFILE_PLATFORM_DETECT" ] +then + HOSTS=$(grep -v "^$" $MPI_HOSTFILE) + export STARPU_MPI_HOSTNAMES=$(echo $HOSTS | tr -d '\011\012\015') +fi + +( + cat << EOF + + + + +EOF + tail -n +3 $MPI_PLATFORM | grep -v ' + +EOF +) > $PLATFORM + +STACKSIZE=$(ulimit -s) +[ "$STACKSIZE" != unlimited ] || STACKSIZE=8192 +$SMPIRUN $WRAPPER $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $EXTRA_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE +RET=$? + +rm -f $PLATFORM +exit $RET diff --git a/tools/starpu_smpirun.in b/tools/starpu_smpirun.in new file mode 100644 index 0000000..5d19a0d --- /dev/null +++ b/tools/starpu_smpirun.in @@ -0,0 +1,178 @@ +#!/bin/bash +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +# Script for running starpu-mpi application in simgrid mode + +prefix=@prefix@ +SMPIRUN=@mpiexec_path@ +STARPU_DATADIR=@datarootdir@ +STARPU_XSLTDIR=$STARPU_DATADIR/starpu +SOURCE_DATADIR=@abs_srcdir@ +BUILDDIR=@abs_builddir@ + +SMPI_VERSION=$($SMPIRUN -version | grep " version " | sed -e 's/.* \([0-9]*\.[0-9]*\).*/\1/') +SMPI_MAJOR=${SMPI_VERSION%.*} +SMPI_MINOR=${SMPI_VERSION#*.} + +if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 13 \) ] +then + DTD=http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd + V=4 + VF=.v4 + DASH=- +else + DTD=http://simgrid.gforge.inria.fr/simgrid.dtd + V=3 + VF="" + DASH=_ +fi + +EXTRA_OPT="" + +if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 16 \) ] +then + EXTRA_OPT+=" --cfg=smpi/privatization:yes" +else + EXTRA_OPT+=" --cfg=smpi/privatize${DASH}global${DASH}variables:yes" +fi + +if [ -n "$TEST_LOGS" ] +then + # Testsuite, use our loader + WRAPPER="-wrapper $BUILDDIR/../tests/loader" +fi + +# When executed from source, take xslt from source +[ "$0" -ef $BUILDDIR/starpu_smpirun ] && STARPU_XSLTDIR=$SOURCE_DATADIR + +MPI_PLATFORM="" +MPI_HOSTFILE="" +NP="" +GDB="" +HOSTFILE_PLATFORM_DETECT="" +while true; do + case "$1" in + "-help"|"-h"|"--help") + echo "$0 [OPTIONS] program" + echo + echo "Available options are": + 
echo " -platform FILE specify the simgrid cluster file to be used" + echo " -hostfile FILE specify the list of machines to be used": + echo " -np N specify the number of nodes to run": + echo " -hostfile-platform use performance models of each host specified in the" + echo " hostfile" + echo " -gdb run through gdb" + exit 0 + ;; + "-platform") + MPI_PLATFORM=$2 + if [ ! -r "$MPI_PLATFORM" ]; then + echo "$MPI_PLATFORM can't be read" + exit 1 + fi + shift 2 + ;; + "-hostfile") + MPI_HOSTFILE=$2 + if [ ! -r "$MPI_HOSTFILE" ]; then + echo "$MPI_HOSTFILE can't be read" + exit 1 + fi + shift 2 + ;; + "-np") + NP=$2 + shift 2 + ;; + "-hostfile-platform") + HOSTFILE_PLATFORM_DETECT=1 + shift 1 + ;; + "-gdb") + GDB="-gdb" + shift 1 + ;; + *) + break + ;; + esac +done + +if [ -z "$MPI_PLATFORM" ] || [ -z "$MPI_HOSTFILE" ]; then + echo "$0 -platform PLATFORM -hostfile HOSTFILE [ -np N ] [ -gdb ] [ ... ] program [ args ]" + exit 2 +fi + +PLATFORM=$(mktemp /tmp/StarPU-MPI-platform-XXXXXXXX.xml) + +[ -n "$STARPU_HOME" ] || STARPU_HOME=$HOME +[ -n "$STARPU_PERF_MODEL_DIR" ] || STARPU_PERF_MODEL_DIR=$STARPU_HOME/.starpu/sampling +[ -n "$STARPU_HOSTNAME" ] || STARPU_HOSTNAME=$(hostname) +NODE_PLATFORM=$STARPU_PERF_MODEL_DIR/bus/${STARPU_HOSTNAME}.platform$VF.xml + +[ -n "$NP" ] || NP=$(grep -v "^$" $MPI_HOSTFILE | wc -l) + +if ! type xsltproc > /dev/null 2> /dev/null +then + echo xsltproc is needed for starpu simgrid mpi. + exit 1 +fi + +if [ -n "$HOSTFILE_PLATFORM_DETECT" ] +then + HOSTS=$(grep -v "^$" $MPI_HOSTFILE) + export STARPU_MPI_HOSTNAMES=$(echo $HOSTS | tr -d '\011\012\015') +fi + +( + cat << EOF + + + + +EOF + tail -n +3 $MPI_PLATFORM | grep -v ' + +EOF +) > $PLATFORM + +STACKSIZE=$(ulimit -s) +[ "$STACKSIZE" != unlimited ] || STACKSIZE=8192 +$SMPIRUN $WRAPPER $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $EXTRA_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE +RET=$? 
+ +rm -f $PLATFORM +exit $RET diff --git a/tools/starpu_tasks_rec_complete.1 b/tools/starpu_tasks_rec_complete.1 new file mode 100644 index 0000000..54a8177 --- /dev/null +++ b/tools/starpu_tasks_rec_complete.1 @@ -0,0 +1,13 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. +.TH STARPU_TASKS_REC_COMPLETE "1" "December 2025" "StarPU 1.4.10" "User Commands" +.SH NAME +starpu_tasks_rec_complete \- Complete StarPU tasks.rec file +.SH SYNOPSIS +.B starpu_tasks_rec_complete +[\fI\,input-file \/\fR[\fI\,output-file\/\fR]] +.SH DESCRIPTION +Complete a tasks.rec file with additional information, notably estimated termination times. +.PP +If input or output file names are not given, stdin and stdout are used. +.SH "REPORTING BUGS" +Report bugs to . diff --git a/tools/starpu_tasks_rec_complete.c b/tools/starpu_tasks_rec_complete.c new file mode 100644 index 0000000..ffc8e6e --- /dev/null +++ b/tools/starpu_tasks_rec_complete.c @@ -0,0 +1,202 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROGNAME "starpu_tasks_rec_complete" + +/* + * This program takes a tasks.rec file, and emits a tasks.rec file with + * additional information, notably estimated termination times. 
+ */ + +static struct model +{ + UT_hash_handle hh; + char *name; + struct starpu_perfmodel model; +} *models; + +int main(int argc, char *argv[]) +{ + FILE *input; + FILE *output; + char s[1024], *c; + uint32_t footprint = 0; + int already_there = 0; + char *model_name = NULL; + struct model *model, *tmp=NULL; + int ret; + + if (argc >= 2) + { + if (!strcmp(argv[1], "-h") || + !strcmp(argv[1], "--help")) + { + fprintf(stderr, "Complete a tasks.rec file with additional information, notably estimated termination times.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: %s [input-file [output-file]]\n", PROGNAME); + fprintf(stderr, "\n"); + fprintf(stderr, "If input or output file names are not given, stdin and stdout are used."); + fprintf(stderr, "\n"); + fprintf(stderr, "Report bugs to <%s>.\n", PACKAGE_BUGREPORT); + exit(EXIT_SUCCESS); + } + else if (strncmp(argv[1], "--version", 9) == 0 || strncmp(argv[1], "-v", 2) == 0) + { + fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); + exit(EXIT_FAILURE); + } + } + +#ifdef STARPU_HAVE_SETENV + setenv("STARPU_FXT_TRACE", "0", 1); +#endif + if (starpu_init(NULL) != 0) + { + fprintf(stderr, "StarPU initialization failure\n"); + exit(EXIT_FAILURE); + } + starpu_pause(); + + if (argc >= 2) + { + input = fopen(argv[1], "r"); + if (!input) + { + fprintf(stderr, "couldn't open %s for read: %s\n", argv[1], strerror(errno)); + exit(EXIT_FAILURE); + } + } + else + input = stdin; + + if (argc >= 3) + { + output = fopen(argv[2], "w+"); + if (!output) + { + fprintf(stderr, "couldn't open %s for write: %s\n", argv[1], strerror(errno)); + exit(EXIT_FAILURE); + } + } + else + output = stdout; + + while (fgets(s, sizeof(s), input)) + { + if (strlen(s) == sizeof(s) - 1) + { + fprintf(stderr, "oops, very long line '%s', it's odd\n", s); + exit(EXIT_FAILURE); + } + + if (s[0] == '\n') + { + /* empty line, end of task */ + if (model_name) + { + if (already_there) + { + free(model_name); + } + else + { + /* Try to get 
already-loaded model */ + HASH_FIND_STR(models, model_name, model); + if (model == NULL) + { + _STARPU_MALLOC(model, sizeof(*model)); + model->name = model_name; + memset(&model->model, 0, sizeof(model->model)); + model->model.type = STARPU_PERFMODEL_INVALID; + ret = starpu_perfmodel_load_symbol(model_name, &model->model); + if (ret == 1) + { + fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", model_name); + exit(EXIT_FAILURE); + } + HASH_ADD_STR(models, name, model); + } + else + free(model_name); + fprintf(output, "EstimatedTime: "); + starpu_perfmodel_print_estimations(&model->model, footprint, output); + fprintf(output, "\n"); + } + model_name = NULL; + } + already_there = 0; + fprintf(output, "\n"); + continue; + } + + /* Get rec field name */ + c = strchr(s, ':'); + if (!c) + { + fprintf(stderr, "odd line '%s'\n", s); + exit(EXIT_FAILURE); + } + +#define STRHEADCMP(s, head) strncmp(s, head, strlen(head)) + + if (!STRHEADCMP(s, "Footprint: ")) + { + footprint = strtoul(s + strlen("Footprint: "), NULL, 16); + } + else if (!STRHEADCMP(s, "Model: ")) + { + model_name = strdup(s + strlen("Model: ")); + model_name[strlen(model_name) - 1] = '\0'; /* Drop '\n' */ + } + else if (!STRHEADCMP(s, "EstimatedTime: ")) + { + already_there = 1; + } + fprintf(output, "%s", s); + } + + if (fclose(input)) + { + fprintf(stderr, "couldn't close input: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + if (fclose(output)) + { + fprintf(stderr, "couldn't close output: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + starpu_resume(); + HASH_ITER(hh, models, model, tmp) + { + HASH_DEL(models, model); + starpu_perfmodel_unload_model(&model->model); + free(model->name); + free(model); + } + starpu_shutdown(); + return 0; +} + diff --git a/tools/starpu_tcpipexec b/tools/starpu_tcpipexec new file mode 100755 index 0000000..779189f --- /dev/null +++ b/tools/starpu_tcpipexec @@ -0,0 +1,104 @@ +#! 
/bin/bash + +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +set -e # fail fast +PROGNAME=$0 + +help_script() +{ +cat << EOF +Execute a StarPU TCP IP application + +$0 [option ....] application + +Options: + -np To set the number of workers + -nobind + -ncpus To set the number of threads to use on the TCP/IP Slave devices (environment variable STARPU_NTCPIPMSTHREADS) + -nolocal + -ex To specify an external launcher for the application + -v Output version information and exit + -h Display the help and exit + +Examples: + +$0 -np 2 -nobind -ncpus 1 myapp +$0 -np 2 -nobind -ncpus 1 -ex 'xterm -e gdb' myapp + +Report bugs to +EOF +} + +if [ "$1" = "--version" ] ; then + echo "$PROGNAME (StarPU) 1.4.10" + exit 0 +fi + +if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then + help_script + exit 0 +fi + +NP="" +EXECUTE="" +while true; do + case "$1" in + "-np") + NP=$2 + shift 2 + ;; + "-nobind") + export STARPU_WORKERS_NOBIND=1 + shift + ;; + "-ncpus") + export STARPU_NTCPIPMSTHREADS=$2 + shift 2 + ;; + "-nolocal") + export STARPU_TCPIP_USE_LOCAL_SOCKET=0 + shift + ;; + "-ex") + EXECUTE="$2" + shift 2 + ;; + *) + break + ;; + esac +done + +trap 'kill -INT $CHILDPIDS' INT +trap 'kill -QUIT $CHILDPIDS' QUIT + +export STARPU_TCPIP_MS_PORT=$((10000 + $$ % 20000)) +#echo "STARPU_TCPIP_MS_SLAVES=$NP $@" +STARPU_TCPIP_MS_SLAVES=$NP $EXECUTE 
#! /bin/bash

# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2021-2025  University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#

# Launch a StarPU TCP/IP master-slave application on the local machine: one
# master process plus one slave process per requested worker (-np), all
# running the same command line.  The exit status is the master's.

set -e # fail fast
PROGNAME=$0

# Print the usage message on stdout.
help_script()
{
cat << EOF
Execute a StarPU TCP IP application

$0 [option ....] application

Options:
	-np		To set the number of workers
	-nobind
	-ncpus		To set the number of threads to use on the TCP/IP Slave devices (environment variable STARPU_NTCPIPMSTHREADS)
	-nolocal
	-ex		To specify an external launcher for the application
	-v		Output version information and exit
	-h		Display the help and exit

Examples:

$0 -np 2 -nobind -ncpus 1 myapp
$0 -np 2 -nobind -ncpus 1 -ex 'xterm -e gdb' myapp

Report bugs to <@PACKAGE_BUGREPORT@>
EOF
}

# -v is the spelling advertised by the help text, --version the historical one
if [ "$1" = "--version" ] || [ "$1" = "-v" ] ; then
    echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@"
    exit 0
fi

if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
    help_script
    exit 0
fi

NP=""
EXECUTE=""
while true; do
    case "$1" in
	"-np")
	    NP=$2
	    shift 2
	    ;;
	"-nobind")
	    export STARPU_WORKERS_NOBIND=1
	    shift
	    ;;
	"-ncpus")
	    export STARPU_NTCPIPMSTHREADS=$2
	    shift 2
	    ;;
	"-nolocal")
	    export STARPU_TCPIP_USE_LOCAL_SOCKET=0
	    shift
	    ;;
	"-ex")
	    EXECUTE="$2"
	    shift 2
	    ;;
	*)
	    # First non-option word: the application and its arguments
	    break
	    ;;
    esac
done

# Forward interruptions to every process we started
trap 'kill -INT $CHILDPIDS' INT
trap 'kill -QUIT $CHILDPIDS' QUIT

# Derive the port from our pid so that concurrent runs do not collide
export STARPU_TCPIP_MS_PORT=$((10000 + $$ % 20000))
#echo "STARPU_TCPIP_MS_SLAVES=$NP $@"
# Master process (job %1)
STARPU_TCPIP_MS_SLAVES=$NP $EXECUTE "$@" &
CHILDPIDS="$!"

# Leave the master some time to start listening before the slaves connect
sleep 1
for i in $(seq 1 $NP)
do
    STARPU_TCPIP_MS_SLAVES=$NP STARPU_TCPIP_MS_MASTER="127.0.0.1" $EXECUTE "$@" &
    CHILDPIDS="$CHILDPIDS $!"
done

# "set -e" would abort right at a failing "wait %1", before the status is
# recorded and before the slaves are reaped: catch the status explicitly.
RET=0
wait %1 || RET=$?
wait
exit $RET
+# +# Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# +d=${AYUDAME2_INSTALL_DIR?} +cmd=${1?"usage: $0 [args*]"} +shift +if test ! -r ayudame.cfg; then + echo "warning: no 'ayudame.cfg' file found in current working directory, an example is available in /share/starpu/ayudame.cfg" +fi +PATH=$d/bin:$PATH +LD_LIBRARY_PATH=$d/lib:$LD_LIBRARY_PATH +PYTHONPATH=$d/lib/python2.7/site-packages:$PYTHONPATH +export PATH LD_LIBRARY_PATH PYTHONPATH +exec $d/bin/Temanejo2 -p 8888 -d 8889 -P $d/lib/libayudame.so -L $d/lib -A $cmd "$@" diff --git a/tools/starpu_trace_state_stats.py b/tools/starpu_trace_state_stats.py new file mode 100755 index 0000000..02b810f --- /dev/null +++ b/tools/starpu_trace_state_stats.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +# coding=utf-8 +# +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
class Event(object):
    """One trace event: its type (PushState/PopState/SetState), state name,
    category and start time."""

    def __init__(self, type, name, category, start_time):
        self._type = type
        self._name = name
        self._category = category
        self._start_time = start_time

class EventStats(object):
    """Accumulated duration and occurrence count for one state name."""

    def __init__(self, name, duration_time, category, count = 1):
        self._name = name
        self._duration_time = duration_time
        self._category = category
        self._count = count

    def aggregate(self, duration_time):
        """Account for one more occurrence lasting duration_time."""
        self._duration_time += duration_time
        self._count += 1

    def show(self):
        """Print the statistics as a CSV row; entries without a name or a
        category are not printed."""
        if self._name is not None and self._category is not None:
            print("\"" + self._name + "\"," + str(self._count) + ",\"" +
                  self._category + "\"," + str(round(self._duration_time, 6)))

class Worker(object):
    """Events recorded for one worker id, and the per-state statistics
    computed from them."""

    def __init__(self, id):
        self._id = id
        self._events = []
        self._stats = []
        self._stack = []
        self._current_state = None

    def get_event_stats(self, name):
        """Return the EventStats registered under name, or None."""
        for stat in self._stats:
            if stat._name == name:
                return stat
        return None

    def add_event(self, type, name, category, start_time):
        """Record a new event for this worker."""
        self._events.append(Event(type, name, category, start_time))

    def add_event_to_stats(self, curr_event):
        """Feed one event to the statistics.

        A state lasts from the event which enters it to the event which
        leaves it: PushState is closed by the matching PopState, SetState by
        the next SetState. Nothing is accounted until the closing event is
        seen.
        """
        if curr_event._type == "PushState":
            self._stack.append(curr_event)
            # Will look later to find a PopState event.
            return
        elif curr_event._type == "PopState":
            if len(self._stack) == 0:
                print("warning: PopState without a PushState, probably a trace with start/stop profiling")
                self._current_state = None
                return
            next_event = curr_event
            curr_event = self._stack.pop()
        elif curr_event._type == "SetState":
            if self._current_state is None:
                # First SetState event found
                self._current_state = curr_event
                return
            next_event = curr_event
            # The state being closed is the previous one; the new one
            # becomes current.
            curr_event, self._current_state = self._current_state, curr_event
        else:
            sys.exit("ERROR: Invalid event type!")

        # Compute duration with the next event.
        duration = next_event._start_time - curr_event._start_time

        # Add the event to the list of stats.
        stat = self.get_event_stats(curr_event._name)
        if stat is not None:
            stat.aggregate(duration)
        else:
            self._stats.append(EventStats(curr_event._name, duration,
                                          curr_event._category))

    def calc_stats(self, start_profiling_times, stop_profiling_times):
        """Compute the statistics of all recorded events.

        When start_profiling_times is not empty, only the events strictly
        inside one of the (start, stop) profiling windows are accounted. A
        start time without a matching stop time is ignored.
        """
        use_start_stop = len(start_profiling_times) != 0
        # Pairing with zip() avoids indexing past the end of the stop list.
        windows = list(zip(start_profiling_times, stop_profiling_times))

        previous = None
        for event in self._events:
            if previous is not None and previous._name == "Deinitializing":
                # Drop all events after the Deinitializing event is found
                # because they do not make sense.
                break
            previous = event

            if not use_start_stop:
                self.add_event_to_stats(event)
                continue

            # Check if the event is in between start/stop profiling events
            for start, stop in windows:
                if start < event._start_time < stop:
                    self.add_event_to_stats(event)
                    break

        if not use_start_stop or not self._events:
            return

        # Special case for SetState events which need a next one for computing
        # the duration: close the last state at the end of its window.
        last_event = self._events[-1]
        if last_event._type == "SetState":
            for start, stop in windows:
                if start < last_event._start_time < stop:
                    self.add_event_to_stats(Event(last_event._type,
                                                  last_event._name,
                                                  last_event._category,
                                                  stop))
                    break

def read_blocks(input_file):
    """Split input_file into blocks of consecutive non-empty lines.

    Returns a list of blocks, each a list of raw lines (newline included).
    A run of empty lines opens a single new block, so blocks may be empty.
    """
    blocks = []
    empty_lines = 0
    with open(input_file) as trace:
        for line in trace:
            if not blocks:
                # First line of the file: open the first block. The line
                # itself is stored below, once.
                blocks.append([])

            # Check for empty lines
            if not line or line[0] == '\n':
                # If 1st one: new block
                if empty_lines == 0:
                    blocks.append([])
                empty_lines += 1
            else:
                # Non empty line: add line in current(last) block
                empty_lines = 0
                blocks[-1].append(line)
    return blocks

def read_field(field, index):
    """Return the value part of a "K: value" line: everything after
    position index, without the final character (the newline)."""
    return field[index+1:-1]

def insert_worker_event(workers, prog_events, block):
    """Build an event from the fields of block and store it.

    Events of the "Program" category go to prog_events, the others to the
    Worker of workers with the matching id, created on first use. A block
    without an event type ("E:" field) is ignored.
    """
    worker_id = -1
    name = None
    start_time = 0.0
    category = None
    event_type = None

    for line in block:
        key = line[:2]
        value = read_field(line, 2)
        if key == "E:": # EventType
            event_type = value
        elif key == "C:": # Category
            category = value
        elif key == "W:": # WorkerId
            worker_id = int(value)
        elif key == "N:": # Name
            name = value
        elif key == "S:": # StartTime
            start_time = float(value)

    if event_type is None:
        # Not an event record.
        return

    # Program events don't belong to workers, they are globals.
    if category == "Program":
        prog_events.append(Event(event_type, name, category, start_time))
        return

    for worker in workers:
        if worker._id == worker_id:
            worker.add_event(event_type, name, category, start_time)
            return
    worker = Worker(worker_id)
    worker.add_event(event_type, name, category, start_time)
    workers.append(worker)
def calc_times(stats):
    """Sum the durations of stats per kind of activity.

    Returns the tuple (idle, runtime, task, scheduling). Entries without a
    category are skipped; an unknown category only triggers a warning.
    """
    tr = 0.0 # Runtime
    tt = 0.0 # Task
    ti = 0.0 # Idle
    ts = 0.0 # Scheduling
    for stat in stats:
        if stat._category is None:
            continue
        if stat._category == "Runtime":
            if stat._name == "Scheduling":
                # Scheduling time is part of runtime but we want to have
                # it separately.
                ts += stat._duration_time
            else:
                tr += stat._duration_time
        elif stat._category == "Task":
            tt += stat._duration_time
        elif stat._category == "Other":
            ti += stat._duration_time
        else:
            print("WARNING: Unknown category '" + stat._category + "'!")
    return ti, tr, tt, ts

def save_times(ti, tr, tt, ts):
    """Write the idle/runtime/task/scheduling times to times.csv in the
    current directory."""
    # The with statement closes the file even if a write fails.
    with open("times.csv", "w+") as f:
        f.write("\"Time\",\"Duration\"\n")
        f.write("\"Runtime\"," + str(tr) + "\n")
        f.write("\"Task\"," + str(tt) + "\n")
        f.write("\"Idle\"," + str(ti) + "\n")
        f.write("\"Scheduling\"," + str(ts) + "\n")

def calc_et(tt_1, tt_p):
    """ Compute the task efficiency (et). This measures the exploitation of
    data locality. """
    return tt_1 / tt_p

def calc_es(tt_p, ts_p):
    """ Compute the scheduling efficiency (es). This measures time spent in
    the runtime scheduler. """
    return tt_p / (tt_p + ts_p)

def calc_er(tt_p, tr_p, ts_p):
    """ Compute the runtime efficiency (er). This measures how the runtime
    overhead affects performance."""
    return (tt_p + ts_p) / (tt_p + tr_p + ts_p)

def calc_ep(tt_p, tr_p, ti_p, ts_p):
    """ Compute the pipeline efficiency (ep). This measures how much
    concurrency is available and how well it's exploited. """
    return (tt_p + tr_p + ts_p) / (tt_p + tr_p + ti_p + ts_p)

def calc_e(et, er, ep, es):
    """ Compute the parallel efficiency. """
    return et * er * ep * es

def save_efficiencies(e, ep, er, et, es):
    """Write the parallel/task/runtime/scheduling/pipeline efficiencies to
    efficiencies.csv in the current directory."""
    with open("efficiencies.csv", "w+") as f:
        f.write("\"Efficiency\",\"Value\"\n")
        f.write("\"Parallel\"," + str(e) + "\n")
        f.write("\"Task\"," + str(et) + "\n")
        f.write("\"Runtime\"," + str(er) + "\n")
        f.write("\"Scheduling\"," + str(es) + "\n")
        f.write("\"Pipeline\"," + str(ep) + "\n")

  • !RCJtfJBNvcSn7|@r;>9oWE+>?jvB@HcI|!r&@cZ*v0k7_ z6x4lTVYJuEsRB!!AEOV3Lj=(a`m9P0;mG^dI3d>F?Uz##<$98gq+vgKi?P-SN*qao zF`q&(SZ_5GCStNyGY=Ytk=tIXO~cSZS8Yrn0gv%B+)Ho4X9D6SS%KQ2UjFb%BhvQ-RIK3foLIBBWz0=!t&N3M0;-7MHgArlZ3n3L|9nHt*tjiL z)=i;r!Nx4S0x8Lx64{L*On|EZtHW z(pAcX6q{&4813u&&`I>!3*OG-yuh6-)4$Y|Vt1Um=tUhU`d|KbweAOu0Ag1L`^L4o zxROk&VJ5q#JxexUcb(~c4Ga=9W{C9q+sYT+1$bt%o zmLiBmj}u?viGvV^fvjSFAx{eX?YklyeFV2<&JNi(dSX3WeYpD+`wr+g&F1WF_U~3# zsq_e?Qb@=}hGo%y*ZQLV&9)Q)T3lDyKk?4HIL3wjApE0x5yjM1E#qhtiF)l!dnl6~ zGfw?+dWn(8WL2C9pif5P_&FU_n;;*~sbx$P!}Iu~tkVgPn(sR+svDO?Ih|^=>%9f( zv^JELQDbrw2R^;Cocq2l4j*Lx=E3tXZSJQZh<0;q^JVKxj`MLSs+iplqQ5_Yy?!@T zA>#82?~7tKMtpkyE%7S8%|-Uw1@)CF-n1)lC>uj|!Q8Oj3(}xksj0CN-o3%=d0#D& zK%u-J`0-Lqq6mi{pH{6HfldRhKr~NJAoa-#rnhXVK^zsRkD>_xZ;=LDG5ya%-!O@a z3ISV;<<=0L)?aF)*PbYwn(NT6A&K^X^3-}H@RXBmr7-C5d%?O-{CV}Ri~r!bX>h&+NHTB}C zW3!-wS?%;$1Ahy7ERrHRnxA<5?X@rBPvl&a&fr?Z^rU`yvZOCJUc58x(bCN4b}5`B z+iVa2K{SP-JoY^{;HKl-E`In121@L+l^92j_=adF6L2wnTO1Jp=D+I7@{dCd?+#ZC z0s=mOetXv91i|OBr=_zJl$Ku5L#uvc(%1VgBuJ*k)h@o(-J!k z(;@ysqa5nc&i5$_S68Y6oz>=MJLNaP5tR#Iv8b>2`BjtK7beV~%p>X;NlA3GnFZLY zhO0QW>LGR#G}%c~46EuvilS2iMek_Htk2b%?sk`a8&YJUGObkn>0H z>q`mzrhSbQ?ND+OI18-S?bYM@_xj19AWGz=2rQ~!W~d`|7j|Sg*)`kN*CzAowAV(D z-nK=m^zpL#RO?7M=sdX6SGYbm+3e82Yw=V{Vc6;j#l%kBcljN_d#H(XaCG?Ig8j}r z@pD{A-Q>J4|LJZpuOSHWdG~GE=*VLjHtcDRrLD1n{gyyjS;!ZPjG^Y}pS= z0|CZT;vE=R!Q@iPAgL6Z3_(+We^n%WR|=AR?FKaVH@9M9dO3U^-IqJzpIKS07TyFu zDAuZnfj2S%)@(NKrr!kIjVXDM4uA~|mQYfRs;Ipnr(vN?LxPLi`cpAPttE1uPFKYwr3$2uXC3`@)^qLsziS~?IcGu~HSmLT z-ksq^8R_@NYh@7+FtimGkS1YxK#x>rP zL$hv5%Y+My)*)T7KYBda#wnu&Zf~)xl6QQMbq}^2kyNSs#)+UzKyTYa*~qdtOvG~d zNjaH}KPg0Pd3AENsJuCkzne)rTSap6z1#isTRT=(ksNuisa=aZE&qNMeuu>H&L3{Q zukH%}c5A$#V+8J-_1dqrm@TV!xYAG!>~@EgH~&mP``TYxicG6IpeC2hI6;#J{i9Ow z1opQ}wJFLB4DrK3yAb=478mYCRX=&1z@Slhjy6F$@{d~`2n(GijiFM!KuyKwnL9G+ zYBNr+nMXd+VTtjviE;Jmw>D3%mu_wt(5X-RkAI8q>q4hq{V|Z=q!=%xk&4MihYj{V zBgu)Y6XB#LCca(A>T$wrf##fQQ@CPHncDV??VULP_w{%BPE4tzpviAf;7ta-j-A{uSisy?A7w7LX8 
zeC;O^kj3Y@MWn{e3n-T9rd6#D4WE#R+q|_VBUey~6=DDuIQ!7>IycJ;DlnSKbanO) zAZpK0-o1Kz(;`E&=+w^3YX(WA=eK#o_lyY4SX9%ih&-!3SJbPMVv~Zepy7Z_nnf*s zv34g+UE=Lp>Fet1@*+&$6qd@fh7(!>-AAHbG7US42exr2y1iA!?V z+HHB0IR!&II^IU&En43(Lw$_MJ4TfgHz-QbV_78A_c8LdNRr_(%eH6J;lv@3d!5OC z0ZkRDR&^Ib2q>u4Qlh^5h;rm~Ja1l~gr6gOTja>uKaQ@g*?MQwWdGZ}uBJ=np7O#p z9pzjZRcL!Tv&AuXO=WVfdQ>ccG!^qHJr{h{km{E#&0Xp$ zvdhb*K-!sX`B}Bgf;ygD2>{mrbS7h?7}ew;vlT3D_H=e4!w-(s?dBTtkY&5 zCIp?8Y0L=8<0Gs={c3xYuQWRP5mTgI5RvO2Arf?aYU(I5jE=l2O02JGu89hI0?Yjc zG!#NO4`vo`E)Y#bb%bnp0n_i|!Kv|r`3FJGX($1#U)V7fkj5U0) zQb(m4Glga%Ph2C+70NQ~%8=xC9=s9Kr1V?f3Uu z0d*4akwPPhL`JZ>_r)8%jsff?r!(*Uz0|Yj%{C91yQ{em@83ScKz_mz0bWXP;b{4H zuM}F@|2gZZzimIM1d8$I0mFYF9}4EBUgmTTl7;=XwfngLhV%1w7oW z7YoBDirxeJ+ds?oFEUq3q{>tOfbiQ+g!0<`=BR{|TX*oHS_MsKh0{>b$mseeo}50@ zGAX5#=jNVSHe0!8{o+qE>;L}+LBqYV#N{)C34oP~o_2#}ifzsJ9EaqmLDeb-O2*sz zpVtW7%O4-Z|Ae6@af|?Nb|a%@gz*1r?{NmebS58-hY~y4nsZNSGq7mrSP9 z$XMkd4vbyvpTguL7;oCTB*og@ec}u6I z+Gq%I5ub!U|2ra8nHd&2KduHoZdfRw%xExvZ!8O>+wLxVd$&890W@pmo6&^^&e%LT zf!Va;@KEqU`^&g<;NgB<+x5-bhsmN$wl?1$xc%0o z)UbJ6gkDX3%I3fm<(S{UZ(AnOs&$-ihy8fE7wwHW`!j=SDIP)!`IvSd~aep73`_*n#YpX}q zcE;cgbj0haE7vCiAGdxw_(W z{>ZhMDc4vzA5LMsIh@)0QJTUxh+d`5Ef(|@vE$|W;+7te23PQV1Z&n;8%NYK8(COz zb20s}*I!?1w1v~rg3rTIB#h+$yzpiaOQ@g3kkP74u_&DP(xK<&Jq*-b*Xp>{Jgn_n zx$|@#rO8O{B?VbMU1JU4U-xjiV`yk7BQ4F+i8)-WST;=oM?j1lJb3$Dr(M$4nPgf^ zw!}2+F!YGd%yhReVCa&|u?H>!3Ud3SuPj;tff$~3rlicM!^zaDq$Gcek=<{)mYb<` zwQ)h750`AcPGieHNYq64P}D?&SV+!)BsKj44u^v2=WX6wvT?MEYd z#;)p)FY@qeL0)xIzWE(XtX}(8n<427@qy75yWmu@4B-65ipxbo(Kj|GtG95&!jl3o z`tmwGnjSIi1xl*lJ~&KrvTE2fu3*_sdk8p@Og|bTlC0gl*=k8@#Mr--(P@c9btLc zwxLppR$X9S+wOvqi0r{ue-g&g(vW_&Lp2`63=w!dkrUGu)*TE|td?xs9WRa8d~xn> z9dI=mfiOGiHl8%e5qgg{UM!wJHVVAl-#$4`0FOcumWz+vp)GwZl@!3kCG{3jqRSfDP)D)-PM@{o+&);a!q6+Kh@6-l@4bYKt=zwop>3* zg)&>kzdQXs95IyiQ3fa3cr7qkxzsFYhQ3Jg0N8<8@fnV8j^+TlWpbGhuQ6kRImD0| z>54=`F$@gjo787_6F#UIzrE48*(3W#$L)&?>&tv2GyUi_bRoRnCrvksegWx}T4}UA zV04eHD`Ku+SjSo`5#jD5l<nu^@8_}>`r6pcG)d_g-+%4cshPI1%Qyn#u9%r;nrgp>RtP0pquu5 
zxf7AhIXF1DySvL}t6xTLs}HT;)K{TgiUd4YP|KlmHpXKq0!GtprF0UWG$=7m?M3ZU zDh}Vgf(@E@L5YRfm*)_KO%s!F)AYyU(JLwX99{@0A!;*yvPf1XMHiT30q$8(2n6*A>}u0#FvcW@ZlW ztk(!xUD-t4do=5=#PCs_lX zd1_H5R&?dus>4tmrXhZ1RChKor(wzEHhKvvU(pCQSQHp~c408C25|yfiA(*rYr0nq z)OhPN6A2YvIJ?ST1a}+rdE8c5&Q)>Yg8gEb zMZ8uK(!8Q|2qVwxoQkMy^v&q1T9TWWBDJaz__MGjbZ#8|6VUY+-6mt&z45(K`{q$6 zQVKKkdnr9(Kb{fNa^{%qAAC@$$w#lRfuEpq37)W-1!RaDwZMd#larG&KOjYP!v;8E z1{zrF;R%ZOGoK{Tf?~<@yVRkyrFhI8sfM9gK0w)}8Lf)((Xa~D3R9{nQft%}dN$D! z#=w21p()p>Gf{2=w%Zb%17>FtjaG6r75PT88GGAKPEm|VEVdLOt?u^dGN&n&(@a=FK$xS&w(5&E_ z$X^6HkeaCYGQ^t%mEE>47`e%V!zu7bK)xNXpVM+kX>DJn>~eK9 zq6=i2U2<@Cn1&g3^_MvlsRO8$ViSdpB|@G5C8mfvnw#hp5E7d9ECfM1y=5IN=YkVs zT8b|25iC?IbsNTv#}^I^`}#;eLuobW_aeHw%IBfwBdqiTm5I<-itghDvt>>%nBgJB z?qC3v@9FtfA^^32AefGnhM`^cvo`CZgt5sT6Oe67r+vZm28g65O!j2)31yB53jY$S z{0RewvB12%h)c{Ywc8t8E%=Y;)HqV7bA;sOku0~o=~#vFWhs*C_4dmq{KVd~-0XVy z0hq=#D3SDL`1>H*PcuIvfzn8X2Nj0Knz0;#ocKo`X&~!7la%xifxA6c2*SZLy!g)R z=D*sW2g;ye>HhT#zRL`XiGvmWMc%7tBiGAgxhR3{P#nn1;88JNfI1-T>QbVCcixOYZ} zx<G#>Oj`o#NPS^Vi{5iJ9#yjil z)lNsMWy+F4Qt-q+3-=eC?#lIxRRmSqg1zBGzCcyd;&`ZBBa_jQmzP&>;dg~OC z2Q#q(c6JUZC;&O0>DH+LNF^g^l3Eo>Q1JXhA!xZJXtlkp4MBq5Wl_C9ngB4!tE;Yt z$}K3^0*+@qjAdbhDm1X>c#MO^pj_#^?+XL>OKl|l3fTIS`8?Qjm`OQ5uP^Nkhk1Kf z*GlI2hlQE-gxZ>p@v7GqPwkF6@$qK!c@*D=4)>6yb7KdW;Y^Y>szyf3XGJ?e+jo-`Uw&tI;ND z&`5a*XlInjWQE(VVGIMrh&hIH42QLfrH0C_=`w@Fc1ok1p6=}k2(t9>{`q#GGJ262r3*HA`Fik_*8WjQoB?vr zJZjjg0+O#t^}N_#sPk@h7J+?N=EjnW+tR{}$5Hiqui(T1sI&kt@vAHFU>s?aU4Vt} z$L+!>r_CM&0s;alDJgn8S6e0EWtQZDEh(!ARpU{Wn}&?<4T$q8FEzL1o2SxvGnuQ&|eXt_N3ea_V;k(Jbb#uVUhmm%U(PV zo1U4<_YPOf%h)m4Q0ciZr&n4-BMJbBOB-uZ9jx0P9vL~J3nLXhzL!Fy zBDpF*++2Xyr@~+}vV)M0vXXhO;?!x>zo@V?g1kMvg|(v9=u!U7&X8{k?OXEctE=z? 
z1_TRjy4?=b&`XD)L!{9hu0tn>9)+m$ptn5hg&hkP7K=T>#gr*^H3l29yhOC(&cNY= zZ8HAXq#%Y?Z}%UQf<$y7L;&Krcy9gPEQ)MA2dWf{5pA?gIr2cSw`{x^KzyY2Qx0 zCf*E0VyE^urCzfUY5x-!V{@puaLLjs9rzInAd;|_5EBQ*$R>aH0I(1NZ(fg4x-en1 zPoEa+OyxdM@(`Q=N@2f>4y#?Go{TH%s_f6tT0=m`!K`j))15=?y&p8~Lq&m_CZTS3 zs0)MWbe}en?|wg*%wX_w)^yYB@7~`o9;$4^@9XDBhxAuM?$a8Ip*967SgQ|Zxd8Tr zd!-qgwKL<1+AChw3qI=f5(Y<~#nSmSs}a9Byt6#60YV)zIX@L zn-$W{ys%CU8qJ3ihrknJX58GowXs2r6)`2mY?NO*<`DU05WX03$#B=xIk$R9<|`FtAci6{GXMxbdyLrk(s@*Lz(E~- zQvh+Zb8ih4SW(~*$N9h<_pu&$H#2X3tSS0JhO9k z8zn?qt5QPXtaEC6`(<)V^nN#A?HxuWUFcxx9$Gq=nIQP7?3n0%s#M4*hV0WL#BuNq z;nIrPKT^C5lU6JOg+!xUU~5!RP?U#Jgd4;|dB9n5D21brLtJIdA-)?4a0?T^3@FYKbbE?C!|uXUt!L<)y#j^V4ikh-^-*Z zgqj%KxJ^&j04i!X4H)dk5n)3dQ4&-oySS%9Me>JE(wH_p^G$zxt!)rsL`NL*b4)q4 z?LA||`i*z5#YMcj78*HTK_8BFYr^r+$7Fx!>#hJKr1Gt&6jGsgGvqQ(Rp3WWP~T&K%ZTmK(#t{p}*` z$l_5stpT3~QY<~CiquP0EPyHvxas#YSTAC5xiZb&nbi7px$S%fMz z-P$YQA4csfamj2=ffj-vprYdM`B_?7k9QsU!`NY__OT$ z%>ZhsaQKdBR4zyg!0wxutq=fY6mSs#=iqDG2W6V8ZMcd67SG0Ns`&`n+w>kW8P{Fk zD`{B=>wzCDEY-^4gzTG{w>LM1km=%M01Fg-fV2PDSvK{iR9*aKck_*mqtgE*TE#(RLNeKv_C4UDhwQj#|ANsS zTt*W4DTu`wNW`Fl!WvrN#-_vW`ICc#0}Z0$kNf+tT=puGh=4uLL!Ia7#Kgt!?|_-7 zr?*ojrUoC(Yf?L{G#>c90Qb)&p$7&7D>^{+pq=@BX7SU@_0O!2@BZng3HEI0L!4DQ zOUH9Ic7v;#9yAXoi_UWR`PLR&3PH3!3oO8N2q_xoxJD;SJ8bON#KhPLY(O5J4<8c| z5CWIYt59M8*JDgn6i98UI)a*0X9M6+k{NUW9K*;!2q-gTdcy(zI8uQ+GP20?qw9-y zN8iQ|&$hPJw{YX@Kgu0XG=FNaW@$A}k^geLDK}ecz|^Y$gpG9zSggJ(6kK0lW8?${ z<=Jgbe;YI$!~bF>4y69^vQ>p$wN~=-^7i)j0kFsEfw+0HPf}KPwb`MhzTOGQmZ8)&HPcg5t$=4k zsYKSq#6-D=4CQ-b=P?5HIfb;ZdSsarCI|uH{!R}tt|OPyH!O%W1`tqFwFhydH;&iP zzQZtb@*SBHkFcuhIDJvw(FFP$D(gP41`8}S=0cN$2X$T5)zvZ?>|C6jsfFodS^Zlx zr3zaEaiom8ZMh^AZlj?S=wxVi6t4xFB=3GFQmfa&a+i54!@c`G_Tlvbd=QSmpq=X*pfg? 
zgMf$#@b9u&0Y>1EkdeRRan|qupj`eI1?7wR8&K1=7D5oG1FTxrXDWCU!ot_-t~Xi@ zxiWY(IzrQvd4F9*h4OUyfQhDq2{rC)ii*k`jCz|pQgrJ2t2IJ{{5C9n3<8pLapFCS zH8ZE*)CELj6p;dKR;u-(y-&(T;2*;Nah5PkP5-{Ya(x!#3x&@MpGJ2RVu;dV{-p8I z)N1-#``m#i13Sr@8{fT{B5rq-*&Izc#Lo(QZk10}~_n0(Wy52yVq=`Cgh zLA3Y;zw-=02!sA##t~VMbE@zaL)~i=IydJ8D_MLd(S$w-x+3OZ=iT-itP$*gSm!B3 ztA^yF!z`c-$QC`|p=Q|dP@V%K;3OL2ba|h@>`c1sZ}vz+^r&R|#=eAP)L#+jOJdE! zDq!RRT^9Y%m3w22+sCLSWl^%uR4&UZI%^_bp#g9QULpc(1%cKW+xN*8@d!j_9d{AT zvh&eGMd1tgiIcG>rs@xW|K9RFGP;tNA_<3hp`a!zDV~JS<8(5igjj?wLV-EX)8H!E zx;r=qD@<4!BhbkA9S$r~_mDc1w$08Z?~x)mIcVP*MD_ZGTpQjTjW$LCeh}+s98rX1 zqHJof8xTb?F|)-)-iwlpTn0^f!oQ!)4h^=&1-o6Df&_`${iv z)gf>SRSQ+`YUViM1ho7@&yRX!pAC{y+X3F?mstQf@erL=4Qzijh(ePRZy+D2HAw~_ zh9ci6&SEg%TcfOjzNLgGex#p%MYOZrR4iAfrXh$YQURYft@}8X;lob^J{qE4nwHuc z9vmGt+AK`3u0|t&*e%ge`tZumqWKz{E!+Kh*f1aHnLEjQ5-_AMAd+K7Y&y-?7Vu+n%m& zPmZRjfPz%%&g`CeUB7=hhz>^H*aaw*zjtVqrrv!MkqpJcjf_!gPxm^GF{7b~XOzAy z`Mr!VAxae4(Pn@_5S2t*V@JB$E=BO0iddn~K>GXE+lw$*3892}PN*i&cPU`~St|_2 zXrI^KfjBA!f5e1XB;%)u99lky5sP+&!S^_A&-9-c-sQ=+2zE0t7R z;}P;Jdtv-F70u*}m;o6W3BQvb0ApBJ08e5hj*ORvI6rAzXxT^jL3N=w+)iC-tmbcy z=0Z9JH0y8o3_N{_; zd>|+CxiDT?tUuDs6AJn=*w>rq<8!wtlGD$a1Ntp!7VS_*%c9%*L7*)lp;fPH^K* zxpKW`qqXfeNNSPo*kir@a>@nH^YEuu9`)$RF_q5a5HcWcr%^3GlwVm`DF|6>Lk7U3 z=6mzQ*U2EYvI9VFoGEW08GLc%q(waSqitatpUm zdbaq*ICzE5!`{oWu=IM?^P!iGb{;O(GS&KMt(=J~J@TT$7iAjHrJt(rTi{J3plKEB zaiib21D$tEnf_1F$cv+Ecc0IQL|Q<<+13x(9pi9T+74n3QDF^qq zmwqZd^-KQCpmlyM+N}EPg;*KVY`QE;W9VNV59><~CZR@jP^uK^ynCmim|phG&(kGN z`@@tdHkn$l0&ADGlv&H}%i=TAgBvAfuTt3Fdrws+!5k3RvvH=E9}%SAoP#VLG)+iR zG$%1c;2NztR-5AHemkUmBYX?aI>oeex1g{jqY$B4-<``74YqRMU-dcM;kmQPUY%XO zG>{vgDHCYtc4*r9>fYWck9`2f#S+!Ene#HZ$$m+(U1@j!(J>aq<@QQ7se`!9Pe{OZ zR(>!$_1i@j%-UY7KYt+?bnaLbPfa$-R-M}rb+dK#OwGSy!82Q`TqN7tk&ylR>WiH10?i6 z96a>#=7N(>$~kP7&U#v1Wgs)x8Em@`Pmm|)Zp+uX(esJrJ!Yu4#_o~ed*`PjTc|Du z8F$Mgi0#3#+}jJw$Aj~G(Vb(9n>($xGGO~w*mJm0lDRi2dX(1sbmOWL%-qVbjMHlL zs~J8nnNwpFxNpWGvh-};SE)8K;c4g1Esr}O_H(UTMpm}?sy+?#Wyx3QF*b|&IkWAP 
zPKsO8rP;n&V%Kk9BZ6;eicT7Cjt~+a5_s$9)dWutr!SW5c}C3{qV)L3T9XS33LM^d zvZ+lc4<5*{F10GN?i_tePdVmy);K;o8Hx#1*b~j6u3cEzjR+n^f=p zRHeB%p08Indd{_W;9NI%ZE;BTXYUhsJwfviTa&7qMb;5X!<6U~t-ADo4l$FWGB={l z{jvU*p>4+S!3^&`|5|?6{7&{97XHW+5y2>2K0og>=4aaeSf))d6cl2Dgs`A;(To18 z^PthCW=Vylj7p}v;K?Ak3~`z!^Cl_J5t;4*CmhhN(L=zMQ;h4ACp^S1``ER5sELPObb73)U}F88Rgw$2R~&vMvq zD68O_Cify$t9`aSF<<#)yKU)RL8)|0BH<`=(@JOBlfO~?Pvs`v43ec%PV)SZ&CTJT zD-GU!1}r}Dv|b%;c`7Mx^m~M zq?@Ed%iwnKELr-rO-+CrZL(J7pI#!h1wPhCiy`ny;S8l9OF3eRW)A>SlYz_MkxF?) zj4j$y#Jiu9q5#ct1995;uP7C*fRX8>}9g3xj&tbzryQEWnGV zBDds9F;Q{!t5@8MCTTjaT>O_Wvk8nxxN|9FQ3~GMW~B*f=%gUZhsh){Yj=7}TiZuV z#c@SbQF*;#S<=(9gIS-gbpj6^dd#`Rv;cOL6~}1Z)6oVi?6O5rc=45_5~gK~h4F!6 z8Fja6OXf}jZ^VI}Z$2L-W10#Gd#q7dQJUz5KJ(+_=AvUb>lu{-D~7^5kc* zd6i$rE~gqh+q9`t7cE!QV7CnD=i^$ss_*gWD1kLB65;(m6%=3&f?P+a%KC@EW5*f! z+lu1`!Jd;n!B4l<2fL%^ag?2I9E2qJczQj2}&r&#H2)5<7-JlJHJ5>A@_~?_-avJpt zLPAjj{;%L%8Ggc5QY1BogS|usV?Y>eX*XCEKC(Qm&L$|;D#y)Ze%Eo^07wu?*U(~q zmX{olSQ#FD1-}~UR$-U@R~*c_Bc>-4H*Q6clw%+oH!+uN#$T0!PmmpL`^Y&I1J{Ot z2=sLOckWqk?$^G73e&s0qSt)=BsO)YFzjcvE0yx^yA-jEN^H@J6YNa&f+1qq=hWFA)taMOBK-=2R7|e=e3t-{E#U1+hd1FWr_zZ zciRqd$)=gL*ca4O9VgQh;(!jF@K0tj6@y^N^G%$_d=j+)O{5~bFVuPD5hOX{rt?z9 zI^Fx(a^x!A{osNpPnaSJevQ9_rDptTk>8MQhdALtN;r&2cUe!sv~Z5%;uWiA_7iX{!loed~cOFVb&iS5@U?>2-%&Kxe<6xIL=6p6;uURS6 zf)R(@VUMHzUO$<;MvF1!n_ zNoV0LZoetK$l&k)$GMBgM)Z_0ltM>zZiUNnBEu|>>G9+#&*Ko{JoL{?BtDH%M>R&u zdR>n(twQ$*a8V2Xq9T|~ve)`JM0|ow=a%Ebc31PCqUHRzCC!&Guukqz%wx4yNf*Ny z9F!&=t;puOXCd;JNA)BBDPV^2_U^iuwR67G+;v#DpdbO27#|95>+$E`%Dal!GyGA+W5 zbxGY>j|`TSF-|Q;(6V08-e^z0q#o7kH4I4*P8OBfuAa|NEI8~|SPR%X{*0y9DJwSZ zEL-5L`#*HgxHj>pz^pF`hJIFOb%*auu?_8T#dPX?JtDZe9-F&kTVg*gn>0<-+nm*#ayRB@Ez%slb7Ss^IPyO? 
zA`naQX{fY*JtQe%Ed z^1S>#4Z>xw-UeMu0oJ$Pt#SR}ouJMRU57ibPz=O^HQ-KFYPs=!^7yRW+X)U2b566> z7d)vGx}=pMpe{9nCw>rr%^&yTW_ZFGF56f%KPpUD2L6>ys&@LuO@ P0Y4HVvcgqD`u_h9kW|U$ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/trace_bw_heatmap.png b/doc/doxygen/chapters/images/trace_bw_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..79692d133c35de468cd7555c6d4d7cb550c8f34d GIT binary patch literal 5919 zcmZ`-2|UzW`#--KOLhravP3FmiONZQ_^UE#_SVY~=g zf3CWfJ;Ei+P{@*{?2P%JvD~`e-ua9f&pGG!JkR+)-{+j?nSYy}(BonkVFv(k8R#D) z0)RsSK&YGWa0JJ*rxgG^Ff}sMrPJwfBr!3O@r40^(lI~>V`CTrymm9 zerRGsMdiWRgRH*>Cd?9i9InJ-~DVMd`X=0lhjD=`%5Kx2({qD2QWvx-$`#q^E$fz_D=9+?Ck% zz?5L_q=fPVl%F3Zz&DDLF_cV48U7RKiSSLPC%Vxwj1Koh$Lb2>C+HK21$CA5$~r9P z2Az;dmn<(YhsJ3%+S1a}!otG*{QT_f?DX{X)YR11uU{u8C&$OfzkK;JIyyQ$JUlcs zG&nd2_xtJ7r@p?v-rnAxo}R9*uFlTRj*gD@_V%{6ww9Kb_wU~~H8nLhHa0Xg)Ya8N zh^nir-@JMA`t|EquU@@;`SOoH{(#U`R8*9em6ev378e&66%`d07CwLeJU>4_H#avY zCx=3zWMyS#W@ct&WITQPG%YPHB_$;}IXNjQ35GHuA>r}k$B!O8ij9qpj*gCsii(Jc z2oDbr3k!Sj;6Z3;=)HUQ?%ur{92|V-&Yj!0Z{NCg>*mdyfq{W2io)>u`T62EPmo8npaN&ZZqocjO{rU6f?d^IVPR%w#^dog91c=9?pOPx@Qck&|BO2T+%Fk_NQ6AMC;(et8649w^M3TD ziFBCsrJlswm^4Onp%lyu>OP#o$V&9A`mf-37h0r;OA&qyj)SBwr6n1O$Ra7#`v2;Cyrd`zM7G(Q7%ck zW(_=Pw6;}MxFyGQ@~-U`5viL*8SqnEYm}oDs8$JQhFa5Vj4fumhIg=_XP`;fjO;#^ zoS6GakEVMRhg(OCnz>6F*^-@!-0A{8?>BF==R5uE?Ra|8eynD9RGvSgJkn{tXhLK= zcejz#)QQ9GhT#iY%j0uCwJ$nu;*>}DahWyM(e=2|psWjr&Xt!h$1m=fl7V|Tf_|)T z`>SkbI;mHHtmeE)gH6Li5eq#o#^cuUw&U3WzaDiH=@05`8deJ+_F{>eIG#K1pl?TC z{LRdBbpXq3W^HW834Rt+q23)Wn1h%mvnmn`tK8yuk!& zHRuY|)`t%S(R|3jL1Lw1)%(%Fi!dFtRAsnkh9~VjKI;MO(Ffo*Vr1~qW#C;2_aW*T2B4;E6t zuOi}>{A8y`ENI{x`>F|~Q4Owo77hYT&#Kys7U3a=A=* zJjZ%e6kJl0!FnRtJs*DU<*-HwUxrwVuEhpTD)xi-=U9N+2{ToH0klI)37br2LOYJL zHO}lUe4r|0ocA1O>MM)*8_B&l(6>I9fv3L&Li=c-&?ZRsFWfLg>u>C8q^2THAg6>! z<6zvu(+VgYhpx)O*+^oAf6K;wen%tnXK0ukq7||!{$a{Yx&-3y|Oq65Ahz2<-#st|BMKmB$zti2pq6%7`U? 
zTTj16+2(34ROdL`#1kT0t7~(VwF`28_JwnBYdK{sw_mkTZHEn>+z8a2KgpffLbJ3$zGTd&!@OIOml6HWke$TDz%F$KxiGXSs8%dMsH` z9X!=TH0z4ih2kc0d^b|?OiUTzSqY9Dr|+t@QyzZ7WKdr&h&}YJIY-}~2-AS{Rq!%S{TT}SX!zAMNLw~Q2w9M+M zQHEMM7+5YEw8$8{_QKlObEBcckVrDd>4*`g;W02M!UoO=}&aF(dN#g1sdg; zs*Elhnug46AugC40dtn1#X1O~Iw6chJ0Bj7Tk%C#Qj;&h>)H{S(~+IF&{!`q@YIW7 zxoGW7uQuJF)g!DSw_rotAMEN87OS|-}_WlM2qYo%09XSOqs z4@H1wG~A7^iKnEs#=%v6dyL8L{Aj(m;vn*r^GgV88}5S1k=p(p$?{(!8)taQQ@~pU z#^dnCjO^?RdPnDgjdu~peQ^d9iz(vu0*<4-WUsPq-==o#xCCzOMWYnR8vYj|Np^}m zzjnmC_4S<{^OQh<4F_3O41emChup#Id*ZF{bce$N=SfLaN!N-^Z(q4=@j~}hXAZB! zijI-l)>(+)*SB0NR+9Nj#dCzw&^GSZgAs8<0@fdc1mUgUlViU>H4l{7k}N}`%9JV? zSDtIiUhl@Z9Kr+4!!q&5~PtnPre-}7p{awz-xO&lia_~W7*qGykHolZ+m=z zbpw2u35T$*YZ3rsN zhbxT?4mcQUN&-dtFKy<2?N--n!#xP#?D9Pw8<*HCyK0OKCVgel@d!T9p~Y;y%ZZ$t z{HBZ#9)}6)q^7xue+$7R9FG9yE*yV#BPt}~(m6J2p+4I~fo{*rz3>!vXG9!d;!`Rl z=9QYP#)?_s)w>l{7K{vBxltunQwwmA3v-QJtIKh2t8&-|IsBjU?2l+LngpoB(oh>a zztzi=SC5s@0E7+z9~La<1*p(rN58F$NTVtUg8jxc5jv^O?uh?m zelWN$;u9SKXB@{w7jePSFnF%)77!|!15ZrMw#+Xo;mCi1&%&SzmYkNdS7-(OkIb85 z!tDINT?3pScB01GtkIsS$#;6JHlem3aE(*OFFK#fz@x8kRuDdV7PXO<>DvBW{uk8H zVO=*D_+nkH_s;6;jic zkX)!s3tWPWF?#{P+4Tz=j3c)~{0ukZw+fgdMhVAJ4>GRiAMv~O6Z~pYfI1Iqy8@>u zI0U_0M@~f+)US&yD^c4@jWSHZ?d~+Tz5X#v7C2Z70wq_~$ewW~EVE`4^;fSOuxo+n zNkss%>gdJSq9XSeI~|JaJm{Yfjl(YE78uD@VdQ-89m?f8dyYA3w#3Tt>&KM;jH{IQ z%vLQgP|Nig4N2l}n&MQCaK}O~zn;4tz`J`2d33pA|7BKVi;nXI&&pjVBvvYx*&d40 zXV~hTe}^nIgnMW+dYCK111}*sr!T1y#tn{KcC1W&R)=Ma)8&hJ{i^wKr8caYC6k(o zUE`6oC!(&f4O>EDGiWUEA7pyBba~N6WIIR^M3FwPQ&}8{n>JBjGB8rWv=oF>wxozn5_qC&=7GG2UV-$TBd31ZDb0}wD$kXG)1f- zY$I{jGfnlsm;#;HzR?M|Gmrlw&bkxED^3(}fhhOwGTi8yLWJ5|n0P~cr7cP){vk#T zEoU452D`~+5L&@CzxH3ko!;v!O=_Bc79xIFppG%V3+SJjz3+7O#UIbV1P;Aw^5i?B z6j@NCJatL7gx8qT+Uvf3^V;j=vKVb)tXP5PhDev0Uwa|z;nGz3#q7hEug`l;*iRQb z-M=BG$B4JYZ2m=3%Z%n;T+i9SU4o%Z>W8z7ow=>3r`yF#O81C(J+BIW>eI}nojBR7 z*>_xCd=KRg)5K(7P*2Fj_(=O#@83}~wQZkUs@|El6h80Toj=ojM@i8)r&Nj$dmWNz zH9uj6?W*~B3uo6BVS0~sch|LtTfg)_=ZW0vXxkuY5iRr@<%w#Guy5F&XxgLp(I?@6 
zXW`>x`_Q!oY7g#!zPDf1**1%En=7&^-10T!ciFl`zOI{j9=jdl^yE?C!E@)X(0*^q z?O1N!7GO6Ukh|BWM?ryd$4hXJQg*N>`x3QRAVeBm74bK1HW{6f4)zsxC@Ym3uotqu zVqU9Coa_~FMpBF{;&eVtCydha6`F2+zFiZ7pLo|7tG_I+KoPsoX&nA*{phojW&g&j z8bSZjPCHE*_s=gK$_lDf+sq@_W)i{?U^`d&b#xj#ny|q5Iq;var1v!KYKnFB6I1+S R_$wbU&^>W1`v^Jk{{eQ7LSO&@ literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/trace_recv_use.png b/doc/doxygen/chapters/images/trace_recv_use.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb0520e29410d808122119c33ad89fcf9321416 GIT binary patch literal 48867 zcmbTecU;eX`#%28Xh}m9rIO5|jH1%eB3YHTlnQNWr%hUtgltXG9<-!ElhD%A9@1Wv zmg;vr>ALUx^Znl6&mX_{ z*`2bwVCQIHbDm;pV0X#F%Fe>naD&5n8(UMWiyQU|?h@RyV}psE-6e4$p?|+Y(8|VG zNYwK{B89SnaztK6&FRTt%Vk})^9$1R_1kuZx2K5lmu=Z})lVnO^LhA@Tj6SEeVWBP ziK7P@dDH+atb*_Zo%e@2fVBI-TpE|JW_Fb>p8`9P5K{*2Ona z9jhFODJqhGe{Rpyprtg7iuO;RJlSRQ?Y&MwdUi~v*xtRb3Y?{Xk1lQXSa3_Ms7-ep zo1Yz#+j6I4B!3|(!(Jn8ZrtwnhjSA@M8fu4aZ5-@C~Ih3NRe8Y?%~nW(J?H&$wfVI z$4~M2+?g#RBI*M_-@SWx$;RgPy|8aCACvRvuTQpl@JvrntNY#c>*tSZWc1{}ck7mC zhGo+|fyvR)7;lq)A0GE9Dgn);nnx)XZFyYB9|`YvRi;*}`o&ti=Iht5TG@8nSFy5I zg1!Z|<61a|G-y<4kIUOr@EQ`>3xxe3jpk`lvj?}Jm)(h?%x z`^9AJl1c88kd*8g8amTtP{a9D+&*rIIfnY~y?c?*o-MO^os%KQE4foYZ~eV{_nd~( z>ls=dot#A3c4&E8<wAuw+~vcF05=!GhO;xmywBy$>Qrvg{an+ zH;yc8)+{$RHh%EvQ9)xPM{8?qL(b(_hjMFb)W%2Jc@h#xW7MVo#(9EY&t*tN*tB{Xz4wwrpOuB5AMgKIJKG)W)qjcI;UD;NXph2Cchy@0uix%+JkgrJSW|%&=VXTK6XI!LTMqr8}ee!v{J< zl;VjKB|j^qnfC78n`YU>rtdmdY&5BU;ldXD%ss&~*O#wgd;9h4I-A$)XD3H!C^2gM zeYqxvhF9u$aEj{#9C+&Ft}gWn5wS{uZzDw$&ze3F{68 zaR*+#x+E~0KZM>YcY@8r!s6`3i#OlCJ@^~DD5%BQbNHxyjmJ_ZeKy-4-;OM!p^=MK zlNG#QSYFOJH#1R~ZeIUPT>WxeTfPU+KwV;CWrUnad1$_QPR<7pA7Z(vu<^u)TXO|! zO>vPkzlH*8<247;NBsQEK1Q{jCJS-)-IA==uaA@p3JD4AK>WY;GAc_m{X(IXmzQ@7 zy6q4UD6Eb?8r_wq8e8PY!Y3@%c7-NvEP!q2)u<@GW5(ko*N%Nnp4Uq? 
zuNSoIuc5ne;ld?H$I_%TZzvL?)Y)&|L?KvvtD^(Tk23i6FP$G(FO98OFLi9ood|HRF0(irgjGC+{muLCY1HMvjGj9=@{{H?ZH8Ct2i8|iLAMcZN@W`2Lb=kIK zM-hTPfbZ~4#qOOtX)!4+3EHpPM@E8>hR3gBtG}cwhw|h6ssprSu-QIlk*8Tgwmw$eLS6|PLd?5Sk!e?&I$@t{F>+ig1 z_#`CO8&`xDyNu-e+V+%p^j1af+P9CIvO_10*7p0ScQGnayNt`%HzTxoTt}(&T*nmF zA8=sc=H^~=ap9;??lqF(>XY>kyjVj#B>N zgUo%wGnL^oOFun5z%D2#C~EVKmgLA!pN?(Xw29($sgh^cS&IAowDGYpDXzS{y!T~g zvd4}w{Qmv>)}u%C*tOfApL%hkqs6hU_|wC^RSDXAfA>f)ynC?QsJ*}cdZ?s}ltY*s z?%(9g^Oe}MK3k4IYJc4mT8O+?6|X6z`^I4l5G_5u^I%@wje-K%@u4Pm>>#bzHk%)bnEO0=vVO2F zpC{9*?Owlnx0}a!uS#>a{fhbdd7UtQyeB6oCj~kDTTf3H)NVw&CO#ew`-l|xxVUXr-ix6-H3+q!LO{yBBCDiId-|z4XH-Kh_E#R8fTj_ zEUPDZCZdB_w;X%h6XtRA!=0^sqM~abK78mjk~?{+{Vmn+Urkmi>FLWUpF$-$t*x!s z?!UO66bRj1=QT#9Hx;a{cca*y5=`ptHJO^4Iy=!{Yf>3bbyq2Ryvr5Q@Myo)O;pFT z2&~bcKMRAT=VO9qqf%d0rE<;NyH8j1$Pxc-f`V*#a4abI z)lQ6_yYP6$#KbrzaFYVMXU;_LIeGFV3kQde!6(b6j5T=IDy#`dH!^3WLI7K2TACUy zZxN?ePQ~_ME?d=(34P zOiD(^y?a6H#Wqs>n52IzN3|m9dSm?_NVz$Cr&O)2tO$$5=g zR`<1y!rFD~{QdlB1Dz2kY^bs9*oy9hzw~pQR4B{~b0Z$!%(CmHT(v072u>1Ss=0Y? 
zv-JA<`lgprP79iwbB9VMGA@`6l`Z*f5S{h(kMmRqv$C@MkTlID*xA{)atNDjz$cSb=7%!d z=qa7PJnqc0wAQ_qyi^Af0jG_nv1pOnuPzw&Rz`4ilS|vOWlJZ4jl0TyLqbCQJ39@7 zLqaxF{GJ|gRH}$=TYB`qV6Dks>rQzCGc$fvA>999Ywl?Z^T+!_^)Z>3I%KI178Mn# zt@WDO+f|j%Gtu@IxBz$m?D=zg3c;G<_3E{gbI6R#D1rIbyz?XJLzNL;wrc>DGiTn_{2{sK0Y}LGwaq9HQS_}$73iPkfJwVW8NqvbP|4b4^+`w0o0^&~ z@ieK5^hR~KTjn^}z)SIa_Uu_{$uTu{Yd;sR$+m@g_R=7pdZ+c0&f}>=1UuP{v~mH# z?b@^FYHI54y2R7IH*Vapv7a69i?ZIAfI8^Z6}bP>Sf_%iq8jpZip%81Pfx^H0R!LW zFU-fM-V9(D>ij+ZbIN03WPysJq@?t$R3JI$>Og&RSAq8m3ilJ!`dZ`Tbc~F3S`Lnm zG?a1tjqYVVy-?v#Zm52@KAc;-&*DMJJ`t@XK&dLHn1Rsh1ii4mi~Kb#&j?uUp%hiDjU?eRkxIT~gco_q61f(QG`6 z_#Pb{t(EUCNzJg9!pZrVzUz&37j;2lp@Ef^&|ILHxcCy!y8KrNH}f4XmoIk!+gISv6m-em=&UKa(A zAj+lgvNaT@)>GIHBuD06MWINo$rz}PUUA~7 zxVJy+)=rOwSspe1kXE<2Aikh-`nKq!>{)N#bd~aWEE{d#Qy3Y!k%3!6D@qRlR`|jL zj|LQ~q;yy0S|`AEvah&Sc>EqaoLKs9>)fE`Ktn2@PKJecQfOFMSP3n^{p-B8{eJx% zTYBxEym@mVCpTAwTPSB#*4q+`#dn5UF@0=Q)i8g_=fZ+<_Az+U%Thc z^b6$9qWT8eQCd;#36T9Y)D(rBLu$YKbT|9&*^zt>I%jk^d9%ZBZ06#UjWs8qK7JhV z`?u?Z2M_MXKfZJ4l;9ytx+Gmep~$7 zRKw)m87o=Io`Qw>Non1Dcdn>IfpIBkw6)hQS+XR>vS}XyH`4P{d`J~OckiyIRAvVT z4Kqz#bL`K?)41Kr&J;hKzrtJg3i5X(;`V`vIa_18In99s2aweie;!nua9corEI`-O z6D@jjX0o@l^D2<5Ostw=^|K?FtgWvE1O(VwelG;Vj@I(?V>sS4x!=mlYTN4T^i2w= zkjU_gM~=uSD>I|jsC;pfCpRfp!{bGz!|$|LukM6uF2oP1Za#Bg$nBW-sW|XqI*_|#~X759} z6#rQBLAI+TkcE>o02lo2`*$VdFTmHAfTH@2Y1`SQm1{q4X>NXIuTf)s#?ah+#lvGh zVIK3^0}P!r<@}*{?nI5nTOBz>Iy+Ju-nCzV+luLVBpTcQI>e;^)GVGa{!? 
zpGL>i_r&5vF3;nAme;GRRlj}vrY0(V#n+c6+ph2GwQI|6@JMsh(b1*6dL?)8;6d-y zr((9$1WX$mZY(G$c=iKXvj3H-&ew*9yDs{anT0bktks7X%F{Q08EVQb1sdIO?Vz)> zI4FqSR;?2FIfL(?9;&;yH8nPNB3&S%%VU)$uIuXyLTuU(Cm500PM(3ki_wlU)4v24A5IO zym1sl#f;r;CjoqET>gZ_e4TqOo%5wjyAUhhc!-G;HPUZuo)Ns4ZAZ6n-##59Wsg{8 zrV5Yoqh9;4FAJ)o4qH!+8n}XgpTG2}z(3f1x$&oOFxmF+T}zf+JW`S|#jy3LGJ zjkcfI3|6UmG6}SX(#jS0Kb-5n=|z72PQxmd?lA^uF30bm)+N63%+AURH~sQlrwQQ0 z2*C7#sP#s)rhX_dhV#Ft304r^G-m*|rnlwI{#KU}NmI7TpFI`k&1Q+%(zJLYXonIK z5^ef8BJhFY*NAy&c+D!^1ZSTQo3Z?^WM=-d~gJ;&_8|KO>f07PSS>%^#FQ z7g}dg*Qs5NnN~qaF%k}vxB)baEXdFTpF3M4tzE4Zg-XOAX-CS56(_gi;&M^unhwV!2c&SerVu5NC- z&zG$RVL|Hh`%j;W0I)p)ucH8Joqm4apK9H8i12|#m3 z-O|M!J;X+GelliUY#|LNLE@u7e#j!bC+WVvD(Uj8opfM;J!72#LWWbL9fXcTonl$P zo&k*1ovkOyV)^;`-KrBi_nuk)&>^yaw(;}xI}U~JLm%M{*mI%$2_5gUgv7)#e3=3A z*~uJBDk`epudlW~KN)rt4Wg5aOBeRaJFMV@SsNNi+!QZAKmF#-n^VATqP&r?4s?Fw zFjNqy9?#6daUIA{j`j+Efmd3Z2i0P^^2B8BjPDDSAg9ZhCBHIO8KV48p)OY@>r0D@ zinbH*VshPAs7ZO!(#R-kGRQ<;-d+aXqO2*v{DbGJ){f`4J%3io^^=|WQW(QSFh?ulH{MgXnY)YlmADS_Cc(- zKyB^5fZ5LC0Pj0@?%ev=UC`2UCgIE*jYr343@Akaaz$8&Yse8`H`qwOhW4o@X20$C zW8k26Vgp>$6=Y95oyi7`g$D)IDw z-uoXFa6!djyo%85Ce`m4R!~qd9BfF9#GaJha>q2SN+e)*f17KR{eqM@XO=ThOU~s} zQNII(@|XcS*93CzX9HvNz;D1SUqd&RILn)hv>h&KpC$=vk-{|GDJ_< ziSGJVKTz#;kOm^<;fHK&2z1W;FzDC71R|TP!meFKI;96!&eZq3VrI$6+>P5_R&!&{R1#O5MI43Ld-RH)*+MN&Q9tfMRlA0ZS)yU}W7rGG` zx)^-Km4t*Hfz#{^9q0z+4<4j|3=>$HxpiaY^)q>{>+t9mK(hdDs5f;mOTKy#F5Lwo zNaZ-Q>{}3sfcaP7N$VpFJHWYtdFDvd-xxX52{{G&!(Qars%J-7$yP<1?yVGE@NloO zUHSJD@*+X?kdsu>aaCGx9CqHmeH#+c^GT6F^@$4Ug>Zyc^$?f8UrY`v10TW=oP(UZ zyVPp><>3EP(A00~kx##2TnUbYy!t7v7XttOYYgMrt0(0DywqrR{Y$=D`XBPu$%!4q zc#iMO%YD4Pm!I3CZRsl~FaM04cNrv&l|a^BpFQ=%MyK#K6TwoYlP|<&D)8RiTsKQ! 
z8F~3}c|fY`AZTkH*4i@$jmXQZg{j-Qdc?8XfpVoVIUK~6G!x@M1kOPB2Fbdu?D$?9Zij6e|}6`q0~{Y;n4qn^~e5w(_4D~zE$3y z!fN+dr;8N#q!a(E)2VQcs822sk1E75XwAcE~VTCHVIz zH_Gi*yxIy}xkE2!ZES3;;_>4OC{w%7zNI{obkQ*8L__X6H&IV;A*qxI1p{O%AKLqW zpV<3$hCM}+n>NWCUG6CG_6rI+tfWMbO8X-HGvX+E;efcfA>c7o6+#E1l9&tY{p**f zM0(MLPkn!ErL3hz%frKy+lW$c0Q5XMF_E}SOo*RfRz)Q^EkQLlZoSaog$d?eYr!@N za*1e7(08CEG~~G*fDXxTu@^Fz@Wtk}z=v5j-SiH_&DsWYSy`f>?EBEx>py*baPjdN zG29D(gF?G==gRHdw?EirumpMQ(r~lbHD6zRv97a(1*l6x#)E?NLO)~((SupGcaTq9 zoMZX&<<(u-@glj}i;u9j;H+HpaVS_|1t_DxewBc=Pj#KP`4lG215HAwsiCp408*P} zW7-vT*@Htv!MATm+M49(3i|2_CZ}3%_`4=?+tVF*?0A=5zkWSjjXy$-eAq& zVtBmpmRU89W|w!{Hrijmer37M?#r<9Vw)h=0MH}kFd%k}sh~|>5tz4Lt+a<8jo3?I^LiW}fc#M7q;H~$|3no;mUC!es zPR`DqsGsW#u%*;0Lhjx@iw5nIgM-h*hwF4QF9xEZM&Rc%Y`wTp+Wok@ zEY!}evCe+;fqdPEUQfGO6ZLu2zp~FbHLvR&zhU9^OH;j;D>*OjiKcbC*t<>zDztNTf3^!T-`Bg&Om8#F|bqJTa{HT!MtLhf9siF{@&?Mee;>ax^uU)kh~1W zI*Z9pB?uhAoroHPUtftgF>r~m2D1ne;E;{Yp6crAU9MA?l-oHPHZwG%+iN=WX7Q4Lz$_dcQ+s~grGaKN;Dk~rCv*ZBy^#@{Nee3nX;ML2Q z%V`=$gIma4rKXYUj}m2%^dl9B=~#r^5?=Tx^@2PY>dlSD8* zOQTh_lXPR%Hg4T|*qG;uypNw>%p69g5Ld zH^`9v_%SDz@7C?xSJBUG*s+75r~IiLoB;TNXx~T2wg_Z0@?NX6N&k=R!qK9$w2qFBsP_iIUjV=EmcFPCYViyX)Gb9t%aW3kC=^gR>!sb-18A3mn1v4@ zS|t-)y*em z*k!|GVrZ43Up8NIKq_6mdUZR=1z-{>?Z^n*wr%rFPEIbjw^_ArG1(+D+zzh*(SXQJ z$;>PTyODbSBink>OOy{EKIja*dGm&E*DhuvW7j2UFQpLb@~W4Ym=|gxc}a%g^NX1z z%Y9B9^&RpRD481&Bl;~~p#=k2T@F}B@dPi@^{PT@uYA^G#-!fLD(lzC82KDU7U>yR z8;+Wq&c(>W60GO+J8mtqMAes4)9;fyANR6P?!B~0E{>E~WBZKP9t%>`D>(?qwsxcZV!G8ij}M^(;P{CBA&+Y$+XfjU z(bCY!h>8N3D8j}4cRf`SQHtkNfRA0#!GcD*hKe5X<{uBw9h592>;F5_)y#R&tDy0` zeNUZVJv}MxZ)J{ojzZ~(rtMjDG%qY8(D4qUnImfT=;);fj~^cmit`Hy@RE(V#f@T8 zyIOMbGCuEVSZBw|%9@p(y@Ucn*cvP$aRh*CIIO0omOc*cGw}U;MN{o))!j$_US6d0 zyY~9<0d>efck_u}AT$lzH8bWy_bpYi;F*4p0IlOq4`a zp8*S7u;irEFAEXawsWU~jt&RqZEbCOnOw&|K#{|1Q}657uh&YsiYDpjGkVKX!K|=Y zxx|}D64rO0j!tWiJ++lWgrbg~9?dVji_WKYcaPEXnjP?a6sa1EyrQhE>G}j+u)k@Za4L*9~VFUyP$q{M*jwjB@mLqhTLhOMn#IER=B6#s>!+1+QMcnpRf|j5xSC<^^Q`p;@_>M#&h>^u_O}S$n5RYT3Ws8$>2n^a-4QU=%k`x 
zfi@EcgwPlG?(Tl{SKPqU#F>OQSc8konW#IB=qq}8{^NN6oyB01s#2^hEGXa?(RD+= z_i1I33g1_5*bUE@C%6T(q54ojSrNb5c<;My(4% zu#zK;V%M$mrm11G7#$m{dUHlbh6*H!K*A|CwQEo`NNA~xOD*o9#AQK8(*DAuWi250}b%Ur`_(WWNBBgSp!MH?5W5+j67HK^QB4K52pnj z7+!%Tiy2~%=u^6EL0}0vG|2oSk7wXA0ZX&Y#l@vL&yDkRhQ+%pOKJX4=I#y-`_PNE zqXdEw?D+oqakVWlRkFqV#jCK_i)I~%VdZU2O`57DSg+jx5<~)l@e52K0T|#a((Bq@ z88Zp$<@kX!k3^VpRM7GAbM9G8Bu}@pitlNy*K;{;q)mGTS|Vu%3IB`conh5k^?#-Q zUi*9p;Jj?l(&X#b3(PE%Zci?_C#Or;T1ziItZF+h=H4gam885)_#UUQ2?Z&Q(1@rj zwVd1k{#4c86UJ9VZ{0fkUNn>rsXZvMtNWs{r(&;lO|trVJtjB3w!>o+TmHmTdg+P4 zP^52Aq()dQQR~hkR{K@JCdiD4@Fhyot0E#I5WvvD_Uzre`@*NS;9Cmegn`k;&)@$Y zpj_BC>c!a1KGw5t99)+HfZG(n>{(M&Itl{IH#u2opH(XZ7C-lZnA7MVrWn5v1(zWR z&_kqIM!&K$+$qxIFG)24zE;KLmHRjBx7d$IqVKuw?1d=xzi$5qXhVjQ`1E zC%K-|&7fnyK}1NUaw@>Lm4w}!9BzrPF~ybywrk9;9^!8TZX&N3smh1p2Si1~LB{A_ zhLQs%ak_|$o|hSh1wiPxU%s3gaX|DH0P2o*`117s=Z2QFezF){KF@4#XJO}J4VR$j zRBq#!L}b8zWg?Un zgUWQbo?Og(kI%_Ib(1bAE2Aek5m$HP2KN&a&4q<3|CS#=evJ3;z|S3U7mj zM@UEGSB(ODyevop_Q!JWdC zL{$NY6H?Q!kv1miGGOI7Bo6+)l}KZm*UKcgY@r4(PV^5(9;q8J)p3IRy?ZhJ*CtqC zi6RR|ksH+*^g%H^A?@G4YZ$x%5Foi3H|-e`vPRFjZ}SfAm&+U+CVtkbAy=fkpHAHR zdQHQfxFO5L_jj5P1u~3Sk7(w(ldu{Wu5xaBc~W|AHF82Pv~xX=IeTKvQ}6$Vo|uqw z0CXu3#3Ao&*sy_uinb=>=j3E5QNjTE&Vu}c2DS=HjusJc|GfG)CcY?saqQuehsUTX z4=7^2W%Mu&-74Ir-KJi3Ur6Xk;)V0!rqn%w5OmgriU59}NN`~h4#kGsf zmhWQ7R*GIiK~5k>NG5(^lW<#z9!;5HF<)RrrFf>Nr?*-n{8IqOL2>R8TLf$`ZXUXN z@%(vjEMRX-7OKD$T33PE2<9d5_z?yH(g4qC4+#4v%~`vlwGtBpNZwP%Es>EMk-7tE z7Sp_2xV4!s?Ue;M_;&5zUmp(_zeSaf9*yqaYg`@<{$j1LiSo`v3TWD9g;$Nk zcgxWyPkRN$r-zBfMzt`UOCxF;;IK59k`F-Ml+2K&e3bD^BqYg8HRPR8tE&|Wq(b*X}F(^iU zLV9wftrVyQ88{Y}e8QIa*Ug4Y^MfLSJX`-~p9dilAPz^s%U0b58ld`X3CbB*9>V@$ zllY*p6v8$`!^Hhf+Xmn9q6nzwR#;dNh?o9^!Xsz>(PT!{ud|z|{IPXn6T-_XD9n8( zR%xzIdo_IRn|JBil!9%PP&vRrK$D6+Nb&;ohr1jG z8|Vk%VE7m7aFB(NEBEw_E9(=qiAG*)w}b3Jh9ioK$ei-|?sNPJrHhxMc~#1;xAM`= zuGrh5!HKE5`aXS99GSc|_uWp_tChMWyiDG&FR(H01dy$wq9W;gZrm835l4eQUh_qC zqtwFhpCM*D?tCC&gWRd{2RmXx3a5Ye5a&CrbRWWT~Hf0ZQe{pft@S} 
zLjK$8>X)k4xL}o*myqjW+FOoFoHV425bb9Gsz<^Z$(t1fUBJ_%Onap`fPElm>vSO_ zw}c=1+z2GZZQHlMLqcX@X7s)(kYlB(8c{x(Y6=Y5`%RA74zkNFgR~fN< zAQPC^7l0II7u0nc{OY)vR%hh-Wb$37_dMEX8RyuA3sHAEz4{D2J%(v8cOX8nUd#q& ze_5b|aA*!NV@|4yQAdD#=>&t*|SY3NVI6|ZAq)x0tsq^qz_jt<|P6UgGyW9 zql=_a-~bN%K|C+(#Z-;oK%_J*ySp7zP!tN8ED3Y_r5!z!v{G+4BLbU<8J zdgs7#^8IR=y~ir^=YE}r{I(3K3W<}7f@lKDVr@Iz~T4|As>3 zi;(hVus-yGwJJd-7A`X~G{gYP4W-Th^kizgpVB#>IKc>GzDklS`pKzwFD5hLnCu;r ze5YqT&im3zH*4?&CFQ>y68Hf=*QxxqX?kKZznNK#y}Dj}_Mx!)ygVyHqq0=JO1hG? z6}`+T%GZ4XX_ye82No;+S43Z6)U*?1Cj98crN3k6PJvpOImk=JE%GOWGnA6ukPx1T zS}$qIwqH8h&M{89)YQ~eMJ1)`nA5>%6NxXYxB2y6xF>f(cU`t@SvxW^YGi7;1Ifi$ z72+dD53KIcL@E|u{VM^qQN^ezkOqm0pC&~^ffoki#~O#*@Gzx5xpj*YtqQx}iEvo> z#hZ#tN|sRIFcQ#}{^tT;?3EEhIZQpV0-ICfL3FP8nl)=8Vqz3Yyw_&CFRuQh$e$eJ zk2YRA2s05VO{$Y8E6mO~>guist2z#vFS%~wpO#|vQ{zf1LK2EzYNuRm&Qck^F$0s> zxI7o;cL)ZDr6Yx4@FeT}pD=1mRAoa_Wm&gw`_6bZ{w;kZNDwAM2ov_5I@iQ(zmpCE zT@2=X?u5EbErj6O|I7Z0*{ai1_{~P_o6Y5rdWd)AbGI14Nc zx48A)Ce*!$2&o8PCNFR(NCFwS#wx)z5wZWM?%(^6xAz9klqRX7q2WX9`4F~!VLc-T z$k4V7%f~WH66iyemctW>nv6-RXBmaKo9Y$+Le8T9LHB>}wf$W!RwRkO1ukq~^!>nL zp-zU2J8h%01mX(*P<@M*>@{;Ul9KsaH5ubK1{)cvJaqG><%dFTFY0^_R{I3`Fno?h z-4}mn+?hL@U_rt@IHE6yx&;Gg04KH8AJ7#suUfSP3MxheVSMOHwM3b;CAa}A3-$zE zZSuW;Lsr3(2L!Hh5mLeXi0R!_m z_)tU1D^84qE;~J4WPX0uDwh+j?FMWyB`VNKXc>+>H++?xPIQ3-Kn3n`NhrrU(G80~ z!JuH|=K@vRVWf4Xw`_!F7s_KqT3VVC6`A`(->fvCeCPipK=~b4IwQxYJKDgmfq^I$ zojlZDl%lhimMdv!Xc8?^841= zowe-vs>8K>l6-G6W*#Zjjs z-Ln|Zp@QOjTad9g!uJHrL-7is98W^$NQ`uMt9QMVJSMruP2t03A|7gcgeaZ&t6Yqn zxMRi{vltnmP@W%qxH06zxjnSJxM?INou5^63&imV6lw&~XiXhlPI49&30ETjUhu{^ zDWplLMVy;F0p<;=A{DBCz_y1l*+6nK5lAu51(quI(wVBNDws-ZOw6Ht&&ez%ef7O4 zz4p2;NkI?+h;dve0!bUUb%7w4O}TKX9gA)x*genQ*>al(%z&^v9yM*Ejt^+C8IDI*ZVNFfVzKFaM-Dp)_G{f+_qi|l0jWtc5AOIFGwR%*G zuC6Xr5q~)3m%|lQEz$!wC6TlEYd-+c-MV+rhh%6bpNbdJ(L@f=835VH3Br%i(y#)D ztHxrJcmYm&>CG$xw$v|@!E%(1N#S7<)u!fU2C7xzbuGNpDx4>a@tW+T-_kLV)zmxX-nwn5s{;Ah5{ z_fBzz>-7IqTOC0IA(bALtLDkT00IQb8WZKeOCVptfx`RZ_+uaBRpMV{mL)})cy7RA 
zjD{`zF1@w8?kK2(l~2}+9RpdI7trM^NmubJZr{mG6gj6q4`PIyItI%yu=rJimg_H9!y>L`5IG%EB5+E}x$n5Mtom#{jPe+1}y~ zgMMVb#z6}|lCFepxsr3=SwKSy1*WXP9spz?%)lNTIRPmItoAd#@%e;OD=p6kYAa{HO0b_hA*320rB3Gud{8 zVgu$~%Fcd&QAO@MR+$M|+n{hTtoi3ZUI3pCO!TV4HnJ>le#RW5fcb~K%07I!;_puf z3x#mm({cR@=l`?I^^6J4T#fm}N0(Oxup8 zmsV4=-fZOt@IZjj_wEf14p!z|wk4ye=){9cw8ue1!IP%hvzTr`^UpmiR`N(qA<_P}sEt!s{R-@=wsW!k>O>8Cv+H?cPU_cii@@d) z%(a$+SWmI*t2XN+-X$|K=GxxT5ux%OZ5HgJW{DL%?SFWV;P5bs%c|y^k@~@N^-W** zd1#K#_`eivUz>?F^vl7Kd10KL9ifb&mP43#fe%{fb3e&U6Q{tYTXz&J4}3`mwozo# zSCf9DB|JF1zUG_NKZU&3xF6!ftp^Wc(m%t@$AH3)?7S9Jc|r&)FH8!Pj{1<{CT#7( z2jcepu;-^e6i5~misBvy?c82xxr^eSYM zRsPE$Qb2C6vLo`Uf+~x<2eQZtxPXaM0Gu2Pgeeb?g^f>7v&{JV^`7Z=3x=>RG*Yj* zT48!F+AcT?c^@!}1A`~g92>A?>c`;8mahnv6yc)A0zOo2=O2b}Ap%Q6Dl8MO0a;bP zU%BT_`j;nLoj#=yyvr z)cZGEEMIfm$G&f)!Ii4*Q{(S_iwRF_c%qE5!R4k^ zk;{D;)?Gz8>%?d+j%Bz4&I4YdEamloKW_ST3F|mwK0bj3GB=#vYX+XcaZb08nVR|z zJtz4-h~GFm;KcdVzaL=pL**G9tN4v+|rV2KcImd%U*Gh>Mgp~UH1KIm{*Sk z(TtH?@6b@aE8Xz*+`>p_&b<%kqPhQcSuV$Q`sLefc=9#`D_#qX8p5L?Pe*UH z=v8i3Rxe0!g_!Sc=z$juG#km#{43647!Te%Hoen2^CK+#FZ(;d-CLDyZ;mq?$fzjT z+*{Sz2p7cOCUU|5B8haj?UJeZ{MkDx>0;hoc6K)OVRdZ^a*DtY+q!O>*9EJ8Ir3%_M@wm|7sK>F zqz)F5d$nkB3sf@VXtJ=_foX>&IFAI8g4`J05QzB$8NvJPuXCcKH>2|jXgQ%hX*#*( z5o1R##{nHSJ;micch_`_IjG0KcZ&$DPYSGm#yGeiX{kERm2L)@;VcFybAFAEdZO{w z@gi6RuQ-z*$E~d}{f#o$J}?jf^_35v#ia&|fvdh_`cP$8XD2aSq6+-d~C9!Xdqx>iJt@92&nwf8(C6HWEC68qsptlPjEJ3v#8~nPLFoFc~ zV>0GYAcr?LDKbGs!%DdIR^qu80q)$wsWtdi;m42hZ$p}?&W^UfyQLHj=q&@!^)uD& zuCA`vTlW3EJr!-u_KntW9~*}{_Eu7XVqt;TA{D9;BzkHHQ`;Qm;A368+i=;To9pQ) zxTv=X2~vR}0=ss@jKhMcC!P)LPl~6Ac^#ZFYtVJfg)`BSd%&xVKRQU;1#3n)W|X=} zU?FmD43uWQ9H%Sq-@l){=`3&uwgyle@O1JIk{$;})sRrz<R+O{;jtXgtyL$ZZT> z?F1hdd8u3uv09dkc8r{cf#wY8g@zagi4=y`S~@EjH6CV^t@>m#S@G!6Q_d%JbRJb3 zhr&m?#_5EH1{-9aYxR2`4tg0a2Ds`ASH511HXjb;5NSv4C8{`hm(5D$ZCh6Puhs*o z_LJX;6{g1gSb>J}bY_Lh7C7)$`O7yTdem0!U%WQckN3l+U4Ai;(~1}v7>KjHCg}_( z=!~_}?h@_`bB+=quo?n!aLiJ1aq;Oxc}R2yIF1F)fnv0(!BPq=-xAQ~_0KSYRUTdH zh50hbm9$OhLauWO{Qa5WE#Hn-u3Wj2jEU^fN}&Mw7l7+eLGfL?b}a!_F{fTIK6&~y 
zpw=%mlv^jwS22s8AR)a#T55Oj=L~7A6Qa>dD9h`e!W)PhI2szab0Y0EdOlqjgc<}^{?us zo2io$0goy%7sD`#GJ+DR*;#^PD*_;8vAuT1o=ic-UX^*YrlCsp3$OBWjk-3E^OD5x z6M223{?$CEF+*HP6lO>s`T710?-<##`{6;{k~*x`fZyb$ZsIMt>TsN`)Rd-*vf=v2CZCM_yIWOKle#Q8>{8g*5WVmNuh0r7x&@XU)w6_|@aW(Qk`_P<8nT zK*tGsR{_~pIbLHY3{CCxb3ev>c~SW?aMr>%o=dp8s)d-HGGP2Tx14z52d^*TW1yQ6 zhdvZzdWlk`wEX!Q0Wyw&KktIH-ivl1TNb+{a)BIuV9}Iu+-V5M>cA%*1*hs1FrM(* zb@-KE%BQcQ&A&;cQygw}2~Qp!1@;D1K<#!h^zo$Wg`tWlpJYgR>&eg}v<5Y6NIs&{ z%5@gOn<*58C^=gLQPSSs?Y;ffa}!ZkdiRLXIFs?rKg(#r_w$&v=T~ALqkJ2aR%MIiA;MS=KQib zh>h>j^1#Sf;0|*K?1{OP%^3Q5S6Ru7$Rz^=^Jv_be8hl<%*J5a)2%$KNb%qE{Q2`N z+a5;B2GFl)xu_^GgMmFV*&~b}PK>Bi=#<|WTwso32G9T^|1C_44Xyi#bhl~KSx^X- zRaIBu%lT&Wx_Sn#2?)Tv7MDE~Njvv}P$%e3e83-YOS>}waZxp=g1gMVBkss1TO>=9Z$@Q=g-NA**|NuS*HHW7df;gL>5O$qo2i)R~2Tup$5JC z^eLgz`NyI1w!Dfb*bz!$@DEoreZhcP%oP*-LE4?h=UFt%my=N-!jYhrAZ|S9H3Sza zeI(XV%wt_s<1a?IkyATJ>i}?}bhrWxVT|1ztZ~FR3xI1q`^%DoQHdg#dS6g*k&&lP zUzLLY9#3I|u<&YRtzrnnIC%^?!92QqVqyZ$WpCv1LcnEIZ{Jt12$dlk217z+BU?Z@Sx=IjB?Vs4X}mYM(QtdfoSeK`qs8+Pjcn=5GE5?ee;CjQ2OE;!sih=`nDV6WSasOy$;q7LKIUbCTszfQRZKkd zo&HsunW)_%h1Q@0 zElUg-=)505j^undjby!*kYUL}!TU(SKm0I@`=7W*_43^VL3nhcDJ_PPMdgiy<&M)C z3>YP;v2((6(UGyFb4L0`&wmBadWq9dt$V;!gzSV3X}ZjDpl&5JGFH9%cugh%5e3Li zf5woglCx$llikQ`S z-n@Atuj zHG0xlfGq&?i;*daY?Tk`IA;aVivEKWq*mMuv7N6xg#P83WKG6(7aSP@h+2um*$jjw zHgAr8>r#d@RG6@)HHoL$$$SoWvbPZ?4f^RHlL#f}h+&q|6LvH38%z{b!);KU0XJhh zdda$aHLe=}e;@~BiOHcRU$8vw;K%`(ZWio*t($c77E({|0($%o7@NrsOKz!8e51{?VJyuP$xxUjkMSDQYJ^$+$M-z>=Ml=!O9H0$f9 zxbAV^n~>p?dX;jYgIyo|2cwX|LWb=4+feL$u(|joCAm<6=u5$)1gL9boIM2|w2pKT zG)jbI;jj2JbEp!5Au@lA?UED0kbvlePOR~lFFsL;RQgHeJT9&^qVCUrCk|`XQ>Vzm zTi8CdD?V@Ai_;-+nh__OK%6e(^lR`TN0fT*P}RbQ?mV;Ph@|uCA$6Lo0b1x6@0BAh4sr^|f3y}RESBmZB25!tp>uX2Lw`~XMokM(vGS|+Aa93Y4z zM-@fI3EWtsp`ihxV#g@PkH2pKi3tGTPfv{JcY7Pt1u)YZe>)qKC8HQ<$9(m2l>T?a zEmrd;jW}|m`Uf~A^ft^H;5uVAF=EfZJhYRq_iv^t#i?*>ez{qlDNOw!a_nwZeyPhfLC(}zK7hE%QJ?}%6)!v25c9iehQ_3KwB zW3bIa7%?D8IcQ>Hl4k_0f=K4kb;N>0yKj3VxDxwNAv|OIFlDud>vGEfL?%*d-~u+S 
z@=pPi9wK~>ti-Lyk59L2G=FmiDUI0-(teU-fY6lU09FHxTSt>A4VCHed&_cx?}2AN z?AHPLK}@nZhfWr!PB!^=N}3@8qeq~oZx&wwEdk<#8S)u944}8KkAM@Q5FyL^0NUP>@dJn`U3lbK4nrJ>FJH8p z7_p}2J9J~4%^8%SoyCgS)MY~H$E}%g@C4@0{SZ;SyLYnymo9cI^b%V!k%EnZqKvO) zIn;o&%_5Rg@Il(Q@QkG3G!v3;ux|?7@kq$AZyYynpuD=o42kuUksD+O;)ui$j_82{ zn97{V{qmKtAfxX?@DC~qWWZ(n_h(i!KEy+xZvz94W=WbjQR#V-&~mRup{zQ44r|aT zXGtecxxsaS4SnuTn`t6Ep!bhvzxBwiG1vdOobqcRfJcm1l&JEPyp4EN6l}2#K>hbL zh2dZ!6hG6&gk7iJj5~;NMd@O>$u0TFY5|_+EuSv^sYEIRw8Q}yauVc?4|+Jl7ok96 zKCA=F*pOz*3XC7!H;T(mTXLAp3CAesI!_b;5D=sVNJ}O^v5W|Gs9e6!3rR_&U;k%B zauOoKJ9Jm0m>l2}6g=^z1Blaf=S?zyQRULt+e<89Bn3T}BX$F`KXb@2%GhaH_5=5x z7vcu~^@lhIp%5{WilaCI=5hAHb~2lwW|Koh24U!HQ6VV@F(V8b^x7OIaQ}w=BYIf4 z6wUt(ZAZ$YPAGoiCD$OCc%l+@mU5||3pX!tIY0Ay%GzBG2k*JgXfJuL1o@L(=i&NA=aRujRBo6aXC13AZy&Y&{^Mfny+TypBqN(DGMpPfk) z|1+artGutawXkrhVd`4MI1o<#K{ax*-W7iy4wJ^K3&$~7ss6=)tjO}4;pV$jwQhjg zMRQ;-ewY5rT%y#V`GlcI{7LQM0nAwmAL@b`naZZRMCRWV5U z*L80H=+=UR7X9QSU4z`s(fWbKc@YsqB5B1RP{yyOs(QO%fZQuKM~rIpbswagjnz_% zaR93y;v3Izz&e7p54BnV4Zj-)^sU7G$Uk#?5Pp9ziY``^{xPi1mvjaH zzt-M7F6X@c|2}PF%3uaD_CZXFEXmSTcB6z)5|U*Sk+nn%S>m!QC831UqGZd`Zl@4Y zQOK4yiB^^B_dGJ@n(uvofA?Q^kL!A9s`ETQpU?aKK9<+3DK`8=9X5Puice$6Ne~ zui;o9DvFzAYc|8Oy0S9cDS}{Y<}o9Ts?Y@l%-Onit8l<6X`V9fg&~o*1nK11Z3JAV z!^XR+%&RA{k2H49UApuqgd?O*T{->5bW_V)H*bo$<0&YM{h~K8?ny*MSZ~7(bcAO+ zZD`qPsPQo)^J?>RbdsVjJUFWsPxlI}q@UMdwv9SsY1zg^3`m)(FNIF|^B!Wf9|7QzKHSJ^>4 zNI}{pmhCy=0QIsMF0D`Ru!ws4rE*ajT}WEVxd2=4LQt4}_hWELuYzaVMcakBs6FcIJ3=KySQ$C7w zaUx%S1y6VsyWryTtkdO7(gKc`4pC4w*NBP7;DxV<39IRY*5ug~>tlxx9(+YkADGfh zyxn!Dhe^lSG#a}O<+vwhvj-D^^2GNj5(xbSenb4HJqQ|zR(C+H73d&eut`-`oLP9k z1L1~fX{=bW!nh#PQ=3*JR5YeK8kyEa13OR^Xa$elbw;19WSC$KTOl#P8XBqEDx}a+ zJI}?%_s>3UHC!SZ2^%u)w8c(pElTxJ!P1v8?XeOf0ED9I4yhB9GKtY3-)*2kAhs_g z!u}%YMGda}cw#WbCbx7lk!`fOgV;X;`SdEwW*6)wyB23nTW#)tFy`~`di4B4@|^1Q zsDsIj4LJk)_gDK=W6^fQ0Vxl&@VF|or}1>?JUL}HInWj-U~1B7WWR0F!-y?I8$iTg zFvM+Lhnz5@G3`@nX`93yH|N`-c?eM2R3VH+4C0r~E!UR(rxWRdXSZyV)OeEr5fnC5 
z{8m4wf-TillFa-o2Uop8P!ZzWXmol64?Mk^j_e%FY@g4=?a7(0fHJR0F_l941k3KNYOwcJ-b?3NoEn8mKFNFgFtI>mdQ zWn_fvl&-p)l++9&YkS}>$sGmx22GYUb4H(WG?79fo@-+hy26ief0Si>b=g2|ZK++k zxx|AsvwM!w2Z@})4+^icz;gqEvDZ@4l=PLP)GN)(%RI5Fqk>XhLlS=Bl+gZ**w!s% zt2dL9DXdDY@A9}2HgyBke0_L%TmuE5*KcIX5KYyEjvIV@{-!w(V)S}GwXkh?5I(cE=@S?Ut3;L(XO@#{XWWlqTV>v;bSL~#1{Tb%PUX{ z-YCQX@a`>far!7M8U}VIX5O)5+ViAgqeqXXv+EVFyB52vXwOe4gMyl!P$vUiixg*^ zV;6nrgUjFTY!ec?0i{IGvj;skcBKkN+N7koW%_YMp zP!8Sl#U?GRcRdi0Q#cGKq<*b&>=C*MMqxfcl6o3>-#gr0Ng)A05i@&9q2RHS5+s0K zx(yU%KhkaEt)A0N>%c7EP{&o?8$<@%m6KmG(JZhIJ2n7Nu7}b7WfW!Y?Cn+Jx=@$D zy!P|)QI-UzzwjTUd@va!$*k}k`LT-T9!c3!WH^5O_zDU?&eM*Xrh4Hk=`{L3FvBW+Ne<@f$tIyn|ejt7yLjDLcdKbVBRf2FEsv{zcyRZ)U{o| zIV04)$O;;+)tayBWhkN}=W076*Lq|A0L-0e#ezRJBI1ZJ*0|iLsHlLK%q97d{y~rZ z`=xsa?WAU)8zV>!8Yn326X=VYYJJ-_ZUTnQHdtzBT3w-hjmoQ#pCO!!iU45o*7*1h zZsu1zE7;=OD5Spyz}pHbL$np3c|mVz>6%})3WQR`OrZ7%Y)QH>k2rwFsB^bYTeMa# zD5ABkJAew*Xz4wpD*5@D_0eNe0bPA7=}oc+9~hWCa`}s>$WJat?2(@0g?x~|k%mD~ z!e7`!+X;%^e#?D+P@rq2LVImjDM|!QJ}PB3Q4ocu>!u@O6oe7;BD1N%#aEa=QMpT~ z_WI*T&6pO-$_*7FvVj*eHmEaHZ6)(1@Tj6Xk3^?bmM@r2iHV2+P!iS$Dl$sA=FwO^ zDRqGz^)4+#M}G<$d!qanUspStI_z2U9e`zp!p_c4WP2qU;cwkIqCd3kvWrk4xMIF` zA54mZcAb-FHc;8Vb|*-^Dg~MrJii}*`soG?l%{wSI`v~l>CJDh7874qS}Lm~xE>*b zG`~^5*vP#!t7+~Td$ebF_wn-rD|KON5HB91o{Y49I@_(8f_x3p{p z&r2j&DDKSXhHQ3@{SL>R7Tpt(q=zDAJHTY4u)hbMcVCPCt^xE?AeEX|N2cCz)gqp z&!|-`6x79T;S3)EuKkU;4XsxZP_{yho=!pMYC7Q>f$skkR_{0L`TOsqRa=cz_p%A7 zp;iN6*D(3$@P3hlrR9ib_o=7i!{Ea;3Y{Jvn%Dn7!MlD%Vqf4{Hp*+T_sdQdnxfDI zS}VK$kieJHXf^n&9)mv%q$<=uL%9l$&Rn`ejYc`r(%BE^TK$w~_*H;k&w z%g;xzt`h|&5oM+K+w^DKN8-^CWt*499E=r(8QPII_i>bKuhQY%a} zDA<6tpXQ=F=sz{k5E;uiP6deEYY_44SH9cte4E926z#B_iZjiOCphhfe=3zoC0y!b zOf}&kiDsizN{80wr)n4-9^9Nvwf~?&TSy!L>{f!Qx;auY+eToXzzXFx?Y6h;@Npyx z(tizf1H}cI*Ez3U%skisXUC3FNeB8)p)~nxhYlhyC(?*7GcCs?N{<&uX){G3nDs(n zg~%zLF=QlvWbX3fVAI^~QCEU0%W?mM$2>vHdNRaR5)Fw5XJYFaV{iWf_Hi$N^ zGthJBDKnXB$@P#U?4SutI&UE`h|rWsW`b&OSP_dZ=$M=Wz|MR3K5R~|Gp1uF*!+Ai z+QxUWGv~2J5@+Bb-39^?$*)B7o>)HSQXCz43$W9Ru*z`@XoDPsG|$5`^F|AY2TEN_ 
z(cYnI*=9x-2VlUWhrbD1&JMDKVr)`Rt6azu}J7cIpFpp7ZvPS0%NqERX%;W(_eF z-;3HOgA;yWgUMVDX$+@H8IekG?oHH(UsJ$P$-o=SQ~)vu;bO^v8VqZ-)7xrjn?jS~ zEdKe_d&op5n(N3dTsLlPAg79P6-%FA{srD1M2VXambOz_cpZZ?I7aV=B#4BgkK#9M zaJwinOSEi`Re8Z>|HQQl(*TEd8!1sYLc!Kf94pMw+z>S zy9E#0A%Z?UMM&1$&_>m-%IyII)H3)sT{DQGjGH^xzyG1SHCeHJG4xdC;AK&z93AIA z1MsKohOfU6g1B+%aHRH9IzBa{MpPL@ngujv3`y{(%ABJMuo3L|R*9V0xtvE@p#qrk8Y{VFOm;NEEJP zx{ri0geHuLC*e;Qpmju~ZT73rQ7!8h^n^=i>V?io&B?q9af(ueh8->r4cxWw^x@Ep zj?r36&3cQkZ)ago5gNROWU5>y0xpEi{n59VmW;0@li_dt469D2w9KA8dxgX`N7(r% z$VON(H*)?C+BRG8oDHjYU)aW@;jd)63&e~?I-SHXTx;z6q&o-&c)f64L?#oz3v$+w zMbn~rV>dB;*o?EI)auwP3or&i(Kl#N`58462$h}a8fkKI<-0q-1ZpVN(eNVVX)&{; zhjP|Kh2z~fz}vMGOO ztB&c`yKgA*V0S1kGCziHYvJo)Ru(|M7Frye_7S3~-OP7k$soe=zUue38ZRXE6N7?5 z*n<9sh-*q^58yMvco{GR9`qIt@)3%?(!qzW!Y_L=G2^PL0W;D)X$~cNR*C4L060F* z7N9seCT;;q($iR{A<}zEel7D~Bw=IoO$i$;<|2kzJ@&KJUw@5IZRM5gSBn+sfm$sa z@arobIKoIBxN^_$ohJ9Qh-!mt8cRdF^r3+Q(SH7haPc;#21-mz+&Tq(MFK2xD1>e_ ztGq&>0)g*tq$T=8>-)0aEJzzaQM#@`K^xY4J19ERllf~ZM`t+qx&?dQjp7XO%TTC1 z^&&UyaOs*p!?iYS?TPtC_^^9cExSgkNBlZlsdSUkG~i>15E>)WCB_j@c?8shpJ4i& zx1Nk=680$t<7vU?0cK_P3dtnf*)(nP@+D8tbbD}gxaEfu7yx{7w-{&~QP)ayxHX{~ z);omtU)P$Pn%*4d5lW$O8OClb)5PE4W0A1vqxg+h6X9iuP64r{CzZ<9ZFe1_Y9r_m z2-*q2bcKQ>9mwm%opd2-eXS~pV0j$elN-2V;W}ci`>wcpjr?8+ht1mdNdOo<3wH#0 zd4J3J^+WA)jon+nX3q(MMCK=@yzOThcZ0NCTr^Nen}QCCxr7VS{r?$TdW?r88sR%F z;DB!Z_JJQV5YE_xjj4%@e-rvLfTpIu+$AsQ*fKa=~&BbCNGsEr6F zm>g{2zjUi8VEm%e^3R6`eLDac#3-;|DYt}+}X2Jel2v0c42TD9abkWSU_pwXxp63YP9@g@(=r#Z}6i4 ztahWe8w`*BFJgwi^c^ce7>DZ`!RwC9(o!W8MV>>>+a)=wzE`w^-}-f zRJ}wBKm=vSmNx*8X+(bK?4A*Wc!MyoNj9=4sAc7FqC~joEGMSn@SpC?+7f|9C;lEo z>)`;GDm#gqxpEix3~4_TePIy(-Y!L$XJxAEW2818KGXEwm#m9+c~iyvGM%C6vX?R2 zQ3ITxPUp=}AtpQU$a5+G|ER3oYT52PB_Au)6mEGH>#B?64nT3d)^s@$Y^{IGdUdAp zzsTby<8-Ub37=6r7}Q$TXk8uXXh(=$)72{C-2NlbD0xoYrNZMEmM>kp3)n@5Zb;6< zjktj*Q+ykD@c5A-R?+;xaDh_u<~K=}n(c~IBU}^K@T>cq<|(%1-Ap+AZgP8&0=IOT zNhQqHZ;cS6EiJo}w*HCZI=jHJqEREOuH9oImhXHE&Xx;NmL81HnAmm&&n251Hmp5Q 
zq*ZzLO=D6anHU5*7d-pQKvKvy5A5yiOfCek`r3C8wJ{0k>izo@)p(MNMphSB zVbI;BxF`+)ui6dUD-fVe=4cr;N&T=C)mbg%k4(N?xmEo@ss-%gkb5gvt=a^xgrPvh zQ)MvSf(?_&4cb#p4Dip~Z8f+Ux1*xs5IMoO0kY)TB<7sK@R& zF_g~{(k`Mz4J1fuDUY2XTk4Kn&y$B6INU!^9QFr z)GFP;Y61WZwDtxu=9WNW?K(j4g8s6yv2S804v`Tp#1unhwg#ao^d1>D0pnY$n1;y( zJbTHah_~?G&MT_+@Tg#UHJbsdMr0eufT< zYO)+QA_m1vqY#pRZAOqW%B7<&BYN(fNQ43{p?}1j*x$fV(7AW+kwK^@v&v-T8_{R! z=53JPgoG{q{*s5&gD=N5>_^Ksa6m=ls@akEe%?nYA$P}KgGVY9ojZ4SYu{{LS7Wic z0;KVjA%3!PjROr;s77i$IXtBLbzzO14;dTCJ;4AGB$(2j$-&w}_|Nq2^kCB^99~=~ z(KnICPh5GoxR}iLLd~J#`xaCFG;&Eh67MYo39&cx8H(SicHqcIvj61g1!rM~W78!3 zcy+arbLpc$F{0O~{0@MtFOw0u;j#Hm6^uj}O%4PTa~n!WGFl6BB}MmqZ=(=W*=vBP zbW$hacG$63r&gZXI_MOhRRRhwwJD4wK!`SLGarzr=?CxT**hb#E#U`ImSHP89;RHm z&C&g&Ad=Gf&TqR0lkx<9L&IKg;8U+k!JZ zggv2H-nJ&=?1F8Hemy#~%7C!P5PuOSGeQYx5Y=VJn;gt&p`f;L9UCS|Ho2ypxv^FI z=E7kYQiPBZhz;lRFKtfBR>IUn%a++f!dIN9?&2quG9Jlw_AVM6(q$HA89!&Yu()zTFB}c0PZnoP6jKRr6mfH#ohH`|@S&SDZj)Vw;o1vR@eOC?axzMsgvv zyuqXMmCDrE*9B)_tCLIRIF-I63H*io1(s6Ydtgb%1wci>P^B8^+N|nt_WHps+z_xmAhbap8T7 ze~dEfwV8Zj+_+m!BZ@|hF?spa$gFnE;df2+OjfNoDCieacG$9>VmS6cN#|SrY*+Hre@3- zt5gm$5ywVMJ{)p^B(7~yQr>&6&ki=@8%`9}H6!;#Yb&eXeU^UtXsZLpld-8@bX&!S znzob`#bpp~GL`c!tP!blzyVTf*>Z39ROtdUI@rGfOFSBK-wrzzJAOspdYZ?l?aHPZ zkC$jq%#4T#*fFri$K}iV4_#Bg$(g$yq5)GBv*upi92DY`fG6W!7=meVJ|mBBcmJWhrumi@9!*IdjaT2 zgx!>@$v(I8|M_fr!>;>#x82#T?arND2XQioU9U&SlufBaLwtV{VQD!p-bB4`+MAUW z(W!k*e`>vG(SPs{IhphYnLPTtieFh>y@23TDpR~^BeGDbDWda@?v=;i{DZ$M$QZFL z;JR_hg)qOQkpgU8_PVR9YpYZjP-K-c<9XxS`WEuzqo`n^4$vL059{6Z)50iWMufEZ zR;zB?>piOyc*5gkBOs^xuYa`Cts4HLl|JyBR(dQ1#0bGwiG>&IwBgUgZ`I%J%L!&? z?SQklj8{#uQ{x?H?*6vUiEzvK<5as}>wmb{T<962tiU^<&W<6!l`@M2IB$vUJM5@u zo&5bkEGP!B!-hhEu>8#zkyV&8NRHZ1^!ga`oS48-Wh9YKv{J8twzZlKYO1Kxe<%g-MWAP#2 zC~h^9_@EM4|6o-f<~P<`p6E+Is{FUQLt@I4&jd&^x}>mg((z*i__BWkpQCS=5Pnf!! 
z2;w?8h(P>M&rVMA6{&gY9Q#7jU9XYrKK)fagGL3eixP~H+bq{lWCPM~fRQ9qc4bb( z|DeTmKYQj(0+H6ax>p$%!|ybVNNSgFu|Q#_{T6DVJ+Yt*n||9RIMQ<7+e#rr`aWuYvWiq3>a z{vki}(HE3k+ojCRkGegx^@;nPUia#5FD<%Yv*b)?mNNvWGO;<8B{qez^&&YS+v`tF zZ*<=S@_v>wZkt}Y0g*!!!`5Z!?%WvMUFNveGkH{aXO>dk18h--g$sNSa-_1SN#n+g zzU`Q+jeUcUJbbb`X~ICA`|h^g3(uk^Ae}xCy@Kb&>cHP$VWirs-bzeu@b~^9^O}c_ z2w3R2Ktr!V^%Bw3SxS`yp??NUaJq2t#G^3*2GS`@Sa;_g?D*g|pD*>1#M1`V{x3|c z{I>qn-~OnhG{}DM>czd6-tg&>LSt$>+%}9*foKT0ITfZuNkmUGDx4?vXUJ~T z1Q>!@jsZgpK$jn%ECTVmS)EfFQVJE?4p}Mgihz! zZ%ZKuKYlHH$ZcxJktq9)A-VxW?l0nkm3#VqBa{f;x=f;0g;A%M^+RUGc?ybWdZlV zhlAh#K()82;_2m;($Do76hJ2Yi3TQJY{&p@Cs}Qv5dClZjU-KL(V|75ikRzI$V(>x zcn-Bd(qsIo#qYY5l9*A%X8_~0r@yz|DPzz@+#IUMEkaD`(+sDW#L9;uJ zEuIBeY}+$xb?G5$Hw$ppGtSO$0`KT_p0O;zfpM)$>$Q5w1?qc6P^p-uwqW7H14Xl; z^t}G+fWF)J{u2`wbZ4iyt*I(qOm_G)3B@QLO>y5=?T`ZewxHX_nXPR1jre`#TAagP z4ULkEM6zU!e@2cB-FLtAMb8db-UkywEH)(KcFRoRDuV46sgAsK=8WRgLeChU#(dS^Nwi<$m6&#i}26iu8UB+}kN z4yCXDHtRbVpn(FV5~=^PJsK(iv9KP30lft>K36z!h(AHGgdnbCe!LkejWrRp!6BvAv!SjQM7{7cfq&fB*go z5O*6W$Rl^e9+qp-NmhE&B|=Jd_W^tFsYCM zG99JRgpd1P7kzNpQ6H#D;!IIcLb-Sw_}d$BCmHQdm*nrdjXEp-DXdn28Z5|8^a_+E z`$c6gm!}+I8SCgq9T((ES|R>!rtqA(=_8}3uFRhEY9;#zoC3LuHZx9=6`+@~njyhs z@~5W#89Xd;qRAk7agC$uqKo3Lb6u&OAm6&L2@S}TcFN$dNMOn^t@`G1_vsIF7o`0s z_^&5da$jxRGiddeL*t`sh~k5%hadVzhNK4Pf(328;OSqUCJ9s90IxUMBSjd^-L*bsBzA$Lxh>^G=WyLA4&T_y6okxKEX6ShW2$^06WYItY z2Y(EdMrmJ##N(wl2M=Dn>sx3Pj1j9Ww?NI10Q4zBTGF9TMt5?CeL1sjUzdrpCS9rc z!huw4k1Ow1a_s%~KU(f<)K8tC_eE=);bSla;Q&cp1zwFCx$*h)tGDOhBX5^%ijoT) zFZI6rL3~MYUPn@QoN}1$Yo$`}JsczGfe^BTNZJuKNZo-!RB>bM=b&>ZC?g%N0Jlg4 zwLm|rci)a!aCqaM!FtDwPy4srnv~e|d{OnQsIVKsg@)C7$MY)Y&MP|W5!W;+(di&} z>1A$Z>seLU^}*K#o7H!~=8$+6q$PoIi^uH}tG7vgh$blx!5R2}0Xb%QiZ%3xu* zIZN|;cKPhD=uP}5v3;*xlWxHaR1fqB4ZW@3Ges9*94hD3<$r2=Ydp6`MZ)Bp^wRN$ zwolKD%(h=o<_#@w6VQ&dgiB^aENQwrlbU2*uU5|(qfiFpvnOsx8TK# zXRf>VYTlB@hjw&F?R?I?w?F=SDtoSw)JCWSmY0iSNjk8zm4%6 zT+{2t4+@3C^;+tRDE{SZ#HKl#&V|v3C!DyjcyQhj`H^SyR(A5Q!db!2^Yz=)&a2CQ z^+xg&!<~&g%fE`Qyv{J-tyX(4~? 
z+VZ!N5hHpIYjUVf+K^8=@}pp+8RcKa7sc)Rj@h2QdgX^-dO6k6F^b_z$0?{5#Mf2F zo1VB8lHB~B5f(3%%+kxpCxl$AfPw5Lu#k?ZSI6v2Y#2E0x%uy3CpAzg{MA?X8My)P zUdG(ZJj6*Jl4PQvJV>wPo)#(pfuQ@e_sC9){dBFFp@G4awQDbfJf8p~7-l-S{))1% zoo!qr0gNWGjL(m8=P!D)vpKdY9 ztA+ojefzk5e;qC*K0owSDJE+X^$L#w&gOJ*%t!p)?*{j8LQzYJO z4%=hZoZ=~K)?8!?k9>J;T%c4{SG}QbGUaJf5HC0ecCb11KHq2^Ra2hX(|`omBLEK@ zQ7G3zY(J{Phd2^I{=o9r!L1%-J3ZjZDExJY9qPHt7&Vpa5uY)j{!(>;pj_tS_wGO2nM#4&{;^mo>r zQqDVxn;|A(sR86cm}6obDbiCRGFzEFHvOQKik+P!L@o-~ zp1jO={d#%B5wANPmdPv=m`itN+}H)rexKY51D+pX_vJlSd~<2w%_+1 zk-~K1k7-u*r@>g0x1b$#G3dmhVXGIq+d|a_$@e;VaK=hEuO2B&zIdd(-0S+e8K39b z@Am1DP34mnb+^wp?woMt53L$~Z@a@|I`N}Veg8QRt?bsCymJW1>2XtUNNGoYXI!?= zzy9-HH5&ggX{7n=t{=7}PW@fp$z<$*Usk$mM62EBrh8O3>i>~nxhC%Z?^)fWUQqhv z%DJE07=S+pzw`t0~j%Az%8g_|EZH8<;rU z?>hT|Q#YRbu;0FacK?x0T?bChSZ)&>Ha?m6Nofne`8c` zp0JmT=H8T-DLh-UA@Q0P>$1I~s`5{j6>X@Sg$YjsG7ck{bIPFnHjz*sYifJzUnOcks#uH`}`*cXph*SdrAvf0L7O-ELf5~tFdtq>rw#< zUpSZCpBd{IDVZw#@FkR7JOYRbdh6*~Ky~r6eZK4xmEmI?gM4=Os?gHa=cRdE-W#jIAwSxnwib!`h9wBxdw_6?b{DocH|`$Qx|$aiJZ;d3%@A3D2y$J z4jzfX&VDMqCxL+qs9>3Ly%3v1r~o--=zdCQq(h}g>?7yHp8OY@!pgv}E$kA~j})3j zXt^sIsZCgT5;q}wuuwfTO{o+;{HR1*G<=VKjenNVy zB`K7Z6pBgzSf71FMDK#Ka`x-&pHXk$zMHI#aGyO(qw;r{bTQEWUqYf`-OR;KDm;Jq z{t8f1`2p7MpKq3+`rGM9?ZoEKS97-ttBwCP#w`>%u2011Tm5696TbiUzu#!*j%Yuu z=Jd;(udA%&y{)@{H=B3^w?(zDil+uW4|pTLdeHQrk5$qMbxWDuRnvW|+*0t`e@y1= z&3gu#Wn73-eK6!vuV3UyzAE^a@m4cg7w{@)bJ4Hz8{^c!@v>QJI*P5_~{{JER(!uch5O=VE;I|+Yx2&)^5^glnkBP z$y!Ru-iHo_(#4~uumAEqV@XC)hW!>{fo=Ye2ZSohCe_?>floJvIOf}_B71@c{C)kd(fjj}ep#YG?)OQf}w5+;X2mVu| zPah*%Qp^~Qo4y7^rQEEUhTYL+muA7aDXD7Untzf==G2Ev<2-*)SY(_jn^XOC~qh0CebnNrh z1`T?0-Wf2D_}c^LslxCnS#RK{zAHu#Qw%i=`u)CX&ynHkw>enGFNW4?FR_+spj>gm zfmzEQ;eS%0oc5h@W(543N#6B`WQ0#~h>{}}_Vl;l35!{L3B$FTG)Ns|ALnFmueXe= z`O@WfeA91<#g`eIT9^cA6|J)H-Tx%t`2)XojrbpD*5qQ*OntrHy|m^1UrqSeCjKAzwo4P&e5Inn2yOxPhYwZHnm-p;JMYJCV`Z14Wx7)#@4rJ?+x!e8yb z(UqMd`2CTpTR%KdXS;qHw}~)E{R!;c(=_9&vAHz8Copx%@0;c0n*5Ipb2s+x<=gA- zxUtoQjZhi?sLAfP;nBn)FW%V=qK((D|dxbV}R3>f5c`NSK?GyxCOA 
zuiH4tB}fjY6WXMek^cvi16otR#75{)lm6~o3+HNMhPw75l}ZTu+c{41o)Y3)xH&Sv zc;56WF2>VuTRyK%s;+Ec(i~f0vc#)rbIskwwMX@8ty`(s!fq8$m$QZ_uU8zLzo!?b`DRfA9&;(@l*oB{$iw9eRk!(VN&A#`>CG2r?sIEi-bND#Kt24%akkA zxv>CkI&=snI0(Tq8BUz&MP@QBu#uZi`j+T`#r;DDa4E)-!v>G6qmmojg`%1zA>won zaug$g!C7L96Ez&)?jB-yUpsxg2jZ_2$Br8()*wIgQbgTL7CcFm$C?u9{g;&}-G_vIEpNO*VRX-v~?g#MI0p5Kg3lWXagw z5Te4hkGDU&Ack;Pn7snT3tJEDE@cBLwapONcI7KWRjKl$|H!d~7< zrnymsmlKH`!jeu+Rj>9<&`=NT)x6&JKk8K1r4)CbM+Z2fI@BJ zy?doLN8R0JZq}#@?RCXy6IrcW&r+D3j{Nq>r}N0~8!=w^F&u7Bboc3|j3|SCkj|dbzyXHSzkRY0q~~ zLwpK&asqvJp{_gR7m5{oCO^L!8#T0~LviBzKkc`hbd{eb?s)64vi@@ws<|pVF0@y3 zid)l5IX;Kq4K@3h9r15f5?T7xY~Oqym;b=|n_ArVEmA$NGx=6%$ys@Vrp^DAZmpc7 zJRyF<;e3yV^$+*EnBkSN@c`L0(T$8UL0o7BRw443PZe8H?E_uTiYsm{tn(O=e zgU4FXaP`pF;bjQLoF!!QlDJXQJfc~Did+X~pI5396+a@1rd^#ME?LKcrS)}I?qCQZ zgcF~2A3S)Vncl%=W9xK8qU63uJS%yoRK+krL2GE^Ue^~X?D~V6$&edKV^pTR95!#h zU;_%Z9g~l~(KOKXPAWL-qcY>batt%`s*1mUz;tvs6nstbDyDMjrk4=$kn>g$e`x_F3at4s8C@vnRyyM>3!1`v}zEQ-lxD7pMH!Y3C_b0mp?x1Iw4B0KGx6O-zAg4!|%AG0ViSiPn=> zCJ%w~Q%JWYPXeoHN_VrbrV?u*O;8#-I(YrmslZ=lV0J?-T9LjMSISNac7z^RzffRl zg5*#`IZ17z@I`cbUJA$TCjkSDUkkv423syd*Cw!$wBeSBS%rHo7wfvII)BFF++H5TUXFU@WJ#J)G zq})jwHuYW9x!l`t$}L|eTecdTy07kQ*zoxDJ9+6}oNv!H$VX4te`uV}xFd6pW}WPQ zE1^>TZkB_$X=;3}hcHvQSRL_iIe!rXO0fLc%JYg00|dZI1q7r5o?~8nqxIzaAQv#| zRjNKpg?Ov*+<^@gYVEA6t7T9msvQ6TlfeVv3r_N$_L^drM8XgULg4B0lk7)XD%;{t z&nR`cdAF#56-~@u)w_H5Yk3}tW9b*kW>D*IeHknbI5!~$-l;Y%kI8RiVswEKmn%1K zURG8861u#^z*R@ahM(H?wtDI9yi>`mPgkoKuIXuOIXil_xn=v?9wwEy9#?({8kBh; zb%y%s=0y`9=uaLSKg7oe7J^+`e37Z+CRE&ZtNZAB0?wDI-|x4^bi@X;PE}F!xy0Xx z4!vwxVUJZ!CHf7YeP!y9=PNc_SAzU$(@NK{@P%yYOX=qTUfM0n;i~_N_qFvkxpK>4 zgGg6q%1Q_DZd%{M^>4afmO%JIp>e2ES)udLCEF7yLwt5 zy`BE@aDe*y=8pD^FAb++feSP}H2I~vR#o(yhv_OiYfleN4UZ5j1-Y*`kvECQp#clF z5+=+!rfqUb|If;KeU9}R;1^1_E|^r%&JmT;vWl0=E=s=JN@XMOP-=hkc)Z3W`}_i; zl*N+YGk*%;*rB(&dZN2d=(M9JoCh2*KJe@0)Yjh(um9G^P3jM(LLx%GzBK;+%|Ack z8!sc|t3WE#Cj)+4$NKbUyHP+R^)}%W%JBP6g(BW2&uhhsGl={rq7sdNm3V+Frx@wyxC!> 
zsKiW@fSyf&oa~sLRzOG4XPfgp8tK3$!J5KW)rQvJ8Qrqydy~B}mO$B)5apaoP@zqi zm2Fnsi&GQL%d%nS$QBYu&~DD!V@w?kTs2(Za+*gi(>bg+es;;Uy|ga!b?l9Gbyex{ zPv9^LI8s+KyzZ^tu;6#gF2DYP_ZzXH>haMdnoYbAaj1SmTY?4e6ZPR-Qt8L&A{~oX ziN7LXB5*TP>q@>ZeRl5V`DHM*pqoX>gjkM?ny&6_I%rTY*rsdy@$%ai3~V&$y8g>C z{jZ-!%lACVm2ZrlAAp@4qenmr6KZxY=B3LCyQTWT+2(D~7&doA7EPP?lKYrC+j2q* z2z`NenpxP4*OBYl1<@?#qO#w=$5PIL6u&KKqIqdH(2^b1ix7OISmG{@x!rct*#|Q` z_jcyNH-H8sn9TSh zZy;@|)z#upGotLFjL!^oDvG_7kk!s))mFo7_dmOw zO`0~%WZD3nBQ8kWfKDKt$%iB2suES7VIPg-Zki4BU>&UdZd;|sQ$xJSA(YnzO(aVI z;hlk`#k#HsChMG!xPK3}-a=UBeO4X5*Vh^ds7qsmF(Xar9(Y0b&{7_hpA}@VOYm5q z0#PnnxR9dg8Tl{h!FskY*H_z8$T;BTWqRw+DeI8YJE%Gfyw%6c!cre_EDinYh@7Z` z)6rp2sj^+h>lfau9jsOG=)|@ctCRJ{F1zluy+kBc7^=H3=QAO+Ev3*F)3=1m{FL*D zmqp#>F}cAaPD4Y#z^=X8Xw_V=hX^_avnd$eS8y?N*<(sxJ$(%${w^0SE?5(t%25lq zh@S3Inrdd*d+5*^FsaKgz{}mqan-XmzI-k}izjBN=UBQgvyk5qN35?YGs96w&u?+k zgDEZ3IZpOq*^9u86Yt#l6gFYv#8e`Km^n1H{s{27w#*)EfI_;iZB=>UDb6k)cKY*; zUk-lHUjoh*B+0rg=~`)<7GW8|j_@oPPX6Prf&j2}0ldMzQ-P1X{QNWxz)VUqP&B@GUX$NobTrRo zQ8ISQ@L$2Ep=Rdx@oj02tLMIb{tViffQCtkpVP--9K&D|MnXDwyU>X5IhL)%`+}7V zEr(z%ICxRH?NnK==`wOxj-3dqUAdr3i! 
zS44u4SrF5ByF!T`Ic&~odc(B?j97T!}`CbR>2*Br9 z5At^$mi4*uJB^)Z1xW0;?D9y##%R0K2v}y)sa2`rX&jvm_p<)$=RcEQJWn>71VE-6 zVu+Kft=Ld^DPetW$Dg93ztTtBnXgO8WFWCCgR~-Zp5^1>F{RgH%$p+^*1_ zqVn_4ue9UV_3J%cP&gYKn~f6&qE4gd{qyXVH&v=|3m4$n6YGq}Ru~i3`Kqgi?5wY zsn@xo*)GoT<;z0_vY%r)xr~yN20#f?f_`OKKLXJ0xM=}B3Rc($Ty-0vn0 zR9wt-jvVDq%sDKD&#yct4YS4uTmtUNWbm1fk6zi6X>B`oT8fd4gy`{tiPA%(CpNV|RxQ_7|n-NbC!e7%mIps-z(W77^D) z+r|@va1YmJg<8cK;&5{~aebK3d+#2!%)Y&P-J+@4R(=O!4axHUQkG#-=Zif)##Jw4 z(BwrLCm}4Cd}RjcjmIbZR57&7cRFRuXaXx+@||+v?-?K`HE+At1&cFYStH}!p3&Ia zw`b3rI8ZA_RbAo=%o*>cTVs36$-q3NeO4Q31KBT^U1n2(vfnIjPa{g#WZgu&WHIikctw_O{-uTl^nJfF8~)yhxjDCgEL7xingJ z)LnaqlBYRka5$__X-?9pGImap=i;IKmDwBj?j@ey0K=AfdG=bDGHoGvAD|#!7}qv$ zsf!CuzB~8sy#y=DoVya#a4?LvhL$~60mXD=>^Mly44Smd&`BL}7`nw@FL(SV}3;Z?ILWH?UV4d+-$K7K6E;OQ*mu+j6I|e@TI6l8*LORlR{F zU);{Z&YiI4B_}60vg^wUi4T10S|8cJe*tK#Op9PkzhZAuOJ#tfce^6vF@int`{N-rVx$+EM0Y2tx(Z@lQ0EG${=yZ!xzL zlEsC&R~s6nzziG`gc$#Aw8@<@qg7w@?^gL z|M|w`%!_CTl%3?3RBE4NlhPr0YJ6SiVoq*>?oW|jd#`b16WUNRf|7<>->hlzQ&;`j z;I^R&I}*1gWv37d=Ge8*rb*Ep>^#1%Hu^D&ZT0zz6)nrYW!7Ily#|Ld`35~4(X2Sq z|5$j&{-e%gCvHO7AoO+@r+ddj6SK%iBAPU7yS9s2c}vwJx2nhHefh(JI~`%_ap1uF z$_jdq$}Yv%OoUTk(Wlu@KY4=d5^nmcwbcJe=~wy3A6G=}I?uwajM}nFrBkO5c1ppw z>Z=Ng#`vBb^3@sCauUvI0d`DQAC97c6mlW>lYe;Qqe_J>0G=$lt{ps~0whcPat3M< z`An3&gr<~KLH{|yY8P+8G|Joa$46sMhsrO%h`!Fca7MDHY^X`!5CZNRiv1C-CLs%X z$Z`YIn$kBJo)hNEZK7vq(4qfaSL?2HgLv{B>HO47A}t6Zk$P5AUtp)=4eT699N)kK=NE3h_zmAke{QWs2VFPr-*O= z@N_^#85)oIkAwDh7`!x?aJww`(P*sNr!acZL=<9a2uZ61ypZG$9gk9$(mQxV$?aF9FHcZglgy6% zX$oG!x`s2**TCkX|HhmTEkouWKLQmD+b&hQP>bk`ghHj1Ssc^)QGTKTSRWVSMow`V zQRNv+V)gIoJf_60J^zM;)q)`7N1pJBMX4uMBR0^8{libc`qKz4K{s)dPC>!Ss`Vvx zEUJwjY9D@|;}8uZvao>|4p!za%Syfd{?Xy_cU;=8nTazlg!z-Z?g-*`%``8MpkRkp z?LZ!D{Q~f?ETK`$U=B@YbZJCuD$4l`h;R zQ;g>mWL^Z}xmmt*vVfkRJ87?;1p{+qHoj`0fp|tOEj(Vbk%xU=%1yG+1>&$668jYD z$UJBy7YY2T>BZ|yiO|n6zjyL_NfDR8Msop`DCZ$&Qj@=5xMmiyj=MGxp|}SMQqQOi z6mmSG!7QKAO0sm}!gcG`t;zN>%K$>?BM$7t(|PQ_bL`7MxNF2KZ-~2P)z898$2FdZ 
zDE5@NZ8><(8lY4yElH(NM|t=$zHYsi7Js5;026Jnme%1v98MEu-GJ^fpO&5rbW_ft z%0fXh;odzF!dyZeTT>RXArga?$>u2JAfCaq&7^GN<3k>Mk0WgjDs+wq{S2gcF1(qG zGzPR?_Z8OnMM{?$uya3*bol4@ zZayrY=8-UB%cBz%Ag-*hx13IPo9x7-M=RnbctBEBN>Nv44G60 zT@zuPr~@7z^Vy~6XhY0k6Ii~}KXLrHvaU5h`v8D2KsA9!eDJaM8$d7uOrTagtf%>+MZrA$WP~@D)zxIx*-8k2(x;Y|6K6k+ zqr|=(kUSXvSBwM|Fcg(vSTDZ4c1lsZg$r@GXQ1yqk^4RBT)aJU>9Z7x=aMg?RB~S5 zrRpgiqlFZQFSgYGl7Q;B?ZUbWKjak`i<`5sPc>abiSQ{L*PS2sCej&+vl`aBuwLTa zZ?J)ZL9e^(Yb&jj&qaTXN|L2mxR3_fiy-OG0E}bH^{`gYIL$a<#h{ie$^0#}kLF>(Z{G3d+a})n}k! zx&>&nFZ3|f9XY&YZ3)n0LT#~kyyxZeH@$L`^(u~7th(FNEzVEg>B?-)%=iuQ>BSIU zGJ(x^I!`ii+f(+{;1jl5j$^^-y_mUK?DFkFZfVB|D76Wb{E)k2Nx|rY}bmXAmgIuuV z`Z*hF>?0l@Y1X=3JF{1xv3RNm@EUc9Fd=KimPZrnpZ_?7UahR386?SB8)@7GJ3D%!_7Kl4crNsa#mub zbzMvjVi;8%zXrkarH}lCa3R(BNNlrQ2oRRj#bv1ia|(G@78xC-2rfS#lc}~Q8bTa0 zhj&UYt40)J1Q~*GX;g$+KZClqYd%ntr65(d%gy5{GrZ}QNk!8rSsa6$uO-8=v}t48 zS-e{}`+0$?LBH5i53$dmT%r_B))LS3a1A_ zDkYAuHa4Zeo>7vKb$>`!7+q8O*P8WvLcqpn48%+tdXSF%ruDd|_v|-ijtI_F#jH=K zAW{1@XLtb-h?kv>kqa@N--Pzct4}Y8>5nR^k1P>)1B-1u4_P3nGJM^54*^2?W;Qc; z1G}ty_lqsDQiA^JEAKm4tbOg~2xMYHMmRP7)y~TX@WEX74s|)lBs^uKIM8_^Mc*yS z0-+I&fH{R!`9h9nX0KXTo{2uhsEFl#m(72S!$8vRlvMmc93{Skj**KBs0kY&Co21E zM$Y}iIofCUzw=p|ot+&*Yq8rLE$6Dg%9py>o!;8A>*+df4DRwtbOPS3iKZA8L=tq= zK3a?|Gt}JuT*}QY5SpSo@Z@N1Y0&&jrwB6A-+IJcBmy4$F+#P*X>jF&W2*PU?qmlQ z1)WI`3AXK-M=kY{kQQ_NoFQadplp17o)}m_mZkwSmA4hoYr5MWr69y&y@`fy0n!Fbfi$=eGld;^C9s434&SrWN zdGuvhhMSA@XoN_Kfd+(TC8r@a z5>GRfD1~T1*B%|C5Y{~7%IY9!T(fgm&jxf8;3Q|9y;UGuezZ~G^M=UQ zQ97`}!una=47_P=L*@bOQP2GWDf1A@vUgSV`JEk#m_wJsmB8Cv!3|8r-*0elT?!1a z?nRq^fBZ2EhhA*c8X04u@ZiH^Q5`rT84M01|KYLCR5?Om==0ielgM_1)Bdo_eYNg6 zx;lBi$UM3N*5DT$0mGw(@2&B?>drV^IM)aDGMSSNj*_;|st0p-I{)F8GH9v8bM z%-V0=IKYS}C-&-xViy+ER6aAxE<}Q5w%>;%RfXW(kECA%!u6@sQHIN7%MQfV)jCB? 
zP|}S1GC!8K7P&S`Q@p0O;dXnjvJB#JE=ZJTX5n;lcsuso5+FoLL@`ys+pkv}mP{zl zVEdxOe?P_c(hK1eYv3AKu*WK9vyTyGx6O>Noa6j?tW@DRMQH?qo@8-F@$c9kd2CO<{@zToDZ!t^altsgj+kDQ10&}l(hB@do$!ywmSE?sIBApei# zg3ho1?GYHR_Nvl*s}!M}dXFzIfySAtfZ zOL?jKvGnjtm5YTwLuMr=C7mZ8eF;vm7|W1`*Oc@kXVPuA?K^gyqrjR0t$aS$BE7nY z@dsvyfmZ3WJ0kTokUB~)8T7K+vYi&L$$vj}$P2susS%c0tA~_0!@{{BCwbY&H2{RG zugoqF8eS7JqOa^MaNd^!z9D>bMsP%QdG!h&r=%=Jq9_eA3&piHuie^Z;DY;VXe8ZJ zet9qH-UlAyS+bKh>%J~rI9d1e7D@LQQ@VuO@OdJOgj1G7mOh=!XubDqqQ$qDgUPqO zd^cV4MA;J9)mbJco<6>g7I**~P@fAlcslVx%CT3U=Iw~uINe}QCJ;*+xPCZm*|tra zzd&S4qC#FhlT1o;_T`5p=}S2>7f7Zu=}dTIdYdz41DyGO)fF(yc?+k1I=MK$ru=a8 z_5*KFcIyjDNwi*I@9_EBtzy`3AC^vY$KyW3?)4oy^eOSkaiEBNUQY0ytV4R~QpCL#n+y5dB0NpEm zyu7R~JzupGZQtLNt+4G@GQUbPS|m_Z?rro2bwxgYI@rFQwogd3qPvV4G@c~_fuHSyN8IC!M` z$GC;{)T~e+IU=MX5(BKL8FJ)0bAaUQi{hUq#(y^z$*-If>YA#@RUV&r;za#>7>u1X K=Jcq!oBj_W4tzZT literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/trace_send_use.png b/doc/doxygen/chapters/images/trace_send_use.png new file mode 100644 index 0000000000000000000000000000000000000000..8779e9c662ef76b6a621294e76b37b1f9ddf8cd2 GIT binary patch literal 48178 zcmaI82|Sf=+cms3kyIp^$`}IE%wrgM4IUL72)>_9IprU+aH7y%0g+f_fyTRT%LORinQJA`*`=el5TZ!IAr@?Reiwz4%5+3T@C zl|td79F>z%ce?-m(`7C7zB%dHuTNLM+<8r6`!oN<7#_3yWs#m|Vq(5%Ur`qmI$F6t zL+)fd{c*vFb#Lhu+wI(@#nn!VbzHl4?e>||o-Cdlp3}6@IL*zDR{xRM>GeX~Lz1)A zAuZ6Z&MkV{!T8R;Uw7@Ir(b&0FOge#smEP6lc4Y{yv}(lFTN|dLuSjud&$f1#I(x( z`0?Y$tMd{O@$s9JEZ;pkE^7Sd4qIS$@~!W7+S=NY<7+nV^|<`&i>cd_97*P1U*38h zWoD{* zJu5@=dj9_GI}s7g5|_Sfy)$TH-6Ub3_>JCQA&^h$c8uNE+TA}di(meE-v0Z0hAk%^ zc&DeQKcC*hBcP=FcUVnJizf2e3P#39406p^54A_{n)~}}?V}UX!M}&v4n0&UYiZda zB_;K~(9tTXU}q#<*p?Gj1rCGt&y18__St4xequY5YuTJQ(fNESCa3J}+tpvbe0iFk zeYiS!Q-;y&4MEP^wr%V0D3!iwVPRlt8N6nb#M$ZJLkUyIm6fYriYs+qZ37i7&pl$Es?2q;xGcHMQ31)7*RavV8dP zAhqOp3o)}Mnjp5_iSygD)~LFu1y6HQU*jH@GKqsHNIFl!JGESKf^*e#2?lrJ@+R>{dRb`&q_xAvRAKG8XFs1+u6PN`{$Rc zx_W#@vEN$Ga}BYFN5{szb%c-fR0WA%`u<*Rx-rZ2dTTfW@v>Uz9^B;f>T2p& 
zuU;ig1%-t8`T0@z+V0cNHuFg@oz+fvx>}vC8!hG5+1W{1wQ5zCSu^b}lUg|$nXB@} z7~-;;8rnUU?FTzb+zgD2mYbNE9E+CPWR|TRXK8HA@4K3Pg}J%;*7dY>oKhQ4pFZte zdD8Z>PmY`Gl-f<+Bf({Lb!%6%i+lU{EZ5P|+2J&L_JGsq!Q{9R^Yr7FE{R_n`5dGg zCvSR7vvS%F>!80eVbyxt~Xa@8WFrJToTIIm1T9J`#V zE+FbWcA>q*?M$3L_p%KldUTqans@Hqi!-ba-fQ1a|M&0T&aYp$Gz69Z%l*cs~=7uo*8Ks*l)WlZb6GN zoc!~r>ua>Tynz9?aqaz;M~)o902TfIXrAXXVVU%G+t#f#(-R{{{MKxWHyoOsnWj1* zRg#TyzkU05e|vsrtrDI8z>?W9jX)-5<_N``=XbJR4+?U~QB7%n`RbL7oZPazVy2#0 zG2NqGFIdDb&?9#2z@r6CVHv-`$Yh*-zC=q~``p{RyPbN1_MAO;&XfC)FNIS6;zfo{ zkNkG+EE;V5gEBHySEjygAt9^rQZR?)H4hJp{kO*D=B;TyvJcDfsG_cu)?~sYCUx>{ zmrJ_*mP5>B;N%Q(9v?8k)!!Fg{@%3k=b&A9ksXVK3Z!M%20=(KY!b-&C`3^cDI_c+|1zZAh? zJrh$G!obs~Ps^}#ci8ub^Vzfdd^-~sR#pRi+}hExI_c!&rxzM{ z;u8|Mg@xCY^(>wBpjtZmVy$#%@x*7Z^mLKNckdXOn3hXROUK8@mz9^xUi;Qv8F)v^ zO@b_f{rmU3{-|VT*<-o!?jEaP%TER0T+?zaKV1u8+`_YcI~_KxUuYgjRdAE>jn zSM$c1%mx2JxQwytD7hoyAh2CX$Y$u{ZZfVWwfCQg@*WWz*sx(ktoz(7xfim^d#XbW zyUKlV-8=gog(;mQrE~J>dc`*)BLnW-VadAqVcp!!h&$H8s7aL?2NGX@YdWXi^YeFO^b$4G_44(nA8$I~M2|3y1<*R>bhsn>v2s*7Rx4RhZ|?0` zi_oud;)KY!>r#*CXikJWDsJx8R)5XQ5_v=@~iQ!M9zka=#=x|RD?Oz%aQvLb!=gIGxbvN$a3;g!(F)y-= zZh`&Pq@-;ne}5{)KYq;1!?OefTa8H*`>0>!#DMGHk#rG9cq2(!#%l#PGFgZFoFs`v zk;i;-X-*j#nY_!tj3cYJ#A++4q%txyZ=qb!#pd5VP*6~C?ZyrIs5Sus0g?dEwddKy z9`akm7D7dj`Q#B4REjHk{P^*O&qev_U;6s^R;<~mW&GyBg9qeOu}`0>#2whWRblVm zz5d)1?WD3`#W0A3{ zQr4XZ4b062Q==su6e;waZeyYH#Ym4xn%tjVMv8YD23@(B384<;^StDzqj`? 
zd`qt7r*-`N{8-_~;y%wzPxurU7qf_8e7|dA4pDW9$K=#tx(~hV{k=A_6#9|ki3iAv zVFTmCpUzB9O(_mC@*kr=b?Ow4q@=E)3$_R|rK0b31bzWic9=rH-*LDp(ed~9(^Lna zoX(UNuyb%=!qQ@E`wIZzlbV`(XNQ3-Yuz1sfRPR3gRLT&!6l*{MVC)f=q=iEm<}I4 ztX7OUjkPw^{4naY#CEMtrYC zlvze0vFTd|riAL?#f}o)xZ}u>K1i-v9PI3Z6#rtE39}}b!PMMk6b5EyO=CK~IhPZH ze=csj9PRe&?Vcqb3dfH(=coQeS}6KeyT2UkuqU4>_UB05&DQU{FtrgPM9-@Pk9p+N{$ zRZ-yzANi1ZghG$h<6`_qE}OwGul1b1`^;`!IVGci*Q-XrpwXYNMM&DjO4zY`cO_7R zHFmF>gUj#lNj4kazkkny)mR4%+j%>~pb8cCAOc)f+xQo5!L96+kzXc&#-3(%?65j$LOFmF@YSoRX6A zxpn*8?=~wI(R0hb4-FYyyhwNQoX)kC+)bB;h!G9clPncSXSDaN$UdTGH5XX0J=-yUcRO z;da(k?d)=dxV4=>5`4|F*^tZMB~COdiF_-vd)4{eEBki3Zs7)kao@asn=o=8fw8z{ zfVat4d%_s8u?o^ za5v0YVZKP}=YRyC=H~hZ1gtz~Xqc_H^rDqjdgk{!={fJPFh*dHUK^)S(B5K!!`)`ZghL@SDp0J zXU}BuNpE@yzl=TREyvPlqd)IqWMmY(_+F(zE~fX7;L*Jg9@WvCcm4g9g_ei4g7u`V>tnU78UqE>RS zjmD8s#-8i}zNv^m8m6M6;sHc%P;+aW)5Ir7s#|l}?jJEnx@PXt>J1Yn2=h-(>NuWg zXVc<_e6>^M?}=ONaQS_)=tmVhn}yPpd&?XrR!qbACgq1JT)e!8ObTDVQ~*fj;pV0y zfHNaQ6xE_{f>MOkul)wEejIB{a1Ra+Rx8(5JJDt%-|EO=(e`(;Cq&e3%HG@8_Z$Gr zg{DLX%od?96yEx4KRJ9Ffw_qg2LJ+X0d*_Q7RDUw^C8XWpY?eX~ZnKPPuhaU%R+P?t> z$zg!=U{2}a#Kf(r|423AOSZe{U`nq;V*gwpLJ+}c;4!{E+2^77DLTjCg)gA)F`sU# zS$)~$Qz^S=Ph|7GOE3CKOQ70%W@cVKzU3*J=~YchCk;$Y{95(hiyPD3;^c#phopL! 
zuAnOeo{c$`5_(ymtm!Qz1XJ=1Q zy@CYu%<-mwO$DZmH|*6}W8+6ZWbTHCCpc_yq)g6EkAy}=3C*0-*3r3+bp8C(r{X5R z{iBl+&7|}Pw${oxx18gE6J_n%wOJP$mN1LxRp8y%*XueuI1$?3eS9qS;K20hPmJyu zLxWd0`6g#3I+Ru~6%4_XbyYI!-)hOV>IlYeGW~f=Q{RmVxqt+9hc(=Ssx7k<-#;*d zPq6#)<|>{~jP(pa@eac(I-t8aa2aeS54=1*2~U$(SQrODCo#R6mbM&alPYdlShwJo z`J7-)W8=E3SFbuvj&cK#`2)hdXl~XRxij9^)kTH12OP7!tE;QgWs9WbCQvS}#^w5I zbKf{G;xd|DcmB@IWVlUV`rej%!thR^RM@s9H%fE>G>;mduL{eYkbaBQNpR=m$6LfL zK6-;}=)wJ`kC~5v4nSt%5)nCZ_f~rHt3F$j6##*%9Xs6oRxt}-*xX+_^PBzp_3QDR zv;qDDo=7x?g!{k{i8>6Z13^Co6_p_G8Q$LzN9|D^@GC5O21~rMIYmoBnXepJ-!;!?RW7RE8{?~kLeG)gQ@ zEG*p5w#V{G_Hh*z`b$F}eNY0O!&kq2`Le9sXH{2cr-%NPKMCqH2z3vSgmB#1WfCkF zIGt_YDrTA8lAy$l*?f+KxSCC@3^zkc+R4dDh0@-%Gr4QXU}Hgo*kchhZCr+068K4= zHd@J z;D!y`wLCs#n)KijhYlX3u!&vpKxDta&rY80`RJ1wO@Z*ypFhi=dr^0djEEGA7dwy3 zDl4zCwY44nQO#*yk%LNpql7(8NlD3_okp~XOruC%TtY&tsn={QL)_}>?e)e^-h^-~ z3jq29nKu?Z9zj_^8wAYLvFxCv7ys5yi2jxP*?ARe1t*kkWRRac*!F=L5yQx2C72VzI_)R^WB|}4rG}HU^EE^#)24g->>l;M&xpHF zA9MA_>%Lf!d*?o8nRbUr&((e2ect73Z4~44=g-S)Y8183oC)0X+3C=E6O)MK zQa}K*jxS!k2o{qWS&?jO!csX8mD0;%SdUZa8yXte+AJGleJEUrk?P4EE)&B6+fF{B zLBcjLGZVmKsSFe;*=k((KnTbs!I>1he}4AG>K0xqZ*SKDow*XEh^RsNvLD!-3gf6u z4wqh;Q5=6)?BW;{X^{PAASKTfc_Kp_NfY;o#x-48NF5wz{<%VJ(WNXN^kQx z)~(|Kt)i6O`g<0DG>7GY)6+5cT_87Ye)b+MHnw5z{&8^}TeiHhQ4K*z4qp`@QZgyq z!I4j}c>o=?Mf*y#`V_;_jbQ9j$uzP?)%-n^k8*;6g=H(qKAtyk z-gv7et;b4dlKwmDC0_{sFc5WNcmexTto!EEy93Mvr2<5u4}FNV9m2SPcFx6Z*wcrB1}a{L@*b9#_m!VU z=ncTwtOQ0TS3((439|AqBAuA=+1ByXLKQtDn*onwkB0G+nJF4AUv;kTfn-8(R6%fc z-@Y~dQEw2nf)EG)(?x?LV9CbZ<8gPx2}39ZxWoh~dWd zR|2nHBk@~KUVi7kEj-TUm4VEn07S^0YmXj1s+DWG2?MsAot^zU`<}E%FLAkJjP_vTDIw7mcJ_O0!M^_fiLUAbOvu3rJ*+?8Lltv~kg6X4Dj_4n zI%RBV_`+*1?&kEynwh!L`G3_tUG1P(FTbds{Vz9v46`!-GNaW&P~JFio-c(m7wexE%}2z|e3hW(H}z zv(2hhDD$|zy%_jkQl~Vf>vzx_tXsHZGmF>eer084t!L*~A{yPm#DJ7J3+S5X`sZ1# z_*zb>iu#9#NKR(qkz2eWHlrKd!-!XGgNQx;K|zOYZFeG)#$~^S{6a;6CfK}*WAToI zc1`Z?udJ*DF-%QMOADRnWJ24ilxCHb=EefWgyhDd#W~bq_Iler+Y=&jRO-BZ#Hj0Dz zgq>LPp%D>VgjiWwHP%Zm-qQHa_wpBqxj 
z%f6oiA2uxfMksgld$)QYdfc;$Ksf&FW?Hd$t<tXGfs$gplO?`+ERt9bV{D zs6Jah7=tYDuYb5SHa7M~baV(bycn>L3P+EM%Du!oQCF@G;XGn(y<_91O<44#WDKhI zp}(`oiUaR}Or!#Pu2bTg?;Fd9MecT9mPYlRj5>3;;&LC^t3^czKovFF@5Pn+)LMfL zJFZL%%_7}_uG1?6iI4$#g>K_s6=&CPjR^#aKzl%BBSEYS;XCijj0ASu_&{8m@UDQ+NGNDv#gxn|+S+WuFw{iA)6y#Y^l6i- zsw&Eehur13gIED*iOVU-aPPL}W@d(QyaTXWLF*%{jlG_-TbYN);%@EV^cJ7QWY*0x zj<&YtH~EyvZn<@9Jt;|1?$z%TeFI26+TXzY1h_duM89fFw5uoHI)??~OA~=*n;dYA z|D6`I>@5Kowe#mU|NQwgo{yI|uHevS41(wi)^3iC8=quwY}yorhagbw_`R;os`P(e z0P%zY>SfEgxVR2#TeUo4CF~VGw_U%a=wlCgh-4!xD-c4F_9%ps{$iLCYLlgp3P&6g zj?i>*#k6E!m8p%9PRh$;LA^yvVkG1-%ZP{w>N98B%&uQ2w-|Wu9y_=%oBn$4r0-oX z{Pte@&VtZkzGEl8;p2<5VHGo=4am5nWM3*MDER$zF*_E0OUe-R&ub_kdLW?)WO8h| z$&IJl_vgj~~q0p>cDT4(JdR_7_in2Rm=GdDujihu_H1vlIU+p@ARCUYkA|I9qBdS?=}4WyUv%O&N(5o zL5m`%!L(|#!NFgfOHRr8K2}SLQQ7LW(Pc?ABTw?m0^^G(&NSAQg{tU2suO)uHS8tq z?V+9OtCj>LvVD9KD$yEuclV;n?m+**Ko?`%HP$?=i(ZRDx|=a|cXzKsF=l<~QWZGI z42Qu}oB8-4WUHFb0Bk`beO_6)9QAND0!MtcqVZL%2=a1%z_MAU4rExOYMk}g+D(v7 zu|57p80|UibIi{QFv3&-CPCF^7v!WCqg!@zhI#$!tF1|hin>)jTBfeb{)zmfZ1%~t zQyX8G&E#jXAjKE5~Ca>#1+a7lz{^m-<7(%V@A8NGy%h@ zfFFE7PytXj&~t0PtX;}1*kozlaYHQg2-0I>t#KLeSWv>zx17`yJWYRg@uKAp`du)+8Xg{-@-siM$m~L zZAT$NgYAIiLHINXTo)j-MOu7zpK}B5j|HDZ^1yFMF~)8PT;Yld8BlS_t9khfrFzK& z2X5TGd#$(kJkc+(Sf7UrYSOc@(c+q>mm9CM?y~MGU%nAKAiOIqlFni^1r{#Hj?t~B z^~u`1f4}C0<-)}nG*)`8w%N05*CB*hD$1t)mnu*&llpGEP9FI}?Y2eJ24OBq>4e|r z-M8=b*ZN6Nh~9pFN`nI7*N!e?j^>psN>>!a7zC0TD!pQYTKZI3kHPnX#UvRRx;R)W z|NmBau>L5K~l6M)vQKc?NeA-_?hwb;+;41tvFhrnVBhS*0hx{0sx1g4HS+Y zlN+r_0WjNse>{x5$Z7xcpsNEDAQB-eG8 z44FxIv;DIA>^T3q*yOWMPq!~(E`>e+g}FSc=qFGTgu5)o9`g0^IUK%RU32Jwb?A$t zfEt6ZR|6Y69R3uae8%Z;nUg$q;-z|pdJqR%X;t90O8@>`b$7SxI504QIOG3o7_XJP(2xBUQw!uq_V!{a7|Iw`gl7srLnk zEG(5oewK?HCSU_&fE0;R92`g`B0@ufgL$e0i~=$00!rb6;ff%KzkTQZZ+Uq8mBo7* zI3GJmg^ckGuIn-BI|bDy*(!+UhA^T(j+#UZ4Iwcd-1`|)%8Y1gda8r zaf(qq2<{URS&KS1eE8ntNaZv{#tiPX?NzXciEIkVg4QQ42Qg=?0$JNx< ztz%@&Zn0`kW?Hgj$!N;3j3k7V|scA4%&l?zYUSy`XIc(HWVhV3_M%@)D1 
zYIA*LOm$@?>&`Viv5J3s0}Ux21Poelo)6?qhGS1?u@Tx#vB`FPC4^UCyZ3*u*iLul7$ zw+Y>2kvo@1NjS*1rs-mdsHQ0suz^aPd*41*qBy9kGGIKwPmqm$Coc(rf}3XrKLlozW6a~B^J`L+Jz|BpC4!EUlUYH$q3hYXEmd*~1>ZhS%~?ur_&5Z1}_ zfWAhkU?^I*yDw}1x7@WC0DJy*=;q2zA1nz$2wNizMb9gGe<)1eEEN(G+CA{4x3|Cf zw3Zgj@9!VHK)bzo`}Vj%smnxK)rVn}2<8^nH0j+AnnLTWo` za*wzqh&d8;7ct76kovO>Z&e}+3osG7054UG`0=dCTiZ@@g}tIsh))+QT%h^rxpT2q z&N@09Zz=NCM(uxW_udtX1p;*ZI5A@)zk)ld{Mq+fkZJ9|3&q*2&`nUIq8vMR?Ciyh zfAc&;Lujy<3OI}!5*HT`61faBtD zqgVrtg`XA+GhQ(bdJl}D z!M1;4j6=C^Mg5qaBQ%Fh>M6~&!as?nbpx*A(z)3)CifQWst+GX>YcBv zF3qnal!@#^?=q$NcI=>sT5tjNy^lmdP|$J;$}rPgNMQzV?ucr)5wH%gZyDqPG2a(U zL+V~GW;)3e=Ve1KwjyakV|`-nsdo~rBAjg4)MpJ1PrmB{P7hxdVwmt_A9w8CdiyW# z*|#4HeKYA)NECkemE}<@{Q{CjeXZxr*FV^j>J=5G>rlNIrN`0}T6g#J%3n_$-aAkf zMJ?QY%GlE9qcVF(@wGVaA3wj@6O6M=l6p5P1rlOvFF~!Prc_i{4>teX zPr3=AF|eo2yKf4Apq}dN*P1m~|Ml}Hg#zD6pXHei+1jZ#^7;vC8sB~|M&0DVs_nzj z6A-$b_guPk$u}dzCSPK`EZ2bp8=;x_L_}!jf0R=CZ|Up5|0?Sk2I=|h*Q=oZ<$*qr zTn&ewU{e=zlFhe9K4Q!hSOw8fR@O7?L3FhKLyIHw@;nC)=n#l}ta0HU5|vR{z|<6i zlmUBCmQ@D_3Jh7AZ$EB2V1@u2Y#kBr$y5;{&f3=YW(_lxQS24P?^TANPi^yy8rv2J zaHm*QD;b8&8VLqjE_h=L9Y^E=8r$33#WW93D8(x%xQYTg15x>5CULvnKp711`4C(Z76HWrA02cM+`BMku?4YB7A+5;)j2%T zD`tbQ$mbZp7esXgm(H#(fGgOCs3>6m+4^j3Y*=;~U$4Cc5L^k<0AH$Oe?1k&8nl{P zTe7LwbWRF7Dw3}|ru^)Px;h^X1gaHUmlEf3QBe_K;Jz&L)-@;;_qxv>z&lS7&21fc zyz2{Yh2LKqGUA?=s%xRNh=lwFl)kc#O?-A)3y=p_^i}!GP#j5(PPa)S>!TNJrsOr1 z3n+?G%m8@;AQaV zI@@mA{Cr;}?|)$8Fm4wTMh_UP>U_Je#PLE!0pE?pzUED|^t>-4@yzYT4A$S2#E70B z;uOOEqXrp%H6bCP_7Y-2844WKc{CKK>7iUwVgLOqJ-4k@`CsvQNrG_<*?0*B(adYN zdCLViItrvE@Yb)t$8UZv4A&)ff?5D(=^^G_yG^IqdA{~kR+a}PMxlUzAyFKpq~363 z@8a#7?TT7Cg>0Cd`VMR}#RC@7S_$lb;I13MQBL0yBCx2eA@ilKEcg#v(_oTcNZ4}f zZuWX!db-~9DA|qxnRoT|C1mQIIU6k3s20Gd>nNw z;tnw+q{NfVMrd|)5Q%`MPcVOp5kkjHR*vcaC=@w5jD@yt_1~4Z@SqKLN4{>h0V10> zbMQqGE(3)rcB{3u^^M!WiQykev)6^T93fH?0V|ckKEe(Q%FX|v0LCbjdTgfpbbV=J z=YjPF9*SlsS4^U_Bae5YiZwC*g;h6;BN26ds>y&3m#nt9+sWX}hT~xXyNQ_;ww;Ej z1}|t&&@BckxjIr=!|R-3_f>gpu(j^S4|>-2E&3Z%ZVnwonp4aN2Ui$6>f#e-k_`aln^CY3GGR4G`H9*~n@lD&?Is4-%(fL%Zo 
z;&!esrDof?W;M7@;^D^x@P_Y`kSIsWf#2yoQA*XRnV6UiAT+(tGG!!S8RisX@q%5) z1AtP-4P!IMhD*T>cc|(iH)1<PaM^OIvo>bFrmu$fNKoKCURk8>~;9DR)}Vc)_ed+TyxE(~x8Zl*Su zE~N$EAXi(0qTL7W7BNuSi67XPj@1+Eh>MRe;n5+in`9=jb@pHq{=&9{ENo*V15a~ z5K|k%Ts`d0Q5H@ADQ?G#8dkJn=^ zN1-|&L>Y1P|BMUh@uEPMLeIdd@Cwv9kGdl*L>W>Q;(_j!653$_1FVpEt@uN5-bv0;6IoQ7K0S)t(Md6 zN*CMn7?9LbqFrs?@@nb9z)XtqNKa;tbaY{pKmr0`k5)QF`7~Eh; zOE9kmq6lFUJOyGAyMqXUV7tqDx_$v~LP@z@D~|q1vfxo(Q7F>0lf2kCphtTik%Vh$NJ~0(+Re#;oFg(M>_v*i7an=Ws4XSUU^v><=81OB8P6BL23NSvt(|n9hi|OGwY&O>qmp1*=$mBy5y%Ha zBN2XzpmX*O^rBLrfuS!hju*HN#K}qH8Bm2l8-$6)R`miwi0&j$U})R%e`K%bt;;7e ziP4eR7iTAX)*e6yC=iKk)cW=7V^9yV?7y@fOv>LkQz+$MzIjstMuSlKBsgH5;LXV| z_pAigFI`w3susxHClnOQAWBeC&^p}cbP=Phm2)xj^Wu(B8JPkf`99z7aTZK`!LTx2 z-?(obnpBE37ey&aNBcE4(Liy(M>_PAU12^!#!PI9$$AAu6cltk@>M>-q+&v&)pjf*GB0pIs$Ovy#Qn*hEA_#n%&sx#Qp&jocGSb$^{cA+$Nbu?h; z6aYPi!97-=iBbZ0VHbi%GD?(*hQ}v3$;w-~awQvM$!^Pb7ATc{1ONOwIqwweQ_O$n z{%6~n3?TPGOV>}HMP*|>HD*fLx;uUr8wiNNG`r&H!ZuMqnS~laU40oE)=BJq{W_&F z3Pg(xjjF}Cxz~cs;2bGl;&R;YU^ZoL?flr9ai(VG-Me$A4Ein-uy3$m|D55-=N_|~ zDQOS);3n0A8}|F+Z^pkA+VyhxpH%E@?#p+axy z=-C^#Z-o-##T5AB)TSr=^rvm^GqO1-J4lVRnwM6cb{Q%z8T>x*X5vY9EB|~vZd6D} z>n^eGk|lH$`o`4@{MlcfDHeUyUR&94d?94NJvm@k--==j3+W(#*mK|+0y0KA- zyp3JDdELq@2phQ^yU@#vz$vS#$&6rD9V6pqjp}h{_P=eaaZAE;7Lyxsh8H@(1ps_v zJ4TQyc_&(1TTAp}w%umlpmNE&K}15j=@uAX9O0o`cVpnEjc;eOadh+pR*npgJaGBh zq!rw6u5f>o>!IcZDmi1Bu^8qxRwVHE^P!FhH%JMPTY@zTtED`X%`0re7nnriL4&nB zJeOE8@KSG3JYEkyZGI1e3UU}|y2?d^t`T%%2wnl&B4TJcAr&@(2@eE*5;Je1gG;Br zpZGRW7Jca)(pRpdBqZI{c!!7pxfI}5S~v7Q z!wkYO5yTL*R`RgoKzt%-ZIH#eX486$#>_^%baz|qFR)=Mh@O7g0zv|`X zrS%1bGr@)!y1oq_=s3aea+lnGb-ue)X`KaXJA1Ge*zA=c`-ypbt7HU>324ozAb0#9 zVn=c7ZW$3_VPEV(+5DUwaZ+9jQ49@x9IwL-Nz#jzJ~;(Na_WaXWYa9Q$?>oENW@6bI$Tty|uclTP{HzNZ)1eO-fQHR8g+ ze!DN_#)f?0k(MSr^?T^dqnyPAne3^?IEL~Eol&Hh4L5~mu$8bIlQvwW!rMuqAk_%4 z?4}*X3=mib*UpoyD`-EnCVhHCZOq5-iaqyd;8iKSWI;q=(7i@x|MW|Ntq#`;6B76c zmP4_jQ+j2SCs!HjFX#^2?RY`r`f<^W=;-r@0hGnQ8A=RG*WBaaw+NXu?#Rx|bj0)P z`YAw*Q$4gU)LB$nLoQ->h=>`;>BKM2i#B)C 
zTY~MCB$&UH7)pHCat0IM?wg^1n~E<%cpY_->zB)@lYcy?=8QpBCvB1?bF(gR>=(XS z4MF=XkRgcuB@`GRKTqULqc($(Y~1N7Sy zjUUSteMoB`NV#2u#J?xlS@0zK(coIh%g=umUYMWXg&Jb+fb+rvj5U)rSXPW}xz}-c zBjSAnsi$y~3P@LW_^4-Xcu|JQsj7yS@57HbLPLLq)J+!vvY`?POt-VOtr=>cA2vBT zrlSVSW^nBtqM}!^>9K-&L9q1{-~6R{bo09WR!^>jJfVb3n3|gE{(S=VzzfuK?VyJ{ z_MHE>9aI$Rk@|^&n9L8D`6JK^JJGj+BB=+pNhK;mjW6;jWn#d6?a+P@8MzW+hoEgh zy!P2`Fb=?8A9%*hY(QFyd12&31-2ctLCd80BB4Q{0D}_yx!jQ>h7bWs0R8^`yLDep zxT7WBqef6gUN46xSNRhwo1ANcl@E@?ZKPHqK}{i{aYAa5h7L_GwqbJYG%B}5KPjwH zO{r&?L9of8aUzl(G3~`2QN#K)-Tma07|%tDJkaolF`vJuyF7V_(Pf_ zydp80DDxArU26V z1cDq8LPPWVoE92v0BUYTMXf$`=#YaSm=|^azZ*|hOnCb@J$Ig^O6Ys_dMXK`YR%`q zUB?WTxnU5k9UKmWLJ~Z1aB*6lk8;aIqjZtzMnyozZ8~oj>`n5#V%+G)v1>RDM*^;t82CMiKMr-FV(LBg-QhpTen+oA?zIw@ zP4r#Hl=~z4iPb@;Yubcy2$fem?O9^pD*WdDK1Oon0oe6=lxbkC(BXI3bOvQe99VAr zMO0AY+Hr;6O}OJ^S!Da<-?v5dgMr-w(vKjWfWLVNqideik61Qd@DGg#1e8dPzr&_m z>@HIB=-II6lqFBJ^V8l-tzUbL7gdFJF97=aV9Z9BKe?9+ymblH_7!d!orxi0cXK zj7UlV#iWJnZXCJ%c7c5AVwi9;GBRL%EYZn_eG}ZucwFyF~`RO0`0uM=fhF{IdiP73`j4XF8`DRl)6Eo@EOKqRQdf-~R| zzzo2beGPS{zy2+p_h2~Clw=kh?C-CTnYdY|wV~lEh&b4<$yqxY2S5WN*m&Yt3Se%g zV3LUu$3Q!0;FyV~#D|Sk?#N?iC%{~Ni|B>5NBxi(xyj>3tm!zI1AX`Y$XG;gggXO{ zu5IHFexmgx4m~?q5*5iBDWiyB+`PO!Q6s0(2@&gIP!VQw<9L7)ytvczLtOJ)%OEVpj)%Z~8>igpnq3BoNMy^EA*?LPUK@F5$ z(!5&rHsRT};pWN&@9DYzyxVJ@*#^gV|9vNQ(@edhu91WS=aQ3O!(NZO4vdM3a9V!) 
zuJJhykRd!b$ueTyux3(ec#V3c6k-7>=eura63Yqtm5!?-;Ff^|z5>lks;XHS;pPqq2sy~>lT6YcplXiD{5 zzG?^Bq#gV~CB&<$i~lEpAe0`0zClcM2q2^r8eP?)jfSMl9Ft7WD530tNrrrh``;ga z$>k)3v=Fe4F+2-Tlhg0-tmM;}h)x)Wo}*5lZ>I>AS3OO9C7vXxf$S}NU7btFNKhuI z>Q5y`g9XHXBzgyw9%>4qJkjsQON_lPF3l(Q!5ElKPGIteF3^RAu@x{BR7$}i5^NO& zBD?keZxEmuvJXcBVe?n?Zlp-q_fz0jR|_Fn?b%VB1p|hcLRt{d*CY)pNs zasuI(^rAIWIDk-<#<9}glLwa4nj(N2-Itt(Cj|WVgzhkxx5cIcGlGNbjc>d zA{vvx4UdY?Pqayp>0eD!yQy&a=6>8_J$lDj{?wxH3pcZ5=~A4!Bw|T9i-XZXM{d23 z8hlobB|rr2b|GjewNW~LTxhNqDu4#>x`1W*`T4Yc2upY%wRvITt`gtMg5cmaSVB0X zh)i^=f<|fdn8HHntiIJfV64L@2Jk zd$ojFMFa$7(Z+!@AD&dbL^Kr(sX1kXw$1-HGzf2Tw6;Eob_hZok%J4c7M_fcbVLgh z1t&ZV!7K*e4~CsYoMspvC4f*w;mER~>Oidx7A z^yvAb0}Y>JxZ5G94BgoCG@wp^8Xl&k1cLuR2f(yLT>O-y6P}3eFb|Dq z%rKXL;(x-%+ixf`)%!;6jxlsF@~BY{{-M5gdyB~6<8FmS0hfrn9pOQQ`&I25>wU#| zADuW@Fj$$l+xFl{JrKuO2$%1WPnbRh#kUkFMjk+fj*hOE!$=@hVg8?zZYVO20n{1* zSdfQ-=*7UsCLpX3_XaX6IbRD-4uZN+br5Qx z{s3Fg4m3g%(-HKBfi72=Y5Ma?Ns|2Kq?&}4`V^JBz)4!lTv8Te@8rUhA_Bml@7 zYbF94FdVY;Ry+Z!aK3&-f6{vZmW}8#_4O)XcEPt6H5sDKqT%5oa>^d@@}u&qhC@t6 zODi$?DZv8SC@p&+91W}?2{$Ho+c3yS;vvF&eKk1y#2UHFQKA|$BvFS+0dwX|Ax}9d ztj4`y3tk{L5Hkl1INMySa5SUJ@ldiuIIu$&AUp#Ad~eO$1r%Rp_!Y-RVfqia{DwGg zUj~jxDfJ!*AYl6+4$8{4)60q2^cJ~zB?Z_r@#zVk(3r$Tg=TJY+3n%SZK%0&cW$T9 zcoKrZ%F3#La4@hfg;4i-d3!hFtPUI!N%%uW7kd{he-dZjHa0#hp90OUz6ob6L==dk z_mD?TDExX1$ee>nxCU7V(DVVl3O!d92**m2Efp2ZNFC_qwG@?t&6hWuY)Bb1u z{kZ`{s+w|2$brz{@fqqnJ=5aimcmJkhSFEzaHWx92Z~1#9FQ0O=zQyI@vz+_wQ45KoS=wUZdmVq}3e( zEWAY;buUa1V7)>h9P&WzAe~L`G5IF@3QF)Rx3`n0fj_c4GMqLAbJ9#m#K3nz) zTrB`39$pT)#s-Qe_8krtR5W#iaRcK)jl-lE(7;0)dI{18^?w76cC-ZXXrsR#6~=oH z6oZE_mfNj9JmZ^J?1xY)b?MJdF{2BIT2fMyoG%GT+4p%3_NGSsO_zElB6h;R;P$ux z4TDf+FDyTzeyK$76Hs&#OZq+r4HKO1q-Z4~C@hSV`V8`Oz=iq{Z!Q=(#8_{DCb)G~ zksWOVK)Ux|F`!)if1^x*zZrdbH-mu!S0ixcrOwTvw+bki(IFgBX+ zl@xmqD8BUyJme@+a?Tp)dx4^2bRQFE3j9FJDWJwyK#4ev(>}me zq>SAG;Y7}_W8z?EXqbNb!2O`<<)6>4ShdfYMb!*jxW)f`&OqGht1&M=WZ#2T5HPJP zFJFRHVQaKz4QT@0HqP>2wXE4;KM+5TF?de~Q)+$JRaMiT*^i%y4Z;FsR7ZbM&+8E`?Fg@PrpR)-iF0s{LdU}Y>XzMLM*j@@Bzlf>bFoUWr(m@XVF 
zaScqJmcjT3B-TSbp)V~!xIw!^Xd#EG;szn~!#_s%;K1eBnzXGrms!QX4P7AZA~Z|z zf+aTwKn1_tgUDw4HSzk)5(-#tpAryG;Y+317>6zX4f$4fQO!(;ejmr5QnY6B}g(DI)6nR*BG^4#z9E74rrw&eL78P zy6k|SprNt7u4+#$r{MTNT2c7hWYUzuXG=|g-*xBCofuq(xczaHk6W%3D|OtOsm0wr z(aBePa*{RuL*d^j+zw(VNmWhB8mkcNs&MR#rooI_E-2JEL}l7OsOH3D(u}>y5c(xq ze5Et22H38?s6x3=w~FW$d4L-LJ=8luGB!%Z?i`dU`7_8svS^G%vor%Ja1h8SPUORN zRg65 zghyRHksZf1IBsGV*4a8N2!xDNDM-f*n44}K;8cb25;bqxo;{W#2Q1us4n_NoHfeH1 z!KXrIG^g%TesCZFr=F0rISFzgioMyLK6Fm-%t$2oZ9a^;{b%-uk z=E^!`PnRMqYyo$f*kp(e#B%|*6AWCY7BVk?Q=;l2*x=SLlbM-W=j~W%4CZ4UMuA)N z)Q9v)uC23VRqjM<=R+K&Vry|ca_3>RsvkP+cvJ`{vI=98Pd#+~|4Mrku$=Su{r{e^ zFJmohF=I{HLYC~tR<^WC8?vXUgi48F42lt1LU!$iv`I1|loV|!Lb8-3N{aM*T`}`K z|KoYS-{X56f5$P$Bz51P`}29fuj{%7i4!Gvr4ty4Fb2+FD-Jd)k-{P72rV#tdU zlZ)}CUrmY)*BFidn*79KOkz-Kl|GIZ&I1CY48A{9>Yng?IqiQO(l z)f@>{G;D;Z4r+2q3BR;WC``S)e!`b<8`HvybtCRkZT`Tx_qi}b^-27|wF4TC36@3_ zxiWf&6DCQ&bxLrNmd0$$oL9){L`9r8_%LO<7o3dr610vuHzEN3IkiWCf&Wf(TvJlS zLP5Y0JKDiuJNjiWMwn^eZCgaHo6M%BBWBd@b1~CH4Ur4sP@^Q8XI4qf00a)qemLY9 z_1beB(CCm8GU#Km5xO#}ltw}jZgKOe@s0nw_VT5$Td>)McV_Fq1l+6j?HiIlCh#{H zB^yus7gIrQAjVyE2x_7S!m)P{B+Jhws*&iR@{{YObx#8=2gsr-7 zAM6~w__mll&BoS2ioLI2zlx_UtMG?Qp0yQ~{`mOP?wg(Kv>JKp3@}~Yfho3e9^q6NfPQ5E>ci-}Z$9S}=RL@rd zUWLiMXZZH`8pkt3WF|cw8O5!0xOzU>ZW<-;1#_dkM~|*=7)er~VDlhv1B1Jbzus7DI96%Vjyn2 zhYughw6a1>4q2dj0hREQeLT+u7~nG{CRu>7ZNae#V;1-vJlH46T)Enbu%j}qqd1(}S_G~LUkmHb@L#95ToaWP~YQbyPn__QeKlF#}i{+(e zO^{{q@ov?;QC3p=84KR9$Hh63gDQ)W>@lRCWVWHyBiSicx9-WFx{x2qyzO?YnTEa5 zvpq|5qQ<@c{0leUr|6@M&d<=ZZL@lOtATX>q%eXbb@P9HSztIS_Td&3$ved*lIU_> z8@XR^VvmR!=AYYd#71Vu5ViX+Gi&=Fe2>z6|45{6p0VoXHk2#$dBh{>@tkmV%Lsz9 zOj(0F3*Xiov;Nb-g@X=W&0K4wu5)MQql;SrESC&^Y>Rwa;gC~?%Qh56`(mknaUi0N zq}+C0N7i;@xVQzk`FnbH_P)3<(lgt2d!APC?V4)J%O`K%c+%d8kF;A9vv}pV8^HBD zyi^O!U{MgcO{V5&-pa@@c)xQ>Fl{S#d(I`sE%;;*saDttR9NITx`&Q zj^el@mL6gtjewCzd0>QNFKM~c5~p&Sk8?ti@^;wDl> zfevE+PXF;!+!7jWXwszJP3zysyLV$?air6f(6~y;4V!}M|l=8g+PCIt&FsN23 zms1*G)V{+Dqx+rSt2Z3?I*RVX_*V6qxSYfQeK4b8eP6wL)tG+$Cj2DDawPTxWZqP| 
zlo#gBoA)zeurW+oJCwYc=_hN;-o0x@>#n}7t?l?;dzvt++L{4A^|y2n+fhVxL#o*f zR3IgL)ZCF#W=CQ<3NA(I26$)p8Gso+kOnknaoMZOoe=lS@m?& zgerQd4QNPdO)Ou${&uI`JTpCYs1$I>)Xj6ASsI(*5~-?(+w2D zZ)4g6q+a#z$#|_PY8ZPq0-+F8!qN|_O`I5h{_n!RG_p2J+vibk_0*M>{g2MiTD)E6 z(R?1!c#;v&`?xhYvCZ+OT$O=sRNdcs`EDJ^p+g@$D{zqsn)thC5rxq(^{hL@&WElsbXmxU9rJqCV<$J3YM>54LF` z;cwu;fqLKQ^&L&Led5@9Q+u>Q+!B1c25wbS5k$Olb`qMnRt(tAe;M94YQ=+1+D=6Q zBb#>%8poWdFsovLiSu7x+}f5>1wgdELy!|Nauhez>uUWY$ut!%+0$~^jBD}hrj=DoRpPDYe`e3Xg20zvQ>GJo_`U=gR?81|C7u^qM~^!j7Zi&-xmE|tvw zxTCX;sK~am-{*6^$`5B^FCt^le*~n76Yc5Gn56crjM$Hg7>ya7V)Kv)ly%bgkV#x*O0<)`u>3?| zGc%;1uu%HsSjekl!Sp*7uXt*q^wGB-$pJnh6cX7uH}Vr>^O74_y4#!x-*pXmXc9h* ziHje$_yyu%&S(!n;H-$p&12i}hCab2??22wPh&!Dv=xjNUF`|ZB&&f-(|;Bi1;uua zs&3Wuo2~KL+5d3LJr!+jZPCEWU@6r#B+yf^f{56Oo);6Pi?234J#ad{dDl}(%n)hE z-&FK@%ZU`fQKTZ`6Bm(JMMWn-Fru&(ds9xA;R&pNf2%hQ!-Mzt{oGg-*4V%{hX$6? z8%1ex=Yr47xq9(vkJFG?V0vpv2}wK=r6Ju7jt%HQ=sF8*X29IUoL4bUzGc-UqfE%-;ivG!MwKG%x1dy&8l=4r5)Vq+(r2Np zQ05V3ntm?I4&8z9OX>T0Ln4F{dYwjHt73BLF`8L2yMtu2K51!v^=p4}%~g^G$+iV$ zC6&vqBO_6AvhX(HSxW7@YuBv6(#}S9JXz*J#oOmNa)!67goJC;cD4RNPNM;SNX0Co zc@%q?3vEB@UfqN)-#A0Vy?HkouWu$o2GXOjW%MnsCw#3Kz~JKEhe<41pD^t&ur)Nv z8cJY=FcRTW^Nxe+6CHS;D{`ynCpIi==0Vk$$RSA(zhW-MPl{?R*3q3bG@KR)s;89K zs@^#m2g_Q8!0+>cQLa0a=mN3bxIy z+<8wug85WPmwH%PUS)z|AUavku4B6ZR~#xt4ky0blyPF_E5{X)*yiM|Br17$cu-f& zkQGJMbb{o&z!cCAaoN*k7IBDKD(b@R&a-FV?Od!@z(BjS@A0ok1i%s$s_ia%)j`*p z16m@Zl&x2GNA%N8I41_(le3^NI#6HZKWZR85R}gjpN|q{X5kJp>VIMFp;rZQ6$lU7sB$dTekwI zm;S(GXkF8?D<^k+;25pr@PY~tkqbbRbmwgu^k)epToBhSO&1R4_5D)3WM%1N3b4qQnapMoz zE=sJ5bsUPPg)~WS-t_SCF^Y|!6}}wDItXdQ7X>h>cj3CroM^M8<3Cka!I0Y)=#vFJ zqywDq!+sHbRj=M}!+NcQDBQ&4aNUfI92rAP^z8C1b@Xl9D};Kue){jhG?4hK4F8r2 zIXMm5b6?O^zjCjD&@gcPAsO6!FIrsI0Z5lRgC`t)AX$DuRE4Obq zmnR@%1co>Da~}dG2?@t6>ID+l1T(YF{QFgUkaNyd$`JS;U$=ID zb!reif>N#R;>8Vt5wWjRAa9=Cd_r7&_od2F1XVoDXB7KGd-UqH?_mBU(_g`3KYbzS9(3n z_WTzWruC7Z%dtny{m|-Dl_n`TUbky#c`sU_&-VB230xx%){i}J67EF&n@_zmtmyO6 
z+Aoa+ix}jtJP!qoB6$Hw#Gz2eyb)febBif?q&Dg>)_oND{F*ic@~DS}n{ntQoU$Cqb&i#^;?YH;kOCgAjB7ygi1fZi zmr;K(9`y`jPT@fmdWw~aZu{e|j|hu`hQGjlQ9N%xdh}?XJMDzn>&kS^#gv4QebdLs zJOz%pfUGzfRg@+w4Jhf#`0P=lZM?U7db-Y{3s6KG_J_>*s=jSvw=heK_xUj1qFTu# z4}DDfEYoEGhKi)9{L^kM1*vmcl|=9c;+sfjehR8zP~+Vvmo013X&Ppm=5J~dmHtSo zQjDAK4a-{DzFj-@(QmVSZvQY3Hd-q;jAX<`d8 z36+2iiA5SS2#ju|M&weFd3D$ju^eY5)q}M}M7ZC@lg~upScQa0aRs(g^b;(SW4NR| zd*;dL$|NZG!kt>fhD{;L3(gYkNAPhd+gDL}4G=5B3*c=tppT?c#F+gmUV5wVVsUTZ zpuydnR&zpp>I|tXjC3gi>4=M;8g=;dq0^?N=5d3ibL!%K-B4(Swpq7e0Icz$O&(#k7Y9|zE)1U65J#!uGd#mxCh~iiKymyw^3OO zj~vEZ?#%1voJ@F8%RA&&l-Y-`E*YpO|AL2!M0F8%keR4@``h2y2d zJ4S<^V}ceOL3I!HQqncLEXdpS0q)Y`@r1i{H`;wHx%U3H#Xww{REk>BkqW2m-P2$D zn#a65awq?kPFsR|BV1OcQIEm6lJ_`volJ6>JDERd-;y&WWClToQrDN?oq|E>SZO51 zDnHG?E_T>t>;!w?&j{nnn3q_%aG@N^)~@X=3JRji9~BaB;U1}!G!7R~3u5ox3F~1m z94rNn@eu$g%Eh9$gIxzdbIwH)avEatfXaX|bm7IG2x417AF<&)yo5xPPs@hc$DL%X zXD~gRr}wI-U})|St9HU}A`61=N~g`9aJC~MhmDzz$GEr>GoV7E`iq%Xg#vbL(l@^y zD#&K6Mr&;G(a-+{3i;tn>8+;WG@^G8Z$@PKQD84!cLibFJZ_EnD^5af0pU@zFWBfJ z*#L1)WA4FLKff+mX11`fc+}Kv&&K3g^prF?1x|FPn|_|hwEF4C+apkiDo=q!JQaK3 zn8E1^swMpNl@KtCt^lP`_c5D7h7cRKFvhY|WyB^nd~HEgac1CeY#*^^Wa?n+kt3?& zkcPy$p2BbJhMi=2nOLZe@iJk|6eY}$qJ$?-h!Ld#OXB68&hsAueubo`l$W_E)G+J( z2Q)EelvB?8M5RN17nfrWc6eSy$-97qbRgX)xw~J!xBH1q4NKmD-s>}F!3qe+rUrre zy)-pj!)dIyR%N}$Du15V?)5ZX-8OrYq25JuLvL0$;ZGX4bx4a?JI>0gs~}KNZDAMH z{V4c&&H~7wo}>!ri9J8F29TZdC|u45p&d$io=+yCThxM1*+$a!4ku|1N1uIywpVbA z7`?6U`&cG3nZ}T41u~?GEMvO|TA`qMN|Wh6G9CJHKgyEC4UWy^P`N$93+i6xKtv%h zc5GLAbR7mQ{Nw5gmqkWoS~+Kl5gp;Ah_5oKnHg+S8h>Y+@=9s}G5BU!vgNjkn*f>H zqo-Wb?Y6tlOI`n$ZF>8v4ooz<0t|fz5ZQ0`KIAshbO(Sym677GvqOoT$LZ_~nyXWc zI`@Vhp<^jWH8ua~r!3VLt(P_aTiI7>`PnMe;6Z%ouJSdd?>i>7ULCYXjuB)oi>$tH z+1_(~Nq#9`ruOT0A~~R2`&H0IC(h5`G&5@^u}LOnOqO=0#~YUZp>69x{RIoUeKVjE z>Z(zrMu7ca-yG1Bn@Ro?eeGYLPd{dkEqB$VNh8+X&VM<-s;}75}&TMkcCf z(le%gr>5wvJw=d)vrC`7_%rSj58K^M%SMO~aHqa#w24@TYDWjo-}EV99En^1r*Cvx z2ta{DH%!#vUc(1z#6iCqwLko|t<6S9c$ul9%7rZEjtJF;^c7v8Tqx^H;Xy^^ds0xu^tw;cSkYE=V@?I# 
zLzHc}A*HPbZF146MV})K!`y&SVXj~-s z3d^`-=T76bjd|;)~`eI{ihM`^wB>*@uvB>@oq?aFe*6v`WFA_epVaO5I)D zA{RbLD=j_!Px(wnrphC-zR;``%KU2Mcw!!YY-aOIq{Y^)PQf$vx5C20@sNqfzW@wD zAK%q3n%~4)0G8`um1{YOrx;chUI9&MR)>zfz>C0a12L<8l`7o|keEn7)zs9kPB zq)@eTp6GXx_|wL9clkSA{Emlrf`%89$B9f*P)OtSZTA1+a-TxwAVjkg3Kf3z#y1+7 z1E%;@_i?~U7NB@&k{>#7%GAiZagM9s$lJcOp%K$FTKp5OhIgOJtR;l=9rO^hzbS&Z zbbmFm@Cs8vK2iHGg&ft*?S7)j2D(Nb5gg0kJo;5=2~A}7Om0p}ZOx5a*5aG2?(DT| z6J_sAzw-m?&t^n8aJQlLLY|iI)^97y1|J$o1Eu7)Fb7PYYw>Snt>M?}2yyHvb-qNy z;6To9q+~crv`}~eN9AOM>l|K^soag*Jh|?W^HcN5`acn9#<#k59$AZeW@oMCN~1=N zGVIt%e#p9RPtL~%vTk;$b)Ub!-7fgsz>)POMr^9Prr%jai5qG^Tfx@t&Zg8)r^5^c%YJadXkdzIt_xb`ssQXJfWagrbI(-|{`UQO1ZrgIJGyby{Do z&Shu^mhAb&_vLgUz`h_m%Ea!l=XT4F{0xEBm|*f7*+Ndz!2lshW1%RKU()E5Q@8gJ zF;3`?vpt#w0IWN|3XyM*9zEtU=O#;!5g(0gQ`kfX)Wm8J9Pt<2JJtFGJG-!pS7l|xhB;$8d_%#zg~laLbyGuy+#U1vm`>G{nszfiUCv7U@9f9%XwIMK$%5zJWO-( zPLZ=v!$bfVUW_aO3Lg!n?D&1=ZQu zuml8FD3t1&E8VhYn{66{q>HJ*lXUhN6__x4c5BkR^bHgWU=jn)hNHi8dz?7_EwXl+Wm|cyl&KMMrVvA6so?0EUl$+%=Nju0zQJ`s;|YQJ zugGeD&pB#{$MK1xaq2!N_zd5b-bwY{sNMwH!#2wCF)}>@SIXQ+Lj@&9X-d_v|7Za| zkpd1XKzV|7&=6HuA<>V@h!LL>gFbjpWWvTKw%Gsi<}}6g(*zsR*LsP4a{0jEI*B0 zRoU+k4M)g%rnY_nxEea=vZsA~9=0i`x|fsPgNaIftS0!y1WlER4*!kE>@hPS*FJ&~fl$wSIC^DLbsn!B&p0XKgb3*e->sJ#pCt ziT(me;U6b(-r7t*d(fjV79c>W|l(FInPR{dkp{j4k)nU49s56wt!$=Wh3UAam1Ki-Yd? 
zQX3vWa#wiS?b@+lcmCYOyVbOI3zqqG+t)a2Y0to*(^>cK>^-s1>h-|i?mrCDoNSo8 zH~W3)KI18Of68CBtmE7i!xi&=Cud&X`}5SKf!FlAJ+H2Ly8CnS&BY^b#kLK9SzzDA zA?)+T($9{Ma-8mzjmU9(>w3EbOMN!5i_1lxStucJIYmP?v~-}r+a5ixWgW*;an>`X zwF<@C@u2H$bR9~qS{-Z9tle{$njwi+A_+n z;E%^oe;%tRU(Bl~Z^57YiiGivp0s(;-ek}A$_Cln9pCWHv+Wn-`zT-h&}Oy#U$Mf2 zPbGgk|2)3#m98|5nX01L|K!#@cH<+I2sRYd z)flwx{qxUbG-^hdU2BUZ=~D33Z!e*@cTLt#L};f-lc`5W=BNi-#6?v^Yfk3&=dedA z@*LB2ym?jwC?(u*?4VmWZ}wKtHg`Hkzk0UDH7xNQH*lQ&%kv$}Q4s{OWnI2jAc1fv2?`_09t@ z8pxb;A8qZqN8LUi?9qo`^?qJjT6)%x6IUYNEauE5_@cv!bx+>o*_B29a<)OByfcm_ zJ#`pjk(&g`cbbOuoC?ynO7`HS=V~eKfSHu}N|x#J&I+YA`%{lKYS=Ic_N~PtW;xP9 z_&s$9&)$2jqrPCQI%08Fgcq7+TGQFgY@cq`?b~XpGAF~mzuBE}kfXmopxJ`kb3Yw@ zT;65nGt6z45^f%Qf@WJF|Hay6AhTmst3}JY_uXD^|B+{7&|5^(%g6d7id}UE?pP;c zNm@2(u&2e?NHa~2eu~CN6@{2BUBuV-zj-b0>02@i-vqtW}1AJ-V&ue%hbru%!o^3J5w z~EJX6YK0kp)ik)~?lVXcuDL==OhX*1y;PxWBi^ zs@=ntd^-=gwkoc@<=Koao#btgsSPAl@#lZrLI1qMKM^5s@lR_@l5zvTx2m?jEpzuQb2dc2S!4lNGV!<%#UkDm&tsdh*j%!@tw@&9(~ zdI#22zwegYzWuui^B1g_4<7c{-V&S3x@jRh$rpyB@wTkt zXiMn2n1Rn~6K{`@-@anXZ$mrInKim%FWdR09&OA3&}Zbz{Kx9enl{yk7{Envhc;qW z{xtQOwlX3L^@c4?7y3aX_EEtH!oIbLD3a(APGiSBdOu^rD3Xh-sK5?)9$5FATdpl? z;-ag&o!0spwlPA-5BqWkC*4dO~i!0ttxl)?|8Vvph1#6KaaSl=MC~W zKHLc>4dR1ya_iPlPXv&)TeexSO&C=S;f^vq(Z!u#o3N`HG3pn2u}OD2oTR;9#1WgR_IK}hzMhn{ zocdr=hmPCly0>k+ew(|6<&8fI>g^pBb(y@cP&QIVWo1_juscIWe+t-)F zr4N&Q3yS{aGbjso(Cb0>CIs=jE(PZi8QWwULg#%&g%2mC?KoLaai!wZPlQ#qHngMk zfm|?QN*y{Z9ZjS6c@atLY=gz=wtr|ioH(nzS|eeF=Oo3Qg`W0Kjd=>4{(lCza@X6T zk87)E+ULJ{w$WX7@SCk2|3|d+Z+J&m(Z2zzCqr#!KEG{tKsnp?Y)pJR`N#Wh|8;E_ z_Y8OZ$Q=j8mDW8fwtB<=`PF_NAN`BnmO7|X+2#KPzvQL=?Q;IXce-)QZ?K5xau70Z zks2`dl6~pZ=_9QP)dp0!6#AH4K+hwmc0=^X;7A%s(=|r@%_#d)ho4tQT3Ou@M5j>m zNLieo!%nC9W5ddx8}AZ{>@^5FzZmww_y|m#4TqS|jahXZ8At%NS^!kX#Wf|q|G=)8 z6%FOQl~wD;P+JmqX23;KROs>0Q9GWb#51Rby{CtqdrJ|VjUGLd$rO{#KRszN(Dkhg z7t?>+WiICI;M_0LhPUv>6{)w5E*uskApnz=Sxx?lk``^c%WY9?xxQ%dt6_tZmU3{a z3+*gr4#?Hfoi@z?b(&G2e$TBJDeU=siqlALNb=2_XZ?!3oG!imc_vow z*`>)evEWR5(j>;&zzPkpKGXe$FJJoOralQFS5#P0-P@1T0bj`(oZhEkF1EfWX9Qs? 
z?0pK+yTp5>R)?e`iVL4PouEC~Z?&F62m;eSbuisz=h^4%PI@>5!{b!dO5-QiT~*A^ zlmXwW7sU#PLt6LiMbH1yBhKJ+Md79WKPo&%CjFhTukgi{cDh&JINI;(T6yMS_<*#? zJkx>^;Uyz&x^9VgQ&cvLyR`qmLZd%-eVMs-lcT-5I$G4;P~=x@2|Uwhsz;&+vp z!hP||N&OOHygG0ZPh9?iJGYPJpBZ_?qW{dnd$0E~mMc{2^PlfB(>`Iw;jeC`v5$^w z4V)}LmDl9oN;vt(e+vo!d6l^#k9yg~I+Y%8Dd0(oXe(+P8qTc?A~eQnh+s#Au536I_N8%oR)t3DIb0+(7-yOswBO)-@#4}=Z0j_>20dj-!8U+{IFW{s3YUk7}t97F0aF$lYMich4%XS z*^Nf=v%TB5<@wgvc1e1$VPLnlYb#YE3fZ2wHG}<9sbA1QPiozz?oDSd%I#PV!7ZO3 z7I=-C6%mjfSR(0K-bjTv+JA}+9E3D#IY)>?$ZIj%p*q^d5EkTLACg-E7AU4-aVhW` zw%6A;2yQlX&bbMZ%d`IVQ?|40(?0r-t4j_TJ0<2!1|LdmY-aL&hw%Gv4fPuzDOf8&bLjevlU zD;uqKZN9zu{TuBtOLvVoV_GzeYkByo@tB-P({-vc)2^dKQ&LfRQ#6pxu0dBRX!gn9 zgA-$H5RB+yzN_4|YdAA#!Cyk7AKzoTH4MJ$#hl*E9MK3@OCI=xLd~$sEo}d*j^$WH zoIZ8xW^S$-bzqAW*hYV70r}RA8#gAk*0$2I-EZiSYpkE!^KR6$fKyS^)ctG+C%u9& z;s|C3C$*WgPwOh9QG0oy-&nkecA8eK;Ov3BI1Poq#8bH%FOy*Al+2En2=z)A!0&Ua zt+TyKOZ~rWBMaQ$t~QH~dmNgbu%}Ev-66@~ZpqfNg;t*an~L+3U%G6YnLf=&-ROy@ zC{oXL`V<``7Z7W!5m@5A)Xd(Ht5Qygs;aKOs9oQ`k6Ba*B7h5w9E|?_Cc?N1(MlFM z$UKLcl|Q#`-TJF3O$k*$j){Dk>tEGxfL+1yV^I(7$F98?nrl&NT%4b}uFujD1%dA> z6W5&|kdzUcHQ<$F{KVd`SaX)J<$~Qlal)l$cfza}2I1At=n?DQJ&BuZd05HF z>~QSSk&m)V!qx^?l&-ln(9b-sYtkM!O~1ni2@`7@jvmW4-V+`k4%FO{HO!zvA3lh# zU9Rr(xE>FUReUnn==6-Uy`De7o#mVA$Td-TKV5}`uzcQW|TVpVTJ8#xy_a>WCEva29>U``NQV?^e*?c2SnG7}3p z0BBVJLYj^h*mTof0Pq^2U~!_BJh4Kz1uE1L&ssEYe0E=FYo)erPqGT33Hzw4>!Pc3 zxc|FqZciP8+nu{ZQ9c)2ZoGavEj!#Q9J%#e5k)#WW}qo3KAUIic7gf{@kcUD8Q364 zeDSYdzGLlfn{xYhCR(d3#u$7CSXuIGMH&9PW*#2?^sHB2brq7^wE>Pi!Otb-3QM#QPL&`NBV{Xj`rc5ThxofFO!l`9VMD7ah?p-acJ&(I=|#qc3xb z8HoFBvd&o_eU$w9QbhGaBq#`M_K0LUu5^qWsB24h|N9wZzm~Pk3b(Qn%IoXL=!95p z6(vUuXG7GDf==agj8s^Q2G`B0OQ``eIeAnIt5W)Y`4*~e4u@RpAxAT1pwx+6l+rHL zGJY7+% zmhD4B92Lfgr5w3Ch2@;lcGjrlJV0JyDU_5;X?*1nry|4sktIB%460+PJ*7m)(mEU7 ztm5|IwC4ezIl^Fgd4bi}_w8*S@Tki(4Q9OtNk5FSKPd+#GYlfLdk#8r=bhf%zx&Gz ze5i)^MR60ukl6{ShvkTSE;4T=HdnM4VnZyV??rXam3)F2-jHZk{ixLarCyqy+Zv71 z4|iVeVFYM&)$={eGtPLCrpw{8O;O8LGfdGm)b{Ltvf6Smay2e=k+jsXHPDe(xP@xF 
z?dMn+Zn%_&wW!x~9L1m(<7FjurM5R3}sXJeo9aJh1fAhn+@in3yXlF18U3 zzqO}ZuFoNO%7f@=@;Se>qxJSBM|B2KW}fpK8N7>HWf@|QRCGb1+LSrH)YNWjTW%?N z5moqtjWWplHpw{TJpG@8px46KJBSdc2kM``jrczFJXC!LD%m}Le(seU+y5NBrlKS_ z2ZY-*&FPNcXdAG$HIHGbkLpno+=tQQF!M#r?UzXXBnS#FSd&7-`n8tg-M&;hlG+eMaedRtszy3OCtsT{BF-I~YyWlu5HT=^1?TO7_a1RTMi;;29 zHC+UGaF!cDva=ep@g-x-^~gv<$d+GiHbC41yu;GUk#x)AJeB@$za`I0+x=T|RY^}*FS@)ye z^V?|)M;o(8|PMRn+Qy=~Ley)-+Ga5&Y)t$a34z;geT9 zUea$C<)Wao72&KVYDEq2`;HlC>|{^OjdeIJ(sSY7AilNTsxK(xR-KTj4e-ex6c<&z znDd!KSyaKqRMSP*Bh&1jhm5JKI6Dt}o0ugBZ=+kXK9kTWK(A$Ga15O ztAWlwzwc#s6d_m$zWz35z~6FIghhnAOioLbRtGCmo%?>T;sDz%m>(Sxveq%-VW`Eh zL-d}YUQ{hjylX!9jjnWFQlVoR9?6aeMH+0$>M?xw_^}`6Tv)mrel!elrQWBHj_tz> z;xDzD9HEn3@vzGO{jHGJvui3 z;?k_tvPUWrs<}=P^m5S|&1^B)jGN|A;5jDQwzG4nSthHXf+eoTvt$jdo->rbLUAc- zu{rINZ}}bfd6Tn?uoL~YMUy78ne#WKQGo;2s40V7Pd@wj@G}Gati}NpGunG|kLV`@f z-ww2PXKH8b;KQp{#^#pQ&%4>NpbyVLlA5$9i2m($^3=3@+BI}zN6x*tIuJofFciyk zvuN&x@SJ$Z{jAdzf&jzeQk9CYACA^OKQP1oG&>=fRj9q{iHDO`<)I}ZvURSTt2}gK z8ZOi{BFSZN@-+K5gO^my9ye}WSni(=ckZ;0gf3CML@BCtCpk7|3s7;NIWkF6VE-jU<=lkPqwd`?-)JE&%?|~ zqu^GDldSwe{P?3{o(w)5P9}#igj11(2s^ImiQqs~ugDX;77| zvD58E$QS{u2necq5`I#~0rQ2uUoxzf4$EZMXCnrj-aWRu^ZWjSBD9IH z-XLb+bkpANa#u5vMoBP76MLmVrq){2_>yKZ1<@`)cqCT~HNNoSWSIupk-r{QE|-MR zq-D#s_b0D80*@KMuWY$DROtwdnPgy7_4Q2vsm2$TY8Tfxn$k36!l*E%`*O`kGl{*l4;^FT+Uk0oi0@_bw5p(M z?Nai=3^-Vw88afga18Dt&7=cN&Sx>A{1>9613-WX^phWGj`}jM?Fc_@GZHX2glw-# z3?C~0u_K4_+140Ecrv8YNh_Hl%Y&$V4pP7zpg`)3$jb54yZP3hnv%53Dk`*il0%u% z8i@76Ktz=5~EI4fo$0K~J$U|^E`#o|_8N4M4Z z?G%)ju0-lPpWeuP1~XQm-@NTw8vpWq)S|_Ued*IAA!JNkksFozG?Ko?d{50@W5zt5 z`;uI@7#WKofKeZA56&I~HMyKp>o(GLyC}Oyj7ynX!(Pva@_y#M=E&F$kLy0yjT?vL zj;tw}+qO#=JK4|aOQ_Wck#GlsZ8T9BX|Zgz04kA5{u&RB^m5Oi_V+(TQ}Q6%rouV- zVQ$9hpD$_NdA&z&kbEB#=mSKbLj*5m$jJw=3Avd5@b{}H54~L|T9WR!7#A6t>G~mc z*bHdc9onOm27C4%-nO7gY#+@g7f+dYrJUAE>LA!0G3-hUUSghpqE^N~+d`lVf*~E>mITI%U(^p{-a&%8==CNgCMHr) z9pK3iUy-=8Bsq$MZMxrL;x_I0rQsD1Sfh@r8-NX3NyEc;p>DTBWN1wvdpbkdZ2vyx zW+8<+QTSAhvT`L)ak+aA99T*~S;PYL0`~`6C0y=E{*A@DJJB6Q`vO0gqCU(MFXHpu 
zCatO1^)cC7QVm9)4u+pWix$tLMY5!4Fu3NJm-HMd(_(1Qq_ng&riu@;@LmQOWR|E0 z*ghEZ^^>){7D@GxwOiar)A6MrxYAi*jp<9521p?s2>%dfweZ-wn5wZ?Aa;`;J$i(& z>Hcv`Jo1pYs1?1_yFE~9$==VCQV*E-H);x<_amnL%b>A~fNVzDWO(-V2=nywpwJ?W ztlC#hxZGiul#LDs-NK(yDZaH}_~~r_rD{4vT&Xhnc!mf`yi`W$Lpm6femHSC46$G_ z7!Tg-&t@lox2UUOdn;|0W`o`dw%6-(H@c+EeoWwH_`smBZ@ zR6EyppBO%G)L#cX%u;Loce_?^ft|=jM2_%(&|Lgqna+9jV4e0+$O0*hpMNGYN1d5< z)bjb~$OwcP?C0BA^OX)Bx?x4AL24jdxBZkgFZr^Z2l^rwYj54ett|&!{o`<&Pif~# za(td3$KXUY{kq=o7Id=x@~7bX0*w=bzX0$nMmuk$LwL^%v3=Bj={pl@EtK!qQ5$T}WQTTKsTxquhCq3GK)rqgk7cqf`O|(eG1@ZtHEtOEXMxiM0-xY4vF{?SnrQ!?=`+cQBiWZ2Hp(IaL6(?B}7zE zk(s4YX$CXscoS2amSa90qxyN>>IS7u7W>^O$WVv&f-$qXs|n0D&L8`7a#g%Pxzs4k z+*G=eNG>mzYev) z=&0rUJmKv-c97s}j?vf_qClmnG%wcRw64!R`f84kEXNnrEQT8rt(f@N6@OZELp-RY zc%osG;a2%b3X>6+x-{!lI!sE9E-tTr8BNL?VR^FW?-2Lnb|0j9MM_Sc?RWU_iXI2? z`yx+EjP}0G+|&x1b4w*7(#f&mS&m#l4Hrfv8d;mlBeDmCUj z<~NsOH)134-`w2XPm^Gsf*r#lNU(^N`C~pXblVvgE&_bTdqH}>*gd6-rkus1Gj-}f zCK86hWDH_~4y9@sDzK!Z*7Ci}=q7_xvR=6}wj4@ofu&o8 z5iiOQP4*^=?gG&8zeC(<5@`Mb$+}P_NgQEy5Io({+EX%)^OKE@ZciH!zfe*a>M&bu zE0z`Igs((>45q=oAO|zT14yn8Qgt0*HP@wXUijsMuMnIEA^LrpEx*R3X^yAr$ZYwv z2&LybeXUB{;0rp-d-FIxSN=n1osT5+5J{UiA3?;}y-;ozC^8i(nwMshz!jOJTg*-7sJt@4T3Kqm>NS zEkvU|gRrSmok3n)&pgQs>i%9shh_^&p?!yJG?3P+76@b*Q&&T%1P2qRtilRP0SHM4 ziVLCuqi0Bt9Mt|7v&z22lmm!w=YDqMhI%2FFs1J(ce*3VilI79dc(IOLJ`Oerk@xF zP##1uxRCT1UycxM*n%YXI<5u>7&|8xhbD4_d`ZN2>l(r_Kd8`XURh`$uaL$M_p zIXOAEQS5cFzAcO+W{9$GQ=uqQdAZy4JQsQP`Q)_k(3?u)z;eM}59J}@uXRNpA^4kz zzn%!A*!Dg_)%?Q;7UX_Og7BW$misAKLZTAP^x0v?#1^OKnjL7NZf=($i z7lt0nUzbo;GpJ|J>qC~NU!j|m%Vfffj;{7or2V3|*>2c)D-MV=>dj-BT6edS8@5nc zN?kP1GuCe%p}-e0;8t6PZt^*@r+eA~80keg2fWhDCYop9yKjBFqBi6+Ym}*ik(sfz zUq7c(>96qAT#Ow|izcC(Nry+f);;qt9Cp}CLnDQ#_khENeR(XMtUaqZiOv%i!f5R9 z;rDfo&r9;eiK4T$CoplRptD=a$>;YiS-7wRvVaKSAMaQ{9$x7MT?6AI1o6ilJ~2A& zBUR8LHp+;-5FqMPTLZU_A3n>+S{(26g|~cD2q6 z2$9I_DmXXQ>NJiqx%ByMT=oa}3SY{#7l-gGW|&|IQRTcJM|%1)m6~~6EOJr-S!_An zW$wkq#T~*=!KMmr(gw387d3NsSBe5VeH59|t^)ah%I_Qmy8=l#|_ 
zYho0j_m_*D1m*0P?Awn}d*cJ4X{tAUKPP77N#hUZ$AK}c?AQY)yu1O}OEhF8re(OX z36NL~{$Sz>OGkb0ay$U0&>1Vrhl(2Wc>RYNHey>LOh6$|uOH_jWWb!NK9eIg&^(2{ zq;AXBOim>hIM~*DK{kjaHIPXf5$k{qICkt9W!#IkbA)l1gu0w3uWlR?Kzd$EbE7J3 z7$-vh)M4N_utNrkq#!+t&LN04?P&)m=tMFDXBk8lJsNIzeuMsQx8`(1P7>S@Z816nsE3YXs8Ip2T|^?fi^>?sjgN5;EeV{M>8ossU z`LM@5p-85Qz$6QEmUuR7c3Mka^*Yt|JnKctd(VAcL#Je&pT#~OpPo9|32bZ=T9v9r zh^g{EUXG~ke%Qz7H1A$K?DR18SM8=EdurPn%2kNjrR5>B}iX}I`iV+vV zzD4fMnNe&irFQMk$SJWm-04XzuQ`&hdC>s)n6csOhpsb@j>y$V#O+ zE5tOTdSrq%QSHc?mvO0wqMaPy8>U|tr6XL$GV$KSNJuo6gy@&U$3*9LS<=&<$#XsA zF&7{rukif< z>E?vWdmw@OH;%Z}C%=~YBU;te2Ud5?y=&@;}M)h}J69F`! z_^=s~eDcJJAuvBa{VnF!$(CT(<*gnjY)DY{C-g!mgb`i>X;a0%Jqb7w>c;7ZeoK~d zHaJ#mCq${eXjs=>9453B0h9~&Q$2Oqve03Be?0*a@|0ADJ=@ zj@l60T(1tA6TrLaFfA20O$o8eRqf|ix%Qsnv)b+OdL-!=X^QexdlarHEsD`Mi@f~4t$XDqG5vm-ZQFgIkQ;W=EF1L#g2-ey?QOD%+G$0URIxo#37y$a_i#z~)39Y(t2 zF|E&H8NxXx!EflTkH}87xin3mc1}67aKW_umgh!=%Fb}_zis&Sx+lsZ1KAqkmrlUQ zEM>Asd)>QZ()XgLxtzaNg~ct)=p}U=05ykxO@Q#PorbPB&_lQbXhRdoE7>2aW7f|B z2vw(8eO+K-8wLbK=| zYB4&>a!WZrlO}5gpWOyRv^po@>uDmj=`GtRRzWeb-ye%=k({Ygb zA)@6EF9gE-xaL})8eS*vR2$DUXFvk1H8r3SY@&oASs7@eZmo?chWA0=M7`@f-2Q|_ zO4?;pFS-Z4p*;!9b6m<}@WECJy6y6Kk;=ec z>t6KxQz!_YOg76jlur~uJaSHY_V#TUvZ7#oH;9IQGV{SW;xc#-@37i-uH1O(ZjjSX zLb^aZ&jf=+=RFoYLE3v5?;1_fpv9hto_@VACoM$;4cMGjyv~?{WleTz0V>cB=1b6Z z@G)Zn*qCytHKqZWoZzd<}%awD=x`6zwr+FW! 
z6Ggtsf(f~@xocSBw+WWKNbG_*w!#R5azpM(o$7&8#$>vtMTy@&R36}Ec?vw2?jxc{ z%(NHX;;I~=$7vp?48OTN-#tYXC$yhlMD_pF@TD0iH&R%K<3>p+eUSC^nBuVTqOfZ} i{a2l){L}p_wT+su+`6=lt!0V)U1!qFiD47wZTf!&>#g?y literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/trace_volume_heatmap.png b/doc/doxygen/chapters/images/trace_volume_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..d08f09eca6fb64035d1fb55289d1059602ad4880 GIT binary patch literal 5717 zcmaJ_2Ut@{*PbLaX;Hw2pMV8iT0js3DiA?_>97GsX@aa$6p4mj?}96=h_Li3=tn^5 z9cgk=vCMq2K=oVRbEEo=lR-oO9-!^S(24=Kg7JdQga8oF4!nWT1b5 z3;-4a07K`)q5oj<3eNz*0dr$ZJp@6}f3mW&IA1UT5CQ`PFqtq3Ja_>1yS_wVDFBgR z2vK%`A^{*l0D+JYq>CmehpLk39jKnTxVWmSs)>oo$&)8TLPAnfQp(HAVHj>{X&D+C zLZ@JCh=9NDM$?Vh{;^Ly!XuIUo?ne-e^~z8#P(3If9j8V>@uR$pNutgNcmCZwqqE(t_P zS%}oFTer~37Zw)g=jUfF)0C>gsy>^l3*&M|*pFYilc- zqDPM&J$Ufo{{8#S&CPf3-o1VMHk!J|#>R$*hWh&Y+S=NiH*Z!~S65b6R#a4!m6es0 zloS^i7Zw&46cps;Xe(Cn~RH!v$OMw6DORUoQ@wq?%?2X?AS3o zJ3AX28*6LpBS(%{Sy>%AbjZTOf=nixo12@NnV}3ZGBPqWG}PDEKX~wZqF<{h`bVh%2;b#2OoF`dCICd{3=ZtK^i5|B z2o+BlAB8_oQM>TD8`Svx4p{7z!)z7j{Zna;piADG2<-kpm43ZrB-V=l-Vk$SSLm;+ zbebY~I;8HI*41S&?d#e#`%>&x%aM>z{Tgh)HiO~xMdmGrhUy``1OI;QI_cKuFnAiT zJ5c?;I^xyIQPqm89tZsxql(vVDuTL^!rHTgTQBxwS<2~)wlB6`OurHCuii3jH^>J42V${l!eW8IUl7H(*;;a zHWL`Xmf?bqvZY|}1(x3UPC-!KCP}}#ci?>XM=5dW=rF$H*642S3mXRu+LfBSt(BTF z%I|HV!9>Hwza>DqFa#`x(ALU5-AD#p4lRI>)XizANGw(gIPs7`-b#CmotKmaSml#| zE6a`32r!cPwGb+hBL*-VQ~}mpZKbWJ6SDyyoQzow<#AdKXe4lxz&*h)Qy=?gYD9KL zfUy$_?`<JqtHh;;xfI2*I1x~#>w}5WxHi%xf$@EpF95Z~n&GDa z$s$b@ey>+Ig-26>oyXreW$Ar1dm~465mlYXn6kx1;Tw^kC46cud&DzKEUj#O5(HSs zpA;?N9G=wx#BNk1L8EYs^X0K4O1+gKpkr6b%h{zI4tEh~9Cc2oou{ zA9_qBb(*zf82K%ebS<_wniU#fhY~oMA;2>F7BiU@%$E72_YGB)^ilM8;-tW#l}5!Z z=KqT>>*z;sl<;Z3$m1;}B!nT51@FwPI%lGcXn|FIZs z{j&kXQGdi6rE|_6lf*qHk$E-b*hhyW6@ws@&YsXc?7q0p^enQ|e}@va;Mcs8wosnM zgxl$ge$>mTE=4Fxz}zeLTvSw|jRY9(EL_^xEDrRs?Z!s?2H)a|RE@;QvJ7Pe|sUfT8Zyp>&(ZYg#)l?@F%6&|g`E&Cz}ITM&(tAV%SN{?;wD$5rh_EU~-yim^%?Z?r5V@~x}RfD-(g)P3_!!0vU&T?Ja 
z1cu{0Y@$Q1lqdq5BFwhjRV=AWWHH06tTX+fE^%Oj0OrOEY#)b@5zCzUAVy$+S`lWh z*B(azc&i-_fJYc=TSIAip2BAaHZ@6tJ3OReaf-xYD)xCqrw3-qC1b;ngqsYLy)ULx zN3;WQ3Z@7ToQ3J|6Ly&$(a6e}EtAM|Of9L@vJKn?!V)p?xS>JYtbRefr6C%R<)|A% zSX#*{`%lQy@T}=3UZ3OkIqwv6PJ`Xdt~bVHd*f7~Eho(!objw%TF1@tIJnc$;PIzj z3YHee8em^xJQs}ZdkGnxRJoF~r2bhs+FSoOqij4coX9 zXxXb+KD45#v0eD$Qh7E3q+F{`e#)!fZGBpRBqOVoV+_2<*RVX*9;gU4b7ZsM{NeWY z6r59O73SWoqc#-J3;V^+-x&F=fS;96SH(_ZS7!9hy|1&Cb&VRkXK271fYqwBS9L|e z5kVNQzv{Yp&)|8pNwkzP<0(0+dB(Nd$`DJZVm`FBeMmJJH;EVxa{*fGVaHmP0zC1V zajzy=hhtUKVnZHbRw@+uI9NY;1$&4H{biuB_}X&I9u?JH)B^_vpjWvj7I8|{*Q!Rr72ucP-U`c} zvORq7&mVZ70v2n#GHA7llYbqNm9+-(TBq#u1fabW#=!klAUkRFl~}!QI~S`bo2k&< zZ1O-D!y2_{GxE|466|DKjj4oNt3SlC9&$X<3tG+Klqfl4+4;smYa@K(2EqCFB9@Vt z9j@R_-+wsiu%d~bQK-GjjN(>rxZNMc-sjXgDUF`N+7?9jJ{xGk*IX?;ahnuipWZW5 zH90-(>UbJx@xrEVA<($V<)8|6!2vF`s_ZZJ)?%9UKtb-s7z`h57w)NrJ=|HYje~b! zrsc`0J6=V% z{sz3dufXE~XYdtpoca?5&m(TRWW`b}0BX{Pwp^WpelZYVSb}!yJ(8S$71}qK*nSVMZllw$>VEHj5{HIjhpl` z&J?6QD@yzOu!5y%u`_mhoa_TkO&+hqDqFX6byVH+aiV1>RJh7nm^N|PfZvSS5!nA3 z?({B!j8H#8(It~yzVA#qXWQjmKz%I%~4xN_iSV2g)J!7zkKG=xO8oB8O?DI5Gau> zo^H9SDS`25K`?NE(Q3NqCwGRDM9;wdm2D(Hp6#4cz9gYnk{+1g9PM+b!=x7+IWvMe zJ^cEGr_l4BDe)K!O4HH>;&5mD{M(4jX)z+Gm2*Ps`23eUjhj&K^_OdaGS$wIwfJk1 z3SX`be$^6ZOq#gif^lA#(U}?wtyb%)2zo)~o(>~pVO5Q0SJ@=MxhJ`qw$yRK1$|g^ zs@o^&rTc{%D)I3$UD-^~9VseTzk}*AF%YMIw4ZS$n3bdA?-;V4&eq3JHgjq( z&RSY_tkLFM!hGf>0k--qjS_kLvxC-Hhi95V`OnWuCctB{wY`n5#!HzV=S{xX^~MMd zHUx52Xe&X3r(%M0yVT(AiMdBn#kC-Y^U&eVkgyp;%-HO(8*$6TZ?vfm7p@fEl{nB@ zo^W?of92svyj1`aSgp~u_tvEsyNA1***D(5ze=2QS3GEfD1E{!rMAyK%wp1Uka6OPXWnL?1F97Z(?Y3>Zd(^!nxhzp{sJ#v z^iLyO=jUpxHG|Dbd6G&KExu}9g1k<6e%`sB2yI($r+`D-p=zRY^rx56kWbZpJby%n zw~ne^)5yyr{E6zSMvqJG?EHUr3k8V9z-CXc-*9`8oxh-R^-FTarzP^0ZLCe~f=OL+`KBs4P zY#JY~qcIMBWQGp4sy#XLyGNj3ck^1H5jA*yX0P|+y5=B3YD0tETc-_b3@7=yW3>*J|R}G=cO06di^y&(EGx%Ugh+EZ9PwBj)V_g8}b!$YBVbi zzBTu3>g_Q#!QhCSr8SN(Y^Qne5UZRsd6~0MCD(lSa%CfYt1pR2N3J5E&&Gg(p6P)i I9f#2W2VYR^Hvj+t literal 0 HcmV?d00001 diff --git 
a/doc/doxygen/chapters/starpu_applications/applications_intro.doxy b/doc/doxygen/chapters/starpu_applications/applications_intro.doxy new file mode 100644 index 0000000..7692707 --- /dev/null +++ b/doc/doxygen/chapters/starpu_applications/applications_intro.doxy @@ -0,0 +1,30 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \intropage{IntroApplications, --------- StarPU Applications ---------} + +\webforeword + +This part presents how to write a StarPU application from an existing application. + +Some of the applications presented in the following chapters and some others are +available in the git repository +https://gitlab.inria.fr/starpu/starpu-applications + +A full StarPU tutorial which can be run with Docker is available at +https://starpu.gitlabpages.inria.fr/tutorials/docker/ + +*/ diff --git a/doc/doxygen/chapters/starpu_applications/code/stencil5.c b/doc/doxygen/chapters/starpu_applications/code/stencil5.c new file mode 100644 index 0000000..3a3d585 --- /dev/null +++ b/doc/doxygen/chapters/starpu_applications/code/stencil5.c @@ -0,0 +1,57 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +#include +#include "stencil5.h" + +//! [To be included. You should update doxygen if you see this text.] +#define _(row,col,ld) ((row)+(col)*(ld)) + +void stencil5_cpu(double *xy, double *xm1y, double *xp1y, double *xym1, double *xyp1) +{ + *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; +} + +int main(int argc, char **argv) +{ + int niter, n; + int x, y, loop; + + read_params(argc, argv, &n, &niter); + + double *A = calloc(n*n, sizeof(*A)); + fill(A, n, n); + + for(loop=0 ; loop +#include +#include "stencil5.h" + +//! [To be included. You should update doxygen if you see this text.] +//! [starpu_codelet. You should update doxygen if you see this text.] +#define _(row,col,ld) ((row)+(col)*(ld)) + +void stencil5_cpu(void *descr[], void *_args) +{ + (void)_args; + double *xy = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); + double *xm1y = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); + double *xp1y = (double *)STARPU_VARIABLE_GET_PTR(descr[2]); + double *xym1 = (double *)STARPU_VARIABLE_GET_PTR(descr[3]); + double *xyp1 = (double *)STARPU_VARIABLE_GET_PTR(descr[4]); + + *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; +} + +struct starpu_codelet stencil5_cl = +{ + .cpu_funcs = {stencil5_cpu}, + .nbuffers = 5, + .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, + .model = &starpu_perfmodel_nop, +}; +//! [starpu_codelet. 
You should update doxygen if you see this text.] + +int main(int argc, char **argv) +{ + starpu_data_handle_t *data_handles; + int ret; + int niter, n; + int x, y, loop; + + ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); + + read_params(argc, argv, &verbose, &n, &niter); + + double *A = calloc(n*n, sizeof(*A)); + fill(A, n, n); + +//! [starpu_register. You should update doxygen if you see this text.] + data_handles = malloc(n*n*sizeof(*data_handles)); + for(x = 0; x < n; x++) + { + for (y = 0; y < n; y++) + { + starpu_variable_data_register(&data_handles[_(x,y,n)], + STARPU_MAIN_RAM, + (uintptr_t)&(A[_(x,y,n)]), sizeof(double)); + } + } +//! [starpu_register. You should update doxygen if you see this text.] + + for(loop=0 ; loop +#include +#include "stencil5.h" + +//! [To be included. You should update doxygen if you see this text.] +#define _(row,col,ld) ((row)+(col)*(ld)) +void stencil5_cpu(void *descr[], void *_args); // Same as in sequential StarPU +struct starpu_codelet stencil5_cl; // Same as in sequential StarPU + +/* Returns the MPI node number where data indexes index is */ +int my_distrib(int x, int y, int nb_nodes) +{ + return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; +} + +int main(int argc, char **argv) +{ + starpu_data_handle_t *data_handles; + int niter, n; + int my_rank, size, x, y, loop; + +//! [mpi_init. You should update doxygen if you see this text.] + int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); + starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); + starpu_mpi_comm_size(MPI_COMM_WORLD, &size); +//! [mpi_init. You should update doxygen if you see this text.] + + read_params(argc, argv, &n, &niter); + + double *A = calloc(n*n, sizeof(*A)); + fill(A, n, n); + + data_handles = malloc(n*n*sizeof(*data_handles)); + for(x = 0; x < n; x++) + { + for (y = 0; y < n; y++) + { +//! [mpi_register. 
You should update doxygen if you see this text.] + starpu_variable_data_register(&data_handles[_(x,y,n)], + STARPU_MAIN_RAM, + (uintptr_t)&(A[_(x,y,n)]), sizeof(double)); + int mpi_rank = my_distrib(x, y, size); + starpu_mpi_data_register(data_handles[_(x,y,n)], (y*n)+x, mpi_rank); +//! [mpi_register. You should update doxygen if you see this text.] + } + } + + for(loop=0 ; loop + +//! [starpu scal code To be included. You should update doxygen if you see this text.] +//! [Prototype To be included. You should update doxygen if you see this text.] +void vector_scal_cpu(void *buffers[], void *cl_arg) +{ +//! [Prototype To be included. You should update doxygen if you see this text.] +//! [Extract To be included. You should update doxygen if you see this text.] + struct starpu_vector_interface *vector = buffers[0]; + float *val = (float *)STARPU_VECTOR_GET_PTR(vector); + unsigned n = STARPU_VECTOR_GET_NX(vector); +//! [Extract To be included. You should update doxygen if you see this text.] + +//! [Unpack To be included. You should update doxygen if you see this text.] + float factor; + starpu_codelet_unpack_args(cl_arg, &factor); +//! [Unpack To be included. You should update doxygen if you see this text.] + +//! [Compute To be included. You should update doxygen if you see this text.] + unsigned i; + for (i = 0; i < n; i++) + val[i] *= factor; +//! [Compute To be included. You should update doxygen if you see this text.] +} +//! [starpu scal code To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_applications/code/vector_scal_starpu.c b/doc/doxygen/chapters/starpu_applications/code/vector_scal_starpu.c new file mode 100644 index 0000000..dd804ff --- /dev/null +++ b/doc/doxygen/chapters/starpu_applications/code/vector_scal_starpu.c @@ -0,0 +1,93 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +//! [To be included. You should update doxygen if you see this text.] +#include + +extern void vector_scal_cpu(void *buffers[], void *_args); +extern void vector_scal_cuda(void *buffers[], void *_args); +extern void vector_scal_opencl(void *buffers[], void *_args); + +//! [Codelet To be included. You should update doxygen if you see this text.] +static struct starpu_codelet cl = +{ + .cpu_funcs = {vector_scal_cpu}, + .cuda_funcs = {vector_scal_cuda}, + .opencl_funcs = {vector_scal_opencl}, + + .nbuffers = 1, + .modes = {STARPU_RW} +}; +//! [Codelet To be included. You should update doxygen if you see this text.] + +#ifdef STARPU_USE_OPENCL +struct starpu_opencl_program programs; +#endif + +#define NX 2048 +int main(void) +{ + float *vector; + unsigned i; + +//! [init To be included. You should update doxygen if you see this text.] + int ret = starpu_init(NULL); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); +//! [init To be included. You should update doxygen if you see this text.] + +#ifdef STARPU_USE_OPENCL + starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL); +#endif + +//! [alloc To be included. You should update doxygen if you see this text.] + vector = malloc(sizeof(vector[0]) * NX); + for (i = 0; i < NX; i++) + vector[i] = 1.0f; + fprintf(stderr, "BEFORE : First element was %f\n", vector[0]); +//! 
[alloc To be included. You should update doxygen if you see this text.] + +//! [register To be included. You should update doxygen if you see this text.] + starpu_data_handle_t vector_handle; + starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); +//! [register To be included. You should update doxygen if you see this text.] + +//! [task_insert To be included. You should update doxygen if you see this text.] + float factor = 3.14; + ret = starpu_task_insert(&cl, + STARPU_RW, vector_handle, + STARPU_VALUE, &factor, sizeof(factor), + 0); + STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); +//! [task_insert To be included. You should update doxygen if you see this text.] + +//! [wait To be included. You should update doxygen if you see this text.] + starpu_task_wait_for_all(); + starpu_data_unregister(vector_handle); +//! [wait To be included. You should update doxygen if you see this text.] + + fprintf(stderr, "AFTER First element is %f\n", vector[0]); + free(vector); + +#ifdef STARPU_USE_OPENCL + starpu_opencl_unload_opencl(&programs); +#endif + +//! [shutdown To be included. You should update doxygen if you see this text.] + starpu_shutdown(); +//! [shutdown To be included. You should update doxygen if you see this text.] + return 0; +} +//! [To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_applications/stencil.doxy b/doc/doxygen/chapters/starpu_applications/stencil.doxy new file mode 100644 index 0000000..77d22af --- /dev/null +++ b/doc/doxygen/chapters/starpu_applications/stencil.doxy @@ -0,0 +1,101 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page StencilApplication A Stencil Application + +\section StencilOriginal The Original Application + +\snippet stencil5.c To be included. You should update doxygen if you see this text. + +\section StencilStarPU The StarPU Application + +The computation function must be defined through a codelet. + +\snippet stencil5_starpu.c starpu_codelet. You should update doxygen if you see this text. + +Data must be registered to StarPU. + +\snippet stencil5_starpu.c starpu_register. You should update doxygen if you see this text. + +Instead of directly calling the function, a StarPU task must be created. + +\snippet stencil5_starpu.c starpu_task. You should update doxygen if you see this text. + +And finally data must be released from StarPU. + +\snippet stencil5_starpu.c starpu_unregister. You should update doxygen if you see this text. + +The whole StarPU application looks as follows. + +\snippet stencil5_starpu.c To be included. You should update doxygen if you see this text. + +\section StencilStarPUMPI The StarPU MPI Application + +The initialisation for StarPU-MPI is as follows. + +\snippet stencil5_starpu_mpi.c mpi_init. You should update doxygen if you see this text. + +An additional call to starpu_mpi_data_register() is necessary. + +\snippet stencil5_starpu_mpi.c mpi_register. You should update doxygen if you see this text. 
+ +And to insert a task, the function starpu_mpi_task_insert() must be used. + +\snippet stencil5_starpu_mpi.c mpi_insert. You should update doxygen if you see this text. + +The whole StarPU-MPI application looks as follows. + +\snippet stencil5_starpu_mpi.c To be included. You should update doxygen if you see this text. + +\section StencilRunning Running the application + +\verbatim +$ docker run -it registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest +\endverbatim + +If your machine has GPU devices, you can use the following command to enable the GPU devices within the docker image. + +\verbatim +$ docker run -it --gpus all registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest +\endverbatim + +From your docker image, you can then call the following commands. + +\verbatim +$ git clone https://gitlab.inria.fr/starpu/starpu-applications.git +$ cd starpu-applications/stencil5 +$ make +\endverbatim + +To run the non-StarPU application: + +\verbatim +$ ./stencil5 -v +\endverbatim + +To run the sequential StarPU application: + +\verbatim +$ ./stencil5_starpu -v +\endverbatim + +To run the StarPU MPI application, use the following command. Setting the variable \ref STARPU_COMM_STATS to 1 will display the amount of communication between the different MPI processes. + +\verbatim +$ STARPU_COMM_STATS=1 mpirun -np 4 ./stencil5_starpu_mpi -v 4 3 +\endverbatim + +*/ diff --git a/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy b/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy new file mode 100644 index 0000000..cca5228 --- /dev/null +++ b/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy @@ -0,0 +1,185 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. 
+ * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. + */ + +/*! \page VectorApplication A Vector Scaling Application + +\section BaseVersion Base version + +The non-StarPU version shows a basic example that we will be using to illustrate how to use StarPU. It +simply allocates a vector, and calls a scaling function over it. + +\snippet vector_scal_c.c To be included. You should update doxygen if you see this text. + +\section StarPUCversion StarPU C version + +\subsection ComputationKernels Computation Kernels + +We are going to transform here the computation function \c +vector_scal_cpu. + +\snippet vector_scal_c.c Original scal code To be included. You should update doxygen if you see this text. + +The corresponding StarPU function takes as parameters a list of DSM interfaces and a non-DSM parameter. + +\snippet vector_scal_cpu.c Prototype To be included. You should update doxygen if you see this text. + +The first DSM parameter is the vector and is available through \c +buffers[0]. StarPU provides functions to get the vector data, and +extract the pointer and size of the vector. + +\snippet vector_scal_cpu.c Extract To be included. You should update doxygen if you see this text. + +The non-DSM parameters are stored in the second argument of the +function, and need to be unpacked. + +\snippet vector_scal_cpu.c Unpack To be included. You should update doxygen if you see this text. 
+ +It is then possible to perform the vector scaling as in the original function. + +\snippet vector_scal_cpu.c Compute To be included. You should update doxygen if you see this text. + + + + + + + + + + +
    +Original code + +StarPU code +
    +\snippet vector_scal_c_align.c Original scal code To be included. You should update doxygen if you see this text. + +\snippet vector_scal_cpu.c starpu scal code To be included. You should update doxygen if you see this text. +
    + +The GPU and OpenCL implementations can be seen in \ref FullSourceCodeVectorScal. + +\subsection MainCode Main Code + +Let's look now at the main code. + +

    sq5}3$kR(ts%3yPkpVZr1Y3p%8F<*>} z9}S(=SuF)lOSzr^VQ&I&HNT!xoVbyEFF>LL#Q_ z{cfcG&>QYhZ&9^iTaGyK8@61bkGqerklG2-m6gz~O(Gi3SLe3gvsRn=_FBLFGwb=D zaVC~(F8X$J)gFJfOaV7TuB^=sete#sIK>ttHo*^Ar$FJuPeniJumdQ;(+I!d!m zjBLr6F!21c7(AW2c?n17{N=V#3SPSA9Bljv6;Rf~;(thLO5AU!KkAPgmrFm+N>=n% zgoco@0l_99amcVKlrP&C_<5sdLp0oRRXeP1>dq(D&OPP?^{-pBi5TK5w?wZ>imG_4 z+J5VW&Ewu`aSK?|VHsj`V9hWni|Yj!X+;(hMDl}xf$(r;1GV-f(CIZ-1*_;!6n==TpJs8T4Yd9)3NW}8g{aLTiL^pVUvpY4)j#lH!GKVK$ zUo;N*7tTG~?1XO)67U09^A#b9+oZE5gexmOo_9B)V4dk9e0TN{-mju2>!~=(0UdB& znPQ6KLezmcJf-~vG1RK*tmDb|eSTr-qO$oVGH0wA8w$sa3mP16+x+x*u8XO;LN&ti z3%BX)D&Xm8dt&ZN{^gmUwvErLq6a0Is6{D;)_Z+W?Hcu$6Iyoy*)s*%GW=y65)=>R2JSUC1P9a)F&63tM3JqPd(X zHSvvzbiROjYW%u&*N0gmZR0m|9}eMI%=v3+!qH13syEtjuM@TP8#K9I^T?*3PMRHa zzu9@5rSBjoL)OV!t5~oZ__rHwS6sjp>?a24uh(t&Qbt}pTPB_6&zkynj-PH~GYu)! z>~FkPWua{7BrNL?+7)NqG(xBntUwlygKmOJc(KejFkOw_W(GfNpSyn=zO!}3Kx-f0 z6#8r7IZ#eWV{yz7aEOZI1>!8u^1-kx@(`dZjaJk(4A;aB*y2vk>#|Z==`!>lSK}_+ts;DeNc5nv{Icq;Ssio{9p^I;*;A(uVw@?aDEj9f2N#xvamRu_NvG zlV)j)>CwFRNCM9KYfNi|Ct}XffeO#qIwHr1Mf0!UyK4Mf(j{Y^fCeGpK>G7tpZ$ol zA;w2jSMWFlYLu!uDt5+emc8z-hEGCaMYGg=;*+SP3&5a$LF9yine`%N8Hu9RG&q~Fqf>%cOIT1YQbD#WBAasT@AFpgJ` z6z@xA(5g*SK&ZisW(k)}m#@k0-P@3Vg`FfEHLbV8N6Mk$x-~B$7e|I1PH7JQz5u@O zz~H4;X9<~yYT_%{hX}_5zj!#h!oZVSyeXEY_yKl>eEcfl1zj>X4%9vST!A;pa28=D zev4Pj4(Os`rKmUk!Ej{I*!5{z)$_)*r01-4>$GHq;0k9kYjl~vsXC?3t=9_|gv<7c zZ);5rwYa?O7(<0z$r$|1y?QEZ32ZU?CU|+{c$gS;R^gmDx0GKPLLKw3Iy-}nQjT5aFQ_i@u9`8A@D+++(ve|+C}-4T4(>V0Nb=(VB1`uZ%Ny7gS2 zm6Cg}hSwmk7g!am->EsoM8bSVtDE*rg^b(`Y1?ONc+G*pOkQ~L-AC6L_;68e>)bS zD3I%Jdvu>U(wBWaFJcZP%lA3YD+bVdu&2pu4ZezFp`SFL?MshS^f!(!9}b50`J0RWQN(wYO@k`er?4Ng4CisiLGnBvJ^x~_Tn(!@uBwB&Kb`oMf#$V zKX;et>(LK2$TXf?4|4eYYMbml%RX8e)3xyKGXH45rV6^&qmA+|DY_}TaPS8P?3g%+ z>RE+2o<5h-geRH;mOskU^=UWuU%II^;ov842vXg9OTh5ED`Wi;a%RnWU-6}Nev25{ zi?room90oTrLf%lGj_58u70PL;pm>#j;>3^B$_<+L4mwdjU}J{Nk?!A2(acxO-RMi5c6NC z-B~;pn_BZ?Q`CE?=;c8lLSar@Cvle4QW4{YLO$w87#Bjcr7rH03)!{dKy80k)%--A zGF~ziih2KLe@Vtp) zNEpI)DXaDRtevTP0Z9m56z~pxO(bIjq~q) 
z$Oc%+_;n^P-w*K~c)N>#8q8PJ``G`2Tdc%gFV#sWKO@%55+Jphk2vOw`n98UCs1M9 z!j|X$Y783k)7?{AiQ!FiA3b$QSf1Hf7J4;;yoSk%yBUi%{~6zE%}fw z%Tz|*FB0Cp)c){m;XG+*v#MV*TY*(qs1tD6BFD^A@Wf~C%eh#Q;xhS9U^YOVtHUfS zpXkjAg?YM|0K-K6mGPCHkM=m7r1I;P#=Bp!sW03gJaVQm)`5Gdn~G)XNmNhux&m92 z1+#OPRCwNy%cd-{?@=Ce>*4omT)`+d3h!uZR@Z*m#0$4#N{jlP(UVG7L^g;ocs}3b z;l%!(PR@Z>K@ZqGps38>39V&CIUQbf4qzX+58O0RUcued39#AfT+R1W-~4|&$UH6! zeOR@;stOBE3XCiJ3RgA&O4NH=zw`+^-cbkTyCfkKN3Z_I58j{L7I?61TBG6`HGW$53 z?<|su?5whn&Fiq9}6JfPaCv>b#&IDh-*b}MT@!NTnGxBm94ye?>xfBJo^K(%yw zgQUA?v}t#0(Ch7lR+o>?&-KN{K`bhln}SKi1iO90a9yOb<1f?LgiqE{a*b~C@R$+Mnnr|_E&Yu?wH{A{I zj-%rl-dsovCoF)74>?86-`0N=U7Q&5Tu?3ux@Hu;;&0WpW4~T!N0eG_@SEZ}^c~wU z;oWT&?=|o~7U(r=s{qjkMnYV_p}^Y$O!|c#XjL zW4lMhg(&Uycv<33;LHQ^&WQrRjq}h#9rj)MyH_5X{grE$V2hv50Pn=CU3dDxMa}kw;!e6$Ssz?9br&Xp3{Z;J~^}25pFJA zd>_jlVf|f?wZ!?I1yqKIxb+}Fcx8P7``IxK=4@wC`^iV03lkd0dd4DuG7gZ}78mH8}1 z!M9p}34v=R{sy`F*Pu4i!uw$9=rX%5B%L8I4fuXYA~y>3sjgm31wM*C{Ko@gb0MjxSkZ+a9s=}5QH{^GB+jP6 zZRw^?1Q9K@yl)awPOVpjM?0GXRiF0b)WOU5jee@wlyia{$elF+U_xBu>n5T*%Sg0$ zIL!)GnuzcMK*%gYW!;fphPFioseeIcchw-0JGX*wcTlQ6EAaZ@i{#N| zs0WV_AcK9$qp<$QWD-naPMaON2rs7nN1m%pdTC@N(H)b}T-#3>{CRi&8~up>1ZEF_5S9?cNi_p|9NNSfOK*IHvxy2M1y}?lUnu? 
zJ@_b4O)^6}-psN0FOhD?wZ1}NS{7~Ky*bFy3UC8ieSedkng&@bs130e2S8`ziYuBX zeoq1o&ir1z*}Xd-e##p*$TfzEOu!+AboB%-OG>o^nRMO(ntyF{kQufALto7t$JfQF zaY!&J!#G1HUP1`MmvvEk22;o0RQnbQBuH&Ala>38D3Jeg1{{xKk|?nu8h8i#N5O73 zA$XIlAr*yGXS?KTt3K=oeOiMHX-O2pAD=6$c=M+LWuvK#SKyPdQK)%mX~!R1FFEWa zyp0YB=x@d?2Hp=@aSU=e&P2(vh62*B@9y_=wWK2GsKXoPm9phmS zm)m0aJ|r9F-*nB*ww{5ZUIP+F@p``~ZGRXKEOd&JX6`i(vV|A4!b7W6RCOPV&jQSi z3<#_S8+&kM60|I+ZSUf=j01`!cJkn#41g%cE5HOn&bxRxT3t0Mwpic>!rl7lUx!a{{YnoF#}reHx`xtc79^00KhD z^)@=P))n36Q&aee@j^sTJ>wQ(;2-)@?$U4{{D8U-1{nbE5_^See;lGO8lVasHoZl( z*?hZFOr{V*>}r=n5KM!U+E842#$a<(e3)v9rt3b$K{7-hL(20heu^nEhZeb-1~B zX-!=D7VklSZz5%qT?Vg4jekFDw8tpB@%xQoTIOT{T=Yx+0A@RV+~8{$o#96qw-w$x zQZyWePeqflCW)r?&wkfIWZI^?Jk|*yV0nOLHuW7g(u3AB?<&MxE4!%ehr!AtCR>mz z?&{`q(7}h}k2eHUfK4pk@F71>geXux?$EdRL)yUUTjL(kU8$GH@TndeXw&Rtmtus zn!I5G$`0Nk&kEK~`P=X{z%`KywP_B!KOnb9>ER&D1_S1p%cmo1n~pel^;pa94;iC2E>kG}y#{KH8*`FqP|Csc+zxXkaRb#{}h@ z?}YzcitYZ3|D0n~3;0nsh)uuM=|=!%=8$0^FE!QwG69yM0?~{r)My+V&zG{6G#7j# z2?PA(dcBw-h%G)-Xnc5F#N_A2j*ok@k>neCl`{p@7Bo8!HoQ-VGJ(3`YOOiLi){G- z-|;yVo3x3*pNGSg&*IkHlWW-(7ewKR_jJCYvckq2XK|=F_ z=!Di;OAadF3#++g0{wwzH%t63yDy%gCX&ILV8&z$Ax&+70oB7clG}5jCA(mbm~J{a zGskH2ORLFF|8Nmut=(L$VS<036LQ^vWQv|c7inLm313y>KUWd6a}HWulFxe`K9}?v z7mgAP4VlULZ)XWsOe@=xN+PDD=u!yO1lb8?T4IO}-{E`3U1vsv;e|?(p*tG3{DiE6xspGUM?Vs!Lgz~jU6i3-S!?-s*waJK?T+N75guYaSB%5gT z5q!bmkI%c|=6uPyLIzc#q4L!6JjnM+E6nB>ihF(_r-@`UzLI;0K0KuK?5A_JHfQb= z#M*;>^0-9=qI2T4IJcfq)Ew^c1Y!`j*2u{`>kPKpyv!5-SKV92MfFAh!e;9TqLJ&e(P_H?>|I- z9QdV>>-OU!5wAW;697p)?=HLRMFeJ68k>3}a={q1si&EnS%knH>D%Q*o@Hg)Mrv+d zL}|NwpQR+L>feQVEC9C9rG?_XHFnjfL*FB?8(>`>IV;7{y;kOz)x!cXziyo+ch7x0 zj+GAiakkYpi6f5fX%R*$99<}aC@YC0~2uvEvfew%<6nNo>pPm|;YNEA!c%WbE|EOvs{-M0 zY!*NO-S>CEPr!Pzc4za?*mLZ^tq~336@>!C(0Rfb2QE;Rlc-;~&$tRaHvV&mL+$Ge zmqA>_A8w#`N3KuqRBKa6#XN(jT^$Y8uEM)o2dKZJ*)2X?^|GK0zs_po_mCb3*5Ynm z%+xJG7Rb;SZY=io+euO5A zh#E`FbOS3qNBy%i<%J%OuT5fJFb^(IJDpT6=O6tXa^SDIP=!>_Vy}GNw4M(o-^!F! 
zzi1*)Wvugg^Jt~UUC+Z3H z9>%|*b|;jGs$i3DN*}21l>U%X0d1A~&8*C{U{2uRp|ol~cWm$Q_~dO93%B{U{hREd z->sG0=4sdfh@XqVQf2J-i!dd$M;&?cWDWV`q0G_A-9O0?Ikk!TB8cw-bNIf|C&KR-(d&tFSm1WmS}+t`=v?lkbl!lNFQ^YJpWtWx2* zInJ)$&*&Kgxi$nJi9kyM1`j-c8eh=#h1!ZKM|)bnAk62bBdH*ac^U9$;8DcAM~X?m zSscVrPC%1F8XNK>gEWz7G4}QHFRe)zk5f26@~3WT$Dn4`F>bFSiTkmHedN#J(xC6s zh7{seZnL`Rfmx%EG-vyouc(a-mpw40LWT?439e8oEurY7m~@g3r}o>|Rhc^|U5BPN z4&x_umX59cEG;Q=E~uTqt0WdzPH%IU7v7`c`Pqe##y$)3#q}7_R6bo676*Q4wE8`? zBFAU0d|eVRx;wojzFce}?ZgapzP$3OFJxJV@?Jjg3uPoUIC^qNDmb%hh;87Tv$@s9 zP{J=vrXn#Wckv!Ymw(}J<6U|q&w4ONM zuy}*^&d442lMX&Bk{mt|&(b~|Lw2P>X}8hT`XgS+4|KTRexmeZkZuOr8PK;ijyS)} zpZc$bmm44{L*#Ja0<)(F(fuSg)Zb%CYWc}O*V-Pnptjs`1Rpr?L%(`ijUBRM_B<)t zxWosdsJNql2&mhg0Yzj3K{lsHRx{0522kE+#?Stw@Mc(JK`p5IxR1i9%N!W_%GsgL zPky7nR|pUR7Lv?EiZa?gz{Lo1fj?_2gOApm+6G7GfbqVnPvFR?-a6;@XsH5Qlwf+B zWFH~3=Hm+JI^pswLt7SYVRe#KAu}c^F5bBZTw^BC=YQA4Pez5q_8+cTB>gIKS6V)7 zf&~yRYE@rHB?r-uYy7T=;zS~U>7^^RCjJ9}3y;f=h7j*D z`BiXIT@gO;(n!qAnbK-_h*3ao1AqQp`Bwr&I-=^X@5tH>Oz+zAbv9BDWAHK-Bi1Lv zaeioBaQ(5L9U*TogpYvWSGVn~YG8i2JQX*t;EDaN1MG+OoJ)rFj6P;Bb_hmRGU=5I zM(*+V{EfC-Z7tDyZp#P0D83yZHo^%_a`$r|vLt)kM#C#rI&w!bOgjp~ur?^jI+6yT zpApSmvu^#Kqkpj?AGuq-Kk5z-RqKcdjuSJ@Sz0Z}K;|NHm$Bau`KJ4(!(doyb^Q8W zvL4r650ne_&eiq0&-i1`Ol~?`sZE|uk#Xe|q+d&e6S>(V1Qt{)e;Zed>nFSXWBMX) zfqfg~r&!@SQnUH(5a-)-ox3&mo46WW6PUPB^G2}L(eFtvSQi+%`CL~~m+0ln>uX1Q z9ZpId!qqa4*gZ(>aM0uN^y!e8UMca%1M78%e`W*?i&l0UF?0C0w0f>t(v1(3)JD04axluo3x0Ch*cZ%-J}^ zjK!DUBnr-Ya5o2Ym0jG7i9AKDEz>E}H^%lp)Ia_F>90A6%! 
zOEPmVd`o#scq-)a>Bk}l=HJ>+h@)=6H?~Vm$b8#B#htjUTsKg9#zv-fbx)Bq{i-ru z-!8H@-R}12CO1jsIOvmw%_bgzol-rj7@x`J?^&s@phUJ_V8g71wk8iodLdJ0dif+) zn_;?LMRH}QyvzlSg>y#iDUjCZfW|nNGtcFZLpYMY(EP2D&OuR7g9@W$riqUGxQ6kQ z=NIMUzt`2}b&b}{Pn>ahHi6OjSknabq;;Ayfs?|nf_??qS)9q+ME4Gg7+g>1Co-X` zI$?ZLe^gf%!V}VF8w^XQ8LKn~il9LNNVayd!0<>D9U~%f$&99*czmZ1^6oP3i1EO)4l-)t$TfketuhlYH0Q5&;DQ7z1!q!&bf9N60@l;+o2JV_Twj47`^meP1P# zu3|4zAhP*>s>IZhR=vsO?ZyNB=d6Uf7<3c?vz{$IEuQO{m#zSb{@r1bC7CX}hH(<; z2Fx+@XJ{RJFYrz&VJbpsNk#CZW|4( zJ3o%O+CQ8GUtWx5Kf~JnGJ|`{YGBNP?{Q`=qV)-Pp{wU4de+97vhH;<%T#BC48a;0 z>>#--VKFetb*i&BIoY8!7TY*gk_)LsIH3n-_YkC{YhNvpE2KmLbtGVrOr60}!U@)I zLXc;a@G%+<$xXJOpwUhhvOn(Kmt@nxCfN9RQS_6T5#?rrAZ;9rV&fX*xN=E-HVzB~ zqPe-CG`My#3}|@0)tz(K=C?!%(7Qwl-dQAY_4|vW9GFFV+@mViyV_xYw307ukV_E+ zrU%ZF+MCeWushByH}>Pzt_VHK$k{U!c%UL^a~&J2dqCe+6nF~GIv8s6q`<|(VplW? zenaYHlanp0eY&OR=PX*&>$A2A0oAsDEJrVV?T}GljTIW;I1A;)la+`EO{M*Dq9x|B z@zj^&rb55+WPudE3)h(z;@GbESGHF3W_UQHziw;%{{6dzmUb9OPtkL=q^BF$wfZsyxE76@YBLR?nXnUC z-bgKzk^9P^^^%Z4-F^tkveaS#55f3=AY$4vzTqif-@0=8^OWras`? 
zn9YWkt?(LH@pG%Ocv&oGtNtVpK(h2^$8GavS#;+|g8FZa< zUDzm%y4;Xj9!A<1dt!mC{PqK5oqzHvs}D|IF836RRhdt0g_6d`MQig2=fLq>E5jlb zgPG2D3nv2N@u)+fzx3HKxQGE!CD(k`vCZ1Y#v{TEWxv7_^&)LU>eD2S8Xj@Y=YDuy z9igSWdtspsdZ=1nJ=;C(6zB`ozTDjA+ED%2`V29~NGx;-*pE$PkgmqvM1!&G~W35ZA>uV)GNZF1;JY9(>& z($S$pHq77VGlypbO8lvIl4mTQW;8#Gqz1tlu@A>7LD$* zD!x+-lABcI6aHAxBOa!y#u=&pIvKY=z0Tpx>y~h&w$S=rc{fZyw#!Z@JE62@7WgJs zdyHRyIbWje5aG$NRMO^tU7=ej~}^-@f$tIAbp6n!fhf`54yQd;&XFT01c^I$}KxY z0@`yG_wlln-ZGyt^Z+J9++bq+E!Pw)spXmQY9h#9qrlnnm9rR>qNwjVxFsqX`VFI% z@!!>^Bke4BG8Rwx_pKrViJaFp7eI^?>Pm5LO3h_eB?H0wF`-NhTj%P|PI{_VUl(g3 zWqjQD0uW#se59FKB|ot!NdQlR)AXWn&`L+jj(3)E0}hCml_y2A^Ejl6S|AEuO1@uR z2nL?>o=7W^m4mn^3)~Jy!028yv-02;2l6auIgIl)exH7nNMyxodD2Yelr|sVogRV* zhVIYCJQHBQNsQL`DWH9;_F#sZ;h^(253~t(A8b9gYjmaFRTDq$z}uQqaOqPR>4znSK;O~)P&ziu3xvOU)e)IT;!Ci#C< z&ewW!N!gOe!TqmMC%&m!g@Wg^uvd>GM?{Dn%t4aRWQyyQ zqTb&G%BJ{IZ?0tnyX9A1V+&pUbsH~m9U)f22Lemg=4a})2~x?D2Sh&ds6x&gkN8*4 z^Qk}HT#0b<%3=c!uz+rQIkJx`+@pgEh3+37=*N_kG8q^3Tbmu!1)(1@=v#p6{XY;b zzj_NErG2F%?tJNo`=PdI^Ic=Bbk%B(*7H?vTTn@QK+WM#abiNmj0EKj%si?>_*J_f zv*je)9}F`d@^9j49%cgiS$_Kwod}JBoT6gMRXW>Hu#$2LPbvqu?(pUYc(4qLc-o0| z?5^zGNpZLFciw!z33P_y8EvZ@x_p0kl?4_LSKoHkx&##jtM{Uv#Q zbB?_95~^psRytRFW`aPbmXx1O*ls%M6kfwsOJk=`RRctlehz8OOm4Ohqq4qW_ogPN&9EkO8nn#$`~?vRO84Cl{@RVz2cqCA@g7gi}&N552HJZZIO z`e1Tr(R_YbeqE5_a`6wtOd!{t^{T9E>a_082Ifi|7A)$Z+qFFhylQ^p_ND&fXZqqV z*jeWMu8vO47*Pf4&oirrf+vHin4e*}v*$G=4c9iE>Kz1s1@}7mIb~@NE?5ajHe*KH zYqmrAy73@D>bRNDj?sMI!rUS~fSe+7+A~Cb&Pe<6!QzH=-<~X!d4lvQ{TufSU9Uxk)+<0lnRubiY%(w&N`?CiTx&ETXz(#coa2Zl7Ehmrk6gWJ zMI4KaGZdeq^nfO3jlG3}NB5|ld#+<9OTOH=q-T;A>Cd6p6*%daI-RT+=f|0uiqmWX zEt9d#kr3}#*bJOE3Erc5rpg(G*LVKX&;wJ{MzL75ads3!U&Xz=V!g1$1FPbJ7tlP* zh$Of5UpieGU{4n_I(D-}t?q)NN*M#7^Wg`U;)?D#s(Wmmc}#P12$5;M)F2~|LsSt3 z>f~RBK(VcS#&q)&+rqY6ib9>M?%0jqihu+AA647YZzrz$Jd%rqO80TddI9{BRKIiZaWokDV8p;^C~q{kC~m8+r9q1-G-X@#O8fHTh48$<4ApnI=P0JZ?6FC_b+bq9V*W>ybPECr}<~W938r+&*9*0tIX-abfs8bB7ZXeLAe@t?SMB&H@9mGHD>Zn1HpTw-8EY-9wx*ybU9n*k*{l-r;V}%!F0y?`ICGLepGll 
zi-(eb=Yl#6aE;an>IAkzYAfE3Rbf}~viz2Bd0h78psM?2`U7PNl%#vAb)WP!{M2Ws z=#HXrMiKIDov$+!L2l_8kL}r?j~abIC2aBbcAqPpo@(sX&)(#nz?6OX?hBLxwBts} zc)WHo0}G(wn^lz%oAI=?KwLUmuF7_KPxcBzy`4AknUjfd{XLQBNeltTRKNeg#nFIg z*JtU4*cXAzu^BTq;TtoktC${|5@DAsLdM5h7wiuKuMGHxm(Vx%KCzh;|rbIChJ> zf~yk=T1MStUMoDlcYku)HRswt6O2%jhGRpT4+Si!y98!NHWV4Ns5l^i?Zsr*dWl{t z*jBHl-S?3oM!n6`x3|Sn3MKZlWMR5epu2N^odaeP0?N1Y-I-4EZISfqGXTk}6_^&p4%YL&M}axaGXT+lTx zvELI@o$sev0C*;@gtASB62*+YcX-YY$kA(h!(U1d^Z< z^*8l?aI_n=h*S4g?YGbz`o%sZiaM|h+3uW2qEP`+@rRuLKqUDCJkyAadicDKAlkYU zzT9xV?a$f~%=M{>F&}P8U~9;Q)|=^EvibE@#CUgz&3;Lt^wDN-L`bl+DUE_xPec=7 zf2I0uNha{Dew-IgGTzzj@DHVYXP@c$Wmvf>#%JJLWS!I?Ds)%NyR&@G2#t+Pca#b) zco&kB{^f4(bUC2B5jRS@vz(z&?B8{X`PpNYAt=Vl9+)QKaN^ws)_eWNL(k*6dMtU* z-%sao4dR^(EfnQP@m&AS;XSRjZ>XQ6 zuD;Ag1~c4s%X!+yQ5#6M8LQmsnuXO@O%eh>Do0+A61;bALk@AGt!x&QiCWVWKrztT z_5y1r8t*a=DtvPKIHNCYms}0&&^ge(aCr;N#54p;UCbOUv>q9cj8b$x=X70gVQR^p5M4sw>tV@e?RhVD(J;li3CfRWfZGJFE$!6ka7*FPj&w*<0oOa6=K}lg4@E{L19O_xAl@BrodsX zh!WxVKaKb;w0xOQU*x!N@cxmLEX-5{^55nClW*1m|Hprj|1UR6V0${Gt4ila0O&}E zRxXI((<)#9V8|^fjoE)pBB#X;h>vluE5iMdg~F4PIf${fj*<^A^V(A1XmC9EQvlV; z{1DJAf~9k(1O1SAp=%USHUyZ;%=fcpXdtNdmO23!pinKNvogJ`>{_Dvk!5hUWwOt>eLlb3tF^7!dgJ=2JP(=yXG#wNVq3+`TDAe7Q>>!i;~&^BI8*!ZmLu zt`}2|V|eKXblJ}+k_R@c66ZyO6wsToMZ^%--$qhr;~@L=U-Mf2@MtR@1Kh(>WEHvf{w>;?E z@QE!JVV?BF9piwh>6`g>yFHCmZj&mo2fX5p=b`;tuo#hJw|?!v zgY!6Et`T`LiNsX}94Ny2U6Rz{>`EEr%}XA_$d%LdVw&TB_6brGSE9(EMQvm~)l}DB zK$$Axz@~n<@P4JrWG&b112jL2suD{-4HaG?Ki4(M43o&58-;6G)(gc-b)F5T2%#f@{INbV?qHjrtBQM=@U8Piw}#T@{{Hjj(B{jRp$l6E_xL zS&F2AVLB%-GUm!>fo69^kgUC^oO$SXcbA#bU&ALwu`stM<#f8o|Gl?Jh{HI;Kwm2^xhqTO2!59?0OO?o%x7-%oyD|P3!|g`5u1n-Xk2~;CL@KV$JezxZnTC94t`Q2d z2oiP+SGIiUl}Q=6--!9v>wrTl{0ki@so&V*4~$lzUYTMS`;Cj-ACBPEPXDF~Jz^U- zcOy2n(w4n(HYU(HrTV&J_^0_zKSyOnsQd}b0M3-D1E-ZuKsHm+PD4J}S`NEEAIh`M zLXFA4P;4aHb$^H3lbmgHcgk%1O}txY13b1wMIPEEnHXNsQ5)yh z@KI>UzYzqiF~M5Rd8x^QQj}Abwqlu>2tXfxyBZNMNhvzp#pdKS?VesTHZLg~e4x7$ z*eoKy*JJjpt+G9q5(J0psLI;gOC^*R;$D~}_Qj)@2p_Hs*ql8A!Iu5b+AH`!DfOEsYMgLES?P 
zG|5Ru!q(Z!^pH~!%z#4x{F2n9yf;Es#A8z`zV}q{7NiHDSX|M`g6TV*AdMt7rAc-{ z%Grexe6a{w<8{niJ3;^l84((wO;JPk2U5Uz34v)IMEty_`@@z}glA6;-L4(7>%}y& z4S2$9;q`}}e$a#nKdLPdt(IpZOBU#p0{aoBFnJP<=%k}Os9J{nAONHV2w3A<{+O2O z4l0ts^bJJXzx!HI=c5Z1UWvB#?NvLt#bDuIo)bg1hx6e06@(~W<(=`!(;oGXw@br5 znUiu!o0P|%KXUAA;|pB2WXknUUTtiFdv5+Qr>8>+iD{1qa2*LLUv_izvShkFmpVU|i9kcz_`gQrMI(IvuYM18OSlXB~d-6NURBd5}|M6O1+*%JfGfFjKVbZUn0WCA9wy z)pVJV(iPWMkg#>K=|$rbqZUI?3sXX+YiVstWzx ziJ%ytHRS$BqJo45`1h>?(_3{_wAIxjXOjCHzZ2)Y>#*jZ`Ic6O?&L{<@G#|45!`h_ z$&6Tw=fQ|+hK%|Ht^N*eKT^g4MUx!F@_V9QSfK&53KL$pTDoy7HJ;MMXN;|@UpNCA zXLH6d2^8w$fv@m@68E1XPFC4VZG&e-CI!E}o(mhSDVXSX^s~gkcPTz%-n)_o{`>!_ z$$BjQ)Hm&yGR573ge|3C5>;d@;~01rW3aFTx4kD2I4fi74k2b=){bP5L3gxb)s;Q# zl5>2i1p!*23FJD7)!f7jk;w`lk3!5yjnG9X4^xifR3aL5glV#kcEm6SejO~GYJ!yP zA_!`@_W*gq4n*<7B#S&7VeF{fnFf~zIx950n2$Uvp?ocoyWWLc64pa(kkYW_e!+1ri6D%_$0Uor`wQ3W#dU5z{%b&x{8eg_$)> zg`QMD4!-Z|NoT*Q2rfu8Y9ZOG+N$RT>GaY9Y?F56wFNlS(Z!}!{V6LRLf9|wkwD}B zay~oP-rOU_e?IC%YS_I<{J-Ap-`#kfk{CU?Gz9>F!AbVLlaYawv9O_oG57=UaPe@n zbG=~a=6b^|Ak4)p%+1fr#Usqc_3f!Z&Hq)w+SbV2#O?pN!iYi?4p@Qez6U!;;g{yG zjUAtO~t5Q)jq)54xiZ09)B}qt9kx>feGSUUPoJlvO(2bN_DqRSL zsKl8{kxP12?w64a!nh2EF*9erdwPE!-_PUo{r!I5zkVLdIs3NOUVH7eo@?!WE<5hD znK@&@3;@7P+iljn0FZ}|a%_?u{BwYyYX<1E4}j@?6+XS0#j2{dwY9tZ*LSu zlarGR3JTb4c1=wUm&=7DMpms#7)r=#>KSnv>6A)`+M}gK@AHO+q>TVl1Bc=Ns7|bo zQLO9V_t^RKgCHva)L!~J0Awsm!PkGKvu{dK#}O7wx&=(Lg8w9*8R@xNTI~1wqVmK< zTRMv>%>+ZyL$|<>_Vo4yN4krjF>5wp&7RGo!(SH5lg;v!vSj?zrRnh3Q<@$mWwWJl zK2mmVaq5V4B)zEiz4U!8`$dG5o-U=9m6d_i6MG8(^rV52?#&=LpSy_cnK*bA&RHpF z+rAS3sFutAA-9*SY63t7w$__=houldPviyhUHq{^O`# z>WirLVb}~M_t|ii|Hsdx4EIO`gy~mvH9*3pd}Hx;13GA4CbA=t>48;vG$1Bux`9NH z&T<4_i!-0j`)Rt{V)2ouoBHNwrCy(*QeUL9G@HpjFn;w`hC+tV(=%JQgML}ns-M(x z6_>hg#%ia&PtRf<{4;-<)y&OxjuTf`e{(F5Dx6REd8D$2WMNfrx{Kf_EC2r(K#Bgh z&BX=hQ#1PKFGHTnh*UYA`zB*#2|&7IA&)uV5c5;o+`_0+X2Pi%Do#U&d+No;Vi^S<1RPGQQcB0yNE0T2~b zp8R7os@NIju!og60D{s;gm2#oa_pG|#m=TbiN816pjMLrN~*5Y(Hu1E0OYWY3qwe^ 
zhy>*)D_9-@R2ARdc577rdI59>{?gbSe@T)!H1J%W7=(5DoHyz@TO88|^~|0o-pT1ih)tyK{gulg*f(|MC|?=e?ZqBBlK;iT>NLas=uA^$fVT zZI>r?Mw!WXj;nl&ReYgFW?Shfu}hDbcn2$iRnf-hl2gb*!KLo0`Uw}lfxwE4AdtTW z5Gv;)B&r*c%JNrd{imQI05RV;u`8~zq9R{Kf!#hdgFc|nuGL794H-Ct{e84~GJ=(d z%YpKZ3t43q1ne#iQ2TO$yOwZ|QO7@rrT!*)8bU>S!~Hp(kHiJ3S_dN34s zD=i$TppEq9Cvrl{22(Td&O?G&~W)aOZl2^TPAbUMK!9?aL;(!v|Lcw0$)Eo z#|a^<^CNtFI+NY}=n{9jV;hN1W!x6}U1sjH1K&po$yry!mZ`DpqkAU^zIzbD-t3wI z7hg3xFwf2EuQz5w9XQ=DC=SI2W{g-tY1{t58$`I&at*97dMlWmba9hH!3Y0ajA2vD zEz`Q38QT^Eq}Cf^`)0{4Kdf6F4-tZDmaMjUC_#z-d~{Xcmm|^5g9sMWD6aU zlaG1Ke;cBZuHnxYz7052ZZ*`M&e($>G2h-wW_I_?eVyVC*P#9^Ao86~aTzObp&4xp zSrYA_ILwGhO}O@0VhBgzX?A6%J_Xh7Ks+}0@Dy0&#YC|+LXB_(P_ipX*nHu&9N#g= zo3`L6)UZvK)t*og%2uLQ_j|nz7Eu=<)*si2vfmNhQxt=ysdG2WJzdwrd5{OVjO$jv zK>7QB@wq&e7yNympsCYC+;*6euv;5;^30ax7#GLS*#MV#-w{LbhaO06ZGsu%kS{VFIXy%=B?Ni>V!s<3U%YF3yeT<>k= z;POLl{lUoi$%CtxBWHKi_|Q_1qW6*<3thm`y({qW#}XdHs(KHis~smOM%4}7Lbige?@EcEc^ySwFnciZLXAhQhW~(NAMF z*wnARq#hSP8Cx7}OQVKig2q#Wt#Ul3^iXl&tHF4DZ|ZzMhlCJ$`Jj7mC5CXsdr2i> z+1v;6f`wbtCod=l-Rx`O;1hvu{oU3k1&cdR$DfDFS^rkhSTr@90oi;1cXD|4r)31J zBU**RdQm0`$SwaDUuzsTyeDUvP^`*c^Ii4Z@+#Q4_Fi$n6OyaWV(mX(gh^sM-qqLZ z#yEgg$|}Bdj*42}4NBY&)JDy|SqHw)3J|qoxrsRh$BK5`Xl%IQpuNXlC>4F@fI`d) zxA+MuyN1$j9~vcc+SaIeJ_K?%AaQXaEaA&@IQ26@aeev+Aq+gF`e zE}+V#qkvM!<#3-G#Cjp-d#*_I+~k$$#Tk--yq2-yU;MlTzq1Dz&5PO$#RE+JSQl0x zrpoe77JQc}>Y`fOdobRjXijW@5ZtE5zCk3;mYJK^oJqq6f?~?K%DLW}28u&IWEMTT zoeqzMy4~NOKc@fT8ds@nCGQfB3A!{7fIP zYXmXa?R^(LUW~oEr*^@ucDBg)96~Osdbbpr7bbFpPR(^Vx_q-Qbn5sgJ`OVD z0WrvBP3kmPBP#7E;d3kA0DYWB&Q8gpFW6KIr$vJJ($uKco*;9skLCR#!qvBuoc2X5 z!5BER;(Xk2xT0qRJ~x18_VX{pC34@6FJeo;^9RdQ5vb-)-OvPvt=$VSj;;K>5Gb6;`B#5$WUV-_u)6uK zWFbGVEM^@JeGmJ1eYJ&X12H{X3@5KupNM|_ROc11aq6!yv{t1E3*QuZh}~67CcNTl z1TtH@eh)N;i4yg^V4=^MLvDj)77!acNLH0H?@T8!(c11^vk(&7^%dNK38+itvR3>q zZq;OkG+`FrZ`|j~QTMyNY{VO|FB9s@nd3v)ThW>itt(QM+1%SV^3`M7qC~AfSJ!TG z%g4TAQ>NN{<3!NCjZ6(*1g-yg8DBByZcpCzcHhO(AZ5|4Ub4R`iIo;68n7IDCO#-? 
zA1+mI>+bv#$-1dH?81VOp!2k*kENma6Uziu_KPERHw)O@siW$w&-cbFk-ecs7~dyw z)#ldYp_ZW6C+74@d3owcg0QPslQXqA^zy?cK#oU{e^Ff`x$PnLu{QG=>(;H*1(Qkd z$t<>ML<@(?P3vnIOw9rnwh8!;juSPR>WFzowj^MY{&fE2U}rBxN`Z>r=n{q?S7=Wl zvu>iI<buudB<$dMnL0@1VP&#TOz1{Vn7ynn71TH~#k$5?5b-II>*B#pt z8RWXU+H;#db!jRuFG#5OW+f%w`23=}%XC0^*%bLZ{(&DPGk(@fl%~oK-nfK-kgOlH zL~*Jwo)k`2KC{;m`P&$LQuTbQxy^eJi-PJhG*5p-}NWH>^L&`YlJ{(g(zMYN066QRoV_ zXMxKrw8y!bKP;SrD){qV3!|Cu5a`?s;X*_!3AYSu59NqS92MoeP75I6^F`}t!Dv`37dszND;R36X%?AM(jgms?2;T zI5E8QxR=PUyhyj}*W4x=;OJ7R1MMk?59dkji^aZ1bTW~?pO}+WU^U{ujj3)7`)^u)O)49>9|7Yf;m*`^SeE11(MrmUXiM? z%?==6jhx_8x)ka2MSuKftIj=tx|>3M;vwFske)4TXkLwVHIE^J0?MauQML@j0qL~g5pO(rkx{DxQEMmM)AT?EA7RU0N;7Dn9^4_Nv*<8vS`y8TjO z*e|{}M7U)non1~rNQWOm^z|S|@_9;=@6h+^>bKJ$bii5n=PjJ;d*PPXC_eEIq-M2- ziTX?`C8Cdgca2P64BP|=`<`AoBHGZ`%D7&NK5iEJ5Q|PK!j@+rN+fe{@*TP-ibWz!bFeC7rn3E!PJyjrRob+q*WN~9vB@{emoqJ;+ZWH_31 zv!|DtHaJ@tP=iNdF85FZ_VU=$C?7b@qo;(uDUzJ5evw_}0QO@2Z)mu`_dt`?7=?jb z`qIMBa89}b4<&|o#sBC_$N#?8wBu2i#3k#c8S7-CIuIc83+Ggy=Ec%Me;hP(+x9f# zMXr0eoCH*3mSZ?nI{JgPEu105rForj#`Iqr-Tr8W6)jhC_c0mro1hyaYHddY_^x%r z+|zJ6!QQ;Q2w~FpX_=5y!$ce2SUnQk>C4Ei%N4kGmmAxDl5zIA9*7`0hG} zF4-wcahDO?rNzs$7AASho;O2hh0;cgq3E6k6=DP~NakFey|v$_y0)< zfgJ0aS~){wP|f3I8<9I0o0fMQeIrwez8+sx6kaDb==lduHT~`@N$GB!J6ra=w-q>0HZ^SUBU!Js&D(?x4W_pZ@kg zirW8!ZVmG9C$m@vMS{t}4(#o;>7m6x`p)b=<)&Xf5dhBbgIKi8OW3#@-RghX;6PlK z(NtCQV<;M;n<)IP*biUu^5wIWYqF|NEC0Swxd*AaBlzBcpN=>8z^}E|COIna00iyQ z9i#d8r@4oPNs~W^i3mx5WT>W0MaK_yh+JZaDLy;)$ zS~TAQhPl?y*G)a_n4J9MjXFR&Y07XfWe1?KOT}eatTUG~+Pf8cj8!+C8IhXp3oEK< ze-37z2SlAUg%~Z(7I(tGz@2736Z+d;#!VJCBSd1Ny)}_cAjkY$D9Q1mm2vWGF)J=3 zAw$-!tKiJOW{hDIS$$;rnW@CFUeqnrR0-=MB{mo;Fc39!^(5jdv z7h-8)I4?J9<-{c=vqG39VOwNz2QNwcHB8;?q0(=<2F2+qvcoUH7wuPE#!ZwrQ&ANs znHijf+c2FwsK{fw!>4&ZBDZh&XBfe6&X8PN1jo%^A@xOnLn&?A(ZUfZv~Wn!#_0d> z_>N~mrXO_N|ImR4kJ#*nCgi8+h)3;9ra+J+j)fVIm@^uAt_MI%3#YvuEyT>AMRP*$ zM+;mL0-3z#3nm*Z1`d{9HIBSqA>AvPwU|Ioa88~y7DG=vGOv#3@z`ah=+ zo3j8h?!+q?dj{yHD_NLBS8(>@XTju%CgTR`1P#NvEL%})59jO&%AKI!AC5!ix>_RP 
zcB-9%qsCkjm@R=NfXCcVqO%#VEGORwY4Gwk6RDlqfheaCTLzOM4mSnG=@NT6gcSO0 zMTzvY=ya-o^i1U>Xv5>nB(H|>*6@6#H3sf*TP=d13H~WDu*0uS@-{Y~Uoa;ZGGtL8 z2uVi69oD|TcwvA<_pG=rTXju@L6jr7#h}Fq+jmTQk!VGB$gQs5N-WZPWP{4{peUpeGEG2=3;Px~O+os%=l1EF6QX zrg};^8J8}}Dad}kh}FOoxYe#vRid+p+(mw8g6000ZV!yTY428i*FkF*i)G)4xA2)I zV$)j2VzV0wNcSA(vI=d4w0E30ddW5B^{Osh=yjjp4?yodnX#6AEcnsGJm;n0?1eF2-pC@l;j&dWU>3{KADYFA7Ea($ zW6V`6`y4pXn7Z z#h^9Y+6&D~oq=Fg0w5}UPDV?(Y27AWU#~uV5C_xAeaVg8ZHlMxFypAT(GO$Ro9w(i zg~b5k{vRhOTK?#gOX|$M?&JVH=pe8C;2Cq%lFX&mBzn4U751rLqfX#DRrutFW~}|y zBk8+$-=^{1zSCs%nsuf(&c#sWQ^!K#%yiu98BgBywBxIU+WpPL;3lBMu+JWAeE;Uf?_rL zu)sB#bPH9Ezjc;KYv=lM|{z1edHKgarlOV1aflnyU|u-=-HL4u*$ZnnL6P5ZQ&cWvTH-R*Tgrq zkkQ^>MR0YkcY6Co8Cl*GpD4z5L~ZO!CFz3e*PBJDA;QL`=>_74qR5(soD~Fe^V7$} zqSmibgS}~eq!`TWL&yh6dSy6=Gt?Q++^3jcAjuSmyLU1dATc{?VAiaOlW(w0s-sb; z0!O^CMdWreHZ7aM>#d?yVZQGazPl|2*X8U)Zs_PhI4op{RuYDYN7< z21)cs*#=q#lv}8;I%`|Qu~AM(a^BUxYG-^I9q2eE3W`g56Ct2;J`1ex7PT;1e`j_X zH*Inw!S@?QbEVCTnB^YH;veUMb@W`QovuRH#bAv#xSr}F3U?E_jwAYSww$*DI_n1D zP+m@Wa%!hv?GF4XG8g2IVu`(!7o!KyWz`4m?PW4uM_w#9?vRI3oa7@ z3FVAuHtSZAIka-cD_kg0=AUZDO#$e-G$;9g&20>faxQWe{Nk3>! zZxv_895Hb`R*x5%DH=#)u+{1;S@57Bd)^&pUH|XzPI1u;_fCH_Ky9rC1hMpB` z^ACYpkgy^jdvtvLiat|6^jsqPh|xu&KZPPHapR7~PuW3$apWe4+0ln-3WpCnBJ*+6{i?s6;qOClr?1(`AV zf@Mf<8^Wn*y;LR7*j+!5br9lHV}}*zHi@&k>J>fjK!vQHgT(aJXEUOHO+J%uPek+H zbG0`wYd~~@UFuihqSc3GJcaLwrUp*D#3r;Vdxu3rNdWVo7Lg6`jtAC1r@c6qoUP&a zCa(9CH+;d`)LAk6)@X^=cBefQn^u_N)s}qY6GUp4%P_wDqB#35arScQ-M+=Zv|>Ho z+V4z%QkR=cy=$&`;9R`8TC>`doc&S~;EfJ|Tg#=nDk^`dK>f*|%fkboP?uT(TbWDA zw^6>T@AC2Ta4Rh0Zlj+F{ZUR)c(-hp&gx`IUz{X}*f|O(_K}FhOzqA#mAc$sgVg?O z_z!wV8>rv^G-#t;EP&N1!BX5AxTeDnFJKFm&!5m6 z1(b5i>CiNlvlqUReAbE%`l0s2?5&q+gOGfx&pvT+M9i|gvp2qQ>QWPt=o!!pKVLHR>S=To52XJfJc zT65iaP=0Pjtkhr4^k%h1IYpHUMzzwKtXXZC2Vc&D@_#OVLeq?-MoH+d#a4SDC!YRjY%a9sAOC?zly%vX31cXx zlr5ku5bk^n7%!~rQiBoD>*jY7LnElMlw2w}p8czQvbBJc>b4jUnu)}0m|0PFxh9H? 
zzK#Lq*AfdTNpmqr{MRZ1xp<#3ULs?JOb1;?)t}J%3(aa>YC4eCqy1l_Qe?zP+TeKX zOG!@NI(}U-^j7oow}LL!0YM|5`uo+gDq1D1mrD~g=0Ukoq5Oj&W9vjO-&}Sz?prut z5rBC=Wp@E8!F6~DoiV{1&g937tw=;FK{GBgJ1@mR+LA+NCD;I+=bv6OpG@M{Q4$MS zZ}TXiSZbb`(@p7CXOVngPSrKwXA)6)BnFV^=S}ZPV2Z_~HQAjIUH%|zh2A|K;Pg{i zY$p1307o`$+v4)(_{@F%aM9Ax#wr?>2my>)o{nCg;2{$oxK{=POzX?w{EHBu20SrQ zJk~sK^iujG!i&Qc>i%KxFC9&Iiw{j-N|yUFBdBf9ItW!}3kUq`qNe;WPgzmFE4@;o zC+BQ_XenLmEo-JkAbeJGWQ<6#H zPapKL*lG|a_M`hhq1cE>BK*l4KBHmEr99XSHtz`UPhw2DV5Gug%<`T#9t2I&uB^aO zPTp9~bD3j|nTe3*!)+V86fBdbBM3>oJ=uX4fZx9_Nf&=CCDFI_!IRnQhAt$EmnUYk z42jg-?8i@{UMi%IHqGEt0&PH7{;@MXSk`_$Dwm=Il__hf^s}inCw`ULBF&7yy5|0~ z2pjU^z_}R{$VofAX@eDI^W(2Uwb6#d?JdHpMa!gQcyg>UScF-&Ql%p>%I}5ydb+8Y z+inT1?Ik^RmbqU8o@yw4q$sbo33NTW55bcfjEBrC>hZg&6-zf?n>k_)x^5L0NY-q* z8C~KA`5zo=`@%%BO4wT?w^`Y3mK z5xy=4znD>jt(V0Q#a1x14GqWc$4X(|TDTH{Iq#9xaFhOk=u>+=kt%-YT|lWei!@!N zNixyzTtFaOW)(>S%HTq-f<5oN!j^C;yEcJCuCLnhAtsBjc3lU&^!bkG%G4-*J`%GK zQqC4M{uXvq9&7=Je&k6cCuA1PF%Fa`zLlJ+iyEY*@ztClQ)|;C`pcvVkjPG$sf#EL zL?_c6gqL8H8Pt3X0@VZ}fqc4^H;j+L^@pjK?B*_vPZx>$<+Ze^=fQdVS1+fSPb5pE z>553qD`6LQj%SRJEJ*a{E4*pm!d_@g@l7)cWV`xL&GFIVb90H*9`9Ib6+cv#>vuc{ z%4@tuCDQT6?pZO5keFxJp!DjtsUb0!8&8fh!d{CPOVd@5nD>Pe!yXt>P;OX*2WBz^ zoghyj-`^*4bHue3iPQydki`PX;y*~t{x$0a0Ua`79wbs*KGj`-i4Sxwm-15w{9A;j z{opb0Y(mVa;Dli4bzI~_$(Ak>J4JHJ7rnlWVB!;rUn`*8lr7V=toXyQvor^TqQ5A6 zi(D{~#5t6R-a;!0$eov2xl+qC4CX zr87^w947g?Dc|Y9I}7TZ8;<|va_*oWT#E40YBFozG=x}P7<1okXfNPBni?l`&`t7~ zf)O8yt#V0h6&J}%CN`KoGh|-8-aqIz5_0E$nMAVeo_Ij|N?)3=kYHkV(-|uiHtMC1 zhOQdDR7hqeKYOfKHcWW&=Hnm@?uKXDVC>Lm)3n4$yk4oY#pm#4Q1hh;Z-|x7@TMhf zAebzMfq05hl(#_cjSd)F38PJZ-MAplPONQ9^jy1wmk0e=Tja41T#o1u(&Tnh2f zA6ikSCW?s{BL!PQ&%t{9wAnq0*zUF1?|VG4W%H@5a90;H%T}|6 z^ZSYAYeQUhJmgr^v%pmOxTpGBHhl43F`4xx>$9MEV-xOSC{?X_?h&qCvvK6WtHF$j zVl$4}#pNXSZu^~)E)|>%!DN?RD=u~DYhTt~do&{Pyet?B>JE6da|Wn}OU(&}-#9jmlDgb*G9>Fj*KW5|v_- zYkhhP7Fgee{S7_~8cW3C6S-xlb!j!i#@gS0(Qb zJg)d&?GvbIL|VDyz$lC!or4}o0+hNrnLcuL-c;lc3+nr#qo`kA*CE_OB4z6SdQVwQ za4dzmGgubaCqMmyp$;f#31DHZ2?@p;0%5OCef7cG81=S{aGZ#HxvRsI#(Q 
zGG*WNCH-~}g;}h-_XE+qG7~Ok^BXfcC3>j=j7x=z?SWV61V=+3QHf-=s(6bya5=2Y zyl94-n?r=S=~B3i&=(cna7MG*TZVqH6>%2J0J^2FZKCjrQ&4Kzokyt1({Eq?x}ur$ z=XF1UabB;xsJdIR@Jun2Gry}h9_)SsFC1Nhd0@3GrJS=JANHDLZ68P?uPwy58K!9- zk^8#T9L|_Y6%(b)ZpCKZ0Po&Ek{GHMVy=utzM5KWDPhGkV6}?JeI}!#6cbMIv{lyn zS#si?GWM_Jmrvi zCGZvoJSC_v9+$Zp;6U_jgLhaw#IA{_05pZJjGKJ5W-55tkT#w91^w;Zp9!iWL{>jWWq?=7j@Q@#GW_7V>b7u z{b{B+Oe#aa{@DkDA%kr5z`+$_2=p6tXwlGu{$2@hhBd)p>oe!J zj~tA*et#>%B86t%Y1!fu{;qP$x5eOL=YFwI2Kv9ennDJH@0)zhbH-IBPy( zB5D`(2_yT)1R?Ljgs<&{Hq*#y zV&}tmtsG@!n;0=7cHV3eH2MjC`$JIA(nO|G$|MtaCHh~xDUgCavD53KptvmG6`!L= zNEnxcXP^$p3$cbvJB>=!Q8v6zFkc{O+=nmRDn=HO*f!=c!D8i54*LQcvGX5sE63TC z(yOS@eF40K=OL3LpD)wc6eP5yNFq5?h*iKCSIs;cymM>B6AJ{ckHlV$lprz^nwJ6N zhZq>$Q-bCpq1j3BpwjHwzc z0kNxpB5G5#eHwv*PDrvLuF0?Czg`SOei=-g^lgQA5#l>EU~u#96T~Ol&iM5x_7P&| zLWpUK_;um2VVagx$dDLs;wWL{kW4ZVOI;|%@AC7+bG`5!N@0O|m)dK2L_fHE?#Lhp4DS8Xj@54-hT%a|$ zG9LzQ$7=0uWJ~>e4x-Pkr-=5rL=x`Sd#!~D8@a%(pvt$E!wm09S_{-<5oyo{f@692 zPvZl)i}%Ex92h3D6v-??RwFEXY=)|gDwbaY?{bf#6<(Yj|32yzuY{A)CXD14NenN- zBI(Mo%B@hH*|@^W;nCP+%ZpQTUDm(@CXHI}?HKbZbdoiANMhS1!XwdFm^Yu? 
zd-F1_^B}QPK28wQ0!n23hH4?9sjnpWAqRKVSx@X+7!}ERDKgEoJ_|M;ZGh=3VXg$$ zZX>gLcMlX>n2p)IhN&L47zHGByBiKqsa@-)t>HTf3EgTYI>qP%<8Fg^L9cA+OJHMF{#0EJ#c{8!v5qxZR9HNf_qEIg%tGn6NNg*J_7a6= zZwA#-`Xds1v7m+V*b=yq*m9>^ITOw?H0mK@dhuI{B(M;x5ffU-tTK-QeqGH_T10!# zB1j<)jITZlGiEj8Lya4y9pw}l74#ix$KO_CkMYGgkSEb8iGq;RldGg1!|+6*%Lbfj z@ z<<#E$9dy9kW+LY6oA8d+1^`|OJ7*<}0}=q?ML{zRNDx~c;5HygYF49$?Q(KSy@IH z5elPiw;_^kZWO#gQ0yasXmwH9efA8>6@T&2d8y z2-25hFm|D*>gsJOFJ91=2cAc%n0 zg3}dcr;xpzLvO?NrL2g<^I0yFMZGR-bkqL~J%F8Vc!50waMy{#Two@KUd2~t|N3bR zo%q1iVkzs^5e-(P$Snzd+}LI%RP0__3?gb<7|yGgHFXq3=tH@_eMbmthSv$tK}MbS zAWgMVx5k>|If}3Z}GqvA|%i&<3)idkI9; zb2(Y?4$&v$F^D=@&kvJXLxSQrT9j=<#luU5Ai}VfV=FX+=JAcK1!#o&xk0E%9R*_1 zJ&LHuNLopw(rgtw`*WC`YyX zqR<&E_=x8wF=3t8Glfn+n5P;q&f~`~WYLShtFV(?MQ(+%jnr-M1)O|^CH}*BsYtHe z8;$JEnJYb&meXso1g=6Rl&a9x|7+B2YYMBI5cFy)4~x8x?!XD#R)C~fslF7v&YZ)_ zZjQ%KSJ8lq3Q@>~jdvid(&W2qQuyFp?%ikr+#+bqLpb1AGhL_*NFIbNhdq|f9ceP*%^%ibCSzG_HewYl{vVAR6- z1~^k2MU039 zU%t(SstIjM=PE;F>KF}r|Ihq9h%tjw65$HVPVb)ZhJ$(wVK!B+)6O356s!(rXrSu7 z-fj<~Fm4F6(khd1Q=8k`fv6~O`Jwjw`o!U-#aYyFP=+U3y1^_W|uulolx`6XfmDmsJFmO*H zL2nB(6$RHR;gKHTq^8}6!Q0rNfV|DZ6RbqwnPyYKuuV)#FV*%XBnXk|O_XlGC*YCS#&S8OcIy{nR zMUYv5?{WhL`Ah0|$i6E4-#_aA7OdDh6naPV>HKG@%&fbpH1O7pt*vp6pUjHfBejd# zr=-6%vs4c`jr>#pebvq_QoJ22jpjTL7N_>lJEo1_%j}|+W%@qF4ra}Nx?n#psore& zyl?)WDdi=SdzrszQQIpV=ZNdDhllU1v7lugJGAA;RA0h3iz;m2wnlu&{^C60HspT`=Kp-4`{H_wui&%oK+x5Y zr_0WG%?2aE`T(In%=bH3J-R(4(ws9Yo*pU3ajwF`I=41e5I^uEPmPUNbqOB07ry4FmGBz^9Hh_(R>N~u^S0+Q_jAYBvH<51+RG$K&ZGk{A{PcHdY6uWy>KK}^_J+E! 
zEZXQr&Le{R*B+y^z75L$*uww8a{idwleea> UL06Vc$;xfF?6l6^>>2&P07P6-DgXcg literal 0 HcmV?d00001 diff --git a/doc/doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png b/doc/doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png new file mode 100644 index 0000000000000000000000000000000000000000..2dbe6fef86b9a1bac929d8c4937d5717aa49dc37 GIT binary patch literal 94794 zcmce7WmFt%&}QQv+#Q03Ai*WL1ef3%+}&M*JHg%EgFC_9U4ugi?lQ2$z4zPQKl^tN zGiNy6Jw0#LTlLga(qRg65-5lShyVbfNPZDj0szQU0D$O$hXo(0NtYo200e4t5fKG* zLqh=g5|)@~st~V%J9u`*cN8RuAsnJ05=T$vmw={8J5ydFc$ldmW)<2D`5`=%O)ORn z0#`ec!cq(iDIx$8$LTu*aJbIk@^ImgvyTb!rfD@4O z*LYz}N?v0te^drF=C#LEO}$3@_oq$utwUe;ztR(3439r{j>mO;un_?wx<|ryufea5 zXW`0R0oO|@_ontiJ1*r&-7y8SUQGW{MC2TR7|g9*8^B`1jwE`J+=}7r&CD5$7wZr>z3*S zFM#LtoE19t)uSX41qI#@@=y?#7E&b#!O0(;87iq8hYP^{4dEq(wglXh!M=xt7=mp1 z21g5seIw<9FBAM|2FT`MXTWCu=4*zm`o8*`5f3)TA5A78SrB@|9|k_41`R(?=u<34 z!Mh_taWul8!9--7LxhL{!ntO&2qS(DxtR*+%7`jL&I*{-M*q}ft z-ht>T(6jn5>QD zZAIEeH-FQ{U_@1mwj9vuL)j{_Lvpik=h=Wf^N;Da?MePkc)otmdoTG6DHPC6p^ol_ zU>`^vh$#GFi$Fi}JowA|GPLn9-9Ghg7Xz3JtP5ECjC+4kD)ac}0yHI9bEYG%qfbZV zM<}T>3uLQ5z7HSmcI;ALs$71(^khj($-!D_Ykl8)i;_m+Wnf#107`g2?taL4Sm6sBiRd@==02MZ}N! 
zq?;e(KR#1<%DSjVGnCVxoa{do6ar7~0Y zsFL6M9&ArVHW8=IBqEctO!C!V&k{S+d}>~NK>VOdSZY`VSg@{8N`o1L-GkSIeF@D8 zFQUq#POGV_U9>x_ofrQs4)6@b4M;?ircBAK$-1Opr+9s0XT?rwOq)n4O$|z$Pl@_@ zWF_yy@)XRyvNqwB{@{Lvic^Ghgj4+S<>y>OWRq26hHkGWW=oO&cwl09Mgr6Yb z%D!idB=&xlz#cRk#F2=L51Tb99-S~VRXYGp1r(~|rwQ*ZTP_BQGE=B@zgH&O)xGUPebH4Hc2FC4TSB_Hqz|B|lZ@ik58 zum5t6C#NQdA*VdnIIdgVTqA2E*Y6FAN{G@YoD9qpff7y^!R_Vi`O~}K}hCz}KP2z?ow1{{?wG zQU`x0bs=>=^{no>Zo){~h{pKNc4njAbXGaqa6&#^G5sOk(;?B(bvN2}%c^u0(HdoQ zrB2pa>!oq{L~d37FWKMF%?qt8Azky0)l3N9wkzz*rUUuE4lDCYWU6Gecyfe>FOa9VQQYLVL8tV8!;tBKn`VwFC%W)Gu~sHcC36sEVj0ZuM`eP6(H6>t01m zMa|`o`XKJ0h>gd7_WW#u<0NUU$|IRcx#+xtP5u z*y+LCpz%l_OG``JOPALCofMgr8c$Ah_G0zws8()csY94wd5Fc8X#c05DYq*!!{zh3 zW3YG8O~vDKnv15VKHOYwsmL7uEZtwkKLmUS4pqggKifcR9eOd@YW#ZMLYKnBVm|TEqtC-k0YX!16hM=P}Lh}H-Ctm_dQMqoClB9p18b|ZPeGfd}p`!TeSXa zWj4v$x?ZPWH@%u%YCZWK2@Vq1@#?sapShq`;kR=cu{!N#fjW-w-0y5rtBFa7E(qP3 z2|m$#pO#Y@3+>GSO4W`mC{W*Q<>|jS|Aa(K~1IcdUxP9^%SRe17*Gfnb@Bf^DQg3Gc_bM3yzyJS-1rJV$&tXNepvWNnumx+;diGDe z4gOtaO5@=jx+}%Vdy44+wbs#lB0vjBf!GC>WJOJEN#XtzpuS%)v>De^JqzfD&JolR z6r}@{03&$vpP@tWIq-t~e$2L{@c&)w77&QGu@RER&76m+V8t9JH|hfAhh`8t`xBbB z=d&Wb;?xle%YVvXnmViebQ&AyOqr2)A!-zW;cttb>xyyAvaw=Nul8N zfSZktusZ0^09?YRev#?4FhUNSyQjni#y!uB=lPk-Sx?vMdV~P@q$`?VrEqBK#!@W= z&06mJY)CoD($9U&fV*%g)dv6XItU7S4BRdP|&yh9U!YGcFx{|JV0`cpU(L9puBRBl=%ew4@B zee++r@QUP_a{_r2hPZz(Q-4J!6I!rUJNjse@}G*GQXweU3i0G2Vm}0)V|||V!_Z)& z$b&xGlZ8{5lKkJ7UDHE}H|JA1oU-J~g*&plQ<8?8O~P~|6#b|DY!fn-L^@csMO`Xn zx(&mQj=FzOAkX5%M%vv4%oh@MliQNsX+S4q9e;NCHN4UhgsnYY#<_L;o-y0h6wyG&Q)FE{hf0WC8l4CaU0)#n>=w})=yRb z{BwOQ-|2rp#`;ZvYa@m%4~v4{!WREu30cp-ZEbWh=N77ZYmNSAs1UG7cRtZ*6e93K z8yl$qDzV6PYj+Of9eO{&3|Z4Sol+*UlQFQw^8-FV+O}Gh=HHvm@)Oyg9iS;vqa!Sy zh0z9w$^lD|Iq=;G6jH8rovERX&5<8|WSY&h(23Tsnvda7tjLPeo!<73H~wd2tbsvK z-BhH4dG*tg{xdoB>ngD#=nGT|X=>;R6F*rTys-%rRyOQ`o*cHCpO8c36kmB)_f6FP zm2Vx;GKOWuFB21kR}>c2P!RQi#6o`uqyphVXkmY(ghLG|M*Jv734Zn;ZWku9!o{&P-o;XRH+(U@VH5zG>Va_&otygAiErzju$(0SAX1T*{+3S?Szrx%%SZlr$Vo*T${e>v 
zhyo~D!BHq6hYuQ0N6q2`LgUAza7if1|M1G`1H;;+t}@{u2H5?Ig=R3b-;qhf$8B9| z1T>)r{Bz%ZEC2YflS+YTR0@5b;f_5Pg!Pw}qXo)e;T8=4-MUr?oKsWIFoizXP6nh> zALEf9u_iRDqjaS0SdR}n%Vhs;6Z;aqBMxO?4Td5E)(07W&F4R=#Y})YWZ}q|4Q<>V z9<7h7T({9bw>+Zy0&l-xml1?EjSDOwl4o7qc3#h1l&Kg4g@H!3+6$#WV*XV>2%EwK zts6x+G@`xMJGz;bSUB9VJl*);lQK-*B(DFt;}Kkq-9O3=)=%*Pkp5qXO$3GA&BaHg zkG~j}GDiLTg(GYAW>1KQX+^^9^iWzc4&jlwSL*u3t$L`df#xxD8`I7dIV&yV^t`07 z>rWegA95`n#?~iILnccj3eT1j!-qKHYK+M-f$<@jYAv0S$p;7I}D{I8*&N= zvG+I)EPOD6(a($TU&rBU`c(v7opdQiAZlQw>ZqJ2C@4m3P1`k2M{Qh%_4tkcx)7u@ z-Z@5aPBwkfRRzd!PeNU3rn5dQ{%Z(^3BZ@)_IfPmUx+KI2x6G^h}+!jwht$__}x7n zuC)+D0Ti^Wc|bQFxo8R+g&aIOEE!bzZ#jYQ?yP}m?*l`jn8l;z!47*MboqqE&955$ z(f{)Nw^Y$uwK}aBjSRqyO_X5f7@R3 zcu3$R{O;gP)2h`0G}pcJy`1sI=mqsvo1WU30Xtsq2)%RDg^6M930J(?iRpw|?(=)J zI0Px^jwN?LG~tH0zGjWZbxWrFtM-jPmG&#Kz&Ay%3~;BN%C$?AdhQ2*=<>YUd1xzH z#x)$Jq_1`%8NFLQ4GHoj6@OeDzx+TdCECI3doeG-XzFkMnw{Z)f!{ju zK1}`}^FQioO$BCx(B>w%4y0%}XtS=?SToh4qx>)7j1#k5om(^*Cx6@$E`t28+*LgVF-NMOlYul(J1u+)cMbV=;x{oa6p`*v6QzleZTphDO6faRob?Pt z5*wie^bMOq{sRw{ED%GI^b9ecFID{mQiki%2P9r+r+GA%E*!97iYkq*S2V|7oqAQi zZUPj(k63G$_6Hju zJBRMS7_`4Sxd5>ul~-bSpQ%Jns4v%!%GY5zy1qe_ryyv$R?I0mx?z_ErT~h@os3b= z)?7R(Z`T8}dV(X@#?F!_0|!=#TC*dNmHGg$a{>} z7a=Hz1zD`vHla))55HbjN39xE_g0^u)$|-V^lW(RzgyotkK4>7*L$Mg)t%#V9eaH! zf~2H7gDk+YYXU7A3>lvcD#JJNT4cQcFxboSBueIR z)K}g90K~|cj<(yyF0UYI^VV8OJj0}CEQ5*kL`5|mI-CF)#+#K&QKQ5wV`21X@p>n( z@qpw*GY(#f4ejbc+abNvb7@EZa>&4rElF(=|1=(7G! 
zAz|w>67CioxRZZA5E+q9zkAk+_zD#KZTbAWIGpZLuUZ#PvHfn(Z&2W}ZEw4lVm}iB z0+)cTz&rK}tG3{Ks3{C{MC#nX-64+}u40xGt4?8!c?@}{iF)E3zPzrgpwTApKdy^$ zP%DxT0ER%b%g@A1tkp4yzGKM znx7t1jO4L?R(39$Td)tPk@uAv&6%*_e~Q*~J9%&=!rM97IiufT zgd6VZi1;ti7YY1yeTc<9wi3bk({ufpCghPlvOiNqN86A!6@|ZdR+Bju7QRA#XsTi+ z(R)byR1^jY61dg+bZ^+D$Bt?4f*86#jPlC5?(QHN<>z7-L~XTMjK^+Yw50Rkbl+G^ z-nR*q!IF~-w;#8kR2UWwN~~*mhc!(Ksa-VMkt(R-Yh?35gip3Y3)Z&1mnpVa*}#e7v**ju_c`Ws2hPb zmy(ykiar46{O|R^EEkKvIPa} zce1j&O;&5sdAzg;W;O9&*5?pJPxm7*JPUn=I(R%BzWbK5Bl^JnbVn*z@sv$fN^e~= zZ9fD-c3b$nJ!alI%6jekMBzz32;i?Hu(@|Mx5DC$`o!?FJ`~NIB3) z+%`l5BghW0{pi|^D*{c%++^~XO(=pceg$X0kW`W4lu>?$o~p0*F%A7b#L{YiMLNjg zB=?p?;g;>6wZWv{%{4S#D$Omo0Ma$*3uIGkE6-*K<3U_O@Cq~pNB9~Z^5AD>0;=<2u&LMOI_pB`e~a+ z`Wle8*h5sL+P*F+0r5ukl*V*ucO8wx_qWhaO&oXHU@y;@N->)Uayf$9mN%t455h6)njSxtU zWvf+brI>lqLI_?Ijc}g1%eI$kmA!)eFUVP;g6FZ>^bYT9g`iF{j>})4`^FwQW#F8? zb}kjC`y4q&Z5c97O0)8C%4)v(ZPmT)6jC|Ro?TCtS0ICacN7;PHUJg0Ck_nEBT;Cs z+fw>R7W6n^BHdN7q6M}|y~El{@lTS=HUVmh)tf$`TYdC%T=`tO#VA~}pdX8!#LWsYXt3WZ1mspPQ|wI3FRl>O1rie-f^n!Yrcl708Q@Ik~m0ZS|6 z>wh`Z2&hy1NN$+H{a{wCX_rrCHFjGKPAOY{e!t(f>^Xb5+>Q-8E+fY7fBMtu$-!|L z9+npWA$bmKy>+!yE&X zLn{CWkEymQC}y0G)zNaRq)sRPIdz@hi2*Ku8HlsK^4s+d-+Y>e1s8l6H&;VxMQlcq zaV?c>XR_Q(rzgNwzH1tAOt*Nda9HOVk=CO9k*opE*)aVwY+ukh+y~C&wA+iD7KH?Q$_f7;Yk%b&XdHdz|!gRbcb z{lD1Y^um&?gLr+*&Ao~T-?NT*H(ZDymtBqp z^@WTgKVCsa%p|*<_IvcxZA51DEW;obnqmc^kPmeWa6;j&f26>`o#_z~2WEv(oU(PT zL(FJmTqk%r&`?EC%->5s9j)g_XfHg#2rE0!-m+?cSbQrvN78_kH=XxPqnFnY;KN3h# zaKWf)G>Dl?u6tAsOrVpsftWmjOX=m8hk3C^QD>oarS9K=@m1 z)8y>3To>-CT-F~VijfclcSYV}O`Ycxdfwjqag5grALS7kQgT^n;t4o{UY^R& zEcsoH4!up+HJ=XH^~XK+9!KtqK&U}y9fmgnq1%~fG9P>=UbTdACVpnV~a$HA$6KGBK@T8>~o%Y3#%vqrY;4)I@7u- zMTkOtUMemtG41(Vkr>Ng&n{~AJ8}U7J=Dj+c!Xm0H8E*9Wrg zGOS+rvlDs&Z2BAgG-F4|@LTU_qY?tqz@^6bf$*5Z4E8i6L2p&Z3L=TD0e5`)dHsn# z)(uQYNG{p>!4|jUjtTRy5@BT#cR$06f549`A=%_lcTK>((4EZfEbId^Xcbd+=;_ba zD9A@0%u#{vm=#IZYLbMXb}oleY74v;J<;sME5$UuHQ(0B;JK#W6TBmu70r68KWz(g zRTcQsb;z^;sFSoTi5-|V%!8LHvv_!?nhGIh>h)AiUiqm?$6(TIf}^o8+yxVBdZU;@ 
zBCBlBsea<0R5t@bsUBBRS2NENXtt-0`t3f^@vW4l zKS%MSsx}{2f`0AfmyX2hb}T8ZJ2lh!1-{nllj2|Ru0nq+1_;D6?$?_Y4MG;w*eX(d zz#X!-IUOdUP5QoHPT&HjckIOd?cRZ)^saX0qRsbsyR+R4>iYL514G7tNPWQ>r8&B$ zlnphSV!*5^)rLhkbiLkXT>slbXJNQQ-oCbW!m&z@Nr1gkc9uK>thJwU~q=*vvofTu0<|yssH}vP?65- zm-0}{M@8RSL$$93pv$5UdM@#9BX`XZ-F~$r$ZR}+Rzoo8IE5~b%drg{ATm@Hi!dEq zC*X?DFtrED(4V1;S+WLh3BVK^;ZQKsWqDn;VcgBy_buJ^kH9)EbF;nK_k=kaTT?t2 zOmgnI5YjTd>wKDDM=An_qTWnYKZ)b?mv__?R5fn;9ZcQ@ zFL}+Rd-n7G#0$#N4Y@84m8VFsHQvcQ=?FeJXkiFz??#=}sRZN4I;r~AeS71(9sywP zRRLBq9r(>n;lBLip{e4z?k3=ed}q4Hc}KYWtSeItNWc(-ofGaUOW^sBs}CPe9t!7z z;6Uqf{!BE|eU~ELaPZ)hxT6QkC>=hX zB3;qD6x>MkcaGziROT5E^+2{Uvx}^Mt%&*gdfs3+H+k=U?>f&j!wKQkM_Wc;RYPSS zl5bR^YIwhTzbZ}oQA<)xE}eTX9TyT8gNRb(2HTGWU_;MbGcH2y?Rg^^;C=-h9mvGz zVgFT&r1oaHMz4%<22|rA)c1qV-JHr1$~Jt%OzY-#;pybFhPb|bC5qQ&qtw$hHmVvG z?=x-5pS68iSGj7fbXVOSxPchRQ!z=|iLD2eI=D7TzVXAEXlGVYGc-u!lgQ=2C!oKR z|5L|5f&{}A^tVgpeS4S_Hq^}4?XLX&igh@$+f7u_kC#&S_ad|L_W90PH8A#n`cihJA*asYJM#Ktq4i!{`XqDgdbiwMFsvS z`O2P$^@WCXLYkY5v2I`}t)5q%>3bSqy_7Yizm}|j_Ef&&gx=zUBx@Xcb$#dxP@1SU z(7O@ z3vuUb`@wxFRL62d>6|&+rbgDPb?&`;Xxc*Z&!?ugN{yGwm}L_RL@i5KEN?X(Q*0Js zhK}juM7-PU?Ennvpq7ho3C(6qCy$2PlpDqQ5q3Q*)xB3QvF925^L?$Xp)I4@}Q-r4i{vBs}OLd(C2iN98aHX)oa$pYY&IpAqxGHBkVZrHWg{DwO*4vDc4&(3go zVhRTp);!PMvU#i{Oh*{32T_vua-4sj83l`zULaWdy$bObj~22uFjRBilqQd;o=uuQ z*dv#m9!>Z?PZVozVQ;C*gee6@IU2=+`wnsD*Ju^q;#k&ba^?H4JV70oX?o&oY+dAr#W>5^B#sZ|w%P4ZOLgQc7px%e~DPC^;U!m?GAOYg8It$5%>qlpicYli6bV3pg+bpdY z6K77xCeG{^$%Uc(EMg9X1yOT4n|xu%j@~4~0*0`KOV$;HM2{2*s0*R&WQB*(gpG#n z#km=SLX|Rjl1+rsPiid^GDDSeoRr3~gq@7#1WP3d^Cynq?_NO!x?jC_q4v;i{Na~m z^7YLOq2k&Zk}qcf)a{O7uhZ3Kq6W8H&TKPpnSl{QR}NS>cAp$1TLKQsK3xt(zXwB} zv&CL^o}HzY_fF@aU!jfZoX57n|epDQextVjVP8Q zkxAmR%3^!D4<^7+W^l!b$OtIxODX0oR690N!dPW zvp8KwWvYou^|6ueqPI?9We9AufugT?Tgv4PPzlY?UDLf8RK)40_1yCh+IpmAPebTd zZ|l-~tMgK}WeI~>I!r85j@cGvdeO7}{%!I`o5$+D;`sdPS%=&5TZT*p4inuj)OUC;NtZ6;khWL39{#lmUyIs0)i$kt|Uk@nB%JI>RGd%K-1eEr8SAoxuL ze*&}HAf9NiJapM!(u$NWZov#Lg<=E{l|$|5>Y>7pzAY*3tiZPz&rNE~kBXLrSex$Y 
zJsK-&Gr3C!atIFOq7dlDLz5Yldtq7hqxGCLniR)X>{{a_NllW=QlDI0GCAPmZW z{Vi#k->J6*8}zOQ^2zxrCy{KHT6Lvwt;K@He5c}3iF$n6J1;^?mR8nU$)of0H{U&9 zvFhRf;4mU){d{_2X!!Wn1?0ewo!84zmK-Lhh))O4dly5ypeK|xTi)QZ*Rmr??O_on z(0asG0AHt;xr5L+C`JO5?5Rnmc6G5IEW|Pd4dXQD7qhO6^ zOQxzey!?rK^0_VL<8&*vANwG1I`gNxb2GP+ne8DAGmnB%X-V#p>z05Zaej%3zD#m#NvXeKB}7T?|h2VLh8(+$^hLwV>Q+RK@P#q}d%Xg-CgI-QEnzzFN+7 z7A3uU;0z{|AhCEZye(a+x93Gc!We6Kb*oS{L?)bho_&5rd74dgn=LZejLH`m>3P>i z324g7mPerAfZRO`1UV4i&H-=?*cW@Do?zX9s`^7W; z#c9i%Hanw+4-_nrkc4O1(}l2z`syP$6tyG7mLwQl-s9sP)A^|Q^s=93o=&)En}um7 zR8i_K-M>`Clm(i5J@u_ZaI=^K*PW>%d|D2x&u@H@rl2AH7!L7fv6&LN38CK@GS1B-w|1*FEt8`%wu`c?&Wf$z!JcN-=ZF=^Htp@*rMb5s@lf48o$W!Ey$xOqGN3x z6V<9#(pD{CHdvZN_(5xk=oE^=$9*8A)Yv;nRml4*PD|p7DR;)aa+)FclopK>AyW== zIz1nGqb?nN7_=zyza`t51bF%$O}Z&92Ohv}hpaLbflbg%!9kjd7uN#6VjGRa!osVL z!+~^BFW#pPjnL&Vegb_f?bm6=_2#9H)}wd`bZ81VKUD9alS{0p)?D73+CtZ-t^(iO z+l{n|6N^Ri9!c_=5-1T~LNCKdX0Rht!ze)4)CKh;-l0Uz(K8^}?9&BDdLWS-WVRpv zY3BemY<}qQvK7zzG>GExo&7WKbIk>wEQq?C*`5k^vkh?3aF*S=zp@-yDuO}fxto$b zgViBCf3SoIE@vX1tM?sHL?rT7#xu#Lc^7_|CZShro>!D;KB(8sX{L!C`eN_n{nF>& zz_e!NM8st$ObGft&bjvrN@}l9@F>%v=hPRQTop@Sf^@w7HC(FGM>t$6Gf1Z;nR@7D zJ{&OAA8_&+tUf4UMEa|BcFV+e=H*P}8P_H1i>tHC_*x#q>+-Y3y}p*+JSFLhE>k2p zvx>Xi|IXW}=6zkg$1h+`?{rY{FlTpgA3>P*o8#^) z4G=oZ#df!LzL{+W(`2Pfs=f89IhBL?^jfz+m6yV~=~=D8=F`@(zta}YK~fQOjqiP0 z?>CC<@#!z=!ZQdr|3s^4Qz5leU+4rJ82g8_DR3j2*&*buRXB~{*}di|2PN&$w%oMz zgmWZDBtCf)f`@$mK?c3hTyVW8u6@ifCKjBCeR^BYetW}%Cy<`iWm$W4CG8u=cP(iA zMMJq?UsNTsvLNSN;pF#f@3o?##R-i7@gDC(LA2A?hJp~N=>0#z)x?zf$4r^bPIv54 zX@1l*V;0v!Cl84s`)3*~^Mg&(TRUp*B|Ks9pN&;&Yf*yj;UxYYQar*f2>@@r178%_ zGE8_p4r;C}IYLYmHwVpMnf?STbJ{%>NJsp)3Q}$1XOZQiu_p5Ztrf)vW zsW6w};;})Mvn0F2S5vxzwy!rv0sFRAx^1)~Zw$bmt4K*QkF}?gvb<7et5PyGKdS&au zsZQwf0P_neI_N0)1t=W+0_%6vRocw)JlyT;b0dAS5L(Pmm|w)%*+e&`OOl2AqiIJg zyIfBTY{mCe6Yhon;^Ef=_743ioe@ptBj>Y5q#>bWqDH5`m+-)|ta(HfQZ#ufHW_Ua z1Gi+BISCNQ=eX!u*O&ZY)5vI0(AAWXy59$PRLc>{z#{+|yS_;$50ke18%>K&Yzfbn zatjZfAGsILo{tZLH;5FR))S-?QKr}c(g3r~Z%>`1jjGluJ`vT77OEh^2LIbjC~&*w 
zZxg>7sUjy9meuBVr6W1?^~MUvY!i4qkv|k`joVBoBx6N`22P4bSx|86*E8P z$EcK*bnqJAlBh^bZg7d{xO9~c;PZaYkkuo^0U6I*!kxVm5))%}rE}O`B6UgeKI(g4 zkDaBFh(9##u%X~qvMGDz_1Ui)xHvIxU`iQ?sW&Iudg)U5 zHZ?J`GgyrQQTMtzJoDgcn#JiB+EwMUJv@`v25}vVEeubDgx~m2hqM=?#biO+MVg znHe&^L6x6Cr1NmL@%(qt0F}b?iH30KS9DMOOnJCia`4cnd#ByaoI04`1>1-Gbx9-t zk-SXHbdq4b&k5%v@Um2#y=l^66BE-rwbbnSH_kx7dg^{D=Yt8XneYTS2v5NaYX2@r zc^&`{9c+V}mA#}l+!tpWH?NiRxmyMh759!yO+{doMFa#br--1LM^*>Nm@|XGbF<`d z;p&rq6#KKeqC18<1L{F&EyNksGy@)@Np@p@i>_D-kdDVH`PjNj(i^kUKUqO(1%v4l zk03Y~_;{20`KLDT{*(8sx1$Q33VQ|@euH<1%bWmgdi3#K)boTrjEYUV4Ev5Q2N8%; zF|*5dw#(WQ*;fmRT>SgD!0uddsq~T70{)>p%N@jd`_RDaT~#A%?RF40)SrjM~Rgb#9rME;9!;ZM~oo?%Ce#PQYW*IXI_v`h+uECMk&rWXl{X6wNXm-08Pq}`; zH1m=g=Uw86$klOnCXrbMpK^zUjjbChamYubCD_KuPoH5u*Wlh&g_T&;bBb$fIOM3C zu6WIy{cHZS?;3*x(>qIiypuY=O;?+&d%T8OmkS3+@$7n@BvrAdZc@fSbe1vmtJ==%?PSr5o6P#j%y4Eg-$4Av;WMM1U5Ww>D+d< z_%FKqEZvS)i((?>G2&DZ%qV(Vyl0u|jg{%v&Gyssva2SC!?B;KoHnXM%w;#Di0}L- zcOKzhiE>KI5Uu85q8Ho05M(2zN8QFfk_XQSfH0fP5zJh$sEqbayMTPJjMVgiI)Ls&#}k73f1OG%BzhY=JJGlqTaHYr!}C zkzMNR_L=>kr6Q$*$3bR3^!(T)eSEx9BkA00BFq$MbC!Lm!=*{!6di9qdPG?4Xm&46 zak~1wn6=IuP8~HRcy|cXnfwYT>b&DU;Y<470_BCEi}nNp3V`x@2FTzAr_glhfU&9W zjrfpDF}t~C)8Yc<2u(i<2q_reo2X8FLC8W)M8GKQ@BK?f;Umg>QPg^v<8dg3G-P)v z+DOMrGe-xN#vM zMeK~3@O~0Jy=_IfN(cE3N4Q=Kwy1dE7E9-%8B>3L*-g;|+y4Th^_{2s*2iKas={152 z5$GZ+2ix}hAqKqmUnVPTg!v;+QynJ*0we%?1Hpx0!!i!evgt%Ge;NQRSp?LQGAvP~C(l8_ zSl&b_K4Ox~{UQ8%o!ikrt&@pg-0l9}eW&qCmF?!_MdNmh*@W~?q~dBZ-R>%-Lc3;AePhZ#0ZvPOL zlMZlX->0F1l3(({5ZSmk9QHM5{!fQPq^U+_`XJYxN?pRz?nX+ldvLq8QNxQc@|GG9k@9R7Wv!#WmRQj1pxKa51XO+3Q^ z{?A5=o`G}IId`u`cyOr#>D^(c*6pYUI8HGN!dI*K_Hlj(IU&@OMbnhK7~J&FTY9G& zc{DPc3(>72$~M${KxMijnebM}YKPe?qylKsheuahC6a)*!Di0=WC4_+63Zi0JsqFZ zILB?(_15JJWKf3BcDo}<`^3-UzMKm}UE`+tAHc)WcW3_H@(avCdF|zE-!$}JPt;wc z=ezfxKz)~l<+CW@q$C}JjQZNYW<+Fk9LQPh-oSjPM{egiRD%MV*pMtuf~GZChTbz( zgcS2@D)%lRlMnf!yh2Z-U^IyPwQP?}&=Mj}z7%Jk(UJZxTIpl0O`{=ruHopNkzB!W z?;x(=w4ZM7CX(w>zDp%hzN&^K+|*a%u{4NhU#q7 z6^y^TUmGMitsit+=_AW|&#ghZ(}s7VU4#Mfq$iM17s2pzd?r@+a!+tir$WERsoIX~ 
zMY<-Dbv!jW>Nb(X zv}uO+gyVcPfUOBq`q)Cj%U>RK6QywV#J8y51ljX$>J_Tb6tKum@Ybdf^W`tp#K`Px zLx38j&QF?g2|@36vOQ4;&o!K)3Ga>s3le)98wN+*bwDjaJog}^fEneOpy+$1Iwf6A ziWQu*BGbOH%2$hat0{^TTZ;NsX$jm&Jx{hJ&Jou@{pYx9`4=Y3j!oSYAw*9L>)-KB zz;>hOhwbXjwd5RdTboN^ho-~2jL<}6-5-^TVkA@<=yw=IH^l!uW0d{uXWt1B4V%b`7uf6 zJ@=gw1v3v7Tr%i4>AiknCEQp+4HY0Qe-ag4c0E3;dOTT#RdkGy;gj_Q#pi_qRhPFH zx7f7v2Eh(zD%Q+W&?#)`In4+LpO^J_FR8##m8UEnFcj^o=0*uiLIhPw$QVbA<_jH& z&KOp#)P4XS-=4dfgkr-+qt;mLfEEmEzaxOyy#zIS;>Gr?52x=wN-H7vKiN24c9u7o zJ{gPQKm1kzr3+an@E!9 z>fF0Ho49o`^1nT2V+%EtuKVn-++JcJfe7wb&rndZOtj*Bk^x5K7_h?@jLN57*9ob! z(e=lcnkP*QqFv}fb@^8AJR4^FQJ(xd2La5%wLeGetB?Me9(%dbJ-S2bKYR>N3^|K8 zUq`S3(gdz$j=WRFo($_@e>;WQPr?6iU^F%nI5~FCH?GxH0Bb%z1Gv+v9x+#fFy$M> zhBwt4Tf%w=Eqt4G>C|6bQe!{hiKJ~AfNP*BuHN@Uz;z;Sd|z(Gm5PUKa<#6>+U#gv zF�A%fgg_{YBI;iuk8FAlyM-K7MSSn)f}Lax2_Fdj0vowwXoitWi*iP#FoT+v&AA zDrj`5CZk8K(K_{_c`2L<_bDXRwk-K{AuUCem771zfo?S;Y+_j(bVRhezEu@)w}0aM z(e>=}LL!MvEa05Bq&aa``zg@ynAftAoD|sbxQu+UX)LN}cPE10VP0W7&4vM;jJk8| zcI%z@ZkQ+aHa}oH<9!lqGy8}5{>75Jh~H04$O#khh<$x?1cOffA-g+6XM1I<>-X_P z{91jdinF#li~(jOsx|I^Q{O2ksnV2AJA5UeK(4sy^NvuO_80P4PQxVYu`wV9M=wsg zsIqx)QJSpc*E#R9fc(W?qa~!o_Y#+0X_eV6M=A7*tnj2sk+0Zw548#l)eX|qj=Np$ z&HXa_)l6@*1VFcPxaFu$DI@QWOP4o3ULJ(hc{wT#!<+gjUklD8owgF|J|uf30+&R6OVDi z{*}yK^u*hjZX0WqgZf$wQ@9hEY;V9Lfjs|fNFkAN#~pMvWfiOYW|5O~B|fvSviaWU zj%YcI@xyaYh}%evF%sMP*=Ck zG=;)!&h~k%FN(ytb=k{P7;e(6T*5fN-qT;L&H`pGpK*uyu_FufgBFm)!wJDh?i#=) zfu}?j@kU-DuVqao$LfrF<#`PMH}w_L+{;6WBD+fkKlWgM$4VARv`jHUmdmdarY1+$ z_8He0U$zBe6QDoQ?$sm|VJdM+pE{UtSgo;?P$aeXZ_&rfy;O8Jzi{4n%69Pv`gH^S z#yfw${e01$1lAD<#xmTYuxLYH7s28I1=%ydJ3*PnCWf;7bZr)L* z#772LF}SjI>*1--;~iVIlTC<&o;jxgY(%j*$R?J|fBvp|m0_uiE5e2`dPd~m6k15y zr;tB94-q}d?>m(*KQ`?+&rmqy2BTMyFVFq;cd1U{2C*Ae>T!MFRPq+?v`H;M4e$k_^>INsdIb2 z6vCJ`%1MP)i~~XMX$GETaY7#z83kl zhSzcbIpQ9hHMTZ^XW{!B;{Uyev{ZM;YDLV|DtN z3j~)XTU(gv*X=;{bO8=RI1|L~)kus!XB44= zR-jEscqNH46w%X>bBH7XD$318J#Ic}gUNG5H*P`s&rN8dfa$V}**`cN17?uriCyV(FgHIWB@P;tHK~ 
z9ns?J)Gc^TI20z>@n)KO?Ly1sX;hO|*uE<*lZsOusmcG5wXy8*D8b(#zMI4$;1B$tHUAGX*Q)Gfi9M1Qs zM&{R`cmsYg3NwUZLXPZdsFVkL_Wl7YA0bWyvru_4FqO1#dUa%NCu2+%KV$xS#>C6X z-d=O)XQ=W+T+0*Q`SB-g?_rz!#+&B74zWENANbb44{bAwG9)4)Is6@q>ZOy#c5RE( z8ilHZ`oNV^$tJm*opF51&VFUW#~d7h+>tx6KawpbB1#ZJYTwX~EdU!6cuKfy6!%<4 zOB1^*?uX`UFZmmlbLG&UOwPrWa48;CpQy_ z{7~yc`!H~utuMZhEWGwpNFy zkm}y=-RZpz(~oTD1KAwm22f4Y(&jf=C;j1Cg2xk^bt4(0qCg$9RYOQy#_V3UIIG8{1HyTd_DAL@w%6h6mvSG%3uZh-~m?n6e4mA7o!Kpx?a8e zAOlptmC`atIt2pYPf#2||J<#?-9}KO3Xpr#y48=Q z(B#-IOJBFBpqDgf3EMop?A20aeUq$6O{@2@6-=mh?eY}$Q2*aFgzzuh)q)JVOmaswPqc|;|G$N6L z%YZT94NJI-@G_L;6k}MFQh_+&#fhnwT7UXD_vv)C{ZEb4Idp75e{ZgsQ}|6qJ#N3v z2i00jKN~D!7Xvs-1JC0c-47RLwZWH|iN|IO6UU_N9%1d{u#ZPFZ?vs`!6|dLIDfF& zN7g&282kS6s-$oPX%maTE1%S+kmQrc=6$X6*RweHE<#*z)uKPE23xE;$U1Uw*D2C^ zo_(uZMH*I@5xRP@uCo(i-pbVa_uGP*29KPypDN@F$Jj3NVeT+9kcD7$9EN90R?4+g zXI{mm7l_g~qS`3a(TExv&FrdOfn9{m?g&7Xa>q);UmAYkqYuI@-} zJ!CiiTt3dSoxAbv+uuX}cHh{wx}S7XV;fo-i9Qh{o2 zoq#q_&}440$|n?yI3vY(Nh${KG0`{zbGZXyt_$6mtmQm$eR#`LZ|Bpv0==xKNAt;cVImbh#1r|@^kN%@!IEFYR@?kfEciX;0(l?k>-#ZRl> z$3AI{4lhD`51#Tmt8V^rbsZOH=+1w56my!*>%#Eb(j|7i`Vn0<61a0tN3AnG3K_k* z&K*OxKtGw7_Ppa6L2&Zo>Q84Zd#O^A-zmx~$|U3lPfaGG3*_+kR9i0$=`iQc99~c} zApMgB!05^pOuDY9BQ zdBU#1OfZLcCXNjf?IHTA%5i|MW7+9rSEzOPRhDkt())3@d!!(Hq7kg-oPnVGbA+Is zV&zC5*aSjxxW#s@x4eq*wIXX!>WLX}1=k3OHHe@v1qA=AYGVc{;DQnCe+1pgDwg!* z6;J7<-S2A&fZd0e%AGZ8&alq^|8WN+Gh5ukMqD^9`7Z^U%uLTOYO0=eynM2#l>8=c zL~V;}0yC|Buz;iFmQA^@y%eesQgz~f4;uJ$LN--`=)Rb#X8ISI-dw*b4pC8s0(^jW z3-iB`uLR}FJyUsTt+zx%97!QD2>y22!Mh)UHItXR5wJwS9T)oz$^!KaSId0T8iPRx z8Vm`WH_mF{`#%4S6}KqP4x;?NgFiySM3p@S3_FJS*|4s6bB|uXuPZN896RrtTd1GQ zIY-a`dsQ270YY;wXL;a$vdeb->QmzK`h2#v{D7Twa=n)_gx5YR40%f-%fFp?_ob9I zvPIyAMV81*z+NR}#I$aC>a&#;_PkZlp4siJ3kPXMiQY7O%~4%vowkz}$L{bLynxc% zt+17%R~MC;f#H$xJotL`$|bZCx4#uaT)9?ND$c^RKZ?J&9_D82(E9L ztw)u9-63Yur{GeZpOm$Lrp7O;s~|CzT!YMtJS%={#Ve&XAwUj3)<*3Iq~AT8s@$sE zBYIoxqD`ZqH86bz)SA){u)9`SCd4IpG{GR*zt~@JU+#kR^zG0xcEZLuUKc}9rIv0f)xJv_V;xWmd_LHCz zbZC9GZD014BF1bgl`~0}9rBS$t*+bxv;VkX*PGnAWc%Wy 
zwb~3~>CFU0fWQNLN_|%^6FqB4OdZhy0@G}IAcB>%hP#1eoT`3*>Uuf*leLe7(q#71 z7H$DwE)O`x@&plH@`t%L$=OL#o~{blHq0q}a@VST8%1-B-sK(!hN}El?I-nEUkyC= ze#ACFs~2u+z}@}_-0kKNsgCni)Cq`g;zKXDQxIuy8!pOF60z{sF=`4`;}Xd!XfsSl6`0^)8iKFPhoTB>!+INUCvBkLkI4YZE3)I!;C@O72n`e zpV>FvC$Ui`g%FG^V!0_L*wQ@{3yBq>IUa%%;_*Dm$RE_EQgGRuovFm zo+8A_OR^JKQu|ZDo!;)E7DFPz@=w6{#hJ5?+1_mfAolY?$FxpaMi52{BvJmOhp~WB zt-n$6+W|JF;0hS_Q?}}0OGX(hSk;saO67ekI|6na0Lt8@UEo~+_WK;$LGIr%PNhSn zj^D5=eyLylna>*$fV_K)54v%626)`yM%|2BGhnH}ht2#4QzEk!Blzew?O$W@4eOFD z9ee2%_pF#rM^84@VkA} zf@};qw0FQ&W$n8=33C2NrRD`Jt`yrqM<^4_TW1JN^9YXMzmh#ezK)1TCco?>GLs9z z;q-W7uk$*lqtg>g*3mi#<-U}UHy9=Y8NaJGRZX08*J|A9Q0qxSQVr}+L`eT9S_59z z(JnEt4WWD*>zONEYGI~yP#TL^R*2By5Ug+wcyC^uWWrH?dyL1nDb)MtR~BU!42c#9W6R)GIOln$ zMP0ovLqGMMX+HPa?T?BpCVnuL?3{@_sdGO+4|0+ zn6DFT%2l{Ooo@=Gl~2}|Jes(x{acN|OOen>oFxd%z;hoOIqp>#3-WthrtvZuk05!w zt_<&E>}ONkSvvX?(}L5m^Is|nz2bUlG3bP_&23L{Vr*M`lg1i66(|-&)lES^Ro%ow z(G;Gc12N$dA1}8za2!+l5jYK{AkMzJew+tBQwfufn}t}Uo;HZUaYt{%QtcZ?z5?P zf7t(Ynt)OjcYz&>*y`o(Tjk&S?Ww;q+?D?flW5)i-IRFiq*RO?|8jEcbM~E!r-wHEu?}pD^|Kh^UTsils#H$-oS0u6d5RMu5 z6#;;|lsu|Au~vTeQ^o0&_?rLSYhm{WDFx;QGnHO`vjdgTyH3B~HV}wQbCNJWqw`k< z%n9z9&>wOJ2sfPC9Vs?XTWibcjI+7^Z-j*6rg-H8U(PRirc$yVh6EOhtZagkwdV6) zC5dho%Kfw!Vr95bcmbne=}ph1ucgI~;Zj=svjJ)}61JaWODdm@jL!ra@@B>)LGq3< z{s^d+eume@TqaGh)>g}u4CE7kM`e*qG}?GqmjOD?jhJwXH(#rbRZd$q{1k5-WPDGo zczAl(Qh*9hEpREXt(gYupn;KX9Ja^7_MYn^Y1!%eGY7y7vc1QhhAy7Z>Jl~COp~a9) z?~*7J>)J)OxfXvidL?^>7#qt(VS>2vaKv{;6d&?`V z7;*!lVgio*az7j!gn|uHAU7xoA#k^$${y;a*iyom`;4Udvhae|s6t{$W@FoSkYD<# z1tU}wgaQ3S8joYQJ?4F?TD zrUdNf>u7TYZBdqiNOKXTzb?9hzU>DtUw#!d+Q<_=z5}>wknhMKaMbpY_Hno*ZYeu> zSoS}7PwF{eiM~p)c^SU(IIs>rPjB-ukG)1KU57Bx)1!F6r7r$CtQ1lvHg6ZkL0LsE zlv}2(a0!wG&K7@NAV^bg7bX9 z{_mWKD1gTRn8->C9s*p&j%pkhT|xiBAI*-WmpW%L{0wY12{rBtV2u=xy#lO~DmVFg zpPwwMY{R+JJ>=KX8u}_FMvTclO>c11z7+F43+KkFTQ~jM5S1U6G!{tv9c<83Hsg!v zii53KY#%_8h;pl0Vh8NlSA-Wp!|IuS=E1M&(L?eP0ESbD$i**n538bX_6KtCqQRy@ z@T$}M_7U=! 
zh{)#epANIA0XGN=GABQ>b(A`MWB+{L1VNmYQcGlsnsKNw=L?b6U9-<`RF&Ae34tw8 zg!1;ZCg=TUE*VNOL74FHm@}P@9G?k;6fg4faZ=3-g8vYX6XFRJKl0+j>t_2zIrjV6DW$Y%{e1$TkU;KgVCf>NRHc> znd6H&VLPi>C$qKX z@+_9&vO4R2h24KZ>g<60g|xm{5u{&h{yX%S8lZ#R9~iRM8HLBkN+-9!z!5-ML81!K zSc-P&{?9Hs9R5YY)<8p&kaMHUVL(rgBkYE5F+YAkAmig2H{CzJ#W%xd0PP9_``j1@OOTTM!Thvi*-Ff#MqN#FroEX~j~UM5Kki;!`37sV z-{;&a!UGi72!hr2k8aUNqioGdXi!hu!*2IG1$Dr^%vaUfWnwgj>gl+9tm>>ZbvWeO zbU6PUbGPV6`$vlxzLR=l+V8MquuY)2IBWAo@XZ}_G{Tc8qrWiG1r9x#X*MnT5^&)+ zwY5N{K&AS(5PwG&7kQyNWo<*2fc2<2t0v6fF4KVt;q%OH81Y~ZN(S01c*WwrCPWtX zD6-b>im6ftNI8=f>RTA6X&iU9s=N47OFQG=Rnu7GX!ETTLz05~LCvi=g@|Ak5hN*u zz5olV3r2QbVCD5YLUL=_5F;JQ*!>Dx_OT|@VIL-W*%H@QhR(>M;{mSd-zEMpOZ_c& z7N-?<>m|9r$hA#tE1|%j&tn4!ouha;ddcZ~qB(qV&Q=KWH~nflp~Qz*ONw8G-vaW# zH@O69S~@1v9Ap%4q=_p^dw**}9|@qIGghNwo*ALak);wd2>rYb1jpW`GCXC1oVQqX zKs+un@Ur`^`RmA|&%xtrN4Nl+?HUs=bVd@3|H&8FVn9rY(ug=agSTq+VYYQp>BUhL zbB7Z`102$WF8(M6Jn#ap5O>l}c_1H@p0q{CrBFT`QuAlDC~?SQjWYexd(nYV+xm{l z->7W`dcYxu@}qmxixkIH2xrNxKcz`owGRT|nAUUH@WF5|426c15h*i+aZM6c@$-cP zRnE0hVr0a2Eyx$I7*CVXHjZ^sacCPoszA>cvoI)20Z|BKb2!IQWKAqyb8Iw{izmmk zY`)ck=i~pf9H6g0U+*46=Axxyk2l!A^pUf}s*C}PF>w$u;D87)(`V>xD zfc*y-7(XVw!ao{UNrM;PG3C`_m=x=Ersl(+J={9gO;hzrTVfeu+Rc_PR zh>~HZ&Iz)Qlxi`AXo2RA=-ub9^U|EkuM6QDd&TJo3qI~sK)MeVyX)ydC{E6@Nbpaf z0@=PINI5)2k@{)~)YMc#_qs$fe_IVxB%N6>fHJEqg@*W|z9Kf0l9Pf#D3(~V0dVeSO!V^;C=J6aetc}qQhkH z@5w?WckIJn@JQS^k++k#$og0k<3nOZSjy9nsZq_hyZcm$SS6Qr^P;Oeb>43a$3tx< zKM+m;3rLiGqQsIMFn5%<#3YCL_J1k-gKqb$g~zRdMYLdN>AII$wDcw-VHodYtgAR= zMs|8a{bTv!s3iBHYnQKC?n0OW?f1w8(k2<< z0CEmy;*;a$s@wYGA(Fj+q4$d$*Ef17KSN5;of3X{3J0)&q|@QTUq4*5*ZqM*-DCxhx_`e=+QjZxK6s4K!X}=GF zJ3mI5MJ6lQn>eJQ$e)}B)(1&ise;1gh+jR?WA#I?abCE8mp$XO)1d;?R%OaVX9E+v#%6pocv8vXP^Bn&unxsAG*{!|L$eQUXOg5laHC zT^gPnYH`Mj7zol3;t6-UsVx>gM%&Jur@%-Ka7 zqS{v23#g(|l43*QYHo;rA(GfmmDi}ZfM(+}ASGS=4ZrYFJCITw~x=V3IkM`R^q!%2q zsl2!=kGfX`R%+k8u{y9S(r(F5*FYcA1crB$#nK_i^8LSfdLQeP3d=xS5V?X7jZI>Q z-n=*5G29{zBeNG0t+GBT>Vh2N^C@A0Eu~sdQHA15GXQzf$9|vxB<8A5n<@6q0Uvm5 
zh?ed*f$j7H)j@UonQ2+zxw-?FJoCE45fTH}$ymLP2L1unfA9U3MDU zghFp_>W|1JwA>!Nr0d86$H^Fmn^tkabq4{9bmBuaXYMQs^9PA`sl)|^(J8$uLfj-m zZ4H;@;DcEqWc5Xha6yFZB+;yZR_Xxvg2aaX(A%*1l`RfpA4nF8rN+K=bE-oLWz^SG zp}e_B$JI9^ECIps9V@8vXR}#x1{*t|l68p^S?DLci(idxnZCVzvB7~~U-;ck&=1kb zM1_|1DWBup*DrP1Ft761j~P)9CEJU^izgqjK@bb>kA#*JhL$13B98h2*%xvBd#ah~ zv1i1+A@%2I|AqZ_hdK&6rfIpY8T7kWdAf6Aj#`$2WQ;4whNdjTgreS4&5Q zsOYT;zQ{HT2Na{sAO3!lkD&T2Xp_9ga&>(yb*KebjOp?>)UQpvI?j`rRyxaZ$+=5lR(4_3wNZ6P{G)7M?#P2-Qy-;UuoibwGd?@}@_X)QMEp8{TP?*f8fa8*Ad ze@YmHY(l6SA}UHir>T+@%Ge#LO-OfY_G?wwkDLuSV;ZC)CfRi(EmF1|vWo>Oa5kUK zarr}bK%2xJOzqj;UIs49I=SbqP{gHDfjk}^1C#tDS$ZXh(!pj1AOpUNEVQU$O;wHfb-(H1louEr#TKa( zQ+10k{q?PM@p6&?@w6s{6W}f!HhDF(S&fiC5FuGC`Y{pI(8*@zU;F=*d|^cvui$Or z)mC*G+4mi^Nlr|gceSTH-r?y$n43(W9y%;ISvK=*J1Yh{Sub=b04%TF%R+4<$5CaC zHiX|k{Rq)f6OGRFa0@CylW@RQ|HY6aCe!5bl>7kra?r_Ioi?q0hU%AHEGFm4fRA2a zR}E4EY?Z0l+0Liw=pGz;F1_q6g}OcswS(aGv?sr(?`}2lu&r?Ji9?>7$;8Gv)t97U z*J)sNNcbETCP6<zcB;7Xj~sc;liX?v>2>0%UVv z-x3vuEV0gV${{bsvUFRT9k4Ghfz|HCK^9p_+5I_&+%K}vDPQw*BME5a{N_&kMp#Tj ztM@IQUTOBy8xa(1^Hd%Tgy4Qzu`SnLka_V2hqk}u137Uj^`YcZZ_>tgvRK^2SW{aM z2)?iT((ctge+Tv;t#N5y{Sv2|5d_ixg_|t2_z9WX)Ao*veTa9Ijv06cdCR)C96b#Nf4t4q?~>FV?K6RajP@M^@DtRm151rA6AZ)7%T%y zC2kz`r6GL+Ak*LS^{6m}2YBmPoHS{E8DTi=`deLDHraNqId+uvA@%Br!vDAEp84Iy zM4_bG)3vC}ZqmumxAmj%7Evs?JT1hX)>B}UjxFrrqt^HReM+;Obsk+YGQGasWZH5K zGnUI9%)cB5{OmjsS!Q&Yv@2qG|E;hjwqGV@Im-^=&z>N%W~Qc3Hz;eb+F(a^2sKrd z2)g5GryM$rGjzF!_-CYP!;O&7hi0P)w_m66zNg>TI@IhAY*9e{GCDYBh;sQATgdmJ>Hjq)A>6Ji4zXXp95+5a$v#|$lZ zBye2^|0w%ma#;|{g$ULfkHELFaJrUdXLQW_s%gaeUIXiCkIlL-L;AziO}>$M?F5-1 zAmMy9xDo0JpWx>sxOC%jzzi(s=Ksh_dC7+>#sCHe8Mq=tKzqP<6*aNq1S2(fnPsmR zY0a6TrfB;4s)GjLb|_QXOvaVxF(v9{+N4B=^Pycks#XAr{EOlXAr{;K3RN!;iwul< z0S`=)dCwA@h}rA=ehaI(qKDq-e8&~)pGROPm}LzqF9^!LVIQc ze4MRzS2O~JS^s5&DpNHOuc3~0A6iw=0Z$FQ`u-7Am!kq;s0t1Q>p-D`gG`FxoU1qf zXj3c^4?ehV|GOA(IBbRF*`2-%FSQ?r${JAtN=vI5;%{%&@)3?=z0}ZTB^9mlu22!W z!+`sA0%D*HG8!+z0G4^-Vp_`rK+?hgQ5>}%LEKQdFx^7|b4jb7foJk^T9C0i)I)r7 zd>_5B+!W#!S;w!m7>B)n&)@a4!zY8C9+m$~O^tQ`lUw}; 
z9ZOAs{hEntcQD>pR~F{h9idITFOT>nI3#>r=OuUYoQaWXcCGO38wMr*U09>8Ua#jI zHv0E_lcDvVUm0x}lfhSvc^zA*76Ft|^S6Wgo|`yz9>QO5iKICf(X$Z5_^ARq`Xec5 zwppwSt&53ST**LIh>CNIeWLzOfP6*57zLgxGD=~(8*zxnya|6Ir_AT|?Ksp;v)srb z`^z@B0I=MvLbt#wN(nC{18GQLhllm)XB{J<1qU82u)>~)#@s0pBR#XTpZP*>Uv+!^ zdNHTPzOe^=-0IF*L}NSEI`4_;6CEig%d)otBC_yR1Va8Ygp-EjBl_r9!Y^IYaH^-h z*dE-$=s@yZy;~38*-C?(MmwjKOl`-QM=f+fWKix6qIBP^y01Ik|+*vx$^Z)2wlg&FKR{sZIQJb+eI(}xpujfBJ6P&yZpZA zEN4BH_2+0O3DB>ICuCat-~qS|n2f6kqgncW^mnA-5Do@~nn4mRFEH;oz-wsH#SRt5 zLP-)*53iOCXxl^?5=rFu#j50^ghQ&j^G}0OtF240bC$&kZdDvhRJ4YfHK?(VrRJ>;lFd`Qj2Ef)$Uy5tm3zdTzlQS-H3`F%2SHLc|(1<}2ca=0nGDbnziO}q8+4}FSB2)o$j z(RVgf=^S2dXe+dp15OYYj6+%&Bj#bogDUOFyWVtvuvOvAxZgi<5@BI0j z!Fx1;r!4TCHcyC~^r_^6H+=@zACe|blMkLstBejq0Sz?uDnp?%`f~*Q7OQ1!j6si$ zzed>2UI-o(FP6O#Vd76KUrKyzwRG*D=dKX2Jhx_d4>p_y43Wo{sjA4aBNTH^E3QhJ z=|hTpEclB#A)Su&q=ShuUKiRraQV&X$k;@ICsJT!DhrItmtW$h=}!gh`i4TlsV!r> z+u64R=_75o$pb@Vs5>x3vFYIB(Z3lv5do?S=<8r=x%*+_%eH|})=`k`{~*rkxarUA zt^Z)z(mBtfQ@qpGxJq zyHuge3p_S!saiH=4QQP;$`tNh+A_~>yZ&qT(gLmR$akPB}@bvcbJoqA- zA^k7JH_X8i6;{S{c)BE3I7&F$xJ|0K8=95cStzw79P5Z;<=j_^hPN~doNX_4H}MF( zJ5SbwGCRoXU2ChzA+VtH*qkV9iAYABo1;EDtcY|C)FqJv4QLkD0Hm4iy!6% zx$Kt=$NfwF_r4YJ4E6V0v!{m1;L0PL>|HzS+gynsCD}rM6KxvVoXTSC> zRhVeJoH`Qx$iv#=sod|sD+thL5J1jJD1D!UM^m>61o|%mgQjJuSq%UV`ih<65_^dN z`ya(48<~Y!3!*?`O)v((lIE4Nj2-wsW(ZXM*QiWtAG+ATf*xTBqTat>1e3rbL1z+r=b(LP z5M*gdJ;AFQpkxCc&`1cqbK#?mN!$$eT+cof2ag5&dU-mAA ziB$MJ@(t=Y2D{;DZ`V=Y6fd%oQMl6gT$>&ipj9**rn!vvX&_E%+@^FLO(I#%r~*Wj z>35gc1u|8J2DLttH!DzpJ|`f#PA(A=~P^H5lDUS!b{bKyyg$CeEbgRHi$b7uU8!b)`C&Eiolsu}32 zL@PY7lcrfiL;dbBs@XY0SjFIwds(o(w*spg4eAHW^6z>j+xG=_WbJ+n{v^e*PcYim zq1ds4Un&DR;LT42&`&vHQU1=>Tq_3NtLI7!AaR6MLRfr{xV+MS?!dvtStNh85l z%q9O^rBv0&%AXYle<;d+Jx>*p4dvKL<_ zy%h<@rHg9eD3F)`?_E(p|2~g@ObWsgtyqi2nSA%QS6jSw(Zt{h_;C>Aq0rCds%7vOjxyNC$I&bgM|29sWM;th@i;omaPROeTC ztO^#An0MBHXM&1;;%FMOMG@UzB!ZSR|*v#6y+FK^J8%^VXL-3ihbbvv5xeV{o3 zmxoxE!i4|oPqis3!3()kSGmcjZ1=~}b(i)>{wVR6a<6pt;j>=xls`7B`I3V_kQfx& 
zf}9Agi;foRZ|zDF%Tq4k=eT_>z_{=YoC{_0b=+d0&LL_pBGy3o67G=GD%6Q!zWPR4 z$4=zkqdCKwNK{W*S!Z zF4tA2XV*z9T(QDHmjzs1zQT553mm|PR5s>{YQxY=dYmgUyUc)^aNRUtJSQ;FVKAz2 zRd(_DTPcv)L4_I+Q|S-1B*B(;uU9nHX^=X|$NbZFp{7)ncH-DU35mAYI1^XkuJl%p?N+ z$o+kK#FB-3?+|r^(!p^jB6cqFa{i9~&PQq5=(LXWY-~vnIBq$kStA< zn4Q%#SzuQrxE(tDRo-2Pq&Zv4X+BY#Xa zX!3dYeARqg_XVAj0tFZheZ4+3Kfs+rdMQD(U2Jk{u0sA-#rE`ucdbw+hvlwlI4Q@ykN5N!@z?Oz7zIVnxkTTov+ z7#t~C)fOtHZYY@DCWWK+M;oxRBTnf)0p-3ukslVwXJxI2b zC^Kjl%bmHlvepy#07e18mjr{7Z#CS@dxG|_4)K?WhIa4*3RD@~j$DX(&Tu;%I9|=V zGvvUPzV)$a$~;l%(Ix=pJrbQvQ1!@g)w~tGur+9;9K|xENd+y3CAi_3l7Az=m z!ukyDwBbJ`&97^GW*4`w{t0H~M$a+jlPNfeM7ATZ4;-A!S6HI{f_e@vOGwW7K;c}a zRF+Ts!-5$-G!_^5A%dbqx}O3ZAVt##^jjD&5Nny{jrJe>ZnnWj_yh;5T<``bOTc$x z0xpZKi|F#srma0W7eVb$jnvQ`_lfLp!yQ3jP@D{EJxlf}d(}c=l7g z!%^A``g#QX3z7Nb053n$+c}-Z?z{(DWZStfEm?2UE-er#8Q9mFz4Sq5NdT_eh-1yY z45-)4wdUZET^5tPooC}%sy(8aF9CVmTg*$wz$Z{DynH@Ui?-FPX*P@qcyGe5#3T28 zcFWkJ7Md?Uthlpeezok3D)CL#XfE0j2n_~B?Js?Ax70zMAEg{eKo>KA{4l}_V4qvq ztpsZ3WQ!Aw6)R-+NEdNI50y0|cwxbEC25YTRaXdV!Og$%(qRN48icI4ee{1|SzGFL zI0*diggrA%l!{GVpWbvU>u6Q_;22x<8@2e4P{C)~^rDG?N7WK?XkE??{V+~YJBF~& zl%S35=YF<`OS6cq)rN0GTRTVIM9E+)Uw7wyjpssu7wzamZG9FLh*?VTn{^8ow-*_2E!_ zeEsP08h>o~Vum=7muYpl*c??Xj_paq2c|sIf$=J!WddBPDyJo~x@rRd-a00e%(Ctp zt697o{cij-q9e(7pi8)mbwTNDQzNte!_LpQYhrcHt&}^-EqEobyioyk6*>iI?dI=8 z(qRr+lxcD&bky7BZ76p-N6pW6qh94$^_`vgH|gzOOO3V*P~~Y8I}7V1oUD1^Ehe@H z;4S9bT4*gw9k);kN_MxPOUjo< zi1R(r8C+mP2={kJ5eH`tYN4o^Rd13S~i@9)3X z^TMTXa1C?K?EQ(^duNdYHOaJsOe>~WlzFauvjj8+OfmSZ*J$;B&CJPF4=08S9a3|z zf$@HBLx*Ag;BOCf-7$&l-Pmbbif0CoL%H%#=5|44+0;OTJ9V6t!KAgUfWy2H6#pic zckSai!Ip_T27^Yc3ft$9djsP(PqE4g-0ChO|7RE6jALzvoO+na11ppA0RkF()ooyh zr#r~!l!H^i5KEvg3sz#*Jj@j3&Xxgqa)z?Pn|u+5VFbCC)M{jh33zK81UO%gz|gzc zPX8@o;uptXrLTy9>iol2@npO-hj#X*9iN}N8VE03r}ziDL0vcvk^~|)CLh&L(@=$d z+0`}8fw%l<)V-(TWMMC8q$ML3K1fL|0jzQF9`a_b{~JNI9YSOe;ZXcYvqJ)KYy6@5 zVlzN*{W_)n(nG5LGfDaYbUrqYPqe@t8}Yn$<2ci}*?{h=f8{si@#?u58|A5AIhE7q zRa?eQ6P;+M>#ZFwnG7(`yX^s9L{<*N-?4i}{WucD&sHxHoj#Ptrr4UbAb5DxxdxS3rwE>3^h>gzA6B2 
z>(Qmw|8!JE&+~e-xDxzmmIx7pm+Sktj1B-< z$+(_=5*P#mgQUejtLE+?leVUC0Vhfjcz4%(^!b&?<=e)jq+g5E_#LW^m&UV=_IXcg zQuUJ>66j8kE42*2c4f3Drhnul@d9$MQ|B=fqY{4&;J&SV^u4XY?QfbqOrj^jUKA#a zAx5A$p&L1nx?tH)Bf*~`=q+v2d_u-cjVM#eJ}P< zhCbVp-;y5(*L{6%XWRDl+*+4Wk1PCrx2tP&cMWp+orD1tbOC|WIT!iy@d&pIsn8&Z zZgGygOm5B^stqNn8wW;hRGePW0-u zYGYNa8x+AjrxbsGL^l#nBUxjd{2CaBwOowLC#oojP`>53tL;jY_#Uqe?PD{6&U<)kIv55I9E3l{{SC zZ0UO$<-0lF*pEcII2bk-RXt|McFzSZb}SrR)_WqrfDf!c7u7XL00#vMae(O%cqG%s zhTNjsUbY_!f6Oc=vNp>#Y1K; z-_zK?6&-0^_U0r0C%+D#+|b8%JZZK1LZBuOp(Jx=2pUzP0x<#w#;jBJCJ7xG?mm9` z>?ZIX=va8;< zH5k4ttSaCbNtES(rrkt@IG%5ay;%b7Im08&8lW_I62wm@J6q|MEuI1cZy7K|nn2C` zgP921q~NJKC|%s-AI2Q#D68Ip-moa`h{^_q0Of(AjFQ=~BhYLfI!-|xD7FdkX`ls9 zL(Q$i$n)v)EsJ8_X;~c9EfKXj5t?_V_+YcP;4*KbbO>pxA)ZCV7P?HE|_YQjN zAl~>n7TFz`ZXdOVi>U;!E#7{}8Vfb4B09X^C^$!F6evrr&=z|r^iFegEggY?mkW&t zr~C{N&X=y8>6acz2*z-KAYzd;`**mDi)}%?saAbLW5Kt5){E zDI7r3k;7N^t8kJlT5zNl2_~HyT z;TC~C+rHwEdW>@k^|@L%5t{Phh+%q8IqB4njw_2OTUf{98qBT;j2Z6B0QR(nEIf1cW|a}yChOeBBTA)Y?+k(T#J|0_aP(TDn@L8r(dbv9O0vMnL%fKRpAA2YaF~PUk^H*m z5>(CXMsxy!k=GcG=Y>(fPd>Wldlp?mndAf`+U!*ks12~m#eTbSRK3q#Ki{pWeHUq$ zu^;r`s@s1H?FUYdOy)7W#^a5ZELnkiv$H+yO%ZQ^)V|#vpN8M{nuKF^@c`%Hb5oiI zL>(y}c|;3&zFKbM{rJz0g}1YUosM>k&N@b!=sBqEh^HOdqQEOspbpRrbdhA09-DI1 zh%~Lfmr(^Aj2Q#RDVZAJK_-mX_nhXD_l4G5UGye-#V7{Q?@2A4qJrli5?P zCHOFf%cB{S_m9<0yji@=Ke!-7ctchK~ zV)+1hgkHo13YC&`GZ8%$XgI&CKB|gYHvWKuXn?Q^Z~oYfF9#h@Uee?P z_|pnqfi19~*MY~n<=$y^Eim0X-$j6v)Au8x5i`DNExN9vK`kMc1N`!$8^u=Aoj!ny zGzCl%0OA=4g6czBxF8R#42<%7r{>@O*ZCPc_ct<0-<*UugB4sksyU{7c?u8c4Em0_ zSewD3&90(0BDKz6FP~${0vOG1J|e*nd)`f#G78i(A?|otVaNvr0!8W3;1;-Z1n5)J zjc-uEC<~uGUu_>jY#fmdTZ^lObQ zf|7q^Mqm}6ARFXVP>vJ)gX(#}c{T#6?MVDbdsbML zq8*c-Z&&G;)9k_lbx?TCyJVpG4Ov7G8Qs;+maa?T*P`0#HJF%DO>FHo@n1uxR?`;Y zqd()kh~_-!LcXr5RhQ_zWuO!w1ND5~m;fOLhpHR=F33I*0s`I8S2~8lrcK$i`whd> zpquOEAhfTj&3NnWKW~}aNkE*08Rmcl#h~84Sf0;UUC{=05n=G>zIJsK$n46Zm)9{K&9$|SDHDjM5z6D^TOgw{QCMr zSv#(OEun*Bj9Rkq-1sw8&jA?Ex;p&xr`boVkPt?~Py5dQ`WEu3g>5s&d$?A5y2Wa_ 
zfHw|6CJvB%^(wQ6k;}(0G5&VXGMXVu&WC{FO6cGi;(vWZSU%Qpm ziTo$OeLW^+Ay6QaCS}qDhpikoDoHWJ(%rYWszv+U^#FOm5|0A86&CB}VclbrjUQZL z?;Fif+OuI)v6++C|1Gk;4n&L*VLhoYJD+SQe%G_o-Zp(%_FpCA1gzp4P;_)xkMaaW zr4osx%4EY8wm0eSE!MavKu}HftOsRf3a0ceYoh2ij z)LU#DK|R4R=GZ2~gZQB+uvjEOo}ec_7OL6bX7X6){7^p760P!1>GVJmiey)A zy(sAmes0iZNM|A8z;ZCy3M$G)K8q;KW0`JddxxZWLXenkn=nGEWn@B?*+tVJa@=jB z73kJ!?VP?&wR+U@aEp}_n#HZ&cgH~6zjhq!&&c8$JRI|}rSfN=$4aFS<9o->*q zQIPc-rBcDQ^$`Pci-hmCeFvL)YD_n=hfKO*CsCPNWZI|Tb3*u`)9An!3(sT_rp`r+ z;!KM;w@@Y6C|D3w6`Z2LB%HcrJmz^1hC^}HUlo*%xF|c}9gcBv2mwd~1U_2EpF==1 zga;Y}HY$&CVV?U#x5A5f8`bKKCjWJq71H_pk)R;&T!-(3;lTlV)1719|oR>JbxPJnE7?X`b{@uxP1i6X>#I#%+q2 zHj+ySx|B}Xu51#jWjR0x&OuOsn%bWpyx#ZotmWzB{$y8=_3MI~fv067D+)wx)mI2f z6Q^y$&DnioIrY5R;cV`0idCABIPy;MN1&&W8-8t<*2}H1?t7=z9d@yrd*^fEmtcNb zl^Vo9QyU(1kN!B$zA*oQj!AIZbOSd0SOF36>nfu#|GtKy*IIQ6Sx$_^!Rj4o*;tpKi#mvG&@@<{KRw* zIamjnnz>ztkF$-|&r{>^Ey41-71TTvppqjYVzr@Sc%mi0T?>C->{G{T&FzoGNej|r zHz#o3!h^Nd=h+`l zclc>|MYU>(TxiE$L6jrrfRLze8K5+}(yljwKeTYk|Inu4lQ)fM=s7F23_ji4oc%X~ z`v#tn@*bDN8gD(N?*DK?gS7_@{F-zIdpMT%+Y0Z}l|D30xJ9>2d{uB#7k`_C^YCe5 zL8y6Kphxy}k=~|f<^Mh10aMVVHQ`&;bXvZ>pA!HMB-3 zsK-qpf(8DdwdK$Q>V`Ylbf+Bis^~eJ{lV3WGIK+{RJr{slq9}vC-(5XesdFm?y^WX zBk_=__9fz+U?`Fe09Dm@&3fhxLg6)Uu#V8BX1D+XIMYU`FvfEAbW3Ax|v z(uNo3$S`w(Xl6jeVz+;|+I0*f`X(k31W#ALf5Bu_tEU^b#BfLg4-!JIVResB-@($l z?h{9>nCs9^+$)2C!cSPmFAy#$0Vs*M-4<4bQfxbAZIjBa1|l zrGP^EmU(|O?Jox6O~moGO4oNU_oife3&KLKHZ>!9mvah7REhq zQ&lfrb4#h^M85m^gC$&eMfqcBYYQe}QtL#1dYD4A$VzG+*$FvA6~A+FQiGgrgC$dy zX^2wtHLRY`QC*+-(%^oJ`bOMsAub-H&a*@({lWO_lgGZy)nj7HSx$q8%>#o4)hQ^I z;n7F$_O3ezolgJRMsdb+@3?Atg}hs(6ym+dC?hBJo)t51YC_MxTn|CB*%HH_dCbV( zmW|%tBS%$3O>gO z+5JIWhki$-0xlt(1SgpOo}oNL0EI9=`E{LP8;1i;kUY5hMM?^-POo7%oM(dv?ASh3 zR&Oc2>k8bau8IjzY(mpdWTDL1DM!hGma#I~OH)C4dbDs)L^`b%Tj8hT`{am@|43kH z2D`-+09~k*pXJ(?sW^z~S+ zaJFa9-|+E%KMHF;rMRJ*EZOtVnVsz8Xq2@g_KFQd5#t0?#Ko1;Me74PrHswtM3%O$L0`nFjlfapkfy5mc>y)=XP}GI7O>6j0<-7kkK19Ipv8yGPhON7tsjw zFP)*Q_}&P$u7JaUC*!sdm3aQuWvB2Z7cEM*`z{*~SNLvu%2jqA3ia$^!mjvUcobf$ 
z-B2|!$>s2G4FZm1gaT=r&nM?1I~@tSQLcAJC)J+Kw(uqDb-z&{7v~R`}sQGLAZ7 zDxM4b3yZcX!ly;V8&?tQydnj<+9vh|jf$f4D?gUbi7*k~=GFU+fN9hT4Xk-2%KJ~q z6bDEfD$q5S{Wp9_cZ+q39N+Q$+t`1qpi}fOX&{If=Cm$W%Qr2{1X8Uj)7;PT*zTl} zk;wPi8AhV&ebs{Rt*HA4oad9V^D3x$!qXr(hF(%&<5*BM$%50uwYw1#xdcMa6hNKI zMEYztl1uM+{yvut^mSz9Xt&eV`F+(BRX>es?Uj9af^BJELLYBrAQ<{pcZ7-zUx2ttL zavlYr)2|OdqkVQDzVQkyjUulIUmE(xg6!TqADKlD#-(A3M+3!!A+JIqAM3^93sV}u-<~}^URW@7!(e1ca!HH3uzw^o&U;fBR|CR7sEgFgT zwe{(QO+ndrBV{L5vg1~svRT?m+=B6X8Tui8=%`5~C`K1~sqHWCH_Fvw84E(#75L*L zmE#kGjLnbCNtHw8%3A-DrPMqxZ+$adH?*Lg#%vXf_PD94@3x`s5JF^l1}7tAJyvGT z&?>T?5408Z!?kY=s*sdRz%7&}7*^E?GAK_Ks&3(g@V=2SvU+EF&i+Cp_i|sVWRnxg_N;(+i*C@0%?g1U0GKo8e0L zL!ZBwSx8V&95$Cuelk8CqM082-TUp!R1^1l>iyOX>VeDJa?L0Ii`YvPYZ$$i3$o@c z6%j3g@fXAQ%Offu*Q~h<*=1;I6--PoK>GJ?EI9ANQ5Bd~q|4QkGpAqfAvUU52R|VN zcy?Wb^rv2e)?+A%xLYwj@cuKx$IZp7rk$8i%f^B7x;|4>eCBt8#4;4p`&t>B=~YN0 z^-|HC;jK(E_{`^usJ4DMn^$Cpb#C4F;Xj1- z{BTAW8O}dgDX-@wYhr_Qr$f}byZiOe#!(eEfglsy-M{8G(d9UaCGTW~@7PFHdKh-E zZWy2Bq`hcv4T?>=3`X>|ml!rd^F}(cQ2w|&Agz32h~deL5}@TYqPrYE)m@FpH9K&9 zE*pMMzO@C}LutiwXbXnby?f5Nd$u#0j7V9aW_xHmN@sO?ml#@Ye{tVAM^1BRM>6*n z>2IvgcoH|$`jv!MZE_iz)aIOgDYQTAh4B4;>rtQ3Ro;*vE=x&M4C`*#pLksygsE>7L@=cV>Wo}SX&x-d=Pruxfw&bjmB?+J+(xrPDqRcxEGp`sKXfXO zzVuG?tMW1h!gBiNFgz9TBl6axxBQ>b+?^`KR+hsG(^}07!)(G%j2q7m zCaCiYux8L&x>rRgE>GibV!|U+sy(fXZ~Jo~AJ{#)bp+K@qU_@A+(k3Fjr#6VYm6@I zuABL8noJMI^f{3YApH-5f^D~*3PPRc$16&1&*53zM3N&jO6V_S8IfxqiZQILxslok z-t2t&s5qO})1S`9f}T_wKSKMtGXQ60mKHWLF z^X=U{=FI+ruMx1EVd`iGa(e?fem|`J#&n{QzR?7sI{w`c%URO2_cO1tCQorRSa=8s zcvvqKhN2w)FNL6Zg0K-dx#OAMp%SV=W0#t+&F%GfWjG`6N|gJ`)Lpdxpt0eROJ9f4 zw0dy-Bt0EOY=j+{%r!r_-Q8c1_!6uuqeT-;1533(fiek~sc0g!P~u>{*OqgF=xm=X<8Aveb?^-RGBu^)+vxsKe}3!Q=Q^gQ1b1RIQirG+B({RiM^G+@KQ&dv zMuilGQx(DZJHPFL_QQ9BzQ61xH|$CeD;C3*q3@{vpz;)Ti2*Rh1cv=lF2$INalK11 z@m3y^7JhJhEc{%u_2hZm>3Yq*PxMDe(8=Q zpMV|7mp_l|N6!zeijvMOd1`JB2nmRLdC&R98YltSP53g=s^VVK>qci|*&lOoXhNA} zFPTI}<}EAEVAVf1!BGexy+`^$W!zXdEAF2h?0hYr*8%n|L+b6oI_1IrHy^Kvpa(e(ArXlw5OQz^=U>D 
z{Yk+taI5zwkrRl}2rXadV;YIS_^h@pdRMH50^g}ZooA5)N|CIcsKy>yYk-=mBDXX4v`AdV1&hTuYGT^9odu5qu^ zl(k**_O=GIav(58ELj^l6m7gkukBMvKoB`JFOD$ZD)p-Q+@P9?Z_4Y$$3dlwu8TkR zTN6Bm9IuhgD+hw?;%93Y)kr6{l~R=ctBP;*!_A#Jq%@V4<*NkH0Ey-Y?d3i&8Wf_rr>lE_&@$J5Hx3@=il|H6i_0&8!n)zOP zPkfBOl^%7k)fC|J9V>awx>CGNOfc{7Z!_9DZP17|`cv|GOz0Kn3p$MwT!JE2`oWp5 z%$fY00PhVWWny`mrnuFiGI(Ot%RT4ER)Tb9=js=7OkQ*Un^1pyp^HEd_l+PZ2YPU@g3@$Aa%eA5(r^F6J@}^Vg;b^+9IWxroK<49F>4E& zEb-QDW`>Wa-f(2J^;0I91QwHZ|NY-^b)1FX_@HT21)>Tru&rnnm=G((_22qLcq_ld z1|;B%XkG=>2WOd)KlEUHaeXv+&sVXB!G>plULo@VX;ao~PVYII5qvByZ)^R?3E&s| z7H2LDpaJtsd}A%opI5LI70oW*m$%=`DtfUMn@dWe)6xCzg1#gE_&^)mu$0nzbd6N@ zYp?A4pqrV8Q?x|^t&J07h3-S|WX-ZCA2*_hS|1@sR8BK5)>_LVe+kD=cC9{xYaKl$ z{F?dN+!K}Ku4j_t+#qsLMwDJD;n$jCPv}+zj|v5P4K1Vch@*`maQCS_i41f@@JpFC zv$4_PVgO|p5Qa9zR|t8+2+5eKQHGEgYBKY&6<(ucRk`4FFQNmA#y74yir4WFDJVI^ z;nR;pU|SSEgqQAIhchg5@K|i~bS~HdRB)2KIHfH&=l2MHP7C488&LSeS<3?KhucEY zxt3+_j!tLJ8USSjw>6|oV*^q1hJrCUIc(Is%1q|Mro^jX$F?v;Ax^6C=`WCQh!9jJ zx>>(u_VEXoBBvr&Gy;OI#q`Z`5rtV}5ptGha@owRunrgmv=}c`*$$8O=0n#W7Rkxa zlts`v{l+tqpdyS5srJEekmIQ~;?m??Tvg$~mnu@=KJ8W$VKzDf+yWXhvIVvB`f{)9 zJKcgg$iY$_=&hkYq~YChbuiu?CFppxmwDE44^`O=Btm3>xA{XS<%hK>MU9ahf1lKy z!uTM1mEc$n{t8*xr}<0ziwTLF@UYmDF_fnQYZ@3*KZ(V+PFjHyhlB)OQV30fwgd`& zJ473K+u`lPPh)3J*_sD*kCD6={}gI+R;~1M0NS0|+&bfJMsagStm6N6Ljbl7$Tvb| zI@~N?B1pA9{ArgV6lS%b`j4-K5${)|)b+Fkz_2a3lp5Uw0NmVPFK(Z;^9BN4vf56e zMwa0Ma73JI!RuNU0v$5@j7djdtViP{#ot>GIVH{Mgw7}O0SgKnP_QI)lbXj$V!LX4*H(KLGOWq-zBx3)|LQy8+%^m9(xj=sOa~l8f7&oMfIUi z=~hocH3ahTt|?w4$q78$%*Zfti@+k#45bNWA?ITAy7t*)w3u1GLO$ZE_GRK)Rv-As znb%&&82Zukg@Ny#?`s8OW_HjeYm2yLh`E4+ebKKf^#|R-KzP+zt4u|uXI~M@I$06F zb)(wB_%a?~)rO)3p7TwTg8w@PXh`E#{+?GedbqmI@q6v5FMGb4o-bxX(Ua-Uqq$P?TYm0ZD@AvT5s5wbM3k5i62(ZkFzJ?ix-(x9s4XRqD% zSN&G#UsQ1&eUDS|H|bzWA+PVYwDw4`GdBKiQY3jr3O{Z`L!C0wfNzZva$RQfH;q&k z-1`~-zwAXofFZ6g&qYTj_(eY5ns!v`ljrVE&Frkf%THH5y=dkd6pDd+)9pZyh2LMN z@c?bQ<%@<>)RF%(oie#sZ7riL^%~*EN~!%;RX2aRWrX>1w6j@J&dq(prY-Pg--mbT z6GL6z-#0u^r?;<44~15=>G$#<-8QaH3`SeQ5MTvk8+a6`xeOsK`5FIW=yp 
z#?dw0+KIX?BOw9XYPI}AD1PHcOGbL+2s?QNjxteNwyMR3-ToReM40!xXQiZ*Q;V;z zhc41p{rXNLk$2l(WTh=VXMxm1DviQ5bcUV*G8hxMQckk$0?Z&{nGYe;Rqm;igc6Gr z0v`s|XrdpOR52;U;2XX|h#vvK<=%J%Y%$NuWcdF2;F7)wBg_Am{$EJm0H62Z)xTcG zR9>pLSX*1B%PpxV2%lYfJayA$dn}E%SjujlfBSf<{!dFkcjy@X%a7(NJj?KF)U&2c z&)jIknHM{X{(I4-8b(?3FQ2&z5t4!_Dr5XkA4oP86w1q+mts=-7@(-E=m*LAR?yyp zazJHRHJwGr=RS6T@b^p`pYw>X0y`^7Mv=vsM-j5+7!AoaX6)O6d_r?{-+=T&ZT<~$ z$4_;CSxuy$_H*skd3?Qpq+5T%4`9XaSWkpAVJ6BHIyz!zW;nw)_J7mXZYq??}4m6((pqj;iMTM|dA+e(yGK%dhb5w=ekl_ybdL zO&2;ZjqcnU8O+c|8di1N1!p|!_KnzPc1WpC+N&PVq8HFC-v$ zGSrYgiWPn=&{nvD9gta?Fy*1B5YXP=Fvxlf)F{*z_z&*<`=hmPf9$v3v;N$^(oPdI|L;ABa<33`?P?Trw?UKx#HR=zJ%@$MfE1P1KaJuKytL>eu{>%c;7!K z>^1)MQnb_25K!=z5k`85+LT;QysWBxGfY`Pz!?B&0xLj}1e<7AxO^D55$v0vUw|O$ zasJbpxAaWDhVLMAfS>ubvN*IZrMK$yupzQ*@Ty4u=NmvAR|nx?73;cte+C-_5+Bj{ zwSj5>+gAYNVTWe6@+*G?*1Je1Vrc#v-0NxH`GouJuOL27ubmoff0>&VWsnvC)-zuO zW13967{a|heaYC7>uGBh{?;@IZ_k`PUJlInUdl(Fgmvgoh*0wGiSECF!f3?ruh?T!<;u<&5sa`gchga=nKgSo^mir;o6 z=I)B=Z2jM6X6ZebA6%W##U)Ayf=u(Z?UG`a%kDu-d%OD?W;HW;xXYh8W}Okk|04PY z2mmwEvNnk29_{reptqFtwDA=kW z2)qO4Yk=e;s#ozSrmb~-2AYKJTmaU%i(xzQ-x?d)|1W==$npGcVRg^;z6+^UIc@XS z`{~c6yKuhZ-y_{hQyBZ2IRS{QcLgcqGX+{mpfQ@E7;PRT!<#bn53iSVRg5Q_k5|-t zuRXQ5uKW$Xd#7wCQOvKr#(y&mY5V{scIA0J(&;E;3t3TIP{m&rOm?!C{gEgZ+#_{| zv;p4^9Stn?8rrA+S=zf0z2H5ZdO>nN zCv>-`OG#`SxTbSFGo2c{L zch|>7Ugb|N)y~p2Eg6lqy6a8c#AZDDds9 z@$fiJZiaVY7?HB5^>9=WjlUOVNLH4Qn(Usu%fT=*q9{0n^iBbMmK{Npu5_#^8gUSv zkdHv~TW5FMx+FvrB+Qad0WbY_1MZJP>+3y0Kn8^Rkf#0FRRx;;OO4&sn;Y|HX8{+~ zY+z`vh;lA5MRlU=g&&dR$!&WHkhq@c@@GQJ0i6-V=VeFZ#wUAsMJkl_4tkeQ`NP{1 z9UNGufHe*<(3&!VG5Ug>64U5{)98wIO*pVbwPM*;GFI7#WX%@sJ%STtBZo+UJX%qwdpWk)?Umt*AJT+5&%#R??X|J;a@QU zv>PO`>wlli`gAF5yr`4e_xIb8SVeYC7bo-L;IV44{lsAx{Z_NpR$>o_)MI`i%3;xT z?d|l#HE3l12V&adV0hhsJR>M53Qb|9IBBL&-}^g8gCeXTzG;`4A(f&@oX;v3-kVK9 zQmp&w$c>O^fB(+$@+!dHYrM>>mUTO$hTo?dkdXd6vyz>37=YzjwcYMdV!Omd9f+B+QBr2Z~&Zy$a3fXj!fpCCi7(`lTaLb4Dl@Ndw zkly@XN|p%avJ@o|i_@L_a@d}=Tk+E6MYTq{pa`qpM)L(nt 
zf(N4lJB-VLk_tKA2RV`MzJEBU))xz;;=y3p1yUj1cMwncg7UY6O0dkc;uaVBu<2fA zXtDNNb|FM9OD%Xz))~nK0gX>U-rw^6yApnzE>qEQ%5jaWd$_ej+jqI-gj1xN5EH!a z8s+Ki4^ody^)3L_5vMo-LLsyW;_$@8Q=mq3ma6^2O`?jz(naB!#%&$h2?!ymF)~kX z?Xp1|%9(H)h#$Zb(75?JRjCM;Sk0(L7|oQT46H|C+OQomF}9RDgmkcb-8b?Cr?^R- z@Bc8r-p(o1cHdF`=K$;f@UZd9L-L@Nciivq zngh3l1w}|Wh=|tplprIL?9EtuOCVX#J6C5b${n>6`9k_wRX*bDoHD7$4-LNyG01fM=S8u|gEgT@ilfpe z5lMw$xw)5zur9;q<-FO+BpCxvnxycdDg z3-zzjLjhP#08I&iIsli!%TVyL5y}B&kroRaIYgb5g|}uB7=fsor!xfw!Z~s80vKbW zi-O28b09Ab4|mA+v{ElS!rfu`>70e9uTD_i(ybO_yw?*rAlP93K|(PDdRcg2W8O`) zlzH(15JKtMeX>>-=Sn^H?bDpupQznKs8fAHYYe^S+7#ex`vIAy$5l-3DCRxQ#~746 z*}QSMUr0Oz`N4WvcC7k5MQ;caEyh+Hxf7Xm927!W^MerQ(=E%i&=;9MeJQ`i>)3yG z<4ZpRN=s;PhekV(xyx<+k5WH}%zeSj&K~m@SF@3{NXJ_lYSXd)PM1SpJlRzK!)pT| z&PU;-#tGYbqDrkjeKbB%_m}w^b8`K7FEu{JPNCK2@%kiLYmXe%uXRMCGgUxm& z(1tcA&_bA?T4Y<%_YsQrZ*NiVBmvMT77Bbo;AC#(!E8Hpj1at2PRA*O4J?MhlKz67 zF_-v;RMFt_h_lqQDrKT~xIabc@;o)NTFdZJOzCS`iySTAGtf*-RI`+|=E7pI-Pqyhf!i3Q@_*@=hvx&&r?8IrYyRG?257k@S6b7248U2(P-Db-Zrgs@$`i{2 z)G(n`my`Q-Zo7N0QI&9JmxSog&m#}NMwtng&Ud{*w0L?Su9^e^bwq9WUlk`K17Ya% z74EK|7gsiRMqq9-;3e5#OQqO;`UNn3L!ah~e2&5mtjqa##r{@b1le=79(v=*L5uG4C4GK5==DKyRsJ`;Y_AeyG?@zUIrwBWWU#xvEw+taD^+m*1u zw=443g%4*j=&dG@rjxrIrh}MRt|WCak)~iwc;0m&cb}>pyC(GiFJ&l20!fdMhbLu2 z{%DD}Ve~XRXxiudX~t1)uFlMM{G-Ct=_$6#Vdd)bv^4LdQS&U7t5A-1ix#yCaL|`l zyEJuD#(_{6%FAn#vcQI zZ4maarF6TG4wzh)Qi$Rs=(d8ev!EUh$=OG~|6NN0f*rv7hCv`iyzuoxce+0FKk?A# zSUZI4L}szw5CG#)m&_WU?*hjQ@WUd2zdo0b0?JYqqikuJK(vm$0iL6`+B>ADo<4?C zo-34kud}TdTKklj&Eg&=8sG44tWN>80j016)TD!qFeh#bm>Y@nw`F>e6Tm4RMO#MS z{2vM=fCs!_fLDTlsQAk0J}J`H^FK$P_nC=OvNnc*MMq*< z?GVlf4zz{YK4WNYodLS<*=M*Ma=ZM|>F+bok(Z*WC1BVzQQ`grZI>K}_=EcA02m%G z3md!%LBICC#>#Of`;T|0kOZGM)^kM%6@GXlv>S-MwffJG_bMs^qkh@`y($fw*CqE2 zJK6s3_lt+ZJ?T8>t=Pr~m7PNIAL0ItjWPjYXEnuEK>GU41ieeGH3Py>2(_)>O~$Ky zjh4>*Rp0Edkm<+q$G`pI)c#HM=i@JHIR1YwR0G$|%hOYT<3^r*hc{HJH9FHaWQ_B7 zi0%9@n|oRCAC)VQtRUL5Dam_~6I`Bx-f(&^X3)C&6WhgOtICnE!UwjKp94^(S(3x4 z1Lmu~%Qdi{V4}YU86jCu=m;hNhS8q{HyFSNTyfmpTe+E!<*^)F8QCU_fU(wG# 
zwl3QuMlB17g=LW9ap_@;Nv(=H;GMn!hAIEG9^H2xZS{tf|eK*JsTEf3A&Edu$Ha zwZ@%F-F!z6y#ZCVis2)MPZvP95<_cRR^0^L0cIH_`Pf@rHbeACQ2+LwQY=TwfO!I@0;H+6~3nSk1=?l9m0gV7{FBl z?=u9S9f+NQO%C2tJS^vA)uSmE*u=20renc5w<5Edqrz4z!t7u@AdAITRC1_I9+N<1 z#XiqN7Gp|Mj;XU~fF4R##ziDe$wU6k^v=2gM-8)Einub}&Def$yAfKg(`gCH-7RU$ zK||j4P5lP!rs{y9(P7y7ze#PfCrci}f(LdQuP^9>)9V1@qv~Nzd2<&ZpU_2x=mC-r z7^o0RiqRTRN{eR;Ro-j?8$kfJd{Mvup2S9GZ%tKR>`67Z5}la z!V&ku5YNi5)i-g^0g;8_WD9a3Ew%?_$MW4f{TTW>@BZu*Dz|5 zV=;(`qyl_=?s+T^Y0j;;!=DGgcXM^pth!RP{gMmPB!V>W=8;V|fRBS1($kR`ri(mM z=3YKN)(l(K{7y3~2y>1vkwH^^l?DfCSLAG*+jtzqnQt4?WDbF{n=OHP@bR8i)tso_quB zX!P#wAmp2^Vm>sV0z47OmzaWwV&0RoVMg1;#0@n9RM+Z{uego|bkvS7?GDoJ&h5`G zdo(`WzJg(F#ic_n+!IvZaJ;m|l&ksKP|V~Uu*}~q4~!f7NS3w;Qd8EOiO#cum*Ci8 zpX~3~_9>m3BU)paZsgGD4A6a?N6ZCO?1cgLVEgOGbO?tEZ0?PIKH}MeRWZb+Dk^k> z^}uEO&8y*opPXsg$zsP(5!^iCu$;gTU&v(*y}||u7Th%mzfx@$*ou7C({&1;bN`90 zu#j>cA+@BDg1Ql@*X2=s4lAT{tK;-?CEy74+6EEHqo362W63k{J}U(S%n2R}$PCjl z^>UGEh$X}_y+bKqm(xROkQnfvMRp-%J1DNi7=p^pTZ&XLd^|2;l*pC-{vE$$ojLx5 z@#yFw4BVUH*v8t+jaJO) zeZTbjWFY9Mm>9L|XrN*&y91SD+te*(+%z$ED57sy_Y8`dl%&7ZNq-fbTjWzUv0J)% z*NO96`t0QuSbX<7_~X_O@G4RP$KM{gyF0!>bQ29r$Bk!e$Qt0PKX~uWTW=^HRERbPmSYwJt=&5nM+XfI9vxF(P*A%=al5?g_7z zZ)C&}=X{}#3(l*9J{ZH;qI)6TKeMbyESP|J`^A!|K?5<~`ep1r=wrS?EvBf6M18-@ z=m z^StBz&@s{vcd++bbIp0p-!<3$6zeajm+v=EYtY$%Qc1vZ$RPxqk436uDy={IcdeE( z2tYeh8)9}E;1(!r4&4w`LrAAvgmQ@k5(aFS2C@HM6koD{nB;N7Zb|Ao9K1v*t(Y14 z9N^G)IG=DC>oRG1d2c57b+PHm1^*V05v~r{Q1QSe{l*=q-?QmcGc9Tc&JTTa3YuCW z2a1yk%~k2VuD7qwsOF^DviTgU4ElT5UY}T@uvXuC^Rvh|(r8Wn4MSw7M2#_6wdGpJ zipMlG8eS#apTpMPHH;~-orRM)Vok_&a{aRuI;$}X6ySCBm!-tZlAZJEWdAw#(akP% z6pE7V59c|z@l89n6tN>aIZ%9LC!%{%$|Bk%C467@Z<~$m{QRe{R*8E6z}ri;qOTK3 znE-m5#5EXhuqjgGqs#a3J8Q^@C;6|JeL^G4X_DEC(W5fs>Y4M;%q^Fk_>=3(3+kt+ z7--#3COqhRqJRzJ;3j3_x+31(AtAwS!PLucvC;O}}b z($*!&LdE>@gZ1vUn>%@Ga!O!b8W;bpO=vb}DB}1lKYT!Kw(rl0t_ihq0yP|ls|ayy zykLMCUa^ZE`H>*5S-l)4+aTGAK3uod=>98ILjgQ}qJA7d$r;!<+f&W4K-eoB$sl|( zW?x6|oGJU#fEiA9UlyFh0v8Y7gMUu|?EIw}h~^AI^}b)1fk1MGtsBY%=lD5NtP2Mm 
z=cswTeqlwH7U{)5FiM316Jd%{ASh;6!}To4iIDLMpBk*5Q}$;-8_#+`qBefHm)7@N zf{tdwq4T+f4b*#Nc%aM5TJzFxW53SQCZxIE;{sOXjB@YhGtG5C z3&W>89NBWh_z?bA*Dh#&u>$A&#hWLNmfw9s^gWl3tO+GIdIons_^qfX5Ca19o?hK4 zJJtaZWGk?k1U|#nn&TPltwsz#=(G$z4E3xu0c7j9~CbWrdaG@~G_8}A_GW`=G;*{(SG%mdYe^ZCTO znCKAYxpP!?(M>tPWs)CvKa=d2xU|%trSv2UJ%th5HIAxIos#1t^$lB0K8`A?IC~eo z_}GzzM_vCtNj<5h$`|buc@J^x$t70CL|Y9)JD2=2KC-z75hmGowljBdJSsHz)YWuE z7O%{2?<~{oUjE>$n(y5Na>Xx-+F9nWU&C_3d{7-lE@*Nw)0vA1&3Gc<^_;Q*vzO$9 zGHA8jVzB~M7*1h&aEo{L^-IEG*B87UUfs?AdjUY(Vb6!eU3Y|mj>BQ`bMw`;2+!+r z92iOAikDRB^9~1%#_FqiZ+OC^(+}oXQOUWl{;ZKH{g6Q+tEA`4B!5 z{%+2(|LX6&ncDc>nWy_q?<7X{$gD>jD58YXiEnLG11PgmwLM6hwTtJ@zkqj(TAd2@ zY+KgZ>;B+biV+hMbq8NAOm+{8;gHF`Id+VQG6-YgD$R-E3>*?>_ku(C0X0j8-jL=U9Ktx(#BA^ zHt!2g9Fa^mOTQMqx$mSsUcBsK&t*Jez=5#p(bWLGA}61%M(YLsetM@VIKz7Wv*bGC z|H;jShz{j-)*0Awx*a>rc8K>LG?vMX8CoqYv4yGy{}bgtny+IntmQOtkD_IN*Re_wci?Qg_iZ<(n4SFYkG5F-?Tq6Fn%XKz5LjT6jL z^-`${T%m9&F@%K%yoY43gKnt-53DuWzf{vrltgWW_;iv3n*f?XL`=B*gHtSU0TpvD zQ}Xn)dPEs_R1dtSHYS>4ctiQpyPCc7}wqit3#?ax)ODZb;<{XzS71g!?p3YQ>bIXwyn3`r;|)G7WBy?BnB6S5XOdf>?$=P(Kxa|KrdzssjK{G?-q z@$ail{I+En;BTS*&*@O{bo)Wo(hgYwEx4rA*~zWz4=KWP-}26252b^fg}hArRz;8zd@(fI!bCFM02fflB&g~D}^rbU1yc=`1|Oy z!CqFlzauhXEv)rQnzO!;PiB;S_oudIuLp1Jcr8u75{_~r=v$#rJOvk)R-{&f`a#|x zD*xaS26&F(0vg>+_*q_6BvJMRuL zMzQZGe_}>FJ`$Qew`wW!xH0;gTt8!qNybGzCDq^uJxPK^_BLywg|0ev@TI4yr3inPI!j`{b8|FbbD7%gCP? z?&jT#c`_sh%}Sj))Xvvq(kRCP(+F{?jX3{fwJbkj$KLijT~-6A31LDdR^pIuJkDwd z@45Wml|M8^Oftba&}jKSgMWRFl3`g5er`}| zQBo*AJY+Yo?Cx}L{D9Cv6)_YE1PFYxEqzoS#u1C{p7`|Wu6T06h862P;TC~!@35SB zjratSxmX3ThhG<@lcxg>pB(ZVn)C^)n)QOoc{6AewW;>=-S? 
zRPqNnF_=f^nBb2+BvTQ^ob7@yh?2$mp0pF9dbry65yVzXgyptB{X#8D#EOgb;4k<|MM z@ruFI+_XKCzqSXE??~-#67>2Z)E`f}_t2<~9Fr5ykC&vB3aGTAZnj|YVsKqTrBvWE z^Bqdo6MiWXcB{YHp~#v9U1A;VNRx~D!j!faOzK2 zh8<3x61l)7NQw-GyoO%EC>6I6X+{e|Ds`0(GhtgY5x+it$h~I$RE+I=Sj*gi8ezVx zSX|Bc_3r2^pUZMHdtP3x5QYL1S*~X9pORA!W?ufTPg*O z6+Y(OqaKO1{T$*&YYgf9r^G=xHca;KAWis_5%}N2sVaI9k4` z-vR#(SbnyE%v~{b?&_cup+F$gzl#ux|^$Op~f5#hSgS2M_qI=vWbS75i zOG1mO4mlKO;Uj*>s`pu#Er?6l6pS@n<0w5aVgSWBhU$K_0cf>m?RuJ>ZMI5Sv?Y7Q z+W9&4O{hz9*8)UeXuj~YXf+bEGrE1FHTFO|PEk7iFr(4md$P0r%`8Vl~No=Rhz_TVN z-14*SX@84W(`vTA<$6nQ!1wo=Z+Iv(fFqyO-abE(b>#ZjA2r}b_r;$Kv;x$&fc1rsZmdV6>N?eT>H<$=?}qhTSaD>rl;OA)$g zmA}ivV5P8@6i1z7J$)B4?bI4kg}#Br65aM1dYp;5L52Mdeb!2MF9MMr`t}0=qe^~E75SD(UL)FEiB`HO1lbejEYY&lk-?-!`i3=a;r?Rkb65WGFBWM z_WU^u*^vp$R8}#U*5}VX)i>+S+}#;d6Z@hx#=K*7cpU5s?Ky>G|wARBx8}sa#{8yq-22eZjcrEPw%Hwj#kupM3SWzTA zAlCC?`vg;al`8xDnm`>2s*Bpgy2$i&O(I9l`qtznVTk?Xhm6T)$xcjx<^)Epwqd+SC-!QUvNp|H- z4F>(;vLZ09=<~gCng2hS-_PE1#sB^HjCs}KQWe^5_%m*?5kSKH9ipmjp4OkGp=_1c zUM921hflYacq9}F^%DIqeHjBK|L5$KC~E9izE8@S2Yc8mqoB5YBXut-TrK5AMnX~C zsfq7Ge~ZJOFH|~%Vt;^2!lXh^C_jv7!q9qXg7dYtLLoU@Sd0nvzeHg6Bt{Pd?bRLD z6z#jNG@E3d07N4aRg$C z!wTFd^U?OIP==&KSD=YAvI|-68`rVT7Ji|RM7;f^l%Qr`j~})6qm@@qOj>&&R<&-6 zstHOMQo7G?(Cg*alLHpxa2jgf7J(Oaw`F_!oq2ZuI~p7lgc7#6trXv*Pt`=QV;>|* z-W5*Qq+OlWzhj)dD*|Q^+9}+Zhp=jw_M$@D|^7deF&0V$##dQF}q^VZ^>@#Sj|e;Sd_4 zq=|iI0G}k6#Z9~cBPyFL3SvI;OY@}FB#5OX-14>wBH#Lhdhj0Fy;gr6_}g8hN5zu?r5}4lu+Ko_7Tp$_AkW6 z&mmN5t(OIZyR~Avs9+Sypl##z*p&3$ga&+R&|3fF{OzZ-UH%ao7)AGnWj6{j+yCUi z!-tX;r&{GUeXPc}o!Nk!8NRAK@qh{xN_Sc3xzFG-vdzE)GXKkn%bml;Pj{0zu4tEYRz­bWVs5xF$_nU_MK(`OCj(5 z&CkRm=y?$U3LV{%>xmc2$V(0KavW6q1FEDR)BU5BxRK97KswH1GdXvAl(31esD^tT1a7S^&bznDaDMsA-jyFupwW)|dF!SqBI3NqxP zOQSfsFouR&Rn9A#X!u{6lpb)AwqFB_kcE>ha8i;2={LNiF0+3Pnld`sP1%jHM$Y<# zm9ZGpkm&`FG`7%3I^3ydpyDA-Rmd_exoabJL)sA9G{yf9R+Xj#Xm!m(>tAjk{4O9r zPNN@x_<}!p}^j!O@WmqJuYbJfiL6Spb ze`jV?_;;*IcG~8i_G~Cq@z}vo=nrcc!^jjR6j5X8wwyUBQlWH-8ik1@lfmm4Z;lU3 zcD5%}LLQ08`fyC_+VZHuaE|AN$@&%-v^tq26zRup3d+aPh4#dgnMLFUcG8`=k9@lD 
zil+P#9@xd34axel%^1m?>E+xSK;h}f{a*@P3<(LbXhkbqh~`4mVlh89ylO;7=`ZY+ ztCh;*_m!J!k<4|YrQHO*4+)A=bVL#d3VQQ{Z2dAEKx)DQunu-&EO+x%m+jZWpG41W zb1x%u9(ewVHj=CaJv~6R-N>8wrZRm`ZYaoR_gkzkB*bLnNu`q*s*i*i=qs{&t}g-w z9;{0+c=CH2!uYiW3OEm#tQwzl1uMGpSg1qLx;ok&NrBu9LQh=Tla6TmdTS~?;rB?& zfvvQz5J<+T|MGLO!UsO-MT5Q9oBy=~ZybO~H{4raOX<$=1EA;*k>hEPSl@nj+tWhM z(A&xi^c*~lg%37u^2-#y#w=0)sI=eczZu_aCI}R{r@yO$Mis}*hxPLKJjQ1k5#)Bu zDp&#p=hv@2i)ez;F-&c|P2!Pc*QIp4}PtmeP@r9``27PJtx;4d2yQ;@0<$5%b3+!nS$ zO=$8so9#nE%|ggOVS0~+>hI;E?-yrAX5hk^gi`Ov8EbZ@{?$0RXNqVqZSVnS(|+Uo z@Sj1#aI;#Y@bV6QOk+R}o%Y#$bon*dCDOW?@?3RSsr&WK)^mT|K)C5?!a@AMeJ-FD zD0B`RrZoY;?f)5L0jQ3mT>TTx+p$pJxEDg)H<8yxqU|BweDd5U`EQM`y73o>61(4C zrKtzLbvDZ9+w`6t70M@Vrxv#xbRX!QaQ)SZ?v0gkOn0PBSH5q;Lu-SN_w)S%hzhq(k_2DZ&@7yVQQ_+9*&cEN{x ziF0~Iti9^WHZx&5zQtv%80un5PM=qcc^eW)D&0tbEg0!lj!K5Lp0EB7#pVcj87F8R zJmyvkkYWb_oaUc6td>tUi6d3!dMpK4OA>fjxy%QiSkbZhi25tNho`YcNeSFodsB+N zYuQO9tfg#Cx|Rj?<^55XEN#gf#pBpd*3#3lfbc;rIQ%%vxJJA*nOzH36^bxUj&>Dd zG-4~~1f2bJB>F#CFC@$wW0LL^|0Kubz}RD;t%=?qP(M?-u7^v8Ip`|$3;Vxluf49BngH2qX~g?{>Sf+(r;T&q7*U#?anpVs}(5jvn0W_eoR z4zAAdGursg9DvrEy7X*47)zPuG@(oWQ9|USX)ug&8=QB6X)Gt6 z$bQf@8W&ozcrxa1GHhwEGw*#Ikt0fi9~tRS!|QFW_!8Q;I59Xse5HE5f1;E4ll1^b zpM=zfo{vuCp&1}N03H92Mq0$X2UabEZr;Sd?Yt}gvxCgOCn=*zB&h-`d3ECfirriH z?y2^_IS13HX6oo~&)Wu{yZt-rHr;#70tiwxbOZQdT)vqqV>`l^4NW0$sLoi7p=atG zfaWM3SFpmN(LMPc%M7xhO}9NZXxxWpiWeF)Sh&r><%|E44T79dKA52c;Hq!v%v5-? 
zx@=RQ(>Uax+18kOzcCw=)goa}aq=l)Lus3lEhSX(X!sgmYqygUP+ZUEzcsOtnrC0| zz)$%&<461whrKV)QkMImeStD``R?|?!OQ77vzaN|G=CcibOS|wdVWo1YlAN%UWN+i z3X-B%1qPox=GqZTnWvy$ei_L=-j|3=huIataLHyPB1*^_6iY=)!IH@ZHWwwUlQ(3* zdFW=jwu##MoFIEH<-oj{N&fb!CBt}tM}=TJMfTL~QWRi`N6h9iK&ET3x+Y>p_3&sT zG_~Pr0zUEreB>0yd;Nncwl(Wo@L(C5mANl4!xIm$>`^0K)+fEN{Jp89^o_^^M;fX_ zlvx#>yy5;H)GvGbOPm2U#V131TjM!CZK*mFozdn z0XcU*Bca(pjH83QYV7bB&xEBh90F=w_B2>u#ec7hr^Cyl=XejeP$>nYW`9D^>2Jp4E<%pbX)}jsXW2utY)KC7D~!I zTc?d$I74(G-|nq959ibVj~^SvVA0?Mqjc5B9VKNZ^6QCLXg$|G!zIHeXl10+u)bYi z@(Zo*t<@ZE?{QfPc;Ob18PUbfmtXTzDfe-=vsk!CV;daMGiVZ+BRWQ6dyc;Uae=7q zsrm`*XzAYfB$>Z(JrK1{>_dn%sor7tJVv%d$wTD|4!3YLs{@eek|zm6n2gkm#cp8T zjWqWJf}$onaNhxHl_j zOAAn+6Pm%64;Ym(v)Q3Mf9Jr;b#&sXTX@)`6@T^@vr`yh7fcMkv4OYppqP)qFgJjg1;fGK4b@ydG7e1 zi|(8rb`A3`%#`d*qD>SH5QTcu+WK;Wnz}1NF2fy4HkD`x$gr|;m&sVLC7|Krpp*I* z1rl!Wp6Y6Zb-D%=_eYf6q;R=`=(=eFX?<|gt*#!pXQ2D11#bVzXqFLNi7x*yWVwS%5J1J%d?yJ>`|$%)>+@za~Gc z6V>G0ncr+jChazPgO7P+(@*pU@NHGd9@f=Na0Hh3c5&MVF7Unq z(W&MXSZ*Dn*$fF_mn8%bmnObnz<=x1Oamg?XjC%l@y0=)PpwI>E8kAIQ~Tu3seL9w zxlYU?B&I@|L>+m&`JuIubMcg4lZ4stdgP@|r3`>$i^G*Al!fyZ8aQ%@iOe zC~Om3byjYa?R?uaym?)bX_0HdQ*ZyBt^f=20-R1S9uNyYT`DBYfTR)Dg0pNUVWp#%~IL!)6zWn z9NwO|ju7M53K&NWLbMghNgQBySH_0u2qJ!W@ur6$$9mlP+jZ zoLE0S|GtM>AfcvVbi}+9TCf8a5_ZWfX|T$)P}r7#0SVcr%sMNa z!qs!ClB-irYxWk#eT|ynI@c)+R+G0f#QPx$6V+z(Shni&!i@FTY@d6J}~0B5?q3WSS;E_?0{hgRz6`P4_q*P`S#8ZS3uzvs2gdKG(C2^E}!7_||9 z=_M(>Y(roPI)J4?bS|N{POFxqz)@AF^x)0x#^--zEfEb=BM=Gx-v#X={VrC_rtA^g$w5B}_k^~Do!KvxGC@Z(_< zu43-tam}0SKBB!CdG#C%(_rE7t_ykwIcg*NqR5MI9*u45!L=T#VTZ&IHv{b~EY2j_ zfKMBfNH-6><8F$Tc*7jBE0^TkB<7X4B9 zZiKGX6kK^zJ6+vXv|DCYuvzt`n+dGevrW*hhWf&2l6?c6P~Ds4SIza5C<0A&%{JbP z{@;YOT<3N~?`;K%*ow!D`60HGAxHf$v54jccHt0!CBME94;!qxK4*&AM`Nstn-wjC(V9G09&3j)oG<@)_ z7pp%lpxXV^J-*2&30kC8BT`kzCDR$A-&(=a&ZpRzFp*dSr@R^f-j)EVEhG7tuYn#q zaks*ZB;4$Vz16C^@0QYZbk+z5gc%KxGR>l{zu$;cVXsgS`m_)zuc{j;Pjh$0rv`m? 
z{$Rd7oxa5G3Zr3|>JUXVtead_$nX)XnnZ>yNsYuD6oCwJqBNK674~FmKeHFEjgzqE ztYarEUqU$9YtNIZZd`yI+fQ{WMi%+S7k^vpwcU@?4lA^ixfbO?=U1-eY0(ISU8WFO zIS6Z-a@644lo|(t#%H?w2^OlTM@E5PpMchMeXclWH&T|<1!8qzJel;bbt9)B3F(VR zn6EI5K@wEZ0GWl3F0)d;b^As-*%`In=b?!5uMw!L2?JsR#sELQZPXUo*gk^?j0vo> zF*Ny@HsX2U)uiRNJLrJ5v8=X!~Kg7`MB)Glj~dy4nw};@BcRU-G{iDhMF#ARosA?hOp~D<~%r`w5iIh`BZ;M z{zM>;-#Bqo(l9XXJpVnl!ewN;0*@FzIY=76nr|ml;%z4tLS5Q}f$jye+)ic12NiOs zCUKzNOC}LW$|yBuw`eP@iO2rBY}MrSAlD%hh$uV#ZWh*JgPEIOwwLv+@p_JyR5Lp@ z`TW=&clP?SE-h`N1!y5VJOk|86m6?-@@aW+l90SrOc zQCN|_;6O&@LDtDJ@tH>j%`$dK3ySi+x=xstfvFWF+8E$##Qju`K;Sb6{jIC`F$F1K z?5d>6E*r~mSJ7rs8RlPs%xijgz28>KPT~4r2HB-Z&KC!4C=&go$^pVR)aVL2Ob2;$ z_iKqsVe7dUstAR+OH`@ZBiTYd^0>Hv5u#85&VdVGFSX#d;*YQL1u4nex*m+TsWzr@ z^#yI2nEy~jN;%u>eJW_azRIin5e1#;N+Vf+kTi6r4XUhc?6v#R5V{}L5T1N|usc^Q z6#kikh%m{ZaKYziO;n;aVm6Qo88c8L25)DCfr)gz6r zA=B%=f^9y@dAI-O{evCAh@*e94>*0p6&ClyW_ z62pGlb3CmJv;$6nlB7z<^zm|R{DYcPuO$hy^7~UGphlNkj;2v2r#Vhb^|G$F&?dDG zB#2(8s?%)Y8>AR8GfQJZcP2?^;kJk1qqUW1vnsmX7f@BJK0{lwl~5d|kW8}S0*{vA zht5}$(pQ8GT);ZbEp{mht5N| zubpZqG)&Uqprn9ObWiv^{I?A@)?At(foRLtOFc>fwSBx1DUy7Nd>{_x_CTdD-^T}*iOUlBfJV~_@VVFopYFR^FsWYTx zyut)y&X=)3%?ZeXfZwlO`eq6rz4 z`4}8jH_#S?)TQbXWnRl|EXO;auRU!PRw%Dvv}p>7!9YS6#VXHM!rYhWe3}FC-7{~f zNTNNAW{5vKj?Bz6vJtn?#1S@$U>VPiqyhZmytgepkmx4tL8L&t$K-6gte zh_cN;5)Qm*ydpEfg9c(%RdQa_7tMC)wh?ca{B63t_I`6=Y6wVm%f@+ZhAi(emZRC92{cO(jaZ zK8+<&y6mi@GwY}5j+j!opbEz)E>N!!VPQdBhJs>|z1O4KNZRzK(wu%Gqbu1AW?x)u zCWBjf1*y6f;$KLOa{q64IPv5fyWH%PLY%NPq4b<`TV`F=HYpo0Uw%V2iC0j1}rP}08 z`|M7psgY=x;qt=m;*nGsWy>o8{_EVbZ{V2Ph)Z-A=jH=`x)pV5Tkdqoqpin}Ndo;L zNTHxgW7y$lYC^HBYG2U?(64~V296;MD|WEthAo;h$N{48R|DxF>e0$6j|ni;@N>#5 zZe5e_IEN-h!=3}Ke3T#@{6%DD^U`(Geku>*MV*yUQiZM!a5?C$rHg~6*Vff(FXm|4 zMMe1F*Tn^lrUjmBDBijlD;Lgx_8CU|dJpEdXotD80MZkjRA}i;FvCq?wz8&*xCwLE z8-xfC>I9ud=3(G$ptWpKf`g|AW0EPA=uN|!x^?!AkxfX zi5ChS4`_EG5)V+EGUm~e1R*SHL25P)t;qQjI;9cMqG1!H62u^G6fp+HQn7J^O;6MW z*RH)U_E}hIQaDvK5JQL9W{u+U2kDyd;`z%{<8~aEO{8*)*v~j?MhJFoV_ncRJ*wt( 
z0h3TQtIk9tfukxB(vFFkiuN=3GJz#igN6M{vKM_vWrb*{u;s8{FWx8;48&gGcAm)y z{@j*N%Epy^(!t;^R2MDISX+?X!D{{qn*1w0ngu)PQ(L=I%-q)moF|tF6MCvJb?Z}v zuQ4Y5V<jL)-K~)cAq}%0GEMb9?A*HiN zK0k$@$(aK zZ0i@1j_rh9(rzA3-=7dEp0RmY?pHj+^}%wp8ZwN}N5X^#f$z+y1ji9FOJa-Hl6%b% zXz@dxqIMw`%o9AQH_MiQ2*RI*`UC}ebY3z0yIb_)aE9&Ruz^6yH1n_ zn@3MyU(kpUTeixl!G$!%x0r)&QCodFRH4%ey^Lt)PqROsJqk&KxPCq2wus$CO%Ggz zbdMMRR;F}rUTuaZmsFm;1DBzMxRlTx3LtEt`IyG&Q~_G4L|1MGIxv;utLiZS9kYM- z{t(6?qW4vga~NI<{jdg#M`Y=9zg}m1{E-?;<;77k@Q!NBx+1>C_%(4t4~4rtuAk2A zW&-Hhmr26k2yq}Ahuak(d8W%V?;(sB#SFi7Q-uy(U?Jne*Fg{egh!UA&|Lb@ycd}{ zm!vk~rJ(p~e-ngx2v1rD{i5D`0~_(JIAkcb%_eXtZD<`#AUsbcMNXx`C?{f55aJv2 z?Q9hpD*aR$sFgl2F;3Ej*b1siW~Y?q`_iKwn^ZCMov@Fqx)DC&KZI75Cgh#c%?@o`uI-3xxJt3$D41cO%_W{r-Q5~ zm$>=Qu{iJbzF@7uo;ZgszAc(_zmaYvrZ2zk1n(f2C_Exa4Z%Jk^i9+9{4OaZBE`3w zx7!MoKe2bNZ15zsKs)?4>9wGt$Vn;?+`=76AaxF8>lo;ddZ_Z!P%OX(ibhRnU*4PeA5x zroKus|A4)&JLO^eQ(gW2c7=W(L!RW~Uk<_x)WfF-?zGR%-^iJ86>rqqj{TT@;Ws?O zeyz#ARWh$=7BY~cK2y5dgx=&58QazWDoD}Od&baNROJsxLerNOM~x!-;ryJpwuYfI zq-kPDxepR;&u*A$WpT6O%dqq37h}VgF8mZ&(sU$FF4Pc9#RJ}X5^jnZ z(FKEH#lX!lETue3V#ZrbxsQCmVIaCh0l1#PFlEzi3~_H86@Od;A(jE^Fk zRRvcG)RYjT$>D)3qI!ve9dZ)OUXi}lmwD@9J zewUhl-QQMz#RGED{m44{s>9?-4)P3!HunXxN=_i67Aujzl6tn`tW0{&?D{&Nd0jA39-=@BIm7`g02 z?Tp@0!3@rt{@o`wu^Fy`$b_icxQL95$UnbRL_4ORUT7eWMLLo)G^X06M!|_K zDrv_6W(+Pk_8wZ%#iv5NJ_k&fR|5!0)0IZR+Q-c4qp0zfhutV9nP-AVMZ|Cyou_IM z4a32fq}uvuFWW*+D6O9?yuyeT;=!KZ!x|z$)NKJ_#u`PtluJqyYL9BE^c@M zJ?CM9zMr4o0vIIN_X$ecYk%SjTGC`47uP$6BU!;5P;Qzo#Wg?{0^LXpr8EiWF$dwd zV=O(@SiC!CByn@nUC~!-()g$Q4);CKa&9)eyQU0}C*NYpkdzIZqEd$U?7E6xgma(= zn==Mm^dzM!fqBnUY|%o%Izz%nR0HbScSh#opeVU94GaE3QN*wT514HN7 zfEmF}DGd9#^DY7{HmeMI>ofSb8fliQ!?P!i_LN1MP?RDopZhmFcu3}-)2FEdWxIze z==^r_6tNU(et>UaBO!?hGe36#2%tDw-z=xs>&8Tfz?4*0JMou^2k9l{pEjj9*q(e0 z0Sfs{6!7DcSJ2jYE(y?^;}Dt~=-uR`=hERBGCD+ggSI^szeeqxFlNn0fRW<5N|+$S zSRExt@F0YELEJxLw9Z4m$9VH@08dM7e z6#mkf>txCVzBP9u17;1MNxcf-lO;n9b`^j&KW%!!neM+-PfqOL9|sgN12Dw<bu(#RZ-v}2(Nr@AO`fwYzJ|zbFgBPF-c-%lm!n$8 za`#^nZSS;q|6guJ-ZB 
zP=g_7`HC#(>{tct98icG6>P1BlHU&V>k=d|DEAg3{H1*`*aunGf4dWQ`Fe!T#btR( zYG+pg>`@$SU|y|SdD{eqCiI$MD&imU$0#`o=xB7NMT!Ohasu9? zFI5`l;H6(@oY4r5?A1>QQLkzIary2f7J}Etr2E+u-=1!J6DR~>mT+b_P0hv}{KX$= zm1hdH5X?G4?>r2jw1vPxhU56jZ_9U53|G#Aux0S?ayL)WXODBz$G zum>`hKPw_W?~*kMl1hYGo-F&lejSJq6@f}ANvV64>4LMFK@n?v>o&-;55X2KhcQGT zYB>l^rupuB-ido>rm9C73%n?@%AX2YQV6beo;emSdPXGup@sNbqS=V49` z$~2!n%*@RBPDM^ihsKOd2P&d$xY+=lNa?fr>IP+L<>nn1B7$vs*VUgHf$k@apWXA2 zewoCM81q35{@zDQU3`_CSP^!Gopfsx-wV2njAVIGp^lO=>7V@07m3bYo=>}gyH1)g z>~&mKKdIM{7uN^yEl@(!PcxrMkC?`H<9PDz=MpOs!`^9yZ^(tD2krZ6C8DjpM zAS{&Q#`5I*N!lk2NN7c0ev)1}!>j&{cDYF>7f-s?H;x_pO9w%HaPF<~^XjWi8CnPR zmGcitWmHzlnveN7l78bxH}ql+CI7@vl43i&o)Z!islmUN1&e*EF%lUa3x9L0mpa>w z9^rFzEEL<>WhB!r)OKV?CmA#U#&_XoNpK0L#w;2xkZ`5Cp{43<^Us(GixIeLM_$#F zQ+2p~eA`Mrv7n2`BJa#S0TJ?!Y`2j%u^Xy|G8+u$e)zNt>gT%wBLpEi%V4lMl9TEZ z?O=%&tWyep2_jJze7!m{NuSTj~J+P$Q?c0LVY$ z#MG<2T(~P1vp(nJ5`%CGp)Erl_BOG$RXD(sme}n_D`NK$?^51P_Dz~^Zk4~D-WS%& z0n6p^{^xg}i5e##AV@Q)q2s3IGfs|4CLP((Tz~5~hk)`J&f&pI9%e3 z-4$3v=KM?(nsol3KBPaEQ3sLq3rgjH{&H!;qKn~H+ph^K;5x^2vrV1#sra3O<+X7i z{}sRUf$o*Xv8u{gdQ2%V?U3fF>Uvdk9%S7AN76M$$N7ETiIc`j<21Hy+iBDoZEQ8R z8r!z5##Uq7b|#)U@BIGnm-#Sj)~x5==U|_G@7X5??ODbHLadX9TKHCG1#~RZ2o;r* z7sYB<#;4OI(R17!QDMD0RjV>&fIV_gju;=2BrJK&n?N6!K!^pA3l3k|G`CJi7SfrY z`Gr~>)a$NLbLbT0ZP<|q>JL7?!9_TATfPigQAJ;{qUoE$TFc?`&|1h2^(wJo$SYzq zTHn5sa*-Zpl1A$la3|CQ){0BrqDCqeRcPTZp#){ElUtgGRaj}~mk$>HMa#gwz!=i2 ze)FBAggPc&F}DICGh*5(FMQgGeglO&jcuUKVRcfGqbY6LQ3X23aY&MR%w9F`RtBTL zq<=9Pk)yGFsnaJ#vTlJE#i1~iiu-neNamU>ZsI%4dla_Q_GZ z`R(T8(|V#xu_2-N-dEjwElwuF)}<>c1A4fVGbtg;C(6%K0{uE&qa(5BYy?Q`dm6f( z^_Tr7)tTAL?ZbE}#qotI&GK*pX6Fj2T=9ki~tT=-3v>&g($ zB+_a_9-Miy!c5e{o@Jg<9Is@QL_^A6Whesm{C9jQdcs&r^x{+10_qG^7U?k&0|IG^ zd`j$`^K@P;%Z$P=p`|~m)WS)B5%mdi8Iaxio5JxJ!2MUP?90Tq_Ou+W4Rd1`4Q)Sv zs9Jo1-p&QniJ=|E(AD_JxLh&Qk0%3g#kKRorZB)(Aly#DfxF$R+CVQ4jwT#YNe6H) z4v2F0A~52jKXaX!Z5iEz(g|T@hZeY8LwhhVn|a{4-Y%_3!#K4e{>#TZuU-BfWqX?F zZxtEsEzlQ&b~;||2-e3>(m<&sUGFayS$?5hVf-K-s#xYfcNx_(g9HPsn_j~}ogep* 
z>;uF#*!T~;?bSGa$TBa5gf>+LCWsH?@x7E~nH8GCxMBsLFZ_M*-v2(%;3KY_Vd&^I zO~q8?#^w?r4G!tBSs7leCzYhEKu^IGzjbdS)*CK` z{TEE=ZIWlMHC}&DG@@A)+MjJm)qSdw!J^4ogaq-h1n*At!&`eBO)D)5!6#8kZLNd3 zItpXpU;yD^zM@GFVYuQ&O=cmcT4lJt#0qbSj`+%8H5D#Ie4Hoqy(Wdv`{(RJo!Mrz z&SnFjTVnuR08fW)M$K|gujJp?#VjOYj{a_QC}t>}bN@gOjlqB0ZD^vXy@ibS$Othw zF0phrhh47GLO%=^x)@HDur3KLk!`N5k4)xm7T)_pOoZ!D=qFotkSM(T6D?`GMkO{I z;mB5F^D>2`4Eh;^VW>%YG2x9`oX1invjK_f&Cc z7Og`ZO^tt%r3WVQ?Ilj+J}{hPxNUhYN&bmVkysq~=IHhxMb{eU)u$^yjnkj#=IK3q zc0FBjvU}QZz}yS9t(1Ph?x76tCVqE!0Ve(bBT9#-5H>Qgf1UIGL8P9vzLXS^zd{NpWm*sGw%0|3GKP#Je_1m<(=CuXxo84krZ5#9{Ga z{Oq@AB{|G*)ErXlo4~`d!Pfd*8 zVW8-N<|8c=xvjSYi$xfR%RI9i(FcCW&{_D_r4v7zlS}ZamgEuHg`|ZQ6ZuhH!-%~t zO#M(}3_uiCMWOO6$Y90e+l6g2H+dI1vIk2hs#sz6~i1IcO+GlVw--rmZ<4b2G+Dt>S?h?L}7@4xLetG zUoI91(hn%7EZG!`P)zZbX`S_*kaljv%ELV4?}?Z$Ss`UG9{35evncn|pL|q6KVz;( z;pa-BC=cf{{l-Evl||C4Z(lOO ztF6Lf=IT7*Y_DbccMUMOJwDC;Pve6jX}*dzdH*e}1175!g#xq-Q$i&;85mD+8%I9E z?oJK=Nji5`DU>dORGvp7Gmg1UFYpMlz^LuP#~@uW)+yVjuUk=yb6>So8K9>7Y2rr( zzJG5cMw}C(1NDke*EVStLi#krZZCWwT18}sh_;6k$9^9){PyZ$v2?0QdJ=G;ol&U! 
zM?7}7;SP&wnfDf4Xf*Ra4iC}^@L5|8w*5OwnC_^L`wG`fvICpJ z`8nLI9&7|ifDmb^3#;@HC@({JilRTN2d6Pp3p-Jx=K;+T1HSBhPOM{k@MZ?=`$sqZc4I|@%L+|z`f~F(g;@LD zHC%=lY6bbNUx1hWm<%rw?XKCRp3bV}6Mg-}wa;8Mn2WTJDpHRYRg6JotTJE-oH{be z2#+a6R8|JZ@vX4ViOU4SS*b$VQ7hUVxr5z_X#I>q0{SFeN=!tpZnKfxeqa5jiEBx| zFhq8_P=GrlgZ3QI-TuGnXZv&a_C?}9-F)*9qK}48k9Odya_Odq`>RW7m9E>%hdsT1 zM#fz-IC2hhCNT479ncR5yL@{f{3g_w~oh=k#&qrh6BOw2Gz#7so115!$`#hy~_U?L_J(j^!T{N;&Lm zQXt=H2|>b(9|%ZJ_E-TedMK!VC)yORfBqqp3A9cGZ>e@0ZKcrOJj!Nd`yzQLkq0u7 z{2)F2#wF|K+yt{e1K$9B_b5hGRKlfZX`qo-MXs42T*+I3rMNEufwg*%k?z0@T{9S% zbY|RwcJhZrTw3)quBsS7{4@WCE85a32|=t16M$L=b!e1ec~N@m775jkb}YpH1;Q7b z(~_4`ERz@B_sbr$Ok1>Bpk7(+110=$p!-_N6Ab_UZ;E3`w?asSU8iEifbL$U1-Sr^ zo{3{q>UVA4IO)dqL^#qvoXUDG!Hv9oRz%}IGF#I6;<1uiKXr>h^KwEsh_ul(_4BpS z@Xm${UHsqF(E9JY7>rN?-6c!pA3hH|=kMmYLmlQc|3=Ef@_8442cq)Rfb0F9 z{bKpnhs|Z4=S;>Kv^+ABbVF_8q+S_l$cObB>w9)^znNc@SJJoU)EQbkIA-d%w7bN; z=en(tAGx{cAGxe^!pQKG-`;^MViA98elGNk$OVjZW!Uj_eI@QmJu|rLHoSfZFdx_8H8?$l40YhYHwXDVy--9XO|vJ4Qn|(g-Rdc34#;Xq{e?v_;_T23 zD?Q^2XlLT^nK58ibft8R*7xd9Q^spyRk$sekk@NAXefB({eXEg^ki}C*xYwq_q?-8 zk#red*~rvioTvN`|1y$`=cAg^kE%Qcjh~U4J~(hnkLp-xxnwFVRE(4DfXd(vh%+>8 z19e>EbJd^@f|=t(FSjuidAgftDJwP5NrmPUu=rC6#P>IfMvRVyRLVf>KQKtZFcyiH zdcJCuGf}mciu9QLi9BIaB5;R^;uvgMP`RI~@9SOxdeE_C&+#9E=VbSDFg!5V#+Un- zqKYzC5^-Oi&}--?DoMo_vC?W<*r8?dCM#`Wf@>xOzWf#qJCxCuy0b!h*;W-K^&sHE zagQ)tO;&O1g}H}Qig5jQe&>uA$SK2DI8{yN^9DtiY0pB^^oGP-D zOT(LqbAHDq;8!vvG5agQCUu^T^FrrKkCd*&O5tHw+8Q3di}H%rQl68tU6F^&Oe9(8 zDu<>wSB+ti|MA(~I4K#l4%0HgS8`DB6gv_`sNWH2NBZR-Qyj-9*zZIA!2YGEWq*6ZlVu%sO*zh@#N*>^-B_DUq) zQy%e?>h#ZbyVL+Jx6k<~$(9G#lelj2^*Ufhj$!U$c zToiOypRlie@pl|Buo71tC_GJZBAFL&gZCPZf9^BIndHWN(89EzMCErL+{j|dgh84! 
zHzrTux1q#&b~A)`;zb z=udI#1q`22`z1CCFoN+}QVc~L!pCY9)P)7T%y}~9a|;#0ol}E5*JS@0=z*dPY>2e2 zisvdw)C>&CLbWLdL(n8KdD$LmBhB6)LSEjndbKiS(>;D!q_}HIO0oxhl39q%VR829 zS|w)jL8JwseTt4@f`_q|{%K~lA9r%;|K%q>rTa4*-kvkPIfEiIHq-Ha4dX*5(GQb0 z*NDhoi$Hwc?qK`tn$3Va3PZwU_?`7VP{)LiOX}K)w%tMvQNJyk7eX%PNK_HSY|fh) zo(S9t#a7zaWLqfLq*25t56t8iBiP4f>^IZsU7$)mX;Z~~c$!3ijzdWLh-gQ}rT2~t zlvm)U^dLZCmGtZoH4dtK2cdc9qEg?gJ-8uSH`qx3PpAn67A#zalWsr!Y4LWb zXkXoEod}j_NB9KYAF2WM+MwUgTIe5P(i+x>h%tbB`6a7hE}i!T{|Og+}-HN`%-ycnXBAyK|HQ{)T9 z4SYQ;maY9Co$jlz-UyNyWBpw0xXC9bMmSa;OHsZw_6AT`H19?fNzAn4g6NnKyrhRw zQB_mMHIiXIgI{Pnpr{#$GKnx(!Q|&En`b;0xtQ_kaQ=GDtnI~Hel(mW1XuWA3DxnzaMZB?~AyDdP|bMR_k z6D4%f^t_dZ84=KcEfIxwgmkbc&M9oG1yXPcJAOnak?rUH5e5@~T{vNA;xBrC=>M7@5G_n*uYRUA+@RS?0egvX;Fg;4PBlB+8l<7a$ChkWJF+-?Tv%e-h`u? zu>ldtJ~m^ybV*<(-AfTiV6MmJGJaghxy%3 zcH_Y0d6e;A2~W@U2n>8HB^wGlF_20dHc?G&Uvg$lFz0*hzH6X``sW%0CT=yF;|&=E z#b(}fTHB0xas!x%9gC-llvDtQbO$39kVEnC|8X5C@HX%_FaVz_Vg_7PHycKco(Y(t zsSaONkqkygG6V6PgM|{5wEuXr6392XO1NO7lQoyXA1c^*2fJjJmxXq3{+%!wH2d_v z<4FnYBy8lqRWORrdiy6ApXy9m0+zAT-77yB(kXJyVOjkGOPnNUwI_}>O~)pQK50A& zskDr~b=hn0-;+mV1RJk`g&X~)(Mo~TQ&o^F))Hf`;ntqSf#0Ea3hN0afM8TXaI4tD zEpF%2G)|xvKC_8Yf=%)hc00f3l9PDejJn0jUnXNMlUjE9}_@gW!5Uc}asQb)P6|BVdc5JaC+hKcURe zdVeDmEQKlpOghN;iZ$}~NkqPiMZWqwW`!BV zyO8bHurQ*_>xXG;eQj>E*iS~c;dls+cfgBw`~)9Rs)2)Y-?sX}p$XZccFE`^#%}{@ zt!2iF-xGZU1h6Py$t@&T3T_Yn0Ew4b;^7ZY_tt}{~qyR~2>u>q?6 z_E_IxKJ5(dgnwQ?7jL~PH!WIDR-2z0Y&=AM>(r$7hTyKD625^jy?c_f#RM+hoBV#K zhc*##tkEI5azWm-=Z&@P21G+}cOBrR40_L`r|!xl?c8=ayz2k1QKD$$YI0RA9FZ&> zaS4NADj;%@q&OV=VxJj9rPaqAuS&vqbf6xlQ*_beZqvSdBLs1IE@b&^XBt{2Co6qk z25!UpujJUEq3&usb|k4zCfDtJy3HBq;C%oeK8`4OI#2J*cxWd9T_nSfw$;Yug59^7 zeGps%#tgDIH~*4LP#0Ygvbo|tt=k#O0t?n$g!xO)Tj6Y9nDiQ6chg^RS7p5?YEWE1 z-hRwZq-9gz>;Cz{o%p@yyuZ%%2mbN3MR79HKRJ+%(iw7WfBGS!R6^0s405c~Iu&*3 zgYiGZ3`7@8RU1t0mWS-6A~MAD;Nj~-5Bb+ImP`Ahutw4zgtMxF;h)LZwpg>=dc~b9 z)OZ95K^6!}QXGp*5eo}u8BHx!6B5-ZV=^gVJ7zPr%;O1?@5mdSIf2dD>qa?NH zZjrS@f8(yoHIVT@7vnhb6$uL)!Kdx$Y5vTm)aZU6P^^^QT{etdL;-X04;a(YjqM{K 
z+r`8#y6@c|)m@PZiXfc*j4>&gO7P4Qzrp)g$?C`%bca^*tN%2>QK%e0mum$Sv^p?S zr%^f1m$%P0L!w%bvL+(@57ssp-`K8Uk-ItazE7^M2&6(u zYz`Wc81=S66fgIbo&=-|-efP1FVM8xP}j3bt%bhxrcjAWUX%CFhcdhza>NjqDL>7V zm#9sKNMF(@5>(@&A>I%m(a~DH%aS{MSqXE404kw7oIUrH7q%O|cCo?xLQavb!`PEW zZ<`65sI?;TXCYeS7)X1@5;YfD)Pc~`VtnYb`-EFl3Amv?%zBl!oNX-1L_g4&vg=d6 ztjgkmNxk)UJoNTpqAM~EbBD38s{ww)4Y4f^YHL*xm!0r&>tNz`{fo=?+UK-B_yv^a zkp-oUCU_E9e~11R)5J`jD|tsJ^~Bml#vR|ZLw_$MO_$IF`>icE%~#nh%v+S)WbaGY zeQp43(Hm+n2sxAaB9-SvFjz)%l@yko6|!D~%-ZTbxl46J9~p0(Q-&5@Ok+E7p;F9> z^kk#rT$qakaLl;+el3|X{=5F1Aju2U9fI-9ZuP->QL zYg`QbXjHAA)uHVTQJS_{TjsQ`Dcb3-sKlImj6l&AI1}y0e0^NMg_yRh{q%G%pf|-4 z5zqVE;&^FSL!XtB6L)hI^J1^OrgfK(6IozRCIu=d|Ms5(A&=JJd-);xbQ>LTdyu=_ z>!MvsI2I@vN<4PjnamRqBA7w_32yF>U-}L6eF!UdOd?kO+iwGp(xM~_#)FT(W z*JR@+ZI-C{Li+Y0qSmmErD;Fq1|c$bMPMbQ^{6o^y`4ctVmZ2P`DUYa~;nbPH z#O?S1KMN%zimxMZDjs&`e@`s~>9~e99+G59m@;}v8k}&bZ$75TN|<1_x>0#qXMMAB zFZ45b-fqvz09gl;AscM<5MdZBwL9Nz(Olqbq{q%s=?${C=^b~3$MOHZ+k7vr5-uJ= zhBvgOVS`}`L?};80UC&(f6*CYVYnGgXxR8H@cq;-`PXA>n-l{Z+UoY5kYTUet$)jz zkDIHbMju>Wp5r6Nu|+HIf5-q?4RYE)O{89Me& z46VtOBk}gDwp~;y2G4*@K^Aye4fZu@98c?w4;Q5#+FWRW?6_Mkpkcb&?@w%k-4`K& zd-HUNH^VjuYRI$~Q8Fm8*4qFd88iQ;oJ06}Lz0}7f2YH9XO7*>tH)CoLNQ_&Vq=;9 znkr0$gvYN5S0BHYs6JLs%2eR%MacZjgrqj#hrmyY`~DQi#^n*~z!?}dpJD_JGW5X( zoKiDnI7vr>TH~r!}dhv8Cxqo4!+g31)`XFHbzH-_g*jgU}2KY7O+53Zg+Ey)gdh2 z(LST@+6rE9H@qNk`u$N3j*cS(<@W$IUs@FW`R-^R;3qlD@U3!xDQy={T9Mh8Kpbm{QJ+uS)l!78)k48hOp^Y~k39XdF=QZuC9 zB5u$|WD;tg4LP|jGupeSRjKh;-OQi>R(V%pq;~i$bP0>>3pT#DCw#-L_f!H!C80ld zTR)hV(f@14iCDq_?J-OHq%M&==G}OL$IN%rqfyw?QeVAxsq4FwnL6q}KPOJZ6kdWQ zBve&g;CwQDzlda2W4DXxTE&H&)o%VWu0|>=+m{52V$P) zpJ`61R84pj>FwtaeB1$jnot=_IdAi0>w6O_u0j8V=O1pe+k~H0df-)(jlL3Q$UYBN zWDV}P>*c(G<3QH0k2&!2I$7#(Ypg@LJEv}^daPeqPLGI#aa1#!yjzaf&bBXz7M?^m zj-FH$nO&ddnHx)9GY*TV+Z+x3?B3ac=5V=dKHp zvhFX!dA8t_fTX3>{(S0Pl%gs~rTdUd`U^EOM5t~76LaUY z>N#q9w*uvlQ)Gd}Q8W}8EJv#6s|N1H1*wu|KwqS)!cxj%i{cR{ypzmN+UA9*|K=8R zkMb%%>{4t#wIfoCe8(x!k=)Pl!;Iu&Rp1J=B~S@TyB-~$V%k9F0~qBcK7*9w4*^H- 
z_7Z!4Xr?Jhtw5ZYZ<8IbxoybS2sx%gGSY*)R992T_h_V?`T5vJ;Bz(Rt<15w%{-M{O~4 zwI9m`pU&sGyWKwKaP~B2xJB>#2bo%PrcA`t+k;?R4Z^Vq{hw&ofntrru>{3;#R!uCbp9u-6kOpaibNZ%ncovEu0Rbn%zA{Z@{F zbNg$H{gWDKG2mWg7U%Bq{tGSR=iG`Acb0DM@^w(VxqIDfDL|p3n|_DNPOmOruEvIE z3PcII za3zvQtYZ`s*}MB129@U4Fkn0Np$z_n^W`Vr+dEVFyWdYQ#WZoEsq13KCIC15gc(QTbt=N#tDQ{cd7}?e_6$mJi z&2{26FI6qW0_^#=&Asm8;eGM2JVx6HhGQmRalY*WXA=bJ+wk9-G7px6Xa59%63ipM zeN*el1ISq5yRWlwOR6$Q?5OgJJvO>(dt3v5f*k_cZ1ME@Rf#aSRf2ujr)o{=we!9^ z0Vo6el{xLt6c*c>LQUu7TP6p|AV-m&z0LUEN9j6;E{g7jA5@q$+TQ2Az45FRbX>z< ztPz?%9wNs<>UIYgWPT~x;Wq0iev{AQ`=I-wx@=}dU#Azbg^1#M$X_xrjj`Bsz7)Nr zSD|R&BWG{Fb(CS>YogI;WUx})wuj#cWwi1wxOlFd${iec>l$w`?3&8#@y&KN>rp%< z4t3rI<{Xu(`6<5H2uJ(1rV~+TML(~)g5%>`-WA?$#|QbFnV4Lj9)8g#$ssZ2=2r^) zZ0|!2RPWEA!{mn%pLK@D?`2q)2c;j4k1-LlcBEP<7&JXi6j%4zyRRA;!f(kut1LuX zsu|1;{^3yGhD!w8EfC$z*73I}cW!cbL;ZbSNk@{=yDfL1J-lV0@KOXQ)?oO|BzI5TQa`rFQo9>jY(Y-kF)Pk&?1Lc;_7Zkg&U5 zc>&wTy{>qVx426|za}cObaJ(^Q}e#QMvII~cv{WlP)Vj6PALB6XB*!;l5$+F=`0HY z_=7)?ONPb|Xcs};1URO{kHB-j>t9!cD?b6B(tae25mQh*e`mM>t$VUzs37JTo@0@j2Qi2Mb%4Fk6&jh1?T)NEW!IS3rINyd2T(H zJ34Lyjw+oJFJX+jzla2>_?~@61C~xA{fs+23~6rc(7w&xc{FUlq_Ckgf?RJEDnEcQ zwIDJp*jb3$2kHu#xTOC!4B*E#R&Qj`Z_uN06?jL4t*r_PQ$`&v40$F87#q%Q3I3w+J*v%|--9l_ea3Vn2kx1vf^x_3telwOmB_@<65L8!C`<@m%#b+ne{zX(4OMFl&i>28fBX z7T<{W{^~0@F-_AJCZN`8A>-kIY|mH7wZV}*-VKdI6r_rPuj#cPSG{(5`kksxyPVWw zsTiw)v~Te4hT~1>HSMux$(Vy=`DTUZsD^|Kw;-Je!O?`|%LjSHzdzrKw)WiO;m7-x z+wsTY!b{dxWPK56Z?VNpl?>=>+HTewFt|lF!pB=!Z=eQ5&CdN;|*TWNPD-aewfyz?dpj#K;6N!V+xD%4C$u4{A&Y3&DXJ#Fj1kc5!1-1PN{}$t zj&P=N{4unkW_X@`xPrq$=%Ip0Fv8KPod$NL0zXs|=3wiZH3E(|d5z>h%mec|b#>0@ z{%;ACzghhS5V`exsNDyhy1JXO*CW2y;F&YH)m8qzsWxEl%qv~R;dnDDsuV!@Go%cy zseX!iI_}dHB(PxO!NR>Nq{;kh%I#ax^MC<}h>lF1XlT8sv;G8jHh;G4s?ihka_qhB z5jJe<`HbwdSK2wB{`~$Tw)0q#2|A=&M|4iSL_s4;^DHt&&nr#$JP@J|GlLQ_)6Uu# z8VSbt$uDD;vn$UCpHO|J0{mlWu5Byb%aC%;NY`j-J5pEp2gwl|)g3o>+Jy_077DH% zuS7v~K$Oi8w^ou}aS2h-J5dj#0Ne?Y5CQWA&;X78YXG6{C!dn# zTfrhE?gZ!e6#jQmP6PpG;0aveEto&~16PbFp?g0RNEG9$eQBteqN?6!^3WW>lsoqAJDj3pNWDU}08>dwVCsuCs(XRG 
z$b!iuPq`D&Y==p3YJ0l>0S(>fOMw*eRuL7T+=%GQ40`RQ@&WWNf$`>jNMvd|rwHT3 zJ?}C_Vb_1r2jZ=5cED0~p zqUO)zfIz%d3nnQ;r$o{%sT|B!`HnFwnP69E^A~0xn~fa8BjJNKy5G1(?3qW#@%*>@ z>{82RnzAMgbw^{XtRXL);Z(HfUKwgh`rz=)YZfbxi7x14JJ5);rNP>^s`n`?O{85J zs&1AtbhxSNQmK~v*|{j(xX^u?QjvZIg{O1$S2sJ|)X{^E_^sw`u43uVP(EYB#7o6< z@&1>7jOY8v5~HJ_2 zNl>SwaqPc{G?6tja6RWlR1(}fWuE6|+Au@Tu6uf2pc2Kr_YLS?l@95l2H)148t?*w z1{mUZe?UJ-M;_aM36w|d8O4oK-|JX$a*1|e?#=*2OMtYywQH$A~t#@+c0xl+n7g4RBPDuP6 zApZt9px|ac!l4qVi~s$2&v0{=ohdg&vi;XXDHI(E$m0pU&HpB!nuM-<3*r9)_QO#N z4_jxxnF2j1ov3!90PN%Md=Sckbq$|LIS9;OkK>~KzmeFS3kNX=qM|V6a~fq$^_JgQ zUZgcNa0K-6hcU#S{iGk_O_CHrf-;AanXF4ifih-=@A%KeRI|jjC1{L?qXDYmSm!V{ zrkWYS%)H1wMx-aE%w`-D!a`~XTl?d|3GlUuSpT$o-!!4x z@l6EQ|GMggo7)z#``${NS@?Eiw{JX1zd8EGQlJtX9Jj(fZ`T%owh+Wt&A-luewTjK z(EXJvv|&FEgox3Q#kZ^JCU-G;j}X>|Z)wLxln+$$dBt1Dah`3Ut@vrO-%-1vm+o79 zMs1{(@;Ulg?_)pX0P+pLY4+Pn(YyQ2=zjy=A+4sj@K@%t*H>rWE}Z%9yxkYv-R9kv z54*>M&|Elw>22*tN4W=J4a?P}Omz+v*iNYweaLOb#^#g(dO~2lfMl~428J&=#*9xk z)c&7PanMKS(WuhRVV2OdRU6^;yf3uKo*7{C+(bOrX?RDvM)Eg^FdCtKA;Q?q1m`2g zC%U&IDYEwD^l8ZboQ1PjallTH0czh4ueHA!gtLUx5UQJc(eMeXUe|`aCv(jkvNFzF?PGtY&#|{5*B~t-^$Cfqsr!92G4QK{{H+*z0uHK4;8V;?NruQZCw4k z_;*8(hlNfHpK00ParFH0l}5X@n?``m{+Z#ruifUHkq-n*2N~9Ou(BJl>-jih8(FCB zC^6eGWV-Iebng5wug#vKni_T&1fX)O)7uN?L3mN?^S>-%Rn3|j{h_#fwnr%ErUfVT zm}RTsH@|V!*|FCyPO9gO1>jIUfb)wsK=0zVXl0dZ`&qNUC+?FGNgVn}R{Vz5m4n|z z_4XH6JfkFZY85yw9J40%?f1!fu?#EI>r9B$`$2N*d>LH;xR<~Wk?Mn>F$YgK4P zp_wG$a~L%safE-@aJ>9IQf*OsT{OFl9wfX+Wb9l z12NF=aaTMXb5Sq_qRGwX-MZ_Z)pZg5QUk}ME4LQz1kb+~aHmxm0KE7Eiguohw6JZQf2P(1!KqUYp>=si-LP5D+SUhQ&@qO}tx@gzB06S$kmP~*GL zn3oc#>ZAH!YC{d#c*=gw2j~5X?GS^|`3m77vez#QIt7Ar5)Lp|j(S6^%|poNpD+Cx z36sW=%bmo|1GI|Pg}?N?W@)jp4u_Kw^EZ~!Ede&MCyO(AFJe}_H{gAec7 z>%H|K>mKUG}8j!%?#&6Q;xQ(_}!RbrJNg8i588 zVJ4OVv9jN+N-%NXa)(#tcDot5SFd7Pn6<@lI&g_Gf9?vhQo8lsQPRSI(<~H#L`YGs z=(mB+$Nk26?3?Jebew1~aauD@P*s>`MbV(=?Qa}wW>Gp6qV|{M%0C9+m>&-9J*e|` z)(W;1G8bWxnU3jd4?x6B%0?E#znVp$wO8PJI&{wfK%O2Ds@^r%n*~E6p}7?Ga0XsL 
z#_iA5%0HoVR~sg;;o!6)6YK`?$4g|pDAsIM{_`EAY7qDP{pU;S2?#$TaHLgPTGm3R z=kk-T1|cLD2H;=P785BH>t?r|jjqcE3~bi~4f3~0;EiMoEu}~bf<}!KRjCxTwXI$i zU4|j31N$F-gCpH{d~d7K3xbas!-vRVf=b}#5|VXTj+xE-Ij$}!>LD($n}5_K_AgbPJPcZbfA4Cq z8ng3dL!Y%=Xshj#sF>nHrn*~r3D=hyv>;%%V)tVgI7%}dfm z1CRX|5+!S$yU}l0<xWkE&$}<#65G5BbnS8D5Qu+K zt33AaoowqebeD(@x))!<%=PGdbTrgY|L>WP)4mhUcb<#VP-@eLYvPWSl-q56w5ju77s29sj>`n8$A>@g$Eu& z@`a?j5puVF)6neRuEr2xw;$`gXA?6@MREd`%FWl4&(f1?O-^fiR%wj+zLo#?|6Ai5 zTnLPz949V!?n@_dcp~fkPT*392sqHX|NUJem;B-GqOk|5=Wwh~k)Xzd*E3xbnD}2} z9mRU3-gw%&0bK9(U@t_Kd$IRh?93k*VRhyDeq7yFGD(i#$>b&qt`Upyp>x?H$=19| z1N}B&-<#M9)^zF2nRqLI7ITvSHTlx?&UD4aO*jPr-9e_6^hbCy6tsfAM@4w8E;;)l zonrEuOb5tZcr+0XxLt^RZ^ZOl??L+!(|Zw#`k}{ z!h7>8`|hoKgcS6-=B*K2#J*2cd0FKeFzgZ`a zumG&{raYv^3Fw(2>%#r1mSTusu`|Yr>7qY=g{c&IK-p0sUPX1Jr{deYIv`ZT?idXl zk*Tv5&fa^8csacu!J{3RwVOw&c<{`1SM~00;fd3X<39 z`_*W6nd^^-$$h2T*C%$wpI!3&a82JwnIP6mNZ`=L(pi(?x1o5-K5`qWE*=RoX{u2B z7Jq|ri^~tYDDr+1mhPV<0q~&iwO3-t?NEaUj5N^c7WBX`D=%;SX*R*u9F3d%XNj`e z`7)tfDBM{Un4b=J1NZQrN6!OUm zKooddZR4{Zz~bu>)w=Yl%Q_CnMl;LHe7%D%0jH9KT~*gIYktyB1`>&n-}SYc!s%}q z2N!RAbDZPvOG`61IHn*2&kELDdh6U*5EdsQBdKWG#>wCw7%ZO)bP#&?67>iqp}79BT;%RV2`&Fyoz zpX;a5N(`FMdcXK;>VK-wcN{C#%J{*EJbvu#a{GScdiUNZgRr=PjLz!5jL0>Jd_F#k zqqlQsKT{@hBGmlMxEsrWc`baJvGO6k^w1+tF856U81eeIZ;~E&X0IN<^Ii)~)ry4> z>H$d9XiX3S0!Fwtylt4q<{2+CoKX>#Itph}{$|kG@3jWXsQNfu1BDy&6~G!I{ajE9 zzWHe`D4qRzGY+oX_J%6nu3j(yD7#cJNg^3`2UC61o7S!aY_HUwDj3xHk`VV>EynT@ zNM9VF{2l2!_t|6Fnxk|Db149g8X7p|PSlY1$i1&!QGP9~reMIXPhvHejeG8Ee9}jG zPlr1?UYLWRP2eg|JkMj*34R~5kjte~1JPC;UJ!$Loi$vLuIf7bl6^eiKeZH4xCIdolDX9lt)FIs+oX(c^*n~npwQUyB8+u;_P z(jZ0nMZ(noKpQ&i3N;;n&$?~(ba&IpKn6@fs#M&z;zz4W?Z>QUTB$h`eV{dFTm5ql z%>P^#aRU-xHU3oJ-xE+cCf9H;h1*;d3#M4Gz5;No-aM2_Y{_e3E5U=+>&_yFcLHaG;12?P#jifA5Wt&BY8$0<-GZ_o+ng=p zO%wMDyLN-MgR4u|MHy@?kCZwFq`tyn>=o+PEN7T%Wr=f<=?z9n z%KZ^v*fV+X>LSW+-}XGp8~oF<998bu__a4(Zg*=W6l|7%zwULung>fuLll7K= z5$aS9P%Ey+f$<0(tH8I_HfyzeM4aD@1$dJPuAf+$S0JVA2$YVcWXWB4TGRoEBSltG 
z06HMv7DVEAo3VA9sy+Nz(P1G@D(8??E<4J{)`TD7A0h>(+Dz%um7r#Dq7;z^%LNR1 zzy4O;SlW@N*4Jurudzx@OlOt$OB*h|p=l|Wp|!PT0Bq!A|M~d)gSJKrrWHaW)*MXW z1eEq43h^xz0sb=B=~S7p(Acp91k6bF2)KhZy-EUzq^@gb_sn!CI0G$~#t$DC6(?6F zqOfi(y%YH&weJFP8FZu&w*~7dQ~T$g=dT@xJf??vBCqC&H^8ZQ4Y0QRLr(Y1jNAwg zD%WI1ek9EV+QJ}@tjj)g<5dp&em{5oVrY1VQ^YS93K@F$@xS`_$hQ)k((@h6&1Yy^ z^~}*&1D^IHT2Q#j%W7dJrLW4eJhRWpQPFmhTl!6zeYD5rbt%#YmaoETpJ9h#{8a;! zxyvaJYKAfgEBqM5tSLP(I6~H~?)9G4H~jBDoM^5%%oredSple!?zwoB1md+h*i*?+ zUTr9!0=;dAa7(`qxXam4$c;Ap^3-58bzV?oOh)=36f?P?ctYVRjR=j52qCsPb;$l! zBWHTd@09fWASiCX6_x&!V)1t;yD*YEofb7mYHmZhKy=ekuk@gS?S(~~g1E+Hm!q$O zs>u5K18aZdnUteY9G!!n1#A!-chu`6m{A6CD$(|SLS0U-Wd2TwyQHaq;boxX)QwLg zWi&m|n}$@-(V5Jo>;cALFon{8rw?v; z)iisbCFdj}IcI?-=gcA^BA^5T2?_{^mHt|80oXo z9XD~SgG17a2)x$Y%U8-lq(Fb#x1QH@l*^Av;~j??dKu)9^Pa& zqd>i@Pa5T%1eFoYiD*dwRXd0lL6o-jhx}ih!PIKYL67wsZxw44nKKEx39xn}-`_Ra zslYE7@-04Iy|=6OsSwbf`0^fIzKdZ%r2^ao^Cy447%9D3cOyVX&2@x^MVt4^S^j(9 zeK>&$y7m_09(bzaXGVVFaSfw~ARy5(Js63#Ua}B+L|p5kP$*J?pE0`Zq3x`pOpc%s_R}h^LmGoAHA-N4W}F^A)^-0%>!(G1Xw7|^z?PriNXlglJ~kk?SUe=n>yJe?{bv4yO(?8bhE zT=4Z+LsO!Vs$+7_C?;%^w%Lw)*M!r~^U6qTUUCguzM^+or6oVJzM zZ}IR7yKn0Co@U_Ml9Cd15Shl)e@1gCd+y5sm_g7>A4_>!3Zcu{;b^oreOe89)YR{u zV|drS2dD5uI&Gd@^wcbef_pMhJU|(y6`UpD&UAG!GE4VGtzLqg2VbDIE!(Zw5`N2XV%W!je%pd%Ot1`0LK7)ddkXVw>yT$J~W)`0Kl^hsI6UZGV z^HEfVb17kq`8H?!Gh#g=`~6S^Yq-4br|j9I2{nyo`QPXnV*8DC8{h7mJ~dm3d(Qcz zqZ=F{Ul}TOo5+5#UYrk1Z@VH*XS)C(I*bGmvxEdi_~YLXCSOeG8Xn)4!^8pGnj1lY zYtW;UJmXsTZSjrsSAK|2p%`H5ctTT|4L|Yud0%9Nmwknq1S{Y$;du)+J{)dBK$C_v4q{Q?& z3y?M4@X9uD>dA|~q2E^d^DAtFpYj+Nb1Cr*OmR9sS$iN}B>P}=*r!S|qpJPgm!0E# z_FcERq!c%7MA7Q5ld97?CVq%x)z}ba2I|JWEUC>`XGMM^yeGA(EHwKb-2jr zOIhahk-_7q*131f;ymk1K3ga=4Bpt>`LM4Nodo9Jc zmv8#=7d~7W;Ak{b`ib?2hK)hU1#SFjWVqYFBcC(-4b#vMo|tp89Wj|ia@iC2fr9X# z3&I63VEhL~y7)0k1%6=K{caRqq4nef+kf!Bmd4FD_v0Uq7%AW>X|XBY;_lgMLB^)n zU+jn9=3Z{*3WYRrZiRkSr^z}v5hp;`BwFb}u8eLrm=fwn7^0D`cASgZ=-xR1Cf=k@ z$85v7OFk+hJR`_8Ny@^`bXhD1EPsc$@4+~2cZ&YKVaf^Z2EIg_I(b7*rC=R(=FaGX 
zDJBn&qgbd$c^hh$Q%~Oy;3^prRWj^txv_XT7sL?QbIP!w{jxOi=%IF_Tj1z}bl1yL zy~~!{1^rtJvdZUu5b`vvjIjs>A;VgXl>}PQ{Ua4f+pruQfx8~Y*O6tj1YVeZ)N-W+M<|H zK7gquZ|Ii~>TtBqr8x0#CZVY-}T@nIjUWgcxO5f@LAD^x?*5_tHE+90}@)X9xg;3q>Sx%x;Az#$rRDqVtGg*GzspWX{wMNT^u%$bS#Q(MuS0*~KU z!!qj!(mkC0reiZch%xX4K8PLom@5<`_h#FD;bZRWXcp?Ftow^CclbKVBRG^D9Y%-N zErSC$Q^j&UY1!7{B{B)x?MS7dz>!&9+XZ_W=jui&Fk?s#NBr`L+-CNaIUVE~CyqwiqR+j+jz)UIq*~lR< z?O1W2{o&UiM)Z|E3krlJYi}t|>-7+14jO!_a2|wjl77dFi}z}tVw~5Mc719B(r%AFq)%74{>( z6-xpLJ9L6%7!ZCHpptoMb}%D{?PxX?>>8=}6P)f8KTY{nf#Pp?5e_0b7 z8fnT)5cCY*O7=0a*&J8hS-|)^$=mLs6*2EgIUral-amyhEb#SfHQ2@A?ou;4lxCx- zjAsaYbTcHO2u-U2?XVxS+TrxiZ^0$(cRZ!B-a~7b4ZJ~jjZ2#zLW`v&w>^FjC5#WJ zN0#wwX2H#@=BU0EdVNSLsQi^ooq!1rX)q;v`nBFYb*y0&1vU{BKU01q10dx4_(j00 zzLm<|o4UfF52rj@>bL0CrfbZY8i6YcXZd-xi~p8{((+mQur_iQLz{9_&iSfTa1!#e zFAComkIH>2J2(DEDYI{+LF5o++w>o*xqD2Xd4Rq_=_)DYU8z)$5w16SZKw)`e--2)J)H!DM?6e3;ZG5&sKWQu{htUK?@C6uc37gXu02HhYLnRe zBE9kQc-fBzq5gtX18R+}&&Il)u2jvWZI<$H3y(gua80#v;#A#sJB@@|$FJ`A7K$Dx zb(@i{loNY!)hr9&6dv2NKA6ma6$?3-YCU%UX=%kfF8mIW!Rk!w@*O>yr+%5UfMr=?eErC1PPYum@9*~~# zo27n(rfVp^S$RLklS+RiP?d-?4S88&ZtO5zyKnpfdK8cQqR@QVVy5UdPeK8z{s! z{GO2CauN>%j;R{R(h*ZOS4#M7+@>q<$5F*iZ|>nV33hfU4bC9VZq=~kz)bg`)k%D} z3^C=V+p$mX2H&{!n(NY0LifJMu@ls9O^&#ufPhvo88zMv^(k0c!c}WsaK`Xg?iSp> z;5Q>fcmKiv0CdFZn#Pa?BY!LhA3(&xA$=3w!@y{-q+LIz+fUt=>@oH6^>m4Pi!{1- z;p=YJ`%ktQ*zQ7`p3c@#Er7D2OFD1Akfjh@rx(XY5i9T~E(HMlX8?i2%2}vyUd^vxrTz)9NHo9-wM2PT3$?dFc8_GsPG^rV$^Gi?N8+Yl= zg?jaqkD|q1HRe?1+qcqeA19XWuXa2a&h)$wSImEAPcUG&1Rr;d3&C~8ljI_EmW7R_ zF(ICw*PcQI6vv`hUz_~sYaJ1l704RYsdFYeA-a@ZVV(7};Ez;FlitCv(yoFuVl{wl z{B{aizic|_#UL?m=bkT5ithkj*yi9Xb_DOcaj&<7I}K^6BZF(|x??ao*JB$l!k_q? 
zEIi+aWZzM<$ZnpUdDN^dua-_LFNeZ)4 z$eg<8$X4h-5+=YQJSjuf=4?;0>h!kbJlFNaj)Dj zxgM&C`ER-K6ASY!q2m?b6pSxYpiDAW+wK8Tj;gRkw8@J!nYHG*-huK4`w!;wN;z}Y zarN1EKD_3xvD*BYwC@P)DUf0hXID_n^?ATOLlH+Iq zpWQmu@`9$s<;lKMpoW}hq)3y7Nam2CO}haD0l@G`_Wb5&y5&ZP`*~0W@ImKEbGkgc zSkf?7#|G~$oYlTQ@`^IWFoyuY>75g}5(C5JR4mbwN*EeHGBYz=;^)KCCaoExvpKR> z>Rq5^!L2y|TRZ%=e?gbJtLrBuw|7~oy8H(H;FydbdU5Xc*i#3uHTbosTD~l3eA(9+ z+1DDR!#5W?wk%FGOnSaaQucW5&}kRLJnMssFPC^qLLo+&+J0t@zgY+#!h=hHXJeK? zFJ8SWkdCajAU4f!yqW(L8vBc2#W2loq;t}eGMy=FxzQ-eM%Lfr@YbwsQ;|l^HoE@3 z9BRx0(?d_R23ToGo69V?tdm2y%6ilAd54^M+OYT`>@9ER1^a7@{jtRFF3S9*%-4+* zZ?CzZ|5jm&MUhGFn|=A{b+a<=50aq>DDkr%YFEf zgIuXi`=r4^O6oq}_s)namdN%r6BOL>K!)_g_x|)Tw#%!w((S>Ib2<^t-1QpDO;QII zK9agovS}2y=eT!o?0j|$ik~VQY0;lnrPBCsGUNJEyuyl<VnyDJioa>vDGO; zV*kdGh;3?OZKs#N<&V;ot%0+;E!w%B1G-TImS8={uc8b3*bf);>35ONoS1Wz+wg`- zCH8C141Fo$HytH+o$0<3ZyXb`u)91A{p-rdis5r3CK)bIZv0fYZbWY^w9S`sL;Qjl zio|{;Qutn$RkWa&_xNUn3c1v`NTO>kG?#o{kFm8Y$i1f}QTsv|O_v}#;wp4@b64_Y z@Q#q)YcufW-O;hf8j6GIPs5tbm|i0C$58BkXR6eGPm{6+^HIe#P4}tccXQhVwgg?3 z>$+tIZjSvLe~zm(96zmobG`ATh^tDvwT%fRpPCmZ7+;&TcLS$*7ZOaaLPZUpEb%Z zL)i8Cq~4ZuJ*!$^BFAa}7ygkP=nH&qa5qxEd5F9K;Lm>DSOQ5v+vCXAovAP-%a<%` zZmUtH&Q-Ui*5~$rC^7WE=RY;t{h4O(6D?r2+yM?+nlGHlsLdi?RVMOgqPARY-UZNE zTKG*|*SpSFwKpS9@HgCm$mV8}cCreL0~Uj&UG7v$cS$TX|8@bI2hj&ihg;L#4fJSx zyU5rW*R-|Iq`r6G8*UfvzZK>?4>gPn92C8w5zM65g?$ z^xdQVp`GZ7$H`(7F=EkZ8_CQ~TT`+VH+h&TIc&Qoz<$UY#Z~?j0(hLLUzauyxR4VB zxrMZf@nOJ2(_(jG0>W2g@^u#;KpH6gAMSOZH&_`y)>LB4-!63u%6=JmC!7;~l==Vq0oIktaWD~W+G2H_%azKM7|e7NF%j7%Po38XhS zGq$6Z!HvH#ueIC6d4Ac6Yg_z&{rU1N3}faB#^`eTc6SF^P0Qw1+Y&t$h1tBkvKr0$oQ@|s{NH7$@Ys>R`jvVD}HFer{Isbg}QA;E6XuHy5pJ#5) z6Z#KDMx2jFHZK-V*H$8O>7YqC>ei^LPW->T7;!qG(I!c&L=8#q;Qw3YBW-hv3|BD! 
zk|n|Tozh$R%<;iLOh|&V!wQ=5|7{^7j^cdSD&K!-%K%yn|EZCO%^1=ApE4%^?ElrJ z_vb|Z%Ps(pj`IhJc49t}cEAPTLCAac53;aH*&u+>gWC9KbGhi0>z3#%{_I@=qGJ(d z^SGpJ@?JL1Xa?qcdNIuV!@rg@nX8cou%)a(>s!76_5no(YuJ>Ld`fRd8bUGxe@-ov zvI7f1{CV;R>2$gdK7SMCY^+YB$0GMYM0y6#u6Uc&8QKSBy# z)THdgrsxuEKRFJI8mStOMYc$(AU7^DAnTE3u|qAt*V5t;Eq4&VB0e2W4yj7A(*pohI}k|ZDPJC;p5eoVpH_VHh{@CrO$9F^8-1_ z=rh1{BY_~N0O&=J>mM9wYL1ejV@@Zw1>$1&3kYK%F!R8|6&|w zgm>p<1M9pQFXqD|bkup z<}r&lg}#NtwtM;vQqKYL)W-P@TM3;)XRcij3PtRAQSm1ecIS=oSN^k*Ynb_>?}k!M zDr7E5Ha0C1?Yg|T*~eS~p9AL$G=o&XJ{GOEmj`!F`ja9oU%cvt2yAJO13!IXQ{e@{ zreea)S5aS%YS||QMS;9lJtLAvU{CM^av$z};Bk0|U31S{h&Vk%1d@(mj|?MA^)&9O zz902mrtJdrVKxCh=c3E?&7{=3^E=$bps*e z9&cpLWBVz;MgN6xF5Ibdqg3t%dTktpdB?45FnI7)$KdQcGkqoldMsO}BO+ijrv~7$ z3Ki>$>TN>jgsPCD5tEX5)-xPGGF_~mPaagBQqjN@h8la7ZDsM7Ojs-nS9kCHXh^78?M>+B)ZtZU^4O<(25G%h58W7zF{j`V8oa*R74; zLFLNFSu)c+#okA&<&*%K`Qs%W7OsO*e8$vb@9|W?n>egYipIzO6Li)!tp!1g%I_d2 zZUJ~{l#;2;hJ()Xbc;TyXasGt$1$(53t0!oD8XoQ`)sKUnvWKaDD>TFU6UKovz?~y z_vw$-$I6rt4sJF?mPv(4(&1Wd&x=|eH=uW@cEhFO-8W5Vl&T2kRyiSa-6-a_^aW!e zXWXtU_3AXwH8J0MVm2i=000%twPWG(8qp5 z^TpTtMfr&D>zCgl@)3hjqbaP5gCo(m@vfl=$!XU1tS2G3vj3t6mz8IrWuurxM48n0 z*qX|Vrcwkih5KJX^`o~St#PEWN0DPF@E@sVNd0$+%;aYIKO!p0FQb2l0+J8FUoHIy zffb46$^SC_du>7f?{NLUh@Y5-afXL&xVuZtX)VE+`RAbtyxw` z>il9b4hC~Iq!&95Cs)%5Y?PuuhO0XSY*3I1VL0#}*bs15aDVWK1S7AA>F==7sig>O zVm9asieQNtxnXIMb7_`jKyAT6$+DC;VtLBvx`1$HNz|%e07w(BAl32LZyVP?sqrmb0k?Enjgvz!vnFq=NbKXA1mHb?k_a;Yw8~HTsz!kwP`v8<3(y!VtND}^Vhqq9MxC0IF(P@GK`rR0?#EK&xK}6bD-~Gz$eDC|2VH+iayh0PN zDTIT?iGftlmb~k=w6qD!wxg$d2q=G&tG?I%Ugfioy+27ZOun@3l&%Zl%NKdv2R|&@ z=Vm=3r#`lnS2762$Vt&(C!U`!r596&Rp{GzkQ>tOiu#M{IhIt%qSvyEiF`&r*{0`Z zw8mu%A+_NAY+Z$~?P&5?Rj`k1+v<3?qt&OcQ9VuG5a#Kj^Urr|S|f}6+fAw08y1DJ zCk;D6&jxEiLofHy_ZT{y`rymuFZFBzEB7%S({>v{yXrfZ5naloQwpz&JK5rL@40wO zm3^ZX1j^Uq^IQjnF)Qr3cIIDtDJ&T;Ko3F^M@TB{Yx97n`K2K=LBgBE$Y`l5?bu3% z)W9~xVk3a4zhKL1{GzEo7L;{+htF6ai;$;YhFMPJd7&k^wLOdPkBuidJ|CZ{{)HHdXPuC&rSbyf2sp zMXe%(n9rnbr0>sdb9%yRI}X6b2*3Ow42!et$GNuYLwXmIU~qnVS5z9(mRiuCt^u2c 
zlkZ|!32YW;dF=a}E5f+CeQ-jY*ARP^0gNtG|5(3^;1f@J;sVW**qTu?4mzt{H!n`0 zDj^InkE;RKO2StQs>ehc9Z)#2k;mrS40oVY_o>ktbeA-UG35y;JQ)^Zb^Yp8Mi1wROz zt>1e7|0@i9s_X+RF#P4<;4Jyr`niR(qP?q~xwW08qP+(oB+Bn0C?NU}r2mvTniwof u`=6puE!-^L0}AH$Z!CC)`1l!>HD5gX`vWh-p?M7~2dFAPgH%4d0 zdOb61s;j4}YwzCrt39D|Kg5s`@DKn10J4O*umS)8MG62wYQRB)dzy&W(ZOFIOax^F z0f3rl#20;N@HvEof|wAXa-850d_rO%t{?*dxKjcEz5xKh6S&Lw003}e0sxNm006EO z0074}qeY$vd;tb1B_<4b|NF{mFNz2Ez}t$eI{*NP;M*XgLrjCbU%rPb+vjfOo(PD9cz8Bp{3CN2(1 zfkwifa=&S2Vq!9pk&y|?h{(t_O@r|hAVGus04eAphuH^-_H+nU1O0E;PnF*;`!E0) z<`4flpC{$R{!jDn|3`D)CRmt^RP>+OH)Vrw2h3jhn3$-({#I;tF1YeGG&a`h@E#kS z{B4YE1LXxZEs!XWI}({!tZ>KtE6>0h$ z_3aG`;Qts_c*sx7H-C7&&r9Q>R4P|<1Bw_OEidD&;=Ho~T3WgvT<|mez-s5^y~dG} zkny@aN|cQ}f^mN`%8i_6BfO!1yxP9l2$>C8bc|I4TILuSbzAHi%nXkF2=2+FDs^<# zKo8xSPQ))7*EL@$bu;2@y22tbBcb6s8cDwRF7~6|VlK}qoDxZN>BxhUgB0dP#?tlb zrNFr&P1V_jR&_15@FOAN?Ps2s$<6IkjRp!1ikK$oFf{@_3U8ch&pS3;uy= z7pXXfxGGSa`Y+L2bLF<1o16Qy6)PJH%?5j;U6!dDW@0{=Cr)$w-J{(nr=x?zOGy`2 znSI2jY>#^~wgTnvHXarRQi{AV{;_dcSm`NmH_Enh;=L~rS&0N=pOE@reT)57g`B0! 
z-h*4T&_fNUh8xIoZTIR+8aX(Cq2hGd;eEhZVTzC8tPd|<&!~NUUq&-IN>19wLS;fA zd@%440=v~`3YVAF!u!x`gelO$U$Me;d)JR1s02~r;a@mD*xq+RIj}0KCQC|F#@7>+ zME7Oz`*(ISuJ`_!4w>lnn8L~*r$eiX}%K7}rTz;A6q{^{A{jG&T{h7U)=V ztpyhGqDZ@5QeRql7KRwRd3;r}tzLFLDQWolEG50X=O8!*e+a@)G+_GVvo&(IwR^m| zPR8-tK0i1ZQfLO_-vz8)vP6j=LYETG**H}H%*IAXL(>@NKz-8t`R@aTvV>#8MdAqW zF|JXQPEsR-5%F+WL4ijOr{wsM)#+(wVDNnN$cmylFgRF!BeO$=uG{I!=dw9We61`z zeJxc}spk-v zoangcLm|+ah>GOMLS6IZ_^s)|5AfvEC5wHrYqb>>8OY%B(C?q6RydF8(qp3P_w$EE zA>jFAT!VZY>DJz2H_8R2< zrmn93uG2oOU+2bC>DSTW*D;k`rTb=L!pKOC3I7WS%%E+a5inLURsdctTvXkfKfj?j zz@iY|{|Zj=?~Y#&DnL3{zJ3fox8V!w?=PaHET~gQ(zHc3?rJKP&a)( z(@>HUM5GEUAPSQ`vdf%0%76T2wxShvx!&B`#)LLjd?(XZHk%iEvQyUw;LhXaC;rC} z4)*5Q>3Pjx2gH^5<`jWuL>$zh~7{$P>N6KLd>RubCWM{t} zoaz}anx396_@;o5a1-t4(o4bA09qoV8mN%d$hE;uQtDvY=AUl#kDws zQG8~+%fSbj2aYF;HA2l-#>RU{Fyssjlr$WrN^AZ!)@NWtV6mEC13q{ZDX<$%*W7yw zkzt--oEte%iTJMWD&P2FgMAov^GuhgySvBBS`v*)v{|KRZU!!PT(U7dIGG%3Kcx3f z@7g;#enko+O%lg5HsXCl^Lbl`&E#gAp3yf#>;uAoC&Ao~j%2v|M%U)~Xb-GO=`%d% zuYNzQE3!W~2djxRWu3~HhS&3*5sSKU;a;x3?nrlQ2_PLnmCixk2$9HLvtzm5qAer^ zR-Q)vX8R~$sOjS5W{(((Dj&P$0h1X@_>eJ+`kv8RkOEFvp&2~=M~PmuL+Yia)ZJlj zm14QgvGfI1Yb}>5r4nEuf*eL*QT<9}+81>A_&9W_V_qgU zw))0;?`y6c^9Gv3!$bMlsmsgDIg1P0uUs;pIQ3uqos#yj8_ee#-`C&Y+U-N(gKR1^ zj5c8fT^-#y`CHwUBd8I07-W;nMvQAdAq5GOeT4fE)Gvzqli&Ll-OAK#C`Jx5(DEa) zZ-4gF&9rYVYAi7VX8cgTT#z>+dO8}^8wv+zXy0)AHhB>8gn=-&KRobz2{Tn#-X~0e z9I)mqJ+E@`ukBOq_FA!B*k~fHPNzD_T^bG*6*igVkFv)umo;&5@!g#w!eU}#A_Kks z{R&8;Whz#kLBS$oViFW=0#X#{_}nYg?)c`eZoIBn2U~$71Slip{r$a@2Tu*hGU?sJ z)BW@P^DC__neDH;wC^mqNV?f+M)?B`BJ{1!9nO}NW8ifVpoI6u`PiZa*F9gEU@uJe zucZNp`&*&Om;s0N2mEI}DpcoaScH8u+hM(VZ?8{M`+5>SpGQ1MdeNEVhr&W0RBAk_ zk+kx1Wv=ArEy1QB3YVMGM|J)pwx`K*O$bS?8u{0+UpF_+!<9#kge!?4It?MP^aH=k zVFJSPQjjP`1RMFF6T~SbWo754rmW`k-GB;P6)T)VLPDj=73QXTa@ZW~k9~Ye0wkXA zo3HUE?|ok=ez&NU?ra_HYWH-nD#*$*l9BPSu?g=TDHM1H<5I>Ssy21lM=_}0A0?Jy zC#kS|nJ+IcYW;T1!k&3U#!L7J8=hC}SD__B7CZYDQ-pfL9Xsq$vkLe@Xe!n~HKB@S z2h5l8><9l^V6(k$M?f;;eoAU$*F0xZ6T&V_y>Pc7)4CU2xmt&ox+xTmRa|hbG1yzk 
zY2yO_bX$EQ(&=a;=MS%{j0nJHO)bAC_8<9;Tu`MQ7Izo%Gmk2a)S;d#3_Ud9WfX~3 zvCdlCU8}|O@@;#-DvdKaN-Dv+?uCW;SW~!n<~^(P{bH@Zzfq+^1FZP!OR1rup~QS0 zIPkiM$M|aM>esA~SLaeF!HSJprw9z2A7j}|pVc%qb(`&%MrXGkuT6A2z9uo^N82fA zIKLM(vD68>U0KoJ+>c+(=(EZ)n5JZ8#3dvo#Kw|hh61Y9>AsVbgRLj_F{-8#+@JNT zwYEN5^ZD}1Up+$`MaP#n5VNJ~QaS=HjSN>FH%ku5Nen59*vr2E?F7hNnu=Ekv4~ zO^O+IJbybjJe+iAAdlHMx0@8}Op|-aI>GXTPlkA55!B!RG+hub6g$|p=(_bVbo{2H zNzTjunVp`3f`XQIWNfY?R@Yy_EP$;;KvmTQw91DLPoHb1@$ItANF2my9hxTlDE=( zy~B%I4j7t}1kIR}RHZRj;+4))%bzBhUadxlkPwiRAd`-{;){s=TF^5!w|yjK3J+~Q zvPa+Ri?>p}y9N~ui+V)fkSj-_$GnsyAayX7&LNK(51CYYB z$JCeV3Ek}1`)rhS#igEhb%S?OC>W(Og>USi8-$?0I5;5weX}}Pq$_aaN~Qi*y3E*? znCK4pwVbHw^UZ0YrVN;Sp!vW_NN6mnv6UYhzl|Q()>ayw&1AM*k1Nm2>~gkF>m?fV z){$$5$L4kDwi9R)`yNQaQSiVPX4FT|>xir<~@)^v0`L21}iNK9m(9xBn`aRg8x>TDmQD+~qcjYL~ z#IZgcDwitpdL2cyT6|`>FLJ)}dc9gIbFUy?wv3yutLzN1+&TV8kV+Ffi7B$AiMQsC z2eg6okTNji$M)}vxf4;ubUK;ecewAIb#A(FW`4$at^vql&gJ|#EiPU}PIuAJNKwS} z?*p$fkd!prdaa|*`n9m|4Oz3vw9R|w&VS$s;)iY;BI|;;TVYEfq=)oHCN3K1o;^a9 zy**5SAhY_?(oflc1~xf^M4ORO33xwazBzNez*kvNOH*Ehf>9eR9uYo3Qu+_=UO757 zXMg`bq03V?qQ6jaYGG_l6B{z}VS*t-ZF$>hXv{HJI zvt{oTjEZZ&b8uOFP%Oz%IX~Yh5n90E4CQ-6=e{yFYZIC>TaZ^m`X!MODRKzT6Myg6 z+;Gr9@dfS+~8MQgBOEt`&yY;lY%kYVRVMH-=OaR zHo{?Vy`?rsozslQ`|1lL1w*+tmZk};BA$|^!d{ULH4c$jzEK$cXyobGS4cnKKHwDF z>z5XFB$!`TWyb7nWuTVljpfLd?4E82C8~3|8m;C6#658_GIYi13c3jE)sA?2Or#Ug zui4I`e}t70e{IyNCy)i7lA=gm6EE2ce|R3t-l>@&YW#liyvbU3VgHcRY%ykF1|wX= z$k3Jvm49V`%lW0ap&`*^ZkV^l*|-Lw6g1mw_%PYUsMC((`g*ipd|pyw_WE=yAtg)e zbx)!wAp13CPXwv`@rE7V_jY5&8o0Q)0G8>;M(0L=t5$oNYGud! 
zg$@-Oht<3i6=r{LU-63NPIQxWZpd68VhZ^+Qg?e^P1({6-s4 zUik`>37?9w%KR>EvVx)_7^a{frSApCs?pKZjqRB{t8I%$3jat^eRRofVHAP!-&6#x z6t!@Q$g|0q7#ev$;%Pjcbv1n;O_$)jsoki>wJTW?T%TFBa;sC#N}FB!9G{&Q`(p-u zDEXpl&O*6}YIt^$XGhVrGpk*3MnN`%ugAC!_V!p+1B%NtDumzti6gzo8r0TNP;++g z%11Rk*xkR_UC4v%C!Uc?j~xPzcL<5D#@DqMVn*`cMjzDDX38{+EOR&B{Kay;i)y?!^L=H>ndj^`zWfoX+iH38TNwcF%;>9uv{-!#GY7$uk-(M#1mwxe~| z#(|I{_+(ECto!1Y4rUq=g^T)SGKRJ-80?OmQShwnjr+u1V$64UWg5>M|UoeD% z<7Mn)s4zGORjtqpEc{$xX&aMQygP#ZJg@SB?xsT9;<2_b&2!=9r*}C|NFOjXAPJ@| zR@^Xkx)Kh?KW-8UrZ2}VZOBm4gs@q)6!V697PM5r1`C!Za>79srl=Ge2lA*b5f z+Fl@h{7`jcl2WCq_iJa!g8sSUBe27Fk4b;j{}-utS%-mKE7;jGsH!f{sba60 zf+?NUcVN)!m~RxBsl9Ofi+6=0`V%EO!00oH@8tf;7QV!VdG z0IfF~|E80+v$(W$28-3+QQ`;~L}Rac$yusV`k-|_Q z7J8Z@uo^4q2m~)qyjP)7F7{-4PDX3%Z!_YhMbUC?YMV=nH=qa03V|20}xsdsYc~j}j5&-<+N zz=7AU`f`j(f=a^tHMc&`Svyw^t=VF?g_ScqU?88#%{bBP$oZJ__b;1A{!2epc<*CqV6gl z9L^HvZCt!Fw9CM87DsrWA+tK2*$te8wB=LLADn`XB&cnbnSdo$W--;RFp?U5z>* zV%O-^i-}Q$`Tiv{<|^WK-WoGUv2~x&p@i(k(fkl{TG4xA$E&H24J$s48D9BDXBiZ+ zi@p+AT7?M$;mMY+{O;!ur{t<3&nJ3GNv?$1kWls};wWPWcp4@)xSnW|!PG}6nwrTx zkUn_!O^4C8yi%``XoeK4-&#zCCUzn2aXzLG%5$syzV^CGGUL@ckye)u%Bqn8CGo!T zC2Z?Wb#K!av6b&CmPnowZO`|@eWuZ&iyQTq`oeK>X?K5@-F&S-zC!6Vafc%R)#N~%|8PFi|2ToxDfRLod7ElhJ-^+S9Iw-%b0y;854 zQkx;S<#ftI+M8XK6Xyqvje}Hn!fGnoxAmWz3CmTt3Sk<=W!d3V8_k-;Z732W;~qHc zRz$x^C$0DUR&0o~Z1inA7e=2S8GY)U|I7=tYU9{+Hv8mjbn}_*`A9txhh2Sje!dJ} zd3O~iI&Ro$ZhKix?#GYPbtAs8wZmywhnmO9sQTgek|_Cl9J6tMrTYz<>V1KG1(a+J zOD!8e{UHp}G*L)tK2hv5@-1~viX=u!eJ2;G|3HNWm&Hi9OO*|gw%Lt$J9hfQ(h|<` zI6jQth3C41q>Z%9Spt^^5@?gva4OT}-OJV6OFZ#H|5sHhZON5`Sv!Q5ChJApVM>En z*|5WG!!_jcdwG$&Mbyjl*=Y|}yUhOWAKx~xC$E2kOoLt*9i6=j>5 z7~jZ908y&ZqIK#olwW}GT^pXX^^o;>%p>dK;b+Y&*lO2kf!rONb`7_5v$u!hd~uf| zaZ!t=1E%eb>O|EZ2+#If;|;<7OA)KE%W>*{*jYK*M4M$u4f`z3oX|rA<&MSEi!UUD z;=em=7$!=sazD>`fs!R>&s3?#qrO(#4fL(Z@_R*YsU?)1uRBlA3ck-6K^|!KcpQuk zAV)X?Lh8C{7P4M0Zx4Br?~BjaclP2b`?-cwwcFZ7LEc+7`A{U>kB!Oim8guU5VrQ0 zzp>He1e4)?v5Q)j?w)ThMIauJFy++EvaAiA7Py*SGu&?r)3QYU&UaZ@<2#9U+1)iw 
z?+3&@_HYygwCxkwW{Z4*G&1?qm`4GQcUR@?4o<54*;gu}X`Y_QoPW^sd9JLhZ@NYh z`=!IMiyAMi+3wf2-9-tI&y~7@6)5tA?~kCM)-x0tNMws&v^2~B_C%Jr?F_kdCCwG= zX8X|f@3c0M8Na8~Q#MajuFVAmM3#8Rij|rLF9v>c;p~bIO15my-2AyVfpz@kOHpQ0 z5wvXsDzo}S{ReoI)e8Ko)|p6swLbY0JUyY-4O_h!E%3INCdQmtq1tN0niwq$5uG07 zB8IV2S}xU#da}zo=-~J~lMaQl20HJbxzSfz03v$xW0sm^(J1lJLD%o_oK_SgYZhGf znFjN_cM9#k(KPDpLM_}sryOJ3CeNwY-pCJn2TBt@`zd0hWIzg_BU4hwNZ$aEJEyQ2UU0E{>U1Q_SA3BkS6Tm5_-dYq%P@$kdyY($WXTJ;DT0C zNehNdV4S`$tJgm~NL4AB@jTC}qzN{k}QSpT(3%4_R!{Jwo29}q<1i)uebs$UO?-Fj3}xfB(P8eD4a-!&cozVV%S zK}_RT`~2>UcT{qvPo495zZp6$rBZG;d;!IWjkN0=LPFB;BCY1Tji>ZwNtLFpn^n8> z8yuV20Mf`+BJ9{&eXS&-9-D6|&)uxYMBB|Gw3ecJv==`+x2#woZS*S%yXM|i5ijtX z220k=38A<=A&&|#2cfl5t)eu=9lJrAc~->?!AU5Bwn?j@{WA%?mwoZ=g*^FM2BX@; zpWJN(Ww1JobA7CNe=KVWs`bVcJR(1Ta!@qZVFD_otx{{lBr+7}8_R0480f=Fb~dYiz$Ru(X(DfIhjvo(mD@ULp1<^*9xXRFF8e@;9@OVK{EB^6 zSfScBKF97?0oMN(#W)3$&Limf=#@H+OyN5Z{1$-qj# zB-fbD4q1LwlX;fr^?ba%?{#FRmk34XZZwY5e>|I3tP57c=l0%y4xy+K{`7j-cQgdL z*5zvtzHYNi=j$^HVF5-}e4-i4SRU73>m}TzR#&H9=a5y~S+o|qOi(HE9hM|?{B&=? 
zg4lj1mLs8!cciG)pqwU@8S^84q4R~{OOp5}=_Jbairj2I%;jqWCmS96zR)Wh8~{NN zSNurPXa`QkX66xaDBbzMu23zG=-%x)V(RL}utqp}WZUi=u3fgv7?FlA3Jrg<3f`Z6 zDcazyt*mt}A+b5A{K`y=vR53v4+hDX$gI&euQmg?Ykw+L+VV7{cOxIZ%jEV2*Sn4t zZiwYf{+}Fu_{b=b=p-S08SlY>-|S+D)$Q?LXB5iM`wwb|)X^VX zO;lvlr;8$kgJqLL@kcdF)rt?lxxF*(UqH~~D^;ws{_)IVtJ_ee)BJNbLBYV zB3-r;h2Z+v+@4Yrhj_b9u2j@I#wNd1Z8%p}-09|gEFQWxufngcc`iNct4{H?+Wk%)RL$qXftN0of9f*od0ai{DQKND|ybqD8=+{E0dD zvY{X00%`%cw10C1@T`-3y#r+b-%UEe{n;u>(7&0)za=eyS2HgMwm`I8t?zppapE@t z>yhj4C8U27kE%fBsioKZd%X-T9-*J>@vVRF4NhESH&H_bX)+o1)YcwdRIV%*i->bK z7pi6RBhmjZ<>JfLK(Cw6)5F_&xlTJwB=KtNBYAHe3eR8K(cXl`PhjXTb}oDP#p(Wb zfJp>g*8D>Q30ghs|87Q$@+HNGi%A?&ZSMZ15d2dg0+HhZvNd#HcVs+^nos`@SCE+U zq^s?Z{Ffp@jNuzky6L%p6Q)hr+~tA1KC|>f!90W`LQwuqikoWeb1e6K)LlM z_p4w;TLjaq2yzw2IyAby&A+REX)eC2(V;u{Ve2}+0Tszsp0N3_@ca06ZO^l3HthDb zE^tOyvblHaWbyB(?sk;j>wtLo5^dWS!c^TV&6yp8Ct^fgvR>~uA8$#OyMrS;5cDou zh~9r_GYnr2SdXQivKdzp7R8`&Xv8_0Z~q*(T4pm}@_O8SH%#5Cf2B39x0uS44`+y1 zC5Yp8yTxPkc5~N!x6Z`lFkds;m`&fr9X#wUUC60v<1Ewh@1KaOJO`EdXY0m8=3`+w z*_+xeXZ6KvvRSQ_YiAq5X2<+s|A*<47^T{&iACMmm4thyOj zX!ynkm)#!npsqY0m4D5@VY%DOWR+!Ut<5(U)^(tSmsqSCuddrMx&WB$@Yh41P4`|+ z0ZT(JMeD4KtLr6l?ffShj!eDob8FiRKdkI!xk$%6>+H6c)RO53CbaoFIuu49nem}^ zc2(P1wSM1bYc7mSTa9Tln{fin)b3#Kt!V~Kbny+_-PdbcU2HJvD#VN9uZ?GVrEs%6 zWHUQ2CTPlBeBv9${|c^Nin(1@u%4#UA_A*BRfu16w`fOQ0Gb`{>LlN zf--Z&M?bepNu;lwZ6?7-b8TSZ*IGReuj|f#G+1!_&0>`r(_cUx&QnyyW`M;fWqG|c z5s!mhuGeL7O}8AWLp`?Y&JG;b^*TXzazs9b86_t6c1mu02|EN4j=3UnR2df?re3Pf zhQ3;uo1|w%%lX!m<^C9Nr2VL9;?%@EJH=do$ZA4n7XFXOCr~PRu%7YU^~_qDadDqY zyg~LSYE~|Hyr0dyM=@GG-|xs@dG3}i@+|PEMiMZsTU@*xqY}fywxhcHgctTrdpcAd z(l18$6RkfkShUJ$51Zw=$Q2eR@qLlqE0UI%fl{Xw>kOh|Rq`Gd45pTmLn5LdQu#6; zIWR4(5i9rQ%U4Au-hFc0n>KOHf9y;y8(5#x=4)2ls9QS=3{8rJEi>CleEj|o*M}`I zbiJ!;m0eryIUoUMB({`eGY?U6{KtE@g_GQ2b7}RZlm6AtS8RqU6NR(`?a&eTG={Fm1SScup{@=6d6M0%Cu)!jKUyU$l< z$&T&$8+h*RGct94bXgi4KOLOElNfo#yu3js8I9{qjgZQ;}4c8LKu8W#~=k-%9Erm&Db0}QDxx8PNYQ37a`WT;{5MNr$*yIT{!hH z_uOBVM96^8fXGV|EvV5jR*hj5wA)#|c_HEw#5f#J-8tNG7;-HmHhBGxR~%yMZno4j 
zUlw&}N>Djsu(-U2pDc5<@;Q~ma>+-M8{oVD;AEx3K(yB21#`oL@4U#?q0uE&=u*X9 z z)o;V>vh^C#qhi?8$){l0uG_=K8VQd>rvU#Z{ja5_p4m?n?m0>Dp!d*jaWWI}uAyjr zJC_hKaRL4#rI>%JqOZ@CO3dHc&-dS1RqI043c^Cu8AU0Kj6RDIsQvDn@8mR#`d6ay z)HD*5+lZ2DS7m2b$PAxiv}4T@#4?kP2b4+2V*han3Q*xG8#oHTRsz0*DABn8@oI8U zR5Knz82_=fB>y@>@QgWL_05$R!9q!PAH8=Sgyh{QQQ}}E{<@W)j|O`NuOFjG{vr9p zPfV8mR}+CJo;NM3u;n@f-WsUC7s3!OYrN6(iO*zrz;%azAu~S#Dk8&i?*EMXA7z}m zHxA&EGnON^!q;WksJ*U?+T$R1niXKzVEeRz;$BgqE+dVGog`^ic1Tt zV1Gh7IP6*_U|_ho@GYY)u5Hrg+FYo?@d(x%Bp6lqh3>`sAS5wa8@;=;b?JZS`eqmB zgB1!9{2aN2h8h1xeL!dyVieUYOu!v9Q*`;j%)){|V`df_ggo^NR-@r(6iN-jW-ysk zHAM_vuMnVjj0{Z@5|{SH$P)M2W!uTbJHsD5v0%gt_thrU(|vFSy=Y{2B!1vmH2~1S z?&T|&)6Y!8y>SsO0%p=pSW=hj{9weI3&t0bQ6Umc{J_7f+FUWHqqsM_;(guzij&OV zJEIW}dA(Y$Fn_9w7ZTd#3sJ0v3yVU+-8z{3g+lKVn7|laAVrFX=73&5qH{t>z^Qmu zgU;77^o}ihMrMueCueCNi511l&G{r=SHYVZs_hLaK~@71KU(*=RQ^A9LWtEaH+w$u z;tdVCUAytjmiVW!Z=N7=b6>ckg~p_YGU}pz3K5IGxcb}+6nTt58qJD?g0+2MYHJ@t z{%i_uhKilJ@pCe)`NyJ{uBj9ZGJF6q&Z4|}Wto@yICwo|WLP%}yU03~x2-WbxmDF) zu&${nr7030_7o6z7aNV))`q)^jPTAXl&X)yyG|%<;U-!v%^XVF$ez{j#XL(x%YavLD zye}0TeQS0d#5hV&rKagTyn<80R`BgZn5(;CcAE~kJ6+xx`p-s-w^AY`L=dXj`C2mp z9SoG!%GQk;1?AS06P@}`2&l<=>ZAF>*L1B38_CrVrLYZ8P zEs1U2aG$?Spy_)L=8*cy@z_7AfiwNF*5~nq1}eq4g}O%@iTCgFL<83R-geK#(Sru< zNj%AWooZ|Erau=BTV=DgmdxG^JKA1a$DlYrET6WyKA-kn(<7bTr@8U_fNbVb7V%7* zj<-%Z@1vEZ7FBIU0?icwKJ9fNuS4gU(p1^HtfPu=SLdxwsH!&{nF~cD39wwW@68W) zr`txn$i_G8N*5+(|1Xgf4G#Bx!DZ04>KZKAdyCl21F%{_60ld`Fcya^uSh}3dj^Z7 z8W4_EYI5lRpBHGlo`EjU2*7%LwDgG`fz{Zza40-N&Jw|?IvUEq&q0oEI>li4DRHE;jj1*mg{rX;&NMv;r)-dhuerwg^FMD0pTT>I#1xj z_I@xlkYFRIp|R{M*t6CKU#wDb)WOS*goHG!{nd2wftPz86_0~2`nq(AZ{3qGjjd~Y zAdCPO_5_6J^ZsIF00ee-cTdmEG;38x{ymVBN5(Y8$-0v0Q}_ryJIEYvOf8p%*Ng&y zF#}y&&?@`Ei&QvF^-@x|Ewua@!ET0=F#?kWSXp8qYL5vvn~u)0g15woomAq-Zug*C z8^?bUB(tY>`wl@uw?|dnpMrd0Lu8nJqqBFSELXrH8BUkBFYQ?Iv83&}!~36t`im|wEtb&~eMN?{6STuzvJ%=AZ(Ci;4M z)eg^l)HBAFI?d9V{>loyz?+lQkuLK&1Xz?mqtaQinVDX%{kL8?cf8)0YFGTBIc)+! 
zD*b%c#VJdgu&t|Rdi=(dg$~^cjc@JNNHF~of;~Y8v+&t`LoA#&LG8T8qa;4hS3J5~D0J7Zf$D9=RaMahi-`hLJE|^dRl{|O;vzF?* z;>+Y5RdHjb9NTLS;xs3KPD zdU^cnqhq(&#CUfCA$8X)K@q17>rB5~OVuD^eiWl*sqD00TeCZ!wCxe4IbOZWcJY&2eY^OsNFr8rC0jT+}xjm$A*21 zoF3v0HQSsW(cD*>ec*ilN zrnmWQvY9o)ITY^IzhWjq1Vx>Q3#LS`Iu$v$#?-ck`h>Qt^9MV2{#qY1JjV@ zPx*=NRW`&8D&A@iXK&U?5ZW)E?XT|8YEHZ?s*S@X{pDWT5m^mo`-*js8=tZtuday5 zohQn(I0|I(QBkuQFgR11v=TZICWfAazXhY3F6EfNWq?aUE` zs`^2zSP|Ca{wFnl60i53>x{e%Kk;h@2Pg7V8oZ;x)>?1TOqPm`>g$rvgkdT+_HS2t z=BrN_{$?{Fr|^M!<6X-Q7Tf!iux<$9!rA+yxMHhxkh!i@;K<3F04oy|8tX&)xZh^s zt#7y2R;5v3>mn_EafC%cSRi)XSf%OdWw+HKEYn`bBixl2-gl^6bUat`Wu(~_;&geJ zXAF~wU$jDFCkkU$-q+vh@#-ZEkzO_M8+G!z(KE>N-2}?tI~>O*L^%H{hjb9r6*8A; zP>1WoPnDt3-r>Zsp3hP{DD#n;oY>hq8Ls8OQLq8{y_u26Y?X>~a^v?}D~$(PtZ1?` zYQJMvA%1cpI9EpzNEq(vrVDSLcPdU#qr|L%Y59ak94;_P)9OObr;}8v0NoQL5wEYW z?`3=^!IK7OJhY=pa;B%%N_zU?>eNxCO69{+izhAw#119em141f-)3<~%s)|**563i zG_lkyZ9XQmoPmYAUK8w}-gaX0cuLh||Z!BAWan`j6qL$kF0x7WYp8;7^J)D%Mg zsuJp!!AD7csNkq)33jiS;*cUeqd;p={?PtJc4wcasZ9-p59a&1zfV!YuB8v#k3%RT zUWCwqRmYzh=>IhZ)IU>z!OWL@cGzS_+G$?*L4WoLZ5|G9i8c<}X_1$UhdE8v5fl(; z^N?CfOmu4u-+l&Op?*tGg7j~kr}Pg6(XCm4%I&=4B`FQH7C-QoR}) zl8^}{Rmwr|WU;y3^iM@h;ie%h!O;QO@KvQvlWb4-8X&x20dl>buxJF*XQH?!jCUwe z=@MefNksO!$H5%L$!p^Ss-&Zo}AJ!1eopl zUjy%Fhr5cuDp6tjHh?S&`6k$Dr$K~vb#Na9&J3_Hh_ zNF9;64Kg84s38fau*_00|9@N%hnL$ChDU707IJ(hYz!a+{ z;5o%o%Pm7|F&1@_vc|8j*Gj+%1?j7W63_t%SM_XA?0AgZg)BOO;p!oQ$WzrH@nWBa zsOn;z0zdprih

    0d-W?`9a*$0b68$;(UyPscg8{<>~@** zuDD?G?&ls+75-DtLZ(--$|ubfr`6#mfaU|GQf~0_77faaQt(*a77;-0_%!fIa>&l|MdLFS53 zLxSC;pfQIb&{Hg1SjZR}Mrplwk(Z-Pye^gRzEmiYShcV>VH^mB2+e7_f5` z)euJA;r{m>;foY0|Ao0igI~Vfh;6VcRFIb!b8v7-K++g3hl&p%_!H(t5E1WOs4yj% z)5*7zHD}S!Q3!JPDK=N*(0`XNeC_>*f39z-e=y%%4e2EWqA2if=Ky+*KVzwe>v68Y zGQpbDrL09B!`)!HtGz3;>C-g zfrd6E8(>@+5g-2q#%VFN0+NvbRbJn-$vn8LBeh2@rux7nq4qz&@P)p6YAVmL-&_`= zUA`z)tDypB%fTNLfY>-HCn;Y#Tls;g-1LG9F=z-CT_hp}(>L4g#vS-?xsS*j|K8L7 zxy0K1Cq};&a3{yVms=MWv(#A*UKu)W&=SJq#}D~ec8~q^ed#~X?4vIodEI1CZI*u$ zPB^x>HXMj@z~%jAGCfYbuxBjPl&I&MIo3_KF*6t}g)2g;gCY%qT1a>g?61Bg%b^0S z#r(39d*lkx^pl_*#ZkRWrktq?Z5>4;1%Vg>UDwIDHm89}Qn54E|KCX{w%!)vy`VOu zTX*jgBgV`G*Zo_*9+WY5Sf0MGYseTjHWFc%64UqIB3<+a_lFa@pN}YKau8@HEJbzq zZmVG;)=(KF^I*nx66-gPT0?e#?H*Qih5JdStd9C+#jO*aD(7a3_P*sn#T~nLUX=} zlrUktIJz4*d>}RnN>6kPhE9g-zaF!)LlGkAI?Xk6E{JuX{-a+HVq&?cDQ-|S`zY|A zffozP=tk${eBMNxKQ0i!U=-ZA_mCAYFoGg4C);N^Ua=~20`hyQQafjFNNBzNTT}bbMD{ECx?|SmT@ZQ^+^NMD9GmW2=Wr9w z_oSWp84JW_(NcD)NzYeO@UnZ~-~qV;1nO?v^CVB9%Hf&Hf3Hm4z$}kAdnhU$V>bHU z8PEz{5W9XM`1+l*K`$PWP%v>nRr^HmBw2@-ovkxevZ!LH<=B%{!mUYaP^Y9=dop=a z#g^pMDfW*+XZ$WcxbAOoFCxZ$Zm&Tm;K?=9bAEeU8;!ojd&BPSG2yjrYX|YSZrVMF zvzEv1Z>Qw zg!P&*F*0I{`vf;?FJW2Dc*kBfO$2&}`CfL=gU7$9vG0}@-7uwx?da<(>bA3}lrl26 z6`qhF0|FbUJKJ6zp$M$8yMp1RvAtVcSV}V`>&Zw=&z|#^l#o#Wf=%?Xb1OO7 zhCgB0b8kd%1wtNd?IE1`&;q+Tmdo6vD~%8+V8I6*WKb=KeS9%(%^h`k6g=gN$)@9C zc+Fnp#-TqEw#>vdm$rp>qLYCkyRM!B90%tToT-6ia+)8eOY*43g6IqBNn&%b^*7je z2sJ>(RtLFpyeNcQ>J~&dZ?=8JQI(KLjRY(#9gcj9{#+lx6#Gmt+g)-CgZbFiS!t9< zt5Q!KYKF?ooiw=&a;zlqkI-W{X!=T=d5ps8$ z-;cL%{|N1o>m9Kb77)9)CHnr?Kqx|^WN?(tFNjHd2xSy-$@d`4a}i7&b9MS>)CmhS zpP!w-bm;KhvYmR#c1;%F{?W~wfp?W~z=My{2LP2br%qz~J%Y71$(d90{oig~qLaPN z&wr>ZCkJkh*vIWwd!8gxr?TCiCVW#`c(w4~S08za;6PKCVaGnLVYok61FDiGLx?t8 z|GW(QJC*P9pAXVxxq?F%Z*$VAlq$XJA?6qj?ZaC{6J!3~82E=X@T4vYN%*a&ur0!v z^;;D93X%REzTdx0MgL3zh8nhYxAKoOiRId?!b!w9u#c{dmLEXPDoiA}0+;~vgZWEU z@!$XBts}N{1p1qrXV8O|-)Q z&!>7_q7K7Hj*WeG#~QbSf^)yL?IY9@$cdmi9RzhNaaLhFF$!JSL$S5p_sv1<%`hCj 
z-I2+wzZ^cuN4$Fq-e!jg#?=&2av$ST0$mB@xZ`;3yGds<3FMvkd4G&SK=IJk8o` zMDX-0&UP1z=Z~LkIPV=sycs5GFTZ_slxci}o4OsDY}nv_M8xZOnG#{vvdi>ms>?@` zbDGEJ4XLN~wyo{u={FGZJ1svjFu08aUB>TRC3p*i?UdCqcZ-K73jTB0%2{;?%4QVJ zF+TUbVY`4dD!8jyBoV~@&#~wFdKG?XqCL2SvqXi!=;F5DxNs)da)_HBL@6njNBbL3 z68IxSDR_PXZYkA)JKnuzm9~*@(`z!<9!o5ypTkaVWiwE# z3m&c4v?dD0@!8M%<-3+^ld)=3?(VFXu5F_C#(2zI%PH&+jAvJt+Xw5Nz5OnPitR~QAkLkbik$W+&Gx75zgH29uTQbB zH=Zw2IZMdZfhv2tYChn4B2$x~cu2iM=g+JPe`M5YuzKhpe>iL?apZa`UD%o2zK=DN z>$24=NJUpmmRMkOuX{KwB1$NST>28YjBa zLf!1#*Np6Hwq5)fIG1O~FvrfS7%aU3w1!dRn;W3-%`^F#{1la`md6W<8S#%>fQQw9 zhNfn=$YS6gV6u=&ow)v|8(e3OjtI8*?0|8S4B&7H_Nw3`JW}dB_fA3fBbXhRVUAIH z=u%Ath^@LlJy*R-)jPFFkaE1vebsL`eqA`D^bnl>N=k-eNMo6EV+S&{u zPK|dOG>x~ZJ_`Xu0+fJ;W6>VRrA*>r9mgIe;HN{NcVc)%L=D(U1Gp-sibyjJph7^{ zfyIzu)Uk?zT(s8}R_#`3C?M7V$P+sVr3`BV&`HZ)=$$@3%+c1%(9k+g8J3Fu=dwaO zijYM<1QY`n9pFv9{DHb`jj*w`k*a7`f!3?Bz1;{J=x}KViu&ShwDCByHpL^CRJg5G zm5obHubs*$yP+G4r@`&fA=xKVLyAb0#N4>DzH$_Gg4eaX?T5^2byJGfWUzn{`(zk% zA?8~UJq`o`rumR&fyJOAVE?yGal{Tzi zvzbH4YQp-Z%J+S1LY-?1H|9=hQ?481IA!+NinpI_EuZy&iuyixWxddk$>sZ$Q3O6M z0X3!xx;m0%=*QggqvWedGA}J_A>ne4R#!(vl3I(NM6BbQWP|8v`#Sqvx#@_#n4qA& zbsL}M0eL+o?iFhDP|v-YP+}_;mVjrTnO^nVVWc zQmx((fA)I3<@m+ByVynF#$A7VG-~ddw3ZU9QTVad~Ic+PqmvhZb8s@D8cZC!x=s_wtDu^UipF{v-9m`V6;= z3BSXdrO>e*g{F(l{DRF`58Eo9Y7seg-02o15e;JdQ?HP5Xx^;+2xjz513`e~P?oTl z<~ht4&z5BVEREANMpY^TnTxY>#u{^UPXcWFYVdP3ZfxHwtPJ1^UDEgyH%t#P3y4iv zeQAjZ58Qe)`FvpiB-IC>lr z5g{IylAKJ>%1X>kVVO5j&1ifgyKM6j6mh*Yjl~uq-@bj@rkNof2P@cmuCFid!*w4* ziH!q~>AAVStL_c2seV`vy`P_-?<{5kSNo!N&Nf;kk0Jn;ex3Tp61USxOm^Rn+_cke z$JN2^h*@fIe1k5`2;^llt0`^I7z|{uHmeX<)HLmBHP#K(EKsKM5gvje_ zKf`8NSq#S7U#O#O>m=DXH(L5j*`sKV^MfYi(HN5h+q6g%ZNWX+N%^5}H-v(vRvA%> zVCDFixqB{)#SYZJLKb}YqMED@jF|o7wi>VU`1KRLeDn4`>J=xqXGOr~Mb_|dt8OTa z(}|-Q^#=my>NQWxm7RXp6P%}Il~Ll{cJF}*(JwbnkKmygzSEqVyHIqrNBG6}_lzo~&zpA)AOp1BGjJRnrWFX`utzntM>Yt#Yw@P8Nbk(E_ zT2_Sbr2mP58+{0tVjo%AK2$vuub~Nr;$M(Ml-PzOfU^vgIqIA5Pu;DT6(In@L6%-WUtj0!*wY3sBpga$PmJ{c{GdV8z 
zheIuGb8|Du9!_R{omzBS8I)94SC2@NPRY0Fzk3t)1F%YG`X8Stn5d4Yg)S&DTlUo(t3GAHWKPd&(13?pR60&9hZG&y0L!c182@v z?^>(gblV{ZYnoo|_&j{?Ow3)q*~Y*a*V$XkM-wWQgE{;6Ex2uXGaIUOSD2`@?CA`M zUNBs#e))2}@8Qncj7HDsNNYs+XL6a6XN#ey!30(`6DhlvkZn zQt8M?#1eGZfX9Ox8A>-6>zroV!lF~T)nkP!h8(u@#c(HqNDk0@6J>099&8z~&g&zY z!XPSc-<)ZwX@^+frs+=9I&{cQ@IQaWu8vg2Z_sUTRYpn*57r194y+Jt`yF=udfmdl zS0IgE5Ql`u^~iQ9VvIU05Gb0GdPiukZ@j-g;n;5960oLrAhVTduJrSpB}3!T;B)lL zMz7zw(QDlRycGy>zy0SFF%;dmst{9zBZFMIe=rEFgVGy`Sh)Z>Ks>O zWYNqgt_bP4k${Q9i%QnvbeYU0T9mr}txq|+vj`I8%62SP`w#5;Ret@X;(hb+mR=QY z(JVsuXJGHW#y!mwEo3Xi5>po^j<1ZjP)2WXd2IOXC`FEFySoe*+{|Ig{NAi!NI!hR z@@>}jf<;JXc~~T}<m5e6vJgsh840wKmF9SGGBJnx(ihhrsh zXG?j#fyer+(y#YXhhmR;qexYgeq{}h752^h)H<8FSF+!uBe><M-pp7s@d9v;HtNlPX;MsX%Y%RnEi3$VXyPPRnRlx;SB-np(M4t4?!+!kVTz zRUKD>33Okk;YS@c-4)#U{F81*s*gqY9t1|a*eIf8vOK@$4{wbMnrM6)EEJD*>_FwH z-PmFs$lcW7J?_Sn34IXdxFV2K>DeLrklo|Rx@dkdwl8r2SC;Cuvq)EN#fed`pMw~e zWk;KO|6Z5BQERAS;Q$|Uk34=1_3GMOoc>6Oqh7-mO`8+&Bidj5m9zwFIfgiB(V=Hy zNdta>>#J3U%9ipP5~vnE_v2k~{lEfPfOXn~Ix(=F_$u3n`q9eB*L<^Au@M6_eR=lm z8P|KSY~jEg;sLqP$_R=MBtWr+mGm{yB@yI<=}%mjtUxIE{PgluS8k+hHN%g(F{+=U zTIEIQbnk**!&ys(;*F07AE&ylpbT`ThaYt*JWb&bERdyxOoYZy0k3I1)sxdp>l-pTzl3iwU&bLW*XP_y zFKch$k)^X2+fUb=63Cy~4y0SJ8^O%hJY=;!nhv)(FtYqCbg#E42B#Tl1Lc9>Q)L#DUbitET z*@a)7Y1~d*72}L15VfD-wNl+LpL$(*$jq4gKIM2`WU0>6UNz>JRYW43*|I)UmwBUXnPLkBrAH$2rVIs%8UFmWn>;65QMjhHw zEs_8Snp>g(f7?fmVMgaSB&RzKTGaCeqrIMNMR{I^d>Xe!yZptf^xj#4!rg{Q1A?pF zpS?6!KfjMxj@U0V#5bfqk&{%->{{z+;mcHEI~BFeSWR%%5wCr;THXUyc00AFoW*uw z^=9+#`wLNn1wW@~Y`p5;6xOJ}EoOLpi{eD%z^N}{{8&mDOm){+xVUKPOji%5 z*O$H6=-somKXQ;sX~Cg|6|v0|epcnNJb~TWZKXPYf-Lw=lr65Gb?Wbp`umS5_x}SV z!r}{J*c>+KD7&|!@LgmVlDEI>zknyx$asABlW_H$LlCH86FyzG2RYj1WRa~<>D=+Zp9ptWu?3+`b1-8YbH#Ep=d={R5z{RxpZwtcTnc2y&L5k>K& ztO#m;YW9_j@`sM5c8Hlz!lP7}%T45_@f-cM@1rNzR(I%b6yJ(>f4G03)*)sg7>}r1 z6G&=U^Hz|q+DJdHnEDrDqcl}K@f%@dR{QQ3NaGbfkY@~42=UiepkK_a4(9L*0KFE2 zKVAaN`F41C*eJBvS)T>dT9z=PHnhLG5EjF?Igz`5eNr44q^hf`1h=BTY ze%m_5Ws5=ou{@EwWg)lgmxT8@q?7C|@@CgqnbHkZo|X5xZ0=1kv6%PrMn1XpicyZ^ 
zs^5`je|e3YW`bTf%_yGlo48A(#*Qi&@_KC*XZPZc#T14q9tle{swJ-PZ{P>lzMOvN z!*A%^&(F_oC_$EvDDalUvJI~df81_{X!~2j*7C)qceAAWcm&$>&I0a;0e=DZSh@7F z`hgD3x?yu|-5m?)#Cf)}flM!^D-!yPbh+4@HZ*tI#9^AB^CoB8KUziP zZO6#59pSw`gP$(MnHaHJ@NH^y{0chMG`4sTytn}}wToj6jV!_8ejIDNNh z#j(}9=`2r@($w*=Qk?7BFXMrPNZk8<6Q1N8TSga4X?YHJ2J@*#F;`k$Nwv|GCEI>f z8sh=>l}jh$s5}hWteI=?RI(1RnWie0m*$+2U9;G+@A9>o;Od>qoUv&+J#M4^Sg`P6 z!C@BXnvBaL3q3=?%fiZc($#Yg3lBRpXznIjhlc+;Ys6x;zfbthj$IHH6^rh0Bdgf2 zJeMY|eQNcy6fzaN?teezMM<$Ni1cURTf%@Vcn_^`uyhx;(;nbiaos8wk50y13V@+Y zX&^^0uzjEaY@t6xF1;mJ7-ze5`gXh9;iKn+bh_7!LWTb?`Q&-=mB&d59f+i8>1Os3 zUh6}`csq=w%lXF?g3?`V?6{;;c)rmA4|AVW-zoOLyWd42AyLY^MH7{0xzM4vhgar> z4I18R`M-*><(Lyj5DvXRZVl|K=pWx8tWg&`NZT4edIDQpm(en@sj)DwXPAG7m;Y&G zWvg}NOu?XAYE9J8_{W9L^1_|{7E4r3pYDa#(rfi2IOln4^z(EN`X;8rD!NPNGHrV` z&01M?nJLhEWFGl7UuBOSoNDT=KRafats$xnt~v{Ote23oh}fOCQ0*$J_!sl8P5upr z-U5#J7W8I?K43R*-C}Qxt+EjR!N=&|dRF{kvC5hEpyK=hFM{-}=|k*Z(#a(qI^Xc@ z*5S9KI{)P%wWsDH*+ukH!d~R=JhPiMNU~?~5j19~|H#ldO%ojlN?fRY=#{u`&u zuZqtlx|~mbUY%oGJkht4Yeb$(gKsKyO&Bvw$;6hR9Uir}h~GeDxsQ&N(mA(kvSf?7 zq&JjSzxVUq@yJi*q4~#pbIQ>uykUO4!5_W|F9S5g-&u6?CSeMeJDNT-y@zsL$|}4n z;w9*Ls7hM0R)J!{2CBfq2v&}8l|z06doWHeHa*g>qiyxBa*q;fp)nr)HSVRk@eG8Q zl;OQO^dEz@KDZ1~55Ht*ranFi>lyb=f17%HvCQ7C-b!Q#^ODHp#>0-cxdq99#_z2t zT4T5=6tfGc2zo1!t6a`vwkMtxcOGsHhd?}^c_)F`V9jc&gIh^CMfMTjx@S+_omB)S z$-oiXEAAJfr>}lBS6krU1Im>5cNA=Qijk4AdTHm^NAbYUV(2sLv^F|2Exh+X@({6; zYGFmPmK&t5^i9l}Td~cLhBtr3HYK-N)vV*=tdqG`4<--QoltHhQ2tE8?IbzxF_~p- z-_RaaQ}OooYNZbW)!FmhRa_X{5aA#1`6}klsTPt~hPFLv6R=&!~60$|^UoWF6 zE#F-)vXs=;Dnjb=Up`luT2gPF{arp<05_YTs-Eygwb1%eAls70gC}MO8~>iY>Kwnh ztsn9MdgFVj4HO#3rCb}LC13#{-31m407N~=#7><&0Tl%`CnxH`N7+*Fr`|FGMO_AAf#ZP}Y4#y}CH_!>Q{(kk&arabK==?|SP41|hZjwXKzUi?>0J`e9S{ z&9c*fc5WEv3+e|#b%SQ zS7lVz1;b^oHm^daio!9@@=hf0mG2b#b%VCTXvZ3*XkI*5?%G-KDBn7eb1898Cr@lv z=eyoaq0P}3!dP|R9xg8><6Z2zJ<`EhHdd(fV^^Bmohsh;hD3?FQH9cJ;y0Q+g<9C~ zMW_Jx?ww!5PS^<&E*|;k>@ha+I}p<1vS+8NvOCt&M+vDyV7cE`64OhC2* zl$PZ$)q|x1h3^?btLcW{ZL1jJ5C~*WS!Sl6NNLLP8&PoeaP1%>Jm(!a?8G9ev8A^0 
zOKcRg>bguMkS@&2vK$MW$iV&r@zGHev4=-j#*dB%=TBFPuU&0BV#wLr2<>W$zfd7+ z<&jzKmQ)0IeBInyMVhex{w8I0dENpg`?o+o#cTTV(s(u-%C= z0jhWso8jDb_XaBK9SucYW@+@#xXxQO=RQz;&ECiu%sanJ%2g)k{n*-{$=Z5YXy*%a zW|mvFo4feXq5vIi7=ZzUVnd5_U#a{0dOMGQZ=^(A1(`}Mz6%Wmg8f?%g?>fd1)e5m`ljM%O!p7|ZM2j2Y?KijEhP1{P7ZNEy9%+7(&;}J zck}nfZI-H|M&%(L!SbU-yNao#UkMOcN;ZI4gwFwcb5q%6`Ax6-;!!oA);eZ<@Q$l9nN-VcGoBqUcK?k;|ZEVhcoFeIBx$#LS3YrFKFHVhY$c4v; z*OQ0ISiEGQ-k?Te6Z?jw!A(zzv>++~z1Iuiv=cWsH~)VN^?ijx($tEt+7+S; z^c7bR1fVGS+O~Ek9gS1g%_4mUNfAtxjGEffkHag@F9WE`3`}zOd=AQNvUH?E~_$vyy*(bLXg8`aG2R9HeYx_ z%gY#W9#5}>Wy!-%+RQIH;KWRY+pG>zsq=l!leL9kP$jw-oGg87Z`()*7P>yF)WEZh zh}(@PN*%^`cNTv%hwkXmAtGs`7tT4BvH%o*5ux9^R#h-#->4;9fu^5DsI&$r;X)7S zFi%u78-ed&4JCnMGCRiJhQ|=hlT4xAc>`cm%>o)Yx6m z*-HdSpIg;cro^FhsiiC?os=|ovL8H+uJL5L#ZxhHv!iR>d1r$acriUqICtH>PMo)L zr{d7Hs|MqFA+G8a}>NU2&7QJg=uihPc|lPcHNAUPYuK&0HEr0>h6 z?)O;21;U-_%=By=-G#iZgSAUr52(6*MS0>yo7zv7`F|ozOfij)KGDU02Ft8{(4&f) zCi^}vXMKTWWwIN;{`+i}ba^<3y!33VtoyxKVI8=oru~=3iwT}G2KQZ_Ns=H%yvbVpA;Q-Tp-my zT^B_dxTo#^*ed(*T4iZYW!GA@_s2JJxY@zETo=3@Gse3@$89Pj)2q$pD%*&aTSatF z*DEE34K((I_hr2x&dc<6U%tQZ!2PO}n)XS-i={msU!+XF*zC7H(Ht*4)hlQI(E<>z zm6~1EG~QVoCFFfCDE2Z%D9>xL1|huf-W)P zXIvoOW6a+o9vSTS<2F`?Gas8GwLSHj1g&~JS*?z%i=(1>`4$cG6;`V_ z=G`*KVM%nSv;DG@X+3;O<4R5qJuw?FV+4tAED3?z&kx7l#aA>c)E-aF3f`+Hyip*2 zGxc_-@U2jpq@0gswT}%lOGl59S~!SevR|)Xu&Y#EvQ`tdRAS*dDxvPZ>$^R5Jxh8j zvBvvGt{2zQ(WQ5*d5q48rfk;{wW&;Cg~ZieAmQ3m8B0X?lai_6RWj>se?mpYuOgC_ z{0VxCe@3b62ozQi^NjbhccxJtZQWYxIP4w{%XyE}9}zfJD!p;3Duc;wy&YfVsW!FA z`mjum!NOY$YENCfu47vv%fa4fhS<|OOL+%J^#&Gb3!?n`N8R=jdL}nQL6|EDvp{St zgw>|Dyl0kKTuRuGdxqtw>JTVz*kknY2u4en)sA_$-s9~}1&39GC~EhZ@c&A0*|l4i z6cU@~cvz`n)Wmub@BQ*gXO?diTc^9B3I{U3s^{*GWo6ypi52)*gp;Oef_7c4v%#E4 z?coc*!Ospf49RSsjCL{48SzHWI!2N<QQn1=J#D7ODedX zq}x2-m}~27)n(c?(>EldtzX~k{pFaL?T$HJ}dtoi7_#}V6 zVc#P2yJ<3YrK0@LnYGNUSuuA9m$?a$q=&8{)lukBD`O>lGV8n-qshUQJe+@1L5`zU z_WV(eq~C;;s^Hkyub#`?yl9W2@khMJicHh59{)HwhSKp~jHXe*YY#(LZIvS= 
zI2wn)Rz{#Rr~a4$*W&2QuKCjWcjq_13bqHPGWys^7`S}Ul>dQvxj4(nU-G)Nu#yV`la)&Tj)2zrzqnw-W!PTW}_WI@1doIS3lF-}&p#tRr~iK&=c2oxgwo z4oaKZv^XoXU;l4%>K#rlpe{Ju*M#2e%UTliO3nJ|O=l0bF0+T(1Hm4Fh$;4UZ4ir-{amqMa4Ean)>sk4Jj-#^syBxf0-(x-F|KX_CZO6PwDq@XSQ112q7u1|9bkGqvq0Ic8;#z#Yq;1>J#xgj`S!VW=>MVZEu*Sz)4yRtPy~_g5Kuy;yHTX1Te`cE zlGvaqARvecD5;>Jba$tMbax9#OZRi^TkrqO6Cd6$?^@%`=n`RH*LfcK3zrAD+K!(3 ztRL<1A~UY^eIy#b;Av-^|8GXdp_6; zOPQNDKXuBKblh+(wf}B%u;I(aeG+v1`{>l%BIk2LBN6m4Huj@`$fF}O1-GIw0ub6%L zjVr^G=+PUPMf6SYU{&?ve^ynp|JSN&|69$uSh;x!@c?F^^TQG!%2at}0937>7t=WJ zr5uzQz&^?c)AsU?X>u2%W){nqX>wMEMOlZ$MRhW(3YS*$Nb zBbHiOz87C=+8Wk1VRbEsD@Q!*^g8qNHebT4?(9H^yY- z2BC-JdgPt&WE@{q&&nrMS-Da`%v)7UOB$>fC~I(%yY+@lcdWw$hLvYyMdpM`8;GaN zx8*&EO)$fS(5;+ISZe(Y3HsjrT1#{D#JBK~50Q5mU*5hhP-psW9M$0sUBL#+z*pzU zy}{2Jnl2M${!}Q#Qy7xn`LX63BcFfr8eU)fQE;3$(SK-s6*G{yV?eiKkRqtz;(XJn z;$W((F*nT1=TbvCk70*k$_1i~JRo4Yp7u*cxV;~hhcG_dWxgAXL!E@(N{%0`=v}u) z>^`PX1z&=KR{`UhLdK96n_N(N+ID=&F$!?Z@^Q2q&p`>-FnXb)V|m)XBURaTt8Nor4)t zhgKQrQ*AZWPJC&6Yt>LDHpu81(pIgfFTC%)o|`+ZJdfrLvxy&}JQ2SVHLdcbB4usu z3;bI2*rNmvEsIhoh~2XO$bm$^u$d%w#_IrU6_7CsZ ztZy`v!OVHjkg#9u>AU^CgMN*I?ICUTTa3J{0*=#V=*~~A;ip@No zp~i(cb<;RE*Y}#&8{)=$uhDSH{f`X0e2D{^k?;y-)>dlBWzS99%;hL8S z4RD<#>6{nm-B4b7XHDneO7y%zX+!cD?)>agAxEhk4H@%*@Ul({~L4Fft~$;&-Ju*$4exf z%mowF7;o{Gh`xM@)!fno^~%RgV?k>uK|l~;_m?|QN8AO91Q3*T>du!!>*5qI3^^17 zCF+4Fqj~E4RUfm~#p#8nL8igU1#&n1ZT8PnJ7g=Pj(UxpN72ElS@&dj{W|xvf~5Q> zzXkS3Xxx4c70pplUZN!T8hJKiKUCO@n_=K|{xHj=Xq1_SrG6#+CVe9$PmPxIir$a6 z-rRYg{?z0Wj6~_VxH3)sr*80#pKoT2-KUJqD4*IS2ZNRA&9v<7ckkZu2~fqHVPIim ztzJmXhIKDtVerMs-y65z&lkHPYh$CDbjGuPQuRx$1>r9kg#Tq9mC&bVeK2nHoE1?^ z7R1}uo7N4KBFp}Y$1*5@Xe37KxECFz>TXB0y6N}edGwDTwL|Y25$uP)i;%%vIMCeI zvuT?L9)GRO6U5Wis&#d|8i?%W7F0g}88+&^*uwL=Pz#rCqZ1hJ zx_PVe-Fj6T3wzoA&aOAkohyzXWcfd;WcozzvkE+p3Iq-;A?a|0M7_)nyf643BkNuH zkE5a-+uI*XP8(_6atJWyz{|XWtM`ohOQo-^`{&OuE}@<0bDkoMzNqnYaZ$u|kZ76c z`$Y^MolWK6`huF8HeWElu8H6q2?=+b--{g@*Ot`G@$wKzEZ65Wk=LUPq7BI`Iy+F| z{X`?|fd=+hm$J7+85yPN@{1{8HU>17*`1@1P}k_F-Z+Sc9{WP=?bX=^RT1IgwGSQ) 
zT!=mz@1}TBi@YMP<$stg^uaapPOX@d_aaA4Zk+xWn=0AyTAlP7%04F!xMIaqpt_!= zi7_-LJoVdDWJ(mJxkcu2H#(tUX2yME!BdPtMhMCDjh*Lr%`XljO%At@!g~sTuxaCf zSWIz7F4S$a&T|JRnFqcB^X=Q$o;7TpmHX8a(9`P+P;~)`Bf;_my+Y1`&Wl$|t}PmC z2X>}qLWpxcZIa!lD=w2dH514HU^{RZ?cF_eqbHn(Ms&uS?MZH~jBQlIl1`@&vmlwVu#Czp zC{V&{D#-NAK}pZHH)4K=gNYpyp;YhaIpfhbw->9XpQx;_PkOPRg_fATgiZg!hU)zDBH z{uNbKthMntd$Q`E?Zw8IPxw@Jo8fm zo(_;yf?g7`1M_S6KJidYOg*1oleP?=4a>?(&C{P3XUa-SXwQo6;-2@f;OxO3;T}Bd z-#Z6Axdv*{tzT(SRy|Q6km3JNEluOI#tA4dStX^Pz_eWoW3QbFqIA5$)u)^K7#jcT z<6l{zHd6@hJhkV?c8--R=_*ys+MlXTjqZH?UXlst@7z&llSk@B|FruSW=UB zsHu~34+PR&#&YT|EIB0VnbUq~hYV{!z{_&Yl`q@Z_vn;0HA_PjE#3U>lZErt|4+8^3yvC8<*WBwl$Z1FjmF=JXgBqxSGJ>{6{{H%DbUbNb9iDD>@)aS zK}hsPUtyZg@g(~t976vOZ|Bt5J*o(+I&}(uWIrX}HH#lYG>q80Darr*DgNr?W8L4I zyV(yA{)2(j)4!5IIc>V}={}2?%864ak0K~6DghG=sk0Vm=q)96i=)^nML|IU=buX_ z%bR(QBpOCFz2JkA1K;NJ?IxrA*y^xXuRa$KWFoF!LbIQ$eh(;T6mZ}|GMYe+P;JJU-_o;Gzl2+T$!(M>p_{!LkfA7F>>3GJ?B+ zI97i(I78w6eK-?FetVkB)!$}Q1)b_K)5guHc&|QV?0w30{8^~s<0dM;%~@s;EG{w> z6Qm(XBMk+Ayes&Tbi(%j{m|uR`25ZSRLDkM`|JQ0qv4-y_2n;5`mIeoJSjaOCUaZ< zc$Z%aW@~^S0L!k{MOUayMrwTc!K3c7O(}h6e>T6dgGWe+>fs%F8JvC|I}KFIpa%5o zBK2*w{tEDBp^Bi>(Xm5<^=XvJgG`v03BG=q(f(%x<^Cu1e*`8KFqYxiB>JBpLCBon z^G4zqV0OU8#dQb_L*TYHHr@wBEa?~na$l*NH`ZtbblQ_MB$L_~s5l@C;5wv}_N~iM z24+PT7KShCU7OXn{-Ia~WBpn`C*opl)Fm{kU za!}9ea%rQM@%BEx_a3F5cYysjSM(3ILFXg;F(fAZAC^VExdy^7>Rcc7Qx?$EE2`@q>OEQ8^FB(sIHCogHAR37lRK}u zWb4%>!;|zCONkAPWk^O`AYqnKlJAkDH~2pn-BU?zGMbZ-7ygs)D~c>{onT%vy&~M6 zJA)R&d?VG}W_7r7$h3rZ;&$m^ciZ_cLoThJ-D+comcE4EX-E#X+5;?lyBlkGL(gx~ zJ=FbiI2v19tmXM>;Up!RCkyyU zCI>L=peHvQ-(S}NA5(&VU8uWE-~40o^^fpImt{qex6^yet9i_42D`6>6Q%` zXthGK%Y!mjXqN-;$)WwV!)n%%CO~?3{qwcWPcln%>Y-;I8`DLcts1H8>OKVU=t_Kl zjDfJ{!~c9kwLjmmS;wNI?GAH%$(N>FqW3=fzVJ*rp$4Gt_65M}>pGbZN_cMG&nkkSX}^!8(O zOyYC>K2z2O5#1|`fpXkBha)Erl|lV0=16%1h#*WcqDKqSNZPuCZJA&S;+Q3xQSoXW z6i+?5>nZX`CqyJS8e~5v%rD>(2<*_m$H=y!0P#ZPmxI+Ihu4-dRRkIAESuzv4Tf)F z-TkX)Vuh+M^T#c$e;%mi44|_M4HJ?W2lV*E7IwtbQG3i8WZ{5xIfSu6^!H|nj#JCN 
z2E(H@dC$%kgIy{`1lF_)`=Y;fDuE|*KZ)nhp5&2eGrerW-OD5AI{3*HRtdSY2Pa4! zq%U8;3VyX;?)eBb+p@2dKbb?HeK$7+ybaQ#SBB=vCxdZuZ7M<6=OB533z8_Xl8@-M zUR!<*Kd(el5hB6c2rjG>Vuv!>|NM!xp)cn&tHDDZ8}f(R7*eX>-iea2XNB>vXa{Tb zC8WtyuN*x1S0L?j20!4c1S7p~5O+AUH6Nxx$ z9@uPgag!E{Z>dEZKf@Y8kZtLsu0}9%QB$A!k-O9M@LC0;?7CADW%OqU@kUrF!pUr` zOLS~Ul4|UAILH=2&IR0CVcE_}revrfHS7*MN1z01mHUG59#Q}q9%~A4m2*b-{lsW5 zU#EGXsmaC7omO7-4dND%lVp{Zp%;=N^7k2q04$agspnYn!gK0LM(6t5n3$;8SVk;! zy64t1thfA?6N!Ci>QNzhHz4)OZ>33Z?+u4VApA98A=+loCy(~;y%Ku;M_$EEt?f-6 zN@gbTnFITen!-mR=2PLyL@zl``k^?fWO(N3gMrgMoJZ??-mtQz92l)Ukw`mE5Iatu zPH;5&-oMk>X8G!rwR^QCTt#R8!6-|3f(|e={r^;KW zLcQ(7Bplr5R@HB}ACrh5sSccNgU$>-Ev+)Ji> zhm_0tUO@y%y|c%V3neu*A=pr;o&cLyhSrZLxal4cDdc2jjX`1ozs(8c?m)F21@3x- zIhV_@fv1$OKS)dC(27w7AWa_NJHuI|aXEoMM6M;V2pNH)s|~=qQ4o|$JMsDTHG@7v z26WWSm>7M)tOT@yOkunLZDK6aw)fGaZ^#=p7sl=d5K37Eg-gbeop}V3Ap5Bs8AVQS zZq8KU|A|>4I|(=cQJUJ3kdnfPiHX@=9cE8G9^cn`9hDr;^dV^%j)vvjG-?Shfw)>-=wr^JhMKs;rabdLQxa=P~@?Q-%0{|{_pM=DZ zzAJkHNJLgq(ZRKD0i^b>)J=B}8~JyZO(Qz|4$e5#b%xz}6wMafV{phg0+7gf5bvY$ znt2*xrlA=FjO(bDFwz}J?`mLa0hc(z*&am152mlLKhalJZ2;%*isjTWX3)m}at7)m z|9k~_tZ>;4-AxAdqAh}vsOCHyil-7ERL|0nzY2DX?BDJ9aN%s=XZ@Xwp1a*NW+(k-_WoLRwXcr@2Vae`8&nc^k{UPfz6^gS7PS_2^n+r1 zJmka}#2yZP#gHQG5W2_(UCq5%*36JBR?s+lf~2rRn2%l`7#b>cp1lw0;K!!_#YuFm zY}dDMGvHLl9h$lY8tTbOU|@KxG3V52;5pd>^uZ?>b*5AQVgXh@$1V3~ zTnAE-|A0etYwPbuZ1#N6xi~)_t^-0F3EK%MzAFn#L#~>h1j04a4RKbd-XGF4%*B0R zVF3>oebAhqj}2_a3C?N0Fkj7-YXO4;z+msuUcves;jR7ut34A4f z3Mr{s)BNt%h3MIcoJEPd(wwP16E*ugq3}2CLxon`j2>*v(wxjh`us!zm43mfvTIRS z158z9my_?09*l2SRe#z4d9Z-ttTlxCx<~ZZfRdwa^8uO)3}qz_HYQQOb#}h`eG^kbKcx;1Cl|?c{^#FR~%a%)AYeLeIV`HT4;INvP}SAa!NF zfBPoU&m%&kT;2t*q!anHLFz>Uv=PClQ_oTG$=9`?YvW~+06ro)AaS%iwR#AGGnj#> zMWZ-)$(%UO5b?oLUHCQ+JjW$bNcBh|BD=hrSh!|sHWZqgnn;&jqz8p+oPJ~OWEB!r zM<+(JSMG_=GwOk|9iaqO<*PQ8H=O}&Q;;M+m=g>dkf(|m>5M-1In}6PzKg=(C&9A48>}h zC^{F%pJL=QwV{dMd}G&vE8{K>FG_+L>ncyYAjQ`^x#iE2ggpyQnlNdAk_KD&2HNwj zU}sII~I%I3-=Jw3{8cLw*&bt`jo6>aU|DHANZ z<|ng2;lka-z8IfAFYPs~M|y%ly1>#ldev)v^Q$~5Fo>6<66CvSM8B<$M$=SOKqiQR 
zW*c5Y{v+LsDYm~=)!c5>KMG$Dm_DrvK@w2B}kxmGt#t5o?! zgoa3}!L*;Ffk~S7eNAIdisgO=!j$3u<@ksJuS!F;`s0JE z+XIzkM>EaNI{P8M1eKpvPP_O&?RR{1RVQ~piR4(Ax`VnUSd;DViG{N93ljxm3SPkJ z_rUKJ<)oO=QM^v43C$B4p;qiKGsTfQ7YdAo$d3smYEXKqQmufg5_^=s0ktmT7$l@F z8$+pLZeg*F3=?3W2%Fs|C~Hs~xl&fx317%kfOCo}&qHDUmugAQ=k` z$b0VYyccKF7u2e!;ODRg21$@-lIkwbHdPIT2{Ojck@{9a*C@wCb4#6lCIikwVRl+~(LrR! zr@ZE`!PpE6_yu?>p#tYn;6I1&dNEUbvJqyxvLc&gdjgGfT5)mlPrYiy_}1>E%bOBR7FL;YeUDtYAHuDyN@R#QvVfEr zF}%9?InMAV3(I>X?``^Q!xI4eY$Ak*%KpcM>KRIl@piehwEh>Dfmhw!!Z(F|cP1Hasi)|X{dlD|JQa{Ch`0ecbHCR-@z%YWGn+CPs_N6eZOEQrR$G54% z$$_E4zO)mJFN}olCGO=Sj=o~-2!E78^G0(n6AlUvW=z<3IYx#_YQkd2JUS}&i5kJ8 z+nqdt=LgFQ2u)3JqG&TIODR}oH>wDAGN=nsi(xjgwQX(RgZpsX3rbx;KE5pgi9;d2=a{&L8ArvXj(#vfsh5d z#GBPSxX3ZrP__qea*qG z4*1kO6v6^(&^S@%wNdX0CJg1I9|W{cw|T_bOZ~=yHs51Jzgqf_Ug+G9hs&{jKOg5{ z;bvp?GDS2EE6Ik~PEQcFw6zh^(oUOFr}cirAl2_l-|tV5qp$~ls?SD6FtjBEl$0GX z73=xp3qN9KDLE;r3}BO+Ho*}Q==-*%2>I=d&!2gph{Ug!eX7{+wp7wa*DG_PV_-!pC%JTgIA>a7JLsC*>KJj6&S$r2 zvCyHM`JmNw6pjm{6wVJuMy^$!;trg}E%7B%1H=5_oJA-W8=W+1i{>@|vvc+!I8q}m z^R|o3xU+z%(nAy(b#dKAV&a=jCF|GSyNDG<#WPOcJ9d`7A@Eq6XvwFO%pcV?G(CUC zKQ)u=v(d2bu~VOikeU{&f!yC?CXjh z&Y-&%7Q)$Hu0*sF)<^lCCBHJ7?+x#L@5Z+u^QStW8<73e*Uk~W@3eHRJ+cIdwr<`; z>pn=+J_Kos3J~N0n-hDDsXG4)%kCtw3`_C-Etj>P^6)gn`|NQH2~MH=iREvfeQ8cI zmeB%un-8GG%FHz7iRU20dH$PTC_bH5@sqW6d{FmN+tQwC!B}t3)p1Y9G*3hdxA%3? 
z$B)TfRheh2d^Qb9SkHr=o%&Kme7oqOCmV37Fc@+0zzK?umgtO$tn@NZ7<(*a@F*vi zBzh653SPTmF2uwm*N6mF#Oc(SE{E*a@|%seex7S()RUZ+l3!B)eSXeOUcCJUcedo^CuU}^&V;TdY4AMBJV*jH8Qw(I23&Wh|me$lv z#%e<+FU=Ga1XV?azMn;DSCcj%#W(Sk4)-!BF|pMylX{1 z?Lo!7s1Pnm*yrA(Ua{`BjnVaOV?m*L%|98}W#jcf374*!rEq*5YT1M0l9Ysm4hZnW z_7&5cPNjF>JQ(=e>Yq6*uDw2Ha9`J!1T!sa(#ONBY`owR>&MSGUdhDy@-a(%x$Y+V z7G;P>TvtL;v(u}FM#PX`D=en+Sa&~m{iB}J0d7Ugz}ud+Q_KuPFmXGfMKqV+VPj(D zB@W1J3O<_4dU0Au8@z9weDK}3(!JAUX+v~&u{@9Jv6R=*_)xp}*&6rp{)Qq{hV+?6 z?uQ%~=iZ#5sUKE_o{0M7KUw8-Joz4}TuGWT9htlONJ;h)mAvs?z1nAx5n@@58rn6p z;FQi3s-``vbtS$f>}gt}>r$b=;IZ1eEYF79?U)GP*0m5^7ko&)RhB|K*vv+MFPd9f z6j*Ht$|x;flZ{qXD6>=-1aX>% z<*&km#5!Ku_TkmINSYUW)~;D4zX{h_@63Z%u)ct^ar9!&D~G`T?{iLZDtg*sz1EGU z1swZ@@=bTRiuczZxSqe9uPiGr9<};dv9#1|Rat_tW!DEn;8Jz|WxhjFOVF|0kzI?nUIVev)+?Mg59f;X;*#hL)yDiSE|X zss7_ZugaT@rN{Qvc@>Ym!`lKI+`=eTw~?jL-ZErX+LJ$!Hul^Jqad{81xk zY;26n}Uz_=M)r%m9pf$txaGhSL`zT!FI+qCyiAzkaFfMt<*F`G@75;2|effP45 zsW(plN7)Pcij6V0wb80Ln1AOch;JoBz)(Qkh%NBDFor9`$%+(n<2 zK*RU_(ILJl27QUH8y-JVlypN_4uI=RU2# zH}_$sR#UzmMH9Ldzlwm5f$XNcw@B6XwA-qcfA&{>AjR6=W<`|p4r&kb^4d=qMC&2k zBoD7!B`R4(`ysWiA>0WFyHBx@{}})#V#4vLa$}l&>?-JNOe<}+OnRL8oyc&X9Oc$- zn2(!E&plNYB6R6!tRmai?94Bjp)Hmz#vG((K5c473y~?Cn<+&>DuBfQ77*(ompf2erDSCFYk5O;V(^uxXNirVqyeJT4 z=!U;{?HV#n9e`UcoJC!c40%&}nFuPAwl0IUi5kj;&eo6Z?b7bYL`23#qXD!8wiq?P zn~L_l7mWgPSE;hUSa0jHVqr#hl3cHe5XCik?E0!=PkxeTI7)z!)RBCa=qQUZX& zv@DY6u{U5QW(<~SC`dtOeV9BW#ZFJVFHWx`chnNhERS5iaAlr5SX&Ex<`mWJ4R<#% zSUw^bv$sAhnzSu3EqFwaDAgrcDd*d(=&#bw+q!*O5R3<}5-lvDOq_EMY72QH9w$8ETQ@;c3&;-Q$K?J?0A2+ z5f$CJQlAJ>7OtpQ`Z}KA$hEO;)6<44$g&DE(A&%Z$51k51w{7I7xFt88 zOPW4VB?4S4TU=Jlq3)aaQN3$4^?p~cj+8&ao`iA!SNS+TqclObIBZQ`UR^O^#@46l zHeS_JS{-`h=z9+!C;3UDhoZ^HvN^aJ+CJnGOJPsEVZSl@a`%+5jrWb!$6HbR>Yt1l z)_XS%^Wwy-k8gttF^$2noPw+bHs+|hR#0ZC&hdRgAsBeH)f=Wc6pM{ z#Yx?zQ?WTyv+c_~IG8UQVal0uo%LYzYlO|slbdBX;|m;}`M@(Eer;NIGwoJXgq?bd za9f!Z7e&D%iZ(C_!@5+gwOPPYz{#mE^oxMcVXCojvySPO;CmBE%;2~}(!W>x>?SI5 z`R1mEREk1Me0}adh^`Kbap?kgewB0zKTcX`*nCX4a!Hja1ITTe23rdJ>uZedW 
z#8sV2gHe)JH&N|*$T$-^A!g>5uc@7%Z(pds_D3aOS%E{s!-|)?y6>&zBhu+2K;k=7EcMYd&+smEe`A$GYHy+iIbUc%WSi>aI-id856=ub=l|MJfs-;k zT%N-?=9&C@&Amc@B(iJBt{XAnp5z2()H052pu(PJKP?7D%=m;MIC2Ri{6DA~!5SC` z6J0Cp^Wbmm624OCC5=@gy*<2(k)>*s2zJir8{<_eV`(+zBai0h=0Uan9`DzYbw1o! z?dRuSi`KujREc1MX3rN5TDamr{S;t!`sB%zfq{X1mwD;^_37?UcDY!rl+=Xi%nIed zAKqnccJcL3`t>e(#IV>smD~>2KsWH}=eLvJ_BceIgt+M|-&&j;TB@*eWP{Qk^c;zt zCcf2?`Zp;%)vgfH|6R}|$`xKJhN%5#u?Q2Z?KIl65Tx(o@v%1yhUnRxvS3h+l;TCs z*kC-UTp>!=#Rs67ckjqfpQ2p-X#UdG?Q_=$8l3eb3Y?(W%>nn_w3jBLhfQ_727xyK z6|H-Nr)qUMxq*{&o4Rn=)LsFnhOHdB2FZ?_OpG`4%d&9-0@4dy`hRlM^mA^zxWN4@ zG99!%Jx780gS|B!n4&hdcIs^%A!ME3u8jb-um7 zEooVO+GE($vdZ66qo++IWdljw0+999CXz+Nb~g6&^K-I_awaXAgR@|lWROukr_y{t zxPAohlu1ln(SUypVY@D-R8j-#n1Cv5JV(C_6(TGuA)@RvcJoubf&m;C7k9y!y)~-1 zblOv}*Mr}#^fJUj+V1P`la*JrSp3C{kG!BP#ZwniLpBcCk_C)KuASElj(x@809Q@O z_=qz(RLBTajHl?wJH6GBg3env0zpz11LYd=YrCc=hxNe(6uh174@1A>L5i;oy=6+AT!$#ei>J*o6*X@Xg z`Ub^?*qI6cER<)_FAYEVi6ApH6o%qHtIdeKS5(Re zp>J=W)qP*RTbbvKEN9=k+nB?-<#XE6uj#OnqO!LDq3x!rU|P@dIlXHPMcSb4;xlZv z`lTJ0{A#qwZW-7IYh@8@YLfdLS=>E4Kl#$Sq|sJfavoW%bkoeQ<#XI`#aXqfloL9D zBSa22@oF1Lkro}j-At>2daQWy$;Nobb?mxxim;YvnVIGKY-}_vdftB07uYK;_OEc3OBJK6;dHrRq;E~?c$?pe%olMPL^7il8nkmcM1y zg`d2w+-3e9SU=Zf}K^dXfQfDGZ%W?s`j&HVafw*?0| z9oVD7ONLil=JyEZoq5KJD8`n=l!k}W$a zyg;agn!racUSXqDi{>lfISS0b69BgBJIFLU-=@QM(c;b`nV=zM48%(LDcOG?3!jMp z>Q$jq+Vn$$F#<9v@nfSaXrUXDs?R=TeY}b7Z7pa+g!Z(aWwT|{5K_>#O9oREP0pmx zzKZs=gueY<-jWjgk>ZzdX>7NtqI=2QPvNvBuvUr}3VP%`w~Vb3PdMhJzzv>%+s;6| zGLo6;{ZJ44fe%725o~l|A>-^(PvoS+@a=WmWzBRG7 zbr;(8v&2N^u_YZJym`zL3qY0L=wcFeVOhj+{}4a3WcJqndQ{=_vD zStbmbwG?&Jaw^!V7%)?Vq5P+Y!2sTNo%5wYkP8AOQJ;=>z}G31yHY`+E+llf($Z=+ zKV_;}(aN(lBInP<4W^^Hf81*xFcITmA{XM|2nX`$(grj+uo+H3{sK%5()k3G`E%7B z)kBJ+nk?Jq_e@I0dQe&M9`M@;5OC$UMYe8^%1NHlP1Jf3wcvgU2zqv1Q}Ed&N&Q+m zfz#PTebj4t?0C9rgXR*dI08x69_0+-)^-#r-}e=13QF{oZ@)$>RGcO$u#}WzhICN= z=YA(<{XH>QE0tZV(0d@%2`FzIm4_=L&;QH#6LJ9j^XqFyO}sdHnVdj6~_1 zJ>L$1uSzRUH2`-7061^mF_JlZNLAmy|7!o(E(xQg`5kY_Ngs0Y;(7wniTBJcZqEE3 
zLU#msd?*2mFBvi6y~@X*EXB#mx4o)qvia^LQ}7qK6s)n1%d5qiYebLrma+W%8m`bW zoK8;uiv{RX{ZlN_jSVJwPJKtBL=gxv*x_z0J3kj-?NjG=5d!*O^}9|j6`~$uQ!0;B zFel%-o4tklF4+3AuG*E`M(~0 z)1OBV-wcLXEbQzVfEr5Z9O|9D;-X9T3bXR3TUue}v$bq0eI+KL4{5L8#6KLIZ8Ens zNBMvfI=eh=a9c&&NEyIHp{d3kP}8H2+~otassMoOGBTDnZT!0nM84*ezu!wK46su_ zEFa4R(>2>fzfXQavenzu6EJAoVhmneAm-n&w1bcHiy4yvGY!AR7`kj&K|sI9ASQ+i z44&@!7B?QOi5ZW+A|FGU>DYg78q5UR{!XCohwy>9ce3bjLElek(chmx!w*a~R>Ap{ z@mbx#;8kYkJB8bp(HPOTt;ReiT=8IQ1|xy=%MkFX#MI96XE8Q$y7 zmZGK(5|D3hYaxU|)yI!CMVZf1pIf2&_!$yO@P2)#LC6>#+Ofh&$B7>;CxvZIcI~P+ zM~L_g6E}BNniHU&{(bdsXnz42EM?=0!0h-Vgo<PZ6W7rB&#$t_!=#(R>qh!0-gp zJNScaraAOE8K2i%lo8Dt2i3i%)G7TPm!mBUw+ap@sr_ePkRBed=8d~eLIcbJD`-&q z9!%e9S%FQ}Jp(^##=+;P7*{%RmE;xE3JY)Z+!--$w&qY#*OQEU{vceML98zFL%O=U zcHrdCddC>iqJKAjSLm9*{)-M21(E){dZpn2RwB9ks2%_mFzm;U>jvz^-s&)bHNFEf z>1dwzBBX78r9Nc54cs5Rb~^dV0fA{_BmUT0xzcDTqebYRf53VQvFiThkaQb+#R36h zFUtrY2Q#g91H{Xb_KIK*@?>BswEqqtAMT)it z5Yb>Q0iaC0D|iDuYU*mjze)amx=0w3>91uJ*rHDOD>~55b4NGj@)Npsn&ekiuOc0N$(@pf z_5IHc{AUl+^&0aCb9^08x##^`<;lgZwxelzDd*p^sCz-CO}Og=FhaS959bOy*cbp> z2>DLh{etx<@8XypNHs_$L})jW-VWuH8`AE%mA`JE?}%SSgCj_9hB7Vul@Z_>BT&Ax%fX(mBSnmIMUcF*}W(tXyLDld}cJ?QuI>f7pO%O~$ zzQ(dC?=a}H00KON8=CJ2h#4A~IKk_qr0!1OxYA;6-1f{AML&3Tw9fUifef*+qBGvs z@zJsjyLwT^n=Hd~a?&XmjD(AVM5(%?8xztODdDD#cgp5vUom~*HEgW-SRI6My(ZAM z%z3b*tiVZ1u80$toa=dGV`Fp#`=k5-rev>YNbymC_#nadw?M4?_GIrjO>Kh~!1f_+ znNAKit$>Pjx_NQ#lm-j8Z6s1JuI}^&bkj(LxSAS4W$xoPYR|2ml#4`QwE)K>CExt} z8^(iI$-6fvB3AO+F4j`i_~InE*BZTvj zu<^`1-JEy-J#5eMQn{nPV#06#4~!q#eR=VdGX$J6zJwe_pZ;XU2v)u9Oxv{iSOH4W z)_p5LJOQb1?f3w5_k+^rYH)Cup5gty+^azoKDQWX6GLbQSdM&IDa9VO^ZHj}2mF6R z`7xR)C@Wh5I1K6oYm$Aq%-u-$BKQMIPQAO~G@_N0$Clv5Ohigr1S5`LtE=EyV*wZ= zrok#C)<#})X<^~{MqcsTYm|KX00TsUl_(jntw!|l5r9u+<>YMOQQh)LpjPZxAtHo9 z8QV*Pp1qzQyPrO>!So-fpMXVZ&wL1*i%>L@teiB-zmqFukWUKv<8u9f;O&@VmX?;d z!Eh45f;3 zVNI$#YYPM7F2}H6N9VG{{R5RbZccvQUx!GvGzb;z)@bQX!oqI?V1n?W)ipI=K-;l- zkfOoP&c6A{3=Hnjtly3(Dk(Ys76QNX^ba3CB&CX#fw|_h${?`%#HS2w`0#u`Rm<(! 
zJ?gySz>dY{r5zJ3O^LSbc+M=vP5!?Fcx3c>p!W^O7(QoeG|=@yf#P!2WEY4(>Sbp! zUxnUGLsK*9j~0G>^6irbJlH@u}gT_trY+{rTaVAEIb`sQoeM~D*A@e56& zbE@ZTwNlB!hIr`G<;mw3Je=o)g(3; zR*n~)hzGVSLTL<;!}7X@yP_wrI`uFAN-lZOHz=VODn79jrow z;mx(bWdZo>7R@IVP!R<{kTcpA0IuyP$~hA!gtf1cae#?kSEc)^((T8eTKo6jpbgb& z%TCrd%`T>(7RoF{_prX0^Q<))G^(8oyL|p|@|^QT zuSiAlc9657p$sRPiVDYjg~MtZUqg1bvdae7O?k*ssG$&RxFP=LFN5Tt$HVY93w_%> zP}k77R1ZF2gkoam-|!sEXE7lxz^#K?30cCylbfl3st-w)n%K39gW}Z$M0N0ZNCAD> zWDUc=STCN3C<)oO`4PJ}E)KV857%mSP!^OaevDZ8qXlGCKJ{1h$x_v8o0^V|Q;Kvs zBj4F&z3Y97E>^R2El7!ywzli#&+e3NFoJXI+Xlnb-wD&FK~W4cK~`pP29jo*{6qRp z3$_5^{2mOAVqmolb~WDtD6F?02Wx!9LDe*;>w+x2yrA4wg?OH?gF6xY@Q@Lw^R$Gd zq-`*A{&u)MpODB}4M<(ER3p|LPkNEc{Kb;{S}rD>$L%BZM?XlQJ{e6V7zhkdl9CdH zQ6nsp@8ub)cK$?SM*pVpRNnS<_pCiJ^vj-ZRPe{mPfQ+fQ@FBQSBw9O-ILm-=DC^a zaaT*Psu>A=Z=0tR6B8@ewjqL9Kvhd`GQ|9Lq|p}8zX~3uPV1FeI^BvD5H5WnGqB`s zo9MMF+Prj=SEtr(MGXRuG=PYTip0>Lq96?pSy)~+H#dJVv;IEY6fEd_$N~A;A=y3C zhC(_ms)M~Q5s8K!cdTO7*8-nqfQXD%KjA()I$9_82rB@b!YRW4aIa6k8$3=JP#&?m zWQYb4#?aoj`rdcT+-j*OPgz{V@cQz-xp@MYibRxwfk8Dxm)CFBZ$T-}iI~u|5Id!{Wt%UA5G{k2aGqY~i^ z>46OYofBUC(`8=iT@m^Hje(l_el!i@aJIj118N)~!hmjn`F1O%@(~4)IFN8;aFTgF zF`*ANv_~J#BKUNryd@HwzS-PX`6--{}2&Dr< z$C}l~{&UFg7ss5I{^JEcN5v1Qe9Mlp%7LAiAPdwENbvE`IbCa8vkah_LNH&|leY;$ zb0j$RdFwq@iD7p)(U9&Xv*~j`9BMAAu$Hl2lmEK+N}v1?49x!=Wufrz=C=EThFHe< z@5kb8e^?Sn4lC|OH65PSGOoff`AxL?NWtSa@(7jxIs5nBJbqK&LmmRR+yB*DDO*@9 zsc5=is_Rx(8NTOKn>)M;Ym1bMnZH-&pIdV8FY+3ir~iE=Xv{;Akgcby8#=w20Zt-T z0`2={huJ*19o^)_WD%c$=78*|&>S3XP1$Pu>~7auNutYH-aGnUmo4>z)}qovO_n1bw#PmvUik){duXDm{7lTgSa6#tC_wd)|U>%XpAEm_z%1 zdW`)7j=1RjFp+rqTp{PDbo)=m*>7X+Xn(Lly|G)8ZbJb_-nP*Bo}n@uwL1&O5e79? 
z!GHa5_Wy3~{);%ZVy|HZbQd7(MkOaBx|jlxgs0HyBfit~@AVVXi|pysfxW43`h>%% zu|r)xHM_{nZpv-Md@ohj-kIHFPrWQDzlZQ@3S$m!#2{p!YuoXs?%k7lMIi+RAJWqM zYfjEB=sP~k#}NQi3YOpij#&gS?Ho`tFlXSu7{$brf!9HePI0TDv@rRDc*`IfeOP|UwQ6r>w#`4-RxMvwv$__=-JK0_ zvEZ~4pY#ewii@rQqT(PkH=+4Ati!@fY=K|#pQHK#jtS%h*rAe&X&D(Q!{SZb@QKA` z=^@1Ng*g)8Y|J?M>5dD&kvMpl!k7S3e>1cPKs))7MEUT38*oU#H+Kgc7QHQ4Xwbq7yxT8&M5_TH%@ zRf?b)MNf+B)*J*8R}3P4J)iLa)n(j;xlJ7hpE}_~#EKvE-E{MA?-L zeZI#D_7LJ6aELB3-Lz&#A(rNfG_v*Bi9Cjxn@?P*@5WWKw`o=|)EaHU0&_P=%O-%*HqbU9VgrZ{F zBZp`>!M!gr&`>SAlc7jP@|Un^r-4rh`>t>*!8bspQ-zuu!Xdy@fzNO)t#{<9YKj>2 zIRA^WHvy+|ZQI9}CY7+GD3MaABt;@bb4oN*=0ap1GG%O(h*SunA`ND;Oqq&gPMPOf z=6Rm}=S_R>_xQf|cl^I~9J|`e@~r2%@9Vy<^E|KfdUR+--^_+ppX?&*Loo2}-kiZc zzkypRk8}Hos>ZER zD9AoOLM!K!lsErkFh|?DSNxZJdxIwg_FzrP4xrJ|(Sg=$5Z!mCXf3y>T4JoX=E%0y z@46;$+?Zoc8(*rRVh#m+SOe$WIIYxVvAbNJ`O)liYnBETKg>{1*Wb7Jao{_R9Mxmz zwhsJ*6ffT_M|YA!g1-Ngh^~A;5k~L4EC|IfKuh7jS;-iz+|$&T01*e*L&>NYX8> z=;~U{`kLjI=Q5TS@E369*_C{MTjCp*9nQ9aKXkp4e$1!tVmAMLujr6BJ7r9cDVwg3 zySV0pnaG9JOITSd3madyGj5dINJDe%(DdhsE@k81k*TTMD(~vOr&29{e7qS$kKyPJ~O;*I(pn)fP88G<#|TwAn_5uxO!%4jEr56 z?;ijA@3_uSd?GUj|Kk#trwu z&ZTPPM&0NA%kDN_clvv}J-%n{Q9d%y`{Rf8Lf50T@>tNHL%;)|ljvNl>@0`tBFPRv zTgUcj#VPXbY1+`2{h@yFL68064>kY#9)9T0dzv(VTfJt@ z62~0Prk?nEaxB5krV?pFjZboKQfSQKFk=XAT{DN)xgOUo8~mpJNv%MCPpd|u@YRC& z=VJ0zIc5pBk{g@O1u5@o@f$oT<#Vb)u}(BGQF&-OD_Svp>pTDKYtd(`v(vuo_Sm}I zSC0LVIct5L*}45fziFuR8QG%HLbJ}AE7KC6jm=ErLi6%-xWom#DQ5!pk{)z<2AHpi z<>ssp&HU%-n2KK@cRSd3$LZNcqWkxsi@h!joJ_NvM@lLX^h8rfhjX!&p3y4Py@re~ zoPILjeCQ}8{U+@>;KU+N8Zdn8Zt$bkz81G8%y^bZmj{3ykKGn4s~kSgJXf zqam?-WV`90>B+p@;JRliG%v*TSDhJNw>(`>&Eb34TQ#PZ*6c)AmtlkY$BfxC{%nN~ zTR+&y**%hz{d#43$emqA>~WUv=Godv9eI&^qeYK5Nd)f+wKUpNr2od??)9CwuJUXY zuA%&%{JHJh*v2ngEXNi#e3OWJpE^42mU|rQL z!?HxI*bGac2wUI*=%7XqC^0}mJTl@wW;Jt-*Zz2FPQFH$YSwS{mfxLAlvLEmIfi{_ zv>EuTb37`Ra0NWv`u@`LI{HO(>(X_zo*Fg&i1FXsTlzpXcH@>2^*07_5#056vh<_% zY$i_C1Ex|r=Kvf3_s+y-QCc^=FXY2ooyYGM+^)G`B@|% zP2KL}Nt5;dQTFn*ES39V!Nf$jr2WG!oI5izbXo$o2l*t9n}tY9P+!$Ob$7;?YQ 
zxksZ#jdD3&TU;nrExqEKf1d3b?Dd7$$1U9R8$S_s7kprxU0pvZt*rq-Jv}`Qu!3FG z*ij&L{ElLrZZTNXQ?8?vEg@PaI}I6Sr{bII%AKmHj-xmJl6HD$>&&DElFe1-1j(><2gCA zX4GXUYLO>;du4}1F2510KVhM|OUAw~{o4&r7?RoFu24~`+}A857bt_)pP%AQyXvM# znUxleSM%$w`#EASog4PV>hBwI)bP^$BD<)8PG?dSH#DeYq*G7lqD71J>zy7v_!)LZ z2z1}gJRV1zG^s}~cqP?;DM>|jgmd;AD7$z>J7mARdbYz~fceK~!^CH4l#X|Wjh#KJ zTPKvx@i5+UrSn;Tw7@EW&ikizg3=kubweWT9PGIz4*9gdF3;sLEVKAbdq%|4RF>!T zL>pHRTRFon6W4>0=*mdXb82yE&vV$!z0Tg!C`4dVWPRn8u&TKG>T{c__USK6&neM_ zOo3U_Iy>TwN}81XSzcAqJiDtL-_QShWEh$)e7FlNF884zgFXPB24_F?7>x)yZm6k# zn#MlEyM0P7y6cy@zJJ_HgTC-rV>a&9Rcj)aQx(Ix-88l}O0IKVHYmGv#jls2!nj5@ zn%kX}6FqeO=P^WVT#<gf+Sa-JPi)EFGY4 z(A0gZ@`fedk>+m3!Ol7FzL}YAd!&rE4b|LFt%-Wdeqm$Vv~6ld)SUo>IZ-2#t*`dY z9XM;;JLp6AA*?PXuBMqWEm>qp!J#PEf3DTTFh#%hwrc9p2lY98#hg!!wu@?u9_V87 zbEq}Y8&YqVW|_5=O>D=s$%WSk!}D(UwbZ9w9K6HPdd#!N3Jkj{GjjN8_a}+n4zb&! z^E27N=Ud;PvV*+r<@CV;PUd}KG9HtK4m~neeIL4~ce`;?3rKVh_^|kd?BL&8fFnEf zXXmC9UOGg&ytY^8D13RQwXT13Sg*byLU!sK|GKIX8eb`nrRkp<&+G}&UNaj|cY6Kw z5`VR6q0KaL(T6=!wTcrgcGG7MXJe})00wZ5zYp&fn0a6_7utL!xn0{<_tuZNW0etI zrbm(`(=#3B1UP)}6>oYUp;CJ%?R0DOnA+>I=CS6!M%kwOYj}p&AAiSll_K#Ygqe9# z>U803b8Eo7Q5A_7!D4&5t5d#Ky)jSDbt9g_)F`Q|y!5{Jsyt_scmuo)s=W2sSv0;OXWo*YuPl-Bh~9UsiB5RW4fr^P~RFWbPt2UXsXq+5xN zovj$C7rFl8t)HdEr7Mu;7=E7)FzmHTN>np*kYbF~JH_s8zDk5u!MBt?F=9Zbxu^Fw zBFS0fW%IsP^SNZjd_H#c=&4e^H*|R3kstat)Y~gxQ;AI)Szq%uJMO`}3+;RYt?(yP zJ|%3|B2->8mPN}c5M8JfR$-9zZ(y*T4~GxG)3?4C`gd%aAsf>6O?a2{9`-Y{yiHQ7 zN|jgtq_g<#;>vI`CFes33>ye|1izQ@$w?+Qwntyr^6fD*GpkHufccddkP)>jV0|2360i;v#!K*E}WDVkjq0oB&#({QBR#S$gF4yg6QJ z34~o0F+8R4`^*9Qxl1}?DOaTfr04IP_;Bk zXxUobWd1#n1^%2__jA-JScAG{VL3p%D6n$su1^b$2IueZPw(jGr}Q@S$~iBI0h$$o z22m*vS<`VUGx>^hckj9|(-d@>Zrw*SMme*wepqa_pwoEwb=rCfQRdLBmFXhqgaAP7 z?bwjB918^_?HbvfRU!`|MWf=~Vnvyaf8ZDMY|R2?nGWZE0LjiLCp z?ujdZftlQg4jp3RVh*u;GyU`MFIzmPs%)dN356xG8|B=kD za)ed`Wh|x|z-hq`N`gV4k!Z`ml;feA+Kd+lqC=94@FiwgcL6hr#!Ucda4>bcRIQFK(53bO3%xmbXMbvu6?<;!j(<< z5ez_BU0GKjQdi#uA}l^`t}`@p9X#faYh+|c!!_;cix=Mk?26h=?oFGWBhZ_Dy9X*(Qkpe&)=pDw;-zr$8%mB0wz8O!pY6h9L(#;w 
zoWn{xu`LkYxgdgwaR&^@2JpVmyu7Z5I<$D4c!;08W5*+iz`b@}`2yI0iJG1B7PT9t$D~sx9Hr|aL7FX(ysRVF@d-mkXL0#P)I991g?W~Dg)Q=xOy3#iNy*gGWa^G0ke_GL( z@zK7)P6mpR%2I19s|K+5*mc3bKl0p-Zm%sF$0OlzdO2En9!7N9O71h%nVu~kwo%?+kx!^Cl_xt zKoIA@a%W~{e6Uc-^4q#~NoeV*H2axH!wBu{X?Y9(y|G=vKJw-q6YWoYJvpAve;oRE z3m>PYyE`9Du6gOcy1F`I27&0qpslSfAt7OS{^r`E*Z|JRE3B$<1uZ3K^y_WP8MjeW zT(s#QL|T3L(#*GJ-MY(7iK&rkvtrCVryGEX?=`Gh7gT&%KN;5~Xxj4qS{IBJPQz3I zTwp}h3!IxR-zrV>gIoG!Ked1I;m3=FI{pON?j zdgxyMo2? zc>lXe?PQmlZrEmTbm;F5lK9&6bXhJ#BlVf*JDXkP?@I&cFM<5=&#Uy_|Ncth|KsGo zdv`GfMq@C2VeM${?Uj?3{$9Xv>CfiCZ#y2&zt{X>ynN+Ku1%X9F_bVCGz84GTw-H| zaA?8CsWQS2QY3!GPyY$iwXlRNJgz@yspFlsX{3vJU|@ip{fvx^$K5v?=;`hA^Vw&> z)>9Q18fN@YigGk~(Ka$l{JiX19tWD2FJHdD63YXv-41FGjCSw%`E3AhoOZIIvQ&iLluM}mRMWB8@G!4;i_JfCRC>QS5m9;T*_Qc%Dy#ow zIwGp3rhTwvrwsLk91rrw(wACV)YZS(sh&=%cS)*aC>bTm1=t83A=b>RJ+3W0Qu-_5@=&WBh27cWWvezJ6M}5$G|w}Y zWNZWSGeEZTZ9h-G<%90ZqVa-!{#y@>u191=&^tPXu69`~_~^z0SSb>y(IJav+9I#< zzayNM4SAr7@rI9|J^Ll=evYi_k0GDB?`(~XOfEL0)1gRN(kAbt__=FpM1N@ssQb)e zTyCU6_xm~cQQ%v|~ z>-h-J`tt=At@YVzUmTy3$iT&Q={D7IVaeh9HiWKCt@g=T%&+@nlz~;rI!~-{Sg-XP zW3u3$R@T~H>vPlGO{3a-@WAO8fBNc59<{VqF&VYJ@QRM@FlJb`*fl9{aqnQd?owW- zJDL5RGnL$b(aKngAM=w?P&*4X_=+@oLltPEIGdkMW88g50K~Dvl4NT6p(a@+jtC!FT?g_rew*Ok&hwZDH>i~Os9;}MaX!W=Ub%B3S{t3pho zPQt9~SFH9KV&w%t^AAYLrRJv1xR2YK!Tib$K3Gg$@Thqw=g-B*7b>j+tv(VN1&%q0 zDwLCMc6>B|9}VxBfSnd@8+vf1TJT;J6&GiHoOg|Trewb$lYn~MWtS4A(-SYuWG@f# zzb$-bG02{{zOvO#p#FK$eT|$KF^}rn2lM|0;*%Hlxi6H8F?M||Ek_G{g?pHuxw*Mn zC><6T6>a!!r5C^et5EmINCAXmY7F?kznGZrWSxJq6MjQ2cnLsBH?j&c&+`IFUss}ntL$mR}*Sc)R`>$dX$zm=L z`RDGElkaOE={e%<29Y}d43XWS_(|u`!GlC|_xW7>+vPB)g-huKY~3yf_v+MaoQT7| z^unQCCO=7o-Dv7D*N8e71z*-s^=qe*uCaD4=WvQ{ac0z?;cv5*mzv|OyUCeh^ElvY z(pX@ngx$cU_PJX>#3^Xa!m!I@^}DcaJ(xO%oXD2As1@IKVICAC+AlE3%jxK1J!d-3 zI9!8VK~q!niI2}otcb<53}KnuA~6el$?e;>cdDhneE)tMC3UisZyDo8*W%0lV}{5G z-$1#H>U7a==*L3j{Bu$FQXW2j+zL+iI{drtIXOM^^V3sWUHj%39ax_%WI!ZGvKi~; zXZi|FSx3nSW={L@&8TXFF*DnCywOQwC|`)W55get-8ma3{7^~xh!{Vi9@2j8->5ke1yeAwly 
z-jrqQ1T~kdB;BOhBr2M4y4ey;eCF23z(6&hZxm7U&NZlO5uMg51^RhqMJF8C+f(mm zqsI9t<8r_8uXl$waRl@p1G4Lu$@3%l%#zoO`*vEmDjIYO*=EVK+V4(kms%g0X; zTacT(ipt3$D10^c`Q-__lZ2tEQ*%@H#o>yji6%8KJwMNnBf9LMq8yKrjL3qhj z-@W_lmnF%P-n^I2_={cY&aH?MwHe(hA6mLAW8<1NN3o_os4y$tH8w6E8ux|CUtVv3 z(dYjS+DG@uF#Y)4D;)mlq?9>>g3%aPjvAg}XP7lVt%ZLHr%>(bf;;S|j4mz;Ul_5V zbbsMsd=7L^OpHxLLPZ*Kx=w;&e-6JWD@ZaDp9o$91PGSJ{MhkAmyJ{Ml-c z)O7z{6|Q8}lv}`xVSuOF^r#ZD%Rz{_)yx@43cg5q(1!&FBG28K3xtipALEA;;lyWh3lS^3Ae%ZC$h+g`|6-1}dV`=@Q?i?6I+ zow%0X+4$U|ah?rTg{?{1KiV0{S@)syk;7q9FURiO%f^FK_&E>*a z=+XX*_XZ+dTT_v*J#5@-sV3PAKdoMCPKxhh%# z)hLZv-FRX(RN4LNre90jRYWDj?$-9eZw@BO`gDhS=c_&SZz3hq9&Mr&giip)YMNAY3W_sh^()+CsSbmZoJ%gDG5U?&FMvAv&v&915+0)4U1=DvY>xPy+0 znA~rTa-QkN2zKR|yU&yuOmCVmlASvKBiOQ9u%NazaW>31%CWUsibwylH`j>&T$T5y z`U{m4SJ|cX{alJOz8|>Yq-;L>&93T4|Lh$hzY&T&%JfTf?B}1MTHNVc)T(YOdBs|c zIe{y2{{|h^yxW^s@4A`f)*>)HV0|P|oa89MABFx;u=-ji_pED;IAW1pG&eS$i%3n} zU%8egyWl2{cYvXJ9F{e=LqqjEZs8r)WfQriZO96lV8*jMlu8(Z#$GY7JOX&}1{Rq+q?-VmW`D5Br%dr0*y)#Rr6 zm$WPXf{ztmyh!H9^AiL;vBd_wQ>QF{K{S0}!`!A8GwFVyuDc|L^uP( z0>j!Q&s!=$WZ~EOBU!wuvz4V?!Gf_;7WXeGH0MHLWP)h z8=R~&?<$Kn?~+G4QvgS3(h_;*z{vBb=_23J?2?TTKxjJnu9>x+zFy#)g_V3IMso%H zq?c|&0FsiDI(+yr<%Oup8-AM4@Y-F*z%C8u0gCr1gKh*5A~b`b58C+6KuLk+$G~(_ z>F^jVQk#&;@71q(g5FEx?utE{Ot%U&M|QMt!L2&a7F4VwGrME`d{utQ=MNSGqXl3wi_$?= z!YvhMn`cmyfT%zNTY%KuL=k~waa`e#XDoVaPGa5(-~PcaB%*obR8@#n^|aokz0c1Y zjgQ2t5Ivsm;X8$dm>tm&msdu`@`J6uJJgnI*7M?-Bkx7;6Crm1@I-~qVK&TidU|f0 zVL*82wWq*Dt2@zfc&<8uOhF>fqdB3~uT6^6zJJ#fZ# zz7|Q>0KZ~hycZgUYiet0(1t~GDpiWbWhs;`sIXwuUT+PEBc~wcW^MWWO=0J@BkQR} z{&q0bU$qQ*)Ban-I&U)-S0`EY{NOB+P(uTvuZ1(r%Erc9?sW+ymjYJ*LwS@@XUS0?HMWj&KUloxaABCJQ5Nwh4soRa)ECi0nC~w@eLOL z8E|RP$AOJk=;(IaabFy90336L0}&RatyQd-^|yxJo=3nwGR<=fiS%enhq3$N%a_+t z3=It_C^J_mC@8$zbkY|SXK(CL6+noOP)V*plPP407CluMZlQ)rMe6{Cj^nX()FzAc zRL5(hZX~1;QnC-HH5>-ASc#`~MH0x6>cUt2&Nv}3%0UDhxYe0A;MEEDjVL#Gcg4ySYF>(xSlL7C^ zB1b1-2i#Y*UaVoQ7{>GT*dfRe6MQVpE$B+Y;q7gsikuI50)u*d6_Nf0zgcTY6TT2J zFPbk9VP>edNzt%={SBqHr41E@V%go3$b8Xq?tA|=2x;E>fwmrc0llMqY_a7mI2k?vaup7J_ 
z_3E^l_c1*C^`gd`h+&{oJPJ5^lC_kR=^c>KRn-BquC^QL?@tIxZ1g$Q{kIliXmF5n z(>{(I7H3b*&JWNGKRnNx{ST(5r}F_Lr(0+!AJ#8DX?ereMGH*OQ?BrJ-n>!%{{A@y z1#zJcm?5wq6%{u6nM8R%#c;SQnng!KFlVL#7jt6Gi-`Bvnl+iQ3_062i}jt5n79WW zP@q&?las~4UdHLgS`S^93y>gp!FsAE-bHy1*=RaQ+RKQ(NLfj5?ZFIPucqf0RbRqT zTO;~%1&o~0;rki|*s;km%#=a=saA!FIc0o&93Bf1*4(C7n?(8ex_l>zaPs*vlWHmQ zhldxBDK4PDS9Y>Mmt}%0{qgG1u}-{O4qzKi8%_=mSsZpo*B^@1;@Ie@jOCS@8D@qa z*}rx~tpHCsQjhL8-^A3E5D1D;!(!V+7{B0?9hCIBolBiX^AP!c&o$-bC^Eef&P(Ykn4&a8z*K*7x^10yaNjGpdPQg@XT^ zVcpM!Uq5)(^Vx;5(R5|p*z_> zH~F?xFYhF?4nQ(DHa2E}FedybR^P4*b7+Er=dj^Ml3wuRN#cp@WaPQAm*WT*?4+q9XOE{xW;df3FUcuEJuQnRzOo*Z(k zLG>~7$~A!NQCC-gn`zwfliPl7#u75WQz<8aA@pkP^CR7X_-qYrO9+_}|JYkC01XPC zY_=m2DLrH;XOTFCO66`_I9GSB8>gh>;40wUUvXvgPp-wHt+5fj^9d~{lj{D#!7q7v zp)p^#dB9f(`^l5-E+Pt`R)GuEUueV860xmfG6_{9MjX()WSwvE3=A|Hi%W>f>w07Z zqM*8*Pc8lta;~4BtUJ?Q`UTPJ(YoA(Y~nt*M; zlCsj>40V}NwpGjOka1pPPD1>~is?<8o890$Ju{Xzr*ch2T6(#Mhex^atS9D(Mz7?E z{J5)T&9;4#OV?ew(4mmnhG;i zWGmDxzUGqk1z@y>cTLJ*XomW>Jri`wmQ3Rgv+WKMw`4DMY{Uw=$eqQl8F_kX>-oqLq10(tktxf z&U35GZldmvwy}OgH5N8%EdR3A_zH7s7Pr0UJlQ4T&Fe^)<~6!wd$f(#Q-za2k?=q+ zZC{AwZ-ax4J4?0$1soV14Z*>AOx`pkB*e78;gGV53Mogjvfe&@6K?A-X1-{pe6nHP z=fv|GK9Y7~Wb**%JJwm27V{)_N={yJk<~w1B0bg*Ht2rQL3Zxk9yW`d2R+%V9nmHA zg-fL9Hj&O|g9?IC#P=#L@ZiVa$s0o?I7OP$K6H*+ec$;0*NocgfnF^pT z+&~iiH~i9Z0{~_WVe*(+&D1qb%GH zBGJ-_$glx5*~W3m42+D7uw5RYzB~A#nRdyN=UtP?y=0JvB@7jc%RKIq7lbPnHdi8s z%k2J)!IARKozopD2!&Vf zWy7;Gl2E6+0{S8r3}iU$OMg;)JFzE9kapg!_YH{Pg-+U&&0 zNqy|^>zjL*QBs=Jg%%d*2RpD==Z7N_L(=Myq255GTckF`7ojL|7H6zm&NyNs{ z3kuu^l^)gzQY9^t7APmilw3-~G2J*bR7~)vtEp%gv&q z6=h^MP`5f@3@ny)GtkYOJyj%QBxR-~C@7>YdK+wTRX9H?zO5Ix*a-^@&C-vT;CkVU zW1Q?OTyz0Wtrg3cZxR;vBt#B`CQT4ZAQ@l2V#QSeM!0 z1&5nDRw5Y^Mo~yfjmcE=MH9>utR>toXzvIhLFUb9p^g8oTVH3VVq2FKIUnps*3OT+ zm6Ov-5?QN-QxHR3#jy@+pA3jw_U1!+HI5;X<`JJBUgY!URnPNXl{z?6+IfoY1QPtC z!8`smncPC#paJ8%mCS~ij6)5-sgiO_7{_Tldd`@EH0ApWyO1s{WHPy{rXIldVauL9 ztFa-mmAX4o0$}X39QUzNCloN=Fgket;vwgF-@scp50GoZcRw2&Q|I1m|IA7N*TN2?kU6kaDx%XH 
zWM0r>y_H^tO!?@sW2-@00I9f9{AOu?_K~`Yp>}3Awo{6VY{7UH(R@B%(dPylW?`=r9c)U*lj92=L6PA8r=bj5&WeaRl&I9` zNDDhH=d`^jc&>hZP&t`0(dI|SG-A-*`;q}>X68KG88okvqMsUg$XZKN6RyZE(_?R+ zz`+uC0L%6)*ek9PSPm%2O1ujWM*d-6I*$E8a9AeA3A{1yj0I|N1Rk=!S8 zGpD0MMNU8r{Hj;BgB2y?Gq7>V$Q|+k5FnN;U24)>vwspfEy?ionL=V<0nD;X`8z0y zMF7hpJmDXR>OA{cG+F?HJSK6qa&yZ?u%p)(8 zQ8efG-UsNK@9^V>M($^MYpSsaDiTb#V64d-s4HPO#Q>oYSI@Fl>(Y_5-0-Hz$;fOG z7q5;;2?`1_aA}Hpk!?_0#Cwh*d&S;r0vVdT3@9zo+WwJ|x2TR@ikW{r9jfe(Xa|lWAC*pbm74vW zOmgZVl`mWk3G<8nW;|Fx7HJAukJZY$MEnYNf|P8{^RD(p)#QggVFvQJ+CdHY(FClz zb3y3sVS9|&`2a2wQ8+H~$gf_#N~RWDNEMM{8kzp*l$1o{#zlzKnvj=~793K#KYMlr zI2`^lWqPL1p&bg*r7Pvt-Pi58175%cSuA;uc<1ifvziE1HP-F^#JP+JRpS(U?0IPb z-eiNp>7KPy?M0vX8=U?Uv*XRJlM_g+yqcJzxVd#?l(s9^lt0Zq;BQMVd6z(O{%yJ$st~+3SKyb;S+N6+B z3D$$kJw{Ljd_zM+&1j4d0p}tykjac4^p(mM9R|OQ{|y5b zN60qn`a|v)gCiC$q!%=mPCzk=Ruj;e$azbTR{(pql*(MA`z*G83a4Y2XjF24!&r{29UT z{YZmf2kJGisHojY^eUq@XmR*}lr{rej16lAJv4Nd0;V%qyoApuWE7s}&u}FXQaXNf zpVK%&L81>I3F6&H*ndE`Z=fqW7NLa`n!9f9m0W&`-hro_%>2 z6wdpQj9mIs(1M8Ndh}&Lz6e;lCGT%%osSxc#604aOj{soeuwu!R_Q?q??zQ`GFuF1 z3(oxSD0p+8aw9=5mak7V(`I5~s=zREY+)-at0;8guJlQUG=ZDP+C_<5=_$~p#`bmv zebbOeyT-kr1zvItH9eB0EKaa0X}E29+;mo&DOpnG%w*0*iY?ocpN$==nmj5kh>o(8 zNBKD*F5E(J@0bYLoaOq?`5tGT`8)5RHwTplLR{-v`Pocxo%gEI6 zeQ%Hi4tG~R8)(jc_VVTFvuB@#%OYnn7%IehhBLq7@g6`UWVf;eQy}JKAS=OEzv1&uplgJeo15I+?g$Dr%ew(1^CLv?h>QCn z@(y6~Q`7I?(omp+rqD+Qfar|F5*g%mXacCBq+|WK8~aH>tB!XxZz7%TY1`&Hl&gpI zNCRXL$;3p(#G3m0ilBx@D|4%W!-Pw19OHVyOpAz{jTSB5p$IEDd__e?*Z8GhPPN?+ ze4urf#KKE&x%DFBRTl!_&$&uccxr%ndW!-Iv^~) znRV!{wN@_Gnw^`=rqT42J$Ud?&EbQ`WQ0!dHNUR>=4Z$;ULO`c-Q7whqtU6QEuY@Y z`d#Tf?IQ0MsGli#L6Fz`XANcbNn@>Zhm7s7Js_s_R>Gn9J}$Pk`+T(3cQU!OZl4}9XdLQzSz zOUmu#p^DcN!ou<`&R_&~|L}0wzR-#3E&{8;5u#@@e`;##f~u+;oZTQC0H1;?)v7OL z>?3zk7n_>hK=oJk-6hs@oqF|k0}2HUk^EFb77h1Qvw~%snmHf$<}lED(cg0e}5Q4}tNEi;GF~ zz})Ou8hp5NjM}NzG4@)IHh`(byVTUa4|zhWJ;hn=I1*jDNqWp zpF{}#fy|wlub|j98neEH)7}}FE}9Cc+#azpF$mx9-xNF@XX6|lDM+1A9#QRH zuFg zIYflc6D1`_^}xp&<)kzfItpfOUh(Nm@3tfs?Zc 
z#JiqdrQh^Y(yakZ6vwM>c9B27_LR*@eFp*OpBK0()UjhQwKuGfHKVOggbN3^*~qTs zqPTK=FUE}M#F3U`Sd~6*YPv=t`UM=kdbM2%`+(XcBqa2srDzawx1aon94Q7Y61>75 zPlB0~m>K>>tmvP6d6iYC0)@Z{4vb*YSxnJ}l6%9n^+OkzW=Q5Xfzt+FP`*uokFOb0 zRBHJ|9=8;vIm9v?4CPlA(&A&FkOsg9IQktJ2QlN`;J}$cIYjJX^}_3abb;{BMoteR zZyB}%g=%x7Z+sWG`Qx;RA~nw>`?QNkE2jd%m-l-i7_Pv`>l=KP8`Qsa?(`5i{wXPa$VSy}7s##9hk(DoH>CT}ET za`UDp%2w)8P0yGM+_s)kPa>IE&cD>Ior~|BaZX9Jh_4?z6Ro^w^pt7*^%&h;E^2|& zudso|cpmaHZkHyehl+qsa|0d%FhRzgheoFAD)=EqYXPEa7x&h{X}+&c9kfndz|cFX z8A5f|In3E5PZAD;7}^2|Kt6}g0Y-u?B^QPZg6hOBBJy-ym5ny2>PR)Iia0`z!GuG1 zRHzRtDlUteciw~a2$ZaY1Nm4;F9*iP-T|y3`Xr1ec~v+`pCXAGcFW$q&yi(9&ijQ} z-J-*)dt?3E5b8kr$VBM*@;+8a;?hRgLn!UQ1p|R26a;k;utI=!+j@vYPcKMDA>q2D zr-4%B^o!lMzkipWo|y^FLUfQoGW{D$Xp}NXj~_=r;g1Frym{DDPtUhA4u76<73FI{ zOOMxk`^Ad$xPSe<;pZGTliy6ngxOrM{FywLzAUb}Nztq?uZmcI%F929j~6K{EELWK z%!{R&01gLb$+E1os`9#)?B0*Qd^rq%X#<%y)klJYg6jYwA?x$51tPG|fD1YZ3{z5! z>_hLJC<;R?3Ixpf!}nOPFOUI1`>1b=1*pK=&>piI5u45SF=`2fHh@Z1UMmd^3H*n% ziqK`UJS+VsC&a-xB0zyiUWzf&Y^`WBLB|ks9_?y{Hn3kcm6 z?L@R}Y-Kj#C-RfR17ZS%JCPV~xJ#Xp7vLon0rK|`LW#XZM-)GH%q`a*rLpjJ3srr5 zGCYNSoyxSdPAq3?R6gcRwf!t93Uqfr6CRru3YlmzKJ@WyRu-1B-)3D?w%0W^cSEbr z{qK`1AmDf6!(F4R8oX}qN2kpVR}g9+3S&quQ*98hlBwgXrsp0RH^oXm*#!SD2~&;RjO7djA%)2Zt9c zbpBZ@BvHewYPnr4lD6Q-Fhtx0h8bgakaK_aJizE>A-+JmNM53Kku7&Zt$BJpc62%)dd%f<*nXubBA` zK+P0HO~Me$$&sPtjzDBV-x3dOP}H(|5B|e*r(m9m@!0oq$Ar>IEQZ2^tgy(hu1*&I z!Zd$v^KZ{G-^(Dq!3(XskoAe8?-+_fb!afd7n;KU+^kJbWef*#7Zlwp^+VpgAy)86 z=dZ^m#vS=D@5Zumqa4tA=$jBG%Och4t2WQ`9rJG{O*j9MT13*-cM}dl?xkpP)$4_y zLw^19_rdgnY>#nnqtlbJZRgId(2Rg2Qu2rQ8`87)@845ig@w7IXIb9)pU(NCM}3_x z=*{<77ZhmPri zTYhE!iO>JydwV{~GI)Z-3OcDZn9=s!(-R#)k7Hxa;>2(uen!t?_?rLt0H2o3BWP8- zk%>zjIG`lH>Yw|)I#K!)?LW;U3%6s#d=ufnpZ}rs|LH4@B~>)!Zsl$JAL}tQr^8!n zQbtCGEDLzElLo?v50^t8sI;f4%_H#^tIh1t;%{G=7QHz<{x$U3u3M`}N{zMwrouq3M$ zX?x|=jqm}L9g3#YNc$U+)L=YCy!6zce2wepKAm}8;09sy&Wp(HC`gKsi9)hM=0;Pn zR8d(jBK~)9;s1t9GHCSEgR_W&8ac9$0LQ5Vc8AjtUp5yq_?c z(A?Bf_G(=l=pL|3I3eP^q=0(S>qaILQ7zGbkp6bkzKy*)3uuk7-XeNZ1^Xr8fl;X+ 
zvCk|;5{Ask*qND_uG!2AgiL-*1LA`!0C!O4ZXRkKMGBGcktsAANk_o^>(fc!`5=AV zKX6)b-6}*<;2QebAD^aCaKaPi?n`JW-h8Wg*|ucn{;_h2#Rof^=s*A7>%r(GWyB|y z)U|2oO~@Yceu2;3=Dc}^HvWTe&MQqsKmSVp%^4$G&!q}d1yadd`+e&*<)&Y1Tdu+p zgX-A?GucbGDM6KjWYx;rdLTY+CVXJA!9DoA?lY|UULaDx$`L(XNcaw*?lyRwgz}9O z@?#xi&Vv1^AlPP6e?TE*3HnX5d_NW*JpKS09ZIvGL7EGwONfF~7yWe^4zoj?N;8-T zU<^FHT<6`}>^a2BA!KaANwyFk+rp5Y-tC{JaeMK3miETin3$N$$rk*`_=0gLVrI>& zvRm2iobO(}a+I{2RBl=BSM>dg16NzIDCO|A2=)98mSzQCHE4EwDnxacmG8HE?^qmJ zpXE8`x@l&JKg_1>ml|8t<=qOtm1Ct0H8Tg+4;3XR4AyyE-qE9GC;~nRGv~3$6@qrc zeGJWZhM4S!A&?vPQo)ezHf~5)R0AG45)kO;C!?jsO_CaD#IS#eQUXV{a!@ZG^#m3z z+ZxWj2DTJWPfuI)WYpaJC5vp@3983_oD-ybD!EX^=QN=P@o*87oGSINW0_EpCJ=A> z{vSseqaEh#Nm}>w*Dvel$0E9)eknG`mxGS01hwZ9_fPkew-i4OKTi1vu#<&-KhQ3y zl1h^-b*!!T2ilFU$G*n2m2p$E(v&SZl}~dR&C<4R+HvpD1&1qKBHA)M?-==3Sz_yF{@K z*i7HVC7Yi~iHQRh8t!{QAHd1rT$gGbNFtt1n_!J9@&n*uLSO3C@46Imh^rXMRUx6E zH{^v7mDGz7iEV6X23F_WfH*`lb(90m5K!q?L}kd2+4Aq1g_@bfFA8qPhxhL{VM0XI zuJ!(KOwAY^>N7ce_AiLuF7Nb`ztRLv%))Dz-NMFtaNo&^8N<^dc|k=BW?O*|x84?F zFSO#3HfH%pF_=obW>G_R$`;Bp+QpJ8i??FtyglU?t%NuOy?QCuJrNqU>%cYI$ zA)f}2CI}@Q6cBm_LBika|KcC8{rgOnu6{Fb&G#=7kh!9I4>ARGfYK43Dp?+Sya44cH0b(i%r7YQSw5VJ{n4PE4R^IgUYm*XRDWWs~+_*vofOU*jPG zpJYZm|3ZxoqissrlHVv~L3aSjaK;tVpyIFS&SO!G=Sq%@H~j=ECnhWEpa14 zyO`$jhclVUn3@`78{~xyOa#_xmOej5xmUm2u|CE2MC@oYa1Ut3q=C42kaoII!bAZC z|59A+fnY=+6kO@AustQMMB)Vo0xao;^XI={Yi2>et6PGD_6?*U(oeoGT}S7c1`_Q6@Axd{kEb_T&6dq&s5%?ZIwpqPftY22u+pG(!7NNqa0aeW*6aO^*7vQ zjDb*{r7I{JnFYYQW8pb!Q9O~_`7caP@<<6hkg=QoQze}n{3lu6B|TrE?0VyJU~vMs zO2nUf<>=AOQ{<0T8|Mq3e^bZRH;&9Nm;e0AmW8h*x^|zg%KKJiNwrw_cf!ME>TIgp zL&5b0Yhku8A^6~f3j7827-1tV6ZgthJ=jSNmkVmSx!wQP65~9LG^4Un=6c05s`4~P zj8Z<%<~~+GGyQLJNR?hi!A|2A6@4Mzn-G*WsblC@esEaWxM|(Rmv{Shx&{I-y!xE4 zD4%uh_6Xl=wN>Lw72W-J-P7)knU$E_x6V?bS!2R@*ODbiYOKBcvX+Q?(QmG8^oeJy zAI&nhpXMFfY{#4Fc0lYI?4OFIaARAXBjn)}aHnvPzYuOtN^O&R48MtU zYdqX6BQ3oNz&<39LuE?BoKYa>g|&VpDFIl$bmCVldQtq>TK(teA;}`bzBeXyj#LU4ZGxVMjCxpu8^W z`O`h_^Ek2bDk83a#o4LF`!GRbqy6|*$dkX$7fEET(7(AMiQf!Lp#I#mE^LLwKNTjm 
zU`c4CGF$J_!W*a8dMex5aq^@Irk07Z(7hB>y=Qdd%qj+R-iPo|{8IaCtpa1aZg$^9 zcEYV@QR}ef0#b}GtMhnOLGiqBo z85#Cj-83J27wgp6*x273Hpm10{pnx71{I2#e~6s(670TZ)foB2EM07hMLkdK;cv#< z{(5DM?#o!q)#{4um}Vz;4s))uT=l@CIX(GE!O%_?(Ettq{Yrj@tN(ggGd!^O;)P~Q zBSr_`MRaPnMI_UV)!K8zg&r(ElKx}5KJ9%u5JdX{@tHX=u5ResLD&JF7A{`~Jz8>F z12YH5V>Eq*Yq7EcBv_J@V=ko*P?eto<(yXj3m&=lzhU`>@;3OfnAF7e-Rp?S&9&$X zm0k2gTPzs3-#w=2U-9`;7J0tI`V#BLxpBi&E_v2B`$O-I^xnH0w0O!=tGTs3V+&n# z2h;i`2Q9vts3wirO|DA0^M;w5{Z!u9;+)(6A7}3Ymvh_q0cXWUMpiU0T!d5zl~QC= zA!#p7?Ua^ONTMi7rHx9`(q1Z|L5qg=UbKgH=Xt*0qr!DR_w&5(=kxZtulv64=6U}A z|Km7*zwi2W%1Ul}<;THMr1WVem)+^;w?U&y52*qE0ZaOt5w$%v*wa+J=IpjL1<<`>2$rlKa#7Og9h24dT%ChJmyv+IU)r+qP`)TIfLdA z#3j;S;LPXKE8mVaB4F0RfP$ZnOopz5Zj;WSF*5)FZYy$wu-PK|2uF0;H#?}T;B(<3 zcohmjAVcTbg5u0!yegt7UJbl*#d-}kEQDq^&s-m*I_ z-CV|`a=_R2W1d3H;7j9Y;}7=7W?I3Fr|Q3l2O{*a!0XO0UTkK+{(TdU(h38^{<>6; zMjT=!et31fsy$^t&I49qt5taOM9V=u{9vz7q1*sl^cIpI)W-YIG8FC4gz|e3m3Fxg zT_#FX!fnHyVt?`d9OyGqRJQsHSx<6^-yqKhXlKH-?swEVK%7XNR9u#lBnbfnKKepz2Z>z@n(*Dd}T_2SwgqT4nV`fb0NkkjJ5%|G@W zTmEC-;T~>T^6w1k6z?h9=i?EiFIP|?0YZ8|X%KUpLZwB7q8PYVLJSp(EzEjIlZwjD^!t}hn26xPT#zq`+$?O*ARS3;9XaTr$xHV16Mw14 z=f}mx6&@Lx5JBsRvs$C{m8pf}?&6>rl>@tqeSa?Mrp5~$P;jf(=yDk-4{E-uNq%Nj<>qzj#s3>F)w z&G(!dgQPVkYVw~N9m~DeIu%+gc=5cHK0`Enaoy-RN4}`(V~GOGt3y>A>86j~*dVC8 z>Nw|_s(4*d+nDs*T=q;~o5sydO$UaDgO$@Q_D@-ux1-wbzVECe`<5taCz@C)=n zG6Z!c9fUmkiahHh&(=mOCc9S0M^%e*aBz^&dvF=n5MB24g@u~;=}}_h&lbgXw?y(O zaj!o`WmKyFd(zi;M6b+-Zkm7H0lVc9v<3ZH3+??4lw~$HeP^snW?3)K%K!Y znCH=%D{hPF@eYqh(55?_X@Pl7FB=A$mVJr6STw}a8a1`ZVWg&m^_7a<^C`3N^j24) zaY6;Pdns<`=H9wzPi&3%KLqVZw*+bY*nP|nIq0IL_^0;ZTCDbCj6bEviXlF_8C7DCsOq9qhfKg&f)nyjRq z=vVHlX;|dokexIZlNvX%X40H{4Ha5awC;zEQrp*~MHnworQZIe85dCmO7*7R{h)X6 z-s!ckcL}YD1#F1!<=fLu3C?y7QSnk^eYd$A-+zg1QtNE|XlZ!&NAgiScHt3v|6MmZ zX$kF#=xZL5jtQA>Z!Vu98P? zMh%K8uq*6;_K5dSc%B^=Fppf~l9Sf?%}dhf zQzz^Px~|EM_?X}2dG}+@m+b2k0zNAbz3}B6?PXCLWJ&bS$e6Fx{gkb4^TPFic2JQbrokkS~y!0*q z{1Z|!IYT3WNQjz4GV_7%`}pXdrCYmx{bPhMpiNb7FWoEI$u#_GSPuP&xE>eQaDeN^#m9D2m7mX@4? 
zf~hB}@$hWtiqBoYZXM}bBqb~Iz5y18+hoV~?XN6^!I)t;CT%7`F`z(+U}~%cSugmg=tXVp93*D(BVNxq%Tb? zx{Uo5U4KIHkXI&^l%4pI zn)_Q~B51=}T3TNtWO>tLP9Oc~dP}G8c7!^MsLjYrp{Rl#jjQGzntJzvS6=5yR?3&O zyn#-acj0VG`$MQn3P;U8+BaU@-FqNK|6oeWH|3Z;%r-PEe?3?BL!UAC+5~(t#AYOA z066--vr}{s3aLT9MC@A74^+ECufm5;oNS^nxMHiy)ma&-8bZ<$@4LBm0$3!fbm7TY zwimzlvZ>l4gA_HHQ9wKQ#Kc^n=mhtkp3@CR_q+I?CAI>hosd8`d?cCcb-!^6zd2{d z)JlW%LOjt%78P7PJTD_78DND#eb1s~x2at#)FN+CQD+xVtNW3d(&5+VQSGuzw)wvo z{kV8>UV-r*FMY1Bq=^nZ?*5u+QwUuNe};9qS*NRj#WM5uYm3p~T*7xrzx@$`p zbSm(ueq$f!Va$}LS1tGdSl`v1eTkY@%+Ex zgy&nI0duI?3w{VxZfSuw<^}kiwmEYGSG&vdY?F%_%{fyppGybP@L@5fYQY46m;1S30p!>cx4qID+_DiqwU>HW-9 zx?;FUAgR00xJA$Y9Rcz%#O|WbjVQ{m2SoSmOkdr$IjxHCrs895y4rpv`9&Ne9;eP* z+8g;Ee4F@T4zHL;zA=RR%(?34hlsEIn1XM-zEGb%<=eLv2)Um7uv@@WlDPO6nKb*! z5c=6j$6wz=xY$|f`oy~b0yd)KgX0P_CweArDef~=0Jq72z6n%YPxkInV8(? zJ|51ViH7As-vQQzf*t_Q3lzclf{ctzunJs6pb%h4k3CJHK<%Mf8giN>Y(me=AI|*; zh|y)8!D@7a;i7cTR-?o0A0itgV@}|wtU#ecWLKCO^%!i#{_rfgJz=rUI-vUo zQZ~O{6A*cmIcDe5R{ID$7!S}uN=FO>gJVQX0%wt5lqv#?32kQ3`wIicGe?<)IFo>9nU#&HAWv?06I8Nt6wUki#omj0#~P-Uk`w486{Q@Dli9JkAz zepYZ!sAT}^WQpUqV!dOG8snNZ3*>a@ot&I%yNuzzhlP?44ePcsWjVQ1a&mGWwSVrn z``uY{&2l8%ljt&N0qW}N@Lox<11ubgR*gtlSiHD*@642*nOj@RJO}n-8?)*P58M9l z4YNDazG#1?3I5ra+6aal)h(PxS4@|(T3A@T_jq$eK520! 
z5*`qnb$0&x7ErumXUUMEgbRxnErKrXyp+^!H@B5=Z3+ldhECET)Y^_PK?_NosTc^P z{_lZ6zr+cIBnGw6{Nu+b#4JFNOXk2rTr;15b4cZby)`ZXkLbTC@_enir%Vt2~@(Dg-4Th*3MW3EID7u$l%A0;^ z0f>iRJAOjl=}&_5X70l?ux!m6xZBB9ga{tZ`~u`^zLLpA+y*)A@dBV6gsknwe@Z`Q z;}BW?ET@hR8N-+TP?EVJ5CRGr5 zyvxp>lqH-#^67{>LV-*A_g!JsrJvEYOw)r*f-Vcog|jR2q9?-~JcVy`FYu;@0*3zI z6&7^RI18FI-He)sv>C6^1ghEug~C5iR_V2x87TGtdiduN5*fzruU}&(D~iuAa(~vEWgtths!OCA9M_&#{u3h(m0( z35#Yf^30=8$XG%%!gmOs@8Llpvq=Xbw!#Zf5(}w_vNzhuMH`|n>h>2>EDqy;D^}_e zq!jXs#{lVNb*Mg*DuG-p3Wc0dB*iphLU3Yjfp0}K;^V+)t{7AoFW2j)hX%!voG;Y& z-vmV9;sJ$D+n{BsD?h0RHQa&am`3G%+I4AHR`?oXb4gk;YI zDfLaHbi^4zQW?;xSoAkMM8ylmG99?i6&R89oPnlNe!2nq3l z^#m!(dz}HqqgM9qML2*nj@KB6cXEMvhw%b>KTta)1OccmG{Jwp1B3-(%3%-n#WU$%H=7{FA@XYCJCqxSvDWY%k-(Y zRoG761JOE%DDf35CIjc_yHomqlv%I2FOZmJKAjb0ngSsweF9Opiu()1o)D{4k9~8w zR%Q3$c zz|ZUYE%4(2CPtPSh{no^pBqT^CTmQJ-BQNb+dr)+qY9XeD&$%5DAI14w092eWUm!+*=yR@FRhz(47!#S9-3cUBH~C=aP_}pfCwH zYQlc9dCwkqxH{iJ8%Km-dgeq`TM@xzi^M4yvi(JDYh>2LmkXFt&xQs9bT34kwNrjZ zY4`D~!QG?+$U|uop7g}jwjH*g>E@O8f1@k}cZPNKU5~PWKu5GO8kGY}jT18#pt-L- zBn%#c{EdO8Y~nz{LyH^_A9~d=uP-uzZma~05~OwDfEP4B13-ZSiEi|SDCuxK&hb19oi)53J7!lfdl>x>*-+Bf zD<#PGK=9($t&XIPAkGCK0*af*J3r+8wPGl&P+Goq9cx=jiIbabcUb3B>IPP}d#t{2VNAr^D#{*q9CwFNyq_DWT zHaz7js;J==wD8?uCi)^EeOa(-mWO|o&%SbOwkXF*erYRmJas-l>Efdx6VMRXNeS1D)I}WAi z%5ME5eCi!3%AbDL;jhLc2cpas!itR{+US){^b!)V$ zY9qX9=#m_8e5l?ST!ghvT;LYo=kSNnzCqx05*8Wgr3vd)(Gn3EsR=%XWE;TD0L~w% z_kt$$0c|)a+px8f5H5n)A1p}TpWxW3fv5m7zks;0$(LXm+@qQQ=!A;9?=0*e*gH`R zqKimXURQ*7Q*o70QnMq9#1677^70|WI2twedF3n>a6qwcu9zqlP?(9)S&oc&v?FSJ zeIPT9-})3vs+kO2q7{Y25LUrhl(H=?E!%}{y-b*x0)QTJFEEvGj#u3GL#w?8sK8DK2NZ{10p zPR;7iK)~q#Ch+=D@uipNf{Jm-%VTs2h(E+-A<~nX^{8@JHwU79pQ0FHnhonAzGnbg zMUU^LduxJ599Y|RYAOq$L2{2y7S_E(=$+Ma-^R<>vEy&Z^XTx7`4W@;X z1gLA*W|FcVZj1x24LoDPn{ApfO4$!V*~kAXC>MHGHQYC1%YzIDffKlE?HI9p@?jd* zf4bq0#^JF+bMybAIk?%AGiu%s6LQW3+dwJ&7Tr&kl)991)A#&uM8d=CiS;yy+(=*q zb~n0isRn>{iKE0$SKto%=?yr zIYbGH#g)`s@}r{RVO+e{G$oW%Kf(It_gyNz^?IysDb}leA}p_7p|Fp}oX&1i`&)gw 
zf^O}Mf~-ERKRi48=DA9`IQzPjj?aek0D8}?8B-yd9(Gx&-OD2#Y`0ypfke_T!1N+4&vPeQs+Qo!Os4v zS_D?Qa>R&uWQJ<$=xFTTKx`bZ*4cC;Ms9H_zq9t(Z)>Wmr-x^&XwVLtPrroONWSLH;4t4t`mrAH)s`5#Wpt& zVbKN;4|JJ5r$(SFu9_rA+D*gzDtJH?#lp6ZY8&cJjXvN7tdAtEZ@y2{%7bEdLav** zb;>U4*|TRgB}nZ|8iSU&BDdauY&R}GN~qVDA~3DXr8kiv`{vzezE6&i?}jTEGy#e5 z#2D&Vu^-L#)9QlU$e~HBY=N~X3wNzQ>);WI4xK!+*cyjw^ken_6(+^6VOY|X>PYCr zw{KNP?L<-SM%lsvROr?8OUm$V)(p*}3B(n33uS@*oX2gWJt|T_k1l{Ht4Gc1G1VLO zF5NNn{yQ$g3q)x*mb6w&F=#K#jSrok9^RzH%66|y#cX}p$&fi^%wZc|0YVc%$P{zTuX@ zpOB=QkAQ#@1S{(OWZby%FS)TYTj1fC+3r(6sQiky6Qv4?x*_w>fBw9NFw;AJe&z=( z5Y~b*Po0QyjWE}3e5p5c3lYu@6>6>c&ta?;??WP#pBJH=K;KKE<&oT%E3+Qi5~#!H z>6=C+wHaub|I5ENx{hb(PQ5QGQ7GNs6j`|;bQM8f$F*s|?4IDTPtWdD?kmZ|BAq~v z0N%w)wR$&9VZ-*Nv2D%~Iq1Nlzfej2;3iID=W3Xc?^NS9kpu^G8=@LQd{-dTk zs4?efw3&tM$0CRALK+KSdELA^t4wTW{Qlwf4wrG=2Ilw18{YM+WG=Y*pSCvp_!(Op z*5bQ-?`=}oRT?$P(GJEv`a`^GXgw8=o%u12*zkyZ0n7zID#X7i#i04EhnWe|~ zl*6WH51dvPUy5(qz5C5`?=Te!!^Xft!Gj0)!q5szMBlJg$}Y19XTcGOKuNqQcvjA! zX|4Ms=&GNRcyX{T$WW5)WRG|_0u6T@QgLT7oqw>%VW9rjH;nXBaYXoH(+DQ6I8+m8 z%b&|@EJVVQ-Zk|JQ#%pV2v!|FIz&^6KZdTXX0$E}pL~ypftU-Tqody z0~_&;*fz+FsO6cKu!rwKx=SunGV(a={c%+)t_R`er-E3=Qv%F_eC!kbUwv$xBcJT6i5Gs$!?!0X-UyrV|1>xD9dqYt-u!`Jx3fCWy&)1&VVHl7- zYhqHg^^M>@LQbBe_MdFc8Fs$yVC=VaL zY4~GsxE)Cqh_}Y_L^xW+pg)o0gC|5bm`wMseuV!3orhhd z@irj;d&Fxtp{G8#NW7o-T^JDe(b5dDLQOvr^ErR4qT=STtrK?^?VD7Rz z-ZE>kbDT0Ur_id>O9o!O9_epwJk1-zx$xAgN2{FYONHz>F715t;m&7kFD#5Tjab2BN9sT3TVDhZ$=YG)rDn%Ls%4qS!!V)&y+oUf%(PAbHyPHhF$;yJksPOy`qfuiujZ9aZ{M=k8M3hIgglCbVfJT<==gj!P5IBV%elHC49XbG_;@9(Kx&Ipbo!sP5-qFyzdd zzIST*b2k@!wfsCmpAb|B`7;73L0}}8tmVTD-j5&G3g!VH!)YeH!w7q#@!LUqWNQ(7 zH|nVHGp+x^OeezW0JBORMHuxGtV<$&0b)&R43t|RQLYDnCC2)tBj?1$L(tw_;=!bQ zkpL6|du%7K`SKg9gvS11>O$0K-b(m$5|TxjF}*~B6p$%h#54@P0Q>W^41}v_Be^;O zwEX9*8tUrbYm<4gY{Q4-W{)iL-%@*Kap${f=ch9U5-~pbfL~NqW)_V~qU(?={^s*l ziESRX;kt8|kzB!qpQ<`Qo%P^MR~BKCRB-VyTO z(ymJh=dfQ6^as)N!`qE=@D1dHn{CPW1BHM`Iz}pz3>AaR?F&XMA{IybKdzqX%+*Vs z4=f)%?@5>$pSPm6!M5+2vhUh)GAa~Mxre)ZHbSCb3kUw|&->5*`STbVf?Hi>aDWbV 
zDq#4{*w4;q0fa+mRjFhqk(gtI2n~0!nV8GjpI2h$ag{ubi}5-L_XQ#YO9Fd7z-4t$5H3smwooG-$D>RiC;(Y_z^j_ot;i#pxo_HKu3*M z0WSuOlvb1XFS~4MmJKB(+{1qHXX>2-X>Vob%_~8E05mcfD^U zYiG?GY}R>YWo2J~{J9EB7iM0FyUY8Ml9I1G{`G~cGX%?@PmonKpExtiw>x?DX*Skz zum#-x^W~#gW?$G&1Ra3cL`#K8sxTyKrKGH-W>N7e#K}P*%!SSs3AlMjs%3&6@>1PG z9CWS-^=UhOgeQW5v~MJj2tYQFltnZo*e3Jq3gNl+);PQ+jc2>amf)~*ZvleznP8eT|s8u?Cg$6SBV zl>a%y(^b@{P5`!&bHalQlj|)ML-TeNqf{i1f1;~=8Ga=&R@Q&L-4C-~$>sae-4L%N z0$LGTIDn|&ufY4CJ)b$6b{ZiTv z&HeJbJAK~BM|!&2H@%zpQrt%jf7T_Ty|WY3N^gP)1A3PeXdGXSWGBwuyI1k@#)uh0 ztWwyb4eS&WWD*Zwm8Tt#66@so^XJ_+T=K*rj;~?Oj&}A-T?d)UV=eyc;<*>x*CzXJ z`FVTZOhnWp{NYWt!Kel#CSVh4vn(wvc;b%z%=$Dmr8DGQuzQL|)jbig*eW^u*!rJ7 zx#_YJO#uT=rvx9nk@W0j+|K~Z&z{btSrodJAzW@>AcXNCP1DX_oyEXl)>yFC7vTV8-XeeQ{+K6oXKa zWEuMLm)IG>jY<|70)a3zktCOroT)3*WL8t@S>CJl=9M@?mSF~i0A^yP#>DO;bP<~L zXRrYz7}fhC&je1b6^Eh2#Pr1|z+y=pcXt49kk0o#^7csxbEFDhaEv zY5C2UDjChL-fU1rkWC%}0!wP+FGJbDUxYAwMhDMgBs2kqg@x)mQB=hQR=X{>4J;2W z4Mih!3Y!vrqe6ltH?{drCTz4>Q~haU&&0I_bcu>8$AiXBl%>dW(bY6GdWiHtd|B_1 z{6vTflnpT=(EZ#*P~K`LraWk0)QtNPtxrVRP-%^iTcPffeswb)*(54f&5K9~43cLc z!D)lPitZVXA)I@A015jZFeJIKkbTzK(`+~$95c)h7+m@pc^wGB#$gqQGoFr|fkYb%m;+Hc8&K(NK6sFbAYv5M z7Z3p$mOtv(+d1kxVjH0qBh^6SpM5H|ZrUac$xruA`5N9hExLL0MBGDsV~XjE(NK0Z)*t;9n%I*PaBOQJeZ{j6{6>Qm{v zeFHTyOv+oxWV&~lDgdIM%;*4S>n>*Nge-5c3v{m#!fCGLTaSFo-m_;c(YTU+uokH= zc}TH;hoL9g=t+SDo=xNxRc1#K1rhjROEc zhAI3=Xs+a$Gw-oAFf;3ku16>+LgL&ru@iVYUtkAnJQ`Dv%i1F=j2IILoJj-{s-mnb z)=yrd(m>@`xGDBGC(Yv>!WHvN3$SQr1OC^+oBf~?cmOt0^RDvPF--YjhXVGRw6pgj zhl|;npX?jCQV&9dz=y6IM^W$b8ZdQYSAn!)*4nl3R&Y1#{1kyAAgc)??5=N7%hs-S zq^YdReCIwTEc)}yQ6l8S9UMZxyXVT6zn$8DS3Xh~XN5V-pTHY~(FqxHPZ1xB8XX$c|>Q9gE4kiJ9*B)pl7R#eR%F~70%Ut zvxnO)6jf)vc5UL~T82&lZi6fJY5S4zwqU9Dtrd7Iuo(;it3h(yNp#Mrv@A?IJT=4% zTAcneTgsL`K(S>sAE3m)QS|Z}tIKz9w-JbMU6S{$pEFKX(f6m}#rA1f=;tOTzeF-R zN}$QHe$`j!K$=)#_H|#Dc_l&z{ip(6R?gbKo zdy?OGvQ8VWEK*G5zSeWB@UpbdD#lddWBrX0GlKH{KEQ>;9hJf11N7jzd_pz}Am-lw zWEALRCAxk^c+i<3GUgJDP|ur|?>Vmh-@75|nf*vzJ9FAFGSZRQR`hUs1~*Qsl9??t 
zANir@r)wO3uZ5X^8Hx#>Db;>`4|dkM;_W{^Gb1z9dj+o}P!!-S@3GgR+iz`=8mCQS zwBKC6^2QBbL{8<{b1e^-9j9-M;wA{tLy|LOfF= zB>3V^P2?Csa4Lm6crxtQD>JPwoyomm*2D2kLRIl+Ng4vqvYW{EPo_bmO(0kd3>ZZ^ zHZU4-JG;yUe3|11Bq(08hfbJa05S7M`}RKW1V3T4xyoylR*dCc`OH}&of;H>)}5B( zrkZ`_bL;(iTT-Ixmox2`A%BMMCd}Ha;9^E?j;GRlpA#50$N|#NGGCl~vO@OG9_OS5 z`-tf!DL4&$_UWz$JB4_L<#ZmOJ`%yi0fP+$978#PGAZ6NMq_DkMUHl&JEQj*Z87Ep z5lEG+(9HJNd9NMoB`7oLBR#a&jLY94M{`r}xV*Ta^(ALjR==VXEfdnKi??dTugef| zsW=l@E!{FPp}mIFWA&+m&)fNmeV)X(wQY1s(AsErs`yld)lkJhTR8p9Lfc3Ic$Qma zEUs_mVW~;MxQC%xeWCS1al+=Ew!&52eXDDS#V7nv3o3?JGJVbzJ%63LSTff{)s0$L z9(Snd1Is!+u9D%4H56WByByN2V!!CXIZVnFtbL4dRc*cs_(%XX!u{R@eh+bEW6N<) zfDv(M$eF4{Q$|GzHALuWY_$Orc3xKfcG6ZHnbEnGpQm?t{-qLil+V!P+6-YY(kGi zU>l{zv}7o3-l;l!=jz_TO)rdD;#QY?kP~0;uN;v2!}EFFcuzpPv#J*!SCd+r%@cp& zEt2#LQYWlgC$Icvr;>P2#LQpiB1i7ipp(}n*EhFL&fjfbW!0C}E=h+5+Qm}VPU4m7 zs&JpqAC;M0@!O`E&hcB?JZQMc@$#bPS+CxjkAokr5D78WpmM704SfQw1Xe4ix@M~c^_FRut$HZBt0h{9KJ3G?{NwOS{M@as_x{_tQXiDb4&?vL@Rte z>!Qur)cYYdk|8kFW?xCep){p2&JMRShU?wBQhSR%3=7*wSJeAV4J=DZP4VicKGDkV zs1Tm2aMEnk96XhES)66im*AHlZ9kjwcB!+4 zeNea;Qp2uaPdhpkxuQF8dcMo2!dF2m>V+NQN}{pJ8BrDm5#NU!t}u`3EjnB~WmDRj z@ioC_z$=UXP4ydvNLI>aUX^T-pVpRZ4g30agQD~ko=Mo)J^z zZyQZr`IVA$WJ8ILx2WC523j*q%|5-)_vN_+M}I6$$~;h_vdi!SO|PXR`f}d~-qL-M zo35HwuHv_Rm?5y_OS**4>W-+^@0^WmMD5v}S|oS()4bL>UyfDqiEp;icC~jcG)^)d z4-Q>8F+NW%Np8IN;{|mc9!|Rkg}4W)g>s{z6s7oz+ZEc}`@C}Wc2p)NW~6OQq|BE~ z)QO-}z30~7P}jcRpJ9=Gv@7Mx&WGK1Ja*f&4GveX?&&Tm&x!A)R8l7Scz5}K>X7X; z=_y3gNTDY3r3VU9W!W{Vo7KMNlAMVQH?w@ z+X0UvX|%!aqXM9M($dU{w1Ny+04n2-k!j2M*Y3JS<|*UQd@5wggGO{K*dXkeHiJxR zAR7qtijIuTsshvKgE=wHQ^x2mwuc)y|ghr5{C1l)U@y*nSL#1W{nay;GaUIxT7yGSWiCI6XPQ&Y?>9d9j^||-Wj`IY zLF7S?9SiotPItQxOGmhSsnQkseaX%3D!UyLHQEwcMHMr*x)zw(^Sm;kY8?^{t*1G+ z$v(#1gH@|my#pn7rQ-EEBmnRDdCLkq5hcDBcqL>?QvYY3BNOef%^}78p6jdAKhBo@73Pw`Q7mqj` z)ipEEE=|#qDprTC!Jy%%Sm&L^6-oAN-q*dpE#JbmPG|>(73ITuT9&rP-O0s|twa(} zHJ0|ByF{@S)Z}N99TJ~tKNurC)_MDLhw3*{k;I&`+xpj%?N+168a)G~hg1=ZWlrEi zK$UX@@B^-Ss2ybzoAvpdB0!UmOy{w4v5HQ|th@kJ<12>Y?Zh0v@TQ5sDBYG4NaKa6 
zb4VA9Q~28KneDPTI$k{^%C(G1;;)%AQdXChdi)Rm)NZ=)+np^LGnueDieaQkC4F z{+oM6F5mL6w|9>GI9&ev;7PbEl{>Y;9K!oH9G2HN-np6yg?WrS3jD7R1bNB#yyh za!Cdo8|`U_Ff(MF%g)N`$n>QM=Uu4=C8E0?02T3OuKn~We7uwMpOB`B(`VvnDddaGVk`IPx<$C)H$6xQf~m9jVpJ*6Gbd+}Q?( zkZEgSTyL448l}D}rW+a7%THVs(ROiMkT?OHS?g+w0=6;id4T#4!3P&qwDDZjqWI2K7r#bT|M z5wdR38zZ+*={xl2LKrBj2K&coEEA8?sTh@voi~<4tWI)`)`|Seg=_2o$#{*>3i$>H z;$gTT=sl8rAc_WqQHJ3GHbH28hDj=XOUvyd?N$g(UqHgNqA*Rbm`$jFw?8V0DZ|0f z@#|nLh%a61l83DFnNEB{t|kT)yUkOHp{E;%de_vpi!P9xs_KmN74X(~GT%|!Fmc}w zNa*GM$mKU^1$hF3LSHqy9_hKzDqdc?FAOxFC>e|x^dhr(hj~FU=o-%EMij| zOmVu}eRjj7^HsaohlsFx7<>)lKE_>&dH0syYFCS3X7--ux*Lq9;VXWm~M)%o0!yO7O4vQ z>A&lG(%ZnlZ0Tjkna`L~E{+YIR9LDjzfnL)Fjj3?FFM?Qf49nHN}a|NFMIj1u$`ZW zr=OV9c(v`Ah@6*P&_A zzwj9itfN3E%gLNNI>6((U_R!JLmESvb~0%Sr82m94}=9uk23?h)&-%cWel6l%*PVC z{~cY?c64Q%L02NG3KcC^mY9wd{F|(b?M3z&v=}m0x~0MzMybOo(hq(JYEsMJeN;hHzP=$rZ!GsZa4+}=sh6uYrtXW|nhJ?(>N zuI|5550^hV}OiG~+o40gr*pMeY4q#iYM*~X zgIZrdO0!B?Z~L-n{WA2@ONIuwJo7Sk%oFvTUYr@&;CXP&^!ztUxbu*(s8&7A#YffJ zU(`MjKx1@T*G|aHofG_u$aK-unC)QO*z7D|-nBL}Gt=`RKg=M&M38PHp{dzE8b71_ z3V(&S>R2K;Ud!&Atf%i*LYuZod{Wqq$9OLoQ(rMDNE5x~WF)>DmJn>!tyf+~-* zWXW8qJ}%3&QWm{y`u0KFjJJa$L(xAcj)>S@YH&zQ*z>mT&=uxy&(5@Z=QP$Xw(ox& zB+sOyv$^YA%+K-VV$_T&wWHPlerVd85z?P!KWKC9Xf}HWDo9WEtRoJeMQK3nwNWPS zoSEK=`+LfBhPB?jS*xoo>1?P=aEzT$^2el?mzQs>Yxny6(p>)4k4jGOT);~kcB|bC zE}OHuxicSLL`IY|?kHm!6OsC17L;Kle%}xt{B-f;E#*Id!&L6}OtUhAwil*IBfGYg z>5@d&EJ%i*p0K=XdTl;Ah%0`4DGliHn+pE@C-CJtIxlrT_)cbzPsc!?EMq%&`gAi2 zD_E9IFxI36R^Dd}5hb}yw3DUTd9-X}-DY)IhWwpQhoqr6nQipZJ*sjee1W>s-?>f1rmtm(V$KkRc;e zYqN+*rkDouP|b$gPD7CFoUk7=!{5Alv$S*TuUcob?D`o8u#^K@9Tx;*K*jI@z-w<_ zkv1P0R)7PA4megBiYjaca2dKd#6|hGkCkZBnEZMhv#zK}zp@4GXJTQwHN<*DZ1-p@ zTT8*ysF+oC3G63jdM*#v^SzNbq}#&Y}>vCmB?WLA7)RUa(CD~v@X0}YYYgCu_!FoUst=V;st zE3(|dr{$(6aePzP9#ANfDLogq%2dL(=0V2&o;C(>0rPIwaL2qTu?g0#i&#fz*x!Ge zka2z4il5o+v69HBUfhBq62*yz5&(zHZk5`k@@Qp0xTUJ6coFdk8V0tTflfIx(E%f3 z;a&NdmuH3`AnYKr%~M1V1JwIM^YCv>J4AanLxWi_$9Wyj+&K9YV 
z=nRG%laVn$Ewx-%#xKIy=u-uMvSvvbOa655xL8uU8)wGL#)8zUV|D>q?hp8O&@J5e zTEgkHZT=bIb3JL4eW_90MSX8G#RkK`EFV5h2O-~cyke*??}D$fnS%Ea)6|SNU`#Z2 za$?qn8N8~NOwa>#PxKG~)3KK!Mw{yf2ky&JOy6}Z`3+CI`yQEHUe6$ZF~Lk7D4GO; zu%Nsqk0|ESLq~zdONS^a5@h-nG1Hh!OtF$^@9Vyw{pn`BVzy3GWQTMAh~i_Yn(0Uv zfDK^J1_orn*VdAT2+|HHB*>`1XXpbQN425h7Dm-W*gS&tAm;Ux=4%sn6gHutFE3^N z{p&eloRFP^o%3PW#`FaAR`URLZ-z$((d~Ly_Cr_^Bl(6m>*hq)U z(vv+H%QuG|B=8>00a6E@N30SMe7O1=Qt1QgI5BJ{`T6-xxFZbg!c~kvWaRBE z*cWH+l#>)BBvodXaobIc@w^jU*Mr+PsegW?=C-XP|YY4}+nVw$u}ctu1COQzCL zW-~_~VSYP$A{5@lV~_Ti=^&yhSs^L6!yHee)X;IteHdfNH77F?BDl%G=-H2nvGcPn zQw6*NWF84x;u9c8q3sC;rR*vhA@x94etP$1$>$nOnOVFqBvY5YDUqMgU+ z8r67CGb^b=fsXy)we8qr|3P;8Ge|E-TOn@22t!8zBxTQzEHF60f&!D*G!q4D(2b$L z;5ul7B%a7Q$&56|NhEKVLT{&6@F#hks?4z1zE(s3Yg$0?FpjI#&7fS)25)m5(>Zia zjiXYjGE=NI#l}kJtO$1D@^WcxJg@NNI~EtAyi;VCB6BM7Y1|}a(7hg*_)YYD7vp<3 z*RDHz=oCf`2;vVN9hNa%#wNsQwxMFS&qB4`t5{ikzVB;0N7$o9 zi!M!0&r0(&*ZoTAKXxoyZC348z6#!Gk{|CX`Nzc1`l>~L*>8Qp%rTO$b$a&X{dF@? zo{RnWXKaX+Lunon*L>1^0Vvba6^X(iJlu-3x9r_pdZhYa(lzs$4mjlQS@_7&6D(Y~ zaG=fOT#Xxw$r2C&t3fkD4Ppg{59`y=--p>f>BTcgyWVcUqFmsoS1`vGN64=7Zver7 zOyyv(39MC78mrg-d98Y(v+U9vtVyuXXAltJegMt|qz1A*^8E9=m3GaZl;;srVt`Zu zl9lu|{Lk0(x4s}$)~V%*NGgIa?v}SVV#j~~9gJf*MlOmw^LLyjC?9-D*c68-Xz!&M z{s0dbt4mJH_n*Hv++%H9let>z z_x7?`>d+iV`1`_yZ~FiD870<$n|ST-vKvPQ4lbJeb?#?e8nP;hJS;zG=GR_*GjThv z@0S*!BdX@$zrI($Jj5sX;wnb>tgkake}7))=KXhO-r@hpXGV#*^Xi*5dz{z;ZvVNE z<*Q~M6(xGHu5z<$OE(Ykdip;-9K5Xv%B<-RD8VP|dFFnzx00#)?HMo3?+XIG>5l)t zB>yRL6DY*q2@8<%St?gyq$`)qe|uI9o882^^K2DwEk%C{US#T(K!=+ zZFWijdWq#GJ9KF8p87dN_^}{{TWz>YJ`ns}J^T~RX@x-q^gqB45(-+w>KUa{M6DR_9Ri8uPWTe0UuWW$Mz1qMOxS4?eX#s3!lB9!RX%=#kj;G~JIgpl08P zTEu$l7Od+??$0}4%5o4BPDlNn!;Htb7M-4X{cUGZ0-O^!*d<%BG7moJQ#P1e873L@ zGW(J&CQ&GdZ&0-xS;Qt}kzH2i5|c^2?f#)Z7%?Qrj#G-AIb=*Ad4`5dH&bC?>l*g! zDX+bycgV2p>lb)lm9$3a|@0!XVaq#6E6A-4zXQ! 
zcX-{K2I-QnZs$Z(WzD*S{ho^4-sX`my-If5s1Z_)FpGnuqN>!&t>-7RYct zHBgJ)u)EgaX%zj6p!|@pml98jpVI72a=2#5bh`KNyc;bt(c;w`PuE?e4XM|zh_ZEl zwV&4PE5bK0bdj;@!e~&TdRL1^yUms#T_0J3eM|qYqO8`$v`~C_n)7f!%yG%essG?m zq?lCN?d1n?| zEDN}6d&+1>V*abGE6pg6+~@cQPkf#(Ii;GpI$0)az-Cfj>FCitetZKzhh9=EWHt7r zn2e8COD%hM;j7cBl6<;%1!W%evK5@W|9*BxFq1YI_A_l_n=ikt6!QJ-ZwxY0PCs~XMmL-)7Wy)<_2Z|DZGPfg zM^PAWY3Yg>9OgQxpZwOYCgr|@hzQDZXpZt~MhNzL?8JdhA|kg!3%oFM+8L@noSl*K z$$?ijPRa^MdKdGJy5&yJ z(qXT@ZCtgdv}Jhyx*vXH?+Ckpz3|}syIZ*eS})Lot*4w`v-aPJW=dUuMd@=vGNnW$ zCNuS(TNs0;nNg0*ZTl=9jljQx_i$*ah@8+6NwwalEo{bgxro94fy$Y(q*j3xgVp?p z67KX4JAE;#^TP9^5T&W5BNGr z_rCZTBY8S^*gyYZfq8>un>I(stpL7p)k^20pY-_!e9d}A?ODbmirS6*K|zu?BtK=T z@;Em48*n9NJTHr>`<{g6{H~St_0>|z^V03CZm*y;W<ei-h9R&i#0&d`>3?odZ*QUUv+n8Ka`js&LNk{ zuRolnbhtoYVRuIZBa>i3iq`vwt4){{1jvi_XbtysP7E|>E}XdD+|zN| zq<+bp>;{g&m)-XqwOq%>bjmgNUfm|b&grmbY&l1+VTN&(mp^X~pWu&AVH<9Y1zgJ- zq@UWOp~9{`oMXK}nK^h*R!zexOtrum#A7m%N~d852v-&LM-w)bAM6%u)LSFQw}KTh zh-`o)hWdk{xQ@Ow5aZ1RE-jYnNwPsb=FqJKCcXHke^2DjJTg_u-M9UVd$1A=^C{`x zZ(qYWyW*s}`j0P}hM}!(9n9cFFrOB2d*RCIfYYNmcL@4qOxbOPVHA_ypss@o01+H# za#C&Iv0*2HXUqe$*zUmwf;>!uOu#Af2`aen4N*e;4c}8%URFSaq~QVjF(bz!y$^cv zm?-n6#$aJGWhLG7cg2b8cnjBYX0YbeEwdHzI}lmdIbG)1&1@z+)lzGCU|KKvU5YUy z-TI3>iMLrU?{AkD@IrI~y6cGc7~LWc?y3`ZDk3Oa7tuP}p=mB*x%hgtQOMavt6&e~ zwT4rl{dm{(^c|0j;FDpwF*I_ZIMiR`vqrM)j*c9YU3~5~NfIAQT8dqNG8ioz)zY85 zf99c9Vq2}~xSg_E^;V&XP*c&8iX5TsbR}Nga2$Pl*q%#qZq*@Lu%v)3P~IGhN_bZ)({#W`2e(C2G%PZnEfD_U7`34RHLN zu(k&I?JX!%(pTS;^J%%LVM~-{V^}5kp=Cu2ljL?`gI=|E!@UUo?7+=GHf}_Z+;Zvt zF46dEu`_x*vya?LNqIu)%$XSeL5H2iQ$VFF)okG+`l>wO%)W z!SHs6WXqgJY2`-&Ivr7od!S0&lXjBt?%0)26aF{8QLp><#tt%cuaXIe(Pd3%Y4Mt- ze&VP|3{oAq9=du%lWNNbtqeQVI3YTkCHK&Y zsjAw5fu63k@-Yu{Oq+F{tH%^As+c%26zZ*_-JN7O{Z?Z!Bjdx~ys=)c$>$E}JL~G|2g6}n zj>Us$KRvh@Lm9b;gfNZW56U=V3#%`PjEtr25ER^a^UL$E7%ESQ~z8||ZuFUj*amIoZCO|5`G9l9GV5n{}; zDN%v;vo}W-+BTHDKkOCTb-u6ID17IRC{-<6R&{sNXgUO#Y*-*YJq(-u_O*8e@-KK} zR%vu#y3wtC#b1}l4;>A^RGd{A@mbycy`H0SCEMSkjp8bq;a{>g`Ut+IO#i)0CQR?X 
zY{UEko&KWtS9V$1xA$HcJoJ=TsgkK+3|ThkB#qbwQ>{Y+N?Lk57q9D*UAI2M69VW5 zxo^etI2p2@S5cEpBY7@;l}kJ!@-8*`C1X$XX({cc^LR5{i?riA&wD=gyi~fY@>u^C z))h7PvemW}rmhdT?SCpMCBDVrz{p!J{)k4s;=H27CJL3$IC?7=mtSO$L0p_^{^w%t z`brOkhARcc`m!q0{XcBI1yq!6*ET#f(kh(-64E8jpdg?Ef=Gj;v~+iOhjf=H-QC^N z-3`*+|2g;lJn#E`>-*PYv6eUtGuK?_-p4-nv5yUAcnPyIgG@Q$yBBVb_9gC(W*f++ zY@K7MZ=rO%Q)J_z+qOo}8wV5&*lS<@3kfy_iyHl+!Gh}5J#QLD^f*6E%aarM(S6sU zpQ%|I?>jFZO`X3o0$wA~AQ}||RcpROV#mT&fZJ@*tJ7GoZ|t#kcq(oxw^@4j+6K76 z7%djO8irmwWaRvI3lGxYOm+myoWsjTxNL6e&JnWuBx`!MFY@vWCmay^9C`@UEi)bk@@C;A;mbhZrYl z%SgPkM&boOD4hTUqvm`;kNY^j!Mg9Ty0KI} z-bn^-xv+up`Jv<$?}YOB*N4`#8dHfs(;Q+XXoUyABHYXsXB157!>-lnF9ee6?=-?o z?|^L->*J+Lk%_|H&+n3QU(H_voA7uc11$4qp)A>Bf^^flX17IbGU&keh%T_+YXdv- zfKF_4Fa<1NU;|6UG>1JN?yoQE4}q~EY;?_VgvaJk(LyuT>Bgw(!HE(~{3i=J#{ydT z_#%X8gZMTLkS^`p?7F;8T*!cH0p6xpyZNkv-{o#JK7bLu@3R$wz|`dksMcW1_#n>5 z_F$aS!BOF%{U3}jXRX2%>qQiuGf*=)Jlw7GN)KVi3MOUnMp+zOHE*T{t0FWIfoZu} zh9EGF*_o=A`JuQ622OdvYOk$U6gn{1$wAEh9rexk?^pu%`MACB+*vdR(HU`ZXaMXA5$hZV!6DTn|W~A-8_mjF<7@H_ei= zkB2>dj60XgbVK`Yp=%P&InqCI!`|(;4fZC)@-(lOB=_&h;p(ye%i1%_R~;7<`JQXI zcz#)<(+ZJnDRi`kPjzp4|5$E$Y-2o=C&F%bSB4IZW7=36-d3ny=}nnI9S=?6@9s-g z3kA=%qt&N=(>$kWpa6l`{by5OxAo@9oqK9U_~~iUg3H|y253rMYY+10HebS_Zw@lp z_2%;|x`pRTQ#ovf8o_!e=d0$9rzw|L+bKI2q!$l$X$gnT-ZF)%4aXPGQf_8wm*I7B zz2Tpzpp>4sbkMf_>ho5e%CZBnRMY8!)Y#Ekw|wUP2kF8;LN&{KX)?N8Dn#xk=#%@x z?fH@yqg!?Pya~y@=5$}C?{D5Lsz<#EM+=*~olJ7H_-hD&6>zfAJ~{oO{5U3lGjGKy zRBx+$aW@yu<>8=M}DYSz@7SOfJR>%g3doD+U? 
z+DEPK+qJ*@?02$sP(gYm=(%W<*UW)}9YG5T2*hA3 z1_QU$3KhY^7-@i6uO9!V*e5?1Bk6YU{c7dAm%@0aq2MHu*h2XtU;|G!k~Sa@tefw>e$9~*`qZwO`K3X?~!d zC(m$p_(%&ptOXT|A5!|`#J#=2%G&YX=U^0}b0DGo=zMBv`K}f=irSLqUeun_g9_SY zUV18mt~c^F^SKyg+!#P~+qxp^zr4KcL9sd1`6r{@Vt4(Ch7j1vOa`du79i;u0rOWN zU~M1nC?XwBm!|MwdA&ddU#p$(@+YjYI8xU%|E@rgJxWSs8-$jnzt5K80S`8DYl}+N z(3lw9v)vg4Fy&Ll7Ye+RR>Lbz*z^YsfbXFgh^Sx%QMxR>Y@zDwya{6_l`2vVjYTkt z1Xo=jna`N|PC{}hO}PTYUh!avnunf1X?!WgE?p2m@R>)0Gj2KGXE&UcX z5e+x!$%%r6wSv~nU+i=n8%WmELx|hfpc_PMB*QqES|P{wP1VQXE^R@^Inn0sLQ$mY za&4-+-xcC!R#Y#n=n7XOR4^`h=c)_dVrjvyKE3z)@0c2AX1K&RJXc6cVmD=jJ)Z^< z*Zz7*7(=F3Wo6Sl&y$Y-a9||Q!{cnz2~G>zobf(1I$_T6)xMYd*tWeQ8mf4DfprLH z&T5AEz{QP^O(E2F$HxhQp!vchEIFb-Z%RS4Bh6oeuT%=3IRTEQ$ z>+Ymm?kb$S`hM08!U0l$nc-3{p5kLIIZ;BwkxSLY-kwnXUbUU+NWl}9$5Ym}dh59k zA4C*tE?&g&th99Tb7OjX*|mXdbi;*AKEe5g6?1KxbR6d*s93XNrN!w72B$$jkKTKY zRv##9tLu?Duz$OqRm>@M^U^5%h&SJ8PcgzS6B(C=V$RC??%m?sOO_O1x)iu6ow$5d zx<<(1@M7HfHPAwO|B7zH$9woN@I}yxG!!(YkRUa@0$J`$i&r~aG!T#YpE8d9tlUs? z9&-t;-HzgOwIkwLFG8*m3`b++of!tiTZcFOkTgug))e-fk=|z+W&i&2{m9c( zh4-I6Y*c3bO(xAh>HMin=65avR(hhf`^(O3?<%Gpk>5Ct6xlL>Q}A2FKMKW zni)<`Z-PSc$Lj1Kp=OH>f$jcS@UTr>Y``An>|hL~nLT9jiLlNU8`umE4_hwc?I`?! 
z8mqUmx(ff}qwm*x`z$2~ULX|#W*f!|^ZZ~bgvG^QKCl!B6>w-;D)>M%&{<)$L`_2@ zme#~KTMeXBdk^m%9BLv@2_V34D|85n7Iu2Um#EFweyk4yp+nW^#4?i>SzWn%j# zX%x-XX3_^#G&B)?ePk=EGRuuVJTBvLRG^l~Q!FF~e^NGIkr@J;i3L>P(PI6D{gARq zdrxag%U;SrF{J7E(2;cUk>K0)d@|+95E*F4wJt4DV3LeiGcfy&h{TpqU@cOjQgBf~ zENm7Gc+0K%7w>cb!2Mm#^og#fYp|90+f!gax_ff|$zcU;d0m3Qa%Y%wn`7A((MQqW z8p(WJ;~J0P@X)Jmh8rc@jODeiy6}2Aht%UevB&C+-He>7o5|E(+VQ=sbZL$`CdGIS z+OQJ^6)32S$OUZRrye79zo_AP#gIqzqq8+gOZJePA<7QtRuG!P;SSp!5FeXan{C{b z*9~$oDaWpmjzVzJQ1XW%RqL{q6Oq41QRm7-CRR7>5T33dqCa-ETRLn=DCTIJCuqv| zz)9gCzQxgtQ2zm6%=IgP?VHGE4^>Aq701bahBgGVU8EmAuDCPfv-gYn1}D5$s_tvn z*Tw=?&rr+M)rgfz6FtWA}7Q%B8V*=6N`LNC2=o3lBf;NBe#rE3ZIN1y@Mq;5P z4>Vty57NH=_4Ol4i=Gd@SQZ&4l2TG$<>lp_r`Db6df!lwtlDefOxbkrpKukE`1z`_ z?kYjj?u_VlJ`MZIMeA7;*EP`NqRHni2i;;*oXkePzJB&a+{3aFheVuSSZ6o(r-3$k z%%_j{lB6a>@KsaqIM`l5AUMFVdc={KganB~nN z$QjA)bL9lalM}Rb8}v5s)BMk^Q+4}IwiJY0tAy(^7*_`qOdW=3E>=h26ZtYqQ^_lADSEUz5JZh_O#^W9#?ie zgP{`~oFGTK3}j_Ik>5Pxa9K}@x(J>7?lMLGC!yu1$c}vQ^YP|PKST2xt*6CZtW5V~ zT*lWw`);g5^_yp1cuY=0%R`u$=eyQ%(@LL+CI!X%#ul{z9f}!V4V<~06$fxO4o$A` zn`0?w3XU0<2Xhq!fpbg4*ck0v7ne&rYteoA`*SN37yDo@Ua<6>oz`vvMTzYKg<|&? 
z#vU^EUo&}6*PrkhAaSW8FMY2;l|bQBoy6$yYMf$AHwsX&?CR=VnePmqt~F|Olicda ze)&z0fFDhb+r$q^n6=Y=s%Y(KWNmx0Ho6JJr|s6knVM06l5+R-29gr8ENp}>ei8yFsn9RNcVY#0!%vvnV3_y>EuK+QzOXy3M{D)SM^i1-T$3k+}3^?m<6)i;ce&aSp64IHr0E=xQ9vJ4BgdWhw+EaZB zAvYi3x;~!Y&G%L%wi7sx{n?oaa{#UnFE<8Nxe7>C@wXQTtT=Ee?H!fW_&u8N>rU>uo-k_Io!g3^R@S+8X0fjBfjV z3C|#FD|}@Q9G3XvhlSdOi3+AaSA*9zW%g8eqWLXztf6u8MLJ}B_B?I*xn3j28Lm~C>MYTs85TaEe*vb>WNX5$XQcb_<%y97 z3M;QnR~+W+95K%u@^zg0BgKd3g49{Ov$7DXm>v<5kQAZi97Hi_b5-{+^mlXGo5L%o zKul@Cg22Y-Q3WyHGS(s!67yjXZ~W%=nG%KVEl4LZZxdhdd3fbTYpDxao*L2qrXhW2 zIQ*$+=Jd(*g*9AU+k_b~Bs{X&0=5Do%7&=F<$wE}Csk>6NKlAmjcBb+ni|gi&WRNL zzSP?8P-txcV%5r@5nU}J#pa-ih=}E&JdisS@2ZG(B|?3&vZ$3~I|~EebCqaMn1Lgu z94qG0{8;{@{Ji9{D?zj^gI_$pM9iHL|SEbY3$Tp6$BE=FR5+$ds3$A7f|yhL_3HgA50?Uy>&}D)3#OX_$N(J5; z?LFywa0Ui6{Ec#CS@?&oKCvHqZp>dnUaZVme{JITWHj+#d+=y-!WXLrz+;(S?Zc<>$4X7^3HsUs%xl}_80gY$~z+4Eq_fI_v4mf67U=+V1u zDV8`e6SM;)(x3C>ayS6%kC-DDI8eX@Q*&;Q)^Y-`j3D442&6I;z$}mnxLIl~h6`uf zA5Mv-Fq46G7_oX!@$r$dv9T*m=a3nd3gP?9@)XK*6)dOnK9d0>g>IfDsSYq=mM+iG z%7($BUK0R{uj4)-fpZ6fNNmuM2BH-@9CC6j$c+i0ZN$LzOH}kTq|Vitw$+IW_V(P zXfUz5WsqcQBwd@jb*qzYXQ4~EVvTm4V<9=^*5j@Jz)>>?(I%$3v9;=jMdk^=wFH$NhCkEMwpX*O9Okj$Gdo80oZXJ4CW4W0c`>?a^d z^LRC8(7LdIS;WHf{*!d)&n)rj^*4J}E8 zS#G#b2m*X>XVrLKJt-peSzHlf`+lYj)b|jkKTXmm>+2iDzB`LKoZ>Dwe8jKw!qZ#c z+}maxaU@`@LGKa2=nu3-uDNHWzX=W7pQYLz(Y<$VI*IQ67JoItJCM)}K|pT8HaBMl zJ1uk;3l0mdK=R^YCb|_j!8NR?sjlMOwR=EZ%EIw8M4Ly!hP}WCL%Vzex)og@a2p4;WWDv!l^m)MkJzd@xbuoN@pH60sIDtx^6CZ zyq~uFi?YP6g-o4M?X;jq1%M@E!Z;Fm zc0ZV(i2*IMY@_PyyV468(D&L^B9XM@W&H86k@LExRdl-Co&sick|5;=HhsMlatTPR zGzLzLcnZJeTf>E!TWW@dMS*_%<;G{GkXQl6YLoh#8_r0T*g3KwZBMnSq8IE?GSETZD#>M+D&@k?HJS@Vgg?kiROg(OAsfdUU_5VH)>^q*Wy~9_()*<;XoD zARyr7dj*yr!MvP5P_V{*Q>~(^$gF;i5Qs)-KBO{ zSTHnnYzZ6lNmsaNZTQgF7_6g$^a?&^=k!tHInsWP2v1K8;+_gV3nm8)z9h_0YrT@N zMGq;Bml!b5`rp%I5es!rzJ*gYfk)B*1jYB87nSa^mAS_=0-rCbRw!`4+ek;+k=1^7 ze|xa+D1zYa&l?j^ToRZQf#)dZzHcjjt9IP+)`VN_HC_77F`SR`>~ z{;%JtYHgUR&lB>asT<~#nZ$J80ZVZ-wDCOoS5l80JrQ8J30OR%>^fPbnBNI38Pi7$ 
z_wnuZ74gC3>r-4@L~Q#Ngt4!T76zR4K+g;g!cAeS3_y@Td%c&^T+4U21x+%Ud-4zE z5U`cxd`YpqSvc2V{Uub~TVGn^tzn*RHwIBuOmfIZzq@Ze*t5&YIZ|v+k!5X9A>Ai%Sp=p$V}1-rzeBSyl&Q%6U>Jhf_aaxo$JZyZ;**DHl^ z_k|z#jv6kuk0u4QDv^M`x+A5>*>KV8v@m(L3BlF91_g5EG^vkqyKXA)&OJyXY|WV) zW@tX;vePr%FH)>=7!-8+VcfN*Mm5ZOJly%*ot20HD^xt|G4E{wH8(2{infADjG=05 z$wV$hYpRtxZz@BW*gx?0joVsX<@Q)(zO<1MA-*WS`a2t`g5~lbqM{{M-#O`#7C+UDzV zyQo!NBcy*(q}V9c1yyd(iuLxzHbH>T3g+0LrA*Wb-Pl@%6v-niQ#L$)Vnz{Ja z@&(yzWCOc>&yQ;%wB9^w>h~th|Mwj$H)2SHak~?9{BBf>itIUw|Kjs1_+{EC5?WiY zXb}gK+>bx5swL4e@s`L5R{@93pzwS8wVBEOT$FCLYrS2?KIgwdxK(nHc;a)p__>Zu zH%K;Vp=91_)zpA^RzSQWao1AI`Kae|(@Q^JN9GAM%J{YB=l~yS5hAOosL0aef__&= zH??1uLVm3B`J8bw)EdA)t~Ws29@7^!S5&RrtJ z;BIS6kq`gIEC`JW!H|d<#QPP3WfbPDiKV0}4*>$@9OTLSDm99sSiBaI>oT4t(@-z> zjka*qTr+1MBZS|vw-w)MG_zXTABR>Cd<929IgzFASGlG-OWKtE*<_=})&lN;ngrAM zx@C9@li>$4$lgXo*ACCp0?#n;PS9)c2J6_~^@=ijzz$$0;?sI~-s|O`sHVy!LmFUH z#xbGoo|4#I3TjW|997=67L$4t-lIk{O3RhhaJaAw*=vRmY12<9E!VG}zzmlru=ax% zGL&L_`rb2Gg^u8-zhDGvfwrKBBMc00lQ&>n>jiv|3(Ed{~t~##G z*Pbe$ow7naN_C-cL7F_fRD3K&uD3h%46o+Nl?DbUif%Qp^eXi~SZ+-p)>zw+5&EeW z{`S|F%`w%K-{~ORi;m3k`dNQ`@NtoRwCcVJMI_&9-*XzTp3pJ@{Q?45(-bm&^^%@UPfNv2wzG`G!KtR@!v`$x%)b`P zxB7XrPB^O;8#}p zJpbo9{~xaayr$tN6-ZpLAH~3d$oJ2+>tjb^z>E3xDTf*PIK&Gc4`TeET=dUNpeOq; zW$W?#^tMhy%knTP}O~Ar_ZYp8+m?u_G)uL(_^UEu0ql!oLsmo@tkC& z%#pLcLy4H5F_i0z+!xEEqo?yVw&s-|eGkXB;0bmf* zk4s#`a<*)?o8SM=xI?f3YAL6c4ml8VxWb|Pl+rb^#}fgvWqJ#Qm!H_Y;B;xi1Qg?b zfQj8^qY+bg5q!8mWPnGw%aEo2OI+R9-#UAiUH0v|c)GzbkBKMr<;n#{N`EJ?l;`YJ ziEm?mT8;G9xlEWhI1&gz>17y~~5HExu@ZaOh!eRCXRZd9o&ax!0Qk8OXj zD0&u67a;TAM?A~V-=FRqM}orfhZNm_oPwPZoUv;#OUg3zwZ;RK1eDy|Sa08Y$};>A z*Cv;cSi1`RQsXE9k7dONa8q!HiE{YWrWbg^_QJWR4y0s-k6Vfv@akW`4tD*V;P5&* z&H;knU%==MycwNTsF>mVfOM?u)bP;Yc3;Uj%i|NhcS25xKz9LTV=5t%A<(NcR(&&R z97H&a!=(S_R8hYWQeR!VPq7Yl*Vhb&T&ne*2H`G1Cc1X#8*CSas$B7nXm`9wulh>L zR`Hph{rL^%OIlhy6JkF>r#r$!j0c=8R*g2mwqV>HQQz60CqB3h62Cs`KhbntyV`2b z2TO1(RglL7B_~>Q`j!4DYi|Bqae|8^j!VxRg(}FM>Mn#>r^;f($XYmu-zHAfqLUS0_w%v=Sa%ofT 
zBUQGH>UAgBht__2OcQp&2%^_M(~XA^EHb_zXk*(*&R9Opixk(AO6E#&9+&vBBxa7l zA3rN@T&-gy5(?jbZ#EsaYvGd4fDX)0cgv=hWjeFhJ>TW&*scWbwjNlyp z*|fK~(MFRP*Z<+~MA(cjlNtT+i9)mP@u=Q(sR_?y!JLN-^zoT1azp5X^Q8)&!H1Bs zzC?0q_LL>N!!ZJrDf5=DmNLYdpzAo zcbDPEiYtP@0uHF*nQ6;czs3lU7PXRjZD+x++~ABh$Jf3Fu>p(!-vHJJ_arb-i%|S= zqgb^NIqUUf88tIA83gQ6)w^f{C4h(y72tZ$@*hF3BZT6w;bzFcu+dy5poeD5NP&m3YGzPmknwUT1>O2KfU3@KMS6&0e~bi%`Q<@jXs zp-Wn--p&aR-A#jP7u^J^7Dbg6b{2v`j~7IxnVduos=)V)dW=dd&G}O$7uA|i@ri#o zlUr_ueXa?PW6kF9sYNUNE4o(Mok_s&S+B84Q;5gFcYO?tlu$_7s8h z1;v}CDdz;&Pk;k{#Yg1UzA`YGLzg^8|4~jOfR6d(t-4D? z=iN3NlWqoYXWz>zvdLB@f`Z7y2k%?M$?QPsdh+)>G9ByQGGNx2vW{OFdO*SUe0+<2Lbm6Rpe_dN`d^FTw59|^lPi&AG8vT~{h zaGFZPe+gbpHqw0EDUlB^97(~FtXm#4b*%Vc&5vV@&3vBXH=QY*VfQDtRLN*UiOj{d zjJa-0G~`}+jz=|L7KR%vYzLduT_X(?k0BYh-U8csx@GcahF%qdr} z`cV0y$*u$zu&ht#mf~1N0ros^?sgS=+FYm-obcr*1oALnLqvh$f=z)zj0$o1Wl24d z%0mJBN3FWz%VBUsb_)<7dYFCX*F6i+#^2xyk$DfzsQoE>1$Td=({t->aWU`6q*5BC z(aO^i;82KX!!w)o|sR(wvgG;L3Z>bF?0M@y_ zO~sdO1Zi&YkgM&r5r=$t;d`H3*t6Vnn_8@2S>~akrS~am4}~0#tZ&3YEYuYDg>48I zH8kFT{M4FHHmf^&#Y!31Z#pE=ks=V!W`wqGMWl?sJyMt-xfqth8^}CopXr)32~^jN zs$3>$wLPqN*knRMip;8njkwFvbnLI59AcAr7I(X5EU{eV2S&ph6;df@(oa)V`pPMj zs!h9(pZ_dbjT5^u0o$yPcWR1D3~up3&3m*vK`L~6(Ve_A_9ssp0rF-&rJnG1!+~`6 zG){{gZuxA+y2kkt3UzX~h^!&lC3bD#6<}w z?fiqiSe^Gc7a6JCV3VMt_AKgi0BC5i>)l`gf6)x!1P<0c4zRt1~%IVYz3A2v&$hq(pnupu4?0 zH=g)z%qq1;z+jNUEy+~JG1+QRuk)=emkj?40mjB!|LNI`@bYxv^N8McUZ*>A43ZN# zC;v;_@0@=}(gJqMnR-oo$veiaMHiT^63S~K?(W&yxtS(gO6&(?>|PuCpUgon=45=X zNvyL`8l#!f)FQ`sJfWRY##+ol^2~9IrRl*A?HdQ5a_rm7qOYZVFbd5Xx* z^WEy}S$efDjmfa*J~n#Nt`j+|oj1v_zd%->D^;X{M`U{q-H_QyVN#zT{!mZ*-Fdm@2? 
z(s(+>HmtMh1!-YZ?da@opX%-|c)lR(?{X6OL5AMF6^}7-;s;HEYm*3)QkJgWz+lex z8D+M6skI3JvmXCK+0VIu47qA7b~vy|`9rJ;STaAiBX3WX3v3Q2{L&n`nb^{`yx)|@ zcv!06o~@0B{;g0c|6KgZlu5Gz2b?(fyIQO<{lTuo{y6yMCK0Ih0sMn{`j{KI$*Si-+E#*zC_g5vsGro1eV~lR?|l zO^qY3p^gC2vK!qg#dDir8BY{=Hp`^8Wp$4zms{0FhS~6!#~6s#*&c+X#TC9eizbvu zTTAwH#wL9S{QJ+iTYf58!(P1Bd9#Xz9xH7)#Nx44G+D^KwE%$^BqPuv144*#&3wX+ z`+F`HMd2NwnBZnnuluG5CEH?EFEf-B;W%k!V`{p0<4k*D*|1898492x-6OB@uteIJpP0ARDw zvoZf0Rc#i%{i-vjTsDaYxt(KDyVtI>@0+v$lRSYG>@Zd@NqKmCBf#|CJM_8&%O?MO z42Q@&wr6E|u(Qaumpr|vYxSt-7v@2p0&wD8r=Ftu3OyA#g_AwHy$`%JzC zG*R2x;yGNAZ<-d-VW|knjY>7cSap^}?yG9;mIrE2x%&gMLz#!go5Z->KbEtUm zqw5i6anaRo>FYYqj%1cA8#Qv%QF#;*UCpMZFYn!5%0SorxaxCNNaJ^6I^{nWbu66!)&(_AizFCW2N`$_bFbeDRokb>-)^sGAvwJ|QRK)}o(nl7xIRkcX z<;gR{pxWy~O2_MK&`RW5++C7!-!GRD*FBIykBm%s0@`s|-H%;LnlAd>!#Z1DK)Bsr zBy<~*6c;>H+!XX=4LVi)ev8rh%XLR?cU<%8vfaCvpq2=@@ZOApGyewf5sb6vK|XKU(bE`>?Uc3m>MYZm)A zwr8%51W{4ZPtzg-lmsfe_XFMo(6gPkw^WS{a2?#6MM{x=OTi_uY`+u-LhiA5w&A+SzYB^Y%0g2nIT_PMbDoA+D7pnLu z!n3lTp3|ejU<70tIBnT463f0!1+^ZA+u_%O$2A_N8%q{II2}!N^g1~IL8&2oYQ%ou zk^SxJ#z8G-s;W>eLn-{=%H@tn`Mg_oAemci{Gu%uEC&|J#~&4>S}82Iulk+uv{FUqAFL_X|FR60)(RqJ z(By8OkGRI-Sp|<&W|#qyaykQjOYfLL1h}CeJT&Qhqnz!L{Xbu7a7%52uEh*pNB5DMeU6;c8BwnZRfI>oebK*y#YdN8c^vXXd zQ#8T%jDFlcs5Pp<<>pybupS>>f?__B?rI6^{!QPMy>gcG8;4}pKDUBolfa3zh`Y(N zw<2|nfu%LmRd0VjHZ-`5Sj`wm>wJfSOWZMV7C?Ym`jiv~n)nX+k0dvHRtXqZwHV